diff --git a/docs/DXIL.rst b/docs/DXIL.rst
index d6a3f3640..e05e3f142 100644
--- a/docs/DXIL.rst
+++ b/docs/DXIL.rst
@@ -1919,135 +1919,125 @@ ID  Name                          Description
 17  Atan                          returns the Atan
 18  Hcos                          returns the Hcos
 19  Hsin                          returns the Hsin
-20  Exp                           returns the Exp
-21  Frc                           returns the Frc
-22  Log                           returns the Log
-23  Sqrt                          returns the Sqrt
-24  Rsqrt                         returns the Rsqrt
-25  Round_ne                      returns the Round_ne
-26  Round_ni                      returns the Round_ni
-27  Round_pi                      returns the Round_pi
-28  Round_z                       returns the Round_z
-29  Bfrev                         returns the reverse bit pattern of the input value
-30  Countbits                     returns the Countbits
-31  FirstbitLo                    returns the FirstbitLo
-32  FirstbitHi                    returns src != 0? (BitWidth-1 - FirstbitHi) : -1
-33  FirstbitSHi                   returns src != 0? (BitWidth-1 - FirstbitSHi) : -1
-34  FMax                          returns the FMax of the input values
-35  FMin                          returns the FMin of the input values
-36  IMax                          returns the IMax of the input values
-37  IMin                          returns the IMin of the input values
-38  UMax                          returns the UMax of the input values
-39  UMin                          returns the UMin of the input values
-40  IMul                          returns the IMul of the input values
-41  UMul                          returns the UMul of the input values
-42  UDiv                          returns the UDiv of the input values
-43  IAddc                         returns the IAddc of the input values
-44  UAddc                         returns the UAddc of the input values
-45  ISubc                         returns the ISubc of the input values
-46  USubc                         returns the USubc of the input values
-47  FMad                          performs a fused multiply add (FMA) of the form a * b + c
-48  Fma                           performs a fused multiply add (FMA) of the form a * b + c
-49  IMad                          performs an integral IMad
-50  UMad                          performs an integral UMad
-51  Msad                          performs an integral Msad
-52  Ibfe                          performs an integral Ibfe
-53  Ubfe                          performs an integral Ubfe
-54  Bfi                           given a bit range from the LSB of a number, places that number of bits in another number at any offset
-55  Dot2                          two-dimensional vector dot-product
-56  Dot3                          three-dimensional vector dot-product
-57  Dot4                          four-dimensional vector dot-product
-58  CreateHandle                  creates the handle to a resource
-59  CBufferLoad                   loads a value from a constant buffer resource
-60  CBufferLoadLegacy             loads a value from a constant buffer resource
-61  Sample                        samples a texture
-62  SampleBias                    samples a texture after applying the input bias to the mipmap level
-63  SampleLevel                   samples a texture using a mipmap-level offset
-64  SampleGrad                    samples a texture using a gradient to influence the way the sample location is calculated
-65  SampleCmp                     samples a texture and compares a single component against the specified comparison value
-66  SampleCmpLevelZero            samples a texture and compares a single component against the specified comparison value
-67  TextureLoad                   reads texel data without any filtering or sampling
-68  TextureStore                  reads texel data without any filtering or sampling
-69  BufferLoad                    reads from a TypedBuffer
-70  BufferStore                   writes to a RWTypedBuffer
-71  BufferUpdateCounter           atomically increments/decrements the hidden 32-bit counter stored with a Count or Append UAV
-72  CheckAccessFullyMapped        determines whether all values from a Sample, Gather, or Load operation accessed mapped tiles in a tiled resource
-73  GetDimensions                 gets texture size information
-74  TextureGather                 gathers the four texels that would be used in a bi-linear filtering operation
-75  TextureGatherCmp              same as TextureGather, except this instrution performs comparison on texels, similar to SampleCmp
-76  ToDelete5                     reserved
-77  ToDelete6                     reserved
-78  Texture2DMSGetSamplePosition  gets the position of the specified sample
-79  RenderTargetGetSamplePosition gets the position of the specified sample
-80  RenderTargetGetSampleCount    gets the number of samples for a render target
-81  AtomicBinOp                   performs an atomic operation on two operands
-82  AtomicCompareExchange         atomic compare and exchange to memory
-83  Barrier                       inserts a memory barrier in the shader
-84  CalculateLOD                  calculates the level of detail
-85  Discard                       discard the current pixel
-86  DerivCoarseX                  computes the rate of change of components per stamp
-87  DerivCoarseY                  computes the rate of change of components per stamp
-88  DerivFineX                    computes the rate of change of components per pixel
-89  DerivFineY                    computes the rate of change of components per pixel
-90  EvalSnapped                   evaluates an input attribute at pixel center with an offset
-91  EvalSampleIndex               evaluates an input attribute at a sample location
-92  EvalCentroid                  evaluates an input attribute at pixel center
-93  ThreadId                      reads the thread ID
-94  GroupId                       reads the group ID (SV_GroupID)
-95  ThreadIdInGroup               reads the thread ID within the group (SV_GroupThreadID)
-96  FlattenedThreadIdInGroup      provides a flattened index for a given thread within a given group (SV_GroupIndex)
-97  EmitStream                    emits a vertex to a given stream
-98  CutStream                     completes the current primitive topology at the specified stream
-99  EmitThenCutStream             equivalent to an EmitStream followed by a CutStream
-100 MakeDouble                    creates a double value
-101 ToDelete1                     reserved
-102 ToDelete2                     reserved
-103 SplitDouble                   splits a double into low and high parts
-104 ToDelete3                     reserved
-105 ToDelete4                     reserved
-106 LoadOutputControlPoint        LoadOutputControlPoint
-107 LoadPatchConstant             LoadPatchConstant
-108 DomainLocation                DomainLocation
-109 StorePatchConstant            StorePatchConstant
-110 OutputControlPointID          OutputControlPointID
-111 PrimitiveID                   PrimitiveID
-112 CycleCounterLegacy            CycleCounterLegacy
-113 Htan                          returns the hyperbolic tangent of the specified value
-114 WaveCaptureReserved           reserved
-115 WaveIsFirstLane               returns 1 for the first lane in the wave
-116 WaveGetLaneIndex              returns the index of the current lane in the wave
-117 WaveGetLaneCount              returns the number of lanes in the wave
-118 WaveIsHelperLaneReserved      reserved
-119 WaveAnyTrue                   returns 1 if any of the lane evaluates the value to true
-120 WaveAllTrue                   returns 1 if all the lanes evaluate the value to true
-121 WaveActiveAllEqual            returns 1 if all the lanes have the same value
-122 WaveActiveBallot              returns a struct with a bit set for each lane where the condition is true
-123 WaveReadLaneAt                returns the value from the specified lane
-124 WaveReadLaneFirst             returns the value from the first lane
-125 WaveActiveOp                  returns the result the operation across waves
-126 WaveActiveBit                 returns the result of the operation across all lanes
-127 WavePrefixOp                  returns the result of the operation on prior lanes
-128 WaveGetOrderedIndex           reserved
-129 GlobalOrderedCountIncReserved reserved
-130 QuadReadLaneAt                reads from a lane in the quad
-131 QuadOp                        returns the result of a quad-level operation
-132 BitcastI16toF16               bitcast between different sizes
-133 BitcastF16toI16               bitcast between different sizes
-134 BitcastI32toF32               bitcast between different sizes
-135 BitcastF32toI32               bitcast between different sizes
-136 BitcastI64toF64               bitcast between different sizes
-137 BitcastF64toI64               bitcast between different sizes
-138 GSInstanceID                  GSInstanceID
-139 LegacyF32ToF16                legacy fuction to convert float (f32) to half (f16) (this is not related to min-precision)
-140 LegacyF16ToF32                legacy fuction to convert half (f16) to float (f32) (this is not related to min-precision)
-141 LegacyDoubleToFloat           legacy fuction to convert double to float
-142 LegacyDoubleToSInt32          legacy fuction to convert double to int32
-143 LegacyDoubleToUInt32          legacy fuction to convert double to uint32
-144 WaveAllBitCount               returns the count of bits set to 1 across the wave
-145 WavePrefixBitCount            returns the count of bits set to 1 on prior lanes
-146 SampleIndex                   returns the sample index in a sample-frequency pixel shader
-147 Coverage                      returns the coverage mask input in a pixel shader
-148 InnerCoverage                 returns underestimated coverage input from conservative rasterization in a pixel shader
+20  Htan                          returns the Htan
+21  Exp                           returns the Exp
+22  Frc                           returns the Frc
+23  Log                           returns the Log
+24  Sqrt                          returns the Sqrt
+25  Rsqrt                         returns the Rsqrt
+26  Round_ne                      returns the Round_ne
+27  Round_ni                      returns the Round_ni
+28  Round_pi                      returns the Round_pi
+29  Round_z                       returns the Round_z
+30  Bfrev                         returns the reverse bit pattern of the input value
+31  Countbits                     returns the Countbits
+32  FirstbitLo                    returns the FirstbitLo
+33  FirstbitHi                    returns src != 0? (BitWidth-1 - FirstbitHi) : -1
+34  FirstbitSHi                   returns src != 0? (BitWidth-1 - FirstbitSHi) : -1
+35  FMax                          returns the FMax of the input values
+36  FMin                          returns the FMin of the input values
+37  IMax                          returns the IMax of the input values
+38  IMin                          returns the IMin of the input values
+39  UMax                          returns the UMax of the input values
+40  UMin                          returns the UMin of the input values
+41  IMul                          returns the IMul of the input values
+42  UMul                          returns the UMul of the input values
+43  UDiv                          returns the UDiv of the input values
+44  IAddc                         returns the IAddc of the input values
+45  UAddc                         returns the UAddc of the input values
+46  ISubc                         returns the ISubc of the input values
+47  USubc                         returns the USubc of the input values
+48  FMad                          performs a fused multiply add (FMA) of the form a * b + c
+49  Fma                           performs a fused multiply add (FMA) of the form a * b + c
+50  IMad                          performs an integral IMad
+51  UMad                          performs an integral UMad
+52  Msad                          performs an integral Msad
+53  Ibfe                          performs an integral Ibfe
+54  Ubfe                          performs an integral Ubfe
+55  Bfi                           given a bit range from the LSB of a number, places that number of bits in another number at any offset
+56  Dot2                          two-dimensional vector dot-product
+57  Dot3                          three-dimensional vector dot-product
+58  Dot4                          four-dimensional vector dot-product
+59  CreateHandle                  creates the handle to a resource
+60  CBufferLoad                   loads a value from a constant buffer resource
+61  CBufferLoadLegacy             loads a value from a constant buffer resource
+62  Sample                        samples a texture
+63  SampleBias                    samples a texture after applying the input bias to the mipmap level
+64  SampleLevel                   samples a texture using a mipmap-level offset
+65  SampleGrad                    samples a texture using a gradient to influence the way the sample location is calculated
+66  SampleCmp                     samples a texture and compares a single component against the specified comparison value
+67  SampleCmpLevelZero            samples a texture and compares a single component against the specified comparison value
+68  TextureLoad                   reads texel data without any filtering or sampling
+69  TextureStore                  writes texel data to a texture
+70  BufferLoad                    reads from a TypedBuffer
+71  BufferStore                   writes to a RWTypedBuffer
+72  BufferUpdateCounter           atomically increments/decrements the hidden 32-bit counter stored with a Count or Append UAV
+73  CheckAccessFullyMapped        determines whether all values from a Sample, Gather, or Load operation accessed mapped tiles in a tiled resource
+74  GetDimensions                 gets texture size information
+75  TextureGather                 gathers the four texels that would be used in a bi-linear filtering operation
+76  TextureGatherCmp              same as TextureGather, except this instruction performs comparison on texels, similar to SampleCmp
+77  Texture2DMSGetSamplePosition  gets the position of the specified sample
+78  RenderTargetGetSamplePosition gets the position of the specified sample
+79  RenderTargetGetSampleCount    gets the number of samples for a render target
+80  AtomicBinOp                   performs an atomic operation on two operands
+81  AtomicCompareExchange         atomic compare and exchange to memory
+82  Barrier                       inserts a memory barrier in the shader
+83  CalculateLOD                  calculates the level of detail
+84  Discard                       discard the current pixel
+85  DerivCoarseX                  computes the rate of change of components per stamp
+86  DerivCoarseY                  computes the rate of change of components per stamp
+87  DerivFineX                    computes the rate of change of components per pixel
+88  DerivFineY                    computes the rate of change of components per pixel
+89  EvalSnapped                   evaluates an input attribute at pixel center with an offset
+90  EvalSampleIndex               evaluates an input attribute at a sample location
+91  EvalCentroid                  evaluates an input attribute at pixel center
+92  SampleIndex                   returns the sample index in a sample-frequency pixel shader
+93  Coverage                      returns the coverage mask input in a pixel shader
+94  InnerCoverage                 returns underestimated coverage input from conservative rasterization in a pixel shader
+95  ThreadId                      reads the thread ID
+96  GroupId                       reads the group ID (SV_GroupID)
+97  ThreadIdInGroup               reads the thread ID within the group (SV_GroupThreadID)
+98  FlattenedThreadIdInGroup      provides a flattened index for a given thread within a given group (SV_GroupIndex)
+99  EmitStream                    emits a vertex to a given stream
+100 CutStream                     completes the current primitive topology at the specified stream
+101 EmitThenCutStream             equivalent to an EmitStream followed by a CutStream
+102 GSInstanceID                  GSInstanceID
+103 MakeDouble                    creates a double value
+104 SplitDouble                   splits a double into low and high parts
+105 LoadOutputControlPoint        LoadOutputControlPoint
+106 LoadPatchConstant             LoadPatchConstant
+107 DomainLocation                DomainLocation
+108 StorePatchConstant            StorePatchConstant
+109 OutputControlPointID          OutputControlPointID
+110 PrimitiveID                   PrimitiveID
+111 CycleCounterLegacy            CycleCounterLegacy
+112 WaveIsFirstLane               returns 1 for the first lane in the wave
+113 WaveGetLaneIndex              returns the index of the current lane in the wave
+114 WaveGetLaneCount              returns the number of lanes in the wave
+115 WaveAnyTrue                   returns 1 if any of the lanes evaluates the value to true
+116 WaveAllTrue                   returns 1 if all the lanes evaluate the value to true
+117 WaveActiveAllEqual            returns 1 if all the lanes have the same value
+118 WaveActiveBallot              returns a struct with a bit set for each lane where the condition is true
+119 WaveReadLaneAt                returns the value from the specified lane
+120 WaveReadLaneFirst             returns the value from the first lane
+121 WaveActiveOp                  returns the result of the operation across waves
+122 WaveActiveBit                 returns the result of the operation across all lanes
+123 WavePrefixOp                  returns the result of the operation on prior lanes
+124 QuadReadLaneAt                reads from a lane in the quad
+125 QuadOp                        returns the result of a quad-level operation
+126 BitcastI16toF16               bitcast between different sizes
+127 BitcastF16toI16               bitcast between different sizes
+128 BitcastI32toF32               bitcast between different sizes
+129 BitcastF32toI32               bitcast between different sizes
+130 BitcastI64toF64               bitcast between different sizes
+131 BitcastF64toI64               bitcast between different sizes
+132 LegacyF32ToF16                legacy function to convert float (f32) to half (f16) (this is not related to min-precision)
+133 LegacyF16ToF32                legacy function to convert half (f16) to float (f32) (this is not related to min-precision)
+134 LegacyDoubleToFloat           legacy function to convert double to float
+135 LegacyDoubleToSInt32          legacy function to convert double to int32
+136 LegacyDoubleToUInt32          legacy function to convert double to uint32
+137 WaveAllBitCount               returns the count of bits set to 1 across the wave
+138 WavePrefixBitCount            returns the count of bits set to 1 on prior lanes
 === ============================= ================================================================================================================
 
 
@@ -2284,6 +2274,7 @@ SM.TRIOUTPUTPRIMITIVEMISMATCH         Hull Shader declared with Tri Domain must
 SM.UNDEFINEDOUTPUT                    Not all elements of output %0 were written
 SM.VALIDDOMAIN                        Invalid Tessellator Domain specified. Must be isoline, tri or quad
 TYPES.DEFINED                         Type must be defined based on DXIL primitives
+TYPES.I8                              I8 can only be used as an immediate value for intrinsics
 TYPES.INTWIDTH                        Int type must be of valid width
 TYPES.NOMULTIDIM                      Only one dimension allowed for array type
 TYPES.NOVECTOR                        Vector types must not be present
diff --git a/include/dxc/HLSL/DxilConstants.h b/include/dxc/HLSL/DxilConstants.h
index 4904be8ec..93b8fa09f 100644
--- a/include/dxc/HLSL/DxilConstants.h
+++ b/include/dxc/HLSL/DxilConstants.h
@@ -26,8 +26,8 @@ import hctdb_instrhelp
 
 namespace DXIL {
   // DXIL version.
-  const unsigned kDxilMajor = 0;
-  const unsigned kDxilMinor = 7;
+  const unsigned kDxilMajor = 1;
+  const unsigned kDxilMinor = 0;
 
   inline unsigned MakeDxilVersion(unsigned DxilMajor, unsigned DxilMinor) {
     return 0 | (DxilMajor << 8) | (DxilMinor);
@@ -246,137 +246,126 @@ namespace DXIL {
   // OPCODE-ENUM:BEGIN
   // Enumeration for operations specified by DXIL
   enum class OpCode : unsigned {
-    // 
-    GlobalOrderedCountIncReserved = 129, // reserved
-    ToDelete1 = 101, // reserved
-    ToDelete2 = 102, // reserved
-    ToDelete3 = 104, // reserved
-    ToDelete4 = 105, // reserved
-    ToDelete5 = 76, // reserved
-    ToDelete6 = 77, // reserved
-  
     // Binary float
-    FMax = 34, // returns the FMax of the input values
-    FMin = 35, // returns the FMin of the input values
+    FMax = 35, // returns the FMax of the input values
+    FMin = 36, // returns the FMin of the input values
   
     // Binary int with carry
-    IAddc = 43, // returns the IAddc of the input values
-    ISubc = 45, // returns the ISubc of the input values
-    UAddc = 44, // returns the UAddc of the input values
-    USubc = 46, // returns the USubc of the input values
+    IAddc = 44, // returns the IAddc of the input values
+    ISubc = 46, // returns the ISubc of the input values
+    UAddc = 45, // returns the UAddc of the input values
+    USubc = 47, // returns the USubc of the input values
   
     // Binary int with two outputs
-    IMul = 40, // returns the IMul of the input values
-    UDiv = 42, // returns the UDiv of the input values
-    UMul = 41, // returns the UMul of the input values
+    IMul = 41, // returns the IMul of the input values
+    UDiv = 43, // returns the UDiv of the input values
+    UMul = 42, // returns the UMul of the input values
   
     // Binary int
-    IMax = 36, // returns the IMax of the input values
-    IMin = 37, // returns the IMin of the input values
-    UMax = 38, // returns the UMax of the input values
-    UMin = 39, // returns the UMin of the input values
+    IMax = 37, // returns the IMax of the input values
+    IMin = 38, // returns the IMin of the input values
+    UMax = 39, // returns the UMax of the input values
+    UMin = 40, // returns the UMin of the input values
   
     // Bitcasts with different sizes
-    BitcastF16toI16 = 133, // bitcast between different sizes
-    BitcastF32toI32 = 135, // bitcast between different sizes
-    BitcastF64toI64 = 137, // bitcast between different sizes
-    BitcastI16toF16 = 132, // bitcast between different sizes
-    BitcastI32toF32 = 134, // bitcast between different sizes
-    BitcastI64toF64 = 136, // bitcast between different sizes
+    BitcastF16toI16 = 127, // bitcast between different sizes
+    BitcastF32toI32 = 129, // bitcast between different sizes
+    BitcastF64toI64 = 131, // bitcast between different sizes
+    BitcastI16toF16 = 126, // bitcast between different sizes
+    BitcastI32toF32 = 128, // bitcast between different sizes
+    BitcastI64toF64 = 130, // bitcast between different sizes
   
     // Compute shader
-    FlattenedThreadIdInGroup = 96, // provides a flattened index for a given thread within a given group (SV_GroupIndex)
-    GroupId = 94, // reads the group ID (SV_GroupID)
-    ThreadId = 93, // reads the thread ID
-    ThreadIdInGroup = 95, // reads the thread ID within the group (SV_GroupThreadID)
+    FlattenedThreadIdInGroup = 98, // provides a flattened index for a given thread within a given group (SV_GroupIndex)
+    GroupId = 96, // reads the group ID (SV_GroupID)
+    ThreadId = 95, // reads the thread ID
+    ThreadIdInGroup = 97, // reads the thread ID within the group (SV_GroupThreadID)
   
     // Domain and hull shader
-    LoadOutputControlPoint = 106, // LoadOutputControlPoint
-    LoadPatchConstant = 107, // LoadPatchConstant
+    LoadOutputControlPoint = 105, // LoadOutputControlPoint
+    LoadPatchConstant = 106, // LoadPatchConstant
   
     // Domain shader
-    DomainLocation = 108, // DomainLocation
+    DomainLocation = 107, // DomainLocation
   
     // Dot
-    Dot2 = 55, // two-dimensional vector dot-product
-    Dot3 = 56, // three-dimensional vector dot-product
-    Dot4 = 57, // four-dimensional vector dot-product
+    Dot2 = 56, // two-dimensional vector dot-product
+    Dot3 = 57, // three-dimensional vector dot-product
+    Dot4 = 58, // four-dimensional vector dot-product
   
     // Double precision
-    LegacyDoubleToFloat = 141, // legacy fuction to convert double to float
-    LegacyDoubleToSInt32 = 142, // legacy fuction to convert double to int32
-    LegacyDoubleToUInt32 = 143, // legacy fuction to convert double to uint32
-    MakeDouble = 100, // creates a double value
-    SplitDouble = 103, // splits a double into low and high parts
-  
-    // GS
-    GSInstanceID = 138, // GSInstanceID
+    LegacyDoubleToFloat = 134, // legacy function to convert double to float
+    LegacyDoubleToSInt32 = 135, // legacy function to convert double to int32
+    LegacyDoubleToUInt32 = 136, // legacy function to convert double to uint32
+    MakeDouble = 103, // creates a double value
+    SplitDouble = 104, // splits a double into low and high parts
   
     // Geometry shader
-    CutStream = 98, // completes the current primitive topology at the specified stream
-    EmitStream = 97, // emits a vertex to a given stream
-    EmitThenCutStream = 99, // equivalent to an EmitStream followed by a CutStream
+    CutStream = 100, // completes the current primitive topology at the specified stream
+    EmitStream = 99, // emits a vertex to a given stream
+    EmitThenCutStream = 101, // equivalent to an EmitStream followed by a CutStream
+    GSInstanceID = 102, // GSInstanceID
   
     // Hull shader
-    OutputControlPointID = 110, // OutputControlPointID
-    PrimitiveID = 111, // PrimitiveID
-    StorePatchConstant = 109, // StorePatchConstant
+    OutputControlPointID = 109, // OutputControlPointID
+    PrimitiveID = 110, // PrimitiveID
+    StorePatchConstant = 108, // StorePatchConstant
   
     // Legacy floating-point
-    LegacyF16ToF32 = 140, // legacy fuction to convert half (f16) to float (f32) (this is not related to min-precision)
-    LegacyF32ToF16 = 139, // legacy fuction to convert float (f32) to half (f16) (this is not related to min-precision)
+    LegacyF16ToF32 = 133, // legacy function to convert half (f16) to float (f32) (this is not related to min-precision)
+    LegacyF32ToF16 = 132, // legacy function to convert float (f32) to half (f16) (this is not related to min-precision)
   
     // Other
-    CycleCounterLegacy = 112, // CycleCounterLegacy
+    CycleCounterLegacy = 111, // CycleCounterLegacy
   
     // Pixel shader
-    CalculateLOD = 84, // calculates the level of detail
-    Coverage = 147, // returns the coverage mask input in a pixel shader
-    DerivCoarseX = 86, // computes the rate of change of components per stamp
-    DerivCoarseY = 87, // computes the rate of change of components per stamp
-    DerivFineX = 88, // computes the rate of change of components per pixel
-    DerivFineY = 89, // computes the rate of change of components per pixel
-    Discard = 85, // discard the current pixel
-    EvalCentroid = 92, // evaluates an input attribute at pixel center
-    EvalSampleIndex = 91, // evaluates an input attribute at a sample location
-    EvalSnapped = 90, // evaluates an input attribute at pixel center with an offset
-    InnerCoverage = 148, // returns underestimated coverage input from conservative rasterization in a pixel shader
-    SampleIndex = 146, // returns the sample index in a sample-frequency pixel shader
+    CalculateLOD = 83, // calculates the level of detail
+    Coverage = 93, // returns the coverage mask input in a pixel shader
+    DerivCoarseX = 85, // computes the rate of change of components per stamp
+    DerivCoarseY = 86, // computes the rate of change of components per stamp
+    DerivFineX = 87, // computes the rate of change of components per pixel
+    DerivFineY = 88, // computes the rate of change of components per pixel
+    Discard = 84, // discard the current pixel
+    EvalCentroid = 91, // evaluates an input attribute at pixel center
+    EvalSampleIndex = 90, // evaluates an input attribute at a sample location
+    EvalSnapped = 89, // evaluates an input attribute at pixel center with an offset
+    InnerCoverage = 94, // returns underestimated coverage input from conservative rasterization in a pixel shader
+    SampleIndex = 92, // returns the sample index in a sample-frequency pixel shader
   
     // Quaternary
-    Bfi = 54, // given a bit range from the LSB of a number, places that number of bits in another number at any offset
+    Bfi = 55, // given a bit range from the LSB of a number, places that number of bits in another number at any offset
   
     // Resources - gather
-    TextureGather = 74, // gathers the four texels that would be used in a bi-linear filtering operation
-    TextureGatherCmp = 75, // same as TextureGather, except this instrution performs comparison on texels, similar to SampleCmp
+    TextureGather = 75, // gathers the four texels that would be used in a bi-linear filtering operation
+    TextureGatherCmp = 76, // same as TextureGather, except this instruction performs comparison on texels, similar to SampleCmp
   
     // Resources - sample
-    RenderTargetGetSampleCount = 80, // gets the number of samples for a render target
-    RenderTargetGetSamplePosition = 79, // gets the position of the specified sample
-    Sample = 61, // samples a texture
-    SampleBias = 62, // samples a texture after applying the input bias to the mipmap level
-    SampleCmp = 65, // samples a texture and compares a single component against the specified comparison value
-    SampleCmpLevelZero = 66, // samples a texture and compares a single component against the specified comparison value
-    SampleGrad = 64, // samples a texture using a gradient to influence the way the sample location is calculated
-    SampleLevel = 63, // samples a texture using a mipmap-level offset
-    Texture2DMSGetSamplePosition = 78, // gets the position of the specified sample
+    RenderTargetGetSampleCount = 79, // gets the number of samples for a render target
+    RenderTargetGetSamplePosition = 78, // gets the position of the specified sample
+    Sample = 62, // samples a texture
+    SampleBias = 63, // samples a texture after applying the input bias to the mipmap level
+    SampleCmp = 66, // samples a texture and compares a single component against the specified comparison value
+    SampleCmpLevelZero = 67, // samples a texture and compares a single component against the specified comparison value
+    SampleGrad = 65, // samples a texture using a gradient to influence the way the sample location is calculated
+    SampleLevel = 64, // samples a texture using a mipmap-level offset
+    Texture2DMSGetSamplePosition = 77, // gets the position of the specified sample
   
     // Resources
-    BufferLoad = 69, // reads from a TypedBuffer
-    BufferStore = 70, // writes to a RWTypedBuffer
-    BufferUpdateCounter = 71, // atomically increments/decrements the hidden 32-bit counter stored with a Count or Append UAV
-    CBufferLoad = 59, // loads a value from a constant buffer resource
-    CBufferLoadLegacy = 60, // loads a value from a constant buffer resource
-    CheckAccessFullyMapped = 72, // determines whether all values from a Sample, Gather, or Load operation accessed mapped tiles in a tiled resource
-    CreateHandle = 58, // creates the handle to a resource
-    GetDimensions = 73, // gets texture size information
-    TextureLoad = 67, // reads texel data without any filtering or sampling
-    TextureStore = 68, // reads texel data without any filtering or sampling
+    BufferLoad = 70, // reads from a TypedBuffer
+    BufferStore = 71, // writes to a RWTypedBuffer
+    BufferUpdateCounter = 72, // atomically increments/decrements the hidden 32-bit counter stored with a Count or Append UAV
+    CBufferLoad = 60, // loads a value from a constant buffer resource
+    CBufferLoadLegacy = 61, // loads a value from a constant buffer resource
+    CheckAccessFullyMapped = 73, // determines whether all values from a Sample, Gather, or Load operation accessed mapped tiles in a tiled resource
+    CreateHandle = 59, // creates the handle to a resource
+    GetDimensions = 74, // gets texture size information
+    TextureLoad = 68, // reads texel data without any filtering or sampling
+    TextureStore = 69, // writes texel data to a texture
   
     // Synchronization
-    AtomicBinOp = 81, // performs an atomic operation on two operands
-    AtomicCompareExchange = 82, // atomic compare and exchange to memory
-    Barrier = 83, // inserts a memory barrier in the shader
+    AtomicBinOp = 80, // performs an atomic operation on two operands
+    AtomicCompareExchange = 81, // atomic compare and exchange to memory
+    Barrier = 82, // inserts a memory barrier in the shader
   
     // Temporary, indexable, input, output registers
     LoadInput = 4, // loads the value from shader input
@@ -387,73 +376,70 @@ namespace DXIL {
     TempRegStore = 1, // helper store operation
   
     // Tertiary float
-    FMad = 47, // performs a fused multiply add (FMA) of the form a * b + c
-    Fma = 48, // performs a fused multiply add (FMA) of the form a * b + c
+    FMad = 48, // performs a fused multiply add (FMA) of the form a * b + c
+    Fma = 49, // performs a fused multiply add (FMA) of the form a * b + c
   
     // Tertiary int
-    IMad = 49, // performs an integral IMad
-    Ibfe = 52, // performs an integral Ibfe
-    Msad = 51, // performs an integral Msad
-    UMad = 50, // performs an integral UMad
-    Ubfe = 53, // performs an integral Ubfe
+    IMad = 50, // performs an integral IMad
+    Ibfe = 53, // performs an integral Ibfe
+    Msad = 52, // performs an integral Msad
+    UMad = 51, // performs an integral UMad
+    Ubfe = 54, // performs an integral Ubfe
   
     // Unary float - rounding
-    Round_ne = 25, // returns the Round_ne
-    Round_ni = 26, // returns the Round_ni
-    Round_pi = 27, // returns the Round_pi
-    Round_z = 28, // returns the Round_z
+    Round_ne = 26, // returns the Round_ne
+    Round_ni = 27, // returns the Round_ni
+    Round_pi = 28, // returns the Round_pi
+    Round_z = 29, // returns the Round_z
   
     // Unary float
     Acos = 15, // returns the Acos
     Asin = 16, // returns the Asin
     Atan = 17, // returns the Atan
     Cos = 12, // returns cosine(theta) for theta in radians.
-    Exp = 20, // returns the Exp
+    Exp = 21, // returns the Exp
     FAbs = 6, // returns the absolute value of the input value.
-    Frc = 21, // returns the Frc
+    Frc = 22, // returns the Frc
     Hcos = 18, // returns the Hcos
     Hsin = 19, // returns the Hsin
-    Htan = 113, // returns the hyperbolic tangent of the specified value
+    Htan = 20, // returns the Htan
     IsFinite = 10, // returns the IsFinite
     IsInf = 9, // returns the IsInf
     IsNaN = 8, // returns the IsNaN
     IsNormal = 11, // returns the IsNormal
-    Log = 22, // returns the Log
-    Rsqrt = 24, // returns the Rsqrt
+    Log = 23, // returns the Log
+    Rsqrt = 25, // returns the Rsqrt
     Saturate = 7, // clamps the result of a single or double precision floating point value to [0.0f...1.0f]
     Sin = 13, // returns the Sin
-    Sqrt = 23, // returns the Sqrt
+    Sqrt = 24, // returns the Sqrt
     Tan = 14, // returns the Tan
   
     // Unary int
-    Bfrev = 29, // returns the reverse bit pattern of the input value
-    Countbits = 30, // returns the Countbits
-    FirstbitHi = 32, // returns src != 0? (BitWidth-1 - FirstbitHi) : -1
-    FirstbitLo = 31, // returns the FirstbitLo
-    FirstbitSHi = 33, // returns src != 0? (BitWidth-1 - FirstbitSHi) : -1
+    Bfrev = 30, // returns the reverse bit pattern of the input value
+    Countbits = 31, // returns the Countbits
+    FirstbitHi = 33, // returns src != 0? (BitWidth-1 - FirstbitHi) : -1
+    FirstbitLo = 32, // returns the FirstbitLo
+    FirstbitSHi = 34, // returns src != 0? (BitWidth-1 - FirstbitSHi) : -1
   
     // Wave
-    QuadOp = 131, // returns the result of a quad-level operation
-    QuadReadLaneAt = 130, // reads from a lane in the quad
-    WaveActiveAllEqual = 121, // returns 1 if all the lanes have the same value
-    WaveActiveBallot = 122, // returns a struct with a bit set for each lane where the condition is true
-    WaveActiveBit = 126, // returns the result of the operation across all lanes
-    WaveActiveOp = 125, // returns the result the operation across waves
-    WaveAllBitCount = 144, // returns the count of bits set to 1 across the wave
-    WaveAllTrue = 120, // returns 1 if all the lanes evaluate the value to true
-    WaveAnyTrue = 119, // returns 1 if any of the lane evaluates the value to true
-    WaveCaptureReserved = 114, // reserved
-    WaveGetLaneCount = 117, // returns the number of lanes in the wave
-    WaveGetLaneIndex = 116, // returns the index of the current lane in the wave
-    WaveGetOrderedIndex = 128, // reserved
-    WaveIsFirstLane = 115, // returns 1 for the first lane in the wave
-    WaveIsHelperLaneReserved = 118, // reserved
-    WavePrefixBitCount = 145, // returns the count of bits set to 1 on prior lanes
-    WavePrefixOp = 127, // returns the result of the operation on prior lanes
-    WaveReadLaneAt = 123, // returns the value from the specified lane
-    WaveReadLaneFirst = 124, // returns the value from the first lane
+    QuadOp = 125, // returns the result of a quad-level operation
+    QuadReadLaneAt = 124, // reads from a lane in the quad
+    WaveActiveAllEqual = 117, // returns 1 if all the lanes have the same value
+    WaveActiveBallot = 118, // returns a struct with a bit set for each lane where the condition is true
+    WaveActiveBit = 122, // returns the result of the operation across all lanes
+    WaveActiveOp = 121, // returns the result of the operation across waves
+    WaveAllBitCount = 137, // returns the count of bits set to 1 across the wave
+    WaveAllTrue = 116, // returns 1 if all the lanes evaluate the value to true
+    WaveAnyTrue = 115, // returns 1 if any of the lanes evaluates the value to true
+    WaveGetLaneCount = 114, // returns the number of lanes in the wave
+    WaveGetLaneIndex = 113, // returns the index of the current lane in the wave
+    WaveIsFirstLane = 112, // returns 1 for the first lane in the wave
+    WavePrefixBitCount = 138, // returns the count of bits set to 1 on prior lanes
+    WavePrefixOp = 123, // returns the result of the operation on prior lanes
+    WaveReadLaneAt = 119, // returns the value from the specified lane
+    WaveReadLaneFirst = 120, // returns the value from the first lane
   
-    NumOpCodes = 149 // exclusive last value of enumeration
+    NumOpCodes = 139 // exclusive last value of enumeration
   };
   // OPCODE-ENUM:END
 
@@ -461,9 +447,6 @@ namespace DXIL {
   // OPCODECLASS-ENUM:BEGIN
   // Groups for DXIL operations with equivalent function templates
   enum class OpCodeClass : unsigned {
-    // 
-    Reserved,
-  
     // Binary int with carry
     BinaryWithCarry,
   
@@ -506,13 +489,11 @@ namespace DXIL {
     MakeDouble,
     SplitDouble,
   
-    // GS
-    GSInstanceID,
-  
     // Geometry shader
     CutStream,
     EmitStream,
     EmitThenCutStream,
+    GSInstanceID,
   
     // Hull shader
     OutputControlPointID,
@@ -538,6 +519,7 @@ namespace DXIL {
     EvalSnapped,
     InnerCoverage,
     SampleIndex,
+    Unary,
   
     // Quaternary
     Quaternary,
@@ -587,7 +569,6 @@ namespace DXIL {
   
     // Unary float
     IsSpecialFloat,
-    Unary,
   
     // Unary int
     UnaryBits,
@@ -609,7 +590,7 @@ namespace DXIL {
     WaveReadLaneAt,
     WaveReadLaneFirst,
   
-    NumOpClasses = 94 // exclusive last value of enumeration
+    NumOpClasses = 93 // exclusive last value of enumeration
   };
   // OPCODECLASS-ENUM:END
 
diff --git a/include/dxc/HLSL/DxilInstructions.h b/include/dxc/HLSL/DxilInstructions.h
index c5a6dfdb2..de4c11a67 100644
--- a/include/dxc/HLSL/DxilInstructions.h
+++ b/include/dxc/HLSL/DxilInstructions.h
@@ -1058,6 +1058,24 @@ struct DxilInst_Hsin {
   llvm::Value *get_value() const { return Instr->getOperand(1); }
 };
 
+/// This instruction returns the Htan
+struct DxilInst_Htan {
+  const llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_Htan(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::Htan);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (2 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+  // Accessors
+  llvm::Value *get_value() const { return Instr->getOperand(1); }
+};
+
 /// This instruction returns the Exp
 struct DxilInst_Exp {
   const llvm::Instruction *Instr;
@@ -2519,6 +2537,54 @@ struct DxilInst_EvalCentroid {
   llvm::Value *get_inputColIndex() const { return Instr->getOperand(3); }
 };
 
+/// This instruction returns the sample index in a sample-frequency pixel shader
+struct DxilInst_SampleIndex {
+  const llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_SampleIndex(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::SampleIndex);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (1 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+};
+
+/// This instruction returns the coverage mask input in a pixel shader
+struct DxilInst_Coverage {
+  const llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_Coverage(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::Coverage);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (1 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+};
+
+/// This instruction returns underestimated coverage input from conservative rasterization in a pixel shader
+struct DxilInst_InnerCoverage {
+  const llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_InnerCoverage(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::InnerCoverage);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (1 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+};
+
 /// This instruction reads the thread ID
 struct DxilInst_ThreadId {
   const llvm::Instruction *Instr;
@@ -2643,6 +2709,22 @@ struct DxilInst_EmitThenCutStream {
   llvm::Value *get_streamId() const { return Instr->getOperand(1); }
 };
 
+/// This instruction GSInstanceID
+struct DxilInst_GSInstanceID {
+  const llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_GSInstanceID(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::GSInstanceID);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (1 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+};
+
 /// This instruction creates a double value
 struct DxilInst_MakeDouble {
   const llvm::Instruction *Instr;
@@ -2809,24 +2891,6 @@ struct DxilInst_CycleCounterLegacy {
   }
 };
 
-/// This instruction returns the hyperbolic tangent of the specified value
-struct DxilInst_Htan {
-  const llvm::Instruction *Instr;
-  // Construction and identification
-  DxilInst_Htan(llvm::Instruction *pInstr) : Instr(pInstr) {}
-  operator bool() const {
-    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::Htan);
-  }
-  // Validation support
-  bool isAllowed() const { return true; }
-  bool isArgumentListValid() const {
-    if (2 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
-    return true;
-  }
-  // Accessors
-  llvm::Value *get_value() const { return Instr->getOperand(1); }
-};
-
 /// This instruction returns 1 for the first lane in the wave
 struct DxilInst_WaveIsFirstLane {
   const llvm::Instruction *Instr;
@@ -3196,22 +3260,6 @@ struct DxilInst_BitcastF64toI64 {
   llvm::Value *get_value() const { return Instr->getOperand(1); }
 };
 
-/// This instruction GSInstanceID
-struct DxilInst_GSInstanceID {
-  const llvm::Instruction *Instr;
-  // Construction and identification
-  DxilInst_GSInstanceID(llvm::Instruction *pInstr) : Instr(pInstr) {}
-  operator bool() const {
-    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::GSInstanceID);
-  }
-  // Validation support
-  bool isAllowed() const { return true; }
-  bool isArgumentListValid() const {
-    if (1 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
-    return true;
-  }
-};
-
 /// This instruction legacy fuction to convert float (f32) to half (f16) (this is not related to min-precision)
 struct DxilInst_LegacyF32ToF16 {
   const llvm::Instruction *Instr;
@@ -3337,53 +3385,5 @@ struct DxilInst_WavePrefixBitCount {
   // Accessors
   llvm::Value *get_value() const { return Instr->getOperand(1); }
 };
-
-/// This instruction returns the sample index in a sample-frequency pixel shader
-struct DxilInst_SampleIndex {
-  const llvm::Instruction *Instr;
-  // Construction and identification
-  DxilInst_SampleIndex(llvm::Instruction *pInstr) : Instr(pInstr) {}
-  operator bool() const {
-    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::SampleIndex);
-  }
-  // Validation support
-  bool isAllowed() const { return true; }
-  bool isArgumentListValid() const {
-    if (1 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
-    return true;
-  }
-};
-
-/// This instruction returns the coverage mask input in a pixel shader
-struct DxilInst_Coverage {
-  const llvm::Instruction *Instr;
-  // Construction and identification
-  DxilInst_Coverage(llvm::Instruction *pInstr) : Instr(pInstr) {}
-  operator bool() const {
-    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::Coverage);
-  }
-  // Validation support
-  bool isAllowed() const { return true; }
-  bool isArgumentListValid() const {
-    if (1 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
-    return true;
-  }
-};
-
-/// This instruction returns underestimated coverage input from conservative rasterization in a pixel shader
-struct DxilInst_InnerCoverage {
-  const llvm::Instruction *Instr;
-  // Construction and identification
-  DxilInst_InnerCoverage(llvm::Instruction *pInstr) : Instr(pInstr) {}
-  operator bool() const {
-    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::InnerCoverage);
-  }
-  // Validation support
-  bool isAllowed() const { return true; }
-  bool isArgumentListValid() const {
-    if (1 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
-    return true;
-  }
-};
 // INSTR-HELPER:END
 } // namespace hlsl
diff --git a/include/dxc/HLSL/DxilValidation.h b/include/dxc/HLSL/DxilValidation.h
index 56e7e8233..fa9d4dd8f 100644
--- a/include/dxc/HLSL/DxilValidation.h
+++ b/include/dxc/HLSL/DxilValidation.h
@@ -205,6 +205,7 @@ enum class ValidationRule : unsigned {
 
   // Type system
   TypesDefined, // Type must be defined based on DXIL primitives
+  TypesI8, // I8 can only be used as an immediate value for an intrinsic
   TypesIntWidth, // Int type must be of valid width
   TypesNoMultiDim, // Only one dimension allowed for array type
   TypesNoVector, // Vector types must not be present
diff --git a/include/dxc/HLSL/HLOperationLowerExtension.h b/include/dxc/HLSL/HLOperationLowerExtension.h
index b7ba59107..3d82befb5 100644
--- a/include/dxc/HLSL/HLOperationLowerExtension.h
+++ b/include/dxc/HLSL/HLOperationLowerExtension.h
@@ -14,15 +14,19 @@
 #include "dxc/HLSL/HLSLExtensionsCodegenHelper.h"
 #include "llvm/ADT/StringRef.h"
 #include <string>
+#include <unordered_map>
 
 namespace llvm {
   class Value;
   class CallInst;
   class Function;
   class StringRef;
+  class Instruction;
 }
 
 namespace hlsl {
+  class OP;
+
   // Lowers HLSL extensions from HL operation to DXIL operation.
   class ExtensionLowering {
   public:
@@ -32,11 +36,14 @@ namespace hlsl {
       NoTranslation,  // Propagate the call arguments as is down to dxil.
       Replicate,      // Scalarize the vector arguments and replicate the call.
       Pack,           // Convert the vector arguments into structs.
+      Resource,       // Convert return value to resource return and explode vectors.
     };
 
+    typedef std::unordered_map<llvm::Instruction *, llvm::Value *> HandleMap;
+
     // Create the lowering using the given strategy and custom codegen helper.
-    ExtensionLowering(llvm::StringRef strategy, HLSLExtensionsCodegenHelper *helper);
-    ExtensionLowering(Strategy strategy, HLSLExtensionsCodegenHelper *helper);
+    ExtensionLowering(llvm::StringRef strategy, HLSLExtensionsCodegenHelper *helper, const HandleMap &handleMap, OP& hlslOp);
+    ExtensionLowering(Strategy strategy, HLSLExtensionsCodegenHelper *helper, const HandleMap &handleMap, OP& hlslOp);
 
     // Translate the HL op call to a DXIL op call.
     // Returns a new value if translation was successful.
@@ -62,11 +69,14 @@ namespace hlsl {
   private:
     Strategy m_strategy;
     HLSLExtensionsCodegenHelper *m_helper;
+    const HandleMap &m_handleMap;
+    OP &m_hlslOp;
 
     llvm::Value *Unknown(llvm::CallInst *CI);
     llvm::Value *NoTranslation(llvm::CallInst *CI);
     llvm::Value *Replicate(llvm::CallInst *CI);
     llvm::Value *Pack(llvm::CallInst *CI);
+    llvm::Value *Resource(llvm::CallInst *CI);
 
     // Translate the HL call by replicating the call for each vector element.
     //
diff --git a/lib/HLSL/DxilGenerationPass.cpp b/lib/HLSL/DxilGenerationPass.cpp
index 3c493d913..e3a08f20a 100644
--- a/lib/HLSL/DxilGenerationPass.cpp
+++ b/lib/HLSL/DxilGenerationPass.cpp
@@ -154,8 +154,8 @@ void InitDxilModuleFromHLModule(HLModule &H, DxilModule &M, bool HasDebugInfo) {
     b->SetSize(C->GetSize());
     if (HasDebugInfo)
       LLVMUsed.emplace_back(cast<GlobalVariable>(b->GetGlobalSymbol()));
-    else
-      b->SetGlobalSymbol(UndefValue::get(b->GetGlobalSymbol()->getType()));
+
+    b->SetGlobalSymbol(UndefValue::get(b->GetGlobalSymbol()->getType()));
     M.AddCBuffer(std::move(b));
   }
   for (auto && C : H.GetUAVs()) {
@@ -163,8 +163,8 @@ void InitDxilModuleFromHLModule(HLModule &H, DxilModule &M, bool HasDebugInfo) {
     InitResource(C.get(), b.get());
     if (HasDebugInfo)
       LLVMUsed.emplace_back(cast<GlobalVariable>(b->GetGlobalSymbol()));
-    else
-      b->SetGlobalSymbol(UndefValue::get(b->GetGlobalSymbol()->getType()));
+
+    b->SetGlobalSymbol(UndefValue::get(b->GetGlobalSymbol()->getType()));
     M.AddUAV(std::move(b));
   }
   for (auto && C : H.GetSRVs()) {
@@ -172,8 +172,8 @@ void InitDxilModuleFromHLModule(HLModule &H, DxilModule &M, bool HasDebugInfo) {
     InitResource(C.get(), b.get());
     if (HasDebugInfo)
       LLVMUsed.emplace_back(cast<GlobalVariable>(b->GetGlobalSymbol()));
-    else
-      b->SetGlobalSymbol(UndefValue::get(b->GetGlobalSymbol()->getType()));
+
+    b->SetGlobalSymbol(UndefValue::get(b->GetGlobalSymbol()->getType()));
     M.AddSRV(std::move(b));
   }
   for (auto && C : H.GetSamplers()) {
@@ -182,8 +182,8 @@ void InitDxilModuleFromHLModule(HLModule &H, DxilModule &M, bool HasDebugInfo) {
     b->SetSamplerKind(C->GetSamplerKind());
     if (HasDebugInfo)
       LLVMUsed.emplace_back(cast<GlobalVariable>(b->GetGlobalSymbol()));
-    else
-      b->SetGlobalSymbol(UndefValue::get(b->GetGlobalSymbol()->getType()));
+
+    b->SetGlobalSymbol(UndefValue::get(b->GetGlobalSymbol()->getType()));
     M.AddSampler(std::move(b));
   }
 
diff --git a/lib/HLSL/DxilMetadataHelper.cpp b/lib/HLSL/DxilMetadataHelper.cpp
index 4ebc3c291..b88bb9185 100644
--- a/lib/HLSL/DxilMetadataHelper.cpp
+++ b/lib/HLSL/DxilMetadataHelper.cpp
@@ -17,7 +17,6 @@
 #include "dxc/HLSL/DxilSignature.h"
 #include "dxc/HLSL/DxilTypeSystem.h"
 #include "dxc/HLSL/DxilRootSignature.h"
-#include "dxc/HLSL/DxilValidation.h"
 
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
@@ -27,6 +26,8 @@
 #include "llvm/Support/raw_ostream.h"
 #include <array>
 
+#include "dxc/Support/WinIncludes.h"
+
 using namespace llvm;
 using std::string;
 using std::vector;
@@ -131,10 +132,10 @@ void DxilMDHelper::LoadDxilShaderModel(const ShaderModel *&pSM) {
   ShaderModelName += "_" + std::to_string(Major) + "_" + std::to_string(Minor);
   pSM = ShaderModel::GetByName(ShaderModelName.c_str());
   if (!pSM->IsValid()) {
-    string ErrorMsg = hlsl::GetValidationRuleText(hlsl::ValidationRule::SmName);
-    size_t offset = ErrorMsg.find("%0");
-    if (offset != string::npos)
-      ErrorMsg.replace(offset, 2, ShaderModelName);
+    char ErrorMsgTxt[40];
+    StringCchPrintfA(ErrorMsgTxt, _countof(ErrorMsgTxt),
+                     "Unknown shader model '%s'", ShaderModelName.c_str());
+    string ErrorMsg(ErrorMsgTxt);
     throw hlsl::Exception(DXC_E_INCORRECT_DXIL_METADATA, ErrorMsg);
   }
 }
@@ -547,15 +548,7 @@ void DxilMDHelper::EmitDxilTypeSystem(DxilTypeSystem &TypeSystem, vector<GlobalV
     // Emit struct type field annotations.
     Metadata *pMD = EmitDxilStructAnnotation(*pA);
 
-    // Declare a global dummy variable.
-    string GVName = string(kDxilTypeSystemHelperVariablePrefix) + std::to_string(GVIdx);
-    GlobalVariable *pGV = new GlobalVariable(*m_pModule, pStructType, true, GlobalValue::ExternalLinkage, 
-                                             nullptr, GVName, nullptr,
-                                             GlobalVariable::NotThreadLocal, DXIL::kDeviceMemoryAddrSpace);
-    // Mark GV as being used for LLVM.
-    LLVMUsed.emplace_back(pGV);
-
-    MDVals.push_back(ValueAsMetadata::get(pGV));
+    MDVals.push_back(ValueAsMetadata::get(UndefValue::get(pStructType)));
     MDVals.push_back(pMD);
   }
 
@@ -596,11 +589,11 @@ void DxilMDHelper::LoadDxilTypeSystemNode(const llvm::MDTuple &MDT,
     IFTBOOL((MDT.getNumOperands() & 0x1) == 1, DXC_E_INCORRECT_DXIL_METADATA);
 
     for (unsigned i = 1; i < MDT.getNumOperands(); i += 2) {
-      GlobalVariable *pGV =
-          dyn_cast<GlobalVariable>(ValueMDToValue(MDT.getOperand(i)));
+      Constant *pGV =
+          dyn_cast<Constant>(ValueMDToValue(MDT.getOperand(i)));
       IFTBOOL(pGV != nullptr, DXC_E_INCORRECT_DXIL_METADATA);
       StructType *pGVType =
-          dyn_cast<StructType>(pGV->getType()->getPointerElementType());
+          dyn_cast<StructType>(pGV->getType());
       IFTBOOL(pGVType != nullptr, DXC_E_INCORRECT_DXIL_METADATA);
 
       DxilStructAnnotation *pSA = TypeSystem.AddStructAnnotation(pGVType);
diff --git a/lib/HLSL/DxilModule.cpp b/lib/HLSL/DxilModule.cpp
index e39c31396..ffe00f198 100644
--- a/lib/HLSL/DxilModule.cpp
+++ b/lib/HLSL/DxilModule.cpp
@@ -66,7 +66,7 @@ DxilModule::DxilModule(Module *pModule)
 
   m_NumThreads[0] = m_NumThreads[1] = m_NumThreads[2] = 0;
 
-#ifdef _DEBUG
+#if defined(_DEBUG) || defined(DBG)
   // Pin LLVM dump methods.
   void (__thiscall Module::*pfnModuleDump)() const = &Module::dump;
   void (__thiscall Type::*pfnTypeDump)() const = &Type::dump;
diff --git a/lib/HLSL/DxilOperations.cpp b/lib/HLSL/DxilOperations.cpp
index ba3a69c67..9bbebe4f6 100644
--- a/lib/HLSL/DxilOperations.cpp
+++ b/lib/HLSL/DxilOperations.cpp
@@ -63,6 +63,7 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
   {  OC::Atan,                    "Atan",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
   {  OC::Hcos,                    "Hcos",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
   {  OC::Hsin,                    "Hsin",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Htan,                    "Htan",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
   {  OC::Exp,                     "Exp",                      OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
   {  OC::Frc,                     "Frc",                      OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
   {  OC::Log,                     "Log",                      OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
@@ -148,10 +149,6 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
   {  OC::TextureGather,           "TextureGather",            OCC::TextureGather,            "textureGather",              false, false,  true, false, false, false, false,  true, false, Attribute::ReadOnly, },
   {  OC::TextureGatherCmp,        "TextureGatherCmp",         OCC::TextureGatherCmp,         "textureGatherCmp",           false, false,  true, false, false, false, false,  true, false, Attribute::ReadOnly, },
 
-  //                                                                                                                        void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::ToDelete5,               "ToDelete5",                OCC::Reserved,                 "reserved",                    true, false, false, false, false, false, false, false, false, Attribute::None,     },
-  {  OC::ToDelete6,               "ToDelete6",                OCC::Reserved,                 "reserved",                    true, false, false, false, false, false, false, false, false, Attribute::None,     },
-
   // Resources - sample                                                                                                     void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
   {  OC::Texture2DMSGetSamplePosition, "Texture2DMSGetSamplePosition", OCC::Texture2DMSGetSamplePosition, "texture2DMSGetSamplePosition",   true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
   {  OC::RenderTargetGetSamplePosition, "RenderTargetGetSamplePosition", OCC::RenderTargetGetSamplePosition, "renderTargetGetSamplePosition",   true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
@@ -172,6 +169,9 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
   {  OC::EvalSnapped,             "EvalSnapped",              OCC::EvalSnapped,              "evalSnapped",                false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
   {  OC::EvalSampleIndex,         "EvalSampleIndex",          OCC::EvalSampleIndex,          "evalSampleIndex",            false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
   {  OC::EvalCentroid,            "EvalCentroid",             OCC::EvalCentroid,             "evalCentroid",               false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::SampleIndex,             "SampleIndex",              OCC::SampleIndex,              "sampleIndex",                false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
+  {  OC::Coverage,                "Coverage",                 OCC::Coverage,                 "coverage",                   false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
+  {  OC::InnerCoverage,           "InnerCoverage",            OCC::InnerCoverage,            "innerCoverage",              false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
 
   // Compute shader                                                                                                         void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
   {  OC::ThreadId,                "ThreadId",                 OCC::ThreadId,                 "threadId",                   false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
@@ -183,21 +183,12 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
   {  OC::EmitStream,              "EmitStream",               OCC::EmitStream,               "emitStream",                  true, false, false, false, false, false, false, false, false, Attribute::None,     },
   {  OC::CutStream,               "CutStream",                OCC::CutStream,                "cutStream",                   true, false, false, false, false, false, false, false, false, Attribute::None,     },
   {  OC::EmitThenCutStream,       "EmitThenCutStream",        OCC::EmitThenCutStream,        "emitThenCutStream",           true, false, false, false, false, false, false, false, false, Attribute::None,     },
+  {  OC::GSInstanceID,            "GSInstanceID",             OCC::GSInstanceID,             "gsInstanceID",               false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
 
   // Double precision                                                                                                       void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
   {  OC::MakeDouble,              "MakeDouble",               OCC::MakeDouble,               "makeDouble",                 false, false, false,  true, false, false, false, false, false, Attribute::ReadNone, },
-
-  //                                                                                                                        void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::ToDelete1,               "ToDelete1",                OCC::Reserved,                 "reserved",                    true, false, false, false, false, false, false, false, false, Attribute::None,     },
-  {  OC::ToDelete2,               "ToDelete2",                OCC::Reserved,                 "reserved",                    true, false, false, false, false, false, false, false, false, Attribute::None,     },
-
-  // Double precision                                                                                                       void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
   {  OC::SplitDouble,             "SplitDouble",              OCC::SplitDouble,              "splitDouble",                false, false, false,  true, false, false, false, false, false, Attribute::ReadNone, },
 
-  //                                                                                                                        void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::ToDelete3,               "ToDelete3",                OCC::Reserved,                 "reserved",                    true, false, false, false, false, false, false, false, false, Attribute::None,     },
-  {  OC::ToDelete4,               "ToDelete4",                OCC::Reserved,                 "reserved",                    true, false, false, false, false, false, false, false, false, Attribute::None,     },
-
   // Domain and hull shader                                                                                                 void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
   {  OC::LoadOutputControlPoint,  "LoadOutputControlPoint",   OCC::LoadOutputControlPoint,   "loadOutputControlPoint",     false,  true,  true, false, false, false,  true,  true, false, Attribute::ReadNone, },
   {  OC::LoadPatchConstant,       "LoadPatchConstant",        OCC::LoadPatchConstant,        "loadPatchConstant",          false,  true,  true, false, false, false,  true,  true, false, Attribute::ReadNone, },
@@ -213,15 +204,10 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
   // Other                                                                                                                  void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
   {  OC::CycleCounterLegacy,      "CycleCounterLegacy",       OCC::CycleCounterLegacy,       "cycleCounterLegacy",          true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
 
-  // Unary float                                                                                                            void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::Htan,                    "Htan",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-
   // Wave                                                                                                                   void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::WaveCaptureReserved,     "WaveCaptureReserved",      OCC::Reserved,                 "reserved",                    true, false, false, false, false, false, false, false, false, Attribute::None,     },
   {  OC::WaveIsFirstLane,         "WaveIsFirstLane",          OCC::WaveIsFirstLane,          "waveIsFirstLane",             true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
   {  OC::WaveGetLaneIndex,        "WaveGetLaneIndex",         OCC::WaveGetLaneIndex,         "waveGetLaneIndex",            true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
   {  OC::WaveGetLaneCount,        "WaveGetLaneCount",         OCC::WaveGetLaneCount,         "waveGetLaneCount",            true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
-  {  OC::WaveIsHelperLaneReserved, "WaveIsHelperLaneReserved", OCC::Reserved,                 "reserved",                    true, false, false, false, false, false, false, false, false, Attribute::None,     },
   {  OC::WaveAnyTrue,             "WaveAnyTrue",              OCC::WaveAnyTrue,              "waveAnyTrue",                 true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
   {  OC::WaveAllTrue,             "WaveAllTrue",              OCC::WaveAllTrue,              "waveAllTrue",                 true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
   {  OC::WaveActiveAllEqual,      "WaveActiveAllEqual",       OCC::WaveActiveAllEqual,       "waveActiveAllEqual",         false,  true,  true,  true,  true,  true,  true,  true,  true, Attribute::ReadOnly, },
@@ -231,12 +217,6 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
   {  OC::WaveActiveOp,            "WaveActiveOp",             OCC::WaveActiveOp,             "waveActiveOp",               false,  true,  true,  true,  true,  true,  true,  true,  true, Attribute::ReadOnly, },
   {  OC::WaveActiveBit,           "WaveActiveBit",            OCC::WaveActiveBit,            "waveActiveBit",              false, false, false, false, false,  true,  true,  true,  true, Attribute::ReadOnly, },
   {  OC::WavePrefixOp,            "WavePrefixOp",             OCC::WavePrefixOp,             "wavePrefixOp",               false,  true,  true,  true, false,  true,  true,  true,  true, Attribute::ReadOnly, },
-  {  OC::WaveGetOrderedIndex,     "WaveGetOrderedIndex",      OCC::Reserved,                 "reserved",                    true, false, false, false, false, false, false, false, false, Attribute::None,     },
-
-  //                                                                                                                        void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::GlobalOrderedCountIncReserved, "GlobalOrderedCountIncReserved", OCC::Reserved,                 "reserved",                    true, false, false, false, false, false, false, false, false, Attribute::None,     },
-
-  // Wave                                                                                                                   void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
   {  OC::QuadReadLaneAt,          "QuadReadLaneAt",           OCC::QuadReadLaneAt,           "quadReadLaneAt",             false,  true,  true,  true,  true,  true,  true,  true,  true, Attribute::ReadOnly, },
   {  OC::QuadOp,                  "QuadOp",                   OCC::QuadOp,                   "quadOp",                     false,  true,  true,  true, false,  true,  true,  true,  true, Attribute::ReadOnly, },
 
@@ -248,9 +228,6 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
   {  OC::BitcastI64toF64,         "BitcastI64toF64",          OCC::BitcastI64toF64,          "bitcastI64toF64",             true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
   {  OC::BitcastF64toI64,         "BitcastF64toI64",          OCC::BitcastF64toI64,          "bitcastF64toI64",             true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
 
-  // GS                                                                                                                     void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::GSInstanceID,            "GSInstanceID",             OCC::GSInstanceID,             "gsInstanceID",               false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-
   // Legacy floating-point                                                                                                  void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
   {  OC::LegacyF32ToF16,          "LegacyF32ToF16",           OCC::LegacyF32ToF16,           "legacyF32ToF16",              true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
   {  OC::LegacyF16ToF32,          "LegacyF16ToF32",           OCC::LegacyF16ToF32,           "legacyF16ToF32",              true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
@@ -263,11 +240,6 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
   // Wave                                                                                                                   void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
   {  OC::WaveAllBitCount,         "WaveAllBitCount",          OCC::WaveAllOp,                "waveAllOp",                   true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
   {  OC::WavePrefixBitCount,      "WavePrefixBitCount",       OCC::WavePrefixOp,             "wavePrefixOp",                true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
-
-  // Pixel shader                                                                                                           void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::SampleIndex,             "SampleIndex",              OCC::SampleIndex,              "sampleIndex",                false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-  {  OC::Coverage,                "Coverage",                 OCC::Coverage,                 "coverage",                   false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-  {  OC::InnerCoverage,           "InnerCoverage",            OCC::InnerCoverage,            "innerCoverage",              false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
 };
 // OPCODE-OLOADS:END
 
@@ -379,14 +351,13 @@ bool OP::IsDxilOpWave(OpCode C) {
   unsigned op = (unsigned)C;
   /* <py::lines('OPCODE-WAVE')>hctdb_instrhelp.get_instrs_pred("op", "is_wave")</py>*/
   // OPCODE-WAVE:BEGIN
-  // Instructions: WaveCaptureReserved=114, WaveIsFirstLane=115,
-  // WaveGetLaneIndex=116, WaveGetLaneCount=117, WaveIsHelperLaneReserved=118,
-  // WaveAnyTrue=119, WaveAllTrue=120, WaveActiveAllEqual=121,
-  // WaveActiveBallot=122, WaveReadLaneAt=123, WaveReadLaneFirst=124,
-  // WaveActiveOp=125, WaveActiveBit=126, WavePrefixOp=127,
-  // WaveGetOrderedIndex=128, QuadReadLaneAt=130, QuadOp=131,
-  // WaveAllBitCount=144, WavePrefixBitCount=145
-  return 114 <= op && op <= 128 || 130 <= op && op <= 131 || 144 <= op && op <= 145;
+  // Instructions: WaveIsFirstLane=112, WaveGetLaneIndex=113,
+  // WaveGetLaneCount=114, WaveAnyTrue=115, WaveAllTrue=116,
+  // WaveActiveAllEqual=117, WaveActiveBallot=118, WaveReadLaneAt=119,
+  // WaveReadLaneFirst=120, WaveActiveOp=121, WaveActiveBit=122,
+  // WavePrefixOp=123, QuadReadLaneAt=124, QuadOp=125, WaveAllBitCount=137,
+  // WavePrefixBitCount=138
+  return 112 <= op && op <= 125 || 137 <= op && op <= 138;
   // OPCODE-WAVE:END
 }
 
@@ -394,10 +365,10 @@ bool OP::IsDxilOpGradient(OpCode C) {
   unsigned op = (unsigned)C;
   /* <py::lines('OPCODE-GRADIENT')>hctdb_instrhelp.get_instrs_pred("op", "is_gradient")</py>*/
   // OPCODE-GRADIENT:BEGIN
-  // Instructions: Sample=61, SampleBias=62, SampleCmp=65, TextureGather=74,
-  // TextureGatherCmp=75, CalculateLOD=84, DerivCoarseX=86, DerivCoarseY=87,
-  // DerivFineX=88, DerivFineY=89
-  return 61 <= op && op <= 62 || op == 65 || 74 <= op && op <= 75 || op == 84 || 86 <= op && op <= 89;
+  // Instructions: Sample=62, SampleBias=63, SampleCmp=66, TextureGather=75,
+  // TextureGatherCmp=76, CalculateLOD=83, DerivCoarseX=85, DerivCoarseY=86,
+  // DerivFineX=87, DerivFineY=88
+  return 62 <= op && op <= 63 || op == 66 || 75 <= op && op <= 76 || op == 83 || 85 <= op && op <= 88;
   // OPCODE-GRADIENT:END
 }
 
@@ -514,6 +485,7 @@ Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
   case OpCode::Atan:                   A(pETy);     A(pI32); A(pETy); break;
   case OpCode::Hcos:                   A(pETy);     A(pI32); A(pETy); break;
   case OpCode::Hsin:                   A(pETy);     A(pI32); A(pETy); break;
+  case OpCode::Htan:                   A(pETy);     A(pI32); A(pETy); break;
   case OpCode::Exp:                    A(pETy);     A(pI32); A(pETy); break;
   case OpCode::Frc:                    A(pETy);     A(pI32); A(pETy); break;
   case OpCode::Log:                    A(pETy);     A(pI32); A(pETy); break;
@@ -599,10 +571,6 @@ Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
   case OpCode::TextureGather:          RRT(pETy);   A(pI32); A(pRes); A(pRes); A(pF32); A(pF32); A(pF32); A(pF32); A(pI32); A(pI32); A(pI32); break;
   case OpCode::TextureGatherCmp:       RRT(pETy);   A(pI32); A(pRes); A(pRes); A(pF32); A(pF32); A(pF32); A(pF32); A(pI32); A(pI32); A(pI32); A(pF32); break;
 
-    // 
-  case OpCode::ToDelete5:              A(pV);       A(pI32); break;
-  case OpCode::ToDelete6:              A(pV);       A(pI32); break;
-
     // Resources - sample
   case OpCode::Texture2DMSGetSamplePosition:A(pPos);     A(pI32); A(pRes); A(pI32); break;
   case OpCode::RenderTargetGetSamplePosition:A(pPos);     A(pI32); A(pI32); break;
@@ -623,6 +591,9 @@ Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
   case OpCode::EvalSnapped:            A(pETy);     A(pI32); A(pI32); A(pI32); A(pI8);  A(pI32); A(pI32); break;
   case OpCode::EvalSampleIndex:        A(pETy);     A(pI32); A(pI32); A(pI32); A(pI8);  A(pI32); break;
   case OpCode::EvalCentroid:           A(pETy);     A(pI32); A(pI32); A(pI32); A(pI8);  break;
+  case OpCode::SampleIndex:            A(pI32);     A(pI32); break;
+  case OpCode::Coverage:               A(pI32);     A(pI32); break;
+  case OpCode::InnerCoverage:          A(pI32);     A(pI32); break;
 
     // Compute shader
   case OpCode::ThreadId:               A(pI32);     A(pI32); A(pI32); break;
@@ -634,21 +605,12 @@ Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
   case OpCode::EmitStream:             A(pV);       A(pI32); A(pI8);  break;
   case OpCode::CutStream:              A(pV);       A(pI32); A(pI8);  break;
   case OpCode::EmitThenCutStream:      A(pV);       A(pI32); A(pI8);  break;
+  case OpCode::GSInstanceID:           A(pI32);     A(pI32); break;
 
     // Double precision
   case OpCode::MakeDouble:             A(pF64);     A(pI32); A(pI32); A(pI32); break;
-
-    // 
-  case OpCode::ToDelete1:              A(pV);       A(pI32); break;
-  case OpCode::ToDelete2:              A(pV);       A(pI32); break;
-
-    // Double precision
   case OpCode::SplitDouble:            A(pSDT);     A(pI32); A(pF64); break;
 
-    // 
-  case OpCode::ToDelete3:              A(pV);       A(pI32); break;
-  case OpCode::ToDelete4:              A(pV);       A(pI32); break;
-
     // Domain and hull shader
   case OpCode::LoadOutputControlPoint: A(pETy);     A(pI32); A(pI32); A(pI32); A(pI8);  A(pI32); break;
   case OpCode::LoadPatchConstant:      A(pETy);     A(pI32); A(pI32); A(pI32); A(pI8);  break;
@@ -664,15 +626,10 @@ Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
     // Other
   case OpCode::CycleCounterLegacy:     A(p2I32);    A(pI32); break;
 
-    // Unary float
-  case OpCode::Htan:                   A(pETy);     A(pI32); A(pETy); break;
-
     // Wave
-  case OpCode::WaveCaptureReserved:    A(pV);       A(pI32); break;
   case OpCode::WaveIsFirstLane:        A(pI1);      A(pI32); break;
   case OpCode::WaveGetLaneIndex:       A(pI32);     A(pI32); break;
   case OpCode::WaveGetLaneCount:       A(pI32);     A(pI32); break;
-  case OpCode::WaveIsHelperLaneReserved:A(pV);       A(pI32); break;
   case OpCode::WaveAnyTrue:            A(pI1);      A(pI32); A(pI1);  break;
   case OpCode::WaveAllTrue:            A(pI1);      A(pI32); A(pI1);  break;
   case OpCode::WaveActiveAllEqual:     A(pI1);      A(pI32); A(pETy); break;
@@ -682,12 +639,6 @@ Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
   case OpCode::WaveActiveOp:           A(pETy);     A(pI32); A(pETy); A(pI8);  A(pI8);  break;
   case OpCode::WaveActiveBit:          A(pETy);     A(pI32); A(pETy); A(pI8);  break;
   case OpCode::WavePrefixOp:           A(pETy);     A(pI32); A(pETy); A(pI8);  A(pI8);  break;
-  case OpCode::WaveGetOrderedIndex:    A(pV);       A(pI32); break;
-
-    // 
-  case OpCode::GlobalOrderedCountIncReserved:A(pV);       A(pI32); break;
-
-    // Wave
   case OpCode::QuadReadLaneAt:         A(pETy);     A(pI32); A(pETy); A(pI32); break;
   case OpCode::QuadOp:                 A(pETy);     A(pI32); A(pETy); A(pI8);  break;
 
@@ -699,9 +650,6 @@ Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
   case OpCode::BitcastI64toF64:        A(pF64);     A(pI32); A(pI64); break;
   case OpCode::BitcastF64toI64:        A(pI64);     A(pI32); A(pF64); break;
 
-    // GS
-  case OpCode::GSInstanceID:           A(pI32);     A(pI32); break;
-
     // Legacy floating-point
   case OpCode::LegacyF32ToF16:         A(pI32);     A(pI32); A(pF32); break;
   case OpCode::LegacyF16ToF32:         A(pF32);     A(pI32); A(pI32); break;
@@ -714,11 +662,6 @@ Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
     // Wave
   case OpCode::WaveAllBitCount:        A(pI32);     A(pI32); A(pI1);  break;
   case OpCode::WavePrefixBitCount:     A(pI32);     A(pI32); A(pI1);  break;
-
-    // Pixel shader
-  case OpCode::SampleIndex:            A(pI32);     A(pI32); break;
-  case OpCode::Coverage:               A(pI32);     A(pI32); break;
-  case OpCode::InnerCoverage:          A(pI32);     A(pI32); break;
   // OPCODE-OLOAD-FUNCS:END
   default: DXASSERT(false, "otherwise unhandled case"); break;
   }
diff --git a/lib/HLSL/DxilValidation.cpp b/lib/HLSL/DxilValidation.cpp
index 682ed14b0..867cbfa9b 100644
--- a/lib/HLSL/DxilValidation.cpp
+++ b/lib/HLSL/DxilValidation.cpp
@@ -162,6 +162,7 @@ const char *hlsl::GetValidationRuleText(ValidationRule value) {
     case hlsl::ValidationRule::TypesDefined: return "Type '%0' is not defined on DXIL primitives";
     case hlsl::ValidationRule::TypesIntWidth: return "Int type '%0' has an invalid width";
     case hlsl::ValidationRule::TypesNoMultiDim: return "Only one dimension allowed for array type";
+    case hlsl::ValidationRule::TypesI8: return "I8 can only used as immediate value for intrinsic";
     case hlsl::ValidationRule::SmName: return "Unknown shader model '%0'";
     case hlsl::ValidationRule::SmOpcode: return "Opcode must be defined in target shader model";
     case hlsl::ValidationRule::SmOperand: return "Operand must be defined in target shader model";
@@ -486,32 +487,32 @@ static bool ValidateOpcodeInProfile(DXIL::OpCode opcode,
   unsigned op = (unsigned)opcode;
   /* <py::lines('VALOPCODESM-TEXT')>hctdb_instrhelp.get_valopcode_sm_text()</py>*/
   // VALOPCODESM-TEXT:BEGIN
-  // Instructions: ThreadId=93, GroupId=94, ThreadIdInGroup=95,
-  // FlattenedThreadIdInGroup=96
-  if (93 <= op && op <= 96)
+  // Instructions: ThreadId=95, GroupId=96, ThreadIdInGroup=97,
+  // FlattenedThreadIdInGroup=98
+  if (95 <= op && op <= 98)
     return pSM->IsCS();
-  // Instructions: DomainLocation=108
-  if (op == 108)
+  // Instructions: DomainLocation=107
+  if (op == 107)
     return pSM->IsDS();
-  // Instructions: LoadOutputControlPoint=106, LoadPatchConstant=107
-  if (106 <= op && op <= 107)
+  // Instructions: LoadOutputControlPoint=105, LoadPatchConstant=106
+  if (105 <= op && op <= 106)
     return pSM->IsDS() || pSM->IsHS();
-  // Instructions: EmitStream=97, CutStream=98, EmitThenCutStream=99,
-  // GSInstanceID=138
-  if (97 <= op && op <= 99 || op == 138)
+  // Instructions: EmitStream=99, CutStream=100, EmitThenCutStream=101,
+  // GSInstanceID=102
+  if (99 <= op && op <= 102)
     return pSM->IsGS();
-  // Instructions: PrimitiveID=111
-  if (op == 111)
+  // Instructions: PrimitiveID=110
+  if (op == 110)
     return pSM->IsGS() || pSM->IsDS() || pSM->IsHS() || pSM->IsPS();
-  // Instructions: StorePatchConstant=109, OutputControlPointID=110
-  if (109 <= op && op <= 110)
+  // Instructions: StorePatchConstant=108, OutputControlPointID=109
+  if (108 <= op && op <= 109)
     return pSM->IsHS();
-  // Instructions: Sample=61, SampleBias=62, SampleCmp=65, SampleCmpLevelZero=66,
-  // RenderTargetGetSamplePosition=79, RenderTargetGetSampleCount=80,
-  // CalculateLOD=84, Discard=85, DerivCoarseX=86, DerivCoarseY=87,
-  // DerivFineX=88, DerivFineY=89, EvalSnapped=90, EvalSampleIndex=91,
-  // EvalCentroid=92, SampleIndex=146, Coverage=147, InnerCoverage=148
-  if (61 <= op && op <= 62 || 65 <= op && op <= 66 || 79 <= op && op <= 80 || 84 <= op && op <= 92 || 146 <= op && op <= 148)
+  // Instructions: Sample=62, SampleBias=63, SampleCmp=66, SampleCmpLevelZero=67,
+  // RenderTargetGetSamplePosition=78, RenderTargetGetSampleCount=79,
+  // CalculateLOD=83, Discard=84, DerivCoarseX=85, DerivCoarseY=86,
+  // DerivFineX=87, DerivFineY=88, EvalSnapped=89, EvalSampleIndex=90,
+  // EvalCentroid=91, SampleIndex=92, Coverage=93, InnerCoverage=94
+  if (62 <= op && op <= 63 || 66 <= op && op <= 67 || 78 <= op && op <= 79 || 83 <= op && op <= 94)
     return pSM->IsPS();
   return true;
   // VALOPCODESM-TEXT:END
@@ -572,7 +573,8 @@ static DxilSignatureElement *ValidateSignatureAccess(Instruction *I, DxilSignatu
   if (isOutput && SE.GetSemantic()->GetKind() == DXIL::SemanticKind::Position) {
     unsigned mask = ValCtx.OutputPositionMask[SE.GetOutputStream()];
     mask |= 1<<col;
-    ValCtx.OutputPositionMask[SE.GetOutputStream()] = mask;
+    if (SE.GetOutputStream() < DXIL::kNumOutputStreams)
+      ValCtx.OutputPositionMask[SE.GetOutputStream()] = mask;
   }
   return &SE;
 }
@@ -923,6 +925,7 @@ static void ValidateGather(CallInst *CI, Value *srvHandle, Value *samplerHandle,
 
   if (resClass != DXIL::ResourceClass::SRV) {
     ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForSamplerGather);
+    return;
   }
 
   // Coord match resource kind.
@@ -1316,6 +1319,7 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI,
     if (resClass != DXIL::ResourceClass::SRV) {
       ValCtx.EmitInstrError(CI,
                             ValidationRule::InstrResourceClassForSamplerGather);
+      return;
     }
     // Coord match resource.
     ValidateCalcLODResourceDimensionCoord(
@@ -1886,11 +1890,6 @@ static void ValidateExternalFunction(Function *F, ValidationContext &ValCtx) {
 
     DXIL::OpCode dxilOpcode = (DXIL::OpCode)opcode;
 
-    if (OP::GetOpCodeClass(dxilOpcode) == DXIL::OpCodeClass::Reserved) {
-      // Diagnosed in body validation.
-      continue;
-    }
-
     // In some cases, no overloads are provided (void is exclusive to others)
     Function *dxilFunc;
     if (hlslOP->IsOverloadLegal(dxilOpcode, voidTy)) {
@@ -2336,10 +2335,6 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) {
 
           unsigned opcode = OpcodeConst->getLimitedValue();
           DXIL::OpCode dxilOpcode = (DXIL::OpCode)opcode;
-          if (OP::GetOpCodeClass(dxilOpcode) == DXIL::OpCodeClass::Reserved) {
-            ValCtx.EmitInstrError(&I, ValidationRule::InstrOpCodeReserved);
-            continue;
-          }
 
           if (OP::IsDxilOpGradient(dxilOpcode)) {
             gradientOps.push_back(CI);
@@ -2355,20 +2350,34 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) {
         continue;
       }
 
-      if (!isa<PHINode>(&I)) {
-        for (Value *op : I.operands()) {
-          if (isa<UndefValue>(op)) {
-            ValCtx.EmitInstrError(&I,
-                                  ValidationRule::InstrNoReadingUninitialized);
-          } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(op)) {
-            for (Value *opCE : CE->operands()) {
-              if (isa<UndefValue>(opCE)) {
-                ValCtx.EmitInstrError(
-                    &I, ValidationRule::InstrNoReadingUninitialized);
-              }
+      for (Value *op : I.operands()) {
+        if (!isa<PHINode>(&I) && isa<UndefValue>(op)) {
+          ValCtx.EmitInstrError(&I,
+                                ValidationRule::InstrNoReadingUninitialized);
+        } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(op)) {
+          for (Value *opCE : CE->operands()) {
+            if (isa<UndefValue>(opCE)) {
+              ValCtx.EmitInstrError(
+                  &I, ValidationRule::InstrNoReadingUninitialized);
             }
           }
         }
+        if (IntegerType *IT = dyn_cast<IntegerType>(op->getType())) {
+          if (IT->getBitWidth() == 8) {
+            ValCtx.EmitInstrError(&I, ValidationRule::TypesI8);
+          }
+        }
+      }
+
+      Type *Ty = I.getType();
+      if (isa<PointerType>(Ty))
+        Ty = Ty->getPointerElementType();
+      while (isa<ArrayType>(Ty))
+        Ty = Ty->getArrayElementType();
+      if (IntegerType *IT = dyn_cast<IntegerType>(Ty)) {
+        if (IT->getBitWidth() == 8) {
+          ValCtx.EmitInstrError(&I, ValidationRule::TypesI8);
+        }
       }
 
       unsigned opcode = I.getOpcode();
diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp
index ab782cfad..0f015c4b9 100644
--- a/lib/HLSL/HLOperationLower.cpp
+++ b/lib/HLSL/HLOperationLower.cpp
@@ -4265,10 +4265,9 @@ Value *GenerateCBLoadLegacy(Value *handle, Value *legacyIdx,
   Constant *OpArg = hlslOP->GetU32Const((unsigned)OP::OpCode::CBufferLoadLegacy);
 
   Type *i1Ty = Type::getInt1Ty(EltTy->getContext());
-  Type *i8Ty = Type::getInt8Ty(EltTy->getContext());
   Type *doubleTy = Type::getDoubleTy(EltTy->getContext());
   Type *i64Ty = Type::getInt64Ty(EltTy->getContext());
-  bool isBool = EltTy == i1Ty || EltTy == i8Ty;
+  bool isBool = EltTy == i1Ty;
   bool is64 = (EltTy == doubleTy) | (EltTy == i64Ty);
   bool isNormal = !isBool && !is64;
   if (isNormal) {
@@ -4283,14 +4282,12 @@ Value *GenerateCBLoadLegacy(Value *handle, Value *legacyIdx,
     Value *Result = Builder.CreateExtractValue(loadLegacy, eltIdx);
     return Result;
   } else {
+    DXASSERT(isBool, "bool should be i1");
     Type *i32Ty = Type::getInt32Ty(EltTy->getContext());
     Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, i32Ty);
     Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
     Value *Result = Builder.CreateExtractValue(loadLegacy, channelOffset);
-    if (EltTy == i1Ty)
-      return Builder.CreateICmpEQ(Result, hlslOP->GetU32Const(0));
-    else
-      return Builder.CreateTrunc(Result, i8Ty);
+    return Builder.CreateICmpEQ(Result, hlslOP->GetU32Const(0));
   }
 }
 
@@ -4302,10 +4299,9 @@ Value *GenerateCBLoadLegacy(Value *handle, Value *legacyIdx,
   Constant *OpArg = hlslOP->GetU32Const((unsigned)OP::OpCode::CBufferLoadLegacy);
 
   Type *i1Ty = Type::getInt1Ty(EltTy->getContext());
-  Type *i8Ty = Type::getInt8Ty(EltTy->getContext());
   Type *doubleTy = Type::getDoubleTy(EltTy->getContext());
   Type *i64Ty = Type::getInt64Ty(EltTy->getContext());
-  bool isBool = EltTy == i1Ty || EltTy == i8Ty;
+  bool isBool = EltTy == i1Ty;
   bool is64 = (EltTy == doubleTy) | (EltTy == i64Ty);
   bool isNormal = !isBool && !is64;
   if (isNormal) {
@@ -4340,6 +4336,7 @@ Value *GenerateCBLoadLegacy(Value *handle, Value *legacyIdx,
     }
     return Result;
   } else {
+    DXASSERT(isBool, "bool should be i1");
     Type *i32Ty = Type::getInt32Ty(EltTy->getContext());
     Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, i32Ty);
     Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
@@ -4348,10 +4345,7 @@ Value *GenerateCBLoadLegacy(Value *handle, Value *legacyIdx,
       Value *NewElt = Builder.CreateExtractValue(loadLegacy, channelOffset+i);
       Result = Builder.CreateInsertElement(Result, NewElt, i);
     }
-    if (EltTy == i1Ty)
-      return Builder.CreateICmpEQ(Result, ConstantAggregateZero::get(Result->getType()));
-    else
-      return Builder.CreateTrunc(Result, VectorType::get(i8Ty, vecSize));
+    return Builder.CreateICmpEQ(Result, ConstantAggregateZero::get(Result->getType()));
   }
 }
 
@@ -5723,7 +5717,11 @@ void TranslateHLBuiltinOperation(Function *F, HLOperationLowerHelper &helper,
   }
 }
 
-static void TranslateHLExtension(Function *F, HLSLExtensionsCodegenHelper *helper) {
+typedef std::unordered_map<llvm::Instruction *, llvm::Value *> HandleMap;
+static void TranslateHLExtension(Function *F,
+                                 HLSLExtensionsCodegenHelper *helper,
+                                 const HandleMap &handleMap,
+                                 OP& hlslOp) {
   // Find all calls to the function F.
   // Store the calls in a vector for now to be replaced the loop below.
   // We use a two step "find then replace" to avoid removing uses while
@@ -5737,7 +5735,7 @@ static void TranslateHLExtension(Function *F, HLSLExtensionsCodegenHelper *helpe
 
   // Get the lowering strategy to use for this intrinsic.
   llvm::StringRef LowerStrategy = GetHLLowerStrategy(F);
-  ExtensionLowering lower(LowerStrategy, helper);
+  ExtensionLowering lower(LowerStrategy, helper, handleMap, hlslOp);
 
   // Replace all calls that were successfully translated.
   for (CallInst *CI : CallsToReplace) {
@@ -5773,8 +5771,7 @@ void TranslateBuiltinOperations(
       continue;
     }
     if (group == HLOpcodeGroup::HLExtIntrinsic) {
-      // TODO: consider handling extensions to object methods
-      TranslateHLExtension(F, extCodegenHelper);
+      TranslateHLExtension(F, extCodegenHelper, handleMap, helper.hlslOP);
       continue;
     }
     TranslateHLBuiltinOperation(F, helper, group, &objHelper);
diff --git a/lib/HLSL/HLOperationLowerExtension.cpp b/lib/HLSL/HLOperationLowerExtension.cpp
index 8250ef243..d36fe137c 100644
--- a/lib/HLSL/HLOperationLowerExtension.cpp
+++ b/lib/HLSL/HLOperationLowerExtension.cpp
@@ -34,6 +34,7 @@ ExtensionLowering::Strategy ExtensionLowering::GetStrategy(StringRef strategy) {
     case 'n': return Strategy::NoTranslation;
     case 'r': return Strategy::Replicate;
     case 'p': return Strategy::Pack;
+    case 'm': return Strategy::Resource;
     default: break;
   }
   return Strategy::Unknown;
@@ -44,17 +45,18 @@ llvm::StringRef ExtensionLowering::GetStrategyName(Strategy strategy) {
     case Strategy::NoTranslation: return "n";
     case Strategy::Replicate:     return "r";
     case Strategy::Pack:          return "p";
+    case Strategy::Resource:      return "m"; // m for resource method
     default: break;
   }
   return "?";
 }
 
-ExtensionLowering::ExtensionLowering(Strategy strategy, HLSLExtensionsCodegenHelper *helper) 
-  : m_strategy(strategy), m_helper(helper)
+ExtensionLowering::ExtensionLowering(Strategy strategy, HLSLExtensionsCodegenHelper *helper, const HandleMap &handleMap, OP& hlslOp)
+  : m_strategy(strategy), m_helper(helper), m_handleMap(handleMap), m_hlslOp(hlslOp)
   {}
 
-ExtensionLowering::ExtensionLowering(StringRef strategy, HLSLExtensionsCodegenHelper *helper) 
-  : ExtensionLowering(GetStrategy(strategy), helper)
+ExtensionLowering::ExtensionLowering(StringRef strategy, HLSLExtensionsCodegenHelper *helper, const HandleMap &handleMap, OP& hlslOp)
+  : ExtensionLowering(GetStrategy(strategy), helper, handleMap, hlslOp)
   {}
 
 llvm::Value *ExtensionLowering::Translate(llvm::CallInst *CI) {
@@ -62,6 +64,7 @@ llvm::Value *ExtensionLowering::Translate(llvm::CallInst *CI) {
   case Strategy::NoTranslation: return NoTranslation(CI);
   case Strategy::Replicate:     return Replicate(CI);
   case Strategy::Pack:          return Pack(CI);
+  case Strategy::Resource:      return Resource(CI);
   default: break;
   }
   return Unknown(CI);
@@ -75,8 +78,17 @@ llvm::Value *ExtensionLowering::Unknown(CallInst *CI) {
 // Interface to describe how to translate types from HL-dxil to dxil.
 class FunctionTypeTranslator {
 public:
+  // Arguments can be exploded into multiple copies of the same type.
+  // For example a <2 x i32> could become { i32, 2 } if the vector
+  // is expanded in place or { i32, 1 } if the call is replicated.
+  struct ArgumentType {
+    Type *type;
+    int  count;
+
+    ArgumentType(Type *ty, int cnt = 1) : type(ty), count(cnt) {}
+  };
   virtual Type *TranslateReturnType(CallInst *CI) = 0;
-  virtual Type *TranslateArgumentType(Type *OrigArgType) = 0;
+  virtual ArgumentType TranslateArgumentType(Value *OrigArg) = 0;
 };
 
 // Class to create the new function with the translated types for low-level dxil.
@@ -85,6 +97,10 @@ public:
   template <typename TypeTranslator>
   static Function *GetLoweredFunction(CallInst *CI, ExtensionLowering &lower) {
     TypeTranslator typeTranslator;
+    return GetLoweredFunction(typeTranslator, CI, lower);
+  }
+  
+  static Function *GetLoweredFunction(FunctionTypeTranslator &typeTranslator, CallInst *CI, ExtensionLowering &lower) {
     FunctionTranslator translator(typeTranslator, lower);
     return translator.GetLoweredFunction(CI);
   }
@@ -120,9 +136,11 @@ private:
     SmallVector<Type *, 10> ParamTypes;
     ParamTypes.reserve(CI->getNumArgOperands());
     for (unsigned i = 0; i < CI->getNumArgOperands(); ++i) {
-      Type *OrigTy = CI->getArgOperand(i)->getType();
-      Type *TranslatedTy = m_typeTranslator.TranslateArgumentType(OrigTy);
-      ParamTypes.push_back(TranslatedTy);
+      Value *OrigArg = CI->getArgOperand(i);
+      FunctionTypeTranslator::ArgumentType newArgType = m_typeTranslator.TranslateArgumentType(OrigArg);
+      for (int i = 0; i < newArgType.count; ++i) {
+        ParamTypes.push_back(newArgType.type);
+      }
     }
 
     const bool IsVarArg = false;
@@ -151,8 +169,8 @@ class NoTranslationTypeTranslator : public FunctionTypeTranslator {
   virtual Type *TranslateReturnType(CallInst *CI) override {
     return CI->getType();
   }
-  virtual Type *TranslateArgumentType(Type *OrigArgType) override {
-    return OrigArgType;
+  virtual ArgumentType TranslateArgumentType(Value *OrigArg) override {
+    return ArgumentType(OrigArg->getType());
   }
 };
 
@@ -212,13 +230,13 @@ class ReplicatedFunctionTypeTranslator : public FunctionTypeTranslator {
     return RetTy;
   }
 
-  virtual Type *TranslateArgumentType(Type *OrigArgType) override {
-    Type *Ty = OrigArgType;
+  virtual ArgumentType TranslateArgumentType(Value *OrigArg) override {
+    Type *Ty = OrigArg->getType();
     if (Ty->isVectorTy()) {
       Ty = Ty->getVectorElementType();
     }
 
-    return Ty;
+    return ArgumentType(Ty);
   }
 
 };
@@ -404,8 +422,8 @@ class PackedFunctionTypeTranslator : public FunctionTypeTranslator {
   virtual Type *TranslateReturnType(CallInst *CI) override {
     return TranslateIfVector(CI->getType());
   }
-  virtual Type *TranslateArgumentType(Type *OrigArgType) override {
-    return TranslateIfVector(OrigArgType);
+  virtual ArgumentType TranslateArgumentType(Value *OrigArg) override {
+    return ArgumentType(TranslateIfVector(OrigArg->getType()));
   }
 
   Type *TranslateIfVector(Type *ty) {
@@ -425,6 +443,191 @@ Value *ExtensionLowering::Pack(CallInst *CI) {
   return result;
 }
 
+///////////////////////////////////////////////////////////////////////////////
+// Resource Lowering.
+
+// Modify a call to a resource method. Makes the following transformations:
+//
+// 1. Convert non-void return value to dx.types.ResRet.
+// 2. Convert resource parameters to the corresponding dx.types.Handle value.
+// 3. Expand vectors in place as separate arguments.
+//
+// Example
+// -----------------------------------------------------------------------------
+//
+//  %0 = call <2 x float> MyBufferOp(i32 138, %class.Buffer %3, <2 x i32> <1 , 2> )
+//  %r = call %dx.types.ResRet.f32 MyBufferOp(i32 138, %dx.types.Handle %buf, i32 1, i32 2 )
+//  %x = extractvalue %r, 0
+//  %y = extractvalue %r, 1
+//  %v = <2 x float> undef
+//  %v.1 = insertelement %v,   %x, 0
+//  %v.2 = insertelement %v.1, %y, 1
+class ResourceMethodCall {
+public:
+  ResourceMethodCall(CallInst *CI, Function &explodedFunction, const ExtensionLowering::HandleMap &handleMap)
+    : m_CI(CI)
+    , m_explodedFunction(explodedFunction)
+    , m_handleMap(handleMap)
+    , m_builder(CI)
+  { }
+
+  Value *Generate() {
+    SmallVector<Value *, 16> args;
+    ExplodeArgs(args);
+    Value *result = CreateCall(args);
+    result = ConvertResult(result);
+    return result;
+  }
+  
+  // Check to see if the value is mapped to a handle in the handleMap.
+  static Instruction *IsResourceHandle(Value *OrigArg, const ExtensionLowering::HandleMap &handleMap) {
+    if (Instruction *Inst = dyn_cast<Instruction>(OrigArg)) {
+      if (handleMap.count(Inst))
+        return Inst;
+    }
+    return nullptr;
+  }
+  
+private:
+  CallInst *m_CI;
+  Function &m_explodedFunction;
+  const ExtensionLowering::HandleMap &m_handleMap;
+  IRBuilder<> m_builder;
+  
+  Value *GetResourceHandle(Value *OrigArg) {
+    if (Instruction *Inst = IsResourceHandle(OrigArg, m_handleMap))
+      return m_handleMap.at(Inst);
+    return nullptr;
+    
+  }
+
+  void ExplodeArgs(SmallVectorImpl<Value*> &args) {
+    for (Value *arg : m_CI->arg_operands()) {
+      // vector arg: <N x ty> -> ty, ty, ..., ty (N times)
+      if (arg->getType()->isVectorTy()) {
+        for (unsigned i = 0; i < arg->getType()->getVectorNumElements(); i++) {
+          Value *xarg = m_builder.CreateExtractElement(arg, i);
+          args.push_back(xarg);
+        }
+      }
+      // resource handle arg: handle -> dx.types.Handle
+      else if (Value *handle = GetResourceHandle(arg)) {
+        args.push_back(handle);
+      }
+      // any other value: arg -> arg
+      else {
+        args.push_back(arg);
+      }
+    }
+  }
+
+  Value *CreateCall(const SmallVectorImpl<Value*> &args) {
+    return m_builder.CreateCall(&m_explodedFunction, args);
+  }
+
+  Value *ConvertResult(Value *result) {
+    Type *origRetTy = m_CI->getType();
+    if (origRetTy->isVoidTy())
+      return ConvertVoidResult(result);
+    else if (origRetTy->isVectorTy())
+      return ConvertVectorResult(origRetTy, result);
+    else
+      return ConvertScalarResult(origRetTy, result);
+  }
+
+  // Void result does not need any conversion.
+  Value *ConvertVoidResult(Value *result) {
+    return result;
+  }
+
+  // Vector result will be populated with the elements from the resource return.
+  Value *ConvertVectorResult(Type *origRetTy, Value *result) {
+    Type *resourceRetTy = result->getType();
+    assert(origRetTy->isVectorTy());
+    assert(resourceRetTy->isStructTy() && "expected resource return type to be a struct");
+    
+    const unsigned vectorSize = origRetTy->getVectorNumElements();
+    const unsigned structSize = resourceRetTy->getStructNumElements();
+    const unsigned size = std::min(vectorSize, structSize);
+    assert(vectorSize < structSize);
+    
+    // Copy resource struct elements to vector.
+    Value *vector = UndefValue::get(origRetTy);
+    for (unsigned i = 0; i < size; ++i) {
+      Value *element = m_builder.CreateExtractValue(result, { i });
+      vector = m_builder.CreateInsertElement(vector, element, i);
+    }
+
+    return vector;
+  }
+
+  // Scalar result will be populated with the first element of the resource return.
+  Value *ConvertScalarResult(Type *origRetTy, Value *result) {
+    assert(origRetTy->isSingleValueType());
+    return m_builder.CreateExtractValue(result, { 0 });
+  }
+
+};
+
+// Translate function return and argument types for resource method lowering.
+class ResourceFunctionTypeTranslator : public FunctionTypeTranslator {
+public:
+  ResourceFunctionTypeTranslator(const ExtensionLowering::HandleMap &handleMap, OP& hlslOp)
+    : m_handleMap(handleMap)
+    , m_hlslOp(hlslOp)
+  { }
+
+  // Translate return type as follows:
+  //
+  // void     -> void
+  // <N x ty> -> dx.types.ResRet.ty
+  //  ty      -> dx.types.ResRet.ty
+  virtual Type *TranslateReturnType(CallInst *CI) override {
+    Type *RetTy = CI->getType();
+    if (RetTy->isVoidTy())
+      return RetTy;
+    else if (RetTy->isVectorTy())
+      RetTy = RetTy->getVectorElementType();
+
+    return m_hlslOp.GetResRetType(RetTy);
+  }
+  
+  // Translate argument type as follows:
+  //
+  // resource -> dx.types.Handle
+  // <N x ty> -> { ty, N }
+  //  ty      -> { ty, 1 }
+  virtual ArgumentType TranslateArgumentType(Value *OrigArg) override {
+    int count = 1;
+    Type *ty = OrigArg->getType();
+
+    if (ty->isVectorTy()) {
+      count = ty->getVectorNumElements();
+      ty = ty->getVectorElementType();
+    }
+    else if (ResourceMethodCall::IsResourceHandle(OrigArg, m_handleMap)) {
+      ty = m_hlslOp.GetHandleType();
+    }
+
+    return ArgumentType(ty, count);
+  }
+
+private:
+  const ExtensionLowering::HandleMap &m_handleMap;
+  OP& m_hlslOp;
+};
+
+Value *ExtensionLowering::Resource(CallInst *CI) {
+  ResourceFunctionTypeTranslator resourceTypeTranslator(m_handleMap, m_hlslOp);
+  Function *resourceFunction = FunctionTranslator::GetLoweredFunction(resourceTypeTranslator, CI, *this);
+  if (!resourceFunction)
+    return nullptr;
+
+  ResourceMethodCall explode(CI, *resourceFunction, m_handleMap);
+  Value *result = explode.Generate();
+  return result;
+}
+
 ///////////////////////////////////////////////////////////////////////////////
 // Computing Extension Names.
 
diff --git a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp
index 7a48aa39d..bc181acf3 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp
@@ -4069,8 +4069,11 @@ void SROA_Parameter_HLSL::flattenArgument(
                 DXASSERT(data->getType()->isPointerTy(),
                          "Append value must be pointer.");
                 IRBuilder<> Builder(CI);
-                Value *ldInst = Builder.CreateLoad(data);
-                Builder.CreateStore(ldInst, outputVal);
+
+                llvm::SmallVector<llvm::Value *, 16> idxList;
+                SplitCpy(data->getType(), outputVal, data, idxList,
+                         /*bAllowReplace*/ false, Builder);
+
                 CI->setArgOperand(HLOperandIndex::kStreamAppendDataOpIndex, outputVal);
               }
               else {
@@ -4089,9 +4092,13 @@ void SROA_Parameter_HLSL::flattenArgument(
                 DXASSERT_LOCALVAR(eltCount, eltCount == EltPtrList.size(), "invalid element count");
 
                 for (unsigned i = HLOperandIndex::kStreamAppendDataOpIndex; i < CI->getNumArgOperands(); i++) {
-                  Value *Elt = Builder.CreateLoad(CI->getArgOperand(i));
-                  Value *EltPtr = EltPtrList[i-HLOperandIndex::kStreamAppendDataOpIndex];
-                  Builder.CreateStore(Elt, EltPtr);
+                  Value *DataPtr = CI->getArgOperand(i);
+                  Value *EltPtr =
+                      EltPtrList[i - HLOperandIndex::kStreamAppendDataOpIndex];
+
+                  llvm::SmallVector<llvm::Value *, 16> idxList;
+                  SplitCpy(DataPtr->getType(), EltPtr, DataPtr, idxList,
+                           /*bAllowReplace*/ false, Builder);
                   CI->setArgOperand(i, EltPtr);
                 }
               }
@@ -4255,6 +4262,17 @@ static void LegalizeDxilInputOutputs(Function *F, DxilFunctionAnnotation *EntryA
       bNeedTemp = true;
       bLoadOutputFromTemp = true;
       bStoreInputToTemp = true;
+    } else if (bLoad && bStore) {
+      bNeedTemp = true;
+      switch (qual) {
+      case DxilParamInputQual::InputPrimitive:
+      case DxilParamInputQual::InputPatch:
+      case DxilParamInputQual::OutputPatch:
+        bStoreInputToTemp = true;
+        break;
+      default:
+        DXASSERT(0, "invalid input qual here");
+      }
     }
 
     if (HLMatrixLower::IsMatrixType(Ty)) {
diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp
index 8ab85c5bd..68d7c504d 100644
--- a/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/lib/Transforms/Scalar/Scalarizer.cpp
@@ -344,8 +344,8 @@ void Scalarizer::transferMetadata(Instruction *Op, const ValueVector &CV) {
       // HLSL Change Begins
       // Transfer FPMath flag.
       if (FPMathOperator *FPMath = dyn_cast<FPMathOperator>(New)) {
-        FPMathOperator *FPMathOp = dyn_cast<FPMathOperator>(Op);
-        New->copyFastMathFlags(FPMathOp->getFastMathFlags());
+        if (FPMathOperator *FPMathOp = dyn_cast<FPMathOperator>(Op))
+          New->copyFastMathFlags(FPMathOp->getFastMathFlags());
       }
       // HLSL Change Ends
     }
diff --git a/tools/clang/lib/Basic/Targets.cpp b/tools/clang/lib/Basic/Targets.cpp
index daa781d38..60fc48296 100644
--- a/tools/clang/lib/Basic/Targets.cpp
+++ b/tools/clang/lib/Basic/Targets.cpp
@@ -6987,6 +6987,9 @@ public:
     BigEndian = false;
     TLSSupported = false;
     LongWidth = LongAlign = 64;
+    BoolWidth = 32;
+    // To avoid member for alignment.
+    BoolAlign = 8;
 
     // using the Microsoft ABI.
     TheCXXABI.set(TargetCXXABI::Microsoft);
@@ -7031,6 +7034,9 @@ public:
   DXIL_32TargetInfo(const llvm::Triple &Triple) : DXILTargetInfo(Triple) {
     LongDoubleWidth = LongDoubleAlign = 64;
     LongDoubleFormat = &llvm::APFloat::IEEEdouble;
+    BoolWidth = 32;
+    // To avoid member for alignment.
+    BoolAlign = 8;
     // TODO: Update Description for DXIL
     DescriptionString = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32";
   }
diff --git a/tools/clang/lib/CodeGen/CGExpr.cpp b/tools/clang/lib/CodeGen/CGExpr.cpp
index a7bcdb944..a6761e2e1 100644
--- a/tools/clang/lib/CodeGen/CGExpr.cpp
+++ b/tools/clang/lib/CodeGen/CGExpr.cpp
@@ -1251,7 +1251,11 @@ llvm::Value *CodeGenFunction::EmitFromMemory(llvm::Value *Value, QualType Ty) {
   if (hasBooleanRepresentation(Ty)) {
     assert(Value->getType()->isIntegerTy(getContext().getTypeSize(Ty)) &&
            "wrong value rep of bool");
-    return Builder.CreateTrunc(Value, Builder.getInt1Ty(), "tobool");
+    // HLSL Change Begin.
+    // Use icmp ne v, 0 to convert to i1 instead of trunc.
+    return Builder.CreateICmpNE(
+        Value, llvm::ConstantInt::get(Value->getType(), 0), "tobool");
+    // HLSL Change End.
   }
 
   return Value;
diff --git a/tools/clang/lib/CodeGen/CGHLSLMS.cpp b/tools/clang/lib/CodeGen/CGHLSLMS.cpp
index 14e4fc8cc..ea96e32fc 100644
--- a/tools/clang/lib/CodeGen/CGHLSLMS.cpp
+++ b/tools/clang/lib/CodeGen/CGHLSLMS.cpp
@@ -2607,7 +2607,6 @@ static void ReplaceBoolVectorSubscript(CallInst *CI) {
   Value *Ptr = CI->getArgOperand(0);
   Value *Idx = CI->getArgOperand(1);
   Value *IdxList[] = {ConstantInt::get(Idx->getType(), 0), Idx};
-  llvm::Type *i1Ty = llvm::Type::getInt1Ty(Idx->getContext());
 
   for (auto It = CI->user_begin(), E = CI->user_end(); It != E;) {
     Instruction *user = cast<Instruction>(*(It++));
@@ -2624,7 +2623,8 @@ static void ReplaceBoolVectorSubscript(CallInst *CI) {
       // Must be a store inst here.
       StoreInst *SI = cast<StoreInst>(user);
       Value *V = SI->getValueOperand();
-      Value *cast = Builder.CreateTrunc(V, i1Ty);
+      Value *cast =
+          Builder.CreateICmpNE(V, llvm::ConstantInt::get(V->getType(), 0));
       Builder.CreateStore(cast, GEP);
       SI->eraseFromParent();
     }
@@ -3141,9 +3141,9 @@ static void SimplifyArrayToVector(BitCastInst *BCI, std::vector<Instruction *> &
 
 static void SimplifyBoolCast(BitCastInst *BCI, llvm::Type *i1Ty, std::vector<Instruction *> &deadInsts) {
   // Transform
-  //%22 = bitcast i1* %21 to i8*
-  //%23 = load i8, i8* %22, !tbaa !3, !range !7
-  //%tobool5 = trunc i8 %23 to i1
+  //%22 = bitcast i1* %21 to i32*
+  //%23 = load i32, i32* %22, !tbaa !3, !range !7
+  //%tobool5 = icmp ne i32 %23, 0
   // To
   //%tobool5 = load i1, i1* %21, !tbaa !3, !range !7
   Value *i1Ptr = BCI->getOperand(0);
@@ -3152,17 +3152,21 @@ static void SimplifyBoolCast(BitCastInst *BCI, llvm::Type *i1Ty, std::vector<Ins
       if (!LI->hasOneUse()) {
         continue;
       }
-      if (TruncInst *TI = dyn_cast<TruncInst>(*LI->user_begin())) {
-        if (TI->getType() == i1Ty) {
-          IRBuilder<> Builder(LI);
-          Value *i1Val = Builder.CreateLoad(i1Ptr);
-          TI->replaceAllUsesWith(i1Val);
-          deadInsts.emplace_back(LI);
-          deadInsts.emplace_back(TI);
+      if (ICmpInst *II = dyn_cast<ICmpInst>(*LI->user_begin())) {
+        if (ConstantInt *CI = dyn_cast<ConstantInt>(II->getOperand(1))) {
+          if (CI->getLimitedValue() == 0 &&
+              II->getPredicate() == CmpInst::ICMP_NE) {
+            IRBuilder<> Builder(LI);
+            Value *i1Val = Builder.CreateLoad(i1Ptr);
+            II->replaceAllUsesWith(i1Val);
+            deadInsts.emplace_back(LI);
+            deadInsts.emplace_back(II);
+          }
         }
       }
     }
   }
+  deadInsts.emplace_back(BCI);
 }
 
 typedef float(__cdecl *FloatUnaryEvalFuncType)(float);
@@ -5116,8 +5120,8 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit(
     BasicBlock *EntryBlock = &F->getEntryBlock();
 
     if (ParamTy->isBooleanType()) {
-      // Create i8 for bool.
-      ParamTy = CGM.getContext().CharTy;
+      // Create i32 for bool.
+      ParamTy = CGM.getContext().IntTy;
     }
     // Make sure the alloca is in entry block to stop inline create stacksave.
     IRBuilder<> Builder(EntryBlock->getFirstInsertionPt());
diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp
index a7316b00f..741f2b1e8 100644
--- a/tools/clang/lib/Sema/SemaHLSL.cpp
+++ b/tools/clang/lib/Sema/SemaHLSL.cpp
@@ -3073,15 +3073,15 @@ public:
       const HLSL_INTRINSIC *pPrior = nullptr;
       UINT64 lookupCookie = 0;
       CA2W wideTypeName(typeName);
-      table->LookupIntrinsic(wideTypeName, L"*", &pIntrinsic, &lookupCookie);
-      while (pIntrinsic != nullptr) {
+      HRESULT found = table->LookupIntrinsic(wideTypeName, L"*", &pIntrinsic, &lookupCookie);
+      while (pIntrinsic != nullptr && SUCCEEDED(found)) {
         if (!AreIntrinsicTemplatesEquivalent(pIntrinsic, pPrior)) {
           AddObjectIntrinsicTemplate(recordDecl, startDepth, pIntrinsic);
           // NOTE: this only works with the current implementation because
           // intrinsics are alive as long as the table is alive.
           pPrior = pIntrinsic;
         }
-        table->LookupIntrinsic(wideTypeName, L"*", &pIntrinsic, &lookupCookie);
+        found = table->LookupIntrinsic(wideTypeName, L"*", &pIntrinsic, &lookupCookie);
       }
     }
   }
@@ -3868,6 +3868,7 @@ public:
 
   FunctionDecl* AddHLSLIntrinsicMethod(
     LPCSTR tableName,
+    LPCSTR lowering,
     _In_ const HLSL_INTRINSIC* intrinsic,
     _In_ FunctionTemplateDecl *FunctionTemplate,
     ArrayRef<Expr *> Args,
@@ -3956,7 +3957,7 @@ public:
       SC_Extern, InlineSpecifiedFalse, IsConstexprFalse, NoLoc);
 
     // Add intrinsic attr
-    AddHLSLIntrinsicAttr(method, *m_context, tableName, "", intrinsic);
+    AddHLSLIntrinsicAttr(method, *m_context, tableName, lowering, intrinsic);
 
     // Record this function template specialization.
     TemplateArgumentList *argListCopy = TemplateArgumentList::CreateCopy(
@@ -7791,7 +7792,7 @@ Sema::TemplateDeductionResult HLSLExternalSource::DeduceTemplateArgumentsForHLSL
       continue;
     }
 
-    Specialization = AddHLSLIntrinsicMethod(cursor.GetTableName(), *cursor, FunctionTemplate, Args, argTypes, argCount);
+    Specialization = AddHLSLIntrinsicMethod(cursor.GetTableName(), cursor.GetLoweringStrategy(), *cursor, FunctionTemplate, Args, argTypes, argCount);
     DXASSERT_NOMSG(Specialization->getPrimaryTemplate()->getCanonicalDecl() ==
       FunctionTemplate->getCanonicalDecl());
 
diff --git a/tools/clang/test/CodeGenHLSL/BasicHLSL11_VS.hlsl b/tools/clang/test/CodeGenHLSL/BasicHLSL11_VS.hlsl
index 69940c48e..2c4449747 100644
--- a/tools/clang/test/CodeGenHLSL/BasicHLSL11_VS.hlsl
+++ b/tools/clang/test/CodeGenHLSL/BasicHLSL11_VS.hlsl
@@ -20,7 +20,7 @@
 // CHECK: xy
 
 // CHECK: OutputPositionPresent=1
-// CHECK: dx.op.createHandle(i32 58, i8 2, i32 0, i32 5, i1 false)
+// CHECK: dx.op.createHandle(i32 59, i8 2, i32 0, i32 5, i1 false)
 
 //--------------------------------------------------------------------------------------
 // File: BasicHLSL11_VS.hlsl
diff --git a/tools/clang/test/HLSL/dxil_validation/GetDimCalcLOD.hlsl b/tools/clang/test/CodeGenHLSL/GetDimCalcLOD.hlsl
similarity index 100%
rename from tools/clang/test/HLSL/dxil_validation/GetDimCalcLOD.hlsl
rename to tools/clang/test/CodeGenHLSL/GetDimCalcLOD.hlsl
diff --git a/tools/clang/test/HLSL/dxil_validation/InnerCoverage.hlsl b/tools/clang/test/CodeGenHLSL/InnerCoverage.hlsl
similarity index 100%
rename from tools/clang/test/HLSL/dxil_validation/InnerCoverage.hlsl
rename to tools/clang/test/CodeGenHLSL/InnerCoverage.hlsl
diff --git a/tools/clang/test/CodeGenHLSL/InnerCoverage2.hlsl b/tools/clang/test/CodeGenHLSL/InnerCoverage2.hlsl
new file mode 100644
index 000000000..84a0b11c1
--- /dev/null
+++ b/tools/clang/test/CodeGenHLSL/InnerCoverage2.hlsl
@@ -0,0 +1,17 @@
+// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
+
+// note: define GENLL in order to generate the basis for InnerCoverage.ll
+
+// CHECK: error: Parameter with semantic SV_InnerCoverage has overlapping semantic index at 0
+// CHECK: error: Pixel shader inputs SV_Coverage and SV_InnerCoverage are mutually exclusive
+
+void main(snorm float b : B, uint c:C,
+	in uint inner : InnerCoverage,
+	inout uint cover: SV_Coverage)
+{
+#ifndef GENLL
+  cover = cover & c;
+#else
+  cover = cover & inner;
+#endif
+}
diff --git a/tools/clang/test/HLSL/dxil_validation/IntegerDepth.hlsl b/tools/clang/test/CodeGenHLSL/IntegerDepth.hlsl
similarity index 100%
rename from tools/clang/test/HLSL/dxil_validation/IntegerDepth.hlsl
rename to tools/clang/test/CodeGenHLSL/IntegerDepth.hlsl
diff --git a/tools/clang/test/CodeGenHLSL/IntegerDepth2.hlsl b/tools/clang/test/CodeGenHLSL/IntegerDepth2.hlsl
new file mode 100644
index 000000000..e72742a90
--- /dev/null
+++ b/tools/clang/test/CodeGenHLSL/IntegerDepth2.hlsl
@@ -0,0 +1,8 @@
+// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
+
+// CHECK: @main
+
+float main(snorm float b : B, float c:C) : SV_DEPTH
+{
+  return b;
+}
\ No newline at end of file
diff --git a/tools/clang/test/HLSL/dxil_validation/SamplerKind.hlsl b/tools/clang/test/CodeGenHLSL/SamplerKind.hlsl
similarity index 100%
rename from tools/clang/test/HLSL/dxil_validation/SamplerKind.hlsl
rename to tools/clang/test/CodeGenHLSL/SamplerKind.hlsl
diff --git a/tools/clang/test/CodeGenHLSL/Samples/d12_multithreading_vs.hlsl b/tools/clang/test/CodeGenHLSL/Samples/d12_multithreading_vs.hlsl
index ecdb56ac8..afb0938c6 100644
--- a/tools/clang/test/CodeGenHLSL/Samples/d12_multithreading_vs.hlsl
+++ b/tools/clang/test/CodeGenHLSL/Samples/d12_multithreading_vs.hlsl
@@ -3,7 +3,7 @@
 // The constant buffer should be allocated with ID zero and referenced as such.
 
 // CHECK: cb0
-// CHECK: dx.op.createHandle(i32 58, i8 2, i32 0, i32 0
+// CHECK: dx.op.createHandle(i32 59, i8 2, i32 0, i32 0
 
 //*********************************************************
 //
diff --git a/tools/clang/test/CodeGenHLSL/SimpleGS5.hlsl b/tools/clang/test/CodeGenHLSL/SimpleGS5.hlsl
new file mode 100644
index 000000000..a97272172
--- /dev/null
+++ b/tools/clang/test/CodeGenHLSL/SimpleGS5.hlsl
@@ -0,0 +1,29 @@
+// RUN: %dxc -E main -T gs_6_0 %s | FileCheck %s
+
+// CHECK: InputPrimitive=patch2
+// CHECK: emitStream
+// CHECK: cutStream
+// CHECK: i32 24}
+
+struct GSOut {
+  float2 uv : TEXCOORD0;
+  float4 clr : COLOR;
+  float4 pos : SV_Position;
+  float3 norm[2] : NORMAL;
+};
+
+cbuffer b : register(b0) {
+  float2 invViewportSize;
+};
+
+// geometry shader that outputs 3 vertices from a point
+[maxvertexcount(3)]
+[instance(24)]
+void main(InputPatch<GSOut, 2>points, inout PointStream<GSOut> stream) {
+
+  points[0].norm[0] = 1;
+  points[0].norm[1] = 2;
+  stream.Append(points[0]);
+
+  stream.RestartStrip();
+}
\ No newline at end of file
diff --git a/tools/clang/test/HLSL/dxil_validation/UndefValue.hlsl b/tools/clang/test/CodeGenHLSL/UndefValue.hlsl
similarity index 100%
rename from tools/clang/test/HLSL/dxil_validation/UndefValue.hlsl
rename to tools/clang/test/CodeGenHLSL/UndefValue.hlsl
diff --git a/tools/clang/test/CodeGenHLSL/UndefValue2.hlsl b/tools/clang/test/CodeGenHLSL/UndefValue2.hlsl
new file mode 100644
index 000000000..f2fabfe66
--- /dev/null
+++ b/tools/clang/test/CodeGenHLSL/UndefValue2.hlsl
@@ -0,0 +1,9 @@
+// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
+
+// CHECK: @main
+
+float a;
+float main(snorm float b : B) : SV_DEPTH
+{
+  return b + a;
+}
\ No newline at end of file
diff --git a/tools/clang/test/HLSL/dxil_validation/barrier.hlsl b/tools/clang/test/CodeGenHLSL/barrier.hlsl
similarity index 98%
rename from tools/clang/test/HLSL/dxil_validation/barrier.hlsl
rename to tools/clang/test/CodeGenHLSL/barrier.hlsl
index e50651a64..54d2fc89d 100644
--- a/tools/clang/test/HLSL/dxil_validation/barrier.hlsl
+++ b/tools/clang/test/CodeGenHLSL/barrier.hlsl
@@ -73,7 +73,7 @@ void main( uint2 tid : SV_DispatchThreadID, uint2 gid : SV_GroupID, uint2 gtid :
     GroupMemoryBarrierWithGroupSync();
     float2x2 f2x2 = dataC[8*8-1-tid.y%(8*8)];
   AllMemoryBarrier();
-       fA[gidx+2] = f2x2; 
+       fA[gidx+2] = f2x2;
   AllMemoryBarrierWithGroupSync();
       fA[gidx+1] = f2x2;
   DeviceMemoryBarrier();
diff --git a/tools/clang/test/CodeGenHLSL/bindings1.hlsl b/tools/clang/test/CodeGenHLSL/bindings1.hlsl
index 9df4dfbc2..0180d965f 100644
--- a/tools/clang/test/CodeGenHLSL/bindings1.hlsl
+++ b/tools/clang/test/CodeGenHLSL/bindings1.hlsl
@@ -95,35 +95,35 @@
 // CHECK: %struct.Resources = type { %class.Texture2D, %class.Texture2D.0, %class.Texture2D, %class.Texture2D.0, %class.RWTexture2D, %class.RWTexture2D, %class.RWTexture2D, %class.RWTexture2D, %struct.SamplerComparisonState, %struct.SamplerState, %struct.SamplerComparisonState, %struct.SamplerState, <4 x float> }
 
 //                                                CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-// CHECK: %RWTex2_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 1, i32 0, i32 7, i1 false)
-// CHECK: %Tex1_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 0, i32 0, i1 false)
-// CHECK: %Samp2_sampler = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 3, i32 0, i32 0, i1 false)
-// CHECK: %tbuf4_buffer = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 8, i32 4, i1 false)
-// CHECK: %tbuf2_buffer = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 7, i32 2, i1 false)
-// CHECK: %tbuf3_buffer = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 6, i32 6, i1 false)
-// CHECK: %tbuf1_buffer = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 5, i32 35, i1 false)
-// CHECK: %buf2_buffer = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 2, i32 4, i32 55, i1 false)
-// CHECK: %buf1_buffer = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 2, i32 3, i32 104, i1 false)
-// CHECK: %buf4_buffer = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 2, i32 2, i32 1, i1 false)
-// CHECK: %MyCB_buffer = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 2, i32 0, i32 11, i1 false)
-// CHECK: %MyTB_buffer = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 4, i32 11, i1 false)
-// CHECK: %Tex2_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 1, i32 30, i1 false)
-// CHECK: %Tex3_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 2, i32 94, i1 false)
-// CHECK: %Tex4_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 3, i32 10, i1 false)
-// CHECK: %RWTex1_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 1, i32 3, i32 2, i1 false)
-// CHECK: %RWTex3_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 1, i32 1, i32 14, i1 false)
-// CHECK: %RWTex4_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 1, i32 2, i32 22, i1 false)
-// CHECK: %Samp1_sampler = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 3, i32 3, i32 3, i1 false)
-// CHECK: %Samp3_sampler = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 3, i32 1, i32 29, i1 false)
-// CHECK: %Samp4_sampler = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 3, i32 2, i32 23, i1 false)
+// CHECK: %RWTex2_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 1, i32 0, i32 7, i1 false)
+// CHECK: %Tex1_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 0, i32 0, i1 false)
+// CHECK: %Samp2_sampler = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 3, i32 0, i32 0, i1 false)
+// CHECK: %tbuf4_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 8, i32 4, i1 false)
+// CHECK: %tbuf2_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 7, i32 2, i1 false)
+// CHECK: %tbuf3_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 6, i32 6, i1 false)
+// CHECK: %tbuf1_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 5, i32 35, i1 false)
+// CHECK: %buf2_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 2, i32 4, i32 55, i1 false)
+// CHECK: %buf1_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 2, i32 3, i32 104, i1 false)
+// CHECK: %buf4_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 2, i32 2, i32 1, i1 false)
+// CHECK: %MyCB_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 2, i32 0, i32 11, i1 false)
+// CHECK: %MyTB_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 4, i32 11, i1 false)
+// CHECK: %Tex2_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 1, i32 30, i1 false)
+// CHECK: %Tex3_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 2, i32 94, i1 false)
+// CHECK: %Tex4_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 3, i32 10, i1 false)
+// CHECK: %RWTex1_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 1, i32 3, i32 2, i1 false)
+// CHECK: %RWTex3_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 1, i32 1, i32 14, i1 false)
+// CHECK: %RWTex4_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 1, i32 2, i32 22, i1 false)
+// CHECK: %Samp1_sampler = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 3, i32 3, i32 3, i1 false)
+// CHECK: %Samp3_sampler = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 3, i32 1, i32 29, i1 false)
+// CHECK: %Samp4_sampler = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 3, i32 2, i32 23, i1 false)
 
 // check packoffset:
-// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 60, %dx.types.Handle %MyCB_buffer, i32 4)
-// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 60, %dx.types.Handle %MyCB_buffer, i32 7)
-// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 60, %dx.types.Handle %MyCB_buffer, i32 21)
+// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 61, %dx.types.Handle %MyCB_buffer, i32 4)
+// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 61, %dx.types.Handle %MyCB_buffer, i32 7)
+// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 61, %dx.types.Handle %MyCB_buffer, i32 21)
 
 // check element index:
-// CHECK: @dx.op.bufferLoad.i32(i32 69, %dx.types.Handle %tbuf1_buffer, i32 1, i32 undef)
+// CHECK: @dx.op.bufferLoad.i32(i32 70, %dx.types.Handle %tbuf1_buffer, i32 1, i32 undef)
 
 
 
diff --git a/tools/clang/test/CodeGenHLSL/firstbitHi.hlsl b/tools/clang/test/CodeGenHLSL/firstbitHi.hlsl
index 76287059a..ae222be72 100644
--- a/tools/clang/test/CodeGenHLSL/firstbitHi.hlsl
+++ b/tools/clang/test/CodeGenHLSL/firstbitHi.hlsl
@@ -12,10 +12,10 @@
 // CHECK: select
 // CHECK: i32 -1
 
-// CHECK: op.bufferStore.i32(i32 70, %dx.types.Handle %outputUAV_UAV_rawbuf, i32 2, i32 undef, i32 26
-// CHECK: op.bufferStore.i32(i32 70, %dx.types.Handle %outputUAV_UAV_rawbuf, i32 3, i32 undef, i32 23
+// CHECK: op.bufferStore.i32(i32 71, %dx.types.Handle %outputUAV_UAV_rawbuf, i32 2, i32 undef, i32 26
+// CHECK: op.bufferStore.i32(i32 71, %dx.types.Handle %outputUAV_UAV_rawbuf, i32 3, i32 undef, i32 23
 
-// CHECK: dx.op.unaryBits.i64(i32 32, i64
+// CHECK: dx.op.unaryBits.i64(i32 33, i64
 // CHECK: sub i32 63
 // CHECK: icmp ne i32
 // CHECK: select
diff --git a/tools/clang/test/CodeGenHLSL/gatherOffset.hlsl b/tools/clang/test/CodeGenHLSL/gatherOffset.hlsl
index 44da44e0f..bd92e0d5d 100644
--- a/tools/clang/test/CodeGenHLSL/gatherOffset.hlsl
+++ b/tools/clang/test/CodeGenHLSL/gatherOffset.hlsl
@@ -1,17 +1,17 @@
 // RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
 
-// CHECK: dx.op.textureGather.f32(i32 74
-// CHECK: dx.op.textureGather.f32(i32 74
-// CHECK: dx.op.textureGather.f32(i32 74
-// CHECK: dx.op.textureGather.f32(i32 74
-// CHECK: dx.op.textureGather.f32(i32 74
-// CHECK: dx.op.textureGather.f32(i32 74
-// CHECK: dx.op.textureGather.f32(i32 74
-// CHECK: dx.op.textureGather.f32(i32 74
-// CHECK: dx.op.textureGather.f32(i32 74
-// CHECK: dx.op.textureGather.f32(i32 74
-// CHECK: dx.op.textureGather.f32(i32 74
-// CHECK: dx.op.textureGather.f32(i32 74
+// CHECK: dx.op.textureGather.f32(i32 75
+// CHECK: dx.op.textureGather.f32(i32 75
+// CHECK: dx.op.textureGather.f32(i32 75
+// CHECK: dx.op.textureGather.f32(i32 75
+// CHECK: dx.op.textureGather.f32(i32 75
+// CHECK: dx.op.textureGather.f32(i32 75
+// CHECK: dx.op.textureGather.f32(i32 75
+// CHECK: dx.op.textureGather.f32(i32 75
+// CHECK: dx.op.textureGather.f32(i32 75
+// CHECK: dx.op.textureGather.f32(i32 75
+// CHECK: dx.op.textureGather.f32(i32 75
+// CHECK: dx.op.textureGather.f32(i32 75
 
 
 SamplerState samp1;
diff --git a/tools/clang/test/HLSL/dxil_validation/hsAttribute.hlsl b/tools/clang/test/CodeGenHLSL/hsAttribute.hlsl
similarity index 100%
rename from tools/clang/test/HLSL/dxil_validation/hsAttribute.hlsl
rename to tools/clang/test/CodeGenHLSL/hsAttribute.hlsl
diff --git a/tools/clang/test/HLSL/dxil_validation/interpChange.hlsl b/tools/clang/test/CodeGenHLSL/interpChange.hlsl
similarity index 100%
rename from tools/clang/test/HLSL/dxil_validation/interpChange.hlsl
rename to tools/clang/test/CodeGenHLSL/interpChange.hlsl
diff --git a/tools/clang/test/HLSL/dxil_validation/interpOnInt.hlsl b/tools/clang/test/CodeGenHLSL/interpOnInt.hlsl
similarity index 100%
rename from tools/clang/test/HLSL/dxil_validation/interpOnInt.hlsl
rename to tools/clang/test/CodeGenHLSL/interpOnInt.hlsl
diff --git a/tools/clang/test/CodeGenHLSL/interpOnInt2.hlsl b/tools/clang/test/CodeGenHLSL/interpOnInt2.hlsl
new file mode 100644
index 000000000..494839a02
--- /dev/null
+++ b/tools/clang/test/CodeGenHLSL/interpOnInt2.hlsl
@@ -0,0 +1,9 @@
+﻿// RUN: %dxc -E main -T ps_6_0 -fcgl %s | FileCheck %s
+
+// CHECK: main
+// After lowering, these would turn into multiple abs calls rather than a 4 x float
+// CHECK: call <4 x float> @"dx.hl.op..<4 x float> (i32, <4 x float>)"(i32 62,
+
+float4 main(float4 a : A, uint4 b : A1) : SV_TARGET {
+  return abs(a*b.yxxx);
+}
diff --git a/tools/clang/test/CodeGenHLSL/legacy_struct.hlsl b/tools/clang/test/CodeGenHLSL/legacy_struct.hlsl
index 635cf85be..61c6cac7b 100644
--- a/tools/clang/test/CodeGenHLSL/legacy_struct.hlsl
+++ b/tools/clang/test/CodeGenHLSL/legacy_struct.hlsl
@@ -1,7 +1,7 @@
 // RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
 
-// CHECK: %dx.alignment.legacy.struct.S = type { i32, i32, i32, <2 x i32>, i32, i32, i32 }
 // CHECK: %"dx.alignment.legacy.$Globals" = type { float, %dx.alignment.legacy.struct.S, [1 x <4 x i32>] }
+// CHECK: %dx.alignment.legacy.struct.S = type { i32, i32, i32, <2 x i32>, i32, i32, i32 }
 
 RasterizerOrderedBuffer<float4> r;
 
diff --git a/tools/clang/test/CodeGenHLSL/multiStreamGS.hlsl b/tools/clang/test/CodeGenHLSL/multiStreamGS.hlsl
index 4c7a83c92..3e2b07980 100644
--- a/tools/clang/test/CodeGenHLSL/multiStreamGS.hlsl
+++ b/tools/clang/test/CodeGenHLSL/multiStreamGS.hlsl
@@ -17,14 +17,14 @@
 
 // CHECK: OutputStreamMask=7
 
-// CHECK: emitStream(i32 97, i8 0)
-// CHECK: cutStream(i32 98, i8 0)
-// CHECK: emitStream(i32 97, i8 1)
-// CHECK: cutStream(i32 98, i8 1)
-// CHECK: emitStream(i32 97, i8 1)
-// CHECK: cutStream(i32 98, i8 1)
-// CHECK: emitStream(i32 97, i8 2)
-// CHECK: cutStream(i32 98, i8 2)
+// CHECK: emitStream(i32 99, i8 0)
+// CHECK: cutStream(i32 100, i8 0)
+// CHECK: emitStream(i32 99, i8 1)
+// CHECK: cutStream(i32 100, i8 1)
+// CHECK: emitStream(i32 99, i8 1)
+// CHECK: cutStream(i32 100, i8 1)
+// CHECK: emitStream(i32 99, i8 2)
+// CHECK: cutStream(i32 100, i8 2)
 
 struct MyStruct
 {
diff --git a/tools/clang/test/HLSL/dxil_validation/phiTGSM.hlsl b/tools/clang/test/CodeGenHLSL/phiTGSM.hlsl
similarity index 100%
rename from tools/clang/test/HLSL/dxil_validation/phiTGSM.hlsl
rename to tools/clang/test/CodeGenHLSL/phiTGSM.hlsl
diff --git a/tools/clang/test/CodeGenHLSL/reducible.hlsl b/tools/clang/test/CodeGenHLSL/reducible.hlsl
new file mode 100644
index 000000000..4a5813be6
--- /dev/null
+++ b/tools/clang/test/CodeGenHLSL/reducible.hlsl
@@ -0,0 +1,22 @@
+// RUN: %dxc -E main -T ps_6_0 %s  | FileCheck %s
+
+// CHECK: !"llvm.loop.unroll.disable"
+uint u;
+float main(float2 a : A, int3 b : B) : SV_Target
+{
+  float s = 0;
+  /*
+  [loop]
+  for(int i = 0; i < b.x; i++) {
+    s += a.x;
+    if (s == 5)
+      break;
+  }
+  */
+  if (s > a)
+    s -= u+b.x;
+  else
+    s += b.x+b.y;
+
+  return s;
+}
diff --git a/tools/clang/test/CodeGenHLSL/rovs.hlsl b/tools/clang/test/CodeGenHLSL/rovs.hlsl
index ece5fdd7f..850a2d970 100644
--- a/tools/clang/test/CodeGenHLSL/rovs.hlsl
+++ b/tools/clang/test/CodeGenHLSL/rovs.hlsl
@@ -34,21 +34,21 @@ float4 main() : SV_TARGET {
 // CHECK: rob_UAV_buf_ROV
 
   float4 result = 0;
-// CHECK: dx.op.bufferLoad.f32(i32 69,
+// CHECK: dx.op.bufferLoad.f32(i32 70,
   result += rob[0];
-// CHECK: dx.op.bufferLoad.i32(i32 69
+// CHECK: dx.op.bufferLoad.i32(i32 70
   result += rba.Load(0);
-// CHECK: dx.op.bufferLoad.f32(i32 69,
+// CHECK: dx.op.bufferLoad.f32(i32 70,
   result += rsb[0].f4;
-// CHECK: dx.op.textureLoad.f32(i32 67,
+// CHECK: dx.op.textureLoad.f32(i32 68,
   result += rt1[0];
-// CHECK: dx.op.textureLoad.f32(i32 67,
+// CHECK: dx.op.textureLoad.f32(i32 68,
   result += rt1a[uint2(0, 0)];
-// CHECK: dx.op.textureLoad.f32(i32 67,
+// CHECK: dx.op.textureLoad.f32(i32 68,
   result += rt2[uint2(0, 1)];
-// CHECK: dx.op.textureLoad.f32(i32 67,
+// CHECK: dx.op.textureLoad.f32(i32 68,
   result += rt2a[uint3(0, 0, 0)];
-// CHECK: dx.op.textureLoad.f32(i32 67,
+// CHECK: dx.op.textureLoad.f32(i32 68,
   result += rt3[uint3(1, 2, 3)];
 
   result += rt4[uint3(1, 2, 3)];
diff --git a/tools/clang/test/HLSL/dxil_validation/semaOverlap.hlsl b/tools/clang/test/CodeGenHLSL/semaOverlap.hlsl
similarity index 100%
rename from tools/clang/test/HLSL/dxil_validation/semaOverlap.hlsl
rename to tools/clang/test/CodeGenHLSL/semaOverlap.hlsl
diff --git a/tools/clang/test/CodeGenHLSL/semaOverlap1.hlsl b/tools/clang/test/CodeGenHLSL/semaOverlap1.hlsl
new file mode 100644
index 000000000..4319fe362
--- /dev/null
+++ b/tools/clang/test/CodeGenHLSL/semaOverlap1.hlsl
@@ -0,0 +1,9 @@
+﻿// RUN: %dxc -E main -T ps_6_0 -fcgl %s | FileCheck %s
+
+// CHECK: main
+// After lowering, these would turn into multiple abs calls rather than a 4 x float
+// CHECK: call <4 x float> @"dx.hl.op..<4 x float> (i32, <4 x float>)"(i32 62,
+
+float4 main(float4 a : A, float4 b : A1) : SV_TARGET {
+  return abs(a*b.yxxx);
+}
diff --git a/tools/clang/test/HLSL/dxil_validation/uavBarrier.hlsl b/tools/clang/test/CodeGenHLSL/uavBarrier.hlsl
similarity index 100%
rename from tools/clang/test/HLSL/dxil_validation/uavBarrier.hlsl
rename to tools/clang/test/CodeGenHLSL/uavBarrier.hlsl
diff --git a/tools/clang/test/CodeGenHLSL/updateCounter2.hlsl b/tools/clang/test/CodeGenHLSL/updateCounter2.hlsl
new file mode 100644
index 000000000..96ce29c84
--- /dev/null
+++ b/tools/clang/test/CodeGenHLSL/updateCounter2.hlsl
@@ -0,0 +1,26 @@
+// RUN: %dxc -E main -T ps_6_0 %s  | FileCheck %s
+
+// CHECK: RWStructuredBuffers may increment or decrement their counters, but not both.
+
+struct Foo
+{
+  float2 a;
+  float3 b;
+  int2 c[4];
+};
+
+Buffer<float4> buf1;
+RWStructuredBuffer<Foo> buf2;
+
+float4 main(float idx1 : Idx1, float idx2 : Idx2) : SV_Target
+{
+  uint status;
+  float4 r = 0;
+  int id = buf2.IncrementCounter();
+  buf2[id].a = float2(idx1, idx2);
+
+  id = buf2.IncrementCounter();
+  r.xy += buf1[id].a;  
+  
+  return r;
+}
diff --git a/tools/clang/test/HLSL/abs2_m.ll b/tools/clang/test/HLSL/abs2_m.ll
index e7d40c2d0..99a2355d9 100644
--- a/tools/clang/test/HLSL/abs2_m.ll
+++ b/tools/clang/test/HLSL/abs2_m.ll
@@ -1,6 +1,8 @@
 ; RUN: %dxv %s | FileCheck %s
 
 ; CHECK: DXIL intrinsic overload must be valid
+; Change dx.op.loadInput.i32(i32 4 to dx.op.loadInput.i32(i32 3
+
 ;
 ; Input signature:
 ;
@@ -82,7 +84,7 @@ attributes #1 = { nounwind readnone }
 !dx.entryPoints = !{!12}
 
 !0 = !{!"clang version 3.7.0 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 4}
+!1 = !{i32 1, i32 0}
 !2 = !{!"ps", i32 6, i32 0}
 !3 = !{i32 1, void (<4 x i32>, <4 x i32>*)* @"\01?main@@YA?AV?$vector@H$03@@V1@@Z.flat", !4}
 !4 = !{!5, !7, !10}
diff --git a/tools/clang/test/HLSL/dxil_validation/Eval.ll b/tools/clang/test/HLSL/dxil_validation/Eval.ll
deleted file mode 100644
index a9fa00659..000000000
--- a/tools/clang/test/HLSL/dxil_validation/Eval.ll
+++ /dev/null
@@ -1,94 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: Interpolation mode on A used with eval_* instruction must be linear, linear_centroid, linear_noperspective, linear_noperspective_centroid, linear_sample or linear_noperspective_sample
-; CHECK: Interpolation mode on A used with eval_* instruction must be linear, linear_centroid, linear_noperspective, linear_noperspective_centroid, linear_sample or linear_noperspective_sample
-; CHECK: Interpolation mode on A used with eval_* instruction must be linear, linear_centroid, linear_noperspective, linear_noperspective_centroid, linear_sample or linear_noperspective_sample
-
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-; Function Attrs: nounwind
-define void @main.flat(<4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>* nocapture readnone) #0 {
-entry:
-  %RenderTargetGetSampleCount = tail call i32 @dx.op.renderTargetGetSampleCount(i32 80)
-  %sub = add i32 %RenderTargetGetSampleCount, -1
-  %5 = tail call float @dx.op.evalCentroid.f32(i32 92, i32 0, i32 0, i8 0)
-  %6 = tail call float @dx.op.evalCentroid.f32(i32 92, i32 0, i32 0, i8 1)
-  %7 = tail call float @dx.op.evalCentroid.f32(i32 92, i32 0, i32 0, i8 2)
-  %8 = tail call float @dx.op.evalCentroid.f32(i32 92, i32 0, i32 0, i8 3)
-  %9 = tail call float @dx.op.evalSampleIndex.f32(i32 91, i32 0, i32 0, i8 0, i32 %sub)
-  %10 = tail call float @dx.op.evalSampleIndex.f32(i32 91, i32 0, i32 0, i8 1, i32 %sub)
-  %11 = tail call float @dx.op.evalSampleIndex.f32(i32 91, i32 0, i32 0, i8 2, i32 %sub)
-  %12 = tail call float @dx.op.evalSampleIndex.f32(i32 91, i32 0, i32 0, i8 3, i32 %sub)
-  %add.i0 = fadd fast float %9, %5
-  %add.i1 = fadd fast float %10, %6
-  %add.i2 = fadd fast float %11, %7
-  %add.i3 = fadd fast float %12, %8
-  %13 = tail call float @dx.op.evalSnapped.f32(i32 90, i32 0, i32 0, i8 0, i32 1, i32 2)
-  %14 = tail call float @dx.op.evalSnapped.f32(i32 90, i32 0, i32 0, i8 1, i32 1, i32 2)
-  %15 = tail call float @dx.op.evalSnapped.f32(i32 90, i32 0, i32 0, i8 2, i32 1, i32 2)
-  %16 = tail call float @dx.op.evalSnapped.f32(i32 90, i32 0, i32 0, i8 3, i32 1, i32 2)
-  %add5.i0 = fadd fast float %add.i0, %13
-  %add5.i1 = fadd fast float %add.i1, %14
-  %add5.i2 = fadd fast float %add.i2, %15
-  %add5.i3 = fadd fast float %add.i3, %16
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %add5.i0)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %add5.i1)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %add5.i2)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %add5.i3)
-  ret void
-}
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.evalSampleIndex.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind readonly
-declare i32 @dx.op.renderTargetGetSampleCount(i32) #2
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.evalCentroid.f32(i32, i32, i32, i8) #1
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.evalSnapped.f32(i32, i32, i32, i8, i32, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind readonly }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.typeAnnotations = !{!3}
-!dx.entryPoints = !{!18}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"ps", i32 6, i32 0}
-!3 = !{i32 1, void (<4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>*)* @main.flat, !4}
-!4 = !{!5, !7, !10, !12, !14, !16}
-!5 = !{i32 0, !6, !6}
-!6 = !{}
-!7 = !{i32 0, !8, !9}
-!8 = !{i32 4, !"A", i32 7, i32 9}
-!9 = !{i32 0}
-!10 = !{i32 0, !11, !9}
-!11 = !{i32 4, !"B", i32 5, i32 4, i32 7, i32 9}
-!12 = !{i32 0, !13, !9}
-!13 = !{i32 4, !"C", i32 5, i32 3, i32 7, i32 9}
-!14 = !{i32 0, !15, !9}
-!15 = !{i32 4, !"D", i32 5, i32 6, i32 7, i32 9}
-!16 = !{i32 1, !17, !9}
-!17 = !{i32 4, !"SV_TARGET", i32 7, i32 9}
-!18 = !{void (<4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>*)* @main.flat, !"", !19, null, null}
-!19 = !{!20, !25, null}
-!20 = !{!21, !22, !23, !24}
-!21 = !{i32 0, !"A", i8 9, i8 0, !9, i8 0, i32 1, i8 4, i32 0, i8 0, null}
-!22 = !{i32 1, !"B", i8 9, i8 0, !9, i8 4, i32 1, i8 4, i32 1, i8 0, null}
-!23 = !{i32 2, !"C", i8 9, i8 0, !9, i8 3, i32 1, i8 4, i32 2, i8 0, null}
-!24 = !{i32 3, !"D", i8 9, i8 0, !9, i8 6, i32 1, i8 4, i32 3, i8 0, null}
-!25 = !{!26}
-!26 = !{i32 0, !"SV_Target", i8 9, i8 16, !9, i8 0, i32 1, i8 4, i32 0, i8 0, null}
diff --git a/tools/clang/test/HLSL/dxil_validation/GetDimCalcLOD.ll b/tools/clang/test/HLSL/dxil_validation/GetDimCalcLOD.ll
deleted file mode 100644
index 257e1c17b..000000000
--- a/tools/clang/test/HLSL/dxil_validation/GetDimCalcLOD.ll
+++ /dev/null
@@ -1,136 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: GetDimensions used undef dimension z on TextureCube
-; CHECK: coord uninitialized
-
-;
-; Input signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; UV                       0   xy          0     NONE   float
-;
-;
-; Output signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; SV_Target                0   xyzw        0   TARGET   float   xyzw
-;
-;
-; Pipeline Runtime Information:
-;
-; Pixel Shader
-; DepthOutput=0
-; SampleFrequency=0
-;
-;
-; Input signature:
-;
-; Name                 Index             InterpMode
-; -------------------- ----- ----------------------
-; UV                       0                 linear
-;
-; Output signature:
-;
-; Name                 Index             InterpMode
-; -------------------- ----- ----------------------
-; SV_Target                0
-;
-; Buffer Definitions:
-;
-;
-; Resource Bindings:
-;
-; Name                                 Type  Format         Dim      ID      HLSL Bind  Count
-; ------------------------------ ---------- ------- ----------- ------- -------------- ------
-; g_sam                             sampler      NA          NA      S0             s0     1
-; cube                              texture     f32        cube      T0             t0     1
-;
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-%class.TextureCube = type { <4 x float> }
-%struct.SamplerState = type { i32 }
-%dx.types.Handle = type { i8* }
-%dx.types.Dimensions = type { i32, i32, i32, i32 }
-
-@"\01?cube@@3V?$TextureCube@V?$vector@M$03@@@@A" = available_externally global %class.TextureCube zeroinitializer, align 4
-@"\01?g_sam@@3USamplerState@@A" = available_externally global %struct.SamplerState zeroinitializer, align 4
-@dx.typevar.0 = external addrspace(1) constant %class.TextureCube
-@llvm.used = appending global [5 x i8*] [i8* bitcast (%class.TextureCube* @"\01?cube@@3V?$TextureCube@V?$vector@M$03@@@@A" to i8*), i8* bitcast (%struct.SamplerState* @"\01?g_sam@@3USamplerState@@A" to i8*), i8* bitcast (%class.TextureCube* @"\01?cube@@3V?$TextureCube@V?$vector@M$03@@@@A" to i8*), i8* bitcast (%struct.SamplerState* @"\01?g_sam@@3USamplerState@@A" to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%class.TextureCube addrspace(1)* @dx.typevar.0 to i8 addrspace(1)*) to i8*)], section "llvm.metadata"
-
-; Function Attrs: nounwind
-define void @main.flat(<2 x float>, <4 x float>* nocapture readnone) #0 {
-entry:
-  %cube_texture_cube = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %g_sam_sampler = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 3, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %2 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %3 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %4 = call %dx.types.Dimensions @dx.op.getDimensions(i32 73, %dx.types.Handle %cube_texture_cube, i32 0)  ; GetDimensions(handle,mipLevel)
-  %5 = extractvalue %dx.types.Dimensions %4, 0
-  %6 = extractvalue %dx.types.Dimensions %4, 2
-  %7 = call float @dx.op.calculateLOD.f32(i32 84, %dx.types.Handle %cube_texture_cube, %dx.types.Handle %g_sam_sampler, float %2, float %3, float undef, i1 true)  ; CalculateLOD(handle,sampler,coord0,coord1,coord2,clamped)
-  %conv = uitofp i32 %5 to float
-  %conv1 = uitofp i32 %6 to float
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %conv)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %conv1)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %7)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float 1.000000e+00)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind readnone
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #1
-
-; Function Attrs: nounwind readonly
-declare %dx.types.Dimensions @dx.op.getDimensions(i32, %dx.types.Handle, i32) #2
-
-; Function Attrs: nounwind readonly
-declare float @dx.op.calculateLOD.f32(i32, %dx.types.Handle, %dx.types.Handle, float, float, float, i1) #2
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind readonly }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.resources = !{!3}
-!dx.typeAnnotations = !{!9, !12}
-!dx.entryPoints = !{!21}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"ps", i32 6, i32 0}
-!3 = !{!4, null, null, !7}
-!4 = !{!5}
-!5 = !{i32 0, %class.TextureCube* @"\01?cube@@3V?$TextureCube@V?$vector@M$03@@@@A", !"cube", i32 0, i32 0, i32 1, i32 5, i32 0, !6}
-!6 = !{i32 0, i32 9}
-!7 = !{!8}
-!8 = !{i32 0, %struct.SamplerState* @"\01?g_sam@@3USamplerState@@A", !"g_sam", i32 0, i32 0, i32 1, i32 0, null}
-!9 = !{i32 0, %class.TextureCube addrspace(1)* @dx.typevar.0, !10}
-!10 = !{i32 16, !11}
-!11 = !{i32 3, i32 0, i32 6, !"h", i32 7, i32 9}
-!12 = !{i32 1, void (<2 x float>, <4 x float>*)* @main.flat, !13}
-!13 = !{!14, !16, !19}
-!14 = !{i32 0, !15, !15}
-!15 = !{}
-!16 = !{i32 0, !17, !18}
-!17 = !{i32 4, !"UV", i32 7, i32 9}
-!18 = !{i32 0}
-!19 = !{i32 1, !20, !18}
-!20 = !{i32 4, !"SV_TARGET", i32 7, i32 9}
-!21 = !{void (<2 x float>, <4 x float>*)* @main.flat, !"", !22, !3, null}
-!22 = !{!23, !25, null}
-!23 = !{!24}
-!24 = !{i32 0, !"UV", i8 9, i8 0, !18, i8 2, i32 1, i8 2, i32 0, i8 0, null}
-!25 = !{!26}
-!26 = !{i32 0, !"SV_Target", i8 9, i8 16, !18, i8 0, i32 1, i8 4, i32 0, i8 0, null}
-
diff --git a/tools/clang/test/HLSL/dxil_validation/InnerCoverage.ll b/tools/clang/test/HLSL/dxil_validation/InnerCoverage.ll
deleted file mode 100644
index 509ab05d7..000000000
--- a/tools/clang/test/HLSL/dxil_validation/InnerCoverage.ll
+++ /dev/null
@@ -1,62 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: InnerCoverage and Coverage are mutually exclusive.
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-; Function Attrs: alwaysinline nounwind
-define void @main(float %b, i32 %c, i32* nocapture readnone dereferenceable(4) %cover) #0 {
-entry:
-  %0 = call i32 @dx.op.coverage.i32(i32 147)  ; Coverage()
-  %1 = call i32 @dx.op.innercoverage.i32(i32 148)  ; InnerCoverage()
-  %and = and i32 %1, %0
-  call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 0, i32 %and)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.coverage.i32(i32) #1
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.innercoverage.i32(i32) #1
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.loadInput.i32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.i32(i32, i32, i32, i8, i32) #2
-
-attributes #0 = { alwaysinline nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="0" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind }
-
-!llvm.ident = !{!0}
-!dx.valver = !{!1}
-!dx.version = !{!2}
-!dx.shaderModel = !{!3}
-!dx.typeAnnotations = !{!4}
-!dx.entryPoints = !{!15}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 1, i32 0}
-!2 = !{i32 0, i32 7}
-!3 = !{!"ps", i32 6, i32 0}
-!4 = !{i32 1, void (float, i32, i32*)* @main, !5}
-!5 = !{!6, !8, !11, !13}
-!6 = !{i32 1, !7, !7}
-!7 = !{}
-!8 = !{i32 0, !9, !10}
-!9 = !{i32 4, !"B", i32 7, i32 13}
-!10 = !{i32 0}
-!11 = !{i32 0, !12, !10}
-!12 = !{i32 4, !"C", i32 7, i32 5}
-!13 = !{i32 2, !14, !10}
-!14 = !{i32 4, !"SV_Coverage", i32 7, i32 5}
-!15 = !{void (float, i32, i32*)* @main, !"main", !16, null, null}
-!16 = !{!17, !20, null}
-!17 = !{!18, !19}
-!18 = !{i32 0, !"B", i8 13, i8 0, !10, i8 1, i32 1, i8 1, i32 0, i8 0, null}
-!19 = !{i32 1, !"C", i8 5, i8 0, !10, i8 1, i32 1, i8 1, i32 0, i8 1, null}
-!20 = !{!21}
-!21 = !{i32 0, !"SV_Coverage", i8 5, i8 14, !10, i8 0, i32 1, i8 1, i32 -1, i8 -1, null}
diff --git a/tools/clang/test/HLSL/dxil_validation/IntegerDepth.ll b/tools/clang/test/HLSL/dxil_validation/IntegerDepth.ll
deleted file mode 100644
index 174052bf1..000000000
--- a/tools/clang/test/HLSL/dxil_validation/IntegerDepth.ll
+++ /dev/null
@@ -1,58 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: Invalid interpolation mode for 'C'
-; CHECK: SV_Depth must be float
-; CHECK: External function 'dxil.op.loadInput.f32' is not a DXIL function
-; CHECK: External function 'dx.op.loadInput.f32' is unused
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-; Function Attrs: nounwind
-define void @main.flat(float, float, i32* nocapture readnone) #0 {
-entry:
-  %3 = call float @dxil.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)
-  %conv = fptosi float %3 to i32
-  call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 0, i32 %conv)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare float @dxil.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.i32(i32, i32, i32, i8, i32) #0
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.typeAnnotations = !{!3}
-!dx.entryPoints = !{!14}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"ps", i32 6, i32 0}
-!3 = !{i32 1, void (float, float, i32*)* @main.flat, !4}
-!4 = !{!5, !7, !10, !12}
-!5 = !{i32 0, !6, !6}
-!6 = !{}
-!7 = !{i32 0, !8, !9}
-!8 = !{i32 4, !"B", i32 7, i32 13}
-!9 = !{i32 0}
-!10 = !{i32 0, !11, !9}
-!11 = !{i32 4, !"C", i32 7, i32 9}
-!12 = !{i32 1, !13, !9}
-!13 = !{i32 4, !"SV_DEPTH", i32 7, i32 4}
-!14 = !{void (float, float, i32*)* @main.flat, !"", !15, null, null}
-!15 = !{!16, !19, null}
-!16 = !{!17, !18}
-!17 = !{i32 0, !"B", i8 13, i8 0, !9, i8 1, i32 1, i8 1, i32 0, i8 0, null}
-!18 = !{i32 1, !"C", i8 9, i8 0, !9, i8 8, i32 1, i8 1, i32 1, i8 0, null}
-!19 = !{!20}
-!20 = !{i32 0, !"SV_Depth", i8 4, i8 17, !9, i8 0, i32 1, i8 1, i32 -1, i8 -1, null}
diff --git a/tools/clang/test/HLSL/dxil_validation/MultiStream.ll b/tools/clang/test/HLSL/dxil_validation/MultiStream.ll
deleted file mode 100644
index e8bdd02f5..000000000
--- a/tools/clang/test/HLSL/dxil_validation/MultiStream.ll
+++ /dev/null
@@ -1,306 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-;
-; Note: shader requires additional functionality:
-;       SV_RenderTargetArrayIndex or SV_ViewportArrayIndex from any shader feeding rasterizer
-;
-;
-; Input signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; POSSIZE                  0   xyz         0     NONE   float
-; COLOR                    0   xyzw        1     NONE   float
-;
-;
-; Output signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; TEXCOORD                 0   xy          0     NONE   float   xyzw
-; COLOR                    0   xyzw        1     NONE   float   xyzw
-; SV_Position              0   xyzw        2      POS   float   xyzw
-; TEXCOORD                 0   xy          3     NONE   float   xyzw
-; COLOR                    0   xyzw        4     NONE   float   xyzw
-; SV_Position              0   xyzw        5      POS   float   xyzw
-; SV_ViewportArrayIndex     0   x           6  VPINDEX    uint   xyzw
-;
-;
-; Pipeline Runtime Information:
-;
-; Geometry Shader
-; InputPrimitive=point
-; OutputTopology=point
-; OutputStreamMask=3
-; OutputPositionPresent=1
-;
-;
-; Input signature:
-;
-; Name                 Index             InterpMode
-; -------------------- ----- ----------------------
-; POSSIZE                  0                 linear
-; COLOR                    0                 linear
-; SV_GSInstanceID          0        nointerpolation
-;
-; Output signature:
-;
-; Name                 Index             InterpMode
-; -------------------- ----- ----------------------
-; TEXCOORD                 0                 linear
-; COLOR                    0                 linear
-; SV_Position              0          noperspective
-; TEXCOORD                 0                 linear
-; COLOR                    0                 linear
-; SV_Position              0          noperspective
-; SV_ViewportArrayIndex     0        nointerpolation
-;
-; Buffer Definitions:
-;
-; cbuffer b
-; {
-;
-;   struct b
-;   {
-;
-;       float2 invViewportSize;                       ; Offset:    0
-;
-;   } b                                               ; Offset:    0 Size:     8
-;
-; }
-;
-;
-; Resource Bindings:
-;
-; Name                                 Type  Format         Dim      ID      HLSL Bind  Count
-; ------------------------------ ---------- ------- ----------- ------- -------------- ------
-; b                                 cbuffer      NA          NA     CB0            cb0     1
-;
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-%b = type { <2 x float> }
-%struct.VSOutGSIn = type { <3 x float>, <4 x float> }
-%class.PointStream = type { %struct.VSOut }
-%struct.VSOut = type { <2 x float>, <4 x float>, <4 x float> }
-%dx.types.Handle = type { i8* }
-%dx.types.CBufRet.f32 = type { float, float, float, float }
-
-@b = external constant %b
-@dx.typevar.0 = external addrspace(1) constant %struct.VSOutGSIn
-@dx.typevar.1 = external addrspace(1) constant %class.PointStream
-@dx.typevar.2 = external addrspace(1) constant %struct.VSOut
-@dx.typevar.3 = external addrspace(1) constant %b
-@llvm.used = appending global [5 x i8*] [i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.VSOutGSIn addrspace(1)* @dx.typevar.0 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%class.PointStream addrspace(1)* @dx.typevar.1 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.VSOut addrspace(1)* @dx.typevar.2 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%b addrspace(1)* @dx.typevar.3 to i8 addrspace(1)*) to i8*), i8* bitcast (%b* @b to i8*)], section "llvm.metadata"
-
-; Function Attrs: nounwind
-define void @"\01?main@@YAXY00UVSOutGSIn@@V?$PointStream@UVSOut@@@@1IAAI@Z.flat"([1 x <3 x float>]* nocapture readnone, [1 x <4 x float>]* nocapture readnone, %class.PointStream* nocapture readnone, <2 x float>* nocapture readnone, <4 x float>* nocapture readnone, <4 x float>* nocapture readnone, %class.PointStream* nocapture readnone, <2 x float>* nocapture readnone, <4 x float>* nocapture readnone, <4 x float>* nocapture readnone, i32, i32* nocapture readnone) #0 {
-entry:
-  %12 = tail call i32 @dx.op.gsInstanceID.i32(i32 138)  ; GSInstanceID()
-  %verts.0 = alloca [3 x float], align 4
-  %verts.1 = alloca [3 x float], align 4
-  %13 = getelementptr [3 x float], [3 x float]* %verts.0, i32 0, i32 0
-  %14 = getelementptr [3 x float], [3 x float]* %verts.1, i32 0, i32 0
-  store float -5.000000e-01, float* %13, align 4
-  store float -5.000000e-01, float* %14, align 4
-  %15 = getelementptr [3 x float], [3 x float]* %verts.0, i32 0, i32 1
-  %16 = getelementptr [3 x float], [3 x float]* %verts.1, i32 0, i32 1
-  store float 1.500000e+00, float* %15, align 4
-  store float -5.000000e-01, float* %16, align 4
-  %17 = getelementptr [3 x float], [3 x float]* %verts.0, i32 0, i32 2
-  %18 = getelementptr [3 x float], [3 x float]* %verts.1, i32 0, i32 2
-  store float -5.000000e-01, float* %17, align 4
-  store float 1.500000e+00, float* %18, align 4
-  %19 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 0)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %20 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 0)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %21 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 0)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %22 = tail call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 0)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %23 = tail call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 1, i32 0)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %24 = tail call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 2, i32 0)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %25 = tail call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 3, i32 0)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %rem = urem i32 %12, 3
-  %26 = getelementptr [3 x float], [3 x float]* %verts.0, i32 0, i32 %rem
-  %27 = getelementptr [3 x float], [3 x float]* %verts.1, i32 0, i32 %rem
-  %load30 = load float, float* %26, align 4
-  %load32 = load float, float* %27, align 4
-  %mul.i0 = fmul fast float %load30, %19
-  %mul.i1 = fmul fast float %load32, %19
-  %add.i0 = fadd fast float %mul.i0, %20
-  %add.i1 = fadd fast float %mul.i1, %21
-  %28 = tail call %dx.types.Handle @dx.op.createHandle(i32 58, i8 2, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %29 = tail call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 60, %dx.types.Handle %28, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
-  %30 = extractvalue %dx.types.CBufRet.f32 %29, 0
-  %31 = extractvalue %dx.types.CBufRet.f32 %29, 1
-  %mul.i.i0 = fmul fast float %30, 2.000000e+00
-  %mul.i.i1 = fmul fast float %31, 2.000000e+00
-  %mul1.i.i0 = fmul fast float %mul.i.i0, %add.i0
-  %mul1.i.i1 = fmul fast float %mul.i.i1, %add.i1
-  %sub.i = fadd fast float %mul1.i.i0, -1.000000e+00
-  %sub2.i = fsub fast float 1.000000e+00, %mul1.i.i1
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float 0.000000e+00)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float 0.000000e+00)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 0, float %22)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 1, float %23)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 2, float %24)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 3, float %25)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 0, float %sub.i)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 1, float %sub2.i)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 2, float 5.000000e-01)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 3, float 1.000000e+00)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.emitStream(i32 97, i8 0)  ; EmitStream(streamId)
-  %add10 = add nuw nsw i32 %rem, 1
-  %32 = getelementptr [3 x float], [3 x float]* %verts.0, i32 0, i32 %add10
-  %33 = getelementptr [3 x float], [3 x float]* %verts.1, i32 0, i32 %add10
-  %load26 = load float, float* %32, align 4
-  %load28 = load float, float* %33, align 4
-  %mul14.i0 = fmul fast float %load26, %19
-  %mul14.i1 = fmul fast float %load28, %19
-  %add15.i0 = fadd fast float %mul14.i0, %20
-  %add15.i1 = fadd fast float %mul14.i1, %21
-  %mul1.i.31.i0 = fmul fast float %add15.i0, %mul.i.i0
-  %mul1.i.31.i1 = fmul fast float %add15.i1, %mul.i.i1
-  %sub.i.32 = fadd fast float %mul1.i.31.i0, -1.000000e+00
-  %sub2.i.33 = fsub fast float 1.000000e+00, %mul1.i.31.i1
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float 2.000000e+00)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float 0.000000e+00)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 0, float %22)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 1, float %23)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 2, float %24)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 3, float %25)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 0, float %sub.i.32)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 1, float %sub2.i.33)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 2, float 5.000000e-01)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 3, float 1.000000e+00)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.emitStream(i32 97, i8 0)  ; EmitStream(streamId)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 0, i8 0, float 2.000000e+00)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 0, i8 1, float 0.000000e+00)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 4, i32 0, i8 0, float %22)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 4, i32 0, i8 1, float %23)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 4, i32 0, i8 2, float %24)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 4, i32 0, i8 3, float %25)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 5, i32 0, i8 0, float %sub.i.32)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 5, i32 0, i8 1, float %sub2.i.33)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 5, i32 0, i8 2, float 5.000000e-01)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 5, i32 0, i8 3, float 1.000000e+00)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.emitStream(i32 97, i8 1)  ; EmitStream(streamId)
-  %add21 = add nuw nsw i32 %rem, 2
-  %34 = getelementptr [3 x float], [3 x float]* %verts.0, i32 0, i32 %add21
-  %35 = getelementptr [3 x float], [3 x float]* %verts.1, i32 0, i32 %add21
-  %load23 = load float, float* %34, align 4
-  %load24 = load float, float* %35, align 4
-  %mul25.i0 = fmul fast float %load23, %19
-  %mul25.i1 = fmul fast float %load24, %19
-  %add26.i0 = fadd fast float %mul25.i0, %20
-  %add26.i1 = fadd fast float %mul25.i1, %21
-  %mul1.i.36.i0 = fmul fast float %add26.i0, %mul.i.i0
-  %mul1.i.36.i1 = fmul fast float %add26.i1, %mul.i.i1
-  %sub.i.37 = fadd fast float %mul1.i.36.i0, -1.000000e+00
-  %sub2.i.38 = fsub fast float 1.000000e+00, %mul1.i.36.i1
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float 0.000000e+00)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float 2.000000e+00)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 0, float %22)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 1, float %23)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 2, float %24)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 3, float %25)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 0, float %sub.i.37)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 1, float %sub2.i.38)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 2, float 5.000000e-01)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 3, float 1.000000e+00)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.emitStream(i32 97, i8 0)  ; EmitStream(streamId)
-  tail call void @dx.op.storeOutput.i32(i32 5, i32 6, i32 0, i8 0, i32 2)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.cutStream(i32 98, i8 0)  ; CutStream(streamId)
-  tail call void @dx.op.cutStream(i32 98, i8 1)  ; CutStream(streamId)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.gsInstanceID.i32(i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.i32(i32, i32, i32, i8, i32) #0
-
-; Function Attrs: nounwind readnone
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #1
-
-; Function Attrs: nounwind readnone
-declare %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32, %dx.types.Handle, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.cutStream(i32, i8) #0
-
-; Function Attrs: nounwind
-declare void @dx.op.emitStream(i32, i8) #0
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.resources = !{!3}
-!dx.typeAnnotations = !{!6, !17}
-!dx.entryPoints = !{!40}
-
-!0 = !{!"clang version 3.7.0 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 4}
-!2 = !{!"gs", i32 6, i32 0}
-!3 = !{null, null, !4, null}
-!4 = !{!5}
-!5 = !{i32 0, %b* @b, !"b", i32 0, i32 0, i32 1, i32 8, null}
-!6 = !{i32 0, %struct.VSOutGSIn addrspace(1)* @dx.typevar.0, !7, %class.PointStream addrspace(1)* @dx.typevar.1, !10, %struct.VSOut addrspace(1)* @dx.typevar.2, !12, %b addrspace(1)* @dx.typevar.3, !15}
-!7 = !{i32 32, !8, !9}
-!8 = !{i32 3, i32 0, i32 4, !"POSSIZE", i32 6, !"posSize", i32 7, i32 9}
-!9 = !{i32 3, i32 16, i32 4, !"COLOR", i32 6, !"clr", i32 7, i32 9}
-!10 = !{i32 48, !11}
-!11 = !{i32 3, i32 0, i32 6, !"h"}
-!12 = !{i32 48, !13, !9, !14}
-!13 = !{i32 3, i32 0, i32 4, !"TEXCOORD0", i32 6, !"uv", i32 7, i32 9}
-!14 = !{i32 3, i32 32, i32 4, !"SV_Position", i32 6, !"pos", i32 7, i32 9}
-!15 = !{i32 0, !16}
-!16 = !{i32 3, i32 0, i32 6, !"invViewportSize", i32 7, i32 9}
-!17 = !{i32 1, void ([1 x <3 x float>]*, [1 x <4 x float>]*, %class.PointStream*, <2 x float>*, <4 x float>*, <4 x float>*, %class.PointStream*, <2 x float>*, <4 x float>*, <4 x float>*, i32, i32*)* @"\01?main@@YAXY00UVSOutGSIn@@V?$PointStream@UVSOut@@@@1IAAI@Z.flat", !18}
-!18 = !{!19, !21, !24, !26, !27, !29, !30, !32, !33, !34, !35, !36, !38}
-!19 = !{i32 0, !20, !20}
-!20 = !{}
-!21 = !{i32 0, !22, !23}
-!22 = !{i32 4, !"POSSIZE", i32 7, i32 9}
-!23 = !{i32 0}
-!24 = !{i32 0, !25, !23}
-!25 = !{i32 4, !"COLOR", i32 7, i32 9}
-!26 = !{i32 5, !20, !20}
-!27 = !{i32 5, !28, !23}
-!28 = !{i32 4, !"TEXCOORD0", i32 7, i32 9}
-!29 = !{i32 5, !25, !23}
-!30 = !{i32 5, !31, !23}
-!31 = !{i32 4, !"SV_Position", i32 7, i32 9}
-!32 = !{i32 6, !20, !20}
-!33 = !{i32 6, !28, !23}
-!34 = !{i32 6, !25, !23}
-!35 = !{i32 6, !31, !23}
-!36 = !{i32 0, !37, !23}
-!37 = !{i32 4, !"SV_GSInstanceID", i32 7, i32 5}
-!38 = !{i32 1, !39, !23}
-!39 = !{i32 4, !"SV_ViewportArrayIndex", i32 7, i32 5}
-!40 = !{void ([1 x <3 x float>]*, [1 x <4 x float>]*, %class.PointStream*, <2 x float>*, <4 x float>*, <4 x float>*, %class.PointStream*, <2 x float>*, <4 x float>*, <4 x float>*, i32, i32*)* @"\01?main@@YAXY00UVSOutGSIn@@V?$PointStream@UVSOut@@@@1IAAI@Z.flat", !"", !41, !3, !54}
-!41 = !{!42, !46, null}
-!42 = !{!43, !44, !45}
-!43 = !{i32 0, !"POSSIZE", i8 9, !23, i8 2, i32 1, i8 3, i32 0, i8 0, null}
-!44 = !{i32 1, !"COLOR", i8 9, !23, i8 2, i32 1, i8 4, i32 1, i8 0, null}
-!45 = !{i32 2, !"SV_GSInstanceID", i8 5, !23, i8 1, i32 1, i8 1, i32 2, i8 0, null}
-!46 = !{!47, !44, !48, !49, !51, !52, !53}
-!47 = !{i32 0, !"TEXCOORD", i8 9, !23, i8 2, i32 1, i8 2, i32 0, i8 0, null}
-!48 = !{i32 2, !"SV_Position", i8 9, !23, i8 4, i32 1, i8 4, i32 2, i8 0, null}
-!49 = !{i32 3, !"TEXCOORD", i8 9, !23, i8 2, i32 1, i8 2, i32 3, i8 0, !50}
-!50 = !{i32 0, i32 1}
-!51 = !{i32 4, !"COLOR", i8 9, !23, i8 2, i32 1, i8 4, i32 4, i8 0, !50}
-!52 = !{i32 5, !"SV_Position", i8 9, !23, i8 4, i32 1, i8 4, i32 5, i8 0, !50}
-!53 = !{i32 6, !"SV_ViewportArrayIndex", i8 5, !23, i8 1, i32 1, i8 1, i32 6, i8 0, null}
-!54 = !{i32 0, i64 512, i32 1, !55}
-!55 = !{i32 1, i32 3, i32 3, i32 1, i32 24}
-
diff --git a/tools/clang/test/HLSL/dxil_validation/SimpleDs1.ll b/tools/clang/test/HLSL/dxil_validation/SimpleDs1.ll
deleted file mode 100644
index 6ca01dfb2..000000000
--- a/tools/clang/test/HLSL/dxil_validation/SimpleDs1.ll
+++ /dev/null
@@ -1,196 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: DS input control point count must be [0..32].  36 specified
-; CHECK: Invalid Tessellator Domain specified. Must be isoline, tri or quad
-; CHECK: DomainLocation component index out of bounds for the domain.
-; CHECK: DomainLocation component index out of bounds for the domain.
-; CHECK: DomainLocation component index out of bounds for the domain.
-
-
-
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-%struct.PSSceneIn = type { <4 x float>, <2 x float>, <3 x float> }
-%struct.HSPerVertexData = type { %struct.PSSceneIn }
-%struct.HSPerPatchData = type { [3 x float], float }
-%class.OutputPatch = type { [3 x %struct.HSPerVertexData] }
-
-@dx.typevar.0 = external addrspace(1) constant %struct.PSSceneIn
-@dx.typevar.1 = external addrspace(1) constant %struct.HSPerVertexData
-@dx.typevar.2 = external addrspace(1) constant %struct.HSPerPatchData
-@dx.typevar.3 = external addrspace(1) constant %class.OutputPatch
-@llvm.used = appending global [4 x i8*] [i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.HSPerVertexData addrspace(1)* @dx.typevar.1 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.HSPerPatchData addrspace(1)* @dx.typevar.2 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%class.OutputPatch addrspace(1)* @dx.typevar.3 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.PSSceneIn addrspace(1)* @dx.typevar.0 to i8 addrspace(1)*) to i8*)], section "llvm.metadata"
-
-; Function Attrs: nounwind
-define void @main.flat(<3 x float>, [3 x <4 x float>]* nocapture readnone, [3 x <2 x float>]* nocapture readnone, [3 x <3 x float>]* nocapture readnone, [3 x float]* nocapture readnone, float* nocapture readnone, <4 x float>* nocapture readnone, <2 x float>* nocapture readnone, <3 x float>* nocapture readnone) #0 {
-entry:
-  %9 = tail call float @dx.op.domainLocation.f32(i32 108, i8 0)
-  %10 = tail call float @dx.op.domainLocation.f32(i32 108, i8 1)
-  %11 = tail call float @dx.op.domainLocation.f32(i32 108, i8 2)
-  %12 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 0)
-  %13 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 0)
-  %14 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 0)
-  %15 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 3, i32 0)
-  %mul.i0 = fmul fast float %12, %9
-  %mul.i1 = fmul fast float %13, %9
-  %mul.i2 = fmul fast float %14, %9
-  %mul.i3 = fmul fast float %15, %9
-  %16 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 1)
-  %17 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 1)
-  %18 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 1)
-  %19 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 3, i32 1)
-  %mul5.i0 = fmul fast float %16, %10
-  %mul5.i1 = fmul fast float %17, %10
-  %mul5.i2 = fmul fast float %18, %10
-  %mul5.i3 = fmul fast float %19, %10
-  %add.i0 = fadd fast float %mul5.i0, %mul.i0
-  %add.i1 = fadd fast float %mul5.i1, %mul.i1
-  %add.i2 = fadd fast float %mul5.i2, %mul.i2
-  %add.i3 = fadd fast float %mul5.i3, %mul.i3
-  %20 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 2)
-  %21 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 2)
-  %22 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 2)
-  %23 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 3, i32 2)
-  %mul10.i0 = fmul fast float %20, %11
-  %mul10.i1 = fmul fast float %21, %11
-  %mul10.i2 = fmul fast float %22, %11
-  %mul10.i3 = fmul fast float %23, %11
-  %add11.i0 = fadd fast float %add.i0, %mul10.i0
-  %add11.i1 = fadd fast float %add.i1, %mul10.i1
-  %add11.i2 = fadd fast float %add.i2, %mul10.i2
-  %add11.i3 = fadd fast float %add.i3, %mul10.i3
-  %24 = tail call float @dx.op.loadPatchConstant.f32(i32 107, i32 0, i32 1, i8 0)
-  %add14.i0 = fadd fast float %add11.i0, %24
-  %add14.i1 = fadd fast float %add11.i1, %24
-  %add14.i2 = fadd fast float %add11.i2, %24
-  %add14.i3 = fadd fast float %add11.i3, %24
-  %25 = tail call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 0)
-  %26 = tail call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 1, i32 0)
-  %mul19.i0 = fmul fast float %25, %9
-  %mul19.i1 = fmul fast float %26, %9
-  %27 = tail call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 1)
-  %28 = tail call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 1, i32 1)
-  %mul24.i0 = fmul fast float %27, %10
-  %mul24.i1 = fmul fast float %28, %10
-  %add25.i0 = fadd fast float %mul24.i0, %mul19.i0
-  %add25.i1 = fadd fast float %mul24.i1, %mul19.i1
-  %29 = tail call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 2)
-  %30 = tail call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 1, i32 2)
-  %mul30.i0 = fmul fast float %29, %11
-  %mul30.i1 = fmul fast float %30, %11
-  %add31.i0 = fadd fast float %add25.i0, %mul30.i0
-  %add31.i1 = fadd fast float %add25.i1, %mul30.i1
-  %31 = tail call float @dx.op.loadPatchConstant.f32(i32 107, i32 0, i32 0, i8 0)
-  %add36.i0 = fadd fast float %add31.i0, %31
-  %add36.i1 = fadd fast float %add31.i1, %31
-  %32 = tail call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 0, i32 0)
-  %33 = tail call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 1, i32 0)
-  %34 = tail call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 2, i32 0)
-  %mul41.i0 = fmul fast float %32, %9
-  %mul41.i1 = fmul fast float %33, %9
-  %mul41.i2 = fmul fast float %34, %9
-  %35 = tail call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 0, i32 1)
-  %36 = tail call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 1, i32 1)
-  %37 = tail call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 2, i32 1)
-  %mul46.i0 = fmul fast float %35, %10
-  %mul46.i1 = fmul fast float %36, %10
-  %mul46.i2 = fmul fast float %37, %10
-  %add47.i0 = fadd fast float %mul46.i0, %mul41.i0
-  %add47.i1 = fadd fast float %mul46.i1, %mul41.i1
-  %add47.i2 = fadd fast float %mul46.i2, %mul41.i2
-  %38 = tail call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 0, i32 2)
-  %39 = tail call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 1, i32 2)
-  %40 = tail call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 2, i32 2)
-  %mul52.i0 = fmul fast float %38, %11
-  %mul52.i1 = fmul fast float %39, %11
-  %mul52.i2 = fmul fast float %40, %11
-  %add53.i0 = fadd fast float %add47.i0, %mul52.i0
-  %add53.i1 = fadd fast float %add47.i1, %mul52.i1
-  %add53.i2 = fadd fast float %add47.i2, %mul52.i2
-  %41 = tail call float @dx.op.loadPatchConstant.f32(i32 107, i32 1, i32 0, i8 0)
-  %add56.i0 = fadd fast float %add53.i0, %41
-  %add56.i1 = fadd fast float %add53.i1, %41
-  %add56.i2 = fadd fast float %add53.i2, %41
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %add14.i0)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %add14.i1)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %add14.i2)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %add14.i3)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 0, float %add36.i0)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 1, float %add36.i1)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 0, float %add56.i0)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 1, float %add56.i1)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 2, float %add56.i2)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.domainLocation.f32(i32, i8) #1
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadPatchConstant.f32(i32, i32, i32, i8) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.typeAnnotations = !{!3, !15}
-!dx.entryPoints = !{!36}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"ds", i32 6, i32 0}
-!3 = !{i32 0, %struct.PSSceneIn addrspace(1)* @dx.typevar.0, !4, %struct.HSPerVertexData addrspace(1)* @dx.typevar.1, !8, %struct.HSPerPatchData addrspace(1)* @dx.typevar.2, !10, %class.OutputPatch addrspace(1)* @dx.typevar.3, !13}
-!4 = !{i32 44, !5, !6, !7}
-!5 = !{i32 3, i32 0, i32 4, !"SV_Position", i32 6, !"pos", i32 7, i32 9}
-!6 = !{i32 3, i32 16, i32 4, !"TEXCOORD0", i32 6, !"tex", i32 7, i32 9}
-!7 = !{i32 3, i32 32, i32 4, !"NORMAL", i32 6, !"norm", i32 7, i32 9}
-!8 = !{i32 44, !9}
-!9 = !{i32 3, i32 0, i32 6, !"v"}
-!10 = !{i32 40, !11, !12}
-!11 = !{i32 3, i32 0, i32 4, !"SV_TessFactor", i32 6, !"edges", i32 7, i32 9}
-!12 = !{i32 3, i32 36, i32 4, !"SV_InsideTessFactor", i32 6, !"inside", i32 7, i32 9}
-!13 = !{i32 140, !14}
-!14 = !{i32 3, i32 0, i32 6, !"h"}
-!15 = !{i32 1, void (<3 x float>, [3 x <4 x float>]*, [3 x <2 x float>]*, [3 x <3 x float>]*, [3 x float]*, float*, <4 x float>*, <2 x float>*, <3 x float>*)* @main.flat, !16}
-!16 = !{!17, !19, !22, !24, !26, !28, !31, !33, !34, !35}
-!17 = !{i32 0, !18, !18}
-!18 = !{}
-!19 = !{i32 0, !20, !21}
-!20 = !{i32 4, !"SV_DomainLocation", i32 7, i32 9}
-!21 = !{i32 0}
-!22 = !{i32 4, !23, !21}
-!23 = !{i32 4, !"SV_Position", i32 7, i32 9}
-!24 = !{i32 4, !25, !21}
-!25 = !{i32 4, !"TEXCOORD0", i32 7, i32 9}
-!26 = !{i32 4, !27, !21}
-!27 = !{i32 4, !"NORMAL", i32 7, i32 9}
-!28 = !{i32 0, !29, !30}
-!29 = !{i32 4, !"SV_TessFactor", i32 7, i32 9}
-!30 = !{i32 0, i32 1, i32 2}
-!31 = !{i32 0, !32, !21}
-!32 = !{i32 4, !"SV_InsideTessFactor", i32 7, i32 9}
-!33 = !{i32 1, !23, !21}
-!34 = !{i32 1, !25, !21}
-!35 = !{i32 1, !27, !21}
-!36 = !{void (<3 x float>, [3 x <4 x float>]*, [3 x <2 x float>]*, [3 x <3 x float>]*, [3 x float]*, float*, <4 x float>*, <2 x float>*, <3 x float>*)* @main.flat, !"", !37, null, !46}
-!37 = !{!38, !38, !42}
-!38 = !{!39, !40, !41}
-!39 = !{i32 0, !"SV_Position", i8 9, i8 3, !21, i8 4, i32 1, i8 4, i32 0, i8 0, null}
-!40 = !{i32 1, !"TEXCOORD", i8 9, i8 0, !21, i8 2, i32 1, i8 2, i32 1, i8 0, null}
-!41 = !{i32 2, !"NORMAL", i8 9, i8 0, !21, i8 2, i32 1, i8 3, i32 2, i8 0, null}
-!42 = !{!44, !45}
-!44 = !{i32 0, !"SV_TessFactor", i8 9, i8 25, !30, i8 0, i32 3, i8 1, i32 0, i8 0, null}
-!45 = !{i32 1, !"SV_InsideTessFactor", i8 9, i8 26, !21, i8 0, i32 1, i8 1, i32 3, i8 0, null}
-!46 = !{i32 2, !47}
-!47 = !{i32 4, i32 36}
-
diff --git a/tools/clang/test/HLSL/dxil_validation/SimpleGs1.ll b/tools/clang/test/HLSL/dxil_validation/SimpleGs1.ll
deleted file mode 100644
index 14f469b25..000000000
--- a/tools/clang/test/HLSL/dxil_validation/SimpleGs1.ll
+++ /dev/null
@@ -1,205 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: GS output vertex count must be [0..1024].  1025 specified
-; CHECK: GS instance count must be [1..32].  33 specified
-; CHECK: GS output primitive topology unrecognized
-; CHECK: GS input primitive unrecognized
-; CHECK: Stream index (5) must between 0 and 3
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-%b = type { <2 x float> }
-%struct.VSOutGSIn = type { <3 x float>, <4 x float> }
-%class.TriangleStream = type { %struct.VSOut }
-%struct.VSOut = type { <2 x float>, <4 x float>, <4 x float>, i32 }
-%dx.types.Handle = type { i8* }
-%dx.types.CBufRet.f32 = type { float, float, float, float }
-
-@b = external constant %b
-@dx.typevar.0 = external addrspace(1) constant %struct.VSOutGSIn
-@dx.typevar.1 = external addrspace(1) constant %class.TriangleStream
-@dx.typevar.2 = external addrspace(1) constant %struct.VSOut
-@dx.typevar.3 = external addrspace(1) constant %b
-@llvm.used = appending global [6 x i8*] [i8* bitcast (%b* @b to i8*), i8* bitcast (%b* @b to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.VSOutGSIn addrspace(1)* @dx.typevar.0 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%class.TriangleStream addrspace(1)* @dx.typevar.1 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.VSOut addrspace(1)* @dx.typevar.2 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%b addrspace(1)* @dx.typevar.3 to i8 addrspace(1)*) to i8*)], section "llvm.metadata"
-
-; Function Attrs: nounwind
-define void @main.flat([1 x <3 x float>]* nocapture readnone, [1 x <4 x float>]* nocapture readnone, %class.TriangleStream* nocapture readnone, <2 x float>* nocapture readnone, <4 x float>* nocapture readnone, <4 x float>* nocapture readnone, i32* nocapture readnone) #0 {
-entry:
-  %verts.0 = alloca [3 x float], align 4
-  %verts.1 = alloca [3 x float], align 4
-  %7 = getelementptr [3 x float], [3 x float]* %verts.0, i32 0, i32 0
-  %8 = getelementptr [3 x float], [3 x float]* %verts.1, i32 0, i32 0
-  store float -5.000000e-01, float* %7, align 4
-  store float -5.000000e-01, float* %8, align 4
-  %9 = getelementptr [3 x float], [3 x float]* %verts.0, i32 0, i32 1
-  %10 = getelementptr [3 x float], [3 x float]* %verts.1, i32 0, i32 1
-  store float 1.500000e+00, float* %9, align 4
-  store float -5.000000e-01, float* %10, align 4
-  %11 = getelementptr [3 x float], [3 x float]* %verts.0, i32 0, i32 2
-  %12 = getelementptr [3 x float], [3 x float]* %verts.1, i32 0, i32 2
-  store float -5.000000e-01, float* %11, align 4
-  store float 1.500000e+00, float* %12, align 4
-  %13 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 0)
-  %14 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 0)
-  %15 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 0)
-  %16 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 0)
-  %17 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 1, i32 0)
-  %18 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 2, i32 0)
-  %19 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 3, i32 0)
-  %load30 = load float, float* %7, align 4
-  %load32 = load float, float* %8, align 4
-  %mul.i0 = fmul fast float %load30, %13
-  %mul.i1 = fmul fast float %load32, %13
-  %add.i0 = fadd fast float %mul.i0, %14
-  %add.i1 = fadd fast float %mul.i1, %15
-  %20 = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 2, i32 0, i32 0, i1 false)
-  %21 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 60, %dx.types.Handle %20, i32 0)
-  %22 = extractvalue %dx.types.CBufRet.f32 %21, 0
-  %23 = extractvalue %dx.types.CBufRet.f32 %21, 1
-  %mul.i.i0 = fmul fast float %22, 2.000000e+00
-  %mul.i.i1 = fmul fast float %23, 2.000000e+00
-  %mul1.i.i0 = fmul fast float %mul.i.i0, %add.i0
-  %mul1.i.i1 = fmul fast float %mul.i.i1, %add.i1
-  %sub.i = fadd fast float %mul1.i.i0, -1.000000e+00
-  %sub2.i = fsub fast float 1.000000e+00, %mul1.i.i1
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float 0.000000e+00)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float 0.000000e+00)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 0, float %16)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 1, float %17)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 2, float %18)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 3, float %19)
-  call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 0, float %sub.i)
-  call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 1, float %sub2.i)
-  call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 2, float 5.000000e-01)
-  call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 3, float 1.000000e+00)
-  call void @dx.op.storeOutput.i32(i32 5, i32 3, i32 0, i8 0, i32 0)
-  call void @dx.op.emitStream(i32 97, i8 0)
-  %load26 = load float, float* %9, align 4
-  %load28 = load float, float* %10, align 4
-  %mul12.i0 = fmul fast float %load26, %13
-  %mul12.i1 = fmul fast float %load28, %13
-  %add13.i0 = fadd fast float %mul12.i0, %14
-  %add13.i1 = fadd fast float %mul12.i1, %15
-  %mul1.i.29.i0 = fmul fast float %add13.i0, %mul.i.i0
-  %mul1.i.29.i1 = fmul fast float %add13.i1, %mul.i.i1
-  %sub.i.30 = fadd fast float %mul1.i.29.i0, -1.000000e+00
-  %sub2.i.31 = fsub fast float 1.000000e+00, %mul1.i.29.i1
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float 2.000000e+00)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float 0.000000e+00)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 0, float %16)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 1, float %17)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 2, float %18)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 3, float %19)
-  call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 0, float %sub.i.30)
-  call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 1, float %sub2.i.31)
-  call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 2, float 5.000000e-01)
-  call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 3, float 1.000000e+00)
-  call void @dx.op.storeOutput.i32(i32 5, i32 3, i32 0, i8 0, i32 2)
-  call void @dx.op.emitStream(i32 97, i8 0)
-  %load23 = load float, float* %11, align 4
-  %load24 = load float, float* %12, align 4
-  %mul22.i0 = fmul fast float %load23, %13
-  %mul22.i1 = fmul fast float %load24, %13
-  %add23.i0 = fadd fast float %mul22.i0, %14
-  %add23.i1 = fadd fast float %mul22.i1, %15
-  %mul1.i.34.i0 = fmul fast float %add23.i0, %mul.i.i0
-  %mul1.i.34.i1 = fmul fast float %add23.i1, %mul.i.i1
-  %sub.i.35 = fadd fast float %mul1.i.34.i0, -1.000000e+00
-  %sub2.i.36 = fsub fast float 1.000000e+00, %mul1.i.34.i1
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float 0.000000e+00)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float 2.000000e+00)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 0, float %16)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 1, float %17)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 2, float %18)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 3, float %19)
-  call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 0, float %sub.i.35)
-  call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 1, float %sub2.i.36)
-  call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 2, float 5.000000e-01)
-  call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 3, float 1.000000e+00)
-  call void @dx.op.storeOutput.i32(i32 5, i32 3, i32 0, i8 0, i32 1)
-  call void @dx.op.emitStream(i32 97, i8 0)
-  call void @dx.op.cutStream(i32 98, i8 0)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.i32(i32, i32, i32, i8, i32) #0
-
-; Function Attrs: nounwind readnone
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #1
-
-; Function Attrs: nounwind readnone
-declare %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32, %dx.types.Handle, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.cutStream(i32, i8) #0
-
-; Function Attrs: nounwind
-declare void @dx.op.emitStream(i32, i8) #0
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.resources = !{!3}
-!dx.typeAnnotations = !{!6, !18}
-!dx.entryPoints = !{!35}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"gs", i32 6, i32 0}
-!3 = !{null, null, !4, null}
-!4 = !{!5}
-!5 = !{i32 0, %b* @b, !"b", i32 0, i32 0, i32 1, i32 8, null}
-!6 = !{i32 0, %struct.VSOutGSIn addrspace(1)* @dx.typevar.0, !7, %class.TriangleStream addrspace(1)* @dx.typevar.1, !10, %struct.VSOut addrspace(1)* @dx.typevar.2, !12, %b addrspace(1)* @dx.typevar.3, !16}
-!7 = !{i32 32, !8, !9}
-!8 = !{i32 3, i32 0, i32 4, !"POSSIZE", i32 6, !"posSize", i32 7, i32 9}
-!9 = !{i32 3, i32 16, i32 4, !"COLOR", i32 6, !"clr", i32 7, i32 9}
-!10 = !{i32 52, !11}
-!11 = !{i32 3, i32 0, i32 6, !"h"}
-!12 = !{i32 52, !13, !9, !14, !15}
-!13 = !{i32 3, i32 0, i32 4, !"TEXCOORD0", i32 6, !"uv", i32 7, i32 9}
-!14 = !{i32 3, i32 32, i32 4, !"SV_Position", i32 6, !"pos", i32 7, i32 9}
-!15 = !{i32 3, i32 48, i32 4, !"SV_RenderTargetArrayIndex", i32 6, !"index", i32 7, i32 5}
-!16 = !{i32 0, !17}
-!17 = !{i32 3, i32 0, i32 6, !"invViewportSize", i32 7, i32 9}
-!18 = !{i32 1, void ([1 x <3 x float>]*, [1 x <4 x float>]*, %class.TriangleStream*, <2 x float>*, <4 x float>*, <4 x float>*, i32*)* @main.flat, !19}
-!19 = !{!20, !22, !25, !27, !28, !30, !31, !33}
-!20 = !{i32 0, !21, !21}
-!21 = !{}
-!22 = !{i32 0, !23, !24}
-!23 = !{i32 4, !"POSSIZE", i32 7, i32 9}
-!24 = !{i32 0}
-!25 = !{i32 0, !26, !24}
-!26 = !{i32 4, !"COLOR", i32 7, i32 9}
-!27 = !{i32 5, !21, !21}
-!28 = !{i32 5, !29, !24}
-!29 = !{i32 4, !"TEXCOORD0", i32 7, i32 9}
-!30 = !{i32 5, !26, !24}
-!31 = !{i32 5, !32, !24}
-!32 = !{i32 4, !"SV_Position", i32 7, i32 9}
-!33 = !{i32 5, !34, !24}
-!34 = !{i32 4, !"SV_RenderTargetArrayIndex", i32 7, i32 5}
-!35 = !{void ([1 x <3 x float>]*, [1 x <4 x float>]*, %class.TriangleStream*, <2 x float>*, <4 x float>*, <4 x float>*, i32*)* @main.flat, !"", !36, !3, !44}
-!36 = !{!37, !40, null}
-!37 = !{!38, !39}
-!38 = !{i32 0, !"POSSIZE", i8 9, i8 0, !24, i8 2, i32 1, i8 3, i32 0, i8 0, null}
-!39 = !{i32 1, !"COLOR", i8 9, i8 0, !24, i8 2, i32 1, i8 4, i32 1, i8 0, null}
-!40 = !{!41, !39, !42, !43}
-!41 = !{i32 0, !"TEXCOORD", i8 9, i8 0, !24, i8 2, i32 1, i8 2, i32 0, i8 0, !50}
-!42 = !{i32 2, !"SV_Position", i8 9, i8 3, !24, i8 4, i32 1, i8 4, i32 2, i8 0, null}
-!43 = !{i32 3, !"SV_RenderTargetArrayIndex", i8 5, i8 4, !24, i8 1, i32 1, i8 1, i32 3, i8 0, null}
-!44 = !{i32 0, i64 512, i32 1, !45}
-!45 = !{i32 5, i32 1025, i32 1, i32 0, i32 33}
-;!45 = !{i32 1, i32 3, i32 1, i32 4, i32 1}
-
-!50 = !{i32 0, i32 5}
\ No newline at end of file
diff --git a/tools/clang/test/HLSL/dxil_validation/SimpleHs1.ll b/tools/clang/test/HLSL/dxil_validation/SimpleHs1.ll
deleted file mode 100644
index dd480a99a..000000000
--- a/tools/clang/test/HLSL/dxil_validation/SimpleHs1.ll
+++ /dev/null
@@ -1,130 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: For pass thru hull shader, input control point count must match output control point count
-; CHECK: Total number of scalars across all HS output control points must not exceed
-; CHECK: Required TessFactor for domain not found declared anywhere in Patch Constant data
-; CHECK: Required TessFactor for domain not found declared anywhere in Patch Constant data
-
-
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-%class.Texture2D = type { <4 x float>, %"class.Texture2D<vector<float, 4> >::mips_type" }
-%"class.Texture2D<vector<float, 4> >::mips_type" = type { i32 }
-%struct.PSSceneIn = type { <4 x float>, <2 x float>, <3 x float> }
-%struct.VSSceneIn = type { <3 x float>, <3 x float>, <2 x float> }
-%struct.HSPerPatchData = type { [3 x float], float }
-%class.InputPatch = type { [3 x %struct.PSSceneIn] }
-%struct.HSPerVertexData = type { %struct.PSSceneIn }
-
-@dx.typevar.0 = external addrspace(1) constant %class.Texture2D
-@dx.typevar.1 = external addrspace(1) constant %"class.Texture2D<vector<float, 4> >::mips_type"
-@dx.typevar.2 = external addrspace(1) constant %struct.PSSceneIn
-@dx.typevar.3 = external addrspace(1) constant %struct.VSSceneIn
-@dx.typevar.4 = external addrspace(1) constant %struct.HSPerPatchData
-@dx.typevar.5 = external addrspace(1) constant %class.InputPatch
-@dx.typevar.6 = external addrspace(1) constant %struct.HSPerVertexData
-@llvm.used = appending global [7 x i8*] [i8* addrspacecast (i8 addrspace(1)* bitcast (%class.Texture2D addrspace(1)* @dx.typevar.0 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%"class.Texture2D<vector<float, 4> >::mips_type" addrspace(1)* @dx.typevar.1 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.PSSceneIn addrspace(1)* @dx.typevar.2 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.VSSceneIn addrspace(1)* @dx.typevar.3 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.HSPerPatchData addrspace(1)* @dx.typevar.4 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%class.InputPatch addrspace(1)* @dx.typevar.5 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.HSPerVertexData addrspace(1)* @dx.typevar.6 to i8 addrspace(1)*) to i8*)], section "llvm.metadata"
-
-; Function Attrs: nounwind
-define void @"\01?HSPerPatchFunc@@YA?AUHSPerPatchData@@V?$InputPatch@UPSSceneIn@@$02@@@Z.flat"([3 x <4 x float>]* nocapture readnone, [3 x <2 x float>]* nocapture readnone, [3 x <3 x float>]* nocapture readnone, [3 x float]* nocapture readnone, float* nocapture readnone) #0 {
-entry:
-  %retval.0 = alloca [3 x float], align 4
-  %arrayidx3 = getelementptr inbounds [3 x float], [3 x float]* %retval.0, i32 0, i32 0
-  store float 1.000000e+00, float* %arrayidx3, align 4, !tbaa !62
-  %arrayidx22 = getelementptr inbounds [3 x float], [3 x float]* %retval.0, i32 0, i32 1
-  store float 1.000000e+00, float* %arrayidx22, align 4, !tbaa !62
-  %arrayidx41 = getelementptr inbounds [3 x float], [3 x float]* %retval.0, i32 0, i32 2
-  store float 1.000000e+00, float* %arrayidx41, align 4, !tbaa !62
-  %load = load [3 x float], [3 x float]* %retval.0, align 4
-  %5 = extractvalue [3 x float] %load, 0
-  call void @dx.op.storePatchConstant.f32(i32 109, i32 0, i32 0, i8 0, float %5)
-  %6 = extractvalue [3 x float] %load, 1
-  call void @dx.op.storePatchConstant.f32(i32 109, i32 0, i32 1, i8 0, float %6)
-  %7 = extractvalue [3 x float] %load, 2
-  call void @dx.op.storePatchConstant.f32(i32 109, i32 0, i32 2, i8 0, float %7)
-  call void @dx.op.storePatchConstant.f32(i32 109, i32 1, i32 0, i8 0, float 1.000000e+00)
-  ret void
-}
-
-
-; Function Attrs: nounwind
-declare void @dx.op.storePatchConstant.f32(i32, i32, i32, i8, float) #0
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.typeAnnotations = !{!3, !24}
-!dx.entryPoints = !{!46}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"hs", i32 6, i32 0}
-!3 = !{i32 0, %class.Texture2D addrspace(1)* @dx.typevar.0, !4, %"class.Texture2D<vector<float, 4> >::mips_type" addrspace(1)* @dx.typevar.1, !7, %struct.PSSceneIn addrspace(1)* @dx.typevar.2, !9, %struct.VSSceneIn addrspace(1)* @dx.typevar.3, !13, %struct.HSPerPatchData addrspace(1)* @dx.typevar.4, !17, %class.InputPatch addrspace(1)* @dx.typevar.5, !20, %struct.HSPerVertexData addrspace(1)* @dx.typevar.6, !22}
-!4 = !{i32 20, !5, !6}
-!5 = !{i32 3, i32 0, i32 6, !"h", i32 7, i32 9}
-!6 = !{i32 3, i32 16, i32 6, !"mips"}
-!7 = !{i32 4, !8}
-!8 = !{i32 3, i32 0, i32 6, !"handle", i32 7, i32 5}
-!9 = !{i32 44, !10, !11, !12}
-!10 = !{i32 3, i32 0, i32 4, !"SV_Position", i32 6, !"pos", i32 7, i32 9}
-!11 = !{i32 3, i32 16, i32 4, !"TEXCOORD0", i32 6, !"tex", i32 7, i32 9}
-!12 = !{i32 3, i32 32, i32 4, !"NORMAL", i32 6, !"norm", i32 7, i32 9}
-!13 = !{i32 40, !14, !15, !16}
-!14 = !{i32 3, i32 0, i32 4, !"POSITION", i32 6, !"pos", i32 7, i32 9}
-!15 = !{i32 3, i32 16, i32 4, !"NORMAL", i32 6, !"norm", i32 7, i32 9}
-!16 = !{i32 3, i32 32, i32 4, !"TEXCOORD0", i32 6, !"tex", i32 7, i32 9}
-!17 = !{i32 40, !18, !19}
-!18 = !{i32 3, i32 0, i32 4, !"SV_TessFactor", i32 6, !"edges", i32 7, i32 9}
-!19 = !{i32 3, i32 36, i32 4, !"SV_InsideTessFactor", i32 6, !"inside", i32 7, i32 9}
-!20 = !{i32 140, !21}
-!21 = !{i32 3, i32 0, i32 6, !"h"}
-!22 = !{i32 44, !23}
-!23 = !{i32 3, i32 0, i32 6, !"v"}
-!24 = !{i32 1, void ([3 x <4 x float>]*, [3 x <2 x float>]*, [3 x <3 x float>]*, [3 x float]*, float*)* @"\01?HSPerPatchFunc@@YA?AUHSPerPatchData@@V?$InputPatch@UPSSceneIn@@$02@@@Z.flat", !40}
-!25 = !{!26, !28, !31, !33, !35, !37, !38, !39}
-!26 = !{i32 0, !27, !27}
-!27 = !{}
-!28 = !{i32 0, !29, !30}
-!29 = !{i32 4, !"SV_OutputControlPointID", i32 7, i32 5}
-!30 = !{i32 0}
-!31 = !{i32 3, !32, !30}
-!32 = !{i32 4, !"SV_Position", i32 7, i32 9}
-!33 = !{i32 3, !34, !30}
-!34 = !{i32 4, !"TEXCOORD0", i32 7, i32 9}
-!35 = !{i32 3, !36, !30}
-!36 = !{i32 4, !"NORMAL", i32 7, i32 9}
-!37 = !{i32 1, !32, !30}
-!38 = !{i32 1, !34, !30}
-!39 = !{i32 1, !36, !30}
-!40 = !{!26, !31, !33, !35, !41, !44}
-!41 = !{i32 1, !42, !43}
-!42 = !{i32 4, !"SV_TessFactor", i32 7, i32 9}
-!43 = !{i32 0, i32 1, i32 2}
-!44 = !{i32 1, !45, !30}
-!45 = !{i32 4, !"SV_InsideTessFactor", i32 7, i32 9}
-!46 = !{null, !"", !47, null, !60}
-!47 = !{!48, !53, !57}
-!48 = !{!50, !51, !52}
-!50 = !{i32 0, !"SV_Position", i8 9, i8 3, !30, i8 4, i32 1, i8 4, i32 0, i8 0, null}
-!51 = !{i32 1, !"TEXCOORD", i8 9, i8 0, !30, i8 2, i32 1, i8 2, i32 1, i8 0, null}
-!52 = !{i32 2, !"NORMAL", i8 9, i8 0, !30, i8 2, i32 1, i8 3, i32 2, i8 0, null}
-!53 = !{!54, !55, !56, !66}
-!54 = !{i32 0, !"SV_Position", i8 9, i8 3, !30, i8 4, i32 1, i8 4, i32 0, i8 0, null}
-!55 = !{i32 1, !"TEXCOORD", i8 9, i8 0, !30, i8 2, i32 1, i8 2, i32 1, i8 0, null}
-!56 = !{i32 2, !"NORMAL", i8 9, i8 0, !30, i8 2, i32 1, i8 3, i32 2, i8 0, null}
-!57 = !{!58, !59}
-!58 = !{i32 0, !"TessFactor", i8 9, i8 0, !43, i8 0, i32 3, i8 1, i32 0, i8 0, null}
-!59 = !{i32 1, !"InsideTessFactor", i8 9, i8 0, !30, i8 0, i32 1, i8 1, i32 3, i8 0, null}
-!60 = !{i32 3, !61}
-!61 = !{void ([3 x <4 x float>]*, [3 x <2 x float>]*, [3 x <3 x float>]*, [3 x float]*, float*)* @"\01?HSPerPatchFunc@@YA?AUHSPerPatchData@@V?$InputPatch@UPSSceneIn@@$02@@@Z.flat", i32 3, i32 2000, i32 2, i32 3, i32 3, float 6.400000e+01}
-!62 = !{!63, !63, i64 0}
-!63 = !{!"float", !64, i64 0}
-!64 = !{!"omnipotent char", !65, i64 0}
-!65 = !{!"Simple C/C++ TBAA"}
-!66 = !{i32 3, !"COLOR", i8 9, i8 0, !30, i8 2, i32 1, i8 3, i32 3, i8 0, null}
-
diff --git a/tools/clang/test/HLSL/dxil_validation/SimpleHs3.ll b/tools/clang/test/HLSL/dxil_validation/SimpleHs3.ll
deleted file mode 100644
index 641fdbd8e..000000000
--- a/tools/clang/test/HLSL/dxil_validation/SimpleHs3.ll
+++ /dev/null
@@ -1,167 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: Hull Shader declared with Tri Domain must specify output primitive point, triangle_cw or triangle_ccw. Line output is not compatible with the Tri domain
-
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-%class.Texture2D = type { <4 x float>, %"class.Texture2D<vector<float, 4> >::mips_type" }
-%"class.Texture2D<vector<float, 4> >::mips_type" = type { i32 }
-%struct.PSSceneIn = type { <4 x float>, <2 x float>, <3 x float> }
-%struct.VSSceneIn = type { <3 x float>, <3 x float>, <2 x float> }
-%struct.HSPerPatchData = type { [3 x float], float }
-%class.InputPatch = type { [3 x %struct.PSSceneIn] }
-%class.OutputPatch = type { [3 x %struct.PSSceneIn] }
-%struct.HSPerVertexData = type { %struct.PSSceneIn }
-
-@dx.typevar.0 = external addrspace(1) constant %class.Texture2D
-@dx.typevar.1 = external addrspace(1) constant %"class.Texture2D<vector<float, 4> >::mips_type"
-@dx.typevar.2 = external addrspace(1) constant %struct.PSSceneIn
-@dx.typevar.3 = external addrspace(1) constant %struct.VSSceneIn
-@dx.typevar.4 = external addrspace(1) constant %struct.HSPerPatchData
-@dx.typevar.5 = external addrspace(1) constant %class.InputPatch
-@dx.typevar.6 = external addrspace(1) constant %class.OutputPatch
-@dx.typevar.7 = external addrspace(1) constant %struct.HSPerVertexData
-@llvm.used = appending global [8 x i8*] [i8* addrspacecast (i8 addrspace(1)* bitcast (%class.Texture2D addrspace(1)* @dx.typevar.0 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%"class.Texture2D<vector<float, 4> >::mips_type" addrspace(1)* @dx.typevar.1 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.PSSceneIn addrspace(1)* @dx.typevar.2 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.VSSceneIn addrspace(1)* @dx.typevar.3 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.HSPerPatchData addrspace(1)* @dx.typevar.4 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%class.InputPatch addrspace(1)* @dx.typevar.5 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%class.OutputPatch addrspace(1)* @dx.typevar.6 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.HSPerVertexData addrspace(1)* @dx.typevar.7 to i8 addrspace(1)*) to i8*)], section "llvm.metadata"
-
-; Function Attrs: nounwind
-define void @"\01?HSPerPatchFunc@@YA?AUHSPerPatchData@@V?$InputPatch@UPSSceneIn@@$02@@V?$OutputPatch@UPSSceneIn@@$02@@@Z.flat"([3 x <4 x float>]* nocapture readnone, [3 x <2 x float>]* nocapture readnone, [3 x <3 x float>]* nocapture readnone, [3 x <4 x float>]* nocapture readnone, [3 x <2 x float>]* nocapture readnone, [3 x <3 x float>]* nocapture readnone, [3 x float]* nocapture readnone, float* nocapture readnone) #0 {
-entry:
-  %retval.0 = alloca [3 x float], align 4
-  %8 = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 0, i32 0)
-  %arrayidx3 = getelementptr inbounds [3 x float], [3 x float]* %retval.0, i32 0, i32 0
-  store float %8, float* %arrayidx3, align 4, !tbaa !65
-  %9 = call float @dx.op.loadOutputControlPoint.f32(i32 106, i32 1, i32 0, i8 1, i32 1)
-  %arrayidx32 = getelementptr inbounds [3 x float], [3 x float]* %retval.0, i32 0, i32 1
-  store float %9, float* %arrayidx32, align 4, !tbaa !65
-  %arrayidx51 = getelementptr inbounds [3 x float], [3 x float]* %retval.0, i32 0, i32 2
-  store float 1.000000e+00, float* %arrayidx51, align 4, !tbaa !65
-  %load = load [3 x float], [3 x float]* %retval.0, align 4
-  %10 = extractvalue [3 x float] %load, 0
-  call void @dx.op.storePatchConstant.f32(i32 109, i32 0, i32 0, i8 0, float %10)
-  %11 = extractvalue [3 x float] %load, 1
-  call void @dx.op.storePatchConstant.f32(i32 109, i32 0, i32 1, i8 0, float %11)
-  %12 = extractvalue [3 x float] %load, 2
-  call void @dx.op.storePatchConstant.f32(i32 109, i32 0, i32 2, i8 0, float %12)
-  call void @dx.op.storePatchConstant.f32(i32 109, i32 1, i32 0, i8 0, float 1.000000e+00)
-  ret void
-}
-
-; Function Attrs: nounwind
-define void @main.flat(i32, [3 x <4 x float>]* nocapture readnone, [3 x <2 x float>]* nocapture readnone, [3 x <3 x float>]* nocapture readnone, <4 x float>* nocapture readnone, <2 x float>* nocapture readnone, <3 x float>* nocapture readnone) #0 {
-entry:
-  %7 = call i32 @dx.op.outputControlPointID.i32(i32 110)
-  %8 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 %7)
-  %9 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 %7)
-  %10 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 %7)
-  %11 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 3, i32 %7)
-  %12 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 %7)
-  %13 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 1, i32 %7)
-  %14 = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 0, i32 %7)
-  %15 = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 1, i32 %7)
-  %16 = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 2, i32 %7)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %8)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %9)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %10)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %11)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 0, float %12)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 1, float %13)
-  call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 0, float %14)
-  call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 1, float %15)
-  call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 2, float %16)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.outputControlPointID.i32(i32) #1
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind
-declare void @dx.op.storePatchConstant.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadOutputControlPoint.f32(i32, i32, i32, i8, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.typeAnnotations = !{!3, !24}
-!dx.entryPoints = !{!49}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"hs", i32 6, i32 0}
-!3 = !{i32 0, %class.Texture2D addrspace(1)* @dx.typevar.0, !4, %"class.Texture2D<vector<float, 4> >::mips_type" addrspace(1)* @dx.typevar.1, !7, %struct.PSSceneIn addrspace(1)* @dx.typevar.2, !9, %struct.VSSceneIn addrspace(1)* @dx.typevar.3, !13, %struct.HSPerPatchData addrspace(1)* @dx.typevar.4, !17, %class.InputPatch addrspace(1)* @dx.typevar.5, !20, %class.OutputPatch addrspace(1)* @dx.typevar.6, !20, %struct.HSPerVertexData addrspace(1)* @dx.typevar.7, !22}
-!4 = !{i32 20, !5, !6}
-!5 = !{i32 3, i32 0, i32 6, !"h", i32 7, i32 9}
-!6 = !{i32 3, i32 16, i32 6, !"mips"}
-!7 = !{i32 4, !8}
-!8 = !{i32 3, i32 0, i32 6, !"handle", i32 7, i32 5}
-!9 = !{i32 44, !10, !11, !12}
-!10 = !{i32 3, i32 0, i32 4, !"SV_Position", i32 6, !"pos", i32 7, i32 9}
-!11 = !{i32 3, i32 16, i32 4, !"TEXCOORD0", i32 6, !"tex", i32 7, i32 9}
-!12 = !{i32 3, i32 32, i32 4, !"NORMAL", i32 6, !"norm", i32 7, i32 9}
-!13 = !{i32 40, !14, !15, !16}
-!14 = !{i32 3, i32 0, i32 4, !"POSITION", i32 6, !"pos", i32 7, i32 9}
-!15 = !{i32 3, i32 16, i32 4, !"NORMAL", i32 6, !"norm", i32 7, i32 9}
-!16 = !{i32 3, i32 32, i32 4, !"TEXCOORD0", i32 6, !"tex", i32 7, i32 9}
-!17 = !{i32 40, !18, !19}
-!18 = !{i32 3, i32 0, i32 4, !"SV_TessFactor", i32 6, !"edges", i32 7, i32 9}
-!19 = !{i32 3, i32 36, i32 4, !"SV_InsideTessFactor", i32 6, !"inside", i32 7, i32 9}
-!20 = !{i32 140, !21}
-!21 = !{i32 3, i32 0, i32 6, !"h"}
-!22 = !{i32 44, !23}
-!23 = !{i32 3, i32 0, i32 6, !"v"}
-!24 = !{i32 1, void (i32, [3 x <4 x float>]*, [3 x <2 x float>]*, [3 x <3 x float>]*, <4 x float>*, <2 x float>*, <3 x float>*)* @main.flat, !25, void ([3 x <4 x float>]*, [3 x <2 x float>]*, [3 x <3 x float>]*, [3 x <4 x float>]*, [3 x <2 x float>]*, [3 x <3 x float>]*, [3 x float]*, float*)* @"\01?HSPerPatchFunc@@YA?AUHSPerPatchData@@V?$InputPatch@UPSSceneIn@@$02@@V?$OutputPatch@UPSSceneIn@@$02@@@Z.flat", !40}
-!25 = !{!26, !28, !31, !33, !35, !37, !38, !39}
-!26 = !{i32 0, !27, !27}
-!27 = !{}
-!28 = !{i32 0, !29, !30}
-!29 = !{i32 4, !"SV_OutputControlPointID", i32 7, i32 5}
-!30 = !{i32 0}
-!31 = !{i32 3, !32, !30}
-!32 = !{i32 4, !"SV_Position", i32 7, i32 9}
-!33 = !{i32 3, !34, !30}
-!34 = !{i32 4, !"TEXCOORD0", i32 7, i32 9}
-!35 = !{i32 3, !36, !30}
-!36 = !{i32 4, !"NORMAL", i32 7, i32 9}
-!37 = !{i32 1, !32, !30}
-!38 = !{i32 1, !34, !30}
-!39 = !{i32 1, !36, !30}
-!40 = !{!26, !31, !33, !35, !41, !42, !43, !44, !47}
-!41 = !{i32 4, !32, !30}
-!42 = !{i32 4, !34, !30}
-!43 = !{i32 4, !36, !30}
-!44 = !{i32 1, !45, !46}
-!45 = !{i32 4, !"SV_TessFactor", i32 7, i32 9}
-!46 = !{i32 0, i32 1, i32 2}
-!47 = !{i32 1, !48, !30}
-!48 = !{i32 4, !"SV_InsideTessFactor", i32 7, i32 9}
-!49 = !{void (i32, [3 x <4 x float>]*, [3 x <2 x float>]*, [3 x <3 x float>]*, <4 x float>*, <2 x float>*, <3 x float>*)* @main.flat, !"", !50, null, !63}
-!50 = !{!51, !56, !60}
-!51 = !{!53, !54, !55}
-!53 = !{i32 0, !"SV_Position", i8 9, i8 3, !30, i8 4, i32 1, i8 4, i32 0, i8 0, null}
-!54 = !{i32 1, !"TEXCOORD", i8 9, i8 0, !30, i8 2, i32 1, i8 2, i32 1, i8 0, null}
-!55 = !{i32 2, !"NORMAL", i8 9, i8 0, !30, i8 2, i32 1, i8 3, i32 2, i8 0, null}
-!56 = !{!57, !58, !59}
-!57 = !{i32 0, !"SV_Position", i8 9, i8 3, !30, i8 4, i32 1, i8 4, i32 0, i8 0, null}
-!58 = !{i32 1, !"TEXCOORD", i8 9, i8 0, !30, i8 2, i32 1, i8 2, i32 1, i8 0, null}
-!59 = !{i32 2, !"NORMAL", i8 9, i8 0, !30, i8 2, i32 1, i8 3, i32 2, i8 0, null}
-!60 = !{!61, !62}
-!61 = !{i32 0, !"SV_TessFactor", i8 9, i8 25, !46, i8 0, i32 3, i8 1, i32 0, i8 0, null}
-!62 = !{i32 1, !"SV_InsideTessFactor", i8 9, i8 26, !30, i8 0, i32 1, i8 1, i32 3, i8 0, null}
-!63 = !{i32 3, !64}
-!64 = !{void ([3 x <4 x float>]*, [3 x <2 x float>]*, [3 x <3 x float>]*, [3 x <4 x float>]*, [3 x <2 x float>]*, [3 x <3 x float>]*, [3 x float]*, float*)* @"\01?HSPerPatchFunc@@YA?AUHSPerPatchData@@V?$InputPatch@UPSSceneIn@@$02@@V?$OutputPatch@UPSSceneIn@@$02@@@Z.flat", i32 3, i32 3, i32 2, i32 3, i32 2, float 6.400000e+01}
-!65 = !{!66, !66, i64 0}
-!66 = !{!"float", !67, i64 0}
-!67 = !{!"omnipotent char", !68, i64 0}
-!68 = !{!"Simple C/C++ TBAA"}
diff --git a/tools/clang/test/HLSL/dxil_validation/SimpleHs4.ll b/tools/clang/test/HLSL/dxil_validation/SimpleHs4.ll
deleted file mode 100644
index d349258ba..000000000
--- a/tools/clang/test/HLSL/dxil_validation/SimpleHs4.ll
+++ /dev/null
@@ -1,225 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: Hull Shader declared with IsoLine Domain must specify output primitive point or line. Triangle_cw or triangle_ccw output are not compatible with the IsoLine Domain.
-
-
-;
-; Input signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; SV_Position              0   xyzw        0      POS   float
-; TEXCOORD                 0   xy          1     NONE   float
-; NORMAL                   0   xyz         2     NONE   float
-;
-;
-; Output signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; SV_Position              0   xyzw        0      POS   float   xyzw
-; TEXCOORD                 0   xy          1     NONE   float   xyzw
-; NORMAL                   0   xyz         2     NONE   float   xyzw
-;
-;
-; Patch Constant signature signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; SV_TessFactor            0   x           0  LINEDEN   float   xyzw
-; SV_TessFactor            1   x           1  LINEDET   float   xyzw
-;
-;
-; Pipeline Runtime Information:
-;
-; Hull Shader
-; InputControlPointCount=2
-; OutputControlPointCount=2
-; Domain=isoline
-; OutputPrimitive=line
-;
-;
-; Input signature:
-;
-; Name                 Index             InterpMode
-; -------------------- ----- ----------------------
-; SV_Position              0          noperspective
-; TEXCOORD                 0                 linear
-; NORMAL                   0                 linear
-;
-; Output signature:
-;
-; Name                 Index             InterpMode
-; -------------------- ----- ----------------------
-; SV_Position              0          noperspective
-; TEXCOORD                 0                 linear
-; NORMAL                   0                 linear
-;
-; Patch Constant signature signature:
-;
-; Name                 Index             InterpMode
-; -------------------- ----- ----------------------
-; SV_TessFactor            0
-;
-; Buffer Definitions:
-;
-;
-; Resource Bindings:
-;
-; Name                                 Type  Format         Dim      ID      HLSL Bind  Count
-; ------------------------------ ---------- ------- ----------- ------- -------------- ------
-;
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-%class.Texture2D = type { <4 x float>, %"class.Texture2D<vector<float, 4> >::mips_type" }
-%"class.Texture2D<vector<float, 4> >::mips_type" = type { i32 }
-%struct.PSSceneIn = type { <4 x float>, <2 x float>, <3 x float> }
-%struct.VSSceneIn = type { <3 x float>, <3 x float>, <2 x float> }
-%struct.HSPerPatchData = type { [2 x float] }
-%struct.HSPerVertexData = type { %struct.PSSceneIn }
-%class.InputPatch = type { [2 x %struct.PSSceneIn] }
-%class.OutputPatch = type { [2 x %struct.HSPerVertexData] }
-
-@dx.typevar.0 = external addrspace(1) constant %class.Texture2D
-@dx.typevar.1 = external addrspace(1) constant %"class.Texture2D<vector<float, 4> >::mips_type"
-@dx.typevar.2 = external addrspace(1) constant %struct.PSSceneIn
-@dx.typevar.3 = external addrspace(1) constant %struct.VSSceneIn
-@dx.typevar.4 = external addrspace(1) constant %struct.HSPerPatchData
-@dx.typevar.5 = external addrspace(1) constant %struct.HSPerVertexData
-@dx.typevar.6 = external addrspace(1) constant %class.InputPatch
-@dx.typevar.7 = external addrspace(1) constant %class.OutputPatch
-@llvm.used = appending global [8 x i8*] [i8* addrspacecast (i8 addrspace(1)* bitcast (%class.Texture2D addrspace(1)* @dx.typevar.0 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%"class.Texture2D<vector<float, 4> >::mips_type" addrspace(1)* @dx.typevar.1 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.PSSceneIn addrspace(1)* @dx.typevar.2 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.VSSceneIn addrspace(1)* @dx.typevar.3 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.HSPerPatchData addrspace(1)* @dx.typevar.4 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.HSPerVertexData addrspace(1)* @dx.typevar.5 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%class.InputPatch addrspace(1)* @dx.typevar.6 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%class.OutputPatch addrspace(1)* @dx.typevar.7 to i8 addrspace(1)*) to i8*)], section "llvm.metadata"
-
-; Function Attrs: nounwind
-define void @"\01?HSPerPatchFunc@@YA?AUHSPerPatchData@@V?$InputPatch@UPSSceneIn@@$01@@V?$OutputPatch@UHSPerVertexData@@$01@@@Z.flat"([2 x <4 x float>]* nocapture readnone, [2 x <2 x float>]* nocapture readnone, [2 x <3 x float>]* nocapture readnone, [2 x <4 x float>]* nocapture readnone, [2 x <2 x float>]* nocapture readnone, [2 x <3 x float>]* nocapture readnone, [2 x float]* nocapture readnone) #0 {
-entry:
-  %retval.0 = alloca [2 x float], align 4
-  %7 = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 0, i32 0)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %arrayidx2 = getelementptr inbounds [2 x float], [2 x float]* %retval.0, i32 0, i32 0
-  store float %7, float* %arrayidx2, align 4, !tbaa !61
-  %8 = call float @dx.op.loadOutputControlPoint.f32(i32 106, i32 1, i32 0, i8 0, i32 1)  ; LoadOutputControlPoint(inputSigId,row,col,index)
-  %arrayidx31 = getelementptr inbounds [2 x float], [2 x float]* %retval.0, i32 0, i32 1
-  store float %8, float* %arrayidx31, align 4, !tbaa !61
-  %load = load [2 x float], [2 x float]* %retval.0, align 4
-  %9 = extractvalue [2 x float] %load, 0
-  call void @dx.op.storePatchConstant.f32(i32 109, i32 0, i32 0, i8 0, float %9)  ; StorePatchConstant(outputSigID,row,col,value)
-  %10 = extractvalue [2 x float] %load, 1
-  call void @dx.op.storePatchConstant.f32(i32 109, i32 0, i32 1, i8 0, float %10)  ; StorePatchConstant(outputSigID,row,col,value)
-  ret void
-}
-
-; Function Attrs: nounwind
-define void @main.flat(i32, [2 x <4 x float>]* nocapture readnone, [2 x <2 x float>]* nocapture readnone, [2 x <3 x float>]* nocapture readnone, <4 x float>* nocapture readnone, <2 x float>* nocapture readnone, <3 x float>* nocapture readnone) #0 {
-entry:
-  %7 = call i32 @dx.op.outputControlPointID.i32(i32 110)  ; OutputControlPointID()
-  %8 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 %7)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %9 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 %7)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %10 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 %7)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %11 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 3, i32 %7)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %12 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 %7)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %13 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 1, i32 %7)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %14 = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 0, i32 %7)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %15 = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 1, i32 %7)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %16 = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 2, i32 %7)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %8)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %9)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %10)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %11)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 0, float %12)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 1, float %13)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 0, float %14)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 1, float %15)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 2, float %16)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.outputControlPointID.i32(i32) #1
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind
-declare void @dx.op.storePatchConstant.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadOutputControlPoint.f32(i32, i32, i32, i8, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.typeAnnotations = !{!3, !23}
-!dx.entryPoints = !{!46}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"hs", i32 6, i32 0}
-!3 = !{i32 0, %class.Texture2D addrspace(1)* @dx.typevar.0, !4, %"class.Texture2D<vector<float, 4> >::mips_type" addrspace(1)* @dx.typevar.1, !7, %struct.PSSceneIn addrspace(1)* @dx.typevar.2, !9, %struct.VSSceneIn addrspace(1)* @dx.typevar.3, !13, %struct.HSPerPatchData addrspace(1)* @dx.typevar.4, !17, %struct.HSPerVertexData addrspace(1)* @dx.typevar.5, !19, %class.InputPatch addrspace(1)* @dx.typevar.6, !21, %class.OutputPatch addrspace(1)* @dx.typevar.7, !21}
-!4 = !{i32 20, !5, !6}
-!5 = !{i32 3, i32 0, i32 6, !"h", i32 7, i32 9}
-!6 = !{i32 3, i32 16, i32 6, !"mips"}
-!7 = !{i32 4, !8}
-!8 = !{i32 3, i32 0, i32 6, !"handle", i32 7, i32 5}
-!9 = !{i32 44, !10, !11, !12}
-!10 = !{i32 3, i32 0, i32 4, !"SV_Position", i32 6, !"pos", i32 7, i32 9}
-!11 = !{i32 3, i32 16, i32 4, !"TEXCOORD0", i32 6, !"tex", i32 7, i32 9}
-!12 = !{i32 3, i32 32, i32 4, !"NORMAL", i32 6, !"norm", i32 7, i32 9}
-!13 = !{i32 40, !14, !15, !16}
-!14 = !{i32 3, i32 0, i32 4, !"POSITION", i32 6, !"pos", i32 7, i32 9}
-!15 = !{i32 3, i32 16, i32 4, !"NORMAL", i32 6, !"norm", i32 7, i32 9}
-!16 = !{i32 3, i32 32, i32 4, !"TEXCOORD0", i32 6, !"tex", i32 7, i32 9}
-!17 = !{i32 20, !18}
-!18 = !{i32 3, i32 0, i32 4, !"SV_TessFactor", i32 6, !"edges", i32 7, i32 9}
-!19 = !{i32 44, !20}
-!20 = !{i32 3, i32 0, i32 6, !"v"}
-!21 = !{i32 92, !22}
-!22 = !{i32 3, i32 0, i32 6, !"h"}
-!23 = !{i32 1, void ([2 x <4 x float>]*, [2 x <2 x float>]*, [2 x <3 x float>]*, [2 x <4 x float>]*, [2 x <2 x float>]*, [2 x <3 x float>]*, [2 x float]*)* @"\01?HSPerPatchFunc@@YA?AUHSPerPatchData@@V?$InputPatch@UPSSceneIn@@$01@@V?$OutputPatch@UHSPerVertexData@@$01@@@Z.flat", !24, void (i32, [2 x <4 x float>]*, [2 x <2 x float>]*, [2 x <3 x float>]*, <4 x float>*, <2 x float>*, <3 x float>*)* @main.flat, !40}
-!24 = !{!25, !27, !30, !32, !34, !35, !36, !37}
-!25 = !{i32 0, !26, !26}
-!26 = !{}
-!27 = !{i32 3, !28, !29}
-!28 = !{i32 4, !"SV_Position", i32 7, i32 9}
-!29 = !{i32 0}
-!30 = !{i32 3, !31, !29}
-!31 = !{i32 4, !"TEXCOORD0", i32 7, i32 9}
-!32 = !{i32 3, !33, !29}
-!33 = !{i32 4, !"NORMAL", i32 7, i32 9}
-!34 = !{i32 4, !28, !29}
-!35 = !{i32 4, !31, !29}
-!36 = !{i32 4, !33, !29}
-!37 = !{i32 1, !38, !39}
-!38 = !{i32 4, !"SV_TessFactor", i32 7, i32 9}
-!39 = !{i32 0, i32 1}
-!40 = !{!25, !41, !27, !30, !32, !43, !44, !45}
-!41 = !{i32 0, !42, !29}
-!42 = !{i32 4, !"SV_OutputControlPointID", i32 7, i32 5}
-!43 = !{i32 1, !28, !29}
-!44 = !{i32 1, !31, !29}
-!45 = !{i32 1, !33, !29}
-!46 = !{void (i32, [2 x <4 x float>]*, [2 x <2 x float>]*, [2 x <3 x float>]*, <4 x float>*, <2 x float>*, <3 x float>*)* @main.flat, !"", !47, null, !59}
-!47 = !{!48, !53, !57}
-!48 = !{!50, !51, !52}
-!50 = !{i32 0, !"SV_Position", i8 9, i8 3, !29, i8 4, i32 1, i8 4, i32 0, i8 0, null}
-!51 = !{i32 1, !"TEXCOORD", i8 9, i8 0, !29, i8 2, i32 1, i8 2, i32 1, i8 0, null}
-!52 = !{i32 2, !"NORMAL", i8 9, i8 0, !29, i8 2, i32 1, i8 3, i32 2, i8 0, null}
-!53 = !{!54, !55, !56}
-!54 = !{i32 0, !"SV_Position", i8 9, i8 3, !29, i8 4, i32 1, i8 4, i32 0, i8 0, null}
-!55 = !{i32 1, !"TEXCOORD", i8 9, i8 0, !29, i8 2, i32 1, i8 2, i32 1, i8 0, null}
-!56 = !{i32 2, !"NORMAL", i8 9, i8 0, !29, i8 2, i32 1, i8 3, i32 2, i8 0, null}
-!57 = !{!58}
-!58 = !{i32 0, !"SV_TessFactor", i8 9, i8 25, !39, i8 0, i32 2, i8 1, i32 0, i8 0, null}
-!59 = !{i32 3, !60}
-!60 = !{void ([2 x <4 x float>]*, [2 x <2 x float>]*, [2 x <3 x float>]*, [2 x <4 x float>]*, [2 x <2 x float>]*, [2 x <3 x float>]*, [2 x float]*)* @"\01?HSPerPatchFunc@@YA?AUHSPerPatchData@@V?$InputPatch@UPSSceneIn@@$01@@V?$OutputPatch@UHSPerVertexData@@$01@@@Z.flat", i32 2, i32 2, i32 1, i32 3, i32 3, float 6.400000e+01}
-!61 = !{!62, !62, i64 0}
-!62 = !{!"float", !63, i64 0}
-!63 = !{!"omnipotent char", !64, i64 0}
-!64 = !{!"Simple C/C++ TBAA"}
-
diff --git a/tools/clang/test/HLSL/dxil_validation/UndefValue.ll b/tools/clang/test/HLSL/dxil_validation/UndefValue.ll
deleted file mode 100644
index 55576f59e..000000000
--- a/tools/clang/test/HLSL/dxil_validation/UndefValue.ll
+++ /dev/null
@@ -1,92 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: Instructions should not read uninitialized value
-
-;
-; Input signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; B                        0   x           0     NONE unknown
-;
-;
-; Output signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; SV_Depth                 0   x           0    DEPTH   float   xyzw
-;
-;
-; Pipeline Runtime Information:
-;
-; Pixel Shader
-; DepthOutput=1
-; SampleFrequency=0
-;
-;
-; Input signature:
-;
-; Name                 Index             InterpMode
-; -------------------- ----- ----------------------
-; B                        0        nointerpolation
-;
-; Output signature:
-;
-; Name                 Index             InterpMode
-; -------------------- ----- ----------------------
-; SV_Depth                 0
-;
-; Buffer Definitions:
-;
-;
-; Resource Bindings:
-;
-; Name                                 Type  Format         Dim      ID      HLSL Bind  Count
-; ------------------------------ ---------- ------- ----------- ------- -------------- ------
-;
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-; Function Attrs: nounwind
-define void @"\01?main@@YAMM@Z.flat"(float, float* nocapture readnone) #0 {
-entry:
-  %2 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %add = fadd fast float %2, undef
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %add)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.typeAnnotations = !{!3}
-!dx.entryPoints = !{!12}
-
-!0 = !{!"clang version 3.7.0 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"ps", i32 6, i32 0}
-!3 = !{i32 1, void (float, float*)* @"\01?main@@YAMM@Z.flat", !4}
-!4 = !{!5, !7, !10}
-!5 = !{i32 0, !6, !6}
-!6 = !{}
-!7 = !{i32 0, !8, !9}
-!8 = !{i32 4, !"B", i32 7, i32 13}
-!9 = !{i32 0}
-!10 = !{i32 1, !11, !9}
-!11 = !{i32 4, !"SV_DEPTH", i32 7, i32 9}
-!12 = !{void (float, float*)* @"\01?main@@YAMM@Z.flat", !"", !13, null, null}
-!13 = !{!14, !16, null}
-!14 = !{!15}
-!15 = !{i32 0, !"B", i8 13, i8 0, !9, i8 1, i32 1, i8 1, i32 0, i8 0, null}
-!16 = !{!17}
-!17 = !{i32 0, !"SV_Depth", i8 9, i8 17, !9, i8 0, i32 1, i8 1, i32 0, i8 0, null}
-
diff --git a/tools/clang/test/HLSL/dxil_validation/UpdateCounter.ll b/tools/clang/test/HLSL/dxil_validation/UpdateCounter.ll
deleted file mode 100644
index 5cc029ba3..000000000
--- a/tools/clang/test/HLSL/dxil_validation/UpdateCounter.ll
+++ /dev/null
@@ -1,119 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: BufferUpdateCounter valid only on UAV
-; CHECK: BufferUpdateCounter valid only on structured buffers
-; CHECK: inc of BufferUpdateCounter must be an immediate constant
-; CHECK: RWStructuredBuffers may increment or decrement their counters, but not both.
-
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-%class.Buffer = type { <2 x float> }
-%class.RWStructuredBuffer = type { %struct.Foo }
-%struct.Foo = type { <2 x float>, <3 x float>, [4 x <2 x i32>] }
-%dx.types.Handle = type { i8* }
-%dx.types.ResRet.f32 = type { float, float, float, float, i32 }
-
-@"\01?buf1@@3V?$Buffer@V?$vector@M$01@@@@A" = available_externally global %class.Buffer zeroinitializer, align 4
-@"\01?buf2@@3V?$RWStructuredBuffer@UFoo@@@@A" = available_externally global %class.RWStructuredBuffer zeroinitializer, align 4
-@dx.typevar.0 = external addrspace(1) constant %class.Buffer
-@dx.typevar.1 = external addrspace(1) constant %class.RWStructuredBuffer
-@dx.typevar.2 = external addrspace(1) constant %struct.Foo
-@llvm.used = appending global [7 x i8*] [i8* bitcast (%class.RWStructuredBuffer* @"\01?buf2@@3V?$RWStructuredBuffer@UFoo@@@@A" to i8*), i8* bitcast (%class.Buffer* @"\01?buf1@@3V?$Buffer@V?$vector@M$01@@@@A" to i8*), i8* bitcast (%class.Buffer* @"\01?buf1@@3V?$Buffer@V?$vector@M$01@@@@A" to i8*), i8* bitcast (%class.RWStructuredBuffer* @"\01?buf2@@3V?$RWStructuredBuffer@UFoo@@@@A" to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%class.Buffer addrspace(1)* @dx.typevar.0 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%class.RWStructuredBuffer addrspace(1)* @dx.typevar.1 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.Foo addrspace(1)* @dx.typevar.2 to i8 addrspace(1)*) to i8*)], section "llvm.metadata"
-
-; Function Attrs: nounwind
-define void @main.flat(float, float, <4 x float>* nocapture readnone) #0 {
-entry:
-  %buf2_UAV_structbuf = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 1, i32 0, i32 0, i1 false)
-  %buf1_texture_buf = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 0, i32 0, i1 false)
-  %3 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 undef)
-  %4 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)
-  %5 = call i32 @dx.op.bufferUpdateCounter(i32 71, %dx.types.Handle %buf2_UAV_structbuf, i8 1)
-  call void @dx.op.bufferStore.f32(i32 70, %dx.types.Handle %buf2_UAV_structbuf, i32 %5, i32 0, float %4, float %3, float undef, float undef, i8 3)
-  %6 = call i32 @dx.op.bufferUpdateCounter(i32 71, %dx.types.Handle %buf2_UAV_structbuf, i8 -1)
-  %BufferLoad1 = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 69, %dx.types.Handle %buf2_UAV_structbuf, i32 %6, i32 0)
-  %7 = extractvalue %dx.types.ResRet.f32 %BufferLoad1, 0
-  %8 = extractvalue %dx.types.ResRet.f32 %BufferLoad1, 1
-  %9 = call i32 @dx.op.bufferUpdateCounter(i32 71, %dx.types.Handle %buf1_texture_buf, i8 undef)
-  %BufferLoad = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 69, %dx.types.Handle %buf1_texture_buf, i32 %6, i32 undef)
-  %10 = extractvalue %dx.types.ResRet.f32 %BufferLoad, 0
-  %11 = extractvalue %dx.types.ResRet.f32 %BufferLoad, 1
-  %add.i0 = fadd fast float %10, %7
-  %add.i1 = fadd fast float %11, %8
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %add.i0)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %add.i1)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float 0.000000e+00)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float 0.000000e+00)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind readnone
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #1
-
-; Function Attrs: nounwind
-declare i32 @dx.op.bufferUpdateCounter(i32, %dx.types.Handle, i8) #0
-
-; Function Attrs: nounwind readonly
-declare %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32, %dx.types.Handle, i32, i32) #2
-
-; Function Attrs: nounwind
-declare void @dx.op.bufferStore.f32(i32, %dx.types.Handle, i32, i32, float, float, float, float, i8) #0
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind readonly }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.resources = !{!3}
-!dx.typeAnnotations = !{!10, !19}
-!dx.entryPoints = !{!32}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"ps", i32 6, i32 0}
-!3 = !{!4, !7, null, null}
-!4 = !{!5}
-!5 = !{i32 0, %class.Buffer* @"\01?buf1@@3V?$Buffer@V?$vector@M$01@@@@A", !"buf1", i32 0, i32 0, i32 1, i32 10, i32 0, !6}
-!6 = !{i32 0, i32 9}
-!7 = !{!8}
-!8 = !{i32 0, %class.RWStructuredBuffer* @"\01?buf2@@3V?$RWStructuredBuffer@UFoo@@@@A", !"buf2", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !9}
-!9 = !{i32 1, i32 52}
-!10 = !{i32 0, %class.Buffer addrspace(1)* @dx.typevar.0, !11, %class.RWStructuredBuffer addrspace(1)* @dx.typevar.1, !13, %struct.Foo addrspace(1)* @dx.typevar.2, !15}
-!11 = !{i32 8, !12}
-!12 = !{i32 3, i32 0, i32 6, !"h", i32 7, i32 9}
-!13 = !{i32 88, !14}
-!14 = !{i32 3, i32 0, i32 6, !"h"}
-!15 = !{i32 88, !16, !17, !18}
-!16 = !{i32 3, i32 0, i32 6, !"a", i32 7, i32 9}
-!17 = !{i32 3, i32 16, i32 6, !"b", i32 7, i32 9}
-!18 = !{i32 3, i32 32, i32 6, !"c", i32 7, i32 4}
-!19 = !{i32 1, void (float, float, <4 x float>*)* @main.flat, !20}
-!20 = !{!21, !23, !26, !29}
-!21 = !{i32 0, !22, !22}
-!22 = !{}
-!23 = !{i32 0, !24, !25}
-!24 = !{i32 4, !"Idx1", i32 7, i32 9}
-!25 = !{i32 1}
-!26 = !{i32 0, !27, !28}
-!27 = !{i32 4, !"Idx2", i32 7, i32 9}
-!28 = !{i32 2}
-!29 = !{i32 1, !30, !31}
-!30 = !{i32 4, !"SV_Target", i32 7, i32 9}
-!31 = !{i32 0}
-!32 = !{void (float, float, <4 x float>*)* @main.flat, !"", !33, !3, !39}
-!33 = !{!34, !37, null}
-!34 = !{!35, !36}
-!35 = !{i32 0, !"Idx", i8 9, i8 0, !25, i8 2, i32 1, i8 1, i32 0, i8 0, null}
-!36 = !{i32 1, !"Idx", i8 9, i8 0, !28, i8 2, i32 1, i8 1, i32 1, i8 0, null}
-!37 = !{!38}
-!38 = !{i32 0, !"SV_Target", i8 9, i8 16, !31, i8 0, i32 1, i8 4, i32 0, i8 0, null}
-!39 = !{i32 0, i64 8208}
diff --git a/tools/clang/test/HLSL/dxil_validation/barrier.ll b/tools/clang/test/HLSL/dxil_validation/barrier.ll
deleted file mode 100644
index 4d1f85467..000000000
--- a/tools/clang/test/HLSL/dxil_validation/barrier.ll
+++ /dev/null
@@ -1,171 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: Internal declaration 'internalGV' is unused
-; CHECK: Vector type '<4 x float>' is not allowed
-; CHECK: External declaration 'dx.typevar.2' is unused
-; CHECK: Mode of Barrier must be an immediate constant
-; CHECK: sync must include some form of memory barrier - _u (UAV) and/or _g (Thread Group Shared Memory).  Only _t (thread group sync) is optional.
-; CHECK: sync can't specify both _ugroup and _uglobal. If both are needed, just specify _uglobal.
-
-
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-%dx.alignment.legacy.class.RWStructuredBuffer = type { [2 x <2 x float>] }
-%dx.alignment.legacy.class.StructuredBuffer = type { %dx.alignment.legacy.struct.mat }
-%dx.alignment.legacy.struct.mat = type { [2 x <2 x float>] }
-%dx.alignment.legacy.class.StructuredBuffer.0 = type { [2 x <2 x float>] }
-%class.RWStructuredBuffer = type { %class.matrix.float.2.2 }
-%class.matrix.float.2.2 = type { [2 x <2 x float>] }
-%class.StructuredBuffer = type { %struct.mat }
-%struct.mat = type { %class.matrix.float.2.2 }
-%class.StructuredBuffer.0 = type { %class.matrix.float.2.2 }
-%dx.types.Handle = type { i8* }
-%dx.types.ResRet.f32 = type { float, float, float, float, i32 }
-
-@"internalGV" = internal global [64 x <4 x float>] undef
-@"\01?dataC@@3PAV?$matrix@M$01$01@@A.v" = addrspace(3) global [64 x <4 x float>] undef
-@"\01?fA@@3V?$RWStructuredBuffer@V?$matrix@M$01$01@@@@A_legacy" = external global %dx.alignment.legacy.class.RWStructuredBuffer
-@"\01?mats@@3V?$StructuredBuffer@Umat@@@@A_legacy" = external global %dx.alignment.legacy.class.StructuredBuffer
-@"\01?mats2@@3V?$StructuredBuffer@V?$matrix@M$01$01@@@@A_legacy" = external global %dx.alignment.legacy.class.StructuredBuffer.0
-@dx.typevar.0 = external addrspace(1) constant %class.RWStructuredBuffer
-@dx.typevar.1 = external addrspace(1) constant %class.StructuredBuffer
-@dx.typevar.2 = external addrspace(1) constant %struct.mat
-@dx.typevar.3 = external addrspace(1) constant %class.StructuredBuffer.0
-@dx.typevar.4 = external addrspace(1) constant %dx.alignment.legacy.class.RWStructuredBuffer
-@dx.typevar.5 = external addrspace(1) constant %dx.alignment.legacy.struct.mat
-@dx.typevar.6 = external addrspace(1) constant %dx.alignment.legacy.class.StructuredBuffer
-@dx.typevar.7 = external addrspace(1) constant %dx.alignment.legacy.class.StructuredBuffer.0
-@llvm.used = appending global [11 x i8*] [i8* addrspacecast (i8 addrspace(1)* bitcast (%dx.alignment.legacy.struct.mat addrspace(1)* @dx.typevar.5 to i8 addrspace(1)*) to i8*), i8* bitcast (%dx.alignment.legacy.class.StructuredBuffer.0* @"\01?mats2@@3V?$StructuredBuffer@V?$matrix@M$01$01@@@@A_legacy" to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%class.RWStructuredBuffer addrspace(1)* @dx.typevar.0 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%dx.alignment.legacy.class.StructuredBuffer addrspace(1)* @dx.typevar.6 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%class.StructuredBuffer addrspace(1)* @dx.typevar.1 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%class.StructuredBuffer.0 addrspace(1)* @dx.typevar.3 to i8 addrspace(1)*) to i8*), i8* bitcast (%dx.alignment.legacy.class.RWStructuredBuffer* @"\01?fA@@3V?$RWStructuredBuffer@V?$matrix@M$01$01@@@@A_legacy" to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.mat addrspace(1)* @dx.typevar.2 to i8 addrspace(1)*) to i8*), i8* bitcast (%dx.alignment.legacy.class.StructuredBuffer* @"\01?mats@@3V?$StructuredBuffer@Umat@@@@A_legacy" to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%dx.alignment.legacy.class.RWStructuredBuffer addrspace(1)* @dx.typevar.4 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%dx.alignment.legacy.class.StructuredBuffer.0 addrspace(1)* @dx.typevar.7 to i8 addrspace(1)*) to i8*)], section "llvm.metadata"
-
-; Function Attrs: alwaysinline nounwind
-define void @main(<2 x i32> %tid, <2 x i32> %gid, <2 x i32> %gtid, i32 %gidx) #0 {
-entry:
-  %fA_UAV_structbuf = tail call %dx.types.Handle @dx.op.createHandle(i32 58, i8 1, i32 0, i32 0, i1 false)
-  %mats2_texture_structbuf = tail call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 1, i32 1, i1 false)
-  %mats_texture_structbuf = tail call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 0, i32 0, i1 false)
-  %0 = tail call i32 @dx.op.threadId.i32(i32 93, i32 0)
-  %1 = tail call i32 @dx.op.threadId.i32(i32 93, i32 1)
-  %2 = tail call i32 @dx.op.groupId.i32(i32 94, i32 0)
-  %3 = tail call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 1)
-  %4 = tail call i32 @dx.op.flattenedThreadIdInGroup.i32(i32 96)
-  %rem = and i32 %0, 63
-  %5 = getelementptr inbounds [64 x <4 x float>], [64 x <4 x float>] addrspace(3)* @"\01?dataC@@3PAV?$matrix@M$01$01@@A.v", i32 0, i32 %rem, i32 0
-  %6 = getelementptr inbounds [64 x <4 x float>], [64 x <4 x float>] addrspace(3)* @"\01?dataC@@3PAV?$matrix@M$01$01@@A.v", i32 0, i32 %rem, i32 1
-  %7 = getelementptr inbounds [64 x <4 x float>], [64 x <4 x float>] addrspace(3)* @"\01?dataC@@3PAV?$matrix@M$01$01@@A.v", i32 0, i32 %rem, i32 2
-  %8 = getelementptr inbounds [64 x <4 x float>], [64 x <4 x float>] addrspace(3)* @"\01?dataC@@3PAV?$matrix@M$01$01@@A.v", i32 0, i32 %rem, i32 3
-  %BufferLoad = tail call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 69, %dx.types.Handle %mats_texture_structbuf, i32 %2, i32 0)
-  %9 = extractvalue %dx.types.ResRet.f32 %BufferLoad, 0
-  %10 = extractvalue %dx.types.ResRet.f32 %BufferLoad, 1
-  %11 = extractvalue %dx.types.ResRet.f32 %BufferLoad, 2
-  %12 = extractvalue %dx.types.ResRet.f32 %BufferLoad, 3
-  %BufferLoad10 = tail call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 69, %dx.types.Handle %mats2_texture_structbuf, i32 %3, i32 0)
-  %13 = extractvalue %dx.types.ResRet.f32 %BufferLoad10, 0
-  %14 = extractvalue %dx.types.ResRet.f32 %BufferLoad10, 1
-  %15 = extractvalue %dx.types.ResRet.f32 %BufferLoad10, 2
-  %16 = extractvalue %dx.types.ResRet.f32 %BufferLoad10, 3
-  %.i0 = fadd fast float %13, %9
-  %.i1 = fadd fast float %14, %11
-  %.i2 = fadd fast float %15, %10
-  %.i3 = fadd fast float %16, %12
-  store float %.i0, float addrspace(3)* %5, align 16
-  store float %.i1, float addrspace(3)* %6, align 4
-  store float %.i2, float addrspace(3)* %7, align 8
-  store float %.i3, float addrspace(3)* %8, align 4
-  tail call void @dx.op.barrier(i32 83, i32 15)
-  tail call void @dx.op.barrier(i32 83, i32 0)
-  %rem3 = and i32 %1, 63
-  %sub = xor i32 %rem3, 63
-  %17 = getelementptr inbounds [64 x <4 x float>], [64 x <4 x float>] addrspace(3)* @"\01?dataC@@3PAV?$matrix@M$01$01@@A.v", i32 0, i32 %sub, i32 0
-  %18 = getelementptr inbounds [64 x <4 x float>], [64 x <4 x float>] addrspace(3)* @"\01?dataC@@3PAV?$matrix@M$01$01@@A.v", i32 0, i32 %sub, i32 1
-  %19 = getelementptr inbounds [64 x <4 x float>], [64 x <4 x float>] addrspace(3)* @"\01?dataC@@3PAV?$matrix@M$01$01@@A.v", i32 0, i32 %sub, i32 2
-  %20 = getelementptr inbounds [64 x <4 x float>], [64 x <4 x float>] addrspace(3)* @"\01?dataC@@3PAV?$matrix@M$01$01@@A.v", i32 0, i32 %sub, i32 3
-  %21 = load float, float addrspace(3)* %17, align 16
-  %22 = load float, float addrspace(3)* %18, align 4
-  %23 = load float, float addrspace(3)* %19, align 8
-  %24 = load float, float addrspace(3)* %20, align 4
-  tail call void @dx.op.barrier(i32 83, i32 10)
-  %add = add i32 %4, 2
-  tail call void @dx.op.bufferStore.f32(i32 70, %dx.types.Handle %fA_UAV_structbuf, i32 %add, i32 0, float %21, float %22, float %23, float %24, i8 15)
-  tail call void @dx.op.barrier(i32 83, i32 %rem)
-  %add6 = add i32 %4, 1
-  %25 = load %struct.mat, %struct.mat addrspace(1)* @dx.typevar.2, align 4
-  tail call void @dx.op.bufferStore.f32(i32 70, %dx.types.Handle %fA_UAV_structbuf, i32 %add6, i32 0, float %21, float %22, float %23, float %24, i8 15)
-  tail call void @dx.op.barrier(i32 83, i32 2)
-  tail call void @dx.op.barrier(i32 83, i32 4)
-  tail call void @dx.op.bufferStore.f32(i32 70, %dx.types.Handle %fA_UAV_structbuf, i32 %4, i32 0, float %21, float %22, float %23, float %24, i8 15)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.threadId.i32(i32, i32) #1
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.groupId.i32(i32, i32) #1
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.threadIdInGroup.i32(i32, i32) #1
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.flattenedThreadIdInGroup.i32(i32) #1
-
-; Function Attrs: nounwind readnone
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #1
-
-; Function Attrs: nounwind readonly
-declare %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32, %dx.types.Handle, i32, i32) #2
-
-; Function Attrs: nounwind
-declare void @dx.op.barrier(i32, i32) #3
-
-; Function Attrs: nounwind
-declare void @dx.op.bufferStore.f32(i32, %dx.types.Handle, i32, i32, float, float, float, float, i8) #3
-
-attributes #0 = { alwaysinline nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="0" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind readonly }
-attributes #3 = { nounwind }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.resources = !{!3}
-!dx.typeAnnotations = !{!10, !19}
-!dx.entryPoints = !{!32}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 5}
-!2 = !{!"cs", i32 6, i32 0}
-!3 = !{!4, !8, null, null}
-!4 = !{!5, !7}
-!5 = !{i32 0, %dx.alignment.legacy.class.StructuredBuffer* @"\01?mats@@3V?$StructuredBuffer@Umat@@@@A_legacy", !"mats", i32 0, i32 0, i32 1, i32 12, i32 0, !6}
-!6 = !{i32 1, i32 16}
-!7 = !{i32 1, %dx.alignment.legacy.class.StructuredBuffer.0* @"\01?mats2@@3V?$StructuredBuffer@V?$matrix@M$01$01@@@@A_legacy", !"mats2", i32 0, i32 1, i32 1, i32 12, i32 0, !6}
-!8 = !{!9}
-!9 = !{i32 0, %dx.alignment.legacy.class.RWStructuredBuffer* @"\01?fA@@3V?$RWStructuredBuffer@V?$matrix@M$01$01@@@@A_legacy", !"fA", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !6}
-!10 = !{i32 0, %class.RWStructuredBuffer addrspace(1)* @dx.typevar.0, !11, %class.StructuredBuffer addrspace(1)* @dx.typevar.1, !14, %struct.mat addrspace(1)* @dx.typevar.2, !16, %class.StructuredBuffer.0 addrspace(1)* @dx.typevar.3, !11, %dx.alignment.legacy.class.RWStructuredBuffer addrspace(1)* @dx.typevar.4, !11, %dx.alignment.legacy.struct.mat addrspace(1)* @dx.typevar.5, !16, %dx.alignment.legacy.class.StructuredBuffer addrspace(1)* @dx.typevar.6, !14, %dx.alignment.legacy.class.StructuredBuffer.0 addrspace(1)* @dx.typevar.7, !11}
-!11 = !{i32 24, !12}
-!12 = !{i32 2, !13, i32 3, i32 0, i32 6, !"h", i32 7, i32 9}
-!13 = !{i32 2, i32 2, i32 2}
-!14 = !{i32 24, !15}
-!15 = !{i32 3, i32 0, i32 6, !"h"}
-!16 = !{i32 24, !17}
-!17 = !{i32 2, !18, i32 3, i32 0, i32 6, !"f2x2", i32 7, i32 9}
-!18 = !{i32 2, i32 2, i32 1}
-!19 = !{i32 1, void (<2 x i32>, <2 x i32>, <2 x i32>, i32)* @main, !20}
-!20 = !{!21, !23, !26, !28, !30}
-!21 = !{i32 1, !22, !22}
-!22 = !{}
-!23 = !{i32 0, !24, !25}
-!24 = !{i32 4, !"SV_DispatchThreadID", i32 7, i32 5}
-!25 = !{i32 0}
-!26 = !{i32 0, !27, !25}
-!27 = !{i32 4, !"SV_GroupID", i32 7, i32 5}
-!28 = !{i32 0, !29, !25}
-!29 = !{i32 4, !"SV_GroupThreadID", i32 7, i32 5}
-!30 = !{i32 0, !31, !25}
-!31 = !{i32 4, !"SV_GroupIndex", i32 7, i32 5}
-!32 = !{void (<2 x i32>, <2 x i32>, <2 x i32>, i32)* @main, !"", null, !3, !33}
-!33 = !{i32 0, i64 16, i32 4, !34}
-!34 = !{i32 8, i32 8, i32 1}
diff --git a/tools/clang/test/HLSL/dxil_validation/cbuffer1.50.ll b/tools/clang/test/HLSL/dxil_validation/cbuffer1.50.ll
deleted file mode 100644
index b90f78082..000000000
--- a/tools/clang/test/HLSL/dxil_validation/cbuffer1.50.ll
+++ /dev/null
@@ -1,117 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK:Cbuffer access out of bound
-
-;
-; Input signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; no %s
-;
-; Output signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; SV_Target                0   xyzw        0   TARGET   float   xyzw
-;
-;
-; Pipeline Runtime Information:
-;
-; Pixel Shader
-; DepthOutput=0
-; SampleFrequency=0
-;
-;
-; Output signature:
-;
-; Name                 Index             InterpMode
-; -------------------- ----- ----------------------
-; SV_Target                0
-;
-; Buffer Definitions:
-;
-; cbuffer Foo2
-; {
-;
-;   struct Foo2
-;   {
-;
-;       float4 g2;                                    ; Offset:    0
-;
-;   } Foo2                                            ; Offset:    0 Size:    16
-;
-; }
-;
-;
-; Resource Bindings:
-;
-; Name                                 Type  Format         Dim      ID      HLSL Bind  Count
-; ------------------------------ ---------- ------- ----------- ------- -------------- ------
-; Foo2                              cbuffer      NA          NA     CB0            cb5     1
-;
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-%Foo2 = type { <4 x float> }
-%dx.types.Handle = type { i8* }
-
-@Foo2 = external constant %Foo2
-@dx.typevar.0 = external addrspace(1) constant %Foo2
-@llvm.used = appending global [3 x i8*] [i8* bitcast (%Foo2* @Foo2 to i8*), i8* bitcast (%Foo2* @Foo2 to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%Foo2 addrspace(1)* @dx.typevar.0 to i8 addrspace(1)*) to i8*)], section "llvm.metadata"
-
-; Function Attrs: nounwind
-define void @main.flat(<4 x float>* nocapture readnone) #0 {
-entry:
-  %1 = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 2, i32 0, i32 5, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %2 = call float @dx.op.cbufferLoad.f32(i32 59, %dx.types.Handle %1, i32 0, i32 8)  ; CBufferLoad(handle,byteOffset,alignment)
-  %3 = call float @dx.op.cbufferLoad.f32(i32 59, %dx.types.Handle %1, i32 4, i32 8)  ; CBufferLoad(handle,byteOffset,alignment)
-  %4 = call float @dx.op.cbufferLoad.f32(i32 59, %dx.types.Handle %1, i32 8, i32 8)  ; CBufferLoad(handle,byteOffset,alignment)
-  %5 = call float @dx.op.cbufferLoad.f32(i32 59, %dx.types.Handle %1, i32 16, i32 8)  ; CBufferLoad(handle,byteOffset,alignment)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %2)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %3)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %4)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %5)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  ret void
-}
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind readnone
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #1
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.cbufferLoad.f32(i32, %dx.types.Handle, i32, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.resources = !{!3}
-!dx.typeAnnotations = !{!6, !9}
-!dx.entryPoints = !{!16}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"ps", i32 6, i32 0}
-!3 = !{null, null, !4, null}
-!4 = !{!5}
-!5 = !{i32 0, %Foo2* @Foo2, !"Foo2", i32 0, i32 5, i32 1, i32 16, null}
-!6 = !{i32 0, %Foo2 addrspace(1)* @dx.typevar.0, !7}
-!7 = !{i32 0, !8}
-!8 = !{i32 3, i32 0, i32 6, !"g2", i32 7, i32 9}
-!9 = !{i32 1, void (<4 x float>*)* @main.flat, !10}
-!10 = !{!11, !13}
-!11 = !{i32 0, !12, !12}
-!12 = !{}
-!13 = !{i32 1, !14, !15}
-!14 = !{i32 4, !"SV_TARGET", i32 7, i32 9}
-!15 = !{i32 0}
-!16 = !{void (<4 x float>*)* @main.flat, !"", !17, !3, null}
-!17 = !{null, !18, null}
-!18 = !{!19}
-!19 = !{i32 0, !"SV_Target", i8 9, i8 16, !15, i8 0, i32 1, i8 4, i32 0, i8 0, null}
-
diff --git a/tools/clang/test/HLSL/dxil_validation/cbuffer1.50_legacy.ll b/tools/clang/test/HLSL/dxil_validation/cbuffer1.50_legacy.ll
deleted file mode 100644
index e30d47951..000000000
--- a/tools/clang/test/HLSL/dxil_validation/cbuffer1.50_legacy.ll
+++ /dev/null
@@ -1,120 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK:Cbuffer access out of bound
-
-
-;
-; Input signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; no %s
-;
-; Output signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; SV_Target                0   xyzw        0   TARGET   float   xyzw
-;
-;
-; Pipeline Runtime Information:
-;
-; Pixel Shader
-; DepthOutput=0
-; SampleFrequency=0
-;
-;
-; Output signature:
-;
-; Name                 Index             InterpMode
-; -------------------- ----- ----------------------
-; SV_Target                0
-;
-; Buffer Definitions:
-;
-; cbuffer Foo2
-; {
-;
-;   struct Foo2
-;   {
-;
-;       float4 g2;                                    ; Offset:    0
-;
-;   } Foo2                                            ; Offset:    0 Size:    16
-;
-; }
-;
-;
-; Resource Bindings:
-;
-; Name                                 Type  Format         Dim      ID      HLSL Bind  Count
-; ------------------------------ ---------- ------- ----------- ------- -------------- ------
-; Foo2                              cbuffer      NA          NA     CB0            cb5     1
-;
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-%Foo2 = type { <4 x float> }
-%dx.types.Handle = type { i8* }
-%dx.types.CBufRet.f32 = type { float, float, float, float }
-
-@Foo2 = external constant %Foo2
-@dx.typevar.0 = external addrspace(1) constant %Foo2
-@llvm.used = appending global [3 x i8*] [i8* bitcast (%Foo2* @Foo2 to i8*), i8* bitcast (%Foo2* @Foo2 to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%Foo2 addrspace(1)* @dx.typevar.0 to i8 addrspace(1)*) to i8*)], section "llvm.metadata"
-
-; Function Attrs: nounwind
-define void @main.flat(<4 x float>* nocapture readnone) #0 {
-entry:
-  %1 = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 2, i32 0, i32 5, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %2 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 60, %dx.types.Handle %1, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
-  %3 = extractvalue %dx.types.CBufRet.f32 %2, 0
-  %4 = extractvalue %dx.types.CBufRet.f32 %2, 1
-  %5 = extractvalue %dx.types.CBufRet.f32 %2, 2
-  %6 = extractvalue %dx.types.CBufRet.f32 %2, 3
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %3)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %4)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %5)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %6)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  ret void
-}
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind readnone
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #1
-
-; Function Attrs: nounwind readnone
-declare %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32, %dx.types.Handle, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.resources = !{!3}
-!dx.typeAnnotations = !{!6, !9}
-!dx.entryPoints = !{!16}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"ps", i32 6, i32 0}
-!3 = !{null, null, !4, null}
-!4 = !{!5}
-!5 = !{i32 0, %Foo2* @Foo2, !"Foo2", i32 0, i32 5, i32 1, i32 16, null}
-!6 = !{i32 0, %Foo2 addrspace(1)* @dx.typevar.0, !7}
-!7 = !{i32 0, !8}
-!8 = !{i32 3, i32 0, i32 6, !"g2", i32 7, i32 9}
-!9 = !{i32 1, void (<4 x float>*)* @main.flat, !10}
-!10 = !{!11, !13}
-!11 = !{i32 0, !12, !12}
-!12 = !{}
-!13 = !{i32 1, !14, !15}
-!14 = !{i32 4, !"SV_TARGET", i32 7, i32 9}
-!15 = !{i32 0}
-!16 = !{void (<4 x float>*)* @main.flat, !"", !17, !3, null}
-!17 = !{null, !18, null}
-!18 = !{!19}
-!19 = !{i32 0, !"SV_Target", i8 9, i8 16, !15, i8 0, i32 1, i8 4, i32 0, i8 0, null}
-
diff --git a/tools/clang/test/HLSL/dxil_validation/csThreadSize.ll b/tools/clang/test/HLSL/dxil_validation/csThreadSize.ll
deleted file mode 100644
index 13075371a..000000000
--- a/tools/clang/test/HLSL/dxil_validation/csThreadSize.ll
+++ /dev/null
@@ -1,159 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: Declared Thread Group X size 1025 outside valid range [1..1024]
-; CHECK: Declared Thread Group Y size 1025 outside valid range [1..1024]
-; CHECK: Declared Thread Group Z size 1025 outside valid range [1..64]
-; CHECK: Declared Thread Group Count 1076890625 (X*Y*Z) is beyond the valid maximum of 1024
-; CHECK: Total Thread Group Shared Memory storage is 1024000000, exceeded 32768
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-%dx.alignment.legacy.class.RWStructuredBuffer = type { [2 x <2 x float>] }
-%dx.alignment.legacy.class.StructuredBuffer = type { %dx.alignment.legacy.struct.mat }
-%dx.alignment.legacy.struct.mat = type { [2 x <2 x float>] }
-%dx.alignment.legacy.class.StructuredBuffer.0 = type { [2 x <2 x float>] }
-%class.RWStructuredBuffer = type { %class.matrix.float.2.2 }
-%class.matrix.float.2.2 = type { [2 x <2 x float>] }
-%class.StructuredBuffer = type { %struct.mat }
-%struct.mat = type { %class.matrix.float.2.2 }
-%class.StructuredBuffer.0 = type { %class.matrix.float.2.2 }
-%dx.types.Handle = type { i8* }
-%dx.types.ResRet.f32 = type { float, float, float, float, i32 }
-
-@"\01?dataC@@3PAV?$matrix@M$01$01@@A.v" = addrspace(3) global [64000000 x <4 x float>] undef
-@"\01?fA@@3V?$RWStructuredBuffer@V?$matrix@M$01$01@@@@A_legacy" = external global %dx.alignment.legacy.class.RWStructuredBuffer
-@"\01?mats@@3V?$StructuredBuffer@Umat@@@@A_legacy" = external global %dx.alignment.legacy.class.StructuredBuffer
-@"\01?mats2@@3V?$StructuredBuffer@V?$matrix@M$01$01@@@@A_legacy" = external global %dx.alignment.legacy.class.StructuredBuffer.0
-@dx.typevar.0 = external addrspace(1) constant %class.RWStructuredBuffer
-@dx.typevar.1 = external addrspace(1) constant %class.StructuredBuffer
-@dx.typevar.2 = external addrspace(1) constant %struct.mat
-@dx.typevar.3 = external addrspace(1) constant %class.StructuredBuffer.0
-@dx.typevar.4 = external addrspace(1) constant %dx.alignment.legacy.class.RWStructuredBuffer
-@dx.typevar.5 = external addrspace(1) constant %dx.alignment.legacy.struct.mat
-@dx.typevar.6 = external addrspace(1) constant %dx.alignment.legacy.class.StructuredBuffer
-@dx.typevar.7 = external addrspace(1) constant %dx.alignment.legacy.class.StructuredBuffer.0
-@llvm.used = appending global [11 x i8*] [i8* bitcast (%dx.alignment.legacy.class.RWStructuredBuffer* @"\01?fA@@3V?$RWStructuredBuffer@V?$matrix@M$01$01@@@@A_legacy" to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%class.StructuredBuffer.0 addrspace(1)* @dx.typevar.3 to i8 addrspace(1)*) to i8*), i8* bitcast (%dx.alignment.legacy.class.StructuredBuffer* @"\01?mats@@3V?$StructuredBuffer@Umat@@@@A_legacy" to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%dx.alignment.legacy.class.StructuredBuffer addrspace(1)* @dx.typevar.6 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%class.RWStructuredBuffer addrspace(1)* @dx.typevar.0 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%dx.alignment.legacy.class.RWStructuredBuffer addrspace(1)* @dx.typevar.4 to i8 addrspace(1)*) to i8*), i8* bitcast (%dx.alignment.legacy.class.StructuredBuffer.0* @"\01?mats2@@3V?$StructuredBuffer@V?$matrix@M$01$01@@@@A_legacy" to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast 
-(%struct.mat addrspace(1)* @dx.typevar.2 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%class.StructuredBuffer addrspace(1)* @dx.typevar.1 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%dx.alignment.legacy.class.StructuredBuffer.0 addrspace(1)* @dx.typevar.7 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%dx.alignment.legacy.struct.mat addrspace(1)* @dx.typevar.5 to i8 addrspace(1)*) to i8*)], section "llvm.metadata"
-
-; Function Attrs: alwaysinline nounwind
-define void @main(<2 x i32> %tid, <2 x i32> %gid, <2 x i32> %gtid, i32 %gidx) #0 {
-entry:
-  %fA_UAV_structbuf = tail call %dx.types.Handle @dx.op.createHandle(i32 58, i8 1, i32 0, i32 0, i1 false)
-  %mats2_texture_structbuf = tail call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 1, i32 0, i1 false)
-  %mats_texture_structbuf = tail call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 0, i32 0, i1 false)
-  %0 = tail call i32 @dx.op.threadId.i32(i32 93, i32 0)
-  %1 = tail call i32 @dx.op.threadId.i32(i32 93, i32 1)
-  %2 = tail call i32 @dx.op.groupId.i32(i32 94, i32 0)
-  %3 = tail call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 1)
-  %4 = tail call i32 @dx.op.flattenedThreadIdInGroup.i32(i32 96)
-  %rem = and i32 %0, 63
-  %5 = getelementptr inbounds [64000000 x <4 x float>], [64000000 x <4 x float>] addrspace(3)* @"\01?dataC@@3PAV?$matrix@M$01$01@@A.v", i32 0, i32 %rem, i32 0
-  %6 = getelementptr inbounds [64000000 x <4 x float>], [64000000 x <4 x float>] addrspace(3)* @"\01?dataC@@3PAV?$matrix@M$01$01@@A.v", i32 0, i32 %rem, i32 1
-  %7 = getelementptr inbounds [64000000 x <4 x float>], [64000000 x <4 x float>] addrspace(3)* @"\01?dataC@@3PAV?$matrix@M$01$01@@A.v", i32 0, i32 %rem, i32 2
-  %8 = getelementptr inbounds [64000000 x <4 x float>], [64000000 x <4 x float>] addrspace(3)* @"\01?dataC@@3PAV?$matrix@M$01$01@@A.v", i32 0, i32 %rem, i32 3
-  %BufferLoad = tail call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 69, %dx.types.Handle %mats_texture_structbuf, i32 %2, i32 0)
-  %9 = extractvalue %dx.types.ResRet.f32 %BufferLoad, 0
-  %10 = extractvalue %dx.types.ResRet.f32 %BufferLoad, 1
-  %11 = extractvalue %dx.types.ResRet.f32 %BufferLoad, 2
-  %12 = extractvalue %dx.types.ResRet.f32 %BufferLoad, 3
-  %BufferLoad7 = tail call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 69, %dx.types.Handle %mats2_texture_structbuf, i32 %3, i32 0)
-  %13 = extractvalue %dx.types.ResRet.f32 %BufferLoad7, 0
-  %14 = extractvalue %dx.types.ResRet.f32 %BufferLoad7, 1
-  %15 = extractvalue %dx.types.ResRet.f32 %BufferLoad7, 2
-  %16 = extractvalue %dx.types.ResRet.f32 %BufferLoad7, 3
-  %.i0 = fadd fast float %13, %9
-  %.i1 = fadd fast float %14, %11
-  %.i2 = fadd fast float %15, %10
-  %.i3 = fadd fast float %16, %12
-  store float %.i0, float addrspace(3)* %5, align 16
-  store float %.i1, float addrspace(3)* %6, align 4
-  store float %.i2, float addrspace(3)* %7, align 8
-  store float %.i3, float addrspace(3)* %8, align 4
-  tail call void @dx.op.barrier(i32 83, i32 9)
-  %rem3 = and i32 %1, 63
-  %sub = xor i32 %rem3, 63
-  %17 = getelementptr inbounds [64000000 x <4 x float>], [64000000 x <4 x float>] addrspace(3)* @"\01?dataC@@3PAV?$matrix@M$01$01@@A.v", i32 0, i32 %sub, i32 0
-  %18 = getelementptr inbounds [64000000 x <4 x float>], [64000000 x <4 x float>] addrspace(3)* @"\01?dataC@@3PAV?$matrix@M$01$01@@A.v", i32 0, i32 %sub, i32 1
-  %19 = getelementptr inbounds [64000000 x <4 x float>], [64000000 x <4 x float>] addrspace(3)* @"\01?dataC@@3PAV?$matrix@M$01$01@@A.v", i32 0, i32 %sub, i32 2
-  %20 = getelementptr inbounds [64000000 x <4 x float>], [64000000 x <4 x float>] addrspace(3)* @"\01?dataC@@3PAV?$matrix@M$01$01@@A.v", i32 0, i32 %sub, i32 3
-  %21 = load float, float addrspace(3)* %17, align 16
-  %22 = load float, float addrspace(3)* %18, align 4
-  %23 = load float, float addrspace(3)* %19, align 8
-  %24 = load float, float addrspace(3)* %20, align 4
-  tail call void @dx.op.bufferStore.f32(i32 70, %dx.types.Handle %fA_UAV_structbuf, i32 %4, i32 0, float %21, float %22, float %23, float %24, i8 15)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.threadId.i32(i32, i32) #1
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.groupId.i32(i32, i32) #1
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.threadIdInGroup.i32(i32, i32) #1
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.flattenedThreadIdInGroup.i32(i32) #1
-
-; Function Attrs: nounwind readnone
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.bufferStore.f32(i32, %dx.types.Handle, i32, i32, float, float, float, float, i8) #2
-
-; Function Attrs: nounwind readonly
-declare %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32, %dx.types.Handle, i32, i32) #3
-
-; Function Attrs: nounwind
-declare void @dx.op.barrier(i32, i32) #2
-
-attributes #0 = { alwaysinline nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="0" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind }
-attributes #3 = { nounwind readonly }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.resources = !{!3}
-!dx.typeAnnotations = !{!10, !19}
-!dx.entryPoints = !{!32}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 5}
-!2 = !{!"cs", i32 6, i32 0}
-!3 = !{!4, !8, null, null}
-!4 = !{!5, !7}
-!5 = !{i32 0, %dx.alignment.legacy.class.StructuredBuffer* @"\01?mats@@3V?$StructuredBuffer@Umat@@@@A_legacy", !"mats", i32 0, i32 0, i32 1, i32 12, i32 0, !6}
-!6 = !{i32 1, i32 16}
-!7 = !{i32 1, %dx.alignment.legacy.class.StructuredBuffer.0* @"\01?mats2@@3V?$StructuredBuffer@V?$matrix@M$01$01@@@@A_legacy", !"mats2", i32 0, i32 1, i32 1, i32 12, i32 0, !6}
-!8 = !{!9}
-!9 = !{i32 0, %dx.alignment.legacy.class.RWStructuredBuffer* @"\01?fA@@3V?$RWStructuredBuffer@V?$matrix@M$01$01@@@@A_legacy", !"fA", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !6}
-!10 = !{i32 0, %class.RWStructuredBuffer addrspace(1)* @dx.typevar.0, !11, %class.StructuredBuffer addrspace(1)* @dx.typevar.1, !14, %struct.mat addrspace(1)* @dx.typevar.2, !16, %class.StructuredBuffer.0 addrspace(1)* @dx.typevar.3, !11, %dx.alignment.legacy.class.RWStructuredBuffer addrspace(1)* @dx.typevar.4, !11, %dx.alignment.legacy.struct.mat addrspace(1)* @dx.typevar.5, !16, %dx.alignment.legacy.class.StructuredBuffer addrspace(1)* @dx.typevar.6, !14, %dx.alignment.legacy.class.StructuredBuffer.0 addrspace(1)* @dx.typevar.7, !11}
-!11 = !{i32 24, !12}
-!12 = !{i32 2, !13, i32 3, i32 0, i32 6, !"h", i32 7, i32 9}
-!13 = !{i32 2, i32 2, i32 2}
-!14 = !{i32 24, !15}
-!15 = !{i32 3, i32 0, i32 6, !"h"}
-!16 = !{i32 24, !17}
-!17 = !{i32 2, !18, i32 3, i32 0, i32 6, !"f2x2", i32 7, i32 9}
-!18 = !{i32 2, i32 2, i32 1}
-!19 = !{i32 1, void (<2 x i32>, <2 x i32>, <2 x i32>, i32)* @main, !20}
-!20 = !{!21, !23, !26, !28, !30}
-!21 = !{i32 1, !22, !22}
-!22 = !{}
-!23 = !{i32 0, !24, !25}
-!24 = !{i32 4, !"SV_DispatchThreadID", i32 7, i32 5}
-!25 = !{i32 0}
-!26 = !{i32 0, !27, !25}
-!27 = !{i32 4, !"SV_GroupID", i32 7, i32 5}
-!28 = !{i32 0, !29, !25}
-!29 = !{i32 4, !"SV_GroupThreadID", i32 7, i32 5}
-!30 = !{i32 0, !31, !25}
-!31 = !{i32 4, !"SV_GroupIndex", i32 7, i32 5}
-!32 = !{void (<2 x i32>, <2 x i32>, <2 x i32>, i32)* @main, !"", null, !3, !33}
-!33 = !{i32 0, i64 16, i32 4, !34}
-!34 = !{i32 1025, i32 1025, i32 1025}
-
diff --git a/tools/clang/test/HLSL/dxil_validation/deadloop.ll b/tools/clang/test/HLSL/dxil_validation/deadloop.ll
deleted file mode 100644
index 1a6bf8341..000000000
--- a/tools/clang/test/HLSL/dxil_validation/deadloop.ll
+++ /dev/null
@@ -1,92 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: Named metadata 'dx.unused' is unknown
-; CHECK: Loop must have break
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-%"$Globals" = type { i32 }
-%dx.types.Handle = type { i8* }
-%dx.types.CBufRet.i32 = type { i32, i32, i32, i32 }
-
-@"\01?i@@3HA" = global i32 0, align 4
-@"$Globals" = external constant %"$Globals"
-@dx.typevar.0 = external addrspace(1) constant %"$Globals"
-@llvm.used = appending global [3 x i8*] [i8* bitcast (%"$Globals"* @"$Globals" to i8*), i8* bitcast (%"$Globals"* @"$Globals" to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%"$Globals" addrspace(1)* @dx.typevar.0 to i8 addrspace(1)*) to i8*)], section "llvm.metadata"
-
-; Function Attrs: nounwind
-define void @main.flat(<2 x float>, <3 x i32>, float* nocapture readnone) #0 {
-entry:
-  %3 = call i32 @dx.op.loadInput.i32(i32 4, i32 1, i32 0, i8 0, i32 undef)
-  %4 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)
-  %5 = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 2, i32 0, i32 0, i1 false)
-  %6 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 60, %dx.types.Handle %5, i32 0)
-  %7 = extractvalue %dx.types.CBufRet.i32 %6, 0
-  %cmp = icmp slt i32 %7, %3
-  br i1 %cmp, label %while.body, label %while.end
-
-while.body:                                       ; preds = %while.body, %entry
-  %s.01 = phi float [ %add, %while.body ], [ 0.000000e+00, %entry ]
-  %add = fadd fast float %s.01, %4
-  br label %while.body
-
-while.end:                                        ; preds = %while.body, %entry
-  %s.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add, %while.body ]
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %s.0.lcssa)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.loadInput.i32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind readnone
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #1
-
-; Function Attrs: nounwind readnone
-declare %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32, %dx.types.Handle, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.resources = !{!3}
-!dx.typeAnnotations = !{!6, !9}
-!dx.entryPoints = !{!20}
-!dx.unused = !{!20}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"ps", i32 6, i32 0}
-!3 = !{null, null, !4, null}
-!4 = !{!5}
-!5 = !{i32 0, %"$Globals"* @"$Globals", !"$Globals", i32 0, i32 0, i32 1, i32 4, null}
-!6 = !{i32 0, %"$Globals" addrspace(1)* @dx.typevar.0, !7}
-!7 = !{i32 0, !8}
-!8 = !{i32 3, i32 0, i32 6, !"i", i32 7, i32 4}
-!9 = !{i32 1, void (<2 x float>, <3 x i32>, float*)* @main.flat, !10}
-!10 = !{!11, !13, !16, !18}
-!11 = !{i32 0, !12, !12}
-!12 = !{}
-!13 = !{i32 0, !14, !15}
-!14 = !{i32 4, !"A", i32 7, i32 9}
-!15 = !{i32 0}
-!16 = !{i32 0, !17, !15}
-!17 = !{i32 4, !"B", i32 7, i32 4}
-!18 = !{i32 1, !19, !15}
-!19 = !{i32 4, !"SV_Target", i32 7, i32 9}
-!20 = !{void (<2 x float>, <3 x i32>, float*)* @main.flat, !"", !21, !3, null}
-!21 = !{!22, !25, null}
-!22 = !{!23, !24}
-!23 = !{i32 0, !"A", i8 9, i8 0, !15, i8 2, i32 1, i8 2, i32 0, i8 0, null}
-!24 = !{i32 1, !"B", i8 4, i8 0, !15, i8 1, i32 1, i8 3, i32 1, i8 0, null}
-!25 = !{!26}
-!26 = !{i32 0, !"SV_Target", i8 9, i8 16, !15, i8 0, i32 1, i8 1, i32 0, i8 0, null}
diff --git a/tools/clang/test/HLSL/dxil_validation/hsAttribute.ll b/tools/clang/test/HLSL/dxil_validation/hsAttribute.ll
deleted file mode 100644
index 2bd4a1400..000000000
--- a/tools/clang/test/HLSL/dxil_validation/hsAttribute.ll
+++ /dev/null
@@ -1,159 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: HS input control point count must be [1..32].  36 specified
-; CHECK: Invalid Tessellator Domain specified. Must be isoline, tri or quad
-; CHECK: Invalid Tessellator Partitioning specified. Must be integer, pow2, fractional_odd or fractional_even.
-; CHECK: Invalid Tessellator Output Primitive specified. Must be point, line, triangleCW or triangleCCW.
-; CHECK: Hull Shader MaxTessFactor must be [1.000000..64.000000].  65.000000 specified
-; CHECK: Invalid Tessellator Domain specified. Must be isoline, tri or quad
-; CHECK: output control point count must be [0..32].  36 specified
-
-
-
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-%class.Texture2D = type { <4 x float>, %"class.Texture2D<vector<float, 4> >::mips_type" }
-%"class.Texture2D<vector<float, 4> >::mips_type" = type { i32 }
-%struct.PSSceneIn = type { <4 x float>, <2 x float>, <3 x float> }
-%struct.VSSceneIn = type { <3 x float>, <3 x float>, <2 x float> }
-%struct.HSPerPatchData = type { [3 x float], float }
-%struct.HSPerVertexData = type { %struct.PSSceneIn }
-
-@dx.typevar.0 = external addrspace(1) constant %class.Texture2D
-@dx.typevar.1 = external addrspace(1) constant %"class.Texture2D<vector<float, 4> >::mips_type"
-@dx.typevar.2 = external addrspace(1) constant %struct.PSSceneIn
-@dx.typevar.3 = external addrspace(1) constant %struct.VSSceneIn
-@dx.typevar.4 = external addrspace(1) constant %struct.HSPerPatchData
-@dx.typevar.5 = external addrspace(1) constant %struct.HSPerVertexData
-@llvm.used = appending global [6 x i8*] [i8* addrspacecast (i8 addrspace(1)* bitcast (%class.Texture2D addrspace(1)* @dx.typevar.0 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%"class.Texture2D<vector<float, 4> >::mips_type" addrspace(1)* @dx.typevar.1 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.PSSceneIn addrspace(1)* @dx.typevar.2 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.VSSceneIn addrspace(1)* @dx.typevar.3 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.HSPerPatchData addrspace(1)* @dx.typevar.4 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.HSPerVertexData addrspace(1)* @dx.typevar.5 to i8 addrspace(1)*) to i8*)], section "llvm.metadata"
-
-; Function Attrs: nounwind
-define void @"\01?HSPerPatchFunc@@YA?AUHSPerPatchData@@V?$InputPatch@UPSSceneIn@@$02@@@Z.flat"([3 x <4 x float>]* nocapture readnone, [3 x <2 x float>]* nocapture readnone, [3 x <3 x float>]* nocapture readnone, [3 x float]* nocapture readnone, float* nocapture readnone) #0 {
-entry:
-  %retval.0 = alloca [3 x float], align 4
-  %arrayidx3 = getelementptr inbounds [3 x float], [3 x float]* %retval.0, i32 0, i32 0
-  store float 1.000000e+00, float* %arrayidx3, align 4, !tbaa !56
-  %arrayidx22 = getelementptr inbounds [3 x float], [3 x float]* %retval.0, i32 0, i32 1
-  store float 1.000000e+00, float* %arrayidx22, align 4, !tbaa !56
-  %arrayidx41 = getelementptr inbounds [3 x float], [3 x float]* %retval.0, i32 0, i32 2
-  store float 1.000000e+00, float* %arrayidx41, align 4, !tbaa !56
-  %load = load [3 x float], [3 x float]* %retval.0, align 4
-  %5 = extractvalue [3 x float] %load, 0
-  call void @dx.op.storePatchConstant.f32(i32 109, i32 0, i32 0, i8 0, float %5)
-  %6 = extractvalue [3 x float] %load, 1
-  call void @dx.op.storePatchConstant.f32(i32 109, i32 0, i32 1, i8 0, float %6)
-  %7 = extractvalue [3 x float] %load, 2
-  call void @dx.op.storePatchConstant.f32(i32 109, i32 0, i32 2, i8 0, float %7)
-  call void @dx.op.storePatchConstant.f32(i32 109, i32 1, i32 0, i8 0, float 1.000000e+00)
-  ret void
-}
-
-; Function Attrs: nounwind
-define void @main.flat(i32, [3 x <4 x float>]* nocapture readnone, [3 x <2 x float>]* nocapture readnone, [3 x <3 x float>]* nocapture readnone, <4 x float>* nocapture readnone, <2 x float>* nocapture readnone, <3 x float>* nocapture readnone) #0 {
-entry:
-  %7 = call i32 @dx.op.outputControlPointID.i32(i32 110)
-  %8 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 %7)
-  %9 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 %7)
-  %10 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 %7)
-  %11 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 3, i32 %7)
-  %12 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 %7)
-  %13 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 1, i32 %7)
-  %14 = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 0, i32 %7)
-  %15 = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 1, i32 %7)
-  %16 = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 2, i32 %7)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %8)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %9)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %10)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %11)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 0, float %12)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 1, float %13)
-  call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 0, float %14)
-  call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 1, float %15)
-  call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 2, float %16)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.outputControlPointID.i32(i32) #1
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind
-declare void @dx.op.storePatchConstant.f32(i32, i32, i32, i8, float) #0
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0}
-!dx.valver = !{!1}
-!dx.version = !{!2}
-!dx.shaderModel = !{!3}
-!dx.typeAnnotations = !{!4, !23}
-!dx.entryPoints = !{!45}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 1, i32 0}
-!2 = !{i32 0, i32 7}
-!3 = !{!"hs", i32 6, i32 0}
-!4 = !{i32 0, %class.Texture2D addrspace(1)* @dx.typevar.0, !5, %"class.Texture2D<vector<float, 4> >::mips_type" addrspace(1)* @dx.typevar.1, !8, %struct.PSSceneIn addrspace(1)* @dx.typevar.2, !10, %struct.VSSceneIn addrspace(1)* @dx.typevar.3, !14, %struct.HSPerPatchData addrspace(1)* @dx.typevar.4, !18, %struct.HSPerVertexData addrspace(1)* @dx.typevar.5, !21}
-!5 = !{i32 20, !6, !7}
-!6 = !{i32 3, i32 0, i32 6, !"h", i32 7, i32 9}
-!7 = !{i32 3, i32 16, i32 6, !"mips"}
-!8 = !{i32 4, !9}
-!9 = !{i32 3, i32 0, i32 6, !"handle", i32 7, i32 5}
-!10 = !{i32 44, !11, !12, !13}
-!11 = !{i32 3, i32 0, i32 4, !"SV_Position", i32 6, !"pos", i32 7, i32 9}
-!12 = !{i32 3, i32 16, i32 4, !"TEXCOORD0", i32 6, !"tex", i32 7, i32 9}
-!13 = !{i32 3, i32 32, i32 4, !"NORMAL", i32 6, !"norm", i32 7, i32 9}
-!14 = !{i32 40, !15, !16, !17}
-!15 = !{i32 3, i32 0, i32 4, !"POSITION", i32 6, !"pos", i32 7, i32 9}
-!16 = !{i32 3, i32 16, i32 4, !"NORMAL", i32 6, !"norm", i32 7, i32 9}
-!17 = !{i32 3, i32 32, i32 4, !"TEXCOORD0", i32 6, !"tex", i32 7, i32 9}
-!18 = !{i32 40, !19, !20}
-!19 = !{i32 3, i32 0, i32 4, !"SV_TessFactor", i32 6, !"edges", i32 7, i32 9}
-!20 = !{i32 3, i32 36, i32 4, !"SV_InsideTessFactor", i32 6, !"inside", i32 7, i32 9}
-!21 = !{i32 44, !22}
-!22 = !{i32 3, i32 0, i32 6, !"v"}
-!23 = !{i32 1, void ([3 x <4 x float>]*, [3 x <2 x float>]*, [3 x <3 x float>]*, [3 x float]*, float*)* @"\01?HSPerPatchFunc@@YA?AUHSPerPatchData@@V?$InputPatch@UPSSceneIn@@$02@@@Z.flat", !24, void (i32, [3 x <4 x float>]*, [3 x <2 x float>]*, [3 x <3 x float>]*, <4 x float>*, <2 x float>*, <3 x float>*)* @main.flat, !39}
-!24 = !{!25, !27, !30, !32, !34, !37}
-!25 = !{i32 0, !26, !26}
-!26 = !{}
-!27 = !{i32 3, !28, !29}
-!28 = !{i32 4, !"SV_Position", i32 7, i32 9}
-!29 = !{i32 0}
-!30 = !{i32 3, !31, !29}
-!31 = !{i32 4, !"TEXCOORD0", i32 7, i32 9}
-!32 = !{i32 3, !33, !29}
-!33 = !{i32 4, !"NORMAL", i32 7, i32 9}
-!34 = !{i32 1, !35, !36}
-!35 = !{i32 4, !"SV_TessFactor", i32 7, i32 9}
-!36 = !{i32 0, i32 1, i32 2}
-!37 = !{i32 1, !38, !29}
-!38 = !{i32 4, !"SV_InsideTessFactor", i32 7, i32 9}
-!39 = !{!25, !40, !27, !30, !32, !42, !43, !44}
-!40 = !{i32 0, !41, !29}
-!41 = !{i32 4, !"SV_OutputControlPointID", i32 7, i32 5}
-!42 = !{i32 1, !28, !29}
-!43 = !{i32 1, !31, !29}
-!44 = !{i32 1, !33, !29}
-!45 = !{void (i32, [3 x <4 x float>]*, [3 x <2 x float>]*, [3 x <3 x float>]*, <4 x float>*, <2 x float>*, <3 x float>*)* @main.flat, !"", !46, null, !54}
-!46 = !{!47, !47, !51}
-!47 = !{!48, !49, !50}
-!48 = !{i32 0, !"SV_Position", i8 9, i8 3, !29, i8 4, i32 1, i8 4, i32 0, i8 0, null}
-!49 = !{i32 1, !"TEXCOORD", i8 9, i8 0, !29, i8 2, i32 1, i8 2, i32 1, i8 0, null}
-!50 = !{i32 2, !"NORMAL", i8 9, i8 0, !29, i8 2, i32 1, i8 3, i32 2, i8 0, null}
-!51 = !{!52, !53}
-!52 = !{i32 0, !"SV_TessFactor", i8 9, i8 25, !36, i8 0, i32 3, i8 1, i32 0, i8 0, null}
-!53 = !{i32 1, !"SV_InsideTessFactor", i8 9, i8 26, !29, i8 0, i32 1, i8 1, i32 3, i8 0, null}
-!54 = !{i32 3, !55}
-!55 = !{void ([3 x <4 x float>]*, [3 x <2 x float>]*, [3 x <3 x float>]*, [3 x float]*, float*)* @"\01?HSPerPatchFunc@@YA?AUHSPerPatchData@@V?$InputPatch@UPSSceneIn@@$02@@@Z.flat", i32 36, i32 36, i32 0, i32 0, i32 0, float 6.500000e+01}
-!56 = !{!57, !57, i64 0}
-!57 = !{!"float", !58, i64 0}
-!58 = !{!"omnipotent char", !59, i64 0}
-!59 = !{!"Simple C/C++ TBAA"}
diff --git a/tools/clang/test/HLSL/dxil_validation/interpChange.ll b/tools/clang/test/HLSL/dxil_validation/interpChange.ll
deleted file mode 100644
index e890d54ef..000000000
--- a/tools/clang/test/HLSL/dxil_validation/interpChange.ll
+++ /dev/null
@@ -1,58 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: signature element A at location (0,2) size (1,2) has interpolation mode that differs from another element packed into the same row.
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-; Function Attrs: nounwind
-define void @main.flat(<2 x float>, <2 x float>, <4 x float>* nocapture readnone) #0 {
-entry:
-  %3 = tail call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 undef)
-  %4 = tail call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 1, i32 undef)
-  %5 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)
-  %6 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 undef)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %5)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %6)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %3)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %4)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.typeAnnotations = !{!3}
-!dx.entryPoints = !{!15}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"ps", i32 6, i32 0}
-!3 = !{i32 1, void (<2 x float>, <2 x float>, <4 x float>*)* @main.flat, !4}
-!4 = !{!5, !7, !10, !13}
-!5 = !{i32 0, !6, !6}
-!6 = !{}
-!7 = !{i32 0, !8, !9}
-!8 = !{i32 4, !"A", i32 7, i32 9}
-!9 = !{i32 0}
-!10 = !{i32 0, !11, !12}
-!11 = !{i32 4, !"A1", i32 5, i32 3, i32 7, i32 9}
-!12 = !{i32 1}
-!13 = !{i32 1, !14, !9}
-!14 = !{i32 4, !"SV_TARGET", i32 7, i32 9}
-!15 = !{void (<2 x float>, <2 x float>, <4 x float>*)* @main.flat, !"", !16, null, null}
-!16 = !{!17, !20, null}
-!17 = !{!18, !19}
-!18 = !{i32 0, !"A", i8 9, i8 0, !9, i8 2, i32 1, i8 2, i32 0, i8 0, null}
-!19 = !{i32 1, !"A", i8 9, i8 0, !12, i8 3, i32 1, i8 2, i32 0, i8 2, null}
-!20 = !{!21}
-!21 = !{i32 0, !"SV_Target", i8 9, i8 16, !9, i8 0, i32 1, i8 4, i32 0, i8 0, null}
diff --git a/tools/clang/test/HLSL/dxil_validation/interpOnInt.ll b/tools/clang/test/HLSL/dxil_validation/interpOnInt.ll
deleted file mode 100644
index 684190d84..000000000
--- a/tools/clang/test/HLSL/dxil_validation/interpOnInt.ll
+++ /dev/null
@@ -1,76 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK:signature element A specifies invalid interpolation mode for integer component type.
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-; Function Attrs: nounwind
-define void @main.flat(<4 x float>, <4 x i32>, <4 x float>* nocapture readnone) #0 {
-entry:
-  %3 = tail call i32 @dx.op.loadInput.i32(i32 4, i32 1, i32 0, i8 0, i32 undef)
-  %4 = tail call i32 @dx.op.loadInput.i32(i32 4, i32 1, i32 0, i8 1, i32 undef)
-  %5 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)
-  %6 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 undef)
-  %7 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 undef)
-  %8 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 3, i32 undef)
-  %conv.i0 = uitofp i32 %4 to float
-  %conv.i1 = uitofp i32 %3 to float
-  %mul.i0 = fmul fast float %5, %conv.i0
-  %mul.i1 = fmul fast float %6, %conv.i1
-  %mul.i2 = fmul fast float %7, %conv.i1
-  %mul.i3 = fmul fast float %8, %conv.i1
-  %FAbs = tail call float @dx.op.unary.f32(i32 6, float %mul.i0)
-  %FAbs1 = tail call float @dx.op.unary.f32(i32 6, float %mul.i1)
-  %FAbs2 = tail call float @dx.op.unary.f32(i32 6, float %mul.i2)
-  %FAbs3 = tail call float @dx.op.unary.f32(i32 6, float %mul.i3)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %FAbs)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %FAbs1)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %FAbs2)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %FAbs3)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.loadInput.i32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.unary.f32(i32, float) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.typeAnnotations = !{!3}
-!dx.entryPoints = !{!15}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"ps", i32 6, i32 0}
-!3 = !{i32 1, void (<4 x float>, <4 x i32>, <4 x float>*)* @main.flat, !4}
-!4 = !{!5, !7, !10, !13}
-!5 = !{i32 0, !6, !6}
-!6 = !{}
-!7 = !{i32 0, !8, !9}
-!8 = !{i32 4, !"A", i32 7, i32 9}
-!9 = !{i32 0}
-!10 = !{i32 0, !11, !12}
-!11 = !{i32 4, !"A1", i32 5, i32 3, i32 7, i32 5}
-!12 = !{i32 1}
-!13 = !{i32 1, !14, !9}
-!14 = !{i32 4, !"SV_TARGET", i32 7, i32 9}
-!15 = !{void (<4 x float>, <4 x i32>, <4 x float>*)* @main.flat, !"", !16, null, null}
-!16 = !{!17, !20, null}
-!17 = !{!18, !19}
-!18 = !{i32 0, !"A", i8 9, i8 0, !9, i8 2, i32 1, i8 4, i32 0, i8 0, null}
-!19 = !{i32 1, !"A", i8 5, i8 0, !12, i8 3, i32 1, i8 4, i32 1, i8 0, null}
-!20 = !{!21}
-!21 = !{i32 0, !"SV_Target", i8 9, i8 16, !9, i8 0, i32 1, i8 4, i32 0, i8 0, null}
diff --git a/tools/clang/test/HLSL/dxil_validation/invalidSigCompTy.ll b/tools/clang/test/HLSL/dxil_validation/invalidSigCompTy.ll
deleted file mode 100644
index 89f948786..000000000
--- a/tools/clang/test/HLSL/dxil_validation/invalidSigCompTy.ll
+++ /dev/null
@@ -1,64 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: signature A specifies unrecognized or invalid component type
-
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-; Function Attrs: nounwind
-define void @main.flat(<4 x float>, <4 x float>* nocapture readnone) #0 {
-entry:
-  %2 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)
-  %3 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 undef)
-  %4 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 undef)
-  %5 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 3, i32 undef)
-  %mul.i0 = fmul fast float %3, %2
-  %mul.i2 = fmul fast float %4, %2
-  %mul.i3 = fmul fast float %5, %2
-  %FAbs = tail call float @dx.op.unary.f32(i32 6, float %mul.i0)
-  %FAbs2 = tail call float @dx.op.unary.f32(i32 6, float %mul.i2)
-  %FAbs3 = tail call float @dx.op.unary.f32(i32 6, float %mul.i3)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %FAbs)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %FAbs)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %FAbs2)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %FAbs3)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.unary.f32(i32, float) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.typeAnnotations = !{!3}
-!dx.entryPoints = !{!12}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"ps", i32 6, i32 0}
-!3 = !{i32 1, void (<4 x float>, <4 x float>*)* @main.flat, !4}
-!4 = !{!5, !7, !10}
-!5 = !{i32 0, !6, !6}
-!6 = !{}
-!7 = !{i32 0, !8, !9}
-!8 = !{i32 4, !"A", i32 7, i32 9}
-!9 = !{i32 0}
-!10 = !{i32 1, !11, !9}
-!11 = !{i32 4, !"SV_TARGET", i32 7, i32 9}
-!12 = !{void (<4 x float>, <4 x float>*)* @main.flat, !"", !13, null, null}
-!13 = !{!14, !16, null}
-!14 = !{!15}
-!15 = !{i32 0, !"A", i8 0, i8 0, !9, i8 2, i32 1, i8 4, i32 0, i8 0, null}
-!16 = !{!17}
-!17 = !{i32 0, !"SV_Target", i8 9, i8 16, !9, i8 0, i32 1, i8 4, i32 0, i8 0, null}
diff --git a/tools/clang/test/HLSL/dxil_validation/multiStream2.ll b/tools/clang/test/HLSL/dxil_validation/multiStream2.ll
deleted file mode 100644
index 13d9d1f80..000000000
--- a/tools/clang/test/HLSL/dxil_validation/multiStream2.ll
+++ /dev/null
@@ -1,280 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: Multiple GS output streams are used but 'XXX' is not pointlist
-
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-%"$Globals" = type { i32 }
-%struct.MyStruct = type { <4 x float>, <2 x float> }
-%struct.MyStruct2 = type { <3 x i32>, [3 x <4 x float>], <3 x i32> }
-%class.PointStream = type { %struct.MyStruct2 }
-%class.TriangleStream = type { %struct.MyStruct }
-%dx.types.Handle = type { i8* }
-%dx.types.CBufRet.i32 = type { i32, i32, i32, i32 }
-
-@"\01?g1@@3HA" = global i32 0, align 4
-@"$Globals" = external constant %"$Globals"
-@dx.typevar.0 = external addrspace(1) constant %struct.MyStruct
-@dx.typevar.1 = external addrspace(1) constant %struct.MyStruct2
-@dx.typevar.2 = external addrspace(1) constant %"$Globals"
-@llvm.used = appending global [5 x i8*] [i8* bitcast (%"$Globals"* @"$Globals" to i8*), i8* bitcast (%"$Globals"* @"$Globals" to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.MyStruct addrspace(1)* @dx.typevar.0 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.MyStruct2 addrspace(1)* @dx.typevar.1 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%"$Globals" addrspace(1)* @dx.typevar.2 to i8 addrspace(1)*) to i8*)], section "llvm.metadata"
-
-; Function Attrs: nounwind
-define void @main.flat([1 x <4 x float>]* nocapture readnone, %class.TriangleStream* nocapture readnone, <4 x float>* nocapture readnone, <2 x float>* nocapture readnone, %class.PointStream* nocapture readnone, <3 x i32>* nocapture readnone, [3 x <4 x float>]* nocapture readnone, <3 x i32>* nocapture readnone, %class.TriangleStream* nocapture readnone, <4 x float>* nocapture readnone, <2 x float>* nocapture readnone) #0 {
-entry:
-  %11 = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 2, i32 0, i32 0, i1 false)
-  %b.1.0 = alloca [3 x float], align 4
-  %b.1.1 = alloca [3 x float], align 4
-  %b.1.2 = alloca [3 x float], align 4
-  %b.1.3 = alloca [3 x float], align 4
-  %12 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 0)
-  %13 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 0)
-  %14 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 0)
-  %15 = getelementptr [3 x float], [3 x float]* %b.1.0, i32 0, i32 0
-  %16 = getelementptr [3 x float], [3 x float]* %b.1.1, i32 0, i32 0
-  %17 = getelementptr [3 x float], [3 x float]* %b.1.2, i32 0, i32 0
-  %18 = getelementptr [3 x float], [3 x float]* %b.1.3, i32 0, i32 0
-  store float 0.000000e+00, float* %15, align 4
-  store float 0.000000e+00, float* %16, align 4
-  store float 0.000000e+00, float* %17, align 4
-  store float 0.000000e+00, float* %18, align 4
-  %19 = getelementptr [3 x float], [3 x float]* %b.1.0, i32 0, i32 1
-  %20 = getelementptr [3 x float], [3 x float]* %b.1.1, i32 0, i32 1
-  %21 = getelementptr [3 x float], [3 x float]* %b.1.2, i32 0, i32 1
-  %22 = getelementptr [3 x float], [3 x float]* %b.1.3, i32 0, i32 1
-  store float 0.000000e+00, float* %19, align 4
-  store float 0.000000e+00, float* %20, align 4
-  store float 0.000000e+00, float* %21, align 4
-  store float 0.000000e+00, float* %22, align 4
-  %23 = getelementptr [3 x float], [3 x float]* %b.1.0, i32 0, i32 2
-  %24 = getelementptr [3 x float], [3 x float]* %b.1.1, i32 0, i32 2
-  %25 = getelementptr [3 x float], [3 x float]* %b.1.2, i32 0, i32 2
-  %26 = getelementptr [3 x float], [3 x float]* %b.1.3, i32 0, i32 2
-  %conv = fptoui float %12 to i32
-  %27 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 %conv)
-  %28 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 %conv)
-  %29 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 %conv)
-  %30 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 3, i32 %conv)
-  %conv3.i1 = fptoui float %13 to i32
-  %conv3.i2 = fptoui float %14 to i32
-  %conv5.i0 = fptoui float %27 to i32
-  %conv5.i1 = fptoui float %28 to i32
-  %conv5.i2 = fptoui float %29 to i32
-  %mul.i0 = fmul fast float %27, 4.400000e+01
-  %mul.i1 = fmul fast float %28, 4.400000e+01
-  %mul.i2 = fmul fast float %29, 4.400000e+01
-  %mul.i3 = fmul fast float %30, 4.400000e+01
-  store float %mul.i0, float* %23, align 4
-  store float %mul.i1, float* %24, align 4
-  store float %mul.i2, float* %25, align 4
-  store float %mul.i3, float* %26, align 4
-  %31 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 60, %dx.types.Handle %11, i32 0)
-  %32 = extractvalue %dx.types.CBufRet.i32 %31, 0
-  %tobool = icmp eq i32 %32, 0
-  br i1 %tobool, label %if.else, label %if.then
-
-if.then:                                          ; preds = %entry
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %27)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %28)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %29)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %30)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 0, float %12)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 1, float %13)
-  call void @dx.op.emitStream(i32 97, i8 0)
-  call void @dx.op.cutStream(i32 98, i8 0)
-  br label %if.end
-
-if.else:                                          ; preds = %entry
-  %33 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 3, i32 0)
-  %conv8 = fptoui float %33 to i32
-  %34 = getelementptr inbounds [3 x float], [3 x float]* %b.1.0, i32 0, i32 0
-  %35 = load float, float* %34, align 4
-  %36 = getelementptr inbounds [3 x float], [3 x float]* %b.1.1, i32 0, i32 0
-  %37 = load float, float* %36, align 4
-  %38 = getelementptr inbounds [3 x float], [3 x float]* %b.1.2, i32 0, i32 0
-  %39 = load float, float* %38, align 4
-  %40 = getelementptr inbounds [3 x float], [3 x float]* %b.1.3, i32 0, i32 0
-  %41 = load float, float* %40, align 4
-  %42 = getelementptr inbounds [3 x float], [3 x float]* %b.1.0, i32 0, i32 1
-  %43 = load float, float* %42, align 4
-  %44 = getelementptr inbounds [3 x float], [3 x float]* %b.1.1, i32 0, i32 1
-  %45 = load float, float* %44, align 4
-  %46 = getelementptr inbounds [3 x float], [3 x float]* %b.1.2, i32 0, i32 1
-  %47 = load float, float* %46, align 4
-  %48 = getelementptr inbounds [3 x float], [3 x float]* %b.1.3, i32 0, i32 1
-  %49 = load float, float* %48, align 4
-  %50 = getelementptr inbounds [3 x float], [3 x float]* %b.1.0, i32 0, i32 2
-  %51 = load float, float* %50, align 4
-  %52 = getelementptr inbounds [3 x float], [3 x float]* %b.1.1, i32 0, i32 2
-  %53 = load float, float* %52, align 4
-  %54 = getelementptr inbounds [3 x float], [3 x float]* %b.1.2, i32 0, i32 2
-  %55 = load float, float* %54, align 4
-  %56 = getelementptr inbounds [3 x float], [3 x float]* %b.1.3, i32 0, i32 2
-  %57 = load float, float* %56, align 4
-  call void @dx.op.storeOutput.i32(i32 5, i32 2, i32 0, i8 0, i32 %conv8)
-  call void @dx.op.storeOutput.i32(i32 5, i32 2, i32 0, i8 1, i32 %conv3.i1)
-  call void @dx.op.storeOutput.i32(i32 5, i32 2, i32 0, i8 2, i32 %conv3.i2)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 0, i8 0, float %35)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 0, i8 1, float %37)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 0, i8 2, float %39)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 0, i8 3, float %41)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 1, i8 0, float %43)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 1, i8 1, float %45)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 1, i8 2, float %47)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 1, i8 3, float %49)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 2, i8 0, float %51)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 2, i8 1, float %53)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 2, i8 2, float %55)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 2, i8 3, float %57)
-  call void @dx.op.storeOutput.i32(i32 5, i32 4, i32 0, i8 0, i32 %conv5.i0)
-  call void @dx.op.storeOutput.i32(i32 5, i32 4, i32 0, i8 1, i32 %conv5.i1)
-  call void @dx.op.storeOutput.i32(i32 5, i32 4, i32 0, i8 2, i32 %conv5.i2)
-  call void @dx.op.emitStream(i32 97, i8 1)
-  call void @dx.op.cutStream(i32 98, i8 1)
-  br label %if.end
-
-if.end:                                           ; preds = %if.else, %if.then
-  %b.0.0.i0 = phi i32 [ %conv, %if.then ], [ %conv8, %if.else ]
-  %58 = getelementptr inbounds [3 x float], [3 x float]* %b.1.0, i32 0, i32 0
-  %59 = load float, float* %58, align 4
-  %60 = getelementptr inbounds [3 x float], [3 x float]* %b.1.1, i32 0, i32 0
-  %61 = load float, float* %60, align 4
-  %62 = getelementptr inbounds [3 x float], [3 x float]* %b.1.2, i32 0, i32 0
-  %63 = load float, float* %62, align 4
-  %64 = getelementptr inbounds [3 x float], [3 x float]* %b.1.3, i32 0, i32 0
-  %65 = load float, float* %64, align 4
-  %66 = getelementptr inbounds [3 x float], [3 x float]* %b.1.0, i32 0, i32 1
-  %67 = load float, float* %66, align 4
-  %68 = getelementptr inbounds [3 x float], [3 x float]* %b.1.1, i32 0, i32 1
-  %69 = load float, float* %68, align 4
-  %70 = getelementptr inbounds [3 x float], [3 x float]* %b.1.2, i32 0, i32 1
-  %71 = load float, float* %70, align 4
-  %72 = getelementptr inbounds [3 x float], [3 x float]* %b.1.3, i32 0, i32 1
-  %73 = load float, float* %72, align 4
-  %74 = getelementptr inbounds [3 x float], [3 x float]* %b.1.0, i32 0, i32 2
-  %75 = load float, float* %74, align 4
-  %76 = getelementptr inbounds [3 x float], [3 x float]* %b.1.1, i32 0, i32 2
-  %77 = load float, float* %76, align 4
-  %78 = getelementptr inbounds [3 x float], [3 x float]* %b.1.2, i32 0, i32 2
-  %79 = load float, float* %78, align 4
-  %80 = getelementptr inbounds [3 x float], [3 x float]* %b.1.3, i32 0, i32 2
-  %81 = load float, float* %80, align 4
-  call void @dx.op.storeOutput.i32(i32 5, i32 2, i32 0, i8 0, i32 %b.0.0.i0)
-  call void @dx.op.storeOutput.i32(i32 5, i32 2, i32 0, i8 1, i32 %conv3.i1)
-  call void @dx.op.storeOutput.i32(i32 5, i32 2, i32 0, i8 2, i32 %conv3.i2)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 0, i8 0, float %59)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 0, i8 1, float %61)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 0, i8 2, float %63)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 0, i8 3, float %65)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 1, i8 0, float %67)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 1, i8 1, float %69)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 1, i8 2, float %71)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 1, i8 3, float %73)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 2, i8 0, float %75)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 2, i8 1, float %77)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 2, i8 2, float %79)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 2, i8 3, float %81)
-  call void @dx.op.storeOutput.i32(i32 5, i32 4, i32 0, i8 0, i32 %conv5.i0)
-  call void @dx.op.storeOutput.i32(i32 5, i32 4, i32 0, i8 1, i32 %conv5.i1)
-  call void @dx.op.storeOutput.i32(i32 5, i32 4, i32 0, i8 2, i32 %conv5.i2)
-  call void @dx.op.emitStream(i32 97, i8 1)
-  call void @dx.op.cutStream(i32 98, i8 1)
-  call void @dx.op.storeOutput.f32(i32 5, i32 5, i32 0, i8 0, float %27)
-  call void @dx.op.storeOutput.f32(i32 5, i32 5, i32 0, i8 1, float %28)
-  call void @dx.op.storeOutput.f32(i32 5, i32 5, i32 0, i8 2, float %29)
-  call void @dx.op.storeOutput.f32(i32 5, i32 5, i32 0, i8 3, float %30)
-  call void @dx.op.storeOutput.f32(i32 5, i32 6, i32 0, i8 0, float %12)
-  call void @dx.op.storeOutput.f32(i32 5, i32 6, i32 0, i8 1, float %13)
-  call void @dx.op.emitStream(i32 97, i8 2)
-  call void @dx.op.cutStream(i32 98, i8 2)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.i32(i32, i32, i32, i8, i32) #0
-
-; Function Attrs: nounwind readnone
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #1
-
-; Function Attrs: nounwind readnone
-declare %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32, %dx.types.Handle, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.cutStream(i32, i8) #0
-
-; Function Attrs: nounwind
-declare void @dx.op.emitStream(i32, i8) #0
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.resources = !{!3}
-!dx.typeAnnotations = !{!6, !16}
-!dx.entryPoints = !{!39}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"gs", i32 6, i32 0}
-!3 = !{null, null, !4, null}
-!4 = !{!5}
-!5 = !{i32 0, %"$Globals"* @"$Globals", !"$Globals", i32 0, i32 0, i32 1, i32 4, null}
-!6 = !{i32 0, %struct.MyStruct addrspace(1)* @dx.typevar.0, !7, %struct.MyStruct2 addrspace(1)* @dx.typevar.1, !10, %"$Globals" addrspace(1)* @dx.typevar.2, !14}
-!7 = !{i32 24, !8, !9}
-!8 = !{i32 3, i32 0, i32 4, !"SV_Position", i32 6, !"pos", i32 7, i32 9}
-!9 = !{i32 3, i32 16, i32 4, !"AAA", i32 6, !"a", i32 7, i32 9}
-!10 = !{i32 76, !11, !12, !13}
-!11 = !{i32 3, i32 0, i32 4, !"XXX", i32 6, !"X", i32 7, i32 5}
-!12 = !{i32 3, i32 16, i32 4, !"PPP", i32 6, !"p", i32 7, i32 9}
-!13 = !{i32 3, i32 64, i32 4, !"YYY", i32 6, !"Y", i32 7, i32 5}
-!14 = !{i32 0, !15}
-!15 = !{i32 3, i32 0, i32 6, !"g1", i32 7, i32 4}
-!16 = !{i32 1, void ([1 x <4 x float>]*, %class.TriangleStream*, <4 x float>*, <2 x float>*, %class.PointStream*, <3 x i32>*, [3 x <4 x float>]*, <3 x i32>*, %class.TriangleStream*, <4 x float>*, <2 x float>*)* @main.flat, !17}
-!17 = !{!18, !20, !23, !24, !26, !28, !29, !31, !34, !36, !37, !38}
-!18 = !{i32 0, !19, !19}
-!19 = !{}
-!20 = !{i32 0, !21, !22}
-!21 = !{i32 4, !"COORD", i32 7, i32 9}
-!22 = !{i32 0}
-!23 = !{i32 5, !19, !19}
-!24 = !{i32 5, !25, !22}
-!25 = !{i32 4, !"SV_Position", i32 7, i32 9}
-!26 = !{i32 5, !27, !22}
-!27 = !{i32 4, !"AAA", i32 7, i32 9}
-!28 = !{i32 6, !19, !19}
-!29 = !{i32 6, !30, !22}
-!30 = !{i32 4, !"XXX", i32 7, i32 5}
-!31 = !{i32 6, !32, !33}
-!32 = !{i32 4, !"PPP", i32 7, i32 9}
-!33 = !{i32 0, i32 1, i32 2}
-!34 = !{i32 6, !35, !22}
-!35 = !{i32 4, !"YYY", i32 7, i32 5}
-!36 = !{i32 7, !19, !19}
-!37 = !{i32 7, !25, !22}
-!38 = !{i32 7, !27, !22}
-!39 = !{void ([1 x <4 x float>]*, %class.TriangleStream*, <4 x float>*, <2 x float>*, %class.PointStream*, <3 x i32>*, [3 x <4 x float>]*, <3 x i32>*, %class.TriangleStream*, <4 x float>*, <2 x float>*)* @main.flat, !"", !40, !3, !53}
-!40 = !{!41, !43, null}
-!41 = !{!42}
-!42 = !{i32 0, !"COORD", i8 9, i8 0, !22, i8 2, i32 1, i8 4, i32 0, i8 0, null}
-!43 = !{!44, !45, !46, !48, !49, !50, !52}
-!44 = !{i32 0, !"SV_Position", i8 9, i8 3, !22, i8 4, i32 1, i8 4, i32 0, i8 0, null}
-!45 = !{i32 1, !"AAA", i8 9, i8 0, !22, i8 2, i32 1, i8 2, i32 1, i8 0, null}
-!46 = !{i32 2, !"XXX", i8 5, i8 0, !22, i8 1, i32 1, i8 3, i32 0, i8 0, !47}
-!47 = !{i32 0, i32 1}
-!48 = !{i32 3, !"PPP", i8 9, i8 0, !33, i8 2, i32 3, i8 4, i32 1, i8 0, !47}
-!49 = !{i32 4, !"YYY", i8 5, i8 0, !22, i8 1, i32 1, i8 3, i32 4, i8 0, !47}
-!50 = !{i32 5, !"SV_Position", i8 9, i8 3, !22, i8 4, i32 1, i8 4, i32 0, i8 0, !51}
-!51 = !{i32 0, i32 2}
-!52 = !{i32 6, !"AAA", i8 9, i8 0, !22, i8 2, i32 1, i8 2, i32 1, i8 0, !51}
-!53 = !{i32 1, !54}
-!54 = !{i32 1, i32 12, i32 7, i32 4, i32 1}
diff --git a/tools/clang/test/HLSL/dxil_validation/phiTGSM.ll b/tools/clang/test/HLSL/dxil_validation/phiTGSM.ll
deleted file mode 100644
index 132502dbd..000000000
--- a/tools/clang/test/HLSL/dxil_validation/phiTGSM.ll
+++ /dev/null
@@ -1,117 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-; CHECK: TGSM pointers must originate from an unambiguous TGSM global variable
-
-;
-; Input signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; no %s
-;
-; Output signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; no %s
-;
-; Pipeline Runtime Information:
-;
-;
-;
-; Buffer Definitions:
-;
-; cbuffer $Globals
-; {
-;
-;   struct $Globals
-;   {
-;
-;       float t;                                      ; Offset:    0
-;
-;   } $Globals                                        ; Offset:    0 Size:     4
-;
-; }
-;
-;
-; Resource Bindings:
-;
-; Name                                 Type  Format         Dim      ID      HLSL Bind  Count
-; ------------------------------ ---------- ------- ----------- ------- -------------- ------
-; $Globals                          cbuffer      NA          NA     CB0            cb0     1
-;
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-%"$Globals" = type { float }
-%dx.types.Handle = type { i8* }
-%dx.types.CBufRet.f32 = type { float, float, float, float }
-
-@"\01?g_Data@@3PAIA" = addrspace(3) global [32 x i32] zeroinitializer, align 4
-@"\01?g_Data2@@3PAIA" = addrspace(3) global [32 x i32] zeroinitializer, align 4
-@"\01?t@@3MA" = global float 0.000000e+00, align 4
-@dx.typevar.0 = external addrspace(1) constant %"$Globals"
-@llvm.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(1)* bitcast (%"$Globals" addrspace(1)* @dx.typevar.0 to i8 addrspace(1)*) to i8*)], section "llvm.metadata"
-
-; Function Attrs: alwaysinline nounwind
-define void @main(i32 %idx) #0 {
-entry:
-  %0 = call i32 @dx.op.threadId.i32(i32 93, i32 0)  ; ThreadId(component)
-  %1 = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 2, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %2 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 60, %dx.types.Handle %1, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
-  %3 = extractvalue %dx.types.CBufRet.f32 %2, 0
-  %cmp = fcmp fast ogt float %3, 1.000000e+00
-  br i1 %cmp, label %if.then, label %if.else
-
-if.then:                                          ; preds = %entry
-  %arrayidx = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @"\01?g_Data@@3PAIA", i32 0, i32 %0
-  br label %if.end
-
-if.else:                                          ; preds = %entry
-  %arrayidx2 = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @"\01?g_Data2@@3PAIA", i32 0, i32 %0
-  br label %if.end
-
-if.end:                                           ; preds = %if.else, %if.then
-  %arrayPhi = phi i32 addrspace(3)* [ %arrayidx, %if.then ], [ %arrayidx2, %if.else ]
-  %4 = atomicrmw add i32 addrspace(3)* %arrayPhi, i32 1 seq_cst
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.threadId.i32(i32, i32) #1
-
-; Function Attrs: nounwind readnone
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #1
-
-; Function Attrs: nounwind readnone
-declare %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32, %dx.types.Handle, i32) #1
-
-attributes #0 = { alwaysinline nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="0" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.resources = !{!3}
-!dx.typeAnnotations = !{!6, !9}
-!dx.entryPoints = !{!16}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 6}
-!2 = !{!"cs", i32 6, i32 0}
-!3 = !{null, null, !4, null}
-!4 = !{!5}
-!5 = !{i32 0, %"$Globals"* undef, !"$Globals", i32 0, i32 0, i32 1, i32 4, null}
-!6 = !{i32 0, %"$Globals" addrspace(1)* @dx.typevar.0, !7}
-!7 = !{i32 0, !8}
-!8 = !{i32 3, i32 0, i32 6, !"t", i32 7, i32 9}
-!9 = !{i32 1, void (i32)* @main, !10}
-!10 = !{!11, !13}
-!11 = !{i32 1, !12, !12}
-!12 = !{}
-!13 = !{i32 0, !14, !15}
-!14 = !{i32 4, !"SV_DispatchThreadId", i32 7, i32 5}
-!15 = !{i32 0}
-!16 = !{void (i32)* @main, !"", null, !3, !17}
-!17 = !{i32 4, !18}
-!18 = !{i32 64, i32 1, i32 1}
-
diff --git a/tools/clang/test/HLSL/dxil_validation/reducible.ll b/tools/clang/test/HLSL/dxil_validation/reducible.ll
deleted file mode 100644
index 7bf1dd844..000000000
--- a/tools/clang/test/HLSL/dxil_validation/reducible.ll
+++ /dev/null
@@ -1,176 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-; CHECK: Execution flow must be reducible
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-%struct.Interpolants2 = type { <4 x float>, <4 x float>, <4 x float> }
-%struct.Inh = type { %struct.Interpolants, float }
-%struct.Interpolants = type { <4 x float>, <4 x float> }
-%"$Globals" = type { %struct.Interpolants2, %struct.Inh, i32, <4 x i32> }
-%struct.Vertex = type { <4 x float>, <4 x float> }
-%dx.types.Handle = type { i8* }
-%dx.types.CBufRet.i32 = type { i32, i32, i32, i32 }
-%dx.types.CBufRet.f32 = type { float, float, float, float }
-
-@"\01?c2@@3UInterpolants2@@A" = global %struct.Interpolants2 zeroinitializer, align 4
-@"\01?c@@3UInh@@A" = global %struct.Inh zeroinitializer, align 4
-@"\01?i@@3HA" = global i32 0, align 4
-@"\01?i4@@3V?$vector@I$03@@A" = global <4 x i32> zeroinitializer, align 4
-@"$Globals" = external constant %"$Globals"
-@dx.typevar.0 = external addrspace(1) constant %struct.Interpolants2
-@dx.typevar.1 = external addrspace(1) constant %struct.Inh
-@dx.typevar.2 = external addrspace(1) constant %struct.Interpolants
-@dx.typevar.3 = external addrspace(1) constant %struct.Vertex
-@dx.typevar.4 = external addrspace(1) constant %"$Globals"
-@llvm.used = appending global [7 x i8*] [i8* bitcast (%"$Globals"* @"$Globals" to i8*), i8* bitcast (%"$Globals"* @"$Globals" to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.Interpolants2 addrspace(1)* @dx.typevar.0 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.Inh addrspace(1)* @dx.typevar.1 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.Interpolants addrspace(1)* @dx.typevar.2 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.Vertex addrspace(1)* @dx.typevar.3 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%"$Globals" addrspace(1)* @dx.typevar.4 to i8 addrspace(1)*) to i8*)], section "llvm.metadata"
-
-; Function Attrs: nounwind
-define void @main.flat(<4 x float>* nocapture readnone, <4 x float>* nocapture readnone, <4 x float>* nocapture readnone, <4 x float>* nocapture readnone) #0 {
-entry:
-  %4 = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 2, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %5 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 60, %dx.types.Handle %4, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
-  %6 = extractvalue %dx.types.CBufRet.i32 %5, 1
-  %cmp = icmp sgt i32 %6, 1
-  br i1 %cmp, label %if.then, label %if.else
-
-if.then:                                          ; preds = %entry
-  %7 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 60, %dx.types.Handle %4, i32 6)  ; CBufferLoadLegacy(handle,regIndex)
-  %8 = extractvalue %dx.types.CBufRet.i32 %7, 2
-  %9 = uitofp i32 %8 to float
-  br label %if.then.5
-
-if.else:                                          ; preds = %entry
-  %cmp2 = icmp sgt i32 %6, 0
-  br i1 %cmp2, label %if.then.5, label %if.else.6
-
-if.then.5:                                        ; preds = %if.else
-  %10 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 60, %dx.types.Handle %4, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
-  %11 = extractvalue %dx.types.CBufRet.f32 %10, 0
-  %12 = extractvalue %dx.types.CBufRet.f32 %10, 1
-  %13 = extractvalue %dx.types.CBufRet.f32 %10, 2
-  %14 = extractvalue %dx.types.CBufRet.f32 %10, 3
-  %15 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 60, %dx.types.Handle %4, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
-  %16 = extractvalue %dx.types.CBufRet.f32 %15, 0
-  %17 = extractvalue %dx.types.CBufRet.f32 %15, 1
-  %18 = extractvalue %dx.types.CBufRet.f32 %15, 2
-  %19 = extractvalue %dx.types.CBufRet.f32 %15, 3
-  %cmp12 = icmp sgt i32 %6, 1
-  br i1 %cmp2, label %if.then, label %if.else.6  
-
-if.else.6:                                        ; preds = %if.else
-  %cmp7 = icmp sgt i32 %6, -1
-  br i1 %cmp7, label %if.then.10, label %if.end.13
-
-if.then.10:                                       ; preds = %if.else.6
-  %20 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 60, %dx.types.Handle %4, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
-  %21 = extractvalue %dx.types.CBufRet.f32 %20, 0
-  %22 = extractvalue %dx.types.CBufRet.f32 %20, 1
-  %23 = extractvalue %dx.types.CBufRet.f32 %20, 2
-  %24 = extractvalue %dx.types.CBufRet.f32 %20, 3
-  %25 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 60, %dx.types.Handle %4, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
-  %26 = extractvalue %dx.types.CBufRet.f32 %25, 0
-  %27 = extractvalue %dx.types.CBufRet.f32 %25, 1
-  %28 = extractvalue %dx.types.CBufRet.f32 %25, 2
-  %29 = extractvalue %dx.types.CBufRet.f32 %25, 3
-  br label %return
-
-if.end.13:                                        ; preds = %if.else.6
-  %30 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %31 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %32 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %33 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 3, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %34 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %35 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 1, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %36 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 2, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %37 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 3, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  br label %return
-
-return:                                           ; preds = %if.end.13, %if.then.10, %if.then.5, %if.then
-  %retval.1.0.i0 = phi float [ %9, %if.then ], [ %16, %if.then.5 ], [ %26, %if.then.10 ], [ %34, %if.end.13 ]
-  %retval.1.0.i1 = phi float [ %9, %if.then ], [ %17, %if.then.5 ], [ %27, %if.then.10 ], [ %35, %if.end.13 ]
-  %retval.1.0.i2 = phi float [ %9, %if.then ], [ %18, %if.then.5 ], [ %28, %if.then.10 ], [ %36, %if.end.13 ]
-  %retval.1.0.i3 = phi float [ %9, %if.then ], [ %19, %if.then.5 ], [ %29, %if.then.10 ], [ %37, %if.end.13 ]
-  %retval.0.0.i0 = phi float [ %9, %if.then ], [ %11, %if.then.5 ], [ %21, %if.then.10 ], [ %30, %if.end.13 ]
-  %retval.0.0.i1 = phi float [ %9, %if.then ], [ %12, %if.then.5 ], [ %22, %if.then.10 ], [ %31, %if.end.13 ]
-  %retval.0.0.i2 = phi float [ %9, %if.then ], [ %13, %if.then.5 ], [ %23, %if.then.10 ], [ %32, %if.end.13 ]
-  %retval.0.0.i3 = phi float [ %9, %if.then ], [ %14, %if.then.5 ], [ %24, %if.then.10 ], [ %33, %if.end.13 ]
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %retval.0.0.i0)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %retval.0.0.i1)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %retval.0.0.i2)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %retval.0.0.i3)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 0, float %retval.1.0.i0)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 1, float %retval.1.0.i1)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 2, float %retval.1.0.i2)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 3, float %retval.1.0.i3)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind readnone
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #1
-
-; Function Attrs: nounwind readnone
-declare %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32, %dx.types.Handle, i32) #1
-
-; Function Attrs: nounwind readnone
-declare %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32, %dx.types.Handle, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.resources = !{!3}
-!dx.typeAnnotations = !{!6, !22}
-!dx.entryPoints = !{!34}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"vs", i32 6, i32 0}
-!3 = !{null, null, !4, null}
-!4 = !{!5}
-!5 = !{i32 0, %"$Globals"* @"$Globals", !"$Globals", i32 0, i32 0, i32 1, i32 112, null}
-!6 = !{i32 0, %struct.Interpolants2 addrspace(1)* @dx.typevar.0, !7, %struct.Inh addrspace(1)* @dx.typevar.1, !11, %struct.Interpolants addrspace(1)* @dx.typevar.2, !14, %struct.Vertex addrspace(1)* @dx.typevar.3, !15, %"$Globals" addrspace(1)* @dx.typevar.4, !17}
-!7 = !{i32 48, !8, !9, !10}
-!8 = !{i32 3, i32 0, i32 4, !"SV_POSITION0", i32 6, !"position", i32 7, i32 9}
-!9 = !{i32 3, i32 16, i32 4, !"COLOR0", i32 6, !"color", i32 7, i32 9}
-!10 = !{i32 3, i32 32, i32 4, !"COLOR2", i32 6, !"color2", i32 7, i32 9}
-!11 = !{i32 36, !12, !13}
-!12 = !{i32 3, i32 0, i32 6, !"Interpolants"}
-!13 = !{i32 3, i32 32, i32 6, !"a", i32 7, i32 9}
-!14 = !{i32 32, !8, !9}
-!15 = !{i32 32, !16, !9}
-!16 = !{i32 3, i32 0, i32 4, !"POSITION0", i32 6, !"position", i32 7, i32 9}
-!17 = !{i32 0, !18, !19, !20, !21}
-!18 = !{i32 3, i32 0, i32 6, !"c2"}
-!19 = !{i32 3, i32 48, i32 6, !"c"}
-!20 = !{i32 3, i32 84, i32 6, !"i", i32 7, i32 4}
-!21 = !{i32 3, i32 96, i32 6, !"i4", i32 7, i32 5}
-!22 = !{i32 1, void (<4 x float>*, <4 x float>*, <4 x float>*, <4 x float>*)* @main.flat, !23}
-!23 = !{!24, !26, !29, !31, !33}
-!24 = !{i32 0, !25, !25}
-!25 = !{}
-!26 = !{i32 0, !27, !28}
-!27 = !{i32 4, !"POSITION0", i32 7, i32 9}
-!28 = !{i32 0}
-!29 = !{i32 0, !30, !28}
-!30 = !{i32 4, !"COLOR0", i32 7, i32 9}
-!31 = !{i32 1, !32, !28}
-!32 = !{i32 4, !"SV_POSITION0", i32 7, i32 9}
-!33 = !{i32 1, !30, !28}
-!34 = !{void (<4 x float>*, <4 x float>*, <4 x float>*, <4 x float>*)* @main.flat, !"", !35, !3, null}
-!35 = !{!36, !39, null}
-!36 = !{!37, !38}
-!37 = !{i32 0, !"POSITION", i8 9, i8 3, !28, i8 0, i32 1, i8 4, i32 0, i8 0, null}
-!38 = !{i32 1, !"COLOR", i8 9, i8 0, !28, i8 0, i32 1, i8 4, i32 1, i8 0, null}
-!39 = !{!40, !41}
-!40 = !{i32 0, !"SV_Position", i8 9, i8 3, !28, i8 4, i32 1, i8 4, i32 0, i8 0, null}
-!41 = !{i32 1, !"COLOR", i8 9, i8 0, !28, i8 2, i32 1, i8 4, i32 1, i8 0, null}
-
diff --git a/tools/clang/test/HLSL/dxil_validation/sampleBias.ll b/tools/clang/test/HLSL/dxil_validation/sampleBias.ll
deleted file mode 100644
index 9d2e243b3..000000000
--- a/tools/clang/test/HLSL/dxil_validation/sampleBias.ll
+++ /dev/null
@@ -1,153 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: bias amount for sample_b must be in the range [-16.000000,15.990000], but 18.000000 was specified as an immediate
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-%class.Texture2D = type { <4 x float>, %"class.Texture2D<vector<float, 4> >::mips_type" }
-%"class.Texture2D<vector<float, 4> >::mips_type" = type { i32 }
-%"$Globals" = type { float }
-%dx.types.Handle = type { i8* }
-%dx.types.ResRet.f32 = type { float, float, float, float, i32 }
-%dx.types.CBufRet.f32 = type { float, float, float, float }
-%struct.SamplerState = type { i32 }
-
-@"\01?bias@@3MA" = global float 0.000000e+00, align 4
-@dx.typevar.0 = external addrspace(1) constant %class.Texture2D
-@dx.typevar.1 = external addrspace(1) constant %"class.Texture2D<vector<float, 4> >::mips_type"
-@dx.typevar.2 = external addrspace(1) constant %"$Globals"
-@llvm.used = appending global [3 x i8*] [i8* addrspacecast (i8 addrspace(1)* bitcast (%class.Texture2D addrspace(1)* @dx.typevar.0 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%"class.Texture2D<vector<float, 4> >::mips_type" addrspace(1)* @dx.typevar.1 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%"$Globals" addrspace(1)* @dx.typevar.2 to i8 addrspace(1)*) to i8*)], section "llvm.metadata"
-
-; Function Attrs: nounwind
-define void @main.flat(<2 x float>, <4 x float>* nocapture readnone) #0 {
-  %text1_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 0, i32 3, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %samp1_sampler = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 3, i32 0, i32 5, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %3 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %4 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %5 = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 2, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %6 = call %dx.types.ResRet.f32 @dx.op.sampleBias.f32(i32 62, %dx.types.Handle %text1_texture_2d, %dx.types.Handle %samp1_sampler, float %3, float %4, float undef, float undef, i32 undef, i32 undef, i32 undef, float 1.8000000e01, float undef)  ; SampleBias(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,offset2,bias,clamp)
-  %7 = extractvalue %dx.types.ResRet.f32 %6, 0
-  %8 = extractvalue %dx.types.ResRet.f32 %6, 1
-  %9 = extractvalue %dx.types.ResRet.f32 %6, 2
-  %10 = extractvalue %dx.types.ResRet.f32 %6, 3
-  %11 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 60, %dx.types.Handle %5, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
-  %12 = extractvalue %dx.types.CBufRet.f32 %11, 0
-  %13 = call %dx.types.ResRet.f32 @dx.op.sampleBias.f32(i32 62, %dx.types.Handle %text1_texture_2d, %dx.types.Handle %samp1_sampler, float %3, float %4, float undef, float undef, i32 -5, i32 7, i32 undef, float %12, float undef)  ; SampleBias(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,offset2,bias,clamp)
-  %14 = extractvalue %dx.types.ResRet.f32 %13, 0
-  %15 = extractvalue %dx.types.ResRet.f32 %13, 1
-  %16 = extractvalue %dx.types.ResRet.f32 %13, 2
-  %17 = extractvalue %dx.types.ResRet.f32 %13, 3
-  %.i0 = fadd fast float %14, %7
-  %.i1 = fadd fast float %15, %8
-  %.i2 = fadd fast float %16, %9
-  %.i3 = fadd fast float %17, %10
-  %18 = call %dx.types.ResRet.f32 @dx.op.sampleBias.f32(i32 62, %dx.types.Handle %text1_texture_2d, %dx.types.Handle %samp1_sampler, float %3, float %4, float undef, float undef, i32 -4, i32 1, i32 undef, float %12, float 1.8000000e01)  ; SampleBias(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,offset2,bias,clamp)
-  %19 = extractvalue %dx.types.ResRet.f32 %18, 0
-  %20 = extractvalue %dx.types.ResRet.f32 %18, 1
-  %21 = extractvalue %dx.types.ResRet.f32 %18, 2
-  %22 = extractvalue %dx.types.ResRet.f32 %18, 3
-  %.i01 = fadd fast float %.i0, %19
-  %.i12 = fadd fast float %.i1, %20
-  %.i23 = fadd fast float %.i2, %21
-  %.i34 = fadd fast float %.i3, %22
-  %23 = call %dx.types.ResRet.f32 @dx.op.sampleBias.f32(i32 62, %dx.types.Handle %text1_texture_2d, %dx.types.Handle %samp1_sampler, float %3, float %4, float undef, float undef, i32 -3, i32 2, i32 undef, float %12, float 0.000000e+00)  ; SampleBias(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,offset2,bias,clamp)
-  %24 = extractvalue %dx.types.ResRet.f32 %23, 0
-  %25 = extractvalue %dx.types.ResRet.f32 %23, 1
-  %26 = extractvalue %dx.types.ResRet.f32 %23, 2
-  %27 = extractvalue %dx.types.ResRet.f32 %23, 3
-  %28 = extractvalue %dx.types.ResRet.f32 %23, 4
-  %.i05 = fadd fast float %.i01, %24
-  %.i16 = fadd fast float %.i12, %25
-  %.i27 = fadd fast float %.i23, %26
-  %.i38 = fadd fast float %.i34, %27
-  %29 = uitofp i32 %28 to float
-  %.i09 = fadd fast float %.i05, %29
-  %.i110 = fadd fast float %.i16, %29
-  %.i211 = fadd fast float %.i27, %29
-  %.i312 = fadd fast float %.i38, %29
-  %30 = call %dx.types.ResRet.f32 @dx.op.sampleBias.f32(i32 62, %dx.types.Handle %text1_texture_2d, %dx.types.Handle %samp1_sampler, float %3, float %4, float undef, float undef, i32 -3, i32 2, i32 undef, float %12, float %3)  ; SampleBias(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,offset2,bias,clamp)
-  %31 = extractvalue %dx.types.ResRet.f32 %30, 0
-  %32 = extractvalue %dx.types.ResRet.f32 %30, 1
-  %33 = extractvalue %dx.types.ResRet.f32 %30, 2
-  %34 = extractvalue %dx.types.ResRet.f32 %30, 3
-  %35 = extractvalue %dx.types.ResRet.f32 %30, 4
-  %.i013 = fadd fast float %.i09, %31
-  %.i114 = fadd fast float %.i110, %32
-  %.i215 = fadd fast float %.i211, %33
-  %.i316 = fadd fast float %.i312, %34
-  %36 = uitofp i32 %35 to float
-  %.i017 = fadd fast float %.i013, %36
-  %.i118 = fadd fast float %.i114, %36
-  %.i219 = fadd fast float %.i215, %36
-  %.i320 = fadd fast float %.i316, %36
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %.i017)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %.i118)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %.i219)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %.i320)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind readonly
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #2
-
-; Function Attrs: nounwind readonly
-declare %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32, %dx.types.Handle, i32) #2
-
-; Function Attrs: nounwind readonly
-declare %dx.types.ResRet.f32 @dx.op.sampleBias.f32(i32, %dx.types.Handle, %dx.types.Handle, float, float, float, float, i32, i32, i32, float, float) #2
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind readonly }
-
-!llvm.ident = !{!0}
-!dx.valver = !{!1}
-!dx.version = !{!2}
-!dx.shaderModel = !{!3}
-!dx.resources = !{!4}
-!dx.typeAnnotations = !{!12, !20}
-!dx.entryPoints = !{!29}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 1, i32 0}
-!2 = !{i32 0, i32 7}
-!3 = !{!"ps", i32 6, i32 0}
-!4 = !{!5, null, !8, !10}
-!5 = !{!6}
-!6 = !{i32 0, %class.Texture2D* undef, !"text1", i32 0, i32 3, i32 1, i32 2, i32 0, !7}
-!7 = !{i32 0, i32 9}
-!8 = !{!9}
-!9 = !{i32 0, %"$Globals"* undef, !"$Globals", i32 0, i32 0, i32 1, i32 4, null}
-!10 = !{!11}
-!11 = !{i32 0, %struct.SamplerState* undef, !"samp1", i32 0, i32 5, i32 1, i32 0, null}
-!12 = !{i32 0, %class.Texture2D addrspace(1)* @dx.typevar.0, !13, %"class.Texture2D<vector<float, 4> >::mips_type" addrspace(1)* @dx.typevar.1, !16, %"$Globals" addrspace(1)* @dx.typevar.2, !18}
-!13 = !{i32 20, !14, !15}
-!14 = !{i32 3, i32 0, i32 6, !"h", i32 7, i32 9}
-!15 = !{i32 3, i32 16, i32 6, !"mips"}
-!16 = !{i32 4, !17}
-!17 = !{i32 3, i32 0, i32 6, !"handle", i32 7, i32 5}
-!18 = !{i32 0, !19}
-!19 = !{i32 3, i32 0, i32 6, !"bias", i32 7, i32 9}
-!20 = !{i32 1, void (<2 x float>, <4 x float>*)* @main.flat, !21}
-!21 = !{!22, !24, !27}
-!22 = !{i32 0, !23, !23}
-!23 = !{}
-!24 = !{i32 0, !25, !26}
-!25 = !{i32 4, !"A", i32 7, i32 9}
-!26 = !{i32 0}
-!27 = !{i32 1, !28, !26}
-!28 = !{i32 4, !"SV_Target", i32 7, i32 9}
-!29 = !{void (<2 x float>, <4 x float>*)* @main.flat, !"main", !30, !4, null}
-!30 = !{!31, !33, null}
-!31 = !{!32}
-!32 = !{i32 0, !"A", i8 9, i8 0, !26, i8 2, i32 1, i8 2, i32 0, i8 0, null}
-!33 = !{!34}
-!34 = !{i32 0, !"SV_Target", i8 9, i8 16, !26, i8 0, i32 1, i8 4, i32 0, i8 0, null}
-
diff --git a/tools/clang/test/HLSL/dxil_validation/samplerKind.ll b/tools/clang/test/HLSL/dxil_validation/samplerKind.ll
deleted file mode 100644
index 8256c76fc..000000000
--- a/tools/clang/test/HLSL/dxil_validation/samplerKind.ll
+++ /dev/null
@@ -1,219 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: Invalid sampler mode on sampler 'g_samLinear'
-; CHECK: Invalid sampler mode on sampler 'g_samLinearC'
-; CHECK: Type 'st' is a struct type but is used as a parameter in function 'main.flat'
-; CHECK: sample_c_*/gather_c instructions require sampler declared in comparison mode
-; CHECK: sample, lod and gather should on srv resource
-; CHECK: lod instruction requires sampler declared in default mode
-; CHECK: sample, lod and gather should on srv resource
-; CHECK: sample/_l/_d/_cl_s/gather instruction requires sampler declared in default mode
-; CHECK: sample, lod and gather should on srv resource
-; CHECK: sample/_l/_d/_cl_s/gather instruction requires sampler declared in default mode
-; CHECK: sample, lod and gather should on srv resource
-; CHECK: sample_c_*/gather_c instructions require sampler declared in comparison mode
-; CHECK: sample, lod and gather should on srv resource
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-%class.Texture2D = type { <4 x float>, %"class.Texture2D<vector<float, 4> >::mips_type" }
-%"class.Texture2D<vector<float, 4> >::mips_type" = type { i32 }
-%class.RWTexture2D = type { <4 x float> }
-%struct.PS_INPUT = type { <3 x float>, <2 x float> }
-%"$Globals" = type { float }
-%cbPerFrame = type { <3 x float>, float }
-%dx.types.Handle = type { i8* }
-%dx.types.ResRet.f32 = type { float, float, float, float, i32 }
-%dx.types.CBufRet.f32 = type { float, float, float, float }
-%struct.SamplerState = type { i32 }
-%struct.SamplerComparisonState = type { i32 }
-
-@"\01?cmpVal@@3MA" = global float 0.000000e+00, align 4
-@dx.typevar.0 = external addrspace(1) constant %class.Texture2D
-@dx.typevar.1 = external addrspace(1) constant %"class.Texture2D<vector<float, 4> >::mips_type"
-@dx.typevar.2 = external addrspace(1) constant %class.RWTexture2D
-@dx.typevar.3 = external addrspace(1) constant %struct.PS_INPUT
-@dx.typevar.4 = external addrspace(1) constant %"$Globals"
-@dx.typevar.5 = external addrspace(1) constant %cbPerFrame
-@llvm.used = appending global [6 x i8*] [i8* addrspacecast (i8 addrspace(1)* bitcast (%class.Texture2D addrspace(1)* @dx.typevar.0 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%"class.Texture2D<vector<float, 4> >::mips_type" addrspace(1)* @dx.typevar.1 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%class.RWTexture2D addrspace(1)* @dx.typevar.2 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.PS_INPUT addrspace(1)* @dx.typevar.3 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%"$Globals" addrspace(1)* @dx.typevar.4 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%cbPerFrame addrspace(1)* @dx.typevar.5 to i8 addrspace(1)*) to i8*)], section "llvm.metadata"
-
-; Function Attrs: nounwind
-define void @main.flat(<3 x float>* nocapture readnone, <2 x float>* nocapture readnone, <4 x float>* nocapture readnone, %struct.PS_INPUT * %st) #0 {
-entry:
-  %uav1_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 1, i32 0, i32 3, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %g_txDiffuse_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 1, i32 0, i32 3, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %g_samLinearC_sampler = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 3, i32 1, i32 1, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %g_samLinear_sampler = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 3, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %3 = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 2, i32 1, i32 1, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %4 = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 2, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %5 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %6 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 1, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %7 = call %dx.types.ResRet.f32 @dx.op.sample.f32(i32 61, %dx.types.Handle %g_txDiffuse_texture_2d, %dx.types.Handle %g_samLinear_sampler, float %5, float %6, float undef, float undef, i32 undef, i32 undef, i32 undef, float undef)  ; Sample(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,offset2,clamp)
-  %8 = extractvalue %dx.types.ResRet.f32 %7, 0
-  %9 = extractvalue %dx.types.ResRet.f32 %7, 1
-  %10 = extractvalue %dx.types.ResRet.f32 %7, 2
-  %11 = extractvalue %dx.types.ResRet.f32 %7, 3
-  %12 = call float @dx.op.calculateLOD.f32(i32 84, %dx.types.Handle %g_txDiffuse_texture_2d, %dx.types.Handle %g_samLinear_sampler, float %5, float %6, float undef, i1 true)  ; CalculateLOD(handle,sampler,coord0,coord1,coord2,clamped)
-  %add.i0 = fadd fast float %8, %12
-  %add.i1 = fadd fast float %9, %12
-  %add.i2 = fadd fast float %10, %12
-  %add.i3 = fadd fast float %11, %12
-  %13 = call %dx.types.ResRet.f32 @dx.op.textureGather.f32(i32 74, %dx.types.Handle %g_txDiffuse_texture_2d, %dx.types.Handle %g_samLinear_sampler, float %5, float %6, float undef, float undef, i32 undef, i32 undef, i32 0)  ; TextureGather(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,channel)
-  %14 = extractvalue %dx.types.ResRet.f32 %13, 0
-  %15 = extractvalue %dx.types.ResRet.f32 %13, 1
-  %16 = extractvalue %dx.types.ResRet.f32 %13, 2
-  %17 = extractvalue %dx.types.ResRet.f32 %13, 3
-  %add5.i0 = fadd fast float %add.i0, %14
-  %add5.i1 = fadd fast float %add.i1, %15
-  %add5.i2 = fadd fast float %add.i2, %16
-  %add5.i3 = fadd fast float %add.i3, %17
-  %18 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 60, %dx.types.Handle %4, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
-  %19 = extractvalue %dx.types.CBufRet.f32 %18, 0
-  %20 = call %dx.types.ResRet.f32 @dx.op.sampleCmp.f32(i32 65, %dx.types.Handle %g_txDiffuse_texture_2d, %dx.types.Handle %g_samLinearC_sampler, float %5, float %6, float undef, float undef, i32 undef, i32 undef, i32 undef, float %19, float undef)  ; SampleCmp(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,offset2,compareValue,clamp)
-  %21 = extractvalue %dx.types.ResRet.f32 %20, 0
-  %add10.i0 = fadd fast float %add5.i0, %21
-  %add10.i1 = fadd fast float %add5.i1, %21
-  %add10.i2 = fadd fast float %add5.i2, %21
-  %add10.i3 = fadd fast float %add5.i3, %21
-  %22 = call %dx.types.ResRet.f32 @dx.op.textureGatherCmp.f32(i32 75, %dx.types.Handle %g_txDiffuse_texture_2d, %dx.types.Handle %g_samLinearC_sampler, float %5, float %6, float undef, float undef, i32 undef, i32 undef, i32 0, float %19)  ; TextureGatherCmp(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,channel,compareVale)
-  %23 = extractvalue %dx.types.ResRet.f32 %22, 0
-  %24 = extractvalue %dx.types.ResRet.f32 %22, 1
-  %25 = extractvalue %dx.types.ResRet.f32 %22, 2
-  %26 = extractvalue %dx.types.ResRet.f32 %22, 3
-  %add13.i0 = fadd fast float %add10.i0, %23
-  %add13.i1 = fadd fast float %add10.i1, %24
-  %add13.i2 = fadd fast float %add10.i2, %25
-  %add13.i3 = fadd fast float %add10.i3, %26
-  %27 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %28 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %29 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %30 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 60, %dx.types.Handle %3, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
-  %31 = extractvalue %dx.types.CBufRet.f32 %30, 0
-  %32 = extractvalue %dx.types.CBufRet.f32 %30, 1
-  %33 = extractvalue %dx.types.CBufRet.f32 %30, 2
-  %34 = call float @dx.op.dot3.f32(i32 56, float %31, float %32, float %33, float %27, float %28, float %29)  ; Dot3(ax,ay,az,bx,by,bz)
-  %Saturate = call float @dx.op.unary.f32(i32 7, float %34)  ; Saturate(value)
-  %35 = extractvalue %dx.types.CBufRet.f32 %30, 3
-  %FMax = call float @dx.op.binary.f32(i32 34, float %Saturate, float %35)  ; FMax(a,b)
-  %mul.i0 = fmul fast float %FMax, %add13.i0
-  %mul.i1 = fmul fast float %FMax, %add13.i1
-  %mul.i2 = fmul fast float %FMax, %add13.i2
-  %mul.i3 = fmul fast float %FMax, %add13.i3
-  %TextureLoad = call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 67, %dx.types.Handle %uav1_UAV_2d, i32 undef, i32 0, i32 0, i32 undef, i32 undef, i32 undef, i32 undef)  ; TextureLoad(srv,mipLevelOrSampleCount,coord0,coord1,coord2,offset0,offset1,offset2)
-  %36 = extractvalue %dx.types.ResRet.f32 %TextureLoad, 0
-  %37 = extractvalue %dx.types.ResRet.f32 %TextureLoad, 1
-  %38 = extractvalue %dx.types.ResRet.f32 %TextureLoad, 2
-  %39 = extractvalue %dx.types.ResRet.f32 %TextureLoad, 3
-  %mul20.i0 = fmul fast float %mul.i0, %36
-  %mul20.i1 = fmul fast float %mul.i1, %37
-  %mul20.i2 = fmul fast float %mul.i2, %38
-  %mul20.i3 = fmul fast float %mul.i3, %39
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %mul20.i0)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %mul20.i1)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %mul20.i2)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %mul20.i3)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind readonly
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #2
-
-; Function Attrs: nounwind readonly
-declare %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32, %dx.types.Handle, i32) #2
-
-; Function Attrs: nounwind readonly
-declare %dx.types.ResRet.f32 @dx.op.textureGatherCmp.f32(i32, %dx.types.Handle, %dx.types.Handle, float, float, float, float, i32, i32, i32, float) #2
-
-; Function Attrs: nounwind readonly
-declare float @dx.op.calculateLOD.f32(i32, %dx.types.Handle, %dx.types.Handle, float, float, float, i1) #2
-
-; Function Attrs: nounwind readonly
-declare %dx.types.ResRet.f32 @dx.op.textureGather.f32(i32, %dx.types.Handle, %dx.types.Handle, float, float, float, float, i32, i32, i32) #2
-
-; Function Attrs: nounwind readonly
-declare %dx.types.ResRet.f32 @dx.op.sample.f32(i32, %dx.types.Handle, %dx.types.Handle, float, float, float, float, i32, i32, i32, float) #2
-
-; Function Attrs: nounwind readonly
-declare %dx.types.ResRet.f32 @dx.op.sampleCmp.f32(i32, %dx.types.Handle, %dx.types.Handle, float, float, float, float, i32, i32, i32, float, float) #2
-
-; Function Attrs: nounwind readonly
-declare %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32, %dx.types.Handle, i32, i32, i32, i32, i32, i32, i32) #2
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.dot3.f32(i32, float, float, float, float, float, float) #1
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.unary.f32(i32, float) #1
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.binary.f32(i32, float, float) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind readonly }
-
-!llvm.ident = !{!0}
-!dx.valver = !{!1}
-!dx.version = !{!2}
-!dx.shaderModel = !{!3}
-!dx.resources = !{!4}
-!dx.typeAnnotations = !{!16, !31}
-!dx.entryPoints = !{!42}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 1, i32 0}
-!2 = !{i32 0, i32 7}
-!3 = !{!"ps", i32 6, i32 0}
-!4 = !{!5, !8, !10, !13}
-!5 = !{!6}
-!6 = !{i32 0, %class.Texture2D* undef, !"g_txDiffuse", i32 0, i32 0, i32 1, i32 2, i32 0, !7}
-!7 = !{i32 0, i32 9}
-!8 = !{!9}
-!9 = !{i32 0, %class.RWTexture2D* undef, !"uav1", i32 0, i32 3, i32 1, i32 2, i1 false, i1 false, i1 false, !7}
-!10 = !{!11, !12}
-!11 = !{i32 0, %"$Globals"* undef, !"$Globals", i32 0, i32 0, i32 1, i32 4, null}
-!12 = !{i32 1, %cbPerFrame* undef, !"cbPerFrame", i32 0, i32 1, i32 1, i32 16, null}
-!13 = !{!14, !15}
-!14 = !{i32 0, %struct.SamplerState* undef, !"g_samLinear", i32 0, i32 0, i32 1, i32 3, null}
-!15 = !{i32 1, %struct.SamplerComparisonState* undef, !"g_samLinearC", i32 0, i32 1, i32 1, i32 3, null}
-!16 = !{i32 0, %class.Texture2D addrspace(1)* @dx.typevar.0, !17, %"class.Texture2D<vector<float, 4> >::mips_type" addrspace(1)* @dx.typevar.1, !20, %class.RWTexture2D addrspace(1)* @dx.typevar.2, !22, %struct.PS_INPUT addrspace(1)* @dx.typevar.3, !23, %"$Globals" addrspace(1)* @dx.typevar.4, !26, %cbPerFrame addrspace(1)* @dx.typevar.5, !28}
-!17 = !{i32 20, !18, !19}
-!18 = !{i32 3, i32 0, i32 6, !"h", i32 7, i32 9}
-!19 = !{i32 3, i32 16, i32 6, !"mips"}
-!20 = !{i32 4, !21}
-!21 = !{i32 3, i32 0, i32 6, !"handle", i32 7, i32 5}
-!22 = !{i32 16, !18}
-!23 = !{i32 24, !24, !25}
-!24 = !{i32 3, i32 0, i32 4, !"NORMAL", i32 5, i32 6, i32 6, !"vNormal", i32 7, i32 9}
-!25 = !{i32 3, i32 16, i32 4, !"TEXCOORD0", i32 5, i32 4, i32 6, !"vTexcoord", i32 7, i32 9}
-!26 = !{i32 0, !27}
-!27 = !{i32 3, i32 0, i32 6, !"cmpVal", i32 7, i32 9}
-!28 = !{i32 0, !29, !30}
-!29 = !{i32 3, i32 0, i32 6, !"g_vLightDir", i32 7, i32 9}
-!30 = !{i32 3, i32 12, i32 6, !"g_fAmbient", i32 7, i32 9}
-!31 = !{i32 1, void (<3 x float>*, <2 x float>*, <4 x float>*, %struct.PS_INPUT * )* @main.flat, !32}
-!32 = !{!33, !35, !38, !40, !40}
-!33 = !{i32 0, !34, !34}
-!34 = !{}
-!35 = !{i32 0, !36, !37}
-!36 = !{i32 4, !"NORMAL", i32 5, i32 6, i32 7, i32 9}
-!37 = !{i32 0}
-!38 = !{i32 0, !39, !37}
-!39 = !{i32 4, !"TEXCOORD0", i32 5, i32 4, i32 7, i32 9}
-!40 = !{i32 1, !41, !37}
-!41 = !{i32 4, !"SV_TARGET", i32 7, i32 9}
-!42 = !{void (<3 x float>*, <2 x float>*, <4 x float>*, %struct.PS_INPUT * )* @main.flat, !"main", !43, !4, !49}
-!43 = !{!44, !47, null}
-!44 = !{!45, !46}
-!45 = !{i32 0, !"NORMAL", i8 9, i8 0, !37, i8 6, i32 1, i8 3, i32 0, i8 0, null}
-!46 = !{i32 1, !"TEXCOORD", i8 9, i8 0, !37, i8 4, i32 1, i8 2, i32 1, i8 0, null}
-!47 = !{!48}
-!48 = !{i32 0, !"SV_Target", i8 9, i8 16, !37, i8 0, i32 1, i8 4, i32 0, i8 0, null}
-!49 = !{i32 0, i64 8192}
diff --git a/tools/clang/test/HLSL/dxil_validation/semaOverlap.ll b/tools/clang/test/HLSL/dxil_validation/semaOverlap.ll
deleted file mode 100644
index decbad1fc..000000000
--- a/tools/clang/test/HLSL/dxil_validation/semaOverlap.ll
+++ /dev/null
@@ -1,68 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: Semantic 'A' overlap at 0
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-; Function Attrs: nounwind
-define void @main.flat(<4 x float>, <4 x float>, <4 x float>* nocapture readnone) #0 {
-entry:
-  %3 = tail call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 undef)
-  %4 = tail call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 1, i32 undef)
-  %5 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)
-  %6 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 undef)
-  %7 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 undef)
-  %8 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 3, i32 undef)
-  %mul.i0 = fmul fast float %5, %4
-  %mul.i1 = fmul fast float %6, %3
-  %mul.i2 = fmul fast float %7, %3
-  %mul.i3 = fmul fast float %8, %3
-  %FAbs = tail call float @dx.op.unary.f32(i32 6, float %mul.i0)
-  %FAbs1 = tail call float @dx.op.unary.f32(i32 6, float %mul.i1)
-  %FAbs2 = tail call float @dx.op.unary.f32(i32 6, float %mul.i2)
-  %FAbs3 = tail call float @dx.op.unary.f32(i32 6, float %mul.i3)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %FAbs)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %FAbs1)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %FAbs2)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %FAbs3)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.unary.f32(i32, float) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.typeAnnotations = !{!3}
-!dx.entryPoints = !{!12}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"ps", i32 6, i32 0}
-!3 = !{i32 1, void (<4 x float>, <4 x float>, <4 x float>*)* @main.flat, !4}
-!4 = !{!5, !7, !7, !10}
-!5 = !{i32 0, !6, !6}
-!6 = !{}
-!7 = !{i32 0, !8, !9}
-!8 = !{i32 4, !"A", i32 7, i32 9}
-!9 = !{i32 0}
-!10 = !{i32 1, !11, !9}
-!11 = !{i32 4, !"SV_TARGET", i32 7, i32 9}
-!12 = !{void (<4 x float>, <4 x float>, <4 x float>*)* @main.flat, !"", !13, null, null}
-!13 = !{!14, !17, null}
-!14 = !{!15, !16}
-!15 = !{i32 0, !"A", i8 9, i8 0, !9, i8 2, i32 1, i8 4, i32 0, i8 0, null}
-!16 = !{i32 1, !"A", i8 9, i8 0, !9, i8 2, i32 1, i8 4, i32 1, i8 0, null}
-!17 = !{!18}
-!18 = !{i32 0, !"SV_Target", i8 9, i8 16, !9, i8 0, i32 1, i8 4, i32 0, i8 0, null}
diff --git a/tools/clang/test/HLSL/dxil_validation/sigOutOfRange.ll b/tools/clang/test/HLSL/dxil_validation/sigOutOfRange.ll
deleted file mode 100644
index 55044600e..000000000
--- a/tools/clang/test/HLSL/dxil_validation/sigOutOfRange.ll
+++ /dev/null
@@ -1,69 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: signature element A at location (8000,0) size (1,4) is out of range.
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-; Function Attrs: nounwind
-define void @main.flat(<4 x float>, <4 x float>, <4 x float>* nocapture readnone) #0 {
-entry:
-  %3 = tail call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 undef)
-  %4 = tail call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 1, i32 undef)
-  %5 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)
-  %6 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 undef)
-  %7 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 undef)
-  %8 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 3, i32 undef)
-  %mul.i0 = fmul fast float %5, %4
-  %mul.i1 = fmul fast float %6, %3
-  %mul.i2 = fmul fast float %7, %3
-  %mul.i3 = fmul fast float %8, %3
-  %FAbs = tail call float @dx.op.unary.f32(i32 6, float %mul.i0)
-  %FAbs1 = tail call float @dx.op.unary.f32(i32 6, float %mul.i1)
-  %FAbs2 = tail call float @dx.op.unary.f32(i32 6, float %mul.i2)
-  %FAbs3 = tail call float @dx.op.unary.f32(i32 6, float %mul.i3)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %FAbs)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %FAbs1)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %FAbs2)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %FAbs3)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.unary.f32(i32, float) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.typeAnnotations = !{!3}
-!dx.entryPoints = !{!12}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"ps", i32 6, i32 0}
-!3 = !{i32 1, void (<4 x float>, <4 x float>, <4 x float>*)* @main.flat, !4}
-!4 = !{!5, !7, !7, !10}
-!5 = !{i32 0, !6, !6}
-!6 = !{}
-!7 = !{i32 0, !8, !9}
-!8 = !{i32 4, !"A", i32 7, i32 9}
-!9 = !{i32 0}
-!10 = !{i32 1, !11, !9}
-!11 = !{i32 4, !"SV_TARGET", i32 7, i32 9}
-!12 = !{void (<4 x float>, <4 x float>, <4 x float>*)* @main.flat, !"", !13, null, null}
-!13 = !{!14, !17, null}
-!14 = !{!15, !16}
-!15 = !{i32 0, !"A", i8 9, i8 0, !9, i8 2, i32 1, i8 4, i32 0, i8 0, null}
-!16 = !{i32 1, !"A", i8 9, i8 0, !19, i8 2, i32 1, i8 4, i32 8000, i8 0, null}
-!17 = !{!18}
-!18 = !{i32 0, !"SV_Target", i8 9, i8 16, !9, i8 0, i32 1, i8 4, i32 0, i8 0, null}
-!19 = !{i32 1}
diff --git a/tools/clang/test/HLSL/dxil_validation/sigOverlap.ll b/tools/clang/test/HLSL/dxil_validation/sigOverlap.ll
deleted file mode 100644
index 305cbb6d7..000000000
--- a/tools/clang/test/HLSL/dxil_validation/sigOverlap.ll
+++ /dev/null
@@ -1,69 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: signature element A at location (0,0) size (1,4) overlaps another signature element.
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-; Function Attrs: nounwind
-define void @main.flat(<4 x float>, <4 x float>, <4 x float>* nocapture readnone) #0 {
-entry:
-  %3 = tail call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 undef)
-  %4 = tail call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 1, i32 undef)
-  %5 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)
-  %6 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 undef)
-  %7 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 undef)
-  %8 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 3, i32 undef)
-  %mul.i0 = fmul fast float %5, %4
-  %mul.i1 = fmul fast float %6, %3
-  %mul.i2 = fmul fast float %7, %3
-  %mul.i3 = fmul fast float %8, %3
-  %FAbs = tail call float @dx.op.unary.f32(i32 6, float %mul.i0)
-  %FAbs1 = tail call float @dx.op.unary.f32(i32 6, float %mul.i1)
-  %FAbs2 = tail call float @dx.op.unary.f32(i32 6, float %mul.i2)
-  %FAbs3 = tail call float @dx.op.unary.f32(i32 6, float %mul.i3)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %FAbs)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %FAbs1)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %FAbs2)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %FAbs3)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.unary.f32(i32, float) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.typeAnnotations = !{!3}
-!dx.entryPoints = !{!12}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"ps", i32 6, i32 0}
-!3 = !{i32 1, void (<4 x float>, <4 x float>, <4 x float>*)* @main.flat, !4}
-!4 = !{!5, !7, !7, !10}
-!5 = !{i32 0, !6, !6}
-!6 = !{}
-!7 = !{i32 0, !8, !9}
-!8 = !{i32 4, !"A", i32 7, i32 9}
-!9 = !{i32 0}
-!10 = !{i32 1, !11, !9}
-!11 = !{i32 4, !"SV_TARGET", i32 7, i32 9}
-!12 = !{void (<4 x float>, <4 x float>, <4 x float>*)* @main.flat, !"", !13, null, null}
-!13 = !{!14, !17, null}
-!14 = !{!15, !16}
-!15 = !{i32 0, !"A", i8 9, i8 0, !9, i8 2, i32 1, i8 4, i32 0, i8 0, null}
-!16 = !{i32 1, !"A", i8 9, i8 0, !19, i8 2, i32 1, i8 4, i32 0, i8 0, null}
-!17 = !{!18}
-!18 = !{i32 0, !"SV_Target", i8 9, i8 16, !9, i8 0, i32 1, i8 4, i32 0, i8 0, null}
-!19 = !{i32 1}
diff --git a/tools/clang/test/HLSL/dxil_validation/uavBarrier.ll b/tools/clang/test/HLSL/dxil_validation/uavBarrier.ll
deleted file mode 100644
index d499c8449..000000000
--- a/tools/clang/test/HLSL/dxil_validation/uavBarrier.ll
+++ /dev/null
@@ -1,119 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: uav load don't support offset
-; CHECK: uav load don't support mipLevel/sampleIndex
-; CHECK: store on typed uav must write to all four components of the UAV
-; CHECK: sync in a non-Compute Shader must only sync UAV (sync_uglobal)
-
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-%class.RWTexture2D = type { <4 x float> }
-%dx.types.Handle = type { i8* }
-%dx.types.ResRet.f32 = type { float, float, float, float, i32 }
-
-@"\01?uav1@@3V?$RWTexture2D@V?$vector@M$03@@@@A" = available_externally global %class.RWTexture2D zeroinitializer, align 4
-@dx.typevar.0 = external addrspace(1) constant %class.RWTexture2D
-@llvm.used = appending global [2 x i8*] [i8* addrspacecast (i8 addrspace(1)* bitcast (%class.RWTexture2D addrspace(1)* @dx.typevar.0 to i8 addrspace(1)*) to i8*), i8* bitcast (%class.RWTexture2D* @"\01?uav1@@3V?$RWTexture2D@V?$vector@M$03@@@@A" to i8*)], section "llvm.metadata"
-
-; Function Attrs: nounwind
-define void @main.flat(<2 x i32>, <2 x i32>, <4 x float>* nocapture readnone) #0 {
-entry:
-  %uav1_UAV_2d = tail call %dx.types.Handle @dx.op.createHandle(i32 58, i8 1, i32 0, i32 0, i1 false)
-  %3 = tail call i32 @dx.op.loadInput.i32(i32 4, i32 1, i32 0, i8 0, i32 undef)
-  %4 = tail call i32 @dx.op.loadInput.i32(i32 4, i32 1, i32 0, i8 1, i32 undef)
-  %5 = tail call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef)
-  %6 = tail call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 1, i32 undef)
-  %TextureLoad = tail call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 67, %dx.types.Handle %uav1_UAV_2d, i32 %3, i32 %3, i32 %4, i32 %3, i32 undef, i32 %3, i32 undef)
-  %7 = extractvalue %dx.types.ResRet.f32 %TextureLoad, 0
-  %8 = extractvalue %dx.types.ResRet.f32 %TextureLoad, 1
-  %9 = extractvalue %dx.types.ResRet.f32 %TextureLoad, 2
-  %10 = extractvalue %dx.types.ResRet.f32 %TextureLoad, 3
-  tail call void @dx.op.barrier(i32 83, i32 9)
-  %TextureLoad1 = tail call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 67, %dx.types.Handle %uav1_UAV_2d, i32 undef, i32 %5, i32 %6, i32 undef, i32 undef, i32 undef, i32 undef)
-  %11 = extractvalue %dx.types.ResRet.f32 %TextureLoad1, 0
-  %12 = extractvalue %dx.types.ResRet.f32 %TextureLoad1, 1
-  %13 = extractvalue %dx.types.ResRet.f32 %TextureLoad1, 2
-  %14 = extractvalue %dx.types.ResRet.f32 %TextureLoad1, 3
-  %15 = extractvalue %dx.types.ResRet.f32 %TextureLoad1, 4
-  %conv = uitofp i32 %15 to float
-  %factor = fmul fast float %conv, 2.000000e+00
-  %add4.i0 = fadd fast float %11, %7
-  %add9.i0 = fadd fast float %add4.i0, %factor
-  %factor4 = fmul fast float %conv, 2.000000e+00
-  %add4.i1 = fadd fast float %12, %8
-  %add9.i1 = fadd fast float %add4.i1, %factor4
-  %factor5 = fmul fast float %conv, 2.000000e+00
-  %add4.i2 = fadd fast float %13, %9
-  %add9.i2 = fadd fast float %add4.i2, %factor5
-  %factor6 = fmul fast float %conv, 2.000000e+00
-  %add4.i3 = fadd fast float %14, %10
-  %add9.i3 = fadd fast float %add4.i3, %factor6
-  tail call void @dx.op.barrier(i32 83, i32 2)
-  tail call void @dx.op.textureStore.f32(i32 68, %dx.types.Handle %uav1_UAV_2d, i32 %3, i32 %4, i32 undef, float %add9.i0, float %add9.i1, float %add9.i2, float undef, i8 7)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %add9.i0)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %add9.i1)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %add9.i2)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %add9.i3)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.loadInput.i32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind readnone
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #1
-
-; Function Attrs: nounwind readonly
-declare %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32, %dx.types.Handle, i32, i32, i32, i32, i32, i32, i32) #2
-
-; Function Attrs: nounwind
-declare void @dx.op.textureStore.f32(i32, %dx.types.Handle, i32, i32, i32, float, float, float, float, i8) #0
-
-; Function Attrs: nounwind
-declare void @dx.op.barrier(i32, i32) #0
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind readonly }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.resources = !{!3}
-!dx.typeAnnotations = !{!7, !10}
-!dx.entryPoints = !{!21}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"ps", i32 6, i32 0}
-!3 = !{null, !4, null, null}
-!4 = !{!5}
-!5 = !{i32 0, %class.RWTexture2D* @"\01?uav1@@3V?$RWTexture2D@V?$vector@M$03@@@@A", !"uav1", i32 0, i32 3, i32 1, i32 2, i1 false, i1 false, i1 false, !6}
-!6 = !{i32 0, i32 9}
-!7 = !{i32 0, %class.RWTexture2D addrspace(1)* @dx.typevar.0, !8}
-!8 = !{i32 16, !9}
-!9 = !{i32 3, i32 0, i32 6, !"h", i32 7, i32 9}
-!10 = !{i32 1, void (<2 x i32>, <2 x i32>, <4 x float>*)* @main.flat, !11}
-!11 = !{!12, !14, !17, !19}
-!12 = !{i32 0, !13, !13}
-!13 = !{}
-!14 = !{i32 0, !15, !16}
-!15 = !{i32 4, !"A", i32 7, i32 5}
-!16 = !{i32 0}
-!17 = !{i32 0, !18, !16}
-!18 = !{i32 4, !"B", i32 7, i32 5}
-!19 = !{i32 1, !20, !16}
-!20 = !{i32 4, !"SV_Target", i32 7, i32 9}
-!21 = !{void (<2 x i32>, <2 x i32>, <4 x float>*)* @main.flat, !"", !22, !3, !28}
-!22 = !{!23, !26, null}
-!23 = !{!24, !25}
-!24 = !{i32 0, !"A", i8 5, i8 0, !16, i8 1, i32 1, i8 2, i32 0, i8 0, null}
-!25 = !{i32 1, !"B", i8 5, i8 0, !16, i8 1, i32 1, i8 2, i32 1, i8 0, null}
-!26 = !{!27}
-!27 = !{i32 0, !"SV_Target", i8 9, i8 16, !16, i8 0, i32 1, i8 4, i32 0, i8 0, null}
-!28 = !{i32 0, i64 8192}
diff --git a/tools/clang/test/HLSL/val-inst-disallowed.ll b/tools/clang/test/HLSL/val-inst-disallowed.ll
deleted file mode 100644
index 919918220..000000000
--- a/tools/clang/test/HLSL/val-inst-disallowed.ll
+++ /dev/null
@@ -1,56 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: Semantic 'SV_Target' is invalid as vs Output
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-%dx.types.wave_t = type { i8* }
-
-define void @"\01?main@@YA?AV?$vector@M$03@@XZ.flat"(<4 x float>*) {
-entry:
-; CHECK: Instructions must not reference reserved opcodes
-  %WaveCapture = call %dx.types.wave_t @dx.op.waveCapture(i32 114, i8 0)
-
-; CHECK: Declaration '%dx.types.wave_t = type { i8* }' uses a reserved prefix
-  %wave_local = alloca %dx.types.wave_t
-
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float 0.000000e+00)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float 0.000000e+00)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float 0.000000e+00)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float 0.000000e+00)
-  ret void
-; CHECK: Instructions must be of an allowed type
-  unreachable
-}
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-; Function Attrs: nounwind readonly
-declare %dx.types.wave_t @dx.op.waveCapture(i32, i8) #1
-; Function Attrs: nounwind readonly
-declare i1 @dx.op.waveAllIsTrue(i32, %dx.types.wave_t, i1) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.typeAnnotations = !{!3}
-!dx.entryPoints = !{!9}
-
-!0 = !{!"clang version 3.7.0 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 4}
-!2 = !{!"vs", i32 6, i32 0}
-!3 = !{i32 1, void (<4 x float>*)* @"\01?main@@YA?AV?$vector@M$03@@XZ.flat", !4}
-!4 = !{!5, !7}
-!5 = !{i32 0, !6, !13}
-!6 = !{}
-!7 = !{i32 1, !8, !13}
-!8 = !{i32 4, !"SV_Target", i32 7, i32 9}
-!9 = !{void (<4 x float>*)* @"\01?main@@YA?AV?$vector@M$03@@XZ.flat", !"", !10, null, null}
-!10 = !{null, !11, null}
-!11 = !{!12}
-!12 = !{i32 0, !"SV_Target", i8 9, i8 16, !13, i8 0, i32 1, i8 4, i32 0, i8 0, null}
-!13 = !{i32 0}
diff --git a/tools/clang/tools/dotnetc/EditorForm.cs b/tools/clang/tools/dotnetc/EditorForm.cs
index bb7461c88..7ee7d1daf 100644
--- a/tools/clang/tools/dotnetc/EditorForm.cs
+++ b/tools/clang/tools/dotnetc/EditorForm.cs
@@ -221,8 +221,8 @@ namespace MainNs
                 "  <InputElement SemanticName='POSITION' Format='R32G32B32_FLOAT' AlignedByteOffset='0' />\r\n" +
                 "  <InputElement SemanticName='COLOR' Format='R32G32B32A32_FLOAT' AlignedByteOffset='12' />\r\n" +
                 " </InputElements>\r\n" +
-                " <Shader Name='VS' Target='vs_5_1' EntryPoint='VSMain' />\r\n" +
-                " <Shader Name='PS' Target='ps_5_1' EntryPoint='PSMain' />\r\n" +
+                " <Shader Name='VS' Target='vs_6_0' EntryPoint='VSMain' />\r\n" +
+                " <Shader Name='PS' Target='ps_6_0' EntryPoint='PSMain' />\r\n" +
                 "</ShaderOp>\r\n";
 
             this.CodeBox.Text =
@@ -670,7 +670,7 @@ namespace MainNs
                 result.SetFromText = options.Count > 0;
                 result.Mode = GetValueOrDefault(options, "mode", "hlsl");
                 result.Entry = GetValueOrDefault(options, "hlsl-entry", "main");
-                result.Target = GetValueOrDefault(options, "hlsl-target", "ps_5_1");
+                result.Target = GetValueOrDefault(options, "hlsl-target", "ps_6_0");
                 result.Arguments = GetValueOrDefault(options, "hlsl-args", "").Split(' ').Select(a => a.Trim()).ToArray();
                 return result;
             }
diff --git a/tools/clang/tools/dxcompiler/dxcompilerobj.cpp b/tools/clang/tools/dxcompiler/dxcompilerobj.cpp
index eb632e1dc..eb7e18995 100644
--- a/tools/clang/tools/dxcompiler/dxcompilerobj.cpp
+++ b/tools/clang/tools/dxcompiler/dxcompilerobj.cpp
@@ -1413,6 +1413,7 @@ static const char *OpCodeSignatures[] = {
   "(value)",  // Atan
   "(value)",  // Hcos
   "(value)",  // Hsin
+  "(value)",  // Htan
   "(value)",  // Exp
   "(value)",  // Frc
   "(value)",  // Log
@@ -1469,8 +1470,6 @@ static const char *OpCodeSignatures[] = {
   "(handle,mipLevel)",  // GetDimensions
   "(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,channel)",  // TextureGather
   "(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,channel,compareVale)",  // TextureGatherCmp
-  "()",  // ToDelete5
-  "()",  // ToDelete6
   "(srv,index)",  // Texture2DMSGetSamplePosition
   "(index)",  // RenderTargetGetSamplePosition
   "()",  // RenderTargetGetSampleCount
@@ -1486,6 +1485,9 @@ static const char *OpCodeSignatures[] = {
   "(inputSigId,inputRowIndex,inputColIndex,offsetX,offsetY)",  // EvalSnapped
   "(inputSigId,inputRowIndex,inputColIndex,sampleIndex)",  // EvalSampleIndex
   "(inputSigId,inputRowIndex,inputColIndex)",  // EvalCentroid
+  "()",  // SampleIndex
+  "()",  // Coverage
+  "()",  // InnerCoverage
   "(component)",  // ThreadId
   "(component)",  // GroupId
   "(component)",  // ThreadIdInGroup
@@ -1493,12 +1495,9 @@ static const char *OpCodeSignatures[] = {
   "(streamId)",  // EmitStream
   "(streamId)",  // CutStream
   "(streamId)",  // EmitThenCutStream
+  "()",  // GSInstanceID
   "(lo,hi)",  // MakeDouble
-  "()",  // ToDelete1
-  "()",  // ToDelete2
   "(value)",  // SplitDouble
-  "()",  // ToDelete3
-  "()",  // ToDelete4
   "(inputSigId,row,col,index)",  // LoadOutputControlPoint
   "(inputSigId,row,col)",  // LoadPatchConstant
   "(component)",  // DomainLocation
@@ -1506,12 +1505,9 @@ static const char *OpCodeSignatures[] = {
   "()",  // OutputControlPointID
   "()",  // PrimitiveID
   "()",  // CycleCounterLegacy
-  "(value)",  // Htan
-  "()",  // WaveCaptureReserved
   "()",  // WaveIsFirstLane
   "()",  // WaveGetLaneIndex
   "()",  // WaveGetLaneCount
-  "()",  // WaveIsHelperLaneReserved
   "(cond)",  // WaveAnyTrue
   "(cond)",  // WaveAllTrue
   "(value)",  // WaveActiveAllEqual
@@ -1521,8 +1517,6 @@ static const char *OpCodeSignatures[] = {
   "(value,op,sop)",  // WaveActiveOp
   "(value,op)",  // WaveActiveBit
   "(value,op,sop)",  // WavePrefixOp
-  "()",  // WaveGetOrderedIndex
-  "()",  // GlobalOrderedCountIncReserved
   "(value,quadLane)",  // QuadReadLaneAt
   "(value,op)",  // QuadOp
   "(value)",  // BitcastI16toF16
@@ -1531,17 +1525,13 @@ static const char *OpCodeSignatures[] = {
   "(value)",  // BitcastF32toI32
   "(value)",  // BitcastI64toF64
   "(value)",  // BitcastF64toI64
-  "()",  // GSInstanceID
   "(value)",  // LegacyF32ToF16
   "(value)",  // LegacyF16ToF32
   "(value)",  // LegacyDoubleToFloat
   "(value)",  // LegacyDoubleToSInt32
   "(value)",  // LegacyDoubleToUInt32
   "(value)",  // WaveAllBitCount
-  "(value)",  // WavePrefixBitCount
-  "()",  // SampleIndex
-  "()",  // Coverage
-  "()"  // InnerCoverage
+  "(value)"  // WavePrefixBitCount
 };
 // OPCODE-SIGS:END
 
diff --git a/tools/clang/unittests/HLSL/CompilerTest.cpp b/tools/clang/unittests/HLSL/CompilerTest.cpp
index 18c9f65bb..5379c8008 100644
--- a/tools/clang/unittests/HLSL/CompilerTest.cpp
+++ b/tools/clang/unittests/HLSL/CompilerTest.cpp
@@ -461,6 +461,7 @@ public:
   TEST_METHOD(CodeGenSimpleGS2)
   TEST_METHOD(CodeGenSimpleGS3)
   TEST_METHOD(CodeGenSimpleGS4)
+  TEST_METHOD(CodeGenSimpleGS5)
   TEST_METHOD(CodeGenSimpleHS1)
   TEST_METHOD(CodeGenSimpleHS2)
   TEST_METHOD(CodeGenSimpleHS3)
@@ -2340,6 +2341,10 @@ TEST_F(CompilerTest, CodeGenSimpleGS4) {
   CodeGenTestCheck(L"..\\CodeGenHLSL\\SimpleGS4.hlsl");
 }
 
+TEST_F(CompilerTest, CodeGenSimpleGS5) {
+  CodeGenTestCheck(L"..\\CodeGenHLSL\\SimpleGS5.hlsl");
+}
+
 TEST_F(CompilerTest, CodeGenSimpleHS1) {
   CodeGenTestCheck(L"..\\CodeGenHLSL\\SimpleHS1.hlsl");
 }
diff --git a/tools/clang/unittests/HLSL/ExtensionTest.cpp b/tools/clang/unittests/HLSL/ExtensionTest.cpp
index ab20327e7..797897708 100644
--- a/tools/clang/unittests/HLSL/ExtensionTest.cpp
+++ b/tools/clang/unittests/HLSL/ExtensionTest.cpp
@@ -15,6 +15,7 @@
 #include "dxc/dxcapi.internal.h"
 #include "dxc/HLSL/HLOperationLowerExtension.h"
 #include "dxc/HlslIntrinsicOp.h"
+#include "llvm/Support/Regex.h"
 
 ///////////////////////////////////////////////////////////////////////////////
 // Support for test intrinsics.
@@ -90,6 +91,12 @@ static const HLSL_INTRINSIC_ARGUMENT TestUnsigned[] = {
   { "x", AR_QUAL_IN, 1, LITEMPLATE_VECTOR, 1, LICOMPTYPE_UINT, 1, 1},
 };
 
+// float2 = MyBufferOp(uint2 addr)
+static const HLSL_INTRINSIC_ARGUMENT TestMyBufferOp[] = {
+  { "MyBufferOp", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT, 1, 2 },
+  { "addr", AR_QUAL_IN, 1, LITEMPLATE_VECTOR, 1, LICOMPTYPE_UINT, 1, 2},
+};
+
 struct Intrinsic {
   LPCWSTR hlslName;
   const char *dxilName;
@@ -119,11 +126,79 @@ Intrinsic Intrinsics[] = {
   {L"test_unsigned","test_unsigned",   "n", { static_cast<unsigned>(hlsl::IntrinsicOp::IOP_min), false, true, -1, countof(TestUnsigned), TestUnsigned}},
 };
 
+Intrinsic BufferIntrinsics[] = {
+  {L"MyBufferOp",   "MyBufferOp",      "m", { 12, false, true, -1, countof(TestMyBufferOp), TestMyBufferOp}},
+};
+
+class IntrinsicTable {
+public:
+  IntrinsicTable(wchar_t *ns, Intrinsic *begin, Intrinsic *end)
+    :  m_namespace(ns), m_begin(begin), m_end(end)
+  { }
+
+  struct SearchResult {
+    Intrinsic *intrinsic;
+    uint64_t index;
+
+    SearchResult() : SearchResult(nullptr, 0) {}
+    SearchResult(Intrinsic *i, uint64_t n) : intrinsic(i), index(n) {}
+    operator bool() { return intrinsic != nullptr; }
+  };
+
+  SearchResult Search(const wchar_t *name, std::ptrdiff_t startIndex) const {
+    Intrinsic *begin = m_begin + startIndex;
+    assert(std::distance(begin, m_end) >= 0);
+    if (IsStar(name))
+      return BuildResult(begin);
+
+    Intrinsic *found = std::find_if(begin, m_end, [name](const Intrinsic &i) {
+      return wcscmp(i.hlslName, name) == 0;
+    });
+
+    return BuildResult(found);
+  }
+  
+  SearchResult Search(unsigned opcode) const {
+    Intrinsic *begin = m_begin;
+    assert(std::distance(begin, m_end) >= 0);
+    
+    Intrinsic *found = std::find_if(begin, m_end, [opcode](const Intrinsic &i) {
+      return i.hlsl.Op == opcode;
+    });
+
+    return BuildResult(found);
+  }
+  
+  bool MatchesNamespace(const wchar_t *ns) const {
+    return wcscmp(m_namespace, ns) == 0;
+  }
+
+private:
+  const wchar_t *m_namespace;
+  Intrinsic *m_begin;
+  Intrinsic *m_end;
+
+  bool IsStar(const wchar_t *name) const {
+    return wcscmp(name, L"*") == 0;
+  }
+
+  SearchResult BuildResult(Intrinsic *found) const {
+    if (found == m_end)
+      return SearchResult{ nullptr, std::numeric_limits<uint64_t>::max() };
+
+    return SearchResult{ found, static_cast<uint64_t>(std::distance(m_begin, found)) };
+  }
+};
+
 class TestIntrinsicTable : public IDxcIntrinsicTable {
 private:
   DXC_MICROCOM_REF_FIELD(m_dwRef);
+  std::vector<IntrinsicTable> m_tables;
 public:
-  TestIntrinsicTable() : m_dwRef(0) { }
+  TestIntrinsicTable() : m_dwRef(0) { 
+    m_tables.push_back(IntrinsicTable(L"",       std::begin(Intrinsics), std::end(Intrinsics)));
+    m_tables.push_back(IntrinsicTable(L"Buffer", std::begin(BufferIntrinsics), std::end(BufferIntrinsics)));
+  }
   DXC_MICROCOM_ADDREF_RELEASE_IMPL(m_dwRef)
   __override HRESULT STDMETHODCALLTYPE QueryInterface(REFIID iid, void** ppvObject) {
     return DoBasicQueryInterface<IDxcIntrinsicTable>(this, iid, ppvObject);
@@ -138,47 +213,61 @@ public:
   __override HRESULT STDMETHODCALLTYPE LookupIntrinsic(
       LPCWSTR typeName, LPCWSTR functionName, const HLSL_INTRINSIC **pIntrinsic,
       _Inout_ UINT64 *pLookupCookie) {
-    if (typeName != nullptr && *typeName) return E_FAIL;
-    Intrinsic *intrinsic =
-      std::find_if(std::begin(Intrinsics), std::end(Intrinsics), [functionName](const Intrinsic &i) {
-        return wcscmp(i.hlslName, functionName) == 0;
-    });
-    if (intrinsic == std::end(Intrinsics))
+    if (typeName == nullptr)
       return E_FAIL;
 
-    *pIntrinsic = &intrinsic->hlsl;
-    *pLookupCookie = 0;
-    return S_OK;
+    // Search for matching intrinsic name in matching namespace.
+    IntrinsicTable::SearchResult result;
+    for (const IntrinsicTable &table : m_tables) {
+      if (table.MatchesNamespace(typeName)) {
+        result = table.Search(functionName, *pLookupCookie);
+        break;
+      }
+    }
+
+    if (result) {
+      *pIntrinsic = &result.intrinsic->hlsl;
+      *pLookupCookie = result.index + 1;
+    }
+    else {
+      *pIntrinsic = nullptr;
+      *pLookupCookie = 0;
+    }
+
+    return result.intrinsic ? S_OK : E_FAIL;
   }
 
   __override HRESULT STDMETHODCALLTYPE
   GetLoweringStrategy(UINT opcode, _Outptr_ LPCSTR *pStrategy) {
-    Intrinsic *intrinsic =
-      std::find_if(std::begin(Intrinsics), std::end(Intrinsics), [opcode](const Intrinsic &i) {
-      return i.hlsl.Op == opcode;
-    });
+    Intrinsic *intrinsic = FindByOpcode(opcode);
     
-    if (intrinsic == std::end(Intrinsics))
+    if (!intrinsic)
       return E_FAIL;
 
     *pStrategy = intrinsic->strategy;
-
     return S_OK;
   }
 
   __override HRESULT STDMETHODCALLTYPE
   GetIntrinsicName(UINT opcode, _Outptr_ LPCSTR *pName) {
-    Intrinsic *intrinsic =
-      std::find_if(std::begin(Intrinsics), std::end(Intrinsics), [opcode](const Intrinsic &i) {
-      return i.hlsl.Op == opcode;
-    });
+    Intrinsic *intrinsic = FindByOpcode(opcode);
 
-    if (intrinsic == std::end(Intrinsics))
+    if (!intrinsic)
       return E_FAIL;
 
     *pName = intrinsic->dxilName;
     return S_OK;
   }
+
+  Intrinsic *FindByOpcode(UINT opcode) {
+    IntrinsicTable::SearchResult result;
+    for (const IntrinsicTable &table : m_tables) {
+      result = table.Search(opcode);
+      if (result)
+        break;
+    }
+    return result.intrinsic;
+  }
 };
 
 // A class to test semantic define validation.
@@ -312,6 +401,7 @@ public:
   TEST_METHOD(PackedLowering);
   TEST_METHOD(ReplicateLoweringWhenOnlyVectorIsResult);
   TEST_METHOD(UnsignedOpcodeIsUnchanged);
+  TEST_METHOD(ResourceExtensionIntrinsic);
 };
 
 TEST_F(ExtensionTest, DefineWhenRegisteredThenPreserved) {
@@ -591,3 +681,25 @@ TEST_F(ExtensionTest, UnsignedOpcodeIsUnchanged) {
     disassembly.npos !=
     disassembly.find("call i32 @test_unsigned(i32 113, "));
 }
+
+TEST_F(ExtensionTest, ResourceExtensionIntrinsic) {
+  Compiler c(m_dllSupport);
+  c.RegisterIntrinsicTable(new TestIntrinsicTable());
+  c.Compile(
+    "Buffer<float2> buf;"
+    "float2 main(uint2 v1 : V1) : SV_Target {\n"
+    "  return buf.MyBufferOp(uint2(1, 2));\n"
+    "}\n",
+    { L"/Vd" }, {}
+  );
+  std::string disassembly = c.Disassemble();
+
+  // Things to check
+  // - return type is translated to dx.types.ResRet
+  // - buffer is translated to dx.types.Handle
+  // - vector is exploded
+  llvm::Regex regex("call %dx.types.ResRet.f32 @MyBufferOp\\(i32 12, %dx.types.Handle %.*, i32 1, i32 2\\)");
+  std::string regexErrors;
+  VERIFY_IS_TRUE(regex.isValid(regexErrors));
+  VERIFY_IS_TRUE(regex.match(disassembly));
+}
diff --git a/tools/clang/unittests/HLSL/ValidationTest.cpp b/tools/clang/unittests/HLSL/ValidationTest.cpp
index 327e88d7c..a57256f78 100644
--- a/tools/clang/unittests/HLSL/ValidationTest.cpp
+++ b/tools/clang/unittests/HLSL/ValidationTest.cpp
@@ -122,6 +122,7 @@ public:
   TEST_METHOD(StructBitCast)
   TEST_METHOD(MultiDimArray)
   TEST_METHOD(NoFunctionParam)
+  TEST_METHOD(I8Type)
 
   TEST_METHOD(ClipCullMaxComponents)
   TEST_METHOD(ClipCullMaxRows)
@@ -149,7 +150,6 @@ public:
   TEST_METHOD(WhenDepthNotFloatThenFail);
   TEST_METHOD(BarrierFail);
   TEST_METHOD(CBufferLegacyOutOfBoundFail);
-  TEST_METHOD(CBufferOutOfBoundFail);
   TEST_METHOD(CsThreadSizeFail);
   TEST_METHOD(DeadLoopFail);
   TEST_METHOD(EvalFail);
@@ -218,6 +218,11 @@ public:
         const char *pStart = (const char *)text->GetBufferPointer();
         const char *pEnd = pStart + text->GetBufferSize();
         const char *pMatch = std::search(pStart, pEnd, pErrorMsg, pErrorMsg + strlen(pErrorMsg));
+        if (pEnd == pMatch) {
+          WEX::Logging::Log::Comment(WEX::Common::String().Format(
+              L"Unable to find '%S' in text:\r\n%.*S", pErrorMsg, (pEnd - pStart),
+              pStart));
+        }
         VERIFY_ARE_NOT_EQUAL(pEnd, pMatch);
       }
     }
@@ -284,7 +289,7 @@ public:
 
   void RewriteAssemblyCheckMsg(LPCSTR pSource, LPCSTR pShaderModel,
                                llvm::ArrayRef<LPCSTR> pLookFors, llvm::ArrayRef<LPCSTR> pReplacements,
-                               LPCSTR pErrorMsg, bool bRegex = false) {
+                               llvm::ArrayRef<LPCSTR> pErrorMsgs, bool bRegex = false) {
     CComPtr<IDxcBlob> pText;
     CComPtr<IDxcBlobEncoding> pSourceBlob;
     
@@ -302,11 +307,13 @@ public:
         m_dllSupport.CreateInstance(CLSID_DxcAssembler, &pAssembler));
     VERIFY_SUCCEEDED(pAssembler->AssembleToContainer(pText, &pAssembleResult));
 
-    if (!CheckOperationResultMsg(pAssembleResult, pErrorMsg, true, bRegex)) {
-      // Assembly succeeded, try validation.
-      CComPtr<IDxcBlob> pBlob;
-      VERIFY_SUCCEEDED(pAssembleResult->GetResult(&pBlob));
-      CheckValidationMsg(pBlob, pErrorMsg, bRegex);
+    for (auto pErrorMsg : pErrorMsgs) {
+      if (!CheckOperationResultMsg(pAssembleResult, pErrorMsg, true, bRegex)) {
+        // Assembly succeeded, try validation.
+        CComPtr<IDxcBlob> pBlob;
+        VERIFY_SUCCEEDED(pAssembleResult->GetResult(&pBlob));
+        CheckValidationMsg(pBlob, pErrorMsg, bRegex);
+      }
     }
   }
 
@@ -351,7 +358,7 @@ public:
   
   void RewriteAssemblyCheckMsg(LPCWSTR name, LPCSTR pShaderModel,
                                llvm::ArrayRef<LPCSTR> pLookFors, llvm::ArrayRef<LPCSTR> pReplacements,
-                               LPCSTR pErrorMsg, bool bRegex = false) {
+                               llvm::ArrayRef<LPCSTR> pErrorMsgs, bool bRegex = false) {
     std::wstring fullPath = hlsl_test::GetPathToHlslDataFile(name);
     CComPtr<IDxcLibrary> pLibrary;
     CComPtr<IDxcBlobEncoding> pSource;
@@ -364,19 +371,20 @@ public:
 
     CComPtr<IDxcBlob> pText;
 
-    RewriteAssemblyToText(pSource, pShaderModel, pLookFors, pReplacements, &pText);
+    RewriteAssemblyToText(pSource, pShaderModel, pLookFors, pReplacements, &pText, bRegex);
 
     CComPtr<IDxcAssembler> pAssembler;
     CComPtr<IDxcOperationResult> pAssembleResult;
     VERIFY_SUCCEEDED(
         m_dllSupport.CreateInstance(CLSID_DxcAssembler, &pAssembler));
     VERIFY_SUCCEEDED(pAssembler->AssembleToContainer(pText, &pAssembleResult));
-
-    if (!CheckOperationResultMsg(pAssembleResult, pErrorMsg, true, bRegex)) {
-      // Assembly succeeded, try validation.
-      CComPtr<IDxcBlob> pBlob;
-      VERIFY_SUCCEEDED(pAssembleResult->GetResult(&pBlob));
-      CheckValidationMsg(pBlob, pErrorMsg, bRegex);
+    for (auto pErrorMsg : pErrorMsgs) {
+      if (!CheckOperationResultMsg(pAssembleResult, pErrorMsg, true, bRegex)) {
+        // Assembly succeeded, try validation.
+        CComPtr<IDxcBlob> pBlob;
+        VERIFY_SUCCEEDED(pAssembleResult->GetResult(&pBlob));
+        CheckValidationMsg(pBlob, pErrorMsg, bRegex);
+      }
     }
   }
 };
@@ -457,96 +465,378 @@ TEST_F(ValidationTest, WhenUnknownBlocksThenFail) {
 }
 
 TEST_F(ValidationTest, WhenInstrDisallowedThenFail) {
-  TestCheck(L"val-inst-disallowed.ll");
+  RewriteAssemblyCheckMsg(
+      L"..\\CodeGenHLSL\\abs2.hlsl", "ps_6_0",
+      {
+          "target triple = \"dxil-ms-dx\"",
+          "ret void",
+          "dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 3, i32 undef)",
+          "!\"ps\", i32 6, i32 0",
+      },
+      {
+          "target triple = \"dxil-ms-dx\"\n%dx.types.wave_t = type { i8* }",
+          "unreachable",
+          "dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 3, i32 undef)\n%wave_local = alloca %dx.types.wave_t",
+          "!\"vs\", i32 6, i32 0",
+      },
+      {"Semantic 'SV_Target' is invalid as vs Output",
+       "Declaration '%dx.types.wave_t = type { i8* }' uses a reserved prefix",
+       "Instructions must be of an allowed type",
+      }
+  );
 }
 
 TEST_F(ValidationTest, WhenDepthNotFloatThenFail) {
-  TestCheck(L"dxil_validation\\IntegerDepth.ll");
+  RewriteAssemblyCheckMsg(L"..\\CodeGenHLSL\\IntegerDepth2.hlsl", "ps_6_0",
+                          {
+                              "!\"SV_Depth\", i8 9",
+                          },
+                          {
+                              "!\"SV_Depth\", i8 4",
+                          },
+                          {
+                              "SV_Depth must be float",
+                          });
 }
 
 TEST_F(ValidationTest, BarrierFail) {
-  TestCheck(L"dxil_validation\\barrier.ll");
+    RewriteAssemblyCheckMsg(
+      L"..\\CodeGenHLSL\\barrier.hlsl", "cs_6_0",
+      {"dx.op.barrier(i32 82, i32 8)",
+        "dx.op.barrier(i32 82, i32 9)",
+        "dx.op.barrier(i32 82, i32 11)",
+        "%class.RWStructuredBuffer = type { %class.matrix.float.2.2 }\n",
+        "call i32 @dx.op.flattenedThreadIdInGroup.i32(i32 98)",
+      },
+      {"dx.op.barrier(i32 82, i32 15)",
+        "dx.op.barrier(i32 82, i32 0)",
+        "dx.op.barrier(i32 82, i32 %rem)",
+        "%class.RWStructuredBuffer = type { %class.matrix.float.2.2 }\n"
+        "@dx.typevar.8 = external addrspace(1) constant %class.RWStructuredBuffer\n"
+        "@\"internalGV\" = internal global [64 x <4 x float>] undef\n",
+        "call i32 @dx.op.flattenedThreadIdInGroup.i32(i32 98)\n"
+        "%load = load %class.RWStructuredBuffer, %class.RWStructuredBuffer addrspace(1)* @dx.typevar.8",
+      },
+      {"Internal declaration 'internalGV' is unused",
+       "External declaration 'dx.typevar.8' is unused",
+       "Vector type '<4 x float>' is not allowed",
+       "Mode of Barrier must be an immediate constant",
+       "sync must include some form of memory barrier - _u (UAV) and/or _g (Thread Group Shared Memory)",
+       "sync can't specify both _ugroup and _uglobal. If both are needed, just specify _uglobal"
+      });
 }
 TEST_F(ValidationTest, CBufferLegacyOutOfBoundFail) {
-  TestCheck(L"dxil_validation\\cbuffer1.50_legacy.ll");
-}
-TEST_F(ValidationTest, CBufferOutOfBoundFail) {
-  TestCheck(L"dxil_validation\\cbuffer1.50.ll");
+  RewriteAssemblyCheckMsg(
+      L"..\\CodeGenHLSL\\cbuffer1.50.hlsl", "ps_6_0",
+      "cbufferLoadLegacy.f32(i32 61, %dx.types.Handle %Foo2_buffer, i32 0)",
+      "cbufferLoadLegacy.f32(i32 61, %dx.types.Handle %Foo2_buffer, i32 6)",
+      "Cbuffer access out of bound");
 }
+
 TEST_F(ValidationTest, CsThreadSizeFail) {
-  TestCheck(L"dxil_validation\\csThreadSize.ll");
+  RewriteAssemblyCheckMsg(
+      L"..\\CodeGenHLSL\\share_mem1.hlsl", "cs_6_0",
+      {"!{i32 8, i32 8, i32 1",
+       "[256 x float]"},
+      {"!{i32 1025, i32 1025, i32 1025",
+       "[64000000 x float]"},
+      {"Declared Thread Group X size 1025 outside valid range",
+       "Declared Thread Group Y size 1025 outside valid range",
+       "Declared Thread Group Z size 1025 outside valid range",
+       "Declared Thread Group Count 1076890625 (X*Y*Z) is beyond the valid maximum",
+       "Total Thread Group Shared Memory storage is 256000000, exceeded 32768",
+      });
 }
 TEST_F(ValidationTest, DeadLoopFail) {
-  TestCheck(L"dxil_validation\\deadloop.ll");
+  RewriteAssemblyCheckMsg(
+      L"..\\CodeGenHLSL\\loop1.hlsl", "ps_6_0",
+      {"br i1 %exitcond, label %for.end.loopexit, label %for.body, !llvm.loop !([0-9]+)",
+       "%add.lcssa = phi float \\[ %add, %for.body \\]",
+       "!dx.entryPoints = !\\{!([0-9]+)\\}",
+       "\\[ %add.lcssa, %for.end.loopexit \\]"
+      },
+      {"br label %for.body",
+       "",
+       "!dx.entryPoints = !\\{!\\1\\}\n!dx.unused = !\\{!\\1\\}",
+       "[ 0.000000e+00, %for.end.loopexit ]"
+      },
+      {"Loop must have break",
+       "Named metadata 'dx.unused' is unknown",
+      },
+      /*bRegex*/true);
 }
 TEST_F(ValidationTest, EvalFail) {
-  TestCheck(L"dxil_validation\\Eval.ll");
+  RewriteAssemblyCheckMsg(
+      L"..\\CodeGenHLSL\\Eval.hlsl", "ps_6_0",
+      "!\"A\", i8 9, i8 0, !([0-9]+), i8 2, i32 1, i8 4",
+      "!\"A\", i8 9, i8 0, !\\1, i8 0, i32 1, i8 4",
+      "Interpolation mode on A used with eval_\\* instruction must be ",
+      /*bRegex*/true);
 }
 TEST_F(ValidationTest, GetDimCalcLODFail) {
-  TestCheck(L"dxil_validation\\GetDimCalcLOD.ll");
+  RewriteAssemblyCheckMsg(
+      L"..\\CodeGenHLSL\\GetDimCalcLOD.hlsl", "ps_6_0",
+      {"extractvalue %dx.types.Dimensions %2, 1",
+       "float 1.000000e+00, i1 true"
+      },
+      {"extractvalue %dx.types.Dimensions %2, 2",
+       "float undef, i1 true"
+      },
+      {"GetDimensions used undef dimension z on TextureCube",
+       "coord uninitialized"});
 }
 TEST_F(ValidationTest, HsAttributeFail) {
-  TestCheck(L"dxil_validation\\hsAttribute.ll");
+  RewriteAssemblyCheckMsg(
+      L"..\\CodeGenHLSL\\hsAttribute.hlsl", "hs_6_0",
+      {"i32 3, i32 3, i32 2, i32 3, i32 3, float 6.400000e+01"
+      },
+      {"i32 36, i32 36, i32 0, i32 0, i32 0, float 6.500000e+01"
+      },
+      {"HS input control point count must be [1..32].  36 specified",
+       "Invalid Tessellator Domain specified. Must be isoline, tri or quad",
+       "Invalid Tessellator Partitioning specified",
+       "Invalid Tessellator Output Primitive specified",
+       "Hull Shader MaxTessFactor must be [1.000000..64.000000].  65.000000 specified",
+       "output control point count must be [0..32].  36 specified"});
 }
 TEST_F(ValidationTest, InnerCoverageFail) {
-  TestCheck(L"dxil_validation\\InnerCoverage.ll");
+  RewriteAssemblyCheckMsg(
+      L"..\\CodeGenHLSL\\InnerCoverage2.hlsl", "ps_6_0",
+      {"dx.op.coverage.i32(i32 93)",
+       "declare i32 @dx.op.coverage.i32(i32)"
+      },
+      {"dx.op.coverage.i32(i32 93)\n  %inner = call i32 @dx.op.innercoverage.i32(i32 94)",
+       "declare i32 @dx.op.coverage.i32(i32)\n"
+       "declare i32 @dx.op.innercoverage.i32(i32)"
+      },
+      "InnerCoverage and Coverage are mutually exclusive.");
 }
 TEST_F(ValidationTest, InterpChangeFail) {
-  TestCheck(L"dxil_validation\\interpChange.ll");
+  RewriteAssemblyCheckMsg(
+      L"..\\CodeGenHLSL\\interpChange.hlsl", "ps_6_0",
+      "i32 1, i8 0, null}",
+      "i32 0, i8 2, null}",
+      "interpolation mode that differs from another element packed",
+      /*bRegex*/true);
 }
 TEST_F(ValidationTest, InterpOnIntFail) {
-  TestCheck(L"dxil_validation\\interpOnInt.ll");
+    RewriteAssemblyCheckMsg(
+      L"..\\CodeGenHLSL\\interpOnInt2.hlsl", "ps_6_0",
+      "!\"A\", i8 5, i8 0, !([0-9]+), i8 1",
+      "!\"A\", i8 5, i8 0, !\\1, i8 2",
+      "signature element A specifies invalid interpolation mode for integer component type",
+      /*bRegex*/true);
 }
 TEST_F(ValidationTest, InvalidSigCompTyFail) {
-  TestCheck(L"dxil_validation\\invalidSigCompTy.ll");
+    RewriteAssemblyCheckMsg(
+      L"..\\CodeGenHLSL\\abs2.hlsl", "ps_6_0",
+      "!\"A\", i8 4",
+      "!\"A\", i8 0",
+      "A specifies unrecognized or invalid component type");
 }
 TEST_F(ValidationTest, MultiStream2Fail) {
-  TestCheck(L"dxil_validation\\multiStream2.ll");
+  RewriteAssemblyCheckMsg(
+      L"..\\CodeGenHLSL\\multiStreamGS.hlsl", "gs_6_0",
+      "i32 1, i32 12, i32 7, i32 1, i32 1",
+      "i32 1, i32 12, i32 7, i32 2, i32 1",
+      "Multiple GS output streams are used but 'XXX' is not pointlist");
 }
 TEST_F(ValidationTest, PhiTGSMFail) {
-  TestCheck(L"dxil_validation\\phiTGSM.ll");
+  RewriteAssemblyCheckMsg(
+      L"..\\CodeGenHLSL\\phiTGSM.hlsl", "cs_6_0",
+      "ret void",
+      "%arrayPhi = phi i32 addrspace(3)* [ %arrayidx, %if.then ], [ %arrayidx2, %if.else ]\n"
+      "%phiAtom = atomicrmw add i32 addrspace(3)* %arrayPhi, i32 1 seq_cst\n"
+      "ret void",
+      "TGSM pointers must originate from an unambiguous TGSM global variable");
 }
 TEST_F(ValidationTest, ReducibleFail) {
-  TestCheck(L"dxil_validation\\reducible.ll");
+  RewriteAssemblyCheckMsg(
+      L"..\\CodeGenHLSL\\reducible.hlsl", "ps_6_0",
+      {"%conv\n"
+       "  br label %if.end",
+       "to float\n"
+       "  br label %if.end"
+      },
+      {"%conv\n"
+      "  br i1 %cmp.i0, label %if.else, label %if.end",
+       "to float\n"
+       "  br i1 %cmp.i0, label %if.then, label %if.end"
+      },
+      "Execution flow must be reducible");
 }
 TEST_F(ValidationTest, SampleBiasFail) {
-  TestCheck(L"dxil_validation\\sampleBias.ll");
+  RewriteAssemblyCheckMsg(
+      L"..\\CodeGenHLSL\\sampleBias.hlsl", "ps_6_0",
+      {"float -1.600000e+01"
+      },
+      {"float 1.800000e+01"
+      },
+      "bias amount for sample_b must be in the range [-16.000000,15.990000]");
 }
 TEST_F(ValidationTest, SamplerKindFail) {
-  TestCheck(L"dxil_validation\\samplerKind.ll");
+  RewriteAssemblyCheckMsg(
+      L"..\\CodeGenHLSL\\samplerKind.hlsl", "ps_6_0",
+      {"uav1_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 1",
+       "g_txDiffuse_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0",
+       "\"g_samLinear\", i32 0, i32 0, i32 1, i32 0",
+       "\"g_samLinearC\", i32 0, i32 1, i32 1, i32 1",
+      },
+      {"uav1_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0",
+       "g_txDiffuse_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 1",
+       "\"g_samLinear\", i32 0, i32 0, i32 1, i32 3",
+       "\"g_samLinearC\", i32 0, i32 1, i32 1, i32 3",
+      },
+      {"Invalid sampler mode",
+       "require sampler declared in comparison mode",
+       "requires sampler declared in default mode",
+       "should on srv resource"});
 }
 TEST_F(ValidationTest, SemaOverlapFail) {
-  TestCheck(L"dxil_validation\\semaOverlap.ll");
+  RewriteAssemblyCheckMsg(
+      L"..\\CodeGenHLSL\\semaOverlap1.hlsl", "ps_6_0",
+      {"!([0-9]+) = !\\{i32 0, !\"A\", i8 9, i8 0, !([0-9]+), i8 2, i32 1, i8 4, i32 0, i8 0, null\\}\n"
+      "!([0-9]+) = !\\{i32 0\\}\n"
+      "!([0-9]+) = !\\{i32 1, !\"A\", i8 9, i8 0, !([0-9]+)",
+      },
+      {"!\\1 = !\\{i32 0, !\"A\", i8 9, i8 0, !\\2, i8 2, i32 1, i8 4, i32 0, i8 0, null\\}\n"
+      "!\\3 = !\\{i32 0\\}\n"
+      "!\\4 = !\\{i32 1, !\"A\", i8 9, i8 0, !\\2",
+      },
+      {"Semantic 'A' overlap at 0"},
+      /*bRegex*/true);
 }
 TEST_F(ValidationTest, SigOutOfRangeFail) {
-  TestCheck(L"dxil_validation\\sigOutOfRange.ll");
+  RewriteAssemblyCheckMsg(
+      L"..\\CodeGenHLSL\\semaOverlap1.hlsl", "ps_6_0",
+      {"i32 1, i8 0, null}",
+      },
+      {"i32 8000, i8 0, null}",
+      },
+      {"signature element A at location (8000,0) size (1,4) is out of range"});
 }
 TEST_F(ValidationTest, SigOverlapFail) {
-  TestCheck(L"dxil_validation\\sigOverlap.ll");
+  RewriteAssemblyCheckMsg(
+      L"..\\CodeGenHLSL\\semaOverlap1.hlsl", "ps_6_0",
+      {"i32 1, i8 0, null}",
+      },
+      {"i32 0, i8 0, null}",
+      },
+      {"signature element A at location (0,0) size (1,4) overlaps another signature element"});
 }
 TEST_F(ValidationTest, SimpleHs1Fail) {
-  TestCheck(L"dxil_validation\\SimpleHs1.ll");
+  RewriteAssemblyCheckMsg(
+      L"..\\CodeGenHLSL\\SimpleHs1.hlsl", "hs_6_0",
+      {"i32 3, i32 3, i32 2, i32 3, i32 3, float 6.400000e+01}",
+       "\"SV_TessFactor\", i8 9, i8 25",
+       "\"SV_InsideTessFactor\", i8 9, i8 26",
+      },
+      {"i32 3, i32 3000, i32 2, i32 3, i32 3, float 6.400000e+01}",
+       "\"TessFactor\", i8 9, i8 0",
+       "\"InsideTessFactor\", i8 9, i8 0",
+      },
+      {"output control point count must be [0..32].  3000 specified",
+       "Required TessFactor for domain not found declared anywhere in Patch Constant data",
+       // TODO: re-enable the two checks below once pass-through hull shaders are supported.
+       //"For pass thru hull shader, input control point count must match output control point count",
+       //"Total number of scalars across all HS output control points must not exceed",
+      });
 }
 TEST_F(ValidationTest, SimpleHs3Fail) {
-  TestCheck(L"dxil_validation\\SimpleHs3.ll");
+  RewriteAssemblyCheckMsg(
+      L"..\\CodeGenHLSL\\SimpleHs3.hlsl", "hs_6_0",
+      {
+          "i32 3, i32 3, i32 2, i32 3, i32 3, float 6.400000e+01}",
+      },
+      {
+          "i32 3, i32 3, i32 2, i32 3, i32 2, float 6.400000e+01}",
+      },
+      {"Hull Shader declared with Tri Domain must specify output primitive "
+       "point, triangle_cw or triangle_ccw. Line output is not compatible with "
+       "the Tri domain"});
 }
 TEST_F(ValidationTest, SimpleHs4Fail) {
-  TestCheck(L"dxil_validation\\SimpleHs4.ll");
+  RewriteAssemblyCheckMsg(
+      L"..\\CodeGenHLSL\\SimpleHs4.hlsl", "hs_6_0",
+      {
+          "i32 2, i32 2, i32 1, i32 3, i32 2, float 6.400000e+01}",
+      },
+      {
+          "i32 2, i32 2, i32 1, i32 3, i32 3, float 6.400000e+01}",
+      },
+      {"Hull Shader declared with IsoLine Domain must specify output primitive "
+       "point or line. Triangle_cw or triangle_ccw output are not compatible "
+       "with the IsoLine Domain"});
 }
 TEST_F(ValidationTest, SimpleDs1Fail) {
-  TestCheck(L"dxil_validation\\SimpleDs1.ll");
+  RewriteAssemblyCheckMsg(
+      L"..\\CodeGenHLSL\\SimpleDs1.hlsl", "ds_6_0",
+      {"!{i32 2, i32 3}"
+      },
+      {"!{i32 4, i32 36}"
+      },
+      {"DS input control point count must be [0..32].  36 specified",
+       "Invalid Tessellator Domain specified. Must be isoline, tri or quad",
+       "DomainLocation component index out of bounds for the domain"});
 }
 TEST_F(ValidationTest, SimpleGs1Fail) {
-  TestCheck(L"dxil_validation\\SimpleGs1.ll");
+  RewriteAssemblyCheckMsg(
+      L"..\\CodeGenHLSL\\SimpleGs1.hlsl", "gs_6_0",
+      {"!{i32 1, i32 3, i32 1, i32 5, i32 1}",
+       "i8 4, i32 1, i8 4, i32 1, i8 0, null}"
+      },
+      {"!{i32 5, i32 1025, i32 1, i32 0, i32 33}",
+      "i8 4, i32 1, i8 4, i32 1, i8 0, !100}\n"
+      "!100 = !{i32 0, i32 5}"
+      },
+      {"GS output vertex count must be [0..1024].  1025 specified",
+       "GS instance count must be [1..32].  33 specified",
+       "GS output primitive topology unrecognized",
+       "GS input primitive unrecognized",
+       "Stream index (5) must between 0 and 3"});
 }
 TEST_F(ValidationTest, UavBarrierFail) {
-  TestCheck(L"dxil_validation\\uavBarrier.ll");
+  RewriteAssemblyCheckMsg(
+      L"..\\CodeGenHLSL\\uavBarrier.hlsl", "ps_6_0",
+      {"dx.op.barrier(i32 82, i32 2)",
+       "textureLoad.f32(i32 68, %dx.types.Handle %uav1_UAV_2d, i32 undef",
+       "i32 undef, i32 undef, i32 undef, i32 undef)",
+       "float %add9.i3, i8 15)",
+      },
+      {"dx.op.barrier(i32 82, i32 9)",
+       "textureLoad.f32(i32 68, %dx.types.Handle %uav1_UAV_2d, i32 1",
+       "i32 1, i32 2, i32 undef, i32 undef)",
+       "float undef, i8 7)",
+      },
+      {"uav load don't support offset",
+       "uav load don't support mipLevel/sampleIndex",
+       "store on typed uav must write to all four components of the UAV",
+       "sync in a non-Compute Shader must only sync UAV (sync_uglobal)"});
 }
 TEST_F(ValidationTest, UndefValueFail) {
-  TestCheck(L"dxil_validation\\UndefValue.ll");
+  RewriteAssemblyCheckMsg(
+      L"..\\CodeGenHLSL\\UndefValue.hlsl", "ps_6_0",
+      {"fadd fast float %([0-9]+)"
+      },
+      {"fadd fast float undef"
+      },
+      {"Instructions should not read uninitialized value"},
+      /*bRegex*/ true);
 }
 TEST_F(ValidationTest, UpdateCounterFail) {
-  TestCheck(L"dxil_validation\\UpdateCounter.ll");
+  RewriteAssemblyCheckMsg(
+      L"..\\CodeGenHLSL\\UpdateCounter2.hlsl", "ps_6_0",
+      {"%2 = call i32 @dx.op.bufferUpdateCounter(i32 72, %dx.types.Handle %buf2_UAV_structbuf, i8 1)",
+       "%3 = call i32 @dx.op.bufferUpdateCounter(i32 72, %dx.types.Handle %buf2_UAV_structbuf, i8 1)"
+      },
+      {"%2 = call i32 @dx.op.bufferUpdateCounter(i32 72, %dx.types.Handle %buf2_UAV_structbuf, i8 -1)",
+       "%3 = call i32 @dx.op.bufferUpdateCounter(i32 72, %dx.types.Handle %buf2_UAV_structbuf, i8 1)\n"
+       "%srvUpdate = call i32 @dx.op.bufferUpdateCounter(i32 72, %dx.types.Handle %buf1_texture_buf, i8 undef)"
+      },
+      {"BufferUpdateCounter valid only on UAV",
+       "BufferUpdateCounter valid only on structured buffers",
+       "inc of BufferUpdateCounter must be an immediate constant",
+       "RWStructuredBuffers may increment or decrement their counters, but not both"});
 }
 
 TEST_F(ValidationTest, WhenIncorrectModelThenFail) {
@@ -588,8 +878,8 @@ TEST_F(ValidationTest, GsVertexIDOutOfBound) {
 TEST_F(ValidationTest, StreamIDOutOfBound) {
   RewriteAssemblyCheckMsg(
       L"..\\CodeGenHLSL\\SimpleGs1.hlsl", "gs_6_0",
-      "dx.op.emitStream(i32 97, i8 0)",
-      "dx.op.emitStream(i32 97, i8 1)", 
+      "dx.op.emitStream(i32 99, i8 0)",
+      "dx.op.emitStream(i32 99, i8 1)", 
       "expect StreamID between 0 , got 1");
 }
 
@@ -736,32 +1026,32 @@ TEST_F(ValidationTest, StructBufGlobalCoherentAndCounter) {
 TEST_F(ValidationTest, StructBufStrideAlign) {
     RewriteAssemblyCheckMsg(
       L"..\\CodeGenHLSL\\struct_buf1.hlsl", "ps_6_0",
-      "!7 = !{i32 1, i32 52}",
-      "!7 = !{i32 1, i32 50}",
+      "= !{i32 1, i32 52}",
+      "= !{i32 1, i32 50}",
       "structured buffer element size must be a multiple of 4 bytes (actual size 50 bytes)");
 }
 
 TEST_F(ValidationTest, StructBufStrideOutOfBound) {
     RewriteAssemblyCheckMsg(
       L"..\\CodeGenHLSL\\struct_buf1.hlsl", "ps_6_0",
-      "!7 = !{i32 1, i32 52}",
-      "!7 = !{i32 1, i32 2052}",
+      "= !{i32 1, i32 52}",
+      "= !{i32 1, i32 2052}",
       "structured buffer elements cannot be larger than 2048 bytes (actual size 2052 bytes)");
 }
 
 TEST_F(ValidationTest, StructBufLoadCoordinates) {
     RewriteAssemblyCheckMsg(
       L"..\\CodeGenHLSL\\struct_buf1.hlsl", "ps_6_0",
-      "bufferLoad.f32(i32 69, %dx.types.Handle %buf1_texture_structbuf, i32 1, i32 8)",
-      "bufferLoad.f32(i32 69, %dx.types.Handle %buf1_texture_structbuf, i32 1, i32 undef)",
+      "bufferLoad.f32(i32 70, %dx.types.Handle %buf1_texture_structbuf, i32 1, i32 8)",
+      "bufferLoad.f32(i32 70, %dx.types.Handle %buf1_texture_structbuf, i32 1, i32 undef)",
       "structured buffer require 2 coordinates");
 }
 
 TEST_F(ValidationTest, StructBufStoreCoordinates) {
     RewriteAssemblyCheckMsg(
       L"..\\CodeGenHLSL\\struct_buf1.hlsl", "ps_6_0",
-      "bufferStore.f32(i32 70, %dx.types.Handle %buf2_UAV_structbuf, i32 0, i32 0",
-      "bufferStore.f32(i32 70, %dx.types.Handle %buf2_UAV_structbuf, i32 0, i32 undef",
+      "bufferStore.f32(i32 71, %dx.types.Handle %buf2_UAV_structbuf, i32 0, i32 0",
+      "bufferStore.f32(i32 71, %dx.types.Handle %buf2_UAV_structbuf, i32 0, i32 undef",
       "structured buffer require 2 coordinates");
 }
 
@@ -864,17 +1154,19 @@ TEST_F(ValidationTest, PsOutputSemantic) {
 TEST_F(ValidationTest, ArrayOfSVTarget) {
     RewriteAssemblyCheckMsg(
       L"..\\CodeGenHLSL\\targetArray.hlsl", "ps_6_0",
-      "i32 6, !\"SV_Target\", i8 9, i8 16, !32, i8 0, i32 1",
-      "i32 6, !\"SV_Target\", i8 9, i8 16, !32, i8 0, i32 2",
-      "Pixel shader output registers are not indexable.");
+      "i32 6, !\"SV_Target\", i8 9, i8 16, !([0-9]+), i8 0, i32 1",
+      "i32 6, !\"SV_Target\", i8 9, i8 16, !\\1, i8 0, i32 2",
+      "Pixel shader output registers are not indexable.",
+      /*bRegex*/true);
 }
 
 TEST_F(ValidationTest, InfiniteLog) {
     RewriteAssemblyCheckMsg(
       L"..\\CodeGenHLSL\\intrinsic_val_imm.hlsl", "ps_6_0",
-      "op.unary.f32(i32 22, float %1)",
-      "op.unary.f32(i32 22, float 0x7FF0000000000000)",
-      "No indefinite logarithm");
+      "op.unary.f32\\(i32 23, float %[0-9]+\\)",  // '+' outside the class: match multi-digit value numbers
+      "op.unary.f32(i32 23, float 0x7FF0000000000000)",
+      "No indefinite logarithm",
+      /*bRegex*/true);
 }
 
 TEST_F(ValidationTest, InfiniteAsin) {
@@ -896,9 +1188,10 @@ TEST_F(ValidationTest, InfiniteAcos) {
 TEST_F(ValidationTest, InfiniteDdxDdy) {
     RewriteAssemblyCheckMsg(
       L"..\\CodeGenHLSL\\intrinsic_val_imm.hlsl", "ps_6_0",
-      "op.unary.f32(i32 86, float %1)",
-      "op.unary.f32(i32 86, float 0x7FF0000000000000)",
-      "No indefinite derivative calculation");
+      "op.unary.f32\\(i32 85, float %[0-9]+\\)",
+      "op.unary.f32(i32 85, float 0x7FF0000000000000)",
+      "No indefinite derivative calculation",
+      /*bRegex*/true);
 }
 
 TEST_F(ValidationTest, IDivByZero) {
@@ -981,9 +1274,19 @@ TEST_F(ValidationTest, MultiDimArray) {
 
 TEST_F(ValidationTest, NoFunctionParam) {
   RewriteAssemblyCheckMsg(L"..\\CodeGenHLSL\\abs2.hlsl", "ps_6_0",
-                          {"define void @main()", "void ()* @main", "!5 = !{!6}"},
-                          {"define void @main(<4 x i32> %mainArg)", "void (<4 x i32>)* @main", "!5 = !{!6, !6}"},
-                          "with parameter is not permitted");
+    {"define void @main\\(\\)",               "void \\(\\)\\* @main, !([0-9]+)\\}(.*)!\\1 = !\\{!([0-9]+)\\}",  "void \\(\\)\\* @main"},
+    {"define void @main(<4 x i32> %mainArg)", "void (<4 x i32>)* @main, !\\1}\\2!\\1 = !{!\\3, !\\3}",          "void (<4 x i32>)* @main"},
+    "with parameter is not permitted",
+    /*bRegex*/true);
+}
+
+TEST_F(ValidationTest, I8Type) {
+  RewriteAssemblyCheckMsg(L"..\\CodeGenHLSL\\staticGlobals.hlsl", "ps_6_0",
+                          "%([0-9]+) = alloca \\[4 x float\\]",
+                          "%\\1 = alloca [4 x float]\n"
+                          "  %m8 = alloca i8",
+                          "I8 can only used as immediate value for intrinsic",
+    /*bRegex*/true);
 }
 
 TEST_F(ValidationTest, WhenWaveAffectsGradientThenFail) {
@@ -1044,7 +1347,7 @@ HSPerVertexData main( const uint id : SV_OutputControlPointID,\
     ",
       "hs_6_0", 
       "dx.op.storeOutput.f32(i32 5",
-      "dx.op.storePatchConstant.f32(i32 109",
+      "dx.op.storePatchConstant.f32(i32 108",
       "opcode 'StorePatchConstant' should only used in 'PatchConstant function'");
 }
 
@@ -1095,7 +1398,7 @@ HSPerVertexData main( const uint id : SV_OutputControlPointID,\
     ",
       "hs_6_0",
       "dx.op.loadInput.f32(i32 4",
-      "dx.op.loadOutputControlPoint.f32(i32 106",
+      "dx.op.loadOutputControlPoint.f32(i32 105",
       "opcode 'LoadOutputControlPoint' should only used in 'PatchConstant function'");
 }
 
@@ -1146,7 +1449,7 @@ HSPerVertexData main( const uint id : SV_OutputControlPointID,\
     ",
       "hs_6_0",
       "ret void",
-      "call i32 @dx.op.outputControlPointID.i32(i32 110)\n ret void",
+      "call i32 @dx.op.outputControlPointID.i32(i32 109)\n ret void",
       "opcode 'OutputControlPointID' should only used in 'hull function'");
 }
 
@@ -1643,13 +1946,13 @@ void main( \
     ",
     "vs_6_0",
 
-    "!{i32 1, !\"Array\", i8 5, i8 0, !([0-9]+), i8 1, i32 2, i8 1, i32 1, i8 0, null}\n"
-    "!17 = !{i32 0, i32 1}\n"
-    "!([0-9]+) = !{i32 2, !\"Value\", i8 5, i8 0, !([0-9]+), i8 1, i32 1, i8 3, i32 1, i8 1, null}",
+    {"!{i32 1, !\"Array\", i8 5, i8 0, !([0-9]+), i8 1, i32 2, i8 1, i32 1, i8 0, null}(.*)"
+    "!\\1 = !{i32 0, i32 1}\n",
+    "= !{i32 2, !\"Value\", i8 5, i8 0, !([0-9]+), i8 1, i32 1, i8 3, i32 1, i8 1, null}"},
 
-    "!{i32 1, !\"Array\", i8 5, i8 0, !\\1, i8 1, i32 2, i8 1, i32 1, i8 1, null}\n"
-    "!17 = !{i32 0, i32 1}\n"
-    "!\\2 = !{i32 2, !\"Value\", i8 5, i8 0, !\\3, i8 1, i32 1, i8 3, i32 2, i8 0, null}",
+    {"!{i32 1, !\"Array\", i8 5, i8 0, !\\1, i8 1, i32 2, i8 1, i32 1, i8 1, null}\\2"
+    "!\\1 = !{i32 0, i32 1}\n",
+    "= !{i32 2, !\"Value\", i8 5, i8 0, !\\1, i8 1, i32 1, i8 3, i32 2, i8 0, null}"},
 
     "signature element Value at location \\(2,0\\) size \\(1,3\\) overlaps another signature element.",
     /*bRegex*/true);
@@ -1657,18 +1960,12 @@ void main( \
 
 TEST_F(ValidationTest, SemMultiDepth) {
   RewriteAssemblyCheckMsg(" \
-float4 main(float4 f4 : Input, out float d0 : SV_Depth, out float d1 : SV_Target1) : SV_Target \
+float4 main(float4 f4 : Input, out float d0 : SV_Depth, out float d1 : SV_Target) : SV_Target1 \
 { d0 = f4.z; d1 = f4.w; return f4; } \
     ",
     "ps_6_0",
-
-    "!{i32 1, !\"SV_Target\", i8 9, i8 16, !([0-9]+), i8 0, i32 1, i8 1, i32 1, i8 0, null}\n"
-    "!16 = !{i32 1}\n"
-    "!([0-9]+) = !{i32 2, !\"SV_Target\", i8 9, i8 16, !([0-9]+), i8 0, i32 1, i8 4, i32 0, i8 0, null}",
-
-    "!{i32 1, !\"SV_DepthGreaterEqual\", i8 9, i8 19, !\\3, i8 0, i32 1, i8 1, i32 -1, i8 -1, null}\n"
-    "!\\2 = !{i32 2, !\"SV_Target\", i8 9, i8 16, !\\3, i8 0, i32 1, i8 4, i32 0, i8 0, null}",
-
+    {"!{i32 1, !\"SV_Target\", i8 9, i8 16, !([0-9]+), i8 0, i32 1, i8 1, i32 0, i8 0, null}"},
+    {"!{i32 1, !\"SV_DepthGreaterEqual\", i8 9, i8 19, !\\1, i8 0, i32 1, i8 1, i32 -1, i8 -1, null}"},
     "Pixel Shader only allows one type of depth semantic to be declared",
     /*bRegex*/true);
 }
diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py
index 9f14dc592..fb4bc78b5 100644
--- a/utils/hct/hctdb.py
+++ b/utils/hct/hctdb.py
@@ -246,7 +246,7 @@ class db_dxil(object):
         for i in "ThreadId,GroupId,ThreadIdInGroup,FlattenedThreadIdInGroup".split(","):
             self.name_idx[i].category = "Compute shader"
             self.name_idx[i].shader_models = "c"
-        for i in "EmitStream,CutStream,EmitThenCutStream".split(","):
+        for i in "EmitStream,CutStream,EmitThenCutStream,GSInstanceID".split(","):
             self.name_idx[i].category = "Geometry shader"
             self.name_idx[i].shader_models = "g"
         for i in "LoadOutputControlPoint,LoadPatchConstant".split(","):
@@ -262,9 +262,6 @@ class db_dxil(object):
             self.name_idx[i].category = "Double precision"
         for i in "CycleCounterLegacy".split(","):
             self.name_idx[i].category = "Other"
-        for i in "GSInstanceID".split(","):
-            self.name_idx[i].category = "GS"
-            self.name_idx[i].shader_models = "g"
         for i in "LegacyF32ToF16,LegacyF16ToF32".split(","):
             self.name_idx[i].category = "Legacy floating-point"
         for i in self.instr:
@@ -360,39 +357,45 @@ class db_dxil(object):
         # overload types are a string of (v)oid, (h)alf, (f)loat, (d)ouble, (1)-bit, (8)-bit, (w)ord, (i)nt, (l)ong
         self.opcode_param = db_dxil_param(1, "i32", "opcode", "DXIL opcode")
         retvoid_param = db_dxil_param(0, "v", "", "no return value")
-        self.add_dxil_op("TempRegLoad", 0, "TempRegLoad", "helper load operation", "hfwi", "ro", [
+        next_op_idx = 0
+        self.add_dxil_op("TempRegLoad", next_op_idx, "TempRegLoad", "helper load operation", "hfwi", "ro", [
             db_dxil_param(0, "$o", "", "register value"),
             db_dxil_param(2, "u32", "index", "linearized register index")])
-        self.add_dxil_op("TempRegStore", 1, "TempRegStore", "helper store operation", "hfwi", "", [
+        next_op_idx += 1
+        self.add_dxil_op("TempRegStore", next_op_idx, "TempRegStore", "helper store operation", "hfwi", "", [
             retvoid_param,
             db_dxil_param(2, "u32", "index", "linearized register index"),
             db_dxil_param(3, "$o", "value", "value to store")])
-        self.add_dxil_op("MinPrecXRegLoad", 2, "MinPrecXRegLoad", "helper load operation for minprecision", "hw", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("MinPrecXRegLoad", next_op_idx, "MinPrecXRegLoad", "helper load operation for minprecision", "hw", "ro", [
             db_dxil_param(0, "$o", "", "register value"),
             db_dxil_param(2, "pf32", "regIndex", "pointer to indexable register"),
             db_dxil_param(3, "i32", "index", "index"),
             db_dxil_param(4, "u8", "component", "component")])
-        self.add_dxil_op("MinPrecXRegStore", 3, "MinPrecXRegStore", "helper store operation for minprecision", "hw", "", [
+        next_op_idx += 1
+        self.add_dxil_op("MinPrecXRegStore", next_op_idx, "MinPrecXRegStore", "helper store operation for minprecision", "hw", "", [
             retvoid_param,
             db_dxil_param(2, "pf32", "regIndex", "pointer to indexable register"),
             db_dxil_param(3, "i32", "index", "index"),
             db_dxil_param(4, "u8", "component", "component"),
             db_dxil_param(5, "$o", "value", "value to store")])
-        self.add_dxil_op("LoadInput", 4, "LoadInput", "loads the value from shader input", "hfwi", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("LoadInput", next_op_idx, "LoadInput", "loads the value from shader input", "hfwi", "rn", [
             db_dxil_param(0, "$o", "", "input value"),
             db_dxil_param(2, "u32", "inputSigId", "input signature element ID"),
             db_dxil_param(3, "u32", "rowIndex", "row index relative to element"),
             db_dxil_param(4, "u8", "colIndex", "column index relative to element"),
             db_dxil_param(5, "i32", "gsVertexAxis", "gsVertexAxis")])
-        self.add_dxil_op("StoreOutput", 5, "StoreOutput", "stores the value to shader output", "hfwi", "", [ # note, cannot store bit even though load supports it
+        next_op_idx += 1
+        self.add_dxil_op("StoreOutput", next_op_idx, "StoreOutput", "stores the value to shader output", "hfwi", "", [ # note, cannot store bit even though load supports it
             retvoid_param,
             db_dxil_param(2, "u32", "outputtSigId", "output signature element ID"),
             db_dxil_param(3, "u32", "rowIndex", "row index relative to element"),
             db_dxil_param(4, "u8", "colIndex", "column index relative to element"),
             db_dxil_param(5, "$o", "value", "value to store")])
+        next_op_idx += 1
 
         # Unary float operations are regular.
-        next_op_idx = 6
         for i in "FAbs,Saturate".split(","):
             self.add_dxil_op(i, next_op_idx, "Unary", "returns the " + i, "hfd", "rn", [
                 db_dxil_param(0, "$o", "", "operation result"),
@@ -403,12 +406,11 @@ class db_dxil(object):
                 db_dxil_param(0, "i1", "", "operation result"),
                 db_dxil_param(2, "$o", "value", "input value")])
             next_op_idx += 1
-        for i in "Cos,Sin,Tan,Acos,Asin,Atan,Hcos,Hsin,Exp,Frc,Log,Sqrt,Rsqrt,Round_ne,Round_ni,Round_pi,Round_z".split(","):
+        for i in "Cos,Sin,Tan,Acos,Asin,Atan,Hcos,Hsin,Htan,Exp,Frc,Log,Sqrt,Rsqrt,Round_ne,Round_ni,Round_pi,Round_z".split(","):
             self.add_dxil_op(i, next_op_idx, "Unary", "returns the " + i, "hf", "rn", [
                 db_dxil_param(0, "$o", "", "operation result"),
                 db_dxil_param(2, "$o", "value", "input value")])
             next_op_idx += 1
-        # HTan is in this category but is out of order.
 
         # Unary int operations are regular.
         for i in "Bfrev".split(","):
@@ -459,20 +461,20 @@ class db_dxil(object):
             next_op_idx += 1
 
         # Tertiary float.
-        assert next_op_idx == 47, "next operation index is %d rather than 47 and thus opcodes are broken" % next_op_idx
-        self.add_dxil_op("FMad", 47, "Tertiary", "performs a fused multiply add (FMA) of the form a * b + c", "hfd", "rn", [
+        self.add_dxil_op("FMad", next_op_idx, "Tertiary", "performs a fused multiply add (FMA) of the form a * b + c", "hfd", "rn", [
             db_dxil_param(0, "$o", "", "the fused multiply-addition of parameters a * b + c"),
             db_dxil_param(2, "$o", "a", "first value for FMA, the first factor"),
             db_dxil_param(3, "$o", "b", "second value for FMA, the second factor"),
             db_dxil_param(4, "$o", "c", "third value for FMA, the addend")])
-        self.add_dxil_op("Fma", 48, "Tertiary", "performs a fused multiply add (FMA) of the form a * b + c", "d", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("Fma", next_op_idx, "Tertiary", "performs a fused multiply add (FMA) of the form a * b + c", "d", "rn", [
             db_dxil_param(0, "$o", "", "the double-precision fused multiply-addition of parameters a * b + c, accurate to 0.5 units of least precision (ULP)"),
             db_dxil_param(2, "$o", "a", "first value for FMA, the first factor"),
             db_dxil_param(3, "$o", "b", "second value for FMA, the second factor"),
             db_dxil_param(4, "$o", "c", "third value for FMA, the addend")])
+        next_op_idx += 1
 
         # Tertiary int.
-        next_op_idx = 49
         for i in "IMad,UMad".split(","):
             self.add_dxil_op(i, next_op_idx, "Tertiary", "performs an integral " + i, "wil", "rn", [
                 db_dxil_param(0, "$o", "", "the operation result"),
@@ -489,22 +491,23 @@ class db_dxil(object):
             next_op_idx += 1
 
         # Quaternary
-        assert next_op_idx == 54, "next operation index is %d rather than 54 and thus opcodes are broken" % next_op_idx
-        self.add_dxil_op("Bfi", 54, "Quaternary", "given a bit range from the LSB of a number, places that number of bits in another number at any offset", "i", "rn", [
+        self.add_dxil_op("Bfi", next_op_idx, "Quaternary", "given a bit range from the LSB of a number, places that number of bits in another number at any offset", "i", "rn", [
             db_dxil_param(0, "$o", "", "the operation result"),
             db_dxil_param(2, "$o", "width", "the bitfield width to take from the value"),
             db_dxil_param(3, "$o", "offset", "the bitfield offset to replace in the value"),
             db_dxil_param(4, "$o", "value", "the number the bits are taken from"),
             db_dxil_param(5, "$o", "replaceCount", "the number of bits to be replaced")])
+        next_op_idx += 1
 
         # Dot.
-        self.add_dxil_op("Dot2", 55, "Dot2", "two-dimensional vector dot-product", "hf", "rn", [
+        self.add_dxil_op("Dot2", next_op_idx, "Dot2", "two-dimensional vector dot-product", "hf", "rn", [
             db_dxil_param(0, "$o", "", "the operation result"),
             db_dxil_param(2, "$o", "ax", "the first component of the first vector"),
             db_dxil_param(3, "$o", "ay", "the second component of the first vector"),
             db_dxil_param(4, "$o", "bx", "the first component of the second vector"),
             db_dxil_param(5, "$o", "by", "the second component of the second vector")])
-        self.add_dxil_op("Dot3", 56, "Dot3", "three-dimensional vector dot-product", "hf", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("Dot3", next_op_idx, "Dot3", "three-dimensional vector dot-product", "hf", "rn", [
             db_dxil_param(0, "$o", "", "the operation result"),
             db_dxil_param(2, "$o", "ax", "the first component of the first vector"),
             db_dxil_param(3, "$o", "ay", "the second component of the first vector"),
@@ -512,7 +515,8 @@ class db_dxil(object):
             db_dxil_param(5, "$o", "bx", "the first component of the second vector"),
             db_dxil_param(6, "$o", "by", "the second component of the second vector"),
             db_dxil_param(7, "$o", "bz", "the third component of the second vector")])
-        self.add_dxil_op("Dot4", 57, "Dot4", "four-dimensional vector dot-product", "hf", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("Dot4", next_op_idx, "Dot4", "four-dimensional vector dot-product", "hf", "rn", [
             db_dxil_param(0, "$o", "", "the operation result"),
             db_dxil_param(2, "$o", "ax", "the first component of the first vector"),
             db_dxil_param(3, "$o", "ay", "the second component of the first vector"),
@@ -522,24 +526,28 @@ class db_dxil(object):
             db_dxil_param(7, "$o", "by", "the second component of the second vector"),
             db_dxil_param(8, "$o", "bz", "the third component of the second vector"),
             db_dxil_param(9, "$o", "bw", "the fourth component of the second vector")])
+        next_op_idx += 1
 
         # Resources.
-        self.add_dxil_op("CreateHandle", 58, "CreateHandle", "creates the handle to a resource", "v", "ro", [
+        self.add_dxil_op("CreateHandle", next_op_idx, "CreateHandle", "creates the handle to a resource", "v", "ro", [
             db_dxil_param(0, "res", "", "the handle to the resource"),
             db_dxil_param(2, "i8", "resourceClass", "the class of resource to create (SRV, UAV, CBuffer, Sampler)", is_const=True), # maps to DxilResourceBase::Class
             db_dxil_param(3, "i32", "rangeId", "range identifier for resource"),
             db_dxil_param(4, "i32", "index", "zero-based index into range"),
             db_dxil_param(5, "i1", "nonUniformIndex", "non-uniform resource index", is_const=True)])
-        self.add_dxil_op("CBufferLoad", 59, "CBufferLoad", "loads a value from a constant buffer resource", "hfd8wil", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("CBufferLoad", next_op_idx, "CBufferLoad", "loads a value from a constant buffer resource", "hfd8wil", "ro", [
             db_dxil_param(0, "$o", "", "the value for the constant buffer variable"),
             db_dxil_param(2, "res", "handle", "cbuffer handle"),
             db_dxil_param(3, "u32", "byteOffset", "linear byte offset of value"),
             db_dxil_param(4, "u32", "alignment", "load access alignment", is_const=True)])
-        self.add_dxil_op("CBufferLoadLegacy", 60, "CBufferLoadLegacy", "loads a value from a constant buffer resource", "hfdwi", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("CBufferLoadLegacy", next_op_idx, "CBufferLoadLegacy", "loads a value from a constant buffer resource", "hfdwi", "ro", [
             db_dxil_param(0, "$cb", "", "the value for the constant buffer variable"),
             db_dxil_param(2, "res", "handle", "cbuffer handle"),
             db_dxil_param(3, "u32", "regIndex", "0-based index into cbuffer instance")])
-        self.add_dxil_op("Sample", 61, "Sample", "samples a texture", "hf", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("Sample", next_op_idx, "Sample", "samples a texture", "hf", "ro", [
             db_dxil_param(0, "$r", "", "the sampled value"),
             db_dxil_param(2, "res", "srv", "handle of SRV to sample"),
             db_dxil_param(3, "res", "sampler", "handle of sampler to use"),
@@ -551,7 +559,8 @@ class db_dxil(object):
             db_dxil_param(9, "i32", "offset1", "optional offset, applicable to Texture2D, Texture2DArray, and as part of offset2"),
             db_dxil_param(10, "i32", "offset2", "optional offset, applicable to Texture3D"),
             db_dxil_param(11, "f", "clamp", "clamp value")])
-        self.add_dxil_op("SampleBias", 62, "SampleBias", "samples a texture after applying the input bias to the mipmap level", "hf", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("SampleBias", next_op_idx, "SampleBias", "samples a texture after applying the input bias to the mipmap level", "hf", "ro", [
             db_dxil_param(0, "$r", "", "the sampled value"),
             db_dxil_param(2, "res", "srv", "handle of SRV to sample"),
             db_dxil_param(3, "res", "sampler", "handle of sampler to use"),
@@ -564,7 +573,8 @@ class db_dxil(object):
             db_dxil_param(10, "i32", "offset2", "optional offset, applicable to Texture3D"),
             db_dxil_param(11, "f", "bias", "bias value"),
             db_dxil_param(12, "f", "clamp", "clamp value")])
-        self.add_dxil_op("SampleLevel", 63, "SampleLevel", "samples a texture using a mipmap-level offset", "hf", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("SampleLevel", next_op_idx, "SampleLevel", "samples a texture using a mipmap-level offset", "hf", "ro", [
             db_dxil_param(0, "$r", "", "the sampled value"),
             db_dxil_param(2, "res", "srv", "handle of SRV to sample"),
             db_dxil_param(3, "res", "sampler", "handle of sampler to use"),
@@ -576,7 +586,8 @@ class db_dxil(object):
             db_dxil_param(9, "i32", "offset1", "optional offset, applicable to Texture2D, Texture2DArray, and as part of offset2"),
             db_dxil_param(10, "i32", "offset2", "optional offset, applicable to Texture3D"),
             db_dxil_param(11, "f", "LOD", "level of detail, biggest map if less than or equal to zero; fraction used to interpolate across levels")])
-        self.add_dxil_op("SampleGrad", 64, "SampleGrad", "samples a texture using a gradient to influence the way the sample location is calculated", "hf", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("SampleGrad", next_op_idx, "SampleGrad", "samples a texture using a gradient to influence the way the sample location is calculated", "hf", "ro", [
             db_dxil_param(0, "$r", "", "the sampled value"),
             db_dxil_param(2, "res", "srv", "handle of SRV to sample"),
             db_dxil_param(3, "res", "sampler", "handle of sampler to use"),
@@ -594,7 +605,8 @@ class db_dxil(object):
             db_dxil_param(15, "f", "ddy1", "rate of change of the texture coordinate in the y direction"),
             db_dxil_param(16, "f", "ddy2", "rate of change of the texture coordinate in the y direction"),
             db_dxil_param(17, "f", "clamp", "clamp value")])
-        self.add_dxil_op("SampleCmp", 65, "SampleCmp", "samples a texture and compares a single component against the specified comparison value", "hf", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("SampleCmp", next_op_idx, "SampleCmp", "samples a texture and compares a single component against the specified comparison value", "hf", "ro", [
             db_dxil_param(0, "$r", "", "the value for the constant buffer variable"),
             db_dxil_param(2, "res", "srv", "handle of SRV to sample"),
             db_dxil_param(3, "res", "sampler", "handle of sampler to use"),
@@ -607,7 +619,8 @@ class db_dxil(object):
             db_dxil_param(10, "i32", "offset2", "optional offset, applicable to Texture3D"),
             db_dxil_param(11, "f", "compareValue", "the value to compare with"),
             db_dxil_param(12, "f", "clamp", "clamp value")])
-        self.add_dxil_op("SampleCmpLevelZero", 66, "SampleCmpLevelZero", "samples a texture and compares a single component against the specified comparison value", "hf", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("SampleCmpLevelZero", next_op_idx, "SampleCmpLevelZero", "samples a texture and compares a single component against the specified comparison value", "hf", "ro", [
             db_dxil_param(0, "$r", "", "the value for the constant buffer variable"),
             db_dxil_param(2, "res", "srv", "handle of SRV to sample"),
             db_dxil_param(3, "res", "sampler", "handle of sampler to use"),
@@ -619,7 +632,8 @@ class db_dxil(object):
             db_dxil_param(9, "i32", "offset1", "optional offset, applicable to Texture2D, Texture2DArray, and as part of offset2"),
             db_dxil_param(10, "i32", "offset2", "optional offset, applicable to Texture3D"),
             db_dxil_param(11, "f", "compareValue", "the value to compare with")])
-        self.add_dxil_op("TextureLoad", 67, "TextureLoad", "reads texel data without any filtering or sampling", "hfwi", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("TextureLoad", next_op_idx, "TextureLoad", "reads texel data without any filtering or sampling", "hfwi", "ro", [
             db_dxil_param(0, "$r", "", "the loaded value"),
             db_dxil_param(2, "res", "srv", "handle of SRV or UAV to sample"),
             db_dxil_param(3, "i32", "mipLevelOrSampleCount", "sample count for Texture2DMS, mip level otherwise"),
@@ -629,7 +643,8 @@ class db_dxil(object):
             db_dxil_param(7, "i32", "offset0", "optional offset"),
             db_dxil_param(8, "i32", "offset1", "optional offset"),
             db_dxil_param(9, "i32", "offset2", "optional offset")])
-        self.add_dxil_op("TextureStore", 68, "TextureStore", "reads texel data without any filtering or sampling", "hfwi", "", [
+        next_op_idx += 1
+        self.add_dxil_op("TextureStore", next_op_idx, "TextureStore", "reads texel data without any filtering or sampling", "hfwi", "", [
             db_dxil_param(0, "v", "", ""),
             db_dxil_param(2, "res", "srv", "handle of UAV to store to"),
             db_dxil_param(3, "i32", "coord0", "coordinate"),
@@ -640,12 +655,14 @@ class db_dxil(object):
             db_dxil_param(8, "$o", "value2", "value"),
             db_dxil_param(9, "$o", "value3", "value"),
             db_dxil_param(10,"i8", "mask", "written value mask")])
-        self.add_dxil_op("BufferLoad", 69, "BufferLoad", "reads from a TypedBuffer", "hfwil", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("BufferLoad", next_op_idx, "BufferLoad", "reads from a TypedBuffer", "hfwil", "ro", [
             db_dxil_param(0, "$r", "", "the loaded value"),
             db_dxil_param(2, "res", "srv", "handle of TypedBuffer SRV to sample"),
             db_dxil_param(3, "i32", "index", "element index"),
             db_dxil_param(4, "i32", "wot", "coordinate")])
-        self.add_dxil_op("BufferStore", 70, "BufferStore", "writes to a RWTypedBuffer", "hfwil", "", [
+        next_op_idx += 1
+        self.add_dxil_op("BufferStore", next_op_idx, "BufferStore", "writes to a RWTypedBuffer", "hfwil", "", [
             db_dxil_param(0, "v", "", ""),
             db_dxil_param(2, "res", "uav", "handle of UAV to store to"),
             db_dxil_param(3, "i32", "coord0", "coordinate in elements"),
@@ -655,18 +672,22 @@ class db_dxil(object):
             db_dxil_param(7, "$o", "value2", "value"),
             db_dxil_param(8, "$o", "value3", "value"),
             db_dxil_param(9, "i8", "mask", "written value mask")])
-        self.add_dxil_op("BufferUpdateCounter", 71, "BufferUpdateCounter", "atomically increments/decrements the hidden 32-bit counter stored with a Count or Append UAV", "v", "", [
+        next_op_idx += 1
+        self.add_dxil_op("BufferUpdateCounter", next_op_idx, "BufferUpdateCounter", "atomically increments/decrements the hidden 32-bit counter stored with a Count or Append UAV", "v", "", [
             db_dxil_param(0, "i32", "", "the new value in the buffer"),
             db_dxil_param(2, "res", "uav", "handle to a structured buffer UAV with the count or append flag"),
             db_dxil_param(3, "i8", "inc", "1 to increase, 0 to decrease")])
-        self.add_dxil_op("CheckAccessFullyMapped", 72, "CheckAccessFullyMapped", "determines whether all values from a Sample, Gather, or Load operation accessed mapped tiles in a tiled resource", "i", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("CheckAccessFullyMapped", next_op_idx, "CheckAccessFullyMapped", "determines whether all values from a Sample, Gather, or Load operation accessed mapped tiles in a tiled resource", "i", "ro", [
             db_dxil_param(0, "i1", "", "nonzero if all values accessed mapped tiles in a tiled resource"),
             db_dxil_param(2, "u32", "status", "status result from the Sample, Gather or Load operation")])
-        self.add_dxil_op("GetDimensions", 73, "GetDimensions", "gets texture size information", "v", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("GetDimensions", next_op_idx, "GetDimensions", "gets texture size information", "v", "ro", [
             db_dxil_param(0, "dims", "", "dimension information for texture"),
             db_dxil_param(2, "res", "handle", "resource handle to query"),
             db_dxil_param(3, "i32", "mipLevel", "mip level to query")])
-        self.add_dxil_op("TextureGather", 74, "TextureGather", "gathers the four texels that would be used in a bi-linear filtering operation", "fi", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("TextureGather", next_op_idx, "TextureGather", "gathers the four texels that would be used in a bi-linear filtering operation", "fi", "ro", [
             db_dxil_param(0, "$r", "", "dimension information for texture"),
             db_dxil_param(2, "res", "srv", "handle of SRV to sample"),
             db_dxil_param(3, "res", "sampler", "handle of sampler to use"),
@@ -677,7 +698,8 @@ class db_dxil(object):
             db_dxil_param(8, "i32", "offset0", "optional offset, applicable to Texture1D, Texture1DArray, and as part of offset1"),
             db_dxil_param(9, "i32", "offset1", "optional offset, applicable to Texture2D, Texture2DArray, and as part of offset2"),
             db_dxil_param(10, "i32", "channel", "channel to sample")])
-        self.add_dxil_op("TextureGatherCmp", 75, "TextureGatherCmp", "same as TextureGather, except this instrution performs comparison on texels, similar to SampleCmp", "fi", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("TextureGatherCmp", next_op_idx, "TextureGatherCmp", "same as TextureGather, except this instrution performs comparison on texels, similar to SampleCmp", "fi", "ro", [
             db_dxil_param(0, "$r", "", "gathered texels"),
             db_dxil_param(2, "res", "srv", "handle of SRV to sample"),
             db_dxil_param(3, "res", "sampler", "handle of sampler to use"),
@@ -689,20 +711,23 @@ class db_dxil(object):
             db_dxil_param(9, "i32", "offset1", "optional offset, applicable to Texture2D, Texture2DArray, and as part of offset2"),
             db_dxil_param(10, "i32", "channel", "channel to sample"),
             db_dxil_param(11, "f", "compareVale", "value to compare with")])
-        self.add_dxil_op_reserved("ToDelete5", 76)
-        self.add_dxil_op_reserved("ToDelete6", 77)
-        self.add_dxil_op("Texture2DMSGetSamplePosition", 78, "Texture2DMSGetSamplePosition", "gets the position of the specified sample", "v", "ro", [
+        next_op_idx += 1
+
+        self.add_dxil_op("Texture2DMSGetSamplePosition", next_op_idx, "Texture2DMSGetSamplePosition", "gets the position of the specified sample", "v", "ro", [
             db_dxil_param(0, "SamplePos", "", "sample position"),
             db_dxil_param(2, "res", "srv", "handle of SRV to sample"),
             db_dxil_param(3, "i32", "index", "zero-based sample index")])
-        self.add_dxil_op("RenderTargetGetSamplePosition", 79, "RenderTargetGetSamplePosition", "gets the position of the specified sample", "v", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("RenderTargetGetSamplePosition", next_op_idx, "RenderTargetGetSamplePosition", "gets the position of the specified sample", "v", "ro", [
             db_dxil_param(0, "SamplePos", "", "sample position"),
             db_dxil_param(2, "i32", "index", "zero-based sample index")])
-        self.add_dxil_op("RenderTargetGetSampleCount", 80, "RenderTargetGetSampleCount", "gets the number of samples for a render target", "v", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("RenderTargetGetSampleCount", next_op_idx, "RenderTargetGetSampleCount", "gets the number of samples for a render target", "v", "ro", [
             db_dxil_param(0, "u32", "", "number of sampling locations for a render target")])
+        next_op_idx += 1
 
         # Atomics. Note that on TGSM, atomics are performed with LLVM instructions.
-        self.add_dxil_op("AtomicBinOp", 81, "AtomicBinOp", "performs an atomic operation on two operands", "i", "", [
+        self.add_dxil_op("AtomicBinOp", next_op_idx, "AtomicBinOp", "performs an atomic operation on two operands", "i", "", [
             db_dxil_param(0, "i32", "", "the original value in the location updated"),
             db_dxil_param(2, "res", "handle", "typed int or uint UAV handle"),
             db_dxil_param(3, "i32", "atomicOp", "atomic operation as per DXIL::AtomicBinOpCode"),
@@ -710,7 +735,8 @@ class db_dxil(object):
             db_dxil_param(5, "i32", "offset1", "offset"),
             db_dxil_param(6, "i32", "offset2", "offset"),
             db_dxil_param(7, "i32", "newValue", "new value")])
-        self.add_dxil_op("AtomicCompareExchange", 82, "AtomicCompareExchange", "atomic compare and exchange to memory", "i", "", [
+        next_op_idx += 1
+        self.add_dxil_op("AtomicCompareExchange", next_op_idx, "AtomicCompareExchange", "atomic compare and exchange to memory", "i", "", [
             db_dxil_param(0, "i32", "", "the original value in the location updated"),
             db_dxil_param(2, "res", "handle", "typed int or uint UAV handle"),
             db_dxil_param(3, "i32", "offset0", "offset in elements"),
@@ -718,14 +744,16 @@ class db_dxil(object):
             db_dxil_param(5, "i32", "offset2", "offset"),
             db_dxil_param(6, "i32", "compareValue", "value to compare for exchange"),
             db_dxil_param(7, "i32", "newValue", "new value")])
+        next_op_idx += 1
 
         # Synchronization.
-        self.add_dxil_op("Barrier", 83, "Barrier", "inserts a memory barrier in the shader", "v", "", [
+        self.add_dxil_op("Barrier", next_op_idx, "Barrier", "inserts a memory barrier in the shader", "v", "", [
             retvoid_param,
             db_dxil_param(2, "i32", "barrierMode", "a mask of DXIL::BarrierMode values", is_const=True)])
+        next_op_idx += 1
 
         # Pixel shader
-        self.add_dxil_op("CalculateLOD", 84, "CalculateLOD", "calculates the level of detail", "f", "ro", [
+        self.add_dxil_op("CalculateLOD", next_op_idx, "CalculateLOD", "calculates the level of detail", "f", "ro", [
             db_dxil_param(0, "f", "", "level of detail"),
             db_dxil_param(2, "res", "handle", "resource handle"),
             db_dxil_param(3, "res", "sampler", "sampler handle"),
@@ -733,147 +761,184 @@ class db_dxil(object):
             db_dxil_param(5, "f", "coord1", "coordinate"),
             db_dxil_param(6, "f", "coord2", "coordinate"),
             db_dxil_param(7, "i1", "clamped", "1 if clampled LOD should be calculated, 0 for unclamped")])
-        self.add_dxil_op("Discard", 85, "Discard", "discard the current pixel", "v", "", [
+        next_op_idx += 1
+        self.add_dxil_op("Discard", next_op_idx, "Discard", "discard the current pixel", "v", "", [
             retvoid_param,
             db_dxil_param(2, "i1", "condition", "condition for conditional discard")])
-        self.add_dxil_op("DerivCoarseX", 86, "Unary", "computes the rate of change of components per stamp", "hf", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("DerivCoarseX", next_op_idx, "Unary", "computes the rate of change of components per stamp", "hf", "rn", [
             db_dxil_param(0, "$o", "", "rate of change in value with regards to RenderTarget x direction"),
             db_dxil_param(2, "$o", "value", "input to rate of change")])
-        self.add_dxil_op("DerivCoarseY", 87, "Unary", "computes the rate of change of components per stamp", "hf", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("DerivCoarseY", next_op_idx, "Unary", "computes the rate of change of components per stamp", "hf", "rn", [
             db_dxil_param(0, "$o", "", "rate of change in value with regards to RenderTarget y direction"),
             db_dxil_param(2, "$o", "value", "input to rate of change")])
-        self.add_dxil_op("DerivFineX", 88, "Unary", "computes the rate of change of components per pixel", "hf", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("DerivFineX", next_op_idx, "Unary", "computes the rate of change of components per pixel", "hf", "rn", [
             db_dxil_param(0, "$o", "", "rate of change in value with regards to RenderTarget x direction"),
             db_dxil_param(2, "$o", "value", "input to rate of change")])
-        self.add_dxil_op("DerivFineY", 89, "Unary", "computes the rate of change of components per pixel", "hf", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("DerivFineY", next_op_idx, "Unary", "computes the rate of change of components per pixel", "hf", "rn", [
             db_dxil_param(0, "$o", "", "rate of change in value with regards to RenderTarget y direction"),
             db_dxil_param(2, "$o", "value", "input to rate of change")])
-        self.add_dxil_op("EvalSnapped", 90, "EvalSnapped", "evaluates an input attribute at pixel center with an offset", "hf", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("EvalSnapped", next_op_idx, "EvalSnapped", "evaluates an input attribute at pixel center with an offset", "hf", "rn", [
             db_dxil_param(0, "$o", "", "result"),
             db_dxil_param(2, "i32", "inputSigId", "input signature element ID"),
             db_dxil_param(3, "i32", "inputRowIndex", "row index of an input attribute"),
             db_dxil_param(4, "i8",  "inputColIndex", "column index of an input attribute"),
             db_dxil_param(5, "i32", "offsetX", "2D offset from the pixel center using a 16x16 grid"),
             db_dxil_param(6, "i32", "offsetY", "2D offset from the pixel center using a 16x16 grid")])
-        self.add_dxil_op("EvalSampleIndex", 91, "EvalSampleIndex", "evaluates an input attribute at a sample location", "hf", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("EvalSampleIndex", next_op_idx, "EvalSampleIndex", "evaluates an input attribute at a sample location", "hf", "rn", [
             db_dxil_param(0, "$o", "", "result"),
             db_dxil_param(2, "i32", "inputSigId", "input signature element ID"),
             db_dxil_param(3, "i32", "inputRowIndex", "row index of an input attribute"),
             db_dxil_param(4, "i8",  "inputColIndex", "column index of an input attribute"),
             db_dxil_param(5, "i32", "sampleIndex", "sample location")])
-        self.add_dxil_op("EvalCentroid", 92, "EvalCentroid", "evaluates an input attribute at pixel center", "hf", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("EvalCentroid", next_op_idx, "EvalCentroid", "evaluates an input attribute at pixel center", "hf", "rn", [
             db_dxil_param(0, "$o", "", "result"),
             db_dxil_param(2, "i32", "inputSigId", "input signature element ID"),
             db_dxil_param(3, "i32", "inputRowIndex", "row index of an input attribute"),
             db_dxil_param(4, "i8",  "inputColIndex", "column index of an input attribute")])
+        next_op_idx += 1
+        self.add_dxil_op("SampleIndex", next_op_idx, "SampleIndex", "returns the sample index in a sample-frequency pixel shader", "i", "rn", [
+            db_dxil_param(0, "i32", "", "result")])
+        next_op_idx += 1
+        self.add_dxil_op("Coverage", next_op_idx, "Coverage", "returns the coverage mask input in a pixel shader", "i", "rn", [
+            db_dxil_param(0, "i32", "", "result")])
+        next_op_idx += 1
+        self.add_dxil_op("InnerCoverage", next_op_idx, "InnerCoverage", "returns underestimated coverage input from conservative rasterization in a pixel shader", "i", "rn", [
+            db_dxil_param(0, "i32", "", "result")])
+        next_op_idx += 1
 
         # Compute shader.
-        self.add_dxil_op("ThreadId", 93, "ThreadId", "reads the thread ID", "i", "rn", [
+        self.add_dxil_op("ThreadId", next_op_idx, "ThreadId", "reads the thread ID", "i", "rn", [
             db_dxil_param(0, "i32", "", "thread ID component"),
             db_dxil_param(2, "i32", "component", "component to read (x,y,z)")])
-        self.add_dxil_op("GroupId", 94, "GroupId", "reads the group ID (SV_GroupID)", "i", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("GroupId", next_op_idx, "GroupId", "reads the group ID (SV_GroupID)", "i", "rn", [
             db_dxil_param(0, "i32", "", "group ID component"),
             db_dxil_param(2, "i32", "component", "component to read")])
-        self.add_dxil_op("ThreadIdInGroup", 95, "ThreadIdInGroup", "reads the thread ID within the group (SV_GroupThreadID)", "i", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("ThreadIdInGroup", next_op_idx, "ThreadIdInGroup", "reads the thread ID within the group (SV_GroupThreadID)", "i", "rn", [
             db_dxil_param(0, "i32", "", "thread ID in group component"),
             db_dxil_param(2, "i32", "component", "component to read (x,y,z)")])
-        self.add_dxil_op("FlattenedThreadIdInGroup", 96, "FlattenedThreadIdInGroup", "provides a flattened index for a given thread within a given group (SV_GroupIndex)", "i", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("FlattenedThreadIdInGroup", next_op_idx, "FlattenedThreadIdInGroup", "provides a flattened index for a given thread within a given group (SV_GroupIndex)", "i", "rn", [
             db_dxil_param(0, "i32", "", "result")])
+        next_op_idx += 1
 
         # Geometry shader
-        self.add_dxil_op("EmitStream", 97, "EmitStream", "emits a vertex to a given stream", "v", "", [
+        self.add_dxil_op("EmitStream", next_op_idx, "EmitStream", "emits a vertex to a given stream", "v", "", [
             retvoid_param,
             db_dxil_param(2, "i8", "streamId", "target stream ID for operation")])
-        self.add_dxil_op("CutStream", 98, "CutStream", "completes the current primitive topology at the specified stream", "v", "", [
+        next_op_idx += 1
+        self.add_dxil_op("CutStream", next_op_idx, "CutStream", "completes the current primitive topology at the specified stream", "v", "", [
             retvoid_param,
             db_dxil_param(2, "i8", "streamId", "target stream ID for operation")])
-        self.add_dxil_op("EmitThenCutStream", 99, "EmitThenCutStream", "equivalent to an EmitStream followed by a CutStream", "v", "", [
+        next_op_idx += 1
+        self.add_dxil_op("EmitThenCutStream", next_op_idx, "EmitThenCutStream", "equivalent to an EmitStream followed by a CutStream", "v", "", [
             retvoid_param,
             db_dxil_param(2, "i8", "streamId", "target stream ID for operation")])
+        next_op_idx += 1
+        self.add_dxil_op("GSInstanceID", next_op_idx, "GSInstanceID", "GSInstanceID", "i", "rn", [
+            db_dxil_param(0, "i32", "", "result")])
+        next_op_idx += 1
 
         # Double precision
-        self.add_dxil_op("MakeDouble", 100, "MakeDouble", "creates a double value", "d", "rn", [
+        self.add_dxil_op("MakeDouble", next_op_idx, "MakeDouble", "creates a double value", "d", "rn", [
             db_dxil_param(0, "d", "", "result"),
             db_dxil_param(2, "i32", "lo", "low part of double"),
             db_dxil_param(3, "i32", "hi", "high part of double")])
-        self.add_dxil_op_reserved("ToDelete1", 101)
-        self.add_dxil_op_reserved("ToDelete2", 102)
-        self.add_dxil_op("SplitDouble", 103, "SplitDouble", "splits a double into low and high parts", "d", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("SplitDouble", next_op_idx, "SplitDouble", "splits a double into low and high parts", "d", "rn", [
             db_dxil_param(0, "splitdouble", "", "result"),
             db_dxil_param(2, "d", "value", "value to split")])
-        self.add_dxil_op_reserved("ToDelete3", 104)
-        self.add_dxil_op_reserved("ToDelete4", 105)
+        next_op_idx += 1
 
         # Domain & Hull shader.
-        self.add_dxil_op("LoadOutputControlPoint", 106, "LoadOutputControlPoint", "LoadOutputControlPoint", "hfwi", "rn", [
+        self.add_dxil_op("LoadOutputControlPoint", next_op_idx, "LoadOutputControlPoint", "LoadOutputControlPoint", "hfwi", "rn", [
             db_dxil_param(0, "$o", "", "result"),
             db_dxil_param(2, "i32", "inputSigId", "input signature element ID"),
             db_dxil_param(3, "i32", "row", "row, relative to the element"),
             db_dxil_param(4, "i8", "col", "column, relative to the element"),
             db_dxil_param(5, "i32", "index", "vertex/point index")])
-        self.add_dxil_op("LoadPatchConstant", 107, "LoadPatchConstant", "LoadPatchConstant", "hfwi", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("LoadPatchConstant", next_op_idx, "LoadPatchConstant", "LoadPatchConstant", "hfwi", "rn", [
             db_dxil_param(0, "$o", "", "result"),
             db_dxil_param(2, "i32", "inputSigId", "input signature element ID"),
             db_dxil_param(3, "i32", "row", "row, relative to the element"),
             db_dxil_param(4, "i8", "col", "column, relative to the element")])
+        next_op_idx += 1
 
         # Domain shader.
-        self.add_dxil_op("DomainLocation", 108, "DomainLocation", "DomainLocation", "f", "rn", [
+        self.add_dxil_op("DomainLocation", next_op_idx, "DomainLocation", "DomainLocation", "f", "rn", [
             db_dxil_param(0, "f", "", "result"),
             db_dxil_param(2, "i8", "component", "input", is_const=True)])
+        next_op_idx += 1
 
         # Hull shader.
-        self.add_dxil_op("StorePatchConstant", 109, "StorePatchConstant", "StorePatchConstant", "hfwi", "", [
+        self.add_dxil_op("StorePatchConstant", next_op_idx, "StorePatchConstant", "StorePatchConstant", "hfwi", "", [
             retvoid_param,
             db_dxil_param(2, "i32", "outputSigID", "output signature element ID"),
             db_dxil_param(3, "i32", "row", "row, relative to the element"),
             db_dxil_param(4, "i8", "col", "column, relative to the element"),
             db_dxil_param(5, "$o", "value", "value to store")])
-        self.add_dxil_op("OutputControlPointID", 110, "OutputControlPointID", "OutputControlPointID", "i", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("OutputControlPointID", next_op_idx, "OutputControlPointID", "OutputControlPointID", "i", "rn", [
             db_dxil_param(0, "i32", "", "result")])
-        self.add_dxil_op("PrimitiveID", 111, "PrimitiveID", "PrimitiveID", "i", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("PrimitiveID", next_op_idx, "PrimitiveID", "PrimitiveID", "i", "rn", [
             db_dxil_param(0, "i32", "", "result")])
+        next_op_idx += 1
 
-        self.add_dxil_op("CycleCounterLegacy", 112, "CycleCounterLegacy", "CycleCounterLegacy", "v", "rn", [
+        self.add_dxil_op("CycleCounterLegacy", next_op_idx, "CycleCounterLegacy", "CycleCounterLegacy", "v", "rn", [
             db_dxil_param(0, "twoi32", "", "result")])
+        next_op_idx += 1
             
-        self.add_dxil_op("Htan", 113, "Unary", "returns the hyperbolic tangent of the specified value", "hf", "rn", [
-            db_dxil_param(0, "$o", "", "operation result"),
-            db_dxil_param(2, "$o", "value", "input value in radians")])
-
         # Add wave intrinsics.
-        self.add_dxil_op_reserved("WaveCaptureReserved", 114)
-        self.add_dxil_op("WaveIsFirstLane", 115, "WaveIsFirstLane", "returns 1 for the first lane in the wave", "v", "ro", [
+        self.add_dxil_op("WaveIsFirstLane", next_op_idx, "WaveIsFirstLane", "returns 1 for the first lane in the wave", "v", "ro", [
             db_dxil_param(0, "i1", "", "operation result")])
-        self.add_dxil_op("WaveGetLaneIndex", 116, "WaveGetLaneIndex", "returns the index of the current lane in the wave", "v", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("WaveGetLaneIndex", next_op_idx, "WaveGetLaneIndex", "returns the index of the current lane in the wave", "v", "ro", [
             db_dxil_param(0, "i32", "", "operation result")])
-        self.add_dxil_op("WaveGetLaneCount", 117, "WaveGetLaneCount", "returns the number of lanes in the wave", "v", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("WaveGetLaneCount", next_op_idx, "WaveGetLaneCount", "returns the number of lanes in the wave", "v", "ro", [
             db_dxil_param(0, "i32", "", "operation result")])
-        self.add_dxil_op_reserved("WaveIsHelperLaneReserved", 118)
-        self.add_dxil_op("WaveAnyTrue", 119, "WaveAnyTrue", "returns 1 if any of the lane evaluates the value to true", "v", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("WaveAnyTrue", next_op_idx, "WaveAnyTrue", "returns 1 if any of the lane evaluates the value to true", "v", "ro", [
             db_dxil_param(0, "i1", "", "operation result"),
             db_dxil_param(2, "i1", "cond", "condition to test")])
-        self.add_dxil_op("WaveAllTrue", 120, "WaveAllTrue", "returns 1 if all the lanes evaluate the value to true", "v", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("WaveAllTrue", next_op_idx, "WaveAllTrue", "returns 1 if all the lanes evaluate the value to true", "v", "ro", [
             db_dxil_param(0, "i1", "", "operation result"),
             db_dxil_param(2, "i1", "cond", "condition to test")])
-        self.add_dxil_op("WaveActiveAllEqual", 121, "WaveActiveAllEqual", "returns 1 if all the lanes have the same value", "hfd18wil", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("WaveActiveAllEqual", next_op_idx, "WaveActiveAllEqual", "returns 1 if all the lanes have the same value", "hfd18wil", "ro", [
             db_dxil_param(0, "i1", "", "operation result"),
             db_dxil_param(2, "$o", "value", "value to compare")])
-        self.add_dxil_op("WaveActiveBallot", 122, "WaveActiveBallot", "returns a struct with a bit set for each lane where the condition is true", "v", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("WaveActiveBallot", next_op_idx, "WaveActiveBallot", "returns a struct with a bit set for each lane where the condition is true", "v", "ro", [
             db_dxil_param(0, "$u4", "", "operation result"),
             db_dxil_param(2, "i1", "cond", "condition to ballot on")])
-        self.add_dxil_op("WaveReadLaneAt", 123, "WaveReadLaneAt", "returns the value from the specified lane", "hfd18wil", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("WaveReadLaneAt", next_op_idx, "WaveReadLaneAt", "returns the value from the specified lane", "hfd18wil", "ro", [
             db_dxil_param(0, "$o", "", "operation result"),
             db_dxil_param(2, "$o", "value", "value to read"),
             db_dxil_param(3, "i32", "lane", "lane index")])
-        self.add_dxil_op("WaveReadLaneFirst", 124, "WaveReadLaneFirst", "returns the value from the first lane", "hf18wil", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("WaveReadLaneFirst", next_op_idx, "WaveReadLaneFirst", "returns the value from the first lane", "hf18wil", "ro", [
             db_dxil_param(0, "$o", "", "operation result"),
             db_dxil_param(2, "$o", "value", "value to read")])
-        self.add_dxil_op("WaveActiveOp", 125, "WaveActiveOp", "returns the result the operation across waves", "hfd18wil", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("WaveActiveOp", next_op_idx, "WaveActiveOp", "returns the result the operation across waves", "hfd18wil", "ro", [
             db_dxil_param(0, "$o", "", "operation result"),
             db_dxil_param(2, "$o", "value", "input value"),
             db_dxil_param(3, "i8", "op", "kind of operation to perform", enum_name="WaveOpKind", is_const=True),
             db_dxil_param(4, "i8", "sop", "sign of operands", enum_name="SignedOpKind", is_const=True)])
+        next_op_idx += 1
         self.add_enum_type("SignedOpKind", "Sign vs. unsigned operands for operation", [
             (0, "Signed", "signed integer or floating-point operands"),
             (1, "Unsigned", "unsigned integer operands")])
@@ -882,90 +947,97 @@ class db_dxil(object):
             (1, "Product", "product of values"), 
             (2, "Min", "minimum value"), 
             (3, "Max", "maximum value")])
-        self.add_dxil_op("WaveActiveBit", 126, "WaveActiveBit", "returns the result of the operation across all lanes", "8wil", "ro", [
+        self.add_dxil_op("WaveActiveBit", next_op_idx, "WaveActiveBit", "returns the result of the operation across all lanes", "8wil", "ro", [
             db_dxil_param(0, "$o", "", "operation result"),
             db_dxil_param(2, "$o", "value", "input value"),
             db_dxil_param(3, "i8", "op", "kind of operation to perform", enum_name="WaveBitOpKind", is_const=True)])
+        next_op_idx += 1
         self.add_enum_type("WaveBitOpKind", "Kind of bitwise cross-lane operation", [
             (0, "And", "bitwise and of values"), 
             (1, "Or", "bitwise or of values"), 
             (2, "Xor", "bitwise xor of values")])
-        self.add_dxil_op("WavePrefixOp", 127, "WavePrefixOp", "returns the result of the operation on prior lanes", "hfd8wil", "ro", [
+        self.add_dxil_op("WavePrefixOp", next_op_idx, "WavePrefixOp", "returns the result of the operation on prior lanes", "hfd8wil", "ro", [
             db_dxil_param(0, "$o", "", "operation result"),
             db_dxil_param(2, "$o", "value", "input value"),
             db_dxil_param(3, "i8", "op", "0=sum,1=product", enum_name="WaveOpKind", is_const=True),
             db_dxil_param(4, "i8", "sop", "sign of operands", enum_name="SignedOpKind", is_const=True)])
-        self.add_dxil_op_reserved("WaveGetOrderedIndex", 128)
-        self.add_dxil_op_reserved("GlobalOrderedCountIncReserved", 129)
-        self.add_dxil_op("QuadReadLaneAt", 130, "QuadReadLaneAt", "reads from a lane in the quad", "hfd18wil", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("QuadReadLaneAt", next_op_idx, "QuadReadLaneAt", "reads from a lane in the quad", "hfd18wil", "ro", [
             db_dxil_param(0, "$o", "", "operation result"),
             db_dxil_param(2, "$o", "value", "value to read"),
             db_dxil_param(3, "u32", "quadLane", "lane to read from (0-4)", max_value = 3, is_const=True)])
+        next_op_idx += 1
         self.add_enum_type("QuadOpKind", "Kind of quad-level operation", [
             (0, "ReadAcrossX", "returns the value from the other lane in the quad in the horizontal direction"), 
             (1, "ReadAcrossY", "returns the value from the other lane in the quad in the vertical direction"),
             (2, "ReadAcrossDiagonal", "returns the value from the lane across the quad in horizontal and vertical direction")])
-        self.add_dxil_op("QuadOp", 131, "QuadOp", "returns the result of a quad-level operation", "hfd8wil", "ro", [
+        self.add_dxil_op("QuadOp", next_op_idx, "QuadOp", "returns the result of a quad-level operation", "hfd8wil", "ro", [
             db_dxil_param(0, "$o", "", "operation result"),
             db_dxil_param(2, "$o", "value", "value for operation"),
             db_dxil_param(3, "i8", "op", "operation", enum_name = "QuadOpKind", is_const=True)])
+        next_op_idx += 1
 
         # Add bitcasts
-        self.add_dxil_op("BitcastI16toF16", 132, "BitcastI16toF16", "bitcast between different sizes", "v", "rn", [
+        self.add_dxil_op("BitcastI16toF16", next_op_idx, "BitcastI16toF16", "bitcast between different sizes", "v", "rn", [
             db_dxil_param(0, "h", "", "operation result"),
             db_dxil_param(2, "i16", "value", "input value")])
-        self.add_dxil_op("BitcastF16toI16", 133, "BitcastF16toI16", "bitcast between different sizes", "v", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("BitcastF16toI16", next_op_idx, "BitcastF16toI16", "bitcast between different sizes", "v", "rn", [
             db_dxil_param(0, "i16", "", "operation result"),
             db_dxil_param(2, "h", "value", "input value")])
-        self.add_dxil_op("BitcastI32toF32", 134, "BitcastI32toF32", "bitcast between different sizes", "v", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("BitcastI32toF32", next_op_idx, "BitcastI32toF32", "bitcast between different sizes", "v", "rn", [
             db_dxil_param(0, "f", "", "operation result"),
             db_dxil_param(2, "i32", "value", "input value")])
-        self.add_dxil_op("BitcastF32toI32", 135, "BitcastF32toI32", "bitcast between different sizes", "v", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("BitcastF32toI32", next_op_idx, "BitcastF32toI32", "bitcast between different sizes", "v", "rn", [
             db_dxil_param(0, "i32", "", "operation result"),
             db_dxil_param(2, "f", "value", "input value")])
-        self.add_dxil_op("BitcastI64toF64", 136, "BitcastI64toF64", "bitcast between different sizes", "v", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("BitcastI64toF64", next_op_idx, "BitcastI64toF64", "bitcast between different sizes", "v", "rn", [
             db_dxil_param(0, "d", "", "operation result"),
             db_dxil_param(2, "i64", "value", "input value")])
-        self.add_dxil_op("BitcastF64toI64", 137, "BitcastF64toI64", "bitcast between different sizes", "v", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("BitcastF64toI64", next_op_idx, "BitcastF64toI64", "bitcast between different sizes", "v", "rn", [
             db_dxil_param(0, "i64", "", "operation result"),
             db_dxil_param(2, "d", "value", "input value")])
+        next_op_idx += 1
         
-        self.add_dxil_op("GSInstanceID", 138, "GSInstanceID", "GSInstanceID", "i", "rn", [
-            db_dxil_param(0, "i32", "", "result")])
-
-        self.add_dxil_op("LegacyF32ToF16", 139, "LegacyF32ToF16", "legacy fuction to convert float (f32) to half (f16) (this is not related to min-precision)", "v", "rn", [
+        self.add_dxil_op("LegacyF32ToF16", next_op_idx, "LegacyF32ToF16", "legacy fuction to convert float (f32) to half (f16) (this is not related to min-precision)", "v", "rn", [
             db_dxil_param(0, "i32", "", "low 16 bits - half value, high 16 bits - zeroes"),
             db_dxil_param(2, "f", "value", "float value to convert")])
+        next_op_idx += 1
 
-        self.add_dxil_op("LegacyF16ToF32", 140, "LegacyF16ToF32", "legacy fuction to convert half (f16) to float (f32) (this is not related to min-precision)", "v", "rn", [
+        self.add_dxil_op("LegacyF16ToF32", next_op_idx, "LegacyF16ToF32", "legacy fuction to convert half (f16) to float (f32) (this is not related to min-precision)", "v", "rn", [
             db_dxil_param(0, "f", "", "converted float value"),
             db_dxil_param(2, "i32", "value", "half value to convert")])
+        next_op_idx += 1
 
-        self.add_dxil_op("LegacyDoubleToFloat", 141, "LegacyDoubleToFloat", "legacy fuction to convert double to float", "v", "rn", [
+        self.add_dxil_op("LegacyDoubleToFloat", next_op_idx, "LegacyDoubleToFloat", "legacy fuction to convert double to float", "v", "rn", [
             db_dxil_param(0, "f", "", "float value"),
             db_dxil_param(2, "d", "value", "double value to convert")])
+        next_op_idx += 1
 
-        self.add_dxil_op("LegacyDoubleToSInt32", 142, "LegacyDoubleToSInt32", "legacy fuction to convert double to int32", "v", "rn", [
+        self.add_dxil_op("LegacyDoubleToSInt32", next_op_idx, "LegacyDoubleToSInt32", "legacy fuction to convert double to int32", "v", "rn", [
             db_dxil_param(0, "i32", "", "i32 value"),
             db_dxil_param(2, "d", "value", "double value to convert")])
+        next_op_idx += 1
 
-        self.add_dxil_op("LegacyDoubleToUInt32", 143, "LegacyDoubleToUInt32", "legacy fuction to convert double to uint32", "v", "rn", [
+        self.add_dxil_op("LegacyDoubleToUInt32", next_op_idx, "LegacyDoubleToUInt32", "legacy fuction to convert double to uint32", "v", "rn", [
             db_dxil_param(0, "i32", "", "i32 value"),
             db_dxil_param(2, "d", "value", "double value to convert")])
+        next_op_idx += 1
 
-        self.add_dxil_op("WaveAllBitCount", 144, "WaveAllOp", "returns the count of bits set to 1 across the wave", "v", "ro", [
+        self.add_dxil_op("WaveAllBitCount", next_op_idx, "WaveAllOp", "returns the count of bits set to 1 across the wave", "v", "ro", [
             db_dxil_param(0, "i32", "", "operation result"),
             db_dxil_param(2, "i1", "value", "input value")])
-        self.add_dxil_op("WavePrefixBitCount", 145, "WavePrefixOp", "returns the count of bits set to 1 on prior lanes", "v", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("WavePrefixBitCount", next_op_idx, "WavePrefixOp", "returns the count of bits set to 1 on prior lanes", "v", "ro", [
             db_dxil_param(0, "i32", "", "operation result"),
             db_dxil_param(2, "i1", "value", "input value")])
+        next_op_idx += 1
 
-        self.add_dxil_op("SampleIndex", 146, "SampleIndex", "returns the sample index in a sample-frequency pixel shader", "i", "rn", [
-            db_dxil_param(0, "i32", "", "result")])
-        self.add_dxil_op("Coverage", 147, "Coverage", "returns the coverage mask input in a pixel shader", "i", "rn", [
-            db_dxil_param(0, "i32", "", "result")])
-        self.add_dxil_op("InnerCoverage", 148, "InnerCoverage", "returns underestimated coverage input from conservative rasterization in a pixel shader", "i", "rn", [
-            db_dxil_param(0, "i32", "", "result")])
+        assert next_op_idx == 139, "next operation index is %d rather than 139 and thus opcodes are broken" % next_op_idx
 
         # Set interesting properties.
         self.build_indices()
@@ -1502,6 +1574,7 @@ class db_dxil(object):
         self.add_valrule_msg("Types.Defined", "Type must be defined based on DXIL primitives", "Type '%0' is not defined on DXIL primitives")
         self.add_valrule_msg("Types.IntWidth", "Int type must be of valid width", "Int type '%0' has an invalid width")
         self.add_valrule("Types.NoMultiDim", "Only one dimension allowed for array type")
+        self.add_valrule("Types.I8", "I8 can only used as immediate value for intrinsic")
 
         self.add_valrule_msg("Sm.Name", "Target shader model name must be known", "Unknown shader model '%0'")
         self.add_valrule("Sm.Opcode", "Opcode must be defined in target shader model")
diff --git a/utils/hct/hctdb_instrhelp.py b/utils/hct/hctdb_instrhelp.py
index a30d76d09..e27490fca 100644
--- a/utils/hct/hctdb_instrhelp.py
+++ b/utils/hct/hctdb_instrhelp.py
@@ -183,6 +183,7 @@ class db_instrhelp_gen:
             "i32": "int32_t",
             "u32": "uint32_t"
             }
+        self.IsDxilOpFuncCallInst = "hlsl::OP::IsDxilOpFuncCallInst"
 
     def print_content(self):
         self.print_header()
@@ -238,7 +239,7 @@ class db_instrhelp_gen:
             print("  operator bool() const {")
             if i.is_dxil_op:
                 op_name = i.fully_qualified_name()
-                print("    return hlsl::OP::IsDxilOpFuncCallInst(Instr, %s);" % op_name)
+                print("    return %s(Instr, %s);" % (self.IsDxilOpFuncCallInst, op_name))
             else:
                 print("    return Instr->getOpcode() == llvm::Instruction::%s;" % i.name)
             print("  }")
diff --git a/utils/hct/hcttest-samples.py b/utils/hct/hcttest-samples.py
index 75c8b95d9..24949c6cc 100644
--- a/utils/hct/hcttest-samples.py
+++ b/utils/hct/hcttest-samples.py
@@ -130,11 +130,8 @@ def ActionCopySDKLayers(args, name, dxil):
         ], args.symbols)
 def ActionCopyWarp12(args, name, dxil):
     CopyBins(args, name, dxil, [
-            'd3d12warp.dll',
+            'd3d10warp.dll',
         ], args.symbols)
-def MakeD3D12WarpCopy(bin_path):
-    # Copy d3d10warp.dll to d3d12warp.dll
-    shutil.copy2(os.path.join(bin_path, 'd3d10warp.dll'), os.path.join(bin_path, 'd3d12warp.dll'))
 
 def PathSplitAll(p):
     s = filter(None, os.path.split(p))
@@ -161,7 +158,7 @@ def ListRuntimeCompilePaths(args):
         'D3D11_3SDKLayers.dll',
         'D3D12SDKLayers.dll',
         'DXGIDebug.dll',
-        'd3d12warp.dll',
+        'd3d10warp.dll',
     ]]
 
 def CheckEnvironment(args):