[SPIRV] Reimplement flat conversion. (#6189)

The implementation of flat conversion in the SPIR-V codegen is very
ad hoc. This changes the implementation to match what is done in DXIL. For
now I have only reimplemented the catch-all case. I will leave the
special cases as is, since they seem to be correct for what they do, and they
generate less verbose code.

Fixes #4906

---------

Co-authored-by: Nathan Gauër <github@keenuts.net>
This commit is contained in:
Steven Perron 2024-01-30 09:26:38 -05:00 коммит произвёл GitHub
Родитель 8b66d5a874
Коммит 8019c71569
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
9 изменённых файлов: 557 добавлений и 155 удалений

Просмотреть файл

@ -571,12 +571,21 @@ const StructType *lowerStructType(const SpirvCodeGenOptions &spirvOptions,
// field-index because bitfields are merged into a single field in the SPIR-V
// representation.
//
// If `includeMerged` is true, `operation` will be called on the same spir-v
// field for each field it represents. For example, if a spir-v field holds the
// values for 3 bit-fields, `operation` will be called 3 times with the same
// `spirvFieldIndex`. The `bitfield` information in `field` will be different.
//
// If false, `operation` will be called once on the first field in the merged
// field.
//
// If the operation returns false, we stop processing fields.
void forEachSpirvField(
const RecordType *recordType, const StructType *spirvType,
std::function<bool(size_t spirvFieldIndex, const QualType &fieldType,
const StructType::FieldInfo &field)>
operation) {
operation,
bool includeMerged = false) {
const auto *cxxDecl = recordType->getAsCXXRecordDecl();
const auto *recordDecl = recordType->getDecl();
@ -598,7 +607,8 @@ void forEachSpirvField(
for (const auto *field : recordDecl->fields()) {
const auto &spirvField = spirvType->getFields()[astFieldIndex];
const uint32_t currentFieldIndex = spirvField.fieldIndex;
if (astFieldIndex > 0 && currentFieldIndex == lastConvertedIndex) {
if (!includeMerged && astFieldIndex > 0 &&
currentFieldIndex == lastConvertedIndex) {
++astFieldIndex;
continue;
}
@ -3504,8 +3514,8 @@ SpirvInstruction *SpirvEmitter::doCastExpr(const CastExpr *expr,
if (!subExprInstr)
return nullptr;
auto *val = processFlatConversion(toType, evalType, subExprInstr,
expr->getExprLoc(), range);
auto *val =
processFlatConversion(toType, subExprInstr, expr->getExprLoc(), range);
val->setRValue();
return val;
}
@ -3546,135 +3556,38 @@ SpirvInstruction *SpirvEmitter::doCastExpr(const CastExpr *expr,
}
}
SpirvInstruction *SpirvEmitter::processFlatConversion(
const QualType type, const QualType initType, SpirvInstruction *initInstr,
SourceLocation srcLoc, SourceRange range) {
if (isConstantTextureBuffer(initType)) {
return reconstructValue(initInstr, type, SpirvLayoutRule::Void, srcLoc,
range);
SpirvInstruction *
SpirvEmitter::processFlatConversion(const QualType type,
SpirvInstruction *initInstr,
SourceLocation srcLoc, SourceRange range) {
// If the same literal is used in multiple instructions, then the literal
// visitor may not be able to pick the correct type for the literal. That
// happens when say one instruction uses the literal as a float and another
// uses it as a double. We solve this by setting the type for the literal to
// its 32-bit equivalent.
//
// TODO(6188): This is wrong when the literal is too large to be held in
// the 32-bit type. We do this because it is consistent with the long
// standing behaviour. Changing now would result in more 64-bit arithmetic,
// which the optimizer does not handle as well.
QualType resultType = initInstr->getAstResultType();
if (resultType->isSpecificBuiltinType(BuiltinType::LitFloat)) {
initInstr->setAstResultType(astContext.FloatTy);
} else if (resultType->isSpecificBuiltinType(BuiltinType::LitInt)) {
if (resultType->isSignedIntegerType())
initInstr->setAstResultType(astContext.IntTy);
else
initInstr->setAstResultType(astContext.UnsignedIntTy);
}
// Try to translate the canonical type first
const auto canonicalType = type.getCanonicalType();
if (canonicalType != type)
return processFlatConversion(canonicalType, initType, initInstr, srcLoc,
range);
// Decompose `initInstr`.
std::vector<SpirvInstruction *> flatValues = decomposeToScalars(initInstr);
// Primitive types
{
QualType ty = {};
if (isScalarType(type, &ty)) {
if (const auto *builtinType = ty->getAs<BuiltinType>()) {
switch (builtinType->getKind()) {
case BuiltinType::Void: {
emitError("cannot create a constant of void type", srcLoc);
return 0;
}
case BuiltinType::Bool:
return castToBool(initInstr, initType, ty, srcLoc, range);
// Target type is an integer variant.
case BuiltinType::Int:
case BuiltinType::Short:
case BuiltinType::Min12Int:
case BuiltinType::Min16Int:
case BuiltinType::Min16UInt:
case BuiltinType::UShort:
case BuiltinType::UInt:
case BuiltinType::Long:
case BuiltinType::LongLong:
case BuiltinType::ULong:
case BuiltinType::ULongLong:
case BuiltinType::Int8_4Packed:
case BuiltinType::UInt8_4Packed:
return castToInt(initInstr, initType, ty, srcLoc, range);
// Target type is a float variant.
case BuiltinType::Double:
case BuiltinType::Float:
case BuiltinType::Half:
case BuiltinType::HalfFloat:
case BuiltinType::Min10Float:
case BuiltinType::Min16Float:
return castToFloat(initInstr, initType, ty, srcLoc, range);
default:
emitError("flat conversion of type %0 unimplemented", srcLoc)
<< builtinType->getTypeClassName();
return 0;
}
}
}
if (flatValues.size() == 1) {
return splatScalarToGenerate(type, flatValues[0], SpirvLayoutRule::Void);
}
// Vector types
{
QualType elemType = {};
uint32_t elemCount = {};
if (isVectorType(type, &elemType, &elemCount)) {
auto *elem =
processFlatConversion(elemType, initType, initInstr, srcLoc, range);
llvm::SmallVector<SpirvInstruction *, 4> constituents(size_t(elemCount),
elem);
return spvBuilder.createCompositeConstruct(type, constituents, srcLoc,
range);
}
}
// Matrix types
{
QualType elemType = {};
uint32_t rowCount = 0, colCount = 0;
if (isMxNMatrix(type, &elemType, &rowCount, &colCount)) {
// By default HLSL matrices are row major, while SPIR-V matrices are
// column major. We are mapping what HLSL semantically mean a row into a
// column here.
const QualType vecType = astContext.getExtVectorType(elemType, colCount);
auto *elem =
processFlatConversion(elemType, initType, initInstr, srcLoc, range);
const llvm::SmallVector<SpirvInstruction *, 4> constituents(
size_t(colCount), elem);
auto *col = spvBuilder.createCompositeConstruct(vecType, constituents,
srcLoc, range);
const llvm::SmallVector<SpirvInstruction *, 4> rows(size_t(rowCount),
col);
return spvBuilder.createCompositeConstruct(type, rows, srcLoc, range);
}
}
// Struct type
if (const auto *structType = type->getAs<RecordType>()) {
const auto *decl = structType->getDecl();
llvm::SmallVector<SpirvInstruction *, 4> fields;
for (const auto *field : decl->fields()) {
// There is a special case for FlatConversion. If T is a struct with only
// one member, S, then (T)<an-instance-of-S> is allowed, which essentially
// constructs a new T instance using the instance of S as its only member.
// Check whether we are handling that case here first.
if (!field->isBitField() &&
field->getType().getCanonicalType() == initType.getCanonicalType()) {
fields.push_back(initInstr);
} else {
fields.push_back(processFlatConversion(field->getType(), initType,
initInstr, srcLoc, range));
}
}
return spvBuilder.createCompositeConstruct(type, fields, srcLoc, range);
}
// Array type
if (const auto *arrayType = astContext.getAsConstantArrayType(type)) {
const auto size =
static_cast<uint32_t>(arrayType->getSize().getZExtValue());
auto *elem = processFlatConversion(arrayType->getElementType(), initType,
initInstr, srcLoc, range);
llvm::SmallVector<SpirvInstruction *, 4> constituents(size_t(size), elem);
return spvBuilder.createCompositeConstruct(type, constituents, srcLoc,
range);
}
emitError("flat conversion of type %0 unimplemented", {})
<< type->getTypeClassName();
type->dump();
return 0;
return generateFromScalars(type, flatValues, SpirvLayoutRule::Void);
}
SpirvInstruction *
@ -14528,5 +14441,266 @@ SpirvEmitter::doUnaryExprOrTypeTraitExpr(const UnaryExprOrTypeTraitExpr *expr) {
return sizeConst;
}
// Recursively decomposes `inst` into the flat list of scalar values that make
// up its AST result type, in declaration order: vector elements, matrix
// elements ({row, col} order), array elements, then record fields. Each
// scalar is produced with OpCompositeExtract and inherits `inst`'s layout
// rule.
std::vector<SpirvInstruction *>
SpirvEmitter::decomposeToScalars(SpirvInstruction *inst) {
  QualType elementType;
  uint32_t elementCount = 0;
  uint32_t numOfRows = 0;
  uint32_t numOfCols = 0;

  QualType resultType = inst->getAstResultType();

  // For a resource type, decompose the contained result type rather than the
  // resource wrapper itself.
  if (hlsl::IsHLSLResourceType(resultType)) {
    resultType = hlsl::GetHLSLResourceResultType(resultType);
  }

  // A scalar is already fully decomposed.
  if (isScalarType(resultType)) {
    return {inst};
  }

  if (isVectorType(resultType, &elementType, &elementCount)) {
    std::vector<SpirvInstruction *> result;
    for (uint32_t i = 0; i < elementCount; i++) {
      auto *element = spvBuilder.createCompositeExtract(
          elementType, inst, {i}, inst->getSourceLocation());
      // Propagate the layout rule so downstream casts know how the extracted
      // value is laid out.
      element->setLayoutRule(inst->getLayoutRule());
      result.push_back(element);
    }
    return result;
  }

  if (isMxNMatrix(resultType, &elementType, &numOfRows, &numOfCols)) {
    std::vector<SpirvInstruction *> result;
    // Extract every element with indices {i, j}: all columns of index i
    // before moving to i+1.
    for (uint32_t i = 0; i < numOfRows; i++) {
      for (uint32_t j = 0; j < numOfCols; j++) {
        auto *element = spvBuilder.createCompositeExtract(
            elementType, inst, {i, j}, inst->getSourceLocation());
        element->setLayoutRule(inst->getLayoutRule());
        result.push_back(element);
      }
    }
    return result;
  }

  if (isArrayType(resultType, &elementType, &elementCount)) {
    std::vector<SpirvInstruction *> result;
    for (uint32_t i = 0; i < elementCount; i++) {
      auto *element = spvBuilder.createCompositeExtract(
          elementType, inst, {i}, inst->getSourceLocation());
      element->setLayoutRule(inst->getLayoutRule());
      // Array elements may themselves be aggregates; recurse.
      auto decomposedElement = decomposeToScalars(element);
      // See how we can improve the performance by avoiding this copy.
      result.insert(result.end(), decomposedElement.begin(),
                    decomposedElement.end());
    }
    return result;
  }

  if (const RecordType *recordType = resultType->getAs<RecordType>()) {
    std::vector<SpirvInstruction *> result;

    // Lower the record to its SPIR-V struct type so fields can be walked by
    // SPIR-V field index (bitfields may be merged into one SPIR-V field).
    const SpirvType *type = nullptr;
    LowerTypeVisitor lowerTypeVisitor(astContext, spvContext, spirvOptions);
    type = lowerTypeVisitor.lowerType(resultType, inst->getLayoutRule(), false,
                                      inst->getSourceLocation());

    // `includeMerged` is true: the callback runs once per AST field even when
    // several bitfields share a single SPIR-V field, so each bitfield
    // contributes the whole bit container as its scalar.
    forEachSpirvField(
        recordType, dyn_cast<StructType>(type),
        [this, inst, &result](size_t spirvFieldIndex, const QualType &fieldType,
                              const StructType::FieldInfo &fieldInfo) {
          auto *field = spvBuilder.createCompositeExtract(
              fieldType, inst, {fieldInfo.fieldIndex},
              inst->getSourceLocation());
          field->setLayoutRule(inst->getLayoutRule());
          auto decomposedField = decomposeToScalars(field);
          // See how we can improve the performance by avoiding this copy.
          result.insert(result.end(), decomposedField.begin(),
                        decomposedField.end());
          return true;
        },
        true);
    return result;
  }

  llvm_unreachable("Trying to decompose a type that we cannot decompose");
  return {};
}
// Builds a value of `type` by consuming scalars from the front of `scalars`,
// casting each one to the corresponding scalar component type. This is the
// inverse of `decomposeToScalars`. Consumed entries are erased from `scalars`
// so recursive calls (and the caller) continue with the remainder.
SpirvInstruction *
SpirvEmitter::generateFromScalars(QualType type,
                                  std::vector<SpirvInstruction *> &scalars,
                                  SpirvLayoutRule layoutRule) {
  QualType elementType;
  uint32_t elementCount = 0;
  uint32_t numOfRows = 0;
  uint32_t numOfCols = 0;

  assert(!scalars.empty() && "not enough scalars to generate the given type");

  if (isScalarType(type)) {
    // If the type is bool with a non-void layout rule, then it should be
    // treated as a uint.
    assert(layoutRule == SpirvLayoutRule::Void &&
           "If the layout type is not void, then we should cast to an int when "
           "type is a boolean.");
    // A laid-out boolean source is physically stored as a uint, so cast from
    // uint rather than bool.
    QualType sourceType = scalars[0]->getAstResultType();
    if (sourceType->isBooleanType() &&
        scalars[0]->getLayoutRule() != SpirvLayoutRule::Void) {
      sourceType = astContext.UnsignedIntTy;
    }
    SpirvInstruction *result = castToType(scalars[0], sourceType, type,
                                          scalars[0]->getSourceLocation());
    scalars.erase(scalars.begin());
    return result;
  } else if (isVectorType(type, &elementType, &elementCount)) {
    assert(elementCount <= scalars.size());
    SourceLocation loc = scalars[0]->getSourceLocation();
    std::vector<SpirvInstruction *> elements;
    for (uint32_t i = 0; i < elementCount; ++i) {
      elements.push_back(castToType(scalars[i], scalars[i]->getAstResultType(),
                                    elementType,
                                    scalars[i]->getSourceLocation()));
    }
    SpirvInstruction *result =
        spvBuilder.createCompositeConstruct(type, elements, loc);
    result->setLayoutRule(layoutRule);
    scalars.erase(scalars.begin(), scalars.begin() + elementCount);
    return result;
  } else if (isMxNMatrix(type, &elementType, &numOfRows, &numOfCols)) {
    std::vector<SpirvInstruction *> rows;
    SourceLocation loc = scalars[0]->getSourceLocation();
    QualType rowType = astContext.getExtVectorType(elementType, numOfCols);
    for (uint32_t i = 0; i < numOfRows; i++) {
      assert(numOfCols <= scalars.size());
      std::vector<SpirvInstruction *> row;
      for (uint32_t j = 0; j < numOfCols; j++) {
        row.push_back(castToType(scalars[j], scalars[j]->getAstResultType(),
                                 elementType, scalars[j]->getSourceLocation()));
      }
      scalars.erase(scalars.begin(), scalars.begin() + numOfCols);
      // Use the location captured up front: after the erase above `scalars`
      // can be empty (on the last row), so scalars[0] must not be read here.
      SpirvInstruction *r =
          spvBuilder.createCompositeConstruct(rowType, row, loc);
      r->setLayoutRule(layoutRule);
      rows.push_back(r);
    }
    SpirvInstruction *result =
        spvBuilder.createCompositeConstruct(type, rows, loc);
    result->setLayoutRule(layoutRule);
    return result;
  } else if (isArrayType(type, &elementType, &elementCount)) {
    // Capture the location before the recursive calls consume `scalars`; the
    // vector may be empty by the time the array itself is constructed.
    SourceLocation loc = scalars[0]->getSourceLocation();
    std::vector<SpirvInstruction *> elements;
    for (uint32_t i = 0; i < elementCount; i++) {
      elements.push_back(generateFromScalars(elementType, scalars, layoutRule));
    }
    SpirvInstruction *result =
        spvBuilder.createCompositeConstruct(type, elements, loc);
    result->setLayoutRule(layoutRule);
    return result;
  } else if (const RecordType *recordType = dyn_cast<RecordType>(type)) {
    SourceLocation loc = scalars[0]->getSourceLocation();
    std::vector<SpirvInstruction *> elements;
    LowerTypeVisitor lowerTypeVisitor(astContext, spvContext, spirvOptions);
    const SpirvType *spirvType =
        lowerTypeVisitor.lowerType(type, layoutRule, false, loc);
    // Each SPIR-V field consumes the scalars it needs, in field order.
    forEachSpirvField(recordType, dyn_cast<StructType>(spirvType),
                      [this, &elements, &scalars, layoutRule](
                          size_t spirvFieldIndex, const QualType &fieldType,
                          const StructType::FieldInfo &fieldInfo) {
                        elements.push_back(generateFromScalars(
                            fieldType, scalars, layoutRule));
                        return true;
                      });
    SpirvInstruction *result =
        spvBuilder.createCompositeConstruct(type, elements, loc);
    result->setLayoutRule(layoutRule);
    return result;
  } else {
    llvm_unreachable("Trying to generate a type that we cannot generate");
  }
  return {};
}
// Builds a value of `type` in which every scalar component is `scalar` cast
// to the corresponding component type. Behaves like `generateFromScalars`
// called with an unbounded supply of the same scalar.
SpirvInstruction *
SpirvEmitter::splatScalarToGenerate(QualType type, SpirvInstruction *scalar,
                                    SpirvLayoutRule layoutRule) {
  QualType elemTy;
  uint32_t count = 0;
  uint32_t rowCount = 0;
  uint32_t colCount = 0;
  const SourceLocation loc = scalar->getSourceLocation();

  // Scalar target: a single cast.
  if (isScalarType(type)) {
    // If the type is bool with a non-void layout rule, then it should be
    // treated as a uint.
    assert(layoutRule == SpirvLayoutRule::Void &&
           "If the layout type is not void, then we should cast to an int when "
           "type is a boolean.");
    QualType fromType = scalar->getAstResultType();
    const bool isLaidOutBool = fromType->isBooleanType() &&
                               scalar->getLayoutRule() != SpirvLayoutRule::Void;
    if (isLaidOutBool)
      fromType = astContext.UnsignedIntTy;
    return castToType(scalar, fromType, type, loc);
  }

  // Vector target: cast once and replicate the element.
  if (isVectorType(type, &elemTy, &count)) {
    SpirvInstruction *component =
        castToType(scalar, scalar->getAstResultType(), elemTy, loc);
    std::vector<SpirvInstruction *> components(count, component);
    SpirvInstruction *vec =
        spvBuilder.createCompositeConstruct(type, components, loc);
    vec->setLayoutRule(layoutRule);
    return vec;
  }

  // Matrix target: build one row vector and replicate it.
  if (isMxNMatrix(type, &elemTy, &rowCount, &colCount)) {
    SpirvInstruction *component =
        castToType(scalar, scalar->getAstResultType(), elemTy, loc);
    assert(component);
    std::vector<SpirvInstruction *> rowElems(colCount, component);
    QualType rowTy = astContext.getExtVectorType(elemTy, colCount);
    SpirvInstruction *row =
        spvBuilder.createCompositeConstruct(rowTy, rowElems, loc);
    row->setLayoutRule(layoutRule);
    std::vector<SpirvInstruction *> allRows(rowCount, row);
    SpirvInstruction *mat =
        spvBuilder.createCompositeConstruct(type, allRows, loc);
    mat->setLayoutRule(layoutRule);
    return mat;
  }

  // Array target: splat the element type, then replicate it.
  if (isArrayType(type, &elemTy, &count)) {
    SpirvInstruction *element = splatScalarToGenerate(elemTy, scalar, layoutRule);
    std::vector<SpirvInstruction *> elements(count, element);
    SpirvInstruction *arr =
        spvBuilder.createCompositeConstruct(type, elements, loc);
    arr->setLayoutRule(layoutRule);
    return arr;
  }

  // Record target: splat each field in SPIR-V field order.
  if (const RecordType *recordType = dyn_cast<RecordType>(type)) {
    std::vector<SpirvInstruction *> fields;
    LowerTypeVisitor lowerTypeVisitor(astContext, spvContext, spirvOptions);
    // NOTE(review): generateFromScalars lowers with `layoutRule` here, while
    // this function uses SpirvLayoutRule::Void — confirm the difference is
    // intentional.
    const SpirvType *spirvType =
        lowerTypeVisitor.lowerType(type, SpirvLayoutRule::Void, false, loc);
    forEachSpirvField(recordType, dyn_cast<StructType>(spirvType),
                      [this, &fields, &scalar, layoutRule](
                          size_t spirvFieldIndex, const QualType &fieldType,
                          const StructType::FieldInfo &fieldInfo) {
                        fields.push_back(splatScalarToGenerate(
                            fieldType, scalar, layoutRule));
                        return true;
                      });
    SpirvInstruction *rec =
        spvBuilder.createCompositeConstruct(type, fields, loc);
    rec->setLayoutRule(layoutRule);
    return rec;
  }

  llvm_unreachable("Trying to generate a type that we cannot generate");
  return {};
}
} // end namespace spirv
} // end namespace clang

Просмотреть файл

@ -760,10 +760,8 @@ private:
private:
/// \brief Performs a FlatConversion implicit cast. Fills an instance of the
/// given type with initializer <result-id>. The initializer is of type
/// initType.
/// given type with initializer <result-id>.
SpirvInstruction *processFlatConversion(const QualType type,
const QualType initType,
SpirvInstruction *initId,
SourceLocation,
SourceRange range = {});
@ -1213,6 +1211,26 @@ private:
/// Returns a function scope parameter with the same type as |param|.
SpirvVariable *createFunctionScopeTempFromParameter(const ParmVarDecl *param);
/// Returns a vector of SpirvInstruction that is the decomposition of `inst`
/// into scalars. This is recursive. For example, a struct of a 4 element
/// vector will return 4 scalars.
std::vector<SpirvInstruction *> decomposeToScalars(SpirvInstruction *inst);
/// Returns a spirv instruction with the value of the given type and layout
/// rule that is obtained by assigning each scalar in `type` to corresponding
/// value in `scalars`. This is the inverse of `decomposeToScalars`.
SpirvInstruction *
generateFromScalars(QualType type, std::vector<SpirvInstruction *> &scalars,
SpirvLayoutRule layoutRule);
/// Returns a spirv instruction with the value of the given type and layout
/// rule that is obtained by assigning `scalar` to each scalar in `type`. This
/// the same as calling `generateFromScalars` with a sufficiently large vector
/// where every element is `scalar`.
SpirvInstruction *splatScalarToGenerate(QualType type,
SpirvInstruction *scalar,
SpirvLayoutRule rule);
public:
/// \brief Wrapper method to create a fatal error message and report it
/// in the diagnostic engine associated with this consumer.

Просмотреть файл

@ -9,6 +9,16 @@ struct S {
uint64_t f;
};
struct T {
int32_t i;
int64_t j;
};
struct UT {
uint32_t i;
uint64_t j;
};
void main() {
// CHECK: [[inf:%[0-9]+]] = OpFDiv %float %float_1 %float_0
@ -47,4 +57,27 @@ void main() {
// CHECK-NEXT: {{%[0-9]+}} = OpCompositeConstruct %S [[a2_float]] [[a_float_0]] [[a2_double]] [[a]] [[a_int64]] [[a_uint64]]
double a;
S s1 = (S)(a);
// TODO(6188): This is wrong because we lose most significant bits in the literal.
// CHECK: [[lit:%[0-9]+]] = OpIAdd %int %int_0 %int_1
// CHECK: [[longLit:%[0-9]+]] = OpSConvert %long [[lit]]
// CHECK: [[t:%[0-9]+]] = OpCompositeConstruct %T [[lit]] [[longLit]]
// CHECK: OpStore %t [[t]]
T t = (T)(0x100000000+1);
// TODO(6188): This is wrong because we lose most significant bits in the literal.
// CHECK: [[lit:%[0-9]+]] = OpIAdd %uint %uint_0 %uint_1
// CHECK: [[longLit:%[0-9]+]] = OpUConvert %ulong [[lit]]
// CHECK: [[t:%[0-9]+]] = OpCompositeConstruct %UT [[lit]] [[longLit]]
// CHECK: OpStore %ut [[t]]
UT ut = (UT)(0x100000000ul+1);
// TODO(6188): This is wrong because we lose most significant bits in the literal.
// CHECK: [[longLit:%[0-9]+]] = OpIAdd %ulong %ulong_4294967296 %ulong_1
// CHECK: [[lit:%[0-9]+]] = OpUConvert %uint [[longLit]]
// CHECK: [[lit2:%[0-9]+]] = OpBitcast %int [[lit]]
// CHECK: [[longLit2:%[0-9]+]] = OpBitcast %long [[longLit]]
// CHECK: [[t:%[0-9]+]] = OpCompositeConstruct %T [[lit2]] [[longLit2]]
// CHECK: OpStore %t2 [[t]]
T t2 = (T)(0x100000000ull+1);
}

Просмотреть файл

@ -0,0 +1,52 @@
// RUN: %dxc -T ps_6_0 -E main %s -spirv -Zpc | FileCheck %s -check-prefix=COL -check-prefix=CHECK
// RUN: %dxc -T ps_6_0 -E main %s -spirv -Zpr | FileCheck %s -check-prefix=ROW -check-prefix=CHECK
struct S {
float2x3 a;
};
struct T {
float a[6];
};
RWStructuredBuffer<S> s_output;
RWStructuredBuffer<T> t_output;
// See https://github.com/microsoft/DirectXShaderCompiler/blob/438781364eea22d188b337be1dfa4174ed7d928c/docs/SPIR-V.rst#L723.
// COL: OpMemberDecorate %S 0 RowMajor
// ROW: OpMemberDecorate %S 0 ColMajor
// The DXIL generated for the two cases seems to produce the same results,
// and this matches that behaviour.
// CHECK: [[array_const:%[0-9]+]] = OpConstantComposite %_arr_float_uint_6 %float_0 %float_1 %float_2 %float_3 %float_4 %float_5
// CHECK: [[t:%[0-9]+]] = OpConstantComposite %T [[array_const]]
// The DXIL that is generated uses a different order for the values depending
// on whether the matrix is column or row major. However, for SPIR-V, the
// value stored in both cases is the same because the decoration, which is
// checked above, is what determines the layout in memory for the value.
// CHECK: [[row0:%[0-9]+]] = OpConstantComposite %v3float %float_0 %float_1 %float_2
// CHECK: [[row1:%[0-9]+]] = OpConstantComposite %v3float %float_3 %float_4 %float_5
// CHECK: [[mat:%[0-9]+]] = OpConstantComposite %mat2v3float %33 %34
// CHECK: [[s:%[0-9]+]] = OpConstantComposite %S %35
void main() {
S s;
[unroll]
for( int i = 0; i < 2; ++i) {
[unroll]
for( int j = 0; j < 3; ++j) {
s.a[i][j] = i*3+j;
}
}
// CHECK: [[ac:%[0-9]+]] = OpAccessChain %_ptr_Uniform_T %t_output %int_0 %uint_0
// CHECK: OpStore [[ac]] [[t]]
T t = (T)(s);
t_output[0] = t;
// CHECK: [[ac:%[0-9]+]] = OpAccessChain %_ptr_Uniform_S %s_output %int_0 %uint_0
// CHECK: OpStore [[ac]] [[s]]
s = (S)t;
s_output[0] = s;
}

Просмотреть файл

@ -1,10 +1,24 @@
// RUN: %dxc -T ps_6_0 -E main -fcgl %s -spirv | FileCheck %s
struct S {
struct S1 {
float2 data[2];
};
StructuredBuffer<S> MySB;
StructuredBuffer<S1> MySB;
struct S2 {
float b0;
float3 b1;
};
struct S3 {
float3 vec;
};
StructuredBuffer<float4> input2;
float4 main() : SV_TARGET
{
@ -19,5 +33,28 @@ float4 main() : SV_TARGET
// CHECK-NEXT: [[val:%[0-9]+]] = OpCompositeConstruct %_arr_float_uint_4 [[v1]] [[v2]] [[v3]] [[v4]]
// CHECK-NEXT: OpStore %data [[val]]
float data[4] = (float[4])MySB[0].data;
return data[1];
// CHECK: [[ac:%[0-9]+]] = OpAccessChain %_ptr_Uniform_v4float %input2 %int_0 %uint_0
// CHECK-NEXT: [[ld:%[0-9]+]] = OpLoad %v4float [[ac]]
// CHECK-NEXT: [[elem0:%[0-9]+]] = OpCompositeExtract %float [[ld]] 0
// CHECK-NEXT: [[elem1:%[0-9]+]] = OpCompositeExtract %float [[ld]] 1
// CHECK-NEXT: [[elem2:%[0-9]+]] = OpCompositeExtract %float [[ld]] 2
// CHECK-NEXT: [[elem3:%[0-9]+]] = OpCompositeExtract %float [[ld]] 3
// CHECK-NEXT: [[vec:%[0-9]+]] = OpCompositeConstruct %v3float [[elem1]] [[elem2]] [[elem3]]
// CHECK-NEXT: OpCompositeConstruct %S2 [[elem0]] [[vec]]
S2 d2 = (S2)input2[0];
// CHECK: [[ac:%[0-9]+]] = OpAccessChain %_ptr_Uniform_v4float %input2 %int_0 %uint_0
// CHECK-NEXT: [[ld:%[0-9]+]] = OpLoad %v4float [[ac]]
// CHECK-NEXT: [[elem0:%[0-9]+]] = OpCompositeExtract %float [[ld]] 0
// CHECK-NEXT: [[elem1:%[0-9]+]] = OpCompositeExtract %float [[ld]] 1
// CHECK-NEXT: [[elem2:%[0-9]+]] = OpCompositeExtract %float [[ld]] 2
// CHECK-NEXT: [[elem3:%[0-9]+]] = OpCompositeExtract %float [[ld]] 3
// CHECK-NEXT: [[vec:%[0-9]+]] = OpCompositeConstruct %v3float [[elem0]] [[elem1]] [[elem2]]
// CHECK-NEXT: OpCompositeConstruct %S3 [[vec]]
S3 d3 = (S3)input2[0];
return 0;
}

Просмотреть файл

@ -30,6 +30,7 @@ struct Vectors {
RWStructuredBuffer<uint> buf : r0;
RWStructuredBuffer<uint64_t> lbuf : r1;
RWStructuredBuffer<Vectors> vbuf : r2;
// CHECK: OpName [[BUF:%[^ ]*]] "buf"
// CHECK: OpName [[LBUF:%[^ ]*]] "lbuf"
@ -83,22 +84,21 @@ void main()
// CHECK: [[COLORS:%[^ ]*]] = OpLoad [[TWOCOLORS]]
// CHECK: [[COLORS0:%[^ ]*]] = OpCompositeExtract [[COLORRGBA]] [[COLORS]] 0
// CHECK: [[COLORS00:%[^ ]*]] = OpCompositeExtract [[UINT]] [[COLORS0]] 0
// CHECK: [[COLORS000:%[^ ]*]] = OpBitFieldUExtract [[UINT]] [[COLORS00]] [[U0]] [[U8]]
// CHECK: [[BUF00:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[BUF]] [[I0]] [[U0]]
// CHECK: OpStore [[BUF00]] [[COLORS000]]
// CHECK: OpStore [[BUF00]] [[COLORS00]]
buf[0] -= (uint) rgb;
// CHECK: [[RGB:%[^ ]*]] = OpLoad [[COLORRGB]]
// CHECK: [[RGB0:%[^ ]*]] = OpCompositeExtract [[UINT]] [[RGB]] 0
// CHECK: [[RGB00:%[^ ]*]] = OpBitFieldUExtract [[UINT]] [[RGB0]] [[U0]] [[U8]]
// CHECK: [[BUF00_0:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[BUF]] [[I0]] [[U0]]
// CHECK: [[V1:%[^ ]*]] = OpLoad [[UINT]] [[BUF00_0]]
// CHECK: [[V2:%[^ ]*]] = OpISub [[UINT]] [[V1]] [[RGB00]]
// CHECK: [[V2:%[^ ]*]] = OpISub [[UINT]] [[V1]] [[RGB0]]
// CHECK: OpStore [[BUF00_0]] [[V2]]
lbuf[0] = (uint64_t) v;
// CHECK: [[VECS:%[^ ]*]] = OpLoad [[VECTORS]]
// CHECK: [[VECS00:%[^ ]*]] = OpCompositeExtract [[UINT]] [[VECS]] 0 0
// CHECK: [[VECS0:%[^ ]*]] = OpCompositeExtract {{%v2uint}} [[VECS]] 0
// CHECK: [[VECS00:%[^ ]*]] = OpCompositeExtract [[UINT]] [[VECS0]] 0
// CHECK: [[V1_0:%[^ ]*]] = OpUConvert [[ULONG]] [[VECS00]]
// CHECK: [[LBUF00:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[LBUF]] [[I0]] [[U0]]
// CHECK: OpStore [[LBUF00]] [[V1_0]]
@ -112,5 +112,22 @@ void main()
// CHECK: [[V3:%[^ ]*]] = OpLoad [[ULONG]] [[LBUF00_0]]
// CHECK: [[V4:%[^ ]*]] = OpIAdd [[ULONG]] [[V3]] [[V2_0]]
// CHECK: OpStore [[LBUF00_0]] [[V4]]
vbuf[0] = (Vectors) colors;
// CHECK: [[c0:%[^ ]*]] = OpLoad {{%[^ ]*}} %colors
// CHECK: [[c0_0:%[^ ]+]] = OpCompositeExtract %ColorRGBA [[c0]] 0
// The entire bit container extracted for each bitfield.
// CHECK: [[c0_0_0:%[^ ]*]] = OpCompositeExtract %uint [[c0_0]] 0
// CHECK: [[c0_0_1:%[^ ]*]] = OpCompositeExtract %uint [[c0_0]] 0
// CHECK: [[c0_0_2:%[^ ]*]] = OpCompositeExtract %uint [[c0_0]] 0
// CHECK: [[c0_0_3:%[^ ]*]] = OpCompositeExtract %uint [[c0_0]] 0
// CHECK: [[v0:%[^ ]*]] = OpCompositeConstruct %v2uint [[c0_0_0]] [[c0_0_1]]
// CHECK: [[v1:%[^ ]*]] = OpCompositeConstruct %v2uint [[c0_0_2]] [[c0_0_3]]
// CHECK: [[v:%[^ ]*]] = OpCompositeConstruct %Vectors_0 [[v0]] [[v1]]
// CHECK: [[vbuf:%[^ ]*]] = OpAccessChain %{{[^ ]*}} %vbuf [[I0]] [[U0]]
// CHECK: [[v0:%[^ ]*]] = OpCompositeExtract %v2uint [[v]] 0
// CHECK: [[v1:%[^ ]*]] = OpCompositeExtract %v2uint [[v]] 1
// CHECK: [[v:%[^ ]*]] = OpCompositeConstruct %Vectors [[v0]] [[v1]]
// CHECK: OpStore [[vbuf]] [[v]]
}

Просмотреть файл

@ -30,14 +30,24 @@ float4 main(in float4 pos : SV_Position) : SV_Target
// Initializing a T with a ConstantBuffer<T> is a copy
// CHECK: [[val:%[0-9]+]] = OpLoad %type_ConstantBuffer_S %myCBuffer
// CHECK-NEXT: [[vec:%[0-9]+]] = OpCompositeExtract %v4float [[val]] 0
// CHECK-NEXT: [[e0:%[0-9]+]] = OpCompositeExtract %float [[vec]] 0
// CHECK-NEXT: [[e1:%[0-9]+]] = OpCompositeExtract %float [[vec]] 1
// CHECK-NEXT: [[e2:%[0-9]+]] = OpCompositeExtract %float [[vec]] 2
// CHECK-NEXT: [[e3:%[0-9]+]] = OpCompositeExtract %float [[vec]] 3
// CHECK-NEXT: [[vec:%[0-9]+]] = OpCompositeConstruct %v4float [[e0]] [[e1]] [[e2]] [[e3]]
// CHECK-NEXT: [[tmp:%[0-9]+]] = OpCompositeConstruct %S_0 [[vec]]
// CHECK-NEXT: OpStore %buffer1 [[tmp]]
S buffer1 = myCBuffer;
// Assigning a ConstantBuffer<T> to a T is a copy
// CHECK: [[val_0:%[0-9]+]] = OpLoad %type_ConstantBuffer_S %myCBuffer
// CHECK-NEXT: [[vec_0:%[0-9]+]] = OpCompositeExtract %v4float [[val_0]] 0
// CHECK-NEXT: [[tmp_0:%[0-9]+]] = OpCompositeConstruct %S_0 [[vec_0]]
// CHECK-NEXT: [[vec:%[0-9]+]] = OpCompositeExtract %v4float [[val_0]] 0
// CHECK-NEXT: [[e0:%[0-9]+]] = OpCompositeExtract %float [[vec]] 0
// CHECK-NEXT: [[e1:%[0-9]+]] = OpCompositeExtract %float [[vec]] 1
// CHECK-NEXT: [[e2:%[0-9]+]] = OpCompositeExtract %float [[vec]] 2
// CHECK-NEXT: [[e3:%[0-9]+]] = OpCompositeExtract %float [[vec]] 3
// CHECK-NEXT: [[vec:%[0-9]+]] = OpCompositeConstruct %v4float [[e0]] [[e1]] [[e2]] [[e3]]
// CHECK-NEXT: [[tmp_0:%[0-9]+]] = OpCompositeConstruct %S_0 [[vec]]
// CHECK-NEXT: OpStore %buffer2 [[tmp_0]]
S buffer2;
buffer2 = myCBuffer;
@ -51,8 +61,13 @@ float4 main(in float4 pos : SV_Position) : SV_Target
// Write out each component recursively
// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Uniform_S %myASBuffer %uint_0 {{%[0-9]+}}
// CHECK-NEXT: [[val_2:%[0-9]+]] = OpLoad %type_ConstantBuffer_S %myCBuffer
// CHECK-NEXT: [[vec_1:%[0-9]+]] = OpCompositeExtract %v4float [[val_2]] 0
// CHECK-NEXT: [[tmp_1:%[0-9]+]] = OpCompositeConstruct %S_0 [[vec_1]]
// CHECK-NEXT: [[vec:%[0-9]+]] = OpCompositeExtract %v4float [[val_2]] 0
// CHECK-NEXT: [[e0:%[0-9]+]] = OpCompositeExtract %float [[vec]] 0
// CHECK-NEXT: [[e1:%[0-9]+]] = OpCompositeExtract %float [[vec]] 1
// CHECK-NEXT: [[e2:%[0-9]+]] = OpCompositeExtract %float [[vec]] 2
// CHECK-NEXT: [[e3:%[0-9]+]] = OpCompositeExtract %float [[vec]] 3
// CHECK-NEXT: [[vec:%[0-9]+]] = OpCompositeConstruct %v4float [[e0]] [[e1]] [[e2]] [[e3]]
// CHECK-NEXT: [[tmp_1:%[0-9]+]] = OpCompositeConstruct %S_0 [[vec]]
// CHECK-NEXT: [[vec_2:%[0-9]+]] = OpCompositeExtract %v4float [[tmp_1]] 0
// CHECK-NEXT: [[tmp_2:%[0-9]+]] = OpCompositeConstruct %S [[vec_2]]
// CHECK-NEXT: OpStore [[ptr]] [[tmp_2]]
@ -60,8 +75,13 @@ float4 main(in float4 pos : SV_Position) : SV_Target
// Passing a ConstantBuffer<T> to a T parameter is a copy
// CHECK: [[val_3:%[0-9]+]] = OpLoad %type_ConstantBuffer_S %myCBuffer
// CHECK-NEXT: [[vec_3:%[0-9]+]] = OpCompositeExtract %v4float [[val_3]] 0
// CHECK-NEXT: [[tmp_3:%[0-9]+]] = OpCompositeConstruct %S_0 [[vec_3]]
// CHECK-NEXT: [[vec:%[0-9]+]] = OpCompositeExtract %v4float [[val_3]] 0
// CHECK-NEXT: [[e0:%[0-9]+]] = OpCompositeExtract %float [[vec]] 0
// CHECK-NEXT: [[e1:%[0-9]+]] = OpCompositeExtract %float [[vec]] 1
// CHECK-NEXT: [[e2:%[0-9]+]] = OpCompositeExtract %float [[vec]] 2
// CHECK-NEXT: [[e3:%[0-9]+]] = OpCompositeExtract %float [[vec]] 3
// CHECK-NEXT: [[vec:%[0-9]+]] = OpCompositeConstruct %v4float [[e0]] [[e1]] [[e2]] [[e3]]
// CHECK-NEXT: [[tmp_3:%[0-9]+]] = OpCompositeConstruct %S_0 [[vec]]
// CHECK-NEXT: OpStore %param_var_buffer [[tmp_3]]
return doStuff(myCBuffer);
}
@ -69,8 +89,13 @@ float4 main(in float4 pos : SV_Position) : SV_Target
S retStuff() {
// Returning a ConstantBuffer<T> as a T is a copy
// CHECK: [[val_4:%[0-9]+]] = OpLoad %type_ConstantBuffer_S %myCBuffer
// CHECK-NEXT: [[vec_4:%[0-9]+]] = OpCompositeExtract %v4float [[val_4]] 0
// CHECK-NEXT: [[ret:%[0-9]+]] = OpCompositeConstruct %S_0 [[vec_4]]
// CHECK-NEXT: [[vec:%[0-9]+]] = OpCompositeExtract %v4float [[val_4]] 0
// CHECK-NEXT: [[e0:%[0-9]+]] = OpCompositeExtract %float [[vec]] 0
// CHECK-NEXT: [[e1:%[0-9]+]] = OpCompositeExtract %float [[vec]] 1
// CHECK-NEXT: [[e2:%[0-9]+]] = OpCompositeExtract %float [[vec]] 2
// CHECK-NEXT: [[e3:%[0-9]+]] = OpCompositeExtract %float [[vec]] 3
// CHECK-NEXT: [[vec:%[0-9]+]] = OpCompositeConstruct %v4float [[e0]] [[e1]] [[e2]] [[e3]]
// CHECK-NEXT: [[ret:%[0-9]+]] = OpCompositeConstruct %S_0 [[vec]]
// CHECK-NEXT: OpReturnValue [[ret]]
return myCBuffer;
}

Просмотреть файл

@ -34,14 +34,24 @@ float4 main(in float4 pos : SV_Position) : SV_Target
// Initializing a T with a TextureBuffer<T> is a copy
// CHECK: [[val:%[0-9]+]] = OpLoad %type_TextureBuffer_S %myTBuffer
// CHECK-NEXT: [[vec:%[0-9]+]] = OpCompositeExtract %v4float [[val]] 0
// CHECK-NEXT: [[tmp:%[0-9]+]] = OpCompositeConstruct %S_0 [[vec]]
// CHECK-NEXT: [[e0:%[0-9]+]] = OpCompositeExtract %float [[vec]] 0
// CHECK-NEXT: [[e1:%[0-9]+]] = OpCompositeExtract %float [[vec]] 1
// CHECK-NEXT: [[e2:%[0-9]+]] = OpCompositeExtract %float [[vec]] 2
// CHECK-NEXT: [[e3:%[0-9]+]] = OpCompositeExtract %float [[vec]] 3
// CHECK-NEXT: [[vec:%[0-9]+]] = OpCompositeConstruct %v4float [[e0]] [[e1]] [[e2]] [[e3]]
// CHECK: [[tmp:%[0-9]+]] = OpCompositeConstruct %S_0 [[vec]]
// CHECK-NEXT: OpStore %buffer1 [[tmp]]
S buffer1 = myTBuffer;
// Assigning a TextureBuffer<T> to a T is a copy
// CHECK: [[val_0:%[0-9]+]] = OpLoad %type_TextureBuffer_S %myTBuffer
// CHECK-NEXT: [[vec_0:%[0-9]+]] = OpCompositeExtract %v4float [[val_0]] 0
// CHECK-NEXT: [[tmp_0:%[0-9]+]] = OpCompositeConstruct %S_0 [[vec_0]]
// CHECK-NEXT: [[vec:%[0-9]+]] = OpCompositeExtract %v4float [[val_0]] 0
// CHECK-NEXT: [[e0:%[0-9]+]] = OpCompositeExtract %float [[vec]] 0
// CHECK-NEXT: [[e1:%[0-9]+]] = OpCompositeExtract %float [[vec]] 1
// CHECK-NEXT: [[e2:%[0-9]+]] = OpCompositeExtract %float [[vec]] 2
// CHECK-NEXT: [[e3:%[0-9]+]] = OpCompositeExtract %float [[vec]] 3
// CHECK-NEXT: [[vec:%[0-9]+]] = OpCompositeConstruct %v4float [[e0]] [[e1]] [[e2]] [[e3]]
// CHECK-NEXT: [[tmp_0:%[0-9]+]] = OpCompositeConstruct %S_0 [[vec]]
// CHECK-NEXT: OpStore %buffer2 [[tmp_0]]
S buffer2;
buffer2 = myTBuffer;
@ -57,6 +67,11 @@ float4 main(in float4 pos : SV_Position) : SV_Target
// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Uniform_S %myASBuffer %uint_0 {{%[0-9]+}}
// CHECK-NEXT: [[tb:%[0-9]+]] = OpLoad %type_TextureBuffer_S %myTBuffer
// CHECK-NEXT: [[vec_1:%[0-9]+]] = OpCompositeExtract %v4float [[tb]] 0
// CHECK-NEXT: [[e0:%[0-9]+]] = OpCompositeExtract %float [[vec_1]] 0
// CHECK-NEXT: [[e1:%[0-9]+]] = OpCompositeExtract %float [[vec_1]] 1
// CHECK-NEXT: [[e2:%[0-9]+]] = OpCompositeExtract %float [[vec_1]] 2
// CHECK-NEXT: [[e3:%[0-9]+]] = OpCompositeExtract %float [[vec_1]] 3
// CHECK-NEXT: [[vec_1:%[0-9]+]] = OpCompositeConstruct %v4float [[e0]] [[e1]] [[e2]] [[e3]]
// CHECK-NEXT: [[loc:%[0-9]+]] = OpCompositeConstruct %S_0 [[vec_1]]
// CHECK-NEXT: [[vec_2:%[0-9]+]] = OpCompositeExtract %v4float [[loc]] 0
// CHECK-NEXT: [[val_2:%[0-9]+]] = OpCompositeConstruct %S [[vec_2]]
@ -65,8 +80,13 @@ float4 main(in float4 pos : SV_Position) : SV_Target
// Passing a TextureBuffer<T> to a T parameter is a copy
// CHECK: [[val_3:%[0-9]+]] = OpLoad %type_TextureBuffer_S %myTBuffer
// CHECK-NEXT: [[vec_3:%[0-9]+]] = OpCompositeExtract %v4float [[val_3]] 0
// CHECK-NEXT: [[tmp_1:%[0-9]+]] = OpCompositeConstruct %S_0 [[vec_3]]
// CHECK-NEXT: [[vec:%[0-9]+]] = OpCompositeExtract %v4float [[val_3]] 0
// CHECK-NEXT: [[e0:%[0-9]+]] = OpCompositeExtract %float [[vec]] 0
// CHECK-NEXT: [[e1:%[0-9]+]] = OpCompositeExtract %float [[vec]] 1
// CHECK-NEXT: [[e2:%[0-9]+]] = OpCompositeExtract %float [[vec]] 2
// CHECK-NEXT: [[e3:%[0-9]+]] = OpCompositeExtract %float [[vec]] 3
// CHECK-NEXT: [[vec:%[0-9]+]] = OpCompositeConstruct %v4float [[e0]] [[e1]] [[e2]] [[e3]]
// CHECK-NEXT: [[tmp_1:%[0-9]+]] = OpCompositeConstruct %S_0 [[vec]]
// CHECK-NEXT: OpStore %param_var_buffer [[tmp_1]]
return doStuff(myTBuffer);
}
@ -74,8 +94,13 @@ float4 main(in float4 pos : SV_Position) : SV_Target
S retStuff() {
// Returning a TextureBuffer<T> as a T is a copy
// CHECK: [[val_4:%[0-9]+]] = OpLoad %type_TextureBuffer_S %myTBuffer
// CHECK-NEXT: [[vec_4:%[0-9]+]] = OpCompositeExtract %v4float [[val_4]] 0
// CHECK-NEXT: [[ret:%[0-9]+]] = OpCompositeConstruct %S_0 [[vec_4]]
// CHECK-NEXT: [[vec:%[0-9]+]] = OpCompositeExtract %v4float [[val_4]] 0
// CHECK-NEXT: [[e0:%[0-9]+]] = OpCompositeExtract %float [[vec]] 0
// CHECK-NEXT: [[e1:%[0-9]+]] = OpCompositeExtract %float [[vec]] 1
// CHECK-NEXT: [[e2:%[0-9]+]] = OpCompositeExtract %float [[vec]] 2
// CHECK-NEXT: [[e3:%[0-9]+]] = OpCompositeExtract %float [[vec]] 3
// CHECK-NEXT: [[vec:%[0-9]+]] = OpCompositeConstruct %v4float [[e0]] [[e1]] [[e2]] [[e3]]
// CHECK-NEXT: [[ret:%[0-9]+]] = OpCompositeConstruct %S_0 [[vec]]
// CHECK-NEXT: OpReturnValue [[ret]]
return myTBuffer;
}

Просмотреть файл

@ -26,6 +26,27 @@ float4 main(in VSInput input) : SV_POSITION {
// CHECK: [[ptr:%[a-zA-Z0-9_]+]] = OpAccessChain [[ptr_type_CB_PerDraw]] %PerDraws
// CHECK: [[cb_PerDraw:%[a-zA-Z0-9_]+]] = OpLoad [[type_CB_PerDraw]] [[ptr]]
// CHECK: [[float4x4:%[a-zA-Z0-9_]+]] = OpCompositeExtract %mat4v4float [[cb_PerDraw]] 0
// CHECK: [[f_0_0:%[0-9]+]] = OpCompositeExtract %float [[float4x4]] 0 0
// CHECK: [[f_0_1:%[0-9]+]] = OpCompositeExtract %float [[float4x4]] 0 1
// CHECK: [[f_0_2:%[0-9]+]] = OpCompositeExtract %float [[float4x4]] 0 2
// CHECK: [[f_0_3:%[0-9]+]] = OpCompositeExtract %float [[float4x4]] 0 3
// CHECK: [[f_1_0:%[0-9]+]] = OpCompositeExtract %float [[float4x4]] 1 0
// CHECK: [[f_1_1:%[0-9]+]] = OpCompositeExtract %float [[float4x4]] 1 1
// CHECK: [[f_1_2:%[0-9]+]] = OpCompositeExtract %float [[float4x4]] 1 2
// CHECK: [[f_1_3:%[0-9]+]] = OpCompositeExtract %float [[float4x4]] 1 3
// CHECK: [[f_2_0:%[0-9]+]] = OpCompositeExtract %float [[float4x4]] 2 0
// CHECK: [[f_2_1:%[0-9]+]] = OpCompositeExtract %float [[float4x4]] 2 1
// CHECK: [[f_2_2:%[0-9]+]] = OpCompositeExtract %float [[float4x4]] 2 2
// CHECK: [[f_2_3:%[0-9]+]] = OpCompositeExtract %float [[float4x4]] 2 3
// CHECK: [[f_3_0:%[0-9]+]] = OpCompositeExtract %float [[float4x4]] 3 0
// CHECK: [[f_3_1:%[0-9]+]] = OpCompositeExtract %float [[float4x4]] 3 1
// CHECK: [[f_3_2:%[0-9]+]] = OpCompositeExtract %float [[float4x4]] 3 2
// CHECK: [[f_3_3:%[0-9]+]] = OpCompositeExtract %float [[float4x4]] 3 3
// CHECK: [[r0:%[0-9]+]] = OpCompositeConstruct %v4float [[f_0_0]] [[f_0_1]] [[f_0_2]] [[f_0_3]]
// CHECK: [[r1:%[0-9]+]] = OpCompositeConstruct %v4float [[f_1_0]] [[f_1_1]] [[f_1_2]] [[f_1_3]]
// CHECK: [[r2:%[0-9]+]] = OpCompositeConstruct %v4float [[f_2_0]] [[f_2_1]] [[f_2_2]] [[f_2_3]]
// CHECK: [[r3:%[0-9]+]] = OpCompositeConstruct %v4float [[f_3_0]] [[f_3_1]] [[f_3_2]] [[f_3_3]]
// CHECK: [[float4x4:%[0-9]+]] = OpCompositeConstruct %mat4v4float [[r0]] [[r1]] [[r2]] [[r3]]
// CHECK: OpCompositeConstruct [[type_PerDraw]] [[float4x4]]
const PerDraw perDraw = PerDraws[input.DrawIdx];
return mul(float4(input.Position, 1.0f), perDraw.Transform);