[spirv] Add initial support for non-fp matrices (#1057)

This is the support for non-floating-point matrices, which are emulated using arrays of vectors because SPIR-V does not allow non-floating-point matrices. * Initial support for non-fp matrix * Add support for non-fp matrix in all() * Conversion of float matrix to int matrix * support for modf returning an int matrix * Add tests for non-fp matrix access * Mixed arithmetic for non-fp matrices * Support non-fp matrix in flat conversion * Non-fp matrix in asint/asuint/asfloat * Mul of non-fp Matrix with Vector/Scalar * Add tests for non-fp matrix cast TODO: Layout decoration of non-fp matrices TODO: Majorness of non-fp matrices
2018-02-12 15:33:41 -05:00 · 2018-02-12 15:33:41 -05:00 · 774e85eb20
--- a/docs/SPIR-V.rst
+++ b/docs/SPIR-V.rst
@ -365,7 +365,9 @@ are translated into:
 ``|type|1x1``                        The scalar type for ``|type|``
 ==================================== ====================================================

-A MxN HLSL matrix is translated into a SPIR-V matrix with M vectors, each with
+The above table is for float matrices.
+
+A MxN HLSL float matrix is translated into a SPIR-V matrix with M vectors, each with
 N elements. Conceptually HLSL matrices are row-major while SPIR-V matrices are
 column-major, thus all HLSL matrices are represented by their transposes.
 Doing so may require special handling of certain matrix operations:
@ -384,6 +386,10 @@ Doing so may require special handling of certain matrix operations:

 See `Appendix A. Matrix Representation`_ for further explanation regarding these design choices.

+Since the ``Shader`` capability in SPIR-V does not allow parameterizing matrix
+types with non-floating-point types, a non-floating-point MxN matrix is translated
+into an array with M elements, with each element being a vector with N elements.
+
 Structs
 -------

--- a/tools/clang/include/clang/SPIRV/ModuleBuilder.h
+++ b/tools/clang/include/clang/SPIRV/ModuleBuilder.h
@ -384,7 +384,7 @@ public:
  uint32_t getFloat32Type();
  uint32_t getFloat64Type();
  uint32_t getVecType(uint32_t elemType, uint32_t elemCount);
-  uint32_t getMatType(uint32_t colType, uint32_t colCount);
+  uint32_t getMatType(QualType elemType, uint32_t colType, uint32_t colCount);
  uint32_t getPointerType(uint32_t pointeeType, spv::StorageClass);
  uint32_t getStructType(llvm::ArrayRef<uint32_t> fieldTypes,
                         llvm::StringRef structName = "",
--- a/tools/clang/lib/SPIRV/InitListHandler.cpp
+++ b/tools/clang/lib/SPIRV/InitListHandler.cpp
@ -199,11 +199,7 @@ uint32_t InitListHandler::createInitForType(QualType type,
                                   hlsl::GetHLSLVecSize(type), srcLoc);

  if (hlsl::IsHLSLMatType(type)) {
-    uint32_t rowCount = 0, colCount = 0;
-    hlsl::GetHLSLMatRowColCount(type, rowCount, colCount);
-    const QualType elemType = hlsl::GetHLSLMatElementType(type);
-
-    return createInitForMatrixType(elemType, rowCount, colCount, srcLoc);
+    return createInitForMatrixType(type, srcLoc);
  }

  // Samplers, (RW)Buffers, (RW)Textures
@ -298,10 +294,12 @@ uint32_t InitListHandler::createInitForVectorType(QualType elemType,
  return theBuilder.createCompositeConstruct(vecType, elements);
 }

-uint32_t InitListHandler::createInitForMatrixType(QualType elemType,
-                                                  uint32_t rowCount,
-                                                  uint32_t colCount,
+uint32_t InitListHandler::createInitForMatrixType(QualType matrixType,
                                                  SourceLocation srcLoc) {
+  uint32_t rowCount = 0, colCount = 0;
+  hlsl::GetHLSLMatRowColCount(matrixType, rowCount, colCount);
+  const QualType elemType = hlsl::GetHLSLMatElementType(matrixType);
+
  // Same as the vector case, first try to see if we already have a matrix at
  // the beginning of the initializer queue.
  if (scalars.empty()) {
@ -336,12 +334,9 @@ uint32_t InitListHandler::createInitForMatrixType(QualType elemType,
    vectors.push_back(createInitForVectorType(elemType, colCount, srcLoc));
  }

-  const uint32_t elemTypeId = typeTranslator.translateType(elemType);
-  const uint32_t vecType = theBuilder.getVecType(elemTypeId, colCount);
-  const uint32_t matType = theBuilder.getMatType(vecType, rowCount);
-
  // TODO: use OpConstantComposite when all components are constants
-  return theBuilder.createCompositeConstruct(matType, vectors);
+  return theBuilder.createCompositeConstruct(
+      typeTranslator.translateType(matrixType), vectors);
 }

 uint32_t InitListHandler::createInitForStructType(QualType type) {
--- a/tools/clang/lib/SPIRV/InitListHandler.h
+++ b/tools/clang/lib/SPIRV/InitListHandler.h
@ -121,8 +121,7 @@ private:
  uint32_t createInitForBuiltinType(QualType type, SourceLocation);
  uint32_t createInitForVectorType(QualType elemType, uint32_t count,
                                   SourceLocation);
-  uint32_t createInitForMatrixType(QualType elemType, uint32_t rowCount,
-                                   uint32_t colCount, SourceLocation);
+  uint32_t createInitForMatrixType(QualType matrixType, SourceLocation);
  uint32_t createInitForStructType(QualType type);
  uint32_t createInitForConstantArrayType(QualType type, SourceLocation);
  uint32_t createInitForSamplerImageType(QualType type, SourceLocation);
--- a/tools/clang/lib/SPIRV/ModuleBuilder.cpp
+++ b/tools/clang/lib/SPIRV/ModuleBuilder.cpp
@ -880,7 +880,17 @@ uint32_t ModuleBuilder::getVecType(uint32_t elemType, uint32_t elemCount) {
  return typeId;
 }

-uint32_t ModuleBuilder::getMatType(uint32_t colType, uint32_t colCount) {
+uint32_t ModuleBuilder::getMatType(QualType elemType, uint32_t colType,
+                                   uint32_t colCount) {
+  // NOTE: According to Item "Data rules" of SPIR-V Spec 2.16.1 "Universal
+  // Validation Rules":
+  //   Matrix types can only be parameterized with floating-point types.
+  //
+  // So we need special handling of non-fp matrices. We emulate non-fp
+  // matrices as an array of vectors.
+  if (!elemType->isFloatingType())
+    return getArrayType(colType, getConstantUint32(colCount));
+
  const Type *type = Type::getMatrix(theContext, colType, colCount);
  const uint32_t typeId = theContext.getResultIdForType(type);
  theModule.addType(type, typeId);
--- a/tools/clang/lib/SPIRV/SPIRVEmitter.cpp
+++ b/tools/clang/lib/SPIRV/SPIRVEmitter.cpp
@ -2022,7 +2022,7 @@ SpirvEvalInfo SPIRVEmitter::doCastExpr(const CastExpr *expr) {
        theBuilder.createVectorShuffle(vec2Type, vec, vec, {2, 3});

    const auto mat = theBuilder.createCompositeConstruct(
-        theBuilder.getMatType(vec2Type, 2), {subVec1, subVec2});
+        theBuilder.getMatType(elemType, vec2Type, 2), {subVec1, subVec2});

    return SpirvEvalInfo(mat).setRValue();
  }
@ -2250,11 +2250,6 @@ uint32_t SPIRVEmitter::processFlatConversion(const QualType type,
    QualType elemType = {};
    uint32_t rowCount = 0, colCount = 0;
    if (TypeTranslator::isMxNMatrix(type, &elemType, &rowCount, &colCount)) {
-      if (!elemType->isFloatingType()) {
-        emitError("non-floating-point matrix type unimplemented", {});
-        return 0;
-      }
-
      // By default HLSL matrices are row major, while SPIR-V matrices are
      // column major. We are mapping what HLSL semantically mean a row into a
      // column here.
@ -4293,7 +4288,7 @@ SpirvEvalInfo SPIRVEmitter::doUnaryOperator(const UnaryOperator *expr) {
                             ? getMatElemValueOne(subType)
                             : getValueOne(subType);
    uint32_t incValue = 0;
-    if (TypeTranslator::isSpirvAcceptableMatrixType(subType)) {
+    if (TypeTranslator::isMxNMatrix(subType)) {
      // For matrices, we can only increment/decrement each vector of it.
      const auto actOnEachVec = [this, spvOp, one](uint32_t /*index*/,
                                                   uint32_t vecType,
@ -4601,7 +4596,7 @@ SpirvEvalInfo SPIRVEmitter::processBinaryOp(const Expr *lhs, const Expr *rhs,
  // onto each element vector iff the operands are not degenerated matrices
  // and we don't have a matrix specific SPIR-V instruction for the operation.
  if (!isSpirvMatrixOp(mandateGenOpcode) &&
-      TypeTranslator::isSpirvAcceptableMatrixType(lhs->getType())) {
+      TypeTranslator::isMxNMatrix(lhs->getType())) {
    return processMatrixBinaryOp(lhs, rhs, opcode, sourceRange);
  }

@ -5253,7 +5248,7 @@ SpirvEvalInfo SPIRVEmitter::processEachVectorInMatrix(
    llvm::function_ref<uint32_t(uint32_t, uint32_t, uint32_t)>
        actOnEachVector) {
  const auto matType = matrix->getType();
-  assert(TypeTranslator::isSpirvAcceptableMatrixType(matType));
+  assert(TypeTranslator::isMxNMatrix(matType));
  const uint32_t vecType = typeTranslator.getComponentVectorType(matType);

  uint32_t rowCount = 0, colCount = 0;
@ -5344,7 +5339,7 @@ SPIRVEmitter::processMatrixBinaryOp(const Expr *lhs, const Expr *rhs,
                                    SourceRange range) {
  // TODO: some code are duplicated from processBinaryOp. Try to unify them.
  const auto lhsType = lhs->getType();
-  assert(TypeTranslator::isSpirvAcceptableMatrixType(lhsType));
+  assert(TypeTranslator::isMxNMatrix(lhsType));
  const spv::Op spvOp = translateOp(opcode, lhsType);

  uint32_t rhsVal, lhsPtr, lhsVal;
@ -5515,11 +5510,32 @@ uint32_t SPIRVEmitter::castToBool(const uint32_t fromVal, QualType fromType,
  if (TypeTranslator::isSameScalarOrVecType(fromType, toBoolType))
    return fromVal;

+  const uint32_t boolType = typeTranslator.translateType(toBoolType);
+
+  { // Special case handling for converting to a matrix of booleans.
+    QualType elemType = {};
+    uint32_t rowCount = 0, colCount = 0;
+    if (TypeTranslator::isMxNMatrix(fromType, &elemType, &rowCount,
+                                    &colCount)) {
+      const auto fromRowQualType =
+          astContext.getExtVectorType(elemType, colCount);
+      const auto fromRowQualTypeId =
+          typeTranslator.translateType(fromRowQualType);
+      const auto toBoolRowQualType =
+          astContext.getExtVectorType(astContext.BoolTy, colCount);
+      llvm::SmallVector<uint32_t, 4> rows;
+      for (uint32_t i = 0; i < rowCount; ++i) {
+        const auto row =
+            theBuilder.createCompositeExtract(fromRowQualTypeId, fromVal, {i});
+        rows.push_back(castToBool(row, fromRowQualType, toBoolRowQualType));
+      }
+      return theBuilder.createCompositeConstruct(boolType, rows);
+    }
+  }
+
  // Converting to bool means comparing with value zero.
  const spv::Op spvOp = translateOp(BO_NE, fromType);
-  const uint32_t boolType = typeTranslator.translateType(toBoolType);
  const uint32_t zeroVal = getValueZero(fromType);
-
  return theBuilder.createBinaryOp(spvOp, boolType, fromVal, zeroVal);
 }

@ -5549,8 +5565,38 @@ uint32_t SPIRVEmitter::castToInt(const uint32_t fromVal, QualType fromType,
    } else {
      emitError("casting from floating point to integer unimplemented", srcLoc);
    }
-  } else {
-    emitError("casting to integer unimplemented", srcLoc);
+  }
+
+  {
+    QualType elemType = {};
+    uint32_t numRows = 0, numCols = 0;
+    if (TypeTranslator::isMxNMatrix(fromType, &elemType, &numRows, &numCols)) {
+      // Dimensions must match. Queried outside assert() so NDEBUG keeps it.
+      QualType toElemType = {};
+      uint32_t toNumRows = 0, toNumCols = 0;
+      const bool isToMat = TypeTranslator::isMxNMatrix(
+          toIntType, &toElemType, &toNumRows, &toNumCols);
+      assert(isToMat && numRows == toNumRows && numCols == toNumCols);
+      (void)isToMat;
+      (void)toNumRows;
+      (void)toNumCols;
+
+      // Casting to a matrix of integers: Cast each row and construct a
+      // composite.
+      llvm::SmallVector<uint32_t, 4> castedRows;
+      const uint32_t vecType = typeTranslator.getComponentVectorType(fromType);
+      const auto fromVecQualType =
+          astContext.getExtVectorType(elemType, numCols);
+      const auto toIntVecQualType =
+          astContext.getExtVectorType(toElemType, numCols);
+      for (uint32_t row = 0; row < numRows; ++row) {
+        const auto rowId =
+            theBuilder.createCompositeExtract(vecType, fromVal, {row});
+        castedRows.push_back(
+            castToInt(rowId, fromVecQualType, toIntVecQualType, srcLoc));
+      }
+      return theBuilder.createCompositeConstruct(intType, castedRows);
+    }
  }

  return 0;
@ -5582,6 +5628,39 @@ uint32_t SPIRVEmitter::castToFloat(const uint32_t fromVal, QualType fromType,
    return theBuilder.createUnaryOp(spv::Op::OpFConvert, floatType, fromVal);
  }

+  // Casting matrix types
+  {
+    QualType elemType = {};
+    uint32_t numRows = 0, numCols = 0;
+    if (TypeTranslator::isMxNMatrix(fromType, &elemType, &numRows, &numCols)) {
+      // Dimensions must match. Queried outside assert() so NDEBUG keeps it.
+      QualType toElemType = {};
+      uint32_t toNumRows = 0, toNumCols = 0;
+      const bool isToMat = TypeTranslator::isMxNMatrix(
+          toFloatType, &toElemType, &toNumRows, &toNumCols);
+      assert(isToMat && numRows == toNumRows && numCols == toNumCols);
+      (void)isToMat;
+      (void)toNumRows;
+      (void)toNumCols;
+
+      // Casting to a matrix of floats: Cast each row and construct a
+      // composite.
+      llvm::SmallVector<uint32_t, 4> castedRows;
+      const uint32_t vecType = typeTranslator.getComponentVectorType(fromType);
+      const auto fromVecQualType =
+          astContext.getExtVectorType(elemType, numCols);
+      const auto toFloatVecQualType =
+          astContext.getExtVectorType(toElemType, numCols);
+      for (uint32_t row = 0; row < numRows; ++row) {
+        const auto rowId =
+            theBuilder.createCompositeExtract(vecType, fromVal, {row});
+        castedRows.push_back(
+            castToFloat(rowId, fromVecQualType, toFloatVecQualType, srcLoc));
+      }
+      return theBuilder.createCompositeConstruct(floatType, castedRows);
+    }
+  }
+
  emitError("casting to floating point unimplemented", srcLoc);
  return 0;
 }
@ -5726,7 +5805,9 @@ SpirvEvalInfo SPIRVEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) {
    retVal =
        theBuilder.createImageSparseTexelsResident(doExpr(callExpr->getArg(0)));
    break;
+
  case hlsl::IntrinsicOp::IOP_mul:
+  case hlsl::IntrinsicOp::IOP_umul:
    retVal = processIntrinsicMul(callExpr);
    break;
  case hlsl::IntrinsicOp::IOP_all:
@ -5806,7 +5887,17 @@ SpirvEvalInfo SPIRVEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) {
        << callee->getName();
    return 0;
  }
-    INTRINSIC_SPIRV_OP_CASE(transpose, Transpose, false);
+  case hlsl::IntrinsicOp::IOP_transpose: {
+    const Expr *mat = callExpr->getArg(0);
+    const QualType matType = mat->getType();
+    if (hlsl::GetHLSLMatElementType(matType)->isFloatingType())
+      retVal =
+          processIntrinsicUsingSpirvInst(callExpr, spv::Op::OpTranspose, false);
+    else
+      retVal = processNonFpMatrixTranspose(matType, doExpr(mat));
+
+    break;
+  }
    INTRINSIC_SPIRV_OP_CASE(ddx, DPdx, true);
    INTRINSIC_SPIRV_OP_WITH_CAP_CASE(ddx_coarse, DPdxCoarse, false,
                                     spv::Capability::DerivativeControl);
@ -6189,14 +6280,6 @@ uint32_t SPIRVEmitter::processIntrinsicModf(const CallExpr *callExpr) {
  const uint32_t argId = doExpr(arg);
  const uint32_t ipId = doExpr(ipArg);

-  // TODO: We currently do not support non-float matrices.
-  QualType ipElemType = {};
-  if (TypeTranslator::isMxNMatrix(ipType, &ipElemType) &&
-      !ipElemType->isFloatingType()) {
-    emitError("non-floating-point matrix type unimplemented", {});
-    return 0;
-  }
-
  // For scalar and vector argument types.
  {
    if (TypeTranslator::isScalarType(argType) ||
@ -6235,12 +6318,20 @@ uint32_t SPIRVEmitter::processIntrinsicModf(const CallExpr *callExpr) {
            modfStructTypeId, glslInstSetId, GLSLstd450::GLSLstd450ModfStruct,
            {curRow});
        auto ip = theBuilder.createCompositeExtract(colTypeId, modf, {1});
+
        ips.push_back(ip);
        fracs.push_back(
            theBuilder.createCompositeExtract(colTypeId, modf, {0}));
      }
-      theBuilder.createStore(
-          ipId, theBuilder.createCompositeConstruct(returnTypeId, ips));
+
+      uint32_t ip = theBuilder.createCompositeConstruct(
+          typeTranslator.translateType(argType), ips);
+      // If the 'ip' is not a float type, the AST will not contain a CastExpr
+      // because this is internal to the intrinsic function. So, in such a
+      // case we need to cast manually.
+      if (!hlsl::GetHLSLMatElementType(ipType)->isFloatingType())
+        ip = castToInt(ip, argType, ipType, ipArg->getExprLoc());
+      theBuilder.createStore(ipId, ip);
      return theBuilder.createCompositeConstruct(returnTypeId, fracs);
    }
  }
@ -6532,7 +6623,7 @@ uint32_t SPIRVEmitter::processIntrinsicClamp(const CallExpr *callExpr) {

  // FClamp, UClamp, and SClamp do not operate on matrices, so we should perform
  // the operation on each vector of the matrix.
-  if (TypeTranslator::isSpirvAcceptableMatrixType(argX->getType())) {
+  if (TypeTranslator::isMxNMatrix(argX->getType())) {
    const auto actOnEachVec = [this, glslInstSetId, glslOpcode, argMinId,
                               argMaxId](uint32_t index, uint32_t vecType,
                                         uint32_t curRowId) {
@ -6617,6 +6708,209 @@ uint32_t SPIRVEmitter::processIntrinsicMemoryBarrier(const CallExpr *callExpr,
  return 0;
 }

+uint32_t SPIRVEmitter::processNonFpMatrixTranspose(QualType matType,
+                                                   uint32_t matId) {
+  // Flatten the matrix into its scalar elements (row by row), then construct
+  // the transposed matrix from them: numCols vectors of numRows elements each.
+  QualType elemType = {};
+  uint32_t numRows = 0, numCols = 0;
+  const bool isMat =
+      TypeTranslator::isMxNMatrix(matType, &elemType, &numRows, &numCols);
+  assert(isMat && !elemType->isFloatingType());
+
+  const auto rowQualType = astContext.getExtVectorType(elemType, numCols);
+  const auto colQualType = astContext.getExtVectorType(elemType, numRows);
+  const uint32_t rowTypeId = typeTranslator.translateType(rowQualType);
+  const uint32_t colTypeId = typeTranslator.translateType(colQualType);
+  const uint32_t elemTypeId = typeTranslator.translateType(elemType);
+
+  // OpCompositeConstruct needs exactly one constituent per array element, so
+  // the array cannot be built from a few larger vectors. Extract every scalar
+  // first. NOTE(review): rowTypeId above is never used in this function.
+  llvm::SmallVector<uint32_t, 4> elems;
+  for (uint32_t i = 0; i < numRows; ++i)
+    for (uint32_t j = 0; j < numCols; ++j)
+      elems.push_back(
+          theBuilder.createCompositeExtract(elemTypeId, matId, {i, j}));
+
+  llvm::SmallVector<uint32_t, 4> cols;
+  for (uint32_t i = 0; i < numCols; ++i) {
+    // The ith vector of the "transposed" array takes the flattened elements at
+    // offsets i, i + numCols, i + 2 * numCols, ... ('indexes' holds their ids).
+    llvm::SmallVector<uint32_t, 4> indexes;
+    for (uint32_t j = 0; j < numRows; ++j)
+      indexes.push_back(elems[i + (j * numCols)]);
+
+    cols.push_back(theBuilder.createCompositeConstruct(colTypeId, indexes));
+  }
+
+  const auto transposeTypeId =
+      theBuilder.getArrayType(colTypeId, theBuilder.getConstantUint32(numCols));
+  return theBuilder.createCompositeConstruct(transposeTypeId, cols);
+}
+
+uint32_t SPIRVEmitter::processNonFpDot(uint32_t vec1Id, uint32_t vec2Id,
+                                       uint32_t vecSize, QualType elemType) {
+  const auto elemTypeId = typeTranslator.translateType(elemType);
+  llvm::SmallVector<uint32_t, 4> muls; // ids of the per-component products
+  for (uint32_t i = 0; i < vecSize; ++i) { // multiply component i of each
+    const auto elem1 =
+        theBuilder.createCompositeExtract(elemTypeId, vec1Id, {i});
+    const auto elem2 =
+        theBuilder.createCompositeExtract(elemTypeId, vec2Id, {i});
+    muls.push_back(theBuilder.createBinaryOp(translateOp(BO_Mul, elemType),
+                                             elemTypeId, elem1, elem2));
+  }
+  uint32_t sum = muls[0]; // reads muls[0] unguarded, so vecSize must be >= 1
+  for (uint32_t i = 1; i < vecSize; ++i) { // fold in the remaining products
+    sum = theBuilder.createBinaryOp(translateOp(BO_Add, elemType), elemTypeId,
+                                    sum, muls[i]);
+  }
+  return sum; // id of the scalar holding the summed products
+}
+
+uint32_t SPIRVEmitter::processNonFpScalarTimesMatrix(QualType scalarType,
+                                                     uint32_t scalarId,
+                                                     QualType matrixType,
+                                                     uint32_t matrixId) {
+  assert(TypeTranslator::isScalarType(scalarType));
+  QualType elemType = {};
+  uint32_t numRows = 0, numCols = 0;
+  const bool isMat =
+      TypeTranslator::isMxNMatrix(matrixType, &elemType, &numRows, &numCols);
+  assert(isMat);
+  assert(typeTranslator.isSameType(scalarType, elemType));
+
+  // Splat the scalar into a row-sized vector and multiply it with each row of
+  // the matrix, then rebuild the matrix from the products. The front-end
+  // guarantees that the scalar and matrix element type are the same (e.g. for
+  // a float scalar the matrix is cast to a float matrix before being passed
+  // to mul()), and that types such as bool are cast to float or int before
+  // being passed to mul().
+  const auto rowType = astContext.getExtVectorType(elemType, numCols);
+  const auto rowTypeId = typeTranslator.translateType(rowType);
+  llvm::SmallVector<uint32_t, 4> splat(size_t(numCols), scalarId);
+  const auto scalarSplat =
+      theBuilder.createCompositeConstruct(rowTypeId, splat);
+  llvm::SmallVector<uint32_t, 4> mulRows;
+  for (uint32_t row = 0; row < numRows; ++row) {
+    const auto rowId =
+        theBuilder.createCompositeExtract(rowTypeId, matrixId, {row});
+    mulRows.push_back(theBuilder.createBinaryOp(translateOp(BO_Mul, scalarType),
+                                                rowTypeId, rowId, scalarSplat));
+  }
+  return theBuilder.createCompositeConstruct(
+      typeTranslator.translateType(matrixType), mulRows);
+}
+
+uint32_t SPIRVEmitter::processNonFpVectorTimesMatrix(QualType vecType,
+                                                     uint32_t vecId,
+                                                     QualType matType,
+                                                     uint32_t matId,
+                                                     uint32_t matTransposeId) {
+  // This function assumes that the vector element type and matrix element type
+  // are the same.
+  QualType vecElemType = {}, matElemType = {};
+  uint32_t vecSize = 0, numRows = 0, numCols = 0;
+  const bool isVec =
+      TypeTranslator::isVectorType(vecType, &vecElemType, &vecSize);
+  const bool isMat =
+      TypeTranslator::isMxNMatrix(matType, &matElemType, &numRows, &numCols);
+  assert(typeTranslator.isSameType(vecElemType, matElemType));
+  assert(isVec);
+  assert(isMat);
+  assert(vecSize == numRows);
+
+  // When processing vector times matrix, the vector is a row vector, and it
+  // should be multiplied by the matrix *columns*. So transpose the matrix
+  // (unless the caller already supplied a non-zero matTransposeId) and extract
+  // each column from the transposed value to dot it with the vector.
+  if (!matTransposeId)
+    matTransposeId = processNonFpMatrixTranspose(matType, matId);
+
+  const auto vecTypeId = typeTranslator.translateType(vecType);
+  llvm::SmallVector<uint32_t, 4> resultElems;
+  for (uint32_t col = 0; col < numCols; ++col) {
+    const auto colId =
+        theBuilder.createCompositeExtract(vecTypeId, matTransposeId, {col});
+    resultElems.push_back(processNonFpDot(vecId, colId, vecSize, vecElemType));
+  }
+  return theBuilder.createCompositeConstruct(
+      typeTranslator.translateType(
+          astContext.getExtVectorType(vecElemType, numCols)),
+      resultElems);
+}
+
+uint32_t SPIRVEmitter::processNonFpMatrixTimesVector(QualType matType,
+                                                     uint32_t matId,
+                                                     QualType vecType,
+                                                     uint32_t vecId) {
+  // This function assumes that the vector element type and matrix element type
+  // are the same.
+  QualType vecElemType = {}, matElemType = {};
+  uint32_t vecSize = 0, numRows = 0, numCols = 0;
+  const bool isVec =
+      TypeTranslator::isVectorType(vecType, &vecElemType, &vecSize);
+  const bool isMat =
+      TypeTranslator::isMxNMatrix(matType, &matElemType, &numRows, &numCols);
+  assert(typeTranslator.isSameType(vecElemType, matElemType));
+  assert(isVec);
+  assert(isMat);
+  assert(vecSize == numCols);
+
+  // When processing matrix times vector, the vector is a column vector. So we
+  // simply get each row of the matrix and perform a dot product with the
+  // vector; the dot products become the elements of the result vector.
+  const auto vecTypeId = typeTranslator.translateType(vecType);
+  llvm::SmallVector<uint32_t, 4> resultElems;
+  for (uint32_t row = 0; row < numRows; ++row) {
+    const auto rowId =
+        theBuilder.createCompositeExtract(vecTypeId, matId, {row});
+    resultElems.push_back(processNonFpDot(rowId, vecId, vecSize, vecElemType));
+  }
+  return theBuilder.createCompositeConstruct(
+      typeTranslator.translateType(
+          astContext.getExtVectorType(vecElemType, numRows)),
+      resultElems);
+}
+
+uint32_t SPIRVEmitter::processNonFpMatrixTimesMatrix(QualType lhsType,
+                                                     uint32_t lhsId,
+                                                     QualType rhsType,
+                                                     uint32_t rhsId) {
+  // This function assumes that the element types of the two matrices are the
+  // same. Each lhs row is multiplied by rhs, whose transpose is computed once.
+  QualType lhsElemType = {}, rhsElemType = {};
+  uint32_t lhsNumRows = 0, lhsNumCols = 0;
+  uint32_t rhsNumRows = 0, rhsNumCols = 0;
+  const bool lhsIsMat = TypeTranslator::isMxNMatrix(lhsType, &lhsElemType,
+                                                    &lhsNumRows, &lhsNumCols);
+  const bool rhsIsMat = TypeTranslator::isMxNMatrix(rhsType, &rhsElemType,
+                                                    &rhsNumRows, &rhsNumCols);
+  assert(typeTranslator.isSameType(lhsElemType, rhsElemType));
+  assert(lhsIsMat && rhsIsMat);
+  assert(lhsNumCols == rhsNumRows);
+
+  const uint32_t rhsTranspose = processNonFpMatrixTranspose(rhsType, rhsId);
+
+  const auto vecType = astContext.getExtVectorType(lhsElemType, lhsNumCols);
+  const auto vecTypeId = typeTranslator.translateType(vecType);
+  llvm::SmallVector<uint32_t, 4> resultRows;
+  for (uint32_t row = 0; row < lhsNumRows; ++row) {
+    const auto rowId =
+        theBuilder.createCompositeExtract(vecTypeId, lhsId, {row});
+    resultRows.push_back(processNonFpVectorTimesMatrix(vecType, rowId, rhsType,
+                                                       rhsId, rhsTranspose));
+  }
+
+  // Result: an array of 'lhsNumRows' vectors with 'rhsNumCols' elements each.
+  const auto elemTypeId = typeTranslator.translateType(lhsElemType);
+  const auto resultNumRows = theBuilder.getConstantUint32(lhsNumRows);
+  const auto resultColType = theBuilder.getVecType(elemTypeId, rhsNumCols);
+  const auto resultType = theBuilder.getArrayType(resultColType, resultNumRows);
+  return theBuilder.createCompositeConstruct(resultType, resultRows);
+}
+
 uint32_t SPIRVEmitter::processIntrinsicMul(const CallExpr *callExpr) {
  const QualType returnType = callExpr->getType();
  const uint32_t returnTypeId =
@ -6688,61 +6982,85 @@ uint32_t SPIRVEmitter::processIntrinsicMul(const CallExpr *callExpr) {
                                     returnTypeId, arg0Id, arg1Id);

  // mul(scalar, matrix)
-  if (TypeTranslator::isScalarType(arg0Type) &&
-      TypeTranslator::isMxNMatrix(arg1Type)) {
-    // We currently only support float matrices. So we can use
-    // OpMatrixTimesScalar
-    if (arg0Type->isFloatingType())
-      return theBuilder.createBinaryOp(spv::Op::OpMatrixTimesScalar,
-                                       returnTypeId, arg1Id, arg0Id);
+  {
+    QualType elemType = {};
+    if (TypeTranslator::isScalarType(arg0Type) &&
+        TypeTranslator::isMxNMatrix(arg1Type, &elemType)) {
+      // OpMatrixTimesScalar can only be used if *both* the matrix element type
+      // and the scalar type are float.
+      if (arg0Type->isFloatingType() && elemType->isFloatingType())
+        return theBuilder.createBinaryOp(spv::Op::OpMatrixTimesScalar,
+                                         returnTypeId, arg1Id, arg0Id);
+      else
+        return processNonFpScalarTimesMatrix(arg0Type, arg0Id, arg1Type,
+                                             arg1Id);
+    }
  }

  // mul(matrix, scalar)
-  if (TypeTranslator::isScalarType(arg1Type) &&
-      TypeTranslator::isMxNMatrix(arg0Type)) {
-    // We currently only support float matrices. So we can use
-    // OpMatrixTimesScalar
-    if (arg1Type->isFloatingType())
-      return theBuilder.createBinaryOp(spv::Op::OpMatrixTimesScalar,
-                                       returnTypeId, arg0Id, arg1Id);
+  {
+    QualType elemType = {};
+    if (TypeTranslator::isScalarType(arg1Type) &&
+        TypeTranslator::isMxNMatrix(arg0Type, &elemType)) {
+      // OpMatrixTimesScalar can only be used if *both* the matrix element type
+      // and the scalar type are float.
+      if (arg1Type->isFloatingType() && elemType->isFloatingType())
+        return theBuilder.createBinaryOp(spv::Op::OpMatrixTimesScalar,
+                                         returnTypeId, arg0Id, arg1Id);
+      else
+        return processNonFpScalarTimesMatrix(arg1Type, arg1Id, arg0Type,
+                                             arg0Id);
+    }
  }

  // mul(vector, matrix)
  {
-    QualType elemType = {};
+    QualType vecElemType = {}, matElemType = {};
    uint32_t elemCount = 0, numRows = 0;
-    if (TypeTranslator::isVectorType(arg0Type, &elemType, &elemCount) &&
-        TypeTranslator::isMxNMatrix(arg1Type, nullptr, &numRows, nullptr) &&
-        elemType->isFloatingType()) {
+    if (TypeTranslator::isVectorType(arg0Type, &vecElemType, &elemCount) &&
+        TypeTranslator::isMxNMatrix(arg1Type, &matElemType, &numRows)) {
      assert(elemCount == numRows);
-      return theBuilder.createBinaryOp(spv::Op::OpMatrixTimesVector,
-                                       returnTypeId, arg1Id, arg0Id);
+
+      if (vecElemType->isFloatingType() && matElemType->isFloatingType())
+        return theBuilder.createBinaryOp(spv::Op::OpMatrixTimesVector,
+                                         returnTypeId, arg1Id, arg0Id);
+      else
+        return processNonFpVectorTimesMatrix(arg0Type, arg0Id, arg1Type,
+                                             arg1Id);
    }
  }

  // mul(matrix, vector)
  {
-    QualType elemType = {};
+    QualType vecElemType = {}, matElemType = {};
    uint32_t elemCount = 0, numCols = 0;
-    if (TypeTranslator::isMxNMatrix(arg0Type, nullptr, nullptr, &numCols) &&
-        TypeTranslator::isVectorType(arg1Type, &elemType, &elemCount) &&
-        elemType->isFloatingType()) {
+    if (TypeTranslator::isMxNMatrix(arg0Type, &matElemType, nullptr,
+                                    &numCols) &&
+        TypeTranslator::isVectorType(arg1Type, &vecElemType, &elemCount)) {
      assert(elemCount == numCols);
-      return theBuilder.createBinaryOp(spv::Op::OpVectorTimesMatrix,
-                                       returnTypeId, arg1Id, arg0Id);
+      if (vecElemType->isFloatingType() && matElemType->isFloatingType())
+        return theBuilder.createBinaryOp(spv::Op::OpVectorTimesMatrix,
+                                         returnTypeId, arg1Id, arg0Id);
+      else
+        return processNonFpMatrixTimesVector(arg0Type, arg0Id, arg1Type,
+                                             arg1Id);
    }
  }

  // mul(matrix, matrix)
  {
+    // The front-end ensures that the two matrix element types match.
    QualType elemType = {};
-    uint32_t arg0Cols = 0, arg1Rows = 0;
-    if (TypeTranslator::isMxNMatrix(arg0Type, &elemType, nullptr, &arg0Cols) &&
-        TypeTranslator::isMxNMatrix(arg1Type, nullptr, &arg1Rows, nullptr) &&
-        elemType->isFloatingType()) {
-      assert(arg0Cols == arg1Rows);
-      return theBuilder.createBinaryOp(spv::Op::OpMatrixTimesMatrix,
-                                       returnTypeId, arg1Id, arg0Id);
+    uint32_t lhsCols = 0, rhsRows = 0;
+    if (TypeTranslator::isMxNMatrix(arg0Type, &elemType, nullptr, &lhsCols) &&
+        TypeTranslator::isMxNMatrix(arg1Type, nullptr, &rhsRows, nullptr)) {
+      assert(lhsCols == rhsRows);
+      if (elemType->isFloatingType())
+        return theBuilder.createBinaryOp(spv::Op::OpMatrixTimesMatrix,
+                                         returnTypeId, arg1Id, arg0Id);
+      else
+        return processNonFpMatrixTimesMatrix(arg0Type, arg0Id, arg1Type,
+                                             arg1Id);
    }
  }

@ -6889,13 +7207,6 @@ uint32_t SPIRVEmitter::processIntrinsicAllOrAny(const CallExpr *callExpr,
    uint32_t matRowCount = 0, matColCount = 0;
    if (TypeTranslator::isMxNMatrix(argType, &elemType, &matRowCount,
                                    &matColCount)) {
-      if (!elemType->isFloatingType()) {
-        emitError("non-floating-point matrix arguments in all/any intrinsic "
-                  "function unimplemented",
-                  callExpr->getExprLoc());
-        return 0;
-      }
-
      uint32_t matrixId = doExpr(arg);
      const uint32_t vecType = typeTranslator.getComponentVectorType(argType);
      llvm::SmallVector<uint32_t, 4> rowResults;
@ -6967,24 +7278,36 @@ uint32_t SPIRVEmitter::processIntrinsicAsType(const CallExpr *callExpr) {
  const QualType argType = arg0->getType();

  // Method 3 return type may be the same as arg type, so it would be a no-op.
-  if (returnType.getCanonicalType() == argType.getCanonicalType())
+  if (typeTranslator.isSameType(returnType, argType))
    return doExpr(arg0);

-  // SPIR-V does not support non-floating point matrices. For the above methods
-  // that involve matrices, either the input or the output is a non-float
-  // matrix. (except for 'asfloat' taking a float matrix and returning a float
-  // matrix, which is a no-op and is handled by the condition above).
-  if (TypeTranslator::isMxNMatrix(argType)) {
-    emitError("non-floating-point matrix type unimplemented",
-              callExpr->getExprLoc());
-    return 0;
-  }
-
  switch (numArgs) {
  case 1: {
    // Handling Method 1, 2, and 3.
-    return theBuilder.createUnaryOp(spv::Op::OpBitcast, returnTypeId,
-                                    doExpr(arg0));
+    const auto argId = doExpr(arg0);
+    QualType fromElemType = {};
+    uint32_t numRows = 0, numCols = 0;
+    // For non-matrix arguments (scalar or vector), just do an OpBitcast.
+    if (!TypeTranslator::isMxNMatrix(argType, &fromElemType, &numRows,
+                                     &numCols)) {
+      return theBuilder.createUnaryOp(spv::Op::OpBitcast, returnTypeId, argId);
+    }
+
+    // Input or output type is a matrix.
+    const QualType toElemType = hlsl::GetHLSLMatElementType(returnType);
+    llvm::SmallVector<uint32_t, 4> castedRows;
+    const auto fromVecQualType =
+        astContext.getExtVectorType(fromElemType, numCols);
+    const auto toVecQualType = astContext.getExtVectorType(toElemType, numCols);
+    const auto fromVecTypeId = typeTranslator.translateType(fromVecQualType);
+    const auto toVecTypeId = typeTranslator.translateType(toVecQualType);
+    for (uint32_t row = 0; row < numRows; ++row) {
+      const auto rowId =
+          theBuilder.createCompositeExtract(fromVecTypeId, argId, {row});
+      castedRows.push_back(
+          theBuilder.createUnaryOp(spv::Op::OpBitcast, toVecTypeId, rowId));
+    }
+    return theBuilder.createCompositeConstruct(returnTypeId, castedRows);
  }
  case 2: {
    const uint32_t lowbits = doExpr(arg0);
@ -7142,7 +7465,7 @@ uint32_t SPIRVEmitter::processIntrinsicFloatSign(const CallExpr *callExpr) {
  uint32_t floatSignResultId = 0;

  // For matrices, we can perform the instruction on each vector of the matrix.
-  if (TypeTranslator::isSpirvAcceptableMatrixType(argType)) {
+  if (TypeTranslator::isMxNMatrix(argType)) {
    const auto actOnEachVec = [this, glslInstSetId](uint32_t /*index*/,
                                                    uint32_t vecType,
                                                    uint32_t curRowId) {
@ -7257,8 +7580,7 @@ uint32_t SPIRVEmitter::processIntrinsicUsingSpirvInst(

    // If the instruction does not operate on matrices, we can perform the
    // instruction on each vector of the matrix.
-    if (actPerRowForMatrices &&
-        TypeTranslator::isSpirvAcceptableMatrixType(arg->getType())) {
+    if (actPerRowForMatrices && TypeTranslator::isMxNMatrix(arg->getType())) {
      const auto actOnEachVec = [this, opcode](uint32_t /*index*/,
                                               uint32_t vecType,
                                               uint32_t curRowId) {
@ -7273,8 +7595,7 @@ uint32_t SPIRVEmitter::processIntrinsicUsingSpirvInst(
    const uint32_t arg1Id = doExpr(callExpr->getArg(1));
    // If the instruction does not operate on matrices, we can perform the
    // instruction on each vector of the matrix.
-    if (actPerRowForMatrices &&
-        TypeTranslator::isSpirvAcceptableMatrixType(arg0->getType())) {
+    if (actPerRowForMatrices && TypeTranslator::isMxNMatrix(arg0->getType())) {
      const auto actOnEachVec = [this, opcode, arg1Id](uint32_t index,
                                                       uint32_t vecType,
                                                       uint32_t arg0RowId) {
@ -7303,8 +7624,7 @@ uint32_t SPIRVEmitter::processIntrinsicUsingGLSLInst(

    // If the instruction does not operate on matrices, we can perform the
    // instruction on each vector of the matrix.
-    if (actPerRowForMatrices &&
-        TypeTranslator::isSpirvAcceptableMatrixType(arg->getType())) {
+    if (actPerRowForMatrices && TypeTranslator::isMxNMatrix(arg->getType())) {
      const auto actOnEachVec = [this, glslInstSetId,
                                 opcode](uint32_t /*index*/, uint32_t vecType,
                                         uint32_t curRowId) {
@ -7320,8 +7640,7 @@ uint32_t SPIRVEmitter::processIntrinsicUsingGLSLInst(
    const uint32_t arg1Id = doExpr(callExpr->getArg(1));
    // If the instruction does not operate on matrices, we can perform the
    // instruction on each vector of the matrix.
-    if (actPerRowForMatrices &&
-        TypeTranslator::isSpirvAcceptableMatrixType(arg0->getType())) {
+    if (actPerRowForMatrices && TypeTranslator::isMxNMatrix(arg0->getType())) {
      const auto actOnEachVec = [this, glslInstSetId, opcode,
                                 arg1Id](uint32_t index, uint32_t vecType,
                                         uint32_t arg0RowId) {
@ -7341,8 +7660,7 @@ uint32_t SPIRVEmitter::processIntrinsicUsingGLSLInst(
    const uint32_t arg2Id = doExpr(callExpr->getArg(2));
    // If the instruction does not operate on matrices, we can perform the
    // instruction on each vector of the matrix.
-    if (actPerRowForMatrices &&
-        TypeTranslator::isSpirvAcceptableMatrixType(arg0->getType())) {
+    if (actPerRowForMatrices && TypeTranslator::isMxNMatrix(arg0->getType())) {
      const auto actOnEachVec = [this, glslInstSetId, opcode, arg0Id, arg1Id,
                                 arg2Id](uint32_t index, uint32_t vecType,
                                         uint32_t arg0RowId) {
@ -7407,7 +7725,16 @@ uint32_t SPIRVEmitter::getValueZero(QualType type) {
    }
  }

-  // TODO: Handle getValueZero for MxN matrices.
+  {
+    QualType elemType = {};
+    uint32_t rowCount = 0, colCount = 0;
+    if (TypeTranslator::isMxNMatrix(type, &elemType, &rowCount, &colCount)) {
+      const auto row = getVecValueZero(elemType, colCount);
+      llvm::SmallVector<uint32_t, 4> rows((size_t)rowCount, row);
+      return theBuilder.createCompositeConstruct(
+          typeTranslator.translateType(type), rows);
+    }
+  }

  emitError("getting value 0 for type %0 unimplemented", {})
      << type.getAsString();
--- a/tools/clang/lib/SPIRV/SPIRVEmitter.h
+++ b/tools/clang/lib/SPIRV/SPIRVEmitter.h
@ -339,6 +339,43 @@ private:
  /// Processes the 'mul' intrinsic function.
  uint32_t processIntrinsicMul(const CallExpr *);

+  /// Transposes a non-floating point matrix and returns the result-id of the
+  /// transpose.
+  uint32_t processNonFpMatrixTranspose(QualType matType, uint32_t matId);
+
+  /// Processes the dot product of two non-floating point vectors. The SPIR-V
+  /// OpDot only accepts float vectors. Assumes that the two vectors are of the
+  /// same size and have the same element type (elemType).
+  uint32_t processNonFpDot(uint32_t vec1Id, uint32_t vec2Id, uint32_t vecSize,
+                           QualType elemType);
+
+  /// Processes the multiplication of a *non-floating point* matrix by a scalar.
+  /// Assumes that the matrix element type and the scalar type are the same.
+  uint32_t processNonFpScalarTimesMatrix(QualType scalarType, uint32_t scalarId,
+                                         QualType matType, uint32_t matId);
+
+  /// Processes 'vector * matrix' for a *non-floating point* matrix.
+  /// Assumes the matrix element type and the vector element type are the same.
+  /// Notice that the vector in this case is a "row vector" and will be
+  /// multiplied by the matrix columns (dot product). As a result, the given
+  /// matrix must be transposed in order to easily get each column. If
+  /// 'matTransposeId' is non-zero, it will be used as the transpose matrix
+  /// result-id; otherwise the function will perform the transpose itself.
+  uint32_t processNonFpVectorTimesMatrix(QualType vecType, uint32_t vecId,
+                                         QualType matType, uint32_t matId,
+                                         uint32_t matTransposeId = 0);
+
+  /// Processes 'matrix * vector' for a *non-floating point* matrix.
+  /// Assumes the matrix element type and the vector element type are the same.
+  uint32_t processNonFpMatrixTimesVector(QualType matType, uint32_t matId,
+                                         QualType vecType, uint32_t vecId);
+
+  /// Processes a non-floating point matrix multiplication. Assumes that the
+  /// number of columns in lhs matrix is the same as number of rows in the rhs
+  /// matrix. Also assumes that the two matrices have the same element type.
+  uint32_t processNonFpMatrixTimesMatrix(QualType lhsType, uint32_t lhsId,
+                                         QualType rhsType, uint32_t rhsId);
+
  /// Processes the 'dot' intrinsic function.
  uint32_t processIntrinsicDot(const CallExpr *);

--- a/tools/clang/lib/SPIRV/TypeTranslator.cpp
+++ b/tools/clang/lib/SPIRV/TypeTranslator.cpp
@ -345,14 +345,12 @@ uint32_t TypeTranslator::translateType(QualType type, LayoutRule rule,
    QualType elemType = {};
    uint32_t rowCount = 0, colCount = 0;
    if (isMxNMatrix(type, &elemType, &rowCount, &colCount)) {
-      // NOTE: According to Item "Data rules" of SPIR-V Spec 2.16.1 "Universal
-      // Validation Rules":
-      //   Matrix types can only be parameterized with floating-point types.
-      //
-      // So we need special handling of non-fp matrices, probably by emulating
-      // them using other types. But for now just disable them.
-      if (!elemType->isFloatingType()) {
-        emitError("Non-floating-point matrices not supported yet");
+
+      // We cannot handle external initialization of column-major matrices now.
+      if (!elemType->isFloatingType() && rule != LayoutRule::Void &&
+          !isRowMajor) {
+        emitError(
+            "externally initialized column-major matrices not supported yet");
        return 0;
      }

@ -360,7 +358,7 @@ uint32_t TypeTranslator::translateType(QualType type, LayoutRule rule,
      // We are mapping what HLSL semantically mean a row into a column here.
      const uint32_t vecType =
          theBuilder.getVecType(translateType(elemType), colCount);
-      return theBuilder.getMatType(vecType, rowCount);
+      return theBuilder.getMatType(elemType, vecType, rowCount);
    }
  }

@ -763,11 +761,6 @@ bool TypeTranslator::isRowMajorMatrix(QualType type, const Decl *decl) const {
         !decl->hasAttr<HLSLColumnMajorAttr>() && spirvOptions.defaultRowMajor;
 }

-bool TypeTranslator::isSpirvAcceptableMatrixType(QualType type) {
-  QualType elemType = {};
-  return isMxNMatrix(type, &elemType) && elemType->isFloatingType();
-}
-
 bool TypeTranslator::canTreatAsSameScalarType(QualType type1, QualType type2) {
  // Treat const int/float the same as const int/float
  type1.removeLocalConst();
@ -851,7 +844,7 @@ QualType TypeTranslator::getElementType(QualType type) {
 }

 uint32_t TypeTranslator::getComponentVectorType(QualType matrixType) {
-  assert(isSpirvAcceptableMatrixType(matrixType));
+  assert(isMxNMatrix(matrixType));

  const uint32_t elemType =
      translateType(hlsl::GetHLSLMatElementType(matrixType));
--- a/tools/clang/lib/SPIRV/TypeTranslator.h
+++ b/tools/clang/lib/SPIRV/TypeTranslator.h
@ -168,11 +168,6 @@ public:
  /// If decl is not nullptr, is is checked for attributes specifying majorness
  bool isRowMajorMatrix(QualType type, const Decl *decl = nullptr) const;

-  /// \brief Returns true if the given type is a SPIR-V acceptable matrix type,
-  /// i.e., with floating point elements and greater than 1 row and column
-  /// counts.
-  static bool isSpirvAcceptableMatrixType(QualType type);
-
  /// \brief Returns true if the two types are the same scalar or vector type,
  /// regardless of constness and literalness.
  static bool isSameScalarOrVecType(QualType type1, QualType type2);
--- a/tools/clang/test/CodeGenSPIRV/binary-op.arith-assign.matrix.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/binary-op.arith-assign.matrix.hlsl
@ -52,4 +52,21 @@ void main() {
 // CHECK-NEXT: [[j1:%\d+]] = OpCompositeConstruct %mat3v2float [[j1v0]] [[j1v1]] [[j1v2]]
 // CHECK-NEXT: OpStore %j [[j1]]
    j %= i;
+
+// Non-floating point matrices
+
+    int2x3 k, l;
+// CHECK-NEXT: [[k0:%\d+]] = OpLoad %_arr_v3int_uint_2 %k
+// CHECK-NEXT: [[l0:%\d+]] = OpLoad %_arr_v3int_uint_2 %l
+// CHECK-NEXT: [[l0v0:%\d+]] = OpCompositeExtract %v3int [[l0]] 0
+// CHECK-NEXT: [[k0v0:%\d+]] = OpCompositeExtract %v3int [[k0]] 0
+// CHECK-NEXT: [[l1v0:%\d+]] = OpIAdd %v3int [[l0v0]] [[k0v0]]
+// CHECK-NEXT: [[l0v1:%\d+]] = OpCompositeExtract %v3int [[l0]] 1
+// CHECK-NEXT: [[k0v1:%\d+]] = OpCompositeExtract %v3int [[k0]] 1
+// CHECK-NEXT: [[l1v1:%\d+]] = OpIAdd %v3int [[l0v1]] [[k0v1]]
+// CHECK-NEXT: [[l1:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[l1v0]] [[l1v1]]
+// CHECK-NEXT: OpStore %l [[l1]]
+    l += k;
+
+// Note: The front-end disallows using these operators on boolean matrices.
 }
--- a/tools/clang/test/CodeGenSPIRV/binary-op.arith-assign.mixed.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/binary-op.arith-assign.mixed.hlsl
@ -75,4 +75,25 @@ void main() {
 // CHECK-NEXT: [[mul14:%\d+]] = OpFMul %float [[o0]] [[s10]]
 // CHECK-NEXT: OpStore %o [[mul14]]
    o *= s;
+
+// Non-floating point matrices
+
+    int2x3 p;
+
+// Note: The AST includes a MatrixSplat, therefore we splat the scalar to a matrix. So we cannot use OpVectorTimesScalar.
+// CHECK-NEXT:      [[t:%\d+]] = OpLoad %int %t
+// CHECK-NEXT:   [[tvec:%\d+]] = OpCompositeConstruct %v3int [[t]] [[t]] [[t]]
+// CHECK-NEXT:   [[tmat:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[tvec]] [[tvec]]
+// CHECK-NEXT:      [[p:%\d+]] = OpLoad %_arr_v3int_uint_2 %p
+// CHECK-NEXT:     [[p0:%\d+]] = OpCompositeExtract %v3int [[p]] 0
+// CHECK-NEXT:  [[tmat0:%\d+]] = OpCompositeExtract %v3int [[tmat]] 0
+// CHECK-NEXT: [[new_p0:%\d+]] = OpIMul %v3int [[p0]] [[tmat0]]
+// CHECK-NEXT:     [[p1:%\d+]] = OpCompositeExtract %v3int [[p]] 1
+// CHECK-NEXT:  [[tmat1:%\d+]] = OpCompositeExtract %v3int [[tmat]] 1
+// CHECK-NEXT: [[new_p1:%\d+]] = OpIMul %v3int [[p1]] [[tmat1]]
+// CHECK-NEXT:  [[new_p:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[new_p0]] [[new_p1]]
+// CHECK-NEXT:                   OpStore %p [[new_p]]
+    p *= t;
+
+// Note: Boolean matrix not allowed by the front-end for these operations.
 }
--- a/tools/clang/test/CodeGenSPIRV/binary-op.arithmetic.matrix.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/binary-op.arithmetic.matrix.hlsl
@ -1,5 +1,8 @@
 // Run: %dxc -T vs_6_0 -E main

+// CHECK: [[v3int1:%\d+]] = OpConstantComposite %v3int %int_1 %int_1 %int_1
+// CHECK: [[v3int0:%\d+]] = OpConstantComposite %v3int %int_0 %int_0 %int_0
+
 void main() {
 // CHECK-LABEL: %bb_entry = OpLabel

@ -144,4 +147,95 @@ void main() {
 // CHECK-NEXT: [[t4:%\d+]] = OpCompositeConstruct %mat2v3float [[t4v0]] [[t4v1]]
 // CHECK-NEXT: OpStore %t [[t4]]
    t = r % s;
+
+    // MxN non-floating point matrices
+    int2x3 u, v, w;
+// CHECK-NEXT: [[u0:%\d+]] = OpLoad %_arr_v3int_uint_2 %u
+// CHECK-NEXT: [[v0:%\d+]] = OpLoad %_arr_v3int_uint_2 %v
+// CHECK-NEXT: [[u0v0:%\d+]] = OpCompositeExtract %v3int [[u0]] 0
+// CHECK-NEXT: [[v0v0:%\d+]] = OpCompositeExtract %v3int [[v0]] 0
+// CHECK-NEXT: [[w0v0:%\d+]] = OpIAdd %v3int [[u0v0]] [[v0v0]]
+// CHECK-NEXT: [[u0v1:%\d+]] = OpCompositeExtract %v3int [[u0]] 1
+// CHECK-NEXT: [[v0v1:%\d+]] = OpCompositeExtract %v3int [[v0]] 1
+// CHECK-NEXT: [[w0v1:%\d+]] = OpIAdd %v3int [[u0v1]] [[v0v1]]
+// CHECK-NEXT: [[w0:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[w0v0]] [[w0v1]]
+// CHECK-NEXT: OpStore %w [[w0]]
+    w = u + v;
+// CHECK-NEXT: [[u1:%\d+]] = OpLoad %_arr_v3int_uint_2 %u
+// CHECK-NEXT: [[v1:%\d+]] = OpLoad %_arr_v3int_uint_2 %v
+// CHECK-NEXT: [[u1v0:%\d+]] = OpCompositeExtract %v3int [[u1]] 0
+// CHECK-NEXT: [[v1v0:%\d+]] = OpCompositeExtract %v3int [[v1]] 0
+// CHECK-NEXT: [[w1v0:%\d+]] = OpISub %v3int [[u1v0]] [[v1v0]]
+// CHECK-NEXT: [[u1v1:%\d+]] = OpCompositeExtract %v3int [[u1]] 1
+// CHECK-NEXT: [[v1v1:%\d+]] = OpCompositeExtract %v3int [[v1]] 1
+// CHECK-NEXT: [[w1v1:%\d+]] = OpISub %v3int [[u1v1]] [[v1v1]]
+// CHECK-NEXT: [[w1:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[w1v0]] [[w1v1]]
+// CHECK-NEXT: OpStore %w [[w1]]
+    w = u - v;
+// CHECK-NEXT: [[u2:%\d+]] = OpLoad %_arr_v3int_uint_2 %u
+// CHECK-NEXT: [[v2:%\d+]] = OpLoad %_arr_v3int_uint_2 %v
+// CHECK-NEXT: [[u2v0:%\d+]] = OpCompositeExtract %v3int [[u2]] 0
+// CHECK-NEXT: [[v2v0:%\d+]] = OpCompositeExtract %v3int [[v2]] 0
+// CHECK-NEXT: [[w2v0:%\d+]] = OpIMul %v3int [[u2v0]] [[v2v0]]
+// CHECK-NEXT: [[u2v1:%\d+]] = OpCompositeExtract %v3int [[u2]] 1
+// CHECK-NEXT: [[v2v1:%\d+]] = OpCompositeExtract %v3int [[v2]] 1
+// CHECK-NEXT: [[w2v1:%\d+]] = OpIMul %v3int [[u2v1]] [[v2v1]]
+// CHECK-NEXT: [[w2:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[w2v0]] [[w2v1]]
+// CHECK-NEXT: OpStore %w [[w2]]
+    w = u * v;
+// CHECK-NEXT: [[u3:%\d+]] = OpLoad %_arr_v3int_uint_2 %u
+// CHECK-NEXT: [[v3:%\d+]] = OpLoad %_arr_v3int_uint_2 %v
+// CHECK-NEXT: [[u3v0:%\d+]] = OpCompositeExtract %v3int [[u3]] 0
+// CHECK-NEXT: [[v3v0:%\d+]] = OpCompositeExtract %v3int [[v3]] 0
+// CHECK-NEXT: [[w3v0:%\d+]] = OpSDiv %v3int [[u3v0]] [[v3v0]]
+// CHECK-NEXT: [[u3v1:%\d+]] = OpCompositeExtract %v3int [[u3]] 1
+// CHECK-NEXT: [[v3v1:%\d+]] = OpCompositeExtract %v3int [[v3]] 1
+// CHECK-NEXT: [[w3v1:%\d+]] = OpSDiv %v3int [[u3v1]] [[v3v1]]
+// CHECK-NEXT: [[w3:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[w3v0]] [[w3v1]]
+// CHECK-NEXT: OpStore %w [[w3]]
+    w = u / v;
+// CHECK-NEXT: [[u4:%\d+]] = OpLoad %_arr_v3int_uint_2 %u
+// CHECK-NEXT: [[v4:%\d+]] = OpLoad %_arr_v3int_uint_2 %v
+// CHECK-NEXT: [[u4v0:%\d+]] = OpCompositeExtract %v3int [[u4]] 0
+// CHECK-NEXT: [[v4v0:%\d+]] = OpCompositeExtract %v3int [[v4]] 0
+// CHECK-NEXT: [[w4v0:%\d+]] = OpSRem %v3int [[u4v0]] [[v4v0]]
+// CHECK-NEXT: [[u4v1:%\d+]] = OpCompositeExtract %v3int [[u4]] 1
+// CHECK-NEXT: [[v4v1:%\d+]] = OpCompositeExtract %v3int [[v4]] 1
+// CHECK-NEXT: [[w4v1:%\d+]] = OpSRem %v3int [[u4v1]] [[v4v1]]
+// CHECK-NEXT: [[w4:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[w4v0]] [[w4v1]]
+// CHECK-NEXT: OpStore %w [[w4]]
+    w = u % v;
+
+    // Boolean matrices
+    // In all cases, the boolean matrix (represented as an array of boolean vectors)
+    // is first cast to an integer matrix (represented as an array of integer vectors).
+    // Then, the binary operation (e.g. '+', '-', '*', '/', '%') is performed and then
+    // it is converted back to a boolean matrix. This behavior is due to the AST.
+    bool2x3 x, y, z;
+// CHECK-NEXT:      [[x0:%\d+]] = OpLoad %_arr_v3bool_uint_2 %x
+// CHECK-NEXT:    [[x0v0:%\d+]] = OpCompositeExtract %v3bool [[x0]] 0
+// CHECK-NEXT: [[x0v0int:%\d+]] = OpSelect %v3int [[x0v0]] [[v3int1]] [[v3int0]]
+// CHECK-NEXT:    [[x0v1:%\d+]] = OpCompositeExtract %v3bool [[x0]] 1
+// CHECK-NEXT: [[x0v1int:%\d+]] = OpSelect %v3int [[x0v1]] [[v3int1]] [[v3int0]]
+// CHECK-NEXT:   [[x0int:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[x0v0int]] [[x0v1int]]
+// CHECK-NEXT:      [[y0:%\d+]] = OpLoad %_arr_v3bool_uint_2 %y
+// CHECK-NEXT:    [[y0v0:%\d+]] = OpCompositeExtract %v3bool [[y0]] 0
+// CHECK-NEXT: [[y0v0int:%\d+]] = OpSelect %v3int [[y0v0]] [[v3int1]] [[v3int0]]
+// CHECK-NEXT:    [[y0v1:%\d+]] = OpCompositeExtract %v3bool [[y0]] 1
+// CHECK-NEXT: [[y0v1int:%\d+]] = OpSelect %v3int [[y0v1]] [[v3int1]] [[v3int0]]
+// CHECK-NEXT:   [[y0int:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[y0v0int]] [[y0v1int]]
+// CHECK-NEXT:    [[x0v0:%\d+]] = OpCompositeExtract %v3int [[x0int]] 0
+// CHECK-NEXT:    [[y0v0:%\d+]] = OpCompositeExtract %v3int [[y0int]] 0
+// CHECK-NEXT:    [[z0v0:%\d+]] = OpIAdd %v3int [[x0v0]] [[y0v0]]
+// CHECK-NEXT:    [[x0v1:%\d+]] = OpCompositeExtract %v3int [[x0int]] 1
+// CHECK-NEXT:    [[y0v1:%\d+]] = OpCompositeExtract %v3int [[y0int]] 1
+// CHECK-NEXT:    [[z0v1:%\d+]] = OpIAdd %v3int [[x0v1]] [[y0v1]]
+// CHECK-NEXT:   [[z_int:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[z0v0]] [[z0v1]]
+// CHECK-NEXT:    [[z0v0:%\d+]] = OpCompositeExtract %v3int [[z_int]] 0
+// CHECK-NEXT:[[z0v0bool:%\d+]] = OpINotEqual %v3bool [[z0v0]] [[v3int0]]
+// CHECK-NEXT:    [[z0v1:%\d+]] = OpCompositeExtract %v3int [[z_int]] 1
+// CHECK-NEXT:[[z0v1bool:%\d+]] = OpINotEqual %v3bool [[z0v1]] [[v3int0]]
+// CHECK-NEXT:       [[z:%\d+]] = OpCompositeConstruct %_arr_v3bool_uint_2 [[z0v0bool]] [[z0v1bool]]
+// CHECK-NEXT:                    OpStore %z [[z]]
+    z = x + y;
 }
--- a/tools/clang/test/CodeGenSPIRV/binary-op.arithmetic.mixed.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/binary-op.arithmetic.mixed.hlsl
@ -112,4 +112,54 @@ void main() {
 // CHECK-NEXT: [[mul15:%\d+]] = OpFMul %float [[s11]] [[o1]]
 // CHECK-NEXT: OpStore %p [[mul15]]
    p = s * o;
+
+// Non-floating point matrices:
+// Since non-fp matrices are represented as arrays of vectors, we cannot use
+// OpMatrixTimes* instructions.
+
+    int2x3 q;
+
+// Note: The AST includes a MatrixSplat, therefore we splat the scalar to a matrix. So we cannot use OpVectorTimesScalar.
+// CHECK:          [[t:%\d+]] = OpLoad %int %t
+// CHECK-NEXT:  [[tvec:%\d+]] = OpCompositeConstruct %v3int [[t]] [[t]] [[t]]
+// CHECK-NEXT:  [[tmat:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[tvec]] [[tvec]]
+// CHECK-NEXT:     [[q:%\d+]] = OpLoad %_arr_v3int_uint_2 %q
+// CHECK-NEXT: [[tmat0:%\d+]] = OpCompositeExtract %v3int [[tmat]] 0
+// CHECK-NEXT:    [[q0:%\d+]] = OpCompositeExtract %v3int [[q]] 0
+// CHECK-NEXT:   [[qt0:%\d+]] = OpIMul %v3int [[tmat0]] [[q0]]
+// CHECK-NEXT: [[tmat1:%\d+]] = OpCompositeExtract %v3int [[tmat]] 1
+// CHECK-NEXT:    [[q1:%\d+]] = OpCompositeExtract %v3int [[q]] 1
+// CHECK-NEXT:   [[qt1:%\d+]] = OpIMul %v3int [[tmat1]] [[q1]]
+// CHECK-NEXT:    [[qt:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[qt0]] [[qt1]]
+// CHECK-NEXT:                  OpStore %qt [[qt]]
+    int2x3 qt = t * q;
+
+    bool2x3 x;
+
+// Note: The AST includes a MatrixSplat, therefore we splat the scalar to a matrix. So we cannot use OpVectorTimesScalar.
+// CHECK:                [[z:%\d+]] = OpLoad %bool %z
+// CHECK-NEXT:        [[zint:%\d+]] = OpSelect %int [[z]] %int_1 %int_0
+// CHECK-NEXT:        [[zvec:%\d+]] = OpCompositeConstruct %v3int [[zint]] [[zint]] [[zint]]
+// CHECK-NEXT:   [[z_int_mat:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[zvec]] [[zvec]]
+// CHECK-NEXT:           [[x:%\d+]] = OpLoad %_arr_v3bool_uint_2 %x
+// CHECK-NEXT:          [[x0:%\d+]] = OpCompositeExtract %v3bool [[x]] 0
+// CHECK-NEXT:       [[x0int:%\d+]] = OpSelect %v3int [[x0]] {{%\d+}} {{%\d+}}
+// CHECK-NEXT:          [[x1:%\d+]] = OpCompositeExtract %v3bool [[x]] 1
+// CHECK-NEXT:       [[x1int:%\d+]] = OpSelect %v3int [[x1]] {{%\d+}} {{%\d+}}
+// CHECK-NEXT:   [[x_int_mat:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[x0int]] [[x1int]]
+// CHECK-NEXT:          [[z0:%\d+]] = OpCompositeExtract %v3int [[z_int_mat]] 0
+// CHECK-NEXT:          [[x0:%\d+]] = OpCompositeExtract %v3int [[x_int_mat]] 0
+// CHECK-NEXT:         [[zx0:%\d+]] = OpIMul %v3int [[z0]] [[x0]]
+// CHECK-NEXT:          [[z1:%\d+]] = OpCompositeExtract %v3int [[z_int_mat]] 1
+// CHECK-NEXT:          [[x1:%\d+]] = OpCompositeExtract %v3int [[x_int_mat]] 1
+// CHECK-NEXT:         [[zx1:%\d+]] = OpIMul %v3int [[z1]] [[x1]]
+// CHECK-NEXT:  [[zx_int_mat:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[zx0]] [[zx1]]
+// CHECK-NEXT:         [[zx0:%\d+]] = OpCompositeExtract %v3int [[zx_int_mat]] 0
+// CHECK-NEXT:     [[zx0bool:%\d+]] = OpINotEqual %v3bool [[zx0]] {{%\d+}}
+// CHECK-NEXT:         [[zx1:%\d+]] = OpCompositeExtract %v3int [[zx_int_mat]] 1
+// CHECK-NEXT:     [[zx1bool:%\d+]] = OpINotEqual %v3bool [[zx1]] {{%\d+}}
+// CHECK-NEXT: [[zx_bool_mat:%\d+]] = OpCompositeConstruct %_arr_v3bool_uint_2 [[zx0bool]] [[zx1bool]]
+// CHECK-NEXT:                        OpStore %zx [[zx_bool_mat]]
+    bool z;
+    bool2x3 zx = z * x;
 }
--- a/tools/clang/test/CodeGenSPIRV/cast.2bool.implicit.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/cast.2bool.implicit.hlsl
@ -4,6 +4,8 @@
 // CHECK: [[v3bool_0_1_1:%\d+]] = OpConstantComposite %v3bool %false %true %true
 // CHECK: [[v2uint_0_0:%\d+]] = OpConstantComposite %v2uint %uint_0 %uint_0
 // CHECK: [[v3float_0_0_0:%\d+]] = OpConstantComposite %v3float %float_0 %float_0 %float_0
+// CHECK: [[v3i0:%\d+]] = OpConstantComposite %v3int %int_0 %int_0 %int_0
+// CHECK: [[v3u0:%\d+]] = OpConstantComposite %v3uint %uint_0 %uint_0 %uint_0

 void main() {
 // CHECK-LABEL: %bb_entry = OpLabel
@ -62,4 +64,33 @@ void main() {
 // CHECK-NEXT: [[vc3:%\d+]] = OpFOrdNotEqual %v3bool [[vfrom3]] [[v3float_0_0_0]]
 // CHECK-NEXT: OpStore %vb3 [[vc3]]
    vb3 = vfrom3;
+
+    float2x3 floatMat;
+    int2x3   intMat;
+    uint2x3  uintMat;
+    bool2x3 boolMat;
+
+// CHECK:       [[floatMat:%\d+]] = OpLoad %mat2v3float %floatMat
+// CHECK-NEXT: [[floatMat0:%\d+]] = OpCompositeExtract %v3float [[floatMat]] 0
+// CHECK-NEXT:  [[boolMat0:%\d+]] = OpFOrdNotEqual %v3bool [[floatMat0]] [[v3float_0_0_0]]
+// CHECK-NEXT: [[floatMat1:%\d+]] = OpCompositeExtract %v3float [[floatMat]] 1
+// CHECK-NEXT:  [[boolMat1:%\d+]] = OpFOrdNotEqual %v3bool [[floatMat1]] [[v3float_0_0_0]]
+// CHECK-NEXT:           {{%\d+}} = OpCompositeConstruct %_arr_v3bool_uint_2 [[boolMat0]] [[boolMat1]]
+    boolMat = floatMat;
+
+// CHECK:        [[intMat:%\d+]] = OpLoad %_arr_v3int_uint_2 %intMat
+// CHECK-NEXT:  [[intMat0:%\d+]] = OpCompositeExtract %v3int [[intMat]] 0
+// CHECK-NEXT: [[boolMat0:%\d+]] = OpINotEqual %v3bool [[intMat0]] [[v3i0]]
+// CHECK-NEXT:  [[intMat1:%\d+]] = OpCompositeExtract %v3int [[intMat]] 1
+// CHECK-NEXT: [[boolMat1:%\d+]] = OpINotEqual %v3bool [[intMat1]] [[v3i0]]
+// CHECK-NEXT:          {{%\d+}} = OpCompositeConstruct %_arr_v3bool_uint_2 [[boolMat0]] [[boolMat1]]
+    boolMat = intMat;
+
+// CHECK:      [[uintMat:%\d+]] = OpLoad %_arr_v3uint_uint_2 %uintMat
+// CHECK-NEXT: [[uintMat0:%\d+]] = OpCompositeExtract %v3uint [[uintMat]] 0
+// CHECK-NEXT: [[boolMat0:%\d+]] = OpINotEqual %v3bool [[uintMat0]] [[v3u0]]
+// CHECK-NEXT: [[uintMat1:%\d+]] = OpCompositeExtract %v3uint [[uintMat]] 1
+// CHECK-NEXT: [[boolMat1:%\d+]] = OpINotEqual %v3bool [[uintMat1]] [[v3u0]]
+// CHECK-NEXT:  {{%\d+}} = OpCompositeConstruct %_arr_v3bool_uint_2 [[boolMat0]] [[boolMat1]]
+    boolMat = uintMat;
 }
--- a/tools/clang/test/CodeGenSPIRV/cast.2fp.implicit.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/cast.2fp.implicit.hlsl
@ -2,6 +2,8 @@

 // CHECK: [[v2float_1_0:%\d+]] = OpConstantComposite %v2float %float_1 %float_0
 // CHECK: [[v3float_0_4_n3:%\d+]] = OpConstantComposite %v3float %float_0 %float_4 %float_n3
+// CHECK: [[v3f1:%\d+]] = OpConstantComposite %v3float %float_1 %float_1 %float_1
+// CHECK: [[v3f0:%\d+]] = OpConstantComposite %v3float %float_0 %float_0 %float_0

 void main() {
 // CHECK-LABEL: %bb_entry = OpLabel
@ -67,4 +69,31 @@ void main() {
 // CHECK-NEXT:              {{%\d+}} = OpConvertSToF %float [[zero_minus_a]]
    bool a = false;
    float c = 0-a;
+
+    int2x3   intMat;
+    float2x3 floatMat;
+    uint2x3  uintMat;
+    bool2x3  boolMat;
+
+// CHECK:        [[boolMat:%\d+]] = OpLoad %_arr_v3bool_uint_2 %boolMat
+// CHECK-NEXT:  [[boolMat0:%\d+]] = OpCompositeExtract %v3bool [[boolMat]] 0
+// CHECK-NEXT: [[floatMat0:%\d+]] = OpSelect %v3float [[boolMat0]] [[v3f1]] [[v3f0]]
+// CHECK-NEXT:  [[boolMat1:%\d+]] = OpCompositeExtract %v3bool [[boolMat]] 1
+// CHECK-NEXT: [[floatMat1:%\d+]] = OpSelect %v3float [[boolMat1]] [[v3f1]] [[v3f0]]
+// CHECK-NEXT:           {{%\d+}} = OpCompositeConstruct %mat2v3float [[floatMat0]] [[floatMat1]]
+    floatMat = boolMat;
+// CHECK:        [[uintMat:%\d+]] = OpLoad %_arr_v3uint_uint_2 %uintMat
+// CHECK-NEXT:  [[uintMat0:%\d+]] = OpCompositeExtract %v3uint [[uintMat]] 0
+// CHECK-NEXT: [[floatMat0:%\d+]] = OpConvertUToF %v3float [[uintMat0]]
+// CHECK-NEXT:  [[uintMat1:%\d+]] = OpCompositeExtract %v3uint [[uintMat]] 1
+// CHECK-NEXT: [[floatMat1:%\d+]] = OpConvertUToF %v3float [[uintMat1]]
+// CHECK-NEXT:           {{%\d+}} = OpCompositeConstruct %mat2v3float [[floatMat0]] [[floatMat1]]
+    floatMat = uintMat;
+// CHECK:         [[intMat:%\d+]] = OpLoad %_arr_v3int_uint_2 %intMat
+// CHECK-NEXT:   [[intMat0:%\d+]] = OpCompositeExtract %v3int [[intMat]] 0
+// CHECK-NEXT: [[floatMat0:%\d+]] = OpConvertSToF %v3float [[intMat0]]
+// CHECK-NEXT:   [[intMat1:%\d+]] = OpCompositeExtract %v3int [[intMat]] 1
+// CHECK-NEXT: [[floatMat1:%\d+]] = OpConvertSToF %v3float [[intMat1]]
+// CHECK-NEXT:           {{%\d+}} = OpCompositeConstruct %mat2v3float [[floatMat0]] [[floatMat1]]
+    floatMat = intMat;
 }
--- a/tools/clang/test/CodeGenSPIRV/cast.2sint.implicit.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/cast.2sint.implicit.hlsl
@ -2,6 +2,8 @@

 // CHECK: [[v2int_1_0:%\d+]] = OpConstantComposite %v2int %int_1 %int_0
 // CHECK: [[v3int_0_2_n3:%\d+]] = OpConstantComposite %v3int %int_0 %int_2 %int_n3
+// CHECK: [[v3i1:%\d+]] = OpConstantComposite %v3int %int_1 %int_1 %int_1
+// CHECK: [[v3i0:%\d+]] = OpConstantComposite %v3int %int_0 %int_0 %int_0

 void main() {
 // CHECK-LABEL: %bb_entry = OpLabel
@ -60,4 +62,31 @@ void main() {
 // CHECK-NEXT: [[vc3:%\d+]] = OpConvertFToS %v3int [[vfrom3]]
 // CHECK-NEXT: OpStore %vi3 [[vc3]]
    vi3 = vfrom3;
-}
+
+    int2x3   intMat;
+    float2x3 floatMat;
+    uint2x3  uintMat;
+    bool2x3  boolMat;
+
+// CHECK:       [[boolMat:%\d+]] = OpLoad %_arr_v3bool_uint_2 %boolMat
+// CHECK-NEXT: [[boolMat0:%\d+]] = OpCompositeExtract %v3bool [[boolMat]] 0
+// CHECK-NEXT:  [[intMat0:%\d+]] = OpSelect %v3int [[boolMat0]] [[v3i1]] [[v3i0]]
+// CHECK-NEXT: [[boolMat1:%\d+]] = OpCompositeExtract %v3bool [[boolMat]] 1
+// CHECK-NEXT:  [[intMat1:%\d+]] = OpSelect %v3int [[boolMat1]] [[v3i1]] [[v3i0]]
+// CHECK-NEXT:          {{%\d+}} = OpCompositeConstruct %_arr_v3int_uint_2 [[intMat0]] [[intMat1]]
+    intMat = boolMat;
+// CHECK:       [[uintMat:%\d+]] = OpLoad %_arr_v3uint_uint_2 %uintMat
+// CHECK-NEXT: [[uintMat0:%\d+]] = OpCompositeExtract %v3uint [[uintMat]] 0
+// CHECK-NEXT:  [[intMat0:%\d+]] = OpBitcast %v3int [[uintMat0]]
+// CHECK-NEXT: [[uintMat1:%\d+]] = OpCompositeExtract %v3uint [[uintMat]] 1
+// CHECK-NEXT:  [[intMat1:%\d+]] = OpBitcast %v3int [[uintMat1]]
+// CHECK-NEXT:          {{%\d+}} = OpCompositeConstruct %_arr_v3int_uint_2 [[intMat0]] [[intMat1]]
+    intMat = uintMat;
+// CHECK:       [[floatMat:%\d+]] = OpLoad %mat2v3float %floatMat
+// CHECK-NEXT: [[floatMat0:%\d+]] = OpCompositeExtract %v3float [[floatMat]] 0
+// CHECK-NEXT:   [[intMat0:%\d+]] = OpConvertFToS %v3int [[floatMat0]]
+// CHECK-NEXT: [[floatMat1:%\d+]] = OpCompositeExtract %v3float [[floatMat]] 1
+// CHECK-NEXT:   [[intMat1:%\d+]] = OpConvertFToS %v3int [[floatMat1]]
+// CHECK-NEXT:           {{%\d+}} = OpCompositeConstruct %_arr_v3int_uint_2 [[intMat0]] [[intMat1]]
+    intMat = floatMat;
+}
--- a/tools/clang/test/CodeGenSPIRV/cast.2uint.implicit.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/cast.2uint.implicit.hlsl
@ -2,6 +2,8 @@

 // CHECK: [[v2uint_1_0:%\d+]] = OpConstantComposite %v2uint %uint_1 %uint_0
 // CHECK: [[v3uint_0_2_3:%\d+]] = OpConstantComposite %v3uint %uint_0 %uint_2 %uint_3
+// CHECK: [[v3u1:%\d+]] = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1
+// CHECK: [[v3u0:%\d+]] = OpConstantComposite %v3uint %uint_0 %uint_0 %uint_0

 void main() {
 // CHECK-LABEL: %bb_entry = OpLabel
@ -60,4 +62,31 @@ void main() {
 // CHECK-NEXT: [[vc3:%\d+]] = OpConvertFToU %v3uint [[vfrom3]]
 // CHECK-NEXT: OpStore %vi3 [[vc3]]
    vi3 = vfrom3;
+
+    int2x3   intMat;
+    float2x3 floatMat;
+    uint2x3  uintMat;
+    bool2x3  boolMat;
+
+// CHECK:       [[boolMat:%\d+]] = OpLoad %_arr_v3bool_uint_2 %boolMat
+// CHECK-NEXT: [[boolMat0:%\d+]] = OpCompositeExtract %v3bool [[boolMat]] 0
+// CHECK-NEXT: [[uintMat0:%\d+]] = OpSelect %v3uint [[boolMat0]] [[v3u1]] [[v3u0]]
+// CHECK-NEXT: [[boolMat1:%\d+]] = OpCompositeExtract %v3bool [[boolMat]] 1
+// CHECK-NEXT: [[uintMat1:%\d+]] = OpSelect %v3uint [[boolMat1]] [[v3u1]] [[v3u0]]
+// CHECK-NEXT:          {{%\d+}} = OpCompositeConstruct %_arr_v3uint_uint_2 [[uintMat0]] [[uintMat1]]
+    uintMat = boolMat;
+// CHECK:        [[intMat:%\d+]] = OpLoad %_arr_v3int_uint_2 %intMat
+// CHECK-NEXT:  [[intMat0:%\d+]] = OpCompositeExtract %v3int [[intMat]] 0
+// CHECK-NEXT: [[uintMat0:%\d+]] = OpBitcast %v3uint [[intMat0]]
+// CHECK-NEXT:  [[intMat1:%\d+]] = OpCompositeExtract %v3int [[intMat]] 1
+// CHECK-NEXT: [[uintMat1:%\d+]] = OpBitcast %v3uint [[intMat1]]
+// CHECK-NEXT:          {{%\d+}} = OpCompositeConstruct %_arr_v3uint_uint_2 [[uintMat0]] [[uintMat1]]
+    uintMat = intMat;
+// CHECK:       [[floatMat:%\d+]] = OpLoad %mat2v3float %floatMat
+// CHECK-NEXT: [[floatMat0:%\d+]] = OpCompositeExtract %v3float [[floatMat]] 0
+// CHECK-NEXT:  [[uintMat0:%\d+]] = OpConvertFToU %v3uint [[floatMat0]]
+// CHECK-NEXT: [[floatMat1:%\d+]] = OpCompositeExtract %v3float [[floatMat]] 1
+// CHECK-NEXT:  [[uintMat1:%\d+]] = OpConvertFToU %v3uint [[floatMat1]]
+// CHECK-NEXT:           {{%\d+}} = OpCompositeConstruct %_arr_v3uint_uint_2 [[uintMat0]] [[uintMat1]]
+    uintMat = floatMat;
 }
--- a/tools/clang/test/CodeGenSPIRV/cast.flat-conversion.implicit.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/cast.flat-conversion.implicit.hlsl
@ -7,6 +7,8 @@ struct VSOutput {
  bool     mybool[2]  : MYBOOL;
  int      arr[5]     : MYARRAY;
  float2x3 mat2x3     : MYMATRIX;
+  int2x3   intmat     : MYINTMATRIX;
+  bool2x3  boolmat    : MYBOOLMATRIX;
 };


@ -34,7 +36,12 @@ void main() {
 // CHECK-NEXT:         [[f1_1:%\d+]] = OpConvertSToF %float %int_1
 // CHECK-NEXT:         [[col3:%\d+]] = OpCompositeConstruct %v3float [[f1_1]] [[f1_1]] [[f1_1]]
 // CHECK-NEXT:    [[matFloat1:%\d+]] = OpCompositeConstruct %mat2v3float [[col3]] [[col3]]
-// CHECK-NEXT: [[flatConvert1:%\d+]] = OpCompositeConstruct %VSOutput [[v4f1]] [[v3u1]] [[v2i1]] [[arr2bool1]] [[arr5i1]] [[matFloat1]]
+// CHECK-NEXT:         [[v3i1:%\d+]] = OpCompositeConstruct %v3int %int_1 %int_1 %int_1
+// CHECK-NEXT:       [[intmat:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[v3i1]] [[v3i1]]
+// CHECK-NEXT:         [[true:%\d+]] = OpINotEqual %bool %int_1 %int_0
+// CHECK-NEXT:      [[boolvec:%\d+]] = OpCompositeConstruct %v3bool [[true]] [[true]] [[true]]
+// CHECK-NEXT:      [[boolmat:%\d+]] = OpCompositeConstruct %_arr_v3bool_uint_2 [[boolvec]] [[boolvec]]
+// CHECK-NEXT: [[flatConvert1:%\d+]] = OpCompositeConstruct %VSOutput [[v4f1]] [[v3u1]] [[v2i1]] [[arr2bool1]] [[arr5i1]] [[matFloat1]] [[intmat]] [[boolmat]]
 // CHECK-NEXT:                         OpStore %output4 [[flatConvert1]]
  VSOutput output4 = (VSOutput)1;

@ -50,7 +57,12 @@ void main() {
 // CHECK-NEXT:      [[floatX2:%\d+]] = OpConvertSToF %float [[x]]
 // CHECK-NEXT:         [[v3fX:%\d+]] = OpCompositeConstruct %v3float [[floatX2]] [[floatX2]] [[floatX2]]
 // CHECK-NEXT:    [[matFloatX:%\d+]] = OpCompositeConstruct %mat2v3float [[v3fX]] [[v3fX]]
-// CHECK-NEXT: [[flatConvert2:%\d+]] = OpCompositeConstruct %VSOutput [[v4fX]] [[v3uX]] [[v2iX]] [[arr2boolX]] [[arr5iX]] [[matFloatX]]
+// CHECK-NEXT:       [[intvec:%\d+]] = OpCompositeConstruct %v3int [[x]] [[x]] [[x]]
+// CHECK-NEXT:       [[intmat:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[intvec]] [[intvec]]
+// CHECK-NEXT:        [[boolx:%\d+]] = OpINotEqual %bool [[x]] %int_0
+// CHECK-NEXT:      [[boolvec:%\d+]] = OpCompositeConstruct %v3bool [[boolx]] [[boolx]] [[boolx]]
+// CHECK-NEXT:      [[boolmat:%\d+]] = OpCompositeConstruct %_arr_v3bool_uint_2 [[boolvec]] [[boolvec]]
+// CHECK-NEXT: [[flatConvert2:%\d+]] = OpCompositeConstruct %VSOutput [[v4fX]] [[v3uX]] [[v2iX]] [[arr2boolX]] [[arr5iX]] [[matFloatX]] [[intmat]] [[boolmat]]
 // CHECK-NEXT:                         OpStore %output5 [[flatConvert2]]
  VSOutput output5 = (VSOutput)x;

@ -65,7 +77,13 @@ void main() {
 // CHECK-NEXT:     [[arr5i1_5:%\d+]] = OpCompositeConstruct %_arr_int_uint_5 [[i1_5]] [[i1_5]] [[i1_5]] [[i1_5]] [[i1_5]]
 // CHECK-NEXT:      [[v3f_1_5:%\d+]] = OpCompositeConstruct %v3float %float_1_5 %float_1_5 %float_1_5
 // CHECK-NEXT: [[matFloat_1_5:%\d+]] = OpCompositeConstruct %mat2v3float [[v3f_1_5]] [[v3f_1_5]]
-// CHECK-NEXT:              {{%\d+}} = OpCompositeConstruct %VSOutput [[v4f1_5]] [[v3u1_5]] [[v2i1_5]] [[arr2bool_1_5]] [[arr5i1_5]] [[matFloat_1_5]]
+// CHECK-NEXT:      [[int_1_5:%\d+]] = OpConvertFToS %int %float_1_5
+// CHECK-NEXT:       [[intvec:%\d+]] = OpCompositeConstruct %v3int [[int_1_5]] [[int_1_5]] [[int_1_5]]
+// CHECK-NEXT:       [[intmat:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[intvec]] [[intvec]]
+// CHECK-NEXT:     [[bool_1_5:%\d+]] = OpFOrdNotEqual %bool %float_1_5 %float_0
+// CHECK-NEXT:      [[boolvec:%\d+]] = OpCompositeConstruct %v3bool [[bool_1_5]] [[bool_1_5]] [[bool_1_5]]
+// CHECK-NEXT:      [[boolmat:%\d+]] = OpCompositeConstruct %_arr_v3bool_uint_2 [[boolvec]] [[boolvec]]
+// CHECK-NEXT:              {{%\d+}} = OpCompositeConstruct %VSOutput [[v4f1_5]] [[v3u1_5]] [[v2i1_5]] [[arr2bool_1_5]] [[arr5i1_5]] [[matFloat_1_5]] [[intmat]] [[boolmat]]
  VSOutput output6 = (VSOutput)1.5;

 // CHECK:      [[float_true:%\d+]] = OpSelect %float %true %float_1 %float_0
@ -80,7 +98,12 @@ void main() {
 // CHECK-NEXT: [[float_true:%\d+]] = OpSelect %float %true %float_1 %float_0
 // CHECK-NEXT:   [[v3f_true:%\d+]] = OpCompositeConstruct %v3float [[float_true]] [[float_true]] [[float_true]]
 // CHECK-NEXT:[[mat2v3_true:%\d+]] = OpCompositeConstruct %mat2v3float [[v3f_true]] [[v3f_true]]
-// CHECK-NEXT:            {{%\d+}} = OpCompositeConstruct %VSOutput [[v4f_true]] [[v3u_true]] [[v2i_true]] [[arr2_true]] [[arr5i_true]] [[mat2v3_true]]
+// CHECK-NEXT:   [[true_int:%\d+]] = OpSelect %int %true %int_1 %int_0
+// CHECK-NEXT:     [[intvec:%\d+]] = OpCompositeConstruct %v3int [[true_int]] [[true_int]] [[true_int]]
+// CHECK-NEXT:     [[intmat:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[intvec]] [[intvec]]
+// CHECK-NEXT:    [[boolvec:%\d+]] = OpCompositeConstruct %v3bool %true %true %true
+// CHECK-NEXT:    [[boolmat:%\d+]] = OpCompositeConstruct %_arr_v3bool_uint_2 [[boolvec]] [[boolvec]]
+// CHECK-NEXT:            {{%\d+}} = OpCompositeConstruct %VSOutput [[v4f_true]] [[v3u_true]] [[v2i_true]] [[arr2_true]] [[arr5i_true]] [[mat2v3_true]] [[intmat]] [[boolmat]]
  VSOutput output7 = (VSOutput)true;

 }
--- a/tools/clang/test/CodeGenSPIRV/cast.matrix.splat.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/cast.matrix.splat.hlsl
@ -1,9 +1,13 @@
 // Run: %dxc -T vs_6_0 -E main

-// CHECK: [[v2f10_3:%\d+]] = OpConstantComposite %v2float %float_10_3 %float_10_3
-// CHECK: [[v3f10_4:%\d+]] = OpConstantComposite %v3float %float_10_4 %float_10_4 %float_10_4
-// CHECK: [[v2f10_5:%\d+]] = OpConstantComposite %v2float %float_10_5 %float_10_5
-// CHECK: [[m3v2f10_5:%\d+]] = OpConstantComposite %mat3v2float [[v2f10_5]] [[v2f10_5]] [[v2f10_5]]
+// CHECK:      [[v2f10_3:%\d+]] = OpConstantComposite %v2float %float_10_3 %float_10_3
+// CHECK:      [[v3f10_4:%\d+]] = OpConstantComposite %v3float %float_10_4 %float_10_4 %float_10_4
+// CHECK:      [[v2f10_5:%\d+]] = OpConstantComposite %v2float %float_10_5 %float_10_5
+// CHECK:    [[m3v2f10_5:%\d+]] = OpConstantComposite %mat3v2float [[v2f10_5]] [[v2f10_5]] [[v2f10_5]]
+// CHECK:        [[v2i10:%\d+]] = OpConstantComposite %v2int %int_10 %int_10
+// CHECK:   [[int3x2_i10:%\d+]] = OpConstantComposite %_arr_v2int_uint_3 [[v2i10]] [[v2i10]] [[v2i10]]
+// CHECK:       [[v2true:%\d+]] = OpConstantComposite %v2bool %true %true
+// CHECK: [[bool3x2_true:%\d+]] = OpConstantComposite %_arr_v2bool_uint_3 [[v2true]] [[v2true]] [[v2true]]

 void main() {
 // CHECK-LABEL: %bb_entry = OpLabel
@ -20,6 +24,10 @@ void main() {
    float3x1 c = 10.4;
 // CHECK-NEXT: OpStore %d [[m3v2f10_5]]
    float3x2 d = 10.5;
+// CHECK-NEXT: OpStore %e [[int3x2_i10]]
+      int3x2 e = 10;
+// CHECK-NEXT: OpStore %f [[bool3x2_true]]
+     bool3x2 f = true;

    float val;
 // CHECK-NEXT: [[val0:%\d+]] = OpLoad %float %val
@ -41,4 +49,38 @@ void main() {
 // CHECK-NEXT: [[cc3:%\d+]] = OpCompositeConstruct %mat2v3float [[cc2]] [[cc2]]
 // CHECK-NEXT: OpStore %k [[cc3]]
    k = val;
+
+    int intVal;
+// CHECK:      [[intVal:%\d+]] = OpLoad %int %intVal
+// CHECK-NEXT:    [[cc4:%\d+]] = OpCompositeConstruct %v3int [[intVal]] [[intVal]] [[intVal]]
+// CHECK-NEXT: OpStore %m [[cc4]]
+    int1x3 m = intVal;
+    int2x1 n;
+    int2x3 o;
+// CHECK:      [[intVal:%\d+]] = OpLoad %int %intVal
+// CHECK-NEXT:    [[cc5:%\d+]] = OpCompositeConstruct %v2int [[intVal]] [[intVal]]
+// CHECK-NEXT: OpStore %n [[cc5]]
+    n = intVal;
+// CHECK:        [[intVal:%\d+]] = OpLoad %int %intVal
+// CHECK-NEXT: [[v3intVal:%\d+]] = OpCompositeConstruct %v3int [[intVal]] [[intVal]] [[intVal]]
+// CHECK-NEXT:      [[cc6:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[v3intVal]] [[v3intVal]]
+// CHECK-NEXT: OpStore %o [[cc6]]
+    o = intVal;
+
+    bool boolVal;
+// CHECK:      [[boolVal:%\d+]] = OpLoad %bool %boolVal
+// CHECK-NEXT:     [[cc7:%\d+]] = OpCompositeConstruct %v3bool [[boolVal]] [[boolVal]] [[boolVal]]
+// CHECK-NEXT: OpStore %p [[cc7]]
+    bool1x3 p = boolVal;
+    bool2x1 q;
+    bool2x3 r;
+// CHECK:      [[boolVal:%\d+]] = OpLoad %bool %boolVal
+// CHECK-NEXT:     [[cc8:%\d+]] = OpCompositeConstruct %v2bool [[boolVal]] [[boolVal]]
+// CHECK-NEXT: OpStore %q [[cc8]]
+    q = boolVal;
+// CHECK:        [[boolVal:%\d+]] = OpLoad %bool %boolVal
+// CHECK-NEXT: [[v3boolVal:%\d+]] = OpCompositeConstruct %v3bool [[boolVal]] [[boolVal]] [[boolVal]]
+// CHECK-NEXT:       [[cc9:%\d+]] = OpCompositeConstruct %_arr_v3bool_uint_2 [[v3boolVal]] [[v3boolVal]]
+// CHECK-NEXT: OpStore %r [[cc9]]
+    r = boolVal;
 }
--- a/tools/clang/test/CodeGenSPIRV/cast.matrix.trunc.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/cast.matrix.trunc.hlsl
@ -73,4 +73,59 @@ void main() {
 // CHECK:      [[o:%\d+]] = OpLoad %v3float %o
 // CHECK-NEXT:   {{%\d+}} = OpVectorShuffle %v2float [[o]] [[o]] 0 1
  float2x1 g = (float2x1)o;
+
+  // Non-floating-point matrices
+  int3x4 h;
+  int2x3 i;
+  int3x1 j;
+  int1x4 k;
+// CHECK:       [[h:%\d+]] = OpLoad %_arr_v4int_uint_3 %h
+// CHECK-NEXT: [[h0:%\d+]] = OpCompositeExtract %v4int [[h]] 0
+// CHECK-NEXT: [[i0:%\d+]] = OpVectorShuffle %v3int [[h0]] [[h0]] 0 1 2
+// CHECK-NEXT: [[h1:%\d+]] = OpCompositeExtract %v4int [[h]] 1
+// CHECK-NEXT: [[i1:%\d+]] = OpVectorShuffle %v3int [[h1]] [[h1]] 0 1 2
+// CHECK-NEXT:  [[i:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[i0]] [[i1]]
+// CHECK-NEXT:               OpStore %i [[i]]
+  i = (int2x3)h;
+// CHECK:         [[h:%\d+]] = OpLoad %_arr_v4int_uint_3 %h
+// CHECK-NEXT:   [[h0:%\d+]] = OpCompositeExtract %v4int [[h]] 0
+// CHECK-NEXT: [[h0e0:%\d+]] = OpCompositeExtract %int [[h0]] 0
+// CHECK-NEXT:   [[h1:%\d+]] = OpCompositeExtract %v4int [[h]] 1
+// CHECK-NEXT: [[h1e0:%\d+]] = OpCompositeExtract %int [[h1]] 0
+// CHECK-NEXT:   [[h2:%\d+]] = OpCompositeExtract %v4int [[h]] 2
+// CHECK-NEXT: [[h2e0:%\d+]] = OpCompositeExtract %int [[h2]] 0
+// CHECK-NEXT:    [[j:%\d+]] = OpCompositeConstruct %v3int [[h0e0]] [[h1e0]] [[h2e0]]
+// CHECK-NEXT:                 OpStore %j [[j]]
+  j = (int3x1)h;
+// CHECK:       [[h:%\d+]] = OpLoad %_arr_v4int_uint_3 %h
+// CHECK-NEXT: [[h0:%\d+]] = OpCompositeExtract %v4int [[h]] 0
+// CHECK-NEXT:               OpStore %k [[h0]]
+  k = (int1x4)h;
+
+  bool3x4 p;
+  bool2x3 q;
+  bool3x1 r;
+  bool1x4 s;
+// CHECK:       [[p:%\d+]] = OpLoad %_arr_v4bool_uint_3 %p
+// CHECK-NEXT: [[p0:%\d+]] = OpCompositeExtract %v4bool [[p]] 0
+// CHECK-NEXT: [[q0:%\d+]] = OpVectorShuffle %v3bool [[p0]] [[p0]] 0 1 2
+// CHECK-NEXT: [[p1:%\d+]] = OpCompositeExtract %v4bool [[p]] 1
+// CHECK-NEXT: [[q1:%\d+]] = OpVectorShuffle %v3bool [[p1]] [[p1]] 0 1 2
+// CHECK-NEXT:  [[q:%\d+]] = OpCompositeConstruct %_arr_v3bool_uint_2 [[q0]] [[q1]]
+// CHECK-NEXT:               OpStore %q [[q]]
+  q = (bool2x3)p;
+// CHECK:         [[p:%\d+]] = OpLoad %_arr_v4bool_uint_3 %p
+// CHECK-NEXT:   [[p0:%\d+]] = OpCompositeExtract %v4bool [[p]] 0
+// CHECK-NEXT: [[p0e0:%\d+]] = OpCompositeExtract %bool [[p0]] 0
+// CHECK-NEXT:   [[p1:%\d+]] = OpCompositeExtract %v4bool [[p]] 1
+// CHECK-NEXT: [[p1e0:%\d+]] = OpCompositeExtract %bool [[p1]] 0
+// CHECK-NEXT:   [[p2:%\d+]] = OpCompositeExtract %v4bool [[p]] 2
+// CHECK-NEXT: [[p2e0:%\d+]] = OpCompositeExtract %bool [[p2]] 0
+// CHECK-NEXT:    [[r:%\d+]] = OpCompositeConstruct %v3bool [[p0e0]] [[p1e0]] [[p2e0]]
+// CHECK-NEXT:                 OpStore %r [[r]]
+  r = (bool3x1)p;
+// CHECK:       [[p:%\d+]] = OpLoad %_arr_v4bool_uint_3 %p
+// CHECK-NEXT: [[p0:%\d+]] = OpCompositeExtract %v4bool [[p]] 0
+// CHECK-NEXT:               OpStore %s [[p0]]
+  s = (bool1x4)p;
 }
--- a/tools/clang/test/CodeGenSPIRV/cast.vec-to-mat.explicit.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/cast.vec-to-mat.explicit.hlsl
@ -22,5 +22,12 @@ float4 main(float4 input : A) : SV_Target {
 // CHECK-NEXT:                 OpStore %mat3 [[mat]]
    float2x2 mat3 = (column_major float2x2)input;

+// CHECK:         [[a:%\d+]] = OpLoad %v4int %a
+// CHECK-NEXT: [[vec1:%\d+]] = OpVectorShuffle %v2int [[a]] [[a]] 0 1
+// CHECK-NEXT: [[vec2:%\d+]] = OpVectorShuffle %v2int [[a]] [[a]] 2 3
+// CHECK-NEXT:      {{%\d+}} = OpCompositeConstruct %_arr_v2int_uint_2 [[vec1]] [[vec2]]
+    int4 a;
+    int2x2 b = a;
+
    return float4(mat1[0][0], mat2[0][1], mat3[1][0], mat1[1][1]);
 }
--- a/tools/clang/test/CodeGenSPIRV/constant.matrix.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/constant.matrix.hlsl
@ -19,4 +19,10 @@ void main() {
 // CHECK-NEXT: [[d:%\d+]] = OpCompositeConstruct %mat2v3float [[d0]] [[d1]]
 // CHECK-NEXT: OpStore %d [[d]]
    float2x3 d = float2x3(6., 7., 8., 9., 10., 11.);
+
+// CHECK-NEXT: [[e0:%\d+]] = OpCompositeConstruct %v3int %int_6 %int_7 %int_8
+// CHECK-NEXT: [[e1:%\d+]] = OpCompositeConstruct %v3int %int_9 %int_10 %int_11
+// CHECK-NEXT: [[e:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[e0]] [[e1]]
+// CHECK-NEXT: OpStore %e [[e]]
+    int2x3 e = int2x3(6, 7, 8, 9, 10, 11);
 }
--- a/tools/clang/test/CodeGenSPIRV/intrinsics.all.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/intrinsics.all.hlsl
@ -9,6 +9,7 @@
 // CHECK: [[v4float_0:%\d+]] = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0
 // CHECK: [[v3float_0:%\d+]] = OpConstantComposite %v3float %float_0 %float_0 %float_0
 // CHECK: [[v2float_0:%\d+]] = OpConstantComposite %v2float %float_0 %float_0
+// CHECK:   [[v3int_0:%\d+]] = OpConstantComposite %v3int %int_0 %int_0 %int_0

 void main() {
    bool result;
@ -121,4 +122,16 @@ void main() {
    // CHECK-NEXT: OpStore %result [[all_mat3x4]]
    float3x4 p;
    result = all(p);
+
+// CHECK:              [[q:%\d+]] = OpLoad %_arr_v3int_uint_2 %q
+// CHECK-NEXT:      [[row0:%\d+]] = OpCompositeExtract %v3int [[q]] 0
+// CHECK-NEXT: [[row0_bool:%\d+]] = OpINotEqual %v3bool [[row0]] [[v3int_0]]
+// CHECK-NEXT:  [[row0_all:%\d+]] = OpAll %bool [[row0_bool]]
+// CHECK-NEXT:      [[row1:%\d+]] = OpCompositeExtract %v3int [[q]] 1
+// CHECK-NEXT: [[row1_bool:%\d+]] = OpINotEqual %v3bool [[row1]] [[v3int_0]]
+// CHECK-NEXT:  [[row1_all:%\d+]] = OpAll %bool [[row1_bool]]
+// CHECK-NEXT:  [[all_rows:%\d+]] = OpCompositeConstruct %v2bool [[row0_all]] [[row1_all]]
+// CHECK-NEXT:           {{%\d+}} = OpAll %bool [[all_rows]]
+    int2x3 q;
+    result = all(q);
 }
--- a/tools/clang/test/CodeGenSPIRV/intrinsics.asfloat.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/intrinsics.asfloat.hlsl
@ -83,4 +83,24 @@ void main() {
    // CHECK-NEXT: OpStore %result2x3 [[m]]
    float2x3 m;
    result2x3 = asfloat(m);
+
+    int2x3 n;
+    uint2x3 o;
+
+// CHECK:           [[n:%\d+]] = OpLoad %_arr_v3int_uint_2 %n
+// CHECK-NEXT:     [[n0:%\d+]] = OpCompositeExtract %v3int [[n]] 0
+// CHECK-NEXT:   [[row0:%\d+]] = OpBitcast %v3float [[n0]]
+// CHECK-NEXT:     [[n1:%\d+]] = OpCompositeExtract %v3int [[n]] 1
+// CHECK-NEXT:   [[row1:%\d+]] = OpBitcast %v3float [[n1]]
+// CHECK-NEXT: [[result:%\d+]] = OpCompositeConstruct %mat2v3float [[row0]] [[row1]]
+// CHECK-NEXT:                   OpStore %result2x3 [[result]]
+    result2x3 = asfloat(n);
+// CHECK:           [[o:%\d+]] = OpLoad %_arr_v3uint_uint_2 %o
+// CHECK-NEXT:     [[o0:%\d+]] = OpCompositeExtract %v3uint [[o]] 0
+// CHECK-NEXT:   [[row0:%\d+]] = OpBitcast %v3float [[o0]]
+// CHECK-NEXT:     [[o1:%\d+]] = OpCompositeExtract %v3uint [[o]] 1
+// CHECK-NEXT:   [[row1:%\d+]] = OpBitcast %v3float [[o1]]
+// CHECK-NEXT: [[result:%\d+]] = OpCompositeConstruct %mat2v3float [[row0]] [[row1]]
+// CHECK-NEXT:                   OpStore %result2x3 [[result]]
+    result2x3 = asfloat(o);
 }
--- a/tools/clang/test/CodeGenSPIRV/intrinsics.asint.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/intrinsics.asint.hlsl
@ -43,4 +43,24 @@ void main() {
    // CHECK-NEXT: OpStore %result4 [[i_as_int]]
    float4 i;
    result4 = asint(i);
+
+    float2x3 floatMat;
+    uint2x3 uintMat;
+
+// CHECK:       [[floatMat:%\d+]] = OpLoad %mat2v3float %floatMat
+// CHECK-NEXT: [[floatMat0:%\d+]] = OpCompositeExtract %v3float [[floatMat]] 0
+// CHECK-NEXT:      [[row0:%\d+]] = OpBitcast %v3int [[floatMat0]]
+// CHECK-NEXT: [[floatMat1:%\d+]] = OpCompositeExtract %v3float [[floatMat]] 1
+// CHECK-NEXT:      [[row1:%\d+]] = OpBitcast %v3int [[floatMat1]]
+// CHECK-NEXT:         [[j:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[row0]] [[row1]]
+// CHECK-NEXT:                      OpStore %j [[j]]
+    int2x3 j = asint(floatMat);
+// CHECK:       [[uintMat:%\d+]] = OpLoad %_arr_v3uint_uint_2 %uintMat
+// CHECK-NEXT: [[uintMat0:%\d+]] = OpCompositeExtract %v3uint [[uintMat]] 0
+// CHECK-NEXT:     [[row0:%\d+]] = OpBitcast %v3int [[uintMat0]]
+// CHECK-NEXT: [[uintMat1:%\d+]] = OpCompositeExtract %v3uint [[uintMat]] 1
+// CHECK-NEXT:     [[row1:%\d+]] = OpBitcast %v3int [[uintMat1]]
+// CHECK-NEXT:        [[k:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[row0]] [[row1]]
+// CHECK-NEXT:                     OpStore %k [[k]]
+    int2x3 k = asint(uintMat);
 }
--- a/tools/clang/test/CodeGenSPIRV/intrinsics.asuint.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/intrinsics.asuint.hlsl
@ -53,6 +53,26 @@ void main() {
    float4 i;
    result4 = asuint(i);

+    float2x3 floatMat;
+    int2x3 intMat;
+    
+// CHECK:       [[floatMat:%\d+]] = OpLoad %mat2v3float %floatMat
+// CHECK-NEXT: [[floatMat0:%\d+]] = OpCompositeExtract %v3float [[floatMat]] 0
+// CHECK-NEXT:      [[row0:%\d+]] = OpBitcast %v3uint [[floatMat0]]
+// CHECK-NEXT: [[floatMat1:%\d+]] = OpCompositeExtract %v3float [[floatMat]] 1
+// CHECK-NEXT:      [[row1:%\d+]] = OpBitcast %v3uint [[floatMat1]]
+// CHECK-NEXT:         [[j:%\d+]] = OpCompositeConstruct %_arr_v3uint_uint_2 [[row0]] [[row1]]
+// CHECK-NEXT:                      OpStore %j [[j]]
+    uint2x3 j = asuint(floatMat);
+// CHECK:       [[intMat:%\d+]] = OpLoad %_arr_v3int_uint_2 %intMat
+// CHECK-NEXT: [[intMat0:%\d+]] = OpCompositeExtract %v3int [[intMat]] 0
+// CHECK-NEXT:    [[row0:%\d+]] = OpBitcast %v3uint [[intMat0]]
+// CHECK-NEXT: [[intMat1:%\d+]] = OpCompositeExtract %v3int [[intMat]] 1
+// CHECK-NEXT:    [[row1:%\d+]] = OpBitcast %v3uint [[intMat1]]
+// CHECK-NEXT:       [[k:%\d+]] = OpCompositeConstruct %_arr_v3uint_uint_2 [[row0]] [[row1]]
+// CHECK-NEXT:                    OpStore %k [[k]]
+    uint2x3 k = asuint(intMat);
+
    double value;
    uint lowbits;
    uint highbits;
--- a/tools/clang/test/CodeGenSPIRV/intrinsics.modf.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/intrinsics.modf.hlsl
@ -26,6 +26,8 @@ void main() {
  uint     a, ip_a, frac_a;
  int4     b, ip_b, frac_b;
  float2x3 c, ip_c, frac_c;
+  float2x3 d;
+  int2x3   frac_d, ip_d;

 // CHECK:                 [[a:%\d+]] = OpLoad %uint %a
 // CHECK-NEXT:           [[af:%\d+]] = OpConvertUToF %float [[a]]
@ -63,4 +65,29 @@ void main() {
 // CHECK-NEXT:            [[frac_c:%\d+]] = OpCompositeConstruct %mat2v3float [[frac_c_row0]] [[frac_c_row1]]
 // CHECK-NEXT:                              OpStore %frac_c [[frac_c]]
  frac_c = modf(c, ip_c);
+
+// CHECK:                       [[d:%\d+]] = OpLoad %mat2v3float %d
+// CHECK-NEXT:             [[d_row0:%\d+]] = OpCompositeExtract %v3float [[d]] 0
+// CHECK-NEXT: [[modf_struct_d_row0:%\d+]] = OpExtInst %ModfStructType_1 [[glsl]] ModfStruct [[d_row0]]
+// CHECK-NEXT:          [[ip_d_row0:%\d+]] = OpCompositeExtract %v3float [[modf_struct_d_row0]] 1
+// CHECK-NEXT:        [[frac_d_row0:%\d+]] = OpCompositeExtract %v3float [[modf_struct_d_row0]] 0
+// CHECK-NEXT:             [[d_row1:%\d+]] = OpCompositeExtract %v3float [[d]] 1
+// CHECK-NEXT: [[modf_struct_d_row1:%\d+]] = OpExtInst %ModfStructType_1 [[glsl]] ModfStruct [[d_row1]]
+// CHECK-NEXT:          [[ip_d_row1:%\d+]] = OpCompositeExtract %v3float [[modf_struct_d_row1]] 1
+// CHECK-NEXT:        [[frac_d_row1:%\d+]] = OpCompositeExtract %v3float [[modf_struct_d_row1]] 0
+// CHECK-NEXT:       [[ip_float_mat:%\d+]] = OpCompositeConstruct %mat2v3float [[ip_d_row0]] [[ip_d_row1]]
+// CHECK-NEXT:  [[ip_float_mat_row0:%\d+]] = OpCompositeExtract %v3float [[ip_float_mat]] 0
+// CHECK-NEXT:    [[ip_int_mat_row0:%\d+]] = OpConvertFToS %v3int [[ip_float_mat_row0]]
+// CHECK-NEXT:  [[ip_float_mat_row1:%\d+]] = OpCompositeExtract %v3float [[ip_float_mat]] 1
+// CHECK-NEXT:    [[ip_int_mat_row1:%\d+]] = OpConvertFToS %v3int [[ip_float_mat_row1]]
+// CHECK-NEXT:         [[ip_int_mat:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[ip_int_mat_row0]] [[ip_int_mat_row1]]
+// CHECK-NEXT:                               OpStore %ip_d [[ip_int_mat]]
+// CHECK-NEXT:     [[frac_float_mat:%\d+]] = OpCompositeConstruct %mat2v3float [[frac_d_row0]] [[frac_d_row1]]
+// CHECK-NEXT:[[frac_float_mat_row0:%\d+]] = OpCompositeExtract %v3float [[frac_float_mat]] 0
+// CHECK-NEXT:  [[frac_int_mat_row0:%\d+]] = OpConvertFToS %v3int [[frac_float_mat_row0]]
+// CHECK-NEXT:[[frac_float_mat_row1:%\d+]] = OpCompositeExtract %v3float [[frac_float_mat]] 1
+// CHECK-NEXT:  [[frac_int_mat_row1:%\d+]] = OpConvertFToS %v3int [[frac_float_mat_row1]]
+// CHECK-NEXT:       [[frac_int_mat:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[frac_int_mat_row0]] [[frac_int_mat_row1]]
+// CHECK-NEXT:                               OpStore %frac_d [[frac_int_mat]]
+  frac_d = modf(d, ip_d);
 }
--- a/tools/clang/test/CodeGenSPIRV/intrinsics.mul.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/intrinsics.mul.hlsl
@ -76,7 +76,7 @@ void main() {
 // CHECK-NEXT: [[c_splat1:%\d+]] = OpCompositeConstruct %v4int [[int_c1]] [[int_c1]] [[int_c1]] [[int_c1]]
 // CHECK-NEXT: {{%\d+}} = OpIMul %v4int [[int4_d1]] [[c_splat1]]
  int4 int_vectorMulScalar = mul(int4_d,int_c);
-  
+
  float e;
  float3x4 f;

@ -84,7 +84,7 @@ void main() {
 // CHECK-NEXT: [[f:%\d+]] = OpLoad %mat3v4float %f
 // CHECK-NEXT: {{%\d+}} = OpMatrixTimesScalar %mat3v4float [[f]] [[e]]
  float3x4 scalarMulMatrix = mul(e,f);
-  
+
 // CHECK:      [[f1:%\d+]] = OpLoad %mat3v4float %f
 // CHECK-NEXT: [[e1:%\d+]] = OpLoad %float %e
 // CHECK-NEXT: {{%\d+}} = OpMatrixTimesScalar %mat3v4float [[f1]] [[e1]]
@ -139,4 +139,302 @@ void main() {
 // CHECK-NEXT: [[n:%\d+]] = OpLoad %mat4v2float %n
 // CHECK-NEXT: {{%\d+}} = OpMatrixTimesMatrix %mat3v2float [[n]] [[m]]
  float3x2 matrixMulMatrix = mul(m,n);
+
+///////////////////////////////////////
+/// Non-floating point matrix cases ///
+///////////////////////////////////////
+
+  uint  uintScalar;
+  int   intScalar;
+  float floatScalar;
+
+  // Scalar * Matrix
+// CHECK:        [[intScalar:%\d+]] = OpLoad %int %intScalar
+// CHECK-NEXT:      [[intMat:%\d+]] = OpLoad %_arr_v3int_uint_2 %intMat2x3
+// CHECK-NEXT: [[v3intScalar:%\d+]] = OpCompositeConstruct %v3int [[intScalar]] [[intScalar]] [[intScalar]]
+// CHECK-NEXT:     [[intMat0:%\d+]] = OpCompositeExtract %v3int [[intMat]] 0
+// CHECK-NEXT:        [[mul0:%\d+]] = OpIMul %v3int [[intMat0]] [[v3intScalar]]
+// CHECK-NEXT:     [[intMat1:%\d+]] = OpCompositeExtract %v3int [[intMat]] 1
+// CHECK-NEXT:        [[mul1:%\d+]] = OpIMul %v3int [[intMat1]] [[v3intScalar]]
+// CHECK-NEXT:             {{%\d+}} = OpCompositeConstruct %_arr_v3int_uint_2 [[mul0]] [[mul1]]
+  int2x3   intMat2x3;
+  int2x3 o = mul(intScalar, intMat2x3);
+
+  // Matrix * Scalar
+// CHECK:           [[uintMat:%\d+]] = OpLoad %_arr_v3uint_uint_2 %uintMat2x3
+// CHECK-NEXT:   [[uintScalar:%\d+]] = OpLoad %uint %uintScalar
+// CHECK-NEXT: [[v3uintScalar:%\d+]] = OpCompositeConstruct %v3uint [[uintScalar]] [[uintScalar]] [[uintScalar]]
+// CHECK-NEXT:     [[uintMat0:%\d+]] = OpCompositeExtract %v3uint [[uintMat]] 0
+// CHECK-NEXT:         [[mul0:%\d+]] = OpIMul %v3uint [[uintMat0]] [[v3uintScalar]]
+// CHECK-NEXT:     [[uintMat1:%\d+]] = OpCompositeExtract %v3uint [[uintMat]] 1
+// CHECK-NEXT:         [[mul1:%\d+]] = OpIMul %v3uint [[uintMat1]] [[v3uintScalar]]
+// CHECK-NEXT:              {{%\d+}} = OpCompositeConstruct %_arr_v3uint_uint_2 [[mul0]] [[mul1]]
+  uint2x3  uintMat2x3;
+  uint2x3 p = mul(uintMat2x3, uintScalar);
+
+  // Matrix * Scalar (different types)
+  // The front end inserts cast AST nodes: the int matrix is first converted to a float matrix, then multiplied with OpMatrixTimesScalar.
+// CHECK:           [[intMat:%\d+]] = OpLoad %_arr_v4int_uint_2 %intMat2x4
+// CHECK-NEXT:     [[intMat0:%\d+]] = OpCompositeExtract %v4int [[intMat]] 0
+// CHECK-NEXT:   [[floatMat0:%\d+]] = OpConvertSToF %v4float [[intMat0]]
+// CHECK-NEXT:     [[intMat1:%\d+]] = OpCompositeExtract %v4int [[intMat]] 1
+// CHECK-NEXT:   [[floatMat1:%\d+]] = OpConvertSToF %v4float [[intMat1]]
+// CHECK-NEXT:    [[floatMat:%\d+]] = OpCompositeConstruct %mat2v4float [[floatMat0]] [[floatMat1]]
+// CHECK-NEXT: [[floatScalar:%\d+]] = OpLoad %float %floatScalar
+// CHECK-NEXT:             {{%\d+}} = OpMatrixTimesScalar %mat2v4float [[floatMat]] [[floatScalar]]
+  int2x4 intMat2x4;
+  float2x4 q = mul(intMat2x4, floatScalar);
+
+  // Vector * Matrix
+  // First, we need to get vectors for the columns of the matrix, and then perform
+  // dot product of the vector and the matrix columns.
+// CHECK:               [[intVec:%\d+]] = OpLoad %v2int %intVec2
+// CHECK-NEXT:          [[intMat:%\d+]] = OpLoad %_arr_v3int_uint_2 %intMat2x3
+// CHECK-NEXT:        [[intMat00:%\d+]] = OpCompositeExtract %int [[intMat]] 0 0
+// CHECK-NEXT:        [[intMat01:%\d+]] = OpCompositeExtract %int [[intMat]] 0 1
+// CHECK-NEXT:        [[intMat02:%\d+]] = OpCompositeExtract %int [[intMat]] 0 2
+// CHECK-NEXT:        [[intMat10:%\d+]] = OpCompositeExtract %int [[intMat]] 1 0
+// CHECK-NEXT:        [[intMat11:%\d+]] = OpCompositeExtract %int [[intMat]] 1 1
+// CHECK-NEXT:        [[intMat12:%\d+]] = OpCompositeExtract %int [[intMat]] 1 2
+// CHECK-NEXT:      [[intMatCol0:%\d+]] = OpCompositeConstruct %v2int [[intMat00]] [[intMat10]]
+// CHECK-NEXT:      [[intMatCol1:%\d+]] = OpCompositeConstruct %v2int [[intMat01]] [[intMat11]]
+// CHECK-NEXT:      [[intMatCol2:%\d+]] = OpCompositeConstruct %v2int [[intMat02]] [[intMat12]]
+// CHECK-NEXT: [[intMatTranspose:%\d+]] = OpCompositeConstruct %_arr_v2int_uint_3 [[intMatCol0]] [[intMatCol1]] [[intMatCol2]]
+// CHECK-NEXT:      [[intMatCol0:%\d+]] = OpCompositeExtract %v2int [[intMatTranspose]] 0
+// CHECK-NEXT:         [[intVec0:%\d+]] = OpCompositeExtract %int [[intVec]] 0
+// CHECK-NEXT:     [[intMatCol00:%\d+]] = OpCompositeExtract %int [[intMatCol0]] 0
+// CHECK-NEXT:            [[mul1:%\d+]] = OpIMul %int [[intVec0]] [[intMatCol00]]
+// CHECK-NEXT:         [[intVec1:%\d+]] = OpCompositeExtract %int [[intVec]] 1
+// CHECK-NEXT:     [[intMatCol01:%\d+]] = OpCompositeExtract %int [[intMatCol0]] 1
+// CHECK-NEXT:            [[mul2:%\d+]] = OpIMul %int [[intVec1]] [[intMatCol01]]
+// CHECK-NEXT:              [[r0:%\d+]] = OpIAdd %int [[mul1]] [[mul2]]
+// CHECK-NEXT:      [[intMatCol1:%\d+]] = OpCompositeExtract %v2int [[intMatTranspose]] 1
+// CHECK-NEXT:         [[intVec0:%\d+]] = OpCompositeExtract %int [[intVec]] 0
+// CHECK-NEXT:     [[intMatCol10:%\d+]] = OpCompositeExtract %int [[intMatCol1]] 0
+// CHECK-NEXT:            [[mul3:%\d+]] = OpIMul %int [[intVec0]] [[intMatCol10]]
+// CHECK-NEXT:         [[intVec1:%\d+]] = OpCompositeExtract %int [[intVec]] 1
+// CHECK-NEXT:     [[intMatCol11:%\d+]] = OpCompositeExtract %int [[intMatCol1]] 1
+// CHECK-NEXT:            [[mul4:%\d+]] = OpIMul %int [[intVec1]] [[intMatCol11]]
+// CHECK-NEXT:              [[r1:%\d+]] = OpIAdd %int [[mul3]] [[mul4]]
+// CHECK-NEXT:      [[intMatCol2:%\d+]] = OpCompositeExtract %v2int [[intMatTranspose]] 2
+// CHECK-NEXT:         [[intVec0:%\d+]] = OpCompositeExtract %int [[intVec]] 0
+// CHECK-NEXT:     [[intMatCol20:%\d+]] = OpCompositeExtract %int [[intMatCol2]] 0
+// CHECK-NEXT:            [[mul5:%\d+]] = OpIMul %int [[intVec0]] [[intMatCol20]]
+// CHECK-NEXT:         [[intVec1:%\d+]] = OpCompositeExtract %int [[intVec]] 1
+// CHECK-NEXT:     [[intMatCol21:%\d+]] = OpCompositeExtract %int [[intMatCol2]] 1
+// CHECK-NEXT:            [[mul6:%\d+]] = OpIMul %int [[intVec1]] [[intMatCol21]]
+// CHECK-NEXT:              [[r2:%\d+]] = OpIAdd %int [[mul5]] [[mul6]]
+// CHECK-NEXT:                 {{%\d+}} = OpCompositeConstruct %v3int [[r0]] [[r1]] [[r2]]
+  int2   intVec2;
+  int3 r = mul(intVec2, intMat2x3);
+
+  // Matrix * Vector
+// CHECK:        [[uintMat:%\d+]] = OpLoad %_arr_v2uint_uint_3 %uintMat3x2
+// CHECK-NEXT:   [[uintVec:%\d+]] = OpLoad %v2uint %uintVec2
+// CHECK-NEXT:  [[uintMat0:%\d+]] = OpCompositeExtract %v2uint [[uintMat]] 0
+// CHECK-NEXT: [[uintMat00:%\d+]] = OpCompositeExtract %uint [[uintMat0]] 0
+// CHECK-NEXT:  [[uintVec0:%\d+]] = OpCompositeExtract %uint [[uintVec]] 0
+// CHECK-NEXT:      [[mul1:%\d+]] = OpIMul %uint [[uintMat00]] [[uintVec0]]
+// CHECK-NEXT: [[uintMat01:%\d+]] = OpCompositeExtract %uint [[uintMat0]] 1
+// CHECK-NEXT:  [[uintVec1:%\d+]] = OpCompositeExtract %uint [[uintVec]] 1
+// CHECK-NEXT:      [[mul2:%\d+]] = OpIMul %uint [[uintMat01]] [[uintVec1]]
+// CHECK-NEXT:        [[s0:%\d+]] = OpIAdd %uint [[mul1]] [[mul2]]
+// CHECK-NEXT:  [[uintMat1:%\d+]] = OpCompositeExtract %v2uint [[uintMat]] 1
+// CHECK-NEXT: [[uintMat10:%\d+]] = OpCompositeExtract %uint [[uintMat1]] 0
+// CHECK-NEXT:  [[uintVec0:%\d+]] = OpCompositeExtract %uint [[uintVec]] 0
+// CHECK-NEXT:      [[mul3:%\d+]] = OpIMul %uint [[uintMat10]] [[uintVec0]]
+// CHECK-NEXT: [[uintMat11:%\d+]] = OpCompositeExtract %uint [[uintMat1]] 1
+// CHECK-NEXT:  [[uintVec1:%\d+]] = OpCompositeExtract %uint [[uintVec]] 1
+// CHECK-NEXT:      [[mul4:%\d+]] = OpIMul %uint [[uintMat11]] [[uintVec1]]
+// CHECK-NEXT:        [[s1:%\d+]] = OpIAdd %uint [[mul3]] [[mul4]]
+// CHECK-NEXT:  [[uintMat2:%\d+]] = OpCompositeExtract %v2uint [[uintMat]] 2
+// CHECK-NEXT: [[uintMat20:%\d+]] = OpCompositeExtract %uint [[uintMat2]] 0
+// CHECK-NEXT:  [[uintVec0:%\d+]] = OpCompositeExtract %uint [[uintVec]] 0
+// CHECK-NEXT:      [[mul5:%\d+]] = OpIMul %uint [[uintMat20]] [[uintVec0]]
+// CHECK-NEXT: [[uintMat21:%\d+]] = OpCompositeExtract %uint [[uintMat2]] 1
+// CHECK-NEXT:  [[uintVec1:%\d+]] = OpCompositeExtract %uint [[uintVec]] 1
+// CHECK-NEXT:      [[mul6:%\d+]] = OpIMul %uint [[uintMat21]] [[uintVec1]]
+// CHECK-NEXT:        [[s2:%\d+]] = OpIAdd %uint [[mul5]] [[mul6]]
+// CHECK-NEXT:           {{%\d+}} = OpCompositeConstruct %v3uint [[s0]] [[s1]] [[s2]]
+  uint2     uintVec2;
+  uint3x2   uintMat3x2;
+  uint3 s = mul(uintMat3x2, uintVec2);
+
+  // Matrix * Matrix
+// CHECK:           [[lhs:%\d+]] = OpLoad %_arr_v4int_uint_2 %intMat2x4
+// CHECK-NEXT:      [[rhs:%\d+]] = OpLoad %_arr_v3int_uint_4 %intMat4x3
+
+  ///////////////////////////////////////////
+  /////////// Transpose the rhs /////////////
+  ///////////////////////////////////////////
+// CHECK-NEXT:        [[rhs00:%\d+]] = OpCompositeExtract %int [[rhs]] 0 0
+// CHECK-NEXT:        [[rhs01:%\d+]] = OpCompositeExtract %int [[rhs]] 0 1
+// CHECK-NEXT:        [[rhs02:%\d+]] = OpCompositeExtract %int [[rhs]] 0 2
+// CHECK-NEXT:        [[rhs10:%\d+]] = OpCompositeExtract %int [[rhs]] 1 0
+// CHECK-NEXT:        [[rhs11:%\d+]] = OpCompositeExtract %int [[rhs]] 1 1
+// CHECK-NEXT:        [[rhs12:%\d+]] = OpCompositeExtract %int [[rhs]] 1 2
+// CHECK-NEXT:        [[rhs20:%\d+]] = OpCompositeExtract %int [[rhs]] 2 0
+// CHECK-NEXT:        [[rhs21:%\d+]] = OpCompositeExtract %int [[rhs]] 2 1
+// CHECK-NEXT:        [[rhs22:%\d+]] = OpCompositeExtract %int [[rhs]] 2 2
+// CHECK-NEXT:        [[rhs30:%\d+]] = OpCompositeExtract %int [[rhs]] 3 0
+// CHECK-NEXT:        [[rhs31:%\d+]] = OpCompositeExtract %int [[rhs]] 3 1
+// CHECK-NEXT:        [[rhs32:%\d+]] = OpCompositeExtract %int [[rhs]] 3 2
+// CHECK-NEXT:      [[rhsCol0:%\d+]] = OpCompositeConstruct %v4int [[rhs00]] [[rhs10]] [[rhs20]] [[rhs30]]
+// CHECK-NEXT:      [[rhsCol1:%\d+]] = OpCompositeConstruct %v4int [[rhs01]] [[rhs11]] [[rhs21]] [[rhs31]]
+// CHECK-NEXT:      [[rhsCol2:%\d+]] = OpCompositeConstruct %v4int [[rhs02]] [[rhs12]] [[rhs22]] [[rhs32]]
+// CHECK-NEXT: [[rhsTranspose:%\d+]] = OpCompositeConstruct %_arr_v4int_uint_3 [[rhsCol0]] [[rhsCol1]] [[rhsCol2]]
+  ///////////////////////////////////////////
+  /////////// End: Transpose the rhs ////////
+  ///////////////////////////////////////////
+
+  ///////////////////////////////////////////
+  /////////// LHS Row0 *dot* RHS Col0 ///////
+  ///////////////////////////////////////////
+// CHECK-NEXT:  [[lhsRow0:%\d+]] = OpCompositeExtract %v4int [[lhs]] 0
+// CHECK-NEXT:  [[rhsCol0:%\d+]] = OpCompositeExtract %v4int [[rhsTranspose]] 0
+// CHECK-NEXT: [[lhsRow00:%\d+]] = OpCompositeExtract %int [[lhsRow0]] 0
+// CHECK-NEXT: [[rhsCol00:%\d+]] = OpCompositeExtract %int [[rhsCol0]] 0
+// CHECK-NEXT:     [[mul1:%\d+]] = OpIMul %int [[lhsRow00]] [[rhsCol00]]
+// CHECK-NEXT: [[lhsRow01:%\d+]] = OpCompositeExtract %int [[lhsRow0]] 1
+// CHECK-NEXT: [[rhsCol01:%\d+]] = OpCompositeExtract %int [[rhsCol0]] 1
+// CHECK-NEXT:     [[mul2:%\d+]] = OpIMul %int [[lhsRow01]] [[rhsCol01]]
+// CHECK-NEXT: [[lhsRow02:%\d+]] = OpCompositeExtract %int [[lhsRow0]] 2
+// CHECK-NEXT: [[rhsCol02:%\d+]] = OpCompositeExtract %int [[rhsCol0]] 2
+// CHECK-NEXT:     [[mul3:%\d+]] = OpIMul %int [[lhsRow02]] [[rhsCol02]]
+// CHECK-NEXT: [[lhsRow03:%\d+]] = OpCompositeExtract %int [[lhsRow0]] 3
+// CHECK-NEXT: [[rhsCol03:%\d+]] = OpCompositeExtract %int [[rhsCol0]] 3
+// CHECK-NEXT:     [[mul4:%\d+]] = OpIMul %int [[lhsRow03]] [[rhsCol03]]
+// CHECK-NEXT:      [[mul:%\d+]] = OpIAdd %int [[mul1]] [[mul2]]
+// CHECK-NEXT:      [[mul:%\d+]] = OpIAdd %int [[mul]] [[mul3]]
+// CHECK-NEXT:      [[t00:%\d+]] = OpIAdd %int [[mul]] [[mul4]]
+  ///////////////////////////////////////////
+  ////// END: LHS Row0 *dot* RHS Col0 ///////
+  ///////////////////////////////////////////
+
+  ///////////////////////////////////////////
+  /////////// LHS Row0 *dot* RHS Col1 ///////
+  ///////////////////////////////////////////
+// CHECK-NEXT:  [[rhsCol1:%\d+]] = OpCompositeExtract %v4int [[rhsTranspose]] 1
+// CHECK-NEXT: [[lhsRow00:%\d+]] = OpCompositeExtract %int [[lhsRow0]] 0
+// CHECK-NEXT: [[rhsCol10:%\d+]] = OpCompositeExtract %int [[rhsCol1]] 0
+// CHECK-NEXT:     [[mul5:%\d+]] = OpIMul %int [[lhsRow00]] [[rhsCol10]]
+// CHECK-NEXT: [[lhsRow01:%\d+]] = OpCompositeExtract %int [[lhsRow0]] 1
+// CHECK-NEXT: [[rhsCol11:%\d+]] = OpCompositeExtract %int [[rhsCol1]] 1
+// CHECK-NEXT:     [[mul6:%\d+]] = OpIMul %int [[lhsRow01]] [[rhsCol11]]
+// CHECK-NEXT: [[lhsRow02:%\d+]] = OpCompositeExtract %int [[lhsRow0]] 2
+// CHECK-NEXT: [[rhsCol12:%\d+]] = OpCompositeExtract %int [[rhsCol1]] 2
+// CHECK-NEXT:     [[mul7:%\d+]] = OpIMul %int [[lhsRow02]] [[rhsCol12]]
+// CHECK-NEXT: [[lhsRow03:%\d+]] = OpCompositeExtract %int [[lhsRow0]] 3
+// CHECK-NEXT: [[rhsCol13:%\d+]] = OpCompositeExtract %int [[rhsCol1]] 3
+// CHECK-NEXT:     [[mul8:%\d+]] = OpIMul %int [[lhsRow03]] [[rhsCol13]]
+// CHECK-NEXT:      [[mul:%\d+]] = OpIAdd %int [[mul5]] [[mul6]]
+// CHECK-NEXT:      [[mul:%\d+]] = OpIAdd %int [[mul]] [[mul7]]
+// CHECK-NEXT:      [[t01:%\d+]] = OpIAdd %int [[mul]] [[mul8]]
+  ///////////////////////////////////////////
+  ////// END: LHS Row0 *dot* RHS Col1 ///////
+  ///////////////////////////////////////////
+
+  ///////////////////////////////////////////
+  /////////// LHS Row0 *dot* RHS Col2 ///////
+  ///////////////////////////////////////////
+// CHECK-NEXT:  [[rhsCol2:%\d+]] = OpCompositeExtract %v4int [[rhsTranspose]] 2
+// CHECK-NEXT: [[lhsRow00:%\d+]] = OpCompositeExtract %int [[lhsRow0]] 0
+// CHECK-NEXT: [[rhsCol20:%\d+]] = OpCompositeExtract %int [[rhsCol2]] 0
+// CHECK-NEXT:     [[mul9:%\d+]] = OpIMul %int [[lhsRow00]] [[rhsCol20]]
+// CHECK-NEXT: [[lhsRow01:%\d+]] = OpCompositeExtract %int [[lhsRow0]] 1
+// CHECK-NEXT: [[rhsCol21:%\d+]] = OpCompositeExtract %int [[rhsCol2]] 1
+// CHECK-NEXT:    [[mul10:%\d+]] = OpIMul %int [[lhsRow01]] [[rhsCol21]]
+// CHECK-NEXT: [[lhsRow02:%\d+]] = OpCompositeExtract %int [[lhsRow0]] 2
+// CHECK-NEXT: [[rhsCol22:%\d+]] = OpCompositeExtract %int [[rhsCol2]] 2
+// CHECK-NEXT:    [[mul11:%\d+]] = OpIMul %int [[lhsRow02]] [[rhsCol22]]
+// CHECK-NEXT: [[lhsRow03:%\d+]] = OpCompositeExtract %int [[lhsRow0]] 3
+// CHECK-NEXT: [[rhsCol23:%\d+]] = OpCompositeExtract %int [[rhsCol2]] 3
+// CHECK-NEXT:    [[mul12:%\d+]] = OpIMul %int [[lhsRow03]] [[rhsCol23]]
+// CHECK-NEXT:      [[mul:%\d+]] = OpIAdd %int [[mul9]] [[mul10]]
+// CHECK-NEXT:      [[mul:%\d+]] = OpIAdd %int [[mul]] [[mul11]]
+// CHECK-NEXT:      [[t02:%\d+]] = OpIAdd %int [[mul]] [[mul12]]
+  ///////////////////////////////////////////
+  ////// END: LHS Row0 *dot* RHS Col2 ///////
+  ///////////////////////////////////////////
+
+// Result row 0:
+// CHECK-NEXT: [[t0:%\d+]] = OpCompositeConstruct %v3int [[t00]] [[t01]] [[t02]]
+
+  ///////////////////////////////////////////
+  /////////// LHS Row1 *dot* RHS Col0 ///////
+  ///////////////////////////////////////////
+// CHECK-NEXT:  [[lhsRow1:%\d+]] = OpCompositeExtract %v4int [[lhs]] 1
+// CHECK-NEXT:  [[rhsCol0:%\d+]] = OpCompositeExtract %v4int [[rhsTranspose]] 0
+// CHECK-NEXT: [[lhsRow10:%\d+]] = OpCompositeExtract %int [[lhsRow1]] 0
+// CHECK-NEXT: [[rhsCol00:%\d+]] = OpCompositeExtract %int [[rhsCol0]] 0
+// CHECK-NEXT:     [[mul1:%\d+]] = OpIMul %int [[lhsRow10]] [[rhsCol00]]
+// CHECK-NEXT: [[lhsRow11:%\d+]] = OpCompositeExtract %int [[lhsRow1]] 1
+// CHECK-NEXT: [[rhsCol01:%\d+]] = OpCompositeExtract %int [[rhsCol0]] 1
+// CHECK-NEXT:     [[mul2:%\d+]] = OpIMul %int [[lhsRow11]] [[rhsCol01]]
+// CHECK-NEXT: [[lhsRow12:%\d+]] = OpCompositeExtract %int [[lhsRow1]] 2
+// CHECK-NEXT: [[rhsCol02:%\d+]] = OpCompositeExtract %int [[rhsCol0]] 2
+// CHECK-NEXT:     [[mul3:%\d+]] = OpIMul %int [[lhsRow12]] [[rhsCol02]]
+// CHECK-NEXT: [[lhsRow13:%\d+]] = OpCompositeExtract %int [[lhsRow1]] 3
+// CHECK-NEXT: [[rhsCol03:%\d+]] = OpCompositeExtract %int [[rhsCol0]] 3
+// CHECK-NEXT:     [[mul4:%\d+]] = OpIMul %int [[lhsRow13]] [[rhsCol03]]
+// CHECK-NEXT:      [[mul:%\d+]] = OpIAdd %int [[mul1]] [[mul2]]
+// CHECK-NEXT:      [[mul:%\d+]] = OpIAdd %int [[mul]] [[mul3]]
+// CHECK-NEXT:      [[t10:%\d+]] = OpIAdd %int [[mul]] [[mul4]]
+  ///////////////////////////////////////////
+  ////// END: LHS Row1 *dot* RHS Col0 ///////
+  ///////////////////////////////////////////
+
+  ///////////////////////////////////////////
+  /////////// LHS Row1 *dot* RHS Col1 ///////
+  ///////////////////////////////////////////
+// CHECK-NEXT:  [[rhsCol1:%\d+]] = OpCompositeExtract %v4int [[rhsTranspose]] 1
+// CHECK-NEXT: [[lhsRow10:%\d+]] = OpCompositeExtract %int [[lhsRow1]] 0
+// CHECK-NEXT: [[rhsCol10:%\d+]] = OpCompositeExtract %int [[rhsCol1]] 0
+// CHECK-NEXT:     [[mul5:%\d+]] = OpIMul %int [[lhsRow10]] [[rhsCol10]]
+// CHECK-NEXT: [[lhsRow11:%\d+]] = OpCompositeExtract %int [[lhsRow1]] 1
+// CHECK-NEXT: [[rhsCol11:%\d+]] = OpCompositeExtract %int [[rhsCol1]] 1
+// CHECK-NEXT:     [[mul6:%\d+]] = OpIMul %int [[lhsRow11]] [[rhsCol11]]
+// CHECK-NEXT: [[lhsRow12:%\d+]] = OpCompositeExtract %int [[lhsRow1]] 2
+// CHECK-NEXT: [[rhsCol12:%\d+]] = OpCompositeExtract %int [[rhsCol1]] 2
+// CHECK-NEXT:     [[mul7:%\d+]] = OpIMul %int [[lhsRow12]] [[rhsCol12]]
+// CHECK-NEXT: [[lhsRow13:%\d+]] = OpCompositeExtract %int [[lhsRow1]] 3
+// CHECK-NEXT: [[rhsCol13:%\d+]] = OpCompositeExtract %int [[rhsCol1]] 3
+// CHECK-NEXT:     [[mul8:%\d+]] = OpIMul %int [[lhsRow13]] [[rhsCol13]]
+// CHECK-NEXT:      [[mul:%\d+]] = OpIAdd %int [[mul5]] [[mul6]]
+// CHECK-NEXT:      [[mul:%\d+]] = OpIAdd %int [[mul]] [[mul7]]
+// CHECK-NEXT:      [[t11:%\d+]] = OpIAdd %int [[mul]] [[mul8]]
+  ///////////////////////////////////////////
+  ////// END: LHS Row1 *dot* RHS Col1 ///////
+  ///////////////////////////////////////////
+
+  ///////////////////////////////////////////
+  /////////// LHS Row1 *dot* RHS Col2 ///////
+  ///////////////////////////////////////////
+// CHECK-NEXT:  [[rhsCol2:%\d+]] = OpCompositeExtract %v4int [[rhsTranspose]] 2
+// CHECK-NEXT: [[lhsRow10:%\d+]] = OpCompositeExtract %int [[lhsRow1]] 0
+// CHECK-NEXT: [[rhsCol20:%\d+]] = OpCompositeExtract %int [[rhsCol2]] 0
+// CHECK-NEXT:     [[mul9:%\d+]] = OpIMul %int [[lhsRow10]] [[rhsCol20]]
+// CHECK-NEXT: [[lhsRow11:%\d+]] = OpCompositeExtract %int [[lhsRow1]] 1
+// CHECK-NEXT: [[rhsCol21:%\d+]] = OpCompositeExtract %int [[rhsCol2]] 1
+// CHECK-NEXT:    [[mul10:%\d+]] = OpIMul %int [[lhsRow11]] [[rhsCol21]]
+// CHECK-NEXT: [[lhsRow12:%\d+]] = OpCompositeExtract %int [[lhsRow1]] 2
+// CHECK-NEXT: [[rhsCol22:%\d+]] = OpCompositeExtract %int [[rhsCol2]] 2
+// CHECK-NEXT:    [[mul11:%\d+]] = OpIMul %int [[lhsRow12]] [[rhsCol22]]
+// CHECK-NEXT: [[lhsRow13:%\d+]] = OpCompositeExtract %int [[lhsRow1]] 3
+// CHECK-NEXT: [[rhsCol23:%\d+]] = OpCompositeExtract %int [[rhsCol2]] 3
+// CHECK-NEXT:    [[mul12:%\d+]] = OpIMul %int [[lhsRow13]] [[rhsCol23]]
+// CHECK-NEXT:      [[mul:%\d+]] = OpIAdd %int [[mul9]] [[mul10]]
+// CHECK-NEXT:      [[mul:%\d+]] = OpIAdd %int [[mul]] [[mul11]]
+// CHECK-NEXT:      [[t12:%\d+]] = OpIAdd %int [[mul]] [[mul12]]
+  ///////////////////////////////////////////
+  ////// END: LHS Row1 *dot* RHS Col2 ///////
+  ///////////////////////////////////////////
+
+// Result row 1:
+// CHECK-NEXT: [[t1:%\d+]] = OpCompositeConstruct %v3int [[t10]] [[t11]] [[t12]]
+
+// Final result:
+// CHECK-NEXT:    {{%\d+}} = OpCompositeConstruct %_arr_v3int_uint_2 [[t0]] [[t1]]
+  int4x3 intMat4x3;
+  int2x3 t = mul(intMat2x4, intMat4x3);
 }
--- a/tools/clang/test/CodeGenSPIRV/intrinsics.transpose.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/intrinsics.transpose.hlsl
@ -6,4 +6,59 @@ void main() {
 // CHECK:      [[m:%\d+]] = OpLoad %mat2v3float %m
 // CHECK-NEXT:   {{%\d+}} = OpTranspose %mat3v2float [[m]]
  float3x2 n = transpose(m);
+
+// CHECK:        [[p:%\d+]] = OpLoad %_arr_v3int_uint_2 %p
+// CHECK-NEXT: [[p00:%\d+]] = OpCompositeExtract %int [[p]] 0 0
+// CHECK-NEXT: [[p01:%\d+]] = OpCompositeExtract %int [[p]] 0 1
+// CHECK-NEXT: [[p02:%\d+]] = OpCompositeExtract %int [[p]] 0 2
+// CHECK-NEXT: [[p10:%\d+]] = OpCompositeExtract %int [[p]] 1 0
+// CHECK-NEXT: [[p11:%\d+]] = OpCompositeExtract %int [[p]] 1 1
+// CHECK-NEXT: [[p12:%\d+]] = OpCompositeExtract %int [[p]] 1 2
+// CHECK-NEXT: [[pt0:%\d+]] = OpCompositeConstruct %v2int [[p00]] [[p10]]
+// CHECK-NEXT: [[pt1:%\d+]] = OpCompositeConstruct %v2int [[p01]] [[p11]]
+// CHECK-NEXT: [[pt2:%\d+]] = OpCompositeConstruct %v2int [[p02]] [[p12]]
+// CHECK-NEXT:  [[pt:%\d+]] = OpCompositeConstruct %_arr_v2int_uint_3 [[pt0]] [[pt1]] [[pt2]]
+// CHECK-NEXT:                OpStore %pt [[pt]]
+  int2x3 p;
+  int3x2 pt = transpose(p);
+
+// CHECK:        [[q:%\d+]] = OpLoad %_arr_v2bool_uint_3 %q
+// CHECK-NEXT: [[q00:%\d+]] = OpCompositeExtract %bool [[q]] 0 0
+// CHECK-NEXT: [[q01:%\d+]] = OpCompositeExtract %bool [[q]] 0 1
+// CHECK-NEXT: [[q10:%\d+]] = OpCompositeExtract %bool [[q]] 1 0
+// CHECK-NEXT: [[q11:%\d+]] = OpCompositeExtract %bool [[q]] 1 1
+// CHECK-NEXT: [[q20:%\d+]] = OpCompositeExtract %bool [[q]] 2 0
+// CHECK-NEXT: [[q21:%\d+]] = OpCompositeExtract %bool [[q]] 2 1
+// CHECK-NEXT: [[qt0:%\d+]] = OpCompositeConstruct %v3bool [[q00]] [[q10]] [[q20]]
+// CHECK-NEXT: [[qt1:%\d+]] = OpCompositeConstruct %v3bool [[q01]] [[q11]] [[q21]]
+// CHECK-NEXT:  [[qt:%\d+]] = OpCompositeConstruct %_arr_v3bool_uint_2 [[qt0]] [[qt1]]
+// CHECK-NEXT:                OpStore %qt [[qt]]
+  bool3x2 q;
+  bool2x3 qt = transpose(q);
+
+// CHECK:         [[r:%\d+]] = OpLoad %_arr_v4uint_uint_4 %r
+// CHECK-NEXT:  [[r00:%\d+]] = OpCompositeExtract %uint [[r]] 0 0
+// CHECK-NEXT:  [[r01:%\d+]] = OpCompositeExtract %uint [[r]] 0 1
+// CHECK-NEXT:  [[r02:%\d+]] = OpCompositeExtract %uint [[r]] 0 2
+// CHECK-NEXT:  [[r03:%\d+]] = OpCompositeExtract %uint [[r]] 0 3
+// CHECK-NEXT:  [[r10:%\d+]] = OpCompositeExtract %uint [[r]] 1 0
+// CHECK-NEXT:  [[r11:%\d+]] = OpCompositeExtract %uint [[r]] 1 1
+// CHECK-NEXT:  [[r12:%\d+]] = OpCompositeExtract %uint [[r]] 1 2
+// CHECK-NEXT:  [[r13:%\d+]] = OpCompositeExtract %uint [[r]] 1 3
+// CHECK-NEXT:  [[r20:%\d+]] = OpCompositeExtract %uint [[r]] 2 0
+// CHECK-NEXT:  [[r21:%\d+]] = OpCompositeExtract %uint [[r]] 2 1
+// CHECK-NEXT:  [[r22:%\d+]] = OpCompositeExtract %uint [[r]] 2 2
+// CHECK-NEXT:  [[r23:%\d+]] = OpCompositeExtract %uint [[r]] 2 3
+// CHECK-NEXT:  [[r30:%\d+]] = OpCompositeExtract %uint [[r]] 3 0
+// CHECK-NEXT:  [[r31:%\d+]] = OpCompositeExtract %uint [[r]] 3 1
+// CHECK-NEXT:  [[r32:%\d+]] = OpCompositeExtract %uint [[r]] 3 2
+// CHECK-NEXT:  [[r33:%\d+]] = OpCompositeExtract %uint [[r]] 3 3
+// CHECK-NEXT:  [[rt0:%\d+]] = OpCompositeConstruct %v4uint [[r00]] [[r10]] [[r20]] [[r30]]
+// CHECK-NEXT:  [[rt1:%\d+]] = OpCompositeConstruct %v4uint [[r01]] [[r11]] [[r21]] [[r31]]
+// CHECK-NEXT:  [[rt2:%\d+]] = OpCompositeConstruct %v4uint [[r02]] [[r12]] [[r22]] [[r32]]
+// CHECK-NEXT:  [[rt3:%\d+]] = OpCompositeConstruct %v4uint [[r03]] [[r13]] [[r23]] [[r33]]
+// CHECK-NEXT:   [[rt:%\d+]] = OpCompositeConstruct %_arr_v4uint_uint_4 [[rt0]] [[rt1]] [[rt2]] [[rt3]]
+// CHECK-NEXT:                 OpStore %rt [[rt]]
+  uint4x4 r;
+  uint4x4 rt = transpose(r);
 }
--- a/tools/clang/test/CodeGenSPIRV/op.matrix.access.mxn.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/op.matrix.access.mxn.hlsl
@ -100,4 +100,102 @@ void main() {
 // CHECK-NEXT: [[load9:%\d+]] = OpLoad %float [[access12]]
 // CHECK-NEXT: OpStore %scalar [[load9]]
    scalar = (mat + mat)[0][index];
+
+// Also test non-floating-point matrices, since they are represented differently (as arrays of vectors).
+    int2x3 intMat;
+    int3 intVec3;
+    int2 intVec2;
+    int intScalar;
+
+    // 1 element (from lvalue)
+// CHECK:      [[access0:%\d+]] = OpAccessChain %_ptr_Function_int %intMat %int_1 %int_2
+// CHECK-NEXT: [[load0:%\d+]] = OpLoad %int [[access0]]
+// CHECK-NEXT: OpStore %intScalar [[load0]]
+    intScalar = intMat._m12; // Used as rvalue
+// CHECK-NEXT: [[load1:%\d+]] = OpLoad %int %intScalar
+// CHECK-NEXT: [[access1:%\d+]] = OpAccessChain %_ptr_Function_int %intMat %int_0 %int_1
+// CHECK-NEXT: OpStore [[access1]] [[load1]]
+    intMat._12 = intScalar; // Used as lvalue
+
+    // >1 elements (from lvalue)
+// CHECK-NEXT: [[access2:%\d+]] = OpAccessChain %_ptr_Function_int %intMat %int_0 %int_1
+// CHECK-NEXT: [[load2:%\d+]] = OpLoad %int [[access2]]
+// CHECK-NEXT: [[access3:%\d+]] = OpAccessChain %_ptr_Function_int %intMat %int_0 %int_2
+// CHECK-NEXT: [[load3:%\d+]] = OpLoad %int [[access3]]
+// CHECK-NEXT: [[cc0:%\d+]] = OpCompositeConstruct %v2int [[load2]] [[load3]]
+// CHECK-NEXT: OpStore %intVec2 [[cc0]]
+    intVec2 = intMat._m01_m02; // Used as rvalue
+// CHECK-NEXT: [[rhs0:%\d+]] = OpLoad %v3int %intVec3
+// CHECK-NEXT: [[ce0:%\d+]] = OpCompositeExtract %int [[rhs0]] 0
+// CHECK-NEXT: [[access4:%\d+]] = OpAccessChain %_ptr_Function_int %intMat %int_1 %int_0
+// CHECK-NEXT: OpStore [[access4]] [[ce0]]
+// CHECK-NEXT: [[ce1:%\d+]] = OpCompositeExtract %int [[rhs0]] 1
+// CHECK-NEXT: [[access5:%\d+]] = OpAccessChain %_ptr_Function_int %intMat %int_0 %int_1
+// CHECK-NEXT: OpStore [[access5]] [[ce1]]
+// CHECK-NEXT: [[ce2:%\d+]] = OpCompositeExtract %int [[rhs0]] 2
+// CHECK-NEXT: [[access6:%\d+]] = OpAccessChain %_ptr_Function_int %intMat %int_0 %int_0
+// CHECK-NEXT: OpStore [[access6]] [[ce2]]
+    intMat._21_12_11 = intVec3; // Used as lvalue
+
+    // 1 element (from rvalue)
+// CHECK:      [[cc1:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 {{%\d+}} {{%\d+}}
+// CHECK-NEXT: [[ce3:%\d+]] = OpCompositeExtract %int [[cc1]] 1 2
+// CHECK-NEXT: OpStore %intScalar [[ce3]]
+    // Codegen: construct a temporary matrix first out of (intMat + intMat) and
+    // then extract the value
+    intScalar = (intMat + intMat)._m12;
+
+    // > 1 element (from rvalue)
+// CHECK:      [[cc2:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 {{%\d+}} {{%\d+}}
+// CHECK-NEXT: [[ce4:%\d+]] = OpCompositeExtract %int [[cc2]] 0 1
+// CHECK-NEXT: [[ce5:%\d+]] = OpCompositeExtract %int [[cc2]] 0 2
+// CHECK-NEXT: [[cc3:%\d+]] = OpCompositeConstruct %v2int [[ce4]] [[ce5]]
+// CHECK-NEXT: OpStore %intVec2 [[cc3]]
+    // Codegen: construct a temporary matrix first out of (intMat * intMat) and
+    // then extract the value
+    intVec2 = (intMat * intMat)._m01_m02;
+
+    // One level indexing (from lvalue)
+// CHECK-NEXT: [[access7:%\d+]] = OpAccessChain %_ptr_Function_v3int %intMat %uint_1
+// CHECK-NEXT: [[load4:%\d+]] = OpLoad %v3int [[access7]]
+// CHECK-NEXT: OpStore %intVec3 [[load4]]
+    intVec3 = intMat[1]; // Used as rvalue
+
+    // One level indexing (from lvalue)
+// CHECK-NEXT: [[load5:%\d+]] = OpLoad %v3int %intVec3
+// CHECK-NEXT: [[index0:%\d+]] = OpLoad %uint %index
+// CHECK-NEXT: [[access8:%\d+]] = OpAccessChain %_ptr_Function_v3int %intMat [[index0]]
+// CHECK-NEXT: OpStore [[access8]] [[load5]]
+    intMat[index] = intVec3; // Used as lvalue
+
+    // Two level indexing (from lvalue)
+// CHECK-NEXT: [[index1:%\d+]] = OpLoad %uint %index
+// CHECK-NEXT: [[access9:%\d+]] = OpAccessChain %_ptr_Function_int %intMat [[index1]] %uint_2
+// CHECK-NEXT: [[load6:%\d+]] = OpLoad %int [[access9]]
+// CHECK-NEXT: OpStore %intScalar [[load6]]
+    intScalar = intMat[index][2]; // Used as rvalue
+
+    // Two level indexing (from lvalue)
+// CHECK-NEXT: [[load7:%\d+]] = OpLoad %int %intScalar
+// CHECK-NEXT: [[index2:%\d+]] = OpLoad %uint %index
+// CHECK-NEXT: [[access10:%\d+]] = OpAccessChain %_ptr_Function_int %intMat %uint_1 [[index2]]
+// CHECK-NEXT: OpStore [[access10]] [[load7]]
+    intMat[1][index] = intScalar; // Used as lvalue
+
+    // One level indexing (from rvalue)
+// CHECK:      [[cc4:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 {{%\d+}} {{%\d+}}
+// CHECK-NEXT: OpStore %temp_var_vector_1 [[cc4]]
+// CHECK-NEXT: [[access11:%\d+]] = OpAccessChain %_ptr_Function_v3int %temp_var_vector_1 %uint_0
+// CHECK-NEXT: [[load8:%\d+]] = OpLoad %v3int [[access11]]
+// CHECK-NEXT: OpStore %intVec3 [[load8]]
+    intVec3 = (intMat + intMat)[0];
+
+    // Two level indexing (from rvalue)
+// CHECK-NEXT: [[index3:%\d+]] = OpLoad %uint %index
+// CHECK:      [[cc5:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 {{%\d+}} {{%\d+}}
+// CHECK-NEXT: OpStore %temp_var_vector_2 [[cc5]]
+// CHECK-NEXT: [[access12:%\d+]] = OpAccessChain %_ptr_Function_int %temp_var_vector_2 %uint_0 [[index3]]
+// CHECK-NEXT: [[load9:%\d+]] = OpLoad %int [[access12]]
+// CHECK-NEXT: OpStore %intScalar [[load9]]
+    intScalar = (intMat + intMat)[0][index];
 }
--- a/tools/clang/test/CodeGenSPIRV/type.matrix.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/type.matrix.hlsl
@ -10,90 +10,84 @@
 // other types.

 void main() {
-// XXXXX: %int = OpTypeInt 32 1
-// XXXXX: %uint = OpTypeInt 32 0
-
 // CHECK: %float = OpTypeFloat 32
-    float1x1 mat11;
-// XXXXX: %v2int = OpTypeVector %int 2
-    //int1x2   mat12;
-// XXXXX: %v3uint = OpTypeVector %uint 3
-    //uint1x3  mat13;
-// XXXXX: %bool = OpTypeBool
-// XXXXX-NEXT: %v4bool = OpTypeVector %bool 4
-    //bool1x4  mat14;
+   float1x1 mat11;
+// CHECK: %v2int = OpTypeVector %int 2
+   int1x2   mat12;
+// CHECK: %v3uint = OpTypeVector %uint 3
+   uint1x3  mat13;
+// CHECK: %v4bool = OpTypeVector %bool 4
+   bool1x4  mat14;

-    //int2x1   mat21;
-// XXXXX: %v2uint = OpTypeVector %uint 2
-// XXXXX-NEXT: %mat2v2uint = OpTypeMatrix %v2uint 2
-    //uint2x2  mat22;
-// XXXXX: %v3bool = OpTypeVector %bool 3
-// XXXXX-NEXT: %mat2v3bool = OpTypeMatrix %v3bool 2
-    //bool2x3  mat23;
+   int2x1   mat21;
+// CHECK: %_arr_v2uint_uint_2 = OpTypeArray %v2uint %uint_2
+   uint2x2  mat22;
+// CHECK: %v3bool = OpTypeVector %bool 3
+// CHECK-NEXT: %_arr_v3bool_uint_2 = OpTypeArray %v3bool %uint_2
+   bool2x3  mat23;
 // CHECK: %v4float = OpTypeVector %float 4
 // CHECK-NEXT: %mat2v4float = OpTypeMatrix %v4float 2
-    float2x4 mat24;
+   float2x4 mat24;

-    //uint3x1  mat31;
-// XXXXX: %v2bool = OpTypeVector %bool 2
-// XXXXX-NEXT: %mat3v2bool = OpTypeMatrix %v2bool 3
-    //bool3x2  mat32;
+   uint3x1  mat31;
+// CHECK: %v2bool = OpTypeVector %bool 2
+// CHECK: %_arr_v2bool_uint_3 = OpTypeArray %v2bool %uint_3
+   bool3x2  mat32;
 // CHECK: %v3float = OpTypeVector %float 3
 // CHECK-NEXT: %mat3v3float = OpTypeMatrix %v3float 3
-    float3x3 mat33;
-// XXXXX: %v4int = OpTypeVector %int 4
-// XXXXX-NEXT: %mat3v4int = OpTypeMatrix %v4int 3
-    //int3x4   mat34;
+   float3x3 mat33;
+// CHECK: %v4int = OpTypeVector %int 4
+// CHECK-NEXT: %_arr_v4int_uint_3 = OpTypeArray %v4int %uint_3
+   int3x4   mat34;

-    //bool4x1  mat41;
+   bool4x1  mat41;
 // CHECK: %v2float = OpTypeVector %float 2
 // CHECK-NEXT: %mat4v2float = OpTypeMatrix %v2float 4
-    float4x2 mat42;
-// XXXXX: %v3int = OpTypeVector %int 3
-// XXXXX-NEXT: %mat4v3int = OpTypeMatrix %v3int 4
-    //int4x3   mat43;
-// XXXXX: %v4uint = OpTypeVector %uint 4
-// XXXXX-NEXT: %mat4v4uint = OpTypeMatrix %v4uint 4
-    //uint4x4  mat44;
+   float4x2 mat42;
+// CHECK: %v3int = OpTypeVector %int 3
+// CHECK: %_arr_v3int_uint_4 = OpTypeArray %v3int %uint_4
+   int4x3   mat43;
+// CHECK: %v4uint = OpTypeVector %uint 4
+// CHECK: %_arr_v4uint_uint_4 = OpTypeArray %v4uint %uint_4
+   uint4x4  mat44;

 // CHECK: %mat4v4float = OpTypeMatrix %v4float 4
    matrix mat;

-    //matrix<int, 1, 1>   imat11;
-    //matrix<uint, 1, 3>  umat23;
+    matrix<int, 1, 1>   imat11;
+    matrix<uint, 1, 3>  umat23;
    matrix<float, 2, 1> fmat21;
    matrix<float, 1, 2> fmat12;
-// XXXXX: %mat3v4bool = OpTypeMatrix %v4bool 3
-    //matrix<bool, 3, 4>  bmat34;
+// CHECK: %_arr_v4bool_uint_3 = OpTypeArray %v4bool %uint_3
+    matrix<bool, 3, 4>  bmat34;

 // CHECK-LABEL: %bb_entry = OpLabel

-
 // CHECK-NEXT: %mat11 = OpVariable %_ptr_Function_float Function
-// XXXXX-NEXT: %mat12 = OpVariable %_ptr_Function_v2int Function
-// XXXXX-NEXT: %mat13 = OpVariable %_ptr_Function_v3uint Function
-// XXXXX-NEXT: %mat14 = OpVariable %_ptr_Function_v4bool Function
+// CHECK-NEXT: %mat12 = OpVariable %_ptr_Function_v2int Function
+// CHECK-NEXT: %mat13 = OpVariable %_ptr_Function_v3uint Function
+// CHECK-NEXT: %mat14 = OpVariable %_ptr_Function_v4bool Function

-// XXXXX-NEXT: %mat21 = OpVariable %_ptr_Function_v2int Function
-// XXXXX-NEXT: %mat22 = OpVariable %_ptr_Function_mat2v2uint Function
-// XXXXX-NEXT: %mat23 = OpVariable %_ptr_Function_mat2v3bool Function
+// CHECK-NEXT: %mat21 = OpVariable %_ptr_Function_v2int Function
+// CHECK-NEXT: %mat22 = OpVariable %_ptr_Function__arr_v2uint_uint_2 Function
+// CHECK-NEXT: %mat23 = OpVariable %_ptr_Function__arr_v3bool_uint_2 Function
 // CHECK-NEXT: %mat24 = OpVariable %_ptr_Function_mat2v4float Function

-// XXXXX-NEXT: %mat31 = OpVariable %_ptr_Function_v3uint Function
-// XXXXX-NEXT: %mat32 = OpVariable %_ptr_Function_mat3v2bool Function
+// CHECK-NEXT: %mat31 = OpVariable %_ptr_Function_v3uint Function
+// CHECK-NEXT: %mat32 = OpVariable %_ptr_Function__arr_v2bool_uint_3 Function
 // CHECK-NEXT: %mat33 = OpVariable %_ptr_Function_mat3v3float Function
-// XXXXX-NEXT: %mat34 = OpVariable %_ptr_Function_mat3v4int Function
+// CHECK-NEXT: %mat34 = OpVariable %_ptr_Function__arr_v4int_uint_3 Function

-// XXXXX-NEXT: %mat41 = OpVariable %_ptr_Function_v4bool Function
+// CHECK-NEXT: %mat41 = OpVariable %_ptr_Function_v4bool Function
 // CHECK-NEXT: %mat42 = OpVariable %_ptr_Function_mat4v2float Function
-// XXXXX-NEXT: %mat43 = OpVariable %_ptr_Function_mat4v3int Function
-// XXXXX-NEXT: %mat44 = OpVariable %_ptr_Function_mat4v4uint Function
+// CHECK-NEXT: %mat43 = OpVariable %_ptr_Function__arr_v3int_uint_4 Function
+// CHECK-NEXT: %mat44 = OpVariable %_ptr_Function__arr_v4uint_uint_4 Function

 // CHECK-NEXT: %mat = OpVariable %_ptr_Function_mat4v4float Function

-// XXXXX-NEXT: %imat11 = OpVariable %_ptr_Function_int Function
-// XXXXX-NEXT: %umat23 = OpVariable %_ptr_Function_v3uint Function
+// CHECK-NEXT: %imat11 = OpVariable %_ptr_Function_int Function
+// CHECK-NEXT: %umat23 = OpVariable %_ptr_Function_v3uint Function
 // CHECK-NEXT: %fmat21 = OpVariable %_ptr_Function_v2float Function
 // CHECK-NEXT: %fmat12 = OpVariable %_ptr_Function_v2float Function
-// XXXXX-NEXT: %bmat34 = OpVariable %_ptr_Function_mat3v4bool Function
+// CHECK-NEXT: %bmat34 = OpVariable %_ptr_Function__arr_v4bool_uint_3 Function
 }
--- a/tools/clang/test/CodeGenSPIRV/unary-op.postfix-dec.matrix.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/unary-op.postfix-dec.matrix.hlsl
@ -2,6 +2,8 @@

 // CHECK: [[v2f1:%\d+]] = OpConstantComposite %v2float %float_1 %float_1
 // CHECK: [[v3f1:%\d+]] = OpConstantComposite %v3float %float_1 %float_1 %float_1
+// CHECK: [[v3i1:%\d+]] = OpConstantComposite %v3int %int_1 %int_1 %int_1
+
 void main() {
 // CHECK-LABEL: %bb_entry = OpLabel

@ -33,11 +35,24 @@ void main() {
    float2x3 g, h;
 // CHECK-NEXT: [[g0:%\d+]] = OpLoad %mat2v3float %g
 // CHECK-NEXT: [[g0v0:%\d+]] = OpCompositeExtract %v3float [[g0]] 0
-// CHECK-NEXT: [[inc0:%\d+]] = OpFSub %v3float [[g0v0]] [[v3f1]]
+// CHECK-NEXT: [[dec0:%\d+]] = OpFSub %v3float [[g0v0]] [[v3f1]]
 // CHECK-NEXT: [[g0v1:%\d+]] = OpCompositeExtract %v3float [[g0]] 1
-// CHECK-NEXT: [[inc1:%\d+]] = OpFSub %v3float [[g0v1]] [[v3f1]]
-// CHECK-NEXT: [[g1:%\d+]] = OpCompositeConstruct %mat2v3float [[inc0]] [[inc1]]
+// CHECK-NEXT: [[dec1:%\d+]] = OpFSub %v3float [[g0v1]] [[v3f1]]
+// CHECK-NEXT: [[g1:%\d+]] = OpCompositeConstruct %mat2v3float [[dec0]] [[dec1]]
 // CHECK-NEXT: OpStore %g [[g1]]
 // CHECK-NEXT: OpStore %h [[g0]]
    h = g--;
+
+// CHECK:         [[i:%\d+]] = OpLoad %_arr_v3int_uint_2 %i
+// CHECK-NEXT:   [[i0:%\d+]] = OpCompositeExtract %v3int [[i]] 0
+// CHECK-NEXT: [[dec0:%\d+]] = OpISub %v3int [[i0]] [[v3i1]]
+// CHECK-NEXT:   [[i1:%\d+]] = OpCompositeExtract %v3int [[i]] 1
+// CHECK-NEXT: [[dec1:%\d+]] = OpISub %v3int [[i1]] [[v3i1]]
+// CHECK-NEXT:  [[dec:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[dec0]] [[dec1]]
+// CHECK-NEXT: OpStore %i [[dec]]
+// CHECK-NEXT: OpStore %j [[i]]
+    int2x3 i, j;
+    j = i--;
+
+// Note: Postfix decrement is not allowed on boolean matrices (rejected by the front-end).
 }
--- a/tools/clang/test/CodeGenSPIRV/unary-op.postfix-inc.matrix.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/unary-op.postfix-inc.matrix.hlsl
@ -2,6 +2,8 @@

 // CHECK: [[v2f1:%\d+]] = OpConstantComposite %v2float %float_1 %float_1
 // CHECK: [[v3f1:%\d+]] = OpConstantComposite %v3float %float_1 %float_1 %float_1
+// CHECK: [[v3i1:%\d+]] = OpConstantComposite %v3int %int_1 %int_1 %int_1
+
 void main() {
 // CHECK-LABEL: %bb_entry = OpLabel

@ -40,4 +42,15 @@ void main() {
 // CHECK-NEXT: OpStore %g [[g1]]
 // CHECK-NEXT: OpStore %h [[g0]]
    h = g++;
+
+// CHECK-NEXT: [[m0:%\d+]] = OpLoad %_arr_v3int_uint_2 %m
+// CHECK-NEXT: [[m0v0:%\d+]] = OpCompositeExtract %v3int [[m0]] 0
+// CHECK-NEXT: [[inc0:%\d+]] = OpIAdd %v3int [[m0v0]] [[v3i1]]
+// CHECK-NEXT: [[m0v1:%\d+]] = OpCompositeExtract %v3int [[m0]] 1
+// CHECK-NEXT: [[inc1:%\d+]] = OpIAdd %v3int [[m0v1]] [[v3i1]]
+// CHECK-NEXT: [[m1:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[inc0]] [[inc1]]
+// CHECK-NEXT: OpStore %m [[m1]]
+// CHECK-NEXT: OpStore %n [[m0]]
+    int2x3 m, n;
+    n = m++;
 }
--- a/tools/clang/test/CodeGenSPIRV/unary-op.prefix-dec.matrix.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/unary-op.prefix-dec.matrix.hlsl
@ -2,6 +2,8 @@

 // CHECK: [[v2f1:%\d+]] = OpConstantComposite %v2float %float_1 %float_1
 // CHECK: [[v3f1:%\d+]] = OpConstantComposite %v3float %float_1 %float_1 %float_1
+// CHECK: [[v3i1:%\d+]] = OpConstantComposite %v3int %int_1 %int_1 %int_1
+
 void main() {
 // CHECK-LABEL: %bb_entry = OpLabel

@ -54,10 +56,10 @@ void main() {
    float2x3 g, h;
 // CHECK-NEXT: [[g0:%\d+]] = OpLoad %mat2v3float %g
 // CHECK-NEXT: [[g0v0:%\d+]] = OpCompositeExtract %v3float [[g0]] 0
-// CHECK-NEXT: [[inc0:%\d+]] = OpFSub %v3float [[g0v0]] [[v3f1]]
+// CHECK-NEXT: [[dec0:%\d+]] = OpFSub %v3float [[g0v0]] [[v3f1]]
 // CHECK-NEXT: [[g0v1:%\d+]] = OpCompositeExtract %v3float [[g0]] 1
-// CHECK-NEXT: [[inc1:%\d+]] = OpFSub %v3float [[g0v1]] [[v3f1]]
-// CHECK-NEXT: [[g1:%\d+]] = OpCompositeConstruct %mat2v3float [[inc0]] [[inc1]]
+// CHECK-NEXT: [[dec1:%\d+]] = OpFSub %v3float [[g0v1]] [[v3f1]]
+// CHECK-NEXT: [[g1:%\d+]] = OpCompositeConstruct %mat2v3float [[dec0]] [[dec1]]
 // CHECK-NEXT: OpStore %g [[g1]]
 // CHECK-NEXT: [[g2:%\d+]] = OpLoad %mat2v3float %g
 // CHECK-NEXT: OpStore %h [[g2]]
@ -65,11 +67,33 @@ void main() {
 // CHECK-NEXT: [[h0:%\d+]] = OpLoad %mat2v3float %h
 // CHECK-NEXT: [[g3:%\d+]] = OpLoad %mat2v3float %g
 // CHECK-NEXT: [[g3v0:%\d+]] = OpCompositeExtract %v3float [[g3]] 0
-// CHECK-NEXT: [[inc2:%\d+]] = OpFSub %v3float [[g3v0]] [[v3f1]]
+// CHECK-NEXT: [[dec2:%\d+]] = OpFSub %v3float [[g3v0]] [[v3f1]]
 // CHECK-NEXT: [[g3v1:%\d+]] = OpCompositeExtract %v3float [[g3]] 1
-// CHECK-NEXT: [[inc3:%\d+]] = OpFSub %v3float [[g3v1]] [[v3f1]]
-// CHECK-NEXT: [[g4:%\d+]] = OpCompositeConstruct %mat2v3float [[inc2]] [[inc3]]
+// CHECK-NEXT: [[dec3:%\d+]] = OpFSub %v3float [[g3v1]] [[v3f1]]
+// CHECK-NEXT: [[g4:%\d+]] = OpCompositeConstruct %mat2v3float [[dec2]] [[dec3]]
 // CHECK-NEXT: OpStore %g [[g4]]
 // CHECK-NEXT: OpStore %g [[h0]]
    --g = h;
+
+    int2x3 m, n;
+// CHECK-NEXT: [[m0:%\d+]] = OpLoad %_arr_v3int_uint_2 %m
+// CHECK-NEXT: [[m0v0:%\d+]] = OpCompositeExtract %v3int [[m0]] 0
+// CHECK-NEXT: [[dec0:%\d+]] = OpISub %v3int [[m0v0]] [[v3i1]]
+// CHECK-NEXT: [[m0v1:%\d+]] = OpCompositeExtract %v3int [[m0]] 1
+// CHECK-NEXT: [[dec1:%\d+]] = OpISub %v3int [[m0v1]] [[v3i1]]
+// CHECK-NEXT: [[m1:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[dec0]] [[dec1]]
+// CHECK-NEXT: OpStore %m [[m1]]
+// CHECK-NEXT: [[m2:%\d+]] = OpLoad %_arr_v3int_uint_2 %m
+// CHECK-NEXT: OpStore %n [[m2]]
+    n = --m;
+// CHECK-NEXT: [[n0:%\d+]] = OpLoad %_arr_v3int_uint_2 %n
+// CHECK-NEXT: [[m3:%\d+]] = OpLoad %_arr_v3int_uint_2 %m
+// CHECK-NEXT: [[m3v0:%\d+]] = OpCompositeExtract %v3int [[m3]] 0
+// CHECK-NEXT: [[dec2:%\d+]] = OpISub %v3int [[m3v0]] [[v3i1]]
+// CHECK-NEXT: [[m3v1:%\d+]] = OpCompositeExtract %v3int [[m3]] 1
+// CHECK-NEXT: [[dec3:%\d+]] = OpISub %v3int [[m3v1]] [[v3i1]]
+// CHECK-NEXT: [[m4:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[dec2]] [[dec3]]
+// CHECK-NEXT: OpStore %m [[m4]]
+// CHECK-NEXT: OpStore %m [[n0]]
+    --m = n;
 }
--- a/tools/clang/test/CodeGenSPIRV/unary-op.prefix-inc.matrix.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/unary-op.prefix-inc.matrix.hlsl
@ -2,6 +2,7 @@

 // CHECK: [[v2f1:%\d+]] = OpConstantComposite %v2float %float_1 %float_1
 // CHECK: [[v3f1:%\d+]] = OpConstantComposite %v3float %float_1 %float_1 %float_1
+// CHECK: [[v3i1:%\d+]] = OpConstantComposite %v3int %int_1 %int_1 %int_1
 void main() {
 // CHECK-LABEL: %bb_entry = OpLabel

@ -72,4 +73,28 @@ void main() {
 // CHECK-NEXT: OpStore %g [[g4]]
 // CHECK-NEXT: OpStore %g [[h0]]
    ++g = h;
+
+    int2x3 m, n;
+// CHECK-NEXT: [[m0:%\d+]] = OpLoad %_arr_v3int_uint_2 %m
+// CHECK-NEXT: [[m0v0:%\d+]] = OpCompositeExtract %v3int [[m0]] 0
+// CHECK-NEXT: [[inc0:%\d+]] = OpIAdd %v3int [[m0v0]] [[v3i1]]
+// CHECK-NEXT: [[m0v1:%\d+]] = OpCompositeExtract %v3int [[m0]] 1
+// CHECK-NEXT: [[inc1:%\d+]] = OpIAdd %v3int [[m0v1]] [[v3i1]]
+// CHECK-NEXT: [[m1:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[inc0]] [[inc1]]
+// CHECK-NEXT: OpStore %m [[m1]]
+// CHECK-NEXT: [[m2:%\d+]] = OpLoad %_arr_v3int_uint_2 %m
+// CHECK-NEXT: OpStore %n [[m2]]
+    n = ++m;
+// CHECK-NEXT: [[n0:%\d+]] = OpLoad %_arr_v3int_uint_2 %n
+// CHECK-NEXT: [[m3:%\d+]] = OpLoad %_arr_v3int_uint_2 %m
+// CHECK-NEXT: [[m3v0:%\d+]] = OpCompositeExtract %v3int [[m3]] 0
+// CHECK-NEXT: [[inc2:%\d+]] = OpIAdd %v3int [[m3v0]] [[v3i1]]
+// CHECK-NEXT: [[m3v1:%\d+]] = OpCompositeExtract %v3int [[m3]] 1
+// CHECK-NEXT: [[inc3:%\d+]] = OpIAdd %v3int [[m3v1]] [[v3i1]]
+// CHECK-NEXT: [[m4:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[inc2]] [[inc3]]
+// CHECK-NEXT: OpStore %m [[m4]]
+// CHECK-NEXT: OpStore %m [[n0]]
+    ++m = n;
+
+// Note: Prefix increment is not allowed on boolean matrices (rejected by the front-end).
 }
--- a/tools/clang/test/CodeGenSPIRV/var.init.matrix.mxn.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/var.init.matrix.mxn.hlsl
@ -148,4 +148,162 @@ void main() {
 // CHECK-NEXT: [[cc25:%\d+]] = OpCompositeConstruct %mat4v4float [[cc21]] [[cc22]] [[cc23]] [[cc24]]
 // CHECK-NEXT: OpStore %mat11 [[cc25]]
    float4x4 mat11 = {mat8, mat9, mat10};
+
+
+    // Non-floating point matrices
+
+
+    // Constructor
+// CHECK:      [[cc00:%\d+]] = OpCompositeConstruct %v3int %int_1 %int_2 %int_3
+// CHECK-NEXT: [[cc01:%\d+]] = OpCompositeConstruct %v3int %int_4 %int_5 %int_6
+// CHECK-NEXT: [[cc02:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[cc00]] [[cc01]]
+// CHECK-NEXT: OpStore %imat1 [[cc02]]
+    int2x3 imat1 = int2x3(1, 2, 3, 4, 5, 6);
+    // All elements in a single {}
+// CHECK-NEXT: [[cc03:%\d+]] = OpCompositeConstruct %v2int %int_1 %int_2
+// CHECK-NEXT: [[cc04:%\d+]] = OpCompositeConstruct %v2int %int_3 %int_4
+// CHECK-NEXT: [[cc05:%\d+]] = OpCompositeConstruct %v2int %int_5 %int_6
+// CHECK-NEXT: [[cc06:%\d+]] = OpCompositeConstruct %_arr_v2int_uint_3 [[cc03]] [[cc04]] [[cc05]]
+// CHECK-NEXT: OpStore %imat2 [[cc06]]
+    int3x2 imat2 = {1, 2, 3, 4, 5, 6};
+    // Each vector has its own {}
+// CHECK-NEXT: [[cc07:%\d+]] = OpCompositeConstruct %v3int %int_1 %int_2 %int_3
+// CHECK-NEXT: [[cc08:%\d+]] = OpCompositeConstruct %v3int %int_4 %int_5 %int_6
+// CHECK-NEXT: [[cc09:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[cc07]] [[cc08]]
+// CHECK-NEXT: OpStore %imat3 [[cc09]]
+    int2x3 imat3 = {{1, 2, 3}, {4, 5, 6}};
+    // Weird & complicated {}s
+// CHECK-NEXT: [[cc10:%\d+]] = OpCompositeConstruct %v2int %int_1 %int_2
+// CHECK-NEXT: [[cc11:%\d+]] = OpCompositeConstruct %v2int %int_3 %int_4
+// CHECK-NEXT: [[cc12:%\d+]] = OpCompositeConstruct %v2int %int_5 %int_6
+// CHECK-NEXT: [[cc13:%\d+]] = OpCompositeConstruct %_arr_v2int_uint_3 [[cc10]] [[cc11]] [[cc12]]
+// CHECK-NEXT: OpStore %imat4 [[cc13]]
+    int3x2 imat4 = {{1}, {2, 3}, 4, {{5}, {{6}}}};
+
+    int2 intVec2;
+    int3 intVec3;
+    int4 intVec4;
+
+    // Mixed scalar and vector
+// CHECK:         [[s:%\d+]] = OpLoad %int %intScalar
+// CHECK-NEXT: [[vec1:%\d+]] = OpLoad %int %intVec1
+// CHECK-NEXT: [[vec2:%\d+]] = OpLoad %v2int %intVec2
+// CHECK-NEXT: [[ce00:%\d+]] = OpCompositeExtract %int [[vec2]] 0
+// CHECK-NEXT: [[ce01:%\d+]] = OpCompositeExtract %int [[vec2]] 1
+// CHECK-NEXT: [[cc14:%\d+]] = OpCompositeConstruct %v4int [[s]] [[vec1]] [[ce00]] [[ce01]]
+
+// CHECK-NEXT: [[vec3:%\d+]] = OpLoad %v3int %intVec3
+// CHECK-NEXT: [[ce02:%\d+]] = OpCompositeExtract %int [[vec3]] 0
+// CHECK-NEXT: [[ce03:%\d+]] = OpCompositeExtract %int [[vec3]] 1
+// CHECK-NEXT: [[ce04:%\d+]] = OpCompositeExtract %int [[vec3]] 2
+// CHECK-NEXT:[[vec2a:%\d+]] = OpLoad %v2int %intVec2
+// CHECK-NEXT: [[ce05:%\d+]] = OpCompositeExtract %int [[vec2a]] 0
+// CHECK-NEXT: [[ce06:%\d+]] = OpCompositeExtract %int [[vec2a]] 1
+// CHECK-NEXT: [[cc15:%\d+]] = OpCompositeConstruct %v4int [[ce02]] [[ce03]] [[ce04]] [[ce05]]
+
+// CHECK-NEXT: [[cc16:%\d+]] = OpCompositeConstruct %v4int [[ce06]] %int_1 %int_2 %int_3
+
+// CHECK-NEXT: [[vec4:%\d+]] = OpLoad %v4int %intVec4
+
+// CHECK-NEXT: [[cc17:%\d+]] = OpCompositeConstruct %_arr_v4int_uint_4 [[cc14]] [[cc15]] [[cc16]] [[vec4]]
+// CHECK-NEXT:  OpStore %imat5 [[cc17]]
+    int4x4 imat5 = {intScalar, intVec1, intVec2, // [0]
+                    intVec3,   intVec2,          // [1] + 1 scalar
+                     int2(1, 2), 3,              // [2] - 1 scalar
+                     intVec4                     // [3]
+    };
+
+    // From value of the same type
+// CHECK-NEXT: [[imat5:%\d+]] = OpLoad %_arr_v4int_uint_4 %imat5
+// CHECK-NEXT:                  OpStore %imat6 [[imat5]]
+    int4x4 imat6 = int4x4(imat5);
+
+    // Casting
+    float floatScalar;
+// CHECK:                      [[intVec1:%\d+]] = OpLoad %int %intVec1
+// CHECK-NEXT:              [[uintScalar:%\d+]] = OpLoad %uint %uintScalar
+// CHECK-NEXT:               [[intScalar:%\d+]] = OpBitcast %int [[uintScalar]]
+// CHECK-NEXT:                [[uintVec2:%\d+]] = OpLoad %v2uint %uintVec2
+// CHECK-NEXT:              [[uintVec2e0:%\d+]] = OpCompositeExtract %uint [[uintVec2]] 0
+// CHECK-NEXT:              [[uintVec2e1:%\d+]] = OpCompositeExtract %uint [[uintVec2]] 1
+// CHECK-NEXT:  [[convert_uintVec2e0_int:%\d+]] = OpBitcast %int [[uintVec2e0]]
+// CHECK-NEXT:                [[imat7_r0:%\d+]] = OpCompositeConstruct %v3int [[intVec1]] [[intScalar]] [[convert_uintVec2e0_int]]
+// CHECK-NEXT:  [[convert_uintVec2e1_int:%\d+]] = OpBitcast %int [[uintVec2e1]]
+// CHECK-NEXT:             [[floatScalar:%\d+]] = OpLoad %float %floatScalar
+// CHECK-NEXT: [[convert_floatScalar_int:%\d+]] = OpConvertFToS %int [[floatScalar]]
+// CHECK-NEXT:              [[boolScalar:%\d+]] = OpLoad %bool %boolScalar
+// CHECK-NEXT:  [[convert_boolScalar_int:%\d+]] = OpSelect %int [[boolScalar]] %int_1 %int_0
+// CHECK-NEXT:                [[imat7_r1:%\d+]] = OpCompositeConstruct %v3int [[convert_uintVec2e1_int]] [[convert_floatScalar_int]] [[convert_boolScalar_int]]
+// CHECK-NEXT:                  [[v3bool:%\d+]] = OpLoad %v3bool %boolVec3
+// CHECK-NEXT:                [[imat7_r2:%\d+]] = OpSelect %v3int [[v3bool]] {{%\d+}} {{%\d+}}
+// CHECK-NEXT:                         {{%\d+}} = OpCompositeConstruct %_arr_v3int_uint_3 [[imat7_r0]] [[imat7_r1]] [[imat7_r2]] 
+    int3x3 imat7 = {intVec1, uintScalar, uintVec2, // [0] + 1 scalar
+                    floatScalar, boolScalar,       // [1] - 1 scalar
+                    boolVec3                       // [2]
+    };
+
+    // Decomposing matrices
+    int2x2 imat8;
+    int2x4 imat9;
+    int4x1 imat10;
+    // TODO: Optimization opportunity. We are extracting all elements in each
+    // vector and then reconstructing the original vector. Optimally we should
+    // extract vectors from matrices directly.
+
+// CHECK:         [[imat8:%\d+]] = OpLoad %_arr_v2int_uint_2 %imat8
+// CHECK-NEXT: [[imat8_00:%\d+]] = OpCompositeExtract %int [[imat8]] 0 0
+// CHECK-NEXT: [[imat8_01:%\d+]] = OpCompositeExtract %int [[imat8]] 0 1
+// CHECK-NEXT: [[imat8_10:%\d+]] = OpCompositeExtract %int [[imat8]] 1 0
+// CHECK-NEXT: [[imat8_11:%\d+]] = OpCompositeExtract %int [[imat8]] 1 1
+// CHECK-NEXT:     [[cc21:%\d+]] = OpCompositeConstruct %v4int [[imat8_00]] [[imat8_01]] [[imat8_10]] [[imat8_11]]
+
+// CHECK-NEXT:    [[imat9:%\d+]] = OpLoad %_arr_v4int_uint_2 %imat9
+// CHECK-NEXT: [[imat9_00:%\d+]] = OpCompositeExtract %int [[imat9]] 0 0
+// CHECK-NEXT: [[imat9_01:%\d+]] = OpCompositeExtract %int [[imat9]] 0 1
+// CHECK-NEXT: [[imat9_02:%\d+]] = OpCompositeExtract %int [[imat9]] 0 2
+// CHECK-NEXT: [[imat9_03:%\d+]] = OpCompositeExtract %int [[imat9]] 0 3
+// CHECK-NEXT: [[imat9_10:%\d+]] = OpCompositeExtract %int [[imat9]] 1 0
+// CHECK-NEXT: [[imat9_11:%\d+]] = OpCompositeExtract %int [[imat9]] 1 1
+// CHECK-NEXT: [[imat9_12:%\d+]] = OpCompositeExtract %int [[imat9]] 1 2
+// CHECK-NEXT: [[imat9_13:%\d+]] = OpCompositeExtract %int [[imat9]] 1 3
+// CHECK-NEXT:     [[cc22:%\d+]] = OpCompositeConstruct %v4int [[imat9_00]] [[imat9_01]] [[imat9_02]] [[imat9_03]]
+// CHECK-NEXT:     [[cc23:%\d+]] = OpCompositeConstruct %v4int [[imat9_10]] [[imat9_11]] [[imat9_12]] [[imat9_13]]
+
+// CHECK-NEXT: [[imat10:%\d+]] = OpLoad %v4int %imat10
+// CHECK-NEXT: [[imat10_0:%\d+]] = OpCompositeExtract %int [[imat10]] 0
+// CHECK-NEXT: [[imat10_1:%\d+]] = OpCompositeExtract %int [[imat10]] 1
+// CHECK-NEXT: [[imat10_2:%\d+]] = OpCompositeExtract %int [[imat10]] 2
+// CHECK-NEXT: [[imat10_3:%\d+]] = OpCompositeExtract %int [[imat10]] 3
+// CHECK-NEXT: [[cc24:%\d+]] = OpCompositeConstruct %v4int [[imat10_0]] [[imat10_1]] [[imat10_2]] [[imat10_3]]
+
+// CHECK-NEXT: [[cc25:%\d+]] = OpCompositeConstruct %_arr_v4int_uint_4 [[cc21]] [[cc22]] [[cc23]] [[cc24]]
+// CHECK-NEXT: OpStore %imat11 [[cc25]]
+    int4x4 imat11 = {imat8, imat9, imat10};
+
+    // Boolean matrices
+// CHECK:      [[cc00:%\d+]] = OpCompositeConstruct %v3bool %false %true %false
+// CHECK-NEXT: [[cc01:%\d+]] = OpCompositeConstruct %v3bool %true %true %false
+// CHECK-NEXT: [[cc02:%\d+]] = OpCompositeConstruct %_arr_v3bool_uint_2 [[cc00]] [[cc01]]
+// CHECK-NEXT:                 OpStore %bmat1 [[cc02]]
+    bool2x3 bmat1 = bool2x3(false, true, false, true, true, false);
+    // All elements in a single {}
+// CHECK-NEXT: [[cc03:%\d+]] = OpCompositeConstruct %v2bool %false %true
+// CHECK-NEXT: [[cc04:%\d+]] = OpCompositeConstruct %v2bool %false %true
+// CHECK-NEXT: [[cc05:%\d+]] = OpCompositeConstruct %v2bool %true %false
+// CHECK-NEXT: [[cc06:%\d+]] = OpCompositeConstruct %_arr_v2bool_uint_3 [[cc03]] [[cc04]] [[cc05]]
+// CHECK-NEXT:                 OpStore %bmat2 [[cc06]]
+    bool3x2 bmat2 = {false, true, false, true, true, false};
+    // Each vector has its own {}
+// CHECK-NEXT: [[cc07:%\d+]] = OpCompositeConstruct %v3bool %false %true %false
+// CHECK-NEXT: [[cc08:%\d+]] = OpCompositeConstruct %v3bool %true %true %false
+// CHECK-NEXT: [[cc09:%\d+]] = OpCompositeConstruct %_arr_v3bool_uint_2 [[cc07]] [[cc08]]
+// CHECK-NEXT:                 OpStore %bmat3 [[cc09]]
+    bool2x3 bmat3 = {{false, true, false}, {true, true, false}};
+    // Weird & complicated {}s
+// CHECK-NEXT: [[cc10:%\d+]] = OpCompositeConstruct %v2bool %false %true
+// CHECK-NEXT: [[cc11:%\d+]] = OpCompositeConstruct %v2bool %false %true
+// CHECK-NEXT: [[cc12:%\d+]] = OpCompositeConstruct %v2bool %true %false
+// CHECK-NEXT: [[cc13:%\d+]] = OpCompositeConstruct %_arr_v2bool_uint_3 [[cc10]] [[cc11]] [[cc12]]
+// CHECK-NEXT:                 OpStore %bmat4 [[cc13]]
+    bool3x2 bmat4 = {{false}, {true, false}, true, {{true}, {{false}}}};
 }