Fp16 constant buffer and shader flag (#575)

This change adds fp16 support to constant buffers. The row pitch stays at 4 dwords, so up to 8 halves fit in one row. Dword-sized data types are aligned to dword addresses for efficiency. It also introduces a new shader flag, "use native low precision", which is set when a low-precision type is present and the /no-min-precision option is enabled.
2017-08-21 15:06:45 -07:00 · 2017-08-21 15:06:45 -07:00 · b89c21d25b
--- a/include/dxc/HLSL/DxilCompType.h
+++ b/include/dxc/HLSL/DxilCompType.h
@ -63,6 +63,7 @@ public:
  bool IsSNorm() const;
  bool IsUNorm() const;
  bool Is64Bit() const;
+  bool Is16Bit() const;

  /// For min-precision types, returns upconverted (base) type.
  CompType GetBaseCompType() const;
@ -83,7 +84,7 @@ public:
  static CompType GetCompType(llvm::Type * type);

  const char *GetName() const;
-  const char *GetHLSLName() const;
+  const char *GetHLSLName(bool MinPrecision) const;

 private:
  Kind m_Kind;
--- a/include/dxc/HLSL/DxilConstants.h
+++ b/include/dxc/HLSL/DxilConstants.h
@ -914,6 +914,13 @@ namespace DXIL {
  const uint8_t kCompMask_W     = 0x8;
  const uint8_t kCompMask_All   = 0xF;

+
+  enum class LowPrecisionMode { // How 16-bit (low precision) types are treated for a module.
+    Undefined = 0, // Not resolved yet; OP and DxilTypeSystem start here and resolve it on first query.
+    UseMinPrecision, // Min-precision semantics; the default when /no-min-precision is not passed.
+    UseNativeLowPrecision // Native 16-bit types; selected by /no-min-precision.
+  };
+
 } // namespace DXIL

 } // namespace hlsl
--- a/include/dxc/HLSL/DxilContainer.h
+++ b/include/dxc/HLSL/DxilContainer.h
@ -91,7 +91,7 @@ static const uint64_t ShaderFeatureInfo_Doubles = 0x0001;
 static const uint64_t ShaderFeatureInfo_ComputeShadersPlusRawAndStructuredBuffersViaShader4X = 0x0002;
 static const uint64_t ShaderFeatureInfo_UAVsAtEveryStage = 0x0004;
 static const uint64_t ShaderFeatureInfo_64UAVs = 0x0008;
-static const uint64_t ShaderFeatureInfo_MininumPrecision = 0x0010;
+static const uint64_t ShaderFeatureInfo_MinimumPrecision = 0x0010;
 static const uint64_t ShaderFeatureInfo_11_1_DoubleExtensions = 0x0020;
 static const uint64_t ShaderFeatureInfo_11_1_ShaderExtensions = 0x0040;
 static const uint64_t ShaderFeatureInfo_LEVEL9ComparisonFiltering = 0x0080;
@ -105,8 +105,9 @@ static const uint64_t ShaderFeatureInfo_WaveOps = 0x4000;
 static const uint64_t ShaderFeatureInfo_Int64Ops = 0x8000;
 static const uint64_t ShaderFeatureInfo_ViewID = 0x10000;
 static const uint64_t ShaderFeatureInfo_Barycentrics = 0x20000;
+static const uint64_t ShaderFeatureInfo_NativeLowPrecision = 0x40000;

-static const unsigned ShaderFeatureInfoCount = 18;
+static const unsigned ShaderFeatureInfoCount = 19;

 struct DxilShaderFeatureInfo {
  uint64_t FeatureFlags;
--- a/include/dxc/HLSL/DxilModule.h
+++ b/include/dxc/HLSL/DxilModule.h
@ -220,8 +220,8 @@ public:
    void SetEnableRawAndStructuredBuffers(bool flag) { m_bEnableRawAndStructuredBuffers = flag; }
    bool GetEnableRawAndStructuredBuffers() const { return m_bEnableRawAndStructuredBuffers; }

-    void SetEnableMinPrecision(bool flag) { m_bEnableMinPrecision = flag; }
-    bool GetEnableMinPrecision() const { return m_bEnableMinPrecision; }
+    void SetLowPrecisionPresent(bool flag) { m_bLowPrecisionPresent = flag; }
+    bool GetLowPrecisionPresent() const { return m_bLowPrecisionPresent; }

    void SetEnableDoubleExtensions(bool flag) { m_bEnableDoubleExtensions = flag; }
    bool GetEnableDoubleExtensions() const { return m_bEnableDoubleExtensions; }
@ -275,6 +275,9 @@ public:
    void SetBarycentrics(bool flag) { m_bBarycentrics = flag; }
    bool GetBarycentrics() const { return m_bBarycentrics; }

+    void SetUseNativeLowPrecision(bool flag) { m_bUseNativeLowPrecision = flag; }
+    bool GetUseNativeLowPrecision() const { return m_bUseNativeLowPrecision; }
+
    static uint64_t GetShaderFlagsRawForCollection(); // some flags are collected (eg use 64-bit), some provided (eg allow refactoring)
    uint64_t GetShaderFlagsRaw() const;
    void SetShaderFlagsRaw(uint64_t data);
@ -285,7 +288,7 @@ public:
    unsigned m_bEnableDoublePrecision :1; // D3D11_SB_GLOBAL_FLAG_ENABLE_DOUBLE_PRECISION_FLOAT_OPS
    unsigned m_bForceEarlyDepthStencil :1; // D3D11_SB_GLOBAL_FLAG_FORCE_EARLY_DEPTH_STENCIL
    unsigned m_bEnableRawAndStructuredBuffers :1; // D3D11_SB_GLOBAL_FLAG_ENABLE_RAW_AND_STRUCTURED_BUFFERS
-    unsigned m_bEnableMinPrecision :1; // D3D11_1_SB_GLOBAL_FLAG_ENABLE_MINIMUM_PRECISION
+    unsigned m_bLowPrecisionPresent :1; // D3D11_1_SB_GLOBAL_FLAG_ENABLE_MINIMUM_PRECISION
    unsigned m_bEnableDoubleExtensions :1; // D3D11_1_SB_GLOBAL_FLAG_ENABLE_DOUBLE_EXTENSIONS
    unsigned m_bEnableMSAD :1;        // D3D11_1_SB_GLOBAL_FLAG_ENABLE_SHADER_EXTENSIONS
    unsigned m_bAllResourcesBound :1; // D3D12_SB_GLOBAL_FLAG_ALL_RESOURCES_BOUND
@ -310,7 +313,9 @@ public:
    unsigned m_bViewID : 1;           // SHADER_FEATURE_VIEWID
    unsigned m_bBarycentrics : 1;     // SHADER_FEATURE_BARYCENTRICS

-    unsigned m_align0 : 9;        // align to 32 bit.
+    unsigned m_bUseNativeLowPrecision : 1;
+
+    unsigned m_align0 : 8;        // align to 32 bit.
    uint32_t m_align1;            // align to 64 bit.
  };

--- a/include/dxc/HLSL/DxilOperations.h
+++ b/include/dxc/HLSL/DxilOperations.h
@ -62,6 +62,9 @@ public:
  // Return false if the given function is not a dxil function.
  bool GetOpCodeClass(const llvm::Function *F, OpCodeClass &opClass);

+  // Returns true if the module uses min-precision types rather than native low-precision types
+  bool UseMinPrecision();
+
  // LLVM helpers. Perhaps, move to a separate utility class.
  llvm::Constant *GetI1Const(bool v);
  llvm::Constant *GetI8Const(char v);
@ -105,6 +108,8 @@ private:
  llvm::Type *m_pSplitDoubleType;
  llvm::Type *m_pInt4Type;

+  DXIL::LowPrecisionMode m_LowPrecisionMode;
+
  static const unsigned kNumTypeOverloads = 9;

  llvm::Type *m_pResRetType[kNumTypeOverloads];
--- a/include/dxc/HLSL/DxilTypeSystem.h
+++ b/include/dxc/HLSL/DxilTypeSystem.h
@ -211,11 +211,15 @@ public:
                              const llvm::Function *pSrcFunction,
                              const DxilTypeSystem &src);

+  bool UseMinPrecision();
+
 private:
  llvm::Module *m_pModule;
  StructAnnotationMap m_StructAnnotations;
  FunctionAnnotationMap m_FunctionAnnotations;

+  DXIL::LowPrecisionMode m_LowPrecisionMode;
+
  llvm::StructType *GetNormFloatType(CompType CT, unsigned NumComps);
 };

--- a/include/dxc/HLSL/HLModule.h
+++ b/include/dxc/HLSL/HLModule.h
@ -59,7 +59,8 @@ struct HLOptions {
  unsigned bLegacyCBufferLoad      : 1;
  unsigned PackingStrategy         : 2;
  static_assert((unsigned)DXIL::PackingStrategy::Invalid < 4, "otherwise 2 bits is not enough to store PackingStrategy");
-  unsigned unused                  : 25;
+  unsigned bUseMinPrecision        : 1;
+  unsigned unused                  : 24;
 };

 /// Use this class to manipulate HLDXIR of a shader.
@ -163,6 +164,7 @@ public:
  static void GetParameterRowsAndCols(llvm::Type *Ty, unsigned &rows, unsigned &cols,
                                      DxilParameterAnnotation &paramAnnotation);
  static const char *GetLegacyDataLayoutDesc();
+  static const char *GetNewDataLayoutDesc();

  static void MergeGepUse(llvm::Value *V);

--- a/lib/HLSL/DxilCompType.cpp
+++ b/lib/HLSL/DxilCompType.cpp
@ -155,6 +155,19 @@ bool CompType::Is64Bit() const {
  }
 }

+bool CompType::Is16Bit() const { // True for the 16-bit component kinds: F16, I16, U16, SNormF16, UNormF16.
+  switch (m_Kind) {
+  case DXIL::ComponentType::F16:
+  case DXIL::ComponentType::I16:
+  case DXIL::ComponentType::SNormF16:
+  case DXIL::ComponentType::UNormF16:
+  case DXIL::ComponentType::U16:
+    return true;
+  default: // every other kind, including the 32- and 64-bit ones
+    return false;
+  }
+}
+
 CompType CompType::GetBaseCompType() const {
  switch (m_Kind) {
  case Kind::I1:        return CompType(Kind::I1);
@ -283,14 +296,21 @@ const char *CompType::GetName() const {
 }

 static const char *s_TypeKindHLSLNames[(unsigned)CompType::Kind::LastEntry] = {
+  "unknown",
+  "bool", "short", "unsigned short", "int", "uint", "int64_t", "uint64_t",
+  "half", "float", "double",
+  "snorm_half", "unorm_half", "snorm_float", "unorm_float", "snorm_double", "unorm_double",
+};
+
+static const char *s_TypeKindHLSLNamesMinPrecision[(unsigned)CompType::Kind::LastEntry] = {
  "unknown",
  "bool", "min16i", "min16ui", "int", "uint", "int64_t", "uint64_t",
-  "min16f", "float", "double",
+  "min16float", "float", "double",
  "snorm_min16f", "unorm_min16f", "snorm_float", "unorm_float", "snorm_double", "unorm_double",
 };

-const char *CompType::GetHLSLName() const {
-  return s_TypeKindHLSLNames[(unsigned)m_Kind];
+const char *CompType::GetHLSLName(bool MinPrecision) const {
+  return MinPrecision ? s_TypeKindHLSLNamesMinPrecision[(unsigned)m_Kind] : s_TypeKindHLSLNames[(unsigned)m_Kind];
 }

 } // namespace hlsl
--- a/lib/HLSL/DxilContainerReflection.cpp
+++ b/lib/HLSL/DxilContainerReflection.cpp
@ -1939,7 +1939,7 @@ UINT64 DxilShaderReflection::GetRequiresFlags() {
  if (features & ShaderFeatureInfo_Doubles) result |= D3D_SHADER_REQUIRES_DOUBLES;
  if (features & ShaderFeatureInfo_UAVsAtEveryStage) result |= D3D_SHADER_REQUIRES_UAVS_AT_EVERY_STAGE;
  if (features & ShaderFeatureInfo_64UAVs) result |= D3D_SHADER_REQUIRES_64_UAVS;
-  if (features & ShaderFeatureInfo_MininumPrecision) result |= D3D_SHADER_REQUIRES_MINIMUM_PRECISION;
+  if (features & ShaderFeatureInfo_MinimumPrecision) result |= D3D_SHADER_REQUIRES_MINIMUM_PRECISION;
  if (features & ShaderFeatureInfo_11_1_DoubleExtensions) result |= D3D_SHADER_REQUIRES_11_1_DOUBLE_EXTENSIONS;
  if (features & ShaderFeatureInfo_11_1_ShaderExtensions) result |= D3D_SHADER_REQUIRES_11_1_SHADER_EXTENSIONS;
  if (features & ShaderFeatureInfo_LEVEL9ComparisonFiltering) result |= D3D_SHADER_REQUIRES_LEVEL_9_COMPARISON_FILTERING;
--- a/lib/HLSL/DxilGenerationPass.cpp
+++ b/lib/HLSL/DxilGenerationPass.cpp
@ -169,7 +169,6 @@ void InitDxilModuleFromHLModule(HLModule &H, DxilModule &M, DxilEntrySignature *
  //bool m_bDisableMathRefactoring;
  //bool m_bEnableDoublePrecision;
  //bool m_bEnableDoubleExtensions;
-  //bool m_bEnableMinPrecision;
  //M.CollectShaderFlags();

  //bool m_bForceEarlyDepthStencil;
@ -177,6 +176,8 @@ void InitDxilModuleFromHLModule(HLModule &H, DxilModule &M, DxilEntrySignature *
  //bool m_bEnableMSAD;
  //M.m_ShaderFlags.SetAllResourcesBound(H.GetHLOptions().bAllResourcesBound);

+  M.m_ShaderFlags.SetUseNativeLowPrecision(!H.GetHLOptions().bUseMinPrecision);
+
  if (FnProps)
    M.SetShaderProperties(FnProps);

--- a/lib/HLSL/DxilModule.cpp
+++ b/lib/HLSL/DxilModule.cpp
@ -107,7 +107,7 @@ DxilModule::ShaderFlags::ShaderFlags():
 , m_bEnableDoublePrecision(false)
 , m_bForceEarlyDepthStencil(false)
 , m_bEnableRawAndStructuredBuffers(false)
-, m_bEnableMinPrecision(false)
+, m_bLowPrecisionPresent(false)
 , m_bEnableDoubleExtensions(false)
 , m_bEnableMSAD(false)
 , m_bAllResourcesBound(false)
@ -125,6 +125,7 @@ DxilModule::ShaderFlags::ShaderFlags():
 , m_bInt64Ops(false)
 , m_bViewID(false)
 , m_bBarycentrics(false)
+, m_bUseNativeLowPrecision(false)
 , m_align0(0)
 , m_align1(0)
 {}
@ -228,7 +229,7 @@ unsigned DxilModule::ShaderFlags::GetGlobalFlags() const {
  Flags |= m_bEnableDoublePrecision ? DXIL::kEnableDoublePrecision : 0;
  Flags |= m_bForceEarlyDepthStencil ? DXIL::kForceEarlyDepthStencil : 0;
  Flags |= m_bEnableRawAndStructuredBuffers ? DXIL::kEnableRawAndStructuredBuffers : 0;
-  Flags |= m_bEnableMinPrecision ? DXIL::kEnableMinPrecision : 0;
+  Flags |= m_bLowPrecisionPresent && !m_bUseNativeLowPrecision? DXIL::kEnableMinPrecision : 0;
  Flags |= m_bEnableDoubleExtensions ? DXIL::kEnableDoubleExtensions : 0;
  Flags |= m_bEnableMSAD ? DXIL::kEnableMSAD : 0;
  Flags |= m_bAllResourcesBound ? DXIL::kAllResourcesBound : 0;
@ -238,7 +239,8 @@ unsigned DxilModule::ShaderFlags::GetGlobalFlags() const {
 uint64_t DxilModule::ShaderFlags::GetFeatureInfo() const {
  uint64_t Flags = 0;
  Flags |= m_bEnableDoublePrecision ? hlsl::ShaderFeatureInfo_Doubles : 0;
-  Flags |= m_bEnableMinPrecision ? hlsl::ShaderFeatureInfo_MininumPrecision : 0;
+  Flags |= m_bLowPrecisionPresent && !m_bUseNativeLowPrecision ? hlsl::ShaderFeatureInfo_MinimumPrecision: 0;
+  Flags |= m_bLowPrecisionPresent && m_bUseNativeLowPrecision ? hlsl::ShaderFeatureInfo_NativeLowPrecision : 0;
  Flags |= m_bEnableDoubleExtensions ? hlsl::ShaderFeatureInfo_11_1_DoubleExtensions : 0;
  Flags |= m_bWaveOps ? hlsl::ShaderFeatureInfo_WaveOps : 0;
  Flags |= m_bInt64Ops ? hlsl::ShaderFeatureInfo_Int64Ops : 0;
@ -339,7 +341,7 @@ void DxilModule::CollectShaderFlags(ShaderFlags &Flags) {
  // fma has dxil op. Others should check IR instruction div/cast.
  bool hasDoubleExtension = false;
  bool has64Int = false;
-  bool has16FloatInt = false;
+  bool has16 = false;
  bool hasWaveOps = false;
  bool hasCheckAccessFully = false;
  bool hasMSAD = false;
@ -395,8 +397,8 @@ void DxilModule::CollectShaderFlags(ShaderFlags &Flags) {
          }
        }
        
-        has16FloatInt |= isHalf;
-        has16FloatInt |= isInt16;
+        has16 |= isHalf;
+        has16 |= isInt16;
        has64Int |= isInt64;

        if (CallInst *CI = dyn_cast<CallInst>(&I)) {
@ -474,7 +476,7 @@ void DxilModule::CollectShaderFlags(ShaderFlags &Flags) {

  Flags.SetEnableDoublePrecision(hasDouble);
  Flags.SetInt64Ops(has64Int);
-  Flags.SetEnableMinPrecision(has16FloatInt);
+  Flags.SetLowPrecisionPresent(has16);
  Flags.SetEnableDoubleExtensions(hasDoubleExtension);
  Flags.SetWaveOps(hasWaveOps);
  Flags.SetTiledResources(hasCheckAccessFully);
@ -582,7 +584,7 @@ uint64_t DxilModule::ShaderFlags::GetShaderFlagsRawForCollection() {
  ShaderFlags Flags;
  Flags.SetEnableDoublePrecision(true);
  Flags.SetInt64Ops(true);
-  Flags.SetEnableMinPrecision(true);
+  Flags.SetLowPrecisionPresent(true);
  Flags.SetEnableDoubleExtensions(true);
  Flags.SetWaveOps(true);
  Flags.SetTiledResources(true);
@ -1496,10 +1498,10 @@ MDTuple *DxilModule::EmitDxilShaderProperties() {
  vector<Metadata *> MDVals;

  // DXIL shader flags.
-  uint64_t Flags = m_ShaderFlags.GetShaderFlagsRaw();
-  if (Flags != 0) {
+  uint64_t flag = m_ShaderFlags.GetShaderFlagsRaw();
+  if (flag != 0) {
    MDVals.emplace_back(m_pMDHelper->Uint32ToConstMD(DxilMDHelper::kDxilShaderFlagsTag));
-    MDVals.emplace_back(m_pMDHelper->Uint64ToConstMD(Flags));
+    MDVals.emplace_back(m_pMDHelper->Uint64ToConstMD(flag));
  }

  // Compute shader.
--- a/lib/HLSL/DxilOperations.cpp
+++ b/lib/HLSL/DxilOperations.cpp
@ -11,6 +11,8 @@

 #include "dxc/HLSL/DxilOperations.h"
 #include "dxc/Support/Global.h"
+#include "dxc/HLSL/DxilModule.h"
+#include "dxc/HLSL/HLModule.h"

 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/IR/LLVMContext.h"
@ -430,7 +432,8 @@ static Type *GetOrCreateStructType(LLVMContext &Ctx, ArrayRef<Type*> types, Stri
 //
 OP::OP(LLVMContext &Ctx, Module *pModule)
 : m_Ctx(Ctx)
-, m_pModule(pModule) {
+, m_pModule(pModule)
+, m_LowPrecisionMode(DXIL::LowPrecisionMode::Undefined) {
  memset(m_pResRetType, 0, sizeof(m_pResRetType));
  memset(m_pCBufferRetType, 0, sizeof(m_pCBufferRetType));
  memset(m_OpCodeClassCache, 0, sizeof(m_OpCodeClassCache));
@ -783,6 +786,23 @@ bool OP::GetOpCodeClass(const Function *F, OP::OpCodeClass &opClass) {
  return true;
 }

+bool OP::UseMinPrecision() { // Resolves the low-precision mode on first call, caches it in m_LowPrecisionMode, and reports whether it is min precision.
+  if (m_LowPrecisionMode == DXIL::LowPrecisionMode::Undefined) {
+    if (m_pModule->HasDxilModule()) { // presence check; &GetDxilModule() takes the address of a reference and is not a valid null test
+      m_LowPrecisionMode = m_pModule->GetDxilModule().m_ShaderFlags.GetUseNativeLowPrecision() ?
+        DXIL::LowPrecisionMode::UseNativeLowPrecision : DXIL::LowPrecisionMode::UseMinPrecision;
+    }
+    else if (m_pModule->HasHLModule()) { // no DxilModule attached yet: read the mode from the HL options instead
+      m_LowPrecisionMode = m_pModule->GetHLModule().GetHLOptions().bUseMinPrecision ?
+        DXIL::LowPrecisionMode::UseMinPrecision : DXIL::LowPrecisionMode::UseNativeLowPrecision;
+    }
+    else {
+      DXASSERT(false, "otherwise module doesn't contain either HLModule or Dxil Module.");
+    }
+  }
+  return m_LowPrecisionMode == DXIL::LowPrecisionMode::UseMinPrecision; // a mode left Undefined by the assert path compares unequal, i.e. false
+}
+
 llvm::Type *OP::GetOverloadType(OpCode OpCode, llvm::Function *F) {
  DXASSERT(F, "not work on nullptr");
  Type *Ty = F->getReturnType();
@ -940,15 +960,23 @@ Type *OP::GetCBufferRetType(Type *pOverloadType) {
  if (m_pCBufferRetType[TypeSlot] == nullptr) {
    string TypeName("dx.types.CBufRet.");
    TypeName += GetOverloadTypeName(TypeSlot);
-    if (!pOverloadType->isDoubleTy()) {
-      Type *FieldTypes[4] = { pOverloadType, pOverloadType, pOverloadType, pOverloadType };
-      m_pCBufferRetType[TypeSlot] = GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule);
-    } else {
+    if (pOverloadType->isDoubleTy()) {
      Type *FieldTypes[2] = { pOverloadType, pOverloadType };
      m_pCBufferRetType[TypeSlot] = GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule);
    }
+    else if (!UseMinPrecision() && pOverloadType->isHalfTy()) {
+      TypeName += ".8"; // dx.types.CBufRet.fp16.8 for buffer of 8 halves
+      Type *FieldTypes[8] = {
+          pOverloadType, pOverloadType, pOverloadType, pOverloadType,
+          pOverloadType, pOverloadType, pOverloadType, pOverloadType,
+      };
+      m_pCBufferRetType[TypeSlot] = GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule);
+    }
+    else {
+      Type *FieldTypes[4] = { pOverloadType, pOverloadType, pOverloadType, pOverloadType };
+      m_pCBufferRetType[TypeSlot] = GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule);
+    }
  }
-
  return m_pCBufferRetType[TypeSlot];
 }

--- a/lib/HLSL/DxilTypeSystem.cpp
+++ b/lib/HLSL/DxilTypeSystem.cpp
@ -8,6 +8,8 @@
 ///////////////////////////////////////////////////////////////////////////////

 #include "dxc/HLSL/DxilTypeSystem.h"
+#include "dxc/HLSL/DxilModule.h"
+#include "dxc/HLSL/HLModule.h"
 #include "dxc/Support/Global.h"

 #include "llvm/IR/Module.h"
@ -194,8 +196,8 @@ void DxilFunctionFPFlag::SetFlagValue(const uint32_t flag) {
 // DxilStructAnnotationSystem class methods.
 //
 DxilTypeSystem::DxilTypeSystem(Module *pModule)
-: m_pModule(pModule) {
-}
+    : m_pModule(pModule),
+      m_LowPrecisionMode(DXIL::LowPrecisionMode::Undefined) {}

 DxilStructAnnotation *DxilTypeSystem::AddStructAnnotation(const StructType *pStructType) {
  DXASSERT_NOMSG(m_StructAnnotations.find(pStructType) == m_StructAnnotations.end());
@ -451,4 +453,21 @@ DXIL::SigPointKind SigPointFromInputQual(DxilParamInputQual Q, DXIL::ShaderKind
  return DXIL::SigPointKind::Invalid;
 }

+bool DxilTypeSystem::UseMinPrecision() { // Resolves the low-precision mode on first call, caches it in m_LowPrecisionMode, and reports whether it is min precision.
+  if (m_LowPrecisionMode == DXIL::LowPrecisionMode::Undefined) {
+    if (m_pModule->HasDxilModule()) { // presence check; &GetDxilModule() takes the address of a reference and is not a valid null test
+      m_LowPrecisionMode = m_pModule->GetDxilModule().m_ShaderFlags.GetUseNativeLowPrecision() ?
+        DXIL::LowPrecisionMode::UseNativeLowPrecision : DXIL::LowPrecisionMode::UseMinPrecision;
+    }
+    else if (m_pModule->HasHLModule()) { // no DxilModule attached yet: read the mode from the HL options instead
+      m_LowPrecisionMode = m_pModule->GetHLModule().GetHLOptions().bUseMinPrecision ?
+        DXIL::LowPrecisionMode::UseMinPrecision : DXIL::LowPrecisionMode::UseNativeLowPrecision;
+    }
+    else {
+      DXASSERT(false, "otherwise module doesn't contain either HLModule or Dxil Module.");
+    }
+  }
+  return m_LowPrecisionMode == DXIL::LowPrecisionMode::UseMinPrecision; // a mode left Undefined by the assert path compares unequal, i.e. false
+}
+
 } // namespace hlsl
--- a/lib/HLSL/DxilUtil.cpp
+++ b/lib/HLSL/DxilUtil.cpp
@ -13,6 +13,7 @@
 #include "llvm/IR/GlobalVariable.h"
 #include "dxc/HLSL/DxilTypeSystem.h"
 #include "dxc/HLSL/DxilUtil.h"
+#include "dxc/HLSL/DxilModule.h"
 #include "llvm/IR/Module.h"

 using namespace llvm;
@ -35,12 +36,14 @@ unsigned
 GetLegacyCBufferFieldElementSize(DxilFieldAnnotation &fieldAnnotation,
                                           llvm::Type *Ty,
                                           DxilTypeSystem &typeSys) {
+
  while (isa<ArrayType>(Ty)) {
    Ty = Ty->getArrayElementType();
  }

  // Bytes.
-  unsigned compSize = fieldAnnotation.GetCompType().Is64Bit()?8:4;
+  CompType compType = fieldAnnotation.GetCompType();
+  unsigned compSize = compType.Is64Bit() ? 8 : compType.Is16Bit() && !typeSys.UseMinPrecision() ? 2 : 4;
  unsigned fieldSize = compSize;
  if (Ty->isVectorTy()) {
    fieldSize *= Ty->getVectorNumElements();
--- a/lib/HLSL/DxilValidation.cpp
+++ b/lib/HLSL/DxilValidation.cpp
@ -1999,7 +1999,8 @@ static bool IsDxilBuiltinStructType(StructType *ST, hlsl::OP *hlslOP) {
  unsigned EltNum = ST->getNumElements();
  switch (EltNum) {
  case 2:
-  case 4: {
+  case 4:
+  case 8: { // 2 for doubles, 8 for halfs.
    Type *EltTy = ST->getElementType(0);
    return ST == hlslOP->GetCBufferRetType(EltTy);
  } break;
--- a/lib/HLSL/HLModule.cpp
+++ b/lib/HLSL/HLModule.cpp
@ -862,6 +862,12 @@ const char *HLModule::GetLegacyDataLayoutDesc() {
  return kLegacyLayoutString.data();
 }

+// Data layout string for modules that use native low precision types; CGMSHLSLRuntime selects it when UseMinPrecision is off.
+static const StringRef kNewLayoutString = "e-m:e-p:32:32-i1:32:32-i8:32:32-i16:32:32-i64:64-f16:16-f80:32-n8:16:32-a:0:32-S320";
+const char *HLModule::GetNewDataLayoutDesc() {
+  return kNewLayoutString.data(); // the StringRef wraps a string literal, so the returned pointer is NUL-terminated
+}
+
 static Value *MergeGEP(GEPOperator *SrcGEP, GetElementPtrInst *GEP) {
  IRBuilder<> Builder(GEP);
  SmallVector<Value *, 8> Indices;
--- a/lib/HLSL/HLOperationLower.cpp
+++ b/lib/HLSL/HLOperationLower.cpp
@ -4787,15 +4787,18 @@ Value *GenerateCBLoadLegacy(Value *handle, Value *legacyIdx,
                            unsigned channelOffset, Type *EltTy,
                            unsigned vecSize, OP *hlslOP,
                            IRBuilder<> &Builder) {
-  DXASSERT((channelOffset + vecSize) <= 4, "legacy cbuffer don't across 16 bytes register.");
  Constant *OpArg = hlslOP->GetU32Const((unsigned)OP::OpCode::CBufferLoadLegacy);

  Type *i1Ty = Type::getInt1Ty(EltTy->getContext());
  Type *doubleTy = Type::getDoubleTy(EltTy->getContext());
  Type *i64Ty = Type::getInt64Ty(EltTy->getContext());
+  Type *halfTy = Type::getHalfTy(EltTy->getContext());
+
  bool isBool = EltTy == i1Ty;
  bool is64 = (EltTy == doubleTy) | (EltTy == i64Ty);
-  bool isNormal = !isBool && !is64;
+  bool is16 = EltTy == halfTy && !hlslOP->UseMinPrecision();
+  bool isNormal = !isBool && !is64 && !is16;
+  DXASSERT(is16 || (channelOffset + vecSize) <= 4, "legacy cbuffer don't across 16 bytes register.");
  if (isNormal) {
    Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
    Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
@ -4805,10 +4808,21 @@ Value *GenerateCBLoadLegacy(Value *handle, Value *legacyIdx,
      Result = Builder.CreateInsertElement(Result, NewElt, i);
    }
    return Result;
-  } else if (is64) {
+  } else if (is16) {
    Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
    Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
    Value *Result = UndefValue::get(VectorType::get(EltTy, vecSize));
+    // index aligned by 2 bytes not 4 bytes
+    channelOffset *= 2;
+    for (unsigned i = 0; i < vecSize; ++i) {
+      Value *NewElt = Builder.CreateExtractValue(loadLegacy, channelOffset + i);
+      Result = Builder.CreateInsertElement(Result, NewElt, i);
+    }
+    return Result;
+  } else if (is64) {
+    Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
+    Value *loadLegacy = Builder.CreateCall(CBLoad, { OpArg, handle, legacyIdx });
+    Value *Result = UndefValue::get(VectorType::get(EltTy, vecSize));
    unsigned smallVecSize = 2;
    if (vecSize < smallVecSize)
      smallVecSize = vecSize;
--- a/tools/clang/include/clang/Basic/LangOptions.h
+++ b/tools/clang/include/clang/Basic/LangOptions.h
@ -156,7 +156,7 @@ public:
  unsigned RootSigMajor;
  unsigned RootSigMinor;
  bool IsHLSLLibrary;
-  bool NoMinPrecision; // use strict precision, not min precision.
+  bool UseMinPrecision; // use min precision, not native precision.
  // MS Change Ends

  bool SPIRV = false;  // SPIRV Change
--- a/tools/clang/lib/CodeGen/CGHLSLMS.cpp
+++ b/tools/clang/lib/CodeGen/CGHLSLMS.cpp
@ -311,7 +311,7 @@ void clang::CompileRootSignature(
 //
 CGMSHLSLRuntime::CGMSHLSLRuntime(CodeGenModule &CGM)
    : CGHLSLRuntime(CGM), Context(CGM.getLLVMContext()), EntryFunc(nullptr),
-      TheModule(CGM.getModule()), legacyLayout(HLModule::GetLegacyDataLayoutDesc()),
+      TheModule(CGM.getModule()), legacyLayout(CGM.getLangOpts().UseMinPrecision ? HLModule::GetLegacyDataLayoutDesc() : HLModule::GetNewDataLayoutDesc()),
      CBufferType(
          llvm::StructType::create(TheModule.getContext(), "ConstantBuffer")) {
  const hlsl::ShaderModel *SM =
@ -348,6 +348,9 @@ CGMSHLSLRuntime::CGMSHLSLRuntime(CodeGenModule &CGM)
  opts.bLegacyCBufferLoad = !CGM.getCodeGenOpts().HLSLNotUseLegacyCBufLoad;
  opts.bAllResourcesBound = CGM.getCodeGenOpts().HLSLAllResourcesBound;
  opts.PackingStrategy = CGM.getCodeGenOpts().HLSLSignaturePackingStrategy;
+
+  opts.bUseMinPrecision = CGM.getLangOpts().UseMinPrecision;
+
  m_pHLModule->SetHLOptions(opts);

  m_pHLModule->SetValidatorVersion(CGM.getCodeGenOpts().HLSLValidatorMajorVer, CGM.getCodeGenOpts().HLSLValidatorMinorVer);
@ -385,6 +388,7 @@ CGMSHLSLRuntime::CGMSHLSLRuntime(CodeGenModule &CGM)

  // set Float Denorm Mode
  m_pHLModule->SetFPDenormMode(CGM.getCodeGenOpts().HLSLFlushFPDenorm);
+
 }

 bool CGMSHLSLRuntime::IsHlslObjectType(llvm::Type *Ty) {
@ -484,7 +488,7 @@ StringToTessOutputPrimitive(StringRef primitive) {
 }

 static unsigned AlignTo8Bytes(unsigned offset, bool b8BytesAlign) {
-  DXASSERT((offset & 0x3) == 0, "offset should be divisible by 4");
+  DXASSERT((offset & 0x1) == 0, "offset should be divisible by 2");
  if (!b8BytesAlign)
    return offset;
  else if ((offset & 0x7) == 0)
@ -2605,11 +2609,16 @@ void CGMSHLSLRuntime::SetEntryFunction() {

 // Here the size is CB size. So don't need check type.
 static unsigned AlignCBufferOffset(unsigned offset, unsigned size, llvm::Type *Ty) {
+  DXASSERT(!(offset & 1), "otherwise we have an invalid offset.");
  // offset is already 4 bytes aligned.
  bool b8BytesAlign = Ty->isDoubleTy();
  if (llvm::IntegerType *IT = dyn_cast<llvm::IntegerType>(Ty)) {
    b8BytesAlign = IT->getBitWidth() > 32;
  }
+  // If offset is divisible by 2 and not 4, then increase the offset by 2 for dword alignment.
+  if (!Ty->getScalarType()->isHalfTy() && (offset & 0x2)) {
+    offset += 2;
+  }

  // Align it to 4 x 4bytes.
  if (unsigned remainder = (offset & 0xf)) {
--- a/tools/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/tools/clang/lib/Frontend/CompilerInvocation.cpp
@ -1733,7 +1733,7 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,

  // Enable low precision for HLSL 2018
  // TODO: should we tie low precision to HLSL2018 only?
-  Opts.NoMinPrecision = Args.hasArg(options::OPT_no_min_precision);
+  Opts.UseMinPrecision = !Args.hasArg(options::OPT_no_min_precision);
 #endif // #ifdef MS_SUPPORT_VARIABLE_LANGOPTS
 }

--- a/tools/clang/lib/Sema/SemaExpr.cpp
+++ b/tools/clang/lib/Sema/SemaExpr.cpp
@ -3370,7 +3370,7 @@ ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) {
    else if (getLangOpts().HLSL && Literal.isLong)
      Ty = Context.DoubleTy;
    else if (getLangOpts().HLSL && Literal.isHalf) {
-      Ty = getLangOpts().NoMinPrecision ? Context.HalfTy : Context.FloatTy;
+      Ty = getLangOpts().UseMinPrecision ? Context.FloatTy : Context.HalfTy;
    }
    // HLSL Change Ends
    else if (!Literal.isLong)
--- a/tools/clang/lib/Sema/SemaHLSL.cpp
+++ b/tools/clang/lib/Sema/SemaHLSL.cpp
@ -3037,16 +3037,16 @@ public:

  void WarnMinPrecision(HLSLScalarType type, SourceLocation loc) {
    // TODO: enalbe this once we introduce precise master option
-    bool NoMinPrecision = m_context->getLangOpts().NoMinPrecision;
+    bool UseMinPrecision = m_context->getLangOpts().UseMinPrecision;
    if (type == HLSLScalarType_int_min12) {
      const char *PromotedType = "min16int"; // TODO: print int16 once we support true int16/uint16 support.
      m_sema->Diag(loc, diag::warn_hlsl_sema_minprecision_promotion) << "min12int" << PromotedType;
    }
    else if (type == HLSLScalarType_float_min10) {
-      const char *PromotedType = NoMinPrecision ? "half": "min16float";
+      const char *PromotedType = UseMinPrecision ? "min16float": "half";
      m_sema->Diag(loc, diag::warn_hlsl_sema_minprecision_promotion) << "min10float" << PromotedType;
    }
-    if (NoMinPrecision) {
+    if (!UseMinPrecision) {
      if (type == HLSLScalarType_float_min16) {
        m_sema->Diag(loc, diag::warn_hlsl_sema_minprecision_promotion) << "min16float" << "half";
      }
@ -3287,7 +3287,7 @@ public:
      case BuiltinType::Bool: return AR_BASIC_BOOL;
      case BuiltinType::Double: return AR_BASIC_FLOAT64;
      case BuiltinType::Float: return AR_BASIC_FLOAT32;
-      case BuiltinType::Half: return m_context->getLangOpts().NoMinPrecision ? AR_BASIC_FLOAT16 : AR_BASIC_MIN16FLOAT;
+      case BuiltinType::Half: return m_context->getLangOpts().UseMinPrecision ? AR_BASIC_MIN16FLOAT : AR_BASIC_FLOAT16;
      case BuiltinType::Int: return AR_BASIC_INT32;
      case BuiltinType::UInt: return AR_BASIC_UINT32;
      case BuiltinType::Short: return AR_BASIC_MIN16INT;    // rather than AR_BASIC_INT16
@ -3394,7 +3394,7 @@ public:
    case AR_OBJECT_NULL:          return m_context->VoidTy;
    case AR_BASIC_BOOL:           return m_context->BoolTy;
    case AR_BASIC_LITERAL_FLOAT:  return m_context->LitFloatTy;
-    case AR_BASIC_FLOAT16:        return m_context->getLangOpts().NoMinPrecision ? m_context->HalfTy : m_context->FloatTy;
+    case AR_BASIC_FLOAT16:        return m_context->getLangOpts().UseMinPrecision ? m_context->FloatTy : m_context->HalfTy;
    case AR_BASIC_FLOAT32_PARTIAL_PRECISION: return m_context->FloatTy;
    case AR_BASIC_FLOAT32:        return m_context->FloatTy;
    case AR_BASIC_FLOAT64:        return m_context->DoubleTy;
@ -4424,7 +4424,7 @@ void HLSLExternalSource::AddBaseTypes()
  m_baseTypes[HLSLScalarType_int] = m_context->IntTy;
  m_baseTypes[HLSLScalarType_uint] = m_context->UnsignedIntTy;
  m_baseTypes[HLSLScalarType_dword] = m_context->UnsignedIntTy;
-  m_baseTypes[HLSLScalarType_half] = m_context->getLangOpts().NoMinPrecision ? m_context->HalfTy : m_context->FloatTy;
+  m_baseTypes[HLSLScalarType_half] = m_context->getLangOpts().UseMinPrecision ? m_context->FloatTy : m_context->HalfTy;
  m_baseTypes[HLSLScalarType_float] = m_context->FloatTy;
  m_baseTypes[HLSLScalarType_double] = m_context->DoubleTy;
  m_baseTypes[HLSLScalarType_float_min10] = m_context->HalfTy;
--- a/tools/clang/test/CodeGenHLSL/cbufferHalf.hlsl
+++ b/tools/clang/test/CodeGenHLSL/cbufferHalf.hlsl
@ -0,0 +1,110 @@
+// RUN: %dxc -E main -T ps_6_0 -no-min-precision %s | FileCheck %s
+
+// CHECK: Use native low precision
+// CHECK: cbuffer Foo
+// CHECK: {
+// CHECK:   struct dx.alignment.legacy.Foo
+// CHECK:   {
+// CHECK:       half f_h1;                                    ; Offset:    0
+// CHECK:       float3 f_f3;                                  ; Offset:    4
+
+// CHECK:       half2 f_h2;                                   ; Offset:   16
+// CHECK:       float3 f_f3_1;                                ; Offset:   20
+
+// CHECK:       float2 f_f2;                                  ; Offset:   32
+// CHECK:       half4 f_h4;                                   ; Offset:   40
+
+// CHECK:       half2 f_h2_1;                                 ; Offset:   48
+// CHECK:       half3 f_h3;                                   ; Offset:   52
+
+// CHECK:       double f_d1;                                  ; Offset:   64
+// CHECK:   } Foo                                           ; Offset:    0 Size:    72
+// CHECK: }
+
+// CHECK: cbuffer Bar
+// CHECK: {
+// CHECK:   struct dx.alignment.legacy.Bar
+// CHECK:   {
+// CHECK:       half b_h1;                                    ; Offset:    0
+// CHECK:       half b_h2;                                    ; Offset:    2
+// CHECK:       half b_h3;                                    ; Offset:    4
+// CHECK:       half2 b_h4;                                   ; Offset:    6
+// CHECK:       half3 b_h5;                                   ; Offset:   10
+
+// CHECK:       half3 b_h7;                                   ; Offset:   16
+// CHECK:       half4 b_h8;                                   ; Offset:   22
+// CHECK:       half b_h9;                                    ; Offset:   30
+
+// CHECK:       half4 b_h10;                                  ; Offset:   32
+// CHECK:       half3 b_h11;                                  ; Offset:   40
+
+// CHECK:       half2 b_h12;                                  ; Offset:   48
+// CHECK:       half3 b_h13;                                  ; Offset:   52
+// CHECK:       half2 b_h14;                                  ; Offset:   58
+// CHECK:   } Bar                                             ; Offset:    0 Size:    62
+// CHECK: }
+
+// CHECK: %dx.types.CBufRet.f16.8 = type { half, half, half, half, half, half, half, half }
+
+// CHECK: %Foo_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo_buffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f32 {{%[0-9]+}}, 1
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 1
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo_buffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f32 {{%[0-9]+}}, 3
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo_buffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f32 {{%[0-9]+}}, 0
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 4
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 5
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 6
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 7
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 1
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 2
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 3
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 4
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %Foo_buffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f64 {{%[0-9]+}}, 0
+
+cbuffer Foo {  // half/float/double mix; notes give the byte offsets the FileCheck expectations above list for -no-min-precision
+  half f_h1;  // offset 0
+  float3 f_f3;  // offset 4
+  half2 f_h2;  // offset 16
+  float3 f_f3_1;  // offset 20
+  float2 f_f2;  // offset 32
+  half4 f_h4;  // offset 40
+  half2 f_h2_1;  // offset 48
+  half3 f_h3;  // offset 52
+  double f_d1;  // offset 64; expected total size is 72
+}
+
+cbuffer Bar {  // half-only buffer; notes give the byte offsets the FileCheck expectations above list
+  half b_h1;  // offset 0
+  half b_h2;  // offset 2
+  half b_h3;  // offset 4
+  half2 b_h4;  // offset 6
+  half3 b_h5;  // offset 10
+  
+  half3 b_h7;  // offset 16
+  half4 b_h8;  // offset 22
+  half b_h9;  // offset 30
+
+  half4 b_h10;  // offset 32
+  half3 b_h11;  // offset 40
+  
+  half2 b_h12;  // offset 48
+  half3 b_h13;  // offset 52
+  half2 b_h14;  // offset 58; expected total size is 62
+}
+
+float4 main() : SV_Target {  // reads at least one component of every Foo member, plus b_h1 from Bar
+  return f_h1 + f_f3.x + f_h2.x + f_h2.y + f_f3_1.z + f_f2.x + f_h4.x + f_h4.y 
+  + f_h4.z + f_h4.w + f_h2_1.x + f_h2_1.y + f_h3.x + f_h3.y + f_h3.z + f_d1
+  + b_h1;  // the only Bar member referenced; no load expectation above covers it
+}
--- a/tools/clang/test/CodeGenHLSL/cbufferMinPrec.hlsl
+++ b/tools/clang/test/CodeGenHLSL/cbufferMinPrec.hlsl
@ -0,0 +1,62 @@
+// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
+
+// CHECK: Minimum-precision data types
+// CHECK: cbuffer Foo
+// CHECK: {
+// CHECK:   struct dx.alignment.legacy.Foo
+// CHECK:   {
+// CHECK:       min16float h1;                                ; Offset:    0
+// CHECK:       float3 f3;                                    ; Offset:    4
+// CHECK:       min16float2 h2;                               ; Offset:   16
+// CHECK:       float3 f3_1;                                  ; Offset:   32
+// CHECK:       float2 f2;                                    ; Offset:   48
+// CHECK:       min16float4 h4;                               ; Offset:   64
+// CHECK:       min16float2 h2_1;                             ; Offset:   80
+// CHECK:       min16float3 h3;                               ; Offset:   96
+// CHECK:       double d1;                                    ; Offset:  112
+// CHECK:   } Foo                                             ; Offset:    0 Size:   120
+// CHECK: }
+
+// CHECK: %dx.types.CBufRet.f16 = type { half, half, half, half }
+
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 0
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo_buffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f32 {{%[0-9]+}}, 1
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 0
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 1
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo_buffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f32 {{%[0-9]+}}, 2
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo_buffer, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f32 {{%[0-9]+}}, 0
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 0
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 1
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 2
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 3
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 0
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 1
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 6)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 0
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 1
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 2
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %Foo_buffer, i32 7)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f64 {{%[0-9]+}}, 0
+
+cbuffer Foo {  // min16float/float/double mix; notes give the byte offsets the FileCheck expectations above list
+  min16float h1;  // offset 0
+  float3 f3;  // offset 4
+  min16float2 h2;  // offset 16
+  float3 f3_1;  // offset 32
+  float2 f2;  // offset 48
+  min16float4 h4;  // offset 64
+  min16float2 h2_1;  // offset 80
+  min16float3 h3;  // offset 96
+  double d1;  // offset 112; expected total size is 120
+}
+
+float4 main() : SV_Target {  // reads at least one component of every Foo member
+  return h1 + f3.x + h2.x + h2.y + f3_1.z + f2.x + h4.x + h4.y + h4.z + h4.w + h2_1.x + h2_1.y + h3.x + h3.y + h3.z + d1;
+}
--- a/tools/clang/tools/dxcompiler/dxcdisassembler.cpp
+++ b/tools/clang/tools/dxcompiler/dxcdisassembler.cpp
@ -313,6 +313,7 @@ PCSTR g_pFeatureInfoNames[] = {
    "64-Bit integer",
    "View Instancing",
    "Barycentrics",
+    "Use native low precision"
 };
 static_assert(_countof(g_pFeatureInfoNames) == ShaderFeatureInfoCount, "g_pFeatureInfoNames needs to be updated");

@ -565,12 +566,12 @@ void PrintStructLayout(StructType *ST, DxilTypeSystem &typeSys,
                              unsigned sizeOfStruct = 0);

 void PrintTypeAndName(llvm::Type *Ty, DxilFieldAnnotation &annotation,
-                             std::string &StreamStr, unsigned arraySize) {
+                             std::string &StreamStr, unsigned arraySize, bool minPrecision) {
  raw_string_ostream Stream(StreamStr);
  while (Ty->isArrayTy())
    Ty = Ty->getArrayElementType();

-  const char *compTyName = annotation.GetCompType().GetHLSLName();
+  const char *compTyName = annotation.GetCompType().GetHLSLName(minPrecision);
  if (annotation.HasMatrixAnnotation()) {
    const DxilMatrixAnnotation &Matrix = annotation.GetMatrixAnnotation();
    switch (Matrix.Orientation) {
@ -650,7 +651,7 @@ void PrintFieldLayout(llvm::Type *Ty, DxilFieldAnnotation &annotation,
    } else {
      (OS << comment).indent(indent);
      std::string NameTypeStr;
-      PrintTypeAndName(Ty, annotation, NameTypeStr, arraySize);
+      PrintTypeAndName(Ty, annotation, NameTypeStr, arraySize, typeSys.UseMinPrecision());
      OS << left_justify(NameTypeStr, offsetIndent);

      // Offset
--- a/tools/clang/tools/dxcompiler/dxcompilerobj.cpp
+++ b/tools/clang/tools/dxcompiler/dxcompilerobj.cpp
@ -789,7 +789,7 @@ public:
    compiler.getLangOpts().HLSL2016 = Opts.HLSL2016;
    compiler.getLangOpts().HLSL2017 = Opts.HLSL2017;

-    compiler.getLangOpts().NoMinPrecision = Opts.NoMinPrecision;
+    compiler.getLangOpts().UseMinPrecision = !Opts.NoMinPrecision;

 // SPIRV change starts
 #ifdef ENABLE_SPIRV_CODEGEN
--- a/tools/clang/unittests/HLSL/CompilerTest.cpp
+++ b/tools/clang/unittests/HLSL/CompilerTest.cpp
@ -478,7 +478,9 @@ public:
  TEST_METHOD(CodeGenCbuffer6_51)
  TEST_METHOD(CodeGenCbufferAlloc)
  TEST_METHOD(CodeGenCbufferAllocLegacy)
+  TEST_METHOD(CodeGenCbufferHalf)
  TEST_METHOD(CodeGenCbufferInLoop)
+  TEST_METHOD(CodeGenCbufferMinPrec)
  TEST_METHOD(CodeGenClass)
  TEST_METHOD(CodeGenClip)
  TEST_METHOD(CodeGenClipPlanes)
@ -3051,10 +3053,18 @@ TEST_F(CompilerTest, CodeGenCbufferAllocLegacy) {
  CodeGenTestCheck(L"..\\CodeGenHLSL\\cbufferAlloc_legacy.hlsl");
 }

+TEST_F(CompilerTest, CodeGenCbufferHalf) {  // hands the new cbufferHalf.hlsl shader to CodeGenTestCheck
+  CodeGenTestCheck(L"..\\CodeGenHLSL\\cbufferHalf.hlsl");  // NOTE(review): presumably honors the RUN/FileCheck lines in the shader - confirm
+}
+
 TEST_F(CompilerTest, CodeGenCbufferInLoop) {
  CodeGenTest(L"..\\CodeGenHLSL\\cbufferInLoop.hlsl");
 }

+TEST_F(CompilerTest, CodeGenCbufferMinPrec) {  // hands the new cbufferMinPrec.hlsl shader to CodeGenTestCheck
+  CodeGenTestCheck(L"..\\CodeGenHLSL\\cbufferMinPrec.hlsl");  // NOTE(review): presumably honors the RUN/FileCheck lines in the shader - confirm
+}
+
 TEST_F(CompilerTest, CodeGenClass) {
  CodeGenTestCheck(L"..\\CodeGenHLSL\\class.hlsl");
 }