diff --git a/include/dxc/HLSL/DxilCompType.h b/include/dxc/HLSL/DxilCompType.h
index 0d11b9d47..dbb02ad92 100644
--- a/include/dxc/HLSL/DxilCompType.h
+++ b/include/dxc/HLSL/DxilCompType.h
@@ -63,6 +63,7 @@ public:
   bool IsSNorm() const;
   bool IsUNorm() const;
   bool Is64Bit() const;
+  bool Is16Bit() const;
 
   /// For min-precision types, returns upconverted (base) type.
   CompType GetBaseCompType() const;
@@ -83,7 +84,7 @@ public:
   static CompType GetCompType(llvm::Type * type);
 
   const char *GetName() const;
-  const char *GetHLSLName() const;
+  const char *GetHLSLName(bool MinPrecision) const;
 
 private:
   Kind m_Kind;
diff --git a/include/dxc/HLSL/DxilConstants.h b/include/dxc/HLSL/DxilConstants.h
index 04584e7cb..b13735b61 100644
--- a/include/dxc/HLSL/DxilConstants.h
+++ b/include/dxc/HLSL/DxilConstants.h
@@ -914,6 +914,13 @@ namespace DXIL {
   const uint8_t kCompMask_W     = 0x8;
   const uint8_t kCompMask_All   = 0xF;
 
+
+  enum class LowPrecisionMode { // How a module treats 16-bit (low precision) types.
+    Undefined = 0, // Not resolved yet; UseMinPrecision() resolves it on first query.
+    UseMinPrecision, // Low precision types are min-precision types.
+    UseNativeLowPrecision // Low precision types are native 16-bit types.
+  };
+
 } // namespace DXIL
 
 } // namespace hlsl
diff --git a/include/dxc/HLSL/DxilContainer.h b/include/dxc/HLSL/DxilContainer.h
index 0c01eae4f..ce0b905f5 100644
--- a/include/dxc/HLSL/DxilContainer.h
+++ b/include/dxc/HLSL/DxilContainer.h
@@ -91,7 +91,7 @@ static const uint64_t ShaderFeatureInfo_Doubles = 0x0001;
 static const uint64_t ShaderFeatureInfo_ComputeShadersPlusRawAndStructuredBuffersViaShader4X = 0x0002;
 static const uint64_t ShaderFeatureInfo_UAVsAtEveryStage = 0x0004;
 static const uint64_t ShaderFeatureInfo_64UAVs = 0x0008;
-static const uint64_t ShaderFeatureInfo_MininumPrecision = 0x0010;
+static const uint64_t ShaderFeatureInfo_MinimumPrecision = 0x0010;
 static const uint64_t ShaderFeatureInfo_11_1_DoubleExtensions = 0x0020;
 static const uint64_t ShaderFeatureInfo_11_1_ShaderExtensions = 0x0040;
 static const uint64_t ShaderFeatureInfo_LEVEL9ComparisonFiltering = 0x0080;
@@ -105,8 +105,9 @@ static const uint64_t ShaderFeatureInfo_WaveOps = 0x4000;
 static const uint64_t ShaderFeatureInfo_Int64Ops = 0x8000;
 static const uint64_t ShaderFeatureInfo_ViewID = 0x10000;
 static const uint64_t ShaderFeatureInfo_Barycentrics = 0x20000;
+static const uint64_t ShaderFeatureInfo_NativeLowPrecision = 0x40000;
 
-static const unsigned ShaderFeatureInfoCount = 18;
+static const unsigned ShaderFeatureInfoCount = 19;
 
 struct DxilShaderFeatureInfo {
   uint64_t FeatureFlags;
diff --git a/include/dxc/HLSL/DxilModule.h b/include/dxc/HLSL/DxilModule.h
index 95c9b4355..d73f2989d 100644
--- a/include/dxc/HLSL/DxilModule.h
+++ b/include/dxc/HLSL/DxilModule.h
@@ -220,8 +220,8 @@ public:
     void SetEnableRawAndStructuredBuffers(bool flag) { m_bEnableRawAndStructuredBuffers = flag; }
     bool GetEnableRawAndStructuredBuffers() const { return m_bEnableRawAndStructuredBuffers; }
 
-    void SetEnableMinPrecision(bool flag) { m_bEnableMinPrecision = flag; }
-    bool GetEnableMinPrecision() const { return m_bEnableMinPrecision; }
+    void SetLowPrecisionPresent(bool flag) { m_bLowPrecisionPresent = flag; }
+    bool GetLowPrecisionPresent() const { return m_bLowPrecisionPresent; }
 
     void SetEnableDoubleExtensions(bool flag) { m_bEnableDoubleExtensions = flag; }
     bool GetEnableDoubleExtensions() const { return m_bEnableDoubleExtensions; }
@@ -275,6 +275,9 @@ public:
     void SetBarycentrics(bool flag) { m_bBarycentrics = flag; }
     bool GetBarycentrics() const { return m_bBarycentrics; }
 
+    void SetUseNativeLowPrecision(bool flag) { m_bUseNativeLowPrecision = flag; }
+    bool GetUseNativeLowPrecision() const { return m_bUseNativeLowPrecision; }
+
     static uint64_t GetShaderFlagsRawForCollection(); // some flags are collected (eg use 64-bit), some provided (eg allow refactoring)
     uint64_t GetShaderFlagsRaw() const;
     void SetShaderFlagsRaw(uint64_t data);
@@ -285,7 +288,7 @@ public:
     unsigned m_bEnableDoublePrecision :1; // D3D11_SB_GLOBAL_FLAG_ENABLE_DOUBLE_PRECISION_FLOAT_OPS
     unsigned m_bForceEarlyDepthStencil :1; // D3D11_SB_GLOBAL_FLAG_FORCE_EARLY_DEPTH_STENCIL
     unsigned m_bEnableRawAndStructuredBuffers :1; // D3D11_SB_GLOBAL_FLAG_ENABLE_RAW_AND_STRUCTURED_BUFFERS
-    unsigned m_bEnableMinPrecision :1; // D3D11_1_SB_GLOBAL_FLAG_ENABLE_MINIMUM_PRECISION
+    unsigned m_bLowPrecisionPresent :1; // D3D11_1_SB_GLOBAL_FLAG_ENABLE_MINIMUM_PRECISION
     unsigned m_bEnableDoubleExtensions :1; // D3D11_1_SB_GLOBAL_FLAG_ENABLE_DOUBLE_EXTENSIONS
     unsigned m_bEnableMSAD :1;        // D3D11_1_SB_GLOBAL_FLAG_ENABLE_SHADER_EXTENSIONS
     unsigned m_bAllResourcesBound :1; // D3D12_SB_GLOBAL_FLAG_ALL_RESOURCES_BOUND
@@ -310,7 +313,9 @@ public:
     unsigned m_bViewID : 1;           // SHADER_FEATURE_VIEWID
     unsigned m_bBarycentrics : 1;     // SHADER_FEATURE_BARYCENTRICS
 
-    unsigned m_align0 : 9;        // align to 32 bit.
+    unsigned m_bUseNativeLowPrecision : 1;
+
+    unsigned m_align0 : 8;        // align to 32 bit.
     uint32_t m_align1;            // align to 64 bit.
   };
 
diff --git a/include/dxc/HLSL/DxilOperations.h b/include/dxc/HLSL/DxilOperations.h
index e126dcf3f..b2ad7a758 100644
--- a/include/dxc/HLSL/DxilOperations.h
+++ b/include/dxc/HLSL/DxilOperations.h
@@ -62,6 +62,9 @@ public:
   // Return false if the given function is not a dxil function.
   bool GetOpCodeClass(const llvm::Function *F, OpCodeClass &opClass);
 
+  // Returns true if the module uses min precision types, false if it uses native low precision types.
+  bool UseMinPrecision();
+
   // LLVM helpers. Perhaps, move to a separate utility class.
   llvm::Constant *GetI1Const(bool v);
   llvm::Constant *GetI8Const(char v);
@@ -105,6 +108,8 @@ private:
   llvm::Type *m_pSplitDoubleType;
   llvm::Type *m_pInt4Type;
 
+  DXIL::LowPrecisionMode m_LowPrecisionMode;
+
   static const unsigned kNumTypeOverloads = 9;
 
   llvm::Type *m_pResRetType[kNumTypeOverloads];
diff --git a/include/dxc/HLSL/DxilTypeSystem.h b/include/dxc/HLSL/DxilTypeSystem.h
index d249e5680..e8de774db 100644
--- a/include/dxc/HLSL/DxilTypeSystem.h
+++ b/include/dxc/HLSL/DxilTypeSystem.h
@@ -211,11 +211,15 @@ public:
                               const llvm::Function *pSrcFunction,
                               const DxilTypeSystem &src);
 
+  bool UseMinPrecision();
+
 private:
   llvm::Module *m_pModule;
   StructAnnotationMap m_StructAnnotations;
   FunctionAnnotationMap m_FunctionAnnotations;
 
+  DXIL::LowPrecisionMode m_LowPrecisionMode;
+
   llvm::StructType *GetNormFloatType(CompType CT, unsigned NumComps);
 };
 
diff --git a/include/dxc/HLSL/HLModule.h b/include/dxc/HLSL/HLModule.h
index 9b693f287..5b703cffc 100644
--- a/include/dxc/HLSL/HLModule.h
+++ b/include/dxc/HLSL/HLModule.h
@@ -59,7 +59,8 @@ struct HLOptions {
   unsigned bLegacyCBufferLoad      : 1;
   unsigned PackingStrategy         : 2;
   static_assert((unsigned)DXIL::PackingStrategy::Invalid < 4, "otherwise 2 bits is not enough to store PackingStrategy");
-  unsigned unused                  : 25;
+  unsigned bUseMinPrecision        : 1;
+  unsigned unused                  : 24;
 };
 
 /// Use this class to manipulate HLDXIR of a shader.
@@ -163,6 +164,7 @@ public:
   static void GetParameterRowsAndCols(llvm::Type *Ty, unsigned &rows, unsigned &cols,
                                       DxilParameterAnnotation &paramAnnotation);
   static const char *GetLegacyDataLayoutDesc();
+  static const char *GetNewDataLayoutDesc();
 
   static void MergeGepUse(llvm::Value *V);
 
diff --git a/lib/HLSL/DxilCompType.cpp b/lib/HLSL/DxilCompType.cpp
index b9855fbbb..66f2bc7b7 100644
--- a/lib/HLSL/DxilCompType.cpp
+++ b/lib/HLSL/DxilCompType.cpp
@@ -155,6 +155,19 @@ bool CompType::Is64Bit() const {
   }
 }
 
+bool CompType::Is16Bit() const { // True for the 16-bit kinds: F16, I16, U16, SNormF16, UNormF16.
+  switch (m_Kind) {
+  case DXIL::ComponentType::F16:
+  case DXIL::ComponentType::I16:
+  case DXIL::ComponentType::SNormF16:
+  case DXIL::ComponentType::UNormF16:
+  case DXIL::ComponentType::U16:
+    return true;
+  default: // Every other kind is not 16-bit.
+    return false;
+  }
+}
+
 CompType CompType::GetBaseCompType() const {
   switch (m_Kind) {
   case Kind::I1:        return CompType(Kind::I1);
@@ -283,14 +296,21 @@ const char *CompType::GetName() const {
 }
 
 static const char *s_TypeKindHLSLNames[(unsigned)CompType::Kind::LastEntry] = {
+  "unknown",
+  "bool", "short", "unsigned short", "int", "uint", "int64_t", "uint64_t",
+  "half", "float", "double",
+  "snorm_half", "unorm_half", "snorm_float", "unorm_float", "snorm_double", "unorm_double",
+};
+
+static const char *s_TypeKindHLSLNamesMinPrecision[(unsigned)CompType::Kind::LastEntry] = {
   "unknown",
   "bool", "min16i", "min16ui", "int", "uint", "int64_t", "uint64_t",
-  "min16f", "float", "double",
+  "min16float", "float", "double",
   "snorm_min16f", "unorm_min16f", "snorm_float", "unorm_float", "snorm_double", "unorm_double",
 };
 
-const char *CompType::GetHLSLName() const {
-  return s_TypeKindHLSLNames[(unsigned)m_Kind];
+const char *CompType::GetHLSLName(bool MinPrecision) const { // MinPrecision picks the min16* name table; otherwise the short/half table is used.
+  return MinPrecision ? s_TypeKindHLSLNamesMinPrecision[(unsigned)m_Kind] : s_TypeKindHLSLNames[(unsigned)m_Kind];
 }
 
 } // namespace hlsl
diff --git a/lib/HLSL/DxilContainerReflection.cpp b/lib/HLSL/DxilContainerReflection.cpp
index 371f7cbb2..51d80888b 100644
--- a/lib/HLSL/DxilContainerReflection.cpp
+++ b/lib/HLSL/DxilContainerReflection.cpp
@@ -1939,7 +1939,7 @@ UINT64 DxilShaderReflection::GetRequiresFlags() {
   if (features & ShaderFeatureInfo_Doubles) result |= D3D_SHADER_REQUIRES_DOUBLES;
   if (features & ShaderFeatureInfo_UAVsAtEveryStage) result |= D3D_SHADER_REQUIRES_UAVS_AT_EVERY_STAGE;
   if (features & ShaderFeatureInfo_64UAVs) result |= D3D_SHADER_REQUIRES_64_UAVS;
-  if (features & ShaderFeatureInfo_MininumPrecision) result |= D3D_SHADER_REQUIRES_MINIMUM_PRECISION;
+  if (features & ShaderFeatureInfo_MinimumPrecision) result |= D3D_SHADER_REQUIRES_MINIMUM_PRECISION;
   if (features & ShaderFeatureInfo_11_1_DoubleExtensions) result |= D3D_SHADER_REQUIRES_11_1_DOUBLE_EXTENSIONS;
   if (features & ShaderFeatureInfo_11_1_ShaderExtensions) result |= D3D_SHADER_REQUIRES_11_1_SHADER_EXTENSIONS;
   if (features & ShaderFeatureInfo_LEVEL9ComparisonFiltering) result |= D3D_SHADER_REQUIRES_LEVEL_9_COMPARISON_FILTERING;
diff --git a/lib/HLSL/DxilGenerationPass.cpp b/lib/HLSL/DxilGenerationPass.cpp
index e6eb648fe..985c044b7 100644
--- a/lib/HLSL/DxilGenerationPass.cpp
+++ b/lib/HLSL/DxilGenerationPass.cpp
@@ -169,7 +169,6 @@ void InitDxilModuleFromHLModule(HLModule &H, DxilModule &M, DxilEntrySignature *
   //bool m_bDisableMathRefactoring;
   //bool m_bEnableDoublePrecision;
   //bool m_bEnableDoubleExtensions;
-  //bool m_bEnableMinPrecision;
   //M.CollectShaderFlags();
 
   //bool m_bForceEarlyDepthStencil;
@@ -177,6 +176,8 @@ void InitDxilModuleFromHLModule(HLModule &H, DxilModule &M, DxilEntrySignature *
   //bool m_bEnableMSAD;
   //M.m_ShaderFlags.SetAllResourcesBound(H.GetHLOptions().bAllResourcesBound);
 
+  M.m_ShaderFlags.SetUseNativeLowPrecision(!H.GetHLOptions().bUseMinPrecision);
+
   if (FnProps)
     M.SetShaderProperties(FnProps);
 
diff --git a/lib/HLSL/DxilModule.cpp b/lib/HLSL/DxilModule.cpp
index 28ca19011..660158224 100644
--- a/lib/HLSL/DxilModule.cpp
+++ b/lib/HLSL/DxilModule.cpp
@@ -107,7 +107,7 @@ DxilModule::ShaderFlags::ShaderFlags():
 , m_bEnableDoublePrecision(false)
 , m_bForceEarlyDepthStencil(false)
 , m_bEnableRawAndStructuredBuffers(false)
-, m_bEnableMinPrecision(false)
+, m_bLowPrecisionPresent(false)
 , m_bEnableDoubleExtensions(false)
 , m_bEnableMSAD(false)
 , m_bAllResourcesBound(false)
@@ -125,6 +125,7 @@ DxilModule::ShaderFlags::ShaderFlags():
 , m_bInt64Ops(false)
 , m_bViewID(false)
 , m_bBarycentrics(false)
+, m_bUseNativeLowPrecision(false)
 , m_align0(0)
 , m_align1(0)
 {}
@@ -228,7 +229,7 @@ unsigned DxilModule::ShaderFlags::GetGlobalFlags() const {
   Flags |= m_bEnableDoublePrecision ? DXIL::kEnableDoublePrecision : 0;
   Flags |= m_bForceEarlyDepthStencil ? DXIL::kForceEarlyDepthStencil : 0;
   Flags |= m_bEnableRawAndStructuredBuffers ? DXIL::kEnableRawAndStructuredBuffers : 0;
-  Flags |= m_bEnableMinPrecision ? DXIL::kEnableMinPrecision : 0;
+  Flags |= m_bLowPrecisionPresent && !m_bUseNativeLowPrecision? DXIL::kEnableMinPrecision : 0;
   Flags |= m_bEnableDoubleExtensions ? DXIL::kEnableDoubleExtensions : 0;
   Flags |= m_bEnableMSAD ? DXIL::kEnableMSAD : 0;
   Flags |= m_bAllResourcesBound ? DXIL::kAllResourcesBound : 0;
@@ -238,7 +239,8 @@ unsigned DxilModule::ShaderFlags::GetGlobalFlags() const {
 uint64_t DxilModule::ShaderFlags::GetFeatureInfo() const {
   uint64_t Flags = 0;
   Flags |= m_bEnableDoublePrecision ? hlsl::ShaderFeatureInfo_Doubles : 0;
-  Flags |= m_bEnableMinPrecision ? hlsl::ShaderFeatureInfo_MininumPrecision : 0;
+  Flags |= m_bLowPrecisionPresent && !m_bUseNativeLowPrecision ? hlsl::ShaderFeatureInfo_MinimumPrecision: 0;
+  Flags |= m_bLowPrecisionPresent && m_bUseNativeLowPrecision ? hlsl::ShaderFeatureInfo_NativeLowPrecision : 0;
   Flags |= m_bEnableDoubleExtensions ? hlsl::ShaderFeatureInfo_11_1_DoubleExtensions : 0;
   Flags |= m_bWaveOps ? hlsl::ShaderFeatureInfo_WaveOps : 0;
   Flags |= m_bInt64Ops ? hlsl::ShaderFeatureInfo_Int64Ops : 0;
@@ -339,7 +341,7 @@ void DxilModule::CollectShaderFlags(ShaderFlags &Flags) {
   // fma has dxil op. Others should check IR instruction div/cast.
   bool hasDoubleExtension = false;
   bool has64Int = false;
-  bool has16FloatInt = false;
+  bool has16 = false;
   bool hasWaveOps = false;
   bool hasCheckAccessFully = false;
   bool hasMSAD = false;
@@ -395,8 +397,8 @@ void DxilModule::CollectShaderFlags(ShaderFlags &Flags) {
           }
         }
         
-        has16FloatInt |= isHalf;
-        has16FloatInt |= isInt16;
+        has16 |= isHalf;
+        has16 |= isInt16;
         has64Int |= isInt64;
 
         if (CallInst *CI = dyn_cast<CallInst>(&I)) {
@@ -474,7 +476,7 @@ void DxilModule::CollectShaderFlags(ShaderFlags &Flags) {
 
   Flags.SetEnableDoublePrecision(hasDouble);
   Flags.SetInt64Ops(has64Int);
-  Flags.SetEnableMinPrecision(has16FloatInt);
+  Flags.SetLowPrecisionPresent(has16);
   Flags.SetEnableDoubleExtensions(hasDoubleExtension);
   Flags.SetWaveOps(hasWaveOps);
   Flags.SetTiledResources(hasCheckAccessFully);
@@ -582,7 +584,7 @@ uint64_t DxilModule::ShaderFlags::GetShaderFlagsRawForCollection() {
   ShaderFlags Flags;
   Flags.SetEnableDoublePrecision(true);
   Flags.SetInt64Ops(true);
-  Flags.SetEnableMinPrecision(true);
+  Flags.SetLowPrecisionPresent(true);
   Flags.SetEnableDoubleExtensions(true);
   Flags.SetWaveOps(true);
   Flags.SetTiledResources(true);
@@ -1496,10 +1498,10 @@ MDTuple *DxilModule::EmitDxilShaderProperties() {
   vector<Metadata *> MDVals;
 
   // DXIL shader flags.
-  uint64_t Flags = m_ShaderFlags.GetShaderFlagsRaw();
-  if (Flags != 0) {
+  uint64_t flag = m_ShaderFlags.GetShaderFlagsRaw();
+  if (flag != 0) {
     MDVals.emplace_back(m_pMDHelper->Uint32ToConstMD(DxilMDHelper::kDxilShaderFlagsTag));
-    MDVals.emplace_back(m_pMDHelper->Uint64ToConstMD(Flags));
+    MDVals.emplace_back(m_pMDHelper->Uint64ToConstMD(flag));
   }
 
   // Compute shader.
diff --git a/lib/HLSL/DxilOperations.cpp b/lib/HLSL/DxilOperations.cpp
index db997d709..4ffacf8b4 100644
--- a/lib/HLSL/DxilOperations.cpp
+++ b/lib/HLSL/DxilOperations.cpp
@@ -11,6 +11,8 @@
 
 #include "dxc/HLSL/DxilOperations.h"
 #include "dxc/Support/Global.h"
+#include "dxc/HLSL/DxilModule.h"
+#include "dxc/HLSL/HLModule.h"
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/IR/LLVMContext.h"
@@ -430,7 +432,8 @@ static Type *GetOrCreateStructType(LLVMContext &Ctx, ArrayRef<Type*> types, Stri
 //
 OP::OP(LLVMContext &Ctx, Module *pModule)
 : m_Ctx(Ctx)
-, m_pModule(pModule) {
+, m_pModule(pModule)
+, m_LowPrecisionMode(DXIL::LowPrecisionMode::Undefined) {
   memset(m_pResRetType, 0, sizeof(m_pResRetType));
   memset(m_pCBufferRetType, 0, sizeof(m_pCBufferRetType));
   memset(m_OpCodeClassCache, 0, sizeof(m_OpCodeClassCache));
@@ -783,6 +786,23 @@ bool OP::GetOpCodeClass(const Function *F, OP::OpCodeClass &opClass) {
   return true;
 }
 
+bool OP::UseMinPrecision() { // True when the module uses min precision rather than native low precision; the answer is cached.
+  if (m_LowPrecisionMode == DXIL::LowPrecisionMode::Undefined) { // Resolve lazily on the first query.
+    if (m_pModule->HasDxilModule()) { // Test presence explicitly: the address of a returned reference is never null.
+      m_LowPrecisionMode = m_pModule->GetDxilModule().m_ShaderFlags.GetUseNativeLowPrecision() ?
+        DXIL::LowPrecisionMode::UseNativeLowPrecision : DXIL::LowPrecisionMode::UseMinPrecision;
+    }
+    else if (m_pModule->HasHLModule()) { // No DxilModule: read the setting from the HL options instead.
+      m_LowPrecisionMode = m_pModule->GetHLModule().GetHLOptions().bUseMinPrecision ?
+        DXIL::LowPrecisionMode::UseMinPrecision : DXIL::LowPrecisionMode::UseNativeLowPrecision;
+    }
+    else { // Neither module exists: the mode stays Undefined and this returns false.
+      DXASSERT(false, "otherwise module doesn't contain either HLModule or Dxil Module.");
+    }
+  }
+  return m_LowPrecisionMode == DXIL::LowPrecisionMode::UseMinPrecision;
+}
+
 llvm::Type *OP::GetOverloadType(OpCode OpCode, llvm::Function *F) {
   DXASSERT(F, "not work on nullptr");
   Type *Ty = F->getReturnType();
@@ -940,15 +960,23 @@ Type *OP::GetCBufferRetType(Type *pOverloadType) {
   if (m_pCBufferRetType[TypeSlot] == nullptr) {
     string TypeName("dx.types.CBufRet.");
     TypeName += GetOverloadTypeName(TypeSlot);
-    if (!pOverloadType->isDoubleTy()) {
-      Type *FieldTypes[4] = { pOverloadType, pOverloadType, pOverloadType, pOverloadType };
-      m_pCBufferRetType[TypeSlot] = GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule);
-    } else {
+    if (pOverloadType->isDoubleTy()) {
       Type *FieldTypes[2] = { pOverloadType, pOverloadType };
       m_pCBufferRetType[TypeSlot] = GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule);
     }
+    else if (!UseMinPrecision() && pOverloadType->isHalfTy()) {
+      TypeName += ".8"; // dx.types.CBufRet.fp16.8 for buffer of 8 halves
+      Type *FieldTypes[8] = {
+          pOverloadType, pOverloadType, pOverloadType, pOverloadType,
+          pOverloadType, pOverloadType, pOverloadType, pOverloadType,
+      };
+      m_pCBufferRetType[TypeSlot] = GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule);
+    }
+    else {
+      Type *FieldTypes[4] = { pOverloadType, pOverloadType, pOverloadType, pOverloadType };
+      m_pCBufferRetType[TypeSlot] = GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule);
+    }
   }
-
   return m_pCBufferRetType[TypeSlot];
 }
 
diff --git a/lib/HLSL/DxilTypeSystem.cpp b/lib/HLSL/DxilTypeSystem.cpp
index b45320d98..832b880fb 100644
--- a/lib/HLSL/DxilTypeSystem.cpp
+++ b/lib/HLSL/DxilTypeSystem.cpp
@@ -8,6 +8,8 @@
 ///////////////////////////////////////////////////////////////////////////////
 
 #include "dxc/HLSL/DxilTypeSystem.h"
+#include "dxc/HLSL/DxilModule.h"
+#include "dxc/HLSL/HLModule.h"
 #include "dxc/Support/Global.h"
 
 #include "llvm/IR/Module.h"
@@ -194,8 +196,8 @@ void DxilFunctionFPFlag::SetFlagValue(const uint32_t flag) {
 // DxilStructAnnotationSystem class methods.
 //
 DxilTypeSystem::DxilTypeSystem(Module *pModule)
-: m_pModule(pModule) {
-}
+    : m_pModule(pModule),
+      m_LowPrecisionMode(DXIL::LowPrecisionMode::Undefined) {}
 
 DxilStructAnnotation *DxilTypeSystem::AddStructAnnotation(const StructType *pStructType) {
   DXASSERT_NOMSG(m_StructAnnotations.find(pStructType) == m_StructAnnotations.end());
@@ -451,4 +453,21 @@ DXIL::SigPointKind SigPointFromInputQual(DxilParamInputQual Q, DXIL::ShaderKind
   return DXIL::SigPointKind::Invalid;
 }
 
+bool DxilTypeSystem::UseMinPrecision() { // True when the module uses min precision rather than native low precision; the answer is cached.
+  if (m_LowPrecisionMode == DXIL::LowPrecisionMode::Undefined) { // Resolve lazily on the first query.
+    if (m_pModule->HasDxilModule()) { // Test presence explicitly: the address of a returned reference is never null.
+      m_LowPrecisionMode = m_pModule->GetDxilModule().m_ShaderFlags.GetUseNativeLowPrecision() ?
+        DXIL::LowPrecisionMode::UseNativeLowPrecision : DXIL::LowPrecisionMode::UseMinPrecision;
+    }
+    else if (m_pModule->HasHLModule()) { // No DxilModule: read the setting from the HL options instead.
+      m_LowPrecisionMode = m_pModule->GetHLModule().GetHLOptions().bUseMinPrecision ?
+        DXIL::LowPrecisionMode::UseMinPrecision : DXIL::LowPrecisionMode::UseNativeLowPrecision;
+    }
+    else { // Neither module exists: the mode stays Undefined and this returns false.
+      DXASSERT(false, "otherwise module doesn't contain either HLModule or Dxil Module.");
+    }
+  }
+  return m_LowPrecisionMode == DXIL::LowPrecisionMode::UseMinPrecision;
+}
+
 } // namespace hlsl
diff --git a/lib/HLSL/DxilUtil.cpp b/lib/HLSL/DxilUtil.cpp
index 9782887a3..da9f30103 100644
--- a/lib/HLSL/DxilUtil.cpp
+++ b/lib/HLSL/DxilUtil.cpp
@@ -13,6 +13,7 @@
 #include "llvm/IR/GlobalVariable.h"
 #include "dxc/HLSL/DxilTypeSystem.h"
 #include "dxc/HLSL/DxilUtil.h"
+#include "dxc/HLSL/DxilModule.h"
 #include "llvm/IR/Module.h"
 
 using namespace llvm;
@@ -35,12 +36,14 @@ unsigned
 GetLegacyCBufferFieldElementSize(DxilFieldAnnotation &fieldAnnotation,
                                            llvm::Type *Ty,
                                            DxilTypeSystem &typeSys) {
+
   while (isa<ArrayType>(Ty)) {
     Ty = Ty->getArrayElementType();
   }
 
   // Bytes.
-  unsigned compSize = fieldAnnotation.GetCompType().Is64Bit()?8:4;
+  CompType compType = fieldAnnotation.GetCompType();
+  unsigned compSize = compType.Is64Bit() ? 8 : compType.Is16Bit() && !typeSys.UseMinPrecision() ? 2 : 4;
   unsigned fieldSize = compSize;
   if (Ty->isVectorTy()) {
     fieldSize *= Ty->getVectorNumElements();
diff --git a/lib/HLSL/DxilValidation.cpp b/lib/HLSL/DxilValidation.cpp
index ac43283f9..f47e7a5b3 100644
--- a/lib/HLSL/DxilValidation.cpp
+++ b/lib/HLSL/DxilValidation.cpp
@@ -1999,7 +1999,8 @@ static bool IsDxilBuiltinStructType(StructType *ST, hlsl::OP *hlslOP) {
   unsigned EltNum = ST->getNumElements();
   switch (EltNum) {
   case 2:
-  case 4: {
+  case 4:
+  case 8: { // 2 for doubles, 8 for halfs.
     Type *EltTy = ST->getElementType(0);
     return ST == hlslOP->GetCBufferRetType(EltTy);
   } break;
diff --git a/lib/HLSL/HLModule.cpp b/lib/HLSL/HLModule.cpp
index 4eef68144..d89f7e461 100644
--- a/lib/HLSL/HLModule.cpp
+++ b/lib/HLSL/HLModule.cpp
@@ -862,6 +862,12 @@ const char *HLModule::GetLegacyDataLayoutDesc() {
   return kLegacyLayoutString.data();
 }
 
+// Data layout string for native low precision types; it declares half as 16-bit aligned (f16:16).
+static const StringRef kNewLayoutString = "e-m:e-p:32:32-i1:32:32-i8:32:32-i16:32:32-i64:64-f16:16-f80:32-n8:16:32-a:0:32-S320";
+const char *HLModule::GetNewDataLayoutDesc() { // Returns a pointer to the layout string above.
+  return kNewLayoutString.data();
+}
+
 static Value *MergeGEP(GEPOperator *SrcGEP, GetElementPtrInst *GEP) {
   IRBuilder<> Builder(GEP);
   SmallVector<Value *, 8> Indices;
diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp
index a0659cbe7..6939328b5 100644
--- a/lib/HLSL/HLOperationLower.cpp
+++ b/lib/HLSL/HLOperationLower.cpp
@@ -4787,15 +4787,18 @@ Value *GenerateCBLoadLegacy(Value *handle, Value *legacyIdx,
                             unsigned channelOffset, Type *EltTy,
                             unsigned vecSize, OP *hlslOP,
                             IRBuilder<> &Builder) {
-  DXASSERT((channelOffset + vecSize) <= 4, "legacy cbuffer don't across 16 bytes register.");
   Constant *OpArg = hlslOP->GetU32Const((unsigned)OP::OpCode::CBufferLoadLegacy);
 
   Type *i1Ty = Type::getInt1Ty(EltTy->getContext());
   Type *doubleTy = Type::getDoubleTy(EltTy->getContext());
   Type *i64Ty = Type::getInt64Ty(EltTy->getContext());
+  Type *halfTy = Type::getHalfTy(EltTy->getContext());
+
   bool isBool = EltTy == i1Ty;
   bool is64 = (EltTy == doubleTy) | (EltTy == i64Ty);
-  bool isNormal = !isBool && !is64;
+  bool is16 = EltTy == halfTy && !hlslOP->UseMinPrecision();
+  bool isNormal = !isBool && !is64 && !is16;
+  DXASSERT(is16 || (channelOffset + vecSize) <= 4, "legacy cbuffer don't across 16 bytes register.");
   if (isNormal) {
     Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
     Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
@@ -4805,10 +4808,21 @@ Value *GenerateCBLoadLegacy(Value *handle, Value *legacyIdx,
       Result = Builder.CreateInsertElement(Result, NewElt, i);
     }
     return Result;
-  } else if (is64) {
+  } else if (is16) {
     Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
     Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
     Value *Result = UndefValue::get(VectorType::get(EltTy, vecSize));
+    // index aligned by 2 bytes not 4 bytes
+    channelOffset *= 2;
+    for (unsigned i = 0; i < vecSize; ++i) {
+      Value *NewElt = Builder.CreateExtractValue(loadLegacy, channelOffset + i);
+      Result = Builder.CreateInsertElement(Result, NewElt, i);
+    }
+    return Result;
+  } else if (is64) {
+    Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
+    Value *loadLegacy = Builder.CreateCall(CBLoad, { OpArg, handle, legacyIdx });
+    Value *Result = UndefValue::get(VectorType::get(EltTy, vecSize));
     unsigned smallVecSize = 2;
     if (vecSize < smallVecSize)
       smallVecSize = vecSize;
diff --git a/tools/clang/include/clang/Basic/LangOptions.h b/tools/clang/include/clang/Basic/LangOptions.h
index 082c69bd6..ccf6cb444 100644
--- a/tools/clang/include/clang/Basic/LangOptions.h
+++ b/tools/clang/include/clang/Basic/LangOptions.h
@@ -156,7 +156,7 @@ public:
   unsigned RootSigMajor;
   unsigned RootSigMinor;
   bool IsHLSLLibrary;
-  bool NoMinPrecision; // use strict precision, not min precision.
+  bool UseMinPrecision; // use min precision, not native precision.
   // MS Change Ends
 
   bool SPIRV = false;  // SPIRV Change
diff --git a/tools/clang/lib/CodeGen/CGHLSLMS.cpp b/tools/clang/lib/CodeGen/CGHLSLMS.cpp
index 0898c762b..bd11bd17c 100644
--- a/tools/clang/lib/CodeGen/CGHLSLMS.cpp
+++ b/tools/clang/lib/CodeGen/CGHLSLMS.cpp
@@ -311,7 +311,7 @@ void clang::CompileRootSignature(
 //
 CGMSHLSLRuntime::CGMSHLSLRuntime(CodeGenModule &CGM)
     : CGHLSLRuntime(CGM), Context(CGM.getLLVMContext()), EntryFunc(nullptr),
-      TheModule(CGM.getModule()), legacyLayout(HLModule::GetLegacyDataLayoutDesc()),
+      TheModule(CGM.getModule()), legacyLayout(CGM.getLangOpts().UseMinPrecision ? HLModule::GetLegacyDataLayoutDesc() : HLModule::GetNewDataLayoutDesc()),
       CBufferType(
           llvm::StructType::create(TheModule.getContext(), "ConstantBuffer")) {
   const hlsl::ShaderModel *SM =
@@ -348,6 +348,9 @@ CGMSHLSLRuntime::CGMSHLSLRuntime(CodeGenModule &CGM)
   opts.bLegacyCBufferLoad = !CGM.getCodeGenOpts().HLSLNotUseLegacyCBufLoad;
   opts.bAllResourcesBound = CGM.getCodeGenOpts().HLSLAllResourcesBound;
   opts.PackingStrategy = CGM.getCodeGenOpts().HLSLSignaturePackingStrategy;
+
+  opts.bUseMinPrecision = CGM.getLangOpts().UseMinPrecision;
+
   m_pHLModule->SetHLOptions(opts);
 
   m_pHLModule->SetValidatorVersion(CGM.getCodeGenOpts().HLSLValidatorMajorVer, CGM.getCodeGenOpts().HLSLValidatorMinorVer);
@@ -385,6 +388,7 @@ CGMSHLSLRuntime::CGMSHLSLRuntime(CodeGenModule &CGM)
 
   // set Float Denorm Mode
   m_pHLModule->SetFPDenormMode(CGM.getCodeGenOpts().HLSLFlushFPDenorm);
+
 }
 
 bool CGMSHLSLRuntime::IsHlslObjectType(llvm::Type *Ty) {
@@ -484,7 +488,7 @@ StringToTessOutputPrimitive(StringRef primitive) {
 }
 
 static unsigned AlignTo8Bytes(unsigned offset, bool b8BytesAlign) {
-  DXASSERT((offset & 0x3) == 0, "offset should be divisible by 4");
+  DXASSERT((offset & 0x1) == 0, "offset should be divisible by 2");
   if (!b8BytesAlign)
     return offset;
   else if ((offset & 0x7) == 0)
@@ -2605,11 +2609,16 @@ void CGMSHLSLRuntime::SetEntryFunction() {
 
 // Here the size is CB size. So don't need check type.
 static unsigned AlignCBufferOffset(unsigned offset, unsigned size, llvm::Type *Ty) {
+  DXASSERT(!(offset & 1), "otherwise we have an invalid offset.");
   // offset is already 4 bytes aligned.
   bool b8BytesAlign = Ty->isDoubleTy();
   if (llvm::IntegerType *IT = dyn_cast<llvm::IntegerType>(Ty)) {
     b8BytesAlign = IT->getBitWidth() > 32;
   }
+  // If offset is divisible by 2 and not 4, then increase the offset by 2 for dword alignment.
+  if (!Ty->getScalarType()->isHalfTy() && (offset & 0x2)) {
+    offset += 2;
+  }
 
   // Align it to 4 x 4bytes.
   if (unsigned remainder = (offset & 0xf)) {
diff --git a/tools/clang/lib/Frontend/CompilerInvocation.cpp b/tools/clang/lib/Frontend/CompilerInvocation.cpp
index f88bf6fe2..efac1e686 100644
--- a/tools/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/tools/clang/lib/Frontend/CompilerInvocation.cpp
@@ -1733,7 +1733,7 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
 
   // Enable low precision for HLSL 2018
   // TODO: should we tie low precision to HLSL2018 only?
-  Opts.NoMinPrecision = Args.hasArg(options::OPT_no_min_precision);
+  Opts.UseMinPrecision = !Args.hasArg(options::OPT_no_min_precision);
 #endif // #ifdef MS_SUPPORT_VARIABLE_LANGOPTS
 }
 
diff --git a/tools/clang/lib/Sema/SemaExpr.cpp b/tools/clang/lib/Sema/SemaExpr.cpp
index 60dd32fcf..54229cf6d 100644
--- a/tools/clang/lib/Sema/SemaExpr.cpp
+++ b/tools/clang/lib/Sema/SemaExpr.cpp
@@ -3370,7 +3370,7 @@ ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) {
     else if (getLangOpts().HLSL && Literal.isLong)
       Ty = Context.DoubleTy;
     else if (getLangOpts().HLSL && Literal.isHalf) {
-      Ty = getLangOpts().NoMinPrecision ? Context.HalfTy : Context.FloatTy;
+      Ty = getLangOpts().UseMinPrecision ? Context.FloatTy : Context.HalfTy;
     }
     // HLSL Change Ends
     else if (!Literal.isLong)
diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp
index ca726b21f..1eef6d83a 100644
--- a/tools/clang/lib/Sema/SemaHLSL.cpp
+++ b/tools/clang/lib/Sema/SemaHLSL.cpp
@@ -3037,16 +3037,16 @@ public:
 
   void WarnMinPrecision(HLSLScalarType type, SourceLocation loc) {
     // TODO: enalbe this once we introduce precise master option
-    bool NoMinPrecision = m_context->getLangOpts().NoMinPrecision;
+    bool UseMinPrecision = m_context->getLangOpts().UseMinPrecision;
     if (type == HLSLScalarType_int_min12) {
       const char *PromotedType = "min16int"; // TODO: print int16 once we support true int16/uint16 support.
       m_sema->Diag(loc, diag::warn_hlsl_sema_minprecision_promotion) << "min12int" << PromotedType;
     }
     else if (type == HLSLScalarType_float_min10) {
-      const char *PromotedType = NoMinPrecision ? "half": "min16float";
+      const char *PromotedType = UseMinPrecision ? "min16float": "half";
       m_sema->Diag(loc, diag::warn_hlsl_sema_minprecision_promotion) << "min10float" << PromotedType;
     }
-    if (NoMinPrecision) {
+    if (!UseMinPrecision) {
       if (type == HLSLScalarType_float_min16) {
         m_sema->Diag(loc, diag::warn_hlsl_sema_minprecision_promotion) << "min16float" << "half";
       }
@@ -3287,7 +3287,7 @@ public:
       case BuiltinType::Bool: return AR_BASIC_BOOL;
       case BuiltinType::Double: return AR_BASIC_FLOAT64;
       case BuiltinType::Float: return AR_BASIC_FLOAT32;
-      case BuiltinType::Half: return m_context->getLangOpts().NoMinPrecision ? AR_BASIC_FLOAT16 : AR_BASIC_MIN16FLOAT;
+      case BuiltinType::Half: return m_context->getLangOpts().UseMinPrecision ? AR_BASIC_MIN16FLOAT : AR_BASIC_FLOAT16;
       case BuiltinType::Int: return AR_BASIC_INT32;
       case BuiltinType::UInt: return AR_BASIC_UINT32;
       case BuiltinType::Short: return AR_BASIC_MIN16INT;    // rather than AR_BASIC_INT16
@@ -3394,7 +3394,7 @@ public:
     case AR_OBJECT_NULL:          return m_context->VoidTy;
     case AR_BASIC_BOOL:           return m_context->BoolTy;
     case AR_BASIC_LITERAL_FLOAT:  return m_context->LitFloatTy;
-    case AR_BASIC_FLOAT16:        return m_context->getLangOpts().NoMinPrecision ? m_context->HalfTy : m_context->FloatTy;
+    case AR_BASIC_FLOAT16:        return m_context->getLangOpts().UseMinPrecision ? m_context->FloatTy : m_context->HalfTy;
     case AR_BASIC_FLOAT32_PARTIAL_PRECISION: return m_context->FloatTy;
     case AR_BASIC_FLOAT32:        return m_context->FloatTy;
     case AR_BASIC_FLOAT64:        return m_context->DoubleTy;
@@ -4424,7 +4424,7 @@ void HLSLExternalSource::AddBaseTypes()
   m_baseTypes[HLSLScalarType_int] = m_context->IntTy;
   m_baseTypes[HLSLScalarType_uint] = m_context->UnsignedIntTy;
   m_baseTypes[HLSLScalarType_dword] = m_context->UnsignedIntTy;
-  m_baseTypes[HLSLScalarType_half] = m_context->getLangOpts().NoMinPrecision ? m_context->HalfTy : m_context->FloatTy;
+  m_baseTypes[HLSLScalarType_half] = m_context->getLangOpts().UseMinPrecision ? m_context->FloatTy : m_context->HalfTy;
   m_baseTypes[HLSLScalarType_float] = m_context->FloatTy;
   m_baseTypes[HLSLScalarType_double] = m_context->DoubleTy;
   m_baseTypes[HLSLScalarType_float_min10] = m_context->HalfTy;
diff --git a/tools/clang/test/CodeGenHLSL/cbufferHalf.hlsl b/tools/clang/test/CodeGenHLSL/cbufferHalf.hlsl
new file mode 100644
index 000000000..5c0d63cf8
--- /dev/null
+++ b/tools/clang/test/CodeGenHLSL/cbufferHalf.hlsl
@@ -0,0 +1,110 @@
+// RUN: %dxc -E main -T ps_6_0 -no-min-precision %s | FileCheck %s
+
+// CHECK: Use native low precision
+// CHECK: cbuffer Foo
+// CHECK: {
+// CHECK:   struct dx.alignment.legacy.Foo
+// CHECK:   {
+// CHECK:       half f_h1;                                    ; Offset:    0
+// CHECK:       float3 f_f3;                                  ; Offset:    4
+
+// CHECK:       half2 f_h2;                                   ; Offset:   16
+// CHECK:       float3 f_f3_1;                                ; Offset:   20
+
+// CHECK:       float2 f_f2;                                  ; Offset:   32
+// CHECK:       half4 f_h4;                                   ; Offset:   40
+
+// CHECK:       half2 f_h2_1;                                 ; Offset:   48
+// CHECK:       half3 f_h3;                                   ; Offset:   52
+
+// CHECK:       double f_d1;                                  ; Offset:   64
+// CHECK:   } Foo                                           ; Offset:    0 Size:    72
+// CHECK: }
+
+// CHECK: cbuffer Bar
+// CHECK: {
+// CHECK:   struct dx.alignment.legacy.Bar
+// CHECK:   {
+// CHECK:       half b_h1;                                    ; Offset:    0
+// CHECK:       half b_h2;                                    ; Offset:    2
+// CHECK:       half b_h3;                                    ; Offset:    4
+// CHECK:       half2 b_h4;                                   ; Offset:    6
+// CHECK:       half3 b_h5;                                   ; Offset:   10
+
+// CHECK:       half3 b_h7;                                   ; Offset:   16
+// CHECK:       half4 b_h8;                                   ; Offset:   22
+// CHECK:       half b_h9;                                    ; Offset:   30
+
+// CHECK:       half4 b_h10;                                  ; Offset:   32
+// CHECK:       half3 b_h11;                                  ; Offset:   40
+
+// CHECK:       half2 b_h12;                                  ; Offset:   48
+// CHECK:       half3 b_h13;                                  ; Offset:   52
+// CHECK:       half2 b_h14;                                  ; Offset:   58
+// CHECK:   } Bar                                             ; Offset:    0 Size:    62
+// CHECK: }
+
+// CHECK: %dx.types.CBufRet.f16.8 = type { half, half, half, half, half, half, half, half }
+
+// CHECK: %Foo_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo_buffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f32 {{%[0-9]+}}, 1
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 1
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo_buffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f32 {{%[0-9]+}}, 3
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo_buffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f32 {{%[0-9]+}}, 0
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 4
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 5
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 6
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 7
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 1
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 2
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 3
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 4
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %Foo_buffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f64 {{%[0-9]+}}, 0
+
+cbuffer Foo { // Mixed half/float/double members; offsets below mirror the expectations at the top of this file.
+  half f_h1;      // expected offset 0
+  float3 f_f3;    // expected offset 4
+  half2 f_h2;     // expected offset 16
+  float3 f_f3_1;  // expected offset 20
+  float2 f_f2;    // expected offset 32
+  half4 f_h4;     // expected offset 40
+  half2 f_h2_1;   // expected offset 48
+  half3 f_h3;     // expected offset 52
+  double f_d1;    // expected offset 64; expected total size 72
+}
+
+cbuffer Bar { // Half-only members; blank lines separate groups whose expected offsets start at 0, 16, 32 and 48.
+  half b_h1;    // expected offset 0
+  half b_h2;    // expected offset 2
+  half b_h3;    // expected offset 4
+  half2 b_h4;   // expected offset 6
+  half3 b_h5;   // expected offset 10 (the member numbering skips b_h6)
+  
+  half3 b_h7;   // expected offset 16
+  half4 b_h8;   // expected offset 22
+  half b_h9;    // expected offset 30
+
+  half4 b_h10;  // expected offset 32
+  half3 b_h11;  // expected offset 40
+  
+  half2 b_h12;  // expected offset 48 (not 46, per the expectations above)
+  half3 b_h13;  // expected offset 52
+  half2 b_h14;  // expected offset 58; expected total size 62
+}
+
+float4 main() : SV_Target { // Sums components of every Foo member, in the same order as the expected load sequence above.
+  return f_h1 + f_f3.x + f_h2.x + f_h2.y + f_f3_1.z + f_f2.x + f_h4.x + f_h4.y 
+  + f_h4.z + f_h4.w + f_h2_1.x + f_h2_1.y + f_h3.x + f_h3.y + f_h3.z + f_d1
+  + b_h1; // the only Bar member read; the expectations above name %Foo_buffer loads only
+}
\ No newline at end of file
diff --git a/tools/clang/test/CodeGenHLSL/cbufferMinPrec.hlsl b/tools/clang/test/CodeGenHLSL/cbufferMinPrec.hlsl
new file mode 100644
index 000000000..cfa2576e1
--- /dev/null
+++ b/tools/clang/test/CodeGenHLSL/cbufferMinPrec.hlsl
@@ -0,0 +1,62 @@
+// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
+
+// CHECK: Minimum-precision data types
+// CHECK: cbuffer Foo
+// CHECK: {
+// CHECK:   struct dx.alignment.legacy.Foo
+// CHECK:   {
+// CHECK:       min16float h1;                                ; Offset:    0
+// CHECK:       float3 f3;                                    ; Offset:    4
+// CHECK:       min16float2 h2;                               ; Offset:   16
+// CHECK:       float3 f3_1;                                  ; Offset:   32
+// CHECK:       float2 f2;                                    ; Offset:   48
+// CHECK:       min16float4 h4;                               ; Offset:   64
+// CHECK:       min16float2 h2_1;                             ; Offset:   80
+// CHECK:       min16float3 h3;                               ; Offset:   96
+// CHECK:       double d1;                                    ; Offset:  112
+// CHECK:   } Foo                                             ; Offset:    0 Size:   120
+// CHECK: }
+
+// CHECK: %dx.types.CBufRet.f16 = type { half, half, half, half }
+
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 0
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo_buffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f32 {{%[0-9]+}}, 1
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 0
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 1
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo_buffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f32 {{%[0-9]+}}, 2
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo_buffer, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f32 {{%[0-9]+}}, 0
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 0
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 1
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 2
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 3
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 0
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 1
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 6)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 0
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 1
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 2
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %Foo_buffer, i32 7)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f64 {{%[0-9]+}}, 0
+
+cbuffer Foo { // min16float/float/double members; offsets below mirror the expectations at the top of this file.
+  min16float h1;     // expected offset 0
+  float3 f3;         // expected offset 4
+  min16float2 h2;    // expected offset 16
+  float3 f3_1;       // expected offset 32
+  float2 f2;         // expected offset 48
+  min16float4 h4;    // expected offset 64
+  min16float2 h2_1;  // expected offset 80
+  min16float3 h3;    // expected offset 96
+  double d1;         // expected offset 112; expected total size 120
+}
+
+float4 main() : SV_Target { // Sums components of every Foo member, in the same order as the expected load sequence above.
+  return h1 + f3.x + h2.x + h2.y + f3_1.z + f2.x + h4.x + h4.y + h4.z + h4.w + h2_1.x + h2_1.y + h3.x + h3.y + h3.z + d1;
+}
\ No newline at end of file
diff --git a/tools/clang/tools/dxcompiler/dxcdisassembler.cpp b/tools/clang/tools/dxcompiler/dxcdisassembler.cpp
index 76ea7195a..2e5047996 100644
--- a/tools/clang/tools/dxcompiler/dxcdisassembler.cpp
+++ b/tools/clang/tools/dxcompiler/dxcdisassembler.cpp
@@ -313,6 +313,7 @@ PCSTR g_pFeatureInfoNames[] = {
     "64-Bit integer",
     "View Instancing",
     "Barycentrics",
+    "Use native low precision"
 };
 static_assert(_countof(g_pFeatureInfoNames) == ShaderFeatureInfoCount, "g_pFeatureInfoNames needs to be updated");
 
@@ -565,12 +566,12 @@ void PrintStructLayout(StructType *ST, DxilTypeSystem &typeSys,
                               unsigned sizeOfStruct = 0);
 
 void PrintTypeAndName(llvm::Type *Ty, DxilFieldAnnotation &annotation,
-                             std::string &StreamStr, unsigned arraySize) {
+                             std::string &StreamStr, unsigned arraySize, bool minPrecision) {
   raw_string_ostream Stream(StreamStr);
   while (Ty->isArrayTy())
     Ty = Ty->getArrayElementType();
 
-  const char *compTyName = annotation.GetCompType().GetHLSLName();
+  const char *compTyName = annotation.GetCompType().GetHLSLName(minPrecision);
   if (annotation.HasMatrixAnnotation()) {
     const DxilMatrixAnnotation &Matrix = annotation.GetMatrixAnnotation();
     switch (Matrix.Orientation) {
@@ -650,7 +651,7 @@ void PrintFieldLayout(llvm::Type *Ty, DxilFieldAnnotation &annotation,
     } else {
       (OS << comment).indent(indent);
       std::string NameTypeStr;
-      PrintTypeAndName(Ty, annotation, NameTypeStr, arraySize);
+      PrintTypeAndName(Ty, annotation, NameTypeStr, arraySize, typeSys.UseMinPrecision());
       OS << left_justify(NameTypeStr, offsetIndent);
 
       // Offset
diff --git a/tools/clang/tools/dxcompiler/dxcompilerobj.cpp b/tools/clang/tools/dxcompiler/dxcompilerobj.cpp
index b88e771d3..ccf59cd91 100644
--- a/tools/clang/tools/dxcompiler/dxcompilerobj.cpp
+++ b/tools/clang/tools/dxcompiler/dxcompilerobj.cpp
@@ -789,7 +789,7 @@ public:
     compiler.getLangOpts().HLSL2016 = Opts.HLSL2016;
     compiler.getLangOpts().HLSL2017 = Opts.HLSL2017;
 
-    compiler.getLangOpts().NoMinPrecision = Opts.NoMinPrecision;
+    compiler.getLangOpts().UseMinPrecision = !Opts.NoMinPrecision;
 
 // SPIRV change starts
 #ifdef ENABLE_SPIRV_CODEGEN
diff --git a/tools/clang/unittests/HLSL/CompilerTest.cpp b/tools/clang/unittests/HLSL/CompilerTest.cpp
index 73de596f0..bb72388b4 100644
--- a/tools/clang/unittests/HLSL/CompilerTest.cpp
+++ b/tools/clang/unittests/HLSL/CompilerTest.cpp
@@ -478,7 +478,9 @@ public:
   TEST_METHOD(CodeGenCbuffer6_51)
   TEST_METHOD(CodeGenCbufferAlloc)
   TEST_METHOD(CodeGenCbufferAllocLegacy)
+  TEST_METHOD(CodeGenCbufferHalf)
   TEST_METHOD(CodeGenCbufferInLoop)
+  TEST_METHOD(CodeGenCbufferMinPrec)
   TEST_METHOD(CodeGenClass)
   TEST_METHOD(CodeGenClip)
   TEST_METHOD(CodeGenClipPlanes)
@@ -3051,10 +3053,18 @@ TEST_F(CompilerTest, CodeGenCbufferAllocLegacy) {
   CodeGenTestCheck(L"..\\CodeGenHLSL\\cbufferAlloc_legacy.hlsl");
 }
 
+TEST_F(CompilerTest, CodeGenCbufferHalf) { // Test case for cbufferHalf.hlsl.
+  CodeGenTestCheck(L"..\\CodeGenHLSL\\cbufferHalf.hlsl"); // NOTE(review): presumably runs the file's embedded command line and expectations - confirm against CodeGenTestCheck.
+}
+
 TEST_F(CompilerTest, CodeGenCbufferInLoop) {
   CodeGenTest(L"..\\CodeGenHLSL\\cbufferInLoop.hlsl");
 }
 
+TEST_F(CompilerTest, CodeGenCbufferMinPrec) { // Test case for cbufferMinPrec.hlsl.
+  CodeGenTestCheck(L"..\\CodeGenHLSL\\cbufferMinPrec.hlsl"); // NOTE(review): presumably runs the file's embedded command line and expectations - confirm against CodeGenTestCheck.
+}
+
 TEST_F(CompilerTest, CodeGenClass) {
   CodeGenTestCheck(L"..\\CodeGenHLSL\\class.hlsl");
 }