diff --git a/include/dxc/HLSL/DxilCompType.h b/include/dxc/HLSL/DxilCompType.h index 0d11b9d47..dbb02ad92 100644 --- a/include/dxc/HLSL/DxilCompType.h +++ b/include/dxc/HLSL/DxilCompType.h @@ -63,6 +63,7 @@ public: bool IsSNorm() const; bool IsUNorm() const; bool Is64Bit() const; + bool Is16Bit() const; /// For min-precision types, returns upconverted (base) type. CompType GetBaseCompType() const; @@ -83,7 +84,7 @@ public: static CompType GetCompType(llvm::Type * type); const char *GetName() const; - const char *GetHLSLName() const; + const char *GetHLSLName(bool MinPrecision) const; private: Kind m_Kind; diff --git a/include/dxc/HLSL/DxilConstants.h b/include/dxc/HLSL/DxilConstants.h index 04584e7cb..b13735b61 100644 --- a/include/dxc/HLSL/DxilConstants.h +++ b/include/dxc/HLSL/DxilConstants.h @@ -914,6 +914,13 @@ namespace DXIL { const uint8_t kCompMask_W = 0x8; const uint8_t kCompMask_All = 0xF; + + enum class LowPrecisionMode { + Undefined = 0, + UseMinPrecision, + UseNativeLowPrecision + }; + } // namespace DXIL } // namespace hlsl diff --git a/include/dxc/HLSL/DxilContainer.h b/include/dxc/HLSL/DxilContainer.h index 0c01eae4f..ce0b905f5 100644 --- a/include/dxc/HLSL/DxilContainer.h +++ b/include/dxc/HLSL/DxilContainer.h @@ -91,7 +91,7 @@ static const uint64_t ShaderFeatureInfo_Doubles = 0x0001; static const uint64_t ShaderFeatureInfo_ComputeShadersPlusRawAndStructuredBuffersViaShader4X = 0x0002; static const uint64_t ShaderFeatureInfo_UAVsAtEveryStage = 0x0004; static const uint64_t ShaderFeatureInfo_64UAVs = 0x0008; -static const uint64_t ShaderFeatureInfo_MininumPrecision = 0x0010; +static const uint64_t ShaderFeatureInfo_MinimumPrecision = 0x0010; static const uint64_t ShaderFeatureInfo_11_1_DoubleExtensions = 0x0020; static const uint64_t ShaderFeatureInfo_11_1_ShaderExtensions = 0x0040; static const uint64_t ShaderFeatureInfo_LEVEL9ComparisonFiltering = 0x0080; @@ -105,8 +105,9 @@ static const uint64_t ShaderFeatureInfo_WaveOps = 0x4000; static const 
uint64_t ShaderFeatureInfo_Int64Ops = 0x8000; static const uint64_t ShaderFeatureInfo_ViewID = 0x10000; static const uint64_t ShaderFeatureInfo_Barycentrics = 0x20000; +static const uint64_t ShaderFeatureInfo_NativeLowPrecision = 0x40000; -static const unsigned ShaderFeatureInfoCount = 18; +static const unsigned ShaderFeatureInfoCount = 19; struct DxilShaderFeatureInfo { uint64_t FeatureFlags; diff --git a/include/dxc/HLSL/DxilModule.h b/include/dxc/HLSL/DxilModule.h index 95c9b4355..d73f2989d 100644 --- a/include/dxc/HLSL/DxilModule.h +++ b/include/dxc/HLSL/DxilModule.h @@ -220,8 +220,8 @@ public: void SetEnableRawAndStructuredBuffers(bool flag) { m_bEnableRawAndStructuredBuffers = flag; } bool GetEnableRawAndStructuredBuffers() const { return m_bEnableRawAndStructuredBuffers; } - void SetEnableMinPrecision(bool flag) { m_bEnableMinPrecision = flag; } - bool GetEnableMinPrecision() const { return m_bEnableMinPrecision; } + void SetLowPrecisionPresent(bool flag) { m_bLowPrecisionPresent = flag; } + bool GetLowPrecisionPresent() const { return m_bLowPrecisionPresent; } void SetEnableDoubleExtensions(bool flag) { m_bEnableDoubleExtensions = flag; } bool GetEnableDoubleExtensions() const { return m_bEnableDoubleExtensions; } @@ -275,6 +275,9 @@ public: void SetBarycentrics(bool flag) { m_bBarycentrics = flag; } bool GetBarycentrics() const { return m_bBarycentrics; } + void SetUseNativeLowPrecision(bool flag) { m_bUseNativeLowPrecision = flag; } + bool GetUseNativeLowPrecision() const { return m_bUseNativeLowPrecision; } + static uint64_t GetShaderFlagsRawForCollection(); // some flags are collected (eg use 64-bit), some provided (eg allow refactoring) uint64_t GetShaderFlagsRaw() const; void SetShaderFlagsRaw(uint64_t data); @@ -285,7 +288,7 @@ public: unsigned m_bEnableDoublePrecision :1; // D3D11_SB_GLOBAL_FLAG_ENABLE_DOUBLE_PRECISION_FLOAT_OPS unsigned m_bForceEarlyDepthStencil :1; // D3D11_SB_GLOBAL_FLAG_FORCE_EARLY_DEPTH_STENCIL unsigned 
m_bEnableRawAndStructuredBuffers :1; // D3D11_SB_GLOBAL_FLAG_ENABLE_RAW_AND_STRUCTURED_BUFFERS - unsigned m_bEnableMinPrecision :1; // D3D11_1_SB_GLOBAL_FLAG_ENABLE_MINIMUM_PRECISION + unsigned m_bLowPrecisionPresent :1; // D3D11_1_SB_GLOBAL_FLAG_ENABLE_MINIMUM_PRECISION unsigned m_bEnableDoubleExtensions :1; // D3D11_1_SB_GLOBAL_FLAG_ENABLE_DOUBLE_EXTENSIONS unsigned m_bEnableMSAD :1; // D3D11_1_SB_GLOBAL_FLAG_ENABLE_SHADER_EXTENSIONS unsigned m_bAllResourcesBound :1; // D3D12_SB_GLOBAL_FLAG_ALL_RESOURCES_BOUND @@ -310,7 +313,9 @@ public: unsigned m_bViewID : 1; // SHADER_FEATURE_VIEWID unsigned m_bBarycentrics : 1; // SHADER_FEATURE_BARYCENTRICS - unsigned m_align0 : 9; // align to 32 bit. + unsigned m_bUseNativeLowPrecision : 1; + + unsigned m_align0 : 8; // align to 32 bit. uint32_t m_align1; // align to 64 bit. }; diff --git a/include/dxc/HLSL/DxilOperations.h b/include/dxc/HLSL/DxilOperations.h index e126dcf3f..b2ad7a758 100644 --- a/include/dxc/HLSL/DxilOperations.h +++ b/include/dxc/HLSL/DxilOperations.h @@ -62,6 +62,9 @@ public: // Return false if the given function is not a dxil function. bool GetOpCodeClass(const llvm::Function *F, OpCodeClass &opClass); + // To check if operation uses strict precision types + bool UseMinPrecision(); + // LLVM helpers. Perhaps, move to a separate utility class. 
llvm::Constant *GetI1Const(bool v); llvm::Constant *GetI8Const(char v); @@ -105,6 +108,8 @@ private: llvm::Type *m_pSplitDoubleType; llvm::Type *m_pInt4Type; + DXIL::LowPrecisionMode m_LowPrecisionMode; + static const unsigned kNumTypeOverloads = 9; llvm::Type *m_pResRetType[kNumTypeOverloads]; diff --git a/include/dxc/HLSL/DxilTypeSystem.h b/include/dxc/HLSL/DxilTypeSystem.h index d249e5680..e8de774db 100644 --- a/include/dxc/HLSL/DxilTypeSystem.h +++ b/include/dxc/HLSL/DxilTypeSystem.h @@ -211,11 +211,15 @@ public: const llvm::Function *pSrcFunction, const DxilTypeSystem &src); + bool UseMinPrecision(); + private: llvm::Module *m_pModule; StructAnnotationMap m_StructAnnotations; FunctionAnnotationMap m_FunctionAnnotations; + DXIL::LowPrecisionMode m_LowPrecisionMode; + llvm::StructType *GetNormFloatType(CompType CT, unsigned NumComps); }; diff --git a/include/dxc/HLSL/HLModule.h b/include/dxc/HLSL/HLModule.h index 9b693f287..5b703cffc 100644 --- a/include/dxc/HLSL/HLModule.h +++ b/include/dxc/HLSL/HLModule.h @@ -59,7 +59,8 @@ struct HLOptions { unsigned bLegacyCBufferLoad : 1; unsigned PackingStrategy : 2; static_assert((unsigned)DXIL::PackingStrategy::Invalid < 4, "otherwise 2 bits is not enough to store PackingStrategy"); - unsigned unused : 25; + unsigned bUseMinPrecision : 1; + unsigned unused : 24; }; /// Use this class to manipulate HLDXIR of a shader. 
@@ -163,6 +164,7 @@ public: static void GetParameterRowsAndCols(llvm::Type *Ty, unsigned &rows, unsigned &cols, DxilParameterAnnotation ¶mAnnotation); static const char *GetLegacyDataLayoutDesc(); + static const char *GetNewDataLayoutDesc(); static void MergeGepUse(llvm::Value *V); diff --git a/lib/HLSL/DxilCompType.cpp b/lib/HLSL/DxilCompType.cpp index b9855fbbb..66f2bc7b7 100644 --- a/lib/HLSL/DxilCompType.cpp +++ b/lib/HLSL/DxilCompType.cpp @@ -155,6 +155,19 @@ bool CompType::Is64Bit() const { } } +bool CompType::Is16Bit() const { + switch (m_Kind) { + case DXIL::ComponentType::F16: + case DXIL::ComponentType::I16: + case DXIL::ComponentType::SNormF16: + case DXIL::ComponentType::UNormF16: + case DXIL::ComponentType::U16: + return true; + default: + return false; + } +} + CompType CompType::GetBaseCompType() const { switch (m_Kind) { case Kind::I1: return CompType(Kind::I1); @@ -283,14 +296,21 @@ const char *CompType::GetName() const { } static const char *s_TypeKindHLSLNames[(unsigned)CompType::Kind::LastEntry] = { + "unknown", + "bool", "short", "unsigned short", "int", "uint", "int64_t", "uint64_t", + "half", "float", "double", + "snorm_half", "unorm_half", "snorm_float", "unorm_float", "snorm_double", "unorm_double", +}; + +static const char *s_TypeKindHLSLNamesMinPrecision[(unsigned)CompType::Kind::LastEntry] = { "unknown", "bool", "min16i", "min16ui", "int", "uint", "int64_t", "uint64_t", - "min16f", "float", "double", + "min16float", "float", "double", "snorm_min16f", "unorm_min16f", "snorm_float", "unorm_float", "snorm_double", "unorm_double", }; -const char *CompType::GetHLSLName() const { - return s_TypeKindHLSLNames[(unsigned)m_Kind]; +const char *CompType::GetHLSLName(bool MinPrecision) const { + return MinPrecision ? 
s_TypeKindHLSLNamesMinPrecision[(unsigned)m_Kind] : s_TypeKindHLSLNames[(unsigned)m_Kind]; } } // namespace hlsl diff --git a/lib/HLSL/DxilContainerReflection.cpp b/lib/HLSL/DxilContainerReflection.cpp index 371f7cbb2..51d80888b 100644 --- a/lib/HLSL/DxilContainerReflection.cpp +++ b/lib/HLSL/DxilContainerReflection.cpp @@ -1939,7 +1939,7 @@ UINT64 DxilShaderReflection::GetRequiresFlags() { if (features & ShaderFeatureInfo_Doubles) result |= D3D_SHADER_REQUIRES_DOUBLES; if (features & ShaderFeatureInfo_UAVsAtEveryStage) result |= D3D_SHADER_REQUIRES_UAVS_AT_EVERY_STAGE; if (features & ShaderFeatureInfo_64UAVs) result |= D3D_SHADER_REQUIRES_64_UAVS; - if (features & ShaderFeatureInfo_MininumPrecision) result |= D3D_SHADER_REQUIRES_MINIMUM_PRECISION; + if (features & ShaderFeatureInfo_MinimumPrecision) result |= D3D_SHADER_REQUIRES_MINIMUM_PRECISION; if (features & ShaderFeatureInfo_11_1_DoubleExtensions) result |= D3D_SHADER_REQUIRES_11_1_DOUBLE_EXTENSIONS; if (features & ShaderFeatureInfo_11_1_ShaderExtensions) result |= D3D_SHADER_REQUIRES_11_1_SHADER_EXTENSIONS; if (features & ShaderFeatureInfo_LEVEL9ComparisonFiltering) result |= D3D_SHADER_REQUIRES_LEVEL_9_COMPARISON_FILTERING; diff --git a/lib/HLSL/DxilGenerationPass.cpp b/lib/HLSL/DxilGenerationPass.cpp index e6eb648fe..985c044b7 100644 --- a/lib/HLSL/DxilGenerationPass.cpp +++ b/lib/HLSL/DxilGenerationPass.cpp @@ -169,7 +169,6 @@ void InitDxilModuleFromHLModule(HLModule &H, DxilModule &M, DxilEntrySignature * //bool m_bDisableMathRefactoring; //bool m_bEnableDoublePrecision; //bool m_bEnableDoubleExtensions; - //bool m_bEnableMinPrecision; //M.CollectShaderFlags(); //bool m_bForceEarlyDepthStencil; @@ -177,6 +176,8 @@ void InitDxilModuleFromHLModule(HLModule &H, DxilModule &M, DxilEntrySignature * //bool m_bEnableMSAD; //M.m_ShaderFlags.SetAllResourcesBound(H.GetHLOptions().bAllResourcesBound); + M.m_ShaderFlags.SetUseNativeLowPrecision(!H.GetHLOptions().bUseMinPrecision); + if (FnProps) 
M.SetShaderProperties(FnProps); diff --git a/lib/HLSL/DxilModule.cpp b/lib/HLSL/DxilModule.cpp index 28ca19011..660158224 100644 --- a/lib/HLSL/DxilModule.cpp +++ b/lib/HLSL/DxilModule.cpp @@ -107,7 +107,7 @@ DxilModule::ShaderFlags::ShaderFlags(): , m_bEnableDoublePrecision(false) , m_bForceEarlyDepthStencil(false) , m_bEnableRawAndStructuredBuffers(false) -, m_bEnableMinPrecision(false) +, m_bLowPrecisionPresent(false) , m_bEnableDoubleExtensions(false) , m_bEnableMSAD(false) , m_bAllResourcesBound(false) @@ -125,6 +125,7 @@ DxilModule::ShaderFlags::ShaderFlags(): , m_bInt64Ops(false) , m_bViewID(false) , m_bBarycentrics(false) +, m_bUseNativeLowPrecision(false) , m_align0(0) , m_align1(0) {} @@ -228,7 +229,7 @@ unsigned DxilModule::ShaderFlags::GetGlobalFlags() const { Flags |= m_bEnableDoublePrecision ? DXIL::kEnableDoublePrecision : 0; Flags |= m_bForceEarlyDepthStencil ? DXIL::kForceEarlyDepthStencil : 0; Flags |= m_bEnableRawAndStructuredBuffers ? DXIL::kEnableRawAndStructuredBuffers : 0; - Flags |= m_bEnableMinPrecision ? DXIL::kEnableMinPrecision : 0; + Flags |= m_bLowPrecisionPresent && !m_bUseNativeLowPrecision? DXIL::kEnableMinPrecision : 0; Flags |= m_bEnableDoubleExtensions ? DXIL::kEnableDoubleExtensions : 0; Flags |= m_bEnableMSAD ? DXIL::kEnableMSAD : 0; Flags |= m_bAllResourcesBound ? DXIL::kAllResourcesBound : 0; @@ -238,7 +239,8 @@ unsigned DxilModule::ShaderFlags::GetGlobalFlags() const { uint64_t DxilModule::ShaderFlags::GetFeatureInfo() const { uint64_t Flags = 0; Flags |= m_bEnableDoublePrecision ? hlsl::ShaderFeatureInfo_Doubles : 0; - Flags |= m_bEnableMinPrecision ? hlsl::ShaderFeatureInfo_MininumPrecision : 0; + Flags |= m_bLowPrecisionPresent && !m_bUseNativeLowPrecision ? hlsl::ShaderFeatureInfo_MinimumPrecision: 0; + Flags |= m_bLowPrecisionPresent && m_bUseNativeLowPrecision ? hlsl::ShaderFeatureInfo_NativeLowPrecision : 0; Flags |= m_bEnableDoubleExtensions ? hlsl::ShaderFeatureInfo_11_1_DoubleExtensions : 0; Flags |= m_bWaveOps ? 
hlsl::ShaderFeatureInfo_WaveOps : 0; Flags |= m_bInt64Ops ? hlsl::ShaderFeatureInfo_Int64Ops : 0; @@ -339,7 +341,7 @@ void DxilModule::CollectShaderFlags(ShaderFlags &Flags) { // fma has dxil op. Others should check IR instruction div/cast. bool hasDoubleExtension = false; bool has64Int = false; - bool has16FloatInt = false; + bool has16 = false; bool hasWaveOps = false; bool hasCheckAccessFully = false; bool hasMSAD = false; @@ -395,8 +397,8 @@ void DxilModule::CollectShaderFlags(ShaderFlags &Flags) { } } - has16FloatInt |= isHalf; - has16FloatInt |= isInt16; + has16 |= isHalf; + has16 |= isInt16; has64Int |= isInt64; if (CallInst *CI = dyn_cast(&I)) { @@ -474,7 +476,7 @@ void DxilModule::CollectShaderFlags(ShaderFlags &Flags) { Flags.SetEnableDoublePrecision(hasDouble); Flags.SetInt64Ops(has64Int); - Flags.SetEnableMinPrecision(has16FloatInt); + Flags.SetLowPrecisionPresent(has16); Flags.SetEnableDoubleExtensions(hasDoubleExtension); Flags.SetWaveOps(hasWaveOps); Flags.SetTiledResources(hasCheckAccessFully); @@ -582,7 +584,7 @@ uint64_t DxilModule::ShaderFlags::GetShaderFlagsRawForCollection() { ShaderFlags Flags; Flags.SetEnableDoublePrecision(true); Flags.SetInt64Ops(true); - Flags.SetEnableMinPrecision(true); + Flags.SetLowPrecisionPresent(true); Flags.SetEnableDoubleExtensions(true); Flags.SetWaveOps(true); Flags.SetTiledResources(true); @@ -1496,10 +1498,10 @@ MDTuple *DxilModule::EmitDxilShaderProperties() { vector MDVals; // DXIL shader flags. - uint64_t Flags = m_ShaderFlags.GetShaderFlagsRaw(); - if (Flags != 0) { + uint64_t flag = m_ShaderFlags.GetShaderFlagsRaw(); + if (flag != 0) { MDVals.emplace_back(m_pMDHelper->Uint32ToConstMD(DxilMDHelper::kDxilShaderFlagsTag)); - MDVals.emplace_back(m_pMDHelper->Uint64ToConstMD(Flags)); + MDVals.emplace_back(m_pMDHelper->Uint64ToConstMD(flag)); } // Compute shader. 
diff --git a/lib/HLSL/DxilOperations.cpp b/lib/HLSL/DxilOperations.cpp index db997d709..4ffacf8b4 100644 --- a/lib/HLSL/DxilOperations.cpp +++ b/lib/HLSL/DxilOperations.cpp @@ -11,6 +11,8 @@ #include "dxc/HLSL/DxilOperations.h" #include "dxc/Support/Global.h" +#include "dxc/HLSL/DxilModule.h" +#include "dxc/HLSL/HLModule.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/IR/LLVMContext.h" @@ -430,7 +432,8 @@ static Type *GetOrCreateStructType(LLVMContext &Ctx, ArrayRef types, Stri // OP::OP(LLVMContext &Ctx, Module *pModule) : m_Ctx(Ctx) -, m_pModule(pModule) { +, m_pModule(pModule) +, m_LowPrecisionMode(DXIL::LowPrecisionMode::Undefined) { memset(m_pResRetType, 0, sizeof(m_pResRetType)); memset(m_pCBufferRetType, 0, sizeof(m_pCBufferRetType)); memset(m_OpCodeClassCache, 0, sizeof(m_OpCodeClassCache)); @@ -783,6 +786,26 @@ bool OP::GetOpCodeClass(const Function *F, OP::OpCodeClass &opClass) { return true; } +// Returns true when the module uses min-precision rather than native low-precision types. +// The mode is read once from the attached DxilModule or HLModule and cached in m_LowPrecisionMode. +bool OP::UseMinPrecision() { + if (m_LowPrecisionMode == DXIL::LowPrecisionMode::Undefined) { + // GetDxilModule()/GetHLModule() return references, so presence must be queried explicitly. + if (m_pModule->HasDxilModule()) { + m_LowPrecisionMode = m_pModule->GetDxilModule().m_ShaderFlags.GetUseNativeLowPrecision() ? + DXIL::LowPrecisionMode::UseNativeLowPrecision : DXIL::LowPrecisionMode::UseMinPrecision; + } + else if (m_pModule->HasHLModule()) { + m_LowPrecisionMode = m_pModule->GetHLModule().GetHLOptions().bUseMinPrecision ?
+ DXIL::LowPrecisionMode::UseMinPrecision : DXIL::LowPrecisionMode::UseNativeLowPrecision; + } + else { + DXASSERT(false, "otherwise module doesn't contain either HLModule or Dxil Module."); + } + } + return m_LowPrecisionMode == DXIL::LowPrecisionMode::UseMinPrecision; +} + llvm::Type *OP::GetOverloadType(OpCode OpCode, llvm::Function *F) { DXASSERT(F, "not work on nullptr"); Type *Ty = F->getReturnType(); @@ -940,15 +963,23 @@ Type *OP::GetCBufferRetType(Type *pOverloadType) { if (m_pCBufferRetType[TypeSlot] == nullptr) { string TypeName("dx.types.CBufRet."); TypeName += GetOverloadTypeName(TypeSlot); - if (!pOverloadType->isDoubleTy()) { - Type *FieldTypes[4] = { pOverloadType, pOverloadType, pOverloadType, pOverloadType }; - m_pCBufferRetType[TypeSlot] = GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule); - } else { + if (pOverloadType->isDoubleTy()) { Type *FieldTypes[2] = { pOverloadType, pOverloadType }; m_pCBufferRetType[TypeSlot] = GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule); } + else if (!UseMinPrecision() && pOverloadType->isHalfTy()) { + TypeName += ".8"; // dx.types.CBufRet.f16.8 for buffer of 8 halves + Type *FieldTypes[8] = { + pOverloadType, pOverloadType, pOverloadType, pOverloadType, + pOverloadType, pOverloadType, pOverloadType, pOverloadType, + }; + m_pCBufferRetType[TypeSlot] = GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule); + } + else { + Type *FieldTypes[4] = { pOverloadType, pOverloadType, pOverloadType, pOverloadType }; + m_pCBufferRetType[TypeSlot] = GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule); + } } - return m_pCBufferRetType[TypeSlot]; } diff --git a/lib/HLSL/DxilTypeSystem.cpp b/lib/HLSL/DxilTypeSystem.cpp index b45320d98..832b880fb 100644 --- a/lib/HLSL/DxilTypeSystem.cpp +++ b/lib/HLSL/DxilTypeSystem.cpp @@ -8,6 +8,8 @@ /////////////////////////////////////////////////////////////////////////////// #include "dxc/HLSL/DxilTypeSystem.h" +#include "dxc/HLSL/DxilModule.h"
+#include "dxc/HLSL/HLModule.h" #include "dxc/Support/Global.h" #include "llvm/IR/Module.h" @@ -194,8 +196,8 @@ void DxilFunctionFPFlag::SetFlagValue(const uint32_t flag) { // DxilStructAnnotationSystem class methods. // DxilTypeSystem::DxilTypeSystem(Module *pModule) -: m_pModule(pModule) { -} + : m_pModule(pModule), + m_LowPrecisionMode(DXIL::LowPrecisionMode::Undefined) {} DxilStructAnnotation *DxilTypeSystem::AddStructAnnotation(const StructType *pStructType) { DXASSERT_NOMSG(m_StructAnnotations.find(pStructType) == m_StructAnnotations.end()); @@ -451,4 +453,24 @@ DXIL::SigPointKind SigPointFromInputQual(DxilParamInputQual Q, DXIL::ShaderKind return DXIL::SigPointKind::Invalid; } +// Returns true when the module uses min-precision rather than native low-precision types. +// The mode is read once from the attached DxilModule or HLModule and cached in m_LowPrecisionMode. +bool DxilTypeSystem::UseMinPrecision() { + if (m_LowPrecisionMode == DXIL::LowPrecisionMode::Undefined) { + // GetDxilModule()/GetHLModule() return references, so presence must be queried explicitly. + if (m_pModule->HasDxilModule()) { + m_LowPrecisionMode = m_pModule->GetDxilModule().m_ShaderFlags.GetUseNativeLowPrecision() ? + DXIL::LowPrecisionMode::UseNativeLowPrecision : DXIL::LowPrecisionMode::UseMinPrecision; + } + else if (m_pModule->HasHLModule()) { + m_LowPrecisionMode = m_pModule->GetHLModule().GetHLOptions().bUseMinPrecision ?
+ DXIL::LowPrecisionMode::UseMinPrecision : DXIL::LowPrecisionMode::UseNativeLowPrecision; + } + else { + DXASSERT(false, "otherwise module doesn't contain either HLModule or Dxil Module."); + } + } + return m_LowPrecisionMode == DXIL::LowPrecisionMode::UseMinPrecision; +} + } // namespace hlsl diff --git a/lib/HLSL/DxilUtil.cpp b/lib/HLSL/DxilUtil.cpp index 9782887a3..da9f30103 100644 --- a/lib/HLSL/DxilUtil.cpp +++ b/lib/HLSL/DxilUtil.cpp @@ -13,6 +13,7 @@ #include "llvm/IR/GlobalVariable.h" #include "dxc/HLSL/DxilTypeSystem.h" #include "dxc/HLSL/DxilUtil.h" +#include "dxc/HLSL/DxilModule.h" #include "llvm/IR/Module.h" using namespace llvm; @@ -35,12 +36,14 @@ unsigned GetLegacyCBufferFieldElementSize(DxilFieldAnnotation &fieldAnnotation, llvm::Type *Ty, DxilTypeSystem &typeSys) { + while (isa(Ty)) { Ty = Ty->getArrayElementType(); } // Bytes. - unsigned compSize = fieldAnnotation.GetCompType().Is64Bit()?8:4; + CompType compType = fieldAnnotation.GetCompType(); + unsigned compSize = compType.Is64Bit() ? 8 : compType.Is16Bit() && !typeSys.UseMinPrecision() ? 2 : 4; unsigned fieldSize = compSize; if (Ty->isVectorTy()) { fieldSize *= Ty->getVectorNumElements(); diff --git a/lib/HLSL/DxilValidation.cpp b/lib/HLSL/DxilValidation.cpp index ac43283f9..f47e7a5b3 100644 --- a/lib/HLSL/DxilValidation.cpp +++ b/lib/HLSL/DxilValidation.cpp @@ -1999,7 +1999,8 @@ static bool IsDxilBuiltinStructType(StructType *ST, hlsl::OP *hlslOP) { unsigned EltNum = ST->getNumElements(); switch (EltNum) { case 2: - case 4: { + case 4: + case 8: { // 2 for doubles, 8 for halfs.
Type *EltTy = ST->getElementType(0); return ST == hlslOP->GetCBufferRetType(EltTy); } break; diff --git a/lib/HLSL/HLModule.cpp b/lib/HLSL/HLModule.cpp index 4eef68144..d89f7e461 100644 --- a/lib/HLSL/HLModule.cpp +++ b/lib/HLSL/HLModule.cpp @@ -862,6 +862,12 @@ const char *HLModule::GetLegacyDataLayoutDesc() { return kLegacyLayoutString.data(); } +// New data layout with native low precision types +static const StringRef kNewLayoutString = "e-m:e-p:32:32-i1:32:32-i8:32:32-i16:32:32-i64:64-f16:16-f80:32-n8:16:32-a:0:32-S320"; +const char *HLModule::GetNewDataLayoutDesc() { + return kNewLayoutString.data(); +} + static Value *MergeGEP(GEPOperator *SrcGEP, GetElementPtrInst *GEP) { IRBuilder<> Builder(GEP); SmallVector Indices; diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index a0659cbe7..6939328b5 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -4787,15 +4787,18 @@ Value *GenerateCBLoadLegacy(Value *handle, Value *legacyIdx, unsigned channelOffset, Type *EltTy, unsigned vecSize, OP *hlslOP, IRBuilder<> &Builder) { - DXASSERT((channelOffset + vecSize) <= 4, "legacy cbuffer don't across 16 bytes register."); Constant *OpArg = hlslOP->GetU32Const((unsigned)OP::OpCode::CBufferLoadLegacy); Type *i1Ty = Type::getInt1Ty(EltTy->getContext()); Type *doubleTy = Type::getDoubleTy(EltTy->getContext()); Type *i64Ty = Type::getInt64Ty(EltTy->getContext()); + Type *halfTy = Type::getHalfTy(EltTy->getContext()); + bool isBool = EltTy == i1Ty; bool is64 = (EltTy == doubleTy) | (EltTy == i64Ty); - bool isNormal = !isBool && !is64; + bool is16 = EltTy == halfTy && !hlslOP->UseMinPrecision(); + bool isNormal = !isBool && !is64 && !is16; + DXASSERT(is16 || (channelOffset + vecSize) <= 4, "legacy cbuffer don't across 16 bytes register."); if (isNormal) { Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy); Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx}); @@ -4805,10 +4808,21 
@@ Value *GenerateCBLoadLegacy(Value *handle, Value *legacyIdx, Result = Builder.CreateInsertElement(Result, NewElt, i); } return Result; - } else if (is64) { + } else if (is16) { Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy); Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx}); Value *Result = UndefValue::get(VectorType::get(EltTy, vecSize)); + // index aligned by 2 bytes not 4 bytes + channelOffset *= 2; + for (unsigned i = 0; i < vecSize; ++i) { + Value *NewElt = Builder.CreateExtractValue(loadLegacy, channelOffset + i); + Result = Builder.CreateInsertElement(Result, NewElt, i); + } + return Result; + } else if (is64) { + Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy); + Value *loadLegacy = Builder.CreateCall(CBLoad, { OpArg, handle, legacyIdx }); + Value *Result = UndefValue::get(VectorType::get(EltTy, vecSize)); unsigned smallVecSize = 2; if (vecSize < smallVecSize) smallVecSize = vecSize; diff --git a/tools/clang/include/clang/Basic/LangOptions.h b/tools/clang/include/clang/Basic/LangOptions.h index 082c69bd6..ccf6cb444 100644 --- a/tools/clang/include/clang/Basic/LangOptions.h +++ b/tools/clang/include/clang/Basic/LangOptions.h @@ -156,7 +156,7 @@ public: unsigned RootSigMajor; unsigned RootSigMinor; bool IsHLSLLibrary; - bool NoMinPrecision; // use strict precision, not min precision. + bool UseMinPrecision; // use min precision, not native precision. 
// MS Change Ends bool SPIRV = false; // SPIRV Change diff --git a/tools/clang/lib/CodeGen/CGHLSLMS.cpp b/tools/clang/lib/CodeGen/CGHLSLMS.cpp index 0898c762b..bd11bd17c 100644 --- a/tools/clang/lib/CodeGen/CGHLSLMS.cpp +++ b/tools/clang/lib/CodeGen/CGHLSLMS.cpp @@ -311,7 +311,7 @@ void clang::CompileRootSignature( // CGMSHLSLRuntime::CGMSHLSLRuntime(CodeGenModule &CGM) : CGHLSLRuntime(CGM), Context(CGM.getLLVMContext()), EntryFunc(nullptr), - TheModule(CGM.getModule()), legacyLayout(HLModule::GetLegacyDataLayoutDesc()), + TheModule(CGM.getModule()), legacyLayout(CGM.getLangOpts().UseMinPrecision ? HLModule::GetLegacyDataLayoutDesc() : HLModule::GetNewDataLayoutDesc()), CBufferType( llvm::StructType::create(TheModule.getContext(), "ConstantBuffer")) { const hlsl::ShaderModel *SM = @@ -348,6 +348,9 @@ CGMSHLSLRuntime::CGMSHLSLRuntime(CodeGenModule &CGM) opts.bLegacyCBufferLoad = !CGM.getCodeGenOpts().HLSLNotUseLegacyCBufLoad; opts.bAllResourcesBound = CGM.getCodeGenOpts().HLSLAllResourcesBound; opts.PackingStrategy = CGM.getCodeGenOpts().HLSLSignaturePackingStrategy; + + opts.bUseMinPrecision = CGM.getLangOpts().UseMinPrecision; + m_pHLModule->SetHLOptions(opts); m_pHLModule->SetValidatorVersion(CGM.getCodeGenOpts().HLSLValidatorMajorVer, CGM.getCodeGenOpts().HLSLValidatorMinorVer); @@ -385,6 +388,7 @@ CGMSHLSLRuntime::CGMSHLSLRuntime(CodeGenModule &CGM) // set Float Denorm Mode m_pHLModule->SetFPDenormMode(CGM.getCodeGenOpts().HLSLFlushFPDenorm); + } bool CGMSHLSLRuntime::IsHlslObjectType(llvm::Type *Ty) { @@ -484,7 +488,7 @@ StringToTessOutputPrimitive(StringRef primitive) { } static unsigned AlignTo8Bytes(unsigned offset, bool b8BytesAlign) { - DXASSERT((offset & 0x3) == 0, "offset should be divisible by 4"); + DXASSERT((offset & 0x1) == 0, "offset should be divisible by 2"); if (!b8BytesAlign) return offset; else if ((offset & 0x7) == 0) @@ -2605,11 +2609,16 @@ void CGMSHLSLRuntime::SetEntryFunction() { // Here the size is CB size. So don't need check type. 
static unsigned AlignCBufferOffset(unsigned offset, unsigned size, llvm::Type *Ty) { + DXASSERT(!(offset & 1), "otherwise we have an invalid offset."); // offset is already 4 bytes aligned. bool b8BytesAlign = Ty->isDoubleTy(); if (llvm::IntegerType *IT = dyn_cast(Ty)) { b8BytesAlign = IT->getBitWidth() > 32; } + // If offset is divisible by 2 and not 4, then increase the offset by 2 for dword alignment. + if (!Ty->getScalarType()->isHalfTy() && (offset & 0x2)) { + offset += 2; + } // Align it to 4 x 4bytes. if (unsigned remainder = (offset & 0xf)) { diff --git a/tools/clang/lib/Frontend/CompilerInvocation.cpp b/tools/clang/lib/Frontend/CompilerInvocation.cpp index f88bf6fe2..efac1e686 100644 --- a/tools/clang/lib/Frontend/CompilerInvocation.cpp +++ b/tools/clang/lib/Frontend/CompilerInvocation.cpp @@ -1733,7 +1733,7 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, // Enable low precision for HLSL 2018 // TODO: should we tie low precision to HLSL2018 only? - Opts.NoMinPrecision = Args.hasArg(options::OPT_no_min_precision); + Opts.UseMinPrecision = !Args.hasArg(options::OPT_no_min_precision); #endif // #ifdef MS_SUPPORT_VARIABLE_LANGOPTS } diff --git a/tools/clang/lib/Sema/SemaExpr.cpp b/tools/clang/lib/Sema/SemaExpr.cpp index 60dd32fcf..54229cf6d 100644 --- a/tools/clang/lib/Sema/SemaExpr.cpp +++ b/tools/clang/lib/Sema/SemaExpr.cpp @@ -3370,7 +3370,7 @@ ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) { else if (getLangOpts().HLSL && Literal.isLong) Ty = Context.DoubleTy; else if (getLangOpts().HLSL && Literal.isHalf) { - Ty = getLangOpts().NoMinPrecision ? Context.HalfTy : Context.FloatTy; + Ty = getLangOpts().UseMinPrecision ? 
Context.FloatTy : Context.HalfTy; } // HLSL Change Ends else if (!Literal.isLong) diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index ca726b21f..1eef6d83a 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -3037,16 +3037,16 @@ public: void WarnMinPrecision(HLSLScalarType type, SourceLocation loc) { // TODO: enalbe this once we introduce precise master option - bool NoMinPrecision = m_context->getLangOpts().NoMinPrecision; + bool UseMinPrecision = m_context->getLangOpts().UseMinPrecision; if (type == HLSLScalarType_int_min12) { const char *PromotedType = "min16int"; // TODO: print int16 once we support true int16/uint16 support. m_sema->Diag(loc, diag::warn_hlsl_sema_minprecision_promotion) << "min12int" << PromotedType; } else if (type == HLSLScalarType_float_min10) { - const char *PromotedType = NoMinPrecision ? "half": "min16float"; + const char *PromotedType = UseMinPrecision ? "min16float": "half"; m_sema->Diag(loc, diag::warn_hlsl_sema_minprecision_promotion) << "min10float" << PromotedType; } - if (NoMinPrecision) { + if (!UseMinPrecision) { if (type == HLSLScalarType_float_min16) { m_sema->Diag(loc, diag::warn_hlsl_sema_minprecision_promotion) << "min16float" << "half"; } @@ -3287,7 +3287,7 @@ public: case BuiltinType::Bool: return AR_BASIC_BOOL; case BuiltinType::Double: return AR_BASIC_FLOAT64; case BuiltinType::Float: return AR_BASIC_FLOAT32; - case BuiltinType::Half: return m_context->getLangOpts().NoMinPrecision ? AR_BASIC_FLOAT16 : AR_BASIC_MIN16FLOAT; + case BuiltinType::Half: return m_context->getLangOpts().UseMinPrecision ? 
AR_BASIC_MIN16FLOAT : AR_BASIC_FLOAT16; case BuiltinType::Int: return AR_BASIC_INT32; case BuiltinType::UInt: return AR_BASIC_UINT32; case BuiltinType::Short: return AR_BASIC_MIN16INT; // rather than AR_BASIC_INT16 @@ -3394,7 +3394,7 @@ public: case AR_OBJECT_NULL: return m_context->VoidTy; case AR_BASIC_BOOL: return m_context->BoolTy; case AR_BASIC_LITERAL_FLOAT: return m_context->LitFloatTy; - case AR_BASIC_FLOAT16: return m_context->getLangOpts().NoMinPrecision ? m_context->HalfTy : m_context->FloatTy; + case AR_BASIC_FLOAT16: return m_context->getLangOpts().UseMinPrecision ? m_context->FloatTy : m_context->HalfTy; case AR_BASIC_FLOAT32_PARTIAL_PRECISION: return m_context->FloatTy; case AR_BASIC_FLOAT32: return m_context->FloatTy; case AR_BASIC_FLOAT64: return m_context->DoubleTy; @@ -4424,7 +4424,7 @@ void HLSLExternalSource::AddBaseTypes() m_baseTypes[HLSLScalarType_int] = m_context->IntTy; m_baseTypes[HLSLScalarType_uint] = m_context->UnsignedIntTy; m_baseTypes[HLSLScalarType_dword] = m_context->UnsignedIntTy; - m_baseTypes[HLSLScalarType_half] = m_context->getLangOpts().NoMinPrecision ? m_context->HalfTy : m_context->FloatTy; + m_baseTypes[HLSLScalarType_half] = m_context->getLangOpts().UseMinPrecision ? 
m_context->FloatTy : m_context->HalfTy; m_baseTypes[HLSLScalarType_float] = m_context->FloatTy; m_baseTypes[HLSLScalarType_double] = m_context->DoubleTy; m_baseTypes[HLSLScalarType_float_min10] = m_context->HalfTy; diff --git a/tools/clang/test/CodeGenHLSL/cbufferHalf.hlsl b/tools/clang/test/CodeGenHLSL/cbufferHalf.hlsl new file mode 100644 index 000000000..5c0d63cf8 --- /dev/null +++ b/tools/clang/test/CodeGenHLSL/cbufferHalf.hlsl @@ -0,0 +1,110 @@ +// RUN: %dxc -E main -T ps_6_0 -no-min-precision %s | FileCheck %s + +// CHECK: Use native low precision +// CHECK: cbuffer Foo +// CHECK: { +// CHECK: struct dx.alignment.legacy.Foo +// CHECK: { +// CHECK: half f_h1; ; Offset: 0 +// CHECK: float3 f_f3; ; Offset: 4 + +// CHECK: half2 f_h2; ; Offset: 16 +// CHECK: float3 f_f3_1; ; Offset: 20 + +// CHECK: float2 f_f2; ; Offset: 32 +// CHECK: half4 f_h4; ; Offset: 40 + +// CHECK: half2 f_h2_1; ; Offset: 48 +// CHECK: half3 f_h3; ; Offset: 52 + +// CHECK: double f_d1; ; Offset: 64 +// CHECK: } Foo ; Offset: 0 Size: 72 +// CHECK: } + +// CHECK: cbuffer Bar +// CHECK: { +// CHECK: struct dx.alignment.legacy.Bar +// CHECK: { +// CHECK: half b_h1; ; Offset: 0 +// CHECK: half b_h2; ; Offset: 2 +// CHECK: half b_h3; ; Offset: 4 +// CHECK: half2 b_h4; ; Offset: 6 +// CHECK: half3 b_h5; ; Offset: 10 + +// CHECK: half3 b_h7; ; Offset: 16 +// CHECK: half4 b_h8; ; Offset: 22 +// CHECK: half b_h9; ; Offset: 30 + +// CHECK: half4 b_h10; ; Offset: 32 +// CHECK: half3 b_h11; ; Offset: 40 + +// CHECK: half2 b_h12; ; Offset: 48 +// CHECK: half3 b_h13; ; Offset: 52 +// CHECK: half2 b_h14; ; Offset: 58 +// CHECK: } Bar ; Offset: 0 Size: 62 +// CHECK: } + +// CHECK: %dx.types.CBufRet.f16.8 = type { half, half, half, half, half, half, half, half } + +// CHECK: %Foo_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 0, i1 false) ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex) +// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16.8 
@dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 0) ; CBufferLoadLegacy(handle,regIndex) +// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0 +// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo_buffer, i32 0) ; CBufferLoadLegacy(handle,regIndex) +// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f32 {{%[0-9]+}}, 1 +// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 1) ; CBufferLoadLegacy(handle,regIndex) +// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0 +// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 1 +// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo_buffer, i32 1) ; CBufferLoadLegacy(handle,regIndex) +// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f32 {{%[0-9]+}}, 3 +// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo_buffer, i32 2) ; CBufferLoadLegacy(handle,regIndex) +// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f32 {{%[0-9]+}}, 0 +// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 2) ; CBufferLoadLegacy(handle,regIndex) +// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 4 +// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 5 +// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 6 +// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 7 +// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 3) ; CBufferLoadLegacy(handle,regIndex) +// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0 +// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 
{{%[0-9]+}}, 1 +// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 2 +// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 3 +// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 4 +// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %Foo_buffer, i32 4) ; CBufferLoadLegacy(handle,regIndex) +// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f64 {{%[0-9]+}}, 0 + +cbuffer Foo { + half f_h1; + float3 f_f3; + half2 f_h2; + float3 f_f3_1; + float2 f_f2; + half4 f_h4; + half2 f_h2_1; + half3 f_h3; + double f_d1; +} + +cbuffer Bar { + half b_h1; + half b_h2; + half b_h3; + half2 b_h4; + half3 b_h5; + + half3 b_h7; + half4 b_h8; + half b_h9; + + half4 b_h10; + half3 b_h11; + + half2 b_h12; + half3 b_h13; + half2 b_h14; +} + +float4 main() : SV_Target { + return f_h1 + f_f3.x + f_h2.x + f_h2.y + f_f3_1.z + f_f2.x + f_h4.x + f_h4.y + + f_h4.z + f_h4.w + f_h2_1.x + f_h2_1.y + f_h3.x + f_h3.y + f_h3.z + f_d1 + + b_h1; +} \ No newline at end of file diff --git a/tools/clang/test/CodeGenHLSL/cbufferMinPrec.hlsl b/tools/clang/test/CodeGenHLSL/cbufferMinPrec.hlsl new file mode 100644 index 000000000..cfa2576e1 --- /dev/null +++ b/tools/clang/test/CodeGenHLSL/cbufferMinPrec.hlsl @@ -0,0 +1,62 @@ +// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s + +// CHECK: Minimum-precision data types +// CHECK: cbuffer Foo +// CHECK: { +// CHECK: struct dx.alignment.legacy.Foo +// CHECK: { +// CHECK: min16float h1; ; Offset: 0 +// CHECK: float3 f3; ; Offset: 4 +// CHECK: min16float2 h2; ; Offset: 16 +// CHECK: float3 f3_1; ; Offset: 32 +// CHECK: float2 f2; ; Offset: 48 +// CHECK: min16float4 h4; ; Offset: 64 +// CHECK: min16float2 h2_1; ; Offset: 80 +// CHECK: min16float3 h3; ; Offset: 96 +// CHECK: double d1; ; Offset: 112 +// CHECK: } Foo ; Offset: 0 Size: 120 +// CHECK: } + +// CHECK: %dx.types.CBufRet.f16 = type { half, half, half, half } + +// CHECK: {{%[0-9]+}} = call 
%dx.types.CBufRet.f16 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 0) ; CBufferLoadLegacy(handle,regIndex) +// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 0 +// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo_buffer, i32 0) ; CBufferLoadLegacy(handle,regIndex) +// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f32 {{%[0-9]+}}, 1 +// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 1) ; CBufferLoadLegacy(handle,regIndex) +// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 0 +// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 1 +// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo_buffer, i32 2) ; CBufferLoadLegacy(handle,regIndex) +// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f32 {{%[0-9]+}}, 2 +// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo_buffer, i32 3) ; CBufferLoadLegacy(handle,regIndex) +// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f32 {{%[0-9]+}}, 0 +// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 4) ; CBufferLoadLegacy(handle,regIndex) +// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 0 +// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 1 +// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 2 +// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 3 +// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 5) ; CBufferLoadLegacy(handle,regIndex) +// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 0 +// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 
{{%[0-9]+}}, 1 +// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 6) ; CBufferLoadLegacy(handle,regIndex) +// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 0 +// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 1 +// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 2 +// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %Foo_buffer, i32 7) ; CBufferLoadLegacy(handle,regIndex) +// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f64 {{%[0-9]+}}, 0 + +cbuffer Foo { + min16float h1; + float3 f3; + min16float2 h2; + float3 f3_1; + float2 f2; + min16float4 h4; + min16float2 h2_1; + min16float3 h3; + double d1; +} + +float4 main() : SV_Target { + return h1 + f3.x + h2.x + h2.y + f3_1.z + f2.x + h4.x + h4.y + h4.z + h4.w + h2_1.x + h2_1.y + h3.x + h3.y + h3.z + d1; +} \ No newline at end of file diff --git a/tools/clang/tools/dxcompiler/dxcdisassembler.cpp b/tools/clang/tools/dxcompiler/dxcdisassembler.cpp index 76ea7195a..2e5047996 100644 --- a/tools/clang/tools/dxcompiler/dxcdisassembler.cpp +++ b/tools/clang/tools/dxcompiler/dxcdisassembler.cpp @@ -313,6 +313,7 @@ PCSTR g_pFeatureInfoNames[] = { "64-Bit integer", "View Instancing", "Barycentrics", + "Use native low precision" }; static_assert(_countof(g_pFeatureInfoNames) == ShaderFeatureInfoCount, "g_pFeatureInfoNames needs to be updated"); @@ -565,12 +566,12 @@ void PrintStructLayout(StructType *ST, DxilTypeSystem &typeSys, unsigned sizeOfStruct = 0); void PrintTypeAndName(llvm::Type *Ty, DxilFieldAnnotation &annotation, - std::string &StreamStr, unsigned arraySize) { + std::string &StreamStr, unsigned arraySize, bool minPrecision) { raw_string_ostream Stream(StreamStr); while (Ty->isArrayTy()) Ty = Ty->getArrayElementType(); - const char *compTyName = annotation.GetCompType().GetHLSLName(); + const char *compTyName = 
annotation.GetCompType().GetHLSLName(minPrecision); if (annotation.HasMatrixAnnotation()) { const DxilMatrixAnnotation &Matrix = annotation.GetMatrixAnnotation(); switch (Matrix.Orientation) { @@ -650,7 +651,7 @@ void PrintFieldLayout(llvm::Type *Ty, DxilFieldAnnotation &annotation, } else { (OS << comment).indent(indent); std::string NameTypeStr; - PrintTypeAndName(Ty, annotation, NameTypeStr, arraySize); + PrintTypeAndName(Ty, annotation, NameTypeStr, arraySize, typeSys.UseMinPrecision()); OS << left_justify(NameTypeStr, offsetIndent); // Offset diff --git a/tools/clang/tools/dxcompiler/dxcompilerobj.cpp b/tools/clang/tools/dxcompiler/dxcompilerobj.cpp index b88e771d3..ccf59cd91 100644 --- a/tools/clang/tools/dxcompiler/dxcompilerobj.cpp +++ b/tools/clang/tools/dxcompiler/dxcompilerobj.cpp @@ -789,7 +789,7 @@ public: compiler.getLangOpts().HLSL2016 = Opts.HLSL2016; compiler.getLangOpts().HLSL2017 = Opts.HLSL2017; - compiler.getLangOpts().NoMinPrecision = Opts.NoMinPrecision; + compiler.getLangOpts().UseMinPrecision = !Opts.NoMinPrecision; // SPIRV change starts #ifdef ENABLE_SPIRV_CODEGEN diff --git a/tools/clang/unittests/HLSL/CompilerTest.cpp b/tools/clang/unittests/HLSL/CompilerTest.cpp index 73de596f0..bb72388b4 100644 --- a/tools/clang/unittests/HLSL/CompilerTest.cpp +++ b/tools/clang/unittests/HLSL/CompilerTest.cpp @@ -478,7 +478,9 @@ public: TEST_METHOD(CodeGenCbuffer6_51) TEST_METHOD(CodeGenCbufferAlloc) TEST_METHOD(CodeGenCbufferAllocLegacy) + TEST_METHOD(CodeGenCbufferHalf) TEST_METHOD(CodeGenCbufferInLoop) + TEST_METHOD(CodeGenCbufferMinPrec) TEST_METHOD(CodeGenClass) TEST_METHOD(CodeGenClip) TEST_METHOD(CodeGenClipPlanes) @@ -3051,10 +3053,18 @@ TEST_F(CompilerTest, CodeGenCbufferAllocLegacy) { CodeGenTestCheck(L"..\\CodeGenHLSL\\cbufferAlloc_legacy.hlsl"); } +TEST_F(CompilerTest, CodeGenCbufferHalf) { + CodeGenTestCheck(L"..\\CodeGenHLSL\\cbufferHalf.hlsl"); +} + TEST_F(CompilerTest, CodeGenCbufferInLoop) { 
CodeGenTest(L"..\\CodeGenHLSL\\cbufferInLoop.hlsl"); } +TEST_F(CompilerTest, CodeGenCbufferMinPrec) { + CodeGenTestCheck(L"..\\CodeGenHLSL\\cbufferMinPrec.hlsl"); +} + TEST_F(CompilerTest, CodeGenClass) { CodeGenTestCheck(L"..\\CodeGenHLSL\\class.hlsl"); }