From 467d0a193a57eb8acf9f846bde292432008b9b52 Mon Sep 17 00:00:00 2001 From: Mikko Strandborg Date: Sun, 10 Jun 2018 10:08:26 +0300 Subject: [PATCH] Bring changes from upstream ( 281bc7d8cfdb ) --- include/ShaderInfo.h | 16 +- include/UnityInstancingFlexibleArraySize.h | 15 + include/hlslcc.h | 127 +- src/ControlFlowGraph.cpp | 2 +- src/DataTypeAnalysis.cpp | 31 +- src/HLSLCrossCompilerContext.cpp | 50 +- src/HLSLcc.cpp | 6 +- src/HLSLccToolkit.cpp | 30 +- src/LoopTransform.cpp | 41 +- src/Operand.cpp | 24 +- src/ShaderInfo.cpp | 10 +- src/decode.cpp | 9 +- src/internal_includes/ControlFlowGraph.h | 9 - src/internal_includes/Declaration.h | 3 +- .../HLSLCrossCompilerContext.h | 9 +- src/internal_includes/HLSLccToolkit.h | 11 +- src/internal_includes/LoopTransform.h | 4 +- src/internal_includes/Operand.h | 11 - src/internal_includes/Shader.h | 5 +- src/internal_includes/Translator.h | 2 +- src/internal_includes/languages.h | 73 +- src/internal_includes/toGLSL.h | 42 +- src/internal_includes/toMetal.h | 84 +- src/internal_includes/tokens.h | 21 +- src/reflect.cpp | 4 + src/toGLSL.cpp | 308 ++++- src/toGLSLDeclaration.cpp | 1062 +++++++++++------ src/toGLSLInstruction.cpp | 793 +++++++----- src/toGLSLOperand.cpp | 192 ++- src/toMetal.cpp | 660 +++++++++- src/toMetalDeclaration.cpp | 720 +++++++---- src/toMetalInstruction.cpp | 251 ++-- src/toMetalOperand.cpp | 226 +++- 33 files changed, 3494 insertions(+), 1357 deletions(-) create mode 100644 include/UnityInstancingFlexibleArraySize.h diff --git a/include/ShaderInfo.h b/include/ShaderInfo.h index 4d0b298..30791b3 100644 --- a/include/ShaderInfo.h +++ b/include/ShaderInfo.h @@ -6,6 +6,7 @@ #include #include "growing_array.h" #include + //Reflection #define MAX_RESOURCE_BINDINGS 256 @@ -109,6 +110,14 @@ enum TESSELLATOR_OUTPUT_PRIMITIVE TESSELLATOR_OUTPUT_TRIANGLE_CCW = 4 }; +typedef enum TESSELLATOR_DOMAIN +{ + TESSELLATOR_DOMAIN_UNDEFINED = 0, + TESSELLATOR_DOMAIN_ISOLINE = 1, + TESSELLATOR_DOMAIN_TRI = 2, + TESSELLATOR_DOMAIN_QUAD = 3 +} TESSELLATOR_DOMAIN; + enum SPECIAL_NAME { NAME_UNDEFINED = 0, @@ -232,6 +241,7 @@ struct ResourceBinding RESOURCE_RETURN_TYPE ui32ReturnType; uint32_t ui32NumSamples; REFLECT_RESOURCE_PRECISION ePrecision; + int m_SamplerMode; // (SB_SAMPLER_MODE) For samplers, this is the sampler mode this sampler is declared with SHADER_VARIABLE_TYPE GetDataType() const { @@ -462,7 +472,7 @@ public: int32_t* pi32Rebase, uint32_t flags); - static std::string GetShaderVarIndexedFullName(const ShaderVarType* psShaderVar, std::vector &indices, const std::string dynamicIndex, bool revertDynamicIndexCalc, bool matrixAsVectors); + static std::string GetShaderVarIndexedFullName(const ShaderVarType* psShaderVar, const std::vector& indices, const std::string& dynamicIndex, bool revertDynamicIndexCalc, bool matrixAsVectors); // Apply shader precision information to resource bindings void AddSamplerPrecisions(HLSLccSamplerPrecisionInfo &info); @@ -491,5 +501,9 @@ public: TESSELLATOR_PARTITIONING eTessPartitioning; TESSELLATOR_OUTPUT_PRIMITIVE eTessOutPrim; + uint32_t ui32TessInputControlPointCount; + uint32_t ui32TessOutputControlPointCount; + TESSELLATOR_DOMAIN eTessDomain; + bool bEarlyFragmentTests; }; diff --git a/include/UnityInstancingFlexibleArraySize.h b/include/UnityInstancingFlexibleArraySize.h new file mode 100644 index 0000000..6a6dd00 --- /dev/null +++ b/include/UnityInstancingFlexibleArraySize.h @@ -0,0 +1,15 @@ +#pragma once + +// In Unity, instancing array sizes should be able to be dynamically patched at runtime by defining the macro. + +#include +#define UNITY_RUNTIME_INSTANCING_ARRAY_SIZE_MACRO "UNITY_RUNTIME_INSTANCING_ARRAY_SIZE" + +const unsigned int kArraySizeConstantID = 0; + +// TODO: share with Runtime/GfxDevice/InstancingUtilities.h +inline bool IsUnityInstancingConstantBufferName(const char* cbName) +{ + static const char kInstancedCbNamePrefix[] = "UnityInstancing"; + return strncmp(cbName, kInstancedCbNamePrefix, sizeof(kInstancedCbNamePrefix) - 1) == 0; +} diff --git a/include/hlslcc.h b/include/hlslcc.h index c367901..ce32214 100644 --- a/include/hlslcc.h +++ b/include/hlslcc.h @@ -4,6 +4,7 @@ #include #include #include +#include #if defined (_WIN32) && defined(HLSLCC_DYNLIB) #define HLSLCC_APIENTRY __stdcall @@ -48,6 +49,7 @@ typedef struct GlExtensions { } GlExtensions; #include "ShaderInfo.h" +#include "UnityInstancingFlexibleArraySize.h" typedef std::vector TextureSamplerPairs; @@ -123,6 +125,88 @@ typedef enum // Using a texture or uniform name like this will cause conflicts #define HLSLCC_TEMP_PREFIX "u_xlat" +typedef std::vector> MemberDefinitions; + +// We store struct definition contents inside a vector of strings +struct StructDefinition +{ + StructDefinition() : m_Members(), m_Dependencies(), m_IsPrinted(false) {} + + MemberDefinitions m_Members; // A vector of strings with the struct members + std::vector m_Dependencies; // A vector of struct names this struct depends on. + bool m_IsPrinted; // Has this struct been printed out yet? +}; + +typedef std::map StructDefinitions; + +// Map of extra function definitions we need to add before the shader body but after the declarations. +typedef std::map FunctionDefinitions; + +// A helper class for allocating binding slots +// (because both UAVs and textures use the same slots in Metal, also constant buffers and other buffers etc) +class BindingSlotAllocator +{ + typedef std::map SlotMap; + SlotMap m_Allocations; + uint32_t m_ShaderStageAllocations; +public: + BindingSlotAllocator() : m_Allocations(), m_ShaderStageAllocations(0) + { + for(int i = MAX_RESOURCE_BINDINGS-1; i >= 0; i --) + m_FreeSlots.push_back(i); + } + + enum BindType + { + ConstantBuffer = 0, + RWBuffer, + Texture, + UAV + }; + + uint32_t GetBindingSlot(uint32_t regNo, BindType type) + { + // The key is regNumber with the bindtype stored to highest 16 bits + uint32_t key = (m_ShaderStageAllocations + regNo) | (uint32_t(type) << 16); + SlotMap::iterator itr = m_Allocations.find(key); + if(itr == m_Allocations.end()) + { + uint32_t slot = m_FreeSlots.back(); + m_FreeSlots.pop_back(); + m_Allocations.insert(std::make_pair(key, slot)); + return slot; + } + return itr->second; + } + + // Func for reserving binding slots with the original reg number. + // Used for fragment shader UAVs (SetRandomWriteTarget etc). + void ReserveBindingSlot(uint32_t regNo, BindType type) + { + uint32_t key = regNo | (uint32_t(type) << 16); + m_Allocations.insert(std::make_pair(key, regNo)); + + // Remove regNo from free slots + for (int i = m_FreeSlots.size() - 1; i >= 0; i--) + { + if (m_FreeSlots[i] == regNo) + { + m_FreeSlots.erase(m_FreeSlots.begin() + i); + return; + } + } + } + + uint32_t SaveTotalShaderStageAllocationsCount() + { + m_ShaderStageAllocations = m_Allocations.size(); + return m_ShaderStageAllocations; + } + +private: + std::vector m_FreeSlots; +}; + //The shader stages (Vertex, Pixel et al) do not depend on each other //in HLSL. GLSL is a different story. HLSLCrossCompiler requires //that hull shaders must be compiled before domain shaders, and @@ -207,6 +291,10 @@ public: GLSLCrossDependencyData() : eTessPartitioning(), eTessOutPrim(), + fMaxTessFactor(64.0), + numPatchesInThreadGroup(0), + hasControlPoint(false), + hasPatchConstant(false), ui32ProgramStages(0), m_ExtBlendModes(), m_NextSpecID(0) @@ -290,6 +378,10 @@ public: //can be saved when compiling hull and passed to domain compilation. TESSELLATOR_PARTITIONING eTessPartitioning; TESSELLATOR_OUTPUT_PRIMITIVE eTessOutPrim; + float fMaxTessFactor; + int numPatchesInThreadGroup; + bool hasControlPoint; + bool hasPatchConstant; // Bitfield for the shader stages this program is going to include (see PS_FLAG_*). // Needed so we can construct proper shader input and output names @@ -313,6 +405,28 @@ public: pixelInterpolation[regNo] = mode; } + struct CompareFirst + { + CompareFirst(std::string val) : m_Val (val) {} + bool operator()(const std::pair& elem) const + { + return m_Val == elem.first; + } + private: + std::string m_Val; + }; + + inline bool IsMemberDeclared(const std::string &name) + { + if (std::find_if(m_SharedFunctionMembers.begin(), m_SharedFunctionMembers.end(), CompareFirst(name)) != m_SharedFunctionMembers.end()) + return true; + return false; + } + + MemberDefinitions m_SharedFunctionMembers; + BindingSlotAllocator m_SharedTextureSlots, m_SharedSamplerSlots; + BindingSlotAllocator m_SharedBufferSlots; + inline void ClearCrossDependencyData() { pixelInterpolation.clear(); @@ -321,8 +435,9 @@ public: varyingLocationsMap[i].clear(); nextAvailableVaryingLocation[i] = 0; } - m_NextSpecID = 0; + m_NextSpecID = kArraySizeConstantID + 1; m_SpecConstantMap.clear(); + m_SharedFunctionMembers.clear(); } // Retrieve or allocate a layout slot for Vulkan specialization constant @@ -368,9 +483,11 @@ public: virtual bool OnConstant(const std::string &name, int bindIndex, SHADER_VARIABLE_TYPE cType, int rows, int cols, bool isMatrix, int arraySize) { return true; } virtual void OnConstantBufferBinding(const std::string &name, int bindIndex) {} - virtual void OnTextureBinding(const std::string &name, int bindIndex, int samplerIndex, HLSLCC_TEX_DIMENSION dim, bool isUAV) {} + virtual void OnTextureBinding(const std::string &name, int bindIndex, int samplerIndex, bool multisampled, HLSLCC_TEX_DIMENSION dim, bool isUAV) {} virtual void OnBufferBinding(const std::string &name, int bindIndex, bool isUAV) {} virtual void OnThreadGroupSize(unsigned int xSize, unsigned int ySize, unsigned int zSize) {} + virtual void OnTessellationInfo(uint32_t tessPartitionMode, uint32_t tessOutputWindingOrder, uint32_t tessMaxFactor, uint32_t tessNumPatchesInThreadGroup) {} + virtual void OnTessellationKernelInfo(uint32_t patchKernelBufferCount) {} }; @@ -460,6 +577,12 @@ static const unsigned int HLSLCC_FLAG_NVN_TARGET = 0x800000; // as long as they are part of the same final linked program. Uniform buffer instance names solve this cross-shader symbol conflict issue. static const unsigned int HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT_WITH_INSTANCE_NAME = 0x1000000; +// Massage shader steps into Metal compute kernel from vertex/hull shaders + post-tessellation vertex shader from domain shader +static const unsigned int HLSLCC_FLAG_METAL_TESSELLATION = 0x2000000; + +// Disable fastmath +static const unsigned int HLSLCC_FLAG_DISABLE_FASTMATH = 0x4000000; + #ifdef __cplusplus extern "C" { #endif diff --git a/src/ControlFlowGraph.cpp b/src/ControlFlowGraph.cpp index 4fbd68e..6131924 100644 --- a/src/ControlFlowGraph.cpp +++ b/src/ControlFlowGraph.cpp @@ -341,7 +341,7 @@ void BasicBlock::RVarUnion(ReachableVariables &a, const ReachableVariables &b) #define UNITY_EXTERNAL_TOOL 1 #include "Testing.h" // From Runtime/Testing -UNIT_TEST_SUITE(HLSLccTests) +UNIT_TEST_SUITE(HLSLcc) { TEST(ControlFlowGraph_Build_Simple_Works) { diff --git a/src/DataTypeAnalysis.cpp b/src/DataTypeAnalysis.cpp index e99c3aa..2378ccc 100644 --- a/src/DataTypeAnalysis.cpp +++ b/src/DataTypeAnalysis.cpp @@ -451,15 +451,23 @@ void HLSLcc::DataTypeAnalysis::SetDataTypes(HLSLCrossCompilerContext* psContext, break; case OPCODE_RESINFO: - { - if (psInst->eResInfoReturnType != RESINFO_INSTRUCTION_RETURN_UINT) - MarkAllOperandsAs(psInst, SVT_FLOAT, aeTempVecType); - break; - } + // Operand 0 depends on the return type declaration, op 1 is always uint + MarkOperandAs(&psInst->asOperands[1], SVT_UINT, aeTempVecType); + switch (psInst->eResInfoReturnType) + { + default: + case RESINFO_INSTRUCTION_RETURN_FLOAT: + case RESINFO_INSTRUCTION_RETURN_RCPFLOAT: + MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType); + break; + case RESINFO_INSTRUCTION_RETURN_UINT: + MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType); + break; + } case OPCODE_SAMPLE_INFO: - // TODO decode the _uint flag - MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType); + // Sample_info uses the same RESINFO_RETURN_TYPE for storage. 0 = float, 1 = uint. + MarkOperandAs(&psInst->asOperands[0], psInst->eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_FLOAT ? SVT_FLOAT : SVT_UINT, aeTempVecType); break; case OPCODE_SAMPLE_POS: @@ -469,6 +477,7 @@ void HLSLcc::DataTypeAnalysis::SetDataTypes(HLSLCrossCompilerContext* psContext, case OPCODE_LD_UAV_TYPED: // translates to gvec4 loadImage(gimage i, ivec p). + MarkOperandAs(&psInst->asOperands[0], SVT_INT, aeTempVecType); MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); // ivec p break; @@ -507,9 +516,13 @@ void HLSLcc::DataTypeAnalysis::SetDataTypes(HLSLCrossCompilerContext* psContext, break; case OPCODE_F32TOF16: + MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], SVT_FLOAT, aeTempVecType); + break; + case OPCODE_F16TOF32: - // TODO - ASSERT(0); + MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], SVT_UINT, aeTempVecType); break; diff --git a/src/HLSLCrossCompilerContext.cpp b/src/HLSLCrossCompilerContext.cpp index fec51a8..77c29fa 100644 --- a/src/HLSLCrossCompilerContext.cpp +++ b/src/HLSLCrossCompilerContext.cpp @@ -8,6 +8,7 @@ #include "internal_includes/debug.h" #include "internal_includes/Translator.h" #include "internal_includes/ControlFlowGraph.h" +#include "include/hlslcc.h" #include void HLSLCrossCompilerContext::DoDataTypeAnalysis(ShaderPhase *psPhase) @@ -95,15 +96,26 @@ void HLSLCrossCompilerContext::AddIndentation() } } -void HLSLCrossCompilerContext::RequireExtension(const std::string &extName) +bool HLSLCrossCompilerContext::RequireExtension(const std::string &extName) { if (m_EnabledExtensions.find(extName) != m_EnabledExtensions.end()) - return; + return true; m_EnabledExtensions.insert(extName); - bformata(extensions, "#ifdef %s\n", extName.c_str()); bformata(extensions, "#extension %s : require\n", extName.c_str()); - bcatcstr(extensions, "#endif\n"); + return false; +} + +bool HLSLCrossCompilerContext::EnableExtension(const std::string &extName) +{ + if (m_EnabledExtensions.find(extName) != m_EnabledExtensions.end()) + return true; + + m_EnabledExtensions.insert(extName); + bformata(extensions, "#ifdef %s\n", extName.c_str()); + bformata(extensions, "#extension %s : enable\n", extName.c_str()); + bcatcstr(extensions, "#endif\n"); + return false; } std::string HLSLCrossCompilerContext::GetDeclaredInputName(const Operand* psOperand, int *piRebase, int iIgnoreRedirect, uint32_t *puiIgnoreSwizzle) const @@ -133,9 +145,11 @@ std::string HLSLCrossCompilerContext::GetDeclaredInputName(const Operand* psOper if (psIn && piRebase) *piRebase = psIn->iRebase; + const std::string patchPrefix = psShader->eTargetLanguage == LANG_METAL ? "patch." : "patch"; std::string res = ""; + bool skipPrefix = false; - if (psTranslator->TranslateSystemValue(psOperand, psIn, res, puiIgnoreSwizzle, psShader->aIndexedInput[regSpace][psOperand->ui32RegisterNumber] != 0, true, &skipPrefix)) + if (psTranslator->TranslateSystemValue(psOperand, psIn, res, puiIgnoreSwizzle, psShader->aIndexedInput[regSpace][psOperand->ui32RegisterNumber] != 0, true, &skipPrefix, &iIgnoreRedirect)) { if (psShader->eTargetLanguage == LANG_METAL && (iIgnoreRedirect == 0) && !skipPrefix) return inputPrefix + res; @@ -144,7 +158,7 @@ std::string HLSLCrossCompilerContext::GetDeclaredInputName(const Operand* psOper } ASSERT(psIn != NULL); - oss << inputPrefix << (regSpace == 1 ? "patch" : "") << psIn->semanticName << psIn->ui32SemanticIndex; + oss << inputPrefix << (regSpace == 1 ? patchPrefix : "") << psIn->semanticName << psIn->ui32SemanticIndex; return oss.str(); } @@ -193,23 +207,25 @@ std::string HLSLCrossCompilerContext::GetDeclaredOutputName(const Operand* psOpe return oss.str(); } + const std::string patchPrefix = psShader->eTargetLanguage == LANG_METAL ? "patch." : "patch"; std::string res = ""; - if (psTranslator->TranslateSystemValue(psOperand, psOut, res, puiIgnoreSwizzle, psShader->aIndexedOutput[regSpace][psOperand->ui32RegisterNumber], false)) + + if (psTranslator->TranslateSystemValue(psOperand, psOut, res, puiIgnoreSwizzle, psShader->aIndexedOutput[regSpace][psOperand->ui32RegisterNumber], false, NULL, &iIgnoreRedirect)) { - // HACK: i couldnt find better way to handle it - // clip planes will always have interim variable, as HLSL operates on float4 but we need to size output accordingly with actual planes count - // for some reason TranslateSystemValue return *outSkipPrefix = true for ALL system vars and then we simply ignore it here - const bool isClipPlanes = psOut && psOut->eSystemValueType == NAME_CLIP_DISTANCE; - - if (psShader->eTargetLanguage == LANG_METAL && (iIgnoreRedirect == 0) && !isClipPlanes) + // clip/cull planes will always have interim variable, as HLSL operates on float4 but we need to size output accordingly with actual planes count + // with tessellation factor buffers, a separate buffer from output is used. for some reason TranslateSystemValue return *outSkipPrefix = true + // for ALL system vars and then we simply ignore it here, so opt to modify iIgnoreRedirect for these special cases + if (psShader->eTargetLanguage == LANG_METAL && regSpace == 0 && (iIgnoreRedirect == 0)) return outputPrefix + res; + else if (psShader->eTargetLanguage == LANG_METAL && (iIgnoreRedirect == 0)) + return patchPrefix + res; else return res; } ASSERT(psOut != NULL); - oss << outputPrefix << (regSpace == 1 ? "patch" : "") << psOut->semanticName << psOut->ui32SemanticIndex; + oss << outputPrefix << (regSpace == 1 ? patchPrefix : "") << psOut->semanticName << psOut->ui32SemanticIndex; return oss.str(); } @@ -275,3 +291,9 @@ bool HLSLCrossCompilerContext::OutputNeedsDeclaring(const Operand* psOperand, co return false; } + +bool HLSLCrossCompilerContext::IsVulkan() const +{ + return (flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0; +} + diff --git a/src/HLSLcc.cpp b/src/HLSLcc.cpp index 9aa0bbf..5b894ff 100644 --- a/src/HLSLcc.cpp +++ b/src/HLSLcc.cpp @@ -80,8 +80,8 @@ HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromMem(const char* shader, if (language == LANG_METAL) { - // Tessellation or geometry shaders are not supported - if (psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER || psShader->eShaderType == GEOMETRY_SHADER) + // Geometry shader is not supported + if (psShader->eShaderType == GEOMETRY_SHADER) { result->sourceCode = ""; return 0; @@ -205,7 +205,7 @@ HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromFile(const char* filename, length = ftell(shaderFile); fseek(shaderFile, 0, SEEK_SET); - shader.resize(length + 1); + shader.reserve(length + 1); readLength = fread(&shader[0], 1, length, shaderFile); diff --git a/src/HLSLccToolkit.cpp b/src/HLSLccToolkit.cpp index d43d6d7..9a9a7c7 100644 --- a/src/HLSLccToolkit.cpp +++ b/src/HLSLccToolkit.cpp @@ -4,6 +4,8 @@ #include "internal_includes/toGLSLOperand.h" #include "internal_includes/HLSLCrossCompilerContext.h" #include "internal_includes/Shader.h" +#include "internal_includes/languages.h" +#include "include/UnityInstancingFlexibleArraySize.h" #include #include @@ -54,8 +56,7 @@ namespace HLSLcc return SVT_FLOAT; } - const char * GetConstructorForTypeGLSL(const SHADER_VARIABLE_TYPE eType, - const int components, bool useGLSLPrecision) + const char * GetConstructorForTypeGLSL(const HLSLCrossCompilerContext *context, const SHADER_VARIABLE_TYPE eType, const int components, bool useGLSLPrecision) { static const char * const uintTypes[] = { " ", "uint", "uvec2", "uvec3", "uvec4" }; static const char * const uint16Types[] = { " ", "mediump uint", "mediump uvec2", "mediump uvec3", "mediump uvec4" }; @@ -68,11 +69,12 @@ namespace HLSLcc static const char * const boolTypes[] = { " ", "bool", "bvec2", "bvec3", "bvec4" }; ASSERT(components >= 1 && components <= 4); + bool emitLowp = EmitLowp(context); switch (eType) { case SVT_UINT: - return uintTypes[components]; + return HaveUnsignedTypes(context->psShader->eTargetLanguage) ? uintTypes[components] : intTypes[components]; case SVT_UINT16: return useGLSLPrecision ? uint16Types[components] : uintTypes[components]; case SVT_INT: @@ -80,13 +82,13 @@ namespace HLSLcc case SVT_INT16: return useGLSLPrecision ? int16Types[components] : intTypes[components]; case SVT_INT12: - return useGLSLPrecision ? int12Types[components] : intTypes[components]; + return useGLSLPrecision ? (emitLowp ? int12Types[components] : int16Types[components]) : intTypes[components]; case SVT_FLOAT: return floatTypes[components]; case SVT_FLOAT16: return useGLSLPrecision ? float16Types[components] : floatTypes[components]; case SVT_FLOAT10: - return useGLSLPrecision ? float10Types[components] : floatTypes[components]; + return useGLSLPrecision ? (emitLowp ? float10Types[components] : float16Types[components]) : floatTypes[components]; case SVT_BOOL: return boolTypes[components]; default: @@ -137,7 +139,7 @@ namespace HLSLcc if (psContext->psShader->eTargetLanguage == LANG_METAL) return GetConstructorForTypeMetal(eType, components); else - return GetConstructorForTypeGLSL(eType, components, useGLSLPrecision); + return GetConstructorForTypeGLSL(psContext, eType, components, useGLSLPrecision); } std::string GetMatrixTypeName(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eBaseType, const int columns, const int rows) @@ -442,7 +444,7 @@ namespace HLSLcc } // Returns true if a direct constructor can convert src->dest - bool CanDoDirectCast(SHADER_VARIABLE_TYPE src, SHADER_VARIABLE_TYPE dest) + bool CanDoDirectCast(const HLSLCrossCompilerContext *context, SHADER_VARIABLE_TYPE src, SHADER_VARIABLE_TYPE dest) { // uint<->int<->bool conversions possible if ((src == SVT_INT || src == SVT_UINT || src == SVT_BOOL || src == SVT_INT12 || src == SVT_INT16 || src == SVT_UINT16) && @@ -454,9 +456,23 @@ namespace HLSLcc (dest == SVT_FLOAT || dest == SVT_DOUBLE || dest == SVT_FLOAT16 || dest == SVT_FLOAT10)) return true; + if (context->psShader->eTargetLanguage == LANG_METAL) + { + // avoid compiler error: cannot use as_type to cast from 'half' to 'unsigned int', types of different size + if ((src == SVT_FLOAT16 || src == SVT_FLOAT10) && (dest == SVT_UINT)) + return true; + } + return false; } + bool IsUnityFlexibleInstancingBuffer(const ConstantBuffer* psCBuf) + { + return psCBuf != NULL && psCBuf->asVars.size() == 1 + && psCBuf->asVars[0].sType.Class == SVC_STRUCT && psCBuf->asVars[0].sType.Elements == 2 + && IsUnityInstancingConstantBufferName(psCBuf->name.c_str()); + } + #ifndef fpcheck #ifdef _MSC_VER #define fpcheck(x) (_isnan(x) || !_finite(x)) diff --git a/src/LoopTransform.cpp b/src/LoopTransform.cpp index 60f6157..05c72cf 100644 --- a/src/LoopTransform.cpp +++ b/src/LoopTransform.cpp @@ -1,4 +1,5 @@ +#include "src/internal_includes/HLSLCrossCompilerContext.h" #include "src/internal_includes/LoopTransform.h" #include "src/internal_includes/Shader.h" #include "src/internal_includes/debug.h" @@ -159,7 +160,7 @@ namespace HLSLcc } // Attempt to transform a single loop into a for-statement - static void AttemptLoopTransform(ShaderPhase &phase, LoopInfo &li) + static void AttemptLoopTransform(HLSLCrossCompilerContext *psContext, ShaderPhase &phase, LoopInfo &li) { // In order to transform a loop into a for, the following has to hold: // - The loop must start with a comparison instruction where one of the src operands is a temp (induction variable), followed by OPCODE_BREAKC. @@ -216,19 +217,22 @@ namespace HLSLcc // but then fails miserably if the loop variable is used as an index to UAV loads/stores or some other cases ("array access too complex") // This is also triggered when the driver optimizer sees "simple enough" arithmetics (whatever that is) done on the loop variable before indexing. // So, disable for-loop transformation altogether whenever we see a UAV load or store inside a loop. - for (auto itr = li.m_StartLoop; itr != li.m_EndLoop; itr++) + if(psContext->psShader->eTargetLanguage >= LANG_400 && psContext->psShader->eTargetLanguage < LANG_GL_LAST && !psContext->IsVulkan()) { - switch (itr->eOpcode) + for (auto itr = li.m_StartLoop; itr != li.m_EndLoop; itr++) { - case OPCODE_LD_RAW: - case OPCODE_LD_STRUCTURED: - case OPCODE_LD_UAV_TYPED: - case OPCODE_STORE_RAW: - case OPCODE_STORE_STRUCTURED: - case OPCODE_STORE_UAV_TYPED: - return; // Nope, can't do a for, not even a partial one. - default: - break; + switch (itr->eOpcode) + { + case OPCODE_LD_RAW: + case OPCODE_LD_STRUCTURED: + case OPCODE_LD_UAV_TYPED: + case OPCODE_STORE_RAW: + case OPCODE_STORE_STRUCTURED: + case OPCODE_STORE_UAV_TYPED: + return; // Nope, can't do a for, not even a partial one. + default: + break; + } } } @@ -265,6 +269,13 @@ namespace HLSLcc // Initializer must only write to one component if (initializer && initializer->asOperands[0].GetNumSwizzleElements() != 1) initializer = 0; + // Initializer data type must be int or uint + if (initializer) + { + SHADER_VARIABLE_TYPE dataType = initializer->asOperands[0].GetDataType(psContext); + if (dataType != SVT_INT && dataType != SVT_UINT) + return; + } // Check that the initializer is only used within the range so we can move it to for statement if (initializer) @@ -343,12 +354,12 @@ namespace HLSLcc } - void DoLoopTransform(ShaderPhase &phase) + void DoLoopTransform(HLSLCrossCompilerContext *psContext, ShaderPhase &phase) { Loops loops; BuildLoopInfo(phase, loops); - std::for_each(loops.begin(), loops.end(), [&phase](LoopInfo &li) + std::for_each(loops.begin(), loops.end(), [&phase, psContext](LoopInfo &li) { // Some sanity checks: start and end points must be initialized, we shouldn't have any switches here, and each loop must have at least one exit point // Also that there's at least 2 instructions in loop body @@ -357,7 +368,7 @@ namespace HLSLcc ASSERT(li.m_EndLoop > li.m_StartLoop + 2); ASSERT(!li.m_IsSwitch); ASSERT(!li.m_ExitPoints.empty()); - AttemptLoopTransform(phase, li); + AttemptLoopTransform(psContext, phase, li); }); } }; \ No newline at end of file diff --git a/src/Operand.cpp b/src/Operand.cpp index e231d4c..0502980 100644 --- a/src/Operand.cpp +++ b/src/Operand.cpp @@ -5,7 +5,6 @@ #include "internal_includes/Shader.h" #include "internal_includes/HLSLCrossCompilerContext.h" #include "internal_includes/Instruction.h" -#include uint32_t Operand::GetAccessMask() const { @@ -337,8 +336,11 @@ SHADER_VARIABLE_TYPE Operand::GetDataType(HLSLCrossCompilerContext* psContext, S if (regSpace == 0) psContext->psShader->sInfo.GetOutputSignatureFromRegister(ui32Register, GetAccessMask(), psContext->psShader->ui32CurrentVertexOutputStream, &psOut); - else - psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Register, GetAccessMask(), &psOut); + else { + psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Register, GetAccessMask(), &psOut, true); + if (!psOut) + return SVT_FLOAT; + } ASSERT(psOut != NULL); if (psOut->eMinPrec != MIN_PRECISION_DEFAULT) @@ -403,7 +405,6 @@ SHADER_VARIABLE_TYPE Operand::GetDataType(HLSLCrossCompilerContext* psContext, S case NAME_RENDER_TARGET_ARRAY_INDEX: case NAME_VIEWPORT_ARRAY_INDEX: case NAME_SAMPLE_INDEX: - return SVT_INT; case NAME_IS_FRONT_FACE: @@ -411,6 +412,7 @@ SHADER_VARIABLE_TYPE Operand::GetDataType(HLSLCrossCompilerContext* psContext, S case NAME_POSITION: case NAME_CLIP_DISTANCE: + case NAME_CULL_DISTANCE: return SVT_FLOAT; default: @@ -528,8 +530,12 @@ SHADER_VARIABLE_TYPE Operand::GetDataType(HLSLCrossCompilerContext* psContext, S { return SVT_INT; } + case OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER: // constant array is floats everywhere except on vulkan + { + return psContext->IsVulkan() ? SVT_UINT : SVT_FLOAT; + } + case OPERAND_TYPE_INDEXABLE_TEMP: // Indexable temps are always floats - case OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER: // So are const arrays currently default: { return SVT_FLOAT; @@ -619,7 +625,9 @@ Operand* Operand::GetDynamicIndexOperand(HLSLCrossCompilerContext *psContext, co } else if (psDynIndexOrigin->eOpcode == OPCODE_ISHL) { - if (asOps[2].eType == OPERAND_TYPE_IMMEDIATE32) + if (asOps[2].eType == OPERAND_TYPE_IMMEDIATE32 && asOps[1].eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) + psOriginOp = &asOps[0]; + else if (asOps[2].eType == OPERAND_TYPE_IMMEDIATE32) psOriginOp = &asOps[1]; } @@ -632,7 +640,7 @@ Operand* Operand::GetDynamicIndexOperand(HLSLCrossCompilerContext *psContext, co // -> we can use src straight and no index revert calc is needed if ((psOriginOp->eType == OPERAND_TYPE_INPUT) || ((psOriginOp->ui32RegisterNumber != psDynIndexOp->ui32RegisterNumber || psOriginOp->GetDataType(psContext) != psDynIndexOp->GetDataType(psContext)) - && psOriginOp->m_Defines[0].m_Inst->m_Uses.size() == 1)) + && (!psOriginOp->m_Defines.empty()) && psOriginOp->m_Defines[0].m_Inst->m_Uses.size() == 1)) { psDynIndexOp = psOriginOp; *needsIndexCalcRevert = false; @@ -646,4 +654,4 @@ Operand* Operand::GetDynamicIndexOperand(HLSLCrossCompilerContext *psContext, co } return psDynIndexOp; -} \ No newline at end of file +} diff --git a/src/ShaderInfo.cpp b/src/ShaderInfo.cpp index 4ec6181..e862f8d 100644 --- a/src/ShaderInfo.cpp +++ b/src/ShaderInfo.cpp @@ -89,12 +89,9 @@ int ShaderInfo::GetPatchConstantSignatureFromRegister(const uint32_t ui32Registe } } - if (allowNull) - return 0; - // There are situations (especially when using dcl_indexrange) where the compiler happily writes outside the actual masks. // In those situations just take the last signature that uses that register (it's typically the "highest" one) - for (i = ui32NumVars - 1; i != 0xffffffff; i--) + for( i = ui32NumVars - 1; i-- > 0; ) { if (ui32Register == psPatchConstantSignatures[i].ui32Register) { @@ -103,8 +100,7 @@ int ShaderInfo::GetPatchConstantSignatureFromRegister(const uint32_t ui32Registe } } - - ASSERT(0); + ASSERT(allowNull); return 0; } @@ -314,7 +310,7 @@ int ShaderInfo::GetShaderVarFromOffset(const uint32_t ui32Vec4Offset, // Patches the fullName of the var with given array indices. Does not insert the indexing for the var itself if it is an array. // Searches for brackets and inserts indices one by one. -std::string ShaderInfo::GetShaderVarIndexedFullName(const ShaderVarType* psShaderVar, std::vector &indices, const std::string dynamicIndex, bool revertDynamicIndexCalc, bool matrixAsVectors) +std::string ShaderInfo::GetShaderVarIndexedFullName(const ShaderVarType* psShaderVar, const std::vector& indices, const std::string& dynamicIndex, bool revertDynamicIndexCalc, bool matrixAsVectors) { std::ostringstream oss; size_t prevpos = 0; diff --git a/src/decode.cpp b/src/decode.cpp index 9717e92..cd5f740 100644 --- a/src/decode.cpp +++ b/src/decode.cpp @@ -443,6 +443,8 @@ const uint32_t* DecodeDeclaration(Shader* psShader, const uint32_t* pui32Token, case OPCODE_DCL_SAMPLER: { psDecl->ui32NumOperands = 1; + psDecl->value.eSamplerMode = DecodeSamplerMode(*pui32Token); + DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]); break; } @@ -628,9 +630,9 @@ const uint32_t* DecodeDeclaration(Shader* psShader, const uint32_t* pui32Token, ui32OperandOffset++; - psDecl->value.interface.ui32InterfaceID = interfaceID; - psDecl->value.interface.ui32NumFuncTables = numClassesImplementingThisInterface; - psDecl->value.interface.ui32ArraySize = arrayLen; + psDecl->value.iface.ui32InterfaceID = interfaceID; + psDecl->value.iface.ui32NumFuncTables = numClassesImplementingThisInterface; + psDecl->value.iface.ui32ArraySize = arrayLen; psShader->funcPointer[interfaceID].ui32NumBodiesPerTable = psDecl->ui32TableLength; @@ -678,6 +680,7 @@ const uint32_t* DecodeDeclaration(Shader* psShader, const uint32_t* pui32Token, } case OPCODE_DCL_INPUT_CONTROL_POINT_COUNT: { + psDecl->value.ui32MaxOutputVertexCount = DecodeOutputControlPointCount(*pui32Token); break; } case OPCODE_HS_DECLS: diff --git a/src/internal_includes/ControlFlowGraph.h b/src/internal_includes/ControlFlowGraph.h index 7c26255..e21c4ca 100644 --- a/src/internal_includes/ControlFlowGraph.h +++ b/src/internal_includes/ControlFlowGraph.h @@ -6,10 +6,6 @@ #include #include -#ifdef __APPLE__ -#include -#endif - #include struct Instruction; @@ -17,12 +13,7 @@ class Operand; namespace HLSLcc { -#ifdef __APPLE__ - // Herp derp Apple is stuck in 2005 - using namespace std::tr1; -#else using namespace std; -#endif namespace ControlFlow { diff --git a/src/internal_includes/Declaration.h b/src/internal_includes/Declaration.h index 94f80b2..a9123c1 100644 --- a/src/internal_includes/Declaration.h +++ b/src/internal_includes/Declaration.h @@ -50,13 +50,14 @@ struct Declaration float fMaxTessFactor; uint32_t ui32IndexRange; uint32_t ui32GSInstanceCount; + SB_SAMPLER_MODE eSamplerMode; // For sampler declarations, the sampler mode. struct Interface_TAG { uint32_t ui32InterfaceID; uint32_t ui32NumFuncTables; uint32_t ui32ArraySize; - } interface; + } iface; } value; uint32_t ui32BufferStride; diff --git a/src/internal_includes/HLSLCrossCompilerContext.h b/src/internal_includes/HLSLCrossCompilerContext.h index b3df7a0..50198d5 100644 --- a/src/internal_includes/HLSLCrossCompilerContext.h +++ b/src/internal_includes/HLSLCrossCompilerContext.h @@ -19,6 +19,7 @@ public: bstring glsl; bstring extensions; + bstring beforeMain; bstring* currentGLSLString;//either glsl or earlyMain of current phase @@ -26,6 +27,11 @@ public: int indent; unsigned int flags; + + // Helper functions for checking flags + // Returns true if VULKAN_BINDINGS flag is set + bool IsVulkan() const; + Shader* psShader; GLSLCrossDependencyData* psDependencies; const char *inputPrefix; // Prefix for shader inputs @@ -48,7 +54,8 @@ public: bool OutputNeedsDeclaring(const Operand* psOperand, const int count); - void RequireExtension(const std::string &extName); + bool RequireExtension(const std::string &extName); + bool EnableExtension(const std::string &extName); private: std::set m_EnabledExtensions; diff --git a/src/internal_includes/HLSLccToolkit.h b/src/internal_includes/HLSLccToolkit.h index 1112246..1fed038 100644 --- a/src/internal_includes/HLSLccToolkit.h +++ b/src/internal_includes/HLSLccToolkit.h @@ -9,6 +9,7 @@ #include "internal_includes/Operand.h" class HLSLCrossCompilerContext; +struct ConstantBuffer; namespace HLSLcc { @@ -20,11 +21,9 @@ namespace HLSLcc const char * GetConstructorForType(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eType, const int components, bool useGLSLPrecision = true); - const char * GetConstructorForTypeGLSL(const SHADER_VARIABLE_TYPE eType, - const int components, bool useGLSLPrecision); + const char * GetConstructorForTypeGLSL(const HLSLCrossCompilerContext *context, const SHADER_VARIABLE_TYPE eType, const int components, bool useGLSLPrecision); - const char * GetConstructorForTypeMetal(const SHADER_VARIABLE_TYPE eType, - const int components); + const char * GetConstructorForTypeMetal(const SHADER_VARIABLE_TYPE eType, const int components); std::string GetMatrixTypeName(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eBaseType, const int columns, const int rows); @@ -55,7 +54,9 @@ namespace HLSLcc // Returns true if the instruction adds 1 to the destination temp register bool IsAddOneInstruction(const Instruction *psInst); - bool CanDoDirectCast(SHADER_VARIABLE_TYPE src, SHADER_VARIABLE_TYPE dest); + bool CanDoDirectCast(const HLSLCrossCompilerContext *context, SHADER_VARIABLE_TYPE src, SHADER_VARIABLE_TYPE dest); + + bool IsUnityFlexibleInstancingBuffer(const ConstantBuffer* psCBuf); // Helper function to print floats with full precision void PrintFloat(bstring b, float f); diff --git a/src/internal_includes/LoopTransform.h b/src/internal_includes/LoopTransform.h index 63caaf8..c3b0fc4 100644 --- a/src/internal_includes/LoopTransform.h +++ b/src/internal_includes/LoopTransform.h @@ -2,8 +2,8 @@ #pragma once class ShaderPhase; - +class HLSLCrossCompilerContext; namespace HLSLcc { - void DoLoopTransform(ShaderPhase &phase); + void DoLoopTransform(HLSLCrossCompilerContext *psContext, ShaderPhase &phase); }; diff --git a/src/internal_includes/Operand.h b/src/internal_includes/Operand.h index 439db70..701d505 100644 --- a/src/internal_includes/Operand.h +++ b/src/internal_includes/Operand.h @@ -4,10 +4,6 @@ #include #include -#ifdef __APPLE__ -#include -#endif - enum{ MAX_SUB_OPERANDS = 3 }; class Operand; class HLSLCrossCompilerContext; @@ -21,14 +17,7 @@ struct Instruction; class Operand { public: -#ifdef __APPLE__ - // Herp derp Apple is stuck in 2005 - typedef std::tr1::shared_ptr SubOperandPtr; -#else typedef std::shared_ptr SubOperandPtr; -#endif - - Operand() : diff --git a/src/internal_includes/Shader.h b/src/internal_includes/Shader.h index 38892b2..26eabee 100644 --- a/src/internal_includes/Shader.h +++ b/src/internal_includes/Shader.h @@ -145,7 +145,8 @@ public: ui32CurrentVertexOutputStream(0), textureSamplers(), aui32StructuredBufferBindingPoints(MAX_RESOURCE_BINDINGS, 0), - ui32CurrentStructuredBufferIndex() + ui32CurrentStructuredBufferIndex(), + m_DummySamplerDeclared(false) { } @@ -257,6 +258,8 @@ public: std::vector psDoubleTempSizes; // ...and for doubles std::vector psBoolTempSizes; // ... and for bools + bool m_DummySamplerDeclared; // If true, the shader doesn't declare any samplers but uses texelFetch and we have added a dummy sampler for Vulkan for that. + private: void DoIOOverlapOperand(ShaderPhase *psPhase, Operand *psOperand); diff --git a/src/internal_includes/Translator.h b/src/internal_includes/Translator.h index 7650985..e41cff5 100644 --- a/src/internal_includes/Translator.h +++ b/src/internal_includes/Translator.h @@ -18,7 +18,7 @@ public: virtual void TranslateDeclaration(const Declaration *psDecl) = 0; // Translate system value type to name, return true if succeeded and no further translation is necessary - virtual bool TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix = NULL) = 0; + virtual bool TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix = NULL, int *iIgnoreRedirect = NULL) = 0; // In GLSL, the input and output names cannot clash. // Also, the output name of previous stage must match the input name of the next stage. diff --git a/src/internal_includes/languages.h b/src/internal_includes/languages.h index a3d417d..d6c77c0 100644 --- a/src/internal_includes/languages.h +++ b/src/internal_includes/languages.h @@ -43,11 +43,11 @@ static int HaveOverloadedTextureFuncs(const GLLang eLang) return 1; } -//Only enable for ES. +//Only enable for ES. Vulkan and Switch. //Not present in 120, ignored in other desktop languages. Specifically enabled on Vulkan. static int HavePrecisionQualifiers(const HLSLCrossCompilerContext *psContext) { - if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) + if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0 || (psContext->flags & HLSLCC_FLAG_NVN_TARGET) != 0) return 1; const GLLang eLang = psContext->psShader->eTargetLanguage; @@ -58,6 +58,12 @@ static int HavePrecisionQualifiers(const HLSLCrossCompilerContext *psContext) return 0; } +static int EmitLowp(const HLSLCrossCompilerContext *psContext) +{ + const GLLang eLang = psContext->psShader->eTargetLanguage; + return eLang == LANG_ES_100 ? 1 : 0; +} + static int HaveCubemapArray(const GLLang eLang) { if (eLang >= LANG_400 && eLang <= LANG_GL_LAST) @@ -139,17 +145,68 @@ static int PixelInterpDependency(const GLLang eLang) return 0; } -static int HaveUVec(const GLLang eLang) +static int HaveUnsignedTypes(const GLLang eLang) { - switch(eLang) - { + switch(eLang) + { case LANG_ES_100: case LANG_120: - return 0; + return 0; default: break; - } - return 1; + } + return 1; +} + +static int HaveBitEncodingOps(const GLLang eLang) +{ + switch(eLang) + { + case LANG_ES_100: + case LANG_120: + return 0; + default: + break; + } + return 1; +} + +static int HaveNativeBitwiseOps(const GLLang eLang) +{ + switch(eLang) + { + case LANG_ES_100: + case LANG_120: + return 0; + default: + break; + } + return 1; +} + +static int HaveDynamicIndexing(HLSLCrossCompilerContext *psContext, const Operand* psOperand = NULL) +{ + // WebGL only allows dynamic indexing with constant expressions, loop indices or a combination. + // The only exception is for uniform access in vertex shaders, which can be indexed using any expression. + + switch(psContext->psShader->eTargetLanguage) + { + case LANG_ES_100: + case LANG_120: + if (psOperand != NULL) + { + if (psOperand->m_ForLoopInductorName) + return 1; + + if (psContext->psShader->eShaderType == VERTEX_SHADER && psOperand->eType == OPERAND_TYPE_CONSTANT_BUFFER) + return 1; + } + + return 0; + default: + break; + } + return 1; } static int HaveGather(const GLLang eLang) diff --git a/src/internal_includes/toGLSL.h b/src/internal_includes/toGLSL.h index 830a0a6..87ffdf7 100644 --- a/src/internal_includes/toGLSL.h +++ b/src/internal_includes/toGLSL.h @@ -9,20 +9,23 @@ class ToGLSL : public Translator { protected: GLLang language; + bool m_NeedUnityInstancingArraySizeDecl; + public: - explicit ToGLSL(HLSLCrossCompilerContext *ctx) : Translator(ctx), language(LANG_DEFAULT) {} + explicit ToGLSL(HLSLCrossCompilerContext *ctx) : Translator(ctx), language(LANG_DEFAULT), m_NeedUnityInstancingArraySizeDecl(false), m_NumDeclaredWhileTrueLoops(0) {} // Sets the target language according to given input. if LANG_DEFAULT, does autodetect and returns the selected language GLLang SetLanguage(GLLang suggestedLanguage); virtual bool Translate(); virtual void TranslateDeclaration(const Declaration* psDecl); - virtual bool TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix = NULL); + virtual bool TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix = NULL, int *iIgnoreRedirect = NULL); virtual void SetIOPrefixes(); private: // Vulkan-only: detect which branches only depend on uniforms and immediate values and can be turned into specialization constants. void IdentifyStaticBranches(ShaderPhase *psPhase); - void BuildStaticBranchNameForInstruction(Instruction &inst); + // May return false when we detect too complex stuff (matrices, arrays etc) + bool BuildStaticBranchNameForInstruction(Instruction &inst); void DeclareSpecializationConstants(ShaderPhase &phase); @@ -42,14 +45,17 @@ private: SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, int* pNeedsParenthesis); void AddAssignPrologue(int numParenthesis, bool isEmbedded = false); + void AddBuiltinOutput(const Declaration* psDecl, int arrayElements, const char* builtinName); + void AddBuiltinInput(const Declaration* psDecl, const char* builtinName); void HandleOutputRedirect(const Declaration *psDecl, const char *Precision); void HandleInputRedirect(const Declaration *psDecl, const char *Precision); void AddUserOutput(const Declaration* psDecl); - void DeclareStructConstants(const uint32_t ui32BindingPoint, - const ConstantBuffer* psCBuf, const Operand* psOperand, - bstring glsl); + void DeclareStructConstants(const uint32_t ui32BindingPoint, const ConstantBuffer* psCBuf, const Operand* psOperand, bstring glsl); + void DeclareConstBufferShaderVariable(const char* varName, const struct ShaderVarType* psType, const struct ConstantBuffer* psCBuf, int unsizedArray, bool addUniformPrefix = false); + void PreDeclareStructType(const std::string &name, const struct ShaderVarType* psType); + void DeclareUBOConstants(const uint32_t ui32BindingPoint, const ConstantBuffer* psCBuf, bstring glsl); typedef enum { @@ -88,10 +94,6 @@ private: Instruction* psInst, const ResourceBinding* psBinding, bstring glsl); - void TranslateTexelFetchOffset( - Instruction* psInst, - const ResourceBinding* psBinding, - bstring glsl); void TranslateTexCoord( const RESOURCE_DIMENSION eResDim, Operand* psTexCoordOperand); @@ -107,8 +109,20 @@ private: Instruction* psInst, bstring glsl); + // Add an extra function to the m_FunctionDefinitions list, unless it's already there. + bool DeclareExtraFunction(const std::string &name, bstring body); + void UseExtraFunctionDependency(const std::string &name); + + void DeclareDynamicIndexWrapper(const struct ShaderVarType* psType); + void DeclareDynamicIndexWrapper(const char* psName, SHADER_VARIABLE_CLASS eClass, SHADER_VARIABLE_TYPE eType, uint32_t ui32Rows, uint32_t ui32Columns, uint32_t ui32Elements); + + bool RenderTargetDeclared(uint32_t input); + + std::string GetVulkanDummySamplerName(); + + // A map of extra helper functions we'll need. + FunctionDefinitions m_FunctionDefinitions; + + std::set m_DeclaredRenderTarget; + int m_NumDeclaredWhileTrueLoops; }; - - - - diff --git a/src/internal_includes/toMetal.h b/src/internal_includes/toMetal.h index a159d51..b24ae01 100644 --- a/src/internal_includes/toMetal.h +++ b/src/internal_includes/toMetal.h @@ -4,79 +4,6 @@ #include #include -// We store struct definition contents inside a vector of strings -struct StructDefinition -{ - StructDefinition() : m_Members(), m_Dependencies(), m_IsPrinted(false) {} - - std::vector m_Members; // A vector of strings with the struct members - std::vector m_Dependencies; // A vector of struct names this struct depends on. - bool m_IsPrinted; // Has this struct been printed out yet? -}; - -typedef std::map StructDefinitions; - -// Map of extra function definitions we need to add before the shader body but after the declarations. -typedef std::map FunctionDefinitions; - -// A helper class for allocating binding slots -// (because both UAVs and textures use the same slots in Metal, also constant buffers and other buffers etc) -class BindingSlotAllocator -{ - typedef std::map SlotMap; - SlotMap m_Allocations; -public: - BindingSlotAllocator() : m_Allocations() - { - for(int i = MAX_RESOURCE_BINDINGS-1; i >= 0; i --) - m_FreeSlots.push_back(i); - } - - enum BindType - { - ConstantBuffer = 0, - RWBuffer, - Texture, - UAV - }; - - uint32_t GetBindingSlot(uint32_t regNo, BindType type) - { - // The key is regNumber with the bindtype stored to highest 16 bits - uint32_t key = regNo | (uint32_t(type) << 16); - SlotMap::iterator itr = m_Allocations.find(key); - if(itr == m_Allocations.end()) - { - uint32_t slot = m_FreeSlots.back(); - m_FreeSlots.pop_back(); - m_Allocations.insert(std::make_pair(key, slot)); - return slot; - } - return itr->second; - } - - // Func for reserving binding slots with the original reg number. - // Used for fragment shader UAVs (SetRandomWriteTarget etc). - void ReserveBindingSlot(uint32_t regNo, BindType type) - { - uint32_t key = regNo | (uint32_t(type) << 16); - m_Allocations.insert(std::make_pair(key, regNo)); - - // Remove regNo from free slots - for (int i = m_FreeSlots.size() - 1; i >= 0; i--) - { - if (m_FreeSlots[i] == regNo) - { - m_FreeSlots.erase(m_FreeSlots.begin() + i); - return; - } - } - } - -private: - std::vector m_FreeSlots; -}; - struct SamplerDesc { std::string name; @@ -87,10 +14,11 @@ struct TextureSamplerDesc std::string name; int textureBind, samplerBind; HLSLCC_TEX_DIMENSION dim; + bool isMultisampled; + bool isDepthSampler; bool uav; }; - class ToMetal : public Translator { protected: @@ -105,7 +33,7 @@ public: virtual bool Translate(); virtual void TranslateDeclaration(const Declaration *psDecl); - virtual bool TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix = NULL); + virtual bool TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix = NULL, int *iIgnoreRedirect = NULL); std::string TranslateOperand(const Operand *psOp, uint32_t flags, uint32_t ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL); virtual void SetIOPrefixes(); @@ -121,7 +49,9 @@ private: // Retrieve the name of the output struct for this shader std::string GetOutputStructName() const; std::string GetInputStructName() const; + std::string GetCBName(const std::string& cbName) const; + void DeclareHullShaderPassthrough(); void HandleInputRedirect(const Declaration *psDecl, const std::string &typeName); void HandleOutputRedirect(const Declaration *psDecl, const std::string &typeName); @@ -137,7 +67,7 @@ private: void DeclareOutput(const Declaration *decl); - void PrintStructDeclarations(StructDefinitions &defs); + void PrintStructDeclarations(StructDefinitions &defs, const char *name = ""); std::string ResourceName(ResourceGroup group, const uint32_t ui32RegisterNumber); @@ -173,6 +103,8 @@ private: int dest, int src0, int src1, SHADER_VARIABLE_TYPE eDataType); void CallTernaryOp(const char* op1, const char* op2, Instruction* psInst, int dest, int src0, int src1, int src2, uint32_t dataType); + void CallHelper3(const char* name, Instruction* psInst, + int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask, uint32_t ui32Flags); void CallHelper3(const char* name, Instruction* psInst, int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask); void CallHelper2(const char* name, Instruction* psInst, diff --git a/src/internal_includes/tokens.h b/src/internal_includes/tokens.h index d602f75..6c24535 100644 --- a/src/internal_includes/tokens.h +++ b/src/internal_includes/tokens.h @@ -665,14 +665,6 @@ static TESSELLATOR_PARTITIONING DecodeTessPartitioning(uint32_t ui32Token) return (TESSELLATOR_PARTITIONING)((ui32Token & 0x00003800) >> 11); } -typedef enum TESSELLATOR_DOMAIN -{ - TESSELLATOR_DOMAIN_UNDEFINED = 0, - TESSELLATOR_DOMAIN_ISOLINE = 1, - TESSELLATOR_DOMAIN_TRI = 2, - TESSELLATOR_DOMAIN_QUAD = 3 -} TESSELLATOR_DOMAIN; - static TESSELLATOR_DOMAIN DecodeTessDomain(uint32_t ui32Token) { return (TESSELLATOR_DOMAIN)((ui32Token & 0x00001800) >> 11); @@ -780,4 +772,17 @@ static RESINFO_RETURN_TYPE DecodeResInfoReturnType(uint32_t ui32Token) return (RESINFO_RETURN_TYPE)((ui32Token & 0x00001800) >> 11); } +typedef enum SB_SAMPLER_MODE +{ + D3D10_SB_SAMPLER_MODE_DEFAULT = 0, + D3D10_SB_SAMPLER_MODE_COMPARISON = 1, + D3D10_SB_SAMPLER_MODE_MONO = 2, +} SB_SAMPLER_MODE; + +static SB_SAMPLER_MODE DecodeSamplerMode(uint32_t ui32Token) +{ + return (SB_SAMPLER_MODE)((ui32Token & 0x00001800) >> 11); +} + + #endif diff --git a/src/reflect.cpp b/src/reflect.cpp index b80a933..eef15ce 100644 --- a/src/reflect.cpp +++ b/src/reflect.cpp @@ -564,6 +564,10 @@ void LoadShaderInfo(const uint32_t ui32MajorVersion, psInfo->eTessOutPrim = TESSELLATOR_OUTPUT_UNDEFINED; psInfo->eTessPartitioning = TESSELLATOR_PARTITIONING_UNDEFINED; + psInfo->ui32TessInputControlPointCount = 0; + psInfo->ui32TessOutputControlPointCount = 0; + psInfo->eTessDomain = TESSELLATOR_DOMAIN_UNDEFINED; + psInfo->bEarlyFragmentTests = false; psInfo->ui32MajorVersion = ui32MajorVersion; psInfo->ui32MinorVersion = ui32MinorVersion; diff --git a/src/toGLSL.cpp b/src/toGLSL.cpp index 8301bb7..42715c6 100644 --- a/src/toGLSL.cpp +++ b/src/toGLSL.cpp @@ -17,6 +17,7 @@ #include "internal_includes/HLSLCrossCompilerContext.h" #include "internal_includes/Instruction.h" #include "internal_includes/LoopTransform.h" +#include "UnityInstancingFlexibleArraySize.h" #include #include @@ -98,16 +99,16 @@ static void AddVersionDependentCode(HLSLCrossCompilerContext* psContext) bool GL_ARB_shader_storage_buffer_object = false; bool GL_ARB_shader_image_load_store = false; - if(psContext->psShader->ui32MajorVersion > 3 && psContext->psShader->eTargetLanguage != LANG_ES_300 && psContext->psShader->eTargetLanguage != LANG_ES_310 && !(psContext->psShader->eTargetLanguage >= LANG_330)) + if(psContext->psShader->ui32MajorVersion > 3 && psContext->psShader->eTargetLanguage != LANG_ES_100 && psContext->psShader->eTargetLanguage != LANG_ES_300 && psContext->psShader->eTargetLanguage != LANG_ES_310 && !(psContext->psShader->eTargetLanguage >= LANG_330)) { - bcatcstr(extensions,"#extension GL_ARB_shader_bit_encoding : enable\n"); + psContext->EnableExtension("GL_ARB_shader_bit_encoding"); } if(!HaveCompute(psContext->psShader->eTargetLanguage)) { if(psContext->psShader->eShaderType == COMPUTE_SHADER) { - bcatcstr(extensions,"#extension GL_ARB_compute_shader : enable\n"); + psContext->EnableExtension("GL_ARB_compute_shader"); } if (psContext->psShader->aiOpcodeUsed[OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED] || @@ -126,7 +127,7 @@ static void AddVersionDependentCode(HLSLCrossCompilerContext* psContext) psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_CONSUME] || psContext->psShader->aiOpcodeUsed[OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED]) { - bcatcstr(extensions,"#extension GL_ARB_shader_atomic_counters : enable\n"); + psContext->EnableExtension("GL_ARB_shader_atomic_counters"); } } @@ -154,7 +155,7 @@ static void AddVersionDependentCode(HLSLCrossCompilerContext* psContext) if (!HaveImageAtomics(psContext->psShader->eTargetLanguage)) { if (isES) - bcatcstr(extensions, "#extension GL_OES_shader_image_atomic : enable\n"); + psContext->EnableExtension("GL_OES_shader_image_atomic"); else GL_ARB_shader_image_load_store = true; } @@ -167,7 +168,50 @@ static void AddVersionDependentCode(HLSLCrossCompilerContext* psContext) psContext->psShader->aiOpcodeUsed[OPCODE_GATHER4_PO] || psContext->psShader->aiOpcodeUsed[OPCODE_GATHER4_C]) { - bcatcstr(extensions,"#extension GL_ARB_texture_gather : enable\n"); + psContext->EnableExtension("GL_ARB_texture_gather"); + } + } + + if(IsESLanguage(psContext->psShader->eTargetLanguage)) + { + if (psContext->psShader->aiOpcodeUsed[OPCODE_DERIV_RTX_COARSE] || + psContext->psShader->aiOpcodeUsed[OPCODE_DERIV_RTX_FINE] || + psContext->psShader->aiOpcodeUsed[OPCODE_DERIV_RTX] || + psContext->psShader->aiOpcodeUsed[OPCODE_DERIV_RTY_COARSE] || + psContext->psShader->aiOpcodeUsed[OPCODE_DERIV_RTY_FINE] || + psContext->psShader->aiOpcodeUsed[OPCODE_DERIV_RTY]) + { + if (psContext->psShader->eTargetLanguage < LANG_ES_300) + { + psContext->EnableExtension("GL_OES_standard_derivatives"); + } + } + + if (psContext->psShader->eShaderType == PIXEL_SHADER && + (psContext->psShader->aiOpcodeUsed[OPCODE_SAMPLE_L] || + psContext->psShader->aiOpcodeUsed[OPCODE_SAMPLE_C_LZ] || + psContext->psShader->aiOpcodeUsed[OPCODE_SAMPLE_D])) + { + psContext->EnableExtension("GL_EXT_shader_texture_lod"); + + static const int tex_sampler_type_count = 4; + static const char* tex_sampler_dim_name[tex_sampler_type_count] = { + "1D", "2D", "3D", "Cube", + }; + + if (psContext->psShader->eTargetLanguage == LANG_ES_100) + { + bcatcstr(extensions,"#if !defined(GL_EXT_shader_texture_lod)\n"); + + for (int dim = 0; dim < tex_sampler_type_count; dim++) + { + bformata(extensions, "#define texture%sLodEXT texture%s\n", tex_sampler_dim_name[dim], tex_sampler_dim_name[dim]); + + if (dim == 1) // 2D + bformata(extensions, "#define texture%sProjLodEXT texture%sProj\n", tex_sampler_dim_name[dim], tex_sampler_dim_name[dim]); + } + bcatcstr(extensions,"#endif\n"); + } } } @@ -176,7 +220,7 @@ static void AddVersionDependentCode(HLSLCrossCompilerContext* psContext) if(psContext->psShader->aiOpcodeUsed[OPCODE_GATHER4_PO_C] || psContext->psShader->aiOpcodeUsed[OPCODE_GATHER4_PO]) { - bcatcstr(extensions,"#extension GL_ARB_gpu_shader5 : enable\n"); + psContext->EnableExtension("GL_ARB_gpu_shader5"); } } @@ -184,7 +228,7 @@ static void AddVersionDependentCode(HLSLCrossCompilerContext* psContext) { if(psContext->psShader->aiOpcodeUsed[OPCODE_LOD]) { - bcatcstr(extensions,"#extension GL_ARB_texture_query_lod : enable\n"); + psContext->EnableExtension("GL_ARB_texture_query_lod"); } } @@ -192,14 +236,14 @@ static void AddVersionDependentCode(HLSLCrossCompilerContext* psContext) { if(psContext->psShader->aiOpcodeUsed[OPCODE_RESINFO]) { - bcatcstr(extensions,"#extension GL_ARB_texture_query_levels : enable\n"); - bcatcstr(extensions, "#extension GL_ARB_shader_image_size : enable\n"); + psContext->EnableExtension("GL_ARB_texture_query_levels"); + psContext->EnableExtension("GL_ARB_shader_image_size"); } } if (psContext->psShader->aiOpcodeUsed[OPCODE_SAMPLE_INFO ]) { - bcatcstr(extensions, "#extension GL_ARB_shader_texture_image_samples : enable\n"); + psContext->EnableExtension("GL_ARB_shader_texture_image_samples"); } if(!HaveImageLoadStore(psContext->psShader->eTargetLanguage)) @@ -209,7 +253,7 @@ static void AddVersionDependentCode(HLSLCrossCompilerContext* psContext) psContext->psShader->aiOpcodeUsed[OPCODE_STORE_STRUCTURED]) { GL_ARB_shader_image_load_store = true; - bcatcstr(extensions,"#extension GL_ARB_shader_bit_encoding : enable\n"); + psContext->EnableExtension("GL_ARB_shader_bit_encoding"); } else if(psContext->psShader->aiOpcodeUsed[OPCODE_LD_UAV_TYPED] || @@ -224,7 +268,7 @@ static void AddVersionDependentCode(HLSLCrossCompilerContext* psContext) { if(psContext->psShader->eShaderType == GEOMETRY_SHADER) { - bcatcstr(extensions,"#extension GL_ARB_geometry_shader : enable\n"); + psContext->EnableExtension("GL_ARB_geometry_shader"); } } @@ -232,8 +276,8 @@ static void AddVersionDependentCode(HLSLCrossCompilerContext* psContext) { if(psContext->psShader->eShaderType == GEOMETRY_SHADER) { - bcatcstr(extensions,"#extension GL_OES_geometry_shader : enable\n"); - bcatcstr(extensions,"#extension GL_EXT_geometry_shader : enable\n"); + psContext->EnableExtension("GL_OES_geometry_shader"); + psContext->EnableExtension("GL_EXT_geometry_shader"); } } @@ -241,39 +285,37 @@ static void AddVersionDependentCode(HLSLCrossCompilerContext* psContext) { if(psContext->psShader->eShaderType == HULL_SHADER || psContext->psShader->eShaderType == DOMAIN_SHADER) { - bcatcstr(extensions,"#extension GL_OES_tessellation_shader : enable\n"); - bcatcstr(extensions,"#extension GL_EXT_tessellation_shader : enable\n"); + psContext->EnableExtension("GL_OES_tessellation_shader"); + psContext->EnableExtension("GL_EXT_tessellation_shader"); } } if (GL_ARB_shader_storage_buffer_object) - bcatcstr(extensions, "#extension GL_ARB_shader_storage_buffer_object : enable\n"); + psContext->EnableExtension("GL_ARB_shader_storage_buffer_object"); if (GL_ARB_shader_image_load_store) - bcatcstr(extensions, "#extension GL_ARB_shader_image_load_store : enable\n"); + psContext->EnableExtension("GL_ARB_shader_image_load_store"); if(psContext->psShader->eShaderType == PIXEL_SHADER && psContext->psShader->eTargetLanguage >= LANG_120 && !HaveFragmentCoordConventions(psContext->psShader->eTargetLanguage)) { - bcatcstr(extensions,"#extension GL_ARB_fragment_coord_conventions : require\n"); + psContext->RequireExtension("GL_ARB_fragment_coord_conventions"); } if (psContext->psShader->extensions->EXT_shader_framebuffer_fetch && psContext->psShader->eShaderType == PIXEL_SHADER && psContext->flags & HLSLCC_FLAG_SHADER_FRAMEBUFFER_FETCH) { - bcatcstr(extensions, "#ifdef GL_EXT_shader_framebuffer_fetch\n"); - bcatcstr(extensions, "#extension GL_EXT_shader_framebuffer_fetch : enable\n"); - bcatcstr(extensions, "#endif\n"); + psContext->EnableExtension("GL_EXT_shader_framebuffer_fetch"); } //Handle fragment shader default precision - if ((psContext->psShader->eShaderType == PIXEL_SHADER) && - (psContext->psShader->eTargetLanguage == LANG_ES_100 || psContext->psShader->eTargetLanguage == LANG_ES_300 || psContext->psShader->eTargetLanguage == LANG_ES_310)) + if (psContext->psShader->eShaderType == PIXEL_SHADER && + (psContext->psShader->eTargetLanguage == LANG_ES_100 || psContext->psShader->eTargetLanguage == LANG_ES_300 || psContext->psShader->eTargetLanguage == LANG_ES_310 || (psContext->flags & HLSLCC_FLAG_NVN_TARGET))) { - // Float default precision is patched during runtime in GlslGpuProgramGLES.cpp:PatchupFragmentShaderText() - // Except on Vulkan - if(psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) + if((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) || (psContext->flags & HLSLCC_FLAG_NVN_TARGET)) bcatcstr(glsl, "precision highp float;\n"); - - + else if (psContext->psShader->eTargetLanguage == LANG_ES_100) + // gles 2.0 shaders can have mediump as default if the GPU doesn't have highp support + bcatcstr(glsl, "#ifdef GL_FRAGMENT_PRECISION_HIGH\nprecision highp float;\n#else\nprecision mediump float;\n#endif\n"); + // Define default int precision to highp to avoid issues on platforms that actually implement mediump bcatcstr(glsl, "precision highp int;\n"); } @@ -540,13 +582,16 @@ bool ToGLSL::Translate() if (psShader->extensions) { if(psContext->flags & HLSLCC_FLAG_NVN_TARGET) - bcatcstr(extensions, "#extension GL_ARB_separate_shader_objects : enable\n"); + { + psContext->EnableExtension("GL_ARB_separate_shader_objects"); + psContext->EnableExtension("GL_NV_desktop_lowp_mediump"); // This flag allow FP16 operations (mediump in GLSL) + } if (psShader->extensions->ARB_explicit_attrib_location) - bcatcstr(extensions, "#extension GL_ARB_explicit_attrib_location : require\n"); + psContext->RequireExtension("GL_ARB_explicit_attrib_location"); if (psShader->extensions->ARB_explicit_uniform_location) - bcatcstr(extensions, "#extension GL_ARB_explicit_uniform_location : require\n"); + psContext->RequireExtension("GL_ARB_explicit_uniform_location"); if (psShader->extensions->ARB_shading_language_420pack) - bcatcstr(extensions, "#extension GL_ARB_shading_language_420pack : require\n"); + psContext->RequireExtension("GL_ARB_shading_language_420pack"); } psContext->ClearDependencyData(); @@ -562,7 +607,7 @@ bool ToGLSL::Translate() if (!psContext->psDependencies->m_ExtBlendModes.empty() && psShader->eShaderType == PIXEL_SHADER) { - bcatcstr(extensions, "#extension GL_KHR_blend_equation_advanced : enable\n"); + psContext->EnableExtension("GL_KHR_blend_equation_advanced"); bcatcstr(glsl, "#if GL_KHR_blend_equation_advanced\n"); for (i = 0; i < psContext->psDependencies->m_ExtBlendModes.size(); i++) { @@ -581,8 +626,8 @@ bool ToGLSL::Translate() psContext->DoDataTypeAnalysis(&phase); phase.ResolveUAVProperties(); psShader->ResolveStructuredBufferBindingSlots(&phase); - phase.PruneConstArrays(); - + if(!psContext->IsVulkan()) + phase.PruneConstArrays(); } psShader->PruneTempRegisters(); @@ -591,7 +636,7 @@ bool ToGLSL::Translate() { // Loop transform can only be done after the temps have been pruned ShaderPhase &phase = psShader->asPhases[ui32Phase]; - HLSLcc::DoLoopTransform(phase); + HLSLcc::DoLoopTransform(psContext, phase); if ((psContext->flags & HLSLCC_FLAG_VULKAN_SPECIALIZATION_CONSTANTS) != 0) { @@ -817,6 +862,16 @@ bool ToGLSL::Translate() } } + bstring beforeMain = NULL; + bstring beforeMainKeyword = NULL; + + if (!HaveDynamicIndexing(psContext)) + { + beforeMain = bfromcstr(""); + beforeMainKeyword = bfromcstr("\n// Before Main\n\n"); + psContext->beforeMain = beforeMain; + } + for (i = 0; i < psShader->asPhases[0].psDecl.size(); ++i) { TranslateDeclaration(&psShader->asPhases[0].psDecl[i]); @@ -827,6 +882,12 @@ bool ToGLSL::Translate() DeclareSpecializationConstants(psShader->asPhases[0]); } + // Search and replace string, for injecting stuff from translation that need to be after normal declarations and before main + if (!HaveDynamicIndexing(psContext)) + { + bconcat(glsl, beforeMainKeyword); + } + bcatcstr(glsl, "void main()\n{\n"); psContext->indent++; @@ -853,15 +914,149 @@ bool ToGLSL::Translate() bcatcstr(glsl, "}\n"); + // Print out extra functions we generated, in reverse order for potential dependencies + std::for_each(m_FunctionDefinitions.rbegin(), m_FunctionDefinitions.rend(), [&extensions](const FunctionDefinitions::value_type &p) + { + bcatcstr(extensions, p.second.c_str()); + bcatcstr(extensions, "\n"); + }); + // Concat extensions and glsl for the final shader code. + if (m_NeedUnityInstancingArraySizeDecl) + { + if (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) + { + bformata(extensions, "layout(constant_id = %d) const int %s = 2;\n", kArraySizeConstantID, UNITY_RUNTIME_INSTANCING_ARRAY_SIZE_MACRO); + } + else + { + bcatcstr(extensions, "#ifndef " UNITY_RUNTIME_INSTANCING_ARRAY_SIZE_MACRO "\n\t#define " UNITY_RUNTIME_INSTANCING_ARRAY_SIZE_MACRO " 2\n#endif\n"); + } + } bconcat(extensions, glsl); bdestroy(glsl); + + if (!HaveDynamicIndexing(psContext)) + { + bstring empty = bfromcstr(""); + + if (beforeMain->slen > 1) + bfindreplace(extensions, beforeMainKeyword, beforeMain, 0); + else + bfindreplace(extensions, beforeMainKeyword, empty, 0); + + psContext->beforeMain = NULL; + + bdestroy(empty); + bdestroy(beforeMain); + bdestroy(beforeMainKeyword); + } + psContext->glsl = extensions; glsl = NULL; return true; } +bool ToGLSL::DeclareExtraFunction(const std::string &name, bstring body) +{ + if (m_FunctionDefinitions.find(name) != m_FunctionDefinitions.end()) + return true; + m_FunctionDefinitions.insert(std::make_pair(name, (const char *) body->data)); + return false; +} + +static void PrintComponentWrapper1(bstring code, const char *func, const char *type2, const char *type3, const char *type4) +{ + bformata(code, "%s %s(%s a) { a.x = %s(a.x); a.y = %s(a.y); return a; }\n", type2, func, type2, func, func); + bformata(code, "%s %s(%s a) { a.x = %s(a.x); a.y = %s(a.y); a.z = %s(a.z); return a; }\n", type3, func, type3, func, func, func); + bformata(code, "%s %s(%s a) { a.x = %s(a.x); a.y = %s(a.y); a.z = %s(a.z); a.w = %s(a.w); return a; }\n", type4, func, type4, func, func, func, func); +} + +static void PrintComponentWrapper2(bstring code, const char *func, const char *type2, const char *type3, const char *type4) +{ + bformata(code, "%s %s(%s a, %s b) { a.x = %s(a.x, b.x); a.y = %s(a.y, b.y); return a; }\n", type2, func, type2, type2, func, func); + bformata(code, "%s %s(%s a, %s b) { a.x = %s(a.x, b.x); a.y = %s(a.y, b.y); a.z = %s(a.z, b.z); return a; }\n", type3, func, type3, type3, func, func, func); + bformata(code, "%s %s(%s a, %s b) { a.x = %s(a.x, b.x); a.y = %s(a.y, b.y); a.z = %s(a.z, b.z); a.w = %s(a.w, b.w); return a; }\n", type4, func, type4, type4, func, func, func, func); +} + +static void PrintTrunc(bstring code, const char *type) +{ + bformata(code, "%s trunc(%s x) { return sign(x)*floor(abs(x)); }\n", type, type); +} + +void ToGLSL::UseExtraFunctionDependency(const std::string &name) +{ + if (m_FunctionDefinitions.find(name) != m_FunctionDefinitions.end()) + return; + + bstring code = bfromcstr(""); + bool match = true; + + if (name == "trunc") + { + PrintTrunc(code, "float"); + PrintTrunc(code, "vec2"); + PrintTrunc(code, "vec3"); + PrintTrunc(code, "vec4"); + } + else if (name == "roundEven") + { + bformata(code, "float roundEven(float x) { float y = floor(x + 0.5); return (y - x == 0.5) ? floor(0.5*y) * 2.0 : y; }\n"); + PrintComponentWrapper1(code, "roundEven", "vec2", "vec3", "vec4"); + } + else if (name == "op_modi") + { + bformata(code, "const int BITWISE_BIT_COUNT = 32;\nint op_modi(int x, int y) { return x - y * (x / y); }\n"); + PrintComponentWrapper2(code, "op_modi", "ivec2", "ivec3", "ivec4"); + } + else if (name == "op_and") + { + UseExtraFunctionDependency("op_modi"); + + bformata(code, "int op_and(int a, int b) { int result = 0; int n = 1; for (int i = 0; i < BITWISE_BIT_COUNT; i++) { if ((op_modi(a, 2) == 1) && (op_modi(b, 2) == 1)) { result += n; } a = a / 2; b = b / 2; n = n * 2; if (!(a > 0 && b > 0)) { break; } } return result; }\n"); + PrintComponentWrapper2(code, "op_and", "ivec2", "ivec3", "ivec4"); + } + else if (name == "op_or") + { + UseExtraFunctionDependency("op_modi"); + + bformata(code, "int op_or(int a, int b) { int result = 0; int n = 1; for (int i = 0; i < BITWISE_BIT_COUNT; i++) { if ((op_modi(a, 2) == 1) || (op_modi(b, 2) == 1)) { result += n; } a = a / 2; b = b / 2; n = n * 2; if (!(a > 0 || b > 0)) { break; } } return result; }\n"); + PrintComponentWrapper2(code, "op_or", "ivec2", "ivec3", "ivec4"); + } + else if (name == "op_xor") + { + UseExtraFunctionDependency("op_and"); + + bformata(code, "int op_xor(int a, int b) { return (a + b - 2 * op_and(a, b)); }\n"); + PrintComponentWrapper2(code, "op_xor", "ivec2", "ivec3", "ivec4"); + } + else if (name == "op_shr") + { + bformata(code, "int op_shr(int a, int b) { return int(floor(float(a) / pow(2.0, float(b)))); }\n"); + PrintComponentWrapper2(code, "op_shr", "ivec2", "ivec3", "ivec4"); + } + else if (name == "op_shl") + { + bformata(code, "int op_shl(int a, int b) { return int(floor(float(a) * pow(2.0, float(b)))); }\n"); + PrintComponentWrapper2(code, "op_shl", "ivec2", "ivec3", "ivec4"); + } + else if (name == "op_not") + { + bformata(code, "int op_not(int value) { return -value - 1; }\n"); + PrintComponentWrapper1(code, "op_not", "ivec2", "ivec3", "ivec4"); + } + else + { + match = false; + } + + if (match) + DeclareExtraFunction(name, code); + + bdestroy(code); +} + void ToGLSL::DeclareSpecializationConstants(ShaderPhase &phase) { bstring glsl = psContext->glsl; @@ -933,7 +1128,7 @@ static void Base64Encode(const std::string &in, std::string& result) } -void ToGLSL::BuildStaticBranchNameForInstruction(Instruction &inst) +bool ToGLSL::BuildStaticBranchNameForInstruction(Instruction &inst) { std::ostringstream oss; if (!inst.m_StaticBranchCondition) @@ -966,7 +1161,13 @@ void ToGLSL::BuildStaticBranchNameForInstruction(Instruction &inst) bcstrfree(str); bdestroy(varname); oss << "!=0"; - Base64Encode(oss.str(), inst.m_StaticBranchName); + std::string res = oss.str(); + // Sanity checks: no arrays, no matrices + if (res.find('[') != std::string::npos) + return false; + if (res.find("hlslcc_mtx") != std::string::npos) + return false; + Base64Encode(res, inst.m_StaticBranchName); } else { @@ -1012,9 +1213,16 @@ void ToGLSL::BuildStaticBranchNameForInstruction(Instruction &inst) bdestroy(res); if(argType != SVT_BOOL) oss << "!=0"; - Base64Encode(oss.str(), inst.m_StaticBranchName); + std::string ress = oss.str(); + // Sanity checks: no arrays, no matrices + if (ress.find('[') != std::string::npos) + return false; + if (ress.find("hlslcc_mtx") != std::string::npos) + return false; + Base64Encode(ress, inst.m_StaticBranchName); } + return true; } @@ -1030,10 +1238,12 @@ void ToGLSL::IdentifyStaticBranches(ShaderPhase *psPhase) // Simple case, direct conditional branch if (i.asOperands[0].eType == OPERAND_TYPE_CONSTANT_BUFFER) { - psPhase->m_StaticBranchInstructions.push_back(&i); - i.m_IsStaticBranch = true; i.m_StaticBranchCondition = NULL; - BuildStaticBranchNameForInstruction(i); + if (BuildStaticBranchNameForInstruction(i)) + { + psPhase->m_StaticBranchInstructions.push_back(&i); + i.m_IsStaticBranch = true; + } } // Indirect, comparison via another instruction if (i.asOperands[0].eType == OPERAND_TYPE_TEMP) @@ -1065,10 +1275,14 @@ void ToGLSL::IdentifyStaticBranches(ShaderPhase *psPhase) } if (isStatic) { - psPhase->m_StaticBranchInstructions.push_back(&i); - i.m_IsStaticBranch = true; i.m_StaticBranchCondition = &def; - BuildStaticBranchNameForInstruction(i); + if (BuildStaticBranchNameForInstruction(i)) + { + psPhase->m_StaticBranchInstructions.push_back(&i); + i.m_IsStaticBranch = true; + } + else + i.m_StaticBranchCondition = NULL; } } } diff --git a/src/toGLSLDeclaration.cpp b/src/toGLSLDeclaration.cpp index 7fbde17..3130f61 100644 --- a/src/toGLSLDeclaration.cpp +++ b/src/toGLSLDeclaration.cpp @@ -14,6 +14,7 @@ #include #include #include "internal_includes/toGLSL.h" +#include "UnityInstancingFlexibleArraySize.h" using namespace HLSLcc; @@ -25,18 +26,22 @@ using namespace HLSLcc; #endif #endif // #ifndef fpcheck -static void DeclareConstBufferShaderVariable(const HLSLCrossCompilerContext *psContext, const char* Name, const struct ShaderVarType* psType, int unsizedArray, bool addUniformPrefix = false) - //const SHADER_VARIABLE_CLASS eClass, const SHADER_VARIABLE_TYPE eType, - //const char* pszName) +void ToGLSL::DeclareConstBufferShaderVariable(const char* varName, const struct ShaderVarType* psType, const struct ConstantBuffer* psCBuf, int unsizedArray, bool addUniformPrefix) { bstring glsl = *psContext->currentGLSLString; if (psType->Class == SVC_STRUCT) { - bformata(glsl, "\t%s%s_Type %s", addUniformPrefix ? "UNITY_UNIFORM " : "", Name, Name); + bformata(glsl, "\t%s%s_Type %s", addUniformPrefix ? "UNITY_UNIFORM " : "", varName, varName); if (psType->Elements > 1) { - bformata(glsl, "[%d]", psType->Elements); + if (HLSLcc::IsUnityFlexibleInstancingBuffer(psCBuf)) + { + bformata(glsl, "[" UNITY_RUNTIME_INSTANCING_ARRAY_SIZE_MACRO "]"); + m_NeedUnityInstancingArraySizeDecl = true; + } + else + bformata(glsl, "[%d]", psType->Elements); } } else if(psType->Class == SVC_MATRIX_COLUMNS || psType->Class == SVC_MATRIX_ROWS) @@ -44,7 +49,7 @@ static void DeclareConstBufferShaderVariable(const HLSLCrossCompilerContext *psC if (psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) { // Translate matrices into vec4 arrays - bformata(glsl, "\t%s%s " HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING "%s", addUniformPrefix ? "UNITY_UNIFORM " : "", HLSLcc::GetConstructorForType(psContext, psType->Type, 4), psType->Rows, psType->Columns, Name); + bformata(glsl, "\t%s%s " HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING "%s", addUniformPrefix ? "UNITY_UNIFORM " : "", HLSLcc::GetConstructorForType(psContext, psType->Type, 4), psType->Rows, psType->Columns, varName); uint32_t elemCount = (psType->Class == SVC_MATRIX_COLUMNS ? psType->Columns : psType->Rows); if (psType->Elements > 1) { @@ -54,7 +59,7 @@ static void DeclareConstBufferShaderVariable(const HLSLCrossCompilerContext *psC } else { - bformata(glsl, "\t%s%s %s", addUniformPrefix ? "UNITY_UNIFORM " : "", HLSLcc::GetMatrixTypeName(psContext, psType->Type, psType->Columns, psType->Rows).c_str(), Name); + bformata(glsl, "\t%s%s %s", addUniformPrefix ? "UNITY_UNIFORM " : "", HLSLcc::GetMatrixTypeName(psContext, psType->Type, psType->Columns, psType->Rows).c_str(), varName); if (psType->Elements > 1) { bformata(glsl, "[%d]", psType->Elements); @@ -64,7 +69,7 @@ static void DeclareConstBufferShaderVariable(const HLSLCrossCompilerContext *psC else if (psType->Class == SVC_VECTOR && psType->Columns > 1) { - bformata(glsl, "\t%s%s %s", addUniformPrefix ? "UNITY_UNIFORM " : "", HLSLcc::GetConstructorForType(psContext, psType->Type, psType->Columns), Name); + bformata(glsl, "\t%s%s %s", addUniformPrefix ? "UNITY_UNIFORM " : "", HLSLcc::GetConstructorForType(psContext, psType->Type, psType->Columns), varName); if(psType->Elements > 1) { @@ -83,7 +88,7 @@ static void DeclareConstBufferShaderVariable(const HLSLCrossCompilerContext *psC ((ShaderVarType *)psType)->Type = SVT_INT; } - bformata(glsl, "\t%s%s %s", addUniformPrefix ? "UNITY_UNIFORM " : "", HLSLcc::GetConstructorForType(psContext, psType->Type, 1), Name); + bformata(glsl, "\t%s%s %s", addUniformPrefix ? "UNITY_UNIFORM " : "", HLSLcc::GetConstructorForType(psContext, psType->Type, 1), varName); if(psType->Elements > 1) { @@ -96,7 +101,7 @@ static void DeclareConstBufferShaderVariable(const HLSLCrossCompilerContext *psC } //In GLSL embedded structure definitions are not supported. -static void PreDeclareStructType(const HLSLCrossCompilerContext *psContext, const std::string &name, const struct ShaderVarType* psType) +void ToGLSL::PreDeclareStructType(const std::string &name, const struct ShaderVarType* psType) { bstring glsl = *psContext->currentGLSLString; uint32_t i; @@ -105,7 +110,7 @@ static void PreDeclareStructType(const HLSLCrossCompilerContext *psContext, cons { if(psType->Members[i].Class == SVC_STRUCT) { - PreDeclareStructType(psContext, psType->Members[i].name, &psType->Members[i]); + PreDeclareStructType(psType->Members[i].name, &psType->Members[i]); } } @@ -120,7 +125,7 @@ static void PreDeclareStructType(const HLSLCrossCompilerContext *psContext, cons { ASSERT(psType->Members.size() != 0); - DeclareConstBufferShaderVariable(psContext, psType->Members[i].name.c_str(), &psType->Members[i], 0); + DeclareConstBufferShaderVariable(psType->Members[i].name.c_str(), &psType->Members[i], NULL, 0); } bformata(glsl, "};\n"); @@ -261,7 +266,7 @@ static void DeclareInput( } psShader->acInputDeclared[regSpace][ui32Reg] = (char)psSig->ui32Mask; - + // Do the reflection report on vertex shader inputs if (psShader->eShaderType == VERTEX_SHADER) { @@ -280,16 +285,16 @@ static void DeclareInput( psContext->psShader->abScalarInput[regSpace][regNum] |= (int)ui32CompMask; if(psShader->eShaderType == HULL_SHADER || psDecl->asOperands[0].eType == OPERAND_TYPE_INPUT_CONTROL_POINT) - bformata(glsl, "%s%s %s %s %s [];\n", locationQualifier.c_str(), StorageQualifier, Precision, scalarType, InputName); + bformata(glsl, "%s%s%s %s %s %s [];\n", locationQualifier.c_str(), Interpolation, StorageQualifier, Precision, scalarType, InputName); else - bformata(glsl, "%s%s %s %s %s [%d];\n", locationQualifier.c_str(), StorageQualifier, Precision, scalarType, InputName, arraySize); + bformata(glsl, "%s%s%s %s %s %s [%d];\n", locationQualifier.c_str(), Interpolation, StorageQualifier, Precision, scalarType, InputName, arraySize); } else { if (psShader->eShaderType == HULL_SHADER || psDecl->asOperands[0].eType == OPERAND_TYPE_INPUT_CONTROL_POINT) - bformata(glsl, "%s%s %s %s%d %s [];\n", locationQualifier.c_str(), StorageQualifier, Precision, vecType, iNumComponents, InputName); + bformata(glsl, "%s%s%s %s %s%d %s [];\n", locationQualifier.c_str(), Interpolation, StorageQualifier, Precision, vecType, iNumComponents, InputName); else - bformata(glsl, "%s%s %s %s%d %s [%d];\n", locationQualifier.c_str(), StorageQualifier, Precision, vecType, iNumComponents, InputName, + bformata(glsl, "%s%s%s %s %s%d %s [%d];\n", locationQualifier.c_str(), Interpolation, StorageQualifier, Precision, vecType, iNumComponents, InputName, psDecl->asOperands[0].aui32ArraySizes[0]); } break; @@ -326,19 +331,35 @@ static void DeclareInput( } } -static void AddBuiltinInput(HLSLCrossCompilerContext* psContext, const Declaration* psDecl, const char* builtinName) +bool ToGLSL::RenderTargetDeclared(uint32_t input) { - // Nothing to do currently as we read from builtins directly. + if (m_DeclaredRenderTarget.find(input) != m_DeclaredRenderTarget.end()) + return true; + + m_DeclaredRenderTarget.insert(input); + return false; } +void ToGLSL::AddBuiltinInput(const Declaration* psDecl, const char* builtinName) +{ + Shader* psShader = psContext->psShader; const Operand* op = &psDecl->asOperands[0]; + + const int regSpace = op->GetRegisterSpace(psContext); ASSERT(regSpace == 0); + const uint32_t ui32Reg = op->ui32RegisterNumber, ui32CompMask = op->ui32CompMask; + + // we need to at least mark if they are scalars or not (as we might need to use vector ctor) + if(op->GetNumInputElements(psContext) == 1) + psShader->abScalarInput[regSpace][ui32Reg] |= (int)ui32CompMask; +} void ToGLSL::AddBuiltinOutput(const Declaration* psDecl, int arrayElements, const char* builtinName) { bstring glsl = *psContext->currentGLSLString; Shader* psShader = psContext->psShader; + const SPECIAL_NAME eSpecialName = psDecl->asOperands[0].eSpecialName; - if (psDecl->asOperands[0].eSpecialName != NAME_CLIP_DISTANCE) + if (eSpecialName != NAME_CLIP_DISTANCE && eSpecialName != NAME_CULL_DISTANCE) return; psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode = 1; @@ -359,87 +380,66 @@ void ToGLSL::AddBuiltinOutput(const Declaration* psDecl, int arrayElements, cons { } - else + else if((eSpecialName == NAME_CLIP_DISTANCE || eSpecialName == NAME_CULL_DISTANCE) && psContext->psShader->eShaderType != HULL_SHADER) { - // Case 828454 : For some reason DX compiler seems to inject clip distance declaration to the hull shader sometimes + // Case 828454 : For some reason DX compiler seems to inject clip/cull distance declaration to the hull shader sometimes // even though it's not used at all, and overlaps some completely unrelated patch constant declarations. We'll just ignore this now. // Revisit this if this actually pops up elsewhere. - if(psDecl->asOperands[0].eSpecialName == NAME_CLIP_DISTANCE && psContext->psShader->eShaderType != HULL_SHADER) + + // cull/clip distance are pretty similar (the only real difference is extension name (and functionality, but we dont care here)) + int max = psDecl->asOperands[0].GetMaxComponent(); + + if (IsESLanguage(psShader->eTargetLanguage)) + psContext->RequireExtension("GL_EXT_clip_cull_distance"); + else if(eSpecialName == NAME_CULL_DISTANCE) + psContext->RequireExtension("GL_ARB_cull_distance"); // TODO: it is builtin in GLSL 4.5 (should we care?) + const char* glName = eSpecialName == NAME_CLIP_DISTANCE ? "Clip" : "Cull"; + + int applySwizzle = psDecl->asOperands[0].GetNumSwizzleElements() > 1 ? 1 : 0; + const char* swizzle[] = {".x", ".y", ".z", ".w"}; + + ASSERT(psSignature!=NULL); + const int index = psSignature->ui32SemanticIndex; + + //Clip/Cull distance can be spread across 1 or 2 outputs (each no more than a vec4). + //Some examples: + //float4 clip[2] : SV_ClipDistance; //8 clip distances + //float3 clip[2] : SV_ClipDistance; //6 clip distances + //float4 clip : SV_ClipDistance; //4 clip distances + //float clip : SV_ClipDistance; //1 clip distance. + + //In GLSL the clip/cull distance built-in is an array of up to 8 floats. + //So vector to array conversion needs to be done here. + int multiplier = 1; + if(index == 1) { - int max = psDecl->asOperands[0].GetMaxComponent(); - - if (IsESLanguage(psShader->eTargetLanguage)) + const ShaderInfo::InOutSignature* psFirstClipSignature; + if (psShader->sInfo.GetOutputSignatureFromSystemValue(eSpecialName, 1, &psFirstClipSignature)) { - psContext->RequireExtension("GL_EXT_clip_cull_distance"); - } - - int applySwizzle = psDecl->asOperands[0].GetNumSwizzleElements() > 1 ? 1 : 0; - int index; - int i; - int multiplier = 1; - const char* swizzle[] = {".x", ".y", ".z", ".w"}; - - ASSERT(psSignature!=NULL); - - index = psSignature->ui32SemanticIndex; - - //Clip distance can be spread across 1 or 2 outputs (each no more than a vec4). - //Some examples: - //float4 clip[2] : SV_ClipDistance; //8 clip distances - //float3 clip[2] : SV_ClipDistance; //6 clip distances - //float4 clip : SV_ClipDistance; //4 clip distances - //float clip : SV_ClipDistance; //1 clip distance. - - //In GLSL the clip distance built-in is an array of up to 8 floats. - //So vector to array conversion needs to be done here. - if(index == 1) - { - const ShaderInfo::InOutSignature* psFirstClipSignature; - if (psShader->sInfo.GetOutputSignatureFromSystemValue(NAME_CLIP_DISTANCE, 1, &psFirstClipSignature)) - { - if(psFirstClipSignature->ui32Mask & (1 << 3)) - { - multiplier = 4; - } - else - if(psFirstClipSignature->ui32Mask & (1 << 2)) - { - multiplier = 3; - } - else - if(psFirstClipSignature->ui32Mask & (1 << 1)) - { - multiplier = 2; - } - } - } - - // Add a specially crafted comment so runtime knows to enable clip planes. - // We may end up doing 2 of these, so at runtime OR the results - uint32_t clipmask = psDecl->asOperands[0].GetAccessMask(); - if(index != 0) - clipmask <<= multiplier; - bformata(psContext->glsl, "// HLSLcc_ClipDistances_%x\n", clipmask); - - psContext->psShader->asPhases[psContext->currentPhase].acOutputNeedsRedirect[psSignature->ui32Register] = 0xff; - bformata(psContext->glsl, "vec4 phase%d_glClipDistance%d;\n", psContext->currentPhase, index); - - for(i=0; iAddIndentation(); - bformata(glsl, "%s[%d] = (", builtinName, i + multiplier*index); - TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NONE); - if(applySwizzle) - { - bformata(glsl, ")%s;\n", swizzle[i]); - } - else - { - bformata(glsl, ");\n"); - } + if(psFirstClipSignature->ui32Mask & (1 << 3)) multiplier = 4; + else if(psFirstClipSignature->ui32Mask & (1 << 2)) multiplier = 3; + else if(psFirstClipSignature->ui32Mask & (1 << 1)) multiplier = 2; } } + // Add a specially crafted comment so runtime knows to enable clip planes. + // We may end up doing 2 of these, so at runtime OR the results + uint32_t clipmask = psDecl->asOperands[0].GetAccessMask(); + if(index != 0) + clipmask <<= multiplier; + bformata(psContext->glsl, "// HLSLcc_%sDistances_%x\n", glName, clipmask); + + psContext->psShader->asPhases[psContext->currentPhase].acOutputNeedsRedirect[psSignature->ui32Register] = 0xff; + bformata(psContext->glsl, "vec4 phase%d_gl%sDistance%d;\n", psContext->currentPhase, glName, index); + + for(int i=0; iAddIndentation(); + bformata(glsl, "%s[%d] = (", builtinName, i + multiplier*index); + TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NONE); + if(applySwizzle) bformata(glsl, ")%s;\n", swizzle[i]); + else bformata(glsl, ");\n"); + } } psContext->indent--; psContext->currentGLSLString = &psContext->glsl; @@ -508,12 +508,12 @@ void ToGLSL::HandleOutputRedirect(const Declaration *psDecl, const char *Precisi if (psSig->eComponentType == INOUT_COMPONENT_SINT32) { - bformata(psPhase->postShaderCode, "floatBitsToInt("); + bformata(psPhase->postShaderCode, HaveBitEncodingOps(psContext->psShader->eTargetLanguage) ? "floatBitsToInt(" : "int("); hasCast = 1; } else if (psSig->eComponentType == INOUT_COMPONENT_UINT32) { - bformata(psPhase->postShaderCode, "floatBitsToUint("); + bformata(psPhase->postShaderCode, HaveBitEncodingOps(psContext->psShader->eTargetLanguage) ? "floatBitsToUint(" : "int("); hasCast = 1; } bformata(psPhase->postShaderCode, "phase%d_Output%d_%d.", psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); @@ -545,7 +545,6 @@ void ToGLSL::HandleOutputRedirect(const Declaration *psDecl, const char *Precisi void ToGLSL::AddUserOutput(const Declaration* psDecl) { bstring glsl = *psContext->currentGLSLString; - bstring extensions = psContext->extensions; Shader* psShader = psContext->psShader; if(psContext->OutputNeedsDeclaring(&psDecl->asOperands[0], 1)) @@ -570,7 +569,7 @@ void ToGLSL::AddUserOutput(const Declaration* psDecl) if (psSignature->semanticName == "POS" && psOperand->ui32RegisterNumber == 0 && psContext->psShader->eShaderType == VERTEX_SHADER) return; - + iNumComponents = GetNumberBitsSet(psSignature->ui32Mask); if (iNumComponents == 1) psContext->psShader->abScalarOutput[regSpace][ui32Reg] |= (int)psDecl->asOperands[0].ui32CompMask; @@ -622,7 +621,7 @@ void ToGLSL::AddUserOutput(const Declaration* psDecl) } case OPERAND_MIN_PRECISION_FLOAT_2_8: { - Precision = "lowp "; + Precision = EmitLowp(psContext) ? "lowp " : "mediump "; break; } case OPERAND_MIN_PRECISION_SINT_16: @@ -647,16 +646,22 @@ void ToGLSL::AddUserOutput(const Declaration* psDecl) switch(psDecl->asOperands[0].eType) { case OPERAND_TYPE_OUTPUT_COVERAGE_MASK: + { + break; + } case OPERAND_TYPE_OUTPUT_DEPTH: { - + if (psShader->eTargetLanguage == LANG_ES_100 && !psContext->EnableExtension("GL_EXT_frag_depth")) + { + bcatcstr(psContext->extensions, "#ifdef GL_EXT_frag_depth\n"); + bcatcstr(psContext->extensions, "#define gl_FragDepth gl_FragDepthEXT\n"); + bcatcstr(psContext->extensions, "#endif\n"); + } break; } case OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL: { - bcatcstr(extensions, "#ifdef GL_ARB_conservative_depth\n"); - bcatcstr(extensions, "#extension GL_ARB_conservative_depth : enable\n"); - bcatcstr(extensions, "#endif\n"); + psContext->EnableExtension("GL_ARB_conservative_depth"); bcatcstr(glsl, "#ifdef GL_ARB_conservative_depth\n"); bcatcstr(glsl, "layout (depth_greater) out float gl_FragDepth;\n"); bcatcstr(glsl, "#endif\n"); @@ -664,9 +669,7 @@ void ToGLSL::AddUserOutput(const Declaration* psDecl) } case OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL: { - bcatcstr(extensions, "#ifdef GL_ARB_conservative_depth\n"); - bcatcstr(extensions, "#extension GL_ARB_conservative_depth : enable\n"); - bcatcstr(extensions, "#endif\n"); + psContext->EnableExtension("GL_ARB_conservative_depth"); bcatcstr(glsl, "#ifdef GL_ARB_conservative_depth\n"); bcatcstr(glsl, "layout (depth_less) out float gl_FragDepth;\n"); bcatcstr(glsl, "#endif\n"); @@ -674,55 +677,62 @@ void ToGLSL::AddUserOutput(const Declaration* psDecl) } default: { + uint32_t renderTarget = psDecl->asOperands[0].ui32RegisterNumber; + + char OutputName[512]; + bstring oname; + oname = bformat("%s%s%d", psContext->outputPrefix, psSignature->semanticName.c_str(), renderTarget); + strncpy(OutputName, (char *)oname->data, 512); + bdestroy(oname); + + if (psShader->eTargetLanguage == LANG_ES_100 && renderTarget > 0) + psContext->EnableExtension("GL_EXT_draw_buffers"); + + bool haveFramebufferFetch = (psShader->extensions->EXT_shader_framebuffer_fetch && + psShader->eShaderType == PIXEL_SHADER && + psContext->flags & HLSLCC_FLAG_SHADER_FRAMEBUFFER_FETCH); + if(WriteToFragData(psContext->psShader->eTargetLanguage)) { - bformata(glsl, "#define Output%d gl_FragData[%d]\n", psDecl->asOperands[0].ui32RegisterNumber, psDecl->asOperands[0].ui32RegisterNumber); + bformata(glsl, "#define %s gl_FragData[%d]\n", OutputName, renderTarget); } else { - char OutputName[512]; - bstring oname; - oname = bformat("%s%s%d", psContext->outputPrefix, psSignature->semanticName.c_str(), psSignature->ui32SemanticIndex); - strncpy(OutputName, (char *)oname->data, 512); - bdestroy(oname); - - bstring layoutQualifier = bformat(""); - bool haveFramebufferFetch = (psShader->extensions->EXT_shader_framebuffer_fetch && - psShader->eShaderType == PIXEL_SHADER && - psContext->flags & HLSLCC_FLAG_SHADER_FRAMEBUFFER_FETCH); - if (haveFramebufferFetch) - bcatcstr(glsl, "#ifdef GL_EXT_shader_framebuffer_fetch\n"); - - if (HaveInOutLocationQualifier(psContext->psShader->eTargetLanguage) || - HaveLimitedInOutLocationQualifier(psContext->psShader->eTargetLanguage, psContext->psShader->extensions)) + if (!RenderTargetDeclared(renderTarget)) { - uint32_t index = 0; - uint32_t renderTarget = psDecl->asOperands[0].ui32RegisterNumber; + bstring layoutQualifier = bformat(""); - if((psContext->flags & HLSLCC_FLAG_DUAL_SOURCE_BLENDING) && DualSourceBlendSupported(psContext->psShader->eTargetLanguage)) + if (HaveInOutLocationQualifier(psContext->psShader->eTargetLanguage) || + HaveLimitedInOutLocationQualifier(psContext->psShader->eTargetLanguage, psContext->psShader->extensions)) { - if(renderTarget > 0) + uint32_t index = 0; + + if((psContext->flags & HLSLCC_FLAG_DUAL_SOURCE_BLENDING) && DualSourceBlendSupported(psContext->psShader->eTargetLanguage)) { - renderTarget = 0; - index = 1; + if(renderTarget > 0) + { + renderTarget = 0; + index = 1; + } + layoutQualifier = bformat("layout(location = %d, index = %d) ", renderTarget, index); } - layoutQualifier = bformat("layout(location = %d, index = %d) ", renderTarget, index); + else + { + layoutQualifier = bformat("layout(location = %d) ", renderTarget); + } + } + + if (haveFramebufferFetch) + { + bcatcstr(glsl, "#ifdef GL_EXT_shader_framebuffer_fetch\n"); + bformata(glsl, "%sinout %s%s %s;\n", bstr2cstr(layoutQualifier, '\0'), Precision, type->data, OutputName); + bcatcstr(glsl, "#else\n"); + bformata(glsl, "%sout %s%s %s;\n", bstr2cstr(layoutQualifier, '\0'), Precision, type->data, OutputName); + bcatcstr(glsl, "#endif\n"); } else - { - layoutQualifier = bformat("layout(location = %d) ", renderTarget); - } + bformata(glsl, "%sout %s%s %s;\n", bstr2cstr(layoutQualifier, '\0'), Precision, type->data, OutputName); } - - if (haveFramebufferFetch) - { - bformata(glsl, "%sinout %s%s %s;\n", bstr2cstr(layoutQualifier, '\0'), Precision, type->data, OutputName); - bcatcstr(glsl, "#else\n"); - bformata(glsl, "%sout %s%s %s;\n", bstr2cstr(layoutQualifier, '\0'), Precision, type->data, OutputName); - bcatcstr(glsl, "#endif\n"); - } - else - bformata(glsl, "%sout %s%s %s;\n", bstr2cstr(layoutQualifier, '\0'), Precision, type->data, OutputName); } break; } @@ -743,7 +753,7 @@ void ToGLSL::AddUserOutput(const Declaration* psDecl) if (psShader->eShaderType == VERTEX_SHADER) { - if (psSignature->eComponentType == INOUT_COMPONENT_UINT32 || + if (psSignature->eComponentType == INOUT_COMPONENT_UINT32 || psSignature->eComponentType == INOUT_COMPONENT_SINT32) // GLSL spec requires that integer vertex outputs always have "flat" interpolation { Interpolation = GetInterpolationString(INTERPOLATION_CONSTANT, psContext->psShader->eTargetLanguage); @@ -782,7 +792,7 @@ void ToGLSL::AddUserOutput(const Declaration* psDecl) default: ASSERT(0); break; - + } HandleOutputRedirect(psDecl, Precision); bdestroy(type); @@ -790,20 +800,18 @@ void ToGLSL::AddUserOutput(const Declaration* psDecl) } -static void DeclareUBOConstants(HLSLCrossCompilerContext* psContext, const uint32_t ui32BindingPoint, - const ConstantBuffer* psCBuf, - bstring glsl) +void ToGLSL::DeclareUBOConstants(const uint32_t ui32BindingPoint, const ConstantBuffer* psCBuf, bstring glsl) { uint32_t i; bool skipUnused = false; - + if((psContext->flags & HLSLCC_FLAG_REMOVE_UNUSED_GLOBALS) && psCBuf->name == "$Globals") skipUnused = true; - - - std::string Name = psCBuf->name; - if(Name == "$Globals") + + + std::string cbName = psCBuf->name; + if(cbName == "$Globals") { // Need to tweak Globals struct name to prevent clashes between shader stages char prefix = 'A'; @@ -832,17 +840,15 @@ static void DeclareUBOConstants(HLSLCrossCompilerContext* psContext, const uint3 break; } - Name[0] = prefix; + cbName[0] = prefix; } for(i=0; i < psCBuf->asVars.size(); ++i) { if(skipUnused && !psCBuf->asVars[i].sType.m_IsUsed) continue; - - PreDeclareStructType(psContext, - psCBuf->asVars[i].name, - &psCBuf->asVars[i].sType); + + PreDeclareStructType(psCBuf->asVars[i].name, &psCBuf->asVars[i].sType); } if (psContext->flags & HLSLCC_FLAG_WRAP_UBO) @@ -851,7 +857,7 @@ static void DeclareUBOConstants(HLSLCrossCompilerContext* psContext, const uint3 /* [layout (location = X)] uniform vec4 HLSLConstantBufferName[numConsts]; */ if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) { - GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(Name, false, 1); + GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(cbName, false, 1); bformata(glsl, "layout(set = %d, binding = %d, std140) ", binding.first, binding.second); } else @@ -862,7 +868,7 @@ static void DeclareUBOConstants(HLSLCrossCompilerContext* psContext, const uint3 bcatcstr(glsl, "layout(std140) "); } - bformata(glsl, "uniform %s {\n", Name.c_str()); + bformata(glsl, "uniform %s {\n", cbName.c_str()); if (psContext->flags & HLSLCC_FLAG_WRAP_UBO) bformata(glsl, "#else\n#define UNITY_UNIFORM uniform\n#endif\n"); @@ -871,10 +877,9 @@ static void DeclareUBOConstants(HLSLCrossCompilerContext* psContext, const uint3 { if(skipUnused && !psCBuf->asVars[i].sType.m_IsUsed) continue; - - DeclareConstBufferShaderVariable(psContext, - psCBuf->asVars[i].name.c_str(), - &psCBuf->asVars[i].sType, 0, psContext->flags & HLSLCC_FLAG_WRAP_UBO ? true : false); + + DeclareConstBufferShaderVariable(psCBuf->asVars[i].name.c_str(), + &psCBuf->asVars[i].sType, psCBuf, 0, psContext->flags & HLSLCC_FLAG_WRAP_UBO ? true : false); } if (psContext->flags & HLSLCC_FLAG_WRAP_UBO) @@ -936,7 +941,12 @@ static void DeclareBufferVariable(HLSLCrossCompilerContext* psContext, uint32_t bformata(glsl, "coherent uint %s_counter;\n\t", BufName.c_str()); if (isRaw) - bcatcstr(glsl, "uint"); + { + if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + bcatcstr(glsl, "uint"); + else + bcatcstr(glsl, "int"); + } else bformata(glsl, "%s_type", BufName.c_str()); @@ -961,16 +971,14 @@ void ToGLSL::DeclareStructConstants(const uint32_t ui32BindingPoint, if ((psContext->flags & HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT) == 0) useGlobalsStruct = 0; - - + + for(i=0; i < psCBuf->asVars.size(); ++i) { if(skipUnused && !psCBuf->asVars[i].sType.m_IsUsed) continue; - - PreDeclareStructType(psContext, - psCBuf->asVars[i].name, - &psCBuf->asVars[i].sType); + + PreDeclareStructType(psCBuf->asVars[i].name, &psCBuf->asVars[i].sType); } /* [layout (location = X)] uniform vec4 HLSLConstantBufferName[numConsts]; */ @@ -998,13 +1006,11 @@ void ToGLSL::DeclareStructConstants(const uint32_t ui32BindingPoint, { if(skipUnused && !psCBuf->asVars[i].sType.m_IsUsed) continue; - + if(!useGlobalsStruct) bcatcstr(glsl, "uniform "); - DeclareConstBufferShaderVariable(psContext, - psCBuf->asVars[i].name.c_str(), - &psCBuf->asVars[i].sType, 0); + DeclareConstBufferShaderVariable(psCBuf->asVars[i].name.c_str(), &psCBuf->asVars[i].sType, psCBuf, 0); } if(useGlobalsStruct) @@ -1017,7 +1023,170 @@ void ToGLSL::DeclareStructConstants(const uint32_t ui32BindingPoint, } } -static const char* GetSamplerType(HLSLCrossCompilerContext* psContext, +static const char* GetVulkanTextureType(HLSLCrossCompilerContext* psContext, + const RESOURCE_DIMENSION eDimension, + const uint32_t ui32RegisterNumber) +{ + const ResourceBinding* psBinding = 0; + RESOURCE_RETURN_TYPE eType = RETURN_TYPE_UNORM; + int found; + found = psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, ui32RegisterNumber, &psBinding); + if (found) + { + eType = (RESOURCE_RETURN_TYPE)psBinding->ui32ReturnType; + } + switch (eDimension) + { + case RESOURCE_DIMENSION_BUFFER: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "itextureBuffer"; + case RETURN_TYPE_UINT: + return "utextureBuffer"; + default: + return "textureBuffer"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE1D: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "itexture1D"; + case RETURN_TYPE_UINT: + return "utexture1D"; + default: + return "texture1D"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE2D: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "itexture2D"; + case RETURN_TYPE_UINT: + return "utexture2D"; + default: + return "texture2D"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE2DMS: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "itexture2DMS"; + case RETURN_TYPE_UINT: + return "utexture2DMS"; + default: + return "texture2DMS"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE3D: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "itexture3D"; + case RETURN_TYPE_UINT: + return "utexture3D"; + default: + return "texture3D"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURECUBE: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "itextureCube"; + case RETURN_TYPE_UINT: + return "utextureCube"; + default: + return "textureCube"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE1DARRAY: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "itexture1DArray"; + case RETURN_TYPE_UINT: + return "utexture1DArray"; + default: + return "texture1DArray"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "itexture2DArray"; + case RETURN_TYPE_UINT: + return "utexture2DArray"; + default: + return "texture2DArray"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "itexture2DMSArray"; + case RETURN_TYPE_UINT: + return "utexture2DMSArray"; + default: + return "texture2DMSArray"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "itextureCubeArray"; + case RETURN_TYPE_UINT: + return "utextureCubeArray"; + default: + return "textureCubeArray"; + } + break; + } + default: + ASSERT(0); + break; + + } + + return "texture2D"; +} + +// Not static because this is used in toGLSLInstruction.cpp when sampling Vulkan textures +const char* GetSamplerType(HLSLCrossCompilerContext* psContext, const RESOURCE_DIMENSION eDimension, const uint32_t ui32RegisterNumber) { @@ -1191,13 +1360,39 @@ static const char *GetSamplerPrecision(const HLSLCrossCompilerContext *psContext default: case REFLECT_RESOURCE_PRECISION_UNKNOWN: case REFLECT_RESOURCE_PRECISION_LOWP: - return "lowp "; + return EmitLowp(psContext) ? "lowp " : "mediump "; case REFLECT_RESOURCE_PRECISION_HIGHP: return "highp "; case REFLECT_RESOURCE_PRECISION_MEDIUMP: return "mediump "; } } +static void TranslateVulkanResource(HLSLCrossCompilerContext* psContext, const Declaration* psDecl) +{ + bstring glsl = *psContext->currentGLSLString; + Shader* psShader = psContext->psShader; + + const ResourceBinding *psBinding = NULL; + psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, &psBinding); + ASSERT(psBinding != NULL); + + const char *samplerPrecision = GetSamplerPrecision(psContext, psBinding ? psBinding->ePrecision : REFLECT_RESOURCE_PRECISION_UNKNOWN); + std::string tname = ResourceName(psContext, RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, 0); + + const char* samplerTypeName = GetVulkanTextureType(psContext, + psDecl->value.eResourceDimension, + psDecl->asOperands[0].ui32RegisterNumber); + + GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(tname); + bformata(glsl, "layout(set = %d, binding = %d) ", binding.first, binding.second); + bcatcstr(glsl, "uniform "); + bcatcstr(glsl, samplerPrecision); + bcatcstr(glsl, samplerTypeName); + bcatcstr(glsl, " "); + bcatcstr(glsl, tname.c_str()); + bcatcstr(glsl, ";\n"); + +} static void TranslateResourceTexture(HLSLCrossCompilerContext* psContext, const Declaration* psDecl, uint32_t samplerCanDoShadowCmp) { @@ -1216,13 +1411,18 @@ static void TranslateResourceTexture(HLSLCrossCompilerContext* psContext, const // Need to enable extension (either OES or ARB), but we only need to add it once if (IsESLanguage(psContext->psShader->eTargetLanguage)) { - psContext->RequireExtension("GL_OES_texture_cube_map_array"); - psContext->RequireExtension("GL_EXT_texture_cube_map_array"); + psContext->EnableExtension("GL_OES_texture_cube_map_array"); + psContext->EnableExtension("GL_EXT_texture_cube_map_array"); } else psContext->RequireExtension("GL_ARB_texture_cube_map_array"); } + if (psContext->psShader->eTargetLanguage == LANG_ES_100 && samplerCanDoShadowCmp && psDecl->ui32IsShadowTex) + { + psContext->EnableExtension("GL_EXT_shadow_samplers"); + } + const ResourceBinding *psBinding = NULL; psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, &psBinding); ASSERT(psBinding != NULL); @@ -1236,11 +1436,6 @@ static void TranslateResourceTexture(HLSLCrossCompilerContext* psContext, const for (i = psDecl->samplersUsed.begin(); i != psDecl->samplersUsed.end(); i++) { std::string tname = TextureSamplerName(&psShader->sInfo, psDecl->asOperands[0].ui32RegisterNumber, *i, 1); - if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) - { - GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(tname); - bformata(glsl, "layout(set = %d, binding = %d) ", binding.first, binding.second); - } bcatcstr(glsl, "uniform "); bcatcstr(glsl, samplerPrecision); bcatcstr(glsl, samplerTypeName); @@ -1252,11 +1447,6 @@ static void TranslateResourceTexture(HLSLCrossCompilerContext* psContext, const for (i = psDecl->samplersUsed.begin(); i != psDecl->samplersUsed.end(); i++) { std::string tname = TextureSamplerName(&psShader->sInfo, psDecl->asOperands[0].ui32RegisterNumber, *i, 0); - if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) - { - GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(tname); - bformata(glsl, "layout(set = %d, binding = %d) ", binding.first, binding.second); - } bcatcstr(glsl, "uniform "); bcatcstr(glsl, samplerPrecision); bcatcstr(glsl, samplerTypeName); @@ -1272,12 +1462,6 @@ static void TranslateResourceTexture(HLSLCrossCompilerContext* psContext, const //HLSL does not have separate types for depth compare, just different functions. std::string tname = ResourceName(psContext, RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, 1); - if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) - { - GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(tname); - bformata(glsl, "layout(set = %d, binding = %d) ", binding.first, binding.second); - } - bcatcstr(glsl, "uniform "); bcatcstr(glsl, samplerPrecision); bcatcstr(glsl, samplerTypeName); @@ -1288,11 +1472,6 @@ static void TranslateResourceTexture(HLSLCrossCompilerContext* psContext, const std::string tname = ResourceName(psContext, RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, 0); - if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) - { - GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(tname); - bformata(glsl, "layout(set = %d, binding = %d) ", binding.first, binding.second); - } bcatcstr(glsl, "uniform "); bcatcstr(glsl, samplerPrecision); bcatcstr(glsl, samplerTypeName); @@ -1349,7 +1528,7 @@ void ToGLSL::HandleInputRedirect(const Declaration *psDecl, const char *Precisio psContext->indent++; // Do a conditional loop. In normal cases needsLooping == 0 so this is only run once. - do + do { int comp = 0; psContext->AddIndentation(); @@ -1373,12 +1552,12 @@ void ToGLSL::HandleInputRedirect(const Declaration *psDecl, const char *Precisio numComps = GetNumberBitsSet(psSig->ui32Mask); if (psSig->eComponentType == INOUT_COMPONENT_SINT32) { - bformata(psPhase->earlyMain, "intBitsToFloat("); + bformata(psPhase->earlyMain, HaveBitEncodingOps(psContext->psShader->eTargetLanguage) ? "intBitsToFloat(" : "float("); hasCast = 1; } else if (psSig->eComponentType == INOUT_COMPONENT_UINT32) { - bformata(psPhase->earlyMain, "uintBitsToFloat("); + bformata(psPhase->earlyMain, HaveBitEncodingOps(psContext->psShader->eTargetLanguage) ? "uintBitsToFloat(" : "float("); hasCast = 1; } @@ -1426,7 +1605,6 @@ void ToGLSL::HandleInputRedirect(const Declaration *psDecl, const char *Precisio void ToGLSL::TranslateDeclaration(const Declaration* psDecl) { bstring glsl = *psContext->currentGLSLString; - bstring extensions = psContext->extensions; Shader* psShader = psContext->psShader; switch(psDecl->eOpcode) @@ -1439,27 +1617,37 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) { case NAME_POSITION: { - AddBuiltinInput(psContext, psDecl, "gl_Position"); + AddBuiltinInput(psDecl, "gl_Position"); break; } case NAME_RENDER_TARGET_ARRAY_INDEX: { - AddBuiltinInput(psContext, psDecl, "gl_Layer"); + AddBuiltinInput(psDecl, "gl_Layer"); + if (psShader->eShaderType == VERTEX_SHADER) + { + psContext->RequireExtension("GL_AMD_vertex_shader_layer"); + } + break; } case NAME_CLIP_DISTANCE: { - AddBuiltinInput(psContext, psDecl, "gl_ClipDistance"); + AddBuiltinInput(psDecl, "gl_ClipDistance"); + break; + } + case NAME_CULL_DISTANCE: + { + AddBuiltinInput(psDecl, "gl_CullDistance"); break; } case NAME_VIEWPORT_ARRAY_INDEX: { - AddBuiltinInput(psContext, psDecl, "gl_ViewportIndex"); + AddBuiltinInput(psDecl, "gl_ViewportIndex"); break; } case NAME_INSTANCE_ID: { - AddBuiltinInput(psContext, psDecl, "gl_InstanceID"); + AddBuiltinInput(psDecl, "gl_InstanceID"); break; } case NAME_IS_FRONT_FACE: @@ -1470,25 +1658,28 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) Suggests no implicit conversion for bool<->int. */ - AddBuiltinInput(psContext, psDecl, "(gl_FrontFacing ? 0xffffffffu : uint(0))"); // Hi Adreno. + if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + AddBuiltinInput(psDecl, "(gl_FrontFacing ? 0xffffffffu : uint(0))"); // Hi Adreno. + else + AddBuiltinInput(psDecl, "(gl_FrontFacing ? int(1) : int(0))"); break; } case NAME_SAMPLE_INDEX: { - AddBuiltinInput(psContext, psDecl, "gl_SampleID"); + AddBuiltinInput(psDecl, "gl_SampleID"); break; } case NAME_VERTEX_ID: { - AddBuiltinInput(psContext, psDecl, "gl_VertexID"); + AddBuiltinInput(psDecl, "gl_VertexID"); break; } case NAME_PRIMITIVE_ID: { if(psShader->eShaderType == GEOMETRY_SHADER) - AddBuiltinInput(psContext, psDecl, "gl_PrimitiveIDIn"); // LOL opengl. + AddBuiltinInput(psDecl, "gl_PrimitiveIDIn"); // LOL opengl. else - AddBuiltinInput(psContext, psDecl, "gl_PrimitiveID"); + AddBuiltinInput(psDecl, "gl_PrimitiveID"); break; } default: @@ -1511,6 +1702,11 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) case NAME_RENDER_TARGET_ARRAY_INDEX: { AddBuiltinOutput(psDecl, 0, "gl_Layer"); + if (psShader->eShaderType == VERTEX_SHADER) + { + psContext->RequireExtension("GL_AMD_vertex_shader_layer"); + } + break; } case NAME_CLIP_DISTANCE: @@ -1518,6 +1714,11 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) AddBuiltinOutput(psDecl, 0, "gl_ClipDistance"); break; } + case NAME_CULL_DISTANCE: + { + AddBuiltinOutput(psDecl, 0, "gl_CullDistance"); + break; + } case NAME_VIEWPORT_ARRAY_INDEX: { AddBuiltinOutput(psDecl, 0, "gl_ViewportIndex"); @@ -1715,7 +1916,7 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) } case OPERAND_MIN_PRECISION_FLOAT_2_8: { - Precision = "lowp"; + Precision = EmitLowp(psContext) ? "lowp " : "mediump "; break; } case OPERAND_MIN_PRECISION_SINT_16: @@ -1731,8 +1932,29 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) } } + const char * Interpolation = ""; + + if (psShader->eShaderType == GEOMETRY_SHADER || psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER) + { + const ShaderInfo::InOutSignature* psSignature = NULL; + + psShader->sInfo.GetInputSignatureFromRegister(psDecl->asOperands[0].ui32RegisterNumber, + psDecl->asOperands[0].ui32CompMask, + &psSignature, true); + + if ((psSignature != NULL) && (psSignature->eComponentType == INOUT_COMPONENT_UINT32 || + psSignature->eComponentType == INOUT_COMPONENT_SINT32)) // GLSL spec requires that integer inputs always have "flat" interpolation + { + Interpolation = GetInterpolationString(INTERPOLATION_CONSTANT, psContext->psShader->eTargetLanguage); + } + else if (psContext->psDependencies) // For floats we get the interpolation that was resolved from the fragment shader input + { + Interpolation = GetInterpolationString(psContext->psDependencies->GetInterpolationMode(psDecl->asOperands[0].ui32RegisterNumber), psContext->psShader->eTargetLanguage); + } + } + DeclareInput(psContext, psDecl, - "", StorageQualifier, Precision, iNumComponents, (OPERAND_INDEX_DIMENSION)psOperand->iIndexDims, inputName.c_str(), psOperand->ui32CompMask); + Interpolation, StorageQualifier, Precision, iNumComponents, (OPERAND_INDEX_DIMENSION)psOperand->iIndexDims, inputName.c_str(), psOperand->ui32CompMask); HandleInputRedirect(psDecl, Precision); break; @@ -1743,13 +1965,13 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) { case NAME_POSITION: { - AddBuiltinInput(psContext, psDecl, "gl_FragCoord"); + AddBuiltinInput(psDecl, "gl_FragCoord"); break; } default: ASSERT(0); break; - + } break; } @@ -1772,59 +1994,63 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) int hasNoPerspective = psContext->psShader->eTargetLanguage <= LANG_ES_310 ? 0 : 1; inputName = psContext->GetDeclaredInputName(psOperand, NULL, 1, NULL); - // If this is a SV_Target input and framebuffer fetch is enabled then skip the declaration - if (psShader->extensions->EXT_shader_framebuffer_fetch && - psShader->eShaderType == PIXEL_SHADER && - psContext->flags & HLSLCC_FLAG_SHADER_FRAMEBUFFER_FETCH) - { - if(inputName == "vs_SV_Target0") - break; - } - if (InOutSupported(psContext->psShader->eTargetLanguage)) { StorageQualifier = "in"; } + const ShaderInfo::InOutSignature* psSignature = NULL; - switch(psDecl->value.eInterpolation) + psShader->sInfo.GetInputSignatureFromRegister(psDecl->asOperands[0].ui32RegisterNumber, + psDecl->asOperands[0].ui32CompMask, + &psSignature); + + if (psSignature->eComponentType == INOUT_COMPONENT_UINT32 || + psSignature->eComponentType == INOUT_COMPONENT_SINT32) // GLSL spec requires that integer inputs always have "flat" interpolation { - case INTERPOLATION_CONSTANT: + Interpolation = GetInterpolationString(INTERPOLATION_CONSTANT, psContext->psShader->eTargetLanguage); + } + else + { + switch (psDecl->value.eInterpolation) { - Interpolation = "flat "; - break; + case INTERPOLATION_CONSTANT: + { + Interpolation = "flat "; + break; + } + case INTERPOLATION_LINEAR: + { + break; + } + case INTERPOLATION_LINEAR_CENTROID: + { + Interpolation = "centroid "; + break; + } + case INTERPOLATION_LINEAR_NOPERSPECTIVE: + { + Interpolation = hasNoPerspective ? "noperspective " : ""; + break; + } + case INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID: + { + Interpolation = hasNoPerspective ? "noperspective centroid " : "centroid"; + break; + } + case INTERPOLATION_LINEAR_SAMPLE: + { + Interpolation = hasNoPerspective ? "sample " : ""; + break; + } + case INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE: + { + Interpolation = hasNoPerspective ? "noperspective sample " : ""; + break; + } + default: + ASSERT(0); + break; } - case INTERPOLATION_LINEAR: - { - break; - } - case INTERPOLATION_LINEAR_CENTROID: - { - Interpolation = "centroid "; - break; - } - case INTERPOLATION_LINEAR_NOPERSPECTIVE: - { - Interpolation = hasNoPerspective ? "noperspective " : ""; - break; - } - case INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID: - { - Interpolation = hasNoPerspective ? "noperspective centroid " : "centroid" ; - break; - } - case INTERPOLATION_LINEAR_SAMPLE: - { - Interpolation = hasNoPerspective ? "sample " : ""; - break; - } - case INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE: - { - Interpolation = hasNoPerspective ? "noperspective sample " : ""; - break; - } - default: - ASSERT(0); - break; } if(HavePrecisionQualifiers(psContext)) @@ -1843,7 +2069,7 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) } case OPERAND_MIN_PRECISION_FLOAT_2_8: { - Precision = "lowp"; + Precision = EmitLowp(psContext) ? "lowp " : "mediump "; break; } case OPERAND_MIN_PRECISION_SINT_16: @@ -1859,6 +2085,108 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) } } + bool haveFramebufferFetch = (psShader->extensions->EXT_shader_framebuffer_fetch && + psShader->eShaderType == PIXEL_SHADER && + psContext->flags & HLSLCC_FLAG_SHADER_FRAMEBUFFER_FETCH); + + // If this is a SV_Target input and framebuffer fetch is enabled, do special input declaration unless output is declared later + if (haveFramebufferFetch && psOperand->iPSInOut && inputName.size() == 13 && !strncmp(inputName.c_str(), "vs_SV_Target", 12)) + { + bstring type = NULL; + + switch (psSignature->eComponentType) + { + case INOUT_COMPONENT_UINT32: + { + if (iNumComponents > 1) + type = bformat("uvec%d", iNumComponents); + else + type = bformat("uint"); + break; + } + case INOUT_COMPONENT_SINT32: + { + if (iNumComponents > 1) + type = bformat("ivec%d", iNumComponents); + else + type = bformat("int"); + break; + } + case INOUT_COMPONENT_FLOAT32: + { + if (iNumComponents > 1) + type = bformat("vec%d", iNumComponents); + else + type = bformat("float"); + break; + } + default: + ASSERT(0); + break; + } + + uint32_t renderTarget = psSignature->ui32SemanticIndex; + + char OutputName[512]; + bstring oname; + oname = bformat("%s%s%d", psContext->outputPrefix, psSignature->semanticName.c_str(), renderTarget); + strncpy(OutputName, (char *)oname->data, 512); + bdestroy(oname); + + if(WriteToFragData(psContext->psShader->eTargetLanguage)) + { + if (haveFramebufferFetch) + { + bcatcstr(glsl, "#ifdef GL_EXT_shader_framebuffer_fetch\n"); + bformata(glsl, "#define vs_%s gl_LastFragData[%d]\n", OutputName, renderTarget); + bcatcstr(glsl, "#else\n"); + bformata(glsl, "#define vs_%s gl_FragData[%d]\n", OutputName, renderTarget); + bcatcstr(glsl, "#endif\n"); + } + else + bformata(glsl, "#define vs_%s gl_FragData[%d]\n", OutputName, renderTarget); + } + else + { + if (!RenderTargetDeclared(renderTarget)) + { + bstring layoutQualifier = bformat(""); + + if (HaveInOutLocationQualifier(psContext->psShader->eTargetLanguage) || + HaveLimitedInOutLocationQualifier(psContext->psShader->eTargetLanguage, psContext->psShader->extensions)) + { + uint32_t index = 0; + + if((psContext->flags & HLSLCC_FLAG_DUAL_SOURCE_BLENDING) && DualSourceBlendSupported(psContext->psShader->eTargetLanguage)) + { + if(renderTarget > 0) + { + renderTarget = 0; + index = 1; + } + layoutQualifier = bformat("layout(location = %d, index = %d) ", renderTarget, index); + } + else + { + layoutQualifier = bformat("layout(location = %d) ", renderTarget); + } + } + + if (haveFramebufferFetch) + { + bcatcstr(glsl, "#ifdef GL_EXT_shader_framebuffer_fetch\n"); + bformata(glsl, "%sinout %s %s %s;\n", bstr2cstr(layoutQualifier, '\0'), Precision, type->data, OutputName); + bcatcstr(glsl, "#else\n"); + bformata(glsl, "%sout %s %s %s;\n", bstr2cstr(layoutQualifier, '\0'), Precision, type->data, OutputName); + bcatcstr(glsl, "#endif\n"); + } + else + bformata(glsl, "%sout %s %s %s;\n", bstr2cstr(layoutQualifier, '\0'), Precision, type->data, OutputName); + } + } + break; + } + DeclareInput(psContext, psDecl, Interpolation, StorageQualifier, Precision, iNumComponents, INDEX_1D, inputName.c_str(), psOperand->ui32CompMask); @@ -1915,11 +2243,11 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) { if (HaveUniformBindingsAndLocations(psContext->psShader->eTargetLanguage, psContext->psShader->extensions, psContext->flags)) bformata(glsl, "layout(location = %d) ",ui32BindingPoint); - + bformata(glsl, "layout(std140) uniform ConstantBuffer%d {\n\tvec4 data[%d];\n} cb%d;\n", ui32BindingPoint,psOperand->aui32ArraySizes[1],ui32BindingPoint); break; } - + if (psCBuf->name.substr(0, 20) == "hlslcc_SubpassInput_" && psCBuf->name.length() >= 23 && !psCBuf->asVars.empty()) { // Special case for vulkan subpass input. @@ -1984,15 +2312,16 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) // Munge the name so it'll get the correct function call in GLSL directly sv.name.insert(0, "subpassLoad("); if (isMS) - sv.name[sv.name.length() - 2] = ','; - sv.name.append(")"); + sv.name.append(","); + else + sv.name.append(")"); // Also update the type name sv.sType.name = sv.name; sv.sType.fullName = sv.name; } } - // Break out so this doesn't get declared. + // Break out so this doesn't get declared. break; } @@ -2000,7 +2329,7 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) { // Special case for piggy-backing multiview info out // This is not really a cbuffer, but if we see this being accessed, we know we need viewID - + // Extract numViews uint32_t numViews = 0; for(std::vector::const_iterator itr = psCBuf->asVars.begin(); itr != psCBuf->asVars.end(); itr++) @@ -2015,11 +2344,11 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) if(numViews > 0 && numViews < 10) { // multiview2 is required because we have built-in shaders that do eye-dependent work other than just position - bcatcstr(extensions, "#extension GL_OVR_multiview2 : require\n"); + psContext->RequireExtension("GL_OVR_multiview2"); if(psShader->eShaderType == VERTEX_SHADER) bformata(glsl, "layout(num_views = %d) in;\n", numViews); - + break; // Break out so we don't actually declare this cbuffer } @@ -2033,7 +2362,7 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) } else { - DeclareUBOConstants(psContext, ui32BindingPoint, psCBuf, glsl); + DeclareUBOConstants(ui32BindingPoint, psCBuf, glsl); } } else @@ -2044,9 +2373,16 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) } case OPCODE_DCL_RESOURCE: { - // Skip the location declaration on Vulkan - if (HaveUniformBindingsAndLocations(psContext->psShader->eTargetLanguage, psContext->psShader->extensions, psContext->flags) - && ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) == 0)) + psShader->aeResourceDims[psDecl->asOperands[0].ui32RegisterNumber] = psDecl->value.eResourceDimension; + + // Vulkan doesn't use combined textures+samplers, so do own handling in a separate func + if (psContext->IsVulkan()) + { + TranslateVulkanResource(psContext, psDecl); + break; + } + + if (HaveUniformBindingsAndLocations(psContext->psShader->eTargetLanguage, psContext->psShader->extensions, psContext->flags)) { // Explicit layout bindings are not currently compatible with combined texture samplers. The layout below assumes there is exactly one GLSL sampler // for each HLSL texture declaration, but when combining textures+samplers, there can be multiple OGL samplers for each HLSL texture declaration. @@ -2062,15 +2398,6 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) { case RESOURCE_DIMENSION_BUFFER: { - if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) - { - const ResourceBinding *psBinding = NULL; - psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, &psBinding); - std::string tname = psBinding->name; - GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(tname); - bformata(glsl, "layout(set = %d, binding = %d) ", binding.first, binding.second); - } - bcatcstr(glsl, "uniform "); if (IsESLanguage(psContext->psShader->eTargetLanguage)) bcatcstr(glsl, "highp "); @@ -2131,7 +2458,6 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) break; } - psShader->aeResourceDims[psDecl->asOperands[0].ui32RegisterNumber] = psDecl->value.eResourceDimension; break; } case OPCODE_DCL_OUTPUT: @@ -2148,7 +2474,7 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) AddBuiltinOutput(psDecl, 0, "gl_out[gl_InvocationID].gl_Position"); } } - + if(needsDeclare) { AddUserOutput(psDecl); @@ -2159,9 +2485,10 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) { uint32_t ui32Flags = psDecl->value.ui32GlobalFlags; - if(ui32Flags & GLOBAL_FLAG_FORCE_EARLY_DEPTH_STENCIL) + if(ui32Flags & GLOBAL_FLAG_FORCE_EARLY_DEPTH_STENCIL && psContext->psShader->eShaderType == PIXEL_SHADER) { bcatcstr(glsl, "layout(early_fragment_tests) in;\n"); + psShader->sInfo.bEarlyFragmentTests = true; } if(!(ui32Flags & GLOBAL_FLAG_REFACTORING_ALLOWED)) { @@ -2170,7 +2497,7 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) } if(ui32Flags & GLOBAL_FLAG_ENABLE_DOUBLE_PRECISION_FLOAT_OPS) { - bcatcstr(extensions, "#extension GL_ARB_gpu_shader_fp64 : enable\n"); + psContext->EnableExtension("GL_ARB_gpu_shader_fp64"); psShader->fp64 = 1; } break; @@ -2313,8 +2640,8 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) } case OPCODE_DCL_INTERFACE: { - const uint32_t interfaceID = psDecl->value.interface.ui32InterfaceID; - const uint32_t numUniforms = psDecl->value.interface.ui32ArraySize; + const uint32_t interfaceID = psDecl->value.iface.ui32InterfaceID; + const uint32_t numUniforms = psDecl->value.iface.ui32ArraySize; const uint32_t ui32NumBodiesPerTable = psContext->psShader->funcPointer[interfaceID].ui32NumBodiesPerTable; ShaderVar* psVar; uint32_t varFound; @@ -2339,64 +2666,94 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) } case OPCODE_CUSTOMDATA: { - // TODO: This is only ever accessed as a float currently. Do trickery if we ever see ints accessed from an array. - // Walk through all the chunks we've seen in this phase. - ShaderPhase &sp = psShader->asPhases[psContext->currentPhase]; - std::for_each(sp.m_ConstantArrayInfo.m_Chunks.begin(), sp.m_ConstantArrayInfo.m_Chunks.end(), [this](const std::pair &chunk) + // On Vulkan we just spew the data in uints as-is + if (psContext->IsVulkan()) { bstring glsl = *psContext->currentGLSLString; - uint32_t componentCount = chunk.second.m_ComponentCount; - // Just do the declaration here and contents to earlyMain. - if (componentCount == 1) - bformata(glsl, "float ImmCB_%d_%d_%d[%d];\n", psContext->currentPhase, chunk.first, chunk.second.m_Rebase, chunk.second.m_Size); - else - bformata(glsl, "vec%d ImmCB_%d_%d_%d[%d];\n", componentCount, psContext->currentPhase, chunk.first, chunk.second.m_Rebase, chunk.second.m_Size); - - bstring tgt = psContext->psShader->asPhases[psContext->currentPhase].earlyMain; - Declaration *psDecl = psContext->psShader->asPhases[psContext->currentPhase].m_ConstantArrayInfo.m_OrigDeclaration; - if (componentCount == 1) + bformata(glsl, "const uvec4 ImmCB_%d[] = uvec4[%d] (\n", psContext->currentPhase, psDecl->asImmediateConstBuffer.size()); + bool isFirst = true; + std::for_each(psDecl->asImmediateConstBuffer.begin(), psDecl->asImmediateConstBuffer.end(), [&](const ICBVec4 &data) { - for (uint32_t i = 0; i < chunk.second.m_Size; i++) + if (!isFirst) { - float val[4] = { - *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].a, - *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].b, - *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].c, - *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].d - }; - bformata(tgt, "\tImmCB_%d_%d_%d[%d] = ", psContext->currentPhase, chunk.first, chunk.second.m_Rebase, i); - if (fpcheck(val[chunk.second.m_Rebase])) - bformata(tgt, "uintBitsToFloat(uint(0x%Xu))", *(uint32_t *)&val[chunk.second.m_Rebase]); - else - HLSLcc::PrintFloat(tgt, val[chunk.second.m_Rebase]); - bcatcstr(tgt, ";\n"); + bcatcstr(glsl, ",\n"); } - } - else + isFirst = false; + bformata(glsl, "\tuvec4(0x%X, 0x%X, 0x%X, 0x%X)", data.a, data.b, data.c, data.d); + }); + bcatcstr(glsl, ");\n"); + } + else + { + // TODO: This is only ever accessed as a float currently. Do trickery if we ever see ints accessed from an array. + // Walk through all the chunks we've seen in this phase. + ShaderPhase &sp = psShader->asPhases[psContext->currentPhase]; + std::for_each(sp.m_ConstantArrayInfo.m_Chunks.begin(), sp.m_ConstantArrayInfo.m_Chunks.end(), [this](const std::pair &chunk) { - for (uint32_t i = 0; i < chunk.second.m_Size; i++) + bstring glsl = *psContext->currentGLSLString; + uint32_t componentCount = chunk.second.m_ComponentCount; + // Just do the declaration here and contents to earlyMain. + if (componentCount == 1) + bformata(glsl, "float ImmCB_%d_%d_%d[%d];\n", psContext->currentPhase, chunk.first, chunk.second.m_Rebase, chunk.second.m_Size); + else + bformata(glsl, "vec%d ImmCB_%d_%d_%d[%d];\n", componentCount, psContext->currentPhase, chunk.first, chunk.second.m_Rebase, chunk.second.m_Size); + + if (!HaveDynamicIndexing(psContext)) { - float val[4] = { - *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].a, - *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].b, - *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].c, - *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].d - }; - bformata(tgt, "\tImmCB_%d_%d_%d[%d] = vec%d(", psContext->currentPhase, chunk.first, chunk.second.m_Rebase, i, componentCount); - for (uint32_t k = 0; k < componentCount; k++) + bstring name = bfromcstr(""); + bformata(name, "ImmCB_%d_%d_%d", psContext->currentPhase, chunk.first, chunk.second.m_Rebase); + SHADER_VARIABLE_CLASS eClass = componentCount > 1 ? SVC_VECTOR : SVC_SCALAR; + + DeclareDynamicIndexWrapper((const char *)name->data, eClass, SVT_FLOAT, 1, componentCount, chunk.second.m_Size); + bdestroy(name); + } + + bstring tgt = psContext->psShader->asPhases[psContext->currentPhase].earlyMain; + Declaration *psDecl = psContext->psShader->asPhases[psContext->currentPhase].m_ConstantArrayInfo.m_OrigDeclaration; + if (componentCount == 1) + { + for (uint32_t i = 0; i < chunk.second.m_Size; i++) { - if (k != 0) - bcatcstr(tgt, ", "); - if (fpcheck(val[k])) - bformata(tgt, "uintBitsToFloat(uint(0x%Xu))", *(uint32_t *)&val[k + chunk.second.m_Rebase]); + float val[4] = { + *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].a, + *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].b, + *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].c, + *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].d + }; + bformata(tgt, "\tImmCB_%d_%d_%d[%d] = ", psContext->currentPhase, chunk.first, chunk.second.m_Rebase, i); + if (fpcheck(val[chunk.second.m_Rebase]) && HaveBitEncodingOps(psContext->psShader->eTargetLanguage)) + bformata(tgt, "uintBitsToFloat(uint(0x%Xu))", *(uint32_t *)&val[chunk.second.m_Rebase]); else - HLSLcc::PrintFloat(tgt, val[k + chunk.second.m_Rebase]); + HLSLcc::PrintFloat(tgt, val[chunk.second.m_Rebase]); + bcatcstr(tgt, ";\n"); + } + } + else + { + for (uint32_t i = 0; i < chunk.second.m_Size; i++) + { + float val[4] = { + *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].a, + *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].b, + *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].c, + *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].d + }; + bformata(tgt, "\tImmCB_%d_%d_%d[%d] = vec%d(", psContext->currentPhase, chunk.first, chunk.second.m_Rebase, i, componentCount); + for (uint32_t k = 0; k < componentCount; k++) + { + if (k != 0) + bcatcstr(tgt, ", "); + if (fpcheck(val[k]) && HaveBitEncodingOps(psContext->psShader->eTargetLanguage)) + bformata(tgt, "uintBitsToFloat(uint(0x%Xu))", *(uint32_t *)&val[k + chunk.second.m_Rebase]); + else + HLSLcc::PrintFloat(tgt, val[k + chunk.second.m_Rebase]); + } + bcatcstr(tgt, ");\n"); } - bcatcstr(tgt, ");\n"); } - } - }); + }); + } @@ -2468,7 +2825,7 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) default: ASSERT(0); break; - + } if (HavePrecisionQualifiers(psContext)) @@ -2490,7 +2847,7 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) } case MIN_PRECISION_FLOAT_2_8: { - Precision = "lowp "; + Precision = EmitLowp(psContext) ? "lowp " : "mediump "; break; } } @@ -2524,7 +2881,7 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) if ((psSig->ui32Mask & destMask) == 0) continue; // Skip dummy writes (vec2 texcoords get filled to vec4 with zeroes etc) - + while ((psSig->ui32Mask & (1 << rebase)) == 0) rebase++; @@ -2672,6 +3029,20 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) } case OPCODE_DCL_SAMPLER: { + if (psContext->IsVulkan()) + { + ResourceBinding *pRes = NULL; + psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_SAMPLER, psDecl->asOperands[0].ui32RegisterNumber, (const ResourceBinding **)&pRes); + ASSERT(pRes != NULL); + std::string name = ResourceName(psContext, RGROUP_SAMPLER, psDecl->asOperands[0].ui32RegisterNumber, 0); + const char *samplerPrecision = GetSamplerPrecision(psContext, pRes ? pRes->ePrecision : REFLECT_RESOURCE_PRECISION_UNKNOWN); + + GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(name); + const char *samplerType = psDecl->value.eSamplerMode == D3D10_SB_SAMPLER_MODE_COMPARISON ? "samplerShadow" : "sampler"; + bformata(glsl, "layout(set = %d, binding = %d) uniform %s %s %s;\n", binding.first, binding.second, samplerPrecision, samplerType, name.c_str()); + // Store the sampler mode to ShaderInfo, it's needed when we use the sampler + pRes->m_SamplerMode = psDecl->value.eSamplerMode; + } break; } case OPCODE_DCL_HS_MAX_TESSFACTOR: @@ -2815,7 +3186,7 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) default: ASSERT(0); break; - + } TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NONE); bcatcstr(glsl, ";\n"); @@ -2827,7 +3198,7 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) const bool avoidAtomicCounter = (psContext->flags & HLSLCC_FLAG_AVOID_SHADER_ATOMIC_COUNTERS) != 0; if(psDecl->sUAV.bCounter) { - if (isVulkan) + if (isVulkan) { std::string uavname = ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0); GLSLCrossDependencyData::VulkanResourceBinding uavBinding = psContext->psDependencies->GetVulkanResourceBinding(uavname, true); @@ -2961,7 +3332,7 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) } } -bool ToGLSL::TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix) +bool ToGLSL::TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix, int *iIgnoreRedirect) { ASSERT(sig != NULL); if (psContext->psShader->eShaderType == HULL_SHADER && sig->semanticName == "SV_TessFactor") @@ -3000,10 +3371,12 @@ bool ToGLSL::TranslateSystemValue(const Operand *psOperand, const ShaderInfo::In *pui32IgnoreSwizzle = 1; return true; case NAME_CLIP_DISTANCE: + case NAME_CULL_DISTANCE: { + const char* glName = sig->eSystemValueType == NAME_CLIP_DISTANCE ? "Clip" : "Cull"; // This is always routed through temp std::ostringstream oss; - oss << "phase" << psContext->currentPhase << "_glClipDistance" << sig->ui32SemanticIndex; + oss << "phase" << psContext->currentPhase << "_gl" << glName << "Distance" << sig->ui32SemanticIndex; result = oss.str(); return true; } @@ -3029,7 +3402,10 @@ bool ToGLSL::TranslateSystemValue(const Operand *psOperand, const ShaderInfo::In *pui32IgnoreSwizzle = 1; return true; case NAME_IS_FRONT_FACE: - result = "(gl_FrontFacing ? 0xffffffffu : uint(0))"; + if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + result = "(gl_FrontFacing ? 0xffffffffu : uint(0))"; + else + result = "(gl_FrontFacing ? int(1) : int(0))"; if (pui32IgnoreSwizzle) *pui32IgnoreSwizzle = 1; return true; diff --git a/src/toGLSLInstruction.cpp b/src/toGLSLInstruction.cpp index 7aca1a8..1c1dee2 100644 --- a/src/toGLSLInstruction.cpp +++ b/src/toGLSLInstruction.cpp @@ -10,9 +10,14 @@ #include "internal_includes/Shader.h" #include "internal_includes/Instruction.h" #include "internal_includes/toGLSL.h" +#include using namespace HLSLcc; +// In toGLSLDeclaration.cpp +const char* GetSamplerType(HLSLCrossCompilerContext* psContext, + const RESOURCE_DIMENSION eDimension, + const uint32_t ui32RegisterNumber); // This function prints out the destination name, possible destination writemask, assignment operator // and any possible conversions needed based on the eSrcType+ui32SrcElementCount (type and size of data expected to be coming in) @@ -38,7 +43,7 @@ void ToGLSL::AddOpAssignToDestWithMask(const Operand* psDest, // eg. MOV r0, c0.x => Temp[0] = vec4(c0.x); if (ui32DestElementCount > ui32SrcElementCount) { - bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeGLSL(eDestDataType, ui32DestElementCount, false)); + bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeGLSL(psContext, eDestDataType, ui32DestElementCount, false)); *pNeedsParenthesis = 1; } else @@ -53,36 +58,36 @@ void ToGLSL::AddOpAssignToDestWithMask(const Operand* psDest, case SVT_INT16: // Bitcasts from lower precisions are ambiguous ASSERT(eSrcType != SVT_FLOAT10 && eSrcType != SVT_FLOAT16); - if (eSrcType == SVT_FLOAT && psContext->psShader->ui32MajorVersion > 3) + if (eSrcType == SVT_FLOAT && psContext->psShader->ui32MajorVersion > 3 && HaveBitEncodingOps(psContext->psShader->eTargetLanguage)) { bformata(glsl, " %s floatBitsToInt(", szAssignmentOp); // Cover cases where the HLSL language expects the rest of the components to be default-filled if (ui32DestElementCount > ui32SrcElementCount) { - bformata(glsl, "%s(", GetConstructorForTypeGLSL(eSrcType, ui32DestElementCount, false)); + bformata(glsl, "%s(", GetConstructorForTypeGLSL(psContext, eSrcType, ui32DestElementCount, false)); (*pNeedsParenthesis)++; } } else - bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeGLSL(eDestDataType, ui32DestElementCount, false)); + bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeGLSL(psContext, eDestDataType, ui32DestElementCount, false)); (*pNeedsParenthesis)++; break; case SVT_UINT: case SVT_UINT16: ASSERT(eSrcType != SVT_FLOAT10 && eSrcType != SVT_FLOAT16); - if (eSrcType == SVT_FLOAT && psContext->psShader->ui32MajorVersion > 3) + if (eSrcType == SVT_FLOAT && psContext->psShader->ui32MajorVersion > 3 && HaveBitEncodingOps(psContext->psShader->eTargetLanguage)) { bformata(glsl, " %s floatBitsToUint(", szAssignmentOp); // Cover cases where the HLSL language expects the rest of the components to be default-filled if (ui32DestElementCount > ui32SrcElementCount) { - bformata(glsl, "%s(", GetConstructorForTypeGLSL(eSrcType, ui32DestElementCount, false)); + bformata(glsl, "%s(", GetConstructorForTypeGLSL(psContext, eSrcType, ui32DestElementCount, false)); (*pNeedsParenthesis)++; } } else - bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeGLSL(eDestDataType, ui32DestElementCount, false)); + bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeGLSL(psContext, eDestDataType, ui32DestElementCount, false)); (*pNeedsParenthesis)++; break; @@ -91,7 +96,7 @@ void ToGLSL::AddOpAssignToDestWithMask(const Operand* psDest, case SVT_FLOAT10: case SVT_FLOAT16: ASSERT(eSrcType != SVT_INT12 || (eSrcType != SVT_INT16 && eSrcType != SVT_UINT16)); - if (psContext->psShader->ui32MajorVersion > 3) + if (psContext->psShader->ui32MajorVersion > 3 && HaveBitEncodingOps(psContext->psShader->eTargetLanguage)) { if (eSrcType == SVT_INT) bformata(glsl, " %s intBitsToFloat(", szAssignmentOp); @@ -100,12 +105,12 @@ void ToGLSL::AddOpAssignToDestWithMask(const Operand* psDest, // Cover cases where the HLSL language expects the rest of the components to be default-filled if (ui32DestElementCount > ui32SrcElementCount) { - bformata(glsl, "%s(", GetConstructorForTypeGLSL(eSrcType, ui32DestElementCount, false)); + bformata(glsl, "%s(", GetConstructorForTypeGLSL(psContext, eSrcType, ui32DestElementCount, false)); (*pNeedsParenthesis)++; } } else - bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeGLSL(eDestDataType, ui32DestElementCount, false)); + bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeGLSL(psContext, eDestDataType, ui32DestElementCount, false)); (*pNeedsParenthesis)++; break; @@ -190,7 +195,7 @@ void ToGLSL::AddComparison(Instruction* psInst, ComparisonType eType, { AddAssignToDest(&psInst->asOperands[0], floatResult ? SVT_FLOAT : SVT_UINT, destElemCount, &needsParenthesis); - bcatcstr(glsl, GetConstructorForTypeGLSL(floatResult ? SVT_FLOAT : SVT_UINT, destElemCount, false)); + bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, floatResult ? SVT_FLOAT : SVT_UINT, destElemCount, false)); bcatcstr(glsl, "("); } bformata(glsl, "%s(", glslOpcode[eType]); @@ -204,7 +209,10 @@ void ToGLSL::AddComparison(Instruction* psInst, ComparisonType eType, bcatcstr(glsl, ")"); if (!floatResult) { - bcatcstr(glsl, " * 0xFFFFFFFFu"); + if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + bcatcstr(glsl, " * 0xFFFFFFFFu"); + else + bcatcstr(glsl, " * 0xFFFFFFFF"); } } @@ -252,7 +260,12 @@ void ToGLSL::AddComparison(Instruction* psInst, ComparisonType eType, if (floatResult) bcatcstr(glsl, "1.0 : 0.0"); else - bcatcstr(glsl, "0xFFFFFFFFu : uint(0u)"); // Adreno can't handle 0u. + { + if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + bcatcstr(glsl, "0xFFFFFFFFu : uint(0u)"); // Adreno can't handle 0u. + else + bcatcstr(glsl, "0xFFFFFFFF : int(0)"); + } AddAssignPrologue(needsParenthesis, true); bcatcstr(glsl, "; }\n"); } @@ -283,7 +296,10 @@ void ToGLSL::AddComparison(Instruction* psInst, ComparisonType eType, } else { - bcatcstr(glsl, ") ? 0xFFFFFFFFu : uint(0u)"); // Adreno can't handle 0u. + if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + bcatcstr(glsl, ") ? 0xFFFFFFFFu : uint(0u)"); // Adreno can't handle 0u. + else + bcatcstr(glsl, ") ? 0xFFFFFFFF : int(0)"); } } AddAssignPrologue(needsParenthesis); @@ -357,7 +373,10 @@ void ToGLSL::AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand else { if (s0Type == SVT_UINT || s0Type == SVT_UINT16) - bcatcstr(glsl, " != uint(0u)) ? "); // Adreno doesn't understand 0u. + if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + bcatcstr(glsl, " != uint(0u)) ? "); // Adreno doesn't understand 0u. + else + bcatcstr(glsl, " != int(0)) ? "); else if (s0Type == SVT_BOOL) bcatcstr(glsl, ") ? "); else @@ -382,6 +401,27 @@ void ToGLSL::AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand // TODO: We can actually do this in one op using mix(). int srcElem = -1; SHADER_VARIABLE_TYPE s0Type = src0->GetDataType(psContext); + + // Use an extra temp if dest is also one of the sources. Without this some swizzle combinations + // might alter the source before all components are handled. + const char* tempName = "hlslcc_movcTemp"; + bool dstIsSrc1 = (pDest->eType == src1->eType) && (pDest->ui32RegisterNumber == src1->ui32RegisterNumber); + bool dstIsSrc2 = (pDest->eType == src2->eType) && (pDest->ui32RegisterNumber == src2->ui32RegisterNumber); + + if (dstIsSrc1 || dstIsSrc2) + { + psContext->AddIndentation(); + bcatcstr(glsl, "{\n"); + ++psContext->indent; + psContext->AddIndentation(); + int numComponents = (pDest->eType == OPERAND_TYPE_TEMP) ? + psContext->psShader->GetTempComponentCount(eDestType, pDest->ui32RegisterNumber) : + pDest->iNumComponents; + bformata(glsl, "%s %s = ", HLSLcc::GetConstructorForType(psContext, eDestType, numComponents), tempName); + TranslateOperand(glsl, pDest, TO_FLAG_NAME_ONLY); + bcatcstr(glsl, ";\n"); + } + for (destElem = 0; destElem < 4; ++destElem) { int numParenthesis = 0; @@ -412,12 +452,33 @@ void ToGLSL::AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand } } - TranslateOperand(src1, SVTTypeToFlag(eDestType), 1 << srcElem); + if (!dstIsSrc1) + TranslateOperand(src1, SVTTypeToFlag(eDestType), 1 << srcElem); + else + { + bformata(glsl, "%s", tempName); + TranslateOperandSwizzleWithMask(glsl, psContext, src1, 1 << srcElem, 0); + } + bcatcstr(glsl, " : "); - TranslateOperand(src2, SVTTypeToFlag(eDestType), 1 << srcElem); + + if (!dstIsSrc2) + TranslateOperand(src2, SVTTypeToFlag(eDestType), 1 << srcElem); + else + { + bformata(glsl, "%s", tempName); + TranslateOperandSwizzleWithMask(glsl, psContext, src2, 1 << srcElem, 0); + } AddAssignPrologue(numParenthesis); } + + if (dstIsSrc1 || dstIsSrc2) + { + --psContext->indent; + psContext->AddIndentation(); + bcatcstr(glsl, "}\n"); + } } } @@ -436,6 +497,32 @@ void ToGLSL::CallBinaryOp(const char* name, Instruction* psInst, uint32_t src1AccessCount = GetNumberBitsSet(src1AccessMask); int needsParenthesis = 0; + if (!HaveNativeBitwiseOps(psContext->psShader->eTargetLanguage)) + { + const char *binaryOpWrap = NULL; + + if (!strcmp("%", name)) + binaryOpWrap = "op_modi"; + else if (!strcmp("&", name)) + binaryOpWrap = "op_and"; + else if (!strcmp("|", name)) + binaryOpWrap = "op_or"; + else if (!strcmp("^", name)) + binaryOpWrap = "op_xor"; + else if (!strcmp(">>", name)) + binaryOpWrap = "op_shr"; + else if (!strcmp("<<", name)) + binaryOpWrap = "op_shl"; + // op_not handled separately at OPCODE_NOT + + if (binaryOpWrap) + { + UseExtraFunctionDependency(binaryOpWrap); + CallHelper2Int(binaryOpWrap, psInst, 0, 1, 2, 1); + return; + } + } + if (src1SwizCount != src0SwizCount) { uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); @@ -456,7 +543,7 @@ void ToGLSL::CallBinaryOp(const char* name, Instruction* psInst, { uint32_t i; int firstPrinted = 0; - bcatcstr(glsl, GetConstructorForTypeGLSL(eDataType, dstSwizCount, false)); + bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, eDataType, dstSwizCount, false)); bcatcstr(glsl, "("); for (i = 0; i < 4; i++) { @@ -682,160 +769,154 @@ void ToGLSL::CallHelper1Int( AddAssignPrologue(numParenthesis); } +// Texel fetches etc need a dummy sampler (because glslang wants one, for Reasons(tm)). +// Any non-shadow sampler will do, so try to get one from sampler registers. If the current shader doesn't have any, declare a dummy one. +std::string ToGLSL:: GetVulkanDummySamplerName() +{ + std::string dummySmpName = "hlslcc_dummyPointClamp"; + if (!psContext->IsVulkan()) + return ""; + + const ResourceBinding *pSmpInfo = NULL; + int smpIdx = 0; + + while (psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_SAMPLER, smpIdx, &pSmpInfo) != 0) + { + if (pSmpInfo->m_SamplerMode != D3D10_SB_SAMPLER_MODE_COMPARISON) + return ResourceName(psContext, RGROUP_SAMPLER, smpIdx, 0); + + smpIdx++; + } + + if (!psContext->psShader->m_DummySamplerDeclared) + { + GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(dummySmpName); + bstring code = bfromcstr(""); + bformata(code, "layout(set = %d, binding = %d) uniform mediump sampler %s;", binding.first, binding.second, dummySmpName.c_str()); + DeclareExtraFunction(dummySmpName, code); + bdestroy(code); + psContext->psShader->m_DummySamplerDeclared = true; + } + return dummySmpName; +} + void ToGLSL::TranslateTexelFetch( Instruction* psInst, const ResourceBinding* psBinding, bstring glsl) { int numParenthesis = 0; + + std::string vulkanSamplerName = GetVulkanDummySamplerName(); + + std::string texName = ResourceName(psContext, RGROUP_TEXTURE, psInst->asOperands[2].ui32RegisterNumber, 0); + const bool hasOffset = (psInst->bAddressOffset != 0); + + // On Vulkan wrap the tex name with the sampler constructor + if (psContext->IsVulkan()) + { + const RESOURCE_DIMENSION eResDim = psContext->psShader->aeResourceDims[psInst->asOperands[2].ui32RegisterNumber]; + std::string smpType = GetSamplerType(psContext, eResDim, psInst->asOperands[2].ui32RegisterNumber); + std::ostringstream oss; + oss << smpType; + oss << "(" << texName << ", " << vulkanSamplerName << ")"; + texName = oss.str(); + } + psContext->AddIndentation(); AddAssignToDest(&psInst->asOperands[0], TypeFlagsToSVTType(ResourceReturnTypeToFlag(psBinding->ui32ReturnType)), 4, &numParenthesis); - bcatcstr(glsl, "texelFetch("); - // TODO Lod is being completely ignored!! Redo all of this. + if(hasOffset) + bcatcstr(glsl, "texelFetchOffset("); + else + bcatcstr(glsl, "texelFetch("); + switch (psBinding->eDimension) { - case REFLECT_RESOURCE_DIMENSION_TEXTURE1D: - case REFLECT_RESOURCE_DIMENSION_BUFFER: - { - TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_X); - if (psBinding->eDimension != REFLECT_RESOURCE_DIMENSION_BUFFER) - bcatcstr(glsl, ", 0"); // Buffers don't have LOD - bcatcstr(glsl, ")"); - break; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY: - case REFLECT_RESOURCE_DIMENSION_TEXTURE3D: - { - TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); - bcatcstr(glsl, ", 0)"); - break; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURE2D: - case REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY: - { - TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); - bcatcstr(glsl, ", 0)"); - break; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS: // TODO does this make any sense at all? - { - ASSERT(psInst->eOpcode == OPCODE_LD_MS); - TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[3], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_X); - bcatcstr(glsl, ")"); - break; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY: - { - ASSERT(psInst->eOpcode == OPCODE_LD_MS); - TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[3], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_X); - bcatcstr(glsl, ")"); - break; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURECUBE: - case REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY: - case REFLECT_RESOURCE_DIMENSION_BUFFEREX: - default: - { - ASSERT(0); - break; - } + case REFLECT_RESOURCE_DIMENSION_TEXTURE1D: + case REFLECT_RESOURCE_DIMENSION_BUFFER: + { + bcatcstr(glsl, texName.c_str()); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_X); + // Buffers don't have LOD or offset + if (psBinding->eDimension != REFLECT_RESOURCE_DIMENSION_BUFFER) + { + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_A); + if (hasOffset) + bformata(glsl, ", %d", psInst->iUAddrOffset); + } + bcatcstr(glsl, ")"); + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY: + case REFLECT_RESOURCE_DIMENSION_TEXTURE3D: + { + bcatcstr(glsl, texName.c_str()); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_A); + if (hasOffset && psBinding->eDimension == REFLECT_RESOURCE_DIMENSION_TEXTURE3D) + bformata(glsl, ", ivec3(%d, %d, %d)", psInst->iUAddrOffset, psInst->iVAddrOffset, psInst->iWAddrOffset); + if (hasOffset && psBinding->eDimension == REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY) + bformata(glsl, ", ivec3(%d, %d)", psInst->iUAddrOffset, psInst->iVAddrOffset); + bcatcstr(glsl, ")"); + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE2D: + case REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY: + { + bcatcstr(glsl, texName.c_str()); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_A); + if (hasOffset && psBinding->eDimension == REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY) + bformata(glsl, ", %d", psInst->iUAddrOffset); + if (hasOffset && psBinding->eDimension == REFLECT_RESOURCE_DIMENSION_TEXTURE2D) + bformata(glsl, ", ivec3(%d, %d)", psInst->iUAddrOffset, psInst->iVAddrOffset); + bcatcstr(glsl, ")"); + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS: + { + ASSERT(psInst->eOpcode == OPCODE_LD_MS); + bcatcstr(glsl, texName.c_str()); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[3], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_X); + bcatcstr(glsl, ")"); + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + { + ASSERT(psInst->eOpcode == OPCODE_LD_MS); + bcatcstr(glsl, texName.c_str()); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[3], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_X); + bcatcstr(glsl, ")"); + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURECUBE: + case REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY: + case REFLECT_RESOURCE_DIMENSION_BUFFEREX: + default: + { + // Not possible in either HLSL or GLSL + ASSERT(0); + break; + } } TranslateOperandSwizzleWithMask(psContext, &psInst->asOperands[2], psInst->asOperands[0].GetAccessMask(), 0); AddAssignPrologue(numParenthesis); } -void ToGLSL::TranslateTexelFetchOffset( - Instruction* psInst, - const ResourceBinding* psBinding, - bstring glsl) -{ - int numParenthesis = 0; - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], TypeFlagsToSVTType(ResourceReturnTypeToFlag(psBinding->ui32ReturnType)), 4, &numParenthesis); - - bcatcstr(glsl, "texelFetchOffset("); - - switch (psBinding->eDimension) - { - case REFLECT_RESOURCE_DIMENSION_TEXTURE1D: - { - TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_X); - bformata(glsl, ", 0, %d)", psInst->iUAddrOffset); - break; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY: - { - TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); - bformata(glsl, ", 0, ivec2(%d, %d))", - psInst->iUAddrOffset, - psInst->iVAddrOffset); - break; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURE3D: - { - TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); - bformata(glsl, ", 0, ivec3(%d, %d, %d))", - psInst->iUAddrOffset, - psInst->iVAddrOffset, - psInst->iWAddrOffset); - break; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURE2D: - { - TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); - bformata(glsl, ", 0, ivec2(%d, %d))", psInst->iUAddrOffset, psInst->iVAddrOffset); - break; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY: - { - TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); - bformata(glsl, ", 0, int(%d))", psInst->iUAddrOffset); - break; - } - case REFLECT_RESOURCE_DIMENSION_BUFFER: - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS: - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY: - case REFLECT_RESOURCE_DIMENSION_TEXTURECUBE: - case REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY: - case REFLECT_RESOURCE_DIMENSION_BUFFEREX: - default: - { - ASSERT(0); - break; - } - } - - TranslateOperandSwizzleWithMask(psContext, &psInst->asOperands[2], psInst->asOperands[0].GetAccessMask(), 0); - AddAssignPrologue(numParenthesis); -} - - //Makes sure the texture coordinate swizzle is appropriate for the texture type. //i.e. vecX for X-dimension texture. //Currently supports floating point coord only, so not used for texelFetch. @@ -895,6 +976,21 @@ void ToGLSL::GetResInfoData(Instruction* psInst, int index, int destElem) bool isUAV = (psInst->asOperands[2].eType == OPERAND_TYPE_UNORDERED_ACCESS_VIEW); bool isMS = psInst->eResDim == RESOURCE_DIMENSION_TEXTURE2DMS || psInst->eResDim == RESOURCE_DIMENSION_TEXTURE2DMSARRAY; + std::string texName = ResourceName(psContext, isUAV ? RGROUP_UAV : RGROUP_TEXTURE, psInst->asOperands[2].ui32RegisterNumber, 0); + + // On Vulkan wrap the tex name with the sampler constructor + if (psContext->IsVulkan() && !isUAV) + { + std::string vulkanSamplerName = GetVulkanDummySamplerName(); + + const RESOURCE_DIMENSION eResDim = psContext->psShader->aeResourceDims[psInst->asOperands[2].ui32RegisterNumber]; + std::string smpType = GetSamplerType(psContext, eResDim, psInst->asOperands[2].ui32RegisterNumber); + std::ostringstream oss; + oss << smpType; + oss << "(" << texName << ", " << vulkanSamplerName << ")"; + texName = oss.str(); + } + psContext->AddIndentation(); AddOpAssignToDestWithMask(&psInst->asOperands[0], eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT ? SVT_UINT : SVT_FLOAT, 1, "=", &numParenthesis, 1 << destElem); @@ -905,12 +1001,20 @@ void ToGLSL::GetResInfoData(Instruction* psInst, int index, int destElem) bcatcstr(glsl, "("); if (dim < (index + 1)) { - bcatcstr(glsl, eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT ? "uint(0u)" : "0.0"); + if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + bcatcstr(glsl, eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT ? "uint(0u)" : "0.0"); + else + bcatcstr(glsl, eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT ? "int(0)" : "0.0"); } else { if (eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT) - bformata(glsl, "uvec%d(", dim); + { + if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + bformata(glsl, "uvec%d(", dim); + else + bformata(glsl, "ivec%d(", dim); + } else if (eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_RCPFLOAT) bformata(glsl, "vec%d(1.0) / vec%d(", dim, dim); else @@ -921,7 +1025,7 @@ void ToGLSL::GetResInfoData(Instruction* psInst, int index, int destElem) else bcatcstr(glsl, "textureSize("); - TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); + bcatcstr(glsl, texName.c_str()); if (!isUAV && !isMS) { @@ -950,11 +1054,16 @@ void ToGLSL::GetResInfoData(Instruction* psInst, int index, int destElem) { ASSERT(!isUAV); if (eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT) - bcatcstr(glsl, "uint("); + { + if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + bcatcstr(glsl, "uint("); + else + bcatcstr(glsl, "int("); + } else bcatcstr(glsl, "float("); bcatcstr(glsl, "textureQueryLevels("); - TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); + bcatcstr(glsl, texName.c_str()); bcatcstr(glsl, "))"); } AddAssignPrologue(numParenthesis); @@ -982,6 +1091,7 @@ void ToGLSL::TranslateTextureSample(Instruction* psInst, const char* offset = ""; const char* depthCmpCoordType = ""; const char* gradSwizzle = ""; + const char* ext = ""; uint32_t ui32NumOffsets = 0; @@ -1028,7 +1138,7 @@ void ToGLSL::TranslateTextureSample(Instruction* psInst, } case RESOURCE_DIMENSION_TEXTURECUBE: { - depthCmpCoordType = "vec3"; + depthCmpCoordType = "vec4"; gradSwizzle = ".xyz"; ui32NumOffsets = 3; if (!iHaveOverloadedTexFuncs) @@ -1106,20 +1216,50 @@ void ToGLSL::TranslateTextureSample(Instruction* psInst, psContext->AddIndentation(); AddAssignToDest(psDest, dataType, psSrcTex->GetNumSwizzleElements(), &numParenthesis); - // Func name depending on the flags - if (ui32Flags & (TEXSMP_FLAG_LOD | TEXSMP_FLAG_FIRSTLOD)) - bformata(glsl, "%sLod%s(", funcName, offset); - else if (ui32Flags & TEXSMP_FLAG_GRAD) - bformata(glsl, "%sGrad%s(", funcName, offset); - else - bformata(glsl, "%s%s(", funcName, offset); + // GLSL doesn't have textureLod() for 2d shadow samplers, we'll have to use grad instead. In that case assume LOD 0. + const bool needsLodWorkaround = (eResDim == RESOURCE_DIMENSION_TEXTURE2DARRAY) && (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE); + const bool needsLodWorkaroundES2 = (psContext->psShader->eTargetLanguage == LANG_ES_100 && psContext->psShader->eShaderType == PIXEL_SHADER && (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE)); - // Sampler name - if (!useCombinedTextureSamplers) - ResourceName(glsl, psContext, RGROUP_TEXTURE, psSrcTex->ui32RegisterNumber, ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE); + if (needsLodWorkaround) + { + bformata(glsl, "%sGrad%s(", funcName, offset); + } else - bcatcstr(glsl, TextureSamplerName(&psContext->psShader->sInfo, psSrcTex->ui32RegisterNumber, psSrcSamp->ui32RegisterNumber, ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE).c_str()); - + { + if (psContext->psShader->eTargetLanguage == LANG_ES_100 && + psContext->psShader->eShaderType == PIXEL_SHADER && + ui32Flags & (TEXSMP_FLAG_LOD | TEXSMP_FLAG_FIRSTLOD | TEXSMP_FLAG_GRAD)) + ext = "EXT"; + + if (ui32Flags & (TEXSMP_FLAG_LOD | TEXSMP_FLAG_FIRSTLOD) && !needsLodWorkaroundES2) + bformata(glsl, "%sLod%s%s(", funcName, ext, offset); + else if (ui32Flags & TEXSMP_FLAG_GRAD) + bformata(glsl, "%sGrad%s%s(", funcName, ext, offset); + else + bformata(glsl, "%s%s%s(", funcName, ext, offset); + } + + if (psContext->IsVulkan()) + { + // Build the sampler name here + std::string samplerType = GetSamplerType(psContext, eResDim, psSrcTex->ui32RegisterNumber); + const ResourceBinding *pSmpRes = NULL; + psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_SAMPLER, psSrcSamp->ui32RegisterNumber, &pSmpRes); + + if (pSmpRes->m_SamplerMode == D3D10_SB_SAMPLER_MODE_COMPARISON) + samplerType.append("Shadow"); + std::string texName = ResourceName(psContext, RGROUP_TEXTURE, psSrcTex->ui32RegisterNumber, 0); + std::string smpName = ResourceName(psContext, RGROUP_SAMPLER, psSrcSamp->ui32RegisterNumber, 0); + bformata(glsl, "%s(%s, %s)", samplerType.c_str(), texName.c_str(), smpName.c_str()); + } + else + { + // Sampler name + if (!useCombinedTextureSamplers) + ResourceName(glsl, psContext, RGROUP_TEXTURE, psSrcTex->ui32RegisterNumber, ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE); + else + bcatcstr(glsl, TextureSamplerName(&psContext->psShader->sInfo, psSrcTex->ui32RegisterNumber, psSrcSamp->ui32RegisterNumber, ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE).c_str()); + } bcatcstr(glsl, ", "); // Texture coordinates, either from previously constructed temp @@ -1142,18 +1282,26 @@ void ToGLSL::TranslateTextureSample(Instruction* psInst, } // Add LOD/grad parameters based on the flags - if (ui32Flags & TEXSMP_FLAG_LOD) + if (needsLodWorkaround) { - bcatcstr(glsl, ", "); - TranslateOperand(psSrcLOD, TO_AUTO_BITCAST_TO_FLOAT); - if (psContext->psShader->ui32MajorVersion < 4) + bcatcstr(glsl, ", vec2(0.0, 0.0), vec2(0.0, 0.0)"); + } + else if (ui32Flags & TEXSMP_FLAG_LOD) + { + if (!needsLodWorkaroundES2) { - bcatcstr(glsl, ".w"); + bcatcstr(glsl, ", "); + TranslateOperand(psSrcLOD, TO_AUTO_BITCAST_TO_FLOAT); + if (psContext->psShader->ui32MajorVersion < 4) + { + bcatcstr(glsl, ".w"); + } } } else if (ui32Flags & TEXSMP_FLAG_FIRSTLOD) { - bcatcstr(glsl, ", 0.0"); + if (!needsLodWorkaroundES2) + bcatcstr(glsl, ", 0.0"); } else if (ui32Flags & TEXSMP_FLAG_GRAD) { @@ -1255,14 +1403,28 @@ void ToGLSL::TranslateDynamicComponentSelection(const ShaderVarType* psVarType, if (offset > 0) bcatcstr(glsl, "("); - // The var containing byte address to the requested element - TranslateOperand(psByteAddr, TO_FLAG_UNSIGNED_INTEGER, mask); + if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + { + // The var containing byte address to the requested element + TranslateOperand(psByteAddr, TO_FLAG_UNSIGNED_INTEGER, mask); - if (offset > 0)// If the vector is part of a struct, there is an extra offset in our byte address - bformata(glsl, " - %du)", offset); // Subtract that first + if (offset > 0)// If the vector is part of a struct, there is an extra offset in our byte address + bformata(glsl, " - %du)", offset); // Subtract that first - bcatcstr(glsl, " >> 0x2u"); // Convert byte offset to index: div by four - bcatcstr(glsl, "]"); + bcatcstr(glsl, " >> 0x2u"); // Convert byte offset to index: div by four + bcatcstr(glsl, "]"); + } + else + { + // The var containing byte address to the requested element + TranslateOperand(psByteAddr, TO_FLAG_INTEGER, mask); + + if (offset > 0)// If the vector is part of a struct, there is an extra offset in our byte address + bformata(glsl, " - %d)", offset); // Subtract that first + + bcatcstr(glsl, " >> 0x2"); // Convert byte offset to index: div by four + bcatcstr(glsl, "]"); + } } void ToGLSL::TranslateShaderStorageStore(Instruction* psInst) @@ -1296,7 +1458,7 @@ void ToGLSL::TranslateShaderStorageStore(Instruction* psInst) uint32_t dstOffFlag = TO_FLAG_UNSIGNED_INTEGER; SHADER_VARIABLE_TYPE dstOffType = psDestByteOff->GetDataType(psContext); - if (dstOffType == SVT_INT || dstOffType == SVT_INT16 || dstOffType == SVT_INT12) + if (!HaveUnsignedTypes(psContext->psShader->eTargetLanguage) || dstOffType == SVT_INT || dstOffType == SVT_INT16 || dstOffType == SVT_INT12) dstOffFlag = TO_FLAG_INTEGER; for (component = 0; component < 4; component++) @@ -1380,14 +1542,14 @@ void ToGLSL::TranslateShaderStorageLoad(Instruction* psInst) SHADER_VARIABLE_TYPE destDataType = psDest->GetDataType(psContext); uint32_t srcOffFlag = TO_FLAG_UNSIGNED_INTEGER; SHADER_VARIABLE_TYPE srcOffType = psSrcByteOff->GetDataType(psContext); - if (srcOffType == SVT_INT || srcOffType == SVT_INT16 || srcOffType == SVT_INT12) + if (!HaveUnsignedTypes(psContext->psShader->eTargetLanguage) || srcOffType == SVT_INT || srcOffType == SVT_INT16 || srcOffType == SVT_INT12) srcOffFlag = TO_FLAG_INTEGER; psContext->AddIndentation(); AddAssignToDest(psDest, destDataType, destCount, &numParenthesis); //TODO check this out? if (destCount > 1) { - bformata(glsl, "%s(", GetConstructorForTypeGLSL(destDataType, destCount, false)); + bformata(glsl, "%s(", GetConstructorForTypeGLSL(psContext, destDataType, destCount, false)); numParenthesis++; } for (component = 0; component < 4; component++) @@ -1404,7 +1566,10 @@ void ToGLSL::TranslateShaderStorageLoad(Instruction* psInst) // always uint array atm if (destDataType == SVT_FLOAT) { - bcatcstr(glsl, "uintBitsToFloat("); + if (HaveBitEncodingOps(psContext->psShader->eTargetLanguage)) + bcatcstr(glsl, "uintBitsToFloat("); + else + bcatcstr(glsl, "float("); addedBitcast = 1; } else if (destDataType == SVT_INT || destDataType == SVT_INT16 || destDataType == SVT_INT12) @@ -1742,7 +1907,7 @@ void ToGLSL::TranslateAtomicMemOp(Instruction* psInst) } } - if (isUint) + if (isUint && HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) ui32DataTypeFlag = TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_BITCAST_TO_UINT; else ui32DataTypeFlag = TO_FLAG_INTEGER | TO_AUTO_BITCAST_TO_INT; @@ -1777,7 +1942,7 @@ void ToGLSL::TranslateAtomicMemOp(Instruction* psInst) uint32_t destAddrFlag = TO_FLAG_UNSIGNED_INTEGER; SHADER_VARIABLE_TYPE destAddrType = destAddr->GetDataType(psContext); - if (destAddrType == SVT_INT || destAddrType == SVT_INT16 || destAddrType == SVT_INT12) + if (!HaveUnsignedTypes(psContext->psShader->eTargetLanguage) || destAddrType == SVT_INT || destAddrType == SVT_INT16 || destAddrType == SVT_INT12) destAddrFlag = TO_FLAG_INTEGER; bcatcstr(glsl, "["); @@ -1863,7 +2028,7 @@ void ToGLSL::TranslateConditional( { uint32_t oFlag = TO_FLAG_UNSIGNED_INTEGER; bool isInt = false; - if (argType == SVT_INT || argType == SVT_INT16 || argType == SVT_INT12) + if (!HaveUnsignedTypes(psContext->psShader->eTargetLanguage) || argType == SVT_INT || argType == SVT_INT16 || argType == SVT_INT12) { isInt = true; oFlag = TO_FLAG_INTEGER; @@ -1979,7 +2144,7 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals psContext->AddIndentation(); AddAssignToDest(&psInst->asOperands[0], castType, srcCount, &numParenthesis); - bcatcstr(glsl, GetConstructorForTypeGLSL(castType, dstCount, false)); + bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, castType, dstCount, false)); bcatcstr(glsl, "("); // 1 TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_FLOAT, psInst->asOperands[0].GetAccessMask()); bcatcstr(glsl, ")"); // 1 @@ -2037,7 +2202,7 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals psContext->AddIndentation(); AddAssignToDest(&psInst->asOperands[0], castType, srcCount, &numParenthesis); - bcatcstr(glsl, GetConstructorForTypeGLSL(castType, dstCount, false)); + bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, castType, dstCount, false)); bcatcstr(glsl, "("); // 1 TranslateOperand(&psInst->asOperands[1], psInst->eOpcode == OPCODE_UTOF ? TO_AUTO_BITCAST_TO_UINT : TO_AUTO_BITCAST_TO_INT, psInst->asOperands[0].GetAccessMask()); bcatcstr(glsl, ")"); // 1 @@ -2222,7 +2387,7 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals TranslateOperand(&psInst->asOperands[otherOp], ui32Flags, destMask); bcatcstr(glsl, " : "); - bcatcstr(glsl, GetConstructorForTypeGLSL(eDataType, dstSwizCount, false)); + bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, eDataType, dstSwizCount, false)); bcatcstr(glsl, "("); for (i = 0; i < dstSwizCount; i++) { @@ -2248,7 +2413,7 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals // We can use mix() AddAssignToDest(&psInst->asOperands[0], eDataType, dstSwizCount, &needsParenthesis); bcatcstr(glsl, "mix("); - bcatcstr(glsl, GetConstructorForTypeGLSL(eDataType, dstSwizCount, false)); + bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, eDataType, dstSwizCount, false)); bcatcstr(glsl, "("); for (i = 0; i < dstSwizCount; i++) { @@ -2270,7 +2435,7 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals bcatcstr(glsl, "), "); TranslateOperand(&psInst->asOperands[otherOp], ui32Flags, destMask); bcatcstr(glsl, ", "); - bcatcstr(glsl, GetConstructorForTypeGLSL(eDataType, dstSwizCount, false)); + bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, eDataType, dstSwizCount, false)); bcatcstr(glsl, "("); TranslateOperand(&psInst->asOperands[boolOp], TO_FLAG_BOOL, destMask); bcatcstr(glsl, ")"); @@ -2280,10 +2445,13 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals { AddAssignToDest(&psInst->asOperands[0], SVT_UINT, dstSwizCount, &needsParenthesis); bcatcstr(glsl, "("); - bcatcstr(glsl, GetConstructorForTypeGLSL(SVT_UINT, dstSwizCount, false)); + bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, SVT_UINT, dstSwizCount, false)); bcatcstr(glsl, "("); TranslateOperand(&psInst->asOperands[boolOp], TO_FLAG_BOOL, destMask); - bcatcstr(glsl, ") * 0xffffffffu) & "); + if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + bcatcstr(glsl, ") * 0xffffffffu) & "); + else + bcatcstr(glsl, ") * 0xffffffff) & "); TranslateOperand(&psInst->asOperands[otherOp], TO_FLAG_UNSIGNED_INTEGER, destMask); } @@ -2601,6 +2769,9 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals psContext->AddIndentation(); bcatcstr(glsl, "//ROUND_Z\n"); #endif + if (psContext->psShader->eTargetLanguage == LANG_ES_100) + UseExtraFunctionDependency("trunc"); + CallHelper1("trunc", psInst, 0, 1, 1); break; } @@ -2610,6 +2781,10 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals psContext->AddIndentation(); bcatcstr(glsl, "//ROUND_NE\n"); #endif + + if (psContext->psShader->eTargetLanguage == LANG_ES_100) + UseExtraFunctionDependency("roundEven"); + CallHelper1("roundEven", psInst, 0, 1, 1); break; } @@ -2628,7 +2803,10 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals psContext->AddIndentation(); bcatcstr(glsl, "//IMAX\n"); #endif - CallHelper2Int("max", psInst, 0, 1, 2, 1); + if (psContext->psShader->eTargetLanguage == LANG_ES_100) + CallHelper2("max", psInst, 0, 1, 2, 1); + else + CallHelper2Int("max", psInst, 0, 1, 2, 1); break; } case OPCODE_UMAX: @@ -2637,7 +2815,10 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals psContext->AddIndentation(); bcatcstr(glsl, "//UMAX\n"); #endif - CallHelper2UInt("max", psInst, 0, 1, 2, 1); + if (psContext->psShader->eTargetLanguage == LANG_ES_100) + CallHelper2("max", psInst, 0, 1, 2, 1); + else + CallHelper2UInt("max", psInst, 0, 1, 2, 1); break; } case OPCODE_MAX: @@ -2655,7 +2836,10 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals psContext->AddIndentation(); bcatcstr(glsl, "//IMIN\n"); #endif - CallHelper2Int("min", psInst, 0, 1, 2, 1); + if (psContext->psShader->eTargetLanguage == LANG_ES_100) + CallHelper2("min", psInst, 0, 1, 2, 1); + else + CallHelper2Int("min", psInst, 0, 1, 2, 1); break; } case OPCODE_UMIN: @@ -2664,7 +2848,10 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals psContext->AddIndentation(); bcatcstr(glsl, "//UMIN\n"); #endif - CallHelper2UInt("min", psInst, 0, 1, 2, 1); + if (psContext->psShader->eTargetLanguage == LANG_ES_100) + CallHelper2("min", psInst, 0, 1, 2, 1); + else + CallHelper2UInt("min", psInst, 0, 1, 2, 1); break; } case OPCODE_MIN: @@ -2912,12 +3099,12 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } case OPCODE_BFI: { + uint32_t destMask = psInst->asOperands[0].GetAccessMask(); uint32_t numelements_width = psInst->asOperands[1].GetNumSwizzleElements(); uint32_t numelements_offset = psInst->asOperands[2].GetNumSwizzleElements(); uint32_t numelements_dest = psInst->asOperands[0].GetNumSwizzleElements(); uint32_t numoverall_elements = std::min(std::min(numelements_width, numelements_offset), numelements_dest); - uint32_t i, j; - static const char* bfi_elementidx[] = { "x", "y", "z", "w" }; + uint32_t i, j, k; #ifdef _DEBUG psContext->AddIndentation(); bcatcstr(glsl, "//BFI\n"); @@ -2931,35 +3118,27 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals else bformata(glsl, "ivec%d(", numoverall_elements); - for (i = 0; i < numoverall_elements; ++i) + k = 0; + for (i = 0; i < 4; ++i) { + if ((destMask & (1 << i)) == 0) + continue; + + k++; bcatcstr(glsl, "bitfieldInsert("); for (j = 4; j >= 1; --j) { - uint32_t opSwizzleCount = psInst->asOperands[j].GetNumSwizzleElements(); - - if (opSwizzleCount != 1) - bcatcstr(glsl, " ("); - TranslateOperand(&psInst->asOperands[j], TO_FLAG_INTEGER); - if (opSwizzleCount != 1) - bformata(glsl, " ).%s", bfi_elementidx[i]); + TranslateOperand(&psInst->asOperands[j], TO_FLAG_INTEGER, 1 << i); if (j != 1) bcatcstr(glsl, ","); } bcatcstr(glsl, ") "); - if (i + 1 != numoverall_elements) + if (k != numoverall_elements) bcatcstr(glsl, ", "); } bcatcstr(glsl, ")"); - - if (numoverall_elements > 1) - { - bcatcstr(glsl, "."); - for (i = 0; i < numoverall_elements; ++i) - bformata(glsl, "%s", bfi_elementidx[i]); - } AddAssignPrologue(numParenthesis); break; } @@ -3154,6 +3333,7 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals else if (psInst->m_LoopInductors[1] != 0) { // Can emit as for + uint32_t typeFlags = TO_FLAG_INTEGER; bcatcstr(glsl, "for("); if (psInst->m_LoopInductors[0] != 0) { @@ -3167,6 +3347,7 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals break; case SVT_UINT: bcatcstr(glsl, "uint "); + typeFlags = TO_FLAG_UNSIGNED_INTEGER; break; default: ASSERT(0); @@ -3189,7 +3370,6 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals if (psInst->m_LoopInductors[1]->asOperands[1].eType == OPERAND_TYPE_IMMEDIATE32) negateOrder = true; - uint32_t typeFlags = TO_FLAG_INTEGER; const char *cmpOp = ""; switch (psInst->m_LoopInductors[1]->eOpcode) { @@ -3256,7 +3436,20 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } else { - bcatcstr(glsl, "while(true){\n"); + if (psContext->psShader->eTargetLanguage == LANG_ES_100) + { + bstring name; + name = bformat(HLSLCC_TEMP_PREFIX "i_while_true_%d", m_NumDeclaredWhileTrueLoops++); + + // Workaround limitation with WebGL 1.0 GLSL, as we're expecting something to break the loop in any case + int hardcoded_iteration_limit = 0x7FFFFFFF; + + bformata(glsl, "for(int %s = 0 ; %s < 0x%X ; %s++){\n", name->data, name->data, hardcoded_iteration_limit, name->data); + } + else + { + bcatcstr(glsl, "while(true){\n"); + } ++psContext->indent; } break; @@ -3488,14 +3681,7 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, psInst->asOperands[2].ui32RegisterNumber, &psBinding); - if (psInst->bAddressOffset) - { - TranslateTexelFetchOffset(psInst, psBinding, glsl); - } - else - { - TranslateTexelFetch(psInst, psBinding, glsl); - } + TranslateTexelFetch(psInst, psBinding, glsl); break; } case OPCODE_DISCARD: @@ -3859,9 +4045,9 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals #endif psContext->AddIndentation(); AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, srcElemCount, &numParenthesis); - bcatcstr(glsl, GetConstructorForTypeGLSL(SVT_FLOAT, destElemCount, false)); + bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, SVT_FLOAT, destElemCount, false)); bcatcstr(glsl, "(1.0) / "); - bcatcstr(glsl, GetConstructorForTypeGLSL(SVT_FLOAT, destElemCount, false)); + bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, SVT_FLOAT, destElemCount, false)); bcatcstr(glsl, "("); numParenthesis++; TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE, psInst->asOperands[0].GetAccessMask()); @@ -3870,71 +4056,56 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } case OPCODE_F32TOF16: { - const uint32_t destElemCount = psInst->asOperands[0].GetNumSwizzleElements(); - const uint32_t s0ElemCount = psInst->asOperands[1].GetNumSwizzleElements(); - uint32_t destElem; + uint32_t writeMask = psInst->asOperands[0].GetAccessMask(); + #ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//F32TOF16\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "//F32TOF16\n"); #endif - for (destElem = 0; destElem < destElemCount; ++destElem) - { - const char* swizzle[] = { ".x", ".y", ".z", ".w" }; - //unpackHalf2x16 converts two f16s packed into uint to two f32s. + for (int i = 0; i < 4; i++) + { + if ((writeMask & (1 << i)) == 0) + continue; + psContext->AddIndentation(); + psInst->asOperands[0].ui32CompMask = (1 << i); + psInst->asOperands[0].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, &numParenthesis); - //dest.swiz.x = unpackHalf2x16(src.swiz.x).x - //dest.swiz.y = unpackHalf2x16(src.swiz.y).x - //dest.swiz.z = unpackHalf2x16(src.swiz.z).x - //dest.swiz.w = unpackHalf2x16(src.swiz.w).x + bcatcstr(glsl, "packHalf2x16(vec2("); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE, (1 << i)); + bcatcstr(glsl, ", 0.0))"); + AddAssignPrologue(numParenthesis); - psContext->AddIndentation(); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); - if (destElemCount > 1) - bcatcstr(glsl, swizzle[destElem]); - - bcatcstr(glsl, " = unpackHalf2x16("); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER); - if (s0ElemCount > 1) - bcatcstr(glsl, swizzle[destElem]); - bcatcstr(glsl, ").x;\n"); - - } - break; + } + break; } case OPCODE_F16TOF32: { - const uint32_t destElemCount = psInst->asOperands[0].GetNumSwizzleElements(); - const uint32_t s0ElemCount = psInst->asOperands[1].GetNumSwizzleElements(); - uint32_t destElem; + uint32_t writeMask = psInst->asOperands[0].GetAccessMask(); + #ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//F16TOF32\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "//F16TOF32\n"); #endif - for (destElem = 0; destElem < destElemCount; ++destElem) - { - const char* swizzle[] = { ".x", ".y", ".z", ".w" }; - //packHalf2x16 converts two f32s to two f16s packed into a uint. + for (int i = 0; i < 4; i++) + { + if ((writeMask & (1 << i)) == 0) + continue; + psContext->AddIndentation(); + psInst->asOperands[0].ui32CompMask = (1 << i); + psInst->asOperands[0].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; + AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, 1, &numParenthesis); - //dest.swiz.x = packHalf2x16(vec2(src.swiz.x)) & 0xFFFF - //dest.swiz.y = packHalf2x16(vec2(src.swiz.y)) & 0xFFFF - //dest.swiz.z = packHalf2x16(vec2(src.swiz.z)) & 0xFFFF - //dest.swiz.w = packHalf2x16(vec2(src.swiz.w)) & 0xFFFF + bcatcstr(glsl, "unpackHalf2x16("); + TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_UINT, (1 << i)); + bcatcstr(glsl, ").x"); + AddAssignPrologue(numParenthesis); + } + break; - psContext->AddIndentation(); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION | TO_FLAG_UNSIGNED_INTEGER); - if (destElemCount > 1) - bcatcstr(glsl, swizzle[destElem]); - bcatcstr(glsl, " = packHalf2x16(vec2("); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE); - if (s0ElemCount > 1) - bcatcstr(glsl, swizzle[destElem]); - bcatcstr(glsl, ")) & 0xFFFF;\n"); - - } - break; } case OPCODE_INEG: { @@ -4063,16 +4234,30 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals case OPCODE_NOT: { #ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//INOT\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "//NOT\n"); #endif - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_INT, psInst->asOperands[1].GetNumSwizzleElements(), &numParenthesis); + if (!HaveNativeBitwiseOps(psContext->psShader->eTargetLanguage)) + { + UseExtraFunctionDependency("op_not"); - bcatcstr(glsl, "~"); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, psInst->asOperands[0].GetAccessMask()); - AddAssignPrologue(numParenthesis); - break; + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_INT, psInst->asOperands[1].GetNumSwizzleElements(), &numParenthesis); + bcatcstr(glsl, "op_not("); + numParenthesis++; + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, psInst->asOperands[0].GetAccessMask()); + AddAssignPrologue(numParenthesis); + } + else + { + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_INT, psInst->asOperands[1].GetNumSwizzleElements(), &numParenthesis); + + bcatcstr(glsl, "~"); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, psInst->asOperands[0].GetAccessMask()); + AddAssignPrologue(numParenthesis); + } + break; } case OPCODE_XOR: { @@ -4086,16 +4271,18 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals case OPCODE_RESINFO: { - uint32_t destElemCount = psInst->asOperands[0].GetNumSwizzleElements(); uint32_t destElem; + uint32_t mask = psInst->asOperands[0].GetAccessMask(); + #ifdef _DEBUG psContext->AddIndentation(); bcatcstr(glsl, "//RESINFO\n"); #endif - for (destElem = 0; destElem < destElemCount; ++destElem) + for (destElem = 0; destElem < 4; ++destElem) { - GetResInfoData(psInst, psInst->asOperands[2].aui32Swizzle[destElem], destElem); + if (1 << destElem & mask) + GetResInfoData(psInst, psInst->asOperands[2].aui32Swizzle[destElem], destElem); } break; @@ -4121,9 +4308,21 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals #endif const RESINFO_RETURN_TYPE eResInfoReturnType = psInst->eResInfoReturnType; psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT ? SVT_UINT : SVT_FLOAT, 1, &numParenthesis); + AddAssignToDest(&psInst->asOperands[0], eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_FLOAT ? SVT_FLOAT : SVT_UINT, 1, &numParenthesis); bcatcstr(glsl, "textureSamples("); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_NAME_ONLY); + std::string texName = ResourceName(psContext, RGROUP_TEXTURE, psInst->asOperands[1].ui32RegisterNumber, 0); + if (psContext->IsVulkan()) + { + std::string vulkanSamplerName = GetVulkanDummySamplerName(); + + const RESOURCE_DIMENSION eResDim = psContext->psShader->aeResourceDims[psInst->asOperands[2].ui32RegisterNumber]; + std::string smpType = GetSamplerType(psContext, eResDim, psInst->asOperands[2].ui32RegisterNumber); + std::ostringstream oss; + oss << smpType; + oss << "(" << texName << ", " << vulkanSamplerName << ")"; + texName = oss.str(); + } + bcatcstr(glsl, texName.c_str()); bcatcstr(glsl, ")"); AddAssignPrologue(numParenthesis); break; diff --git a/src/toGLSLOperand.cpp b/src/toGLSLOperand.cpp index 9480bc6..4cb2f03 100644 --- a/src/toGLSLOperand.cpp +++ b/src/toGLSLOperand.cpp @@ -1,6 +1,7 @@ #include "internal_includes/toGLSLOperand.h" #include "internal_includes/HLSLccToolkit.h" #include "internal_includes/HLSLCrossCompilerContext.h" +#include "internal_includes/languages.h" #include "bstrlib.h" #include "hlslcc.h" #include "internal_includes/debug.h" @@ -13,7 +14,6 @@ #include #include -#include using namespace HLSLcc; @@ -25,6 +25,8 @@ using namespace HLSLcc; #endif #endif // #ifndef fpcheck +// In case we need to fake dynamic indexing +static const char *squareBrackets[2][2] = { { "DynamicIndex(", ")" }, { "[", "]" } }; // Returns nonzero if types are just different precisions of the same underlying type static bool AreTypesCompatible(SHADER_VARIABLE_TYPE a, uint32_t ui32TOFlag) @@ -359,7 +361,7 @@ static void printImmediate32(HLSLCrossCompilerContext *psContext, uint32_t value int needsParenthesis = 0; // Print floats as bit patterns. - if ((eType == SVT_FLOAT || eType == SVT_FLOAT16 || eType == SVT_FLOAT10) && psContext->psShader->ui32MajorVersion > 3 && fpcheck(*((float *)(&value)))) + if ((eType == SVT_FLOAT || eType == SVT_FLOAT16 || eType == SVT_FLOAT10) && psContext->psShader->ui32MajorVersion > 3 && HaveBitEncodingOps(psContext->psShader->eTargetLanguage) && fpcheck(*((float *)(&value)))) { if (psContext->psShader->eTargetLanguage == LANG_METAL) bcatcstr(glsl, "as_type("); @@ -376,9 +378,18 @@ static void printImmediate32(HLSLCrossCompilerContext *psContext, uint32_t value case SVT_INT: case SVT_INT16: case SVT_INT12: + // Adreno bug (happens only on android 4.* GLES3) casting unsigned representation of negative values to signed int + // results in undefined value/fails to link shader, need to print as signed decimal + if (value > 0x7fffffff && psContext->psShader->eTargetLanguage == LANG_ES_300) + bformata(glsl, "%i", (int32_t)value); // Need special handling for anything >= uint 0x3fffffff - if (value > 0x3ffffffe) - bformata(glsl, "int(0x%Xu)", value); + else if (value > 0x3ffffffe) + { + if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + bformata(glsl, "int(0x%Xu)", value); + else + bformata(glsl, "0x%X", value); + } else if(value <= 1024) // Print anything below 1024 as decimal, and hex after that bformata(glsl, "%d", value); else @@ -412,6 +423,77 @@ void ToGLSL::TranslateVariableNameWithMask(const Operand* psOperand, uint32_t ui TranslateVariableNameWithMask(*psContext->currentGLSLString, psOperand, ui32TOFlag, pui32IgnoreSwizzle, ui32CompMask, piRebase); } +void ToGLSL::DeclareDynamicIndexWrapper(const struct ShaderVarType* psType) +{ + DeclareDynamicIndexWrapper(psType->name.c_str(), psType->Class, psType->Type, psType->Rows, psType->Columns, psType->Elements); +} + +void ToGLSL::DeclareDynamicIndexWrapper(const char* psName, SHADER_VARIABLE_CLASS eClass, SHADER_VARIABLE_TYPE eType, uint32_t ui32Rows, uint32_t ui32Columns, uint32_t ui32Elements) +{ + bstring glsl = psContext->beforeMain; + + const char* suffix = "DynamicIndex"; + const uint32_t maxElemCount = 256; + uint32_t elemCount = ui32Elements; + + if (m_FunctionDefinitions.find(psName) != m_FunctionDefinitions.end()) + return; + + // Add a simple define that one can search and replace on devices that support dynamic indexing the usual way + if (m_FunctionDefinitions.find(suffix) == m_FunctionDefinitions.end()) + { + m_FunctionDefinitions.insert(std::make_pair(suffix, "#define UNITY_DYNAMIC_INDEX_ES2 0\n")); + } + + bcatcstr(glsl, "\n"); + + if (eClass == SVC_STRUCT) + { + bformata(glsl, "%s_Type %s%s", psName, psName, suffix); + } + else if(eClass == SVC_MATRIX_COLUMNS || eClass == SVC_MATRIX_ROWS) + { + if (psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) + { + // Translate matrices into vec4 arrays + bformata(glsl, "%s " HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING "%s%s", HLSLcc::GetConstructorForType(psContext, eType, 4), ui32Rows, ui32Columns, psName, suffix); + elemCount = (eClass == SVC_MATRIX_COLUMNS ? ui32Columns : ui32Rows); + if (ui32Elements > 1) + { + elemCount *= ui32Elements; + } + } + else + { + bformata(glsl, "%s %s%s", HLSLcc::GetMatrixTypeName(psContext, eType, ui32Columns, ui32Rows).c_str(), psName, suffix); + } + } + else if (eClass == SVC_VECTOR && ui32Columns > 1) + { + bformata(glsl, "%s %s%s", HLSLcc::GetConstructorForType(psContext, eType, ui32Columns), psName, suffix); + } + else if ((eClass == SVC_SCALAR) || (eClass == SVC_VECTOR && ui32Columns == 1)) + { + bformata(glsl, "%s %s%s", HLSLcc::GetConstructorForType(psContext, eType, 1), psName, suffix); + } + bformata(glsl, "(int i){\n"); + bcatcstr(glsl, "#if UNITY_DYNAMIC_INDEX_ES2\n"); + bformata(glsl, " return %s[i];\n", psName); + bcatcstr(glsl, "#else\n"); + bformata(glsl, "#define d_ar %s\n", psName); + bformata(glsl, " if (i <= 0) return d_ar[0];"); + + // Let's draw a line somewhere with this workaround + for (int i = 1; i < std::min(elemCount, maxElemCount); i++) { + bformata(glsl, " else if (i == %d) return d_ar[%d];", i, i); + } + bformata(glsl, "\n return d_ar[0];\n"); + bformata(glsl, "#undef d_ar\n"); + bcatcstr(glsl, "#endif\n"); + bformata(glsl, "}\n\n"); + m_FunctionDefinitions.insert(std::make_pair(psName, "")); +} + void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase) { int numParenthesis = 0; @@ -498,7 +580,7 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan if (AreTypesCompatible(eType, ui32TOFlag) == 0) { - if (CanDoDirectCast(eType, requestedType)) + if (CanDoDirectCast(psContext, eType, requestedType) || !HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) { bformata(glsl, "%s(", GetConstructorForType(psContext, requestedType, requestedComponents, false)); numParenthesis++; @@ -635,8 +717,10 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan psContext->psShader->eShaderType == PIXEL_SHADER && psContext->flags & HLSLCC_FLAG_SHADER_FRAMEBUFFER_FETCH) { - if(name == "vs_SV_Target0") - bcatcstr(glsl, "SV_Target0"); + // With ES2, leave separate variable names for input + if (!WriteToFragData(psContext->psShader->eTargetLanguage) && + name.size() == 13 && !strncmp(name.c_str(), "vs_SV_Target", 12)) + bcatcstr(glsl, name.substr(3).c_str()); else bcatcstr(glsl, name.c_str()); } @@ -674,6 +758,13 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan break; } case OPERAND_TYPE_OUTPUT_DEPTH: + if (psContext->psShader->eTargetLanguage == LANG_ES_100 && !psContext->EnableExtension("GL_EXT_frag_depth")) + { + bcatcstr(psContext->extensions, "#ifdef GL_EXT_frag_depth\n"); + bcatcstr(psContext->extensions, "#define gl_FragDepth gl_FragDepthEXT\n"); + bcatcstr(psContext->extensions, "#endif\n"); + } + // fall through case OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL: case OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL: { @@ -819,6 +910,7 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan int32_t index = -1; std::vector arrayIndices; bool isArray = false; + bool isSubpassMS = false; psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, psOperand->aui32ArraySizes[0], &psCBuf); switch(psContext->psShader->eShaderType) @@ -947,7 +1039,7 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan if (eType != SVT_INT && eType != SVT_UINT) opFlags = TO_AUTO_BITCAST_TO_INT; - TranslateOperand(dynamicIndex, psDynIndexOp, opFlags); + TranslateOperand(dynamicIndex, psDynIndexOp, opFlags, 0x1); // We only care about the first component } char *tmp = bstr2cstr(dynamicIndex, '\0'); @@ -966,6 +1058,10 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan bformata(glsl, "%s.", instanceName.c_str()); } + // Special hack for MSAA subpass inputs: the index is actually the sample index, so do special handling later. + if (strncmp(fullName.c_str(), "subpassLoad", 11) == 0 && fullName[fullName.length() - 1] == ',') + isSubpassMS = true; + if (((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) != 0) && ((psVarType->Class == SVC_MATRIX_ROWS) || (psVarType->Class == SVC_MATRIX_COLUMNS))) { // We'll need to add the prefix only to the last section of the name @@ -1017,6 +1113,10 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], tmpSwizzle, psCBuf, &tmpVarType, &tmpIsArray, &tmpArrayIndices, &tmpRebase, psContext->flags); std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(tmpVarType, tmpArrayIndices, dynamicIndexStr, needsIndexCalcRevert, psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES); + // Special hack for MSAA subpass inputs: the index is actually the sample index, so do special handling later. + if (strncmp(fullName.c_str(), "subpassLoad", 11) == 0 && fullName[fullName.length() - 1] == ',') + isSubpassMS = true; + if (tmpVarType->Class == SVC_SCALAR) { bformata(glsl, "%s%s", instanceNamePrefix.c_str(), fullName.c_str()); @@ -1056,15 +1156,23 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan else // hasImmediateStr fullIndexOss << index; + int squareBracketType = hasDynamicIndex ? HaveDynamicIndexing(psContext, psOperand) : 1; + + if (!squareBracketType) + DeclareDynamicIndexWrapper(psVarType); + if (((psVarType->Class == SVC_MATRIX_COLUMNS) || (psVarType->Class == SVC_MATRIX_ROWS)) && (psVarType->Elements > 1) && ((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) == 0)) { // Special handling for old matrix arrays - bformata(glsl, "[%s / 4]", fullIndexOss.str().c_str()); - bformata(glsl, "[%s %% 4]", fullIndexOss.str().c_str()); + bformata(glsl, "%%s / 4%s", squareBrackets[squareBracketType][0], fullIndexOss.str().c_str(), squareBrackets[squareBracketType][1]); + bformata(glsl, "%s%s %% 4%s", squareBrackets[squareBracketType][0], fullIndexOss.str().c_str(), squareBrackets[squareBracketType][1]); } else // This path is atm the default { - bformata(glsl, "[%s]", fullIndexOss.str().c_str()); + if(isSubpassMS) + bformata(glsl, "%s%s%s", " ", fullIndexOss.str().c_str(), ")"); + else + bformata(glsl, "%s%s%s", squareBrackets[squareBracketType][0], fullIndexOss.str().c_str(), squareBrackets[squareBracketType][1]); } } } @@ -1155,15 +1263,25 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan } case OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER: { - bformata(glsl, "ImmCB_%d_%d_%d", psContext->currentPhase, psOperand->ui32RegisterNumber, psOperand->m_Rebase); - if(psOperand->m_SubOperands[0].get()) + if (psContext->IsVulkan()) { - bcatcstr(glsl, "["); //Indexes must be integral. Offset is already taken care of above. - TranslateOperand(psOperand->m_SubOperands[0].get(), TO_FLAG_INTEGER); - bcatcstr(glsl, "]"); + bformata(glsl, "ImmCB_%d", psContext->currentPhase); + TranslateOperandIndex(psOperand, 0); + } + else + { + int squareBracketType = HaveDynamicIndexing(psContext, psOperand); + + bformata(glsl, "ImmCB_%d_%d_%d", psContext->currentPhase, psOperand->ui32RegisterNumber, psOperand->m_Rebase); + if (psOperand->m_SubOperands[0].get()) + { + bformata(glsl, "%s", squareBrackets[squareBracketType][0]); //Indexes must be integral. Offset is already taken care of above. + TranslateOperand(psOperand->m_SubOperands[0].get(), TO_FLAG_INTEGER); + bformata(glsl, "%s", squareBrackets[squareBracketType][1]); + } + if (psOperand->m_Size == 1) + *pui32IgnoreSwizzle = 1; } - if (psOperand->m_Size == 1) - *pui32IgnoreSwizzle = 1; break; } case OPERAND_TYPE_INPUT_DOMAIN_POINT: @@ -1322,6 +1440,10 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan bcatcstr(glsl, "gl_ClipDistance"); *pui32IgnoreSwizzle = 1; break; + case NAME_CULL_DISTANCE: + bcatcstr(glsl, "gl_CullDistance"); + *pui32IgnoreSwizzle = 1; + break; case NAME_VIEWPORT_ARRAY_INDEX: bcatcstr(glsl, "gl_ViewportIndex"); *pui32IgnoreSwizzle = 1; @@ -1341,7 +1463,10 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan *pui32IgnoreSwizzle = 1; break; case NAME_IS_FRONT_FACE: - bcatcstr(glsl, "(gl_FrontFacing ? 0xffffffffu : uint(0))"); + if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + bcatcstr(glsl, "(gl_FrontFacing ? 0xffffffffu : uint(0))"); + else + bcatcstr(glsl, "(gl_FrontFacing ? int(1) : int(0))"); *pui32IgnoreSwizzle = 1; break; case NAME_PRIMITIVE_ID: @@ -1414,7 +1539,7 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan if (hasCtor && (*pui32IgnoreSwizzle == 0)) { - TranslateOperandSwizzleWithMask(psContext, psOperand, ui32CompMask, piRebase ? *piRebase : 0); + TranslateOperandSwizzleWithMask(glsl, psContext, psOperand, ui32CompMask, piRebase ? *piRebase : 0); *pui32IgnoreSwizzle = 1; } @@ -1423,7 +1548,13 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan if (requestedType == SVT_UINT || requestedType == SVT_UINT16 || requestedType == SVT_UINT8) bcatcstr(glsl, ") * 0xffffffffu"); else - bcatcstr(glsl, ") * int(0xffffffffu)"); + { + if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + bcatcstr(glsl, ") * int(0xffffffffu)"); + else + bcatcstr(glsl, ") * int(0xffffffff)"); + } + numParenthesis--; } @@ -1453,6 +1584,12 @@ void ToGLSL::TranslateOperand(bstring glsl, const Operand* psOperand, uint32_t u ui32TOFlag &= ~(TO_AUTO_BITCAST_TO_FLOAT|TO_AUTO_BITCAST_TO_INT|TO_AUTO_BITCAST_TO_UINT); } + if (!HaveUnsignedTypes(psContext->psShader->eTargetLanguage) && (ui32TOFlag & TO_FLAG_UNSIGNED_INTEGER)) + { + ui32TOFlag &= ~TO_FLAG_UNSIGNED_INTEGER; + ui32TOFlag |= TO_FLAG_INTEGER; + } + if(ui32TOFlag & TO_FLAG_NAME_ONLY) { TranslateVariableNameWithMask(glsl, psOperand, ui32TOFlag, &ui32IgnoreSwizzle, OPERAND_4_COMPONENT_MASK_ALL, &iRebase); @@ -1560,14 +1697,23 @@ std::string ResourceName(HLSLCrossCompilerContext* psContext, ResourceGroup grou { oss << name; } - if (((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) && group == RGROUP_UAV) + if (psContext->IsVulkan() && group == RGROUP_UAV) oss << "_origX" << ui32RegisterNumber << "X"; + } else { oss << "UnknownResource" << ui32RegisterNumber; } - return oss.str(); + std::string res = oss.str(); + // Prefix sampler names with 'sampler' unless it already starts with it + if (group == RGROUP_SAMPLER) + { + if (strncmp(res.c_str(), "sampler", 7) != 0) + res.insert(0, "sampler"); + } + + return res; } void ResourceName(bstring targetStr, HLSLCrossCompilerContext* psContext, ResourceGroup group, const uint32_t ui32RegisterNumber, const int bZCompare) { diff --git a/src/toMetal.cpp b/src/toMetal.cpp index 1fb0ad1..e8d1cff 100644 --- a/src/toMetal.cpp +++ b/src/toMetal.cpp @@ -25,10 +25,10 @@ static void PrintStructDeclaration(HLSLCrossCompilerContext *psContext, bstring bformata(glsl, "struct %s\n{\n", sname.c_str()); psContext->indent++; - std::for_each(d.m_Members.begin(), d.m_Members.end(), [&psContext, &glsl](std::string &mem) + std::for_each(d.m_Members.begin(), d.m_Members.end(), [&psContext, &glsl](const MemberDefinitions::value_type &mem) { psContext->AddIndentation(); - bcatcstr(glsl, mem.c_str()); + bcatcstr(glsl, mem.second.c_str()); bcatcstr(glsl, ";\n"); }); @@ -36,10 +36,10 @@ static void PrintStructDeclaration(HLSLCrossCompilerContext *psContext, bstring bcatcstr(glsl, "};\n\n"); } -void ToMetal::PrintStructDeclarations(StructDefinitions &defs) +void ToMetal::PrintStructDeclarations(StructDefinitions &defs, const char *name) { bstring glsl = *psContext->currentGLSLString; - StructDefinition &args = defs[""]; + StructDefinition &args = defs[name]; std::for_each(args.m_Dependencies.begin(), args.m_Dependencies.end(), [this, glsl, &defs](std::string &sname) { PrintStructDeclaration(psContext, glsl, sname, defs); @@ -47,11 +47,40 @@ void ToMetal::PrintStructDeclarations(StructDefinitions &defs) } +static const char * GetPhaseFuncName(SHADER_PHASE_TYPE eType) +{ + switch (eType) + { + default: + case MAIN_PHASE: return ""; + case HS_GLOBAL_DECL_PHASE: return "hs_global_decls"; + case HS_FORK_PHASE: return "fork_phase"; + case HS_CTRL_POINT_PHASE: return "control_point_phase"; + case HS_JOIN_PHASE: return "join_phase"; + } +} + +static void DoHullShaderPassthrough(HLSLCrossCompilerContext *psContext) +{ + uint32_t i; + bstring glsl = *psContext->currentGLSLString; + + for (i = 0; i < psContext->psShader->sInfo.psInputSignatures.size(); i++) + { + const ShaderInfo::InOutSignature *psSig = &psContext->psShader->sInfo.psInputSignatures[i]; + + psContext->AddIndentation(); + bformata(glsl, "%s%s%d = %scp[controlPointID].%s%d;\n", psContext->outputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex, psContext->inputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex); + } +} + bool ToMetal::Translate() { bstring glsl; uint32_t i; Shader* psShader = psContext->psShader; + uint32_t ui32Phase; + psContext->psTranslator = this; SetIOPrefixes(); @@ -79,35 +108,261 @@ bool ToMetal::Translate() psContext->ClearDependencyData(); + const SHADER_PHASE_TYPE ePhaseFuncCallOrder[3] = { HS_CTRL_POINT_PHASE, HS_FORK_PHASE, HS_JOIN_PHASE }; + uint32_t ui32PhaseCallIndex; + int hasControlPointPhase = 0; + + const int maxThreadsPerThreadGroup = 32; + int numPatchesInThreadGroup = 0; + bool hasControlPoint = false; + bool hasPatchConstant = false; + std::string tessVertexFunctionArguments; + + if ((psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER) && (psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) != 0) + { + if (psContext->psDependencies) + { + m_StructDefinitions[""].m_Members = psContext->psDependencies->m_SharedFunctionMembers; + m_TextureSlots = psContext->psDependencies->m_SharedTextureSlots; + m_SamplerSlots = psContext->psDependencies->m_SharedSamplerSlots; + m_BufferSlots = psContext->psDependencies->m_SharedBufferSlots; + hasControlPoint = psContext->psDependencies->hasControlPoint; + hasPatchConstant = psContext->psDependencies->hasPatchConstant; + } + } + ClampPartialPrecisions(); - ShaderPhase &phase = psShader->asPhases[0]; - phase.UnvectorizeImmMoves(); - psContext->DoDataTypeAnalysis(&phase); - phase.ResolveUAVProperties(); - ReserveUAVBindingSlots(&phase); // TODO: unify slot allocation code between gl/metal/vulkan - phase.PruneConstArrays(); - HLSLcc::DoLoopTransform(phase); + for (ui32Phase = 0; ui32Phase < psShader->asPhases.size(); ui32Phase++) + { + ShaderPhase &phase = psShader->asPhases[ui32Phase]; + phase.UnvectorizeImmMoves(); + psContext->DoDataTypeAnalysis(&phase); + phase.ResolveUAVProperties(); + ReserveUAVBindingSlots(&phase); // TODO: unify slot allocation code between gl/metal/vulkan + HLSLcc::DoLoopTransform(psContext, phase); + } psShader->PruneTempRegisters(); - bcatcstr(glsl, "#include \n#include \nusing namespace metal;\n"); + //Special case. Can have multiple phases. + if(psShader->eShaderType == HULL_SHADER) + { + psShader->ConsolidateHullTempVars(); - for (i = 0; i < psShader->asPhases[0].psDecl.size(); ++i) - TranslateDeclaration(&psShader->asPhases[0].psDecl[i]); + // Find out if we have a passthrough hull shader + for (ui32Phase = 2; ui32Phase < psShader->asPhases.size(); ui32Phase++) + { + if (psShader->asPhases[ui32Phase].ePhase == HS_CTRL_POINT_PHASE) + hasControlPointPhase = 1; + } + } - // Output default implementations for framebuffer index remap if needed - if(m_NeedFBOutputRemapDecl) - bcatcstr(glsl, "#ifndef XLT_REMAP_O\n#define XLT_REMAP_O {0, 1, 2, 3, 4, 5, 6, 7}\n#endif\nconstexpr constant uint xlt_remap_o[] = XLT_REMAP_O;\n"); - if(m_NeedFBInputRemapDecl) - bcatcstr(glsl, "#ifndef XLT_REMAP_I\n#define XLT_REMAP_I {0, 1, 2, 3, 4, 5, 6, 7}\n#endif\nconstexpr constant uint xlt_remap_i[] = XLT_REMAP_I;\n"); - - DeclareClipPlanes(&psShader->asPhases[0].psDecl[0], psShader->asPhases[0].psDecl.size()); - GenerateTexturesReflection(&psContext->m_Reflection); + // Hull and Domain shaders get merged into vertex shader output + if (!(psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER)) + { + if (psContext->flags & HLSLCC_FLAG_DISABLE_FASTMATH) + bcatcstr(glsl, "#define UNITY_DISABLE_FASTMATH\n"); + bcatcstr(glsl, "#include \n#include \nusing namespace metal;\n"); + bcatcstr(glsl, "\n#if !(__HAVE_FMA__)\n#define fma(a,b,c) ((a) * (b) + (c))\n#endif\n\n"); + } + + if (psShader->eShaderType == HULL_SHADER) + { + psContext->indent++; + + // Phase 1 is always the global decls phase, no instructions + for(i=0; i < psShader->asPhases[1].psDecl.size(); ++i) + { + TranslateDeclaration(&psShader->asPhases[1].psDecl[i]); + } + + if (hasControlPointPhase == 0) + { + DeclareHullShaderPassthrough(); + } + + for(ui32PhaseCallIndex=0; ui32PhaseCallIndex<3; ui32PhaseCallIndex++) + { + for (ui32Phase = 2; ui32Phase < psShader->asPhases.size(); ui32Phase++) + { + ShaderPhase *psPhase = &psShader->asPhases[ui32Phase]; + if (psPhase->ePhase != ePhaseFuncCallOrder[ui32PhaseCallIndex]) + continue; + psContext->currentPhase = ui32Phase; + +#ifdef _DEBUG + // bformata(glsl, "//%s declarations\n", GetPhaseFuncName(psPhase->ePhase)); +#endif + for (i = 0; i < psPhase->psDecl.size(); ++i) + { + TranslateDeclaration(&psPhase->psDecl[i]); + } + } + } + + psContext->indent--; + + numPatchesInThreadGroup = maxThreadsPerThreadGroup / std::max(psShader->sInfo.ui32TessInputControlPointCount, psShader->sInfo.ui32TessOutputControlPointCount); + } + else + { + for (i = 0; i < psShader->asPhases[0].psDecl.size(); ++i) + TranslateDeclaration(&psShader->asPhases[0].psDecl[i]); + + // Output default implementations for framebuffer index remap if needed + if (m_NeedFBOutputRemapDecl) + bcatcstr(glsl, "#ifndef XLT_REMAP_O\n\t#define XLT_REMAP_O {0, 1, 2, 3, 4, 5, 6, 7}\n#endif\nconstexpr constant uint xlt_remap_o[] = XLT_REMAP_O;\n"); + if (m_NeedFBInputRemapDecl) + bcatcstr(glsl, "#ifndef XLT_REMAP_I\n\t#define XLT_REMAP_I {0, 1, 2, 3, 4, 5, 6, 7}\n#endif\nconstexpr constant uint xlt_remap_i[] = XLT_REMAP_I;\n"); + + DeclareClipPlanes(&psShader->asPhases[0].psDecl[0], psShader->asPhases[0].psDecl.size()); + GenerateTexturesReflection(&psContext->m_Reflection); + } + + if (psShader->eShaderType == HULL_SHADER) + { + psContext->currentPhase = MAIN_PHASE; + + if (m_StructDefinitions["Mtl_ControlPoint"].m_Members.size() > 0) + { + hasControlPoint = true; + + m_StructDefinitions["Mtl_ControlPoint"].m_Dependencies.push_back("Mtl_ControlPoint"); + m_StructDefinitions["Mtl_ControlPointIn"].m_Dependencies.push_back("Mtl_ControlPointIn"); + PrintStructDeclarations(m_StructDefinitions, "Mtl_ControlPoint"); + PrintStructDeclarations(m_StructDefinitions, "Mtl_ControlPointIn"); + } + + if (m_StructDefinitions["Mtl_PatchConstant"].m_Members.size() > 0) + { + hasPatchConstant = true; + + m_StructDefinitions["Mtl_PatchConstant"].m_Dependencies.push_back("Mtl_PatchConstant"); + m_StructDefinitions["Mtl_PatchConstantIn"].m_Dependencies.push_back("Mtl_PatchConstantIn"); + PrintStructDeclarations(m_StructDefinitions, "Mtl_PatchConstant"); + PrintStructDeclarations(m_StructDefinitions, "Mtl_PatchConstantIn"); + } + + m_StructDefinitions["Mtl_KernelPatchInfo"].m_Members.push_back(std::make_pair("numPatches", "uint numPatches")); + m_StructDefinitions["Mtl_KernelPatchInfo"].m_Members.push_back(std::make_pair("numControlPointsPerPatch", "ushort numControlPointsPerPatch")); + + if (m_StructDefinitions["Mtl_KernelPatchInfo"].m_Members.size() > 0) + { + m_StructDefinitions["Mtl_KernelPatchInfo"].m_Dependencies.push_back("Mtl_KernelPatchInfo"); + PrintStructDeclarations(m_StructDefinitions, "Mtl_KernelPatchInfo"); + } + + if (m_StructDefinitions[GetInputStructName()].m_Members.size() > 0) + { + m_StructDefinitions[GetInputStructName()].m_Dependencies.push_back(GetInputStructName()); + + // Hack, we're reusing Mtl_VertexOut as an hull shader input array, so no need to declare original contents + m_StructDefinitions[GetInputStructName()].m_Members.clear(); + + bstring vertexOut = bfromcstr(""); + bformata(vertexOut, "Mtl_VertexOut cp[%d]", psShader->sInfo.ui32TessOutputControlPointCount); + m_StructDefinitions[GetInputStructName()].m_Members.push_back(std::make_pair("cp", (const char *) vertexOut->data)); + bdestroy(vertexOut); + } + + if(psContext->psDependencies) + { + for (auto itr = psContext->psDependencies->m_SharedFunctionMembers.begin(); itr != psContext->psDependencies->m_SharedFunctionMembers.end(); itr++) + { + tessVertexFunctionArguments += itr->first.c_str(); + tessVertexFunctionArguments += ", "; + } + } + } + + if (psShader->eShaderType == DOMAIN_SHADER) + { + // For preserving data layout, reuse Mtl_ControlPoint/Mtl_PatchConstant from hull shader + if (hasControlPoint) + m_StructDefinitions[GetInputStructName()].m_Members.push_back(std::make_pair("cp", "patch_control_point cp")); + if (hasPatchConstant) + m_StructDefinitions[GetInputStructName()].m_Members.push_back(std::make_pair("patch", "Mtl_PatchConstantIn patch")); + } + + if ((psShader->eShaderType == VERTEX_SHADER || psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER) && (psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) != 0) + { + if (psContext->psDependencies) + { + psContext->psDependencies->m_SharedFunctionMembers = m_StructDefinitions[""].m_Members; + psContext->psDependencies->m_SharedTextureSlots = m_TextureSlots; + psContext->psDependencies->m_SharedTextureSlots.SaveTotalShaderStageAllocationsCount(); + psContext->psDependencies->m_SharedSamplerSlots = m_SamplerSlots; + psContext->psDependencies->m_SharedSamplerSlots.SaveTotalShaderStageAllocationsCount(); + psContext->psDependencies->m_SharedBufferSlots = m_BufferSlots; + psContext->psDependencies->m_SharedBufferSlots.SaveTotalShaderStageAllocationsCount(); + } + } if (m_StructDefinitions[GetInputStructName()].m_Members.size() > 0) { - m_StructDefinitions[""].m_Members.push_back(GetInputStructName() + " input [[ stage_in ]]"); + if (psShader->eShaderType == HULL_SHADER) + { + m_StructDefinitions[""].m_Members.push_back(std::make_pair("vertexInput", "Mtl_VertexIn vertexInput [[ stage_in ]]")); + m_StructDefinitions[""].m_Members.push_back(std::make_pair("tID", "uint2 tID [[ thread_position_in_grid ]]")); + m_StructDefinitions[""].m_Members.push_back(std::make_pair("groupID", "ushort2 groupID [[ threadgroup_position_in_grid ]]")); + + bstring buffer = bfromcstr(""); + uint32_t slot = 0; + + if (hasControlPoint) + { + slot = m_BufferSlots.GetBindingSlot(0xffff - 1, BindingSlotAllocator::ConstantBuffer); + bformata(buffer, "device Mtl_ControlPoint *controlPoints [[ buffer(%d) ]]", slot); + m_StructDefinitions[""].m_Members.push_back(std::make_pair("controlPoints", (const char *) buffer->data)); + btrunc(buffer, 0); + } + + if (hasPatchConstant) + { + slot = m_BufferSlots.GetBindingSlot(0xffff - 2, BindingSlotAllocator::ConstantBuffer); + bformata(buffer, "device Mtl_PatchConstant *patchConstants [[ buffer(%d) ]]", slot); + m_StructDefinitions[""].m_Members.push_back(std::make_pair("patchConstants", (const char *) buffer->data)); + btrunc(buffer, 0); + } + + slot = m_BufferSlots.GetBindingSlot(0xffff - 3, BindingSlotAllocator::ConstantBuffer); + bformata(buffer, "device %s *tessFactors [[ buffer(%d) ]]", psShader->sInfo.eTessDomain == TESSELLATOR_DOMAIN_QUAD ? "MTLQuadTessellationFactorsHalf" : "MTLTriangleTessellationFactorsHalf", slot); + m_StructDefinitions[""].m_Members.push_back(std::make_pair("tessFactors", (const char *) buffer->data)); + btrunc(buffer, 0); + + slot = m_BufferSlots.GetBindingSlot(0xffff - 4, BindingSlotAllocator::ConstantBuffer); + bformata(buffer, "constant Mtl_KernelPatchInfo &patchInfo [[ buffer(%d) ]]", slot); + m_StructDefinitions[""].m_Members.push_back(std::make_pair("patchInfo", (const char *) buffer->data)); + btrunc(buffer, 0); + + bdestroy(buffer); + } + else if (psShader->eShaderType == VERTEX_SHADER && (psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) != 0) + { + m_StructDefinitions[""].m_Members.push_back(std::make_pair("input", GetInputStructName() + " input")); + } + else + { + m_StructDefinitions[""].m_Members.push_back(std::make_pair("input", GetInputStructName() + " input [[ stage_in ]]")); + } + + if ((psShader->eShaderType == VERTEX_SHADER || psShader->eShaderType == HULL_SHADER) && (psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) != 0) + { + // m_StructDefinitions is inherited between tessellation shader stages but some builtins need exceptions + std::for_each(m_StructDefinitions[""].m_Members.begin(), m_StructDefinitions[""].m_Members.end(), [&psShader](MemberDefinitions::value_type &mem) + { + if (mem.first == "mtl_InstanceID") + { + if (psShader->eShaderType == VERTEX_SHADER) + mem.second.assign("uint mtl_InstanceID"); + else if (psShader->eShaderType == HULL_SHADER) + mem.second.assign("// mtl_InstanceID passed through groupID"); + } + }); + + } + m_StructDefinitions[""].m_Dependencies.push_back(GetInputStructName()); } @@ -123,55 +378,285 @@ bool ToMetal::Translate() psContext->currentGLSLString = &bodyglsl; + bool popPragmaDiagnostic = false; + if (psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER) + { + popPragmaDiagnostic = true; + + bcatcstr(bodyglsl, "#pragma clang diagnostic push\n"); + bcatcstr(bodyglsl, "#pragma clang diagnostic ignored \"-Wunused-parameter\"\n"); + } + switch (psShader->eShaderType) { case VERTEX_SHADER: - bcatcstr(bodyglsl, "vertex Mtl_VertexOut xlatMtlMain(\n"); + if ((psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) == 0) + bcatcstr(bodyglsl, "vertex Mtl_VertexOut xlatMtlMain(\n"); + else + bcatcstr(bodyglsl, "static Mtl_VertexOut vertexFunction(\n"); break; case PIXEL_SHADER: + if (psShader->sInfo.bEarlyFragmentTests) + bcatcstr(bodyglsl, "[[early_fragment_tests]]\n"); bcatcstr(bodyglsl, "fragment Mtl_FragmentOut xlatMtlMain(\n"); break; case COMPUTE_SHADER: bcatcstr(bodyglsl, "kernel void computeMain(\n"); break; + case HULL_SHADER: + bcatcstr(bodyglsl, "kernel void patchKernel(\n"); + break; + case DOMAIN_SHADER: + { + const char *patchType = psShader->sInfo.eTessDomain == TESSELLATOR_DOMAIN_QUAD ? "quad" : "triangle"; + uint32_t patchCount = psShader->sInfo.ui32TessOutputControlPointCount; + bformata(bodyglsl, "[[patch(%s, %d)]] vertex Mtl_VertexOutPostTess xlatMtlMain(\n", patchType, patchCount); + break; + } default: // Not supported ASSERT(0); return false; } + psContext->indent++; - for (auto itr = m_StructDefinitions[""].m_Members.begin(); itr != m_StructDefinitions[""].m_Members.end(); itr++) + for (auto itr = m_StructDefinitions[""].m_Members.begin(); ;) { + if (itr == m_StructDefinitions[""].m_Members.end()) + break; + psContext->AddIndentation(); - bcatcstr(bodyglsl, itr->c_str()); - if (itr + 1 != m_StructDefinitions[""].m_Members.end()) + bcatcstr(bodyglsl, itr->second.c_str()); + + itr++; + if (itr != m_StructDefinitions[""].m_Members.end()) bcatcstr(bodyglsl, ",\n"); } bcatcstr(bodyglsl, ")\n{\n"); + + if (popPragmaDiagnostic) + bcatcstr(bodyglsl, "#pragma clang diagnostic pop\n"); + if (psShader->eShaderType != COMPUTE_SHADER) { - psContext->AddIndentation(); - bcatcstr(bodyglsl, GetOutputStructName().c_str()); - bcatcstr(bodyglsl, " output;\n"); + if (m_StructDefinitions[GetOutputStructName().c_str()].m_Members.size() > 0) + { + psContext->AddIndentation(); + bcatcstr(bodyglsl, GetOutputStructName().c_str()); + bcatcstr(bodyglsl, " output;\n"); + } } - if (psContext->psShader->asPhases[0].earlyMain->slen > 1) + if (psShader->eShaderType == HULL_SHADER) { -#ifdef _DEBUG + if (hasPatchConstant) + { + psContext->AddIndentation(); + bcatcstr(bodyglsl, "Mtl_PatchConstant patch;\n"); + } + psContext->AddIndentation(); - bcatcstr(bodyglsl, "//--- Start Early Main ---\n"); -#endif - bconcat(bodyglsl, psContext->psShader->asPhases[0].earlyMain); -#ifdef _DEBUG + bformata(bodyglsl, "const uint numPatchesInThreadGroup = %d;\n", numPatchesInThreadGroup); // Hardcoded because of threadgroup array below psContext->AddIndentation(); - bcatcstr(bodyglsl, "//--- End Early Main ---\n"); -#endif + bcatcstr(bodyglsl, "const uint patchID = (tID.x / patchInfo.numControlPointsPerPatch);\n"); + psContext->AddIndentation(); + bcatcstr(bodyglsl, "const bool patchValid = (patchID < patchInfo.numPatches);\n"); + + psContext->AddIndentation(); + bcatcstr(bodyglsl, "const uint mtl_InstanceID = groupID.y;\n"); + psContext->AddIndentation(); + bcatcstr(bodyglsl, "const uint internalPatchID = mtl_InstanceID * patchInfo.numPatches + patchID;\n"); + psContext->AddIndentation(); + bcatcstr(bodyglsl, "const uint patchIDInThreadGroup = (patchID % numPatchesInThreadGroup);\n"); + + psContext->AddIndentation(); + bcatcstr(bodyglsl, "const uint controlPointID = (tID.x % patchInfo.numControlPointsPerPatch);\n"); + psContext->AddIndentation(); + bcatcstr(bodyglsl, "const uint internalControlPointID = (mtl_InstanceID * (patchInfo.numControlPointsPerPatch * patchInfo.numPatches)) + tID.x;\n"); + + psContext->AddIndentation(); + bformata(bodyglsl, "threadgroup %s inputGroup[numPatchesInThreadGroup];\n", GetInputStructName().c_str()); + psContext->AddIndentation(); + bformata(bodyglsl, "threadgroup %s &input = inputGroup[patchIDInThreadGroup];\n", GetInputStructName().c_str()); + + psContext->AddIndentation(); + std::string tessFactorBufferType = psShader->sInfo.eTessDomain == TESSELLATOR_DOMAIN_QUAD ? "MTLQuadTessellationFactorsHalf" : "MTLTriangleTessellationFactorsHalf"; + bformata(bodyglsl, "%s tessFactor;\n", tessFactorBufferType.c_str()); } - for (i = 0; i < psShader->asPhases[0].psInst.size(); ++i) + // There are cases when there are no control point phases and we have to do passthrough + if (psShader->eShaderType == HULL_SHADER && hasControlPointPhase == 0) { - TranslateInstruction(&psShader->asPhases[0].psInst[i]); + psContext->AddIndentation(); + bcatcstr(bodyglsl, "if (patchValid) {\n"); + psContext->indent++; + + // Passthrough control point phase, run the rest only once per patch + psContext->AddIndentation(); + bformata(bodyglsl, "input.cp[controlPointID] = vertexFunction(%svertexInput);\n", tessVertexFunctionArguments.c_str()); + + DoHullShaderPassthrough(psContext); + + psContext->indent--; + psContext->AddIndentation(); + bcatcstr(bodyglsl, "}\n"); + + psContext->AddIndentation(); + bcatcstr(bodyglsl, "threadgroup_barrier(mem_flags::mem_threadgroup);\n"); + + psContext->AddIndentation(); + bcatcstr(bodyglsl, "if (!patchValid) {\n"); + psContext->indent++; + psContext->AddIndentation(); + bcatcstr(bodyglsl, "return;\n"); + psContext->indent--; + psContext->AddIndentation(); + bcatcstr(bodyglsl, "}\n"); + } + + if (psShader->eShaderType == HULL_SHADER) + { + for(ui32PhaseCallIndex=0; ui32PhaseCallIndex<3; ui32PhaseCallIndex++) + { + for (ui32Phase = 2; ui32Phase < psShader->asPhases.size(); ui32Phase++) + { + uint32_t i; + ShaderPhase *psPhase = &psShader->asPhases[ui32Phase]; + if (psPhase->ePhase != ePhaseFuncCallOrder[ui32PhaseCallIndex]) + continue; + psContext->currentPhase = ui32Phase; + + if (psPhase->earlyMain->slen > 1) + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(bodyglsl, "//--- Start Early Main ---\n"); +#endif + bconcat(bodyglsl, psPhase->earlyMain); +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(bodyglsl, "//--- End Early Main ---\n"); +#endif + } + + psContext->AddIndentation(); + bformata(bodyglsl, "// %s%d\n", GetPhaseFuncName(psShader->asPhases[ui32Phase].ePhase), ui32Phase); + if (psPhase->ui32InstanceCount > 1) + { + psContext->AddIndentation(); + bformata(bodyglsl, "for (int phaseInstanceID = 0; phaseInstanceID < %d; phaseInstanceID++) {\n", psPhase->ui32InstanceCount); + psContext->indent++; + } + else + { + if (psContext->currentPhase == HS_CTRL_POINT_PHASE && hasControlPointPhase == 1) + { + psContext->AddIndentation(); + bcatcstr(bodyglsl, "if (patchValid) {\n"); + psContext->indent++; + + psContext->AddIndentation(); + bformata(bodyglsl, "input.cp[controlPointID] = vertexFunction(%svertexInput);\n", tessVertexFunctionArguments.c_str()); + } + else + { + psContext->AddIndentation(); + bcatcstr(bodyglsl, "{\n"); + psContext->indent++; + } + } + + if (psPhase->psInst.size() > 0) + { + //The minus one here is remove the return statement at end of phases. + //We don't want to translate that, we'll just end the function body. + ASSERT(psPhase->psInst[psPhase->psInst.size() - 1].eOpcode == OPCODE_RET); + for (i = 0; i < psPhase->psInst.size() - 1; ++i) + { + TranslateInstruction(&psPhase->psInst[i]); + } + } + + psContext->indent--; + psContext->AddIndentation(); + bformata(bodyglsl, "}\n"); + + if (psPhase->hasPostShaderCode) + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(bodyglsl, "//--- Post shader code ---\n"); +#endif + bconcat(bodyglsl, psPhase->postShaderCode); +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(bodyglsl, "//--- End post shader code ---\n"); +#endif + } + + if (psShader->asPhases[ui32Phase].ePhase == HS_CTRL_POINT_PHASE) + { + // We're done printing control point phase, run the rest only once per patch + psContext->AddIndentation(); + bcatcstr(bodyglsl, "threadgroup_barrier(mem_flags::mem_threadgroup);\n"); + + psContext->AddIndentation(); + bcatcstr(bodyglsl, "if (!patchValid) {\n"); + psContext->indent++; + psContext->AddIndentation(); + bcatcstr(bodyglsl, "return;\n"); + psContext->indent--; + psContext->AddIndentation(); + bcatcstr(bodyglsl, "}\n"); + } + } + } + + if (hasControlPoint) + { + psContext->AddIndentation(); + bcatcstr(bodyglsl, "controlPoints[internalControlPointID] = output;\n"); + } + + psContext->AddIndentation(); + bcatcstr(bodyglsl, "tessFactors[internalPatchID] = tessFactor;\n"); + + if (hasPatchConstant) + { + psContext->AddIndentation(); + bcatcstr(bodyglsl, "patchConstants[internalPatchID] = patch;\n"); + } + + if(psContext->psDependencies) + { + //Save partitioning and primitive type for use by domain shader. + psContext->psDependencies->eTessOutPrim = psShader->sInfo.eTessOutPrim; + psContext->psDependencies->eTessPartitioning = psShader->sInfo.eTessPartitioning; + psContext->psDependencies->numPatchesInThreadGroup = numPatchesInThreadGroup; + psContext->psDependencies->hasControlPoint = hasControlPoint; + psContext->psDependencies->hasPatchConstant = hasPatchConstant; + } + } + else + { + if (psContext->psShader->asPhases[0].earlyMain->slen > 1) + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(bodyglsl, "//--- Start Early Main ---\n"); +#endif + bconcat(bodyglsl, psContext->psShader->asPhases[0].earlyMain); +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(bodyglsl, "//--- End Early Main ---\n"); +#endif + } + + for (i = 0; i < psShader->asPhases[0].psInst.size(); ++i) + { + TranslateInstruction(&psShader->asPhases[0].psInst[i]); + } } psContext->indent--; @@ -179,7 +664,60 @@ bool ToMetal::Translate() bcatcstr(bodyglsl, "}\n"); psContext->currentGLSLString = &glsl; - + + if(psShader->eShaderType == HULL_SHADER && psContext->psDependencies) + { + psContext->m_Reflection.OnTessellationKernelInfo(psContext->psDependencies->m_SharedBufferSlots.SaveTotalShaderStageAllocationsCount()); + } + + if(psShader->eShaderType == DOMAIN_SHADER && psContext->psDependencies) + { + int mtlTessellationPartitionMode = -1; + int mtlWinding = -1; + + switch (psContext->psDependencies->eTessPartitioning) + { + case TESSELLATOR_PARTITIONING_INTEGER: + mtlTessellationPartitionMode = 1; // MTLTessellationPartitionModeInteger + break; + case TESSELLATOR_PARTITIONING_POW2: + mtlTessellationPartitionMode = 0; // MTLTessellationPartitionModePow2 + break; + case TESSELLATOR_PARTITIONING_FRACTIONAL_ODD: + mtlTessellationPartitionMode = 2; // MTLTessellationPartitionModeFractionalOdd + break; + case TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN: + mtlTessellationPartitionMode = 3; // MTLTessellationPartitionModeFractionalEven + break; + case TESSELLATOR_PARTITIONING_UNDEFINED: + default: + ASSERT(0); + break; + } + + switch (psContext->psDependencies->eTessOutPrim) + { + case TESSELLATOR_OUTPUT_TRIANGLE_CW: + mtlWinding = 0; // MTLWindingClockwise + break; + case TESSELLATOR_OUTPUT_TRIANGLE_CCW: + mtlWinding = 1; // MTLWindingCounterClockwise + break; + case TESSELLATOR_OUTPUT_POINT: + psContext->m_Reflection.OnDiagnostics("Metal Tessellation: outputtopology(\"point\") not supported.", 0, true); + break; + case TESSELLATOR_OUTPUT_LINE: + psContext->m_Reflection.OnDiagnostics("Metal Tessellation: outputtopology(\"line\") not supported.", 0, true); + break; + case TESSELLATOR_OUTPUT_UNDEFINED: + default: + ASSERT(0); + break; + } + + psContext->m_Reflection.OnTessellationInfo(mtlTessellationPartitionMode, mtlWinding, (uint32_t) psContext->psDependencies->fMaxTessFactor, psContext->psDependencies->numPatchesInThreadGroup); + } + bcatcstr(glsl, m_ExtraGlobalDefinitions.c_str()); // Print out extra functions we generated @@ -212,6 +750,13 @@ std::string ToMetal::GetOutputStructName() const return "Mtl_VertexOut"; case PIXEL_SHADER: return "Mtl_FragmentOut"; + case HULL_SHADER: + if (psContext->psShader->asPhases[psContext->currentPhase].ePhase == HS_FORK_PHASE || + psContext->psShader->asPhases[psContext->currentPhase].ePhase == HS_JOIN_PHASE) + return "Mtl_PatchConstant"; + return "Mtl_ControlPoint"; + case DOMAIN_SHADER: + return "Mtl_VertexOutPostTess"; default: ASSERT(0); return ""; @@ -228,17 +773,50 @@ std::string ToMetal::GetInputStructName() const return "Mtl_FragmentIn"; case COMPUTE_SHADER: return "Mtl_KernelIn"; + case HULL_SHADER: + return "Mtl_HullIn"; + case DOMAIN_SHADER: + return "Mtl_VertexInPostTess"; default: ASSERT(0); return ""; } } +std::string ToMetal::GetCBName(const std::string& cbName) const +{ + std::string output = cbName; + if (cbName[0] == '$') + { + // "$Globals" should have different names in different shaders so that CbKey can discretely identify a CB. + switch (psContext->psShader->eShaderType) + { + case VERTEX_SHADER: + case HULL_SHADER: + case DOMAIN_SHADER: + output[0] = 'V'; + break; + case PIXEL_SHADER: + output[0] = 'F'; + break; + case COMPUTE_SHADER: + output = cbName.substr(1); + break; + default: + ASSERT(0); + break; + } + } + return output; +} + void ToMetal::SetIOPrefixes() { switch (psContext->psShader->eShaderType) { case VERTEX_SHADER: + case HULL_SHADER: + case DOMAIN_SHADER: psContext->inputPrefix = "input."; psContext->outputPrefix = "output."; break; diff --git a/src/toMetalDeclaration.cpp b/src/toMetalDeclaration.cpp index 538972c..bce240c 100644 --- a/src/toMetalDeclaration.cpp +++ b/src/toMetalDeclaration.cpp @@ -16,9 +16,44 @@ #endif // #ifndef fpcheck -bool ToMetal::TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix) +bool ToMetal::TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix, int *iIgnoreRedirect) { - if (sig && (sig->eSystemValueType == NAME_POSITION || (sig->semanticName == "POS" && sig->ui32SemanticIndex == 0)) && psContext->psShader->eShaderType == VERTEX_SHADER) + if (psContext->psShader->eShaderType == HULL_SHADER && sig && sig->semanticName == "SV_TessFactor") + { + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + ASSERT(sig->ui32SemanticIndex <= 3); + std::ostringstream oss; + oss << "tessFactor.edgeTessellationFactor[" << sig->ui32SemanticIndex << "]"; + result = oss.str(); + if (outSkipPrefix != NULL) *outSkipPrefix = true; + if (iIgnoreRedirect != NULL) *iIgnoreRedirect = 1; + return true; + } + + if (psContext->psShader->eShaderType == HULL_SHADER && sig && sig->semanticName == "SV_InsideTessFactor") + { + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + ASSERT(sig->ui32SemanticIndex <= 1); + std::ostringstream oss; + oss << "tessFactor.insideTessellationFactor"; + if (psContext->psShader->sInfo.eTessDomain != TESSELLATOR_DOMAIN_TRI) + oss << "[" << sig->ui32SemanticIndex << "]"; + result = oss.str(); + if (outSkipPrefix != NULL) *outSkipPrefix = true; + if (iIgnoreRedirect != NULL) *iIgnoreRedirect = 1; + return true; + } + + if (sig && sig->semanticName == "SV_InstanceID") + { + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + } + + if (sig && ((sig->eSystemValueType == NAME_POSITION || sig->semanticName == "POS") && sig->ui32SemanticIndex == 0) && + ((psContext->psShader->eShaderType == VERTEX_SHADER && (psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) == 0))) { result = "mtl_Position"; return true; @@ -29,8 +64,10 @@ bool ToMetal::TranslateSystemValue(const Operand *psOperand, const ShaderInfo::I switch (sig->eSystemValueType) { case NAME_POSITION: - ASSERT(psContext->psShader->eShaderType == PIXEL_SHADER); - result = "mtl_FragCoord"; + if (psContext->psShader->eShaderType == PIXEL_SHADER) + result = "mtl_FragCoord"; + else + result = "mtl_Position"; if (outSkipPrefix != NULL) *outSkipPrefix = true; return true; case NAME_RENDER_TARGET_ARRAY_INDEX: @@ -41,17 +78,19 @@ bool ToMetal::TranslateSystemValue(const Operand *psOperand, const ShaderInfo::I return true; case NAME_CLIP_DISTANCE: { - // this is temp variabe, declaration and redirecting to actual output is handled in DeclareClipPlanes + // this is temp variable, declaration and redirecting to actual output is handled in DeclareClipPlanes char tmpName[128]; sprintf(tmpName, "phase%d_ClipDistance%d", psContext->currentPhase, sig->ui32SemanticIndex); result = tmpName; if (outSkipPrefix != NULL) *outSkipPrefix = true; + if (iIgnoreRedirect != NULL) *iIgnoreRedirect = 1; return true; } - /* case NAME_VIEWPORT_ARRAY_INDEX: - result = "gl_ViewportIndex"; - if (puiIgnoreSwizzle) - *puiIgnoreSwizzle = 1; - return true;*/ + case NAME_VIEWPORT_ARRAY_INDEX: + result = "mtl_ViewPortIndex"; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; case NAME_VERTEX_ID: result = "mtl_VertexID"; if (outSkipPrefix != NULL) *outSkipPrefix = true; @@ -82,6 +121,16 @@ bool ToMetal::TranslateSystemValue(const Operand *psOperand, const ShaderInfo::I } } + if (psContext->psShader->asPhases[psContext->currentPhase].ePhase == HS_CTRL_POINT_PHASE || + psContext->psShader->asPhases[psContext->currentPhase].ePhase == HS_FORK_PHASE) + { + + std::ostringstream oss; + oss << sig->semanticName << sig->ui32SemanticIndex; + result = oss.str(); + return true; + } + switch (psOperand->eType) { case OPERAND_TYPE_INPUT_COVERAGE_MASK: @@ -109,6 +158,12 @@ bool ToMetal::TranslateSystemValue(const Operand *psOperand, const ShaderInfo::I if (pui32IgnoreSwizzle) *pui32IgnoreSwizzle = 1; return true; + case OPERAND_TYPE_INPUT_DOMAIN_POINT: + result = "mtl_TessCoord"; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; case OPERAND_TYPE_OUTPUT_DEPTH: case OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL: case OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL: @@ -128,6 +183,23 @@ bool ToMetal::TranslateSystemValue(const Operand *psOperand, const ShaderInfo::I *pui32IgnoreSwizzle = 1; return true; } + case OPERAND_TYPE_INPUT_PATCH_CONSTANT: + { + std::ostringstream oss; + oss << sig->semanticName << sig->ui32SemanticIndex; + result = oss.str(); + if (outSkipPrefix != NULL) *outSkipPrefix = true; + return true; + } + case OPERAND_TYPE_INPUT_CONTROL_POINT: + { + std::ostringstream oss; + oss << sig->semanticName << sig->ui32SemanticIndex; + result = oss.str(); + if (outSkipPrefix != NULL) *outSkipPrefix = true; + return true; + break; + } default: ASSERT(0); break; @@ -146,37 +218,37 @@ void ToMetal::DeclareBuiltinInput(const Declaration *psDecl) { case NAME_POSITION: ASSERT(psContext->psShader->eShaderType == PIXEL_SHADER); - m_StructDefinitions[""].m_Members.push_back("float4 mtl_FragCoord [[ position ]]"); + m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_FragCoord", "float4 mtl_FragCoord [[ position ]]")); break; case NAME_RENDER_TARGET_ARRAY_INDEX: // Only supported on a Mac - m_StructDefinitions[""].m_Members.push_back("uint mtl_Layer [[ render_target_array_index ]]"); + m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_Layer", "uint mtl_Layer [[ render_target_array_index ]]")); break; case NAME_CLIP_DISTANCE: ASSERT(0); // Should never be an input break; case NAME_VIEWPORT_ARRAY_INDEX: - // Not on Metal - ASSERT(0); + // Only supported on a Mac + m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_ViewPortIndex", "uint mtl_ViewPortIndex [[ viewport_array_index ]]")); break; case NAME_INSTANCE_ID: - m_StructDefinitions[""].m_Members.push_back("uint mtl_InstanceID [[ instance_id ]]"); + m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_InstanceID", "uint mtl_InstanceID [[ instance_id ]]")); break; case NAME_IS_FRONT_FACE: - m_StructDefinitions[""].m_Members.push_back("bool mtl_FrontFace [[ front_facing ]]"); + m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_FrontFace", "bool mtl_FrontFace [[ front_facing ]]")); break; case NAME_SAMPLE_INDEX: - m_StructDefinitions[""].m_Members.push_back("uint mtl_SampleID [[ sample_id ]]"); + m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_SampleID", "uint mtl_SampleID [[ sample_id ]]")); break; case NAME_VERTEX_ID: - m_StructDefinitions[""].m_Members.push_back("uint mtl_VertexID [[ vertex_id ]]"); + m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_VertexID", "uint mtl_VertexID [[ vertex_id ]]")); break; case NAME_PRIMITIVE_ID: // Not on Metal ASSERT(0); break; default: - m_StructDefinitions[""].m_Members.push_back(std::string("float4 ").append(psDecl->asOperands[0].specialName)); + m_StructDefinitions[""].m_Members.push_back(std::make_pair(psDecl->asOperands[0].specialName, std::string("float4 ").append(psDecl->asOperands[0].specialName))); ASSERT(0); // Catch this to see what's happening break; } @@ -195,7 +267,7 @@ void ToMetal::DeclareClipPlanes(const Declaration* decl, unsigned declCount) std::ostringstream oss; oss << "float mtl_ClipDistance [[ clip_distance ]]"; if(planeCount > 1) oss << "[" << planeCount << "]"; - m_StructDefinitions[GetOutputStructName()].m_Members.push_back(oss.str()); + m_StructDefinitions[GetOutputStructName()].m_Members.push_back(std::make_pair(std::string("mtl_ClipDistance"), oss.str())); Shader* shader = psContext->psShader; @@ -253,7 +325,7 @@ void ToMetal::GenerateTexturesReflection(HLSLccReflection* refl) } for(unsigned i = 0, n = m_Textures.size() ; i < n ; ++i) - refl->OnTextureBinding(m_Textures[i].name, m_Textures[i].textureBind, m_Textures[i].samplerBind, m_Textures[i].dim, m_Textures[i].uav); + refl->OnTextureBinding(m_Textures[i].name, m_Textures[i].textureBind, m_Textures[i].samplerBind, m_Textures[i].isMultisampled, m_Textures[i].dim, m_Textures[i].uav); } void ToMetal::DeclareBuiltinOutput(const Declaration *psDecl) @@ -263,19 +335,18 @@ void ToMetal::DeclareBuiltinOutput(const Declaration *psDecl) switch (psDecl->asOperands[0].eSpecialName) { case NAME_POSITION: - m_StructDefinitions[out].m_Members.push_back("float4 mtl_Position [[ position ]]"); + m_StructDefinitions[out].m_Members.push_back(std::make_pair("mtl_Position", "float4 mtl_Position [[ position ]]")); break; case NAME_RENDER_TARGET_ARRAY_INDEX: // Only supported on a Mac - m_StructDefinitions[out].m_Members.push_back("uint mtl_Layer [[ render_target_array_index ]]"); + m_StructDefinitions[out].m_Members.push_back(std::make_pair("mtl_Layer", "uint mtl_Layer [[ render_target_array_index ]]")); break; case NAME_CLIP_DISTANCE: // it will be done separately in DeclareClipPlanes break; - case NAME_VIEWPORT_ARRAY_INDEX: - // Not on Metal - ASSERT(0); + // Only supported on a Mac + m_StructDefinitions[out].m_Members.push_back(std::make_pair("mtl_ViewPortIndex", "uint mtl_ViewPortIndex [[ viewport_array_index ]]")); break; case NAME_VERTEX_ID: ASSERT(0); //VertexID is not an output @@ -290,51 +361,28 @@ void ToMetal::DeclareBuiltinOutput(const Declaration *psDecl) case NAME_IS_FRONT_FACE: ASSERT(0); //FrontFacing is not an output break; + + //For the quadrilateral domain, there are 6 factors (4 sides, 2 inner). case NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: - // Not on Metal - ASSERT(0); - break; case NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: - // Not on Metal - ASSERT(0); - break; case NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: - // Not on Metal - ASSERT(0); - break; case NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: - // Not on Metal - ASSERT(0); - break; - case NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: - // Not on Metal - ASSERT(0); - break; - case NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: - // Not on Metal - ASSERT(0); - break; - case NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: - // Not on Metal - ASSERT(0); - break; - case NAME_FINAL_LINE_DENSITY_TESSFACTOR: - // Not on Metal - ASSERT(0); - break; - case NAME_FINAL_LINE_DETAIL_TESSFACTOR: - // Not on Metal - ASSERT(0); - break; - case NAME_FINAL_TRI_INSIDE_TESSFACTOR: case NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR: - // Not on Metal - ASSERT(0); - break; case NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR: - // Not on Metal - ASSERT(0); + + //For the triangular domain, there are 4 factors (3 sides, 1 inner) + case NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_INSIDE_TESSFACTOR: + + //For the isoline domain, there are 2 factors (detail and density). + case NAME_FINAL_LINE_DETAIL_TESSFACTOR: + case NAME_FINAL_LINE_DENSITY_TESSFACTOR: + { + // Handled separately break; + } default: // This might be SV_Position (because d3dcompiler is weird). Get signature and check const ShaderInfo::InOutSignature *sig = NULL; @@ -342,7 +390,7 @@ void ToMetal::DeclareBuiltinOutput(const Declaration *psDecl) ASSERT(sig != NULL); if (sig->eSystemValueType == NAME_POSITION && sig->ui32SemanticIndex == 0) { - m_StructDefinitions[out].m_Members.push_back("float4 mtl_Position [[ position ]]"); + m_StructDefinitions[out].m_Members.push_back(std::make_pair("mtl_Position", "float4 mtl_Position [[ position ]]")); break; } @@ -397,6 +445,50 @@ static std::string BuildOperandTypeString(OPERAND_MIN_PRECISION ePrec, INOUT_COM return HLSLcc::GetConstructorForTypeMetal(t, numComponents); } +void ToMetal::DeclareHullShaderPassthrough() +{ + uint32_t i; + + for (i = 0; i < psContext->psShader->sInfo.psInputSignatures.size(); i++) + { + ShaderInfo::InOutSignature *psSig = &psContext->psShader->sInfo.psInputSignatures[i]; + + std::string name; + { + std::ostringstream oss; + oss << psSig->semanticName << psSig->ui32SemanticIndex; + name = oss.str(); + } + + uint32_t ui32NumComponents = HLSLcc::GetNumberBitsSet(psSig->ui32Mask); + std::string typeName = BuildOperandTypeString(OPERAND_MIN_PRECISION_DEFAULT, psSig->eComponentType, ui32NumComponents); + + std::ostringstream oss; + oss << typeName << " " << name; + oss << " [[ user(" << name << ") ]]"; + + std::string declString; + declString = oss.str(); + + m_StructDefinitions[GetInputStructName()].m_Members.push_back(std::make_pair(name, declString)); + + std::string out = GetOutputStructName(); + m_StructDefinitions[out].m_Members.push_back(std::make_pair(name, declString)); + + // For preserving data layout, declare output struct as domain shader input, too + oss.str(""); + out += "In"; + + oss << typeName << " " << name; + // VERTEX_SHADER hardcoded on purpose + uint32_t loc = psContext->psDependencies->GetVaryingLocation(name, VERTEX_SHADER, true); + oss << " [[ " << "attribute(" << loc << ")" << " ]] "; + + psContext->m_Reflection.OnInputBinding(name, loc); + m_StructDefinitions[out].m_Members.push_back(std::make_pair(name, oss.str())); + } +} + void ToMetal::HandleOutputRedirect(const Declaration *psDecl, const std::string &typeName) { const Operand *psOperand = &psDecl->asOperands[0]; @@ -500,8 +592,14 @@ void ToMetal::HandleInputRedirect(const Declaration *psDecl, const std::string & const ShaderInfo::InOutSignature *psSig = NULL; int regSpace = psOperand->GetRegisterSpace(psContext); - if (regSpace == 0 && psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xff) + if (regSpace == 0) { + if (psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xff) + needsRedirect = 1; + } + else if (psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xff) + { + psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->ui32CompMask, &psSig); needsRedirect = 1; } @@ -520,7 +618,19 @@ void ToMetal::HandleInputRedirect(const Declaration *psDecl, const std::string & psContext->AddIndentation(); bcatcstr(psPhase->earlyMain, " "); - bformata(psPhase->earlyMain, "%s phase%d_Input%d_%d;\n", typeName.c_str(), psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); + // Does the input have multiple array components (such as geometry shader input, or domain shader control point input) + if ((psShader->eShaderType == DOMAIN_SHADER && regSpace == 0) || (psShader->eShaderType == GEOMETRY_SHADER)) + { + // The count is actually stored in psOperand->aui32ArraySizes[0] + origArraySize = psOperand->aui32ArraySizes[0]; + // bformata(glsl, "%s vec4 phase%d_Input%d_%d[%d];\n", Precision, psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber, origArraySize); + bformata(psPhase->earlyMain, "%s phase%d_Input%d_%d[%d];\n", typeName.c_str(), psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber, origArraySize); + needsLooping = 1; + i = origArraySize - 1; + } + else + // bformata(glsl, "%s vec4 phase%d_Input%d_%d;\n", Precision, psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); + bformata(psPhase->earlyMain, "%s phase%d_Input%d_%d;\n", typeName.c_str(), psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); // Do a conditional loop. In normal cases needsLooping == 0 so this is only run once. do @@ -818,7 +928,7 @@ void ToMetal::DeclareStructVariable(const std::string &parentName, const ShaderV { oss << "[" << var.Elements << "]"; } - m_StructDefinitions[parentName].m_Members.push_back(oss.str()); + m_StructDefinitions[parentName].m_Members.push_back(std::make_pair(var.name, oss.str())); m_StructDefinitions[parentName].m_Dependencies.push_back(var.name + "_Type"); return; } @@ -846,7 +956,7 @@ void ToMetal::DeclareStructVariable(const std::string &parentName, const ShaderV // On non-compute we can fake that we still have a matrix, as CB upload code will fill the data correctly on 4x4 matrices. // That way we avoid the issues with mismatching types for builtins etc. if (psContext->psShader->eShaderType == COMPUTE_SHADER) - doDeclare = psContext->m_Reflection.OnConstant(var.fullName, var.Offset + cumulativeOffset, var.Type, 4, 1, false, elemCount); + doDeclare = psContext->m_Reflection.OnConstant(var.fullName, var.Offset + cumulativeOffset, var.Type, 1, 4, false, elemCount); else doDeclare = psContext->m_Reflection.OnConstant(var.fullName, var.Offset + cumulativeOffset, var.Type, var.Rows, var.Columns, true, var.Elements); } @@ -866,7 +976,7 @@ void ToMetal::DeclareStructVariable(const std::string &parentName, const ShaderV } if (doDeclare) - m_StructDefinitions[parentName].m_Members.push_back(oss.str()); + m_StructDefinitions[parentName].m_Members.push_back(std::make_pair(var.name, oss.str())); } else if (var.Class == SVC_VECTOR && var.Columns > 1) @@ -883,7 +993,7 @@ void ToMetal::DeclareStructVariable(const std::string &parentName, const ShaderV doDeclare = psContext->m_Reflection.OnConstant(var.fullName, var.Offset + cumulativeOffset, var.Type, 1, var.Columns, false, var.Elements); if (doDeclare) - m_StructDefinitions[parentName].m_Members.push_back(oss.str()); + m_StructDefinitions[parentName].m_Members.push_back(std::make_pair(var.name, oss.str())); } else if ((var.Class == SVC_SCALAR) || @@ -909,7 +1019,7 @@ void ToMetal::DeclareStructVariable(const std::string &parentName, const ShaderV doDeclare = psContext->m_Reflection.OnConstant(var.fullName, var.Offset + cumulativeOffset, var.Type, 1, 1, false, var.Elements); if (doDeclare) - m_StructDefinitions[parentName].m_Members.push_back(oss.str()); + m_StructDefinitions[parentName].m_Members.push_back(std::make_pair(var.name, oss.str())); } else { @@ -938,29 +1048,32 @@ void ToMetal::DeclareStructType(const std::string &name, const std::vectorname.c_str(); - - const bool isGlobals = (cbname == "$Globals"); + const bool isGlobals = (psCBuf->name == "$Globals"); const bool stripUnused = isGlobals && (psContext->flags & HLSLCC_FLAG_REMOVE_UNUSED_GLOBALS); - - if (cbname[0] == '$') - cbname = cbname.substr(1); - + std::string cbname = GetCBName(psCBuf->name); + // Note: if we're stripping unused members, both ui32TotalSizeInBytes and individual offsets into reflection will be completely off. // However, the reflection layer re-calculates both to match Metal alignment rules anyway, so we're good. if (!psContext->m_Reflection.OnConstantBuffer(cbname, psCBuf->ui32TotalSizeInBytes, psCBuf->GetMemberCount(stripUnused))) return; + if (psContext->psDependencies->IsMemberDeclared(cbname)) + return; + DeclareStructType(cbname + "_Type", psCBuf->asVars, true, 0, stripUnused); + std::ostringstream oss; uint32_t slot = m_BufferSlots.GetBindingSlot(ui32BindingPoint, BindingSlotAllocator::ConstantBuffer); - oss << "constant " << cbname << "_Type& " << cbname << " [[ buffer("<< slot <<") ]]"; - m_StructDefinitions[""].m_Members.push_back(oss.str()); + + if (HLSLcc::IsUnityFlexibleInstancingBuffer(psCBuf)) + oss << "const constant " << psCBuf->asVars[0].name << "_Type* "; + else + oss << "constant " << cbname << "_Type& "; + oss << cbname << " [[ buffer(" << slot << ") ]]"; + + m_StructDefinitions[""].m_Members.push_back(std::make_pair(cbname, oss.str())); m_StructDefinitions[""].m_Dependencies.push_back(cbname + "_Type"); - psContext->m_Reflection.OnConstantBufferBinding(cbname, slot); - - } void ToMetal::DeclareBufferVariable(const Declaration *psDecl, bool isRaw, bool isUAV) @@ -980,35 +1093,37 @@ void ToMetal::DeclareBufferVariable(const Declaration *psDecl, bool isRaw, bool BufType = BufName + "_Type"; typeoss << "uint value["; typeoss << psDecl->ui32BufferStride / 4 << "]"; - m_StructDefinitions[BufType].m_Members.push_back(typeoss.str()); + m_StructDefinitions[BufType].m_Members.push_back(std::make_pair("value", typeoss.str())); m_StructDefinitions[""].m_Dependencies.push_back(BufType); } - std::ostringstream oss; - - if (!isUAV || ((psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_WRITE) == 0)) + if (!psContext->psDependencies->IsMemberDeclared(BufName)) { - BufConst = "const "; - oss << BufConst; - } - else - { - if (psContext->psShader->eShaderType != COMPUTE_SHADER) - psContext->m_Reflection.OnDiagnostics("This shader might not work on all Metal devices because of buffer writes on non-compute shaders.", 0, false); + std::ostringstream oss; + + if (!isUAV || ((psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_WRITE) == 0)) + { + BufConst = "const "; + oss << BufConst; + } + else + { + if (psContext->psShader->eShaderType != COMPUTE_SHADER) + psContext->m_Reflection.OnDiagnostics("This shader might not work on all Metal devices because of buffer writes on non-compute shaders.", 0, false); + } + + if (isRaw) + oss << "device uint *" << BufName; + else + oss << "device " << BufType << " *" << BufName; + + uint32_t loc = m_BufferSlots.GetBindingSlot(regNo, isUAV ? BindingSlotAllocator::RWBuffer : BindingSlotAllocator::Texture); + oss << " [[ buffer(" << loc << ") ]]"; + + m_StructDefinitions[""].m_Members.push_back(std::make_pair(BufName, oss.str())); + psContext->m_Reflection.OnBufferBinding(BufName, loc, isUAV); } - if (isRaw) - oss << "device uint *" << BufName; - else - oss << "device " << BufType << " *" << BufName; - - uint32_t loc = m_BufferSlots.GetBindingSlot(regNo, isUAV ? BindingSlotAllocator::RWBuffer : BindingSlotAllocator::Texture); - oss << " [[ buffer(" << loc << ") ]]"; - - m_StructDefinitions[""].m_Members.push_back(oss.str()); - psContext->m_Reflection.OnBufferBinding(BufName, loc, isUAV); - - // In addition to the actual declaration, we need pointer modification and possible counter declaration // in early main: std::ostringstream earlymainoss; @@ -1159,6 +1274,29 @@ void ToMetal::TranslateDeclaration(const Declaration* psDecl) { const Operand* psOperand = &psDecl->asOperands[0]; + if((psOperand->eType == OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID)|| + (psOperand->eType == OPERAND_TYPE_INPUT_FORK_INSTANCE_ID)) + { + break; + } + + // No need to declare patch constants read again by the hull shader. + if ((psOperand->eType == OPERAND_TYPE_INPUT_PATCH_CONSTANT) && psContext->psShader->eShaderType == HULL_SHADER) + { + break; + } + // ...or control points + if ((psOperand->eType == OPERAND_TYPE_INPUT_CONTROL_POINT) && psContext->psShader->eShaderType == HULL_SHADER) + { + break; + } + + //Already declared as part of an array. + if(psShader->aIndexedInput[psOperand->GetRegisterSpace(psContext)][psDecl->asOperands[0].ui32RegisterNumber] == -1) + { + break; + } + uint32_t ui32Reg = psDecl->asOperands[0].ui32RegisterNumber; uint32_t ui32CompMask = psDecl->asOperands[0].ui32CompMask; @@ -1179,7 +1317,7 @@ void ToMetal::TranslateDeclaration(const Declaration* psDecl) { std::ostringstream oss; oss << "uint " << name << " [[ sample_mask ]]"; - m_StructDefinitions[""].m_Members.push_back(oss.str()); + m_StructDefinitions[""].m_Members.push_back(std::make_pair(name,oss.str())); break; } @@ -1187,7 +1325,7 @@ void ToMetal::TranslateDeclaration(const Declaration* psDecl) { std::ostringstream oss; oss << "uint3 " << name << " [[ thread_position_in_grid ]]"; - m_StructDefinitions[""].m_Members.push_back(oss.str()); + m_StructDefinitions[""].m_Members.push_back(std::make_pair(name, oss.str())); break; } @@ -1195,7 +1333,7 @@ void ToMetal::TranslateDeclaration(const Declaration* psDecl) { std::ostringstream oss; oss << "uint3 " << name << " [[ threadgroup_position_in_grid ]]"; - m_StructDefinitions[""].m_Members.push_back(oss.str()); + m_StructDefinitions[""].m_Members.push_back(std::make_pair(name, oss.str())); break; } @@ -1203,27 +1341,42 @@ void ToMetal::TranslateDeclaration(const Declaration* psDecl) { std::ostringstream oss; oss << "uint3 " << name << " [[ thread_position_in_threadgroup ]]"; - m_StructDefinitions[""].m_Members.push_back(oss.str()); + m_StructDefinitions[""].m_Members.push_back(std::make_pair(name, oss.str())); break; } if (psOperand->eSpecialName == NAME_RENDER_TARGET_ARRAY_INDEX) { std::ostringstream oss; oss << "uint " << name << " [[ render_target_array_index ]]"; - m_StructDefinitions[""].m_Members.push_back(oss.str()); + m_StructDefinitions[""].m_Members.push_back(std::make_pair(name, oss.str())); + break; + } + if (psOperand->eType == OPERAND_TYPE_INPUT_DOMAIN_POINT) + { + std::ostringstream oss; + std::string patchPositionType = psShader->sInfo.eTessDomain == TESSELLATOR_DOMAIN_QUAD ? "float2 " : "float3 "; + oss << patchPositionType << name << " [[ position_in_patch ]]"; + m_StructDefinitions[""].m_Members.push_back(std::make_pair(name, oss.str())); break; } if (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED) { std::ostringstream oss; oss << "uint " << name << " [[ thread_index_in_threadgroup ]]"; - m_StructDefinitions[""].m_Members.push_back(oss.str()); + m_StructDefinitions[""].m_Members.push_back(std::make_pair(name, oss.str())); break; } - + if (psOperand->eSpecialName == NAME_VIEWPORT_ARRAY_INDEX) + { + std::ostringstream oss; + oss << "uint " << name << " [[ viewport_array_index ]]"; + m_StructDefinitions[""].m_Members.push_back(std::make_pair(name, oss.str())); + break; + } + if(psDecl->eOpcode == OPCODE_DCL_INPUT_PS_SIV && psOperand->eSpecialName == NAME_POSITION) { - m_StructDefinitions[""].m_Members.push_back("float4 mtl_FragCoord [[ position ]]"); + m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_FragCoord", "float4 mtl_FragCoord [[ position ]]")); break; } @@ -1235,8 +1388,23 @@ void ToMetal::TranslateDeclaration(const Declaration* psDecl) } } - const ShaderInfo::InOutSignature *psSig = NULL; - psContext->psShader->sInfo.GetInputSignatureFromRegister(ui32Reg, ui32CompMask, &psSig); + int regSpace = psDecl->asOperands[0].GetRegisterSpace(psContext); + + const ShaderInfo::InOutSignature *psSig = NULL; + + // This falls within the specified index ranges. The default is 0 if no input range is specified + if (regSpace == 0) + psContext->psShader->sInfo.GetInputSignatureFromRegister(ui32Reg, ui32CompMask, &psSig); + else + psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Reg, ui32CompMask, &psSig); + + if (!psSig) + break; + + // fragment shader cannot reference builtins generated by vertex program (with obvious exception of position) + // TODO: some visible error? handle more builtins? + if (psContext->psShader->eShaderType == PIXEL_SHADER && !strncmp(psSig->semanticName.c_str(), "PSIZE", 5)) + break; int iNumComponents = psOperand->GetNumInputElements(psContext); psShader->acInputDeclared[0][ui32Reg] = (char)psSig->ui32Mask; @@ -1244,9 +1412,10 @@ void ToMetal::TranslateDeclaration(const Declaration* psDecl) std::string typeName = BuildOperandTypeString(psOperand->eMinPrecision, psSig->eComponentType, iNumComponents); std::string semantic; - if (psContext->psShader->eShaderType == VERTEX_SHADER) + if (psContext->psShader->eShaderType == VERTEX_SHADER || psContext->psShader->eShaderType == HULL_SHADER || psContext->psShader->eShaderType == DOMAIN_SHADER) { std::ostringstream oss; + // VERTEX_SHADER hardcoded on purpose uint32_t loc = psContext->psDependencies->GetVaryingLocation(name, VERTEX_SHADER, true); oss << "attribute(" << loc << ")"; semantic = oss.str(); @@ -1261,9 +1430,6 @@ void ToMetal::TranslateDeclaration(const Declaration* psDecl) if (psOperand->iPSInOut && name.size() == 10 && !strncmp(name.c_str(), "SV_Target", 9)) { // Metal allows color(X) declared in input/output structs - // - // TODO: Improve later when GLES3 support arrives, it requires - // single declaration through inout oss << "color(xlt_remap_i[" << psSig->ui32SemanticIndex << "])"; m_NeedFBInputRemapDecl = true; } @@ -1281,20 +1447,40 @@ void ToMetal::TranslateDeclaration(const Declaration* psDecl) } std::string declString; - if ((OPERAND_INDEX_DIMENSION)psOperand->iIndexDims == INDEX_2D) + if ((OPERAND_INDEX_DIMENSION)psOperand->iIndexDims == INDEX_2D && psOperand->eType != OPERAND_TYPE_INPUT_CONTROL_POINT && psContext->psShader->eShaderType != HULL_SHADER) { std::ostringstream oss; - oss << typeName << " " << name << " [ " << psOperand->aui32ArraySizes[0] << " ] " << " [[ " << semantic << " ]] " << interpolation; + oss << typeName << " " << name << " [ " << psOperand->aui32ArraySizes[0] << " ] "; + + if (psContext->psShader->eShaderType != HULL_SHADER) + oss << " [[ " << semantic << " ]] " << interpolation; declString = oss.str(); } else { std::ostringstream oss; - oss << typeName << " " << name << " [[ " << semantic << " ]] " << interpolation; + oss << typeName << " " << name; + if (psContext->psShader->eShaderType != HULL_SHADER) + oss << " [[ " << semantic << " ]] " << interpolation; declString = oss.str(); } - m_StructDefinitions[GetInputStructName()].m_Members.push_back(declString); + if (psOperand->eType == OPERAND_TYPE_INPUT_PATCH_CONSTANT && psContext->psShader->eShaderType == DOMAIN_SHADER) + { + m_StructDefinitions["Mtl_PatchConstant"].m_Members.push_back(std::make_pair(name, declString)); + } + else if (psOperand->eType == OPERAND_TYPE_INPUT_CONTROL_POINT && psContext->psShader->eShaderType == DOMAIN_SHADER) + { + m_StructDefinitions["Mtl_ControlPoint"].m_Members.push_back(std::make_pair(name, declString)); + } + else if (psContext->psShader->eShaderType == HULL_SHADER) + { + m_StructDefinitions[GetInputStructName()].m_Members.push_back(std::make_pair(name, declString)); + } + else + { + m_StructDefinitions[GetInputStructName()].m_Members.push_back(std::make_pair(name, declString)); + } HandleInputRedirect(psDecl, BuildOperandTypeString(psOperand->eMinPrecision, INOUT_COMPONENT_FLOAT32, 4)); break; @@ -1357,22 +1543,22 @@ void ToMetal::TranslateDeclaration(const Declaration* psDecl) case 'f': case 'F': oss << "float4 " << sv.name << " [[ color(xlt_remap_i["<< idx <<"]) ]]"; - m_StructDefinitions[""].m_Members.push_back(oss.str()); + m_StructDefinitions[""].m_Members.push_back(std::make_pair(sv.name, oss.str())); break; case 'h': case 'H': oss << "half4 " << sv.name << " [[ color(xlt_remap_i[" << idx << "]) ]]"; - m_StructDefinitions[""].m_Members.push_back(oss.str()); + m_StructDefinitions[""].m_Members.push_back(std::make_pair(sv.name, oss.str())); break; case 'i': case 'I': oss << "int4 " << sv.name << " [[ color(xlt_remap_i[" << idx << "]) ]]"; - m_StructDefinitions[""].m_Members.push_back(oss.str()); + m_StructDefinitions[""].m_Members.push_back(std::make_pair(sv.name, oss.str())); break; case 'u': case 'U': oss << "uint4 " << sv.name << " [[ color(xlt_remap_i[" << idx << "]) ]]"; - m_StructDefinitions[""].m_Members.push_back(oss.str()); + m_StructDefinitions[""].m_Members.push_back(std::make_pair(sv.name, oss.str())); break; default: break; @@ -1400,9 +1586,9 @@ void ToMetal::TranslateDeclaration(const Declaration* psDecl) { uint32_t ui32Flags = psDecl->value.ui32GlobalFlags; - if (ui32Flags & GLOBAL_FLAG_FORCE_EARLY_DEPTH_STENCIL) + if (ui32Flags & GLOBAL_FLAG_FORCE_EARLY_DEPTH_STENCIL && psContext->psShader->eShaderType == PIXEL_SHADER) { -// bcatcstr(glsl, "layout(early_fragment_tests) in;\n"); + psShader->sInfo.bEarlyFragmentTests = true; } if (!(ui32Flags & GLOBAL_FLAG_REFACTORING_ALLOWED)) { @@ -1412,7 +1598,6 @@ void ToMetal::TranslateDeclaration(const Declaration* psDecl) if (ui32Flags & GLOBAL_FLAG_ENABLE_DOUBLE_PRECISION_FLOAT_OPS) { // Not supported on Metal -// bcatcstr(glsl, "#extension GL_ARB_gpu_shader_fp64 : enable\n"); // psShader->fp64 = 1; } break; @@ -1427,17 +1612,27 @@ void ToMetal::TranslateDeclaration(const Declaration* psDecl) } case OPCODE_DCL_TESS_OUTPUT_PRIMITIVE: { - // Not supported + if(psContext->psShader->eShaderType == HULL_SHADER) + { + psContext->psShader->sInfo.eTessOutPrim = psDecl->value.eTessOutPrim; + if (psContext->psShader->sInfo.eTessOutPrim == TESSELLATOR_OUTPUT_TRIANGLE_CW) + psContext->psShader->sInfo.eTessOutPrim = TESSELLATOR_OUTPUT_TRIANGLE_CCW; + else if (psContext->psShader->sInfo.eTessOutPrim == TESSELLATOR_OUTPUT_TRIANGLE_CCW) + psContext->psShader->sInfo.eTessOutPrim = TESSELLATOR_OUTPUT_TRIANGLE_CW; + } break; } case OPCODE_DCL_TESS_DOMAIN: { - // Not supported + psContext->psShader->sInfo.eTessDomain = psDecl->value.eTessDomain; + + if (psContext->psShader->sInfo.eTessDomain == TESSELLATOR_DOMAIN_ISOLINE) + psContext->m_Reflection.OnDiagnostics("Metal Tessellation: domain(\"isoline\") not supported.", 0, true); break; } case OPCODE_DCL_TESS_PARTITIONING: { - // Not supported + psContext->psShader->sInfo.eTessPartitioning = psDecl->value.eTessPartitioning; break; } case OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY: @@ -1475,69 +1670,38 @@ void ToMetal::TranslateDeclaration(const Declaration* psDecl) { // TODO: This is only ever accessed as a float currently. Do trickery if we ever see ints accessed from an array. // Walk through all the chunks we've seen in this phase. - ShaderPhase &sp = psShader->asPhases[psContext->currentPhase]; - std::for_each(sp.m_ConstantArrayInfo.m_Chunks.begin(), sp.m_ConstantArrayInfo.m_Chunks.end(), [this](const std::pair &chunk) - { - bstring glsl = *psContext->currentGLSLString; - uint32_t componentCount = chunk.second.m_ComponentCount; - // Just do the declaration here and contents to earlyMain. - if (componentCount == 1) - bformata(glsl, "constant float ImmCB_%d_%d_%d[%d] =\n{\n", psContext->currentPhase, chunk.first, chunk.second.m_Rebase, chunk.second.m_Size); - else - bformata(glsl, "constant float%d ImmCB_%d_%d_%d[%d] =\n{\n", componentCount, psContext->currentPhase, chunk.first, chunk.second.m_Rebase, chunk.second.m_Size); - Declaration *psDecl = psContext->psShader->asPhases[psContext->currentPhase].m_ConstantArrayInfo.m_OrigDeclaration; - if (componentCount == 1) - { - for (uint32_t i = 0; i < chunk.second.m_Size; i++) - { - if (i != 0) - bcatcstr(glsl, ",\n"); - float val[4] = { - *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].a, - *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].b, - *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].c, - *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].d - }; - if (fpcheck(val[chunk.second.m_Rebase])) - bformata(glsl, "\tas_type(0x%Xu)", *(uint32_t *)&val[chunk.second.m_Rebase]); - else - { - bcatcstr(glsl, "\t"); - HLSLcc::PrintFloat(glsl, val[chunk.second.m_Rebase]); - } - } - bcatcstr(glsl, "\n};\n"); - } - else - { - for (uint32_t i = 0; i < chunk.second.m_Size; i++) + bstring glsl = *psContext->currentGLSLString; + bformata(glsl, "constant float4 ImmCB_%d[%d] =\n{\n", psContext->currentPhase, psDecl->asImmediateConstBuffer.size()); + bool isFirst = true; + std::for_each(psDecl->asImmediateConstBuffer.begin(), psDecl->asImmediateConstBuffer.end(), [&](const ICBVec4 &data) { - if (i != 0) - bcatcstr(glsl, ",\n"); + if (!isFirst) + { + bcatcstr(glsl, ",\n"); + } + isFirst = false; + float val[4] = { - *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].a, - *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].b, - *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].c, - *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].d + *(float*)&data.a, + *(float*)&data.b, + *(float*)&data.c, + *(float*)&data.d }; - bformata(glsl, "\tfloat%d(", componentCount); - for (uint32_t k = 0; k < componentCount; k++) + + bformata(glsl, "\tfloat4("); + for (uint32_t k = 0; k < 4; k++) { - if (k != 0) - bcatcstr(glsl, ", "); + if (k != 0) + bcatcstr(glsl, ", "); if (fpcheck(val[k])) - bformata(glsl, "as_type(0x%Xu)", *(uint32_t *)&val[k + chunk.second.m_Rebase]); + bformata(glsl, "as_type(0x%Xu)", *(uint32_t *)&val[k]); else - HLSLcc::PrintFloat(glsl, val[k + chunk.second.m_Rebase]); + HLSLcc::PrintFloat(glsl, val[k]); } - bcatcstr(glsl, ")"); - } - bcatcstr(glsl, "\n};\n"); - } - + bcatcstr(glsl, ")"); }); - + bcatcstr(glsl, "\n};\n"); break; } case OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT: @@ -1627,6 +1791,10 @@ void ToMetal::TranslateDeclaration(const Declaration* psDecl) } startReg = psDecl->asOperands[0].ui32RegisterNumber; + oldString = psContext->currentGLSLString; + psContext->currentGLSLString = &psContext->psShader->asPhases[psContext->currentPhase].earlyMain; + psContext->AddIndentation(); + psContext->currentGLSLString = oldString; bformata(psContext->psShader->asPhases[psContext->currentPhase].earlyMain, "%s4 phase%d_%sput%d_%d[%d];\n", type, psContext->currentPhase, isInput ? "In" : "Out", regSpace, startReg, psDecl->value.ui32IndexRange); oldString = psContext->currentGLSLString; glsl = isInput ? psContext->psShader->asPhases[psContext->currentPhase].earlyMain : psContext->psShader->asPhases[psContext->currentPhase].postShaderCode; @@ -1700,7 +1868,7 @@ void ToMetal::TranslateDeclaration(const Declaration* psDecl) } else { - realName = psContext->GetDeclaredOutputName(&psDecl->asOperands[0], &dummy, NULL, NULL, 1); + realName = psContext->GetDeclaredOutputName(&psDecl->asOperands[0], &dummy, NULL, NULL, 0); psContext->AddIndentation(); bcatcstr(glsl, realName.c_str()); @@ -1785,12 +1953,16 @@ void ToMetal::TranslateDeclaration(const Declaration* psDecl) } case OPCODE_DCL_INPUT_CONTROL_POINT_COUNT: { - // Not supported + if(psContext->psShader->eShaderType == HULL_SHADER) + psShader->sInfo.ui32TessInputControlPointCount = psDecl->value.ui32MaxOutputVertexCount; + else if(psContext->psShader->eShaderType == DOMAIN_SHADER) + psShader->sInfo.ui32TessOutputControlPointCount = psDecl->value.ui32MaxOutputVertexCount; break; } case OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT: { - // Not supported + if(psContext->psShader->eShaderType == HULL_SHADER) + psShader->sInfo.ui32TessOutputControlPointCount = psDecl->value.ui32MaxOutputVertexCount; break; } case OPCODE_HS_FORK_PHASE: @@ -1811,19 +1983,30 @@ void ToMetal::TranslateDeclaration(const Declaration* psDecl) { // for some reason we have some samplers start with "sampler" and some not const bool startsWithSampler = name.find("sampler") == 0; - const uint32_t slot = m_SamplerSlots.GetBindingSlot(psDecl->asOperands[0].ui32RegisterNumber, BindingSlotAllocator::Texture); - std::ostringstream oss; oss << "sampler " << (startsWithSampler ? "" : "sampler") << name << " [[ sampler (" << slot << ") ]]"; - m_StructDefinitions[""].m_Members.push_back(oss.str()); - SamplerDesc desc = { name, psDecl->asOperands[0].ui32RegisterNumber, slot }; - m_Samplers.push_back(desc); + std::ostringstream samplerOss; + samplerOss << (startsWithSampler ? "" : "sampler") << name; + std::string samplerName = samplerOss.str(); + + if (!psContext->psDependencies->IsMemberDeclared(samplerName)) + { + const uint32_t slot = m_SamplerSlots.GetBindingSlot(psDecl->asOperands[0].ui32RegisterNumber, BindingSlotAllocator::Texture); + std::ostringstream oss; + oss << "sampler " << samplerName << " [[ sampler (" << slot << ") ]]"; + + m_StructDefinitions[""].m_Members.push_back(std::make_pair(samplerName, oss.str())); + + SamplerDesc desc = { name, psDecl->asOperands[0].ui32RegisterNumber, slot }; + m_Samplers.push_back(desc); + } } break; } case OPCODE_DCL_HS_MAX_TESSFACTOR: { - // Not supported + if(psContext->psShader->eShaderType == HULL_SHADER && psContext->psDependencies) + psContext->psDependencies->fMaxTessFactor = psDecl->value.fMaxTessFactor; break; } case OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED: @@ -1839,16 +2022,40 @@ void ToMetal::TranslateDeclaration(const Declaration* psDecl) } std::string texName = ResourceName(RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber); std::string samplerTypeName = TranslateResourceDeclaration(psContext, psDecl, texName, false, true); - uint32_t slot = m_TextureSlots.GetBindingSlot(psDecl->asOperands[0].ui32RegisterNumber, BindingSlotAllocator::UAV); std::ostringstream oss; - oss << samplerTypeName << " " << texName - << " [[ texture (" << slot << ") ]] "; + if (!psContext->psDependencies->IsMemberDeclared(texName)) + { + uint32_t slot = m_TextureSlots.GetBindingSlot(psDecl->asOperands[0].ui32RegisterNumber, BindingSlotAllocator::UAV); - m_StructDefinitions[""].m_Members.push_back(oss.str()); + std::ostringstream oss; + oss << samplerTypeName << " " << texName << " [[ texture(" << slot << ") ]] "; - // TODO: translate psDecl->value.eResourceDimension into HLSLCC_TEX_DIMENSION - TextureSamplerDesc desc = {texName, (int)slot, -1, TD_2D, true}; - m_Textures.push_back(desc); + m_StructDefinitions[""].m_Members.push_back(std::make_pair(texName, oss.str())); + HLSLCC_TEX_DIMENSION texDim = TD_INT; + switch (psDecl->value.eResourceDimension) + { + default: break; + case RESOURCE_DIMENSION_TEXTURE2D: + case RESOURCE_DIMENSION_TEXTURE2DMS: + texDim = TD_2D; + break; + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + texDim = TD_2DARRAY; + break; + case RESOURCE_DIMENSION_TEXTURE3D: + texDim = TD_3D; + break; + case RESOURCE_DIMENSION_TEXTURECUBE: + texDim = TD_CUBE; + break; + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + texDim = TD_CUBEARRAY; + break; + } + TextureSamplerDesc desc = {texName, (int)slot, -1, texDim, false, false, true}; + m_Textures.push_back(desc); + } break; } @@ -1877,7 +2084,7 @@ void ToMetal::TranslateDeclaration(const Declaration* psDecl) ShaderVarType* psVarType = &psShader->sInfo.sGroupSharedVarType[psDecl->asOperands[0].ui32RegisterNumber]; std::ostringstream oss; oss << "uint value[" << psDecl->sTGSM.ui32Stride / 4 << "]"; - m_StructDefinitions[TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NAME_ONLY) + "_Type"].m_Members.push_back(oss.str()); + m_StructDefinitions[TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NAME_ONLY) + "_Type"].m_Members.push_back(std::make_pair("value", oss.str())); m_StructDefinitions[""].m_Dependencies.push_back(TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NAME_ONLY) + "_Type"); oss.str(""); oss << "threadgroup " << TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NAME_ONLY) @@ -1971,24 +2178,36 @@ std::string ToMetal::ResourceName(ResourceGroup group, const uint32_t ui32Regist void ToMetal::TranslateResourceTexture(const Declaration* psDecl, uint32_t samplerCanDoShadowCmp, HLSLCC_TEX_DIMENSION texDim) { - std::string texName = ResourceName(RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber); - std::string samplerTypeName = TranslateResourceDeclaration(psContext, - psDecl, texName, (samplerCanDoShadowCmp && psDecl->ui32IsShadowTex), false); + const bool isDepthSampler = (samplerCanDoShadowCmp && psDecl->ui32IsShadowTex); + std::string samplerTypeName = TranslateResourceDeclaration(psContext, psDecl, texName, isDepthSampler, false); - uint32_t slot = m_TextureSlots.GetBindingSlot(psDecl->asOperands[0].ui32RegisterNumber, BindingSlotAllocator::Texture); - std::ostringstream oss; - oss << samplerTypeName << " " << texName - << " [[ texture (" << slot << ") ]] "; + bool isMS = false; + switch(psDecl->value.eResourceDimension) + { + default: + break; + case RESOURCE_DIMENSION_TEXTURE2DMS: + case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + isMS = true; + break; + } - m_StructDefinitions[""].m_Members.push_back(oss.str()); + if (!psContext->psDependencies->IsMemberDeclared(texName)) + { + uint32_t slot = m_TextureSlots.GetBindingSlot(psDecl->asOperands[0].ui32RegisterNumber, BindingSlotAllocator::Texture); - TextureSamplerDesc desc = {texName, (int)slot, -1, texDim, false}; - m_Textures.push_back(desc); + std::ostringstream oss; + oss << samplerTypeName << " " << texName << " [[ texture(" << slot << ") ]] "; - if (samplerCanDoShadowCmp && psDecl->ui32IsShadowTex) - EnsureShadowSamplerDeclared(); + m_StructDefinitions[""].m_Members.push_back(std::make_pair(texName, oss.str())); + TextureSamplerDesc desc = {texName, (int)slot, -1, texDim, isMS, isDepthSampler, false}; + m_Textures.push_back(desc); + + if (isDepthSampler) + EnsureShadowSamplerDeclared(); + } } void ToMetal::DeclareResource(const Declaration *psDecl) @@ -2000,19 +2219,23 @@ void ToMetal::DeclareResource(const Declaration *psDecl) // Fake single comp 32bit texel buffers by using raw buffer DeclareBufferVariable(psDecl, true, false); break; - - /* TODO: re-enable this code for buffer textures when sripting API has proper support for it - uint32_t slot = m_TextureSlots.GetBindingSlot(psDecl->asOperands[0].ui32RegisterNumber, BindingSlotAllocator::Texture); - std::string texName = TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NAME_ONLY); - std::ostringstream oss; - oss << "device " << TranslateResourceDeclaration(psContext, psDecl, texName, false, false); - oss << texName << " [[ texture(" << slot << ") ]]"; + // TODO: re-enable this code for buffer textures when sripting API has proper support for it +#if 0 + if (!psContext->psDependencies->IsMemberDeclared(texName)) + { + uint32_t slot = m_TextureSlots.GetBindingSlot(psDecl->asOperands[0].ui32RegisterNumber, BindingSlotAllocator::Texture); + std::string texName = TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NAME_ONLY); + std::ostringstream oss; + oss << "device " << TranslateResourceDeclaration(psContext, psDecl, texName, false, false); - m_StructDefinitions[""].m_Members.push_back(oss.str()); - psContext->m_Reflection.OnTextureBinding(texName, slot, TD_2D, false); //TODO: correct HLSLCC_TEX_DIMENSION? - break;*/ + oss << texName << " [[ texture(" << slot << ") ]]"; + m_StructDefinitions[""].m_Members.push_back(std::make_pair(texName, oss.str())); + psContext->m_Reflection.OnTextureBinding(texName, slot, TD_2D, false); //TODO: correct HLSLCC_TEX_DIMENSION? + } + break; +#endif } default: ASSERT(0); @@ -2161,28 +2384,28 @@ void ToMetal::DeclareOutput(const Declaration *psDecl) { std::ostringstream oss; oss << type << " " << name << " [[ sample_mask ]]"; - m_StructDefinitions[GetOutputStructName()].m_Members.push_back(oss.str()); + m_StructDefinitions[GetOutputStructName()].m_Members.push_back(std::make_pair(name, oss.str())); break; } case OPERAND_TYPE_OUTPUT_DEPTH: { std::ostringstream oss; oss << type << " " << name << " [[ depth(any) ]]"; - m_StructDefinitions[GetOutputStructName()].m_Members.push_back(oss.str()); + m_StructDefinitions[GetOutputStructName()].m_Members.push_back(std::make_pair(name, oss.str())); break; } case OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL: { std::ostringstream oss; oss << type << " " << name << " [[ depth(greater) ]]"; - m_StructDefinitions[GetOutputStructName()].m_Members.push_back(oss.str()); + m_StructDefinitions[GetOutputStructName()].m_Members.push_back(std::make_pair(name, oss.str())); break; } case OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL: { std::ostringstream oss; oss << type << " " << name << " [[ depth(less) ]]"; - m_StructDefinitions[GetOutputStructName()].m_Members.push_back(oss.str()); + m_StructDefinitions[GetOutputStructName()].m_Members.push_back(std::make_pair(name, oss.str())); break; } default: @@ -2190,27 +2413,46 @@ void ToMetal::DeclareOutput(const Declaration *psDecl) std::ostringstream oss; oss << type << " " << name << " [[ color(xlt_remap_o[" << psSignature->ui32SemanticIndex << "]) ]]"; m_NeedFBOutputRemapDecl = true; - m_StructDefinitions[GetOutputStructName()].m_Members.push_back(oss.str()); + m_StructDefinitions[GetOutputStructName()].m_Members.push_back(std::make_pair(name, oss.str())); } } break; } case VERTEX_SHADER: + case DOMAIN_SHADER: + case HULL_SHADER: { + std::string out = GetOutputStructName(); + bool isTessKernel = (psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) != 0 && (psContext->psShader->eShaderType == HULL_SHADER || psContext->psShader->eShaderType == VERTEX_SHADER); + std::ostringstream oss; oss << type << " " << name; - if (psSignature->eSystemValueType == NAME_POSITION || (psSignature->semanticName == "POS" && psOperand->ui32RegisterNumber == 0 )) + if (!isTessKernel && (psSignature->eSystemValueType == NAME_POSITION || psSignature->semanticName == "POS") && psOperand->ui32RegisterNumber == 0) oss << " [[ position ]]"; - else if (psSignature->eSystemValueType == NAME_UNDEFINED && psSignature->semanticName == "PSIZE" && psSignature->ui32SemanticIndex == 0 ) + else if (!isTessKernel && psSignature->eSystemValueType == NAME_UNDEFINED && psSignature->semanticName == "PSIZE" && psSignature->ui32SemanticIndex == 0 ) oss << " [[ point_size ]]"; else oss << " [[ user(" << name << ") ]]"; - m_StructDefinitions[GetOutputStructName()].m_Members.push_back(oss.str()); + m_StructDefinitions[out].m_Members.push_back(std::make_pair(name, oss.str())); + + // For preserving data layout, declare output struct as domain shader input, too + if (psContext->psShader->eShaderType == HULL_SHADER) + { + out += "In"; + + std::ostringstream oss; + oss << type << " " << name; + + // VERTEX_SHADER hardcoded on purpose + uint32_t loc = psContext->psDependencies->GetVaryingLocation(name, VERTEX_SHADER, true); + oss << " [[ " << "attribute(" << loc << ")" << " ]] "; + + psContext->m_Reflection.OnInputBinding(name, loc); + m_StructDefinitions[out].m_Members.push_back(std::make_pair(name, oss.str())); + } break; } case GEOMETRY_SHADER: - case DOMAIN_SHADER: - case HULL_SHADER: default: ASSERT(0); break; @@ -2226,7 +2468,7 @@ void ToMetal::EnsureShadowSamplerDeclared() if (m_ShadowSamplerDeclared) return; - if((psContext->flags & HLSLCC_FLAG_METAL_SHADOW_SAMPLER_LINEAR) != 0) + if((psContext->flags & HLSLCC_FLAG_METAL_SHADOW_SAMPLER_LINEAR) != 0 || (psContext->psShader->eShaderType == COMPUTE_SHADER)) m_ExtraGlobalDefinitions += "constexpr sampler _mtl_xl_shadow_sampler(address::clamp_to_edge, filter::linear, compare_func::greater_equal);\n"; else m_ExtraGlobalDefinitions += "constexpr sampler _mtl_xl_shadow_sampler(address::clamp_to_edge, filter::nearest, compare_func::greater_equal);\n"; diff --git a/src/toMetalInstruction.cpp b/src/toMetalInstruction.cpp index b581e3e..d5a60b5 100644 --- a/src/toMetalInstruction.cpp +++ b/src/toMetalInstruction.cpp @@ -246,7 +246,7 @@ void ToMetal::AddComparison(Instruction* psInst, ComparisonType eType, glsl << TranslateOperand(&psInst->asOperands[2], typeFlag, destMask); if (!isBoolDest) { - bcatcstr(glsl, ") ? 0xFFFFFFFFu : 0u"); + bcatcstr(glsl, ") ? 0xFFFFFFFFu : 0u"); } AddAssignPrologue(needsParenthesis); } @@ -361,6 +361,25 @@ void ToMetal::AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand // TODO: We can actually do this in one op using mix(). int srcElem = -1; SHADER_VARIABLE_TYPE s0Type = src0->GetDataType(psContext); + + // Use an extra temp if dest is also one of the sources. Without this some swizzle combinations + // might alter the source before all components are handled. + const char* tempName = "hlslcc_movcTemp"; + bool dstIsSrc1 = (pDest->eType == src1->eType) && (pDest->ui32RegisterNumber == src1->ui32RegisterNumber); + bool dstIsSrc2 = (pDest->eType == src2->eType) && (pDest->ui32RegisterNumber == src2->ui32RegisterNumber); + + if (dstIsSrc1 || dstIsSrc2) + { + psContext->AddIndentation(); + bcatcstr(glsl, "{\n"); + ++psContext->indent; + psContext->AddIndentation(); + int numComponents = (pDest->eType == OPERAND_TYPE_TEMP) ? + psContext->psShader->GetTempComponentCount(eDestType, pDest->ui32RegisterNumber) : + pDest->iNumComponents; + bformata(glsl, "%s %s = %s;\n", HLSLcc::GetConstructorForType(psContext, eDestType, numComponents), tempName, TranslateOperand(pDest, TO_FLAG_NAME_ONLY).c_str()); + } + for (destElem = 0; destElem < 4; ++destElem) { int numParenthesis = 0; @@ -391,12 +410,27 @@ void ToMetal::AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand } } - glsl << TranslateOperand(src1, SVTTypeToFlag(eDestType), 1 << srcElem); + if (!dstIsSrc1) + glsl << TranslateOperand(src1, SVTTypeToFlag(eDestType), 1 << srcElem); + else + bformata(glsl, "%s%s", tempName, TranslateOperandSwizzle(src1, 1 << srcElem, 0).c_str()); + bcatcstr(glsl, " : "); - glsl << TranslateOperand(src2, SVTTypeToFlag(eDestType), 1 << srcElem); + + if (!dstIsSrc2) + glsl << TranslateOperand(src2, SVTTypeToFlag(eDestType), 1 << srcElem); + else + bformata(glsl, "%s%s", tempName, TranslateOperandSwizzle(src2, 1 << srcElem, 0).c_str()); AddAssignPrologue(numParenthesis); } + + if (dstIsSrc1 || dstIsSrc2) + { + --psContext->indent; + psContext->AddIndentation(); + bcatcstr(glsl, "}\n"); + } } } @@ -484,9 +518,8 @@ void ToMetal::CallTernaryOp(const char* op1, const char* op2, Instruction* psIns } void ToMetal::CallHelper3(const char* name, Instruction* psInst, - int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask) + int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask, uint32_t ui32Flags) { - uint32_t ui32Flags = TO_AUTO_BITCAST_TO_FLOAT; bstring glsl = *psContext->currentGLSLString; uint32_t destMask = paramsShouldFollowWriteMask ? psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; uint32_t src2SwizCount = psInst->asOperands[src2].GetNumSwizzleElements(destMask); @@ -521,6 +554,12 @@ void ToMetal::CallHelper3(const char* name, Instruction* psInst, AddAssignPrologue(numParenthesis); } +void ToMetal::CallHelper3(const char* name, Instruction* psInst, + int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask) +{ + CallHelper3(name, psInst, dest, src0, src1, src2, paramsShouldFollowWriteMask, TO_AUTO_BITCAST_TO_FLOAT); +} + void ToMetal::CallHelper2(const char* name, Instruction* psInst, int dest, int src0, int src1, int paramsShouldFollowWriteMask) { @@ -850,9 +889,9 @@ void ToMetal::TranslateTexCoord( opMask = OPERAND_4_COMPONENT_MASK_X; bstring glsl = *psContext->currentGLSLString; glsl << TranslateOperand(psTexCoordOperand, flags, opMask); - + bcatcstr(glsl, ", round("); - + opMask = OPERAND_4_COMPONENT_MASK_Y; flags = TO_AUTO_BITCAST_TO_FLOAT; isArray = true; @@ -878,10 +917,10 @@ void ToMetal::TranslateTexCoord( // xy for coord, z for array element opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y; flags |= TO_AUTO_EXPAND_TO_VEC2; - + bstring glsl = *psContext->currentGLSLString; glsl << TranslateOperand(psTexCoordOperand, flags, opMask); - + bcatcstr(glsl, ", round("); opMask = OPERAND_4_COMPONENT_MASK_Z; @@ -894,12 +933,12 @@ void ToMetal::TranslateTexCoord( // xyz for coord, w for array element opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z; flags |= TO_AUTO_EXPAND_TO_VEC3; - + bstring glsl = *psContext->currentGLSLString; glsl << TranslateOperand(psTexCoordOperand, flags, opMask); - + bcatcstr(glsl, ", round("); - + opMask = OPERAND_4_COMPONENT_MASK_W; flags = TO_AUTO_BITCAST_TO_FLOAT; isArray = true; @@ -915,7 +954,7 @@ void ToMetal::TranslateTexCoord( //FIXME detect when integer coords are needed. bstring glsl = *psContext->currentGLSLString; glsl << TranslateOperand(psTexCoordOperand, flags, opMask); - + if (isArray) bcatcstr(glsl, ")"); @@ -948,7 +987,7 @@ void ToMetal::GetResInfoData(Instruction* psInst, int index, int destElem) bcatcstr(glsl, "1.0f / float("); numParenthesis++; } - glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); + glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_NAME_ONLY); if ((index == 1 && psInst->eResDim == RESOURCE_DIMENSION_TEXTURE1DARRAY) || (index == 2 && (psInst->eResDim == RESOURCE_DIMENSION_TEXTURE2DARRAY || psInst->eResDim == RESOURCE_DIMENSION_TEXTURE2DMSARRAY))) @@ -958,13 +997,13 @@ void ToMetal::GetResInfoData(Instruction* psInst, int index, int destElem) else { bcatcstr(glsl, metalGetters[index]); - + if (index < 3) { if (psInst->eResDim != RESOURCE_DIMENSION_TEXTURE2DMS && psInst->eResDim != RESOURCE_DIMENSION_TEXTURE2DMSARRAY) glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER); //mip level - + bcatcstr(glsl, ")"); } } @@ -1073,13 +1112,23 @@ void ToMetal::TranslateTextureSample(Instruction* psInst, SHADER_VARIABLE_TYPE dataType = psContext->psShader->sInfo.GetTextureDataType(psSrcTex->ui32RegisterNumber); psContext->AddIndentation(); AddAssignToDest(psDest, dataType, psSrcTex->GetNumSwizzleElements(), &numParenthesis); - + std::string texName = TranslateOperand(psSrcTex, TO_FLAG_NAME_ONLY); // TextureName.FuncName( glsl << texName; bformata(glsl, ".%s(", funcName); + bool isDepthSampler = false; + for(unsigned j = 0, m = m_Textures.size() ; j < m ; ++j) + { + if(m_Textures[j].name == texName) + { + isDepthSampler = m_Textures[j].isDepthSampler; + break; + } + } + // Sampler name //TODO: Is it ok to use fixed shadow sampler in all cases of depth compare or would we need more // accurate way of detecting shadow cases (atm all depth compares are interpreted as shadow usage) @@ -1143,8 +1192,8 @@ void ToMetal::TranslateTextureSample(Instruction* psInst, } bool hadOffset = false; - - // Add offset param + + // Add offset param if (psInst->bAddressOffset) { hadOffset = true; @@ -1178,7 +1227,7 @@ void ToMetal::TranslateTextureSample(Instruction* psInst, mask |= OPERAND_4_COMPONENT_MASK_Y; if (ui32NumOffsets > 2) mask |= OPERAND_4_COMPONENT_MASK_Z; - + bcatcstr(glsl, ","); glsl << TranslateOperand(psSrcOff, TO_FLAG_INTEGER, mask); } @@ -1191,7 +1240,7 @@ void ToMetal::TranslateTextureSample(Instruction* psInst, { if (!(ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE)) { - // Need to add offset param to match func overload + // Need to add offset param to match func overload if (!hadOffset) { if (ui32NumOffsets == 1) @@ -1199,7 +1248,7 @@ void ToMetal::TranslateTextureSample(Instruction* psInst, else bformata(glsl, ", int%d(0)", ui32NumOffsets); } - + bcatcstr(glsl, ", component::"); glsl << TranslateOperandSwizzle(psSrcSamp, OPERAND_4_COMPONENT_MASK_ALL, 0, false); } @@ -1212,7 +1261,7 @@ void ToMetal::TranslateTextureSample(Instruction* psInst, bcatcstr(glsl, ")"); - if (!(ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) || (ui32Flags & TEXSMP_FLAG_GATHER)) + if (!((ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) || isDepthSampler) || (ui32Flags & TEXSMP_FLAG_GATHER)) { // iWriteMaskEnabled is forced off during DecodeOperand because swizzle on sampler uniforms // does not make sense. But need to re-enable to correctly swizzle this particular instruction. @@ -1232,7 +1281,7 @@ void ToMetal::TranslateDynamicComponentSelection(const ShaderVarType* psVarType, bstring glsl = *psContext->currentGLSLString; ASSERT(psVarType->Class == SVC_VECTOR); - bcatcstr(glsl, "["); // Access vector component with [] notation + bcatcstr(glsl, "["); // Access vector component with [] notation if (offset > 0) bcatcstr(glsl, "("); @@ -1290,7 +1339,7 @@ void ToMetal::TranslateShaderStorageStore(Instruction* psInst) { psContext->AddIndentation(); glsl << TranslateOperand(psDest, TO_FLAG_DESTINATION | TO_FLAG_NAME_ONLY); - + if (psDestAddr) { bcatcstr(glsl, "["); @@ -1427,7 +1476,7 @@ void ToMetal::TranslateShaderStorageLoad(Instruction* psInst) bcatcstr(glsl, "u"); } bcatcstr(glsl, "]"); - + if (addedBitcast) bcatcstr(glsl, ")"); } @@ -1780,7 +1829,7 @@ void ToMetal::TranslateAtomicMemOp(Instruction* psInst) glsl << TranslateOperand(dest, TO_FLAG_DESTINATION | TO_FLAG_NAME_ONLY); bcatcstr(glsl, "["); glsl << TranslateOperand(destAddr, destAddrFlag, OPERAND_4_COMPONENT_MASK_X); - + if (!psBinding || psBinding->eType != RTYPE_UAV_RWTYPED) { // Structured buf if we have both x & y swizzles. Raw buf has only x -> no .value[] @@ -2026,7 +2075,7 @@ void ToMetal::TranslateInstruction(Instruction* psInst) psContext->AddIndentation(); bcatcstr(glsl, "//MAD\n"); #endif - CallTernaryOp("*", "+", psInst, 0, 1, 2, 3, TO_FLAG_NONE); + CallHelper3("fma", psInst, 0, 1, 2, 3, 1); break; } case OPCODE_IMAD: @@ -2045,6 +2094,16 @@ void ToMetal::TranslateInstruction(Instruction* psInst) CallTernaryOp("*", "+", psInst, 0, 1, 2, 3, ui32Flags); break; } + case OPCODE_DFMA: + { + uint32_t ui32Flags = TO_FLAG_DOUBLE; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//DFMA\n"); +#endif + CallHelper3("fma", psInst, 0, 1, 2, 3, 1, ui32Flags); + break; + } case OPCODE_DADD: { #ifdef _DEBUG @@ -2992,24 +3051,27 @@ void ToMetal::TranslateInstruction(Instruction* psInst) psContext->AddIndentation(); bcatcstr(glsl, "//SYNC\n"); #endif - const char *barrierFlags = "mem_none"; - if (ui32SyncFlags & SYNC_THREAD_GROUP_SHARED_MEMORY) - { - barrierFlags = "mem_threadgroup"; - } - if (ui32SyncFlags & (SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GROUP | SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GLOBAL)) - { - barrierFlags = "mem_device"; - if (ui32SyncFlags & SYNC_THREAD_GROUP_SHARED_MEMORY) - { - barrierFlags = "mem_device_and_threadgroup"; - } - } - psContext->AddIndentation(); + const bool sync_threadgroup = (ui32SyncFlags & SYNC_THREAD_GROUP_SHARED_MEMORY) != 0; + const bool sync_device = (ui32SyncFlags & (SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GROUP | SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GLOBAL)) != 0; + + const char* barrierFlags = "mem_flags::mem_none"; + if(sync_threadgroup && sync_device) barrierFlags = "mem_flags::mem_threadgroup | mem_flags::mem_device"; + else if(sync_threadgroup) barrierFlags = "mem_flags::mem_threadgroup"; + else if(sync_device) barrierFlags = "mem_flags::mem_device"; + if (ui32SyncFlags & SYNC_THREADS_IN_GROUP) - bformata(glsl, "threadgroup_barrier(mem_flags::%s);\n", barrierFlags); + { + psContext->AddIndentation(); + bformata(glsl, "threadgroup_barrier(%s);\n", barrierFlags); + } else - bformata(glsl, "simdgroup_barrier(mem_flags::%s);\n", barrierFlags); + { + psContext->AddIndentation(); bformata(glsl, "#if __HAVE_SIMDGROUP_BARRIER__\n"); + psContext->AddIndentation(); bformata(glsl, "simdgroup_barrier(%s);\n", barrierFlags); + psContext->AddIndentation(); bformata(glsl, "#else\n"); + psContext->AddIndentation(); bformata(glsl, "threadgroup_barrier(%s);\n", barrierFlags); + psContext->AddIndentation(); bformata(glsl, "#endif\n"); + } break; } @@ -3107,7 +3169,7 @@ void ToMetal::TranslateInstruction(Instruction* psInst) #endif psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, psInst->asOperands[2].ui32RegisterNumber, &psBinding); - + if (psInst->eResDim == RESOURCE_DIMENSION_BUFFER) // Hack typed buffer as raw buf { psInst->eOpcode = OPCODE_LD_UAV_TYPED; @@ -3586,75 +3648,53 @@ template vec bitFieldExtractI(const vec width, const ve } case OPCODE_F32TOF16: { - // TODO Metallize - ASSERT(0); // Are these even used? - const uint32_t destElemCount = psInst->asOperands[0].GetNumSwizzleElements(); - const uint32_t s0ElemCount = psInst->asOperands[1].GetNumSwizzleElements(); - uint32_t destElem; + uint32_t writeMask = psInst->asOperands[0].GetAccessMask(); + #ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//F32TOF16\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "//F32TOF16\n"); #endif - for (destElem = 0; destElem < destElemCount; ++destElem) - { - const char* swizzle[] = { ".x", ".y", ".z", ".w" }; - //unpackHalf2x16 converts two f16s packed into uint to two f32s. + for (int i = 0; i < 4; i++) + { + if ((writeMask & (1 << i)) == 0) + continue; + psContext->AddIndentation(); + psInst->asOperands[0].ui32CompMask = (1 << i); + psInst->asOperands[0].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, &numParenthesis); - //dest.swiz.x = unpackHalf2x16(src.swiz.x).x - //dest.swiz.y = unpackHalf2x16(src.swiz.y).x - //dest.swiz.z = unpackHalf2x16(src.swiz.z).x - //dest.swiz.w = unpackHalf2x16(src.swiz.w).x - - psContext->AddIndentation(); - glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); - if (destElemCount > 1) - bcatcstr(glsl, swizzle[destElem]); - - bcatcstr(glsl, " = unpackHalf2x16("); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER); - if (s0ElemCount > 1) - bcatcstr(glsl, swizzle[destElem]); - bcatcstr(glsl, ").x;\n"); - - } - break; + bcatcstr(glsl, "as_type(half2("); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE, (1 << i)); + bcatcstr(glsl, ", 0.0))"); + AddAssignPrologue(numParenthesis); + } + break; } case OPCODE_F16TOF32: { - // TODO metallize - ASSERT(0); // Are these even used? - const uint32_t destElemCount = psInst->asOperands[0].GetNumSwizzleElements(); - const uint32_t s0ElemCount = psInst->asOperands[1].GetNumSwizzleElements(); - uint32_t destElem; + uint32_t writeMask = psInst->asOperands[0].GetAccessMask(); + #ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//F16TOF32\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "//F16TOF32\n"); #endif - for (destElem = 0; destElem < destElemCount; ++destElem) - { - const char* swizzle[] = { ".x", ".y", ".z", ".w" }; - //packHalf2x16 converts two f32s to two f16s packed into a uint. + for (int i = 0; i < 4; i++) + { + if ((writeMask & (1 << i)) == 0) + continue; + psContext->AddIndentation(); + psInst->asOperands[0].ui32CompMask = (1 << i); + psInst->asOperands[0].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; + AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, 1, &numParenthesis); - //dest.swiz.x = packHalf2x16(vec2(src.swiz.x)) & 0xFFFF - //dest.swiz.y = packHalf2x16(vec2(src.swiz.y)) & 0xFFFF - //dest.swiz.z = packHalf2x16(vec2(src.swiz.z)) & 0xFFFF - //dest.swiz.w = packHalf2x16(vec2(src.swiz.w)) & 0xFFFF - - psContext->AddIndentation(); - glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION | TO_FLAG_UNSIGNED_INTEGER); - if (destElemCount > 1) - bcatcstr(glsl, swizzle[destElem]); - - bcatcstr(glsl, " = packHalf2x16(vec2("); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE); - if (s0ElemCount > 1) - bcatcstr(glsl, swizzle[destElem]); - bcatcstr(glsl, ")) & 0xFFFF;\n"); - - } - break; + bcatcstr(glsl, "as_type("); + glsl << TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_UINT, (1 << i)); + bcatcstr(glsl, ").x"); + AddAssignPrologue(numParenthesis); + } + break; } case OPCODE_INEG: { @@ -3783,7 +3823,7 @@ template vec bitFieldExtractI(const vec width, const ve { #ifdef _DEBUG psContext->AddIndentation(); - bcatcstr(glsl, "//INOT\n"); + bcatcstr(glsl, "//NOT\n"); #endif psContext->AddIndentation(); AddAssignToDest(&psInst->asOperands[0], SVT_INT, psInst->asOperands[1].GetNumSwizzleElements(), &numParenthesis); @@ -3830,7 +3870,7 @@ template vec bitFieldExtractI(const vec width, const ve psContext->m_Reflection.OnDiagnostics("Metal shading language does not support buffer size query from shader. Pass the size to shader as const instead.\n", 0, false); // TODO: change this into error after modifying gfx-test 450 break; } - + case OPCODE_SAMPLE_INFO: { #ifdef _DEBUG @@ -3858,7 +3898,6 @@ template vec bitFieldExtractI(const vec width, const ve case OPCODE_DTOF: case OPCODE_FTOD: case OPCODE_DDIV: - case OPCODE_DFMA: case OPCODE_DRCP: case OPCODE_MSAD: case OPCODE_DTOI: diff --git a/src/toMetalOperand.cpp b/src/toMetalOperand.cpp index af1e72e..63531e1 100644 --- a/src/toMetalOperand.cpp +++ b/src/toMetalOperand.cpp @@ -7,7 +7,6 @@ #include "internal_includes/toMetal.h" #include #include -#include #include #include @@ -331,6 +330,16 @@ static std::string printImmediate32(uint32_t value, SHADER_VARIABLE_TYPE eType) return oss.str(); } +static std::string MakeCBVarName(const std::string &cbName, const std::string &fullName, bool isUnityInstancingBuffer) +{ + // For Unity instancing buffer: "CBufferName.StructTypeName[] -> CBufferName[]". See ToMetal::DeclareConstantBuffer. + if (isUnityInstancingBuffer && !cbName.empty() && cbName[cbName.size() - 1] == '.' && fullName.find_first_of('[') != std::string::npos) + { + return cbName.substr(0, cbName.size() - 1) + fullName.substr(fullName.find_first_of('[')); + } + return cbName + fullName; +} + std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase) { std::ostringstream oss; @@ -402,7 +411,7 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui bool bitcast = false; if (AreTypesCompatibleMetal(eType, ui32TOFlag) == 0) { - if (CanDoDirectCast(eType, requestedType)) + if (CanDoDirectCast(psContext, eType, requestedType)) { oss << GetConstructorForType(psContext, requestedType, requestedComponents, false) << "("; numParenthesis++; @@ -490,19 +499,15 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui { const ShaderInfo::InOutSignature *psSig = NULL; psContext->psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->ui32CompMask, &psSig); - if ((psSig->eSystemValueType == NAME_POSITION && psSig->ui32SemanticIndex == 0) || - (psSig->semanticName == "POS" && psSig->ui32SemanticIndex == 0) || - (psSig->semanticName == "SV_POSITION" && psSig->ui32SemanticIndex == 0)) + if (psContext->psShader->eShaderType == HULL_SHADER || psContext->psShader->eShaderType == DOMAIN_SHADER) { - // Shouldn't happen on Metal? - ASSERT(0); - break; -// bcatcstr(glsl, "gl_in"); -// TranslateOperandIndex(psOperand, 0);//Vertex index -// bcatcstr(glsl, ".gl_Position"); + oss << "input.cp"; + oss << TranslateOperandIndex(psOperand, 0);//Vertex index + oss << "." << psContext->GetDeclaredInputName(psOperand, piRebase, 1, pui32IgnoreSwizzle); } else { + // Not sure if this codepath is active outside hull/domain oss << psContext->GetDeclaredInputName(psOperand, piRebase, 0, pui32IgnoreSwizzle); oss << TranslateOperandIndex(psOperand, 0);//Vertex index @@ -654,6 +659,7 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui int32_t index = -1; std::vector arrayIndices; bool isArray = false; + bool isFBInput = false; psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, psOperand->aui32ArraySizes[0], &psCBuf); ASSERT(psCBuf != NULL); @@ -665,14 +671,7 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui if(psCBuf) { //$Globals. - if(psCBuf->name[0] == '$') - { - cbName = "Globals"; - } - else - { - cbName = psCBuf->name; - } + cbName = GetCBName(psCBuf->name); cbName += "."; // Drop the constant buffer name from subpass inputs if (cbName.substr(0, 19) == "hlslcc_SubpassInput") @@ -702,13 +701,23 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui componentsNeeded = maxSwiz - minSwiz + 1; } - ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], psOperand->aui32Swizzle, psCBuf, &psVarType, &isArray, &arrayIndices, &rebase, psContext->flags); + // When we have a component mask that doesn't have .x set (this basically only happens when we manually open operands into components) + // We have to pull down the swizzle array to match the first bit that's actually set + uint32_t tmpSwizzle[4] = { 0 }; + int firstBitSet = 0; + if (ui32CompMask == 0) + ui32CompMask = 0xf; + while ((ui32CompMask & (1 << firstBitSet)) == 0) + firstBitSet++; + std::copy(&psOperand->aui32Swizzle[firstBitSet], &psOperand->aui32Swizzle[4], &tmpSwizzle[0]); + + ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], tmpSwizzle, psCBuf, &psVarType, &isArray, &arrayIndices, &rebase, psContext->flags); // Get a possible dynamic array index - std::ostringstream dynIndexOss; + std::string dynamicIndexStr; bool needsIndexCalcRevert = false; bool isAoS = ((!isArray && arrayIndices.size() > 0) || (isArray && arrayIndices.size() > 1)); - + bool isUnityInstancingBuffer = isAoS && IsUnityFlexibleInstancingBuffer(psCBuf); Operand *psDynIndexOp = psOperand->GetDynamicIndexOperand(psContext, psVarType, isAoS, &needsIndexCalcRevert); if (psDynIndexOp != NULL) @@ -719,16 +728,18 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui if (eType != SVT_INT && eType != SVT_UINT) opFlags = TO_AUTO_BITCAST_TO_INT; - dynIndexOss << TranslateOperand(psDynIndexOp, opFlags); + dynamicIndexStr = TranslateOperand(psDynIndexOp, opFlags, 0x1); // Just take the first component for the index } - std::string dynamicIndexStr = dynIndexOss.str(); - if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE || (componentsNeeded <= psVarType->Columns)) { // Simple case: just access one component std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(psVarType, arrayIndices, dynamicIndexStr, needsIndexCalcRevert, psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES); + // Special hack for MSAA subpass inputs: in Metal we can only read the "current" sample, so ignore the index + if (strncmp(fullName.c_str(), "hlslcc_fbinput", 14) == 0) + isFBInput = true; + if (((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) != 0) && ((psVarType->Class == SVC_MATRIX_ROWS) || (psVarType->Class == SVC_MATRIX_COLUMNS))) { // We'll need to add the prefix only to the last section of the name @@ -741,7 +752,7 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui fullName.insert(commaPos + 1, prefix); } - oss << cbName << fullName; + oss << MakeCBVarName(cbName, fullName, isUnityInstancingBuffer); } else { @@ -769,18 +780,15 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], tmpSwizzle, psCBuf, &tmpVarType, &tmpIsArray, &tmpArrayIndices, &tmpRebase, psContext->flags); std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(tmpVarType, tmpArrayIndices, dynamicIndexStr, needsIndexCalcRevert, psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES); - - if (tmpVarType->Class == SVC_SCALAR) - { - oss << cbName << fullName; - } - else + oss << MakeCBVarName(cbName, fullName, isUnityInstancingBuffer); + + if (tmpVarType->Class != SVC_SCALAR) { uint32_t swizzle; tmpRebase /= 4; // 0 => 0, 4 => 1, 8 => 2, 12 /= 3 swizzle = psOperand->aui32Swizzle[i] - tmpRebase; - oss << cbName << fullName << "." << ("xyzw"[swizzle]); + oss << "." << ("xyzw"[swizzle]); } } oss << ")"; @@ -799,7 +807,12 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui bool hasDynamicIndex = !dynamicIndexStr.empty() && (arrayIndices.size() <= 1); bool hasImmediateIndex = (index != -1) && !(hasDynamicIndex && index == 0); - if (hasDynamicIndex || hasImmediateIndex) + // Ignore index altogether on fb inputs + if (isFBInput) + { + // Nothing to do here + } + else if (hasDynamicIndex || hasImmediateIndex) { std::ostringstream fullIndexOss; if (hasDynamicIndex && hasImmediateIndex) @@ -901,34 +914,47 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui case OPERAND_TYPE_INPUT_FORK_INSTANCE_ID: case OPERAND_TYPE_INPUT_JOIN_INSTANCE_ID: { - // Not supported on Metal - ASSERT(0); + oss << "phaseInstanceID"; // Not a real builtin, but passed as a function parameter. + *pui32IgnoreSwizzle = 1; break; } case OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER: { - oss << "ImmCB_" << psContext->currentPhase - << "_" << psOperand->ui32RegisterNumber - << "_" << psOperand->m_Rebase; - if (psOperand->m_SubOperands[0].get()) - { - //Indexes must be integral. Offset is already taken care of above. - oss << "[" << TranslateOperand(psOperand->m_SubOperands[0].get(), TO_FLAG_INTEGER) << "]"; - } - if (psOperand->m_Size == 1) - *pui32IgnoreSwizzle = 1; + oss << "ImmCB_" << psContext->currentPhase; + oss << TranslateOperandIndex(psOperand, 0); break; } case OPERAND_TYPE_INPUT_DOMAIN_POINT: { - // Not supported on Metal - ASSERT(0); + oss << "mtl_TessCoord"; break; } case OPERAND_TYPE_INPUT_CONTROL_POINT: { - // Not supported on Metal - ASSERT(0); + int ignoreRedirect = 1; + int regSpace = psOperand->GetRegisterSpace(psContext); + + if ((regSpace == 0 && psContext->psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe) || + (regSpace == 1 && psContext->psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe)) + { + ignoreRedirect = 0; + } + + if (ignoreRedirect) + { + oss << "input.cp"; + oss << TranslateOperandIndex(psOperand, 0);//Vertex index + oss << "." << psContext->GetDeclaredInputName(psOperand, piRebase, ignoreRedirect, pui32IgnoreSwizzle); + } + else + { + oss << psContext->GetDeclaredInputName(psOperand, piRebase, ignoreRedirect, pui32IgnoreSwizzle); + oss << TranslateOperandIndex(psOperand, 0);//Vertex index + } + + // Check for scalar + if ((psContext->psShader->abScalarInput[psOperand->GetRegisterSpace(psContext)][psOperand->ui32RegisterNumber] & psOperand->GetAccessMask()) != 0) + *pui32IgnoreSwizzle = 1; break; } case OPERAND_TYPE_NULL: @@ -939,8 +965,8 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui } case OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID: { - // Not supported on Metal - ASSERT(0); + oss << "controlPointID"; + *pui32IgnoreSwizzle = 1; break; } case OPERAND_TYPE_OUTPUT_COVERAGE_MASK: @@ -1030,9 +1056,101 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui } case OPERAND_TYPE_INPUT_PATCH_CONSTANT: { - // Not supported on Metal - ASSERT(0); + const ShaderInfo::InOutSignature* psIn; + psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psIn); + *piRebase = psIn->iRebase; + switch (psIn->eSystemValueType) + { + case NAME_POSITION: + oss << "mtl_Position"; + break; + case NAME_RENDER_TARGET_ARRAY_INDEX: + oss << "mtl_Layer"; + *pui32IgnoreSwizzle = 1; + break; + case NAME_CLIP_DISTANCE: + // this is temp variable, declaration and redirecting to actual output is handled in DeclareClipPlanes + char tmpName[128]; sprintf(tmpName, "phase%d_ClipDistance%d", psContext->currentPhase, psIn->ui32SemanticIndex); + oss << tmpName; + *pui32IgnoreSwizzle = 1; + break; + case NAME_VIEWPORT_ARRAY_INDEX: + oss << "mtl_ViewPortIndex"; + *pui32IgnoreSwizzle = 1; + break; + case NAME_VERTEX_ID: + oss << "mtl_VertexID"; + *pui32IgnoreSwizzle = 1; + break; + case NAME_INSTANCE_ID: + oss << "mtl_InstanceID"; + *pui32IgnoreSwizzle = 1; + break; + case NAME_IS_FRONT_FACE: + oss << "(mtl_FrontFace ? 0xffffffffu : uint(0))"; + *pui32IgnoreSwizzle = 1; + break; + case NAME_PRIMITIVE_ID: + // Not on Metal + ASSERT(0); + break; + case NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_LINE_DENSITY_TESSFACTOR: + if (psContext->psShader->aIndexedOutput[1][psOperand->ui32RegisterNumber]) + oss << "edgeTessellationFactor"; + else + oss << "edgeTessellationFactor[0]"; + *pui32IgnoreSwizzle = 1; + break; + case NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_LINE_DETAIL_TESSFACTOR: + oss << "edgeTessellationFactor[1]"; + *pui32IgnoreSwizzle = 1; + break; + case NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: + oss << "edgeTessellationFactor[2]"; + *pui32IgnoreSwizzle = 1; + break; + case NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: + oss << "edgeTessellationFactor[3]"; + *pui32IgnoreSwizzle = 1; + break; + case NAME_FINAL_TRI_INSIDE_TESSFACTOR: + case NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR: + if (psContext->psShader->aIndexedOutput[1][psOperand->ui32RegisterNumber]) + oss << "insideTessellationFactor"; + else + oss << "insideTessellationFactor[0]"; + *pui32IgnoreSwizzle = 1; + break; + case NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR: + oss << "insideTessellationFactor[1]"; + *pui32IgnoreSwizzle = 1; + break; + default: + const std::string patchPrefix = "patch."; + if (psContext->psShader->eShaderType == DOMAIN_SHADER) + oss << psContext->inputPrefix << patchPrefix << psIn->semanticName << psIn->ui32SemanticIndex; + else + oss << patchPrefix << psIn->semanticName << psIn->ui32SemanticIndex; + + // Disable swizzles if this is a scalar + if (psContext->psShader->eShaderType == HULL_SHADER) + { + if ((psContext->psShader->abScalarOutput[1][psOperand->ui32RegisterNumber] & psOperand->GetAccessMask()) != 0) + *pui32IgnoreSwizzle = 1; + } + else + { + if ((psContext->psShader->abScalarInput[1][psOperand->ui32RegisterNumber] & psOperand->GetAccessMask()) != 0) + *pui32IgnoreSwizzle = 1; + } + break; + } break; } default: