diff --git a/include/ShaderInfo.h b/include/ShaderInfo.h index 05ce37e..4d0b298 100644 --- a/include/ShaderInfo.h +++ b/include/ShaderInfo.h @@ -451,6 +451,8 @@ public: static ResourceGroup ResourceTypeToResourceGroup(ResourceType); + static uint32_t GetCBVarSize(const ShaderVarType* psType, bool matrixAsVectors, bool wholeArraySize = false); + static int GetShaderVarFromOffset(const uint32_t ui32Vec4Offset, const uint32_t (&pui32Swizzle)[4], const ConstantBuffer* psCBuf, @@ -460,7 +462,7 @@ public: int32_t* pi32Rebase, uint32_t flags); - static std::string GetShaderVarIndexedFullName(const ShaderVarType* psShaderVar, std::vector &indices); + static std::string GetShaderVarIndexedFullName(const ShaderVarType* psShaderVar, std::vector &indices, const std::string dynamicIndex, bool revertDynamicIndexCalc, bool matrixAsVectors); // Apply shader precision information to resource bindings void AddSamplerPrecisions(HLSLccSamplerPrecisionInfo &info); diff --git a/include/hlslcc.h b/include/hlslcc.h index f5f327e..c367901 100644 --- a/include/hlslcc.h +++ b/include/hlslcc.h @@ -444,8 +444,8 @@ static const unsigned int HLSLCC_FLAG_VULKAN_BINDINGS = 0x40000; // If set, metal output will use linear sampler for shadow compares, otherwise point sampler. static const unsigned int HLSLCC_FLAG_METAL_SHADOW_SAMPLER_LINEAR = 0x80000; -// If set, emits for NVN, the Nvidia-provided graphics API for Nintendo Switch. -static const unsigned int HLSLCC_FLAG_NVN_TARGET = 0x100000; +// If set, avoid emit atomic counter (ARB_shader_atomic_counters) and use atomic functions provided by ARB_shader_storage_buffer_object instead. +static const unsigned int HLSLCC_FLAG_AVOID_SHADER_ATOMIC_COUNTERS = 0x100000; // If set, and generating Vulkan shaders, attempts to detect static branching and transforms them into specialization constants static const unsigned int HLSLCC_FLAG_VULKAN_SPECIALIZATION_CONSTANTS = 0x200000; @@ -453,6 +453,12 @@ static const unsigned int HLSLCC_FLAG_VULKAN_SPECIALIZATION_CONSTANTS = 0x200000 // If set, this shader uses the GLSL extension EXT_shader_framebuffer_fetch static const unsigned int HLSLCC_FLAG_SHADER_FRAMEBUFFER_FETCH = 0x400000; +// Build for Switch. +static const unsigned int HLSLCC_FLAG_NVN_TARGET = 0x800000; + +// If set, generate an instance name for constant buffers. GLSL specs 4.5 disallows uniform variables from different constant buffers sharing the same name +// as long as they are part of the same final linked program. Uniform buffer instance names solve this cross-shader symbol conflict issue. +static const unsigned int HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT_WITH_INSTANCE_NAME = 0x1000000; #ifdef __cplusplus extern "C" { diff --git a/src/DataTypeAnalysis.cpp b/src/DataTypeAnalysis.cpp index 6f8c564..e99c3aa 100644 --- a/src/DataTypeAnalysis.cpp +++ b/src/DataTypeAnalysis.cpp @@ -315,13 +315,6 @@ void HLSLcc::DataTypeAnalysis::SetDataTypes(HLSLCrossCompilerContext* psContext, MarkOperandAs(&psInst->asOperands[2], SVT_INT_AMBIGUOUS, aeTempVecType); break; - case OPCODE_AND: - MarkOperandAs(&psInst->asOperands[0], SVT_INT_AMBIGUOUS, aeTempVecType); - MarkOperandAs(&psInst->asOperands[1], SVT_BOOL, aeTempVecType); - MarkOperandAs(&psInst->asOperands[2], SVT_BOOL, aeTempVecType); - break; - - case OPCODE_IF: case OPCODE_BREAKC: case OPCODE_CALLC: @@ -344,12 +337,18 @@ void HLSLcc::DataTypeAnalysis::SetDataTypes(HLSLCrossCompilerContext* psContext, MarkOperandAs(&psInst->asOperands[2], SVT_UINT, aeTempVecType); break; + case OPCODE_AND: + case OPCODE_OR: + MarkOperandAs(&psInst->asOperands[0], SVT_INT_AMBIGUOUS, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], SVT_BOOL, aeTempVecType); + MarkOperandAs(&psInst->asOperands[2], SVT_BOOL, aeTempVecType); + break; + // Integer ops that don't care of signedness case OPCODE_IADD: case OPCODE_INEG: case OPCODE_ISHL: case OPCODE_NOT: - case OPCODE_OR: case OPCODE_XOR: case OPCODE_BUFINFO: case OPCODE_COUNTBITS: @@ -673,7 +672,9 @@ void HLSLcc::DataTypeAnalysis::SetDataTypes(HLSLCrossCompilerContext* psContext, } } - if (foundImmediate && dataType == SVT_VOID) + // Use at minimum int type when any operand is immediate. + // Allowing bool could lead into bugs like case 883080 + if (foundImmediate && (dataType == SVT_VOID || dataType == SVT_BOOL)) dataType = SVT_INT; if (dataType != SVT_VOID) diff --git a/src/HLSLCrossCompilerContext.cpp b/src/HLSLCrossCompilerContext.cpp index 53cf376..fec51a8 100644 --- a/src/HLSLCrossCompilerContext.cpp +++ b/src/HLSLCrossCompilerContext.cpp @@ -101,7 +101,9 @@ void HLSLCrossCompilerContext::RequireExtension(const std::string &extName) return; m_EnabledExtensions.insert(extName); + bformata(extensions, "#ifdef %s\n", extName.c_str()); bformata(extensions, "#extension %s : require\n", extName.c_str()); + bcatcstr(extensions, "#endif\n"); } std::string HLSLCrossCompilerContext::GetDeclaredInputName(const Operand* psOperand, int *piRebase, int iIgnoreRedirect, uint32_t *puiIgnoreSwizzle) const diff --git a/src/HLSLccToolkit.cpp b/src/HLSLccToolkit.cpp index 56cb927..d43d6d7 100644 --- a/src/HLSLccToolkit.cpp +++ b/src/HLSLccToolkit.cpp @@ -457,11 +457,13 @@ namespace HLSLcc return false; } +#ifndef fpcheck #ifdef _MSC_VER #define fpcheck(x) (_isnan(x) || !_finite(x)) #else #define fpcheck(x) (std::isnan(x) || std::isinf(x)) #endif +#endif // #ifndef fpcheck // Helper function to print floats with full precision void PrintFloat(bstring b, float f) diff --git a/src/Operand.cpp b/src/Operand.cpp index 45bb8fd..70aa4e1 100644 --- a/src/Operand.cpp +++ b/src/Operand.cpp @@ -475,12 +475,11 @@ SHADER_VARIABLE_TYPE Operand::GetDataType(HLSLCrossCompilerContext* psContext, S const ShaderVarType* psVarType = NULL; int32_t rebase = -1; bool isArray; - int foundVar; psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, aui32ArraySizes[0], &psCBuf); if (psCBuf) { - foundVar = ShaderInfo::GetShaderVarFromOffset(aui32ArraySizes[1], aui32Swizzle, psCBuf, &psVarType, &isArray, NULL, &rebase, psContext->flags); - if (foundVar && m_SubOperands[1].get() == NULL) // TODO: why this suboperand thing? + int foundVar = ShaderInfo::GetShaderVarFromOffset(aui32ArraySizes[1], aui32Swizzle, psCBuf, &psVarType, &isArray, NULL, &rebase, psContext->flags); + if (foundVar) { return psVarType->Type; } @@ -583,4 +582,67 @@ int Operand::GetNumInputElements(const HLSLCrossCompilerContext *psContext) cons // TODO: Are there ever any cases where the mask has 'holes'? return HLSLcc::GetNumberBitsSet(psSig->ui32Mask); +} + +Operand* Operand::GetDynamicIndexOperand(HLSLCrossCompilerContext *psContext, const ShaderVarType* psVar, bool isAoS, bool *needsIndexCalcRevert) const +{ + Operand *psDynIndexOp = m_SubOperands[0].get(); + if (psDynIndexOp == NULL) + psDynIndexOp = m_SubOperands[1].get(); + + *needsIndexCalcRevert = false; + if (psDynIndexOp != NULL && isAoS) + { + // if dynamically indexing array of structs, try using the original index var before the float4 address calc + bool indexVarFound = false; + *needsIndexCalcRevert = true; + Instruction *psDynIndexOrigin = psDynIndexOp->m_Defines[0].m_Inst; + Operand *asOps = psDynIndexOrigin->asOperands; + Operand *psOriginOp = NULL; + + // DXBC always addresses as float4, find the address calculation + + // Special case where struct is float4 size, no extra calc is done + if (ShaderInfo::GetCBVarSize(psVar->Parent, true) <= 16) // matrixAsVectors arg does not matter here as with matrices the size will go over the limit anyway + { + indexVarFound = true; + *needsIndexCalcRevert = false; + } + else if (psDynIndexOrigin->eOpcode == OPCODE_IMUL) + { + // check which one of the src operands is the original index + if ((asOps[2].eType == OPERAND_TYPE_TEMP || asOps[2].eType == OPERAND_TYPE_INPUT) && asOps[3].eType == OPERAND_TYPE_IMMEDIATE32) + psOriginOp = &asOps[2]; + else if ((asOps[3].eType == OPERAND_TYPE_TEMP || asOps[3].eType == OPERAND_TYPE_INPUT) && asOps[2].eType == OPERAND_TYPE_IMMEDIATE32) + psOriginOp = &asOps[3]; + } + else if (psDynIndexOrigin->eOpcode == OPCODE_ISHL) + { + if (asOps[2].eType == OPERAND_TYPE_IMMEDIATE32) + psOriginOp = &asOps[1]; + } + + if (psOriginOp != NULL) + { + indexVarFound = true; + + // Check if the mul dest is not the same temp as the src. Also check that the temp + // does not have multiple uses (which could override the value) + // -> we can use src straight and no index revert calc is needed + if ((psOriginOp->eType == OPERAND_TYPE_INPUT) + || ((psOriginOp->ui32RegisterNumber != psDynIndexOp->ui32RegisterNumber || psOriginOp->GetDataType(psContext) != psDynIndexOp->GetDataType(psContext)) + && psOriginOp->m_Defines[0].m_Inst->m_Uses.size() == 1)) + { + psDynIndexOp = psOriginOp; + *needsIndexCalcRevert = false; + } + } + + // Atm we support only this very basic case of dynamic indexing array of structs. + // Return error if something else is encountered. + if (!indexVarFound) + psContext->m_Reflection.OnDiagnostics("Unsupported dynamic indexing scheme on constant buffer vars.", 0, true); + } + + return psDynIndexOp; } \ No newline at end of file diff --git a/src/ShaderInfo.cpp b/src/ShaderInfo.cpp index 0671079..4ec6181 100644 --- a/src/ShaderInfo.cpp +++ b/src/ShaderInfo.cpp @@ -150,29 +150,37 @@ int ShaderInfo::GetOutputSignatureFromSystemValue(SPECIAL_NAME eSystemValueType, return 0; } -static uint32_t GetCBVarSize(const ShaderVarType* psType, bool matrixAsVectors) +uint32_t ShaderInfo::GetCBVarSize(const ShaderVarType* psType, bool matrixAsVectors, bool wholeArraySize) { - // Struct size is calculated from the offset and size of its last member + // Default is regular matrices, vectors and scalars + uint32_t size = psType->Columns * psType->Rows * 4; + + // Struct size is calculated from the offset and size of its last member. + // Need to take into account that members could be arrays. if (psType->Class == SVC_STRUCT) { - return psType->Members.back().Offset + GetCBVarSize(&psType->Members.back(), matrixAsVectors); + size = psType->Members.back().Offset + GetCBVarSize(&psType->Members.back(), matrixAsVectors, true); } - // Matrices represented as vec4 arrays have special size calculation - if (matrixAsVectors) + else if (matrixAsVectors) { if (psType->Class == SVC_MATRIX_ROWS) { - return psType->Rows * 16; + size = psType->Rows * 16; } else if (psType->Class == SVC_MATRIX_COLUMNS) { - return psType->Columns * 16; + size = psType->Columns * 16; } } - // Regular matrices, vectors and scalars - return psType->Columns * psType->Rows * 4; + if (wholeArraySize && psType->Elements > 1) + { + uint32_t paddedSize = ((size + 15) / 16) * 16; // Arrays are padded to float4 size + size = (psType->Elements - 1) * paddedSize + size; // Except the last element + } + + return size; } static const ShaderVarType* IsOffsetInType(const ShaderVarType* psType, @@ -184,10 +192,8 @@ static const ShaderVarType* IsOffsetInType(const ShaderVarType* psType, uint32_t flags) { uint32_t thisOffset = parentOffset + psType->Offset; - uint32_t thisSize = GetCBVarSize(psType, (flags & HLSLCC_FLAG_TRANSLATE_MATRICES) != 0); - uint32_t paddedSize = thisSize; - if (thisSize % 16 > 0) - paddedSize += (16 - (thisSize % 16)); + uint32_t thisSize = ShaderInfo::GetCBVarSize(psType, (flags & HLSLCC_FLAG_TRANSLATE_MATRICES) != 0); + uint32_t paddedSize = ((thisSize + 15) / 16) * 16; uint32_t arraySize = thisSize; // Array elements are padded to align on vec4 size, except for the last one @@ -308,7 +314,7 @@ int ShaderInfo::GetShaderVarFromOffset(const uint32_t ui32Vec4Offset, // Patches the fullName of the var with given array indices. Does not insert the indexing for the var itself if it is an array. // Searches for brackets and inserts indices one by one. -std::string ShaderInfo::GetShaderVarIndexedFullName(const ShaderVarType* psShaderVar, std::vector &indices) +std::string ShaderInfo::GetShaderVarIndexedFullName(const ShaderVarType* psShaderVar, std::vector &indices, const std::string dynamicIndex, bool revertDynamicIndexCalc, bool matrixAsVectors) { std::ostringstream oss; size_t prevpos = 0; @@ -318,8 +324,29 @@ std::string ShaderInfo::GetShaderVarIndexedFullName(const ShaderVarType* psShade { pos++; oss << psShaderVar->fullName.substr(prevpos, pos - prevpos); - if (i < indices.size()) + + // Add possibly given dynamic index for the root array. + if (i == 0 && !dynamicIndex.empty()) + { + oss << dynamicIndex; + + // if we couldn't use original index temp, revert the float4 address calc here + if (revertDynamicIndexCalc) + { + const ShaderVarType* psRootVar = psShaderVar; + while (psRootVar->Parent != NULL) + psRootVar = psRootVar->Parent; + + uint32_t thisSize = (GetCBVarSize(psRootVar, matrixAsVectors) + 15) / 16; // size in float4 + oss << " / " << thisSize; + } + + if (!indices.empty() && indices[i] != 0) + oss << " + " << indices[i]; + } + else if (i < indices.size()) oss << indices[i]; + prevpos = pos; i++; pos = psShaderVar->fullName.find('[', prevpos); diff --git a/src/internal_includes/Operand.h b/src/internal_includes/Operand.h index fc6466e..439db70 100644 --- a/src/internal_includes/Operand.h +++ b/src/internal_includes/Operand.h @@ -92,6 +92,11 @@ public: // Same as above but with explicit shader type and phase int GetRegisterSpace(SHADER_TYPE eShaderType, SHADER_PHASE_TYPE eShaderPhaseType) const; + // Find the operand that contains the dynamic index for this operand (array in constant buffer). + // When isAoS is true, we'll try to find the original index var to avoid additional calculations. + // needsIndexCalcRevert output will tell if we need to divide the value to get the correct index. + Operand* GetDynamicIndexOperand(HLSLCrossCompilerContext *psContext, const ShaderVarType* psVar, bool isAoS, bool *needsIndexCalcRevert) const; + // Maps REFLECT_RESOURCE_PRECISION into OPERAND_MIN_PRECISION as much as possible static OPERAND_MIN_PRECISION ResourcePrecisionToOperandPrecision(REFLECT_RESOURCE_PRECISION ePrec); diff --git a/src/internal_includes/toGLSLOperand.h b/src/internal_includes/toGLSLOperand.h index c17852a..deda652 100644 --- a/src/internal_includes/toGLSLOperand.h +++ b/src/internal_includes/toGLSLOperand.h @@ -21,4 +21,6 @@ std::string ResourceName(HLSLCrossCompilerContext* psContext, ResourceGroup grou std::string TextureSamplerName(ShaderInfo* psShaderInfo, const uint32_t ui32TextureRegisterNumber, const uint32_t ui32SamplerRegisterNumber, const int bZCompare); void ConcatTextureSamplerName(bstring str, ShaderInfo* psShaderInfo, const uint32_t ui32TextureRegisterNumber, const uint32_t ui32SamplerRegisterNumber, const int bZCompare); +std::string UniformBufferInstanceName(HLSLCrossCompilerContext* psContext, const std::string& name); + #endif diff --git a/src/internal_includes/toMetal.h b/src/internal_includes/toMetal.h index 3022333..a159d51 100644 --- a/src/internal_includes/toMetal.h +++ b/src/internal_includes/toMetal.h @@ -165,6 +165,8 @@ private: void AddComparison(Instruction* psInst, ComparisonType eType, uint32_t typeFlag); + bool CanForceToHalfOperand(const Operand *psOperand); + void AddMOVBinaryOp(const Operand *pDest, Operand *pSrc); void AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand *src1, Operand *src2); void CallBinaryOp(const char* name, Instruction* psInst, diff --git a/src/toGLSL.cpp b/src/toGLSL.cpp index 04bfcc0..8301bb7 100644 --- a/src/toGLSL.cpp +++ b/src/toGLSL.cpp @@ -643,7 +643,7 @@ bool ToGLSL::Translate() if ((psContext->flags & HLSLCC_FLAG_VULKAN_SPECIALIZATION_CONSTANTS) != 0) { - DeclareSpecializationConstants(psShader->asPhases[i]); + DeclareSpecializationConstants(*psPhase); } diff --git a/src/toGLSLDeclaration.cpp b/src/toGLSLDeclaration.cpp index cd88bff..7fbde17 100644 --- a/src/toGLSLDeclaration.cpp +++ b/src/toGLSLDeclaration.cpp @@ -12,16 +12,18 @@ #include #include #include +#include #include "internal_includes/toGLSL.h" using namespace HLSLcc; +#ifndef fpcheck #ifdef _MSC_VER #define fpcheck(x) (_isnan(x) || !_finite(x)) #else -#include -#define fpcheck(x) ((std::isnan(x)) || (std::isinf(x))) +#define fpcheck(x) (std::isnan(x) || std::isinf(x)) #endif +#endif // #ifndef fpcheck static void DeclareConstBufferShaderVariable(const HLSLCrossCompilerContext *psContext, const char* Name, const struct ShaderVarType* psType, int unsizedArray, bool addUniformPrefix = false) //const SHADER_VARIABLE_CLASS eClass, const SHADER_VARIABLE_TYPE eType, @@ -877,14 +879,23 @@ static void DeclareUBOConstants(HLSLCrossCompilerContext* psContext, const uint3 if (psContext->flags & HLSLCC_FLAG_WRAP_UBO) bformata(glsl, "#ifndef HLSLCC_DISABLE_UNIFORM_BUFFERS\n"); - bcatcstr(glsl, "};\n"); + + + if (psContext->flags & HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT_WITH_INSTANCE_NAME) + { + std::string instanceName = UniformBufferInstanceName(psContext, psCBuf->name); + bformata(glsl, "} %s;\n", instanceName.c_str()); + } + else + bcatcstr(glsl, "};\n"); + if (psContext->flags & HLSLCC_FLAG_WRAP_UBO) bformata(glsl, "#endif\n#undef UNITY_UNIFORM\n"); } static void DeclareBufferVariable(HLSLCrossCompilerContext* psContext, uint32_t ui32BindingPoint, const Operand* psOperand, const uint32_t ui32GloballyCoherentAccess, - const uint32_t isRaw, const uint32_t isUAV, const uint32_t stride, bstring glsl) + const uint32_t isRaw, const uint32_t isUAV, const uint32_t hasEmbeddedCounter, const uint32_t stride, bstring glsl) { const bool isVulkan = (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0; bstring BufNamebstr = bfromcstr(""); @@ -921,6 +932,9 @@ static void DeclareBufferVariable(HLSLCrossCompilerContext* psContext, uint32_t bformata(glsl, "buffer %s {\n\t", BufName.c_str()); + if (hasEmbeddedCounter) + bformata(glsl, "coherent uint %s_counter;\n\t", BufName.c_str()); + if (isRaw) bcatcstr(glsl, "uint"); else @@ -1201,7 +1215,10 @@ static void TranslateResourceTexture(HLSLCrossCompilerContext* psContext, const { // Need to enable extension (either OES or ARB), but we only need to add it once if (IsESLanguage(psContext->psShader->eTargetLanguage)) + { psContext->RequireExtension("GL_OES_texture_cube_map_array"); + psContext->RequireExtension("GL_EXT_texture_cube_map_array"); + } else psContext->RequireExtension("GL_ARB_texture_cube_map_array"); } @@ -1997,8 +2014,8 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) } if(numViews > 0 && numViews < 10) { - bcatcstr(extensions, "#extension GL_OVR_multiview : require\n"); - bcatcstr(extensions, "#extension GL_OVR_multiview2 : enable\n"); + // multiview2 is required because we have built-in shaders that do eye-dependent work other than just position + bcatcstr(extensions, "#extension GL_OVR_multiview2 : require\n"); if(psShader->eShaderType == VERTEX_SHADER) bformata(glsl, "layout(num_views = %d) in;\n", numViews); @@ -2349,7 +2366,7 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) }; bformata(tgt, "\tImmCB_%d_%d_%d[%d] = ", psContext->currentPhase, chunk.first, chunk.second.m_Rebase, i); if (fpcheck(val[chunk.second.m_Rebase])) - bformata(tgt, "uintBitsToFloat(uint(%Xu))", *(uint32_t *)&val[chunk.second.m_Rebase]); + bformata(tgt, "uintBitsToFloat(uint(0x%Xu))", *(uint32_t *)&val[chunk.second.m_Rebase]); else HLSLcc::PrintFloat(tgt, val[chunk.second.m_Rebase]); bcatcstr(tgt, ";\n"); @@ -2371,7 +2388,7 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) if (k != 0) bcatcstr(tgt, ", "); if (fpcheck(val[k])) - bformata(tgt, "uintBitsToFloat(uint(%Xu))", *(uint32_t *)&val[k + chunk.second.m_Rebase]); + bformata(tgt, "uintBitsToFloat(uint(0x%Xu))", *(uint32_t *)&val[k + chunk.second.m_Rebase]); else HLSLcc::PrintFloat(tgt, val[k + chunk.second.m_Rebase]); } @@ -2807,6 +2824,7 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) case OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED: { const bool isVulkan = (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0; + const bool avoidAtomicCounter = (psContext->flags & HLSLCC_FLAG_AVOID_SHADER_ATOMIC_COUNTERS) != 0; if(psDecl->sUAV.bCounter) { if (isVulkan) @@ -2815,6 +2833,14 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) GLSLCrossDependencyData::VulkanResourceBinding uavBinding = psContext->psDependencies->GetVulkanResourceBinding(uavname, true); GLSLCrossDependencyData::VulkanResourceBinding counterBinding = std::make_pair(uavBinding.first, uavBinding.second+1); bformata(glsl, "layout(set = %d, binding = %d) buffer %s_counterBuf { highp uint %s_counter; };\n", counterBinding.first, counterBinding.second, uavname.c_str(), uavname.c_str()); + + DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], + psDecl->sUAV.ui32GloballyCoherentAccess, 0, 1, 0, psDecl->ui32BufferStride, glsl); + } + else if (avoidAtomicCounter) // no support for atomic counter. We must use atomic functions in SSBO instead. + { + DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], + psDecl->sUAV.ui32GloballyCoherentAccess, 0, 1, 1, psDecl->ui32BufferStride, glsl); } else { @@ -2824,12 +2850,18 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) bcatcstr(glsl, "highp "); bcatcstr(glsl, "atomic_uint "); ResourceName(glsl, psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0); - bformata(glsl, "_counter; \n"); + bcatcstr(glsl, "_counter; \n"); + + DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], + psDecl->sUAV.ui32GloballyCoherentAccess, 0, 1, 0, psDecl->ui32BufferStride, glsl); } } + else + { + DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], + psDecl->sUAV.ui32GloballyCoherentAccess, 0, 1, 0, psDecl->ui32BufferStride, glsl); + } - DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], - psDecl->sUAV.ui32GloballyCoherentAccess, 0, 1, psDecl->ui32BufferStride, glsl); break; } case OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW: @@ -2856,20 +2888,20 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) } DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], - psDecl->sUAV.ui32GloballyCoherentAccess, 1, 1, psDecl->ui32BufferStride, glsl); + psDecl->sUAV.ui32GloballyCoherentAccess, 1, 1, 0, psDecl->ui32BufferStride, glsl); break; } case OPCODE_DCL_RESOURCE_STRUCTURED: { DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], - psDecl->sUAV.ui32GloballyCoherentAccess, 0, 0, psDecl->ui32BufferStride, glsl); + psDecl->sUAV.ui32GloballyCoherentAccess, 0, 0, 0, psDecl->ui32BufferStride, glsl); break; } case OPCODE_DCL_RESOURCE_RAW: { DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], - psDecl->sUAV.ui32GloballyCoherentAccess, 1, 0, psDecl->ui32BufferStride, glsl); + psDecl->sUAV.ui32GloballyCoherentAccess, 1, 0, 0, psDecl->ui32BufferStride, glsl); break; } case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED: diff --git a/src/toGLSLInstruction.cpp b/src/toGLSLInstruction.cpp index 18092e8..7aca1a8 100644 --- a/src/toGLSLInstruction.cpp +++ b/src/toGLSLInstruction.cpp @@ -1925,6 +1925,7 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals bstring glsl = *psContext->currentGLSLString; int numParenthesis = 0; const bool isVulkan = ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0); + const bool avoidAtomicCounter = ((psContext->flags & HLSLCC_FLAG_AVOID_SHADER_ATOMIC_COUNTERS) != 0); if (!isEmbedded) { @@ -2130,10 +2131,10 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } else { - // Do component-wise and, glsl doesn't support && on bvecs + // Do component-wise and, glsl doesn't support || on bvecs for (uint32_t k = 0; k < 4; k++) { - if ((destMask && (1 << k)) == 0) + if ((destMask & (1 << k)) == 0) continue; int needsParenthesis = 0; @@ -2341,8 +2342,20 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals bcatcstr(glsl, "//UDIV\n"); #endif //destQuotient, destRemainder, src0, src1 - CallBinaryOp("/", psInst, 0, 2, 3, SVT_UINT); - CallBinaryOp("%", psInst, 1, 2, 3, SVT_UINT); + + // There are cases where destQuotient is the same variable as src0 or src1. If that happens, + // we need to compute "%" before the "/" in order to avoid src0 or src1 being overriden first. + if ((psInst->asOperands[0].eType != psInst->asOperands[2].eType || psInst->asOperands[0].ui32RegisterNumber != psInst->asOperands[2].ui32RegisterNumber) + && (psInst->asOperands[0].eType != psInst->asOperands[3].eType || psInst->asOperands[0].ui32RegisterNumber != psInst->asOperands[3].ui32RegisterNumber)) + { + CallBinaryOp("/", psInst, 0, 2, 3, SVT_UINT); + CallBinaryOp("%", psInst, 1, 2, 3, SVT_UINT); + } + else + { + CallBinaryOp("%", psInst, 1, 2, 3, SVT_UINT); + CallBinaryOp("/", psInst, 0, 2, 3, SVT_UINT); + } break; } case OPCODE_DIV: @@ -3673,6 +3686,8 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals break; default: ASSERT(0); + // Suppress uninitialised variable warning + srcDataType = SVT_VOID; break; } @@ -4010,13 +4025,13 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals #endif psContext->AddIndentation(); AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, &numParenthesis); - if (isVulkan) + if (isVulkan || avoidAtomicCounter) bcatcstr(glsl, "atomicAdd("); else bcatcstr(glsl, "atomicCounterIncrement("); ResourceName(glsl, psContext, RGROUP_UAV, psInst->asOperands[1].ui32RegisterNumber, 0); bformata(glsl, "_counter"); - if (isVulkan) + if (isVulkan || avoidAtomicCounter) bcatcstr(glsl, ", 1u)"); else bcatcstr(glsl, ")"); @@ -4031,13 +4046,13 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals #endif psContext->AddIndentation(); AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, &numParenthesis); - if (isVulkan) + if (isVulkan || avoidAtomicCounter) bcatcstr(glsl, "(atomicAdd("); else bcatcstr(glsl, "atomicCounterDecrement("); ResourceName(glsl, psContext, RGROUP_UAV, psInst->asOperands[1].ui32RegisterNumber, 0); bformata(glsl, "_counter"); - if (isVulkan) + if (isVulkan || avoidAtomicCounter) bcatcstr(glsl, ", 0xffffffffu) + 0xffffffffu)"); else bcatcstr(glsl, ")"); diff --git a/src/toGLSLOperand.cpp b/src/toGLSLOperand.cpp index b30b88d..d96365d 100644 --- a/src/toGLSLOperand.cpp +++ b/src/toGLSLOperand.cpp @@ -16,11 +16,13 @@ using namespace HLSLcc; +#ifndef fpcheck #ifdef _MSC_VER #define fpcheck(x) (_isnan(x) || !_finite(x)) #else #define fpcheck(x) (std::isnan(x) || std::isinf(x)) #endif +#endif // #ifndef fpcheck // Returns nonzero if types are just different precisions of the same underlying type @@ -434,8 +436,19 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan if (psOperand->eType == OPERAND_TYPE_INPUT) { // Check for scalar - if (psContext->psShader->abScalarInput[psOperand->GetRegisterSpace(psContext)][psOperand->ui32RegisterNumber] & psOperand->GetAccessMask() - && psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) + // You would think checking would be easy but there is a caveat: + // checking abScalarInput might report as scalar, while in reality that was redirected and now is vector so swizzle must be preserved + // as an example consider we have input: + // float2 x; float y; + // and later on we do + // tex2D(xxx, fixed2(x.x, y)); + // in that case we will generate redirect but which ui32RegisterNumber will be used for it is not strictly "specified" + // so we may end up with treating it as scalar (even though it is vector now) + const int redirectInput = psContext->psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber]; + const bool wasRedirected = redirectInput == 0xFF || redirectInput == 0xFE; + + const int scalarInput = psContext->psShader->abScalarInput[psOperand->GetRegisterSpace(psContext)][psOperand->ui32RegisterNumber]; + if (!wasRedirected && (scalarInput & psOperand->GetAccessMask()) && (psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE)) { scalarWithSwizzle = 1; *pui32IgnoreSwizzle = 1; @@ -884,197 +897,177 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan //Work out the variable name. Don't apply swizzle to that variable yet. int32_t rebase = 0; - if(psCBuf) + ASSERT(psCBuf != NULL); + + uint32_t componentsNeeded = 1; + uint32_t minSwiz = 3; + uint32_t maxSwiz = 0; + if (psOperand->eSelMode != OPERAND_4_COMPONENT_SELECT_1_MODE) { - uint32_t componentsNeeded = 1; - uint32_t minSwiz = 3; - uint32_t maxSwiz = 0; - if (psOperand->eSelMode != OPERAND_4_COMPONENT_SELECT_1_MODE) + int i; + for (i = 0; i < 4; i++) { - int i; - for (i = 0; i < 4; i++) - { - if ((ui32CompMask & (1 << i)) == 0) - continue; - minSwiz = std::min(minSwiz, psOperand->aui32Swizzle[i]); - maxSwiz = std::max(maxSwiz, psOperand->aui32Swizzle[i]); - } - componentsNeeded = maxSwiz - minSwiz + 1; + if ((ui32CompMask & (1 << i)) == 0) + continue; + minSwiz = std::min(minSwiz, psOperand->aui32Swizzle[i]); + maxSwiz = std::max(maxSwiz, psOperand->aui32Swizzle[i]); + } + componentsNeeded = maxSwiz - minSwiz + 1; + } + else + { + minSwiz = maxSwiz = 1; + } + + // When we have a component mask that doesn't have .x set (this basically only happens when we manually open operands into components) + // We have to pull down the swizzle array to match the first bit that's actually set + uint32_t tmpSwizzle[4] = { 0 }; + int firstBitSet = 0; + if (ui32CompMask == 0) + ui32CompMask = 0xf; + while ((ui32CompMask & (1 << firstBitSet)) == 0) + firstBitSet++; + std::copy(&psOperand->aui32Swizzle[firstBitSet], &psOperand->aui32Swizzle[4], &tmpSwizzle[0]); + + ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], tmpSwizzle, psCBuf, &psVarType, &isArray, &arrayIndices, &rebase, psContext->flags); + + // Get a possible dynamic array index + bstring dynamicIndex = bfromcstr(""); + bool needsIndexCalcRevert = false; + bool isAoS = ((!isArray && arrayIndices.size() > 0) || (isArray && arrayIndices.size() > 1)); + + Operand *psDynIndexOp = psOperand->GetDynamicIndexOperand(psContext, psVarType, isAoS, &needsIndexCalcRevert); + + if (psDynIndexOp != NULL) + { + SHADER_VARIABLE_TYPE eType = psDynIndexOp->GetDataType(psContext); + uint32_t opFlags = TO_FLAG_INTEGER; + + if (eType != SVT_INT && eType != SVT_UINT) + opFlags = TO_AUTO_BITCAST_TO_INT; + + TranslateOperand(dynamicIndex, psDynIndexOp, opFlags); + } + + char *tmp = bstr2cstr(dynamicIndex, '\0'); + std::string dynamicIndexStr = tmp; + bcstrfree(tmp); + bdestroy(dynamicIndex); + + if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE || ((componentsNeeded+minSwiz) <= psVarType->Columns)) + { + // Simple case: just access one component + std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(psVarType, arrayIndices, dynamicIndexStr, needsIndexCalcRevert, psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES); + + if ((psContext->flags & HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT_WITH_INSTANCE_NAME) && psCBuf) + { + std::string instanceName = UniformBufferInstanceName(psContext, psCBuf->name); + bformata(glsl, "%s.", instanceName.c_str()); + } + + if (((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) != 0) && ((psVarType->Class == SVC_MATRIX_ROWS) || (psVarType->Class == SVC_MATRIX_COLUMNS))) + { + // We'll need to add the prefix only to the last section of the name + size_t commaPos = fullName.find_last_of('.'); + char prefix[256]; + sprintf(prefix, HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING, psVarType->Rows, psVarType->Columns); + if (commaPos == std::string::npos) + fullName.insert(0, prefix); + else + fullName.insert(commaPos + 1, prefix); + + bformata(glsl, "%s", fullName.c_str()); } else + bformata(glsl, "%s", fullName.c_str()); + } + else + { + // Non-simple case: build vec4 and apply mask + + std::string instanceNamePrefix; + if ((psContext->flags & HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT_WITH_INSTANCE_NAME) && psCBuf) { - minSwiz = maxSwiz = 1; + std::string instanceName = UniformBufferInstanceName(psContext, psCBuf->name); + instanceNamePrefix = instanceName + "."; } - // When we have a component mask that doesn't have .x set (this basically only happens when we manually open operands into components) - // We have to pull down the swizzle array to match the first bit that's actually set - uint32_t tmpSwizzle[4] = { 0 }; - int firstBitSet = 0; - if (ui32CompMask == 0) - ui32CompMask = 0xf; - while ((ui32CompMask & (1 << firstBitSet)) == 0) - firstBitSet++; - std::copy(&psOperand->aui32Swizzle[firstBitSet], &psOperand->aui32Swizzle[4], &tmpSwizzle[0]); + uint32_t i; + std::vector tmpArrayIndices; + bool tmpIsArray; + int32_t tmpRebase; + int firstItemAdded = 0; - ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], tmpSwizzle, psCBuf, &psVarType, &isArray, &arrayIndices, &rebase, psContext->flags); - if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE || ((componentsNeeded+minSwiz) <= psVarType->Columns)) + bformata(glsl, "%s(", GetConstructorForType(psContext, psVarType->Type, GetNumberBitsSet(ui32CompMask), false)); + for (i = 0; i < 4; i++) { - // Simple case: just access one component - std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(psVarType, arrayIndices); + const ShaderVarType *tmpVarType = NULL; + if ((ui32CompMask & (1 << i)) == 0) + continue; + tmpRebase = 0; + if (firstItemAdded != 0) + bcatcstr(glsl, ", "); + else + firstItemAdded = 1; - if (((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) != 0) && ((psVarType->Class == SVC_MATRIX_ROWS) || (psVarType->Class == SVC_MATRIX_COLUMNS))) + memset(tmpSwizzle, 0, sizeof(uint32_t) * 4); + std::copy(&psOperand->aui32Swizzle[i], &psOperand->aui32Swizzle[4], &tmpSwizzle[0]); + + ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], tmpSwizzle, psCBuf, &tmpVarType, &tmpIsArray, &tmpArrayIndices, &tmpRebase, psContext->flags); + std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(tmpVarType, tmpArrayIndices, dynamicIndexStr, needsIndexCalcRevert, psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES); + + if (tmpVarType->Class == SVC_SCALAR) { - // We'll need to add the prefix only to the last section of the name - size_t commaPos = fullName.find_last_of('.'); - char prefix[256]; - sprintf(prefix, HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING, psVarType->Rows, psVarType->Columns); - if (commaPos == std::string::npos) - fullName.insert(0, prefix); - else - fullName.insert(commaPos + 1, prefix); - - bformata(glsl, "%s", fullName.c_str()); + bformata(glsl, "%s%s", instanceNamePrefix.c_str(), fullName.c_str()); } else - bformata(glsl, "%s", fullName.c_str()); - } - else - { - // Non-simple case: build vec4 and apply mask - uint32_t i; - std::vector tmpArrayIndices; - bool tmpIsArray; - int32_t tmpRebase; - int firstItemAdded = 0; - - bformata(glsl, "%s(", GetConstructorForType(psContext, psVarType->Type, GetNumberBitsSet(ui32CompMask), false)); - for (i = 0; i < 4; i++) { - const ShaderVarType *tmpVarType = NULL; - if ((ui32CompMask & (1 << i)) == 0) - continue; - tmpRebase = 0; - if (firstItemAdded != 0) - bcatcstr(glsl, ", "); - else - firstItemAdded = 1; + uint32_t swizzle; + tmpRebase /= 4; // 0 => 0, 4 => 1, 8 => 2, 12 /= 3 + swizzle = psOperand->aui32Swizzle[i] - tmpRebase; - memset(tmpSwizzle, 0, sizeof(uint32_t) * 4); - std::copy(&psOperand->aui32Swizzle[i], &psOperand->aui32Swizzle[4], &tmpSwizzle[0]); - - ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], tmpSwizzle, psCBuf, &tmpVarType, &tmpIsArray, &tmpArrayIndices, &tmpRebase, psContext->flags); - std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(tmpVarType, tmpArrayIndices); - - if (tmpVarType->Class == SVC_SCALAR) - { - bformata(glsl, "%s", fullName.c_str()); - } - else - { - uint32_t swizzle; - tmpRebase /= 4; // 0 => 0, 4 => 1, 8 => 2, 12 /= 3 - swizzle = psOperand->aui32Swizzle[i] - tmpRebase; - - bformata(glsl, "%s", fullName.c_str()); - bformata(glsl, ".%c", "xyzw"[swizzle]); - } + bformata(glsl, "%s%s", instanceNamePrefix.c_str(), fullName.c_str()); + bformata(glsl, ".%c", "xyzw"[swizzle]); } - bcatcstr(glsl, ")"); - // Clear rebase, we've already done it. - rebase = 0; - // Also swizzle. - *pui32IgnoreSwizzle = 1; } - } - else // We don't have a semantic for this variable, so try the raw dump appoach. - { - ASSERT(0); - //bformata(glsl, "cb%d.data", psOperand->aui32ArraySizes[0]);// - //index = psOperand->aui32ArraySizes[1]; + bcatcstr(glsl, ")"); + // Clear rebase, we've already done it. + rebase = 0; + // Also swizzle. + *pui32IgnoreSwizzle = 1; } if (isArray) + { index = arrayIndices.back(); - //Dx9 only? - if(psOperand->m_SubOperands[0].get() != NULL) - { - // Array of matrices is treated as array of vec4s in HLSL, - // but that would mess up uniform types in GLSL. Do gymnastics. - uint32_t opFlags = TO_FLAG_INTEGER; + // Dynamic index is atm supported only at the root array level. Add here only if there is no such parent. + bool hasDynamicIndex = !dynamicIndexStr.empty() && (arrayIndices.size() <= 1); + bool hasImmediateIndex = (index != -1) && !(hasDynamicIndex && index == 0); - if (((psVarType->Class == SVC_MATRIX_COLUMNS) || (psVarType->Class == SVC_MATRIX_ROWS)) && (psVarType->Elements > 1) && ((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) == 0)) - { - // Special handling for matrix arrays - bcatcstr(glsl, "[("); - TranslateOperand(psOperand->m_SubOperands[0].get(), opFlags); - bformata(glsl, ") / 4]"); - { - bcatcstr(glsl, "[(("); - TranslateOperand(psOperand->m_SubOperands[0].get(), opFlags, OPERAND_4_COMPONENT_MASK_X); - bformata(glsl, ") %% 4)]"); - } - } - else - { - bcatcstr(glsl, "["); - TranslateOperand(psOperand->m_SubOperands[0].get(), opFlags); - bformata(glsl, "]"); - } - } - else - if(index != -1 && psOperand->m_SubOperands[1].get() != NULL) - { - // Array of matrices is treated as array of vec4s in HLSL, - // but that would mess up uniform types in GLSL. Do gymnastics. - SHADER_VARIABLE_TYPE eType = psOperand->m_SubOperands[1].get()->GetDataType(psContext); - uint32_t opFlags = TO_FLAG_INTEGER; - if (eType != SVT_INT && eType != SVT_UINT) - opFlags = TO_AUTO_BITCAST_TO_INT; - - if (((psVarType->Class == SVC_MATRIX_COLUMNS) ||( psVarType->Class == SVC_MATRIX_ROWS)) && (psVarType->Elements > 1) && ((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) == 0)) - { - // Special handling for matrix arrays - bcatcstr(glsl, "[("); - TranslateOperand(psOperand->m_SubOperands[1].get(), opFlags); - bformata(glsl, " + %d) / 4]", index); - { - bcatcstr(glsl, "[(("); - TranslateOperand(psOperand->m_SubOperands[1].get(), opFlags); - bformata(glsl, " + %d) %% 4)]", index); - } - } - else - { - bcatcstr(glsl, "["); - TranslateOperand(psOperand->m_SubOperands[1].get(), opFlags); - if (index != 0) - bformata(glsl, " + %d]", index); - else - bcatcstr(glsl, "]"); - } - } - else if(index != -1) - { - if (((psVarType->Class == SVC_MATRIX_COLUMNS) || (psVarType->Class == SVC_MATRIX_ROWS)) && (psVarType->Elements > 1) && ((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) == 0)) - { - // Special handling for matrix arrays, open them up into vec4's - size_t matidx = index / 4; - size_t rowidx = index - (matidx*4); - bformata(glsl, "[%d][%d]", matidx, rowidx); - } - else - { - bformata(glsl, "[%d]", index); - } - } - else if(psOperand->m_SubOperands[1].get() != NULL) - { - bcatcstr(glsl, "["); - TranslateOperand(psOperand->m_SubOperands[1].get(), TO_FLAG_INTEGER); - bcatcstr(glsl, "]"); - } + if (hasDynamicIndex || hasImmediateIndex) + { + std::ostringstream fullIndexOss; + if (hasDynamicIndex && hasImmediateIndex) + fullIndexOss << "(" << dynamicIndexStr << " + " << index << ")"; + else if (hasDynamicIndex) + fullIndexOss << dynamicIndexStr; + else // hasImmediateStr + fullIndexOss << index; + if (((psVarType->Class == SVC_MATRIX_COLUMNS) || (psVarType->Class == SVC_MATRIX_ROWS)) && (psVarType->Elements > 1) && ((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) == 0)) + { + // Special handling for old matrix arrays + bformata(glsl, "[%s / 4]", fullIndexOss.str().c_str()); + bformata(glsl, "[%s %% 4]", fullIndexOss.str().c_str()); + } + else // This path is atm the default + { + bformata(glsl, "[%s]", fullIndexOss.str().c_str()); + } + } + } + if(psVarType && psVarType->Class == SVC_VECTOR && !*pui32IgnoreSwizzle) { switch(rebase) @@ -1642,3 +1635,41 @@ void ConcatTextureSamplerName(bstring str, ShaderInfo* psShaderInfo, const uint3 std::string texturesamplername = TextureSamplerName(psShaderInfo, ui32TextureRegisterNumber, ui32SamplerRegisterNumber, bZCompare); bcatcstr(str, texturesamplername.c_str()); } + +// Take an uniform buffer name and generate an instance name. +std::string UniformBufferInstanceName(HLSLCrossCompilerContext* psContext, const std::string& name) +{ + if (name == "$Globals") + { + char prefix = 'A'; + // Need to tweak Globals struct name to prevent clashes between shader stages + switch (psContext->psShader->eShaderType) + { + default: + ASSERT(0); + break; + case COMPUTE_SHADER: + prefix = 'C'; + break; + case VERTEX_SHADER: + prefix = 'V'; + break; + case PIXEL_SHADER: + prefix = 'P'; + break; + case GEOMETRY_SHADER: + prefix = 'G'; + break; + case HULL_SHADER: + prefix = 'H'; + break; + case DOMAIN_SHADER: + prefix = 'D'; + break; + } + + return std::string("_") + prefix + name.substr(1); + } + else + return std::string("_") + name; +} diff --git a/src/toMetalDeclaration.cpp b/src/toMetalDeclaration.cpp index 3a1216b..538972c 100644 --- a/src/toMetalDeclaration.cpp +++ b/src/toMetalDeclaration.cpp @@ -5,13 +5,15 @@ #include "internal_includes/Declaration.h" #include #include +#include +#ifndef fpcheck #ifdef _MSC_VER #define fpcheck(x) (_isnan(x) || !_finite(x)) #else -#include -#define fpcheck(x) ((std::isnan(x)) || (std::isinf(x))) +#define fpcheck(x) (std::isnan(x) || std::isinf(x)) #endif +#endif // #ifndef fpcheck bool ToMetal::TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix) @@ -34,6 +36,8 @@ bool ToMetal::TranslateSystemValue(const Operand *psOperand, const ShaderInfo::I case NAME_RENDER_TARGET_ARRAY_INDEX: result = "mtl_Layer"; if (outSkipPrefix != NULL) *outSkipPrefix = true; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; return true; case NAME_CLIP_DISTANCE: { @@ -145,13 +149,8 @@ void ToMetal::DeclareBuiltinInput(const Declaration *psDecl) m_StructDefinitions[""].m_Members.push_back("float4 mtl_FragCoord [[ position ]]"); break; case NAME_RENDER_TARGET_ARRAY_INDEX: -#if 0 // Only supported on a Mac m_StructDefinitions[""].m_Members.push_back("uint mtl_Layer [[ render_target_array_index ]]"); -#else - // Not on Metal - ASSERT(0); -#endif break; case NAME_CLIP_DISTANCE: ASSERT(0); // Should never be an input @@ -267,13 +266,8 @@ void ToMetal::DeclareBuiltinOutput(const Declaration *psDecl) m_StructDefinitions[out].m_Members.push_back("float4 mtl_Position [[ position ]]"); break; case NAME_RENDER_TARGET_ARRAY_INDEX: -#if 0 // Only supported on a Mac m_StructDefinitions[out].m_Members.push_back("uint mtl_Layer [[ render_target_array_index ]]"); -#else - // Not on Metal - ASSERT(0); -#endif break; case NAME_CLIP_DISTANCE: // it will be done separately in DeclareClipPlanes @@ -663,8 +657,15 @@ static std::string TranslateResourceDeclaration(HLSLCrossCompilerContext* psCont } } } - if (eDimension == RESOURCE_DIMENSION_BUFFER) - access = "read"; + switch (eDimension) + { + case RESOURCE_DIMENSION_BUFFER: + case RESOURCE_DIMENSION_TEXTURE2DMS: + case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + access = "read"; + default: + break; + } } SHADER_VARIABLE_TYPE svtType = HLSLcc::ResourceReturnTypeToSVTType(eType, ePrec); @@ -768,19 +769,19 @@ static std::string GetInterpolationString(INTERPOLATION_MODE eMode) return ""; case INTERPOLATION_LINEAR_CENTROID: - return " [[ centroid ]]"; + return " [[ centroid_perspective ]]"; case INTERPOLATION_LINEAR_NOPERSPECTIVE: - return " [[ center_perspective ]]"; + return " [[ center_no_perspective ]]"; case INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID: - return " [[ centroid_noperspective ]]"; + return " [[ centroid_no_perspective ]]"; case INTERPOLATION_LINEAR_SAMPLE: return " [[ sample_perspective ]]"; case INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE: - return " [[ sample_noperspective ]]"; + return " [[ sample_no_perspective ]]"; default: ASSERT(0); return ""; @@ -801,9 +802,17 @@ void ToMetal::DeclareStructVariable(const std::string &parentName, const ShaderV if (var.Class == SVC_STRUCT) { - std::ostringstream oss; if (m_StructDefinitions.find(var.name + "_Type") == m_StructDefinitions.end()) DeclareStructType(var.name + "_Type", var.Members, withinCB, cumulativeOffset + var.Offset); + + // Report Array-of-Struct CB top-level struct var after all members are reported. + if (var.Parent == NULL && var.Elements > 1 && withinCB) + { + // var.Type being SVT_VOID indicates it is a struct in this case. + psContext->m_Reflection.OnConstant(var.fullName, var.Offset + cumulativeOffset, var.Type, var.Rows, var.Columns, false, var.Elements); + } + + std::ostringstream oss; oss << var.name << "_Type " << var.name; if (var.Elements > 1) { @@ -1197,6 +1206,13 @@ void ToMetal::TranslateDeclaration(const Declaration* psDecl) m_StructDefinitions[""].m_Members.push_back(oss.str()); break; } + if (psOperand->eSpecialName == NAME_RENDER_TARGET_ARRAY_INDEX) + { + std::ostringstream oss; + oss << "uint " << name << " [[ render_target_array_index ]]"; + m_StructDefinitions[""].m_Members.push_back(oss.str()); + break; + } if (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED) { std::ostringstream oss; @@ -1484,7 +1500,7 @@ void ToMetal::TranslateDeclaration(const Declaration* psDecl) *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].d }; if (fpcheck(val[chunk.second.m_Rebase])) - bformata(glsl, "\tas_type(%Xu)", *(uint32_t *)&val[chunk.second.m_Rebase]); + bformata(glsl, "\tas_type(0x%Xu)", *(uint32_t *)&val[chunk.second.m_Rebase]); else { bcatcstr(glsl, "\t"); @@ -1511,7 +1527,7 @@ void ToMetal::TranslateDeclaration(const Declaration* psDecl) if (k != 0) bcatcstr(glsl, ", "); if (fpcheck(val[k])) - bformata(glsl, "as_type(%Xu)", *(uint32_t *)&val[k + chunk.second.m_Rebase]); + bformata(glsl, "as_type(0x%Xu)", *(uint32_t *)&val[k + chunk.second.m_Rebase]); else HLSLcc::PrintFloat(glsl, val[k + chunk.second.m_Rebase]); } diff --git a/src/toMetalInstruction.cpp b/src/toMetalInstruction.cpp index f8c94f0..b581e3e 100644 --- a/src/toMetalInstruction.cpp +++ b/src/toMetalInstruction.cpp @@ -6,6 +6,7 @@ #include "stdio.h" #include #include +#include #include "internal_includes/debug.h" #include "internal_includes/Shader.h" #include "internal_includes/Instruction.h" @@ -175,8 +176,8 @@ void ToMetal::AddComparison(Instruction* psInst, ComparisonType eType, int needsParenthesis = 0; if (typeFlag == TO_FLAG_NONE - && psInst->asOperands[1].GetDataType(psContext) == SVT_FLOAT16 - && psInst->asOperands[1].GetDataType(psContext) == SVT_FLOAT16) + && CanForceToHalfOperand(&psInst->asOperands[1]) + && CanForceToHalfOperand(&psInst->asOperands[2])) typeFlag = TO_FLAG_FORCE_HALF; ASSERT(s0ElemCount == s1ElemCount || s1ElemCount == 1 || s0ElemCount == 1); if ((s0ElemCount != s1ElemCount) && (destElemCount > 1)) @@ -251,6 +252,25 @@ void ToMetal::AddComparison(Instruction* psInst, ComparisonType eType, } } +bool ToMetal::CanForceToHalfOperand(const Operand *psOperand) +{ + if (psOperand->GetDataType(psContext) == SVT_FLOAT16) + return true; + + if (psOperand->eType == OPERAND_TYPE_IMMEDIATE32 || psOperand->eType == OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER) + { + for (int i = 0; i < psOperand->iNumComponents; i++) + { + float val = fabs(psOperand->afImmediates[i]); + // Do not allow forcing immediate value to half if value is beyond half min/max boundaries + if (val != 0 && (val > 65504 || val < 6.10352e-5)) + return false; + } + return true; + } + + return false; +} void ToMetal::AddMOVBinaryOp(const Operand *pDest, Operand *pSrc) { @@ -392,10 +412,13 @@ void ToMetal::CallBinaryOp(const char* name, Instruction* psInst, int needsParenthesis = 0; if (eDataType == SVT_FLOAT - && psInst->asOperands[dest].GetDataType(psContext) == SVT_FLOAT16 - && psInst->asOperands[src0].GetDataType(psContext) == SVT_FLOAT16 - && psInst->asOperands[src1].GetDataType(psContext) == SVT_FLOAT16) + && CanForceToHalfOperand(&psInst->asOperands[dest]) + && CanForceToHalfOperand(&psInst->asOperands[src0]) + && CanForceToHalfOperand(&psInst->asOperands[src1])) + { ui32Flags = TO_FLAG_FORCE_HALF; + eDataType = SVT_FLOAT16; + } uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); if (src1SwizCount != src0SwizCount) @@ -436,11 +459,11 @@ void ToMetal::CallTernaryOp(const char* op1, const char* op2, Instruction* psIns int numParenthesis = 0; if (dataType == TO_FLAG_NONE - && psInst->asOperands[dest].GetDataType(psContext) == SVT_FLOAT16 - && psInst->asOperands[src0].GetDataType(psContext) == SVT_FLOAT16 - && psInst->asOperands[src1].GetDataType(psContext) == SVT_FLOAT16 - && psInst->asOperands[src2].GetDataType(psContext) == SVT_FLOAT16) - ui32Flags = TO_FLAG_FORCE_HALF; + && CanForceToHalfOperand(&psInst->asOperands[dest]) + && CanForceToHalfOperand(&psInst->asOperands[src0]) + && CanForceToHalfOperand(&psInst->asOperands[src1]) + && CanForceToHalfOperand(&psInst->asOperands[src2])) + ui32Flags = dataType = TO_FLAG_FORCE_HALF; if (src1SwizCount != src0SwizCount || src2SwizCount != src0SwizCount) { @@ -472,10 +495,10 @@ void ToMetal::CallHelper3(const char* name, Instruction* psInst, uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); int numParenthesis = 0; - if (psInst->asOperands[dest].GetDataType(psContext) == SVT_FLOAT16 - && psInst->asOperands[src0].GetDataType(psContext) == SVT_FLOAT16 - && psInst->asOperands[src1].GetDataType(psContext) == SVT_FLOAT16 - && psInst->asOperands[src2].GetDataType(psContext) == SVT_FLOAT16) + if (CanForceToHalfOperand(&psInst->asOperands[dest]) + && CanForceToHalfOperand(&psInst->asOperands[src0]) + && CanForceToHalfOperand(&psInst->asOperands[src1]) + && CanForceToHalfOperand(&psInst->asOperands[src2])) ui32Flags = TO_FLAG_FORCE_HALF | TO_AUTO_BITCAST_TO_FLOAT; if ((src1SwizCount != src0SwizCount || src2SwizCount != src0SwizCount) && paramsShouldFollowWriteMask) @@ -511,9 +534,9 @@ void ToMetal::CallHelper2(const char* name, Instruction* psInst, int isDotProduct = (strncmp(name, "dot", 3) == 0) ? 1 : 0; int numParenthesis = 0; - if (psInst->asOperands[dest].GetDataType(psContext) == SVT_FLOAT16 - && psInst->asOperands[src0].GetDataType(psContext) == SVT_FLOAT16 - && psInst->asOperands[src1].GetDataType(psContext) == SVT_FLOAT16) + if (CanForceToHalfOperand(&psInst->asOperands[dest]) + && CanForceToHalfOperand(&psInst->asOperands[src0]) + && CanForceToHalfOperand(&psInst->asOperands[src1])) ui32Flags = TO_FLAG_FORCE_HALF | TO_AUTO_BITCAST_TO_FLOAT; @@ -604,8 +627,8 @@ void ToMetal::CallHelper1(const char* name, Instruction* psInst, int numParenthesis = 0; psContext->AddIndentation(); - if (psInst->asOperands[dest].GetDataType(psContext) == SVT_FLOAT16 - && psInst->asOperands[src0].GetDataType(psContext) == SVT_FLOAT16) + if (CanForceToHalfOperand(&psInst->asOperands[dest]) + && CanForceToHalfOperand(&psInst->asOperands[src0])) ui32Flags = TO_FLAG_FORCE_HALF | TO_AUTO_BITCAST_TO_FLOAT; AddAssignToDest(&psInst->asOperands[dest], ui32Flags & TO_FLAG_FORCE_HALF ? SVT_FLOAT16 : SVT_FLOAT, dstSwizCount, &numParenthesis); @@ -2238,8 +2261,20 @@ void ToMetal::TranslateInstruction(Instruction* psInst) bcatcstr(glsl, "//UDIV\n"); #endif //destQuotient, destRemainder, src0, src1 - CallBinaryOp("/", psInst, 0, 2, 3, SVT_UINT); - CallBinaryOp("%", psInst, 1, 2, 3, SVT_UINT); + + // There are cases where destQuotient is the same variable as src0 or src1. If that happens, + // we need to compute "%" before the "/" in order to avoid src0 or src1 being overriden first. + if ((psInst->asOperands[0].eType != psInst->asOperands[2].eType || psInst->asOperands[0].ui32RegisterNumber != psInst->asOperands[2].ui32RegisterNumber) + && (psInst->asOperands[0].eType != psInst->asOperands[3].eType || psInst->asOperands[0].ui32RegisterNumber != psInst->asOperands[3].ui32RegisterNumber)) + { + CallBinaryOp("/", psInst, 0, 2, 3, SVT_UINT); + CallBinaryOp("%", psInst, 1, 2, 3, SVT_UINT); + } + else + { + CallBinaryOp("%", psInst, 1, 2, 3, SVT_UINT); + CallBinaryOp("/", psInst, 0, 2, 3, SVT_UINT); + } break; } case OPCODE_DIV: @@ -2299,8 +2334,8 @@ void ToMetal::TranslateInstruction(Instruction* psInst) psContext->AddIndentation(); SHADER_VARIABLE_TYPE dstType = psInst->asOperands[0].GetDataType(psContext); uint32_t typeFlags = TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_EXPAND_TO_VEC2; - if (psInst->asOperands[1].GetDataType(psContext) == SVT_FLOAT16 - && psInst->asOperands[2].GetDataType(psContext) == SVT_FLOAT16) + if (CanForceToHalfOperand(&psInst->asOperands[1]) + && CanForceToHalfOperand(&psInst->asOperands[2])) typeFlags = TO_FLAG_FORCE_HALF | TO_AUTO_EXPAND_TO_VEC2; if (dstType != SVT_FLOAT16) @@ -2325,8 +2360,8 @@ void ToMetal::TranslateInstruction(Instruction* psInst) psContext->AddIndentation(); SHADER_VARIABLE_TYPE dstType = psInst->asOperands[0].GetDataType(psContext); uint32_t typeFlags = TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_EXPAND_TO_VEC3; - if (psInst->asOperands[1].GetDataType(psContext) == SVT_FLOAT16 - && psInst->asOperands[2].GetDataType(psContext) == SVT_FLOAT16) + if (CanForceToHalfOperand(&psInst->asOperands[1]) + && CanForceToHalfOperand(&psInst->asOperands[2])) typeFlags = TO_FLAG_FORCE_HALF | TO_AUTO_EXPAND_TO_VEC3; if (dstType != SVT_FLOAT16) @@ -2795,7 +2830,7 @@ void ToMetal::TranslateInstruction(Instruction* psInst) DeclareExtraFunction("BFI", "\ template UVecType bitFieldInsert(const UVecType width, const UVecType offset, const UVecType src2, const UVecType src3)\n\ {\n\ - UVecType bitmask = (((1 << width)-1) << offset) & 0xffffffff;\n\ + UVecType bitmask = (((UVecType(1) << width)-1) << offset) & 0xffffffff;\n\ return ((src2 << offset) & bitmask) | (src3 & ~bitmask);\n\ }; "); psContext->AddIndentation(); @@ -2971,7 +3006,10 @@ void ToMetal::TranslateInstruction(Instruction* psInst) } } psContext->AddIndentation(); - bformata(glsl, "threadgroup_barrier(mem_flags::%s);\n", barrierFlags); + if (ui32SyncFlags & SYNC_THREADS_IN_GROUP) + bformata(glsl, "threadgroup_barrier(mem_flags::%s);\n", barrierFlags); + else + bformata(glsl, "simdgroup_barrier(mem_flags::%s);\n", barrierFlags); break; } @@ -3215,60 +3253,79 @@ void ToMetal::TranslateInstruction(Instruction* psInst) case OPCODE_LD_UAV_TYPED: { #ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//LD_UAV_TYPED\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "//LD_UAV_TYPED\n"); #endif - Operand* psDest = &psInst->asOperands[0]; - Operand* psSrc = &psInst->asOperands[2]; - Operand* psSrcAddr = &psInst->asOperands[1]; + Operand* psDest = &psInst->asOperands[0]; + Operand* psSrc = &psInst->asOperands[2]; + Operand* psSrcAddr = &psInst->asOperands[1]; - const ResourceBinding* psRes = 0; - psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, psSrc->ui32RegisterNumber, &psRes); - SHADER_VARIABLE_TYPE srcDataType = ResourceReturnTypeToSVTType(psRes->ui32ReturnType, psRes->ePrecision); + const ResourceBinding* psRes = 0; + psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, psSrc->ui32RegisterNumber, &psRes); + SHADER_VARIABLE_TYPE srcDataType = ResourceReturnTypeToSVTType(psRes->ui32ReturnType, psRes->ePrecision); - if (psInst->eResDim == RESOURCE_DIMENSION_BUFFER) // Hack typed buffer as raw buf - { - psSrc->aeDataType[0] = srcDataType; - psSrcAddr->eSelMode = OPERAND_4_COMPONENT_SELECT_1_MODE; - if (psSrcAddr->eType == OPERAND_TYPE_IMMEDIATE32) - psSrcAddr->iNumComponents = 1; - TranslateShaderStorageLoad(psInst); - break; - } + if (psInst->eResDim == RESOURCE_DIMENSION_BUFFER) // Hack typed buffer as raw buf + { + psSrc->aeDataType[0] = srcDataType; + psSrcAddr->eSelMode = OPERAND_4_COMPONENT_SELECT_1_MODE; + if (psSrcAddr->eType == OPERAND_TYPE_IMMEDIATE32) + psSrcAddr->iNumComponents = 1; + TranslateShaderStorageLoad(psInst); + break; + } - int srcCount = psSrc->GetNumSwizzleElements(); - int numParenthesis = 0; - uint32_t compMask = 0; +#define RRD(n) REFLECT_RESOURCE_DIMENSION_ ## n - switch (psInst->eResDim) - { - case RESOURCE_DIMENSION_TEXTURE3D: - case RESOURCE_DIMENSION_TEXTURE2DARRAY: - case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: - case RESOURCE_DIMENSION_TEXTURECUBEARRAY: - compMask |= (1 << 2); - case RESOURCE_DIMENSION_TEXTURECUBE: - case RESOURCE_DIMENSION_TEXTURE1DARRAY: - case RESOURCE_DIMENSION_TEXTURE2D: - case RESOURCE_DIMENSION_TEXTURE2DMS: - compMask |= (1 << 1); - case RESOURCE_DIMENSION_TEXTURE1D: - compMask |= 1; - break; - default: - ASSERT(0); - break; - } + // unlike glsl, texture arrays will have index in separate argument + const bool isArray = psRes->eDimension == RRD(TEXTURE1DARRAY) || psRes->eDimension == RRD(TEXTURE2DARRAY) + || psRes->eDimension == RRD(TEXTURE2DMSARRAY) || psRes->eDimension == RRD(TEXTURECUBEARRAY); - psContext->AddIndentation(); - AddAssignToDest(psDest, srcDataType, srcCount, &numParenthesis); - glsl << TranslateOperand(psSrc, TO_FLAG_NAME_ONLY); - bcatcstr(glsl, ".read("); - glsl << TranslateOperand(psSrcAddr, TO_FLAG_UNSIGNED_INTEGER, compMask); - bcatcstr(glsl, ")"); - glsl << TranslateOperandSwizzle(&psInst->asOperands[0], OPERAND_4_COMPONENT_MASK_ALL, 0); - AddAssignPrologue(numParenthesis); - break; + uint32_t flags = TO_FLAG_UNSIGNED_INTEGER, opMask = OPERAND_4_COMPONENT_MASK_ALL; + switch (psRes->eDimension) + { + case RRD(TEXTURE3D): + opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z; + flags |= TO_AUTO_EXPAND_TO_VEC3; + break; + case RRD(TEXTURECUBE): case RRD(TEXTURECUBEARRAY): + case RRD(TEXTURE2DARRAY): case RRD(TEXTURE2DMSARRAY): case RRD(TEXTURE2D): case RRD(TEXTURE2DMS): + opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y; + flags |= TO_AUTO_EXPAND_TO_VEC2; + break; + case RRD(TEXTURE1D): case RRD(TEXTURE1DARRAY): + opMask = OPERAND_4_COMPONENT_MASK_X; + break; + default: + ASSERT(0); break; + } + + int srcCount = psSrc->GetNumSwizzleElements(), numParenthesis = 0; + psContext->AddIndentation(); + AddAssignToDest(psDest, srcDataType, srcCount, &numParenthesis); + glsl << TranslateOperand(psSrc, TO_FLAG_NAME_ONLY); + bcatcstr(glsl, ".read("); + glsl << TranslateOperand(psSrcAddr, flags, opMask); + if(isArray) + { + // NB cube array is handled incorrectly - it needs extra "face" arg + switch (psRes->eDimension) + { + case RRD(TEXTURE1DARRAY): opMask = OPERAND_4_COMPONENT_MASK_Y; break; + case RRD(TEXTURE2DARRAY): case RRD(TEXTURE2DMSARRAY): opMask = OPERAND_4_COMPONENT_MASK_Z; break; + case RRD(TEXTURECUBEARRAY): opMask = OPERAND_4_COMPONENT_MASK_W; break; + default: ASSERT(0); break; + } + + bcatcstr(glsl, ", "); + glsl << TranslateOperand(psSrcAddr, TO_FLAG_UNSIGNED_INTEGER, opMask); + } + bcatcstr(glsl, ")"); + glsl << TranslateOperandSwizzle(&psInst->asOperands[0], OPERAND_4_COMPONENT_MASK_ALL, 0); + AddAssignPrologue(numParenthesis); + +#undef RRD + + break; } case OPCODE_STORE_RAW: { @@ -3291,66 +3348,82 @@ void ToMetal::TranslateInstruction(Instruction* psInst) case OPCODE_STORE_UAV_TYPED: { - const ResourceBinding* psRes; - int foundResource; - uint32_t flags = TO_FLAG_UNSIGNED_INTEGER; - uint32_t opMask = OPERAND_4_COMPONENT_MASK_ALL; + const ResourceBinding* psRes; + int foundResource; + #ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//STORE_UAV_TYPED\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "//STORE_UAV_TYPED\n"); #endif - foundResource = psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, - psInst->asOperands[0].ui32RegisterNumber, - &psRes); - ASSERT(foundResource); + foundResource = psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, + psInst->asOperands[0].ui32RegisterNumber, + &psRes); + ASSERT(foundResource); - if (psRes->eDimension == REFLECT_RESOURCE_DIMENSION_BUFFER) // Hack typed buffer as raw buf - { - psInst->asOperands[0].aeDataType[0] = ResourceReturnTypeToSVTType(psRes->ui32ReturnType, psRes->ePrecision); - psInst->asOperands[1].eSelMode = OPERAND_4_COMPONENT_SELECT_1_MODE; - if (psInst->asOperands[1].eType == OPERAND_TYPE_IMMEDIATE32) - psInst->asOperands[1].iNumComponents = 1; - TranslateShaderStorageStore(psInst); - break; - } + if (psRes->eDimension == REFLECT_RESOURCE_DIMENSION_BUFFER) // Hack typed buffer as raw buf + { + psInst->asOperands[0].aeDataType[0] = ResourceReturnTypeToSVTType(psRes->ui32ReturnType, psRes->ePrecision); + psInst->asOperands[1].eSelMode = OPERAND_4_COMPONENT_SELECT_1_MODE; + if (psInst->asOperands[1].eType == OPERAND_TYPE_IMMEDIATE32) + psInst->asOperands[1].iNumComponents = 1; + TranslateShaderStorageStore(psInst); + break; + } - psContext->AddIndentation(); + psContext->AddIndentation(); - glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_NAME_ONLY); - bcatcstr(glsl, ".write("); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_NAME_ONLY); + bcatcstr(glsl, ".write("); - switch (psRes->eDimension) - { - case REFLECT_RESOURCE_DIMENSION_TEXTURE1D: - opMask = OPERAND_4_COMPONENT_MASK_X; - break; - case REFLECT_RESOURCE_DIMENSION_TEXTURE2D: - case REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY: - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS: - opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y; - flags |= TO_AUTO_EXPAND_TO_VEC2; - break; - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY: - case REFLECT_RESOURCE_DIMENSION_TEXTURE3D: - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY: - case REFLECT_RESOURCE_DIMENSION_TEXTURECUBE: - opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z; - flags |= TO_AUTO_EXPAND_TO_VEC3; - break; - case REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY: - flags |= TO_AUTO_EXPAND_TO_VEC4; - break; - default: - ASSERT(0); - break; - }; + #define RRD(n) REFLECT_RESOURCE_DIMENSION_ ## n - glsl << TranslateOperand(&psInst->asOperands[2], ResourceReturnTypeToFlag(psRes->ui32ReturnType)); - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[1], flags, opMask); - bformata(glsl, ");\n"); + // unlike glsl, texture arrays will have index in separate argument + const bool isArray = psRes->eDimension == RRD(TEXTURE1DARRAY) || psRes->eDimension == RRD(TEXTURE2DARRAY) + || psRes->eDimension == RRD(TEXTURE2DMSARRAY) || psRes->eDimension == RRD(TEXTURECUBEARRAY); - break; + uint32_t flags = TO_FLAG_UNSIGNED_INTEGER, opMask = OPERAND_4_COMPONENT_MASK_ALL; + switch (psRes->eDimension) + { + case RRD(TEXTURE1D): case RRD(TEXTURE1DARRAY): + opMask = OPERAND_4_COMPONENT_MASK_X; + break; + case RRD(TEXTURE2D): case RRD(TEXTURE2DMS): case RRD(TEXTURE2DARRAY): case RRD(TEXTURE2DMSARRAY): + opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y; + flags |= TO_AUTO_EXPAND_TO_VEC2; + break; + case RRD(TEXTURE3D): case RRD(TEXTURECUBE): case RRD(TEXTURECUBEARRAY): + opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z; + flags |= TO_AUTO_EXPAND_TO_VEC3; + break; + default: + ASSERT(0); + break; + }; + + + glsl << TranslateOperand(&psInst->asOperands[2], ResourceReturnTypeToFlag(psRes->ui32ReturnType)); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[1], flags, opMask); + if(isArray) + { + // NB cube array is handled incorrectly - it needs extra "face" arg + flags = TO_FLAG_UNSIGNED_INTEGER; + switch (psRes->eDimension) + { + case RRD(TEXTURE1DARRAY): opMask = OPERAND_4_COMPONENT_MASK_Y; break; + case RRD(TEXTURE2DARRAY): case RRD(TEXTURE2DMSARRAY):opMask = OPERAND_4_COMPONENT_MASK_Z; break; + case RRD(TEXTURECUBEARRAY): opMask = OPERAND_4_COMPONENT_MASK_Z; break; + default: ASSERT(0); break; + } + + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[1], flags, opMask); + } + bformata(glsl, ");\n"); + +#undef RRD + + break; } case OPCODE_LD_RAW: { @@ -3639,10 +3712,10 @@ template vec bitFieldExtractI(const vec width, const ve #endif psContext->AddIndentation(); bool isFP16 = false; - if (psInst->asOperands[0].GetDataType(psContext) == SVT_FLOAT16 - && psInst->asOperands[1].GetDataType(psContext) == SVT_FLOAT16 - && psInst->asOperands[2].GetDataType(psContext) == SVT_FLOAT16 - && psInst->asOperands[2].GetDataType(psContext) == SVT_FLOAT16) + if (CanForceToHalfOperand(&psInst->asOperands[0]) + && CanForceToHalfOperand(&psInst->asOperands[1]) + && CanForceToHalfOperand(&psInst->asOperands[2]) + && CanForceToHalfOperand(&psInst->asOperands[2])) isFP16 = true; int parenthesis = 0; AddAssignToDest(&psInst->asOperands[0], isFP16 ? SVT_FLOAT16 : SVT_FLOAT, 2, &parenthesis); @@ -3767,7 +3840,7 @@ template vec bitFieldExtractI(const vec width, const ve const RESINFO_RETURN_TYPE eResInfoReturnType = psInst->eResInfoReturnType; psContext->AddIndentation(); AddAssignToDest(&psInst->asOperands[0], eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT ? SVT_UINT : SVT_FLOAT, 1, &numParenthesis); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_NAME_ONLY); + bcatcstr(glsl, TranslateOperand(&psInst->asOperands[1], TO_FLAG_NAME_ONLY).c_str()); bcatcstr(glsl, ".get_num_samples()"); AddAssignPrologue(numParenthesis); break; diff --git a/src/toMetalOperand.cpp b/src/toMetalOperand.cpp index 8e77513..4a02cb0 100644 --- a/src/toMetalOperand.cpp +++ b/src/toMetalOperand.cpp @@ -14,18 +14,22 @@ using namespace HLSLcc; #ifdef _MSC_VER - #if _MSC_VER < 1900 #define snprintf _snprintf #endif +#endif +#ifndef fpcheck +#ifdef _MSC_VER #define fpcheck(x) (_isnan(x) || !_finite(x)) #else #define fpcheck(x) (std::isnan(x) || std::isinf(x)) #endif +#endif // #ifndef fpcheck + // Returns nonzero if types are just different precisions of the same underlying type -static bool AreTypesCompatible(SHADER_VARIABLE_TYPE a, uint32_t ui32TOFlag) +static bool AreTypesCompatibleMetal(SHADER_VARIABLE_TYPE a, uint32_t ui32TOFlag) { SHADER_VARIABLE_TYPE b = TypeFlagsToSVTType(ui32TOFlag); @@ -352,8 +356,19 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui if (psOperand->eType == OPERAND_TYPE_INPUT) { // Check for scalar - if (psContext->psShader->abScalarInput[psOperand->GetRegisterSpace(psContext)][psOperand->ui32RegisterNumber] & psOperand->GetAccessMask() - && psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) + // You would think checking would be easy but there is a caveat: + // checking abScalarInput might report as scalar, while in reality that was redirected and now is vector so swizzle must be preserved + // as an example consider we have input: + // float2 x; float y; + // and later on we do + // tex2D(xxx, fixed2(x.x, y)); + // in that case we will generate redirect but which ui32RegisterNumber will be used for it is not strictly "specified" + // so we may end up with treating it as scalar (even though it is vector now) + const int redirectInput = psContext->psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber]; + const bool wasRedirected = redirectInput == 0xFF || redirectInput == 0xFE; + + const int scalarInput = psContext->psShader->abScalarInput[psOperand->GetRegisterSpace(psContext)][psOperand->ui32RegisterNumber]; + if (!wasRedirected && (scalarInput & psOperand->GetAccessMask()) && (psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE)) { scalarWithSwizzle = 1; *pui32IgnoreSwizzle = 1; @@ -384,7 +399,7 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui } bool bitcast = false; - if (AreTypesCompatible(eType, ui32TOFlag) == 0) + if (AreTypesCompatibleMetal(eType, ui32TOFlag) == 0) { if (CanDoDirectCast(eType, requestedType)) { @@ -406,7 +421,15 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui // Add ctor if needed (upscaling). Type conversion is already handled above, so here we must // use the original type to not make type conflicts in bitcasts - if (((numComponents < requestedComponents)||(scalarWithSwizzle != 0)) && (hasCtor == 0 || bitcast)) + bool needsUpscaling = ((numComponents < requestedComponents)||(scalarWithSwizzle != 0)) && (hasCtor == 0 || bitcast); + + // Add constuctor if half precision is forced to avoid template ambiguity error from compiler + bool needsForcedCtor = (ui32TOFlag & TO_FLAG_FORCE_HALF) && (psOperand->eType == OPERAND_TYPE_IMMEDIATE32 || psOperand->eType == OPERAND_TYPE_IMMEDIATE64); + + if (needsForcedCtor) + requestedComponents = std::max(requestedComponents, 1); + + if (needsUpscaling || needsForcedCtor) { oss << GetConstructorForType(psContext, eType, requestedComponents, false) << "("; @@ -660,162 +683,144 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui //Work out the variable name. Don't apply swizzle to that variable yet. int32_t rebase = 0; - if(psCBuf) - { - uint32_t componentsNeeded = 1; - if (psOperand->eSelMode != OPERAND_4_COMPONENT_SELECT_1_MODE) - { - uint32_t minSwiz = 3; - uint32_t maxSwiz = 0; - int i; - for (i = 0; i < 4; i++) - { - if ((ui32CompMask & (1 << i)) == 0) - continue; - minSwiz = std::min(minSwiz, psOperand->aui32Swizzle[i]); - maxSwiz = std::max(maxSwiz, psOperand->aui32Swizzle[i]); - } - componentsNeeded = maxSwiz - minSwiz + 1; - } + ASSERT(psCBuf != NULL); - ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], psOperand->aui32Swizzle, psCBuf, &psVarType, &isArray, &arrayIndices, &rebase, psContext->flags); - if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE || (componentsNeeded <= psVarType->Columns)) + uint32_t componentsNeeded = 1; + if (psOperand->eSelMode != OPERAND_4_COMPONENT_SELECT_1_MODE) + { + uint32_t minSwiz = 3; + uint32_t maxSwiz = 0; + int i; + for (i = 0; i < 4; i++) { - // Simple case: just access one component - std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(psVarType, arrayIndices); + if ((ui32CompMask & (1 << i)) == 0) + continue; + minSwiz = std::min(minSwiz, psOperand->aui32Swizzle[i]); + maxSwiz = std::max(maxSwiz, psOperand->aui32Swizzle[i]); + } + componentsNeeded = maxSwiz - minSwiz + 1; + } + + ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], psOperand->aui32Swizzle, psCBuf, &psVarType, &isArray, &arrayIndices, &rebase, psContext->flags); + + // Get a possible dynamic array index + std::ostringstream dynIndexOss; + bool needsIndexCalcRevert = false; + bool isAoS = ((!isArray && arrayIndices.size() > 0) || (isArray && arrayIndices.size() > 1)); + + Operand *psDynIndexOp = psOperand->GetDynamicIndexOperand(psContext, psVarType, isAoS, &needsIndexCalcRevert); + + if (psDynIndexOp != NULL) + { + SHADER_VARIABLE_TYPE eType = psDynIndexOp->GetDataType(psContext); + uint32_t opFlags = TO_FLAG_INTEGER; + + if (eType != SVT_INT && eType != SVT_UINT) + opFlags = TO_AUTO_BITCAST_TO_INT; + + dynIndexOss << TranslateOperand(psDynIndexOp, opFlags); + } + + std::string dynamicIndexStr = dynIndexOss.str(); + + if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE || (componentsNeeded <= psVarType->Columns)) + { + // Simple case: just access one component + std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(psVarType, arrayIndices, dynamicIndexStr, needsIndexCalcRevert, psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES); - if (((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) != 0) && ((psVarType->Class == SVC_MATRIX_ROWS) || (psVarType->Class == SVC_MATRIX_COLUMNS))) - { - // We'll need to add the prefix only to the last section of the name - size_t commaPos = fullName.find_last_of('.'); - char prefix[256]; - sprintf(prefix, HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING, psVarType->Rows, psVarType->Columns); - if (commaPos == std::string::npos) - fullName.insert(0, prefix); - else - fullName.insert(commaPos + 1, prefix); - } - - oss << cbName << fullName; - } - else + if (((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) != 0) && ((psVarType->Class == SVC_MATRIX_ROWS) || (psVarType->Class == SVC_MATRIX_COLUMNS))) { - // Non-simple case: build vec4 and apply mask - uint32_t i; - int32_t tmpRebase; - std::vector tmpArrayIndices; - bool tmpIsArray; - int firstItemAdded = 0; - - oss << GetConstructorForTypeMetal(psVarType->Type, GetNumberBitsSet(ui32CompMask)) << "("; - for (i = 0; i < 4; i++) - { - const ShaderVarType *tmpVarType = NULL; - if ((ui32CompMask & (1 << i)) == 0) - continue; - tmpRebase = 0; - if (firstItemAdded != 0) - oss << ", "; - else - firstItemAdded = 1; - - uint32_t tmpSwizzle[4] = { 0 }; - std::copy(&psOperand->aui32Swizzle[i], &psOperand->aui32Swizzle[4], &tmpSwizzle[0]); - - ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], tmpSwizzle, psCBuf, &tmpVarType, &tmpIsArray, &tmpArrayIndices, &tmpRebase, psContext->flags); - std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(tmpVarType, tmpArrayIndices); - - if (tmpVarType->Class == SVC_SCALAR) - { - oss << cbName << fullName; - } - else - { - uint32_t swizzle; - tmpRebase /= 4; // 0 => 0, 4 => 1, 8 => 2, 12 /= 3 - swizzle = psOperand->aui32Swizzle[i] - tmpRebase; - - oss << cbName << fullName << "." << ("xyzw"[swizzle]); - } - } - oss << ")"; - // Clear rebase, we've already done it. - rebase = 0; - // Also swizzle. - *pui32IgnoreSwizzle = 1; + // We'll need to add the prefix only to the last section of the name + size_t commaPos = fullName.find_last_of('.'); + char prefix[256]; + sprintf(prefix, HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING, psVarType->Rows, psVarType->Columns); + if (commaPos == std::string::npos) + fullName.insert(0, prefix); + else + fullName.insert(commaPos + 1, prefix); } - } - else // We don't have a semantic for this variable, so try the raw dump appoach. - { - ASSERT(0); // We're screwed. -// bformata(glsl, "cb%d.data", psOperand->aui32ArraySizes[0]);// -// index = psOperand->aui32ArraySizes[1]; - } - if (isArray) - index = arrayIndices.back(); - - //Dx9 only? - if (psOperand->m_SubOperands[0].get() != NULL) - { - // Array of matrices is treated as array of vec4s in HLSL, - // but that would mess up uniform types in GLSL. Do gymnastics. - uint32_t opFlags = TO_FLAG_INTEGER; - - if ((psVarType->Class == SVC_MATRIX_COLUMNS || psVarType->Class == SVC_MATRIX_ROWS) && (psVarType->Elements > 1) && ((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) == 0)) - { - // Special handling for matrix arrays - oss << "[(" << TranslateOperand(psOperand->m_SubOperands[0].get(), opFlags) << ") / 4]"; - oss << "[((" << TranslateOperand(psOperand->m_SubOperands[0].get(), opFlags, OPERAND_4_COMPONENT_MASK_X) << ") % 4)]"; - } - else - { - oss << "[" << TranslateOperand(psOperand->m_SubOperands[0].get(), opFlags) << "]"; - } + oss << cbName << fullName; } else - if (index != -1 && psOperand->m_SubOperands[1].get() != NULL) - { - // Array of matrices is treated as array of vec4s in HLSL, - // but that would mess up uniform types in GLSL. Do gymnastics. - SHADER_VARIABLE_TYPE eType = psOperand->m_SubOperands[1].get()->GetDataType(psContext); - uint32_t opFlags = TO_FLAG_INTEGER; - if (eType != SVT_INT && eType != SVT_UINT) - opFlags = TO_AUTO_BITCAST_TO_INT; - - if ((psVarType->Class == SVC_MATRIX_COLUMNS || psVarType->Class == SVC_MATRIX_ROWS) && (psVarType->Elements > 1) && ((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) == 0)) - { - // Special handling for matrix arrays - oss << "[(" << TranslateOperand(psOperand->m_SubOperands[1].get(), opFlags) << " + " << index <<") / 4]"; - oss << "[((" << TranslateOperand(psOperand->m_SubOperands[1].get(), opFlags, OPERAND_4_COMPONENT_MASK_X) << " + " << index << ") % 4)]"; - } - else - { - if (index != 0) - oss << "[" << TranslateOperand(psOperand->m_SubOperands[1].get(), opFlags) << " + " << index << "]"; - else - oss << "[" << TranslateOperand(psOperand->m_SubOperands[1].get(), opFlags) << "]"; - } - } - else if (index != -1) - { - if ((psVarType->Class == SVC_MATRIX_COLUMNS || psVarType->Class == SVC_MATRIX_ROWS) && (psVarType->Elements > 1) && ((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) == 0)) - { - // Special handling for matrix arrays, open them up into vec4's - size_t matidx = index / 4; - size_t rowidx = index - (matidx * 4); - oss << "[" << matidx << "][" << rowidx << "]"; - } - else - { - oss << "[" << index << "]"; - } - } - else if (psOperand->m_SubOperands[1].get() != NULL) { - oss << "[" << TranslateOperand(psOperand->m_SubOperands[1].get(), TO_FLAG_INTEGER) << "]"; + // Non-simple case: build vec4 and apply mask + uint32_t i; + int32_t tmpRebase; + std::vector tmpArrayIndices; + bool tmpIsArray; + int firstItemAdded = 0; + + oss << GetConstructorForTypeMetal(psVarType->Type, GetNumberBitsSet(ui32CompMask)) << "("; + for (i = 0; i < 4; i++) + { + const ShaderVarType *tmpVarType = NULL; + if ((ui32CompMask & (1 << i)) == 0) + continue; + tmpRebase = 0; + if (firstItemAdded != 0) + oss << ", "; + else + firstItemAdded = 1; + + uint32_t tmpSwizzle[4] = { 0 }; + std::copy(&psOperand->aui32Swizzle[i], &psOperand->aui32Swizzle[4], &tmpSwizzle[0]); + + ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], tmpSwizzle, psCBuf, &tmpVarType, &tmpIsArray, &tmpArrayIndices, &tmpRebase, psContext->flags); + std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(tmpVarType, tmpArrayIndices, dynamicIndexStr, needsIndexCalcRevert, psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES); + + if (tmpVarType->Class == SVC_SCALAR) + { + oss << cbName << fullName; + } + else + { + uint32_t swizzle; + tmpRebase /= 4; // 0 => 0, 4 => 1, 8 => 2, 12 /= 3 + swizzle = psOperand->aui32Swizzle[i] - tmpRebase; + + oss << cbName << fullName << "." << ("xyzw"[swizzle]); + } + } + oss << ")"; + // Clear rebase, we've already done it. + rebase = 0; + // Also swizzle. + *pui32IgnoreSwizzle = 1; } + + if (isArray) + { + index = arrayIndices.back(); + + // Dynamic index is atm supported only at the root array level. Add here only if there is no such parent. + bool hasDynamicIndex = !dynamicIndexStr.empty() && (arrayIndices.size() <= 1); + bool hasImmediateIndex = (index != -1) && !(hasDynamicIndex && index == 0); + + if (hasDynamicIndex || hasImmediateIndex) + { + std::ostringstream fullIndexOss; + if (hasDynamicIndex && hasImmediateIndex) + fullIndexOss << "(" << dynamicIndexStr << " + " << index << ")"; + else if (hasDynamicIndex) + fullIndexOss << dynamicIndexStr; + else // hasImmediateStr + fullIndexOss << index; + + if (((psVarType->Class == SVC_MATRIX_COLUMNS) || (psVarType->Class == SVC_MATRIX_ROWS)) && (psVarType->Elements > 1) && ((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) == 0)) + { + // Special handling for old matrix arrays + oss << "[" << fullIndexOss.str() << " / 4]"; + oss << "[" << fullIndexOss.str() << " %% 4]"; + } + else // This path is atm the default + { + oss << "[" << fullIndexOss.str() << "]"; + } + } + } + if(psVarType && psVarType->Class == SVC_VECTOR && !*pui32IgnoreSwizzle) { switch(rebase) @@ -974,6 +979,7 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui case OPERAND_TYPE_UNORDERED_ACCESS_VIEW: { oss << ResourceName(RGROUP_UAV, psOperand->ui32RegisterNumber); + *pui32IgnoreSwizzle = 1; break; } case OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY: