Pulled changes from internal unity repo at changeset 42ec4a54d7ca

This commit is contained in:
Mikko Strandborg 2017-07-31 10:46:48 +03:00
Родитель 5f8fc43675
Коммит ab352e4603
17 изменённых файлов: 810 добавлений и 526 удалений

Просмотреть файл

@ -451,6 +451,8 @@ public:
static ResourceGroup ResourceTypeToResourceGroup(ResourceType); static ResourceGroup ResourceTypeToResourceGroup(ResourceType);
static uint32_t GetCBVarSize(const ShaderVarType* psType, bool matrixAsVectors, bool wholeArraySize = false);
static int GetShaderVarFromOffset(const uint32_t ui32Vec4Offset, static int GetShaderVarFromOffset(const uint32_t ui32Vec4Offset,
const uint32_t (&pui32Swizzle)[4], const uint32_t (&pui32Swizzle)[4],
const ConstantBuffer* psCBuf, const ConstantBuffer* psCBuf,
@ -460,7 +462,7 @@ public:
int32_t* pi32Rebase, int32_t* pi32Rebase,
uint32_t flags); uint32_t flags);
static std::string GetShaderVarIndexedFullName(const ShaderVarType* psShaderVar, std::vector<uint32_t> &indices); static std::string GetShaderVarIndexedFullName(const ShaderVarType* psShaderVar, std::vector<uint32_t> &indices, const std::string dynamicIndex, bool revertDynamicIndexCalc, bool matrixAsVectors);
// Apply shader precision information to resource bindings // Apply shader precision information to resource bindings
void AddSamplerPrecisions(HLSLccSamplerPrecisionInfo &info); void AddSamplerPrecisions(HLSLccSamplerPrecisionInfo &info);

Просмотреть файл

@ -444,8 +444,8 @@ static const unsigned int HLSLCC_FLAG_VULKAN_BINDINGS = 0x40000;
// If set, metal output will use linear sampler for shadow compares, otherwise point sampler. // If set, metal output will use linear sampler for shadow compares, otherwise point sampler.
static const unsigned int HLSLCC_FLAG_METAL_SHADOW_SAMPLER_LINEAR = 0x80000; static const unsigned int HLSLCC_FLAG_METAL_SHADOW_SAMPLER_LINEAR = 0x80000;
// If set, emits for NVN, the Nvidia-provided graphics API for Nintendo Switch. // If set, avoid emitting atomic counters (ARB_shader_atomic_counters) and use the atomic functions provided by ARB_shader_storage_buffer_object instead.
static const unsigned int HLSLCC_FLAG_NVN_TARGET = 0x100000; static const unsigned int HLSLCC_FLAG_AVOID_SHADER_ATOMIC_COUNTERS = 0x100000;
// If set, and generating Vulkan shaders, attempts to detect static branching and transforms them into specialization constants // If set, and generating Vulkan shaders, attempts to detect static branching and transforms them into specialization constants
static const unsigned int HLSLCC_FLAG_VULKAN_SPECIALIZATION_CONSTANTS = 0x200000; static const unsigned int HLSLCC_FLAG_VULKAN_SPECIALIZATION_CONSTANTS = 0x200000;
@ -453,6 +453,12 @@ static const unsigned int HLSLCC_FLAG_VULKAN_SPECIALIZATION_CONSTANTS = 0x200000
// If set, this shader uses the GLSL extension EXT_shader_framebuffer_fetch // If set, this shader uses the GLSL extension EXT_shader_framebuffer_fetch
static const unsigned int HLSLCC_FLAG_SHADER_FRAMEBUFFER_FETCH = 0x400000; static const unsigned int HLSLCC_FLAG_SHADER_FRAMEBUFFER_FETCH = 0x400000;
// Build for Switch.
static const unsigned int HLSLCC_FLAG_NVN_TARGET = 0x800000;
// If set, generate an instance name for constant buffers. The GLSL 4.5 spec disallows uniform variables from different constant buffers sharing the same name
// when they are part of the same final linked program. Uniform buffer instance names solve this cross-shader symbol conflict.
static const unsigned int HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT_WITH_INSTANCE_NAME = 0x1000000;
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {

Просмотреть файл

@ -315,13 +315,6 @@ void HLSLcc::DataTypeAnalysis::SetDataTypes(HLSLCrossCompilerContext* psContext,
MarkOperandAs(&psInst->asOperands[2], SVT_INT_AMBIGUOUS, aeTempVecType); MarkOperandAs(&psInst->asOperands[2], SVT_INT_AMBIGUOUS, aeTempVecType);
break; break;
case OPCODE_AND:
MarkOperandAs(&psInst->asOperands[0], SVT_INT_AMBIGUOUS, aeTempVecType);
MarkOperandAs(&psInst->asOperands[1], SVT_BOOL, aeTempVecType);
MarkOperandAs(&psInst->asOperands[2], SVT_BOOL, aeTempVecType);
break;
case OPCODE_IF: case OPCODE_IF:
case OPCODE_BREAKC: case OPCODE_BREAKC:
case OPCODE_CALLC: case OPCODE_CALLC:
@ -344,12 +337,18 @@ void HLSLcc::DataTypeAnalysis::SetDataTypes(HLSLCrossCompilerContext* psContext,
MarkOperandAs(&psInst->asOperands[2], SVT_UINT, aeTempVecType); MarkOperandAs(&psInst->asOperands[2], SVT_UINT, aeTempVecType);
break; break;
case OPCODE_AND:
case OPCODE_OR:
MarkOperandAs(&psInst->asOperands[0], SVT_INT_AMBIGUOUS, aeTempVecType);
MarkOperandAs(&psInst->asOperands[1], SVT_BOOL, aeTempVecType);
MarkOperandAs(&psInst->asOperands[2], SVT_BOOL, aeTempVecType);
break;
// Integer ops that don't care about signedness // Integer ops that don't care about signedness
case OPCODE_IADD: case OPCODE_IADD:
case OPCODE_INEG: case OPCODE_INEG:
case OPCODE_ISHL: case OPCODE_ISHL:
case OPCODE_NOT: case OPCODE_NOT:
case OPCODE_OR:
case OPCODE_XOR: case OPCODE_XOR:
case OPCODE_BUFINFO: case OPCODE_BUFINFO:
case OPCODE_COUNTBITS: case OPCODE_COUNTBITS:
@ -673,7 +672,9 @@ void HLSLcc::DataTypeAnalysis::SetDataTypes(HLSLCrossCompilerContext* psContext,
} }
} }
if (foundImmediate && dataType == SVT_VOID) // Use at minimum int type when any operand is immediate.
// Allowing bool could lead to bugs like case 883080
if (foundImmediate && (dataType == SVT_VOID || dataType == SVT_BOOL))
dataType = SVT_INT; dataType = SVT_INT;
if (dataType != SVT_VOID) if (dataType != SVT_VOID)

Просмотреть файл

@ -101,7 +101,9 @@ void HLSLCrossCompilerContext::RequireExtension(const std::string &extName)
return; return;
m_EnabledExtensions.insert(extName); m_EnabledExtensions.insert(extName);
bformata(extensions, "#ifdef %s\n", extName.c_str());
bformata(extensions, "#extension %s : require\n", extName.c_str()); bformata(extensions, "#extension %s : require\n", extName.c_str());
bcatcstr(extensions, "#endif\n");
} }
std::string HLSLCrossCompilerContext::GetDeclaredInputName(const Operand* psOperand, int *piRebase, int iIgnoreRedirect, uint32_t *puiIgnoreSwizzle) const std::string HLSLCrossCompilerContext::GetDeclaredInputName(const Operand* psOperand, int *piRebase, int iIgnoreRedirect, uint32_t *puiIgnoreSwizzle) const

Просмотреть файл

@ -457,11 +457,13 @@ namespace HLSLcc
return false; return false;
} }
#ifndef fpcheck
#ifdef _MSC_VER #ifdef _MSC_VER
#define fpcheck(x) (_isnan(x) || !_finite(x)) #define fpcheck(x) (_isnan(x) || !_finite(x))
#else #else
#define fpcheck(x) (std::isnan(x) || std::isinf(x)) #define fpcheck(x) (std::isnan(x) || std::isinf(x))
#endif #endif
#endif // #ifndef fpcheck
// Helper function to print floats with full precision // Helper function to print floats with full precision
void PrintFloat(bstring b, float f) void PrintFloat(bstring b, float f)

Просмотреть файл

@ -475,12 +475,11 @@ SHADER_VARIABLE_TYPE Operand::GetDataType(HLSLCrossCompilerContext* psContext, S
const ShaderVarType* psVarType = NULL; const ShaderVarType* psVarType = NULL;
int32_t rebase = -1; int32_t rebase = -1;
bool isArray; bool isArray;
int foundVar;
psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, aui32ArraySizes[0], &psCBuf); psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, aui32ArraySizes[0], &psCBuf);
if (psCBuf) if (psCBuf)
{ {
foundVar = ShaderInfo::GetShaderVarFromOffset(aui32ArraySizes[1], aui32Swizzle, psCBuf, &psVarType, &isArray, NULL, &rebase, psContext->flags); int foundVar = ShaderInfo::GetShaderVarFromOffset(aui32ArraySizes[1], aui32Swizzle, psCBuf, &psVarType, &isArray, NULL, &rebase, psContext->flags);
if (foundVar && m_SubOperands[1].get() == NULL) // TODO: why this suboperand thing? if (foundVar)
{ {
return psVarType->Type; return psVarType->Type;
} }
@ -583,4 +582,67 @@ int Operand::GetNumInputElements(const HLSLCrossCompilerContext *psContext) cons
// TODO: Are there ever any cases where the mask has 'holes'? // TODO: Are there ever any cases where the mask has 'holes'?
return HLSLcc::GetNumberBitsSet(psSig->ui32Mask); return HLSLcc::GetNumberBitsSet(psSig->ui32Mask);
}
// Returns the sub-operand that carries the dynamic index of this operand, or NULL
// when neither m_SubOperands[0] nor m_SubOperands[1] is set.
//
// psContext            - translation context; used for data type queries and diagnostics.
// psVar                - the constant buffer variable being indexed; the size of its Parent
//                        decides whether any float4 address calculation was applied.
//                        NOTE(review): presumably Parent is non-NULL whenever isAoS is true - confirm at call sites.
// isAoS                - true when dynamically indexing an array of structs. Only then do we
//                        try to locate the original index variable.
// needsIndexCalcRevert - output; set to true when the returned operand still holds the
//                        float4-scaled address, so the caller must divide it to get the index.
Operand* Operand::GetDynamicIndexOperand(HLSLCrossCompilerContext *psContext, const ShaderVarType* psVar, bool isAoS, bool *needsIndexCalcRevert) const
{
    Operand *psDynIndexOp = m_SubOperands[0].get();
    if (psDynIndexOp == NULL)
        psDynIndexOp = m_SubOperands[1].get();

    *needsIndexCalcRevert = false;
    if (psDynIndexOp != NULL && isAoS)
    {
        // if dynamically indexing array of structs, try using the original index var before the float4 address calc
        bool indexVarFound = false;
        *needsIndexCalcRevert = true;

        // The index operand may have no recorded defining instruction; never read m_Defines[0] blindly.
        Instruction *psDynIndexOrigin = NULL;
        Operand *asOps = NULL;
        if (!psDynIndexOp->m_Defines.empty())
            psDynIndexOrigin = psDynIndexOp->m_Defines[0].m_Inst;
        if (psDynIndexOrigin != NULL)
            asOps = psDynIndexOrigin->asOperands;
        Operand *psOriginOp = NULL;

        // DXBC always addresses as float4, find the address calculation

        // Special case where struct is float4 size, no extra calc is done
        if (ShaderInfo::GetCBVarSize(psVar->Parent, true) <= 16) // matrixAsVectors arg does not matter here as with matrices the size will go over the limit anyway
        {
            indexVarFound = true;
            *needsIndexCalcRevert = false;
        }
        else if (psDynIndexOrigin != NULL && psDynIndexOrigin->eOpcode == OPCODE_IMUL)
        {
            // check which one of the src operands is the original index
            if ((asOps[2].eType == OPERAND_TYPE_TEMP || asOps[2].eType == OPERAND_TYPE_INPUT) && asOps[3].eType == OPERAND_TYPE_IMMEDIATE32)
                psOriginOp = &asOps[2];
            else if ((asOps[3].eType == OPERAND_TYPE_TEMP || asOps[3].eType == OPERAND_TYPE_INPUT) && asOps[2].eType == OPERAND_TYPE_IMMEDIATE32)
                psOriginOp = &asOps[3];
        }
        else if (psDynIndexOrigin != NULL && psDynIndexOrigin->eOpcode == OPCODE_ISHL)
        {
            if (asOps[2].eType == OPERAND_TYPE_IMMEDIATE32)
                psOriginOp = &asOps[1];
        }

        if (psOriginOp != NULL)
        {
            indexVarFound = true;

            // Check if the mul dest is not the same temp as the src. Also check that the temp
            // does not have multiple uses (which could overwrite the value)
            // -> we can use src straight and no index revert calc is needed.
            // A source without a recorded definition cannot be proven single-use, so in that
            // case we keep the scaled index and let the caller revert the calculation.
            if ((psOriginOp->eType == OPERAND_TYPE_INPUT)
                || ((psOriginOp->ui32RegisterNumber != psDynIndexOp->ui32RegisterNumber || psOriginOp->GetDataType(psContext) != psDynIndexOp->GetDataType(psContext))
                    && !psOriginOp->m_Defines.empty()
                    && psOriginOp->m_Defines[0].m_Inst->m_Uses.size() == 1))
            {
                psDynIndexOp = psOriginOp;
                *needsIndexCalcRevert = false;
            }
        }

        // Atm we support only this very basic case of dynamic indexing array of structs.
        // Return error if something else is encountered.
        if (!indexVarFound)
            psContext->m_Reflection.OnDiagnostics("Unsupported dynamic indexing scheme on constant buffer vars.", 0, true);
    }

    return psDynIndexOp;
}

Просмотреть файл

@ -150,29 +150,37 @@ int ShaderInfo::GetOutputSignatureFromSystemValue(SPECIAL_NAME eSystemValueType,
return 0; return 0;
} }
static uint32_t GetCBVarSize(const ShaderVarType* psType, bool matrixAsVectors) uint32_t ShaderInfo::GetCBVarSize(const ShaderVarType* psType, bool matrixAsVectors, bool wholeArraySize)
{ {
// Struct size is calculated from the offset and size of its last member // Default is regular matrices, vectors and scalars
uint32_t size = psType->Columns * psType->Rows * 4;
// Struct size is calculated from the offset and size of its last member.
// Need to take into account that members could be arrays.
if (psType->Class == SVC_STRUCT) if (psType->Class == SVC_STRUCT)
{ {
return psType->Members.back().Offset + GetCBVarSize(&psType->Members.back(), matrixAsVectors); size = psType->Members.back().Offset + GetCBVarSize(&psType->Members.back(), matrixAsVectors, true);
} }
// Matrices represented as vec4 arrays have special size calculation // Matrices represented as vec4 arrays have special size calculation
if (matrixAsVectors) else if (matrixAsVectors)
{ {
if (psType->Class == SVC_MATRIX_ROWS) if (psType->Class == SVC_MATRIX_ROWS)
{ {
return psType->Rows * 16; size = psType->Rows * 16;
} }
else if (psType->Class == SVC_MATRIX_COLUMNS) else if (psType->Class == SVC_MATRIX_COLUMNS)
{ {
return psType->Columns * 16; size = psType->Columns * 16;
} }
} }
// Regular matrices, vectors and scalars if (wholeArraySize && psType->Elements > 1)
return psType->Columns * psType->Rows * 4; {
uint32_t paddedSize = ((size + 15) / 16) * 16; // Arrays are padded to float4 size
size = (psType->Elements - 1) * paddedSize + size; // Except the last element
}
return size;
} }
static const ShaderVarType* IsOffsetInType(const ShaderVarType* psType, static const ShaderVarType* IsOffsetInType(const ShaderVarType* psType,
@ -184,10 +192,8 @@ static const ShaderVarType* IsOffsetInType(const ShaderVarType* psType,
uint32_t flags) uint32_t flags)
{ {
uint32_t thisOffset = parentOffset + psType->Offset; uint32_t thisOffset = parentOffset + psType->Offset;
uint32_t thisSize = GetCBVarSize(psType, (flags & HLSLCC_FLAG_TRANSLATE_MATRICES) != 0); uint32_t thisSize = ShaderInfo::GetCBVarSize(psType, (flags & HLSLCC_FLAG_TRANSLATE_MATRICES) != 0);
uint32_t paddedSize = thisSize; uint32_t paddedSize = ((thisSize + 15) / 16) * 16;
if (thisSize % 16 > 0)
paddedSize += (16 - (thisSize % 16));
uint32_t arraySize = thisSize; uint32_t arraySize = thisSize;
// Array elements are padded to align on vec4 size, except for the last one // Array elements are padded to align on vec4 size, except for the last one
@ -308,7 +314,7 @@ int ShaderInfo::GetShaderVarFromOffset(const uint32_t ui32Vec4Offset,
// Patches the fullName of the var with given array indices. Does not insert the indexing for the var itself if it is an array. // Patches the fullName of the var with given array indices. Does not insert the indexing for the var itself if it is an array.
// Searches for brackets and inserts indices one by one. // Searches for brackets and inserts indices one by one.
std::string ShaderInfo::GetShaderVarIndexedFullName(const ShaderVarType* psShaderVar, std::vector<uint32_t> &indices) std::string ShaderInfo::GetShaderVarIndexedFullName(const ShaderVarType* psShaderVar, std::vector<uint32_t> &indices, const std::string dynamicIndex, bool revertDynamicIndexCalc, bool matrixAsVectors)
{ {
std::ostringstream oss; std::ostringstream oss;
size_t prevpos = 0; size_t prevpos = 0;
@ -318,8 +324,29 @@ std::string ShaderInfo::GetShaderVarIndexedFullName(const ShaderVarType* psShade
{ {
pos++; pos++;
oss << psShaderVar->fullName.substr(prevpos, pos - prevpos); oss << psShaderVar->fullName.substr(prevpos, pos - prevpos);
if (i < indices.size())
// Add possibly given dynamic index for the root array.
if (i == 0 && !dynamicIndex.empty())
{
oss << dynamicIndex;
// if we couldn't use original index temp, revert the float4 address calc here
if (revertDynamicIndexCalc)
{
const ShaderVarType* psRootVar = psShaderVar;
while (psRootVar->Parent != NULL)
psRootVar = psRootVar->Parent;
uint32_t thisSize = (GetCBVarSize(psRootVar, matrixAsVectors) + 15) / 16; // size in float4
oss << " / " << thisSize;
}
if (!indices.empty() && indices[i] != 0)
oss << " + " << indices[i];
}
else if (i < indices.size())
oss << indices[i]; oss << indices[i];
prevpos = pos; prevpos = pos;
i++; i++;
pos = psShaderVar->fullName.find('[', prevpos); pos = psShaderVar->fullName.find('[', prevpos);

Просмотреть файл

@ -92,6 +92,11 @@ public:
// Same as above but with explicit shader type and phase // Same as above but with explicit shader type and phase
int GetRegisterSpace(SHADER_TYPE eShaderType, SHADER_PHASE_TYPE eShaderPhaseType) const; int GetRegisterSpace(SHADER_TYPE eShaderType, SHADER_PHASE_TYPE eShaderPhaseType) const;
// Find the operand that contains the dynamic index for this operand (array in constant buffer).
// When isAoS is true, we'll try to find the original index var to avoid additional calculations.
// needsIndexCalcRevert output will tell if we need to divide the value to get the correct index.
Operand* GetDynamicIndexOperand(HLSLCrossCompilerContext *psContext, const ShaderVarType* psVar, bool isAoS, bool *needsIndexCalcRevert) const;
// Maps REFLECT_RESOURCE_PRECISION into OPERAND_MIN_PRECISION as much as possible // Maps REFLECT_RESOURCE_PRECISION into OPERAND_MIN_PRECISION as much as possible
static OPERAND_MIN_PRECISION ResourcePrecisionToOperandPrecision(REFLECT_RESOURCE_PRECISION ePrec); static OPERAND_MIN_PRECISION ResourcePrecisionToOperandPrecision(REFLECT_RESOURCE_PRECISION ePrec);

Просмотреть файл

@ -21,4 +21,6 @@ std::string ResourceName(HLSLCrossCompilerContext* psContext, ResourceGroup grou
std::string TextureSamplerName(ShaderInfo* psShaderInfo, const uint32_t ui32TextureRegisterNumber, const uint32_t ui32SamplerRegisterNumber, const int bZCompare); std::string TextureSamplerName(ShaderInfo* psShaderInfo, const uint32_t ui32TextureRegisterNumber, const uint32_t ui32SamplerRegisterNumber, const int bZCompare);
void ConcatTextureSamplerName(bstring str, ShaderInfo* psShaderInfo, const uint32_t ui32TextureRegisterNumber, const uint32_t ui32SamplerRegisterNumber, const int bZCompare); void ConcatTextureSamplerName(bstring str, ShaderInfo* psShaderInfo, const uint32_t ui32TextureRegisterNumber, const uint32_t ui32SamplerRegisterNumber, const int bZCompare);
std::string UniformBufferInstanceName(HLSLCrossCompilerContext* psContext, const std::string& name);
#endif #endif

Просмотреть файл

@ -165,6 +165,8 @@ private:
void AddComparison(Instruction* psInst, ComparisonType eType, void AddComparison(Instruction* psInst, ComparisonType eType,
uint32_t typeFlag); uint32_t typeFlag);
bool CanForceToHalfOperand(const Operand *psOperand);
void AddMOVBinaryOp(const Operand *pDest, Operand *pSrc); void AddMOVBinaryOp(const Operand *pDest, Operand *pSrc);
void AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand *src1, Operand *src2); void AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand *src1, Operand *src2);
void CallBinaryOp(const char* name, Instruction* psInst, void CallBinaryOp(const char* name, Instruction* psInst,

Просмотреть файл

@ -643,7 +643,7 @@ bool ToGLSL::Translate()
if ((psContext->flags & HLSLCC_FLAG_VULKAN_SPECIALIZATION_CONSTANTS) != 0) if ((psContext->flags & HLSLCC_FLAG_VULKAN_SPECIALIZATION_CONSTANTS) != 0)
{ {
DeclareSpecializationConstants(psShader->asPhases[i]); DeclareSpecializationConstants(*psPhase);
} }

Просмотреть файл

@ -12,16 +12,18 @@
#include <float.h> #include <float.h>
#include <sstream> #include <sstream>
#include <algorithm> #include <algorithm>
#include <cmath>
#include "internal_includes/toGLSL.h" #include "internal_includes/toGLSL.h"
using namespace HLSLcc; using namespace HLSLcc;
#ifndef fpcheck
#ifdef _MSC_VER #ifdef _MSC_VER
#define fpcheck(x) (_isnan(x) || !_finite(x)) #define fpcheck(x) (_isnan(x) || !_finite(x))
#else #else
#include <cmath> #define fpcheck(x) (std::isnan(x) || std::isinf(x))
#define fpcheck(x) ((std::isnan(x)) || (std::isinf(x)))
#endif #endif
#endif // #ifndef fpcheck
static void DeclareConstBufferShaderVariable(const HLSLCrossCompilerContext *psContext, const char* Name, const struct ShaderVarType* psType, int unsizedArray, bool addUniformPrefix = false) static void DeclareConstBufferShaderVariable(const HLSLCrossCompilerContext *psContext, const char* Name, const struct ShaderVarType* psType, int unsizedArray, bool addUniformPrefix = false)
//const SHADER_VARIABLE_CLASS eClass, const SHADER_VARIABLE_TYPE eType, //const SHADER_VARIABLE_CLASS eClass, const SHADER_VARIABLE_TYPE eType,
@ -877,14 +879,23 @@ static void DeclareUBOConstants(HLSLCrossCompilerContext* psContext, const uint3
if (psContext->flags & HLSLCC_FLAG_WRAP_UBO) if (psContext->flags & HLSLCC_FLAG_WRAP_UBO)
bformata(glsl, "#ifndef HLSLCC_DISABLE_UNIFORM_BUFFERS\n"); bformata(glsl, "#ifndef HLSLCC_DISABLE_UNIFORM_BUFFERS\n");
bcatcstr(glsl, "};\n");
if (psContext->flags & HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT_WITH_INSTANCE_NAME)
{
std::string instanceName = UniformBufferInstanceName(psContext, psCBuf->name);
bformata(glsl, "} %s;\n", instanceName.c_str());
}
else
bcatcstr(glsl, "};\n");
if (psContext->flags & HLSLCC_FLAG_WRAP_UBO) if (psContext->flags & HLSLCC_FLAG_WRAP_UBO)
bformata(glsl, "#endif\n#undef UNITY_UNIFORM\n"); bformata(glsl, "#endif\n#undef UNITY_UNIFORM\n");
} }
static void DeclareBufferVariable(HLSLCrossCompilerContext* psContext, uint32_t ui32BindingPoint, static void DeclareBufferVariable(HLSLCrossCompilerContext* psContext, uint32_t ui32BindingPoint,
const Operand* psOperand, const uint32_t ui32GloballyCoherentAccess, const Operand* psOperand, const uint32_t ui32GloballyCoherentAccess,
const uint32_t isRaw, const uint32_t isUAV, const uint32_t stride, bstring glsl) const uint32_t isRaw, const uint32_t isUAV, const uint32_t hasEmbeddedCounter, const uint32_t stride, bstring glsl)
{ {
const bool isVulkan = (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0; const bool isVulkan = (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0;
bstring BufNamebstr = bfromcstr(""); bstring BufNamebstr = bfromcstr("");
@ -921,6 +932,9 @@ static void DeclareBufferVariable(HLSLCrossCompilerContext* psContext, uint32_t
bformata(glsl, "buffer %s {\n\t", BufName.c_str()); bformata(glsl, "buffer %s {\n\t", BufName.c_str());
if (hasEmbeddedCounter)
bformata(glsl, "coherent uint %s_counter;\n\t", BufName.c_str());
if (isRaw) if (isRaw)
bcatcstr(glsl, "uint"); bcatcstr(glsl, "uint");
else else
@ -1201,7 +1215,10 @@ static void TranslateResourceTexture(HLSLCrossCompilerContext* psContext, const
{ {
// Need to enable extension (either OES or ARB), but we only need to add it once // Need to enable extension (either OES or ARB), but we only need to add it once
if (IsESLanguage(psContext->psShader->eTargetLanguage)) if (IsESLanguage(psContext->psShader->eTargetLanguage))
{
psContext->RequireExtension("GL_OES_texture_cube_map_array"); psContext->RequireExtension("GL_OES_texture_cube_map_array");
psContext->RequireExtension("GL_EXT_texture_cube_map_array");
}
else else
psContext->RequireExtension("GL_ARB_texture_cube_map_array"); psContext->RequireExtension("GL_ARB_texture_cube_map_array");
} }
@ -1997,8 +2014,8 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl)
} }
if(numViews > 0 && numViews < 10) if(numViews > 0 && numViews < 10)
{ {
bcatcstr(extensions, "#extension GL_OVR_multiview : require\n"); // multiview2 is required because we have built-in shaders that do eye-dependent work other than just position
bcatcstr(extensions, "#extension GL_OVR_multiview2 : enable\n"); bcatcstr(extensions, "#extension GL_OVR_multiview2 : require\n");
if(psShader->eShaderType == VERTEX_SHADER) if(psShader->eShaderType == VERTEX_SHADER)
bformata(glsl, "layout(num_views = %d) in;\n", numViews); bformata(glsl, "layout(num_views = %d) in;\n", numViews);
@ -2349,7 +2366,7 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl)
}; };
bformata(tgt, "\tImmCB_%d_%d_%d[%d] = ", psContext->currentPhase, chunk.first, chunk.second.m_Rebase, i); bformata(tgt, "\tImmCB_%d_%d_%d[%d] = ", psContext->currentPhase, chunk.first, chunk.second.m_Rebase, i);
if (fpcheck(val[chunk.second.m_Rebase])) if (fpcheck(val[chunk.second.m_Rebase]))
bformata(tgt, "uintBitsToFloat(uint(%Xu))", *(uint32_t *)&val[chunk.second.m_Rebase]); bformata(tgt, "uintBitsToFloat(uint(0x%Xu))", *(uint32_t *)&val[chunk.second.m_Rebase]);
else else
HLSLcc::PrintFloat(tgt, val[chunk.second.m_Rebase]); HLSLcc::PrintFloat(tgt, val[chunk.second.m_Rebase]);
bcatcstr(tgt, ";\n"); bcatcstr(tgt, ";\n");
@ -2371,7 +2388,7 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl)
if (k != 0) if (k != 0)
bcatcstr(tgt, ", "); bcatcstr(tgt, ", ");
if (fpcheck(val[k])) if (fpcheck(val[k]))
bformata(tgt, "uintBitsToFloat(uint(%Xu))", *(uint32_t *)&val[k + chunk.second.m_Rebase]); bformata(tgt, "uintBitsToFloat(uint(0x%Xu))", *(uint32_t *)&val[k + chunk.second.m_Rebase]);
else else
HLSLcc::PrintFloat(tgt, val[k + chunk.second.m_Rebase]); HLSLcc::PrintFloat(tgt, val[k + chunk.second.m_Rebase]);
} }
@ -2807,6 +2824,7 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl)
case OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED: case OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED:
{ {
const bool isVulkan = (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0; const bool isVulkan = (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0;
const bool avoidAtomicCounter = (psContext->flags & HLSLCC_FLAG_AVOID_SHADER_ATOMIC_COUNTERS) != 0;
if(psDecl->sUAV.bCounter) if(psDecl->sUAV.bCounter)
{ {
if (isVulkan) if (isVulkan)
@ -2815,6 +2833,14 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl)
GLSLCrossDependencyData::VulkanResourceBinding uavBinding = psContext->psDependencies->GetVulkanResourceBinding(uavname, true); GLSLCrossDependencyData::VulkanResourceBinding uavBinding = psContext->psDependencies->GetVulkanResourceBinding(uavname, true);
GLSLCrossDependencyData::VulkanResourceBinding counterBinding = std::make_pair(uavBinding.first, uavBinding.second+1); GLSLCrossDependencyData::VulkanResourceBinding counterBinding = std::make_pair(uavBinding.first, uavBinding.second+1);
bformata(glsl, "layout(set = %d, binding = %d) buffer %s_counterBuf { highp uint %s_counter; };\n", counterBinding.first, counterBinding.second, uavname.c_str(), uavname.c_str()); bformata(glsl, "layout(set = %d, binding = %d) buffer %s_counterBuf { highp uint %s_counter; };\n", counterBinding.first, counterBinding.second, uavname.c_str(), uavname.c_str());
DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0],
psDecl->sUAV.ui32GloballyCoherentAccess, 0, 1, 0, psDecl->ui32BufferStride, glsl);
}
else if (avoidAtomicCounter) // no support for atomic counter. We must use atomic functions in SSBO instead.
{
DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0],
psDecl->sUAV.ui32GloballyCoherentAccess, 0, 1, 1, psDecl->ui32BufferStride, glsl);
} }
else else
{ {
@ -2824,12 +2850,18 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl)
bcatcstr(glsl, "highp "); bcatcstr(glsl, "highp ");
bcatcstr(glsl, "atomic_uint "); bcatcstr(glsl, "atomic_uint ");
ResourceName(glsl, psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0); ResourceName(glsl, psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0);
bformata(glsl, "_counter; \n"); bcatcstr(glsl, "_counter; \n");
DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0],
psDecl->sUAV.ui32GloballyCoherentAccess, 0, 1, 0, psDecl->ui32BufferStride, glsl);
} }
} }
else
{
DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0],
psDecl->sUAV.ui32GloballyCoherentAccess, 0, 1, 0, psDecl->ui32BufferStride, glsl);
}
DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0],
psDecl->sUAV.ui32GloballyCoherentAccess, 0, 1, psDecl->ui32BufferStride, glsl);
break; break;
} }
case OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW: case OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW:
@ -2856,20 +2888,20 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl)
} }
DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0],
psDecl->sUAV.ui32GloballyCoherentAccess, 1, 1, psDecl->ui32BufferStride, glsl); psDecl->sUAV.ui32GloballyCoherentAccess, 1, 1, 0, psDecl->ui32BufferStride, glsl);
break; break;
} }
case OPCODE_DCL_RESOURCE_STRUCTURED: case OPCODE_DCL_RESOURCE_STRUCTURED:
{ {
DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0],
psDecl->sUAV.ui32GloballyCoherentAccess, 0, 0, psDecl->ui32BufferStride, glsl); psDecl->sUAV.ui32GloballyCoherentAccess, 0, 0, 0, psDecl->ui32BufferStride, glsl);
break; break;
} }
case OPCODE_DCL_RESOURCE_RAW: case OPCODE_DCL_RESOURCE_RAW:
{ {
DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0],
psDecl->sUAV.ui32GloballyCoherentAccess, 1, 0, psDecl->ui32BufferStride, glsl); psDecl->sUAV.ui32GloballyCoherentAccess, 1, 0, 0, psDecl->ui32BufferStride, glsl);
break; break;
} }
case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED: case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED:

Просмотреть файл

@ -1925,6 +1925,7 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals
bstring glsl = *psContext->currentGLSLString; bstring glsl = *psContext->currentGLSLString;
int numParenthesis = 0; int numParenthesis = 0;
const bool isVulkan = ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0); const bool isVulkan = ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0);
const bool avoidAtomicCounter = ((psContext->flags & HLSLCC_FLAG_AVOID_SHADER_ATOMIC_COUNTERS) != 0);
if (!isEmbedded) if (!isEmbedded)
{ {
@ -2130,10 +2131,10 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals
} }
else else
{ {
// Do component-wise and, glsl doesn't support && on bvecs // Do component-wise or, glsl doesn't support || on bvecs
for (uint32_t k = 0; k < 4; k++) for (uint32_t k = 0; k < 4; k++)
{ {
if ((destMask && (1 << k)) == 0) if ((destMask & (1 << k)) == 0)
continue; continue;
int needsParenthesis = 0; int needsParenthesis = 0;
@ -2341,8 +2342,20 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals
bcatcstr(glsl, "//UDIV\n"); bcatcstr(glsl, "//UDIV\n");
#endif #endif
//destQuotient, destRemainder, src0, src1 //destQuotient, destRemainder, src0, src1
CallBinaryOp("/", psInst, 0, 2, 3, SVT_UINT);
CallBinaryOp("%", psInst, 1, 2, 3, SVT_UINT); // There are cases where destQuotient is the same variable as src0 or src1. If that happens,
// we need to compute "%" before the "/" in order to avoid src0 or src1 being overwritten first.
if ((psInst->asOperands[0].eType != psInst->asOperands[2].eType || psInst->asOperands[0].ui32RegisterNumber != psInst->asOperands[2].ui32RegisterNumber)
&& (psInst->asOperands[0].eType != psInst->asOperands[3].eType || psInst->asOperands[0].ui32RegisterNumber != psInst->asOperands[3].ui32RegisterNumber))
{
CallBinaryOp("/", psInst, 0, 2, 3, SVT_UINT);
CallBinaryOp("%", psInst, 1, 2, 3, SVT_UINT);
}
else
{
CallBinaryOp("%", psInst, 1, 2, 3, SVT_UINT);
CallBinaryOp("/", psInst, 0, 2, 3, SVT_UINT);
}
break; break;
} }
case OPCODE_DIV: case OPCODE_DIV:
@ -3673,6 +3686,8 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals
break; break;
default: default:
ASSERT(0); ASSERT(0);
// Suppress uninitialised variable warning
srcDataType = SVT_VOID;
break; break;
} }
@ -4010,13 +4025,13 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals
#endif #endif
psContext->AddIndentation(); psContext->AddIndentation();
AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, &numParenthesis); AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, &numParenthesis);
if (isVulkan) if (isVulkan || avoidAtomicCounter)
bcatcstr(glsl, "atomicAdd("); bcatcstr(glsl, "atomicAdd(");
else else
bcatcstr(glsl, "atomicCounterIncrement("); bcatcstr(glsl, "atomicCounterIncrement(");
ResourceName(glsl, psContext, RGROUP_UAV, psInst->asOperands[1].ui32RegisterNumber, 0); ResourceName(glsl, psContext, RGROUP_UAV, psInst->asOperands[1].ui32RegisterNumber, 0);
bformata(glsl, "_counter"); bformata(glsl, "_counter");
if (isVulkan) if (isVulkan || avoidAtomicCounter)
bcatcstr(glsl, ", 1u)"); bcatcstr(glsl, ", 1u)");
else else
bcatcstr(glsl, ")"); bcatcstr(glsl, ")");
@ -4031,13 +4046,13 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals
#endif #endif
psContext->AddIndentation(); psContext->AddIndentation();
AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, &numParenthesis); AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, &numParenthesis);
if (isVulkan) if (isVulkan || avoidAtomicCounter)
bcatcstr(glsl, "(atomicAdd("); bcatcstr(glsl, "(atomicAdd(");
else else
bcatcstr(glsl, "atomicCounterDecrement("); bcatcstr(glsl, "atomicCounterDecrement(");
ResourceName(glsl, psContext, RGROUP_UAV, psInst->asOperands[1].ui32RegisterNumber, 0); ResourceName(glsl, psContext, RGROUP_UAV, psInst->asOperands[1].ui32RegisterNumber, 0);
bformata(glsl, "_counter"); bformata(glsl, "_counter");
if (isVulkan) if (isVulkan || avoidAtomicCounter)
bcatcstr(glsl, ", 0xffffffffu) + 0xffffffffu)"); bcatcstr(glsl, ", 0xffffffffu) + 0xffffffffu)");
else else
bcatcstr(glsl, ")"); bcatcstr(glsl, ")");

Просмотреть файл

@ -16,11 +16,13 @@
using namespace HLSLcc; using namespace HLSLcc;
#ifndef fpcheck
#ifdef _MSC_VER #ifdef _MSC_VER
#define fpcheck(x) (_isnan(x) || !_finite(x)) #define fpcheck(x) (_isnan(x) || !_finite(x))
#else #else
#define fpcheck(x) (std::isnan(x) || std::isinf(x)) #define fpcheck(x) (std::isnan(x) || std::isinf(x))
#endif #endif
#endif // #ifndef fpcheck
// Returns nonzero if types are just different precisions of the same underlying type // Returns nonzero if types are just different precisions of the same underlying type
@ -434,8 +436,19 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan
if (psOperand->eType == OPERAND_TYPE_INPUT) if (psOperand->eType == OPERAND_TYPE_INPUT)
{ {
// Check for scalar // Check for scalar
if (psContext->psShader->abScalarInput[psOperand->GetRegisterSpace(psContext)][psOperand->ui32RegisterNumber] & psOperand->GetAccessMask() // You would think checking would be easy but there is a caveat:
&& psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) // checking abScalarInput might report as scalar, while in reality that was redirected and now is vector so swizzle must be preserved
// as an example consider we have input:
// float2 x; float y;
// and later on we do
// tex2D(xxx, fixed2(x.x, y));
// in that case we will generate redirect but which ui32RegisterNumber will be used for it is not strictly "specified"
// so we may end up with treating it as scalar (even though it is vector now)
const int redirectInput = psContext->psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber];
const bool wasRedirected = redirectInput == 0xFF || redirectInput == 0xFE;
const int scalarInput = psContext->psShader->abScalarInput[psOperand->GetRegisterSpace(psContext)][psOperand->ui32RegisterNumber];
if (!wasRedirected && (scalarInput & psOperand->GetAccessMask()) && (psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE))
{ {
scalarWithSwizzle = 1; scalarWithSwizzle = 1;
*pui32IgnoreSwizzle = 1; *pui32IgnoreSwizzle = 1;
@ -884,197 +897,177 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan
//Work out the variable name. Don't apply swizzle to that variable yet. //Work out the variable name. Don't apply swizzle to that variable yet.
int32_t rebase = 0; int32_t rebase = 0;
if(psCBuf) ASSERT(psCBuf != NULL);
uint32_t componentsNeeded = 1;
uint32_t minSwiz = 3;
uint32_t maxSwiz = 0;
if (psOperand->eSelMode != OPERAND_4_COMPONENT_SELECT_1_MODE)
{ {
uint32_t componentsNeeded = 1; int i;
uint32_t minSwiz = 3; for (i = 0; i < 4; i++)
uint32_t maxSwiz = 0;
if (psOperand->eSelMode != OPERAND_4_COMPONENT_SELECT_1_MODE)
{ {
int i; if ((ui32CompMask & (1 << i)) == 0)
for (i = 0; i < 4; i++) continue;
{ minSwiz = std::min(minSwiz, psOperand->aui32Swizzle[i]);
if ((ui32CompMask & (1 << i)) == 0) maxSwiz = std::max(maxSwiz, psOperand->aui32Swizzle[i]);
continue; }
minSwiz = std::min(minSwiz, psOperand->aui32Swizzle[i]); componentsNeeded = maxSwiz - minSwiz + 1;
maxSwiz = std::max(maxSwiz, psOperand->aui32Swizzle[i]); }
} else
componentsNeeded = maxSwiz - minSwiz + 1; {
minSwiz = maxSwiz = 1;
}
// When we have a component mask that doesn't have .x set (this basically only happens when we manually open operands into components)
// We have to pull down the swizzle array to match the first bit that's actually set
uint32_t tmpSwizzle[4] = { 0 };
int firstBitSet = 0;
if (ui32CompMask == 0)
ui32CompMask = 0xf;
while ((ui32CompMask & (1 << firstBitSet)) == 0)
firstBitSet++;
std::copy(&psOperand->aui32Swizzle[firstBitSet], &psOperand->aui32Swizzle[4], &tmpSwizzle[0]);
ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], tmpSwizzle, psCBuf, &psVarType, &isArray, &arrayIndices, &rebase, psContext->flags);
// Get a possible dynamic array index
bstring dynamicIndex = bfromcstr("");
bool needsIndexCalcRevert = false;
bool isAoS = ((!isArray && arrayIndices.size() > 0) || (isArray && arrayIndices.size() > 1));
Operand *psDynIndexOp = psOperand->GetDynamicIndexOperand(psContext, psVarType, isAoS, &needsIndexCalcRevert);
if (psDynIndexOp != NULL)
{
SHADER_VARIABLE_TYPE eType = psDynIndexOp->GetDataType(psContext);
uint32_t opFlags = TO_FLAG_INTEGER;
if (eType != SVT_INT && eType != SVT_UINT)
opFlags = TO_AUTO_BITCAST_TO_INT;
TranslateOperand(dynamicIndex, psDynIndexOp, opFlags);
}
char *tmp = bstr2cstr(dynamicIndex, '\0');
std::string dynamicIndexStr = tmp;
bcstrfree(tmp);
bdestroy(dynamicIndex);
if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE || ((componentsNeeded+minSwiz) <= psVarType->Columns))
{
// Simple case: just access one component
std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(psVarType, arrayIndices, dynamicIndexStr, needsIndexCalcRevert, psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES);
if ((psContext->flags & HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT_WITH_INSTANCE_NAME) && psCBuf)
{
std::string instanceName = UniformBufferInstanceName(psContext, psCBuf->name);
bformata(glsl, "%s.", instanceName.c_str());
}
if (((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) != 0) && ((psVarType->Class == SVC_MATRIX_ROWS) || (psVarType->Class == SVC_MATRIX_COLUMNS)))
{
// We'll need to add the prefix only to the last section of the name
size_t commaPos = fullName.find_last_of('.');
char prefix[256];
sprintf(prefix, HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING, psVarType->Rows, psVarType->Columns);
if (commaPos == std::string::npos)
fullName.insert(0, prefix);
else
fullName.insert(commaPos + 1, prefix);
bformata(glsl, "%s", fullName.c_str());
} }
else else
bformata(glsl, "%s", fullName.c_str());
}
else
{
// Non-simple case: build vec4 and apply mask
std::string instanceNamePrefix;
if ((psContext->flags & HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT_WITH_INSTANCE_NAME) && psCBuf)
{ {
minSwiz = maxSwiz = 1; std::string instanceName = UniformBufferInstanceName(psContext, psCBuf->name);
instanceNamePrefix = instanceName + ".";
} }
// When we have a component mask that doesn't have .x set (this basically only happens when we manually open operands into components) uint32_t i;
// We have to pull down the swizzle array to match the first bit that's actually set std::vector<uint32_t> tmpArrayIndices;
uint32_t tmpSwizzle[4] = { 0 }; bool tmpIsArray;
int firstBitSet = 0; int32_t tmpRebase;
if (ui32CompMask == 0) int firstItemAdded = 0;
ui32CompMask = 0xf;
while ((ui32CompMask & (1 << firstBitSet)) == 0)
firstBitSet++;
std::copy(&psOperand->aui32Swizzle[firstBitSet], &psOperand->aui32Swizzle[4], &tmpSwizzle[0]);
ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], tmpSwizzle, psCBuf, &psVarType, &isArray, &arrayIndices, &rebase, psContext->flags); bformata(glsl, "%s(", GetConstructorForType(psContext, psVarType->Type, GetNumberBitsSet(ui32CompMask), false));
if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE || ((componentsNeeded+minSwiz) <= psVarType->Columns)) for (i = 0; i < 4; i++)
{ {
// Simple case: just access one component const ShaderVarType *tmpVarType = NULL;
std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(psVarType, arrayIndices); if ((ui32CompMask & (1 << i)) == 0)
continue;
tmpRebase = 0;
if (firstItemAdded != 0)
bcatcstr(glsl, ", ");
else
firstItemAdded = 1;
if (((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) != 0) && ((psVarType->Class == SVC_MATRIX_ROWS) || (psVarType->Class == SVC_MATRIX_COLUMNS))) memset(tmpSwizzle, 0, sizeof(uint32_t) * 4);
std::copy(&psOperand->aui32Swizzle[i], &psOperand->aui32Swizzle[4], &tmpSwizzle[0]);
ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], tmpSwizzle, psCBuf, &tmpVarType, &tmpIsArray, &tmpArrayIndices, &tmpRebase, psContext->flags);
std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(tmpVarType, tmpArrayIndices, dynamicIndexStr, needsIndexCalcRevert, psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES);
if (tmpVarType->Class == SVC_SCALAR)
{ {
// We'll need to add the prefix only to the last section of the name bformata(glsl, "%s%s", instanceNamePrefix.c_str(), fullName.c_str());
size_t commaPos = fullName.find_last_of('.');
char prefix[256];
sprintf(prefix, HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING, psVarType->Rows, psVarType->Columns);
if (commaPos == std::string::npos)
fullName.insert(0, prefix);
else
fullName.insert(commaPos + 1, prefix);
bformata(glsl, "%s", fullName.c_str());
} }
else else
bformata(glsl, "%s", fullName.c_str());
}
else
{
// Non-simple case: build vec4 and apply mask
uint32_t i;
std::vector<uint32_t> tmpArrayIndices;
bool tmpIsArray;
int32_t tmpRebase;
int firstItemAdded = 0;
bformata(glsl, "%s(", GetConstructorForType(psContext, psVarType->Type, GetNumberBitsSet(ui32CompMask), false));
for (i = 0; i < 4; i++)
{ {
const ShaderVarType *tmpVarType = NULL; uint32_t swizzle;
if ((ui32CompMask & (1 << i)) == 0) tmpRebase /= 4; // 0 => 0, 4 => 1, 8 => 2, 12 /= 3
continue; swizzle = psOperand->aui32Swizzle[i] - tmpRebase;
tmpRebase = 0;
if (firstItemAdded != 0)
bcatcstr(glsl, ", ");
else
firstItemAdded = 1;
memset(tmpSwizzle, 0, sizeof(uint32_t) * 4); bformata(glsl, "%s%s", instanceNamePrefix.c_str(), fullName.c_str());
std::copy(&psOperand->aui32Swizzle[i], &psOperand->aui32Swizzle[4], &tmpSwizzle[0]); bformata(glsl, ".%c", "xyzw"[swizzle]);
ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], tmpSwizzle, psCBuf, &tmpVarType, &tmpIsArray, &tmpArrayIndices, &tmpRebase, psContext->flags);
std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(tmpVarType, tmpArrayIndices);
if (tmpVarType->Class == SVC_SCALAR)
{
bformata(glsl, "%s", fullName.c_str());
}
else
{
uint32_t swizzle;
tmpRebase /= 4; // 0 => 0, 4 => 1, 8 => 2, 12 /= 3
swizzle = psOperand->aui32Swizzle[i] - tmpRebase;
bformata(glsl, "%s", fullName.c_str());
bformata(glsl, ".%c", "xyzw"[swizzle]);
}
} }
bcatcstr(glsl, ")");
// Clear rebase, we've already done it.
rebase = 0;
// Also swizzle.
*pui32IgnoreSwizzle = 1;
} }
} bcatcstr(glsl, ")");
else // We don't have a semantic for this variable, so try the raw dump appoach. // Clear rebase, we've already done it.
{ rebase = 0;
ASSERT(0); // Also swizzle.
//bformata(glsl, "cb%d.data", psOperand->aui32ArraySizes[0]);// *pui32IgnoreSwizzle = 1;
//index = psOperand->aui32ArraySizes[1];
} }
if (isArray) if (isArray)
{
index = arrayIndices.back(); index = arrayIndices.back();
//Dx9 only? // Dynamic index is atm supported only at the root array level. Add here only if there is no such parent.
if(psOperand->m_SubOperands[0].get() != NULL) bool hasDynamicIndex = !dynamicIndexStr.empty() && (arrayIndices.size() <= 1);
{ bool hasImmediateIndex = (index != -1) && !(hasDynamicIndex && index == 0);
// Array of matrices is treated as array of vec4s in HLSL,
// but that would mess up uniform types in GLSL. Do gymnastics.
uint32_t opFlags = TO_FLAG_INTEGER;
if (((psVarType->Class == SVC_MATRIX_COLUMNS) || (psVarType->Class == SVC_MATRIX_ROWS)) && (psVarType->Elements > 1) && ((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) == 0)) if (hasDynamicIndex || hasImmediateIndex)
{ {
// Special handling for matrix arrays std::ostringstream fullIndexOss;
bcatcstr(glsl, "[("); if (hasDynamicIndex && hasImmediateIndex)
TranslateOperand(psOperand->m_SubOperands[0].get(), opFlags); fullIndexOss << "(" << dynamicIndexStr << " + " << index << ")";
bformata(glsl, ") / 4]"); else if (hasDynamicIndex)
{ fullIndexOss << dynamicIndexStr;
bcatcstr(glsl, "[(("); else // hasImmediateStr
TranslateOperand(psOperand->m_SubOperands[0].get(), opFlags, OPERAND_4_COMPONENT_MASK_X); fullIndexOss << index;
bformata(glsl, ") %% 4)]");
}
}
else
{
bcatcstr(glsl, "[");
TranslateOperand(psOperand->m_SubOperands[0].get(), opFlags);
bformata(glsl, "]");
}
}
else
if(index != -1 && psOperand->m_SubOperands[1].get() != NULL)
{
// Array of matrices is treated as array of vec4s in HLSL,
// but that would mess up uniform types in GLSL. Do gymnastics.
SHADER_VARIABLE_TYPE eType = psOperand->m_SubOperands[1].get()->GetDataType(psContext);
uint32_t opFlags = TO_FLAG_INTEGER;
if (eType != SVT_INT && eType != SVT_UINT)
opFlags = TO_AUTO_BITCAST_TO_INT;
if (((psVarType->Class == SVC_MATRIX_COLUMNS) ||( psVarType->Class == SVC_MATRIX_ROWS)) && (psVarType->Elements > 1) && ((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) == 0))
{
// Special handling for matrix arrays
bcatcstr(glsl, "[(");
TranslateOperand(psOperand->m_SubOperands[1].get(), opFlags);
bformata(glsl, " + %d) / 4]", index);
{
bcatcstr(glsl, "[((");
TranslateOperand(psOperand->m_SubOperands[1].get(), opFlags);
bformata(glsl, " + %d) %% 4)]", index);
}
}
else
{
bcatcstr(glsl, "[");
TranslateOperand(psOperand->m_SubOperands[1].get(), opFlags);
if (index != 0)
bformata(glsl, " + %d]", index);
else
bcatcstr(glsl, "]");
}
}
else if(index != -1)
{
if (((psVarType->Class == SVC_MATRIX_COLUMNS) || (psVarType->Class == SVC_MATRIX_ROWS)) && (psVarType->Elements > 1) && ((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) == 0))
{
// Special handling for matrix arrays, open them up into vec4's
size_t matidx = index / 4;
size_t rowidx = index - (matidx*4);
bformata(glsl, "[%d][%d]", matidx, rowidx);
}
else
{
bformata(glsl, "[%d]", index);
}
}
else if(psOperand->m_SubOperands[1].get() != NULL)
{
bcatcstr(glsl, "[");
TranslateOperand(psOperand->m_SubOperands[1].get(), TO_FLAG_INTEGER);
bcatcstr(glsl, "]");
}
if (((psVarType->Class == SVC_MATRIX_COLUMNS) || (psVarType->Class == SVC_MATRIX_ROWS)) && (psVarType->Elements > 1) && ((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) == 0))
{
// Special handling for old matrix arrays
bformata(glsl, "[%s / 4]", fullIndexOss.str().c_str());
bformata(glsl, "[%s %% 4]", fullIndexOss.str().c_str());
}
else // This path is atm the default
{
bformata(glsl, "[%s]", fullIndexOss.str().c_str());
}
}
}
if(psVarType && psVarType->Class == SVC_VECTOR && !*pui32IgnoreSwizzle) if(psVarType && psVarType->Class == SVC_VECTOR && !*pui32IgnoreSwizzle)
{ {
switch(rebase) switch(rebase)
@ -1642,3 +1635,41 @@ void ConcatTextureSamplerName(bstring str, ShaderInfo* psShaderInfo, const uint3
std::string texturesamplername = TextureSamplerName(psShaderInfo, ui32TextureRegisterNumber, ui32SamplerRegisterNumber, bZCompare); std::string texturesamplername = TextureSamplerName(psShaderInfo, ui32TextureRegisterNumber, ui32SamplerRegisterNumber, bZCompare);
bcatcstr(str, texturesamplername.c_str()); bcatcstr(str, texturesamplername.c_str());
} }
// Builds the instance name for a uniform buffer from the buffer's declared name.
//
// Every name gets a leading underscore. The "$Globals" buffer is handled specially:
// its first character is dropped and a letter identifying the current shader stage
// is inserted instead, so that the Globals struct name does not clash between
// shader stages.
std::string UniformBufferInstanceName(HLSLCrossCompilerContext* psContext, const std::string& name)
{
    // Ordinary buffers: just prepend the underscore.
    if (name != "$Globals")
        return std::string("_") + name;

    // 'A' is kept when the shader type is not one of the stages listed below
    // (that situation also trips the assert in the default branch).
    char stageLetter = 'A';
    switch (psContext->psShader->eShaderType)
    {
        case VERTEX_SHADER:
            stageLetter = 'V';
            break;
        case PIXEL_SHADER:
            stageLetter = 'P';
            break;
        case GEOMETRY_SHADER:
            stageLetter = 'G';
            break;
        case HULL_SHADER:
            stageLetter = 'H';
            break;
        case DOMAIN_SHADER:
            stageLetter = 'D';
            break;
        case COMPUTE_SHADER:
            stageLetter = 'C';
            break;
        default:
            ASSERT(0);
            break;
    }

    // "_" + stage letter + the buffer name without its first character.
    std::string instanceName("_");
    instanceName += stageLetter;
    instanceName += name.substr(1);
    return instanceName;
}

Просмотреть файл

@ -5,13 +5,15 @@
#include "internal_includes/Declaration.h" #include "internal_includes/Declaration.h"
#include <algorithm> #include <algorithm>
#include <sstream> #include <sstream>
#include <cmath>
#ifndef fpcheck
#ifdef _MSC_VER #ifdef _MSC_VER
#define fpcheck(x) (_isnan(x) || !_finite(x)) #define fpcheck(x) (_isnan(x) || !_finite(x))
#else #else
#include <cmath> #define fpcheck(x) (std::isnan(x) || std::isinf(x))
#define fpcheck(x) ((std::isnan(x)) || (std::isinf(x)))
#endif #endif
#endif // #ifndef fpcheck
bool ToMetal::TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix) bool ToMetal::TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix)
@ -34,6 +36,8 @@ bool ToMetal::TranslateSystemValue(const Operand *psOperand, const ShaderInfo::I
case NAME_RENDER_TARGET_ARRAY_INDEX: case NAME_RENDER_TARGET_ARRAY_INDEX:
result = "mtl_Layer"; result = "mtl_Layer";
if (outSkipPrefix != NULL) *outSkipPrefix = true; if (outSkipPrefix != NULL) *outSkipPrefix = true;
if (pui32IgnoreSwizzle)
*pui32IgnoreSwizzle = 1;
return true; return true;
case NAME_CLIP_DISTANCE: case NAME_CLIP_DISTANCE:
{ {
@ -145,13 +149,8 @@ void ToMetal::DeclareBuiltinInput(const Declaration *psDecl)
m_StructDefinitions[""].m_Members.push_back("float4 mtl_FragCoord [[ position ]]"); m_StructDefinitions[""].m_Members.push_back("float4 mtl_FragCoord [[ position ]]");
break; break;
case NAME_RENDER_TARGET_ARRAY_INDEX: case NAME_RENDER_TARGET_ARRAY_INDEX:
#if 0
// Only supported on a Mac // Only supported on a Mac
m_StructDefinitions[""].m_Members.push_back("uint mtl_Layer [[ render_target_array_index ]]"); m_StructDefinitions[""].m_Members.push_back("uint mtl_Layer [[ render_target_array_index ]]");
#else
// Not on Metal
ASSERT(0);
#endif
break; break;
case NAME_CLIP_DISTANCE: case NAME_CLIP_DISTANCE:
ASSERT(0); // Should never be an input ASSERT(0); // Should never be an input
@ -267,13 +266,8 @@ void ToMetal::DeclareBuiltinOutput(const Declaration *psDecl)
m_StructDefinitions[out].m_Members.push_back("float4 mtl_Position [[ position ]]"); m_StructDefinitions[out].m_Members.push_back("float4 mtl_Position [[ position ]]");
break; break;
case NAME_RENDER_TARGET_ARRAY_INDEX: case NAME_RENDER_TARGET_ARRAY_INDEX:
#if 0
// Only supported on a Mac // Only supported on a Mac
m_StructDefinitions[out].m_Members.push_back("uint mtl_Layer [[ render_target_array_index ]]"); m_StructDefinitions[out].m_Members.push_back("uint mtl_Layer [[ render_target_array_index ]]");
#else
// Not on Metal
ASSERT(0);
#endif
break; break;
case NAME_CLIP_DISTANCE: case NAME_CLIP_DISTANCE:
// it will be done separately in DeclareClipPlanes // it will be done separately in DeclareClipPlanes
@ -663,8 +657,15 @@ static std::string TranslateResourceDeclaration(HLSLCrossCompilerContext* psCont
} }
} }
} }
if (eDimension == RESOURCE_DIMENSION_BUFFER) switch (eDimension)
access = "read"; {
case RESOURCE_DIMENSION_BUFFER:
case RESOURCE_DIMENSION_TEXTURE2DMS:
case RESOURCE_DIMENSION_TEXTURE2DMSARRAY:
access = "read";
default:
break;
}
} }
SHADER_VARIABLE_TYPE svtType = HLSLcc::ResourceReturnTypeToSVTType(eType, ePrec); SHADER_VARIABLE_TYPE svtType = HLSLcc::ResourceReturnTypeToSVTType(eType, ePrec);
@ -768,19 +769,19 @@ static std::string GetInterpolationString(INTERPOLATION_MODE eMode)
return ""; return "";
case INTERPOLATION_LINEAR_CENTROID: case INTERPOLATION_LINEAR_CENTROID:
return " [[ centroid ]]"; return " [[ centroid_perspective ]]";
case INTERPOLATION_LINEAR_NOPERSPECTIVE: case INTERPOLATION_LINEAR_NOPERSPECTIVE:
return " [[ center_perspective ]]"; return " [[ center_no_perspective ]]";
case INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID: case INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID:
return " [[ centroid_noperspective ]]"; return " [[ centroid_no_perspective ]]";
case INTERPOLATION_LINEAR_SAMPLE: case INTERPOLATION_LINEAR_SAMPLE:
return " [[ sample_perspective ]]"; return " [[ sample_perspective ]]";
case INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE: case INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE:
return " [[ sample_noperspective ]]"; return " [[ sample_no_perspective ]]";
default: default:
ASSERT(0); ASSERT(0);
return ""; return "";
@ -801,9 +802,17 @@ void ToMetal::DeclareStructVariable(const std::string &parentName, const ShaderV
if (var.Class == SVC_STRUCT) if (var.Class == SVC_STRUCT)
{ {
std::ostringstream oss;
if (m_StructDefinitions.find(var.name + "_Type") == m_StructDefinitions.end()) if (m_StructDefinitions.find(var.name + "_Type") == m_StructDefinitions.end())
DeclareStructType(var.name + "_Type", var.Members, withinCB, cumulativeOffset + var.Offset); DeclareStructType(var.name + "_Type", var.Members, withinCB, cumulativeOffset + var.Offset);
// Report Array-of-Struct CB top-level struct var after all members are reported.
if (var.Parent == NULL && var.Elements > 1 && withinCB)
{
// var.Type being SVT_VOID indicates it is a struct in this case.
psContext->m_Reflection.OnConstant(var.fullName, var.Offset + cumulativeOffset, var.Type, var.Rows, var.Columns, false, var.Elements);
}
std::ostringstream oss;
oss << var.name << "_Type " << var.name; oss << var.name << "_Type " << var.name;
if (var.Elements > 1) if (var.Elements > 1)
{ {
@ -1197,6 +1206,13 @@ void ToMetal::TranslateDeclaration(const Declaration* psDecl)
m_StructDefinitions[""].m_Members.push_back(oss.str()); m_StructDefinitions[""].m_Members.push_back(oss.str());
break; break;
} }
if (psOperand->eSpecialName == NAME_RENDER_TARGET_ARRAY_INDEX)
{
std::ostringstream oss;
oss << "uint " << name << " [[ render_target_array_index ]]";
m_StructDefinitions[""].m_Members.push_back(oss.str());
break;
}
if (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED) if (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED)
{ {
std::ostringstream oss; std::ostringstream oss;
@ -1484,7 +1500,7 @@ void ToMetal::TranslateDeclaration(const Declaration* psDecl)
*(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].d *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].d
}; };
if (fpcheck(val[chunk.second.m_Rebase])) if (fpcheck(val[chunk.second.m_Rebase]))
bformata(glsl, "\tas_type<float>(%Xu)", *(uint32_t *)&val[chunk.second.m_Rebase]); bformata(glsl, "\tas_type<float>(0x%Xu)", *(uint32_t *)&val[chunk.second.m_Rebase]);
else else
{ {
bcatcstr(glsl, "\t"); bcatcstr(glsl, "\t");
@ -1511,7 +1527,7 @@ void ToMetal::TranslateDeclaration(const Declaration* psDecl)
if (k != 0) if (k != 0)
bcatcstr(glsl, ", "); bcatcstr(glsl, ", ");
if (fpcheck(val[k])) if (fpcheck(val[k]))
bformata(glsl, "as_type<float>(%Xu)", *(uint32_t *)&val[k + chunk.second.m_Rebase]); bformata(glsl, "as_type<float>(0x%Xu)", *(uint32_t *)&val[k + chunk.second.m_Rebase]);
else else
HLSLcc::PrintFloat(glsl, val[k + chunk.second.m_Rebase]); HLSLcc::PrintFloat(glsl, val[k + chunk.second.m_Rebase]);
} }

Просмотреть файл

@ -6,6 +6,7 @@
#include "stdio.h" #include "stdio.h"
#include <stdlib.h> #include <stdlib.h>
#include <algorithm> #include <algorithm>
#include <cmath>
#include "internal_includes/debug.h" #include "internal_includes/debug.h"
#include "internal_includes/Shader.h" #include "internal_includes/Shader.h"
#include "internal_includes/Instruction.h" #include "internal_includes/Instruction.h"
@ -175,8 +176,8 @@ void ToMetal::AddComparison(Instruction* psInst, ComparisonType eType,
int needsParenthesis = 0; int needsParenthesis = 0;
if (typeFlag == TO_FLAG_NONE if (typeFlag == TO_FLAG_NONE
&& psInst->asOperands[1].GetDataType(psContext) == SVT_FLOAT16 && CanForceToHalfOperand(&psInst->asOperands[1])
&& psInst->asOperands[1].GetDataType(psContext) == SVT_FLOAT16) && CanForceToHalfOperand(&psInst->asOperands[2]))
typeFlag = TO_FLAG_FORCE_HALF; typeFlag = TO_FLAG_FORCE_HALF;
ASSERT(s0ElemCount == s1ElemCount || s1ElemCount == 1 || s0ElemCount == 1); ASSERT(s0ElemCount == s1ElemCount || s1ElemCount == 1 || s0ElemCount == 1);
if ((s0ElemCount != s1ElemCount) && (destElemCount > 1)) if ((s0ElemCount != s1ElemCount) && (destElemCount > 1))
@ -251,6 +252,25 @@ void ToMetal::AddComparison(Instruction* psInst, ComparisonType eType,
} }
} }
// Tells whether an operand may be treated as half precision.
//
// Returns true when the operand's data type is already SVT_FLOAT16, or when it is
// an immediate (OPERAND_TYPE_IMMEDIATE32 / OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER)
// whose every component is either zero or has a magnitude inside the range checked
// below. Any other operand yields false.
bool ToMetal::CanForceToHalfOperand(const Operand *psOperand)
{
    if (psOperand->GetDataType(psContext) == SVT_FLOAT16)
        return true;

    const bool isImmediate = psOperand->eType == OPERAND_TYPE_IMMEDIATE32
        || psOperand->eType == OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER;
    if (!isImmediate)
        return false;

    for (int comp = 0; comp < psOperand->iNumComponents; comp++)
    {
        const float magnitude = fabs(psOperand->afImmediates[comp]);

        // Zero is always acceptable.
        if (magnitude == 0)
            continue;

        // Do not allow forcing an immediate to half if its value is beyond the half min/max boundaries.
        // NOTE(review): the lower bound literal is marginally above 2^-14, so a value of exactly 2^-14 is rejected too.
        if (magnitude > 65504 || magnitude < 6.10352e-5)
            return false;
    }
    return true;
}
void ToMetal::AddMOVBinaryOp(const Operand *pDest, Operand *pSrc) void ToMetal::AddMOVBinaryOp(const Operand *pDest, Operand *pSrc)
{ {
@ -392,10 +412,13 @@ void ToMetal::CallBinaryOp(const char* name, Instruction* psInst,
int needsParenthesis = 0; int needsParenthesis = 0;
if (eDataType == SVT_FLOAT if (eDataType == SVT_FLOAT
&& psInst->asOperands[dest].GetDataType(psContext) == SVT_FLOAT16 && CanForceToHalfOperand(&psInst->asOperands[dest])
&& psInst->asOperands[src0].GetDataType(psContext) == SVT_FLOAT16 && CanForceToHalfOperand(&psInst->asOperands[src0])
&& psInst->asOperands[src1].GetDataType(psContext) == SVT_FLOAT16) && CanForceToHalfOperand(&psInst->asOperands[src1]))
{
ui32Flags = TO_FLAG_FORCE_HALF; ui32Flags = TO_FLAG_FORCE_HALF;
eDataType = SVT_FLOAT16;
}
uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); uint32_t maxElems = std::max(src1SwizCount, src0SwizCount);
if (src1SwizCount != src0SwizCount) if (src1SwizCount != src0SwizCount)
@ -436,11 +459,11 @@ void ToMetal::CallTernaryOp(const char* op1, const char* op2, Instruction* psIns
int numParenthesis = 0; int numParenthesis = 0;
if (dataType == TO_FLAG_NONE if (dataType == TO_FLAG_NONE
&& psInst->asOperands[dest].GetDataType(psContext) == SVT_FLOAT16 && CanForceToHalfOperand(&psInst->asOperands[dest])
&& psInst->asOperands[src0].GetDataType(psContext) == SVT_FLOAT16 && CanForceToHalfOperand(&psInst->asOperands[src0])
&& psInst->asOperands[src1].GetDataType(psContext) == SVT_FLOAT16 && CanForceToHalfOperand(&psInst->asOperands[src1])
&& psInst->asOperands[src2].GetDataType(psContext) == SVT_FLOAT16) && CanForceToHalfOperand(&psInst->asOperands[src2]))
ui32Flags = TO_FLAG_FORCE_HALF; ui32Flags = dataType = TO_FLAG_FORCE_HALF;
if (src1SwizCount != src0SwizCount || src2SwizCount != src0SwizCount) if (src1SwizCount != src0SwizCount || src2SwizCount != src0SwizCount)
{ {
@ -472,10 +495,10 @@ void ToMetal::CallHelper3(const char* name, Instruction* psInst,
uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements();
int numParenthesis = 0; int numParenthesis = 0;
if (psInst->asOperands[dest].GetDataType(psContext) == SVT_FLOAT16 if (CanForceToHalfOperand(&psInst->asOperands[dest])
&& psInst->asOperands[src0].GetDataType(psContext) == SVT_FLOAT16 && CanForceToHalfOperand(&psInst->asOperands[src0])
&& psInst->asOperands[src1].GetDataType(psContext) == SVT_FLOAT16 && CanForceToHalfOperand(&psInst->asOperands[src1])
&& psInst->asOperands[src2].GetDataType(psContext) == SVT_FLOAT16) && CanForceToHalfOperand(&psInst->asOperands[src2]))
ui32Flags = TO_FLAG_FORCE_HALF | TO_AUTO_BITCAST_TO_FLOAT; ui32Flags = TO_FLAG_FORCE_HALF | TO_AUTO_BITCAST_TO_FLOAT;
if ((src1SwizCount != src0SwizCount || src2SwizCount != src0SwizCount) && paramsShouldFollowWriteMask) if ((src1SwizCount != src0SwizCount || src2SwizCount != src0SwizCount) && paramsShouldFollowWriteMask)
@ -511,9 +534,9 @@ void ToMetal::CallHelper2(const char* name, Instruction* psInst,
int isDotProduct = (strncmp(name, "dot", 3) == 0) ? 1 : 0; int isDotProduct = (strncmp(name, "dot", 3) == 0) ? 1 : 0;
int numParenthesis = 0; int numParenthesis = 0;
if (psInst->asOperands[dest].GetDataType(psContext) == SVT_FLOAT16 if (CanForceToHalfOperand(&psInst->asOperands[dest])
&& psInst->asOperands[src0].GetDataType(psContext) == SVT_FLOAT16 && CanForceToHalfOperand(&psInst->asOperands[src0])
&& psInst->asOperands[src1].GetDataType(psContext) == SVT_FLOAT16) && CanForceToHalfOperand(&psInst->asOperands[src1]))
ui32Flags = TO_FLAG_FORCE_HALF | TO_AUTO_BITCAST_TO_FLOAT; ui32Flags = TO_FLAG_FORCE_HALF | TO_AUTO_BITCAST_TO_FLOAT;
@ -604,8 +627,8 @@ void ToMetal::CallHelper1(const char* name, Instruction* psInst,
int numParenthesis = 0; int numParenthesis = 0;
psContext->AddIndentation(); psContext->AddIndentation();
if (psInst->asOperands[dest].GetDataType(psContext) == SVT_FLOAT16 if (CanForceToHalfOperand(&psInst->asOperands[dest])
&& psInst->asOperands[src0].GetDataType(psContext) == SVT_FLOAT16) && CanForceToHalfOperand(&psInst->asOperands[src0]))
ui32Flags = TO_FLAG_FORCE_HALF | TO_AUTO_BITCAST_TO_FLOAT; ui32Flags = TO_FLAG_FORCE_HALF | TO_AUTO_BITCAST_TO_FLOAT;
AddAssignToDest(&psInst->asOperands[dest], ui32Flags & TO_FLAG_FORCE_HALF ? SVT_FLOAT16 : SVT_FLOAT, dstSwizCount, &numParenthesis); AddAssignToDest(&psInst->asOperands[dest], ui32Flags & TO_FLAG_FORCE_HALF ? SVT_FLOAT16 : SVT_FLOAT, dstSwizCount, &numParenthesis);
@ -2238,8 +2261,20 @@ void ToMetal::TranslateInstruction(Instruction* psInst)
bcatcstr(glsl, "//UDIV\n"); bcatcstr(glsl, "//UDIV\n");
#endif #endif
//destQuotient, destRemainder, src0, src1 //destQuotient, destRemainder, src0, src1
CallBinaryOp("/", psInst, 0, 2, 3, SVT_UINT);
CallBinaryOp("%", psInst, 1, 2, 3, SVT_UINT); // There are cases where destQuotient is the same variable as src0 or src1. If that happens,
// we need to compute "%" before the "/" in order to avoid src0 or src1 being overwritten first.
if ((psInst->asOperands[0].eType != psInst->asOperands[2].eType || psInst->asOperands[0].ui32RegisterNumber != psInst->asOperands[2].ui32RegisterNumber)
&& (psInst->asOperands[0].eType != psInst->asOperands[3].eType || psInst->asOperands[0].ui32RegisterNumber != psInst->asOperands[3].ui32RegisterNumber))
{
CallBinaryOp("/", psInst, 0, 2, 3, SVT_UINT);
CallBinaryOp("%", psInst, 1, 2, 3, SVT_UINT);
}
else
{
CallBinaryOp("%", psInst, 1, 2, 3, SVT_UINT);
CallBinaryOp("/", psInst, 0, 2, 3, SVT_UINT);
}
break; break;
} }
case OPCODE_DIV: case OPCODE_DIV:
@ -2299,8 +2334,8 @@ void ToMetal::TranslateInstruction(Instruction* psInst)
psContext->AddIndentation(); psContext->AddIndentation();
SHADER_VARIABLE_TYPE dstType = psInst->asOperands[0].GetDataType(psContext); SHADER_VARIABLE_TYPE dstType = psInst->asOperands[0].GetDataType(psContext);
uint32_t typeFlags = TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_EXPAND_TO_VEC2; uint32_t typeFlags = TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_EXPAND_TO_VEC2;
if (psInst->asOperands[1].GetDataType(psContext) == SVT_FLOAT16 if (CanForceToHalfOperand(&psInst->asOperands[1])
&& psInst->asOperands[2].GetDataType(psContext) == SVT_FLOAT16) && CanForceToHalfOperand(&psInst->asOperands[2]))
typeFlags = TO_FLAG_FORCE_HALF | TO_AUTO_EXPAND_TO_VEC2; typeFlags = TO_FLAG_FORCE_HALF | TO_AUTO_EXPAND_TO_VEC2;
if (dstType != SVT_FLOAT16) if (dstType != SVT_FLOAT16)
@ -2325,8 +2360,8 @@ void ToMetal::TranslateInstruction(Instruction* psInst)
psContext->AddIndentation(); psContext->AddIndentation();
SHADER_VARIABLE_TYPE dstType = psInst->asOperands[0].GetDataType(psContext); SHADER_VARIABLE_TYPE dstType = psInst->asOperands[0].GetDataType(psContext);
uint32_t typeFlags = TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_EXPAND_TO_VEC3; uint32_t typeFlags = TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_EXPAND_TO_VEC3;
if (psInst->asOperands[1].GetDataType(psContext) == SVT_FLOAT16 if (CanForceToHalfOperand(&psInst->asOperands[1])
&& psInst->asOperands[2].GetDataType(psContext) == SVT_FLOAT16) && CanForceToHalfOperand(&psInst->asOperands[2]))
typeFlags = TO_FLAG_FORCE_HALF | TO_AUTO_EXPAND_TO_VEC3; typeFlags = TO_FLAG_FORCE_HALF | TO_AUTO_EXPAND_TO_VEC3;
if (dstType != SVT_FLOAT16) if (dstType != SVT_FLOAT16)
@ -2795,7 +2830,7 @@ void ToMetal::TranslateInstruction(Instruction* psInst)
DeclareExtraFunction("BFI", "\ DeclareExtraFunction("BFI", "\
template <typename UVecType> UVecType bitFieldInsert(const UVecType width, const UVecType offset, const UVecType src2, const UVecType src3)\n\ template <typename UVecType> UVecType bitFieldInsert(const UVecType width, const UVecType offset, const UVecType src2, const UVecType src3)\n\
{\n\ {\n\
UVecType bitmask = (((1 << width)-1) << offset) & 0xffffffff;\n\ UVecType bitmask = (((UVecType(1) << width)-1) << offset) & 0xffffffff;\n\
return ((src2 << offset) & bitmask) | (src3 & ~bitmask);\n\ return ((src2 << offset) & bitmask) | (src3 & ~bitmask);\n\
}; "); }; ");
psContext->AddIndentation(); psContext->AddIndentation();
@ -2971,7 +3006,10 @@ void ToMetal::TranslateInstruction(Instruction* psInst)
} }
} }
psContext->AddIndentation(); psContext->AddIndentation();
bformata(glsl, "threadgroup_barrier(mem_flags::%s);\n", barrierFlags); if (ui32SyncFlags & SYNC_THREADS_IN_GROUP)
bformata(glsl, "threadgroup_barrier(mem_flags::%s);\n", barrierFlags);
else
bformata(glsl, "simdgroup_barrier(mem_flags::%s);\n", barrierFlags);
break; break;
} }
@ -3215,60 +3253,79 @@ void ToMetal::TranslateInstruction(Instruction* psInst)
case OPCODE_LD_UAV_TYPED: case OPCODE_LD_UAV_TYPED:
{ {
#ifdef _DEBUG #ifdef _DEBUG
psContext->AddIndentation(); psContext->AddIndentation();
bcatcstr(glsl, "//LD_UAV_TYPED\n"); bcatcstr(glsl, "//LD_UAV_TYPED\n");
#endif #endif
Operand* psDest = &psInst->asOperands[0]; Operand* psDest = &psInst->asOperands[0];
Operand* psSrc = &psInst->asOperands[2]; Operand* psSrc = &psInst->asOperands[2];
Operand* psSrcAddr = &psInst->asOperands[1]; Operand* psSrcAddr = &psInst->asOperands[1];
const ResourceBinding* psRes = 0; const ResourceBinding* psRes = 0;
psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, psSrc->ui32RegisterNumber, &psRes); psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, psSrc->ui32RegisterNumber, &psRes);
SHADER_VARIABLE_TYPE srcDataType = ResourceReturnTypeToSVTType(psRes->ui32ReturnType, psRes->ePrecision); SHADER_VARIABLE_TYPE srcDataType = ResourceReturnTypeToSVTType(psRes->ui32ReturnType, psRes->ePrecision);
if (psInst->eResDim == RESOURCE_DIMENSION_BUFFER) // Hack typed buffer as raw buf if (psInst->eResDim == RESOURCE_DIMENSION_BUFFER) // Hack typed buffer as raw buf
{ {
psSrc->aeDataType[0] = srcDataType; psSrc->aeDataType[0] = srcDataType;
psSrcAddr->eSelMode = OPERAND_4_COMPONENT_SELECT_1_MODE; psSrcAddr->eSelMode = OPERAND_4_COMPONENT_SELECT_1_MODE;
if (psSrcAddr->eType == OPERAND_TYPE_IMMEDIATE32) if (psSrcAddr->eType == OPERAND_TYPE_IMMEDIATE32)
psSrcAddr->iNumComponents = 1; psSrcAddr->iNumComponents = 1;
TranslateShaderStorageLoad(psInst); TranslateShaderStorageLoad(psInst);
break; break;
} }
int srcCount = psSrc->GetNumSwizzleElements(); #define RRD(n) REFLECT_RESOURCE_DIMENSION_ ## n
int numParenthesis = 0;
uint32_t compMask = 0;
switch (psInst->eResDim) // unlike glsl, texture arrays will have index in separate argument
{ const bool isArray = psRes->eDimension == RRD(TEXTURE1DARRAY) || psRes->eDimension == RRD(TEXTURE2DARRAY)
case RESOURCE_DIMENSION_TEXTURE3D: || psRes->eDimension == RRD(TEXTURE2DMSARRAY) || psRes->eDimension == RRD(TEXTURECUBEARRAY);
case RESOURCE_DIMENSION_TEXTURE2DARRAY:
case RESOURCE_DIMENSION_TEXTURE2DMSARRAY:
case RESOURCE_DIMENSION_TEXTURECUBEARRAY:
compMask |= (1 << 2);
case RESOURCE_DIMENSION_TEXTURECUBE:
case RESOURCE_DIMENSION_TEXTURE1DARRAY:
case RESOURCE_DIMENSION_TEXTURE2D:
case RESOURCE_DIMENSION_TEXTURE2DMS:
compMask |= (1 << 1);
case RESOURCE_DIMENSION_TEXTURE1D:
compMask |= 1;
break;
default:
ASSERT(0);
break;
}
psContext->AddIndentation(); uint32_t flags = TO_FLAG_UNSIGNED_INTEGER, opMask = OPERAND_4_COMPONENT_MASK_ALL;
AddAssignToDest(psDest, srcDataType, srcCount, &numParenthesis); switch (psRes->eDimension)
glsl << TranslateOperand(psSrc, TO_FLAG_NAME_ONLY); {
bcatcstr(glsl, ".read("); case RRD(TEXTURE3D):
glsl << TranslateOperand(psSrcAddr, TO_FLAG_UNSIGNED_INTEGER, compMask); opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z;
bcatcstr(glsl, ")"); flags |= TO_AUTO_EXPAND_TO_VEC3;
glsl << TranslateOperandSwizzle(&psInst->asOperands[0], OPERAND_4_COMPONENT_MASK_ALL, 0); break;
AddAssignPrologue(numParenthesis); case RRD(TEXTURECUBE): case RRD(TEXTURECUBEARRAY):
break; case RRD(TEXTURE2DARRAY): case RRD(TEXTURE2DMSARRAY): case RRD(TEXTURE2D): case RRD(TEXTURE2DMS):
opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y;
flags |= TO_AUTO_EXPAND_TO_VEC2;
break;
case RRD(TEXTURE1D): case RRD(TEXTURE1DARRAY):
opMask = OPERAND_4_COMPONENT_MASK_X;
break;
default:
ASSERT(0); break;
}
int srcCount = psSrc->GetNumSwizzleElements(), numParenthesis = 0;
psContext->AddIndentation();
AddAssignToDest(psDest, srcDataType, srcCount, &numParenthesis);
glsl << TranslateOperand(psSrc, TO_FLAG_NAME_ONLY);
bcatcstr(glsl, ".read(");
glsl << TranslateOperand(psSrcAddr, flags, opMask);
if(isArray)
{
// NB cube array is handled incorrectly - it needs extra "face" arg
switch (psRes->eDimension)
{
case RRD(TEXTURE1DARRAY): opMask = OPERAND_4_COMPONENT_MASK_Y; break;
case RRD(TEXTURE2DARRAY): case RRD(TEXTURE2DMSARRAY): opMask = OPERAND_4_COMPONENT_MASK_Z; break;
case RRD(TEXTURECUBEARRAY): opMask = OPERAND_4_COMPONENT_MASK_W; break;
default: ASSERT(0); break;
}
bcatcstr(glsl, ", ");
glsl << TranslateOperand(psSrcAddr, TO_FLAG_UNSIGNED_INTEGER, opMask);
}
bcatcstr(glsl, ")");
glsl << TranslateOperandSwizzle(&psInst->asOperands[0], OPERAND_4_COMPONENT_MASK_ALL, 0);
AddAssignPrologue(numParenthesis);
#undef RRD
break;
} }
case OPCODE_STORE_RAW: case OPCODE_STORE_RAW:
{ {
@ -3291,66 +3348,82 @@ void ToMetal::TranslateInstruction(Instruction* psInst)
case OPCODE_STORE_UAV_TYPED: case OPCODE_STORE_UAV_TYPED:
{ {
const ResourceBinding* psRes; const ResourceBinding* psRes;
int foundResource; int foundResource;
uint32_t flags = TO_FLAG_UNSIGNED_INTEGER;
uint32_t opMask = OPERAND_4_COMPONENT_MASK_ALL;
#ifdef _DEBUG #ifdef _DEBUG
psContext->AddIndentation(); psContext->AddIndentation();
bcatcstr(glsl, "//STORE_UAV_TYPED\n"); bcatcstr(glsl, "//STORE_UAV_TYPED\n");
#endif #endif
foundResource = psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, foundResource = psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV,
psInst->asOperands[0].ui32RegisterNumber, psInst->asOperands[0].ui32RegisterNumber,
&psRes); &psRes);
ASSERT(foundResource); ASSERT(foundResource);
if (psRes->eDimension == REFLECT_RESOURCE_DIMENSION_BUFFER) // Hack typed buffer as raw buf if (psRes->eDimension == REFLECT_RESOURCE_DIMENSION_BUFFER) // Hack typed buffer as raw buf
{ {
psInst->asOperands[0].aeDataType[0] = ResourceReturnTypeToSVTType(psRes->ui32ReturnType, psRes->ePrecision); psInst->asOperands[0].aeDataType[0] = ResourceReturnTypeToSVTType(psRes->ui32ReturnType, psRes->ePrecision);
psInst->asOperands[1].eSelMode = OPERAND_4_COMPONENT_SELECT_1_MODE; psInst->asOperands[1].eSelMode = OPERAND_4_COMPONENT_SELECT_1_MODE;
if (psInst->asOperands[1].eType == OPERAND_TYPE_IMMEDIATE32) if (psInst->asOperands[1].eType == OPERAND_TYPE_IMMEDIATE32)
psInst->asOperands[1].iNumComponents = 1; psInst->asOperands[1].iNumComponents = 1;
TranslateShaderStorageStore(psInst); TranslateShaderStorageStore(psInst);
break; break;
} }
psContext->AddIndentation(); psContext->AddIndentation();
glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_NAME_ONLY); glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_NAME_ONLY);
bcatcstr(glsl, ".write("); bcatcstr(glsl, ".write(");
switch (psRes->eDimension) #define RRD(n) REFLECT_RESOURCE_DIMENSION_ ## n
{
case REFLECT_RESOURCE_DIMENSION_TEXTURE1D:
opMask = OPERAND_4_COMPONENT_MASK_X;
break;
case REFLECT_RESOURCE_DIMENSION_TEXTURE2D:
case REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY:
case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS:
opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y;
flags |= TO_AUTO_EXPAND_TO_VEC2;
break;
case REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY:
case REFLECT_RESOURCE_DIMENSION_TEXTURE3D:
case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY:
case REFLECT_RESOURCE_DIMENSION_TEXTURECUBE:
opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z;
flags |= TO_AUTO_EXPAND_TO_VEC3;
break;
case REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY:
flags |= TO_AUTO_EXPAND_TO_VEC4;
break;
default:
ASSERT(0);
break;
};
glsl << TranslateOperand(&psInst->asOperands[2], ResourceReturnTypeToFlag(psRes->ui32ReturnType)); // unlike glsl, texture arrays will have index in separate argument
bcatcstr(glsl, ", "); const bool isArray = psRes->eDimension == RRD(TEXTURE1DARRAY) || psRes->eDimension == RRD(TEXTURE2DARRAY)
glsl << TranslateOperand(&psInst->asOperands[1], flags, opMask); || psRes->eDimension == RRD(TEXTURE2DMSARRAY) || psRes->eDimension == RRD(TEXTURECUBEARRAY);
bformata(glsl, ");\n");
break; uint32_t flags = TO_FLAG_UNSIGNED_INTEGER, opMask = OPERAND_4_COMPONENT_MASK_ALL;
switch (psRes->eDimension)
{
case RRD(TEXTURE1D): case RRD(TEXTURE1DARRAY):
opMask = OPERAND_4_COMPONENT_MASK_X;
break;
case RRD(TEXTURE2D): case RRD(TEXTURE2DMS): case RRD(TEXTURE2DARRAY): case RRD(TEXTURE2DMSARRAY):
opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y;
flags |= TO_AUTO_EXPAND_TO_VEC2;
break;
case RRD(TEXTURE3D): case RRD(TEXTURECUBE): case RRD(TEXTURECUBEARRAY):
opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z;
flags |= TO_AUTO_EXPAND_TO_VEC3;
break;
default:
ASSERT(0);
break;
};
glsl << TranslateOperand(&psInst->asOperands[2], ResourceReturnTypeToFlag(psRes->ui32ReturnType));
bcatcstr(glsl, ", ");
glsl << TranslateOperand(&psInst->asOperands[1], flags, opMask);
if(isArray)
{
// NB cube array is handled incorrectly - it needs extra "face" arg
flags = TO_FLAG_UNSIGNED_INTEGER;
switch (psRes->eDimension)
{
case RRD(TEXTURE1DARRAY): opMask = OPERAND_4_COMPONENT_MASK_Y; break;
case RRD(TEXTURE2DARRAY): case RRD(TEXTURE2DMSARRAY):opMask = OPERAND_4_COMPONENT_MASK_Z; break;
case RRD(TEXTURECUBEARRAY): opMask = OPERAND_4_COMPONENT_MASK_Z; break;
default: ASSERT(0); break;
}
bcatcstr(glsl, ", ");
glsl << TranslateOperand(&psInst->asOperands[1], flags, opMask);
}
bformata(glsl, ");\n");
#undef RRD
break;
} }
case OPCODE_LD_RAW: case OPCODE_LD_RAW:
{ {
@ -3639,10 +3712,10 @@ template <int N> vec<int, N> bitFieldExtractI(const vec<uint, N> width, const ve
#endif #endif
psContext->AddIndentation(); psContext->AddIndentation();
bool isFP16 = false; bool isFP16 = false;
if (psInst->asOperands[0].GetDataType(psContext) == SVT_FLOAT16 if (CanForceToHalfOperand(&psInst->asOperands[0])
&& psInst->asOperands[1].GetDataType(psContext) == SVT_FLOAT16 && CanForceToHalfOperand(&psInst->asOperands[1])
&& psInst->asOperands[2].GetDataType(psContext) == SVT_FLOAT16 && CanForceToHalfOperand(&psInst->asOperands[2])
&& psInst->asOperands[2].GetDataType(psContext) == SVT_FLOAT16) && CanForceToHalfOperand(&psInst->asOperands[2]))
isFP16 = true; isFP16 = true;
int parenthesis = 0; int parenthesis = 0;
AddAssignToDest(&psInst->asOperands[0], isFP16 ? SVT_FLOAT16 : SVT_FLOAT, 2, &parenthesis); AddAssignToDest(&psInst->asOperands[0], isFP16 ? SVT_FLOAT16 : SVT_FLOAT, 2, &parenthesis);
@ -3767,7 +3840,7 @@ template <int N> vec<int, N> bitFieldExtractI(const vec<uint, N> width, const ve
const RESINFO_RETURN_TYPE eResInfoReturnType = psInst->eResInfoReturnType; const RESINFO_RETURN_TYPE eResInfoReturnType = psInst->eResInfoReturnType;
psContext->AddIndentation(); psContext->AddIndentation();
AddAssignToDest(&psInst->asOperands[0], eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT ? SVT_UINT : SVT_FLOAT, 1, &numParenthesis); AddAssignToDest(&psInst->asOperands[0], eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT ? SVT_UINT : SVT_FLOAT, 1, &numParenthesis);
TranslateOperand(&psInst->asOperands[1], TO_FLAG_NAME_ONLY); bcatcstr(glsl, TranslateOperand(&psInst->asOperands[1], TO_FLAG_NAME_ONLY).c_str());
bcatcstr(glsl, ".get_num_samples()"); bcatcstr(glsl, ".get_num_samples()");
AddAssignPrologue(numParenthesis); AddAssignPrologue(numParenthesis);
break; break;

Просмотреть файл

@ -14,18 +14,22 @@
using namespace HLSLcc; using namespace HLSLcc;
#ifdef _MSC_VER #ifdef _MSC_VER
#if _MSC_VER < 1900 #if _MSC_VER < 1900
#define snprintf _snprintf #define snprintf _snprintf
#endif #endif
#endif
#ifndef fpcheck
#ifdef _MSC_VER
#define fpcheck(x) (_isnan(x) || !_finite(x)) #define fpcheck(x) (_isnan(x) || !_finite(x))
#else #else
#define fpcheck(x) (std::isnan(x) || std::isinf(x)) #define fpcheck(x) (std::isnan(x) || std::isinf(x))
#endif #endif
#endif // #ifndef fpcheck
// Returns nonzero if types are just different precisions of the same underlying type // Returns nonzero if types are just different precisions of the same underlying type
static bool AreTypesCompatible(SHADER_VARIABLE_TYPE a, uint32_t ui32TOFlag) static bool AreTypesCompatibleMetal(SHADER_VARIABLE_TYPE a, uint32_t ui32TOFlag)
{ {
SHADER_VARIABLE_TYPE b = TypeFlagsToSVTType(ui32TOFlag); SHADER_VARIABLE_TYPE b = TypeFlagsToSVTType(ui32TOFlag);
@ -352,8 +356,19 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui
if (psOperand->eType == OPERAND_TYPE_INPUT) if (psOperand->eType == OPERAND_TYPE_INPUT)
{ {
// Check for scalar // Check for scalar
if (psContext->psShader->abScalarInput[psOperand->GetRegisterSpace(psContext)][psOperand->ui32RegisterNumber] & psOperand->GetAccessMask() // You would think checking would be easy but there is a caveat:
&& psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) // checking abScalarInput might report as scalar, while in reality that was redirected and now is vector so swizzle must be preserved
// as an example consider we have input:
// float2 x; float y;
// and later on we do
// tex2D(xxx, fixed2(x.x, y));
// in that case we will generate redirect but which ui32RegisterNumber will be used for it is not strictly "specified"
// so we may end up with treating it as scalar (even though it is vector now)
const int redirectInput = psContext->psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber];
const bool wasRedirected = redirectInput == 0xFF || redirectInput == 0xFE;
const int scalarInput = psContext->psShader->abScalarInput[psOperand->GetRegisterSpace(psContext)][psOperand->ui32RegisterNumber];
if (!wasRedirected && (scalarInput & psOperand->GetAccessMask()) && (psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE))
{ {
scalarWithSwizzle = 1; scalarWithSwizzle = 1;
*pui32IgnoreSwizzle = 1; *pui32IgnoreSwizzle = 1;
@ -384,7 +399,7 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui
} }
bool bitcast = false; bool bitcast = false;
if (AreTypesCompatible(eType, ui32TOFlag) == 0) if (AreTypesCompatibleMetal(eType, ui32TOFlag) == 0)
{ {
if (CanDoDirectCast(eType, requestedType)) if (CanDoDirectCast(eType, requestedType))
{ {
@ -406,7 +421,15 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui
// Add ctor if needed (upscaling). Type conversion is already handled above, so here we must // Add ctor if needed (upscaling). Type conversion is already handled above, so here we must
// use the original type to not make type conflicts in bitcasts // use the original type to not make type conflicts in bitcasts
if (((numComponents < requestedComponents)||(scalarWithSwizzle != 0)) && (hasCtor == 0 || bitcast)) bool needsUpscaling = ((numComponents < requestedComponents)||(scalarWithSwizzle != 0)) && (hasCtor == 0 || bitcast);
// Add constuctor if half precision is forced to avoid template ambiguity error from compiler
bool needsForcedCtor = (ui32TOFlag & TO_FLAG_FORCE_HALF) && (psOperand->eType == OPERAND_TYPE_IMMEDIATE32 || psOperand->eType == OPERAND_TYPE_IMMEDIATE64);
if (needsForcedCtor)
requestedComponents = std::max(requestedComponents, 1);
if (needsUpscaling || needsForcedCtor)
{ {
oss << GetConstructorForType(psContext, eType, requestedComponents, false) << "("; oss << GetConstructorForType(psContext, eType, requestedComponents, false) << "(";
@ -660,162 +683,144 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui
//Work out the variable name. Don't apply swizzle to that variable yet. //Work out the variable name. Don't apply swizzle to that variable yet.
int32_t rebase = 0; int32_t rebase = 0;
if(psCBuf) ASSERT(psCBuf != NULL);
{
uint32_t componentsNeeded = 1;
if (psOperand->eSelMode != OPERAND_4_COMPONENT_SELECT_1_MODE)
{
uint32_t minSwiz = 3;
uint32_t maxSwiz = 0;
int i;
for (i = 0; i < 4; i++)
{
if ((ui32CompMask & (1 << i)) == 0)
continue;
minSwiz = std::min(minSwiz, psOperand->aui32Swizzle[i]);
maxSwiz = std::max(maxSwiz, psOperand->aui32Swizzle[i]);
}
componentsNeeded = maxSwiz - minSwiz + 1;
}
ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], psOperand->aui32Swizzle, psCBuf, &psVarType, &isArray, &arrayIndices, &rebase, psContext->flags); uint32_t componentsNeeded = 1;
if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE || (componentsNeeded <= psVarType->Columns)) if (psOperand->eSelMode != OPERAND_4_COMPONENT_SELECT_1_MODE)
{
uint32_t minSwiz = 3;
uint32_t maxSwiz = 0;
int i;
for (i = 0; i < 4; i++)
{ {
// Simple case: just access one component if ((ui32CompMask & (1 << i)) == 0)
std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(psVarType, arrayIndices); continue;
minSwiz = std::min(minSwiz, psOperand->aui32Swizzle[i]);
maxSwiz = std::max(maxSwiz, psOperand->aui32Swizzle[i]);
}
componentsNeeded = maxSwiz - minSwiz + 1;
}
ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], psOperand->aui32Swizzle, psCBuf, &psVarType, &isArray, &arrayIndices, &rebase, psContext->flags);
// Get a possible dynamic array index
std::ostringstream dynIndexOss;
bool needsIndexCalcRevert = false;
bool isAoS = ((!isArray && arrayIndices.size() > 0) || (isArray && arrayIndices.size() > 1));
Operand *psDynIndexOp = psOperand->GetDynamicIndexOperand(psContext, psVarType, isAoS, &needsIndexCalcRevert);
if (psDynIndexOp != NULL)
{
SHADER_VARIABLE_TYPE eType = psDynIndexOp->GetDataType(psContext);
uint32_t opFlags = TO_FLAG_INTEGER;
if (eType != SVT_INT && eType != SVT_UINT)
opFlags = TO_AUTO_BITCAST_TO_INT;
dynIndexOss << TranslateOperand(psDynIndexOp, opFlags);
}
std::string dynamicIndexStr = dynIndexOss.str();
if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE || (componentsNeeded <= psVarType->Columns))
{
// Simple case: just access one component
std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(psVarType, arrayIndices, dynamicIndexStr, needsIndexCalcRevert, psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES);
if (((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) != 0) && ((psVarType->Class == SVC_MATRIX_ROWS) || (psVarType->Class == SVC_MATRIX_COLUMNS))) if (((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) != 0) && ((psVarType->Class == SVC_MATRIX_ROWS) || (psVarType->Class == SVC_MATRIX_COLUMNS)))
{
// We'll need to add the prefix only to the last section of the name
size_t commaPos = fullName.find_last_of('.');
char prefix[256];
sprintf(prefix, HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING, psVarType->Rows, psVarType->Columns);
if (commaPos == std::string::npos)
fullName.insert(0, prefix);
else
fullName.insert(commaPos + 1, prefix);
}
oss << cbName << fullName;
}
else
{ {
// Non-simple case: build vec4 and apply mask // We'll need to add the prefix only to the last section of the name
uint32_t i; size_t commaPos = fullName.find_last_of('.');
int32_t tmpRebase; char prefix[256];
std::vector<uint32_t> tmpArrayIndices; sprintf(prefix, HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING, psVarType->Rows, psVarType->Columns);
bool tmpIsArray; if (commaPos == std::string::npos)
int firstItemAdded = 0; fullName.insert(0, prefix);
else
oss << GetConstructorForTypeMetal(psVarType->Type, GetNumberBitsSet(ui32CompMask)) << "("; fullName.insert(commaPos + 1, prefix);
for (i = 0; i < 4; i++)
{
const ShaderVarType *tmpVarType = NULL;
if ((ui32CompMask & (1 << i)) == 0)
continue;
tmpRebase = 0;
if (firstItemAdded != 0)
oss << ", ";
else
firstItemAdded = 1;
uint32_t tmpSwizzle[4] = { 0 };
std::copy(&psOperand->aui32Swizzle[i], &psOperand->aui32Swizzle[4], &tmpSwizzle[0]);
ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], tmpSwizzle, psCBuf, &tmpVarType, &tmpIsArray, &tmpArrayIndices, &tmpRebase, psContext->flags);
std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(tmpVarType, tmpArrayIndices);
if (tmpVarType->Class == SVC_SCALAR)
{
oss << cbName << fullName;
}
else
{
uint32_t swizzle;
tmpRebase /= 4; // 0 => 0, 4 => 1, 8 => 2, 12 /= 3
swizzle = psOperand->aui32Swizzle[i] - tmpRebase;
oss << cbName << fullName << "." << ("xyzw"[swizzle]);
}
}
oss << ")";
// Clear rebase, we've already done it.
rebase = 0;
// Also swizzle.
*pui32IgnoreSwizzle = 1;
} }
}
else // We don't have a semantic for this variable, so try the raw dump appoach.
{
ASSERT(0); // We're screwed.
// bformata(glsl, "cb%d.data", psOperand->aui32ArraySizes[0]);//
// index = psOperand->aui32ArraySizes[1];
}
if (isArray) oss << cbName << fullName;
index = arrayIndices.back();
//Dx9 only?
if (psOperand->m_SubOperands[0].get() != NULL)
{
// Array of matrices is treated as array of vec4s in HLSL,
// but that would mess up uniform types in GLSL. Do gymnastics.
uint32_t opFlags = TO_FLAG_INTEGER;
if ((psVarType->Class == SVC_MATRIX_COLUMNS || psVarType->Class == SVC_MATRIX_ROWS) && (psVarType->Elements > 1) && ((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) == 0))
{
// Special handling for matrix arrays
oss << "[(" << TranslateOperand(psOperand->m_SubOperands[0].get(), opFlags) << ") / 4]";
oss << "[((" << TranslateOperand(psOperand->m_SubOperands[0].get(), opFlags, OPERAND_4_COMPONENT_MASK_X) << ") % 4)]";
}
else
{
oss << "[" << TranslateOperand(psOperand->m_SubOperands[0].get(), opFlags) << "]";
}
} }
else else
if (index != -1 && psOperand->m_SubOperands[1].get() != NULL)
{
// Array of matrices is treated as array of vec4s in HLSL,
// but that would mess up uniform types in GLSL. Do gymnastics.
SHADER_VARIABLE_TYPE eType = psOperand->m_SubOperands[1].get()->GetDataType(psContext);
uint32_t opFlags = TO_FLAG_INTEGER;
if (eType != SVT_INT && eType != SVT_UINT)
opFlags = TO_AUTO_BITCAST_TO_INT;
if ((psVarType->Class == SVC_MATRIX_COLUMNS || psVarType->Class == SVC_MATRIX_ROWS) && (psVarType->Elements > 1) && ((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) == 0))
{
// Special handling for matrix arrays
oss << "[(" << TranslateOperand(psOperand->m_SubOperands[1].get(), opFlags) << " + " << index <<") / 4]";
oss << "[((" << TranslateOperand(psOperand->m_SubOperands[1].get(), opFlags, OPERAND_4_COMPONENT_MASK_X) << " + " << index << ") % 4)]";
}
else
{
if (index != 0)
oss << "[" << TranslateOperand(psOperand->m_SubOperands[1].get(), opFlags) << " + " << index << "]";
else
oss << "[" << TranslateOperand(psOperand->m_SubOperands[1].get(), opFlags) << "]";
}
}
else if (index != -1)
{
if ((psVarType->Class == SVC_MATRIX_COLUMNS || psVarType->Class == SVC_MATRIX_ROWS) && (psVarType->Elements > 1) && ((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) == 0))
{
// Special handling for matrix arrays, open them up into vec4's
size_t matidx = index / 4;
size_t rowidx = index - (matidx * 4);
oss << "[" << matidx << "][" << rowidx << "]";
}
else
{
oss << "[" << index << "]";
}
}
else if (psOperand->m_SubOperands[1].get() != NULL)
{ {
oss << "[" << TranslateOperand(psOperand->m_SubOperands[1].get(), TO_FLAG_INTEGER) << "]"; // Non-simple case: build vec4 and apply mask
uint32_t i;
int32_t tmpRebase;
std::vector<uint32_t> tmpArrayIndices;
bool tmpIsArray;
int firstItemAdded = 0;
oss << GetConstructorForTypeMetal(psVarType->Type, GetNumberBitsSet(ui32CompMask)) << "(";
for (i = 0; i < 4; i++)
{
const ShaderVarType *tmpVarType = NULL;
if ((ui32CompMask & (1 << i)) == 0)
continue;
tmpRebase = 0;
if (firstItemAdded != 0)
oss << ", ";
else
firstItemAdded = 1;
uint32_t tmpSwizzle[4] = { 0 };
std::copy(&psOperand->aui32Swizzle[i], &psOperand->aui32Swizzle[4], &tmpSwizzle[0]);
ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], tmpSwizzle, psCBuf, &tmpVarType, &tmpIsArray, &tmpArrayIndices, &tmpRebase, psContext->flags);
std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(tmpVarType, tmpArrayIndices, dynamicIndexStr, needsIndexCalcRevert, psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES);
if (tmpVarType->Class == SVC_SCALAR)
{
oss << cbName << fullName;
}
else
{
uint32_t swizzle;
tmpRebase /= 4; // 0 => 0, 4 => 1, 8 => 2, 12 /= 3
swizzle = psOperand->aui32Swizzle[i] - tmpRebase;
oss << cbName << fullName << "." << ("xyzw"[swizzle]);
}
}
oss << ")";
// Clear rebase, we've already done it.
rebase = 0;
// Also swizzle.
*pui32IgnoreSwizzle = 1;
} }
if (isArray)
{
index = arrayIndices.back();
// Dynamic index is atm supported only at the root array level. Add here only if there is no such parent.
bool hasDynamicIndex = !dynamicIndexStr.empty() && (arrayIndices.size() <= 1);
bool hasImmediateIndex = (index != -1) && !(hasDynamicIndex && index == 0);
if (hasDynamicIndex || hasImmediateIndex)
{
std::ostringstream fullIndexOss;
if (hasDynamicIndex && hasImmediateIndex)
fullIndexOss << "(" << dynamicIndexStr << " + " << index << ")";
else if (hasDynamicIndex)
fullIndexOss << dynamicIndexStr;
else // hasImmediateStr
fullIndexOss << index;
if (((psVarType->Class == SVC_MATRIX_COLUMNS) || (psVarType->Class == SVC_MATRIX_ROWS)) && (psVarType->Elements > 1) && ((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) == 0))
{
// Special handling for old matrix arrays
oss << "[" << fullIndexOss.str() << " / 4]";
oss << "[" << fullIndexOss.str() << " %% 4]";
}
else // This path is atm the default
{
oss << "[" << fullIndexOss.str() << "]";
}
}
}
if(psVarType && psVarType->Class == SVC_VECTOR && !*pui32IgnoreSwizzle) if(psVarType && psVarType->Class == SVC_VECTOR && !*pui32IgnoreSwizzle)
{ {
switch(rebase) switch(rebase)
@ -974,6 +979,7 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui
case OPERAND_TYPE_UNORDERED_ACCESS_VIEW: case OPERAND_TYPE_UNORDERED_ACCESS_VIEW:
{ {
oss << ResourceName(RGROUP_UAV, psOperand->ui32RegisterNumber); oss << ResourceName(RGROUP_UAV, psOperand->ui32RegisterNumber);
*pui32IgnoreSwizzle = 1;
break; break;
} }
case OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY: case OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY: