diff --git a/README.md b/README.md
index 57c48c9..e6ed077 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,50 @@
 # HLSLcc
 DirectX shader bytecode cross compiler
+
+Originally based on https://github.com/James-Jones/HLSLCrossCompiler.
+
+This library takes DirectX bytecode as input and translates it into the following languages:
+- GLSL (OpenGL 3.2 and later)
+- GLSL ES (OpenGL ES 3.0 and later)
+- GLSL ES for Vulkan consumption
+- Metal Shading Language
+
+This library is used to generate all shaders in Unity for OpenGL, OpenGL ES 3.0+, Metal and Vulkan.
+
+Changes from original HLSLCrossCompiler:
+- Codebase changed to C++11, with major code reorganizations.
+- Support for multiple language output backends (currently ToGLSL and ToMetal).
+- Metal language output support.
+- Temp register type analysis: In DX bytecode the registers are typeless 32-bit 4-vectors. We do code analysis to infer the actual data types (to avoid emitting excessive bitcasts).
+- Loop transformation: Detect constructs that look like for-loops and transform them back to their original form.
+- Support for partial precision variables in HLSL (min16float etc.). An extra analysis pass infers the intended precision of samplers.
+- Reflection interface to retrieve the shader inputs and their types.
+- Lots of workarounds for various driver/shader compiler bugs.
+- Lots of minor fixes and improvements for correctness.
+- Lots of Unity-specific tweaks to allow extending HLSL without having to change the D3D compiler itself.
+
+## Note
+
+This project does not include build files or a test suite, as they are integrated into the Unity build systems. However, building this library should be fairly straightforward: just compile src/*.cpp (in C++11 mode!) and src/cbstring/*.c with the following include paths:
+
+- include
+- src/internal_includes
+- src/cbstring
+- src
+
+The main entry point is the TranslateHLSLFromMem() function in HLSLcc.cpp, which takes DX bytecode as input.
+
+
+## Contributors
+- Mikko Strandborg
+- Juho Oravainen
+- David Rogers
+- Marton Ekler
+- Antti Tapaninen
+- Florian Penzkofer
+- Alexey Orlov
+- Povilas Kanapickas
+
+## License
+
+See license.txt.
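For orientation, the snippet below sketches how the entry point declared in include/hlslcc.h (added later in this diff) might be called. It is a minimal, hypothetical example: the wrapper name CrossCompileToGLSL, the chosen flag combination, and the assumption that a nonzero return value signals success are illustrative assumptions, not part of this change.

```cpp
// Hypothetical caller (not part of this diff): cross-compiles a DXBC blob
// that is already in memory and returns the generated GLSL source.
#include <string>
#include "hlslcc.h"

bool CrossCompileToGLSL(const char* dxbcBlob, std::string& glslOut)
{
    GlExtensions ext = {};                     // no optional GL extensions advertised
    GLSLCrossDependencyData deps;              // carries cross-stage info between compiles
    HLSLccSamplerPrecisionInfo samplerPrec;    // empty map: default sampler precisions
    HLSLccReflection reflection;               // base class ignores all reflection callbacks
    GLSLShader result;

    // LANG_DEFAULT derives the GLSL version from the HLSL shader model.
    // The flag combination here is only an example.
    int ok = TranslateHLSLFromMem(dxbcBlob,
                                  HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT | HLSLCC_FLAG_TRANSLATE_MATRICES,
                                  LANG_DEFAULT,
                                  &ext,
                                  &deps,
                                  samplerPrec,
                                  reflection,
                                  &result);
    if (ok == 0)                               // assumed: nonzero return indicates success
        return false;

    glslOut = result.sourceCode;               // generated GLSL text
    return true;
}
```

Sharing one GLSLCrossDependencyData instance across the stages of a program is what lets the compiler keep varying locations and Vulkan bindings consistent between shaders, per the comments in hlslcc.h below.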
diff --git a/include/ShaderInfo.h b/include/ShaderInfo.h new file mode 100644 index 0000000..05ce37e --- /dev/null +++ b/include/ShaderInfo.h @@ -0,0 +1,493 @@ +#pragma once + +#include +#include +#include +#include +#include "growing_array.h" +#include +//Reflection +#define MAX_RESOURCE_BINDINGS 256 + +typedef enum _SHADER_VARIABLE_TYPE { + SVT_VOID = 0, + SVT_BOOL = 1, + SVT_INT = 2, + SVT_FLOAT = 3, + SVT_STRING = 4, + SVT_TEXTURE = 5, + SVT_TEXTURE1D = 6, + SVT_TEXTURE2D = 7, + SVT_TEXTURE3D = 8, + SVT_TEXTURECUBE = 9, + SVT_SAMPLER = 10, + SVT_PIXELSHADER = 15, + SVT_VERTEXSHADER = 16, + SVT_UINT = 19, + SVT_UINT8 = 20, + SVT_GEOMETRYSHADER = 21, + SVT_RASTERIZER = 22, + SVT_DEPTHSTENCIL = 23, + SVT_BLEND = 24, + SVT_BUFFER = 25, + SVT_CBUFFER = 26, + SVT_TBUFFER = 27, + SVT_TEXTURE1DARRAY = 28, + SVT_TEXTURE2DARRAY = 29, + SVT_RENDERTARGETVIEW = 30, + SVT_DEPTHSTENCILVIEW = 31, + SVT_TEXTURE2DMS = 32, + SVT_TEXTURE2DMSARRAY = 33, + SVT_TEXTURECUBEARRAY = 34, + SVT_HULLSHADER = 35, + SVT_DOMAINSHADER = 36, + SVT_INTERFACE_POINTER = 37, + SVT_COMPUTESHADER = 38, + SVT_DOUBLE = 39, + SVT_RWTEXTURE1D = 40, + SVT_RWTEXTURE1DARRAY = 41, + SVT_RWTEXTURE2D = 42, + SVT_RWTEXTURE2DARRAY = 43, + SVT_RWTEXTURE3D = 44, + SVT_RWBUFFER = 45, + SVT_BYTEADDRESS_BUFFER = 46, + SVT_RWBYTEADDRESS_BUFFER = 47, + SVT_STRUCTURED_BUFFER = 48, + SVT_RWSTRUCTURED_BUFFER = 49, + SVT_APPEND_STRUCTURED_BUFFER = 50, + SVT_CONSUME_STRUCTURED_BUFFER = 51, + + + + // Only used as a marker when analyzing register types + SVT_FORCED_INT = 152, + // Integer that can be either signed or unsigned. Only used as an intermediate step when doing data type analysis + SVT_INT_AMBIGUOUS = 153, + + // Partial precision types. Used when doing type analysis + SVT_FLOAT10 = 53, // Seems to be used in constant buffers + SVT_FLOAT16 = 54, + SVT_INT16 = 156, + SVT_INT12 = 157, + SVT_UINT16 = 158, + + SVT_FORCE_DWORD = 0x7fffffff +} SHADER_VARIABLE_TYPE; + +typedef enum _SHADER_VARIABLE_CLASS { + SVC_SCALAR = 0, + SVC_VECTOR = (SVC_SCALAR + 1), + SVC_MATRIX_ROWS = (SVC_VECTOR + 1), + SVC_MATRIX_COLUMNS = (SVC_MATRIX_ROWS + 1), + SVC_OBJECT = (SVC_MATRIX_COLUMNS + 1), + SVC_STRUCT = (SVC_OBJECT + 1), + SVC_INTERFACE_CLASS = (SVC_STRUCT + 1), + SVC_INTERFACE_POINTER = (SVC_INTERFACE_CLASS + 1), + SVC_FORCE_DWORD = 0x7fffffff +} SHADER_VARIABLE_CLASS; + + + +/////////////////////////////////////// +// Types + +enum TESSELLATOR_PARTITIONING +{ + TESSELLATOR_PARTITIONING_UNDEFINED = 0, + TESSELLATOR_PARTITIONING_INTEGER = 1, + TESSELLATOR_PARTITIONING_POW2 = 2, + TESSELLATOR_PARTITIONING_FRACTIONAL_ODD = 3, + TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN = 4 +}; + +enum TESSELLATOR_OUTPUT_PRIMITIVE +{ + TESSELLATOR_OUTPUT_UNDEFINED = 0, + TESSELLATOR_OUTPUT_POINT = 1, + TESSELLATOR_OUTPUT_LINE = 2, + TESSELLATOR_OUTPUT_TRIANGLE_CW = 3, + TESSELLATOR_OUTPUT_TRIANGLE_CCW = 4 +}; + +enum SPECIAL_NAME +{ + NAME_UNDEFINED = 0, + NAME_POSITION = 1, + NAME_CLIP_DISTANCE = 2, + NAME_CULL_DISTANCE = 3, + NAME_RENDER_TARGET_ARRAY_INDEX = 4, + NAME_VIEWPORT_ARRAY_INDEX = 5, + NAME_VERTEX_ID = 6, + NAME_PRIMITIVE_ID = 7, + NAME_INSTANCE_ID = 8, + NAME_IS_FRONT_FACE = 9, + NAME_SAMPLE_INDEX = 10, + // The following are added for D3D11 + NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR = 11, + NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR = 12, + NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR = 13, + NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR = 14, + NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR = 15, + NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR = 16, + NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR = 17, + 
NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR = 18, + NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR = 19, + NAME_FINAL_TRI_INSIDE_TESSFACTOR = 20, + NAME_FINAL_LINE_DETAIL_TESSFACTOR = 21, + NAME_FINAL_LINE_DENSITY_TESSFACTOR = 22, +}; + + +enum INOUT_COMPONENT_TYPE { + INOUT_COMPONENT_UNKNOWN = 0, + INOUT_COMPONENT_UINT32 = 1, + INOUT_COMPONENT_SINT32 = 2, + INOUT_COMPONENT_FLOAT32 = 3 +}; + +enum MIN_PRECISION { + MIN_PRECISION_DEFAULT = 0, + MIN_PRECISION_FLOAT_16 = 1, + MIN_PRECISION_FLOAT_2_8 = 2, + MIN_PRECISION_RESERVED = 3, + MIN_PRECISION_SINT_16 = 4, + MIN_PRECISION_UINT_16 = 5, + MIN_PRECISION_ANY_16 = 0xf0, + MIN_PRECISION_ANY_10 = 0xf1 +}; + +enum ResourceType +{ + RTYPE_CBUFFER,//0 + RTYPE_TBUFFER,//1 + RTYPE_TEXTURE,//2 + RTYPE_SAMPLER,//3 + RTYPE_UAV_RWTYPED,//4 + RTYPE_STRUCTURED,//5 + RTYPE_UAV_RWSTRUCTURED,//6 + RTYPE_BYTEADDRESS,//7 + RTYPE_UAV_RWBYTEADDRESS,//8 + RTYPE_UAV_APPEND_STRUCTURED,//9 + RTYPE_UAV_CONSUME_STRUCTURED,//10 + RTYPE_UAV_RWSTRUCTURED_WITH_COUNTER,//11 + RTYPE_COUNT, +}; + +enum ResourceGroup { + RGROUP_CBUFFER, + RGROUP_TEXTURE, + RGROUP_SAMPLER, + RGROUP_UAV, + RGROUP_COUNT, +}; + +enum REFLECT_RESOURCE_DIMENSION +{ + REFLECT_RESOURCE_DIMENSION_UNKNOWN = 0, + REFLECT_RESOURCE_DIMENSION_BUFFER = 1, + REFLECT_RESOURCE_DIMENSION_TEXTURE1D = 2, + REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY = 3, + REFLECT_RESOURCE_DIMENSION_TEXTURE2D = 4, + REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY = 5, + REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS = 6, + REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY = 7, + REFLECT_RESOURCE_DIMENSION_TEXTURE3D = 8, + REFLECT_RESOURCE_DIMENSION_TEXTURECUBE = 9, + REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY = 10, + REFLECT_RESOURCE_DIMENSION_BUFFEREX = 11, +}; + +enum REFLECT_RESOURCE_PRECISION +{ + REFLECT_RESOURCE_PRECISION_UNKNOWN = 0, + REFLECT_RESOURCE_PRECISION_LOWP = 1, + REFLECT_RESOURCE_PRECISION_MEDIUMP = 2, + REFLECT_RESOURCE_PRECISION_HIGHP = 3, + +}; + +enum RESOURCE_RETURN_TYPE +{ + RETURN_TYPE_UNORM = 1, + RETURN_TYPE_SNORM = 2, + RETURN_TYPE_SINT = 3, + RETURN_TYPE_UINT = 4, + RETURN_TYPE_FLOAT = 5, + RETURN_TYPE_MIXED = 6, + RETURN_TYPE_DOUBLE = 7, + RETURN_TYPE_CONTINUED = 8, + RETURN_TYPE_UNUSED = 9, +}; + +typedef std::map HLSLccSamplerPrecisionInfo; + +struct ResourceBinding +{ + std::string name; + ResourceType eType; + uint32_t ui32BindPoint; + uint32_t ui32BindCount; + uint32_t ui32Flags; + REFLECT_RESOURCE_DIMENSION eDimension; + RESOURCE_RETURN_TYPE ui32ReturnType; + uint32_t ui32NumSamples; + REFLECT_RESOURCE_PRECISION ePrecision; + + SHADER_VARIABLE_TYPE GetDataType() const + { + switch (ePrecision) + { + case REFLECT_RESOURCE_PRECISION_LOWP: + switch (ui32ReturnType) + { + case RETURN_TYPE_UNORM: + case RETURN_TYPE_SNORM: + case RETURN_TYPE_FLOAT: + return SVT_FLOAT10; + case RETURN_TYPE_SINT: + return SVT_INT16; + case RETURN_TYPE_UINT: + return SVT_UINT16; + default: +// ASSERT(0); + return SVT_FLOAT10; + } + + case REFLECT_RESOURCE_PRECISION_MEDIUMP: + switch (ui32ReturnType) + { + case RETURN_TYPE_UNORM: + case RETURN_TYPE_SNORM: + case RETURN_TYPE_FLOAT: + return SVT_FLOAT16; + case RETURN_TYPE_SINT: + return SVT_INT16; + case RETURN_TYPE_UINT: + return SVT_UINT16; + default: +// ASSERT(0); + return SVT_FLOAT16; + } + + default: + switch (ui32ReturnType) + { + case RETURN_TYPE_UNORM: + case RETURN_TYPE_SNORM: + case RETURN_TYPE_FLOAT: + return SVT_FLOAT; + case RETURN_TYPE_SINT: + return SVT_INT; + case RETURN_TYPE_UINT: + return SVT_UINT; + case RETURN_TYPE_DOUBLE: + return SVT_DOUBLE; + default: +// ASSERT(0); + return 
SVT_FLOAT; + } + } + } +}; + +struct ShaderVarType +{ + ShaderVarType() : + Class(), + Type(), + Rows(), + Columns(), + Elements(), + MemberCount(), + Offset(), + ParentCount(), + Parent(), + m_IsUsed(false) + {} + + SHADER_VARIABLE_CLASS Class; + SHADER_VARIABLE_TYPE Type; + uint32_t Rows; + uint32_t Columns; + uint32_t Elements; + uint32_t MemberCount; + uint32_t Offset; + std::string name; + + uint32_t ParentCount; + struct ShaderVarType * Parent; + //Includes all parent names. + std::string fullName; + + std::vector Members; + + bool m_IsUsed; // If not set, is not used in the shader code + + uint32_t GetMemberCount() const + { + if (Class == SVC_STRUCT) + { + uint32_t res = 0; + std::vector::const_iterator itr; + for (itr = Members.begin(); itr != Members.end(); itr++) + { + res += itr->GetMemberCount(); + } + return res; + } + else + return 1; + } + +}; + +struct ShaderVar +{ + std::string name; + int haveDefaultValue; + std::vector pui32DefaultValues; + //Offset/Size in bytes. + uint32_t ui32StartOffset; + uint32_t ui32Size; + + ShaderVarType sType; +}; + +struct ConstantBuffer +{ + std::string name; + + std::vector asVars; + + uint32_t ui32TotalSizeInBytes; + + uint32_t GetMemberCount(bool stripUnused) const + { + uint32_t res = 0; + std::vector::const_iterator itr; + for (itr = asVars.begin(); itr != asVars.end(); itr++) + { + if(stripUnused && !itr->sType.m_IsUsed) + continue; + res += itr->sType.GetMemberCount(); + } + return res; + } +}; + +struct ClassType +{ + std::string name; + uint16_t ui16ID; + uint16_t ui16ConstBufStride; + uint16_t ui16Texture; + uint16_t ui16Sampler; +}; + +struct ClassInstance +{ + std::string name; + uint16_t ui16ID; + uint16_t ui16ConstBuf; + uint16_t ui16ConstBufOffset; + uint16_t ui16Texture; + uint16_t ui16Sampler; +}; + +class Operand; + +class ShaderInfo +{ +public: + + struct InOutSignature + { + std::string semanticName; + uint32_t ui32SemanticIndex; + SPECIAL_NAME eSystemValueType; + INOUT_COMPONENT_TYPE eComponentType; + uint32_t ui32Register; + uint32_t ui32Mask; + uint32_t ui32ReadWriteMask; + + int iRebase; // If mask does not start from zero, this indicates the offset that needs to be subtracted from each swizzle + + uint32_t ui32Stream; + MIN_PRECISION eMinPrec; + + std::set isIndexed; // Set of phases where this input/output is part of a index range. + std::map indexStart; // If indexed, contains the start index for the range + std::map index; // If indexed, contains the current index relative to the index start. 
+ + }; + + ShaderInfo() : + ui32MajorVersion(), + ui32MinorVersion(), + psResourceBindings(), + psConstantBuffers(), + psThisPointerConstBuffer(), + psClassTypes(), + psClassInstances() + {} + + SHADER_VARIABLE_TYPE GetTextureDataType(uint32_t regNo); + + int GetResourceFromBindingPoint(const ResourceGroup eGroup, const uint32_t ui32BindPoint, const ResourceBinding** ppsOutBinding) const; + + void GetConstantBufferFromBindingPoint(const ResourceGroup eGroup, const uint32_t ui32BindPoint, const ConstantBuffer** ppsConstBuf) const; + + int GetInterfaceVarFromOffset(uint32_t ui32Offset, ShaderVar** ppsShaderVar) const; + + int GetInputSignatureFromRegister(const uint32_t ui32Register, const uint32_t ui32Mask, const InOutSignature** ppsOut, bool allowNull = false) const; + int GetPatchConstantSignatureFromRegister(const uint32_t ui32Register, const uint32_t ui32Mask, const InOutSignature** ppsOut, bool allowNull = false) const; + int GetOutputSignatureFromRegister(const uint32_t ui32Register, + const uint32_t ui32CompMask, + const uint32_t ui32Stream, + const InOutSignature** ppsOut, + bool allowNull = false) const; + + int GetOutputSignatureFromSystemValue(SPECIAL_NAME eSystemValueType, uint32_t ui32SemanticIndex, const InOutSignature** ppsOut) const; + + static ResourceGroup ResourceTypeToResourceGroup(ResourceType); + + static int GetShaderVarFromOffset(const uint32_t ui32Vec4Offset, + const uint32_t (&pui32Swizzle)[4], + const ConstantBuffer* psCBuf, + const ShaderVarType** ppsShaderVar, + bool* isArray, + std::vector* arrayIndices, + int32_t* pi32Rebase, + uint32_t flags); + + static std::string GetShaderVarIndexedFullName(const ShaderVarType* psShaderVar, std::vector &indices); + + // Apply shader precision information to resource bindings + void AddSamplerPrecisions(HLSLccSamplerPrecisionInfo &info); + + uint32_t ui32MajorVersion; + uint32_t ui32MinorVersion; + + std::vector psInputSignatures; + std::vector psOutputSignatures; + std::vector psPatchConstantSignatures; + + std::vector psResourceBindings; + + std::vector psConstantBuffers; + ConstantBuffer* psThisPointerConstBuffer; + + std::vector psClassTypes; + std::vector psClassInstances; + + //Func table ID to class name ID. + HLSLcc::growing_vector aui32TableIDToTypeID; + + HLSLcc::growing_vector aui32ResourceMap[RGROUP_COUNT]; + + HLSLcc::growing_vector sGroupSharedVarType; + + TESSELLATOR_PARTITIONING eTessPartitioning; + TESSELLATOR_OUTPUT_PRIMITIVE eTessOutPrim; +}; + diff --git a/include/growing_array.h b/include/growing_array.h new file mode 100644 index 0000000..199d04a --- /dev/null +++ b/include/growing_array.h @@ -0,0 +1,47 @@ +#pragma once + +namespace HLSLcc +{ + // A vector that automatically grows when written to, fills the intermediate ones with default value. + // Reading from an index returns the default value if attempting to access out of bounds. 
+    template <typename T> class growing_vector
+    {
+    public:
+        growing_vector() : data() {}
+
+        std::vector<T> data;
+
+        T & operator[](std::size_t idx)
+        {
+            if (idx >= data.size())
+                data.resize((idx + 1) * 2);
+            return data[idx];
+        }
+
+        const T & operator[](std::size_t idx) const
+        {
+            static T defaultValue = T();
+            if (idx >= data.size())
+                return defaultValue;
+            return data[idx];
+        }
+
+    };
+
+    // Same but with bool specialization
+    template <> class growing_vector<bool>
+    {
+    public:
+        growing_vector() : data() {}
+
+        std::vector<bool> data;
+
+        std::vector<bool>::reference operator[](std::size_t idx)
+        {
+            if (idx >= data.size())
+                data.resize((idx + 1) * 2, false);
+            return data[idx];
+        }
+
+    };
+};
diff --git a/include/hlslcc.h b/include/hlslcc.h
new file mode 100644
index 0000000..baba406
--- /dev/null
+++ b/include/hlslcc.h
@@ -0,0 +1,454 @@
+#ifndef HLSLCC_H_
+#define HLSLCC_H_
+
+#include
+#include
+#include
+
+#if defined (_WIN32) && defined(HLSLCC_DYNLIB)
+    #define HLSLCC_APIENTRY __stdcall
+    #if defined(libHLSLcc_EXPORTS)
+        #define HLSLCC_API __declspec(dllexport)
+    #else
+        #define HLSLCC_API __declspec(dllimport)
+    #endif
+#else
+    #define HLSLCC_APIENTRY
+    #define HLSLCC_API
+#endif
+
+#include
+#include
+
+typedef enum
+{
+    LANG_DEFAULT,// Depends on the HLSL shader model.
+    LANG_ES_100, LANG_ES_FIRST=LANG_ES_100,
+    LANG_ES_300,
+    LANG_ES_310, LANG_ES_LAST = LANG_ES_310,
+    LANG_120, LANG_GL_FIRST = LANG_120,
+    LANG_130,
+    LANG_140,
+    LANG_150,
+    LANG_330,
+    LANG_400,
+    LANG_410,
+    LANG_420,
+    LANG_430,
+    LANG_440, LANG_GL_LAST = LANG_440,
+    LANG_METAL,
+} GLLang;
+
+typedef struct GlExtensions {
+    uint32_t ARB_explicit_attrib_location : 1;
+    uint32_t ARB_explicit_uniform_location : 1;
+    uint32_t ARB_shading_language_420pack : 1;
+}GlExtensions;
+
+#include "ShaderInfo.h"
+
+typedef std::vector<std::string> TextureSamplerPairs;
+
+typedef enum INTERPOLATION_MODE
+{
+    INTERPOLATION_UNDEFINED = 0,
+    INTERPOLATION_CONSTANT = 1,
+    INTERPOLATION_LINEAR = 2,
+    INTERPOLATION_LINEAR_CENTROID = 3,
+    INTERPOLATION_LINEAR_NOPERSPECTIVE = 4,
+    INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID = 5,
+    INTERPOLATION_LINEAR_SAMPLE = 6,
+    INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE = 7,
+} INTERPOLATION_MODE;
+
+#define PS_FLAG_VERTEX_SHADER 0x1
+#define PS_FLAG_HULL_SHADER 0x2
+#define PS_FLAG_DOMAIN_SHADER 0x4
+#define PS_FLAG_GEOMETRY_SHADER 0x8
+#define PS_FLAG_PIXEL_SHADER 0x10
+
+#define TO_FLAG_NONE 0x0
+#define TO_FLAG_INTEGER 0x1
+#define TO_FLAG_NAME_ONLY 0x2
+#define TO_FLAG_DECLARATION_NAME 0x4
+#define TO_FLAG_DESTINATION 0x8 //Operand is being written to by assignment.
+#define TO_FLAG_UNSIGNED_INTEGER 0x10
+#define TO_FLAG_DOUBLE 0x20
+// --- TO_AUTO_BITCAST_TO_FLOAT ---
+//If the operand is an integer temp variable then this flag
+//indicates that the temp has a valid floating point encoding
+//and that the current expression expects the operand to be floating point
+//and therefore intBitsToFloat must be applied to that variable.
+#define TO_AUTO_BITCAST_TO_FLOAT 0x40
+#define TO_AUTO_BITCAST_TO_INT 0x80
+#define TO_AUTO_BITCAST_TO_UINT 0x100
+// AUTO_EXPAND flags automatically expand the operand to at least (i/u)vecX
+// to match HLSL functionality.
+#define TO_AUTO_EXPAND_TO_VEC2 0x200 +#define TO_AUTO_EXPAND_TO_VEC3 0x400 +#define TO_AUTO_EXPAND_TO_VEC4 0x800 +#define TO_FLAG_BOOL 0x1000 +// These flags are only used for Metal: +// Force downscaling of the operand to match +// the other operand (Metal doesn't like mixing halfs with floats) +#define TO_FLAG_FORCE_HALF 0x2000 + +typedef enum +{ + INVALID_SHADER = -1, + PIXEL_SHADER, + VERTEX_SHADER, + GEOMETRY_SHADER, + HULL_SHADER, + DOMAIN_SHADER, + COMPUTE_SHADER, +} SHADER_TYPE; + +// Enum for texture dimension reflection data +typedef enum +{ + TD_FLOAT = 0, + TD_INT, + TD_2D, + TD_3D, + TD_CUBE, + TD_2DSHADOW, + TD_2DARRAY, + TD_CUBEARRAY +} HLSLCC_TEX_DIMENSION; + +// The prefix for all temporary variables used by the generated code. +// Using a texture or uniform name like this will cause conflicts +#define HLSLCC_TEMP_PREFIX "u_xlat" + +//The shader stages (Vertex, Pixel et al) do not depend on each other +//in HLSL. GLSL is a different story. HLSLCrossCompiler requires +//that hull shaders must be compiled before domain shaders, and +//the pixel shader must be compiled before all of the others. +//During compilation the GLSLCrossDependencyData struct will +//carry over any information needed about a different shader stage +//in order to construct valid GLSL shader combinations. + +//Using GLSLCrossDependencyData is optional. However some shader +//combinations may show link failures, or runtime errors. +class GLSLCrossDependencyData +{ +public: + // A container for a single Vulkan resource binding ( pair) + typedef std::pair VulkanResourceBinding; + +private: + //Required if PixelInterpDependency is true + std::vector pixelInterpolation; + + // Map of varying locations, indexed by varying names. + typedef std::map VaryingLocations; + + static const int MAX_NAMESPACES = 6; // Max namespaces: vert input, hull input, domain input, geom input, ps input, (ps output) + + VaryingLocations varyingLocationsMap[MAX_NAMESPACES]; + uint32_t nextAvailableVaryingLocation[MAX_NAMESPACES]; + + typedef std::map VulkanResourceBindings; + VulkanResourceBindings m_VulkanResourceBindings; + uint32_t m_NextAvailableVulkanResourceBinding[8]; // one per set. + + inline int GetVaryingNamespace(SHADER_TYPE eShaderType, bool isInput) + { + switch (eShaderType) + { + case VERTEX_SHADER: + return isInput ? 0 : 1; + + case HULL_SHADER: + return isInput ? 1 : 2; + + case DOMAIN_SHADER: + return isInput ? 2 : 3; + + case GEOMETRY_SHADER: + // The input depends on whether there's a tessellation shader before us + if (isInput) + { + return ui32ProgramStages & PS_FLAG_DOMAIN_SHADER ? 3 : 1; + } + return 4; + + case PIXEL_SHADER: + // The inputs can come from geom shader, domain shader or directly from vertex shader + if (isInput) + { + if (ui32ProgramStages & PS_FLAG_GEOMETRY_SHADER) + { + return 4; + } + else if (ui32ProgramStages & PS_FLAG_DOMAIN_SHADER) + { + return 3; + } + else + { + return 1; + } + } + return 5; // This value never really used + default: + return 0; + } + } + + + +public: + GLSLCrossDependencyData() + : eTessPartitioning(), + eTessOutPrim(), + ui32ProgramStages(0) + { + memset(nextAvailableVaryingLocation, 0, sizeof(nextAvailableVaryingLocation)); + memset(m_NextAvailableVulkanResourceBinding, 0, sizeof(m_NextAvailableVulkanResourceBinding)); + } + + + // Retrieve the location for a varying with a given name. + // If the name doesn't already have an allocated location, allocate one + // and store it into the map. 
+ inline uint32_t GetVaryingLocation(const std::string &name, SHADER_TYPE eShaderType, bool isInput) + { + int nspace = GetVaryingNamespace(eShaderType, isInput); + VaryingLocations::iterator itr = varyingLocationsMap[nspace].find(name); + if (itr != varyingLocationsMap[nspace].end()) + return itr->second; + + uint32_t newKey = nextAvailableVaryingLocation[nspace]; + nextAvailableVaryingLocation[nspace]++; + varyingLocationsMap[nspace].insert(std::make_pair(name, newKey)); + return newKey; + } + + // Retrieve the binding for a resource (texture, constant buffer, image) with a given name + // If not found, allocate a new one (in set 0) and return that + // The returned value is a pair of + // If the name contains "hlslcc_set_X_bind_Y", those values (from the first found occurence in the name) + // will be used instead, and all occurences of that string will be removed from name, so name parameter can be modified + // if allocRoomForCounter is true, the following binding number in the same set will be allocated with name + '_counter' + inline std::pair GetVulkanResourceBinding(std::string &name, bool allocRoomForCounter = false, uint32_t preferredSet = 0) + { + // scan for the special marker + const char *marker = "Xhlslcc_set_%d_bind_%dX"; + uint32_t Set = 0, Binding = 0; + size_t startLoc = name.find("Xhlslcc"); + if ((startLoc != std::string::npos) && (sscanf(name.c_str() + startLoc, marker, &Set, &Binding) == 2)) + { + // Get rid of all markers + while ((startLoc = name.find("Xhlslcc")) != std::string::npos) + { + size_t endLoc = name.find('X', startLoc + 1); + if (endLoc == std::string::npos) + break; + name.erase(startLoc, endLoc - startLoc + 1); + } + // Add to map + VulkanResourceBinding newBind = std::make_pair(Set, Binding); + m_VulkanResourceBindings.insert(std::make_pair(name, newBind)); + if (allocRoomForCounter) + { + VulkanResourceBinding counterBind = std::make_pair(Set, Binding+1); + m_VulkanResourceBindings.insert(std::make_pair(name + "_counter", counterBind)); + } + + return newBind; + } + + VulkanResourceBindings::iterator itr = m_VulkanResourceBindings.find(name); + if (itr != m_VulkanResourceBindings.end()) + return itr->second; + + // Allocate a new one + VulkanResourceBinding newBind = std::make_pair(preferredSet, m_NextAvailableVulkanResourceBinding[preferredSet]); + m_NextAvailableVulkanResourceBinding[preferredSet]++; + m_VulkanResourceBindings.insert(std::make_pair(name, newBind)); + if (allocRoomForCounter) + { + VulkanResourceBinding counterBind = std::make_pair(preferredSet, m_NextAvailableVulkanResourceBinding[preferredSet]); + m_NextAvailableVulkanResourceBinding[preferredSet]++; + m_VulkanResourceBindings.insert(std::make_pair(name + "_counter", counterBind)); + } + return newBind; + } + + //dcl_tessellator_partitioning and dcl_tessellator_output_primitive appear in hull shader for D3D, + //but they appear on inputs inside domain shaders for GL. + //Hull shader must be compiled before domain so the + //ensure correct partitioning and primitive type information + //can be saved when compiling hull and passed to domain compilation. + TESSELLATOR_PARTITIONING eTessPartitioning; + TESSELLATOR_OUTPUT_PRIMITIVE eTessOutPrim; + + // Bitfield for the shader stages this program is going to include (see PS_FLAG_*). 
+ // Needed so we can construct proper shader input and output names + uint32_t ui32ProgramStages; + + inline INTERPOLATION_MODE GetInterpolationMode(uint32_t regNo) + { + if (regNo >= pixelInterpolation.size()) + return INTERPOLATION_UNDEFINED; + else + return pixelInterpolation[regNo]; + } + + inline void SetInterpolationMode(uint32_t regNo, INTERPOLATION_MODE mode) + { + if (regNo >= pixelInterpolation.size()) + pixelInterpolation.resize((regNo + 1) * 2, INTERPOLATION_UNDEFINED); + + pixelInterpolation[regNo] = mode; + } + + inline void ClearCrossDependencyData() + { + pixelInterpolation.clear(); + for (int i = 0; i < MAX_NAMESPACES; i++) + { + varyingLocationsMap[i].clear(); + nextAvailableVaryingLocation[i] = 0; + } + } + + +}; + +struct GLSLShader +{ + int shaderType; //One of the GL enums. + std::string sourceCode; + ShaderInfo reflection; + GLLang GLSLLanguage; + TextureSamplerPairs textureSamplers; // HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS fills this out +}; + +// Interface for retrieving reflection and diagnostics data +class HLSLccReflection +{ +public: + HLSLccReflection() {} + virtual ~HLSLccReflection() {} + + // Called on errors or diagnostic messages + virtual void OnDiagnostics(const std::string &error, int line, bool isError) {} + + virtual void OnInputBinding(const std::string &name, int bindIndex) {} + + virtual bool OnConstantBuffer(const std::string &name, size_t bufferSize, size_t memberCount) { return true; } + + virtual bool OnConstant(const std::string &name, int bindIndex, SHADER_VARIABLE_TYPE cType, int rows, int cols, bool isMatrix, int arraySize) { return true; } + + virtual void OnConstantBufferBinding(const std::string &name, int bindIndex) {} + + virtual void OnTextureBinding(const std::string &name, int bindIndex, HLSLCC_TEX_DIMENSION dim, bool isUAV) {} + + virtual void OnBufferBinding(const std::string &name, int bindIndex, bool isUAV) {} + + virtual void OnThreadGroupSize(unsigned int xSize, unsigned int ySize, unsigned int zSize) {} +}; + + +/*HLSL constant buffers are treated as default-block unform arrays by default. This is done + to support versions of GLSL which lack ARB_uniform_buffer_object functionality. + Setting this flag causes each one to have its own uniform block. + Note: Currently the nth const buffer will be named UnformBufferN. This is likey to change to the original HLSL name in the future.*/ +static const unsigned int HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT = 0x1; + +static const unsigned int HLSLCC_FLAG_ORIGIN_UPPER_LEFT = 0x2; + +static const unsigned int HLSLCC_FLAG_PIXEL_CENTER_INTEGER = 0x4; + +static const unsigned int HLSLCC_FLAG_GLOBAL_CONSTS_NEVER_IN_UBO = 0x8; + +//GS enabled? +//Affects vertex shader (i.e. need to compile vertex shader again to use with/without GS). +//This flag is needed in order for the interfaces between stages to match when GS is in use. +//PS inputs VtxGeoOutput +//GS outputs VtxGeoOutput +//Vs outputs VtxOutput if GS enabled. VtxGeoOutput otherwise. +static const unsigned int HLSLCC_FLAG_GS_ENABLED = 0x10; + +static const unsigned int HLSLCC_FLAG_TESS_ENABLED = 0x20; + +//Either use this flag or glBindFragDataLocationIndexed. +//When set the first pixel shader output is the first input to blend +//equation, the others go to the second input. +static const unsigned int HLSLCC_FLAG_DUAL_SOURCE_BLENDING = 0x40; + +//If set, shader inputs and outputs are declared with their semantic name. 
+static const unsigned int HLSLCC_FLAG_INOUT_SEMANTIC_NAMES = 0x80; +//If set, shader inputs and outputs are declared with their semantic name appended. +static const unsigned int HLSLCC_FLAG_INOUT_APPEND_SEMANTIC_NAMES = 0x100; + +//If set, combines texture/sampler pairs used together into samplers named "texturename_X_samplername". +static const unsigned int HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS = 0x200; + +//If set, attribute and uniform explicit location qualifiers are disabled (even if the language version supports that) +static const unsigned int HLSLCC_FLAG_DISABLE_EXPLICIT_LOCATIONS = 0x400; + +//If set, global uniforms are not stored in a struct. +static const unsigned int HLSLCC_FLAG_DISABLE_GLOBALS_STRUCT = 0x800; + +//If set, image declarations will always have binding and format qualifiers. +static const unsigned int HLSLCC_FLAG_GLES31_IMAGE_QUALIFIERS = 0x1000; + +// If set, treats sampler names ending with _highp, _mediump, and _lowp as sampler precision qualifiers +// Also removes that prefix from generated output +static const unsigned int HLSLCC_FLAG_SAMPLER_PRECISION_ENCODED_IN_NAME = 0x2000; + +// If set, adds location qualifiers to intra-shader varyings. +static const unsigned int HLSLCC_FLAG_SEPARABLE_SHADER_OBJECTS = 0x4000; + +// If set, wraps all uniform buffer declarations in a preprocessor macro #ifndef HLSLCC_DISABLE_UNIFORM_BUFFERS +// so that if that macro is defined, all UBO declarations will become normal uniforms +static const unsigned int HLSLCC_FLAG_WRAP_UBO = 0x8000; + +// If set, skips all members of the $Globals constant buffer struct that are not referenced in the shader code +static const unsigned int HLSLCC_FLAG_REMOVE_UNUSED_GLOBALS = 0x10000; + +#define HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING "hlslcc_mtx%dx%d" + +// If set, translates all matrix declarations into vec4 arrays (as the DX bytecode treats them), and prefixes the name with 'hlslcc_mtxx' +static const unsigned int HLSLCC_FLAG_TRANSLATE_MATRICES = 0x20000; + +// If set, emits Vulkan-style (set, binding) bindings, also captures that info from any declaration named "_hlslcc_set_X_bind_Y" +// Unless bindings are given explicitly, they are allocated into set 0 (map stored in GLSLCrossDependencyData) +static const unsigned int HLSLCC_FLAG_VULKAN_BINDINGS = 0x40000; + +// If set, metal output will use linear sampler for shadow compares, otherwise point sampler. 
+static const unsigned int HLSLCC_FLAG_METAL_SHADOW_SAMPLER_LINEAR = 0x80000; + +#ifdef __cplusplus +extern "C" { +#endif + +HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromFile(const char* filename, + unsigned int flags, + GLLang language, + const GlExtensions *extensions, + GLSLCrossDependencyData* dependencies, + HLSLccSamplerPrecisionInfo& samplerPrecisions, + HLSLccReflection& reflectionCallbacks, + GLSLShader* result + ); + +HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromMem(const char* shader, + unsigned int flags, + GLLang language, + const GlExtensions *extensions, + GLSLCrossDependencyData* dependencies, + HLSLccSamplerPrecisionInfo& samplerPrecisions, + HLSLccReflection& reflectionCallbacks, + GLSLShader* result); + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/include/hlslcc.hpp b/include/hlslcc.hpp new file mode 100644 index 0000000..fa4dd96 --- /dev/null +++ b/include/hlslcc.hpp @@ -0,0 +1,5 @@ + +extern "C" { +#include "hlslcc.h" +} + diff --git a/include/pstdint.h b/include/pstdint.h new file mode 100644 index 0000000..00fc1fc --- /dev/null +++ b/include/pstdint.h @@ -0,0 +1,800 @@ +/* A portable stdint.h + **************************************************************************** + * BSD License: + **************************************************************************** + * + * Copyright (c) 2005-2011 Paul Hsieh + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************** + * + * Version 0.1.12 + * + * The ANSI C standard committee, for the C99 standard, specified the + * inclusion of a new standard include file called stdint.h. This is + * a very useful and long desired include file which contains several + * very precise definitions for integer scalar types that is + * critically important for making portable several classes of + * applications including cryptography, hashing, variable length + * integer libraries and so on. But for most developers its likely + * useful just for programming sanity. 
+ * + * The problem is that most compiler vendors have decided not to + * implement the C99 standard, and the next C++ language standard + * (which has a lot more mindshare these days) will be a long time in + * coming and its unknown whether or not it will include stdint.h or + * how much adoption it will have. Either way, it will be a long time + * before all compilers come with a stdint.h and it also does nothing + * for the extremely large number of compilers available today which + * do not include this file, or anything comparable to it. + * + * So that's what this file is all about. Its an attempt to build a + * single universal include file that works on as many platforms as + * possible to deliver what stdint.h is supposed to. A few things + * that should be noted about this file: + * + * 1) It is not guaranteed to be portable and/or present an identical + * interface on all platforms. The extreme variability of the + * ANSI C standard makes this an impossibility right from the + * very get go. Its really only meant to be useful for the vast + * majority of platforms that possess the capability of + * implementing usefully and precisely defined, standard sized + * integer scalars. Systems which are not intrinsically 2s + * complement may produce invalid constants. + * + * 2) There is an unavoidable use of non-reserved symbols. + * + * 3) Other standard include files are invoked. + * + * 4) This file may come in conflict with future platforms that do + * include stdint.h. The hope is that one or the other can be + * used with no real difference. + * + * 5) In the current verison, if your platform can't represent + * int32_t, int16_t and int8_t, it just dumps out with a compiler + * error. + * + * 6) 64 bit integers may or may not be defined. Test for their + * presence with the test: #ifdef INT64_MAX or #ifdef UINT64_MAX. + * Note that this is different from the C99 specification which + * requires the existence of 64 bit support in the compiler. If + * this is not defined for your platform, yet it is capable of + * dealing with 64 bits then it is because this file has not yet + * been extended to cover all of your system's capabilities. + * + * 7) (u)intptr_t may or may not be defined. Test for its presence + * with the test: #ifdef PTRDIFF_MAX. If this is not defined + * for your platform, then it is because this file has not yet + * been extended to cover all of your system's capabilities, not + * because its optional. + * + * 8) The following might not been defined even if your platform is + * capable of defining it: + * + * WCHAR_MIN + * WCHAR_MAX + * (u)int64_t + * PTRDIFF_MIN + * PTRDIFF_MAX + * (u)intptr_t + * + * 9) The following have not been defined: + * + * WINT_MIN + * WINT_MAX + * + * 10) The criteria for defining (u)int_least(*)_t isn't clear, + * except for systems which don't have a type that precisely + * defined 8, 16, or 32 bit types (which this include file does + * not support anyways). Default definitions have been given. + * + * 11) The criteria for defining (u)int_fast(*)_t isn't something I + * would trust to any particular compiler vendor or the ANSI C + * committee. It is well known that "compatible systems" are + * commonly created that have very different performance + * characteristics from the systems they are compatible with, + * especially those whose vendors make both the compiler and the + * system. 
Default definitions have been given, but its strongly + * recommended that users never use these definitions for any + * reason (they do *NOT* deliver any serious guarantee of + * improved performance -- not in this file, nor any vendor's + * stdint.h). + * + * 12) The following macros: + * + * PRINTF_INTMAX_MODIFIER + * PRINTF_INT64_MODIFIER + * PRINTF_INT32_MODIFIER + * PRINTF_INT16_MODIFIER + * PRINTF_LEAST64_MODIFIER + * PRINTF_LEAST32_MODIFIER + * PRINTF_LEAST16_MODIFIER + * PRINTF_INTPTR_MODIFIER + * + * are strings which have been defined as the modifiers required + * for the "d", "u" and "x" printf formats to correctly output + * (u)intmax_t, (u)int64_t, (u)int32_t, (u)int16_t, (u)least64_t, + * (u)least32_t, (u)least16_t and (u)intptr_t types respectively. + * PRINTF_INTPTR_MODIFIER is not defined for some systems which + * provide their own stdint.h. PRINTF_INT64_MODIFIER is not + * defined if INT64_MAX is not defined. These are an extension + * beyond what C99 specifies must be in stdint.h. + * + * In addition, the following macros are defined: + * + * PRINTF_INTMAX_HEX_WIDTH + * PRINTF_INT64_HEX_WIDTH + * PRINTF_INT32_HEX_WIDTH + * PRINTF_INT16_HEX_WIDTH + * PRINTF_INT8_HEX_WIDTH + * PRINTF_INTMAX_DEC_WIDTH + * PRINTF_INT64_DEC_WIDTH + * PRINTF_INT32_DEC_WIDTH + * PRINTF_INT16_DEC_WIDTH + * PRINTF_INT8_DEC_WIDTH + * + * Which specifies the maximum number of characters required to + * print the number of that type in either hexadecimal or decimal. + * These are an extension beyond what C99 specifies must be in + * stdint.h. + * + * Compilers tested (all with 0 warnings at their highest respective + * settings): Borland Turbo C 2.0, WATCOM C/C++ 11.0 (16 bits and 32 + * bits), Microsoft Visual C++ 6.0 (32 bit), Microsoft Visual Studio + * .net (VC7), Intel C++ 4.0, GNU gcc v3.3.3 + * + * This file should be considered a work in progress. Suggestions for + * improvements, especially those which increase coverage are strongly + * encouraged. + * + * Acknowledgements + * + * The following people have made significant contributions to the + * development and testing of this file: + * + * Chris Howie + * John Steele Scott + * Dave Thorup + * John Dill + * + */ + +#include +#include +#include + +/* + * For gcc with _STDINT_H, fill in the PRINTF_INT*_MODIFIER macros, and + * do nothing else. On the Mac OS X version of gcc this is _STDINT_H_. 
+ */ + +#if ((defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L) || (defined (__WATCOMC__) && (defined (_STDINT_H_INCLUDED) || __WATCOMC__ >= 1250)) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_) || defined (__UINT_FAST64_TYPE__)) )) && !defined (_PSTDINT_H_INCLUDED) +#include +#define _PSTDINT_H_INCLUDED +# ifndef PRINTF_INT64_MODIFIER +# define PRINTF_INT64_MODIFIER "ll" +# endif +# ifndef PRINTF_INT32_MODIFIER +# define PRINTF_INT32_MODIFIER "l" +# endif +# ifndef PRINTF_INT16_MODIFIER +# define PRINTF_INT16_MODIFIER "h" +# endif +# ifndef PRINTF_INTMAX_MODIFIER +# define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER +# endif +# ifndef PRINTF_INT64_HEX_WIDTH +# define PRINTF_INT64_HEX_WIDTH "16" +# endif +# ifndef PRINTF_INT32_HEX_WIDTH +# define PRINTF_INT32_HEX_WIDTH "8" +# endif +# ifndef PRINTF_INT16_HEX_WIDTH +# define PRINTF_INT16_HEX_WIDTH "4" +# endif +# ifndef PRINTF_INT8_HEX_WIDTH +# define PRINTF_INT8_HEX_WIDTH "2" +# endif +# ifndef PRINTF_INT64_DEC_WIDTH +# define PRINTF_INT64_DEC_WIDTH "20" +# endif +# ifndef PRINTF_INT32_DEC_WIDTH +# define PRINTF_INT32_DEC_WIDTH "10" +# endif +# ifndef PRINTF_INT16_DEC_WIDTH +# define PRINTF_INT16_DEC_WIDTH "5" +# endif +# ifndef PRINTF_INT8_DEC_WIDTH +# define PRINTF_INT8_DEC_WIDTH "3" +# endif +# ifndef PRINTF_INTMAX_HEX_WIDTH +# define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH +# endif +# ifndef PRINTF_INTMAX_DEC_WIDTH +# define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH +# endif + +/* + * Something really weird is going on with Open Watcom. Just pull some of + * these duplicated definitions from Open Watcom's stdint.h file for now. + */ + +# if defined (__WATCOMC__) && __WATCOMC__ >= 1250 +# if !defined (INT64_C) +# define INT64_C(x) (x + (INT64_MAX - INT64_MAX)) +# endif +# if !defined (UINT64_C) +# define UINT64_C(x) (x + (UINT64_MAX - UINT64_MAX)) +# endif +# if !defined (INT32_C) +# define INT32_C(x) (x + (INT32_MAX - INT32_MAX)) +# endif +# if !defined (UINT32_C) +# define UINT32_C(x) (x + (UINT32_MAX - UINT32_MAX)) +# endif +# if !defined (INT16_C) +# define INT16_C(x) (x) +# endif +# if !defined (UINT16_C) +# define UINT16_C(x) (x) +# endif +# if !defined (INT8_C) +# define INT8_C(x) (x) +# endif +# if !defined (UINT8_C) +# define UINT8_C(x) (x) +# endif +# if !defined (UINT64_MAX) +# define UINT64_MAX 18446744073709551615ULL +# endif +# if !defined (INT64_MAX) +# define INT64_MAX 9223372036854775807LL +# endif +# if !defined (UINT32_MAX) +# define UINT32_MAX 4294967295UL +# endif +# if !defined (INT32_MAX) +# define INT32_MAX 2147483647L +# endif +# if !defined (INTMAX_MAX) +# define INTMAX_MAX INT64_MAX +# endif +# if !defined (INTMAX_MIN) +# define INTMAX_MIN INT64_MIN +# endif +# endif +#endif + +#ifndef _PSTDINT_H_INCLUDED +#define _PSTDINT_H_INCLUDED + +#ifndef SIZE_MAX +# define SIZE_MAX (~(size_t)0) +#endif + +/* + * Deduce the type assignments from limits.h under the assumption that + * integer sizes in bits are powers of 2, and follow the ANSI + * definitions. 
+ */ + +#ifndef UINT8_MAX +# define UINT8_MAX 0xff +#endif +#ifndef uint8_t +# if (UCHAR_MAX == UINT8_MAX) || defined (S_SPLINT_S) + typedef unsigned char uint8_t; +# define UINT8_C(v) ((uint8_t) v) +# else +# error "Platform not supported" +# endif +#endif + +#ifndef INT8_MAX +# define INT8_MAX 0x7f +#endif +#ifndef INT8_MIN +# define INT8_MIN INT8_C(0x80) +#endif +#ifndef int8_t +# if (SCHAR_MAX == INT8_MAX) || defined (S_SPLINT_S) + typedef signed char int8_t; +# define INT8_C(v) ((int8_t) v) +# else +# error "Platform not supported" +# endif +#endif + +#ifndef UINT16_MAX +# define UINT16_MAX 0xffff +#endif +#ifndef uint16_t +#if (UINT_MAX == UINT16_MAX) || defined (S_SPLINT_S) + typedef unsigned int uint16_t; +# ifndef PRINTF_INT16_MODIFIER +# define PRINTF_INT16_MODIFIER "" +# endif +# define UINT16_C(v) ((uint16_t) (v)) +#elif (USHRT_MAX == UINT16_MAX) + typedef unsigned short uint16_t; +# define UINT16_C(v) ((uint16_t) (v)) +# ifndef PRINTF_INT16_MODIFIER +# define PRINTF_INT16_MODIFIER "h" +# endif +#else +#error "Platform not supported" +#endif +#endif + +#ifndef INT16_MAX +# define INT16_MAX 0x7fff +#endif +#ifndef INT16_MIN +# define INT16_MIN INT16_C(0x8000) +#endif +#ifndef int16_t +#if (INT_MAX == INT16_MAX) || defined (S_SPLINT_S) + typedef signed int int16_t; +# define INT16_C(v) ((int16_t) (v)) +# ifndef PRINTF_INT16_MODIFIER +# define PRINTF_INT16_MODIFIER "" +# endif +#elif (SHRT_MAX == INT16_MAX) + typedef signed short int16_t; +# define INT16_C(v) ((int16_t) (v)) +# ifndef PRINTF_INT16_MODIFIER +# define PRINTF_INT16_MODIFIER "h" +# endif +#else +#error "Platform not supported" +#endif +#endif + +#ifndef UINT32_MAX +# define UINT32_MAX (0xffffffffUL) +#endif +#ifndef uint32_t +#if (ULONG_MAX == UINT32_MAX) || defined (S_SPLINT_S) + typedef unsigned long uint32_t; +# define UINT32_C(v) v ## UL +# ifndef PRINTF_INT32_MODIFIER +# define PRINTF_INT32_MODIFIER "l" +# endif +#elif (UINT_MAX == UINT32_MAX) + typedef unsigned int uint32_t; +# ifndef PRINTF_INT32_MODIFIER +# define PRINTF_INT32_MODIFIER "" +# endif +# define UINT32_C(v) v ## U +#elif (USHRT_MAX == UINT32_MAX) + typedef unsigned short uint32_t; +# define UINT32_C(v) ((unsigned short) (v)) +# ifndef PRINTF_INT32_MODIFIER +# define PRINTF_INT32_MODIFIER "" +# endif +#else +#error "Platform not supported" +#endif +#endif + +#ifndef INT32_MAX +# define INT32_MAX (0x7fffffffL) +#endif +#ifndef INT32_MIN +# define INT32_MIN INT32_C(0x80000000) +#endif +#ifndef int32_t +#if (LONG_MAX == INT32_MAX) || defined (S_SPLINT_S) + typedef signed long int32_t; +# define INT32_C(v) v ## L +# ifndef PRINTF_INT32_MODIFIER +# define PRINTF_INT32_MODIFIER "l" +# endif +#elif (INT_MAX == INT32_MAX) + typedef signed int int32_t; +# define INT32_C(v) v +# ifndef PRINTF_INT32_MODIFIER +# define PRINTF_INT32_MODIFIER "" +# endif +#elif (SHRT_MAX == INT32_MAX) + typedef signed short int32_t; +# define INT32_C(v) ((short) (v)) +# ifndef PRINTF_INT32_MODIFIER +# define PRINTF_INT32_MODIFIER "" +# endif +#else +#error "Platform not supported" +#endif +#endif + +/* + * The macro stdint_int64_defined is temporarily used to record + * whether or not 64 integer support is available. It must be + * defined for any 64 integer extensions for new platforms that are + * added. 
+ */ + +#undef stdint_int64_defined +#if (defined(__STDC__) && defined(__STDC_VERSION__)) || defined (S_SPLINT_S) +# if (__STDC__ && __STDC_VERSION__ >= 199901L) || defined (S_SPLINT_S) +# define stdint_int64_defined + typedef long long int64_t; + typedef unsigned long long uint64_t; +# define UINT64_C(v) v ## ULL +# define INT64_C(v) v ## LL +# ifndef PRINTF_INT64_MODIFIER +# define PRINTF_INT64_MODIFIER "ll" +# endif +# endif +#endif + +#if !defined (stdint_int64_defined) +# if defined(__GNUC__) +# define stdint_int64_defined + __extension__ typedef long long int64_t; + __extension__ typedef unsigned long long uint64_t; +# define UINT64_C(v) v ## ULL +# define INT64_C(v) v ## LL +# ifndef PRINTF_INT64_MODIFIER +# define PRINTF_INT64_MODIFIER "ll" +# endif +# elif defined(__MWERKS__) || defined (__SUNPRO_C) || defined (__SUNPRO_CC) || defined (__APPLE_CC__) || defined (_LONG_LONG) || defined (_CRAYC) || defined (S_SPLINT_S) +# define stdint_int64_defined + typedef long long int64_t; + typedef unsigned long long uint64_t; +# define UINT64_C(v) v ## ULL +# define INT64_C(v) v ## LL +# ifndef PRINTF_INT64_MODIFIER +# define PRINTF_INT64_MODIFIER "ll" +# endif +# elif (defined(__WATCOMC__) && defined(__WATCOM_INT64__)) || (defined(_MSC_VER) && _INTEGRAL_MAX_BITS >= 64) || (defined (__BORLANDC__) && __BORLANDC__ > 0x460) || defined (__alpha) || defined (__DECC) +# define stdint_int64_defined + typedef __int64 int64_t; + typedef unsigned __int64 uint64_t; +# define UINT64_C(v) v ## UI64 +# define INT64_C(v) v ## I64 +# ifndef PRINTF_INT64_MODIFIER +# define PRINTF_INT64_MODIFIER "I64" +# endif +# endif +#endif + +#if !defined (LONG_LONG_MAX) && defined (INT64_C) +# define LONG_LONG_MAX INT64_C (9223372036854775807) +#endif +#ifndef ULONG_LONG_MAX +# define ULONG_LONG_MAX UINT64_C (18446744073709551615) +#endif + +#if !defined (INT64_MAX) && defined (INT64_C) +# define INT64_MAX INT64_C (9223372036854775807) +#endif +#if !defined (INT64_MIN) && defined (INT64_C) +# define INT64_MIN INT64_C (-9223372036854775808) +#endif +#if !defined (UINT64_MAX) && defined (INT64_C) +# define UINT64_MAX UINT64_C (18446744073709551615) +#endif + +/* + * Width of hexadecimal for number field. + */ + +#ifndef PRINTF_INT64_HEX_WIDTH +# define PRINTF_INT64_HEX_WIDTH "16" +#endif +#ifndef PRINTF_INT32_HEX_WIDTH +# define PRINTF_INT32_HEX_WIDTH "8" +#endif +#ifndef PRINTF_INT16_HEX_WIDTH +# define PRINTF_INT16_HEX_WIDTH "4" +#endif +#ifndef PRINTF_INT8_HEX_WIDTH +# define PRINTF_INT8_HEX_WIDTH "2" +#endif + +#ifndef PRINTF_INT64_DEC_WIDTH +# define PRINTF_INT64_DEC_WIDTH "20" +#endif +#ifndef PRINTF_INT32_DEC_WIDTH +# define PRINTF_INT32_DEC_WIDTH "10" +#endif +#ifndef PRINTF_INT16_DEC_WIDTH +# define PRINTF_INT16_DEC_WIDTH "5" +#endif +#ifndef PRINTF_INT8_DEC_WIDTH +# define PRINTF_INT8_DEC_WIDTH "3" +#endif + +/* + * Ok, lets not worry about 128 bit integers for now. Moore's law says + * we don't need to worry about that until about 2040 at which point + * we'll have bigger things to worry about. 
+ */ + +#ifdef stdint_int64_defined + typedef int64_t intmax_t; + typedef uint64_t uintmax_t; +# define INTMAX_MAX INT64_MAX +# define INTMAX_MIN INT64_MIN +# define UINTMAX_MAX UINT64_MAX +# define UINTMAX_C(v) UINT64_C(v) +# define INTMAX_C(v) INT64_C(v) +# ifndef PRINTF_INTMAX_MODIFIER +# define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER +# endif +# ifndef PRINTF_INTMAX_HEX_WIDTH +# define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH +# endif +# ifndef PRINTF_INTMAX_DEC_WIDTH +# define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH +# endif +#else + typedef int32_t intmax_t; + typedef uint32_t uintmax_t; +# define INTMAX_MAX INT32_MAX +# define UINTMAX_MAX UINT32_MAX +# define UINTMAX_C(v) UINT32_C(v) +# define INTMAX_C(v) INT32_C(v) +# ifndef PRINTF_INTMAX_MODIFIER +# define PRINTF_INTMAX_MODIFIER PRINTF_INT32_MODIFIER +# endif +# ifndef PRINTF_INTMAX_HEX_WIDTH +# define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT32_HEX_WIDTH +# endif +# ifndef PRINTF_INTMAX_DEC_WIDTH +# define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT32_DEC_WIDTH +# endif +#endif + +/* + * Because this file currently only supports platforms which have + * precise powers of 2 as bit sizes for the default integers, the + * least definitions are all trivial. Its possible that a future + * version of this file could have different definitions. + */ + +#ifndef stdint_least_defined + typedef int8_t int_least8_t; + typedef uint8_t uint_least8_t; + typedef int16_t int_least16_t; + typedef uint16_t uint_least16_t; + typedef int32_t int_least32_t; + typedef uint32_t uint_least32_t; +# define PRINTF_LEAST32_MODIFIER PRINTF_INT32_MODIFIER +# define PRINTF_LEAST16_MODIFIER PRINTF_INT16_MODIFIER +# define UINT_LEAST8_MAX UINT8_MAX +# define INT_LEAST8_MAX INT8_MAX +# define UINT_LEAST16_MAX UINT16_MAX +# define INT_LEAST16_MAX INT16_MAX +# define UINT_LEAST32_MAX UINT32_MAX +# define INT_LEAST32_MAX INT32_MAX +# define INT_LEAST8_MIN INT8_MIN +# define INT_LEAST16_MIN INT16_MIN +# define INT_LEAST32_MIN INT32_MIN +# ifdef stdint_int64_defined + typedef int64_t int_least64_t; + typedef uint64_t uint_least64_t; +# define PRINTF_LEAST64_MODIFIER PRINTF_INT64_MODIFIER +# define UINT_LEAST64_MAX UINT64_MAX +# define INT_LEAST64_MAX INT64_MAX +# define INT_LEAST64_MIN INT64_MIN +# endif +#endif +#undef stdint_least_defined + +/* + * The ANSI C committee pretending to know or specify anything about + * performance is the epitome of misguided arrogance. The mandate of + * this file is to *ONLY* ever support that absolute minimum + * definition of the fast integer types, for compatibility purposes. + * No extensions, and no attempt to suggest what may or may not be a + * faster integer type will ever be made in this file. Developers are + * warned to stay away from these types when using this or any other + * stdint.h. 
+ */ + +typedef int_least8_t int_fast8_t; +typedef uint_least8_t uint_fast8_t; +typedef int_least16_t int_fast16_t; +typedef uint_least16_t uint_fast16_t; +typedef int_least32_t int_fast32_t; +typedef uint_least32_t uint_fast32_t; +#define UINT_FAST8_MAX UINT_LEAST8_MAX +#define INT_FAST8_MAX INT_LEAST8_MAX +#define UINT_FAST16_MAX UINT_LEAST16_MAX +#define INT_FAST16_MAX INT_LEAST16_MAX +#define UINT_FAST32_MAX UINT_LEAST32_MAX +#define INT_FAST32_MAX INT_LEAST32_MAX +#define INT_FAST8_MIN INT_LEAST8_MIN +#define INT_FAST16_MIN INT_LEAST16_MIN +#define INT_FAST32_MIN INT_LEAST32_MIN +#ifdef stdint_int64_defined + typedef int_least64_t int_fast64_t; + typedef uint_least64_t uint_fast64_t; +# define UINT_FAST64_MAX UINT_LEAST64_MAX +# define INT_FAST64_MAX INT_LEAST64_MAX +# define INT_FAST64_MIN INT_LEAST64_MIN +#endif + +#undef stdint_int64_defined + +/* + * Whatever piecemeal, per compiler thing we can do about the wchar_t + * type limits. + */ + +#if defined(__WATCOMC__) || defined(_MSC_VER) || defined (__GNUC__) +# include +# ifndef WCHAR_MIN +# define WCHAR_MIN 0 +# endif +# ifndef WCHAR_MAX +# define WCHAR_MAX ((wchar_t)-1) +# endif +#endif + +/* + * Whatever piecemeal, per compiler/platform thing we can do about the + * (u)intptr_t types and limits. + */ + +#if defined (_MSC_VER) && defined (_UINTPTR_T_DEFINED) +# define STDINT_H_UINTPTR_T_DEFINED +#endif + +#ifndef STDINT_H_UINTPTR_T_DEFINED +# if defined (__alpha__) || defined (__ia64__) || defined (__x86_64__) || defined (_WIN64) +# define stdint_intptr_bits 64 +# elif defined (__WATCOMC__) || defined (__TURBOC__) +# if defined(__TINY__) || defined(__SMALL__) || defined(__MEDIUM__) +# define stdint_intptr_bits 16 +# else +# define stdint_intptr_bits 32 +# endif +# elif defined (__i386__) || defined (_WIN32) || defined (WIN32) +# define stdint_intptr_bits 32 +# elif defined (__INTEL_COMPILER) +/* TODO -- what did Intel do about x86-64? */ +# endif + +# ifdef stdint_intptr_bits +# define stdint_intptr_glue3_i(a,b,c) a##b##c +# define stdint_intptr_glue3(a,b,c) stdint_intptr_glue3_i(a,b,c) +# ifndef PRINTF_INTPTR_MODIFIER +# define PRINTF_INTPTR_MODIFIER stdint_intptr_glue3(PRINTF_INT,stdint_intptr_bits,_MODIFIER) +# endif +# ifndef PTRDIFF_MAX +# define PTRDIFF_MAX stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX) +# endif +# ifndef PTRDIFF_MIN +# define PTRDIFF_MIN stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN) +# endif +# ifndef UINTPTR_MAX +# define UINTPTR_MAX stdint_intptr_glue3(UINT,stdint_intptr_bits,_MAX) +# endif +# ifndef INTPTR_MAX +# define INTPTR_MAX stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX) +# endif +# ifndef INTPTR_MIN +# define INTPTR_MIN stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN) +# endif +# ifndef INTPTR_C +# define INTPTR_C(x) stdint_intptr_glue3(INT,stdint_intptr_bits,_C)(x) +# endif +# ifndef UINTPTR_C +# define UINTPTR_C(x) stdint_intptr_glue3(UINT,stdint_intptr_bits,_C)(x) +# endif + typedef stdint_intptr_glue3(uint,stdint_intptr_bits,_t) uintptr_t; + typedef stdint_intptr_glue3( int,stdint_intptr_bits,_t) intptr_t; +# else +/* TODO -- This following is likely wrong for some platforms, and does + nothing for the definition of uintptr_t. */ + typedef ptrdiff_t intptr_t; +# endif +# define STDINT_H_UINTPTR_T_DEFINED +#endif + +/* + * Assumes sig_atomic_t is signed and we have a 2s complement machine. 
+ */ + +#ifndef SIG_ATOMIC_MAX +# define SIG_ATOMIC_MAX ((((sig_atomic_t) 1) << (sizeof (sig_atomic_t)*CHAR_BIT-1)) - 1) +#endif + +#endif + +#if defined (__TEST_PSTDINT_FOR_CORRECTNESS) + +/* + * Please compile with the maximum warning settings to make sure macros are not + * defined more than once. + */ + +#include +#include +#include + +#define glue3_aux(x,y,z) x ## y ## z +#define glue3(x,y,z) glue3_aux(x,y,z) + +#define DECLU(bits) glue3(uint,bits,_t) glue3(u,bits,=) glue3(UINT,bits,_C) (0); +#define DECLI(bits) glue3(int,bits,_t) glue3(i,bits,=) glue3(INT,bits,_C) (0); + +#define DECL(us,bits) glue3(DECL,us,) (bits) + +#define TESTUMAX(bits) glue3(u,bits,=) glue3(~,u,bits); if (glue3(UINT,bits,_MAX) glue3(!=,u,bits)) printf ("Something wrong with UINT%d_MAX\n", bits) + +int main () { + DECL(I,8) + DECL(U,8) + DECL(I,16) + DECL(U,16) + DECL(I,32) + DECL(U,32) +#ifdef INT64_MAX + DECL(I,64) + DECL(U,64) +#endif + intmax_t imax = INTMAX_C(0); + uintmax_t umax = UINTMAX_C(0); + char str0[256], str1[256]; + + sprintf (str0, "%d %x\n", 0, ~0); + + sprintf (str1, "%d %x\n", i8, ~0); + if (0 != strcmp (str0, str1)) printf ("Something wrong with i8 : %s\n", str1); + sprintf (str1, "%u %x\n", u8, ~0); + if (0 != strcmp (str0, str1)) printf ("Something wrong with u8 : %s\n", str1); + sprintf (str1, "%d %x\n", i16, ~0); + if (0 != strcmp (str0, str1)) printf ("Something wrong with i16 : %s\n", str1); + sprintf (str1, "%u %x\n", u16, ~0); + if (0 != strcmp (str0, str1)) printf ("Something wrong with u16 : %s\n", str1); + sprintf (str1, "%" PRINTF_INT32_MODIFIER "d %x\n", i32, ~0); + if (0 != strcmp (str0, str1)) printf ("Something wrong with i32 : %s\n", str1); + sprintf (str1, "%" PRINTF_INT32_MODIFIER "u %x\n", u32, ~0); + if (0 != strcmp (str0, str1)) printf ("Something wrong with u32 : %s\n", str1); +#ifdef INT64_MAX + sprintf (str1, "%" PRINTF_INT64_MODIFIER "d %x\n", i64, ~0); + if (0 != strcmp (str0, str1)) printf ("Something wrong with i64 : %s\n", str1); +#endif + sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "d %x\n", imax, ~0); + if (0 != strcmp (str0, str1)) printf ("Something wrong with imax : %s\n", str1); + sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "u %x\n", umax, ~0); + if (0 != strcmp (str0, str1)) printf ("Something wrong with umax : %s\n", str1); + + TESTUMAX(8); + TESTUMAX(16); + TESTUMAX(32); +#ifdef INT64_MAX + TESTUMAX(64); +#endif + + return EXIT_SUCCESS; +} + +#endif diff --git a/license.txt b/license.txt new file mode 100644 index 0000000..6e2d4bb --- /dev/null +++ b/license.txt @@ -0,0 +1,53 @@ + +Original HLSLcc source code Copyright (c) 2012 James Jones +Further improvements Copyright (c) 2014-2016 Unity Technologies +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +This software makes use of the bstring library which is provided under the following license: + +Copyright (c) 2002-2008 Paul Hsieh +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + Neither the name of bstrlib nor the names of its contributors may be used + to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
\ No newline at end of file
diff --git a/src/ControlFlowGraph.cpp b/src/ControlFlowGraph.cpp
new file mode 100644
index 0000000..4fbd68e
--- /dev/null
+++ b/src/ControlFlowGraph.cpp
@@ -0,0 +1,824 @@
+
+#include "internal_includes/debug.h"
+#include "internal_includes/ControlFlowGraph.h"
+#include "internal_includes/ControlFlowGraphUtils.h"
+#include "internal_includes/Instruction.h"
+#include "internal_includes/Operand.h"
+#include "internal_includes/HLSLccToolkit.h"
+#include <algorithm>
+
+using namespace HLSLcc::ControlFlow;
+using HLSLcc::ForEachOperand;
+
+const BasicBlock &ControlFlowGraph::Build(const Instruction *firstInstruction)
+{
+    using std::for_each;
+
+    m_BlockMap.clear();
+    m_BlockStorage.clear();
+
+    // Self-registering into m_BlockStorage so it goes out of the scope when ControlFlowGraph does
+    BasicBlock *root = new BasicBlock(Utils::GetNextNonLabelInstruction(firstInstruction), *this, NULL);
+
+    // Build the reachable set for each block
+    bool hadChanges;
+    do
+    {
+        hadChanges = false;
+        for_each(m_BlockStorage.begin(), m_BlockStorage.end(), [&](const shared_ptr<BasicBlock> &bb)
+        {
+            BasicBlock &b = *bb.get();
+            if (b.RebuildReachable())
+            {
+                hadChanges = true;
+            }
+        });
+    } while (hadChanges == true);
+
+    return *root;
+}
+
+const BasicBlock *ControlFlowGraph::GetBasicBlockForInstruction(const Instruction *instruction) const
+{
+    BasicBlockMap::const_iterator itr = m_BlockMap.find(Utils::GetNextNonLabelInstruction(instruction));
+    if (itr == m_BlockMap.end())
+        return NULL;
+
+    return itr->second;
+}
+
+BasicBlock *ControlFlowGraph::GetBasicBlockForInstruction(const Instruction *instruction)
+{
+    BasicBlockMap::iterator itr = m_BlockMap.find(Utils::GetNextNonLabelInstruction(instruction));
+    if (itr == m_BlockMap.end())
+        return NULL;
+
+    return itr->second;
+}
+
+
+
+
+// Generate a basic block. Private constructor, can only be constructed from ControlFlowGraph::Build().
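+// (Note that construction is recursive: ControlFlowGraph::Build() creates the root block, and each block's
+// own Build() creates its successor blocks via AddChildBasicBlock(), so the whole graph is built in one pass.)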
+// Auto-registers itself into ControlFlowGraph +BasicBlock::BasicBlock(const Instruction *psFirst, ControlFlowGraph &graph, const Instruction *psPrecedingBlockHead) + : m_Graph(graph) + , m_First(psFirst) + , m_Last(NULL) +{ + m_UEVar.clear(); + m_VarKill.clear(); + m_Preceding.clear(); + m_Succeeding.clear(); + m_DEDef.clear(); + m_Reachable.clear(); + + // Check that we've pruned the labels + ASSERT(psFirst == Utils::GetNextNonLabelInstruction(psFirst)); + + // Insert to block storage, block map and connect to previous block + m_Graph.m_BlockStorage.push_back(shared_ptr(this)); + + bool didInsert = m_Graph.m_BlockMap.insert(std::make_pair(psFirst, this)).second; + ASSERT(didInsert); + + if (psPrecedingBlockHead != NULL) + { + m_Preceding.insert(psPrecedingBlockHead); + BasicBlock *prec = m_Graph.GetBasicBlockForInstruction(psPrecedingBlockHead); + ASSERT(prec != 0); + didInsert = prec->m_Succeeding.insert(psFirst).second; + ASSERT(didInsert); + } + + Build(); +} + +void BasicBlock::Build() +{ + const Instruction *inst = m_First; + while (1) + { + // Process sources first + ForEachOperand(inst, inst+1, FEO_FLAG_SRC_OPERAND | FEO_FLAG_SUBOPERAND, + [this](const Instruction *psInst, const Operand *psOperand, uint32_t ui32OperandType) + { + if (psOperand->eType != OPERAND_TYPE_TEMP) + return; + + uint32_t tempReg = psOperand->ui32RegisterNumber; + uint32_t accessMask = psOperand->GetAccessMask(); + + // Go through each component + for (int k = 0; k < 4; k++) + { + if (!(accessMask & (1 << k))) + continue; + + uint32_t regIdx = tempReg * 4 + k; + // Is this idx already in the kill set, meaning that it's already been re-defined in this basic block? Ignore + if (m_VarKill.find(regIdx) != m_VarKill.end()) + continue; + + // Add to UEVars set. Doesn't matter if it's already there. + m_UEVar.insert(regIdx); + } + return; + }); + + // Then the destination operands + ForEachOperand(inst, inst+1, FEO_FLAG_DEST_OPERAND, + [this](const Instruction *psInst, const Operand *psOperand, uint32_t ui32OperandType) + { + if (psOperand->eType != OPERAND_TYPE_TEMP) + return; + + uint32_t tempReg = psOperand->ui32RegisterNumber; + uint32_t accessMask = psOperand->GetAccessMask(); + + // Go through each component + for (int k = 0; k < 4; k++) + { + if (!(accessMask & (1 << k))) + continue; + + uint32_t regIdx = tempReg * 4 + k; + + // Add to kill set. Dupes are fine, this is a set. + m_VarKill.insert(regIdx); + // Also into the downward definitions. Overwrite the previous definition in this basic block, if any + Definition d(psInst, psOperand); + m_DEDef[regIdx].clear(); + m_DEDef[regIdx].insert(d); + } + return; + }); + + // Check for flow control instructions + bool blockDone = false; + switch (inst->eOpcode) + { + default: + break; + case OPCODE_RET: + blockDone = true; + break; + case OPCODE_RETC: + // Basic block is done, start a next one. + // There REALLY should be no existing blocks for this one + ASSERT(m_Graph.GetBasicBlockForInstruction(Utils::GetNextNonLabelInstruction(inst+1)) == NULL); + AddChildBasicBlock(Utils::GetNextNonLabelInstruction(inst + 1)); + blockDone = true; + break; + case OPCODE_LOOP: + case OPCODE_CASE: + case OPCODE_ENDIF: + case OPCODE_ENDSWITCH: + // Not a flow control branch, but need to start a new block anyway. 
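+                // (These opcodes are plain labels, but jumps land on the instruction right after them,
+                //  e.g. ENDLOOP branches back to LOOP + 1, so a fresh basic block has to begin there.)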
+ AddChildBasicBlock(Utils::GetNextNonLabelInstruction(inst + 1)); + blockDone = true; + break; + + // Branches + case OPCODE_IF: + case OPCODE_BREAKC: + case OPCODE_CONTINUEC: + { + const Instruction *jumpPoint = Utils::GetJumpPoint(inst); + ASSERT(jumpPoint != NULL); + + // The control branches to the next instruction or jumps to jumpPoint + AddChildBasicBlock(Utils::GetNextNonLabelInstruction(inst+1)); + AddChildBasicBlock(jumpPoint); + + blockDone = true; + break; + } + case OPCODE_SWITCH: + { + bool sawEndSwitch = false; + bool needConnectToParent = false; + const Instruction *jumpPoint = Utils::GetJumpPoint(inst, &sawEndSwitch, &needConnectToParent); + ASSERT(jumpPoint != NULL); + + while (1) + { + if(!sawEndSwitch || needConnectToParent) + AddChildBasicBlock(jumpPoint); + + if (sawEndSwitch) + break; + + // The -1 is a bit of a hack: we always scroll past all labels so rewind to the last one so we'll know to search for the next label + ASSERT((jumpPoint - 1)->eOpcode == OPCODE_CASE || (jumpPoint - 1)->eOpcode == OPCODE_DEFAULT); + jumpPoint = Utils::GetJumpPoint(jumpPoint-1, &sawEndSwitch, &needConnectToParent); + ASSERT(jumpPoint != NULL); + } + blockDone = true; + break; + } + + // Non-conditional jumps + case OPCODE_BREAK: + case OPCODE_ELSE: + case OPCODE_CONTINUE: + case OPCODE_ENDLOOP: + { + const Instruction *jumpPoint = Utils::GetJumpPoint(inst); + ASSERT(jumpPoint != NULL); + + AddChildBasicBlock(jumpPoint); + + blockDone = true; + break; + } + } + + if (blockDone) + break; + + inst++; + } + // In initial building phase, just make m_Reachable equal to m_DEDef + m_Reachable = m_DEDef; + + // Tag the end of the basic block + m_Last = inst; +// printf("Basic Block %d -> %d\n", (int)m_First->id, (int)m_Last->id); +} + + +BasicBlock * BasicBlock::AddChildBasicBlock(const Instruction *psFirst) +{ + // First see if this already exists + BasicBlock *b = m_Graph.GetBasicBlockForInstruction(psFirst); + if (b) + { + // Just add dependency and we're done + b->m_Preceding.insert(m_First); + m_Succeeding.insert(psFirst); + return b; + } + // Otherwise create one. Self-registering and self-connecting + return new BasicBlock(psFirst, m_Graph, m_First); +} + +bool BasicBlock::RebuildReachable() +{ + // Building the Reachable set is an iterative process, where each block gets rebuilt until nothing changes. + // Formula: reachable = this.DEDef union ( each preceding.Reachable() minus this.VarKill()) + + ReachableVariables newReachable = m_DEDef; + bool hasChanges = false; + + // Loop each predecessor + std::for_each(Preceding().begin(), Preceding().end(), [&](const Instruction *instr) + { + const BasicBlock *prec = m_Graph.GetBasicBlockForInstruction(instr); + const ReachableVariables &precReachable = prec->Reachable(); + + // Loop each variable*component + std::for_each(precReachable.begin(), precReachable.end(), [&](const std::pair &itr2) + { + uint32_t regIdx = itr2.first; + const BasicBlock::ReachableDefinitionsPerVariable &defs = itr2.second; + + // Already killed in this block? 
+ if (VarKill().find(regIdx) != VarKill().end()) + return; + + // Only do comparisons against current definitions if we've yet to find any changes + BasicBlock::ReachableDefinitionsPerVariable *currReachablePerVar = 0; + if (!hasChanges) + currReachablePerVar = &m_Reachable[regIdx]; + + BasicBlock::ReachableDefinitionsPerVariable &newReachablePerVar = newReachable[regIdx]; + + // Loop each definition + std::for_each(defs.begin(), defs.end(), [&](const BasicBlock::Definition &d) + { + if (!hasChanges) + { + // Check if already there + if (currReachablePerVar->find(d) == currReachablePerVar->end()) + hasChanges = true; + } + newReachablePerVar.insert(d); + }); // definition + + }); // variable*component + }); // predecessor + + if (hasChanges) + { + std::swap(m_Reachable, newReachable); + } + + return hasChanges; +} + +void BasicBlock::RVarUnion(ReachableVariables &a, const ReachableVariables &b) +{ + std::for_each(b.begin(), b.end(), [&a](const std::pair &rpvPair) + { + uint32_t regIdx = rpvPair.first; + const ReachableDefinitionsPerVariable &rpv = rpvPair.second; + // No previous definitions for this variable? + auto aRPVItr = a.find(regIdx); + if (aRPVItr == a.end()) + { + // Just set the definitions and continue + a[regIdx] = rpv; + return; + } + ReachableDefinitionsPerVariable &aRPV = aRPVItr->second; + aRPV.insert(rpv.begin(), rpv.end()); + }); +} + +#if ENABLE_UNIT_TESTS + +#define UNITY_EXTERNAL_TOOL 1 +#include "Testing.h" // From Runtime/Testing + +UNIT_TEST_SUITE(HLSLccTests) +{ + TEST(ControlFlowGraph_Build_Simple_Works) + { + Instruction inst[] = + { + // MOV t0.xyzw, I0.xyzw + Instruction(0, OPCODE_MOV, 0, 0xf, 0xffffffff, 0xf), + Instruction(1, OPCODE_RET) + }; + + ControlFlowGraph cfg; + const BasicBlock &root = cfg.Build(inst); + + CHECK_EQUAL(&inst[0], root.First()); + CHECK_EQUAL(&inst[1], root.Last()); + + CHECK(root.Preceding().empty()); + CHECK(root.Succeeding().empty()); + + CHECK_EQUAL(4, root.VarKill().size()); + + // Check that all components from t0 are killed + CHECK_EQUAL(1, root.VarKill().count(0)); + CHECK_EQUAL(1, root.VarKill().count(1)); + CHECK_EQUAL(1, root.VarKill().count(2)); + CHECK_EQUAL(1, root.VarKill().count(3)); + + CHECK_EQUAL(&inst[0], root.DEDef().find(0)->second.begin()->m_Instruction); + CHECK_EQUAL(&inst[0].asOperands[0], root.DEDef().find(0)->second.begin()->m_Operand); + CHECK_EQUAL(&inst[0], root.DEDef().find(1)->second.begin()->m_Instruction); + CHECK_EQUAL(&inst[0].asOperands[0], root.DEDef().find(1)->second.begin()->m_Operand); + CHECK_EQUAL(&inst[0], root.DEDef().find(2)->second.begin()->m_Instruction); + CHECK_EQUAL(&inst[0].asOperands[0], root.DEDef().find(2)->second.begin()->m_Operand); + CHECK_EQUAL(&inst[0], root.DEDef().find(3)->second.begin()->m_Instruction); + CHECK_EQUAL(&inst[0].asOperands[0], root.DEDef().find(3)->second.begin()->m_Operand); + + } + + TEST(ControlFlowGraph_Build_If_Works) + { + Instruction inst[] = + { + // B0 + // 0: MOV t1.xyzw, i0.xyzw + Instruction(0, OPCODE_MOV, 1, 0xf, 0xffffffff, 0xf), + // 1: MUL t0, t1, t1 + Instruction(1, OPCODE_MUL, 0, 0xf, 1, 0xf, 1, 0xf), + // 2: IF t1.y + Instruction(2, OPCODE_IF, 1, 2), + // B1 + // 3: MOV o0, t0 + Instruction(3, OPCODE_MOV, 0xffffffff, 0xf, 0, 0xf), + // 4: + Instruction(4, OPCODE_ELSE), + // B2 + // 5: MOV o0, t1 + Instruction(5, OPCODE_MOV, 0xffffffff, 0xf, 1, 0xf), + // 6: + Instruction(6, OPCODE_ENDIF), + // B3 + // 7: + Instruction(7, OPCODE_NOP), + // 8: + Instruction(8, OPCODE_RET) + }; + + ControlFlowGraph cfg; + const BasicBlock &root = 
cfg.Build(inst); + + CHECK_EQUAL(root.First(), &inst[0]); + CHECK_EQUAL(root.Last(), &inst[2]); + + CHECK(root.Preceding().empty()); + + const BasicBlock *b1 = cfg.GetBasicBlockForInstruction(&inst[3]); + const BasicBlock *b2 = cfg.GetBasicBlockForInstruction(&inst[5]); + const BasicBlock *b3 = cfg.GetBasicBlockForInstruction(&inst[7]); + + CHECK(b1 != NULL); + CHECK(b2 != NULL); + CHECK(b3 != NULL); + + CHECK_EQUAL(&inst[3], b1->First()); + CHECK_EQUAL(&inst[5], b2->First()); + CHECK_EQUAL(&inst[7], b3->First()); + + CHECK_EQUAL(&inst[4], b1->Last()); + CHECK_EQUAL(&inst[6], b2->Last()); + CHECK_EQUAL(&inst[8], b3->Last()); + + CHECK_EQUAL(1, root.Succeeding().count(&inst[3])); + CHECK_EQUAL(1, root.Succeeding().count(&inst[5])); + CHECK_EQUAL(2, root.Succeeding().size()); + + CHECK_EQUAL(1, b1->Preceding().size()); + CHECK_EQUAL(1, b1->Preceding().count(&inst[0])); + + CHECK_EQUAL(1, b2->Preceding().size()); + CHECK_EQUAL(1, b2->Preceding().count(&inst[0])); + + CHECK_EQUAL(2, b3->Preceding().size()); + CHECK_EQUAL(0, b3->Preceding().count(&inst[0])); + CHECK_EQUAL(1, b3->Preceding().count(&inst[3])); + CHECK_EQUAL(1, b3->Preceding().count(&inst[5])); + + // The if block must have upwards-exposed t0 + CHECK_EQUAL(1, b1->UEVar().count(0)); + CHECK_EQUAL(1, b1->UEVar().count(1)); + CHECK_EQUAL(1, b1->UEVar().count(2)); + CHECK_EQUAL(1, b1->UEVar().count(3)); + + // The else block must have upwards-exposed t1 + CHECK_EQUAL(1, b2->UEVar().count(4)); + CHECK_EQUAL(1, b2->UEVar().count(5)); + CHECK_EQUAL(1, b2->UEVar().count(6)); + CHECK_EQUAL(1, b2->UEVar().count(7)); + + CHECK_EQUAL(8, root.VarKill().size()); + + // Check that all components from t0 and t1 are killed + CHECK_EQUAL(1, root.VarKill().count(0)); + CHECK_EQUAL(1, root.VarKill().count(1)); + CHECK_EQUAL(1, root.VarKill().count(2)); + CHECK_EQUAL(1, root.VarKill().count(3)); + + CHECK_EQUAL(1, root.VarKill().count(4)); + CHECK_EQUAL(1, root.VarKill().count(5)); + CHECK_EQUAL(1, root.VarKill().count(6)); + CHECK_EQUAL(1, root.VarKill().count(7)); + + // The expected downwards-exposed definitions: + // B0: t0, t1 + // B1-B3: none + + CHECK_EQUAL(8, root.DEDef().size()); + CHECK_EQUAL(0, b1->DEDef().size()); + CHECK_EQUAL(0, b2->DEDef().size()); + CHECK_EQUAL(0, b3->DEDef().size()); + + CHECK(root.DEDef()==root.Reachable()); + + CHECK(root.Reachable()==b1->Reachable()); + CHECK(root.Reachable()==b2->Reachable()); + CHECK(root.Reachable()==b3->Reachable()); + + + } + + TEST(ControlFlowGraph_Build_SwitchCase_Works) + { + Instruction inst[] = + { + // Start B0 + // i0: MOV t0.x, I0.x + Instruction(0, OPCODE_MOV, 0, 1, 0xffffffff, 1), + // i1: MOVE t1.xyz, I0.yzw + Instruction(1, OPCODE_MOV, 1, 7, 0xffffffff, 0xe), + // i2: MOVE t1.w, t0.x + Instruction(2, OPCODE_MOV, 1, 8, 0xffffffff, 0x1), + // i3: MOVE t2, I0 + Instruction(3, OPCODE_MOV, 2, 0xf, 0xffffffff, 0xf), + // i4: SWITCH t0.y + Instruction(4, OPCODE_SWITCH, 1, 2), + // End B0 + // i5: CASE + Instruction(5, OPCODE_CASE), + // i6: DEFAULT + Instruction(6, OPCODE_DEFAULT), + // Start B1 + // i7: MOC t1.z, t0.x + Instruction(7, OPCODE_MOV, 1, 4, 0, 1), + // i8: CASE + Instruction(8, OPCODE_CASE), + // End B1 + // Start B2 + // i9: MOV t1.z, t2.x + Instruction(9, OPCODE_MOV, 1, 4, 2, 1), + // i10: BREAK + Instruction(10, OPCODE_BREAK), + // End B2 + // i11: CASE + Instruction(11, OPCODE_CASE), + // Start B3 + // i12: MOV t1.z, t2.y + Instruction(12, OPCODE_MOV, 1, 4, 2, 2), + // i13: BREAKC t0.x + Instruction(13, OPCODE_BREAKC, 0, 1), + // End B3 + // i14: CASE + Instruction(14, 
OPCODE_CASE), + // Start B4 + // i15: MOV t1.z, t2.z + Instruction(15, OPCODE_MOV, 1, 4, 2, 4), + // i16: ENDSWITCH + Instruction(16, OPCODE_ENDSWITCH), + // End B4 + // Start B5 + // i17: MOV o0, t1 + Instruction(17, OPCODE_MOV, 0xffffffff, 0xf, 1, 0xf), + // i18: RET + Instruction(18, OPCODE_RET) + // End B5 + }; + + ControlFlowGraph cfg; + const BasicBlock &root = cfg.Build(inst); + + CHECK_EQUAL(&inst[0], root.First()); + CHECK_EQUAL(&inst[4], root.Last()); + + const BasicBlock *b1 = cfg.GetBasicBlockForInstruction(&inst[7]); + const BasicBlock *b2 = cfg.GetBasicBlockForInstruction(&inst[9]); + const BasicBlock *b3 = cfg.GetBasicBlockForInstruction(&inst[12]); + const BasicBlock *b4 = cfg.GetBasicBlockForInstruction(&inst[15]); + const BasicBlock *b5 = cfg.GetBasicBlockForInstruction(&inst[17]); + + CHECK(b1 != NULL); + CHECK(b2 != NULL); + CHECK(b3 != NULL); + CHECK(b4 != NULL); + CHECK(b5 != NULL); + + // Check instruction ranges + CHECK_EQUAL(&inst[8], b1->Last()); + CHECK_EQUAL(&inst[10], b2->Last()); + CHECK_EQUAL(&inst[13], b3->Last()); + CHECK_EQUAL(&inst[16], b4->Last()); + CHECK_EQUAL(&inst[18], b5->Last()); + + // Nothing before the root, nothing after b5 + CHECK(root.Preceding().empty()); + CHECK(b5->Succeeding().empty()); + + // Check that all connections are there and no others. + + // B0->B1 + // B0->B2 + // B0->B3 + // B0->B4 + CHECK_EQUAL(1, root.Succeeding().count(&inst[7])); + CHECK_EQUAL(1, root.Succeeding().count(&inst[9])); + CHECK_EQUAL(1, root.Succeeding().count(&inst[12])); + CHECK_EQUAL(1, root.Succeeding().count(&inst[15])); + + CHECK_EQUAL(4, root.Succeeding().size()); + + // B1 + + // B1->B2 + CHECK_EQUAL(1, b1->Succeeding().count(&inst[9])); + CHECK_EQUAL(1, b1->Succeeding().size()); + + // B0->B1, reverse + CHECK_EQUAL(1, b1->Preceding().count(&inst[0])); + CHECK_EQUAL(1, b1->Preceding().size()); + + // B2 + + // B2->B5 + CHECK_EQUAL(1, b2->Succeeding().count(&inst[17])); + CHECK_EQUAL(1, b2->Succeeding().size()); + CHECK_EQUAL(1, b2->Preceding().count(&inst[7])); + CHECK_EQUAL(1, b2->Preceding().count(&inst[0])); + CHECK_EQUAL(2, b2->Preceding().size()); + + // B3 + // B3->B4 + // B3->B5 + CHECK_EQUAL(1, b3->Succeeding().count(&inst[15])); + CHECK_EQUAL(1, b3->Succeeding().count(&inst[17])); + CHECK_EQUAL(2, b3->Succeeding().size()); + CHECK_EQUAL(1, b3->Preceding().count(&inst[0])); + CHECK_EQUAL(1, b3->Preceding().size()); + + // B4 + CHECK_EQUAL(1, b4->Succeeding().count(&inst[17])); + CHECK_EQUAL(1, b4->Succeeding().size()); + CHECK_EQUAL(1, b4->Preceding().count(&inst[0])); + CHECK_EQUAL(2, b4->Preceding().size()); + + // B5 + CHECK_EQUAL(0, b5->Succeeding().size()); + CHECK_EQUAL(3, b5->Preceding().size()); //b2, b3, b4 + CHECK_EQUAL(1, b5->Preceding().count(&inst[9])); + CHECK_EQUAL(1, b5->Preceding().count(&inst[12])); + CHECK_EQUAL(1, b5->Preceding().count(&inst[15])); + + + // Verify reachable sets + + CHECK(root.Reachable() == root.DEDef()); + CHECK_EQUAL(9, root.Reachable().size()); + + // B5 should have these reachables: + // t0.x only from b0 + // t1.xy from b0, i1 + // t1.z from b2,i9 + b3,i12 + b4,i15 (the defs from b0 and b1 are killed by b2) + // t1.w from b0, i2 + // t2.xyzw from b0, i3 + + // Cast away const so [] works. 
+ BasicBlock::ReachableVariables &r = (BasicBlock::ReachableVariables &)b5->Reachable(); + + CHECK_EQUAL(9, r.size()); + + CHECK_EQUAL(1, r[0].size()); + CHECK_EQUAL(0, r[1].size()); + CHECK_EQUAL(0, r[2].size()); + CHECK_EQUAL(0, r[3].size()); + CHECK_EQUAL(&inst[0], r[0].begin()->m_Instruction); + + CHECK_EQUAL(1, r[4].size()); + CHECK_EQUAL(1, r[5].size()); + CHECK_EQUAL(3, r[6].size()); + CHECK_EQUAL(1, r[7].size()); + + const BasicBlock::ReachableDefinitionsPerVariable &d = r[6]; + BasicBlock::ReachableDefinitionsPerVariable t; + t.insert(BasicBlock::Definition(&inst[9], &inst[9].asOperands[0])); + t.insert(BasicBlock::Definition(&inst[12], &inst[12].asOperands[0])); + t.insert(BasicBlock::Definition(&inst[15], &inst[15].asOperands[0])); + + CHECK(t == d); + + CHECK_EQUAL(1, r[8].size()); + CHECK_EQUAL(1, r[9].size()); + CHECK_EQUAL(1, r[10].size()); + CHECK_EQUAL(1, r[11].size()); + + + } + + TEST(ControlFlowGraph_Build_Loop_Works) + { + Instruction inst[] = + { + // Start B0 + // i0: MOV t0.x, I0.x + Instruction(0, OPCODE_MOV, 0, 1, 0xffffffff, 1), + // i1: MOVE t1.xy, I0.zw // The .x definition should not make it past the loop, .y should. + Instruction(1, OPCODE_MOV, 1, 3, 0xffffffff, 0xc), + // i2: LOOP + Instruction(2, OPCODE_LOOP, 1, 2), + // End B0 -> B1 + // Begin B1 + // i3: MOV t1.x, t0.x + Instruction(3, OPCODE_MOV, 1, 1, 0, 1), + // i4: BREAKC t0.x + Instruction(4, OPCODE_BREAKC, 0, 1), + // End B1 -> B2, B3 + // Begin B2 + // i5: ADD t0.x, t0.y + Instruction(5, OPCODE_ADD, 0, 1, 0, 2), + // i6: MOV t1.x, t0.x // This should never show up as definition + Instruction(6, OPCODE_MOV, 1, 1, 0, 1), + // i7: ENDLOOP + Instruction(7, OPCODE_ENDLOOP), + // End B2 -> B1 + // Start B3 + // i8: MOV O0.x, t1.x + Instruction(8, OPCODE_MOV, 0xffffffff, 1, 1, 1), + // i9: RET + Instruction(9, OPCODE_RET), + // End B3 + }; + + ControlFlowGraph cfg; + const BasicBlock &root = cfg.Build(inst); + + CHECK_EQUAL(&inst[0], root.First()); + CHECK_EQUAL(&inst[2], root.Last()); + + const BasicBlock *b1 = cfg.GetBasicBlockForInstruction(&inst[3]); + const BasicBlock *b2 = cfg.GetBasicBlockForInstruction(&inst[5]); + const BasicBlock *b3 = cfg.GetBasicBlockForInstruction(&inst[8]); + + CHECK(b1 != NULL); + CHECK(b2 != NULL); + CHECK(b3 != NULL); + + // Check instruction ranges + CHECK_EQUAL(&inst[4], b1->Last()); + CHECK_EQUAL(&inst[7], b2->Last()); + CHECK_EQUAL(&inst[9], b3->Last()); + + // Nothing before the root, nothing after b3 + CHECK(root.Preceding().empty()); + CHECK(b3->Succeeding().empty()); + + // Check that all connections are there and no others. 
+ + // B0->B1 + CHECK_EQUAL(1, root.Succeeding().count(&inst[3])); + CHECK_EQUAL(1, root.Succeeding().size()); + + // B1 + + // B1->B2 + // B1->B3 + CHECK_EQUAL(1, b1->Succeeding().count(&inst[5])); + CHECK_EQUAL(1, b1->Succeeding().count(&inst[8])); + CHECK_EQUAL(2, b1->Succeeding().size()); + + // B0->B1, reverse + CHECK_EQUAL(1, b1->Preceding().count(&inst[0])); + // We may also come from B2 + CHECK_EQUAL(1, b1->Preceding().count(&inst[5])); + CHECK_EQUAL(2, b1->Preceding().size()); + + // B2 + + // B2->B1 + CHECK_EQUAL(1, b2->Succeeding().count(&inst[3])); + CHECK_EQUAL(1, b2->Succeeding().size()); + CHECK_EQUAL(1, b2->Preceding().count(&inst[3])); + CHECK_EQUAL(1, b2->Preceding().size()); + + // B3 + CHECK_EQUAL(1, b3->Preceding().count(&inst[3])); + CHECK_EQUAL(1, b3->Preceding().size()); + + // Verify reachable sets + + + BasicBlock::ReachableVariables t; + + // B0 DEDef and Reachable + t.clear(); + t[0].insert(BasicBlock::Definition(&inst[0], &inst[0].asOperands[0])); + t[4].insert(BasicBlock::Definition(&inst[1], &inst[1].asOperands[0])); + t[5].insert(BasicBlock::Definition(&inst[1], &inst[1].asOperands[0])); + + CHECK(root.DEDef() == t); + CHECK(root.Reachable() == root.DEDef()); + + // B1 DEDef and Reachable + t.clear(); + t[4].insert(BasicBlock::Definition(&inst[3], &inst[3].asOperands[0])); + CHECK(b1->DEDef() == t); + + t = b1->DEDef(); + // t0.x from i0, t1.y (but not .x) from i1 + t[0].insert(BasicBlock::Definition(&inst[0], &inst[0].asOperands[0])); + t[5].insert(BasicBlock::Definition(&inst[1], &inst[1].asOperands[0])); + + // t0.x from i5, but nothing from i6 + t[0].insert(BasicBlock::Definition(&inst[5], &inst[5].asOperands[0])); + CHECK(b1->Reachable() == t); + + // B2 + t.clear(); + t[0].insert(BasicBlock::Definition(&inst[5], &inst[5].asOperands[0])); + t[4].insert(BasicBlock::Definition(&inst[6], &inst[6].asOperands[0])); + CHECK(b2->DEDef() == t); + + t = b2->DEDef(); + t[5].insert(BasicBlock::Definition(&inst[1], &inst[1].asOperands[0])); + + CHECK(b2->Reachable() == t); + + // B3 + t.clear(); + CHECK(b3->DEDef() == t); + // t0.x from i0, t1.y from i1 + t[0].insert(BasicBlock::Definition(&inst[0], &inst[0].asOperands[0])); + t[5].insert(BasicBlock::Definition(&inst[1], &inst[1].asOperands[0])); + + // t1.x from i3 + t[4].insert(BasicBlock::Definition(&inst[3], &inst[3].asOperands[0])); + + // t0.x from i5 + t[0].insert(BasicBlock::Definition(&inst[5], &inst[5].asOperands[0])); + + CHECK(b3->Reachable() == t); + } + +} + +#endif + diff --git a/src/ControlFlowGraphUtils.cpp b/src/ControlFlowGraphUtils.cpp new file mode 100644 index 0000000..648b469 --- /dev/null +++ b/src/ControlFlowGraphUtils.cpp @@ -0,0 +1,121 @@ + +#include "ControlFlowGraphUtils.h" + +#include "internal_includes/debug.h" +#include "internal_includes/Instruction.h" +#include "internal_includes/Operand.h" + + + +// Get the next instruction that's not one of CASE, DEFAULT, LOOP, ENDSWITCH +const Instruction *HLSLcc::ControlFlow::Utils::GetNextNonLabelInstruction(const Instruction *psStart, bool *sawEndSwitch /*= 0*/) +{ + const Instruction *inst = psStart; + // Skip CASE/DEFAULT/ENDSWITCH/LOOP labels + while (inst->eOpcode == OPCODE_CASE || inst->eOpcode == OPCODE_DEFAULT || inst->eOpcode == OPCODE_ENDSWITCH || inst->eOpcode == OPCODE_LOOP) + { + // We really shouldn't be seeing ENDSWITCH without sawEndSwitch being set (as in, we're expecting it) + ASSERT(inst->eOpcode != OPCODE_ENDSWITCH || sawEndSwitch != NULL); + if (inst->eOpcode == OPCODE_ENDSWITCH && sawEndSwitch != NULL) + *sawEndSwitch = 
true; + inst++; + } + return inst; + +} + +// For a given flow-control instruction, find the corresponding jump location: +// If the input is OPCODE_IF, then find the next same-level ELSE or ENDIF +1 +// For ELSE, find same level ENDIF + 1 +// For BREAK/BREAKC, find next ENDLOOP or ENDSWITCH + 1 +// For SWITCH, find next same-level CASE/DEFAULT (skip multiple consecutive case/default labels) or ENDSWITCH + 1 +// For ENDLOOP, find previous same-level LOOP + 1 +// For CASE/DEFAULT, find next same-level CASE/DEFAULT or ENDSWITCH + 1, skip multiple consecutive case/default labels +// For CONTINUE/C the previous LOOP + 1 +// Note that LOOP/ENDSWITCH itself is nothing but a label but it still starts a new basic block. +// Note that CASE labels fall through. +// Always returns the beginning of the next block, so skip multiple CASE/DEFAULT labels etc. +const Instruction * HLSLcc::ControlFlow::Utils::GetJumpPoint(const Instruction *psStart, bool *sawEndSwitch /*= 0*/, bool *needConnectToParent /* = 0*/) +{ + const Instruction *inst = psStart; + int depth = 0; + OPCODE_TYPE op = psStart->eOpcode; + ASSERT(op == OPCODE_IF || op == OPCODE_ELSE || op == OPCODE_BREAK || op == OPCODE_BREAKC + || op == OPCODE_SWITCH || op == OPCODE_CASE || op == OPCODE_DEFAULT + || op == OPCODE_ENDLOOP || op == OPCODE_CONTINUE || op == OPCODE_CONTINUEC); + + switch (op) + { + default: + ASSERT(0); + break; + case OPCODE_IF: + case OPCODE_ELSE: + while (1) + { + inst++; + if ((inst->eOpcode == OPCODE_ELSE || inst->eOpcode == OPCODE_ENDIF) && (depth == 0)) + { + return GetNextNonLabelInstruction(inst + 1, sawEndSwitch); + } + if (inst->eOpcode == OPCODE_IF) + depth++; + if (inst->eOpcode == OPCODE_ENDIF) + depth--; + } + case OPCODE_BREAK: + case OPCODE_BREAKC: + while (1) + { + inst++; + if ((inst->eOpcode == OPCODE_ENDLOOP || inst->eOpcode == OPCODE_ENDSWITCH) && (depth == 0)) + { + return GetNextNonLabelInstruction(inst + 1, sawEndSwitch); + } + if (inst->eOpcode == OPCODE_SWITCH || inst->eOpcode == OPCODE_LOOP) + depth++; + if (inst->eOpcode == OPCODE_ENDSWITCH || inst->eOpcode == OPCODE_ENDLOOP) + depth--; + } + case OPCODE_CONTINUE: + case OPCODE_CONTINUEC: + case OPCODE_ENDLOOP: + while (1) + { + inst--; + if ((inst->eOpcode == OPCODE_LOOP) && (depth == 0)) + { + return GetNextNonLabelInstruction(inst + 1, sawEndSwitch); + } + if (inst->eOpcode == OPCODE_LOOP) + depth--; + if (inst->eOpcode == OPCODE_ENDLOOP) + depth++; + } + case OPCODE_SWITCH: + case OPCODE_CASE: + case OPCODE_DEFAULT: + while (1) + { + inst++; + if ((inst->eOpcode == OPCODE_CASE || inst->eOpcode == OPCODE_DEFAULT || inst->eOpcode == OPCODE_ENDSWITCH) && (depth == 0)) + { + // Note that we'll skip setting sawEndSwitch if inst->eOpcode = OPCODE_ENDSWITCH + // so that BasicBlock::Build can distinguish between there being a direct route + // from SWITCH->ENDSWITCH (CASE followed directly by ENDSWITCH) and not. 
+
+                if (inst->eOpcode == OPCODE_ENDSWITCH && sawEndSwitch != 0)
+                    *sawEndSwitch = true;
+
+                return GetNextNonLabelInstruction(inst + 1, needConnectToParent);
+            }
+            if (inst->eOpcode == OPCODE_SWITCH)
+                depth++;
+            if (inst->eOpcode == OPCODE_ENDSWITCH)
+                depth--;
+        }
+
+    }
+    return 0;
+}
+
diff --git a/src/DataTypeAnalysis.cpp b/src/DataTypeAnalysis.cpp
new file mode 100644
index 0000000..dce05cd
--- /dev/null
+++ b/src/DataTypeAnalysis.cpp
@@ -0,0 +1,769 @@
+
+#include "internal_includes/debug.h"
+#include "internal_includes/tokens.h"
+#include "internal_includes/HLSLccToolkit.h"
+#include "internal_includes/DataTypeAnalysis.h"
+#include "internal_includes/Shader.h"
+#include "internal_includes/HLSLCrossCompilerContext.h"
+#include "internal_includes/Instruction.h"
+#include <algorithm>
+
+
+// Helper function to set the vector type of 1 or more components in a vector
+// If the existing values (in vector we're writing to) are all SVT_VOID, just upgrade the value and we're done
+// Otherwise, set all the components in the vector that currently are set to that same value OR are now being written to
+// to the "highest" type value (ordering int->uint->float)
+static void SetVectorType(std::vector<SHADER_VARIABLE_TYPE> &aeTempVecType, uint32_t regBaseIndex, uint32_t componentMask, SHADER_VARIABLE_TYPE eType, int *psMadeProgress)
+{
+    int i = 0;
+
+    // Expand the mask to include all components that are used, also upgrade type
+    for (i = 0; i < 4; i++)
+    {
+        if (aeTempVecType[regBaseIndex + i] != SVT_VOID)
+        {
+            componentMask |= (1 << i);
+            eType = HLSLcc::SelectHigherType(eType, aeTempVecType[regBaseIndex + i]);
+        }
+    }
+
+    // Now componentMask contains the components we actually need to update and eType may have been changed to something else.
+    // Write the results
+    for (i = 0; i < 4; i++)
+    {
+        if (componentMask & (1 << i))
+        {
+            if (aeTempVecType[regBaseIndex + i] != eType)
+            {
+                aeTempVecType[regBaseIndex + i] = eType;
+                if (psMadeProgress)
+                    *psMadeProgress = 1;
+            }
+        }
+    }
+
+}
+
+static SHADER_VARIABLE_TYPE OperandPrecisionToShaderVariableType(OPERAND_MIN_PRECISION prec, SHADER_VARIABLE_TYPE eDefault)
+{
+    SHADER_VARIABLE_TYPE eType = eDefault;
+    switch (prec)
+    {
+    case OPERAND_MIN_PRECISION_DEFAULT:
+        break;
+    case OPERAND_MIN_PRECISION_SINT_16:
+        eType = SVT_INT16;
+        break;
+    case OPERAND_MIN_PRECISION_UINT_16:
+        eType = SVT_UINT16;
+        break;
+    case OPERAND_MIN_PRECISION_FLOAT_2_8:
+        eType = SVT_FLOAT10;
+        break;
+    case OPERAND_MIN_PRECISION_FLOAT_16:
+        eType = SVT_FLOAT16;
+        break;
+    default:
+        ASSERT(0); // Catch this to see what's going on.
+        break;
+    }
+    return eType;
+
+}
+
+static void MarkOperandAs(Operand *psOperand, SHADER_VARIABLE_TYPE eType, std::vector<SHADER_VARIABLE_TYPE> &aeTempVecType)
+{
+    if (psOperand->eType == OPERAND_TYPE_TEMP)
+    {
+        const uint32_t ui32RegIndex = psOperand->ui32RegisterNumber * 4;
+        uint32_t mask = psOperand->GetAccessMask();
+        // Adjust type based on operand precision
+        eType = OperandPrecisionToShaderVariableType(psOperand->eMinPrecision, eType);
+
+        SetVectorType(aeTempVecType, ui32RegIndex, mask, eType, NULL);
+    }
+}
+
+static void MarkAllOperandsAs(Instruction* psInst, SHADER_VARIABLE_TYPE eType, std::vector<SHADER_VARIABLE_TYPE> &aeTempVecType)
+{
+    uint32_t i = 0;
+    for (i = 0; i < psInst->ui32NumOperands; i++)
+    {
+        MarkOperandAs(&psInst->asOperands[i], eType, aeTempVecType);
+    }
+}
+
+// Mark scalars from CBs. TODO: Do we need to do the same for vec2/3's as well? There may be swizzles involved which make it vec4 or something else again.
+static void SetCBOperandComponents(HLSLCrossCompilerContext *psContext, Operand *psOperand)
+{
+    const ConstantBuffer* psCBuf = NULL;
+    const ShaderVarType* psVarType = NULL;
+    int32_t rebase = 0;
+    bool isArray;
+
+    if (psOperand->eType != OPERAND_TYPE_CONSTANT_BUFFER)
+        return;
+
+    // Ignore selection modes that access more than one component
+    switch (psOperand->eSelMode)
+    {
+    case OPERAND_4_COMPONENT_SELECT_1_MODE:
+        break;
+    case OPERAND_4_COMPONENT_SWIZZLE_MODE:
+        if (!psOperand->IsSwizzleReplicated())
+            return;
+        break;
+    case OPERAND_4_COMPONENT_MASK_MODE:
+        return;
+    }
+
+    psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, psOperand->aui32ArraySizes[0], &psCBuf);
+    ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], psOperand->aui32Swizzle, psCBuf, &psVarType, &isArray, NULL, &rebase, psContext->flags);
+
+    if (psVarType->Class == SVC_SCALAR)
+        psOperand->iNumComponents = 1;
+
+}
+
+struct SetPartialDataTypes
+{
+    SetPartialDataTypes(SHADER_VARIABLE_TYPE *_aeTempVec)
+        : m_TempVec(_aeTempVec)
+    {}
+    SHADER_VARIABLE_TYPE *m_TempVec;
+
+    template <typename ItrType> void operator()(ItrType inst, Operand *psOperand, uint32_t ui32OperandType) const
+    {
+        uint32_t mask = 0;
+        SHADER_VARIABLE_TYPE *aeTempVecType = m_TempVec;
+        SHADER_VARIABLE_TYPE newType;
+        uint32_t i, reg;
+        if (psOperand->eType != OPERAND_TYPE_TEMP)
+            return;
+
+        if (ui32OperandType == FEO_FLAG_SUBOPERAND)
+        {
+            // We really shouldn't ever be getting minprecision float indices here
+            ASSERT(psOperand->eMinPrecision != OPERAND_MIN_PRECISION_FLOAT_16 && psOperand->eMinPrecision != OPERAND_MIN_PRECISION_FLOAT_2_8);
+
+            mask = psOperand->GetAccessMask();
+            reg = psOperand->ui32RegisterNumber;
+            newType = OperandPrecisionToShaderVariableType(psOperand->eMinPrecision, SVT_INT_AMBIGUOUS);
+            for (i = 0; i < 4; i++)
+            {
+                if (!(mask & (1 << i)))
+                    continue;
+                if (aeTempVecType[reg * 4 + i] == SVT_VOID)
+                    aeTempVecType[reg * 4 + i] = newType;
+            }
+            return;
+
+        }
+
+        if (psOperand->eMinPrecision == OPERAND_MIN_PRECISION_DEFAULT)
+            return;
+
+        mask = psOperand->GetAccessMask();
+        reg = psOperand->ui32RegisterNumber;
+        newType = OperandPrecisionToShaderVariableType(psOperand->eMinPrecision, SVT_VOID);
+        ASSERT(newType != SVT_VOID);
+        for (i = 0; i < 4; i++)
+        {
+            if (!(mask & (1 << i)))
+                continue;
+            aeTempVecType[reg * 4 + i] = newType;
+        }
+        return;
+    }
+};
+
+// Write back the temp datatypes into operands. Also mark scalars in constant buffers
+
+struct WritebackDataTypes
+{
+    WritebackDataTypes(HLSLCrossCompilerContext *_ctx, SHADER_VARIABLE_TYPE *_aeTempVec)
+        : m_Context(_ctx)
+        , m_TempVec(_aeTempVec)
+    {}
+    HLSLCrossCompilerContext *m_Context;
+    SHADER_VARIABLE_TYPE *m_TempVec;
+
+    template <typename ItrType> void operator()(ItrType inst, Operand *psOperand, uint32_t ui32OperandType) const
+    {
+        SHADER_VARIABLE_TYPE *aeTempVecType = m_TempVec;
+        uint32_t reg, mask, i;
+        SHADER_VARIABLE_TYPE dtype;
+
+        if (psOperand->eType == OPERAND_TYPE_CONSTANT_BUFFER)
+            SetCBOperandComponents(m_Context, psOperand);
+
+        if (psOperand->eType != OPERAND_TYPE_TEMP)
+            return;
+
+        reg = psOperand->ui32RegisterNumber;
+        mask = psOperand->GetAccessMask();
+        dtype = SVT_VOID;
+
+        for (i = 0; i < 4; i++)
+        {
+            if (!(mask & (1 << i)))
+                continue;
+
+            // Check that all components have the same type
+            ASSERT(dtype == SVT_VOID || dtype == aeTempVecType[reg * 4 + i]);
+
+            dtype = aeTempVecType[reg * 4 + i];
+
+            ASSERT(dtype != SVT_VOID);
+            ASSERT(dtype == OperandPrecisionToShaderVariableType(psOperand->eMinPrecision, dtype));
+
+            psOperand->aeDataType[i] = dtype;
+        }
+
+        return;
+    }
+
+};
+
+
+void HLSLcc::DataTypeAnalysis::SetDataTypes(HLSLCrossCompilerContext* psContext, std::vector<Instruction> & instructions, uint32_t ui32TempCount, std::vector<SHADER_VARIABLE_TYPE> &results)
+{
+    uint32_t i;
+    Instruction *psFirstInst = &instructions[0];
+    Instruction *psInst = psFirstInst;
+    // Start with void, then move up the chain void->ambiguous int->minprec int/uint->int/uint->minprec float->float
+    std::vector<SHADER_VARIABLE_TYPE> &aeTempVecType = results;
+
+    aeTempVecType.clear();
+    aeTempVecType.resize(ui32TempCount * 4, SVT_VOID);
+
+    if (ui32TempCount == 0)
+        return;
+
+    // Go through the instructions, pick up partial datatypes, because we at least know those for a fact.
+ // Also set all suboperands to be integers (they're always used as indices) + ForEachOperand(instructions.begin(), instructions.end(), FEO_FLAG_ALL, SetPartialDataTypes(&aeTempVecType[0])); + + // if (psContext->psShader->ui32MajorVersion <= 3) + { + // First pass, do analysis: deduce the data type based on opcodes, fill out aeTempVecType table + // Only ever to int->float promotion (or int->uint), never the other way around + for (i = 0; i < (uint32_t)instructions.size(); ++i, psInst++) + { + int k = 0; + if (psInst->ui32NumOperands == 0) + continue; +#ifdef _DEBUG + for (k = 0; k < (int)psInst->ui32NumOperands; k++) + { + if (psInst->asOperands[k].eType == OPERAND_TYPE_TEMP) + { + ASSERT(psInst->asOperands[k].ui32RegisterNumber < ui32TempCount); + } + } +#endif + + switch (psInst->eOpcode) + { + // All float-only ops + case OPCODE_ADD: + case OPCODE_DERIV_RTX: + case OPCODE_DERIV_RTY: + case OPCODE_DIV: + case OPCODE_DP2: + case OPCODE_DP3: + case OPCODE_DP4: + case OPCODE_EXP: + case OPCODE_FRC: + case OPCODE_LOG: + case OPCODE_MAD: + case OPCODE_MIN: + case OPCODE_MAX: + case OPCODE_MUL: + case OPCODE_ROUND_NE: + case OPCODE_ROUND_NI: + case OPCODE_ROUND_PI: + case OPCODE_ROUND_Z: + case OPCODE_RSQ: + case OPCODE_SAMPLE: + case OPCODE_SAMPLE_C: + case OPCODE_SAMPLE_C_LZ: + case OPCODE_SAMPLE_L: + case OPCODE_SAMPLE_D: + case OPCODE_SAMPLE_B: + case OPCODE_SQRT: + case OPCODE_SINCOS: + case OPCODE_LOD: + case OPCODE_GATHER4: + + case OPCODE_DERIV_RTX_COARSE: + case OPCODE_DERIV_RTX_FINE: + case OPCODE_DERIV_RTY_COARSE: + case OPCODE_DERIV_RTY_FINE: + case OPCODE_GATHER4_C: + case OPCODE_GATHER4_PO: + case OPCODE_GATHER4_PO_C: + case OPCODE_RCP: + + MarkAllOperandsAs(psInst, SVT_FLOAT, aeTempVecType); + break; + + // Comparison ops, need to enable possibility for going boolean + case OPCODE_IEQ: + case OPCODE_INE: + MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], SVT_INT_AMBIGUOUS, aeTempVecType); + MarkOperandAs(&psInst->asOperands[2], SVT_INT_AMBIGUOUS, aeTempVecType); + break; + + case OPCODE_AND: + MarkOperandAs(&psInst->asOperands[0], SVT_INT_AMBIGUOUS, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], SVT_BOOL, aeTempVecType); + MarkOperandAs(&psInst->asOperands[2], SVT_BOOL, aeTempVecType); + break; + + + case OPCODE_IF: + case OPCODE_BREAKC: + case OPCODE_CALLC: + case OPCODE_CONTINUEC: + case OPCODE_RETC: + MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType); + break; + + case OPCODE_ILT: + case OPCODE_IGE: + MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType); + break; + + case OPCODE_ULT: + case OPCODE_UGE: + MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], SVT_UINT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[2], SVT_UINT, aeTempVecType); + break; + + // Integer ops that don't care of signedness + case OPCODE_IADD: + case OPCODE_INEG: + case OPCODE_ISHL: + case OPCODE_NOT: + case OPCODE_OR: + case OPCODE_XOR: + case OPCODE_BUFINFO: + case OPCODE_COUNTBITS: + case OPCODE_FIRSTBIT_HI: + case OPCODE_FIRSTBIT_LO: + case OPCODE_FIRSTBIT_SHI: + case OPCODE_BFI: + case OPCODE_BFREV: + case OPCODE_ATOMIC_AND: + case OPCODE_ATOMIC_OR: + case OPCODE_ATOMIC_XOR: + case OPCODE_ATOMIC_CMP_STORE: + case OPCODE_ATOMIC_IADD: + case OPCODE_IMM_ATOMIC_IADD: + case OPCODE_IMM_ATOMIC_AND: + case OPCODE_IMM_ATOMIC_OR: + 
case OPCODE_IMM_ATOMIC_XOR: + case OPCODE_IMM_ATOMIC_EXCH: + case OPCODE_IMM_ATOMIC_CMP_EXCH: + + + MarkAllOperandsAs(psInst, SVT_INT_AMBIGUOUS, aeTempVecType); + break; + + + // Integer ops + case OPCODE_IMAD: + case OPCODE_IMAX: + case OPCODE_IMIN: + case OPCODE_IMUL: + case OPCODE_ISHR: + case OPCODE_IBFE: + + case OPCODE_ATOMIC_IMAX: + case OPCODE_ATOMIC_IMIN: + case OPCODE_IMM_ATOMIC_IMAX: + case OPCODE_IMM_ATOMIC_IMIN: + MarkAllOperandsAs(psInst, SVT_INT, aeTempVecType); + break; + + + // uint ops + case OPCODE_UDIV: + case OPCODE_UMUL: + case OPCODE_UMAD: + case OPCODE_UMAX: + case OPCODE_UMIN: + case OPCODE_USHR: + case OPCODE_UADDC: + case OPCODE_USUBB: + case OPCODE_ATOMIC_UMAX: + case OPCODE_ATOMIC_UMIN: + case OPCODE_IMM_ATOMIC_UMAX: + case OPCODE_IMM_ATOMIC_UMIN: + case OPCODE_IMM_ATOMIC_ALLOC: + case OPCODE_IMM_ATOMIC_CONSUME: + MarkAllOperandsAs(psInst, SVT_UINT, aeTempVecType); + break; + case OPCODE_UBFE: + MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[3], SVT_UINT, aeTempVecType); + break; + + // Need special handling + case OPCODE_FTOI: + case OPCODE_FTOU: + MarkOperandAs(&psInst->asOperands[0], psInst->eOpcode == OPCODE_FTOI ? SVT_INT : SVT_UINT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], SVT_FLOAT, aeTempVecType); + break; + + case OPCODE_GE: + case OPCODE_LT: + case OPCODE_EQ: + case OPCODE_NE: + + MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], SVT_FLOAT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[2], SVT_FLOAT, aeTempVecType); + break; + + case OPCODE_ITOF: + case OPCODE_UTOF: + MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], psInst->eOpcode == OPCODE_ITOF ? SVT_INT : SVT_UINT, aeTempVecType); + break; + + case OPCODE_LD: + case OPCODE_LD_MS: + // TODO: Would need to know the sampler return type + MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], SVT_UINT, aeTempVecType); + break; + + case OPCODE_MOVC: + MarkOperandAs(&psInst->asOperands[1], SVT_BOOL, aeTempVecType); + case OPCODE_SWAPC: + MarkOperandAs(&psInst->asOperands[2], SVT_BOOL, aeTempVecType); + break; + + case OPCODE_RESINFO: + { + if (psInst->eResInfoReturnType != RESINFO_INSTRUCTION_RETURN_UINT) + MarkAllOperandsAs(psInst, SVT_FLOAT, aeTempVecType); + break; + } + + case OPCODE_SAMPLE_INFO: + // TODO decode the _uint flag + MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType); + break; + + case OPCODE_SAMPLE_POS: + MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType); + break; + + + case OPCODE_LD_UAV_TYPED: + // translates to gvec4 loadImage(gimage i, ivec p). 
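+                // (Only the coordinate operand is forced to a signed integer vector here; the destination
+                //  keeps whatever type the rest of the analysis infers for the loaded value.)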
+ MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); // ivec p + break; + + case OPCODE_STORE_UAV_TYPED: + // translates to storeImage(gimage i, ivec p, gvec4 data) + MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); // ivec p + MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType); // gvec4 data + break; + + case OPCODE_LD_RAW: + if (psInst->asOperands[2].eType == OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) + MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType); + else + MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); + break; + + case OPCODE_STORE_RAW: + if (psInst->asOperands[0].eType == OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) + MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType); + else + MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); + break; + + case OPCODE_LD_STRUCTURED: + MarkOperandAs(&psInst->asOperands[0], SVT_INT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType); + break; + + case OPCODE_STORE_STRUCTURED: + MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[3], SVT_INT, aeTempVecType); + break; + + case OPCODE_F32TOF16: + case OPCODE_F16TOF32: + // TODO + ASSERT(0); + break; + + + + // No-operands, should never get here anyway + /* case OPCODE_BREAK: + case OPCODE_CALL: + case OPCODE_CASE: + case OPCODE_CONTINUE: + case OPCODE_CUT: + case OPCODE_DEFAULT: + case OPCODE_DISCARD: + case OPCODE_ELSE: + case OPCODE_EMIT: + case OPCODE_EMITTHENCUT: + case OPCODE_ENDIF: + case OPCODE_ENDLOOP: + case OPCODE_ENDSWITCH: + + case OPCODE_LABEL: + case OPCODE_LOOP: + case OPCODE_CUSTOMDATA: + case OPCODE_NOP: + case OPCODE_RET: + case OPCODE_SWITCH: + case OPCODE_DCL_RESOURCE: // DCL* opcodes have + case OPCODE_DCL_CONSTANT_BUFFER: // custom operand formats. 
+ case OPCODE_DCL_SAMPLER: + case OPCODE_DCL_INDEX_RANGE: + case OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY: + case OPCODE_DCL_GS_INPUT_PRIMITIVE: + case OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT: + case OPCODE_DCL_INPUT: + case OPCODE_DCL_INPUT_SGV: + case OPCODE_DCL_INPUT_SIV: + case OPCODE_DCL_INPUT_PS: + case OPCODE_DCL_INPUT_PS_SGV: + case OPCODE_DCL_INPUT_PS_SIV: + case OPCODE_DCL_OUTPUT: + case OPCODE_DCL_OUTPUT_SGV: + case OPCODE_DCL_OUTPUT_SIV: + case OPCODE_DCL_TEMPS: + case OPCODE_DCL_INDEXABLE_TEMP: + case OPCODE_DCL_GLOBAL_FLAGS: + + + case OPCODE_HS_DECLS: // token marks beginning of HS sub-shader + case OPCODE_HS_CONTROL_POINT_PHASE: // token marks beginning of HS sub-shader + case OPCODE_HS_FORK_PHASE: // token marks beginning of HS sub-shader + case OPCODE_HS_JOIN_PHASE: // token marks beginning of HS sub-shader + + case OPCODE_EMIT_STREAM: + case OPCODE_CUT_STREAM: + case OPCODE_EMITTHENCUT_STREAM: + case OPCODE_INTERFACE_CALL: + + + case OPCODE_DCL_STREAM: + case OPCODE_DCL_FUNCTION_BODY: + case OPCODE_DCL_FUNCTION_TABLE: + case OPCODE_DCL_INTERFACE: + + case OPCODE_DCL_INPUT_CONTROL_POINT_COUNT: + case OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT: + case OPCODE_DCL_TESS_DOMAIN: + case OPCODE_DCL_TESS_PARTITIONING: + case OPCODE_DCL_TESS_OUTPUT_PRIMITIVE: + case OPCODE_DCL_HS_MAX_TESSFACTOR: + case OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT: + case OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT: + + case OPCODE_DCL_THREAD_GROUP: + case OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED: + case OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW: + case OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED: + case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW: + case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED: + case OPCODE_DCL_RESOURCE_RAW: + case OPCODE_DCL_RESOURCE_STRUCTURED: + case OPCODE_SYNC: + + // TODO + case OPCODE_DADD: + case OPCODE_DMAX: + case OPCODE_DMIN: + case OPCODE_DMUL: + case OPCODE_DEQ: + case OPCODE_DGE: + case OPCODE_DLT: + case OPCODE_DNE: + case OPCODE_DMOV: + case OPCODE_DMOVC: + case OPCODE_DTOF: + case OPCODE_FTOD: + + case OPCODE_EVAL_SNAPPED: + case OPCODE_EVAL_SAMPLE_INDEX: + case OPCODE_EVAL_CENTROID: + + case OPCODE_DCL_GS_INSTANCE_COUNT: + + case OPCODE_ABORT: + case OPCODE_DEBUG_BREAK:*/ + + default: + break; + } + } + } + + { + int madeProgress = 0; + // Next go through MOV and MOVC and propagate the data type of whichever parameter we happen to have + do + { + madeProgress = 0; + psInst = psFirstInst; + for (i = 0; i < (uint32_t)instructions.size(); ++i, psInst++) + { + if (psInst->eOpcode == OPCODE_MOV || psInst->eOpcode == OPCODE_MOVC) + { + // Figure out the data type + uint32_t k; + SHADER_VARIABLE_TYPE dataType = SVT_VOID; + int foundImmediate = 0; + for (k = 0; k < psInst->ui32NumOperands; k++) + { + uint32_t mask, j; + if (psInst->eOpcode == OPCODE_MOVC && k == 1) + continue; // Ignore the condition operand, it's always int + + if (psInst->asOperands[k].eType == OPERAND_TYPE_IMMEDIATE32) + { + foundImmediate = 1; + continue; // We don't know the data type of immediates yet, but if this is the only one found, mark as int, it'll get promoted later if needed + } + + if (psInst->asOperands[k].eType != OPERAND_TYPE_TEMP) + { + dataType = psInst->asOperands[k].GetDataType(psContext); + break; + } + + if (psInst->asOperands[k].eModifier != OPERAND_MODIFIER_NONE) + { + // If any modifiers are used in MOV or MOVC, that automatically is treated as float. 
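+                            // (A single modified operand is enough to settle the whole MOV/MOVC as float here,
+                            //  presumably because abs/neg source modifiers are only expected on float data.)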
+ dataType = SVT_FLOAT; + break; + } + + mask = psInst->asOperands[k].GetAccessMask(); + for (j = 0; j < 4; j++) + { + if (!(mask & (1 << j))) + continue; + if (aeTempVecType[psInst->asOperands[k].ui32RegisterNumber * 4 + j] != SVT_VOID) + { + dataType = HLSLcc::SelectHigherType(dataType, aeTempVecType[psInst->asOperands[k].ui32RegisterNumber * 4 + j]); + } + } + } + + if (foundImmediate && dataType == SVT_VOID) + dataType = SVT_INT; + + if (dataType != SVT_VOID) + { + // Found data type, write to all operands + // First adjust it to not have precision qualifiers in it + switch (dataType) + { + case SVT_FLOAT10: + case SVT_FLOAT16: + dataType = SVT_FLOAT; + break; + case SVT_INT12: + case SVT_INT16: + dataType = SVT_INT; + break; + case SVT_UINT16: + case SVT_UINT8: + dataType = SVT_UINT; + break; + default: + break; + } + for (k = 0; k < psInst->ui32NumOperands; k++) + { + uint32_t mask; + if (psInst->eOpcode == OPCODE_MOVC && k == 1) + continue; // Ignore the condition operand, it's always int + + if (psInst->asOperands[k].eType != OPERAND_TYPE_TEMP) + continue; + if (psInst->asOperands[k].eMinPrecision != OPERAND_MIN_PRECISION_DEFAULT) + continue; + + mask = psInst->asOperands[k].GetAccessMask(); + SetVectorType(aeTempVecType, psInst->asOperands[k].ui32RegisterNumber * 4, mask, dataType, &madeProgress); + + } + + } + } + } + } while (madeProgress != 0); + } + + + // translate forced_int and int_ambiguous back to int + for (i = 0; i < ui32TempCount * 4; i++) + { + if (aeTempVecType[i] == SVT_FORCED_INT || aeTempVecType[i] == SVT_INT_AMBIGUOUS) + aeTempVecType[i] = SVT_INT; + } + + ForEachOperand(instructions.begin(), instructions.end(), FEO_FLAG_ALL, WritebackDataTypes(psContext, &aeTempVecType[0])); + + // Propagate boolean data types over logical operators + bool didProgress = false; + do + { + didProgress = false; + std::for_each(instructions.begin(), instructions.end(), [&didProgress, &psContext, &aeTempVecType](Instruction &i) + { + if ((i.eOpcode == OPCODE_AND || i.eOpcode == OPCODE_OR) + && (i.asOperands[1].GetDataType(psContext) == SVT_BOOL && i.asOperands[2].GetDataType(psContext) == SVT_BOOL) + && (i.asOperands[0].eType == OPERAND_TYPE_TEMP && i.asOperands[0].GetDataType(psContext) != SVT_BOOL)) + { + // Check if all uses see only this define + bool isStandalone = true; + std::for_each(i.m_Uses.begin(), i.m_Uses.end(), [&isStandalone](Instruction::Use &u) + { + if (u.m_Op->m_Defines.size() > 1) + isStandalone = false; + }); + + if (isStandalone) + { + didProgress = true; + // Change data type of this and all uses + i.asOperands[0].aeDataType[0] = i.asOperands[0].aeDataType[1] = i.asOperands[0].aeDataType[2] = i.asOperands[0].aeDataType[3] = SVT_BOOL; + uint32_t reg = i.asOperands[0].ui32RegisterNumber; + aeTempVecType[reg * 4 + 0] = aeTempVecType[reg * 4 + 1] = aeTempVecType[reg * 4 + 2] = aeTempVecType[reg * 4 + 3] = SVT_BOOL; + + std::for_each(i.m_Uses.begin(), i.m_Uses.end(), [](Instruction::Use &u) + { + u.m_Op->aeDataType[0] = u.m_Op->aeDataType[1] = u.m_Op->aeDataType[2] = u.m_Op->aeDataType[3] = SVT_BOOL; + }); + } + } + }); + } while (didProgress); + +} diff --git a/src/Declaration.cpp b/src/Declaration.cpp new file mode 100644 index 0000000..b9b4d42 --- /dev/null +++ b/src/Declaration.cpp @@ -0,0 +1,2 @@ + +#include "internal_includes/Declaration.h" \ No newline at end of file diff --git a/src/HLSLCrossCompilerContext.cpp b/src/HLSLCrossCompilerContext.cpp new file mode 100644 index 0000000..73efca7 --- /dev/null +++ b/src/HLSLCrossCompilerContext.cpp @@ -0,0 +1,253 @@ 
+ +#include "internal_includes/HLSLCrossCompilerContext.h" +#include "internal_includes/HLSLccToolkit.h" +#include "internal_includes/Shader.h" +#include "internal_includes/DataTypeAnalysis.h" +#include "internal_includes/UseDefineChains.h" +#include "internal_includes/Declaration.h" +#include "internal_includes/debug.h" +#include "internal_includes/Translator.h" +#include "internal_includes/ControlFlowGraph.h" +#include + +void HLSLCrossCompilerContext::DoDataTypeAnalysis(ShaderPhase *psPhase) +{ + size_t ui32DeclCount = psPhase->psDecl.size(); + uint32_t i; + + psPhase->psTempDeclaration = NULL; + psPhase->ui32OrigTemps = 0; + psPhase->ui32TotalTemps = 0; + + // Retrieve the temp decl count + for (i = 0; i < ui32DeclCount; ++i) + { + if (psPhase->psDecl[i].eOpcode == OPCODE_DCL_TEMPS) + { + psPhase->ui32TotalTemps = psPhase->psDecl[i].value.ui32NumTemps; + psPhase->psTempDeclaration = &psPhase->psDecl[i]; + break; + } + } + + if (psPhase->ui32TotalTemps == 0) + return; + + psPhase->ui32OrigTemps = psPhase->ui32TotalTemps; + + // The split table is a table containing the index of the original register this register was split out from, or 0xffffffff + // Format: lowest 16 bits: original register. bits 16-23: rebase (eg value of 1 means .yzw was changed to .xyz): bits 24-31: component count + psPhase->pui32SplitInfo.clear(); + psPhase->pui32SplitInfo.resize(psPhase->ui32TotalTemps * 2, 0xffffffff); + + // Build use-define chains and split temps based on those. + { + DefineUseChains duChains; + UseDefineChains udChains; + + BuildUseDefineChains(psPhase->psInst, psPhase->ui32TotalTemps, duChains, udChains, psPhase->GetCFG()); + + CalculateStandaloneDefinitions(duChains, psPhase->ui32TotalTemps); + + // Only do sampler precision downgrade on pixel shaders. 
+ if (psShader->eShaderType == PIXEL_SHADER) + UpdateSamplerPrecisions(psShader->sInfo, duChains, psPhase->ui32TotalTemps); + + UDSplitTemps(&psPhase->ui32TotalTemps, duChains, udChains, psPhase->pui32SplitInfo); + + WriteBackUsesAndDefines(duChains); + } + + HLSLcc::DataTypeAnalysis::SetDataTypes(this, psPhase->psInst, psPhase->ui32TotalTemps, psPhase->peTempTypes); + + if (psPhase->psTempDeclaration && (psPhase->ui32OrigTemps != psPhase->ui32TotalTemps)) + psPhase->psTempDeclaration->value.ui32NumTemps = psPhase->ui32TotalTemps; +} + +void HLSLCrossCompilerContext::ClearDependencyData() +{ + + switch (psShader->eShaderType) + { + case PIXEL_SHADER: + { + psDependencies->ClearCrossDependencyData(); + } + case HULL_SHADER: + { + psDependencies->eTessPartitioning = TESSELLATOR_PARTITIONING_UNDEFINED; + psDependencies->eTessOutPrim = TESSELLATOR_OUTPUT_UNDEFINED; + break; + } + default: + break; + } +} + +void HLSLCrossCompilerContext::AddIndentation() +{ + int i; + bstring glsl = *currentGLSLString; + for (i = 0; i < indent; ++i) + { + bcatcstr(glsl, " "); + } +} + + +std::string HLSLCrossCompilerContext::GetDeclaredInputName(const Operand* psOperand, int *piRebase, int iIgnoreRedirect, uint32_t *puiIgnoreSwizzle) const +{ + std::ostringstream oss; + const ShaderInfo::InOutSignature* psIn = NULL; + int regSpace = psOperand->GetRegisterSpace(this); + + if (iIgnoreRedirect == 0) + { + if ((regSpace == 0 && psShader->asPhases[currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe) + || + (regSpace == 1 && psShader->asPhases[currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe)) + { + oss << "phase" << currentPhase << "_Input" << regSpace << "_" << psOperand->ui32RegisterNumber; + if (piRebase) + *piRebase = 0; + return oss.str(); + } + } + + if (regSpace == 0) + psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psIn, true); + else + psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psIn, true); + + if (psIn && piRebase) + *piRebase = psIn->iRebase; + + std::string res = ""; + bool skipPrefix = false; + if (psTranslator->TranslateSystemValue(psOperand, psIn, res, puiIgnoreSwizzle, psShader->aIndexedInput[regSpace][psOperand->ui32RegisterNumber] != 0, true, &skipPrefix)) + { + if (psShader->eTargetLanguage == LANG_METAL && (iIgnoreRedirect == 0) && !skipPrefix) + return inputPrefix + res; + else + return res; + } + + ASSERT(psIn != NULL); + oss << inputPrefix << (regSpace == 1 ? 
"patch" : "") << psIn->semanticName << psIn->ui32SemanticIndex; + return oss.str(); +} + + +std::string HLSLCrossCompilerContext::GetDeclaredOutputName(const Operand* psOperand, + int* piStream, + uint32_t *puiIgnoreSwizzle, + int *piRebase, + int iIgnoreRedirect) const +{ + std::ostringstream oss; + const ShaderInfo::InOutSignature* psOut = NULL; + int regSpace = psOperand->GetRegisterSpace(this); + + if (iIgnoreRedirect == 0) + { + if ((regSpace == 0 && psShader->asPhases[currentPhase].acOutputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe) + || (regSpace == 1 && psShader->asPhases[currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe)) + { + oss << "phase" << currentPhase << "_Output" << regSpace << "_" << psOperand->ui32RegisterNumber; + if (piRebase) + *piRebase = 0; + return oss.str(); + } + } + + if (regSpace == 0) + psShader->sInfo.GetOutputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), psShader->ui32CurrentVertexOutputStream, &psOut, true); + else + psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psOut, true); + + + if (psOut && piRebase) + *piRebase = psOut->iRebase; + + if (psOut && (psOut->isIndexed.find(currentPhase) != psOut->isIndexed.end())) + { + // Need to route through temp output variable + oss << "phase" << currentPhase << "_Output" << regSpace << "_" << psOut->indexStart.find(currentPhase)->second; + if (!psOperand->m_SubOperands[0].get()) + { + oss << "[" << psOperand->ui32RegisterNumber << "]"; + } + if (piRebase) + *piRebase = 0; + return oss.str(); + } + + std::string res = ""; + if (psTranslator->TranslateSystemValue(psOperand, psOut, res, puiIgnoreSwizzle, psShader->aIndexedOutput[regSpace][psOperand->ui32RegisterNumber], false)) + { + if (psShader->eTargetLanguage == LANG_METAL && (iIgnoreRedirect == 0)) + return outputPrefix + res; + else + return res; + } + ASSERT(psOut != NULL); + + oss << outputPrefix << (regSpace == 1 ? "patch" : "") << psOut->semanticName << psOut->ui32SemanticIndex; + return oss.str(); +} + +bool HLSLCrossCompilerContext::OutputNeedsDeclaring(const Operand* psOperand, const int count) +{ + char compMask = (char)psOperand->ui32CompMask; + int regSpace = psOperand->GetRegisterSpace(this); + uint32_t startIndex = psOperand->ui32RegisterNumber + (psShader->ui32CurrentVertexOutputStream * 1024); // Assume less than 1K input streams + ASSERT(psShader->ui32CurrentVertexOutputStream < 4); + + // First check for various builtins, mostly depth-output ones. + if (psShader->eShaderType == PIXEL_SHADER) + { + if (psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL || + psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL) + { + return true; + } + + if (psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH) + { + // GL doesn't need declaration, Metal does. 
+ return psShader->eTargetLanguage == LANG_METAL; + } + } + + // Needs declaring if any of the components hasn't been already declared + if ((compMask & ~psShader->acOutputDeclared[regSpace][startIndex]) != 0) + { + int offset; + const ShaderInfo::InOutSignature* psSignature = NULL; + + if (psOperand->eSpecialName == NAME_UNDEFINED) + { + // Need to fetch the actual comp mask + if (regSpace == 0) + psShader->sInfo.GetOutputSignatureFromRegister( + psOperand->ui32RegisterNumber, + psOperand->ui32CompMask, + psShader->ui32CurrentVertexOutputStream, + &psSignature); + else + psShader->sInfo.GetPatchConstantSignatureFromRegister( + psOperand->ui32RegisterNumber, + psOperand->ui32CompMask, + &psSignature); + + compMask = (char)psSignature->ui32Mask; + } + for (offset = 0; offset < count; offset++) + { + psShader->acOutputDeclared[regSpace][startIndex + offset] |= compMask; + } + return true; + } + + return false; +} \ No newline at end of file diff --git a/src/HLSLcc.cpp b/src/HLSLcc.cpp new file mode 100644 index 0000000..8b3bac1 --- /dev/null +++ b/src/HLSLcc.cpp @@ -0,0 +1,212 @@ + +#include "hlslcc.h" + +#include +#include "internal_includes/HLSLCrossCompilerContext.h" +#include "internal_includes/toGLSL.h" +#include "internal_includes/toMetal.h" +#include "internal_includes/Shader.h" +#include "internal_includes/decode.h" + + +#ifndef GL_VERTEX_SHADER_ARB +#define GL_VERTEX_SHADER_ARB 0x8B31 +#endif +#ifndef GL_FRAGMENT_SHADER_ARB +#define GL_FRAGMENT_SHADER_ARB 0x8B30 +#endif +#ifndef GL_GEOMETRY_SHADER +#define GL_GEOMETRY_SHADER 0x8DD9 +#endif +#ifndef GL_TESS_EVALUATION_SHADER +#define GL_TESS_EVALUATION_SHADER 0x8E87 +#endif +#ifndef GL_TESS_CONTROL_SHADER +#define GL_TESS_CONTROL_SHADER 0x8E88 +#endif +#ifndef GL_COMPUTE_SHADER +#define GL_COMPUTE_SHADER 0x91B9 +#endif + + +HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromMem(const char* shader, + unsigned int flags, + GLLang language, + const GlExtensions *extensions, + GLSLCrossDependencyData* dependencies, + HLSLccSamplerPrecisionInfo& samplerPrecisions, + HLSLccReflection& reflectionCallbacks, + GLSLShader* result) +{ + uint32_t* tokens; + char* glslcstr = NULL; + int GLSLShaderType = GL_FRAGMENT_SHADER_ARB; + int success = 0; + uint32_t i; + + tokens = (uint32_t*)shader; + + std::auto_ptr psShader(DecodeDXBC(tokens, flags)); + + if (psShader.get()) + { + HLSLCrossCompilerContext sContext(reflectionCallbacks); + + // Add shader precisions from the list + psShader->sInfo.AddSamplerPrecisions(samplerPrecisions); + + if (psShader->ui32MajorVersion <= 3) + { + flags &= ~HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS; + } + + sContext.psShader = psShader.get(); + sContext.flags = flags; + sContext.psDependencies = dependencies; + + for (i = 0; i < psShader->asPhases.size(); ++i) + { + psShader->asPhases[i].hasPostShaderCode = 0; + } + + if (language == LANG_METAL) + { + // Tessellation or geometry shaders are not supported + if (psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER || psShader->eShaderType == GEOMETRY_SHADER) + { + result->sourceCode = ""; + return 0; + } + ToMetal translator(&sContext); + if(!translator.Translate()) + { + bdestroy(sContext.glsl); + for (i = 0; i < psShader->asPhases.size(); ++i) + { + bdestroy(psShader->asPhases[i].postShaderCode); + bdestroy(psShader->asPhases[i].earlyMain); + } + + return 0; + } + } + else + { + ToGLSL translator(&sContext); + language = translator.SetLanguage(language); + translator.SetExtensions(extensions); + if (!translator.Translate()) + { + 
bdestroy(sContext.glsl); + for (i = 0; i < psShader->asPhases.size(); ++i) + { + bdestroy(psShader->asPhases[i].postShaderCode); + bdestroy(psShader->asPhases[i].earlyMain); + } + + return 0; + } + } + + switch (psShader->eShaderType) + { + case VERTEX_SHADER: + { + GLSLShaderType = GL_VERTEX_SHADER_ARB; + break; + } + case GEOMETRY_SHADER: + { + GLSLShaderType = GL_GEOMETRY_SHADER; + break; + } + case DOMAIN_SHADER: + { + GLSLShaderType = GL_TESS_EVALUATION_SHADER; + break; + } + case HULL_SHADER: + { + GLSLShaderType = GL_TESS_CONTROL_SHADER; + break; + } + case COMPUTE_SHADER: + { + GLSLShaderType = GL_COMPUTE_SHADER; + break; + } + default: + { + break; + } + } + + glslcstr = bstr2cstr(sContext.glsl, '\0'); + result->sourceCode = glslcstr; + bcstrfree(glslcstr); + + bdestroy(sContext.glsl); + for (i = 0; i < psShader->asPhases.size(); ++i) + { + bdestroy(psShader->asPhases[i].postShaderCode); + bdestroy(psShader->asPhases[i].earlyMain); + } + + result->reflection = psShader->sInfo; + + result->textureSamplers = psShader->textureSamplers; + + success = 1; + } + + shader = 0; + tokens = 0; + + /* Fill in the result struct */ + + result->shaderType = GLSLShaderType; + result->GLSLLanguage = language; + + return success; +} + +HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromFile(const char* filename, + unsigned int flags, + GLLang language, + const GlExtensions *extensions, + GLSLCrossDependencyData* dependencies, + HLSLccSamplerPrecisionInfo& samplerPrecisions, + HLSLccReflection& reflectionCallbacks, + GLSLShader* result) +{ + FILE* shaderFile; + int length; + size_t readLength; + std::vector shader; + int success = 0; + + shaderFile = fopen(filename, "rb"); + + if (!shaderFile) + { + return 0; + } + + fseek(shaderFile, 0, SEEK_END); + length = ftell(shaderFile); + fseek(shaderFile, 0, SEEK_SET); + + shader.reserve(length + 1); + + readLength = fread(&shader[0], 1, length, shaderFile); + + fclose(shaderFile); + shaderFile = 0; + + shader[readLength] = '\0'; + + success = TranslateHLSLFromMem(&shader[0], flags, language, extensions, dependencies, samplerPrecisions, reflectionCallbacks, result); + + return success; +} + diff --git a/src/HLSLccToolkit.cpp b/src/HLSLccToolkit.cpp new file mode 100644 index 0000000..763279b --- /dev/null +++ b/src/HLSLccToolkit.cpp @@ -0,0 +1,482 @@ + +#include "internal_includes/HLSLccToolkit.h" +#include "internal_includes/debug.h" +#include "internal_includes/toGLSLOperand.h" +#include "internal_includes/HLSLCrossCompilerContext.h" +#include "internal_includes/Shader.h" +#include +#include + +namespace HLSLcc +{ + uint32_t GetNumberBitsSet(uint32_t a) + { + // Calculate number of bits in a + // Taken from https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSet64 + // Works only up to 14 bits (we're only using up to 4) + return (a * 0x200040008001ULL & 0x111111111111111ULL) % 0xf; + } + + uint32_t SVTTypeToFlag(const SHADER_VARIABLE_TYPE eType) + { + if (eType == SVT_FLOAT16) + { + return TO_FLAG_FORCE_HALF; + } + if (eType == SVT_UINT || eType == SVT_UINT16) + { + return TO_FLAG_UNSIGNED_INTEGER; + } + else if (eType == SVT_INT || eType == SVT_INT16 || eType == SVT_INT12) + { + return TO_FLAG_INTEGER; + } + else if (eType == SVT_BOOL) + { + return TO_FLAG_BOOL; + } + else + { + return TO_FLAG_NONE; + } + } + + SHADER_VARIABLE_TYPE TypeFlagsToSVTType(const uint32_t typeflags) + { + if (typeflags & TO_FLAG_FORCE_HALF) + return SVT_FLOAT16; + if (typeflags & (TO_FLAG_INTEGER | TO_AUTO_BITCAST_TO_INT)) + return SVT_INT; + if (typeflags & 
(TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_BITCAST_TO_UINT)) + return SVT_UINT; + if (typeflags & TO_FLAG_BOOL) + return SVT_BOOL; + return SVT_FLOAT; + } + + const char * GetConstructorForTypeGLSL(const SHADER_VARIABLE_TYPE eType, + const int components, bool useGLSLPrecision) + { + static const char * const uintTypes[] = { " ", "uint", "uvec2", "uvec3", "uvec4" }; + static const char * const uint16Types[] = { " ", "mediump uint", "mediump uvec2", "mediump uvec3", "mediump uvec4" }; + static const char * const intTypes[] = { " ", "int", "ivec2", "ivec3", "ivec4" }; + static const char * const int16Types[] = { " ", "mediump int", "mediump ivec2", "mediump ivec3", "mediump ivec4" }; + static const char * const int12Types[] = { " ", "lowp int", "lowp ivec2", "lowp ivec3", "lowp ivec4" }; + static const char * const floatTypes[] = { " ", "float", "vec2", "vec3", "vec4" }; + static const char * const float16Types[] = { " ", "mediump float", "mediump vec2", "mediump vec3", "mediump vec4" }; + static const char * const float10Types[] = { " ", "lowp float", "lowp vec2", "lowp vec3", "lowp vec4" }; + static const char * const boolTypes[] = { " ", "bool", "bvec2", "bvec3", "bvec4" }; + + ASSERT(components >= 1 && components <= 4); + + switch (eType) + { + case SVT_UINT: + return uintTypes[components]; + case SVT_UINT16: + return useGLSLPrecision ? uint16Types[components] : uintTypes[components]; + case SVT_INT: + return intTypes[components]; + case SVT_INT16: + return useGLSLPrecision ? int16Types[components] : intTypes[components]; + case SVT_INT12: + return useGLSLPrecision ? int12Types[components] : intTypes[components]; + case SVT_FLOAT: + return floatTypes[components]; + case SVT_FLOAT16: + return useGLSLPrecision ? float16Types[components] : floatTypes[components]; + case SVT_FLOAT10: + return useGLSLPrecision ? 
float10Types[components] : floatTypes[components]; + case SVT_BOOL: + return boolTypes[components]; + default: + ASSERT(0); + return " "; + } + } + + const char * GetConstructorForTypeMetal(const SHADER_VARIABLE_TYPE eType, + const int components) + { + static const char * const uintTypes[] = { " ", "uint", "uint2", "uint3", "uint4" }; + static const char * const ushortTypes[] = { " ", "ushort", "ushort2", "ushort3", "ushort4" }; + static const char * const intTypes[] = { " ", "int", "int2", "int3", "int4" }; + static const char * const shortTypes[] = { " ", "short", "short2", "short3", "short4" }; + static const char * const floatTypes[] = { " ", "float", "float2", "float3", "float4" }; + static const char * const halfTypes[] = { " ", "half", "half2", "half3", "half4" }; + static const char * const boolTypes[] = { " ", "bool", "bool2", "bool3", "bool4" }; + + ASSERT(components >= 1 && components <= 4); + + switch (eType) + { + case SVT_UINT: + return uintTypes[components]; + case SVT_UINT16: + return ushortTypes[components]; + case SVT_INT: + return intTypes[components]; + case SVT_INT16: + case SVT_INT12: + return shortTypes[components]; + case SVT_FLOAT: + return floatTypes[components]; + case SVT_FLOAT16: + case SVT_FLOAT10: + return halfTypes[components]; + case SVT_BOOL: + return boolTypes[components]; + default: + ASSERT(0); + return " "; + } + } + + const char * GetConstructorForType(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eType, const int components, bool useGLSLPrecision /* = true*/) + { + if (psContext->psShader->eTargetLanguage == LANG_METAL) + return GetConstructorForTypeMetal(eType, components); + else + return GetConstructorForTypeGLSL(eType, components, useGLSLPrecision); + } + + std::string GetMatrixTypeName(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eBaseType, const int columns, const int rows) + { + std::string result; + std::ostringstream oss; + if (psContext->psShader->eTargetLanguage == LANG_METAL) + { + switch (eBaseType) + { + case SVT_FLOAT: + oss << "float" << columns << "x" << rows; + break; + case SVT_FLOAT16: + case SVT_FLOAT10: + oss << "half" << columns << "x" << rows; + break; + default: + ASSERT(0); + break; + } + } + else + { + switch (eBaseType) + { + case SVT_FLOAT: + oss << "mat" << columns << "x" << rows; + break; + case SVT_FLOAT16: + oss << "mediump mat" << columns << "x" << rows; + break; + case SVT_FLOAT10: + oss << "lowp mat" << columns << "x" << rows; + break; + default: + ASSERT(0); + break; + } + + } + result = oss.str(); + return result; + } + + void AddSwizzleUsingElementCount(bstring dest, uint32_t count) + { + if (count == 4) + return; + if (count) + { + bcatcstr(dest, "."); + bcatcstr(dest, "x"); + count--; + } + if (count) + { + bcatcstr(dest, "y"); + count--; + } + if (count) + { + bcatcstr(dest, "z"); + count--; + } + if (count) + { + bcatcstr(dest, "w"); + count--; + } + } + + // Calculate the bits set in mask + int WriteMaskToComponentCount(uint32_t writeMask) + { + // In HLSL bytecode writemask 0 also means everything + if (writeMask == 0) + return 4; + + return (int)GetNumberBitsSet(writeMask); + } + + uint32_t BuildComponentMaskFromElementCount(int count) + { + // Translate numComponents into bitmask + // 1 -> 1, 2 -> 3, 3 -> 7 and 4 -> 15 + return (1 << count) - 1; + } + + // Returns true if we can do direct assignment between types (mostly for mediump<->highp floats etc) + bool DoAssignmentDataTypesMatch(SHADER_VARIABLE_TYPE dest, SHADER_VARIABLE_TYPE src) + { + if (src == 
dest) + return true; + + if ((dest == SVT_FLOAT || dest == SVT_FLOAT10 || dest == SVT_FLOAT16) && + (src == SVT_FLOAT || src == SVT_FLOAT10 || src == SVT_FLOAT16)) + return true; + + if ((dest == SVT_INT || dest == SVT_INT12 || dest == SVT_INT16) && + (src == SVT_INT || src == SVT_INT12 || src == SVT_INT16)) + return true; + + if ((dest == SVT_UINT || dest == SVT_UINT16) && + (src == SVT_UINT || src == SVT_UINT16)) + return true; + + return false; + } + + uint32_t ResourceReturnTypeToFlag(const RESOURCE_RETURN_TYPE eType) + { + if (eType == RETURN_TYPE_SINT) + { + return TO_FLAG_INTEGER; + } + else if (eType == RETURN_TYPE_UINT) + { + return TO_FLAG_UNSIGNED_INTEGER; + } + else + { + return TO_FLAG_NONE; + } + } + + SHADER_VARIABLE_TYPE ResourceReturnTypeToSVTType(const RESOURCE_RETURN_TYPE eType, const REFLECT_RESOURCE_PRECISION ePrec) + { + if (eType == RETURN_TYPE_SINT) + { + switch (ePrec) + { + default: + return SVT_INT; + case REFLECT_RESOURCE_PRECISION_LOWP: + return SVT_INT12; + case REFLECT_RESOURCE_PRECISION_MEDIUMP: + return SVT_INT16; + } + } + else if (eType == RETURN_TYPE_UINT) + { + switch (ePrec) + { + default: + return SVT_UINT; + case REFLECT_RESOURCE_PRECISION_LOWP: + return SVT_UINT8; + case REFLECT_RESOURCE_PRECISION_MEDIUMP: + return SVT_UINT16; + } + } + else + { + switch (ePrec) + { + default: + return SVT_FLOAT; + case REFLECT_RESOURCE_PRECISION_LOWP: + return SVT_FLOAT10; + case REFLECT_RESOURCE_PRECISION_MEDIUMP: + return SVT_FLOAT16; + } + } + } + + + uint32_t ElemCountToAutoExpandFlag(uint32_t elemCount) + { + return TO_AUTO_EXPAND_TO_VEC2 << (elemCount - 2); + } + + // Returns true if the operation is commutative + bool IsOperationCommutative(int eOpCode) + { + switch ((OPCODE_TYPE)eOpCode) + { + case OPCODE_DADD: + case OPCODE_IADD: + case OPCODE_ADD: + case OPCODE_MUL: + case OPCODE_IMUL: + case OPCODE_OR: + case OPCODE_AND: + return true; + default: + return false; + }; + } + + // Returns true if operands are identical, only cares about temp registers currently. 
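+    // (Illustrative: r2.xyz compared with r2.xyz under the same modifier is identical;
+    //  r2.xyz vs r2.yzw, r2.x vs r3.x, or r2 vs -r2 is not.)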
+ bool AreTempOperandsIdentical(const Operand * psA, const Operand * psB) + { + if (!psA || !psB) + return 0; + + if (psA->eType != OPERAND_TYPE_TEMP || psB->eType != OPERAND_TYPE_TEMP) + return 0; + + if (psA->eModifier != psB->eModifier) + return 0; + + if (psA->iNumComponents != psB->iNumComponents) + return 0; + + if (psA->ui32RegisterNumber != psB->ui32RegisterNumber) + return 0; + + if (psA->eSelMode != psB->eSelMode) + return 0; + + if (psA->eSelMode == OPERAND_4_COMPONENT_MASK_MODE && psA->ui32CompMask != psB->ui32CompMask) + return 0; + + if (psA->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE && psA->aui32Swizzle[0] != psB->aui32Swizzle[0]) + return 0; + + if (psA->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE && std::equal(&psA->aui32Swizzle[0], &psA->aui32Swizzle[4], &psB->aui32Swizzle[0])) + return 0; + + return 1; + } + + bool IsAddOneInstruction(const Instruction *psInst) + { + if (psInst->eOpcode != OPCODE_IADD) + return false; + if (psInst->asOperands[0].eType != OPERAND_TYPE_TEMP) + return false; + + if (psInst->asOperands[1].eType == OPERAND_TYPE_TEMP) + { + if (psInst->asOperands[1].ui32RegisterNumber != psInst->asOperands[0].ui32RegisterNumber) + return false; + if (psInst->asOperands[2].eType != OPERAND_TYPE_IMMEDIATE32) + return false; + + if (*(int *)&psInst->asOperands[2].afImmediates[0] != 1) + return false; + } + else + { + if (psInst->asOperands[1].eType != OPERAND_TYPE_IMMEDIATE32) + return false; + if (psInst->asOperands[2].eType != OPERAND_TYPE_TEMP) + return false; + + if (psInst->asOperands[2].ui32RegisterNumber != psInst->asOperands[0].ui32RegisterNumber) + return false; + + if (*(int *)&psInst->asOperands[1].afImmediates[0] != 1) + return false; + } + return true; + } + + + int GetNumTextureDimensions(int /* RESOURCE_DIMENSION */ eResDim) + { + switch ((RESOURCE_DIMENSION)eResDim) + { + case RESOURCE_DIMENSION_TEXTURE1D: + return 1; + case RESOURCE_DIMENSION_TEXTURE2D: + case RESOURCE_DIMENSION_TEXTURE1DARRAY: + case RESOURCE_DIMENSION_TEXTURECUBE: + return 2; + case RESOURCE_DIMENSION_TEXTURE3D: + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + return 3; + default: + ASSERT(0); + break; + } + return 0; + } + + // Returns the "more important" type of a and b, currently int < uint < float + SHADER_VARIABLE_TYPE SelectHigherType(SHADER_VARIABLE_TYPE a, SHADER_VARIABLE_TYPE b) + { +#define DO_CHECK(type) if( a == type || b == type ) return type + + // Priority ordering + DO_CHECK(SVT_FLOAT16); + DO_CHECK(SVT_FLOAT10); + DO_CHECK(SVT_UINT16); + DO_CHECK(SVT_UINT8); + DO_CHECK(SVT_INT16); + DO_CHECK(SVT_INT12); + DO_CHECK(SVT_FORCED_INT); + DO_CHECK(SVT_FLOAT); + DO_CHECK(SVT_UINT); + DO_CHECK(SVT_INT); + DO_CHECK(SVT_INT_AMBIGUOUS); + +#undef DO_CHECK + // After these just rely on ordering. + return a > b ? 
a : b; + } + + // Returns true if a direct constructor can convert src->dest + bool CanDoDirectCast(SHADER_VARIABLE_TYPE src, SHADER_VARIABLE_TYPE dest) + { + // uint<->int<->bool conversions possible + if ((src == SVT_INT || src == SVT_UINT || src == SVT_BOOL || src == SVT_INT12 || src == SVT_INT16 || src == SVT_UINT16) && + (dest == SVT_INT || dest == SVT_UINT || dest == SVT_BOOL || dest == SVT_INT12 || dest == SVT_INT16 || dest == SVT_UINT16)) + return true; + + // float<->double possible + if ((src == SVT_FLOAT || src == SVT_DOUBLE || src == SVT_FLOAT16 || src == SVT_FLOAT10) && + (dest == SVT_FLOAT || dest == SVT_DOUBLE || dest == SVT_FLOAT16 || dest == SVT_FLOAT10)) + return true; + + return false; + } + +#ifdef _MSC_VER +#define fpcheck(x) (_isnan(x) || !_finite(x)) +#else +#define fpcheck(x) (std::isnan(x) || std::isinf(x)) +#endif + + // Helper function to print floats with full precision + void PrintFloat(bstring b, float f) + { + bstring temp; + int ePos; + int pointPos; + + temp = bformat("%.9g", f); + ePos = bstrchrp(temp, 'e', 0); + pointPos = bstrchrp(temp, '.', 0); + + bconcat(b, temp); + bdestroy(temp); + + if (ePos < 0 && pointPos < 0 && !fpcheck(f)) + bcatcstr(b, ".0"); + } +}; + diff --git a/src/HLSLccTypes.natvis b/src/HLSLccTypes.natvis new file mode 100644 index 0000000..6456304 --- /dev/null +++ b/src/HLSLccTypes.natvis @@ -0,0 +1,10 @@ + + + + {{ id={id} op={eOpcode} o0={asOperands[0]}, o1={asOperands[1]}}} + + + {{ type={eType}, reg={ui32RegisterNumber} }} + + + \ No newline at end of file diff --git a/src/Instruction.cpp b/src/Instruction.cpp new file mode 100644 index 0000000..b24160a --- /dev/null +++ b/src/Instruction.cpp @@ -0,0 +1,351 @@ + +#include "internal_includes/Instruction.h" +#include "internal_includes/debug.h" +#include "include/ShaderInfo.h" + +// Returns the result swizzle operand for an instruction, or NULL if all src operands have swizzles +static Operand *GetSrcSwizzleOperand(Instruction *psInst) +{ + switch (psInst->eOpcode) + { + case OPCODE_DP2: + case OPCODE_DP3: + case OPCODE_DP4: + case OPCODE_NOP: + case OPCODE_SWAPC: + case OPCODE_SAMPLE_C: + case OPCODE_SAMPLE_C_LZ: + ASSERT(0); + return NULL; + + // Normal arithmetics, all srcs have swizzles + case OPCODE_ADD: + case OPCODE_AND: + case OPCODE_DERIV_RTX: + case OPCODE_DERIV_RTX_COARSE: + case OPCODE_DERIV_RTX_FINE: + case OPCODE_DERIV_RTY: + case OPCODE_DERIV_RTY_COARSE: + case OPCODE_DERIV_RTY_FINE: + case OPCODE_DIV: + case OPCODE_EQ: + case OPCODE_EXP: + case OPCODE_FRC: + case OPCODE_FTOI: + case OPCODE_FTOU: + case OPCODE_GE: + case OPCODE_IADD: + case OPCODE_IEQ: + case OPCODE_IGE: + case OPCODE_ILT: + case OPCODE_IMAD: + case OPCODE_IMAX: + case OPCODE_IMIN: + case OPCODE_IMUL: + case OPCODE_INE: + case OPCODE_INEG: + case OPCODE_ITOF: + case OPCODE_LOG: + case OPCODE_LT: + case OPCODE_MAD: + case OPCODE_MAX: + case OPCODE_MIN: + case OPCODE_MOV: + case OPCODE_MUL: + case OPCODE_NE: + case OPCODE_NOT: + case OPCODE_OR: + case OPCODE_ROUND_NE: + case OPCODE_ROUND_NI: + case OPCODE_ROUND_PI: + case OPCODE_ROUND_Z: + case OPCODE_RSQ: + case OPCODE_SINCOS: + case OPCODE_SQRT: + case OPCODE_UDIV: + case OPCODE_UGE: + case OPCODE_ULT: + case OPCODE_UMAD: + case OPCODE_UMAX: + case OPCODE_UMIN: + case OPCODE_UMUL: + case OPCODE_UTOF: + case OPCODE_XOR: + + case OPCODE_BFI: + case OPCODE_BFREV: + case OPCODE_COUNTBITS: + case OPCODE_DADD: + case OPCODE_DDIV: + case OPCODE_DEQ: + case OPCODE_DFMA: + case OPCODE_DGE: + case OPCODE_DLT: + case OPCODE_DMAX: + case OPCODE_DMIN: + case 
OPCODE_DMUL: + case OPCODE_DMOV: + case OPCODE_DNE: + case OPCODE_DRCP: + case OPCODE_DTOF: + case OPCODE_F16TOF32: + case OPCODE_F32TOF16: + case OPCODE_FIRSTBIT_HI: + case OPCODE_FIRSTBIT_LO: + case OPCODE_FIRSTBIT_SHI: + case OPCODE_FTOD: + case OPCODE_IBFE: + case OPCODE_RCP: + case OPCODE_UADDC: + case OPCODE_UBFE: + case OPCODE_USUBB: + case OPCODE_MOVC: + case OPCODE_DMOVC: + return NULL; + + // Special cases: + case OPCODE_GATHER4: + case OPCODE_GATHER4_C: + case OPCODE_LD: + case OPCODE_LD_MS: + case OPCODE_LOD: + case OPCODE_LD_UAV_TYPED: + case OPCODE_LD_RAW: + case OPCODE_SAMPLE: + case OPCODE_SAMPLE_B: + case OPCODE_SAMPLE_L: + case OPCODE_SAMPLE_D: + case OPCODE_RESINFO: + return &psInst->asOperands[2]; + + case OPCODE_GATHER4_PO: + case OPCODE_GATHER4_PO_C: + case OPCODE_LD_STRUCTURED: + return &psInst->asOperands[3]; + + case OPCODE_ISHL: + case OPCODE_ISHR: + case OPCODE_USHR: + return &psInst->asOperands[1]; + + default: + ASSERT(0); + return NULL; + + + } + +} + +// Tweak the source operands of an instruction so that the rebased write mask will still work +static void DoSrcOperandRebase(Operand *psOperand, uint32_t rebase) +{ + uint32_t i; + switch (psOperand->eSelMode) + { + default: + case OPERAND_4_COMPONENT_MASK_MODE: + ASSERT(psOperand->ui32CompMask == 0 || psOperand->ui32CompMask == OPERAND_4_COMPONENT_MASK_ALL); + + // Special case for immediates, they do not have swizzles + if (psOperand->eType == OPERAND_TYPE_IMMEDIATE32) + { + if (psOperand->iNumComponents > 1) + std::copy(&psOperand->afImmediates[rebase], &psOperand->afImmediates[4], &psOperand->afImmediates[0]); + return; + } + if (psOperand->eType == OPERAND_TYPE_IMMEDIATE64) + { + if (psOperand->iNumComponents > 1) + std::copy(&psOperand->adImmediates[rebase], &psOperand->adImmediates[4], &psOperand->adImmediates[0]); + return; + } + + // Need to change this to swizzle + psOperand->eSelMode = OPERAND_4_COMPONENT_SWIZZLE_MODE; + psOperand->ui32Swizzle = 0; + for (i = 0; i < 4 - rebase; i++) + psOperand->aui32Swizzle[i] = i + rebase; + for (; i < 4; i++) + psOperand->aui32Swizzle[i] = rebase; // The first actual input. + break; + case OPERAND_4_COMPONENT_SELECT_1_MODE: + // Nothing to do + break; + case OPERAND_4_COMPONENT_SWIZZLE_MODE: + for (i = rebase; i < 4; i++) + psOperand->aui32Swizzle[i - rebase] = psOperand->aui32Swizzle[i]; + break; + } +} + +void Instruction::ChangeOperandTempRegister(Operand *psOperand, uint32_t oldReg, uint32_t newReg, uint32_t compMask, uint32_t flags, uint32_t rebase) +{ + uint32_t i = 0; + uint32_t accessMask = 0; + int isDestination = 0; + Operand *psSwizzleOperand = NULL; + + if (flags & UD_CHANGE_SUBOPERANDS) + { + for (i = 0; i < MAX_SUB_OPERANDS; i++) + { + if (psOperand->m_SubOperands[i].get()) + ChangeOperandTempRegister(psOperand->m_SubOperands[i].get(), oldReg, newReg, compMask, UD_CHANGE_ALL, rebase); + } + } + + if ((flags & UD_CHANGE_MAIN_OPERAND) == 0) + return; + + if (psOperand->eType != OPERAND_TYPE_TEMP) + return; + + if (psOperand->ui32RegisterNumber != oldReg) + return; + + accessMask = psOperand->GetAccessMask(); + // If this operation touches other components than the one(s) we're splitting, skip it + if ((accessMask & (~compMask)) != 0) + { + // Verify that we've not messed up in reachability analysis. + // This would mean that we've encountered an instruction that accesses + // a component in multi-component mode and we're supposed to treat it as single-use only. 
+ // Now that we track operands we can bring this back + ASSERT((accessMask & compMask) == 0); + return; + } + +#if 0 + printf("Updating operand %d with access mask %X\n", (int)psOperand->id, accessMask); +#endif + psOperand->ui32RegisterNumber = newReg; + + if (rebase == 0) + return; + + // Update component mask. Note that we don't need to do anything to the suboperands. They do not affect destination writemask. + switch (psOperand->eSelMode) + { + case OPERAND_4_COMPONENT_MASK_MODE: + { + uint32_t oldMask = psOperand->ui32CompMask; + if (oldMask == 0) + oldMask = OPERAND_4_COMPONENT_MASK_ALL; + + // Check that we're not losing any information + ASSERT((oldMask >> rebase) << rebase == oldMask); + psOperand->ui32CompMask = (oldMask >> rebase); + break; + } + case OPERAND_4_COMPONENT_SELECT_1_MODE: + ASSERT(psOperand->aui32Swizzle[0] >= rebase); + psOperand->aui32Swizzle[0] -= rebase; + break; + case OPERAND_4_COMPONENT_SWIZZLE_MODE: + { + for (i = 0; i < 4; i++) + { + // Note that this rebase is different from the one done for source operands + ASSERT(psOperand->aui32Swizzle[i] >= rebase); + psOperand->aui32Swizzle[i] -= rebase; + } + break; + } + default: + ASSERT(0); + + } + + // Tweak operand datatypes + std::copy(&psOperand->aeDataType[rebase], &psOperand->aeDataType[4], &psOperand->aeDataType[0]); + + // If this operand is a destination, we'll need to tweak sources as well + for (i = 0; i < ui32FirstSrc; i++) + { + if (psOperand == &asOperands[i]) + { + isDestination = 1; + break; + } + } + + if (isDestination == 0) + return; + + // Nasty corner case of 2 destinations, not supported if both targets are written + ASSERT((ui32FirstSrc < 2) || (asOperands[0].eType == OPERAND_TYPE_NULL) || (asOperands[1].eType == OPERAND_TYPE_NULL)); + + // If we made it this far, we're rebasing a destination temp (and the only destination), need to tweak sources depending on the instruction + switch (eOpcode) + { + // The opcodes that do not need tweaking: + case OPCODE_DP2: + case OPCODE_DP3: + case OPCODE_DP4: + case OPCODE_BUFINFO: + case OPCODE_SAMPLE_C: + case OPCODE_SAMPLE_C_LZ: + return; + + default: + psSwizzleOperand = GetSrcSwizzleOperand(this); // Null means tweak all source operands + if (psSwizzleOperand) + { + DoSrcOperandRebase(psSwizzleOperand, rebase); + return; + } + else + { + for (i = ui32FirstSrc; i < ui32NumOperands; i++) + { + DoSrcOperandRebase(&asOperands[i], rebase); + } + } + return; + } + +} + + +// Returns nonzero if psInst is a sample instruction and the sampler has medium or low precision +bool Instruction::IsPartialPrecisionSamplerInstruction(const ShaderInfo &info, OPERAND_MIN_PRECISION *pType) const +{ + const Operand *op; + const ResourceBinding *psBinding = NULL; + OPERAND_MIN_PRECISION sType = OPERAND_MIN_PRECISION_DEFAULT; + switch (eOpcode) + { + default: + return false; + case OPCODE_SAMPLE: + case OPCODE_SAMPLE_B: + case OPCODE_SAMPLE_L: + case OPCODE_SAMPLE_D: + case OPCODE_SAMPLE_C: + case OPCODE_SAMPLE_C_LZ: + break; + } + + op = &asOperands[3]; + ASSERT(op->eType == OPERAND_TYPE_SAMPLER); + + info.GetResourceFromBindingPoint(RGROUP_SAMPLER, op->ui32RegisterNumber, &psBinding); + if (!psBinding) + { + /* Try to look from texture group */ + info.GetResourceFromBindingPoint(RGROUP_TEXTURE, op->ui32RegisterNumber, &psBinding); + } + + sType = Operand::ResourcePrecisionToOperandPrecision(psBinding ? 
psBinding->ePrecision : REFLECT_RESOURCE_PRECISION_UNKNOWN); + + if (sType == OPERAND_MIN_PRECISION_DEFAULT) + return false; + + if (pType) + *pType = sType; + + return true; +} + + diff --git a/src/LoopTransform.cpp b/src/LoopTransform.cpp new file mode 100644 index 0000000..e7039a1 --- /dev/null +++ b/src/LoopTransform.cpp @@ -0,0 +1,363 @@ + +#include "src/internal_includes/LoopTransform.h" +#include "src/internal_includes/Shader.h" +#include "src/internal_includes/debug.h" +#include +#include +#include + +namespace HLSLcc +{ + + struct LoopInfo + { + public: + LoopInfo() : m_StartLoop(0), m_EndLoop(0), m_ExitPoints(), m_IsSwitch(false) {} + + Instruction * m_StartLoop; // OPCODE_LOOP + Instruction * m_EndLoop; // OPCODE_ENDLOOP that matches the LOOP above. + std::vector m_ExitPoints; // Any BREAK/RET/BREAKC instructions within the same loop depth + bool m_IsSwitch; // True if this is a switch-case and not a LOOP/ENDLOOP pair. Used as a helper when parsing. + }; + + typedef std::list Loops; + + // Build a loopinfo array of all the loops in this shader phase + void BuildLoopInfo(ShaderPhase &phase, Loops &res) + { + using namespace std; + res.clear(); + + Instruction *i = &phase.psInst[0]; + // A stack of loopinfo elements (stored in res) + list loopStack; + + // Storage for dummy LoopInfo elements to be used for switch-cases. We don't want them cluttering the Loops list so store them here. + list dummyLIForSwitches; + + while (i != &*phase.psInst.end()) + { + if (i->eOpcode == OPCODE_LOOP) + { + LoopInfo *currLoopInfo = &*res.insert(res.end(), LoopInfo()); + currLoopInfo->m_StartLoop = i; + loopStack.push_front(currLoopInfo); + } + else if(i->eOpcode == OPCODE_ENDLOOP) + { + ASSERT(!loopStack.empty()); + LoopInfo *li = *loopStack.begin(); + loopStack.pop_front(); + li->m_EndLoop = i; + } + else if (i->eOpcode == OPCODE_SWITCH) + { + // Create a dummy entry into the stack + LoopInfo *li = &*dummyLIForSwitches.insert(dummyLIForSwitches.end(), LoopInfo()); + li->m_IsSwitch = true; + loopStack.push_front(li); + } + else if (i->eOpcode == OPCODE_ENDSWITCH) + { + ASSERT(!loopStack.empty()); + LoopInfo *li = *loopStack.begin(); + loopStack.pop_front(); + ASSERT(li->m_IsSwitch); + } + else if (i->eOpcode == OPCODE_BREAK || i->eOpcode == OPCODE_BREAKC) + { + // Get the current loopstack head + ASSERT(!loopStack.empty()); + LoopInfo *li = *loopStack.begin(); + // Ignore breaks from switch-cases + if(!li->m_IsSwitch) + { + li->m_ExitPoints.push_back(i); + } + } + i++; + } + + } + + // Returns true if the given instruction is a non-vectorized int or uint comparison instruction that reads from at least one temp and writes to a temp + static bool IsScalarTempComparisonInstruction(const Instruction *i) + { + switch (i->eOpcode) + { + default: + return false; + case OPCODE_IGE: + case OPCODE_ILT: + case OPCODE_IEQ: + case OPCODE_INE: + case OPCODE_UGE: + case OPCODE_ULT: + break; + } + + if (i->asOperands[0].eType != OPERAND_TYPE_TEMP) + return false; + + int tempOp = -1; + if (i->asOperands[1].eType == OPERAND_TYPE_TEMP) + tempOp = 1; + else if (i->asOperands[2].eType == OPERAND_TYPE_TEMP) + tempOp = 2; + + // Also reject comparisons where we compare temp.x vs temp.y + if (i->asOperands[1].eType == OPERAND_TYPE_TEMP && i->asOperands[2].eType == OPERAND_TYPE_TEMP && i->asOperands[1].ui32RegisterNumber == i->asOperands[2].ui32RegisterNumber) + return false; + + if (tempOp == -1) + return false; + + if (i->asOperands[0].GetNumSwizzleElements() != 1) + return false; + + return true; + } + + // Returns 
true iff both instructions perform identical operation. For the purposes of Loop transformation, we only consider operations of type tX = tX imm32 + static bool AreInstructionsIdentical(const Instruction *a, const Instruction *b) + { + if (a->eOpcode != b->eOpcode) + return false; + ASSERT(a->ui32NumOperands == b->ui32NumOperands); + uint32_t dstReg = 0; + if (a->asOperands[0].eType != OPERAND_TYPE_TEMP) + return false; + dstReg = a->asOperands[0].ui32RegisterNumber; + + for (uint32_t i = 0; i < a->ui32NumOperands; i++) + { + const Operand &aop = a->asOperands[i]; + const Operand &bop = b->asOperands[i]; + if (aop.eType != bop.eType) + return false; + + if (aop.GetAccessMask() != bop.GetAccessMask()) + return false; + + if (aop.GetNumSwizzleElements() != 1) + return false; + + if (aop.eType == OPERAND_TYPE_TEMP) + { + if (aop.ui32RegisterNumber != bop.ui32RegisterNumber) + return false; + if (aop.ui32RegisterNumber != dstReg) + return false; + } + else if (aop.eType == OPERAND_TYPE_IMMEDIATE32) + { + if (memcmp(aop.afImmediates, bop.afImmediates, 4 * sizeof(float)) != 0) + return false; + } + } + return true; + } + + // Attempt to transform a single loop into a for-statement + static void AttemptLoopTransform(ShaderPhase &phase, LoopInfo &li) + { + // In order to transform a loop into a for, the following has to hold: + // - The loop must start with a comparison instruction where one of the src operands is a temp (induction variable), followed by OPCODE_BREAKC. + // - The loop must end with an arithmetic operation (SUB or ADD) where the dest operand is the same temp as one of the sources in the comparison instruction above + // Additionally, if the loop induction variable is initialized before the start of the loop and it has only uses inside the LOOP/ENDLOOP pair, we can declare that inside the for statement. + // Also, the loop induction variable must be standalone (as in, never used as part of a larger vector) + + Instruction *cmpInst = li.m_StartLoop + 1; + + if (!IsScalarTempComparisonInstruction(cmpInst)) + return; + + Instruction *breakInst = li.m_StartLoop + 2; + if (breakInst->eOpcode != OPCODE_BREAKC) + return; + if (breakInst->asOperands[0].eType != OPERAND_TYPE_TEMP) + return; + if (breakInst->asOperands[0].ui32RegisterNumber != cmpInst->asOperands[0].ui32RegisterNumber) + return; + + // Check that the comparison result isn't used anywhere else + if (cmpInst->m_Uses.size() != 1) + return; + + ASSERT(cmpInst->m_Uses[0].m_Inst == breakInst); + + // Ok, at least we have the comparison + breakc combo at top. Try to find the induction variable + uint32_t inductionVarIdx = 0; + + Instruction *lastInst = li.m_EndLoop - 1; + if (lastInst->eOpcode != OPCODE_IADD) + return; + if (lastInst->asOperands[0].eType != OPERAND_TYPE_TEMP) + return; + + if (lastInst->asOperands[0].GetNumSwizzleElements() != 1) + return; + + uint32_t indVar = lastInst->asOperands[0].ui32RegisterNumber; + // Verify that the induction variable actually matches. 
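+        // (Illustrative DXBC shape being matched here, register numbers assumed:
+        //      loop
+        //        ige r1.x, r0.x, l(16)    // cmpInst: r0.x is the candidate induction variable
+        //        breakc_nz r1.x           // breakInst
+        //        ...                      // loop body
+        //        iadd r0.x, r0.x, l(1)    // lastInst
+        //      endloop
+        //  which the output backend can then print as "for (int x = <init>; x < 16; ++x) { ... }".)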
+ if (cmpInst->asOperands[1].eType == OPERAND_TYPE_TEMP && cmpInst->asOperands[1].ui32RegisterNumber == indVar) + inductionVarIdx = 1; + else if (cmpInst->asOperands[2].eType == OPERAND_TYPE_TEMP && cmpInst->asOperands[2].ui32RegisterNumber == indVar) + inductionVarIdx = 2; + else + return; + + // Verify that we also read from the induction variable in the last instruction + if (!((lastInst->asOperands[1].eType == OPERAND_TYPE_TEMP && lastInst->asOperands[1].ui32RegisterNumber == indVar) || + (lastInst->asOperands[2].eType == OPERAND_TYPE_TEMP && lastInst->asOperands[2].ui32RegisterNumber == indVar))) + return; + + // Nvidia compiler bug workaround: The shader compiler tries to be smart and unrolls constant loops, + // but then fails miserably if the loop variable is used as an index to UAV loads/stores or some other cases ("array access too complex") + // This is also triggered when the driver optimizer sees "simple enough" arithmetics (whatever that is) done on the loop variable before indexing. + // So, disable for-loop transformation altogether whenever we see a UAV load or store inside a loop. + for (auto itr = li.m_StartLoop; itr != li.m_EndLoop; itr++) + { + switch (itr->eOpcode) + { + case OPCODE_LD_RAW: + case OPCODE_LD_STRUCTURED: + case OPCODE_LD_UAV_TYPED: + case OPCODE_STORE_RAW: + case OPCODE_STORE_STRUCTURED: + case OPCODE_STORE_UAV_TYPED: + return; // Nope, can't do a for, not even a partial one. + default: + break; + } + } + + // One more thing to check: The comparison input may only see 1 definition that originates from inside the loop range: the one in lastInst. + // Anything else means that there's a continue statement, or another break/breakc and that means that lastInst wouldn't get called. + // Of course, if all those instructions are identical, then it's fine. + // Ideally, if there's only one definition that's from outside the loop range, then we can use that as the initializer, as well. + + Instruction *initializer = NULL; + std::vector definitionsOutsideRange; + std::vector definitionsInsideRange; + std::for_each(cmpInst->asOperands[inductionVarIdx].m_Defines.begin(), cmpInst->asOperands[inductionVarIdx].m_Defines.end(), [&](const Operand::Define &def) + { + if (def.m_Inst < li.m_StartLoop || def.m_Inst > li.m_EndLoop) + definitionsOutsideRange.push_back(&def); + else + definitionsInsideRange.push_back(&def); + }); + + if (definitionsInsideRange.size() != 1) + { + // All definitions must be identical + for (std::vector::iterator itr = definitionsInsideRange.begin()+1; itr != definitionsInsideRange.end(); itr++) + { + if (!AreInstructionsIdentical((*itr)->m_Inst, definitionsInsideRange[0]->m_Inst)) + return; + } + } + + ASSERT(definitionsOutsideRange.size() > 0); + if (definitionsOutsideRange.size() == 1) + initializer = definitionsOutsideRange[0]->m_Inst; + + // Initializer must only write to one component + if (initializer && initializer->asOperands[0].GetNumSwizzleElements() != 1) + initializer = 0; + + // Check that the initializer is only used within the range so we can move it to for statement + if (initializer) + { + bool hasUsesOutsideRange = false; + std::for_each(initializer->m_Uses.begin(), initializer->m_Uses.end(), [&](const Instruction::Use &u) + { + if (u.m_Inst < li.m_StartLoop || u.m_Inst > li.m_EndLoop) + hasUsesOutsideRange = true; + }); + // Has outside uses? 
we cannot pull that up to the for statement + if (hasUsesOutsideRange) + initializer = 0; + } + + // Check that the loop adder instruction only has uses inside the loop range, otherwise we cannot move the initializer either + if (initializer) + { + bool cannotDoInitializer = false; + for (auto itr = lastInst->m_Uses.begin(); itr != lastInst->m_Uses.end(); itr++) + { + const Instruction::Use &u = *itr; + if (u.m_Inst < li.m_StartLoop || u.m_Inst > li.m_EndLoop) + { + cannotDoInitializer = true; + break; + } + // Also check that the uses are not vector ops (temp splitting has already pulled everything to .x if this is a standalone var) + if (u.m_Op->GetAccessMask() != 1) + { + cannotDoInitializer = true; + break; + } + } + // Has outside uses? we cannot pull that up to the for statement + if (cannotDoInitializer) + initializer = 0; + } + + + if (initializer) + { + // We can declare the initializer in the for loop header, allocate a new number for it and change all uses into that. + uint32_t newRegister = phase.m_NextFreeTempRegister++; + li.m_StartLoop->m_InductorRegister = newRegister; + std::for_each(initializer->m_Uses.begin(), initializer->m_Uses.end(), [newRegister](const Instruction::Use &u) + { + u.m_Op->m_ForLoopInductorName = newRegister; + }); + // Also tweak the destinations for cmpInst, and lastInst + if (cmpInst->asOperands[1].eType == OPERAND_TYPE_TEMP && cmpInst->asOperands[1].ui32RegisterNumber == initializer->asOperands[0].ui32RegisterNumber) + cmpInst->asOperands[1].m_ForLoopInductorName = newRegister; + else + cmpInst->asOperands[2].m_ForLoopInductorName = newRegister; + + if (lastInst->asOperands[1].eType == OPERAND_TYPE_TEMP && lastInst->asOperands[1].ui32RegisterNumber == initializer->asOperands[0].ui32RegisterNumber) + lastInst->asOperands[1].m_ForLoopInductorName = newRegister; + else + lastInst->asOperands[2].m_ForLoopInductorName = newRegister; + + lastInst->asOperands[0].m_ForLoopInductorName = newRegister; + initializer->asOperands[0].m_ForLoopInductorName = newRegister; + } + + // This loop can be transformed to for-loop. Do the necessary magicks. 
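+        // I.e. remember the four inductor instructions on the LOOP instruction so the output
+        // backend can print them as the for-statement header, and mark them as skipped during
+        // normal instruction translation.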
+ li.m_StartLoop->m_LoopInductors[0] = initializer; + li.m_StartLoop->m_LoopInductors[1] = cmpInst; + li.m_StartLoop->m_LoopInductors[2] = breakInst; + li.m_StartLoop->m_LoopInductors[3] = lastInst; + + if (initializer) + initializer->m_SkipTranslation = true; + cmpInst->m_SkipTranslation = true; + breakInst->m_SkipTranslation = true; + lastInst->m_SkipTranslation = true; + + } + + void DoLoopTransform(ShaderPhase &phase) + { + Loops loops; + BuildLoopInfo(phase, loops); + + std::for_each(loops.begin(), loops.end(), [&phase](LoopInfo &li) + { + // Some sanity checks: start and end points must be initialized, we shouldn't have any switches here, and each loop must have at least one exit point + // Also that there's at least 2 instructions in loop body + ASSERT(li.m_StartLoop != 0); + ASSERT(li.m_EndLoop != 0); + ASSERT(li.m_EndLoop > li.m_StartLoop + 2); + ASSERT(!li.m_IsSwitch); + ASSERT(!li.m_ExitPoints.empty()); + AttemptLoopTransform(phase, li); + }); + } +}; \ No newline at end of file diff --git a/src/Operand.cpp b/src/Operand.cpp new file mode 100644 index 0000000..ff80fec --- /dev/null +++ b/src/Operand.cpp @@ -0,0 +1,586 @@ + +#include "internal_includes/Operand.h" +#include "internal_includes/debug.h" +#include "internal_includes/HLSLccToolkit.h" +#include "internal_includes/Shader.h" +#include "internal_includes/HLSLCrossCompilerContext.h" +#include "internal_includes/Instruction.h" + +uint32_t Operand::GetAccessMask() const +{ + int i; + uint32_t accessMask = 0; + // TODO: Destination writemask can (AND DOES) affect access from sources, but do it conservatively for now. + switch (eSelMode) + { + default: + case OPERAND_4_COMPONENT_MASK_MODE: + // Update access mask + accessMask = ui32CompMask; + if (accessMask == 0) + accessMask = OPERAND_4_COMPONENT_MASK_ALL; + break; + + case OPERAND_4_COMPONENT_SWIZZLE_MODE: + accessMask = 0; + for (i = 0; i < 4; i++) + accessMask |= 1 << (aui32Swizzle[i]); + break; + + case OPERAND_4_COMPONENT_SELECT_1_MODE: + accessMask = 1 << (aui32Swizzle[0]); + break; + + } + ASSERT(accessMask != 0); + return accessMask; +} + +int Operand::GetMaxComponent() const +{ + if (iWriteMaskEnabled && + iNumComponents == 4) + { + //Component Mask + if (eSelMode == OPERAND_4_COMPONENT_MASK_MODE) + { + if (ui32CompMask != 0 && ui32CompMask != (OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z | OPERAND_4_COMPONENT_MASK_W)) + { + if (ui32CompMask & OPERAND_4_COMPONENT_MASK_W) + { + return 4; + } + if (ui32CompMask & OPERAND_4_COMPONENT_MASK_Z) + { + return 3; + } + if (ui32CompMask & OPERAND_4_COMPONENT_MASK_Y) + { + return 2; + } + if (ui32CompMask & OPERAND_4_COMPONENT_MASK_X) + { + return 1; + } + } + } + else + //Component Swizzle + if (eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) + { + if (ui32Swizzle == NO_SWIZZLE) + return 4; + + uint32_t res = 0; + for (int i = 0; i < 4; i++) + { + res = std::max(aui32Swizzle[i], res); + } + return (int)res + 1; + } + else + if (eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) + { + return 1; + } + } + + return 4; +} + +//Single component repeated +//e..g .wwww +bool Operand::IsSwizzleReplicated() const +{ + if (iWriteMaskEnabled && + iNumComponents == 4) + { + if (eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) + { + if (ui32Swizzle == WWWW_SWIZZLE || + ui32Swizzle == ZZZZ_SWIZZLE || + ui32Swizzle == YYYY_SWIZZLE || + ui32Swizzle == XXXX_SWIZZLE) + { + return true; + } + } + } + return false; +} + + +// Get the number of elements returned by operand, taking additional component mask into 
account +uint32_t Operand::GetNumSwizzleElements(uint32_t _ui32CompMask /* = OPERAND_4_COMPONENT_MASK_ALL */) const +{ + uint32_t count = 0; + + switch (eType) + { + case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED: + return 1; // TODO: does mask make any sense here? + case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP: + case OPERAND_TYPE_INPUT_THREAD_ID: + case OPERAND_TYPE_INPUT_THREAD_GROUP_ID: + // Adjust component count and break to more processing + ((Operand *)this)->iNumComponents = 3; + break; + case OPERAND_TYPE_IMMEDIATE32: + case OPERAND_TYPE_IMMEDIATE64: + case OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL: + case OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL: + case OPERAND_TYPE_OUTPUT_DEPTH: + { + // Translate numComponents into bitmask + // 1 -> 1, 2 -> 3, 3 -> 7 and 4 -> 15 + uint32_t compMask = (1 << iNumComponents) - 1; + + compMask &= _ui32CompMask; + // Calculate bits left in compMask + return HLSLcc::GetNumberBitsSet(compMask); + } + default: + { + break; + } + } + + if (iWriteMaskEnabled && + iNumComponents != 1) + { + //Component Mask + if (eSelMode == OPERAND_4_COMPONENT_MASK_MODE) + { + uint32_t compMask = ui32CompMask; + if (compMask == 0) + compMask = OPERAND_4_COMPONENT_MASK_ALL; + compMask &= _ui32CompMask; + + if (compMask == OPERAND_4_COMPONENT_MASK_ALL) + return 4; + + if (compMask & OPERAND_4_COMPONENT_MASK_X) + { + count++; + } + if (compMask & OPERAND_4_COMPONENT_MASK_Y) + { + count++; + } + if (compMask & OPERAND_4_COMPONENT_MASK_Z) + { + count++; + } + if (compMask & OPERAND_4_COMPONENT_MASK_W) + { + count++; + } + } + else + //Component Swizzle + if (eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) + { + uint32_t i; + for (i = 0; i < 4; ++i) + { + if ((_ui32CompMask & (1 << i)) == 0) + continue; + + count++; + } + } + else + if (eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) + { + if (aui32Swizzle[0] == OPERAND_4_COMPONENT_X && (_ui32CompMask & OPERAND_4_COMPONENT_MASK_X)) + { + count++; + } + else + if (aui32Swizzle[0] == OPERAND_4_COMPONENT_Y && (_ui32CompMask & OPERAND_4_COMPONENT_MASK_Y)) + { + count++; + } + else + if (aui32Swizzle[0] == OPERAND_4_COMPONENT_Z && (_ui32CompMask & OPERAND_4_COMPONENT_MASK_Z)) + { + count++; + } + else + if (aui32Swizzle[0] == OPERAND_4_COMPONENT_W && (_ui32CompMask & OPERAND_4_COMPONENT_MASK_W)) + { + count++; + } + } + + //Component Select 1 + } + + if (!count) + { + // Translate numComponents into bitmask + // 1 -> 1, 2 -> 3, 3 -> 7 and 4 -> 15 + uint32_t compMask = (1 << iNumComponents) - 1; + + compMask &= _ui32CompMask; + // Calculate bits left in compMask + return HLSLcc::GetNumberBitsSet(compMask); + } + + return count; +} + +// Returns 0 if the register used by the operand is per-vertex, or 1 if per-patch +int Operand::GetRegisterSpace(SHADER_TYPE eShaderType, SHADER_PHASE_TYPE eShaderPhaseType) const +{ + if (eShaderType != HULL_SHADER && eShaderType != DOMAIN_SHADER) + return 0; + + if (eShaderType == HULL_SHADER && eShaderPhaseType == HS_CTRL_POINT_PHASE) + return 0; + + if (eShaderType == DOMAIN_SHADER && eType == OPERAND_TYPE_OUTPUT) + return 0; + + if (eType == OPERAND_TYPE_INPUT_CONTROL_POINT || eType == OPERAND_TYPE_OUTPUT_CONTROL_POINT) + return 0; + + return 1; +} + +int Operand::GetRegisterSpace(const HLSLCrossCompilerContext *psContext) const +{ + return GetRegisterSpace(psContext->psShader->eShaderType, psContext->psShader->asPhases[psContext->currentPhase].ePhase); +} + +SHADER_VARIABLE_TYPE Operand::GetDataType(HLSLCrossCompilerContext* psContext, SHADER_VARIABLE_TYPE ePreferredTypeForImmediates /* = SVT_INT */) 
const +{ + // The min precision qualifier overrides all of the stuff below + switch (eMinPrecision) + { + case OPERAND_MIN_PRECISION_FLOAT_16: + return SVT_FLOAT16; + case OPERAND_MIN_PRECISION_FLOAT_2_8: + return SVT_FLOAT10; + case OPERAND_MIN_PRECISION_SINT_16: + return SVT_INT16; + case OPERAND_MIN_PRECISION_UINT_16: + return SVT_UINT16; + default: + break; + } + + switch (eType) + { + case OPERAND_TYPE_TEMP: + { + SHADER_VARIABLE_TYPE eCurrentType; + int i = 0; + + if (eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) + { + return aeDataType[aui32Swizzle[0]]; + } + if (eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) + { + if (ui32Swizzle == (NO_SWIZZLE)) + { + return aeDataType[0]; + } + + return aeDataType[aui32Swizzle[0]]; + } + + if (eSelMode == OPERAND_4_COMPONENT_MASK_MODE) + { + uint32_t mask = ui32CompMask; + if (!mask) + { + mask = OPERAND_4_COMPONENT_MASK_ALL; + } + for (; i < 4; ++i) + { + if (mask & (1 << i)) + { + eCurrentType = aeDataType[i]; + break; + } + } + +#ifdef _DEBUG + //Check if all elements have the same basic type. + for (; i < 4; ++i) + { + if (mask & (1 << i)) + { + if (eCurrentType != aeDataType[i]) + { + ASSERT(0); + } + } + } +#endif + return eCurrentType; + } + + ASSERT(0); + + break; + } + case OPERAND_TYPE_OUTPUT: + { + const uint32_t ui32Register = ui32RegisterNumber; + int regSpace = GetRegisterSpace(psContext); + const ShaderInfo::InOutSignature* psOut = NULL; + + if (regSpace == 0) + psContext->psShader->sInfo.GetOutputSignatureFromRegister(ui32Register, GetAccessMask(), psContext->psShader->ui32CurrentVertexOutputStream, + &psOut); + else + psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Register, GetAccessMask(), &psOut); + + ASSERT(psOut != NULL); + if (psOut->eMinPrec != MIN_PRECISION_DEFAULT) + { + switch (psOut->eMinPrec) + { + default: + ASSERT(0); + break; + case MIN_PRECISION_FLOAT_16: + return SVT_FLOAT16; + case MIN_PRECISION_FLOAT_2_8: + if (psContext->psShader->eTargetLanguage == LANG_METAL) + return SVT_FLOAT16; + else + return SVT_FLOAT10; + case MIN_PRECISION_SINT_16: + return SVT_INT16; + case MIN_PRECISION_UINT_16: + return SVT_UINT16; + } + } + if (psOut->eComponentType == INOUT_COMPONENT_UINT32) + { + return SVT_UINT; + } + else if (psOut->eComponentType == INOUT_COMPONENT_SINT32) + { + return SVT_INT; + } + return SVT_FLOAT; + break; + } + case OPERAND_TYPE_INPUT: + { + const uint32_t ui32Register = aui32ArraySizes[iIndexDims - 1]; + int regSpace = GetRegisterSpace(psContext); + const ShaderInfo::InOutSignature* psIn = NULL; + + if (regSpace == 0) + { + if (psContext->psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[ui32Register] != 0) + return SVT_FLOAT; // All combined inputs are stored as floats + psContext->psShader->sInfo.GetInputSignatureFromRegister(ui32Register, GetAccessMask(), + &psIn); + } + else + { + if (psContext->psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[ui32Register] != 0) + return SVT_FLOAT; // All combined inputs are stored as floats + psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Register, GetAccessMask(), &psIn); + } + + ASSERT(psIn != NULL); + + switch (eSpecialName) + { + //UINT in DX, INT in GL. 
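+            // (e.g. SV_VertexID / SV_InstanceID / SV_PrimitiveID are uint in HLSL, but the
+            //  corresponding gl_VertexID / gl_InstanceID / gl_PrimitiveID are int in GLSL,
+            //  so these inputs are treated as SVT_INT.)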
+ case NAME_PRIMITIVE_ID: + case NAME_VERTEX_ID: + case NAME_INSTANCE_ID: + case NAME_RENDER_TARGET_ARRAY_INDEX: + case NAME_VIEWPORT_ARRAY_INDEX: + case NAME_SAMPLE_INDEX: + + return SVT_INT; + + case NAME_IS_FRONT_FACE: + return SVT_UINT; + + case NAME_POSITION: + case NAME_CLIP_DISTANCE: + return SVT_FLOAT; + + default: + break; + // fall through + } + + if (psIn->eSystemValueType == NAME_IS_FRONT_FACE) + return SVT_UINT; + + if (eSpecialName == NAME_PRIMITIVE_ID || eSpecialName == NAME_VERTEX_ID) + { + return SVT_INT; + } + + //UINT in DX, INT in GL. + if (psIn->eSystemValueType == NAME_INSTANCE_ID || + psIn->eSystemValueType == NAME_PRIMITIVE_ID || + psIn->eSystemValueType == NAME_VERTEX_ID || + psIn->eSystemValueType == NAME_RENDER_TARGET_ARRAY_INDEX || + psIn->eSystemValueType == NAME_VIEWPORT_ARRAY_INDEX || + psIn->eSystemValueType == NAME_SAMPLE_INDEX + ) + { + return SVT_INT; + } + + if (psIn->eMinPrec != MIN_PRECISION_DEFAULT) + { + switch (psIn->eMinPrec) + { + default: + ASSERT(0); + break; + case MIN_PRECISION_FLOAT_16: + return SVT_FLOAT16; + case MIN_PRECISION_FLOAT_2_8: + if (psContext->psShader->eTargetLanguage == LANG_METAL) + return SVT_FLOAT16; + else + return SVT_FLOAT10; + case MIN_PRECISION_SINT_16: + return SVT_INT16; + case MIN_PRECISION_UINT_16: + return SVT_UINT16; + } + } + + if (psIn->eComponentType == INOUT_COMPONENT_UINT32) + { + return SVT_UINT; + } + else if (psIn->eComponentType == INOUT_COMPONENT_SINT32) + { + return SVT_INT; + } + return SVT_FLOAT; + break; + } + case OPERAND_TYPE_CONSTANT_BUFFER: + { + const ConstantBuffer* psCBuf = NULL; + const ShaderVarType* psVarType = NULL; + int32_t rebase = -1; + bool isArray; + int foundVar; + psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, aui32ArraySizes[0], &psCBuf); + if (psCBuf) + { + foundVar = ShaderInfo::GetShaderVarFromOffset(aui32ArraySizes[1], aui32Swizzle, psCBuf, &psVarType, &isArray, NULL, &rebase, psContext->flags); + if (foundVar && m_SubOperands[1].get() == NULL) // TODO: why this suboperand thing? + { + return psVarType->Type; + } + } + else + { + // Todo: this isn't correct yet. 
+ return SVT_FLOAT; + } + break; + } + case OPERAND_TYPE_IMMEDIATE32: + { + return ePreferredTypeForImmediates; + } + + case OPERAND_TYPE_IMMEDIATE64: + { + return SVT_DOUBLE; + } + + case OPERAND_TYPE_INPUT_THREAD_ID: + case OPERAND_TYPE_INPUT_THREAD_GROUP_ID: + case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP: + case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED: + { + return SVT_UINT; + } + case OPERAND_TYPE_SPECIAL_ADDRESS: + case OPERAND_TYPE_SPECIAL_LOOPCOUNTER: + case OPERAND_TYPE_INPUT_FORK_INSTANCE_ID: + case OPERAND_TYPE_INPUT_PRIMITIVEID: + { + return SVT_INT; + } + case OPERAND_TYPE_INPUT_GS_INSTANCE_ID: + { + return SVT_UINT; + } + case OPERAND_TYPE_OUTPUT_COVERAGE_MASK: + { + return SVT_INT; + } + case OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID: + { + return SVT_INT; + } + case OPERAND_TYPE_INDEXABLE_TEMP: // Indexable temps are always floats + case OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER: // So are const arrays currently + default: + { + return SVT_FLOAT; + } + } + + return SVT_FLOAT; +} + +OPERAND_MIN_PRECISION Operand::ResourcePrecisionToOperandPrecision(REFLECT_RESOURCE_PRECISION ePrec) +{ + switch (ePrec) + { + default: + case REFLECT_RESOURCE_PRECISION_UNKNOWN: + case REFLECT_RESOURCE_PRECISION_LOWP: + return OPERAND_MIN_PRECISION_FLOAT_2_8; + case REFLECT_RESOURCE_PRECISION_MEDIUMP: + return OPERAND_MIN_PRECISION_FLOAT_16; + case REFLECT_RESOURCE_PRECISION_HIGHP: + return OPERAND_MIN_PRECISION_DEFAULT; + } +} + +int Operand::GetNumInputElements(const HLSLCrossCompilerContext *psContext) const +{ + const ShaderInfo::InOutSignature *psSig = NULL; + int regSpace = GetRegisterSpace(psContext); + + switch (eType) + { + case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED: + case OPERAND_TYPE_INPUT_FORK_INSTANCE_ID: + case OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID: + return 1; + case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP: + case OPERAND_TYPE_INPUT_THREAD_ID: + case OPERAND_TYPE_INPUT_THREAD_GROUP_ID: + case OPERAND_TYPE_INPUT_DOMAIN_POINT: + return 3; + default: + break; + } + + if (regSpace == 0) + psContext->psShader->sInfo.GetInputSignatureFromRegister(ui32RegisterNumber, GetAccessMask(), &psSig); + else + psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32RegisterNumber, GetAccessMask(), &psSig); + + ASSERT(psSig != NULL); + + // TODO: Are there ever any cases where the mask has 'holes'? 
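+    // (A mask with a hole, e.g. .xz == 0b0101, would still count as 2 elements here.)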
+ return HLSLcc::GetNumberBitsSet(psSig->ui32Mask); +} \ No newline at end of file diff --git a/src/Shader.cpp b/src/Shader.cpp new file mode 100644 index 0000000..9c63fae --- /dev/null +++ b/src/Shader.cpp @@ -0,0 +1,1018 @@ + +#include "internal_includes/Shader.h" +#include "internal_includes/debug.h" +#include +#include "internal_includes/Instruction.h" +#include "internal_includes/Declaration.h" +#include "internal_includes/HLSLccToolkit.h" + +uint32_t Shader::GetTempComponentCount(SHADER_VARIABLE_TYPE eType, uint32_t ui32Reg) const +{ + switch (eType) + { + case SVT_FLOAT: + return psFloatTempSizes[ui32Reg]; + case SVT_FLOAT16: + return psFloat16TempSizes[ui32Reg]; + case SVT_FLOAT10: + return psFloat10TempSizes[ui32Reg]; + case SVT_INT: + return psIntTempSizes[ui32Reg]; + case SVT_INT16: + return psInt16TempSizes[ui32Reg]; + case SVT_INT12: + return psInt12TempSizes[ui32Reg]; + case SVT_UINT: + return psUIntTempSizes[ui32Reg]; + case SVT_UINT16: + return psUInt16TempSizes[ui32Reg]; + case SVT_DOUBLE: + return psDoubleTempSizes[ui32Reg]; + case SVT_BOOL: + return psBoolTempSizes[ui32Reg]; + default: + ASSERT(0); + } + return 0; +} + +void Shader::ConsolidateHullTempVars() +{ + uint32_t i, phase; + uint32_t numTemps = 0; + for (phase = 0; phase < asPhases.size(); phase++) + { + for (i = 0; i < asPhases[phase].psDecl.size(); i++) + { + if (asPhases[phase].psDecl[i].eOpcode == OPCODE_DCL_TEMPS) + { + if (asPhases[phase].psDecl[i].value.ui32NumTemps > numTemps) + numTemps = asPhases[phase].psDecl[i].value.ui32NumTemps; + asPhases[phase].psDecl[i].value.ui32NumTemps = 0; + } + } + } + // Now we have the max temps, write it back to the first one we see. + for (phase = 0; phase < asPhases.size(); phase++) + { + for (i = 0; i < asPhases[phase].psDecl.size(); i++) + { + if (asPhases[phase].psDecl[i].eOpcode == OPCODE_DCL_TEMPS) + { + asPhases[phase].psDecl[i].value.ui32NumTemps = numTemps; + return; + } + } + } +} + +// HLSL has separate register spaces for UAV and structured buffers. GLSL has shared register space for all buffers. +// The aim here is to preserve the UAV buffer bindings as they are and use remaining binding points for structured buffers. +// In this step make aui32StructuredBufferBindingPoints contain increasingly ordered uints starting from zero. +void Shader::PrepareStructuredBufferBindingSlots() +{ + uint32_t i; + + for (i = 0; i < MAX_RESOURCE_BINDINGS; i++) + { + aui32StructuredBufferBindingPoints[i] = i; + } +} + +// Go through all declarations and remove UAV occupied binding points from the aui32StructuredBufferBindingPoints list +void Shader::ResolveStructuredBufferBindingSlots(ShaderPhase *psPhase) +{ + uint32_t p; + std::vector &bindingArray = aui32StructuredBufferBindingPoints; + + for (p = 0; p < psPhase->psDecl.size(); ++p) + { + if (psPhase->psDecl[p].eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW || + psPhase->psDecl[p].eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED) + { + uint32_t uav = psPhase->psDecl[p].asOperands[0].ui32RegisterNumber; // uav binding point + uint32_t i; + + // Find uav binding point from the list. Drop search if not found. 
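/* Worked example (hypothetical bindings, for illustration): with UAVs declared at
   binding points 1 and 3, the search below removes those entries from the ascending
   list built by PrepareStructuredBufferBindingSlots:

     before: { 0, 1, 2, 3, 4, 5, ... }
     after : { 0, 2, 4, 5, 6, 7, ... }

   Structured buffers are then handed the remaining slots in order, so they never
   collide with the UAV binding points, which stay exactly where HLSL put them. */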
+ for (i = 0; i < MAX_RESOURCE_BINDINGS && bindingArray[i] <= uav; i++) + { + if (bindingArray[i] == uav) // Remove uav binding point from the list by copying array remainder here + { + memcpy(&bindingArray[i], &bindingArray[i + 1], (MAX_RESOURCE_BINDINGS - 1 - i)*sizeof(uint32_t)); + break; + } + } + } + } +} + +// Image (RWTexture in HLSL) declaration op does not provide enough info about the format and accessing. +// Go through all image declarations and instructions accessing it to see if it is readonly/writeonly. +// While doing that we also get the number of components expected in the image format. +// Also resolve access flags for other UAVs as well. No component count resolving for them. +void ShaderPhase::ResolveUAVProperties() +{ + Declaration *psFirstDeclaration = &psDecl[0]; + + uint32_t ui32NumDeclarations = (uint32_t)psDecl.size(); + Instruction *psFirstInstruction = &psInst[0]; + uint32_t ui32NumInstructions = (uint32_t)psInst.size(); + + if (ui32NumDeclarations == 0 || ui32NumInstructions == 0) + return; + + Declaration *psLastDeclaration = psFirstDeclaration + ui32NumDeclarations - 1; + Instruction *psLastInstruction = psFirstInstruction + ui32NumInstructions - 1; + Declaration *psDecl; + + for (psDecl = psFirstDeclaration; psDecl <= psLastDeclaration; psDecl++) + { + Instruction *psInst; + uint32_t uavReg; + if (psDecl->eOpcode != OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED && + psDecl->eOpcode != OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED && + psDecl->eOpcode != OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW) + continue; + + uavReg = psDecl->asOperands[0].ui32RegisterNumber; + + for (psInst = psFirstInstruction; psInst <= psLastInstruction; psInst++) + { + uint32_t opIndex; + uint32_t accessFlags; + uint32_t numComponents; + + switch (psInst->eOpcode) + { + case OPCODE_LD_UAV_TYPED: + opIndex = 2; + accessFlags = ACCESS_FLAG_READ; + numComponents = psInst->asOperands[0].GetNumSwizzleElements(); // get component count from the write target + break; + + case OPCODE_STORE_UAV_TYPED: + ASSERT(psInst->asOperands[0].eType == OPERAND_TYPE_UNORDERED_ACCESS_VIEW); + opIndex = 0; + accessFlags = ACCESS_FLAG_WRITE; + numComponents = 0; // store op does not contribute on the component count resolving + break; + + case OPCODE_ATOMIC_CMP_STORE: + case OPCODE_ATOMIC_AND: + case OPCODE_ATOMIC_IADD: + case OPCODE_ATOMIC_OR: + case OPCODE_ATOMIC_XOR: + case OPCODE_ATOMIC_IMIN: + case OPCODE_ATOMIC_UMIN: + opIndex = 0; + accessFlags = ACCESS_FLAG_READ | ACCESS_FLAG_WRITE; + numComponents = 1; + break; + + case OPCODE_IMM_ATOMIC_AND: + case OPCODE_IMM_ATOMIC_IADD: + case OPCODE_IMM_ATOMIC_IMAX: + case OPCODE_IMM_ATOMIC_IMIN: + case OPCODE_IMM_ATOMIC_UMAX: + case OPCODE_IMM_ATOMIC_UMIN: + case OPCODE_IMM_ATOMIC_OR: + case OPCODE_IMM_ATOMIC_XOR: + case OPCODE_IMM_ATOMIC_EXCH: + case OPCODE_IMM_ATOMIC_CMP_EXCH: + opIndex = 1; + accessFlags = ACCESS_FLAG_READ | ACCESS_FLAG_WRITE; + numComponents = 1; + break; + + // The rest of the ops here are only for buffer UAVs. No need for component count resolving. 
+ case OPCODE_LD_STRUCTURED: + opIndex = 3; + accessFlags = ACCESS_FLAG_READ; + numComponents = 0; + break; + + case OPCODE_STORE_STRUCTURED: + opIndex = 0; + accessFlags = ACCESS_FLAG_WRITE; + numComponents = 0; + break; + + case OPCODE_LD_RAW: + opIndex = 2; + accessFlags = ACCESS_FLAG_READ; + numComponents = 0; + break; + + case OPCODE_STORE_RAW: + opIndex = 0; + accessFlags = ACCESS_FLAG_WRITE; + numComponents = 0; + break; + + case OPCODE_IMM_ATOMIC_ALLOC: + case OPCODE_IMM_ATOMIC_CONSUME: + opIndex = 1; + accessFlags = ACCESS_FLAG_READ | ACCESS_FLAG_WRITE; + numComponents = 0; + break; + + default: + continue; + } + + // Buffer loads can also happen on non-uav. Skip those. + if(psInst->asOperands[opIndex].eType != OPERAND_TYPE_UNORDERED_ACCESS_VIEW) + continue; + + // Check the instruction is operating on the declared uav + if (psInst->asOperands[opIndex].ui32RegisterNumber != uavReg) + continue; + + psDecl->sUAV.ui32AccessFlags |= accessFlags; + + // get the max components accessed, but only for typed (texture) UAVs + if (numComponents > psDecl->sUAV.ui32NumComponents && psDecl->eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED) + { + psDecl->sUAV.ui32NumComponents = numComponents; + } + } + } +} + + +static void GatherOperandAccessMasks(const Operand *psOperand, char *destTable) +{ + int i; + uint32_t reg; + for (i = 0; i < MAX_SUB_OPERANDS; i++) + { + if (psOperand->m_SubOperands[i].get()) + GatherOperandAccessMasks(psOperand->m_SubOperands[i].get(), destTable); + } + + if (psOperand->eType != OPERAND_TYPE_TEMP) + return; + + reg = psOperand->ui32RegisterNumber & 0xffff; // We add 0x10000 to all newly created ones earlier + + destTable[reg] |= (char)psOperand->GetAccessMask(); +} + +// Coalesce the split temps back based on their original temp register. Keep uint/int/float operations separate +static void CoalesceTemps(Shader *psShader, ShaderPhase *psPhase, uint32_t ui32MaxOrigTemps) +{ + // Just move all operations back to their original registers, but keep the data type assignments. 
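/* Sketch of the register numbering used below (hypothetical values): split registers
   created during type analysis are first parked at 0x10000 + i so they cannot collide
   with the rebased originals. An original register i whose live components start at .y
   (rebase 1) then lands at i + ui32MaxOrigTemps * rebase; e.g. with ui32MaxOrigTemps == 8,
   register 3 accessed only as .yz moves to register 11 with a two-component size. */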
+ uint32_t i, k; + Instruction *psLastInstruction = &psPhase->psInst[psPhase->psInst.size() - 1]; + std::vector opAccessMasks; + + // First move all newly created temps to high enough so they won't overlap with the rebased ones + + Instruction *inst = &psPhase->psInst[0]; + + if (psPhase->psInst.size() == 0 || psPhase->ui32OrigTemps == 0) + return; + + while (inst <= psLastInstruction) + { + // Update all operands and their suboperands + for (i = psPhase->ui32OrigTemps; i < psPhase->ui32TotalTemps; i++) + { + for (k = 0; k < inst->ui32NumOperands; k++) + inst->ChangeOperandTempRegister(&inst->asOperands[k], i, 0x10000 + i, OPERAND_4_COMPONENT_MASK_ALL, UD_CHANGE_ALL, 0); + } + inst++; + } + + // Prune the original registers, rebase if necessary + opAccessMasks.clear(); + opAccessMasks.resize(psPhase->ui32TotalTemps, 0); + inst = &psPhase->psInst[0]; + while (inst <= psLastInstruction) + { + for (k = 0; k < inst->ui32NumOperands; k++) + GatherOperandAccessMasks(&inst->asOperands[k], &opAccessMasks[0]); + inst++; + } + + for (i = 0; i < psPhase->ui32TotalTemps; i++) + { + uint32_t rebase, count; + uint32_t newReg = i; + uint32_t origReg = i; + int needsMoving = 0; + SHADER_VARIABLE_TYPE dataType; + + // Figure out rebase and count + rebase = 0; + count = 0; + if (i < psPhase->ui32OrigTemps) + { + // One of the original registers + k = opAccessMasks[i]; + if (k == 0) + continue; + + while ((k & 1) == 0) + { + rebase++; + k = k >> 1; + } + while (k != 0) + { + count++; + k = k >> 1; + } + newReg = i + ui32MaxOrigTemps * rebase; + if (rebase != 0) + needsMoving = 1; + } + else + { + // Newly created split registers, read info from table + // Read the count and rebase from split info table + count = (psPhase->pui32SplitInfo[i] >> 24) & 0xff; + rebase = (psPhase->pui32SplitInfo[i] >> 16) & 0xff; + origReg = 0x10000 + i; + newReg = (psPhase->pui32SplitInfo[i]) & 0xffff; + while (psPhase->pui32SplitInfo[newReg] != 0xffffffff) + newReg = (psPhase->pui32SplitInfo[newReg]) & 0xffff; + + // If count is 4, verify that we have both first and last bit set + ASSERT(count != 4 || (opAccessMasks[i] & 9) == 9); + + newReg = newReg + ui32MaxOrigTemps * rebase; + + // Don't rebase again + rebase = 0; + needsMoving = 1; + + } + + if (needsMoving) + { + // printf("Moving reg %d to %d, count %d rebase %d\n", origReg, newReg, count, rebase); + + // Move directly to correct location + inst = &psPhase->psInst[0]; + while (inst <= psLastInstruction) + { + for (k = 0; k < inst->ui32NumOperands; k++) + inst->ChangeOperandTempRegister(&inst->asOperands[k], origReg, newReg, OPERAND_4_COMPONENT_MASK_ALL, UD_CHANGE_ALL, rebase); + inst++; + } + } + // Mark the count + dataType = psPhase->peTempTypes[i * 4 + rebase]; + switch (dataType) + { + default: + ASSERT(0); + break; + case SVT_BOOL: + psShader->psBoolTempSizes[newReg] = std::max(psShader->psBoolTempSizes[newReg], (char)count); + break; + case SVT_FLOAT: + psShader->psFloatTempSizes[newReg] = std::max(psShader->psFloatTempSizes[newReg], (char)count); + break; + case SVT_FLOAT16: + psShader->psFloat16TempSizes[newReg] = std::max(psShader->psFloat16TempSizes[newReg], (char)count); + break; + case SVT_FLOAT10: + psShader->psFloat10TempSizes[newReg] = std::max(psShader->psFloat10TempSizes[newReg], (char)count); + break; + case SVT_INT: + psShader->psIntTempSizes[newReg] = std::max(psShader->psIntTempSizes[newReg], (char)count); + break; + case SVT_INT16: + psShader->psInt16TempSizes[newReg] = std::max(psShader->psInt16TempSizes[newReg], (char)count); + break; + case 
SVT_INT12: + psShader->psInt12TempSizes[newReg] = std::max(psShader->psInt12TempSizes[newReg], (char)count); + break; + case SVT_UINT: + psShader->psUIntTempSizes[newReg] = std::max(psShader->psUIntTempSizes[newReg], (char)count); + break; + case SVT_UINT16: + psShader->psUInt16TempSizes[newReg] = std::max(psShader->psUInt16TempSizes[newReg], (char)count); + break; + case SVT_DOUBLE: + psShader->psDoubleTempSizes[newReg] = std::max(psShader->psDoubleTempSizes[newReg], (char)count); + break; + } + } + +} + +// Mark whether the temp registers are used per each data type. +void Shader::PruneTempRegisters() +{ + uint32_t k; + uint32_t maxOrigTemps = 0; + uint32_t maxTotalTemps = 0; + // First find the total amount of temps + for (k = 0; k < asPhases.size(); k++) + { + ShaderPhase *psPhase = &asPhases[k]; + maxOrigTemps = std::max(maxOrigTemps, psPhase->ui32OrigTemps); + maxTotalTemps = std::max(maxTotalTemps, psPhase->ui32TotalTemps); + } + + if (maxTotalTemps == 0) + return; // splitarrays are nulls, no need to free + + // Allocate and zero-initialize arrays for each temp sizes. *4 is for every possible rebase + psIntTempSizes.clear(); + psIntTempSizes.resize(maxOrigTemps * 4, 0); + psInt12TempSizes.clear(); + psInt12TempSizes.resize(maxOrigTemps * 4, 0); + psInt16TempSizes.clear(); + psInt16TempSizes.resize(maxOrigTemps * 4, 0); + psUIntTempSizes.clear(); + psUIntTempSizes.resize(maxOrigTemps * 4, 0); + psUInt16TempSizes.clear(); + psUInt16TempSizes.resize(maxOrigTemps * 4, 0); + psFloatTempSizes.clear(); + psFloatTempSizes.resize(maxOrigTemps * 4, 0); + psFloat16TempSizes.clear(); + psFloat16TempSizes.resize(maxOrigTemps * 4, 0); + psFloat10TempSizes.clear(); + psFloat10TempSizes.resize(maxOrigTemps * 4, 0); + psDoubleTempSizes.clear(); + psDoubleTempSizes.resize(maxOrigTemps * 4, 0); + psBoolTempSizes.clear(); + psBoolTempSizes.resize(maxOrigTemps * 4, 0); + + for (k = 0; k < asPhases.size(); k++) + { + ShaderPhase *psPhase = &asPhases[k]; + CoalesceTemps(this, psPhase, maxOrigTemps); + if (psPhase->psTempDeclaration) + psPhase->psTempDeclaration->value.ui32NumTemps = maxOrigTemps * 4; + } + +} + +static void DoSignatureAnalysis(std::vector &psSignatures, std::vector &outTable) +{ + // Fill the char, 2 bits per component so that each 2 bits encode the following info: + // 0: unused OR used by the first signature we happened to see + // 1: used by the second signature + // 2: used by the third sig + // 3: used by the fourth sig. + + // The counters for each input/output/patch. Start with 8 registers, grow as needed + std::vector counters(8, (unsigned char)0); + outTable.clear(); + outTable.resize(8, (unsigned char)0); + + size_t i; + for (i = 0; i < psSignatures.size(); i++) + { + ShaderInfo::InOutSignature *psSig = &psSignatures[i]; + char currCounter; + char mask; + ASSERT(psSig != NULL); + + // We'll skip SV_Depth and others that put -1 to the register. 
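/* Worked example (hypothetical signatures, for illustration): two float2 semantics
   packed into one register, the first covering .xy and the second covering .zw.
   The first one sees counter value 0 and contributes nothing; the second sees
   counter value 1, so outTable[reg] ends up as 0b01010000: the 2-bit slots for
   .x/.y hold code 0 and the slots for .z/.w hold code 1. */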
+ if (psSig->ui32Register == 0xffffffffu) + continue; + + // Make sure there's enough room in the table + if (psSig->ui32Register >= counters.size()) + { + counters.resize(psSig->ui32Register * 2, 0); + outTable.resize(psSig->ui32Register * 2, 0); + } + + // Apply counter value to masked items + currCounter = counters[psSig->ui32Register]; + // Duplicate counter bits + currCounter = currCounter | (currCounter << 2) | (currCounter << 4) | (currCounter << 6); + // Widen the mask + mask = (unsigned char)psSig->ui32Mask; + mask = ((mask & 8) << 3) | ((mask & 4) << 2) | ((mask & 2) << 1) | (mask & 1); + mask = mask | (mask << 1); + // Write output + outTable[psSig->ui32Register] |= (currCounter & mask); + // Update counter + counters[psSig->ui32Register]++; + } +} + +void Shader::DoIOOverlapOperand(ShaderPhase *psPhase, Operand *psOperand) +{ + uint32_t i; + uint32_t regSpace = psOperand->GetRegisterSpace(eShaderType, psPhase->ePhase); + unsigned char *redirectTable = NULL; + unsigned char redir = 0; + unsigned char firstFound = 0; + uint32_t mask; + + for (i = 0; i < MAX_SUB_OPERANDS; i++) + if (psOperand->m_SubOperands[i].get()) + DoIOOverlapOperand(psPhase, psOperand->m_SubOperands[i].get()); + + + switch (psOperand->eType) + { + case OPERAND_TYPE_INPUT: + case OPERAND_TYPE_INPUT_CONTROL_POINT: + case OPERAND_TYPE_INPUT_PATCH_CONSTANT: + redirectTable = regSpace == 0 ? &psPhase->acInputNeedsRedirect[0] : &psPhase->acPatchConstantsNeedsRedirect[0]; + break; + + case OPERAND_TYPE_OUTPUT: + case OPERAND_TYPE_OUTPUT_CONTROL_POINT: + redirectTable = regSpace == 0 ? &psPhase->acOutputNeedsRedirect[0] : &psPhase->acPatchConstantsNeedsRedirect[0]; + break; + + default: + // Not a input or output, nothing to do here + return; + } + + redir = redirectTable[psOperand->ui32RegisterNumber]; + + if (redir == 0xff) // Already found overlap? + return; + + mask = psOperand->GetAccessMask(); + i = 0; + // Find the first mask bit set. + while ((mask & (1 << i)) == 0) + i++; + + firstFound = (redir >> (i * 2)) & 3; + for (; i < 4; i++) + { + unsigned char sig; + if ((mask & (1 << i)) == 0) + continue; + + sig = (redir >> (i * 2)) & 3; + // All set bits must access the same signature + if (sig != firstFound) + { + redirectTable[psOperand->ui32RegisterNumber] = 0xff; + return; + } + } + +} + +static void PruneRedirectEntry(unsigned char &itr) +{ + if (itr != 0xff) + itr = 0; +} + +// Check if inputs and outputs are accessed across semantic boundaries +// as in, 2x texcoord vec2's are packed together as vec4 but still accessed together. 
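/* For illustration (hypothetical packing): if TEXCOORD0 occupies v1.xy and TEXCOORD1
   occupies v1.zw, an instruction that reads v1.xyzw in one go crosses the signature
   boundary. AnalyzeIOOverlap below then ends up writing 0xff (via DoIOOverlapOperand)
   into the redirect table entry for that register, flagging that the packed register
   needs a redirect instead of being treated as two independent variables. */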
+void Shader::AnalyzeIOOverlap() +{ + uint32_t i, k; + std::vector outData; + DoSignatureAnalysis(sInfo.psInputSignatures, outData); + + // Now data has the values, copy them to all phases + for (i = 0; i < asPhases.size(); i++) + asPhases[i].acInputNeedsRedirect = outData; + + DoSignatureAnalysis(sInfo.psOutputSignatures, outData); + for (i = 0; i < asPhases.size(); i++) + asPhases[i].acOutputNeedsRedirect = outData; + + DoSignatureAnalysis(sInfo.psPatchConstantSignatures, outData); + for (i = 0; i < asPhases.size(); i++) + asPhases[i].acPatchConstantsNeedsRedirect = outData; + + // Now walk through all operands and suboperands in all instructions and write 0xff to the dest (cannot occur otherwise) + // if we're crossing signature borders + for (i = 0; i < asPhases.size(); i++) + { + ShaderPhase *psPhase = &asPhases[i]; + for (k = 0; k < psPhase->psInst.size(); k++) + { + Instruction *psInst = &psPhase->psInst[k]; + uint32_t j; + for (j = 0; j < psInst->ui32NumOperands; j++) + DoIOOverlapOperand(psPhase, &psInst->asOperands[j]); + } + + // Now prune all tables from anything except 0xff. + std::for_each(psPhase->acInputNeedsRedirect.begin(), psPhase->acInputNeedsRedirect.end(), PruneRedirectEntry); + std::for_each(psPhase->acOutputNeedsRedirect.begin(), psPhase->acOutputNeedsRedirect.end(), PruneRedirectEntry); + std::for_each(psPhase->acPatchConstantsNeedsRedirect.begin(), psPhase->acPatchConstantsNeedsRedirect.end(), PruneRedirectEntry); + } +} + + +// In DX bytecode, all const arrays are vec4's, and all arrays are stuffed to one large array. +// Luckily, each chunk is always accessed with suboperand plus (in ui32RegisterNumber) +// So do an analysis pass. Also trim the vec4's into smaller formats if the extra components are never read. +void ShaderPhase::PruneConstArrays() +{ + using namespace std; + auto customDataItr = find_if(psDecl.begin(), psDecl.end(), [](const Declaration &d) { return d.eOpcode == OPCODE_CUSTOMDATA; }); + // Not found? We're done. + if (customDataItr == psDecl.end()) + return; + + // Store the original declaration + m_ConstantArrayInfo.m_OrigDeclaration = &(*customDataItr); + + // Loop through each operand and pick up usage masks + HLSLcc::ForEachOperand(psInst.begin(), psInst.end(), FEO_FLAG_ALL, [this](const std::vector::iterator &psInst, const Operand *psOperand, uint32_t ui32OperandType) + { + using namespace std; + if (psOperand->eType == OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER) + { + uint32_t accessMask = psOperand->GetAccessMask(); + uint32_t offset = psOperand->ui32RegisterNumber; + + // Update the chunk access mask + + // Find all existing entries that have anything common with the access mask + auto cbrange = m_ConstantArrayInfo.m_Chunks.equal_range(offset); + vector matchingEntries; + for (auto itr = cbrange.first; itr != cbrange.second; itr++) + { + if ((itr->second.m_AccessMask & accessMask) != 0) + { + matchingEntries.push_back(itr); + } + }; + + if (matchingEntries.empty()) + { + // Not found, create new entry + m_ConstantArrayInfo.m_Chunks.insert(make_pair(offset, ConstantArrayChunk(0u, accessMask, (Operand *)psOperand))); + } + else if(matchingEntries.size() == 1) + { + // Update access mask of the one existing entry + matchingEntries[0]->second.m_AccessMask |= accessMask; + matchingEntries[0]->second.m_UseSites.push_back((Operand *)psOperand); + } + else + { + // Multiple entries with (now) overlapping mask. Merge to the first one. 
+ ChunkMap::iterator tgt = matchingEntries[0]; + tgt->second.m_AccessMask |= accessMask; + tgt->second.m_UseSites.push_back((Operand *)psOperand); + ChunkMap &chunks = m_ConstantArrayInfo.m_Chunks; + for_each(matchingEntries.begin() + 1, matchingEntries.end(), [&tgt, &chunks](ChunkMap::iterator itr) + { + tgt->second.m_AccessMask |= itr->second.m_AccessMask; + chunks.erase(itr); + }); + } + } + }); + + // Figure out how large each chunk is by finding the next chunk that uses any bits from the current mask (or the max size if not found) + + uint32_t totalSize = (uint32_t)m_ConstantArrayInfo.m_OrigDeclaration->asImmediateConstBuffer.size(); + for (auto chunk = m_ConstantArrayInfo.m_Chunks.begin(); chunk != m_ConstantArrayInfo.m_Chunks.end(); chunk++) + { + // Find the next chunk that shares any bits in the access mask + auto nextItr = find_if(m_ConstantArrayInfo.m_Chunks.lower_bound(chunk->first + 1), m_ConstantArrayInfo.m_Chunks.end(), [&chunk](ChunkMap::value_type &itr) + { + return (chunk->second.m_AccessMask & itr.second.m_AccessMask) != 0; + }); + + // Not found? Must continue until the end of array + if (nextItr == m_ConstantArrayInfo.m_Chunks.end()) + chunk->second.m_Size = totalSize - chunk->first; + else + { + // Otherwise we know the chunk size directly. + chunk->second.m_Size = nextItr->first - chunk->first; + } + + // Do rebase on the operands if necessary + chunk->second.m_Rebase = 0; + uint32_t t = chunk->second.m_AccessMask; + ASSERT(t != 0); + while ((t & 1) == 0) + { + chunk->second.m_Rebase++; + t >>= 1; + } + uint32_t rebase = chunk->second.m_Rebase; + uint32_t componentCount = 0; + while (t != 0) + { + componentCount++; + t >>= 1; + } + chunk->second.m_ComponentCount = componentCount; + + for_each(chunk->second.m_UseSites.begin(), chunk->second.m_UseSites.end(), [&rebase, &componentCount](Operand *op) + { + // Store the rebase value to each operand and do the actual rebase. + op->m_Rebase = rebase; + op->m_Size = componentCount; + + if (rebase != 0) + { + // Update component mask. Note that we don't need to do anything to the suboperands. They do not affect destination writemask. + switch (op->eSelMode) + { + case OPERAND_4_COMPONENT_MASK_MODE: + { + uint32_t oldMask = op->ui32CompMask; + if (oldMask == 0) + oldMask = OPERAND_4_COMPONENT_MASK_ALL; + + // Check that we're not losing any information + ASSERT((oldMask >> rebase) << rebase == oldMask); + op->ui32CompMask = (oldMask >> rebase); + break; + } + case OPERAND_4_COMPONENT_SELECT_1_MODE: + ASSERT(op->aui32Swizzle[0] >= rebase); + op->aui32Swizzle[0] -= rebase; + break; + case OPERAND_4_COMPONENT_SWIZZLE_MODE: + { + for (int i = 0; i < 4; i++) + { + // Note that this rebase is different from the one done for source operands + ASSERT(op->aui32Swizzle[i] >= rebase); + op->aui32Swizzle[i] -= rebase; + } + break; + } + default: + ASSERT(0); + + } + } + }); + } + + + // We'll do the actual declaration and pruning later on, now that we have the info stored up. 
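/* Worked example (hypothetical accesses, for illustration): if the immediate constant
   buffer is only ever read as icb[i].x and icb[i].yz, two chunks are recorded at the
   same offset, one with access mask .x (component count 1, rebase 0) and one with
   mask .yz (component count 2, rebase 1). They can later be emitted as a float array
   and a vec2 array, with the .yz use sites rebased so .y reads component 0 and .z
   reads component 1 of the smaller array. */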
+ +} + +HLSLcc::ControlFlow::ControlFlowGraph &ShaderPhase::GetCFG() +{ + if (!m_CFGInitialized) + { + m_CFG.Build(&psInst[0]); + m_CFGInitialized = true; + } + + return m_CFG; +} + +void ShaderPhase::UnvectorizeImmMoves() +{ + // NOTE must be called before datatype analysis and other analysis phases are done, as the pointers won't match anymore + // (we insert new instructions there) + using namespace std; + vector nInst; + // Reserve 1.5x space + nInst.reserve(psInst.size() * 3 / 2); + + for_each(psInst.begin(), psInst.end(), [&](Instruction &i) + { + if (i.eOpcode != OPCODE_MOV || i.asOperands[0].eType != OPERAND_TYPE_TEMP || i.asOperands[1].eType != OPERAND_TYPE_IMMEDIATE32 || i.asOperands[0].GetNumSwizzleElements() == 1) + { + nInst.push_back(i); + return; + } + // Ok, found one to unvectorize. + ASSERT(i.asOperands[0].eSelMode == OPERAND_4_COMPONENT_MASK_MODE); + uint32_t mask = i.asOperands[0].ui32CompMask; + for (uint32_t j = 0; j < 4; j++) + { + if ((mask & (1 << j)) == 0) + continue; + + Instruction ni = i; + ni.asOperands[0].ui32CompMask = (1 << j); + nInst.push_back(ni); + } + }); + psInst.clear(); + psInst.swap(nInst); +} + +void ShaderPhase::ExpandSWAPCs() +{ + // First find the DCL_TEMPS declaration + auto dcitr = std::find_if(psDecl.begin(), psDecl.end(), [](const Declaration &decl) -> bool { return decl.eOpcode == OPCODE_DCL_TEMPS; }); + if (dcitr == psDecl.end()) + { + // No temp declaration? Probably we won't have SWAPC either, then. + return; + } + Declaration &tmpDecl = *dcitr; + + uint32_t extraTemp = 0; + bool extraTempAllocated = false; + + // Parse through instructions, open up SWAPCs if necessary + while (1) + { + // Need to find from top every time, because we're inserting stuff into the vector + auto swapItr = std::find_if(psInst.begin(), psInst.end(), [](const Instruction &inst) -> bool { return inst.eOpcode == OPCODE_SWAPC; }); + if (swapItr == psInst.end()) + break; + + // Ok swapItr now points to a SWAPC instruction that we'll have to split up like this (from MSDN): + +/* swapc dest0[.mask], + dest1[.mask], + src0[.swizzle], + src1[.swizzle], + src2[.swizzle] + + expands to : + + movc temp[dest0s mask], + src0[.swizzle], + src2[.swizzle], src1[.swizzle] + + movc dest1[.mask], + src0[.swizzle], + src1[.swizzle], src2[.swizzle] + + mov dest0.mask, temp +*/ + // Allocate a new temp, if not already done + if (!extraTempAllocated) + { + extraTemp = tmpDecl.value.ui32NumTemps++; + extraTempAllocated = true; + } + + Instruction origSwapInst; +#if _DEBUG + origSwapInst.id = swapItr->id; +#endif + std::swap(*swapItr, origSwapInst); // Store the original swapc for reading + + // OP 1: MOVC temp[dest0 mask], src0, src2, stc1 + swapItr->eOpcode = OPCODE_MOVC; + swapItr->ui32NumOperands = 4; + swapItr->ui32FirstSrc = 1; + swapItr->asOperands[0].eType = OPERAND_TYPE_TEMP; + swapItr->asOperands[0].ui32RegisterNumber = extraTemp; + // mask is already fine + swapItr->asOperands[1] = origSwapInst.asOperands[2]; // src0 + swapItr->asOperands[2] = origSwapInst.asOperands[4]; // src2 + swapItr->asOperands[3] = origSwapInst.asOperands[3]; // src1 + // swapItr is already in the psInst vector. 
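// Concrete instance of the expansion above (hypothetical registers):
//   swapc r0.xy, r1.xy, r2.xy, r3.xy, r4.xy
// becomes
//   movc rTmp.xy, r2.xy, r4.xy, r3.xy   <- the rewritten instruction at swapItr
//   movc r1.xy,   r2.xy, r3.xy, r4.xy   <- newInst[0] below
//   mov  r0.xy,   rTmp.xy               <- newInst[1] below
// where rTmp is the freshly allocated extraTemp register.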
+ + Instruction newInst[2] = { Instruction(), Instruction() }; + // OP 2: MOVC dest1, src0, src1, src2 + newInst[0].eOpcode = OPCODE_MOVC; + newInst[0].ui32NumOperands = 4; + newInst[0].ui32FirstSrc = 1; + newInst[0].asOperands[0] = origSwapInst.asOperands[1]; // dest1 + newInst[0].asOperands[1] = origSwapInst.asOperands[2]; // src0 + newInst[0].asOperands[2] = origSwapInst.asOperands[3]; // src1 + newInst[0].asOperands[3] = origSwapInst.asOperands[4]; // src2 +#if _DEBUG + newInst[0].id = swapItr->id; +#endif + + // OP 3: mov dest0.mask, temp + newInst[1].eOpcode = OPCODE_MOV; + newInst[1].ui32NumOperands = 2; + newInst[1].ui32FirstSrc = 1; + newInst[1].asOperands[0] = origSwapInst.asOperands[0]; // dest 0 + // First copy dest0 to src as well to get the mask set up correctly + newInst[1].asOperands[1] = origSwapInst.asOperands[0]; // dest 0; + // Then overwrite with temp reg + newInst[1].asOperands[1].eType = OPERAND_TYPE_TEMP; + newInst[1].asOperands[1].ui32RegisterNumber = extraTemp; +#if _DEBUG + newInst[1].id = swapItr->id; +#endif + + // Insert the new instructions to the vector + psInst.insert(swapItr + 1, newInst, newInst + 2); + } +} + +void Shader::ExpandSWAPCs() +{ + // Just call ExpandSWAPCs for each phase + for (int i = 0; i < asPhases.size(); i++) + { + asPhases[i].ExpandSWAPCs(); + } +} + +void Shader::ForcePositionToHighp() +{ + // Only sensible in vertex shaders (TODO: is this an issue in tessellation shaders? Do we even care?) + if (eShaderType != VERTEX_SHADER) + return; + + ShaderPhase &phase = asPhases[0]; + + // Find the output declaration + std::vector::iterator itr = std::find_if(phase.psDecl.begin(), phase.psDecl.end(), [this](const Declaration &decl) -> bool + { + if (decl.eOpcode == OPCODE_DCL_OUTPUT_SIV) + { + if (decl.asOperands[0].eSpecialName == NAME_POSITION) + return true; + if (decl.asOperands[0].eSpecialName != NAME_UNDEFINED) + return false; + + // This might be SV_Position (because d3dcompiler is weird). Get signature and check + const ShaderInfo::InOutSignature *sig = NULL; + sInfo.GetOutputSignatureFromRegister(decl.asOperands[0].ui32RegisterNumber, decl.asOperands[0].GetAccessMask(), 0, &sig); + ASSERT(sig != NULL); + if ((sig->eSystemValueType == NAME_POSITION || sig->semanticName == "POS") && sig->ui32SemanticIndex == 0) + { + ((ShaderInfo::InOutSignature *)sig)->eMinPrec = MIN_PRECISION_DEFAULT; + return true; + } + return false; + } + else if (decl.eOpcode == OPCODE_DCL_OUTPUT) + { + const ShaderInfo::InOutSignature *sig = NULL; + sInfo.GetOutputSignatureFromRegister(decl.asOperands[0].ui32RegisterNumber, decl.asOperands[0].GetAccessMask(), 0, &sig); + ASSERT(sig != NULL); + if ((sig->eSystemValueType == NAME_POSITION || sig->semanticName == "POS") && sig->ui32SemanticIndex == 0) + { + ((ShaderInfo::InOutSignature *)sig)->eMinPrec = MIN_PRECISION_DEFAULT; + return true; + } + return false; + } + return false; + + }); + + // Do nothing if we don't find suitable output. This may well be INTERNALTESSPOS for tessellation etc. 
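/* For illustration (hypothetical HLSL source): an output declared as
   min16float4 pos : SV_Position would otherwise keep its partial precision;
   clearing eMinPrec on the signature above and resetting the operand precision
   further below makes sure the position is always emitted at full (highp) precision. */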
+ if (itr == phase.psDecl.end()) + return; + + uint32_t outputPosReg = itr->asOperands[0].ui32RegisterNumber; + + HLSLcc::ForEachOperand(phase.psInst.begin(), phase.psInst.end(), FEO_FLAG_DEST_OPERAND, [outputPosReg](std::vector::iterator itr, Operand *op, uint32_t flags) + { + if (op->eType == OPERAND_TYPE_OUTPUT && op->ui32RegisterNumber == outputPosReg) + op->eMinPrecision = OPERAND_MIN_PRECISION_DEFAULT; + }); + +} + +void Shader::FindUnusedGlobals(uint32_t flags) +{ + for(int i = 0; i < asPhases.size(); i++) + { + ShaderPhase &phase = asPhases[i]; + + // Loop through every operand and pick up usages + HLSLcc::ForEachOperand(phase.psInst.begin(), phase.psInst.end(), FEO_FLAG_SRC_OPERAND|FEO_FLAG_SUBOPERAND, [&](std::vector::iterator inst, Operand *op, uint32_t flags) + { + // Not a constant buffer read? continue + if(op->eType != OPERAND_TYPE_CONSTANT_BUFFER) + return; + + const uint32_t ui32BindingPoint = op->aui32ArraySizes[0]; + const ConstantBuffer *psCBuf = NULL; + sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, ui32BindingPoint, &psCBuf); + + if(!psCBuf) + return; + + // Get all the struct members that can be reached from this usage: + uint32_t mask = op->GetAccessMask(); + for(uint32_t k = 0; k < 4; k++) + { + if((mask & (1 << k)) == 0) + continue; + + uint32_t tmpSwizzle[4] = {k, k, k, k}; + int rebase; + bool isArray; + + ShaderVarType *psVarType = NULL; + + ShaderInfo::GetShaderVarFromOffset(op->aui32ArraySizes[1], tmpSwizzle, psCBuf, (const ShaderVarType**)&psVarType, &isArray, NULL, &rebase, flags); + + // Mark as used. Also all parents. + while(psVarType) + { + psVarType->m_IsUsed = true; + psVarType = psVarType->Parent; + } + } + }); + + } +} + diff --git a/src/ShaderInfo.cpp b/src/ShaderInfo.cpp new file mode 100644 index 0000000..0671079 --- /dev/null +++ b/src/ShaderInfo.cpp @@ -0,0 +1,387 @@ + +#include "ShaderInfo.h" +#include "internal_includes/debug.h" +#include "internal_includes/tokens.h" +#include "Operand.h" +#include +#include + +SHADER_VARIABLE_TYPE ShaderInfo::GetTextureDataType(uint32_t regNo) +{ + const ResourceBinding* psBinding = 0; + int found; + found = GetResourceFromBindingPoint(RGROUP_TEXTURE, regNo, &psBinding); + ASSERT(found != 0); + return psBinding->GetDataType(); +} + +void ShaderInfo::GetConstantBufferFromBindingPoint(const ResourceGroup eGroup, const uint32_t ui32BindPoint, const ConstantBuffer** ppsConstBuf) const +{ + ASSERT(ui32MajorVersion > 3); + *ppsConstBuf = &psConstantBuffers[aui32ResourceMap[eGroup][ui32BindPoint]]; +} + +int ShaderInfo::GetResourceFromBindingPoint(const ResourceGroup eGroup, uint32_t const ui32BindPoint, const ResourceBinding** ppsOutBinding) const +{ + size_t i; + const size_t ui32NumBindings = psResourceBindings.size(); + const ResourceBinding* psBindings = &psResourceBindings[0]; + + for (i = 0; i < ui32NumBindings; ++i) + { + if (ResourceTypeToResourceGroup(psBindings[i].eType) == eGroup) + { + if (ui32BindPoint >= psBindings[i].ui32BindPoint && ui32BindPoint < (psBindings[i].ui32BindPoint + psBindings[i].ui32BindCount)) + { + *ppsOutBinding = psBindings + i; + return 1; + } + } + } + return 0; +} + +int ShaderInfo::GetInterfaceVarFromOffset(uint32_t ui32Offset, ShaderVar** ppsShaderVar) const +{ + size_t i; + const size_t ui32NumVars = psThisPointerConstBuffer->asVars.size(); + + for (i = 0; i < ui32NumVars; ++i) + { + if (ui32Offset >= psThisPointerConstBuffer->asVars[i].ui32StartOffset && + ui32Offset < (psThisPointerConstBuffer->asVars[i].ui32StartOffset + 
psThisPointerConstBuffer->asVars[i].ui32Size)) + { + *ppsShaderVar = &psThisPointerConstBuffer->asVars[i]; + return 1; + } + } + return 0; +} + +int ShaderInfo::GetInputSignatureFromRegister(const uint32_t ui32Register, const uint32_t ui32Mask, const InOutSignature** ppsOut, bool allowNull /* == false */) const +{ + size_t i; + const size_t ui32NumVars = psInputSignatures.size(); + + for (i = 0; i < ui32NumVars; ++i) + { + if ((ui32Register == psInputSignatures[i].ui32Register) && (((~psInputSignatures[i].ui32Mask) & ui32Mask) == 0)) + { + *ppsOut = &psInputSignatures[i]; + return 1; + } + } + ASSERT(allowNull); + return 0; +} + +int ShaderInfo::GetPatchConstantSignatureFromRegister(const uint32_t ui32Register, const uint32_t ui32Mask, const InOutSignature** ppsOut, bool allowNull /* == false */) const +{ + size_t i; + const size_t ui32NumVars = psPatchConstantSignatures.size(); + + for (i = 0; i < ui32NumVars; ++i) + { + if ((ui32Register == psPatchConstantSignatures[i].ui32Register) && (((~psPatchConstantSignatures[i].ui32Mask) & ui32Mask) == 0)) + { + *ppsOut = &psPatchConstantSignatures[i]; + return 1; + } + } + + if (allowNull) + return 0; + + // There are situations (especially when using dcl_indexrange) where the compiler happily writes outside the actual masks. + // In those situations just take the last signature that uses that register (it's typically the "highest" one) + for (i = ui32NumVars - 1; i != 0xffffffff; i--) + { + if (ui32Register == psPatchConstantSignatures[i].ui32Register) + { + *ppsOut = &psPatchConstantSignatures[i]; + return 1; + } + } + + + ASSERT(0); + return 0; +} + +int ShaderInfo::GetOutputSignatureFromRegister(const uint32_t ui32Register, + const uint32_t ui32CompMask, + const uint32_t ui32Stream, + const InOutSignature** ppsOut, + bool allowNull /* = false */) const +{ + size_t i; + const size_t ui32NumVars = psOutputSignatures.size(); + ASSERT(ui32CompMask != 0); + + for (i = 0; i < ui32NumVars; ++i) + { + if (ui32Register == psOutputSignatures[i].ui32Register && + (ui32CompMask & psOutputSignatures[i].ui32Mask) && + ui32Stream == psOutputSignatures[i].ui32Stream) + { + *ppsOut = &psOutputSignatures[i]; + return 1; + } + } + ASSERT(allowNull); + return 0; +} + +int ShaderInfo::GetOutputSignatureFromSystemValue(SPECIAL_NAME eSystemValueType, uint32_t ui32SemanticIndex, const InOutSignature** ppsOut) const +{ + size_t i; + const size_t ui32NumVars = psOutputSignatures.size(); + + for (i = 0; i < ui32NumVars; ++i) + { + if (eSystemValueType == psOutputSignatures[i].eSystemValueType && + ui32SemanticIndex == psOutputSignatures[i].ui32SemanticIndex) + { + *ppsOut = &psOutputSignatures[i]; + return 1; + } + } + ASSERT(0); + return 0; +} + +static uint32_t GetCBVarSize(const ShaderVarType* psType, bool matrixAsVectors) +{ + // Struct size is calculated from the offset and size of its last member + if (psType->Class == SVC_STRUCT) + { + return psType->Members.back().Offset + GetCBVarSize(&psType->Members.back(), matrixAsVectors); + } + + // Matrices represented as vec4 arrays have special size calculation + if (matrixAsVectors) + { + if (psType->Class == SVC_MATRIX_ROWS) + { + return psType->Rows * 16; + } + else if (psType->Class == SVC_MATRIX_COLUMNS) + { + return psType->Columns * 16; + } + } + + // Regular matrices, vectors and scalars + return psType->Columns * psType->Rows * 4; +} + +static const ShaderVarType* IsOffsetInType(const ShaderVarType* psType, + uint32_t parentOffset, + uint32_t offsetToFind, + bool* isArray, + std::vector* arrayIndices, + 
int32_t* pi32Rebase, + uint32_t flags) +{ + uint32_t thisOffset = parentOffset + psType->Offset; + uint32_t thisSize = GetCBVarSize(psType, (flags & HLSLCC_FLAG_TRANSLATE_MATRICES) != 0); + uint32_t paddedSize = thisSize; + if (thisSize % 16 > 0) + paddedSize += (16 - (thisSize % 16)); + uint32_t arraySize = thisSize; + + // Array elements are padded to align on vec4 size, except for the last one + if (psType->Elements) + arraySize = (paddedSize * (psType->Elements - 1)) + thisSize; + + if ((offsetToFind >= thisOffset) && + offsetToFind < (thisOffset + arraySize)) + { + *isArray = false; + if (psType->Class == SVC_STRUCT) + { + if (psType->Elements > 1 && arrayIndices != NULL) + arrayIndices->push_back((offsetToFind - thisOffset) / thisSize); + + // Need to bring offset back to element zero in case of array of structs + uint32_t offsetInStruct = (offsetToFind - thisOffset) % paddedSize; + uint32_t m = 0; + + for (m = 0; m < psType->MemberCount; ++m) + { + const ShaderVarType* psMember = &psType->Members[m]; + + const ShaderVarType* foundType = IsOffsetInType(psMember, thisOffset, thisOffset + offsetInStruct, isArray, arrayIndices, pi32Rebase, flags); + if (foundType != NULL) + return foundType; + } + } + // Check for array of scalars or vectors (both take up 16 bytes per element). + // Matrices are also treated as arrays of vectors. + else if ((psType->Class == SVC_MATRIX_ROWS || psType->Class == SVC_MATRIX_COLUMNS) || + ((psType->Class == SVC_SCALAR || psType->Class == SVC_VECTOR) && psType->Elements > 1)) + { + *isArray = true; + if (arrayIndices != NULL) + arrayIndices->push_back((offsetToFind - thisOffset) / 16); + } + else if (psType->Class == SVC_VECTOR) + { + //Check for vector starting at a non-vec4 offset. + + // cbuffer $Globals + // { + // + // float angle; // Offset: 0 Size: 4 + // float2 angle2; // Offset: 4 Size: 8 + // + // } + + //cb0[0].x = angle + //cb0[0].yzyy = angle2.xyxx + + //Rebase angle2 so that .y maps to .x, .z maps to .y + + pi32Rebase[0] = thisOffset % 16; + } + + return psType; + } + return NULL; +} + +int ShaderInfo::GetShaderVarFromOffset(const uint32_t ui32Vec4Offset, + const uint32_t(&pui32Swizzle)[4], + const ConstantBuffer* psCBuf, + const ShaderVarType** ppsShaderVar, // Output the found var + bool* isArray, // Output bool that tells if the found var is an array + std::vector* arrayIndices, // Output vector of array indices in order from root parent to the found var + int32_t* pi32Rebase, // Output swizzle rebase + uint32_t flags) +{ + size_t i; + + uint32_t ui32ByteOffset = ui32Vec4Offset * 16; + + //Swizzle can point to another variable. In the example below + //cbUIUpdates.g_uMaxFaces would be cb1[2].z. The scalars are combined + //into vectors. psCBuf->ui32NumVars will be 3. 
+ + // cbuffer cbUIUpdates + // { + // float g_fLifeSpan; // Offset: 0 Size: 4 + // float g_fLifeSpanVar; // Offset: 4 Size: 4 [unused] + // float g_fRadiusMin; // Offset: 8 Size: 4 [unused] + // float g_fRadiusMax; // Offset: 12 Size: 4 [unused] + // float g_fGrowTime; // Offset: 16 Size: 4 [unused] + // float g_fStepSize; // Offset: 20 Size: 4 + // float g_fTurnRate; // Offset: 24 Size: 4 + // float g_fTurnSpeed; // Offset: 28 Size: 4 [unused] + // float g_fLeafRate; // Offset: 32 Size: 4 + // float g_fShrinkTime; // Offset: 36 Size: 4 [unused] + // uint g_uMaxFaces; // Offset: 40 Size: 4 + // } + if (pui32Swizzle[0] == OPERAND_4_COMPONENT_Y) + { + ui32ByteOffset += 4; + } + else if (pui32Swizzle[0] == OPERAND_4_COMPONENT_Z) + { + ui32ByteOffset += 8; + } + else if (pui32Swizzle[0] == OPERAND_4_COMPONENT_W) + { + ui32ByteOffset += 12; + } + + const size_t ui32NumVars = psCBuf->asVars.size(); + + for (i = 0; i < ui32NumVars; ++i) + { + ppsShaderVar[0] = IsOffsetInType(&psCBuf->asVars[i].sType, psCBuf->asVars[i].ui32StartOffset, ui32ByteOffset, isArray, arrayIndices, pi32Rebase, flags); + + if (ppsShaderVar[0] != NULL) + return 1; + } + return 0; +} + +// Patches the fullName of the var with given array indices. Does not insert the indexing for the var itself if it is an array. +// Searches for brackets and inserts indices one by one. +std::string ShaderInfo::GetShaderVarIndexedFullName(const ShaderVarType* psShaderVar, std::vector &indices) +{ + std::ostringstream oss; + size_t prevpos = 0; + size_t pos = psShaderVar->fullName.find('[', 0); + uint32_t i = 0; + while (pos != std::string::npos) + { + pos++; + oss << psShaderVar->fullName.substr(prevpos, pos - prevpos); + if (i < indices.size()) + oss << indices[i]; + prevpos = pos; + i++; + pos = psShaderVar->fullName.find('[', prevpos); + } + oss << psShaderVar->fullName.substr(prevpos); + + return oss.str(); +} + +ResourceGroup ShaderInfo::ResourceTypeToResourceGroup(ResourceType eType) +{ + switch (eType) + { + case RTYPE_CBUFFER: + return RGROUP_CBUFFER; + + case RTYPE_SAMPLER: + return RGROUP_SAMPLER; + + case RTYPE_TEXTURE: + case RTYPE_BYTEADDRESS: + case RTYPE_STRUCTURED: + return RGROUP_TEXTURE; + + case RTYPE_UAV_RWTYPED: + case RTYPE_UAV_RWSTRUCTURED: + case RTYPE_UAV_RWBYTEADDRESS: + case RTYPE_UAV_APPEND_STRUCTURED: + case RTYPE_UAV_CONSUME_STRUCTURED: + case RTYPE_UAV_RWSTRUCTURED_WITH_COUNTER: + return RGROUP_UAV; + + case RTYPE_TBUFFER: + ASSERT(0); // Need to find out which group this belongs to + return RGROUP_TEXTURE; + default: + break; + } + + ASSERT(0); + return RGROUP_CBUFFER; +} + +void ShaderInfo::AddSamplerPrecisions(HLSLccSamplerPrecisionInfo &info) +{ + if (info.empty()) + return; + + for (size_t i = 0; i < psResourceBindings.size(); i++) + { + ResourceBinding *rb = &psResourceBindings[i]; + if (rb->eType != RTYPE_SAMPLER && rb->eType != RTYPE_TEXTURE) + continue; + + HLSLccSamplerPrecisionInfo::iterator j = info.find(rb->name); // Try finding exact match + + // If match not found, check if name has "sampler" prefix + // -> try finding a match without the prefix (DX11 style sampler case) + if (j == info.end() && rb->name.compare(0, 7, "sampler") == 0) + j = info.find(rb->name.substr(7, rb->name.size() - 7)); + + if (j != info.end()) + rb->ePrecision = j->second; + } +} diff --git a/src/UseDefineChains.cpp b/src/UseDefineChains.cpp new file mode 100644 index 0000000..c1709ad --- /dev/null +++ b/src/UseDefineChains.cpp @@ -0,0 +1,887 @@ + +#include "internal_includes/UseDefineChains.h" +#include 
"internal_includes/debug.h" +#include "internal_includes/Instruction.h" + +#include "internal_includes/ControlFlowGraph.h" +#include "internal_includes/debug.h" +#include "internal_includes/HLSLccToolkit.h" +#include + +using HLSLcc::ForEachOperand; + +#define DEBUG_UDCHAINS 0 + +#if DEBUG_UDCHAINS +// Debug mode +static void UDCheckConsistencyDUChain(uint32_t idx, DefineUseChains &psDUChains, UseDefineChains &psUDChains, ActiveDefinitions &activeDefinitions) +{ + DefineUseChain::iterator du = psDUChains[idx].begin(); + UseDefineChain::iterator ud = psUDChains[idx].begin(); + while (du != psDUChains[idx].end()) + { + ASSERT(du->index == idx % 4); + // Check that the definition actually writes to idx + { + uint32_t tempReg = idx / 4; + uint32_t offs = idx - (tempReg * 4); + uint32_t accessMask = 1 << offs; + uint32_t i; + int found = 0; + for (i = 0; i < du->psInst->ui32FirstSrc; i++) + { + if (du->psInst->asOperands[i].eType == OPERAND_TYPE_TEMP) + { + if (du->psInst->asOperands[i].ui32RegisterNumber == tempReg) + { + uint32_t writeMask = GetOperandWriteMask(&du->psInst->asOperands[i]); + if (writeMask & accessMask) + { + ASSERT(writeMask == du->writeMask); + found = 1; + break; + } + } + } + } + ASSERT(found); + } + + // Check that each usage of each definition also is found in the use-define chain + UsageSet::iterator ul = du->usages.begin(); + while (ul != du->usages.end()) + { + // Search for the usage in the chain + UseDefineChain::iterator use = ud; + while (use != psUDChains[idx].end() && &*use != *ul) + use++; + ASSERT(use != psUDChains[idx].end()); + ASSERT(&*use == *ul); + + // Check that the mapping back is also found + ASSERT(std::find(use->defines.begin(), use->defines.end(), &*du) != use->defines.end()); + + ul++; + } + + du++; + } +} + +static void UDCheckConsistencyUDChain(uint32_t idx, DefineUseChains &psDUChains, UseDefineChains &psUDChains, ActiveDefinitions &activeDefinitions) +{ + DefineUseChain::iterator du = psDUChains[idx].begin(); + UseDefineChain::iterator ud = psUDChains[idx].begin(); + while (ud != psUDChains[idx].end()) + { + // Check that each definition of each usage also is found in the define-use chain + DefineSet::iterator dl = ud->defines.begin(); + ASSERT(ud->psOp->ui32RegisterNumber == idx / 4); + ASSERT(ud->index == idx % 4); + while (dl != ud->defines.end()) + { + // Search for the definition in the chain + DefineUseChain::iterator def = du; + while (def != psDUChains[idx].end() && &*def != *dl) + def++; + ASSERT(def != psDUChains[idx].end()); + ASSERT(&*def == *dl); + + // Check that the mapping back is also found + ASSERT(std::find(def->usages.begin(), def->usages.end(), &*ud) != def->usages.end()); + + dl++; + } + ud++; + } + +} + +static void UDCheckConsistency(uint32_t tempRegs, DefineUseChains &psDUChains, UseDefineChains &psUDChains, ActiveDefinitions &activeDefinitions) +{ + uint32_t i; + for (i = 0; i < tempRegs * 4; i++) + { + UDCheckConsistencyDUChain(i, psDUChains, psUDChains, activeDefinitions); + UDCheckConsistencyUDChain(i, psDUChains, psUDChains, activeDefinitions); + } +} + +#define printf_console printf + +#endif + +using namespace HLSLcc::ControlFlow; +using std::for_each; + +static DefineUseChainEntry *GetOrCreateDefinition(const BasicBlock::Definition &def, DefineUseChain &psDUChain, uint32_t index) +{ + // Try to find an existing entry + auto itr = std::find_if(psDUChain.begin(), psDUChain.end(), [&](const DefineUseChainEntry &de) + { + return de.psInst == def.m_Instruction && de.psOp == def.m_Operand; + }); + + if (itr != 
psDUChain.end()) + { + return &(*itr); + } + + // Not found, create + psDUChain.push_front(DefineUseChainEntry()); + DefineUseChainEntry &de = *psDUChain.begin(); + + de.psInst = (Instruction *)def.m_Instruction; + de.psOp = (Operand *)def.m_Operand; + de.index = index; + de.writeMask = def.m_Operand->GetAccessMask(); + de.psSiblings[index] = &de; + + return &de; +} + + + +// Do flow control analysis on the instructions and build the define-use and use-define chains +void BuildUseDefineChains(std::vector &instructions, uint32_t ui32NumTemps, DefineUseChains &psDUChain, UseDefineChains &psUDChain, HLSLcc::ControlFlow::ControlFlowGraph &cfg) +{ + + Instruction *psFirstInstruction = &instructions[0]; + Instruction *psLastInstruction = &instructions[instructions.size() - 1]; + + ActiveDefinitions lastSeenDefinitions(ui32NumTemps * 4, NULL); // Array of pointers to the currently active definition for each temp + + psDUChain.clear(); + psUDChain.clear(); + + for (uint32_t i = 0; i < ui32NumTemps * 4; i++) + { + psUDChain.insert(std::make_pair(i, UseDefineChain())); + psDUChain.insert(std::make_pair(i, DefineUseChain())); + } + + const ControlFlowGraph::BasicBlockStorage &blocks = cfg.AllBlocks(); + + // Loop through each block, first calculate the union of all the reachables of all preceding blocks + // and then build on that as we go along the basic block instructions + for_each(blocks.begin(), blocks.end(), [&](const HLSLcc::shared_ptr &bptr) + { + const BasicBlock &b = *bptr.get(); + BasicBlock::ReachableVariables rvars; + for_each(b.Preceding().begin(), b.Preceding().end(), [&](const Instruction *precBlock) + { + const BasicBlock &b = *cfg.GetBasicBlockForInstruction(precBlock); + BasicBlock::RVarUnion(rvars, b.Reachable()); + }); + + // Now we have a Reachable set for the beginning of this block in rvars. Loop through all instructions and their operands and pick up uses and definitions + for (const Instruction *inst = b.First(); inst <= b.Last(); inst++) + { + // Process sources first + ForEachOperand(inst, inst+1, FEO_FLAG_SRC_OPERAND | FEO_FLAG_SUBOPERAND, + [&](const Instruction *psInst, const Operand *psOperand, uint32_t ui32OperandType) + { + if (psOperand->eType != OPERAND_TYPE_TEMP) + return; + + uint32_t tempReg = psOperand->ui32RegisterNumber; + uint32_t accessMask = psOperand->GetAccessMask(); + + // Go through each component + for (int k = 0; k < 4; k++) + { + if (!(accessMask & (1 << k))) + continue; + + uint32_t regIdx = tempReg * 4 + k; + + // Add an use for all visible definitions + psUDChain[regIdx].push_front(UseDefineChainEntry()); + UseDefineChainEntry &ue = *psUDChain[regIdx].begin(); + ue.psInst = (Instruction *)psInst; + ue.psOp = (Operand *)psOperand; + ue.accessMask = accessMask; + ue.index = k; + ue.psSiblings[k] = &ue; + // ue.siblings will be filled out later. 
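// Worked example (hypothetical snippet):
//   mov r0.x, ...        <- definition A
//   if_nz r1.x
//     mov r0.x, ...      <- definition B
//   endif
//   add r2.x, r0.x, ...  <- use U
// Both A and B reach U, so the loop below records U.defines = {A, B} and adds U
// to the usage sets of both definitions.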
+ + BasicBlock::ReachableDefinitionsPerVariable& rpv = rvars[regIdx]; + for_each(rpv.begin(), rpv.end(), [&](const BasicBlock::Definition &def) + { + DefineUseChainEntry *duentry = GetOrCreateDefinition(def, psDUChain[regIdx], k); + ue.defines.insert(duentry); + duentry->usages.insert(&ue); + }); + } + return; + }); + + // Then the destination operands + ForEachOperand(inst, inst+1, FEO_FLAG_DEST_OPERAND, + [&](const Instruction *psInst, const Operand *psOperand, uint32_t ui32OperandType) + { + if (psOperand->eType != OPERAND_TYPE_TEMP) + return; + + uint32_t tempReg = psOperand->ui32RegisterNumber; + uint32_t accessMask = psOperand->GetAccessMask(); + + // Go through each component + for (int k = 0; k < 4; k++) + { + if (!(accessMask & (1 << k))) + continue; + + uint32_t regIdx = tempReg * 4 + k; + + // Overwrite whatever's in rvars; they are killed by this + rvars[regIdx].clear(); + rvars[regIdx].insert(BasicBlock::Definition(psInst, psOperand)); + + // Make sure the definition gets created even though it doesn't have any uses at all + // (happens when sampling a texture but not all channels are used etc). + GetOrCreateDefinition(BasicBlock::Definition(psInst, psOperand), psDUChain[regIdx], k); + + } + return; + }); + } + }); + + // Connect the siblings for all uses and definitions + for_each(psUDChain.begin(), psUDChain.end(), [&](std::pair &udpair) + { + UseDefineChain &ud = udpair.second; + // Clear out the bottom 2 bits to get the actual base reg + uint32_t baseReg = udpair.first & ~(3); + + for_each(ud.begin(), ud.end(), [&](UseDefineChainEntry &ue) + { + ASSERT(baseReg / 4 == ue.psOp->ui32RegisterNumber); + + // Go through each component + for (int k = 0; k < 4; k++) + { + // Skip components that we don't access, or the one that's our own + if (!(ue.accessMask & (1 << k)) || ue.index == k) + continue; + + // Find the corresponding sibling. We can uniquely identify it by the operand pointer alone. + UseDefineChain::iterator siblItr = std::find_if(psUDChain[baseReg + k].begin(), psUDChain[baseReg + k].end(), [&](const UseDefineChainEntry &_sibl) -> bool { return _sibl.psOp == ue.psOp; }); + ASSERT(siblItr != psUDChain[baseReg + k].end()); + UseDefineChainEntry &sibling = *siblItr; + ue.psSiblings[k] = &sibling; + } + }); + }); + + // Same for definitions + for_each(psDUChain.begin(), psDUChain.end(), [&](std::pair &dupair) + { + DefineUseChain &du = dupair.second; + // Clear out the bottom 2 bits to get the actual base reg + uint32_t baseReg = dupair.first & ~(3); + + for_each(du.begin(), du.end(), [&](DefineUseChainEntry &de) + { + ASSERT(baseReg / 4 == de.psOp->ui32RegisterNumber); + + // Go through each component + for (int k = 0; k < 4; k++) + { + // Skip components that we don't access, or the one that's our own + if (!(de.writeMask & (1 << k)) || de.index == k) + continue; + + // Find the corresponding sibling. We can uniquely identify it by the operand pointer alone. 
+ DefineUseChain::iterator siblItr = std::find_if(psDUChain[baseReg + k].begin(), psDUChain[baseReg + k].end(), [&](const DefineUseChainEntry &_sibl) -> bool { return _sibl.psOp == de.psOp; }); + ASSERT(siblItr != psDUChain[baseReg + k].end()); + DefineUseChainEntry &sibling = *siblItr; + de.psSiblings[k] = &sibling; + } + }); + }); + +#if DEBUG_UDCHAINS + UDCheckConsistency(ui32NumTemps, psDUChain, psUDChain, lastSeenDefinitions); +#endif +} + + +typedef std::vector SplitDefinitions; + +// Split out a define to use a new temp register +static void UDDoSplit(SplitDefinitions &defs, uint32_t *psNumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, std::vector &pui32SplitTable) +{ + uint32_t newReg = *psNumTemps; + uint32_t oldReg = defs[0]->psOp->ui32RegisterNumber; + uint32_t accessMask = defs[0]->writeMask; + uint32_t i, u32def; + uint32_t rebase, count; + uint32_t splitTableValue; + + ASSERT(defs.size() > 0); + for (i = 1; i < defs.size(); i++) + { + ASSERT(defs[i]->psOp->ui32RegisterNumber == oldReg); + accessMask |= defs[i]->writeMask; + } + + + (*psNumTemps)++; + + +#if DEBUG_UDCHAINS + UDCheckConsistency((*psNumTemps) - 1, psDUChains, psUDChains, ActiveDefinitions()); +#endif + ASSERT(accessMask != 0 && accessMask <= 0xf); + // Calculate rebase value and component count + rebase = 0; + count = 0; + i = accessMask; + while ((i & 1) == 0) + { + rebase++; + i = i >> 1; + } + while (i != 0) + { + count++; + i = i >> 1; + } + + // Make sure there's enough room in the split table + if (pui32SplitTable.size() <= newReg) + { + size_t newSize = pui32SplitTable.size() * 2; + pui32SplitTable.resize(newSize, 0xffffffff); + } + + // Set the original temp of the new register + { + uint32_t origTemp = oldReg; + while (pui32SplitTable[origTemp] != 0xffffffff) + origTemp = pui32SplitTable[origTemp] & 0xffff; + + ASSERT(rebase < 4); + ASSERT(count <= 4); + splitTableValue = (count << 24) | (rebase << 16) | origTemp; + + pui32SplitTable[newReg] = splitTableValue; + } + + // Insert the new temps to the map + for (i = newReg * 4; i < newReg * 4 + 4; i++) + { + psUDChains.insert(std::make_pair(i, UseDefineChain())); + psDUChains.insert(std::make_pair(i, DefineUseChain())); + } + + for (u32def = 0; u32def < defs.size(); u32def++) + { + DefineUseChainEntry *defineToSplit = defs[u32def]; + uint32_t oldIdx = defineToSplit->index; +#if DEBUG_UDCHAINS + printf("Split def at instruction %d (reg %d -> %d, access %X, rebase %d, count: %d)\n", (int)defineToSplit->psInst->id, oldReg, newReg, accessMask, rebase, count); +#endif + + // We may have moved the opcodes already because of multiple defines pointing to the same op + if (defineToSplit->psOp->ui32RegisterNumber != newReg) + { + ASSERT(defineToSplit->psOp->ui32RegisterNumber == oldReg); + // Update the declaration operand + // Don't change possible suboperands as they are sources + defineToSplit->psInst->ChangeOperandTempRegister(defineToSplit->psOp, oldReg, newReg, accessMask, UD_CHANGE_MAIN_OPERAND, rebase); + } + + defineToSplit->writeMask >>= rebase; + defineToSplit->index -= rebase; + // Change the temp register number for all usages + UsageSet::iterator ul = defineToSplit->usages.begin(); + while (ul != defineToSplit->usages.end()) + { + // Already updated by one of the siblings? Skip. 
+ if ((*ul)->psOp->ui32RegisterNumber != newReg) + { + ASSERT((*ul)->psOp->ui32RegisterNumber == oldReg); + (*ul)->psInst->ChangeOperandTempRegister((*ul)->psOp, oldReg, newReg, accessMask, UD_CHANGE_MAIN_OPERAND, rebase); + } + + // Update the UD chain + { + UseDefineChain::iterator udLoc = psUDChains[oldReg * 4 + oldIdx].begin(); + while (udLoc != psUDChains[oldReg * 4 + oldIdx].end()) + { + if (&*udLoc == *ul) + { + // Move to new list + psUDChains[newReg * 4 + oldIdx - rebase].splice(psUDChains[newReg * 4 + oldIdx - rebase].begin(), psUDChains[oldReg * 4 + oldIdx], udLoc); + + if (rebase > 0) + { + (*ul)->accessMask >>= rebase; + (*ul)->index -= rebase; + memmove((*ul)->psSiblings, (*ul)->psSiblings + rebase, (4 - rebase) * sizeof(UseDefineChain *)); + } + break; + } + udLoc++; + } + } + + ul++; + } + + // Move the define out of the old chain (if its still there) + { + // Find the define in the old chain + DefineUseChain::iterator duLoc = psDUChains[oldReg * 4 + oldIdx].begin(); + while (duLoc != psDUChains[oldReg * 4 + oldIdx].end() && ((&*duLoc) != defineToSplit)) + { + duLoc++; + } + ASSERT(duLoc != psDUChains[oldReg * 4 + oldIdx].end()); + { + // Move directly to new chain + psDUChains[newReg * 4 + oldIdx - rebase].splice(psDUChains[newReg * 4 + oldIdx - rebase].begin(), psDUChains[oldReg * 4 + oldIdx], duLoc); + if (rebase != 0) + { + memmove(defineToSplit->psSiblings, defineToSplit->psSiblings + rebase, (4 - rebase) * sizeof(DefineUseChain *)); + } + } + + } + + } + +#if DEBUG_UDCHAINS + UDCheckConsistency(*psNumTemps, psDUChains, psUDChains, ActiveDefinitions()); +#endif +} + +// Adds a define and all its siblings to the list, checking duplicates +static void AddDefineToList(SplitDefinitions &defs, DefineUseChainEntry *newDef) +{ + uint32_t k; + for (k = 0; k < 4; k++) + { + if (newDef->psSiblings[k]) + { + DefineUseChainEntry *defToAdd = newDef->psSiblings[k]; + uint32_t m; + int defFound = 0; + for (m = 0; m < defs.size(); m++) + { + if (defs[m] == defToAdd) + { + defFound = 1; + break; + } + } + if (defFound == 0) + { + defs.push_back(newDef->psSiblings[k]); + } + } + } +} + +// Check if a set of definitions can be split and does the split. Returns nonzero if a split took place +static int AttemptSplitDefinitions(SplitDefinitions &defs, uint32_t *psNumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, std::vector &pui32SplitTable) +{ + uint32_t reg; + uint32_t combinedMask; + uint32_t i, k, u32def; + int canSplit = 1; + DefineUseChain::iterator du; + int hasLeftoverDefinitions = 0; + // Initial checks: all definitions must: + // Access the same register + // Have at least one definition in any of the 4 register slots that isn't included + if (defs.empty()) + return 0; + + reg = defs[0]->psOp->ui32RegisterNumber; + combinedMask = defs[0]->writeMask; + for (i = 1; i < defs.size(); i++) + { + if (reg != defs[i]->psOp->ui32RegisterNumber) + return 0; + + combinedMask |= defs[i]->writeMask; + } + for (i = 0; i < 4; i++) + { + du = psDUChains[reg * 4 + i].begin(); + while (du != psDUChains[reg * 4 + i].end()) + { + int defFound = 0; + for (k = 0; k < defs.size(); k++) + { + if (&*du == defs[k]) + { + defFound = 1; + break; + } + } + if (defFound == 0) + { + hasLeftoverDefinitions = 1; + break; + } + du++; + } + if (hasLeftoverDefinitions) + break; + } + // We'd be splitting the entire register and all its definitions, no point in that. + if (hasLeftoverDefinitions == 0) + return 0; + + // Check all the definitions. 
Any of them must not have any usages that see any definitions not in our defs array. + for (u32def = 0; u32def < defs.size(); u32def++) + { + DefineUseChainEntry *def = defs[u32def]; + + UsageSet::iterator ul = def->usages.begin(); + while (ul != def->usages.end()) + { + uint32_t j; + + // Check that we only read a subset of the combined writemask + if (((*ul)->accessMask & (~combinedMask)) != 0) + { + // Do an additional attempt, pick up all the sibling definitions as well + // Only do this if we have the space in the definitions table + for (j = 0; j < 4; j++) + { + if (((*ul)->accessMask & (1 << j)) == 0) + continue; + AddDefineToList(defs, *(*ul)->psSiblings[j]->defines.begin()); + } + return AttemptSplitDefinitions(defs, psNumTemps, psDUChains, psUDChains, pui32SplitTable); + + } + + // It must have at least one declaration + ASSERT(!(*ul)->defines.empty()); + + // Check that all siblings for the usage use one of the definitions + for (j = 0; j < 4; j++) + { + uint32_t m; + int defineFound = 0; + if (((*ul)->accessMask & (1 << j)) == 0) + continue; + + ASSERT((*ul)->psSiblings[j] != NULL); + ASSERT(!(*ul)->psSiblings[j]->defines.empty()); + + // Check that all definitions for this usage are found from the definitions table + DefineSet::iterator dl = (*ul)->psSiblings[j]->defines.begin(); + while (dl != (*ul)->psSiblings[j]->defines.end()) + { + defineFound = 0; + for (m = 0; m < defs.size(); m++) + { + if (*dl == defs[m]) + { + defineFound = 1; + break; + } + } + if (defineFound == 0) + { + // Add this define and all its siblings to the table and try again + AddDefineToList(defs, *dl); + return AttemptSplitDefinitions(defs, psNumTemps, psDUChains, psUDChains, pui32SplitTable); + canSplit = 0; + break; + } + + dl++; + } + + if (defineFound == 0) + { + canSplit = 0; + break; + } + } + if (canSplit == 0) + break; + + // This'll do, check next usage + ul++; + } + if (canSplit == 0) + break; + + } + if (canSplit) + { + UDDoSplit(defs, psNumTemps, psDUChains, psUDChains, pui32SplitTable); + return 1; + } + return 0; +} + +// Do temp splitting based on use-define chains +void UDSplitTemps(uint32_t *psNumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, std::vector &pui32SplitTable) +{ + // Algorithm overview: + // Take each definition and look at all its usages. If all usages only see this definition (and this is not the only definition for this variable), + // split it out. + uint32_t i; + uint32_t tempsAtStart = *psNumTemps; // We don't need to try to analyze the newly created ones, they're unsplittable by definition + for (i = 0; i < tempsAtStart * 4; i++) + { + // No definitions? + if (psDUChains[i].empty()) + continue; + + DefineUseChain::iterator du = psDUChains[i].begin(); + // Ok we have multiple definitions for a temp, check them through + while (du != psDUChains[i].end()) + { + SplitDefinitions sd; + AddDefineToList(sd, &*du); + du++; + // If we split, we'll have to start from the beginning of this chain because du might no longer be in this chain + if (AttemptSplitDefinitions(sd, psNumTemps, psDUChains, psUDChains, pui32SplitTable)) + { + du = psDUChains[i].begin(); + } + } + } +} + +// Returns nonzero if all the operands have partial precision and at least one of them has been downgraded as part of shader downgrading process. +// Sampler ops, bitwise ops and comparisons are ignored. 
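+// For example (illustrative, not from the original comments): an arithmetic op
+// such as mad whose non-immediate sources all carry a min-precision qualifier
+// can have its result demoted to OPERAND_MIN_PRECISION_FLOAT_16 (mediump at
+// most), while a single full-precision source operand blocks the downgrade.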
+static int CanDowngradeDefinitionPrecision(DefineUseChain::iterator du, OPERAND_MIN_PRECISION *pType) +{ + Instruction *psInst = du->psInst; + int hasFullPrecOperands = 0; + uint32_t i; + + if (du->psOp->eMinPrecision != OPERAND_MIN_PRECISION_DEFAULT) + return 0; + + switch (psInst->eOpcode) + { + case OPCODE_ADD: + case OPCODE_MUL: + case OPCODE_MOV: + case OPCODE_MAD: + case OPCODE_DIV: + case OPCODE_LOG: + case OPCODE_EXP: + case OPCODE_MAX: + case OPCODE_MIN: + case OPCODE_DP2: + case OPCODE_DP2ADD: + case OPCODE_DP3: + case OPCODE_DP4: + case OPCODE_RSQ: + case OPCODE_SQRT: + break; + default: + return 0; + } + + for (i = psInst->ui32FirstSrc; i < psInst->ui32NumOperands; i++) + { + Operand *op = &psInst->asOperands[i]; + if (op->eType == OPERAND_TYPE_IMMEDIATE32) + continue; // Immediate values are ignored + + if (op->eMinPrecision == OPERAND_MIN_PRECISION_DEFAULT) + { + hasFullPrecOperands = 1; + break; + } + } + + if (hasFullPrecOperands) + return 0; + + if (pType) + *pType = OPERAND_MIN_PRECISION_FLOAT_16; // Don't go lower than mediump + + return 1; +} + +// Returns true if all the usages of this definitions are instructions that deal with floating point data +static bool HasOnlyFloatUsages(DefineUseChain::iterator du) +{ + UsageSet::iterator itr = du->usages.begin(); + for (; itr != du->usages.end(); itr++) + { + Instruction *psInst = (*itr)->psInst; + + if ((*itr)->psOp->eMinPrecision != OPERAND_MIN_PRECISION_DEFAULT) + return false; + + switch (psInst->eOpcode) + { + case OPCODE_ADD: + case OPCODE_MUL: + case OPCODE_MOV: + case OPCODE_MAD: + case OPCODE_DIV: + case OPCODE_LOG: + case OPCODE_EXP: + case OPCODE_MAX: + case OPCODE_MIN: + case OPCODE_DP2: + case OPCODE_DP2ADD: + case OPCODE_DP3: + case OPCODE_DP4: + case OPCODE_RSQ: + case OPCODE_SQRT: + break; + default: + return false; + } + } + return true; +} + +// Based on the sampler precisions, downgrade the definitions if possible. +void UpdateSamplerPrecisions(const ShaderInfo &info, DefineUseChains &psDUChains, uint32_t ui32NumTemps) +{ + uint32_t madeProgress = 0; + do + { + uint32_t i; + madeProgress = 0; + for (i = 0; i < ui32NumTemps * 4; i++) + { + DefineUseChain::iterator du = psDUChains[i].begin(); + while (du != psDUChains[i].end()) + { + OPERAND_MIN_PRECISION sType = OPERAND_MIN_PRECISION_DEFAULT; + if ((du->psInst->IsPartialPrecisionSamplerInstruction(info, &sType) + || CanDowngradeDefinitionPrecision(du, &sType)) + && du->psInst->asOperands[0].eType == OPERAND_TYPE_TEMP + && du->psInst->asOperands[0].eMinPrecision == OPERAND_MIN_PRECISION_DEFAULT + && du->isStandalone + && HasOnlyFloatUsages(du)) + { + uint32_t sibl; + // Ok we can change the precision. + ASSERT(du->psOp->eType == OPERAND_TYPE_TEMP); + ASSERT(sType != OPERAND_MIN_PRECISION_DEFAULT); + du->psOp->eMinPrecision = sType; + + // Update all the uses of all the siblings + for (sibl = 0; sibl < 4; sibl++) + { + if (!du->psSiblings[sibl]) + continue; + + UsageSet::iterator ul = du->psSiblings[sibl]->usages.begin(); + while (ul != du->psSiblings[sibl]->usages.end()) + { + ASSERT((*ul)->psOp->eMinPrecision == OPERAND_MIN_PRECISION_DEFAULT || + (*ul)->psOp->eMinPrecision == sType); + // We may well write this multiple times to the same op but that's fine. 
+ (*ul)->psOp->eMinPrecision = sType; + + ul++; + } + } + madeProgress = 1; + } + du++; + } + } + } while (madeProgress != 0); + +} + +void CalculateStandaloneDefinitions(DefineUseChains &psDUChains, uint32_t ui32NumTemps) +{ + uint32_t i; + for (i = 0; i < ui32NumTemps * 4; i++) + { + DefineUseChain::iterator du = psDUChains[i].begin(); + while (du != psDUChains[i].end()) + { + uint32_t sibl; + int isStandalone = 1; + if (du->isStandalone) + { + du++; + continue; + } + + for (sibl = 0; sibl < 4; sibl++) + { + if (!du->psSiblings[sibl]) + continue; + + UsageSet::iterator ul = du->psSiblings[sibl]->usages.begin(); + while (ul != du->psSiblings[sibl]->usages.end()) + { + uint32_t k; + ASSERT(!(*ul)->defines.empty()); + + // Need to check that all the siblings of this usage only see this definition's corresponding sibling + for (k = 0; k < 4; k++) + { + if (!(*ul)->psSiblings[k]) + continue; + + if ((*ul)->psSiblings[k]->defines.size() > 1 + || *(*ul)->psSiblings[k]->defines.begin() != du->psSiblings[k]) + { + isStandalone = 0; + break; + } + } + if (isStandalone == 0) + break; + + ul++; + } + if (isStandalone == 0) + break; + } + + if (isStandalone) + { + // Yep, mark it + for (sibl = 0; sibl < 4; sibl++) + { + if (!du->psSiblings[sibl]) + continue; + du->psSiblings[sibl]->isStandalone = 1; + } + } + du++; + } + } +} + +// Write the uses and defines back to Instruction and Operand member lists. +void WriteBackUsesAndDefines(DefineUseChains &psDUChains) +{ + using namespace std; + // Loop through the whole data structure, and write usages and defines to Instructions and Operands as we see them + for_each(psDUChains.begin(), psDUChains.end(), [](const DefineUseChains::value_type &itr) + { + const DefineUseChain &duChain = itr.second; + for_each(duChain.begin(), duChain.end(), [](const DefineUseChain::value_type &du) + { + for_each(du.usages.begin(), du.usages.end(), [&du](const UseDefineChainEntry *usage) + { + // Update instruction use list + du.psInst->m_Uses.push_back(Instruction::Use(usage->psInst, usage->psOp)); + // And the usage's definition + usage->psOp->m_Defines.push_back(Operand::Define(du.psInst, du.psOp)); + + }); + }); + }); +} diff --git a/src/cbstring/bsafe.c b/src/cbstring/bsafe.c new file mode 100644 index 0000000..2a4cf1f --- /dev/null +++ b/src/cbstring/bsafe.c @@ -0,0 +1,85 @@ +/* + * This source file is part of the bstring string library. This code was + * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause + * BSD open source license or GPL v2.0. Refer to the accompanying documentation + * for details on usage and license. + */ + +/* + * bsafe.c + * + * This is an optional module that can be used to help enforce a safety + * standard based on pervasive usage of bstrlib. This file is not necessarily + * portable, however, it has been tested to work correctly with Intel's C/C++ + * compiler, WATCOM C/C++ v11.x and Microsoft Visual C++. 
+ */ + +#include +#include +#include "bsafe.h" + +static int bsafeShouldExit = 1; + +#if 0 +char * strcpy (char *dst, const char *src); +char * strcat (char *dst, const char *src); + +char * strcpy (char *dst, const char *src) { + dst = dst; + src = src; + fprintf (stderr, "bsafe error: strcpy() is not safe, use bstrcpy instead.\n"); + if (bsafeShouldExit) exit (-1); + return NULL; +} + +char * strcat (char *dst, const char *src) { + dst = dst; + src = src; + fprintf (stderr, "bsafe error: strcat() is not safe, use bstrcat instead.\n"); + if (bsafeShouldExit) exit (-1); + return NULL; +} + +#if !defined (__GNUC__) && (!defined(_MSC_VER) || (_MSC_VER <= 1310)) +char * (gets) (char * buf) { + buf = buf; + fprintf (stderr, "bsafe error: gets() is not safe, use bgets.\n"); + if (bsafeShouldExit) exit (-1); + return NULL; +} +#endif + +char * (strncpy) (char *dst, const char *src, size_t n) { + dst = dst; + src = src; + n = n; + fprintf (stderr, "bsafe error: strncpy() is not safe, use bmidstr instead.\n"); + if (bsafeShouldExit) exit (-1); + return NULL; +} + +char * (strncat) (char *dst, const char *src, size_t n) { + dst = dst; + src = src; + n = n; + fprintf (stderr, "bsafe error: strncat() is not safe, use bstrcat then btrunc\n\tor cstr2tbstr, btrunc then bstrcat instead.\n"); + if (bsafeShouldExit) exit (-1); + return NULL; +} + +char * (strtok) (char *s1, const char *s2) { + s1 = s1; + s2 = s2; + fprintf (stderr, "bsafe error: strtok() is not safe, use bsplit or bsplits instead.\n"); + if (bsafeShouldExit) exit (-1); + return NULL; +} + +char * (strdup) (const char *s) { + s = s; + fprintf (stderr, "bsafe error: strdup() is not safe, use bstrcpy.\n"); + if (bsafeShouldExit) exit (-1); + return NULL; +} + +#endif diff --git a/src/cbstring/bsafe.h b/src/cbstring/bsafe.h new file mode 100644 index 0000000..eb41ec2 --- /dev/null +++ b/src/cbstring/bsafe.h @@ -0,0 +1,43 @@ +/* + * This source file is part of the bstring string library. This code was + * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause + * BSD open source license or GPL v2.0. Refer to the accompanying documentation + * for details on usage and license. + */ + +/* + * bsafe.h + * + * This is an optional module that can be used to help enforce a safety + * standard based on pervasive usage of bstrlib. This file is not necessarily + * portable, however, it has been tested to work correctly with Intel's C/C++ + * compiler, WATCOM C/C++ v11.x and Microsoft Visual C++. + */ + +#ifndef BSTRLIB_BSAFE_INCLUDE +#define BSTRLIB_BSAFE_INCLUDE + +#ifdef __cplusplus +extern "C" { +#endif + +#if !defined (__GNUC__) && (!defined(_MSC_VER) || (_MSC_VER <= 1310)) +/* This is caught in the linker, so its not necessary for gcc. */ +extern char * (gets) (char * buf); +#endif + +extern char * (strncpy) (char *dst, const char *src, size_t n); +extern char * (strncat) (char *dst, const char *src, size_t n); +extern char * (strtok) (char *s1, const char *s2); +extern char * (strdup) (const char *s); + +#undef strcpy +#undef strcat +#define strcpy(a,b) bsafe_strcpy(a,b) +#define strcat(a,b) bsafe_strcat(a,b) + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/cbstring/bstraux.c b/src/cbstring/bstraux.c new file mode 100644 index 0000000..975932c --- /dev/null +++ b/src/cbstring/bstraux.c @@ -0,0 +1,1133 @@ +/* + * This source file is part of the bstring string library. This code was + * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause + * BSD open source license or GPL v2.0. 
Refer to the accompanying documentation + * for details on usage and license. + */ + +/* + * bstraux.c + * + * This file is not necessarily part of the core bstring library itself, but + * is just an auxilliary module which includes miscellaneous or trivial + * functions. + */ + +#include +#include +#include +#include +#include +#include "bstrlib.h" +#include "bstraux.h" + +/* bstring bTail (bstring b, int n) + * + * Return with a string of the last n characters of b. + */ +bstring bTail (bstring b, int n) { + if (b == NULL || n < 0 || (b->mlen < b->slen && b->mlen > 0)) return NULL; + if (n >= b->slen) return bstrcpy (b); + return bmidstr (b, b->slen - n, n); +} + +/* bstring bHead (bstring b, int n) + * + * Return with a string of the first n characters of b. + */ +bstring bHead (bstring b, int n) { + if (b == NULL || n < 0 || (b->mlen < b->slen && b->mlen > 0)) return NULL; + if (n >= b->slen) return bstrcpy (b); + return bmidstr (b, 0, n); +} + +/* int bFill (bstring a, char c, int len) + * + * Fill a given bstring with the character in parameter c, for a length n. + */ +int bFill (bstring b, char c, int len) { + if (b == NULL || len < 0 || (b->mlen < b->slen && b->mlen > 0)) return -__LINE__; + b->slen = 0; + return bsetstr (b, len, NULL, c); +} + +/* int bReplicate (bstring b, int n) + * + * Replicate the contents of b end to end n times and replace it in b. + */ +int bReplicate (bstring b, int n) { + return bpattern (b, n * b->slen); +} + +/* int bReverse (bstring b) + * + * Reverse the contents of b in place. + */ +int bReverse (bstring b) { +int i, n, m; +unsigned char t; + + if (b == NULL || b->slen < 0 || b->mlen < b->slen) return -__LINE__; + n = b->slen; + if (2 <= n) { + m = ((unsigned)n) >> 1; + n--; + for (i=0; i < m; i++) { + t = b->data[n - i]; + b->data[n - i] = b->data[i]; + b->data[i] = t; + } + } + return 0; +} + +/* int bInsertChrs (bstring b, int pos, int len, unsigned char c, unsigned char fill) + * + * Insert a repeated sequence of a given character into the string at + * position pos for a length len. + */ +int bInsertChrs (bstring b, int pos, int len, unsigned char c, unsigned char fill) { + if (b == NULL || b->slen < 0 || b->mlen < b->slen || pos < 0 || len <= 0) return -__LINE__; + + if (pos > b->slen + && 0 > bsetstr (b, pos, NULL, fill)) return -__LINE__; + + if (0 > balloc (b, b->slen + len)) return -__LINE__; + if (pos < b->slen) memmove (b->data + pos + len, b->data + pos, b->slen - pos); + memset (b->data + pos, c, len); + b->slen += len; + b->data[b->slen] = (unsigned char) '\0'; + return BSTR_OK; +} + +/* int bJustifyLeft (bstring b, int space) + * + * Left justify a string. + */ +int bJustifyLeft (bstring b, int space) { +int j, i, s, t; +unsigned char c = (unsigned char) space; + + if (b == NULL || b->slen < 0 || b->mlen < b->slen) return -__LINE__; + if (space != (int) c) return BSTR_OK; + + for (s=j=i=0; i < b->slen; i++) { + t = s; + s = c != (b->data[j] = b->data[i]); + j += (t|s); + } + if (j > 0 && b->data[j-1] == c) j--; + + b->data[j] = (unsigned char) '\0'; + b->slen = j; + return BSTR_OK; +} + +/* int bJustifyRight (bstring b, int width, int space) + * + * Right justify a string to within a given width. 
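+ *
+ * Illustrative example (a minimal sketch, not part of the original docs):
+ *
+ *     bstring b = bfromcstr ("abc");
+ *     bJustifyRight (b, 6, ' ');      => b now holds "   abc"
+ *     bdestroy (b);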
+ */ +int bJustifyRight (bstring b, int width, int space) { +int ret; + if (width <= 0) return -__LINE__; + if (0 > (ret = bJustifyLeft (b, space))) return ret; + if (b->slen <= width) + return bInsertChrs (b, 0, width - b->slen, (unsigned char) space, (unsigned char) space); + return BSTR_OK; +} + +/* int bJustifyCenter (bstring b, int width, int space) + * + * Center a string's non-white space characters to within a given width by + * inserting whitespaces at the beginning. + */ +int bJustifyCenter (bstring b, int width, int space) { +int ret; + if (width <= 0) return -__LINE__; + if (0 > (ret = bJustifyLeft (b, space))) return ret; + if (b->slen <= width) + return bInsertChrs (b, 0, (width - b->slen + 1) >> 1, (unsigned char) space, (unsigned char) space); + return BSTR_OK; +} + +/* int bJustifyMargin (bstring b, int width, int space) + * + * Stretch a string to flush against left and right margins by evenly + * distributing additional white space between words. If the line is too + * long to be margin justified, it is left justified. + */ +int bJustifyMargin (bstring b, int width, int space) { +struct bstrList * sl; +int i, l, c; + + if (b == NULL || b->slen < 0 || b->mlen == 0 || b->mlen < b->slen) return -__LINE__; + if (NULL == (sl = bsplit (b, (unsigned char) space))) return -__LINE__; + for (l=c=i=0; i < sl->qty; i++) { + if (sl->entry[i]->slen > 0) { + c ++; + l += sl->entry[i]->slen; + } + } + + if (l + c >= width || c < 2) { + bstrListDestroy (sl); + return bJustifyLeft (b, space); + } + + b->slen = 0; + for (i=0; i < sl->qty; i++) { + if (sl->entry[i]->slen > 0) { + if (b->slen > 0) { + int s = (width - l + (c / 2)) / c; + bInsertChrs (b, b->slen, s, (unsigned char) space, (unsigned char) space); + l += s; + } + bconcat (b, sl->entry[i]); + c--; + if (c <= 0) break; + } + } + + bstrListDestroy (sl); + return BSTR_OK; +} + +static size_t readNothing (void *buff, size_t elsize, size_t nelem, void *parm) { + buff = buff; + elsize = elsize; + nelem = nelem; + parm = parm; + return 0; /* Immediately indicate EOF. */ +} + +/* struct bStream * bsFromBstr (const_bstring b); + * + * Create a bStream whose contents are a copy of the bstring passed in. + * This allows the use of all the bStream APIs with bstrings. + */ +struct bStream * bsFromBstr (const_bstring b) { +struct bStream * s = bsopen ((bNread) readNothing, NULL); + bsunread (s, b); /* Push the bstring data into the empty bStream. */ + return s; +} + +static size_t readRef (void *buff, size_t elsize, size_t nelem, void *parm) { +struct tagbstring * t = (struct tagbstring *) parm; +size_t tsz = elsize * nelem; + + if (tsz > (size_t) t->slen) tsz = (size_t) t->slen; + if (tsz > 0) { + memcpy (buff, t->data, tsz); + t->slen -= (int) tsz; + t->data += tsz; + return tsz / elsize; + } + return 0; +} + +/* The "by reference" version of the above function. This function puts + * a number of restrictions on the call site (the passed in struct + * tagbstring *will* be modified by this function, and the source data + * must remain alive and constant for the lifetime of the bStream). + * Hence it is not presented as an extern. + */ +static struct bStream * bsFromBstrRef (struct tagbstring * t) { + if (!t) return NULL; + return bsopen ((bNread) readRef, t); +} + +/* char * bStr2NetStr (const_bstring b) + * + * Convert a bstring to a netstring. See + * http://cr.yp.to/proto/netstrings.txt for a description of netstrings. 
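+ * For example (illustrative), the bstring "hello" encodes as the netstring
+ * "5:hello,", and bNetStr2Bstr ("5:hello,") recovers the original string.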
+ * Note: 1) The value returned should be freed with a call to bcstrfree() at + * the point when it will no longer be referenced to avoid a memory + * leak. + * 2) If the returned value is non-NULL, then it also '\0' terminated + * in the character position one past the "," terminator. + */ +char * bStr2NetStr (const_bstring b) { +char strnum[sizeof (b->slen) * 3 + 1]; +bstring s; +unsigned char * buff; + + if (b == NULL || b->data == NULL || b->slen < 0) return NULL; + sprintf (strnum, "%d:", b->slen); + if (NULL == (s = bfromcstr (strnum)) + || bconcat (s, b) == BSTR_ERR || bconchar (s, (char) ',') == BSTR_ERR) { + bdestroy (s); + return NULL; + } + buff = s->data; + bcstrfree ((char *) s); + return (char *) buff; +} + +/* bstring bNetStr2Bstr (const char * buf) + * + * Convert a netstring to a bstring. See + * http://cr.yp.to/proto/netstrings.txt for a description of netstrings. + * Note that the terminating "," *must* be present, however a following '\0' + * is *not* required. + */ +bstring bNetStr2Bstr (const char * buff) { +int i, x; +bstring b; + if (buff == NULL) return NULL; + x = 0; + for (i=0; buff[i] != ':'; i++) { + unsigned int v = buff[i] - '0'; + if (v > 9 || x > ((INT_MAX - (signed int)v) / 10)) return NULL; + x = (x * 10) + v; + } + + /* This thing has to be properly terminated */ + if (buff[i + 1 + x] != ',') return NULL; + + if (NULL == (b = bfromcstr (""))) return NULL; + if (balloc (b, x + 1) != BSTR_OK) { + bdestroy (b); + return NULL; + } + memcpy (b->data, buff + i + 1, x); + b->data[x] = (unsigned char) '\0'; + b->slen = x; + return b; +} + +static char b64ETable[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + +/* bstring bBase64Encode (const_bstring b) + * + * Generate a base64 encoding. See: RFC1341 + */ +bstring bBase64Encode (const_bstring b) { +int i, c0, c1, c2, c3; +bstring out; + + if (b == NULL || b->slen < 0 || b->data == NULL) return NULL; + + out = bfromcstr (""); + for (i=0; i + 2 < b->slen; i += 3) { + if (i && ((i % 57) == 0)) { + if (bconchar (out, (char) '\015') < 0 || bconchar (out, (char) '\012') < 0) { + bdestroy (out); + return NULL; + } + } + c0 = b->data[i] >> 2; + c1 = ((b->data[i] << 4) | + (b->data[i+1] >> 4)) & 0x3F; + c2 = ((b->data[i+1] << 2) | + (b->data[i+2] >> 6)) & 0x3F; + c3 = b->data[i+2] & 0x3F; + if (bconchar (out, b64ETable[c0]) < 0 || + bconchar (out, b64ETable[c1]) < 0 || + bconchar (out, b64ETable[c2]) < 0 || + bconchar (out, b64ETable[c3]) < 0) { + bdestroy (out); + return NULL; + } + } + + if (i && ((i % 57) == 0)) { + if (bconchar (out, (char) '\015') < 0 || bconchar (out, (char) '\012') < 0) { + bdestroy (out); + return NULL; + } + } + + switch (i + 2 - b->slen) { + case 0: c0 = b->data[i] >> 2; + c1 = ((b->data[i] << 4) | + (b->data[i+1] >> 4)) & 0x3F; + c2 = (b->data[i+1] << 2) & 0x3F; + if (bconchar (out, b64ETable[c0]) < 0 || + bconchar (out, b64ETable[c1]) < 0 || + bconchar (out, b64ETable[c2]) < 0 || + bconchar (out, (char) '=') < 0) { + bdestroy (out); + return NULL; + } + break; + case 1: c0 = b->data[i] >> 2; + c1 = (b->data[i] << 4) & 0x3F; + if (bconchar (out, b64ETable[c0]) < 0 || + bconchar (out, b64ETable[c1]) < 0 || + bconchar (out, (char) '=') < 0 || + bconchar (out, (char) '=') < 0) { + bdestroy (out); + return NULL; + } + break; + case 2: break; + } + + return out; +} + +#define B64_PAD (-2) +#define B64_ERR (-1) + +static int base64DecodeSymbol (unsigned char alpha) { + if ((alpha >= 'A') && (alpha <= 'Z')) return (int)(alpha - 'A'); + else if ((alpha >= 'a') && (alpha <= 
'z')) + return 26 + (int)(alpha - 'a'); + else if ((alpha >= '0') && (alpha <= '9')) + return 52 + (int)(alpha - '0'); + else if (alpha == '+') return 62; + else if (alpha == '/') return 63; + else if (alpha == '=') return B64_PAD; + else return B64_ERR; +} + +/* bstring bBase64DecodeEx (const_bstring b, int * boolTruncError) + * + * Decode a base64 block of data. All MIME headers are assumed to have been + * removed. See: RFC1341 + */ +bstring bBase64DecodeEx (const_bstring b, int * boolTruncError) { +int i, v; +unsigned char c0, c1, c2; +bstring out; + + if (b == NULL || b->slen < 0 || b->data == NULL) return NULL; + if (boolTruncError) *boolTruncError = 0; + out = bfromcstr (""); + i = 0; + for (;;) { + do { + if (i >= b->slen) return out; + if (b->data[i] == '=') { /* Bad "too early" truncation */ + if (boolTruncError) { + *boolTruncError = 1; + return out; + } + bdestroy (out); + return NULL; + } + v = base64DecodeSymbol (b->data[i]); + i++; + } while (v < 0); + c0 = (unsigned char) (v << 2); + do { + if (i >= b->slen || b->data[i] == '=') { /* Bad "too early" truncation */ + if (boolTruncError) { + *boolTruncError = 1; + return out; + } + bdestroy (out); + return NULL; + } + v = base64DecodeSymbol (b->data[i]); + i++; + } while (v < 0); + c0 |= (unsigned char) (v >> 4); + c1 = (unsigned char) (v << 4); + do { + if (i >= b->slen) { + if (boolTruncError) { + *boolTruncError = 1; + return out; + } + bdestroy (out); + return NULL; + } + if (b->data[i] == '=') { + i++; + if (i >= b->slen || b->data[i] != '=' || bconchar (out, c0) < 0) { + if (boolTruncError) { + *boolTruncError = 1; + return out; + } + bdestroy (out); /* Missing "=" at the end. */ + return NULL; + } + return out; + } + v = base64DecodeSymbol (b->data[i]); + i++; + } while (v < 0); + c1 |= (unsigned char) (v >> 2); + c2 = (unsigned char) (v << 6); + do { + if (i >= b->slen) { + if (boolTruncError) { + *boolTruncError = 1; + return out; + } + bdestroy (out); + return NULL; + } + if (b->data[i] == '=') { + if (bconchar (out, c0) < 0 || bconchar (out, c1) < 0) { + if (boolTruncError) { + *boolTruncError = 1; + return out; + } + bdestroy (out); + return NULL; + } + if (boolTruncError) *boolTruncError = 0; + return out; + } + v = base64DecodeSymbol (b->data[i]); + i++; + } while (v < 0); + c2 |= (unsigned char) (v); + if (bconchar (out, c0) < 0 || + bconchar (out, c1) < 0 || + bconchar (out, c2) < 0) { + if (boolTruncError) { + *boolTruncError = -1; + return out; + } + bdestroy (out); + return NULL; + } + } +} + +#define UU_DECODE_BYTE(b) (((b) == (signed int)'`') ? 
0 : (b) - (signed int)' ') + +struct bUuInOut { + bstring src, dst; + int * badlines; +}; + +#define UU_MAX_LINELEN 45 + +static int bUuDecLine (void * parm, int ofs, int len) { +struct bUuInOut * io = (struct bUuInOut *) parm; +bstring s = io->src; +bstring t = io->dst; +int i, llen, otlen, ret, c0, c1, c2, c3, d0, d1, d2, d3; + + if (len == 0) return 0; + llen = UU_DECODE_BYTE (s->data[ofs]); + ret = 0; + + otlen = t->slen; + + if (((unsigned) llen) > UU_MAX_LINELEN) { ret = -__LINE__; + goto bl; + } + + llen += t->slen; + + for (i=1; i < s->slen && t->slen < llen;i += 4) { + unsigned char outoctet[3]; + c0 = UU_DECODE_BYTE (d0 = (int) bchare (s, i+ofs+0, ' ' - 1)); + c1 = UU_DECODE_BYTE (d1 = (int) bchare (s, i+ofs+1, ' ' - 1)); + c2 = UU_DECODE_BYTE (d2 = (int) bchare (s, i+ofs+2, ' ' - 1)); + c3 = UU_DECODE_BYTE (d3 = (int) bchare (s, i+ofs+3, ' ' - 1)); + + if (((unsigned) (c0|c1) >= 0x40)) { if (!ret) ret = -__LINE__; + if (d0 > 0x60 || (d0 < (' ' - 1) && !isspace (d0)) || + d1 > 0x60 || (d1 < (' ' - 1) && !isspace (d1))) { + t->slen = otlen; + goto bl; + } + c0 = c1 = 0; + } + outoctet[0] = (unsigned char) ((c0 << 2) | ((unsigned) c1 >> 4)); + if (t->slen+1 >= llen) { + if (0 > bconchar (t, (char) outoctet[0])) return -__LINE__; + break; + } + if ((unsigned) c2 >= 0x40) { if (!ret) ret = -__LINE__; + if (d2 > 0x60 || (d2 < (' ' - 1) && !isspace (d2))) { + t->slen = otlen; + goto bl; + } + c2 = 0; + } + outoctet[1] = (unsigned char) ((c1 << 4) | ((unsigned) c2 >> 2)); + if (t->slen+2 >= llen) { + if (0 > bcatblk (t, outoctet, 2)) return -__LINE__; + break; + } + if ((unsigned) c3 >= 0x40) { if (!ret) ret = -__LINE__; + if (d3 > 0x60 || (d3 < (' ' - 1) && !isspace (d3))) { + t->slen = otlen; + goto bl; + } + c3 = 0; + } + outoctet[2] = (unsigned char) ((c2 << 6) | ((unsigned) c3)); + if (0 > bcatblk (t, outoctet, 3)) return -__LINE__; + } + if (t->slen < llen) { if (0 == ret) ret = -__LINE__; + t->slen = otlen; + } + bl:; + if (ret && io->badlines) { + (*io->badlines)++; + return 0; + } + return ret; +} + +/* bstring bUuDecodeEx (const_bstring src, int * badlines) + * + * Performs a UUDecode of a block of data. If there are errors in the + * decoding, they are counted up and returned in "badlines", if badlines is + * not NULL. It is assumed that the "begin" and "end" lines have already + * been stripped off. The potential security problem of writing the + * filename in the begin line is something that is beyond the scope of a + * portable library. 
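+ *
+ * Illustrative call pattern (a sketch; 'encoded' is an assumed input bstring):
+ *
+ *     int badlines = 0;
+ *     bstring raw = bUuDecodeEx (encoded, &badlines);
+ *     A NULL result means decoding failed outright; a nonzero badlines
+ *     count reports lines that could not be decoded cleanly.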
+ */ + +#ifdef _MSC_VER +#pragma warning(disable:4204) +#endif + +bstring bUuDecodeEx (const_bstring src, int * badlines) { +struct tagbstring t; +struct bStream * s; +struct bStream * d; +bstring b; + + if (!src) return NULL; + t = *src; /* Short lifetime alias to header of src */ + s = bsFromBstrRef (&t); /* t is undefined after this */ + if (!s) return NULL; + d = bsUuDecode (s, badlines); + b = bfromcstralloc (256, ""); + if (NULL == b || 0 > bsread (b, d, INT_MAX)) { + bdestroy (b); + bsclose (d); + bsclose (s); + return NULL; + } + return b; +} + +struct bsUuCtx { + struct bUuInOut io; + struct bStream * sInp; +}; + +static size_t bsUuDecodePart (void *buff, size_t elsize, size_t nelem, void *parm) { +static struct tagbstring eol = bsStatic ("\r\n"); +struct bsUuCtx * luuCtx = (struct bsUuCtx *) parm; +size_t tsz; +int l, lret; + + if (NULL == buff || NULL == parm) return 0; + tsz = elsize * nelem; + + CheckInternalBuffer:; + /* If internal buffer has sufficient data, just output it */ + if (((size_t) luuCtx->io.dst->slen) > tsz) { + memcpy (buff, luuCtx->io.dst->data, tsz); + bdelete (luuCtx->io.dst, 0, (int) tsz); + return nelem; + } + + DecodeMore:; + if (0 <= (l = binchr (luuCtx->io.src, 0, &eol))) { + int ol = 0; + struct tagbstring t; + bstring s = luuCtx->io.src; + luuCtx->io.src = &t; + + do { + if (l > ol) { + bmid2tbstr (t, s, ol, l - ol); + lret = bUuDecLine (&luuCtx->io, 0, t.slen); + if (0 > lret) { + luuCtx->io.src = s; + goto Done; + } + } + ol = l + 1; + if (((size_t) luuCtx->io.dst->slen) > tsz) break; + l = binchr (s, ol, &eol); + } while (BSTR_ERR != l); + bdelete (s, 0, ol); + luuCtx->io.src = s; + goto CheckInternalBuffer; + } + + if (BSTR_ERR != bsreada (luuCtx->io.src, luuCtx->sInp, bsbufflength (luuCtx->sInp, BSTR_BS_BUFF_LENGTH_GET))) { + goto DecodeMore; + } + + bUuDecLine (&luuCtx->io, 0, luuCtx->io.src->slen); + + Done:; + /* Output any lingering data that has been translated */ + if (((size_t) luuCtx->io.dst->slen) > 0) { + if (((size_t) luuCtx->io.dst->slen) > tsz) goto CheckInternalBuffer; + memcpy (buff, luuCtx->io.dst->data, luuCtx->io.dst->slen); + tsz = luuCtx->io.dst->slen / elsize; + luuCtx->io.dst->slen = 0; + if (tsz > 0) return tsz; + } + + /* Deallocate once EOF becomes triggered */ + bdestroy (luuCtx->io.dst); + bdestroy (luuCtx->io.src); + free (luuCtx); + return 0; +} + +/* bStream * bsUuDecode (struct bStream * sInp, int * badlines) + * + * Creates a bStream which performs the UUDecode of an an input stream. If + * there are errors in the decoding, they are counted up and returned in + * "badlines", if badlines is not NULL. It is assumed that the "begin" and + * "end" lines have already been stripped off. The potential security + * problem of writing the filename in the begin line is something that is + * beyond the scope of a portable library. 
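+ *
+ * Illustrative wiring (a sketch; 'fp' is an assumed open FILE *):
+ *
+ *     int badlines = 0;
+ *     struct bStream * sIn  = bsopen ((bNread) fread, fp);
+ *     struct bStream * sDec = bsUuDecode (sIn, &badlines);
+ *     bstring out = bfromcstr ("");
+ *     bsread (out, sDec, INT_MAX);
+ *     bsclose (sDec);
+ *     bsclose (sIn);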
+ */ + +struct bStream * bsUuDecode (struct bStream * sInp, int * badlines) { +struct bsUuCtx * luuCtx = (struct bsUuCtx *) malloc (sizeof (struct bsUuCtx)); +struct bStream * sOut; + + if (NULL == luuCtx) return NULL; + + luuCtx->io.src = bfromcstr (""); + luuCtx->io.dst = bfromcstr (""); + if (NULL == luuCtx->io.dst || NULL == luuCtx->io.src) { + CleanUpFailureToAllocate:; + bdestroy (luuCtx->io.dst); + bdestroy (luuCtx->io.src); + free (luuCtx); + return NULL; + } + luuCtx->io.badlines = badlines; + if (badlines) *badlines = 0; + + luuCtx->sInp = sInp; + + sOut = bsopen ((bNread) bsUuDecodePart, luuCtx); + if (NULL == sOut) goto CleanUpFailureToAllocate; + return sOut; +} + +#define UU_ENCODE_BYTE(b) (char) (((b) == 0) ? '`' : ((b) + ' ')) + +/* bstring bUuEncode (const_bstring src) + * + * Performs a UUEncode of a block of data. The "begin" and "end" lines are + * not appended. + */ +bstring bUuEncode (const_bstring src) { +bstring out; +int i, j, jm; +unsigned int c0, c1, c2; + if (src == NULL || src->slen < 0 || src->data == NULL) return NULL; + if ((out = bfromcstr ("")) == NULL) return NULL; + for (i=0; i < src->slen; i += UU_MAX_LINELEN) { + if ((jm = i + UU_MAX_LINELEN) > src->slen) jm = src->slen; + if (bconchar (out, UU_ENCODE_BYTE (jm - i)) < 0) { + bstrFree (out); + break; + } + for (j = i; j < jm; j += 3) { + c0 = (unsigned int) bchar (src, j ); + c1 = (unsigned int) bchar (src, j + 1); + c2 = (unsigned int) bchar (src, j + 2); + if (bconchar (out, UU_ENCODE_BYTE ( (c0 & 0xFC) >> 2)) < 0 || + bconchar (out, UU_ENCODE_BYTE (((c0 & 0x03) << 4) | ((c1 & 0xF0) >> 4))) < 0 || + bconchar (out, UU_ENCODE_BYTE (((c1 & 0x0F) << 2) | ((c2 & 0xC0) >> 6))) < 0 || + bconchar (out, UU_ENCODE_BYTE ( (c2 & 0x3F))) < 0) { + bstrFree (out); + goto End; + } + } + if (bconchar (out, (char) '\r') < 0 || bconchar (out, (char) '\n') < 0) { + bstrFree (out); + break; + } + } + End:; + return out; +} + +/* bstring bYEncode (const_bstring src) + * + * Performs a YEncode of a block of data. No header or tail info is + * appended. See: http://www.yenc.org/whatis.htm and + * http://www.yenc.org/yenc-draft.1.3.txt + */ +bstring bYEncode (const_bstring src) { +int i; +bstring out; +unsigned char c; + + if (src == NULL || src->slen < 0 || src->data == NULL) return NULL; + if ((out = bfromcstr ("")) == NULL) return NULL; + for (i=0; i < src->slen; i++) { + c = (unsigned char)(src->data[i] + 42); + if (c == '=' || c == '\0' || c == '\r' || c == '\n') { + if (0 > bconchar (out, (char) '=')) { + bdestroy (out); + return NULL; + } + c += (unsigned char) 64; + } + if (0 > bconchar (out, c)) { + bdestroy (out); + return NULL; + } + } + return out; +} + +/* bstring bYDecode (const_bstring src) + * + * Performs a YDecode of a block of data. See: + * http://www.yenc.org/whatis.htm and http://www.yenc.org/yenc-draft.1.3.txt + */ +#define MAX_OB_LEN (64) + +bstring bYDecode (const_bstring src) { +int i; +bstring out; +unsigned char c; +unsigned char octetbuff[MAX_OB_LEN]; +int obl; + + if (src == NULL || src->slen < 0 || src->data == NULL) return NULL; + if ((out = bfromcstr ("")) == NULL) return NULL; + + obl = 0; + + for (i=0; i < src->slen; i++) { + if ('=' == (c = src->data[i])) { /* The = escape mode */ + i++; + if (i >= src->slen) { + bdestroy (out); + return NULL; + } + c = (unsigned char) (src->data[i] - 64); + } else { + if ('\0' == c) { + bdestroy (out); + return NULL; + } + + /* Extraneous CR/LFs are to be ignored. 
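+                   (bYEncode escapes '=', NUL, CR and LF as '=' followed by the
+                   byte value plus 64; the '=' branch above subtracts that 64
+                   back out.)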
*/ + if (c == '\r' || c == '\n') continue; + } + + octetbuff[obl] = (unsigned char) ((int) c - 42); + obl++; + + if (obl >= MAX_OB_LEN) { + if (0 > bcatblk (out, octetbuff, obl)) { + bdestroy (out); + return NULL; + } + obl = 0; + } + } + + if (0 > bcatblk (out, octetbuff, obl)) { + bdestroy (out); + out = NULL; + } + return out; +} + +/* bstring bStrfTime (const char * fmt, const struct tm * timeptr) + * + * Takes a format string that is compatible with strftime and a struct tm + * pointer, formats the time according to the format string and outputs + * the bstring as a result. Note that if there is an early generation of a + * '\0' character, the bstring will be truncated to this end point. + */ +bstring bStrfTime (const char * fmt, const struct tm * timeptr) { +#if defined (__TURBOC__) && !defined (__BORLANDC__) +static struct tagbstring ns = bsStatic ("bStrfTime Not supported"); + fmt = fmt; + timeptr = timeptr; + return &ns; +#else +bstring buff; +int n; +size_t r; + + if (fmt == NULL) return NULL; + + /* Since the length is not determinable beforehand, a search is + performed using the truncating "strftime" call on increasing + potential sizes for the output result. */ + + if ((n = (int) (2*strlen (fmt))) < 16) n = 16; + buff = bfromcstralloc (n+2, ""); + + for (;;) { + if (BSTR_OK != balloc (buff, n + 2)) { + bdestroy (buff); + return NULL; + } + + r = strftime ((char *) buff->data, n + 1, fmt, timeptr); + + if (r > 0) { + buff->slen = (int) r; + break; + } + + n += n; + } + + return buff; +#endif +} + +/* int bSetCstrChar (bstring a, int pos, char c) + * + * Sets the character at position pos to the character c in the bstring a. + * If the character c is NUL ('\0') then the string is truncated at this + * point. Note: this does not enable any other '\0' character in the bstring + * as terminator indicator for the string. pos must be in the position + * between 0 and b->slen inclusive, otherwise BSTR_ERR will be returned. + */ +int bSetCstrChar (bstring b, int pos, char c) { + if (NULL == b || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen) + return BSTR_ERR; + if (pos < 0 || pos > b->slen) return BSTR_ERR; + + if (pos == b->slen) { + if ('\0' != c) return bconchar (b, c); + return 0; + } + + b->data[pos] = (unsigned char) c; + if ('\0' == c) b->slen = pos; + + return 0; +} + +/* int bSetChar (bstring b, int pos, char c) + * + * Sets the character at position pos to the character c in the bstring a. + * The string is not truncated if the character c is NUL ('\0'). pos must + * be in the position between 0 and b->slen inclusive, otherwise BSTR_ERR + * will be returned. + */ +int bSetChar (bstring b, int pos, char c) { + if (NULL == b || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen) + return BSTR_ERR; + if (pos < 0 || pos > b->slen) return BSTR_ERR; + + if (pos == b->slen) { + return bconchar (b, c); + } + + b->data[pos] = (unsigned char) c; + return 0; +} + +#define INIT_SECURE_INPUT_LENGTH (256) + +/* bstring bSecureInput (int maxlen, int termchar, + * bNgetc vgetchar, void * vgcCtx) + * + * Read input from an abstracted input interface, for a length of at most + * maxlen characters. If maxlen <= 0, then there is no length limit put + * on the input. The result is terminated early if vgetchar() return EOF + * or the user specified value termchar. 
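+ *
+ * Illustrative use (a sketch): read at most 64 characters from stdin, stopping
+ * at a newline, with fgetc as the abstracted input callback, then wipe it:
+ *
+ *     bstring pw = bSecureInput (64, '\n', (bNgetc) fgetc, stdin);
+ *     ... use pw ...
+ *     bSecureDestroy (pw);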
+ * + */ +bstring bSecureInput (int maxlen, int termchar, bNgetc vgetchar, void * vgcCtx) { +int i, m, c; +bstring b, t; + + if (!vgetchar) return NULL; + + b = bfromcstralloc (INIT_SECURE_INPUT_LENGTH, ""); + if ((c = UCHAR_MAX + 1) == termchar) c++; + + for (i=0; ; i++) { + if (termchar == c || (maxlen > 0 && i >= maxlen)) c = EOF; + else c = vgetchar (vgcCtx); + + if (EOF == c) break; + + if (i+1 >= b->mlen) { + + /* Double size, but deal with unusual case of numeric + overflows */ + + if ((m = b->mlen << 1) <= b->mlen && + (m = b->mlen + 1024) <= b->mlen && + (m = b->mlen + 16) <= b->mlen && + (m = b->mlen + 1) <= b->mlen) t = NULL; + else t = bfromcstralloc (m, ""); + + if (t) memcpy (t->data, b->data, i); + bSecureDestroy (b); /* Cleanse previous buffer */ + b = t; + if (!b) return b; + } + + b->data[i] = (unsigned char) c; + } + + b->slen = i; + b->data[i] = (unsigned char) '\0'; + return b; +} + +#define BWS_BUFF_SZ (1024) + +struct bwriteStream { + bstring buff; /* Buffer for underwrites */ + void * parm; /* The stream handle for core stream */ + bNwrite writeFn; /* fwrite work-a-like fnptr for core stream */ + int isEOF; /* track stream's EOF state */ + int minBuffSz; +}; + +/* struct bwriteStream * bwsOpen (bNwrite writeFn, void * parm) + * + * Wrap a given open stream (described by a fwrite work-a-like function + * pointer and stream handle) into an open bwriteStream suitable for write + * streaming functions. + */ +struct bwriteStream * bwsOpen (bNwrite writeFn, void * parm) { +struct bwriteStream * ws; + + if (NULL == writeFn) return NULL; + ws = (struct bwriteStream *) malloc (sizeof (struct bwriteStream)); + if (ws) { + if (NULL == (ws->buff = bfromcstr (""))) { + free (ws); + ws = NULL; + } else { + ws->parm = parm; + ws->writeFn = writeFn; + ws->isEOF = 0; + ws->minBuffSz = BWS_BUFF_SZ; + } + } + return ws; +} + +#define internal_bwswriteout(ws,b) { \ + if ((b)->slen > 0) { \ + if (1 != (ws->writeFn ((b)->data, (b)->slen, 1, ws->parm))) { \ + ws->isEOF = 1; \ + return BSTR_ERR; \ + } \ + } \ +} + +/* int bwsWriteFlush (struct bwriteStream * ws) + * + * Force any pending data to be written to the core stream. + */ +int bwsWriteFlush (struct bwriteStream * ws) { + if (NULL == ws || ws->isEOF || 0 >= ws->minBuffSz || + NULL == ws->writeFn || NULL == ws->buff) return BSTR_ERR; + internal_bwswriteout (ws, ws->buff); + ws->buff->slen = 0; + return 0; +} + +/* int bwsWriteBstr (struct bwriteStream * ws, const_bstring b) + * + * Send a bstring to a bwriteStream. If the stream is at EOF BSTR_ERR is + * returned. Note that there is no deterministic way to determine the exact + * cut off point where the core stream stopped accepting data. 
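+ *
+ * Illustrative use (a sketch; 'fp' is an assumed open FILE *, 'b' a bstring):
+ *
+ *     struct bwriteStream * ws = bwsOpen ((bNwrite) fwrite, fp);
+ *     bwsWriteBstr (ws, b);       buffered until the minimum buffer size is hit
+ *     bwsClose (ws);              flushes pending data and returns fp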
+ */ +int bwsWriteBstr (struct bwriteStream * ws, const_bstring b) { +struct tagbstring t; +int l; + + if (NULL == ws || NULL == b || NULL == ws->buff || + ws->isEOF || 0 >= ws->minBuffSz || NULL == ws->writeFn) + return BSTR_ERR; + + /* Buffer prepacking optimization */ + if (b->slen > 0 && ws->buff->mlen - ws->buff->slen > b->slen) { + static struct tagbstring empty = bsStatic (""); + if (0 > bconcat (ws->buff, b)) return BSTR_ERR; + return bwsWriteBstr (ws, &empty); + } + + if (0 > (l = ws->minBuffSz - ws->buff->slen)) { + internal_bwswriteout (ws, ws->buff); + ws->buff->slen = 0; + l = ws->minBuffSz; + } + + if (b->slen < l) return bconcat (ws->buff, b); + + if (0 > bcatblk (ws->buff, b->data, l)) return BSTR_ERR; + internal_bwswriteout (ws, ws->buff); + ws->buff->slen = 0; + + bmid2tbstr (t, (bstring) b, l, b->slen); + + if (t.slen >= ws->minBuffSz) { + internal_bwswriteout (ws, &t); + return 0; + } + + return bassign (ws->buff, &t); +} + +/* int bwsWriteBlk (struct bwriteStream * ws, void * blk, int len) + * + * Send a block of data a bwriteStream. If the stream is at EOF BSTR_ERR is + * returned. + */ +int bwsWriteBlk (struct bwriteStream * ws, void * blk, int len) { +struct tagbstring t; + if (NULL == blk || len < 0) return BSTR_ERR; + blk2tbstr (t, blk, len); + return bwsWriteBstr (ws, &t); +} + +/* int bwsIsEOF (const struct bwriteStream * ws) + * + * Returns 0 if the stream is currently writable, 1 if the core stream has + * responded by not accepting the previous attempted write. + */ +int bwsIsEOF (const struct bwriteStream * ws) { + if (NULL == ws || NULL == ws->buff || 0 > ws->minBuffSz || + NULL == ws->writeFn) return BSTR_ERR; + return ws->isEOF; +} + +/* int bwsBuffLength (struct bwriteStream * ws, int sz) + * + * Set the length of the buffer used by the bwsStream. If sz is zero, the + * length is not set. This function returns with the previous length. + */ +int bwsBuffLength (struct bwriteStream * ws, int sz) { +int oldSz; + if (ws == NULL || sz < 0) return BSTR_ERR; + oldSz = ws->minBuffSz; + if (sz > 0) ws->minBuffSz = sz; + return oldSz; +} + +/* void * bwsClose (struct bwriteStream * s) + * + * Close the bwriteStream, and return the handle to the stream that was + * originally used to open the given stream. Note that even if the stream + * is at EOF it still needs to be closed with a call to bwsClose. + */ +void * bwsClose (struct bwriteStream * ws) { +void * parm; + if (NULL == ws || NULL == ws->buff || 0 >= ws->minBuffSz || + NULL == ws->writeFn) return NULL; + bwsWriteFlush (ws); + parm = ws->parm; + ws->parm = NULL; + ws->minBuffSz = -1; + ws->writeFn = NULL; + bstrFree (ws->buff); + free (ws); + return parm; +} + diff --git a/src/cbstring/bstraux.h b/src/cbstring/bstraux.h new file mode 100644 index 0000000..17d4ea7 --- /dev/null +++ b/src/cbstring/bstraux.h @@ -0,0 +1,112 @@ +/* + * This source file is part of the bstring string library. This code was + * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause + * BSD open source license or GPL v2.0. Refer to the accompanying documentation + * for details on usage and license. + */ + +/* + * bstraux.h + * + * This file is not a necessary part of the core bstring library itself, but + * is just an auxilliary module which includes miscellaneous or trivial + * functions. 
+ */ + +#ifndef BSTRAUX_INCLUDE +#define BSTRAUX_INCLUDE + +#include +#include "bstrlib.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Safety mechanisms */ +#define bstrDeclare(b) bstring (b) = NULL; +#define bstrFree(b) {if ((b) != NULL && (b)->slen >= 0 && (b)->mlen >= (b)->slen) { bdestroy (b); (b) = NULL; }} + +/* Backward compatibilty with previous versions of Bstrlib */ +#define bAssign(a,b) ((bassign)((a), (b))) +#define bSubs(b,pos,len,a,c) ((breplace)((b),(pos),(len),(a),(unsigned char)(c))) +#define bStrchr(b,c) ((bstrchr)((b), (c))) +#define bStrchrFast(b,c) ((bstrchr)((b), (c))) +#define bCatCstr(b,s) ((bcatcstr)((b), (s))) +#define bCatBlk(b,s,len) ((bcatblk)((b),(s),(len))) +#define bCatStatic(b,s) bCatBlk ((b), ("" s ""), sizeof (s) - 1) +#define bTrunc(b,n) ((btrunc)((b), (n))) +#define bReplaceAll(b,find,repl,pos) ((bfindreplace)((b),(find),(repl),(pos))) +#define bUppercase(b) ((btoupper)(b)) +#define bLowercase(b) ((btolower)(b)) +#define bCaselessCmp(a,b) ((bstricmp)((a), (b))) +#define bCaselessNCmp(a,b,n) ((bstrnicmp)((a), (b), (n))) +#define bBase64Decode(b) (bBase64DecodeEx ((b), NULL)) +#define bUuDecode(b) (bUuDecodeEx ((b), NULL)) + +/* Unusual functions */ +extern struct bStream * bsFromBstr (const_bstring b); +extern bstring bTail (bstring b, int n); +extern bstring bHead (bstring b, int n); +extern int bSetCstrChar (bstring a, int pos, char c); +extern int bSetChar (bstring b, int pos, char c); +extern int bFill (bstring a, char c, int len); +extern int bReplicate (bstring b, int n); +extern int bReverse (bstring b); +extern int bInsertChrs (bstring b, int pos, int len, unsigned char c, unsigned char fill); +extern bstring bStrfTime (const char * fmt, const struct tm * timeptr); +#define bAscTime(t) (bStrfTime ("%c\n", (t))) +#define bCTime(t) ((t) ? 
bAscTime (localtime (t)) : NULL) + +/* Spacing formatting */ +extern int bJustifyLeft (bstring b, int space); +extern int bJustifyRight (bstring b, int width, int space); +extern int bJustifyMargin (bstring b, int width, int space); +extern int bJustifyCenter (bstring b, int width, int space); + +/* Esoteric standards specific functions */ +extern char * bStr2NetStr (const_bstring b); +extern bstring bNetStr2Bstr (const char * buf); +extern bstring bBase64Encode (const_bstring b); +extern bstring bBase64DecodeEx (const_bstring b, int * boolTruncError); +extern struct bStream * bsUuDecode (struct bStream * sInp, int * badlines); +extern bstring bUuDecodeEx (const_bstring src, int * badlines); +extern bstring bUuEncode (const_bstring src); +extern bstring bYEncode (const_bstring src); +extern bstring bYDecode (const_bstring src); + +/* Writable stream */ +typedef int (* bNwrite) (const void * buf, size_t elsize, size_t nelem, void * parm); + +struct bwriteStream * bwsOpen (bNwrite writeFn, void * parm); +int bwsWriteBstr (struct bwriteStream * stream, const_bstring b); +int bwsWriteBlk (struct bwriteStream * stream, void * blk, int len); +int bwsWriteFlush (struct bwriteStream * stream); +int bwsIsEOF (const struct bwriteStream * stream); +int bwsBuffLength (struct bwriteStream * stream, int sz); +void * bwsClose (struct bwriteStream * stream); + +/* Security functions */ +#define bSecureDestroy(b) { \ +bstring bstr__tmp = (b); \ + if (bstr__tmp && bstr__tmp->mlen > 0 && bstr__tmp->data) { \ + (void) memset (bstr__tmp->data, 0, (size_t) bstr__tmp->mlen); \ + bdestroy (bstr__tmp); \ + } \ +} +#define bSecureWriteProtect(t) { \ + if ((t).mlen >= 0) { \ + if ((t).mlen > (t).slen)) { \ + (void) memset ((t).data + (t).slen, 0, (size_t) (t).mlen - (t).slen); \ + } \ + (t).mlen = -1; \ + } \ +} +extern bstring bSecureInput (int maxlen, int termchar, + bNgetc vgetchar, void * vgcCtx); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/cbstring/bstrlib.c b/src/cbstring/bstrlib.c new file mode 100644 index 0000000..acc6b1a --- /dev/null +++ b/src/cbstring/bstrlib.c @@ -0,0 +1,2974 @@ +/* + * This source file is part of the bstring string library. This code was + * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause + * BSD open source license or GPL v2.0. Refer to the accompanying documentation + * for details on usage and license. + */ + +/* + * bstrlib.c + * + * This file is the core module for implementing the bstring functions. + */ + +#include +#include +#include +#include +#include +#include +#include "bstrlib.h" + +/* Optionally include a mechanism for debugging memory */ + +#if defined(MEMORY_DEBUG) || defined(BSTRLIB_MEMORY_DEBUG) +#include "memdbg.h" +#endif + +#ifndef bstr__alloc +#define bstr__alloc(x) malloc (x) +#endif + +#ifndef bstr__free +#define bstr__free(p) free (p) +#endif + +#ifndef bstr__realloc +#define bstr__realloc(p,x) realloc ((p), (x)) +#endif + +#ifndef bstr__memcpy +#define bstr__memcpy(d,s,l) memcpy ((d), (s), (l)) +#endif + +#ifndef bstr__memmove +#define bstr__memmove(d,s,l) memmove ((d), (s), (l)) +#endif + +#ifndef bstr__memset +#define bstr__memset(d,c,l) memset ((d), (c), (l)) +#endif + +#ifndef bstr__memcmp +#define bstr__memcmp(d,c,l) memcmp ((d), (c), (l)) +#endif + +#ifndef bstr__memchr +#define bstr__memchr(s,c,l) memchr ((s), (c), (l)) +#endif + +/* Just a length safe wrapper for memmove. */ + +#define bBlockCopy(D,S,L) { if ((L) > 0) bstr__memmove ((D),(S),(L)); } + +/* Compute the snapped size for a given requested size. 
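+   For example, a request of 13 snaps up to 16 and a request of 100 snaps up
+   to 128, while anything below 8 snaps to 8.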
By snapping to powers + of 2 like this, repeated reallocations are avoided. */ +static int snapUpSize (int i) { + if (i < 8) { + i = 8; + } else { + unsigned int j; + j = (unsigned int) i; + + j |= (j >> 1); + j |= (j >> 2); + j |= (j >> 4); + j |= (j >> 8); /* Ok, since int >= 16 bits */ +#if (UINT_MAX != 0xffff) + j |= (j >> 16); /* For 32 bit int systems */ +#if (UINT_MAX > 0xffffffffUL) + j |= (j >> 32); /* For 64 bit int systems */ +#endif +#endif + /* Least power of two greater than i */ + j++; + if ((int) j >= i) i = (int) j; + } + return i; +} + +/* int balloc (bstring b, int len) + * + * Increase the size of the memory backing the bstring b to at least len. + */ +int balloc (bstring b, int olen) { + int len; + if (b == NULL || b->data == NULL || b->slen < 0 || b->mlen <= 0 || + b->mlen < b->slen || olen <= 0) { + return BSTR_ERR; + } + + if (olen >= b->mlen) { + unsigned char * x; + + if ((len = snapUpSize (olen)) <= b->mlen) return BSTR_OK; + + /* Assume probability of a non-moving realloc is 0.125 */ + if (7 * b->mlen < 8 * b->slen) { + + /* If slen is close to mlen in size then use realloc to reduce + the memory defragmentation */ + + reallocStrategy:; + + x = (unsigned char *) bstr__realloc (b->data, (size_t) len); + if (x == NULL) { + + /* Since we failed, try allocating the tighest possible + allocation */ + + if (NULL == (x = (unsigned char *) bstr__realloc (b->data, (size_t) (len = olen)))) { + return BSTR_ERR; + } + } + } else { + + /* If slen is not close to mlen then avoid the penalty of copying + the extra bytes that are allocated, but not considered part of + the string */ + + if (NULL == (x = (unsigned char *) bstr__alloc ((size_t) len))) { + + /* Perhaps there is no available memory for the two + allocations to be in memory at once */ + + goto reallocStrategy; + + } else { + if (b->slen) bstr__memcpy ((char *) x, (char *) b->data, (size_t) b->slen); + bstr__free (b->data); + } + } + b->data = x; + b->mlen = len; + b->data[b->slen] = (unsigned char) '\0'; + } + + return BSTR_OK; +} + +/* int ballocmin (bstring b, int len) + * + * Set the size of the memory backing the bstring b to len or b->slen+1, + * whichever is larger. Note that repeated use of this function can degrade + * performance. + */ +int ballocmin (bstring b, int len) { + unsigned char * s; + + if (b == NULL || b->data == NULL || (b->slen+1) < 0 || b->mlen <= 0 || + b->mlen < b->slen || len <= 0) { + return BSTR_ERR; + } + + if (len < b->slen + 1) len = b->slen + 1; + + if (len != b->mlen) { + s = (unsigned char *) bstr__realloc (b->data, (size_t) len); + if (NULL == s) return BSTR_ERR; + s[b->slen] = (unsigned char) '\0'; + b->data = s; + b->mlen = len; + } + + return BSTR_OK; +} + +/* bstring bfromcstr (const char * str) + * + * Create a bstring which contains the contents of the '\0' terminated char * + * buffer str. + */ +bstring bfromcstr (const char * str) { +bstring b; +int i; +size_t j; + + if (str == NULL) return NULL; + j = (strlen) (str); + i = snapUpSize ((int) (j + (2 - (j != 0)))); + if (i <= (int) j) return NULL; + + b = (bstring) bstr__alloc (sizeof (struct tagbstring)); + if (NULL == b) return NULL; + b->slen = (int) j; + if (NULL == (b->data = (unsigned char *) bstr__alloc (b->mlen = i))) { + bstr__free (b); + return NULL; + } + + bstr__memcpy (b->data, str, j+1); + return b; +} + +/* bstring bfromcstralloc (int mlen, const char * str) + * + * Create a bstring which contains the contents of the '\0' terminated char * + * buffer str. 
The memory buffer backing the string is at least len + * characters in length. + */ +bstring bfromcstralloc (int mlen, const char * str) { +bstring b; +int i; +size_t j; + + if (str == NULL) return NULL; + j = (strlen) (str); + i = snapUpSize ((int) (j + (2 - (j != 0)))); + if (i <= (int) j) return NULL; + + b = (bstring) bstr__alloc (sizeof (struct tagbstring)); + if (b == NULL) return NULL; + b->slen = (int) j; + if (i < mlen) i = mlen; + + if (NULL == (b->data = (unsigned char *) bstr__alloc (b->mlen = i))) { + bstr__free (b); + return NULL; + } + + bstr__memcpy (b->data, str, j+1); + return b; +} + +/* bstring blk2bstr (const void * blk, int len) + * + * Create a bstring which contains the content of the block blk of length + * len. + */ +bstring blk2bstr (const void * blk, int len) { +bstring b; +int i; + + if (blk == NULL || len < 0) return NULL; + b = (bstring) bstr__alloc (sizeof (struct tagbstring)); + if (b == NULL) return NULL; + b->slen = len; + + i = len + (2 - (len != 0)); + i = snapUpSize (i); + + b->mlen = i; + + b->data = (unsigned char *) bstr__alloc ((size_t) b->mlen); + if (b->data == NULL) { + bstr__free (b); + return NULL; + } + + if (len > 0) bstr__memcpy (b->data, blk, (size_t) len); + b->data[len] = (unsigned char) '\0'; + + return b; +} + +/* char * bstr2cstr (const_bstring s, char z) + * + * Create a '\0' terminated char * buffer which is equal to the contents of + * the bstring s, except that any contained '\0' characters are converted + * to the character in z. This returned value should be freed with a + * bcstrfree () call, by the calling application. + */ +char * bstr2cstr (const_bstring b, char z) { +int i, l; +char * r; + + if (b == NULL || b->slen < 0 || b->data == NULL) return NULL; + l = b->slen; + r = (char *) bstr__alloc ((size_t) (l + 1)); + if (r == NULL) return r; + + for (i=0; i < l; i ++) { + r[i] = (char) ((b->data[i] == '\0') ? z : (char) (b->data[i])); + } + + r[l] = (unsigned char) '\0'; + + return r; +} + +/* int bcstrfree (char * s) + * + * Frees a C-string generated by bstr2cstr (). This is normally unnecessary + * since it just wraps a call to bstr__free (), however, if bstr__alloc () + * and bstr__free () have been redefined as a macros within the bstrlib + * module (via defining them in memdbg.h after defining + * BSTRLIB_MEMORY_DEBUG) with some difference in behaviour from the std + * library functions, then this allows a correct way of freeing the memory + * that allows higher level code to be independent from these macro + * redefinitions. + */ +int bcstrfree (char * s) { + if (s) { + bstr__free (s); + return BSTR_OK; + } + return BSTR_ERR; +} + +/* int bconcat (bstring b0, const_bstring b1) + * + * Concatenate the bstring b1 to the bstring b0. 
+ */ +int bconcat (bstring b0, const_bstring b1) { +int len, d; +bstring aux = (bstring) b1; + + if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL) return BSTR_ERR; + + d = b0->slen; + len = b1->slen; + if ((d | (b0->mlen - d) | len | (d + len)) < 0) return BSTR_ERR; + + if (b0->mlen <= d + len + 1) { + ptrdiff_t pd = b1->data - b0->data; + if (0 <= pd && pd < b0->mlen) { + if (NULL == (aux = bstrcpy (b1))) return BSTR_ERR; + } + if (balloc (b0, d + len + 1) != BSTR_OK) { + if (aux != b1) bdestroy (aux); + return BSTR_ERR; + } + } + + bBlockCopy (&b0->data[d], &aux->data[0], (size_t) len); + b0->data[d + len] = (unsigned char) '\0'; + b0->slen = d + len; + if (aux != b1) bdestroy (aux); + return BSTR_OK; +} + +/* int bconchar (bstring b, char c) +/ * + * Concatenate the single character c to the bstring b. + */ +int bconchar (bstring b, char c) { +int d; + + if (b == NULL) return BSTR_ERR; + d = b->slen; + if ((d | (b->mlen - d)) < 0 || balloc (b, d + 2) != BSTR_OK) return BSTR_ERR; + b->data[d] = (unsigned char) c; + b->data[d + 1] = (unsigned char) '\0'; + b->slen++; + return BSTR_OK; +} + +/* int bcatcstr (bstring b, const char * s) + * + * Concatenate a char * string to a bstring. + */ +int bcatcstr (bstring b, const char * s) { +char * d; +int i, l; + + if (b == NULL || b->data == NULL || b->slen < 0 || b->mlen < b->slen + || b->mlen <= 0 || s == NULL) return BSTR_ERR; + + /* Optimistically concatenate directly */ + l = b->mlen - b->slen; + d = (char *) &b->data[b->slen]; + for (i=0; i < l; i++) { + if ((*d++ = *s++) == '\0') { + b->slen += i; + return BSTR_OK; + } + } + b->slen += i; + + /* Need to explicitely resize and concatenate tail */ + return bcatblk (b, (const void *) s, (int) strlen (s)); +} + +/* int bcatblk (bstring b, const void * s, int len) + * + * Concatenate a fixed length buffer to a bstring. + */ +int bcatblk (bstring b, const void * s, int len) { +int nl; + + if (b == NULL || b->data == NULL || b->slen < 0 || b->mlen < b->slen + || b->mlen <= 0 || s == NULL || len < 0) return BSTR_ERR; + + if (0 > (nl = b->slen + len)) return BSTR_ERR; /* Overflow? */ + if (b->mlen <= nl && 0 > balloc (b, nl + 1)) return BSTR_ERR; + + bBlockCopy (&b->data[b->slen], s, (size_t) len); + b->slen = nl; + b->data[nl] = (unsigned char) '\0'; + return BSTR_OK; +} + +/* bstring bstrcpy (const_bstring b) + * + * Create a copy of the bstring b. + */ +bstring bstrcpy (const_bstring b) { +bstring b0; +int i,j; + + /* Attempted to copy an invalid string? */ + if (b == NULL || b->slen < 0 || b->data == NULL) return NULL; + + b0 = (bstring) bstr__alloc (sizeof (struct tagbstring)); + if (b0 == NULL) { + /* Unable to allocate memory for string header */ + return NULL; + } + + i = b->slen; + j = snapUpSize (i + 1); + + b0->data = (unsigned char *) bstr__alloc (j); + if (b0->data == NULL) { + j = i + 1; + b0->data = (unsigned char *) bstr__alloc (j); + if (b0->data == NULL) { + /* Unable to allocate memory for string data */ + bstr__free (b0); + return NULL; + } + } + + b0->mlen = j; + b0->slen = i; + + if (i) bstr__memcpy ((char *) b0->data, (char *) b->data, i); + b0->data[b0->slen] = (unsigned char) '\0'; + + return b0; +} + +/* int bassign (bstring a, const_bstring b) + * + * Overwrite the string a with the contents of string b. 
+ */ +int bassign (bstring a, const_bstring b) { + if (b == NULL || b->data == NULL || b->slen < 0) + return BSTR_ERR; + if (b->slen != 0) { + if (balloc (a, b->slen) != BSTR_OK) return BSTR_ERR; + bstr__memmove (a->data, b->data, b->slen); + } else { + if (a == NULL || a->data == NULL || a->mlen < a->slen || + a->slen < 0 || a->mlen == 0) + return BSTR_ERR; + } + a->data[b->slen] = (unsigned char) '\0'; + a->slen = b->slen; + return BSTR_OK; +} + +/* int bassignmidstr (bstring a, const_bstring b, int left, int len) + * + * Overwrite the string a with the middle of contents of string b + * starting from position left and running for a length len. left and + * len are clamped to the ends of b as with the function bmidstr. + */ +int bassignmidstr (bstring a, const_bstring b, int left, int len) { + if (b == NULL || b->data == NULL || b->slen < 0) + return BSTR_ERR; + + if (left < 0) { + len += left; + left = 0; + } + + if (len > b->slen - left) len = b->slen - left; + + if (a == NULL || a->data == NULL || a->mlen < a->slen || + a->slen < 0 || a->mlen == 0) + return BSTR_ERR; + + if (len > 0) { + if (balloc (a, len) != BSTR_OK) return BSTR_ERR; + bstr__memmove (a->data, b->data + left, len); + a->slen = len; + } else { + a->slen = 0; + } + a->data[a->slen] = (unsigned char) '\0'; + return BSTR_OK; +} + +/* int bassigncstr (bstring a, const char * str) + * + * Overwrite the string a with the contents of char * string str. Note that + * the bstring a must be a well defined and writable bstring. If an error + * occurs BSTR_ERR is returned however a may be partially overwritten. + */ +int bassigncstr (bstring a, const char * str) { +int i; +size_t len; + if (a == NULL || a->data == NULL || a->mlen < a->slen || + a->slen < 0 || a->mlen == 0 || NULL == str) + return BSTR_ERR; + + for (i=0; i < a->mlen; i++) { + if ('\0' == (a->data[i] = str[i])) { + a->slen = i; + return BSTR_OK; + } + } + + a->slen = i; + len = strlen (str + i); + if (len > INT_MAX || i + len + 1 > INT_MAX || + 0 > balloc (a, (int) (i + len + 1))) return BSTR_ERR; + bBlockCopy (a->data + i, str + i, (size_t) len + 1); + a->slen += (int) len; + return BSTR_OK; +} + +/* int bassignblk (bstring a, const void * s, int len) + * + * Overwrite the string a with the contents of the block (s, len). Note that + * the bstring a must be a well defined and writable bstring. If an error + * occurs BSTR_ERR is returned and a is not overwritten. + */ +int bassignblk (bstring a, const void * s, int len) { + if (a == NULL || a->data == NULL || a->mlen < a->slen || + a->slen < 0 || a->mlen == 0 || NULL == s || len + 1 < 1) + return BSTR_ERR; + if (len + 1 > a->mlen && 0 > balloc (a, len + 1)) return BSTR_ERR; + bBlockCopy (a->data, s, (size_t) len); + a->data[len] = (unsigned char) '\0'; + a->slen = len; + return BSTR_OK; +} + +/* int btrunc (bstring b, int n) + * + * Truncate the bstring to at most n characters. + */ +int btrunc (bstring b, int n) { + if (n < 0 || b == NULL || b->data == NULL || b->mlen < b->slen || + b->slen < 0 || b->mlen <= 0) return BSTR_ERR; + if (b->slen > n) { + b->slen = n; + b->data[n] = (unsigned char) '\0'; + } + return BSTR_OK; +} + +#define upcase(c) (toupper ((unsigned char) c)) +#define downcase(c) (tolower ((unsigned char) c)) +#define wspace(c) (isspace ((unsigned char) c)) + +/* int btoupper (bstring b) + * + * Convert contents of bstring to upper case. 
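+ *
+ * Illustrative usage sketch (error handling omitted):
+ *
+ *     bstring b = bfromcstr ("MixedCase");
+ *     btoupper (b);                  b now holds "MIXEDCASE"
+ *     btolower (b);                  b now holds "mixedcase"
+ *     bdestroy (b);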
+ */ +int btoupper (bstring b) { +int i, len; + if (b == NULL || b->data == NULL || b->mlen < b->slen || + b->slen < 0 || b->mlen <= 0) return BSTR_ERR; + for (i=0, len = b->slen; i < len; i++) { + b->data[i] = (unsigned char) upcase (b->data[i]); + } + return BSTR_OK; +} + +/* int btolower (bstring b) + * + * Convert contents of bstring to lower case. + */ +int btolower (bstring b) { +int i, len; + if (b == NULL || b->data == NULL || b->mlen < b->slen || + b->slen < 0 || b->mlen <= 0) return BSTR_ERR; + for (i=0, len = b->slen; i < len; i++) { + b->data[i] = (unsigned char) downcase (b->data[i]); + } + return BSTR_OK; +} + +/* int bstricmp (const_bstring b0, const_bstring b1) + * + * Compare two strings without differentiating between case. The return + * value is the difference of the values of the characters where the two + * strings first differ after lower case transformation, otherwise 0 is + * returned indicating that the strings are equal. If the lengths are + * different, then a difference from 0 is given, but if the first extra + * character is '\0', then it is taken to be the value UCHAR_MAX+1. + */ +int bstricmp (const_bstring b0, const_bstring b1) { +int i, v, n; + + if (bdata (b0) == NULL || b0->slen < 0 || + bdata (b1) == NULL || b1->slen < 0) return SHRT_MIN; + if ((n = b0->slen) > b1->slen) n = b1->slen; + else if (b0->slen == b1->slen && b0->data == b1->data) return BSTR_OK; + + for (i = 0; i < n; i ++) { + v = (char) downcase (b0->data[i]) + - (char) downcase (b1->data[i]); + if (0 != v) return v; + } + + if (b0->slen > n) { + v = (char) downcase (b0->data[n]); + if (v) return v; + return UCHAR_MAX + 1; + } + if (b1->slen > n) { + v = - (char) downcase (b1->data[n]); + if (v) return v; + return - (int) (UCHAR_MAX + 1); + } + return BSTR_OK; +} + +/* int bstrnicmp (const_bstring b0, const_bstring b1, int n) + * + * Compare two strings without differentiating between case for at most n + * characters. If the position where the two strings first differ is + * before the nth position, the return value is the difference of the values + * of the characters, otherwise 0 is returned. If the lengths are different + * and less than n characters, then a difference from 0 is given, but if the + * first extra character is '\0', then it is taken to be the value + * UCHAR_MAX+1. + */ +int bstrnicmp (const_bstring b0, const_bstring b1, int n) { +int i, v, m; + + if (bdata (b0) == NULL || b0->slen < 0 || + bdata (b1) == NULL || b1->slen < 0 || n < 0) return SHRT_MIN; + m = n; + if (m > b0->slen) m = b0->slen; + if (m > b1->slen) m = b1->slen; + + if (b0->data != b1->data) { + for (i = 0; i < m; i ++) { + v = (char) downcase (b0->data[i]); + v -= (char) downcase (b1->data[i]); + if (v != 0) return b0->data[i] - b1->data[i]; + } + } + + if (n == m || b0->slen == b1->slen) return BSTR_OK; + + if (b0->slen > m) { + v = (char) downcase (b0->data[m]); + if (v) return v; + return UCHAR_MAX + 1; + } + + v = - (char) downcase (b1->data[m]); + if (v) return v; + return - (int) (UCHAR_MAX + 1); +} + +/* int biseqcaseless (const_bstring b0, const_bstring b1) + * + * Compare two strings for equality without differentiating between case. + * If the strings differ other than in case, 0 is returned, if the strings + * are the same, 1 is returned, if there is an error, -1 is returned. If + * the length of the strings are different, this function is O(1). '\0' + * termination characters are not treated in any special way. 
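+ *
+ * Illustrative usage sketch (error handling omitted):
+ *
+ *     bstring a = bfromcstr ("Hello");
+ *     bstring b = bfromcstr ("HELLO");
+ *     biseqcaseless (a, b);          returns 1 (equal ignoring case)
+ *     biseq (a, b);                  returns 0 (unequal when case matters)
+ *     bdestroy (b);
+ *     bdestroy (a);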
+ */ +int biseqcaseless (const_bstring b0, const_bstring b1) { +int i, n; + + if (bdata (b0) == NULL || b0->slen < 0 || + bdata (b1) == NULL || b1->slen < 0) return BSTR_ERR; + if (b0->slen != b1->slen) return BSTR_OK; + if (b0->data == b1->data || b0->slen == 0) return 1; + for (i=0, n=b0->slen; i < n; i++) { + if (b0->data[i] != b1->data[i]) { + unsigned char c = (unsigned char) downcase (b0->data[i]); + if (c != (unsigned char) downcase (b1->data[i])) return 0; + } + } + return 1; +} + +/* int bisstemeqcaselessblk (const_bstring b0, const void * blk, int len) + * + * Compare beginning of string b0 with a block of memory of length len + * without differentiating between case for equality. If the beginning of b0 + * differs from the memory block other than in case (or if b0 is too short), + * 0 is returned, if the strings are the same, 1 is returned, if there is an + * error, -1 is returned. '\0' characters are not treated in any special + * way. + */ +int bisstemeqcaselessblk (const_bstring b0, const void * blk, int len) { +int i; + + if (bdata (b0) == NULL || b0->slen < 0 || NULL == blk || len < 0) + return BSTR_ERR; + if (b0->slen < len) return BSTR_OK; + if (b0->data == (const unsigned char *) blk || len == 0) return 1; + + for (i = 0; i < len; i ++) { + if (b0->data[i] != ((const unsigned char *) blk)[i]) { + if (downcase (b0->data[i]) != + downcase (((const unsigned char *) blk)[i])) return 0; + } + } + return 1; +} + +/* + * int bltrimws (bstring b) + * + * Delete whitespace contiguous from the left end of the string. + */ +int bltrimws (bstring b) { +int i, len; + + if (b == NULL || b->data == NULL || b->mlen < b->slen || + b->slen < 0 || b->mlen <= 0) return BSTR_ERR; + + for (len = b->slen, i = 0; i < len; i++) { + if (!wspace (b->data[i])) { + return bdelete (b, 0, i); + } + } + + b->data[0] = (unsigned char) '\0'; + b->slen = 0; + return BSTR_OK; +} + +/* + * int brtrimws (bstring b) + * + * Delete whitespace contiguous from the right end of the string. + */ +int brtrimws (bstring b) { +int i; + + if (b == NULL || b->data == NULL || b->mlen < b->slen || + b->slen < 0 || b->mlen <= 0) return BSTR_ERR; + + for (i = b->slen - 1; i >= 0; i--) { + if (!wspace (b->data[i])) { + if (b->mlen > i) b->data[i+1] = (unsigned char) '\0'; + b->slen = i + 1; + return BSTR_OK; + } + } + + b->data[0] = (unsigned char) '\0'; + b->slen = 0; + return BSTR_OK; +} + +/* + * int btrimws (bstring b) + * + * Delete whitespace contiguous from both ends of the string. + */ +int btrimws (bstring b) { +int i, j; + + if (b == NULL || b->data == NULL || b->mlen < b->slen || + b->slen < 0 || b->mlen <= 0) return BSTR_ERR; + + for (i = b->slen - 1; i >= 0; i--) { + if (!wspace (b->data[i])) { + if (b->mlen > i) b->data[i+1] = (unsigned char) '\0'; + b->slen = i + 1; + for (j = 0; wspace (b->data[j]); j++) {} + return bdelete (b, 0, j); + } + } + + b->data[0] = (unsigned char) '\0'; + b->slen = 0; + return BSTR_OK; +} + +/* int biseq (const_bstring b0, const_bstring b1) + * + * Compare the string b0 and b1. If the strings differ, 0 is returned, if + * the strings are the same, 1 is returned, if there is an error, -1 is + * returned. If the length of the strings are different, this function is + * O(1). '\0' termination characters are not treated in any special way. 
+ */ +int biseq (const_bstring b0, const_bstring b1) { + if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL || + b0->slen < 0 || b1->slen < 0) return BSTR_ERR; + if (b0->slen != b1->slen) return BSTR_OK; + if (b0->data == b1->data || b0->slen == 0) return 1; + return !bstr__memcmp (b0->data, b1->data, b0->slen); +} + +/* int bisstemeqblk (const_bstring b0, const void * blk, int len) + * + * Compare beginning of string b0 with a block of memory of length len for + * equality. If the beginning of b0 differs from the memory block (or if b0 + * is too short), 0 is returned, if the strings are the same, 1 is returned, + * if there is an error, -1 is returned. '\0' characters are not treated in + * any special way. + */ +int bisstemeqblk (const_bstring b0, const void * blk, int len) { +int i; + + if (bdata (b0) == NULL || b0->slen < 0 || NULL == blk || len < 0) + return BSTR_ERR; + if (b0->slen < len) return BSTR_OK; + if (b0->data == (const unsigned char *) blk || len == 0) return 1; + + for (i = 0; i < len; i ++) { + if (b0->data[i] != ((const unsigned char *) blk)[i]) return BSTR_OK; + } + return 1; +} + +/* int biseqcstr (const_bstring b, const char *s) + * + * Compare the bstring b and char * string s. The C string s must be '\0' + * terminated at exactly the length of the bstring b, and the contents + * between the two must be identical with the bstring b with no '\0' + * characters for the two contents to be considered equal. This is + * equivalent to the condition that their current contents will be always be + * equal when comparing them in the same format after converting one or the + * other. If the strings are equal 1 is returned, if they are unequal 0 is + * returned and if there is a detectable error BSTR_ERR is returned. + */ +int biseqcstr (const_bstring b, const char * s) { +int i; + if (b == NULL || s == NULL || b->data == NULL || b->slen < 0) return BSTR_ERR; + for (i=0; i < b->slen; i++) { + if (s[i] == '\0' || b->data[i] != (unsigned char) s[i]) return BSTR_OK; + } + return s[i] == '\0'; +} + +/* int biseqcstrcaseless (const_bstring b, const char *s) + * + * Compare the bstring b and char * string s. The C string s must be '\0' + * terminated at exactly the length of the bstring b, and the contents + * between the two must be identical except for case with the bstring b with + * no '\0' characters for the two contents to be considered equal. This is + * equivalent to the condition that their current contents will be always be + * equal ignoring case when comparing them in the same format after + * converting one or the other. If the strings are equal, except for case, + * 1 is returned, if they are unequal regardless of case 0 is returned and + * if there is a detectable error BSTR_ERR is returned. + */ +int biseqcstrcaseless (const_bstring b, const char * s) { +int i; + if (b == NULL || s == NULL || b->data == NULL || b->slen < 0) return BSTR_ERR; + for (i=0; i < b->slen; i++) { + if (s[i] == '\0' || + (b->data[i] != (unsigned char) s[i] && + downcase (b->data[i]) != (unsigned char) downcase (s[i]))) + return BSTR_OK; + } + return s[i] == '\0'; +} + +/* int bstrcmp (const_bstring b0, const_bstring b1) + * + * Compare the string b0 and b1. If there is an error, SHRT_MIN is returned, + * otherwise a value less than or greater than zero, indicating that the + * string pointed to by b0 is lexicographically less than or greater than + * the string pointed to by b1 is returned. 
If the the string lengths are + * unequal but the characters up until the length of the shorter are equal + * then a value less than, or greater than zero, indicating that the string + * pointed to by b0 is shorter or longer than the string pointed to by b1 is + * returned. 0 is returned if and only if the two strings are the same. If + * the length of the strings are different, this function is O(n). Like its + * standard C library counter part strcmp, the comparison does not proceed + * past any '\0' termination characters encountered. + */ +int bstrcmp (const_bstring b0, const_bstring b1) { +int i, v, n; + + if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL || + b0->slen < 0 || b1->slen < 0) return SHRT_MIN; + n = b0->slen; if (n > b1->slen) n = b1->slen; + if (b0->slen == b1->slen && (b0->data == b1->data || b0->slen == 0)) + return BSTR_OK; + + for (i = 0; i < n; i ++) { + v = ((char) b0->data[i]) - ((char) b1->data[i]); + if (v != 0) return v; + if (b0->data[i] == (unsigned char) '\0') return BSTR_OK; + } + + if (b0->slen > n) return 1; + if (b1->slen > n) return -1; + return BSTR_OK; +} + +/* int bstrncmp (const_bstring b0, const_bstring b1, int n) + * + * Compare the string b0 and b1 for at most n characters. If there is an + * error, SHRT_MIN is returned, otherwise a value is returned as if b0 and + * b1 were first truncated to at most n characters then bstrcmp was called + * with these new strings are paremeters. If the length of the strings are + * different, this function is O(n). Like its standard C library counter + * part strcmp, the comparison does not proceed past any '\0' termination + * characters encountered. + */ +int bstrncmp (const_bstring b0, const_bstring b1, int n) { +int i, v, m; + + if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL || + b0->slen < 0 || b1->slen < 0) return SHRT_MIN; + m = n; + if (m > b0->slen) m = b0->slen; + if (m > b1->slen) m = b1->slen; + + if (b0->data != b1->data) { + for (i = 0; i < m; i ++) { + v = ((char) b0->data[i]) - ((char) b1->data[i]); + if (v != 0) return v; + if (b0->data[i] == (unsigned char) '\0') return BSTR_OK; + } + } + + if (n == m || b0->slen == b1->slen) return BSTR_OK; + + if (b0->slen > m) return 1; + return -1; +} + +/* bstring bmidstr (const_bstring b, int left, int len) + * + * Create a bstring which is the substring of b starting from position left + * and running for a length len (clamped by the end of the bstring b.) If + * b is detectably invalid, then NULL is returned. The section described + * by (left, len) is clamped to the boundaries of b. + */ +bstring bmidstr (const_bstring b, int left, int len) { + + if (b == NULL || b->slen < 0 || b->data == NULL) return NULL; + + if (left < 0) { + len += left; + left = 0; + } + + if (len > b->slen - left) len = b->slen - left; + + if (len <= 0) return bfromcstr (""); + return blk2bstr (b->data + left, len); +} + +/* int bdelete (bstring b, int pos, int len) + * + * Removes characters from pos to pos+len-1 inclusive and shifts the tail of + * the bstring starting from pos+len to pos. len must be positive for this + * call to have any effect. The section of the string described by (pos, + * len) is clamped to boundaries of the bstring b. 
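+ *
+ * Illustrative usage sketch (error handling omitted):
+ *
+ *     bstring b = bfromcstr ("Hello, world");
+ *     bdelete (b, 5, 7);             b now holds "Hello"
+ *     bdestroy (b);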
+ */ +int bdelete (bstring b, int pos, int len) { + /* Clamp to left side of bstring */ + if (pos < 0) { + len += pos; + pos = 0; + } + + if (len < 0 || b == NULL || b->data == NULL || b->slen < 0 || + b->mlen < b->slen || b->mlen <= 0) + return BSTR_ERR; + if (len > 0 && pos < b->slen) { + if (pos + len >= b->slen) { + b->slen = pos; + } else { + bBlockCopy ((char *) (b->data + pos), + (char *) (b->data + pos + len), + b->slen - (pos+len)); + b->slen -= len; + } + b->data[b->slen] = (unsigned char) '\0'; + } + return BSTR_OK; +} + +/* int bdestroy (bstring b) + * + * Free up the bstring. Note that if b is detectably invalid or not writable + * then no action is performed and BSTR_ERR is returned. Like a freed memory + * allocation, dereferences, writes or any other action on b after it has + * been bdestroyed is undefined. + */ +int bdestroy (bstring b) { + if (b == NULL || b->slen < 0 || b->mlen <= 0 || b->mlen < b->slen || + b->data == NULL) + return BSTR_ERR; + + bstr__free (b->data); + + /* In case there is any stale usage, there is one more chance to + notice this error. */ + + b->slen = -1; + b->mlen = -__LINE__; + b->data = NULL; + + bstr__free (b); + return BSTR_OK; +} + +/* int binstr (const_bstring b1, int pos, const_bstring b2) + * + * Search for the bstring b2 in b1 starting from position pos, and searching + * forward. If it is found then return with the first position where it is + * found, otherwise return BSTR_ERR. Note that this is just a brute force + * string searcher that does not attempt clever things like the Boyer-Moore + * search algorithm. Because of this there are many degenerate cases where + * this can take much longer than it needs to. + */ +int binstr (const_bstring b1, int pos, const_bstring b2) { +int j, ii, ll, lf; +unsigned char * d0; +unsigned char c0; +register unsigned char * d1; +register unsigned char c1; +register int i; + + if (b1 == NULL || b1->data == NULL || b1->slen < 0 || + b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR; + if (b1->slen == pos) return (b2->slen == 0)?pos:BSTR_ERR; + if (b1->slen < pos || pos < 0) return BSTR_ERR; + if (b2->slen == 0) return pos; + + /* No space to find such a string? */ + if ((lf = b1->slen - b2->slen + 1) <= pos) return BSTR_ERR; + + /* An obvious alias case */ + if (b1->data == b2->data && pos == 0) return 0; + + i = pos; + + d0 = b2->data; + d1 = b1->data; + ll = b2->slen; + + /* Peel off the b2->slen == 1 case */ + c0 = d0[0]; + if (1 == ll) { + for (;i < lf; i++) if (c0 == d1[i]) return i; + return BSTR_ERR; + } + + c1 = c0; + j = 0; + lf = b1->slen - 1; + + ii = -1; + if (i < lf) do { + /* Unrolled current character test */ + if (c1 != d1[i]) { + if (c1 != d1[1+i]) { + i += 2; + continue; + } + i++; + } + + /* Take note if this is the start of a potential match */ + if (0 == j) ii = i; + + /* Shift the test character down by one */ + j++; + i++; + + /* If this isn't past the last character continue */ + if (j < ll) { + c1 = d0[j]; + continue; + } + + N0:; + + /* If no characters mismatched, then we matched */ + if (i == ii+j) return ii; + + /* Shift back to the beginning */ + i -= j; + j = 0; + c1 = c0; + } while (i < lf); + + /* Deal with last case if unrolling caused a misalignment */ + if (i == lf && ll == j+1 && c1 == d1[i]) goto N0; + + return BSTR_ERR; +} + +/* int binstrr (const_bstring b1, int pos, const_bstring b2) + * + * Search for the bstring b2 in b1 starting from position pos, and searching + * backward. 
If it is found then return with the first position where it is + * found, otherwise return BSTR_ERR. Note that this is just a brute force + * string searcher that does not attempt clever things like the Boyer-Moore + * search algorithm. Because of this there are many degenerate cases where + * this can take much longer than it needs to. + */ +int binstrr (const_bstring b1, int pos, const_bstring b2) { +int j, i, l; +unsigned char * d0, * d1; + + if (b1 == NULL || b1->data == NULL || b1->slen < 0 || + b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR; + if (b1->slen == pos && b2->slen == 0) return pos; + if (b1->slen < pos || pos < 0) return BSTR_ERR; + if (b2->slen == 0) return pos; + + /* Obvious alias case */ + if (b1->data == b2->data && pos == 0 && b2->slen <= b1->slen) return 0; + + i = pos; + if ((l = b1->slen - b2->slen) < 0) return BSTR_ERR; + + /* If no space to find such a string then snap back */ + if (l + 1 <= i) i = l; + j = 0; + + d0 = b2->data; + d1 = b1->data; + l = b2->slen; + + for (;;) { + if (d0[j] == d1[i + j]) { + j ++; + if (j >= l) return i; + } else { + i --; + if (i < 0) break; + j=0; + } + } + + return BSTR_ERR; +} + +/* int binstrcaseless (const_bstring b1, int pos, const_bstring b2) + * + * Search for the bstring b2 in b1 starting from position pos, and searching + * forward but without regard to case. If it is found then return with the + * first position where it is found, otherwise return BSTR_ERR. Note that + * this is just a brute force string searcher that does not attempt clever + * things like the Boyer-Moore search algorithm. Because of this there are + * many degenerate cases where this can take much longer than it needs to. + */ +int binstrcaseless (const_bstring b1, int pos, const_bstring b2) { +int j, i, l, ll; +unsigned char * d0, * d1; + + if (b1 == NULL || b1->data == NULL || b1->slen < 0 || + b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR; + if (b1->slen == pos) return (b2->slen == 0)?pos:BSTR_ERR; + if (b1->slen < pos || pos < 0) return BSTR_ERR; + if (b2->slen == 0) return pos; + + l = b1->slen - b2->slen + 1; + + /* No space to find such a string? */ + if (l <= pos) return BSTR_ERR; + + /* An obvious alias case */ + if (b1->data == b2->data && pos == 0) return BSTR_OK; + + i = pos; + j = 0; + + d0 = b2->data; + d1 = b1->data; + ll = b2->slen; + + for (;;) { + if (d0[j] == d1[i + j] || downcase (d0[j]) == downcase (d1[i + j])) { + j ++; + if (j >= ll) return i; + } else { + i ++; + if (i >= l) break; + j=0; + } + } + + return BSTR_ERR; +} + +/* int binstrrcaseless (const_bstring b1, int pos, const_bstring b2) + * + * Search for the bstring b2 in b1 starting from position pos, and searching + * backward but without regard to case. If it is found then return with the + * first position where it is found, otherwise return BSTR_ERR. Note that + * this is just a brute force string searcher that does not attempt clever + * things like the Boyer-Moore search algorithm. Because of this there are + * many degenerate cases where this can take much longer than it needs to. 
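+ *
+ * Illustrative usage sketch contrasting the exact forward search with the
+ * caseless backward search (error handling omitted):
+ *
+ *     bstring hay = bfromcstr ("abc ABC abc");
+ *     bstring ndl = bfromcstr ("abc");
+ *     binstr (hay, 0, ndl);                    returns 0 (first exact match)
+ *     binstrrcaseless (hay, hay->slen, ndl);   returns 8 (last match, any case)
+ *     bdestroy (ndl);
+ *     bdestroy (hay);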
+ */ +int binstrrcaseless (const_bstring b1, int pos, const_bstring b2) { +int j, i, l; +unsigned char * d0, * d1; + + if (b1 == NULL || b1->data == NULL || b1->slen < 0 || + b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR; + if (b1->slen == pos && b2->slen == 0) return pos; + if (b1->slen < pos || pos < 0) return BSTR_ERR; + if (b2->slen == 0) return pos; + + /* Obvious alias case */ + if (b1->data == b2->data && pos == 0 && b2->slen <= b1->slen) return BSTR_OK; + + i = pos; + if ((l = b1->slen - b2->slen) < 0) return BSTR_ERR; + + /* If no space to find such a string then snap back */ + if (l + 1 <= i) i = l; + j = 0; + + d0 = b2->data; + d1 = b1->data; + l = b2->slen; + + for (;;) { + if (d0[j] == d1[i + j] || downcase (d0[j]) == downcase (d1[i + j])) { + j ++; + if (j >= l) return i; + } else { + i --; + if (i < 0) break; + j=0; + } + } + + return BSTR_ERR; +} + + +/* int bstrchrp (const_bstring b, int c, int pos) + * + * Search for the character c in b forwards from the position pos + * (inclusive). + */ +int bstrchrp (const_bstring b, int c, int pos) { +unsigned char * p; + + if (b == NULL || b->data == NULL || b->slen <= pos || pos < 0) return BSTR_ERR; + p = (unsigned char *) bstr__memchr ((b->data + pos), (unsigned char) c, (b->slen - pos)); + if (p) return (int) (p - b->data); + return BSTR_ERR; +} + +/* int bstrrchrp (const_bstring b, int c, int pos) + * + * Search for the character c in b backwards from the position pos in string + * (inclusive). + */ +int bstrrchrp (const_bstring b, int c, int pos) { +int i; + + if (b == NULL || b->data == NULL || b->slen <= pos || pos < 0) return BSTR_ERR; + for (i=pos; i >= 0; i--) { + if (b->data[i] == (unsigned char) c) return i; + } + return BSTR_ERR; +} + +#if !defined (BSTRLIB_AGGRESSIVE_MEMORY_FOR_SPEED_TRADEOFF) +#define LONG_LOG_BITS_QTY (3) +#define LONG_BITS_QTY (1 << LONG_LOG_BITS_QTY) +#define LONG_TYPE unsigned char + +#define CFCLEN ((1 << CHAR_BIT) / LONG_BITS_QTY) +struct charField { LONG_TYPE content[CFCLEN]; }; +#define testInCharField(cf,c) ((cf)->content[(c) >> LONG_LOG_BITS_QTY] & (((long)1) << ((c) & (LONG_BITS_QTY-1)))) +#define setInCharField(cf,idx) { \ + unsigned int c = (unsigned int) (idx); \ + (cf)->content[c >> LONG_LOG_BITS_QTY] |= (LONG_TYPE) (1ul << (c & (LONG_BITS_QTY-1))); \ +} + +#else + +#define CFCLEN (1 << CHAR_BIT) +struct charField { unsigned char content[CFCLEN]; }; +#define testInCharField(cf,c) ((cf)->content[(unsigned char) (c)]) +#define setInCharField(cf,idx) (cf)->content[(unsigned int) (idx)] = ~0 + +#endif + +/* Convert a bstring to charField */ +static int buildCharField (struct charField * cf, const_bstring b) { +int i; + if (b == NULL || b->data == NULL || b->slen <= 0) return BSTR_ERR; + memset ((void *) cf->content, 0, sizeof (struct charField)); + for (i=0; i < b->slen; i++) { + setInCharField (cf, b->data[i]); + } + return BSTR_OK; +} + +static void invertCharField (struct charField * cf) { +int i; + for (i=0; i < CFCLEN; i++) cf->content[i] = ~cf->content[i]; +} + +/* Inner engine for binchr */ +static int binchrCF (const unsigned char * data, int len, int pos, const struct charField * cf) { +int i; + for (i=pos; i < len; i++) { + unsigned char c = (unsigned char) data[i]; + if (testInCharField (cf, c)) return i; + } + return BSTR_ERR; +} + +/* int binchr (const_bstring b0, int pos, const_bstring b1); + * + * Search for the first position in b0 starting from pos or after, in which + * one of the characters in b1 is found and return it. 
If such a position + * does not exist in b0, then BSTR_ERR is returned. + */ +int binchr (const_bstring b0, int pos, const_bstring b1) { +struct charField chrs; + if (pos < 0 || b0 == NULL || b0->data == NULL || + b0->slen <= pos) return BSTR_ERR; + if (1 == b1->slen) return bstrchrp (b0, b1->data[0], pos); + if (0 > buildCharField (&chrs, b1)) return BSTR_ERR; + return binchrCF (b0->data, b0->slen, pos, &chrs); +} + +/* Inner engine for binchrr */ +static int binchrrCF (const unsigned char * data, int pos, const struct charField * cf) { +int i; + for (i=pos; i >= 0; i--) { + unsigned int c = (unsigned int) data[i]; + if (testInCharField (cf, c)) return i; + } + return BSTR_ERR; +} + +/* int binchrr (const_bstring b0, int pos, const_bstring b1); + * + * Search for the last position in b0 no greater than pos, in which one of + * the characters in b1 is found and return it. If such a position does not + * exist in b0, then BSTR_ERR is returned. + */ +int binchrr (const_bstring b0, int pos, const_bstring b1) { +struct charField chrs; + if (pos < 0 || b0 == NULL || b0->data == NULL || b1 == NULL || + b0->slen < pos) return BSTR_ERR; + if (pos == b0->slen) pos--; + if (1 == b1->slen) return bstrrchrp (b0, b1->data[0], pos); + if (0 > buildCharField (&chrs, b1)) return BSTR_ERR; + return binchrrCF (b0->data, pos, &chrs); +} + +/* int bninchr (const_bstring b0, int pos, const_bstring b1); + * + * Search for the first position in b0 starting from pos or after, in which + * none of the characters in b1 is found and return it. If such a position + * does not exist in b0, then BSTR_ERR is returned. + */ +int bninchr (const_bstring b0, int pos, const_bstring b1) { +struct charField chrs; + if (pos < 0 || b0 == NULL || b0->data == NULL || + b0->slen <= pos) return BSTR_ERR; + if (buildCharField (&chrs, b1) < 0) return BSTR_ERR; + invertCharField (&chrs); + return binchrCF (b0->data, b0->slen, pos, &chrs); +} + +/* int bninchrr (const_bstring b0, int pos, const_bstring b1); + * + * Search for the last position in b0 no greater than pos, in which none of + * the characters in b1 is found and return it. If such a position does not + * exist in b0, then BSTR_ERR is returned. + */ +int bninchrr (const_bstring b0, int pos, const_bstring b1) { +struct charField chrs; + if (pos < 0 || b0 == NULL || b0->data == NULL || + b0->slen < pos) return BSTR_ERR; + if (pos == b0->slen) pos--; + if (buildCharField (&chrs, b1) < 0) return BSTR_ERR; + invertCharField (&chrs); + return binchrrCF (b0->data, pos, &chrs); +} + +/* int bsetstr (bstring b0, int pos, bstring b1, unsigned char fill) + * + * Overwrite the string b0 starting at position pos with the string b1. If + * the position pos is past the end of b0, then the character "fill" is + * appended as necessary to make up the gap between the end of b0 and pos. + * If b1 is NULL, it behaves as if it were a 0-length string. 
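+ *
+ * Illustrative usage sketch (error handling omitted):
+ *
+ *     bstring b = bfromcstr ("1234");
+ *     bstring s = bfromcstr ("ab");
+ *     bsetstr (b, 2, s, '.');        b now holds "12ab"
+ *     bsetstr (b, 6, s, '.');        b now holds "12ab..ab", gap filled with '.'
+ *     bdestroy (s);
+ *     bdestroy (b);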
+ */ +int bsetstr (bstring b0, int pos, const_bstring b1, unsigned char fill) { +int d, newlen; +ptrdiff_t pd; +bstring aux = (bstring) b1; + + if (pos < 0 || b0 == NULL || b0->slen < 0 || NULL == b0->data || + b0->mlen < b0->slen || b0->mlen <= 0) return BSTR_ERR; + if (b1 != NULL && (b1->slen < 0 || b1->data == NULL)) return BSTR_ERR; + + d = pos; + + /* Aliasing case */ + if (NULL != aux) { + if ((pd = (ptrdiff_t) (b1->data - b0->data)) >= 0 && pd < (ptrdiff_t) b0->mlen) { + if (NULL == (aux = bstrcpy (b1))) return BSTR_ERR; + } + d += aux->slen; + } + + /* Increase memory size if necessary */ + if (balloc (b0, d + 1) != BSTR_OK) { + if (aux != b1) bdestroy (aux); + return BSTR_ERR; + } + + newlen = b0->slen; + + /* Fill in "fill" character as necessary */ + if (pos > newlen) { + bstr__memset (b0->data + b0->slen, (int) fill, (size_t) (pos - b0->slen)); + newlen = pos; + } + + /* Copy b1 to position pos in b0. */ + if (aux != NULL) { + bBlockCopy ((char *) (b0->data + pos), (char *) aux->data, aux->slen); + if (aux != b1) bdestroy (aux); + } + + /* Indicate the potentially increased size of b0 */ + if (d > newlen) newlen = d; + + b0->slen = newlen; + b0->data[newlen] = (unsigned char) '\0'; + + return BSTR_OK; +} + +/* int binsert (bstring b1, int pos, bstring b2, unsigned char fill) + * + * Inserts the string b2 into b1 at position pos. If the position pos is + * past the end of b1, then the character "fill" is appended as necessary to + * make up the gap between the end of b1 and pos. Unlike bsetstr, binsert + * does not allow b2 to be NULL. + */ +int binsert (bstring b1, int pos, const_bstring b2, unsigned char fill) { +int d, l; +ptrdiff_t pd; +bstring aux = (bstring) b2; + + if (pos < 0 || b1 == NULL || b2 == NULL || b1->slen < 0 || + b2->slen < 0 || b1->mlen < b1->slen || b1->mlen <= 0) return BSTR_ERR; + + /* Aliasing case */ + if ((pd = (ptrdiff_t) (b2->data - b1->data)) >= 0 && pd < (ptrdiff_t) b1->mlen) { + if (NULL == (aux = bstrcpy (b2))) return BSTR_ERR; + } + + /* Compute the two possible end pointers */ + d = b1->slen + aux->slen; + l = pos + aux->slen; + if ((d|l) < 0) return BSTR_ERR; + + if (l > d) { + /* Inserting past the end of the string */ + if (balloc (b1, l + 1) != BSTR_OK) { + if (aux != b2) bdestroy (aux); + return BSTR_ERR; + } + bstr__memset (b1->data + b1->slen, (int) fill, (size_t) (pos - b1->slen)); + b1->slen = l; + } else { + /* Inserting in the middle of the string */ + if (balloc (b1, d + 1) != BSTR_OK) { + if (aux != b2) bdestroy (aux); + return BSTR_ERR; + } + bBlockCopy (b1->data + l, b1->data + pos, d - l); + b1->slen = d; + } + bBlockCopy (b1->data + pos, aux->data, aux->slen); + b1->data[b1->slen] = (unsigned char) '\0'; + if (aux != b2) bdestroy (aux); + return BSTR_OK; +} + +/* int breplace (bstring b1, int pos, int len, bstring b2, + * unsigned char fill) + * + * Replace a section of a string from pos for a length len with the string b2. + * fill is used is pos > b1->slen. + */ +int breplace (bstring b1, int pos, int len, const_bstring b2, + unsigned char fill) { +int pl, ret; +ptrdiff_t pd; +bstring aux = (bstring) b2; + + if (pos < 0 || len < 0 || (pl = pos + len) < 0 || b1 == NULL || + b2 == NULL || b1->data == NULL || b2->data == NULL || + b1->slen < 0 || b2->slen < 0 || b1->mlen < b1->slen || + b1->mlen <= 0) return BSTR_ERR; + + /* Straddles the end? 
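+       (If pos + len reaches or passes the end of b1, the whole tail of b1
+       is being replaced, so this case is handled as a bsetstr () at pos,
+       followed by truncating b1 when the new contents end before the old
+       end of b1.)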
*/ + if (pl >= b1->slen) { + if ((ret = bsetstr (b1, pos, b2, fill)) < 0) return ret; + if (pos + b2->slen < b1->slen) { + b1->slen = pos + b2->slen; + b1->data[b1->slen] = (unsigned char) '\0'; + } + return ret; + } + + /* Aliasing case */ + if ((pd = (ptrdiff_t) (b2->data - b1->data)) >= 0 && pd < (ptrdiff_t) b1->slen) { + if (NULL == (aux = bstrcpy (b2))) return BSTR_ERR; + } + + if (aux->slen > len) { + if (balloc (b1, b1->slen + aux->slen - len) != BSTR_OK) { + if (aux != b2) bdestroy (aux); + return BSTR_ERR; + } + } + + if (aux->slen != len) bstr__memmove (b1->data + pos + aux->slen, b1->data + pos + len, b1->slen - (pos + len)); + bstr__memcpy (b1->data + pos, aux->data, aux->slen); + b1->slen += aux->slen - len; + b1->data[b1->slen] = (unsigned char) '\0'; + if (aux != b2) bdestroy (aux); + return BSTR_OK; +} + +/* + * findreplaceengine is used to implement bfindreplace and + * bfindreplacecaseless. It works by breaking the three cases of + * expansion, reduction and replacement, and solving each of these + * in the most efficient way possible. + */ + +typedef int (*instr_fnptr) (const_bstring s1, int pos, const_bstring s2); + +#define INITIAL_STATIC_FIND_INDEX_COUNT 32 + +static int findreplaceengine (bstring b, const_bstring find, const_bstring repl, int pos, instr_fnptr instr) { +int i, ret, slen, mlen, delta, acc; +int * d; +int static_d[INITIAL_STATIC_FIND_INDEX_COUNT+1]; /* This +1 is unnecessary, but it shuts up LINT. */ +ptrdiff_t pd; +bstring auxf = (bstring) find; +bstring auxr = (bstring) repl; + + if (b == NULL || b->data == NULL || find == NULL || + find->data == NULL || repl == NULL || repl->data == NULL || + pos < 0 || find->slen <= 0 || b->mlen < 0 || b->slen > b->mlen || + b->mlen <= 0 || b->slen < 0 || repl->slen < 0) return BSTR_ERR; + if (pos > b->slen - find->slen) return BSTR_OK; + + /* Alias with find string */ + pd = (ptrdiff_t) (find->data - b->data); + if ((ptrdiff_t) (pos - find->slen) < pd && pd < (ptrdiff_t) b->slen) { + if (NULL == (auxf = bstrcpy (find))) return BSTR_ERR; + } + + /* Alias with repl string */ + pd = (ptrdiff_t) (repl->data - b->data); + if ((ptrdiff_t) (pos - repl->slen) < pd && pd < (ptrdiff_t) b->slen) { + if (NULL == (auxr = bstrcpy (repl))) { + if (auxf != find) bdestroy (auxf); + return BSTR_ERR; + } + } + + delta = auxf->slen - auxr->slen; + + /* in-place replacement since find and replace strings are of equal + length */ + if (delta == 0) { + while ((pos = instr (b, pos, auxf)) >= 0) { + bstr__memcpy (b->data + pos, auxr->data, auxr->slen); + pos += auxf->slen; + } + if (auxf != find) bdestroy (auxf); + if (auxr != repl) bdestroy (auxr); + return BSTR_OK; + } + + /* shrinking replacement since auxf->slen > auxr->slen */ + if (delta > 0) { + acc = 0; + + while ((i = instr (b, pos, auxf)) >= 0) { + if (acc && i > pos) + bstr__memmove (b->data + pos - acc, b->data + pos, i - pos); + if (auxr->slen) + bstr__memcpy (b->data + i - acc, auxr->data, auxr->slen); + acc += delta; + pos = i + auxf->slen; + } + + if (acc) { + i = b->slen; + if (i > pos) + bstr__memmove (b->data + pos - acc, b->data + pos, i - pos); + b->slen -= acc; + b->data[b->slen] = (unsigned char) '\0'; + } + + if (auxf != find) bdestroy (auxf); + if (auxr != repl) bdestroy (auxr); + return BSTR_OK; + } + + /* expanding replacement since find->slen < repl->slen. Its a lot + more complicated. 
This works by first finding all the matches and + storing them to a growable array, then doing at most one resize of + the destination bstring and then performing the direct memory transfers + of the string segment pieces to form the final result. The growable + array of matches uses a deferred doubling reallocing strategy. What + this means is that it starts as a reasonably fixed sized auto array in + the hopes that many if not most cases will never need to grow this + array. But it switches as soon as the bounds of the array will be + exceeded. An extra find result is always appended to this array that + corresponds to the end of the destination string, so slen is checked + against mlen - 1 rather than mlen before resizing. + */ + + mlen = INITIAL_STATIC_FIND_INDEX_COUNT; + d = (int *) static_d; /* Avoid malloc for trivial/initial cases */ + acc = slen = 0; + + while ((pos = instr (b, pos, auxf)) >= 0) { + if (slen >= mlen - 1) { + int sl, *t; + + mlen += mlen; + sl = sizeof (int *) * mlen; + if (static_d == d) d = NULL; /* static_d cannot be realloced */ + if (mlen <= 0 || sl < mlen || NULL == (t = (int *) bstr__realloc (d, sl))) { + ret = BSTR_ERR; + goto done; + } + if (NULL == d) bstr__memcpy (t, static_d, sizeof (static_d)); + d = t; + } + d[slen] = pos; + slen++; + acc -= delta; + pos += auxf->slen; + if (pos < 0 || acc < 0) { + ret = BSTR_ERR; + goto done; + } + } + + /* slen <= INITIAL_STATIC_INDEX_COUNT-1 or mlen-1 here. */ + d[slen] = b->slen; + + if (BSTR_OK == (ret = balloc (b, b->slen + acc + 1))) { + b->slen += acc; + for (i = slen-1; i >= 0; i--) { + int s, l; + s = d[i] + auxf->slen; + l = d[i+1] - s; /* d[slen] may be accessed here. */ + if (l) { + bstr__memmove (b->data + s + acc, b->data + s, l); + } + if (auxr->slen) { + bstr__memmove (b->data + s + acc - auxr->slen, + auxr->data, auxr->slen); + } + acc += delta; + } + b->data[b->slen] = (unsigned char) '\0'; + } + + done:; + if (static_d == d) d = NULL; + bstr__free (d); + if (auxf != find) bdestroy (auxf); + if (auxr != repl) bdestroy (auxr); + return ret; +} + +/* int bfindreplace (bstring b, const_bstring find, const_bstring repl, + * int pos) + * + * Replace all occurrences of a find string with a replace string after a + * given point in a bstring. + */ +int bfindreplace (bstring b, const_bstring find, const_bstring repl, int pos) { + return findreplaceengine (b, find, repl, pos, binstr); +} + +/* int bfindreplacecaseless (bstring b, const_bstring find, const_bstring repl, + * int pos) + * + * Replace all occurrences of a find string, ignoring case, with a replace + * string after a given point in a bstring. + */ +int bfindreplacecaseless (bstring b, const_bstring find, const_bstring repl, int pos) { + return findreplaceengine (b, find, repl, pos, binstrcaseless); +} + +/* int binsertch (bstring b, int pos, int len, unsigned char fill) + * + * Inserts the character fill repeatedly into b at position pos for a + * length len. If the position pos is past the end of b, then the + * character "fill" is appended as necessary to make up the gap between the + * end of b and the position pos + len. 
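+ *
+ * Illustrative usage sketch (error handling omitted):
+ *
+ *     bstring b = bfromcstr ("abcd");
+ *     binsertch (b, 2, 3, '-');      b now holds "ab---cd"
+ *     bdestroy (b);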
+ */ +int binsertch (bstring b, int pos, int len, unsigned char fill) { +int d, l, i; + + if (pos < 0 || b == NULL || b->slen < 0 || b->mlen < b->slen || + b->mlen <= 0 || len < 0) return BSTR_ERR; + + /* Compute the two possible end pointers */ + d = b->slen + len; + l = pos + len; + if ((d|l) < 0) return BSTR_ERR; + + if (l > d) { + /* Inserting past the end of the string */ + if (balloc (b, l + 1) != BSTR_OK) return BSTR_ERR; + pos = b->slen; + b->slen = l; + } else { + /* Inserting in the middle of the string */ + if (balloc (b, d + 1) != BSTR_OK) return BSTR_ERR; + for (i = d - 1; i >= l; i--) { + b->data[i] = b->data[i - len]; + } + b->slen = d; + } + + for (i=pos; i < l; i++) b->data[i] = fill; + b->data[b->slen] = (unsigned char) '\0'; + return BSTR_OK; +} + +/* int bpattern (bstring b, int len) + * + * Replicate the bstring, b in place, end to end repeatedly until it + * surpasses len characters, then chop the result to exactly len characters. + * This function operates in-place. The function will return with BSTR_ERR + * if b is NULL or of length 0, otherwise BSTR_OK is returned. + */ +int bpattern (bstring b, int len) { +int i, d; + + d = blength (b); + if (d <= 0 || len < 0 || balloc (b, len + 1) != BSTR_OK) return BSTR_ERR; + if (len > 0) { + if (d == 1) return bsetstr (b, len, NULL, b->data[0]); + for (i = d; i < len; i++) b->data[i] = b->data[i - d]; + } + b->data[len] = (unsigned char) '\0'; + b->slen = len; + return BSTR_OK; +} + +#define BS_BUFF_SZ (1024) + +/* int breada (bstring b, bNread readPtr, void * parm) + * + * Use a finite buffer fread-like function readPtr to concatenate to the + * bstring b the entire contents of file-like source data in a roughly + * efficient way. + */ +int breada (bstring b, bNread readPtr, void * parm) { +int i, l, n; + + if (b == NULL || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen || + b->mlen <= 0 || readPtr == NULL) return BSTR_ERR; + + i = b->slen; + for (n=i+16; ; n += ((n < BS_BUFF_SZ) ? n : BS_BUFF_SZ)) { + if (BSTR_OK != balloc (b, n + 1)) return BSTR_ERR; + l = (int) readPtr ((void *) (b->data + i), 1, n - i, parm); + i += l; + b->slen = i; + if (i < n) break; + } + + b->data[i] = (unsigned char) '\0'; + return BSTR_OK; +} + +/* bstring bread (bNread readPtr, void * parm) + * + * Use a finite buffer fread-like function readPtr to create a bstring + * filled with the entire contents of file-like source data in a roughly + * efficient way. + */ +bstring bread (bNread readPtr, void * parm) { +bstring buff; + + if (0 > breada (buff = bfromcstr (""), readPtr, parm)) { + bdestroy (buff); + return NULL; + } + return buff; +} + +/* int bassigngets (bstring b, bNgetc getcPtr, void * parm, char terminator) + * + * Use an fgetc-like single character stream reading function (getcPtr) to + * obtain a sequence of characters which are concatenated to the end of the + * bstring b. The stream read is terminated by the passed in terminator + * parameter. + * + * If getcPtr returns with a negative number, or the terminator character + * (which is appended) is read, then the stream reading is halted and the + * function returns with a partial result in b. If there is an empty partial + * result, 1 is returned. If no characters are read, or there is some other + * detectable error, BSTR_ERR is returned. 
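+ *
+ * Illustrative usage sketch, reading lines from stdin (assumes <stdio.h> is
+ * included; the (bNgetc) fgetc cast adapts the stdio getc interface to the
+ * bNgetc callback type; error handling omitted):
+ *
+ *     bstring line = bfromcstr ("");
+ *     while (BSTR_OK == bassigngets (line, (bNgetc) fgetc, stdin, '\n')) {
+ *         ... process line; the '\n' terminator, if read, is kept ...
+ *     }
+ *     bdestroy (line);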
+ */ +int bassigngets (bstring b, bNgetc getcPtr, void * parm, char terminator) { +int c, d, e; + + if (b == NULL || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen || + b->mlen <= 0 || getcPtr == NULL) return BSTR_ERR; + d = 0; + e = b->mlen - 2; + + while ((c = getcPtr (parm)) >= 0) { + if (d > e) { + b->slen = d; + if (balloc (b, d + 2) != BSTR_OK) return BSTR_ERR; + e = b->mlen - 2; + } + b->data[d] = (unsigned char) c; + d++; + if (c == terminator) break; + } + + b->data[d] = (unsigned char) '\0'; + b->slen = d; + + return d == 0 && c < 0; +} + +/* int bgetsa (bstring b, bNgetc getcPtr, void * parm, char terminator) + * + * Use an fgetc-like single character stream reading function (getcPtr) to + * obtain a sequence of characters which are concatenated to the end of the + * bstring b. The stream read is terminated by the passed in terminator + * parameter. + * + * If getcPtr returns with a negative number, or the terminator character + * (which is appended) is read, then the stream reading is halted and the + * function returns with a partial result concatentated to b. If there is + * an empty partial result, 1 is returned. If no characters are read, or + * there is some other detectable error, BSTR_ERR is returned. + */ +int bgetsa (bstring b, bNgetc getcPtr, void * parm, char terminator) { +int c, d, e; + + if (b == NULL || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen || + b->mlen <= 0 || getcPtr == NULL) return BSTR_ERR; + d = b->slen; + e = b->mlen - 2; + + while ((c = getcPtr (parm)) >= 0) { + if (d > e) { + b->slen = d; + if (balloc (b, d + 2) != BSTR_OK) return BSTR_ERR; + e = b->mlen - 2; + } + b->data[d] = (unsigned char) c; + d++; + if (c == terminator) break; + } + + b->data[d] = (unsigned char) '\0'; + b->slen = d; + + return d == 0 && c < 0; +} + +/* bstring bgets (bNgetc getcPtr, void * parm, char terminator) + * + * Use an fgetc-like single character stream reading function (getcPtr) to + * obtain a sequence of characters which are concatenated into a bstring. + * The stream read is terminated by the passed in terminator function. + * + * If getcPtr returns with a negative number, or the terminator character + * (which is appended) is read, then the stream reading is halted and the + * result obtained thus far is returned. If no characters are read, or + * there is some other detectable error, NULL is returned. + */ +bstring bgets (bNgetc getcPtr, void * parm, char terminator) { +bstring buff; + + if (0 > bgetsa (buff = bfromcstr (""), getcPtr, parm, terminator) || 0 >= buff->slen) { + bdestroy (buff); + buff = NULL; + } + return buff; +} + +struct bStream { + bstring buff; /* Buffer for over-reads */ + void * parm; /* The stream handle for core stream */ + bNread readFnPtr; /* fread compatible fnptr for core stream */ + int isEOF; /* track file's EOF state */ + int maxBuffSz; +}; + +/* struct bStream * bsopen (bNread readPtr, void * parm) + * + * Wrap a given open stream (described by a fread compatible function + * pointer and stream handle) into an open bStream suitable for the bstring + * library streaming functions. 
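+ *
+ * Illustrative usage sketch, reading a stdio FILE line by line through a
+ * bStream (fp is assumed to be an already opened FILE *; the (bNread) fread
+ * cast adapts fread to the bNread callback type; error handling omitted):
+ *
+ *     struct bStream * s = bsopen ((bNread) fread, fp);
+ *     bstring line = bfromcstr ("");
+ *     while (s && BSTR_OK == bsreadln (line, s, '\n')) {
+ *         ... process line ...
+ *     }
+ *     bdestroy (line);
+ *     bsclose (s);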
+ */ +struct bStream * bsopen (bNread readPtr, void * parm) { +struct bStream * s; + + if (readPtr == NULL) return NULL; + s = (struct bStream *) bstr__alloc (sizeof (struct bStream)); + if (s == NULL) return NULL; + s->parm = parm; + s->buff = bfromcstr (""); + s->readFnPtr = readPtr; + s->maxBuffSz = BS_BUFF_SZ; + s->isEOF = 0; + return s; +} + +/* int bsbufflength (struct bStream * s, int sz) + * + * Set the length of the buffer used by the bStream. If sz is zero, the + * length is not set. This function returns with the previous length. + */ +int bsbufflength (struct bStream * s, int sz) { +int oldSz; + if (s == NULL || sz < 0) return BSTR_ERR; + oldSz = s->maxBuffSz; + if (sz > 0) s->maxBuffSz = sz; + return oldSz; +} + +int bseof (const struct bStream * s) { + if (s == NULL || s->readFnPtr == NULL) return BSTR_ERR; + return s->isEOF && (s->buff->slen == 0); +} + +/* void * bsclose (struct bStream * s) + * + * Close the bStream, and return the handle to the stream that was originally + * used to open the given stream. + */ +void * bsclose (struct bStream * s) { +void * parm; + if (s == NULL) return NULL; + s->readFnPtr = NULL; + if (s->buff) bdestroy (s->buff); + s->buff = NULL; + parm = s->parm; + s->parm = NULL; + s->isEOF = 1; + bstr__free (s); + return parm; +} + +/* int bsreadlna (bstring r, struct bStream * s, char terminator) + * + * Read a bstring terminated by the terminator character or the end of the + * stream from the bStream (s) and return it into the parameter r. This + * function may read additional characters from the core stream that are not + * returned, but will be retained for subsequent read operations. + */ +int bsreadlna (bstring r, struct bStream * s, char terminator) { +int i, l, ret, rlo; +char * b; +struct tagbstring x; + + if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0 || + r->slen < 0 || r->mlen < r->slen) return BSTR_ERR; + l = s->buff->slen; + if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR; + b = (char *) s->buff->data; + x.data = (unsigned char *) b; + + /* First check if the current buffer holds the terminator */ + b[l] = terminator; /* Set sentinel */ + for (i=0; b[i] != terminator; i++) ; + if (i < l) { + x.slen = i + 1; + ret = bconcat (r, &x); + s->buff->slen = l; + if (BSTR_OK == ret) bdelete (s->buff, 0, i + 1); + return BSTR_OK; + } + + rlo = r->slen; + + /* If not then just concatenate the entire buffer to the output */ + x.slen = l; + if (BSTR_OK != bconcat (r, &x)) return BSTR_ERR; + + /* Perform direct in-place reads into the destination to allow for + the minimum of data-copies */ + for (;;) { + if (BSTR_OK != balloc (r, r->slen + s->maxBuffSz + 1)) return BSTR_ERR; + b = (char *) (r->data + r->slen); + l = (int) s->readFnPtr (b, 1, s->maxBuffSz, s->parm); + if (l <= 0) { + r->data[r->slen] = (unsigned char) '\0'; + s->buff->slen = 0; + s->isEOF = 1; + /* If nothing was read return with an error message */ + return BSTR_ERR & -(r->slen == rlo); + } + b[l] = terminator; /* Set sentinel */ + for (i=0; b[i] != terminator; i++) ; + if (i < l) break; + r->slen += l; + } + + /* Terminator found, push over-read back to buffer */ + i++; + r->slen += i; + s->buff->slen = l - i; + bstr__memcpy (s->buff->data, b + i, l - i); + r->data[r->slen] = (unsigned char) '\0'; + return BSTR_OK; +} + +/* int bsreadlnsa (bstring r, struct bStream * s, bstring term) + * + * Read a bstring terminated by any character in the term string or the end + * of the stream from the bStream (s) and return it into the parameter r. 
+ * This function may read additional characters from the core stream that + * are not returned, but will be retained for subsequent read operations. + */ +int bsreadlnsa (bstring r, struct bStream * s, const_bstring term) { +int i, l, ret, rlo; +unsigned char * b; +struct tagbstring x; +struct charField cf; + + if (s == NULL || s->buff == NULL || r == NULL || term == NULL || + term->data == NULL || r->mlen <= 0 || r->slen < 0 || + r->mlen < r->slen) return BSTR_ERR; + if (term->slen == 1) return bsreadlna (r, s, term->data[0]); + if (term->slen < 1 || buildCharField (&cf, term)) return BSTR_ERR; + + l = s->buff->slen; + if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR; + b = (unsigned char *) s->buff->data; + x.data = b; + + /* First check if the current buffer holds the terminator */ + b[l] = term->data[0]; /* Set sentinel */ + for (i=0; !testInCharField (&cf, b[i]); i++) ; + if (i < l) { + x.slen = i + 1; + ret = bconcat (r, &x); + s->buff->slen = l; + if (BSTR_OK == ret) bdelete (s->buff, 0, i + 1); + return BSTR_OK; + } + + rlo = r->slen; + + /* If not then just concatenate the entire buffer to the output */ + x.slen = l; + if (BSTR_OK != bconcat (r, &x)) return BSTR_ERR; + + /* Perform direct in-place reads into the destination to allow for + the minimum of data-copies */ + for (;;) { + if (BSTR_OK != balloc (r, r->slen + s->maxBuffSz + 1)) return BSTR_ERR; + b = (unsigned char *) (r->data + r->slen); + l = (int) s->readFnPtr (b, 1, s->maxBuffSz, s->parm); + if (l <= 0) { + r->data[r->slen] = (unsigned char) '\0'; + s->buff->slen = 0; + s->isEOF = 1; + /* If nothing was read return with an error message */ + return BSTR_ERR & -(r->slen == rlo); + } + + b[l] = term->data[0]; /* Set sentinel */ + for (i=0; !testInCharField (&cf, b[i]); i++) ; + if (i < l) break; + r->slen += l; + } + + /* Terminator found, push over-read back to buffer */ + i++; + r->slen += i; + s->buff->slen = l - i; + bstr__memcpy (s->buff->data, b + i, l - i); + r->data[r->slen] = (unsigned char) '\0'; + return BSTR_OK; +} + +/* int bsreada (bstring r, struct bStream * s, int n) + * + * Read a bstring of length n (or, if it is fewer, as many bytes as is + * remaining) from the bStream. This function may read additional + * characters from the core stream that are not returned, but will be + * retained for subsequent read operations. This function will not read + * additional characters from the core stream beyond virtual stream pointer. 
+ */ +int bsreada (bstring r, struct bStream * s, int n) { +int l, ret, orslen; +char * b; +struct tagbstring x; + + if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0 + || r->slen < 0 || r->mlen < r->slen || n <= 0) return BSTR_ERR; + + n += r->slen; + if (n <= 0) return BSTR_ERR; + + l = s->buff->slen; + + orslen = r->slen; + + if (0 == l) { + if (s->isEOF) return BSTR_ERR; + if (r->mlen > n) { + l = (int) s->readFnPtr (r->data + r->slen, 1, n - r->slen, s->parm); + if (0 >= l || l > n - r->slen) { + s->isEOF = 1; + return BSTR_ERR; + } + r->slen += l; + r->data[r->slen] = (unsigned char) '\0'; + return 0; + } + } + + if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR; + b = (char *) s->buff->data; + x.data = (unsigned char *) b; + + do { + if (l + r->slen >= n) { + x.slen = n - r->slen; + ret = bconcat (r, &x); + s->buff->slen = l; + if (BSTR_OK == ret) bdelete (s->buff, 0, x.slen); + return BSTR_ERR & -(r->slen == orslen); + } + + x.slen = l; + if (BSTR_OK != bconcat (r, &x)) break; + + l = n - r->slen; + if (l > s->maxBuffSz) l = s->maxBuffSz; + + l = (int) s->readFnPtr (b, 1, l, s->parm); + + } while (l > 0); + if (l < 0) l = 0; + if (l == 0) s->isEOF = 1; + s->buff->slen = l; + return BSTR_ERR & -(r->slen == orslen); +} + +/* int bsreadln (bstring r, struct bStream * s, char terminator) + * + * Read a bstring terminated by the terminator character or the end of the + * stream from the bStream (s) and return it into the parameter r. This + * function may read additional characters from the core stream that are not + * returned, but will be retained for subsequent read operations. + */ +int bsreadln (bstring r, struct bStream * s, char terminator) { + if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0) + return BSTR_ERR; + if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR; + r->slen = 0; + return bsreadlna (r, s, terminator); +} + +/* int bsreadlns (bstring r, struct bStream * s, bstring term) + * + * Read a bstring terminated by any character in the term string or the end + * of the stream from the bStream (s) and return it into the parameter r. + * This function may read additional characters from the core stream that + * are not returned, but will be retained for subsequent read operations. + */ +int bsreadlns (bstring r, struct bStream * s, const_bstring term) { + if (s == NULL || s->buff == NULL || r == NULL || term == NULL + || term->data == NULL || r->mlen <= 0) return BSTR_ERR; + if (term->slen == 1) return bsreadln (r, s, term->data[0]); + if (term->slen < 1) return BSTR_ERR; + if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR; + r->slen = 0; + return bsreadlnsa (r, s, term); +} + +/* int bsread (bstring r, struct bStream * s, int n) + * + * Read a bstring of length n (or, if it is fewer, as many bytes as is + * remaining) from the bStream. This function may read additional + * characters from the core stream that are not returned, but will be + * retained for subsequent read operations. This function will not read + * additional characters from the core stream beyond virtual stream pointer. + */ +int bsread (bstring r, struct bStream * s, int n) { + if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0 + || n <= 0) return BSTR_ERR; + if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR; + r->slen = 0; + return bsreada (r, s, n); +} + +/* int bsunread (struct bStream * s, const_bstring b) + * + * Insert a bstring into the bStream at the current position. 
These + * characters will be read prior to those that actually come from the core + * stream. + */ +int bsunread (struct bStream * s, const_bstring b) { + if (s == NULL || s->buff == NULL) return BSTR_ERR; + return binsert (s->buff, 0, b, (unsigned char) '?'); +} + +/* int bspeek (bstring r, const struct bStream * s) + * + * Return the currently buffered characters from the bStream that will be + * read prior to reads from the core stream. + */ +int bspeek (bstring r, const struct bStream * s) { + if (s == NULL || s->buff == NULL) return BSTR_ERR; + return bassign (r, s->buff); +} + +/* bstring bjoin (const struct bstrList * bl, const_bstring sep); + * + * Join the entries of a bstrList into one bstring by sequentially + * concatenating them with the sep string in between. If there is an error + * NULL is returned, otherwise a bstring with the correct result is returned. + */ +bstring bjoin (const struct bstrList * bl, const_bstring sep) { +bstring b; +int i, c, v; + + if (bl == NULL || bl->qty < 0) return NULL; + if (sep != NULL && (sep->slen < 0 || sep->data == NULL)) return NULL; + + for (i = 0, c = 1; i < bl->qty; i++) { + v = bl->entry[i]->slen; + if (v < 0) return NULL; /* Invalid input */ + c += v; + if (c < 0) return NULL; /* Wrap around ?? */ + } + + if (sep != NULL) c += (bl->qty - 1) * sep->slen; + + b = (bstring) bstr__alloc (sizeof (struct tagbstring)); + if (NULL == b) return NULL; /* Out of memory */ + b->data = (unsigned char *) bstr__alloc (c); + if (b->data == NULL) { + bstr__free (b); + return NULL; + } + + b->mlen = c; + b->slen = c-1; + + for (i = 0, c = 0; i < bl->qty; i++) { + if (i > 0 && sep != NULL) { + bstr__memcpy (b->data + c, sep->data, sep->slen); + c += sep->slen; + } + v = bl->entry[i]->slen; + bstr__memcpy (b->data + c, bl->entry[i]->data, v); + c += v; + } + b->data[c] = (unsigned char) '\0'; + return b; +} + +#define BSSSC_BUFF_LEN (256) + +/* int bssplitscb (struct bStream * s, const_bstring splitStr, + * int (* cb) (void * parm, int ofs, const_bstring entry), void * parm) + * + * Iterate the set of disjoint sequential substrings read from a stream + * divided by any of the characters in splitStr. An empty splitStr causes + * the whole stream to be iterated once. + * + * Note: At the point of calling the cb function, the bStream pointer is + * pointed exactly at the position right after having read the split + * character. The cb function can act on the stream by causing the bStream + * pointer to move, and bssplitscb will continue by starting the next split + * at the position of the pointer after the return from cb. + * + * However, if the cb causes the bStream s to be destroyed then the cb must + * return with a negative value, otherwise bssplitscb will continue in an + * undefined manner. 
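+ *
+ * Illustrative usage sketch that counts newline-separated records in a
+ * stream; countCb and nLines are hypothetical names used only here, and fp
+ * is assumed to be an already opened FILE * (error handling omitted):
+ *
+ *     static int countCb (void * parm, int ofs, const_bstring entry) {
+ *         (void) ofs; (void) entry;
+ *         *(int *) parm += 1;
+ *         return 0;
+ *     }
+ *
+ *     bstring nl = bfromcstr ("\n");
+ *     struct bStream * s = bsopen ((bNread) fread, fp);
+ *     int nLines = 0;
+ *     bssplitscb (s, nl, countCb, &nLines);
+ *     bsclose (s);
+ *     bdestroy (nl);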
+ */ +int bssplitscb (struct bStream * s, const_bstring splitStr, + int (* cb) (void * parm, int ofs, const_bstring entry), void * parm) { +struct charField chrs; +bstring buff; +int i, p, ret; + + if (cb == NULL || s == NULL || s->readFnPtr == NULL + || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR; + + if (NULL == (buff = bfromcstr (""))) return BSTR_ERR; + + if (splitStr->slen == 0) { + while (bsreada (buff, s, BSSSC_BUFF_LEN) >= 0) ; + if ((ret = cb (parm, 0, buff)) > 0) + ret = 0; + } else { + buildCharField (&chrs, splitStr); + ret = p = i = 0; + for (;;) { + if (i >= buff->slen) { + bsreada (buff, s, BSSSC_BUFF_LEN); + if (i >= buff->slen) { + if (0 < (ret = cb (parm, p, buff))) ret = 0; + break; + } + } + if (testInCharField (&chrs, buff->data[i])) { + struct tagbstring t; + unsigned char c; + + blk2tbstr (t, buff->data + i + 1, buff->slen - (i + 1)); + if ((ret = bsunread (s, &t)) < 0) break; + buff->slen = i; + c = buff->data[i]; + buff->data[i] = (unsigned char) '\0'; + if ((ret = cb (parm, p, buff)) < 0) break; + buff->data[i] = c; + buff->slen = 0; + p += i + 1; + i = -1; + } + i++; + } + } + + bdestroy (buff); + return ret; +} + +/* int bssplitstrcb (struct bStream * s, const_bstring splitStr, + * int (* cb) (void * parm, int ofs, const_bstring entry), void * parm) + * + * Iterate the set of disjoint sequential substrings read from a stream + * divided by the entire substring splitStr. An empty splitStr causes + * each character of the stream to be iterated. + * + * Note: At the point of calling the cb function, the bStream pointer is + * pointed exactly at the position right after having read the split + * character. The cb function can act on the stream by causing the bStream + * pointer to move, and bssplitscb will continue by starting the next split + * at the position of the pointer after the return from cb. + * + * However, if the cb causes the bStream s to be destroyed then the cb must + * return with a negative value, otherwise bssplitscb will continue in an + * undefined manner. + */ +int bssplitstrcb (struct bStream * s, const_bstring splitStr, + int (* cb) (void * parm, int ofs, const_bstring entry), void * parm) { +bstring buff; +int i, p, ret; + + if (cb == NULL || s == NULL || s->readFnPtr == NULL + || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR; + + if (splitStr->slen == 1) return bssplitscb (s, splitStr, cb, parm); + + if (NULL == (buff = bfromcstr (""))) return BSTR_ERR; + + if (splitStr->slen == 0) { + for (i=0; bsreada (buff, s, BSSSC_BUFF_LEN) >= 0; i++) { + if ((ret = cb (parm, 0, buff)) < 0) { + bdestroy (buff); + return ret; + } + buff->slen = 0; + } + return BSTR_OK; + } else { + ret = p = i = 0; + for (i=p=0;;) { + if ((ret = binstr (buff, 0, splitStr)) >= 0) { + struct tagbstring t; + blk2tbstr (t, buff->data, ret); + i = ret + splitStr->slen; + if ((ret = cb (parm, p, &t)) < 0) break; + p += i; + bdelete (buff, 0, i); + } else { + bsreada (buff, s, BSSSC_BUFF_LEN); + if (bseof (s)) { + if ((ret = cb (parm, p, buff)) > 0) ret = 0; + break; + } + } + } + } + + bdestroy (buff); + return ret; +} + +/* int bstrListCreate (void) + * + * Create a bstrList. 
+ */ +struct bstrList * bstrListCreate (void) { +struct bstrList * sl = (struct bstrList *) bstr__alloc (sizeof (struct bstrList)); + if (sl) { + sl->entry = (bstring *) bstr__alloc (1*sizeof (bstring)); + if (!sl->entry) { + bstr__free (sl); + sl = NULL; + } else { + sl->qty = 0; + sl->mlen = 1; + } + } + return sl; +} + +/* int bstrListDestroy (struct bstrList * sl) + * + * Destroy a bstrList that has been created by bsplit, bsplits or bstrListCreate. + */ +int bstrListDestroy (struct bstrList * sl) { +int i; + if (sl == NULL || sl->qty < 0) return BSTR_ERR; + for (i=0; i < sl->qty; i++) { + if (sl->entry[i]) { + bdestroy (sl->entry[i]); + sl->entry[i] = NULL; + } + } + sl->qty = -1; + sl->mlen = -1; + bstr__free (sl->entry); + sl->entry = NULL; + bstr__free (sl); + return BSTR_OK; +} + +/* int bstrListAlloc (struct bstrList * sl, int msz) + * + * Ensure that there is memory for at least msz number of entries for the + * list. + */ +int bstrListAlloc (struct bstrList * sl, int msz) { +bstring * l; +int smsz; +size_t nsz; + if (!sl || msz <= 0 || !sl->entry || sl->qty < 0 || sl->mlen <= 0 || sl->qty > sl->mlen) return BSTR_ERR; + if (sl->mlen >= msz) return BSTR_OK; + smsz = snapUpSize (msz); + nsz = ((size_t) smsz) * sizeof (bstring); + if (nsz < (size_t) smsz) return BSTR_ERR; + l = (bstring *) bstr__realloc (sl->entry, nsz); + if (!l) { + smsz = msz; + nsz = ((size_t) smsz) * sizeof (bstring); + l = (bstring *) bstr__realloc (sl->entry, nsz); + if (!l) return BSTR_ERR; + } + sl->mlen = smsz; + sl->entry = l; + return BSTR_OK; +} + +/* int bstrListAllocMin (struct bstrList * sl, int msz) + * + * Try to allocate the minimum amount of memory for the list to include at + * least msz entries or sl->qty whichever is greater. + */ +int bstrListAllocMin (struct bstrList * sl, int msz) { +bstring * l; +size_t nsz; + if (!sl || msz <= 0 || !sl->entry || sl->qty < 0 || sl->mlen <= 0 || sl->qty > sl->mlen) return BSTR_ERR; + if (msz < sl->qty) msz = sl->qty; + if (sl->mlen == msz) return BSTR_OK; + nsz = ((size_t) msz) * sizeof (bstring); + if (nsz < (size_t) msz) return BSTR_ERR; + l = (bstring *) bstr__realloc (sl->entry, nsz); + if (!l) return BSTR_ERR; + sl->mlen = msz; + sl->entry = l; + return BSTR_OK; +} + +/* int bsplitcb (const_bstring str, unsigned char splitChar, int pos, + * int (* cb) (void * parm, int ofs, int len), void * parm) + * + * Iterate the set of disjoint sequential substrings over str divided by the + * character in splitChar. + * + * Note: Non-destructive modification of str from within the cb function + * while performing this split is not undefined. bsplitcb behaves in + * sequential lock step with calls to cb. I.e., after returning from a cb + * that return a non-negative integer, bsplitcb continues from the position + * 1 character after the last detected split character and it will halt + * immediately if the length of str falls below this point. However, if the + * cb function destroys str, then it *must* return with a negative value, + * otherwise bsplitcb will continue in an undefined manner. 
+ */ +int bsplitcb (const_bstring str, unsigned char splitChar, int pos, + int (* cb) (void * parm, int ofs, int len), void * parm) { +int i, p, ret; + + if (cb == NULL || str == NULL || pos < 0 || pos > str->slen) + return BSTR_ERR; + + p = pos; + do { + for (i=p; i < str->slen; i++) { + if (str->data[i] == splitChar) break; + } + if ((ret = cb (parm, p, i - p)) < 0) return ret; + p = i + 1; + } while (p <= str->slen); + return BSTR_OK; +} + +/* int bsplitscb (const_bstring str, const_bstring splitStr, int pos, + * int (* cb) (void * parm, int ofs, int len), void * parm) + * + * Iterate the set of disjoint sequential substrings over str divided by any + * of the characters in splitStr. An empty splitStr causes the whole str to + * be iterated once. + * + * Note: Non-destructive modification of str from within the cb function + * while performing this split is not undefined. bsplitscb behaves in + * sequential lock step with calls to cb. I.e., after returning from a cb + * that return a non-negative integer, bsplitscb continues from the position + * 1 character after the last detected split character and it will halt + * immediately if the length of str falls below this point. However, if the + * cb function destroys str, then it *must* return with a negative value, + * otherwise bsplitscb will continue in an undefined manner. + */ +int bsplitscb (const_bstring str, const_bstring splitStr, int pos, + int (* cb) (void * parm, int ofs, int len), void * parm) { +struct charField chrs; +int i, p, ret; + + if (cb == NULL || str == NULL || pos < 0 || pos > str->slen + || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR; + if (splitStr->slen == 0) { + if ((ret = cb (parm, 0, str->slen)) > 0) ret = 0; + return ret; + } + + if (splitStr->slen == 1) + return bsplitcb (str, splitStr->data[0], pos, cb, parm); + + buildCharField (&chrs, splitStr); + + p = pos; + do { + for (i=p; i < str->slen; i++) { + if (testInCharField (&chrs, str->data[i])) break; + } + if ((ret = cb (parm, p, i - p)) < 0) return ret; + p = i + 1; + } while (p <= str->slen); + return BSTR_OK; +} + +/* int bsplitstrcb (const_bstring str, const_bstring splitStr, int pos, + * int (* cb) (void * parm, int ofs, int len), void * parm) + * + * Iterate the set of disjoint sequential substrings over str divided by the + * substring splitStr. An empty splitStr causes the whole str to be + * iterated once. + * + * Note: Non-destructive modification of str from within the cb function + * while performing this split is not undefined. bsplitstrcb behaves in + * sequential lock step with calls to cb. I.e., after returning from a cb + * that return a non-negative integer, bsplitscb continues from the position + * 1 character after the last detected split character and it will halt + * immediately if the length of str falls below this point. However, if the + * cb function destroys str, then it *must* return with a negative value, + * otherwise bsplitscb will continue in an undefined manner. 
+ */ +int bsplitstrcb (const_bstring str, const_bstring splitStr, int pos, + int (* cb) (void * parm, int ofs, int len), void * parm) { +int i, p, ret; + + if (cb == NULL || str == NULL || pos < 0 || pos > str->slen + || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR; + + if (0 == splitStr->slen) { + for (i=pos; i < str->slen; i++) { + if ((ret = cb (parm, i, 1)) < 0) return ret; + } + return BSTR_OK; + } + + if (splitStr->slen == 1) + return bsplitcb (str, splitStr->data[0], pos, cb, parm); + + for (i=p=pos; i <= str->slen - splitStr->slen; i++) { + if (0 == bstr__memcmp (splitStr->data, str->data + i, splitStr->slen)) { + if ((ret = cb (parm, p, i - p)) < 0) return ret; + i += splitStr->slen; + p = i; + } + } + if ((ret = cb (parm, p, str->slen - p)) < 0) return ret; + return BSTR_OK; +} + +struct genBstrList { + bstring b; + struct bstrList * bl; +}; + +static int bscb (void * parm, int ofs, int len) { +struct genBstrList * g = (struct genBstrList *) parm; + if (g->bl->qty >= g->bl->mlen) { + int mlen = g->bl->mlen * 2; + bstring * tbl; + + while (g->bl->qty >= mlen) { + if (mlen < g->bl->mlen) return BSTR_ERR; + mlen += mlen; + } + + tbl = (bstring *) bstr__realloc (g->bl->entry, sizeof (bstring) * mlen); + if (tbl == NULL) return BSTR_ERR; + + g->bl->entry = tbl; + g->bl->mlen = mlen; + } + + g->bl->entry[g->bl->qty] = bmidstr (g->b, ofs, len); + g->bl->qty++; + return BSTR_OK; +} + +/* struct bstrList * bsplit (const_bstring str, unsigned char splitChar) + * + * Create an array of sequential substrings from str divided by the character + * splitChar. + */ +struct bstrList * bsplit (const_bstring str, unsigned char splitChar) { +struct genBstrList g; + + if (str == NULL || str->data == NULL || str->slen < 0) return NULL; + + g.bl = (struct bstrList *) bstr__alloc (sizeof (struct bstrList)); + if (g.bl == NULL) return NULL; + g.bl->mlen = 4; + g.bl->entry = (bstring *) bstr__alloc (g.bl->mlen * sizeof (bstring)); + if (NULL == g.bl->entry) { + bstr__free (g.bl); + return NULL; + } + + g.b = (bstring) str; + g.bl->qty = 0; + if (bsplitcb (str, splitChar, 0, bscb, &g) < 0) { + bstrListDestroy (g.bl); + return NULL; + } + return g.bl; +} + +/* struct bstrList * bsplitstr (const_bstring str, const_bstring splitStr) + * + * Create an array of sequential substrings from str divided by the entire + * substring splitStr. + */ +struct bstrList * bsplitstr (const_bstring str, const_bstring splitStr) { +struct genBstrList g; + + if (str == NULL || str->data == NULL || str->slen < 0) return NULL; + + g.bl = (struct bstrList *) bstr__alloc (sizeof (struct bstrList)); + if (g.bl == NULL) return NULL; + g.bl->mlen = 4; + g.bl->entry = (bstring *) bstr__alloc (g.bl->mlen * sizeof (bstring)); + if (NULL == g.bl->entry) { + bstr__free (g.bl); + return NULL; + } + + g.b = (bstring) str; + g.bl->qty = 0; + if (bsplitstrcb (str, splitStr, 0, bscb, &g) < 0) { + bstrListDestroy (g.bl); + return NULL; + } + return g.bl; +} + +/* struct bstrList * bsplits (const_bstring str, bstring splitStr) + * + * Create an array of sequential substrings from str divided by any of the + * characters in splitStr. An empty splitStr causes a single entry bstrList + * containing a copy of str to be returned. 
+ */ +struct bstrList * bsplits (const_bstring str, const_bstring splitStr) { +struct genBstrList g; + + if ( str == NULL || str->slen < 0 || str->data == NULL || + splitStr == NULL || splitStr->slen < 0 || splitStr->data == NULL) + return NULL; + + g.bl = (struct bstrList *) bstr__alloc (sizeof (struct bstrList)); + if (g.bl == NULL) return NULL; + g.bl->mlen = 4; + g.bl->entry = (bstring *) bstr__alloc (g.bl->mlen * sizeof (bstring)); + if (NULL == g.bl->entry) { + bstr__free (g.bl); + return NULL; + } + g.b = (bstring) str; + g.bl->qty = 0; + + if (bsplitscb (str, splitStr, 0, bscb, &g) < 0) { + bstrListDestroy (g.bl); + return NULL; + } + return g.bl; +} + +#if defined (__TURBOC__) && !defined (__BORLANDC__) +# ifndef BSTRLIB_NOVSNP +# define BSTRLIB_NOVSNP +# endif +#endif + +/* Give WATCOM C/C++, MSVC some latitude for their non-support of vsnprintf */ +#if defined(__WATCOMC__) || defined(_MSC_VER) +#define exvsnprintf(r,b,n,f,a) {r = _vsnprintf (b,n,f,a);} +#else +#ifdef BSTRLIB_NOVSNP +/* This is just a hack. If you are using a system without a vsnprintf, it is + not recommended that bformat be used at all. */ +#define exvsnprintf(r,b,n,f,a) {vsprintf (b,f,a); r = -1;} +#define START_VSNBUFF (256) +#else + +#ifdef __GNUC__ +/* Something is making gcc complain about this prototype not being here, so + I've just gone ahead and put it in. */ +//extern int vsnprintf (char *buf, size_t count, const char *format, va_list arg); +#endif + +#define exvsnprintf(r,b,n,f,a) {r = vsnprintf (b,n,f,a);} +#endif +#endif + +#if !defined (BSTRLIB_NOVSNP) + +#ifndef START_VSNBUFF +#define START_VSNBUFF (16) +#endif + +/* On IRIX vsnprintf returns n-1 when the operation would overflow the target + buffer, WATCOM and MSVC both return -1, while C99 requires that the + returned value be exactly what the length would be if the buffer would be + large enough. This leads to the idea that if the return value is larger + than n, then changing n to the return value will reduce the number of + iterations required. */ + +/* int bformata (bstring b, const char * fmt, ...) + * + * After the first parameter, it takes the same parameters as printf (), but + * rather than outputting results to stdio, it appends the results to + * a bstring which contains what would have been output. Note that if there + * is an early generation of a '\0' character, the bstring will be truncated + * to this end point. + */ +int bformata (bstring b, const char * fmt, ...) { +va_list arglist; +bstring buff; +int n, r; + + if (b == NULL || fmt == NULL || b->data == NULL || b->mlen <= 0 + || b->slen < 0 || b->slen > b->mlen) return BSTR_ERR; + + /* Since the length is not determinable beforehand, a search is + performed using the truncating "vsnprintf" call (to avoid buffer + overflows) on increasing potential sizes for the output result. */ + + if ((n = (int) (2*strlen (fmt))) < START_VSNBUFF) n = START_VSNBUFF; + if (NULL == (buff = bfromcstralloc (n + 2, ""))) { + n = 1; + if (NULL == (buff = bfromcstralloc (n + 2, ""))) return BSTR_ERR; + } + + for (;;) { + va_start (arglist, fmt); + exvsnprintf (r, (char *) buff->data, n + 1, fmt, arglist); + va_end (arglist); + + buff->data[n] = (unsigned char) '\0'; + buff->slen = (int) (strlen) ((char *) buff->data); + + if (buff->slen < n) break; + + if (r > n) n = r; else n += n; + + if (BSTR_OK != balloc (buff, n + 2)) { + bdestroy (buff); + return BSTR_ERR; + } + } + + r = bconcat (b, buff); + bdestroy (buff); + return r; +} + +/* int bassignformat (bstring b, const char * fmt, ...) 
+ * + * After the first parameter, it takes the same parameters as printf (), but + * rather than outputting results to stdio, it outputs the results to + * the bstring parameter b. Note that if there is an early generation of a + * '\0' character, the bstring will be truncated to this end point. + */ +int bassignformat (bstring b, const char * fmt, ...) { +va_list arglist; +bstring buff; +int n, r; + + if (b == NULL || fmt == NULL || b->data == NULL || b->mlen <= 0 + || b->slen < 0 || b->slen > b->mlen) return BSTR_ERR; + + /* Since the length is not determinable beforehand, a search is + performed using the truncating "vsnprintf" call (to avoid buffer + overflows) on increasing potential sizes for the output result. */ + + if ((n = (int) (2*strlen (fmt))) < START_VSNBUFF) n = START_VSNBUFF; + if (NULL == (buff = bfromcstralloc (n + 2, ""))) { + n = 1; + if (NULL == (buff = bfromcstralloc (n + 2, ""))) return BSTR_ERR; + } + + for (;;) { + va_start (arglist, fmt); + exvsnprintf (r, (char *) buff->data, n + 1, fmt, arglist); + va_end (arglist); + + buff->data[n] = (unsigned char) '\0'; + buff->slen = (int) (strlen) ((char *) buff->data); + + if (buff->slen < n) break; + + if (r > n) n = r; else n += n; + + if (BSTR_OK != balloc (buff, n + 2)) { + bdestroy (buff); + return BSTR_ERR; + } + } + + r = bassign (b, buff); + bdestroy (buff); + return r; +} + +/* bstring bformat (const char * fmt, ...) + * + * Takes the same parameters as printf (), but rather than outputting results + * to stdio, it forms a bstring which contains what would have been output. + * Note that if there is an early generation of a '\0' character, the + * bstring will be truncated to this end point. + */ +bstring bformat (const char * fmt, ...) { +va_list arglist; +bstring buff; +int n, r; + + if (fmt == NULL) return NULL; + + /* Since the length is not determinable beforehand, a search is + performed using the truncating "vsnprintf" call (to avoid buffer + overflows) on increasing potential sizes for the output result. */ + + if ((n = (int) (2*strlen (fmt))) < START_VSNBUFF) n = START_VSNBUFF; + if (NULL == (buff = bfromcstralloc (n + 2, ""))) { + n = 1; + if (NULL == (buff = bfromcstralloc (n + 2, ""))) return NULL; + } + + for (;;) { + va_start (arglist, fmt); + exvsnprintf (r, (char *) buff->data, n + 1, fmt, arglist); + va_end (arglist); + + buff->data[n] = (unsigned char) '\0'; + buff->slen = (int) (strlen) ((char *) buff->data); + + if (buff->slen < n) break; + + if (r > n) n = r; else n += n; + + if (BSTR_OK != balloc (buff, n + 2)) { + bdestroy (buff); + return NULL; + } + } + + return buff; +} + +/* int bvcformata (bstring b, int count, const char * fmt, va_list arglist) + * + * The bvcformata function formats data under control of the format control + * string fmt and attempts to append the result to b. The fmt parameter is + * the same as that of the printf function. The variable argument list is + * replaced with arglist, which has been initialized by the va_start macro. + * The size of the appended output is upper bounded by count. If the + * required output exceeds count, the string b is not augmented with any + * contents and a value below BSTR_ERR is returned. If a value below -count + * is returned then it is recommended that the negative of this value be + * used as an update to the count in a subsequent pass. On other errors, + * such as running out of memory, parameter errors or numeric wrap around + * BSTR_ERR is returned. 
BSTR_OK is returned when the output is successfully + * generated and appended to b. + * + * Note: There is no sanity checking of arglist, and this function is + * destructive of the contents of b from the b->slen point onward. If there + * is an early generation of a '\0' character, the bstring will be truncated + * to this end point. + */ +int bvcformata (bstring b, int count, const char * fmt, va_list arg) { +int n, r, l; + + if (b == NULL || fmt == NULL || count <= 0 || b->data == NULL + || b->mlen <= 0 || b->slen < 0 || b->slen > b->mlen) return BSTR_ERR; + + if (count > (n = b->slen + count) + 2) return BSTR_ERR; + if (BSTR_OK != balloc (b, n + 2)) return BSTR_ERR; + + exvsnprintf (r, (char *) b->data + b->slen, count + 2, fmt, arg); + + /* Did the operation complete successfully within bounds? */ + for (l = b->slen; l <= n; l++) { + if ('\0' == b->data[l]) { + b->slen = l; + return BSTR_OK; + } + } + + /* Abort, since the buffer was not large enough. The return value + tries to help set what the retry length should be. */ + + b->data[b->slen] = '\0'; + if (r > count + 1) { /* Does r specify a particular target length? */ + n = r; + } else { + n = count + count; /* If not, just double the size of count */ + if (count > n) n = INT_MAX; + } + n = -n; + + if (n > BSTR_ERR-1) n = BSTR_ERR-1; + return n; +} + +#endif diff --git a/src/cbstring/bstrlib.h b/src/cbstring/bstrlib.h new file mode 100644 index 0000000..24626b9 --- /dev/null +++ b/src/cbstring/bstrlib.h @@ -0,0 +1,304 @@ +/* + * This source file is part of the bstring string library. This code was + * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause + * BSD open source license or GPL v2.0. Refer to the accompanying documentation + * for details on usage and license. + */ + +/* + * bstrlib.h + * + * This file is the header file for the core module for implementing the + * bstring functions. 
+ */ + +#ifndef BSTRLIB_INCLUDE +#define BSTRLIB_INCLUDE + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include +#include + +#if !defined (BSTRLIB_VSNP_OK) && !defined (BSTRLIB_NOVSNP) +# if defined (__TURBOC__) && !defined (__BORLANDC__) +# define BSTRLIB_NOVSNP +# endif +#endif + +#define BSTR_ERR (-1) +#define BSTR_OK (0) +#define BSTR_BS_BUFF_LENGTH_GET (0) + +typedef struct tagbstring * bstring; +typedef const struct tagbstring * const_bstring; + +/* Copy functions */ +#define cstr2bstr bfromcstr +extern bstring bfromcstr (const char * str); +extern bstring bfromcstralloc (int mlen, const char * str); +extern bstring blk2bstr (const void * blk, int len); +extern char * bstr2cstr (const_bstring s, char z); +extern int bcstrfree (char * s); +extern bstring bstrcpy (const_bstring b1); +extern int bassign (bstring a, const_bstring b); +extern int bassignmidstr (bstring a, const_bstring b, int left, int len); +extern int bassigncstr (bstring a, const char * str); +extern int bassignblk (bstring a, const void * s, int len); + +/* Destroy function */ +extern int bdestroy (bstring b); + +/* Space allocation hinting functions */ +extern int balloc (bstring s, int len); +extern int ballocmin (bstring b, int len); + +/* Substring extraction */ +extern bstring bmidstr (const_bstring b, int left, int len); + +/* Various standard manipulations */ +extern int bconcat (bstring b0, const_bstring b1); +extern int bconchar (bstring b0, char c); +extern int bcatcstr (bstring b, const char * s); +extern int bcatblk (bstring b, const void * s, int len); +extern int binsert (bstring s1, int pos, const_bstring s2, unsigned char fill); +extern int binsertch (bstring s1, int pos, int len, unsigned char fill); +extern int breplace (bstring b1, int pos, int len, const_bstring b2, unsigned char fill); +extern int bdelete (bstring s1, int pos, int len); +extern int bsetstr (bstring b0, int pos, const_bstring b1, unsigned char fill); +extern int btrunc (bstring b, int n); + +/* Scan/search functions */ +extern int bstricmp (const_bstring b0, const_bstring b1); +extern int bstrnicmp (const_bstring b0, const_bstring b1, int n); +extern int biseqcaseless (const_bstring b0, const_bstring b1); +extern int bisstemeqcaselessblk (const_bstring b0, const void * blk, int len); +extern int biseq (const_bstring b0, const_bstring b1); +extern int bisstemeqblk (const_bstring b0, const void * blk, int len); +extern int biseqcstr (const_bstring b, const char * s); +extern int biseqcstrcaseless (const_bstring b, const char * s); +extern int bstrcmp (const_bstring b0, const_bstring b1); +extern int bstrncmp (const_bstring b0, const_bstring b1, int n); +extern int binstr (const_bstring s1, int pos, const_bstring s2); +extern int binstrr (const_bstring s1, int pos, const_bstring s2); +extern int binstrcaseless (const_bstring s1, int pos, const_bstring s2); +extern int binstrrcaseless (const_bstring s1, int pos, const_bstring s2); +extern int bstrchrp (const_bstring b, int c, int pos); +extern int bstrrchrp (const_bstring b, int c, int pos); +#define bstrchr(b,c) bstrchrp ((b), (c), 0) +#define bstrrchr(b,c) bstrrchrp ((b), (c), blength(b)-1) +extern int binchr (const_bstring b0, int pos, const_bstring b1); +extern int binchrr (const_bstring b0, int pos, const_bstring b1); +extern int bninchr (const_bstring b0, int pos, const_bstring b1); +extern int bninchrr (const_bstring b0, int pos, const_bstring b1); +extern int bfindreplace (bstring b, const_bstring find, const_bstring repl, int pos); +extern int 
bfindreplacecaseless (bstring b, const_bstring find, const_bstring repl, int pos); + +/* List of string container functions */ +struct bstrList { + int qty, mlen; + bstring * entry; +}; +extern struct bstrList * bstrListCreate (void); +extern int bstrListDestroy (struct bstrList * sl); +extern int bstrListAlloc (struct bstrList * sl, int msz); +extern int bstrListAllocMin (struct bstrList * sl, int msz); + +/* String split and join functions */ +extern struct bstrList * bsplit (const_bstring str, unsigned char splitChar); +extern struct bstrList * bsplits (const_bstring str, const_bstring splitStr); +extern struct bstrList * bsplitstr (const_bstring str, const_bstring splitStr); +extern bstring bjoin (const struct bstrList * bl, const_bstring sep); +extern int bsplitcb (const_bstring str, unsigned char splitChar, int pos, + int (* cb) (void * parm, int ofs, int len), void * parm); +extern int bsplitscb (const_bstring str, const_bstring splitStr, int pos, + int (* cb) (void * parm, int ofs, int len), void * parm); +extern int bsplitstrcb (const_bstring str, const_bstring splitStr, int pos, + int (* cb) (void * parm, int ofs, int len), void * parm); + +/* Miscellaneous functions */ +extern int bpattern (bstring b, int len); +extern int btoupper (bstring b); +extern int btolower (bstring b); +extern int bltrimws (bstring b); +extern int brtrimws (bstring b); +extern int btrimws (bstring b); + +/* <*>printf format functions */ +#if !defined (BSTRLIB_NOVSNP) +extern bstring bformat (const char * fmt, ...); +extern int bformata (bstring b, const char * fmt, ...); +extern int bassignformat (bstring b, const char * fmt, ...); +extern int bvcformata (bstring b, int count, const char * fmt, va_list arglist); + +#define bvformata(ret, b, fmt, lastarg) { \ +bstring bstrtmp_b = (b); \ +const char * bstrtmp_fmt = (fmt); \ +int bstrtmp_r = BSTR_ERR, bstrtmp_sz = 16; \ + for (;;) { \ + va_list bstrtmp_arglist; \ + va_start (bstrtmp_arglist, lastarg); \ + bstrtmp_r = bvcformata (bstrtmp_b, bstrtmp_sz, bstrtmp_fmt, bstrtmp_arglist); \ + va_end (bstrtmp_arglist); \ + if (bstrtmp_r >= 0) { /* Everything went ok */ \ + bstrtmp_r = BSTR_OK; \ + break; \ + } else if (-bstrtmp_r <= bstrtmp_sz) { /* A real error? 
*/ \ + bstrtmp_r = BSTR_ERR; \ + break; \ + } \ + bstrtmp_sz = -bstrtmp_r; /* Doubled or target size */ \ + } \ + ret = bstrtmp_r; \ +} + +#endif + +typedef int (*bNgetc) (void *parm); +typedef size_t (* bNread) (void *buff, size_t elsize, size_t nelem, void *parm); + +/* Input functions */ +extern bstring bgets (bNgetc getcPtr, void * parm, char terminator); +extern bstring bread (bNread readPtr, void * parm); +extern int bgetsa (bstring b, bNgetc getcPtr, void * parm, char terminator); +extern int bassigngets (bstring b, bNgetc getcPtr, void * parm, char terminator); +extern int breada (bstring b, bNread readPtr, void * parm); + +/* Stream functions */ +extern struct bStream * bsopen (bNread readPtr, void * parm); +extern void * bsclose (struct bStream * s); +extern int bsbufflength (struct bStream * s, int sz); +extern int bsreadln (bstring b, struct bStream * s, char terminator); +extern int bsreadlns (bstring r, struct bStream * s, const_bstring term); +extern int bsread (bstring b, struct bStream * s, int n); +extern int bsreadlna (bstring b, struct bStream * s, char terminator); +extern int bsreadlnsa (bstring r, struct bStream * s, const_bstring term); +extern int bsreada (bstring b, struct bStream * s, int n); +extern int bsunread (struct bStream * s, const_bstring b); +extern int bspeek (bstring r, const struct bStream * s); +extern int bssplitscb (struct bStream * s, const_bstring splitStr, + int (* cb) (void * parm, int ofs, const_bstring entry), void * parm); +extern int bssplitstrcb (struct bStream * s, const_bstring splitStr, + int (* cb) (void * parm, int ofs, const_bstring entry), void * parm); +extern int bseof (const struct bStream * s); + +struct tagbstring { + int mlen; + int slen; + unsigned char * data; +}; + +/* Accessor macros */ +#define blengthe(b, e) (((b) == (void *)0 || (b)->slen < 0) ? (int)(e) : ((b)->slen)) +#define blength(b) (blengthe ((b), 0)) +#define bdataofse(b, o, e) (((b) == (void *)0 || (b)->data == (void*)0) ? (char *)(e) : ((char *)(b)->data) + (o)) +#define bdataofs(b, o) (bdataofse ((b), (o), (void *)0)) +#define bdatae(b, e) (bdataofse (b, 0, e)) +#define bdata(b) (bdataofs (b, 0)) +#define bchare(b, p, e) ((((unsigned)(p)) < (unsigned)blength(b)) ? ((b)->data[(p)]) : (e)) +#define bchar(b, p) bchare ((b), (p), '\0') + +/* Static constant string initialization macro */ +#define bsStaticMlen(q,m) {(m), (int) sizeof(q)-1, (unsigned char *) ("" q "")} +#if defined(_MSC_VER) +/* There are many versions of MSVC which emit __LINE__ as a non-constant. */ +# define bsStatic(q) bsStaticMlen(q,-32) +#endif +#ifndef bsStatic +# define bsStatic(q) bsStaticMlen(q,-__LINE__) +#endif + +/* Static constant block parameter pair */ +#define bsStaticBlkParms(q) ((void *)("" q "")), ((int) sizeof(q)-1) + +/* Reference building macros */ +#define cstr2tbstr btfromcstr +#define btfromcstr(t,s) { \ + (t).data = (unsigned char *) (s); \ + (t).slen = ((t).data) ? 
((int) (strlen) ((char *)(t).data)) : 0; \ + (t).mlen = -1; \ +} +#define blk2tbstr(t,s,l) { \ + (t).data = (unsigned char *) (s); \ + (t).slen = l; \ + (t).mlen = -1; \ +} +#define btfromblk(t,s,l) blk2tbstr(t,s,l) +#define bmid2tbstr(t,b,p,l) { \ + const_bstring bstrtmp_s = (b); \ + if (bstrtmp_s && bstrtmp_s->data && bstrtmp_s->slen >= 0) { \ + int bstrtmp_left = (p); \ + int bstrtmp_len = (l); \ + if (bstrtmp_left < 0) { \ + bstrtmp_len += bstrtmp_left; \ + bstrtmp_left = 0; \ + } \ + if (bstrtmp_len > bstrtmp_s->slen - bstrtmp_left) \ + bstrtmp_len = bstrtmp_s->slen - bstrtmp_left; \ + if (bstrtmp_len <= 0) { \ + (t).data = (unsigned char *)""; \ + (t).slen = 0; \ + } else { \ + (t).data = bstrtmp_s->data + bstrtmp_left; \ + (t).slen = bstrtmp_len; \ + } \ + } else { \ + (t).data = (unsigned char *)""; \ + (t).slen = 0; \ + } \ + (t).mlen = -__LINE__; \ +} +#define btfromblkltrimws(t,s,l) { \ + int bstrtmp_idx = 0, bstrtmp_len = (l); \ + unsigned char * bstrtmp_s = (s); \ + if (bstrtmp_s && bstrtmp_len >= 0) { \ + for (; bstrtmp_idx < bstrtmp_len; bstrtmp_idx++) { \ + if (!isspace (bstrtmp_s[bstrtmp_idx])) break; \ + } \ + } \ + (t).data = bstrtmp_s + bstrtmp_idx; \ + (t).slen = bstrtmp_len - bstrtmp_idx; \ + (t).mlen = -__LINE__; \ +} +#define btfromblkrtrimws(t,s,l) { \ + int bstrtmp_len = (l) - 1; \ + unsigned char * bstrtmp_s = (s); \ + if (bstrtmp_s && bstrtmp_len >= 0) { \ + for (; bstrtmp_len >= 0; bstrtmp_len--) { \ + if (!isspace (bstrtmp_s[bstrtmp_len])) break; \ + } \ + } \ + (t).data = bstrtmp_s; \ + (t).slen = bstrtmp_len + 1; \ + (t).mlen = -__LINE__; \ +} +#define btfromblktrimws(t,s,l) { \ + int bstrtmp_idx = 0, bstrtmp_len = (l) - 1; \ + unsigned char * bstrtmp_s = (s); \ + if (bstrtmp_s && bstrtmp_len >= 0) { \ + for (; bstrtmp_idx <= bstrtmp_len; bstrtmp_idx++) { \ + if (!isspace (bstrtmp_s[bstrtmp_idx])) break; \ + } \ + for (; bstrtmp_len >= bstrtmp_idx; bstrtmp_len--) { \ + if (!isspace (bstrtmp_s[bstrtmp_len])) break; \ + } \ + } \ + (t).data = bstrtmp_s + bstrtmp_idx; \ + (t).slen = bstrtmp_len + 1 - bstrtmp_idx; \ + (t).mlen = -__LINE__; \ +} + +/* Write protection macros */ +#define bwriteprotect(t) { if ((t).mlen >= 0) (t).mlen = -1; } +#define bwriteallow(t) { if ((t).mlen == -1) (t).mlen = (t).slen + ((t).slen == 0); } +#define biswriteprotected(t) ((t).mlen <= 0) + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/cbstring/bstrlib.txt b/src/cbstring/bstrlib.txt new file mode 100644 index 0000000..bf48491 --- /dev/null +++ b/src/cbstring/bstrlib.txt @@ -0,0 +1,3202 @@ +Better String library +--------------------- + +by Paul Hsieh + +The bstring library is an attempt to provide improved string processing +functionality to the C and C++ language. At the heart of the bstring library +(Bstrlib for short) is the management of "bstring"s which are a significant +improvement over '\0' terminated char buffers. + +=============================================================================== + +Motivation +---------- + +The standard C string library has serious problems: + + 1) Its use of '\0' to denote the end of the string means knowing a + string's length is O(n) when it could be O(1). + 2) It imposes an interpretation for the character value '\0'. + 3) gets() always exposes the application to a buffer overflow. + 4) strtok() modifies the string its parsing and thus may not be usable in + programs which are re-entrant or multithreaded. + 5) fgets has the unusual semantic of ignoring '\0's that occur before + '\n's are consumed. 
+ 6) There is no memory management, and actions performed such as strcpy, + strcat and sprintf are common places for buffer overflows. + 7) strncpy() doesn't '\0' terminate the destination in some cases. + 8) Passing NULL to C library string functions causes an undefined NULL + pointer access. + 9) Parameter aliasing (overlapping, or self-referencing parameters) + within most C library functions has undefined behavior. + 10) Many C library string function calls take integer parameters with + restricted legal ranges. Parameters passed outside these ranges are + not typically detected and cause undefined behavior. + +So the desire is to create an alternative string library that does not suffer +from the above problems and adds in the following functionality: + + 1) Incorporate string functionality seen from other languages. + a) MID$() - from BASIC + b) split()/join() - from Python + c) string/char x n - from Perl + 2) Implement analogs to functions that combine stream IO and char buffers + without creating a dependency on stream IO functionality. + 3) Implement the basic text editor-style functions insert, delete, find, + and replace. + 4) Implement reference based sub-string access (as a generalization of + pointer arithmetic.) + 5) Implement runtime write protection for strings. + +There is also a desire to avoid "API-bloat". So functionality that can be +implemented trivially in other functionality is omitted. So there is no +left$() or right$() or reverse() or anything like that as part of the core +functionality. + +Explaining Bstrings +------------------- + +A bstring is basically a header which wraps a pointer to a char buffer. Lets +start with the declaration of a struct tagbstring: + + struct tagbstring { + int mlen; + int slen; + unsigned char * data; + }; + +This definition is considered exposed, not opaque (though it is neither +necessary nor recommended that low level maintenance of bstrings be performed +whenever the abstract interfaces are sufficient). The mlen field (usually) +describes a lower bound for the memory allocated for the data field. The +slen field describes the exact length for the bstring. The data field is a +single contiguous buffer of unsigned chars. Note that the existence of a '\0' +character in the unsigned char buffer pointed to by the data field does not +necessarily denote the end of the bstring. + +To be a well formed modifiable bstring the mlen field must be at least the +length of the slen field, and slen must be non-negative. Furthermore, the +data field must point to a valid buffer in which access to the first mlen +characters has been acquired. So the minimal check for correctness is: + + (slen >= 0 && mlen >= slen && data != NULL) + +bstrings returned by bstring functions can be assumed to be either NULL or +satisfy the above property. (When bstrings are only readable, the mlen >= +slen restriction is not required; this is discussed later in this section.) +A bstring itself is just a pointer to a struct tagbstring: + + typedef struct tagbstring * bstring; + +Note that use of the prefix "tag" in struct tagbstring is required to work +around the inconsistency between C and C++'s struct namespace usage. This +definition is also considered exposed. + +Bstrlib basically manages bstrings allocated as a header and an associated +data-buffer. Since the implementation is exposed, they can also be +constructed manually. 
Functions which mutate bstrings assume that the header +and data buffer have been malloced; the bstring library may perform free() or +realloc() on both the header and data buffer of any bstring parameter. +Functions which return bstring's create new bstrings. The string memory is +freed by a bdestroy() call (or using the bstrFree macro). + +The following related typedef is also provided: + + typedef const struct tagbstring * const_bstring; + +which is also considered exposed. These are directly bstring compatible (no +casting required) but are just used for parameters which are meant to be +non-mutable. So in general, bstring parameters which are read as input but +not meant to be modified will be declared as const_bstring, and bstring +parameters which may be modified will be declared as bstring. This convention +is recommended for user written functions as well. + +Since bstrings maintain interoperability with C library char-buffer style +strings, all functions which modify, update or create bstrings also append a +'\0' character into the position slen + 1. This trailing '\0' character is +not required for bstrings input to the bstring functions; this is provided +solely as a convenience for interoperability with standard C char-buffer +functionality. + +Analogs for the ANSI C string library functions have been created when they +are necessary, but have also been left out when they are not. In particular +there are no functions analogous to fwrite, or puts just for the purposes of +bstring. The ->data member of any string is exposed, and therefore can be +used just as easily as char buffers for C functions which read strings. + +For those that wish to hand construct bstrings, the following should be kept +in mind: + + 1) While bstrlib can accept constructed bstrings without terminating + '\0' characters, the rest of the C language string library will not + function properly on such non-terminated strings. This is obvious + but must be kept in mind. + 2) If it is intended that a constructed bstring be written to by the + bstring library functions then the data portion should be allocated + by the malloc function and the slen and mlen fields should be entered + properly. The struct tagbstring header is not reallocated, and only + freed by bdestroy. + 3) Writing arbitrary '\0' characters at various places in the string + will not modify its length as perceived by the bstring library + functions. In fact, '\0' is a legitimate non-terminating character + for a bstring to contain. + 4) For read only parameters, bstring functions do not check the mlen. + I.e., the minimal correctness requirements are reduced to: + + (slen >= 0 && data != NULL) + +Better pointer arithmetic +------------------------- + +One built-in feature of '\0' terminated char * strings, is that its very easy +and fast to obtain a reference to the tail of any string using pointer +arithmetic. Bstrlib does one better by providing a way to get a reference to +any substring of a bstring (or any other length delimited block of memory.) +So rather than just having pointer arithmetic, with bstrlib one essentially +has segment arithmetic. This is achieved using the macro blk2tbstr() which +builds a reference to a block of memory and the macro bmid2tbstr() which +builds a reference to a segment of a bstring. Bstrlib also includes +functions for direct consumption of memory blocks into bstrings, namely +bcatblk () and blk2bstr (). 
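+
+As a brief illustrative sketch (the example string, the offsets and the
+variable names below are arbitrary and not part of Bstrlib), a header on the
+stack can reference a segment of an existing bstring in place and then be
+passed to any function that accepts a read-only const_bstring parameter:
+
+    struct tagbstring word;              /* plain header, no heap allocation */
+    bstring line = bfromcstr ("red green blue");
+
+    bmid2tbstr (word, line, 4, 5);       /* refers to "green" in place       */
+    if (1 == biseqcstr (&word, "green")) {
+        /* &word may be passed wherever a const_bstring is accepted */
+    }
+
+    bdestroy (line);                     /* the reference itself is not freed */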
+ +One scenario where this can be extremely useful is when string contains many +substrings which one would like to pass as read-only reference parameters to +some string consuming function without the need to allocate entire new +containers for the string data. More concretely, imagine parsing a command +line string whose parameters are space delimited. This can only be done for +tails of the string with '\0' terminated char * strings. + +Improved NULL semantics and error handling +------------------------------------------ + +Unless otherwise noted, if a NULL pointer is passed as a bstring or any other +detectably illegal parameter, the called function will return with an error +indicator (either NULL or BSTR_ERR) rather than simply performing a NULL +pointer access, or having undefined behavior. + +To illustrate the value of this, consider the following example: + + strcpy (p = malloc (13 * sizeof (char)), "Hello,"); + strcat (p, " World"); + +This is not correct because malloc may return NULL (due to an out of memory +condition), and the behaviour of strcpy is undefined if either of its +parameters are NULL. However: + + bstrcat (p = bfromcstr ("Hello,"), q = bfromcstr (" World")); + bdestroy (q); + +is well defined, because if either p or q are assigned NULL (indicating a +failure to allocate memory) both bstrcat and bdestroy will recognize it and +perform no detrimental action. + +Note that it is not necessary to check any of the members of a returned +bstring for internal correctness (in particular the data member does not need +to be checked against NULL when the header is non-NULL), since this is +assured by the bstring library itself. + +bStreams +-------- + +In addition to the bgets and bread functions, bstrlib can abstract streams +with a high performance read only stream called a bStream. In general, the +idea is to open a core stream (with something like fopen) then pass its +handle as well as a bNread function pointer (like fread) to the bsopen +function which will return a handle to an open bStream. Then the functions +bsread, bsreadln or bsreadlns can be called to read portions of the stream. +Finally, the bsclose function is called to close the bStream -- it will +return a handle to the original (core) stream. So bStreams, essentially, +wrap other streams. + +The bStreams have two main advantages over the bgets and bread (as well as +fgets/ungetc) paradigms: + +1) Improved functionality via the bunread function which allows a stream to + unread characters, giving the bStream stack-like functionality if so + desired. +2) A very high performance bsreadln function. The C library function fgets() + (and the bgets function) can typically be written as a loop on top of + fgetc(), thus paying all of the overhead costs of calling fgetc on a per + character basis. bsreadln will read blocks at a time, thus amortizing the + overhead of fread calls over many characters at once. + +However, clearly bStreams are suboptimal or unusable for certain kinds of +streams (stdin) or certain usage patterns (a few spotty, or non-sequential +reads from a slow stream.) For those situations, using bgets will be more +appropriate. + +The semantics of bStreams allows practical construction of layerable data +streams. What this means is that by writing a bNread compatible function on +top of a bStream, one can construct a new bStream on top of it. 
This can be +useful for writing multi-pass parsers that don't actually read the entire +input more than once and don't require the use of intermediate storage. + +Aliasing +-------- + +Aliasing occurs when a function is given two parameters which point to data +structures which overlap in the memory they occupy. While this does not +disturb read only functions, for many libraries this can make functions that +write to these memory locations malfunction. This is a common problem of the +C standard library and especially the string functions in the C standard +library. + +The C standard string library is entirely char by char oriented (as is +bstring) which makes conforming implementations alias safe for some +scenarios. However no actual detection of aliasing is typically performed, +so it is easy to find cases where the aliasing will cause anomolous or +undesirable behaviour (consider: strcat (p, p).) The C99 standard includes +the "restrict" pointer modifier which allows the compiler to document and +assume a no-alias condition on usage. However, only the most trivial cases +can be caught (if at all) by the compiler at compile time, and thus there is +no actual enforcement of non-aliasing. + +Bstrlib, by contrast, permits aliasing and is completely aliasing safe, in +the C99 sense of aliasing. That is to say, under the assumption that +pointers of incompatible types from distinct objects can never alias, bstrlib +is completely aliasing safe. (In practice this means that the data buffer +portion of any bstring and header of any bstring are assumed to never alias.) +With the exception of the reference building macros, the library behaves as +if all read-only parameters are first copied and replaced by temporary +non-aliased parameters before any writing to any output bstring is performed +(though actual copying is extremely rarely ever done.) + +Besides being a useful safety feature, bstring searching/comparison +functions can improve to O(1) execution when aliasing is detected. + +Note that aliasing detection and handling code in Bstrlib is generally +extremely cheap. There is almost never any appreciable performance penalty +for using aliased parameters. + +Reenterancy +----------- + +Nearly every function in Bstrlib is a leaf function, and is completely +reenterable with the exception of writing to common bstrings. The split +functions which use a callback mechanism requires only that the source string +not be destroyed by the callback function unless the callback function returns +with an error status (note that Bstrlib functions which return an error do +not modify the string in any way.) The string can in fact be modified by the +callback and the behaviour is deterministic. See the documentation of the +various split functions for more details. + +Undefined scenarios +------------------- + +One of the basic important premises for Bstrlib is to not to increase the +propogation of undefined situations from parameters that are otherwise legal +in of themselves. In particular, except for extremely marginal cases, usages +of bstrings that use the bstring library functions alone cannot lead to any +undefined action. But due to C/C++ language and library limitations, there +is no way to define a non-trivial library that is completely without +undefined operations. All such possible undefined operations are described +below: + +1) bstrings or struct tagbstrings that are not explicitely initialized cannot + be passed as a parameter to any bstring function. 
+2) The members of the NULL bstring cannot be accessed directly. (Though all + APIs and macros detect the NULL bstring.) +3) A bstring whose data member has not been obtained from a malloc or + compatible call and which is write accessible passed as a writable + parameter will lead to undefined results. (i.e., do not writeAllow any + constructed bstrings unless the data portion has been obtained from the + heap.) +4) If the headers of two strings alias but are not identical (which can only + happen via a defective manual construction), then passing them to a + bstring function in which one is writable is not defined. +5) If the mlen member is larger than the actual accessible length of the data + member for a writable bstring, or if the slen member is larger than the + readable length of the data member for a readable bstring, then the + corresponding bstring operations are undefined. +6) Any bstring definition whose header or accessible data portion has been + assigned to inaccessible or otherwise illegal memory clearly cannot be + acted upon by the bstring library in any way. +7) Destroying the source of an incremental split from within the callback + and not returning with a negative value (indicating that it should abort) + will lead to undefined behaviour. (Though *modifying* or adjusting the + state of the source data, even if those modification fail within the + bstrlib API, has well defined behavior.) +8) Modifying a bstring which is write protected by direct access has + undefined behavior. + +While this may seem like a long list, with the exception of invalid uses of +the writeAllow macro, and source destruction during an iterative split +without an accompanying abort, no usage of the bstring API alone can cause +any undefined scenario to occurr. I.e., the policy of restricting usage of +bstrings to the bstring API can significantly reduce the risk of runtime +errors (in practice it should eliminate them) related to string manipulation +due to undefined action. + +C++ wrapper +----------- + +A C++ wrapper has been created to enable bstring functionality for C++ in the +most natural (for C++ programers) way possible. The mandate for the C++ +wrapper is different from the base C bstring library. Since the C++ language +has far more abstracting capabilities, the CBString structure is considered +fully abstracted -- i.e., hand generated CBStrings are not supported (though +conversion from a struct tagbstring is allowed) and all detectable errors are +manifest as thrown exceptions. + +- The C++ class definitions are all under the namespace Bstrlib. bstrwrap.h + enables this namespace (with a using namespace Bstrlib; directive at the + end) unless the macro BSTRLIB_DONT_ASSUME_NAMESPACE has been defined before + it is included. + +- Erroneous accesses results in an exception being thrown. The exception + parameter is of type "struct CBStringException" which is derived from + std::exception if STL is used. A verbose description of the error message + can be obtained from the what() method. + +- CBString is a C++ structure derived from a struct tagbstring. An address + of a CBString cast to a bstring must not be passed to bdestroy. The bstring + C API has been made C++ safe and can be used directly in a C++ project. + +- It includes constructors which can take a char, '\0' terminated char + buffer, tagbstring, (char, repeat-value), a length delimited buffer or a + CBStringList to initialize it. + +- Concatenation is performed with the + and += operators. 
Comparisons are + done with the ==, !=, <, >, <= and >= operators. Note that == and != use + the biseq call, while <, >, <= and >= use bstrcmp. + +- CBString's can be directly cast to const character buffers. + +- CBString's can be directly cast to double, float, int or unsigned int so + long as the CBString are decimal representations of those types (otherwise + an exception will be thrown). Converting the other way should be done with + the format(a) method(s). + +- CBString contains the length, character and [] accessor methods. The + character and [] accessors are aliases of each other. If the bounds for + the string are exceeded, an exception is thrown. To avoid the overhead for + this check, first cast the CBString to a (const char *) and use [] to + dereference the array as normal. Note that the character and [] accessor + methods allows both reading and writing of individual characters. + +- The methods: format, formata, find, reversefind, findcaseless, + reversefindcaseless, midstr, insert, insertchrs, replace, findreplace, + findreplacecaseless, remove, findchr, nfindchr, alloc, toupper, tolower, + gets, read are analogous to the functions that can be found in the C API. + +- The caselessEqual and caselessCmp methods are analogous to biseqcaseless + and bstricmp functions respectively. + +- Note that just like the bformat function, the format and formata methods do + not automatically cast CBStrings into char * strings for "%s"-type + substitutions: + + CBString w("world"); + CBString h("Hello"); + CBString hw; + + /* The casts are necessary */ + hw.format ("%s, %s", (const char *)h, (const char *)w); + +- The methods trunc and repeat have been added instead of using pattern. + +- ltrim, rtrim and trim methods have been added. These remove characters + from a given character string set (defaulting to the whitespace characters) + from either the left, right or both ends of the CBString, respectively. + +- The method setsubstr is also analogous in functionality to bsetstr, except + that it cannot be passed NULL. Instead the method fill and the fill-style + constructor have been supplied to enable this functionality. + +- The writeprotect(), writeallow() and iswriteprotected() methods are + analogous to the bwriteprotect(), bwriteallow() and biswriteprotected() + macros in the C API. Write protection semantics in CBString are stronger + than with the C API in that indexed character assignment is checked for + write protection. However, unlike with the C API, a write protected + CBString can be destroyed by the destructor. + +- CBStream is a C++ structure which wraps a struct bStream (its not derived + from it, since destruction is slightly different). It is constructed by + passing in a bNread function pointer and a stream parameter cast to void *. + This structure includes methods for detecting eof, setting the buffer + length, reading the whole stream or reading entries line by line or block + by block, an unread function, and a peek function. + +- If STL is available, the CBStringList structure is derived from a vector of + CBString with various split methods. The split method has been overloaded + to accept either a character or CBString as the second parameter (when the + split parameter is a CBString any character in that CBString is used as a + seperator). The splitstr method takes a CBString as a substring seperator. + Joins can be performed via a CBString constructor which takes a + CBStringList as a parameter, or just using the CBString::join() method. 
+ +- If there is proper support for std::iostreams, then the >> and << operators + and the getline() function have been added (with semantics the same as + those for std::string). + +Multithreading +-------------- + +A mutable bstring is kind of analogous to a small (two entry) linked list +allocated by malloc, with all aliasing completely under programmer control. +I.e., manipulation of one bstring will never affect any other distinct +bstring unless explicitely constructed to do so by the programmer via hand +construction or via building a reference. Bstrlib also does not use any +static or global storage, so there are no hidden unremovable race conditions. +Bstrings are also clearly not inherently thread local. So just like +char *'s, bstrings can be passed around from thread to thread and shared and +so on, so long as modifications to a bstring correspond to some kind of +exclusive access lock as should be expected (or if the bstring is read-only, +which can be enforced by bstring write protection) for any sort of shared +object in a multithreaded environment. + +Bsafe module +------------ + +For convenience, a bsafe module has been included. The idea is that if this +module is included, inadvertant usage of the most dangerous C functions will +be overridden and lead to an immediate run time abort. Of course, it should +be emphasized that usage of this module is completely optional. The +intention is essentially to provide an option for creating project safety +rules which can be enforced mechanically rather than socially. This is +useful for larger, or open development projects where its more difficult to +enforce social rules or "coding conventions". + +Problems not solved +------------------- + +Bstrlib is written for the C and C++ languages, which have inherent weaknesses +that cannot be easily solved: + +1. Memory leaks: Forgetting to call bdestroy on a bstring that is about to be + unreferenced, just as forgetting to call free on a heap buffer that is + about to be dereferenced. Though bstrlib itself is leak free. +2. Read before write usage: In C, declaring an auto bstring does not + automatically fill it with legal/valid contents. This problem has been + somewhat mitigated in C++. (The bstrDeclare and bstrFree macros from + bstraux can be used to help mitigate this problem.) + +Other problems not addressed: + +3. Built-in mutex usage to automatically avoid all bstring internal race + conditions in multitasking environments: The problem with trying to + implement such things at this low a level is that it is typically more + efficient to use locks in higher level primitives. There is also no + platform independent way to implement locks or mutexes. +4. Unicode/widecharacter support. + +Note that except for spotty support of wide characters, the default C +standard library does not address any of these problems either. + +Configurable compilation options +-------------------------------- + +All configuration options are meant solely for the purpose of compiler +compatibility. Configuration options are not meant to change the semantics +or capabilities of the library, except where it is unavoidable. + +Since some C++ compilers don't include the Standard Template Library and some +have the options of disabling exception handling, a number of macros can be +used to conditionally compile support for each of this: + +BSTRLIB_CAN_USE_STL + + - defining this will enable the used of the Standard Template Library. + Defining BSTRLIB_CAN_USE_STL overrides the BSTRLIB_CANNOT_USE_STL macro. 
BSTRLIB_CANNOT_USE_STL

  - defining this will disable the use of the Standard Template Library.
    Defining BSTRLIB_CAN_USE_STL overrides the BSTRLIB_CANNOT_USE_STL macro.

BSTRLIB_CAN_USE_IOSTREAM

  - defining this will enable the use of the std iostream classes. Defining
    BSTRLIB_CAN_USE_IOSTREAM overrides the BSTRLIB_CANNOT_USE_IOSTREAM macro.

BSTRLIB_CANNOT_USE_IOSTREAM

  - defining this will disable the use of the std iostream classes. Defining
    BSTRLIB_CAN_USE_IOSTREAM overrides the BSTRLIB_CANNOT_USE_IOSTREAM macro.

BSTRLIB_THROWS_EXCEPTIONS

  - defining this will enable the exception handling within bstring.
    Defining BSTRLIB_THROWS_EXCEPTIONS overrides the
    BSTRLIB_DOESNT_THROW_EXCEPTIONS macro.

BSTRLIB_DOESNT_THROW_EXCEPTIONS

  - defining this will disable the exception handling within bstring.
    Defining BSTRLIB_THROWS_EXCEPTIONS overrides the
    BSTRLIB_DOESNT_THROW_EXCEPTIONS macro.

Note that these macros must be defined consistently throughout all modules
that use CBStrings including bstrwrap.cpp.

Some older C compilers do not support functions such as vsnprintf. This is
handled by the following macro variables:

BSTRLIB_NOVSNP

  - defining this indicates that the compiler does not support vsnprintf.
    This will cause bformat and bformata to not be declared. Note that
    for some compilers, such as Turbo C, this is set automatically.
    Defining BSTRLIB_NOVSNP overrides the BSTRLIB_VSNP_OK macro.

BSTRLIB_VSNP_OK

  - defining this will disable the autodetection of compilers that do not
    support vsnprintf. Defining BSTRLIB_NOVSNP overrides the
    BSTRLIB_VSNP_OK macro.

Semantic compilation options
----------------------------

Bstrlib comes with very few compilation options for changing the semantics
of the library. These are described below.

BSTRLIB_DONT_ASSUME_NAMESPACE

  - Defining this before including bstrwrap.h will disable the automatic
    enabling of the Bstrlib namespace for the C++ declarations.

BSTRLIB_DONT_USE_VIRTUAL_DESTRUCTOR

  - Defining this will make the CBString destructor non-virtual.

BSTRLIB_MEMORY_DEBUG

  - Defining this will cause the bstrlib modules bstrlib.c and bstrwrap.cpp
    to invoke a #include "memdbg.h". memdbg.h has to be supplied by the user.

Note that these macros must be defined consistently throughout all modules
that use bstrings or CBStrings including bstrlib.c, bstraux.c and
bstrwrap.cpp.

===============================================================================

Files
-----

bstrlib.c - C implementation of bstring functions.
bstrlib.h - C header file for bstring functions.
bstraux.c - C example that implements trivial additional functions.
bstraux.h - C header for bstraux.c
bstest.c - C unit/regression test for bstrlib.c

bstrwrap.cpp - C++ implementation of CBString.
bstrwrap.h - C++ header file for CBString.
test.cpp - C++ unit/regression test for bstrwrap.cpp

bsafe.c - C runtime stubs to abort usage of unsafe C functions.
bsafe.h - C header file for bsafe.c functions.

C projects need only include bstrlib.h and compile/link bstrlib.c to use the
bstring library. C++ projects need to additionally include bstrwrap.h and
compile/link bstrwrap.cpp. For both, there may be a need to make choices
about feature configuration as described in the "Configurable compilation
options" section above.
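For a quick orientation, a minimal C program using the library might look
like the following sketch (it assumes only that bstrlib.h is on the include
path and that bstrlib.c is compiled and linked into the program; the calls
used are all described in "The functions" section below):

    #include <stdio.h>
    #include "bstrlib.h"

    int main (void) {
        bstring b = bfromcstr ("Hello");   /* construct from a C string */
        if (!b) return 1;
        bcatcstr (b, ", world");           /* append a C string         */
        printf ("%s (%d characters)\n", bdata (b), blength (b));
        bdestroy (b);                      /* release header and data   */
        return 0;
    }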
+ +Other files that are included in this archive are: + +license.txt - The 3 clause BSD license for Bstrlib +gpl.txt - The GPL version 2 +security.txt - A security statement useful for auditting Bstrlib +porting.txt - A guide to porting Bstrlib +bstrlib.txt - This file + +=============================================================================== + +The functions +------------- + + extern bstring bfromcstr (const char * str); + + Take a standard C library style '\0' terminated char buffer and generate + a bstring with the same contents as the char buffer. If an error occurs + NULL is returned. + + So for example: + + bstring b = bfromcstr ("Hello"); + if (!b) { + fprintf (stderr, "Out of memory"); + } else { + puts ((char *) b->data); + } + + .......................................................................... + + extern bstring bfromcstralloc (int mlen, const char * str); + + Create a bstring which contains the contents of the '\0' terminated + char * buffer str. The memory buffer backing the bstring is at least + mlen characters in length. If an error occurs NULL is returned. + + So for example: + + bstring b = bfromcstralloc (64, someCstr); + if (b) b->data[63] = 'x'; + + The idea is that this will set the 64th character of b to 'x' if it is at + least 64 characters long otherwise do nothing. And we know this is well + defined so long as b was successfully created, since it will have been + allocated with at least 64 characters. + + .......................................................................... + + extern bstring blk2bstr (const void * blk, int len); + + Create a bstring whose contents are described by the contiguous buffer + pointing to by blk with a length of len bytes. Note that this function + creates a copy of the data in blk, rather than simply referencing it. + Compare with the blk2tbstr macro. If an error occurs NULL is returned. + + .......................................................................... + + extern char * bstr2cstr (const_bstring s, char z); + + Create a '\0' terminated char buffer which contains the contents of the + bstring s, except that any contained '\0' characters are converted to the + character in z. This returned value should be freed with bcstrfree(), by + the caller. If an error occurs NULL is returned. + + .......................................................................... + + extern int bcstrfree (char * s); + + Frees a C-string generated by bstr2cstr (). This is normally unnecessary + since it just wraps a call to free (), however, if malloc () and free () + have been redefined as a macros within the bstrlib module (via macros in + the memdbg.h backdoor) with some difference in behaviour from the std + library functions, then this allows a correct way of freeing the memory + that allows higher level code to be independent from these macro + redefinitions. + + .......................................................................... + + extern bstring bstrcpy (const_bstring b1); + + Make a copy of the passed in bstring. The copied bstring is returned if + there is no error, otherwise NULL is returned. + + .......................................................................... + + extern int bassign (bstring a, const_bstring b); + + Overwrite the bstring a with the contents of bstring b. Note that the + bstring a must be a well defined and writable bstring. If an error + occurs BSTR_ERR is returned and a is not overwritten. + + .......................................................................... 
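    Before continuing with the assignment functions, a small illustrative
    sketch combining bstrcpy and bassign (variable names here are arbitrary
    and error handling is abbreviated):

        bstring a = bfromcstr ("alpha");
        bstring b = bstrcpy (a);        /* b is an independent copy of a */
        bcatcstr (b, " beta");          /* modifying b does not affect a */
        bassign (b, a);                 /* b once again holds "alpha"    */
        bdestroy (a);
        bdestroy (b);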
+ + int bassigncstr (bstring a, const char * str); + + Overwrite the string a with the contents of char * string str. Note that + the bstring a must be a well defined and writable bstring. If an error + occurs BSTR_ERR is returned and a may be partially overwritten. + + .......................................................................... + + int bassignblk (bstring a, const void * s, int len); + + Overwrite the string a with the contents of the block (s, len). Note that + the bstring a must be a well defined and writable bstring. If an error + occurs BSTR_ERR is returned and a is not overwritten. + + .......................................................................... + + extern int bassignmidstr (bstring a, const_bstring b, int left, int len); + + Overwrite the bstring a with the middle of contents of bstring b + starting from position left and running for a length len. left and + len are clamped to the ends of b as with the function bmidstr. Note that + the bstring a must be a well defined and writable bstring. If an error + occurs BSTR_ERR is returned and a is not overwritten. + + .......................................................................... + + extern bstring bmidstr (const_bstring b, int left, int len); + + Create a bstring which is the substring of b starting from position left + and running for a length len (clamped by the end of the bstring b.) If + there was no error, the value of this constructed bstring is returned + otherwise NULL is returned. + + .......................................................................... + + extern int bdelete (bstring s1, int pos, int len); + + Removes characters from pos to pos+len-1 and shifts the tail of the + bstring starting from pos+len to pos. len must be positive for this call + to have any effect. The section of the bstring described by (pos, len) + is clamped to boundaries of the bstring b. The value BSTR_OK is returned + if the operation is successful, otherwise BSTR_ERR is returned. + + .......................................................................... + + extern int bconcat (bstring b0, const_bstring b1); + + Concatenate the bstring b1 to the end of bstring b0. The value BSTR_OK + is returned if the operation is successful, otherwise BSTR_ERR is + returned. + + .......................................................................... + + extern int bconchar (bstring b, char c); + + Concatenate the character c to the end of bstring b. The value BSTR_OK + is returned if the operation is successful, otherwise BSTR_ERR is + returned. + + .......................................................................... + + extern int bcatcstr (bstring b, const char * s); + + Concatenate the char * string s to the end of bstring b. The value + BSTR_OK is returned if the operation is successful, otherwise BSTR_ERR is + returned. + + .......................................................................... + + extern int bcatblk (bstring b, const void * s, int len); + + Concatenate a fixed length buffer (s, len) to the end of bstring b. The + value BSTR_OK is returned if the operation is successful, otherwise + BSTR_ERR is returned. + + .......................................................................... + + extern int biseq (const_bstring b0, const_bstring b1); + + Compare the bstring b0 and b1 for equality. If the bstrings differ, 0 + is returned, if the bstrings are the same, 1 is returned, if there is an + error, -1 is returned. 
If the length of the bstrings are different, this + function has O(1) complexity. Contained '\0' characters are not treated + as a termination character. + + Note that the semantics of biseq are not completely compatible with + bstrcmp because of its different treatment of the '\0' character. + + .......................................................................... + + extern int bisstemeqblk (const_bstring b, const void * blk, int len); + + Compare beginning of bstring b0 with a block of memory of length len for + equality. If the beginning of b0 differs from the memory block (or if b0 + is too short), 0 is returned, if the bstrings are the same, 1 is returned, + if there is an error, -1 is returned. + + .......................................................................... + + extern int biseqcaseless (const_bstring b0, const_bstring b1); + + Compare two bstrings for equality without differentiating between case. + If the bstrings differ other than in case, 0 is returned, if the bstrings + are the same, 1 is returned, if there is an error, -1 is returned. If + the length of the bstrings are different, this function is O(1). '\0' + termination characters are not treated in any special way. + + .......................................................................... + + extern int bisstemeqcaselessblk (const_bstring b0, const void * blk, int len); + + Compare beginning of bstring b0 with a block of memory of length len + without differentiating between case for equality. If the beginning of b0 + differs from the memory block other than in case (or if b0 is too short), + 0 is returned, if the bstrings are the same, 1 is returned, if there is an + error, -1 is returned. + + .......................................................................... + + extern int biseqcstr (const_bstring b, const char *s); + + Compare the bstring b and char * bstring s. The C string s must be '\0' + terminated at exactly the length of the bstring b, and the contents + between the two must be identical with the bstring b with no '\0' + characters for the two contents to be considered equal. This is + equivalent to the condition that their current contents will be always be + equal when comparing them in the same format after converting one or the + other. If they are equal 1 is returned, if they are unequal 0 is + returned and if there is a detectable error BSTR_ERR is returned. + + .......................................................................... + + extern int biseqcstrcaseless (const_bstring b, const char *s); + + Compare the bstring b and char * string s. The C string s must be '\0' + terminated at exactly the length of the bstring b, and the contents + between the two must be identical except for case with the bstring b with + no '\0' characters for the two contents to be considered equal. This is + equivalent to the condition that their current contents will be always be + equal ignoring case when comparing them in the same format after + converting one or the other. If they are equal, except for case, 1 is + returned, if they are unequal regardless of case 0 is returned and if + there is a detectable error BSTR_ERR is returned. + + .......................................................................... + + extern int bstrcmp (const_bstring b0, const_bstring b1); + + Compare the bstrings b0 and b1 for ordering. 
If there is an error, + SHRT_MIN is returned, otherwise a value less than or greater than zero, + indicating that the bstring pointed to by b0 is lexicographically less + than or greater than the bstring pointed to by b1 is returned. If the + bstring lengths are unequal but the characters up until the length of the + shorter are equal then a value less than, or greater than zero, + indicating that the bstring pointed to by b0 is shorter or longer than the + bstring pointed to by b1 is returned. 0 is returned if and only if the + two bstrings are the same. If the length of the bstrings are different, + this function is O(n). Like its standard C library counter part, the + comparison does not proceed past any '\0' termination characters + encountered. + + The seemingly odd error return value, merely provides slightly more + granularity than the undefined situation given in the C library function + strcmp. The function otherwise behaves very much like strcmp(). + + Note that the semantics of bstrcmp are not completely compatible with + biseq because of its different treatment of the '\0' termination + character. + + .......................................................................... + + extern int bstrncmp (const_bstring b0, const_bstring b1, int n); + + Compare the bstrings b0 and b1 for ordering for at most n characters. If + there is an error, SHRT_MIN is returned, otherwise a value is returned as + if b0 and b1 were first truncated to at most n characters then bstrcmp + was called with these new bstrings are paremeters. If the length of the + bstrings are different, this function is O(n). Like its standard C + library counter part, the comparison does not proceed past any '\0' + termination characters encountered. + + The seemingly odd error return value, merely provides slightly more + granularity than the undefined situation given in the C library function + strncmp. The function otherwise behaves very much like strncmp(). + + .......................................................................... + + extern int bstricmp (const_bstring b0, const_bstring b1); + + Compare two bstrings without differentiating between case. The return + value is the difference of the values of the characters where the two + bstrings first differ, otherwise 0 is returned indicating that the + bstrings are equal. If the lengths are different, then a difference from + 0 is given, but if the first extra character is '\0', then it is taken to + be the value UCHAR_MAX+1. + + .......................................................................... + + extern int bstrnicmp (const_bstring b0, const_bstring b1, int n); + + Compare two bstrings without differentiating between case for at most n + characters. If the position where the two bstrings first differ is + before the nth position, the return value is the difference of the values + of the characters, otherwise 0 is returned. If the lengths are different + and less than n characters, then a difference from 0 is given, but if the + first extra character is '\0', then it is taken to be the value + UCHAR_MAX+1. + + .......................................................................... + + extern int bdestroy (bstring b); + + Deallocate the bstring passed. Passing NULL in as a parameter will have + no effect. Note that both the header and the data portion of the bstring + will be freed. No other bstring function which modifies one of its + parameters will free or reallocate the header. 
Because of this, in + general, bdestroy cannot be called on any declared struct tagbstring even + if it is not write protected. A bstring which is write protected cannot + be destroyed via the bdestroy call. Any attempt to do so will result in + no action taken, and BSTR_ERR will be returned. + + Note to C++ users: Passing in a CBString cast to a bstring will lead to + undefined behavior (free will be called on the header, rather than the + CBString destructor.) Instead just use the ordinary C++ language + facilities to dealloc a CBString. + + .......................................................................... + + extern int binstr (const_bstring s1, int pos, const_bstring s2); + + Search for the bstring s2 in s1 starting at position pos and looking in a + forward (increasing) direction. If it is found then it returns with the + first position after pos where it is found, otherwise it returns BSTR_ERR. + The algorithm used is brute force; O(m*n). + + .......................................................................... + + extern int binstrr (const_bstring s1, int pos, const_bstring s2); + + Search for the bstring s2 in s1 starting at position pos and looking in a + backward (decreasing) direction. If it is found then it returns with the + first position after pos where it is found, otherwise return BSTR_ERR. + Note that the current position at pos is tested as well -- so to be + disjoint from a previous forward search it is recommended that the + position be backed up (decremented) by one position. The algorithm used + is brute force; O(m*n). + + .......................................................................... + + extern int binstrcaseless (const_bstring s1, int pos, const_bstring s2); + + Search for the bstring s2 in s1 starting at position pos and looking in a + forward (increasing) direction but without regard to case. If it is + found then it returns with the first position after pos where it is + found, otherwise it returns BSTR_ERR. The algorithm used is brute force; + O(m*n). + + .......................................................................... + + extern int binstrrcaseless (const_bstring s1, int pos, const_bstring s2); + + Search for the bstring s2 in s1 starting at position pos and looking in a + backward (decreasing) direction but without regard to case. If it is + found then it returns with the first position after pos where it is + found, otherwise return BSTR_ERR. Note that the current position at pos + is tested as well -- so to be disjoint from a previous forward search it + is recommended that the position be backed up (decremented) by one + position. The algorithm used is brute force; O(m*n). + + .......................................................................... + + extern int binchr (const_bstring b0, int pos, const_bstring b1); + + Search for the first position in b0 starting from pos or after, in which + one of the characters in b1 is found. This function has an execution + time of O(b0->slen + b1->slen). If such a position does not exist in b0, + then BSTR_ERR is returned. + + .......................................................................... + + extern int binchrr (const_bstring b0, int pos, const_bstring b1); + + Search for the last position in b0 no greater than pos, in which one of + the characters in b1 is found. This function has an execution time + of O(b0->slen + b1->slen). If such a position does not exist in b0, + then BSTR_ERR is returned. 
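
    As a usage sketch for binchr and binchrr (illustrative only; bsStatic is
    described in the macros section below, and the positions in the comments
    assume the calls succeed):

        struct tagbstring vowels = bsStatic ("aeiou");
        bstring b = bfromcstr ("bstring library");
        int first = binchr (b, 0, &vowels);                /* 4, the 'i' in "bstring" */
        int last = binchrr (b, blength (b) - 1, &vowels);  /* 12, the 'a' in "library" */
        bdestroy (b);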
+ + .......................................................................... + + extern int bninchr (const_bstring b0, int pos, const_bstring b1); + + Search for the first position in b0 starting from pos or after, in which + none of the characters in b1 is found and return it. This function has + an execution time of O(b0->slen + b1->slen). If such a position does + not exist in b0, then BSTR_ERR is returned. + + .......................................................................... + + extern int bninchrr (const_bstring b0, int pos, const_bstring b1); + + Search for the last position in b0 no greater than pos, in which none of + the characters in b1 is found and return it. This function has an + execution time of O(b0->slen + b1->slen). If such a position does not + exist in b0, then BSTR_ERR is returned. + + .......................................................................... + + extern int bstrchr (const_bstring b, int c); + + Search for the character c in the bstring b forwards from the start of + the bstring. Returns the position of the found character or BSTR_ERR if + it is not found. + + NOTE: This has been implemented as a macro on top of bstrchrp (). + + .......................................................................... + + extern int bstrrchr (const_bstring b, int c); + + Search for the character c in the bstring b backwards from the end of the + bstring. Returns the position of the found character or BSTR_ERR if it is + not found. + + NOTE: This has been implemented as a macro on top of bstrrchrp (). + + .......................................................................... + + extern int bstrchrp (const_bstring b, int c, int pos); + + Search for the character c in b forwards from the position pos + (inclusive). Returns the position of the found character or BSTR_ERR if + it is not found. + + .......................................................................... + + extern int bstrrchrp (const_bstring b, int c, int pos); + + Search for the character c in b backwards from the position pos in bstring + (inclusive). Returns the position of the found character or BSTR_ERR if + it is not found. + + .......................................................................... + + extern int bsetstr (bstring b0, int pos, const_bstring b1, unsigned char fill); + + Overwrite the bstring b0 starting at position pos with the bstring b1. If + the position pos is past the end of b0, then the character "fill" is + appended as necessary to make up the gap between the end of b0 and pos. + If b1 is NULL, it behaves as if it were a 0-length bstring. The value + BSTR_OK is returned if the operation is successful, otherwise BSTR_ERR is + returned. + + .......................................................................... + + extern int binsert (bstring s1, int pos, const_bstring s2, unsigned char fill); + + Inserts the bstring s2 into s1 at position pos. If the position pos is + past the end of s1, then the character "fill" is appended as necessary to + make up the gap between the end of s1 and pos. The value BSTR_OK is + returned if the operation is successful, otherwise BSTR_ERR is returned. + + .......................................................................... + + extern int binsertch (bstring s1, int pos, int len, unsigned char fill); + + Inserts the character fill repeatedly into s1 at position pos for a + length len. 
If the position pos is past the end of s1, then the + character "fill" is appended as necessary to make up the gap between the + end of s1 and the position pos + len (exclusive). The value BSTR_OK is + returned if the operation is successful, otherwise BSTR_ERR is returned. + + .......................................................................... + + extern int breplace (bstring b1, int pos, int len, const_bstring b2, + unsigned char fill); + + Replace a section of a bstring from pos for a length len with the bstring + b2. If the position pos is past the end of b1 then the character "fill" + is appended as necessary to make up the gap between the end of b1 and + pos. + + .......................................................................... + + extern int bfindreplace (bstring b, const_bstring find, + const_bstring replace, int position); + + Replace all occurrences of the find substring with a replace bstring + after a given position in the bstring b. The find bstring must have a + length > 0 otherwise BSTR_ERR is returned. This function does not + perform recursive per character replacement; that is to say successive + searches resume at the position after the last replace. + + So for example: + + bfindreplace (a0 = bfromcstr("aabaAb"), a1 = bfromcstr("a"), + a2 = bfromcstr("aa"), 0); + + Should result in changing a0 to "aaaabaaAb". + + This function performs exactly (b->slen - position) bstring comparisons, + and data movement is bounded above by character volume equivalent to size + of the output bstring. + + .......................................................................... + + extern int bfindreplacecaseless (bstring b, const_bstring find, + const_bstring replace, int position); + + Replace all occurrences of the find substring, ignoring case, with a + replace bstring after a given position in the bstring b. The find bstring + must have a length > 0 otherwise BSTR_ERR is returned. This function + does not perform recursive per character replacement; that is to say + successive searches resume at the position after the last replace. + + So for example: + + bfindreplacecaseless (a0 = bfromcstr("AAbaAb"), a1 = bfromcstr("a"), + a2 = bfromcstr("aa"), 0); + + Should result in changing a0 to "aaaabaaaab". + + This function performs exactly (b->slen - position) bstring comparisons, + and data movement is bounded above by character volume equivalent to size + of the output bstring. + + .......................................................................... + + extern int balloc (bstring b, int length); + + Increase the allocated memory backing the data buffer for the bstring b + to a length of at least length. If the memory backing the bstring b is + already large enough, not action is performed. This has no effect on the + bstring b that is visible to the bstring API. Usually this function will + only be used when a minimum buffer size is required coupled with a direct + access to the ->data member of the bstring structure. + + Be warned that like any other bstring function, the bstring must be well + defined upon entry to this function. I.e., doing something like: + + b->slen *= 2; /* ?? Most likely incorrect */ + balloc (b, b->slen); + + is invalid, and should be implemented as: + + int t; + if (BSTR_OK == balloc (b, t = (b->slen * 2))) b->slen = t; + + This function will return with BSTR_ERR if b is not detected as a valid + bstring or length is not greater than 0, otherwise BSTR_OK is returned. 
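
    As an additional sketch of the intended usage pattern, reserving space
    and then filling the ->data member directly (illustrative only; assumes
    a C99 snprintf is available):

        bstring b = bfromcstr ("");
        if (b != NULL && BSTR_OK == balloc (b, 256)) {
            int n = snprintf ((char *) b->data, 256, "pid=%d", 12345);
            if (n >= 0 && n < 256) b->slen = n; /* keep b well defined */
        }
        bdestroy (b);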
    ..........................................................................

    extern int ballocmin (bstring b, int length);

    Change the amount of memory backing the bstring b to at least length.
    This operation will never truncate the bstring data including the extra
    terminating '\0' and thus will not decrease the length to less than
    b->slen + 1. Note that repeated use of this function may cause
    performance problems (realloc may be called on the bstring more than
    O(log(INT_MAX)) times). This function will return with BSTR_ERR if b is
    not detected as a valid bstring or length is not greater than 0,
    otherwise BSTR_OK is returned.

    So for example:

        if (BSTR_OK == ballocmin (b, 64)) b->data[63] = 'x';

    The idea is that this will set the 64th character of b to 'x' if it is
    at least 64 characters long, otherwise do nothing. And we know this is
    well defined so long as the ballocmin call was successful, since it will
    ensure that b has been allocated with at least 64 characters.

    ..........................................................................

    int btrunc (bstring b, int n);

    Truncate the bstring to at most n characters. This function will return
    with BSTR_ERR if b is not detected as a valid bstring or n is less than
    0, otherwise BSTR_OK is returned.

    ..........................................................................

    extern int bpattern (bstring b, int len);

    Replicate the starting bstring, b, end to end repeatedly until it
    surpasses len characters, then chop the result to exactly len
    characters. This function operates in-place. This function will return
    with BSTR_ERR if b is NULL or of length 0, otherwise BSTR_OK is
    returned.

    ..........................................................................

    extern int btoupper (bstring b);

    Convert contents of bstring to upper case. This function will return
    with BSTR_ERR if b is NULL or of length 0, otherwise BSTR_OK is
    returned.

    ..........................................................................

    extern int btolower (bstring b);

    Convert contents of bstring to lower case. This function will return
    with BSTR_ERR if b is NULL or of length 0, otherwise BSTR_OK is
    returned.

    ..........................................................................

    extern int bltrimws (bstring b);

    Delete whitespace contiguous from the left end of the bstring. This
    function will return with BSTR_ERR if b is NULL or of length 0,
    otherwise BSTR_OK is returned.

    ..........................................................................

    extern int brtrimws (bstring b);

    Delete whitespace contiguous from the right end of the bstring. This
    function will return with BSTR_ERR if b is NULL or of length 0,
    otherwise BSTR_OK is returned.

    ..........................................................................

    extern int btrimws (bstring b);

    Delete whitespace contiguous from both ends of the bstring. This
    function will return with BSTR_ERR if b is NULL or of length 0,
    otherwise BSTR_OK is returned.

    ..........................................................................

    extern struct bstrList * bstrListCreate (void);

    Create an empty struct bstrList. The struct bstrList output structure
    is declared as follows:

        struct bstrList {
            int qty, mlen;
            bstring * entry;
        };

    The entry field is actually an array with qty entries.
    The mlen record counts the maximum number of bstrings for which there is
    memory in the entry record.

    The Bstrlib API does *NOT* include a comprehensive set of functions for
    full management of struct bstrList in an abstracted way. The reason for
    this is because aliasing semantics of the list are best left to the user
    of this function, and performance varies wildly depending on the
    assumptions made. For a complete list of the bstring data type it is
    recommended that a C++ std::vector of CBString be used, since its
    semantics and usage are more standard.

    ..........................................................................

    extern int bstrListDestroy (struct bstrList * sl);

    Destroy a struct bstrList structure that was returned by the bsplit
    function. Note that this will destroy each bstring in the ->entry array
    as well. See bstrListCreate() above for structure of struct bstrList.

    ..........................................................................

    extern int bstrListAlloc (struct bstrList * sl, int msz);

    Ensure that there is memory for at least msz number of entries for the
    list.

    ..........................................................................

    extern int bstrListAllocMin (struct bstrList * sl, int msz);

    Try to allocate the minimum amount of memory for the list to include at
    least msz entries or sl->qty whichever is greater.

    ..........................................................................

    extern struct bstrList * bsplit (bstring str, unsigned char splitChar);

    Create an array of sequential substrings from str divided by the
    character splitChar. Successive occurrences of the splitChar will be
    divided by empty bstring entries, following the semantics from the
    Python programming language. To reclaim the memory from this output
    structure, bstrListDestroy () should be called. See bstrListCreate()
    above for structure of struct bstrList.

    ..........................................................................

    extern struct bstrList * bsplits (bstring str, const_bstring splitStr);

    Create an array of sequential substrings from str divided by any
    character contained in splitStr. An empty splitStr causes a single
    entry bstrList containing a copy of str to be returned. See
    bstrListCreate() above for structure of struct bstrList.

    ..........................................................................

    extern struct bstrList * bsplitstr (bstring str, const_bstring splitStr);

    Create an array of sequential substrings from str divided by the entire
    substring splitStr. An empty splitStr causes a single entry bstrList
    containing a copy of str to be returned. See bstrListCreate() above for
    structure of struct bstrList.

    ..........................................................................

    extern bstring bjoin (const struct bstrList * bl, const_bstring sep);

    Join the entries of a bstrList into one bstring by sequentially
    concatenating them with the sep bstring in between. If sep is NULL, it
    is treated as if it were the empty bstring. Note that:

        bjoin (l = bsplit (b, s->data[0]), s);

    should result in a copy of b, if s->slen is 1. If there is an error NULL
    is returned, otherwise a bstring with the correct result is returned.
    See bstrListCreate() above for structure of struct bstrList.

    ..........................................................................
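    Putting the split and join functions above together, a minimal sketch
    (illustrative only; error handling is abbreviated):

        bstring csv = bfromcstr ("red,green,blue");
        struct tagbstring comma = bsStatic (",");
        struct bstrList * parts = bsplit (csv, ',');
        if (parts != NULL) {
            /* parts->qty is 3 and parts->entry[1] contains "green" */
            bstring joined = bjoin (parts, &comma); /* "red,green,blue" again */
            bdestroy (joined);
            bstrListDestroy (parts);
        }
        bdestroy (csv);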
+ + extern int bsplitcb (const_bstring str, unsigned char splitChar, int pos, + int (* cb) (void * parm, int ofs, int len), void * parm); + + Iterate the set of disjoint sequential substrings over str starting at + position pos divided by the character splitChar. The parm passed to + bsplitcb is passed on to cb. If the function cb returns a value < 0, + then further iterating is halted and this value is returned by bsplitcb. + + Note: Non-destructive modification of str from within the cb function + while performing this split is not undefined. bsplitcb behaves in + sequential lock step with calls to cb. I.e., after returning from a cb + that return a non-negative integer, bsplitcb continues from the position + 1 character after the last detected split character and it will halt + immediately if the length of str falls below this point. However, if the + cb function destroys str, then it *must* return with a negative value, + otherwise bsplitcb will continue in an undefined manner. + + This function is provided as an incremental alternative to bsplit that is + abortable and which does not impose additional memory allocation. + + .......................................................................... + + extern int bsplitscb (const_bstring str, const_bstring splitStr, int pos, + int (* cb) (void * parm, int ofs, int len), void * parm); + + Iterate the set of disjoint sequential substrings over str starting at + position pos divided by any of the characters in splitStr. An empty + splitStr causes the whole str to be iterated once. The parm passed to + bsplitcb is passed on to cb. If the function cb returns a value < 0, + then further iterating is halted and this value is returned by bsplitcb. + + Note: Non-destructive modification of str from within the cb function + while performing this split is not undefined. bsplitscb behaves in + sequential lock step with calls to cb. I.e., after returning from a cb + that return a non-negative integer, bsplitscb continues from the position + 1 character after the last detected split character and it will halt + immediately if the length of str falls below this point. However, if the + cb function destroys str, then it *must* return with a negative value, + otherwise bsplitscb will continue in an undefined manner. + + This function is provided as an incremental alternative to bsplits that + is abortable and which does not impose additional memory allocation. + + .......................................................................... + + extern int bsplitstrcb (const_bstring str, const_bstring splitStr, int pos, + int (* cb) (void * parm, int ofs, int len), void * parm); + + Iterate the set of disjoint sequential substrings over str starting at + position pos divided by the entire substring splitStr. An empty splitStr + causes each character of str to be iterated. The parm passed to bsplitcb + is passed on to cb. If the function cb returns a value < 0, then further + iterating is halted and this value is returned by bsplitcb. + + Note: Non-destructive modification of str from within the cb function + while performing this split is not undefined. bsplitstrcb behaves in + sequential lock step with calls to cb. I.e., after returning from a cb + that return a non-negative integer, bsplitstrcb continues from the position + 1 character after the last detected split character and it will halt + immediately if the length of str falls below this point. 
    However, if the cb function destroys str, then it *must* return with a
    negative value, otherwise bsplitstrcb will continue in an undefined
    manner.

    This function is provided as an incremental alternative to bsplitstr
    that is abortable and which does not impose additional memory
    allocation.

    ..........................................................................

    extern bstring bformat (const char * fmt, ...);

    Takes the same parameters as printf (), but rather than outputting
    results to stdio, it forms a bstring which contains what would have
    been output. Note that if there is an early generation of a '\0'
    character, the bstring will be truncated to this end point.

    Note that %s format tokens correspond to '\0' terminated char * buffers,
    not bstrings. To print a bstring, first dereference the data element of
    the bstring:

        /* b1->data needs to be '\0' terminated, so tagbstrings generated
           by blk2tbstr () might not be suitable. */
        b0 = bformat ("Hello, %s", b1->data);

    Note that if the BSTRLIB_NOVSNP macro has been set when bstrlib has been
    compiled, the bformat function is not present.

    ..........................................................................

    extern int bformata (bstring b, const char * fmt, ...);

    In addition to the initial output buffer b, bformata takes the same
    parameters as printf (), but rather than outputting results to stdio,
    it appends the results to the initial bstring parameter. Note that if
    there is an early generation of a '\0' character, the bstring will be
    truncated to this end point.

    Note that %s format tokens correspond to '\0' terminated char * buffers,
    not bstrings. To print a bstring, first dereference the data element of
    the bstring:

        /* b1->data needs to be '\0' terminated, so tagbstrings generated
           by blk2tbstr () might not be suitable. */
        bformata (b0 = bfromcstr ("Hello"), ", %s", b1->data);

    Note that if the BSTRLIB_NOVSNP macro has been set when bstrlib has been
    compiled, the bformata function is not present.

    ..........................................................................

    extern int bassignformat (bstring b, const char * fmt, ...);

    After the first parameter, it takes the same parameters as printf (),
    but rather than outputting results to stdio, it outputs the results to
    the bstring parameter b. Note that if there is an early generation of a
    '\0' character, the bstring will be truncated to this end point.

    Note that %s format tokens correspond to '\0' terminated char * buffers,
    not bstrings. To print a bstring, first dereference the data element of
    the bstring:

        /* b1->data needs to be '\0' terminated, so tagbstrings generated
           by blk2tbstr () might not be suitable. */
        bassignformat (b0 = bfromcstr ("Hello"), ", %s", b1->data);

    Note that if the BSTRLIB_NOVSNP macro has been set when bstrlib has been
    compiled, the bassignformat function is not present.

    ..........................................................................

    extern int bvcformata (bstring b, int count, const char * fmt, va_list arglist);

    The bvcformata function formats data under control of the format control
    string fmt and attempts to append the result to b. The fmt parameter is
    the same as that of the printf function. The variable argument list is
    replaced with arglist, which has been initialized by the va_start macro.
    The size of the output is upper bounded by count.
    If the required output exceeds count, the string b is not augmented with
    any contents and a value below BSTR_ERR is returned. If a value below
    -count is returned then it is recommended that the negative of this
    value be used as an update to the count in a subsequent pass. On other
    errors, such as running out of memory, parameter errors or numeric wrap
    around, BSTR_ERR is returned. BSTR_OK is returned when the output is
    successfully generated and appended to b.

    Note: There is no sanity checking of arglist, and this function is
    destructive of the contents of b from the b->slen point onward. If
    there is an early generation of a '\0' character, the bstring will be
    truncated to this end point.

    Although this function is part of the external API for Bstrlib, the
    interface and semantics (length limitations, and unusual return codes)
    are fairly atypical. The real purpose for this function is to provide
    an engine for the bvformata macro.

    Note that if the BSTRLIB_NOVSNP macro has been set when bstrlib has been
    compiled, the bvcformata function is not present.

    ..........................................................................

    extern bstring bread (bNread readPtr, void * parm);
    typedef size_t (* bNread) (void *buff, size_t elsize, size_t nelem,
                               void *parm);

    Read an entire stream into a bstring, verbatim. The readPtr function
    pointer is compatible with fread semantics, except that it need not
    obtain the stream data from a file. The intention is that parm would
    contain the stream data context/state required (similar to the role of
    the FILE* I/O stream parameter of fread.)

    Abstracting the block read function allows for block devices other than
    file streams to be read if desired. Note that there is an ANSI
    compatibility issue if "fread" is used directly; see the ANSI issues
    section below.

    ..........................................................................

    extern int breada (bstring b, bNread readPtr, void * parm);

    Read an entire stream and append it to a bstring, verbatim. Behaves
    like bread, except that it appends its results to the bstring b.
    BSTR_ERR is returned on error, otherwise 0 is returned.

    ..........................................................................

    extern bstring bgets (bNgetc getcPtr, void * parm, char terminator);
    typedef int (* bNgetc) (void * parm);

    Read a bstring from a stream. As many bytes as are necessary are read
    until the terminator is consumed or no more characters are available
    from the stream. If read from the stream, the terminator character will
    be appended to the end of the returned bstring. The getcPtr function
    must have the same semantics as the fgetc C library function (i.e.,
    returning an integer whose value is negative when there are no more
    characters available, otherwise the value of the next available
    unsigned character from the stream.) The intention is that parm would
    contain the stream data context/state required (similar to the role of
    the FILE* I/O stream parameter of fgets.) If no characters are read, or
    there is some other detectable error, NULL is returned.

    bgets will never call the getcPtr function more often than necessary to
    construct its output (including a single call, if required, to determine
    that the stream contains no more characters.)
+ + Abstracting the character stream function and terminator character allows + for different stream devices and string formats other than '\n' + terminated lines in a file if desired (consider \032 terminated email + messages, in a UNIX mailbox for example.) + + For files, this function can be used analogously as fgets as follows: + + fp = fopen ( ... ); + if (fp) b = bgets ((bNgetc) fgetc, fp, '\n'); + + (Note that only one terminator character can be used, and that '\0' is + not assumed to terminate the stream in addition to the terminator + character. This is consistent with the semantics of fgets.) + + .......................................................................... + + extern int bgetsa (bstring b, bNgetc getcPtr, void * parm, char terminator); + + Read from a stream and concatenate to a bstring. Behaves like bgets, + except that it appends it results to the bstring b. The value 1 is + returned if no characters are read before a negative result is returned + from getcPtr. Otherwise BSTR_ERR is returned on error, and 0 is returned + in other normal cases. + + .......................................................................... + + extern int bassigngets (bstring b, bNgetc getcPtr, void * parm, char terminator); + + Read from a stream and concatenate to a bstring. Behaves like bgets, + except that it assigns the results to the bstring b. The value 1 is + returned if no characters are read before a negative result is returned + from getcPtr. Otherwise BSTR_ERR is returned on error, and 0 is returned + in other normal cases. + + .......................................................................... + + extern struct bStream * bsopen (bNread readPtr, void * parm); + + Wrap a given open stream (described by a fread compatible function + pointer and stream handle) into an open bStream suitable for the bstring + library streaming functions. + + .......................................................................... + + extern void * bsclose (struct bStream * s); + + Close the bStream, and return the handle to the stream that was + originally used to open the given stream. If s is NULL or detectably + invalid, NULL will be returned. + + .......................................................................... + + extern int bsbufflength (struct bStream * s, int sz); + + Set the length of the buffer used by the bStream. If sz is the macro + BSTR_BS_BUFF_LENGTH_GET (which is 0), the length is not set. If s is + NULL or sz is negative, the function will return with BSTR_ERR, otherwise + this function returns with the previous length. + + .......................................................................... + + extern int bsreadln (bstring r, struct bStream * s, char terminator); + + Read a bstring terminated by the terminator character or the end of the + stream from the bStream (s) and return it into the parameter r. The + matched terminator, if found, appears at the end of the line read. If + the stream has been exhausted of all available data, before any can be + read, BSTR_ERR is returned. This function may read additional characters + into the stream buffer from the core stream that are not returned, but + will be retained for subsequent read operations. When reading from high + speed streams, this function can perform significantly faster than bgets. + + .......................................................................... 
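
    For instance, reading a file line by line through a bStream might look
    like the following sketch (it assumes a file named "input.txt" exists;
    see the note under bread about casting fread directly; error handling
    is abbreviated):

        FILE * fp = fopen ("input.txt", "r");
        if (fp != NULL) {
            struct bStream * s = bsopen ((bNread) fread, fp);
            bstring line = bfromcstr ("");
            while (BSTR_ERR != bsreadln (line, s, '\n')) {
                /* line now holds one line, including the trailing '\n'
                   if one was present in the stream */
            }
            bdestroy (line);
            fclose ((FILE *) bsclose (s));
        }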
+ + extern int bsreadlna (bstring r, struct bStream * s, char terminator); + + Read a bstring terminated by the terminator character or the end of the + stream from the bStream (s) and concatenate it to the parameter r. The + matched terminator, if found, appears at the end of the line read. If + the stream has been exhausted of all available data, before any can be + read, BSTR_ERR is returned. This function may read additional characters + into the stream buffer from the core stream that are not returned, but + will be retained for subsequent read operations. When reading from high + speed streams, this function can perform significantly faster than bgets. + + .......................................................................... + + extern int bsreadlns (bstring r, struct bStream * s, bstring terminators); + + Read a bstring terminated by any character in the terminators bstring or + the end of the stream from the bStream (s) and return it into the + parameter r. This function may read additional characters from the core + stream that are not returned, but will be retained for subsequent read + operations. + + .......................................................................... + + extern int bsreadlnsa (bstring r, struct bStream * s, bstring terminators); + + Read a bstring terminated by any character in the terminators bstring or + the end of the stream from the bStream (s) and concatenate it to the + parameter r. If the stream has been exhausted of all available data, + before any can be read, BSTR_ERR is returned. This function may read + additional characters from the core stream that are not returned, but + will be retained for subsequent read operations. + + .......................................................................... + + extern int bsread (bstring r, struct bStream * s, int n); + + Read a bstring of length n (or, if it is fewer, as many bytes as is + remaining) from the bStream. This function will read the minimum + required number of additional characters from the core stream. When the + stream is at the end of the file BSTR_ERR is returned, otherwise BSTR_OK + is returned. + + .......................................................................... + + extern int bsreada (bstring r, struct bStream * s, int n); + + Read a bstring of length n (or, if it is fewer, as many bytes as is + remaining) from the bStream and concatenate it to the parameter r. This + function will read the minimum required number of additional characters + from the core stream. When the stream is at the end of the file BSTR_ERR + is returned, otherwise BSTR_OK is returned. + + .......................................................................... + + extern int bsunread (struct bStream * s, const_bstring b); + + Insert a bstring into the bStream at the current position. These + characters will be read prior to those that actually come from the core + stream. + + .......................................................................... + + extern int bspeek (bstring r, const struct bStream * s); + + Return the number of currently buffered characters from the bStream that + will be read prior to reads from the core stream, and append it to the + the parameter r. + + .......................................................................... 
+ + extern int bssplitscb (struct bStream * s, const_bstring splitStr, + int (* cb) (void * parm, int ofs, const_bstring entry), void * parm); + + Iterate the set of disjoint sequential substrings over the stream s + divided by any character from the bstring splitStr. The parm passed to + bssplitscb is passed on to cb. If the function cb returns a value < 0, + then further iterating is halted and this return value is returned by + bssplitscb. + + Note: At the point of calling the cb function, the bStream pointer is + pointed exactly at the position right after having read the split + character. The cb function can act on the stream by causing the bStream + pointer to move, and bssplitscb will continue by starting the next split + at the position of the pointer after the return from cb. + + However, if the cb causes the bStream s to be destroyed then the cb must + return with a negative value, otherwise bssplitscb will continue in an + undefined manner. + + This function is provided as way to incrementally parse through a file + or other generic stream that in total size may otherwise exceed the + practical or desired memory available. As with the other split callback + based functions this is abortable and does not impose additional memory + allocation. + + .......................................................................... + + extern int bssplitstrcb (struct bStream * s, const_bstring splitStr, + int (* cb) (void * parm, int ofs, const_bstring entry), void * parm); + + Iterate the set of disjoint sequential substrings over the stream s + divided by the entire substring splitStr. The parm passed to + bssplitstrcb is passed on to cb. If the function cb returns a + value < 0, then further iterating is halted and this return value is + returned by bssplitstrcb. + + Note: At the point of calling the cb function, the bStream pointer is + pointed exactly at the position right after having read the split + character. The cb function can act on the stream by causing the bStream + pointer to move, and bssplitstrcb will continue by starting the next + split at the position of the pointer after the return from cb. + + However, if the cb causes the bStream s to be destroyed then the cb must + return with a negative value, otherwise bssplitscb will continue in an + undefined manner. + + This function is provided as way to incrementally parse through a file + or other generic stream that in total size may otherwise exceed the + practical or desired memory available. As with the other split callback + based functions this is abortable and does not impose additional memory + allocation. + + .......................................................................... + + extern int bseof (const struct bStream * s); + + Return the defacto "EOF" (end of file) state of a stream (1 if the + bStream is in an EOF state, 0 if not, and BSTR_ERR if stream is closed or + detectably erroneous.) When the readPtr callback returns a value <= 0 + the stream reaches its "EOF" state. Note that bunread with non-empty + content will essentially turn off this state, and the stream will not be + in its "EOF" state so long as its possible to read more data out of it. + + Also note that the semantics of bseof() are slightly different from + something like feof(). I.e., reaching the end of the stream does not + necessarily guarantee that bseof() will return with a value indicating + that this has happened. 
bseof() will only return indicating that it has + reached the "EOF" and an attempt has been made to read past the end of + the bStream. + +The macros +---------- + + The macros described below are shown in a prototype form indicating their + intended usage. Note that the parameters passed to these macros will be + referenced multiple times. As with all macros, programmer care is + required to guard against unintended side effects. + + int blengthe (const_bstring b, int err); + + Returns the length of the bstring. If the bstring is NULL err is + returned. + + .......................................................................... + + int blength (const_bstring b); + + Returns the length of the bstring. If the bstring is NULL, the length + returned is 0. + + .......................................................................... + + int bchare (const_bstring b, int p, int c); + + Returns the p'th character of the bstring b. If the position p refers to + a position that does not exist in the bstring or the bstring is NULL, + then c is returned. + + .......................................................................... + + char bchar (const_bstring b, int p); + + Returns the p'th character of the bstring b. If the position p refers to + a position that does not exist in the bstring or the bstring is NULL, + then '\0' is returned. + + .......................................................................... + + char * bdatae (bstring b, char * err); + + Returns the char * data portion of the bstring b. If b is NULL, err is + returned. + + .......................................................................... + + char * bdata (bstring b); + + Returns the char * data portion of the bstring b. If b is NULL, NULL is + returned. + + .......................................................................... + + char * bdataofse (bstring b, int ofs, char * err); + + Returns the char * data portion of the bstring b offset by ofs. If b is + NULL, err is returned. + + .......................................................................... + + char * bdataofs (bstring b, int ofs); + + Returns the char * data portion of the bstring b offset by ofs. If b is + NULL, NULL is returned. + + .......................................................................... + + struct tagbstring var = bsStatic ("..."); + + The bsStatic macro allows for static declarations of literal string + constants as struct tagbstring structures. The resulting tagbstring does + not need to be freed or destroyed. Note that this macro is only well + defined for string literal arguments. For more general string pointers, + use the btfromcstr macro. + + The resulting struct tagbstring is permanently write protected. Attempts + to write to this struct tagbstring from any bstrlib function will lead to + BSTR_ERR being returned. Invoking the bwriteallow macro onto this struct + tagbstring has no effect. + + .......................................................................... + + <- bsStaticBlkParms ("...") + + The bsStaticBlkParms macro emits a pair of comma seperated parameters + corresponding to the block parameters for the block functions in Bstrlib + (i.e., blk2bstr, bcatblk, blk2tbstr, bisstemeqblk, bisstemeqcaselessblk.) + Note that this macro is only well defined for string literal arguments. + + Examples: + + bstring b = blk2bstr (bsStaticBlkParms ("Fast init. 
")); + bcatblk (b, bsStaticBlkParms ("No frills fast concatenation.")); + + These are faster than using bfromcstr() and bcatcstr() respectively + because the length of the inline string is known as a compile time + constant. Also note that seperate struct tagbstring declarations for + holding the output of a bsStatic() macro are not required. + + .......................................................................... + + void btfromcstr (struct tagbstring& t, const char * s); + + Fill in the tagbstring t with the '\0' terminated char buffer s. This + action is purely reference oriented; no memory management is done. The + data member is just assigned s, and slen is assigned the strlen of s. + The s parameter is accessed exactly once in this macro. + + The resulting struct tagbstring is initially write protected. Attempts + to write to this struct tagbstring in a write protected state from any + bstrlib function will lead to BSTR_ERR being returned. Invoke the + bwriteallow on this struct tagbstring to make it writeable (though this + requires that s be obtained from a function compatible with malloc.) + + .......................................................................... + + void btfromblk (struct tagbstring& t, void * s, int len); + + Fill in the tagbstring t with the data buffer s with length len. This + action is purely reference oriented; no memory management is done. The + data member of t is just assigned s, and slen is assigned len. Note that + the buffer is not appended with a '\0' character. The s and len + parameters are accessed exactly once each in this macro. + + The resulting struct tagbstring is initially write protected. Attempts + to write to this struct tagbstring in a write protected state from any + bstrlib function will lead to BSTR_ERR being returned. Invoke the + bwriteallow on this struct tagbstring to make it writeable (though this + requires that s be obtained from a function compatible with malloc.) + + .......................................................................... + + void btfromblkltrimws (struct tagbstring& t, void * s, int len); + + Fill in the tagbstring t with the data buffer s with length len after it + has been left trimmed. This action is purely reference oriented; no + memory management is done. The data member of t is just assigned to a + pointer inside the buffer s. Note that the buffer is not appended with a + '\0' character. The s and len parameters are accessed exactly once each + in this macro. + + The resulting struct tagbstring is permanently write protected. Attempts + to write to this struct tagbstring from any bstrlib function will lead to + BSTR_ERR being returned. Invoking the bwriteallow macro onto this struct + tagbstring has no effect. + + .......................................................................... + + void btfromblkrtrimws (struct tagbstring& t, void * s, int len); + + Fill in the tagbstring t with the data buffer s with length len after it + has been right trimmed. This action is purely reference oriented; no + memory management is done. The data member of t is just assigned to a + pointer inside the buffer s. Note that the buffer is not appended with a + '\0' character. The s and len parameters are accessed exactly once each + in this macro. + + The resulting struct tagbstring is permanently write protected. Attempts + to write to this struct tagbstring from any bstrlib function will lead to + BSTR_ERR being returned. Invoking the bwriteallow macro onto this struct + tagbstring has no effect. 
+ + .......................................................................... + + void btfromblktrimws (struct tagbstring& t, void * s, int len); + + Fill in the tagbstring t with the data buffer s with length len after it + has been left and right trimmed. This action is purely reference + oriented; no memory management is done. The data member of t is just + assigned to a pointer inside the buffer s. Note that the buffer is not + appended with a '\0' character. The s and len parameters are accessed + exactly once each in this macro. + + The resulting struct tagbstring is permanently write protected. Attempts + to write to this struct tagbstring from any bstrlib function will lead to + BSTR_ERR being returned. Invoking the bwriteallow macro onto this struct + tagbstring has no effect. + + .......................................................................... + + void bmid2tbstr (struct tagbstring& t, bstring b, int pos, int len); + + Fill the tagbstring t with the substring from b, starting from position + pos with a length len. The segment is clamped by the boundaries of + the bstring b. This action is purely reference oriented; no memory + management is done. Note that the buffer is not appended with a '\0' + character. Note that the t parameter to this macro may be accessed + multiple times. Note that the contents of t will become undefined + if the contents of b change or are destroyed. + + The resulting struct tagbstring is permanently write protected. Attempts + to write to this struct tagbstring in a write protected state from any + bstrlib function will lead to BSTR_ERR being returned. Invoking the + bwriteallow macro on this struct tagbstring will have no effect. + + .......................................................................... + + void bvformata (int& ret, bstring b, const char * format, lastarg); + + Append the bstring b with printf like formatting with the format control + string, and the arguments taken from the ... list of arguments after + lastarg passed to the containing function. If the containing function + does not have ... parameters or lastarg is not the last named parameter + before the ... then the results are undefined. If successful, the + results are appended to b and BSTR_OK is assigned to ret. Otherwise + BSTR_ERR is assigned to ret. + + Example: + + void dbgerror (FILE * fp, const char * fmt, ...) { + int ret; + bstring b; + bvformata (ret, b = bfromcstr ("DBG: "), fmt, fmt); + if (BSTR_OK == ret) fputs ((char *) bdata (b), fp); + bdestroy (b); + } + + Note that if the BSTRLIB_NOVSNP macro was set when bstrlib had been + compiled the bvformata macro will not link properly. If the + BSTRLIB_NOVSNP macro has been set, the bvformata macro will not be + available. + + .......................................................................... + + void bwriteprotect (struct tagbstring& t); + + Disallow bstring from being written to via the bstrlib API. Attempts to + write to the resulting tagbstring from any bstrlib function will lead to + BSTR_ERR being returned. + + Note: bstrings which are write protected cannot be destroyed via bdestroy. + + Note to C++ users: Setting a CBString as write protected will not prevent + it from being destroyed by the destructor. + + .......................................................................... + + void bwriteallow (struct tagbstring& t); + + Allow bstring to be written to via the bstrlib API. Note that such an + action makes the bstring both writable and destroyable. 
If the bstring is + not legitimately writable (as is the case for struct tagbstrings + initialized with a bsStatic value), the results of this are undefined. + + Note that invoking the bwriteallow macro may increase the number of + reallocs by one more than necessary for every call to bwriteallow + interleaved with any bstring API which writes to this bstring. + + .......................................................................... + + int biswriteprotected (struct tagbstring& t); + + Returns 1 if the bstring is write protected, otherwise 0 is returned. + +=============================================================================== + +The bstest module +----------------- + +The bstest module is just a unit test for the bstrlib module. For correct +implementations of bstrlib, it should execute with 0 failures being reported. +This test should be utilized if modifications/customizations to bstrlib have +been performed. It tests each core bstrlib function with bstrings of every +mode (read-only, NULL, static and mutable) and ensures that the expected +semantics are observed (including results that should indicate an error). It +also tests for aliasing support. Passing bstest is a necessary but not a +sufficient condition for ensuring the correctness of the bstrlib module. + + +The test module +--------------- + +The test module is just a unit test for the bstrwrap module. For correct +implementations of bstrwrap, it should execute with 0 failures being +reported. This test should be utilized if modifications/customizations to +bstrwrap have been performed. It tests each core bstrwrap function with +CBStrings write protected or not and ensures that the expected semantics are +observed (including expected exceptions.) Note that exceptions cannot be +disabled to run this test. Passing test is a necessary but not a sufficient +condition for ensuring the correctness of the bstrwrap module. + +=============================================================================== + +Using Bstring and CBString as an alternative to the C library +------------------------------------------------------------- + +First let us give a table of C library functions and the alternative bstring +functions and CBString methods that should be used instead of them. + +C-library Bstring alternative CBString alternative +--------- ------------------- -------------------- +gets bgets ::gets +strcpy bassign = operator +strncpy bassignmidstr ::midstr +strcat bconcat += operator +strncat bconcat + btrunc += operator + ::trunc +strtok bsplit, bsplits ::split +sprintf b(assign)format ::format +snprintf b(assign)format + btrunc ::format + ::trunc +vsprintf bvformata bvformata + +vsnprintf bvformata + btrunc bvformata + btrunc +vfprintf bvformata + fputs use bvformata + fputs +strcmp biseq, bstrcmp comparison operators. +strncmp bstrncmp, memcmp bstrncmp, memcmp +strlen ->slen, blength ::length +strdup bstrcpy constructor +strset bpattern ::fill +strstr binstr ::find +strpbrk binchr ::findchr +stricmp bstricmp cast & use bstricmp +strlwr btolower cast & use btolower +strupr btoupper cast & use btoupper +strrev bReverse (aux module) cast & use bReverse +strchr bstrchr cast & use bstrchr +strspnp use strspn use strspn +ungetc bsunread bsunread + +The top 9 C functions listed here are troublesome in that they impose memory +management in the calling function. The Bstring and CBstring interfaces have +built-in memory management, so there is far less code with far less potential +for buffer overrun problems. 
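+
+ To make the mapping above concrete, here is a minimal, hedged sketch of a
+ typical strcpy/strcat sequence rewritten with Bstrlib (the string contents
+ and variable names are illustrative only, not taken from the library):
+
+     /* C library style: char buf[64]; strcpy (buf, "Hello");
+        strcat (buf, ", world");                                 */
+     bstring b = bfromcstr ("Hello");   /* replaces strcpy       */
+     bcatcstr (b, ", world");           /* replaces strcat; the
+                                           buffer grows as needed */
+     /* bdata (b) can be passed wherever a char * is expected */
+     bdestroy (b);
+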
strtok can only be reliably called as a "leaf" +calculation, since it (quite bizarrely) maintains hidden internal state. And +gets is well known to be broken no matter what. The Bstrlib alternatives do +not suffer from those sorts of problems. + +The substitute for strncat can be performed with higher performance by using +the blk2tbstr macro to create a presized second operand for bconcat. + +C-library Bstring alternative CBString alternative +--------- ------------------- -------------------- +strspn strspn acceptable strspn acceptable +strcspn strcspn acceptable strcspn acceptable +strnset strnset acceptable strnset acceptable +printf printf acceptable printf acceptable +puts puts acceptable puts acceptable +fprintf fprintf acceptable fprintf acceptable +fputs fputs acceptable fputs acceptable +memcmp memcmp acceptable memcmp acceptable + +Remember that Bstring (and CBstring) functions will automatically append the +'\0' character to the character data buffer. So by simply accessing the data +buffer directly, ordinary C string library functions can be called directly +on them. Note that bstrcmp is not the same as memcmp in exactly the same way +that strcmp is not the same as memcmp. + +C-library Bstring alternative CBString alternative +--------- ------------------- -------------------- +fread balloc + fread ::alloc + fread +fgets balloc + fgets ::alloc + fgets + +These are odd ones because of the exact sizing of the buffer required. The +Bstring and CBString alternatives requires that the buffers are forced to +hold at least the prescribed length, then just use fread or fgets directly. +However, typically the automatic memory management of Bstring and CBstring +will make the typical use of fgets and fread to read specifically sized +strings unnecessary. + +Implementation Choices +---------------------- + +Overhead: +......... + +The bstring library has more overhead versus straight char buffers for most +functions. This overhead is essentially just the memory management and +string header allocation. This overhead usually only shows up for small +string manipulations. The performance loss has to be considered in +light of the following: + +1) What would be the performance loss of trying to write this management + code in one's own application? +2) Since the bstring library source code is given, a sufficiently powerful + modern inlining globally optimizing compiler can remove function call + overhead. + +Since the data type is exposed, a developer can replace any unsatisfactory +function with their own inline implementation. And that is besides the main +point of what the better string library is mainly meant to provide. Any +overhead lost has to be compared against the value of the safe abstraction +for coupling memory management and string functionality. + +Performance of the C interface: +............................... + +The algorithms used have performance advantages versus the analogous C +library functions. For example: + +1. bfromcstr/blk2str/bstrcpy versus strcpy/strdup. By using memmove instead + of strcpy, the break condition of the copy loop is based on an independent + counter (that should be allocated in a register) rather than having to + check the results of the load. Modern out-of-order executing CPUs can + parallelize the final branch mis-predict penality with the loading of the + source string. Some CPUs will also tend to have better built-in hardware + support for counted memory moves than load-compare-store. (This is a + minor, but non-zero gain.) +2. 
biseq versus strcmp. If the strings are unequal in length, biseq will
+ return in O(1) time. If the strings are aliased, or have aliased data
+ buffers, biseq will return in O(1) time. strcmp will always be O(k),
+ where k is the length of the common prefix or the whole string if they are
+ identical.
+ 3. ->slen versus strlen. ->slen is obviously always O(1), while strlen is
+ always O(n) where n is the length of the string.
+ 4. bconcat versus strcat. Both rely on precomputing the length of the
+ destination string argument, which will favor the bstring library. On
+ iterated concatenations the performance difference can be enormous.
+ 5. bsreadln versus fgets. The bsreadln function reads large blocks at a time
+ from the given stream, then parses out lines from the buffers directly.
+ Some C libraries will implement fgets as a loop over single fgetc calls.
+ Testing indicates that the bsreadln approach can be several times faster
+ for fast stream devices (such as a file that has been entirely cached.)
+ 6. bsplits/bsplitscb versus strspn. Accelerators for the set of match
+ characters are generated only once.
+ 7. binstr versus strstr. The binstr implementation unrolls the loops to
+ help reduce loop overhead. This will matter if the target string is
+ long and the source string is not found very early in the target string.
+ With strstr, while it is possible to unroll the source contents, it is
+ not possible to do so with the destination contents in a way that is
+ effective because every destination character must be tested against
+ '\0' before proceeding to the next character.
+ 8. bReverse versus strrev. The C function must find the end of the string
+ first before swapping character pairs.
+ 9. bstrrchr versus no comparable C function. It is not hard to write some C
+ code to search for a character from the end going backwards. But there
+ is no way to do this without computing the length of the string with
+ strlen.
+
+ Practical testing indicates that in general Bstrlib is never significantly
+ slower than the C library for common operations, while very often having a
+ performance advantage that ranges from significant to massive. Even for
+ functions like b(n)inchr versus str(c)spn() (where, in theory, there is no
+ advantage for the Bstrlib architecture) the performance of Bstrlib is vastly
+ superior to most tested C library implementations.
+
+ Some of Bstrlib's extra functionality also leads to inevitable performance
+ advantages over typical C solutions. For example, using the blk2tbstr macro,
+ one can (in O(1) time) generate an internal substring by reference while not
+ disturbing the original string. If disturbing the original string is not an
+ option, typically, a comparable char * solution would have to make a copy of
+ the substring to provide similar functionality. Another example is reverse
+ character set scanning -- the str(c)spn functions only scan in a forward
+ direction, which can complicate some parsing algorithms.
+
+ Where high performance char * based algorithms are available, Bstrlib can
+ still leverage them by accessing the ->data field on bstrings. So
+ realistically Bstrlib can never be significantly slower than any standard
+ '\0' terminated char * based solution.
+
+ Performance of the C++ interface:
+ .................................
+
+ The C++ interface has been designed with an emphasis on abstraction and safety
+ first.
However, since it is substantially a wrapper for the C bstring +functions, for longer strings the performance comments described in the +"Performance of the C interface" section above still apply. Note that the +(CBString *) type can be directly cast to a (bstring) type, and passed as +parameters to the C functions (though a CBString must never be passed to +bdestroy.) + +Probably the most controversial choice is performing full bounds checking on +the [] operator. This decision was made because 1) the fast alternative of +not bounds checking is still available by first casting the CBString to a +(const char *) buffer or to a (struct tagbstring) then derefencing .data and +2) because the lack of bounds checking is seen as one of the main weaknesses +of C/C++ versus other languages. This check being done on every access leads +to individual character extraction being actually slower than other languages +in this one respect (other language's compilers will normally dedicate more +resources on hoisting or removing bounds checking as necessary) but otherwise +bring C++ up to the level of other languages in terms of functionality. + +It is common for other C++ libraries to leverage the abstractions provided by +C++ to use reference counting and "copy on write" policies. While these +techniques can speed up some scenarios, they impose a problem with respect to +thread safety. bstrings and CBStrings can be properly protected with +"per-object" mutexes, meaning that two bstrlib calls can be made and execute +simultaneously, so long as the bstrings and CBstrings are distinct. With a +reference count and alias before copy on write policy, global mutexes are +required that prevent multiple calls to the strings library to execute +simultaneously regardless of whether or not the strings represent the same +string. + +One interesting trade off in CBString is that the default constructor is not +trivial. I.e., it always prepares a ready to use memory buffer. The purpose +is to ensure that there is a uniform internal composition for any functioning +CBString that is compatible with bstrings. It also means that the other +methods in the class are not forced to perform "late initialization" checks. +In the end it means that construction of CBStrings are slower than other +comparable C++ string classes. Initial testing, however, indicates that +CBString outperforms std::string and MFC's CString, for example, in all other +operations. So to work around this weakness it is recommended that CBString +declarations be pushed outside of inner loops. + +Practical testing indicates that with the exception of the caveats given +above (constructors and safe index character manipulations) the C++ API for +Bstrlib generally outperforms popular standard C++ string classes. Amongst +the standard libraries and compilers, the quality of concatenation operations +varies wildly and very little care has gone into search functions. Bstrlib +dominates those performance benchmarks. + +Memory management: +.................. + +The bstring functions which write and modify bstrings will automatically +reallocate the backing memory for the char buffer whenever it is required to +grow. The algorithm for resizing chosen is to snap up to sizes that are a +power of two which are sufficient to hold the intended new size. Memory +reallocation is not performed when the required size of the buffer is +decreased. This behavior can be relied on, and is necessary to make the +behaviour of balloc deterministic. 
This trades off additional memory usage +for decreasing the frequency for required reallocations: + +1. For any bstring whose size never exceeds n, its buffer is not ever + reallocated more than log_2(n) times for its lifetime. +2. For any bstring whose size never exceeds n, its buffer is never more than + 2*(n+1) in length. (The extra characters beyond 2*n are to allow for the + implicit '\0' which is always added by the bstring modifying functions.) + +Decreasing the buffer size when the string decreases in size would violate 1) +above and in real world case lead to pathological heap thrashing. Similarly, +allocating more tightly than "least power of 2 greater than necessary" would +lead to a violation of 1) and have the same potential for heap thrashing. + +Property 2) needs emphasizing. Although the memory allocated is always a +power of 2, for a bstring that grows linearly in size, its buffer memory also +grows linearly, not exponentially. The reason is that the amount of extra +space increases with each reallocation, which decreases the frequency of +future reallocations. + +Obviously, given that bstring writing functions may reallocate the data +buffer backing the target bstring, one should not attempt to cache the data +buffer address and use it after such bstring functions have been called. +This includes making reference struct tagbstrings which alias to a writable +bstring. + +balloc or bfromcstralloc can be used to preallocate the minimum amount of +space used for a given bstring. This will reduce even further the number of +times the data portion is reallocated. If the length of the string is never +more than one less than the memory length then there will be no further +reallocations. + +Note that invoking the bwriteallow macro may increase the number of reallocs +by one more than necessary for every call to bwriteallow interleaved with any +bstring API which writes to this bstring. + +The library does not use any mechanism for automatic clean up for the C API. +Thus explicit clean up via calls to bdestroy() are required to avoid memory +leaks. + +Constant and static tagbstrings: +................................ + +A struct tagbstring can be write protected from any bstrlib function using +the bwriteprotect macro. A write protected struct tagbstring can then be +reset to being writable via the bwriteallow macro. There is, of course, no +protection from attempts to directly access the bstring members. Modifying a +bstring which is write protected by direct access has undefined behavior. + +static struct tagbstrings can be declared via the bsStatic macro. They are +considered permanently unwritable. Such struct tagbstrings's are declared +such that attempts to write to it are not well defined. Invoking either +bwriteallow or bwriteprotect on static struct tagbstrings has no effect. + +struct tagbstring's initialized via btfromcstr or blk2tbstr are protected by +default but can be made writeable via the bwriteallow macro. If bwriteallow +is called on such struct tagbstring's, it is the programmer's responsibility +to ensure that: + +1) the buffer supplied was allocated from the heap. +2) bdestroy is not called on this tagbstring (unless the header itself has + also been allocated from the heap.) +3) free is called on the buffer to reclaim its memory. + +bwriteallow and bwriteprotect can be invoked on ordinary bstrings (they have +to be dereferenced with the (*) operator to get the levels of indirection +correct) to give them write protection. 
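+
+ As an illustrative sketch of the rules above (the variable names and string
+ contents are invented for the example and are not part of the library):
+
+     struct tagbstring c = bsStatic ("constant");  /* permanently read-only */
+     struct tagbstring t;
+     char * heapBuf = strdup ("heap backed");      /* buffer from the heap  */
+     bstring b = bfromcstr ("ordinary bstring");
+
+     btfromcstr (t, heapBuf);  /* write protected by default                */
+     bwriteallow (t);          /* legal: heapBuf came from the heap         */
+
+     bwriteprotect (*b);       /* protect an ordinary bstring ...           */
+     bwriteallow (*b);         /* ... and make it writable (and hence
+                                  destroyable) again                        */
+
+     bdestroy (b);
+     free (heapBuf);           /* the caller reclaims the referenced buffer */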
+ +Buffer declaration: +................... + +The memory buffer is actually declared "unsigned char *" instead of "char *". +The reason for this is to trigger compiler warnings whenever uncasted char +buffers are assigned to the data portion of a bstring. This will draw more +diligent programmers into taking a second look at the code where they +have carelessly left off the typically required cast. (Research from +AT&T/Lucent indicates that additional programmer eyeballs is one of the most +effective mechanisms at ferreting out bugs.) + +Function pointers: +.................. + +The bgets, bread and bStream functions use function pointers to obtain +strings from data streams. The function pointer declarations have been +specifically chosen to be compatible with the fgetc and fread functions. +While this may seem to be a convoluted way of implementing fgets and fread +style functionality, it has been specifically designed this way to ensure +that there is no dependency on a single narrowly defined set of device +interfaces, such as just stream I/O. In the embedded world, its quite +possible to have environments where such interfaces may not exist in the +standard C library form. Furthermore, the generalization that this opens up +allows for more sophisticated uses for these functions (performing an fgets +like function on a socket, for example.) By using function pointers, it also +allows such abstract stream interfaces to be created using the bstring library +itself while not creating a circular dependency. + +Use of int's for sizes: +....................... + +This is just a recognition that 16bit platforms with requirements for strings +that are larger than 64K and 32bit+ platforms with requirements for strings +that are larger than 4GB are pretty marginal. The main focus is for 32bit +platforms, and emerging 64bit platforms with reasonable < 4GB string +requirements. Using ints allows for negative values which has meaning +internally to bstrlib. + +Semantic consideration: +....................... + +Certain care needs to be taken when copying and aliasing bstrings. A bstring +is essentially a pointer type which points to a multipart abstract data +structure. Thus usage, and lifetime of bstrings have semantics that follow +these considerations. For example: + + bstring a, b; + struct tagbstring t; + + a = bfromcstr("Hello"); /* Create new bstring and copy "Hello" into it. */ + b = a; /* Alias b to the contents of a. */ + t = *a; /* Create a current instance pseudo-alias of a. */ + bconcat (a, b); /* Double a and b, t is now undefined. */ + bdestroy (a); /* Destroy the contents of both a and b. */ + +Variables of type bstring are really just references that point to real +bstring objects. The equal operator (=) creates aliases, and the asterisk +dereference operator (*) creates a kind of alias to the current instance (which +is generally not useful for any purpose.) Using bstrcpy() is the correct way +of creating duplicate instances. The ampersand operator (&) is useful for +creating aliases to struct tagbstrings (remembering that constructed struct +tagbstrings are not writable by default.) + +CBStrings use complete copy semantics for the equal operator (=), and thus do +not have these sorts of issues. + +Debugging: +.......... + +Bstrings have a simple, exposed definition and construction, and the library +itself is open source. So most debugging is going to be fairly straight- +forward. 
But the memory for bstrings come from the heap, which can often be +corrupted indirectly, and it might not be obvious what has happened even from +direct examination of the contents in a debugger or a core dump. There are +some tools such as Purify, Insure++ and Electric Fence which can help solve +such problems, however another common approach is to directly instrument the +calls to malloc, realloc, calloc, free, memcpy, memmove and/or other calls +by overriding them with macro definitions. + +Although the user could hack on the Bstrlib sources directly as necessary to +perform such an instrumentation, Bstrlib comes with a built-in mechanism for +doing this. By defining the macro BSTRLIB_MEMORY_DEBUG and providing an +include file named memdbg.h this will force the core Bstrlib modules to +attempt to include this file. In such a file, macros could be defined which +overrides Bstrlib's useage of the C standard library. + +Rather than calling malloc, realloc, free, memcpy or memmove directly, Bstrlib +emits the macros bstr__alloc, bstr__realloc, bstr__free, bstr__memcpy and +bstr__memmove in their place respectively. By default these macros are simply +assigned to be equivalent to their corresponding C standard library function +call. However, if they are given earlier macro definitions (via the back +door include file) they will not be given their default definition. In this +way Bstrlib's interface to the standard library can be changed but without +having to directly redefine or link standard library symbols (both of which +are not strictly ANSI C compliant.) + +An example definition might include: + + #define bstr__alloc(sz) X_malloc ((sz), __LINE__, __FILE__) + +which might help contextualize heap entries in a debugging environment. + +The NULL parameter and sanity checking of bstrings is part of the Bstrlib +API, and thus Bstrlib itself does not present any different modes which would +correspond to "Debug" or "Release" modes. Bstrlib always contains mechanisms +which one might think of as debugging features, but retains the performance +and small memory footprint one would normally associate with release mode +code. + +Integration Microsoft's Visual Studio debugger: +............................................... + +Microsoft's Visual Studio debugger has a capability of customizable mouse +float over data type descriptions. This is accomplished by editting the +AUTOEXP.DAT file to include the following: + + ; new for CBString + tagbstring =slen= mlen= + Bstrlib::CBStringList =count= + +In Visual C++ 6.0 this file is located in the directory: + + C:\Program Files\Microsoft Visual Studio\Common\MSDev98\Bin + +and in Visual Studio .NET 2003 its located here: + + C:\Program Files\Microsoft Visual Studio .NET 2003\Common7\Packages\Debugger + +This will improve the ability of debugging with Bstrlib under Visual Studio. + +Security +-------- + +Bstrlib does not come with explicit security features outside of its fairly +comprehensive error detection, coupled with its strict semantic support. +That is to say that certain common security problems, such as buffer overrun, +constant overwrite, arbitrary truncation etc, are far less likely to happen +inadvertently. Where it does help, Bstrlib maximizes its advantage by +providing developers a simple adoption path that lets them leave less secure +string mechanisms behind. 
The library will not leave developers wanting, so +they will be less likely to add new code using a less secure string library +to add functionality that might be missing from Bstrlib. + +That said there are a number of security ideas not addressed by Bstrlib: + +1. Race condition exploitation (i.e., verifying a string's contents, then +raising the privilege level and execute it as a shell command as two +non-atomic steps) is well beyond the scope of what Bstrlib can provide. It +should be noted that MFC's built-in string mutex actually does not solve this +problem either -- it just removes immediate data corruption as a possible +outcome of such exploit attempts (it can be argued that this is worse, since +it will leave no trace of the exploitation). In general race conditions have +to be dealt with by careful design and implementation; it cannot be assisted +by a string library. + +2. Any kind of access control or security attributes to prevent usage in +dangerous interfaces such as system(). Perl includes a "trust" attribute +which can be endowed upon strings that are intended to be passed to such +dangerous interfaces. However, Perl's solution reflects its own limitations +-- notably that it is not a strongly typed language. In the example code for +Bstrlib, there is a module called taint.cpp. It demonstrates how to write a +simple wrapper class for managing "untainted" or trusted strings using the +type system to prevent questionable mixing of ordinary untrusted strings with +untainted ones then passing them to dangerous interfaces. In this way the +security correctness of the code reduces to auditing the direct usages of +dangerous interfaces or promotions of tainted strings to untainted ones. + +3. Encryption of string contents is way beyond the scope of Bstrlib. +Maintaining encrypted string contents in the futile hopes of thwarting things +like using system-level debuggers to examine sensitive string data is likely +to be a wasted effort (imagine a debugger that runs at a higher level than a +virtual processor where the application runs). For more standard encryption +usages, since the bstring contents are simply binary blocks of data, this +should pose no problem for usage with other standard encryption libraries. + +Compatibility +------------- + +The Better String Library is known to compile and function correctly with the +following compilers: + + - Microsoft Visual C++ + - Watcom C/C++ + - Intel's C/C++ compiler (Windows) + - The GNU C/C++ compiler (cygwin and Linux on PPC64) + - Borland C + - Turbo C + +Setting of configuration options should be unnecessary for these compilers +(unless exceptions are being disabled or STLport has been added to WATCOM +C/C++). Bstrlib has been developed with an emphasis on portability. As such +porting it to other compilers should be straight forward. This package +includes a porting guide (called porting.txt) which explains what issues may +exist for porting Bstrlib to different compilers and environments. + +ANSI issues +----------- + +1. The function pointer types bNgetc and bNread have prototypes which are very +similar to, but not exactly the same as fgetc and fread respectively. +Basically the FILE * parameter is replaced by void *. The purpose of this +was to allow one to create other functions with fgetc and fread like +semantics without being tied to ANSI C's file streaming mechanism. 
I.e., one
+ could very easily adapt it to sockets, or simply reading a block of memory,
+ or procedurally generated strings (for fractal generation, for example.)
+
+ The problem is that invoking the functions (bNgetc)fgetc and (bNread)fread is
+ not technically legal in ANSI C. The reason is that the compiler is only
+ able to coerce the function pointers themselves into the target type; it is
+ unable to perform any cast (implicit or otherwise) on the parameters
+ passed once invoked. I.e., if internally void * and FILE * need some kind of
+ mechanical coercion, the compiler will not properly perform this conversion,
+ which leads to undefined behavior.
+
+ Apparently a platform from Data General called "Eclipse" and another from
+ Tandem called "NonStop" have a different representation for pointers to bytes
+ and pointers to words, for example, where coercion via casting is necessary.
+ (Actual confirmation of the existence of such machines is hard to come by, so
+ it is prudent to be skeptical about this information.) However, this is not
+ an issue for any known contemporary platforms. One may conclude that such
+ platforms are effectively apocryphal even if they do exist.
+
+ To correctly work around this problem to the satisfaction of the ANSI
+ limitations, one needs to create wrapper functions for fgetc and/or
+ fread with the prototypes of bNgetc and/or bNread respectively, which perform
+ no action other than to explicitly cast the void * parameter to a
+ FILE *, and simply pass the remaining parameters straight to the function
+ pointer call.
+
+ The wrappers themselves are trivial:
+
+     size_t freadWrap (void * buff, size_t esz, size_t eqty, void * parm) {
+         return fread (buff, esz, eqty, (FILE *) parm);
+     }
+
+     int fgetcWrap (void * parm) {
+         return fgetc ((FILE *) parm);
+     }
+
+ These have not been supplied in bstrlib or bstraux to prevent unnecessary
+ linking with file I/O functions.
+
+ 2. vsnprintf is not available on all compilers. Because of this, the bformat
+ and bformata functions (and format and formata methods) are not guaranteed to
+ work properly. For those compilers that don't have vsnprintf, the
+ BSTRLIB_NOVSNP macro should be set before compiling bstrlib, and the format
+ functions/methods will be disabled.
+
+ The more recent ANSI C standards have specified the required inclusion of a
+ vsnprintf function.
+
+ 3. The bstrlib function names are not unique in the first 6 characters. This
+ is only an issue for older C compiler environments which do not store more
+ than 6 characters for function names.
+
+ 4. The bsafe module defines macros and function names which are part of the
+ C library. This simply overrides the definition as expected on all platforms
+ tested, however it is not sanctioned by the ANSI standard. This module is
+ clearly optional and should be omitted on platforms which disallow its
+ undefined semantics.
+
+ In practice the real issue is that some compilers in some modes of operation
+ can/will inline these standard library functions on a module by module basis
+ as they appear in each. The linker will thus have no opportunity to override
+ the implementation of these functions for those cases. This can lead to
+ inconsistent behaviour of the bsafe module on different platforms and
+ compilers.
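+
+ Returning to item 1 above, the wrappers can be handed to the stream oriented
+ functions directly, since their prototypes already match bNgetc and bNread.
+ A brief, hedged usage sketch (the file name is arbitrary; bgets returns NULL
+ once the stream yields no further characters):
+
+     FILE * fp = fopen ("input.txt", "r");
+     bstring line;
+
+     while (fp && NULL != (line = bgets (fgetcWrap, fp, '\n'))) {
+         /* ... process line ... */
+         bdestroy (line);
+     }
+     if (fp) fclose (fp);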
+ +=============================================================================== + +Comparison with Microsoft's CString class +----------------------------------------- + +Although developed independently, CBStrings have very similar functionality to +Microsoft's CString class. However, the bstring library has significant +advantages over CString: + +1. Bstrlib is a C-library as well as a C++ library (using the C++ wrapper). + + - Thus it is compatible with more programming environments and + available to a wider population of programmers. + +2. The internal structure of a bstring is considered exposed. + + - A single contiguous block of data can be cut into read-only pieces by + simply creating headers, without allocating additional memory to create + reference copies of each of these sub-strings. + - In this way, using bstrings in a totally abstracted way becomes a choice + rather than an imposition. Further this choice can be made differently + at different layers of applications that use it. + +3. Static declaration support precludes the need for constructor + invocation. + + - Allows for static declarations of constant strings that has no + additional constructor overhead. + +4. Bstrlib is not attached to another library. + + - Bstrlib is designed to be easily plugged into any other library + collection, without dependencies on other libraries or paradigms (such + as "MFC".) + +The bstring library also comes with a few additional functions that are not +available in the CString class: + + - bsetstr + - bsplit + - bread + - breplace (this is different from CString::Replace()) + - Writable indexed characters (for example a[i]='x') + +Interestingly, although Microsoft did implement mid$(), left$() and right$() +functional analogues (these are functions from GWBASIC) they seem to have +forgotten that mid$() could be also used to write into the middle of a string. +This functionality exists in Bstrlib with the bsetstr() and breplace() +functions. + +Among the disadvantages of Bstrlib is that there is no special support for +localization or wide characters. Such things are considered beyond the scope +of what bstrings are trying to deliver. CString essentially supports the +older UCS-2 version of Unicode via widechar_t as an application-wide compile +time switch. + +CString's also use built-in mechanisms for ensuring thread safety under all +situations. While this makes writing thread safe code that much easier, this +built-in safety feature has a price -- the inner loops of each CString method +runs in its own critical section (grabbing and releasing a light weight mutex +on every operation.) The usual way to decrease the impact of a critical +section performance penalty is to amortize more operations per critical +section. But since the implementation of CStrings is fixed as a one critical +section per-operation cost, there is no way to leverage this common +performance enhancing idea. + +The search facilities in Bstrlib are comparable to those in MFC's CString +class, though it is missing locale specific collation. But because Bstrlib +is interoperable with C's char buffers, it will allow programmers to write +their own string searching mechanism (such as Boyer-Moore), or be able to +choose from a variety of available existing string searching libraries (such +as those for regular expressions) without difficulty. + +Microsoft used a very non-ANSI conforming trick in its implementation to +allow printf() to use the "%s" specifier to output a CString correctly. 
This +can be convenient, but it is inherently not portable. CBString requires an +explicit cast, while bstring requires the data member to be dereferenced. +Microsoft's own documentation recommends casting, instead of relying on this +feature. + +Comparison with C++'s std::string +--------------------------------- + +This is the C++ language's standard STL based string class. + +1. There is no C implementation. +2. The [] operator is not bounds checked. +3. Missing a lot of useful functions like printf-like formatting. +4. Some sub-standard std::string implementations (SGI) are necessarily unsafe + to use with multithreading. +5. Limited by STL's std::iostream which in turn is limited by ifstream which + can only take input from files. (Compare to CBStream's API which can take + abstracted input.) +6. Extremely uneven performance across implementations. + +Comparison with ISO C TR 24731 proposal +--------------------------------------- + +Following the ISO C99 standard, Microsoft has proposed a group of C library +extensions which are supposedly "safer and more secure". This proposal is +expected to be adopted by the ISO C standard which follows C99. + +The proposal reveals itself to be very similar to Microsoft's "StrSafe" +library. The functions are basically the same as other standard C library +string functions except that destination parameters are paired with an +additional length parameter of type rsize_t. rsize_t is the same as size_t, +however, the range is checked to make sure its between 1 and RSIZE_MAX. Like +Bstrlib, the functions perform a "parameter check". Unlike Bstrlib, when a +parameter check fails, rather than simply outputing accumulatable error +statuses, they call a user settable global error function handler, and upon +return of control performs no (additional) detrimental action. The proposal +covers basic string functions as well as a few non-reenterable functions +(asctime, ctime, and strtok). + +1. Still based solely on char * buffers (and therefore strlen() and strcat() + is still O(n), and there are no faster streq() comparison functions.) +2. No growable string semantics. +3. Requires manual buffer length synchronization in the source code. +4. No attempt to enhance functionality of the C library. +5. Introduces a new error scenario (strings exceeding RSIZE_MAX length). + +The hope is that by exposing the buffer length requirements there will be +fewer buffer overrun errors. However, the error modes are really just +transformed, rather than removed. The real problem of buffer overflows is +that they all happen as a result of erroneous programming. So forcing +programmers to manually deal with buffer limits, will make them more aware of +the problem but doesn't remove the possibility of erroneous programming. So +a programmer that erroneously mixes up the rsize_t parameters is no better off +from a programmer that introduces potential buffer overflows through other +more typical lapses. So at best this may reduce the rate of erroneous +programming, rather than making any attempt at removing failure modes. + +The error handler can discriminate between types of failures, but does not +take into account any callsite context. So the problem is that the error is +going to be manifest in a piece of code, but there is no pointer to that +code. 
It would seem that passing in the call site __FILE__, __LINE__ as +parameters would be very useful, but the API clearly doesn't support such a +thing (it would increase code bloat even more than the extra length +parameter does, and would require macro tricks to implement). + +The Bstrlib C API takes the position that error handling needs to be done at +the callsite, and just tries to make it as painless as possible. Furthermore, +error modes are removed by supporting auto-growing strings and aliasing. For +capturing errors in more central code fragments, Bstrlib's C++ API uses +exception handling extensively, which is superior to the leaf-only error +handler approach. + +Comparison with Managed String Library CERT proposal +---------------------------------------------------- + +The main webpage for the managed string library: +http://www.cert.org/secure-coding/managedstring.html + +Robert Seacord at CERT has proposed a C string library that he calls the +"Managed String Library" for C. Like Bstrlib, it introduces a new type +which is called a managed string. The structure of a managed string +(string_m) is like a struct tagbstring but missing the length field. This +internal structure is considered opaque. The length is, like the C standard +library, always computed on the fly by searching for a terminating NUL on +every operation that requires it. So it suffers from every performance +problem that the C standard library suffers from. Interoperating with C +string APIs (like printf, fopen, or anything else that takes a string +parameter) requires copying to additionally allocating buffers that have to +be manually freed -- this makes this library probably slower and more +cumbersome than any other string library in existence. + +The library gives a fully populated error status as the return value of every +string function. The hope is to be able to diagnose all problems +specifically from the return code alone. Comparing this to Bstrlib, which +aways returns one consistent error message, might make it seem that Bstrlib +would be harder to debug; but this is not true. With Bstrlib, if an error +occurs there is always enough information from just knowing there was an error +and examining the parameters to deduce exactly what kind of error has +happened. The managed string library thus gives up nested function calls +while achieving little benefit, while Bstrlib does not. + +One interesting feature that "managed strings" has is the idea of data +sanitization via character set whitelisting. That is to say, a globally +definable filter that makes any attempt to put invalid characters into strings +lead to an error and not modify the string. 
The author gives the following +example: + + // create valid char set + if (retValue = strcreate_m(&str1, "abc") ) { + fprintf( + stderr, + "Error %d from strcreate_m.\n", + retValue + ); + } + if (retValue = setcharset(str1)) { + fprintf( + stderr, + "Error %d from setcharset().\n", + retValue + ); + } + if (retValue = strcreate_m(&str1, "aabbccabc")) { + fprintf( + stderr, + "Error %d from strcreate_m.\n", + retValue + ); + } + // create string with invalid char set + if (retValue = strcreate_m(&str1, "abbccdabc")) { + fprintf( + stderr, + "Error %d from strcreate_m.\n", + retValue + ); + } + +Which we can compare with a more Bstrlib way of doing things: + + bstring bCreateWithFilter (const char * cstr, const_bstring filter) { + bstring b = bfromcstr (cstr); + if (BSTR_ERR != bninchr (b, filter) && NULL != b) { + fprintf (stderr, "Filter violation.\n"); + bdestroy (b); + b = NULL; + } + return b; + } + + struct tagbstring charFilter = bsStatic ("abc"); + bstring str1 = bCreateWithFilter ("aabbccabc", &charFilter); + bstring str2 = bCreateWithFilter ("aabbccdabc", &charFilter); + +The first thing we should notice is that with the Bstrlib approach you can +have different filters for different strings if necessary. Furthermore, +selecting a charset filter in the Managed String Library is uni-contextual. +That is to say, there can only be one such filter active for the entire +program, which means its usage is not well defined for intermediate library +usage (a library that uses it will interfere with user code that uses it, and +vice versa.) It is also likely to be poorly defined in multi-threading +environments. + +There is also a question as to whether the data sanitization filter is checked +on every operation, or just on creation operations. Since the charset can be +set arbitrarily at run time, it might be set *after* some managed strings have +been created. This would seem to imply that all functions should run this +additional check every time if there is an attempt to enforce this. This +would make things tremendously slow. On the other hand, if it is assumed that +only creates and other operations that take char *'s as input need be checked +because the charset was only supposed to be called once at and before any +other managed string was created, then one can see that its easy to cover +Bstrlib with equivalent functionality via a few wrapper calls such as the +example given above. + +And finally we have to question the value of sanitation in the first place. +For example, for httpd servers, there is generally a requirement that the +URLs parsed have some form that avoids undesirable translation to local file +system filenames or resources. The problem is that the way URLs can be +encoded, it must be completely parsed and translated to know if it is using +certain invalid character combinations. That is to say, merely filtering +each character one at a time is not necessarily the right way to ensure that +a string has safe contents. + +In the article that describes this proposal, it is claimed that it fairly +closely approximates the existing C API semantics. On this point we should +compare this "closeness" with Bstrlib: + + Bstrlib Managed String Library + ------- ---------------------- + +Pointer arithmetic Segment arithmetic N/A + +Use in C Std lib ->data, or bdata{e} getstr_m(x,*) ... 
free(x)
+
+ String literals bsStatic, bsStaticBlk strcreate_m()
+
+ Transparency Complete None
+
+ It is pretty clear that the semantic mapping from C strings to Bstrlib is
+ fairly straightforward, and that in general semantic capabilities are the
+ same or superior in Bstrlib. On the other hand the Managed String Library is
+ either missing semantics or changes things fairly significantly.
+
+ Comparison with Annexia's c2lib library
+ ---------------------------------------
+
+ This library is available at:
+ http://www.annexia.org/freeware/c2lib
+
+ 1. Still based solely on char * buffers (and therefore strlen() and strcat()
+ are still O(n), and there are no faster streq() comparison functions.)
+ Their suggestion that alternatives which wrap the string data type (as
+ bstring does) impose a difficulty in interoperating with the C language's
+ ordinary C string library is unfounded.
+ 2. Introduction of memory (and vector?) abstractions imposes a learning
+ curve, and some kind of memory usage policy that is outside of the strings
+ themselves (and therefore must be maintained by the developer.)
+ 3. The API is massive, and filled with all sorts of trivial (pjoin) and
+ controversial (pmatch -- regular expressions are not sufficiently
+ standardized, and there is a very large difference in performance between
+ compiled and non-compiled REs) functions. Bstrlib takes a decidedly
+ minimal approach -- none of the functionality in c2lib is difficult or
+ challenging to implement on top of Bstrlib (except the regex stuff, which
+ is going to be difficult, and controversial no matter what.)
+ 4. Understanding why c2lib is the way it is pretty much requires a working
+ knowledge of Perl. bstrlib requires only knowledge of the C string library
+ while providing just a very select few worthwhile extras.
+ 5. It is attached to a lot of cruft like a matrix math library (that doesn't
+ include any functions for getting the determinant, eigenvectors,
+ eigenvalues, the matrix inverse, a test for singularity, a test for
+ orthogonality, a Gram-Schmidt orthogonalization, LU decomposition ... I
+ mean why bother?)
+
+ Convincing a development house to use c2lib is likely quite difficult. It
+ introduces too much, while not being part of any kind of standards body. The
+ code must therefore be trusted, or maintained by those that use it. While
+ bstring offers nothing more on this front, since it is so much smaller, covers
+ far less in terms of scope, and will typically improve string performance,
+ the barrier to usage should be much smaller.
+
+ Comparison with stralloc/qmail
+ ------------------------------
+
+ More information about this library can be found here:
+ http://www.canonical.org/~kragen/stralloc.html or here:
+ http://cr.yp.to/lib/stralloc.html
+
+ 1. The library is very, very minimal. A little too minimal.
+ 2. Untargeted source parameters are not declared const.
+ 3. Slightly different expected emphasis (like the _cats function which takes
+ an ordinary C string char buffer as a parameter.) It is clear that the
+ remainder of the C string library is still required to perform more
+ useful string operations.
+
+ The struct declaration for their string header is essentially the same as that
+ for bstring. But it is clear that this was a quickly written hack whose goals
+ are clearly a subset of what Bstrlib supplies. For anyone who is served by
+ stralloc, Bstrlib is a complete substitute that just adds more functionality.
+
+ stralloc actually uses the interesting policy that a NULL data pointer
+ indicates an empty string.
In this way, non-static empty strings can be +declared without construction. This advantage is minimal, since static empty +bstrings can be declared inline without construction, and if the string needs +to be written to it should be constructed from an empty string (or its first +initializer) in any event. + +wxString class +-------------- + +This is the string class used in the wxWindows project. A description of +wxString can be found here: +http://www.wxwindows.org/manuals/2.4.2/wx368.htm#wxstring + +This C++ library is similar to CBString. However, it is littered with +trivial functions (IsAscii, UpperCase, RemoveLast etc.) + +1. There is no C implementation. +2. The memory management strategy is to allocate a bounded fixed amount of + additional space on each resize, meaning that it does not have the + log_2(n) property that Bstrlib has (it will thrash very easily, cause + massive fragmentation in common heap implementations, and can easily be a + common source of performance problems). +3. The library uses a "copy on write" strategy, meaning that it has to deal + with multithreading problems. + +Vstr +---- + +This is a highly orthogonal C string library with an emphasis on +networking/realtime programming. It can be found here: +http://www.and.org/vstr/ + +1. The convoluted internal structure does not contain a '\0' char * compatible + buffer, so interoperability with the C library a non-starter. +2. The API and implementation is very large (owing to its orthogonality) and + can lead to difficulty in understanding its exact functionality. +3. An obvious dependency on gnu tools (confusing make configure step) +4. Uses a reference counting system, meaning that it is not likely to be + thread safe. + +The implementation has an extreme emphasis on performance for nontrivial +actions (adds, inserts and deletes are all constant or roughly O(#operations) +time) following the "zero copy" principle. This trades off performance of +trivial functions (character access, char buffer access/coersion, alias +detection) which becomes significantly slower, as well as incremental +accumulative costs for its searching/parsing functions. Whether or not Vstr +wins any particular performance benchmark will depend a lot on the benchmark, +but it should handily win on some, while losing dreadfully on others. + +The learning curve for Vstr is very steep, and it doesn't come with any +obvious way to build for Windows or other platforms without gnu tools. At +least one mechanism (the iterator) introduces a new undefined scenario +(writing to a Vstr while iterating through it.) Vstr has a very large +footprint, and is very ambitious in its total functionality. Vstr has no C++ +API. + +Vstr usage requires context initialization via vstr_init() which must be run +in a thread-local context. Given the totally reference based architecture +this means that sharing Vstrings across threads is not well defined, or at +least not safe from race conditions. This API is clearly geared to the older +standard of fork() style multitasking in UNIX, and is not safely transportable +to modern shared memory multithreading available in Linux and Windows. There +is no portable external solution making the library thread safe (since it +requires a mutex around each Vstr context -- not each string.) + +In the documentation for this library, a big deal is made of its self hosted +s(n)printf-like function. 
This is an issue for older compilers that don't
include vsnprintf(), but also an issue because Vstr has a slow conversion to
a '\0' terminated char * mechanism.  That is to say, using "%s" to format data
that originates from Vstr would be slow without some sort of native function
to do so.  Bstrlib sidesteps the issue by relying on what snprintf-like
functionality does exist and having a high performance conversion to a char *
compatible string so that "%s" can be used directly.

Str Library
-----------

This is a fairly extensive string library that includes full Unicode support
and is targeted at the goal of outperforming MFC and STL.  The architecture,
similar to MFC's CString, is a copy-on-write reference counting mechanism.

http://www.utilitycode.com/str/default.aspx

1. Commercial.
2. C++ only.

This library, like Vstr, uses a ref counting system.  There is only so deeply
I can analyze it, since I don't have a license for it.  However, performance
improvements over MFC and STL don't seem like a sufficient reason to
move your source base to it.  For example, in the future, Microsoft may
improve the performance of CString.

It should be pointed out that performance testing of Bstrlib has indicated
that its relative performance advantage versus MFC's CString and STL's
std::string is at least as high as that for the Str library.

libmib astrings
---------------

A handful of functional extensions to the C library that add dynamic string
functionality.
http://www.mibsoftware.com/libmib/astring/

This package basically references strings through char ** pointers and assumes
they are pointing to the top of an allocated heap entry (or NULL, in which
case memory will be newly allocated from the heap.)  So it's still up to the
user to mix and match the older C string functions with these functions
whenever pointer arithmetic is used (i.e., there is no leveraging of the type
system to assert semantic differences between references and base strings as
Bstrlib does, since no new types are introduced.)  Unlike Bstrlib, exact
string length metadata is not stored, thus requiring a strlen() call on
*every* string writing operation.  The library is very small, covering only a
handful of C's functions.

While this is better than nothing, it is clearly slower than even the
standard C library, less safe, and less functional than Bstrlib.

To explain the advantage of using libmib, their website shows an example of
how dangerous C code:

    char buf[256];
    char *pszExtraPath = ";/usr/local/bin";

    strcpy(buf,getenv("PATH"));    /* oops! could overrun! */
    strcat(buf,pszExtraPath);      /* Could overrun as well! */

    printf("Checking...%s\n",buf); /* Some printfs overrun too! */

is avoided using libmib:

    char *pasz = 0;                /* Must initialize to 0 */
    char *paszOut = 0;
    char *pszExtraPath = ";/usr/local/bin";

    if (!astrcpy(&pasz,getenv("PATH"))) /* malloc error */ exit(-1);
    if (!astrcat(&pasz,pszExtraPath))   /* malloc error */ exit(-1);

    /* Finally, a "limitless" printf! we can use */
    asprintf(&paszOut,"Checking...%s\n",pasz);
    fputs(paszOut,stdout);

    astrfree(&pasz);    /* Can use free(pasz) also. */
    astrfree(&paszOut);

However, compare this to Bstrlib:

    bstring b, out;

    bcatcstr (b = bfromcstr (getenv ("PATH")), ";/usr/local/bin");
    out = bformat ("Checking...%s\n", bdatae (b, ""));
    /* if (out && b) */ fputs (bdatae (out, ""), stdout);
    bdestroy (b);
    bdestroy (out);

Besides being shorter, we can see that error handling can be deferred right
to the very end.  Also, unlike the above two versions, if getenv() returns
NULL, the Bstrlib version will not exhibit undefined behavior.
Initialization starts with the relevant content rather than an extra
autoinitialization step.

libclc
------

An attempt to add to the standard C library with a number of common useful
functions, including additional string functions.
http://libclc.sourceforge.net/

1. Uses standard char * buffers, and adopts C99's usage of "restrict" to pass
   the responsibility to guard against aliasing to the programmer.
2. Adds no safety or memory management whatsoever.
3. Most of the supplied string functions are completely trivial.

The goals of libclc and Bstrlib are clearly quite different.

fireString
----------

http://firestuff.org/

1. Uses standard char * buffers, and adopts C99's usage of "restrict" to pass
   the responsibility to guard against aliasing to the programmer.
2. Mixes char * and length-wrapped buffer (estr) functions, doubling the API
   size, with safety limited to only half of the functions.

Firestring was originally just a wrapper of char * functionality with extra
length parameters.  However, it has been augmented with the inclusion of the
estr type, which has similar functionality to stralloc.  But firestring does
not nearly cover the functional scope of Bstrlib.

Safe C String Library
---------------------

A library written for the purpose of increasing the safety and power of C's
string handling capabilities.
http://www.zork.org/safestr/safestr.html

1. While the safestr_* functions are safe in and of themselves, interoperating
   with char * strings has dangerous unsafe modes of operation.
2. The architecture of safestr causes the base pointer to change.  Thus,
   it's not practical/safe to store a safestr in multiple locations if any
   single instance can be manipulated.
3. Dependent on an additional error handling library.
4. Uses reference counting, meaning that it is either not thread safe or
   slow and not portable.

I think the idea of reallocating (and hence potentially changing) the base
pointer is a serious design flaw that is fatal to this architecture.  True
safety is obtained by having automatic handling of all common scenarios
without creating implicit constraints on the user.

Because of its automatic temporary clean-up system, it cannot use "const"
semantics on input arguments.  Interesting anomalies such as:

    safestr_t s, t;
    s = safestr_replace (t = SAFESTR_TEMP ("This is a test"),
                         SAFESTR_TEMP (" "), SAFESTR_TEMP ("."));
    /* t is now undefined. */

are possible.  If one defines a function which takes a safestr_t as a
parameter, then the function would not know whether or not the safestr_t is
defined after it passes it to a safestr library function.  The
author-recommended method for working around this problem is to examine the
attributes of the safestr_t within the function which is to modify any of
its parameters and play games with its reference count.  I think, therefore,
that the whole SAFESTR_TEMP idea is also fatally broken.
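For contrast, the same sort of replacement can be written with Bstrlib's
bfindreplace(), which takes const_bstring find/replace arguments and leaves
them untouched.  The following is only a minimal illustrative sketch (it is
not taken from the library's own examples, and the variable names are
arbitrary):

    struct tagbstring space = bsStatic (" ");
    struct tagbstring dot   = bsStatic (".");
    bstring s = bfromcstr ("This is a test");

    bfindreplace (s, &space, &dot, 0); /* replace every " " with "." */
    puts (bdatae (s, "NULL"));
    bdestroy (s);

No temporaries are created behind the caller's back, so nothing is left in an
undefined state after the call.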
+ +The library implements immutability, optional non-resizability, and a "trust" +flag. This trust flag is interesting, and suggests that applying any +arbitrary sequence of safestr_* function calls on any set of trusted strings +will result in a trusted string. It seems to me, however, that if one wanted +to implement a trusted string semantic, one might do so by actually creating +a different *type* and only implement the subset of string functions that are +deemed safe (i.e., user input would be excluded, for example.) This, in +essence, would allow the compiler to enforce trust propogation at compile +time rather than run time. Non-resizability is also interesting, however, +it seems marginal (i.e., to want a string that cannot be resized, yet can be +modified and yet where a fixed sized buffer is undesirable.) + +=============================================================================== + +Examples +-------- + + Dumping a line numbered file: + + FILE * fp; + int i, ret; + struct bstrList * lines; + struct tagbstring prefix = bsStatic ("-> "); + + if (NULL != (fp = fopen ("bstrlib.txt", "rb"))) { + bstring b = bread ((bNread) fread, fp); + fclose (fp); + if (NULL != (lines = bsplit (b, '\n'))) { + for (i=0; i < lines->qty; i++) { + binsert (lines->entry[i], 0, &prefix, '?'); + printf ("%04d: %s\n", i, bdatae (lines->entry[i], "NULL")); + } + bstrListDestroy (lines); + } + bdestroy (b); + } + +For numerous other examples, see bstraux.c, bstraux.h and the example archive. + +=============================================================================== + +License +------- + +This is a fork of The Better String Library, licensed under the 3-clause BSD +license (see the accompanying license.txt). The original work is available under +either the 3 clause BSD license or the Gnu Public License version 2 at the option +of the user. + +=============================================================================== + +Acknowledgements +---------------- + +The following individuals have made significant contributions to the design +and testing of the Better String Library: + +Bjorn Augestad +Clint Olsen +Darryl Bleau +Fabian Cenedese +Graham Wideman +Ignacio Burgueno +International Business Machines Corporation +Ira Mica +John Kortink +Manuel Woelker +Marcel van Kervinck +Michael Hsieh +Richard A. Smith +Simon Ekstrom +Wayne Scott + +=============================================================================== diff --git a/src/cbstring/license.txt b/src/cbstring/license.txt new file mode 100644 index 0000000..cf78a98 --- /dev/null +++ b/src/cbstring/license.txt @@ -0,0 +1,29 @@ +Copyright (c) 2002-2008 Paul Hsieh +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + Neither the name of bstrlib nor the names of its contributors may be used + to endorse or promote products derived from this software without + specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

diff --git a/src/cbstring/porting.txt b/src/cbstring/porting.txt new file mode 100644 index 0000000..11d8d13 --- /dev/null +++ b/src/cbstring/porting.txt @@ -0,0 +1,172 @@

Better String library Porting Guide
-----------------------------------

by Paul Hsieh

The bstring library is an attempt to provide improved string processing
functionality to the C and C++ languages.  At the heart of the bstring library
is the management of "bstring"s which are a significant improvement over '\0'
terminated char buffers.  See the accompanying documentation file bstrlib.txt
for more information.

===============================================================================

Identifying the Compiler
------------------------

Bstrlib has been tested on the following compilers:

    Microsoft Visual C++
    Watcom C/C++ (32 bit flat)
    Intel's C/C++ compiler (on Windows)
    The GNU C/C++ compiler (on Windows/Linux on x86 and PPC64)
    Borland C++
    Turbo C

There are slight differences in these compilers which require slight
differences in the implementation of Bstrlib.  These are accommodated in the
same sources using #ifdef/#if defined() on compiler-specific macros.  To
port Bstrlib to a new compiler not listed above, it is recommended that the
same strategy be followed.  If you are unaware of the compiler-specific
identifying preprocessor macro for your compiler you might find it here:

http://predef.sourceforge.net/precomp.html

Note that Intel C/C++ on Windows sets the Microsoft identifier: _MSC_VER.

16-bit vs. 32-bit vs. 64-bit Systems
------------------------------------

Bstrlib has been architected to deal with strings of length between 0 and
INT_MAX (inclusive).  Since the maximum value of an int never exceeds the
maximum value of a size_t, there is no issue here.  Note that on most 64-bit
systems int is 32-bit.

Dependency on The C-Library
---------------------------

Bstrlib uses the functions memcpy, memmove, malloc, realloc, free and
vsnprintf.  Many freestanding C compiler implementations that have a mode in
which the C library is not available will typically not include these
functions, which makes porting Bstrlib to them onerous.  Bstrlib is not
designed for such bare-bones compiler environments.  This usually includes
compilers that target ROM environments.

Porting Issues
--------------

Bstrlib has been written completely in ANSI/ISO C and ISO C++; however, there
are still a few porting issues.  These are described below.

1. The vsnprintf () function.

Unfortunately, the earlier ANSI/ISO C standards did not include this function.
If the compiler of interest does not support this function, then the
BSTRLIB_NOVSNP macro should be defined via something like:

    #if !defined (BSTRLIB_VSNP_OK) && !defined (BSTRLIB_NOVSNP)
    # if defined (__TURBOC__) || defined (__COMPILERVENDORSPECIFICMACRO__)
    #  define BSTRLIB_NOVSNP
    # endif
    #endif

which appears at the top of bstrlib.h.  Note that the bformat(a) functions
will not be declared or implemented if the BSTRLIB_NOVSNP macro is set.  If
the compiler has renamed vsnprintf() to some other named function, then
search for the definition of the exvsnprintf macro in the bstrlib.c file and
be sure it is defined appropriately:

    #if defined (__COMPILERVENDORSPECIFICMACRO__)
    # define exvsnprintf(r,b,n,f,a) {r=__compiler_specific_vsnprintf(b,n,f,a);}
    #else
    # define exvsnprintf(r,b,n,f,a) {r=vsnprintf(b,n,f,a);}
    #endif

Take notice of the return value being captured in the variable r.  It is
assumed that r exceeds n if and only if the underlying vsnprintf function has
determined what the true maximal output length would be if the buffer were
large enough to hold it.  Non-modern implementations must output a lesser
number (the macro can and should be modified to ensure this).

2. Weak C++ compiler.

C++ is a much more complicated language to implement than C.  This has led
to varying quality of compiler implementations.  The weaknesses isolated in
the initial ports are inclusion of the Standard Template Library,
std::iostream and exception handling.  By default it is assumed that the C++
compiler supports all of these things correctly.  If your compiler does not
support one or more of these, define the corresponding macro:

    BSTRLIB_CANNOT_USE_STL
    BSTRLIB_CANNOT_USE_IOSTREAM
    BSTRLIB_DOESNT_THROW_EXCEPTIONS

The detected compiler-specific macro should be defined at the top of
bstrwrap.h in the Configuration defines section.  Note that these disabling
macros can be overridden with the associated enabling macro if a subsequent
version of the compiler gains support.  (For example, it's possible to rig
up STLport to provide STL support for WATCOM C/C++, so -DBSTRLIB_CAN_USE_STL
can be passed in as a compiler option.)

3. The bsafe module, and reserved words.

The bsafe module is in gross violation of the ANSI/ISO C standard in the
sense that it redefines what could be implemented as reserved words on a
given compiler.  The typical problem is that a compiler may inline some of the
functions, and thus they will not be properly overridden by the definitions in
the bsafe module.  It is also possible that a compiler may prohibit the
redefinitions in the bsafe module.  Compiler-specific action will be required
to deal with these situations.

Platform Specific Files
-----------------------

The makefiles for the examples are basically set up for particular
environments for each platform.  In general these makefiles are not portable
and should be constructed as necessary from scratch for each platform.

Testing a port
--------------

To test that a port compiles correctly, do the following:

1. Build a sample project that includes the bstrlib, bstraux, bstrwrap, and
   bsafe modules.
2. Compile bstest against the bstrlib module.
3. Run bstest and ensure that 0 errors are reported.
4. Compile test against the bstrlib and bstrwrap modules.
5. Run test and ensure that 0 errors are reported.
6.
Compile each of the examples (except for the "re" example, which may be + complicated and is not a real test of bstrlib and except for the mfcbench + example which is Windows specific.) +7. Run each of the examples. + +The builds must have 0 errors, and should have the absolute minimum number of +warnings (in most cases can be reduced to 0.) The result of execution should +be essentially identical on each platform. + +Performance +----------- + +Different CPU and compilers have different capabilities in terms of +performance. It is possible for Bstrlib to assume performance +characteristics that a platform doesn't have (since it was primarily +developed on just one platform). The goal of Bstrlib is to provide very good +performance on all platforms regardless of this but without resorting to +extreme measures (such as using assembly language, or non-portable intrinsics +or library extensions.) + +There are two performance benchmarks that can be found in the example/ +directory. They are: cbench.c and cppbench.cpp. These are variations and +expansions of a benchmark for another string library. They don't cover all +string functionality, but do include the most basic functions which will be +common in most string manipulation kernels. + +............................................................................... + +Feedback +-------- + +In all cases, you may email issues found to the primary author of Bstrlib at +the email address: websnarf@users.sourceforge.net + +=============================================================================== diff --git a/src/cbstring/security.txt b/src/cbstring/security.txt new file mode 100644 index 0000000..9761409 --- /dev/null +++ b/src/cbstring/security.txt @@ -0,0 +1,221 @@ +Better String library Security Statement +---------------------------------------- + +by Paul Hsieh + +=============================================================================== + +Introduction +------------ + +The Better String library (hereafter referred to as Bstrlib) is an attempt to +provide improved string processing functionality to the C and C++ languages. +At the heart of the Bstrlib is the management of "bstring"s which are a +significant improvement over '\0' terminated char buffers. See the +accompanying documenation file bstrlib.txt for more information. + +DISCLAIMER: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND +CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT +NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Like any software, there is always a possibility of failure due to a flawed +implementation. Nevertheless a good faith effort has been made to minimize +such flaws in Bstrlib. Also, use of Bstrlib by itself will not make an +application secure or free from implementation failures. However, it is the +author's conviction that use of Bstrlib can greatly facilitate the creation +of software meeting the highest possible standards of security. 
Part of the reason why this document has been created is for the purpose of
security auditing, or the creation of further "Statements on Security" for
software that is created using Bstrlib.  An auditor may check the claims
below against Bstrlib, and use this as a basis for analysis of software which
uses Bstrlib.

===============================================================================

Statement on Security
---------------------

This is a document intended to give consumers of the Better String Library
who are interested in security an idea of where the Better String Library
stands on various security issues.  Any deviation observed in the actual
library itself from the descriptions below should be considered an
implementation error, not a design flaw.

This statement is not an analytical proof of correctness or an outline of one,
but rather an assertion similar to a scientific claim or hypothesis.  By use,
testing and open independent examination (otherwise known as scientific
falsifiability), the credibility of the claims made below can rise to the
level of an established theory.

Common security issues:
.......................

1. Buffer Overflows

The Bstrlib API allows the programmer a way to deal with strings without
having to deal with the buffers containing them.  Ordinary usage of the
Bstrlib API itself makes buffer overflows impossible.

Furthermore, the Bstrlib API has a superset of basic string functionality as
compared to the C library's char * functions, C++'s std::string class and
Microsoft's MFC-based CString class.  It also has abstracted mechanisms for
dealing with IO.  This is important as it gives developers a way of migrating
all their code from a functionality point of view.

2. Memory size overflow/wrap around attack

Bstrlib is, by design, impervious to memory size overflow attacks.  The
reason it is resilient to length overflows is that bstring lengths are
bounded above by INT_MAX, instead of ~(size_t)0.  So length addition
overflows cause a wrap-around of the integer value, making it negative and
causing balloc() to fail before an erroneous operation can occur.  Attempted
conversions of char * strings which may have lengths greater than INT_MAX are
detected and the conversion is aborted.

It is unknown if this property holds on machines that don't represent
integers as two's complement.  It is recommended that Bstrlib be carefully
audited by anyone using a system which is not two's complement based.

3. Constant string protection

Bstrlib implements runtime-enforced constant and read-only string semantics.
I.e., bstrings which are declared as constant via the bsStatic() macro cannot
be modified or deallocated directly through the Bstrlib API, and this cannot
be subverted by casting or other type coercion.  This is independent of the
use of the const_bstring data type.

The Bstrlib C API uses the type const_bstring to specify bstring parameters
whose contents do not change.  Although the C language cannot enforce this,
this is nevertheless guaranteed by the implementation of the Bstrlib library
of C functions.  The C++ API enforces the const attribute on CBString types
correctly.

4. Aliased bstring support

Bstrlib detects and supports aliased parameter management throughout the API.
The kind of aliasing that is allowed is the one where pointers of the same
basic type may be pointing to overlapping objects (this is the assumption the
ANSI C99 specification makes.)
Each function behaves as if all read-only +parameters were copied to temporaries which are used in their stead before +the function is enacted (it rarely actually does this). No function in the +Bstrlib uses the "restrict" parameter attribute from the ANSI C99 +specification. + +5. Information leaking + +In bstraux.h, using the semantically equivalent macros bSecureDestroy() and +bSecureWriteProtect() in place of bdestroy() and bwriteprotect() respectively +will ensure that stale data does not linger in the heap's free space after +strings have been released back to memory. Created bstrings or CBStrings +are not linked to anything external to themselves, and thus cannot expose +deterministic data leaking. If a bstring is resized, the preimage may exist +as a copy that is released to the heap. Thus for sensitive data, the bstring +should be sufficiently presized before manipulated so that it is not resized. +bSecureInput() has been supplied in bstraux.c, which can be used to obtain +input securely without any risk of leaving any part of the input image in the +heap except for the allocated bstring that is returned. + +6. Memory leaking + +Bstrlib can be built using memdbg.h enabled via the BSTRLIB_MEMORY_DEBUG +macro. User generated definitions for malloc, realloc and free can then be +supplied which can implement special strategies for memory corruption +detection or memory leaking. Otherwise, bstrlib does not do anything out of +the ordinary to attempt to deal with the standard problem of memory leaking +(i.e., losing references to allocated memory) when programming in the C and +C++ languages. However, it does not compound the problem any more than exists +either, as it doesn't have any intrinsic inescapable leaks in it. Bstrlib +does not preclude the use of automatic garbage collection mechanisms such as +the Boehm garbage collector. + +7. Encryption + +Bstrlib does not present any built-in encryption mechanism. However, it +supports full binary contents in its data buffers, so any standard block +based encryption mechanism can make direct use of bstrings/CBStrings for +buffer management. + +8. Double freeing + +Freeing a pointer that is already free is an extremely rare, but nevertheless +a potentially ruthlessly corrupting operation (its possible to cause Win 98 to +reboot, by calling free mulitiple times on already freed data using the WATCOM +CRT.) Bstrlib invalidates the bstring header data before freeing, so that in +many cases a double free will be detected and an error will be reported +(though this behaviour is not guaranteed and should not be relied on). + +Using bstrFree pervasively (instead of bdestroy) can lead to somewhat +improved invalid free avoidance (it is completely safe whenever bstring +instances are only stored in unique variables). For example: + + struct tagbstring hw = bsStatic ("Hello, world"); + bstring cpHw = bstrcpy (&hw); + + #ifdef NOT_QUITE_AS_SAFE + bdestroy (cpHw); /* Never fail */ + bdestroy (cpHw); /* Error sometimes detected at runtime */ + bdestroy (&hw); /* Error detected at run time */ + #else + bstrFree (cpHw); /* Never fail */ + bstrFree (cpHw); /* Will do nothing */ + bstrFree (&hw); /* Will lead to a compile time error */ + #endif + +9. Resource based denial of service + +bSecureInput() has been supplied in bstraux.c. It has an optional upper limit +for input length. But unlike fgets(), it is also easily determined if the +buffer has been truncated early. 
In this way, a program can set an upper limit +on input sizes while still allowing for implementing context specific +truncation semantics (i.e., does the program consume but dump the extra +input, or does it consume it in later inputs?) + +10. Mixing char *'s and bstrings + +The bstring and char * representations are not identical. So there is a risk +when converting back and forth that data may lost. Essentially bstrings can +contain '\0' as a valid non-terminating character, while char * strings +cannot and in fact must use the character as a terminator. The risk of data +loss is very low, since: + + A) the simple method of only using bstrings in a char * semantically + compatible way is both easy to achieve and pervasively supported. + B) obtaining '\0' content in a string is either deliberate or indicative + of another, likely more serious problem in the code. + C) the library comes with various functions which deal with this issue + (namely: bfromcstr(), bstr2cstr (), and bSetCstrChar ()) + +Marginal security issues: +......................... + +11. 8-bit versus 9-bit portability + +Bstrlib uses CHAR_BIT and other limits.h constants to the maximum extent +possible to avoid portability problems. However, Bstrlib has not been tested +on any system that does not represent char as 8-bits. So whether or not it +works on 9-bit systems is an open question. It is recommended that Bstrlib be +carefully auditted by anyone using a system in which CHAR_BIT is not 8. + +12. EBCDIC/ASCII/UTF-8 data representation attacks. + +Bstrlib uses ctype.h functions to ensure that it remains portable to non- +ASCII systems. It also checks range to make sure it is well defined even for +data that ANSI does not define for the ctype functions. + +Obscure issues: +............... + +13. Data attributes + +There is no support for a Perl-like "taint" attribute, however, an example of +how to do this using C++'s type system is given as an example. + diff --git a/src/decode.cpp b/src/decode.cpp new file mode 100644 index 0000000..cd13045 --- /dev/null +++ b/src/decode.cpp @@ -0,0 +1,1639 @@ + +#include "internal_includes/tokens.h" +#include "internal_includes/decode.h" +#include "stdlib.h" +#include "stdio.h" +#include "internal_includes/reflect.h" +#include "internal_includes/debug.h" +#include "internal_includes/toGLSLOperand.h" +#include "internal_includes/Shader.h" +#include "internal_includes/Instruction.h" +#include "internal_includes/Declaration.h" + +#define FOURCC(a, b, c, d) ((uint32_t)(uint8_t)(a) | ((uint32_t)(uint8_t)(b) << 8) | ((uint32_t)(uint8_t)(c) << 16) | ((uint32_t)(uint8_t)(d) << 24 )) +enum { FOURCC_DXBC = FOURCC('D', 'X', 'B', 'C') }; //DirectX byte code +enum { FOURCC_SHDR = FOURCC('S', 'H', 'D', 'R') }; //Shader model 4 code +enum { FOURCC_SHEX = FOURCC('S', 'H', 'E', 'X') }; //Shader model 5 code +enum { FOURCC_RDEF = FOURCC('R', 'D', 'E', 'F') }; //Resource definition (e.g. 
constant buffers) +enum { FOURCC_ISGN = FOURCC('I', 'S', 'G', 'N') }; //Input signature +enum { FOURCC_IFCE = FOURCC('I', 'F', 'C', 'E') }; //Interface (for dynamic linking) +enum { FOURCC_OSGN = FOURCC('O', 'S', 'G', 'N') }; //Output signature +enum { FOURCC_PSGN = FOURCC('P', 'C', 'S', 'G') }; //Patch-constant signature + +enum { FOURCC_ISG1 = FOURCC('I', 'S', 'G', '1') }; //Input signature with Stream and MinPrecision +enum { FOURCC_OSG1 = FOURCC('O', 'S', 'G', '1') }; //Output signature with Stream and MinPrecision +enum { FOURCC_OSG5 = FOURCC('O', 'S', 'G', '5') }; //Output signature with Stream +enum { FOURCC_PSG1 = FOURCC('P', 'S', 'G', '1') }; //Patch constant signature with MinPrecision + +enum { FOURCC_STAT = FOURCC('S', 'T', 'A', 'T') }; // Chunks that we ignore +enum { FOURCC_SFI0 = FOURCC('S', 'F', 'I', '0') }; // Chunks that we ignore + + +typedef struct DXBCContainerHeaderTAG +{ + unsigned fourcc; + uint32_t unk[4]; + uint32_t one; + uint32_t totalSize; + uint32_t chunkCount; +} DXBCContainerHeader; + +typedef struct DXBCChunkHeaderTAG +{ + unsigned fourcc; + unsigned size; +} DXBCChunkHeader; + +#ifdef _DEBUG +static uint64_t operandID = 0; +static uint64_t instructionID = 0; +#endif + +void DecodeNameToken(const uint32_t* pui32NameToken, Operand* psOperand) +{ + psOperand->eSpecialName = DecodeOperandSpecialName(*pui32NameToken); + switch(psOperand->eSpecialName) + { + case NAME_UNDEFINED: + { + psOperand->specialName = "undefined"; + break; + } + case NAME_POSITION: + { + psOperand->specialName = "position"; + break; + } + case NAME_CLIP_DISTANCE: + { + psOperand->specialName = "clipDistance"; + break; + } + case NAME_CULL_DISTANCE: + { + psOperand->specialName = "cullDistance"; + break; + } + case NAME_RENDER_TARGET_ARRAY_INDEX: + { + psOperand->specialName = "renderTargetArrayIndex"; + break; + } + case NAME_VIEWPORT_ARRAY_INDEX: + { + psOperand->specialName = "viewportArrayIndex"; + break; + } + case NAME_VERTEX_ID: + { + psOperand->specialName = "vertexID"; + break; + } + case NAME_PRIMITIVE_ID: + { + psOperand->specialName = "primitiveID"; + break; + } + case NAME_INSTANCE_ID: + { + psOperand->specialName = "instanceID"; + break; + } + case NAME_IS_FRONT_FACE: + { + psOperand->specialName = "isFrontFace"; + break; + } + case NAME_SAMPLE_INDEX: + { + psOperand->specialName = "sampleIndex"; + break; + } + //For the quadrilateral domain, there are 6 factors (4 sides, 2 inner). + case NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: + case NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: + case NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR: + case NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR: + + //For the triangular domain, there are 4 factors (3 sides, 1 inner) + case NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_INSIDE_TESSFACTOR: + + //For the isoline domain, there are 2 factors (detail and density). + case NAME_FINAL_LINE_DETAIL_TESSFACTOR: + case NAME_FINAL_LINE_DENSITY_TESSFACTOR: + { + psOperand->specialName = "tessFactor"; + break; + } + default: + { + ASSERT(0); + break; + } + } + + return; +} + +// Find the declaration of the texture described by psTextureOperand and +// mark it as a shadow type. (e.g. 
accessed via sampler2DShadow rather than sampler2D) +static void MarkTextureAsShadow(ShaderInfo* psShaderInfo, std::vector &declarations, const Operand* psTextureOperand) +{ + ASSERT(psTextureOperand->eType == OPERAND_TYPE_RESOURCE); + + for (std::vector::iterator psDecl = declarations.begin(); psDecl != declarations.end(); psDecl++) + { + if(psDecl->eOpcode == OPCODE_DCL_RESOURCE) + { + if(psDecl->asOperands[0].eType == OPERAND_TYPE_RESOURCE && + psDecl->asOperands[0].ui32RegisterNumber == psTextureOperand->ui32RegisterNumber) + { + psDecl->ui32IsShadowTex = 1; + break; + } + } + } +} + +static void MarkTextureSamplerPair(ShaderInfo* psShaderInfo, std::vector & declarations, const Operand* psTextureOperand, const Operand* psSamplerOperand, TextureSamplerPairs& samplers) +{ + ASSERT(psTextureOperand->eType == OPERAND_TYPE_RESOURCE); + ASSERT(psSamplerOperand->eType == OPERAND_TYPE_SAMPLER); + + for (std::vector::iterator psDecl = declarations.begin(); psDecl != declarations.end(); psDecl++) + { + if(psDecl->eOpcode == OPCODE_DCL_RESOURCE) + { + if(psDecl->asOperands[0].eType == OPERAND_TYPE_RESOURCE && + psDecl->asOperands[0].ui32RegisterNumber == psTextureOperand->ui32RegisterNumber) + { + // psDecl is the texture resource referenced by psTextureOperand + + // add psSamplerOperand->ui32RegisterNumber to list of samplers that use this texture + // set::insert returns a pair of which .second tells whether a new element was actually added + if (psDecl->samplersUsed.insert(psSamplerOperand->ui32RegisterNumber).second) + { + // Record the texturename_X_samplername string in the TextureSamplerPair array that we return to the client + std::string combinedname = TextureSamplerName(psShaderInfo, psTextureOperand->ui32RegisterNumber, psSamplerOperand->ui32RegisterNumber, psDecl->ui32IsShadowTex); + samplers.push_back(combinedname); + } + break; + } + } + } +} + +uint32_t DecodeOperand (const uint32_t *pui32Tokens, Operand* psOperand) +{ + int i; + uint32_t ui32NumTokens = 1; + OPERAND_NUM_COMPONENTS eNumComponents; + +#ifdef _DEBUG + psOperand->id = operandID++; +#endif + + //Some defaults + psOperand->iWriteMaskEnabled = 1; + psOperand->iGSInput = 0; + psOperand->iPSInOut = 0; + psOperand->aeDataType[0] = SVT_FLOAT; + psOperand->aeDataType[1] = SVT_FLOAT; + psOperand->aeDataType[2] = SVT_FLOAT; + psOperand->aeDataType[3] = SVT_FLOAT; + + psOperand->iExtended = DecodeIsOperandExtended(*pui32Tokens); + + + psOperand->eModifier = OPERAND_MODIFIER_NONE; + psOperand->m_SubOperands[0].reset(); + psOperand->m_SubOperands[1].reset(); + psOperand->m_SubOperands[2].reset(); + + psOperand->eMinPrecision = OPERAND_MIN_PRECISION_DEFAULT; + + /* Check if this instruction is extended. 
If it is, + * we need to print the information first */ + if (psOperand->iExtended) + { + /* OperandToken1 is the second token */ + ui32NumTokens++; + + if(DecodeExtendedOperandType(pui32Tokens[1]) == EXTENDED_OPERAND_MODIFIER) + { + psOperand->eModifier = DecodeExtendedOperandModifier(pui32Tokens[1]); + psOperand->eMinPrecision = (OPERAND_MIN_PRECISION)DecodeOperandMinPrecision(pui32Tokens[1]); + } + + } + + psOperand->iIndexDims = DecodeOperandIndexDimension(*pui32Tokens); + psOperand->eType = DecodeOperandType(*pui32Tokens); + + psOperand->ui32RegisterNumber = 0; + + eNumComponents = DecodeOperandNumComponents(*pui32Tokens); + + if (psOperand->eType == OPERAND_TYPE_INPUT_GS_INSTANCE_ID) + { + eNumComponents = OPERAND_1_COMPONENT; + psOperand->aeDataType[0] = SVT_UINT; + } + + switch(eNumComponents) + { + case OPERAND_1_COMPONENT: + { + psOperand->iNumComponents = 1; + break; + } + case OPERAND_4_COMPONENT: + { + psOperand->iNumComponents = 4; + break; + } + default: + { + psOperand->iNumComponents = 0; + break; + } + } + + if(psOperand->iWriteMaskEnabled && + psOperand->iNumComponents == 4) + { + psOperand->eSelMode = DecodeOperand4CompSelMode(*pui32Tokens); + + if(psOperand->eSelMode == OPERAND_4_COMPONENT_MASK_MODE) + { + psOperand->ui32CompMask = DecodeOperand4CompMask(*pui32Tokens); + } + else + if(psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) + { + psOperand->ui32Swizzle = DecodeOperand4CompSwizzle(*pui32Tokens); + + if(psOperand->ui32Swizzle != NO_SWIZZLE) + { + psOperand->aui32Swizzle[0] = DecodeOperand4CompSwizzleSource(*pui32Tokens, 0); + psOperand->aui32Swizzle[1] = DecodeOperand4CompSwizzleSource(*pui32Tokens, 1); + psOperand->aui32Swizzle[2] = DecodeOperand4CompSwizzleSource(*pui32Tokens, 2); + psOperand->aui32Swizzle[3] = DecodeOperand4CompSwizzleSource(*pui32Tokens, 3); + } + else + { + psOperand->aui32Swizzle[0] = OPERAND_4_COMPONENT_X; + psOperand->aui32Swizzle[1] = OPERAND_4_COMPONENT_Y; + psOperand->aui32Swizzle[2] = OPERAND_4_COMPONENT_Z; + psOperand->aui32Swizzle[3] = OPERAND_4_COMPONENT_W; + } + } + else + if(psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) + { + psOperand->aui32Swizzle[0] = DecodeOperand4CompSel1(*pui32Tokens); + } + } + + if(psOperand->eType == OPERAND_TYPE_IMMEDIATE32) + { + for(i=0; i< psOperand->iNumComponents; ++i) + { + psOperand->afImmediates[i] = *((float*)(&pui32Tokens[ui32NumTokens])); + ui32NumTokens ++; + } + } + else + if(psOperand->eType == OPERAND_TYPE_IMMEDIATE64) + { + for(i=0; i< psOperand->iNumComponents; ++i) + { + psOperand->adImmediates[i] = *((double*)(&pui32Tokens[ui32NumTokens])); + ui32NumTokens +=2; + } + } + + if(psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL || + psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL) + { + psOperand->ui32RegisterNumber = -1; + psOperand->ui32CompMask = -1; + } + + // Used only for Metal + if(psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH) + { + psOperand->ui32RegisterNumber = 0; + psOperand->ui32CompMask = 1; + } + + for(i=0; i iIndexDims; ++i) + { + OPERAND_INDEX_REPRESENTATION eRep = DecodeOperandIndexRepresentation(i ,*pui32Tokens); + + psOperand->eIndexRep[i] = eRep; + + psOperand->aui32ArraySizes[i] = 0; + psOperand->ui32RegisterNumber = 0; + + switch(eRep) + { + case OPERAND_INDEX_IMMEDIATE32: + { + psOperand->ui32RegisterNumber = *(pui32Tokens+ui32NumTokens); + psOperand->aui32ArraySizes[i] = psOperand->ui32RegisterNumber; + break; + } + case OPERAND_INDEX_RELATIVE: + { + psOperand->m_SubOperands[i].reset(new Operand()); + 
DecodeOperand(pui32Tokens+ui32NumTokens, psOperand->m_SubOperands[i].get()); + + ui32NumTokens++; + break; + } + case OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: + { + psOperand->ui32RegisterNumber = *(pui32Tokens+ui32NumTokens); + psOperand->aui32ArraySizes[i] = psOperand->ui32RegisterNumber; + + ui32NumTokens++; + + psOperand->m_SubOperands[i].reset(new Operand()); + DecodeOperand(pui32Tokens + ui32NumTokens, psOperand->m_SubOperands[i].get()); + + ui32NumTokens++; + break; + } + default: + { + ASSERT(0); + break; + } + } + + // Indices should be ints + switch(eRep) + { + case OPERAND_INDEX_IMMEDIATE32: + case OPERAND_INDEX_RELATIVE: + case OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: + { + int j = 0; + for(; j < psOperand->iNumComponents; j++) + { + psOperand->aeDataType[j] = SVT_INT; + } + break; + } + default: + { + break; + } + } + ui32NumTokens++; + } + + psOperand->specialName = ""; + + return ui32NumTokens; +} + +const uint32_t* DecodeDeclaration(Shader* psShader, const uint32_t* pui32Token, Declaration* psDecl, ShaderPhase *psPhase) +{ + uint32_t ui32TokenLength = DecodeInstructionLength(*pui32Token); + const uint32_t bExtended = DecodeIsOpcodeExtended(*pui32Token); + const OPCODE_TYPE eOpcode = DecodeOpcodeType(*pui32Token); + uint32_t ui32OperandOffset = 1; + + if(eOpcode < NUM_OPCODES && eOpcode >= 0) + { + psShader->aiOpcodeUsed[eOpcode] = 1; + } + + psDecl->eOpcode = eOpcode; + + psDecl->ui32IsShadowTex = 0; + + if(bExtended) + { + ui32OperandOffset = 2; + } + + switch (eOpcode) + { + case OPCODE_DCL_RESOURCE: // DCL* opcodes have + { + psDecl->value.eResourceDimension = DecodeResourceDimension(*pui32Token); + psDecl->ui32NumOperands = 1; + DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]); + break; + } + case OPCODE_DCL_CONSTANT_BUFFER: // custom operand formats. + { + psDecl->ui32NumOperands = 1; + DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]); + break; + } + case OPCODE_DCL_SAMPLER: + { + psDecl->ui32NumOperands = 1; + DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]); + break; + } + case OPCODE_DCL_INDEX_RANGE: + { + int regSpace = 0; + psDecl->ui32NumOperands = 1; + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]); + psDecl->value.ui32IndexRange = pui32Token[ui32OperandOffset]; + + regSpace = psDecl->asOperands[0].GetRegisterSpace(psShader->eShaderType, psPhase->ePhase); + if(psDecl->asOperands[0].eType == OPERAND_TYPE_INPUT) + { + uint32_t i; + const uint32_t indexRange = psDecl->value.ui32IndexRange; + const uint32_t reg = psDecl->asOperands[0].ui32RegisterNumber; + + psShader->aIndexedInput[regSpace][reg] = indexRange; + psShader->aIndexedInputParents[regSpace][reg] = reg; + + //-1 means don't declare this input because it falls in + //the range of an already declared array. 
+ for(i=reg+1; iaIndexedInput[regSpace][i] = -1; + psShader->aIndexedInputParents[regSpace][i] = reg; + } + } + + if(psDecl->asOperands[0].eType == OPERAND_TYPE_OUTPUT) + { + psShader->aIndexedOutput[regSpace][psDecl->asOperands[0].ui32RegisterNumber] = true;; + } + break; + } + case OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY: + { + psDecl->value.eOutputPrimitiveTopology = DecodeGSOutputPrimitiveTopology(*pui32Token); + break; + } + case OPCODE_DCL_GS_INPUT_PRIMITIVE: + { + psDecl->value.eInputPrimitive = DecodeGSInputPrimitive(*pui32Token); + break; + } + case OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT: + { + psDecl->value.ui32MaxOutputVertexCount = pui32Token[1]; + break; + } + case OPCODE_DCL_TESS_PARTITIONING: + { + psDecl->value.eTessPartitioning = DecodeTessPartitioning(*pui32Token); + break; + } + case OPCODE_DCL_TESS_DOMAIN: + { + psDecl->value.eTessDomain = DecodeTessDomain(*pui32Token); + break; + } + case OPCODE_DCL_TESS_OUTPUT_PRIMITIVE: + { + psDecl->value.eTessOutPrim = DecodeTessOutPrim(*pui32Token); + break; + } + case OPCODE_DCL_THREAD_GROUP: + { + psDecl->value.aui32WorkGroupSize[0] = pui32Token[1]; + psDecl->value.aui32WorkGroupSize[1] = pui32Token[2]; + psDecl->value.aui32WorkGroupSize[2] = pui32Token[3]; + break; + } + case OPCODE_DCL_INPUT: + { + psDecl->ui32NumOperands = 1; + DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]); + break; + } + case OPCODE_DCL_INPUT_SIV: + { + psDecl->ui32NumOperands = 1; + DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]); + if(psShader->eShaderType == PIXEL_SHADER) + { + psDecl->value.eInterpolation = DecodeInterpolationMode(*pui32Token); + + } + break; + } + case OPCODE_DCL_INPUT_PS: + { + psDecl->ui32NumOperands = 1; + psDecl->value.eInterpolation = DecodeInterpolationMode(*pui32Token); + Operand* psOperand = &psDecl->asOperands[0]; + DecodeOperand(pui32Token+ui32OperandOffset, psOperand); + + ShaderInfo::InOutSignature *psSig = NULL; + psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->ui32CompMask, (const ShaderInfo::InOutSignature**) &psSig); + + /* UNITY_FRAMEBUFFER_FETCH_AVAILABLE + special case mapping for inout color. + + In the fragment shader, setting inout var : SV_Target would result to + compiler error, unless SV_Target is defined to COLOR semantic for compatibility + reasons. Unfortunately, we still need to have a clear distinction between + vertex shader COLOR output and SV_Target, so the following workaround abuses + the fact that semantic names are case insensitive and preprocessor macros + are not. 
The resulting HLSL bytecode has semantics in case preserving form, + helps code generator to do extra work required for framebuffer fetch + + See also HLSLSupport.cginc + */ + if (psSig->eSystemValueType == NAME_UNDEFINED && + psSig->semanticName.size() == 5 && !strncmp(psSig->semanticName.c_str(), "CoLoR", 5)) + { + // Rename into something more readable, matches output + psSig->semanticName.replace(0, 9, "SV_Target"); + psOperand->iPSInOut = 1; + } + + break; + } + case OPCODE_DCL_INPUT_SGV: + case OPCODE_DCL_INPUT_PS_SGV: + { + psDecl->ui32NumOperands = 1; + DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]); + DecodeNameToken(pui32Token + 3, &psDecl->asOperands[0]); + break; + } + case OPCODE_DCL_INPUT_PS_SIV: + { + psDecl->ui32NumOperands = 1; + psDecl->value.eInterpolation = DecodeInterpolationMode(*pui32Token); + DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]); + DecodeNameToken(pui32Token + 3, &psDecl->asOperands[0]); + break; + } + case OPCODE_DCL_OUTPUT: + { + psDecl->ui32NumOperands = 1; + DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]); + break; + } + case OPCODE_DCL_OUTPUT_SGV: + { + break; + } + case OPCODE_DCL_OUTPUT_SIV: + { + psDecl->ui32NumOperands = 1; + DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]); + DecodeNameToken(pui32Token + 3, &psDecl->asOperands[0]); + break; + } + case OPCODE_DCL_TEMPS: + { + psDecl->value.ui32NumTemps = *(pui32Token+ui32OperandOffset); + break; + } + case OPCODE_DCL_INDEXABLE_TEMP: + { + psDecl->sIdxTemp.ui32RegIndex = *(pui32Token+ui32OperandOffset); + psDecl->sIdxTemp.ui32RegCount = *(pui32Token+ui32OperandOffset+1); + psDecl->sIdxTemp.ui32RegComponentSize = *(pui32Token+ui32OperandOffset+2); + break; + } + case OPCODE_DCL_GLOBAL_FLAGS: + { + psDecl->value.ui32GlobalFlags = DecodeGlobalFlags(*pui32Token); + break; + } + case OPCODE_DCL_INTERFACE: + { + uint32_t func = 0, numClassesImplementingThisInterface, arrayLen, interfaceID; + interfaceID = pui32Token[ui32OperandOffset]; + ui32OperandOffset++; + psDecl->ui32TableLength = pui32Token[ui32OperandOffset]; + ui32OperandOffset++; + + numClassesImplementingThisInterface = DecodeInterfaceTableLength(*(pui32Token+ui32OperandOffset)); + arrayLen = DecodeInterfaceArrayLength(*(pui32Token+ui32OperandOffset)); + + ui32OperandOffset++; + + psDecl->value.interface.ui32InterfaceID = interfaceID; + psDecl->value.interface.ui32NumFuncTables = numClassesImplementingThisInterface; + psDecl->value.interface.ui32ArraySize = arrayLen; + + psShader->funcPointer[interfaceID].ui32NumBodiesPerTable = psDecl->ui32TableLength; + + for(;func < numClassesImplementingThisInterface; ++func) + { + uint32_t ui32FuncTable = *(pui32Token+ui32OperandOffset); + psShader->aui32FuncTableToFuncPointer[ui32FuncTable] = interfaceID; + + psShader->funcPointer[interfaceID].aui32FuncTables[func] = ui32FuncTable; + ui32OperandOffset++; + } + + break; + } + case OPCODE_DCL_FUNCTION_BODY: + { + psDecl->ui32NumOperands = 1; + DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]); + break; + } + case OPCODE_DCL_FUNCTION_TABLE: + { + uint32_t ui32Func; + const uint32_t ui32FuncTableID = pui32Token[ui32OperandOffset++]; + const uint32_t ui32NumFuncsInTable = pui32Token[ui32OperandOffset++]; + + for(ui32Func=0; ui32Funcaui32FuncBodyToFuncTable[ui32FuncBodyID] = ui32FuncTableID; + + psShader->funcTable[ui32FuncTableID].aui32FuncBodies[ui32Func] = ui32FuncBodyID; + + } + +// OpcodeToken0 is followed by a DWORD that represents the function table +// 
identifier and another DWORD (TableLength) that gives the number of +// functions in the table. +// +// This is followed by TableLength DWORDs which are function body indices. +// + + break; + } + case OPCODE_DCL_INPUT_CONTROL_POINT_COUNT: + { + break; + } + case OPCODE_HS_DECLS: + { + break; + } + case OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT: + { + psDecl->value.ui32MaxOutputVertexCount = DecodeOutputControlPointCount(*pui32Token); + break; + } + case OPCODE_HS_JOIN_PHASE: + case OPCODE_HS_FORK_PHASE: + case OPCODE_HS_CONTROL_POINT_PHASE: + { + break; + } + case OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT: + case OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT: + { + psDecl->value.ui32HullPhaseInstanceCount = pui32Token[1]; + psPhase->ui32InstanceCount = psDecl->value.ui32HullPhaseInstanceCount; + break; + } + case OPCODE_CUSTOMDATA: + { + ui32TokenLength = pui32Token[1]; + { +// int iTupleSrc = 0, iTupleDest = 0; + //const uint32_t ui32ConstCount = pui32Token[1] - 2; + //const uint32_t ui32TupleCount = (ui32ConstCount / 4); + /*CUSTOMDATA_CLASS eClass =*/ DecodeCustomDataClass(pui32Token[0]); + + const uint32_t ui32NumVec4 = (ui32TokenLength - 2) / 4; + + ICBVec4 const *pVec4Array = (ICBVec4 const *)(void*) (pui32Token + 2); + + /* must be a multiple of 4 */ + ASSERT(((ui32TokenLength - 2) % 4) == 0); + + psDecl->asImmediateConstBuffer.assign(pVec4Array, pVec4Array + ui32NumVec4); + + psDecl->ui32NumOperands = ui32NumVec4; + } + break; + } + case OPCODE_DCL_HS_MAX_TESSFACTOR: + { + psDecl->value.fMaxTessFactor = *((float*)&pui32Token[1]); + break; + } + case OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED: + { + psDecl->ui32NumOperands = 2; + psDecl->value.eResourceDimension = DecodeResourceDimension(*pui32Token); + psDecl->sUAV.ui32GloballyCoherentAccess = DecodeAccessCoherencyFlags(*pui32Token); + psDecl->sUAV.bCounter = 0; + psDecl->ui32BufferStride = 4; + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]); + psDecl->sUAV.Type = DecodeResourceReturnType(0, pui32Token[ui32OperandOffset]); + break; + } + case OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW: + { + psDecl->ui32NumOperands = 1; + psDecl->sUAV.ui32GloballyCoherentAccess = DecodeAccessCoherencyFlags(*pui32Token); + psDecl->sUAV.bCounter = 0; + psDecl->ui32BufferStride = 4; + DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]); + //This should be a RTYPE_UAV_RWBYTEADDRESS buffer. It is memory backed by + //a shader storage buffer whose is unknown at compile time. 
+ break; + } + case OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED: + { + const ResourceBinding* psBinding = NULL; + const ConstantBuffer* psBuffer = NULL; + + psDecl->ui32NumOperands = 1; + psDecl->sUAV.ui32GloballyCoherentAccess = DecodeAccessCoherencyFlags(*pui32Token); + psDecl->sUAV.bCounter = 0; + DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]); + + psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, &psBinding); + psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_UAV, psBinding->ui32BindPoint, &psBuffer); + psDecl->ui32BufferStride = psBuffer->ui32TotalSizeInBytes; + + switch(psBinding->eType) + { + case RTYPE_UAV_RWSTRUCTURED_WITH_COUNTER: + case RTYPE_UAV_APPEND_STRUCTURED: + case RTYPE_UAV_CONSUME_STRUCTURED: + psDecl->sUAV.bCounter = 1; + break; + default: + break; + } + break; + } + case OPCODE_DCL_RESOURCE_STRUCTURED: + { + const ResourceBinding* psBinding = NULL; + const ConstantBuffer* psBuffer = NULL; + psDecl->ui32NumOperands = 1; + DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]); + + psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, &psBinding); + psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_TEXTURE, psBinding->ui32BindPoint, &psBuffer); + psDecl->ui32BufferStride = psBuffer->ui32TotalSizeInBytes; + break; + } + case OPCODE_DCL_RESOURCE_RAW: + { + psDecl->ui32NumOperands = 1; + psDecl->ui32BufferStride = 4; + DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]); + break; + } + case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED: + { + psDecl->ui32NumOperands = 1; + psDecl->sUAV.ui32GloballyCoherentAccess = 0; + + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]); + + psDecl->sTGSM.ui32Stride = pui32Token[ui32OperandOffset++]; + psDecl->sTGSM.ui32Count = pui32Token[ui32OperandOffset++]; + break; + } + case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW: + { + psDecl->ui32NumOperands = 1; + psDecl->sUAV.ui32GloballyCoherentAccess = 0; + + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]); + + psDecl->sTGSM.ui32Stride = 4; + psDecl->sTGSM.ui32Count = pui32Token[ui32OperandOffset++]; + break; + } + case OPCODE_DCL_STREAM: + { + psDecl->ui32NumOperands = 1; + DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]); + break; + } + case OPCODE_DCL_GS_INSTANCE_COUNT: + { + psDecl->ui32NumOperands = 0; + psDecl->value.ui32GSInstanceCount = pui32Token[1]; + break; + } + default: + { + //Reached end of declarations + return 0; + } + } + + return pui32Token + ui32TokenLength; +} + +const uint32_t* DecodeInstruction(const uint32_t* pui32Token, Instruction* psInst, Shader* psShader, ShaderPhase *psPhase) +{ + uint32_t ui32TokenLength = DecodeInstructionLength(*pui32Token); + const uint32_t bExtended = DecodeIsOpcodeExtended(*pui32Token); + const OPCODE_TYPE eOpcode = DecodeOpcodeType(*pui32Token); + uint32_t ui32OperandOffset = 1; + +#ifdef _DEBUG + psInst->id = instructionID++; +#endif + + psInst->eOpcode = eOpcode; + + psInst->bSaturate = DecodeInstructionSaturate(*pui32Token); + + psInst->bAddressOffset = 0; + + psInst->ui32FirstSrc = 1; + + psInst->iCausedSplit = 0; + + if(bExtended) + { + do { + const uint32_t ui32ExtOpcodeToken = pui32Token[ui32OperandOffset]; + const EXTENDED_OPCODE_TYPE eExtType = DecodeExtendedOpcodeType(ui32ExtOpcodeToken); + + if(eExtType == EXTENDED_OPCODE_SAMPLE_CONTROLS) + { + struct {int i4:4;} sU; + 
struct {int i4:4;} sV; + struct {int i4:4;} sW; + + psInst->bAddressOffset = 1; + + sU.i4 = DecodeImmediateAddressOffset( + IMMEDIATE_ADDRESS_OFFSET_U, ui32ExtOpcodeToken); + sV.i4 = DecodeImmediateAddressOffset( + IMMEDIATE_ADDRESS_OFFSET_V, ui32ExtOpcodeToken); + sW.i4 = DecodeImmediateAddressOffset( + IMMEDIATE_ADDRESS_OFFSET_W, ui32ExtOpcodeToken); + + psInst->iUAddrOffset = sU.i4; + psInst->iVAddrOffset = sV.i4; + psInst->iWAddrOffset = sW.i4; + } + else if(eExtType == EXTENDED_OPCODE_RESOURCE_RETURN_TYPE) + { + psInst->xType = DecodeExtendedResourceReturnType(0, ui32ExtOpcodeToken); + psInst->yType = DecodeExtendedResourceReturnType(1, ui32ExtOpcodeToken); + psInst->zType = DecodeExtendedResourceReturnType(2, ui32ExtOpcodeToken); + psInst->wType = DecodeExtendedResourceReturnType(3, ui32ExtOpcodeToken); + } + else if(eExtType == EXTENDED_OPCODE_RESOURCE_DIM) + { + psInst->eResDim = DecodeExtendedResourceDimension(ui32ExtOpcodeToken); + } + + ui32OperandOffset++; + } + while(DecodeIsOpcodeExtended(pui32Token[ui32OperandOffset-1])); + } + + if(eOpcode < NUM_OPCODES && eOpcode >= 0) + { + psShader->aiOpcodeUsed[eOpcode] = 1; + } + + switch (eOpcode) + { + //no operands + case OPCODE_CUT: + case OPCODE_EMIT: + case OPCODE_EMITTHENCUT: + case OPCODE_RET: + case OPCODE_LOOP: + case OPCODE_ENDLOOP: + case OPCODE_BREAK: + case OPCODE_ELSE: + case OPCODE_ENDIF: + case OPCODE_CONTINUE: + case OPCODE_DEFAULT: + case OPCODE_ENDSWITCH: + case OPCODE_NOP: + case OPCODE_HS_CONTROL_POINT_PHASE: + case OPCODE_HS_FORK_PHASE: + case OPCODE_HS_JOIN_PHASE: + { + psInst->ui32NumOperands = 0; + psInst->ui32FirstSrc = 0; + break; + } + case OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT: + case OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT: + { + psInst->ui32NumOperands = 0; + psInst->ui32FirstSrc = 0; + break; + } + case OPCODE_SYNC: + { + psInst->ui32NumOperands = 0; + psInst->ui32FirstSrc = 0; + psInst->ui32SyncFlags = DecodeSyncFlags(*pui32Token); + break; + } + + //1 operand + case OPCODE_EMIT_STREAM: + case OPCODE_CUT_STREAM: + case OPCODE_EMITTHENCUT_STREAM: + case OPCODE_CASE: + case OPCODE_SWITCH: + case OPCODE_LABEL: + { + psInst->ui32NumOperands = 1; + psInst->ui32FirstSrc = 0; + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]); + break; + } + + case OPCODE_INTERFACE_CALL: + { + psInst->ui32NumOperands = 1; + psInst->ui32FirstSrc = 0; + psInst->ui32FuncIndexWithinInterface = pui32Token[ui32OperandOffset]; + ui32OperandOffset++; + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]); + + break; + } + + /* Floating point instruction decodes */ + + //Instructions with two operands go here + case OPCODE_MOV: + { + psInst->ui32NumOperands = 2; + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]); + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[1]); + break; + } + case OPCODE_LOG: + case OPCODE_RSQ: + case OPCODE_EXP: + case OPCODE_SQRT: + case OPCODE_ROUND_PI: + case OPCODE_ROUND_NI: + case OPCODE_ROUND_Z: + case OPCODE_ROUND_NE: + case OPCODE_FRC: + case OPCODE_FTOU: + case OPCODE_FTOI: + case OPCODE_UTOF: + case OPCODE_ITOF: + case OPCODE_INEG: + case OPCODE_IMM_ATOMIC_ALLOC: + case OPCODE_IMM_ATOMIC_CONSUME: + case OPCODE_DMOV: + case OPCODE_DTOF: + case OPCODE_FTOD: + case OPCODE_DRCP: + case OPCODE_COUNTBITS: + case OPCODE_FIRSTBIT_HI: + case OPCODE_FIRSTBIT_LO: + case OPCODE_FIRSTBIT_SHI: + case OPCODE_BFREV: + case OPCODE_F32TOF16: + case OPCODE_F16TOF32: + 
case OPCODE_RCP: + case OPCODE_DERIV_RTX: + case OPCODE_DERIV_RTY: + case OPCODE_DERIV_RTX_COARSE: + case OPCODE_DERIV_RTX_FINE: + case OPCODE_DERIV_RTY_COARSE: + case OPCODE_DERIV_RTY_FINE: + case OPCODE_NOT: + case OPCODE_BUFINFO: + { + psInst->ui32NumOperands = 2; + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]); + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[1]); + break; + } + + //Instructions with three operands go here + case OPCODE_SINCOS: + { + psInst->ui32FirstSrc = 2; + //Intentional fall-through + } + case OPCODE_IMIN: + case OPCODE_UMIN: + case OPCODE_UMAX: + case OPCODE_MIN: + case OPCODE_IMAX: + case OPCODE_MAX: + case OPCODE_MUL: + case OPCODE_DIV: + case OPCODE_ADD: + case OPCODE_DP2: + case OPCODE_DP3: + case OPCODE_DP4: + case OPCODE_NE: + case OPCODE_OR: + case OPCODE_XOR: + case OPCODE_LT: + case OPCODE_IEQ: + case OPCODE_IADD: + case OPCODE_AND: + case OPCODE_GE: + case OPCODE_IGE: + case OPCODE_EQ: + case OPCODE_USHR: + case OPCODE_ISHL: + case OPCODE_ISHR: + case OPCODE_LD: + case OPCODE_ILT: + case OPCODE_INE: + case OPCODE_UGE: + case OPCODE_ULT: + case OPCODE_ATOMIC_AND: + case OPCODE_ATOMIC_IADD: + case OPCODE_ATOMIC_OR: + case OPCODE_ATOMIC_XOR: + case OPCODE_ATOMIC_IMAX: + case OPCODE_ATOMIC_IMIN: + case OPCODE_ATOMIC_UMAX: + case OPCODE_ATOMIC_UMIN: + case OPCODE_DADD: + case OPCODE_DMAX: + case OPCODE_DMIN: + case OPCODE_DMUL: + case OPCODE_DEQ: + case OPCODE_DGE: + case OPCODE_DLT: + case OPCODE_DNE: + case OPCODE_DDIV: + { + psInst->ui32NumOperands = 3; + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]); + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[1]); + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[2]); + break; + } + //Instructions with four operands go here + case OPCODE_MAD: + case OPCODE_MOVC: + case OPCODE_IMAD: + case OPCODE_UDIV: + case OPCODE_LOD: + case OPCODE_SAMPLE: + case OPCODE_GATHER4: + case OPCODE_LD_MS: + case OPCODE_UBFE: + case OPCODE_IBFE: + case OPCODE_ATOMIC_CMP_STORE: + case OPCODE_IMM_ATOMIC_IADD: + case OPCODE_IMM_ATOMIC_AND: + case OPCODE_IMM_ATOMIC_OR: + case OPCODE_IMM_ATOMIC_XOR: + case OPCODE_IMM_ATOMIC_EXCH: + case OPCODE_IMM_ATOMIC_IMAX: + case OPCODE_IMM_ATOMIC_IMIN: + case OPCODE_IMM_ATOMIC_UMAX: + case OPCODE_IMM_ATOMIC_UMIN: + case OPCODE_DMOVC: + case OPCODE_DFMA: + case OPCODE_IMUL: + { + psInst->ui32NumOperands = 4; + + if(eOpcode == OPCODE_IMUL) + { + psInst->ui32FirstSrc = 2; + } + + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]); + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[1]); + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[2]); + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[3]); + break; + } + case OPCODE_GATHER4_PO: + case OPCODE_SAMPLE_L: + case OPCODE_BFI: + case OPCODE_SWAPC: + case OPCODE_IMM_ATOMIC_CMP_EXCH: + { + psInst->ui32NumOperands = 5; + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]); + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[1]); + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[2]); + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[3]); + ui32OperandOffset += 
DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[4]); + break; + } + case OPCODE_GATHER4_C: + case OPCODE_SAMPLE_C: + case OPCODE_SAMPLE_C_LZ: + case OPCODE_SAMPLE_B: + { + psInst->ui32NumOperands = 5; + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]); + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[1]); + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[2]); + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[3]); + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[4]); + + /* sample_b is not a shadow sampler, others need flagging */ + if (eOpcode != OPCODE_SAMPLE_B) + { + MarkTextureAsShadow(&psShader->sInfo, psPhase->psDecl, &psInst->asOperands[2]); + } + + break; + } + case OPCODE_GATHER4_PO_C: + case OPCODE_SAMPLE_D: + { + psInst->ui32NumOperands = 6; + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]); + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[1]); + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[2]); + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[3]); + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[4]); + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[5]); + + /* sample_d is not a shadow sampler, others need flagging */ + if (eOpcode != OPCODE_SAMPLE_D) + { + MarkTextureAsShadow(&psShader->sInfo, + psPhase->psDecl, + &psInst->asOperands[2]); + } + break; + } + case OPCODE_IF: + case OPCODE_BREAKC: + case OPCODE_CONTINUEC: + case OPCODE_RETC: + case OPCODE_DISCARD: + { + psInst->eBooleanTestType = DecodeInstrTestBool(*pui32Token); + psInst->ui32NumOperands = 1; + psInst->ui32FirstSrc = 0; // no destination registers + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]); + break; + } + case OPCODE_CALLC: + { + psInst->eBooleanTestType = DecodeInstrTestBool(*pui32Token); + psInst->ui32NumOperands = 2; + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]); + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[1]); + break; + } + case OPCODE_CUSTOMDATA: + { + psInst->ui32NumOperands = 0; + ui32TokenLength = pui32Token[1]; + break; + } + case OPCODE_EVAL_CENTROID: + { + psInst->ui32NumOperands = 2; + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]); + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[1]); + break; + } + case OPCODE_EVAL_SAMPLE_INDEX: + case OPCODE_EVAL_SNAPPED: + case OPCODE_STORE_UAV_TYPED: + case OPCODE_LD_UAV_TYPED: + case OPCODE_LD_RAW: + case OPCODE_STORE_RAW: + { + psInst->ui32NumOperands = 3; + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]); + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[1]); + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[2]); + break; + } + case OPCODE_STORE_STRUCTURED: + case OPCODE_LD_STRUCTURED: + { + psInst->ui32NumOperands = 4; + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]); + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[1]); + 
ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[2]); + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[3]); + break; + } + case OPCODE_RESINFO: + { + psInst->ui32NumOperands = 3; + + psInst->eResInfoReturnType = DecodeResInfoReturnType(pui32Token[0]); + + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]); + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[1]); + ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[2]); + break; + } + case OPCODE_MSAD: + default: + { + ASSERT(0); + break; + } + } + + // For opcodes that sample textures, mark which samplers are used by each texture + { + uint32_t ui32TextureRegisterNumber; + uint32_t ui32SamplerRegisterNumber; + uint32_t bTextureSampleInstruction = 0; + switch (eOpcode) + { + case OPCODE_GATHER4: + // dest, coords, tex, sampler + ui32TextureRegisterNumber = 2; + ui32SamplerRegisterNumber = 3; + bTextureSampleInstruction = 1; + break; + case OPCODE_GATHER4_PO: + //dest, coords, offset, tex, sampler + ui32TextureRegisterNumber = 3; + ui32SamplerRegisterNumber = 4; + bTextureSampleInstruction = 1; + break; + case OPCODE_GATHER4_C: + //dest, coords, tex, sampler srcReferenceValue + ui32TextureRegisterNumber = 2; + ui32SamplerRegisterNumber = 3; + bTextureSampleInstruction = 1; + break; + case OPCODE_GATHER4_PO_C: + //dest, coords, offset, tex, sampler, srcReferenceValue + ui32TextureRegisterNumber = 3; + ui32SamplerRegisterNumber = 4; + bTextureSampleInstruction = 1; + break; + case OPCODE_SAMPLE: + case OPCODE_SAMPLE_L: + case OPCODE_SAMPLE_C: + case OPCODE_SAMPLE_C_LZ: + case OPCODE_SAMPLE_B: + case OPCODE_SAMPLE_D: + // dest, coords, tex, sampler [, reference] + ui32TextureRegisterNumber = 2; + ui32SamplerRegisterNumber = 3; + bTextureSampleInstruction = 1; + break; + default: + break; + } + + if (bTextureSampleInstruction) + { + MarkTextureSamplerPair(&psShader->sInfo, + psPhase->psDecl, + &psInst->asOperands[ui32TextureRegisterNumber], + &psInst->asOperands[ui32SamplerRegisterNumber], + psShader->textureSamplers); + } + } + + return pui32Token + ui32TokenLength; +} + +const uint32_t* DecodeShaderPhase(const uint32_t* pui32Tokens, + Shader* psShader, + const SHADER_PHASE_TYPE ePhaseType, + ShaderPhase *psPhase) +{ + const uint32_t* pui32CurrentToken = pui32Tokens; + const uint32_t ui32ShaderLength = psShader->ui32ShaderLength; + + psPhase->ePhase = ePhaseType; + //Using ui32ShaderLength as the declaration and instruction count + //will allocate more than enough memory. Avoids having to + //traverse the entire shader just to get the real counts. + + psPhase->psDecl.clear(); + psPhase->psDecl.reserve(ui32ShaderLength); + + while(1) //Keep going until we reach the first non-declaration token, or the end of the shader. 
+ { + psPhase->psDecl.push_back(Declaration()); + const uint32_t* pui32Result = DecodeDeclaration(psShader, pui32CurrentToken, &psPhase->psDecl[psPhase->psDecl.size()-1], psPhase); + + if(pui32Result) + { + pui32CurrentToken = pui32Result; + + if(pui32CurrentToken >= (psShader->pui32FirstToken + ui32ShaderLength)) + { + break; + } + } + else + { + psPhase->psDecl.pop_back(); // Remove the last one, it wasn't needed after all + break; + } + } + + +//Instructions + psPhase->psInst.clear(); + psPhase->psInst.reserve(ui32ShaderLength); + + while (pui32CurrentToken < (psShader->pui32FirstToken + ui32ShaderLength)) + { + psPhase->psInst.push_back(Instruction()); + const uint32_t* nextInstr = DecodeInstruction(pui32CurrentToken, &psPhase->psInst[psPhase->psInst.size()-1], psShader, psPhase); + +#ifdef _DEBUG + if(nextInstr == pui32CurrentToken) + { + ASSERT(0); + break; + } +#endif + + if (psPhase->psInst[psPhase->psInst.size() - 1].eOpcode == OPCODE_HS_FORK_PHASE || psPhase->psInst[psPhase->psInst.size() - 1].eOpcode == OPCODE_HS_JOIN_PHASE) + { + psPhase->psInst.pop_back(); + return pui32CurrentToken; + } + pui32CurrentToken = nextInstr; + } + + return pui32CurrentToken; +} + +const void AllocateHullPhaseArrays(const uint32_t* pui32Tokens, + Shader* psShader) +{ + const uint32_t* pui32CurrentToken = pui32Tokens; + const uint32_t ui32ShaderLength = psShader->ui32ShaderLength; + uint32_t ui32PhaseCount = 2; // Always the main phase and the HS global declarations + uint32_t i; + + while(1) //Keep going until we reach the first non-declaration token, or the end of the shader. + { + uint32_t ui32TokenLength = DecodeInstructionLength(*pui32CurrentToken); + /*const uint32_t bExtended =*/ DecodeIsOpcodeExtended(*pui32CurrentToken); + const OPCODE_TYPE eOpcode = DecodeOpcodeType(*pui32CurrentToken); + + if(eOpcode == OPCODE_CUSTOMDATA) + { + ui32TokenLength = pui32CurrentToken[1]; + } + + pui32CurrentToken = pui32CurrentToken + ui32TokenLength; + + switch (eOpcode) + { + case OPCODE_HS_CONTROL_POINT_PHASE: + case OPCODE_HS_JOIN_PHASE: + case OPCODE_HS_FORK_PHASE: + ui32PhaseCount++; + break; + default: + break; + } + + if(pui32CurrentToken >= (psShader->pui32FirstToken + ui32ShaderLength)) + { + break; + } + } + + psShader->asPhases.clear(); + psShader->asPhases.resize(ui32PhaseCount); + for (i = 0; i < ui32PhaseCount; i++) + psShader->asPhases[i].ui32InstanceCount = 1; +} + +const uint32_t* DecodeHullShader(const uint32_t* pui32Tokens, Shader* psShader) +{ + const uint32_t* pui32CurrentToken = pui32Tokens; + const uint32_t ui32ShaderLength = psShader->ui32ShaderLength; + ShaderPhase *psPhase; + + AllocateHullPhaseArrays(pui32Tokens, psShader); + + // Index 1 is HS_GLOBAL_DECL + psShader->asPhases[1].psInst.clear(); + psShader->asPhases[1].psDecl.clear(); + psShader->asPhases[1].ePhase = HS_GLOBAL_DECL_PHASE; + psShader->asPhases[1].ui32InstanceCount = 1; + + // The next phase to parse in. + psPhase = &psShader->asPhases[2]; + + //Keep going until we have done all phases or the end of the shader. 
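+    // Editorial note, not in the original source: summarizing the code above, the phase
+    // array is sized by AllocateHullPhaseArrays() as 2 + (number of control point / fork /
+    // join phases found), asPhases[1] collects the global declarations that appear outside
+    // any phase, and the per-phase decodes below fill slots [2..N) in the order encountered.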
+ while(1) + { + Declaration newDecl; + const uint32_t* pui32Result = DecodeDeclaration(psShader, pui32CurrentToken, &newDecl, psPhase); + + if(pui32Result) + { + pui32CurrentToken = pui32Result; + + if(newDecl.eOpcode == OPCODE_HS_CONTROL_POINT_PHASE) + { + pui32CurrentToken = DecodeShaderPhase(pui32CurrentToken, psShader, HS_CTRL_POINT_PHASE, psPhase); + psPhase++; + } + else if(newDecl.eOpcode == OPCODE_HS_FORK_PHASE) + { + pui32CurrentToken = DecodeShaderPhase(pui32CurrentToken, psShader, HS_FORK_PHASE, psPhase++); + } + else if(newDecl.eOpcode == OPCODE_HS_JOIN_PHASE) + { + pui32CurrentToken = DecodeShaderPhase(pui32CurrentToken, psShader, HS_JOIN_PHASE, psPhase++); + } + else + { + psShader->asPhases[1].psDecl.push_back(newDecl); + } + + if(pui32CurrentToken >= (psShader->pui32FirstToken + ui32ShaderLength)) + { + break; + } + } + else + { + break; + } + } + + return pui32CurrentToken; +} + +void Decode(const uint32_t* pui32Tokens, Shader* psShader) +{ + const uint32_t* pui32CurrentToken = pui32Tokens; + const uint32_t ui32ShaderLength = pui32Tokens[1]; + + psShader->ui32MajorVersion = DecodeProgramMajorVersion(*pui32CurrentToken); + psShader->ui32MinorVersion = DecodeProgramMinorVersion(*pui32CurrentToken); + psShader->eShaderType = DecodeShaderType(*pui32CurrentToken); + + pui32CurrentToken++;//Move to shader length + psShader->ui32ShaderLength = ui32ShaderLength; + pui32CurrentToken++;//Move to after shader length (usually a declaration) + + psShader->pui32FirstToken = pui32Tokens; + + if(psShader->eShaderType == HULL_SHADER) + { + // DecodeHullShader will allocate psShader->asPhases array. + pui32CurrentToken = DecodeHullShader(pui32CurrentToken, psShader); + return; + } + else + { + psShader->asPhases.clear(); + psShader->asPhases.resize(1); + } + + // Phase 0 is always the main phase + psShader->asPhases[0].ui32InstanceCount = 1; + + DecodeShaderPhase(pui32CurrentToken, psShader, MAIN_PHASE, &psShader->asPhases[0]); +} + +Shader* DecodeDXBC(uint32_t* data, uint32_t decodeFlags) +{ + Shader* psShader; + DXBCContainerHeader* header = (DXBCContainerHeader*)data; + uint32_t i; + uint32_t chunkCount; + uint32_t* chunkOffsets; + ReflectionChunks refChunks; + uint32_t* shaderChunk = 0; + + if(header->fourcc != FOURCC_DXBC) + { + ASSERT(0 && "Invalid shader type (DX9 shaders no longer supported)!"); + } + + refChunks.pui32Inputs = NULL; + refChunks.pui32Interfaces = NULL; + refChunks.pui32Outputs = NULL; + refChunks.pui32Resources = NULL; + refChunks.pui32Inputs11 = NULL; + refChunks.pui32Outputs11 = NULL; + refChunks.pui32OutputsWithStreams = NULL; + refChunks.pui32PatchConstants = NULL; + refChunks.pui32PatchConstants11 = NULL; + + chunkOffsets = (uint32_t*)(header + 1); + + chunkCount = header->chunkCount; + + for(i = 0; i < chunkCount; ++i) + { + uint32_t offset = chunkOffsets[i]; + + DXBCChunkHeader* chunk = (DXBCChunkHeader*)((char*)data + offset); + + switch(chunk->fourcc) + { + case FOURCC_ISGN: + { + refChunks.pui32Inputs = (uint32_t*)(chunk + 1); + break; + } + case FOURCC_ISG1: + { + refChunks.pui32Inputs11 = (uint32_t*)(chunk + 1); + break; + } + case FOURCC_RDEF: + { + refChunks.pui32Resources = (uint32_t*)(chunk + 1); + break; + } + case FOURCC_IFCE: + { + refChunks.pui32Interfaces = (uint32_t*)(chunk + 1); + break; + } + case FOURCC_OSGN: + { + refChunks.pui32Outputs = (uint32_t*)(chunk + 1); + break; + } + case FOURCC_OSG1: + { + refChunks.pui32Outputs11 = (uint32_t*)(chunk + 1); + break; + } + case FOURCC_OSG5: + { + refChunks.pui32OutputsWithStreams = 
(uint32_t*)(chunk + 1); + break; + } + case FOURCC_SHDR: + case FOURCC_SHEX: + { + shaderChunk = (uint32_t*)(chunk + 1); + break; + } + case FOURCC_PSGN: + { + refChunks.pui32PatchConstants = (uint32_t*)(chunk + 1); + break; + } + case FOURCC_PSG1: + { + refChunks.pui32PatchConstants11 = (uint32_t*)(chunk + 1); + break; + } + case FOURCC_STAT: + case FOURCC_SFI0: + { + break; // Ignored + } + default: + { +// ASSERT(0); // Uncomment this to hunt for unknown chunks later on. + break; + } + } + } + + if(shaderChunk) + { + uint32_t ui32MajorVersion; + uint32_t ui32MinorVersion; + + psShader = new Shader(); + + ui32MajorVersion = DecodeProgramMajorVersion(*shaderChunk); + ui32MinorVersion = DecodeProgramMinorVersion(*shaderChunk); + + LoadShaderInfo(ui32MajorVersion, + ui32MinorVersion, + &refChunks, + &psShader->sInfo, decodeFlags); + + Decode(shaderChunk, psShader); + + return psShader; + } + + return 0; +} + diff --git a/src/internal_includes/ControlFlowGraph.h b/src/internal_includes/ControlFlowGraph.h new file mode 100644 index 0000000..7c26255 --- /dev/null +++ b/src/internal_includes/ControlFlowGraph.h @@ -0,0 +1,163 @@ +#pragma once + +#include +#include +#include +#include +#include + +#ifdef __APPLE__ +#include +#endif + +#include + +struct Instruction; +class Operand; + +namespace HLSLcc +{ +#ifdef __APPLE__ + // Herp derp Apple is stuck in 2005 + using namespace std::tr1; +#else + using namespace std; +#endif + + namespace ControlFlow + { + class BasicBlock; + + class ControlFlowGraph + { + friend class BasicBlock; + public: + ControlFlowGraph() + : m_BlockMap() + , m_BlockStorage() + {} + + typedef std::vector > BasicBlockStorage; + + const BasicBlock &Build(const Instruction *firstInstruction); + + // Only works for instructions that start the basic block + const BasicBlock *GetBasicBlockForInstruction(const Instruction *instruction) const; + + // non-const version for BasicBlock + BasicBlock *GetBasicBlockForInstruction(const Instruction *instruction); + + const BasicBlockStorage &AllBlocks() const { return m_BlockStorage; } + private: + + // Map for storing the created basic blocks. Map key is the pointer to the first instruction in the block + typedef std::map BasicBlockMap; + + BasicBlockMap m_BlockMap; + + // auto_ptr -type storage for multiple BasicBlocks. BlockMap above only has pointers into these + BasicBlockStorage m_BlockStorage; + }; + + + class BasicBlock + { + friend class ControlFlowGraph; + public: + // A set of register indices, one per each vec4 component per register + typedef std::set RegisterSet; + // The connections (either incoming or outgoing) from this block. 
The instruction is the same one as the key in ControlFlowGraph to that basic block + typedef std::set ConnectionSet; + + struct Definition + { + Definition(const Instruction *i = NULL, const Operand *o = NULL) + : m_Instruction(i) + , m_Operand(o) + {} + + Definition(const Definition &a) + : m_Instruction(a.m_Instruction) + , m_Operand(a.m_Operand) + {} + + bool operator==(const Definition &a) const + { + if (a.m_Instruction != m_Instruction) + return false; + return a.m_Operand == m_Operand; + } + + bool operator!=(const Definition &a) const + { + if (a.m_Instruction == m_Instruction) + return false; + return a.m_Operand != m_Operand; + } + + bool operator<(const Definition &a) const + { + if (m_Instruction != a.m_Instruction) + return m_Instruction < a.m_Instruction; + return m_Operand < a.m_Operand; + } + + const Instruction *m_Instruction; + const Operand *m_Operand; + }; + + typedef std::set ReachableDefinitionsPerVariable; // A set of possibly visible definitions for one component of one vec4 variable + typedef std::map ReachableVariables; // A VisibleDefinitionSet for each variable*component. + + const Instruction *First() const { return m_First; } + const Instruction *Last() const { return m_Last; } + + const RegisterSet &UEVar() const { return m_UEVar; } + const RegisterSet &VarKill() const { return m_VarKill; } + + const ConnectionSet &Preceding() const { return m_Preceding; } + const ConnectionSet &Succeeding() const { return m_Succeeding; } + + const ReachableVariables &DEDef() const { return m_DEDef; } + const ReachableVariables &Reachable() const { return m_Reachable; } + + // Helper function: Do union of 2 ReachableVariables, store result in a. + static void RVarUnion(ReachableVariables &a, const ReachableVariables &b); + + private: + + // Generate a basic block. Private constructor, can only be constructed from ControlFlowGraph::Build() + BasicBlock(const Instruction *psFirst, ControlFlowGraph &graph, const Instruction *psPrecedingBlockHead); + + // Walk through the instructions and build UEVar and VarKill sets, create succeeding nodes if they don't exist already. + void Build(); + + bool RebuildReachable(); // Rebuild m_Reachable from preceding blocks and this one. Returns true if current value changed. + + + BasicBlock * AddChildBasicBlock(const Instruction *psFirst); + + private: + ControlFlowGraph &m_Graph; // The graph object containing this block + + const Instruction *m_First; // The first instruction in the basic block + const Instruction *m_Last; // The last instruction in the basic block. Either OPCODE_RET or a branch/jump/loop instruction + + RegisterSet m_UEVar; // Upwards-exposed variables (temps that need definition from upstream and are used in this basic block) + RegisterSet m_VarKill; // Set of variables that are defined in this block. + + ConnectionSet m_Preceding; // Set of blocks that immediately precede this block in the CFG + ConnectionSet m_Succeeding; // Set of blocks that follow this block in the CFG + + ReachableVariables m_DEDef; // Downward-exposed definitions from this basic block. Always only one item per set. + + ReachableVariables m_Reachable; // The set of variable definitions that are visible at the end of this block. 
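+        // Editorial sketch, not in the original source: given the member names above, the
+        // usual reaching-definitions iteration would be
+        //     in(b)        = union of Reachable(p) over all p in Preceding(b)
+        //     Reachable(b) = DEDef(b) united with the part of in(b) not overwritten here
+        // repeated until no block changes; RVarUnion() is the per-variable set union helper
+        // and RebuildReachable() reports whether this block's value changed.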
+ + }; + + + + }; +}; + + diff --git a/src/internal_includes/ControlFlowGraphUtils.h b/src/internal_includes/ControlFlowGraphUtils.h new file mode 100644 index 0000000..0a799b2 --- /dev/null +++ b/src/internal_includes/ControlFlowGraphUtils.h @@ -0,0 +1,31 @@ +#pragma once + +struct Instruction; + +namespace HLSLcc +{ + namespace ControlFlow + { + class Utils + { + public: + // For a given flow-control instruction, find the corresponding jump location: + // If the input is OPCODE_IF, then find the next same-level ELSE or ENDIF +1 + // For ELSE, find same level ENDIF + 1 + // For BREAK/BREAKC, find next ENDLOOP or ENDSWITCH + 1 + // For SWITCH, find next same-level CASE/DEFAULT (skip multiple consecutive case/default labels) or ENDSWITCH + 1 + // For ENDLOOP, find previous same-level LOOP + 1 + // For CASE/DEFAULT, find next same-level CASE/DEFAULT or ENDSWITCH + 1, skip multiple consecutive case/default labels + // For CONTINUE/C the previous LOOP + 1 + // Note that LOOP/ENDSWITCH itself is nothing but a label but it still starts a new basic block. + // Note that CASE labels fall through. + // Always returns the beginning of the next block, so skip multiple CASE/DEFAULT labels etc. + // If sawEndSwitch != null, will bet set to true if the label skipping saw past ENDSWITCH + // If needConnectToParent != null, will be set to true if sawEndSwitch == true and there are one or more case labels directly before it. + static const Instruction * GetJumpPoint(const Instruction *psStart, bool *sawEndSwitch = 0, bool *needConnectToParent = 0); + + static const Instruction *GetNextNonLabelInstruction(const Instruction *psStart, bool *sawEndSwitch = 0); + + }; + } +} \ No newline at end of file diff --git a/src/internal_includes/DataTypeAnalysis.h b/src/internal_includes/DataTypeAnalysis.h new file mode 100644 index 0000000..8c0207b --- /dev/null +++ b/src/internal_includes/DataTypeAnalysis.h @@ -0,0 +1,15 @@ +#pragma once + +#include "include/ShaderInfo.h" +#include + +class HLSLCrossCompilerContext; +struct Instruction; + +namespace HLSLcc +{ + namespace DataTypeAnalysis + { + void SetDataTypes(HLSLCrossCompilerContext* psContext, std::vector &instructions, uint32_t ui32TempCount, std::vector &results); + }; +}; diff --git a/src/internal_includes/Declaration.h b/src/internal_includes/Declaration.h new file mode 100644 index 0000000..94f80b2 --- /dev/null +++ b/src/internal_includes/Declaration.h @@ -0,0 +1,101 @@ + +#pragma once + +#include +#include +#include "internal_includes/tokens.h" +#include "internal_includes/Operand.h" + +typedef struct ICBVec4_TAG { + uint32_t a; + uint32_t b; + uint32_t c; + uint32_t d; +} ICBVec4; + +#define ACCESS_FLAG_READ 0x1 +#define ACCESS_FLAG_WRITE 0x2 + +struct Declaration +{ + Declaration() + : + eOpcode(OPCODE_INVALID), + ui32NumOperands(0), + ui32BufferStride(0) + {} + + OPCODE_TYPE eOpcode; + + uint32_t ui32NumOperands; + + Operand asOperands[2]; + + std::vector asImmediateConstBuffer; + //The declaration can set one of these + //values depending on the opcode. 
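+    // Illustrative examples (editorial, not in the original source): dcl_temps would fill
+    // ui32NumTemps, dcl_globalFlags ui32GlobalFlags, the tessellator declarations
+    // eTessDomain / eTessPartitioning / eTessOutPrim, and a compute thread-group
+    // declaration aui32WorkGroupSize.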
+ union { + uint32_t ui32GlobalFlags; + uint32_t ui32NumTemps; + RESOURCE_DIMENSION eResourceDimension; + INTERPOLATION_MODE eInterpolation; + PRIMITIVE_TOPOLOGY eOutputPrimitiveTopology; + PRIMITIVE eInputPrimitive; + uint32_t ui32MaxOutputVertexCount; + TESSELLATOR_DOMAIN eTessDomain; + TESSELLATOR_PARTITIONING eTessPartitioning; + TESSELLATOR_OUTPUT_PRIMITIVE eTessOutPrim; + uint32_t aui32WorkGroupSize[3]; + uint32_t ui32HullPhaseInstanceCount; + float fMaxTessFactor; + uint32_t ui32IndexRange; + uint32_t ui32GSInstanceCount; + + struct Interface_TAG + { + uint32_t ui32InterfaceID; + uint32_t ui32NumFuncTables; + uint32_t ui32ArraySize; + } interface; + } value; + + uint32_t ui32BufferStride; + + struct UAV_TAG + { + UAV_TAG() : + ui32GloballyCoherentAccess(0), + bCounter(0), + Type(RETURN_TYPE_UNORM), + ui32NumComponents(0), + ui32AccessFlags(0) + { + } + uint32_t ui32GloballyCoherentAccess; + uint8_t bCounter; + RESOURCE_RETURN_TYPE Type; + uint32_t ui32NumComponents; + uint32_t ui32AccessFlags; + } sUAV; + + struct TGSM_TAG + { + uint32_t ui32Stride; + uint32_t ui32Count; + } sTGSM; + + struct IndexableTemp_TAG + { + uint32_t ui32RegIndex; + uint32_t ui32RegCount; + uint32_t ui32RegComponentSize; + } sIdxTemp; + + uint32_t ui32TableLength; + + uint32_t ui32IsShadowTex; + + // Set indexed by sampler register number. + std::set samplersUsed; +}; + diff --git a/src/internal_includes/HLSLCrossCompilerContext.h b/src/internal_includes/HLSLCrossCompilerContext.h new file mode 100644 index 0000000..2245bfa --- /dev/null +++ b/src/internal_includes/HLSLCrossCompilerContext.h @@ -0,0 +1,50 @@ +#pragma once + +#include +#include +#include "bstrlib.h" + +class Shader; +class GLSLCrossDependencyData; +class ShaderPhase; +class Translator; +class Operand; +class HLSLccReflection; + +class HLSLCrossCompilerContext +{ +public: + HLSLCrossCompilerContext(HLSLccReflection &refl) : m_Reflection(refl) {} + + bstring glsl; + bstring extensions; + + bstring* currentGLSLString;//either glsl or earlyMain of current phase + + uint32_t currentPhase; + + int indent; + unsigned int flags; + Shader* psShader; + GLSLCrossDependencyData* psDependencies; + const char *inputPrefix; // Prefix for shader inputs + const char *outputPrefix; // Prefix for shader outputs + + void DoDataTypeAnalysis(ShaderPhase *psPhase); + + void ClearDependencyData(); + + void AddIndentation(); + + // Currently active translator + Translator *psTranslator; + + HLSLccReflection &m_Reflection; // Callbacks for bindings and diagnostic info + + // Retrieve the name for which the input or output is declared as. Takes into account possible redirections. 
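+    // Editorial note, not in the original source: "redirection" refers to the
+    // acInputNeedsRedirect / acOutputNeedsRedirect tables in ShaderPhase; when two narrow
+    // semantics were packed into one vec4 register and are accessed together, reads and
+    // writes are rerouted through the combined vec4 variable instead of the per-semantic name.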
+ std::string GetDeclaredInputName(const Operand* psOperand, int *piRebase, int iIgnoreRedirect, uint32_t *puiIgnoreSwizzle) const; + std::string GetDeclaredOutputName(const Operand* psOperand, int* stream, uint32_t *puiIgnoreSwizzle, int *piRebase, int iIgnoreRedirect) const; + + bool OutputNeedsDeclaring(const Operand* psOperand, const int count); + +}; diff --git a/src/internal_includes/HLSLccToolkit.h b/src/internal_includes/HLSLccToolkit.h new file mode 100644 index 0000000..95f52c6 --- /dev/null +++ b/src/internal_includes/HLSLccToolkit.h @@ -0,0 +1,127 @@ +#pragma once +#include "hlslcc.h" +#include "bstrlib.h" +#include +#include + +#include "internal_includes/Instruction.h" +#include "internal_includes/Operand.h" + +class HLSLCrossCompilerContext; + +namespace HLSLcc +{ + uint32_t GetNumberBitsSet(uint32_t a); + + uint32_t SVTTypeToFlag(const SHADER_VARIABLE_TYPE eType); + + SHADER_VARIABLE_TYPE TypeFlagsToSVTType(const uint32_t typeflags); + + const char * GetConstructorForType(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eType, const int components, bool useGLSLPrecision = true); + + const char * GetConstructorForTypeGLSL(const SHADER_VARIABLE_TYPE eType, + const int components, bool useGLSLPrecision); + + const char * GetConstructorForTypeMetal(const SHADER_VARIABLE_TYPE eType, + const int components); + + std::string GetMatrixTypeName(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eBaseType, const int columns, const int rows); + + void AddSwizzleUsingElementCount(bstring dest, uint32_t count); + + int WriteMaskToComponentCount(uint32_t writeMask); + + uint32_t BuildComponentMaskFromElementCount(int count); + + // Returns true if we can do direct assignment between types (mostly for mediump<->highp floats etc) + bool DoAssignmentDataTypesMatch(SHADER_VARIABLE_TYPE dest, SHADER_VARIABLE_TYPE src); + + // Convert resource return type to SVT_ flags + uint32_t ResourceReturnTypeToFlag(const RESOURCE_RETURN_TYPE eType); + + SHADER_VARIABLE_TYPE ResourceReturnTypeToSVTType(const RESOURCE_RETURN_TYPE eType, const REFLECT_RESOURCE_PRECISION ePrec); + + uint32_t ElemCountToAutoExpandFlag(uint32_t elemCount); + + bool IsOperationCommutative(int /* OPCODE_TYPE */ eOpCode); + + bool AreTempOperandsIdentical(const Operand * psA, const Operand * psB); + + int GetNumTextureDimensions(int /* RESOURCE_DIMENSION */ eResDim); + + SHADER_VARIABLE_TYPE SelectHigherType(SHADER_VARIABLE_TYPE a, SHADER_VARIABLE_TYPE b); + + // Returns true if the instruction adds 1 to the destination temp register + bool IsAddOneInstruction(const Instruction *psInst); + + bool CanDoDirectCast(SHADER_VARIABLE_TYPE src, SHADER_VARIABLE_TYPE dest); + + // Helper function to print floats with full precision + void PrintFloat(bstring b, float f); + + // Flags for ForeachOperand + // Process suboperands +#define FEO_FLAG_SUBOPERAND 1 + // Process src operands +#define FEO_FLAG_SRC_OPERAND 2 + // Process destination operands +#define FEO_FLAG_DEST_OPERAND 4 + // Convenience: Process all operands, both src and dest, and all suboperands +#define FEO_FLAG_ALL (FEO_FLAG_SUBOPERAND | FEO_FLAG_SRC_OPERAND | FEO_FLAG_DEST_OPERAND) + + // For_each for all operands within a range of instructions. Flags above. 
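+    // Illustrative usage, editorial and not part of the original source: count how many
+    // source (sub)operands in a phase read a temp register. The iterator type is assumed
+    // to be that of the std::vector<Instruction> held in ShaderPhase::psInst.
+    //
+    //   size_t tempReads = 0;
+    //   HLSLcc::ForEachOperand(psPhase->psInst.begin(), psPhase->psInst.end(),
+    //       FEO_FLAG_SRC_OPERAND | FEO_FLAG_SUBOPERAND,
+    //       [&tempReads](std::vector<Instruction>::iterator /*inst*/, Operand *psOp, uint32_t /*flag*/)
+    //       {
+    //           if (psOp->eType == OPERAND_TYPE_TEMP)
+    //               ++tempReads;
+    //       });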
+ template void ForEachOperand(ItrType _begin, ItrType _end, int flags, F callback) + { + ItrType inst = _begin; + while (inst != _end) + { + uint32_t i, k; + + if ((flags & FEO_FLAG_DEST_OPERAND) || (flags & FEO_FLAG_SUBOPERAND)) + { + for (i = 0; i < inst->ui32FirstSrc; i++) + { + if (flags & FEO_FLAG_SUBOPERAND) + { + for (k = 0; k < MAX_SUB_OPERANDS; k++) + { + if (inst->asOperands[i].m_SubOperands[k].get()) + { + callback(inst, inst->asOperands[i].m_SubOperands[k].get(), FEO_FLAG_SUBOPERAND); + } + } + } + if (flags & FEO_FLAG_DEST_OPERAND) + { + callback(inst, &inst->asOperands[i], FEO_FLAG_DEST_OPERAND); + } + } + } + + if ((flags & FEO_FLAG_SRC_OPERAND) || (flags & FEO_FLAG_SUBOPERAND)) + { + for (i = inst->ui32FirstSrc; i < inst->ui32NumOperands; i++) + { + if (flags & FEO_FLAG_SUBOPERAND) + { + for (k = 0; k < MAX_SUB_OPERANDS; k++) + { + if (inst->asOperands[i].m_SubOperands[k].get()) + { + callback(inst, inst->asOperands[i].m_SubOperands[k].get(), FEO_FLAG_SUBOPERAND); + } + } + } + if (flags & FEO_FLAG_SRC_OPERAND) + { + callback(inst, &inst->asOperands[i], FEO_FLAG_SRC_OPERAND); + } + } + } + + inst++; + } + } + + +}; diff --git a/src/internal_includes/Instruction.h b/src/internal_includes/Instruction.h new file mode 100644 index 0000000..35442b2 --- /dev/null +++ b/src/internal_includes/Instruction.h @@ -0,0 +1,134 @@ +#pragma once + +#include "internal_includes/Operand.h" +#include "internal_includes/tokens.h" +#include "include/ShaderInfo.h" +#include + +#define ATOMIC_ADDRESS_BASIC 0 +#define ATOMIC_ADDRESS_ARRAY_DYNAMIC 1 +#define ATOMIC_ADDRESS_STRUCT_DYNAMIC 2 + +#define TEXSMP_FLAG_NONE 0x0 +#define TEXSMP_FLAG_LOD 0x1 //LOD comes from operand +#define TEXSMP_FLAG_DEPTHCOMPARE 0x2 +#define TEXSMP_FLAG_FIRSTLOD 0x4 //LOD is 0 +#define TEXSMP_FLAG_BIAS 0x8 +#define TEXSMP_FLAG_GRAD 0x10 +//Gather specific flags +#define TEXSMP_FLAG_GATHER 0x20 +#define TEXSMP_FLAG_PARAMOFFSET 0x40 //Offset comes from operand + +struct Instruction +{ + Instruction() + : eOpcode(OPCODE_NOP) + , eBooleanTestType(INSTRUCTION_TEST_ZERO) + , ui32NumOperands(0) + , ui32FirstSrc(0) + , m_Uses() + , m_SkipTranslation(false) + , m_InductorRegister(0) + , bSaturate(0) + { + m_LoopInductors[0] = m_LoopInductors[1] = m_LoopInductors[2] = m_LoopInductors[3] = 0; + } + + // For creating unit tests only. Create an instruction with temps (unless reg is 0xffffffff in which case use OPERAND_TYPE_INPUT/OUTPUT) + Instruction(uint64_t _id, OPCODE_TYPE opcode, uint32_t reg1 = 0, uint32_t reg1Mask = 0, uint32_t reg2 = 0, uint32_t reg2Mask = 0, uint32_t reg3 = 0, uint32_t reg3Mask = 0, uint32_t reg4 = 0, uint32_t reg4Mask = 0) + { + id = _id; + eOpcode = opcode; + eBooleanTestType = INSTRUCTION_TEST_ZERO; + ui32FirstSrc = 0; + ui32NumOperands = 0; + m_LoopInductors[0] = m_LoopInductors[1] = m_LoopInductors[2] = m_LoopInductors[3] = 0; + m_SkipTranslation = false; + m_InductorRegister = 0; + + if (reg1Mask == 0) + return; + + ui32NumOperands++; + asOperands[0].eType = reg1 == 0xffffffff ? OPERAND_TYPE_OUTPUT : OPERAND_TYPE_TEMP; + asOperands[0].ui32RegisterNumber = reg1 == 0xffffffff ? 0 : reg1; + asOperands[0].ui32CompMask = reg1Mask; + asOperands[0].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; + + if (reg2Mask == 0) + return; + + ui32FirstSrc = 1; + ui32NumOperands++; + + asOperands[1].eType = reg2 == 0xffffffff ? OPERAND_TYPE_INPUT : OPERAND_TYPE_TEMP; + asOperands[1].ui32RegisterNumber = reg2 == 0xffffffff ? 
0 : reg2; + asOperands[1].ui32CompMask = reg2Mask; + asOperands[1].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; + + if (reg3Mask == 0) + return; + ui32NumOperands++; + + asOperands[2].eType = reg3 == 0xffffffff ? OPERAND_TYPE_INPUT : OPERAND_TYPE_TEMP; + asOperands[2].ui32RegisterNumber = reg3 == 0xffffffff ? 0 : reg3; + asOperands[2].ui32CompMask = reg3Mask; + asOperands[2].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; + + if (reg4Mask == 0) + return; + ui32NumOperands++; + + asOperands[3].eType = reg4 == 0xffffffff ? OPERAND_TYPE_INPUT : OPERAND_TYPE_TEMP; + asOperands[3].ui32RegisterNumber = reg4 == 0xffffffff ? 0 : reg4; + asOperands[3].ui32CompMask = reg4Mask; + asOperands[3].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; + } + + + bool IsPartialPrecisionSamplerInstruction(const ShaderInfo &info, OPERAND_MIN_PRECISION *pType) const; + + // Flags for ChangeOperandTempRegister +#define UD_CHANGE_SUBOPERANDS 1 +#define UD_CHANGE_MAIN_OPERAND 2 +#define UD_CHANGE_ALL 3 + + void ChangeOperandTempRegister(Operand *psOperand, uint32_t oldReg, uint32_t newReg, uint32_t compMask, uint32_t flags, uint32_t rebase); + + + OPCODE_TYPE eOpcode; + INSTRUCTION_TEST_BOOLEAN eBooleanTestType; + uint32_t ui32SyncFlags; + uint32_t ui32NumOperands; + uint32_t ui32FirstSrc; + Operand asOperands[6]; + uint32_t bSaturate; + uint32_t ui32FuncIndexWithinInterface; + RESINFO_RETURN_TYPE eResInfoReturnType; + + int bAddressOffset; + int8_t iUAddrOffset; + int8_t iVAddrOffset; + int8_t iWAddrOffset; + RESOURCE_RETURN_TYPE xType, yType, zType, wType; + RESOURCE_DIMENSION eResDim; + int8_t iCausedSplit; // Nonzero if has caused a temp split. Later used by sampler datatype tweaking + + struct Use + { + Use() : m_Inst(0), m_Op(0) {} + Use(const Use &a) : m_Inst(a.m_Inst), m_Op(a.m_Op) {} + Use(Instruction *inst, Operand *op) : m_Inst(inst), m_Op(op) {} + + Instruction *m_Inst; // The instruction that references the result of this instruction + Operand *m_Op; // The operand within the instruction above. Note: can also be suboperand. + }; + + std::vector m_Uses; // Array of use sites for the result(s) of this instruction, if any of the results is a temp reg. + + Instruction *m_LoopInductors[4]; // If OPCODE_LOOP and is suitable for transforming into for-loop, contains pointers to for initializer, end condition, breakc, and increment. 
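+    // Editorial sketch, not in the original source: schematically, once the four inductor
+    // instructions are identified, a bytecode loop of the form
+    //     mov  r0.x, l(0)           // [0] initializer
+    //     loop
+    //       ige  r1.x, r0.x, l(8)   // [1] end condition
+    //       breakc_nz r1.x          // [2] conditional break
+    //       ...
+    //       iadd r0.x, r0.x, l(1)   // [3] increment
+    //     endloop
+    // can be emitted as a single for statement by the loop transform pass (see LoopTransform.h).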
+ bool m_SkipTranslation; // If true, don't emit this instruction (currently used by the for loop translation) + uint32_t m_InductorRegister; // If non-zero, the inductor variable can be declared in the for statement, and this register number has been allocated for it + + uint64_t id; +}; diff --git a/src/internal_includes/LoopTransform.h b/src/internal_includes/LoopTransform.h new file mode 100644 index 0000000..63caaf8 --- /dev/null +++ b/src/internal_includes/LoopTransform.h @@ -0,0 +1,9 @@ + +#pragma once + +class ShaderPhase; + +namespace HLSLcc +{ + void DoLoopTransform(ShaderPhase &phase); +}; diff --git a/src/internal_includes/Operand.h b/src/internal_includes/Operand.h new file mode 100644 index 0000000..fc6466e --- /dev/null +++ b/src/internal_includes/Operand.h @@ -0,0 +1,152 @@ +#pragma once + +#include "internal_includes/tokens.h" +#include +#include + +#ifdef __APPLE__ +#include +#endif + +enum{ MAX_SUB_OPERANDS = 3 }; +class Operand; +class HLSLCrossCompilerContext; +struct Instruction; + +#if _MSC_VER +// We want to disable the "array will be default-initialized" warning, as that's exactly what we want +#pragma warning(disable: 4351) +#endif + +class Operand +{ +public: +#ifdef __APPLE__ + // Herp derp Apple is stuck in 2005 + typedef std::tr1::shared_ptr SubOperandPtr; +#else + typedef std::shared_ptr SubOperandPtr; +#endif + + + + Operand() + : + iExtended(), + eType(), + eModifier(), + eMinPrecision(), + iIndexDims(), + iWriteMask(), + iGSInput(), + iPSInOut(), + iWriteMaskEnabled(), + iArrayElements(), + iNumComponents(), + eSelMode(), + ui32CompMask(), + ui32Swizzle(), + aui32Swizzle(), + aui32ArraySizes(), + ui32RegisterNumber(), + afImmediates(), + adImmediates(), + eSpecialName(), + specialName(), + eIndexRep(), + m_SubOperands(), + aeDataType(), + m_Rebase(0), + m_Size(0), + m_Defines(), + m_ForLoopInductorName(0) +#ifdef _DEBUG + , id(0) +#endif + {} + + // Retrieve the mask of all the components this operand accesses (either reads from or writes to). + // Note that destination writemask does affect the effective access mask. + uint32_t GetAccessMask() const; + + // Returns the index of the highest accessed component, based on component mask + int GetMaxComponent() const; + + bool IsSwizzleReplicated() const; + + // Get the number of elements returned by operand, taking additional component mask into account + //e.g. + //.z = 1 + //.x = 1 + //.yw = 2 + uint32_t GetNumSwizzleElements(uint32_t ui32CompMask = OPERAND_4_COMPONENT_MASK_ALL) const; + + // When this operand is used as an input declaration, how many components does it have? + int GetNumInputElements(const HLSLCrossCompilerContext *psContext) const; + + // Retrieve the operand data type. 
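+    // Editorial assumption, not in the original source: ePreferredTypeForImmediates presumably
+    // applies to OPERAND_TYPE_IMMEDIATE32 operands, which have no analyzed per-component type
+    // in aeDataType, letting the caller choose how the literal is typed (SVT_INT by default).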
+ SHADER_VARIABLE_TYPE GetDataType(HLSLCrossCompilerContext* psContext, SHADER_VARIABLE_TYPE ePreferredTypeForImmediates = SVT_INT) const; + + // Returns 0 if the register used by the operand is per-vertex, or 1 if per-patch + int GetRegisterSpace(const HLSLCrossCompilerContext *psContext) const; + // Same as above but with explicit shader type and phase + int GetRegisterSpace(SHADER_TYPE eShaderType, SHADER_PHASE_TYPE eShaderPhaseType) const; + + // Maps REFLECT_RESOURCE_PRECISION into OPERAND_MIN_PRECISION as much as possible + static OPERAND_MIN_PRECISION ResourcePrecisionToOperandPrecision(REFLECT_RESOURCE_PRECISION ePrec); + + int iExtended; + OPERAND_TYPE eType; + OPERAND_MODIFIER eModifier; + OPERAND_MIN_PRECISION eMinPrecision; + int iIndexDims; + int iWriteMask; + int iGSInput; + int iPSInOut; + int iWriteMaskEnabled; + int iArrayElements; + int iNumComponents; + + OPERAND_4_COMPONENT_SELECTION_MODE eSelMode; + uint32_t ui32CompMask; + uint32_t ui32Swizzle; + uint32_t aui32Swizzle[4]; + + uint32_t aui32ArraySizes[3]; + uint32_t ui32RegisterNumber; + //If eType is OPERAND_TYPE_IMMEDIATE32 + float afImmediates[4]; + //If eType is OPERAND_TYPE_IMMEDIATE64 + double adImmediates[4]; + + SPECIAL_NAME eSpecialName; + std::string specialName; + + OPERAND_INDEX_REPRESENTATION eIndexRep[3]; + + SubOperandPtr m_SubOperands[MAX_SUB_OPERANDS]; + + //One type for each component. + SHADER_VARIABLE_TYPE aeDataType[4]; + + uint32_t m_Rebase; // Rebase value, for constant array accesses. + uint32_t m_Size; // Component count, only for constant array access. + + struct Define + { + Define() : m_Inst(0), m_Op(0) {} + Define(const Define &a) : m_Inst(a.m_Inst), m_Op(a.m_Op) {} + Define(Instruction *inst, Operand *op) : m_Inst(inst), m_Op(op) {} + + Instruction *m_Inst; // Instruction that writes to the temp + Operand *m_Op; // The (destination) operand within that instruction. + }; + + std::vector m_Defines; // Array of instructions whose results this operand can use. 
(only if eType == OPERAND_TYPE_TEMP) + uint32_t m_ForLoopInductorName; // If non-zero, this (eType==OPERAND_TYPE_TEMP) is an inductor variable used in for loop, and it has a special number as given here (overrides ui32RegisterNumber) + +#ifdef _DEBUG + uint64_t id; +#endif +}; + diff --git a/src/internal_includes/Shader.h b/src/internal_includes/Shader.h new file mode 100644 index 0000000..7a8a330 --- /dev/null +++ b/src/internal_includes/Shader.h @@ -0,0 +1,267 @@ + +#pragma once + +#include +#include +#include + +#include "growing_array.h" +#include "internal_includes/tokens.h" +#include "internal_includes/reflect.h" +#include "include/ShaderInfo.h" +#include "internal_includes/Instruction.h" +#include "internal_includes/Declaration.h" +#include "internal_includes/ControlFlowGraph.h" +#include "bstrlib.h" + +struct ConstantArrayChunk +{ + ConstantArrayChunk() : m_Size(0), m_AccessMask(0) {} + ConstantArrayChunk(uint32_t sz, uint32_t mask, Operand *firstUse) + : m_Size(sz), m_AccessMask(mask) + { + m_UseSites.push_back(firstUse); + } + + uint32_t m_Size; + uint32_t m_AccessMask; + uint32_t m_Rebase; + uint32_t m_ComponentCount; + + std::vector m_UseSites; +}; +typedef std::multimap ChunkMap; + +struct ConstantArrayInfo +{ + ConstantArrayInfo() : m_OrigDeclaration(0), m_Chunks() {} + + Declaration *m_OrigDeclaration; // Pointer to the original declaration of the const array + ChunkMap m_Chunks; // map of , same start offset might have multiple entries for different access masks +}; + +class ShaderPhase +{ +public: + ShaderPhase() + : + ePhase(MAIN_PHASE), + ui32InstanceCount(0), + postShaderCode(), + hasPostShaderCode(0), + earlyMain(), + ui32OrigTemps(0), + ui32TotalTemps(0), + psTempDeclaration(NULL), + pui32SplitInfo(), + peTempTypes(), + acInputNeedsRedirect(), + acOutputNeedsRedirect(), + acPatchConstantsNeedsRedirect(), + m_CFG(), + m_CFGInitialized(false), + m_NextFreeTempRegister(1), + m_NextTexCoordTemp(0) + {} + + void ResolveUAVProperties(); + + void UnvectorizeImmMoves(); // Transform MOV tX.xyz, (0, 1, 2) into MOV tX.x, 0; MOV tX.y, 1; MOV tX.z, 2 to make datatype analysis easier + + void PruneConstArrays(); // Walk through everything that accesses a const array to see if we could make it smaller + + void ExpandSWAPCs(); // Expand all SWAPC opcodes into a bunch of MOVCs. Must be done first! + + ConstantArrayInfo m_ConstantArrayInfo; + + std::vector psDecl; + std::vector psInst; + + SHADER_PHASE_TYPE ePhase; + uint32_t ui32InstanceCount; // In case of hull shaders, how many instances this phase needs to have. Defaults to 1. + bstring postShaderCode;//End of main or before emit() + int hasPostShaderCode; + + bstring earlyMain;//Code to be inserted at the start of phase + + uint32_t ui32OrigTemps; // The number of temporaries this phase originally declared + uint32_t ui32TotalTemps; // The number of temporaries this phase has now + Declaration *psTempDeclaration; // Shortcut to the OPCODE_DCL_TEMPS opcode + + // The split table is a table containing the index of the original register this register was split out from, or 0xffffffff + // Format: lowest 16 bits: original register. bits 16-23: rebase (eg value of 1 means .yzw was changed to .xyz): bits 24-31: component count + std::vector pui32SplitInfo; + std::vector peTempTypes; + + // These are needed in cases we have 2 vec2 texcoords combined into one vec4 and they are accessed together. + std::vector acInputNeedsRedirect; // If 0xff, requires re-routing all reads via a combined vec4. 
If 0xfe, the same but the vec4 has already been declared. + std::vector acOutputNeedsRedirect; // Same for outputs + std::vector acPatchConstantsNeedsRedirect; // Same for patch constants + + // Get the Control Flow Graph for this phase, build it if necessary. + HLSLcc::ControlFlow::ControlFlowGraph &GetCFG(); + + uint32_t m_NextFreeTempRegister; // A counter for creating new temporaries for for-loops. + uint32_t m_NextTexCoordTemp; // A counter for creating tex coord temps for driver issue workarounds + +private: + bool m_CFGInitialized; + HLSLcc::ControlFlow::ControlFlowGraph m_CFG; +}; + +class Shader +{ +public: + + Shader() + : + ui32MajorVersion(0), + ui32MinorVersion(0), + eShaderType(INVALID_SHADER), + eTargetLanguage(LANG_DEFAULT), + extensions(0), + fp64(0), + ui32ShaderLength(0), + aui32FuncTableToFuncPointer(), + aui32FuncBodyToFuncTable(), + funcTable(), + funcPointer(), + ui32NextClassFuncName(), + pui32FirstToken(NULL), + asPhases(), + sInfo(), + abScalarInput(), + abScalarOutput(), + aIndexedInput(), + aIndexedOutput(), + aIndexedInputParents(), + aeResourceDims(), + acInputDeclared(), + acOutputDeclared(), + aiOpcodeUsed(NUM_OPCODES, 0), + ui32CurrentVertexOutputStream(0), + textureSamplers(), + aui32StructuredBufferBindingPoints(MAX_RESOURCE_BINDINGS, 0), + ui32CurrentStructuredBufferIndex(), + m_CubemapArrayExtensionDeclared(false), + m_TextureBufferExtensionDeclared(false), + m_ClipDistanceExtensionDeclared(false) + { + } + + // Retrieve the number of components the temp register has. + uint32_t GetTempComponentCount(SHADER_VARIABLE_TYPE eType, uint32_t ui32Reg) const; + + //Hull shaders have multiple phases. + //Each phase has its own temps. + //Convert from per-phase temps to global temps. + void ConsolidateHullTempVars(); + + // Go through all declarations and remove UAV occupied binding points from the aui32StructuredBufferBindingPoints list + void ResolveStructuredBufferBindingSlots(ShaderPhase *psPhase); + + // HLSL has separate register spaces for UAV and structured buffers. GLSL has shared register space for all buffers. + // The aim here is to preserve the UAV buffer bindings as they are and use remaining binding points for structured buffers. + // In this step make aui32StructuredBufferBindingPoints contain increasingly ordered uints starting from zero. + void PrepareStructuredBufferBindingSlots(); + + // Detect temp registers per data type that are actually used. + void PruneTempRegisters(); + + // Check if inputs and outputs are accessed across semantic boundaries + // as in, 2x texcoord vec2's are packed together as vec4 but still accessed together. + void AnalyzeIOOverlap(); + + // Change all references to vertex position to always be highp, having them be mediump causes problems on Metal and Vivante GPUs. + void ForcePositionToHighp(); + + void FindUnusedGlobals(uint32_t flags); // Finds the DCL_CONSTANT_BUFFER with name "$Globals" and searches through all usages for each member of it and mark if they're actually ever used. + + void ExpandSWAPCs(); + + uint32_t ui32MajorVersion; + uint32_t ui32MinorVersion; + SHADER_TYPE eShaderType; + + GLLang eTargetLanguage; + const struct GlExtensions *extensions; + + int fp64; + + //DWORDs in program code, including version and length tokens. + uint32_t ui32ShaderLength; + + + //Instruction* functions;//non-main subroutines + HLSLcc::growing_vector aui32FuncTableToFuncPointer; // dynamic alloc? 
+ HLSLcc::growing_vector aui32FuncBodyToFuncTable; + + struct FuncTableEntry{ + HLSLcc::growing_vector aui32FuncBodies; + }; + HLSLcc::growing_vector funcTable; + + struct FuncPointerEntry { + HLSLcc::growing_vector aui32FuncTables; + uint32_t ui32NumBodiesPerTable; + }; + + HLSLcc::growing_vector funcPointer; + + HLSLcc::growing_vector ui32NextClassFuncName; + + const uint32_t* pui32FirstToken;//Reference for calculating current position in token stream. + + std::vector asPhases; + + ShaderInfo sInfo; + + // There are 2 input/output register spaces in DX bytecode: one for per-patch data and one for per-vertex. + // Which one is used depends on the context: + // per-vertex space is used in vertex/pixel/geom shaders always + // hull shader control point phase uses per-vertex by default, other phases are per-patch by default (can access per-vertex with OPERAND_TYPE_I/O_CONTROL_POINT) + // domain shader is per-patch by default, can access per-vertex with OPERAND_TYPE_I/O_CONTROL_POINT + + // Below, the [2] is accessed with 0 == per-vertex, 1 == per-patch + // Note that these ints are component masks + HLSLcc::growing_vector abScalarInput[2]; + HLSLcc::growing_vector abScalarOutput[2]; + + HLSLcc::growing_vector aIndexedInput[2]; + HLSLcc::growing_vector aIndexedOutput[2]; + + HLSLcc::growing_vector aIndexedInputParents[2]; + + HLSLcc::growing_vector aeResourceDims; + + HLSLcc::growing_vector acInputDeclared[2]; + HLSLcc::growing_vector acOutputDeclared[2]; + + std::vector aiOpcodeUsed; // Initialized to NUM_OPCODES elements above. + + uint32_t ui32CurrentVertexOutputStream; + + TextureSamplerPairs textureSamplers; + + std::vector aui32StructuredBufferBindingPoints; + uint32_t ui32CurrentStructuredBufferIndex; + + bool m_CubemapArrayExtensionDeclared; + bool m_TextureBufferExtensionDeclared; + bool m_ClipDistanceExtensionDeclared; + + std::vector psIntTempSizes; // Array for whether this temp register needs declaration as int temp + std::vector psInt16TempSizes; // min16ints + std::vector psInt12TempSizes; // min12ints + std::vector psUIntTempSizes; // Same for uints + std::vector psUInt16TempSizes; // ... and for uint16's + std::vector psFloatTempSizes; // ...and for floats + std::vector psFloat16TempSizes; // ...and for min16floats + std::vector psFloat10TempSizes; // ...and for min10floats + std::vector psDoubleTempSizes; // ...and for doubles + std::vector psBoolTempSizes; // ... and for bools + +private: + void DoIOOverlapOperand(ShaderPhase *psPhase, Operand *psOperand); + +}; diff --git a/src/internal_includes/Translator.h b/src/internal_includes/Translator.h new file mode 100644 index 0000000..7650985 --- /dev/null +++ b/src/internal_includes/Translator.h @@ -0,0 +1,35 @@ + +#pragma once +#include "HLSLCrossCompilerContext.h" +#include "Shader.h" + +struct Declaration; +// Base class for translator backend implenentations. +class Translator +{ +protected: + HLSLCrossCompilerContext *psContext; +public: + explicit Translator(HLSLCrossCompilerContext *ctx) : psContext(ctx) {} + virtual ~Translator() {} + + virtual bool Translate() = 0; + + virtual void TranslateDeclaration(const Declaration *psDecl) = 0; + + // Translate system value type to name, return true if succeeded and no further translation is necessary + virtual bool TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix = NULL) = 0; + + // In GLSL, the input and output names cannot clash. 
+ // Also, the output name of previous stage must match the input name of the next stage. + // So, do gymnastics depending on which shader we're running on and which other shaders exist in this program. + // + virtual void SetIOPrefixes() = 0; + + void SetExtensions(const struct GlExtensions *ext) + { + psContext->psShader->extensions = ext; + } + + +}; \ No newline at end of file diff --git a/src/internal_includes/UseDefineChains.h b/src/internal_includes/UseDefineChains.h new file mode 100644 index 0000000..3d28279 --- /dev/null +++ b/src/internal_includes/UseDefineChains.h @@ -0,0 +1,141 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include +#include + +struct DefineUseChainEntry; +struct UseDefineChainEntry; + +typedef std::set DefineSet; +typedef std::set UsageSet; + +struct Instruction; +class Operand; +class ShaderInfo; +namespace HLSLcc +{ + namespace ControlFlow + { + class ControlFlowGraph; + }; +}; + + +// Def-Use chain per temp component +struct DefineUseChainEntry +{ + DefineUseChainEntry() + : psInst(0) + , psOp(0) + , usages() + , writeMask(0) + , index(0) + , isStandalone(0) + { + memset(psSiblings, 0, 4 * sizeof(DefineUseChainEntry *)); + } + + Instruction *psInst; // The declaration (write to this temp component) + Operand *psOp; // The operand within this instruction for the write target + UsageSet usages; // List of usages that are dependent on this write + uint32_t writeMask; // Access mask; which all components were written to in the same op + uint32_t index; // For which component was this definition created for? + uint32_t isStandalone; // A shortcut for analysis: if nonzero, all siblings of all usages for both this and all this siblings + struct DefineUseChainEntry *psSiblings[4]; // In case of vectorized op, contains pointer to this define's corresponding entries for the other components. + +#if _DEBUG + bool operator==(const DefineUseChainEntry &a) const + { + if (psInst != a.psInst) + return false; + if (psOp != a.psOp) + return false; + if (writeMask != a.writeMask) + return false; + if (index != a.index) + return false; + if (isStandalone != a.isStandalone) + return false; + + // Just check that each one has the same amount of usages + if (usages.size() != a.usages.size()) + return false; + + return true; + } + +#endif + +}; + +typedef std::list DefineUseChain; + +struct UseDefineChainEntry +{ + UseDefineChainEntry() + : psInst(0) + , psOp(0) + , defines() + , accessMask(0) + , index(0) + { + memset(psSiblings, 0, 4 * sizeof(UseDefineChainEntry *)); + } + + Instruction *psInst; // The use (read from this temp component) + Operand *psOp; // The operand within this instruction for the read + DefineSet defines; // List of writes that are visible to this read + uint32_t accessMask; // Which all components were read together with this one + uint32_t index; // For which component was this usage created for? + struct UseDefineChainEntry *psSiblings[4]; // In case of vectorized op, contains pointer to this usage's corresponding entries for the other components. 
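+    // Editorial note, not in the original source: DefineUseChainEntry and UseDefineChainEntry
+    // mirror each other; each write records the reads it can reach (usages) and each read
+    // records the writes that can reach it (defines). UDSplitTemps() and
+    // UpdateSamplerPrecisions() below walk these chains.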
+ +#if _DEBUG + bool operator==(const UseDefineChainEntry &a) const + { + if (psInst != a.psInst) + return false; + if (psOp != a.psOp) + return false; + if (accessMask != a.accessMask) + return false; + if (index != a.index) + return false; + + // Just check that each one has the same amount of usages + if (defines.size() != a.defines.size()) + return false; + + return true; + } + +#endif + +}; + +typedef std::list UseDefineChain; + +typedef std::map UseDefineChains; +typedef std::map DefineUseChains; +typedef std::vector ActiveDefinitions; + +// Do flow control analysis on the instructions and build the define-use and use-define chains +void BuildUseDefineChains(std::vector &instructions, uint32_t ui32NumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, HLSLcc::ControlFlow::ControlFlowGraph &cfg); + +// Do temp splitting based on use-define chains +void UDSplitTemps(uint32_t *psNumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, std::vector &pui32SplitTable); + +// Based on the sampler precisions, downgrade the definitions if possible. +void UpdateSamplerPrecisions(const ShaderInfo &psContext, DefineUseChains &psDUChains, uint32_t ui32NumTemps); + +// Optimization pass for successive passes: Mark Operand->isStandalone for definitions that are "standalone": all usages (and all their sibligns) of this and all its siblings only see this definition. +void CalculateStandaloneDefinitions(DefineUseChains &psDUChains, uint32_t ui32NumTemps); + +// Write the uses and defines back to Instruction and Operand member lists. +void WriteBackUsesAndDefines(DefineUseChains &psDUChains); + diff --git a/src/internal_includes/debug.h b/src/internal_includes/debug.h new file mode 100644 index 0000000..759f4f0 --- /dev/null +++ b/src/internal_includes/debug.h @@ -0,0 +1,18 @@ +#ifndef DEBUG_H_ +#define DEBUG_H_ + +#ifdef _DEBUG +#include "assert.h" +#define ASSERT(expr) CustomAssert(expr) +static void CustomAssert(int expression) +{ + if(!expression) + { + assert(0); + } +} +#else +#define ASSERT(expr) +#endif + +#endif diff --git a/src/internal_includes/decode.h b/src/internal_includes/decode.h new file mode 100644 index 0000000..331cca4 --- /dev/null +++ b/src/internal_includes/decode.h @@ -0,0 +1,10 @@ +#ifndef DECODE_H +#define DECODE_H + +#include "internal_includes/Shader.h" + +Shader* DecodeDXBC(uint32_t* data, uint32_t decodeFlags); + +void UpdateOperandReferences(Shader* psShader, SHADER_PHASE_TYPE eShaderPhaseType, Instruction* psInst); + +#endif diff --git a/src/internal_includes/languages.h b/src/internal_includes/languages.h new file mode 100644 index 0000000..aa7e78c --- /dev/null +++ b/src/internal_includes/languages.h @@ -0,0 +1,249 @@ +#ifndef LANGUAGES_H +#define LANGUAGES_H + +#include "hlslcc.h" + +static int InOutSupported(const GLLang eLang) +{ + if(eLang == LANG_ES_100 || eLang == LANG_120) + { + return 0; + } + return 1; +} + +static int WriteToFragData(const GLLang eLang) +{ + if(eLang == LANG_ES_100 || eLang == LANG_120) + { + return 1; + } + return 0; +} + +static int ShaderBitEncodingSupported(const GLLang eLang) +{ + if( eLang != LANG_ES_300 && + eLang != LANG_ES_310 && + eLang < LANG_330) + { + return 0; + } + return 1; +} + +static int HaveOverloadedTextureFuncs(const GLLang eLang) +{ + if(eLang == LANG_ES_100 || eLang == LANG_120) + { + return 0; + } + return 1; +} + +//Only enable for ES. +//Not present in 120, ignored in other desktop languages. 
+static int HavePrecisionQualifers(const GLLang eLang) +{ + if(eLang >= LANG_ES_100 && eLang <= LANG_ES_310) + { + return 1; + } + return 0; +} + +static int HaveCubemapArray(const GLLang eLang) +{ + if (eLang >= LANG_400 && eLang <= LANG_GL_LAST) + return 1; + return 0; +} + +static bool IsESLanguage(const GLLang eLang) +{ + return (eLang >= LANG_ES_FIRST && eLang <= LANG_ES_LAST); +} + +static bool IsDesktopGLLanguage(const GLLang eLang) +{ + return (eLang >= LANG_GL_FIRST && eLang <= LANG_GL_LAST); +} + +//Only on vertex inputs and pixel outputs. +static int HaveLimitedInOutLocationQualifier(const GLLang eLang, const struct GlExtensions *extensions) +{ + if(eLang >= LANG_330 || eLang == LANG_ES_300 || eLang == LANG_ES_310 || (extensions && ((struct GlExtensions*)extensions)->ARB_explicit_attrib_location)) + { + return 1; + } + return 0; +} + +static int HaveInOutLocationQualifier(const GLLang eLang) +{ + if(eLang >= LANG_410 || eLang == LANG_ES_310) + { + return 1; + } + return 0; +} + +//layout(binding = X) uniform {uniformA; uniformB;} +//layout(location = X) uniform uniform_name; +static int HaveUniformBindingsAndLocations(const GLLang eLang,const struct GlExtensions *extensions, unsigned int flags) +{ + if (flags & HLSLCC_FLAG_DISABLE_EXPLICIT_LOCATIONS) + return 0; + + if (eLang >= LANG_430 || eLang == LANG_ES_310 || + (extensions && ((struct GlExtensions*)extensions)->ARB_explicit_uniform_location && ((struct GlExtensions*)extensions)->ARB_shading_language_420pack)) + { + return 1; + } + return 0; +} + +static int DualSourceBlendSupported(const GLLang eLang) +{ + if(eLang >= LANG_330) + { + return 1; + } + return 0; +} + +static int SubroutinesSupported(const GLLang eLang) +{ + if(eLang >= LANG_400) + { + return 1; + } + return 0; +} + +//Before 430, flat/smooth/centroid/noperspective must match +//between fragment and its previous stage. +//HLSL bytecode only tells us the interpolation in pixel shader. 
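+//When this returns 1, the interpolation qualifiers found in the pixel shader therefore have to be
+//propagated onto the matching outputs of the stage that feeds it.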
+static int PixelInterpDependency(const GLLang eLang) +{ + if(eLang < LANG_430) + { + return 1; + } + return 0; +} + +static int HaveUVec(const GLLang eLang) +{ + switch(eLang) + { + case LANG_ES_100: + case LANG_120: + return 0; + default: + break; + } + return 1; +} + +static int HaveGather(const GLLang eLang) +{ + if(eLang >= LANG_400 || eLang == LANG_ES_310) + { + return 1; + } + return 0; +} + +static int HaveGatherNonConstOffset(const GLLang eLang) +{ + if(eLang >= LANG_420 || eLang == LANG_ES_310) + { + return 1; + } + return 0; +} + + +static int HaveQueryLod(const GLLang eLang) +{ + if(eLang >= LANG_400) + { + return 1; + } + return 0; +} + +static int HaveQueryLevels(const GLLang eLang) +{ + if(eLang >= LANG_430) + { + return 1; + } + return 0; +} + +static int HaveFragmentCoordConventions(const GLLang eLang) +{ + if(eLang >= LANG_150) + { + return 1; + } + return 0; +} + +static int HaveGeometryShaderARB(const GLLang eLang) +{ + if(eLang >= LANG_150) + { + return 1; + } + return 0; +} + +static int HaveAtomicCounter(const GLLang eLang) +{ + if(eLang >= LANG_420 || eLang == LANG_ES_310) + { + return 1; + } + return 0; +} + +static int HaveAtomicMem(const GLLang eLang) +{ + if (eLang >= LANG_430 || eLang == LANG_ES_310) + { + return 1; + } + return 0; +} + +static int HaveImageAtomics(const GLLang eLang) +{ + if (eLang >= LANG_420) + { + return 1; + } + return 0; +} + +static int HaveCompute(const GLLang eLang) +{ + if(eLang >= LANG_430 || eLang == LANG_ES_310) + { + return 1; + } + return 0; +} + +static int HaveImageLoadStore(const GLLang eLang) +{ + if(eLang >= LANG_420 || eLang == LANG_ES_310) + { + return 1; + } + return 0; +} + +#endif diff --git a/src/internal_includes/reflect.h b/src/internal_includes/reflect.h new file mode 100644 index 0000000..e7c801d --- /dev/null +++ b/src/internal_includes/reflect.h @@ -0,0 +1,27 @@ +#ifndef REFLECT_H +#define REFLECT_H + +#include "hlslcc.h" + +struct ShaderPhase_TAG; + +typedef struct +{ + uint32_t* pui32Inputs; + uint32_t* pui32Outputs; + uint32_t* pui32Resources; + uint32_t* pui32Interfaces; + uint32_t* pui32Inputs11; + uint32_t* pui32Outputs11; + uint32_t* pui32OutputsWithStreams; + uint32_t* pui32PatchConstants; + uint32_t* pui32PatchConstants11; +} ReflectionChunks; + +void LoadShaderInfo(const uint32_t ui32MajorVersion, + const uint32_t ui32MinorVersion, + const ReflectionChunks* psChunks, + ShaderInfo* psInfo, uint32_t decodeFlags); + +#endif + diff --git a/src/internal_includes/toGLSL.h b/src/internal_includes/toGLSL.h new file mode 100644 index 0000000..5ba58a4 --- /dev/null +++ b/src/internal_includes/toGLSL.h @@ -0,0 +1,107 @@ +#pragma once + +#include "hlslcc.h" +#include "internal_includes/Translator.h" + +class HLSLCrossCompilerContext; + +class ToGLSL : public Translator +{ +protected: + GLLang language; +public: + explicit ToGLSL(HLSLCrossCompilerContext *ctx) : Translator(ctx), language(LANG_DEFAULT) {} + // Sets the target language according to given input. 
if LANG_DEFAULT, does autodetect and returns the selected language + GLLang SetLanguage(GLLang suggestedLanguage); + + virtual bool Translate(); + virtual void TranslateDeclaration(const Declaration* psDecl); + virtual bool TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix = NULL); + virtual void SetIOPrefixes(); + +private: + + void TranslateOperand(const Operand *psOp, uint32_t flags, uint32_t ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL); + void TranslateInstruction(Instruction* psInst, bool isEmbedded = false); + + void TranslateVariableNameWithMask(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase); + + void TranslateOperandIndex(const Operand* psOperand, int index); + void TranslateOperandIndexMAD(const Operand* psOperand, int index, uint32_t multiply, uint32_t add); + + void AddOpAssignToDestWithMask(const Operand* psDest, + SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, const char *szAssignmentOp, int *pNeedsParenthesis, uint32_t ui32CompMask); + void AddAssignToDest(const Operand* psDest, + SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, int* pNeedsParenthesis); + void AddAssignPrologue(int numParenthesis, bool isEmbedded = false); + + void AddBuiltinOutput(const Declaration* psDecl, int arrayElements, const char* builtinName); + void HandleOutputRedirect(const Declaration *psDecl, const char *Precision); + void HandleInputRedirect(const Declaration *psDecl, const char *Precision); + + void AddUserOutput(const Declaration* psDecl); + void DeclareStructConstants(const uint32_t ui32BindingPoint, + const ConstantBuffer* psCBuf, const Operand* psOperand, + bstring glsl); + + typedef enum + { + CMP_EQ, + CMP_LT, + CMP_GE, + CMP_NE, + } ComparisonType; + + void AddComparison(Instruction* psInst, ComparisonType eType, + uint32_t typeFlag); + + void AddMOVBinaryOp(const Operand *pDest, Operand *pSrc, bool isEmbedded = false); + void AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand *src1, Operand *src2); + void CallBinaryOp(const char* name, Instruction* psInst, + int dest, int src0, int src1, SHADER_VARIABLE_TYPE eDataType, bool isEmbedded = false); + void CallTernaryOp(const char* op1, const char* op2, Instruction* psInst, + int dest, int src0, int src1, int src2, uint32_t dataType); + void CallHelper3(const char* name, Instruction* psInst, + int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask); + void CallHelper2(const char* name, Instruction* psInst, + int dest, int src0, int src1, int paramsShouldFollowWriteMask); + void CallHelper2Int(const char* name, Instruction* psInst, + int dest, int src0, int src1, int paramsShouldFollowWriteMask); + void CallHelper2UInt(const char* name, Instruction* psInst, + int dest, int src0, int src1, int paramsShouldFollowWriteMask); + void CallHelper1(const char* name, Instruction* psInst, + int dest, int src0, int paramsShouldFollowWriteMask); + void CallHelper1Int( + const char* name, + Instruction* psInst, + const int dest, + const int src0, + int paramsShouldFollowWriteMask); + void TranslateTexelFetch( + Instruction* psInst, + const ResourceBinding* psBinding, + bstring glsl); + void TranslateTexelFetchOffset( + Instruction* psInst, + const ResourceBinding* psBinding, + bstring glsl); + void TranslateTexCoord( + const RESOURCE_DIMENSION eResDim, + Operand* psTexCoordOperand); + void 
GetResInfoData(Instruction* psInst, int index, int destElem); + void TranslateTextureSample(Instruction* psInst, + uint32_t ui32Flags); + void TranslateDynamicComponentSelection(const ShaderVarType* psVarType, + const Operand* psByteAddr, uint32_t offset, uint32_t mask); + void TranslateShaderStorageStore(Instruction* psInst); + void TranslateShaderStorageLoad(Instruction* psInst); + void TranslateAtomicMemOp(Instruction* psInst); + void TranslateConditional( + Instruction* psInst, + bstring glsl); + +}; + + + + diff --git a/src/internal_includes/toGLSLOperand.h b/src/internal_includes/toGLSLOperand.h new file mode 100644 index 0000000..1a643ad --- /dev/null +++ b/src/internal_includes/toGLSLOperand.h @@ -0,0 +1,23 @@ +#ifndef TO_GLSL_OPERAND_H +#define TO_GLSL_OPERAND_H + +#include +#include "bstrlib.h" +#include "ShaderInfo.h" + +class HLSLCrossCompilerContext; + +//void TranslateOperand(HLSLCrossCompilerContext* psContext, const Operand* psOperand, uint32_t ui32TOFlag); +// Translate operand but add additional component mask +//void TranslateOperandWithMask(HLSLCrossCompilerContext* psContext, const Operand* psOperand, uint32_t ui32TOFlag, uint32_t ui32ComponentMask); + +void TranslateOperandSwizzle(HLSLCrossCompilerContext* psContext, const Operand* psOperand, int iRebase); +void TranslateOperandSwizzleWithMask(HLSLCrossCompilerContext* psContext, const Operand* psOperand, uint32_t ui32ComponentMask, int iRebase); + +void ResourceName(bstring targetStr, HLSLCrossCompilerContext* psContext, ResourceGroup group, const uint32_t ui32RegisterNumber, const int bZCompare); +std::string ResourceName(HLSLCrossCompilerContext* psContext, ResourceGroup group, const uint32_t ui32RegisterNumber, const int bZCompare); + +std::string TextureSamplerName(ShaderInfo* psShaderInfo, const uint32_t ui32TextureRegisterNumber, const uint32_t ui32SamplerRegisterNumber, const int bZCompare); +void ConcatTextureSamplerName(bstring str, ShaderInfo* psShaderInfo, const uint32_t ui32TextureRegisterNumber, const uint32_t ui32SamplerRegisterNumber, const int bZCompare); + +#endif diff --git a/src/internal_includes/toMetal.h b/src/internal_includes/toMetal.h new file mode 100644 index 0000000..4d47e8f --- /dev/null +++ b/src/internal_includes/toMetal.h @@ -0,0 +1,193 @@ + +#pragma once +#include "internal_includes/Translator.h" +#include +#include + +// We store struct definition contents inside a vector of strings +struct StructDefinition +{ + StructDefinition() : m_Members(), m_Dependencies(), m_IsPrinted(false) {} + + std::vector m_Members; // A vector of strings with the struct members + std::vector m_Dependencies; // A vector of struct names this struct depends on. + bool m_IsPrinted; // Has this struct been printed out yet? +}; + +typedef std::map StructDefinitions; + +// Map of extra function definitions we need to add before the shader body but after the declarations. 
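+// Keyed by function name so each helper is emitted only once; the mapped value holds the complete
+// definition text.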
+typedef std::map FunctionDefinitions; + +// A helper class for allocating binding slots +// (because both UAVs and textures use the same slots in Metal, also constant buffers and other buffers etc) +class BindingSlotAllocator +{ + typedef std::map SlotMap; + SlotMap m_Allocations; +public: + BindingSlotAllocator() : m_Allocations(), m_NextFreeSlot(0) {} + + enum BindType + { + ConstantBuffer = 0, + RWBuffer, + Texture, + UAV + }; + + // isUAV is only meaningful for texture slots + + uint32_t GetBindingSlot(uint32_t regNo, BindType type) + { + // The key is regNumber with the bindtype stored to highest 16 bits + uint32_t key = regNo | (uint32_t(type) << 16); + SlotMap::iterator itr = m_Allocations.find(key); + if (itr == m_Allocations.end()) + { + m_Allocations.insert(std::make_pair(key, m_NextFreeSlot)); + return m_NextFreeSlot++; + } + return itr->second; + } + +private: + uint32_t m_NextFreeSlot; +}; + + +class ToMetal : public Translator +{ +protected: + GLLang language; +public: + explicit ToMetal(HLSLCrossCompilerContext *ctx) : Translator(ctx), m_ShadowSamplerDeclared(false) {} + + virtual bool Translate(); + virtual void TranslateDeclaration(const Declaration *psDecl); + virtual bool TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix = NULL); + std::string TranslateOperand(const Operand *psOp, uint32_t flags, uint32_t ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL); + + virtual void SetIOPrefixes(); + +private: + void TranslateInstruction(Instruction* psInst); + + void DeclareBuiltinInput(const Declaration *psDecl); + void DeclareBuiltinOutput(const Declaration *psDecl); + + // Retrieve the name of the output struct for this shader + std::string GetOutputStructName() const; + std::string GetInputStructName() const; + + void HandleInputRedirect(const Declaration *psDecl, const std::string &typeName); + void HandleOutputRedirect(const Declaration *psDecl, const std::string &typeName); + + void DeclareConstantBuffer(const ConstantBuffer *psCBuf, uint32_t ui32BindingPoint); + void DeclareStructType(const std::string &name, const std::vector &contents, bool withinCB = false, uint32_t cumulativeOffset = 0, bool stripUnused = false); + void DeclareStructType(const std::string &name, const std::vector &contents, bool withinCB = false, uint32_t cumulativeOffset = 0); + void DeclareStructVariable(const std::string &parentName, const ShaderVar &var, bool withinCB = false, uint32_t cumulativeOffset = 0); + void DeclareStructVariable(const std::string &parentName, const ShaderVarType &var, bool withinCB = false, uint32_t cumulativeOffset = 0); + void DeclareBufferVariable(const Declaration *psDecl, const bool isRaw, const bool isUAV); + + void DeclareResource(const Declaration *psDecl); + void TranslateResourceTexture(const Declaration* psDecl, uint32_t samplerCanDoShadowCmp, HLSLCC_TEX_DIMENSION texDim); + + void DeclareOutput(const Declaration *decl); + + void PrintStructDeclarations(StructDefinitions &defs); + + std::string ResourceName(ResourceGroup group, const uint32_t ui32RegisterNumber); + + // ToMetalOperand.cpp + std::string TranslateOperandSwizzle(const Operand* psOperand, uint32_t ui32ComponentMask, int iRebase, bool includeDot = true); + std::string TranslateOperandIndex(const Operand* psOperand, int index); + std::string TranslateVariableName(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int 
*piRebase); + + // ToMetalInstruction.cpp + + void AddOpAssignToDestWithMask(const Operand* psDest, + SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, const char *szAssignmentOp, int *pNeedsParenthesis, uint32_t ui32CompMask); + void AddAssignToDest(const Operand* psDest, + SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, int* pNeedsParenthesis); + void AddAssignPrologue(int numParenthesis); + + typedef enum + { + CMP_EQ, + CMP_LT, + CMP_GE, + CMP_NE, + } ComparisonType; + + void AddComparison(Instruction* psInst, ComparisonType eType, + uint32_t typeFlag); + + void AddMOVBinaryOp(const Operand *pDest, Operand *pSrc); + void AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand *src1, Operand *src2); + void CallBinaryOp(const char* name, Instruction* psInst, + int dest, int src0, int src1, SHADER_VARIABLE_TYPE eDataType); + void CallTernaryOp(const char* op1, const char* op2, Instruction* psInst, + int dest, int src0, int src1, int src2, uint32_t dataType); + void CallHelper3(const char* name, Instruction* psInst, + int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask); + void CallHelper2(const char* name, Instruction* psInst, + int dest, int src0, int src1, int paramsShouldFollowWriteMask); + void CallHelper2Int(const char* name, Instruction* psInst, + int dest, int src0, int src1, int paramsShouldFollowWriteMask); + void CallHelper2UInt(const char* name, Instruction* psInst, + int dest, int src0, int src1, int paramsShouldFollowWriteMask); + void CallHelper1(const char* name, Instruction* psInst, + int dest, int src0, int paramsShouldFollowWriteMask); + void CallHelper1Int( + const char* name, + Instruction* psInst, + const int dest, + const int src0, + int paramsShouldFollowWriteMask); + void TranslateTexelFetch( + Instruction* psInst, + const ResourceBinding* psBinding, + bstring glsl); + void TranslateTexelFetchOffset( + Instruction* psInst, + const ResourceBinding* psBinding, + bstring glsl); + void TranslateTexCoord( + const RESOURCE_DIMENSION eResDim, + Operand* psTexCoordOperand); + void GetResInfoData(Instruction* psInst, int index, int destElem); + void TranslateTextureSample(Instruction* psInst, + uint32_t ui32Flags); + void TranslateDynamicComponentSelection(const ShaderVarType* psVarType, + const Operand* psByteAddr, uint32_t offset, uint32_t mask); + void TranslateShaderStorageStore(Instruction* psInst); + void TranslateShaderStorageLoad(Instruction* psInst); + void TranslateAtomicMemOp(Instruction* psInst); + void TranslateConditional( + Instruction* psInst, + bstring glsl); + + // The map is keyed by struct name. The special name "" (empty string) is reserved for entry point function parameters + StructDefinitions m_StructDefinitions; + + // A map of extra helper functions we'll need. + FunctionDefinitions m_FunctionDefinitions; + + BindingSlotAllocator m_TextureSlots; + BindingSlotAllocator m_BufferSlots; + + std::string m_ExtraGlobalDefinitions; + + bool m_ShadowSamplerDeclared; + + void EnsureShadowSamplerDeclared(); + + // Add an extra function to the m_FunctionDefinitions list, unless it's already there. 
+ void DeclareExtraFunction(const std::string &name, const std::string &body); + + // Move all lowp -> mediump + void ClampPartialPrecisions(); +}; + + diff --git a/src/internal_includes/toMetalDeclaration.h b/src/internal_includes/toMetalDeclaration.h new file mode 100644 index 0000000..f51f48c --- /dev/null +++ b/src/internal_includes/toMetalDeclaration.h @@ -0,0 +1,3 @@ +#pragma once + +#include "internal_includes/Declaration.h" \ No newline at end of file diff --git a/src/internal_includes/tokens.h b/src/internal_includes/tokens.h new file mode 100644 index 0000000..d602f75 --- /dev/null +++ b/src/internal_includes/tokens.h @@ -0,0 +1,783 @@ +#ifndef TOKENS_H +#define TOKENS_H + +#include "hlslcc.h" + +enum SHADER_PHASE_TYPE +{ + SHADER_PHASE_INVALID = -1, + MAIN_PHASE = 0, + HS_GLOBAL_DECL_PHASE = 1, + HS_CTRL_POINT_PHASE = 2, + HS_FORK_PHASE = 3, + HS_JOIN_PHASE = 4 +}; + +static SHADER_TYPE DecodeShaderType(uint32_t ui32Token) +{ + return (SHADER_TYPE)((ui32Token & 0xffff0000) >> 16); +} + +static uint32_t DecodeProgramMajorVersion(uint32_t ui32Token) +{ + return (ui32Token & 0x000000f0) >> 4; +} + +static uint32_t DecodeProgramMinorVersion(uint32_t ui32Token) +{ + return (ui32Token & 0x0000000f); +} + +static uint32_t DecodeInstructionLength(uint32_t ui32Token) +{ + return (ui32Token & 0x7f000000) >> 24; +} + +static uint32_t DecodeIsOpcodeExtended(uint32_t ui32Token) +{ + return (ui32Token & 0x80000000) >> 31; +} + +typedef enum EXTENDED_OPCODE_TYPE +{ + EXTENDED_OPCODE_EMPTY = 0, + EXTENDED_OPCODE_SAMPLE_CONTROLS = 1, + EXTENDED_OPCODE_RESOURCE_DIM = 2, + EXTENDED_OPCODE_RESOURCE_RETURN_TYPE = 3, +} EXTENDED_OPCODE_TYPE; + +static EXTENDED_OPCODE_TYPE DecodeExtendedOpcodeType(uint32_t ui32Token) +{ + return (EXTENDED_OPCODE_TYPE)(ui32Token & 0x0000003f); +} + + +static RESOURCE_RETURN_TYPE DecodeResourceReturnType(uint32_t ui32Coord, uint32_t ui32Token) +{ + return (RESOURCE_RETURN_TYPE)((ui32Token>>(ui32Coord * 4))&0xF); +} + +static RESOURCE_RETURN_TYPE DecodeExtendedResourceReturnType(uint32_t ui32Coord, uint32_t ui32Token) +{ + return (RESOURCE_RETURN_TYPE)((ui32Token>>(ui32Coord * 4 + 6))&0xF); +} + +enum OPCODE_TYPE +{ + //For DX9 + OPCODE_POW = -6, + OPCODE_DP2ADD = -5, + OPCODE_LRP = -4, + OPCODE_ENDREP = -3, + OPCODE_REP = -2, + OPCODE_SPECIAL_DCL_IMMCONST = -1, + + OPCODE_ADD, + OPCODE_AND, + OPCODE_BREAK, + OPCODE_BREAKC, + OPCODE_CALL, + OPCODE_CALLC, + OPCODE_CASE, + OPCODE_CONTINUE, + OPCODE_CONTINUEC, + OPCODE_CUT, + OPCODE_DEFAULT, + OPCODE_DERIV_RTX, + OPCODE_DERIV_RTY, + OPCODE_DISCARD, + OPCODE_DIV, + OPCODE_DP2, + OPCODE_DP3, + OPCODE_DP4, + OPCODE_ELSE, + OPCODE_EMIT, + OPCODE_EMITTHENCUT, + OPCODE_ENDIF, + OPCODE_ENDLOOP, + OPCODE_ENDSWITCH, + OPCODE_EQ, + OPCODE_EXP, + OPCODE_FRC, + OPCODE_FTOI, + OPCODE_FTOU, + OPCODE_GE, + OPCODE_IADD, + OPCODE_IF, + OPCODE_IEQ, + OPCODE_IGE, + OPCODE_ILT, + OPCODE_IMAD, + OPCODE_IMAX, + OPCODE_IMIN, + OPCODE_IMUL, + OPCODE_INE, + OPCODE_INEG, + OPCODE_ISHL, + OPCODE_ISHR, + OPCODE_ITOF, + OPCODE_LABEL, + OPCODE_LD, + OPCODE_LD_MS, + OPCODE_LOG, + OPCODE_LOOP, + OPCODE_LT, + OPCODE_MAD, + OPCODE_MIN, + OPCODE_MAX, + OPCODE_CUSTOMDATA, + OPCODE_MOV, + OPCODE_MOVC, + OPCODE_MUL, + OPCODE_NE, + OPCODE_NOP, + OPCODE_NOT, + OPCODE_OR, + OPCODE_RESINFO, + OPCODE_RET, + OPCODE_RETC, + OPCODE_ROUND_NE, + OPCODE_ROUND_NI, + OPCODE_ROUND_PI, + OPCODE_ROUND_Z, + OPCODE_RSQ, + OPCODE_SAMPLE, + OPCODE_SAMPLE_C, + OPCODE_SAMPLE_C_LZ, + OPCODE_SAMPLE_L, + OPCODE_SAMPLE_D, + OPCODE_SAMPLE_B, + OPCODE_SQRT, + OPCODE_SWITCH, + 
OPCODE_SINCOS, + OPCODE_UDIV, + OPCODE_ULT, + OPCODE_UGE, + OPCODE_UMUL, + OPCODE_UMAD, + OPCODE_UMAX, + OPCODE_UMIN, + OPCODE_USHR, + OPCODE_UTOF, + OPCODE_XOR, + OPCODE_DCL_RESOURCE, // DCL* opcodes have + OPCODE_DCL_CONSTANT_BUFFER, // custom operand formats. + OPCODE_DCL_SAMPLER, + OPCODE_DCL_INDEX_RANGE, + OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY, + OPCODE_DCL_GS_INPUT_PRIMITIVE, + OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT, + OPCODE_DCL_INPUT, + OPCODE_DCL_INPUT_SGV, + OPCODE_DCL_INPUT_SIV, + OPCODE_DCL_INPUT_PS, + OPCODE_DCL_INPUT_PS_SGV, + OPCODE_DCL_INPUT_PS_SIV, + OPCODE_DCL_OUTPUT, + OPCODE_DCL_OUTPUT_SGV, + OPCODE_DCL_OUTPUT_SIV, + OPCODE_DCL_TEMPS, + OPCODE_DCL_INDEXABLE_TEMP, + OPCODE_DCL_GLOBAL_FLAGS, + +// ----------------------------------------------- + + OPCODE_RESERVED_10, + +// ---------- DX 10.1 op codes--------------------- + + OPCODE_LOD, + OPCODE_GATHER4, + OPCODE_SAMPLE_POS, + OPCODE_SAMPLE_INFO, + +// ----------------------------------------------- + + // This should be 10.1's version of NUM_OPCODES + OPCODE_RESERVED_10_1, + +// ---------- DX 11 op codes--------------------- + OPCODE_HS_DECLS, // token marks beginning of HS sub-shader + OPCODE_HS_CONTROL_POINT_PHASE, // token marks beginning of HS sub-shader + OPCODE_HS_FORK_PHASE, // token marks beginning of HS sub-shader + OPCODE_HS_JOIN_PHASE, // token marks beginning of HS sub-shader + + OPCODE_EMIT_STREAM, + OPCODE_CUT_STREAM, + OPCODE_EMITTHENCUT_STREAM, + OPCODE_INTERFACE_CALL, + + OPCODE_BUFINFO, + OPCODE_DERIV_RTX_COARSE, + OPCODE_DERIV_RTX_FINE, + OPCODE_DERIV_RTY_COARSE, + OPCODE_DERIV_RTY_FINE, + OPCODE_GATHER4_C, + OPCODE_GATHER4_PO, + OPCODE_GATHER4_PO_C, + OPCODE_RCP, + OPCODE_F32TOF16, + OPCODE_F16TOF32, + OPCODE_UADDC, + OPCODE_USUBB, + OPCODE_COUNTBITS, + OPCODE_FIRSTBIT_HI, + OPCODE_FIRSTBIT_LO, + OPCODE_FIRSTBIT_SHI, + OPCODE_UBFE, + OPCODE_IBFE, + OPCODE_BFI, + OPCODE_BFREV, + OPCODE_SWAPC, + + OPCODE_DCL_STREAM, + OPCODE_DCL_FUNCTION_BODY, + OPCODE_DCL_FUNCTION_TABLE, + OPCODE_DCL_INTERFACE, + + OPCODE_DCL_INPUT_CONTROL_POINT_COUNT, + OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT, + OPCODE_DCL_TESS_DOMAIN, + OPCODE_DCL_TESS_PARTITIONING, + OPCODE_DCL_TESS_OUTPUT_PRIMITIVE, + OPCODE_DCL_HS_MAX_TESSFACTOR, + OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT, + OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT, + + OPCODE_DCL_THREAD_GROUP, + OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED, + OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW, + OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED, + OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW, + OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED, + OPCODE_DCL_RESOURCE_RAW, + OPCODE_DCL_RESOURCE_STRUCTURED, + OPCODE_LD_UAV_TYPED, + OPCODE_STORE_UAV_TYPED, + OPCODE_LD_RAW, + OPCODE_STORE_RAW, + OPCODE_LD_STRUCTURED, + OPCODE_STORE_STRUCTURED, + OPCODE_ATOMIC_AND, + OPCODE_ATOMIC_OR, + OPCODE_ATOMIC_XOR, + OPCODE_ATOMIC_CMP_STORE, + OPCODE_ATOMIC_IADD, + OPCODE_ATOMIC_IMAX, + OPCODE_ATOMIC_IMIN, + OPCODE_ATOMIC_UMAX, + OPCODE_ATOMIC_UMIN, + OPCODE_IMM_ATOMIC_ALLOC, + OPCODE_IMM_ATOMIC_CONSUME, + OPCODE_IMM_ATOMIC_IADD, + OPCODE_IMM_ATOMIC_AND, + OPCODE_IMM_ATOMIC_OR, + OPCODE_IMM_ATOMIC_XOR, + OPCODE_IMM_ATOMIC_EXCH, + OPCODE_IMM_ATOMIC_CMP_EXCH, + OPCODE_IMM_ATOMIC_IMAX, + OPCODE_IMM_ATOMIC_IMIN, + OPCODE_IMM_ATOMIC_UMAX, + OPCODE_IMM_ATOMIC_UMIN, + OPCODE_SYNC, + + OPCODE_DADD, + OPCODE_DMAX, + OPCODE_DMIN, + OPCODE_DMUL, + OPCODE_DEQ, + OPCODE_DGE, + OPCODE_DLT, + OPCODE_DNE, + OPCODE_DMOV, + OPCODE_DMOVC, + OPCODE_DTOF, + OPCODE_FTOD, + + OPCODE_EVAL_SNAPPED, + OPCODE_EVAL_SAMPLE_INDEX, + OPCODE_EVAL_CENTROID, + + 
OPCODE_DCL_GS_INSTANCE_COUNT, + + OPCODE_ABORT, + OPCODE_DEBUG_BREAK, + +// ----------------------------------------------- + + // This marks the end of D3D11.0 opcodes + OPCODE_RESERVED_11, + + OPCODE_DDIV, + OPCODE_DFMA, + OPCODE_DRCP, + + OPCODE_MSAD, + + OPCODE_DTOI, + OPCODE_DTOU, + OPCODE_ITOD, + OPCODE_UTOD, + +// ----------------------------------------------- + + // This marks the end of D3D11.1 opcodes + OPCODE_RESERVED_11_1, + + NUM_OPCODES, + OPCODE_INVALID = NUM_OPCODES, +}; + +static OPCODE_TYPE DecodeOpcodeType(uint32_t ui32Token) +{ + return (OPCODE_TYPE)(ui32Token & 0x00007ff); +} + +typedef enum +{ + INDEX_0D, + INDEX_1D, + INDEX_2D, + INDEX_3D, +} OPERAND_INDEX_DIMENSION; + +static OPERAND_INDEX_DIMENSION DecodeOperandIndexDimension(uint32_t ui32Token) +{ + return (OPERAND_INDEX_DIMENSION)((ui32Token & 0x00300000) >> 20); +} + +typedef enum OPERAND_TYPE +{ + OPERAND_TYPE_SPECIAL_LOOPCOUNTER = -10, + OPERAND_TYPE_SPECIAL_IMMCONSTINT = -9, + OPERAND_TYPE_SPECIAL_TEXCOORD = -8, + OPERAND_TYPE_SPECIAL_POSITION = -7, + OPERAND_TYPE_SPECIAL_FOG = -6, + OPERAND_TYPE_SPECIAL_POINTSIZE = -5, + OPERAND_TYPE_SPECIAL_OUTOFFSETCOLOUR = -4, + OPERAND_TYPE_SPECIAL_OUTBASECOLOUR = -3, + OPERAND_TYPE_SPECIAL_ADDRESS = -2, + OPERAND_TYPE_SPECIAL_IMMCONST = -1, + OPERAND_TYPE_TEMP = 0, // Temporary Register File + OPERAND_TYPE_INPUT = 1, // General Input Register File + OPERAND_TYPE_OUTPUT = 2, // General Output Register File + OPERAND_TYPE_INDEXABLE_TEMP = 3, // Temporary Register File (indexable) + OPERAND_TYPE_IMMEDIATE32 = 4, // 32bit/component immediate value(s) + // If for example, operand token bits + // [01:00]==OPERAND_4_COMPONENT, + // this means that the operand type: + // OPERAND_TYPE_IMMEDIATE32 + // results in 4 additional 32bit + // DWORDS present for the operand. + OPERAND_TYPE_IMMEDIATE64 = 5, // 64bit/comp.imm.val(s)HI:LO + OPERAND_TYPE_SAMPLER = 6, // Reference to sampler state + OPERAND_TYPE_RESOURCE = 7, // Reference to memory resource (e.g. 
texture) + OPERAND_TYPE_CONSTANT_BUFFER= 8, // Reference to constant buffer + OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER= 9, // Reference to immediate constant buffer + OPERAND_TYPE_LABEL = 10, // Label + OPERAND_TYPE_INPUT_PRIMITIVEID = 11, // Input primitive ID + OPERAND_TYPE_OUTPUT_DEPTH = 12, // Output Depth + OPERAND_TYPE_NULL = 13, // Null register, used to discard results of operations + // Below Are operands new in DX 10.1 + OPERAND_TYPE_RASTERIZER = 14, // DX10.1 Rasterizer register, used to denote the depth/stencil and render target resources + OPERAND_TYPE_OUTPUT_COVERAGE_MASK = 15, // DX10.1 PS output MSAA coverage mask (scalar) + // Below Are operands new in DX 11 + OPERAND_TYPE_STREAM = 16, // Reference to GS stream output resource + OPERAND_TYPE_FUNCTION_BODY = 17, // Reference to a function definition + OPERAND_TYPE_FUNCTION_TABLE = 18, // Reference to a set of functions used by a class + OPERAND_TYPE_INTERFACE = 19, // Reference to an interface + OPERAND_TYPE_FUNCTION_INPUT = 20, // Reference to an input parameter to a function + OPERAND_TYPE_FUNCTION_OUTPUT = 21, // Reference to an output parameter to a function + OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID = 22, // HS Control Point phase input saying which output control point ID this is + OPERAND_TYPE_INPUT_FORK_INSTANCE_ID = 23, // HS Fork Phase input instance ID + OPERAND_TYPE_INPUT_JOIN_INSTANCE_ID = 24, // HS Join Phase input instance ID + OPERAND_TYPE_INPUT_CONTROL_POINT = 25, // HS Fork+Join, DS phase input control points (array of them) + OPERAND_TYPE_OUTPUT_CONTROL_POINT = 26, // HS Fork+Join phase output control points (array of them) + OPERAND_TYPE_INPUT_PATCH_CONSTANT = 27, // DS+HSJoin Input Patch Constants (array of them) + OPERAND_TYPE_INPUT_DOMAIN_POINT = 28, // DS Input Domain point + OPERAND_TYPE_THIS_POINTER = 29, // Reference to an interface this pointer + OPERAND_TYPE_UNORDERED_ACCESS_VIEW = 30, // Reference to UAV u# + OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY = 31, // Reference to Thread Group Shared Memory g# + OPERAND_TYPE_INPUT_THREAD_ID = 32, // Compute Shader Thread ID + OPERAND_TYPE_INPUT_THREAD_GROUP_ID = 33, // Compute Shader Thread Group ID + OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP = 34, // Compute Shader Thread ID In Thread Group + OPERAND_TYPE_INPUT_COVERAGE_MASK = 35, // Pixel shader coverage mask input + OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED = 36, // Compute Shader Thread ID In Group Flattened to a 1D value. 
+ OPERAND_TYPE_INPUT_GS_INSTANCE_ID = 37, // Input GS instance ID + OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL = 38, // Output Depth, forced to be greater than or equal than current depth + OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL = 39, // Output Depth, forced to be less than or equal to current depth + OPERAND_TYPE_CYCLE_COUNTER = 40, // Cycle counter +} OPERAND_TYPE; + +static OPERAND_TYPE DecodeOperandType(uint32_t ui32Token) +{ + return (OPERAND_TYPE)((ui32Token & 0x000ff000) >> 12); +} + +static SPECIAL_NAME DecodeOperandSpecialName(uint32_t ui32Token) +{ + return (SPECIAL_NAME)(ui32Token & 0x0000ffff); +} + +typedef enum OPERAND_INDEX_REPRESENTATION +{ + OPERAND_INDEX_IMMEDIATE32 = 0, // Extra DWORD + OPERAND_INDEX_IMMEDIATE64 = 1, // 2 Extra DWORDs + // (HI32:LO32) + OPERAND_INDEX_RELATIVE = 2, // Extra operand + OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE = 3, // Extra DWORD followed by + // extra operand + OPERAND_INDEX_IMMEDIATE64_PLUS_RELATIVE = 4, // 2 Extra DWORDS + // (HI32:LO32) followed + // by extra operand +} OPERAND_INDEX_REPRESENTATION; + +static OPERAND_INDEX_REPRESENTATION DecodeOperandIndexRepresentation(uint32_t ui32Dimension, uint32_t ui32Token) +{ + return (OPERAND_INDEX_REPRESENTATION)((ui32Token & (0x3<<(22+3*((ui32Dimension)&3)))) >> (22+3*((ui32Dimension)&3))); +} + +typedef enum OPERAND_NUM_COMPONENTS +{ + OPERAND_0_COMPONENT = 0, + OPERAND_1_COMPONENT = 1, + OPERAND_4_COMPONENT = 2, + OPERAND_N_COMPONENT = 3 // unused for now +} OPERAND_NUM_COMPONENTS; + +static OPERAND_NUM_COMPONENTS DecodeOperandNumComponents(uint32_t ui32Token) +{ + return (OPERAND_NUM_COMPONENTS)(ui32Token & 0x00000003); +} + +typedef enum OPERAND_4_COMPONENT_SELECTION_MODE +{ + OPERAND_4_COMPONENT_MASK_MODE = 0, // mask 4 components + OPERAND_4_COMPONENT_SWIZZLE_MODE = 1, // swizzle 4 components + OPERAND_4_COMPONENT_SELECT_1_MODE = 2, // select 1 of 4 components +} OPERAND_4_COMPONENT_SELECTION_MODE; + +static OPERAND_4_COMPONENT_SELECTION_MODE DecodeOperand4CompSelMode(uint32_t ui32Token) +{ + return (OPERAND_4_COMPONENT_SELECTION_MODE)((ui32Token & 0x0000000c) >> 2); +} + +#define OPERAND_4_COMPONENT_MASK_X 0x00000001 +#define OPERAND_4_COMPONENT_MASK_Y 0x00000002 +#define OPERAND_4_COMPONENT_MASK_Z 0x00000004 +#define OPERAND_4_COMPONENT_MASK_W 0x00000008 +#define OPERAND_4_COMPONENT_MASK_R OPERAND_4_COMPONENT_MASK_X +#define OPERAND_4_COMPONENT_MASK_G OPERAND_4_COMPONENT_MASK_Y +#define OPERAND_4_COMPONENT_MASK_B OPERAND_4_COMPONENT_MASK_Z +#define OPERAND_4_COMPONENT_MASK_A OPERAND_4_COMPONENT_MASK_W +#define OPERAND_4_COMPONENT_MASK_ALL 0x0000000f + +static uint32_t DecodeOperand4CompMask(uint32_t ui32Token) +{ + return (uint32_t)((ui32Token & 0x000000f0) >> 4); +} + +static uint32_t DecodeOperand4CompSwizzle(uint32_t ui32Token) +{ + return (uint32_t)((ui32Token & 0x00000ff0) >> 4); +} + +static uint32_t DecodeOperand4CompSel1(uint32_t ui32Token) +{ + return (uint32_t)((ui32Token & 0x00000030) >> 4); +} + +#define OPERAND_4_COMPONENT_X 0 +#define OPERAND_4_COMPONENT_Y 1 +#define OPERAND_4_COMPONENT_Z 2 +#define OPERAND_4_COMPONENT_W 3 + +static const uint32_t NO_SWIZZLE = (( (OPERAND_4_COMPONENT_X) | (OPERAND_4_COMPONENT_Y<<2) | (OPERAND_4_COMPONENT_Z << 4) | (OPERAND_4_COMPONENT_W << 6))/*<<4*/); + +static const uint32_t XXXX_SWIZZLE = (((OPERAND_4_COMPONENT_X) | (OPERAND_4_COMPONENT_X << 2) | (OPERAND_4_COMPONENT_X << 4) | (OPERAND_4_COMPONENT_X << 6))); +static const uint32_t YYYY_SWIZZLE = (((OPERAND_4_COMPONENT_Y) | (OPERAND_4_COMPONENT_Y << 2) | (OPERAND_4_COMPONENT_Y << 4) | 
(OPERAND_4_COMPONENT_Y << 6))); +static const uint32_t ZZZZ_SWIZZLE = (((OPERAND_4_COMPONENT_Z) | (OPERAND_4_COMPONENT_Z << 2) | (OPERAND_4_COMPONENT_Z << 4) | (OPERAND_4_COMPONENT_Z << 6))); +static const uint32_t WWWW_SWIZZLE = (((OPERAND_4_COMPONENT_W) | (OPERAND_4_COMPONENT_W << 2) | (OPERAND_4_COMPONENT_W << 4) | (OPERAND_4_COMPONENT_W << 6))); + +static uint32_t DecodeOperand4CompSwizzleSource(uint32_t ui32Token, uint32_t comp) +{ + return (uint32_t)(((ui32Token)>>(4+2*((comp)&3)))&3); +} + +typedef enum RESOURCE_DIMENSION +{ + RESOURCE_DIMENSION_UNKNOWN = 0, + RESOURCE_DIMENSION_BUFFER = 1, + RESOURCE_DIMENSION_TEXTURE1D = 2, + RESOURCE_DIMENSION_TEXTURE2D = 3, + RESOURCE_DIMENSION_TEXTURE2DMS = 4, + RESOURCE_DIMENSION_TEXTURE3D = 5, + RESOURCE_DIMENSION_TEXTURECUBE = 6, + RESOURCE_DIMENSION_TEXTURE1DARRAY = 7, + RESOURCE_DIMENSION_TEXTURE2DARRAY = 8, + RESOURCE_DIMENSION_TEXTURE2DMSARRAY = 9, + RESOURCE_DIMENSION_TEXTURECUBEARRAY = 10, + RESOURCE_DIMENSION_RAW_BUFFER = 11, + RESOURCE_DIMENSION_STRUCTURED_BUFFER = 12, +} RESOURCE_DIMENSION; + +static RESOURCE_DIMENSION DecodeResourceDimension(uint32_t ui32Token) +{ + return (RESOURCE_DIMENSION)((ui32Token & 0x0000f800) >> 11); +} + +static RESOURCE_DIMENSION DecodeExtendedResourceDimension(uint32_t ui32Token) +{ + return (RESOURCE_DIMENSION)((ui32Token & 0x000007C0) >> 6); +} + +typedef enum INSTRUCTION_TEST_BOOLEAN +{ + INSTRUCTION_TEST_ZERO = 0, + INSTRUCTION_TEST_NONZERO = 1 +} INSTRUCTION_TEST_BOOLEAN; + +static INSTRUCTION_TEST_BOOLEAN DecodeInstrTestBool(uint32_t ui32Token) +{ + return (INSTRUCTION_TEST_BOOLEAN)((ui32Token & 0x00040000) >> 18); +} + +static uint32_t DecodeIsOperandExtended(uint32_t ui32Token) +{ + return (ui32Token & 0x80000000) >> 31; +} + +typedef enum EXTENDED_OPERAND_TYPE +{ + EXTENDED_OPERAND_EMPTY = 0, + EXTENDED_OPERAND_MODIFIER = 1, +} EXTENDED_OPERAND_TYPE; + +static EXTENDED_OPERAND_TYPE DecodeExtendedOperandType(uint32_t ui32Token) +{ + return (EXTENDED_OPERAND_TYPE)(ui32Token & 0x0000003f); +} + +typedef enum OPERAND_MODIFIER +{ + OPERAND_MODIFIER_NONE = 0, + OPERAND_MODIFIER_NEG = 1, + OPERAND_MODIFIER_ABS = 2, + OPERAND_MODIFIER_ABSNEG = 3, +} OPERAND_MODIFIER; + +static OPERAND_MODIFIER DecodeExtendedOperandModifier(uint32_t ui32Token) +{ + return (OPERAND_MODIFIER)((ui32Token & 0x00003fc0) >> 6); +} + +static const uint32_t GLOBAL_FLAG_REFACTORING_ALLOWED = (1<<11); +static const uint32_t GLOBAL_FLAG_ENABLE_DOUBLE_PRECISION_FLOAT_OPS = (1<<12); +static const uint32_t GLOBAL_FLAG_FORCE_EARLY_DEPTH_STENCIL = (1<<13); +static const uint32_t GLOBAL_FLAG_ENABLE_RAW_AND_STRUCTURED_BUFFERS = (1<<14); +static const uint32_t GLOBAL_FLAG_SKIP_OPTIMIZATION = (1<<15); +static const uint32_t GLOBAL_FLAG_ENABLE_MINIMUM_PRECISION = (1<<16); +static const uint32_t GLOBAL_FLAG_ENABLE_DOUBLE_EXTENSIONS = (1<<17); +static const uint32_t GLOBAL_FLAG_ENABLE_SHADER_EXTENSIONS = (1<<18); + +static uint32_t DecodeGlobalFlags(uint32_t ui32Token) +{ + return (uint32_t)(ui32Token & 0x00fff800); +} + +static INTERPOLATION_MODE DecodeInterpolationMode(uint32_t ui32Token) +{ + return (INTERPOLATION_MODE)((ui32Token & 0x00007800) >> 11); +} + + +typedef enum PRIMITIVE_TOPOLOGY +{ + PRIMITIVE_TOPOLOGY_UNDEFINED = 0, + PRIMITIVE_TOPOLOGY_POINTLIST = 1, + PRIMITIVE_TOPOLOGY_LINELIST = 2, + PRIMITIVE_TOPOLOGY_LINESTRIP = 3, + PRIMITIVE_TOPOLOGY_TRIANGLELIST = 4, + PRIMITIVE_TOPOLOGY_TRIANGLESTRIP = 5, + // 6 is reserved for legacy triangle fans + // Adjacency values should be equal to (0x8 & non-adjacency): + 
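// (i.e. adjacency ORs 0x8 into the base value:
+    //  LINELIST 2 -> LINELIST_ADJ 10, TRIANGLESTRIP 5 -> TRIANGLESTRIP_ADJ 13)
+    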
PRIMITIVE_TOPOLOGY_LINELIST_ADJ = 10, + PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ = 11, + PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ = 12, + PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ = 13, +} PRIMITIVE_TOPOLOGY; + +static PRIMITIVE_TOPOLOGY DecodeGSOutputPrimitiveTopology(uint32_t ui32Token) +{ + return (PRIMITIVE_TOPOLOGY)((ui32Token & 0x0001f800) >> 11); +} + +typedef enum PRIMITIVE +{ + PRIMITIVE_UNDEFINED = 0, + PRIMITIVE_POINT = 1, + PRIMITIVE_LINE = 2, + PRIMITIVE_TRIANGLE = 3, + // Adjacency values should be equal to (0x4 & non-adjacency): + PRIMITIVE_LINE_ADJ = 6, + PRIMITIVE_TRIANGLE_ADJ = 7, + PRIMITIVE_1_CONTROL_POINT_PATCH = 8, + PRIMITIVE_2_CONTROL_POINT_PATCH = 9, + PRIMITIVE_3_CONTROL_POINT_PATCH = 10, + PRIMITIVE_4_CONTROL_POINT_PATCH = 11, + PRIMITIVE_5_CONTROL_POINT_PATCH = 12, + PRIMITIVE_6_CONTROL_POINT_PATCH = 13, + PRIMITIVE_7_CONTROL_POINT_PATCH = 14, + PRIMITIVE_8_CONTROL_POINT_PATCH = 15, + PRIMITIVE_9_CONTROL_POINT_PATCH = 16, + PRIMITIVE_10_CONTROL_POINT_PATCH = 17, + PRIMITIVE_11_CONTROL_POINT_PATCH = 18, + PRIMITIVE_12_CONTROL_POINT_PATCH = 19, + PRIMITIVE_13_CONTROL_POINT_PATCH = 20, + PRIMITIVE_14_CONTROL_POINT_PATCH = 21, + PRIMITIVE_15_CONTROL_POINT_PATCH = 22, + PRIMITIVE_16_CONTROL_POINT_PATCH = 23, + PRIMITIVE_17_CONTROL_POINT_PATCH = 24, + PRIMITIVE_18_CONTROL_POINT_PATCH = 25, + PRIMITIVE_19_CONTROL_POINT_PATCH = 26, + PRIMITIVE_20_CONTROL_POINT_PATCH = 27, + PRIMITIVE_21_CONTROL_POINT_PATCH = 28, + PRIMITIVE_22_CONTROL_POINT_PATCH = 29, + PRIMITIVE_23_CONTROL_POINT_PATCH = 30, + PRIMITIVE_24_CONTROL_POINT_PATCH = 31, + PRIMITIVE_25_CONTROL_POINT_PATCH = 32, + PRIMITIVE_26_CONTROL_POINT_PATCH = 33, + PRIMITIVE_27_CONTROL_POINT_PATCH = 34, + PRIMITIVE_28_CONTROL_POINT_PATCH = 35, + PRIMITIVE_29_CONTROL_POINT_PATCH = 36, + PRIMITIVE_30_CONTROL_POINT_PATCH = 37, + PRIMITIVE_31_CONTROL_POINT_PATCH = 38, + PRIMITIVE_32_CONTROL_POINT_PATCH = 39, +} PRIMITIVE; + +static PRIMITIVE DecodeGSInputPrimitive(uint32_t ui32Token) +{ + return (PRIMITIVE)((ui32Token & 0x0001f800) >> 11); +} + +static TESSELLATOR_PARTITIONING DecodeTessPartitioning(uint32_t ui32Token) +{ + return (TESSELLATOR_PARTITIONING)((ui32Token & 0x00003800) >> 11); +} + +typedef enum TESSELLATOR_DOMAIN +{ + TESSELLATOR_DOMAIN_UNDEFINED = 0, + TESSELLATOR_DOMAIN_ISOLINE = 1, + TESSELLATOR_DOMAIN_TRI = 2, + TESSELLATOR_DOMAIN_QUAD = 3 +} TESSELLATOR_DOMAIN; + +static TESSELLATOR_DOMAIN DecodeTessDomain(uint32_t ui32Token) +{ + return (TESSELLATOR_DOMAIN)((ui32Token & 0x00001800) >> 11); +} + +static TESSELLATOR_OUTPUT_PRIMITIVE DecodeTessOutPrim(uint32_t ui32Token) +{ + return (TESSELLATOR_OUTPUT_PRIMITIVE)((ui32Token & 0x00003800) >> 11); +} + +static const uint32_t SYNC_THREADS_IN_GROUP = 0x00000800; +static const uint32_t SYNC_THREAD_GROUP_SHARED_MEMORY = 0x00001000; +static const uint32_t SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GROUP = 0x00002000; +static const uint32_t SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GLOBAL = 0x00004000; + +static uint32_t DecodeSyncFlags(uint32_t ui32Token) +{ + return ui32Token & 0x00007800; +} + +// The number of types that implement this interface +static uint32_t DecodeInterfaceTableLength(uint32_t ui32Token) +{ + return (uint32_t)((ui32Token & 0x0000ffff) >> 0); +} + +// The number of interfaces that are defined in this array. 
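+// (Stored in the top 16 bits of the token; the bottom 16 bits hold the table length decoded just above.)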
+static uint32_t DecodeInterfaceArrayLength(uint32_t ui32Token) +{ + return (uint32_t)((ui32Token & 0xffff0000) >> 16); +} + +typedef enum CUSTOMDATA_CLASS +{ + CUSTOMDATA_COMMENT = 0, + CUSTOMDATA_DEBUGINFO, + CUSTOMDATA_OPAQUE, + CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER, + CUSTOMDATA_SHADER_MESSAGE, +} CUSTOMDATA_CLASS; + +static CUSTOMDATA_CLASS DecodeCustomDataClass(uint32_t ui32Token) +{ + return (CUSTOMDATA_CLASS)((ui32Token & 0xfffff800) >> 11); +} + +static uint32_t DecodeInstructionSaturate(uint32_t ui32Token) +{ + return (ui32Token & 0x00002000) ? 1 : 0; +} + +typedef enum OPERAND_MIN_PRECISION +{ + OPERAND_MIN_PRECISION_DEFAULT = 0, // Default precision + // for the shader model + OPERAND_MIN_PRECISION_FLOAT_16 = 1, // Min 16 bit/component float + OPERAND_MIN_PRECISION_FLOAT_2_8 = 2, // Min 10(2.8)bit/comp. float + OPERAND_MIN_PRECISION_SINT_16 = 4, // Min 16 bit/comp. signed integer + OPERAND_MIN_PRECISION_UINT_16 = 5, // Min 16 bit/comp. unsigned integer +} OPERAND_MIN_PRECISION; + +static uint32_t DecodeOperandMinPrecision(uint32_t ui32Token) +{ + return (ui32Token & 0x0001C000) >> 14; +} + +static uint32_t DecodeOutputControlPointCount(uint32_t ui32Token) +{ + return ((ui32Token & 0x0001f800) >> 11); +} + +typedef enum IMMEDIATE_ADDRESS_OFFSET_COORD +{ + IMMEDIATE_ADDRESS_OFFSET_U = 0, + IMMEDIATE_ADDRESS_OFFSET_V = 1, + IMMEDIATE_ADDRESS_OFFSET_W = 2, +} IMMEDIATE_ADDRESS_OFFSET_COORD; + + +#define IMMEDIATE_ADDRESS_OFFSET_SHIFT(Coord) (9+4*((Coord)&3)) +#define IMMEDIATE_ADDRESS_OFFSET_MASK(Coord) (0x0000000f<>(IMMEDIATE_ADDRESS_OFFSET_SHIFT(eCoord)))); +} + +// UAV access scope flags +static const uint32_t GLOBALLY_COHERENT_ACCESS = 0x00010000; +static uint32_t DecodeAccessCoherencyFlags(uint32_t ui32Token) +{ + return ui32Token & 0x00010000; +} + + +typedef enum RESINFO_RETURN_TYPE +{ + RESINFO_INSTRUCTION_RETURN_FLOAT = 0, + RESINFO_INSTRUCTION_RETURN_RCPFLOAT = 1, + RESINFO_INSTRUCTION_RETURN_UINT = 2 +} RESINFO_RETURN_TYPE; + +static RESINFO_RETURN_TYPE DecodeResInfoReturnType(uint32_t ui32Token) +{ + return (RESINFO_RETURN_TYPE)((ui32Token & 0x00001800) >> 11); +} + +#endif diff --git a/src/reflect.cpp b/src/reflect.cpp new file mode 100644 index 0000000..58208b4 --- /dev/null +++ b/src/reflect.cpp @@ -0,0 +1,600 @@ + +#include "internal_includes/reflect.h" +#include "internal_includes/debug.h" +#include "internal_includes/decode.h" +#include "bstrlib.h" +#include +#include +#include + +static void FormatVariableName(std::string & Name) +{ + /* MSDN http://msdn.microsoft.com/en-us/library/windows/desktop/bb944006(v=vs.85).aspx + The uniform function parameters appear in the + constant table prepended with a dollar sign ($), + unlike the global variables. The dollar sign is + required to avoid name collisions between local + uniform inputs and global variables of the same name.*/ + + /* Leave $ThisPointer, $Element and $Globals as-is. + Otherwise remove $ character ($ is not a valid character for GLSL variable names). 
*/ + if(Name[0] == '$') + { + if(strcmp(Name.c_str(), "$Element") !=0 && + strcmp(Name.c_str(), "$Globals") != 0 && + strcmp(Name.c_str(), "$ThisPointer") != 0) + { + Name[0] = '_'; + } + } +} + +static std::string ReadStringFromTokenStream(const uint32_t* tokens) +{ + char* charTokens = (char*) tokens; + return std::string(charTokens); +} + +static int MaskToRebaseOffset(const uint32_t mask) +{ + int res = 0; + uint32_t m = mask; + while ((m & 1) == 0) + { + res++; + m = m >> 1; + } + return res; +} + +static void ReadInputSignatures(const uint32_t* pui32Tokens, + ShaderInfo* psShaderInfo, + const int extended) +{ + uint32_t i; + + const uint32_t* pui32FirstSignatureToken = pui32Tokens; + const uint32_t ui32ElementCount = *pui32Tokens++; + /* const uint32_t ui32Key = * */ pui32Tokens++; + + psShaderInfo->psInputSignatures.clear(); + psShaderInfo->psInputSignatures.resize(ui32ElementCount); + + for(i=0; ipsInputSignatures[i]; + uint32_t ui32SemanticNameOffset; + + psCurrentSignature->ui32Stream = 0; + psCurrentSignature->eMinPrec = MIN_PRECISION_DEFAULT; + + if(extended) + psCurrentSignature->ui32Stream = *pui32Tokens++; + + ui32SemanticNameOffset = *pui32Tokens++; + psCurrentSignature->ui32SemanticIndex = *pui32Tokens++; + psCurrentSignature->eSystemValueType = (SPECIAL_NAME) *pui32Tokens++; + psCurrentSignature->eComponentType = (INOUT_COMPONENT_TYPE) *pui32Tokens++; + psCurrentSignature->ui32Register = *pui32Tokens++; + + ui32ComponentMasks = *pui32Tokens++; + psCurrentSignature->ui32Mask = ui32ComponentMasks & 0x7F; + //Shows which components are read + psCurrentSignature->ui32ReadWriteMask = (ui32ComponentMasks & 0x7F00) >> 8; + psCurrentSignature->iRebase = MaskToRebaseOffset(psCurrentSignature->ui32Mask); + + if(extended) + psCurrentSignature->eMinPrec = (MIN_PRECISION) *pui32Tokens++; + + psCurrentSignature->semanticName = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstSignatureToken+ui32SemanticNameOffset)); + } +} + +static void ReadOutputSignatures(const uint32_t* pui32Tokens, + ShaderInfo* psShaderInfo, + const int minPrec, + const int streams) +{ + uint32_t i; + + const uint32_t* pui32FirstSignatureToken = pui32Tokens; + const uint32_t ui32ElementCount = *pui32Tokens++; + /*const uint32_t ui32Key = * */ pui32Tokens++; + + psShaderInfo->psOutputSignatures.clear(); + psShaderInfo->psOutputSignatures.resize(ui32ElementCount); + + for(i=0; ipsOutputSignatures[i]; + uint32_t ui32SemanticNameOffset; + + psCurrentSignature->ui32Stream = 0; + psCurrentSignature->eMinPrec = MIN_PRECISION_DEFAULT; + + if(streams) + psCurrentSignature->ui32Stream = *pui32Tokens++; + + ui32SemanticNameOffset = *pui32Tokens++; + psCurrentSignature->ui32SemanticIndex = *pui32Tokens++; + psCurrentSignature->eSystemValueType = (SPECIAL_NAME)*pui32Tokens++; + psCurrentSignature->eComponentType = (INOUT_COMPONENT_TYPE) *pui32Tokens++; + psCurrentSignature->ui32Register = *pui32Tokens++; + + // Massage some special inputs/outputs to match the types of GLSL counterparts + if (psCurrentSignature->eSystemValueType == NAME_RENDER_TARGET_ARRAY_INDEX) + { + psCurrentSignature->eComponentType = INOUT_COMPONENT_SINT32; + } + + ui32ComponentMasks = *pui32Tokens++; + psCurrentSignature->ui32Mask = ui32ComponentMasks & 0x7F; + //Shows which components are NEVER written. 
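+        //(note: the meaning is inverted relative to the input signature, where this field lists the components that are read)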
+ psCurrentSignature->ui32ReadWriteMask = (ui32ComponentMasks & 0x7F00) >> 8; + psCurrentSignature->iRebase = MaskToRebaseOffset(psCurrentSignature->ui32Mask); + + if(minPrec) + psCurrentSignature->eMinPrec = (MIN_PRECISION)*pui32Tokens++; + + psCurrentSignature->semanticName = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstSignatureToken + ui32SemanticNameOffset)); + } +} + +static void ReadPatchConstantSignatures(const uint32_t* pui32Tokens, + ShaderInfo* psShaderInfo, + const int minPrec, + const int streams) +{ + uint32_t i; + + const uint32_t* pui32FirstSignatureToken = pui32Tokens; + const uint32_t ui32ElementCount = *pui32Tokens++; + /*const uint32_t ui32Key = * */ pui32Tokens++; + + psShaderInfo->psPatchConstantSignatures.clear(); + psShaderInfo->psPatchConstantSignatures.resize(ui32ElementCount); + + for(i=0; ipsPatchConstantSignatures[i]; + uint32_t ui32SemanticNameOffset; + + psCurrentSignature->ui32Stream = 0; + psCurrentSignature->eMinPrec = MIN_PRECISION_DEFAULT; + + if(streams) + psCurrentSignature->ui32Stream = *pui32Tokens++; + + ui32SemanticNameOffset = *pui32Tokens++; + psCurrentSignature->ui32SemanticIndex = *pui32Tokens++; + psCurrentSignature->eSystemValueType = (SPECIAL_NAME)*pui32Tokens++; + psCurrentSignature->eComponentType = (INOUT_COMPONENT_TYPE) *pui32Tokens++; + psCurrentSignature->ui32Register = *pui32Tokens++; + + // Massage some special inputs/outputs to match the types of GLSL counterparts + if (psCurrentSignature->eSystemValueType == NAME_RENDER_TARGET_ARRAY_INDEX) + { + psCurrentSignature->eComponentType = INOUT_COMPONENT_SINT32; + } + + ui32ComponentMasks = *pui32Tokens++; + psCurrentSignature->ui32Mask = ui32ComponentMasks & 0x7F; + //Shows which components are NEVER written. + psCurrentSignature->ui32ReadWriteMask = (ui32ComponentMasks & 0x7F00) >> 8; + psCurrentSignature->iRebase = MaskToRebaseOffset(psCurrentSignature->ui32Mask); + + if(minPrec) + psCurrentSignature->eMinPrec = (MIN_PRECISION)*pui32Tokens++; + + psCurrentSignature->semanticName = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstSignatureToken + ui32SemanticNameOffset)); + } +} + +static const uint32_t* ReadResourceBinding(const uint32_t* pui32FirstResourceToken, const uint32_t* pui32Tokens, ResourceBinding* psBinding, uint32_t decodeFlags) +{ + uint32_t ui32NameOffset = *pui32Tokens++; + + psBinding->name = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstResourceToken+ui32NameOffset)); + FormatVariableName(psBinding->name); + + psBinding->eType = (ResourceType)*pui32Tokens++; + psBinding->ui32ReturnType = (RESOURCE_RETURN_TYPE)*pui32Tokens++; + psBinding->eDimension = (REFLECT_RESOURCE_DIMENSION)*pui32Tokens++; + psBinding->ui32NumSamples = *pui32Tokens++; + psBinding->ui32BindPoint = *pui32Tokens++; + psBinding->ui32BindCount = *pui32Tokens++; + psBinding->ui32Flags = *pui32Tokens++; + psBinding->ePrecision = REFLECT_RESOURCE_PRECISION_UNKNOWN; + + if (decodeFlags & HLSLCC_FLAG_SAMPLER_PRECISION_ENCODED_IN_NAME) + { + if (psBinding->name.rfind("_highp") == psBinding->name.length() - 6) + { + psBinding->ePrecision = REFLECT_RESOURCE_PRECISION_HIGHP; + psBinding->name.resize(psBinding->name.length() - 6); + } + else if (psBinding->name.rfind("_mediump") == psBinding->name.length() - 8) + { + psBinding->ePrecision = REFLECT_RESOURCE_PRECISION_MEDIUMP; + psBinding->name.resize(psBinding->name.length() - 8); + } + else if (psBinding->name.rfind("_lowp") == psBinding->name.length() - 5) + { + psBinding->ePrecision = 
REFLECT_RESOURCE_PRECISION_LOWP; + psBinding->name.resize(psBinding->name.length() - 5); + } + } + + return pui32Tokens; +} + +//Read D3D11_SHADER_TYPE_DESC +static void ReadShaderVariableType(const uint32_t ui32MajorVersion, + const uint32_t* pui32FirstConstBufToken, + const uint32_t* pui32tokens, ShaderVarType* varType) +{ + const uint16_t* pui16Tokens = (const uint16_t*) pui32tokens; + uint16_t ui32MemberCount; + uint32_t ui32MemberOffset; + const uint32_t* pui32MemberTokens; + uint32_t i; + + varType->Class = (SHADER_VARIABLE_CLASS)pui16Tokens[0]; + varType->Type = (SHADER_VARIABLE_TYPE)pui16Tokens[1]; + varType->Rows = pui16Tokens[2]; + varType->Columns = pui16Tokens[3]; + varType->Elements = pui16Tokens[4]; + + varType->MemberCount = ui32MemberCount = pui16Tokens[5]; + varType->Members.clear(); + + if(varType->ParentCount) + { + // Add empty brackets for array parents. Indices are filled in later in the printing codes. + if (varType->Parent->Elements > 1) + varType->fullName = varType->Parent->fullName + "[]." + varType->name; + else + varType->fullName = varType->Parent->fullName + "." + varType->name; + } + + if(ui32MemberCount) + { + varType->Members.resize(ui32MemberCount); + + ui32MemberOffset = pui32tokens[3]; + + pui32MemberTokens = (const uint32_t*)((const char*)pui32FirstConstBufToken+ui32MemberOffset); + + for(i=0; i< ui32MemberCount; ++i) + { + uint32_t ui32NameOffset = *pui32MemberTokens++; + uint32_t ui32MemberTypeOffset = *pui32MemberTokens++; + + varType->Members[i].Parent = varType; + varType->Members[i].ParentCount = varType->ParentCount + 1; + + varType->Members[i].Offset = *pui32MemberTokens++; + + varType->Members[i].name = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstConstBufToken + ui32NameOffset)); + + ReadShaderVariableType(ui32MajorVersion, pui32FirstConstBufToken, + (const uint32_t*)((const char*)pui32FirstConstBufToken+ui32MemberTypeOffset), &varType->Members[i]); + } + } +} + +static const uint32_t* ReadConstantBuffer(ShaderInfo* psShaderInfo, + const uint32_t* pui32FirstConstBufToken, const uint32_t* pui32Tokens, ConstantBuffer* psBuffer) +{ + uint32_t i; + uint32_t ui32NameOffset = *pui32Tokens++; + uint32_t ui32VarCount = *pui32Tokens++; + uint32_t ui32VarOffset = *pui32Tokens++; + const uint32_t* pui32VarToken = (const uint32_t*)((const char*)pui32FirstConstBufToken+ui32VarOffset); + + psBuffer->name = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstConstBufToken + ui32NameOffset)); + FormatVariableName(psBuffer->name); + + psBuffer->asVars.clear(); + psBuffer->asVars.resize(ui32VarCount); + + for(i=0; iasVars[i]; + + uint32_t ui32Flags; + uint32_t ui32TypeOffset; + uint32_t ui32DefaultValueOffset; + + ui32NameOffset = *pui32VarToken++; + + psVar->name = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstConstBufToken + ui32NameOffset)); + FormatVariableName(psVar->name); + + psVar->ui32StartOffset = *pui32VarToken++; + psVar->ui32Size = *pui32VarToken++; + ui32Flags = *pui32VarToken++; + ui32TypeOffset = *pui32VarToken++; + + psVar->sType.name = psVar->name; + psVar->sType.fullName = psVar->name; + psVar->sType.Parent = 0; + psVar->sType.ParentCount = 0; + psVar->sType.Offset = 0; + psVar->sType.m_IsUsed = false; + + ReadShaderVariableType(psShaderInfo->ui32MajorVersion, pui32FirstConstBufToken, + (const uint32_t*)((const char*)pui32FirstConstBufToken+ui32TypeOffset), &psVar->sType); + + ui32DefaultValueOffset = *pui32VarToken++; + + + if (psShaderInfo->ui32MajorVersion >= 5) + { + /*uint32_t 
StartTexture = * */pui32VarToken++; + /*uint32_t TextureSize = * */pui32VarToken++; + /*uint32_t StartSampler = * */pui32VarToken++; + /*uint32_t SamplerSize = * */pui32VarToken++; + } + + psVar->haveDefaultValue = 0; + + if(ui32DefaultValueOffset) + { + uint32_t i = 0; + const uint32_t ui32NumDefaultValues = psVar->ui32Size / 4; + const uint32_t* pui32DefaultValToken = (const uint32_t*)((const char*)pui32FirstConstBufToken+ui32DefaultValueOffset); + + //Always a sequence of 4-bytes at the moment. + //bool const becomes 0 or 0xFFFFFFFF int, int & float are 4-bytes. + ASSERT(psVar->ui32Size%4 == 0); + + psVar->haveDefaultValue = 1; + + psVar->pui32DefaultValues.clear(); + psVar->pui32DefaultValues.resize(psVar->ui32Size / 4); + + for(i=0; ipui32DefaultValues[i] = pui32DefaultValToken[i]; + } + } + } + + + { + uint32_t ui32Flags; + uint32_t ui32BufferType; + + psBuffer->ui32TotalSizeInBytes = *pui32Tokens++; + ui32Flags = *pui32Tokens++; + ui32BufferType = *pui32Tokens++; + } + + return pui32Tokens; +} + +static void ReadResources(const uint32_t* pui32Tokens,//in + ShaderInfo* psShaderInfo, //out + uint32_t decodeFlags) +{ + ResourceBinding* psResBindings; + ConstantBuffer* psConstantBuffers; + const uint32_t* pui32ConstantBuffers; + const uint32_t* pui32ResourceBindings; + const uint32_t* pui32FirstToken = pui32Tokens; + uint32_t i; + + const uint32_t ui32NumConstantBuffers = *pui32Tokens++; + const uint32_t ui32ConstantBufferOffset = *pui32Tokens++; + + uint32_t ui32NumResourceBindings = *pui32Tokens++; + uint32_t ui32ResourceBindingOffset = *pui32Tokens++; + /*uint32_t ui32ShaderModel = * */ pui32Tokens++; + /*uint32_t ui32CompileFlags = * */ pui32Tokens++;//D3DCompile flags? http://msdn.microsoft.com/en-us/library/gg615083(v=vs.85).aspx + + //Resources + pui32ResourceBindings = (const uint32_t*)((const char*)pui32FirstToken + ui32ResourceBindingOffset); + + psShaderInfo->psResourceBindings.clear(); + psShaderInfo->psResourceBindings.resize(ui32NumResourceBindings); + psResBindings = &psShaderInfo->psResourceBindings[0]; + + for(i=0; i < ui32NumResourceBindings; ++i) + { + pui32ResourceBindings = ReadResourceBinding(pui32FirstToken, pui32ResourceBindings, psResBindings+i, decodeFlags); + ASSERT(psResBindings[i].ui32BindPoint < MAX_RESOURCE_BINDINGS); + } + + //Constant buffers + pui32ConstantBuffers = (const uint32_t*)((const char*)pui32FirstToken + ui32ConstantBufferOffset); + + psShaderInfo->psConstantBuffers.clear(); + psShaderInfo->psConstantBuffers.resize(ui32NumConstantBuffers); + psConstantBuffers = &psShaderInfo->psConstantBuffers[0]; + + for(i=0; i < ui32NumConstantBuffers; ++i) + { + pui32ConstantBuffers = ReadConstantBuffer(psShaderInfo, pui32FirstToken, pui32ConstantBuffers, psConstantBuffers+i); + } + + + //Map resource bindings to constant buffers + if(psShaderInfo->psConstantBuffers.size()) + { + for(i=0; i < ui32NumResourceBindings; ++i) + { + ResourceGroup eRGroup; + uint32_t cbufIndex = 0; + + eRGroup = ShaderInfo::ResourceTypeToResourceGroup(psResBindings[i].eType); + + //Find the constant buffer whose name matches the resource at the given resource binding point + for(cbufIndex=0; cbufIndex < psShaderInfo->psConstantBuffers.size(); cbufIndex++) + { + if(psConstantBuffers[cbufIndex].name == psResBindings[i].name) + { + psShaderInfo->aui32ResourceMap[eRGroup][psResBindings[i].ui32BindPoint] = cbufIndex; + } + } + } + } +} + +static const uint16_t* ReadClassType(const uint32_t* pui32FirstInterfaceToken, const uint16_t* pui16Tokens, ClassType* psClassType) +{ + const 
uint32_t* pui32Tokens = (const uint32_t*)pui16Tokens; + uint32_t ui32NameOffset = *pui32Tokens; + pui16Tokens+= 2; + + psClassType->ui16ID = *pui16Tokens++; + psClassType->ui16ConstBufStride = *pui16Tokens++; + psClassType->ui16Texture = *pui16Tokens++; + psClassType->ui16Sampler = *pui16Tokens++; + + psClassType->name = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstInterfaceToken + ui32NameOffset)); + + return pui16Tokens; +} + +static const uint16_t* ReadClassInstance(const uint32_t* pui32FirstInterfaceToken, const uint16_t* pui16Tokens, ClassInstance* psClassInstance) +{ + uint32_t ui32NameOffset = *pui16Tokens++ << 16; + ui32NameOffset |= *pui16Tokens++; + + psClassInstance->ui16ID = *pui16Tokens++; + psClassInstance->ui16ConstBuf = *pui16Tokens++; + psClassInstance->ui16ConstBufOffset = *pui16Tokens++; + psClassInstance->ui16Texture = *pui16Tokens++; + psClassInstance->ui16Sampler = *pui16Tokens++; + + psClassInstance->name = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstInterfaceToken + ui32NameOffset)); + + return pui16Tokens; +} + + +static void ReadInterfaces(const uint32_t* pui32Tokens, + ShaderInfo* psShaderInfo) +{ + uint32_t i; + uint32_t ui32StartSlot; + const uint32_t* pui32FirstInterfaceToken = pui32Tokens; + const uint32_t ui32ClassInstanceCount = *pui32Tokens++; + const uint32_t ui32ClassTypeCount = *pui32Tokens++; + const uint32_t ui32InterfaceSlotRecordCount = *pui32Tokens++; + /*const uint32_t ui32InterfaceSlotCount = * */ pui32Tokens++; + const uint32_t ui32ClassInstanceOffset = *pui32Tokens++; + const uint32_t ui32ClassTypeOffset = *pui32Tokens++; + const uint32_t ui32InterfaceSlotOffset = *pui32Tokens++; + + const uint16_t* pui16ClassTypes = (const uint16_t*)((const char*)pui32FirstInterfaceToken + ui32ClassTypeOffset); + const uint16_t* pui16ClassInstances = (const uint16_t*)((const char*)pui32FirstInterfaceToken + ui32ClassInstanceOffset); + const uint32_t* pui32InterfaceSlots = (const uint32_t*)((const char*)pui32FirstInterfaceToken + ui32InterfaceSlotOffset); + + const uint32_t* pui32InterfaceSlotTokens = pui32InterfaceSlots; + + ClassType* psClassTypes; + ClassInstance* psClassInstances; + + psShaderInfo->psClassTypes.clear(); + psShaderInfo->psClassTypes.resize(ui32ClassTypeCount); + psClassTypes = &psShaderInfo->psClassTypes[0]; + + for(i=0; ipsClassInstances.clear(); + psShaderInfo->psClassInstances.resize(ui32ClassInstanceCount); + psClassInstances = &psShaderInfo->psClassInstances[0]; + + for(i=0; iaui32TableIDToTypeID[*pui32TableID++] = *pui16TypeID++; + } + + ui32StartSlot += ui32SlotSpan; + } + +} + +void LoadShaderInfo(const uint32_t ui32MajorVersion, + const uint32_t ui32MinorVersion, + const ReflectionChunks* psChunks, + ShaderInfo* psInfo, + uint32_t decodeFlags) +{ + const uint32_t* pui32Inputs = psChunks->pui32Inputs; + const uint32_t* pui32Inputs11 = psChunks->pui32Inputs11; + const uint32_t* pui32Resources = psChunks->pui32Resources; + const uint32_t* pui32Interfaces = psChunks->pui32Interfaces; + const uint32_t* pui32Outputs = psChunks->pui32Outputs; + const uint32_t* pui32Outputs11 = psChunks->pui32Outputs11; + const uint32_t* pui32OutputsWithStreams = psChunks->pui32OutputsWithStreams; + const uint32_t* pui32PatchConstants = psChunks->pui32PatchConstants; + const uint32_t* pui32PatchConstants11 = psChunks->pui32PatchConstants11; + + psInfo->eTessOutPrim = TESSELLATOR_OUTPUT_UNDEFINED; + psInfo->eTessPartitioning = TESSELLATOR_PARTITIONING_UNDEFINED; + + psInfo->ui32MajorVersion = ui32MajorVersion; + 
psInfo->ui32MinorVersion = ui32MinorVersion; + + + if(pui32Inputs) + ReadInputSignatures(pui32Inputs, psInfo, 0); + if(pui32Inputs11) + ReadInputSignatures(pui32Inputs11, psInfo, 1); + if(pui32Resources) + ReadResources(pui32Resources, psInfo, decodeFlags); + if(pui32Interfaces) + ReadInterfaces(pui32Interfaces, psInfo); + if(pui32Outputs) + ReadOutputSignatures(pui32Outputs, psInfo, 0, 0); + if(pui32Outputs11) + ReadOutputSignatures(pui32Outputs11, psInfo, 1, 1); + if(pui32OutputsWithStreams) + ReadOutputSignatures(pui32OutputsWithStreams, psInfo, 0, 1); + if(pui32PatchConstants) + ReadPatchConstantSignatures(pui32PatchConstants, psInfo, 0, 0); + if (pui32PatchConstants11) + ReadPatchConstantSignatures(pui32PatchConstants11, psInfo, 1, 1); + + { + uint32_t i; + for(i=0; i < psInfo->psConstantBuffers.size(); ++i) + { + if (psInfo->psConstantBuffers[i].name == "$ThisPointer") + { + psInfo->psThisPointerConstBuffer = &psInfo->psConstantBuffers[i]; + } + } + } + } + diff --git a/src/toGLSL.cpp b/src/toGLSL.cpp new file mode 100644 index 0000000..1e76523 --- /dev/null +++ b/src/toGLSL.cpp @@ -0,0 +1,806 @@ +#include <memory> + +#include "internal_includes/tokens.h" +#include "internal_includes/decode.h" +#include "stdlib.h" +#include "stdio.h" +#include "bstrlib.h" +#include "internal_includes/toGLSL.h" +#include "internal_includes/toGLSLOperand.h" +#include "internal_includes/Declaration.h" +#include "internal_includes/languages.h" +#include "internal_includes/debug.h" +#include "internal_includes/HLSLccToolkit.h" +#include "internal_includes/UseDefineChains.h" +#include "internal_includes/DataTypeAnalysis.h" +#include "internal_includes/Shader.h" +#include "internal_includes/HLSLCrossCompilerContext.h" +#include "internal_includes/Instruction.h" +#include "internal_includes/LoopTransform.h" +#include <algorithm> +#include <sstream> + +// In GLSL, the input and output names cannot clash. +// Also, the output name of previous stage must match the input name of the next stage. +// So, do gymnastics depending on which shader we're running on and which other shaders exist in this program.
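+ // Illustrative example (not part of the original source): in a plain VS+PS program the vertex shader declares its outputs as vs_TEXCOORD0, vs_COLOR0, etc. and the pixel shader declares its inputs with the same vs_ prefix, so the names match at link time. + // If a domain or geometry shader sits between them, the pixel shader switches its input prefix to ds_ or gs_ instead, as resolved from ui32ProgramStages below.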
+// +void ToGLSL::SetIOPrefixes() +{ + switch (psContext->psShader->eShaderType) + { + case VERTEX_SHADER: + psContext->inputPrefix = "in_"; + psContext->outputPrefix = "vs_"; + break; + + case HULL_SHADER: + // Input always coming from vertex shader + psContext->inputPrefix = "vs_"; + psContext->outputPrefix = "hs_"; + break; + + case DOMAIN_SHADER: + // There's no domain shader without hull shader + psContext->inputPrefix = "hs_"; + psContext->outputPrefix = "ds_"; + break; + + case GEOMETRY_SHADER: + // The input depends on whether there's a tessellation shader before us + if (psContext->psDependencies && (psContext->psDependencies->ui32ProgramStages & PS_FLAG_DOMAIN_SHADER)) + psContext->inputPrefix = "ds_"; + else + psContext->inputPrefix = "vs_"; + + psContext->outputPrefix = "gs_"; + break; + + case PIXEL_SHADER: + // The inputs can come from geom shader, domain shader or directly from vertex shader + if (psContext->psDependencies) + { + if (psContext->psDependencies->ui32ProgramStages & PS_FLAG_GEOMETRY_SHADER) + { + psContext->inputPrefix = "gs_"; + } + else if (psContext->psDependencies->ui32ProgramStages & PS_FLAG_DOMAIN_SHADER) + { + psContext->inputPrefix = "ds_"; + } + else + { + psContext->inputPrefix = "vs_"; + } + } + else + { + psContext->inputPrefix = "vs_"; + } + psContext->outputPrefix = ""; + break; + + + case COMPUTE_SHADER: + default: + // No prefixes + psContext->inputPrefix = ""; + psContext->outputPrefix = ""; + break; + } +} + + +static void AddVersionDependentCode(HLSLCrossCompilerContext* psContext) +{ + bstring glsl = *psContext->currentGLSLString; + bstring extensions = psContext->extensions; + bool isES = (psContext->psShader->eTargetLanguage >= LANG_ES_100 && psContext->psShader->eTargetLanguage <= LANG_ES_310); + bool GL_ARB_shader_image_load_store = false; + + if(psContext->psShader->ui32MajorVersion > 3 && psContext->psShader->eTargetLanguage != LANG_ES_300 && psContext->psShader->eTargetLanguage != LANG_ES_310 && !(psContext->psShader->eTargetLanguage >= LANG_330)) + { + bcatcstr(extensions,"#extension GL_ARB_shader_bit_encoding : enable\n"); + } + + if(!HaveCompute(psContext->psShader->eTargetLanguage)) + { + if(psContext->psShader->eShaderType == COMPUTE_SHADER) + { + bcatcstr(extensions,"#extension GL_ARB_compute_shader : enable\n"); + } + + if (psContext->psShader->aiOpcodeUsed[OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED] || + psContext->psShader->aiOpcodeUsed[OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW] || + psContext->psShader->aiOpcodeUsed[OPCODE_DCL_RESOURCE_STRUCTURED] || + psContext->psShader->aiOpcodeUsed[OPCODE_DCL_RESOURCE_RAW]) + { + bcatcstr(extensions, "#extension GL_ARB_shader_storage_buffer_object : enable\n"); + } + } + + if (!HaveAtomicMem(psContext->psShader->eTargetLanguage) || + !HaveAtomicCounter(psContext->psShader->eTargetLanguage)) + { + if( psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_ALLOC] || + psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_CONSUME] || + psContext->psShader->aiOpcodeUsed[OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED]) + { + bcatcstr(extensions,"#extension GL_ARB_shader_atomic_counters : enable\n"); + } + } + + if (!HaveImageAtomics(psContext->psShader->eTargetLanguage)) + { + if (psContext->psShader->aiOpcodeUsed[OPCODE_ATOMIC_CMP_STORE] || + psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_AND] || + psContext->psShader->aiOpcodeUsed[OPCODE_ATOMIC_AND] || + psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_IADD] || + psContext->psShader->aiOpcodeUsed[OPCODE_ATOMIC_IADD] || + 
psContext->psShader->aiOpcodeUsed[OPCODE_ATOMIC_OR] || + psContext->psShader->aiOpcodeUsed[OPCODE_ATOMIC_XOR] || + psContext->psShader->aiOpcodeUsed[OPCODE_ATOMIC_IMIN] || + psContext->psShader->aiOpcodeUsed[OPCODE_ATOMIC_UMIN] || + psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_IMAX] || + psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_IMIN] || + psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_UMAX] || + psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_UMIN] || + psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_OR] || + psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_XOR] || + psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_EXCH] || + psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_CMP_EXCH]) + { + if (isES) + bcatcstr(extensions, "#extension GL_OES_shader_image_atomic : enable\n"); + else + GL_ARB_shader_image_load_store = true; + } + } + + if(!HaveGather(psContext->psShader->eTargetLanguage)) + { + if(psContext->psShader->aiOpcodeUsed[OPCODE_GATHER4] || + psContext->psShader->aiOpcodeUsed[OPCODE_GATHER4_PO_C] || + psContext->psShader->aiOpcodeUsed[OPCODE_GATHER4_PO] || + psContext->psShader->aiOpcodeUsed[OPCODE_GATHER4_C]) + { + bcatcstr(extensions,"#extension GL_ARB_texture_gather : enable\n"); + } + } + + if(!HaveGatherNonConstOffset(psContext->psShader->eTargetLanguage)) + { + if(psContext->psShader->aiOpcodeUsed[OPCODE_GATHER4_PO_C] || + psContext->psShader->aiOpcodeUsed[OPCODE_GATHER4_PO]) + { + bcatcstr(extensions,"#extension GL_ARB_gpu_shader5 : enable\n"); + } + } + + if(!HaveQueryLod(psContext->psShader->eTargetLanguage)) + { + if(psContext->psShader->aiOpcodeUsed[OPCODE_LOD]) + { + bcatcstr(extensions,"#extension GL_ARB_texture_query_lod : enable\n"); + } + } + + if(!HaveQueryLevels(psContext->psShader->eTargetLanguage)) + { + if(psContext->psShader->aiOpcodeUsed[OPCODE_RESINFO]) + { + bcatcstr(extensions,"#extension GL_ARB_texture_query_levels : enable\n"); + } + } + + if(!HaveImageLoadStore(psContext->psShader->eTargetLanguage)) + { + if(psContext->psShader->aiOpcodeUsed[OPCODE_STORE_UAV_TYPED] || + psContext->psShader->aiOpcodeUsed[OPCODE_STORE_RAW] || + psContext->psShader->aiOpcodeUsed[OPCODE_STORE_STRUCTURED]) + { + GL_ARB_shader_image_load_store = true; + bcatcstr(extensions,"#extension GL_ARB_shader_bit_encoding : enable\n"); + } + else + if(psContext->psShader->aiOpcodeUsed[OPCODE_LD_UAV_TYPED] || + psContext->psShader->aiOpcodeUsed[OPCODE_LD_RAW] || + psContext->psShader->aiOpcodeUsed[OPCODE_LD_STRUCTURED]) + { + GL_ARB_shader_image_load_store = true; + } + } + + if(!HaveGeometryShaderARB(psContext->psShader->eTargetLanguage)) + { + if(psContext->psShader->eShaderType == GEOMETRY_SHADER) + { + bcatcstr(extensions,"#extension GL_ARB_geometry_shader : enable\n"); + } + } + + if(psContext->psShader->eTargetLanguage == LANG_ES_300 || psContext->psShader->eTargetLanguage == LANG_ES_310) + { + if(psContext->psShader->eShaderType == GEOMETRY_SHADER) + { + bcatcstr(extensions,"#extension GL_OES_geometry_shader : enable\n"); + bcatcstr(extensions,"#extension GL_EXT_geometry_shader : enable\n"); + } + } + + if(psContext->psShader->eTargetLanguage == LANG_ES_300 || psContext->psShader->eTargetLanguage == LANG_ES_310) + { + if(psContext->psShader->eShaderType == HULL_SHADER || psContext->psShader->eShaderType == DOMAIN_SHADER) + { + bcatcstr(extensions,"#extension GL_OES_tessellation_shader : enable\n"); + bcatcstr(extensions,"#extension GL_EXT_tessellation_shader : enable\n"); + } + } + + if (GL_ARB_shader_image_load_store) + bcatcstr(extensions, 
"#extension GL_ARB_shader_image_load_store : enable\n"); + + //Handle fragment shader default precision + if ((psContext->psShader->eShaderType == PIXEL_SHADER) && + (psContext->psShader->eTargetLanguage == LANG_ES_100 || psContext->psShader->eTargetLanguage == LANG_ES_300 || psContext->psShader->eTargetLanguage == LANG_ES_310)) + { + // Float default precision is patched during runtime in GlslGpuProgramGLES.cpp:PatchupFragmentShaderText() + // Except on Vulkan + if(psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) + bcatcstr(glsl, "precision highp float;\n"); + + + // Define default int precision to highp to avoid issues on platforms that actually implement mediump + bcatcstr(glsl, "precision highp int;\n"); + } + + if(psContext->psShader->eShaderType == PIXEL_SHADER && psContext->psShader->eTargetLanguage >= LANG_120 && !HaveFragmentCoordConventions(psContext->psShader->eTargetLanguage)) + { + bcatcstr(extensions,"#extension GL_ARB_fragment_coord_conventions : require\n"); + } + + if(psContext->psShader->eShaderType == PIXEL_SHADER && psContext->psShader->eTargetLanguage >= LANG_150) + { + if(psContext->flags & HLSLCC_FLAG_ORIGIN_UPPER_LEFT) + bcatcstr(glsl,"layout(origin_upper_left) in vec4 gl_FragCoord;\n"); + + if(psContext->flags & HLSLCC_FLAG_PIXEL_CENTER_INTEGER) + bcatcstr(glsl,"layout(pixel_center_integer) in vec4 gl_FragCoord;\n"); + } + + + /* + OpenGL 4.1 API spec: + To use any built-in input or output in the gl_PerVertex block in separable + program objects, shader code must redeclare that block prior to use. + */ + /* DISABLED FOR NOW */ +/* if(psContext->psShader->eShaderType == VERTEX_SHADER && psContext->psShader->eTargetLanguage >= LANG_410) + { + bcatcstr(glsl, "out gl_PerVertex {\n"); + bcatcstr(glsl, "vec4 gl_Position;\n"); + bcatcstr(glsl, "float gl_PointSize;\n"); + bcatcstr(glsl, "float gl_ClipDistance[];"); + bcatcstr(glsl, "};\n"); + }*/ +} + +GLLang ChooseLanguage(Shader* psShader) +{ + // Depends on the HLSL shader model extracted from bytecode. 
+ switch(psShader->ui32MajorVersion) + { + case 5: + { + return LANG_430; + } + case 4: + { + return LANG_330; + } + default: + { + return LANG_120; + } + } +} + +const char* GetVersionString(GLLang language) +{ + switch(language) + { + case LANG_ES_100: + { + return "#version 100\n"; + break; + } + case LANG_ES_300: + { + return "#version 300 es\n"; + break; + } + case LANG_ES_310: + { + return "#version 310 es\n"; + break; + } + case LANG_120: + { + return "#version 120\n"; + break; + } + case LANG_130: + { + return "#version 130\n"; + break; + } + case LANG_140: + { + return "#version 140\n"; + break; + } + case LANG_150: + { + return "#version 150\n"; + break; + } + case LANG_330: + { + return "#version 330\n"; + break; + } + case LANG_400: + { + return "#version 400\n"; + break; + } + case LANG_410: + { + return "#version 410\n"; + break; + } + case LANG_420: + { + return "#version 420\n"; + break; + } + case LANG_430: + { + return "#version 430\n"; + break; + } + case LANG_440: + { + return "#version 440\n"; + break; + } + default: + { + return ""; + break; + } + } +} + +static const char * GetPhaseFuncName(SHADER_PHASE_TYPE eType) +{ + switch (eType) + { + default: + case MAIN_PHASE: return ""; + case HS_GLOBAL_DECL_PHASE: return "hs_global_decls"; + case HS_FORK_PHASE: return "fork_phase"; + case HS_CTRL_POINT_PHASE: return "control_point_phase"; + case HS_JOIN_PHASE: return "join_phase"; + } +} + +static void DoHullShaderPassthrough(HLSLCrossCompilerContext *psContext) +{ + uint32_t i; + bstring glsl = psContext->glsl; + + for (i = 0; i < psContext->psShader->sInfo.psInputSignatures.size(); i++) + { + ShaderInfo::InOutSignature *psSig = &psContext->psShader->sInfo.psInputSignatures[i]; + const char *Type; + uint32_t ui32NumComponents = HLSLcc::GetNumberBitsSet(psSig->ui32Mask); + switch (psSig->eComponentType) + { + default: + case INOUT_COMPONENT_FLOAT32: + Type = ui32NumComponents > 1 ? "vec" : "float"; + break; + case INOUT_COMPONENT_SINT32: + Type = ui32NumComponents > 1 ? "ivec" : "int"; + break; + case INOUT_COMPONENT_UINT32: + Type = ui32NumComponents > 1 ? "uvec" : "uint"; + break; + } + if ((psSig->eSystemValueType == NAME_POSITION || psSig->semanticName == "POS") && psSig->ui32SemanticIndex == 0) + continue; + + std::string inputName; + + { + std::ostringstream oss; + oss << psContext->inputPrefix << psSig->semanticName << psSig->ui32SemanticIndex; + inputName = oss.str(); + } + + std::string outputName; + { + std::ostringstream oss; + oss << psContext->outputPrefix << psSig->semanticName << psSig->ui32SemanticIndex; + outputName = oss.str(); + } + + const char * prec = HavePrecisionQualifers(psContext->psShader->eTargetLanguage) ? 
"highp ": ""; + + psContext->AddIndentation(); + if (ui32NumComponents > 1) // TODO Precision + bformata(glsl, "in %s%s%d %s%s%d[];\n", prec, Type, ui32NumComponents, psContext->inputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex); + else + bformata(glsl, "in %s%s %s%s%d[];\n", prec, Type, psContext->inputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex); + + psContext->AddIndentation(); + if (ui32NumComponents > 1) // TODO Precision + bformata(glsl, "out %s%s%d %s%s%d[];\n", prec, Type, ui32NumComponents, psContext->outputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex); + else + bformata(glsl, "out %s%s %s%s%d[];\n", prec, Type, psContext->outputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex); + } + + psContext->AddIndentation(); + bcatcstr(glsl, "void passthrough_ctrl_points()\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "{\n"); + psContext->indent++; + + for (i = 0; i < psContext->psShader->sInfo.psInputSignatures.size(); i++) + { + const ShaderInfo::InOutSignature *psSig = &psContext->psShader->sInfo.psInputSignatures[i]; + + psContext->AddIndentation(); + + if ((psSig->eSystemValueType == NAME_POSITION || psSig->semanticName == "POS") && psSig->ui32SemanticIndex == 0) + bformata(glsl, "gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"); + else + bformata(glsl, "%s%s%d[gl_InvocationID] = %s%s%d[gl_InvocationID];\n", psContext->outputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex, psContext->inputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex); + } + + psContext->indent--; + psContext->AddIndentation(); + bcatcstr(glsl, "}\n"); +} + +GLLang ToGLSL::SetLanguage(GLLang suggestedLanguage) +{ + language = suggestedLanguage; + if (language == LANG_DEFAULT) + { + language = ChooseLanguage(psContext->psShader); + } + return language; +} + +bool ToGLSL::Translate() +{ + bstring glsl; + uint32_t i; + Shader* psShader = psContext->psShader; + uint32_t ui32Phase; + + psContext->psTranslator = this; + + if (language == LANG_DEFAULT) + SetLanguage(LANG_DEFAULT); + + SetIOPrefixes(); + psShader->ExpandSWAPCs(); + psShader->ForcePositionToHighp(); + psShader->AnalyzeIOOverlap(); + psShader->FindUnusedGlobals(psContext->flags); + + psContext->indent = 0; + + glsl = bfromcstralloc (1024 * 10, "\n"); + bstring extensions = bfromcstralloc (1024 * 10, GetVersionString(language)); + psContext->extensions = extensions; + + psContext->glsl = glsl; + for(i=0; iasPhases.size();++i) + { + psShader->asPhases[i].postShaderCode = bfromcstralloc (1024 * 5, ""); + psShader->asPhases[i].earlyMain = bfromcstralloc(1024 * 5, ""); + } + psContext->currentGLSLString = &glsl; + psShader->eTargetLanguage = language; + psContext->currentPhase = MAIN_PHASE; + + if (psShader->extensions) + { + if (psShader->extensions->ARB_explicit_attrib_location) + bcatcstr(extensions, "#extension GL_ARB_explicit_attrib_location : require\n"); + if (psShader->extensions->ARB_explicit_uniform_location) + bcatcstr(extensions, "#extension GL_ARB_explicit_uniform_location : require\n"); + if (psShader->extensions->ARB_shading_language_420pack) + bcatcstr(extensions, "#extension GL_ARB_shading_language_420pack : require\n"); + } + + psContext->ClearDependencyData(); + + AddVersionDependentCode(psContext); + + psShader->PrepareStructuredBufferBindingSlots(); + + for (ui32Phase = 0; ui32Phase < psShader->asPhases.size(); ui32Phase++) + { + ShaderPhase &phase = psShader->asPhases[ui32Phase]; + phase.UnvectorizeImmMoves(); + 
psContext->DoDataTypeAnalysis(&phase); + phase.ResolveUAVProperties(); + psShader->ResolveStructuredBufferBindingSlots(&phase); + phase.PruneConstArrays(); + } + + psShader->PruneTempRegisters(); + + for (ui32Phase = 0; ui32Phase < psShader->asPhases.size(); ui32Phase++) + { + // Loop transform can only be done after the temps have been pruned + ShaderPhase &phase = psShader->asPhases[ui32Phase]; + HLSLcc::DoLoopTransform(phase); + } + + //Special case. Can have multiple phases. + if(psShader->eShaderType == HULL_SHADER) + { + const SHADER_PHASE_TYPE ePhaseFuncCallOrder[3] = { HS_CTRL_POINT_PHASE, HS_FORK_PHASE, HS_JOIN_PHASE }; + uint32_t ui32PhaseCallIndex; + int perPatchSectionAdded = 0; + int hasControlPointPhase = 0; + + psShader->ConsolidateHullTempVars(); + + // Find out if we have a passthrough hull shader + for (ui32Phase = 2; ui32Phase < psShader->asPhases.size(); ui32Phase++) + { + if (psShader->asPhases[ui32Phase].ePhase == HS_CTRL_POINT_PHASE) + hasControlPointPhase = 1; + } + + // Phase 1 is always the global decls phase, no instructions + for(i=0; i < psShader->asPhases[1].psDecl.size(); ++i) + { + TranslateDeclaration(&psShader->asPhases[1].psDecl[i]); + } + + if (hasControlPointPhase == 0) + { + DoHullShaderPassthrough(psContext); + } + + for(ui32Phase=2; ui32PhaseasPhases.size(); ui32Phase++) + { + ShaderPhase *psPhase = &psShader->asPhases[ui32Phase]; + psContext->currentPhase = ui32Phase; + +#ifdef _DEBUG + bformata(glsl, "//%s declarations\n", GetPhaseFuncName(psPhase->ePhase)); +#endif + for (i = 0; i < psPhase->psDecl.size(); ++i) + { + TranslateDeclaration(&psPhase->psDecl[i]); + } + + bformata(glsl, "void %s%d(int phaseInstanceID)\n{\n", GetPhaseFuncName(psPhase->ePhase), ui32Phase); + psContext->indent++; + + if (psPhase->psInst.size() > 0) + { + //The minus one here is remove the return statement at end of phases. + //We don't want to translate that, we'll just end the function body. 
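+ // Illustrative example (not part of the original source): a fork phase whose instruction list ends { ..., mul, add, ret } is emitted as fork_phaseN(int phaseInstanceID) containing only the instructions before the ret; the ret itself is skipped and simply becomes the closing brace of the generated function.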
+ ASSERT(psPhase->psInst[psPhase->psInst.size() - 1].eOpcode == OPCODE_RET); + for (i = 0; i < psPhase->psInst.size() - 1; ++i) + { + TranslateInstruction(&psPhase->psInst[i]); + } + } + + + psContext->indent--; + bcatcstr(glsl, "}\n"); + } + + bcatcstr(glsl, "void main()\n{\n"); + + psContext->indent++; + + // There are cases when there are no control point phases and we have to do passthrough + if (hasControlPointPhase == 0) + { + // Passthrough control point phase, run the rest only once per patch + psContext->AddIndentation(); + bcatcstr(glsl, "passthrough_ctrl_points();\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "barrier();\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "if (gl_InvocationID == 0)\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "{\n"); + psContext->indent++; + perPatchSectionAdded = 1; + } + + for(ui32PhaseCallIndex=0; ui32PhaseCallIndex<3; ui32PhaseCallIndex++) + { + for (ui32Phase = 2; ui32Phase < psShader->asPhases.size(); ui32Phase++) + { + uint32_t i; + ShaderPhase *psPhase = &psShader->asPhases[ui32Phase]; + if (psPhase->ePhase != ePhaseFuncCallOrder[ui32PhaseCallIndex]) + continue; + + if (psPhase->earlyMain->slen > 1) + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//--- Start Early Main ---\n"); +#endif + bconcat(glsl, psPhase->earlyMain); +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//--- End Early Main ---\n"); +#endif + } + + for (i = 0; i < psPhase->ui32InstanceCount; i++) + { + + psContext->AddIndentation(); + bformata(glsl, "%s%d(%d);\n", GetPhaseFuncName(psShader->asPhases[ui32Phase].ePhase), ui32Phase, i); + } + + if (psPhase->hasPostShaderCode) + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//--- Post shader code ---\n"); +#endif + bconcat(glsl, psPhase->postShaderCode); +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//--- End post shader code ---\n"); +#endif + } + + + if (psShader->asPhases[ui32Phase].ePhase == HS_CTRL_POINT_PHASE) + { + // We're done printing control point phase, run the rest only once per patch + psContext->AddIndentation(); + bcatcstr(glsl, "barrier();\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "if (gl_InvocationID == 0)\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "{\n"); + psContext->indent++; + perPatchSectionAdded = 1; + } + } + } + + if (perPatchSectionAdded != 0) + { + psContext->indent--; + psContext->AddIndentation(); + bcatcstr(glsl, "}\n"); + } + + psContext->indent--; + + bcatcstr(glsl, "}\n"); + + // Concat extensions and glsl for the final shader code. + bconcat(extensions, glsl); + bdestroy(glsl); + psContext->glsl = extensions; + glsl = NULL; + + if(psContext->psDependencies) + { + //Save partitioning and primitive type for use by domain shader. + psContext->psDependencies->eTessOutPrim = psShader->sInfo.eTessOutPrim; + + psContext->psDependencies->eTessPartitioning = psShader->sInfo.eTessPartitioning; + } + + return true; + } + + if(psShader->eShaderType == DOMAIN_SHADER && psContext->psDependencies) + { + //Load partitioning and primitive type from hull shader. 
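+ // Illustrative example (not part of the original source): a hull shader declared with [outputtopology("triangle_cw")] and [partitioning("fractional_odd")] saves those values into psDependencies when it is translated, and the domain shader below then emits "layout(cw) in;" and "layout(fractional_odd_spacing) in;".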
+ switch(psContext->psDependencies->eTessOutPrim) + { + case TESSELLATOR_OUTPUT_TRIANGLE_CCW: + { + bcatcstr(glsl, "layout(ccw) in;\n"); + break; + } + case TESSELLATOR_OUTPUT_TRIANGLE_CW: + { + bcatcstr(glsl, "layout(cw) in;\n"); + break; + } + case TESSELLATOR_OUTPUT_POINT: + { + bcatcstr(glsl, "layout(point_mode) in;\n"); + break; + } + default: + { + break; + } + } + + switch(psContext->psDependencies->eTessPartitioning) + { + case TESSELLATOR_PARTITIONING_FRACTIONAL_ODD: + { + bcatcstr(glsl, "layout(fractional_odd_spacing) in;\n"); + break; + } + case TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN: + { + bcatcstr(glsl, "layout(fractional_even_spacing) in;\n"); + break; + } + default: + { + break; + } + } + } + + for (i = 0; i < psShader->asPhases[0].psDecl.size(); ++i) + { + TranslateDeclaration(&psShader->asPhases[0].psDecl[i]); + } + + bcatcstr(glsl, "void main()\n{\n"); + + psContext->indent++; + + if (psContext->psShader->asPhases[0].earlyMain->slen > 1) + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//--- Start Early Main ---\n"); +#endif + bconcat(glsl, psContext->psShader->asPhases[0].earlyMain); +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//--- End Early Main ---\n"); +#endif + } + + for(i=0; i < psShader->asPhases[0].psInst.size(); ++i) + { + TranslateInstruction(&psShader->asPhases[0].psInst[i]); + } + + psContext->indent--; + + bcatcstr(glsl, "}\n"); + + // Concat extensions and glsl for the final shader code. + bconcat(extensions, glsl); + bdestroy(glsl); + psContext->glsl = extensions; + glsl = NULL; + + return true; +} + + diff --git a/src/toGLSLDeclaration.cpp b/src/toGLSLDeclaration.cpp new file mode 100644 index 0000000..cdfa10e --- /dev/null +++ b/src/toGLSLDeclaration.cpp @@ -0,0 +1,2994 @@ +#include "hlslcc.h" +#include "internal_includes/Declaration.h" +#include "internal_includes/toGLSLOperand.h" +#include "internal_includes/toGLSL.h" +#include "internal_includes/languages.h" +#include "internal_includes/HLSLccToolkit.h" +#include "internal_includes/Shader.h" +#include "internal_includes/HLSLCrossCompilerContext.h" +#include "bstrlib.h" +#include "internal_includes/debug.h" +#include +#include +#include +#include +#include "internal_includes/toGLSL.h" + +using namespace HLSLcc; + +#ifdef _MSC_VER +#define fpcheck(x) (_isnan(x) || !_finite(x)) +#else +#include +#define fpcheck(x) ((std::isnan(x)) || (std::isinf(x))) +#endif + +static void DeclareConstBufferShaderVariable(const HLSLCrossCompilerContext *psContext, const char* Name, const struct ShaderVarType* psType, int unsizedArray, bool addUniformPrefix = false) + //const SHADER_VARIABLE_CLASS eClass, const SHADER_VARIABLE_TYPE eType, + //const char* pszName) +{ + bstring glsl = *psContext->currentGLSLString; + + if (psType->Class == SVC_STRUCT) + { + bformata(glsl, "\t%s%s_Type %s", addUniformPrefix ? "UNITY_UNIFORM " : "", Name, Name); + if (psType->Elements > 1) + { + bformata(glsl, "[%d]", psType->Elements); + } + } + else if(psType->Class == SVC_MATRIX_COLUMNS || psType->Class == SVC_MATRIX_ROWS) + { + if (psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) + { + // Translate matrices into vec4 arrays + bformata(glsl, "\t%s%s " HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING "%s", addUniformPrefix ? "UNITY_UNIFORM " : "", HLSLcc::GetConstructorForType(psContext, psType->Type, 4), psType->Rows, psType->Columns, Name); + uint32_t elemCount = (psType->Class == SVC_MATRIX_COLUMNS ? 
psType->Columns : psType->Rows); + if (psType->Elements > 1) + { + elemCount *= psType->Elements; + } + bformata(glsl, "[%d]", elemCount); + } + else + { + bformata(glsl, "\t%s%s %s", addUniformPrefix ? "UNITY_UNIFORM " : "", HLSLcc::GetMatrixTypeName(psContext, psType->Type, psType->Columns, psType->Rows).c_str(), Name); + if (psType->Elements > 1) + { + bformata(glsl, "[%d]", psType->Elements); + } + } + } + else + if (psType->Class == SVC_VECTOR && psType->Columns > 1) + { + bformata(glsl, "\t%s%s %s", addUniformPrefix ? "UNITY_UNIFORM " : "", HLSLcc::GetConstructorForType(psContext, psType->Type, psType->Columns), Name); + + if(psType->Elements > 1) + { + bformata(glsl, "[%d]", psType->Elements); + } + } + else + if ((psType->Class == SVC_SCALAR) || + (psType->Class == SVC_VECTOR && psType->Columns == 1)) + { + if (psType->Type == SVT_BOOL) + { + //Use int instead of bool. + //Allows implicit conversions to integer and + //bool consumes 4-bytes in HLSL and GLSL anyway. + ((ShaderVarType *)psType)->Type = SVT_INT; + } + + bformata(glsl, "\t%s%s %s", addUniformPrefix ? "UNITY_UNIFORM " : "", HLSLcc::GetConstructorForType(psContext, psType->Type, 1), Name); + + if(psType->Elements > 1) + { + bformata(glsl, "[%d]", psType->Elements); + } + } + if(unsizedArray) + bformata(glsl, "[]"); + bformata(glsl, ";\n"); + } + + //In GLSL embedded structure definitions are not supported. + static void PreDeclareStructType(const HLSLCrossCompilerContext *psContext, const std::string &name, const struct ShaderVarType* psType) + { + bstring glsl = *psContext->currentGLSLString; + uint32_t i; + + for(i=0; i < psType->MemberCount; ++i) + { + if(psType->Members[i].Class == SVC_STRUCT) + { + PreDeclareStructType(psContext, psType->Members[i].name, &psType->Members[i]); + } + } + + if(psType->Class == SVC_STRUCT) + { + //Not supported at the moment + ASSERT(name != "$Element"); + + bformata(glsl, "struct %s_Type {\n", name.c_str()); + + for(i=0; i < psType->MemberCount; ++i) + { + ASSERT(psType->Members.size() != 0); + + DeclareConstBufferShaderVariable(psContext, psType->Members[i].name.c_str(), &psType->Members[i], 0); + } + + bformata(glsl, "};\n"); + } + } + + + static const char* GetInterpolationString(INTERPOLATION_MODE eMode, GLLang lang) + { + switch(eMode) + { + case INTERPOLATION_CONSTANT: + { + return "flat "; + } + case INTERPOLATION_LINEAR: + { + return ""; + } + case INTERPOLATION_LINEAR_CENTROID: + { + return "centroid "; + } + case INTERPOLATION_LINEAR_NOPERSPECTIVE: + { + return lang <= LANG_ES_310 ? "" : "noperspective "; + break; + } + case INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID: + { + return lang <= LANG_ES_310 ? "centroid " : "noperspective centroid "; + } + case INTERPOLATION_LINEAR_SAMPLE: + { + return "sample "; + } + case INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE: + { + return lang <= LANG_ES_310 ? "" : "noperspective sample "; + } + default: + { + return ""; + } + } + } + + static void DeclareInput( + HLSLCrossCompilerContext* psContext, + const Declaration* psDecl, + const char* Interpolation, const char* StorageQualifier, const char* Precision, int iNumComponents, OPERAND_INDEX_DIMENSION eIndexDim, const char* InputName, const uint32_t ui32CompMask) + { + Shader* psShader = psContext->psShader; + bstring glsl = *psContext->currentGLSLString; + int regSpace = psDecl->asOperands[0].GetRegisterSpace(psContext); + uint32_t ui32Reg = psDecl->asOperands[0].ui32RegisterNumber; + const ShaderInfo::InOutSignature *psSig = NULL; + + // This falls within the specified index ranges.
The default is 0 if no input range is specified + + if (regSpace == 0) + psContext->psShader->sInfo.GetInputSignatureFromRegister(ui32Reg, ui32CompMask, &psSig); + else + psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Reg, ui32CompMask, &psSig); + + ASSERT(psSig != NULL); + + // No need to declare input pos 0 on HS control point phases, it's always position + // Also no point in declaring the builtins + if (psShader->eShaderType == HULL_SHADER && psShader->asPhases[psContext->currentPhase].ePhase == HS_CTRL_POINT_PHASE) + { + if (regSpace == 0) + { + if (psSig->semanticName == "POS" && psSig->ui32SemanticIndex == 0) + return; + } + } + + if((ui32CompMask & ~psShader->acInputDeclared[regSpace][ui32Reg]) != 0) + { + const char* vecType = "vec"; + const char* scalarType = "float"; + + switch(psSig->eComponentType) + { + case INOUT_COMPONENT_UINT32: + { + vecType = "uvec"; + scalarType = "uint"; + break; + } + case INOUT_COMPONENT_SINT32: + { + vecType = "ivec"; + scalarType = "int"; + break; + } + case INOUT_COMPONENT_FLOAT32: + { + break; + } + default: + { + ASSERT(0); + break; + } + } + + if(psContext->psDependencies) + { + if(psShader->eShaderType == PIXEL_SHADER) + { + psContext->psDependencies->SetInterpolationMode(ui32Reg, psDecl->value.eInterpolation); + } + } + + std::string locationQualifier = ""; + + if (HaveInOutLocationQualifier(psContext->psShader->eTargetLanguage)) + { + bool addLocation = false; + + // Add locations to vertex shader inputs unless disabled in flags + if (psShader->eShaderType == VERTEX_SHADER && !(psContext->flags & HLSLCC_FLAG_DISABLE_EXPLICIT_LOCATIONS)) + addLocation = true; + + // Add intra-shader locations if requested in flags + if (psShader->eShaderType != VERTEX_SHADER && (psContext->flags & HLSLCC_FLAG_SEPARABLE_SHADER_OBJECTS)) + addLocation = true; + + if (addLocation) + { + std::ostringstream oss; + oss << "layout(location = " << psContext->psDependencies->GetVaryingLocation(std::string(InputName), psShader->eShaderType, true) << ") "; + locationQualifier = oss.str(); + } + } + + psShader->acInputDeclared[regSpace][ui32Reg] = (char)psSig->ui32Mask; + + // Do the reflection report on vertex shader inputs + if (psShader->eShaderType == VERTEX_SHADER) + { + psContext->m_Reflection.OnInputBinding(std::string(InputName), psContext->psDependencies->GetVaryingLocation(std::string(InputName), VERTEX_SHADER, true)); + } + + switch (eIndexDim) + { + case INDEX_2D: + { + if(iNumComponents == 1) + { + const uint32_t regNum = psDecl->asOperands[0].ui32RegisterNumber; + const uint32_t arraySize = psDecl->asOperands[0].aui32ArraySizes[0]; + + psContext->psShader->abScalarInput[regSpace][regNum] |= (int)ui32CompMask; + + if(psShader->eShaderType == HULL_SHADER || psDecl->asOperands[0].eType == OPERAND_TYPE_INPUT_CONTROL_POINT) + bformata(glsl, "%s%s %s %s %s [];\n", locationQualifier.c_str(), StorageQualifier, Precision, scalarType, InputName); + else + bformata(glsl, "%s%s %s %s %s [%d];\n", locationQualifier.c_str(), StorageQualifier, Precision, scalarType, InputName, arraySize); + } + else + { + if (psShader->eShaderType == HULL_SHADER || psDecl->asOperands[0].eType == OPERAND_TYPE_INPUT_CONTROL_POINT) + bformata(glsl, "%s%s %s %s%d %s [];\n", locationQualifier.c_str(), StorageQualifier, Precision, vecType, iNumComponents, InputName); + else + bformata(glsl, "%s%s %s %s%d %s [%d];\n", locationQualifier.c_str(), StorageQualifier, Precision, vecType, iNumComponents, InputName, + psDecl->asOperands[0].aui32ArraySizes[0]); + } + break; + } + 
default: + { + if(iNumComponents == 1) + { + psContext->psShader->abScalarInput[regSpace][ui32Reg] |= (int)ui32CompMask; + + bformata(glsl, "%s%s%s %s %s %s;\n", locationQualifier.c_str(), Interpolation, StorageQualifier, Precision, scalarType, InputName); + } + else + { + if(psShader->aIndexedInput[regSpace][ui32Reg] > 0) + { + bformata(glsl, "%s%s%s %s %s%d %s", locationQualifier.c_str(), Interpolation, StorageQualifier, Precision, vecType, iNumComponents, InputName); + if (psShader->eShaderType == HULL_SHADER) + bcatcstr(glsl, "[];\n"); + else + bcatcstr(glsl, ";\n"); + } + else + { + if (psShader->eShaderType == HULL_SHADER) + bformata(glsl, "%s%s%s %s %s%d %s[];\n", locationQualifier.c_str(), Interpolation, StorageQualifier, Precision, vecType, iNumComponents, InputName); + else + bformata(glsl, "%s%s%s %s %s%d %s;\n", locationQualifier.c_str(), Interpolation, StorageQualifier, Precision, vecType, iNumComponents, InputName); + } + } + break; + } + } + } +} + +static void AddBuiltinInput(HLSLCrossCompilerContext* psContext, const Declaration* psDecl, const char* builtinName) +{ + // Nothing to do currently as we read from builtins directly. +} + + + +void ToGLSL::AddBuiltinOutput(const Declaration* psDecl, int arrayElements, const char* builtinName) +{ + bstring glsl = *psContext->currentGLSLString; + Shader* psShader = psContext->psShader; + + if (psDecl->asOperands[0].eSpecialName != NAME_CLIP_DISTANCE) + return; + + psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode = 1; + + if(psContext->OutputNeedsDeclaring(&psDecl->asOperands[0], arrayElements ? arrayElements : 1)) + { + const ShaderInfo::InOutSignature* psSignature = NULL; + + psShader->sInfo.GetOutputSignatureFromRegister( + psDecl->asOperands[0].ui32RegisterNumber, + psDecl->asOperands[0].ui32CompMask, + 0, + &psSignature); + psContext->currentGLSLString = &psContext->psShader->asPhases[psContext->currentPhase].postShaderCode; + glsl = *psContext->currentGLSLString; + psContext->indent++; + if(arrayElements) + { + + } + else + { + // Case 828454 : For some reason DX compiler seems to inject clip distance declaration to the hull shader sometimes + // even though it's not used at all, and overlaps some completely unrelated patch constant declarations. We'll just ignore this now. + // Revisit this if this actually pops up elsewhere. + if(psDecl->asOperands[0].eSpecialName == NAME_CLIP_DISTANCE && psContext->psShader->eShaderType != HULL_SHADER) + { + int max = psDecl->asOperands[0].GetMaxComponent(); + + if (IsESLanguage(psShader->eTargetLanguage) && !psShader->m_ClipDistanceExtensionDeclared) + { + bcatcstr(psContext->extensions, "#extension GL_EXT_clip_cull_distance : require\n"); + psShader->m_ClipDistanceExtensionDeclared = true; + } + + int applySwizzle = psDecl->asOperands[0].GetNumSwizzleElements() > 1 ? 1 : 0; + int index; + int i; + int multiplier = 1; + const char* swizzle[] = {".x", ".y", ".z", ".w"}; + + ASSERT(psSignature!=NULL); + + index = psSignature->ui32SemanticIndex; + + //Clip distance can be spread across 1 or 2 outputs (each no more than a vec4). + //Some examples: + //float4 clip[2] : SV_ClipDistance; //8 clip distances + //float3 clip[2] : SV_ClipDistance; //6 clip distances + //float4 clip : SV_ClipDistance; //4 clip distances + //float clip : SV_ClipDistance; //1 clip distance. + + //In GLSL the clip distance built-in is an array of up to 8 floats. + //So vector to array conversion needs to be done here. 
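+ // Worked example (illustrative): float4 clip0 : SV_ClipDistance0; float2 clip1 : SV_ClipDistance1; yields six clip distances. clip0 fills gl_ClipDistance[0..3]; for clip1 the multiplier below resolves to 4 because clip0 occupies all four components, so clip1's two values land in gl_ClipDistance[4] and gl_ClipDistance[5].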
+ if(index == 1) + { + const ShaderInfo::InOutSignature* psFirstClipSignature; + if (psShader->sInfo.GetOutputSignatureFromSystemValue(NAME_CLIP_DISTANCE, 1, &psFirstClipSignature)) + { + if(psFirstClipSignature->ui32Mask & (1 << 3)) + { + multiplier = 4; + } + else + if(psFirstClipSignature->ui32Mask & (1 << 2)) + { + multiplier = 3; + } + else + if(psFirstClipSignature->ui32Mask & (1 << 1)) + { + multiplier = 2; + } + } + } + + // Add a specially crafted comment so runtime knows to enable clip planes. + // We may end up doing 2 of these, so at runtime OR the results + uint32_t clipmask = psDecl->asOperands[0].GetAccessMask(); + if(index != 0) + clipmask <<= multiplier; + bformata(psContext->glsl, "// HLSLcc_ClipDistances_%x\n", clipmask); + + psContext->psShader->asPhases[psContext->currentPhase].acOutputNeedsRedirect[psSignature->ui32Register] = 0xff; + bformata(psContext->glsl, "vec4 phase%d_glClipDistance%d;\n", psContext->currentPhase, index); + + for(i=0; i < max; ++i) + { + psContext->AddIndentation(); + bformata(glsl, "%s[%d] = (", builtinName, i + multiplier*index); + TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NONE); + if(applySwizzle) + { + bformata(glsl, ")%s;\n", swizzle[i]); + } + else + { + bformata(glsl, ");\n"); + } + } + } + + } + psContext->indent--; + psContext->currentGLSLString = &psContext->glsl; + } + } + + void ToGLSL::HandleOutputRedirect(const Declaration *psDecl, const char *Precision) + { + const Operand *psOperand = &psDecl->asOperands[0]; + Shader *psShader = psContext->psShader; + bstring glsl = *psContext->currentGLSLString; + int needsRedirect = 0; + const ShaderInfo::InOutSignature *psSig = NULL; + + int regSpace = psOperand->GetRegisterSpace(psContext); + if (regSpace == 0 && psShader->asPhases[psContext->currentPhase].acOutputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xff) + { + needsRedirect = 1; + } + else if (regSpace == 1 && psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xff) + { + needsRedirect = 1; + } + + if (needsRedirect == 1) + { + // TODO What if this is indexed? + ShaderPhase *psPhase = &psShader->asPhases[psContext->currentPhase]; + int comp = 0; + uint32_t origMask = psOperand->ui32CompMask; + + ASSERT(psContext->psShader->aIndexedOutput[regSpace][psOperand->ui32RegisterNumber] == 0); + + psContext->AddIndentation(); + bformata(glsl, "%s vec4 phase%d_Output%d_%d;\n", Precision, psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); + + psPhase->hasPostShaderCode = 1; + psContext->currentGLSLString = &psPhase->postShaderCode; + + while (comp < 4) + { + int numComps = 0; + int hasCast = 0; + uint32_t mask, i; + psSig = NULL; + if (regSpace == 0) + psContext->psShader->sInfo.GetOutputSignatureFromRegister(psOperand->ui32RegisterNumber, 1 << comp, psContext->psShader->ui32CurrentVertexOutputStream, &psSig, true); + else + psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, 1 << comp, &psSig, true); + + // The register isn't necessarily packed full. Continue with the next component.
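+ // Illustrative example (not part of the original source): if an output register packs a float2 in .xy and a uint in .z, the phase writes a single vec4 temporary (phase0_Output0_1 style, declared above), and the post-shader code below copies .xy straight into the float output and routes .z through floatBitsToUint() into the uint output.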
+ if (psSig == NULL) + { + comp++; + continue; + } + + numComps = GetNumberBitsSet(psSig->ui32Mask); + mask = psSig->ui32Mask; + + ((Operand *)psOperand)->ui32CompMask = 1 << comp; + psContext->AddIndentation(); + TranslateOperand(psOperand, TO_FLAG_NAME_ONLY); + + bcatcstr(psPhase->postShaderCode, " = "); + + if (psSig->eComponentType == INOUT_COMPONENT_SINT32) + { + bformata(psPhase->postShaderCode, "floatBitsToInt("); + hasCast = 1; + } + else if (psSig->eComponentType == INOUT_COMPONENT_UINT32) + { + bformata(psPhase->postShaderCode, "floatBitsToUint("); + hasCast = 1; + } + bformata(psPhase->postShaderCode, "phase%d_Output%d_%d.", psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); + // Print out mask + for (i = 0; i < 4; i++) + { + if ((mask & (1 << i)) == 0) + continue; + + bformata(psPhase->postShaderCode, "%c", "xyzw"[i]); + } + + if (hasCast) + bcatcstr(psPhase->postShaderCode, ")"); + comp += numComps; + bcatcstr(psPhase->postShaderCode, ";\n"); + } + + psContext->currentGLSLString = &psContext->glsl; + + ((Operand *)psOperand)->ui32CompMask = origMask; + if (regSpace == 0) + psShader->asPhases[psContext->currentPhase].acOutputNeedsRedirect[psOperand->ui32RegisterNumber] = 0xfe; + else + psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] = 0xfe; + } +} + +void ToGLSL::AddUserOutput(const Declaration* psDecl) +{ + bstring glsl = *psContext->currentGLSLString; + bstring extensions = psContext->extensions; + Shader* psShader = psContext->psShader; + + if(psContext->OutputNeedsDeclaring(&psDecl->asOperands[0], 1)) + { + const Operand* psOperand = &psDecl->asOperands[0]; + const char* Precision = ""; + int iNumComponents; + bstring type; + int regSpace = psDecl->asOperands[0].GetRegisterSpace(psContext); + uint32_t ui32Reg = psDecl->asOperands[0].ui32RegisterNumber; + + const ShaderInfo::InOutSignature* psSignature = NULL; + + if (regSpace == 0) + psShader->sInfo.GetOutputSignatureFromRegister( + ui32Reg, + psDecl->asOperands[0].ui32CompMask, + psShader->ui32CurrentVertexOutputStream, + &psSignature); + else + psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Reg, psDecl->asOperands[0].ui32CompMask, &psSignature); + + if (psSignature->semanticName == "POS" && psOperand->ui32RegisterNumber == 0 && psContext->psShader->eShaderType == VERTEX_SHADER) + return; + + iNumComponents = GetNumberBitsSet(psSignature->ui32Mask); + if (iNumComponents == 1) + psContext->psShader->abScalarOutput[regSpace][ui32Reg] |= (int)psDecl->asOperands[0].ui32CompMask; + + switch (psSignature->eComponentType) + { + case INOUT_COMPONENT_UINT32: + { + if (iNumComponents > 1) + type = bformat("uvec%d", iNumComponents); + else + type = bformat("uint"); + break; + } + case INOUT_COMPONENT_SINT32: + { + if (iNumComponents > 1) + type = bformat("ivec%d", iNumComponents); + else + type = bformat("int"); + break; + } + case INOUT_COMPONENT_FLOAT32: + { + if (iNumComponents > 1) + type = bformat("vec%d", iNumComponents); + else + type = bformat("float"); + break; + } + default: + ASSERT(0); + break; + } + + if(HavePrecisionQualifers(psShader->eTargetLanguage)) + { + switch(psOperand->eMinPrecision) + { + case OPERAND_MIN_PRECISION_DEFAULT: + { + Precision = "highp "; + break; + } + case OPERAND_MIN_PRECISION_FLOAT_16: + { + Precision = "mediump "; + break; + } + case OPERAND_MIN_PRECISION_FLOAT_2_8: + { + Precision = "lowp "; + break; + } + case OPERAND_MIN_PRECISION_SINT_16: + { + Precision = "mediump "; + //type = "ivec"; + break; + } + case 
OPERAND_MIN_PRECISION_UINT_16: + { + Precision = "mediump "; + //type = "uvec"; + break; + } + } + } + + switch(psShader->eShaderType) + { + case PIXEL_SHADER: + { + switch(psDecl->asOperands[0].eType) + { + case OPERAND_TYPE_OUTPUT_COVERAGE_MASK: + case OPERAND_TYPE_OUTPUT_DEPTH: + { + + break; + } + case OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL: + { + bcatcstr(extensions, "#ifdef GL_ARB_conservative_depth\n"); + bcatcstr(extensions, "#extension GL_ARB_conservative_depth : enable\n"); + bcatcstr(extensions, "#endif\n"); + bcatcstr(glsl, "#ifdef GL_ARB_conservative_depth\n"); + bcatcstr(glsl, "layout (depth_greater) out float gl_FragDepth;\n"); + bcatcstr(glsl, "#endif\n"); + break; + } + case OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL: + { + bcatcstr(extensions, "#ifdef GL_ARB_conservative_depth\n"); + bcatcstr(extensions, "#extension GL_ARB_conservative_depth : enable\n"); + bcatcstr(extensions, "layout (depth_less) out float gl_FragDepth;\n"); + bcatcstr(glsl, "#endif\n"); + bcatcstr(glsl, "#ifdef GL_ARB_conservative_depth\n"); + bcatcstr(glsl, "layout (depth_less) out float gl_FragDepth;\n"); + bcatcstr(glsl, "#endif\n"); + break; + } + default: + { + if(WriteToFragData(psContext->psShader->eTargetLanguage)) + { + bformata(glsl, "#define Output%d gl_FragData[%d]\n", psDecl->asOperands[0].ui32RegisterNumber, psDecl->asOperands[0].ui32RegisterNumber); + } + else + { + char OutputName[512]; + bstring oname; + oname = bformat("%s%s%d", psContext->outputPrefix, psSignature->semanticName.c_str(), psSignature->ui32SemanticIndex); + strncpy(OutputName, (char *)oname->data, 512); + bdestroy(oname); + + if (HaveInOutLocationQualifier(psContext->psShader->eTargetLanguage) || + HaveLimitedInOutLocationQualifier(psContext->psShader->eTargetLanguage, psContext->psShader->extensions)) + { + uint32_t index = 0; + uint32_t renderTarget = psDecl->asOperands[0].ui32RegisterNumber; + + if((psContext->flags & HLSLCC_FLAG_DUAL_SOURCE_BLENDING) && DualSourceBlendSupported(psContext->psShader->eTargetLanguage)) + { + if(renderTarget > 0) + { + renderTarget = 0; + index = 1; + } + bformata(glsl, "layout(location = %d, index = %d) ", renderTarget, index); + } + else + { + bformata(glsl, "layout(location = %d) ", renderTarget); + } + } + + bformata(glsl, "out %s%s %s;\n", Precision, type->data, OutputName); + } + break; + } + } + break; + } + case VERTEX_SHADER: + case GEOMETRY_SHADER: + case DOMAIN_SHADER: + case HULL_SHADER: + { + const char* Interpolation = ""; + char OutputName[512]; + bstring oname; + oname = bformat("%s%s%s%d", psContext->outputPrefix, regSpace == 0 ? 
"" : "patch", psSignature->semanticName.c_str(), psSignature->ui32SemanticIndex); + strncpy(OutputName, (char *)oname->data, 512); + bdestroy(oname); + + if (psShader->eShaderType == VERTEX_SHADER) + { + if (psSignature->eComponentType == INOUT_COMPONENT_UINT32 || + psSignature->eComponentType == INOUT_COMPONENT_SINT32) // GLSL spec requires that integer vertex outputs always have "flat" interpolation + { + Interpolation = GetInterpolationString(INTERPOLATION_CONSTANT, psContext->psShader->eTargetLanguage); + } + else if (psContext->psDependencies) // For floats we get the interpolation that was resolved from the fragment shader input + { + Interpolation = GetInterpolationString(psContext->psDependencies->GetInterpolationMode(psDecl->asOperands[0].ui32RegisterNumber), psContext->psShader->eTargetLanguage); + } + } + + if (HaveInOutLocationQualifier(psContext->psShader->eTargetLanguage) && (psContext->flags & HLSLCC_FLAG_SEPARABLE_SHADER_OBJECTS)) + { + bformata(glsl, "layout(location = %d) ", psContext->psDependencies->GetVaryingLocation(std::string(OutputName), psShader->eShaderType, false)); + } + + if(InOutSupported(psContext->psShader->eTargetLanguage)) + { + if (psContext->psShader->eShaderType == HULL_SHADER) + { + // In Hull shaders outputs are either per-vertex (and need []) or per-patch (need 'out patch') + if (regSpace == 0) + bformata(glsl, "%sout %s%s %s[];\n", Interpolation, Precision, type->data, OutputName); + else + bformata(glsl, "patch %sout %s%s %s;\n", Interpolation, Precision, type->data, OutputName); + } + else + bformata(glsl, "%sout %s%s %s;\n", Interpolation, Precision, type->data, OutputName); + } + else + { + bformata(glsl, "%svarying %s%s %s;\n", Interpolation, Precision, type->data, OutputName); + } + + break; + } + default: + ASSERT(0); + break; + + } + HandleOutputRedirect(psDecl, Precision); + bdestroy(type); + } + +} + +static void DeclareUBOConstants(HLSLCrossCompilerContext* psContext, const uint32_t ui32BindingPoint, + const ConstantBuffer* psCBuf, + bstring glsl) +{ + uint32_t i; + + bool skipUnused = false; + + if((psContext->flags & HLSLCC_FLAG_REMOVE_UNUSED_GLOBALS) && psCBuf->name == "$Globals") + skipUnused = true; + + + std::string Name = psCBuf->name; + if(Name == "$Globals") + { + // Need to tweak Globals struct name to prevent clashes between shader stages + char prefix = 'A'; + switch (psContext->psShader->eShaderType) + { + default: + ASSERT(0); + break; + case COMPUTE_SHADER: + prefix = 'C'; + break; + case VERTEX_SHADER: + prefix = 'V'; + break; + case PIXEL_SHADER: + prefix = 'P'; + break; + case GEOMETRY_SHADER: + prefix = 'G'; + break; + case HULL_SHADER: + prefix = 'H'; + break; + case DOMAIN_SHADER: + prefix = 'D'; + break; + } + + Name[0] = prefix; + } + + for(i=0; i < psCBuf->asVars.size(); ++i) + { + if(skipUnused && !psCBuf->asVars[i].sType.m_IsUsed) + continue; + + PreDeclareStructType(psContext, + psCBuf->asVars[i].name, + &psCBuf->asVars[i].sType); + } + + if (psContext->flags & HLSLCC_FLAG_WRAP_UBO) + bformata(glsl, "#ifndef HLSLCC_DISABLE_UNIFORM_BUFFERS\n#define UNITY_UNIFORM\n"); + + /* [layout (location = X)] uniform vec4 HLSLConstantBufferName[numConsts]; */ + if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) + { + GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(Name, false, 1); + bformata(glsl, "layout(set = %d, binding = %d, std140) ", binding.first, binding.second); + } + else + { + if 
(HaveUniformBindingsAndLocations(psContext->psShader->eTargetLanguage, psContext->psShader->extensions, psContext->flags)) + bformata(glsl, "layout(binding = %d, std140) ", ui32BindingPoint); + else + bcatcstr(glsl, "layout(std140) "); + } + + bformata(glsl, "uniform %s {\n", Name.c_str()); + + if (psContext->flags & HLSLCC_FLAG_WRAP_UBO) + bformata(glsl, "#else\n#define UNITY_UNIFORM uniform\n#endif\n"); + + for(i=0; i < psCBuf->asVars.size(); ++i) + { + if(skipUnused && !psCBuf->asVars[i].sType.m_IsUsed) + continue; + + DeclareConstBufferShaderVariable(psContext, + psCBuf->asVars[i].name.c_str(), + &psCBuf->asVars[i].sType, 0, psContext->flags & HLSLCC_FLAG_WRAP_UBO ? true : false); + } + + if (psContext->flags & HLSLCC_FLAG_WRAP_UBO) + bformata(glsl, "#ifndef HLSLCC_DISABLE_UNIFORM_BUFFERS\n"); + bcatcstr(glsl, "};\n"); + if (psContext->flags & HLSLCC_FLAG_WRAP_UBO) + bformata(glsl, "#endif\n#undef UNITY_UNIFORM\n"); +} + +static void DeclareBufferVariable(HLSLCrossCompilerContext* psContext, uint32_t ui32BindingPoint, + const Operand* psOperand, const uint32_t ui32GloballyCoherentAccess, + const uint32_t isRaw, const uint32_t isUAV, const uint32_t stride, bstring glsl) +{ + const bool isVulkan = (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0; + bstring BufNamebstr = bfromcstr(""); + // Use original HLSL bindings for UAVs only. For non-UAV buffers we have resolved new binding points from the same register space. + if (!isUAV && !isVulkan) + ui32BindingPoint = psContext->psShader->aui32StructuredBufferBindingPoints[psContext->psShader->ui32CurrentStructuredBufferIndex++]; + + ResourceName(BufNamebstr, psContext, isUAV ? RGROUP_UAV : RGROUP_TEXTURE, psOperand->ui32RegisterNumber, 0); + + char *btmp = bstr2cstr(BufNamebstr, '\0'); + std::string BufName = btmp; + bcstrfree(btmp); + bdestroy(BufNamebstr); + + // Declare the struct type for structured buffers + if (!isRaw) + bformata(glsl, " struct %s_type {\n\tuint[%d] value;\n};\n\n", BufName.c_str(), stride / 4); + + if (isVulkan) + { + GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(BufName); + bformata(glsl, "layout(set = %d, binding = %d, std430) ", binding.first, binding.second); + } + else + { + bformata(glsl, "layout(std430, binding = %d) ", ui32BindingPoint); + } + + if (ui32GloballyCoherentAccess & GLOBALLY_COHERENT_ACCESS) + bcatcstr(glsl, "coherent "); + + if (!isUAV) + bcatcstr(glsl, "readonly "); + + bformata(glsl, "buffer %s {\n\t", BufName.c_str()); + + if (isRaw) + bcatcstr(glsl, "uint"); + else + bformata(glsl, "%s_type", BufName.c_str()); + + bformata(glsl, " %s_buf[];\n};\n", BufName.c_str()); + +} + +void ToGLSL::DeclareStructConstants(const uint32_t ui32BindingPoint, + const ConstantBuffer* psCBuf, const Operand* psOperand, + bstring glsl) +{ + uint32_t i; + int useGlobalsStruct = 1; + bool skipUnused = false; + + if((psContext->flags & HLSLCC_FLAG_DISABLE_GLOBALS_STRUCT) && psCBuf->name[0] == '$') + useGlobalsStruct = 0; + + if((psContext->flags & HLSLCC_FLAG_REMOVE_UNUSED_GLOBALS) && psCBuf->name == "$Globals") + skipUnused = true; + + if ((psContext->flags & HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT) == 0) + useGlobalsStruct = 0; + + + + for(i=0; i < psCBuf->asVars.size(); ++i) + { + if(skipUnused && !psCBuf->asVars[i].sType.m_IsUsed) + continue; + + PreDeclareStructType(psContext, + psCBuf->asVars[i].name, + &psCBuf->asVars[i].sType); + } + + /* [layout (location = X)] uniform vec4 HLSLConstantBufferName[numConsts]; */ + if ((psContext->flags & 
HLSLCC_FLAG_VULKAN_BINDINGS) != 0) + { + ASSERT(0); // Catch this to see what's going on + std::string bname = "wut"; + GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(bname); + bformata(glsl, "layout(set = %d, binding = %d) ", binding.first, binding.second); + } + else + { + if (HaveUniformBindingsAndLocations(psContext->psShader->eTargetLanguage, psContext->psShader->extensions, psContext->flags)) + bformata(glsl, "layout(location = %d) ", ui32BindingPoint); + } + if(useGlobalsStruct) + { + bcatcstr(glsl, "uniform struct "); + TranslateOperand(psOperand, TO_FLAG_DECLARATION_NAME); + + bcatcstr(glsl, "_Type {\n"); + } + + for(i=0; i < psCBuf->asVars.size(); ++i) + { + if(skipUnused && !psCBuf->asVars[i].sType.m_IsUsed) + continue; + + if(!useGlobalsStruct) + bcatcstr(glsl, "uniform "); + + DeclareConstBufferShaderVariable(psContext, + psCBuf->asVars[i].name.c_str(), + &psCBuf->asVars[i].sType, 0); + } + + if(useGlobalsStruct) + { + bcatcstr(glsl, "} "); + + TranslateOperand(psOperand, TO_FLAG_DECLARATION_NAME); + + bcatcstr(glsl, ";\n"); + } +} + +static const char* GetSamplerType(HLSLCrossCompilerContext* psContext, + const RESOURCE_DIMENSION eDimension, + const uint32_t ui32RegisterNumber) +{ + const ResourceBinding* psBinding = 0; + RESOURCE_RETURN_TYPE eType = RETURN_TYPE_UNORM; + int found; + found = psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, ui32RegisterNumber, &psBinding); + if(found) + { + eType = (RESOURCE_RETURN_TYPE)psBinding->ui32ReturnType; + } + switch(eDimension) + { + case RESOURCE_DIMENSION_BUFFER: + { + switch(eType) + { + case RETURN_TYPE_SINT: + return "isamplerBuffer"; + case RETURN_TYPE_UINT: + return "usamplerBuffer"; + default: + return "samplerBuffer"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE1D: + { + switch(eType) + { + case RETURN_TYPE_SINT: + return "isampler1D"; + case RETURN_TYPE_UINT: + return "usampler1D"; + default: + return "sampler1D"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE2D: + { + switch(eType) + { + case RETURN_TYPE_SINT: + return "isampler2D"; + case RETURN_TYPE_UINT: + return "usampler2D"; + default: + return "sampler2D"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE2DMS: + { + switch(eType) + { + case RETURN_TYPE_SINT: + return "isampler2DMS"; + case RETURN_TYPE_UINT: + return "usampler2DMS"; + default: + return "sampler2DMS"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE3D: + { + switch(eType) + { + case RETURN_TYPE_SINT: + return "isampler3D"; + case RETURN_TYPE_UINT: + return "usampler3D"; + default: + return "sampler3D"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURECUBE: + { + switch(eType) + { + case RETURN_TYPE_SINT: + return "isamplerCube"; + case RETURN_TYPE_UINT: + return "usamplerCube"; + default: + return "samplerCube"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE1DARRAY: + { + switch(eType) + { + case RETURN_TYPE_SINT: + return "isampler1DArray"; + case RETURN_TYPE_UINT: + return "usampler1DArray"; + default: + return "sampler1DArray"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + { + switch(eType) + { + case RETURN_TYPE_SINT: + return "isampler2DArray"; + case RETURN_TYPE_UINT: + return "usampler2DArray"; + default: + return "sampler2DArray"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + { + switch(eType) + { + case RETURN_TYPE_SINT: + return "isampler2DMSArray"; + case RETURN_TYPE_UINT: + return "usampler2DMSArray"; + default: + return "sampler2DMSArray"; + 
} + break; + } + + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + { + switch(eType) + { + case RETURN_TYPE_SINT: + return "isamplerCubeArray"; + case RETURN_TYPE_UINT: + return "usamplerCubeArray"; + default: + return "samplerCubeArray"; + } + break; + } + default: + ASSERT(0); + break; + + } + + return "sampler2D"; +} + +static const char *GetSamplerPrecision(GLLang langVersion, REFLECT_RESOURCE_PRECISION ePrec) +{ + if (!HavePrecisionQualifers(langVersion)) + return " "; + + switch (ePrec) + { + default: + case REFLECT_RESOURCE_PRECISION_UNKNOWN: + case REFLECT_RESOURCE_PRECISION_LOWP: + return "lowp "; + case REFLECT_RESOURCE_PRECISION_HIGHP: + return "highp "; + case REFLECT_RESOURCE_PRECISION_MEDIUMP: + return "mediump "; + } +} + +static void TranslateResourceTexture(HLSLCrossCompilerContext* psContext, const Declaration* psDecl, uint32_t samplerCanDoShadowCmp) +{ + bstring glsl = *psContext->currentGLSLString; + Shader* psShader = psContext->psShader; + const char *samplerPrecision = NULL; + std::set::iterator i; + + const char* samplerTypeName = GetSamplerType(psContext, + psDecl->value.eResourceDimension, + psDecl->asOperands[0].ui32RegisterNumber); + + if (psDecl->value.eResourceDimension == RESOURCE_DIMENSION_TEXTURECUBEARRAY + && !HaveCubemapArray(psContext->psShader->eTargetLanguage) + && !psContext->psShader->m_CubemapArrayExtensionDeclared) + { + // Need to enable extension (either OES or ARB), but we only need to add it once + if (IsESLanguage(psContext->psShader->eTargetLanguage)) + bformata(psContext->extensions, "#extension GL_OES_texture_cube_map_array : enable\n"); + else + bformata(psContext->extensions, "#extension GL_ARB_texture_cube_map_array : enable\n"); + + psContext->psShader->m_CubemapArrayExtensionDeclared = true; + } + + const ResourceBinding *psBinding = NULL; + psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, &psBinding); + ASSERT(psBinding != NULL); + + samplerPrecision = GetSamplerPrecision(psContext->psShader->eTargetLanguage, psBinding ? 
psBinding->ePrecision : REFLECT_RESOURCE_PRECISION_UNKNOWN); + + if (psContext->flags & HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS) + { + if(samplerCanDoShadowCmp && psDecl->ui32IsShadowTex) + { + for (i = psDecl->samplersUsed.begin(); i != psDecl->samplersUsed.end(); i++) + { + std::string tname = TextureSamplerName(&psShader->sInfo, psDecl->asOperands[0].ui32RegisterNumber, *i, 1); + if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) + { + GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(tname); + bformata(glsl, "layout(set = %d, binding = %d) ", binding.first, binding.second); + } + bcatcstr(glsl, "uniform "); + bcatcstr(glsl, samplerPrecision); + bcatcstr(glsl, samplerTypeName); + bcatcstr(glsl, "Shadow "); + bcatcstr(glsl, tname.c_str()); + bcatcstr(glsl, ";\n"); + } + } + for (i = psDecl->samplersUsed.begin(); i != psDecl->samplersUsed.end(); i++) + { + std::string tname = TextureSamplerName(&psShader->sInfo, psDecl->asOperands[0].ui32RegisterNumber, *i, 0); + if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) + { + GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(tname); + bformata(glsl, "layout(set = %d, binding = %d) ", binding.first, binding.second); + } + bcatcstr(glsl, "uniform "); + bcatcstr(glsl, samplerPrecision); + bcatcstr(glsl, samplerTypeName); + bcatcstr(glsl, " "); + bcatcstr(glsl, tname.c_str()); + bcatcstr(glsl, ";\n"); + } + } + + if(samplerCanDoShadowCmp && psDecl->ui32IsShadowTex) + { + //Create shadow and non-shadow sampler. + //HLSL does not have separate types for depth compare, just different functions. + std::string tname = ResourceName(psContext, RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, 1); + + if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) + { + GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(tname); + bformata(glsl, "layout(set = %d, binding = %d) ", binding.first, binding.second); + } + + bcatcstr(glsl, "uniform "); + bcatcstr(glsl, samplerPrecision); + bcatcstr(glsl, samplerTypeName); + bcatcstr(glsl, "Shadow "); + bcatcstr(glsl, tname.c_str()); + bcatcstr(glsl, ";\n"); + } + + std::string tname = ResourceName(psContext, RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, 0); + + if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) + { + GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(tname); + bformata(glsl, "layout(set = %d, binding = %d) ", binding.first, binding.second); + } + bcatcstr(glsl, "uniform "); + bcatcstr(glsl, samplerPrecision); + bcatcstr(glsl, samplerTypeName); + bcatcstr(glsl, " "); + bcatcstr(glsl, tname.c_str()); + bcatcstr(glsl, ";\n"); +} + +void ToGLSL::HandleInputRedirect(const Declaration *psDecl, const char *Precision) +{ + Operand *psOperand = (Operand *)&psDecl->asOperands[0]; + Shader *psShader = psContext->psShader; + bstring glsl = *psContext->currentGLSLString; + int needsRedirect = 0; + const ShaderInfo::InOutSignature *psSig = NULL; + + int regSpace = psOperand->GetRegisterSpace(psContext); + if (regSpace == 0) + { + if (psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xff) + needsRedirect = 1; + } + else if (psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xff) + { + 
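+ // Patch constants live outside register space 0, so the signature lookup here
+ // goes through the patch constant signature rather than the regular input signature.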
psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->ui32CompMask, &psSig); + needsRedirect = 1; + } + + if (needsRedirect == 1) + { + // TODO What if this is indexed? + ShaderPhase *psPhase = &psShader->asPhases[psContext->currentPhase]; + int needsLooping = 0; + int i = 0; + uint32_t origArraySize = 0; + uint32_t origMask = psOperand->ui32CompMask; + + ASSERT(psContext->psShader->aIndexedInput[regSpace][psOperand->ui32RegisterNumber] == 0); + + psContext->AddIndentation(); + // Does the input have multiple array components (such as geometry shader input, or domain shader control point input) + if ((psShader->eShaderType == DOMAIN_SHADER && regSpace == 0) || (psShader->eShaderType == GEOMETRY_SHADER)) + { + // The count is actually stored in psOperand->aui32ArraySizes[0] + origArraySize = psOperand->aui32ArraySizes[0]; + bformata(glsl, "%s vec4 phase%d_Input%d_%d[%d];\n", Precision, psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber, origArraySize); + needsLooping = 1; + i = origArraySize - 1; + } + else + bformata(glsl, "%s vec4 phase%d_Input%d_%d;\n", Precision, psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); + + psContext->currentGLSLString = &psPhase->earlyMain; + psContext->indent++; + + // Do a conditional loop. In normal cases needsLooping == 0 so this is only run once. + do + { + int comp = 0; + psContext->AddIndentation(); + if (needsLooping) + bformata(psPhase->earlyMain, "phase%d_Input%d_%d[%d] = vec4(", psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber, i); + else + bformata(psPhase->earlyMain, "phase%d_Input%d_%d = vec4(", psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); + + while (comp < 4) + { + int numComps = 0; + int hasCast = 0; + int hasSig = 0; + if (regSpace == 0) + hasSig = psContext->psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, 1 << comp, &psSig, true); + else + hasSig = psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, 1 << comp, &psSig, true); + + if (hasSig) + { + numComps = GetNumberBitsSet(psSig->ui32Mask); + if (psSig->eComponentType == INOUT_COMPONENT_SINT32) + { + bformata(psPhase->earlyMain, "intBitsToFloat("); + hasCast = 1; + } + else if (psSig->eComponentType == INOUT_COMPONENT_UINT32) + { + bformata(psPhase->earlyMain, "uintBitsToFloat("); + hasCast = 1; + } + + // Override the array size of the operand so TranslateOperand call below prints the correct index + if (needsLooping) + psOperand->aui32ArraySizes[0] = i; + + // And the component mask + psOperand->ui32CompMask = 1 << comp; + + TranslateOperand(psOperand, TO_FLAG_NAME_ONLY); + + // Restore the original array size value and mask + psOperand->ui32CompMask = origMask; + if (needsLooping) + psOperand->aui32ArraySizes[0] = origArraySize; + + if (hasCast) + bcatcstr(psPhase->earlyMain, ")"); + comp += numComps; + } + else // no signature found -> fill with zero + { + bcatcstr(psPhase->earlyMain, "0"); + comp++; + } + + if (comp < 4) + bcatcstr(psPhase->earlyMain, ", "); + } + bcatcstr(psPhase->earlyMain, ");\n"); + + } while ((--i) >= 0); + + psContext->currentGLSLString = &psContext->glsl; + psContext->indent--; + + if (regSpace == 0) + psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] = 0xfe; + else + psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] = 0xfe; + } +} + +void 
ToGLSL::TranslateDeclaration(const Declaration* psDecl) +{ + bstring glsl = *psContext->currentGLSLString; + bstring extensions = psContext->extensions; + Shader* psShader = psContext->psShader; + + switch(psDecl->eOpcode) + { + case OPCODE_DCL_INPUT_SGV: + case OPCODE_DCL_INPUT_PS_SGV: + { + const SPECIAL_NAME eSpecialName = psDecl->asOperands[0].eSpecialName; + switch(eSpecialName) + { + case NAME_POSITION: + { + AddBuiltinInput(psContext, psDecl, "gl_Position"); + break; + } + case NAME_RENDER_TARGET_ARRAY_INDEX: + { + AddBuiltinInput(psContext, psDecl, "gl_Layer"); + break; + } + case NAME_CLIP_DISTANCE: + { + AddBuiltinInput(psContext, psDecl, "gl_ClipDistance"); + break; + } + case NAME_VIEWPORT_ARRAY_INDEX: + { + AddBuiltinInput(psContext, psDecl, "gl_ViewportIndex"); + break; + } + case NAME_INSTANCE_ID: + { + AddBuiltinInput(psContext, psDecl, "gl_InstanceID"); + break; + } + case NAME_IS_FRONT_FACE: + { + /* + Cast to int used because + if(gl_FrontFacing != 0) failed to compiled on Intel HD 4000. + Suggests no implicit conversion for bool<->int. + */ + + AddBuiltinInput(psContext, psDecl, "(gl_FrontFacing ? 0xffffffffu : uint(0))"); // Hi Adreno. + break; + } + case NAME_SAMPLE_INDEX: + { + AddBuiltinInput(psContext, psDecl, "gl_SampleID"); + break; + } + case NAME_VERTEX_ID: + { + AddBuiltinInput(psContext, psDecl, "gl_VertexID"); + break; + } + case NAME_PRIMITIVE_ID: + { + if(psShader->eShaderType == GEOMETRY_SHADER) + AddBuiltinInput(psContext, psDecl, "gl_PrimitiveIDIn"); // LOL opengl. + else + AddBuiltinInput(psContext, psDecl, "gl_PrimitiveID"); + break; + } + default: + { + bformata(glsl, "in vec4 %s;\n", psDecl->asOperands[0].specialName.c_str()); + } + } + break; + } + + case OPCODE_DCL_OUTPUT_SIV: + { + switch(psDecl->asOperands[0].eSpecialName) + { + case NAME_POSITION: + { + AddBuiltinOutput(psDecl, 0, "gl_Position"); + break; + } + case NAME_RENDER_TARGET_ARRAY_INDEX: + { + AddBuiltinOutput(psDecl, 0, "gl_Layer"); + break; + } + case NAME_CLIP_DISTANCE: + { + AddBuiltinOutput(psDecl, 0, "gl_ClipDistance"); + break; + } + case NAME_VIEWPORT_ARRAY_INDEX: + { + AddBuiltinOutput(psDecl, 0, "gl_ViewportIndex"); + break; + } + case NAME_VERTEX_ID: + { + ASSERT(0); //VertexID is not an output + break; + } + case NAME_PRIMITIVE_ID: + { + AddBuiltinOutput(psDecl, 0, "gl_PrimitiveID"); + break; + } + case NAME_INSTANCE_ID: + { + ASSERT(0); //InstanceID is not an output + break; + } + case NAME_IS_FRONT_FACE: + { + ASSERT(0); //FrontFacing is not an output + break; + } + case NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: + { + if(psContext->psShader->aIndexedOutput[1][psDecl->asOperands[0].ui32RegisterNumber]) + { + AddBuiltinOutput(psDecl, 4, "gl_TessLevelOuter"); + } + else + { + AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[0]"); + } + break; + } + case NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: + { + AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[1]"); + break; + } + case NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: + { + AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[2]"); + break; + } + case NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: + { + AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[3]"); + break; + } + case NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: + { + if(psContext->psShader->aIndexedOutput[1][psDecl->asOperands[0].ui32RegisterNumber]) + { + AddBuiltinOutput(psDecl, 3, "gl_TessLevelOuter"); + } + else + { + AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[0]"); + } + break; + } + case NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: + { + AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[1]"); + 
break; + } + case NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: + { + AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[2]"); + break; + } + case NAME_FINAL_LINE_DENSITY_TESSFACTOR: + { + if(psContext->psShader->aIndexedOutput[1][psDecl->asOperands[0].ui32RegisterNumber]) + { + AddBuiltinOutput(psDecl, 2, "gl_TessLevelOuter"); + } + else + { + AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[0]"); + } + break; + } + case NAME_FINAL_LINE_DETAIL_TESSFACTOR: + { + AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[1]"); + break; + } + case NAME_FINAL_TRI_INSIDE_TESSFACTOR: + case NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR: + { + if(psContext->psShader->aIndexedOutput[1][psDecl->asOperands[0].ui32RegisterNumber]) + { + AddBuiltinOutput(psDecl, 2, "gl_TessLevelInner"); + } + else + { + AddBuiltinOutput(psDecl, 0, "gl_TessLevelInner[0]"); + } + break; + } + case NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR: + { + AddBuiltinOutput(psDecl, 0, "gl_TessLevelInner[1]"); + break; + } + default: + { + // Sometimes DX compiler seems to declare patch constant outputs like this. Anyway, nothing for us to do. +// bformata(glsl, "out vec4 %s;\n", psDecl->asOperands[0].specialName.c_str()); + +/* bcatcstr(glsl, "#define "); + TranslateOperand(psContext, &psDecl->asOperands[0], TO_FLAG_NONE); + bformata(glsl, " %s\n", psDecl->asOperands[0].pszSpecialName); + break;*/ + } + } + break; + } + case OPCODE_DCL_INPUT: + { + const Operand* psOperand = &psDecl->asOperands[0]; + + int iNumComponents = psOperand->GetNumInputElements(psContext); + const char* StorageQualifier = "attribute"; + std::string inputName; + const char* Precision = ""; + + if((psOperand->eType == OPERAND_TYPE_INPUT_DOMAIN_POINT)|| + (psOperand->eType == OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID)|| + (psOperand->eType == OPERAND_TYPE_INPUT_COVERAGE_MASK)|| + (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_ID)|| + (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_GROUP_ID)|| + (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP)|| + (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED) || + (psOperand->eType == OPERAND_TYPE_INPUT_FORK_INSTANCE_ID)) + { + break; + } + + // No need to declare patch constants read again by the hull shader. + if ((psOperand->eType == OPERAND_TYPE_INPUT_PATCH_CONSTANT) && psContext->psShader->eShaderType == HULL_SHADER) + { + break; + } + // ...or control points + if ((psOperand->eType == OPERAND_TYPE_INPUT_CONTROL_POINT) && psContext->psShader->eShaderType == HULL_SHADER) + { + break; + } + + // Also skip position input to domain shader + if ((psOperand->eType == OPERAND_TYPE_INPUT_CONTROL_POINT) && psContext->psShader->eShaderType == DOMAIN_SHADER) + { + const ShaderInfo::InOutSignature *psIn = NULL; + psContext->psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psIn); + ASSERT(psIn != NULL); + + if ((psIn->semanticName == "SV_POSITION" || psIn->semanticName == "POS") && psIn->ui32SemanticIndex == 0) + break; + } + + //Already declared as part of an array. 
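+ // (A value of -1 in aIndexedInput marks a register already covered by an indexed
+ // array declaration, so it needs no separate declaration of its own.)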
+ if(psShader->aIndexedInput[psOperand->GetRegisterSpace(psContext)][psDecl->asOperands[0].ui32RegisterNumber] == -1) + { + break; + } + + inputName = psContext->GetDeclaredInputName(psOperand, NULL, 1, NULL); + + if(InOutSupported(psContext->psShader->eTargetLanguage)) + { + if (psOperand->eType == OPERAND_TYPE_INPUT_PATCH_CONSTANT && psContext->psShader->eShaderType == DOMAIN_SHADER) + StorageQualifier = "patch in"; + else + StorageQualifier = "in"; + } + + if(HavePrecisionQualifers(psShader->eTargetLanguage)) + { + switch(psOperand->eMinPrecision) + { + case OPERAND_MIN_PRECISION_DEFAULT: + { + Precision = "highp"; + break; + } + case OPERAND_MIN_PRECISION_FLOAT_16: + { + Precision = "mediump"; + break; + } + case OPERAND_MIN_PRECISION_FLOAT_2_8: + { + Precision = "lowp"; + break; + } + case OPERAND_MIN_PRECISION_SINT_16: + { + Precision = "mediump"; + break; + } + case OPERAND_MIN_PRECISION_UINT_16: + { + Precision = "mediump"; + break; + } + } + } + + DeclareInput(psContext, psDecl, + "", StorageQualifier, Precision, iNumComponents, (OPERAND_INDEX_DIMENSION)psOperand->iIndexDims, inputName.c_str(), psOperand->ui32CompMask); + + HandleInputRedirect(psDecl, Precision); + break; + } + case OPCODE_DCL_INPUT_PS_SIV: + { + switch(psDecl->asOperands[0].eSpecialName) + { + case NAME_POSITION: + { + AddBuiltinInput(psContext, psDecl, "gl_FragCoord"); + break; + } + default: + ASSERT(0); + break; + + } + break; + } + case OPCODE_DCL_INPUT_SIV: + { + if(psShader->eShaderType == PIXEL_SHADER && psContext->psDependencies) + { + psContext->psDependencies->SetInterpolationMode(psDecl->asOperands[0].ui32RegisterNumber, psDecl->value.eInterpolation); + } + break; + } + case OPCODE_DCL_INPUT_PS: + { + const Operand* psOperand = &psDecl->asOperands[0]; + int iNumComponents = psOperand->GetNumInputElements(psContext); + const char* StorageQualifier = "varying"; + const char* Precision = ""; + std::string inputName; + const char* Interpolation = ""; + int hasNoPerspective = psContext->psShader->eTargetLanguage <= LANG_ES_310 ? 0 : 1; + inputName = psContext->GetDeclaredInputName(psOperand, NULL, 1, NULL); + if (InOutSupported(psContext->psShader->eTargetLanguage)) + { + StorageQualifier = "in"; + } + + switch(psDecl->value.eInterpolation) + { + case INTERPOLATION_CONSTANT: + { + Interpolation = "flat "; + break; + } + case INTERPOLATION_LINEAR: + { + break; + } + case INTERPOLATION_LINEAR_CENTROID: + { + Interpolation = "centroid "; + break; + } + case INTERPOLATION_LINEAR_NOPERSPECTIVE: + { + Interpolation = hasNoPerspective ? "noperspective " : ""; + break; + } + case INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID: + { + Interpolation = hasNoPerspective ? "noperspective centroid " : "centroid" ; + break; + } + case INTERPOLATION_LINEAR_SAMPLE: + { + Interpolation = hasNoPerspective ? "sample " : ""; + break; + } + case INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE: + { + Interpolation = hasNoPerspective ? 
"noperspective sample " : ""; + break; + } + default: + ASSERT(0); + break; + } + + if(HavePrecisionQualifers(psShader->eTargetLanguage)) + { + switch(psOperand->eMinPrecision) + { + case OPERAND_MIN_PRECISION_DEFAULT: + { + Precision = "highp"; + break; + } + case OPERAND_MIN_PRECISION_FLOAT_16: + { + Precision = "mediump"; + break; + } + case OPERAND_MIN_PRECISION_FLOAT_2_8: + { + Precision = "lowp"; + break; + } + case OPERAND_MIN_PRECISION_SINT_16: + { + Precision = "mediump"; + break; + } + case OPERAND_MIN_PRECISION_UINT_16: + { + Precision = "mediump"; + break; + } + } + } + + DeclareInput(psContext, psDecl, + Interpolation, StorageQualifier, Precision, iNumComponents, INDEX_1D, inputName.c_str(), psOperand->ui32CompMask); + + HandleInputRedirect(psDecl, Precision); + + break; + } + case OPCODE_DCL_TEMPS: + { + uint32_t i = 0; + const uint32_t ui32NumTemps = psDecl->value.ui32NumTemps; + bool usePrecision = (HavePrecisionQualifers(psShader->eTargetLanguage) != 0); + + for (i = 0; i < ui32NumTemps; i++) + { + if (psShader->psFloatTempSizes[i] != 0) + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_FLOAT, psShader->psFloatTempSizes[i], usePrecision), i); + if (psShader->psFloat16TempSizes[i] != 0) + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "16_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_FLOAT16, psShader->psFloat16TempSizes[i], usePrecision), i); + if (psShader->psFloat10TempSizes[i] != 0) + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "10_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_FLOAT10, psShader->psFloat10TempSizes[i], usePrecision), i); + if (psShader->psIntTempSizes[i] != 0) + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "i%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_INT, psShader->psIntTempSizes[i], usePrecision), i); + if (psShader->psInt16TempSizes[i] != 0) + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "i16_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_INT16, psShader->psInt16TempSizes[i], usePrecision), i); + if (psShader->psInt12TempSizes[i] != 0) + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "i12_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_INT12, psShader->psInt12TempSizes[i], usePrecision), i); + if (psShader->psUIntTempSizes[i] != 0) + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "u%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_UINT, psShader->psUIntTempSizes[i], usePrecision), i); + if (psShader->psUInt16TempSizes[i] != 0) + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "u16_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_UINT16, psShader->psUInt16TempSizes[i], usePrecision), i); + if (psShader->fp64 && (psShader->psDoubleTempSizes[i] != 0)) + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "d%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_DOUBLE, psShader->psDoubleTempSizes[i], usePrecision), i); + if (psShader->psBoolTempSizes[i] != 0) + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "b%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_BOOL, psShader->psBoolTempSizes[i], usePrecision), i); + } + break; + } + case OPCODE_SPECIAL_DCL_IMMCONST: + { + ASSERT(0 && "DX9 shaders no longer supported!"); + break; + } + case OPCODE_DCL_CONSTANT_BUFFER: + { + const Operand* psOperand = &psDecl->asOperands[0]; + const uint32_t ui32BindingPoint = psOperand->aui32ArraySizes[0]; + + const char* StageName = "VS"; + + const ConstantBuffer* psCBuf = NULL; + psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, ui32BindingPoint, &psCBuf); + + // We don't have a original resource name, maybe generate 
one??? + if(!psCBuf) + { + if (HaveUniformBindingsAndLocations(psContext->psShader->eTargetLanguage, psContext->psShader->extensions, psContext->flags)) + bformata(glsl, "layout(location = %d) ",ui32BindingPoint); + + bformata(glsl, "layout(std140) uniform ConstantBuffer%d {\n\tvec4 data[%d];\n} cb%d;\n", ui32BindingPoint,psOperand->aui32ArraySizes[1],ui32BindingPoint); + break; + } + + switch(psContext->psShader->eShaderType) + { + case PIXEL_SHADER: + { + StageName = "PS"; + break; + } + case HULL_SHADER: + { + StageName = "HS"; + break; + } + case DOMAIN_SHADER: + { + StageName = "DS"; + break; + } + case GEOMETRY_SHADER: + { + StageName = "GS"; + break; + } + case COMPUTE_SHADER: + { + StageName = "CS"; + break; + } + default: + { + break; + } + } + + if(psContext->flags & HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT) + { + if(psContext->flags & HLSLCC_FLAG_GLOBAL_CONSTS_NEVER_IN_UBO && psCBuf->name[0] == '$') + { + DeclareStructConstants(ui32BindingPoint, psCBuf, psOperand, glsl); + } + else + { + DeclareUBOConstants(psContext, ui32BindingPoint, psCBuf, glsl); + } + } + else + { + DeclareStructConstants(ui32BindingPoint, psCBuf, psOperand, glsl); + } + break; + } + case OPCODE_DCL_RESOURCE: + { + if (HaveUniformBindingsAndLocations(psContext->psShader->eTargetLanguage, psContext->psShader->extensions, psContext->flags)) + { + // Explicit layout bindings are not currently compatible with combined texture samplers. The layout below assumes there is exactly one GLSL sampler + // for each HLSL texture declaration, but when combining textures+samplers, there can be multiple OGL samplers for each HLSL texture declaration. + if((psContext->flags & HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS) != HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS) + { + //Constant buffer locations start at 0. Resource locations start at ui32NumConstantBuffers. 
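+ // e.g. with two constant buffers in the shader, texture register t0 gets
+ // "layout(location = 2) ", t1 gets "layout(location = 3) ", and so on.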
+ bformata(glsl, "layout(location = %d) ", + psContext->psShader->sInfo.psConstantBuffers.size() + psDecl->asOperands[0].ui32RegisterNumber); + } + } + + switch(psDecl->value.eResourceDimension) + { + case RESOURCE_DIMENSION_BUFFER: + { + bformata(glsl, "uniform %s ", GetSamplerType(psContext, + RESOURCE_DIMENSION_BUFFER, + psDecl->asOperands[0].ui32RegisterNumber)); + TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NONE); + bcatcstr(glsl, ";\n"); + break; + } + case RESOURCE_DIMENSION_TEXTURE1D: + { + TranslateResourceTexture(psContext, psDecl, 1); + break; + } + case RESOURCE_DIMENSION_TEXTURE2D: + { + TranslateResourceTexture(psContext, psDecl, 1); + break; + } + case RESOURCE_DIMENSION_TEXTURE2DMS: + { + TranslateResourceTexture(psContext, psDecl, 0); + break; + } + case RESOURCE_DIMENSION_TEXTURE3D: + { + TranslateResourceTexture(psContext, psDecl, 0); + break; + } + case RESOURCE_DIMENSION_TEXTURECUBE: + { + TranslateResourceTexture(psContext, psDecl, 1); + break; + } + case RESOURCE_DIMENSION_TEXTURE1DARRAY: + { + TranslateResourceTexture(psContext, psDecl, 1); + break; + } + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + { + TranslateResourceTexture(psContext, psDecl, 1); + break; + } + case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + { + TranslateResourceTexture(psContext, psDecl, 0); + break; + } + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + { + TranslateResourceTexture(psContext, psDecl, 1); + break; + } + default: + ASSERT(0); + break; + + } + psShader->aeResourceDims[psDecl->asOperands[0].ui32RegisterNumber] = psDecl->value.eResourceDimension; + break; + } + case OPCODE_DCL_OUTPUT: + { + bool needsDeclare = true; + if(psShader->eShaderType == HULL_SHADER && psShader->asPhases[psContext->currentPhase].ePhase == HS_CTRL_POINT_PHASE && psDecl->asOperands[0].ui32RegisterNumber==0) + { + // Need extra check from signature: + const ShaderInfo::InOutSignature *sig = NULL; + psShader->sInfo.GetOutputSignatureFromRegister(0, psDecl->asOperands->GetAccessMask(), 0, &sig, true); + if (!sig || sig->semanticName == "POSITION" || sig->semanticName == "POS") + { + needsDeclare = false; + AddBuiltinOutput(psDecl, 0, "gl_out[gl_InvocationID].gl_Position"); + } + } + + if(needsDeclare) + { + AddUserOutput(psDecl); + } + break; + } + case OPCODE_DCL_GLOBAL_FLAGS: + { + uint32_t ui32Flags = psDecl->value.ui32GlobalFlags; + + if(ui32Flags & GLOBAL_FLAG_FORCE_EARLY_DEPTH_STENCIL) + { + bcatcstr(glsl, "layout(early_fragment_tests) in;\n"); + } + if(!(ui32Flags & GLOBAL_FLAG_REFACTORING_ALLOWED)) + { + //TODO add precise + //HLSL precise - http://msdn.microsoft.com/en-us/library/windows/desktop/hh447204(v=vs.85).aspx + } + if(ui32Flags & GLOBAL_FLAG_ENABLE_DOUBLE_PRECISION_FLOAT_OPS) + { + bcatcstr(extensions, "#extension GL_ARB_gpu_shader_fp64 : enable\n"); + psShader->fp64 = 1; + } + break; + } + + case OPCODE_DCL_THREAD_GROUP: + { + bformata(glsl, "layout(local_size_x = %d, local_size_y = %d, local_size_z = %d) in;\n", + psDecl->value.aui32WorkGroupSize[0], + psDecl->value.aui32WorkGroupSize[1], + psDecl->value.aui32WorkGroupSize[2]); + break; + } + case OPCODE_DCL_TESS_OUTPUT_PRIMITIVE: + { + if(psContext->psShader->eShaderType == HULL_SHADER) + { + psContext->psShader->sInfo.eTessOutPrim = psDecl->value.eTessOutPrim; + // Invert triangle winding order to match glsl better, except on vulkan + if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) == 0) + { + if (psContext->psShader->sInfo.eTessOutPrim == TESSELLATOR_OUTPUT_TRIANGLE_CW) + psContext->psShader->sInfo.eTessOutPrim = 
TESSELLATOR_OUTPUT_TRIANGLE_CCW; + else if (psContext->psShader->sInfo.eTessOutPrim == TESSELLATOR_OUTPUT_TRIANGLE_CCW) + psContext->psShader->sInfo.eTessOutPrim = TESSELLATOR_OUTPUT_TRIANGLE_CW; + } + } + break; + } + case OPCODE_DCL_TESS_DOMAIN: + { + if(psContext->psShader->eShaderType == DOMAIN_SHADER) + { + switch(psDecl->value.eTessDomain) + { + case TESSELLATOR_DOMAIN_ISOLINE: + { + bcatcstr(glsl, "layout(isolines) in;\n"); + break; + } + case TESSELLATOR_DOMAIN_TRI: + { + bcatcstr(glsl, "layout(triangles) in;\n"); + break; + } + case TESSELLATOR_DOMAIN_QUAD: + { + bcatcstr(glsl, "layout(quads) in;\n"); + break; + } + default: + { + break; + } + } + } + break; + } + case OPCODE_DCL_TESS_PARTITIONING: + { + if(psContext->psShader->eShaderType == HULL_SHADER) + { + psContext->psShader->sInfo.eTessPartitioning = psDecl->value.eTessPartitioning; + } + break; + } + case OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY: + { + switch(psDecl->value.eOutputPrimitiveTopology) + { + case PRIMITIVE_TOPOLOGY_POINTLIST: + { + bcatcstr(glsl, "layout(points) out;\n"); + break; + } + case PRIMITIVE_TOPOLOGY_LINELIST_ADJ: + case PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ: + case PRIMITIVE_TOPOLOGY_LINELIST: + case PRIMITIVE_TOPOLOGY_LINESTRIP: + { + bcatcstr(glsl, "layout(line_strip) out;\n"); + break; + } + + case PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ: + case PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ: + case PRIMITIVE_TOPOLOGY_TRIANGLESTRIP: + case PRIMITIVE_TOPOLOGY_TRIANGLELIST: + { + bcatcstr(glsl, "layout(triangle_strip) out;\n"); + break; + } + default: + { + break; + } + } + break; + } + case OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT: + { + bformata(glsl, "layout(max_vertices = %d) out;\n", psDecl->value.ui32MaxOutputVertexCount); + break; + } + case OPCODE_DCL_GS_INPUT_PRIMITIVE: + { + switch(psDecl->value.eInputPrimitive) + { + case PRIMITIVE_POINT: + { + bcatcstr(glsl, "layout(points) in;\n"); + break; + } + case PRIMITIVE_LINE: + { + bcatcstr(glsl, "layout(lines) in;\n"); + break; + } + case PRIMITIVE_LINE_ADJ: + { + bcatcstr(glsl, "layout(lines_adjacency) in;\n"); + break; + } + case PRIMITIVE_TRIANGLE: + { + bcatcstr(glsl, "layout(triangles) in;\n"); + break; + } + case PRIMITIVE_TRIANGLE_ADJ: + { + bcatcstr(glsl, "layout(triangles_adjacency) in;\n"); + break; + } + default: + { + break; + } + } + break; + } + case OPCODE_DCL_INTERFACE: + { + const uint32_t interfaceID = psDecl->value.interface.ui32InterfaceID; + const uint32_t numUniforms = psDecl->value.interface.ui32ArraySize; + const uint32_t ui32NumBodiesPerTable = psContext->psShader->funcPointer[interfaceID].ui32NumBodiesPerTable; + ShaderVar* psVar; + uint32_t varFound; + + const char* uniformName; + + varFound = psContext->psShader->sInfo.GetInterfaceVarFromOffset(interfaceID, &psVar); + ASSERT(varFound); + uniformName = &psVar->name[0]; + + bformata(glsl, "subroutine uniform SubroutineType %s[%d*%d];\n", uniformName, numUniforms, ui32NumBodiesPerTable); + break; + } + case OPCODE_DCL_FUNCTION_BODY: + { + //bformata(glsl, "void Func%d();//%d\n", psDecl->asOperands[0].ui32RegisterNumber, psDecl->asOperands[0].eType); + break; + } + case OPCODE_DCL_FUNCTION_TABLE: + { + break; + } + case OPCODE_CUSTOMDATA: + { + // TODO: This is only ever accessed as a float currently. Do trickery if we ever see ints accessed from an array. + // Walk through all the chunks we've seen in this phase. 
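+ // Each chunk becomes a global array declaration here (e.g. "vec4 ImmCB_0_0_0[16];"),
+ // while the per-element initializers are emitted into the phase's earlyMain below.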
+ ShaderPhase &sp = psShader->asPhases[psContext->currentPhase]; + std::for_each(sp.m_ConstantArrayInfo.m_Chunks.begin(), sp.m_ConstantArrayInfo.m_Chunks.end(), [this](const std::pair &chunk) + { + bstring glsl = *psContext->currentGLSLString; + uint32_t componentCount = chunk.second.m_ComponentCount; + // Just do the declaration here and contents to earlyMain. + if (componentCount == 1) + bformata(glsl, "float ImmCB_%d_%d_%d[%d];\n", psContext->currentPhase, chunk.first, chunk.second.m_Rebase, chunk.second.m_Size); + else + bformata(glsl, "vec%d ImmCB_%d_%d_%d[%d];\n", componentCount, psContext->currentPhase, chunk.first, chunk.second.m_Rebase, chunk.second.m_Size); + + bstring tgt = psContext->psShader->asPhases[psContext->currentPhase].earlyMain; + Declaration *psDecl = psContext->psShader->asPhases[psContext->currentPhase].m_ConstantArrayInfo.m_OrigDeclaration; + if (componentCount == 1) + { + for (uint32_t i = 0; i < chunk.second.m_Size; i++) + { + float val[4] = { + *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].a, + *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].b, + *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].c, + *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].d + }; + bformata(tgt, "\tImmCB_%d_%d_%d[%d] = ", psContext->currentPhase, chunk.first, chunk.second.m_Rebase, i); + if (fpcheck(val[chunk.second.m_Rebase])) + bformata(tgt, "uintBitsToFloat(uint(%Xu))", *(uint32_t *)&val[chunk.second.m_Rebase]); + else + HLSLcc::PrintFloat(tgt, val[chunk.second.m_Rebase]); + bcatcstr(tgt, ";\n"); + } + } + else + { + for (uint32_t i = 0; i < chunk.second.m_Size; i++) + { + float val[4] = { + *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].a, + *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].b, + *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].c, + *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].d + }; + bformata(tgt, "\tImmCB_%d_%d_%d[%d] = vec%d(", psContext->currentPhase, chunk.first, chunk.second.m_Rebase, i, componentCount); + for (uint32_t k = 0; k < componentCount; k++) + { + if (k != 0) + bcatcstr(tgt, ", "); + if (fpcheck(val[k])) + bformata(tgt, "uintBitsToFloat(uint(%Xu))", *(uint32_t *)&val[k + chunk.second.m_Rebase]); + else + HLSLcc::PrintFloat(tgt, val[k + chunk.second.m_Rebase]); + } + bcatcstr(tgt, ");\n"); + } + } + + }); + + + + break; + } + case OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT: + case OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT: + break; // Nothing to do + + case OPCODE_DCL_INDEXABLE_TEMP: + { + const uint32_t ui32RegIndex = psDecl->sIdxTemp.ui32RegIndex; + const uint32_t ui32RegCount = psDecl->sIdxTemp.ui32RegCount; + const uint32_t ui32RegComponentSize = psDecl->sIdxTemp.ui32RegComponentSize; + bformata(glsl, "vec%d TempArray%d[%d];\n", ui32RegComponentSize, ui32RegIndex, ui32RegCount); + break; + } + case OPCODE_DCL_INDEX_RANGE: + { + switch (psDecl->asOperands[0].eType) + { + case OPERAND_TYPE_OUTPUT: + case OPERAND_TYPE_INPUT: + { + const ShaderInfo::InOutSignature* psSignature = NULL; + const char* type = "vec"; + const char* Precision = ""; + uint32_t startReg = 0; + uint32_t i; + bstring *oldString; + int regSpace = psDecl->asOperands[0].GetRegisterSpace(psContext); + int isInput = psDecl->asOperands[0].eType == OPERAND_TYPE_INPUT ? 
1 : 0; + + if (regSpace == 0) + { + if (isInput) + psShader->sInfo.GetInputSignatureFromRegister( + psDecl->asOperands[0].ui32RegisterNumber, + psDecl->asOperands[0].ui32CompMask, + &psSignature); + else + psShader->sInfo.GetOutputSignatureFromRegister( + psDecl->asOperands[0].ui32RegisterNumber, + psDecl->asOperands[0].ui32CompMask, + psShader->ui32CurrentVertexOutputStream, + &psSignature); + } + else + psShader->sInfo.GetPatchConstantSignatureFromRegister(psDecl->asOperands[0].ui32RegisterNumber, psDecl->asOperands[0].ui32CompMask, &psSignature); + + ASSERT(psSignature != NULL); + + switch (psSignature->eComponentType) + { + case INOUT_COMPONENT_UINT32: + { + type = "uvec"; + break; + } + case INOUT_COMPONENT_SINT32: + { + type = "ivec"; + break; + } + case INOUT_COMPONENT_FLOAT32: + { + break; + } + default: + ASSERT(0); + break; + + } + + if (HavePrecisionQualifers(psShader->eTargetLanguage)) + { + switch (psSignature->eMinPrec) // TODO What if the inputs in the indexed range are of different precisions? + { + default: + { + Precision = "highp "; + break; + } + case MIN_PRECISION_ANY_16: + case MIN_PRECISION_FLOAT_16: + case MIN_PRECISION_SINT_16: + case MIN_PRECISION_UINT_16: + { + Precision = "mediump "; + break; + } + case MIN_PRECISION_FLOAT_2_8: + { + Precision = "lowp "; + break; + } + } + } + + startReg = psDecl->asOperands[0].ui32RegisterNumber; + bformata(glsl, "%s%s4 phase%d_%sput%d_%d[%d];\n", Precision, type, psContext->currentPhase, isInput ? "In" : "Out", regSpace, startReg, psDecl->value.ui32IndexRange); + oldString = psContext->currentGLSLString; + glsl = isInput ? psContext->psShader->asPhases[psContext->currentPhase].earlyMain : psContext->psShader->asPhases[psContext->currentPhase].postShaderCode; + psContext->currentGLSLString = &glsl; + if (isInput == 0) + psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode = 1; + for (i = 0; i < psDecl->value.ui32IndexRange; i++) + { + int dummy = 0; + std::string realName; + uint32_t destMask = psDecl->asOperands[0].ui32CompMask; + uint32_t rebase = 0; + const ShaderInfo::InOutSignature *psSig = NULL; + uint32_t regSpace = psDecl->asOperands[0].GetRegisterSpace(psContext); + + if (regSpace == 0) + if (isInput) + psContext->psShader->sInfo.GetInputSignatureFromRegister(startReg + i, destMask, &psSig); + else + psContext->psShader->sInfo.GetOutputSignatureFromRegister(startReg + i, destMask, 0, &psSig); + else + psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(startReg + i, destMask, &psSig); + + ASSERT(psSig != NULL); + + if ((psSig->ui32Mask & destMask) == 0) + continue; // Skip dummy writes (vec2 texcoords get filled to vec4 with zeroes etc) + + while ((psSig->ui32Mask & (1 << rebase)) == 0) + rebase++; + + ((Declaration *)psDecl)->asOperands[0].ui32RegisterNumber = startReg + i; + + if (isInput) + { + realName = psContext->GetDeclaredInputName(&psDecl->asOperands[0], &dummy, 1, NULL); + + psContext->AddIndentation(); + + bformata(glsl, "phase%d_Input%d_%d[%d]", psContext->currentPhase, regSpace, startReg, i); + + if (destMask != OPERAND_4_COMPONENT_MASK_ALL) + { + int k; + const char *swizzle = "xyzw"; + bcatcstr(glsl, "."); + for (k = 0; k < 4; k++) + { + if ((destMask & (1 << k)) && (psSig->ui32Mask & (1 << k))) + { + bformata(glsl, "%c", swizzle[k]); + } + } + } + bcatcstr(glsl, " = "); + bcatcstr(glsl, realName.c_str()); + if (destMask != OPERAND_4_COMPONENT_MASK_ALL && destMask != psSig->ui32Mask) + { + int k; + const char *swizzle = "xyzw"; + bcatcstr(glsl, "."); + for (k = 0; k < 4; 
k++) + { + if ((destMask & (1 << k)) && (psSig->ui32Mask & (1 << k))) + { + bformata(glsl, "%c", swizzle[k - rebase]); + } + } + } + } + else + { + realName = psContext->GetDeclaredOutputName(&psDecl->asOperands[0], &dummy, NULL, NULL, 1); + + psContext->AddIndentation(); + bcatcstr(glsl, realName.c_str()); + if (destMask != OPERAND_4_COMPONENT_MASK_ALL && destMask != psSig->ui32Mask) + { + int k; + const char *swizzle = "xyzw"; + bcatcstr(glsl, "."); + for (k = 0; k < 4; k++) + { + if ((destMask & (1 << k)) && (psSig->ui32Mask & (1 << k))) + { + bformata(glsl, "%c", swizzle[k - rebase]); + } + } + } + + bformata(glsl, " = phase%d_Output%d_%d[%d]", psContext->currentPhase, regSpace, startReg, i); + + if (destMask != OPERAND_4_COMPONENT_MASK_ALL) + { + int k; + const char *swizzle = "xyzw"; + bcatcstr(glsl, "."); + for (k = 0; k < 4; k++) + { + if ((destMask & (1 << k)) && (psSig->ui32Mask & (1 << k))) + { + bformata(glsl, "%c", swizzle[k]); + } + } + } + } + + bcatcstr(glsl, ";\n"); + } + + ((Declaration *)psDecl)->asOperands[0].ui32RegisterNumber = startReg; + psContext->currentGLSLString = oldString; + glsl = *psContext->currentGLSLString; + + for (i = 0; i < psDecl->value.ui32IndexRange; i++) + { + if (regSpace == 0) + { + if (isInput) + psShader->sInfo.GetInputSignatureFromRegister( + psDecl->asOperands[0].ui32RegisterNumber + i, + psDecl->asOperands[0].ui32CompMask, + &psSignature); + else + psShader->sInfo.GetOutputSignatureFromRegister( + psDecl->asOperands[0].ui32RegisterNumber + i, + psDecl->asOperands[0].ui32CompMask, + psShader->ui32CurrentVertexOutputStream, + &psSignature); + } + else + psShader->sInfo.GetPatchConstantSignatureFromRegister(psDecl->asOperands[0].ui32RegisterNumber + i, psDecl->asOperands[0].ui32CompMask, &psSignature); + + ASSERT(psSignature != NULL); + + ((ShaderInfo::InOutSignature *)psSignature)->isIndexed.insert(psContext->currentPhase); + ((ShaderInfo::InOutSignature *)psSignature)->indexStart[psContext->currentPhase] = startReg; + ((ShaderInfo::InOutSignature *)psSignature)->index[psContext->currentPhase] = i; + } + + + break; + } + default: + // TODO Input index ranges. + ASSERT(0); + } + break; + } + case OPCODE_HS_DECLS: + { + break; + } + case OPCODE_DCL_INPUT_CONTROL_POINT_COUNT: + { + break; + } + case OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT: + { + if(psContext->psShader->eShaderType == HULL_SHADER) + { + bformata(glsl, "layout(vertices=%d) out;\n", psDecl->value.ui32MaxOutputVertexCount); + } + break; + } + case OPCODE_HS_FORK_PHASE: + { + break; + } + case OPCODE_HS_JOIN_PHASE: + { + break; + } + case OPCODE_DCL_SAMPLER: + { + break; + } + case OPCODE_DCL_HS_MAX_TESSFACTOR: + { + //For GLSL the max tessellation factor is fixed to the value of gl_MaxTessGenLevel. + break; + } + case OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED: + { + // non-float images need either 'i' or 'u' prefix. 
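+ // e.g. a typed uint UAV on a 2D texture ends up declared roughly as
+ // "layout(binding=0, r32ui) highp uniform uimage2D <name>;".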
+ char imageTypePrefix[2] = { 0, 0 }; + uint32_t bindpoint = psDecl->asOperands[0].ui32RegisterNumber; + const bool isVulkan = (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS); + + if(psDecl->sUAV.ui32GloballyCoherentAccess & GLOBALLY_COHERENT_ACCESS) + { + bcatcstr(glsl, "coherent "); + } + + if (!(psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_READ) && + !(psContext->flags & HLSLCC_FLAG_GLES31_IMAGE_QUALIFIERS) && !isVulkan) + { //Special case on desktop glsl: writeonly image does not need format qualifier + bformata(glsl, "writeonly layout(binding=%d) ", bindpoint); + } + else + { + // Use 4 component format as a fallback if no instruction defines it + uint32_t numComponents = psDecl->sUAV.ui32NumComponents > 0 ? psDecl->sUAV.ui32NumComponents : 4; + + if (!(psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_READ)) + bcatcstr(glsl, "writeonly "); + else if (!(psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_WRITE)) + bcatcstr(glsl, "readonly "); + + if ((psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_WRITE) && IsESLanguage(psShader->eTargetLanguage)) + { + // Need to require the extension + if (!psShader->m_TextureBufferExtensionDeclared) + { + bcatcstr(psContext->extensions, "#extension GL_EXT_texture_buffer : require\n"); + psShader->m_TextureBufferExtensionDeclared = true; + } + + } + + if(isVulkan) + { + std::string name = ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0); + GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(name); + bformata(glsl, "layout(set = %d, binding = %d, ", binding.first, binding.second); + } + else + bformata(glsl, "layout(binding=%d, ", bindpoint); + + //TODO: catch bad format cases. e.g. es supports only limited format set. no rgb formats on glsl + if (numComponents >= 1) + bcatcstr(glsl, "r"); + if (numComponents >= 2) + bcatcstr(glsl, "g"); + if (numComponents >= 3) + bcatcstr(glsl, "ba"); + + switch (psDecl->sUAV.Type) + { + case RETURN_TYPE_FLOAT: + bcatcstr(glsl, "32f) highp "); //TODO: half case? + break; + case RETURN_TYPE_UNORM: + bcatcstr(glsl, "8) lowp "); + break; + case RETURN_TYPE_SNORM: + bcatcstr(glsl, "8_snorm) lowp "); + break; + case RETURN_TYPE_UINT: + bcatcstr(glsl, "32ui) highp "); //TODO: 16/8 cases? + break; + case RETURN_TYPE_SINT: + bcatcstr(glsl, "32i) highp "); //TODO: 16/8 cases? 
+ break; + default: + ASSERT(0); + } + } + + if (psDecl->sUAV.Type == RETURN_TYPE_UINT) + imageTypePrefix[0] = 'u'; + else if (psDecl->sUAV.Type == RETURN_TYPE_SINT) + imageTypePrefix[0] = 'i'; + + // GLSL requires images to be always explicitly defined as uniforms + switch(psDecl->value.eResourceDimension) + { + case RESOURCE_DIMENSION_BUFFER: + { + bformata(glsl, "uniform %simageBuffer ", imageTypePrefix); + break; + } + case RESOURCE_DIMENSION_TEXTURE1D: + { + bformata(glsl, "uniform %simage1D ", imageTypePrefix); + break; + } + case RESOURCE_DIMENSION_TEXTURE2D: + { + bformata(glsl, "uniform %simage2D ", imageTypePrefix); + break; + } + case RESOURCE_DIMENSION_TEXTURE2DMS: + { + bformata(glsl, "uniform %simage2DMS ", imageTypePrefix); + break; + } + case RESOURCE_DIMENSION_TEXTURE3D: + { + bformata(glsl, "uniform %simage3D ", imageTypePrefix); + break; + } + case RESOURCE_DIMENSION_TEXTURECUBE: + { + bformata(glsl, "uniform %simageCube ", imageTypePrefix); + break; + } + case RESOURCE_DIMENSION_TEXTURE1DARRAY: + { + bformata(glsl, "uniform %simage1DArray ", imageTypePrefix); + break; + } + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + { + bformata(glsl, "uniform %simage2DArray ", imageTypePrefix); + break; + } + case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + { + bformata(glsl, "uniform %simage3DArray ", imageTypePrefix); + break; + } + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + { + bformata(glsl, "uniform %simageCubeArray ", imageTypePrefix); + break; + } + default: + ASSERT(0); + break; + + } + TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NONE); + bcatcstr(glsl, ";\n"); + break; + } + case OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED: + { + const bool isVulkan = (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS); + if(psDecl->sUAV.bCounter) + { + if (isVulkan) + { + std::string uavname = ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0); + GLSLCrossDependencyData::VulkanResourceBinding uavBinding = psContext->psDependencies->GetVulkanResourceBinding(uavname, true); + GLSLCrossDependencyData::VulkanResourceBinding counterBinding = std::make_pair(uavBinding.first, uavBinding.second+1); + bformata(glsl, "layout(set = %d, binding = %d) buffer %s_counterBuf { highp uint %s_counter; };\n", counterBinding.first, counterBinding.second, uavname.c_str(), uavname.c_str()); + } + else + { + bcatcstr(glsl, "layout (binding = 0) uniform "); + + if (HavePrecisionQualifers(psShader->eTargetLanguage)) + bcatcstr(glsl, "highp "); + bcatcstr(glsl, "atomic_uint "); + ResourceName(glsl, psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0); + bformata(glsl, "_counter; \n"); + } + } + + DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], + psDecl->sUAV.ui32GloballyCoherentAccess, 0, 1, psDecl->ui32BufferStride, glsl); + break; + } + case OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW: + { + const bool isVulkan = (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS); + if(psDecl->sUAV.bCounter) + { + if (isVulkan) + { + std::string uavname = ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0); + GLSLCrossDependencyData::VulkanResourceBinding uavBinding = psContext->psDependencies->GetVulkanResourceBinding(uavname, true); + GLSLCrossDependencyData::VulkanResourceBinding counterBinding = std::make_pair(uavBinding.first, uavBinding.second + 1); + bformata(glsl, "layout(set = %d, binding = %d) buffer %s_counterBuf { highp uint %s_counter; };\n", counterBinding.first, counterBinding.second, uavname.c_str(), 
uavname.c_str()); + } + else + { + bcatcstr(glsl, "layout (binding = 0) uniform "); + if (HavePrecisionQualifers(psShader->eTargetLanguage)) + bcatcstr(glsl, "highp "); + bcatcstr(glsl, "atomic_uint "); + ResourceName(glsl, psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0); + bformata(glsl, "_counter; \n"); + } + } + + DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], + psDecl->sUAV.ui32GloballyCoherentAccess, 1, 1, psDecl->ui32BufferStride, glsl); + + break; + } + case OPCODE_DCL_RESOURCE_STRUCTURED: + { + DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], + psDecl->sUAV.ui32GloballyCoherentAccess, 0, 0, psDecl->ui32BufferStride, glsl); + break; + } + case OPCODE_DCL_RESOURCE_RAW: + { + DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], + psDecl->sUAV.ui32GloballyCoherentAccess, 1, 0, psDecl->ui32BufferStride, glsl); + break; + } + case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED: + { + ShaderVarType* psVarType = &psShader->sInfo.sGroupSharedVarType[psDecl->asOperands[0].ui32RegisterNumber]; + + bcatcstr(glsl, "shared struct {\n"); + bformata(glsl, "\tuint value[%d];\n", psDecl->sTGSM.ui32Stride/4); + bcatcstr(glsl, "} "); + TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NONE); + bformata(glsl, "[%d];\n", + psDecl->sTGSM.ui32Count); + psVarType->name = "value"; + + psVarType->Columns = psDecl->sTGSM.ui32Stride/4; + psVarType->Elements = psDecl->sTGSM.ui32Count; + break; + } + case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW: + { + ShaderVarType* psVarType = &psShader->sInfo.sGroupSharedVarType[psDecl->asOperands[0].ui32RegisterNumber]; + + bcatcstr(glsl, "shared uint "); + TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NONE); + bformata(glsl, "[%d];\n", psDecl->sTGSM.ui32Count / psDecl->sTGSM.ui32Stride); + + psVarType->name = "$Element"; + + psVarType->Columns = 1; + psVarType->Elements = psDecl->sTGSM.ui32Count / psDecl->sTGSM.ui32Stride; + break; + } + case OPCODE_DCL_STREAM: + { + ASSERT(psDecl->asOperands[0].eType == OPERAND_TYPE_STREAM); + + + if (psShader->eTargetLanguage >= LANG_400 && (psShader->ui32CurrentVertexOutputStream != psDecl->asOperands[0].ui32RegisterNumber)) + { + // Only emit stream declaration for desktop GL >= 4.0, and only if we're declaring something else than the default 0 + bformata(glsl, "layout(stream = %d) out;\n", psShader->ui32CurrentVertexOutputStream); + } + psShader->ui32CurrentVertexOutputStream = psDecl->asOperands[0].ui32RegisterNumber; + + break; + } + case OPCODE_DCL_GS_INSTANCE_COUNT: + { + bformata(glsl, "layout(invocations = %d) in;\n", psDecl->value.ui32GSInstanceCount); + break; + } + default: + { + ASSERT(0); + break; + } + } +} + +bool ToGLSL::TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix) +{ + ASSERT(sig != NULL); + if (psContext->psShader->eShaderType == HULL_SHADER && sig->semanticName == "SV_TessFactor") + { + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + ASSERT(sig->ui32SemanticIndex <= 3); + std::ostringstream oss; + oss << "gl_TessLevelOuter[" << sig->ui32SemanticIndex << "]"; + result = oss.str(); + return true; + } + + if (psContext->psShader->eShaderType == HULL_SHADER && sig->semanticName == "SV_InsideTessFactor") + { + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + ASSERT(sig->ui32SemanticIndex <= 1); + 
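+ // SV_InsideTessFactor maps directly onto the matching gl_TessLevelInner element.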
std::ostringstream oss; + oss << "gl_TessLevelInner[" << sig->ui32SemanticIndex << "]"; + result = oss.str(); + return true; + } + + switch (sig->eSystemValueType) + { + case NAME_POSITION: + if (psContext->psShader->eShaderType == PIXEL_SHADER) + result = "gl_FragCoord"; + else + result = "gl_Position"; + return true; + case NAME_RENDER_TARGET_ARRAY_INDEX: + result = "gl_Layer"; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case NAME_CLIP_DISTANCE: + { + // This is always routed through temp + std::ostringstream oss; + oss << "phase" << psContext->currentPhase << "_glClipDistance" << sig->ui32SemanticIndex; + result = oss.str(); + return true; + } + case NAME_VIEWPORT_ARRAY_INDEX: + result = "gl_ViewportIndex"; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case NAME_VERTEX_ID: + if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) + result = "gl_VertexIndex"; + else + result = "gl_VertexID"; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case NAME_INSTANCE_ID: + if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) + result = "gl_InstanceIndex"; + else + result = "gl_InstanceID"; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case NAME_IS_FRONT_FACE: + result = "(gl_FrontFacing ? 0xffffffffu : uint(0))"; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case NAME_PRIMITIVE_ID: + if (isInput && psContext->psShader->eShaderType == GEOMETRY_SHADER) + result = "gl_PrimitiveIDIn"; // LOL opengl + else + result = "gl_PrimitiveID"; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case NAME_SAMPLE_INDEX: + result = "gl_SampleID"; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_LINE_DENSITY_TESSFACTOR: + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + if (isIndexed) + { + result = "gl_TessLevelOuter"; + return true; + } + else + { + result = "gl_TessLevelOuter[0]"; + return true; + } + case NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_LINE_DETAIL_TESSFACTOR: + result = "gl_TessLevelOuter[1]"; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: + result = "gl_TessLevelOuter[2]"; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: + result = "gl_TessLevelOuter[3]"; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + + case NAME_FINAL_TRI_INSIDE_TESSFACTOR: + case NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR: + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + if (isIndexed) + { + result = "gl_TessLevelInner"; + return true; + } + else + { + result = "gl_TessLevelInner[0]"; + return true; + } + case NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR: + result = "gl_TessLevelInner[3]"; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + default: + break; + } + + if (psContext->psShader->asPhases[psContext->currentPhase].ePhase == HS_CTRL_POINT_PHASE) + { + if (sig->semanticName == "POS" && sig->ui32SemanticIndex == 0) + { + result = "gl_out[gl_InvocationID].gl_Position"; + return true; + } + std::ostringstream oss; + if(isInput) + oss << psContext->inputPrefix << sig->semanticName << sig->ui32SemanticIndex; + else + oss << psContext->outputPrefix << sig->semanticName << 
sig->ui32SemanticIndex << "[gl_InvocationID]"; + result = oss.str(); + return true; + } + + // TODO: Add other builtins here. + if (sig->eSystemValueType == NAME_POSITION || (sig->semanticName == "POS" && sig->ui32SemanticIndex == 0 && psContext->psShader->eShaderType == VERTEX_SHADER)) + { + result = "gl_Position"; + return true; + } + + return false; +} + diff --git a/src/toGLSLInstruction.cpp b/src/toGLSLInstruction.cpp new file mode 100644 index 0000000..0304131 --- /dev/null +++ b/src/toGLSLInstruction.cpp @@ -0,0 +1,4127 @@ +#include "internal_includes/toGLSLOperand.h" +#include "internal_includes/HLSLccToolkit.h" +#include "internal_includes/languages.h" +#include "internal_includes/HLSLCrossCompilerContext.h" +#include "bstrlib.h" +#include "stdio.h" +#include +#include +#include "internal_includes/debug.h" +#include "internal_includes/Shader.h" +#include "internal_includes/Instruction.h" +#include "internal_includes/toGLSL.h" + +using namespace HLSLcc; + + +// This function prints out the destination name, possible destination writemask, assignment operator +// and any possible conversions needed based on the eSrcType+ui32SrcElementCount (type and size of data expected to be coming in) +// As an output, pNeedsParenthesis will be filled with the amount of closing parenthesis needed +// and pSrcCount will be filled with the number of components expected +// ui32CompMask can be used to only write to 1 or more components (used by MOVC) +void ToGLSL::AddOpAssignToDestWithMask(const Operand* psDest, + SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, const char *szAssignmentOp, int *pNeedsParenthesis, uint32_t ui32CompMask) +{ + uint32_t ui32DestElementCount = psDest->GetNumSwizzleElements(ui32CompMask); + bstring glsl = *psContext->currentGLSLString; + SHADER_VARIABLE_TYPE eDestDataType = psDest->GetDataType(psContext); + ASSERT(pNeedsParenthesis != NULL); + + *pNeedsParenthesis = 0; + + TranslateOperand(psDest, TO_FLAG_DESTINATION, ui32CompMask); + + // Simple path: types match. + if (DoAssignmentDataTypesMatch(eDestDataType, eSrcType)) + { + // Cover cases where the HLSL language expects the rest of the components to be default-filled + // eg. 
MOV r0, c0.x => Temp[0] = vec4(c0.x); + if (ui32DestElementCount > ui32SrcElementCount) + { + bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeGLSL(eDestDataType, ui32DestElementCount, false)); + *pNeedsParenthesis = 1; + } + else + bformata(glsl, " %s ", szAssignmentOp); + return; + } + + switch (eDestDataType) + { + case SVT_INT: + case SVT_INT12: + case SVT_INT16: + // Bitcasts from lower precisions are ambiguous + ASSERT(eSrcType != SVT_FLOAT10 && eSrcType != SVT_FLOAT16); + if (eSrcType == SVT_FLOAT && psContext->psShader->ui32MajorVersion > 3) + { + bformata(glsl, " %s floatBitsToInt(", szAssignmentOp); + // Cover cases where the HLSL language expects the rest of the components to be default-filled + if (ui32DestElementCount > ui32SrcElementCount) + { + bformata(glsl, "%s(", GetConstructorForTypeGLSL(eSrcType, ui32DestElementCount, false)); + (*pNeedsParenthesis)++; + } + } + else + bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeGLSL(eDestDataType, ui32DestElementCount, false)); + + (*pNeedsParenthesis)++; + break; + case SVT_UINT: + case SVT_UINT16: + ASSERT(eSrcType != SVT_FLOAT10 && eSrcType != SVT_FLOAT16); + if (eSrcType == SVT_FLOAT && psContext->psShader->ui32MajorVersion > 3) + { + bformata(glsl, " %s floatBitsToUint(", szAssignmentOp); + // Cover cases where the HLSL language expects the rest of the components to be default-filled + if (ui32DestElementCount > ui32SrcElementCount) + { + bformata(glsl, "%s(", GetConstructorForTypeGLSL(eSrcType, ui32DestElementCount, false)); + (*pNeedsParenthesis)++; + } + } + else + bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeGLSL(eDestDataType, ui32DestElementCount, false)); + + (*pNeedsParenthesis)++; + break; + + case SVT_FLOAT: + case SVT_FLOAT10: + case SVT_FLOAT16: + ASSERT(eSrcType != SVT_INT12 || eSrcType != SVT_INT16 && eSrcType != SVT_UINT16); + if (psContext->psShader->ui32MajorVersion > 3) + { + if (eSrcType == SVT_INT) + bformata(glsl, " %s intBitsToFloat(", szAssignmentOp); + else + bformata(glsl, " %s uintBitsToFloat(", szAssignmentOp); + // Cover cases where the HLSL language expects the rest of the components to be default-filled + if (ui32DestElementCount > ui32SrcElementCount) + { + bformata(glsl, "%s(", GetConstructorForTypeGLSL(eSrcType, ui32DestElementCount, false)); + (*pNeedsParenthesis)++; + } + } + else + bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeGLSL(eDestDataType, ui32DestElementCount, false)); + + (*pNeedsParenthesis)++; + break; + default: + // TODO: Handle bools? + ASSERT(0); + break; + } + return; +} + +void ToGLSL::AddAssignToDest(const Operand* psDest, + SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, int* pNeedsParenthesis) +{ + AddOpAssignToDestWithMask(psDest, eSrcType, ui32SrcElementCount, "=", pNeedsParenthesis, OPERAND_4_COMPONENT_MASK_ALL); +} + +void ToGLSL::AddAssignPrologue(int numParenthesis, bool isEmbedded /* = false*/) +{ + bstring glsl = *psContext->currentGLSLString; + while (numParenthesis != 0) + { + bcatcstr(glsl, ")"); + numParenthesis--; + } + if(!isEmbedded) + bcatcstr(glsl, ";\n"); + +} + + +void ToGLSL::AddComparison(Instruction* psInst, ComparisonType eType, + uint32_t typeFlag) +{ + // Multiple cases to consider here: + // For shader model <=3: all comparisons are floats + // otherwise: + // OPCODE_LT, _GT, _NE etc: inputs are floats, outputs UINT 0xffffffff or 0. 
typeflag: TO_FLAG_NONE + // OPCODE_ILT, _IGT etc: comparisons are signed ints, outputs UINT 0xffffffff or 0 typeflag TO_FLAG_INTEGER + // _ULT, UGT etc: inputs unsigned ints, outputs UINTs typeflag TO_FLAG_UNSIGNED_INTEGER + // + // Additional complexity: if dest swizzle element count is 1, we can use normal comparison operators, otherwise glsl intrinsics. + + + bstring glsl = *psContext->currentGLSLString; + const uint32_t destElemCount = psInst->asOperands[0].GetNumSwizzleElements(); + const uint32_t s0ElemCount = psInst->asOperands[1].GetNumSwizzleElements(); + const uint32_t s1ElemCount = psInst->asOperands[2].GetNumSwizzleElements(); + int isBoolDest = psInst->asOperands[0].GetDataType(psContext) == SVT_BOOL; + + int floatResult = 0; + + ASSERT(s0ElemCount == s1ElemCount || s1ElemCount == 1 || s0ElemCount == 1); + if (s0ElemCount != s1ElemCount) + { + // Set the proper auto-expand flag is either argument is scalar + typeFlag |= (TO_AUTO_EXPAND_TO_VEC2 << (std::max(s0ElemCount, s1ElemCount) - 2)); + } + + if (psContext->psShader->ui32MajorVersion < 4) + { + floatResult = 1; + } + + if (destElemCount > 1) + { + const char* glslOpcode[] = { + "equal", + "lessThan", + "greaterThanEqual", + "notEqual", + }; + + int needsParenthesis = 0; + psContext->AddIndentation(); + if (isBoolDest) + { + TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION | TO_FLAG_BOOL); + bcatcstr(glsl, " = "); + } + else + { + AddAssignToDest(&psInst->asOperands[0], floatResult ? SVT_FLOAT : SVT_UINT, destElemCount, &needsParenthesis); + + bcatcstr(glsl, GetConstructorForTypeGLSL(floatResult ? SVT_FLOAT : SVT_UINT, destElemCount, false)); + bcatcstr(glsl, "("); + } + bformata(glsl, "%s(", glslOpcode[eType]); + TranslateOperand(&psInst->asOperands[1], typeFlag); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[2], typeFlag); + bcatcstr(glsl, ")"); + TranslateOperandSwizzle(psContext, &psInst->asOperands[0], 0); + if (!isBoolDest) + { + bcatcstr(glsl, ")"); + if (!floatResult) + { + bcatcstr(glsl, " * 0xFFFFFFFFu"); + } + } + + AddAssignPrologue(needsParenthesis); + } + else + { + const char* glslOpcode[] = { + "==", + "<", + ">=", + "!=", + }; + + //Scalar compare + + const bool workaroundAdrenoBugs = psContext->psShader->eTargetLanguage == LANG_ES_300; + + if (workaroundAdrenoBugs) + { + // Workarounds for bug cases 777617, 735299, 776827 + bcatcstr(glsl, "#ifdef UNITY_ADRENO_ES3\n"); + + int needsParenthesis = 0; + psContext->AddIndentation(); + if (isBoolDest) + { + TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION | TO_FLAG_BOOL); + bcatcstr(glsl, " = !!("); + needsParenthesis += 1; + TranslateOperand(&psInst->asOperands[1], typeFlag); + bformata(glsl, "%s", glslOpcode[eType]); + TranslateOperand(&psInst->asOperands[2], typeFlag); + AddAssignPrologue(needsParenthesis); + } + else + { + bcatcstr(glsl, "{ bool cond = "); + TranslateOperand(&psInst->asOperands[1], typeFlag); + bformata(glsl, "%s", glslOpcode[eType]); + TranslateOperand(&psInst->asOperands[2], typeFlag); + bcatcstr(glsl, "; "); + AddAssignToDest(&psInst->asOperands[0], floatResult ? SVT_FLOAT : SVT_UINT, destElemCount, &needsParenthesis); + bcatcstr(glsl, "!!cond ? "); + if (floatResult) + bcatcstr(glsl, "1.0 : 0.0"); + else + bcatcstr(glsl, "0xFFFFFFFFu : uint(0u)"); // Adreno can't handle 0u. 
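+                // Net effect of this branch (illustrative, register names assumed): for a
+                // scalar compare such as `lt r0.x, r1.x, r2.x` the emitted GLSL looks roughly like
+                //     { bool cond = r1.x<r2.x; r0.x = !!cond ? 0xFFFFFFFFu : uint(0u); }
+                // The intermediate bool, the !!cond and the uint(0u) spelling all exist solely to
+                // dodge the Adreno ES3.0 compiler issues referenced in the bug cases above.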
+ AddAssignPrologue(needsParenthesis, true); + bcatcstr(glsl, "; }\n"); + } + + bcatcstr(glsl, "#else\n"); + } + + int needsParenthesis = 0; + psContext->AddIndentation(); + if (isBoolDest) + { + TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION | TO_FLAG_BOOL); + bcatcstr(glsl, " = "); + } + else + { + AddAssignToDest(&psInst->asOperands[0], floatResult ? SVT_FLOAT : SVT_UINT, destElemCount, &needsParenthesis); + bcatcstr(glsl, "("); + } + TranslateOperand(&psInst->asOperands[1], typeFlag); + bformata(glsl, "%s", glslOpcode[eType]); + TranslateOperand(&psInst->asOperands[2], typeFlag); + if (!isBoolDest) + { + if (floatResult) + { + bcatcstr(glsl, ") ? 1.0 : 0.0"); + } + else + { + bcatcstr(glsl, ") ? 0xFFFFFFFFu : uint(0u)"); // Adreno can't handle 0u. + } + } + AddAssignPrologue(needsParenthesis); + + if (workaroundAdrenoBugs) + bcatcstr(glsl, "#endif\n"); + } +} + + +void ToGLSL::AddMOVBinaryOp(const Operand *pDest, Operand *pSrc, bool isEmbedded /* = false*/) +{ + int numParenthesis = 0; + int srcSwizzleCount = pSrc->GetNumSwizzleElements(); + uint32_t writeMask = pDest->GetAccessMask(); + + const SHADER_VARIABLE_TYPE eSrcType = pSrc->GetDataType(psContext, pDest->GetDataType(psContext)); + uint32_t flags = SVTTypeToFlag(eSrcType); + + AddAssignToDest(pDest, eSrcType, srcSwizzleCount, &numParenthesis); + TranslateOperand(pSrc, flags, writeMask); + + AddAssignPrologue(numParenthesis, isEmbedded); +} + +void ToGLSL::AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand *src1, Operand *src2) +{ + bstring glsl = *psContext->currentGLSLString; + uint32_t destElemCount = pDest->GetNumSwizzleElements(); + uint32_t s0ElemCount = src0->GetNumSwizzleElements(); + uint32_t s1ElemCount = src1->GetNumSwizzleElements(); + uint32_t s2ElemCount = src2->GetNumSwizzleElements(); + uint32_t destWriteMask = pDest->GetAccessMask(); + uint32_t destElem; + + const SHADER_VARIABLE_TYPE eDestType = pDest->GetDataType(psContext); + /* + for each component in dest[.mask] + if the corresponding component in src0 (POS-swizzle) + has any bit set + { + copy this component (POS-swizzle) from src1 into dest + } + else + { + copy this component (POS-swizzle) from src2 into dest + } + endfor + */ + + /* Single-component conditional variable (src0) */ + if (s0ElemCount == 1 || src0->IsSwizzleReplicated()) + { + int numParenthesis = 0; + SHADER_VARIABLE_TYPE s0Type = src0->GetDataType(psContext); + psContext->AddIndentation(); + AddAssignToDest(pDest, eDestType, destElemCount, &numParenthesis); + bcatcstr(glsl, "("); + if (s0Type == SVT_UINT || s0Type == SVT_UINT16) + TranslateOperand(src0, TO_AUTO_BITCAST_TO_UINT, OPERAND_4_COMPONENT_MASK_X); + else if (s0Type == SVT_BOOL) + TranslateOperand(src0, TO_FLAG_BOOL, OPERAND_4_COMPONENT_MASK_X); + else + TranslateOperand(src0, TO_AUTO_BITCAST_TO_INT, OPERAND_4_COMPONENT_MASK_X); + + if (psContext->psShader->ui32MajorVersion < 4) + { + //cmp opcode uses >= 0 + bcatcstr(glsl, " >= 0) ? "); + } + else + { + if (s0Type == SVT_UINT || s0Type == SVT_UINT16) + bcatcstr(glsl, " != uint(0u)) ? "); // Adreno doesn't understand 0u. + else if (s0Type == SVT_BOOL) + bcatcstr(glsl, ") ? "); + else + bcatcstr(glsl, " != 0) ? 
"); + } + + if (s1ElemCount == 1 && destElemCount > 1) + TranslateOperand(src1, SVTTypeToFlag(eDestType) | ElemCountToAutoExpandFlag(destElemCount)); + else + TranslateOperand(src1, SVTTypeToFlag(eDestType), destWriteMask); + + bcatcstr(glsl, " : "); + if (s2ElemCount == 1 && destElemCount > 1) + TranslateOperand(src2, SVTTypeToFlag(eDestType) | ElemCountToAutoExpandFlag(destElemCount)); + else + TranslateOperand(src2, SVTTypeToFlag(eDestType), destWriteMask); + + AddAssignPrologue(numParenthesis); + } + else + { + // TODO: We can actually do this in one op using mix(). + int srcElem = -1; + SHADER_VARIABLE_TYPE s0Type = src0->GetDataType(psContext); + for (destElem = 0; destElem < 4; ++destElem) + { + int numParenthesis = 0; + srcElem++; + if (pDest->eSelMode == OPERAND_4_COMPONENT_MASK_MODE && pDest->ui32CompMask != 0 && !(pDest->ui32CompMask & (1 << destElem))) + continue; + + psContext->AddIndentation(); + AddOpAssignToDestWithMask(pDest, eDestType, 1, "=", &numParenthesis, 1 << destElem); + bcatcstr(glsl, "("); + if (s0Type == SVT_BOOL) + { + TranslateOperand(src0, TO_FLAG_BOOL, 1 << srcElem); + bcatcstr(glsl, ") ? "); + } + else + { + TranslateOperand(src0, TO_AUTO_BITCAST_TO_INT, 1 << srcElem); + + if (psContext->psShader->ui32MajorVersion < 4) + { + //cmp opcode uses >= 0 + bcatcstr(glsl, " >= 0) ? "); + } + else + { + bcatcstr(glsl, " != 0) ? "); + } + } + + TranslateOperand(src1, SVTTypeToFlag(eDestType), 1 << srcElem); + bcatcstr(glsl, " : "); + TranslateOperand(src2, SVTTypeToFlag(eDestType), 1 << srcElem); + + AddAssignPrologue(numParenthesis); + } + } +} + +void ToGLSL::CallBinaryOp(const char* name, Instruction* psInst, + int dest, int src0, int src1, SHADER_VARIABLE_TYPE eDataType, bool isEmbedded /* = false*/) +{ + uint32_t ui32Flags = SVTTypeToFlag(eDataType); + bstring glsl = *psContext->currentGLSLString; + uint32_t destMask = psInst->asOperands[dest].GetAccessMask(); + uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); + uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + uint32_t src0AccessMask = psInst->asOperands[src0].GetAccessMask(); + uint32_t src1AccessMask = psInst->asOperands[src1].GetAccessMask(); + uint32_t src0AccessCount = GetNumberBitsSet(src0AccessMask); + uint32_t src1AccessCount = GetNumberBitsSet(src1AccessMask); + int needsParenthesis = 0; + + if (src1SwizCount != src0SwizCount) + { + uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); + ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); + } + + if(!isEmbedded) + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[dest], eDataType, dstSwizCount, &needsParenthesis); + + // Horrible Adreno bug workaround: + // All pre-ES3.1 Adreno GLES3.0 drivers fail in cases like this: + // vec4 a.xyz = b.xyz + c.yzw; + // Attempt to detect this and fall back to component-wise binary op. 
+ if ( (psContext->psShader->eTargetLanguage == LANG_ES_300) && + ((src0AccessCount > 1 && !(src0AccessMask & OPERAND_4_COMPONENT_MASK_X)) || (src1AccessCount > 1 && !(src1AccessMask & OPERAND_4_COMPONENT_MASK_X)))) + { + uint32_t i; + int firstPrinted = 0; + bcatcstr(glsl, GetConstructorForTypeGLSL(eDataType, dstSwizCount, false)); + bcatcstr(glsl, "("); + for (i = 0; i < 4; i++) + { + if (!(destMask & (1 << i))) + continue; + + if (firstPrinted != 0) + bcatcstr(glsl, ", "); + else + firstPrinted = 1; + + // Remove the auto expand flags + ui32Flags &= ~(TO_AUTO_EXPAND_TO_VEC2 | TO_AUTO_EXPAND_TO_VEC3 | TO_AUTO_EXPAND_TO_VEC4); + + TranslateOperand(&psInst->asOperands[src0], ui32Flags, 1 << i); + bformata(glsl, " %s ", name); + TranslateOperand(&psInst->asOperands[src1], ui32Flags, 1 << i); + } + bcatcstr(glsl, ")"); + } + else + { + TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + bformata(glsl, " %s ", name); + TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); + } + AddAssignPrologue(needsParenthesis, isEmbedded); +} + +void ToGLSL::CallTernaryOp(const char* op1, const char* op2, Instruction* psInst, + int dest, int src0, int src1, int src2, uint32_t dataType) +{ + bstring glsl = *psContext->currentGLSLString; + uint32_t destMask = psInst->asOperands[dest].GetAccessMask(); + uint32_t src2SwizCount = psInst->asOperands[src2].GetNumSwizzleElements(destMask); + uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); + uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + + uint32_t ui32Flags = dataType; + int numParenthesis = 0; + + if (src1SwizCount != src0SwizCount || src2SwizCount != src0SwizCount) + { + uint32_t maxElems = std::max(src2SwizCount, std::max(src1SwizCount, src0SwizCount)); + ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); + } + + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[dest], TypeFlagsToSVTType(dataType), dstSwizCount, &numParenthesis); + + TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + bformata(glsl, " %s ", op1); + TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); + bformata(glsl, " %s ", op2); + TranslateOperand(&psInst->asOperands[src2], ui32Flags, destMask); + AddAssignPrologue(numParenthesis); +} + +void ToGLSL::CallHelper3(const char* name, Instruction* psInst, + int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask) +{ + uint32_t ui32Flags = TO_AUTO_BITCAST_TO_FLOAT; + bstring glsl = *psContext->currentGLSLString; + uint32_t destMask = paramsShouldFollowWriteMask ? 
psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; + uint32_t src2SwizCount = psInst->asOperands[src2].GetNumSwizzleElements(destMask); + uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); + uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + int numParenthesis = 0; + + if ((src1SwizCount != src0SwizCount || src2SwizCount != src0SwizCount) && paramsShouldFollowWriteMask) + { + uint32_t maxElems = std::max(src2SwizCount, std::max(src1SwizCount, src0SwizCount)); + ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); + } + + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[dest], SVT_FLOAT, dstSwizCount, &numParenthesis); + + bformata(glsl, "%s(", name); + numParenthesis++; + TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[src2], ui32Flags, destMask); + AddAssignPrologue(numParenthesis); +} + +void ToGLSL::CallHelper2(const char* name, Instruction* psInst, + int dest, int src0, int src1, int paramsShouldFollowWriteMask) +{ + uint32_t ui32Flags = TO_AUTO_BITCAST_TO_FLOAT; + bstring glsl = *psContext->currentGLSLString; + uint32_t destMask = paramsShouldFollowWriteMask ? psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; + uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); + uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + + int isDotProduct = (strncmp(name, "dot", 3) == 0) ? 1 : 0; + int numParenthesis = 0; + + if ((src1SwizCount != src0SwizCount) && paramsShouldFollowWriteMask) + { + uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); + ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); + } + + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[dest], SVT_FLOAT, isDotProduct ? 1 : dstSwizCount, &numParenthesis); + + bformata(glsl, "%s(", name); + numParenthesis++; + + TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); + + AddAssignPrologue(numParenthesis); +} + +void ToGLSL::CallHelper2Int(const char* name, Instruction* psInst, + int dest, int src0, int src1, int paramsShouldFollowWriteMask) +{ + uint32_t ui32Flags = TO_AUTO_BITCAST_TO_INT; + bstring glsl = *psContext->currentGLSLString; + uint32_t destMask = paramsShouldFollowWriteMask ? 
psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; + uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); + uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + int numParenthesis = 0; + + if ((src1SwizCount != src0SwizCount) && paramsShouldFollowWriteMask) + { + uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); + ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); + } + + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[dest], SVT_INT, dstSwizCount, &numParenthesis); + + bformata(glsl, "%s(", name); + numParenthesis++; + TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); + AddAssignPrologue(numParenthesis); +} + +void ToGLSL::CallHelper2UInt(const char* name, Instruction* psInst, + int dest, int src0, int src1, int paramsShouldFollowWriteMask) +{ + uint32_t ui32Flags = TO_AUTO_BITCAST_TO_UINT; + bstring glsl = *psContext->currentGLSLString; + uint32_t destMask = paramsShouldFollowWriteMask ? psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; + uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); + uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + int numParenthesis = 0; + + if ((src1SwizCount != src0SwizCount) && paramsShouldFollowWriteMask) + { + uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); + ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); + } + + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[dest], SVT_UINT, dstSwizCount, &numParenthesis); + + bformata(glsl, "%s(", name); + numParenthesis++; + TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); + AddAssignPrologue(numParenthesis); +} + +void ToGLSL::CallHelper1(const char* name, Instruction* psInst, + int dest, int src0, int paramsShouldFollowWriteMask) +{ + uint32_t ui32Flags = TO_AUTO_BITCAST_TO_FLOAT; + bstring glsl = *psContext->currentGLSLString; + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + uint32_t destMask = paramsShouldFollowWriteMask ? psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; + int numParenthesis = 0; + + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[dest], SVT_FLOAT, dstSwizCount, &numParenthesis); + + bformata(glsl, "%s(", name); + numParenthesis++; + TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + AddAssignPrologue(numParenthesis); +} + +//Result is an int. +void ToGLSL::CallHelper1Int( + const char* name, + Instruction* psInst, + const int dest, + const int src0, + int paramsShouldFollowWriteMask) +{ + uint32_t ui32Flags = TO_AUTO_BITCAST_TO_INT; + bstring glsl = *psContext->currentGLSLString; + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + uint32_t destMask = paramsShouldFollowWriteMask ? 
psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; + int numParenthesis = 0; + + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[dest], SVT_INT, dstSwizCount, &numParenthesis); + + bformata(glsl, "%s(", name); + numParenthesis++; + TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + AddAssignPrologue(numParenthesis); +} + +void ToGLSL::TranslateTexelFetch( + Instruction* psInst, + const ResourceBinding* psBinding, + bstring glsl) +{ + int numParenthesis = 0; + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], TypeFlagsToSVTType(ResourceReturnTypeToFlag(psBinding->ui32ReturnType)), 4, &numParenthesis); + bcatcstr(glsl, "texelFetch("); + + // TODO Lod is being completely ignored!! Redo all of this. + switch (psBinding->eDimension) + { + case REFLECT_RESOURCE_DIMENSION_TEXTURE1D: + case REFLECT_RESOURCE_DIMENSION_BUFFER: + { + TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_X); + if (psBinding->eDimension != REFLECT_RESOURCE_DIMENSION_BUFFER) + bcatcstr(glsl, ", 0"); // Buffers don't have LOD + bcatcstr(glsl, ")"); + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY: + case REFLECT_RESOURCE_DIMENSION_TEXTURE3D: + { + TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); + bcatcstr(glsl, ", 0)"); + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE2D: + case REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY: + { + TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); + bcatcstr(glsl, ", 0)"); + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS: // TODO does this make any sense at all? 
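+        // For multisampled textures operand 3 carries the sample index, so this case maps
+        // roughly to   texelFetch(tex, ivec2(coord.xy), sampleIndex)   (names illustrative).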
+ { + ASSERT(psInst->eOpcode == OPCODE_LD_MS); + TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[3], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_X); + bcatcstr(glsl, ")"); + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + { + ASSERT(psInst->eOpcode == OPCODE_LD_MS); + TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[3], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_X); + bcatcstr(glsl, ")"); + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURECUBE: + case REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY: + case REFLECT_RESOURCE_DIMENSION_BUFFEREX: + default: + { + ASSERT(0); + break; + } + } + + TranslateOperandSwizzleWithMask(psContext, &psInst->asOperands[2], psInst->asOperands[0].GetAccessMask(), 0); + AddAssignPrologue(numParenthesis); +} + +void ToGLSL::TranslateTexelFetchOffset( + Instruction* psInst, + const ResourceBinding* psBinding, + bstring glsl) +{ + int numParenthesis = 0; + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], TypeFlagsToSVTType(ResourceReturnTypeToFlag(psBinding->ui32ReturnType)), 4, &numParenthesis); + + bcatcstr(glsl, "texelFetchOffset("); + + switch (psBinding->eDimension) + { + case REFLECT_RESOURCE_DIMENSION_TEXTURE1D: + { + TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_X); + bformata(glsl, ", 0, %d)", psInst->iUAddrOffset); + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY: + { + TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); + bformata(glsl, ", 0, ivec2(%d, %d))", + psInst->iUAddrOffset, + psInst->iVAddrOffset); + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE3D: + { + TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); + bformata(glsl, ", 0, ivec3(%d, %d, %d))", + psInst->iUAddrOffset, + psInst->iVAddrOffset, + psInst->iWAddrOffset); + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE2D: + { + TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); + bformata(glsl, ", 0, ivec2(%d, %d))", psInst->iUAddrOffset, psInst->iVAddrOffset); + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY: + { + TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); + bformata(glsl, ", 0, int(%d))", psInst->iUAddrOffset); + break; + } + case REFLECT_RESOURCE_DIMENSION_BUFFER: + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS: + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + case REFLECT_RESOURCE_DIMENSION_TEXTURECUBE: + case REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY: + case REFLECT_RESOURCE_DIMENSION_BUFFEREX: + default: + { + ASSERT(0); + break; + } + } + + TranslateOperandSwizzleWithMask(psContext, 
&psInst->asOperands[2], psInst->asOperands[0].GetAccessMask(), 0); + AddAssignPrologue(numParenthesis); +} + + +//Makes sure the texture coordinate swizzle is appropriate for the texture type. +//i.e. vecX for X-dimension texture. +//Currently supports floating point coord only, so not used for texelFetch. +void ToGLSL::TranslateTexCoord( + const RESOURCE_DIMENSION eResDim, + Operand* psTexCoordOperand) +{ + uint32_t flags = TO_AUTO_BITCAST_TO_FLOAT; + uint32_t opMask = OPERAND_4_COMPONENT_MASK_ALL; + + switch (eResDim) + { + case RESOURCE_DIMENSION_TEXTURE1D: + { + //Vec1 texcoord. Mask out the other components. + opMask = OPERAND_4_COMPONENT_MASK_X; + break; + } + case RESOURCE_DIMENSION_TEXTURE2D: + case RESOURCE_DIMENSION_TEXTURE1DARRAY: + { + //Vec2 texcoord. Mask out the other components. + opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y; + flags |= TO_AUTO_EXPAND_TO_VEC2; + break; + } + case RESOURCE_DIMENSION_TEXTURECUBE: + case RESOURCE_DIMENSION_TEXTURE3D: + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + { + //Vec3 texcoord. Mask out the other components. + opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z; + flags |= TO_AUTO_EXPAND_TO_VEC3; + break; + } + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + { + flags |= TO_AUTO_EXPAND_TO_VEC4; + break; + } + default: + { + ASSERT(0); + break; + } + } + + //FIXME detect when integer coords are needed. + TranslateOperand(psTexCoordOperand, flags, opMask); +} + +void ToGLSL::GetResInfoData(Instruction* psInst, int index, int destElem) +{ + bstring glsl = *psContext->currentGLSLString; + int numParenthesis = 0; + const RESINFO_RETURN_TYPE eResInfoReturnType = psInst->eResInfoReturnType; + const int isUAV = (psInst->asOperands[2].eType == OPERAND_TYPE_UNORDERED_ACCESS_VIEW); + + psContext->AddIndentation(); + AddOpAssignToDestWithMask(&psInst->asOperands[0], eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT ? SVT_UINT : SVT_FLOAT, 1, "=", &numParenthesis, 1 << destElem); + + //[width, height, depth or array size, total-mip-count] + if (index < 3) + { + int dim = GetNumTextureDimensions(psInst->eResDim); + bcatcstr(glsl, "("); + if (dim < (index + 1)) + { + bcatcstr(glsl, eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT ? 
"uint(0u)" : "0.0"); + } + else + { + if (eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT) + bformata(glsl, "uvec%d(", dim); + else if (eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_RCPFLOAT) + bformata(glsl, "vec%d(1.0) / vec%d(", dim, dim); + else + bformata(glsl, "vec%d(", dim); + + if (isUAV) + bcatcstr(glsl, "imageSize("); + else + bcatcstr(glsl, "textureSize("); + + TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); + + if (!isUAV) + { + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER); + } + bcatcstr(glsl, "))"); + + switch (index) + { + case 0: + bcatcstr(glsl, ".x"); + break; + case 1: + bcatcstr(glsl, ".y"); + break; + case 2: + bcatcstr(glsl, ".z"); + break; + } + } + + bcatcstr(glsl, ")"); + } + else + { + ASSERT(!isUAV); + if (eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT) + bcatcstr(glsl, "uint("); + else + bcatcstr(glsl, "float("); + bcatcstr(glsl, "textureQueryLevels("); + TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); + bcatcstr(glsl, "))"); + } + AddAssignPrologue(numParenthesis); +} + +void ToGLSL::TranslateTextureSample(Instruction* psInst, + uint32_t ui32Flags) +{ + bstring glsl = *psContext->currentGLSLString; + int numParenthesis = 0; + int hasParamOffset = (ui32Flags & TEXSMP_FLAG_PARAMOFFSET) ? 1 : 0; + + Operand* psDest = &psInst->asOperands[0]; + Operand* psDestAddr = &psInst->asOperands[1]; + Operand* psSrcOff = (ui32Flags & TEXSMP_FLAG_PARAMOFFSET) ? &psInst->asOperands[2] : 0; + Operand* psSrcTex = &psInst->asOperands[2 + hasParamOffset]; + Operand* psSrcSamp = &psInst->asOperands[3 + hasParamOffset]; + Operand* psSrcRef = (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) ? &psInst->asOperands[4 + hasParamOffset] : 0; + Operand* psSrcLOD = (ui32Flags & TEXSMP_FLAG_LOD) ? &psInst->asOperands[4] : 0; + Operand* psSrcDx = (ui32Flags & TEXSMP_FLAG_GRAD) ? &psInst->asOperands[4] : 0; + Operand* psSrcDy = (ui32Flags & TEXSMP_FLAG_GRAD) ? &psInst->asOperands[5] : 0; + Operand* psSrcBias = (ui32Flags & TEXSMP_FLAG_BIAS) ? &psInst->asOperands[4] : 0; + + const char* funcName = "texture"; + const char* offset = ""; + const char* depthCmpCoordType = ""; + const char* gradSwizzle = ""; + + uint32_t ui32NumOffsets = 0; + + const RESOURCE_DIMENSION eResDim = psContext->psShader->aeResourceDims[psSrcTex->ui32RegisterNumber]; + const int iHaveOverloadedTexFuncs = HaveOverloadedTextureFuncs(psContext->psShader->eTargetLanguage); + const int useCombinedTextureSamplers = (psContext->flags & HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS) ? 
1 : 0; + + if (psInst->bAddressOffset) + { + offset = "Offset"; + } + + switch (eResDim) + { + case RESOURCE_DIMENSION_TEXTURE1D: + { + depthCmpCoordType = "vec2"; + gradSwizzle = ".x"; + ui32NumOffsets = 1; + if (!iHaveOverloadedTexFuncs) + { + funcName = "texture1D"; + if (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) + { + funcName = "shadow1D"; + } + } + break; + } + case RESOURCE_DIMENSION_TEXTURE2D: + { + depthCmpCoordType = "vec3"; + gradSwizzle = ".xy"; + ui32NumOffsets = 2; + if (!iHaveOverloadedTexFuncs) + { + funcName = "texture2D"; + if (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) + { + funcName = "shadow2D"; + } + } + break; + } + case RESOURCE_DIMENSION_TEXTURECUBE: + { + depthCmpCoordType = "vec3"; + gradSwizzle = ".xyz"; + ui32NumOffsets = 3; + if (!iHaveOverloadedTexFuncs) + { + funcName = "textureCube"; + } + break; + } + case RESOURCE_DIMENSION_TEXTURE3D: + { + depthCmpCoordType = "vec4"; + gradSwizzle = ".xyz"; + ui32NumOffsets = 3; + if (!iHaveOverloadedTexFuncs) + { + funcName = "texture3D"; + } + break; + } + case RESOURCE_DIMENSION_TEXTURE1DARRAY: + { + depthCmpCoordType = "vec3"; + gradSwizzle = ".x"; + ui32NumOffsets = 1; + break; + } + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + { + depthCmpCoordType = "vec4"; + gradSwizzle = ".xy"; + ui32NumOffsets = 2; + break; + } + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + { + gradSwizzle = ".xyz"; + ui32NumOffsets = 3; + break; + } + default: + { + ASSERT(0); + break; + } + } + + if (ui32Flags & TEXSMP_FLAG_GATHER) + funcName = "textureGather"; + + uint32_t uniqueNameCounter; + + // In GLSL, for every texture sampling func overload, except for cubemap arrays, the + // depth compare reference value is given as the last component of the texture coord vector. + // Cubemap array sampling as well as all the gather funcs have a separate parameter for it. + // HLSL always provides the reference as a separate param. 
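+    // Illustrative example (variable names assumed): for a 2D depth-compare lookup this
+    // embedding produces something like
+    //     vec3 txVec0 = vec3(texcoord.xy, ref);
+    //     ... = texture(depthSampler, txVec0);
+    // with the reference value riding along as the last texcoord component.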
+ // + // Here we create a temp texcoord var with the reference value embedded + if ((ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) && + eResDim != RESOURCE_DIMENSION_TEXTURECUBEARRAY && + !(ui32Flags & TEXSMP_FLAG_GATHER)) + { + uniqueNameCounter = psContext->psShader->asPhases[psContext->currentPhase].m_NextTexCoordTemp++; + psContext->AddIndentation(); + // Create a temp variable for the coordinate as Adrenos hate nonstandard swizzles in the texcoords + bformata(glsl, "%s txVec%d = ", depthCmpCoordType, uniqueNameCounter); + bformata(glsl, "%s(", depthCmpCoordType); + TranslateTexCoord(eResDim, psDestAddr); + bcatcstr(glsl, ","); + // Last component is the reference + TranslateOperand(psSrcRef, TO_AUTO_BITCAST_TO_FLOAT); + bcatcstr(glsl, ");\n"); + } + + SHADER_VARIABLE_TYPE dataType = psContext->psShader->sInfo.GetTextureDataType(psSrcTex->ui32RegisterNumber); + psContext->AddIndentation(); + AddAssignToDest(psDest, dataType, psSrcTex->GetNumSwizzleElements(), &numParenthesis); + + // Func name depending on the flags + if (ui32Flags & (TEXSMP_FLAG_LOD | TEXSMP_FLAG_FIRSTLOD)) + bformata(glsl, "%sLod%s(", funcName, offset); + else if (ui32Flags & TEXSMP_FLAG_GRAD) + bformata(glsl, "%sGrad%s(", funcName, offset); + else + bformata(glsl, "%s%s(", funcName, offset); + + // Sampler name + if (!useCombinedTextureSamplers) + ResourceName(glsl, psContext, RGROUP_TEXTURE, psSrcTex->ui32RegisterNumber, ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE); + else + bcatcstr(glsl, TextureSamplerName(&psContext->psShader->sInfo, psSrcTex->ui32RegisterNumber, psSrcSamp->ui32RegisterNumber, ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE).c_str()); + + bcatcstr(glsl, ", "); + + // Texture coordinates, either from previously constructed temp + // or straight from the psDestAddr operand + if ((ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) && + eResDim != RESOURCE_DIMENSION_TEXTURECUBEARRAY && + !(ui32Flags & TEXSMP_FLAG_GATHER)) + bformata(glsl, "txVec%d", uniqueNameCounter); + else + TranslateTexCoord(eResDim, psDestAddr); + + // If depth compare reference was not embedded to texcoord + // then insert it here as a separate param + if ((ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) && + eResDim == RESOURCE_DIMENSION_TEXTURECUBEARRAY && + (ui32Flags & TEXSMP_FLAG_GATHER)) + { + bcatcstr(glsl, ", "); + TranslateOperand(psSrcRef, TO_AUTO_BITCAST_TO_FLOAT); + } + + // Add LOD/grad parameters based on the flags + if (ui32Flags & TEXSMP_FLAG_LOD) + { + bcatcstr(glsl, ", "); + TranslateOperand(psSrcLOD, TO_AUTO_BITCAST_TO_FLOAT); + if (psContext->psShader->ui32MajorVersion < 4) + { + bcatcstr(glsl, ".w"); + } + } + else if (ui32Flags & TEXSMP_FLAG_FIRSTLOD) + { + bcatcstr(glsl, ", 0.0"); + } + else if (ui32Flags & TEXSMP_FLAG_GRAD) + { + bcatcstr(glsl, ", vec4("); + TranslateOperand(psSrcDx, TO_AUTO_BITCAST_TO_FLOAT); + bcatcstr(glsl, ")"); + bcatcstr(glsl, gradSwizzle); + bcatcstr(glsl, ", vec4("); + TranslateOperand(psSrcDy, TO_AUTO_BITCAST_TO_FLOAT); + bcatcstr(glsl, ")"); + bcatcstr(glsl, gradSwizzle); + } + + // Add offset param + if (psInst->bAddressOffset) + { + if (ui32NumOffsets == 1) + { + bformata(glsl, ", %d", + psInst->iUAddrOffset); + } + else + if (ui32NumOffsets == 2) + { + bformata(glsl, ", ivec2(%d, %d)", + psInst->iUAddrOffset, + psInst->iVAddrOffset); + } + else + if (ui32NumOffsets == 3) + { + bformata(glsl, ", ivec3(%d, %d, %d)", + psInst->iUAddrOffset, + psInst->iVAddrOffset, + psInst->iWAddrOffset); + } + } + // HLSL gather has a variant with separate offset operand + else if (ui32Flags & TEXSMP_FLAG_PARAMOFFSET) + { + uint32_t mask = 
OPERAND_4_COMPONENT_MASK_X; + if (ui32NumOffsets > 1) + mask |= OPERAND_4_COMPONENT_MASK_Y; + if (ui32NumOffsets > 2) + mask |= OPERAND_4_COMPONENT_MASK_Z; + + bcatcstr(glsl, ","); + TranslateOperand(psSrcOff, TO_FLAG_INTEGER, mask); + } + + // Add bias if present + if (ui32Flags & TEXSMP_FLAG_BIAS) + { + bcatcstr(glsl, ", "); + TranslateOperand(psSrcBias, TO_AUTO_BITCAST_TO_FLOAT); + } + + // Add texture gather component selection if needed + if ((ui32Flags & TEXSMP_FLAG_GATHER) && psSrcSamp->GetNumSwizzleElements() > 0) + { + ASSERT(psSrcSamp->GetNumSwizzleElements() == 1); + if (psSrcSamp->aui32Swizzle[0] != OPERAND_4_COMPONENT_X) + { + if (!(ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE)) + { + bformata(glsl, ", %d", psSrcSamp->aui32Swizzle[0]); + } + else + { + // Comp selection not supported with dephth compare gather + } + } + } + + bcatcstr(glsl, ")"); + + if (!(ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) || (ui32Flags & TEXSMP_FLAG_GATHER)) + { + // iWriteMaskEnabled is forced off during DecodeOperand because swizzle on sampler uniforms + // does not make sense. But need to re-enable to correctly swizzle this particular instruction. + psSrcTex->iWriteMaskEnabled = 1; + TranslateOperandSwizzleWithMask(psContext, psSrcTex, psDest->GetAccessMask(), 0); + } + AddAssignPrologue(numParenthesis); +} + +const char* swizzleString[] = { ".x", ".y", ".z", ".w" }; + +// Handle cases where vector components are accessed with dynamic index ([] notation). +// A bit ugly hack because compiled HLSL uses byte offsets to access data in structs => we are converting +// the offset back to vector component index in runtime => calculating stuff back and forth. +// TODO: Would be better to eliminate the offset calculation ops and use indexes straight on. Could be tricky though... 
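+// Illustrative (hypothetical names): for a vec4 member sitting at byte offset 16 inside its
+// struct, with the dynamic byte address held in `addr`, the emitted access becomes
+//     someVec[(addr - 16u) >> 0x2u]
+// i.e. strip the member's own offset, then shift right by two to turn bytes into a component index.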
+void ToGLSL::TranslateDynamicComponentSelection(const ShaderVarType* psVarType, const Operand* psByteAddr, uint32_t offset, uint32_t mask) +{ + bstring glsl = *psContext->currentGLSLString; + ASSERT(psVarType->Class == SVC_VECTOR); + + bcatcstr(glsl, "["); // Access vector component with [] notation + if (offset > 0) + bcatcstr(glsl, "("); + + // The var containing byte address to the requested element + TranslateOperand(psByteAddr, TO_FLAG_UNSIGNED_INTEGER, mask); + + if (offset > 0)// If the vector is part of a struct, there is an extra offset in our byte address + bformata(glsl, " - %du)", offset); // Subtract that first + + bcatcstr(glsl, " >> 0x2u"); // Convert byte offset to index: div by four + bcatcstr(glsl, "]"); +} + +void ToGLSL::TranslateShaderStorageStore(Instruction* psInst) +{ + bstring glsl = *psContext->currentGLSLString; + int component; + int srcComponent = 0; + + Operand* psDest = 0; + Operand* psDestAddr = 0; + Operand* psDestByteOff = 0; + Operand* psSrc = 0; + + switch (psInst->eOpcode) + { + case OPCODE_STORE_STRUCTURED: + psDest = &psInst->asOperands[0]; + psDestAddr = &psInst->asOperands[1]; + psDestByteOff = &psInst->asOperands[2]; + psSrc = &psInst->asOperands[3]; + break; + case OPCODE_STORE_RAW: + psDest = &psInst->asOperands[0]; + psDestByteOff = &psInst->asOperands[1]; + psSrc = &psInst->asOperands[2]; + break; + default: + ASSERT(0); + break; + } + + uint32_t dstOffFlag = TO_FLAG_UNSIGNED_INTEGER; + SHADER_VARIABLE_TYPE dstOffType = psDestByteOff->GetDataType(psContext); + if (dstOffType == SVT_INT || dstOffType == SVT_INT16 || dstOffType == SVT_INT12) + dstOffFlag = TO_FLAG_INTEGER; + + for (component = 0; component < 4; component++) + { + ASSERT(psInst->asOperands[0].eSelMode == OPERAND_4_COMPONENT_MASK_MODE); + if (psInst->asOperands[0].ui32CompMask & (1 << component)) + { + psContext->AddIndentation(); + + TranslateOperand(psDest, TO_FLAG_DESTINATION | TO_FLAG_NAME_ONLY); + + if (psDest->eType != OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) + bcatcstr(glsl, "_buf"); + + if (psDestAddr) + { + bcatcstr(glsl, "["); + TranslateOperand(psDestAddr, TO_FLAG_INTEGER | TO_FLAG_UNSIGNED_INTEGER); + bcatcstr(glsl, "].value"); + } + + bcatcstr(glsl, "[("); + TranslateOperand(psDestByteOff, dstOffFlag); + bcatcstr(glsl, " >> 2"); + if (dstOffFlag == TO_FLAG_UNSIGNED_INTEGER) + bcatcstr(glsl, "u"); + bcatcstr(glsl, ")"); + + if (component != 0) + { + bformata(glsl, " + %d", component); + if (dstOffFlag == TO_FLAG_UNSIGNED_INTEGER) + bcatcstr(glsl, "u"); + } + + bcatcstr(glsl, "]"); + + //Dest type is currently always a uint array. 
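+                // At this point the emitted lvalue reads roughly as
+                //     dest_buf[structIndex].value[(byteOffset >> 2u) + component]
+                // (names illustrative; the [structIndex].value part only appears for structured
+                // buffers). Each written component becomes its own uint element store below.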
+ bcatcstr(glsl, " = "); + if (psSrc->GetNumSwizzleElements() > 1) + TranslateOperand(psSrc, TO_FLAG_UNSIGNED_INTEGER, 1 << (srcComponent++)); + else + TranslateOperand(psSrc, TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_X); + + bcatcstr(glsl, ";\n"); + } + } +} +void ToGLSL::TranslateShaderStorageLoad(Instruction* psInst) +{ + bstring glsl = *psContext->currentGLSLString; + int component; + Operand* psDest = 0; + Operand* psSrcAddr = 0; + Operand* psSrcByteOff = 0; + Operand* psSrc = 0; + + switch (psInst->eOpcode) + { + case OPCODE_LD_STRUCTURED: + psDest = &psInst->asOperands[0]; + psSrcAddr = &psInst->asOperands[1]; + psSrcByteOff = &psInst->asOperands[2]; + psSrc = &psInst->asOperands[3]; + break; + case OPCODE_LD_RAW: + psDest = &psInst->asOperands[0]; + psSrcByteOff = &psInst->asOperands[1]; + psSrc = &psInst->asOperands[2]; + break; + default: + ASSERT(0); + break; + } + + uint32_t destCount = psDest->GetNumSwizzleElements(); + uint32_t destMask = psDest->GetAccessMask(); + + int numParenthesis = 0; + int firstItemAdded = 0; + SHADER_VARIABLE_TYPE destDataType = psDest->GetDataType(psContext); + uint32_t srcOffFlag = TO_FLAG_UNSIGNED_INTEGER; + SHADER_VARIABLE_TYPE srcOffType = psSrcByteOff->GetDataType(psContext); + if (srcOffType == SVT_INT || srcOffType == SVT_INT16 || srcOffType == SVT_INT12) + srcOffFlag = TO_FLAG_INTEGER; + + psContext->AddIndentation(); + AddAssignToDest(psDest, destDataType, destCount, &numParenthesis); //TODO check this out? + if (destCount > 1) + { + bformata(glsl, "%s(", GetConstructorForTypeGLSL(destDataType, destCount, false)); + numParenthesis++; + } + for (component = 0; component < 4; component++) + { + const ShaderVarType *psVar = NULL; + int addedBitcast = 0; + if (!(destMask & (1 << component))) + continue; + + if (firstItemAdded) + bcatcstr(glsl, ", "); + else + firstItemAdded = 1; + + // always uint array atm + if (destDataType == SVT_FLOAT) + { + bcatcstr(glsl, "uintBitsToFloat("); + addedBitcast = 1; + } + else if (destDataType == SVT_INT || destDataType == SVT_INT16 || destDataType == SVT_INT12) + { + bcatcstr(glsl, "int("); + addedBitcast = 1; + } + + TranslateOperand(psSrc, TO_FLAG_NAME_ONLY); + + if (psSrc->eType != OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) + bcatcstr(glsl, "_buf"); + + if (psSrcAddr) + { + bcatcstr(glsl, "["); + TranslateOperand(psSrcAddr, TO_FLAG_UNSIGNED_INTEGER | TO_FLAG_INTEGER); + bcatcstr(glsl, "].value"); + } + bcatcstr(glsl, "[("); + TranslateOperand(psSrcByteOff, srcOffFlag); + bcatcstr(glsl, " >> 2"); + if (srcOffFlag == TO_FLAG_UNSIGNED_INTEGER) + bcatcstr(glsl, "u"); + + bformata(glsl, ") + %d", psSrc->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE ? 
psSrc->aui32Swizzle[component] : component); + if (srcOffFlag == TO_FLAG_UNSIGNED_INTEGER) + bcatcstr(glsl, "u"); + + bcatcstr(glsl, "]"); + + if (addedBitcast) + bcatcstr(glsl, ")"); + } + AddAssignPrologue(numParenthesis); +} + +void ToGLSL::TranslateAtomicMemOp(Instruction* psInst) +{ + bstring glsl = *psContext->currentGLSLString; + int numParenthesis = 0; + uint32_t ui32DataTypeFlag = TO_FLAG_INTEGER; + const char* func = ""; + Operand* dest = 0; + Operand* previousValue = 0; + Operand* destAddr = 0; + Operand* src = 0; + Operand* compare = 0; + int texDim = 0; + bool isUint = true; + + switch (psInst->eOpcode) + { + case OPCODE_IMM_ATOMIC_IADD: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_IADD\n"); +#endif + func = "Add"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_ATOMIC_IADD: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_IADD\n"); +#endif + func = "Add"; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + src = &psInst->asOperands[2]; + break; + } + case OPCODE_IMM_ATOMIC_AND: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_AND\n"); +#endif + func = "And"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_ATOMIC_AND: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_AND\n"); +#endif + func = "And"; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + src = &psInst->asOperands[2]; + break; + } + case OPCODE_IMM_ATOMIC_OR: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_OR\n"); +#endif + func = "Or"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_ATOMIC_OR: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_OR\n"); +#endif + func = "Or"; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + src = &psInst->asOperands[2]; + break; + } + case OPCODE_IMM_ATOMIC_XOR: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_XOR\n"); +#endif + func = "Xor"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_ATOMIC_XOR: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_XOR\n"); +#endif + func = "Xor"; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + src = &psInst->asOperands[2]; + break; + } + + case OPCODE_IMM_ATOMIC_EXCH: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_EXCH\n"); +#endif + func = "Exchange"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_IMM_ATOMIC_CMP_EXCH: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_CMP_EXC\n"); +#endif + func = "CompSwap"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + compare = &psInst->asOperands[3]; + src = &psInst->asOperands[4]; + break; + } + case OPCODE_ATOMIC_CMP_STORE: + { +#ifdef _DEBUG + 
psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_CMP_STORE\n"); +#endif + func = "CompSwap"; + previousValue = 0; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + compare = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_IMM_ATOMIC_UMIN: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_UMIN\n"); +#endif + func = "Min"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_ATOMIC_UMIN: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_UMIN\n"); +#endif + func = "Min"; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + src = &psInst->asOperands[2]; + break; + } + case OPCODE_IMM_ATOMIC_IMIN: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_IMIN\n"); +#endif + func = "Min"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_ATOMIC_IMIN: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_IMIN\n"); +#endif + func = "Min"; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + src = &psInst->asOperands[2]; + break; + } + case OPCODE_IMM_ATOMIC_UMAX: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_UMAX\n"); +#endif + func = "Max"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_ATOMIC_UMAX: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_UMAX\n"); +#endif + func = "Max"; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + src = &psInst->asOperands[2]; + break; + } + case OPCODE_IMM_ATOMIC_IMAX: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_IMAX\n"); +#endif + func = "Max"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_ATOMIC_IMAX: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_IMAX\n"); +#endif + func = "Max"; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + src = &psInst->asOperands[2]; + break; + } + default: + ASSERT(0); + break; + } + + psContext->AddIndentation(); + + if (dest->eType != OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) + { + const ResourceBinding* psBinding = 0; + psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, dest->ui32RegisterNumber, &psBinding); + + if (psBinding->eType == RTYPE_UAV_RWTYPED) + { + isUint = (psBinding->ui32ReturnType == RETURN_TYPE_UINT); + + // Find out if it's texture and of what dimension + switch (psBinding->eDimension) + { + case REFLECT_RESOURCE_DIMENSION_TEXTURE1D: + case REFLECT_RESOURCE_DIMENSION_BUFFER: + texDim = 1; + break; + case REFLECT_RESOURCE_DIMENSION_TEXTURECUBE: + case REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY: + case REFLECT_RESOURCE_DIMENSION_TEXTURE2D: + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS: + texDim = 2; + break; + case REFLECT_RESOURCE_DIMENSION_TEXTURE3D: + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY: + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + case REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY: + texDim = 3; + break; + default: + ASSERT(0); + 
break; + } + } + } + + if (isUint) + ui32DataTypeFlag = TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_BITCAST_TO_UINT; + else + ui32DataTypeFlag = TO_FLAG_INTEGER | TO_AUTO_BITCAST_TO_INT; + + if (previousValue) + AddAssignToDest(previousValue, isUint ? SVT_UINT : SVT_INT, 1, &numParenthesis); + + if (texDim > 0) + bcatcstr(glsl, "imageAtomic"); + else + bcatcstr(glsl, "atomic"); + + bcatcstr(glsl, func); + bcatcstr(glsl, "("); + + TranslateOperand(dest, TO_FLAG_DESTINATION | TO_FLAG_NAME_ONLY); + if (texDim > 0) + { + bcatcstr(glsl, ", "); + unsigned int compMask = OPERAND_4_COMPONENT_MASK_X; + if (texDim >= 2) + compMask |= OPERAND_4_COMPONENT_MASK_Y; + if (texDim == 3) + compMask |= OPERAND_4_COMPONENT_MASK_Z; + + TranslateOperand(destAddr, TO_FLAG_INTEGER, compMask); + } + else + { + if (dest->eType != OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) + bcatcstr(glsl, "_buf"); + + uint32_t destAddrFlag = TO_FLAG_UNSIGNED_INTEGER; + SHADER_VARIABLE_TYPE destAddrType = destAddr->GetDataType(psContext); + if (destAddrType == SVT_INT || destAddrType == SVT_INT16 || destAddrType == SVT_INT12) + destAddrFlag = TO_FLAG_INTEGER; + + bcatcstr(glsl, "["); + TranslateOperand(destAddr, destAddrFlag, OPERAND_4_COMPONENT_MASK_X); + + // Structured buf if we have both x & y swizzles. Raw buf has only x -> no .value[] + if (destAddr->GetNumSwizzleElements(OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y) == 2) + { + bcatcstr(glsl, "]"); + + bcatcstr(glsl, ".value["); + TranslateOperand(destAddr, destAddrFlag, OPERAND_4_COMPONENT_MASK_Y); + } + + bcatcstr(glsl, " >> 2");//bytes to floats + if (destAddrFlag == TO_FLAG_UNSIGNED_INTEGER) + bcatcstr(glsl, "u"); + + bcatcstr(glsl, "]"); + } + + bcatcstr(glsl, ", "); + + if (compare) + { + TranslateOperand(compare, ui32DataTypeFlag); + bcatcstr(glsl, ", "); + } + + TranslateOperand(src, ui32DataTypeFlag); + bcatcstr(glsl, ")"); + if (previousValue) + { + AddAssignPrologue(numParenthesis); + } + else + bcatcstr(glsl, ";\n"); +} + +void ToGLSL::TranslateConditional( + Instruction* psInst, + bstring glsl) +{ + const char* statement = ""; + if (psInst->eOpcode == OPCODE_BREAKC) + { + statement = "break"; + } + else if (psInst->eOpcode == OPCODE_CONTINUEC) + { + statement = "continue"; + } + else if (psInst->eOpcode == OPCODE_RETC) // FIXME! 
Need to spew out shader epilogue + { + statement = "return"; + } + + SHADER_VARIABLE_TYPE argType = psInst->asOperands[0].GetDataType(psContext); + if (argType == SVT_BOOL) + { + bcatcstr(glsl, "if("); + if (psInst->eBooleanTestType != INSTRUCTION_TEST_NONZERO) + bcatcstr(glsl, "!"); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_BOOL); + if (psInst->eOpcode != OPCODE_IF) + { + bformata(glsl, "){%s;}\n", statement); + } + else + { + bcatcstr(glsl, "){\n"); + } + } + else + { + uint32_t oFlag = TO_FLAG_UNSIGNED_INTEGER; + bool isInt = false; + if (argType == SVT_INT || argType == SVT_INT16 || argType == SVT_INT12) + { + isInt = true; + oFlag = TO_FLAG_INTEGER; + } + + bcatcstr(glsl, "if("); + TranslateOperand(&psInst->asOperands[0], oFlag); + + if (psInst->eBooleanTestType == INSTRUCTION_TEST_ZERO) + bcatcstr(glsl, " == "); + else + bcatcstr(glsl, " != "); + + if (isInt) + bcatcstr(glsl, "0)"); + else + bcatcstr(glsl, "uint(0u))"); + + + if (psInst->eOpcode != OPCODE_IF) + { + bformata(glsl, " {%s;}\n", statement); + } + else + { + bcatcstr(glsl, " {\n"); + } + } +} + +void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = false */) +{ + bstring glsl = *psContext->currentGLSLString; + int numParenthesis = 0; + const bool isVulkan = ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0); + + if (!isEmbedded) + { + +#ifdef _DEBUG + psContext->AddIndentation(); + bformata(glsl, "//Instruction %d\n", psInst->id); +#if 0 + if (psInst->id == 73) + { + ASSERT(1); //Set breakpoint here to debug an instruction from its ID. + } +#endif +#endif + + if (psInst->m_SkipTranslation) + return; + } + + switch (psInst->eOpcode) + { + case OPCODE_FTOI: + case OPCODE_FTOU: + { + uint32_t dstCount = psInst->asOperands[0].GetNumSwizzleElements(); + uint32_t srcCount = psInst->asOperands[1].GetNumSwizzleElements(); + SHADER_VARIABLE_TYPE castType = psInst->eOpcode == OPCODE_FTOU ? SVT_UINT : SVT_INT; +#ifdef _DEBUG + psContext->AddIndentation(); + if (psInst->eOpcode == OPCODE_FTOU) + bcatcstr(glsl, "//FTOU\n"); + else + bcatcstr(glsl, "//FTOI\n"); +#endif + switch (psInst->asOperands[0].eMinPrecision) + { + case OPERAND_MIN_PRECISION_DEFAULT: + break; + case OPERAND_MIN_PRECISION_SINT_16: + castType = SVT_INT16; + ASSERT(psInst->eOpcode == OPCODE_FTOI); + break; + case OPERAND_MIN_PRECISION_UINT_16: + castType = SVT_UINT16; + ASSERT(psInst->eOpcode == OPCODE_FTOU); + break; + default: + ASSERT(0); // We'd be doing bitcasts into low/mediump ints, not good. 
+ } + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[0], castType, srcCount, &numParenthesis); + bcatcstr(glsl, GetConstructorForTypeGLSL(castType, dstCount, false)); + bcatcstr(glsl, "("); // 1 + TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_FLOAT, psInst->asOperands[0].GetAccessMask()); + bcatcstr(glsl, ")"); // 1 + AddAssignPrologue(numParenthesis); + break; + } + + case OPCODE_MOV: + { +#ifdef _DEBUG + if (!isEmbedded) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//MOV\n"); + } +#endif + if(!isEmbedded) + psContext->AddIndentation(); + + AddMOVBinaryOp(&psInst->asOperands[0], &psInst->asOperands[1], isEmbedded); + break; + } + case OPCODE_ITOF://signed to float + case OPCODE_UTOF://unsigned to float + { + SHADER_VARIABLE_TYPE castType = SVT_FLOAT; + uint32_t dstCount = psInst->asOperands[0].GetNumSwizzleElements(); + uint32_t srcCount = psInst->asOperands[1].GetNumSwizzleElements(); + +#ifdef _DEBUG + psContext->AddIndentation(); + if (psInst->eOpcode == OPCODE_ITOF) + { + bcatcstr(glsl, "//ITOF\n"); + } + else + { + bcatcstr(glsl, "//UTOF\n"); + } +#endif + + switch (psInst->asOperands[0].eMinPrecision) + { + case OPERAND_MIN_PRECISION_DEFAULT: + break; + case OPERAND_MIN_PRECISION_FLOAT_2_8: + castType = SVT_FLOAT10; + break; + case OPERAND_MIN_PRECISION_FLOAT_16: + castType = SVT_FLOAT16; + break; + default: + ASSERT(0); // We'd be doing bitcasts into low/mediump ints, not good. + } + + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], castType, srcCount, &numParenthesis); + bcatcstr(glsl, GetConstructorForTypeGLSL(castType, dstCount, false)); + bcatcstr(glsl, "("); // 1 + TranslateOperand(&psInst->asOperands[1], psInst->eOpcode == OPCODE_UTOF ? TO_AUTO_BITCAST_TO_UINT : TO_AUTO_BITCAST_TO_INT, psInst->asOperands[0].GetAccessMask()); + bcatcstr(glsl, ")"); // 1 + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_MAD: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//MAD\n"); +#endif + CallTernaryOp("*", "+", psInst, 0, 1, 2, 3, TO_FLAG_NONE); + break; + } + case OPCODE_IMAD: + { + uint32_t ui32Flags = TO_FLAG_INTEGER; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMAD\n"); +#endif + + if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) + { + ui32Flags = TO_FLAG_UNSIGNED_INTEGER; + } + + CallTernaryOp("*", "+", psInst, 0, 1, 2, 3, ui32Flags); + break; + } + case OPCODE_DADD: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//DADD\n"); +#endif + CallBinaryOp("+", psInst, 0, 1, 2, SVT_DOUBLE); + break; + } + case OPCODE_IADD: + { + SHADER_VARIABLE_TYPE eType = SVT_INT; +#ifdef _DEBUG + if (!isEmbedded) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IADD\n"); + } +#endif + //Is this a signed or unsigned add? 
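+        // The temp register type analysis decides the flavour: if the destination register was
+        // inferred as uint the add is emitted through the unsigned path, otherwise as a signed
+        // int add; bitcasts are only inserted by the assignment helper when the inferred
+        // destination type disagrees with that choice.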
+ if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) + { + eType = SVT_UINT; + } + CallBinaryOp("+", psInst, 0, 1, 2, eType, isEmbedded); + break; + } + case OPCODE_ADD: + { + +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ADD\n"); +#endif + CallBinaryOp("+", psInst, 0, 1, 2, SVT_FLOAT); + break; + } + case OPCODE_OR: + { + /*Todo: vector version */ +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//OR\n"); +#endif + uint32_t dstSwizCount = psInst->asOperands[0].GetNumSwizzleElements(); + uint32_t destMask = psInst->asOperands[0].GetAccessMask(); + if (psInst->asOperands[0].GetDataType(psContext) == SVT_BOOL) + { + if (dstSwizCount == 1) + { + uint32_t destMask = psInst->asOperands[0].GetAccessMask(); + + int needsParenthesis = 0; + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_BOOL, psInst->asOperands[0].GetNumSwizzleElements(), &needsParenthesis); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_BOOL, destMask); + bcatcstr(glsl, " || "); + TranslateOperand(&psInst->asOperands[2], TO_FLAG_BOOL, destMask); + AddAssignPrologue(needsParenthesis); + } + else + { + // Do component-wise and, glsl doesn't support && on bvecs + for (uint32_t k = 0; k < 4; k++) + { + if ((destMask && (1 << k)) == 0) + continue; + + int needsParenthesis = 0; + psContext->AddIndentation(); + // Override dest mask temporarily + psInst->asOperands[0].ui32CompMask = (1 << k); + ASSERT(psInst->asOperands[0].eSelMode == OPERAND_4_COMPONENT_MASK_MODE); + AddAssignToDest(&psInst->asOperands[0], SVT_BOOL, 1, &needsParenthesis); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_BOOL, 1 << k); + bcatcstr(glsl, " || "); + TranslateOperand(&psInst->asOperands[2], TO_FLAG_BOOL, 1 << k); + AddAssignPrologue(needsParenthesis); + + } + // Restore old mask + psInst->asOperands[0].ui32CompMask = destMask; + } + + } + else + CallBinaryOp("|", psInst, 0, 1, 2, SVT_UINT); + break; + } + case OPCODE_AND: + { + SHADER_VARIABLE_TYPE eA = psInst->asOperands[1].GetDataType(psContext); + SHADER_VARIABLE_TYPE eB = psInst->asOperands[2].GetDataType(psContext); +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//AND\n"); +#endif + uint32_t destMask = psInst->asOperands[0].GetAccessMask(); + uint32_t dstSwizCount = psInst->asOperands[0].GetNumSwizzleElements(); + SHADER_VARIABLE_TYPE eDataType = psInst->asOperands[0].GetDataType(psContext); + uint32_t ui32Flags = SVTTypeToFlag(eDataType); + if (psInst->asOperands[0].GetDataType(psContext) == SVT_BOOL) + { + if (dstSwizCount == 1) + { + int needsParenthesis = 0; + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_BOOL, psInst->asOperands[0].GetNumSwizzleElements(), &needsParenthesis); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_BOOL, destMask); + bcatcstr(glsl, " && "); + TranslateOperand(&psInst->asOperands[2], TO_FLAG_BOOL, destMask); + AddAssignPrologue(needsParenthesis); + } + else + { + // Do component-wise and, glsl doesn't support && on bvecs + for (uint32_t k = 0; k < 4; k++) + { + if ((destMask & (1 << k)) == 0) + continue; + + int needsParenthesis = 0; + psContext->AddIndentation(); + // Override dest mask temporarily + psInst->asOperands[0].ui32CompMask = (1 << k); + ASSERT(psInst->asOperands[0].eSelMode == OPERAND_4_COMPONENT_MASK_MODE); + AddAssignToDest(&psInst->asOperands[0], SVT_BOOL, 1, &needsParenthesis); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_BOOL, 1 << k); + bcatcstr(glsl, " && "); + TranslateOperand(&psInst->asOperands[2], TO_FLAG_BOOL, 1 << 
k); + AddAssignPrologue(needsParenthesis); + + } + // Restore old mask + psInst->asOperands[0].ui32CompMask = destMask; + } + } + else if ((eA == SVT_BOOL || eB == SVT_BOOL) && !(eA == SVT_BOOL && eB == SVT_BOOL)) + { + int boolOp = eA == SVT_BOOL ? 1 : 2; + int otherOp = eA == SVT_BOOL ? 2 : 1; + int needsParenthesis = 0; + uint32_t i; + psContext->AddIndentation(); + + if (dstSwizCount == 1) + { + AddAssignToDest(&psInst->asOperands[0], eDataType, dstSwizCount, &needsParenthesis); + TranslateOperand(&psInst->asOperands[boolOp], TO_FLAG_BOOL, destMask); + bcatcstr(glsl, " ? "); + TranslateOperand(&psInst->asOperands[otherOp], ui32Flags, destMask); + bcatcstr(glsl, " : "); + + bcatcstr(glsl, GetConstructorForTypeGLSL(eDataType, dstSwizCount, false)); + bcatcstr(glsl, "("); + for (i = 0; i < dstSwizCount; i++) + { + if (i > 0) + bcatcstr(glsl, ", "); + switch (eDataType) + { + case SVT_FLOAT: + case SVT_FLOAT10: + case SVT_FLOAT16: + case SVT_DOUBLE: + bcatcstr(glsl, "0.0"); + break; + default: + bcatcstr(glsl, "0"); + + } + } + bcatcstr(glsl, ")"); + } + else if (eDataType == SVT_FLOAT) + { + // We can use mix() + AddAssignToDest(&psInst->asOperands[0], eDataType, dstSwizCount, &needsParenthesis); + bcatcstr(glsl, "mix("); + bcatcstr(glsl, GetConstructorForTypeGLSL(eDataType, dstSwizCount, false)); + bcatcstr(glsl, "("); + for (i = 0; i < dstSwizCount; i++) + { + if (i > 0) + bcatcstr(glsl, ", "); + switch (eDataType) + { + case SVT_FLOAT: + case SVT_FLOAT10: + case SVT_FLOAT16: + case SVT_DOUBLE: + bcatcstr(glsl, "0.0"); + break; + default: + bcatcstr(glsl, "0"); + + } + } + bcatcstr(glsl, "), "); + TranslateOperand(&psInst->asOperands[otherOp], ui32Flags, destMask); + bcatcstr(glsl, ", "); + bcatcstr(glsl, GetConstructorForTypeGLSL(eDataType, dstSwizCount, false)); + bcatcstr(glsl, "("); + TranslateOperand(&psInst->asOperands[boolOp], TO_FLAG_BOOL, destMask); + bcatcstr(glsl, ")"); + bcatcstr(glsl, ")"); + } + else + { + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, dstSwizCount, &needsParenthesis); + bcatcstr(glsl, "("); + bcatcstr(glsl, GetConstructorForTypeGLSL(SVT_UINT, dstSwizCount, false)); + bcatcstr(glsl, "("); + TranslateOperand(&psInst->asOperands[boolOp], TO_FLAG_BOOL, destMask); + bcatcstr(glsl, ") * 0xffffffffu) & "); + TranslateOperand(&psInst->asOperands[otherOp], TO_FLAG_UNSIGNED_INTEGER, destMask); + } + + AddAssignPrologue(needsParenthesis); + } + else + { + CallBinaryOp("&", psInst, 0, 1, 2, SVT_UINT); + } + + break; + } + case OPCODE_GE: + { + /* + dest = vec4(greaterThanEqual(vec4(srcA), vec4(srcB)); + Caveat: The result is a boolean but HLSL asm returns 0xFFFFFFFF/0x0 instead. 
+ */ +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//GE\n"); +#endif + AddComparison(psInst, CMP_GE, TO_FLAG_NONE); + break; + } + case OPCODE_MUL: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//MUL\n"); +#endif + CallBinaryOp("*", psInst, 0, 1, 2, SVT_FLOAT); + break; + } + case OPCODE_IMUL: + { + SHADER_VARIABLE_TYPE eType = SVT_INT; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMUL\n"); +#endif + if (psInst->asOperands[1].GetDataType(psContext) == SVT_UINT) + { + eType = SVT_UINT; + } + + ASSERT(psInst->asOperands[0].eType == OPERAND_TYPE_NULL); + + CallBinaryOp("*", psInst, 1, 2, 3, eType); + break; + } + case OPCODE_UDIV: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//UDIV\n"); +#endif + //destQuotient, destRemainder, src0, src1 + CallBinaryOp("/", psInst, 0, 2, 3, SVT_UINT); + CallBinaryOp("%", psInst, 1, 2, 3, SVT_UINT); + break; + } + case OPCODE_DIV: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//DIV\n"); +#endif + CallBinaryOp("/", psInst, 0, 1, 2, SVT_FLOAT); + break; + } + case OPCODE_SINCOS: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//SINCOS\n"); +#endif + // Need careful ordering if src == dest[0], as then the cos() will be reading from wrong value + if (psInst->asOperands[0].eType == psInst->asOperands[2].eType && + psInst->asOperands[0].ui32RegisterNumber == psInst->asOperands[2].ui32RegisterNumber) + { + // sin() result overwrites source, do cos() first. + // The case where both write the src shouldn't really happen anyway. + if (psInst->asOperands[1].eType != OPERAND_TYPE_NULL) + { + CallHelper1("cos", psInst, 1, 2, 1); + } + + if (psInst->asOperands[0].eType != OPERAND_TYPE_NULL) + { + CallHelper1( + "sin", psInst, 0, 2, 1); + } + } + else + { + if (psInst->asOperands[0].eType != OPERAND_TYPE_NULL) + { + CallHelper1("sin", psInst, 0, 2, 1); + } + + if (psInst->asOperands[1].eType != OPERAND_TYPE_NULL) + { + CallHelper1("cos", psInst, 1, 2, 1); + } + } + break; + } + + case OPCODE_DP2: + { + int numParenthesis = 0; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//DP2\n"); +#endif + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, 1, &numParenthesis); + bcatcstr(glsl, "dot("); + TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[2], TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); + bcatcstr(glsl, ")"); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_DP3: + { + int numParenthesis = 0; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//DP3\n"); +#endif + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, 1, &numParenthesis); + bcatcstr(glsl, "dot("); + TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[2], TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); + bcatcstr(glsl, ")"); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_DP4: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//DP4\n"); +#endif + CallHelper2("dot", psInst, 0, 1, 2, 0); + break; + } + case OPCODE_INE: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//INE\n"); +#endif + AddComparison(psInst, CMP_NE, TO_FLAG_INTEGER); + break; + } + case OPCODE_NE: 
+ { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//NE\n"); +#endif + AddComparison(psInst, CMP_NE, TO_FLAG_NONE); + break; + } + case OPCODE_IGE: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IGE\n"); +#endif + AddComparison(psInst, CMP_GE, TO_FLAG_INTEGER); + break; + } + case OPCODE_ILT: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ILT\n"); +#endif + AddComparison(psInst, CMP_LT, TO_FLAG_INTEGER); + break; + } + case OPCODE_LT: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//LT\n"); +#endif + AddComparison(psInst, CMP_LT, TO_FLAG_NONE); + break; + } + case OPCODE_IEQ: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IEQ\n"); +#endif + AddComparison(psInst, CMP_EQ, TO_FLAG_INTEGER); + break; + } + case OPCODE_ULT: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ULT\n"); +#endif + AddComparison(psInst, CMP_LT, TO_FLAG_UNSIGNED_INTEGER); + break; + } + case OPCODE_UGE: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//UGE\n"); +#endif + AddComparison(psInst, CMP_GE, TO_FLAG_UNSIGNED_INTEGER); + break; + } + case OPCODE_MOVC: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//MOVC\n"); +#endif + AddMOVCBinaryOp(&psInst->asOperands[0], &psInst->asOperands[1], &psInst->asOperands[2], &psInst->asOperands[3]); + break; + } + case OPCODE_SWAPC: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//SWAPC\n"); +#endif + // TODO needs temps!! + AddMOVCBinaryOp(&psInst->asOperands[0], &psInst->asOperands[2], &psInst->asOperands[4], &psInst->asOperands[3]); + AddMOVCBinaryOp(&psInst->asOperands[1], &psInst->asOperands[2], &psInst->asOperands[3], &psInst->asOperands[4]); + break; + } + + case OPCODE_LOG: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//LOG\n"); +#endif + CallHelper1("log2", psInst, 0, 1, 1); + break; + } + case OPCODE_RSQ: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//RSQ\n"); +#endif + CallHelper1("inversesqrt", psInst, 0, 1, 1); + break; + } + case OPCODE_EXP: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//EXP\n"); +#endif + CallHelper1("exp2", psInst, 0, 1, 1); + break; + } + case OPCODE_SQRT: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//SQRT\n"); +#endif + CallHelper1("sqrt", psInst, 0, 1, 1); + break; + } + case OPCODE_ROUND_PI: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ROUND_PI\n"); +#endif + CallHelper1("ceil", psInst, 0, 1, 1); + break; + } + case OPCODE_ROUND_NI: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ROUND_NI\n"); +#endif + CallHelper1("floor", psInst, 0, 1, 1); + break; + } + case OPCODE_ROUND_Z: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ROUND_Z\n"); +#endif + CallHelper1("trunc", psInst, 0, 1, 1); + break; + } + case OPCODE_ROUND_NE: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ROUND_NE\n"); +#endif + CallHelper1("roundEven", psInst, 0, 1, 1); + break; + } + case OPCODE_FRC: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//FRC\n"); +#endif + CallHelper1("fract", psInst, 0, 1, 1); + break; + } + case OPCODE_IMAX: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMAX\n"); +#endif + CallHelper2Int("max", psInst, 0, 1, 2, 1); + break; + } + case OPCODE_UMAX: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//UMAX\n"); +#endif + 
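+            // CallHelper2UInt emits a GLSL max(a, b) with both sources treated as uint
+            // (compare the Int and float variants used for IMAX/MAX below).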
CallHelper2UInt("max", psInst, 0, 1, 2, 1); + break; + } + case OPCODE_MAX: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//MAX\n"); +#endif + CallHelper2("max", psInst, 0, 1, 2, 1); + break; + } + case OPCODE_IMIN: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMIN\n"); +#endif + CallHelper2Int("min", psInst, 0, 1, 2, 1); + break; + } + case OPCODE_UMIN: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//UMIN\n"); +#endif + CallHelper2UInt("min", psInst, 0, 1, 2, 1); + break; + } + case OPCODE_MIN: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//MIN\n"); +#endif + CallHelper2("min", psInst, 0, 1, 2, 1); + break; + } + case OPCODE_GATHER4: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//GATHER4\n"); +#endif + TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER); + break; + } + case OPCODE_GATHER4_PO_C: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//GATHER4_PO_C\n"); +#endif + TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER | TEXSMP_FLAG_PARAMOFFSET | TEXSMP_FLAG_DEPTHCOMPARE); + break; + } + case OPCODE_GATHER4_PO: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//GATHER4_PO\n"); +#endif + TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER | TEXSMP_FLAG_PARAMOFFSET); + break; + } + case OPCODE_GATHER4_C: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//GATHER4_C\n"); +#endif + TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER | TEXSMP_FLAG_DEPTHCOMPARE); + break; + } + case OPCODE_SAMPLE: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE\n"); +#endif + TranslateTextureSample(psInst, TEXSMP_FLAG_NONE); + break; + } + case OPCODE_SAMPLE_L: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_L\n"); +#endif + TranslateTextureSample(psInst, TEXSMP_FLAG_LOD); + break; + } + case OPCODE_SAMPLE_C: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_C\n"); +#endif + + TranslateTextureSample(psInst, TEXSMP_FLAG_DEPTHCOMPARE); + break; + } + case OPCODE_SAMPLE_C_LZ: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_C_LZ\n"); +#endif + + TranslateTextureSample(psInst, TEXSMP_FLAG_DEPTHCOMPARE | TEXSMP_FLAG_FIRSTLOD); + break; + } + case OPCODE_SAMPLE_D: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_D\n"); +#endif + + TranslateTextureSample(psInst, TEXSMP_FLAG_GRAD); + break; + } + case OPCODE_SAMPLE_B: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_B\n"); +#endif + + TranslateTextureSample(psInst, TEXSMP_FLAG_BIAS); + break; + } + case OPCODE_RET: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//RET\n"); +#endif + if (psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode) + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//--- Post shader code ---\n"); +#endif + bconcat(glsl, psContext->psShader->asPhases[psContext->currentPhase].postShaderCode); +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//--- End post shader code ---\n"); +#endif + } + psContext->AddIndentation(); + bcatcstr(glsl, "return;\n"); + break; + } + case OPCODE_INTERFACE_CALL: + { + const char* name; + ShaderVar* psVar; + uint32_t varFound; + + uint32_t funcPointer; + uint32_t funcTableIndex; + uint32_t funcTable; + uint32_t funcBodyIndex; + uint32_t funcBody; + uint32_t ui32NumBodiesPerTable; + +#ifdef _DEBUG + 
psContext->AddIndentation(); + bcatcstr(glsl, "//INTERFACE_CALL\n"); +#endif + + ASSERT(psInst->asOperands[0].eIndexRep[0] == OPERAND_INDEX_IMMEDIATE32); + + funcPointer = psInst->asOperands[0].aui32ArraySizes[0]; + funcTableIndex = psInst->asOperands[0].aui32ArraySizes[1]; + funcBodyIndex = psInst->ui32FuncIndexWithinInterface; + + ui32NumBodiesPerTable = psContext->psShader->funcPointer[funcPointer].ui32NumBodiesPerTable; + + funcTable = psContext->psShader->funcPointer[funcPointer].aui32FuncTables[funcTableIndex]; + + funcBody = psContext->psShader->funcTable[funcTable].aui32FuncBodies[funcBodyIndex]; + + varFound = psContext->psShader->sInfo.GetInterfaceVarFromOffset(funcPointer, &psVar); + + ASSERT(varFound); + + name = &psVar->name[0]; + + psContext->AddIndentation(); + bcatcstr(glsl, name); + TranslateOperandIndexMAD(&psInst->asOperands[0], 1, ui32NumBodiesPerTable, funcBodyIndex); + //bformata(glsl, "[%d]", funcBodyIndex); + bcatcstr(glsl, "();\n"); + break; + } + case OPCODE_LABEL: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//LABEL\n"); +#endif + --psContext->indent; + psContext->AddIndentation(); + bcatcstr(glsl, "}\n"); //Closing brace ends the previous function. + psContext->AddIndentation(); + + bcatcstr(glsl, "subroutine(SubroutineType)\n"); + bcatcstr(glsl, "void "); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, "(){\n"); + ++psContext->indent; + break; + } + case OPCODE_COUNTBITS: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//COUNTBITS\n"); +#endif + psContext->AddIndentation(); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER | TO_FLAG_DESTINATION); + bcatcstr(glsl, " = bitCount("); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER); + bcatcstr(glsl, ");\n"); + break; + } + case OPCODE_FIRSTBIT_HI: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//FIRSTBIT_HI\n"); +#endif + psContext->AddIndentation(); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_UNSIGNED_INTEGER | TO_FLAG_DESTINATION); + bcatcstr(glsl, " = findMSB("); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER); + bcatcstr(glsl, ");\n"); + break; + } + case OPCODE_FIRSTBIT_LO: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//FIRSTBIT_LO\n"); +#endif + psContext->AddIndentation(); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_UNSIGNED_INTEGER | TO_FLAG_DESTINATION); + bcatcstr(glsl, " = findLSB("); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER); + bcatcstr(glsl, ");\n"); + break; + } + case OPCODE_FIRSTBIT_SHI: //signed high + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//FIRSTBIT_SHI\n"); +#endif + psContext->AddIndentation(); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER | TO_FLAG_DESTINATION); + bcatcstr(glsl, " = findMSB("); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER); + bcatcstr(glsl, ");\n"); + break; + } + case OPCODE_BFREV: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//BFREV\n"); +#endif + psContext->AddIndentation(); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER | TO_FLAG_DESTINATION); + bcatcstr(glsl, " = bitfieldReverse("); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER); + bcatcstr(glsl, ");\n"); + break; + } + case OPCODE_BFI: + { + uint32_t numelements_width = psInst->asOperands[1].GetNumSwizzleElements(); + uint32_t numelements_offset = psInst->asOperands[2].GetNumSwizzleElements(); + uint32_t numelements_dest 
= psInst->asOperands[0].GetNumSwizzleElements(); + uint32_t numoverall_elements = std::min(std::min(numelements_width, numelements_offset), numelements_dest); + uint32_t i, j; + static const char* bfi_elementidx[] = { "x", "y", "z", "w" }; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//BFI\n"); +#endif + + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_INT, numoverall_elements, &numParenthesis); + + if (numoverall_elements == 1) + bformata(glsl, "int("); + else + bformata(glsl, "ivec%d(", numoverall_elements); + + for (i = 0; i < numoverall_elements; ++i) + { + bcatcstr(glsl, "bitfieldInsert("); + + for (j = 4; j >= 1; --j) + { + uint32_t opSwizzleCount = psInst->asOperands[j].GetNumSwizzleElements(); + + if (opSwizzleCount != 1) + bcatcstr(glsl, " ("); + TranslateOperand(&psInst->asOperands[j], TO_FLAG_INTEGER); + if (opSwizzleCount != 1) + bformata(glsl, " ).%s", bfi_elementidx[i]); + if (j != 1) + bcatcstr(glsl, ","); + } + + bcatcstr(glsl, ") "); + if (i + 1 != numoverall_elements) + bcatcstr(glsl, ", "); + } + bcatcstr(glsl, ")"); + + if (numoverall_elements > 1) + { + bcatcstr(glsl, "."); + for (i = 0; i < numoverall_elements; ++i) + bformata(glsl, "%s", bfi_elementidx[i]); + } + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_CUT: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//CUT\n"); +#endif + psContext->AddIndentation(); + bcatcstr(glsl, "EndPrimitive();\n"); + break; + } + case OPCODE_EMIT: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//EMIT\n"); +#endif + if (psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode) + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//--- Post shader code ---\n"); +#endif + bconcat(glsl, psContext->psShader->asPhases[psContext->currentPhase].postShaderCode); +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//--- End post shader code ---\n"); +#endif + } + + psContext->AddIndentation(); + bcatcstr(glsl, "EmitVertex();\n"); + break; + } + case OPCODE_EMITTHENCUT: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//EMITTHENCUT\n"); +#endif + psContext->AddIndentation(); + bcatcstr(glsl, "EmitVertex();\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "EndPrimitive();\n"); + break; + } + + case OPCODE_CUT_STREAM: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//CUT_STREAM\n"); +#endif + psContext->AddIndentation(); + ASSERT(psInst->asOperands[0].eType == OPERAND_TYPE_STREAM); + if (psContext->psShader->eTargetLanguage < LANG_400 || psInst->asOperands[0].ui32RegisterNumber == 0) + { + // ES geom shaders only support one stream. 
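+                // Stream 0 (or any pre-GLSL-4.00 target) therefore falls back to the plain,
+                // non-stream EndPrimitive()/EmitVertex() calls.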
+ bcatcstr(glsl, "EndPrimitive();\n"); + } + else + { + bcatcstr(glsl, "EndStreamPrimitive("); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, ");\n"); + } + + break; + } + case OPCODE_EMIT_STREAM: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//EMIT_STREAM\n"); +#endif + if (psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode) + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//--- Post shader code ---\n"); +#endif + bconcat(glsl, psContext->psShader->asPhases[psContext->currentPhase].postShaderCode); +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//--- End post shader code ---\n"); +#endif + } + + psContext->AddIndentation(); + + ASSERT(psInst->asOperands[0].eType == OPERAND_TYPE_STREAM); + if (psContext->psShader->eTargetLanguage < LANG_400 || psInst->asOperands[0].ui32RegisterNumber == 0) + { + // ES geom shaders only support one stream. + bcatcstr(glsl, "EmitVertex();\n"); + } + else + { + bcatcstr(glsl, "EmitStreamVertex("); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, ");\n"); + } + break; + } + case OPCODE_EMITTHENCUT_STREAM: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//EMITTHENCUT\n"); +#endif + ASSERT(psInst->asOperands[0].eType == OPERAND_TYPE_STREAM); + if (psContext->psShader->eTargetLanguage < LANG_400 || psInst->asOperands[0].ui32RegisterNumber == 0) + { + // ES geom shaders only support one stream. + bcatcstr(glsl, "EmitVertex();\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "EndPrimitive();\n"); + } + else + { + bcatcstr(glsl, "EmitStreamVertex("); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, ");\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "EndStreamPrimitive("); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, ");\n"); + } + break; + } + case OPCODE_REP: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//REP\n"); +#endif + //Need to handle nesting. 
+ //Max of 4 for rep - 'Flow Control Limitations' http://msdn.microsoft.com/en-us/library/windows/desktop/bb219848(v=vs.85).aspx + + psContext->AddIndentation(); + bcatcstr(glsl, "RepCounter = "); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_X); + bcatcstr(glsl, ";\n"); + + psContext->AddIndentation(); + bcatcstr(glsl, "while(RepCounter!=0){\n"); + ++psContext->indent; + break; + } + case OPCODE_ENDREP: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ENDREP\n"); +#endif + psContext->AddIndentation(); + bcatcstr(glsl, "RepCounter--;\n"); + + --psContext->indent; + + psContext->AddIndentation(); + bcatcstr(glsl, "}\n"); + break; + } + case OPCODE_LOOP: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//LOOP\n"); +#endif + psContext->AddIndentation(); + + if (psInst->ui32NumOperands == 2) + { + //DX9 version + ASSERT(psInst->asOperands[0].eType == OPERAND_TYPE_SPECIAL_LOOPCOUNTER); + bcatcstr(glsl, "for("); + bcatcstr(glsl, "LoopCounter = "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE); + bcatcstr(glsl, ".y, ZeroBasedCounter = 0;"); + bcatcstr(glsl, "ZeroBasedCounter < "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE); + bcatcstr(glsl, ".x;"); + + bcatcstr(glsl, "LoopCounter += "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE); + bcatcstr(glsl, ".z, ZeroBasedCounter++){\n"); + ++psContext->indent; + } + else if (psInst->m_LoopInductors[1] != 0) + { + // Can emit as for + bcatcstr(glsl, "for("); + if (psInst->m_LoopInductors[0] != 0) + { + if (psInst->m_InductorRegister != 0) + { + // Do declaration here as well + switch (psInst->m_LoopInductors[0]->asOperands[0].GetDataType(psContext)) + { + case SVT_INT: + bcatcstr(glsl, "int "); + break; + case SVT_UINT: + bcatcstr(glsl, "uint "); + break; + default: + ASSERT(0); + break; + } + } + TranslateInstruction(psInst->m_LoopInductors[0], true); + } + bcatcstr(glsl, " ; "); + bool negateCondition = psInst->m_LoopInductors[1]->eBooleanTestType != INSTRUCTION_TEST_NONZERO; + bool negateOrder = false; + + // Yet Another NVidia OSX shader compiler bug workaround (really nvidia, get your s#!t together): + // For reasons unfathomable to us, this breaks SSAO effect on OSX (case 756028) + // Broken: for(int ti_loop_1 = int(int(0xFFFFFFFCu)) ; 4 >= ti_loop_1 ; ti_loop_1++) + // Works: for (int ti_loop_1 = int(int(0xFFFFFFFCu)); ti_loop_1 <= 4; ti_loop_1++) + // + // So, check if the first argument is an immediate value, and if so, switch the order or the operands + // (and adjust condition) + if (psInst->m_LoopInductors[1]->asOperands[1].eType == OPERAND_TYPE_IMMEDIATE32) + negateOrder = true; + + uint32_t typeFlags = TO_FLAG_INTEGER; + const char *cmpOp = ""; + switch (psInst->m_LoopInductors[1]->eOpcode) + { + case OPCODE_IGE: + if(negateOrder) + cmpOp = negateCondition ? ">" : "<="; + else + cmpOp = negateCondition ? "<" : ">="; + break; + case OPCODE_ILT: + if(negateOrder) + cmpOp = negateCondition ? "<=" : ">"; + else + cmpOp = negateCondition ? ">=" : "<"; + break; + case OPCODE_IEQ: + // No need to change the comparison if negateOrder is true + cmpOp = negateCondition ? "!=" : "=="; + if (psInst->m_LoopInductors[1]->asOperands[0].GetDataType(psContext) == SVT_UINT) + typeFlags = TO_FLAG_UNSIGNED_INTEGER; + break; + case OPCODE_INE: + // No need to change the comparison if negateOrder is true + cmpOp = negateCondition ? 
"==" : "!="; + if (psInst->m_LoopInductors[1]->asOperands[0].GetDataType(psContext) == SVT_UINT) + typeFlags = TO_FLAG_UNSIGNED_INTEGER; + break; + case OPCODE_UGE: + if(negateOrder) + cmpOp = negateCondition ? ">" : "<="; + else + cmpOp = negateCondition ? "<" : ">="; + typeFlags = TO_FLAG_UNSIGNED_INTEGER; + break; + case OPCODE_ULT: + if(negateOrder) + cmpOp = negateCondition ? "<=" : ">"; + else + cmpOp = negateCondition ? ">=" : "<"; + typeFlags = TO_FLAG_UNSIGNED_INTEGER; + break; + + default: + ASSERT(0); + } + TranslateOperand(&psInst->m_LoopInductors[1]->asOperands[negateOrder ? 2 : 1], typeFlags); + bcatcstr(glsl, cmpOp); + TranslateOperand(&psInst->m_LoopInductors[1]->asOperands[negateOrder ? 1 : 2], typeFlags); + + bcatcstr(glsl, " ; "); + // One more shortcut: translate IADD tX, tX, 1 to tX++ + if (HLSLcc::IsAddOneInstruction(psInst->m_LoopInductors[3])) + { + TranslateOperand(&psInst->m_LoopInductors[3]->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, "++"); + } + else + TranslateInstruction(psInst->m_LoopInductors[3], true); + + bcatcstr(glsl, ")\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "{\n"); + ++psContext->indent; + } + else + { + bcatcstr(glsl, "while(true){\n"); + ++psContext->indent; + } + break; + } + case OPCODE_ENDLOOP: + { + --psContext->indent; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ENDLOOP\n"); +#endif + psContext->AddIndentation(); + bcatcstr(glsl, "}\n"); + break; + } + case OPCODE_BREAK: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//BREAK\n"); +#endif + psContext->AddIndentation(); + bcatcstr(glsl, "break;\n"); + break; + } + case OPCODE_BREAKC: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//BREAKC\n"); +#endif + psContext->AddIndentation(); + + TranslateConditional(psInst, glsl); + break; + } + case OPCODE_CONTINUEC: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//CONTINUEC\n"); +#endif + psContext->AddIndentation(); + + TranslateConditional(psInst, glsl); + break; + } + case OPCODE_IF: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IF\n"); +#endif + psContext->AddIndentation(); + + TranslateConditional(psInst, glsl); + ++psContext->indent; + break; + } + case OPCODE_RETC: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//RETC\n"); +#endif + psContext->AddIndentation(); + + TranslateConditional(psInst, glsl); + break; + } + case OPCODE_ELSE: + { + --psContext->indent; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ELSE\n"); +#endif + psContext->AddIndentation(); + bcatcstr(glsl, "} else {\n"); + psContext->indent++; + break; + } + case OPCODE_ENDSWITCH: + case OPCODE_ENDIF: + { + --psContext->indent; + psContext->AddIndentation(); + bcatcstr(glsl, "//ENDIF\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "}\n"); + break; + } + case OPCODE_CONTINUE: + { + psContext->AddIndentation(); + bcatcstr(glsl, "continue;\n"); + break; + } + case OPCODE_DEFAULT: + { + --psContext->indent; + psContext->AddIndentation(); + bcatcstr(glsl, "default:\n"); + ++psContext->indent; + break; + } + case OPCODE_NOP: + { + break; + } + case OPCODE_SYNC: + { + const uint32_t ui32SyncFlags = psInst->ui32SyncFlags; + +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//SYNC\n"); +#endif + + if (ui32SyncFlags & SYNC_THREAD_GROUP_SHARED_MEMORY) + { + psContext->AddIndentation(); + bcatcstr(glsl, "memoryBarrierShared();\n"); + } + if (ui32SyncFlags & (SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GROUP | 
SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GLOBAL)) + { + psContext->AddIndentation(); + bcatcstr(glsl, "memoryBarrier();\n"); + } + if (ui32SyncFlags & SYNC_THREADS_IN_GROUP) + { + psContext->AddIndentation(); + bcatcstr(glsl, "barrier();\n"); + } + break; + } + case OPCODE_SWITCH: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//SWITCH\n"); +#endif + psContext->AddIndentation(); + bcatcstr(glsl, "switch(int("); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); + bcatcstr(glsl, ")){\n"); + + psContext->indent += 2; + break; + } + case OPCODE_CASE: + { + --psContext->indent; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//case\n"); +#endif + psContext->AddIndentation(); + + bcatcstr(glsl, "case "); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); + bcatcstr(glsl, ":\n"); + + ++psContext->indent; + break; + } + case OPCODE_EQ: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//EQ\n"); +#endif + AddComparison(psInst, CMP_EQ, TO_FLAG_NONE); + break; + } + case OPCODE_USHR: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//USHR\n"); +#endif + CallBinaryOp(">>", psInst, 0, 1, 2, SVT_UINT); + break; + } + case OPCODE_ISHL: + { + SHADER_VARIABLE_TYPE eType = SVT_INT; + +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ISHL\n"); +#endif + + if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) + { + eType = SVT_UINT; + } + + CallBinaryOp("<<", psInst, 0, 1, 2, eType); + break; + } + case OPCODE_ISHR: + { + SHADER_VARIABLE_TYPE eType = SVT_INT; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ISHR\n"); +#endif + + if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) + { + eType = SVT_UINT; + } + + CallBinaryOp(">>", psInst, 0, 1, 2, eType); + break; + } + case OPCODE_LD: + case OPCODE_LD_MS: + { + const ResourceBinding* psBinding = 0; +#ifdef _DEBUG + psContext->AddIndentation(); + if (psInst->eOpcode == OPCODE_LD) + bcatcstr(glsl, "//LD\n"); + else + bcatcstr(glsl, "//LD_MS\n"); +#endif + + psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, psInst->asOperands[2].ui32RegisterNumber, &psBinding); + + if (psInst->bAddressOffset) + { + TranslateTexelFetchOffset(psInst, psBinding, glsl); + } + else + { + TranslateTexelFetch(psInst, psBinding, glsl); + } + break; + } + case OPCODE_DISCARD: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//DISCARD\n"); +#endif + psContext->AddIndentation(); + if (psContext->psShader->ui32MajorVersion <= 3) + { + bcatcstr(glsl, "if(any(lessThan(("); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_NONE); + + if (psContext->psShader->ui32MajorVersion == 1) + { + /* SM1.X only kills based on the rgb channels */ + bcatcstr(glsl, ").xyz, vec3(0)))){discard;}\n"); + } + else + { + bcatcstr(glsl, "), vec4(0)))){discard;}\n"); + } + } + else if (psInst->eBooleanTestType == INSTRUCTION_TEST_ZERO) + { + bcatcstr(glsl, "if(("); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); + bcatcstr(glsl, ")==0){discard;}\n"); + } + else + { + ASSERT(psInst->eBooleanTestType == INSTRUCTION_TEST_NONZERO); + bcatcstr(glsl, "if(("); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); + bcatcstr(glsl, ")!=0){discard;}\n"); + } + break; + } + case OPCODE_LOD: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//LOD\n"); +#endif + //LOD computes the following vector (ClampedLOD, NonClampedLOD, 0, 0) + + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], 
SVT_FLOAT, 4, &numParenthesis); + + //If the core language does not have query-lod feature, + //then the extension is used. The name of the function + //changed between extension and core. + if (HaveQueryLod(psContext->psShader->eTargetLanguage)) + { + bcatcstr(glsl, "textureQueryLod("); + } + else + { + bcatcstr(glsl, "textureQueryLOD("); + } + + TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); + bcatcstr(glsl, ","); + TranslateTexCoord( + psContext->psShader->aeResourceDims[psInst->asOperands[2].ui32RegisterNumber], + &psInst->asOperands[1]); + bcatcstr(glsl, ")"); + + //The swizzle on srcResource allows the returned values to be swizzled arbitrarily before they are written to the destination. + + // iWriteMaskEnabled is forced off during DecodeOperand because swizzle on sampler uniforms + // does not make sense. But need to re-enable to correctly swizzle this particular instruction. + psInst->asOperands[2].iWriteMaskEnabled = 1; + TranslateOperandSwizzleWithMask(psContext, &psInst->asOperands[2], psInst->asOperands[0].GetAccessMask(), 0); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_EVAL_CENTROID: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//EVAL_CENTROID\n"); +#endif + psContext->AddIndentation(); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, " = interpolateAtCentroid("); + //interpolateAtCentroid accepts in-qualified variables. + //As long as bytecode only writes vX registers in declarations + //we should be able to use the declared name directly. + TranslateOperand(&psInst->asOperands[1], TO_FLAG_DECLARATION_NAME); + bcatcstr(glsl, ");\n"); + break; + } + case OPCODE_EVAL_SAMPLE_INDEX: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//EVAL_SAMPLE_INDEX\n"); +#endif + psContext->AddIndentation(); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, " = interpolateAtSample("); + //interpolateAtSample accepts in-qualified variables. + //As long as bytecode only writes vX registers in declarations + //we should be able to use the declared name directly. + TranslateOperand(&psInst->asOperands[1], TO_FLAG_DECLARATION_NAME); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[2], TO_FLAG_INTEGER); + bcatcstr(glsl, ");\n"); + break; + } + case OPCODE_EVAL_SNAPPED: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//EVAL_SNAPPED\n"); +#endif + psContext->AddIndentation(); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, " = interpolateAtOffset("); + //interpolateAtOffset accepts in-qualified variables. + //As long as bytecode only writes vX registers in declarations + //we should be able to use the declared name directly. 
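+            // Illustrative output (identifier names are hypothetical):
+            //   dest = interpolateAtOffset(vs_TEXCOORD0, offsetReg.xy);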
+ TranslateOperand(&psInst->asOperands[1], TO_FLAG_DECLARATION_NAME); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[2], TO_FLAG_INTEGER); + bcatcstr(glsl, ".xy);\n"); + break; + } + case OPCODE_LD_STRUCTURED: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//LD_STRUCTURED\n"); +#endif + TranslateShaderStorageLoad(psInst); + break; + } + case OPCODE_LD_UAV_TYPED: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//LD_UAV_TYPED\n"); +#endif + Operand* psDest = &psInst->asOperands[0]; + Operand* psSrc = &psInst->asOperands[2]; + Operand* psSrcAddr = &psInst->asOperands[1]; + + int srcCount = psSrc->GetNumSwizzleElements(); + int numParenthesis = 0; + uint32_t compMask = 0; + + switch (psInst->eResDim) + { + case RESOURCE_DIMENSION_TEXTURE3D: + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + compMask |= (1 << 2); + case RESOURCE_DIMENSION_TEXTURECUBE: + case RESOURCE_DIMENSION_TEXTURE1DARRAY: + case RESOURCE_DIMENSION_TEXTURE2D: + case RESOURCE_DIMENSION_TEXTURE2DMS: + compMask |= (1 << 1); + case RESOURCE_DIMENSION_TEXTURE1D: + case RESOURCE_DIMENSION_BUFFER: + compMask |= 1; + break; + default: + ASSERT(0); + break; + } + + SHADER_VARIABLE_TYPE srcDataType; + const ResourceBinding* psBinding = 0; + psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, psSrc->ui32RegisterNumber, &psBinding); + switch (psBinding->ui32ReturnType) + { + case RETURN_TYPE_FLOAT: + srcDataType = SVT_FLOAT; + break; + case RETURN_TYPE_SINT: + srcDataType = SVT_INT; + break; + case RETURN_TYPE_UINT: + srcDataType = SVT_UINT; + break; + default: + ASSERT(0); + break; + } + + psContext->AddIndentation(); + AddAssignToDest(psDest, srcDataType, srcCount, &numParenthesis); + bcatcstr(glsl, "imageLoad("); + TranslateOperand(psSrc, TO_FLAG_NAME_ONLY); + bcatcstr(glsl, ", "); + TranslateOperand(psSrcAddr, TO_FLAG_INTEGER, compMask); + bcatcstr(glsl, ")"); + TranslateOperandSwizzle(psContext, &psInst->asOperands[0], 0); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_STORE_RAW: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//STORE_RAW\n"); +#endif + TranslateShaderStorageStore(psInst); + break; + } + case OPCODE_STORE_STRUCTURED: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//STORE_STRUCTURED\n"); +#endif + TranslateShaderStorageStore(psInst); + break; + } + + case OPCODE_STORE_UAV_TYPED: + { + const ResourceBinding* psRes; + int foundResource; + uint32_t flags = TO_FLAG_INTEGER; + uint32_t opMask = OPERAND_4_COMPONENT_MASK_ALL; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//STORE_UAV_TYPED\n"); +#endif + psContext->AddIndentation(); + + foundResource = psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, + psInst->asOperands[0].ui32RegisterNumber, + &psRes); + + ASSERT(foundResource); + + bcatcstr(glsl, "imageStore("); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_NAME_ONLY); + bcatcstr(glsl, ", "); + + switch (psRes->eDimension) + { + case REFLECT_RESOURCE_DIMENSION_TEXTURE1D: + case REFLECT_RESOURCE_DIMENSION_BUFFER: + opMask = OPERAND_4_COMPONENT_MASK_X; + break; + case REFLECT_RESOURCE_DIMENSION_TEXTURE2D: + case REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY: + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS: + opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y; + flags |= TO_AUTO_EXPAND_TO_VEC2; + break; + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY: + case 
REFLECT_RESOURCE_DIMENSION_TEXTURE3D: + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + case REFLECT_RESOURCE_DIMENSION_TEXTURECUBE: + opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z; + flags |= TO_AUTO_EXPAND_TO_VEC3; + break; + case REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY: + flags |= TO_AUTO_EXPAND_TO_VEC4; + break; + default: + ASSERT(0); + break; + }; + + TranslateOperand(&psInst->asOperands[1], flags, opMask); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[2], ResourceReturnTypeToFlag(psRes->ui32ReturnType)); + bformata(glsl, ");\n"); + + break; + } + case OPCODE_LD_RAW: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//LD_RAW\n"); +#endif + + TranslateShaderStorageLoad(psInst); + break; + } + + case OPCODE_ATOMIC_AND: + case OPCODE_ATOMIC_OR: + case OPCODE_ATOMIC_XOR: + case OPCODE_ATOMIC_CMP_STORE: + case OPCODE_ATOMIC_IADD: + case OPCODE_ATOMIC_IMAX: + case OPCODE_ATOMIC_IMIN: + case OPCODE_ATOMIC_UMAX: + case OPCODE_ATOMIC_UMIN: + case OPCODE_IMM_ATOMIC_IADD: + case OPCODE_IMM_ATOMIC_AND: + case OPCODE_IMM_ATOMIC_OR: + case OPCODE_IMM_ATOMIC_XOR: + case OPCODE_IMM_ATOMIC_EXCH: + case OPCODE_IMM_ATOMIC_CMP_EXCH: + case OPCODE_IMM_ATOMIC_IMAX: + case OPCODE_IMM_ATOMIC_IMIN: + case OPCODE_IMM_ATOMIC_UMAX: + case OPCODE_IMM_ATOMIC_UMIN: + { + TranslateAtomicMemOp(psInst); + break; + } + case OPCODE_UBFE: + case OPCODE_IBFE: + { + int numParenthesis = 0; + int i; + uint32_t writeMask = psInst->asOperands[0].GetAccessMask(); + SHADER_VARIABLE_TYPE dataType = psInst->eOpcode == OPCODE_UBFE ? SVT_UINT : SVT_INT; + uint32_t flags = psInst->eOpcode == OPCODE_UBFE ? TO_AUTO_BITCAST_TO_UINT : TO_AUTO_BITCAST_TO_INT; +#ifdef _DEBUG + psContext->AddIndentation(); + if (psInst->eOpcode == OPCODE_UBFE) + bcatcstr(glsl, "//OPCODE_UBFE\n"); + else + bcatcstr(glsl, "//OPCODE_IBFE\n"); +#endif + // Need to open this up, GLSL bitfieldextract uses same offset and width for all components + for (i = 0; i < 4; i++) + { + if ((writeMask & (1 << i)) == 0) + continue; + psContext->AddIndentation(); + psInst->asOperands[0].ui32CompMask = (1 << i); + psInst->asOperands[0].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; + AddAssignToDest(&psInst->asOperands[0], dataType, 1, &numParenthesis); + + bcatcstr(glsl, "bitfieldExtract("); + TranslateOperand(&psInst->asOperands[3], flags, (1 << i)); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[2], TO_AUTO_BITCAST_TO_INT, (1 << i)); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_INT, (1 << i)); + bcatcstr(glsl, ")"); + AddAssignPrologue(numParenthesis); + + } + break; + } + case OPCODE_RCP: + { + const uint32_t destElemCount = psInst->asOperands[0].GetNumSwizzleElements(); + const uint32_t srcElemCount = psInst->asOperands[1].GetNumSwizzleElements(); + int numParenthesis = 0; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//RCP\n"); +#endif + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, srcElemCount, &numParenthesis); + bcatcstr(glsl, GetConstructorForTypeGLSL(SVT_FLOAT, destElemCount, false)); + bcatcstr(glsl, "(1.0) / "); + bcatcstr(glsl, GetConstructorForTypeGLSL(SVT_FLOAT, destElemCount, false)); + bcatcstr(glsl, "("); + numParenthesis++; + TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE, psInst->asOperands[0].GetAccessMask()); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_F32TOF16: + { + const uint32_t destElemCount = 
psInst->asOperands[0].GetNumSwizzleElements(); + const uint32_t s0ElemCount = psInst->asOperands[1].GetNumSwizzleElements(); + uint32_t destElem; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//F32TOF16\n"); +#endif + for (destElem = 0; destElem < destElemCount; ++destElem) + { + const char* swizzle[] = { ".x", ".y", ".z", ".w" }; + + //unpackHalf2x16 converts two f16s packed into uint to two f32s. + + //dest.swiz.x = unpackHalf2x16(src.swiz.x).x + //dest.swiz.y = unpackHalf2x16(src.swiz.y).x + //dest.swiz.z = unpackHalf2x16(src.swiz.z).x + //dest.swiz.w = unpackHalf2x16(src.swiz.w).x + + psContext->AddIndentation(); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + if (destElemCount > 1) + bcatcstr(glsl, swizzle[destElem]); + + bcatcstr(glsl, " = unpackHalf2x16("); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER); + if (s0ElemCount > 1) + bcatcstr(glsl, swizzle[destElem]); + bcatcstr(glsl, ").x;\n"); + + } + break; + } + case OPCODE_F16TOF32: + { + const uint32_t destElemCount = psInst->asOperands[0].GetNumSwizzleElements(); + const uint32_t s0ElemCount = psInst->asOperands[1].GetNumSwizzleElements(); + uint32_t destElem; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//F16TOF32\n"); +#endif + for (destElem = 0; destElem < destElemCount; ++destElem) + { + const char* swizzle[] = { ".x", ".y", ".z", ".w" }; + + //packHalf2x16 converts two f32s to two f16s packed into a uint. + + //dest.swiz.x = packHalf2x16(vec2(src.swiz.x)) & 0xFFFF + //dest.swiz.y = packHalf2x16(vec2(src.swiz.y)) & 0xFFFF + //dest.swiz.z = packHalf2x16(vec2(src.swiz.z)) & 0xFFFF + //dest.swiz.w = packHalf2x16(vec2(src.swiz.w)) & 0xFFFF + + psContext->AddIndentation(); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION | TO_FLAG_UNSIGNED_INTEGER); + if (destElemCount > 1) + bcatcstr(glsl, swizzle[destElem]); + + bcatcstr(glsl, " = packHalf2x16(vec2("); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE); + if (s0ElemCount > 1) + bcatcstr(glsl, swizzle[destElem]); + bcatcstr(glsl, ")) & 0xFFFF;\n"); + + } + break; + } + case OPCODE_INEG: + { + int numParenthesis = 0; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//INEG\n"); +#endif + //dest = 0 - src0 + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[0], SVT_INT, psInst->asOperands[1].GetNumSwizzleElements(), &numParenthesis); + + bcatcstr(glsl, "0 - "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, psInst->asOperands[0].GetAccessMask()); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_DERIV_RTX_COARSE: + case OPCODE_DERIV_RTX_FINE: + case OPCODE_DERIV_RTX: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//DERIV_RTX\n"); +#endif + CallHelper1("dFdx", psInst, 0, 1, 1); + break; + } + case OPCODE_DERIV_RTY_COARSE: + case OPCODE_DERIV_RTY_FINE: + case OPCODE_DERIV_RTY: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//DERIV_RTY\n"); +#endif + CallHelper1("dFdy", psInst, 0, 1, 1); + break; + } + case OPCODE_LRP: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//LRP\n"); +#endif + CallHelper3("mix", psInst, 0, 2, 3, 1, 1); + break; + } + case OPCODE_DP2ADD: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//DP2ADD\n"); +#endif + psContext->AddIndentation(); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, " = dot(vec2("); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE); + bcatcstr(glsl, "), vec2("); + 
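+            // Taken together these emit: dest = dot(vec2(src1), vec2(src2)) + src3;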
TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); + bcatcstr(glsl, ")) + "); + TranslateOperand(&psInst->asOperands[3], TO_FLAG_NONE); + bcatcstr(glsl, ";\n"); + break; + } + case OPCODE_POW: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//POW\n"); +#endif + psContext->AddIndentation(); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, " = pow(abs("); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE); + bcatcstr(glsl, "), "); + TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); + bcatcstr(glsl, ");\n"); + break; + } + + case OPCODE_IMM_ATOMIC_ALLOC: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_ALLOC\n"); +#endif + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, &numParenthesis); + if (isVulkan) + bcatcstr(glsl, "atomicAdd("); + else + bcatcstr(glsl, "atomicCounterIncrement("); + ResourceName(glsl, psContext, RGROUP_UAV, psInst->asOperands[1].ui32RegisterNumber, 0); + bformata(glsl, "_counter"); + if (isVulkan) + bcatcstr(glsl, ", 1u)"); + else + bcatcstr(glsl, ")"); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_IMM_ATOMIC_CONSUME: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_CONSUME\n"); +#endif + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, &numParenthesis); + if (isVulkan) + bcatcstr(glsl, "atomicAdd("); + else + bcatcstr(glsl, "atomicCounterDecrement("); + ResourceName(glsl, psContext, RGROUP_UAV, psInst->asOperands[1].ui32RegisterNumber, 0); + bformata(glsl, "_counter"); + if (isVulkan) + bcatcstr(glsl, ", 0xffffffffu)"); + else + bcatcstr(glsl, ")"); + AddAssignPrologue(numParenthesis); + break; + } + + case OPCODE_NOT: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//INOT\n"); +#endif + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_INT, psInst->asOperands[1].GetNumSwizzleElements(), &numParenthesis); + + bcatcstr(glsl, "~"); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, psInst->asOperands[0].GetAccessMask()); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_XOR: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//XOR\n"); +#endif + CallBinaryOp("^", psInst, 0, 1, 2, SVT_UINT); + break; + } + case OPCODE_RESINFO: + { + + uint32_t destElemCount = psInst->asOperands[0].GetNumSwizzleElements(); + uint32_t destElem; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//RESINFO\n"); +#endif + + for (destElem = 0; destElem < destElemCount; ++destElem) + { + GetResInfoData(psInst, psInst->asOperands[2].aui32Swizzle[destElem], destElem); + } + + break; + } + case OPCODE_BUFINFO: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//BUFINFO\n"); +#endif + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_INT, 1, &numParenthesis); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_NAME_ONLY); + bcatcstr(glsl, "_buf.length()"); + AddAssignPrologue(numParenthesis); + break; + } + + case OPCODE_DMAX: + case OPCODE_DMIN: + case OPCODE_DMUL: + case OPCODE_DEQ: + case OPCODE_DGE: + case OPCODE_DLT: + case OPCODE_DNE: + case OPCODE_DMOV: + case OPCODE_DMOVC: + case OPCODE_DTOF: + case OPCODE_FTOD: + case OPCODE_DDIV: + case OPCODE_DFMA: + case OPCODE_DRCP: + case OPCODE_MSAD: + case OPCODE_DTOI: + case OPCODE_DTOU: + case OPCODE_ITOD: + case OPCODE_UTOD: + default: + { + ASSERT(0); + break; + } + } + + if 
(psInst->bSaturate) //Saturate is only for floating point data (float opcodes or MOV) + { + int dstCount = psInst->asOperands[0].GetNumSwizzleElements(); + + const bool workaroundAdrenoBugs = psContext->psShader->eTargetLanguage == LANG_ES_300; + + if (workaroundAdrenoBugs) + bcatcstr(glsl, "#ifdef UNITY_ADRENO_ES3\n"); + + for (int i = workaroundAdrenoBugs ? 0 : 1; i < 2; ++i) + { + const bool generateWorkaround = (i == 0); + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, dstCount, &numParenthesis); + bcatcstr(glsl, generateWorkaround ? "min(max(" : "clamp("); + TranslateOperand(&psInst->asOperands[0], TO_AUTO_BITCAST_TO_FLOAT); + bcatcstr(glsl, generateWorkaround ? ", 0.0), 1.0)" : ", 0.0, 1.0)"); + AddAssignPrologue(numParenthesis); + + if (generateWorkaround) + bcatcstr(glsl, "#else\n"); + } + + if (workaroundAdrenoBugs) + bcatcstr(glsl, "#endif\n"); + } +} diff --git a/src/toGLSLOperand.cpp b/src/toGLSLOperand.cpp new file mode 100644 index 0000000..a7c158f --- /dev/null +++ b/src/toGLSLOperand.cpp @@ -0,0 +1,1616 @@ +#include "internal_includes/toGLSLOperand.h" +#include "internal_includes/HLSLccToolkit.h" +#include "internal_includes/HLSLCrossCompilerContext.h" +#include "bstrlib.h" +#include "hlslcc.h" +#include "internal_includes/debug.h" +#include "internal_includes/Shader.h" +#include "internal_includes/toGLSL.h" +#include + +#include + +#include +#include + +using namespace HLSLcc; + +#ifdef _MSC_VER +#define fpcheck(x) (_isnan(x) || !_finite(x)) +#else +#define fpcheck(x) (std::isnan(x) || std::isinf(x)) +#endif + + +// Returns nonzero if types are just different precisions of the same underlying type +static bool AreTypesCompatible(SHADER_VARIABLE_TYPE a, uint32_t ui32TOFlag) +{ + SHADER_VARIABLE_TYPE b = TypeFlagsToSVTType(ui32TOFlag); + + if (a == b) + return true; + + // Special case for array indices: both uint and int are fine + if ((ui32TOFlag & TO_FLAG_INTEGER) && (ui32TOFlag & TO_FLAG_UNSIGNED_INTEGER) && + (a == SVT_INT || a == SVT_INT16 || a == SVT_UINT || a == SVT_UINT16)) + return true; + + if ((a == SVT_FLOAT || a == SVT_FLOAT16 || a == SVT_FLOAT10) && + (b == SVT_FLOAT || b == SVT_FLOAT16 || b == SVT_FLOAT10)) + return true; + + if ((a == SVT_INT || a == SVT_INT16 || a == SVT_INT12) && + (b == SVT_INT || b == SVT_INT16 || a == SVT_INT12)) + return true; + + if ((a == SVT_UINT || a == SVT_UINT16) && + (b == SVT_UINT || b == SVT_UINT16)) + return true; + + return false; +} + +void TranslateOperandSwizzle(HLSLCrossCompilerContext* psContext, const Operand* psOperand, int iRebase) +{ + TranslateOperandSwizzleWithMask(psContext, psOperand, OPERAND_4_COMPONENT_MASK_ALL, iRebase); +} + +void TranslateOperandSwizzleWithMask(HLSLCrossCompilerContext* psContext, const Operand* psOperand, uint32_t ui32ComponentMask, int iRebase) +{ + bstring glsl = *psContext->currentGLSLString; + uint32_t accessMask = ui32ComponentMask & psOperand->GetAccessMask(); + if(psOperand->eType == OPERAND_TYPE_INPUT) + { + int regSpace = psOperand->GetRegisterSpace(psContext); + // Skip swizzle for scalar inputs, but only if we haven't redirected them + if (regSpace == 0) + { + if ((psContext->psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] == 0) && + (psContext->psShader->abScalarInput[regSpace][psOperand->ui32RegisterNumber] & accessMask)) + { + return; + } + } + else + { + if ((psContext->psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0) && + 
(psContext->psShader->abScalarInput[regSpace][psOperand->ui32RegisterNumber] & accessMask)) + { + return; + } + } + } + if (psOperand->eType == OPERAND_TYPE_OUTPUT) + { + int regSpace = psOperand->GetRegisterSpace(psContext); + // Skip swizzle for scalar outputs, but only if we haven't redirected them + if (regSpace == 0) + { + if ((psContext->psShader->asPhases[psContext->currentPhase].acOutputNeedsRedirect[psOperand->ui32RegisterNumber] == 0) && + (psContext->psShader->abScalarOutput[regSpace][psOperand->ui32RegisterNumber] & accessMask)) + { + return; + } + } + else + { + if ((psContext->psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0) && + (psContext->psShader->abScalarOutput[regSpace][psOperand->ui32RegisterNumber] & accessMask)) + { + return; + } + } + } + + if(psOperand->eType == OPERAND_TYPE_CONSTANT_BUFFER) + { + /*ConstantBuffer* psCBuf = NULL; + ShaderVar* psVar = NULL; + int32_t index = -1; + GetConstantBufferFromBindingPoint(psOperand->aui32ArraySizes[0], &psContext->psShader->sInfo, &psCBuf); + + //Access the Nth vec4 (N=psOperand->aui32ArraySizes[1]) + //then apply the sizzle. + + GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], psOperand->aui32Swizzle, psCBuf, &psVar, &index); + + bformata(glsl, ".%s", psVar->Name); + if(index != -1) + { + bformata(glsl, "[%d]", index); + }*/ + + //return; + } + + if(psOperand->iWriteMaskEnabled && + psOperand->iNumComponents != 1) + { + //Component Mask + if(psOperand->eSelMode == OPERAND_4_COMPONENT_MASK_MODE) + { + uint32_t mask; + if (psOperand->ui32CompMask != 0) + mask = psOperand->ui32CompMask & ui32ComponentMask; + else + mask = ui32ComponentMask; + + if(mask != 0 && mask != OPERAND_4_COMPONENT_MASK_ALL) + { + bcatcstr(glsl, "."); + if(mask & OPERAND_4_COMPONENT_MASK_X) + { + ASSERT(iRebase == 0); + bcatcstr(glsl, "x"); + } + if(mask & OPERAND_4_COMPONENT_MASK_Y) + { + ASSERT(iRebase <= 1); + bformata(glsl, "%c", "xy"[1 - iRebase]); + } + if(mask & OPERAND_4_COMPONENT_MASK_Z) + { + ASSERT(iRebase <= 2); + bformata(glsl, "%c", "xyz"[2 - iRebase]); + } + if(mask & OPERAND_4_COMPONENT_MASK_W) + { + ASSERT(iRebase <= 3); + bformata(glsl, "%c", "xyzw"[3 - iRebase]); + } + } + } + else + //Component Swizzle + if(psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) + { + if (ui32ComponentMask != OPERAND_4_COMPONENT_MASK_ALL || + !(psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_X && + psOperand->aui32Swizzle[1] == OPERAND_4_COMPONENT_Y && + psOperand->aui32Swizzle[2] == OPERAND_4_COMPONENT_Z && + psOperand->aui32Swizzle[3] == OPERAND_4_COMPONENT_W + ) + ) + { + uint32_t i; + + bcatcstr(glsl, "."); + + for (i = 0; i < 4; ++i) + { + if (!(ui32ComponentMask & (OPERAND_4_COMPONENT_MASK_X << i))) + continue; + + if (psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_X) + { + ASSERT(iRebase == 0); + bcatcstr(glsl, "x"); + } + else if (psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_Y) + { + ASSERT(iRebase <= 1); + bformata(glsl, "%c", "xy"[1 - iRebase]); + } + else if (psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_Z) + { + ASSERT(iRebase <= 2); + bformata(glsl, "%c", "xyz"[2 - iRebase]); + } + else if (psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_W) + { + ASSERT(iRebase <= 3); + bformata(glsl, "%c", "xyzw"[3 - iRebase]); + } + } + } + } + else + if(psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) // ui32ComponentMask is ignored in this case + { + bcatcstr(glsl, "."); + + if(psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_X) + { + ASSERT(iRebase == 0); + 
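+            // iRebase is the number of components the variable was repacked down by:
+            // with iRebase == 1 a .y access is emitted as .x, and so on.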
bcatcstr(glsl, "x"); + } + else + if(psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_Y) + { + ASSERT(iRebase <= 1); + bformata(glsl, "%c", "xy"[1 - iRebase]); + } + else + if(psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_Z) + { + ASSERT(iRebase <= 2); + bformata(glsl, "%c", "xyz"[2 - iRebase]); + } + else + if(psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_W) + { + ASSERT(iRebase <= 3); + bformata(glsl, "%c", "xyzw"[3 - iRebase]); + } + } + + //Component Select 1 + } +} + +void ToGLSL::TranslateOperandIndex(const Operand* psOperand, int index) +{ + int i = index; + + bstring glsl = *psContext->currentGLSLString; + + ASSERT(index < psOperand->iIndexDims); + + switch(psOperand->eIndexRep[i]) + { + case OPERAND_INDEX_IMMEDIATE32: + { + bformata(glsl, "[%d]", psOperand->aui32ArraySizes[i]); + break; + } + case OPERAND_INDEX_RELATIVE: + { + bcatcstr(glsl, "["); + TranslateOperand(psOperand->m_SubOperands[i].get(), TO_FLAG_INTEGER); + bcatcstr(glsl, "]"); + break; + } + case OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: + { + bcatcstr(glsl, "["); //Indexes must be integral. + TranslateOperand(psOperand->m_SubOperands[i].get(), TO_FLAG_INTEGER); + bformata(glsl, " + %d]", psOperand->aui32ArraySizes[i]); + break; + } + default: + { + break; + } + } +} + +void ToGLSL::TranslateOperandIndexMAD(const Operand* psOperand, int index, uint32_t multiply, uint32_t add) +{ + int i = index; + int isGeoShader = psContext->psShader->eShaderType == GEOMETRY_SHADER ? 1 : 0; + + bstring glsl = *psContext->currentGLSLString; + + ASSERT(index < psOperand->iIndexDims); + + switch(psOperand->eIndexRep[i]) + { + case OPERAND_INDEX_IMMEDIATE32: + { + if(i > 0 || isGeoShader) + { + bformata(glsl, "[%d*%d+%d]", psOperand->aui32ArraySizes[i], multiply, add); + } + else + { + bformata(glsl, "%d*%d+%d", psOperand->aui32ArraySizes[i], multiply, add); + } + break; + } + case OPERAND_INDEX_RELATIVE: + { + bcatcstr(glsl, "[int("); //Indexes must be integral. + TranslateOperand(psOperand->m_SubOperands[i].get(), TO_FLAG_NONE); + bformata(glsl, ")*%d+%d]", multiply, add); + break; + } + case OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: + { + bcatcstr(glsl, "[(int("); //Indexes must be integral. 
+ TranslateOperand(psOperand->m_SubOperands[i].get(), TO_FLAG_NONE); + bformata(glsl, ") + %d)*%d+%d]", psOperand->aui32ArraySizes[i], multiply, add); + break; + } + default: + { + break; + } + } +} + +static std::string GetBitcastOp(HLSLCrossCompilerContext *psContext, SHADER_VARIABLE_TYPE from, SHADER_VARIABLE_TYPE to, uint32_t numComponents) +{ + if (psContext->psShader->eTargetLanguage == LANG_METAL) + { + std::ostringstream oss; + oss << "as_type<"; + oss << GetConstructorForTypeMetal(to, numComponents); + oss << ">"; + return oss.str(); + } + else + { + if ((to == SVT_FLOAT || to == SVT_FLOAT16 || to == SVT_FLOAT10) && from == SVT_INT) + return "intBitsToFloat"; + else if ((to == SVT_FLOAT || to == SVT_FLOAT16 || to == SVT_FLOAT10) && from == SVT_UINT) + return "uintBitsToFloat"; + else if (to == SVT_INT && (from == SVT_FLOAT || from == SVT_FLOAT16 || from == SVT_FLOAT10)) + return "floatBitsToInt"; + else if (to == SVT_UINT && (from == SVT_FLOAT || from == SVT_FLOAT16 || from == SVT_FLOAT10)) + return "floatBitsToUint"; + } + + ASSERT(0); + return "ERROR missing components in GetBitcastOp()"; +} + +// Helper function to print out a single 32-bit immediate value in desired format +static void printImmediate32(HLSLCrossCompilerContext *psContext, uint32_t value, SHADER_VARIABLE_TYPE eType) +{ + bstring glsl = *psContext->currentGLSLString; + int needsParenthesis = 0; + + // Print floats as bit patterns. + if ((eType == SVT_FLOAT || eType == SVT_FLOAT16 || eType == SVT_FLOAT10) && psContext->psShader->ui32MajorVersion > 3 && fpcheck(*((float *)(&value)))) + { + if (psContext->psShader->eTargetLanguage == LANG_METAL) + bcatcstr(glsl, "as_type("); + else + bcatcstr(glsl, "intBitsToFloat("); + eType = SVT_INT; + needsParenthesis = 1; + } + + switch (eType) + { + default: + ASSERT(0); + case SVT_INT: + case SVT_INT16: + case SVT_INT12: + // Need special handling for anything >= uint 0x3fffffff + if (value > 0x3ffffffe) + bformata(glsl, "int(0x%Xu)", value); + else if(value <= 1024) // Print anything below 1024 as decimal, and hex after that + bformata(glsl, "%d", value); + else + bformata(glsl, "0x%X", value); + break; + case SVT_UINT: + case SVT_UINT16: + // Adreno bug workaround (happens only on pre-lollipop Nexus 4's): '0u' is treated as int. 
+ if (value == 0 && psContext->psShader->eTargetLanguage == LANG_ES_300) + bcatcstr(glsl, "uint(0u)"); + else + bformata(glsl, "%uu", value); + break; + case SVT_FLOAT: + case SVT_FLOAT10: + case SVT_FLOAT16: + HLSLcc::PrintFloat(glsl, *((float *)(&value))); + break; + case SVT_BOOL: + if (value == 0) + bcatcstr(glsl, "false"); + else + bcatcstr(glsl, "true"); + } + if (needsParenthesis) + bcatcstr(glsl, ")"); +} + +void ToGLSL::TranslateVariableNameWithMask(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase) +{ + int numParenthesis = 0; + int hasCtor = 0; + int needsBoolUpscale = 0; // If nonzero, bools need * 0xffffffff in them + bstring glsl = *psContext->currentGLSLString; + SHADER_VARIABLE_TYPE requestedType = TypeFlagsToSVTType(ui32TOFlag); + SHADER_VARIABLE_TYPE eType = psOperand->GetDataType(psContext, requestedType); + int numComponents = psOperand->GetNumSwizzleElements(ui32CompMask); + int requestedComponents = 0; + int scalarWithSwizzle = 0; + + *pui32IgnoreSwizzle = 0; + + if (psOperand->eType == OPERAND_TYPE_TEMP) + { + // Check for scalar + if (psContext->psShader->GetTempComponentCount(eType, psOperand->ui32RegisterNumber) == 1 && psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) + { + scalarWithSwizzle = 1; // Going to need a constructor + } + } + + if (psOperand->eType == OPERAND_TYPE_INPUT) + { + // Check for scalar + if (psContext->psShader->abScalarInput[psOperand->GetRegisterSpace(psContext)][psOperand->ui32RegisterNumber] & psOperand->GetAccessMask() + && psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) + { + scalarWithSwizzle = 1; + *pui32IgnoreSwizzle = 1; + } + } + + if (psOperand->eType == OPERAND_TYPE_CONSTANT_BUFFER && psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE && psOperand->IsSwizzleReplicated()) + { + // Needs scalar check as well + const ConstantBuffer* psCBuf = NULL; + const ShaderVarType* psVarType = NULL; + int32_t rebase = 0; + bool isArray; + psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, psOperand->aui32ArraySizes[0], &psCBuf); + ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], psOperand->aui32Swizzle, psCBuf, &psVarType, &isArray, NULL, &rebase, psContext->flags); + if (psVarType->Columns == 1) + { + scalarWithSwizzle = 1; // Needs a constructor + *pui32IgnoreSwizzle = 1; + } + + } + + if (piRebase) + *piRebase = 0; + + if (ui32TOFlag & TO_AUTO_EXPAND_TO_VEC2) + requestedComponents = 2; + else if (ui32TOFlag & TO_AUTO_EXPAND_TO_VEC3) + requestedComponents = 3; + else if (ui32TOFlag & TO_AUTO_EXPAND_TO_VEC4) + requestedComponents = 4; + + requestedComponents = std::max(requestedComponents, numComponents); + + if (!(ui32TOFlag & (TO_FLAG_DESTINATION | TO_FLAG_NAME_ONLY | TO_FLAG_DECLARATION_NAME))) + { + if (psOperand->eType == OPERAND_TYPE_IMMEDIATE32 || psOperand->eType == OPERAND_TYPE_IMMEDIATE64) + { + // Mark the operand type to match whatever we're asking for in the flags. 
+ ((Operand *)psOperand)->aeDataType[0] = requestedType; + ((Operand *)psOperand)->aeDataType[1] = requestedType; + ((Operand *)psOperand)->aeDataType[2] = requestedType; + ((Operand *)psOperand)->aeDataType[3] = requestedType; + } + + if (AreTypesCompatible(eType, ui32TOFlag) == 0) + { + if (CanDoDirectCast(eType, requestedType)) + { + bformata(glsl, "%s(", GetConstructorForType(psContext, requestedType, requestedComponents, false)); + numParenthesis++; + hasCtor = 1; + if (eType == SVT_BOOL) + needsBoolUpscale = 1; + } + else + { + // Direct cast not possible, need to do bitcast. + bformata(glsl, "%s(", GetBitcastOp(psContext, eType, requestedType, requestedComponents).c_str()); + numParenthesis++; + } + } + + // Add ctor if needed (upscaling). Type conversion is already handled above, so here we must + // use the original type to not make type conflicts in bitcasts + if (((numComponents < requestedComponents)||(scalarWithSwizzle != 0)) && (hasCtor == 0)) + { +// ASSERT(numComponents == 1); + bformata(glsl, "%s(", GetConstructorForType(psContext, eType, requestedComponents, false)); + numParenthesis++; + hasCtor = 1; + } + } + + + switch(psOperand->eType) + { + case OPERAND_TYPE_IMMEDIATE32: + { + if(psOperand->iNumComponents == 1) + { + printImmediate32(psContext, *((unsigned int*)(&psOperand->afImmediates[0])), requestedType); + } + else + { + int i; + int firstItemAdded = 0; + if (hasCtor == 0) + { + bformata(glsl, "%s(", GetConstructorForType(psContext, requestedType, requestedComponents, false)); + numParenthesis++; + hasCtor = 1; + } + for (i = 0; i < 4; i++) + { + uint32_t uval; + if (!(ui32CompMask & (1 << i))) + continue; + + if (firstItemAdded) + bcatcstr(glsl, ", "); + uval = *((uint32_t*)(&psOperand->afImmediates[i >= psOperand->iNumComponents ? 
psOperand->iNumComponents-1 : i])); + printImmediate32(psContext, uval, requestedType); + firstItemAdded = 1; + } + bcatcstr(glsl, ")"); + *pui32IgnoreSwizzle = 1; + numParenthesis--; + } + break; + } + case OPERAND_TYPE_IMMEDIATE64: + { + if(psOperand->iNumComponents == 1) + { + bformata(glsl, "%.17g", + psOperand->adImmediates[0]); + } + else + { + bformata(glsl, "dvec4(%.17g, %.17g, %.17g, %.17g)", + psOperand->adImmediates[0], + psOperand->adImmediates[1], + psOperand->adImmediates[2], + psOperand->adImmediates[3]); + if(psOperand->iNumComponents != 4) + { + AddSwizzleUsingElementCount(glsl, psOperand->iNumComponents); + } + } + break; + } + case OPERAND_TYPE_INPUT: + { + int regSpace = psOperand->GetRegisterSpace(psContext); + switch(psOperand->iIndexDims) + { + case INDEX_2D: + { + const ShaderInfo::InOutSignature *psSig = NULL; + psContext->psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->ui32CompMask, &psSig); + + if ((psSig->eSystemValueType == NAME_POSITION && psSig->ui32SemanticIndex == 0) || + (psSig->semanticName == "POS" && psSig->ui32SemanticIndex == 0) || + (psSig->semanticName == "SV_POSITION" && psSig->ui32SemanticIndex == 0)) + { + bcatcstr(glsl, "gl_in"); + TranslateOperandIndex(psOperand, 0);//Vertex index + bcatcstr(glsl, ".gl_Position"); + } + else + { + std::string name = psContext->GetDeclaredInputName(psOperand, piRebase, 0, pui32IgnoreSwizzle); + + bformata(glsl, "%s", name.c_str()); + TranslateOperandIndex(psOperand, 0);//Vertex index + } + break; + } + default: + { + if(psOperand->eIndexRep[0] == OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE) + { + bformata(glsl, "phase%d_Input%d_%d[", psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); + TranslateOperand(psOperand->m_SubOperands[0].get(), TO_FLAG_INTEGER); + bcatcstr(glsl, "]"); + } + else + { + if(psContext->psShader->aIndexedInput[regSpace][psOperand->ui32RegisterNumber] != 0) + { + const uint32_t parentIndex = psContext->psShader->aIndexedInputParents[regSpace][psOperand->ui32RegisterNumber]; + bformata(glsl, "phase%d_Input%d_%d[%d]", psContext->currentPhase, regSpace, parentIndex, + psOperand->ui32RegisterNumber - parentIndex); + } + else + { + std::string name = psContext->GetDeclaredInputName(psOperand, piRebase, 0, pui32IgnoreSwizzle); + bcatcstr(glsl, name.c_str()); + } + } + break; + } + } + break; + } + case OPERAND_TYPE_OUTPUT: + { + /*if(psContext->psShader->eShaderType == HULL_SHADER && psOperand->eIndexRep[0] == OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE) + { + int stream = 0; + const char* name = GetDeclaredOutputName(psContext, HULL_SHADER, psOperand, &stream); + bcatcstr(glsl, name); + } + else*/ + { + int stream = 0; + std::string name = psContext->GetDeclaredOutputName(psOperand, &stream, pui32IgnoreSwizzle, piRebase, 0); + bcatcstr(glsl, name.c_str()); + if (psOperand->m_SubOperands[0].get()) + { + bcatcstr(glsl, "["); + TranslateOperand(psOperand->m_SubOperands[0].get(), TO_AUTO_BITCAST_TO_INT); + bcatcstr(glsl, "]"); + } + } + break; + } + case OPERAND_TYPE_OUTPUT_DEPTH: + case OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL: + case OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL: + { + bcatcstr(glsl, "gl_FragDepth"); + break; + } + case OPERAND_TYPE_TEMP: + { + SHADER_VARIABLE_TYPE eTempType = psOperand->GetDataType(psContext); + bcatcstr(glsl, HLSLCC_TEMP_PREFIX); + ASSERT(psOperand->ui32RegisterNumber < 0x10000); // Sanity check after temp splitting. 
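+        // Temps are split per data type, so each type gets its own marker after HLSLCC_TEMP_PREFIX
+        // ("" for float, "16_" for min16float, "i" for int, "u16_" for min16uint, "b" for bool, ...),
+        // followed by the register number (or the loop inductor name below).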
+ switch (eTempType) + { + case SVT_FLOAT: + ASSERT(psContext->psShader->psFloatTempSizes[psOperand->ui32RegisterNumber] != 0); + if (psContext->psShader->psFloatTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_FLOAT16: + ASSERT(psContext->psShader->psFloat16TempSizes[psOperand->ui32RegisterNumber] != 0); + bcatcstr(glsl, "16_"); + if (psContext->psShader->psFloat16TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_FLOAT10: + ASSERT(psContext->psShader->psFloat10TempSizes[psOperand->ui32RegisterNumber] != 0); + bcatcstr(glsl, "10_"); + if (psContext->psShader->psFloat10TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_INT: + ASSERT(psContext->psShader->psIntTempSizes[psOperand->ui32RegisterNumber] != 0); + bcatcstr(glsl, "i"); + if (psContext->psShader->psIntTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_INT16: + ASSERT(psContext->psShader->psInt16TempSizes[psOperand->ui32RegisterNumber] != 0); + bcatcstr(glsl, "i16_"); + if (psContext->psShader->psInt16TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_INT12: + ASSERT(psContext->psShader->psInt12TempSizes[psOperand->ui32RegisterNumber] != 0); + bcatcstr(glsl, "i12_"); + if (psContext->psShader->psInt12TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_UINT: + ASSERT(psContext->psShader->psUIntTempSizes[psOperand->ui32RegisterNumber] != 0); + bcatcstr(glsl, "u"); + if (psContext->psShader->psUIntTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_UINT16: + ASSERT(psContext->psShader->psUInt16TempSizes[psOperand->ui32RegisterNumber] != 0); + bcatcstr(glsl, "u16_"); + if (psContext->psShader->psUInt16TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_DOUBLE: + ASSERT(psContext->psShader->psDoubleTempSizes[psOperand->ui32RegisterNumber] != 0); + bcatcstr(glsl, "d"); + if (psContext->psShader->psDoubleTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_BOOL: + ASSERT(psContext->psShader->psBoolTempSizes[psOperand->ui32RegisterNumber] != 0); + bcatcstr(glsl, "b"); + if (psContext->psShader->psBoolTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + default: + ASSERT(0 && "Should never get here!"); + } + // m_ForLoopInductorName overrides the register number, if available + if (psOperand->m_ForLoopInductorName != 0) + { + bformata(glsl, "_loop_%d", psOperand->m_ForLoopInductorName); + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + } + else + bformata(glsl, "%d", psOperand->ui32RegisterNumber); + break; + } + case OPERAND_TYPE_SPECIAL_IMMCONSTINT: + { + bformata(glsl, "IntImmConst%d", psOperand->ui32RegisterNumber); + break; + } + case OPERAND_TYPE_SPECIAL_IMMCONST: + { + ASSERT(0 && "DX9 shaders no longer supported!"); + break; + } + case OPERAND_TYPE_SPECIAL_OUTBASECOLOUR: + { + bcatcstr(glsl, "BaseColour"); + break; + } + case OPERAND_TYPE_SPECIAL_OUTOFFSETCOLOUR: + { + bcatcstr(glsl, "OffsetColour"); + break; + } + case OPERAND_TYPE_SPECIAL_POSITION: + { + bcatcstr(glsl, "gl_Position"); + break; + } + 
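+        // The remaining OPERAND_TYPE_SPECIAL_* operands map directly to fixed variable names or
+        // GL built-ins (Fog, gl_PointSize, Address, LoopCounter, TexCoordN).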
case OPERAND_TYPE_SPECIAL_FOG: + { + bcatcstr(glsl, "Fog"); + break; + } + case OPERAND_TYPE_SPECIAL_POINTSIZE: + { + bcatcstr(glsl, "gl_PointSize"); + break; + } + case OPERAND_TYPE_SPECIAL_ADDRESS: + { + bcatcstr(glsl, "Address"); + break; + } + case OPERAND_TYPE_SPECIAL_LOOPCOUNTER: + { + bcatcstr(glsl, "LoopCounter"); + pui32IgnoreSwizzle[0] = 1; + break; + } + case OPERAND_TYPE_SPECIAL_TEXCOORD: + { + bformata(glsl, "TexCoord%d", psOperand->ui32RegisterNumber); + break; + } + case OPERAND_TYPE_CONSTANT_BUFFER: + { + const char* StageName = "VS"; + const ConstantBuffer* psCBuf = NULL; + const ShaderVarType* psVarType = NULL; + int32_t index = -1; + std::vector arrayIndices; + bool isArray; + psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, psOperand->aui32ArraySizes[0], &psCBuf); + + switch(psContext->psShader->eShaderType) + { + case PIXEL_SHADER: + { + StageName = "PS"; + break; + } + case HULL_SHADER: + { + StageName = "HS"; + break; + } + case DOMAIN_SHADER: + { + StageName = "DS"; + break; + } + case GEOMETRY_SHADER: + { + StageName = "GS"; + break; + } + case COMPUTE_SHADER: + { + StageName = "CS"; + break; + } + default: + { + break; + } + } + + if(ui32TOFlag & TO_FLAG_DECLARATION_NAME) + { + pui32IgnoreSwizzle[0] = 1; + } + + // FIXME: With ES 3.0 the buffer name is often not prepended to variable names + if(((psContext->flags & HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT)!=HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT) && + ((psContext->flags & HLSLCC_FLAG_DISABLE_GLOBALS_STRUCT)!=HLSLCC_FLAG_DISABLE_GLOBALS_STRUCT)) + { + if(psCBuf) + { + //$Globals. + if(psCBuf->name[0] == '$') + { + bformata(glsl, "Globals%s", StageName); + } + else + { + bformata(glsl, "%s%s", psCBuf->name.c_str(), StageName); + } + if((ui32TOFlag & TO_FLAG_DECLARATION_NAME) != TO_FLAG_DECLARATION_NAME) + { + bcatcstr(glsl, "."); + } + } + else + { + //bformata(glsl, "cb%d", psOperand->aui32ArraySizes[0]); + } + } + + if((ui32TOFlag & TO_FLAG_DECLARATION_NAME) != TO_FLAG_DECLARATION_NAME) + { + //Work out the variable name. Don't apply swizzle to that variable yet. 
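+                // The vec4 offset (aui32ArraySizes[1]) plus the swizzle is resolved against the cbuffer reflection
+                // data to find the member it lands in. If the whole access fits inside one member, that member's
+                // name is printed directly; otherwise the access straddles members and is rebuilt component by
+                // component inside a constructor further below.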
+ int32_t rebase = 0; + + if(psCBuf) + { + uint32_t componentsNeeded = 1; + uint32_t minSwiz = 3; + uint32_t maxSwiz = 0; + if (psOperand->eSelMode != OPERAND_4_COMPONENT_SELECT_1_MODE) + { + int i; + for (i = 0; i < 4; i++) + { + if ((ui32CompMask & (1 << i)) == 0) + continue; + minSwiz = std::min(minSwiz, psOperand->aui32Swizzle[i]); + maxSwiz = std::max(maxSwiz, psOperand->aui32Swizzle[i]); + } + componentsNeeded = maxSwiz - minSwiz + 1; + } + else + { + minSwiz = maxSwiz = 1; + } + + // When we have a component mask that doesn't have .x set (this basically only happens when we manually open operands into components) + // We have to pull down the swizzle array to match the first bit that's actually set + uint32_t tmpSwizzle[4] = { 0 }; + int firstBitSet = 0; + if (ui32CompMask == 0) + ui32CompMask = 0xf; + while ((ui32CompMask & (1 << firstBitSet)) == 0) + firstBitSet++; + std::copy(&psOperand->aui32Swizzle[firstBitSet], &psOperand->aui32Swizzle[4], &tmpSwizzle[0]); + + ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], tmpSwizzle, psCBuf, &psVarType, &isArray, &arrayIndices, &rebase, psContext->flags); + if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE || ((componentsNeeded+minSwiz) <= psVarType->Columns)) + { + // Simple case: just access one component + std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(psVarType, arrayIndices); + + if (((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) != 0) && ((psVarType->Class == SVC_MATRIX_ROWS) || (psVarType->Class == SVC_MATRIX_COLUMNS))) + { + // We'll need to add the prefix only to the last section of the name + size_t commaPos = fullName.find_last_of('.'); + char prefix[256]; + sprintf(prefix, HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING, psVarType->Rows, psVarType->Columns); + if (commaPos == std::string::npos) + fullName.insert(0, prefix); + else + fullName.insert(commaPos + 1, prefix); + + bformata(glsl, "%s", fullName.c_str()); + } + else + bformata(glsl, "%s", fullName.c_str()); + } + else + { + // Non-simple case: build vec4 and apply mask + uint32_t i; + std::vector tmpArrayIndices; + bool tmpIsArray; + int32_t tmpRebase; + int firstItemAdded = 0; + + bformata(glsl, "%s(", GetConstructorForType(psContext, psVarType->Type, GetNumberBitsSet(ui32CompMask), false)); + for (i = 0; i < 4; i++) + { + const ShaderVarType *tmpVarType = NULL; + if ((ui32CompMask & (1 << i)) == 0) + continue; + tmpRebase = 0; + if (firstItemAdded != 0) + bcatcstr(glsl, ", "); + else + firstItemAdded = 1; + + memset(tmpSwizzle, 0, sizeof(uint32_t) * 4); + std::copy(&psOperand->aui32Swizzle[i], &psOperand->aui32Swizzle[4], &tmpSwizzle[0]); + + ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], tmpSwizzle, psCBuf, &tmpVarType, &tmpIsArray, &tmpArrayIndices, &tmpRebase, psContext->flags); + std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(tmpVarType, tmpArrayIndices); + + if (tmpVarType->Class == SVC_SCALAR) + { + bformata(glsl, "%s", fullName.c_str()); + } + else + { + uint32_t swizzle; + tmpRebase /= 4; // 0 => 0, 4 => 1, 8 => 2, 12 /= 3 + swizzle = psOperand->aui32Swizzle[i] - tmpRebase; + + bformata(glsl, "%s", fullName.c_str()); + bformata(glsl, ".%c", "xyzw"[swizzle]); + } + } + bcatcstr(glsl, ")"); + // Clear rebase, we've already done it. + rebase = 0; + // Also swizzle. + *pui32IgnoreSwizzle = 1; + } + } + else // We don't have a semantic for this variable, so try the raw dump appoach. 
+ { + ASSERT(0); + //bformata(glsl, "cb%d.data", psOperand->aui32ArraySizes[0]);// + //index = psOperand->aui32ArraySizes[1]; + } + + if (isArray) + index = arrayIndices.back(); + + //Dx9 only? + if(psOperand->m_SubOperands[0].get() != NULL) + { + // Array of matrices is treated as array of vec4s in HLSL, + // but that would mess up uniform types in GLSL. Do gymnastics. + uint32_t opFlags = TO_FLAG_INTEGER; + + if (((psVarType->Class == SVC_MATRIX_COLUMNS) || (psVarType->Class == SVC_MATRIX_ROWS)) && (psVarType->Elements > 1) && ((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) == 0)) + { + // Special handling for matrix arrays + bcatcstr(glsl, "[("); + TranslateOperand(psOperand->m_SubOperands[0].get(), opFlags); + bformata(glsl, ") / 4]"); + { + bcatcstr(glsl, "[(("); + TranslateOperand(psOperand->m_SubOperands[0].get(), opFlags, OPERAND_4_COMPONENT_MASK_X); + bformata(glsl, ") %% 4)]"); + } + } + else + { + bcatcstr(glsl, "["); + TranslateOperand(psOperand->m_SubOperands[0].get(), opFlags); + bformata(glsl, "]"); + } + } + else + if(index != -1 && psOperand->m_SubOperands[1].get() != NULL) + { + // Array of matrices is treated as array of vec4s in HLSL, + // but that would mess up uniform types in GLSL. Do gymnastics. + SHADER_VARIABLE_TYPE eType = psOperand->m_SubOperands[1].get()->GetDataType(psContext); + uint32_t opFlags = TO_FLAG_INTEGER; + if (eType != SVT_INT && eType != SVT_UINT) + opFlags = TO_AUTO_BITCAST_TO_INT; + + if (((psVarType->Class == SVC_MATRIX_COLUMNS) ||( psVarType->Class == SVC_MATRIX_ROWS)) && (psVarType->Elements > 1) && ((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) == 0)) + { + // Special handling for matrix arrays + bcatcstr(glsl, "[("); + TranslateOperand(psOperand->m_SubOperands[1].get(), opFlags); + bformata(glsl, " + %d) / 4]", index); + { + bcatcstr(glsl, "[(("); + TranslateOperand(psOperand->m_SubOperands[1].get(), opFlags); + bformata(glsl, " + %d) %% 4)]", index); + } + } + else + { + bcatcstr(glsl, "["); + TranslateOperand(psOperand->m_SubOperands[1].get(), opFlags); + if (index != 0) + bformata(glsl, " + %d]", index); + else + bcatcstr(glsl, "]"); + } + } + else if(index != -1) + { + if (((psVarType->Class == SVC_MATRIX_COLUMNS) || (psVarType->Class == SVC_MATRIX_ROWS)) && (psVarType->Elements > 1) && ((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) == 0)) + { + // Special handling for matrix arrays, open them up into vec4's + size_t matidx = index / 4; + size_t rowidx = index - (matidx*4); + bformata(glsl, "[%d][%d]", matidx, rowidx); + } + else + { + bformata(glsl, "[%d]", index); + } + } + else if(psOperand->m_SubOperands[1].get() != NULL) + { + bcatcstr(glsl, "["); + TranslateOperand(psOperand->m_SubOperands[1].get(), TO_FLAG_INTEGER); + bcatcstr(glsl, "]"); + } + + if(psVarType && psVarType->Class == SVC_VECTOR && !*pui32IgnoreSwizzle) + { + switch(rebase) + { + case 4: + { + if(psVarType->Columns == 2) + { + //.x(GLSL) is .y(HLSL). .y(GLSL) is .z(HLSL) + bcatcstr(glsl, ".xxyx"); + } + else if(psVarType->Columns == 3) + { + //.x(GLSL) is .y(HLSL). .y(GLSL) is .z(HLSL) .z(GLSL) is .w(HLSL) + bcatcstr(glsl, ".xxyz"); + } + break; + } + case 8: + { + if(psVarType->Columns == 2) + { + //.x(GLSL) is .z(HLSL). 
.y(GLSL) is .w(HLSL) + bcatcstr(glsl, ".xxxy"); + } + break; + } + case 0: + default: + { + //No rebase, but extend to vec4 if needed + uint32_t maxComp = psOperand->GetMaxComponent(); + if(psVarType->Columns == 2 && maxComp > 2) + { + bcatcstr(glsl, ".xyxx"); + } + else if(psVarType->Columns == 3 && maxComp > 3) + { + bcatcstr(glsl, ".xyzx"); + } + break; + } + + } + } + + if(psVarType && psVarType->Class == SVC_SCALAR) + { + *pui32IgnoreSwizzle = 1; + } + } + break; + } + case OPERAND_TYPE_RESOURCE: + { + ResourceName(glsl, psContext, RGROUP_TEXTURE, psOperand->ui32RegisterNumber, 0); + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_SAMPLER: + { + bformata(glsl, "Sampler%d", psOperand->ui32RegisterNumber); + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_FUNCTION_BODY: + { + const uint32_t ui32FuncBody = psOperand->ui32RegisterNumber; + const uint32_t ui32FuncTable = psContext->psShader->aui32FuncBodyToFuncTable[ui32FuncBody]; + //const uint32_t ui32FuncPointer = psContext->psShader->aui32FuncTableToFuncPointer[ui32FuncTable]; + const uint32_t ui32ClassType = psContext->psShader->sInfo.aui32TableIDToTypeID[ui32FuncTable]; + const char* ClassTypeName = &psContext->psShader->sInfo.psClassTypes[ui32ClassType].name[0]; + const uint32_t ui32UniqueClassFuncIndex = psContext->psShader->ui32NextClassFuncName[ui32ClassType]++; + + bformata(glsl, "%s_Func%d", ClassTypeName, ui32UniqueClassFuncIndex); + break; + } + case OPERAND_TYPE_INPUT_FORK_INSTANCE_ID: + case OPERAND_TYPE_INPUT_JOIN_INSTANCE_ID: + { + bcatcstr(glsl, "phaseInstanceID"); // Not a real builtin, but passed as a function parameter. + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER: + { + bformata(glsl, "ImmCB_%d_%d_%d", psContext->currentPhase, psOperand->ui32RegisterNumber, psOperand->m_Rebase); + if(psOperand->m_SubOperands[0].get()) + { + bcatcstr(glsl, "["); //Indexes must be integral. Offset is already taken care of above. 
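+                // Only the dynamic part of the index is emitted here, forced to an integer expression via
+                // TO_FLAG_INTEGER; the constant offset is already encoded in the ImmCB name above.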
+ TranslateOperand(psOperand->m_SubOperands[0].get(), TO_FLAG_INTEGER); + bcatcstr(glsl, "]"); + } + if (psOperand->m_Size == 1) + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_INPUT_DOMAIN_POINT: + { + bcatcstr(glsl, "gl_TessCoord"); + break; + } + case OPERAND_TYPE_INPUT_CONTROL_POINT: + { + const ShaderInfo::InOutSignature *psSig = NULL; + psContext->psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->ui32CompMask, &psSig); + + if ((psSig->eSystemValueType == NAME_POSITION && psSig->ui32SemanticIndex == 0) || + (psSig->semanticName == "POS" && psSig->ui32SemanticIndex == 0) || + (psSig->semanticName == "SV_POSITION" && psSig->ui32SemanticIndex == 0)) + { + bcatcstr(glsl, "gl_in"); + TranslateOperandIndex(psOperand, 0);//Vertex index + bcatcstr(glsl, ".gl_Position"); + } + else + { + std::string name = psContext->GetDeclaredInputName(psOperand, piRebase, 0, pui32IgnoreSwizzle); + + bformata(glsl, "%s", name.c_str()); + TranslateOperandIndex(psOperand, 0);//Vertex index + + // Check for scalar + if ((psContext->psShader->abScalarInput[psOperand->GetRegisterSpace(psContext)][psOperand->ui32RegisterNumber] & psOperand->GetAccessMask()) != 0) + *pui32IgnoreSwizzle = 1; + } + break; + } + case OPERAND_TYPE_NULL: + { + // Null register, used to discard results of operations + bcatcstr(glsl, "//null"); + break; + } + case OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID: + { + bcatcstr(glsl, "gl_InvocationID"); + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_OUTPUT_COVERAGE_MASK: + { + bcatcstr(glsl, "gl_SampleMask[0]"); + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_INPUT_COVERAGE_MASK: + { + bcatcstr(glsl, "gl_SampleMaskIn[0]"); + //Skip swizzle on scalar types. + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_INPUT_THREAD_ID://SV_DispatchThreadID + { + bcatcstr(glsl, "gl_GlobalInvocationID"); + break; + } + case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP://SV_GroupThreadID + { + bcatcstr(glsl, "gl_LocalInvocationID"); + break; + } + case OPERAND_TYPE_INPUT_THREAD_GROUP_ID://SV_GroupID + { + bcatcstr(glsl, "gl_WorkGroupID"); + break; + } + case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED://SV_GroupIndex + { + bcatcstr(glsl, "gl_LocalInvocationIndex"); + *pui32IgnoreSwizzle = 1; // No swizzle meaningful for scalar. + break; + } + case OPERAND_TYPE_UNORDERED_ACCESS_VIEW: + { + ResourceName(glsl, psContext, RGROUP_UAV, psOperand->ui32RegisterNumber, 0); + break; + } + case OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY: + { + bformata(glsl, "TGSM%d", psOperand->ui32RegisterNumber); + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_INPUT_PRIMITIVEID: + { + if(psContext->psShader->eShaderType == GEOMETRY_SHADER) + bcatcstr(glsl, "gl_PrimitiveIDIn"); // LOL OpenGL + else + bcatcstr(glsl, "gl_PrimitiveID"); + + break; + } + case OPERAND_TYPE_INDEXABLE_TEMP: + { + bformata(glsl, "TempArray%d", psOperand->aui32ArraySizes[0]); + bcatcstr(glsl, "["); + if (psOperand->aui32ArraySizes[1] != 0 || !psOperand->m_SubOperands[1].get()) + bformata(glsl, "%d", psOperand->aui32ArraySizes[1]); + + if(psOperand->m_SubOperands[1].get()) + { + if (psOperand->aui32ArraySizes[1] != 0) + bcatcstr(glsl, "+"); + TranslateOperand(psOperand->m_SubOperands[1].get(), TO_FLAG_INTEGER); + + } + bcatcstr(glsl, "]"); + break; + } + case OPERAND_TYPE_STREAM: + { + bformata(glsl, "%d", psOperand->ui32RegisterNumber); + break; + } + case OPERAND_TYPE_INPUT_GS_INSTANCE_ID: + { + // In HLSL the instance id is uint, so cast here. 
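+            // gl_InvocationID is a signed int in GLSL, while SV_GSInstanceID is uint in HLSL, hence the
+            // explicit uint() constructor.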
+ bcatcstr(glsl, "uint(gl_InvocationID)"); + break; + } + case OPERAND_TYPE_THIS_POINTER: + { + /* + The "this" register is a register that provides up to 4 pieces of information: + X: Which CB holds the instance data + Y: Base element offset of the instance data within the instance CB + Z: Base sampler index + W: Base Texture index + + Can be different for each function call + */ + break; + } + case OPERAND_TYPE_INPUT_PATCH_CONSTANT: + { + const ShaderInfo::InOutSignature* psIn; + psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psIn); + *piRebase = psIn->iRebase; + switch (psIn->eSystemValueType) + { + case NAME_POSITION: + bcatcstr(glsl, "gl_Position"); + break; + case NAME_RENDER_TARGET_ARRAY_INDEX: + bcatcstr(glsl, "gl_Layer"); + *pui32IgnoreSwizzle = 1; + break; + case NAME_CLIP_DISTANCE: + bcatcstr(glsl, "gl_ClipDistance"); + *pui32IgnoreSwizzle = 1; + break; + case NAME_VIEWPORT_ARRAY_INDEX: + bcatcstr(glsl, "gl_ViewportIndex"); + *pui32IgnoreSwizzle = 1; + break; + case NAME_VERTEX_ID: + if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) + bcatcstr(glsl, "gl_VertexIndex"); + else + bcatcstr(glsl, "gl_VertexID"); + *pui32IgnoreSwizzle = 1; + break; + case NAME_INSTANCE_ID: + if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) + bcatcstr(glsl, "gl_InstanceIndex"); + else + bcatcstr(glsl, "gl_InstanceID"); + *pui32IgnoreSwizzle = 1; + break; + case NAME_IS_FRONT_FACE: + bcatcstr(glsl, "(gl_FrontFacing ? 0xffffffffu : uint(0))"); + *pui32IgnoreSwizzle = 1; + break; + case NAME_PRIMITIVE_ID: + bcatcstr(glsl, "gl_PrimitiveID"); + *pui32IgnoreSwizzle = 1; + break; + case NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_LINE_DENSITY_TESSFACTOR: + if (psContext->psShader->aIndexedOutput[1][psOperand->ui32RegisterNumber]) + bcatcstr(glsl, "gl_TessLevelOuter"); + else + bcatcstr(glsl, "gl_TessLevelOuter[0]"); + *pui32IgnoreSwizzle = 1; + break; + case NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_LINE_DETAIL_TESSFACTOR: + bcatcstr(glsl, "gl_TessLevelOuter[1]"); + *pui32IgnoreSwizzle = 1; + break; + case NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: + bcatcstr(glsl, "gl_TessLevelOuter[2]"); + *pui32IgnoreSwizzle = 1; + break; + case NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: + bcatcstr(glsl, "gl_TessLevelOuter[3]"); + *pui32IgnoreSwizzle = 1; + break; + + case NAME_FINAL_TRI_INSIDE_TESSFACTOR: + case NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR: + if (psContext->psShader->aIndexedOutput[1][psOperand->ui32RegisterNumber]) + bcatcstr(glsl, "gl_TessLevelInner"); + else + bcatcstr(glsl, "gl_TessLevelInner[0]"); + *pui32IgnoreSwizzle = 1; + break; + case NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR: + bcatcstr(glsl, "gl_TessLevelInner[1]"); + *pui32IgnoreSwizzle = 1; + break; + default: + bformata(glsl, "%spatch%s%d", psContext->psShader->eShaderType == HULL_SHADER ? 
psContext->outputPrefix : psContext->inputPrefix, psIn->semanticName.c_str(), psIn->ui32SemanticIndex); + // Disable swizzles if this is a scalar + if (psContext->psShader->eShaderType == HULL_SHADER) + { + if ((psContext->psShader->abScalarOutput[1][psOperand->ui32RegisterNumber] & psOperand->GetAccessMask()) != 0) + *pui32IgnoreSwizzle = 1; + } + else + { + if ((psContext->psShader->abScalarInput[1][psOperand->ui32RegisterNumber] & psOperand->GetAccessMask()) != 0) + *pui32IgnoreSwizzle = 1; + } + + break; + } + + + break; + } + default: + { + ASSERT(0); + break; + } + } + + if (hasCtor && (*pui32IgnoreSwizzle == 0)) + { + TranslateOperandSwizzleWithMask(psContext, psOperand, ui32CompMask, piRebase ? *piRebase : 0); + *pui32IgnoreSwizzle = 1; + } + + if (needsBoolUpscale) + { + if (requestedType == SVT_UINT || requestedType == SVT_UINT16 || requestedType == SVT_UINT8) + bcatcstr(glsl, ") * 0xffffffffu"); + else + bcatcstr(glsl, ") * int(0xffffffffu)"); + numParenthesis--; + } + + while (numParenthesis != 0) + { + bcatcstr(glsl, ")"); + numParenthesis--; + } +} + +void ToGLSL::TranslateOperand(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t ui32ComponentMask) +{ + bstring glsl = *psContext->currentGLSLString; + uint32_t ui32IgnoreSwizzle = 0; + int iRebase = 0; + + // in single-component mode there is no need to use mask + if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) + ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL; + + if(psContext->psShader->ui32MajorVersion <=3) + { + ui32TOFlag &= ~(TO_AUTO_BITCAST_TO_FLOAT|TO_AUTO_BITCAST_TO_INT|TO_AUTO_BITCAST_TO_UINT); + } + + if(ui32TOFlag & TO_FLAG_NAME_ONLY) + { + TranslateVariableNameWithMask(psOperand, ui32TOFlag, &ui32IgnoreSwizzle, OPERAND_4_COMPONENT_MASK_ALL, &iRebase); + return; + } + + switch(psOperand->eModifier) + { + case OPERAND_MODIFIER_NONE: + { + break; + } + case OPERAND_MODIFIER_NEG: + { + bcatcstr(glsl, "(-"); + break; + } + case OPERAND_MODIFIER_ABS: + { + bcatcstr(glsl, "abs("); + break; + } + case OPERAND_MODIFIER_ABSNEG: + { + bcatcstr(glsl, "-abs("); + break; + } + } + + TranslateVariableNameWithMask(psOperand, ui32TOFlag, &ui32IgnoreSwizzle, ui32ComponentMask, &iRebase); + + if(psContext->psShader->eShaderType == HULL_SHADER && psOperand->eType == OPERAND_TYPE_OUTPUT && + psOperand->ui32RegisterNumber != 0 && psOperand->iArrayElements != 0 && psOperand->eIndexRep[0] != OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE + && psContext->psShader->asPhases[psContext->currentPhase].ePhase == HS_CTRL_POINT_PHASE) + { + bcatcstr(glsl, "[gl_InvocationID]"); + } + + if(!ui32IgnoreSwizzle) + { + TranslateOperandSwizzleWithMask(psContext, psOperand, ui32ComponentMask, iRebase); + } + + switch(psOperand->eModifier) + { + case OPERAND_MODIFIER_NONE: + { + break; + } + case OPERAND_MODIFIER_NEG: + { + bcatcstr(glsl, ")"); + break; + } + case OPERAND_MODIFIER_ABS: + { + bcatcstr(glsl, ")"); + break; + } + case OPERAND_MODIFIER_ABSNEG: + { + bcatcstr(glsl, ")"); + break; + } + } + +} + +std::string ResourceName(HLSLCrossCompilerContext* psContext, ResourceGroup group, const uint32_t ui32RegisterNumber, const int bZCompare) +{ + std::ostringstream oss; + const ResourceBinding* psBinding = 0; + int found; + + found = psContext->psShader->sInfo.GetResourceFromBindingPoint(group, ui32RegisterNumber, &psBinding); + + if(bZCompare) + { + oss << "hlslcc_zcmp"; + } + + if(found) + { + int i = 0; + std::string name = psBinding->name; + uint32_t ui32ArrayOffset = ui32RegisterNumber - psBinding->ui32BindPoint; + + while(i < 
name.length()) + { + //array syntax [X] becomes _0_ + //Otherwise declarations could end up as: + //uniform sampler2D SomeTextures[0]; + //uniform sampler2D SomeTextures[1]; + if(name[i] == '[' || name[i] == ']') + name[i] = '_'; + + ++i; + } + + if(ui32ArrayOffset) + { + oss << name << ui32ArrayOffset; + } + else + { + oss << name; + } + if (((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) && group == RGROUP_UAV) + oss << "_origX" << ui32RegisterNumber << "X"; + } + else + { + oss << "UnknownResource" << ui32RegisterNumber; + } + return oss.str(); +} +void ResourceName(bstring targetStr, HLSLCrossCompilerContext* psContext, ResourceGroup group, const uint32_t ui32RegisterNumber, const int bZCompare) +{ + bstring glsl = (targetStr == NULL) ? *psContext->currentGLSLString : targetStr; + std::string res = ResourceName(psContext, group, ui32RegisterNumber, bZCompare); + bcatcstr(glsl, res.c_str()); +} + +std::string TextureSamplerName(ShaderInfo* psShaderInfo, const uint32_t ui32TextureRegisterNumber, const uint32_t ui32SamplerRegisterNumber, const int bZCompare) +{ + std::ostringstream oss; + const ResourceBinding* psTextureBinding = 0; + const ResourceBinding* psSamplerBinding = 0; + int foundTexture, foundSampler; + uint32_t i = 0; + uint32_t ui32ArrayOffset; + + foundTexture = psShaderInfo->GetResourceFromBindingPoint(RGROUP_TEXTURE, ui32TextureRegisterNumber, &psTextureBinding); + foundSampler = psShaderInfo->GetResourceFromBindingPoint(RGROUP_SAMPLER, ui32SamplerRegisterNumber, &psSamplerBinding); + + if (!foundTexture || !foundSampler) + { + oss << "UnknownResource" << ui32TextureRegisterNumber << "_" << ui32SamplerRegisterNumber; + return oss.str(); + } + + ui32ArrayOffset = ui32TextureRegisterNumber - psTextureBinding->ui32BindPoint; + + std::string texName = psTextureBinding->name; + + while (i < texName.length()) + { + //array syntax [X] becomes _0_ + //Otherwise declarations could end up as: + //uniform sampler2D SomeTextures[0]; + //uniform sampler2D SomeTextures[1]; + if(texName[i] == '[' || texName[i] == ']') + { + texName[i] = '_'; + } + + ++i; + } + + + if(bZCompare) + { + oss << "hlslcc_zcmp"; + } + + + if(ui32ArrayOffset) + { + oss << texName << ui32ArrayOffset << "_X_" << psSamplerBinding->name; + } + else + { + if((i>0) && (texName[i-1] == '_'))//Prevent double underscore which is reserved + { + oss << texName << "X_" << psSamplerBinding->name; + } + else + { + oss << texName << "_X_" << psSamplerBinding->name; + } + } + + return oss.str(); +} + +void ConcatTextureSamplerName(bstring str, ShaderInfo* psShaderInfo, const uint32_t ui32TextureRegisterNumber, const uint32_t ui32SamplerRegisterNumber, const int bZCompare) +{ + std::string texturesamplername = TextureSamplerName(psShaderInfo, ui32TextureRegisterNumber, ui32SamplerRegisterNumber, bZCompare); + bcatcstr(str, texturesamplername.c_str()); +} diff --git a/src/toMetal.cpp b/src/toMetal.cpp new file mode 100644 index 0000000..8538eb3 --- /dev/null +++ b/src/toMetal.cpp @@ -0,0 +1,265 @@ + +#include "internal_includes/toMetal.h" +#include "internal_includes/HLSLCrossCompilerContext.h" +#include "internal_includes/Shader.h" +#include "internal_includes/debug.h" + +#include "internal_includes/Declaration.h" +#include "internal_includes/toGLSL.h" +#include "internal_includes/LoopTransform.h" +#include "internal_includes/HLSLccToolkit.h" +#include + +static void PrintStructDeclaration(HLSLCrossCompilerContext *psContext, bstring glsl, std::string &sname, StructDefinitions &defs) +{ + StructDefinition &d = 
defs[sname]; + if (d.m_IsPrinted) + return; + d.m_IsPrinted = true; + + + std::for_each(d.m_Dependencies.begin(), d.m_Dependencies.end(), [&psContext, &glsl, &defs](std::string &depName) + { + PrintStructDeclaration(psContext, glsl, depName, defs); + }); + + bformata(glsl, "struct %s\n{\n", sname.c_str()); + psContext->indent++; + std::for_each(d.m_Members.begin(), d.m_Members.end(), [&psContext, &glsl](std::string &mem) + { + psContext->AddIndentation(); + bcatcstr(glsl, mem.c_str()); + bcatcstr(glsl, ";\n"); + }); + + psContext->indent--; + bcatcstr(glsl, "};\n\n"); +} + +void ToMetal::PrintStructDeclarations(StructDefinitions &defs) +{ + bstring glsl = *psContext->currentGLSLString; + StructDefinition &args = defs[""]; + std::for_each(args.m_Dependencies.begin(), args.m_Dependencies.end(), [this, glsl, &defs](std::string &sname) + { + PrintStructDeclaration(psContext, glsl, sname, defs); + }); + +} + +bool ToMetal::Translate() +{ + bstring glsl; + uint32_t i; + Shader* psShader = psContext->psShader; + psContext->psTranslator = this; + + SetIOPrefixes(); + psShader->ExpandSWAPCs(); + psShader->ForcePositionToHighp(); + psShader->AnalyzeIOOverlap(); + psShader->FindUnusedGlobals(psContext->flags); + + psContext->indent = 0; + + glsl = bfromcstralloc(1024 * 10, ""); + bstring bodyglsl = bfromcstralloc(1024 * 10, ""); + + psContext->glsl = glsl; + for (i = 0; i < psShader->asPhases.size(); ++i) + { + psShader->asPhases[i].postShaderCode = bfromcstralloc(1024 * 5, ""); + psShader->asPhases[i].earlyMain = bfromcstralloc(1024 * 5, ""); + } + + psContext->currentGLSLString = &glsl; + psShader->eTargetLanguage = LANG_METAL; + psShader->extensions = NULL; + psContext->currentPhase = MAIN_PHASE; + + psContext->ClearDependencyData(); + + ClampPartialPrecisions(); + + psShader->PrepareStructuredBufferBindingSlots(); + + ShaderPhase &phase = psShader->asPhases[0]; + phase.UnvectorizeImmMoves(); + psContext->DoDataTypeAnalysis(&phase); + phase.ResolveUAVProperties(); + psShader->ResolveStructuredBufferBindingSlots(&phase); + phase.PruneConstArrays(); + HLSLcc::DoLoopTransform(phase); + + psShader->PruneTempRegisters(); + + bcatcstr(glsl, "#include \n#include \nusing namespace metal;\n"); + + + for (i = 0; i < psShader->asPhases[0].psDecl.size(); ++i) + { + TranslateDeclaration(&psShader->asPhases[0].psDecl[i]); + } + + if (m_StructDefinitions[GetInputStructName()].m_Members.size() > 0) + { + m_StructDefinitions[""].m_Members.push_back(GetInputStructName() + " input [[ stage_in ]]"); + m_StructDefinitions[""].m_Dependencies.push_back(GetInputStructName()); + } + + if (psShader->eShaderType != COMPUTE_SHADER) + { + if (m_StructDefinitions[GetOutputStructName()].m_Members.size() > 0) + { + m_StructDefinitions[""].m_Dependencies.push_back(GetOutputStructName()); + } + } + + PrintStructDeclarations(m_StructDefinitions); + + psContext->currentGLSLString = &bodyglsl; + + switch (psShader->eShaderType) + { + case VERTEX_SHADER: + bcatcstr(bodyglsl, "vertex Mtl_VertexOut xlatMtlMain(\n"); + break; + case PIXEL_SHADER: + bcatcstr(bodyglsl, "fragment Mtl_FragmentOut xlatMtlMain(\n"); + break; + case COMPUTE_SHADER: + bcatcstr(bodyglsl, "kernel void computeMain(\n"); + break; + default: + // Not supported + ASSERT(0); + return false; + } + psContext->indent++; + for (auto itr = m_StructDefinitions[""].m_Members.begin(); itr != m_StructDefinitions[""].m_Members.end(); itr++) + { + psContext->AddIndentation(); + bcatcstr(bodyglsl, itr->c_str()); + if (itr + 1 != m_StructDefinitions[""].m_Members.end()) + 
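+            // every entry-point argument except the last gets a trailing comma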
bcatcstr(bodyglsl, ",\n"); + } + + bcatcstr(bodyglsl, ")\n{\n"); + if (psShader->eShaderType != COMPUTE_SHADER) + { + psContext->AddIndentation(); + bcatcstr(bodyglsl, GetOutputStructName().c_str()); + bcatcstr(bodyglsl, " output;\n"); + } + + if (psContext->psShader->asPhases[0].earlyMain->slen > 1) + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(bodyglsl, "//--- Start Early Main ---\n"); +#endif + bconcat(bodyglsl, psContext->psShader->asPhases[0].earlyMain); +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(bodyglsl, "//--- End Early Main ---\n"); +#endif + } + + for (i = 0; i < psShader->asPhases[0].psInst.size(); ++i) + { + TranslateInstruction(&psShader->asPhases[0].psInst[i]); + } + + psContext->indent--; + + bcatcstr(bodyglsl, "}\n"); + + psContext->currentGLSLString = &glsl; + + bcatcstr(glsl, m_ExtraGlobalDefinitions.c_str()); + + // Print out extra functions we generated + std::for_each(m_FunctionDefinitions.begin(), m_FunctionDefinitions.end(), [&glsl](const FunctionDefinitions::value_type &p) + { + bcatcstr(glsl, p.second.c_str()); + bcatcstr(glsl, "\n"); + }); + + // And then the actual function body + bconcat(glsl, bodyglsl); + bdestroy(bodyglsl); + + return true; +} + +void ToMetal::DeclareExtraFunction(const std::string &name, const std::string &body) +{ + if (m_FunctionDefinitions.find(name) != m_FunctionDefinitions.end()) + return; + m_FunctionDefinitions.insert(std::make_pair(name, body)); +} + + +std::string ToMetal::GetOutputStructName() const +{ + switch(psContext->psShader->eShaderType) + { + case VERTEX_SHADER: + return "Mtl_VertexOut"; + case PIXEL_SHADER: + return "Mtl_FragmentOut"; + default: + ASSERT(0); + return ""; + } +} + +std::string ToMetal::GetInputStructName() const +{ + switch (psContext->psShader->eShaderType) + { + case VERTEX_SHADER: + return "Mtl_VertexIn"; + case PIXEL_SHADER: + return "Mtl_FragmentIn"; + case COMPUTE_SHADER: + return "Mtl_KernelIn"; + default: + ASSERT(0); + return ""; + } +} + +void ToMetal::SetIOPrefixes() +{ + switch (psContext->psShader->eShaderType) + { + case VERTEX_SHADER: + psContext->inputPrefix = "input."; + psContext->outputPrefix = "output."; + break; + + case PIXEL_SHADER: + psContext->inputPrefix = "input."; + psContext->outputPrefix = "output."; + break; + + case COMPUTE_SHADER: + psContext->inputPrefix = ""; + psContext->outputPrefix = ""; + break; + default: + ASSERT(0); + break; + } +} + +void ToMetal::ClampPartialPrecisions() +{ + HLSLcc::ForEachOperand(psContext->psShader->asPhases[0].psInst.begin(), psContext->psShader->asPhases[0].psInst.end(), FEO_FLAG_ALL, + [](std::vector::iterator &i, Operand *o, uint32_t flags) + { + if (o->eMinPrecision == OPERAND_MIN_PRECISION_FLOAT_2_8) + o->eMinPrecision = OPERAND_MIN_PRECISION_FLOAT_16; + }); +} diff --git a/src/toMetalDeclaration.cpp b/src/toMetalDeclaration.cpp new file mode 100644 index 0000000..acbc392 --- /dev/null +++ b/src/toMetalDeclaration.cpp @@ -0,0 +1,1979 @@ + +#include "internal_includes/toMetal.h" +#include "internal_includes/debug.h" +#include "internal_includes/HLSLccToolkit.h" +#include "internal_includes/Declaration.h" +#include +#include + +#ifdef _MSC_VER +#define fpcheck(x) (_isnan(x) || !_finite(x)) +#else +#include +#define fpcheck(x) ((std::isnan(x)) || (std::isinf(x))) +#endif + + +bool ToMetal::TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix) +{ + if (sig && (sig->eSystemValueType 
== NAME_POSITION || (sig->semanticName == "POS" && sig->ui32SemanticIndex == 0)) && psContext->psShader->eShaderType == VERTEX_SHADER) + { + result = "mtl_Position"; + return true; + } + + if (sig) + { + switch (sig->eSystemValueType) + { + case NAME_POSITION: + ASSERT(psContext->psShader->eShaderType == PIXEL_SHADER); + result = "mtl_FragCoord"; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + return true; + case NAME_RENDER_TARGET_ARRAY_INDEX: + result = "mtl_Layer"; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + return true; + case NAME_CLIP_DISTANCE: + result = "mtl_ClipDistance"; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + return true; + /* case NAME_VIEWPORT_ARRAY_INDEX: + result = "gl_ViewportIndex"; + if (puiIgnoreSwizzle) + *puiIgnoreSwizzle = 1; + return true;*/ + case NAME_VERTEX_ID: + result = "mtl_VertexID"; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case NAME_INSTANCE_ID: + result = "mtl_InstanceID"; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + return true; + case NAME_IS_FRONT_FACE: + result = "(mtl_FrontFace ? 0xffffffffu : uint(0))"; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case NAME_SAMPLE_INDEX: + result = "mtl_SampleID"; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + + default: + break; + } + } + + switch (psOperand->eType) + { + case OPERAND_TYPE_INPUT_COVERAGE_MASK: + case OPERAND_TYPE_OUTPUT_COVERAGE_MASK: + result = "mtl_CoverageMask"; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case OPERAND_TYPE_INPUT_THREAD_ID: + result = "mtl_ThreadID"; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + return true; + case OPERAND_TYPE_INPUT_THREAD_GROUP_ID: + result = "mtl_ThreadGroupID"; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + return true; + case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP: + result = "mtl_ThreadIDInGroup"; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + return true; + case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED: + result = "mtl_ThreadIndexInThreadGroup"; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case OPERAND_TYPE_OUTPUT_DEPTH: + case OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL: + case OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL: + result = "mtl_Depth"; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case OPERAND_TYPE_OUTPUT: + case OPERAND_TYPE_INPUT: + { + std::ostringstream oss; + ASSERT(sig != NULL); + oss << sig->semanticName << sig->ui32SemanticIndex; + result = oss.str(); + if (HLSLcc::WriteMaskToComponentCount(sig->ui32Mask) == 1 && pui32IgnoreSwizzle != NULL) + *pui32IgnoreSwizzle = 1; + return true; + } + default: + ASSERT(0); + break; + } + + + + return false; +} + +void ToMetal::DeclareBuiltinInput(const Declaration *psDecl) +{ + const SPECIAL_NAME eSpecialName = psDecl->asOperands[0].eSpecialName; + + switch (eSpecialName) + { + case NAME_POSITION: + ASSERT(psContext->psShader->eShaderType == PIXEL_SHADER); + m_StructDefinitions[""].m_Members.push_back("float4 mtl_FragCoord [[ position ]]"); + break; + case NAME_RENDER_TARGET_ARRAY_INDEX: +#if 0 + // Only supported on a Mac + 
m_StructDefinitions[""].m_Members.push_back("uint mtl_Layer [[ render_target_array_index ]]"); +#else + // Not on Metal + ASSERT(0); +#endif + break; + case NAME_CLIP_DISTANCE: + ASSERT(0); // Should never be an input + break; + case NAME_VIEWPORT_ARRAY_INDEX: + // Not on Metal + ASSERT(0); + break; + case NAME_INSTANCE_ID: + m_StructDefinitions[""].m_Members.push_back("uint mtl_InstanceID [[ instance_id ]]"); + break; + case NAME_IS_FRONT_FACE: + m_StructDefinitions[""].m_Members.push_back("bool mtl_FrontFace [[ front_facing ]]"); + break; + case NAME_SAMPLE_INDEX: + m_StructDefinitions[""].m_Members.push_back("uint mtl_SampleID [[ sample_id ]]"); + break; + case NAME_VERTEX_ID: + m_StructDefinitions[""].m_Members.push_back("uint mtl_VertexID [[ vertex_id ]]"); + break; + case NAME_PRIMITIVE_ID: + // Not on Metal + ASSERT(0); + break; + default: + m_StructDefinitions[""].m_Members.push_back(std::string("float4 ").append(psDecl->asOperands[0].specialName)); + ASSERT(0); // Catch this to see what's happening + break; + } +} + +void ToMetal::DeclareBuiltinOutput(const Declaration *psDecl) +{ + std::string out = GetOutputStructName(); + + switch (psDecl->asOperands[0].eSpecialName) + { + case NAME_POSITION: + m_StructDefinitions[out].m_Members.push_back("float4 mtl_Position [[ position ]]"); + break; + case NAME_RENDER_TARGET_ARRAY_INDEX: +#if 0 + // Only supported on a Mac + m_StructDefinitions[out].m_Members.push_back("uint mtl_Layer [[ render_target_array_index ]]"); +#else + // Not on Metal + ASSERT(0); +#endif + break; + case NAME_CLIP_DISTANCE: + m_StructDefinitions[out].m_Members.push_back("float4 mtl_ClipDistance [[ clip_distance ]]"); + break; + + case NAME_VIEWPORT_ARRAY_INDEX: + // Not on Metal + ASSERT(0); + break; + case NAME_VERTEX_ID: + ASSERT(0); //VertexID is not an output + break; + case NAME_PRIMITIVE_ID: + // Not on Metal + ASSERT(0); + break; + case NAME_INSTANCE_ID: + ASSERT(0); //InstanceID is not an output + break; + case NAME_IS_FRONT_FACE: + ASSERT(0); //FrontFacing is not an output + break; + case NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: + // Not on Metal + ASSERT(0); + break; + case NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: + // Not on Metal + ASSERT(0); + break; + case NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: + // Not on Metal + ASSERT(0); + break; + case NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: + // Not on Metal + ASSERT(0); + break; + case NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: + // Not on Metal + ASSERT(0); + break; + case NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: + // Not on Metal + ASSERT(0); + break; + case NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: + // Not on Metal + ASSERT(0); + break; + case NAME_FINAL_LINE_DENSITY_TESSFACTOR: + // Not on Metal + ASSERT(0); + break; + case NAME_FINAL_LINE_DETAIL_TESSFACTOR: + // Not on Metal + ASSERT(0); + break; + case NAME_FINAL_TRI_INSIDE_TESSFACTOR: + case NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR: + // Not on Metal + ASSERT(0); + break; + case NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR: + // Not on Metal + ASSERT(0); + break; + default: + // This might be SV_Position (because d3dcompiler is weird). 
Get signature and check + const ShaderInfo::InOutSignature *sig = NULL; + psContext->psShader->sInfo.GetOutputSignatureFromRegister(psDecl->asOperands[0].ui32RegisterNumber, psDecl->asOperands[0].GetAccessMask(), 0, &sig); + ASSERT(sig != NULL); + if (sig->eSystemValueType == NAME_POSITION && sig->ui32SemanticIndex == 0) + { + m_StructDefinitions[out].m_Members.push_back("float4 mtl_Position [[ position ]]"); + break; + } + + ASSERT(0); // Wut + break; + } +} + +static std::string BuildOperandTypeString(OPERAND_MIN_PRECISION ePrec, INOUT_COMPONENT_TYPE eType, int numComponents) +{ + SHADER_VARIABLE_TYPE t = SVT_FLOAT; + switch (eType) + { + case INOUT_COMPONENT_FLOAT32: + t = SVT_FLOAT; + break; + case INOUT_COMPONENT_UINT32: + t = SVT_UINT; + break; + case INOUT_COMPONENT_SINT32: + t = SVT_INT; + break; + default: + ASSERT(0); + break; + } + // Can be overridden by precision + switch (ePrec) + { + case OPERAND_MIN_PRECISION_DEFAULT: + break; + + case OPERAND_MIN_PRECISION_FLOAT_16: + ASSERT(eType == INOUT_COMPONENT_FLOAT32); + t = SVT_FLOAT16; + break; + + case OPERAND_MIN_PRECISION_FLOAT_2_8: + ASSERT(eType == INOUT_COMPONENT_FLOAT32); + t = SVT_FLOAT10; + break; + + case OPERAND_MIN_PRECISION_SINT_16: + ASSERT(eType == INOUT_COMPONENT_SINT32); + t = SVT_INT16; + break; + case OPERAND_MIN_PRECISION_UINT_16: + ASSERT(eType == INOUT_COMPONENT_UINT32); + t = SVT_UINT16; + break; + } + return HLSLcc::GetConstructorForTypeMetal(t, numComponents); +} + +void ToMetal::HandleOutputRedirect(const Declaration *psDecl, const std::string &typeName) +{ + const Operand *psOperand = &psDecl->asOperands[0]; + Shader *psShader = psContext->psShader; + bstring glsl = *psContext->currentGLSLString; + int needsRedirect = 0; + const ShaderInfo::InOutSignature *psSig = NULL; + + int regSpace = psOperand->GetRegisterSpace(psContext); + if (regSpace == 0 && psShader->asPhases[psContext->currentPhase].acOutputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xff) + { + needsRedirect = 1; + } + else if (regSpace == 1 && psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xff) + { + needsRedirect = 1; + } + + if (needsRedirect == 1) + { + // TODO What if this is indexed? + ShaderPhase *psPhase = &psShader->asPhases[psContext->currentPhase]; + int comp = 0; + uint32_t origMask = psOperand->ui32CompMask; + + ASSERT(psContext->psShader->aIndexedOutput[regSpace][psOperand->ui32RegisterNumber] == 0); + + psContext->AddIndentation(); + bformata(psPhase->earlyMain, "%s phase%d_Output%d_%d;\n", typeName.c_str(), psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); + + psPhase->hasPostShaderCode = 1; + psContext->currentGLSLString = &psPhase->postShaderCode; + + while (comp < 4) + { + int numComps = 0; + int hasCast = 0; + uint32_t mask, i; + psSig = NULL; + if (regSpace == 0) + psContext->psShader->sInfo.GetOutputSignatureFromRegister(psOperand->ui32RegisterNumber, 1 << comp, psContext->psShader->ui32CurrentVertexOutputStream, &psSig, true); + else + psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, 1 << comp, &psSig, true); + + // The register isn't necessarily packed full. Continue with the next component. 
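+                // When a signature element is present, the code below copies it from the phase temp into the
+                // real output, bitcast back with as_type<...> when the element is declared as int or uint.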
+ if (psSig == NULL) + { + comp++; + continue; + } + + numComps = HLSLcc::GetNumberBitsSet(psSig->ui32Mask); + mask = psSig->ui32Mask; + + ((Operand *)psOperand)->ui32CompMask = 1 << comp; + psContext->AddIndentation(); + bcatcstr(psPhase->postShaderCode, TranslateOperand(psOperand, TO_FLAG_NAME_ONLY).c_str()); + + bcatcstr(psPhase->postShaderCode, " = "); + + if (psSig->eComponentType == INOUT_COMPONENT_SINT32) + { + bformata(psPhase->postShaderCode, "as_type("); + hasCast = 1; + } + else if (psSig->eComponentType == INOUT_COMPONENT_UINT32) + { + bformata(psPhase->postShaderCode, "as_type("); + hasCast = 1; + } + bformata(psPhase->postShaderCode, "phase%d_Output%d_%d.", psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); + // Print out mask + for (i = 0; i < 4; i++) + { + if ((mask & (1 << i)) == 0) + continue; + + bformata(psPhase->postShaderCode, "%c", "xyzw"[i]); + } + + if (hasCast) + bcatcstr(psPhase->postShaderCode, ")"); + comp += numComps; + bcatcstr(psPhase->postShaderCode, ";\n"); + } + + psContext->currentGLSLString = &psContext->glsl; + + ((Operand *)psOperand)->ui32CompMask = origMask; + if (regSpace == 0) + psShader->asPhases[psContext->currentPhase].acOutputNeedsRedirect[psOperand->ui32RegisterNumber] = 0xfe; + else + psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] = 0xfe; + } +} + +void ToMetal::HandleInputRedirect(const Declaration *psDecl, const std::string &typeName) +{ + Operand *psOperand = (Operand *)&psDecl->asOperands[0]; + Shader *psShader = psContext->psShader; + bstring glsl = *psContext->currentGLSLString; + int needsRedirect = 0; + const ShaderInfo::InOutSignature *psSig = NULL; + + int regSpace = psOperand->GetRegisterSpace(psContext); + if (regSpace == 0 && psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xff) + { + needsRedirect = 1; + } + + if (needsRedirect == 1) + { + // TODO What if this is indexed? + ShaderPhase *psPhase = &psShader->asPhases[psContext->currentPhase]; + int needsLooping = 0; + int i = 0; + uint32_t origArraySize = 0; + uint32_t origMask = psOperand->ui32CompMask; + + ASSERT(psContext->psShader->aIndexedInput[regSpace][psOperand->ui32RegisterNumber] == 0); + + psContext->currentGLSLString = &psPhase->earlyMain; + psContext->AddIndentation(); + + bcatcstr(psPhase->earlyMain, " "); + bformata(psPhase->earlyMain, "%s phase%d_Input%d_%d;\n", typeName.c_str(), psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); + + // Do a conditional loop. In normal cases needsLooping == 0 so this is only run once. 
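+        // Each pass rebuilds one input register in earlyMain as a typed constructor call, e.g. (roughly)
+        // phase0_Input0_2 = float4(input.TEXCOORD0, 0, 0, 0); with as_type<...> casts around non-float
+        // elements and zeroes for components that have no signature entry.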
+ do + { + int comp = 0; + bcatcstr(psPhase->earlyMain, " "); + if (needsLooping) + bformata(psPhase->earlyMain, "phase%d_Input%d_%d[%d] = %s(", psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber, i, typeName.c_str()); + else + bformata(psPhase->earlyMain, "phase%d_Input%d_%d = %s(", psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber, typeName.c_str()); + + while (comp < 4) + { + int numComps = 0; + int hasCast = 0; + int hasSig = 0; + if (regSpace == 0) + hasSig = psContext->psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, 1 << comp, &psSig, true); + else + hasSig = psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, 1 << comp, &psSig, true); + + if (hasSig) + { + numComps = HLSLcc::GetNumberBitsSet(psSig->ui32Mask); + if (psSig->eComponentType != INOUT_COMPONENT_FLOAT32) + { + if (numComps > 1) + bformata(psPhase->earlyMain, "as_type(", numComps); + else + bformata(psPhase->earlyMain, "as_type("); + hasCast = 1; + } + + // Override the array size of the operand so TranslateOperand call below prints the correct index + if (needsLooping) + psOperand->aui32ArraySizes[0] = i; + + // And the component mask + psOperand->ui32CompMask = 1 << comp; + + bformata(psPhase->earlyMain, TranslateOperand(psOperand, TO_FLAG_NAME_ONLY).c_str()); + + // Restore the original array size value and mask + psOperand->ui32CompMask = origMask; + if (needsLooping) + psOperand->aui32ArraySizes[0] = origArraySize; + + if (hasCast) + bcatcstr(psPhase->earlyMain, ")"); + comp += numComps; + } + else // no signature found -> fill with zero + { + bcatcstr(psPhase->earlyMain, "0"); + comp++; + } + + if (comp < 4) + bcatcstr(psPhase->earlyMain, ", "); + } + bcatcstr(psPhase->earlyMain, ");\n"); + + } while ((--i) >= 0); + + psContext->currentGLSLString = &psContext->glsl; + + if (regSpace == 0) + psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] = 0xfe; + else + psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] = 0xfe; + } +} + +static std::string TranslateResourceDeclaration(HLSLCrossCompilerContext* psContext, + const Declaration *psDecl, + bool isDepthSampler, bool isUAV) +{ + std::ostringstream oss; + const ResourceBinding* psBinding = 0; + const RESOURCE_DIMENSION eDimension = psDecl->value.eResourceDimension; + const uint32_t ui32RegisterNumber = psDecl->asOperands[0].ui32RegisterNumber; + REFLECT_RESOURCE_PRECISION ePrec = REFLECT_RESOURCE_PRECISION_UNKNOWN; + RESOURCE_RETURN_TYPE eType = RETURN_TYPE_UNORM; + std::string access = "sample"; + + if (isUAV) + { + if ((psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_WRITE) != 0) + { + access = "write"; + if (psContext->psShader->eShaderType != COMPUTE_SHADER) + psContext->m_Reflection.OnDiagnostics("This shader might not work on all Metal devices because of texture writes on non-compute shaders.", 0, false); + + if ((psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_READ) != 0) + { + access = "read_write"; + } + } + else + { + access = "read"; + eType = psDecl->sUAV.Type; + } + int found; + found = psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, ui32RegisterNumber, &psBinding); + if (found) + { + ePrec = psBinding->ePrecision; + eType = (RESOURCE_RETURN_TYPE)psBinding->ui32ReturnType; + // Figured out by reverse engineering bitcode. 
flags b00xx means float1, b01xx = float2, b10xx = float3 and b11xx = float4 + } + + } + else + { + int found; + found = psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, ui32RegisterNumber, &psBinding); + if (found) + { + eType = (RESOURCE_RETURN_TYPE)psBinding->ui32ReturnType; + ePrec = psBinding->ePrecision; + + // TODO: it might make sense to propagate float earlier (as hlslcc might declare other variables depending on sampler prec) + // metal supports ONLY float32 depth textures + if(isDepthSampler) + { + switch(eDimension) + { + case RESOURCE_DIMENSION_TEXTURE2D: case RESOURCE_DIMENSION_TEXTURE2DMS: case RESOURCE_DIMENSION_TEXTURECUBE: + case RESOURCE_DIMENSION_TEXTURE2DARRAY: case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + ePrec = REFLECT_RESOURCE_PRECISION_HIGHP, eType = RETURN_TYPE_FLOAT; break; + default: + break; + } + } + } + if (eDimension == RESOURCE_DIMENSION_BUFFER) + access = "read"; + } + + std::string typeName = HLSLcc::GetConstructorForTypeMetal(HLSLcc::ResourceReturnTypeToSVTType(eType, ePrec), 1); + + switch (eDimension) + { + case RESOURCE_DIMENSION_BUFFER: + { + oss << "texture1d<" << typeName << ", access::"<< access <<" >"; + return oss.str(); + break; + } + + case RESOURCE_DIMENSION_TEXTURE1D: + { + oss << "texture1d<" << typeName << ", access::" << access << " >"; + return oss.str(); + break; + } + + case RESOURCE_DIMENSION_TEXTURE2D: + { + oss << (isDepthSampler ? "depth2d<" : "texture2d<") << typeName << ", access::" << access << " >"; + return oss.str(); + break; + } + + case RESOURCE_DIMENSION_TEXTURE2DMS: + { + oss << (isDepthSampler ? "depth2d_ms<" : "texture2d_ms<") << typeName << ", access::" << access << " >"; + return oss.str(); + break; + } + + case RESOURCE_DIMENSION_TEXTURE3D: + { + oss << "texture3d<" << typeName << ", access::" << access << " >"; + return oss.str(); + break; + } + + case RESOURCE_DIMENSION_TEXTURECUBE: + { + oss << (isDepthSampler ? "depthcube<" : "texturecube<") << typeName << ", access::" << access << " >"; + return oss.str(); + break; + } + + case RESOURCE_DIMENSION_TEXTURE1DARRAY: + { + oss << "texture1d_array<" << typeName << ", access::" << access << " >"; + return oss.str(); + break; + } + + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + { + oss << (isDepthSampler ? "depth2d_array<" : "texture2d_array<") << typeName << ", access::" << access << " >"; + return oss.str(); + break; + } + + case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + { + // Not really supported in Metal but let's print it here anyway + oss << "texture2d_ms_array<" << typeName << ", access::" << access << " >"; + return oss.str(); + break; + } + + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + { + oss << (isDepthSampler ? 
"depthcube_array<" : "texturecube_array<") << typeName << ", access::" << access << " >"; + return oss.str(); + break; + } + default: + ASSERT(0); + oss << "texture2d<" << typeName << ", access::" << access << " >"; + return oss.str(); + } + +} + +static std::string GetInterpolationString(INTERPOLATION_MODE eMode) +{ + switch (eMode) + { + case INTERPOLATION_CONSTANT: + return " [[ flat ]]"; + + case INTERPOLATION_LINEAR: + return ""; + + case INTERPOLATION_LINEAR_CENTROID: + return " [[ centroid ]]"; + + case INTERPOLATION_LINEAR_NOPERSPECTIVE: + return " [[ center_perspective ]]"; + + case INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID: + return " [[ centroid_noperspective ]]"; + + case INTERPOLATION_LINEAR_SAMPLE: + return " [[ sample_perspective ]]"; + + case INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE: + return " [[ sample_noperspective ]]"; + default: + ASSERT(0); + return ""; + } +} + + +void ToMetal::DeclareStructVariable(const std::string &parentName, const ShaderVar &var, bool withinCB, uint32_t cumulativeOffset) +{ + DeclareStructVariable(parentName, var.sType, withinCB, cumulativeOffset + var.ui32StartOffset); +} + +void ToMetal::DeclareStructVariable(const std::string &parentName, const ShaderVarType &var, bool withinCB, uint32_t cumulativeOffset) +{ + // CB arrays need to be defined as 4 component vectors to match DX11 data layout + bool arrayWithinCB = (withinCB && (var.Elements > 1) && (psContext->psShader->eShaderType == COMPUTE_SHADER)); + bool doDeclare = true; + + if (var.Class == SVC_STRUCT) + { + std::ostringstream oss; + if (m_StructDefinitions.find(var.name + "_Type") == m_StructDefinitions.end()) + DeclareStructType(var.name + "_Type", var.Members, withinCB, cumulativeOffset + var.Offset); + oss << var.name << "_Type " << var.name; + if (var.Elements > 1) + { + oss << "[" << var.Elements << "]"; + } + m_StructDefinitions[parentName].m_Members.push_back(oss.str()); + m_StructDefinitions[parentName].m_Dependencies.push_back(var.name + "_Type"); + return; + } + + else if (var.Class == SVC_MATRIX_COLUMNS || var.Class == SVC_MATRIX_ROWS) + { + std::ostringstream oss; + if (psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) + { + // Translate matrices into vec4 arrays + char prefix[256]; + sprintf(prefix, HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING, var.Rows, var.Columns); + oss << HLSLcc::GetConstructorForType(psContext, var.Type, 4) << " " << prefix << var.name; + + uint32_t elemCount = (var.Class == SVC_MATRIX_COLUMNS ? var.Columns : var.Rows); + if (var.Elements > 1) + { + elemCount *= var.Elements; + } + oss << "[" << elemCount << "]"; + + if(withinCB) + { + // On compute shaders we need to reflect the vec array as it is to support all possible matrix sizes correctly. + // On non-compute we can fake that we still have a matrix, as CB upload code will fill the data correctly on 4x4 matrices. + // That way we avoid the issues with mismatching types for builtins etc. 
+ if (psContext->psShader->eShaderType == COMPUTE_SHADER) + doDeclare = psContext->m_Reflection.OnConstant(var.fullName, var.Offset + cumulativeOffset, var.Type, 4, 1, false, elemCount); + else + doDeclare = psContext->m_Reflection.OnConstant(var.fullName, var.Offset + cumulativeOffset, var.Type, var.Rows, var.Columns, true, var.Elements); + } + } + else + { + oss << HLSLcc::GetMatrixTypeName(psContext, var.Type, var.Columns, var.Rows); + oss << " " << var.name; + if (var.Elements > 1) + { + oss << "[" << var.Elements << "]"; + } + + // TODO Verify whether the offset is from the beginning of the CB or from the beginning of the struct + if(withinCB) + doDeclare = psContext->m_Reflection.OnConstant(var.fullName, var.Offset + cumulativeOffset, var.Type, var.Rows, var.Columns, true, var.Elements); + } + + if (doDeclare) + m_StructDefinitions[parentName].m_Members.push_back(oss.str()); + } + else + if (var.Class == SVC_VECTOR && var.Columns > 1) + { + std::ostringstream oss; + oss << HLSLcc::GetConstructorForTypeMetal(var.Type, arrayWithinCB ? 4 : var.Columns); + oss << " " << var.name; + if (var.Elements > 1) + { + oss << "[" << var.Elements << "]"; + } + + if (withinCB) + doDeclare = psContext->m_Reflection.OnConstant(var.fullName, var.Offset + cumulativeOffset, var.Type, 1, var.Columns, false, var.Elements); + + if (doDeclare) + m_StructDefinitions[parentName].m_Members.push_back(oss.str()); + } + else + if ((var.Class == SVC_SCALAR) || + (var.Class == SVC_VECTOR && var.Columns == 1)) + { + if (var.Type == SVT_BOOL) + { + //Use int instead of bool. + //Allows implicit conversions to integer and + //bool consumes 4-bytes in HLSL and GLSL anyway. + ((ShaderVarType &)var).Type = SVT_INT; + } + + std::ostringstream oss; + oss << HLSLcc::GetConstructorForTypeMetal(var.Type, arrayWithinCB ? 4 : 1); + oss << " " << var.name; + if (var.Elements > 1) + { + oss << "[" << var.Elements << "]"; + } + + if (withinCB) + doDeclare = psContext->m_Reflection.OnConstant(var.fullName, var.Offset + cumulativeOffset, var.Type, 1, 1, false, var.Elements); + + if (doDeclare) + m_StructDefinitions[parentName].m_Members.push_back(oss.str()); + } + else + { + ASSERT(0); + } +} + +void ToMetal::DeclareStructType(const std::string &name, const std::vector &contents, bool withinCB, uint32_t cumulativeOffset, bool stripUnused /* = false */) +{ + for (std::vector::const_iterator itr = contents.begin(); itr != contents.end(); itr++) + { + if(stripUnused && !itr->sType.m_IsUsed) + continue; + + DeclareStructVariable(name, *itr, withinCB, cumulativeOffset); + } +} + +void ToMetal::DeclareStructType(const std::string &name, const std::vector &contents, bool withinCB, uint32_t cumulativeOffset) +{ + for (std::vector::const_iterator itr = contents.begin(); itr != contents.end(); itr++) + { + DeclareStructVariable(name, *itr, withinCB, cumulativeOffset); + } +} + +void ToMetal::DeclareConstantBuffer(const ConstantBuffer *psCBuf, uint32_t ui32BindingPoint) +{ + std::string cbname = psCBuf->name.c_str(); + + const bool isGlobals = (cbname == "$Globals"); + const bool stripUnused = isGlobals && (psContext->flags & HLSLCC_FLAG_REMOVE_UNUSED_GLOBALS); + + if (cbname[0] == '$') + cbname = cbname.substr(1); + + // Note: if we're stripping unused members, both ui32TotalSizeInBytes and individual offsets into reflection will be completely off. + // However, the reflection layer re-calculates both to match Metal alignment rules anyway, so we're good. 
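+ // For reference, the net result in the generated Metal is a struct type plus an entry-point argument along the
+ // lines of (name and slot illustrative):
+ //   struct MyCB_Type { ... };
+ //   constant MyCB_Type& MyCB [[ buffer(2) ]]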
+ if (!psContext->m_Reflection.OnConstantBuffer(cbname, psCBuf->ui32TotalSizeInBytes, psCBuf->GetMemberCount(stripUnused))) + return; + + DeclareStructType(cbname + "_Type", psCBuf->asVars, true, 0, stripUnused); + std::ostringstream oss; + uint32_t slot = m_BufferSlots.GetBindingSlot(ui32BindingPoint, BindingSlotAllocator::ConstantBuffer); + oss << "constant " << cbname << "_Type& " << cbname << " [[ buffer("<< slot <<") ]]"; + m_StructDefinitions[""].m_Members.push_back(oss.str()); + m_StructDefinitions[""].m_Dependencies.push_back(cbname + "_Type"); + + psContext->m_Reflection.OnConstantBufferBinding(cbname, slot); + + +} + +void ToMetal::DeclareBufferVariable(const Declaration *psDecl, const bool isRaw, const bool isUAV) +{ + uint32_t ui32BindingPoint = psDecl->asOperands[0].ui32RegisterNumber; + std::string BufName, BufType; + + BufName = ""; + BufType = ""; + + // Use original HLSL bindings for UAVs only. For non-UAV buffers we have resolved new binding points from the same register space. + if (!isUAV) + ui32BindingPoint = psContext->psShader->aui32StructuredBufferBindingPoints[psContext->psShader->ui32CurrentStructuredBufferIndex++]; + + BufName = ResourceName(isUAV ? RGROUP_UAV : RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber); + + if (!isRaw) // declare struct containing uint array when needed + { + std::ostringstream typeoss; + BufType = BufName + "_Type"; + typeoss << "uint value["; + typeoss << psDecl->ui32BufferStride / 4 << "]"; + m_StructDefinitions[BufType].m_Members.push_back(typeoss.str()); + m_StructDefinitions[""].m_Dependencies.push_back(BufType); + } + + std::ostringstream oss; + + if (!isUAV || ((psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_WRITE) == 0)) + { + oss << "const "; + } + else + { + if (psContext->psShader->eShaderType != COMPUTE_SHADER) + psContext->m_Reflection.OnDiagnostics("This shader might not work on all Metal devices because of buffer writes on non-compute shaders.", 0, false); + } + + if (isRaw) + oss << "device uint *" << BufName; + else + oss << "device " << BufType << " *" << BufName; + + uint32_t loc = m_BufferSlots.GetBindingSlot(ui32BindingPoint, BindingSlotAllocator::RWBuffer); + oss << " [[ buffer(" << loc << ") ]]"; + + m_StructDefinitions[""].m_Members.push_back(oss.str()); + psContext->m_Reflection.OnBufferBinding(BufName, loc, isUAV); +} + + +void ToMetal::TranslateDeclaration(const Declaration* psDecl) +{ + bstring glsl = *psContext->currentGLSLString; + Shader* psShader = psContext->psShader; + + switch (psDecl->eOpcode) + { + + case OPCODE_DCL_INPUT_SGV: + case OPCODE_DCL_INPUT_PS_SGV: + DeclareBuiltinInput(psDecl); + break; + case OPCODE_DCL_OUTPUT_SIV: + DeclareBuiltinOutput(psDecl); + break; + case OPCODE_DCL_INPUT: + case OPCODE_DCL_INPUT_PS_SIV: + case OPCODE_DCL_INPUT_SIV: + case OPCODE_DCL_INPUT_PS: + { + const Operand* psOperand = &psDecl->asOperands[0]; + + uint32_t ui32Reg = psDecl->asOperands[0].ui32RegisterNumber; + uint32_t ui32CompMask = psDecl->asOperands[0].ui32CompMask; + + std::string name = psContext->GetDeclaredInputName(psOperand, nullptr, 1, nullptr); + + //Already declared as part of an array? + if (psShader->aIndexedInput[0][psDecl->asOperands[0].ui32RegisterNumber] == -1) + { + ASSERT(0); // Find out what's happening + break; + } + // Already declared? 
+ if ((ui32CompMask != 0) && ((ui32CompMask & ~psShader->acInputDeclared[0][ui32Reg]) == 0)) + { + ASSERT(0); // Catch this + break; + } + + if (psOperand->eType == OPERAND_TYPE_INPUT_COVERAGE_MASK) + { + std::ostringstream oss; + oss << "uint " << name << " [[ sample_mask ]]"; + m_StructDefinitions[""].m_Members.push_back(oss.str()); + break; + } + + if (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_ID) + { + std::ostringstream oss; + oss << "uint3 " << name << " [[ thread_position_in_grid ]]"; + m_StructDefinitions[""].m_Members.push_back(oss.str()); + break; + } + + if (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_GROUP_ID) + { + std::ostringstream oss; + oss << "uint3 " << name << " [[ threadgroup_position_in_grid ]]"; + m_StructDefinitions[""].m_Members.push_back(oss.str()); + break; + } + + if (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP) + { + std::ostringstream oss; + oss << "uint3 " << name << " [[ thread_position_in_threadgroup ]]"; + m_StructDefinitions[""].m_Members.push_back(oss.str()); + break; + } + if (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED) + { + std::ostringstream oss; + oss << "uint " << name << " [[ thread_index_in_threadgroup ]]"; + m_StructDefinitions[""].m_Members.push_back(oss.str()); + break; + } + + if(psDecl->eOpcode == OPCODE_DCL_INPUT_PS_SIV && psOperand->eSpecialName == NAME_POSITION) + { + m_StructDefinitions[""].m_Members.push_back("float4 mtl_FragCoord [[ position ]]"); + break; + } + + if (psContext->psDependencies) + { + if (psShader->eShaderType == PIXEL_SHADER) + { + psContext->psDependencies->SetInterpolationMode(ui32Reg, psDecl->value.eInterpolation); + } + } + + const ShaderInfo::InOutSignature *psSig = NULL; + psContext->psShader->sInfo.GetInputSignatureFromRegister(ui32Reg, ui32CompMask, &psSig); + + int iNumComponents = psOperand->GetNumInputElements(psContext); + psShader->acInputDeclared[0][ui32Reg] = (char)psSig->ui32Mask; + + std::string typeName = BuildOperandTypeString(psOperand->eMinPrecision, psSig->eComponentType, iNumComponents); + + std::string semantic; + if (psContext->psShader->eShaderType == VERTEX_SHADER) + { + std::ostringstream oss; + uint32_t loc = psContext->psDependencies->GetVaryingLocation(name, VERTEX_SHADER, true); + oss << "attribute(" << loc << ")"; + semantic = oss.str(); + psContext->m_Reflection.OnInputBinding(name, loc); + } + else + { + std::ostringstream oss; + + // UNITY_FRAMEBUFFER_FETCH_AVAILABLE + // special case mapping for inout color, see HLSLSupport.cginc + if (psOperand->iPSInOut && name.size() == 10 && !strncmp(name.c_str(), "SV_Target", 9)) + { + // Metal allows color(X) declared in input/output structs + // + // TODO: Improve later when GLES3 support arrives, it requires + // single declaration through inout + oss << "color(" << psSig->ui32SemanticIndex << ")"; + } + else + { + oss << "user(" << name << ")"; + } + semantic = oss.str(); + } + + std::string interpolation = ""; + if (psDecl->eOpcode == OPCODE_DCL_INPUT_PS) + { + interpolation = GetInterpolationString(psDecl->value.eInterpolation); + } + + std::string declString; + if ((OPERAND_INDEX_DIMENSION)psOperand->iIndexDims == INDEX_2D) + { + std::ostringstream oss; + oss << typeName << " " << name << " [ " << psOperand->aui32ArraySizes[0] << " ] " << " [[ " << semantic << " ]] " << interpolation; + declString = oss.str(); + } + else + { + std::ostringstream oss; + oss << typeName << " " << name << " [[ " << semantic << " ]] " << interpolation; + declString = oss.str(); + } + + 
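+ // At this point declString holds one stage-input struct member, e.g. (names and locations illustrative):
+ //   float4 in_POSITION0 [[ attribute(0) ]]                  (vertex stage)
+ //   half4 in_TEXCOORD0 [[ user(in_TEXCOORD0) ]] [[ flat ]]  (fragment stage, interpolation qualifier only when declared)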
m_StructDefinitions[GetInputStructName()].m_Members.push_back(declString); + + HandleInputRedirect(psDecl, BuildOperandTypeString(psOperand->eMinPrecision, INOUT_COMPONENT_FLOAT32, 4)); + break; + } + case OPCODE_DCL_TEMPS: + { + uint32_t i = 0; + const uint32_t ui32NumTemps = psDecl->value.ui32NumTemps; + glsl = psContext->psShader->asPhases[psContext->currentPhase].earlyMain; + for (i = 0; i < ui32NumTemps; i++) + { + if (psShader->psFloatTempSizes[i] != 0) + bformata(glsl, " %s " HLSLCC_TEMP_PREFIX "%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_FLOAT, psShader->psFloatTempSizes[i]), i); + if (psShader->psFloat16TempSizes[i] != 0) + bformata(glsl, " %s " HLSLCC_TEMP_PREFIX "16_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_FLOAT16, psShader->psFloat16TempSizes[i]), i); + if (psShader->psFloat10TempSizes[i] != 0) + bformata(glsl, " %s " HLSLCC_TEMP_PREFIX "10_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_FLOAT10, psShader->psFloat10TempSizes[i]), i); + if (psShader->psIntTempSizes[i] != 0) + bformata(glsl, " %s " HLSLCC_TEMP_PREFIX "i%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_INT, psShader->psIntTempSizes[i]), i); + if (psShader->psInt16TempSizes[i] != 0) + bformata(glsl, " %s " HLSLCC_TEMP_PREFIX "i16_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_INT16, psShader->psInt16TempSizes[i]), i); + if (psShader->psInt12TempSizes[i] != 0) + bformata(glsl, " %s " HLSLCC_TEMP_PREFIX "i12_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_INT12, psShader->psInt12TempSizes[i]), i); + if (psShader->psUIntTempSizes[i] != 0) + bformata(glsl, " %s " HLSLCC_TEMP_PREFIX "u%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_UINT, psShader->psUIntTempSizes[i]), i); + if (psShader->psUInt16TempSizes[i] != 0) + bformata(glsl, " %s " HLSLCC_TEMP_PREFIX "u16_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_UINT16, psShader->psUInt16TempSizes[i]), i); + if (psShader->fp64 && (psShader->psDoubleTempSizes[i] != 0)) + bformata(glsl, " %s " HLSLCC_TEMP_PREFIX "d%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_DOUBLE, psShader->psDoubleTempSizes[i]), i); + if (psShader->psBoolTempSizes[i] != 0) + bformata(glsl, " %s " HLSLCC_TEMP_PREFIX "b%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_BOOL, psShader->psBoolTempSizes[i]), i); + } + break; + } + case OPCODE_SPECIAL_DCL_IMMCONST: + { + ASSERT(0 && "DX9 shaders no longer supported!"); + break; + } + case OPCODE_DCL_CONSTANT_BUFFER: + { + const ConstantBuffer* psCBuf = NULL; + psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, psDecl->asOperands[0].aui32ArraySizes[0], &psCBuf); + ASSERT(psCBuf != NULL); + DeclareConstantBuffer(psCBuf, psDecl->asOperands[0].aui32ArraySizes[0]); + break; + } + case OPCODE_DCL_RESOURCE: + { + DeclareResource(psDecl); + break; + } + case OPCODE_DCL_OUTPUT: + { + DeclareOutput(psDecl); + break; + } + + case OPCODE_DCL_GLOBAL_FLAGS: + { + uint32_t ui32Flags = psDecl->value.ui32GlobalFlags; + + if (ui32Flags & GLOBAL_FLAG_FORCE_EARLY_DEPTH_STENCIL) + { +// bcatcstr(glsl, "layout(early_fragment_tests) in;\n"); + } + if (!(ui32Flags & GLOBAL_FLAG_REFACTORING_ALLOWED)) + { + //TODO add precise + //HLSL precise - http://msdn.microsoft.com/en-us/library/windows/desktop/hh447204(v=vs.85).aspx + } + if (ui32Flags & GLOBAL_FLAG_ENABLE_DOUBLE_PRECISION_FLOAT_OPS) + { + // Not supported on Metal +// bcatcstr(glsl, "#extension GL_ARB_gpu_shader_fp64 : enable\n"); +// psShader->fp64 = 1; + } + break; + } + case OPCODE_DCL_THREAD_GROUP: + { + // Send this info to reflecion: Metal 
gives this at runtime as a param + psContext->m_Reflection.OnThreadGroupSize(psDecl->value.aui32WorkGroupSize[0], + psDecl->value.aui32WorkGroupSize[1], + psDecl->value.aui32WorkGroupSize[2]); + break; + } + case OPCODE_DCL_TESS_OUTPUT_PRIMITIVE: + { + // Not supported + break; + } + case OPCODE_DCL_TESS_DOMAIN: + { + // Not supported + break; + } + case OPCODE_DCL_TESS_PARTITIONING: + { + // Not supported + break; + } + case OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY: + { + // Not supported + break; + } + case OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT: + { + // Not supported + break; + } + case OPCODE_DCL_GS_INPUT_PRIMITIVE: + { + // Not supported + break; + } + case OPCODE_DCL_INTERFACE: + { + // Are interfaces ever even used? + ASSERT(0); + break; + } + case OPCODE_DCL_FUNCTION_BODY: + { + ASSERT(0); + break; + } + case OPCODE_DCL_FUNCTION_TABLE: + { + ASSERT(0); + break; + } + case OPCODE_CUSTOMDATA: + { + // TODO: This is only ever accessed as a float currently. Do trickery if we ever see ints accessed from an array. + // Walk through all the chunks we've seen in this phase. + ShaderPhase &sp = psShader->asPhases[psContext->currentPhase]; + std::for_each(sp.m_ConstantArrayInfo.m_Chunks.begin(), sp.m_ConstantArrayInfo.m_Chunks.end(), [this](const std::pair &chunk) + { + bstring glsl = *psContext->currentGLSLString; + uint32_t componentCount = chunk.second.m_ComponentCount; + // Just do the declaration here and contents to earlyMain. + if (componentCount == 1) + bformata(glsl, "constant float ImmCB_%d_%d_%d[%d] =\n{\n", psContext->currentPhase, chunk.first, chunk.second.m_Rebase, chunk.second.m_Size); + else + bformata(glsl, "constant float%d ImmCB_%d_%d_%d[%d] =\n{\n", componentCount, psContext->currentPhase, chunk.first, chunk.second.m_Rebase, chunk.second.m_Size); + + Declaration *psDecl = psContext->psShader->asPhases[psContext->currentPhase].m_ConstantArrayInfo.m_OrigDeclaration; + if (componentCount == 1) + { + for (uint32_t i = 0; i < chunk.second.m_Size; i++) + { + if (i != 0) + bcatcstr(glsl, ",\n"); + float val[4] = { + *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].a, + *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].b, + *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].c, + *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].d + }; + if (fpcheck(val[chunk.second.m_Rebase])) + bformata(glsl, "\tas_type(%Xu)", *(uint32_t *)&val[chunk.second.m_Rebase]); + else + { + bcatcstr(glsl, "\t"); + HLSLcc::PrintFloat(glsl, val[chunk.second.m_Rebase]); + } + } + bcatcstr(glsl, "\n};\n"); + } + else + { + for (uint32_t i = 0; i < chunk.second.m_Size; i++) + { + if (i != 0) + bcatcstr(glsl, ",\n"); + float val[4] = { + *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].a, + *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].b, + *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].c, + *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].d + }; + bformata(glsl, "\tfloat%d(", componentCount); + for (uint32_t k = 0; k < componentCount; k++) + { + if (k != 0) + bcatcstr(glsl, ", "); + if (fpcheck(val[k])) + bformata(glsl, "as_type(%Xu)", *(uint32_t *)&val[k + chunk.second.m_Rebase]); + else + HLSLcc::PrintFloat(glsl, val[k + chunk.second.m_Rebase]); + } + bcatcstr(glsl, ")"); + } + bcatcstr(glsl, "\n};\n"); + } + + }); + + break; + } + case OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT: + case OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT: + break; // Nothing to do + + case OPCODE_DCL_INDEXABLE_TEMP: + { + const uint32_t ui32RegIndex = 
psDecl->sIdxTemp.ui32RegIndex; + const uint32_t ui32RegCount = psDecl->sIdxTemp.ui32RegCount; + const uint32_t ui32RegComponentSize = psDecl->sIdxTemp.ui32RegComponentSize; + bformata(psContext->psShader->asPhases[psContext->currentPhase].earlyMain, "float%d TempArray%d[%d];\n", ui32RegComponentSize, ui32RegIndex, ui32RegCount); + break; + } + case OPCODE_DCL_INDEX_RANGE: + { + switch (psDecl->asOperands[0].eType) + { + case OPERAND_TYPE_OUTPUT: + case OPERAND_TYPE_INPUT: + { + const ShaderInfo::InOutSignature* psSignature = NULL; + const char* type = "float"; + uint32_t startReg = 0; + uint32_t i; + bstring *oldString; + int regSpace = psDecl->asOperands[0].GetRegisterSpace(psContext); + int isInput = psDecl->asOperands[0].eType == OPERAND_TYPE_INPUT ? 1 : 0; + + if (regSpace == 0) + { + if (isInput) + psShader->sInfo.GetInputSignatureFromRegister( + psDecl->asOperands[0].ui32RegisterNumber, + psDecl->asOperands[0].ui32CompMask, + &psSignature); + else + psShader->sInfo.GetOutputSignatureFromRegister( + psDecl->asOperands[0].ui32RegisterNumber, + psDecl->asOperands[0].ui32CompMask, + psShader->ui32CurrentVertexOutputStream, + &psSignature); + } + else + psShader->sInfo.GetPatchConstantSignatureFromRegister(psDecl->asOperands[0].ui32RegisterNumber, psDecl->asOperands[0].ui32CompMask, &psSignature); + + ASSERT(psSignature != NULL); + + switch (psSignature->eComponentType) + { + case INOUT_COMPONENT_UINT32: + { + type = "uint"; + break; + } + case INOUT_COMPONENT_SINT32: + { + type = "int"; + break; + } + case INOUT_COMPONENT_FLOAT32: + { + break; + } + default: + ASSERT(0); + break; + } + + switch (psSignature->eMinPrec) // TODO What if the inputs in the indexed range are of different precisions? + { + default: + break; + case MIN_PRECISION_ANY_16: + ASSERT(0); // Wut? + break; + case MIN_PRECISION_FLOAT_16: + case MIN_PRECISION_FLOAT_2_8: + type = "half"; + break; + case MIN_PRECISION_SINT_16: + type = "short"; + break; + case MIN_PRECISION_UINT_16: + type = "ushort"; + break; + } + + startReg = psDecl->asOperands[0].ui32RegisterNumber; + bformata(psContext->psShader->asPhases[psContext->currentPhase].earlyMain, "%s4 phase%d_%sput%d_%d[%d];\n", type, psContext->currentPhase, isInput ? "In" : "Out", regSpace, startReg, psDecl->value.ui32IndexRange); + oldString = psContext->currentGLSLString; + glsl = isInput ? 
psContext->psShader->asPhases[psContext->currentPhase].earlyMain : psContext->psShader->asPhases[psContext->currentPhase].postShaderCode; + psContext->currentGLSLString = &glsl; + if (isInput == 0) + psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode = 1; + for (i = 0; i < psDecl->value.ui32IndexRange; i++) + { + int dummy = 0; + std::string realName; + uint32_t destMask = psDecl->asOperands[0].ui32CompMask; + uint32_t rebase = 0; + const ShaderInfo::InOutSignature *psSig = NULL; + uint32_t regSpace = psDecl->asOperands[0].GetRegisterSpace(psContext) == 0; + + if (regSpace) + if (isInput) + psContext->psShader->sInfo.GetInputSignatureFromRegister(startReg + i, destMask, &psSig); + else + psContext->psShader->sInfo.GetOutputSignatureFromRegister(startReg + i, destMask, 0, &psSig); + else + psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(startReg + i, destMask, &psSig); + + ASSERT(psSig != NULL); + + if ((psSig->ui32Mask & destMask) == 0) + continue; // Skip dummy writes (vec2 texcoords get filled to vec4 with zeroes etc) + + while ((psSig->ui32Mask & (1 << rebase)) == 0) + rebase++; + + ((Declaration *)psDecl)->asOperands[0].ui32RegisterNumber = startReg + i; + + if (isInput) + { + realName = psContext->GetDeclaredInputName(&psDecl->asOperands[0], &dummy, 1, NULL); + + psContext->AddIndentation(); + + bformata(glsl, "phase%d_Input%d_%d[%d]", psContext->currentPhase, regSpace, startReg, i); + + if (destMask != OPERAND_4_COMPONENT_MASK_ALL) + { + int k; + const char *swizzle = "xyzw"; + bcatcstr(glsl, "."); + for (k = 0; k < 4; k++) + { + if ((destMask & (1 << k)) && (psSig->ui32Mask & (1 << k))) + { + bformata(glsl, "%c", swizzle[k]); + } + } + } + bcatcstr(glsl, " = "); + bcatcstr(glsl, realName.c_str()); + if (destMask != OPERAND_4_COMPONENT_MASK_ALL && destMask != psSig->ui32Mask) + { + int k; + const char *swizzle = "xyzw"; + bcatcstr(glsl, "."); + for (k = 0; k < 4; k++) + { + if ((destMask & (1 << k)) && (psSig->ui32Mask & (1 << k))) + { + bformata(glsl, "%c", swizzle[k - rebase]); + } + } + } + } + else + { + realName = psContext->GetDeclaredOutputName(&psDecl->asOperands[0], &dummy, NULL, NULL, 1); + + psContext->AddIndentation(); + bcatcstr(glsl, realName.c_str()); + if (destMask != OPERAND_4_COMPONENT_MASK_ALL && destMask != psSig->ui32Mask) + { + int k; + const char *swizzle = "xyzw"; + bcatcstr(glsl, "."); + for (k = 0; k < 4; k++) + { + if ((destMask & (1 << k)) && (psSig->ui32Mask & (1 << k))) + { + bformata(glsl, "%c", swizzle[k - rebase]); + } + } + } + + bformata(glsl, " = phase%d_Output%d_%d[%d]", psContext->currentPhase, regSpace, startReg, i); + + if (destMask != OPERAND_4_COMPONENT_MASK_ALL) + { + int k; + const char *swizzle = "xyzw"; + bcatcstr(glsl, "."); + for (k = 0; k < 4; k++) + { + if ((destMask & (1 << k)) && (psSig->ui32Mask & (1 << k))) + { + bformata(glsl, "%c", swizzle[k]); + } + } + } + } + + bcatcstr(glsl, ";\n"); + } + + ((Declaration *)psDecl)->asOperands[0].ui32RegisterNumber = startReg; + psContext->currentGLSLString = oldString; + glsl = *psContext->currentGLSLString; + + for (i = 0; i < psDecl->value.ui32IndexRange; i++) + { + if (regSpace == 0) + { + if (isInput) + psShader->sInfo.GetInputSignatureFromRegister( + psDecl->asOperands[0].ui32RegisterNumber + i, + psDecl->asOperands[0].ui32CompMask, + &psSignature); + else + psShader->sInfo.GetOutputSignatureFromRegister( + psDecl->asOperands[0].ui32RegisterNumber + i, + psDecl->asOperands[0].ui32CompMask, + psShader->ui32CurrentVertexOutputStream, + &psSignature); 
+ } + else + psShader->sInfo.GetPatchConstantSignatureFromRegister(psDecl->asOperands[0].ui32RegisterNumber + i, psDecl->asOperands[0].ui32CompMask, &psSignature); + + ASSERT(psSignature != NULL); + + ((ShaderInfo::InOutSignature *)psSignature)->isIndexed.insert(psContext->currentPhase); + ((ShaderInfo::InOutSignature *)psSignature)->indexStart[psContext->currentPhase] = startReg; + ((ShaderInfo::InOutSignature *)psSignature)->index[psContext->currentPhase] = i; + } + + + break; + } + default: + // TODO Input index ranges. + ASSERT(0); + } + break; + } + + case OPCODE_HS_DECLS: + { + // Not supported + break; + } + case OPCODE_DCL_INPUT_CONTROL_POINT_COUNT: + { + // Not supported + break; + } + case OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT: + { + // Not supported + break; + } + case OPCODE_HS_FORK_PHASE: + { + // Not supported + break; + } + case OPCODE_HS_JOIN_PHASE: + { + // Not supported + break; + } + case OPCODE_DCL_SAMPLER: + { + // Find out if the sampler is good for a builtin + std::string name = TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NAME_ONLY); + std::transform(name.begin(), name.end(), name.begin(), ::tolower); + bool linear = (name.find("linear") != std::string::npos); + bool point = (name.find("point") != std::string::npos); + bool clamp = (name.find("clamp") != std::string::npos); + bool repeat = (name.find("repeat") != std::string::npos); + + // Declare only builtin samplers here. Default samplers are declared together with the texture. + if ((linear != point) && (clamp != repeat)) + { + std::ostringstream oss; + oss << "constexpr sampler " << TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NAME_ONLY) << "("; + oss << (linear ? "filter::linear, " : "filter::nearest, "); + oss << (clamp ? "address::clamp_to_edge" : "address::repeat"); + oss << ")"; + + bformata(psContext->psShader->asPhases[psContext->currentPhase].earlyMain, "\t%s;\n", oss.str().c_str()); + } + break; + } + case OPCODE_DCL_HS_MAX_TESSFACTOR: + { + // Not supported + break; + } + case OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED: + { + std::string samplerTypeName = TranslateResourceDeclaration(psContext, + psDecl, false, true); + std::string texName = ResourceName(RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber); + uint32_t slot = m_TextureSlots.GetBindingSlot(psDecl->asOperands[0].ui32RegisterNumber, BindingSlotAllocator::UAV); std::ostringstream oss; + oss << samplerTypeName << " " << texName + << " [[ texture (" << slot << ") ]] "; + + m_StructDefinitions[""].m_Members.push_back(oss.str()); + psContext->m_Reflection.OnTextureBinding(texName, slot, TD_2D, true); // TODO: translate psDecl->value.eResourceDimension into HLSLCC_TEX_DIMENSION + + break; + } + + case OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED: + { + if (psDecl->sUAV.bCounter) + { + std::ostringstream oss; + std::string bufName = ResourceName(RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber); + + // Some GPUs don't allow memory access below buffer binding offset in the shader so always bind compute buffer + // at offset 0 instead of GetDataOffset() to access counter value and translate the buffer pointer in the shader. 
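+ // (net effect: <bufName>_counter aliases the first 32-bit word of the binding and <bufName> itself is then
+ //  advanced past that word before any element access; the exact cast types depend on the buffer declaration)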
+ oss << "device atomic_uint *" << bufName << "_counter = reinterpret_cast (" << bufName << ");"; + oss << "\n " << bufName << " = reinterpret_cast (reinterpret_cast (" << bufName << ") + 1)"; + bformata(psContext->psShader->asPhases[psContext->currentPhase].earlyMain, " %s;\n", oss.str().c_str()); + } + + DeclareBufferVariable(psDecl, 0, 1); + break; + } + case OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW: + { + if (psDecl->sUAV.bCounter) + { + std::ostringstream oss; + std::string bufName = ResourceName(RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber); + oss << "device atomic_uint *" << bufName << "_counter = reinterpret_cast (" << bufName << ") - 1"; + bformata(psContext->psShader->asPhases[psContext->currentPhase].earlyMain, "\t%s;\n", oss.str().c_str()); } + + DeclareBufferVariable(psDecl, 1, 1); + + break; + } + case OPCODE_DCL_RESOURCE_STRUCTURED: + { + DeclareBufferVariable(psDecl, 0, 0); + break; + } + case OPCODE_DCL_RESOURCE_RAW: + { + DeclareBufferVariable(psDecl, 1, 0); + break; + } + case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED: + { + ShaderVarType* psVarType = &psShader->sInfo.sGroupSharedVarType[psDecl->asOperands[0].ui32RegisterNumber]; + std::ostringstream oss; + oss << "uint value[" << psDecl->sTGSM.ui32Stride / 4 << "]"; + m_StructDefinitions[TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NAME_ONLY) + "_Type"].m_Members.push_back(oss.str()); + m_StructDefinitions[""].m_Dependencies.push_back(TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NAME_ONLY) + "_Type"); + oss.str(""); + oss << "threadgroup " << TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NAME_ONLY) + << "_Type " << TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NAME_ONLY) + << "[" << psDecl->sTGSM.ui32Count << "]"; + + bformata(psContext->psShader->asPhases[psContext->currentPhase].earlyMain, "\t%s;\n", oss.str().c_str()); + + psVarType->name = "$Element"; + + psVarType->Columns = psDecl->sTGSM.ui32Stride / 4; + psVarType->Elements = psDecl->sTGSM.ui32Count; + break; + } + case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW: + { + ShaderVarType* psVarType = &psShader->sInfo.sGroupSharedVarType[psDecl->asOperands[0].ui32RegisterNumber]; + + std::ostringstream oss; + oss << "threadgroup uint " << TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NAME_ONLY) + << "[" << (psDecl->sTGSM.ui32Count / psDecl->sTGSM.ui32Stride) << "]"; + + bformata(psContext->psShader->asPhases[psContext->currentPhase].earlyMain, "\t%s;\n", oss.str().c_str()); + + psVarType->name = "$Element"; + + psVarType->Columns = 1; + psVarType->Elements = psDecl->sTGSM.ui32Count / psDecl->sTGSM.ui32Stride; + break; + } + + case OPCODE_DCL_STREAM: + { + // Not supported on Metal + break; + } + case OPCODE_DCL_GS_INSTANCE_COUNT: + { + // Not supported on Metal + break; + } + + default: + ASSERT(0); + break; + } +} + +std::string ToMetal::ResourceName(ResourceGroup group, const uint32_t ui32RegisterNumber) +{ + const ResourceBinding* psBinding = 0; + std::ostringstream oss; + int found; + + found = psContext->psShader->sInfo.GetResourceFromBindingPoint(group, ui32RegisterNumber, &psBinding); + + if (found) + { + size_t i = 0; + std::string name = psBinding->name; + uint32_t ui32ArrayOffset = ui32RegisterNumber - psBinding->ui32BindPoint; + + while (i < name.length()) + { + //array syntax [X] becomes _0_ + //Otherwise declarations could end up as: + //uniform sampler2D SomeTextures[0]; + //uniform sampler2D SomeTextures[1]; + if (name[i] == '[' || name[i] == ']') + name[i] = '_'; + + ++i; + } + + if (ui32ArrayOffset) + { + oss << name << 
ui32ArrayOffset; + return oss.str(); + } + else + { + return name; + } + } + else + { + oss << "UnknownResource" << ui32RegisterNumber; + return oss.str(); + } +} + +void ToMetal::TranslateResourceTexture(const Declaration* psDecl, uint32_t samplerCanDoShadowCmp, HLSLCC_TEX_DIMENSION texDim) +{ + + std::string samplerTypeName = TranslateResourceDeclaration(psContext, + psDecl, (samplerCanDoShadowCmp && psDecl->ui32IsShadowTex), false); + + uint32_t slot = m_TextureSlots.GetBindingSlot(psDecl->asOperands[0].ui32RegisterNumber, BindingSlotAllocator::Texture); + std::string texName = ResourceName(RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber); + std::ostringstream oss; + oss << samplerTypeName << " " << texName + << " [[ texture (" << slot << ") ]] "; + + m_StructDefinitions[""].m_Members.push_back(oss.str()); + psContext->m_Reflection.OnTextureBinding(texName, slot, texDim, false); + oss.str(""); + // the default sampler for a texture is named after the texture with a "sampler" prefix + oss << "sampler sampler" << texName + << " [[ sampler (" << slot << ") ]] "; + m_StructDefinitions[""].m_Members.push_back(oss.str()); + + if (samplerCanDoShadowCmp && psDecl->ui32IsShadowTex) + EnsureShadowSamplerDeclared(); + +} + +void ToMetal::DeclareResource(const Declaration *psDecl) +{ + switch (psDecl->value.eResourceDimension) + { + case RESOURCE_DIMENSION_BUFFER: + { + uint32_t slot = m_TextureSlots.GetBindingSlot(psDecl->asOperands[0].ui32RegisterNumber, BindingSlotAllocator::Texture); + std::string texName = TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NAME_ONLY); + std::ostringstream oss; + oss << "device " << TranslateResourceDeclaration(psContext, + psDecl, false, false); + + oss << texName << " [[ texture(" << slot << ") ]]"; + + m_StructDefinitions[""].m_Members.push_back(oss.str()); + psContext->m_Reflection.OnTextureBinding(texName, slot, TD_2D, false); //TODO: correct HLSLCC_TEX_DIMENSION? + break; + + } + default: + ASSERT(0); + break; + + case RESOURCE_DIMENSION_TEXTURE1D: + { + TranslateResourceTexture(psDecl, 1, TD_2D); //TODO: correct HLSLCC_TEX_DIMENSION? + break; + } + case RESOURCE_DIMENSION_TEXTURE2D: + { + TranslateResourceTexture(psDecl, 1, TD_2D); + break; + } + case RESOURCE_DIMENSION_TEXTURE2DMS: + { + TranslateResourceTexture(psDecl, 0, TD_2D); + break; + } + case RESOURCE_DIMENSION_TEXTURE3D: + { + TranslateResourceTexture(psDecl, 0, TD_3D); + break; + } + case RESOURCE_DIMENSION_TEXTURECUBE: + { + TranslateResourceTexture(psDecl, 1, TD_CUBE); + break; + } + case RESOURCE_DIMENSION_TEXTURE1DARRAY: + { + TranslateResourceTexture(psDecl, 1, TD_2DARRAY); //TODO: correct HLSLCC_TEX_DIMENSION? 
+ break; + } + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + { + TranslateResourceTexture(psDecl, 1, TD_2DARRAY); + break; + } + case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + { + TranslateResourceTexture(psDecl, 0, TD_2DARRAY); + break; + } + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + { + TranslateResourceTexture(psDecl, 1, TD_CUBEARRAY); + break; + } + } + psContext->psShader->aeResourceDims[psDecl->asOperands[0].ui32RegisterNumber] = psDecl->value.eResourceDimension; + + +} + +void ToMetal::DeclareOutput(const Declaration *psDecl) +{ + bstring glsl = *psContext->currentGLSLString; + Shader* psShader = psContext->psShader; + + if (!psContext->OutputNeedsDeclaring(&psDecl->asOperands[0], 1)) + return; + + const Operand* psOperand = &psDecl->asOperands[0]; + int iNumComponents; + int regSpace = psDecl->asOperands[0].GetRegisterSpace(psContext); + uint32_t ui32Reg = psDecl->asOperands[0].ui32RegisterNumber; + + const ShaderInfo::InOutSignature* psSignature = NULL; + SHADER_VARIABLE_TYPE cType = SVT_VOID; + + if (psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH || + psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL || + psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL) + { + iNumComponents = 1; + cType = SVT_FLOAT; + } + else + { + if (regSpace == 0) + psShader->sInfo.GetOutputSignatureFromRegister( + ui32Reg, + psDecl->asOperands[0].ui32CompMask, + psShader->ui32CurrentVertexOutputStream, + &psSignature); + else + psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Reg, psDecl->asOperands[0].ui32CompMask, &psSignature); + + iNumComponents = HLSLcc::GetNumberBitsSet(psSignature->ui32Mask); + + switch (psSignature->eComponentType) + { + case INOUT_COMPONENT_UINT32: + { + cType = SVT_UINT; + break; + } + case INOUT_COMPONENT_SINT32: + { + cType = SVT_INT; + break; + } + case INOUT_COMPONENT_FLOAT32: + { + cType = SVT_FLOAT; + break; + } + default: + ASSERT(0); + break; + } + // Don't set this for oDepth (or variants), because depth output register is in separate space from other outputs (regno 0, but others may overlap with that) + if (iNumComponents == 1) + psContext->psShader->abScalarOutput[regSpace][ui32Reg] |= (int)psDecl->asOperands[0].ui32CompMask; + + switch (psOperand->eMinPrecision) + { + case OPERAND_MIN_PRECISION_DEFAULT: + break; + case OPERAND_MIN_PRECISION_FLOAT_16: + cType = SVT_FLOAT16; + break; + case OPERAND_MIN_PRECISION_FLOAT_2_8: + cType = SVT_FLOAT10; + break; + case OPERAND_MIN_PRECISION_SINT_16: + cType = SVT_INT16; + break; + case OPERAND_MIN_PRECISION_UINT_16: + cType = SVT_UINT16; + break; + } + } + + std::string type = HLSLcc::GetConstructorForTypeMetal(cType, iNumComponents); + std::string name = psContext->GetDeclaredOutputName(&psDecl->asOperands[0], nullptr, nullptr, nullptr, 1); + + switch (psShader->eShaderType) + { + case PIXEL_SHADER: + { + switch (psDecl->asOperands[0].eType) + { + case OPERAND_TYPE_OUTPUT_COVERAGE_MASK: + { + std::ostringstream oss; + oss << type << " " << name << " [[ sample_mask ]]"; + m_StructDefinitions[GetOutputStructName()].m_Members.push_back(oss.str()); + break; + } + case OPERAND_TYPE_OUTPUT_DEPTH: + { + std::ostringstream oss; + oss << type << " " << name << " [[ depth(any) ]]"; + m_StructDefinitions[GetOutputStructName()].m_Members.push_back(oss.str()); + break; + } + case OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL: + { + std::ostringstream oss; + oss << type << " " << name << " [[ depth(greater) ]]"; + m_StructDefinitions[GetOutputStructName()].m_Members.push_back(oss.str()); + break; + } + case 
OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL: + { + std::ostringstream oss; + oss << type << " " << name << " [[ depth(less) ]]"; + m_StructDefinitions[GetOutputStructName()].m_Members.push_back(oss.str()); + break; + } + default: + { + std::ostringstream oss; + oss << type << " " << name << " [[ color(" << psSignature->ui32SemanticIndex << ") ]]"; + m_StructDefinitions[GetOutputStructName()].m_Members.push_back(oss.str()); + } + } + break; + } + case VERTEX_SHADER: + { + std::ostringstream oss; + oss << type << " " << name; + if (psSignature->eSystemValueType == NAME_POSITION || (psSignature->semanticName == "POS" && psOperand->ui32RegisterNumber == 0 )) + oss << " [[ position ]]"; + else if (psSignature->eSystemValueType == NAME_UNDEFINED && psSignature->semanticName == "PSIZE" && psSignature->ui32SemanticIndex == 0 ) + oss << " [[ point_size ]]"; + else + oss << " [[ user(" << name << ") ]]"; + m_StructDefinitions[GetOutputStructName()].m_Members.push_back(oss.str()); + break; + } + case GEOMETRY_SHADER: + case DOMAIN_SHADER: + case HULL_SHADER: + default: + ASSERT(0); + break; + + } + HandleOutputRedirect(psDecl, HLSLcc::GetConstructorForTypeMetal(cType, 4)); + + +} + +void ToMetal::EnsureShadowSamplerDeclared() +{ + if (m_ShadowSamplerDeclared) + return; + + if((psContext->flags & HLSLCC_FLAG_METAL_SHADOW_SAMPLER_LINEAR) != 0) + m_ExtraGlobalDefinitions += "constexpr sampler _mtl_xl_shadow_sampler(address::clamp_to_edge, filter::linear, compare_func::greater_equal);\n"; + else + m_ExtraGlobalDefinitions += "constexpr sampler _mtl_xl_shadow_sampler(address::clamp_to_edge, filter::nearest, compare_func::greater_equal);\n"; + m_ShadowSamplerDeclared = true; +} diff --git a/src/toMetalInstruction.cpp b/src/toMetalInstruction.cpp new file mode 100644 index 0000000..f1cdf1e --- /dev/null +++ b/src/toMetalInstruction.cpp @@ -0,0 +1,3731 @@ +#include "internal_includes/toMetal.h" +#include "internal_includes/HLSLccToolkit.h" +#include "internal_includes/languages.h" +#include "internal_includes/HLSLCrossCompilerContext.h" +#include "bstrlib.h" +#include "stdio.h" +#include +#include +#include "internal_includes/debug.h" +#include "internal_includes/Shader.h" +#include "internal_includes/Instruction.h" +#include "hlslcc.h" + +using namespace HLSLcc; + +bstring operator << (bstring a, const std::string &b) +{ + bcatcstr(a, b.c_str()); + return a; +} + +// This function prints out the destination name, possible destination writemask, assignment operator +// and any possible conversions needed based on the eSrcType+ui32SrcElementCount (type and size of data expected to be coming in) +// As an output, pNeedsParenthesis will be filled with the amount of closing parenthesis needed +// and pSrcCount will be filled with the number of components expected +// ui32CompMask can be used to only write to 1 or more components (used by MOVC) +void ToMetal::AddOpAssignToDestWithMask(const Operand* psDest, + SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, const char *szAssignmentOp, int *pNeedsParenthesis, uint32_t ui32CompMask) +{ + uint32_t ui32DestElementCount = psDest->GetNumSwizzleElements(ui32CompMask); + bstring glsl = *psContext->currentGLSLString; + SHADER_VARIABLE_TYPE eDestDataType = psDest->GetDataType(psContext); + ASSERT(pNeedsParenthesis != NULL); + + *pNeedsParenthesis = 0; + + glsl << TranslateOperand(psDest, TO_FLAG_DESTINATION, ui32CompMask); + + // Simple path: types match. 
+ if (eDestDataType == eSrcType) + { + // Cover cases where the HLSL language expects the rest of the components to be default-filled + // eg. MOV r0, c0.x => Temp[0] = vec4(c0.x); + if (ui32DestElementCount > ui32SrcElementCount) + { + bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeMetal(eDestDataType, ui32DestElementCount)); + *pNeedsParenthesis = 1; + } + else + bformata(glsl, " %s ", szAssignmentOp); + return; + } + // Up/downscaling with cast. The monster of condition there checks if the underlying datatypes are the same, just with prec differences + if (((eDestDataType == SVT_FLOAT || eDestDataType == SVT_FLOAT16 || eDestDataType == SVT_FLOAT10) && (eSrcType == SVT_FLOAT || eSrcType == SVT_FLOAT16 || eSrcType == SVT_FLOAT10)) + || ((eDestDataType == SVT_INT || eDestDataType == SVT_INT16 || eDestDataType == SVT_INT12) && (eSrcType == SVT_INT || eSrcType == SVT_INT16 || eSrcType == SVT_INT12)) + || ((eDestDataType == SVT_UINT || eDestDataType == SVT_UINT16) && (eSrcType == SVT_UINT || eSrcType == SVT_UINT16))) + { + bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeMetal(eDestDataType, ui32DestElementCount)); + *pNeedsParenthesis = 1; + return; + } + + switch (eDestDataType) + { + case SVT_INT: + case SVT_INT12: + case SVT_INT16: + // Bitcasts from lower precisions are ambiguous + ASSERT(eSrcType != SVT_FLOAT10 && eSrcType != SVT_FLOAT16); + if (eSrcType == SVT_FLOAT) + { + if(ui32DestElementCount > 1) + bformata(glsl, " %s as_type(", szAssignmentOp, ui32DestElementCount); + else + bformata(glsl, " %s as_type(", szAssignmentOp); + + // Cover cases where the HLSL language expects the rest of the components to be default-filled + if (ui32DestElementCount > ui32SrcElementCount) + { + bformata(glsl, "%s(", GetConstructorForTypeMetal(eSrcType, ui32DestElementCount)); + (*pNeedsParenthesis)++; + } + } + else + bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeMetal(eDestDataType, ui32DestElementCount)); + + (*pNeedsParenthesis)++; + break; + case SVT_UINT: + case SVT_UINT16: + ASSERT(eSrcType != SVT_FLOAT10 && eSrcType != SVT_FLOAT16); + if (eSrcType == SVT_FLOAT) + { + if (ui32DestElementCount > 1) + bformata(glsl, " %s as_type(", szAssignmentOp, ui32DestElementCount); + else + bformata(glsl, " %s as_type(", szAssignmentOp); + // Cover cases where the HLSL language expects the rest of the components to be default-filled + if (ui32DestElementCount > ui32SrcElementCount) + { + bformata(glsl, "%s(", GetConstructorForTypeMetal(eSrcType, ui32DestElementCount)); + (*pNeedsParenthesis)++; + } + } + else + bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeMetal(eDestDataType, ui32DestElementCount)); + + (*pNeedsParenthesis)++; + break; + + case SVT_FLOAT: + case SVT_FLOAT10: + case SVT_FLOAT16: + ASSERT(eSrcType != SVT_INT12 || eSrcType != SVT_INT16 && eSrcType != SVT_UINT16); + if (psContext->psShader->ui32MajorVersion > 3) + { + if (ui32DestElementCount > 1) + bformata(glsl, " %s as_type(", szAssignmentOp, ui32DestElementCount); + else + bformata(glsl, " %s as_type(", szAssignmentOp); + // Cover cases where the HLSL language expects the rest of the components to be default-filled + if (ui32DestElementCount > ui32SrcElementCount) + { + bformata(glsl, "%s(", GetConstructorForTypeMetal(eSrcType, ui32DestElementCount)); + (*pNeedsParenthesis)++; + } + } + else + bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeMetal(eDestDataType, ui32DestElementCount)); + + (*pNeedsParenthesis)++; + break; + default: + // TODO: Handle bools? 
+ ASSERT(0); + break; + } + return; +} + +void ToMetal::AddAssignToDest(const Operand* psDest, + SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, int* pNeedsParenthesis) +{ + AddOpAssignToDestWithMask(psDest, eSrcType, ui32SrcElementCount, "=", pNeedsParenthesis, OPERAND_4_COMPONENT_MASK_ALL); +} + +void ToMetal::AddAssignPrologue(int numParenthesis) +{ + bstring glsl = *psContext->currentGLSLString; + while (numParenthesis != 0) + { + bcatcstr(glsl, ")"); + numParenthesis--; + } + bcatcstr(glsl, ";\n"); + +} + +void ToMetal::AddComparison(Instruction* psInst, ComparisonType eType, + uint32_t typeFlag) +{ + // Multiple cases to consider here: + // OPCODE_LT, _GT, _NE etc: inputs are floats, outputs UINT 0xffffffff or 0. typeflag: TO_FLAG_NONE + // OPCODE_ILT, _IGT etc: comparisons are signed ints, outputs UINT 0xffffffff or 0 typeflag TO_FLAG_INTEGER + // _ULT, UGT etc: inputs unsigned ints, outputs UINTs typeflag TO_FLAG_UNSIGNED_INTEGER + // + + + bstring glsl = *psContext->currentGLSLString; + const uint32_t destElemCount = psInst->asOperands[0].GetNumSwizzleElements(); + const uint32_t s0ElemCount = psInst->asOperands[1].GetNumSwizzleElements(); + const uint32_t s1ElemCount = psInst->asOperands[2].GetNumSwizzleElements(); + int isBoolDest = psInst->asOperands[0].GetDataType(psContext) == SVT_BOOL; + const uint32_t destMask = psInst->asOperands[0].GetAccessMask(); + + int needsParenthesis = 0; + if (typeFlag == TO_FLAG_NONE + && psInst->asOperands[1].GetDataType(psContext) == SVT_FLOAT16 + && psInst->asOperands[1].GetDataType(psContext) == SVT_FLOAT16) + typeFlag = TO_FLAG_FORCE_HALF; + ASSERT(s0ElemCount == s1ElemCount || s1ElemCount == 1 || s0ElemCount == 1); + if ((s0ElemCount != s1ElemCount) && (destElemCount > 1)) + { + // Set the proper auto-expand flag is either argument is scalar + typeFlag |= (TO_AUTO_EXPAND_TO_VEC2 << (std::min(std::max(s0ElemCount, s1ElemCount), destElemCount) - 2)); + } + if (destElemCount > 1) + { + const char* glslOpcode[] = { + "==", + "<", + ">=", + "!=", + }; + psContext->AddIndentation(); + if (isBoolDest) + { + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION | TO_FLAG_BOOL); + bcatcstr(glsl, " = "); + } + else + { + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, destElemCount, &needsParenthesis); + + bcatcstr(glsl, GetConstructorForTypeMetal(SVT_UINT, destElemCount)); + bcatcstr(glsl, "("); + } + bcatcstr(glsl, "("); + glsl << TranslateOperand(&psInst->asOperands[1], typeFlag, destMask); + bformata(glsl, "%s", glslOpcode[eType]); + glsl << TranslateOperand(&psInst->asOperands[2], typeFlag, destMask); + bcatcstr(glsl, ")"); + if (!isBoolDest) + { + bcatcstr(glsl, ")"); + bcatcstr(glsl, " * 0xFFFFFFFFu"); + } + + AddAssignPrologue(needsParenthesis); + } + else + { + const char* glslOpcode[] = { + "==", + "<", + ">=", + "!=", + }; + + //Scalar compare + + psContext->AddIndentation(); + if (isBoolDest) + { + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION | TO_FLAG_BOOL); + bcatcstr(glsl, " = "); + } + else + { + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, destElemCount, &needsParenthesis); + bcatcstr(glsl, "("); + } + glsl << TranslateOperand(&psInst->asOperands[1], typeFlag, destMask); + bformata(glsl, "%s", glslOpcode[eType]); + glsl << TranslateOperand(&psInst->asOperands[2], typeFlag, destMask); + if (!isBoolDest) + { + bcatcstr(glsl, ") ? 
0xFFFFFFFFu : 0u"); + } + AddAssignPrologue(needsParenthesis); + } +} + + +void ToMetal::AddMOVBinaryOp(const Operand *pDest, Operand *pSrc) +{ + bstring glsl = *psContext->currentGLSLString; + int numParenthesis = 0; + int srcSwizzleCount = pSrc->GetNumSwizzleElements(); + uint32_t writeMask = pDest->GetAccessMask(); + + const SHADER_VARIABLE_TYPE eSrcType = pSrc->GetDataType(psContext, pDest->GetDataType(psContext)); + uint32_t flags = SVTTypeToFlag(eSrcType); + + AddAssignToDest(pDest, eSrcType, srcSwizzleCount, &numParenthesis); + glsl << TranslateOperand(pSrc, flags, writeMask); + + AddAssignPrologue(numParenthesis); +} + +void ToMetal::AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand *src1, Operand *src2) +{ + bstring glsl = *psContext->currentGLSLString; + uint32_t destElemCount = pDest->GetNumSwizzleElements(); + uint32_t s0ElemCount = src0->GetNumSwizzleElements(); + uint32_t s1ElemCount = src1->GetNumSwizzleElements(); + uint32_t s2ElemCount = src2->GetNumSwizzleElements(); + uint32_t destWriteMask = pDest->GetAccessMask(); + uint32_t destElem; + + const SHADER_VARIABLE_TYPE eDestType = pDest->GetDataType(psContext); + /* + for each component in dest[.mask] + if the corresponding component in src0 (POS-swizzle) + has any bit set + { + copy this component (POS-swizzle) from src1 into dest + } + else + { + copy this component (POS-swizzle) from src2 into dest + } + endfor + */ + + /* Single-component conditional variable (src0) */ + if (s0ElemCount == 1 || src0->IsSwizzleReplicated()) + { + int numParenthesis = 0; + SHADER_VARIABLE_TYPE s0Type = src0->GetDataType(psContext); + psContext->AddIndentation(); + AddAssignToDest(pDest, eDestType, destElemCount, &numParenthesis); + bcatcstr(glsl, "("); + if (s0Type == SVT_UINT || s0Type == SVT_UINT16) + glsl << TranslateOperand(src0, TO_AUTO_BITCAST_TO_UINT, OPERAND_4_COMPONENT_MASK_X); + else if (s0Type == SVT_BOOL) + glsl << TranslateOperand(src0, TO_FLAG_BOOL, OPERAND_4_COMPONENT_MASK_X); + else + glsl << TranslateOperand(src0, TO_AUTO_BITCAST_TO_INT, OPERAND_4_COMPONENT_MASK_X); + + if (psContext->psShader->ui32MajorVersion < 4) + { + //cmp opcode uses >= 0 + bcatcstr(glsl, " >= 0) ? "); + } + else + { + if (s0Type == SVT_UINT || s0Type == SVT_UINT16) + bcatcstr(glsl, " != 0u) ? "); + else if (s0Type == SVT_BOOL) + bcatcstr(glsl, ") ? "); + else + bcatcstr(glsl, " != 0) ? "); + } + + if (s1ElemCount == 1 && destElemCount > 1) + glsl << TranslateOperand(src1, SVTTypeToFlag(eDestType) | ElemCountToAutoExpandFlag(destElemCount)); + else + glsl << TranslateOperand(src1, SVTTypeToFlag(eDestType), destWriteMask); + + bcatcstr(glsl, " : "); + if (s2ElemCount == 1 && destElemCount > 1) + glsl << TranslateOperand(src2, SVTTypeToFlag(eDestType) | ElemCountToAutoExpandFlag(destElemCount)); + else + glsl << TranslateOperand(src2, SVTTypeToFlag(eDestType), destWriteMask); + + AddAssignPrologue(numParenthesis); + } + else + { + // TODO: We can actually do this in one op using mix(). 
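+ // An illustrative single-op alternative (not emitted here) could use Metal's component-wise select(), roughly:
+ //   dest = select(src2val, src1val, src0val != 0);
+ // For now each destination component is handled separately below.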
+ int srcElem = -1; + SHADER_VARIABLE_TYPE s0Type = src0->GetDataType(psContext); + for (destElem = 0; destElem < 4; ++destElem) + { + int numParenthesis = 0; + srcElem++; + if (pDest->eSelMode == OPERAND_4_COMPONENT_MASK_MODE && pDest->ui32CompMask != 0 && !(pDest->ui32CompMask & (1 << destElem))) + continue; + + psContext->AddIndentation(); + AddOpAssignToDestWithMask(pDest, eDestType, 1, "=", &numParenthesis, 1 << destElem); + bcatcstr(glsl, "("); + if (s0Type == SVT_BOOL) + { + glsl << TranslateOperand(src0, TO_FLAG_BOOL, 1 << srcElem); + bcatcstr(glsl, ") ? "); + } + else + { + glsl << TranslateOperand(src0, TO_AUTO_BITCAST_TO_INT, 1 << srcElem); + + if (psContext->psShader->ui32MajorVersion < 4) + { + //cmp opcode uses >= 0 + bcatcstr(glsl, " >= 0) ? "); + } + else + { + bcatcstr(glsl, " != 0) ? "); + } + } + + glsl << TranslateOperand(src1, SVTTypeToFlag(eDestType), 1 << srcElem); + bcatcstr(glsl, " : "); + glsl << TranslateOperand(src2, SVTTypeToFlag(eDestType), 1 << srcElem); + + AddAssignPrologue(numParenthesis); + } + } +} + +void ToMetal::CallBinaryOp(const char* name, Instruction* psInst, + int dest, int src0, int src1, SHADER_VARIABLE_TYPE eDataType) +{ + uint32_t ui32Flags = SVTTypeToFlag(eDataType); + bstring glsl = *psContext->currentGLSLString; + uint32_t destMask = psInst->asOperands[dest].GetAccessMask(); + uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); + uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + int needsParenthesis = 0; + + if (eDataType == SVT_FLOAT + && psInst->asOperands[dest].GetDataType(psContext) == SVT_FLOAT16 + && psInst->asOperands[src0].GetDataType(psContext) == SVT_FLOAT16 + && psInst->asOperands[src1].GetDataType(psContext) == SVT_FLOAT16) + ui32Flags = TO_FLAG_FORCE_HALF; + + uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); + if (src1SwizCount != src0SwizCount) + { + ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); + } + + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[dest], eDataType, dstSwizCount, &needsParenthesis); + +/* bool s0NeedsUpscaling = false, s1NeedsUpscaling = false; + SHADER_VARIABLE_TYPE s0Type = psInst->asOperands[src0].GetDataType(psContext); + SHADER_VARIABLE_TYPE s1Type = psInst->asOperands[src1].GetDataType(psContext); + + if((s0Type == SVT_FLOAT10 || s0Type == SVT_FLOAT16) && (s1Type != s) + */ + glsl << TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + bformata(glsl, " %s ", name); + glsl << TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); + + AddAssignPrologue(needsParenthesis); +} + + + +void ToMetal::CallTernaryOp(const char* op1, const char* op2, Instruction* psInst, + int dest, int src0, int src1, int src2, uint32_t dataType) +{ + bstring glsl = *psContext->currentGLSLString; + uint32_t destMask = psInst->asOperands[dest].GetAccessMask(); + uint32_t src2SwizCount = psInst->asOperands[src2].GetNumSwizzleElements(destMask); + uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); + uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + + uint32_t ui32Flags = dataType; + int numParenthesis = 0; + + if (dataType == TO_FLAG_NONE + && psInst->asOperands[dest].GetDataType(psContext) == SVT_FLOAT16 + && psInst->asOperands[src0].GetDataType(psContext) == SVT_FLOAT16 + && 
psInst->asOperands[src1].GetDataType(psContext) == SVT_FLOAT16 + && psInst->asOperands[src2].GetDataType(psContext) == SVT_FLOAT16) + ui32Flags = TO_FLAG_FORCE_HALF; + + if (src1SwizCount != src0SwizCount || src2SwizCount != src0SwizCount) + { + uint32_t maxElems = std::max(src2SwizCount, std::max(src1SwizCount, src0SwizCount)); + ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); + } + + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[dest], TypeFlagsToSVTType(dataType), dstSwizCount, &numParenthesis); + + glsl << TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + bformata(glsl, " %s ", op1); + glsl << TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); + bformata(glsl, " %s ", op2); + glsl << TranslateOperand(&psInst->asOperands[src2], ui32Flags, destMask); + AddAssignPrologue(numParenthesis); +} + +void ToMetal::CallHelper3(const char* name, Instruction* psInst, + int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask) +{ + uint32_t ui32Flags = TO_AUTO_BITCAST_TO_FLOAT; + bstring glsl = *psContext->currentGLSLString; + uint32_t destMask = paramsShouldFollowWriteMask ? psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; + uint32_t src2SwizCount = psInst->asOperands[src2].GetNumSwizzleElements(destMask); + uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); + uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + int numParenthesis = 0; + + if (psInst->asOperands[dest].GetDataType(psContext) == SVT_FLOAT16 + && psInst->asOperands[src0].GetDataType(psContext) == SVT_FLOAT16 + && psInst->asOperands[src1].GetDataType(psContext) == SVT_FLOAT16 + && psInst->asOperands[src2].GetDataType(psContext) == SVT_FLOAT16) + ui32Flags = TO_FLAG_FORCE_HALF | TO_AUTO_BITCAST_TO_FLOAT; + + if ((src1SwizCount != src0SwizCount || src2SwizCount != src0SwizCount) && paramsShouldFollowWriteMask) + { + uint32_t maxElems = std::max(src2SwizCount, std::max(src1SwizCount, src0SwizCount)); + ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); + } + + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[dest], ui32Flags & TO_FLAG_FORCE_HALF ? SVT_FLOAT16 : SVT_FLOAT, dstSwizCount, &numParenthesis); + + bformata(glsl, "%s(", name); + numParenthesis++; + glsl << TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[src2], ui32Flags, destMask); + AddAssignPrologue(numParenthesis); +} + +void ToMetal::CallHelper2(const char* name, Instruction* psInst, + int dest, int src0, int src1, int paramsShouldFollowWriteMask) +{ + uint32_t ui32Flags = TO_AUTO_BITCAST_TO_FLOAT; + bstring glsl = *psContext->currentGLSLString; + uint32_t destMask = paramsShouldFollowWriteMask ? psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; + uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); + uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + + int isDotProduct = (strncmp(name, "dot", 3) == 0) ? 
1 : 0; + int numParenthesis = 0; + + if (psInst->asOperands[dest].GetDataType(psContext) == SVT_FLOAT16 + && psInst->asOperands[src0].GetDataType(psContext) == SVT_FLOAT16 + && psInst->asOperands[src1].GetDataType(psContext) == SVT_FLOAT16) + ui32Flags = TO_FLAG_FORCE_HALF | TO_AUTO_BITCAST_TO_FLOAT; + + + if ((src1SwizCount != src0SwizCount) && paramsShouldFollowWriteMask) + { + uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); + ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); + } + + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[dest], ui32Flags & TO_FLAG_FORCE_HALF ? SVT_FLOAT16 : SVT_FLOAT, isDotProduct ? 1 : dstSwizCount, &numParenthesis); + + bformata(glsl, "%s(", name); + numParenthesis++; + + glsl << TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); + + AddAssignPrologue(numParenthesis); +} + +void ToMetal::CallHelper2Int(const char* name, Instruction* psInst, + int dest, int src0, int src1, int paramsShouldFollowWriteMask) +{ + uint32_t ui32Flags = TO_AUTO_BITCAST_TO_INT; + bstring glsl = *psContext->currentGLSLString; + uint32_t destMask = paramsShouldFollowWriteMask ? psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; + uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); + uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + int numParenthesis = 0; + + if ((src1SwizCount != src0SwizCount) && paramsShouldFollowWriteMask) + { + uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); + ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); + } + + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[dest], SVT_INT, dstSwizCount, &numParenthesis); + + bformata(glsl, "%s(", name); + numParenthesis++; + glsl << TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); + AddAssignPrologue(numParenthesis); +} + +void ToMetal::CallHelper2UInt(const char* name, Instruction* psInst, + int dest, int src0, int src1, int paramsShouldFollowWriteMask) +{ + uint32_t ui32Flags = TO_AUTO_BITCAST_TO_UINT; + bstring glsl = *psContext->currentGLSLString; + uint32_t destMask = paramsShouldFollowWriteMask ? 
psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; + uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); + uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + int numParenthesis = 0; + + if ((src1SwizCount != src0SwizCount) && paramsShouldFollowWriteMask) + { + uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); + ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); + } + + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[dest], SVT_UINT, dstSwizCount, &numParenthesis); + + bformata(glsl, "%s(", name); + numParenthesis++; + glsl << TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); + AddAssignPrologue(numParenthesis); +} + +void ToMetal::CallHelper1(const char* name, Instruction* psInst, + int dest, int src0, int paramsShouldFollowWriteMask) +{ + uint32_t ui32Flags = TO_AUTO_BITCAST_TO_FLOAT; + bstring glsl = *psContext->currentGLSLString; + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + uint32_t destMask = paramsShouldFollowWriteMask ? psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; + int numParenthesis = 0; + + psContext->AddIndentation(); + if (psInst->asOperands[dest].GetDataType(psContext) == SVT_FLOAT16 + && psInst->asOperands[src0].GetDataType(psContext) == SVT_FLOAT16) + ui32Flags = TO_FLAG_FORCE_HALF | TO_AUTO_BITCAST_TO_FLOAT; + + AddAssignToDest(&psInst->asOperands[dest], ui32Flags & TO_FLAG_FORCE_HALF ? SVT_FLOAT16 : SVT_FLOAT, dstSwizCount, &numParenthesis); + + bformata(glsl, "%s(", name); + numParenthesis++; + glsl << TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + AddAssignPrologue(numParenthesis); +} + +//Result is an int. +void ToMetal::CallHelper1Int( + const char* name, + Instruction* psInst, + const int dest, + const int src0, + int paramsShouldFollowWriteMask) +{ + uint32_t ui32Flags = TO_AUTO_BITCAST_TO_INT; + bstring glsl = *psContext->currentGLSLString; + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + uint32_t destMask = paramsShouldFollowWriteMask ? 
psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; + int numParenthesis = 0; + + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[dest], SVT_INT, dstSwizCount, &numParenthesis); + + bformata(glsl, "%s(", name); + numParenthesis++; + glsl << TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + AddAssignPrologue(numParenthesis); +} + +void ToMetal::TranslateTexelFetch( + Instruction* psInst, + const ResourceBinding* psBinding, + bstring glsl) +{ + int numParenthesis = 0; + uint32_t destCount = psInst->asOperands[0].GetNumSwizzleElements(); + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], psContext->psShader->sInfo.GetTextureDataType(psInst->asOperands[2].ui32RegisterNumber), 4, &numParenthesis); + glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); + bcatcstr(glsl, ".read("); + + switch (psBinding->eDimension) + { + case REFLECT_RESOURCE_DIMENSION_BUFFER: + { + psContext->m_Reflection.OnDiagnostics("Buffer resources not supported in Metal (in texel fetch)", 0, true); + return; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE1D: + { + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_X); + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY: + { + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_X); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_W); + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE2D: + { + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_W); // Lod level + break; + } + + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY: + { + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_Z); // Array index + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_W); // Lod level + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE3D: + { + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_W); // Lod level + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS: + { + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[3], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_X); // Sample index + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURECUBE: + case REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY: + case REFLECT_RESOURCE_DIMENSION_BUFFEREX: + default: + { + // Shouldn't happen. 
Cubemap reads are not supported in HLSL + ASSERT(0); + break; + } + } + bcatcstr(glsl, ")"); + + AddSwizzleUsingElementCount(glsl, destCount); + AddAssignPrologue(numParenthesis); +} + +void ToMetal::TranslateTexelFetchOffset( + Instruction* psInst, + const ResourceBinding* psBinding, + bstring glsl) +{ + int numParenthesis = 0; + uint32_t destCount = psInst->asOperands[0].GetNumSwizzleElements(); + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], psContext->psShader->sInfo.GetTextureDataType(psInst->asOperands[2].ui32RegisterNumber), 4, &numParenthesis); + + glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); + bcatcstr(glsl, ".read("); + + switch (psBinding->eDimension) + { + case REFLECT_RESOURCE_DIMENSION_BUFFER: + { + psContext->m_Reflection.OnDiagnostics("Buffer resources not supported in Metal (in texel fetch)", 0, true); + return; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + { + psContext->m_Reflection.OnDiagnostics("Multisampled texture arrays not supported in Metal (in texel fetch)", 0, true); + return; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE1D: + { + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_X); + bformata(glsl, " + %d", psInst->iUAddrOffset); + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY: + { + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_X); + bformata(glsl, " + %d, ", psInst->iUAddrOffset); + + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_Y); + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE2D: + { + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); + bformata(glsl, "+ ivec2(%d, %d), ", psInst->iUAddrOffset, psInst->iVAddrOffset); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_W); // Lod level + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY: + { + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); + bformata(glsl, "+ ivec2(%d, %d), ", psInst->iUAddrOffset, psInst->iVAddrOffset); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_Z); // Array index + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_W); // Lod level + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE3D: + { + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); + bformata(glsl, "+ ivec3(%d, %d, %d), ", psInst->iUAddrOffset, psInst->iVAddrOffset, psInst->iWAddrOffset); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_W); // Lod level + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS: + { + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); + bformata(glsl, "+ ivec2(%d, %d), ", psInst->iUAddrOffset, psInst->iVAddrOffset); + glsl << TranslateOperand(&psInst->asOperands[3], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_X); // Sample index + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURECUBE: + case REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY: + case REFLECT_RESOURCE_DIMENSION_BUFFEREX: + default: + { + // Shouldn't happen. 
Cubemap reads are not supported in HLSL + ASSERT(0); + break; + } + } + bcatcstr(glsl, ")"); + + AddSwizzleUsingElementCount(glsl, destCount); + AddAssignPrologue(numParenthesis); +} + + +//Makes sure the texture coordinate swizzle is appropriate for the texture type. +//i.e. vecX for X-dimension texture. +//Currently supports floating point coord only, so not used for texelFetch. +void ToMetal::TranslateTexCoord( + const RESOURCE_DIMENSION eResDim, + Operand* psTexCoordOperand) +{ + uint32_t flags = TO_AUTO_BITCAST_TO_FLOAT; + uint32_t opMask = OPERAND_4_COMPONENT_MASK_ALL; + + switch (eResDim) + { + case RESOURCE_DIMENSION_TEXTURE1D: + { + //Vec1 texcoord. Mask out the other components. + opMask = OPERAND_4_COMPONENT_MASK_X; + break; + } + case RESOURCE_DIMENSION_TEXTURE2D: + case RESOURCE_DIMENSION_TEXTURE1DARRAY: + { + //Vec2 texcoord. Mask out the other components. + opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y; + flags |= TO_AUTO_EXPAND_TO_VEC2; + break; + } + case RESOURCE_DIMENSION_TEXTURECUBE: + case RESOURCE_DIMENSION_TEXTURE3D: + { + //Vec3 texcoord. Mask out the other components. + opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z; + flags |= TO_AUTO_EXPAND_TO_VEC3; + break; + } + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + { + opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y; + flags |= TO_AUTO_EXPAND_TO_VEC2; + + bstring glsl = *psContext->currentGLSLString; + glsl << TranslateOperand(psTexCoordOperand, flags, opMask); + + bcatcstr(glsl, ", "); + + opMask = OPERAND_4_COMPONENT_MASK_Z; + flags = TO_AUTO_BITCAST_TO_FLOAT; + + break; + } + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + { + flags |= TO_AUTO_EXPAND_TO_VEC4; + break; + } + default: + { + ASSERT(0); + break; + } + } + + //FIXME detect when integer coords are needed. + bstring glsl = *psContext->currentGLSLString; + glsl << TranslateOperand(psTexCoordOperand, flags, opMask); +} + +void ToMetal::GetResInfoData(Instruction* psInst, int index, int destElem) +{ + bstring glsl = *psContext->currentGLSLString; + int numParenthesis = 0; + const RESINFO_RETURN_TYPE eResInfoReturnType = psInst->eResInfoReturnType; + const int isUAV = (psInst->asOperands[2].eType == OPERAND_TYPE_UNORDERED_ACCESS_VIEW); + + psContext->AddIndentation(); + AddOpAssignToDestWithMask(&psInst->asOperands[0], eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT ? SVT_UINT : SVT_FLOAT, 1, "=", &numParenthesis, 1 << destElem); + + const char *metalGetters[] = { ".get_width()", ".get_height()", ".get_depth()", ".get_num_mip_levels()" }; + int dim = GetNumTextureDimensions(psInst->eResDim); + if (dim < (index + 1) && index != 3) + { + bcatcstr(glsl, eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT ? "0u" : "0.0"); + } + else + { + if (eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_FLOAT) + { + bcatcstr(glsl, "float("); + numParenthesis++; + } + else if (eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_RCPFLOAT) + { + bcatcstr(glsl, "1.0f / float("); + numParenthesis++; + } + glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); + if (index == 2 && + (psInst->eResDim == RESOURCE_DIMENSION_TEXTURE1DARRAY || + psInst->eResDim == RESOURCE_DIMENSION_TEXTURE2DARRAY || + psInst->eResDim == RESOURCE_DIMENSION_TEXTURE2DMSARRAY)) + { + bcatcstr(glsl, ".get_array_size()"); + } + else + bcatcstr(glsl, metalGetters[index]); + + // TODO Metal has no way to query for info on lower mip levels, now always returns info for highest. 
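+        // Illustrative example (texture name is made up): a resinfo on a 2D texture typically
+        // comes out as
+        //   output.x = float(myTex.get_width());
+        // or, for the RCPFLOAT variant,
+        //   output.x = 1.0f / float(myTex.get_width());
+        // while index 3 maps to myTex.get_num_mip_levels().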
+ } + AddAssignPrologue(numParenthesis); +} + +void ToMetal::TranslateTextureSample(Instruction* psInst, + uint32_t ui32Flags) +{ + bstring glsl = *psContext->currentGLSLString; + int numParenthesis = 0; + int hasParamOffset = (ui32Flags & TEXSMP_FLAG_PARAMOFFSET) ? 1 : 0; + + Operand* psDest = &psInst->asOperands[0]; + Operand* psDestAddr = &psInst->asOperands[1]; + Operand* psSrcOff = (ui32Flags & TEXSMP_FLAG_PARAMOFFSET) ? &psInst->asOperands[2] : 0; + Operand* psSrcTex = &psInst->asOperands[2 + hasParamOffset]; + Operand* psSrcSamp = &psInst->asOperands[3 + hasParamOffset]; + Operand* psSrcRef = (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) ? &psInst->asOperands[4 + hasParamOffset] : 0; + Operand* psSrcLOD = (ui32Flags & TEXSMP_FLAG_LOD) ? &psInst->asOperands[4] : 0; + Operand* psSrcDx = (ui32Flags & TEXSMP_FLAG_GRAD) ? &psInst->asOperands[4] : 0; + Operand* psSrcDy = (ui32Flags & TEXSMP_FLAG_GRAD) ? &psInst->asOperands[5] : 0; + Operand* psSrcBias = (ui32Flags & TEXSMP_FLAG_BIAS) ? &psInst->asOperands[4] : 0; + + const char *funcName = ""; + const char* offset = ""; + const char* gradSwizzle = ""; + const char *gradientName = ""; + + uint32_t ui32NumOffsets = 0; + + const RESOURCE_DIMENSION eResDim = psContext->psShader->aeResourceDims[psSrcTex->ui32RegisterNumber]; + + if (ui32Flags & TEXSMP_FLAG_GATHER) + { + if (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) + funcName = "gather_compare"; + else + funcName = "gather"; + } + else + { + if (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) + funcName = "sample_compare"; + else + funcName = "sample"; + } + + switch (eResDim) + { + case RESOURCE_DIMENSION_TEXTURE1D: + { + gradSwizzle = ".x"; + ui32NumOffsets = 1; + break; + } + case RESOURCE_DIMENSION_TEXTURE2D: + { + gradSwizzle = ".xy"; + gradientName = "gradient2d"; + ui32NumOffsets = 2; + break; + } + case RESOURCE_DIMENSION_TEXTURECUBE: + { + gradSwizzle = ".xyz"; + ui32NumOffsets = 3; + gradientName = "gradientcube"; + break; + } + case RESOURCE_DIMENSION_TEXTURE3D: + { + gradSwizzle = ".xyz"; + ui32NumOffsets = 3; + gradientName = "gradient3d"; + break; + } + case RESOURCE_DIMENSION_TEXTURE1DARRAY: + { + gradSwizzle = ".x"; + ui32NumOffsets = 1; + break; + } + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + { + gradSwizzle = ".xy"; + ui32NumOffsets = 2; + gradientName = "gradient2d"; + break; + } + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + { + gradSwizzle = ".xyz"; + ui32NumOffsets = 3; + gradientName = "gradientcube"; + break; + } + default: + { + ASSERT(0); + break; + } + } + + + SHADER_VARIABLE_TYPE dataType = psContext->psShader->sInfo.GetTextureDataType(psSrcTex->ui32RegisterNumber); + psContext->AddIndentation(); + AddAssignToDest(psDest, dataType, psSrcTex->GetNumSwizzleElements(), &numParenthesis); + + std::string texName = TranslateOperand(psSrcTex, TO_FLAG_NAME_ONLY); + + // TextureName.FuncName( + glsl << texName; + bformata(glsl, ".%s(", funcName); + + // Sampler name + //TODO: Is it ok to use fixed shadow sampler in all cases of depth compare or would we need more + // accurate way of detecting shadow cases (atm all depth compares are interpreted as shadow usage) + if (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) + { + bcatcstr(glsl, "_mtl_xl_shadow_sampler"); + } + else + { + std::string sampName = TranslateOperand(psSrcSamp, TO_FLAG_NAME_ONLY); + + // insert the "sampler" prefix if the sampler name is equal to the texture name (default sampler) + if (texName == sampName) + sampName.insert(0, "sampler"); + glsl << sampName; + } + + bcatcstr(glsl, ", "); + + // Texture coordinates + 
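+    // Illustrative sketch of the call being assembled below (names are made up): a plain 2D
+    // sample with an immediate offset ends up roughly as
+    //   output = _MainTex.sample(sampler_MainTex, coord.xy, int2(1, 1)).xyzw;
+    // and a depth-compare sample as
+    //   output = _ShadowMap.sample_compare(_mtl_xl_shadow_sampler, coord.xy, saturate(ref));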
TranslateTexCoord(eResDim, psDestAddr); + + // Depth compare reference value + if (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) + { + bcatcstr(glsl, ", saturate("); // TODO: why the saturate here? + glsl << TranslateOperand(psSrcRef, TO_AUTO_BITCAST_TO_FLOAT); + bcatcstr(glsl, ")"); + } + + // lod_options (LOD/grad/bias) based on the flags + if (ui32Flags & TEXSMP_FLAG_LOD) + { + bcatcstr(glsl, ", level("); + glsl << TranslateOperand(psSrcLOD, TO_AUTO_BITCAST_TO_FLOAT); + if (psContext->psShader->ui32MajorVersion < 4) + { + bcatcstr(glsl, ".w"); + } + bcatcstr(glsl, ")"); + } + else if (ui32Flags & TEXSMP_FLAG_FIRSTLOD) + { + bcatcstr(glsl, ", level(0.0)"); + } + else if (ui32Flags & TEXSMP_FLAG_GRAD) + { + glsl << std::string(", ") << std::string(gradientName) << std::string("(float4("); + glsl << TranslateOperand(psSrcDx, TO_AUTO_BITCAST_TO_FLOAT); + bcatcstr(glsl, ")"); + bcatcstr(glsl, gradSwizzle); + bcatcstr(glsl, ", float4("); + glsl << TranslateOperand(psSrcDy, TO_AUTO_BITCAST_TO_FLOAT); + bcatcstr(glsl, ")"); + bcatcstr(glsl, gradSwizzle); + bcatcstr(glsl, ")"); + } + else if (ui32Flags & (TEXSMP_FLAG_BIAS)) + { + glsl << std::string(", bias(") << TranslateOperand(psSrcBias, TO_AUTO_BITCAST_TO_FLOAT) << std::string(")"); + } + + bool hadOffset = false; + + // Add offset param + if (psInst->bAddressOffset) + { + hadOffset = true; + if (ui32NumOffsets == 1) + { + bformata(glsl, ", %d", + psInst->iUAddrOffset); + } + else + if (ui32NumOffsets == 2) + { + bformata(glsl, ", int2(%d, %d)", + psInst->iUAddrOffset, + psInst->iVAddrOffset); + } + else + if (ui32NumOffsets == 3) + { + bformata(glsl, ", int3(%d, %d, %d)", + psInst->iUAddrOffset, + psInst->iVAddrOffset, + psInst->iWAddrOffset); + } + } + // HLSL gather has a variant with separate offset operand + else if (ui32Flags & TEXSMP_FLAG_PARAMOFFSET) + { + hadOffset = true; + uint32_t mask = OPERAND_4_COMPONENT_MASK_X; + if (ui32NumOffsets > 1) + mask |= OPERAND_4_COMPONENT_MASK_Y; + if (ui32NumOffsets > 2) + mask |= OPERAND_4_COMPONENT_MASK_Z; + + bcatcstr(glsl, ","); + glsl << TranslateOperand(psSrcOff, TO_FLAG_INTEGER, mask); + } + + // Add texture gather component selection if needed + if ((ui32Flags & TEXSMP_FLAG_GATHER) && psSrcSamp->GetNumSwizzleElements() > 0) + { + ASSERT(psSrcSamp->GetNumSwizzleElements() == 1); + if (psSrcSamp->aui32Swizzle[0] != OPERAND_4_COMPONENT_X) + { + if (!(ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE)) + { + // Need to add offset param to match func overload + if (!hadOffset) + { + if (ui32NumOffsets == 1) + bcatcstr(glsl, ", 0"); + else + bformata(glsl, ", int%d(0)", ui32NumOffsets); + } + + bcatcstr(glsl, ", component::"); + glsl << TranslateOperandSwizzle(psSrcSamp, OPERAND_4_COMPONENT_MASK_ALL, 0, false); + } + else + { + psContext->m_Reflection.OnDiagnostics("Metal supports gather compare only for the first component.", 0, true); + } + } + } + + bcatcstr(glsl, ")"); + + if (!(ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) || (ui32Flags & TEXSMP_FLAG_GATHER)) + { + // iWriteMaskEnabled is forced off during DecodeOperand because swizzle on sampler uniforms + // does not make sense. But need to re-enable to correctly swizzle this particular instruction. + psSrcTex->iWriteMaskEnabled = 1; + glsl << TranslateOperandSwizzle(psSrcTex, psDest->GetAccessMask(), 0); + } + AddAssignPrologue(numParenthesis); +} + +static const char* swizzleString[] = { ".x", ".y", ".z", ".w" }; + +// Handle cases where vector components are accessed with dynamic index ([] notation). 
+// A bit ugly hack because compiled HLSL uses byte offsets to access data in structs => we are converting +// the offset back to vector component index in runtime => calculating stuff back and forth. +// TODO: Would be better to eliminate the offset calculation ops and use indexes straight on. Could be tricky though... +void ToMetal::TranslateDynamicComponentSelection(const ShaderVarType* psVarType, const Operand* psByteAddr, uint32_t offset, uint32_t mask) +{ + bstring glsl = *psContext->currentGLSLString; + ASSERT(psVarType->Class == SVC_VECTOR); + + bcatcstr(glsl, "["); // Access vector component with [] notation + if (offset > 0) + bcatcstr(glsl, "("); + + // The var containing byte address to the requested element + glsl << TranslateOperand(psByteAddr, TO_FLAG_UNSIGNED_INTEGER, mask); + + if (offset > 0)// If the vector is part of a struct, there is an extra offset in our byte address + bformata(glsl, " - %du)", offset); // Subtract that first + + bcatcstr(glsl, " >> 0x2u"); // Convert byte offset to index: div by four + bcatcstr(glsl, "]"); +} + +void ToMetal::TranslateShaderStorageStore(Instruction* psInst) +{ + bstring glsl = *psContext->currentGLSLString; + int component; + int srcComponent = 0; + + Operand* psDest = 0; + Operand* psDestAddr = 0; + Operand* psDestByteOff = 0; + Operand* psSrc = 0; + + + switch (psInst->eOpcode) + { + case OPCODE_STORE_STRUCTURED: + psDest = &psInst->asOperands[0]; + psDestAddr = &psInst->asOperands[1]; + psDestByteOff = &psInst->asOperands[2]; + psSrc = &psInst->asOperands[3]; + + break; + case OPCODE_STORE_RAW: + psDest = &psInst->asOperands[0]; + psDestByteOff = &psInst->asOperands[1]; + psSrc = &psInst->asOperands[2]; + break; + default: + ASSERT(0); + break; + } + + uint32_t dstOffFlag = TO_FLAG_UNSIGNED_INTEGER; + SHADER_VARIABLE_TYPE dstOffType = psDestByteOff->GetDataType(psContext); + if (dstOffType == SVT_INT || dstOffType == SVT_INT16 || dstOffType == SVT_INT12) + dstOffFlag = TO_FLAG_INTEGER; + + for (component = 0; component < 4; component++) + { + ASSERT(psInst->asOperands[0].eSelMode == OPERAND_4_COMPONENT_MASK_MODE); + if (psInst->asOperands[0].ui32CompMask & (1 << component)) + { + psContext->AddIndentation(); + glsl << TranslateOperand(psDest, TO_FLAG_DESTINATION | TO_FLAG_NAME_ONLY); + + if (psDestAddr) + { + bcatcstr(glsl, "["); + glsl << TranslateOperand(psDestAddr, TO_FLAG_INTEGER | TO_FLAG_UNSIGNED_INTEGER); + bcatcstr(glsl, "].value"); + } + + bcatcstr(glsl, "[("); + glsl << TranslateOperand(psDestByteOff, dstOffFlag); + bcatcstr(glsl, " >> 2"); + if (dstOffFlag == TO_FLAG_UNSIGNED_INTEGER) + bcatcstr(glsl, "u"); + bcatcstr(glsl, ")"); + + if (component != 0) + { + bformata(glsl, " + %d", component); + if (dstOffFlag == TO_FLAG_UNSIGNED_INTEGER) + bcatcstr(glsl, "u"); + } + + bcatcstr(glsl, "]"); + + //Dest type is currently always a uint array. 
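+            // Illustrative example (buffer/register names are made up): a store_structured of
+            // r0.xy typically becomes two scalar writes along the lines of
+            //   myBuf[index].value[(byteOffset >> 2u)] = r0AsUint.x;
+            //   myBuf[index].value[(byteOffset >> 2u) + 1u] = r0AsUint.y;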
+            bcatcstr(glsl, " = ");
+            if (psSrc->GetNumSwizzleElements() > 1)
+                glsl << TranslateOperand(psSrc, TO_FLAG_UNSIGNED_INTEGER, 1 << (srcComponent++));
+            else
+                glsl << TranslateOperand(psSrc, TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_X);
+
+            bformata(glsl, ";\n");
+        }
+    }
+}
+
+void ToMetal::TranslateShaderStorageLoad(Instruction* psInst)
+{
+    bstring glsl = *psContext->currentGLSLString;
+    int component;
+    Operand* psDest = 0;
+    Operand* psSrcAddr = 0;
+    Operand* psSrcByteOff = 0;
+    Operand* psSrc = 0;
+
+    switch (psInst->eOpcode)
+    {
+    case OPCODE_LD_STRUCTURED:
+        psDest = &psInst->asOperands[0];
+        psSrcAddr = &psInst->asOperands[1];
+        psSrcByteOff = &psInst->asOperands[2];
+        psSrc = &psInst->asOperands[3];
+        break;
+    case OPCODE_LD_RAW:
+        psDest = &psInst->asOperands[0];
+        psSrcByteOff = &psInst->asOperands[1];
+        psSrc = &psInst->asOperands[2];
+        break;
+    default:
+        ASSERT(0);
+        break;
+    }
+
+    uint32_t destCount = psDest->GetNumSwizzleElements();
+    uint32_t destMask = psDest->GetAccessMask();
+
+    int numParenthesis = 0;
+    int firstItemAdded = 0;
+    SHADER_VARIABLE_TYPE destDataType = psDest->GetDataType(psContext);
+    uint32_t srcOffFlag = TO_FLAG_UNSIGNED_INTEGER;
+    SHADER_VARIABLE_TYPE srcOffType = psSrcByteOff->GetDataType(psContext);
+    if (srcOffType == SVT_INT || srcOffType == SVT_INT16 || srcOffType == SVT_INT12)
+        srcOffFlag = TO_FLAG_INTEGER;
+
+    psContext->AddIndentation();
+    AddAssignToDest(psDest, destDataType, destCount, &numParenthesis);
+    if (destCount > 1)
+    {
+        bformata(glsl, "%s(", GetConstructorForTypeMetal(destDataType, destCount));
+        numParenthesis++;
+    }
+    for (component = 0; component < 4; component++)
+    {
+        bool addedBitcast = false;
+        if (!(destMask & (1 << component)))
+            continue;
+
+        if (firstItemAdded)
+            bcatcstr(glsl, ", ");
+        else
+            firstItemAdded = 1;
+
+        // always uint array atm
+        if (destDataType == SVT_FLOAT)
+        {
+            // input already in uints, need bitcast
+            bcatcstr(glsl, "as_type<float>(");
+            addedBitcast = true;
+        }
+        else if (destDataType == SVT_INT || destDataType == SVT_INT16 || destDataType == SVT_INT12)
+        {
+            bcatcstr(glsl, "int(");
+            addedBitcast = true;
+        }
+
+        glsl << TranslateOperand(psSrc, TO_FLAG_NAME_ONLY);
+
+        if (psSrcAddr)
+        {
+            bcatcstr(glsl, "[");
+            glsl << TranslateOperand(psSrcAddr, TO_FLAG_UNSIGNED_INTEGER | TO_FLAG_INTEGER);
+            bcatcstr(glsl, "].value");
+        }
+        bcatcstr(glsl, "[(");
+        glsl << TranslateOperand(psSrcByteOff, srcOffFlag);
+        bcatcstr(glsl, " >> 2");
+        if (srcOffFlag == TO_FLAG_UNSIGNED_INTEGER)
+            bcatcstr(glsl, "u");
+
+        bformata(glsl, ") + %d", psSrc->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE ? 
psSrc->aui32Swizzle[component] : component); + if (srcOffFlag == TO_FLAG_UNSIGNED_INTEGER) + bcatcstr(glsl, "u"); + + bcatcstr(glsl, "]"); + + if (addedBitcast) + bcatcstr(glsl, ")"); + } + AddAssignPrologue(numParenthesis); +} + +void ToMetal::TranslateAtomicMemOp(Instruction* psInst) +{ + bstring glsl = *psContext->currentGLSLString; + int numParenthesis = 0; + uint32_t ui32DataTypeFlag = TO_FLAG_INTEGER; + const char* func = ""; + Operand* dest = 0; + Operand* previousValue = 0; + Operand* destAddr = 0; + Operand* src = 0; + Operand* compare = 0; + int texDim = 0; + bool isUint = true; + bool shouldAddFailMemoryOrder = false; + bool shouldExtractCompare = false; + + switch (psInst->eOpcode) + { + case OPCODE_IMM_ATOMIC_IADD: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_IADD\n"); +#endif + func = "atomic_fetch_add_explicit"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_ATOMIC_IADD: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_IADD\n"); +#endif + func = "atomic_fetch_add_explicit"; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + src = &psInst->asOperands[2]; + break; + } + case OPCODE_IMM_ATOMIC_AND: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_AND\n"); +#endif + func = "atomic_fetch_and_explicit"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_ATOMIC_AND: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_AND\n"); +#endif + func = "atomic_fetch_and_explicit"; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + src = &psInst->asOperands[2]; + break; + } + case OPCODE_IMM_ATOMIC_OR: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_OR\n"); +#endif + func = "atomic_fetch_or_explicit"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_ATOMIC_OR: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_OR\n"); +#endif + func = "atomic_fetch_or_explicit"; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + src = &psInst->asOperands[2]; + break; + } + case OPCODE_IMM_ATOMIC_XOR: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_XOR\n"); +#endif + func = "atomic_fetch_xor_explicit"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_ATOMIC_XOR: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_XOR\n"); +#endif + func = "atomic_fetch_xor_explicit"; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + src = &psInst->asOperands[2]; + break; + } + + case OPCODE_IMM_ATOMIC_EXCH: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_EXCH\n"); +#endif + func = "atomic_exchange_explicit"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_IMM_ATOMIC_CMP_EXCH: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_CMP_EXC\n"); +#endif + 
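+        // Illustrative note: Metal's atomic_compare_exchange_weak_explicit takes the expected value
+        // by pointer and needs both a success and a failure memory order, which is why the code
+        // below extracts the comparand into a local and appends a second memory_order_relaxed.
+        // The emitted shape is roughly (made-up names):
+        //   { uint compare_value = cmp;
+        //     dst_old = atomic_compare_exchange_weak_explicit(ptr, &compare_value, src,
+        //         memory_order::memory_order_relaxed, memory_order::memory_order_relaxed); }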
func = "atomic_compare_exchange_weak_explicit"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + compare = &psInst->asOperands[3]; + src = &psInst->asOperands[4]; + shouldAddFailMemoryOrder = true; + shouldExtractCompare = true; + break; + } + case OPCODE_ATOMIC_CMP_STORE: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_CMP_STORE\n"); +#endif + func = "atomic_compare_exchange_weak_explicit"; + previousValue = 0; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + compare = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + shouldAddFailMemoryOrder = true; + shouldExtractCompare = true; + break; + } + case OPCODE_IMM_ATOMIC_UMIN: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_UMIN\n"); +#endif + func = "atomic_fetch_min_explicit"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_ATOMIC_UMIN: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_UMIN\n"); +#endif + func = "atomic_fetch_min_explicit"; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + src = &psInst->asOperands[2]; + break; +} + case OPCODE_IMM_ATOMIC_IMIN: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_IMIN\n"); +#endif + func = "atomic_fetch_min_explicit"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_ATOMIC_IMIN: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_IMIN\n"); +#endif + func = "atomic_fetch_min_explicit"; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + src = &psInst->asOperands[2]; + break; + } + case OPCODE_IMM_ATOMIC_UMAX: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_UMAX\n"); +#endif + func = "atomic_fetch_max_explicit"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_ATOMIC_UMAX: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_UMAX\n"); +#endif + func = "atomic_fetch_max_explicit"; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + src = &psInst->asOperands[2]; + break; + } + case OPCODE_IMM_ATOMIC_IMAX: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_IMAX\n"); +#endif + func = "atomic_fetch_max_explicit"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_ATOMIC_IMAX: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_IMAX\n"); +#endif + func = "atomic_fetch_max_explicit"; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + src = &psInst->asOperands[2]; + break; + } + default: + ASSERT(0); + break; + } + + psContext->AddIndentation(); + + if (dest->eType != OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) + { + const ResourceBinding* psBinding = 0; + psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, dest->ui32RegisterNumber, &psBinding); + + if (psBinding->eType == RTYPE_UAV_RWTYPED) + { + isUint = (psBinding->ui32ReturnType == RETURN_TYPE_UINT); + + // Find out if it's texture 
// and of what dimension
+            switch (psBinding->eDimension)
+            {
+            case REFLECT_RESOURCE_DIMENSION_TEXTURE1D:
+                texDim = 1;
+                break;
+            case REFLECT_RESOURCE_DIMENSION_TEXTURECUBE:
+            case REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY:
+            case REFLECT_RESOURCE_DIMENSION_TEXTURE2D:
+            case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS:
+                texDim = 2;
+                break;
+            case REFLECT_RESOURCE_DIMENSION_TEXTURE3D:
+            case REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY:
+            case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY:
+            case REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY:
+                texDim = 3;
+                break;
+            default:
+                ASSERT(0);
+                break;
+            }
+        }
+    }
+
+    if (texDim > 0)
+    {
+        psContext->m_Reflection.OnDiagnostics("Texture atomics are not supported in Metal", 0, true);
+        return;
+    }
+
+    if (isUint)
+        ui32DataTypeFlag = TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_BITCAST_TO_UINT;
+    else
+        ui32DataTypeFlag = TO_FLAG_INTEGER | TO_AUTO_BITCAST_TO_INT;
+
+    if (shouldExtractCompare)
+    {
+        bcatcstr(glsl, "{\n");
+        psContext->AddIndentation();
+        psContext->AddIndentation();
+        bcatcstr(glsl, "uint compare_value = ");
+        glsl << TranslateOperand(compare, ui32DataTypeFlag);
+        bcatcstr(glsl, "; ");
+    }
+
+    if (previousValue)
+        AddAssignToDest(previousValue, isUint ? SVT_UINT : SVT_INT, 1, &numParenthesis);
+
+    bcatcstr(glsl, func);
+    bcatcstr(glsl, "(");
+
+    uint32_t destAddrFlag = TO_FLAG_UNSIGNED_INTEGER;
+    SHADER_VARIABLE_TYPE destAddrType = destAddr->GetDataType(psContext);
+    if (destAddrType == SVT_INT || destAddrType == SVT_INT16 || destAddrType == SVT_INT12)
+        destAddrFlag = TO_FLAG_INTEGER;
+
+    if(dest->eType == OPERAND_TYPE_UNORDERED_ACCESS_VIEW)
+        bcatcstr(glsl, "reinterpret_cast<device atomic_uint *>(&");
+    else
+        bcatcstr(glsl, "reinterpret_cast<threadgroup atomic_uint *>(&");
+    glsl << TranslateOperand(dest, TO_FLAG_DESTINATION | TO_FLAG_NAME_ONLY);
+    bcatcstr(glsl, "[");
+    glsl << TranslateOperand(destAddr, destAddrFlag, OPERAND_4_COMPONENT_MASK_X);
+
+    // Structured buf if we have both x & y swizzles. Raw buf has only x -> no .value[]
+    if (destAddr->GetNumSwizzleElements(OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y) == 2)
+    {
+        bcatcstr(glsl, "]");
+        bcatcstr(glsl, ".value[");
+        glsl << TranslateOperand(destAddr, destAddrFlag, OPERAND_4_COMPONENT_MASK_Y);
+    }
+
+    bcatcstr(glsl, " >> 2");//bytes to floats
+    if (destAddrFlag == TO_FLAG_UNSIGNED_INTEGER)
+        bcatcstr(glsl, "u");
+
+    bcatcstr(glsl, "]), ");
+
+    if (compare)
+    {
+        if (shouldExtractCompare)
+        {
+            bcatcstr(glsl, "&compare_value, ");
+        }
+        else
+        {
+            glsl << TranslateOperand(compare, ui32DataTypeFlag);
+            bcatcstr(glsl, ", ");
+        }
+    }
+
+    glsl << TranslateOperand(src, ui32DataTypeFlag);
+    bcatcstr(glsl, ", memory_order::memory_order_relaxed");
+    if (shouldAddFailMemoryOrder)
+        bcatcstr(glsl, ", memory_order::memory_order_relaxed");
+    bcatcstr(glsl, ")");
+    if (previousValue)
+    {
+        AddAssignPrologue(numParenthesis);
+    }
+    else
+        bcatcstr(glsl, ";\n");
+
+    if (shouldExtractCompare)
+    {
+        psContext->AddIndentation();
+        bcatcstr(glsl, "}\n");
+    }
+}
+
+void ToMetal::TranslateConditional(
+    Instruction* psInst,
+    bstring glsl)
+{
+    const char* statement = "";
+    if (psInst->eOpcode == OPCODE_BREAKC)
+    {
+        statement = "break";
+    }
+    else if (psInst->eOpcode == OPCODE_CONTINUEC)
+    {
+        statement = "continue";
+    }
+    else if (psInst->eOpcode == OPCODE_RETC) // FIXME! 
Need to spew out shader epilogue + { + if (psContext->psShader->eShaderType == COMPUTE_SHADER) + statement = "return"; + else + statement = "return output"; + } + + + int isBool = psInst->asOperands[0].GetDataType(psContext) == SVT_BOOL; + + if (isBool) + { + bcatcstr(glsl, "if("); + if (psInst->eBooleanTestType != INSTRUCTION_TEST_NONZERO) + bcatcstr(glsl, "!"); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_BOOL); + if (psInst->eOpcode != OPCODE_IF) + { + bformata(glsl, "){%s;}\n", statement); + } + else + { + bcatcstr(glsl, "){\n"); + } + } + else + { + if (psInst->eBooleanTestType == INSTRUCTION_TEST_ZERO) + { + bcatcstr(glsl, "if(("); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_UNSIGNED_INTEGER); + + if (psInst->eOpcode != OPCODE_IF) + { + bformata(glsl, ")==uint(0u)){%s;}\n", statement); + } + else + { + bcatcstr(glsl, ")==uint(0u)){\n"); + } + } + else + { + ASSERT(psInst->eBooleanTestType == INSTRUCTION_TEST_NONZERO); + bcatcstr(glsl, "if(("); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_UNSIGNED_INTEGER); + + if (psInst->eOpcode != OPCODE_IF) + { + bformata(glsl, ")!=uint(0u)){%s;}\n", statement); + } + else + { + bcatcstr(glsl, ")!=uint(0u)){\n"); + } + } + } +} + +void ToMetal::TranslateInstruction(Instruction* psInst) +{ + bstring glsl = *psContext->currentGLSLString; + int numParenthesis = 0; + +#ifdef _DEBUG + psContext->AddIndentation(); + bformata(glsl, "//Instruction %d\n", psInst->id); +#if 0 + if(psInst->id == 73) + { + ASSERT(1); //Set breakpoint here to debug an instruction from its ID. + } +#endif +#endif + + switch (psInst->eOpcode) + { + case OPCODE_FTOI: + case OPCODE_FTOU: + { + uint32_t dstCount = psInst->asOperands[0].GetNumSwizzleElements(); + uint32_t srcCount = psInst->asOperands[1].GetNumSwizzleElements(); + SHADER_VARIABLE_TYPE castType = psInst->eOpcode == OPCODE_FTOU ? SVT_UINT : SVT_INT; +#ifdef _DEBUG + psContext->AddIndentation(); + if (psInst->eOpcode == OPCODE_FTOU) + bcatcstr(glsl, "//FTOU\n"); + else + bcatcstr(glsl, "//FTOI\n"); +#endif + switch (psInst->asOperands[0].eMinPrecision) + { + case OPERAND_MIN_PRECISION_DEFAULT: + break; + case OPERAND_MIN_PRECISION_SINT_16: + castType = SVT_INT16; + ASSERT(psInst->eOpcode == OPCODE_FTOI); + break; + case OPERAND_MIN_PRECISION_UINT_16: + castType = SVT_UINT16; + ASSERT(psInst->eOpcode == OPCODE_FTOU); + break; + default: + ASSERT(0); // We'd be doing bitcasts into low/mediump ints, not good. 
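+        // Illustrative example (made-up registers): once castType is resolved, the emission below
+        // produces a plain constructor cast, e.g. for a two-component ftou:
+        //   u_xlatu0.xy = uint2(u_xlat1.xy);
+        // with a 16-bit constructor instead when a min-precision destination was inferred.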
+ } + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[0], castType, srcCount, &numParenthesis); + bcatcstr(glsl, GetConstructorForTypeMetal(castType, dstCount)); + bcatcstr(glsl, "("); // 1 + glsl << TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_FLOAT, psInst->asOperands[0].GetAccessMask()); + bcatcstr(glsl, ")"); // 1 + AddAssignPrologue(numParenthesis); + break; + } + + case OPCODE_MOV: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//MOV\n"); +#endif + psContext->AddIndentation(); + AddMOVBinaryOp(&psInst->asOperands[0], &psInst->asOperands[1]); + break; + } + case OPCODE_ITOF://signed to float + case OPCODE_UTOF://unsigned to float + { + SHADER_VARIABLE_TYPE castType = SVT_FLOAT; + uint32_t dstCount = psInst->asOperands[0].GetNumSwizzleElements(); + uint32_t srcCount = psInst->asOperands[1].GetNumSwizzleElements(); + +#ifdef _DEBUG + psContext->AddIndentation(); + if (psInst->eOpcode == OPCODE_ITOF) + { + bcatcstr(glsl, "//ITOF\n"); + } + else + { + bcatcstr(glsl, "//UTOF\n"); + } +#endif + + switch (psInst->asOperands[0].eMinPrecision) + { + case OPERAND_MIN_PRECISION_DEFAULT: + break; + case OPERAND_MIN_PRECISION_FLOAT_2_8: + castType = SVT_FLOAT10; + break; + case OPERAND_MIN_PRECISION_FLOAT_16: + castType = SVT_FLOAT16; + break; + default: + ASSERT(0); // We'd be doing bitcasts into low/mediump ints, not good. + } + + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], castType, srcCount, &numParenthesis); + bcatcstr(glsl, GetConstructorForTypeMetal(castType, dstCount)); + bcatcstr(glsl, "("); // 1 + glsl << TranslateOperand(&psInst->asOperands[1], psInst->eOpcode == OPCODE_UTOF ? TO_AUTO_BITCAST_TO_UINT : TO_AUTO_BITCAST_TO_INT, psInst->asOperands[0].GetAccessMask()); + bcatcstr(glsl, ")"); // 1 + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_MAD: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//MAD\n"); +#endif + CallTernaryOp("*", "+", psInst, 0, 1, 2, 3, TO_FLAG_NONE); + break; + } + case OPCODE_IMAD: + { + uint32_t ui32Flags = TO_FLAG_INTEGER; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMAD\n"); +#endif + + if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) + { + ui32Flags = TO_FLAG_UNSIGNED_INTEGER; + } + + CallTernaryOp("*", "+", psInst, 0, 1, 2, 3, ui32Flags); + break; + } + case OPCODE_DADD: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//DADD\n"); +#endif + CallBinaryOp("+", psInst, 0, 1, 2, SVT_DOUBLE); + break; + } + case OPCODE_IADD: + { + SHADER_VARIABLE_TYPE eType = SVT_INT; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IADD\n"); +#endif + //Is this a signed or unsigned add? 
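+        // DXBC integer registers are typeless, so the inferred type of the destination decides
+        // between a signed and an unsigned add. Illustrative output (made-up registers):
+        //   u_xlatu0.x = u_xlatu1.x + u_xlatu2.x;   // dest inferred as uint
+        //   u_xlati0.x = u_xlati1.x + u_xlati2.x;   // dest stays int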
+ if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) + { + eType = SVT_UINT; + } + CallBinaryOp("+", psInst, 0, 1, 2, eType); + break; + } + case OPCODE_ADD: + { + +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ADD\n"); +#endif + CallBinaryOp("+", psInst, 0, 1, 2, SVT_FLOAT); + break; + } + case OPCODE_OR: + { + /*Todo: vector version */ +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//OR\n"); +#endif + uint32_t destMask = psInst->asOperands[0].GetAccessMask(); + if (psInst->asOperands[0].GetDataType(psContext) == SVT_BOOL) + { + uint32_t destMask = psInst->asOperands[0].GetAccessMask(); + + int needsParenthesis = 0; + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_BOOL, psInst->asOperands[0].GetNumSwizzleElements(), &needsParenthesis); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_BOOL, destMask); + bcatcstr(glsl, " || "); + glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_BOOL, destMask); + AddAssignPrologue(needsParenthesis); + } + else + CallBinaryOp("|", psInst, 0, 1, 2, SVT_UINT); + break; + } + case OPCODE_AND: + { + SHADER_VARIABLE_TYPE eA = psInst->asOperands[1].GetDataType(psContext); + SHADER_VARIABLE_TYPE eB = psInst->asOperands[2].GetDataType(psContext); +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//AND\n"); +#endif + uint32_t destMask = psInst->asOperands[0].GetAccessMask(); + uint32_t dstSwizCount = psInst->asOperands[0].GetNumSwizzleElements(); + SHADER_VARIABLE_TYPE eDataType = psInst->asOperands[0].GetDataType(psContext); + uint32_t ui32Flags = SVTTypeToFlag(eDataType); + if (psInst->asOperands[0].GetDataType(psContext) == SVT_BOOL) + { + int needsParenthesis = 0; + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_BOOL, psInst->asOperands[0].GetNumSwizzleElements(), &needsParenthesis); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_BOOL, destMask); + bcatcstr(glsl, " && "); + glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_BOOL, destMask); + AddAssignPrologue(needsParenthesis); + } + else if ((eA == SVT_BOOL || eB == SVT_BOOL) && !(eA == SVT_BOOL && eB == SVT_BOOL)) + { + int boolOp = eA == SVT_BOOL ? 1 : 2; + int otherOp = eA == SVT_BOOL ? 2 : 1; + int needsParenthesis = 0; + uint32_t i; + psContext->AddIndentation(); + + if (dstSwizCount == 1) + { + AddAssignToDest(&psInst->asOperands[0], eDataType, dstSwizCount, &needsParenthesis); + glsl << TranslateOperand(&psInst->asOperands[boolOp], TO_FLAG_BOOL, destMask); + bcatcstr(glsl, " ? 
"); + glsl << TranslateOperand(&psInst->asOperands[otherOp], ui32Flags, destMask); + bcatcstr(glsl, " : "); + + bcatcstr(glsl, GetConstructorForTypeMetal(eDataType, dstSwizCount)); + bcatcstr(glsl, "("); + for (i = 0; i < dstSwizCount; i++) + { + if (i > 0) + bcatcstr(glsl, ", "); + switch (eDataType) + { + case SVT_FLOAT: + case SVT_FLOAT10: + case SVT_FLOAT16: + case SVT_DOUBLE: + bcatcstr(glsl, "0.0"); + break; + default: + bcatcstr(glsl, "0"); + + } + } + bcatcstr(glsl, ")"); + } + else if (eDataType == SVT_FLOAT) + { + // We can use select() + AddAssignToDest(&psInst->asOperands[0], eDataType, dstSwizCount, &needsParenthesis); + bcatcstr(glsl, "select("); + bcatcstr(glsl, GetConstructorForTypeMetal(eDataType, dstSwizCount)); + bcatcstr(glsl, "("); + for (i = 0; i < dstSwizCount; i++) + { + if (i > 0) + bcatcstr(glsl, ", "); + switch (eDataType) + { + case SVT_FLOAT: + case SVT_FLOAT10: + case SVT_FLOAT16: + case SVT_DOUBLE: + bcatcstr(glsl, "0.0"); + break; + default: + bcatcstr(glsl, "0"); + + } + } + bcatcstr(glsl, "), "); + glsl << TranslateOperand(&psInst->asOperands[otherOp], ui32Flags, destMask); + bcatcstr(glsl, ", "); + bcatcstr(glsl, GetConstructorForTypeMetal(SVT_BOOL, dstSwizCount)); + bcatcstr(glsl, "("); + glsl << TranslateOperand(&psInst->asOperands[boolOp], TO_FLAG_BOOL, destMask); + bcatcstr(glsl, ")"); + bcatcstr(glsl, ")"); + } + else + { + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, dstSwizCount, &needsParenthesis); + bcatcstr(glsl, "("); + bcatcstr(glsl, GetConstructorForTypeMetal(SVT_UINT, dstSwizCount)); + bcatcstr(glsl, "("); + glsl << TranslateOperand(&psInst->asOperands[boolOp], TO_FLAG_BOOL, destMask); + bcatcstr(glsl, ") * 0xffffffffu) & "); + glsl << TranslateOperand(&psInst->asOperands[otherOp], TO_FLAG_UNSIGNED_INTEGER, destMask); + } + + AddAssignPrologue(needsParenthesis); + } + else + { + CallBinaryOp("&", psInst, 0, 1, 2, SVT_UINT); + } + + + break; + } + case OPCODE_GE: + { + /* + dest = vec4(greaterThanEqual(vec4(srcA), vec4(srcB)); + Caveat: The result is a boolean but HLSL asm returns 0xFFFFFFFF/0x0 instead. 
+ */ +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//GE\n"); +#endif + AddComparison(psInst, CMP_GE, TO_FLAG_NONE); + break; + } + case OPCODE_MUL: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//MUL\n"); +#endif + CallBinaryOp("*", psInst, 0, 1, 2, SVT_FLOAT); + break; + } + case OPCODE_IMUL: + { + SHADER_VARIABLE_TYPE eType = SVT_INT; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMUL\n"); +#endif + if (psInst->asOperands[1].GetDataType(psContext) == SVT_UINT) + { + eType = SVT_UINT; + } + + ASSERT(psInst->asOperands[0].eType == OPERAND_TYPE_NULL); + + CallBinaryOp("*", psInst, 1, 2, 3, eType); + break; + } + case OPCODE_UDIV: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//UDIV\n"); +#endif + //destQuotient, destRemainder, src0, src1 + CallBinaryOp("/", psInst, 0, 2, 3, SVT_UINT); + CallBinaryOp("%", psInst, 1, 2, 3, SVT_UINT); + break; + } + case OPCODE_DIV: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//DIV\n"); +#endif + CallBinaryOp("/", psInst, 0, 1, 2, SVT_FLOAT); + break; + } + case OPCODE_SINCOS: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//SINCOS\n"); +#endif + // Need careful ordering if src == dest[0], as then the cos() will be reading from wrong value + if (psInst->asOperands[0].eType == psInst->asOperands[2].eType && + psInst->asOperands[0].ui32RegisterNumber == psInst->asOperands[2].ui32RegisterNumber) + { + // sin() result overwrites source, do cos() first. + // The case where both write the src shouldn't really happen anyway. + if (psInst->asOperands[1].eType != OPERAND_TYPE_NULL) + { + CallHelper1("cos", psInst, 1, 2, 1); + } + + if (psInst->asOperands[0].eType != OPERAND_TYPE_NULL) + { + CallHelper1( + "sin", psInst, 0, 2, 1); + } + } + else + { + if (psInst->asOperands[0].eType != OPERAND_TYPE_NULL) + { + CallHelper1("sin", psInst, 0, 2, 1); + } + + if (psInst->asOperands[1].eType != OPERAND_TYPE_NULL) + { + CallHelper1("cos", psInst, 1, 2, 1); + } + } + break; + } + + case OPCODE_DP2: + { + int numParenthesis = 0; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//DP2\n"); +#endif + psContext->AddIndentation(); + SHADER_VARIABLE_TYPE dstType = psInst->asOperands[0].GetDataType(psContext); + uint32_t typeFlags = TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_EXPAND_TO_VEC2; + if (psInst->asOperands[1].GetDataType(psContext) == SVT_FLOAT16 + && psInst->asOperands[2].GetDataType(psContext) == SVT_FLOAT16) + typeFlags = TO_FLAG_FORCE_HALF | TO_AUTO_EXPAND_TO_VEC2; + + if (dstType != SVT_FLOAT16) + dstType = SVT_FLOAT; + + AddAssignToDest(&psInst->asOperands[0], dstType, 1, &numParenthesis); + bcatcstr(glsl, "dot("); + glsl << TranslateOperand(&psInst->asOperands[1], typeFlags, 3 /* .xy */); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[2], typeFlags, 3 /* .xy */); + bcatcstr(glsl, ")"); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_DP3: + { + int numParenthesis = 0; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//DP3\n"); +#endif + psContext->AddIndentation(); + SHADER_VARIABLE_TYPE dstType = psInst->asOperands[0].GetDataType(psContext); + uint32_t typeFlags = TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_EXPAND_TO_VEC3; + if (psInst->asOperands[1].GetDataType(psContext) == SVT_FLOAT16 + && psInst->asOperands[2].GetDataType(psContext) == SVT_FLOAT16) + typeFlags = TO_FLAG_FORCE_HALF | TO_AUTO_EXPAND_TO_VEC3; + + if (dstType != SVT_FLOAT16) + dstType = SVT_FLOAT; + + 
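+        // Illustrative output (made-up registers): dp3 collapses to a scalar dot(), e.g.
+        //   u_xlat0.x = dot(u_xlat1.xyz, u_xlat2.xyz);
+        // using the half-precision path when both sources were inferred as min16float.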
AddAssignToDest(&psInst->asOperands[0], dstType, 1, &numParenthesis); + bcatcstr(glsl, "dot("); + glsl << TranslateOperand(&psInst->asOperands[1], typeFlags, 7 /* .xyz */); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[2], typeFlags, 7 /* .xyz */); + bcatcstr(glsl, ")"); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_DP4: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//DP4\n"); +#endif + CallHelper2("dot", psInst, 0, 1, 2, 0); + break; + } + case OPCODE_INE: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//INE\n"); +#endif + AddComparison(psInst, CMP_NE, TO_FLAG_INTEGER); + break; + } + case OPCODE_NE: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//NE\n"); +#endif + AddComparison(psInst, CMP_NE, TO_FLAG_NONE); + break; + } + case OPCODE_IGE: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IGE\n"); +#endif + AddComparison(psInst, CMP_GE, TO_FLAG_INTEGER); + break; + } + case OPCODE_ILT: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ILT\n"); +#endif + AddComparison(psInst, CMP_LT, TO_FLAG_INTEGER); + break; + } + case OPCODE_LT: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//LT\n"); +#endif + AddComparison(psInst, CMP_LT, TO_FLAG_NONE); + break; + } + case OPCODE_IEQ: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IEQ\n"); +#endif + AddComparison(psInst, CMP_EQ, TO_FLAG_INTEGER); + break; + } + case OPCODE_ULT: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ULT\n"); +#endif + AddComparison(psInst, CMP_LT, TO_FLAG_UNSIGNED_INTEGER); + break; + } + case OPCODE_UGE: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//UGE\n"); +#endif + AddComparison(psInst, CMP_GE, TO_FLAG_UNSIGNED_INTEGER); + break; + } + case OPCODE_MOVC: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//MOVC\n"); +#endif + AddMOVCBinaryOp(&psInst->asOperands[0], &psInst->asOperands[1], &psInst->asOperands[2], &psInst->asOperands[3]); + break; + } + case OPCODE_SWAPC: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//SWAPC\n"); +#endif + // TODO needs temps!! 
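+            // SWAPC conditionally swaps src3/src4 into two destinations; doing it as the two in-place MOVCs
+            // below could read a register the first move has already overwritten whenever a destination
+            // aliases one of the sources, so temporaries are needed before this can be enabled.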
+ ASSERT(0); + AddMOVCBinaryOp(&psInst->asOperands[0], &psInst->asOperands[2], &psInst->asOperands[4], &psInst->asOperands[3]); + AddMOVCBinaryOp(&psInst->asOperands[1], &psInst->asOperands[2], &psInst->asOperands[3], &psInst->asOperands[4]); + break; + } + + case OPCODE_LOG: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//LOG\n"); +#endif + CallHelper1("log2", psInst, 0, 1, 1); + break; + } + case OPCODE_RSQ: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//RSQ\n"); +#endif + CallHelper1("rsqrt", psInst, 0, 1, 1); + break; + } + case OPCODE_EXP: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//EXP\n"); +#endif + CallHelper1("exp2", psInst, 0, 1, 1); + break; + } + case OPCODE_SQRT: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//SQRT\n"); +#endif + CallHelper1("sqrt", psInst, 0, 1, 1); + break; + } + case OPCODE_ROUND_PI: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ROUND_PI\n"); +#endif + CallHelper1("ceil", psInst, 0, 1, 1); + break; + } + case OPCODE_ROUND_NI: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ROUND_NI\n"); +#endif + CallHelper1("floor", psInst, 0, 1, 1); + break; + } + case OPCODE_ROUND_Z: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ROUND_Z\n"); +#endif + CallHelper1("trunc", psInst, 0, 1, 1); + break; + } + case OPCODE_ROUND_NE: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ROUND_NE\n"); +#endif + CallHelper1("rint", psInst, 0, 1, 1); + break; + } + case OPCODE_FRC: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//FRC\n"); +#endif + CallHelper1("fract", psInst, 0, 1, 1); + break; + } + case OPCODE_IMAX: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMAX\n"); +#endif + CallHelper2Int("max", psInst, 0, 1, 2, 1); + break; + } + case OPCODE_UMAX: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//UMAX\n"); +#endif + CallHelper2UInt("max", psInst, 0, 1, 2, 1); + break; + } + case OPCODE_MAX: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//MAX\n"); +#endif + CallHelper2("max", psInst, 0, 1, 2, 1); + break; + } + case OPCODE_IMIN: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMIN\n"); +#endif + CallHelper2Int("min", psInst, 0, 1, 2, 1); + break; + } + case OPCODE_UMIN: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//UMIN\n"); +#endif + CallHelper2UInt("min", psInst, 0, 1, 2, 1); + break; + } + case OPCODE_MIN: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//MIN\n"); +#endif + CallHelper2("min", psInst, 0, 1, 2, 1); + break; + } + case OPCODE_GATHER4: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//GATHER4\n"); +#endif + TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER); + break; + } + case OPCODE_GATHER4_PO_C: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//GATHER4_PO_C\n"); +#endif + TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER | TEXSMP_FLAG_PARAMOFFSET | TEXSMP_FLAG_DEPTHCOMPARE); + break; + } + case OPCODE_GATHER4_PO: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//GATHER4_PO\n"); +#endif + TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER | TEXSMP_FLAG_PARAMOFFSET); + break; + } + case OPCODE_GATHER4_C: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//GATHER4_C\n"); +#endif + TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER | 
TEXSMP_FLAG_DEPTHCOMPARE); + break; + } + case OPCODE_SAMPLE: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE\n"); +#endif + TranslateTextureSample(psInst, TEXSMP_FLAG_NONE); + break; + } + case OPCODE_SAMPLE_L: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_L\n"); +#endif + TranslateTextureSample(psInst, TEXSMP_FLAG_LOD); + break; + } + case OPCODE_SAMPLE_C: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_C\n"); +#endif + + TranslateTextureSample(psInst, TEXSMP_FLAG_DEPTHCOMPARE); + break; + } + case OPCODE_SAMPLE_C_LZ: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_C_LZ\n"); +#endif + + TranslateTextureSample(psInst, TEXSMP_FLAG_DEPTHCOMPARE | TEXSMP_FLAG_FIRSTLOD); + break; + } + case OPCODE_SAMPLE_D: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_D\n"); +#endif + + TranslateTextureSample(psInst, TEXSMP_FLAG_GRAD); + break; + } + case OPCODE_SAMPLE_B: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_B\n"); +#endif + + TranslateTextureSample(psInst, TEXSMP_FLAG_BIAS); + break; + } + case OPCODE_RET: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//RET\n"); +#endif + if (psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode) + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//--- Post shader code ---\n"); +#endif + bconcat(glsl, psContext->psShader->asPhases[psContext->currentPhase].postShaderCode); +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//--- End post shader code ---\n"); +#endif + } + psContext->AddIndentation(); + if(psContext->psShader->eShaderType == COMPUTE_SHADER) + bcatcstr(glsl, "return;\n"); + else + bcatcstr(glsl, "return output;\n"); + + break; + } + case OPCODE_INTERFACE_CALL: + { + ASSERT(0); + } + case OPCODE_LABEL: + { + ASSERT(0); // Never seen this + } + case OPCODE_COUNTBITS: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//COUNTBITS\n"); +#endif + psContext->AddIndentation(); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER | TO_FLAG_DESTINATION); + bcatcstr(glsl, " = popCount("); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER); + bcatcstr(glsl, ");\n"); + break; + } + case OPCODE_FIRSTBIT_HI: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//FIRSTBIT_HI\n"); +#endif + DeclareExtraFunction("firstBit_hi", "template UVecType firstBit_hi(const UVecType input) { UVecType res = clz(input); return res; };"); + // TODO implement the 0-case (must return 0xffffffff) + psContext->AddIndentation(); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_UNSIGNED_INTEGER | TO_FLAG_DESTINATION); + bcatcstr(glsl, " = firstBit_hi("); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER); + bcatcstr(glsl, ");\n"); + break; + } + case OPCODE_FIRSTBIT_LO: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//FIRSTBIT_LO\n"); +#endif + // TODO implement the 0-case (must return 0xffffffff) + DeclareExtraFunction("firstBit_lo", "template UVecType firstBit_lo(const UVecType input) { UVecType res = ctz(input); return res; };"); + psContext->AddIndentation(); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_UNSIGNED_INTEGER | TO_FLAG_DESTINATION); + bcatcstr(glsl, " = firstBit_lo("); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER); + bcatcstr(glsl, ");\n"); + break; + } + case 
OPCODE_FIRSTBIT_SHI: //signed high + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//FIRSTBIT_SHI\n"); +#endif + // TODO Not at all correct for negative values yet. + DeclareExtraFunction("firstBit_shi", "template IVecType firstBit_shi(const IVecType input) { IVecType res = clz(input); return res; };"); + psContext->AddIndentation(); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER | TO_FLAG_DESTINATION); + bcatcstr(glsl, " = firstBit_shi("); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER); + bcatcstr(glsl, ");\n"); + break; + } + case OPCODE_BFREV: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//BFREV\n"); +#endif + DeclareExtraFunction("bitReverse", "template UVecType bitReverse(const UVecType input)\n\ + { UVecType x = input;\n\ + x = (((x & 0xaaaaaaaa) >> 1) | ((x & 0x55555555) << 1));\n\ + x = (((x & 0xcccccccc) >> 2) | ((x & 0x33333333) << 2));\n\ + x = (((x & 0xf0f0f0f0) >> 4) | ((x & 0x0f0f0f0f) << 4));\n\ + x = (((x & 0xff00ff00) >> 8) | ((x & 0x00ff00ff) << 8));\n\ + return((x >> 16) | (x << 16));\n\ + }; "); + psContext->AddIndentation(); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER | TO_FLAG_DESTINATION); + bcatcstr(glsl, " = bitReverse("); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER); + bcatcstr(glsl, ");\n"); + break; + } + case OPCODE_BFI: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//BFI\n"); +#endif + DeclareExtraFunction("BFI", "\ + template UVecType bitFieldInsert(const UVecType width, const UVecType offset, const UVecType src2, const UVecType src3)\n\ + {\n\ + UVecType bitmask = (((1 << width)-1) << offset) & 0xffffffff;\n\ + return ((src2 << offset) & bitmask) | (src3 & ~bitmask);\n\ + }; "); + psContext->AddIndentation(); + + uint32_t destMask = psInst->asOperands[0].GetAccessMask(); + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, psInst->asOperands[0].GetNumSwizzleElements(), &numParenthesis); + bcatcstr(glsl, "bitFieldInsert("); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, destMask); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_UNSIGNED_INTEGER, destMask); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[3], TO_FLAG_UNSIGNED_INTEGER, destMask); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[4], TO_FLAG_UNSIGNED_INTEGER, destMask); + bcatcstr(glsl, ")"); + + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_CUT: + case OPCODE_EMITTHENCUT_STREAM: + case OPCODE_EMIT: + case OPCODE_EMITTHENCUT: + case OPCODE_CUT_STREAM: + case OPCODE_EMIT_STREAM: + { + ASSERT(0); // Not on metal + } + case OPCODE_REP: + case OPCODE_ENDREP: + { + ASSERT(0); // Shouldn't see these anymore + } + case OPCODE_LOOP: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//LOOP\n"); +#endif + psContext->AddIndentation(); + + bcatcstr(glsl, "while(true){\n"); + ++psContext->indent; + break; + } + case OPCODE_ENDLOOP: + { + --psContext->indent; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ENDLOOP\n"); +#endif + psContext->AddIndentation(); + bcatcstr(glsl, "}\n"); + break; + } + case OPCODE_BREAK: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//BREAK\n"); +#endif + psContext->AddIndentation(); + bcatcstr(glsl, "break;\n"); + break; + } + case OPCODE_BREAKC: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//BREAKC\n"); +#endif + 
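+            // TranslateConditional emits both the zero/nonzero test and the statement it guards
+            // (the break here, continue/return for the opcodes below, and the opening brace for IF).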
psContext->AddIndentation(); + + TranslateConditional(psInst, glsl); + break; + } + case OPCODE_CONTINUEC: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//CONTINUEC\n"); +#endif + psContext->AddIndentation(); + + TranslateConditional(psInst, glsl); + break; + } + case OPCODE_IF: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IF\n"); +#endif + psContext->AddIndentation(); + + TranslateConditional(psInst, glsl); + ++psContext->indent; + break; + } + case OPCODE_RETC: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//RETC\n"); +#endif + psContext->AddIndentation(); + + TranslateConditional(psInst, glsl); + break; + } + case OPCODE_ELSE: + { + --psContext->indent; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ELSE\n"); +#endif + psContext->AddIndentation(); + bcatcstr(glsl, "} else {\n"); + psContext->indent++; + break; + } + case OPCODE_ENDSWITCH: + case OPCODE_ENDIF: + { + --psContext->indent; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ENDIF\n"); +#endif + psContext->AddIndentation(); + bcatcstr(glsl, "}\n"); + break; + } + case OPCODE_CONTINUE: + { + psContext->AddIndentation(); + bcatcstr(glsl, "continue;\n"); + break; + } + case OPCODE_DEFAULT: + { + --psContext->indent; + psContext->AddIndentation(); + bcatcstr(glsl, "default:\n"); + ++psContext->indent; + break; + } + case OPCODE_NOP: + { + break; + } + case OPCODE_SYNC: + { + const uint32_t ui32SyncFlags = psInst->ui32SyncFlags; + +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//SYNC\n"); +#endif + const char *barrierFlags = "mem_none"; + if (ui32SyncFlags & SYNC_THREAD_GROUP_SHARED_MEMORY) + { + barrierFlags = "mem_threadgroup"; + } + if (ui32SyncFlags & (SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GROUP | SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GLOBAL)) + { + barrierFlags = "mem_device"; + if (ui32SyncFlags & SYNC_THREAD_GROUP_SHARED_MEMORY) + { + barrierFlags = "mem_device_and_threadgroup"; + } + } + psContext->AddIndentation(); + bformata(glsl, "threadgroup_barrier(mem_flags::%s);\n", barrierFlags); + + break; + } + case OPCODE_SWITCH: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//SWITCH\n"); +#endif + psContext->AddIndentation(); + bcatcstr(glsl, "switch(int("); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); + bcatcstr(glsl, ")){\n"); + + psContext->indent += 2; + break; + } + case OPCODE_CASE: + { + --psContext->indent; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//case\n"); +#endif + psContext->AddIndentation(); + + bcatcstr(glsl, "case "); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); + bcatcstr(glsl, ":\n"); + + ++psContext->indent; + break; + } + case OPCODE_EQ: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//EQ\n"); +#endif + AddComparison(psInst, CMP_EQ, TO_FLAG_NONE); + break; + } + case OPCODE_USHR: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//USHR\n"); +#endif + CallBinaryOp(">>", psInst, 0, 1, 2, SVT_UINT); + break; + } + case OPCODE_ISHL: + { + SHADER_VARIABLE_TYPE eType = SVT_INT; + +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ISHL\n"); +#endif + + if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) + { + eType = SVT_UINT; + } + + CallBinaryOp("<<", psInst, 0, 1, 2, eType); + break; + } + case OPCODE_ISHR: + { + SHADER_VARIABLE_TYPE eType = SVT_INT; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ISHR\n"); +#endif + + if 
(psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) + { + eType = SVT_UINT; + } + + CallBinaryOp(">>", psInst, 0, 1, 2, eType); + break; + } + case OPCODE_LD: + case OPCODE_LD_MS: + { + const ResourceBinding* psBinding = 0; +#ifdef _DEBUG + psContext->AddIndentation(); + if (psInst->eOpcode == OPCODE_LD) + bcatcstr(glsl, "//LD\n"); + else + bcatcstr(glsl, "//LD_MS\n"); +#endif + + psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, psInst->asOperands[2].ui32RegisterNumber, &psBinding); + + if (psInst->bAddressOffset) + { + TranslateTexelFetchOffset(psInst, psBinding, glsl); + } + else + { + TranslateTexelFetch(psInst, psBinding, glsl); + } + break; + } + case OPCODE_DISCARD: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//DISCARD\n"); +#endif + psContext->AddIndentation(); + if (psInst->eBooleanTestType == INSTRUCTION_TEST_ZERO) + { + bcatcstr(glsl, "if(("); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); + bcatcstr(glsl, ")==0){discard_fragment();}\n"); + } + else + { + ASSERT(psInst->eBooleanTestType == INSTRUCTION_TEST_NONZERO); + bcatcstr(glsl, "if(("); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); + bcatcstr(glsl, ")!=0){discard_fragment();}\n"); + } + break; + } + case OPCODE_LOD: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//LOD\n"); +#endif + //LOD computes the following vector (ClampedLOD, NonClampedLOD, 0, 0) + + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, 4, &numParenthesis); + + //If the core language does not have query-lod feature, + //then the extension is used. The name of the function + //changed between extension and core. + if (HaveQueryLod(psContext->psShader->eTargetLanguage)) + { + bcatcstr(glsl, "textureQueryLod("); + } + else + { + bcatcstr(glsl, "textureQueryLOD("); + } + + glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); + bcatcstr(glsl, ","); + TranslateTexCoord( + psContext->psShader->aeResourceDims[psInst->asOperands[2].ui32RegisterNumber], + &psInst->asOperands[1]); + bcatcstr(glsl, ")"); + + //The swizzle on srcResource allows the returned values to be swizzled arbitrarily before they are written to the destination. + + // iWriteMaskEnabled is forced off during DecodeOperand because swizzle on sampler uniforms + // does not make sense. But need to re-enable to correctly swizzle this particular instruction. + psInst->asOperands[2].iWriteMaskEnabled = 1; + glsl << TranslateOperandSwizzle(&psInst->asOperands[2], psInst->asOperands[0].GetAccessMask(), 0); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_EVAL_CENTROID: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//EVAL_CENTROID\n"); +#endif + psContext->AddIndentation(); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, " = interpolateAtCentroid("); + //interpolateAtCentroid accepts in-qualified variables. + //As long as bytecode only writes vX registers in declarations + //we should be able to use the declared name directly. 
+ glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_DECLARATION_NAME); + bcatcstr(glsl, ");\n"); + break; + } + case OPCODE_EVAL_SAMPLE_INDEX: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//EVAL_SAMPLE_INDEX\n"); +#endif + psContext->AddIndentation(); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, " = interpolateAtSample("); + //interpolateAtSample accepts in-qualified variables. + //As long as bytecode only writes vX registers in declarations + //we should be able to use the declared name directly. + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_DECLARATION_NAME); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_INTEGER); + bcatcstr(glsl, ");\n"); + break; + } + case OPCODE_EVAL_SNAPPED: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//EVAL_SNAPPED\n"); +#endif + psContext->AddIndentation(); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, " = interpolateAtOffset("); + //interpolateAtOffset accepts in-qualified variables. + //As long as bytecode only writes vX registers in declarations + //we should be able to use the declared name directly. + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_DECLARATION_NAME); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_INTEGER); + bcatcstr(glsl, ".xy);\n"); + break; + } + case OPCODE_LD_STRUCTURED: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//LD_STRUCTURED\n"); +#endif + TranslateShaderStorageLoad(psInst); + break; + } + case OPCODE_LD_UAV_TYPED: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//LD_UAV_TYPED\n"); +#endif + Operand* psDest = &psInst->asOperands[0]; + Operand* psSrc = &psInst->asOperands[2]; + Operand* psSrcAddr = &psInst->asOperands[1]; + + int srcCount = psSrc->GetNumSwizzleElements(); + int numParenthesis = 0; + uint32_t compMask = 0; + + switch (psInst->eResDim) + { + case RESOURCE_DIMENSION_TEXTURE3D: + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + compMask |= (1 << 2); + case RESOURCE_DIMENSION_TEXTURECUBE: + case RESOURCE_DIMENSION_TEXTURE1DARRAY: + case RESOURCE_DIMENSION_TEXTURE2D: + case RESOURCE_DIMENSION_TEXTURE2DMS: + compMask |= (1 << 1); + case RESOURCE_DIMENSION_TEXTURE1D: + compMask |= 1; + break; + default: + ASSERT(0); + break; + } + + SHADER_VARIABLE_TYPE srcDataType; + const ResourceBinding* psBinding = 0; + psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, psSrc->ui32RegisterNumber, &psBinding); + switch (psBinding->ui32ReturnType) + { + case RETURN_TYPE_FLOAT: + srcDataType = SVT_FLOAT; + break; + case RETURN_TYPE_SINT: + srcDataType = SVT_INT; + break; + case RETURN_TYPE_UINT: + srcDataType = SVT_UINT; + break; + default: + ASSERT(0); + break; + } + + psContext->AddIndentation(); + AddAssignToDest(psDest, srcDataType, srcCount, &numParenthesis); + glsl << TranslateOperand(psSrc, TO_FLAG_NAME_ONLY); + bcatcstr(glsl, ".read("); + glsl << TranslateOperand(psSrcAddr, TO_FLAG_UNSIGNED_INTEGER, compMask); + bcatcstr(glsl, ")"); + glsl << TranslateOperandSwizzle(&psInst->asOperands[0], OPERAND_4_COMPONENT_MASK_ALL, 0); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_STORE_RAW: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//STORE_RAW\n"); +#endif + TranslateShaderStorageStore(psInst); + break; + } + case 
OPCODE_STORE_STRUCTURED: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//STORE_STRUCTURED\n"); +#endif + TranslateShaderStorageStore(psInst); + break; + } + + case OPCODE_STORE_UAV_TYPED: + { + const ResourceBinding* psRes; + int foundResource; + uint32_t flags = TO_FLAG_UNSIGNED_INTEGER; + uint32_t opMask = OPERAND_4_COMPONENT_MASK_ALL; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//STORE_UAV_TYPED\n"); +#endif + psContext->AddIndentation(); + + foundResource = psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, + psInst->asOperands[0].ui32RegisterNumber, + &psRes); + + ASSERT(foundResource); + + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_NAME_ONLY); + bcatcstr(glsl, ".write("); + + switch (psRes->eDimension) + { + case REFLECT_RESOURCE_DIMENSION_TEXTURE1D: + opMask = OPERAND_4_COMPONENT_MASK_X; + break; + case REFLECT_RESOURCE_DIMENSION_TEXTURE2D: + case REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY: + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS: + opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y; + flags |= TO_AUTO_EXPAND_TO_VEC2; + break; + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY: + case REFLECT_RESOURCE_DIMENSION_TEXTURE3D: + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + case REFLECT_RESOURCE_DIMENSION_TEXTURECUBE: + opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z; + flags |= TO_AUTO_EXPAND_TO_VEC3; + break; + case REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY: + flags |= TO_AUTO_EXPAND_TO_VEC4; + break; + default: + ASSERT(0); + break; + }; + + glsl << TranslateOperand(&psInst->asOperands[2], ResourceReturnTypeToFlag(psRes->ui32ReturnType)); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[1], flags, opMask); + bformata(glsl, ");\n"); + + break; + } + case OPCODE_LD_RAW: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//LD_RAW\n"); +#endif + + TranslateShaderStorageLoad(psInst); + break; + } + + case OPCODE_ATOMIC_CMP_STORE: + case OPCODE_IMM_ATOMIC_AND: + case OPCODE_ATOMIC_AND: + case OPCODE_IMM_ATOMIC_IADD: + case OPCODE_ATOMIC_IADD: + case OPCODE_ATOMIC_OR: + case OPCODE_ATOMIC_XOR: + case OPCODE_ATOMIC_IMAX: + case OPCODE_ATOMIC_IMIN: + case OPCODE_ATOMIC_UMAX: + case OPCODE_ATOMIC_UMIN: + case OPCODE_IMM_ATOMIC_IMAX: + case OPCODE_IMM_ATOMIC_IMIN: + case OPCODE_IMM_ATOMIC_UMAX: + case OPCODE_IMM_ATOMIC_UMIN: + case OPCODE_IMM_ATOMIC_OR: + case OPCODE_IMM_ATOMIC_XOR: + case OPCODE_IMM_ATOMIC_EXCH: + case OPCODE_IMM_ATOMIC_CMP_EXCH: + { + TranslateAtomicMemOp(psInst); + break; + } + case OPCODE_UBFE: + case OPCODE_IBFE: + { +#ifdef _DEBUG + psContext->AddIndentation(); + if (psInst->eOpcode == OPCODE_UBFE) + bcatcstr(glsl, "//OPCODE_UBFE\n"); + else + bcatcstr(glsl, "//OPCODE_IBFE\n"); +#endif + + bool isUBFE = psInst->eOpcode == OPCODE_UBFE; + bool isScalar = psInst->asOperands[0].GetNumSwizzleElements() == 1; + + if (isUBFE) + { + if (isScalar) + { + DeclareExtraFunction("UBFE", "\ +uint bitFieldExtractU(uint width, uint offset, uint src);\n\ +uint bitFieldExtractU(uint width, uint offset, uint src)\n\ +{\n\ + bool isWidthZero = (width == 0);\n\ + bool needsClamp = ((width + offset) < 32);\n\ + uint clampVersion = src << (32-(width+offset));\n\ + clampVersion = clampVersion >> (32 - width);\n\ + uint simpleVersion = src >> offset;\n\ + uint res = select(simpleVersion, clampVersion, needsClamp);\n\ + return select(res, (uint)0, isWidthZero);\n\ +}; "); + } + else + { + DeclareExtraFunction("UBFEV", "\ 
+template vec bitFieldExtractU(const vec width, const vec offset, const vec src)\n\ +{\n\ + vec isWidthZero = (width == 0);\n\ + vec needsClamp = ((width + offset) < 32);\n\ + vec clampVersion = src << (32-(width+offset));\n\ + clampVersion = clampVersion >> (32 - width);\n\ + vec simpleVersion = src >> offset;\n\ + vec res = select(simpleVersion, clampVersion, needsClamp);\n\ + return select(res, vec(0), isWidthZero);\n\ +}; "); + } + } + else + { + if (isScalar) + { + DeclareExtraFunction("IBFE", "\ +template int bitFieldExtractI(uint width, uint offset, int src)\n\ +{\n\ + bool isWidthZero = (width == 0);\n\ + bool needsClamp = ((width + offset) < 32);\n\ + int clampVersion = src << (32-(width+offset));\n\ + clampVersion = clampVersion >> (32 - width);\n\ + int simpleVersion = src >> offset;\n\ + int res = select(simpleVersion, clampVersion, needsClamp);\n\ + return select(res, (int)0, isWidthZero);\n\ +}; "); + } + else + { + DeclareExtraFunction("IBFEV", "\ +template vec bitFieldExtractI(const vec width, const vec offset, const vec src)\n\ +{\n\ + vec isWidthZero = (width == 0);\n\ + vec needsClamp = ((width + offset) < 32);\n\ + vec clampVersion = src << (32-(width+offset));\n\ + clampVersion = clampVersion >> (32 - width);\n\ + vec simpleVersion = src >> offset;\n\ + vec res = select(simpleVersion, clampVersion, needsClamp);\n\ + return select(res, vec(0), isWidthZero);\n\ +}; "); + } + } + psContext->AddIndentation(); + + uint32_t destMask = psInst->asOperands[0].GetAccessMask(); + AddAssignToDest(&psInst->asOperands[0], isUBFE ? SVT_UINT : SVT_INT, psInst->asOperands[0].GetNumSwizzleElements(), &numParenthesis); + bcatcstr(glsl, "bitFieldExtract"); + bcatcstr(glsl, isUBFE ? "U" : "I"); + bcatcstr(glsl, "("); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, destMask); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_UNSIGNED_INTEGER, destMask); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[3], isUBFE ? TO_FLAG_UNSIGNED_INTEGER : TO_FLAG_INTEGER, destMask); + bcatcstr(glsl, ")"); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_RCP: + { + const uint32_t destElemCount = psInst->asOperands[0].GetNumSwizzleElements(); + const uint32_t srcElemCount = psInst->asOperands[1].GetNumSwizzleElements(); + int numParenthesis = 0; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//RCP\n"); +#endif + psContext->AddIndentation(); + + SHADER_VARIABLE_TYPE dstType = psInst->asOperands[0].GetDataType(psContext); + SHADER_VARIABLE_TYPE srcType = psInst->asOperands[1].GetDataType(psContext); + + uint32_t typeFlags = TO_FLAG_NONE; + if (dstType == SVT_FLOAT16 && srcType == SVT_FLOAT16) + { + typeFlags = TO_FLAG_FORCE_HALF; + } + else + srcType = SVT_FLOAT; + + AddAssignToDest(&psInst->asOperands[0], srcType, srcElemCount, &numParenthesis); + bcatcstr(glsl, GetConstructorForTypeMetal(srcType, destElemCount)); + bcatcstr(glsl, "(1.0) / "); + bcatcstr(glsl, GetConstructorForTypeMetal(srcType, destElemCount)); + bcatcstr(glsl, "("); + numParenthesis++; + glsl << TranslateOperand(&psInst->asOperands[1], typeFlags, psInst->asOperands[0].GetAccessMask()); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_F32TOF16: + { + // TODO Metallize + ASSERT(0); // Are these even used? 
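+            // F32TOF16 converts each float component to a 16-bit float stored in the low 16 bits of the
+            // corresponding uint destination component. The body below is still the GLSL-era code
+            // (unpackHalf2x16 is GLSL, not Metal), which is why it stays behind the ASSERT; a Metal port
+            // might go through half() conversion plus as_type<ushort>().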
+ const uint32_t destElemCount = psInst->asOperands[0].GetNumSwizzleElements(); + const uint32_t s0ElemCount = psInst->asOperands[1].GetNumSwizzleElements(); + uint32_t destElem; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//F32TOF16\n"); +#endif + for (destElem = 0; destElem < destElemCount; ++destElem) + { + const char* swizzle[] = { ".x", ".y", ".z", ".w" }; + + //unpackHalf2x16 converts two f16s packed into uint to two f32s. + + //dest.swiz.x = unpackHalf2x16(src.swiz.x).x + //dest.swiz.y = unpackHalf2x16(src.swiz.y).x + //dest.swiz.z = unpackHalf2x16(src.swiz.z).x + //dest.swiz.w = unpackHalf2x16(src.swiz.w).x + + psContext->AddIndentation(); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + if (destElemCount > 1) + bcatcstr(glsl, swizzle[destElem]); + + bcatcstr(glsl, " = unpackHalf2x16("); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER); + if (s0ElemCount > 1) + bcatcstr(glsl, swizzle[destElem]); + bcatcstr(glsl, ").x;\n"); + + } + break; + } + case OPCODE_F16TOF32: + { + // TODO metallize + ASSERT(0); // Are these even used? + const uint32_t destElemCount = psInst->asOperands[0].GetNumSwizzleElements(); + const uint32_t s0ElemCount = psInst->asOperands[1].GetNumSwizzleElements(); + uint32_t destElem; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//F16TOF32\n"); +#endif + for (destElem = 0; destElem < destElemCount; ++destElem) + { + const char* swizzle[] = { ".x", ".y", ".z", ".w" }; + + //packHalf2x16 converts two f32s to two f16s packed into a uint. + + //dest.swiz.x = packHalf2x16(vec2(src.swiz.x)) & 0xFFFF + //dest.swiz.y = packHalf2x16(vec2(src.swiz.y)) & 0xFFFF + //dest.swiz.z = packHalf2x16(vec2(src.swiz.z)) & 0xFFFF + //dest.swiz.w = packHalf2x16(vec2(src.swiz.w)) & 0xFFFF + + psContext->AddIndentation(); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION | TO_FLAG_UNSIGNED_INTEGER); + if (destElemCount > 1) + bcatcstr(glsl, swizzle[destElem]); + + bcatcstr(glsl, " = packHalf2x16(vec2("); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE); + if (s0ElemCount > 1) + bcatcstr(glsl, swizzle[destElem]); + bcatcstr(glsl, ")) & 0xFFFF;\n"); + + } + break; + } + case OPCODE_INEG: + { + int numParenthesis = 0; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//INEG\n"); +#endif + //dest = 0 - src0 + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[0], SVT_INT, psInst->asOperands[1].GetNumSwizzleElements(), &numParenthesis); + + bcatcstr(glsl, "0 - "); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, psInst->asOperands[0].GetAccessMask()); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_DERIV_RTX_COARSE: + case OPCODE_DERIV_RTX_FINE: + case OPCODE_DERIV_RTX: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//DERIV_RTX\n"); +#endif + CallHelper1("dfdx", psInst, 0, 1, 1); + break; + } + case OPCODE_DERIV_RTY_COARSE: + case OPCODE_DERIV_RTY_FINE: + case OPCODE_DERIV_RTY: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//DERIV_RTY\n"); +#endif + CallHelper1("dfdy", psInst, 0, 1, 1); + break; + } + case OPCODE_LRP: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//LRP\n"); +#endif + CallHelper3("mix", psInst, 0, 2, 3, 1, 1); + break; + } + case OPCODE_DP2ADD: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//DP2ADD\n"); +#endif + psContext->AddIndentation(); + bool isFP16 = false; + if 
(psInst->asOperands[0].GetDataType(psContext) == SVT_FLOAT16 + && psInst->asOperands[1].GetDataType(psContext) == SVT_FLOAT16 + && psInst->asOperands[2].GetDataType(psContext) == SVT_FLOAT16 + && psInst->asOperands[2].GetDataType(psContext) == SVT_FLOAT16) + isFP16 = true; + int parenthesis = 0; + AddAssignToDest(&psInst->asOperands[0], isFP16 ? SVT_FLOAT16 : SVT_FLOAT, 2, &parenthesis); + + uint32_t flags = TO_AUTO_EXPAND_TO_VEC2; + flags |= isFP16 ? TO_FLAG_FORCE_HALF : TO_AUTO_BITCAST_TO_FLOAT; + + bcatcstr(glsl, "dot("); + glsl << TranslateOperand(&psInst->asOperands[1], flags); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[2], flags); + bcatcstr(glsl, ") + "); + glsl << TranslateOperand(&psInst->asOperands[3], flags); + AddAssignPrologue(parenthesis); + break; + } + case OPCODE_POW: + { + // TODO Check POW opcode whether it actually needs the abs +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//POW\n"); +#endif + psContext->AddIndentation(); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, " = powr(abs("); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE); + bcatcstr(glsl, "), "); + glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); + bcatcstr(glsl, ");\n"); + break; + } + + case OPCODE_IMM_ATOMIC_ALLOC: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_ALLOC\n"); +#endif + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, &numParenthesis); + bcatcstr(glsl, "atomic_fetch_add_explicit("); + glsl << ResourceName(RGROUP_UAV, psInst->asOperands[1].ui32RegisterNumber); + bcatcstr(glsl, "_counter, 1, memory_order::memory_order_relaxed)"); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_IMM_ATOMIC_CONSUME: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_CONSUME\n"); +#endif + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, &numParenthesis); + bcatcstr(glsl, "atomic_fetch_sub_explicit("); + glsl << ResourceName(RGROUP_UAV, psInst->asOperands[1].ui32RegisterNumber); + // Metal atomic sub returns previous value. Therefore minus one here to get the correct data index. 
+ bcatcstr(glsl, "_counter, 1, memory_order::memory_order_relaxed) - 1"); + AddAssignPrologue(numParenthesis); + break; + } + + case OPCODE_NOT: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//INOT\n"); +#endif + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_INT, psInst->asOperands[1].GetNumSwizzleElements(), &numParenthesis); + + bcatcstr(glsl, "~"); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, psInst->asOperands[0].GetAccessMask()); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_XOR: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//XOR\n"); +#endif + + CallBinaryOp("^", psInst, 0, 1, 2, SVT_UINT); + break; + } + case OPCODE_RESINFO: + { + + uint32_t destElemCount = psInst->asOperands[0].GetNumSwizzleElements(); + uint32_t destElem; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//RESINFO\n"); +#endif + + for (destElem = 0; destElem < destElemCount; ++destElem) + { + GetResInfoData(psInst, psInst->asOperands[2].aui32Swizzle[destElem], destElem); + } + + break; + } + + case OPCODE_BUFINFO: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//BUFINFO\n"); +#endif + psContext->m_Reflection.OnDiagnostics("Metal shading language does not support buffer size query from shader. Pass the size to shader as const instead.\n", 0, false); // TODO: change this into error after modifying gfx-test 450 + break; + } + + case OPCODE_DMAX: + case OPCODE_DMIN: + case OPCODE_DMUL: + case OPCODE_DEQ: + case OPCODE_DGE: + case OPCODE_DLT: + case OPCODE_DNE: + case OPCODE_DMOV: + case OPCODE_DMOVC: + case OPCODE_DTOF: + case OPCODE_FTOD: + case OPCODE_DDIV: + case OPCODE_DFMA: + case OPCODE_DRCP: + case OPCODE_MSAD: + case OPCODE_DTOI: + case OPCODE_DTOU: + case OPCODE_ITOD: + case OPCODE_UTOD: + default: + { + ASSERT(0); + break; + } + } + + if (psInst->bSaturate) //Saturate is only for floating point data (float opcodes or MOV) + { + int dstCount = psInst->asOperands[0].GetNumSwizzleElements(); + psContext->AddIndentation(); + bool isFP16 = false; + if (psInst->asOperands[0].GetDataType(psContext) == SVT_FLOAT16) + isFP16 = true; + AddAssignToDest(&psInst->asOperands[0], isFP16 ? SVT_FLOAT16 : SVT_FLOAT, dstCount, &numParenthesis); + bcatcstr(glsl, "clamp("); + + glsl << TranslateOperand(&psInst->asOperands[0], isFP16 ? 
TO_FLAG_FORCE_HALF : TO_AUTO_BITCAST_TO_FLOAT); + if(isFP16) + bcatcstr(glsl, ", 0.0h, 1.0h)"); + else + bcatcstr(glsl, ", 0.0f, 1.0f)"); + AddAssignPrologue(numParenthesis); + } +} diff --git a/src/toMetalOperand.cpp b/src/toMetalOperand.cpp new file mode 100644 index 0000000..665d0a6 --- /dev/null +++ b/src/toMetalOperand.cpp @@ -0,0 +1,1120 @@ +#include +#include "internal_includes/HLSLccToolkit.h" +#include "internal_includes/HLSLCrossCompilerContext.h" +#include "hlslcc.h" +#include "internal_includes/debug.h" +#include "internal_includes/Shader.h" +#include "internal_includes/toMetal.h" +#include +#include + +#include +#include + +using namespace HLSLcc; + +#ifdef _MSC_VER +#define snprintf _snprintf +#define fpcheck(x) (_isnan(x) || !_finite(x)) +#else +#define fpcheck(x) (std::isnan(x) || std::isinf(x)) +#endif + +// Returns nonzero if types are just different precisions of the same underlying type +static bool AreTypesCompatible(SHADER_VARIABLE_TYPE a, uint32_t ui32TOFlag) +{ + SHADER_VARIABLE_TYPE b = TypeFlagsToSVTType(ui32TOFlag); + + if (a == b) + return true; + + // Special case for array indices: both uint and int are fine + if ((ui32TOFlag & TO_FLAG_INTEGER) && (ui32TOFlag & TO_FLAG_UNSIGNED_INTEGER) && + (a == SVT_INT || a == SVT_INT16 || a == SVT_UINT || a == SVT_UINT16)) + return true; + + return false; +} + +std::string ToMetal::TranslateOperandSwizzle(const Operand* psOperand, uint32_t ui32ComponentMask, int iRebase, bool includeDot /*= true*/) +{ + std::ostringstream oss; + uint32_t accessMask = ui32ComponentMask & psOperand->GetAccessMask(); + if(psOperand->eType == OPERAND_TYPE_INPUT) + { + int regSpace = psOperand->GetRegisterSpace(psContext); + // Skip swizzle for scalar inputs, but only if we haven't redirected them + if (regSpace == 0) + { + if ((psContext->psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] == 0) && + (psContext->psShader->abScalarInput[regSpace][psOperand->ui32RegisterNumber] & accessMask)) + { + return ""; + } + } + else + { + if ((psContext->psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0) && + (psContext->psShader->abScalarInput[regSpace][psOperand->ui32RegisterNumber] & accessMask)) + { + return ""; + } + } + } + if (psOperand->eType == OPERAND_TYPE_OUTPUT) + { + int regSpace = psOperand->GetRegisterSpace(psContext); + // Skip swizzle for scalar outputs, but only if we haven't redirected them + if (regSpace == 0) + { + if ((psContext->psShader->asPhases[psContext->currentPhase].acOutputNeedsRedirect[psOperand->ui32RegisterNumber] == 0) && + (psContext->psShader->abScalarOutput[regSpace][psOperand->ui32RegisterNumber] & accessMask)) + { + return ""; + } + } + else + { + if ((psContext->psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0) && + (psContext->psShader->abScalarOutput[regSpace][psOperand->ui32RegisterNumber] & accessMask)) + { + return ""; + } + } + } + + if(psOperand->iWriteMaskEnabled && + psOperand->iNumComponents != 1) + { + //Component Mask + if(psOperand->eSelMode == OPERAND_4_COMPONENT_MASK_MODE) + { + uint32_t mask; + if (psOperand->ui32CompMask != 0) + mask = psOperand->ui32CompMask & ui32ComponentMask; + else + mask = ui32ComponentMask; + + if(mask != 0 && mask != OPERAND_4_COMPONENT_MASK_ALL) + { + if (includeDot) + oss << "."; + if(mask & OPERAND_4_COMPONENT_MASK_X) + { + ASSERT(iRebase == 0); + oss << "x"; + } + if(mask & OPERAND_4_COMPONENT_MASK_Y) + { + 
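+                    // A nonzero iRebase means the variable was packed at an offset inside its vec4 slot,
+                    // so each swizzle letter is shifted down: with a rebase of one, .y is emitted as .x,
+                    // .z as .y, and so on.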
ASSERT(iRebase <= 1); + oss << "xy"[1 - iRebase]; + } + if(mask & OPERAND_4_COMPONENT_MASK_Z) + { + ASSERT(iRebase <= 2); + oss << "xyz"[2 - iRebase]; + } + if(mask & OPERAND_4_COMPONENT_MASK_W) + { + ASSERT(iRebase <= 3); + oss << "xyzw"[3 - iRebase]; + } + } + } + else + //Component Swizzle + if(psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) + { + if (ui32ComponentMask != OPERAND_4_COMPONENT_MASK_ALL || + !(psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_X && + psOperand->aui32Swizzle[1] == OPERAND_4_COMPONENT_Y && + psOperand->aui32Swizzle[2] == OPERAND_4_COMPONENT_Z && + psOperand->aui32Swizzle[3] == OPERAND_4_COMPONENT_W + ) + ) + { + uint32_t i; + + if (includeDot) + oss << "."; + + for (i = 0; i < 4; ++i) + { + if (!(ui32ComponentMask & (OPERAND_4_COMPONENT_MASK_X << i))) + continue; + + if (psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_X) + { + ASSERT(iRebase == 0); + oss << "x"; + } + else if (psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_Y) + { + ASSERT(iRebase <= 1); + oss << "xy"[1 - iRebase]; + } + else if (psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_Z) + { + ASSERT(iRebase <= 2); + oss << "xyz"[2 - iRebase]; + } + else if (psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_W) + { + ASSERT(iRebase <= 3); + oss << "xyzw"[3 - iRebase]; + } + } + } + } + else + if(psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) // ui32ComponentMask is ignored in this case + { + if (includeDot) + oss << "."; + + if(psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_X) + { + ASSERT(iRebase == 0); + oss << "x"; + } + else + if(psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_Y) + { + ASSERT(iRebase <= 1); + oss << "xy"[1 - iRebase]; + } + else + if(psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_Z) + { + ASSERT(iRebase <= 2); + oss << "xyz"[2 - iRebase]; + } + else + if(psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_W) + { + ASSERT(iRebase <= 3); + oss << "xyzw"[3 - iRebase]; + } + } + } + return oss.str(); +} + +std::string ToMetal::TranslateOperandIndex(const Operand* psOperand, int index) +{ + int i = index; + std::ostringstream oss; + ASSERT(index < psOperand->iIndexDims); + + switch(psOperand->eIndexRep[i]) + { + case OPERAND_INDEX_IMMEDIATE32: + { + oss << "[" << psOperand->aui32ArraySizes[i] << "]"; + return oss.str(); + } + case OPERAND_INDEX_RELATIVE: + { + oss << "[" << TranslateOperand(psOperand->m_SubOperands[i].get(), TO_FLAG_INTEGER) << "]"; + return oss.str(); + } + case OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: + { + oss << "[" << TranslateOperand(psOperand->m_SubOperands[i].get(), TO_FLAG_INTEGER) << " + "<< psOperand->aui32ArraySizes[i] <<"]"; + return oss.str(); + } + default: + { + ASSERT(0); + return ""; + break; + } + } +} + +/*static std::string GetBitcastOp(HLSLCrossCompilerContext *psContext, SHADER_VARIABLE_TYPE from, SHADER_VARIABLE_TYPE to, uint32_t numComponents) +{ + if (psContext->psShader->eTargetLanguage == LANG_METAL) + { + std::ostringstream oss; + oss << "as_type<"; + oss << GetConstructorForTypeMetal(to, numComponents); + oss << ">"; + return oss.str(); + } + else + { + if ((to == SVT_FLOAT || to == SVT_FLOAT16 || to == SVT_FLOAT10) && from == SVT_INT) + return "intBitsToFloat"; + else if ((to == SVT_FLOAT || to == SVT_FLOAT16 || to == SVT_FLOAT10) && from == SVT_UINT) + return "uintBitsToFloat"; + else if (to == SVT_INT && from == SVT_FLOAT) + return "floatBitsToInt"; + else if (to == SVT_UINT && from == SVT_FLOAT) + return "floatBitsToUint"; + } + + ASSERT(0); + return "ERROR missing components in GetBitcastOp()"; +}*/ + + 
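+// The immediate-printing helpers below emit floats via "%.9g" and append ".0" when the result has neither
+// an exponent nor a decimal point, so the output always stays a valid float literal; integer immediates
+// above 0x3ffffffe are wrapped in int(0x...u) so the large hex constant remains a valid signed value.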
+// Helper function to print floats with full precision +static std::string printFloat(float f) +{ + char temp[30]; + + snprintf(temp, 30, "%.9g", f); + char * ePos = strchr(temp, 'e'); + char * pointPos = strchr(temp, '.'); + + if (ePos == NULL && pointPos == NULL && !fpcheck(f)) + return std::string(temp) + ".0"; + else + return std::string(temp); +} + +// Helper function to print out a single 32-bit immediate value in desired format +static std::string printImmediate32(uint32_t value, SHADER_VARIABLE_TYPE eType) +{ + std::ostringstream oss; + int needsParenthesis = 0; + + // Print floats as bit patterns. + if ((eType == SVT_FLOAT || eType == SVT_FLOAT16 || eType == SVT_FLOAT10) && fpcheck(*((float *)(&value)))) + { + oss << "as_type("; + eType = SVT_INT; + needsParenthesis = 1; + } + + switch (eType) + { + default: + ASSERT(0); + case SVT_INT: + case SVT_INT16: + case SVT_INT12: + // Need special handling for anything >= uint 0x3fffffff + if (value > 0x3ffffffe) + oss << "int(0x" << std::hex << value << "u)"; + else + oss << "0x" << std::hex << value << ""; + break; + case SVT_UINT: + case SVT_UINT16: + oss << "0x" << std::hex << value << "u"; + break; + case SVT_FLOAT: + case SVT_FLOAT10: + case SVT_FLOAT16: + oss << printFloat(*((float *)(&value))); + break; + case SVT_BOOL: + if (value == 0) + oss << "false"; + else + oss << "true"; + } + if (needsParenthesis) + oss << ")"; + + return oss.str(); +} + +std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase) +{ + std::ostringstream oss; + int numParenthesis = 0; + int hasCtor = 0; + int needsBoolUpscale = 0; // If nonzero, bools need * 0xffffffff in them + SHADER_VARIABLE_TYPE requestedType = TypeFlagsToSVTType(ui32TOFlag); + SHADER_VARIABLE_TYPE eType = psOperand->GetDataType(psContext, requestedType); + int numComponents = psOperand->GetNumSwizzleElements(ui32CompMask); + int requestedComponents = 0; + int scalarWithSwizzle = 0; + + *pui32IgnoreSwizzle = 0; + + if (psOperand->eType == OPERAND_TYPE_TEMP) + { + // Check for scalar + if (psContext->psShader->GetTempComponentCount(eType, psOperand->ui32RegisterNumber) == 1 && psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) + { + scalarWithSwizzle = 1; // Going to need a constructor + } + } + + if (psOperand->eType == OPERAND_TYPE_INPUT) + { + // Check for scalar + if (psContext->psShader->abScalarInput[psOperand->GetRegisterSpace(psContext)][psOperand->ui32RegisterNumber] & psOperand->GetAccessMask() + && psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) + { + scalarWithSwizzle = 1; + *pui32IgnoreSwizzle = 1; + } + } + + if (piRebase) + *piRebase = 0; + + if (ui32TOFlag & TO_AUTO_EXPAND_TO_VEC2) + requestedComponents = 2; + else if (ui32TOFlag & TO_AUTO_EXPAND_TO_VEC3) + requestedComponents = 3; + else if (ui32TOFlag & TO_AUTO_EXPAND_TO_VEC4) + requestedComponents = 4; + + requestedComponents = std::max(requestedComponents, numComponents); + + if (!(ui32TOFlag & (TO_FLAG_DESTINATION | TO_FLAG_NAME_ONLY | TO_FLAG_DECLARATION_NAME))) + { + if (psOperand->eType == OPERAND_TYPE_IMMEDIATE32 || psOperand->eType == OPERAND_TYPE_IMMEDIATE64) + { + // Mark the operand type to match whatever we're asking for in the flags. 
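+            // The const cast is deliberate: immediate operands have no inherent type, so the requested
+            // type is cached on the operand itself for later lookups.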
+ ((Operand *)psOperand)->aeDataType[0] = requestedType; + ((Operand *)psOperand)->aeDataType[1] = requestedType; + ((Operand *)psOperand)->aeDataType[2] = requestedType; + ((Operand *)psOperand)->aeDataType[3] = requestedType; + } + + if (AreTypesCompatible(eType, ui32TOFlag) == 0) + { + if (CanDoDirectCast(eType, requestedType)) + { + oss << GetConstructorForType(psContext, requestedType, requestedComponents, false) << "("; + numParenthesis++; + hasCtor = 1; + if (eType == SVT_BOOL) + needsBoolUpscale = 1; + } + else + { + // Direct cast not possible, need to do bitcast. + oss << "as_type<"<< GetConstructorForTypeMetal(requestedType, requestedComponents) << ">("; + hasCtor = 1; + numParenthesis++; + } + } + + // Add ctor if needed (upscaling). Type conversion is already handled above, so here we must + // use the original type to not make type conflicts in bitcasts + if (((numComponents < requestedComponents)||(scalarWithSwizzle != 0)) && (hasCtor == 0)) + { + oss << GetConstructorForType(psContext, requestedType, requestedComponents, false) << "("; + + numParenthesis++; + hasCtor = 1; + } + } + + + switch(psOperand->eType) + { + case OPERAND_TYPE_IMMEDIATE32: + { + if(psOperand->iNumComponents == 1) + { + oss << printImmediate32(*((unsigned int*)(&psOperand->afImmediates[0])), requestedType); + } + else + { + int i; + int firstItemAdded = 0; + if (hasCtor == 0) + { + oss << GetConstructorForTypeMetal(requestedType, requestedComponents) << "("; + numParenthesis++; + hasCtor = 1; + } + for (i = 0; i < 4; i++) + { + uint32_t uval; + if (!(ui32CompMask & (1 << i))) + continue; + + if (firstItemAdded) + oss << ", "; + uval = *((uint32_t*)(&psOperand->afImmediates[i >= psOperand->iNumComponents ? psOperand->iNumComponents-1 : i])); + oss << printImmediate32(uval, requestedType); + firstItemAdded = 1; + } + oss << ")"; + *pui32IgnoreSwizzle = 1; + numParenthesis--; + } + break; + } + case OPERAND_TYPE_IMMEDIATE64: + { + ASSERT(0); // doubles not supported on Metal + break; + } + case OPERAND_TYPE_INPUT: + { + int regSpace = psOperand->GetRegisterSpace(psContext); + switch(psOperand->iIndexDims) + { + case INDEX_2D: + { + const ShaderInfo::InOutSignature *psSig = NULL; + psContext->psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->ui32CompMask, &psSig); + if ((psSig->eSystemValueType == NAME_POSITION && psSig->ui32SemanticIndex == 0) || + (psSig->semanticName == "POS" && psSig->ui32SemanticIndex == 0) || + (psSig->semanticName == "SV_POSITION" && psSig->ui32SemanticIndex == 0)) + { + // Shouldn't happen on Metal? 
+ ASSERT(0); + break; +// bcatcstr(glsl, "gl_in"); +// TranslateOperandIndex(psOperand, 0);//Vertex index +// bcatcstr(glsl, ".gl_Position"); + } + else + { + oss << psContext->GetDeclaredInputName(psOperand, piRebase, 0, pui32IgnoreSwizzle); + + oss << TranslateOperandIndex(psOperand, 0);//Vertex index + } + break; + } + default: + { + if(psOperand->eIndexRep[0] == OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE) + { + ASSERT(psContext->psShader->aIndexedInput[regSpace][psOperand->ui32RegisterNumber] != 0); + oss << "phase" << psContext->currentPhase << "_Input" << regSpace << "_" << psOperand->ui32RegisterNumber << "["; + oss << TranslateOperand(psOperand->m_SubOperands[0].get(), TO_FLAG_INTEGER); + oss << "]"; + } + else + { + if(psContext->psShader->aIndexedInput[regSpace][psOperand->ui32RegisterNumber] != 0) + { + const uint32_t parentIndex = psContext->psShader->aIndexedInputParents[regSpace][psOperand->ui32RegisterNumber]; + oss << "phase" << psContext->currentPhase << "_Input" << regSpace << "_" << parentIndex << "[" << (psOperand->ui32RegisterNumber - parentIndex) << "]"; + } + else + { + oss << psContext->GetDeclaredInputName(psOperand, piRebase, 0, pui32IgnoreSwizzle); + } + } + break; + } + } + break; + } + case OPERAND_TYPE_OUTPUT: + case OPERAND_TYPE_OUTPUT_DEPTH: + case OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL: + case OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL: + { + + int stream = 0; + oss << psContext->GetDeclaredOutputName(psOperand, &stream, pui32IgnoreSwizzle, piRebase, 0); + if (psOperand->m_SubOperands[0].get()) + { + oss << "["; + oss << TranslateOperand(psOperand->m_SubOperands[0].get(), TO_AUTO_BITCAST_TO_INT); + oss << "]"; + } + break; + } + case OPERAND_TYPE_TEMP: + { + SHADER_VARIABLE_TYPE eTempType = psOperand->GetDataType(psContext); + oss << HLSLCC_TEMP_PREFIX; + ASSERT(psOperand->ui32RegisterNumber < 0x10000); // Sanity check after temp splitting. 
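+        // Temps are split per data type; the prefix appended below (none for float, "16_"/"10_" for the
+        // reduced-precision floats, "i"/"u"/"b"/"d" and their 16-bit variants for the rest) plus the
+        // register number must line up with the names emitted when the temps were declared.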
+ switch (eTempType) + { + case SVT_FLOAT: + ASSERT(psContext->psShader->psFloatTempSizes[psOperand->ui32RegisterNumber] != 0); + if (psContext->psShader->psFloatTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_FLOAT16: + ASSERT(psContext->psShader->psFloat16TempSizes[psOperand->ui32RegisterNumber] != 0); + oss << ("16_"); + if (psContext->psShader->psFloat16TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_FLOAT10: + ASSERT(psContext->psShader->psFloat10TempSizes[psOperand->ui32RegisterNumber] != 0); + oss << ("10_"); + if (psContext->psShader->psFloat10TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_INT: + ASSERT(psContext->psShader->psIntTempSizes[psOperand->ui32RegisterNumber] != 0); + oss << ("i"); + if (psContext->psShader->psIntTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_INT16: + ASSERT(psContext->psShader->psInt16TempSizes[psOperand->ui32RegisterNumber] != 0); + oss << ("i16_"); + if (psContext->psShader->psInt16TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_INT12: + ASSERT(psContext->psShader->psInt12TempSizes[psOperand->ui32RegisterNumber] != 0); + oss << ("i12_"); + if (psContext->psShader->psInt12TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_UINT: + ASSERT(psContext->psShader->psUIntTempSizes[psOperand->ui32RegisterNumber] != 0); + oss << ("u"); + if (psContext->psShader->psUIntTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_UINT16: + ASSERT(psContext->psShader->psUInt16TempSizes[psOperand->ui32RegisterNumber] != 0); + oss << ("u16_"); + if (psContext->psShader->psUInt16TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_DOUBLE: + ASSERT(psContext->psShader->psDoubleTempSizes[psOperand->ui32RegisterNumber] != 0); + oss << ("d"); + if (psContext->psShader->psDoubleTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_BOOL: + ASSERT(psContext->psShader->psBoolTempSizes[psOperand->ui32RegisterNumber] != 0); + oss << ("b"); + if (psContext->psShader->psBoolTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + default: + ASSERT(0 && "Should never get here!"); + } + oss << psOperand->ui32RegisterNumber; + break; + } + case OPERAND_TYPE_SPECIAL_IMMCONSTINT: + case OPERAND_TYPE_SPECIAL_IMMCONST: + case OPERAND_TYPE_SPECIAL_OUTBASECOLOUR: + case OPERAND_TYPE_SPECIAL_OUTOFFSETCOLOUR: + case OPERAND_TYPE_SPECIAL_FOG: + case OPERAND_TYPE_SPECIAL_ADDRESS: + case OPERAND_TYPE_SPECIAL_LOOPCOUNTER: + case OPERAND_TYPE_SPECIAL_TEXCOORD: + { + ASSERT(0 && "DX9 shaders no longer supported!"); + break; + } + case OPERAND_TYPE_SPECIAL_POSITION: + { + ASSERT(0 && "TODO normal shader support"); +// bcatcstr(glsl, "gl_Position"); + break; + } + case OPERAND_TYPE_SPECIAL_POINTSIZE: + { + ASSERT(0 && "TODO normal shader support"); + // bcatcstr(glsl, "gl_PointSize"); + break; + } + case OPERAND_TYPE_CONSTANT_BUFFER: + { + const ConstantBuffer* psCBuf = NULL; + const ShaderVarType* psVarType = NULL; + int32_t index = -1; + std::vector arrayIndices; + bool 
isArray; + psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, psOperand->aui32ArraySizes[0], &psCBuf); + ASSERT(psCBuf != NULL); + + if(ui32TOFlag & TO_FLAG_DECLARATION_NAME) + { + pui32IgnoreSwizzle[0] = 1; + } + std::string cbName = ""; + if(psCBuf) + { + //$Globals. + if(psCBuf->name[0] == '$') + { + cbName = "Globals"; + } + else + { + cbName = psCBuf->name; + } + cbName += "."; + } + + if((ui32TOFlag & TO_FLAG_DECLARATION_NAME) != TO_FLAG_DECLARATION_NAME) + { + //Work out the variable name. Don't apply swizzle to that variable yet. + int32_t rebase = 0; + + if(psCBuf) + { + uint32_t componentsNeeded = 1; + if (psOperand->eSelMode != OPERAND_4_COMPONENT_SELECT_1_MODE) + { + uint32_t minSwiz = 3; + uint32_t maxSwiz = 0; + int i; + for (i = 0; i < 4; i++) + { + if ((ui32CompMask & (1 << i)) == 0) + continue; + minSwiz = std::min(minSwiz, psOperand->aui32Swizzle[i]); + maxSwiz = std::max(maxSwiz, psOperand->aui32Swizzle[i]); + } + componentsNeeded = maxSwiz - minSwiz + 1; + } + + ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], psOperand->aui32Swizzle, psCBuf, &psVarType, &isArray, &arrayIndices, &rebase, psContext->flags); + if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE || (componentsNeeded <= psVarType->Columns)) + { + // Simple case: just access one component + std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(psVarType, arrayIndices); + + if (((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) != 0) && ((psVarType->Class == SVC_MATRIX_ROWS) || (psVarType->Class == SVC_MATRIX_COLUMNS))) + { + // We'll need to add the prefix only to the last section of the name + size_t commaPos = fullName.find_last_of('.'); + char prefix[256]; + sprintf(prefix, HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING, psVarType->Rows, psVarType->Columns); + if (commaPos == std::string::npos) + fullName.insert(0, prefix); + else + fullName.insert(commaPos + 1, prefix); + } + + oss << cbName << fullName; + } + else + { + // Non-simple case: build vec4 and apply mask + uint32_t i; + int32_t tmpRebase; + std::vector tmpArrayIndices; + bool tmpIsArray; + int firstItemAdded = 0; + + oss << GetConstructorForTypeMetal(psVarType->Type, GetNumberBitsSet(ui32CompMask)) << "("; + for (i = 0; i < 4; i++) + { + const ShaderVarType *tmpVarType = NULL; + if ((ui32CompMask & (1 << i)) == 0) + continue; + tmpRebase = 0; + if (firstItemAdded != 0) + oss << ", "; + else + firstItemAdded = 1; + + uint32_t tmpSwizzle[4] = { 0 }; + std::copy(&psOperand->aui32Swizzle[i], &psOperand->aui32Swizzle[4], &tmpSwizzle[0]); + + ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], tmpSwizzle, psCBuf, &tmpVarType, &tmpIsArray, &tmpArrayIndices, &tmpRebase, psContext->flags); + std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(tmpVarType, tmpArrayIndices); + + if (tmpVarType->Class == SVC_SCALAR) + { + oss << cbName << fullName; + } + else + { + uint32_t swizzle; + tmpRebase /= 4; // 0 => 0, 4 => 1, 8 => 2, 12 /= 3 + swizzle = psOperand->aui32Swizzle[i] - tmpRebase; + + oss << cbName << fullName << "." << ("xyzw"[swizzle]); + } + } + oss << ")"; + // Clear rebase, we've already done it. + rebase = 0; + // Also swizzle. + *pui32IgnoreSwizzle = 1; + } + } + else // We don't have a semantic for this variable, so try the raw dump appoach. + { + ASSERT(0); // We're screwed. +// bformata(glsl, "cb%d.data", psOperand->aui32ArraySizes[0]);// +// index = psOperand->aui32ArraySizes[1]; + } + + if (isArray) + index = arrayIndices.back(); + + //Dx9 only? 
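+            // When HLSLCC_FLAG_TRANSLATE_MATRICES is not set, an array of matrices is accessed as an array
+            // of vec4 rows, so the (dynamic or constant) index below is split into [index / 4][index % 4].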
+ if (psOperand->m_SubOperands[0].get() != NULL) + { + // Array of matrices is treated as array of vec4s in HLSL, + // but that would mess up uniform types in GLSL. Do gymnastics. + uint32_t opFlags = TO_FLAG_INTEGER; + + if ((psVarType->Class == SVC_MATRIX_COLUMNS || psVarType->Class == SVC_MATRIX_ROWS) && (psVarType->Elements > 1) && ((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) == 0)) + { + // Special handling for matrix arrays + oss << "[(" << TranslateOperand(psOperand->m_SubOperands[0].get(), opFlags) << ") / 4]"; + oss << "[((" << TranslateOperand(psOperand->m_SubOperands[0].get(), opFlags, OPERAND_4_COMPONENT_MASK_X) << ") % 4)]"; + } + else + { + oss << "[" << TranslateOperand(psOperand->m_SubOperands[0].get(), opFlags) << "]"; + } + } + else + if (index != -1 && psOperand->m_SubOperands[1].get() != NULL) + { + // Array of matrices is treated as array of vec4s in HLSL, + // but that would mess up uniform types in GLSL. Do gymnastics. + SHADER_VARIABLE_TYPE eType = psOperand->m_SubOperands[1].get()->GetDataType(psContext); + uint32_t opFlags = TO_FLAG_INTEGER; + if (eType != SVT_INT && eType != SVT_UINT) + opFlags = TO_AUTO_BITCAST_TO_INT; + + if ((psVarType->Class == SVC_MATRIX_COLUMNS || psVarType->Class == SVC_MATRIX_ROWS) && (psVarType->Elements > 1) && ((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) == 0)) + { + // Special handling for matrix arrays + oss << "[(" << TranslateOperand(psOperand->m_SubOperands[1].get(), opFlags) << " + " << index <<") / 4]"; + oss << "[((" << TranslateOperand(psOperand->m_SubOperands[1].get(), opFlags, OPERAND_4_COMPONENT_MASK_X) << " + " << index << ") % 4)]"; + } + else + { + if (index != 0) + oss << "[" << TranslateOperand(psOperand->m_SubOperands[1].get(), opFlags) << " + " << index << "]"; + else + oss << "[" << TranslateOperand(psOperand->m_SubOperands[1].get(), opFlags) << "]"; + } + } + else if (index != -1) + { + if ((psVarType->Class == SVC_MATRIX_COLUMNS || psVarType->Class == SVC_MATRIX_ROWS) && (psVarType->Elements > 1) && ((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) == 0)) + { + // Special handling for matrix arrays, open them up into vec4's + size_t matidx = index / 4; + size_t rowidx = index - (matidx * 4); + oss << "[" << matidx << "][" << rowidx << "]"; + } + else + { + oss << "[" << index << "]"; + } + } + else if (psOperand->m_SubOperands[1].get() != NULL) + { + oss << "[" << TranslateOperand(psOperand->m_SubOperands[1].get(), TO_FLAG_INTEGER) << "]"; + } + + if(psVarType && psVarType->Class == SVC_VECTOR && !*pui32IgnoreSwizzle) + { + switch(rebase) + { + case 4: + { + if(psVarType->Columns == 2) + { + //.x(GLSL) is .y(HLSL). .y(GLSL) is .z(HLSL) + oss << ".xxyx"; + } + else if(psVarType->Columns == 3) + { + //.x(GLSL) is .y(HLSL). .y(GLSL) is .z(HLSL) .z(GLSL) is .w(HLSL) + oss << ".xxyz"; + } + break; + } + case 8: + { + if(psVarType->Columns == 2) + { + //.x(GLSL) is .z(HLSL). .y(GLSL) is .w(HLSL) + oss << ".xxxy"; + } + break; + } + case 0: + default: + { + //No rebase, but extend to vec4. + if(psVarType->Columns == 2) + { + oss << ".xyxx"; + } + else if(psVarType->Columns == 3) + { + oss << ".xyzx"; + } + break; + } + + } + } + + if(psVarType && psVarType->Class == SVC_SCALAR) + { + *pui32IgnoreSwizzle = 1; + + // CB arrays are all declared as 4-component vectors to match DX11 data layout. + // Therefore add swizzle here to access the element corresponding to the scalar var. 
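+ // Illustrative example (names invented): a scalar array member such as
+ //     float _Weights[4];
+ // occupies one float4 register per element, so in a compute shader the code
+ // below turns an access like "Globals._Weights[2]" into "Globals._Weights[2].x".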
+ if ((psVarType->Elements > 0) && (psContext->psShader->eShaderType == COMPUTE_SHADER)) + { + oss << ".x"; + } + } + } + break; + } + case OPERAND_TYPE_RESOURCE: + { + oss << ResourceName(RGROUP_TEXTURE, psOperand->ui32RegisterNumber); + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_SAMPLER: + { + oss << ResourceName(RGROUP_SAMPLER, psOperand->ui32RegisterNumber); + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_FUNCTION_BODY: + { + ASSERT(0); + break; + } + case OPERAND_TYPE_INPUT_FORK_INSTANCE_ID: + case OPERAND_TYPE_INPUT_JOIN_INSTANCE_ID: + { + // Not supported on Metal + ASSERT(0); + break; + } + case OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER: + { + oss << "ImmCB_" << psContext->currentPhase + << "_" << psOperand->ui32RegisterNumber + << "_" << psOperand->m_Rebase; + if (psOperand->m_SubOperands[0].get()) + { + //Indexes must be integral. Offset is already taken care of above. + oss << "[" << TranslateOperand(psOperand->m_SubOperands[0].get(), TO_FLAG_INTEGER) << "]"; + } + if (psOperand->m_Size == 1) + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_INPUT_DOMAIN_POINT: + { + // Not supported on Metal + ASSERT(0); + break; + } + case OPERAND_TYPE_INPUT_CONTROL_POINT: + { + // Not supported on Metal + ASSERT(0); + break; + } + case OPERAND_TYPE_NULL: + { + // Null register, used to discard results of operations + oss << "//null"; + break; + } + case OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID: + { + // Not supported on Metal + ASSERT(0); + break; + } + case OPERAND_TYPE_OUTPUT_COVERAGE_MASK: + { + oss << "mtl_CoverageMask"; + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_INPUT_COVERAGE_MASK: + { + oss << "mtl_CoverageMask"; + //Skip swizzle on scalar types. + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_INPUT_THREAD_ID://SV_DispatchThreadID + { + oss << "mtl_ThreadID"; + break; + } + case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP://SV_GroupThreadID + { + oss << "mtl_ThreadIDInGroup"; + break; + } + case OPERAND_TYPE_INPUT_THREAD_GROUP_ID://SV_GroupID + { + oss << "mtl_ThreadGroupID"; + break; + } + case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED://SV_GroupIndex + { + oss << "mtl_ThreadIndexInThreadGroup"; + *pui32IgnoreSwizzle = 1; // No swizzle meaningful for scalar. + break; + } + case OPERAND_TYPE_UNORDERED_ACCESS_VIEW: + { + oss << ResourceName(RGROUP_UAV, psOperand->ui32RegisterNumber); + break; + } + case OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY: + { + oss << "TGSM" << psOperand->ui32RegisterNumber; + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_INPUT_PRIMITIVEID: + { + // Not supported on Metal + ASSERT(0); + break; + } + case OPERAND_TYPE_INDEXABLE_TEMP: + { + oss << "TempArray" << psOperand->aui32ArraySizes[0] << "["; + if (psOperand->aui32ArraySizes[1] != 0 || !psOperand->m_SubOperands[1].get()) + oss << psOperand->aui32ArraySizes[1]; + + if(psOperand->m_SubOperands[1].get()) + { + if (psOperand->aui32ArraySizes[1] != 0) + oss << "+"; + oss << TranslateOperand(psOperand->m_SubOperands[1].get(), TO_FLAG_INTEGER); + + } + oss << "]"; + break; + } + case OPERAND_TYPE_STREAM: + { + // Not supported on Metal + ASSERT(0); + break; + } + case OPERAND_TYPE_INPUT_GS_INSTANCE_ID: + { + // Not supported on Metal + ASSERT(0); + break; + } + case OPERAND_TYPE_THIS_POINTER: + { + ASSERT(0); // Nope. 
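+ // (THIS_POINTER operands come from D3D11 interfaces / dynamic shader linkage,
+ // which this backend does not attempt to support.)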
+ break; + } + case OPERAND_TYPE_INPUT_PATCH_CONSTANT: + { + // Not supported on Metal + ASSERT(0); + + break; + } + default: + { + ASSERT(0); + break; + } + } + + if (hasCtor && (*pui32IgnoreSwizzle == 0)) + { + oss << TranslateOperandSwizzle(psOperand, ui32CompMask, piRebase ? *piRebase : 0); + *pui32IgnoreSwizzle = 1; + } + + if (needsBoolUpscale) + { + if (requestedType == SVT_UINT || requestedType == SVT_UINT16 || requestedType == SVT_UINT8) + oss << ") * 0xffffffffu"; + else + oss << ") * int(0xffffffffu)"; + numParenthesis--; + } + + while (numParenthesis != 0) + { + oss << ")"; + numParenthesis--; + } + return oss.str(); +} + +std::string ToMetal::TranslateOperand(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t ui32ComponentMask) +{ + std::ostringstream oss; + uint32_t ui32IgnoreSwizzle = 0; + int iRebase = 0; + + // in single-component mode there is no need to use mask + if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) + ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL; + + if(ui32TOFlag & TO_FLAG_NAME_ONLY) + { + return TranslateVariableName(psOperand, ui32TOFlag, &ui32IgnoreSwizzle, OPERAND_4_COMPONENT_MASK_ALL, &iRebase); + } + + switch (psOperand->eModifier) + { + case OPERAND_MODIFIER_NONE: + { + break; + } + case OPERAND_MODIFIER_NEG: + { + oss << ("(-"); + break; + } + case OPERAND_MODIFIER_ABS: + { + oss << ("abs("); + break; + } + case OPERAND_MODIFIER_ABSNEG: + { + oss << ("-abs("); + break; + } + } + + oss << TranslateVariableName(psOperand, ui32TOFlag, &ui32IgnoreSwizzle, ui32ComponentMask, &iRebase); + + if (!ui32IgnoreSwizzle) + { + oss << TranslateOperandSwizzle(psOperand, ui32ComponentMask, iRebase); + } + + switch (psOperand->eModifier) + { + case OPERAND_MODIFIER_NONE: + { + break; + } + case OPERAND_MODIFIER_NEG: + { + oss << (")"); + break; + } + case OPERAND_MODIFIER_ABS: + { + oss << (")"); + break; + } + case OPERAND_MODIFIER_ABSNEG: + { + oss << (")"); + break; + } + } + return oss.str(); +}
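+
+// Example of the wrapping performed above (illustrative; the operand name is a
+// placeholder): for an operand carrying OPERAND_MODIFIER_ABSNEG with an .xy
+// swizzle, the result is "-abs(<name>.xy)" -- the modifier opens the expression,
+// TranslateVariableName() supplies <name>, TranslateOperandSwizzle() appends
+// ".xy" (unless the name translation asked to ignore the swizzle), and the
+// closing ")" is added at the end.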