commit 75471fbb985e66a396e8d8ec98187a05a2900d37
Author: Hans-Kristian Arntzen
Date:   Wed Mar 2 18:09:16 2016 +0100

    Initial commit.

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..48724f4
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,13 @@
+*.o
+*.d
+*.txt
+/test
+/spir2cross
+*.spv
+/obj
+/msvc/x64
+/msvc/Debug
+/msvc/Release
+*.suo
+*.sdf
+*.opensdf
diff --git a/GLSL.std.450.h b/GLSL.std.450.h
new file mode 100644
index 0000000..df31092
--- /dev/null
+++ b/GLSL.std.450.h
@@ -0,0 +1,131 @@
+/*
+** Copyright (c) 2014-2016 The Khronos Group Inc.
+**
+** Permission is hereby granted, free of charge, to any person obtaining a copy
+** of this software and/or associated documentation files (the "Materials"),
+** to deal in the Materials without restriction, including without limitation
+** the rights to use, copy, modify, merge, publish, distribute, sublicense,
+** and/or sell copies of the Materials, and to permit persons to whom the
+** Materials are furnished to do so, subject to the following conditions:
+**
+** The above copyright notice and this permission notice shall be included in
+** all copies or substantial portions of the Materials.
+**
+** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
+** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
+** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/
+**
+** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
+** IN THE MATERIALS.
+*/ + +#ifndef GLSLstd450_H +#define GLSLstd450_H + +static const int GLSLstd450Version = 100; +static const int GLSLstd450Revision = 1; + +enum GLSLstd450 { + GLSLstd450Bad = 0, // Don't use + + GLSLstd450Round = 1, + GLSLstd450RoundEven = 2, + GLSLstd450Trunc = 3, + GLSLstd450FAbs = 4, + GLSLstd450SAbs = 5, + GLSLstd450FSign = 6, + GLSLstd450SSign = 7, + GLSLstd450Floor = 8, + GLSLstd450Ceil = 9, + GLSLstd450Fract = 10, + + GLSLstd450Radians = 11, + GLSLstd450Degrees = 12, + GLSLstd450Sin = 13, + GLSLstd450Cos = 14, + GLSLstd450Tan = 15, + GLSLstd450Asin = 16, + GLSLstd450Acos = 17, + GLSLstd450Atan = 18, + GLSLstd450Sinh = 19, + GLSLstd450Cosh = 20, + GLSLstd450Tanh = 21, + GLSLstd450Asinh = 22, + GLSLstd450Acosh = 23, + GLSLstd450Atanh = 24, + GLSLstd450Atan2 = 25, + + GLSLstd450Pow = 26, + GLSLstd450Exp = 27, + GLSLstd450Log = 28, + GLSLstd450Exp2 = 29, + GLSLstd450Log2 = 30, + GLSLstd450Sqrt = 31, + GLSLstd450InverseSqrt = 32, + + GLSLstd450Determinant = 33, + GLSLstd450MatrixInverse = 34, + + GLSLstd450Modf = 35, // second operand needs an OpVariable to write to + GLSLstd450ModfStruct = 36, // no OpVariable operand + GLSLstd450FMin = 37, + GLSLstd450UMin = 38, + GLSLstd450SMin = 39, + GLSLstd450FMax = 40, + GLSLstd450UMax = 41, + GLSLstd450SMax = 42, + GLSLstd450FClamp = 43, + GLSLstd450UClamp = 44, + GLSLstd450SClamp = 45, + GLSLstd450FMix = 46, + GLSLstd450IMix = 47, // Reserved + GLSLstd450Step = 48, + GLSLstd450SmoothStep = 49, + + GLSLstd450Fma = 50, + GLSLstd450Frexp = 51, // second operand needs an OpVariable to write to + GLSLstd450FrexpStruct = 52, // no OpVariable operand + GLSLstd450Ldexp = 53, + + GLSLstd450PackSnorm4x8 = 54, + GLSLstd450PackUnorm4x8 = 55, + GLSLstd450PackSnorm2x16 = 56, + GLSLstd450PackUnorm2x16 = 57, + GLSLstd450PackHalf2x16 = 58, + GLSLstd450PackDouble2x32 = 59, + GLSLstd450UnpackSnorm2x16 = 60, + GLSLstd450UnpackUnorm2x16 = 61, + GLSLstd450UnpackHalf2x16 = 62, + GLSLstd450UnpackSnorm4x8 = 63, + GLSLstd450UnpackUnorm4x8 = 64, + GLSLstd450UnpackDouble2x32 = 65, + + GLSLstd450Length = 66, + GLSLstd450Distance = 67, + GLSLstd450Cross = 68, + GLSLstd450Normalize = 69, + GLSLstd450FaceForward = 70, + GLSLstd450Reflect = 71, + GLSLstd450Refract = 72, + + GLSLstd450FindILsb = 73, + GLSLstd450FindSMsb = 74, + GLSLstd450FindUMsb = 75, + + GLSLstd450InterpolateAtCentroid = 76, + GLSLstd450InterpolateAtSample = 77, + GLSLstd450InterpolateAtOffset = 78, + + GLSLstd450NMin = 79, + GLSLstd450NMax = 80, + GLSLstd450NClamp = 81, + + GLSLstd450Count +}; + +#endif // #ifndef GLSLstd450_H diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..d645695 --- /dev/null +++ b/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..5ffab47
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,27 @@
+TARGET := spir2cross
+SOURCES := $(wildcard *.cpp)
+OBJECTS := $(SOURCES:.cpp=.o)
+DEPS := $(OBJECTS:.o=.d)
+
+CXXFLAGS += -std=c++11 -Wall -Wextra
+
+ifeq ($(DEBUG), 1)
+	CXXFLAGS += -O0 -gdwarf-2
+else
+	CXXFLAGS += -O2 -gdwarf-2
+endif
+
+all: $(TARGET)
+
+-include $(DEPS)
+
+$(TARGET): $(OBJECTS)
+	$(CXX) -o $@ $(OBJECTS) $(LDFLAGS)
+
+%.o: %.cpp
+	$(CXX) -c -o $@ $< $(CXXFLAGS) -MMD
+
+clean:
+	rm -f $(TARGET) $(OBJECTS)
+
+.PHONY: clean
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..4441dd4
--- /dev/null
+++ b/README.md
@@ -0,0 +1,75 @@
+# SPIR2CROSS
+
+SPIR2CROSS is a tool for parsing SPIR-V and converting it to other shader languages.
+
+## Features
+
+  - Convert SPIR-V to readable, usable and efficient GLSL
+  - Convert SPIR-V to debuggable C++ [EXPERIMENTAL]
+  - Reflection API to simplify the creation of Vulkan pipeline layouts
+  - Reflection API to modify and tweak OpDecorations
+  - Supports "all" of vertex, fragment, tessellation, geometry and compute shaders
+
+SPIR2CROSS tries hard to emit readable and clean output from the SPIR-V.
+The goal is to emit GLSL that looks like it was written by a human and not awkward IR/assembly-like code.
+
+NOTE: Individual features are expected to be mostly complete, but it is possible that certain obscure GLSL features are not yet supported.
+However, most missing features are expected to be "trivial" improvements at this stage.
+
+Occasionally, a feature is missing because glslangValidator does not yet support it properly, which makes testing difficult.
+
+## Building
+
+SPIR2CROSS has been tested on Linux, OSX and Windows.
+
+### Linux and OSX
+
+Just run `make` on the command line. A recent GCC (4.8+) or Clang (3.x+) compiler is required, as SPIR2CROSS uses C++11 extensively.
+
+### Windows
+
+MinGW-w64 based compilation works with `make`, and an MSVC 2013 solution is also included.
+
+## Usage
+
+### Creating a SPIR-V file from GLSL with glslang
+
+```
+glslangValidator -H -V -o test.spv test.frag
+```
+
+### Converting a SPIR-V file to GLSL ES
+
+```
+glslangValidator -H -V -o test.spv shaders/comp/basic.comp
+./spir2cross --version 310 --es test.spv
+```
+
+### Converting to desktop GLSL
+
+```
+glslangValidator -H -V -o test.spv shaders/comp/basic.comp
+./spir2cross --version 330 test.spv --output test.comp
+```
+
+### Disabling prettifying optimizations
+
+```
+glslangValidator -H -V -o test.spv shaders/comp/basic.comp
+./spir2cross --version 310 --es test.spv --output test.comp --force-temporary
+```
+
+## ABI concerns
+
+### SPIR-V headers
+
+The current repository uses the latest SPIR-V and GLSL.std.450 headers.
+SPIR-V files created from older headers could have ABI issues.
+
+## Regression testing
+
+In shaders/, a collection of shaders is maintained for regression testing.
+The current reference output is contained in reference/.
+Run `./test_shaders.py shaders` to perform regression testing.
+Currently, the Mali Offline Compiler `malisc` is used to verify the outputs from SPIR2CROSS.
diff --git a/include/spir2cross/barrier.hpp b/include/spir2cross/barrier.hpp
new file mode 100644
index 0000000..3685250
--- /dev/null
+++ b/include/spir2cross/barrier.hpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2015-2016 ARM Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef SPIR2CROSS_BARRIER_HPP +#define SPIR2CROSS_BARRIER_HPP + +#include +#include + +namespace spir2cross +{ + class Barrier + { + public: + Barrier() + { + count.store(0); + iteration.store(0); + } + + void set_release_divisor(unsigned divisor) + { + this->divisor = divisor; + } + + static inline void memoryBarrier() + { + std::atomic_thread_fence(std::memory_order_seq_cst); + } + + void reset_counter() + { + count.store(0); + iteration.store(0); + } + + void wait() + { + unsigned target_iteration = iteration.load(std::memory_order_relaxed) + 1; + // Overflows cleanly. + unsigned target_count = divisor * target_iteration; + + // Barriers don't enforce memory ordering. + // Be as relaxed about the barrier as we possibly can! + unsigned c = count.fetch_add(1u, std::memory_order_relaxed); + + if (c + 1 == target_count) + { + iteration.store(target_iteration, std::memory_order_relaxed); + } + else + { + // If we have more threads than the CPU, don't hog the CPU for very long periods of time. + while (iteration.load(std::memory_order_relaxed) != target_iteration) + std::this_thread::yield(); + } + } + + private: + unsigned divisor = 1; + std::atomic count; + std::atomic iteration; + }; +} + +#endif diff --git a/include/spir2cross/external_interface.h b/include/spir2cross/external_interface.h new file mode 100644 index 0000000..96172fe --- /dev/null +++ b/include/spir2cross/external_interface.h @@ -0,0 +1,137 @@ +/* + * Copyright 2015-2016 ARM Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef SPIR2CROSS_EXTERNAL_INTERFACE_H +#define SPIR2CROSS_EXTERNAL_INTERFACE_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +typedef struct spir2cross_shader spir2cross_shader_t; + +struct spir2cross_interface +{ + spir2cross_shader_t* (*construct)(void); + void (*destruct)(spir2cross_shader_t *thiz); + void (*invoke)(spir2cross_shader_t *thiz); +}; + +void spir2cross_set_stage_input(spir2cross_shader_t *thiz, + unsigned location, void *data, size_t size); + +void spir2cross_set_stage_output(spir2cross_shader_t *thiz, + unsigned location, void *data, size_t size); + +void spir2cross_set_push_constant(spir2cross_shader_t *thiz, + void *data, size_t size); + +void spir2cross_set_uniform_constant(spir2cross_shader_t *thiz, + unsigned location, + void *data, size_t size); + +void spir2cross_set_resource(spir2cross_shader_t *thiz, + unsigned set, unsigned binding, + void **data, size_t size); + +const struct spir2cross_interface* spir2cross_get_interface(void); + +typedef enum spir2cross_builtin +{ + SPIR2CROSS_BUILTIN_POSITION = 0, + SPIR2CROSS_BUILTIN_FRAG_COORD = 1, + SPIR2CROSS_BUILTIN_WORK_GROUP_ID = 2, + SPIR2CROSS_BUILTIN_NUM_WORK_GROUPS = 3, + SPIR2CROSS_NUM_BUILTINS +} spir2cross_builtin; + +void spir2cross_set_builtin(spir2cross_shader_t *thiz, + spir2cross_builtin builtin, + void *data, size_t size); + +#define SPIR2CROSS_NUM_DESCRIPTOR_SETS 4 +#define SPIR2CROSS_NUM_DESCRIPTOR_BINDINGS 16 +#define SPIR2CROSS_NUM_STAGE_INPUTS 16 +#define SPIR2CROSS_NUM_STAGE_OUTPUTS 16 +#define SPIR2CROSS_NUM_UNIFORM_CONSTANTS 32 + +enum spir2cross_format +{ + SPIR2CROSS_FORMAT_R8_UNORM = 0, + SPIR2CROSS_FORMAT_R8G8_UNORM = 1, + SPIR2CROSS_FORMAT_R8G8B8_UNORM = 2, + SPIR2CROSS_FORMAT_R8G8B8A8_UNORM = 3, + + SPIR2CROSS_NUM_FORMATS +}; + +enum spir2cross_wrap +{ + SPIR2CROSS_WRAP_CLAMP_TO_EDGE = 0, + SPIR2CROSS_WRAP_REPEAT = 1, + + SPIR2CROSS_NUM_WRAP +}; + +enum spir2cross_filter +{ + SPIR2CROSS_FILTER_NEAREST = 0, + SPIR2CROSS_FILTER_LINEAR = 1, + + SPIR2CROSS_NUM_FILTER +}; + +enum spir2cross_mipfilter +{ + SPIR2CROSS_MIPFILTER_BASE = 0, + SPIR2CROSS_MIPFILTER_NEAREST = 1, + SPIR2CROSS_MIPFILTER_LINEAR = 2, + + SPIR2CROSS_NUM_MIPFILTER +}; + +struct spir2cross_miplevel +{ + const void *data; + unsigned width, height; + size_t stride; +}; + +struct spir2cross_sampler_info +{ + const struct spir2cross_miplevel *mipmaps; + unsigned num_mipmaps; + + enum spir2cross_format format; + enum spir2cross_wrap wrap_s; + enum spir2cross_wrap wrap_t; + enum spir2cross_filter min_filter; + enum spir2cross_filter mag_filter; + enum spir2cross_mipfilter mip_filter; +}; + +typedef struct spir2cross_sampler_2d spir2cross_sampler_2d_t; +spir2cross_sampler_2d_t *spir2cross_create_sampler_2d(const struct spir2cross_sampler_info *info); +void spir2cross_destroy_sampler_2d(spir2cross_sampler_2d_t *samp); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/spir2cross/image.hpp b/include/spir2cross/image.hpp new file mode 100644 index 0000000..3388069 --- /dev/null +++ b/include/spir2cross/image.hpp @@ -0,0 +1,45 @@ +/* + * Copyright 2015-2016 ARM Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef SPIR2CROSS_IMAGE_HPP +#define SPIR2CROSS_IMAGE_HPP + +#ifndef GLM_SWIZZLE +#define GLM_SWIZZLE +#endif + +#ifndef GLM_FORCE_RADIANS +#define GLM_FORCE_RADIANS +#endif + +#include + +namespace spir2cross +{ + template + struct image2DBase + { + virtual ~image2DBase() = default; + inline virtual T load(glm::ivec2 coord) { return T(0, 0, 0, 1); } + inline virtual void store(glm::ivec2 coord, const T &v) {} + }; + + typedef image2DBase image2D; + typedef image2DBase iimage2D; + typedef image2DBase uimage2D; +} + +#endif diff --git a/include/spir2cross/internal_interface.hpp b/include/spir2cross/internal_interface.hpp new file mode 100644 index 0000000..68145cf --- /dev/null +++ b/include/spir2cross/internal_interface.hpp @@ -0,0 +1,531 @@ +/* + * Copyright 2015-2016 ARM Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef SPIR2CROSS_INTERNAL_INTERFACE_HPP +#define SPIR2CROSS_INTERNAL_INTERFACE_HPP + +// This file must only be included by the shader generated by spir2cross! + +#ifndef GLM_SWIZZLE +#define GLM_SWIZZLE +#endif + +#ifndef GLM_FORCE_RADIANS +#define GLM_FORCE_RADIANS +#endif + +#include + +#include +#include +#include "external_interface.h" +#include "barrier.hpp" +#include "thread_group.hpp" +#include "sampler.hpp" +#include "image.hpp" + +namespace internal +{ + // Adaptor helpers to adapt GLSL access chain syntax to C++. + // Don't bother with arrays of arrays on uniforms ... + // Would likely need horribly complex variadic template munging. + + template + struct Interface + { + enum { ArraySize = 1, Size = sizeof(T) }; + + Interface() : ptr(0) {} + T& get() + { + assert(ptr); + return *ptr; + } + + T *ptr; + }; + + // For array types, return a pointer instead. + template + struct Interface + { + enum { ArraySize = U, Size = U * sizeof(T) }; + + Interface() : ptr(0) {} + T* get() + { + assert(ptr); + return ptr; + } + + T *ptr; + }; + + // For case when array size is 1, avoid double dereference. + template + struct PointerInterface + { + enum { ArraySize = 1, Size = sizeof(T*) }; + enum { PreDereference = true }; + + PointerInterface() : ptr(0) {} + + T& get() + { + assert(ptr); + return *ptr; + } + + T *ptr; + }; + + // Automatically converts a pointer down to reference to match GLSL syntax. + template + struct DereferenceAdaptor + { + DereferenceAdaptor(T **ptr) : ptr(ptr) {} + T& operator[](unsigned index) const { return *(ptr[index]); } + T **ptr; + }; + + // We can't have a linear array of T* since T* can be an abstract type in case of samplers. 
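+    // (A sampler, for instance, is only ever accessed through a pointer to its abstract base class.)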
+ // We also need a list of pointers since we can have run-time length SSBOs. + template + struct PointerInterface + { + enum { ArraySize = U, Size = sizeof(T*) * U }; + enum { PreDereference = false }; + PointerInterface() : ptr(0) {} + + DereferenceAdaptor get() + { + assert(ptr); + return DereferenceAdaptor(ptr); + } + + T **ptr; + }; + + // Resources can be more abstract and be unsized, + // so we need to have an array of pointers for those cases. + template struct Resource : PointerInterface {}; + + // POD with no unknown sizes, so we can express these as flat arrays. + template struct UniformConstant : Interface {}; + template struct StageInput : Interface {}; + template struct StageOutput : Interface {}; + template struct PushConstant : Interface {}; +} + +struct spir2cross_shader +{ + struct PPSize + { + PPSize() : ptr(0), size(0) {} + void **ptr; + size_t size; + }; + + struct PPSizeResource + { + PPSizeResource() : ptr(0), size(0), pre_dereference(false) {} + void **ptr; + size_t size; + bool pre_dereference; + }; + + PPSizeResource resources[SPIR2CROSS_NUM_DESCRIPTOR_SETS][SPIR2CROSS_NUM_DESCRIPTOR_BINDINGS]; + PPSize stage_inputs[SPIR2CROSS_NUM_STAGE_INPUTS]; + PPSize stage_outputs[SPIR2CROSS_NUM_STAGE_OUTPUTS]; + PPSize uniform_constants[SPIR2CROSS_NUM_UNIFORM_CONSTANTS]; + PPSize push_constant; + PPSize builtins[SPIR2CROSS_NUM_BUILTINS]; + + template + void register_builtin(spir2cross_builtin builtin, const U& value) + { + assert(!builtins[builtin].ptr); + + builtins[builtin].ptr = (void**)&value.ptr; + builtins[builtin].size = sizeof(*value.ptr) * U::ArraySize; + } + + void set_builtin(spir2cross_builtin builtin, void *data, size_t size) + { + assert(builtins[builtin].ptr); + assert(size >= builtins[builtin].size); + + *builtins[builtin].ptr = data; + } + + template + void register_resource(const internal::Resource &value, unsigned set, unsigned binding) + { + assert(set < SPIR2CROSS_NUM_DESCRIPTOR_SETS); + assert(binding < SPIR2CROSS_NUM_DESCRIPTOR_BINDINGS); + assert(!resources[set][binding].ptr); + + resources[set][binding].ptr = (void**)&value.ptr; + resources[set][binding].size = internal::Resource::Size; + resources[set][binding].pre_dereference = internal::Resource::PreDereference; + } + + template + void register_stage_input(const internal::StageInput &value, unsigned location) + { + assert(location < SPIR2CROSS_NUM_STAGE_INPUTS); + assert(!stage_inputs[location].ptr); + + stage_inputs[location].ptr = (void**)&value.ptr; + stage_inputs[location].size = internal::StageInput::Size; + } + + template + void register_stage_output(const internal::StageOutput &value, unsigned location) + { + assert(location < SPIR2CROSS_NUM_STAGE_OUTPUTS); + assert(!stage_outputs[location].ptr); + + stage_outputs[location].ptr = (void**)&value.ptr; + stage_outputs[location].size = internal::StageOutput::Size; + } + + template + void register_uniform_constant(const internal::UniformConstant &value, unsigned location) + { + assert(location < SPIR2CROSS_NUM_UNIFORM_CONSTANTS); + assert(!uniform_constants[location].ptr); + + uniform_constants[location].ptr = (void**)&value.ptr; + uniform_constants[location].size = internal::UniformConstant::Size; + } + + template + void register_push_constant(const internal::PushConstant &value) + { + assert(!push_constant.ptr); + + push_constant.ptr = (void**)&value.ptr; + push_constant.size = internal::PushConstant::Size; + } + + void set_stage_input(unsigned location, void *data, size_t size) + { + assert(location < SPIR2CROSS_NUM_STAGE_INPUTS); + 
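+        // A non-null interface pointer means the generated shader has registered this
+        // location via register_stage_input(); binding an unregistered location is a caller error.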
assert(stage_inputs[location].ptr); + assert(size >= stage_inputs[location].size); + + *stage_inputs[location].ptr = data; + } + + void set_stage_output(unsigned location, void *data, size_t size) + { + assert(location < SPIR2CROSS_NUM_STAGE_OUTPUTS); + assert(stage_outputs[location].ptr); + assert(size >= stage_outputs[location].size); + + *stage_outputs[location].ptr = data; + } + + void set_uniform_constant(unsigned location, void *data, size_t size) + { + assert(location < SPIR2CROSS_NUM_UNIFORM_CONSTANTS); + assert(uniform_constants[location].ptr); + assert(size >= uniform_constants[location].size); + + *uniform_constants[location].ptr = data; + } + + void set_push_constant(void *data, size_t size) + { + assert(push_constant.ptr); + assert(size >= push_constant.size); + + *push_constant.ptr = data; + } + + void set_resource(unsigned set, unsigned binding, + void **data, size_t size) + { + assert(set < SPIR2CROSS_NUM_DESCRIPTOR_SETS); + assert(binding < SPIR2CROSS_NUM_DESCRIPTOR_BINDINGS); + assert(resources[set][binding].ptr); + assert(size >= resources[set][binding].size); + + // We're using the regular PointerInterface, dereference ahead of time. + if (resources[set][binding].pre_dereference) + *resources[set][binding].ptr = *data; + else + *resources[set][binding].ptr = data; + } +}; + +namespace spir2cross +{ + template + struct BaseShader : spir2cross_shader + { + void invoke() + { + static_cast(this)->main(); + } + }; + + struct FragmentResources + { + internal::StageOutput gl_FragCoord; + void init(spir2cross_shader &s) + { + s.register_builtin(SPIR2CROSS_BUILTIN_FRAG_COORD, gl_FragCoord); + } +#define gl_FragCoord __res->gl_FragCoord.get() + }; + + template + struct FragmentShader : BaseShader > + { + inline void main() + { + impl.main(); + } + + FragmentShader() + { + resources.init(*this); + impl.__res = &resources; + } + + T impl; + Res resources; + }; + + struct VertexResources + { + internal::StageOutput gl_Position; + void init(spir2cross_shader &s) + { + s.register_builtin(SPIR2CROSS_BUILTIN_POSITION, gl_Position); + } +#define gl_Position __res->gl_Position.get() + }; + + template + struct VertexShader : BaseShader > + { + inline void main() + { + impl.main(); + } + + VertexShader() + { + resources.init(*this); + impl.__res = &resources; + } + + T impl; + Res resources; + }; + + struct TessEvaluationResources + { + inline void init(spir2cross_shader&) {} + }; + + template + struct TessEvaluationShader : BaseShader > + { + inline void main() + { + impl.main(); + } + + TessEvaluationShader() + { + resources.init(*this); + impl.__res = &resources; + } + + T impl; + Res resources; + }; + + struct TessControlResources + { + inline void init(spir2cross_shader&) {} + }; + + template + struct TessControlShader : BaseShader > + { + inline void main() + { + impl.main(); + } + + TessControlShader() + { + resources.init(*this); + impl.__res = &resources; + } + + T impl; + Res resources; + }; + + struct GeometryResources + { + inline void init(spir2cross_shader&) {} + }; + + template + struct GeometryShader : BaseShader > + { + inline void main() + { + impl.main(); + } + + GeometryShader() + { + resources.init(*this); + impl.__res = &resources; + } + + T impl; + Res resources; + }; + + struct ComputeResources + { + internal::StageInput gl_WorkGroupID__; + internal::StageInput gl_NumWorkGroups__; + void init(spir2cross_shader &s) + { + s.register_builtin(SPIR2CROSS_BUILTIN_WORK_GROUP_ID, gl_WorkGroupID__); + s.register_builtin(SPIR2CROSS_BUILTIN_NUM_WORK_GROUPS, gl_NumWorkGroups__); 
+ } +#define gl_WorkGroupID __res->gl_WorkGroupID__.get() +#define gl_NumWorkGroups __res->gl_NumWorkGroups.get() + + Barrier barrier__; +#define barrier() __res->barrier__.wait() + }; + + struct ComputePrivateResources + { + uint32_t gl_LocalInvocationIndex__; +#define gl_LocalInvocationIndex __priv_res.gl_LocalInvocationIndex__ + glm::uvec3 gl_LocalInvocationID__; +#define gl_LocalInvocationID __priv_res.gl_LocalInvocationID__ + glm::uvec3 gl_GlobalInvocationID__; +#define gl_GlobalInvocationID __priv_res.gl_GlobalInvocationID__ + }; + + template + struct ComputeShader : BaseShader > + { + inline void main() + { + resources.barrier__.reset_counter(); + + for (unsigned z = 0; z < WorkGroupZ; z++) + for (unsigned y = 0; y < WorkGroupY; y++) + for (unsigned x = 0; x < WorkGroupX; x++) + impl[z][y][x].__priv_res.gl_GlobalInvocationID__ = + glm::uvec3(WorkGroupX, WorkGroupY, WorkGroupZ) * resources.gl_WorkGroupID__.get() + + glm::uvec3(x, y, z); + + group.run(); + group.wait(); + } + + ComputeShader() + : group(&impl[0][0][0]) + { + resources.init(*this); + resources.barrier__.set_release_divisor(WorkGroupX * WorkGroupY * WorkGroupZ); + + unsigned i = 0; + for (unsigned z = 0; z < WorkGroupZ; z++) + { + for (unsigned y = 0; y < WorkGroupY; y++) + { + for (unsigned x = 0; x < WorkGroupX; x++) + { + impl[z][y][x].__priv_res.gl_LocalInvocationID__ = + glm::uvec3(x, y, z); + impl[z][y][x].__priv_res.gl_LocalInvocationIndex__ = i++; + impl[z][y][x].__res = &resources; + } + } + } + } + + T impl[WorkGroupZ][WorkGroupY][WorkGroupX]; + ThreadGroup group; + Res resources; + }; + + inline void memoryBarrierShared() { Barrier::memoryBarrier(); } + inline void memoryBarrier() { Barrier::memoryBarrier(); } + // TODO: Rest of the barriers. + + // Atomics + template + inline T atomicAdd(T &v, T a) + { + static_assert(sizeof(std::atomic) == sizeof(T), "Cannot cast properly to std::atomic."); + + // We need explicit memory barriers in GLSL to enfore any ordering. + // FIXME: Can we really cast this? There is no other way I think ... + return std::atomic_fetch_add_explicit(reinterpret_cast*>(&v), + a, std::memory_order_relaxed); + } +} + +void spir2cross_set_stage_input(spir2cross_shader_t *shader, unsigned location, void *data, size_t size) +{ + shader->set_stage_input(location, data, size); +} + +void spir2cross_set_stage_output(spir2cross_shader_t *shader, unsigned location, void *data, size_t size) +{ + shader->set_stage_output(location, data, size); +} + +void spir2cross_set_uniform_constant(spir2cross_shader_t *shader, unsigned location, void *data, size_t size) +{ + shader->set_uniform_constant(location, data, size); +} + +void spir2cross_set_resource(spir2cross_shader_t *shader, unsigned set, unsigned binding, void **data, size_t size) +{ + shader->set_resource(set, binding, data, size); +} + +void spir2cross_set_push_constant(spir2cross_shader_t *shader, void *data, size_t size) +{ + shader->set_push_constant(data, size); +} + +void spir2cross_set_builtin(spir2cross_shader_t *shader, spir2cross_builtin builtin, + void *data, size_t size) +{ + shader->set_builtin(builtin, data, size); +} + +#endif diff --git a/include/spir2cross/sampler.hpp b/include/spir2cross/sampler.hpp new file mode 100644 index 0000000..8437f5f --- /dev/null +++ b/include/spir2cross/sampler.hpp @@ -0,0 +1,101 @@ +/* + * Copyright 2015-2016 ARM Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SPIR2CROSS_SAMPLER_HPP
+#define SPIR2CROSS_SAMPLER_HPP
+
+#include <vector>
+
+namespace spir2cross
+{
+    struct spir2cross_sampler_2d
+    {
+        inline virtual ~spir2cross_sampler_2d() {}
+    };
+
+    template <typename T>
+    struct sampler2DBase : spir2cross_sampler_2d
+    {
+        sampler2DBase(const spir2cross_sampler_info *info)
+        {
+            mips.insert(mips.end(), info->mipmaps, info->mipmaps + info->num_mipmaps);
+            format = info->format;
+            wrap_s = info->wrap_s;
+            wrap_t = info->wrap_t;
+            min_filter = info->min_filter;
+            mag_filter = info->mag_filter;
+            mip_filter = info->mip_filter;
+        }
+
+        inline virtual T sample(glm::vec2 uv, float bias)
+        {
+            return sampleLod(uv, bias);
+        }
+
+        inline virtual T sampleLod(glm::vec2 uv, float lod)
+        {
+            if (mag_filter == SPIR2CROSS_FILTER_NEAREST)
+            {
+                uv.x = wrap(uv.x, wrap_s, mips[0].width);
+                uv.y = wrap(uv.y, wrap_t, mips[0].height);
+                glm::vec2 uv_full = uv * glm::vec2(mips[0].width, mips[0].height);
+
+                int x = int(uv_full.x);
+                int y = int(uv_full.y);
+                return sample(x, y, 0);
+            }
+            else
+            {
+                return T(0, 0, 0, 1);
+            }
+        }
+
+        inline float wrap(float v, spir2cross_wrap wrap, unsigned size)
+        {
+            switch (wrap)
+            {
+                case SPIR2CROSS_WRAP_REPEAT:
+                    return v - glm::floor(v);
+                case SPIR2CROSS_WRAP_CLAMP_TO_EDGE:
+                {
+                    float half = 0.5f / size;
+                    return glm::clamp(v, half, 1.0f - half);
+                }
+
+                default:
+                    return 0.0f;
+            }
+        }
+
+        std::vector<spir2cross_miplevel> mips;
+        spir2cross_format format;
+        spir2cross_wrap wrap_s;
+        spir2cross_wrap wrap_t;
+        spir2cross_filter min_filter;
+        spir2cross_filter mag_filter;
+        spir2cross_mipfilter mip_filter;
+    };
+
+    typedef sampler2DBase<glm::vec4> sampler2D;
+    typedef sampler2DBase<glm::ivec4> isampler2D;
+    typedef sampler2DBase<glm::uvec4> usampler2D;
+
+    // sample() is a non-const virtual, so the sampler is taken by non-const reference.
+    template <typename T>
+    inline T texture(sampler2DBase<T> &samp, const glm::vec2 &uv, float bias = 0.0f) { return samp.sample(uv, bias); }
+}
+
+#endif
diff --git a/include/spir2cross/thread_group.hpp b/include/spir2cross/thread_group.hpp
new file mode 100644
index 0000000..3d6536f
--- /dev/null
+++ b/include/spir2cross/thread_group.hpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright 2015-2016 ARM Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#ifndef SPIR2CROSS_THREAD_GROUP_HPP +#define SPIR2CROSS_THREAD_GROUP_HPP + +#include +#include +#include + +namespace spir2cross +{ + template + class ThreadGroup + { + public: + ThreadGroup(T *impl) + { + for (unsigned i = 0; i < Size; i++) + workers[i].start(&impl[i]); + } + + void run() + { + for (auto &worker : workers) + worker.run(); + } + + void wait() + { + for (auto &worker : workers) + worker.wait(); + } + + private: + struct Thread + { + enum State + { + Idle, + Running, + Dying + }; + State state = Idle; + + void start(T *impl) + { + worker = std::thread([impl, this] { + for (;;) + { + { + std::unique_lock l{lock}; + cond.wait(l, [this] { return state != Idle; }); + if (state == Dying) + break; + } + + impl->main(); + + std::lock_guard l{lock}; + state = Idle; + cond.notify_one(); + } + }); + } + + void wait() + { + std::unique_lock l{lock}; + cond.wait(l, [this] { return state == Idle; }); + } + + void run() + { + std::lock_guard l{lock}; + state = Running; + cond.notify_one(); + } + + ~Thread() + { + if (worker.joinable()) + { + { + std::lock_guard l{lock}; + state = Dying; + cond.notify_one(); + } + worker.join(); + } + } + std::thread worker; + std::condition_variable cond; + std::mutex lock; + }; + Thread workers[Size]; + }; +} + +#endif diff --git a/jni/Android.mk b/jni/Android.mk new file mode 100644 index 0000000..64d4409 --- /dev/null +++ b/jni/Android.mk @@ -0,0 +1,11 @@ +LOCAL_PATH := $(call my-dir) + +include $(CLEAR_VARS) + +LOCAL_CFLAGS += -std=c++11 -Wall -Wextra +LOCAL_MODULE := spir2cross +LOCAL_SRC_FILES := ../spir2cross.cpp ../spir2glsl.cpp ../spir2cpp.cpp +LOCAL_CPP_FEATURES := exceptions +LOCAL_ARM_MODE := arm + +include $(BUILD_STATIC_LIBRARY) diff --git a/jni/Application.mk b/jni/Application.mk new file mode 100644 index 0000000..9a2e77f --- /dev/null +++ b/jni/Application.mk @@ -0,0 +1,2 @@ +APP_STL := c++_static +APP_ABI := armeabi-v7a diff --git a/main.cpp b/main.cpp new file mode 100644 index 0000000..9cf1eab --- /dev/null +++ b/main.cpp @@ -0,0 +1,425 @@ +/* + * Copyright 2015-2016 ARM Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "spir2cpp.hpp"
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <functional>
+#include <limits>
+#include <memory>
+#include <stdexcept>
+#include <unordered_map>
+
+using namespace spv;
+using namespace spir2cross;
+using namespace std;
+
+struct CLIParser;
+struct CLICallbacks
+{
+    void add(const char *cli, const function<void (CLIParser&)> &func)
+    {
+        callbacks[cli] = func;
+    }
+    unordered_map<string, function<void (CLIParser&)>> callbacks;
+    function<void ()> error_handler;
+    function<void (const char*)> default_handler;
+};
+
+struct CLIParser
+{
+    CLIParser(CLICallbacks cbs, int argc, char *argv[])
+        : cbs(move(cbs)), argc(argc), argv(argv)
+    {}
+
+    bool parse()
+    {
+        try
+        {
+            while (argc && !ended_state)
+            {
+                const char *next = *argv++;
+                argc--;
+
+                if (*next != '-' && cbs.default_handler)
+                {
+                    cbs.default_handler(next);
+                }
+                else
+                {
+                    auto itr = cbs.callbacks.find(next);
+                    if (itr == ::end(cbs.callbacks))
+                    {
+                        throw logic_error("Invalid argument.\n");
+                    }
+
+                    itr->second(*this);
+                }
+            }
+
+            return true;
+        }
+        catch (...)
+        {
+            if (cbs.error_handler)
+            {
+                cbs.error_handler();
+            }
+            return false;
+        }
+    }
+
+    void end()
+    {
+        ended_state = true;
+    }
+
+    uint32_t next_uint()
+    {
+        if (!argc)
+        {
+            throw logic_error("Tried to parse uint, but nothing left in arguments.\n");
+        }
+
+        // stoul() returns unsigned long; keep the wider type so the range check is meaningful.
+        unsigned long val = stoul(*argv);
+        if (val > numeric_limits<uint32_t>::max())
+        {
+            throw out_of_range("next_uint() out of range.\n");
+        }
+
+        argc--;
+        argv++;
+
+        return uint32_t(val);
+    }
+
+    double next_double()
+    {
+        if (!argc)
+        {
+            throw logic_error("Tried to parse double, but nothing left in arguments.\n");
+        }
+
+        double val = stod(*argv);
+
+        argc--;
+        argv++;
+
+        return val;
+    }
+
+    const char *next_string()
+    {
+        if (!argc)
+        {
+            throw logic_error("Tried to parse string, but nothing left in arguments.\n");
+        }
+
+        const char *ret = *argv;
+        argc--;
+        argv++;
+        return ret;
+    }
+
+    CLICallbacks cbs;
+    int argc;
+    char **argv;
+    bool ended_state = false;
+};
+
+static vector<uint32_t> read_spirv_file(const char *path)
+{
+    FILE *file = fopen(path, "rb");
+    if (!file)
+    {
+        fprintf(stderr, "Failed to open SPIRV file: %s\n", path);
+        return {};
+    }
+
+    fseek(file, 0, SEEK_END);
+    long len = ftell(file) / sizeof(uint32_t);
+    rewind(file);
+
+    vector<uint32_t> spirv(len);
+    if (fread(spirv.data(), sizeof(uint32_t), len, file) != size_t(len))
+        spirv.clear();
+
+    fclose(file);
+    return spirv;
+}
+
+static bool write_string_to_file(const char *path, const char *string)
+{
+    FILE *file = fopen(path, "w");
+    if (!file)
+    {
+        // Report to stderr; `file` is null here, so it cannot be the fprintf target.
+        fprintf(stderr, "Failed to write file: %s\n", path);
+        return false;
+    }
+
+    fprintf(file, "%s", string);
+    fclose(file);
+    return true;
+}
+
+static void print_resources(const Compiler &compiler, const char *tag, const vector<Resource> &resources)
+{
+    fprintf(stderr, "%s\n", tag);
+    fprintf(stderr, "=============\n\n");
+    for (auto &res : resources)
+    {
+        auto &type = compiler.get_type(res.type_id);
+        auto mask = compiler.get_decoration_mask(res.id);
+
+        // If we don't have a name, use the fallback for the type instead of the variable
+        // for SSBOs and UBOs since those are the only meaningful names to use externally.
+        // Push constant blocks are still accessed by name and not block name, even though they are technically Blocks.
+        bool is_push_constant = compiler.get_storage_class(res.id) == StorageClassPushConstant;
+        bool is_block = (compiler.get_decoration_mask(type.self) &
+                ((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))) != 0;
+        uint32_t fallback_id = !is_push_constant && is_block ? res.type_id : res.id;
+
+        fprintf(stderr, "  ID %03u : %s", res.id,
+                !res.name.empty() ? res.name.c_str() : compiler.get_fallback_name(fallback_id).c_str());
+
+        if (mask & (1ull << DecorationLocation))
+            fprintf(stderr, " (Location : %u)", compiler.get_decoration(res.id, DecorationLocation));
+        if (mask & (1ull << DecorationDescriptorSet))
+            fprintf(stderr, " (Set : %u)", compiler.get_decoration(res.id, DecorationDescriptorSet));
+        if (mask & (1ull << DecorationBinding))
+            fprintf(stderr, " (Binding : %u)", compiler.get_decoration(res.id, DecorationBinding));
+        fprintf(stderr, "\n");
+    }
+    fprintf(stderr, "=============\n\n");
+}
+
+static void print_resources(const Compiler &compiler, const ShaderResources &res)
+{
+    print_resources(compiler, "subpass inputs", res.subpass_inputs);
+    print_resources(compiler, "inputs", res.stage_inputs);
+    print_resources(compiler, "outputs", res.stage_outputs);
+    print_resources(compiler, "textures", res.sampled_images);
+    print_resources(compiler, "images", res.storage_images);
+    print_resources(compiler, "ssbos", res.storage_buffers);
+    print_resources(compiler, "ubos", res.uniform_buffers);
+    print_resources(compiler, "push", res.push_constant_buffers);
+    print_resources(compiler, "counters", res.atomic_counters);
+}
+
+static void print_push_constant_resources(const Compiler &compiler, const vector<Resource> &res)
+{
+    for (auto &block : res)
+    {
+        auto ranges = compiler.get_active_buffer_ranges(block.id);
+        fprintf(stderr, "Active members in buffer: %s\n",
+                !block.name.empty() ? block.name.c_str() : compiler.get_fallback_name(block.id).c_str());
+
+        fprintf(stderr, "==================\n\n");
+        for (auto &range : ranges)
+        {
+            const auto &name = compiler.get_member_name(block.type_id, range.index);
+
+            fprintf(stderr, "Member #%3u (%s): Offset: %4u, Range: %4u\n",
+                    range.index, !name.empty() ? name.c_str() : compiler.get_fallback_member_name(range.index).c_str(),
+                    unsigned(range.offset), unsigned(range.range));
+        }
+        fprintf(stderr, "==================\n\n");
+    }
+}
+
+struct PLSArg
+{
+    PlsFormat format;
+    string name;
+};
+
+struct CLIArguments
+{
+    const char *input = nullptr;
+    const char *output = nullptr;
+    uint32_t version = 0;
+    bool es = false;
+    bool set_version = false;
+    bool set_es = false;
+    bool dump_resources = false;
+    bool force_temporary = false;
+    bool flatten_ubo = false;
+    bool fixup = false;
+    vector<PLSArg> pls_in;
+    vector<PLSArg> pls_out;
+
+    uint32_t iterations = 1;
+    bool cpp = false;
+};
+
+static void print_help()
+{
+    fprintf(stderr, "Usage: spir2cross [--output <output path>] [SPIR-V file] [--es] [--no-es] [--version <GLSL version>] [--dump-resources] [--help] [--force-temporary] [--cpp] [--flatten-ubo] [--fixup-clipspace] [--iterations iter] [--pls-in format input-name] [--pls-out format output-name]\n");
+}
+
+static vector<PlsRemap> remap_pls(const vector<PLSArg> &pls_variables, const vector<Resource> &resources, const vector<Resource> *secondary_resources)
+{
+    vector<PlsRemap> ret;
+
+    for (auto &pls : pls_variables)
+    {
+        bool found = false;
+        for (auto &res : resources)
+        {
+            if (res.name == pls.name)
+            {
+                ret.push_back({ res.id, pls.format });
+                found = true;
+                break;
+            }
+        }
+
+        if (!found && secondary_resources)
+        {
+            for (auto &res : *secondary_resources)
+            {
+                if (res.name == pls.name)
+                {
+                    ret.push_back({ res.id, pls.format });
+                    found = true;
+                    break;
+                }
+            }
+        }
+
+        if (!found)
+            fprintf(stderr, "Did not find stage input/output/target with name \"%s\".\n",
+                    pls.name.c_str());
+    }
+
+    return ret;
+}
+
+static PlsFormat pls_format(const char *str)
+{
+    if (!strcmp(str, "r11f_g11f_b10f")) return PlsR11FG11FB10F;
+    else if (!strcmp(str, "r32f")) return PlsR32F;
"rg16f")) return PlsRG16F; + else if (!strcmp(str, "rg16")) return PlsRG16; + else if (!strcmp(str, "rgb10_a2")) return PlsRGB10A2; + else if (!strcmp(str, "rgba8")) return PlsRGBA8; + else if (!strcmp(str, "rgba8i")) return PlsRGBA8I; + else if (!strcmp(str, "rgba8ui")) return PlsRGBA8UI; + else if (!strcmp(str, "rg16i")) return PlsRG16I; + else if (!strcmp(str, "rgb10_a2ui")) return PlsRGB10A2UI; + else if (!strcmp(str, "rg16ui")) return PlsRG16UI; + else if (!strcmp(str, "r32ui")) return PlsR32UI; + else return PlsNone; +} + +int main(int argc, char *argv[]) +{ + CLIArguments args; + CLICallbacks cbs; + + cbs.add("--help", [](CLIParser &parser) { print_help(); parser.end(); }); + cbs.add("--output", [&args](CLIParser &parser) { args.output = parser.next_string(); }); + cbs.add("--es", [&args](CLIParser &) { args.es = true; args.set_es = true; }); + cbs.add("--no-es", [&args](CLIParser &) { args.es = false; args.set_es = true; }); + cbs.add("--version", [&args](CLIParser &parser) { args.version = parser.next_uint(); args.set_version = true; }); + cbs.add("--dump-resources", [&args](CLIParser &) { args.dump_resources = true; }); + cbs.add("--force-temporary", [&args](CLIParser &) { args.force_temporary = true; }); + cbs.add("--flatten-ubo", [&args](CLIParser &) { args.flatten_ubo = true; }); + cbs.add("--fixup-clipspace", [&args](CLIParser &) { args.fixup = true; }); + cbs.add("--iterations", [&args](CLIParser &parser) { args.iterations = parser.next_uint(); }); + cbs.add("--cpp", [&args](CLIParser &) { args.cpp = true; }); + + cbs.add("--pls-in", [&args](CLIParser &parser) { + auto fmt = pls_format(parser.next_string()); + auto name = parser.next_string(); + args.pls_in.push_back({ move(fmt), move(name) }); + }); + cbs.add("--pls-out", [&args](CLIParser &parser) { + auto fmt = pls_format(parser.next_string()); + auto name = parser.next_string(); + args.pls_out.push_back({ move(fmt), move(name) }); + }); + + cbs.default_handler = [&args](const char *value) { args.input = value; }; + cbs.error_handler = []{ print_help(); }; + + CLIParser parser{move(cbs), argc - 1, argv + 1}; + if (!parser.parse()) + { + return EXIT_FAILURE; + } + else if (parser.ended_state) + { + return EXIT_SUCCESS; + } + + if (!args.input) + { + fprintf(stderr, "Didn't specify input file.\n"); + print_help(); + return EXIT_FAILURE; + } + + unique_ptr compiler; + + if (args.cpp) + compiler = unique_ptr(new CompilerCPP(read_spirv_file(args.input))); + else + compiler = unique_ptr(new CompilerGLSL(read_spirv_file(args.input))); + + if (!args.set_version && !compiler->get_options().version) + { + fprintf(stderr, "Didn't specify GLSL version and SPIR-V did not specify language.\n"); + print_help(); + return EXIT_FAILURE; + } + + CompilerGLSL::Options opts = compiler->get_options(); + if (args.set_version) + opts.version = args.version; + if (args.set_es) + opts.es = args.es; + opts.force_temporary = args.force_temporary; + opts.vertex.fixup_clipspace = args.fixup; + compiler->set_options(opts); + + auto res = compiler->get_shader_resources(); + + if (args.flatten_ubo) + for (auto &ubo : res.uniform_buffers) + compiler->flatten_interface_block(ubo.id); + + auto pls_inputs = remap_pls(args.pls_in, res.stage_inputs, &res.subpass_inputs); + auto pls_outputs = remap_pls(args.pls_out, res.stage_outputs, nullptr); + compiler->remap_pixel_local_storage(move(pls_inputs), move(pls_outputs)); + + if (args.dump_resources) + { + print_resources(*compiler, res); + print_push_constant_resources(*compiler, res.push_constant_buffers); + } + 
+ string glsl; + for (uint32_t i = 0; i < args.iterations; i++) + glsl = compiler->compile(); + + if (args.output) + write_string_to_file(args.output, glsl.c_str()); + else + printf("%s", glsl.c_str()); +} + diff --git a/msvc/spir2cross.sln b/msvc/spir2cross.sln new file mode 100644 index 0000000..328c2cd --- /dev/null +++ b/msvc/spir2cross.sln @@ -0,0 +1,28 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Express 2013 for Windows Desktop +VisualStudioVersion = 12.0.31101.0 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "spir2cross", "spir2cross.vcxproj", "{977E3701-1A21-4425-B7E5-6BDF5EA062CD}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Win32 = Debug|Win32 + Debug|x64 = Debug|x64 + Release|Win32 = Release|Win32 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {977E3701-1A21-4425-B7E5-6BDF5EA062CD}.Debug|Win32.ActiveCfg = Debug|Win32 + {977E3701-1A21-4425-B7E5-6BDF5EA062CD}.Debug|Win32.Build.0 = Debug|Win32 + {977E3701-1A21-4425-B7E5-6BDF5EA062CD}.Debug|x64.ActiveCfg = Debug|x64 + {977E3701-1A21-4425-B7E5-6BDF5EA062CD}.Debug|x64.Build.0 = Debug|x64 + {977E3701-1A21-4425-B7E5-6BDF5EA062CD}.Release|Win32.ActiveCfg = Release|Win32 + {977E3701-1A21-4425-B7E5-6BDF5EA062CD}.Release|Win32.Build.0 = Release|Win32 + {977E3701-1A21-4425-B7E5-6BDF5EA062CD}.Release|x64.ActiveCfg = Release|x64 + {977E3701-1A21-4425-B7E5-6BDF5EA062CD}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/msvc/spir2cross.vcxproj b/msvc/spir2cross.vcxproj new file mode 100644 index 0000000..5b4045a --- /dev/null +++ b/msvc/spir2cross.vcxproj @@ -0,0 +1,160 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + {977E3701-1A21-4425-B7E5-6BDF5EA062CD} + spir2cross + + + + Application + true + v120 + MultiByte + + + Application + true + v120 + MultiByte + + + Application + false + v120 + true + MultiByte + + + Application + false + v120 + true + MultiByte + + + + + + + + + + + + + + + + + + + + + Level3 + Disabled + true + _CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + MultiThreadedDebugDLL + + + true + + + + + Level3 + Disabled + true + _CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + MultiThreadedDebugDLL + + + true + + + + + Level3 + MaxSpeed + true + true + true + _CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + MultiThreadedDLL + + + true + true + true + + + + + Level3 + MaxSpeed + true + true + true + _CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + MultiThreadedDLL + + + true + true + true + + + + + true + true + true + true + + + true + true + true + true + + + true + true + true + true + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/msvc/spir2cross.vcxproj.filters b/msvc/spir2cross.vcxproj.filters new file mode 100644 index 0000000..ba124bc --- /dev/null +++ b/msvc/spir2cross.vcxproj.filters @@ -0,0 +1,63 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + {61390b44-2b95-4b9a-8910-9ea1bc3a4920} + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files 
+ + + Source Files\cpp + + + Source Files\cpp + + + Source Files\cpp + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + \ No newline at end of file diff --git a/reference/shaders/comp/atomic.comp b/reference/shaders/comp/atomic.comp new file mode 100644 index 0000000..d1b8d2e --- /dev/null +++ b/reference/shaders/comp/atomic.comp @@ -0,0 +1,47 @@ +#version 310 es +#extension GL_OES_shader_image_atomic : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 2, std430) buffer SSBO +{ + uint u32; + int i32; +} ssbo; + +layout(binding = 0, r32ui) uniform highp uimage2D uImage; +layout(binding = 1, r32i) uniform highp iimage2D iImage; + +void main() +{ + uint _19 = imageAtomicAdd(uImage, ivec2(1, 5), 1u); + uint _21 = imageAtomicOr(uImage, ivec2(1, 5), 1u); + uint _23 = imageAtomicXor(uImage, ivec2(1, 5), 1u); + uint _25 = imageAtomicAnd(uImage, ivec2(1, 5), 1u); + uint _27 = imageAtomicMin(uImage, ivec2(1, 5), 1u); + uint _29 = imageAtomicMax(uImage, ivec2(1, 5), 1u); + uint _33 = imageAtomicCompSwap(uImage, ivec2(1, 5), 10u, 2u); + int _41 = imageAtomicAdd(iImage, ivec2(1, 6), 1); + int _43 = imageAtomicOr(iImage, ivec2(1, 6), 1); + int _45 = imageAtomicXor(iImage, ivec2(1, 6), 1); + int _47 = imageAtomicAnd(iImage, ivec2(1, 6), 1); + int _49 = imageAtomicMin(iImage, ivec2(1, 6), 1); + int _51 = imageAtomicMax(iImage, ivec2(1, 6), 1); + int _55 = imageAtomicCompSwap(iImage, ivec2(1, 5), 10, 2); + uint _62 = atomicAdd(ssbo.u32, 1u); + uint _64 = atomicOr(ssbo.u32, 1u); + uint _66 = atomicXor(ssbo.u32, 1u); + uint _68 = atomicAnd(ssbo.u32, 1u); + uint _70 = atomicMin(ssbo.u32, 1u); + uint _72 = atomicMax(ssbo.u32, 1u); + uint _74 = atomicExchange(ssbo.u32, 1u); + uint _76 = atomicCompSwap(ssbo.u32, 10u, 2u); + int _79 = atomicAdd(ssbo.i32, 1); + int _81 = atomicOr(ssbo.i32, 1); + int _83 = atomicXor(ssbo.i32, 1); + int _85 = atomicAnd(ssbo.i32, 1); + int _87 = atomicMin(ssbo.i32, 1); + int _89 = atomicMax(ssbo.i32, 1); + int _91 = atomicExchange(ssbo.i32, 1); + int _93 = atomicCompSwap(ssbo.i32, 10, 2); +} + diff --git a/reference/shaders/comp/bake_gradient.comp b/reference/shaders/comp/bake_gradient.comp new file mode 100644 index 0000000..eb87488 --- /dev/null +++ b/reference/shaders/comp/bake_gradient.comp @@ -0,0 +1,39 @@ +#version 310 es +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout(binding = 4, std140) uniform UBO +{ + vec4 uInvSize; + vec4 uScale; +} _46; + +layout(binding = 0) uniform mediump sampler2D uHeight; +layout(binding = 1) uniform mediump sampler2D uDisplacement; +layout(binding = 2, rgba16f) uniform mediump writeonly image2D iHeightDisplacement; +layout(binding = 3, rgba16f) uniform mediump writeonly image2D iGradJacobian; + +mediump float jacobian(mediump vec2 dDdx, mediump vec2 dDdy) +{ + return (((1.000000 + dDdx.x) * (1.000000 + dDdy.y)) - (dDdx.y * dDdy.x)); +} + +void main() +{ + vec4 uv = ((vec2(gl_GlobalInvocationID.xy) * _46.uInvSize.xy).xyxy + (_46.uInvSize * 0.500000)); + float h = textureLod(uHeight, uv.xy, 0.000000).x; + float x0 = textureLodOffset(uHeight, uv.xy, 0.000000, ivec2(-1, 0)).x; + float x1 = textureLodOffset(uHeight, uv.xy, 0.000000, ivec2(1, 0)).x; + float y0 = textureLodOffset(uHeight, uv.xy, 0.000000, ivec2(0, -1)).x; + float y1 = textureLodOffset(uHeight, uv.xy, 0.000000, ivec2(0, 1)).x; + vec2 grad = ((_46.uScale.xy * 0.500000) * vec2((x1 - x0), (y1 - y0))); + vec2 displacement = (textureLod(uDisplacement, 
uv.zw, 0.000000).xy * 1.200000); + vec2 dDdx = ((textureLodOffset(uDisplacement, uv.zw, 0.000000, ivec2(1, 0)).xy - textureLodOffset(uDisplacement, uv.zw, 0.000000, ivec2(-1, 0)).xy) * 0.600000); + vec2 dDdy = ((textureLodOffset(uDisplacement, uv.zw, 0.000000, ivec2(0, 1)).xy - textureLodOffset(uDisplacement, uv.zw, 0.000000, ivec2(0, -1)).xy) * 0.600000); + vec2 param = (dDdx * _46.uScale.z); + vec2 param_1 = (dDdy * _46.uScale.z); + float j = jacobian(param, param_1); + displacement = vec2(0.000000); + imageStore(iHeightDisplacement, ivec2(gl_GlobalInvocationID.xy), vec4(h, displacement, 0.000000)); + imageStore(iGradJacobian, ivec2(gl_GlobalInvocationID.xy), vec4(grad, j, 0.000000)); +} + diff --git a/reference/shaders/comp/basic.comp b/reference/shaders/comp/basic.comp new file mode 100644 index 0000000..f055a51 --- /dev/null +++ b/reference/shaders/comp/basic.comp @@ -0,0 +1,29 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + vec4 in_data[]; +} _23; + +layout(binding = 1, std430) buffer SSBO2 +{ + vec4 out_data[]; +} _45; + +layout(binding = 2, std430) buffer SSBO3 +{ + uint counter; +} _48; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + vec4 idata = _23.in_data[ident]; + if ((dot(idata, vec4(1.000000, 5.000000, 6.000000, 2.000000)) > 8.200000)) + { + uint _52 = atomicAdd(_48.counter, 1u); + _45.out_data[_52] = idata; + } +} + diff --git a/reference/shaders/comp/culling.comp b/reference/shaders/comp/culling.comp new file mode 100644 index 0000000..1083266 --- /dev/null +++ b/reference/shaders/comp/culling.comp @@ -0,0 +1,29 @@ +#version 310 es +layout(local_size_x = 4, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + float in_data[]; +} _22; + +layout(binding = 1, std430) buffer SSBO2 +{ + float out_data[]; +} _38; + +layout(binding = 2, std430) buffer SSBO3 +{ + uint count; +} _41; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + float idata = _22.in_data[ident]; + if ((idata > 12.000000)) + { + uint _45 = atomicAdd(_41.count, 1u); + _38.out_data[_45] = idata; + } +} + diff --git a/reference/shaders/comp/dowhile.comp b/reference/shaders/comp/dowhile.comp new file mode 100644 index 0000000..e62fb61 --- /dev/null +++ b/reference/shaders/comp/dowhile.comp @@ -0,0 +1,29 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + mat4 mvp; + vec4 in_data[]; +} _28; + +layout(binding = 1, std430) buffer SSBO2 +{ + vec4 out_data[]; +} _52; + +int i; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + i = 0; + vec4 idat = _28.in_data[ident]; + do + { + idat = (_28.mvp * idat); + i = (i + 1); + } while ((i < 16)); + _52.out_data[ident] = idat; +} + diff --git a/reference/shaders/comp/generate_height.comp b/reference/shaders/comp/generate_height.comp new file mode 100644 index 0000000..8055483 --- /dev/null +++ b/reference/shaders/comp/generate_height.comp @@ -0,0 +1,96 @@ +#version 310 es +layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer Distribution +{ + vec2 distribution[]; +} _190; + +layout(binding = 2, std140) uniform UBO +{ + vec4 uModTime; +} _218; + +layout(binding = 1, std430) buffer HeightmapFFT +{ + uint heights[]; +} _276; + +uvec2 workaround_mix(uvec2 a, uvec2 b, bvec2 sel) +{ + uint _137; + uint _148; + if (sel.x) + { + _137 = b.x; + } + else + { + _137 = a.x; + } + uint _147 = _137; + if (sel.y) + 
{ + _148 = b.y; + } + else + { + _148 = a.y; + } + return uvec2(_147, _148); +} + +vec2 alias(vec2 i, vec2 N) +{ + return mix(i, (i - N), greaterThan(i, (N * 0.500000))); +} + +vec2 cmul(vec2 a, vec2 b) +{ + vec2 r3 = a.yx; + vec2 r1 = b.xx; + vec2 R0 = (a * r1); + vec2 r2 = b.yy; + vec2 R1 = (r2 * r3); + return (R0 + vec2((-R1.x), R1.y)); +} + +uint pack2(vec2 v) +{ + return packHalf2x16(v); +} + +void generate_heightmap() +{ + uvec2 N = (uvec2(64u, 1u) * gl_NumWorkGroups.xy); + uvec2 i = gl_GlobalInvocationID.xy; + uvec2 param = (N - i); + uvec2 param_1 = uvec2(0u); + bvec2 param_2 = equal(i, uvec2(0u)); + uvec2 wi = workaround_mix(param, param_1, param_2); + vec2 a = _190.distribution[((i.y * N.x) + i.x)]; + vec2 b = _190.distribution[((wi.y * N.x) + wi.x)]; + vec2 param_3 = vec2(i); + vec2 param_4 = vec2(N); + vec2 k = (_218.uModTime.xy * alias(param_3, param_4)); + float k_len = length(k); + float w = (sqrt((9.810000 * k_len)) * _218.uModTime.z); + float cw = cos(w); + float sw = sin(w); + vec2 param_5 = a; + vec2 param_6 = vec2(cw, sw); + a = cmul(param_5, param_6); + vec2 param_7 = b; + vec2 param_8 = vec2(cw, sw); + b = cmul(param_7, param_8); + b = vec2(b.x, (-b.y)); + vec2 res = (a + b); + vec2 param_9 = res; + _276.heights[((i.y * N.x) + i.x)] = pack2(param_9); +} + +void main() +{ + generate_heightmap(); +} + diff --git a/reference/shaders/comp/image.comp b/reference/shaders/comp/image.comp new file mode 100644 index 0000000..19e15c9 --- /dev/null +++ b/reference/shaders/comp/image.comp @@ -0,0 +1,12 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, rgba8) uniform mediump readonly image2D uImageIn; +layout(binding = 1, rgba8) uniform mediump writeonly image2D uImageOut; + +void main() +{ + vec4 v = imageLoad(uImageIn, (ivec2(gl_GlobalInvocationID.xy) + imageSize(uImageIn))); + imageStore(uImageOut, ivec2(gl_GlobalInvocationID.xy), v); +} + diff --git a/reference/shaders/comp/inout-struct.comp b/reference/shaders/comp/inout-struct.comp new file mode 100644 index 0000000..95379ae --- /dev/null +++ b/reference/shaders/comp/inout-struct.comp @@ -0,0 +1,61 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +struct Foo +{ + vec4 a; + vec4 b; + vec4 c; + vec4 d; +}; + +layout(binding = 1, std430) buffer SSBO2 +{ + vec4 data[]; +} indata; + +layout(binding = 0, std430) buffer SSBO +{ + vec4 data[]; +} outdata; + +layout(binding = 2, std430) buffer SSBO3 +{ + Foo foos[]; +} foobar; + +void baz(out Foo foo) +{ + uint ident = gl_GlobalInvocationID.x; + foo.a = indata.data[((4u * ident) + 0u)]; + foo.b = indata.data[((4u * ident) + 1u)]; + foo.c = indata.data[((4u * ident) + 2u)]; + foo.d = indata.data[((4u * ident) + 3u)]; +} + +void meow(inout Foo foo) +{ + foo.a = (foo.a + vec4(10.000000)); + foo.b = (foo.b + vec4(20.000000)); + foo.c = (foo.c + vec4(30.000000)); + foo.d = (foo.d + vec4(40.000000)); +} + +vec4 bar(Foo foo) +{ + return (((foo.a + foo.b) + foo.c) + foo.d); +} + +void main() +{ + Foo param; + baz(param); + Foo foo = param; + Foo param_1 = foo; + meow(param_1); + foo = param_1; + Foo param_2 = foo; + Foo param_3 = foobar.foos[gl_GlobalInvocationID.x]; + outdata.data[gl_GlobalInvocationID.x] = (bar(param_2) + bar(param_3)); +} + diff --git a/reference/shaders/comp/insert.comp b/reference/shaders/comp/insert.comp new file mode 100644 index 0000000..4796925 --- /dev/null +++ b/reference/shaders/comp/insert.comp @@ -0,0 +1,19 @@ +#version 310 es +layout(local_size_x = 1, local_size_y 
= 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + vec4 out_data[]; +} _27; + +void main() +{ + vec4 v; + v.x = 10.000000; + v.y = 30.000000; + v.z = 70.000000; + v.w = 90.000000; + _27.out_data[gl_GlobalInvocationID.x] = v; + _27.out_data[gl_GlobalInvocationID.x].y = 20.000000; +} + diff --git a/reference/shaders/comp/loop.comp b/reference/shaders/comp/loop.comp new file mode 100644 index 0000000..11f9e9e --- /dev/null +++ b/reference/shaders/comp/loop.comp @@ -0,0 +1,72 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + mat4 mvp; + vec4 in_data[]; +} _24; + +layout(binding = 1, std430) buffer SSBO2 +{ + vec4 out_data[]; +} _125; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + vec4 idat = _24.in_data[ident]; + int k = 0; + uint i; + uint j; + int l; + while ((k < 10)) + { + idat = (idat * 2.000000); + k = (k + 1); + } + i = 0u; + for (; (i < 16u); i = (i + uint(1)), k = (k + 1)) + { + j = 0u; + for (; (j < 30u); j = (j + uint(1))) + { + idat = (_24.mvp * idat); + } + } + k = 0; + for (;;) + { + k = (k + 1); + if ((k > 10)) + { + k = (k + 2); + } + else + { + k = (k + 3); + continue; + } + k = (k + 10); + continue; + } + k = 0; + do + { + k = (k + 1); + } while ((k > 10)); + l = 0; + for (;;) + { + if ((l == 5)) + { + l = (l + 1); + continue; + } + idat = (idat + vec4(1.000000)); + l = (l + 1); + continue; + } + _125.out_data[ident] = idat; +} + diff --git a/reference/shaders/comp/mat3.comp b/reference/shaders/comp/mat3.comp new file mode 100644 index 0000000..356bf70 --- /dev/null +++ b/reference/shaders/comp/mat3.comp @@ -0,0 +1,14 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 1, std430) buffer SSBO2 +{ + mat3 out_data[]; +} _22; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + _22.out_data[ident] = mat3(vec3(10.000000), vec3(20.000000), vec3(40.000000)); +} + diff --git a/reference/shaders/comp/modf.comp b/reference/shaders/comp/modf.comp new file mode 100644 index 0000000..721d812 --- /dev/null +++ b/reference/shaders/comp/modf.comp @@ -0,0 +1,22 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + vec4 in_data[]; +} _23; + +layout(binding = 1, std430) buffer SSBO2 +{ + vec4 out_data[]; +} _35; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + vec4 i; + vec4 _31 = modf(_23.in_data[ident], i); + vec4 v = _31; + _35.out_data[ident] = v; +} + diff --git a/reference/shaders/comp/return.comp b/reference/shaders/comp/return.comp new file mode 100644 index 0000000..7792b3f --- /dev/null +++ b/reference/shaders/comp/return.comp @@ -0,0 +1,36 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 1, std430) buffer SSBO2 +{ + vec4 out_data[]; +} _27; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + int i; + if ((ident == 2u)) + { + _27.out_data[ident] = vec4(20.000000); + } + else + { + if ((ident == 4u)) + { + _27.out_data[ident] = vec4(10.000000); + return; + } + } + i = 0; + for (; (i < 20); i = (i + 1)) + { + if ((i == 10)) + { + break; + } + return; + } + _27.out_data[ident] = vec4(10.000000); +} + diff --git a/reference/shaders/comp/shared.comp b/reference/shaders/comp/shared.comp new file mode 100644 index 0000000..cbf8d5d --- /dev/null +++ b/reference/shaders/comp/shared.comp @@ -0,0 +1,25 @@ +#version 310 es +layout(local_size_x = 4, local_size_y = 
1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + float in_data[]; +} _22; + +layout(binding = 1, std430) buffer SSBO2 +{ + float out_data[]; +} _44; + +shared float sShared[4]; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + float idata = _22.in_data[ident]; + sShared[gl_LocalInvocationIndex] = idata; + memoryBarrierShared(); + barrier(); + _44.out_data[ident] = sShared[((4u - gl_LocalInvocationIndex) - 1u)]; +} + diff --git a/reference/shaders/comp/struct-layout.comp b/reference/shaders/comp/struct-layout.comp new file mode 100644 index 0000000..0aae525 --- /dev/null +++ b/reference/shaders/comp/struct-layout.comp @@ -0,0 +1,24 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +struct Foo +{ + mat4 m; +}; + +layout(binding = 1, std430) buffer SSBO2 +{ + Foo out_data[]; +} _23; + +layout(binding = 0, std430) buffer SSBO +{ + Foo in_data[]; +} _30; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + _23.out_data[ident].m = (_30.in_data[ident].m * _30.in_data[ident].m); +} + diff --git a/reference/shaders/comp/struct-packing.comp b/reference/shaders/comp/struct-packing.comp new file mode 100644 index 0000000..9f776f8 --- /dev/null +++ b/reference/shaders/comp/struct-packing.comp @@ -0,0 +1,68 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +struct S0 +{ + vec2 a[1]; + float b; +}; + +struct S1 +{ + vec3 a; + float b; +}; + +struct S2 +{ + vec3 a[1]; + float b; +}; + +struct S3 +{ + vec2 a; + float b; +}; + +struct Content +{ + S0 m0s[1]; + S1 m1s[1]; + S2 m2s[1]; + S0 m0; + S1 m1; + S2 m2; + S3 m3; + float m4; +}; + +layout(binding = 1, std140) buffer SSBO1 +{ + Content content; + Content content1[2]; + Content content2; + mat2 m0; + mat2 m1; + mat2x3 m2[4]; + mat3x2 m3; + layout(row_major) mat2 m4; + layout(row_major) mat2 m5[9]; + layout(row_major) mat2x3 m6[2][4]; + layout(row_major) mat3x2 m7; + float array[]; +} ssbo_430; + +layout(binding = 0, std140) buffer SSBO0 +{ + Content content; + Content content1[2]; + Content content2; + float array[]; +} ssbo_140; + +void main() +{ + ssbo_430.content = ssbo_140.content; +} + diff --git a/reference/shaders/comp/torture-loop.comp b/reference/shaders/comp/torture-loop.comp new file mode 100644 index 0000000..61f624f --- /dev/null +++ b/reference/shaders/comp/torture-loop.comp @@ -0,0 +1,66 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + mat4 mvp; + vec4 in_data[]; +} _24; + +layout(binding = 1, std430) buffer SSBO2 +{ + vec4 out_data[]; +} _89; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + vec4 idat = _24.in_data[ident]; + int k = 0; + uint i; + uint j; + for (;;) + { + int _39 = k; + int _40 = _39 + 1; + k = _40; + if ((_40 < 10)) + { + idat = (idat * 2.000000); + int _47 = k; + k = (_47 + 1); + continue; + } + else + { + break; + } + } + i = 0u; + int _76; + for (; (i < 16u); i = (i + uint(1)), _76 = k, k = (_76 + 1)) + { + j = 0u; + for (; (j < 30u); j = (j + uint(1))) + { + idat = (_24.mvp * idat); + } + } + int _84; + for (;;) + { + int _82 = k; + k = (_82 + 1); + int _84 = k; + if ((_84 > 10)) + { + continue; + } + else + { + break; + } + } + _89.out_data[ident] = idat; +} + diff --git a/reference/shaders/comp/udiv.comp b/reference/shaders/comp/udiv.comp new file mode 100644 index 0000000..1a75f72 --- /dev/null +++ b/reference/shaders/comp/udiv.comp @@ -0,0 +1,18 @@ +#version 310 es +layout(local_size_x = 1, local_size_y 
= 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO2 +{ + uint outputs[]; +} _10; + +layout(binding = 0, std430) buffer SSBO +{ + uint inputs[]; +} _23; + +void main() +{ + _10.outputs[gl_GlobalInvocationID.x] = (_23.inputs[gl_GlobalInvocationID.x] / 29u); +} + diff --git a/reference/shaders/frag/basic.frag b/reference/shaders/frag/basic.frag new file mode 100644 index 0000000..c7c4972 --- /dev/null +++ b/reference/shaders/frag/basic.frag @@ -0,0 +1,15 @@ +#version 310 es +precision mediump float; +precision highp int; + +layout(binding = 0) uniform mediump sampler2D uTex; + +layout(location = 0) out vec4 FragColor; +in vec4 vColor; +in vec2 vTex; + +void main() +{ + FragColor = (vColor * texture(uTex, vTex)); +} + diff --git a/reference/shaders/frag/constant-array.frag b/reference/shaders/frag/constant-array.frag new file mode 100644 index 0000000..3d3de62 --- /dev/null +++ b/reference/shaders/frag/constant-array.frag @@ -0,0 +1,28 @@ +#version 310 es +precision mediump float; +precision highp int; + +struct Foobar +{ + float a; + float b; +}; + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in mediump flat int index; + +vec4 resolve(Foobar f) +{ + return vec4((f.a + f.b)); +} + +void main() +{ + highp vec4 indexable[3] = vec4[](vec4(1.000000), vec4(2.000000), vec4(3.000000)); + highp vec4 indexable_1[2][2] = vec4[][](vec4[](vec4(1.000000), vec4(2.000000)), vec4[](vec4(8.000000), vec4(10.000000))); + Foobar param = Foobar(10.000000, 20.000000); + Foobar indexable_2[2] = Foobar[](Foobar(10.000000, 40.000000), Foobar(90.000000, 70.000000)); + Foobar param_1 = indexable_2[index]; + FragColor = (((indexable[index] + indexable_1[index][(index + 1)]) + resolve(param)) + resolve(param_1)); +} + diff --git a/reference/shaders/frag/flush_params.frag b/reference/shaders/frag/flush_params.frag new file mode 100644 index 0000000..5ff9e4f --- /dev/null +++ b/reference/shaders/frag/flush_params.frag @@ -0,0 +1,30 @@ +#version 310 es +precision mediump float; +precision highp int; + +struct Structy +{ + vec4 c; +}; + +layout(location = 0) out vec4 FragColor; + +void foo2(out Structy f) +{ + f.c = vec4(10.000000); +} + +Structy foo() +{ + Structy param; + foo2(param); + Structy f = param; + return f; +} + +void main() +{ + Structy s = foo(); + FragColor = s.c; +} + diff --git a/reference/shaders/frag/ground.frag b/reference/shaders/frag/ground.frag new file mode 100644 index 0000000..44f5185 --- /dev/null +++ b/reference/shaders/frag/ground.frag @@ -0,0 +1,62 @@ +#version 310 es +precision mediump float; +precision highp int; + +layout(binding = 4, std140) uniform GlobalPSData +{ + vec4 g_CamPos; + vec4 g_SunDir; + vec4 g_SunColor; + vec4 g_ResolutionParams; + vec4 g_TimeParams; + vec4 g_FogColor_Distance; +} _56; + +layout(binding = 2) uniform mediump sampler2D TexNormalmap; + +layout(location = 3) out vec4 LightingOut; +layout(location = 2) out vec4 NormalOut; +layout(location = 1) out vec4 SpecularOut; +layout(location = 0) out vec4 AlbedoOut; +layout(location = 0) in vec2 TexCoord; +layout(location = 1) in vec3 EyeVec; + +float saturate(float x) +{ + return clamp(x, 0.000000, 1.000000); +} + +void Resolve(vec3 Albedo, vec3 Normal, float Roughness, float Metallic) +{ + LightingOut = vec4(0.000000); + NormalOut = vec4(((Normal * 0.500000) + vec3(0.500000)), 0.000000); + SpecularOut = vec4(Roughness, Metallic, 0.000000, 0.000000); + AlbedoOut = vec4(Albedo, 1.000000); +} + +void main() +{ + vec3 Normal = ((texture(TexNormalmap, TexCoord).xyz * 2.000000) - 
vec3(1.000000)); + Normal = normalize(Normal); + highp float param = (length(EyeVec) / 1000.000000); + vec2 scatter_uv; + scatter_uv.x = saturate(param); + vec3 nEye = normalize(EyeVec); + scatter_uv.y = 0.000000; + vec3 Color = vec3(0.100000, 0.300000, 0.100000); + vec3 grass = vec3(0.100000, 0.300000, 0.100000); + vec3 dirt = vec3(0.100000); + vec3 snow = vec3(0.800000); + float grass_snow = smoothstep(0.000000, 0.150000, ((_56.g_CamPos.y + EyeVec.y) / 200.000000)); + vec3 base = mix(grass, snow, vec3(grass_snow)); + float edge = smoothstep(0.700000, 0.750000, Normal.y); + Color = mix(dirt, base, vec3(edge)); + Color = (Color * Color); + float Roughness = (1.000000 - (edge * grass_snow)); + highp vec3 param_1 = Color; + highp vec3 param_2 = Normal; + highp float param_3 = Roughness; + highp float param_4 = 0.000000; + Resolve(param_1, param_2, param_3, param_4); +} + diff --git a/reference/shaders/frag/mix.frag b/reference/shaders/frag/mix.frag new file mode 100644 index 0000000..3ea1a9d --- /dev/null +++ b/reference/shaders/frag/mix.frag @@ -0,0 +1,38 @@ +#version 310 es +precision mediump float; +precision highp int; + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec4 vIn0; +layout(location = 1) in vec4 vIn1; +layout(location = 2) in float vIn2; +layout(location = 3) in float vIn3; + +void main() +{ + bvec4 l = bvec4(false, true, false, false); + FragColor = mix(vIn0, vIn1, l); + bool f = true; + FragColor = vec4(mix(vIn2, vIn3, f)); + highp vec4 _35; + highp float _44; + if (f) + { + _35 = vIn0; + } + else + { + _35 = vIn1; + } + FragColor = _35; + if (f) + { + _44 = vIn2; + } + else + { + _44 = vIn3; + } + FragColor = vec4(_44); +} + diff --git a/reference/shaders/frag/pls.frag b/reference/shaders/frag/pls.frag new file mode 100644 index 0000000..7acc777 --- /dev/null +++ b/reference/shaders/frag/pls.frag @@ -0,0 +1,21 @@ +#version 310 es +precision mediump float; +precision highp int; + +layout(location = 0) out vec4 PLSOut0; +layout(location = 0) in vec4 PLSIn0; +layout(location = 1) out vec4 PLSOut1; +layout(location = 1) in vec4 PLSIn1; +layout(location = 2) out vec4 PLSOut2; +in vec4 PLSIn2; +layout(location = 3) out vec4 PLSOut3; +in vec4 PLSIn3; + +void main() +{ + PLSOut0 = (PLSIn0 * 2.000000); + PLSOut1 = (PLSIn1 * 6.000000); + PLSOut2 = (PLSIn2 * 7.000000); + PLSOut3 = (PLSIn3 * 4.000000); +} + diff --git a/reference/shaders/frag/sampler.frag b/reference/shaders/frag/sampler.frag new file mode 100644 index 0000000..af07f17 --- /dev/null +++ b/reference/shaders/frag/sampler.frag @@ -0,0 +1,21 @@ +#version 310 es +precision mediump float; +precision highp int; + +layout(binding = 0) uniform mediump sampler2D uTex; + +layout(location = 0) out vec4 FragColor; +in vec4 vColor; +in vec2 vTex; + +vec4 sample_texture(mediump sampler2D tex, vec2 uv) +{ + return texture(tex, uv); +} + +void main() +{ + highp vec2 param = vTex; + FragColor = (vColor * sample_texture(uTex, param)); +} + diff --git a/reference/shaders/frag/swizzle.frag b/reference/shaders/frag/swizzle.frag new file mode 100644 index 0000000..4ffa0ce --- /dev/null +++ b/reference/shaders/frag/swizzle.frag @@ -0,0 +1,20 @@ +#version 310 es +precision mediump float; +precision highp int; + +layout(location = 0) uniform mediump sampler2D samp; + +layout(location = 0) out vec4 FragColor; +layout(location = 2) in vec2 vUV; +layout(location = 1) in vec3 vNormal; + +void main() +{ + FragColor = vec4(texture(samp, vUV).xyz, 1.000000); + FragColor = vec4(texture(samp, vUV).xz, 1.000000, 4.000000); + 
FragColor = vec4(texture(samp, vUV).xx, texture(samp, (vUV + vec2(0.100000))).yy); + FragColor = vec4(vNormal, 1.000000); + FragColor = vec4((vNormal + vec3(1.800000)), 1.000000); + FragColor = vec4(vUV, (vUV + vec2(1.800000))); +} + diff --git a/reference/shaders/frag/ubo_layout.frag b/reference/shaders/frag/ubo_layout.frag new file mode 100644 index 0000000..1ea3150 --- /dev/null +++ b/reference/shaders/frag/ubo_layout.frag @@ -0,0 +1,26 @@ +#version 310 es +precision mediump float; +precision highp int; + +struct Str +{ + mat4 foo; +}; + +layout(binding = 0, std140) uniform UBO1 +{ + layout(row_major) Str foo; +} ubo1; + +layout(binding = 1, std140) uniform UBO2 +{ + Str foo; +} ubo0; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = (ubo1.foo.foo[0] + ubo0.foo.foo[0]); +} + diff --git a/reference/shaders/geom/basic.geom b/reference/shaders/geom/basic.geom new file mode 100644 index 0000000..d9a7510 --- /dev/null +++ b/reference/shaders/geom/basic.geom @@ -0,0 +1,26 @@ +#version 310 es +#extension GL_EXT_geometry_shader : require +layout(invocations = 4, triangles) in; +layout(max_vertices = 3, triangle_strip) out; + +out vec3 vNormal; +in VertexData +{ + vec3 normal; +} vin[3]; + + +void main() +{ + gl_Position = gl_in[0].gl_Position; + vNormal = (vin[0].normal + vec3(float(gl_InvocationID))); + EmitVertex(); + gl_Position = gl_in[1].gl_Position; + vNormal = (vin[1].normal + vec3((4.000000 * float(gl_InvocationID)))); + EmitVertex(); + gl_Position = gl_in[2].gl_Position; + vNormal = (vin[2].normal + vec3((2.000000 * float(gl_InvocationID)))); + EmitVertex(); + EndPrimitive(); +} + diff --git a/reference/shaders/tesc/basic.tesc b/reference/shaders/tesc/basic.tesc new file mode 100644 index 0000000..08f4219 --- /dev/null +++ b/reference/shaders/tesc/basic.tesc @@ -0,0 +1,17 @@ +#version 310 es +#extension GL_EXT_tessellation_shader : require +layout(vertices = 1) out; + +out patch vec3 vFoo; + +void main() +{ + gl_TessLevelInner[0] = 8.900000; + gl_TessLevelInner[1] = 6.900000; + gl_TessLevelOuter[0] = 8.900000; + gl_TessLevelOuter[1] = 6.900000; + gl_TessLevelOuter[2] = 3.900000; + gl_TessLevelOuter[3] = 4.900000; + vFoo = vec3(1.000000); +} + diff --git a/reference/shaders/tesc/water_tess.tesc b/reference/shaders/tesc/water_tess.tesc new file mode 100644 index 0000000..660dc53 --- /dev/null +++ b/reference/shaders/tesc/water_tess.tesc @@ -0,0 +1,117 @@ +#version 310 es +#extension GL_EXT_tessellation_shader : require +layout(vertices = 1) out; + +layout(std140) uniform UBO +{ + vec4 uScale; + vec3 uCamPos; + vec2 uPatchSize; + vec2 uMaxTessLevel; + float uDistanceMod; + vec4 uFrustum[6]; +} _41; + +out patch vec2 vOutPatchPosBase; +out patch vec4 vPatchLods; +in vec2 vPatchPosBase[32]; + +bool frustum_cull(vec2 p0) +{ + vec2 min_xz = ((p0 - vec2(10.000000)) * _41.uScale.xy); + vec2 max_xz = (((p0 + _41.uPatchSize) + vec2(10.000000)) * _41.uScale.xy); + vec3 bb_min = vec3(min_xz.x, -10.000000, min_xz.y); + vec3 bb_max = vec3(max_xz.x, 10.000000, max_xz.y); + vec3 center = ((bb_min + bb_max) * 0.500000); + float radius = (0.500000 * length((bb_max - bb_min))); + vec3 f0 = vec3(dot(_41.uFrustum[0], vec4(center, 1.000000)), dot(_41.uFrustum[1], vec4(center, 1.000000)), dot(_41.uFrustum[2], vec4(center, 1.000000))); + vec3 f1 = vec3(dot(_41.uFrustum[3], vec4(center, 1.000000)), dot(_41.uFrustum[4], vec4(center, 1.000000)), dot(_41.uFrustum[5], vec4(center, 1.000000))); + bool _205 = any(lessThanEqual(f0, vec3((-radius)))); + bool _215; + if ((!_205)) + { + _215 = 
any(lessThanEqual(f1, vec3((-radius)))); + } + else + { + _215 = _205; + } + return (!_215); +} + +float lod_factor(vec2 pos_) +{ + vec2 pos = (pos_ * _41.uScale.xy); + vec3 dist_to_cam = (_41.uCamPos - vec3(pos.x, 0.000000, pos.y)); + float level = log2(((length(dist_to_cam) + 0.000100) * _41.uDistanceMod)); + return clamp(level, 0.000000, _41.uMaxTessLevel.x); +} + +vec4 tess_level(vec4 lod) +{ + return (exp2((-lod)) * _41.uMaxTessLevel.y); +} + +float tess_level(float lod) +{ + return (_41.uMaxTessLevel.y * exp2((-lod))); +} + +void compute_tess_levels(vec2 p0) +{ + vOutPatchPosBase = p0; + vec2 param = (p0 + (vec2(-0.500000) * _41.uPatchSize)); + float l00 = lod_factor(param); + vec2 param_1 = (p0 + (vec2(0.500000, -0.500000) * _41.uPatchSize)); + float l10 = lod_factor(param_1); + vec2 param_2 = (p0 + (vec2(1.500000, -0.500000) * _41.uPatchSize)); + float l20 = lod_factor(param_2); + vec2 param_3 = (p0 + (vec2(-0.500000, 0.500000) * _41.uPatchSize)); + float l01 = lod_factor(param_3); + vec2 param_4 = (p0 + (vec2(0.500000) * _41.uPatchSize)); + float l11 = lod_factor(param_4); + vec2 param_5 = (p0 + (vec2(1.500000, 0.500000) * _41.uPatchSize)); + float l21 = lod_factor(param_5); + vec2 param_6 = (p0 + (vec2(-0.500000, 1.500000) * _41.uPatchSize)); + float l02 = lod_factor(param_6); + vec2 param_7 = (p0 + (vec2(0.500000, 1.500000) * _41.uPatchSize)); + float l12 = lod_factor(param_7); + vec2 param_8 = (p0 + (vec2(1.500000) * _41.uPatchSize)); + float l22 = lod_factor(param_8); + vec4 lods = vec4(dot(vec4(l01, l11, l02, l12), vec4(0.250000)), dot(vec4(l00, l10, l01, l11), vec4(0.250000)), dot(vec4(l10, l20, l11, l21), vec4(0.250000)), dot(vec4(l11, l21, l12, l22), vec4(0.250000))); + vPatchLods = lods; + vec4 outer_lods = min(lods, lods.yzwx); + vec4 param_9 = outer_lods; + vec4 levels = tess_level(param_9); + gl_TessLevelOuter[0] = levels.x; + gl_TessLevelOuter[1] = levels.y; + gl_TessLevelOuter[2] = levels.z; + gl_TessLevelOuter[3] = levels.w; + float min_lod = min(min(lods.x, lods.y), min(lods.z, lods.w)); + float param_10 = min(min_lod, l11); + float inner = tess_level(param_10); + gl_TessLevelInner[0] = inner; + gl_TessLevelInner[1] = inner; +} + +void main() +{ + vec2 p0 = vPatchPosBase[0]; + vec2 param = p0; + vec2 param_1; + if ((!frustum_cull(param))) + { + gl_TessLevelOuter[0] = -1.000000; + gl_TessLevelOuter[1] = -1.000000; + gl_TessLevelOuter[2] = -1.000000; + gl_TessLevelOuter[3] = -1.000000; + gl_TessLevelInner[0] = -1.000000; + gl_TessLevelInner[1] = -1.000000; + } + else + { + param_1 = p0; + compute_tess_levels(param_1); + } +} + diff --git a/reference/shaders/tese/water_tess.tese b/reference/shaders/tese/water_tess.tese new file mode 100644 index 0000000..3724545 --- /dev/null +++ b/reference/shaders/tese/water_tess.tese @@ -0,0 +1,61 @@ +#version 310 es +#extension GL_EXT_tessellation_shader : require +layout(quads, cw, fractional_even_spacing) in; + +layout(binding = 1, std140) uniform UBO +{ + mat4 uMVP; + vec4 uScale; + vec2 uInvScale; + vec3 uCamPos; + vec2 uPatchSize; + vec2 uInvHeightmapSize; +} _31; + +layout(binding = 0) uniform mediump sampler2D uHeightmapDisplacement; + +in patch vec2 vOutPatchPosBase; +in patch vec4 vPatchLods; +out vec4 vGradNormalTex; +out vec3 vWorld; + +vec2 lerp_vertex(vec2 tess_coord) +{ + return (vOutPatchPosBase + (tess_coord * _31.uPatchSize)); +} + +mediump vec2 lod_factor(vec2 tess_coord) +{ + mediump vec2 x = mix(vPatchLods.yx, vPatchLods.zw, vec2(tess_coord.x)); + mediump float level = mix(x.x, x.y, tess_coord.y); + mediump 
float floor_level = floor(level); + mediump float fract_level = (level - floor_level); + return vec2(floor_level, fract_level); +} + +mediump vec3 sample_height_displacement(vec2 uv, vec2 off, mediump vec2 lod) +{ + return mix(textureLod(uHeightmapDisplacement, (uv + (off * 0.500000)), lod.x).xyz, textureLod(uHeightmapDisplacement, (uv + (off * 1.000000)), (lod.x + 1.000000)).xyz, vec3(lod.y)); +} + +void main() +{ + vec2 tess_coord = gl_TessCoord.xy; + vec2 param = tess_coord; + vec2 pos = lerp_vertex(param); + vec2 param_1 = tess_coord; + mediump vec2 lod = lod_factor(param_1); + vec2 tex = (pos * _31.uInvHeightmapSize); + pos = (pos * _31.uScale.xy); + mediump float delta_mod = exp2(lod.x); + vec2 off = (_31.uInvHeightmapSize * delta_mod); + vGradNormalTex = vec4((tex + (_31.uInvHeightmapSize * 0.500000)), (tex * _31.uScale.zw)); + vec2 param_2 = tex; + vec2 param_3 = off; + vec2 param_4 = lod; + vec3 height_displacement = sample_height_displacement(param_2, param_3, param_4); + pos = (pos + height_displacement.yz); + vWorld = vec3(pos.x, height_displacement.x, pos.y); + gl_Position = (_31.uMVP * vec4(vWorld, 1.000000)); +} + diff --git a/reference/shaders/vert/basic.vert b/reference/shaders/vert/basic.vert new file mode 100644 index 0000000..fc81060 --- /dev/null +++ b/reference/shaders/vert/basic.vert @@ -0,0 +1,17 @@ +#version 310 es + +layout(std140) uniform UBO +{ + mat4 uMVP; +} _16; + +in vec4 aVertex; +out vec3 vNormal; +in vec3 aNormal; + +void main() +{ + gl_Position = (_16.uMVP * aVertex); + vNormal = aNormal; +} + diff --git a/reference/shaders/vert/ground.vert b/reference/shaders/vert/ground.vert new file mode 100644 index 0000000..0082eb9 --- /dev/null +++ b/reference/shaders/vert/ground.vert @@ -0,0 +1,111 @@ +#version 310 es + +struct PatchData +{ + vec4 Position; + vec4 LODs; +}; + +layout(binding = 0, std140) uniform GlobalVSData +{ + vec4 g_ViewProj_Row0; + vec4 g_ViewProj_Row1; + vec4 g_ViewProj_Row2; + vec4 g_ViewProj_Row3; + vec4 g_CamPos; + vec4 g_CamRight; + vec4 g_CamUp; + vec4 g_CamFront; + vec4 g_SunDir; + vec4 g_SunColor; + vec4 g_TimeParams; + vec4 g_ResolutionParams; + vec4 g_CamAxisRight; + vec4 g_FogColor_Distance; + vec4 g_ShadowVP_Row0; + vec4 g_ShadowVP_Row1; + vec4 g_ShadowVP_Row2; + vec4 g_ShadowVP_Row3; +} _58; + +layout(binding = 0, std140) uniform PerPatch +{ + PatchData Patches[256]; +} _284; + +layout(binding = 2, std140) uniform GlobalGround +{ + vec4 GroundScale; + vec4 GroundPosition; + vec4 InvGroundSize_PatchScale; +} _381; + +layout(binding = 1) uniform mediump sampler2D TexLOD; +layout(binding = 0) uniform mediump sampler2D TexHeightmap; + +layout(location = 1) in vec4 LODWeights; +layout(location = 0) in vec2 Position; +layout(location = 1) out vec3 EyeVec; +layout(location = 0) out vec2 TexCoord; + +vec2 warp_position() +{ + float vlod = dot(LODWeights, _284.Patches[gl_InstanceID].LODs); + vlod = mix(vlod, _284.Patches[gl_InstanceID].Position.w, all(equal(LODWeights, vec4(0.000000)))); + float floor_lod = floor(vlod); + float fract_lod = (vlod - floor_lod); + uint ufloor_lod = uint(floor_lod); + uvec2 uPosition = uvec2(Position); + uvec2 mask = ((uvec2(1u) << uvec2(ufloor_lod, (ufloor_lod + 1u))) - uvec2(1u)); + uvec2 rounding; + uint _332; + uint _343; + vec4 lower_upper_snapped; + if ((uPosition.x < 32u)) + { + _332 = mask.x; + } + else + { + _332 = 0u; + } + uint _342 = _332; + if ((uPosition.y < 32u)) + { + _343 = mask.y; + } + else + { + _343 = 0u; + } + rounding = uvec2(_342, _343); + lower_upper_snapped = vec4(((uPosition + 
rounding).xyxy & (~mask).xxyy)); + return mix(lower_upper_snapped.xy, lower_upper_snapped.zw, vec2(fract_lod)); +} + +vec2 lod_factor(vec2 uv) +{ + float level = (textureLod(TexLOD, uv, 0.000000).x * 7.968750); + float floor_level = floor(level); + float fract_level = (level - floor_level); + return vec2(floor_level, fract_level); +} + +void main() +{ + vec2 PatchPos = (_284.Patches[gl_InstanceID].Position.xz * _381.InvGroundSize_PatchScale.zw); + vec2 WarpedPos = warp_position(); + vec2 VertexPos = (PatchPos + WarpedPos); + vec2 NormalizedPos = (VertexPos * _381.InvGroundSize_PatchScale.xy); + vec2 param = NormalizedPos; + vec2 lod = lod_factor(param); + vec2 Offset = (_381.InvGroundSize_PatchScale.xy * exp2(lod.x)); + float Elevation = mix(textureLod(TexHeightmap, (NormalizedPos + (Offset * 0.500000)), lod.x).x, textureLod(TexHeightmap, (NormalizedPos + (Offset * 1.000000)), (lod.x + 1.000000)).x, lod.y); + vec3 WorldPos = vec3(NormalizedPos.x, Elevation, NormalizedPos.y); + WorldPos = (WorldPos * _381.GroundScale.xyz); + WorldPos = (WorldPos + _381.GroundPosition.xyz); + EyeVec = (WorldPos - _58.g_CamPos.xyz); + TexCoord = (NormalizedPos + (_381.InvGroundSize_PatchScale.xy * 0.500000)); + gl_Position = ((((_58.g_ViewProj_Row0 * WorldPos.x) + (_58.g_ViewProj_Row1 * WorldPos.y)) + (_58.g_ViewProj_Row2 * WorldPos.z)) + _58.g_ViewProj_Row3); +} + diff --git a/reference/shaders/vert/ocean.vert b/reference/shaders/vert/ocean.vert new file mode 100644 index 0000000..87faf5d --- /dev/null +++ b/reference/shaders/vert/ocean.vert @@ -0,0 +1,133 @@ +#version 310 es + +struct PatchData +{ + vec4 Position; + vec4 LODs; +}; + +layout(binding = 0, std140) uniform GlobalVSData +{ + vec4 g_ViewProj_Row0; + vec4 g_ViewProj_Row1; + vec4 g_ViewProj_Row2; + vec4 g_ViewProj_Row3; + vec4 g_CamPos; + vec4 g_CamRight; + vec4 g_CamUp; + vec4 g_CamFront; + vec4 g_SunDir; + vec4 g_SunColor; + vec4 g_TimeParams; + vec4 g_ResolutionParams; + vec4 g_CamAxisRight; + vec4 g_FogColor_Distance; + vec4 g_ShadowVP_Row0; + vec4 g_ShadowVP_Row1; + vec4 g_ShadowVP_Row2; + vec4 g_ShadowVP_Row3; +} _58; + +layout(binding = 0, std140) uniform Offsets +{ + PatchData Patches[256]; +} _284; + +layout(binding = 4, std140) uniform GlobalOcean +{ + vec4 OceanScale; + vec4 OceanPosition; + vec4 InvOceanSize_PatchScale; + vec4 NormalTexCoordScale; +} _405; + +layout(binding = 1) uniform mediump sampler2D TexLOD; +layout(binding = 0) uniform mediump sampler2D TexDisplacement; + +layout(location = 1) in vec4 LODWeights; +layout(location = 0) in vec4 Position; +layout(location = 0) out vec3 EyeVec; +layout(location = 1) out vec4 TexCoord; + +vec2 warp_position() +{ + float vlod = dot(LODWeights, _284.Patches[gl_InstanceID].LODs); + vlod = mix(vlod, _284.Patches[gl_InstanceID].Position.w, all(equal(LODWeights, vec4(0.000000)))); + float floor_lod = floor(vlod); + float fract_lod = (vlod - floor_lod); + uint ufloor_lod = uint(floor_lod); + uvec4 uPosition = uvec4(Position); + uvec2 mask = ((uvec2(1u) << uvec2(ufloor_lod, (ufloor_lod + 1u))) - uvec2(1u)); + uvec4 rounding; + uint _333; + uint _345; + uint _356; + uint _368; + vec4 lower_upper_snapped; + if ((uPosition.x < 32u)) + { + _333 = mask.x; + } + else + { + _333 = 0u; + } + rounding.x = _333; + if ((uPosition.y < 32u)) + { + _345 = mask.x; + } + else + { + _345 = 0u; + } + rounding.y = _345; + if ((uPosition.x < 32u)) + { + _356 = mask.y; + } + else + { + _356 = 0u; + } + rounding.z = _356; + if ((uPosition.y < 32u)) + { + _368 = mask.y; + } + else + { + _368 = 0u; + } + rounding.w = 
_368; + lower_upper_snapped = vec4(((uPosition.xyxy + rounding) & (~mask).xxyy)); + return mix(lower_upper_snapped.xy, lower_upper_snapped.zw, vec2(fract_lod)); +} + +vec2 lod_factor(vec2 uv) +{ + float level = (textureLod(TexLOD, uv, 0.000000).x * 7.968750); + float floor_level = floor(level); + float fract_level = (level - floor_level); + return vec2(floor_level, fract_level); +} + +void main() +{ + vec2 PatchPos = (_284.Patches[gl_InstanceID].Position.xz * _405.InvOceanSize_PatchScale.zw); + vec2 WarpedPos = warp_position(); + vec2 VertexPos = (PatchPos + WarpedPos); + vec2 NormalizedPos = (VertexPos * _405.InvOceanSize_PatchScale.xy); + vec2 NormalizedTex = (NormalizedPos * _405.NormalTexCoordScale.zw); + vec2 param = NormalizedPos; + vec2 lod = lod_factor(param); + vec2 Offset = ((_405.InvOceanSize_PatchScale.xy * exp2(lod.x)) * _405.NormalTexCoordScale.zw); + vec3 Displacement = mix(textureLod(TexDisplacement, (NormalizedTex + (Offset * 0.500000)), lod.x).yxz, textureLod(TexDisplacement, (NormalizedTex + (Offset * 1.000000)), (lod.x + 1.000000)).yxz, vec3(lod.y)); + vec3 WorldPos = (vec3(NormalizedPos.x, 0.000000, NormalizedPos.y) + Displacement); + WorldPos = (WorldPos * _405.OceanScale.xyz); + WorldPos = (WorldPos + _405.OceanPosition.xyz); + EyeVec = (WorldPos - _58.g_CamPos.xyz); + TexCoord = (vec4(NormalizedTex, (NormalizedTex * _405.NormalTexCoordScale.xy)) + ((_405.InvOceanSize_PatchScale.xyxy * 0.500000) * _405.NormalTexCoordScale.zwzw)); + gl_Position = ((((_58.g_ViewProj_Row0 * WorldPos.x) + (_58.g_ViewProj_Row1 * WorldPos.y)) + (_58.g_ViewProj_Row2 * WorldPos.z)) + _58.g_ViewProj_Row3); +} + diff --git a/reference/shaders/vert/texture_buffer.vert b/reference/shaders/vert/texture_buffer.vert new file mode 100644 index 0000000..b5fa0dc --- /dev/null +++ b/reference/shaders/vert/texture_buffer.vert @@ -0,0 +1,11 @@ +#version 310 es +#extension GL_OES_texture_buffer : require + +layout(binding = 4) uniform highp samplerBuffer uSamp; +layout(binding = 5, rgba32f) uniform highp readonly imageBuffer uSampo; + +void main() +{ + gl_Position = (texelFetch(uSamp, 10) + imageLoad(uSampo, 100)); +} + diff --git a/reference/shaders/vert/ubo.vert b/reference/shaders/vert/ubo.vert new file mode 100644 index 0000000..6e38dde --- /dev/null +++ b/reference/shaders/vert/ubo.vert @@ -0,0 +1,17 @@ +#version 310 es + +layout(binding = 0, std140) uniform UBO +{ + mat4 mvp; +} _16; + +in vec4 aVertex; +out vec3 vNormal; +in vec3 aNormal; + +void main() +{ + gl_Position = (_16.mvp * aVertex); + vNormal = aNormal; +} + diff --git a/shaders/comp/atomic.comp b/shaders/comp/atomic.comp new file mode 100644 index 0000000..fadab97 --- /dev/null +++ b/shaders/comp/atomic.comp @@ -0,0 +1,51 @@ +#version 310 es +#extension GL_OES_shader_image_atomic : require +layout(local_size_x = 1) in; + +layout(r32ui, binding = 0) uniform highp uimage2D uImage; +layout(r32i, binding = 1) uniform highp iimage2D iImage; +layout(binding = 2, std430) buffer SSBO +{ + uint u32; + int i32; +} ssbo; + +void main() +{ + imageAtomicAdd(uImage, ivec2(1, 5), 1u); + imageAtomicOr(uImage, ivec2(1, 5), 1u); + imageAtomicXor(uImage, ivec2(1, 5), 1u); + imageAtomicAnd(uImage, ivec2(1, 5), 1u); + imageAtomicMin(uImage, ivec2(1, 5), 1u); + imageAtomicMax(uImage, ivec2(1, 5), 1u); + //imageAtomicExchange(uImage, ivec2(1, 5), 1u); + imageAtomicCompSwap(uImage, ivec2(1, 5), 10u, 2u); + + imageAtomicAdd(iImage, ivec2(1, 6), 1); + imageAtomicOr(iImage, ivec2(1, 6), 1); + imageAtomicXor(iImage, ivec2(1, 6), 1); + imageAtomicAnd(iImage, 
ivec2(1, 6), 1); + imageAtomicMin(iImage, ivec2(1, 6), 1); + imageAtomicMax(iImage, ivec2(1, 6), 1); + //imageAtomicExchange(iImage, ivec2(1, 5), 1u); + imageAtomicCompSwap(iImage, ivec2(1, 5), 10, 2); + + atomicAdd(ssbo.u32, 1u); + atomicOr(ssbo.u32, 1u); + atomicXor(ssbo.u32, 1u); + atomicAnd(ssbo.u32, 1u); + atomicMin(ssbo.u32, 1u); + atomicMax(ssbo.u32, 1u); + atomicExchange(ssbo.u32, 1u); + atomicCompSwap(ssbo.u32, 10u, 2u); + + atomicAdd(ssbo.i32, 1); + atomicOr(ssbo.i32, 1); + atomicXor(ssbo.i32, 1); + atomicAnd(ssbo.i32, 1); + atomicMin(ssbo.i32, 1); + atomicMax(ssbo.i32, 1); + atomicExchange(ssbo.i32, 1); + atomicCompSwap(ssbo.i32, 10, 2); +} + diff --git a/shaders/comp/bake_gradient.comp b/shaders/comp/bake_gradient.comp new file mode 100644 index 0000000..4885ff0 --- /dev/null +++ b/shaders/comp/bake_gradient.comp @@ -0,0 +1,55 @@ +#version 310 es + +layout(local_size_x = 8, local_size_y = 8) in; + +layout(binding = 0) uniform sampler2D uHeight; +layout(binding = 1) uniform sampler2D uDisplacement; +layout(rgba16f, binding = 2) uniform writeonly mediump image2D iHeightDisplacement; +layout(rgba16f, binding = 3) uniform writeonly mediump image2D iGradJacobian; + +layout(binding = 4) uniform UBO +{ + vec4 uInvSize; + vec4 uScale; +}; + +mediump float jacobian(mediump vec2 dDdx, mediump vec2 dDdy) +{ + return (1.0 + dDdx.x) * (1.0 + dDdy.y) - dDdx.y * dDdy.x; +} +#define LAMBDA 1.2 + +void main() +{ + vec4 uv = (vec2(gl_GlobalInvocationID.xy) * uInvSize.xy).xyxy + 0.5 * uInvSize; + + float h = textureLod(uHeight, uv.xy, 0.0).x; + + // Compute the heightmap gradient by simple differentiation. + float x0 = textureLodOffset(uHeight, uv.xy, 0.0, ivec2(-1, 0)).x; + float x1 = textureLodOffset(uHeight, uv.xy, 0.0, ivec2(+1, 0)).x; + float y0 = textureLodOffset(uHeight, uv.xy, 0.0, ivec2(0, -1)).x; + float y1 = textureLodOffset(uHeight, uv.xy, 0.0, ivec2(0, +1)).x; + vec2 grad = uScale.xy * 0.5 * vec2(x1 - x0, y1 - y0); + + // Displacement map must be sampled with a different offset since it's a smaller texture. + vec2 displacement = LAMBDA * textureLod(uDisplacement, uv.zw, 0.0).xy; + + // Compute jacobian. + vec2 dDdx = 0.5 * LAMBDA * ( + textureLodOffset(uDisplacement, uv.zw, 0.0, ivec2(+1, 0)).xy - + textureLodOffset(uDisplacement, uv.zw, 0.0, ivec2(-1, 0)).xy); + vec2 dDdy = 0.5 * LAMBDA * ( + textureLodOffset(uDisplacement, uv.zw, 0.0, ivec2(0, +1)).xy - + textureLodOffset(uDisplacement, uv.zw, 0.0, ivec2(0, -1)).xy); + float j = jacobian(dDdx * uScale.z, dDdy * uScale.z); + + displacement = vec2(0.0); + + // Read by vertex shader/tess shader. + imageStore(iHeightDisplacement, ivec2(gl_GlobalInvocationID.xy), vec4(h, displacement, 0.0)); + + // Read by fragment shader. 
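+	// Note on jacobian(): it expands the determinant of the 2x2 horizontal
+	// deformation gradient I + J, with J built from dDdx and dDdy, i.e.
+	//   (1.0 + dDdx.x) * (1.0 + dDdy.y) - dDdx.y * dDdy.x.
+	// Values well below 1.0 mean the displaced grid folds over itself; a
+	// fragment shader can use that as a whitecap/foam indicator.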
+ imageStore(iGradJacobian, ivec2(gl_GlobalInvocationID.xy), vec4(grad, j, 0.0)); +} + diff --git a/shaders/comp/basic.comp b/shaders/comp/basic.comp new file mode 100644 index 0000000..f9bf556 --- /dev/null +++ b/shaders/comp/basic.comp @@ -0,0 +1,28 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(std430, binding = 0) readonly buffer SSBO +{ + vec4 in_data[]; +}; + +layout(std430, binding = 1) writeonly buffer SSBO2 +{ + vec4 out_data[]; +}; + +layout(std430, binding = 2) buffer SSBO3 +{ + uint counter; +}; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + vec4 idata = in_data[ident]; + if (dot(idata, vec4(1.0, 5.0, 6.0, 2.0)) > 8.2) + { + out_data[atomicAdd(counter, 1u)] = idata; + } +} + diff --git a/shaders/comp/culling.comp b/shaders/comp/culling.comp new file mode 100644 index 0000000..9f8331b --- /dev/null +++ b/shaders/comp/culling.comp @@ -0,0 +1,26 @@ +#version 310 es +layout(local_size_x = 4) in; + +layout(std430, binding = 0) readonly buffer SSBO +{ + float in_data[]; +}; + +layout(std430, binding = 1) writeonly buffer SSBO2 +{ + float out_data[]; +}; + +layout(std430, binding = 2) buffer SSBO3 +{ + uint count; +}; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + float idata = in_data[ident]; + if (idata > 12.0) + out_data[atomicAdd(count, 1u)] = idata; +} + diff --git a/shaders/comp/dowhile.comp b/shaders/comp/dowhile.comp new file mode 100644 index 0000000..709db75 --- /dev/null +++ b/shaders/comp/dowhile.comp @@ -0,0 +1,31 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(std430, binding = 0) readonly buffer SSBO +{ + mat4 mvp; + vec4 in_data[]; +}; + +layout(std430, binding = 1) writeonly buffer SSBO2 +{ + vec4 out_data[]; +}; + +int i; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + + i = 0; + vec4 idat = in_data[ident]; + do + { + idat = mvp * idat; + i++; + } while(i < 16); + + out_data[ident] = idat; +} + diff --git a/shaders/comp/generate_height.comp b/shaders/comp/generate_height.comp new file mode 100644 index 0000000..16cef4d --- /dev/null +++ b/shaders/comp/generate_height.comp @@ -0,0 +1,97 @@ +#version 310 es + +layout(local_size_x = 64) in; + +layout(std430, binding = 0) readonly buffer Distribution +{ + vec2 distribution[]; +}; + +layout(std430, binding = 1) writeonly buffer HeightmapFFT +{ + uint heights[]; +}; + +layout(binding = 2, std140) uniform UBO +{ + vec4 uModTime; +}; + +vec2 alias(vec2 i, vec2 N) +{ + return mix(i, i - N, greaterThan(i, 0.5 * N)); +} + +vec4 cmul(vec4 a, vec4 b) +{ + vec4 r3 = a.yxwz; + vec4 r1 = b.xxzz; + vec4 R0 = a * r1; + vec4 r2 = b.yyww; + vec4 R1 = r2 * r3; + return R0 + vec4(-R1.x, R1.y, -R1.z, R1.w); +} + +vec2 cmul(vec2 a, vec2 b) +{ + vec2 r3 = a.yx; + vec2 r1 = b.xx; + vec2 R0 = a * r1; + vec2 r2 = b.yy; + vec2 R1 = r2 * r3; + return R0 + vec2(-R1.x, R1.y); +} + +uint pack2(vec2 v) +{ + return packHalf2x16(v); +} + +uvec2 pack4(vec4 v) +{ + return uvec2(packHalf2x16(v.xy), packHalf2x16(v.zw)); +} + +uvec2 workaround_mix(uvec2 a, uvec2 b, bvec2 sel) +{ + return uvec2(sel.x ? b.x : a.x, sel.y ? b.y : a.y); +} + +void generate_heightmap() +{ + uvec2 N = gl_WorkGroupSize.xy * gl_NumWorkGroups.xy; + uvec2 i = gl_GlobalInvocationID.xy; + // Pick out the negative frequency variant. + uvec2 wi = workaround_mix(N - i, uvec2(0u), equal(i, uvec2(0u))); + + // Pick out positive and negative travelling waves. 
+ vec2 a = distribution[i.y * N.x + i.x]; + vec2 b = distribution[wi.y * N.x + wi.x]; + + vec2 k = uModTime.xy * alias(vec2(i), vec2(N)); + float k_len = length(k); + + const float G = 9.81; + + // If this sample runs for hours on end, the cosines of very large numbers will eventually become unstable. + // It is fairly easy to fix this by wrapping uTime, + // and quantizing w such that wrapping uTime does not change the result. + // See Tessendorf's paper for how to do it. + // The sqrt(G * k_len) factor represents how fast ocean waves at different frequencies propagate. + float w = sqrt(G * k_len) * uModTime.z; + float cw = cos(w); + float sw = sin(w); + + // Complex multiply to rotate our frequency samples. + a = cmul(a, vec2(cw, sw)); + b = cmul(b, vec2(cw, sw)); + b = vec2(b.x, -b.y); // Complex conjugate since we picked a frequency with the opposite direction. + vec2 res = a + b; // Sum up forward and backwards travelling waves. + heights[i.y * N.x + i.x] = pack2(res); +} + +void main() +{ + generate_heightmap(); +} + diff --git a/shaders/comp/image.comp b/shaders/comp/image.comp new file mode 100644 index 0000000..e375534 --- /dev/null +++ b/shaders/comp/image.comp @@ -0,0 +1,12 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(rgba8, binding = 0) uniform readonly mediump image2D uImageIn; +layout(rgba8, binding = 1) uniform writeonly mediump image2D uImageOut; + +void main() +{ + vec4 v = imageLoad(uImageIn, ivec2(gl_GlobalInvocationID.xy) + imageSize(uImageIn)); + imageStore(uImageOut, ivec2(gl_GlobalInvocationID.xy), v); +} + diff --git a/shaders/comp/inout-struct.comp b/shaders/comp/inout-struct.comp new file mode 100644 index 0000000..c1de959 --- /dev/null +++ b/shaders/comp/inout-struct.comp @@ -0,0 +1,55 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(std430, binding = 0) writeonly buffer SSBO +{ + vec4 data[]; +} outdata; + +layout(std430, binding = 1) readonly buffer SSBO2 +{ + vec4 data[]; +} indata; + +struct Foo +{ + vec4 a; + vec4 b; + vec4 c; + vec4 d; +}; + +layout(std430, binding = 2) readonly buffer SSBO3 +{ + Foo foos[]; +} foobar; + +vec4 bar(Foo foo) +{ + return foo.a + foo.b + foo.c + foo.d; +} + +void baz(out Foo foo) +{ + uint ident = gl_GlobalInvocationID.x; + foo.a = indata.data[4u * ident + 0u]; + foo.b = indata.data[4u * ident + 1u]; + foo.c = indata.data[4u * ident + 2u]; + foo.d = indata.data[4u * ident + 3u]; +} + +void meow(inout Foo foo) +{ + foo.a += 10.0; + foo.b += 20.0; + foo.c += 30.0; + foo.d += 40.0; +} + +void main() +{ + Foo foo; + baz(foo); + meow(foo); + outdata.data[gl_GlobalInvocationID.x] = bar(foo) + bar(foobar.foos[gl_GlobalInvocationID.x]); +} diff --git a/shaders/comp/insert.comp b/shaders/comp/insert.comp new file mode 100644 index 0000000..07c1f8d --- /dev/null +++ b/shaders/comp/insert.comp @@ -0,0 +1,18 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(std430, binding = 0) writeonly buffer SSBO +{ + vec4 out_data[]; +}; + +void main() +{ + vec4 v; + v.x = 10.0; + v.y = 30.0; + v.z = 70.0; + v.w = 90.0; + out_data[gl_GlobalInvocationID.x] = v; + out_data[gl_GlobalInvocationID.x].y = 20.0; +} diff --git a/shaders/comp/loop.comp b/shaders/comp/loop.comp new file mode 100644 index 0000000..03f38af --- /dev/null +++ b/shaders/comp/loop.comp @@ -0,0 +1,66 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(std430, binding = 0) readonly buffer SSBO +{ + mat4 mvp; + vec4 in_data[]; +}; + +layout(std430, binding = 1) writeonly buffer SSBO2 +{ + vec4 out_data[]; +}; + +void main() +{ + uint ident = 
gl_GlobalInvocationID.x; + vec4 idat = in_data[ident]; + + int k = 0; + while (k < 10) + { + idat *= 2.0; + k++; + } + + for (uint i = 0u; i < 16u; i++, k++) + for (uint j = 0u; j < 30u; j++) + idat = mvp * idat; + + k = 0; + for (;;) + { + k++; + if (k > 10) + { + k += 2; + } + else + { + k += 3; + continue; + } + + k += 10; + } + + k = 0; + do + { + k++; + } while (k > 10); + + int l = 0; + for (;; l++) + { + if (l == 5) + { + continue; + } + + idat += 1.0; + } + out_data[ident] = idat; +} + diff --git a/shaders/comp/mat3.comp b/shaders/comp/mat3.comp new file mode 100644 index 0000000..7c5bb1e --- /dev/null +++ b/shaders/comp/mat3.comp @@ -0,0 +1,14 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(std430, binding = 1) writeonly buffer SSBO2 +{ + mat3 out_data[]; +}; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + out_data[ident] = mat3(vec3(10.0), vec3(20.0), vec3(40.0)); +} + diff --git a/shaders/comp/modf.comp b/shaders/comp/modf.comp new file mode 100644 index 0000000..edadefc --- /dev/null +++ b/shaders/comp/modf.comp @@ -0,0 +1,23 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(std430, binding = 0) readonly buffer SSBO +{ + vec4 in_data[]; +}; + +layout(std430, binding = 1) writeonly buffer SSBO2 +{ + vec4 out_data[]; +}; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + vec4 i; + //vec4 v = frexp(in_data[ident], i); + //out_data[ident] = ldexp(v, i); + vec4 v = modf(in_data[ident], i); + out_data[ident] = v; +} + diff --git a/shaders/comp/return.comp b/shaders/comp/return.comp new file mode 100644 index 0000000..617f437 --- /dev/null +++ b/shaders/comp/return.comp @@ -0,0 +1,33 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(std430, binding = 1) writeonly buffer SSBO2 +{ + vec4 out_data[]; +}; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + + if (ident == 2u) + { + out_data[ident] = vec4(20.0); + } + else if (ident == 4u) + { + out_data[ident] = vec4(10.0); + return; + } + + for (int i = 0; i < 20; i++) + { + if (i == 10) + break; + + return; + } + + out_data[ident] = vec4(10.0); +} + diff --git a/shaders/comp/shared.comp b/shaders/comp/shared.comp new file mode 100644 index 0000000..4deff93 --- /dev/null +++ b/shaders/comp/shared.comp @@ -0,0 +1,27 @@ +#version 310 es +layout(local_size_x = 4) in; + +shared float sShared[gl_WorkGroupSize.x]; + +layout(std430, binding = 0) readonly buffer SSBO +{ + float in_data[]; +}; + +layout(std430, binding = 1) writeonly buffer SSBO2 +{ + float out_data[]; +}; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + float idata = in_data[ident]; + + sShared[gl_LocalInvocationIndex] = idata; + memoryBarrierShared(); + barrier(); + + out_data[ident] = sShared[gl_WorkGroupSize.x - gl_LocalInvocationIndex - 1u]; +} + diff --git a/shaders/comp/struct-layout.comp b/shaders/comp/struct-layout.comp new file mode 100644 index 0000000..5a2b780 --- /dev/null +++ b/shaders/comp/struct-layout.comp @@ -0,0 +1,24 @@ +#version 310 es +layout(local_size_x = 1) in; + +struct Foo +{ + mat4 m; +}; + +layout(std430, binding = 0) readonly buffer SSBO +{ + Foo in_data[]; +}; + +layout(std430, binding = 1) writeonly buffer SSBO2 +{ + Foo out_data[]; +}; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + out_data[ident].m = in_data[ident].m * in_data[ident].m; +} + diff --git a/shaders/comp/struct-packing.comp b/shaders/comp/struct-packing.comp new file mode 100644 index 0000000..b1cd558 --- /dev/null +++ b/shaders/comp/struct-packing.comp @@ -0,0 +1,69 @@ +#version 310 es + +struct S0 
+{ + vec2 a[1]; + float b; +}; + +struct S1 +{ + vec3 a; + float b; +}; + +struct S2 +{ + vec3 a[1]; + float b; +}; + +struct S3 +{ + vec2 a; + float b; +}; + +struct Content +{ + S0 m0s[1]; + S1 m1s[1]; + S2 m2s[1]; + S0 m0; + S1 m1; + S2 m2; + + S3 m3; + float m4; +}; + +layout(binding = 0, std140) buffer SSBO0 +{ + Content content; + Content content1[2]; + Content content2; + float array[]; +} ssbo_140; + +layout(binding = 1, std430) buffer SSBO1 +{ + Content content; + Content content1[2]; + Content content2; + + layout(column_major) mat2 m0; + layout(column_major) mat2 m1; + layout(column_major) mat2x3 m2[4]; + layout(column_major) mat3x2 m3; + layout(row_major) mat2 m4; + layout(row_major) mat2 m5[9]; + layout(row_major) mat2x3 m6[4][2]; + layout(row_major) mat3x2 m7; + float array[]; +} ssbo_430; + +void main() +{ + ssbo_430.content = ssbo_140.content; +} + diff --git a/shaders/comp/torture-loop.comp b/shaders/comp/torture-loop.comp new file mode 100644 index 0000000..54a1221 --- /dev/null +++ b/shaders/comp/torture-loop.comp @@ -0,0 +1,40 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(std430, binding = 0) readonly buffer SSBO +{ + mat4 mvp; + vec4 in_data[]; +}; + +layout(std430, binding = 1) writeonly buffer SSBO2 +{ + vec4 out_data[]; +}; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + vec4 idat = in_data[ident]; + + int k = 0; + + // Continue with side effects. + while (++k < 10) + { + idat *= 2.0; + k++; + } + + // Again used here ... + for (uint i = 0u; i < 16u; i++, k++) + for (uint j = 0u; j < 30u; j++) + idat = mvp * idat; + + do + { + k++; + } while (k > 10); + out_data[ident] = idat; +} + diff --git a/shaders/comp/udiv.comp b/shaders/comp/udiv.comp new file mode 100644 index 0000000..33fe564 --- /dev/null +++ b/shaders/comp/udiv.comp @@ -0,0 +1,17 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(std430, binding = 0) buffer SSBO +{ + uint inputs[]; +}; + +layout(std430, binding = 0) buffer SSBO2 +{ + uint outputs[]; +}; + +void main() +{ + outputs[gl_GlobalInvocationID.x] = inputs[gl_GlobalInvocationID.x] / 29u; +} diff --git a/shaders/frag/basic.frag b/shaders/frag/basic.frag new file mode 100644 index 0000000..7c3ad20 --- /dev/null +++ b/shaders/frag/basic.frag @@ -0,0 +1,13 @@ +#version 310 es +precision mediump float; + +in vec4 vColor; +in vec2 vTex; +layout(binding = 0) uniform sampler2D uTex; +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vColor * texture(uTex, vTex); +} + diff --git a/shaders/frag/constant-array.frag b/shaders/frag/constant-array.frag new file mode 100644 index 0000000..b862cb1 --- /dev/null +++ b/shaders/frag/constant-array.frag @@ -0,0 +1,21 @@ +#version 310 es +precision mediump float; +layout(location = 0) out vec4 FragColor; + +layout(location = 0) flat in int index; + +struct Foobar { float a; float b; }; + +vec4 resolve(Foobar f) +{ + return vec4(f.a + f.b); +} + +void main() +{ + const vec4 foo[3] = vec4[](vec4(1.0), vec4(2.0), vec4(3.0)); + const vec4 foobars[2][2] = vec4[][](vec4[](vec4(1.0), vec4(2.0)), vec4[](vec4(8.0), vec4(10.0))); + const Foobar foos[2] = Foobar[](Foobar(10.0, 40.0), Foobar(90.0, 70.0)); + + FragColor = foo[index] + foobars[index][index + 1] + resolve(Foobar(10.0, 20.0)) + resolve(foos[index]); +} diff --git a/shaders/frag/flush_params.frag b/shaders/frag/flush_params.frag new file mode 100644 index 0000000..8a26ad3 --- /dev/null +++ b/shaders/frag/flush_params.frag @@ -0,0 +1,27 @@ +#version 310 es +precision mediump float; + +layout(location = 0) out 
vec4 FragColor; + +struct Structy +{ + vec4 c; +}; + +void foo2(out Structy f) +{ + f.c = vec4(10.0); +} + +Structy foo() +{ + Structy f; + foo2(f); + return f; +} + +void main() +{ + Structy s = foo(); + FragColor = s.c; +} diff --git a/shaders/frag/ground.frag b/shaders/frag/ground.frag new file mode 100755 index 0000000..d1fcfd4 --- /dev/null +++ b/shaders/frag/ground.frag @@ -0,0 +1,162 @@ +#version 310 es +precision mediump float; + +#define DEBUG_NONE 0 +#define DEBUG_DIFFUSE 1 +#define DEBUG_SPECULAR 2 +#define DEBUG_LIGHTING 3 +#define DEBUG_FOG 4 +#define DEBUG DEBUG_NONE + +#define FORWARD 0 +#define DEFERRED 1 +#define DEFERRED_VTEX 2 + +float saturate(float x) { return clamp(x, 0.0, 1.0); } + +layout(std140, binding = 4) uniform GlobalPSData +{ + vec4 g_CamPos; + vec4 g_SunDir; + vec4 g_SunColor; + vec4 g_ResolutionParams; + vec4 g_TimeParams; + vec4 g_FogColor_Distance; +}; + +vec4 ComputeFogFactor(vec3 WorldPos) +{ + vec4 FogData; + vec3 vEye = WorldPos - g_CamPos.xyz; + vec3 nEye = normalize(vEye); + FogData.w = exp(-dot(vEye, vEye) * g_FogColor_Distance.w * 0.75); + + float fog_sun_factor = pow(saturate(dot(nEye, g_SunDir.xyz)), 8.0); + FogData.xyz = mix(vec3(1.0, 1.0, 1.0), vec3(0.6, 0.6, 0.9), nEye.y * 0.5 + 0.5); + FogData.xyz = mix(FogData.xyz, vec3(0.95, 0.87, 0.78), fog_sun_factor); + return FogData; +} + +void ApplyFog(inout vec3 Color, vec4 FogData) +{ + Color = mix(FogData.xyz, Color, FogData.w); +} + +void ApplyLighting(inout mediump vec3 Color, mediump float DiffuseFactor) +{ + mediump vec3 DiffuseLight = g_SunColor.xyz * DiffuseFactor; + mediump vec3 AmbientLight = vec3(0.2, 0.35, 0.55) * 0.5; + mediump vec3 Lighting = DiffuseLight + AmbientLight; +#if DEBUG == DEBUG_LIGHTING + Color = Lighting; +#else + Color *= Lighting; +#endif +} + +#define SPECULAR 0 +#define GLOSSMAP 0 + +void ApplySpecular(inout mediump vec3 Color, mediump vec3 EyeVec, mediump vec3 Normal, mediump vec3 SpecularColor, mediump float Shininess, mediump float FresnelAmount) +{ + mediump vec3 HalfAngle = normalize(-EyeVec + g_SunDir.xyz); + + mediump float v_dot_h = saturate(dot(HalfAngle, -EyeVec)); + mediump float n_dot_l = saturate(dot(Normal, g_SunDir.xyz)); + mediump float n_dot_h = saturate(dot(Normal, HalfAngle)); + mediump float n_dot_v = saturate(dot(-EyeVec, Normal)); + mediump float h_dot_l = saturate(dot(g_SunDir.xyz, HalfAngle)); + + const mediump float roughness_value = 0.25; + + mediump float r_sq = roughness_value * roughness_value; + mediump float n_dot_h_sq = n_dot_h * n_dot_h; + mediump float roughness_a = 1.0 / (4.0 * r_sq * n_dot_h_sq * n_dot_h_sq); + mediump float roughness_b = n_dot_h_sq - 1.0; + mediump float roughness_c = r_sq * n_dot_h_sq; + mediump float roughness = saturate(roughness_a * exp(roughness_b / roughness_c)); + + FresnelAmount = 0.5; + mediump float fresnel_term = pow(1.0 - n_dot_v, 5.0) * (1.0 - FresnelAmount) + FresnelAmount; + + mediump float geo_numerator = 2.0 * n_dot_h; + mediump float geo_denominator = 1.0 / v_dot_h; + mediump float geo_term = min(1.0, min(n_dot_v, n_dot_l) * geo_numerator * geo_denominator); + +#if SPECULAR || GLOSSMAP + Color += SpecularColor * g_SunColor.xyz * fresnel_term * roughness * n_dot_l * geo_term / (n_dot_v * n_dot_l + 0.0001); +#endif + + //Color = vec3(0.025 * 1.0 / (n_dot_v * n_dot_l)); +} +layout(location = 0) in vec2 TexCoord; +layout(location = 1) in vec3 EyeVec; + +layout(binding = 2) uniform sampler2D TexNormalmap; +//layout(binding = 3) uniform sampler2D TexScatteringLUT; + +#define DIFFUSE_ONLY 0 +#define 
GLOBAL_RENDERER DEFERRED +#define OUTPUT_FEEDBACK_TEXTURE 0 + +#if DIFFUSE_ONLY +layout(location = 0) out vec4 ColorOut; +layout(location = 1) out vec4 NormalOut; +#else +layout(location = 0) out vec4 AlbedoOut; +layout(location = 1) out vec4 SpecularOut; +layout(location = 2) out vec4 NormalOut; +layout(location = 3) out vec4 LightingOut; +#endif + +void Resolve(vec3 Albedo, vec3 Normal, float Roughness, float Metallic) +{ +#if (GLOBAL_RENDERER == FORWARD) || OUTPUT_FEEDBACK_TEXTURE + float Lighting = saturate(dot(Normal, normalize(vec3(1.0, 0.5, 1.0)))); + ColorOut.xyz = Albedo * Lighting; + ColorOut.w = 1.0; +#elif DIFFUSE_ONLY + ColorOut = vec4(Albedo, 0.0); + NormalOut.xyz = Normal * 0.5 + 0.5; + NormalOut.w = 1.0; + + // linearize and map to 0..255 range + ColorOut.w = -0.003921569 / (gl_FragCoord.z - 1.003921569); + ColorOut.w = log2(1.0 + saturate(length(EyeVec.xyz) / 200.0)); + ColorOut.w -= 1.0 / 255.0; +#else + LightingOut = vec4(0.0); + NormalOut = vec4(Normal * 0.5 + 0.5, 0.0); + SpecularOut = vec4(Roughness, Metallic, 0.0, 0.0); + AlbedoOut = vec4(Albedo, 1.0); +#endif +} + +void main() +{ + vec3 Normal = texture(TexNormalmap, TexCoord).xyz * 2.0 - 1.0; + Normal = normalize(Normal); + + vec2 scatter_uv; + scatter_uv.x = saturate(length(EyeVec) / 1000.0); + + vec3 nEye = normalize(EyeVec); + scatter_uv.y = 0.0; //nEye.x * 0.5 + 0.5; + + vec3 Color = vec3(0.1, 0.3, 0.1); + vec3 grass = vec3(0.1, 0.3, 0.1); + vec3 dirt = vec3(0.1, 0.1, 0.1); + vec3 snow = vec3(0.8, 0.8, 0.8); + + float grass_snow = smoothstep(0.0, 0.15, (g_CamPos.y + EyeVec.y) / 200.0); + vec3 base = mix(grass, snow, grass_snow); + + float edge = smoothstep(0.7, 0.75, Normal.y); + Color = mix(dirt, base, edge); + Color *= Color; + + float Roughness = 1.0 - edge * grass_snow; + + Resolve(Color, Normal, Roughness, 0.0); +} + diff --git a/shaders/frag/mix.frag b/shaders/frag/mix.frag new file mode 100644 index 0000000..a5d589d --- /dev/null +++ b/shaders/frag/mix.frag @@ -0,0 +1,20 @@ +#version 310 es +precision mediump float; + +layout(location = 0) in vec4 vIn0; +layout(location = 1) in vec4 vIn1; +layout(location = 2) in float vIn2; +layout(location = 3) in float vIn3; +layout(location = 0) out vec4 FragColor; + +void main() +{ + bvec4 l = bvec4(false, true, false, false); + FragColor = mix(vIn0, vIn1, l); + + bool f = true; + FragColor = vec4(mix(vIn2, vIn3, f)); + + FragColor = f ? vIn0 : vIn1; + FragColor = vec4(f ? 
vIn2 : vIn3); +} diff --git a/shaders/frag/pls.frag b/shaders/frag/pls.frag new file mode 100644 index 0000000..314fd99 --- /dev/null +++ b/shaders/frag/pls.frag @@ -0,0 +1,20 @@ +#version 310 es +precision mediump float; + +layout(location = 0) in vec4 PLSIn0; +layout(location = 1) in vec4 PLSIn1; +in vec4 PLSIn2; +in vec4 PLSIn3; + +layout(location = 0) out vec4 PLSOut0; +layout(location = 1) out vec4 PLSOut1; +layout(location = 2) out vec4 PLSOut2; +layout(location = 3) out vec4 PLSOut3; + +void main() +{ + PLSOut0 = 2.0 * PLSIn0; + PLSOut1 = 6.0 * PLSIn1; + PLSOut2 = 7.0 * PLSIn2; + PLSOut3 = 4.0 * PLSIn3; +} diff --git a/shaders/frag/sampler.frag b/shaders/frag/sampler.frag new file mode 100644 index 0000000..fb4292f --- /dev/null +++ b/shaders/frag/sampler.frag @@ -0,0 +1,18 @@ +#version 310 es +precision mediump float; + +in vec4 vColor; +in vec2 vTex; +layout(binding = 0) uniform sampler2D uTex; +layout(location = 0) out vec4 FragColor; + +vec4 sample_texture(sampler2D tex, vec2 uv) +{ + return texture(tex, uv); +} + +void main() +{ + FragColor = vColor * sample_texture(uTex, vTex); +} + diff --git a/shaders/frag/swizzle.frag b/shaders/frag/swizzle.frag new file mode 100644 index 0000000..271ba6c --- /dev/null +++ b/shaders/frag/swizzle.frag @@ -0,0 +1,17 @@ +#version 310 es +precision mediump float; + +layout(location = 0) uniform sampler2D samp; +layout(location = 0) out vec4 FragColor; +layout(location = 1) in vec3 vNormal; +layout(location = 2) in vec2 vUV; + +void main() +{ + FragColor = vec4(texture(samp, vUV).xyz, 1.0); + FragColor = vec4(texture(samp, vUV).xz, 1.0, 4.0); + FragColor = vec4(texture(samp, vUV).xx, texture(samp, vUV + vec2(0.1)).yy); + FragColor = vec4(vNormal, 1.0); + FragColor = vec4(vNormal + 1.8, 1.0); + FragColor = vec4(vUV, vUV + 1.8); +} diff --git a/shaders/frag/ubo_layout.frag b/shaders/frag/ubo_layout.frag new file mode 100644 index 0000000..80f9f16 --- /dev/null +++ b/shaders/frag/ubo_layout.frag @@ -0,0 +1,24 @@ +#version 310 es +precision mediump float; + +layout(location = 0) out vec4 FragColor; + +struct Str +{ + mat4 foo; +}; + +layout(binding = 0, std140) uniform UBO1 +{ + layout(row_major) Str foo; +} ubo1; + +layout(binding = 1, std140) uniform UBO2 +{ + layout(column_major) Str foo; +} ubo0; + +void main() +{ + FragColor = ubo1.foo.foo[0] + ubo0.foo.foo[0]; +} diff --git a/shaders/geom/basic.geom b/shaders/geom/basic.geom new file mode 100644 index 0000000..de5f515 --- /dev/null +++ b/shaders/geom/basic.geom @@ -0,0 +1,28 @@ +#version 310 es +#extension GL_EXT_geometry_shader : require + +layout(triangles, invocations = 4) in; +layout(triangle_strip, max_vertices = 3) out; + +in VertexData { + vec3 normal; +} vin[]; + +out vec3 vNormal; + +void main() +{ + gl_Position = gl_in[0].gl_Position; + vNormal = vin[0].normal + float(gl_InvocationID); + EmitVertex(); + + gl_Position = gl_in[1].gl_Position; + vNormal = vin[1].normal + 4.0 * float(gl_InvocationID); + EmitVertex(); + + gl_Position = gl_in[2].gl_Position; + vNormal = vin[2].normal + 2.0 * float(gl_InvocationID); + EmitVertex(); + + EndPrimitive(); +} diff --git a/shaders/tesc/basic.tesc b/shaders/tesc/basic.tesc new file mode 100644 index 0000000..1a0e1d6 --- /dev/null +++ b/shaders/tesc/basic.tesc @@ -0,0 +1,17 @@ +#version 310 es +#extension GL_EXT_tessellation_shader : require + +patch out vec3 vFoo; + +layout(vertices = 1) out; + +void main() +{ + gl_TessLevelInner[0] = 8.9; + gl_TessLevelInner[1] = 6.9; + gl_TessLevelOuter[0] = 8.9; + gl_TessLevelOuter[1] = 6.9; + 
gl_TessLevelOuter[2] = 3.9; + gl_TessLevelOuter[3] = 4.9; + vFoo = vec3(1.0); +} diff --git a/shaders/tesc/water_tess.tesc b/shaders/tesc/water_tess.tesc new file mode 100644 index 0000000..9e9c0d4 --- /dev/null +++ b/shaders/tesc/water_tess.tesc @@ -0,0 +1,115 @@ +#version 310 es +#extension GL_EXT_tessellation_shader : require + +layout(vertices = 1) out; +in vec2 vPatchPosBase[]; + +layout(std140) uniform UBO +{ + vec4 uScale; + highp vec3 uCamPos; + vec2 uPatchSize; + vec2 uMaxTessLevel; + float uDistanceMod; + vec4 uFrustum[6]; +}; + +patch out vec2 vOutPatchPosBase; +patch out vec4 vPatchLods; + +float lod_factor(vec2 pos_) +{ + vec2 pos = pos_ * uScale.xy; + vec3 dist_to_cam = uCamPos - vec3(pos.x, 0.0, pos.y); + float level = log2((length(dist_to_cam) + 0.0001) * uDistanceMod); + return clamp(level, 0.0, uMaxTessLevel.x); +} + +float tess_level(float lod) +{ + return uMaxTessLevel.y * exp2(-lod); +} + +vec4 tess_level(vec4 lod) +{ + return uMaxTessLevel.y * exp2(-lod); +} + +// Guard band for vertex displacement. +#define GUARD_BAND 10.0 +bool frustum_cull(vec2 p0) +{ + vec2 min_xz = (p0 - GUARD_BAND) * uScale.xy; + vec2 max_xz = (p0 + uPatchSize + GUARD_BAND) * uScale.xy; + + vec3 bb_min = vec3(min_xz.x, -GUARD_BAND, min_xz.y); + vec3 bb_max = vec3(max_xz.x, +GUARD_BAND, max_xz.y); + vec3 center = 0.5 * (bb_min + bb_max); + float radius = 0.5 * length(bb_max - bb_min); + + vec3 f0 = vec3( + dot(uFrustum[0], vec4(center, 1.0)), + dot(uFrustum[1], vec4(center, 1.0)), + dot(uFrustum[2], vec4(center, 1.0))); + + vec3 f1 = vec3( + dot(uFrustum[3], vec4(center, 1.0)), + dot(uFrustum[4], vec4(center, 1.0)), + dot(uFrustum[5], vec4(center, 1.0))); + + return !(any(lessThanEqual(f0, vec3(-radius))) || any(lessThanEqual(f1, vec3(-radius)))); +} + +void compute_tess_levels(vec2 p0) +{ + vOutPatchPosBase = p0; + + float l00 = lod_factor(p0 + vec2(-0.5, -0.5) * uPatchSize); + float l10 = lod_factor(p0 + vec2(+0.5, -0.5) * uPatchSize); + float l20 = lod_factor(p0 + vec2(+1.5, -0.5) * uPatchSize); + float l01 = lod_factor(p0 + vec2(-0.5, +0.5) * uPatchSize); + float l11 = lod_factor(p0 + vec2(+0.5, +0.5) * uPatchSize); + float l21 = lod_factor(p0 + vec2(+1.5, +0.5) * uPatchSize); + float l02 = lod_factor(p0 + vec2(-0.5, +1.5) * uPatchSize); + float l12 = lod_factor(p0 + vec2(+0.5, +1.5) * uPatchSize); + float l22 = lod_factor(p0 + vec2(+1.5, +1.5) * uPatchSize); + + vec4 lods = vec4( + dot(vec4(l01, l11, l02, l12), vec4(0.25)), + dot(vec4(l00, l10, l01, l11), vec4(0.25)), + dot(vec4(l10, l20, l11, l21), vec4(0.25)), + dot(vec4(l11, l21, l12, l22), vec4(0.25))); + + vPatchLods = lods; + + vec4 outer_lods = min(lods.xyzw, lods.yzwx); + vec4 levels = tess_level(outer_lods); + gl_TessLevelOuter[0] = levels.x; + gl_TessLevelOuter[1] = levels.y; + gl_TessLevelOuter[2] = levels.z; + gl_TessLevelOuter[3] = levels.w; + + float min_lod = min(min(lods.x, lods.y), min(lods.z, lods.w)); + float inner = tess_level(min(min_lod, l11)); + gl_TessLevelInner[0] = inner; + gl_TessLevelInner[1] = inner; +} + +void main() +{ + vec2 p0 = vPatchPosBase[0]; + if (!frustum_cull(p0)) + { + gl_TessLevelOuter[0] = -1.0; + gl_TessLevelOuter[1] = -1.0; + gl_TessLevelOuter[2] = -1.0; + gl_TessLevelOuter[3] = -1.0; + gl_TessLevelInner[0] = -1.0; + gl_TessLevelInner[1] = -1.0; + } + else + { + compute_tess_levels(p0); + } +} + diff --git a/shaders/tese/water_tess.tese b/shaders/tese/water_tess.tese new file mode 100644 index 0000000..f9628b1 --- /dev/null +++ b/shaders/tese/water_tess.tese @@ -0,0 +1,65 @@ +#version 310 es 
+#extension GL_EXT_tessellation_shader : require +precision highp int; + +layout(cw, quads, fractional_even_spacing) in; + +patch in vec2 vOutPatchPosBase; +patch in vec4 vPatchLods; + +layout(binding = 1, std140) uniform UBO +{ + mat4 uMVP; + vec4 uScale; + vec2 uInvScale; + vec3 uCamPos; + vec2 uPatchSize; + vec2 uInvHeightmapSize; +}; +layout(binding = 0) uniform mediump sampler2D uHeightmapDisplacement; + +highp out vec3 vWorld; +highp out vec4 vGradNormalTex; + +vec2 lerp_vertex(vec2 tess_coord) +{ + return vOutPatchPosBase + tess_coord * uPatchSize; +} + +mediump vec2 lod_factor(vec2 tess_coord) +{ + mediump vec2 x = mix(vPatchLods.yx, vPatchLods.zw, tess_coord.x); + mediump float level = mix(x.x, x.y, tess_coord.y); + mediump float floor_level = floor(level); + mediump float fract_level = level - floor_level; + return vec2(floor_level, fract_level); +} + +mediump vec3 sample_height_displacement(vec2 uv, vec2 off, mediump vec2 lod) +{ + return mix( + textureLod(uHeightmapDisplacement, uv + 0.5 * off, lod.x).xyz, + textureLod(uHeightmapDisplacement, uv + 1.0 * off, lod.x + 1.0).xyz, + lod.y); +} + +void main() +{ + vec2 tess_coord = gl_TessCoord.xy; + vec2 pos = lerp_vertex(tess_coord); + mediump vec2 lod = lod_factor(tess_coord); + + vec2 tex = pos * uInvHeightmapSize.xy; + pos *= uScale.xy; + + mediump float delta_mod = exp2(lod.x); + vec2 off = uInvHeightmapSize.xy * delta_mod; + + vGradNormalTex = vec4(tex + 0.5 * uInvHeightmapSize.xy, tex * uScale.zw); + vec3 height_displacement = sample_height_displacement(tex, off, lod); + + pos += height_displacement.yz; + vWorld = vec3(pos.x, height_displacement.x, pos.y); + gl_Position = uMVP * vec4(vWorld, 1.0); +} + diff --git a/shaders/vert/basic.vert b/shaders/vert/basic.vert new file mode 100644 index 0000000..801724f --- /dev/null +++ b/shaders/vert/basic.vert @@ -0,0 +1,15 @@ +#version 310 es + +layout(std140) uniform UBO +{ + uniform mat4 uMVP; +}; +in vec4 aVertex; +in vec3 aNormal; +out vec3 vNormal; + +void main() +{ + gl_Position = uMVP * aVertex; + vNormal = aNormal; +} diff --git a/shaders/vert/ground.vert b/shaders/vert/ground.vert new file mode 100755 index 0000000..f903a51 --- /dev/null +++ b/shaders/vert/ground.vert @@ -0,0 +1,196 @@ +#version 310 es + +#define YFLIP 0 +#define SPECULAR 0 +#define GLOSSMAP 0 + +#define DEBUG_NONE 0 +#define DEBUG_DIFFUSE 1 +#define DEBUG_SPECULAR 2 +#define DEBUG_LIGHTING 3 +#define DEBUG_FOG 4 +#define DEBUG DEBUG_NONE + +#define FORWARD 0 +#define DEFERRED 1 +#define DEFERRED_VTEX 2 + +float saturate(float x) { return clamp(x, 0.0, 1.0); } + +layout(std140, binding = 0) uniform GlobalVSData +{ + vec4 g_ViewProj_Row0; + vec4 g_ViewProj_Row1; + vec4 g_ViewProj_Row2; + vec4 g_ViewProj_Row3; + vec4 g_CamPos; + vec4 g_CamRight; + vec4 g_CamUp; + vec4 g_CamFront; + vec4 g_SunDir; + vec4 g_SunColor; + vec4 g_TimeParams; + vec4 g_ResolutionParams; + vec4 g_CamAxisRight; + vec4 g_FogColor_Distance; + vec4 g_ShadowVP_Row0; + vec4 g_ShadowVP_Row1; + vec4 g_ShadowVP_Row2; + vec4 g_ShadowVP_Row3; +}; + +vec4 ComputeFogFactor(vec3 WorldPos) +{ + vec4 FogData; + vec3 vEye = WorldPos - g_CamPos.xyz; + vec3 nEye = normalize(vEye); + FogData.w = exp(-dot(vEye, vEye) * g_FogColor_Distance.w * 0.75); + + float fog_sun_factor = pow(saturate(dot(nEye, g_SunDir.xyz)), 8.0); + FogData.xyz = mix(vec3(1.0, 1.0, 1.0), vec3(0.6, 0.6, 0.9), nEye.y * 0.5 + 0.5); + FogData.xyz = mix(FogData.xyz, vec3(0.95, 0.87, 0.78), fog_sun_factor); + return FogData; +} + +void ApplyFog(inout vec3 Color, vec4 FogData) +{ + Color = 
mix(FogData.xyz, Color, FogData.w); +} + +void ApplyLighting(inout mediump vec3 Color, mediump float DiffuseFactor) +{ + mediump vec3 DiffuseLight = g_SunColor.xyz * DiffuseFactor; + mediump vec3 AmbientLight = vec3(0.2, 0.35, 0.55) * 0.5; + mediump vec3 Lighting = DiffuseLight + AmbientLight; +#if DEBUG == DEBUG_LIGHTING + Color = Lighting; +#else + Color *= Lighting; +#endif +} + +#pragma VARIANT SPECULAR +#pragma VARIANT GLOSSMAP + +void ApplySpecular(inout mediump vec3 Color, mediump vec3 EyeVec, mediump vec3 Normal, mediump vec3 SpecularColor, mediump float Shininess, mediump float FresnelAmount) +{ + mediump vec3 HalfAngle = normalize(-EyeVec + g_SunDir.xyz); + + mediump float v_dot_h = saturate(dot(HalfAngle, -EyeVec)); + mediump float n_dot_l = saturate(dot(Normal, g_SunDir.xyz)); + mediump float n_dot_h = saturate(dot(Normal, HalfAngle)); + mediump float n_dot_v = saturate(dot(-EyeVec, Normal)); + mediump float h_dot_l = saturate(dot(g_SunDir.xyz, HalfAngle)); + + const mediump float roughness_value = 0.25; + + mediump float r_sq = roughness_value * roughness_value; + mediump float n_dot_h_sq = n_dot_h * n_dot_h; + mediump float roughness_a = 1.0 / (4.0 * r_sq * n_dot_h_sq * n_dot_h_sq); + mediump float roughness_b = n_dot_h_sq - 1.0; + mediump float roughness_c = r_sq * n_dot_h_sq; + mediump float roughness = saturate(roughness_a * exp(roughness_b / roughness_c)); + + FresnelAmount = 0.5; + mediump float fresnel_term = pow(1.0 - n_dot_v, 5.0) * (1.0 - FresnelAmount) + FresnelAmount; + + mediump float geo_numerator = 2.0 * n_dot_h; + mediump float geo_denominator = 1.0 / v_dot_h; + mediump float geo_term = min(1.0, min(n_dot_v, n_dot_l) * geo_numerator * geo_denominator); + +#if SPECULAR || GLOSSMAP + Color += SpecularColor * g_SunColor.xyz * fresnel_term * roughness * n_dot_l * geo_term / (n_dot_v * n_dot_l + 0.0001); +#endif + + //Color = vec3(0.025 * 1.0 / (n_dot_v * n_dot_l)); +} + +layout(location = 0) in vec2 Position; +layout(location = 1) in vec4 LODWeights; + +layout(location = 0) out vec2 TexCoord; +layout(location = 1) out vec3 EyeVec; + +layout(std140, binding = 2) uniform GlobalGround +{ + vec4 GroundScale; + vec4 GroundPosition; + vec4 InvGroundSize_PatchScale; +}; + +struct PatchData +{ + vec4 Position; + vec4 LODs; +}; + +layout(std140, binding = 0) uniform PerPatch +{ + PatchData Patches[256]; +}; + +layout(binding = 0) uniform sampler2D TexHeightmap; +layout(binding = 1) uniform sampler2D TexLOD; + +vec2 lod_factor(vec2 uv) +{ + float level = textureLod(TexLOD, uv, 0.0).x * (255.0 / 32.0); + float floor_level = floor(level); + float fract_level = level - floor_level; + return vec2(floor_level, fract_level); +} + +vec2 warp_position() +{ + float vlod = dot(LODWeights, Patches[gl_InstanceID].LODs); + vlod = mix(vlod, Patches[gl_InstanceID].Position.w, all(equal(LODWeights, vec4(0.0)))); + +#ifdef DEBUG_LOD_HEIGHT + LODFactor = vec4(vlod); +#endif + + float floor_lod = floor(vlod); + float fract_lod = vlod - floor_lod; + uint ufloor_lod = uint(floor_lod); + +#ifdef DEBUG_LOD_HEIGHT + LODFactor = vec4(fract_lod); +#endif + + uvec2 uPosition = uvec2(Position); + uvec2 mask = (uvec2(1u) << uvec2(ufloor_lod, ufloor_lod + 1u)) - 1u; + //uvec2 rounding = mix(uvec2(0u), mask, lessThan(uPosition, uvec2(32u))); + + uvec2 rounding = uvec2( + uPosition.x < 32u ? mask.x : 0u, + uPosition.y < 32u ? 
mask.y : 0u); + + vec4 lower_upper_snapped = vec4((uPosition + rounding).xyxy & (~mask).xxyy); + return mix(lower_upper_snapped.xy, lower_upper_snapped.zw, fract_lod); +} + +void main() +{ + vec2 PatchPos = Patches[gl_InstanceID].Position.xz * InvGroundSize_PatchScale.zw; + vec2 WarpedPos = warp_position(); + vec2 VertexPos = PatchPos + WarpedPos; + vec2 NormalizedPos = VertexPos * InvGroundSize_PatchScale.xy; + vec2 lod = lod_factor(NormalizedPos); + + vec2 Offset = exp2(lod.x) * InvGroundSize_PatchScale.xy; + + float Elevation = + mix(textureLod(TexHeightmap, NormalizedPos + 0.5 * Offset, lod.x).x, + textureLod(TexHeightmap, NormalizedPos + 1.0 * Offset, lod.x + 1.0).x, + lod.y); + + vec3 WorldPos = vec3(NormalizedPos.x, Elevation, NormalizedPos.y); + WorldPos *= GroundScale.xyz; + WorldPos += GroundPosition.xyz; + + EyeVec = WorldPos - g_CamPos.xyz; + TexCoord = NormalizedPos + 0.5 * InvGroundSize_PatchScale.xy; + + gl_Position = WorldPos.x * g_ViewProj_Row0 + WorldPos.y * g_ViewProj_Row1 + WorldPos.z * g_ViewProj_Row2 + g_ViewProj_Row3; +} + diff --git a/shaders/vert/ocean.vert b/shaders/vert/ocean.vert new file mode 100644 index 0000000..9b1488c --- /dev/null +++ b/shaders/vert/ocean.vert @@ -0,0 +1,194 @@ +#version 310 es + +#define YFLIP 0 +#define SPECULAR 0 +#define GLOSSMAP 0 + +#define DEBUG_NONE 0 +#define DEBUG_DIFFUSE 1 +#define DEBUG_SPECULAR 2 +#define DEBUG_LIGHTING 3 +#define DEBUG_FOG 4 +#define DEBUG DEBUG_NONE + +#define FORWARD 0 +#define DEFERRED 1 +#define DEFERRED_VTEX 2 + +float saturate(float x) { return clamp(x, 0.0, 1.0); } + +layout(std140, binding = 0) uniform GlobalVSData +{ + vec4 g_ViewProj_Row0; + vec4 g_ViewProj_Row1; + vec4 g_ViewProj_Row2; + vec4 g_ViewProj_Row3; + vec4 g_CamPos; + vec4 g_CamRight; + vec4 g_CamUp; + vec4 g_CamFront; + vec4 g_SunDir; + vec4 g_SunColor; + vec4 g_TimeParams; + vec4 g_ResolutionParams; + vec4 g_CamAxisRight; + vec4 g_FogColor_Distance; + vec4 g_ShadowVP_Row0; + vec4 g_ShadowVP_Row1; + vec4 g_ShadowVP_Row2; + vec4 g_ShadowVP_Row3; +}; + +vec4 ComputeFogFactor(vec3 WorldPos) +{ + vec4 FogData; + vec3 vEye = WorldPos - g_CamPos.xyz; + vec3 nEye = normalize(vEye); + FogData.w = exp(-dot(vEye, vEye) * g_FogColor_Distance.w * 0.75); + + float fog_sun_factor = pow(saturate(dot(nEye, g_SunDir.xyz)), 8.0); + FogData.xyz = mix(vec3(1.0, 1.0, 1.0), vec3(0.6, 0.6, 0.9), nEye.y * 0.5 + 0.5); + FogData.xyz = mix(FogData.xyz, vec3(0.95, 0.87, 0.78), fog_sun_factor); + return FogData; +} + +void ApplyFog(inout vec3 Color, vec4 FogData) +{ + Color = mix(FogData.xyz, Color, FogData.w); +} + +void ApplyLighting(inout mediump vec3 Color, mediump float DiffuseFactor) +{ + mediump vec3 DiffuseLight = g_SunColor.xyz * DiffuseFactor; + mediump vec3 AmbientLight = vec3(0.2, 0.35, 0.55) * 0.5; + mediump vec3 Lighting = DiffuseLight + AmbientLight; +#if DEBUG == DEBUG_LIGHTING + Color = Lighting; +#else + Color *= Lighting; +#endif +} + +void ApplySpecular(inout mediump vec3 Color, mediump vec3 EyeVec, mediump vec3 Normal, mediump vec3 SpecularColor, mediump float Shininess, mediump float FresnelAmount) +{ + mediump vec3 HalfAngle = normalize(-EyeVec + g_SunDir.xyz); + + mediump float v_dot_h = saturate(dot(HalfAngle, -EyeVec)); + mediump float n_dot_l = saturate(dot(Normal, g_SunDir.xyz)); + mediump float n_dot_h = saturate(dot(Normal, HalfAngle)); + mediump float n_dot_v = saturate(dot(-EyeVec, Normal)); + mediump float h_dot_l = saturate(dot(g_SunDir.xyz, HalfAngle)); + + const mediump float roughness_value = 0.25; + + mediump float r_sq = 
roughness_value * roughness_value; + mediump float n_dot_h_sq = n_dot_h * n_dot_h; + mediump float roughness_a = 1.0 / (4.0 * r_sq * n_dot_h_sq * n_dot_h_sq); + mediump float roughness_b = n_dot_h_sq - 1.0; + mediump float roughness_c = r_sq * n_dot_h_sq; + mediump float roughness = saturate(roughness_a * exp(roughness_b / roughness_c)); + + FresnelAmount = 0.5; + mediump float fresnel_term = pow(1.0 - n_dot_v, 5.0) * (1.0 - FresnelAmount) + FresnelAmount; + + mediump float geo_numerator = 2.0 * n_dot_h; + mediump float geo_denominator = 1.0 / v_dot_h; + mediump float geo_term = min(1.0, min(n_dot_v, n_dot_l) * geo_numerator * geo_denominator); + +#if SPECULAR || GLOSSMAP + Color += SpecularColor * g_SunColor.xyz * fresnel_term * roughness * n_dot_l * geo_term / (n_dot_v * n_dot_l + 0.0001); +#endif + + //Color = vec3(0.025 * 1.0 / (n_dot_v * n_dot_l)); +} + + +precision highp int; + +layout(binding = 0) uniform mediump sampler2D TexDisplacement; +layout(binding = 1) uniform mediump sampler2D TexLOD; + +layout(location = 0) in vec4 Position; +layout(location = 1) in vec4 LODWeights; + +layout(location = 0) out highp vec3 EyeVec; +layout(location = 1) out highp vec4 TexCoord; + +layout(std140, binding = 4) uniform GlobalOcean +{ + vec4 OceanScale; + vec4 OceanPosition; + vec4 InvOceanSize_PatchScale; + vec4 NormalTexCoordScale; +}; + +struct PatchData +{ + vec4 Position; + vec4 LODs; +}; + +layout(std140, binding = 0) uniform Offsets +{ + PatchData Patches[256]; +}; + +vec2 lod_factor(vec2 uv) +{ + float level = textureLod(TexLOD, uv, 0.0).x * (255.0 / 32.0); + float floor_level = floor(level); + float fract_level = level - floor_level; + return vec2(floor_level, fract_level); +} + +vec2 warp_position() +{ + float vlod = dot(LODWeights, Patches[gl_InstanceID].LODs); + vlod = mix(vlod, Patches[gl_InstanceID].Position.w, all(equal(LODWeights, vec4(0.0)))); + + float floor_lod = floor(vlod); + float fract_lod = vlod - floor_lod; + uint ufloor_lod = uint(floor_lod); + + uvec4 uPosition = uvec4(Position); + uvec2 mask = (uvec2(1u) << uvec2(ufloor_lod, ufloor_lod + 1u)) - 1u; + + uvec4 rounding; + rounding.x = uPosition.x < 32u ? mask.x : 0u; + rounding.y = uPosition.y < 32u ? mask.x : 0u; + rounding.z = uPosition.x < 32u ? mask.y : 0u; + rounding.w = uPosition.y < 32u ? 
mask.y : 0u; + + //rounding = uPosition.xyxy * mask.xxyy; + vec4 lower_upper_snapped = vec4((uPosition.xyxy + rounding) & (~mask).xxyy); + return mix(lower_upper_snapped.xy, lower_upper_snapped.zw, fract_lod); +} + +void main() +{ + vec2 PatchPos = Patches[gl_InstanceID].Position.xz * InvOceanSize_PatchScale.zw; + vec2 WarpedPos = warp_position(); + vec2 VertexPos = PatchPos + WarpedPos; + vec2 NormalizedPos = VertexPos * InvOceanSize_PatchScale.xy; + vec2 NormalizedTex = NormalizedPos * NormalTexCoordScale.zw; + vec2 lod = lod_factor(NormalizedPos); + vec2 Offset = exp2(lod.x) * InvOceanSize_PatchScale.xy * NormalTexCoordScale.zw; + + vec3 Displacement = + mix(textureLod(TexDisplacement, NormalizedTex + 0.5 * Offset, lod.x).yxz, + textureLod(TexDisplacement, NormalizedTex + 1.0 * Offset, lod.x + 1.0).yxz, + lod.y); + + vec3 WorldPos = vec3(NormalizedPos.x, 0.0, NormalizedPos.y) + Displacement; + WorldPos *= OceanScale.xyz; + WorldPos += OceanPosition.xyz; + + EyeVec = WorldPos - g_CamPos.xyz; + TexCoord = vec4(NormalizedTex, NormalizedTex * NormalTexCoordScale.xy) + 0.5 * InvOceanSize_PatchScale.xyxy * NormalTexCoordScale.zwzw; + + gl_Position = WorldPos.x * g_ViewProj_Row0 + WorldPos.y * g_ViewProj_Row1 + WorldPos.z * g_ViewProj_Row2 + g_ViewProj_Row3; +#if YFLIP + gl_Position *= vec4(1.0, -1.0, 1.0, 1.0); +#endif +} + diff --git a/shaders/vert/texture_buffer.vert b/shaders/vert/texture_buffer.vert new file mode 100644 index 0000000..6bc7ddf --- /dev/null +++ b/shaders/vert/texture_buffer.vert @@ -0,0 +1,10 @@ +#version 310 es +#extension GL_OES_texture_buffer : require + +layout(binding = 4) uniform highp samplerBuffer uSamp; +layout(rgba32f, binding = 5) uniform readonly highp imageBuffer uSampo; + +void main() +{ + gl_Position = texelFetch(uSamp, 10) + imageLoad(uSampo, 100); +} diff --git a/shaders/vert/ubo.vert b/shaders/vert/ubo.vert new file mode 100644 index 0000000..f304c1e --- /dev/null +++ b/shaders/vert/ubo.vert @@ -0,0 +1,16 @@ +#version 310 es + +layout(binding = 0, std140) uniform UBO +{ + mat4 mvp; +}; + +in vec4 aVertex; +in vec3 aNormal; +out vec3 vNormal; + +void main() +{ + gl_Position = mvp * aVertex; + vNormal = aNormal; +} diff --git a/spir2common.hpp b/spir2common.hpp new file mode 100644 index 0000000..d2df785 --- /dev/null +++ b/spir2common.hpp @@ -0,0 +1,635 @@ +/* + * Copyright 2015-2016 ARM Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef SPIR2COMMON_HPP +#define SPIR2COMMON_HPP + +#include +#include + +namespace spir2cross +{ + class CompilerError : public std::runtime_error + { + public: + CompilerError(const std::string &str) : std::runtime_error(str) {} + }; + + namespace inner + { + template + void join_helper(std::ostringstream &stream, T&& t) + { + stream << std::forward(t); + } + + template + void join_helper(std::ostringstream &stream, T&& t, Ts&&... ts) + { + stream << std::forward(t); + join_helper(stream, std::forward(ts)...); + } + } + + // Helper template to avoid lots of nasty string temporary munging. 
+ template + std::string join(Ts&&... ts) + { + std::ostringstream stream; + inner::join_helper(stream, std::forward(ts)...); + return stream.str(); + } + + inline std::string merge(const std::vector &list) + { + std::string s; + for (auto &elem : list) + { + s += elem; + if (&elem != &list.back()) + s += ", "; + } + return s; + } + + template + inline std::string convert_to_string(T&& t) + { + return std::to_string(std::forward(t)); + } + + inline std::string convert_to_string(float t) + { + // std::to_string for floating point values is broken. + // Fallback to something more sane. + char buf[64]; + sprintf(buf, "%.32g", t); + return buf; + } + + inline std::string convert_to_string(double t) + { + // std::to_string for floating point values is broken. + // Fallback to something more sane. + char buf[64]; + sprintf(buf, "%.32g", t); + return buf; + } + + struct Instruction + { + Instruction(const std::vector &spirv, uint32_t &index); + + uint16_t op; + uint16_t count; + uint32_t offset; + uint32_t length; + }; + + // Helper for Variant interface. + struct IVariant + { + virtual ~IVariant() = default; + uint32_t self = 0; + }; + + enum Types + { + TypeNone, + TypeType, + TypeVariable, + TypeConstant, + TypeFunction, + TypeFunctionPrototype, + TypePointer, + TypeBlock, + TypeExtension, + TypeExpression, + TypeUndef + }; + + struct SPIRUndef : IVariant + { + enum { type = TypeUndef }; + SPIRUndef(uint32_t basetype) : basetype(basetype) {} + uint32_t basetype; + }; + + struct SPIRType : IVariant + { + enum { type = TypeType }; + + enum BaseType + { + Unknown, + Void, + Bool, + Int, + UInt, + AtomicCounter, + Float, + Struct, + Image, + SampledImage, + Sampler + }; + + // Scalar/vector/matrix support. + BaseType basetype = Unknown; + uint32_t width = 0; + uint32_t vecsize = 1; + uint32_t columns = 1; + + // Arrays, suport array of arrays by having a vector of array sizes. + std::vector array; + + // Pointers + bool pointer = false; + spv::StorageClass storage = spv::StorageClassGeneric; + + std::vector member_types; + + struct Image + { + uint32_t type; + spv::Dim dim; + bool depth; + bool arrayed; + bool ms; + uint32_t sampled; + spv::ImageFormat format; + } image; + }; + + struct SPIRExtension : IVariant + { + enum { type = TypeExtension }; + + enum Extension + { + GLSL + }; + + SPIRExtension(Extension ext) + : ext(ext) {} + + Extension ext; + }; + + struct SPIRExpression : IVariant + { + enum { type = TypeExpression }; + + // Only created by the backend target to avoid creating tons of temporaries. + SPIRExpression(std::string expr, uint32_t expression_type, bool immutable) + : expression(move(expr)), expression_type(expression_type), immutable(immutable) {} + + // If non-zero, prepend expression with to_expression(base_expression). + // Used in amortizing multiple calls to to_expression() + // where in certain cases that would quickly force a temporary when not needed. + uint32_t base_expression = 0; + + std::string expression; + uint32_t expression_type = 0; + + // If this expression is a forwarded load, + // allow us to reference the original variable. + uint32_t loaded_from = 0; + + // If this expression will never change, we can avoid lots of temporaries + // in high level source. + bool immutable = false; + + // If this expression has been used while invalidated. + bool used_while_invalidated = false; + + // A list of a variables for which this expression was invalidated by. 
+ std::vector invalidated_by; + }; + + struct SPIRFunctionPrototype : IVariant + { + enum { type = TypeFunctionPrototype }; + + SPIRFunctionPrototype(uint32_t return_type) + : return_type(return_type) {} + + uint32_t return_type; + std::vector parameter_types; + }; + + struct SPIRBlock : IVariant + { + enum { type = TypeBlock }; + + enum Terminator + { + Unknown, + Direct, // Emit next block directly without a particular condition. + + Select, // Block ends with an if/else block. + MultiSelect, // Block ends with switch statement. + Loop, // Block ends with a loop. + + Return, // Block ends with return. + Unreachable, // Noop + Kill // Discard + }; + + enum Merge + { + MergeNone, + MergeLoop, + MergeSelection + }; + + enum Method + { + MergeToSelectForLoop, + MergeToDirectForLoop + }; + + enum ContinueBlockType + { + ContinueNone, + + // Continue block is branchless and has at least one instruction. + ForLoop, + + // Noop continue block. + WhileLoop, + + // Continue block is conditional. + DoWhileLoop, + + // Highly unlikely that anything will use this, + // since it is really awkward/impossible to express in GLSL. + ComplexLoop + }; + + enum { NoDominator = 0xffffffffu }; + + Terminator terminator = Unknown; + Merge merge = MergeNone; + uint32_t next_block = 0; + uint32_t merge_block = 0; + uint32_t continue_block = 0; + + uint32_t return_value = 0; // If 0, return nothing (void). + uint32_t condition = 0; + uint32_t true_block = 0; + uint32_t false_block = 0; + uint32_t default_block = 0; + + std::vector ops; + + struct Phi + { + uint32_t local_variable; // flush local variable ... + uint32_t parent; // If we're in from_block and want to branch into this block ... + uint32_t function_variable; // to this function-global "phi" variable first. + }; + + // Before entering this block flush out local variables to magical "phi" variables. + std::vector phi_variables; + + // Declare these temporaries before beginning the block. + // Used for handling complex continue blocks which have side effects. + std::vector> declare_temporary; + + struct Case + { + uint32_t value; + uint32_t block; + }; + std::vector cases; + + // If we have tried to optimize code for this block but failed, + // keep track of this. + bool disable_block_optimization = false; + + // If the continue block is complex, fallback to "dumb" for loops. + bool complex_continue = false; + + // The dominating block which this block might be within. + // Used in continue; blocks to determine if we really need to write continue. + uint32_t loop_dominator = 0; + }; + + struct SPIRFunction : IVariant + { + enum { type = TypeFunction }; + + SPIRFunction(uint32_t return_type, uint32_t function_type) + : return_type(return_type), function_type(function_type) + {} + + struct Parameter + { + uint32_t type; + uint32_t id; + uint32_t read_count; + uint32_t write_count; + }; + + uint32_t return_type; + uint32_t function_type; + std::vector arguments; + std::vector local_variables; + uint32_t entry_block = 0; + std::vector blocks; + + void add_local_variable(uint32_t id) + { + local_variables.push_back(id); + } + + void add_parameter(uint32_t type, uint32_t id) + { + // Arguments are read-only until proven otherwise. 
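+            // The two trailing zeroes are read_count and write_count; the
+            // backend bumps them via register_read()/register_write(), and
+            // the first write to a parameter forces a recompile so it can be
+            // redeclared as a non-const reference (see argument_decl in
+            // spir2cpp.cpp).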
+ arguments.push_back({ type, id, 0u, 0u }); + } + + bool active = false; + bool flush_undeclared = true; + }; + + struct SPIRVariable : IVariant + { + enum { type = TypeVariable }; + + SPIRVariable() = default; + SPIRVariable(uint32_t basetype, spv::StorageClass storage, uint32_t initializer = 0) + : basetype(basetype), storage(storage), initializer(initializer) + {} + + uint32_t basetype = 0; + spv::StorageClass storage = spv::StorageClassGeneric; + uint32_t decoration = 0; + uint32_t initializer = 0; + + std::vector dereference_chain; + bool compat_builtin = false; + + // If a variable is shadowed, we only statically assign to it + // and never actually emit a statement for it. + // When we read the variable as an expression, just forward + // shadowed_id as the expression. + bool statically_assigned = false; + uint32_t static_expression = 0; + + // Temporaries which can remain forwarded as long as this variable is not modified. + std::vector dependees; + bool forwardable = true; + + bool deferred_declaration = false; + bool phi_variable = false; + bool remapped_variable = false; + + SPIRFunction::Parameter *parameter = nullptr; + }; + + struct SPIRConstant : IVariant + { + enum { type = TypeConstant }; + + union Constant + { + uint32_t u32; + int32_t i32; + float f32; + }; + + struct ConstantVector + { + Constant r[4]; + uint32_t vecsize; + }; + + struct ConstantMatrix + { + ConstantVector c[4]; + uint32_t columns; + }; + + inline uint32_t scalar(uint32_t col = 0, uint32_t row = 0) const + { + return m.c[col].r[row].u32; + } + + inline float scalar_f32(uint32_t col = 0, uint32_t row = 0) const + { + return m.c[col].r[row].f32; + } + + inline int scalar_i32(uint32_t col = 0, uint32_t row = 0) const + { + return m.c[col].r[row].i32; + } + + inline const ConstantVector& vector() const { return m.c[0]; } + inline uint32_t vector_size() const { return m.c[0].vecsize; } + inline uint32_t columns() const { return m.columns; } + + SPIRConstant(uint32_t constant_type, const uint32_t *elements, uint32_t num_elements) : + constant_type(constant_type) + { + subconstants.insert(end(subconstants), elements, elements + num_elements); + } + + SPIRConstant(uint32_t constant_type, uint32_t v0) : + constant_type(constant_type) + { + m.c[0].r[0].u32 = v0; + m.c[0].vecsize = 1; + m.columns = 1; + } + + SPIRConstant(uint32_t constant_type, uint32_t v0, uint32_t v1) : + constant_type(constant_type) + { + m.c[0].r[0].u32 = v0; + m.c[0].r[1].u32 = v1; + m.c[0].vecsize = 2; + m.columns = 1; + } + + SPIRConstant(uint32_t constant_type, uint32_t v0, uint32_t v1, uint32_t v2) : + constant_type(constant_type) + { + m.c[0].r[0].u32 = v0; + m.c[0].r[1].u32 = v1; + m.c[0].r[2].u32 = v2; + m.c[0].vecsize = 3; + m.columns = 1; + } + + SPIRConstant(uint32_t constant_type, uint32_t v0, uint32_t v1, uint32_t v2, uint32_t v3) : + constant_type(constant_type) + { + m.c[0].r[0].u32 = v0; + m.c[0].r[1].u32 = v1; + m.c[0].r[2].u32 = v2; + m.c[0].r[3].u32 = v3; + m.c[0].vecsize = 4; + m.columns = 1; + } + + SPIRConstant(uint32_t constant_type, + const ConstantVector &vec0) : + constant_type(constant_type) + { + m.columns = 1; + m.c[0] = vec0; + } + + SPIRConstant(uint32_t constant_type, + const ConstantVector &vec0, const ConstantVector &vec1) : + constant_type(constant_type) + { + m.columns = 2; + m.c[0] = vec0; + m.c[1] = vec1; + } + + SPIRConstant(uint32_t constant_type, + const ConstantVector &vec0, const ConstantVector &vec1, + const ConstantVector &vec2) : + constant_type(constant_type) + { + m.columns = 3; + m.c[0] = 
vec0; + m.c[1] = vec1; + m.c[2] = vec2; + } + + SPIRConstant(uint32_t constant_type, + const ConstantVector &vec0, const ConstantVector &vec1, + const ConstantVector &vec2, const ConstantVector &vec3) : + constant_type(constant_type) + { + m.columns = 4; + m.c[0] = vec0; + m.c[1] = vec1; + m.c[2] = vec2; + m.c[3] = vec3; + } + + uint32_t constant_type; + ConstantMatrix m; + bool specialization = false; // If the constant is a specialization constant. + + // For composites which are constant arrays, etc. + std::vector subconstants; + }; + + class Variant + { + public: + // MSVC 2013 workaround, we shouldn't need these constructors. + Variant() = default; + Variant(Variant&& other) { *this = std::move(other); } + Variant& operator=(Variant&& other) + { + if (this != &other) + { + holder = move(other.holder); + type = other.type; + other.type = TypeNone; + } + return *this; + } + + void set(std::unique_ptr val, uint32_t type) + { + holder = std::move(val); + if (this->type != TypeNone && this->type != type) + throw CompilerError("Overwriting a variant with new type."); + this->type = type; + } + + template + T& get() + { + if (!holder) + throw CompilerError("nullptr"); + if (T::type != type) + throw CompilerError("Bad cast"); + return *static_cast(holder.get()); + } + + template + const T& get() const + { + if (!holder) + throw CompilerError("nullptr"); + if (T::type != type) + throw CompilerError("Bad cast"); + return *static_cast(holder.get()); + } + + uint32_t get_type() const { return type; } + bool empty() const { return !holder; } + void reset() { holder.reset(); type = TypeNone; } + + private: + std::unique_ptr holder; + uint32_t type = TypeNone; + }; + + template + T& variant_get(Variant &var) + { + return var.get(); + } + + template + const T& variant_get(const Variant &var) + { + return var.get(); + } + + template + T& variant_set(Variant &var, P&&... args) + { + auto uptr = std::unique_ptr(new T(std::forward
<P>
(args)...)); + auto ptr = uptr.get(); + var.set(std::move(uptr), T::type); + return *ptr; + } + + struct Meta + { + struct Decoration + { + std::string alias; + uint64_t decoration_flags = 0; + spv::BuiltIn builtin_type; + uint32_t location = 0; + uint32_t set = 0; + uint32_t binding = 0; + uint32_t offset = 0; + uint32_t array_stride = 0; + bool builtin = false; + }; + + Decoration decoration; + std::vector members; + }; +} + +#endif + diff --git a/spir2cpp.cpp b/spir2cpp.cpp new file mode 100644 index 0000000..ae34ec6 --- /dev/null +++ b/spir2cpp.cpp @@ -0,0 +1,436 @@ +/* + * Copyright 2015-2016 ARM Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "spir2cpp.hpp" + +using namespace spv; +using namespace spir2cross; +using namespace std; + +void CompilerCPP::emit_buffer_block(const SPIRVariable &var) +{ + auto &type = get(var.basetype); + auto instance_name = to_name(var.self); + + uint32_t set = meta[var.self].decoration.set; + uint32_t binding = meta[var.self].decoration.binding; + + emit_struct(type); + statement("internal::Resource<", type_to_glsl(type), type_to_array_glsl(type), "> ", instance_name, "__;"); + statement_no_indent("#define ", instance_name, " __res->", instance_name, "__.get()"); + resource_registrations.push_back(join("s.register_resource(", instance_name, "__", ", ", set, ", ", binding, ");")); + statement(""); +} + +void CompilerCPP::emit_interface_block(const SPIRVariable &var) +{ + auto &type = get(var.basetype); + + const char *qual = var.storage == StorageClassInput ? "StageInput" : "StageOutput"; + const char *lowerqual = var.storage == StorageClassInput ? 
"stage_input" : "stage_output"; + auto instance_name = to_name(var.self); + uint32_t location = meta[var.self].decoration.location; + + auto flags = meta[type.self].decoration.decoration_flags; + if (flags & (1ull << DecorationBlock)) + emit_struct(type); + + statement("internal::", qual, "<", type_to_glsl(type), type_to_array_glsl(type), "> ", instance_name, "__;"); + statement_no_indent("#define ", instance_name, " __res->", instance_name, "__.get()"); + resource_registrations.push_back(join("s.register_", lowerqual, "(", instance_name, "__", ", ", location, ");")); + statement(""); +} + +void CompilerCPP::emit_shared(const SPIRVariable &var) +{ + auto instance_name = to_name(var.self); + statement(variable_decl(var), ";"); + statement_no_indent("#define ", instance_name, " __res->", instance_name); +} + +void CompilerCPP::emit_uniform(const SPIRVariable &var) +{ + auto &type = get(var.basetype); + auto instance_name = to_name(var.self); + + uint32_t set = meta[var.self].decoration.set; + uint32_t binding = meta[var.self].decoration.binding; + uint32_t location = meta[var.self].decoration.location; + + if (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage || type.basetype == SPIRType::AtomicCounter) + { + statement("internal::Resource<", type_to_glsl(type), type_to_array_glsl(type), "> ", instance_name, "__;"); + statement_no_indent("#define ", instance_name, " __res->", instance_name, "__.get()"); + resource_registrations.push_back(join("s.register_resource(", instance_name, "__", ", ", set, ", ", binding, ");")); + } + else + { + statement("internal::UniformConstant<", type_to_glsl(type), type_to_array_glsl(type), "> ", instance_name, "__;"); + statement_no_indent("#define ", instance_name, " __res->", instance_name, "__.get()"); + resource_registrations.push_back(join("s.register_uniform_constant(", instance_name, "__", ", ", location, ");")); + } + + statement(""); +} + +void CompilerCPP::emit_push_constant_block(const SPIRVariable &var) +{ + auto &type = get(var.basetype); + auto &flags = meta[var.self].decoration.decoration_flags; + if ((flags & (1ull << DecorationBinding)) || (flags & (1ull << DecorationDescriptorSet))) + throw CompilerError("Push constant blocks cannot be compiled to GLSL with Binding or Set syntax. " + "Remap to location with reflection API first or disable these decorations."); + + emit_struct(type); + auto instance_name = to_name(var.self); + + statement("internal::PushConstant<", type_to_glsl(type), type_to_array_glsl(type), "> ", instance_name, ";"); + statement_no_indent("#define ", instance_name, " __res->", instance_name, ".get()"); + resource_registrations.push_back(join("s.register_push_constant(", instance_name, "__", ");")); + statement(""); +} + +void CompilerCPP::emit_resources() +{ + // Output all basic struct types which are not Block or BufferBlock as these are declared inplace + // when such variables are instantiated. 
+ for (auto &id : ids) + { + if (id.get_type() == TypeType) + { + auto &type = id.get(); + if (type.basetype == SPIRType::Struct && + type.array.empty() && + !type.pointer && + (meta[type.self].decoration.decoration_flags & ((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))) == 0) + { + emit_struct(type); + } + } + } + + statement("struct Resources : ", resource_type); + begin_scope(); + + // Output UBOs and SSBOs + for (auto &id : ids) + { + if (id.get_type() == TypeVariable) + { + auto &var = id.get(); + auto &type = get(var.basetype); + + if (type.pointer && type.storage == StorageClassUniform && + !is_builtin_variable(var) && + (meta[type.self].decoration.decoration_flags & ((1ull << DecorationBlock) | (1ull << DecorationBufferBlock)))) + { + emit_buffer_block(var); + } + } + } + + // Output push constant blocks + for (auto &id : ids) + { + if (id.get_type() == TypeVariable) + { + auto &var = id.get(); + auto &type = get(var.basetype); + if (type.pointer && type.storage == StorageClassPushConstant) + emit_push_constant_block(var); + } + } + + // Output in/out interfaces. + for (auto &id : ids) + { + if (id.get_type() == TypeVariable) + { + auto &var = id.get(); + auto &type = get(var.basetype); + + if (!is_builtin_variable(var) && + !var.remapped_variable && + type.pointer && + (var.storage == StorageClassInput || var.storage == StorageClassOutput)) + { + emit_interface_block(var); + } + } + } + + // Output Uniform Constants (values, samplers, images, etc). + for (auto &id : ids) + { + if (id.get_type() == TypeVariable) + { + auto &var = id.get(); + auto &type = get(var.basetype); + + if (!is_builtin_variable(var) && !var.remapped_variable && type.pointer && + (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter)) + { + emit_uniform(var); + } + } + } + + // Global variables. + bool emitted = false; + for (auto global : global_variables) + { + auto &var = get(global); + if (var.storage == StorageClassWorkgroup) + { + emit_shared(var); + emitted = true; + } + } + + if (emitted) + statement(""); + + statement("inline void init(spir2cross_shader& s)"); + begin_scope(); + statement(resource_type, "::init(s);"); + for (auto ® : resource_registrations) + statement(reg); + end_scope(); + resource_registrations.clear(); + + end_scope_decl(); + + statement(""); + statement("Resources* __res;"); + if (execution.model == ExecutionModelGLCompute) + statement("ComputePrivateResources __priv_res;"); + statement(""); + + // Emit regular globals which are allocated per invocation. + emitted = false; + for (auto global : global_variables) + { + auto &var = get(global); + if (var.storage == StorageClassPrivate) + { + if (var.storage == StorageClassWorkgroup) + emit_shared(var); + else + statement(variable_decl(var), ";"); + emitted = true; + } + } + + if (emitted) + statement(""); +} + +string CompilerCPP::compile() +{ + // Do not deal with ES-isms like precision, older extensions and such. + options.es = false; + options.version = 450; + backend.float_literal_suffix = true; + backend.uint32_t_literal_suffix = true; + backend.basic_int_type = "int32_t"; + backend.basic_uint_type = "uint32_t"; + backend.swizzle_is_function = true; + backend.shared_is_implied = true; + + uint32_t pass_count = 0; + do + { + if (pass_count >= 2) + throw CompilerError("Over 2 compilation loops detected. Must be a bug!"); + + resource_registrations.clear(); + reset(); + + // Move constructor for this type is broken on GCC 4.9 ... 
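+        // so start each pass with a freshly allocated ostringstream instead
+        // of move-assigning into the old one.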
+ buffer = unique_ptr(new ostringstream()); + + emit_header(); + emit_resources(); + + emit_function(get(execution.entry_point), 0); + + pass_count++; + } while (force_recompile); + + // Match opening scope of emit_header(). + end_scope_decl(); + // namespace + end_scope(); + + // Emit C entry points + emit_c_linkage(); + + return buffer->str(); +} + +void CompilerCPP::emit_c_linkage() +{ + statement(""); + + statement("spir2cross_shader_t* spir2cross_construct(void)"); + begin_scope(); + statement("return new ", impl_type, "();"); + end_scope(); + + statement(""); + statement("void spir2cross_destruct(spir2cross_shader_t *shader)"); + begin_scope(); + statement("delete static_cast<", impl_type, "*>(shader);"); + end_scope(); + + statement(""); + statement("void spir2cross_invoke(spir2cross_shader_t *shader)"); + begin_scope(); + statement("static_cast<", impl_type, "*>(shader)->invoke();"); + end_scope(); + + statement(""); + statement("static const struct spir2cross_interface vtable ="); + begin_scope(); + statement("spir2cross_construct,"); + statement("spir2cross_destruct,"); + statement("spir2cross_invoke,"); + end_scope_decl(); + + statement(""); + statement("const struct spir2cross_interface* spir2cross_get_interface(void)"); + begin_scope(); + statement("return &vtable;"); + end_scope(); +} + +void CompilerCPP::emit_function_prototype(SPIRFunction &func, uint64_t) +{ + local_variables.clear(); + string decl; + + auto &type = get(func.return_type); + decl += "inline "; + decl += type_to_glsl(type); + decl += " "; + + if (func.self == execution.entry_point) + { + decl += "main"; + processing_entry_point = true; + } + else + decl += to_name(func.self); + + decl += "("; + for (auto &arg : func.arguments) + { + add_local_variable(arg.id); + + decl += argument_decl(arg); + if (&arg != &func.arguments.back()) + decl += ", "; + + // Hold a pointer to the parameter so we can invalidate the readonly field if needed. + auto *var = maybe_get(arg.id); + if (var) + var->parameter = &arg; + } + + decl += ")"; + statement(decl); +} + +string CompilerCPP::argument_decl(const SPIRFunction::Parameter &arg) +{ + auto &type = expression_type(arg.id); + bool constref = !type.pointer || arg.write_count == 0; + + auto &var = get(arg.id); + return join(constref ? 
"const " : "", + type_to_glsl(type), "& ", to_name(var.self), type_to_array_glsl(type)); +} + +void CompilerCPP::emit_header() +{ + statement("// This C++ shader is autogenerated by spir2cross."); + statement("#include \"spir2cross/internal_interface.hpp\""); + statement("#include \"spir2cross/external_interface.h\""); + statement("#include "); + statement(""); + statement("using namespace spir2cross;"); + statement("using namespace glm;"); + statement(""); + + statement("namespace Impl"); + begin_scope(); + + switch (execution.model) + { + case ExecutionModelGeometry: + case ExecutionModelTessellationControl: + case ExecutionModelTessellationEvaluation: + case ExecutionModelGLCompute: + case ExecutionModelFragment: + case ExecutionModelVertex: + statement("struct Shader"); + begin_scope(); + break; + + default: + throw CompilerError("Unsupported execution model."); + } + + switch (execution.model) + { + case ExecutionModelGeometry: + impl_type = "GeometryShader"; + resource_type = "GeometryResources"; + break; + + case ExecutionModelVertex: + impl_type = "VertexShader"; + resource_type = "VertexResources"; + break; + + case ExecutionModelFragment: + impl_type = "FragmentShader"; + resource_type = "FragmentResources"; + break; + + case ExecutionModelGLCompute: + impl_type = join("ComputeShader"); + resource_type = "ComputeResources"; + break; + + case ExecutionModelTessellationControl: + impl_type = "TessControlShader"; + resource_type = "TessControlResources"; + break; + + case ExecutionModelTessellationEvaluation: + impl_type = "TessEvaluationShader"; + resource_type = "TessEvaluationResources"; + break; + + default: + throw CompilerError("Unsupported execution model."); + } +} + diff --git a/spir2cpp.hpp b/spir2cpp.hpp new file mode 100644 index 0000000..c6d5989 --- /dev/null +++ b/spir2cpp.hpp @@ -0,0 +1,53 @@ +/* + * Copyright 2015-2016 ARM Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef SPIR2CPP +#define SPIR2CPP + +#include "spir2glsl.hpp" +#include + +namespace spir2cross +{ + class CompilerCPP : public CompilerGLSL + { + public: + CompilerCPP(std::vector spirv) : CompilerGLSL(move(spirv)) {} + std::string compile() override; + + private: + void emit_header() override; + void emit_c_linkage(); + void emit_function_prototype(SPIRFunction &func, uint64_t return_flags) override; + + void emit_resources(); + void emit_buffer_block(const SPIRVariable &type); + void emit_push_constant_block(const SPIRVariable &var); + void emit_interface_block(const SPIRVariable &type); + void emit_block_chain(SPIRBlock &block); + void emit_uniform(const SPIRVariable &var); + void emit_shared(const SPIRVariable &var); + + std::string argument_decl(const SPIRFunction::Parameter &arg); + + std::vector resource_registrations; + std::string impl_type; + std::string resource_type; + uint32_t shared_counter = 0; + }; +} + +#endif diff --git a/spir2cross.cpp b/spir2cross.cpp new file mode 100644 index 0000000..49e438f --- /dev/null +++ b/spir2cross.cpp @@ -0,0 +1,1802 @@ +/* + * Copyright 2015-2016 ARM Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "spir2cross.hpp" +#include "GLSL.std.450.h" +#include +#include +#include + +using namespace std; +using namespace spv; +using namespace spir2cross; + +#define log(...) 
fprintf(stderr, __VA_ARGS__) + +Instruction::Instruction(const vector &spirv, uint32_t &index) +{ + op = spirv[index] & 0xffff; + count = (spirv[index] >> 16) & 0xffff; + offset = index + 1; + length = count - 1; + + index += count; + + if (index > spirv.size()) + throw CompilerError("SPIR-V instruction goes out of bounds."); +} + +Compiler::Compiler(vector ir) + : spirv(move(ir)) +{ + parse(); +} + +string Compiler::compile() { return ""; } + +bool Compiler::variable_storage_is_aliased(const SPIRVariable &v) +{ + auto &type = get(v.basetype); + bool ssbo = (meta[type.self].decoration.decoration_flags & (1ull << DecorationBufferBlock)) != 0; + bool image = type.basetype == SPIRType::Image; + bool counter = type.basetype == SPIRType::AtomicCounter; + return ssbo || image || counter; +} + +bool Compiler::block_is_pure(const SPIRBlock &block) +{ + for (auto &i : block.ops) + { + auto ops = stream(i.offset); + auto op = static_cast(i.op); + + switch (op) + { + case OpFunctionCall: + { + uint32_t func = ops[2]; + if (!function_is_pure(get(func))) + return false; + break; + } + + case OpStore: + { + auto &type = expression_type(ops[0]); + if (type.storage != StorageClassFunction) + return false; + break; + } + + case OpImageWrite: + return false; + + default: + break; + } + } + + return true; +} + +string Compiler::to_name(uint32_t id) +{ + if (meta[id].decoration.alias.empty()) + return join("_", id); + else + return meta.at(id).decoration.alias; +} + +bool Compiler::function_is_pure(const SPIRFunction &func) +{ + for (auto block : func.blocks) + { + if (!block_is_pure(get(block))) + { + //fprintf(stderr, "Function %s is impure!\n", to_name(func.self).c_str()); + return false; + } + } + + //fprintf(stderr, "Function %s is pure!\n", to_name(func.self).c_str()); + return true; +} + +void Compiler::register_global_read_dependencies(const SPIRBlock &block, uint32_t id) +{ + for (auto &i : block.ops) + { + auto ops = stream(i.offset); + auto op = static_cast(i.op); + + switch (op) + { + case OpFunctionCall: + { + uint32_t func = ops[2]; + register_global_read_dependencies(get(func), id); + break; + } + + case OpLoad: + case OpImageRead: + { + // If we're in a storage class which does not get invalidated, adding dependencies here is no big deal. + auto *var = maybe_get_backing_variable(ops[2]); + if (var && var->storage != StorageClassFunction) + { + auto &type = get(var->basetype); + + // InputTargets are immutable. + if (type.basetype != SPIRType::Image && type.image.dim != DimSubpassData) + var->dependees.push_back(id); + } + break; + } + + default: + break; + } + } +} + +void Compiler::register_global_read_dependencies(const SPIRFunction &func, uint32_t id) +{ + for (auto block : func.blocks) + register_global_read_dependencies(get(block), id); +} + +SPIRVariable* Compiler::maybe_get_backing_variable(uint32_t chain) +{ + auto *var = maybe_get(chain); + if (!var) + { + auto *cexpr = maybe_get(chain); + if (cexpr) + var = maybe_get(cexpr->loaded_from); + } + + return var; +} + +void Compiler::register_read(uint32_t expr, uint32_t chain, bool forwarded) +{ + auto &e = get(expr); + auto *var = maybe_get_backing_variable(chain); + + if (var) + { + e.loaded_from = var->self; + + if (forwarded) + var->dependees.push_back(e.self); + + // If we load from a parameter, make sure we create "inout" if we also write to the parameter. + // The default is "in" however, so we never invalidate our complication by reading. 
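+        // (Reads alone never set force_recompile; only register_write() does,
+        // when it sees the first store to a parameter still marked read-only.)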
+ if (var && var->parameter) + var->parameter->read_count++; + } +} + +void Compiler::register_write(uint32_t chain) +{ + auto *var = maybe_get(chain); + if (!var) + { + // If we're storing through an access chain, invalidate the backing variable instead. + auto *expr = maybe_get(chain); + if (expr && expr->loaded_from) + var = maybe_get(expr->loaded_from); + } + + if (var) + { + // If our variable is in a storage class which can alias with other buffers, + // invalidate all variables which depend on aliased variables. + if (variable_storage_is_aliased(*var)) + flush_all_aliased_variables(); + else if (var) + flush_dependees(*var); + + // We tried to write to a parameter which is not marked with out qualifier, force a recompile. + if (var->parameter && var->parameter->write_count == 0) + { + var->parameter->write_count++; + force_recompile = true; + } + } +} + +void Compiler::flush_dependees(SPIRVariable &var) +{ + for (auto expr : var.dependees) + { + invalid_expressions.insert(expr); + get(expr).invalidated_by.push_back(var.self); + } + var.dependees.clear(); +} + +void Compiler::flush_all_aliased_variables() +{ + for (auto aliased : aliased_variables) + flush_dependees(get(aliased)); +} + +void Compiler::flush_all_atomic_capable_variables() +{ + for (auto global : global_variables) + flush_dependees(get(global)); + flush_all_aliased_variables(); +} + +void Compiler::flush_all_active_variables() +{ + // Invalidate all temporaries we read from variables in this block since they were forwarded. + // Invalidate all temporaries we read from globals. + for (auto &v : function->local_variables) + flush_dependees(get(v)); + for (auto &arg : function->arguments) + flush_dependees(get(arg.id)); + for (auto global : global_variables) + flush_dependees(get(global)); + + flush_all_aliased_variables(); +} + +const SPIRType& Compiler::expression_type(uint32_t id) const +{ + switch (ids[id].get_type()) + { + case TypeVariable: + return get(get(id).basetype); + + case TypeExpression: + return get(get(id).expression_type); + + case TypeConstant: + return get(get(id).constant_type); + + case TypeUndef: + return get(get(id).basetype); + + default: + throw CompilerError("Cannot resolve expression type."); + } +} + +bool Compiler::expression_is_lvalue(uint32_t id) const +{ + auto &type = expression_type(id); + switch (type.basetype) + { + case SPIRType::SampledImage: + case SPIRType::Image: + return false; + + default: + return true; + } +} + +bool Compiler::is_immutable(uint32_t id) const +{ + if (ids[id].get_type() == TypeVariable) + { + auto &var = get(id); + return var.phi_variable || var.forwardable || !expression_is_lvalue(id); + } + else if (ids[id].get_type() == TypeExpression) + return get(id).immutable; + else if (ids[id].get_type() == TypeConstant || ids[id].get_type() == TypeUndef) + return true; + else + return false; +} + +bool Compiler::is_builtin_variable(const SPIRVariable &var) const +{ + if (var.compat_builtin || meta[var.self].decoration.builtin) + return true; + + // We can have builtin structs as well. If one member of a struct is builtin, the struct must also be builtin. 
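+    // E.g. the gl_PerVertex interface block is declared this way in SPIR-V:
+    // each member (gl_Position, gl_PointSize, ...) carries a BuiltIn member
+    // decoration while the OpVariable itself may carry none, so only this
+    // member scan identifies such a variable as builtin.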
+ for (auto &m : meta[get(var.basetype).self].members) + if (m.builtin) + return true; + + return false; +} + +bool Compiler::is_member_builtin(const SPIRType &type, uint32_t index, BuiltIn *builtin) const +{ + auto &memb = meta[type.self].members; + if (index < memb.size() && memb[index].builtin) + { + if (builtin) + *builtin = memb[index].builtin_type; + return true; + } + + return false; +} + +ShaderResources Compiler::get_shader_resources() const +{ + ShaderResources res; + + for (auto &id : ids) + { + if (id.get_type() != TypeVariable) + continue; + + auto &var = id.get(); + auto &type = get(var.basetype); + + if (!type.pointer || is_builtin_variable(var)) + continue; + + // Input + if (var.storage == StorageClassInput) + { + if (meta[type.self].decoration.decoration_flags & (1ull << DecorationBlock)) + res.stage_inputs.push_back({ var.self, type.self, meta[type.self].decoration.alias }); + else + res.stage_inputs.push_back({ var.self, type.self, meta[var.self].decoration.alias }); + } + // Subpass inputs + else if (var.storage == StorageClassUniformConstant && + type.image.dim == DimSubpassData) + { + res.subpass_inputs.push_back({ var.self, type.self, meta[var.self].decoration.alias }); + } + // Outputs + else if (var.storage == StorageClassOutput) + { + if (meta[type.self].decoration.decoration_flags & (1ull << DecorationBlock)) + res.stage_outputs.push_back({ var.self, type.self, meta[type.self].decoration.alias }); + else + res.stage_outputs.push_back({ var.self, type.self, meta[var.self].decoration.alias }); + } + // UBOs + else if (type.storage == StorageClassUniform && + (meta[type.self].decoration.decoration_flags & (1ull << DecorationBlock))) + { + res.uniform_buffers.push_back({ var.self, type.self, meta[type.self].decoration.alias }); + } + // SSBOs + else if (type.storage == StorageClassUniform && + (meta[type.self].decoration.decoration_flags & (1ull << DecorationBufferBlock))) + { + res.storage_buffers.push_back({ var.self, type.self, meta[type.self].decoration.alias }); + } + // Push constant blocks + else if (type.storage == StorageClassPushConstant) + { + // There can only be one push constant block, but keep the vector in case this restriction is lifted + // in the future. 
+ res.push_constant_buffers.push_back({ var.self, type.self, meta[var.self].decoration.alias }); + } + // Images + else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::Image) + { + res.storage_images.push_back({ var.self, type.self, meta[var.self].decoration.alias }); + } + // Textures + else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::SampledImage) + { + res.sampled_images.push_back({ var.self, type.self, meta[var.self].decoration.alias }); + } + // Atomic counters + else if (type.storage == StorageClassAtomicCounter) + { + res.atomic_counters.push_back({ var.self, type.self, meta[var.self].decoration.alias }); + } + } + + return res; +} + +static inline uint32_t swap_endian(uint32_t v) +{ + return ((v >> 24) & 0x000000ffu) | + ((v >> 8) & 0x0000ff00u) | + ((v << 8) & 0x00ff0000u) | + ((v << 24) & 0xff000000u); +} + +static string extract_string(const vector &spirv, uint32_t offset) +{ + string ret; + for (uint32_t i = offset; i < spirv.size(); i++) + { + uint32_t w = spirv[i]; + + for (uint32_t j = 0; j < 4; j++, w >>= 8) + { + char c = w & 0xff; + if (c == '\0') + return ret; + ret += c; + } + } + + throw CompilerError("String was not terminated before EOF"); +} + +void Compiler::parse() +{ + auto len = spirv.size(); + auto s = stream(0); + + if (len < 5) + throw CompilerError("SPIRV file too small."); + + // Endian-swap if we need to. + if (s[0] == swap_endian(MagicNumber)) + transform(begin(spirv), end(spirv), begin(spirv), [](uint32_t c) { return swap_endian(c); }); + + // Allow v99 since it tends to just work, but warn about this. + if (s[0] != MagicNumber || (s[1] != Version && s[1] != 99)) + throw CompilerError("Invalid SPIRV format."); + + if (s[1] != Version) + { + fprintf(stderr, "SPIR2CROSS was compiled against SPIR-V version %d, but SPIR-V uses version %u. Buggy behavior due to ABI incompatibility might occur.\n", + Version, s[1]); + } + + uint32_t bound = s[3]; + ids.resize(bound); + meta.resize(bound); + + uint32_t offset = 5; + while (offset < len) + inst.emplace_back(spirv, offset); + + for (auto &i : inst) + parse(i); + + if (function) + throw CompilerError("Function was not terminated."); + if (block) + throw CompilerError("Block was not terminated."); +} + +void Compiler::flatten_interface_block(uint32_t id) +{ + auto &var = get(id); + auto &type = get(var.basetype); + auto flags = meta.at(type.self).decoration.decoration_flags; + + if (!type.array.empty()) + throw CompilerError("Type is array of UBOs."); + if (type.basetype != SPIRType::Struct) + throw CompilerError("Type is not a struct."); + if ((flags & (1ull << DecorationBlock)) == 0) + throw CompilerError("Type is not a block."); + if (type.member_types.empty()) + throw CompilerError("Member list of struct is empty."); + + uint32_t t = type.member_types[0]; + for (auto &m : type.member_types) + if (t != m) + throw CompilerError("Types in block differ."); + + auto &mtype = get(t); + if (!mtype.array.empty()) + throw CompilerError("Member type cannot be arrays."); + if (mtype.basetype == SPIRType::Struct) + throw CompilerError("Member type cannot be struct."); + + // Inherit variable name from interface block name. + meta.at(var.self).decoration.alias = meta.at(type.self).decoration.alias; + + auto storage = var.storage; + if (storage == StorageClassUniform) + storage = StorageClassUniformConstant; + + // Change type definition in-place into an array instead. + // Access chains will still work as-is. 
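+    // Illustrative example of the transform (hypothetical names, GLSL view):
+    //
+    //   uniform Samples { vec4 s0; vec4 s1; vec4 s2; vec4 s3; };   // before
+    //   uniform vec4 Samples[4];                                   // after
+    //
+    // An access chain selecting member 2 now resolves to Samples[2], since the
+    // member index doubles as the array index.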
+ uint32_t array_size = uint32_t(type.member_types.size()); + type = mtype; + type.array.push_back(array_size); + type.pointer = true; + type.storage = storage; + var.storage = storage; +} + +void Compiler::update_name_cache(unordered_set &cache, string &name) +{ + if (name.empty()) + return; + + if (cache.find(name) == end(cache)) + { + cache.insert(name); + return; + } + + uint32_t counter = 0; + auto tmpname = name; + + // If there is a collision (very rare), + // keep tacking on extra identifier until it's unique. + do + { + counter++; + name = tmpname + "_" + convert_to_string(counter); + } while (cache.find(name) != end(cache)); + cache.insert(name); +} + +void Compiler::set_name(uint32_t id, const std::string& name) +{ + auto &str = meta.at(id).decoration.alias; + str.clear(); + + if (name.empty()) + return; + // Reserved for temporaries. + if (name[0] == '_') + return; + + // Functions in glslangValidator are mangled with name( stuff. + // Normally, we would never see '(' in any legal indentifiers, so just strip them out. + str = name.substr(0, name.find('(')); + + for (uint32_t i = 0; i < str.size(); i++) + { + auto &c = str[i]; + + // _ variables are reserved by the internal implementation, + // otherwise, make sure the name is a valid identifier. + if (i == 0 || (str[0] == '_' && i == 1)) + c = isalpha(c) ? c : '_'; + else + c = isalnum(c) ? c : '_'; + } +} + +const SPIRType& Compiler::get_type(uint32_t id) const +{ + return get(id); +} + +void Compiler::set_member_decoration(uint32_t id, uint32_t index, Decoration decoration, uint32_t argument) +{ + meta.at(id).members.resize(max(meta[id].members.size(), size_t(index) + 1)); + auto &dec = meta.at(id).members[index]; + dec.decoration_flags |= 1ull << decoration; + + switch (decoration) + { + case DecorationBuiltIn: + dec.builtin = true; + dec.builtin_type = static_cast(argument); + break; + + case DecorationLocation: + dec.location = argument; + break; + + case DecorationOffset: + dec.offset = argument; + break; + + default: + break; + } +} + +void Compiler::set_member_name(uint32_t id, uint32_t index, const std::string& name) +{ + meta.at(id).members.resize(max(meta[id].members.size(), size_t(index) + 1)); + meta.at(id).members[index].alias = name; +} + +const std::string& Compiler::get_member_name(uint32_t id, uint32_t index) const +{ + auto &m = meta.at(id); + if (index >= m.members.size()) + { + static string empty; + return empty; + } + + return m.members[index].alias; +} + +uint32_t Compiler::get_member_decoration(uint32_t id, uint32_t index, Decoration decoration) const +{ + auto &dec = meta.at(id).members.at(index); + if (!(dec.decoration_flags & (1ull << decoration))) + return 0; + + switch (decoration) + { + case DecorationBuiltIn: return dec.builtin_type; + case DecorationLocation: return dec.location; + case DecorationOffset: return dec.offset; + default: return 0; + } +} + +uint64_t Compiler::get_member_decoration_mask(uint32_t id, uint32_t index) const +{ + auto &m = meta.at(id); + if (index >= m.members.size()) + return 0; + + return m.members[index].decoration_flags; +} + +void Compiler::unset_member_decoration(uint32_t id, uint32_t index, Decoration decoration) +{ + auto &m = meta.at(id); + if (index >= m.members.size()) + return; + + auto &dec = m.members[index]; + + dec.decoration_flags &= ~(1ull << decoration); + switch (decoration) + { + case DecorationBuiltIn: + dec.builtin = false; + break; + + case DecorationLocation: + dec.location = 0; + break; + + case DecorationOffset: + dec.offset = 0; + break; + + 
default: + break; + } +} + +void Compiler::set_decoration(uint32_t id, Decoration decoration, uint32_t argument) +{ + auto &dec = meta.at(id).decoration; + dec.decoration_flags |= 1ull << decoration; + + switch (decoration) + { + case DecorationBuiltIn: + dec.builtin = true; + dec.builtin_type = static_cast(argument); + break; + + case DecorationLocation: + dec.location = argument; + break; + + case DecorationOffset: + dec.offset = argument; + break; + + case DecorationArrayStride: + dec.array_stride = argument; + break; + + case DecorationBinding: + dec.binding = argument; + break; + + case DecorationDescriptorSet: + dec.set = argument; + break; + + default: + break; + } +} + +StorageClass Compiler::get_storage_class(uint32_t id) const +{ + return get(id).storage; +} + +const std::string& Compiler::get_name(uint32_t id) const +{ + return meta.at(id).decoration.alias; +} + +uint64_t Compiler::get_decoration_mask(uint32_t id) const +{ + auto &dec = meta.at(id).decoration; + return dec.decoration_flags; +} + +uint32_t Compiler::get_decoration(uint32_t id, Decoration decoration) const +{ + auto &dec = meta.at(id).decoration; + if (!(dec.decoration_flags & (1ull << decoration))) + return 0; + + switch (decoration) + { + case DecorationBuiltIn: return dec.builtin_type; + case DecorationLocation: return dec.location; + case DecorationOffset: return dec.offset; + case DecorationBinding: return dec.binding; + case DecorationDescriptorSet: return dec.set; + default: return 0; + } +} + +void Compiler::unset_decoration(uint32_t id, Decoration decoration) +{ + auto &dec = meta.at(id).decoration; + dec.decoration_flags &= ~(1ull << decoration); + switch (decoration) + { + case DecorationBuiltIn: + dec.builtin = false; + break; + + case DecorationLocation: + dec.location = 0; + break; + + case DecorationOffset: + dec.offset = 0; + break; + + case DecorationBinding: + dec.binding = 0; + break; + + case DecorationDescriptorSet: + dec.set = 0; + break; + + default: + break; + } +} + +void Compiler::parse(const Instruction &i) +{ + auto ops = stream(i.offset); + auto op = static_cast(i.op); + uint32_t length = i.length; + + if (i.offset + length > spirv.size()) + throw CompilerError("Compiler::parse() opcode out of range."); + + switch (op) + { + case OpMemoryModel: + case OpSourceExtension: + case OpNop: + break; + + case OpSource: + { + auto lang = static_cast(ops[0]); + switch (lang) + { + case SourceLanguageESSL: + source.es = true; + source.version = ops[1]; + source.known = true; + break; + + case SourceLanguageGLSL: + source.es = false; + source.version = ops[1]; + source.known = true; + break; + + default: + source.known = false; + break; + } + break; + } + + case OpUndef: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + set(id, result_type); + break; + } + + case OpCapability: + { + uint32_t cap = ops[0]; + if (cap == CapabilityKernel) + throw CompilerError("Kernel capability not supported."); + break; + } + + case OpExtInstImport: + { + uint32_t id = ops[0]; + auto ext = extract_string(spirv, i.offset + 1); + if (ext == "GLSL.std.450") + set(id, SPIRExtension::GLSL); + else + throw CompilerError("Only GLSL.std.450 extension interface supported."); + + break; + } + + case OpEntryPoint: + { + if (execution.entry_point) + throw CompilerError("More than one entry point not supported."); + + execution.model = static_cast(ops[0]); + execution.entry_point = ops[1]; + break; + } + + case OpExecutionMode: + { + uint32_t entry = ops[0]; + if (entry != execution.entry_point) + throw 
CompilerError("Cannot set execution mode to non-existing entry point."); + + auto mode = static_cast(ops[1]); + execution.flags |= 1ull << mode; + + switch (mode) + { + case ExecutionModeInvocations: + execution.invocations = ops[2]; + break; + + case ExecutionModeLocalSize: + execution.workgroup_size.x = ops[2]; + execution.workgroup_size.y = ops[3]; + execution.workgroup_size.z = ops[4]; + break; + + case ExecutionModeOutputVertices: + execution.output_vertices = ops[2]; + break; + + default: + break; + } + break; + } + + case OpName: + { + uint32_t id = ops[0]; + set_name(id, extract_string(spirv, i.offset + 1)); + break; + } + + case OpMemberName: + { + uint32_t id = ops[0]; + uint32_t member = ops[1]; + set_member_name(id, member, extract_string(spirv, i.offset + 2)); + break; + } + + case OpDecorate: + { + uint32_t id = ops[0]; + + auto decoration = static_cast(ops[1]); + if (length >= 3) + set_decoration(id, decoration, ops[2]); + else + set_decoration(id, decoration); + break; + } + + case OpMemberDecorate: + { + uint32_t id = ops[0]; + uint32_t member = ops[1]; + auto decoration = static_cast(ops[2]); + if (length >= 4) + set_member_decoration(id, member, decoration, ops[3]); + else + set_member_decoration(id, member, decoration); + break; + } + + // Build up basic types. + case OpTypeVoid: + { + uint32_t id = ops[0]; + auto &type = set(id); + type.basetype = SPIRType::Void; + break; + } + + case OpTypeBool: + { + uint32_t id = ops[0]; + auto &type = set(id); + type.basetype = SPIRType::Bool; + type.width = 1; + break; + } + + case OpTypeFloat: + { + uint32_t id = ops[0]; + uint32_t width = ops[1]; + auto &type = set(id); + type.basetype = SPIRType::Float; + type.width = width; + break; + } + + case OpTypeInt: + { + uint32_t id = ops[0]; + uint32_t width = ops[1]; + auto &type = set(id); + type.basetype = ops[2] ? SPIRType::Int : SPIRType::UInt; + type.width = width; + break; + } + + // Build composite types by "inheriting". + // NOTE: The self member is also copied! For pointers and array modifiers this is a good thing + // since we can refer to decorations on pointee classes which is needed for UBO/SSBO, I/O blocks in geometry/tess etc. + case OpTypeVector: + { + uint32_t id = ops[0]; + uint32_t vecsize = ops[2]; + + auto &base = get(ops[1]); + auto &vecbase = set(id); + + vecbase = base; + vecbase.vecsize = vecsize; + vecbase.self = id; + break; + } + + case OpTypeMatrix: + { + uint32_t id = ops[0]; + uint32_t colcount = ops[2]; + + auto &base = get(ops[1]); + auto &matrixbase = set(id); + + matrixbase = base; + matrixbase.columns = colcount; + matrixbase.self = id; + break; + } + + case OpTypeArray: + { + uint32_t id = ops[0]; + + auto &base = get(ops[1]); + auto &arraybase = set(id); + + arraybase = base; + arraybase.array.push_back(get(ops[2]).scalar()); + // Do NOT set arraybase.self! + break; + } + + case OpTypeRuntimeArray: + { + uint32_t id = ops[0]; + + auto &base = get(ops[1]); + auto &arraybase = set(id); + + arraybase = base; + arraybase.array.push_back(0); + // Do NOT set arraybase.self! 
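+    // An unsized dimension is recorded as 0 here, e.g. for the "vec4 data[];"
+    // tail of an SSBO, which distinguishes runtime arrays from sized ones.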
+ break; + } + + case OpTypeImage: + { + uint32_t id = ops[0]; + auto &type = set(id); + type.basetype = SPIRType::Image; + type.image.type = ops[1]; + type.image.dim = static_cast(ops[2]); + type.image.depth = ops[3] != 0; + type.image.arrayed = ops[4] != 0; + type.image.ms = ops[5] != 0; + type.image.sampled = ops[6]; + type.image.format = static_cast(ops[7]); + break; + } + + case OpTypeSampledImage: + { + uint32_t id = ops[0]; + uint32_t imagetype = ops[1]; + auto &type = set(id); + type = get(imagetype); + type.basetype = SPIRType::SampledImage; + type.self = id; + break; + } + + // Not really used. + case OpTypeSampler: + { + uint32_t id = ops[0]; + auto &type = set(id); + type.basetype = SPIRType::Sampler; + break; + } + + case OpTypePointer: + { + uint32_t id = ops[0]; + + auto &base = get(ops[2]); + auto &ptrbase = set(id); + + ptrbase = base; + if (ptrbase.pointer) + throw CompilerError("Cannot make pointer-to-pointer type."); + ptrbase.pointer = true; + ptrbase.storage = static_cast(ops[1]); + + if (ptrbase.storage == StorageClassAtomicCounter) + ptrbase.basetype = SPIRType::AtomicCounter; + + // Do NOT set ptrbase.self! + break; + } + + case OpTypeStruct: + { + uint32_t id = ops[0]; + auto &type = set(id); + type.basetype = SPIRType::Struct; + for (uint32_t i = 1; i < length; i++) + type.member_types.push_back(ops[i]); + break; + } + + case OpTypeFunction: + { + uint32_t id = ops[0]; + uint32_t ret = ops[1]; + + auto &func = set(id, ret); + for (uint32_t i = 2; i < length; i++) + func.parameter_types.push_back(ops[i]); + break; + } + + // Variable declaration + // All variables are essentially pointers with a storage qualifier. + case OpVariable: + { + uint32_t type = ops[0]; + uint32_t id = ops[1]; + auto storage = static_cast(ops[2]); + uint32_t initializer = length == 4 ? ops[3] : 0; + + if (storage == StorageClassFunction) + { + if (!function) + throw CompilerError("No function currently in scope"); + function->add_local_variable(id); + } + else if (storage == StorageClassPrivate || + storage == StorageClassWorkgroup || + storage == StorageClassOutput) + { + global_variables.push_back(id); + } + + auto &var = set(id, type, storage, initializer); + + if (variable_storage_is_aliased(var)) + aliased_variables.push_back(var.self); + + // glslangValidator does not emit required qualifiers here. + // Solve this by making the image access as restricted as possible + // and loosen up if we need to. + auto &vartype = expression_type(id); + if (vartype.basetype == SPIRType::Image) + { + auto &flags = meta.at(id).decoration.decoration_flags; + flags |= 1ull << DecorationNonWritable; + flags |= 1ull << DecorationNonReadable; + } + + break; + } + + // OpPhi + // OpPhi is a fairly magical opcode. + // It selects temporary variables based on which parent block we *came from*. + // In high-level languages we can "de-SSA" by creating a function local, and flush out temporaries to this function-local + // variable to emulate SSA Phi. + case OpPhi: + { + if (!function) + throw CompilerError("No function currently in scope"); + if (!block) + throw CompilerError("No block currently in scope"); + + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + // Instead of a temporary, create a new function-wide temporary with this ID instead. 
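+    // Illustrative lowering (hypothetical names): a phi such as
+    //
+    //   %x = OpPhi %float %a %blockA %b %blockB
+    //
+    // conceptually becomes a function-local "float _x;" where each listed
+    // predecessor stores its value (_x = a; or _x = b;) before branching here,
+    // and uses of %x simply read _x.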
+ auto &var = set(id, result_type, spv::StorageClassFunction); + var.phi_variable = true; + + function->add_local_variable(id); + + for (uint32_t i = 2; i + 2 <= length; i += 2) + block->phi_variables.push_back({ ops[i], ops[i + 1], id }); + break; + } + + // Constants + case OpSpecConstant: + case OpConstant: + { + uint32_t id = ops[1]; + set(id, ops[0], ops[2]).specialization = op == OpSpecConstant; + break; + } + + case OpSpecConstantFalse: + case OpConstantFalse: + { + uint32_t id = ops[1]; + set(id, ops[0], 0).specialization = op == OpSpecConstantFalse; + break; + } + + case OpSpecConstantTrue: + case OpConstantTrue: + { + uint32_t id = ops[1]; + set(id, ops[0], 1).specialization = op == OpSpecConstantTrue; + break; + } + + case OpSpecConstantComposite: + case OpConstantComposite: + { + uint32_t id = ops[1]; + uint32_t type = ops[0]; + + auto &ctype = get(type); + SPIRConstant *constant = nullptr; + + // We can have constants which are structs and arrays. + // In this case, our SPIRConstant will be a list of other SPIRConstant ids which we + // can refer to. + if (ctype.basetype == SPIRType::Struct || !ctype.array.empty()) + { + constant = &set(id, type, ops + 2, length - 2); + constant->specialization = op == OpSpecConstantComposite; + break; + } + + bool matrix = ctype.columns > 1; + + if (matrix) + { + switch (length - 2) + { + case 1: + constant = &set(id, type, + get(ops[2]).vector()); + break; + + case 2: + constant = &set(id, type, + get(ops[2]).vector(), + get(ops[3]).vector()); + break; + + case 3: + constant = &set(id, type, + get(ops[2]).vector(), + get(ops[3]).vector(), + get(ops[4]).vector()); + break; + + case 4: + constant = &set(id, type, + get(ops[2]).vector(), + get(ops[3]).vector(), + get(ops[4]).vector(), + get(ops[5]).vector()); + break; + + default: throw CompilerError("OpConstantComposite only supports 1, 2, 3 and 4 columns."); + } + } + else + { + switch (length - 2) + { + case 1: + constant = &set(id, type, + get(ops[2]).scalar()); + break; + + case 2: + constant = &set(id, type, + get(ops[2]).scalar(), + get(ops[3]).scalar()); + break; + + case 3: + constant = &set(id, type, + get(ops[2]).scalar(), + get(ops[3]).scalar(), + get(ops[4]).scalar()); + break; + + case 4: + constant = &set(id, type, + get(ops[2]).scalar(), + get(ops[3]).scalar(), + get(ops[4]).scalar(), + get(ops[5]).scalar()); + break; + + default: throw CompilerError("OpConstantComposite only supports 1, 2, 3 and 4 components."); + } + } + + constant->specialization = op == OpSpecConstantComposite; + break; + } + + // Functions + case OpFunction: + { + uint32_t res = ops[0]; + uint32_t id = ops[1]; + // Control + uint32_t type = ops[3]; + + if (function) + throw CompilerError("Must end a function before starting a new one!"); + + function = &set(id, res, type); + break; + } + + case OpFunctionParameter: + { + uint32_t type = ops[0]; + uint32_t id = ops[1]; + + if (!function) + throw CompilerError("Must be in a function!"); + + function->add_parameter(type, id); + set(id, type, StorageClassFunction); + break; + } + + case OpFunctionEnd: + { + function = nullptr; + break; + } + + // Blocks + case OpLabel: + { + // OpLabel always starts a block. 
+ if (!function) + throw CompilerError("Blocks cannot exist outside functions!"); + + uint32_t id = ops[0]; + + function->blocks.push_back(id); + if (!function->entry_block) + function->entry_block = id; + + if (block) + throw CompilerError("Cannot start a block before ending the current block."); + + block = &set(id); + break; + } + + // Branch instructions end blocks. + case OpBranch: + { + if (!block) + throw CompilerError("Trying to end a non-existing block."); + + uint32_t target = ops[0]; + block->terminator = SPIRBlock::Direct; + block->next_block = target; + block = nullptr; + break; + } + + case OpBranchConditional: + { + if (!block) + throw CompilerError("Trying to end a non-existing block."); + + block->condition = ops[0]; + block->true_block = ops[1]; + block->false_block = ops[2]; + + block->terminator = SPIRBlock::Select; + block = nullptr; + break; + } + + case OpSwitch: + { + if (!block) + throw CompilerError("Trying to end a non-existing block."); + + if (block->merge == SPIRBlock::MergeNone) + throw CompilerError("Switch statement is not structured"); + + block->terminator = SPIRBlock::MultiSelect; + + block->condition = ops[0]; + block->default_block = ops[1]; + + for (uint32_t i = 2; i + 2 <= length; i += 2) + block->cases.push_back({ ops[i], ops[i + 1] }); + + // If we jump to next block, make it break instead since we're inside a switch case block at that point. + multiselect_merge_target.insert(block->next_block); + + block = nullptr; + break; + } + + case OpKill: + { + if (!block) + throw CompilerError("Trying to end a non-existing block."); + block->terminator = SPIRBlock::Kill; + block = nullptr; + break; + } + + case OpReturn: + { + if (!block) + throw CompilerError("Trying to end a non-existing block."); + block->terminator = SPIRBlock::Return; + block = nullptr; + break; + } + + case OpReturnValue: + { + if (!block) + throw CompilerError("Trying to end a non-existing block."); + block->terminator = SPIRBlock::Return; + block->return_value = ops[0]; + block = nullptr; + break; + } + + case OpUnreachable: + { + if (!block) + throw CompilerError("Trying to end a non-existing block."); + block->terminator = SPIRBlock::Unreachable; + block = nullptr; + break; + } + + case OpSelectionMerge: + { + if (!block) + throw CompilerError("Trying to modify a non-existing block."); + + block->next_block = ops[0]; + block->merge = SPIRBlock::MergeSelection; + selection_merge_target.insert(block->next_block); + break; + } + + case OpLoopMerge: + { + if (!block) + throw CompilerError("Trying to modify a non-existing block."); + + block->merge_block = ops[0]; + block->continue_block = ops[1]; + block->merge = SPIRBlock::MergeLoop; + + loop_block.insert(block->self); + loop_merge_target.insert(block->merge_block); + continue_block.insert(block->continue_block); + break; + } + + // Actual opcodes. + default: + { + if (!block) + throw CompilerError("Currently no block to insert opcode."); + + block->ops.push_back(i); + break; + } + } +} + +bool Compiler::block_is_loop_candidate(const SPIRBlock &block, SPIRBlock::Method method) const +{ + // Tried and failed. + if (block.disable_block_optimization || block.complex_continue) + return false; + + if (method == SPIRBlock::MergeToSelectForLoop) + { + // Try to detect common for loop pattern + // which the code backend can use to create cleaner code. + // for(;;) { if (cond) { some_body; } else { break; } } + // is the pattern we're looking for. 
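+        // In terms of the checks below (illustrative): the candidate header is
+        // simultaneously the loop and the selection,
+        //
+        //   header: OpLoopMerge %merge %continue
+        //           OpBranchConditional %cond %body %merge
+        //
+        // so the true edge enters the body and the false edge is the break to
+        // the merge block.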
+ bool ret = + block.terminator == SPIRBlock::Select && + block.merge == SPIRBlock::MergeLoop && + block.true_block != block.merge_block && + block.true_block != block.self && + block.false_block == block.merge_block; + + // If we have OpPhi which depends on branches which came from our own block, + // we need to flush phi variables in else block instead of a trivial break, + // so we cannot assume this is a for loop candidate. + if (ret) + { + for (auto &phi : block.phi_variables) + if (phi.parent == block.self) + return false; + + auto *merge = maybe_get(block.merge_block); + if (merge) + for (auto &phi : merge->phi_variables) + if (phi.parent == block.self) + return false; + } + return ret; + } + else if (method == SPIRBlock::MergeToDirectForLoop) + { + // Empty loop header that just sets up merge target + // and branches to loop body. + bool ret = + block.terminator == SPIRBlock::Direct && + block.merge == SPIRBlock::MergeLoop && + block.ops.empty(); + + if (!ret) + return false; + + auto &child = get(block.next_block); + ret = + child.terminator == SPIRBlock::Select && + child.merge == SPIRBlock::MergeNone && + child.false_block == block.merge_block && + child.true_block != block.merge_block && + child.true_block != block.self; + + // If we have OpPhi which depends on branches which came from our own block, + // we need to flush phi variables in else block instead of a trivial break, + // so we cannot assume this is a for loop candidate. + if (ret) + { + for (auto &phi : block.phi_variables) + if (phi.parent == block.self || phi.parent == child.self) + return false; + + for (auto &phi : child.phi_variables) + if (phi.parent == block.self) + return false; + + auto *merge = maybe_get(block.merge_block); + if (merge) + for (auto &phi : merge->phi_variables) + if (phi.parent == block.self || phi.parent == child.false_block) + return false; + } + + return ret; + } + else + return false; +} + +bool Compiler::block_is_outside_flow_control_from_block(const SPIRBlock &from, const SPIRBlock &to) +{ + auto *start = &from; + + if (start->self == to.self) + return true; + + // Break cycles. + if (is_continue(start->self)) + return false; + + // If our select block doesn't merge, we must break or continue in these blocks, + // so if continues occur branchless within these blocks, consider them branchless as well. + // This is typically used for loop control. 
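+    // E.g. a loop body shaped like "if (cond) break; else continue;": the
+    // select has no merge block and both arms immediately leave the loop, so a
+    // target reached through either arm is still considered outside extra
+    // flow control.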
+ if (start->terminator == SPIRBlock::Select && + start->merge == SPIRBlock::MergeNone && + (block_is_outside_flow_control_from_block(get(start->true_block), to) || + block_is_outside_flow_control_from_block(get(start->false_block), to))) + { + return true; + } + else if (start->merge_block && + block_is_outside_flow_control_from_block(get(start->merge_block), to)) + { + return true; + } + else if (start->next_block && + block_is_outside_flow_control_from_block(get(start->next_block), to)) + { + return true; + } + else + return false; +} + +bool Compiler::execution_is_noop(const SPIRBlock &from, const SPIRBlock &to) const +{ + if (!execution_is_branchless(from, to)) + return false; + + auto *start = &from; + for (;;) + { + if (start->self == to.self) + return true; + + if (!start->ops.empty()) + return false; + + start = &get(start->next_block); + } +} + +bool Compiler::execution_is_branchless(const SPIRBlock &from, const SPIRBlock &to) const +{ + auto *start = &from; + for (;;) + { + if (start->self == to.self) + return true; + + if (start->terminator == SPIRBlock::Direct && start->merge == SPIRBlock::MergeNone) + start = &get(start->next_block); + else + return false; + } +} + +SPIRBlock::ContinueBlockType Compiler::continue_block_type(const SPIRBlock &continue_block) const +{ + auto &dominator = get(continue_block.loop_dominator); + + // The block was deemed too complex during code emit, pick conservative fallback paths. + if (continue_block.complex_continue) + return SPIRBlock::ComplexLoop; + + if (execution_is_noop(continue_block, dominator)) + return SPIRBlock::WhileLoop; + else if (execution_is_branchless(continue_block, dominator)) + return SPIRBlock::ForLoop; + else + { + if (continue_block.merge == SPIRBlock::MergeNone && + continue_block.terminator == SPIRBlock::Select && + continue_block.true_block == dominator.self && + continue_block.false_block == dominator.merge_block) + { + return SPIRBlock::DoWhileLoop; + } + else + return SPIRBlock::ComplexLoop; + } +} + +bool Compiler::traverse_all_reachable_opcodes(const SPIRBlock &block, OpcodeHandler &handler) const +{ + // Ideally, perhaps traverse the CFG instead of all blocks in order to eliminate dead blocks, + // but this shouldn't be a problem in practice unless the SPIR-V is doing insane things like recursing + // inside dead blocks ... + for (auto &i : block.ops) + { + auto ops = stream(i.offset); + auto op = static_cast(i.op); + + if (!handler.handle(op, ops, i.length)) + return false; + + uint32_t func = ops[2]; + if (op == OpFunctionCall && + !traverse_all_reachable_opcodes(get(func), handler)) + return false; + } + + return true; +} + +bool Compiler::traverse_all_reachable_opcodes(const SPIRFunction &func, OpcodeHandler &handler) const +{ + for (auto block : func.blocks) + if (!traverse_all_reachable_opcodes(get(block), handler)) + return false; + + return true; +} + +uint32_t Compiler::type_struct_member_offset(const SPIRType &type, uint32_t index) const +{ + // Decoration must be set in valid SPIR-V, otherwise throw. + auto &dec = meta[type.self].members.at(index); + if (dec.decoration_flags & (1ull << DecorationOffset)) + return dec.offset; + else + throw CompilerError("Struct member does not have Offset set."); +} + +uint32_t Compiler::type_struct_member_array_stride(const SPIRType &type, uint32_t index) const +{ + // Decoration must be set in valid SPIR-V, otherwise throw. + // ArrayStride is part of the array type not OpMemberDecorate. 
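+    // E.g. (illustrative) for "layout(std430) buffer SSBO { vec4 v[]; };" the
+    // ArrayStride decoration (16 for vec4 in std430) sits on the runtime array
+    // type of "v", which is why the code below reads it through
+    // type.member_types[index] rather than through the struct member.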
+ auto &dec = meta[type.member_types[index]].decoration; + if (dec.decoration_flags & (1ull << DecorationArrayStride)) + return dec.array_stride; + else + throw CompilerError("Struct member does not have ArrayStride set."); +} + +size_t Compiler::get_declared_struct_size(const SPIRType &type) const +{ + uint32_t last = uint32_t(type.member_types.size() - 1); + size_t offset = type_struct_member_offset(type, last); + size_t size = get_declared_struct_member_size(type, last); + return offset + size; +} + +size_t Compiler::get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const +{ + auto flags = get_member_decoration_mask(struct_type.self, index); + auto &type = get(struct_type.member_types[index]); + + if (type.basetype != SPIRType::Struct) + { + switch (type.basetype) + { + case SPIRType::Unknown: + case SPIRType::Void: + case SPIRType::Bool: // Bools are purely logical, and cannot be used for externally visible types. + case SPIRType::AtomicCounter: + case SPIRType::Image: + case SPIRType::SampledImage: + case SPIRType::Sampler: + throw CompilerError("Querying size for object with opaque size.\n"); + + default: + break; + } + + size_t component_size = type.width / 8; + unsigned vecsize = type.vecsize; + unsigned columns = type.columns; + + if (type.array.empty()) + { + // Vectors. + if (columns == 1) + return vecsize * component_size; + else + { + // Per SPIR-V spec, matrices must be tightly packed and aligned up for vec3 accesses. + if ((flags & (1ull << DecorationRowMajor)) && columns == 3) + columns = 4; + else if ((flags & (1ull << DecorationColMajor)) && vecsize == 3) + vecsize = 4; + + return vecsize * columns * component_size; + } + } + else + { + // For arrays, we can use ArrayStride to get an easy check. + return type_struct_member_array_stride(struct_type, index) * type.array.back(); + } + } + else + { + // Recurse. + uint32_t last = uint32_t(struct_type.member_types.size() - 1); + uint32_t offset = type_struct_member_offset(struct_type, last); + size_t size = get_declared_struct_size(get(struct_type.member_types.back())); + return offset + size; + } +} + +bool Compiler::BufferAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length) +{ + if (opcode != OpAccessChain && opcode != OpInBoundsAccessChain) + return true; + + // Invalid SPIR-V. + if (length < 4) + return false; + + if (args[2] != id) + return true; + + // Don't bother traversing the entire access chain tree yet. + // If we access a struct member, assume we access the entire member. + uint32_t index = compiler.get(args[3]).scalar(); + + // Seen this index already. + if (seen.find(index) != end(seen)) + return true; + seen.insert(index); + + auto &type = compiler.expression_type(id); + uint32_t offset = compiler.type_struct_member_offset(type, index); + + size_t range; + // If we have another member in the struct, deduce the range by looking at the next member. + // This is okay since structs in SPIR-V can have padding, but Offset decoration must be + // monotonically increasing. + // Of course, this doesn't take into account if the SPIR-V for some reason decided to add + // very large amounts of padding, but that's not really a big deal. + if (index + 1 < type.member_types.size()) + { + range = compiler.type_struct_member_offset(type, index + 1) - offset; + } + else + { + // No padding, so just deduce it from the size of the member directly. 
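+    // E.g. (illustrative) if the last member is a "float" at offset 16, the
+    // reported range is { index, offset = 16, range = 4 }, taken from the
+    // member's declared size rather than from a following member's Offset.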
+ range = compiler.get_declared_struct_member_size(type, index); + } + + ranges.push_back({ index, offset, range }); + return true; +} + +std::vector Compiler::get_active_buffer_ranges(unsigned id) const +{ + std::vector ranges; + BufferAccessHandler handler(*this, ranges, id); + traverse_all_reachable_opcodes(get(execution.entry_point), handler); + return ranges; +} + diff --git a/spir2cross.hpp b/spir2cross.hpp new file mode 100644 index 0000000..c7d0e02 --- /dev/null +++ b/spir2cross.hpp @@ -0,0 +1,345 @@ +/* + * Copyright 2015-2016 ARM Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef SPIR2CROSS_HPP +#define SPIR2CROSS_HPP + +#include "spirv.hpp" +#include +#include +#include +#include +#include +#include +#include + +#include "spir2common.hpp" + +namespace spir2cross +{ + struct Resource + { + // Resources are identified with their SPIR-V ID. + // This is the ID of the OpVariable. + uint32_t id; + + // The type of the declared resource. + uint32_t type_id; + + // The declared name (OpName) of the resource. + // For Buffer blocks, the name actually reflects the externally + // visible Block name. + // + // This name can be retrieved again by using either + // get_name(id) or get_name(type_id) depending if it's a buffer block or not. + // + // This name can be an empty string in which case get_fallback_name(id) can be + // used which obtains a suitable fallback identifier for an ID. + std::string name; + }; + + struct ShaderResources + { + std::vector uniform_buffers; + std::vector storage_buffers; + std::vector stage_inputs; + std::vector stage_outputs; + std::vector subpass_inputs; + std::vector storage_images; + std::vector sampled_images; + std::vector atomic_counters; + + // There can only be one push constant block, + // but keep the vector in case this restriction is lifted in the future. + std::vector push_constant_buffers; + }; + + struct BufferRange + { + unsigned index; + size_t offset; + size_t range; + }; + + class Compiler + { + public: + // The constructor takes a buffer of SPIR-V words and parses it. + Compiler(std::vector ir); + + // After parsing, API users can modify the SPIR-V via reflection and call this + // to disassemble the SPIR-V into the desired langauage. + // Sub-classes actually implement this. + virtual std::string compile(); + + // Gets the identifier (OpName) of an ID. If not defined, an empty string will be returned. + const std::string& get_name(uint32_t id) const; + + // Applies a decoration to an ID. Effectively injects OpDecorate. + void set_decoration(uint32_t id, spv::Decoration decoration, uint32_t argument = 0); + + // Overrides the identifier OpName of an ID. + // Identifiers beginning with underscores or identifiers which contain double underscores + // are reserved by the implementation. + void set_name(uint32_t id, const std::string& name); + + // Gets a bitmask for the decorations which are applied to ID. + // I.e. 
+        //     (1ull << spv::DecorationFoo) | (1ull << spv::DecorationBar)
+        uint64_t get_decoration_mask(uint32_t id) const;
+
+        // Gets the value for decorations which take arguments.
+        // If the decoration doesn't exist or is not recognized,
+        // 0 will be returned.
+        uint32_t get_decoration(uint32_t id, spv::Decoration decoration) const;
+
+        // Removes the decoration for an ID.
+        void unset_decoration(uint32_t id, spv::Decoration decoration);
+
+        // Gets the SPIR-V type associated with ID.
+        // Mostly used with Resource::type_id to parse the underlying type of a resource.
+        const SPIRType& get_type(uint32_t id) const;
+
+        // Gets the underlying storage class for an OpVariable.
+        spv::StorageClass get_storage_class(uint32_t id) const;
+
+        // If get_name() is an empty string, get the fallback name which will be used
+        // instead in the disassembled source.
+        virtual const std::string get_fallback_name(uint32_t id) const
+        {
+            return join("_", id);
+        }
+
+        // Given an OpTypeStruct in ID, obtain the identifier for member number "index".
+        // This may be an empty string.
+        const std::string& get_member_name(uint32_t id, uint32_t index) const;
+
+        // Given an OpTypeStruct in ID, obtain the OpMemberDecorate value for member number "index".
+        uint32_t get_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration) const;
+
+        // Sets the member identifier for OpTypeStruct ID, member number "index".
+        void set_member_name(uint32_t id, uint32_t index, const std::string& name);
+
+        // Gets the decoration mask for a member of a struct, similar to get_decoration_mask.
+        uint64_t get_member_decoration_mask(uint32_t id, uint32_t index) const;
+
+        // Similar to set_decoration, but for struct members.
+        void set_member_decoration(uint32_t id, uint32_t index,
+                spv::Decoration decoration, uint32_t argument = 0);
+
+        // Unsets a member decoration, similar to unset_decoration.
+        void unset_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration);
+
+        // Gets the fallback name for a member, similar to get_fallback_name.
+        virtual const std::string get_fallback_member_name(uint32_t index) const
+        {
+            return join("_", index);
+        }
+
+        // Returns a vector of which members of a struct are potentially in use by a
+        // SPIR-V shader. The granularity of this analysis is per-member of a struct.
+        // This can be used for Buffer (UBO), BufferBlock (SSBO) and PushConstant blocks.
+        // ID is the Resource::id obtained from get_shader_resources().
+        std::vector<BufferRange> get_active_buffer_ranges(unsigned id) const;
+
+        // Returns the effective size of a buffer block.
+        size_t get_declared_struct_size(const SPIRType &struct_type) const;
+
+        // Legacy GLSL compatibility method.
+        // Takes a variable with a block interface and flattens it into a "T array[N];" declaration instead.
+        // For this to work, all types in the block must not themselves be composites
+        // (except vectors and matrices), and all types must be the same.
+        // The name of the uniform will be the same as the interface block name.
+        void flatten_interface_block(uint32_t id);
+
+        // Query shader resources, use ids with reflection interface to modify or query binding points, etc.
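+        // Typical use (illustrative sketch, error handling omitted):
+        //
+        //   auto res = compiler.get_shader_resources();
+        //   for (auto &ubo : res.uniform_buffers)
+        //       fprintf(stderr, "UBO %s: set = %u, binding = %u\n",
+        //               ubo.name.c_str(),
+        //               compiler.get_decoration(ubo.id, spv::DecorationDescriptorSet),
+        //               compiler.get_decoration(ubo.id, spv::DecorationBinding));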
+ ShaderResources get_shader_resources() const; + + protected: + const uint32_t* stream(uint32_t offset) const + { + if (offset > spirv.size()) + throw CompilerError("Compiler::stream() out of range."); + return &spirv[offset]; + } + std::vector spirv; + + std::vector inst; + std::vector ids; + std::vector meta; + + SPIRFunction *function = nullptr; + SPIRBlock *block = nullptr; + std::vector global_variables; + std::vector aliased_variables; + + // If our IDs are out of range here as part of opcodes, throw instead of + // undefined behavior. + template + T& set(uint32_t id, P&&... args) + { + auto &var = variant_set(ids.at(id), std::forward
+            <P>(args)...);
+        var.self = id;
+        return var;
+    }
+
+    template<typename T>
+    T& get(uint32_t id)
+    {
+        return variant_get<T>(ids.at(id));
+    }
+
+    template<typename T>
+    T* maybe_get(uint32_t id)
+    {
+        if (ids.at(id).get_type() == T::type)
+            return &get<T>(id);
+        else
+            return nullptr;
+    }
+
+    template<typename T>
+    const T& get(uint32_t id) const
+    {
+        return variant_get<T>(ids.at(id));
+    }
+
+    template<typename T>
+    const T* maybe_get(uint32_t id) const
+    {
+        if (ids.at(id).get_type() == T::type)
+            return &get<T>(id);
+        else
+            return nullptr;
+    }
+
+    struct
+    {
+        uint64_t flags = 0;
+        spv::ExecutionModel model;
+        uint32_t entry_point = 0;
+        struct
+        {
+            uint32_t x = 0, y = 0, z = 0;
+        } workgroup_size;
+        uint32_t invocations = 0;
+        uint32_t output_vertices = 0;
+    } execution;
+
+    struct
+    {
+        uint32_t version = 0;
+        bool es = false;
+        bool known = false;
+    } source;
+
+    std::unordered_set<uint32_t> loop_block;
+    std::unordered_set<uint32_t> continue_block;
+    std::unordered_set<uint32_t> loop_merge_target;
+    std::unordered_set<uint32_t> selection_merge_target;
+    std::unordered_set<uint32_t> multiselect_merge_target;
+
+    std::string to_name(uint32_t id);
+    bool is_builtin_variable(const SPIRVariable &var) const;
+    bool is_immutable(uint32_t id) const;
+    bool is_member_builtin(const SPIRType &type, uint32_t index, spv::BuiltIn *builtin) const;
+    const SPIRType& expression_type(uint32_t id) const;
+    bool expression_is_lvalue(uint32_t id) const;
+    bool variable_storage_is_aliased(const SPIRVariable &var);
+    SPIRVariable* maybe_get_backing_variable(uint32_t chain);
+
+    void register_read(uint32_t expr, uint32_t chain, bool forwarded);
+    void register_write(uint32_t chain);
+
+    inline bool is_continue(uint32_t next) const
+    {
+        return continue_block.find(next) != end(continue_block);
+    }
+
+    inline bool is_break(uint32_t next) const
+    {
+        return loop_merge_target.find(next) != end(loop_merge_target) ||
+               multiselect_merge_target.find(next) != end(multiselect_merge_target);
+    }
+
+    inline bool is_conditional(uint32_t next) const
+    {
+        return selection_merge_target.find(next) != end(selection_merge_target) &&
+               multiselect_merge_target.find(next) == end(multiselect_merge_target);
+    }
+
+    // Dependency tracking for temporaries read from variables.
+    void flush_dependees(SPIRVariable &var);
+    void flush_all_active_variables();
+    void flush_all_atomic_capable_variables();
+    void flush_all_aliased_variables();
+    void register_global_read_dependencies(const SPIRBlock &block, uint32_t id);
+    void register_global_read_dependencies(const SPIRFunction &func, uint32_t id);
+    std::unordered_set<uint32_t> invalid_expressions;
+
+    void update_name_cache(std::unordered_set<std::string> &cache, std::string &name);
+    std::unordered_set<std::string> global_struct_cache;
+
+    bool function_is_pure(const SPIRFunction &func);
+    bool block_is_pure(const SPIRBlock &block);
+    bool block_is_outside_flow_control_from_block(const SPIRBlock &from, const SPIRBlock &to);
+
+    bool execution_is_branchless(const SPIRBlock &from, const SPIRBlock &to) const;
+    bool execution_is_noop(const SPIRBlock &from, const SPIRBlock &to) const;
+    SPIRBlock::ContinueBlockType continue_block_type(const SPIRBlock &continue_block) const;
+
+    bool force_recompile = false;
+
+    uint32_t type_struct_member_offset(const SPIRType &type, uint32_t index) const;
+    uint32_t type_struct_member_array_stride(const SPIRType &type, uint32_t index) const;
+
+    bool block_is_loop_candidate(const SPIRBlock &block, SPIRBlock::Method method) const;
+
+    private:
+    void parse();
+    void parse(const Instruction &i);
+
+    // Used internally to implement various traversals for queries.
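+        // A minimal handler (illustrative) counting one opcode across all
+        // reachable blocks; returning false would abort the traversal early:
+        //
+        //   struct KillCounter : OpcodeHandler
+        //   {
+        //       uint32_t count = 0;
+        //       bool handle(spv::Op op, const uint32_t *, uint32_t) override
+        //       {
+        //           if (op == spv::OpKill)
+        //               count++;
+        //           return true;
+        //       }
+        //   };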
+ struct OpcodeHandler + { + virtual ~OpcodeHandler() = default; + + // Return true if traversal should continue. + // If false, traversal will end immediately. + virtual bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) = 0; + }; + + struct BufferAccessHandler : OpcodeHandler + { + BufferAccessHandler(const Compiler &compiler, std::vector &ranges, unsigned id) + : compiler(compiler), ranges(ranges), id(id) {} + + bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override; + + const Compiler &compiler; + std::vector &ranges; + uint32_t id; + + std::unordered_set seen; + }; + + bool traverse_all_reachable_opcodes(const SPIRBlock &block, OpcodeHandler &handler) const; + bool traverse_all_reachable_opcodes(const SPIRFunction &block, OpcodeHandler &handler) const; + + size_t get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const; + }; +} + +#endif diff --git a/spir2glsl.cpp b/spir2glsl.cpp new file mode 100644 index 0000000..926d062 --- /dev/null +++ b/spir2glsl.cpp @@ -0,0 +1,4225 @@ +/* + * Copyright 2015-2016 ARM Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "spir2glsl.hpp" +#include "GLSL.std.450.h" +#include +#include + +using namespace spv; +using namespace spir2cross; +using namespace std; + +static const char* to_pls_layout(PlsFormat format) +{ + switch (format) + { + case PlsR11FG11FB10F: return "layout(r11f_g11f_b10f) "; + case PlsR32F: return "layout(r32f) "; + case PlsRG16F: return "layout(rg16f) "; + case PlsRGB10A2: return "layout(rgb10_a2) "; + case PlsRGBA8: return "layout(rgba8) "; + case PlsRG16: return "layout(rg16) "; + case PlsRGBA8I: return "layout(rgba8i)" ; + case PlsRG16I: return "layout(rg16i) "; + case PlsRGB10A2UI: return "layout(rgb10_a2ui) "; + case PlsRGBA8UI: return "layout(rgba8ui) "; + case PlsRG16UI: return "layout(rg16ui) "; + case PlsR32UI: return "layout(r32ui) "; + default: return ""; + } +} + +static SPIRType::BaseType pls_format_to_basetype(PlsFormat format) +{ + switch (format) + { + default: + case PlsR11FG11FB10F: + case PlsR32F: + case PlsRG16F: + case PlsRGB10A2: + case PlsRGBA8: + case PlsRG16: + return SPIRType::Float; + + case PlsRGBA8I: + case PlsRG16I: + return SPIRType::Int; + + case PlsRGB10A2UI: + case PlsRGBA8UI: + case PlsRG16UI: + case PlsR32UI: + return SPIRType::UInt; + } +} + +static uint32_t pls_format_to_components(PlsFormat format) +{ + switch (format) + { + default: + case PlsR32F: + case PlsR32UI: + return 1; + + case PlsRG16F: + case PlsRG16: + case PlsRG16UI: + case PlsRG16I: + return 2; + + case PlsR11FG11FB10F: + return 3; + + case PlsRGB10A2: + case PlsRGBA8: + case PlsRGBA8I: + case PlsRGB10A2UI: + case PlsRGBA8UI: + return 4; + } +} + +void CompilerGLSL::reset() +{ + // We do some speculative optimizations which should pretty much always work out, + // but just in case the SPIR-V is rather weird, recompile until it's happy. + // This typically only means one extra pass. 
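+    // A recompile is forced when a speculative choice proves wrong, e.g.
+    // register_write() finding a store to a parameter that was emitted as
+    // plain "in"; the next pass can then emit it as "out"/"inout".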
+ force_recompile = false; + + // Clear invalid expression tracking. + invalid_expressions.clear(); + function = nullptr; + + // Clear temporary usage tracking. + expression_usage_counts.clear(); + forwarded_temporaries.clear(); + + // Clear identifier caches + global_struct_cache.clear(); + + for (auto &id : ids) + { + // Clear unflushed dependees. + if (id.get_type() == TypeVariable) + id.get().dependees.clear(); + // And remove all expressions. + else if (id.get_type() == TypeExpression) + id.reset(); + // Reset active state for all functions. + else if (id.get_type() == TypeFunction) + { + id.get().active = false; + id.get().flush_undeclared = true; + } + } + + statement_count = 0; + indent = 0; +} + +void CompilerGLSL::remap_pls_variables() +{ + for (auto &input : pls_inputs) + { + auto &var = get(input.id); + + bool input_is_target = false; + if (var.storage == StorageClassUniformConstant) + { + auto &type = get(var.basetype); + input_is_target = type.image.dim == DimSubpassData; + } + + if (var.storage != StorageClassInput && !input_is_target) + throw CompilerError("Can only use in and target variables for PLS inputs."); + var.remapped_variable = true; + } + + for (auto &output : pls_outputs) + { + auto &var = get(output.id); + if (var.storage != StorageClassOutput) + throw CompilerError("Can only use out variables for PLS outputs."); + var.remapped_variable = true; + } +} + +string CompilerGLSL::compile() +{ + uint32_t pass_count = 0; + do + { + if (pass_count >= 3) + throw CompilerError("Over 3 compilation loops detected. Must be a bug!"); + + reset(); + + // Move constructor for this type is broken on GCC 4.9 ... + buffer = unique_ptr(new ostringstream()); + + emit_header(); + emit_resources(); + + emit_function(get(execution.entry_point), 0); + + pass_count++; + } while (force_recompile); + + return buffer->str(); +} + +void CompilerGLSL::emit_header() +{ + statement("#version ", options.version, options.es && options.version > 100 ? " es" : ""); + + // Needed for binding = # on UBOs, etc. 
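+    // For e.g. desktop GL 3.30 the start of the emitted source then reads:
+    //
+    //   #version 330
+    //   #ifdef GL_ARB_shading_language_420pack
+    //   #extension GL_ARB_shading_language_420pack : require
+    //   #endif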
+    if (!options.es && options.version < 420)
+    {
+        statement("#ifdef GL_ARB_shading_language_420pack");
+        statement("#extension GL_ARB_shading_language_420pack : require");
+        statement("#endif");
+    }
+
+    for (auto &ext : forced_extensions)
+        statement("#extension ", ext, " : require");
+
+    if (!pls_inputs.empty() || !pls_outputs.empty())
+        statement("#extension GL_EXT_shader_pixel_local_storage : require");
+
+    vector<string> inputs;
+    vector<string> outputs;
+
+    switch (execution.model)
+    {
+        case ExecutionModelGeometry:
+            if (options.es && options.version < 320)
+                statement("#extension GL_EXT_geometry_shader : require");
+            if (!options.es && options.version < 150)
+                statement("#extension GL_ARB_geometry_shader4 : require");
+            outputs.push_back(join("max_vertices = ", execution.output_vertices));
+            if (execution.flags & (1ull << ExecutionModeInvocations))
+                inputs.push_back(join("invocations = ", execution.invocations));
+            if (execution.flags & (1ull << ExecutionModeInputPoints))
+                inputs.push_back("points");
+            if (execution.flags & (1ull << ExecutionModeInputLines))
+                inputs.push_back("lines");
+            if (execution.flags & (1ull << ExecutionModeInputLinesAdjacency))
+                inputs.push_back("lines_adjacency");
+            if (execution.flags & (1ull << ExecutionModeTriangles))
+                inputs.push_back("triangles");
+            if (execution.flags & (1ull << ExecutionModeInputTrianglesAdjacency))
+                inputs.push_back("triangles_adjacency");
+            if (execution.flags & (1ull << ExecutionModeOutputTriangleStrip))
+                outputs.push_back("triangle_strip");
+            if (execution.flags & (1ull << ExecutionModeOutputPoints))
+                outputs.push_back("points");
+            if (execution.flags & (1ull << ExecutionModeOutputLineStrip))
+                outputs.push_back("line_strip");
+            break;
+
+        case ExecutionModelTessellationControl:
+            if (options.es && options.version < 320)
+                statement("#extension GL_EXT_tessellation_shader : require");
+            if (!options.es && options.version < 400)
+                statement("#extension GL_ARB_tessellation_shader : require");
+            if (execution.flags & (1ull << ExecutionModeOutputVertices))
+                outputs.push_back(join("vertices = ", execution.output_vertices));
+            break;
+
+        case ExecutionModelTessellationEvaluation:
+            if (options.es && options.version < 320)
+                statement("#extension GL_EXT_tessellation_shader : require");
+            if (!options.es && options.version < 400)
+                statement("#extension GL_ARB_tessellation_shader : require");
+            if (execution.flags & (1ull << ExecutionModeQuads))
+                inputs.push_back("quads");
+            if (execution.flags & (1ull << ExecutionModeIsolines))
+                inputs.push_back("isolines");
+            if (execution.flags & (1ull << ExecutionModePointMode))
+                inputs.push_back("point_mode");
+            if (execution.flags & (1ull << ExecutionModeVertexOrderCw))
+                inputs.push_back("cw");
+            if (execution.flags & (1ull << ExecutionModeVertexOrderCcw))
+                inputs.push_back("ccw");
+            if (execution.flags & (1ull << ExecutionModeSpacingFractionalEven))
+                inputs.push_back("fractional_even_spacing");
+            if (execution.flags & (1ull << ExecutionModeSpacingFractionalOdd))
+                inputs.push_back("fractional_odd_spacing");
+            if (execution.flags & (1ull << ExecutionModeSpacingEqual))
+                inputs.push_back("equal_spacing");
+            break;
+
+        case ExecutionModelGLCompute:
+            if (!options.es && options.version < 430)
+                statement("#extension GL_ARB_compute_shader : require");
+            if (options.es && options.version < 310)
+                throw CompilerError("At least ESSL 3.10 required for compute shaders.");
+            inputs.push_back(join("local_size_x = ", execution.workgroup_size.x));
+            inputs.push_back(join("local_size_y = ", execution.workgroup_size.y));
+            inputs.push_back(join("local_size_z = ", execution.workgroup_size.z));
+            break;
+
+        case ExecutionModelFragment:
+            if (options.es)
+            {
+                switch (options.fragment.default_float_precision)
+                {
+                    case Options::Lowp:
+                        statement("precision lowp float;");
+                        break;
+
+                    case Options::Mediump:
+                        statement("precision mediump float;");
+                        break;
+
+                    case Options::Highp:
+                        statement("precision highp float;");
+                        break;
+
+                    default:
+                        break;
+                }
+
+                switch (options.fragment.default_int_precision)
+                {
+                    case Options::Lowp:
+                        statement("precision lowp int;");
+                        break;
+
+                    case Options::Mediump:
+                        statement("precision mediump int;");
+                        break;
+
+                    case Options::Highp:
+                        statement("precision highp int;");
+                        break;
+
+                    default:
+                        break;
+                }
+            }
+
+            if (execution.flags & (1ull << ExecutionModeEarlyFragmentTests))
+                inputs.push_back("early_fragment_tests");
+            if (execution.flags & (1ull << ExecutionModeDepthGreater))
+                inputs.push_back("depth_greater");
+            if (execution.flags & (1ull << ExecutionModeDepthLess))
+                inputs.push_back("depth_less");
+
+            break;
+
+        default:
+            break;
+    }
+
+    if (!inputs.empty())
+        statement("layout(", merge(inputs), ") in;");
+    if (!outputs.empty())
+        statement("layout(", merge(outputs), ") out;");
+
+    statement("");
+}
+
+void CompilerGLSL::emit_struct(const SPIRType &type)
+{
+    auto name = type_to_glsl(type);
+
+    // Struct types can be stamped out multiple times
+    // with just different offsets, matrix layouts, etc ...
+    // Type-punning with these types is legal, which complicates things
+    // when we are storing struct and array types in an SSBO for example.
+    // For now, detect this duplication via OpName, but ideally we should
+    // find proper aliases by inspecting the actual type.
+    if (global_struct_cache.find(name) != end(global_struct_cache))
+        return;
+    update_name_cache(global_struct_cache, name);
+
+    statement("struct ", name);
+    begin_scope();
+
+    uint32_t i = 0;
+    bool emitted = false;
+    for (auto &member : type.member_types)
+    {
+        auto &membertype = get<SPIRType>(member);
+        statement(member_decl(type, membertype, i), ";");
+        i++;
+        emitted = true;
+    }
+    end_scope_decl();
+
+    if (emitted)
+        statement("");
+}
+
+uint64_t CompilerGLSL::combined_decoration_for_member(const SPIRType &type, uint32_t index)
+{
+    uint64_t flags = 0;
+    auto &memb = meta[type.self].members;
+    if (index >= memb.size())
+        return 0;
+    auto &dec = memb[index];
+
+    // If our type is a struct, traverse all the members as well recursively.
+    flags |= dec.decoration_flags;
+    for (uint32_t i = 0; i < type.member_types.size(); i++)
+        flags |= combined_decoration_for_member(get<SPIRType>(type.member_types[i]), i);
+
+    return flags;
+}
+
+string CompilerGLSL::layout_for_member(const SPIRType &type, uint32_t index)
+{
+    bool is_block = (meta[type.self].decoration.decoration_flags &
+                     ((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))) != 0;
+    if (!is_block)
+        return "";
+
+    auto &memb = meta[type.self].members;
+    if (index >= memb.size())
+        return "";
+    auto &dec = memb[index];
+
+    vector<string> attr;
+
+    // We can only apply layouts on members in block interfaces.
+    // This is a bit problematic because in SPIR-V decorations are applied on the struct types directly.
+    // This is not supported in GLSL, so we have to make the assumption that if a struct within our buffer block struct
+    // has a decoration, it was originally caused by a top-level layout() qualifier in GLSL.
+    //
+    // We would like to go from (SPIR-V style):
+    //
+    //   struct Foo { layout(row_major) mat4 matrix; };
+    //   buffer UBO { Foo foo; };
+    //
+    // to
+    //
+    //   struct Foo { mat4 matrix; }; // GLSL doesn't support any layout shenanigans in raw struct declarations.
+    //   buffer UBO { layout(row_major) Foo foo; }; // Apply the layout on the top level.
+    auto flags = combined_decoration_for_member(type, index);
+
+    if (flags & (1ull << DecorationRowMajor))
+        attr.push_back("row_major");
+    // We don't emit any global layouts, so column_major is the default.
+    //if (flags & (1ull << DecorationColMajor))
+    //    attr.push_back("column_major");
+
+    if (dec.decoration_flags & (1ull << DecorationLocation))
+        attr.push_back(join("location = ", dec.location));
+
+    if (attr.empty())
+        return "";
+
+    string res = "layout(";
+    res += merge(attr);
+    res += ") ";
+    return res;
+}
+
+const char* CompilerGLSL::format_to_glsl(spv::ImageFormat format)
+{
+    // Only handle GLES 3.1 compliant types for now ...
+    switch (format)
+    {
+        case ImageFormatRgba32f: return "rgba32f";
+        case ImageFormatRgba16f: return "rgba16f";
+        case ImageFormatR32f: return "r32f";
+        case ImageFormatRgba8: return "rgba8";
+        case ImageFormatRgba8Snorm: return "rgba8_snorm";
+        case ImageFormatRg32f: return "rg32f";
+        case ImageFormatRg16f: return "rg16f";
+
+        case ImageFormatRgba32i: return "rgba32i";
+        case ImageFormatRgba16i: return "rgba16i";
+        case ImageFormatR32i: return "r32i";
+        case ImageFormatRgba8i: return "rgba8i";
+        case ImageFormatRg32i: return "rg32i";
+        case ImageFormatRg16i: return "rg16i";
+
+        case ImageFormatRgba32ui: return "rgba32ui";
+        case ImageFormatRgba16ui: return "rgba16ui";
+        case ImageFormatR32ui: return "r32ui";
+        case ImageFormatRgba8ui: return "rgba8ui";
+        case ImageFormatRg32ui: return "rg32ui";
+        case ImageFormatRg16ui: return "rg16ui";
+
+        case ImageFormatUnknown: return nullptr;
+        default: return "UNSUPPORTED"; // TODO: Fill in the rest.
+    }
+}
+
+uint32_t CompilerGLSL::type_to_std430_alignment(const SPIRType &type, uint64_t flags)
+{
+    // float, int and uint all take 4 bytes.
+    const uint32_t base_alignment = 4;
+
+    if (type.basetype == SPIRType::Struct)
+    {
+        // Rule 9. A struct's alignment is the maximum alignment of its members.
+        uint32_t alignment = 0;
+        for (uint32_t i = 0; i < type.member_types.size(); i++)
+        {
+            auto member_flags = meta[type.self].members.at(i).decoration_flags;
+            alignment = max(alignment,
+                            type_to_std430_alignment(get<SPIRType>(type.member_types[i]), member_flags));
+        }
+
+        return alignment;
+    }
+    else
+    {
+        // From 7.6.2.2 in the GL 4.5 core spec.
+        // Rule 1
+        if (type.vecsize == 1 && type.columns == 1)
+            return base_alignment;
+
+        // Rule 2
+        if ((type.vecsize == 2 || type.vecsize == 4) && type.columns == 1)
+            return type.vecsize * base_alignment;
+
+        // Rule 3
+        if (type.vecsize == 3 && type.columns == 1)
+            return 4 * base_alignment;
+
+        // Rule 4 implied. Alignment does not change in std430.
+
+        // Rule 5. Column-major matrices are stored as arrays of
+        // vectors.
+        if ((flags & (1ull << DecorationColMajor)) && type.columns > 1)
+        {
+            if (type.vecsize == 3)
+                return 4 * base_alignment;
+            else
+                return type.vecsize * base_alignment;
+        }
+
+        // Rule 6 implied.
+
+        // Rule 7.
+        if ((flags & (1ull << DecorationRowMajor)) && type.vecsize > 1)
+        {
+            if (type.columns == 3)
+                return 4 * base_alignment;
+            else
+                return type.columns * base_alignment;
+        }
+
+        // Rule 8 implied.
+    }
+
+    throw CompilerError("Did not find suitable std430 rule for type. Bogus decorations?");
+}
+
+uint32_t CompilerGLSL::type_to_std430_array_stride(const SPIRType &type, uint64_t flags)
+{
+    // Array stride is equal to the aligned size of the underlying type.
+    SPIRType tmp = type;
+    tmp.array.pop_back();
+    uint32_t size = type_to_std430_size(tmp, flags);
+    uint32_t alignment = type_to_std430_alignment(tmp, flags);
+    return (size + alignment - 1) & ~(alignment - 1);
+}
+
+uint32_t CompilerGLSL::type_to_std430_size(const SPIRType &type, uint64_t flags)
+{
+    if (!type.array.empty())
+        return type.array.back() * type_to_std430_array_stride(type, flags);
+
+    // float, int and uint all take 4 bytes.
+    const uint32_t base_alignment = 4;
+    uint32_t size = 0;
+
+    if (type.basetype == SPIRType::Struct)
+    {
+        for (uint32_t i = 0; i < type.member_types.size(); i++)
+        {
+            auto member_flags = meta[type.self].members.at(i).decoration_flags;
+            auto &member_type = get<SPIRType>(type.member_types[i]);
+            uint32_t alignment = type_to_std430_alignment(member_type, member_flags);
+            size = (size + alignment - 1) & ~(alignment - 1);
+            size += type_to_std430_size(member_type, member_flags);
+        }
+    }
+    else
+    {
+        if (type.columns == 1)
+            size = type.vecsize * base_alignment;
+
+        if ((flags & (1ull << DecorationColMajor)) && type.columns > 1)
+        {
+            if (type.vecsize == 3)
+                size = type.columns * 4 * base_alignment;
+            else
+                size = type.columns * type.vecsize * base_alignment;
+        }
+
+        if ((flags & (1ull << DecorationRowMajor)) && type.vecsize > 1)
+        {
+            if (type.columns == 3)
+                size = type.vecsize * 4 * base_alignment;
+            else
+                size = type.vecsize * type.columns * base_alignment;
+        }
+    }
+
+    return size;
+}
+
+bool CompilerGLSL::ssbo_is_std430_packing(const SPIRType &type)
+{
+    // This is very tricky and error prone, but try to be exhaustive and correct here.
+    // SPIR-V doesn't directly say if we're using std430 or std140.
+    // SPIR-V communicates this using Offset and ArrayStride decorations (which is what really matters),
+    // so we have to try to infer whether or not the original GLSL source was std140 or std430 based on this information.
+    // We do not have to consider shared or packed since these layouts are not allowed in Vulkan SPIR-V
+    // (they are useless anyway, and custom offsets would do the same thing).
+    //
+    // It is almost certain that we're using std430, but it gets tricky with arrays in particular.
+    // We will assume std430, but infer std140 if we can prove the struct is not compliant with std430.
+    //
+    // The only two differences between std140 and std430 are related to padding alignment/array stride
+    // in arrays and structs. In std140 they take minimum vec4 alignment.
+    // std430 only removes the vec4 requirement.
+
+    uint32_t offset = 0;
+
+    for (uint32_t i = 0; i < type.member_types.size(); i++)
+    {
+        auto &memb_type = get<SPIRType>(type.member_types[i]);
+        auto member_flags = meta[type.self].members.at(i).decoration_flags;
+
+        // Verify alignment rules.
+        uint32_t std430_alignment = type_to_std430_alignment(memb_type, member_flags);
+        offset = (offset + std430_alignment - 1) & ~(std430_alignment - 1);
+
+        uint32_t actual_offset = type_struct_member_offset(type, i);
+        if (actual_offset != offset) // This cannot be std430.
+            return false;
+
+        // Verify array stride rules.
+        if (!memb_type.array.empty() &&
+            type_to_std430_array_stride(memb_type, member_flags) !=
+            type_struct_member_array_stride(type, i))
+            return false;
+
+        // Verify that sub-structs also follow std430 rules.
+        if (!memb_type.member_types.empty() &&
+            !ssbo_is_std430_packing(memb_type))
+            return false;
+
+        // Bump size.
+        offset += type_to_std430_size(memb_type, member_flags);
+    }
+
+    return true;
+}
+
+string CompilerGLSL::layout_for_variable(const SPIRVariable &var)
+{
+    vector<string> attr;
+
+    auto &dec = meta[var.self].decoration;
+    auto &type = get<SPIRType>(var.basetype);
+    auto flags = dec.decoration_flags;
+    auto typeflags = meta[type.self].decoration.decoration_flags;
+
+    if (flags & (1ull << DecorationRowMajor))
+        attr.push_back("row_major");
+    if (flags & (1ull << DecorationColMajor))
+        attr.push_back("column_major");
+    if (flags & (1ull << DecorationLocation))
+        attr.push_back(join("location = ", dec.location));
+    if ((flags & (1ull << DecorationDescriptorSet)) && dec.set != 0) // set = 0 is the default.
+        attr.push_back(join("set = ", dec.set));
+    if (flags & (1ull << DecorationBinding))
+        attr.push_back(join("binding = ", dec.binding));
+    if (flags & (1ull << DecorationCoherent))
+        attr.push_back("coherent");
+    if (flags & (1ull << DecorationOffset))
+        attr.push_back(join("offset = ", dec.offset));
+
+    // Instead of adding explicit offsets for every element here, just assume we're using std140 or std430.
+    // If SPIR-V does not comply with either layout, we cannot really work around it.
+    if (var.storage == StorageClassUniform &&
+        (typeflags & (1ull << DecorationBlock)))
+        attr.push_back("std140");
+
+    if (var.storage == StorageClassUniform &&
+        (typeflags & (1ull << DecorationBufferBlock)))
+        attr.push_back(ssbo_is_std430_packing(type) ? "std430" : "std140");
+
+    // For images, the type itself adds a layout qualifier.
+    if (type.basetype == SPIRType::Image)
+    {
+        const char *fmt = format_to_glsl(type.image.format);
+        if (fmt)
+            attr.push_back(fmt);
+    }
+
+    if (attr.empty())
+        return "";
+
+    string res = "layout(";
+    res += merge(attr);
+    res += ") ";
+    return res;
+}
+
+void CompilerGLSL::emit_push_constant_block(const SPIRVariable &var)
+{
+    // OpenGL has no concept of push constant blocks, so implement it as a uniform struct.
+    auto &type = get<SPIRType>(var.basetype);
+
+    auto &flags = meta[var.self].decoration.decoration_flags;
+    flags &= ~((1ull << DecorationBinding) | (1ull << DecorationDescriptorSet));
+
+#if 0
+    if (flags & ((1ull << DecorationBinding) | (1ull << DecorationDescriptorSet)))
+        throw CompilerError("Push constant blocks cannot be compiled to GLSL with Binding or Set syntax. "
+                            "Remap to location with reflection API first or disable these decorations.");
+#endif
+
+    // We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily.
+    // Otherwise, we will end up emitting layout() qualifiers on naked structs, which is not allowed.
+    auto &block_flags = meta[type.self].decoration.decoration_flags;
+    uint64_t block_flag = block_flags & (1ull << DecorationBlock);
+    block_flags &= ~block_flag;
+
+    emit_struct(type);
+
+    block_flags |= block_flag;
+
+    emit_uniform(var);
+    statement("");
+}
+
+void CompilerGLSL::emit_buffer_block(const SPIRVariable &var)
+{
+    auto &type = get<SPIRType>(var.basetype);
+    auto ssbo = meta[type.self].decoration.decoration_flags & (1ull << DecorationBufferBlock);
+    auto buffer_name = to_name(type.self);
+    statement(layout_for_variable(var) + (ssbo ? "buffer " : "uniform ") + buffer_name);
+    begin_scope();
+
+    uint32_t i = 0;
+    for (auto &member : type.member_types)
+    {
+        auto &membertype = get<SPIRType>(member);
+        statement(member_decl(type, membertype, i), ";");
+        i++;
+    }
+
+    end_scope_decl(to_name(var.self) + type_to_array_glsl(type));
+    statement("");
+}
+
+void CompilerGLSL::emit_interface_block(const SPIRVariable &var)
+{
+    auto &type = get<SPIRType>(var.basetype);
+
+    // Either make it plain in/out or in/out blocks depending on what the shader is doing ...
+    bool block = (meta[type.self].decoration.decoration_flags & (1ull << DecorationBlock)) != 0;
+
+    const char *qual = nullptr;
+    if (is_legacy() && execution.model == ExecutionModelVertex)
+        qual = var.storage == StorageClassInput ? "attribute " : "varying ";
+    else if (is_legacy() && execution.model == ExecutionModelFragment)
+        qual = "varying "; // Fragment outputs are renamed so they never hit this case.
+    else
+        qual = var.storage == StorageClassInput ? "in " : "out ";
+
+    if (block)
+    {
+        statement(layout_for_variable(var), qual, to_name(type.self));
+        begin_scope();
+
+        uint32_t i = 0;
+        for (auto &member : type.member_types)
+        {
+            auto &membertype = get<SPIRType>(member);
+            statement(member_decl(type, membertype, i), ";");
+            i++;
+        }
+
+        end_scope_decl(join(to_name(var.self), type_to_array_glsl(type)));
+        statement("");
+    }
+    else
+    {
+        statement(layout_for_variable(var), qual, variable_decl(var), ";");
+    }
+}
+
+void CompilerGLSL::emit_uniform(const SPIRVariable &var)
+{
+    auto &type = get<SPIRType>(var.basetype);
+    if (type.basetype == SPIRType::Image)
+    {
+        if (!options.es && options.version < 420)
+            require_extension("GL_ARB_shader_image_load_store");
+        else if (options.es && options.version < 310)
+            throw CompilerError("At least ESSL 3.10 required for shader image load store.");
+    }
+
+    statement(layout_for_variable(var), "uniform ", variable_decl(var), ";");
+}
+
+void CompilerGLSL::replace_fragment_output(SPIRVariable &var)
+{
+    auto &m = meta[var.self].decoration;
+    uint32_t location = 0;
+    if (m.decoration_flags & (1ull << DecorationLocation))
+        location = m.location;
+
+    m.alias = join("gl_FragData[", location, "]");
+    var.compat_builtin = true; // We don't want to declare this variable, but use the name as-is.
+}
+
+void CompilerGLSL::replace_fragment_outputs()
+{
+    for (auto &id : ids)
+    {
+        if (id.get_type() == TypeVariable)
+        {
+            auto &var = id.get<SPIRVariable>();
+            auto &type = get<SPIRType>(var.basetype);
+
+            if (!is_builtin_variable(var) && !var.remapped_variable && type.pointer && var.storage == StorageClassOutput)
+                replace_fragment_output(var);
+        }
+    }
+}
+
+string CompilerGLSL::remap_swizzle(uint32_t result_type, uint32_t input_components, uint32_t expr)
+{
+    auto &out_type = get<SPIRType>(result_type);
+
+    if (out_type.vecsize == input_components)
+        return to_expression(expr);
+    else if (input_components == 1)
+        return join(type_to_glsl(out_type), "(", to_expression(expr), ")");
+    else
+    {
+        auto e = to_expression(expr) + ".";
+        // Just clamp the swizzle index if we have more outputs than inputs.
+        for (uint32_t c = 0; c < out_type.vecsize; c++)
+            e += index_to_swizzle(min(c, input_components - 1));
+        if (backend.swizzle_is_function && out_type.vecsize > 1)
+            e += "()";
+        return e;
+    }
+}
+
+void CompilerGLSL::emit_pls()
+{
+    if (execution.model != ExecutionModelFragment)
+        throw CompilerError("Pixel local storage only supported in fragment shaders.");
+
+    if (!options.es)
+        throw CompilerError("Pixel local storage only supported in OpenGL ES.");
+
+    if (options.version < 300)
+        throw CompilerError("Pixel local storage only supported in ESSL 3.0 and above.");
+
+    if (!pls_inputs.empty())
+    {
+        statement("__pixel_local_inEXT _PLSIn");
+        begin_scope();
+        for (auto &input : pls_inputs)
+            statement(pls_decl(input), ";");
+        end_scope_decl();
+        statement("");
+    }
+
+    if (!pls_outputs.empty())
+    {
+        statement("__pixel_local_outEXT _PLSOut");
+        begin_scope();
+        for (auto &output : pls_outputs)
+            statement(pls_decl(output), ";");
+        end_scope_decl();
+        statement("");
+    }
+}
+
+void CompilerGLSL::emit_resources()
+{
+    // Legacy GL uses gl_FragData[], so redeclare all fragment outputs
+    // with builtins.
+    if (execution.model == ExecutionModelFragment && is_legacy())
+        replace_fragment_outputs();
+
+    // Emit PLS blocks if we have such variables.
+    if (!pls_inputs.empty() || !pls_outputs.empty())
+        emit_pls();
+
+    // Output all basic struct types which are not Block or BufferBlock, as these are declared in-place
+    // when such variables are instantiated.
+    for (auto &id : ids)
+    {
+        if (id.get_type() == TypeType)
+        {
+            auto &type = id.get<SPIRType>();
+            if (type.basetype == SPIRType::Struct &&
+                type.array.empty() &&
+                !type.pointer &&
+                (meta[type.self].decoration.decoration_flags &
+                 ((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))) == 0)
+            {
+                emit_struct(type);
+            }
+        }
+    }
+
+    // Output UBOs and SSBOs
+    for (auto &id : ids)
+    {
+        if (id.get_type() == TypeVariable)
+        {
+            auto &var = id.get<SPIRVariable>();
+            auto &type = get<SPIRType>(var.basetype);
+
+            if (type.pointer && type.storage == StorageClassUniform &&
+                !is_builtin_variable(var) &&
+                (meta[type.self].decoration.decoration_flags &
+                 ((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))))
+            {
+                emit_buffer_block(var);
+            }
+        }
+    }
+
+    // Output push constant blocks
+    for (auto &id : ids)
+    {
+        if (id.get_type() == TypeVariable)
+        {
+            auto &var = id.get<SPIRVariable>();
+            auto &type = get<SPIRType>(var.basetype);
+            if (type.pointer && type.storage == StorageClassPushConstant)
+                emit_push_constant_block(var);
+        }
+    }
+
+    bool emitted = false;
+
+    // Output Uniform Constants (values, samplers, images, etc).
+    for (auto &id : ids)
+    {
+        if (id.get_type() == TypeVariable)
+        {
+            auto &var = id.get<SPIRVariable>();
+            auto &type = get<SPIRType>(var.basetype);
+
+            if (!is_builtin_variable(var) && !var.remapped_variable && type.pointer &&
+                (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter))
+            {
+                emit_uniform(var);
+                emitted = true;
+            }
+        }
+    }
+
+    if (emitted)
+        statement("");
+    emitted = false;
+
+    // Output in/out interfaces.
+    for (auto &id : ids)
+    {
+        if (id.get_type() == TypeVariable)
+        {
+            auto &var = id.get<SPIRVariable>();
+            auto &type = get<SPIRType>(var.basetype);
+
+            if (!is_builtin_variable(var) &&
+                !var.remapped_variable &&
+                type.pointer &&
+                (var.storage == StorageClassInput || var.storage == StorageClassOutput))
+            {
+                emit_interface_block(var);
+                emitted = true;
+            }
+            else if (is_builtin_variable(var))
+            {
+                // For gl_InstanceIndex emulation on GLES, the API user needs to
+                // supply this uniform.
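+                // This pairs with builtin_to_glsl(), which expands BuiltInInstanceIndex
+                // to (gl_InstanceID + SPIR2CROSS_BaseInstance). Illustrative API-side
+                // usage: glUniform1i(glGetUniformLocation(prog, "SPIR2CROSS_BaseInstance"),
+                // base_instance) before the draw call.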
+                if (meta[var.self].decoration.builtin_type == BuiltInInstanceIndex)
+                {
+                    statement("uniform int SPIR2CROSS_BaseInstance;");
+                    emitted = true;
+                }
+            }
+        }
+    }
+
+    // Global variables.
+    for (auto global : global_variables)
+    {
+        auto &var = get<SPIRVariable>(global);
+        if (var.storage != StorageClassOutput)
+        {
+            statement(variable_decl(var), ";");
+            emitted = true;
+        }
+    }
+
+    if (emitted)
+        statement("");
+}
+
+string CompilerGLSL::to_expression(uint32_t id)
+{
+    auto itr = invalid_expressions.find(id);
+    if (itr != end(invalid_expressions))
+    {
+        auto &expr = get<SPIRExpression>(id);
+
+        // This expression has been invalidated in the past.
+        // Be careful with this expression next pass ...
+        // Currently used for OpCompositeInsert forwarding.
+        expr.used_while_invalidated = true;
+
+        // We tried to read an invalidated expression.
+        // This means we need another pass at compilation, but next time, do not try to forward
+        // the variables which caused invalidation to happen in the first place.
+        for (auto var : expr.invalidated_by)
+        {
+            //fprintf(stderr, "Expression %u was invalidated due to variable %u being invalid at read time!\n", id, var);
+            get<SPIRVariable>(var).forwardable = false;
+        }
+
+        if (expr.invalidated_by.empty() && expr.loaded_from)
+        {
+            //fprintf(stderr, "Expression %u was invalidated due to variable %u being invalid at read time!\n", id, expr.loaded_from);
+            get<SPIRVariable>(expr.loaded_from).forwardable = false;
+        }
+        force_recompile = true;
+    }
+
+    track_expression_read(id);
+
+    switch (ids[id].get_type())
+    {
+        case TypeExpression:
+        {
+            auto &e = get<SPIRExpression>(id);
+            if (e.base_expression)
+                return to_expression(e.base_expression) + e.expression;
+            else
+                return e.expression;
+        }
+
+        case TypeConstant:
+            return constant_expression(get<SPIRConstant>(id));
+
+        case TypeVariable:
+        {
+            auto &var = get<SPIRVariable>(id);
+            if (var.statically_assigned)
+                return to_expression(var.static_expression);
+            else if (var.deferred_declaration)
+            {
+                var.deferred_declaration = false;
+                return variable_decl(var);
+            }
+            else
+            {
+                auto &dec = meta[var.self].decoration;
+                if (dec.builtin)
+                    return builtin_to_glsl(dec.builtin_type);
+                else
+                    return to_name(id);
+            }
+        }
+
+        default:
+            return to_name(id);
+    }
+}
+
+string CompilerGLSL::constant_expression(const SPIRConstant &c)
+{
+    if (!c.subconstants.empty())
+    {
+        // Handles arrays and structures.
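+        // Illustrative outputs (type and struct names assumed for the example):
+        //   float[2](1.0, 2.0)        -- array constant
+        //   Foo(1.0, vec2(3.0, 4.0))  -- struct constant
+        // Each subconstant is emitted recursively below.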
+        string res = type_to_glsl_constructor(get<SPIRType>(c.constant_type)) + "(";
+        for (auto &elem : c.subconstants)
+        {
+            res += constant_expression(get<SPIRConstant>(elem));
+            if (&elem != &c.subconstants.back())
+                res += ", ";
+        }
+        res += ")";
+        return res;
+    }
+    else if (c.columns() == 1)
+    {
+        return constant_expression_vector(c, 0);
+    }
+    else
+    {
+        string res = type_to_glsl(get<SPIRType>(c.constant_type)) + "(";
+        for (uint32_t col = 0; col < c.columns(); col++)
+        {
+            res += constant_expression_vector(c, col);
+            if (col + 1 < c.columns())
+                res += ", ";
+        }
+        res += ")";
+        return res;
+    }
+}
+
+string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t vector)
+{
+    auto type = get<SPIRType>(c.constant_type);
+    type.columns = 1;
+
+    string res;
+    if (c.vector_size() > 1)
+        res += type_to_glsl(type) + "(";
+
+    bool splat = c.vector_size() > 1;
+    if (splat)
+    {
+        uint32_t ident = c.scalar(vector, 0);
+        for (uint32_t i = 1; i < c.vector_size(); i++)
+            if (ident != c.scalar(vector, i))
+                splat = false;
+    }
+
+    switch (type.basetype)
+    {
+        case SPIRType::Float:
+            if (splat)
+            {
+                res += convert_to_string(c.scalar_f32(vector, 0));
+                if (backend.float_literal_suffix)
+                    res += "f";
+            }
+            else
+            {
+                for (uint32_t i = 0; i < c.vector_size(); i++)
+                {
+                    res += convert_to_string(c.scalar_f32(vector, i));
+                    if (backend.float_literal_suffix)
+                        res += "f";
+                    if (i + 1 < c.vector_size())
+                        res += ", ";
+                }
+            }
+            break;
+
+        case SPIRType::UInt:
+            if (splat)
+            {
+                res += convert_to_string(c.scalar(vector, 0));
+                if (backend.uint32_t_literal_suffix)
+                    res += "u";
+            }
+            else
+            {
+                for (uint32_t i = 0; i < c.vector_size(); i++)
+                {
+                    res += convert_to_string(c.scalar(vector, i));
+                    if (backend.uint32_t_literal_suffix)
+                        res += "u";
+                    if (i + 1 < c.vector_size())
+                        res += ", ";
+                }
+            }
+            break;
+
+        case SPIRType::Int:
+            if (splat)
+                res += convert_to_string(c.scalar_i32(vector, 0));
+            else
+            {
+                for (uint32_t i = 0; i < c.vector_size(); i++)
+                {
+                    res += convert_to_string(c.scalar_i32(vector, i));
+                    if (i + 1 < c.vector_size())
+                        res += ", ";
+                }
+            }
+            break;
+
+        case SPIRType::Bool:
+            if (splat)
+                res += c.scalar(vector, 0) ? "true" : "false";
+            else
+            {
+                for (uint32_t i = 0; i < c.vector_size(); i++)
+                {
+                    res += c.scalar(vector, i) ? "true" : "false";
+                    if (i + 1 < c.vector_size())
+                        res += ", ";
+                }
+            }
+            break;
+
+        default:
+            throw CompilerError("Invalid constant expression basetype.");
+    }
+
+    if (c.vector_size() > 1)
+        res += ")";
+
+    return res;
+}
+
+string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id)
+{
+    auto &type = get<SPIRType>(result_type);
+    auto flags = meta[result_id].decoration.decoration_flags;
+
+    // If we're declaring temporaries inside continue blocks,
+    // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables.
+    if (current_continue_block)
+    {
+        auto &header = get<SPIRBlock>(current_continue_block->loop_dominator);
+        if (find_if(begin(header.declare_temporary), end(header.declare_temporary),
+                    [result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
+                        return tmp.first == result_type && tmp.second == result_id;
+                    }) == end(header.declare_temporary))
+        {
+            header.declare_temporary.emplace_back(result_type, result_id);
+            force_recompile = true;
+        }
+
+        return join(to_name(result_id), " = ");
+    }
+    else
+    {
+        // The result_id has not been made into an expression yet, so use the flags interface.
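+        // Illustrative result: for a mediump vec4 temporary this yields a prefix
+        // like "mediump vec4 _24 = " (name assumed), which the caller completes
+        // with the right-hand side and a terminating ";".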
+        return join(flags_to_precision_qualifiers_glsl(type, flags),
+                    type_to_glsl(type), " ", to_name(result_id), type_to_array_glsl(type), " = ");
+    }
+}
+
+bool CompilerGLSL::expression_is_forwarded(uint32_t id)
+{
+    return forwarded_temporaries.find(id) != end(forwarded_temporaries);
+}
+
+SPIRExpression& CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id, const string &rhs,
+                                      bool forwarding, bool extra_parens, bool suppress_usage_tracking)
+{
+    if (forwarding && (forced_temporaries.find(result_id) == end(forced_temporaries)))
+    {
+        // Just forward it without a temporary.
+        // If the forward is trivial, we do not force flushing to a temporary for this expression.
+        if (!suppress_usage_tracking)
+            forwarded_temporaries.insert(result_id);
+
+        if (extra_parens)
+            return set<SPIRExpression>(result_id, join("(", rhs, ")"), result_type, true);
+        else
+            return set<SPIRExpression>(result_id, rhs, result_type, true);
+    }
+    else
+    {
+        // If the expression isn't immutable, bind it to a temporary and make the new temporary immutable (they always are).
+        statement(declare_temporary(result_type, result_id), rhs, ";");
+        return set<SPIRExpression>(result_id, to_name(result_id), result_type, true);
+    }
+}
+
+void CompilerGLSL::emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
+{
+    emit_op(result_type, result_id, join(op, to_expression(op0)), should_forward(op0), true);
+}
+
+void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op)
+{
+    emit_op(result_type, result_id, join(bitcast_glsl(result_type, op0), " ", op, " ", bitcast_glsl(result_type, op1)),
+            should_forward(op0) && should_forward(op1), true);
+}
+
+void CompilerGLSL::emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
+{
+    emit_op(result_type, result_id, join(op, "(", to_expression(op0), ")"), should_forward(op0), false);
+}
+
+void CompilerGLSL::emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op)
+{
+    emit_op(result_type, result_id, join(op, "(", to_expression(op0), ", ", to_expression(op1), ")"),
+            should_forward(op0) && should_forward(op1), false);
+}
+
+void CompilerGLSL::emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2, const char *op)
+{
+    emit_op(result_type, result_id, join(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(op2), ")"),
+            should_forward(op0) && should_forward(op1) && should_forward(op2), false);
+}
+
+void CompilerGLSL::emit_quaternary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2, uint32_t op3, const char *op)
+{
+    emit_op(result_type, result_id, join(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(op2), ", ", to_expression(op3), ")"),
+            should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3), false);
+}
+
+string CompilerGLSL::legacy_tex_op(const std::string &op,
+                                   const SPIRType &imgtype)
+{
+    const char *type;
+    switch (imgtype.image.dim)
+    {
+        case spv::Dim1D: type = "1D"; break;
+        case spv::Dim2D: type = "2D"; break;
+        case spv::Dim3D: type = "3D"; break;
+        case spv::DimCube: type = "Cube"; break;
+        case spv::DimBuffer: type = "Buffer"; break;
+        case spv::DimSubpassData: type = "2D"; break;
+        default: type = ""; break;
+    }
+
+    if (op == "texture")
+        return join("texture", type);
+    else if (op == "textureLod")
+        return join("texture", type, "Lod");
+    else if (op == "textureProj")
+        return join("texture", type, "Proj");
+    else if (op == "textureProjLod")
+        return join("texture", type, "ProjLod");
+    else
+        throw CompilerError(join("Unsupported legacy texture op: ", op));
+}
+
+void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id,
+                               uint32_t left, uint32_t right, uint32_t lerp)
+{
+    auto &lerptype = expression_type(lerp);
+    auto &restype = get<SPIRType>(result_type);
+
+    bool has_boolean_mix = (options.es && options.version >= 310) ||
+                           (!options.es && options.version >= 450);
+
+    // Boolean mix not supported on desktop without an extension.
+    // It was added in OpenGL 4.5 with ES 3.1 compat.
+    if (!has_boolean_mix && lerptype.basetype == SPIRType::Bool)
+    {
+        // Could use GL_EXT_shader_integer_mix on desktop at least,
+        // but Apple doesn't support it. :(
+        // Just implement it as ternary expressions.
+        string expr;
+        if (lerptype.vecsize == 1)
+            expr = join(to_expression(lerp), " ? ", to_expression(right), " : ", to_expression(left));
+        else
+        {
+            auto swiz = [this](uint32_t id, uint32_t i) {
+                return join(to_expression(id), ".", index_to_swizzle(i));
+            };
+
+            expr = type_to_glsl_constructor(restype);
+            expr += "(";
+            for (uint32_t i = 0; i < restype.vecsize; i++)
+            {
+                expr += swiz(lerp, i);
+                expr += " ? ";
+                expr += swiz(right, i);
+                expr += " : ";
+                expr += swiz(left, i);
+                if (i + 1 < restype.vecsize)
+                    expr += ", ";
+            }
+            expr += ")";
+        }
+
+        emit_op(result_type, id, expr,
+                should_forward(left) &&
+                should_forward(right) &&
+                should_forward(lerp),
+                false);
+    }
+    else
+        emit_trinary_func_op(result_type, id, left, right, lerp, "mix");
+}
+
+void CompilerGLSL::emit_texture_op(const Instruction &i)
+{
+    auto ops = stream(i.offset);
+    auto op = static_cast<Op>(i.op);
+    uint32_t length = i.length;
+
+    if (i.offset + length > spirv.size())
+        throw CompilerError("Compiler::parse() opcode out of range.");
+
+    uint32_t result_type = ops[0];
+    uint32_t id = ops[1];
+    uint32_t img = ops[2];
+    uint32_t coord = ops[3];
+    uint32_t dref = 0;
+    uint32_t comp = 0;
+    bool gather = false;
+    bool proj = false;
+    const uint32_t *opt = nullptr;
+
+    switch (op)
+    {
+        case OpImageSampleDrefImplicitLod:
+        case OpImageSampleDrefExplicitLod:
+            dref = ops[4];
+            opt = &ops[5];
+            length -= 5;
+            break;
+
+        case OpImageSampleProjDrefImplicitLod:
+        case OpImageSampleProjDrefExplicitLod:
+            dref = ops[4];
+            proj = true;
+            opt = &ops[5];
+            length -= 5;
+            break;
+
+        case OpImageDrefGather:
+            dref = ops[4];
+            opt = &ops[5];
+            gather = true;
+            length -= 5;
+            break;
+
+        case OpImageGather:
+            comp = ops[4];
+            opt = &ops[5];
+            gather = true;
+            length -= 5;
+            break;
+
+        case OpImageSampleProjImplicitLod:
+        case OpImageSampleProjExplicitLod:
+            opt = &ops[4];
+            length -= 4;
+            proj = true;
+            break;
+
+        default:
+            opt = &ops[4];
+            length -= 4;
+            break;
+    }
+
+    auto &imgtype = expression_type(img);
+    uint32_t coord_components = 0;
+    switch (imgtype.image.dim)
+    {
+        case spv::Dim1D: coord_components = 1; break;
+        case spv::Dim2D: coord_components = 2; break;
+        case spv::Dim3D: coord_components = 3; break;
+        case spv::DimCube: coord_components = 3; break;
+        case spv::DimBuffer: coord_components = 1; break;
+        default: coord_components = 2; break;
+    }
+
+    if (proj)
+        coord_components++;
+    if (imgtype.image.arrayed)
+        coord_components++;
+
+    uint32_t bias = 0;
+    uint32_t lod = 0;
+    uint32_t grad_x = 0;
+    uint32_t grad_y = 0;
+    uint32_t coffset = 0;
+    uint32_t offset = 0;
+    uint32_t coffsets = 0;
+    uint32_t sample = 0;
+    uint32_t flags = 0;
+
+    if (length)
+    {
+        flags = opt[0];
+        opt++;
+        length--;
+    }
+
+    auto test = [&](uint32_t &v, uint32_t flag) {
+        if (length && (flags & flag))
+        {
+            v = *opt++;
+            length--;
+        }
+    };
+
+    test(bias, ImageOperandsBiasMask);
+    test(lod, ImageOperandsLodMask);
+    test(grad_x, ImageOperandsGradMask);
+    test(grad_y, ImageOperandsGradMask);
+    test(coffset, ImageOperandsConstOffsetMask);
+    test(offset, ImageOperandsOffsetMask);
+    test(coffsets, ImageOperandsConstOffsetsMask);
+    test(sample, ImageOperandsSampleMask);
+
+    string expr;
+    string texop;
+
+    if (op == OpImageFetch)
+        texop += "texelFetch";
+    else
+    {
+        texop += "texture";
+
+        if (gather)
+            texop += "Gather";
+        if (coffsets)
+            texop += "Offsets";
+        if (proj)
+            texop += "Proj";
+        if (grad_x || grad_y)
+            texop += "Grad";
+        if (lod)
+            texop += "Lod";
+    }
+
+    if (coffset || offset)
+        texop += "Offset";
+
+    if (is_legacy())
+        texop = legacy_tex_op(texop, imgtype);
+
+    expr += texop;
+    expr += "(";
+    expr += to_expression(img);
+
+    bool swizz_func = backend.swizzle_is_function;
+    auto swizzle = [swizz_func](uint32_t comps, uint32_t in_comps) -> const char* {
+        if (comps == in_comps)
+            return "";
+
+        switch (comps)
+        {
+            case 1: return ".x";
+            case 2: return swizz_func ? ".xy()" : ".xy";
+            case 3: return swizz_func ? ".xyz()" : ".xyz";
+            default: return "";
+        }
+    };
+
+    bool forward = should_forward(coord);
+
+    // The IR can give us more components than we need, so chop them off as needed.
+    auto coord_expr = to_expression(coord) + swizzle(coord_components, expression_type(coord).vecsize);
+
+    // TODO: implement the rest ... A bit intensive.
+
+    if (dref)
+    {
+        forward = forward && should_forward(dref);
+
+        // SPIR-V splits dref and coordinate.
+        if (coord_components == 4) // GLSL also splits the arguments in two.
+        {
+            expr += ", ";
+            expr += to_expression(coord);
+            expr += ", ";
+            expr += to_expression(dref);
+        }
+        else
+        {
+            // Create a composite which merges coord/dref into a single vector.
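+            // Illustrative example: a 2D shadow lookup with a separate dref becomes
+            //   texture(sampler, vec3(coord.xy, dref))
+            // via the constructor built below.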
+            auto type = expression_type(coord);
+            type.vecsize = coord_components + 1;
+            expr += ", ";
+            expr += type_to_glsl_constructor(type);
+            expr += "(";
+            expr += coord_expr;
+            expr += ", ";
+            expr += to_expression(dref);
+            expr += ")";
+        }
+    }
+    else
+    {
+        expr += ", ";
+        expr += coord_expr;
+    }
+
+    if (grad_x || grad_y)
+    {
+        forward = forward && should_forward(grad_x);
+        forward = forward && should_forward(grad_y);
+        expr += ", ";
+        expr += to_expression(grad_x);
+        expr += ", ";
+        expr += to_expression(grad_y);
+    }
+
+    if (lod)
+    {
+        forward = forward && should_forward(lod);
+        expr += ", ";
+        expr += to_expression(lod);
+    }
+
+    if (coffset)
+    {
+        forward = forward && should_forward(coffset);
+        expr += ", ";
+        expr += to_expression(coffset);
+    }
+    else if (offset)
+    {
+        forward = forward && should_forward(offset);
+        expr += ", ";
+        expr += to_expression(offset);
+    }
+
+    if (bias)
+    {
+        forward = forward && should_forward(bias);
+        expr += ", ";
+        expr += to_expression(bias);
+    }
+
+    if (comp)
+    {
+        forward = forward && should_forward(comp);
+        expr += ", ";
+        expr += to_expression(comp);
+    }
+
+    expr += ")";
+
+    emit_op(result_type, id, expr, forward, false);
+}
+
+void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t)
+{
+    GLSLstd450 op = static_cast<GLSLstd450>(eop);
+
+    switch (op)
+    {
+        // FP fiddling
+        case GLSLstd450Round:
+        case GLSLstd450RoundEven:
+            emit_unary_func_op(result_type, id, args[0], "round");
+            break;
+        case GLSLstd450Trunc:
+            emit_unary_func_op(result_type, id, args[0], "trunc");
+            break;
+        case GLSLstd450SAbs:
+        case GLSLstd450FAbs:
+            emit_unary_func_op(result_type, id, args[0], "abs");
+            break;
+        case GLSLstd450SSign:
+        case GLSLstd450FSign:
+            emit_unary_func_op(result_type, id, args[0], "sign");
+            break;
+        case GLSLstd450Floor:
+            emit_unary_func_op(result_type, id, args[0], "floor");
+            break;
+        case GLSLstd450Ceil:
+            emit_unary_func_op(result_type, id, args[0], "ceil");
+            break;
+        case GLSLstd450Fract:
+            emit_unary_func_op(result_type, id, args[0], "fract");
+            break;
+        case GLSLstd450Radians:
+            emit_unary_func_op(result_type, id, args[0], "radians");
+            break;
+        case GLSLstd450Degrees:
+            emit_unary_func_op(result_type, id, args[0], "degrees");
+            break;
+        case GLSLstd450Fma:
+            emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "fma");
+            break;
+        case GLSLstd450Modf:
+            register_call_out_argument(args[1]);
+            forced_temporaries.insert(id);
+            emit_binary_func_op(result_type, id, args[0], args[1], "modf");
+            break;
+
+        // Minmax
+        case GLSLstd450FMin:
+        case GLSLstd450UMin:
+        case GLSLstd450SMin:
+            emit_binary_func_op(result_type, id, args[0], args[1], "min");
+            break;
+        case GLSLstd450FMax:
+        case GLSLstd450UMax:
+        case GLSLstd450SMax:
+            emit_binary_func_op(result_type, id, args[0], args[1], "max");
+            break;
+        case GLSLstd450FClamp:
+        case GLSLstd450UClamp:
+        case GLSLstd450SClamp:
+            emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp");
+            break;
+
+        // Trig
+        case GLSLstd450Sin:
+            emit_unary_func_op(result_type, id, args[0], "sin");
+            break;
+        case GLSLstd450Cos:
+            emit_unary_func_op(result_type, id, args[0], "cos");
+            break;
+        case GLSLstd450Tan:
+            emit_unary_func_op(result_type, id, args[0], "tan");
+            break;
+        case GLSLstd450Asin:
+            emit_unary_func_op(result_type, id, args[0], "asin");
+            break;
+        case GLSLstd450Acos:
+            emit_unary_func_op(result_type, id, args[0], "acos");
+            break;
+        case GLSLstd450Atan:
+            emit_unary_func_op(result_type, id, args[0], "atan");
+            break;
+        case GLSLstd450Sinh:
+            emit_unary_func_op(result_type, id, args[0], "sinh");
+            break;
+        case GLSLstd450Cosh:
+            emit_unary_func_op(result_type, id, args[0], "cosh");
+            break;
+        case GLSLstd450Tanh:
+            emit_unary_func_op(result_type, id, args[0], "tanh");
+            break;
+        case GLSLstd450Asinh:
+            emit_unary_func_op(result_type, id, args[0], "asinh");
+            break;
+        case GLSLstd450Acosh:
+            emit_unary_func_op(result_type, id, args[0], "acosh");
+            break;
+        case GLSLstd450Atanh:
+            emit_unary_func_op(result_type, id, args[0], "atanh");
+            break;
+        case GLSLstd450Atan2:
+            emit_binary_func_op(result_type, id, args[0], args[1], "atan");
+            break;
+
+        // Exponentials
+        case GLSLstd450Pow:
+            emit_binary_func_op(result_type, id, args[0], args[1], "pow");
+            break;
+        case GLSLstd450Exp:
+            emit_unary_func_op(result_type, id, args[0], "exp");
+            break;
+        case GLSLstd450Log:
+            emit_unary_func_op(result_type, id, args[0], "log");
+            break;
+        case GLSLstd450Exp2:
+            emit_unary_func_op(result_type, id, args[0], "exp2");
+            break;
+        case GLSLstd450Log2:
+            emit_unary_func_op(result_type, id, args[0], "log2");
+            break;
+        case GLSLstd450Sqrt:
+            emit_unary_func_op(result_type, id, args[0], "sqrt");
+            break;
+        case GLSLstd450InverseSqrt:
+            emit_unary_func_op(result_type, id, args[0], "inversesqrt");
+            break;
+
+        // Matrix math
+        case GLSLstd450Determinant:
+            emit_unary_func_op(result_type, id, args[0], "determinant");
+            break;
+        case GLSLstd450MatrixInverse:
+            emit_unary_func_op(result_type, id, args[0], "inverse");
+            break;
+
+        // Lerping
+        case GLSLstd450FMix:
+        case GLSLstd450IMix:
+        {
+            emit_mix_op(result_type, id, args[0], args[1], args[2]);
+            break;
+        }
+        case GLSLstd450Step:
+            emit_binary_func_op(result_type, id, args[0], args[1], "step");
+            break;
+        case GLSLstd450SmoothStep:
+            emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "smoothstep");
+            break;
+
+        // Packing
+        case GLSLstd450Frexp:
+            register_call_out_argument(args[1]);
+            forced_temporaries.insert(id);
+            emit_binary_func_op(result_type, id, args[0], args[1], "frexp");
+            break;
+        case GLSLstd450Ldexp:
+            emit_binary_func_op(result_type, id, args[0], args[1], "ldexp");
+            break;
+        case GLSLstd450PackSnorm4x8:
+            emit_unary_func_op(result_type, id, args[0], "packSnorm4x8");
+            break;
+        case GLSLstd450PackUnorm4x8:
+            emit_unary_func_op(result_type, id, args[0], "packUnorm4x8");
+            break;
+        case GLSLstd450PackSnorm2x16:
+            emit_unary_func_op(result_type, id, args[0], "packSnorm2x16");
+            break;
+        case GLSLstd450PackUnorm2x16:
+            emit_unary_func_op(result_type, id, args[0], "packUnorm2x16");
+            break;
+        case GLSLstd450PackHalf2x16:
+            emit_unary_func_op(result_type, id, args[0], "packHalf2x16");
+            break;
+        case GLSLstd450UnpackSnorm4x8:
+            emit_unary_func_op(result_type, id, args[0], "unpackSnorm4x8");
+            break;
+        case GLSLstd450UnpackUnorm4x8:
+            emit_unary_func_op(result_type, id, args[0], "unpackUnorm4x8");
+            break;
+        case GLSLstd450UnpackSnorm2x16:
+            emit_unary_func_op(result_type, id, args[0], "unpackSnorm2x16");
+            break;
+        case GLSLstd450UnpackUnorm2x16:
+            emit_unary_func_op(result_type, id, args[0], "unpackUnorm2x16");
+            break;
+        case GLSLstd450UnpackHalf2x16:
+            emit_unary_func_op(result_type, id, args[0], "unpackHalf2x16");
+            break;
+
+        // Vector math
+        case GLSLstd450Length:
+            emit_unary_func_op(result_type, id, args[0], "length");
+            break;
+        case GLSLstd450Distance:
+            emit_binary_func_op(result_type, id, args[0], args[1], "distance");
+            break;
+        case GLSLstd450Cross:
+            emit_binary_func_op(result_type, id, args[0], args[1], "cross");
+            break;
+        case GLSLstd450Normalize:
+            emit_unary_func_op(result_type, id, args[0], "normalize");
+            break;
+        case GLSLstd450FaceForward:
+            emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "faceforward");
+            break;
+        case GLSLstd450Reflect:
+            emit_binary_func_op(result_type, id, args[0], args[1], "reflect");
+            break;
+        case GLSLstd450Refract:
+            emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "refract");
+            break;
+
+        // Bit-fiddling
+        case GLSLstd450FindILsb:
+            emit_unary_func_op(result_type, id, args[0], "findLSB");
+            break;
+        case GLSLstd450FindSMsb:
+        case GLSLstd450FindUMsb:
+            emit_unary_func_op(result_type, id, args[0], "findMSB");
+            break;
+
+        // Multisampled varying
+        case GLSLstd450InterpolateAtCentroid:
+            emit_unary_func_op(result_type, id, args[0], "interpolateAtCentroid");
+            break;
+        case GLSLstd450InterpolateAtSample:
+            emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtSample");
+            break;
+        case GLSLstd450InterpolateAtOffset:
+            emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtOffset");
+            break;
+
+        default:
+            statement("// unimplemented GLSL op ", eop);
+            break;
+    }
+}
+
+string CompilerGLSL::bitcast_glsl_op(uint32_t result_type, uint32_t argument)
+{
+    auto &out_type = get<SPIRType>(result_type);
+    auto &in_type = expression_type(argument);
+
+    if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Int)
+        return type_to_glsl(out_type);
+    else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float)
+        return "floatBitsToUint";
+    else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::UInt)
+        return type_to_glsl(out_type);
+    else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float)
+        return "floatBitsToInt";
+    else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt)
+        return "uintBitsToFloat";
+    else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int)
+        return "intBitsToFloat";
+    else
+        return "";
+}
+
+string CompilerGLSL::bitcast_glsl(uint32_t result_type, uint32_t argument)
+{
+    auto op = bitcast_glsl_op(result_type, argument);
+    if (op.empty())
+        return to_expression(argument);
+    else
+        return join(op, "(", to_expression(argument), ")");
+}
+
+const char* CompilerGLSL::builtin_to_glsl(BuiltIn builtin)
+{
+    switch (builtin)
+    {
+        case BuiltInPosition: return "gl_Position";
+        case BuiltInPointSize: return "gl_PointSize";
+        case BuiltInVertexId: return "gl_VertexID";
+        case BuiltInInstanceId: return "gl_InstanceID";
+        case BuiltInVertexIndex: return "gl_VertexID"; // gl_VertexID already has the base offset applied.
+        case BuiltInInstanceIndex: return "(gl_InstanceID + SPIR2CROSS_BaseInstance)"; // ... but not gl_InstanceID.
+        case BuiltInPrimitiveId: return "gl_PrimitiveID";
+        case BuiltInInvocationId: return "gl_InvocationID";
+        case BuiltInLayer: return "gl_Layer";
+        case BuiltInTessLevelOuter: return "gl_TessLevelOuter";
+        case BuiltInTessLevelInner: return "gl_TessLevelInner";
+        case BuiltInTessCoord: return "gl_TessCoord";
+        case BuiltInFragCoord: return "gl_FragCoord";
+        case BuiltInPointCoord: return "gl_PointCoord";
+        case BuiltInFrontFacing: return "gl_FrontFacing";
+        case BuiltInFragDepth: return "gl_FragDepth";
+        case BuiltInNumWorkgroups: return "gl_NumWorkGroups";
+        case BuiltInWorkgroupSize: return "gl_WorkGroupSize";
+        case BuiltInWorkgroupId: return "gl_WorkGroupID";
+        case BuiltInLocalInvocationId: return "gl_LocalInvocationID";
+        case BuiltInGlobalInvocationId: return "gl_GlobalInvocationID";
+        case BuiltInLocalInvocationIndex: return "gl_LocalInvocationIndex";
+        default: return "gl_???";
+    }
+}
+
+const char* CompilerGLSL::index_to_swizzle(uint32_t index)
+{
+    switch (index)
+    {
+        case 0: return "x";
+        case 1: return "y";
+        case 2: return "z";
+        case 3: return "w";
+        default: throw CompilerError("Swizzle index out of range");
+    }
+}
+
+string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32_t count, bool index_is_literal, bool chain_only)
+{
+    string expr;
+    if (!chain_only)
+        expr = to_expression(base);
+
+    const auto *type = &expression_type(base);
+
+    // For resolving array accesses, etc, keep a local copy for poking.
+    SPIRType temp;
+
+    bool access_chain_is_arrayed = false;
+
+    for (uint32_t i = 0; i < count; i++)
+    {
+        uint32_t index = indices[i];
+
+        // Arrays
+        if (!type->array.empty())
+        {
+            expr += "[";
+            if (index_is_literal)
+                expr += convert_to_string(index);
+            else
+                expr += to_expression(index);
+            expr += "]";
+
+            // We have to modify the type, so keep a local copy.
+            if (&temp != type)
+                temp = *type;
+            type = &temp;
+            temp.array.pop_back();
+
+            access_chain_is_arrayed = true;
+        }
+        // For structs, the index refers to a constant, which indexes into the members.
+        // We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
+        else if (type->basetype == SPIRType::Struct)
+        {
+            if (!index_is_literal)
+                index = get<SPIRConstant>(index).scalar();
+
+            if (index >= type->member_types.size())
+                throw CompilerError("Member index is out of bounds!");
+
+            BuiltIn builtin;
+            if (is_member_builtin(*type, index, &builtin))
+            {
+                // FIXME: We rely here on OpName on gl_in/gl_out to make this work properly.
+                // To make this properly work by omitting all OpName opcodes,
+                // we need to infer gl_in or gl_out based on the builtin, and stage.
+                if (access_chain_is_arrayed)
+                {
+                    expr += ".";
+                    expr += builtin_to_glsl(builtin);
+                }
+                else
+                    expr = builtin_to_glsl(builtin);
+            }
+            else
+            {
+                expr += ".";
+                expr += to_member_name(*type, index);
+            }
+            type = &get<SPIRType>(type->member_types[index]);
+        }
+        // Matrix -> Vector
+        else if (type->columns > 1)
+        {
+            expr += "[";
+            if (index_is_literal)
+                expr += convert_to_string(index);
+            else
+                expr += to_expression(index);
+            expr += "]";
+
+            // We have to modify the type, so keep a local copy.
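+            // (`type` may point at the canonical SPIRType stored in ids, so the
+            // local temp copy below is what gets its columns reduced, never the
+            // shared type object.)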
+            if (&temp != type)
+                temp = *type;
+            type = &temp;
+            temp.columns = 1;
+        }
+        // Vector -> Scalar
+        else if (type->vecsize > 1)
+        {
+            if (index_is_literal)
+            {
+                expr += ".";
+                expr += index_to_swizzle(index);
+            }
+            else if (ids[index].get_type() == TypeConstant)
+            {
+                auto &c = get<SPIRConstant>(index);
+                expr += ".";
+                expr += index_to_swizzle(c.scalar());
+            }
+            else
+            {
+                expr += "[";
+                expr += to_expression(index);
+                expr += "]";
+            }
+
+            // We have to modify the type, so keep a local copy.
+            if (&temp != type)
+                temp = *type;
+            type = &temp;
+            temp.vecsize = 1;
+        }
+        else
+            throw CompilerError("Cannot subdivide a scalar value!");
+    }
+
+    return expr;
+}
+
+bool CompilerGLSL::should_forward(uint32_t id)
+{
+    return is_immutable(id) &&
+           !options.force_temporary;
+}
+
+void CompilerGLSL::track_expression_read(uint32_t id)
+{
+    // If we try to read a forwarded temporary more than once we will stamp out possibly complex code twice.
+    // In this case, it's better to just bind the complex expression to the temporary and read that temporary twice.
+    if (expression_is_forwarded(id))
+    {
+        auto &v = expression_usage_counts[id];
+        v++;
+
+        if (v >= 2)
+        {
+            //if (v == 2)
+            //    fprintf(stderr, "ID %u was forced to temporary due to more than 1 expression use!\n", id);
+
+            forced_temporaries.insert(id);
+            // Force a recompile after this pass to avoid forwarding this variable.
+            force_recompile = true;
+        }
+    }
+}
+
+bool CompilerGLSL::args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure)
+{
+    if (forced_temporaries.find(id) != end(forced_temporaries))
+        return false;
+
+    for (uint32_t i = 0; i < num_args; i++)
+        if (!should_forward(args[i]))
+            return false;
+
+    // We need to forward globals as well.
+    if (!pure)
+    {
+        for (auto global : global_variables)
+            if (!should_forward(global))
+                return false;
+        for (auto aliased : aliased_variables)
+            if (!should_forward(aliased))
+                return false;
+    }
+
+    return true;
+}
+
+void CompilerGLSL::register_impure_function_call()
+{
+    // Impure functions can modify globals and aliased variables, so invalidate them as well.
+    for (auto global : global_variables)
+        flush_dependees(get<SPIRVariable>(global));
+    for (auto aliased : aliased_variables)
+        flush_dependees(get<SPIRVariable>(aliased));
+}
+
+void CompilerGLSL::register_call_out_argument(uint32_t id)
+{
+    register_write(id);
+
+    auto *var = maybe_get<SPIRVariable>(id);
+    if (var)
+        flush_variable_declaration(var->self);
+}
+
+void CompilerGLSL::flush_variable_declaration(uint32_t id)
+{
+    auto *var = maybe_get<SPIRVariable>(id);
+    if (var && var->deferred_declaration)
+    {
+        statement(variable_decl(*var), ";");
+        var->deferred_declaration = false;
+    }
+}
+
+bool CompilerGLSL::remove_duplicate_swizzle(string &op)
+{
+    auto pos = op.find_last_of('.');
+    if (pos == string::npos || pos == 0)
+        return false;
+
+    string final_swiz = op.substr(pos + 1, string::npos);
+
+    if (backend.swizzle_is_function)
+    {
+        if (final_swiz.size() < 2)
+            return false;
+
+        if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()")
+            final_swiz.erase(final_swiz.size() - 2, string::npos);
+        else
+            return false;
+    }
+
+    // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar.
+    // If so, and previous swizzle is of same length,
+    // we can drop the final swizzle altogether.
+    for (uint32_t i = 0; i < final_swiz.size(); i++)
+    {
+        static const char expected[] = { 'x', 'y', 'z', 'w' };
+        if (i >= 4 || final_swiz[i] != expected[i])
+            return false;
+    }
+
+    auto prevpos = op.find_last_of('.', pos - 1);
+    if (prevpos == string::npos)
+        return false;
+
+    prevpos++;
+
+    // Make sure there are only swizzles here ...
+    for (auto i = prevpos; i < pos; i++)
+    {
+        if (op[i] < 'w' || op[i] > 'z')
+        {
+            // If swizzles are foo.xyz() like in the C++ backend for example, check for that.
+            if (backend.swizzle_is_function && i + 2 == pos && op[i] == '(' && op[i + 1] == ')')
+                break;
+            return false;
+        }
+    }
+
+    // If the original swizzle is large enough, just carve out the components we need.
+    // E.g. foobar.wyx.xy will turn into foobar.wy.
+    if (pos - prevpos >= final_swiz.size())
+    {
+        op.erase(prevpos + final_swiz.size(), string::npos);
+
+        // Add back the function call ...
+        if (backend.swizzle_is_function)
+            op += "()";
+    }
+    return true;
+}
+
+// Optimizes away vector swizzles where we have something like
+// vec3 foo;
+// foo.xyz <-- swizzle expression does nothing.
+// This is a very common pattern after OpCompositeConstruct.
+bool CompilerGLSL::remove_unity_swizzle(uint32_t base, string &op)
+{
+    auto pos = op.find_last_of('.');
+    if (pos == string::npos || pos == 0)
+        return false;
+
+    string final_swiz = op.substr(pos + 1, string::npos);
+
+    if (backend.swizzle_is_function)
+    {
+        if (final_swiz.size() < 2)
+            return false;
+
+        if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()")
+            final_swiz.erase(final_swiz.size() - 2, string::npos);
+        else
+            return false;
+    }
+
+    // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar.
+    // If so, and previous swizzle is of same length,
+    // we can drop the final swizzle altogether.
+    for (uint32_t i = 0; i < final_swiz.size(); i++)
+    {
+        static const char expected[] = { 'x', 'y', 'z', 'w' };
+        if (i >= 4 || final_swiz[i] != expected[i])
+            return false;
+    }
+
+    auto &type = expression_type(base);
+
+    // Sanity checking ...
+    assert(type.columns == 1 && type.array.empty());
+
+    if (type.vecsize == final_swiz.size())
+        op.erase(pos, string::npos);
+    return true;
+}
+
+string CompilerGLSL::build_composite_combiner(const uint32_t *elems, uint32_t length)
+{
+    uint32_t base = 0;
+    bool swizzle_optimization = false;
+    string op;
+
+    for (uint32_t i = 0; i < length; i++)
+    {
+        auto *e = maybe_get<SPIRExpression>(elems[i]);
+
+        // If we're merging another scalar which belongs to the same base
+        // object, just merge the swizzles so we avoid triggering more than one expression read as much as possible!
+        if (e && e->base_expression && e->base_expression == base)
+        {
+            // Only supposed to be used for vector swizzle -> scalar.
+            assert(!e->expression.empty() && e->expression.front() == '.');
+            op += e->expression.substr(1, string::npos);
+            swizzle_optimization = true;
+        }
+        else
+        {
+            // We'll likely end up with duplicated swizzles, e.g.
+            // foobar.xyz.xyz from patterns like
+            //   OpVectorShuffle
+            //   OpCompositeExtract x 3
+            //   OpCompositeConstruct 3x + other scalar.
+            // Just modify op in-place.
+            if (swizzle_optimization)
+            {
+                if (backend.swizzle_is_function)
+                    op += "()";
+
+                // Don't attempt to remove unity swizzling if we managed to remove duplicate swizzles.
+                // The base "foo" might be vec4, while foo.xyz is vec3 (OpVectorShuffle) and looks like a vec3 due to the .xyz tacked on.
+                // We only want to remove the swizzles if we're certain that the resulting base will be the same vecsize.
+                // Essentially, we can only remove one set of swizzles, since that's what we have control over ...
+                // Case 1:
+                //   foo.yxz.xyz: duplicate-swizzle removal kicks in, giving foo.yxz, and we are done.
+                //   foo.yxz was the result of OpVectorShuffle and we don't know the type of foo.
+                // Case 2:
+                //   foo.xyz: duplicate-swizzle removal won't kick in.
+                //   If foo is vec3, we can remove .xyz, giving just foo.
+                if (!remove_duplicate_swizzle(op))
+                    remove_unity_swizzle(base, op);
+                swizzle_optimization = false;
+            }
+
+            if (i)
+                op += ", ";
+            op += to_expression(elems[i]);
+        }
+
+        base = e ? e->base_expression : 0;
+    }
+
+    if (swizzle_optimization)
+    {
+        if (backend.swizzle_is_function)
+            op += "()";
+
+        if (!remove_duplicate_swizzle(op))
+            remove_unity_swizzle(base, op);
+    }
+
+    return op;
+}
+
+void CompilerGLSL::emit_instruction(const Instruction &i)
+{
+    auto ops = stream(i.offset);
+    auto op = static_cast<Op>(i.op);
+    uint32_t length = i.length;
+
+#define BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op)
+#define UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op)
+#define QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op)
+#define TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op)
+#define BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
+#define UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op)
+
+    switch (op)
+    {
+        // Dealing with memory
+        case OpLoad:
+        {
+            uint32_t result_type = ops[0];
+            uint32_t id = ops[1];
+            uint32_t ptr = ops[2];
+
+            flush_variable_declaration(ptr);
+
+            // If we're loading from memory that cannot be changed by the shader,
+            // just forward the expression directly to avoid needless temporaries.
+            if (should_forward(ptr))
+            {
+                set<SPIRExpression>(id, to_expression(ptr), result_type, true);
+                register_read(id, ptr, true);
+            }
+            else
+            {
+                // If the variable can be modified after this OpLoad, we cannot just forward the expression.
+                // We must read it now and store it in a temporary.
+                emit_op(result_type, id, to_expression(ptr), false, false);
+                register_read(id, ptr, false);
+            }
+            break;
+        }
+
+        case OpInBoundsAccessChain:
+        case OpAccessChain:
+        {
+            auto *var = maybe_get<SPIRVariable>(ops[2]);
+            if (var)
+                flush_variable_declaration(var->self);
+
+            // If the base is immutable, the access chain pointer must also be.
+            auto e = access_chain(ops[2], &ops[3], length - 3, false);
+            auto &expr = set<SPIRExpression>(ops[1], move(e), ops[0], is_immutable(ops[2]));
+            expr.loaded_from = ops[2];
+            break;
+        }
+
+        case OpStore:
+        {
+            auto *var = maybe_get<SPIRVariable>(ops[0]);
+
+            if (var && var->statically_assigned)
+                var->static_expression = ops[1];
+            else
+            {
+                auto lhs = to_expression(ops[0]);
+                auto rhs = to_expression(ops[1]);
+
+                // It is possible with OpLoad/OpCompositeInsert/OpStore that we get <expr> = <expr>.
+                // For this case, we don't need to invalidate anything or emit any code.
+                if (lhs != rhs)
+                {
+                    register_write(ops[0]);
+                    statement(lhs, " = ", rhs, ";");
+                }
+            }
+            break;
+        }
+
+        case OpArrayLength:
+        {
+            uint32_t result_type = ops[0];
+            uint32_t id = ops[1];
+            auto e = access_chain(ops[2], &ops[3], length - 3, true);
+            set<SPIRExpression>(id, e + ".length()", result_type, true);
+            break;
+        }
+
+        // Function calls
+        case OpFunctionCall:
+        {
+            uint32_t result_type = ops[0];
+            uint32_t id = ops[1];
+            uint32_t func = ops[2];
+            const auto *arg = &ops[3];
+            length -= 3;
+
+            auto &callee = get<SPIRFunction>(func);
+            bool pure = function_is_pure(callee);
+
+            bool callee_has_out_variables = false;
+
+            // Invalidate out variables passed to functions since they can be OpStore'd to.
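+            // Illustrative case: for a callee like void update(inout vec4 v), any
+            // expression forwarded from the variable bound to v may be stale after
+            // the call, so every written argument is registered below.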
+        for (uint32_t i = 0; i < length; i++)
+        {
+            if (callee.arguments[i].write_count)
+            {
+                register_call_out_argument(arg[i]);
+                callee_has_out_variables = true;
+            }
+
+            flush_variable_declaration(arg[i]);
+        }
+
+        if (!pure)
+            register_impure_function_call();
+
+        string funexpr;
+        funexpr += to_name(func) + "(";
+        for (uint32_t i = 0; i < length; i++)
+        {
+            funexpr += to_expression(arg[i]);
+            if (i + 1 < length)
+                funexpr += ", ";
+        }
+        funexpr += ")";
+
+        if (get<SPIRType>(result_type).basetype != SPIRType::Void)
+        {
+            // If the function actually writes to an out variable,
+            // take the conservative route and do not forward.
+            // The problem is that we might not read the function
+            // result (and emit the function) before an out variable
+            // is read (a common case when the return value is ignored).
+            // To avoid having to start tracking invalid variables,
+            // just avoid the forwarding problem altogether.
+            bool forward = args_will_forward(id, arg, length, pure) &&
+                           !callee_has_out_variables && pure &&
+                           (forced_temporaries.find(id) == end(forced_temporaries));
+
+            emit_op(result_type, id, funexpr, forward, false);
+
+            // Function calls are implicit loads from all variables in question.
+            // Set dependencies for them.
+            for (uint32_t i = 0; i < length; i++)
+                register_read(id, arg[i], forward);
+
+            // If we're going to forward the temporary result,
+            // put dependencies on every variable that must not change.
+            if (forward)
+                register_global_read_dependencies(callee, id);
+        }
+        else
+            statement(funexpr, ";");
+
+        break;
+    }
+
+    // Composite munging
+    case OpCompositeConstruct:
+    {
+        uint32_t result_type = ops[0];
+        uint32_t id = ops[1];
+        const auto *elems = &ops[2];
+        length -= 2;
+
+        if (!length)
+            throw CompilerError("Invalid input to OpCompositeConstruct.");
+
+        bool forward = true;
+        for (uint32_t i = 0; i < length; i++)
+            forward = forward && should_forward(elems[i]);
+
+        auto &in_type = expression_type(elems[0]);
+        bool splat = in_type.vecsize == 1 && in_type.columns == 1;
+
+        if (splat)
+        {
+            uint32_t input = elems[0];
+            for (uint32_t i = 0; i < length; i++)
+                if (input != elems[i])
+                    splat = false;
+        }
+
+        auto op = type_to_glsl_constructor(get<SPIRType>(result_type)) + "(";
+        if (splat)
+            op += to_expression(elems[0]);
+        else
+            op += build_composite_combiner(elems, length);
+        op += ")";
+        emit_op(result_type, id, op, forward, false);
+        break;
+    }
+
+    case OpVectorInsertDynamic:
+    {
+        uint32_t result_type = ops[0];
+        uint32_t id = ops[1];
+        uint32_t vec = ops[2];
+        uint32_t comp = ops[3];
+        uint32_t index = ops[4];
+
+        flush_variable_declaration(vec);
+
+        // Make a copy, then use access chain to store the variable.
+        statement(declare_temporary(result_type, id), to_expression(vec), ";");
+        set<SPIRExpression>(id, to_name(id), result_type, true);
+        auto chain = access_chain(id, &index, 1, false);
+        statement(chain, " = ", to_expression(comp), ";");
+        break;
+    }
+
+    case OpVectorExtractDynamic:
+    {
+        uint32_t result_type = ops[0];
+        uint32_t id = ops[1];
+
+        auto expr = access_chain(ops[2], &ops[3], 1, false);
+        emit_op(result_type, id, expr, should_forward(ops[2]), false);
+        break;
+    }
+
+    case OpCompositeExtract:
+    {
+        uint32_t result_type = ops[0];
+        uint32_t id = ops[1];
+        length -= 3;
+
+        auto &type = get<SPIRType>(result_type);
+
+        // Only apply this optimization if the result is a scalar.
+        if (should_forward(ops[2]) && type.vecsize == 1 && type.columns == 1 && length == 1)
+        {
+            // We want to split the access chain from the base.
+            // This is so we can later combine different CompositeExtract results
+            // with CompositeConstruct without emitting code like
+            //
+            //  vec3 temp = texture(...).xyz;
+            //  vec4(temp.x, temp.y, temp.z, 1.0);
+            //
+            // when we actually wanted to emit this:
+            //  vec4(texture(...).xyz, 1.0);
+            //
+            // Including the base would prevent this, and would trigger multiple reads
+            // from the expression, forcing it into an actual temporary in GLSL.
+            auto expr = access_chain(ops[2], &ops[3], length, true, true);
+            auto &e = emit_op(result_type, id, expr, true, false,
+                              !expression_is_forwarded(ops[2]));
+            e.base_expression = ops[2];
+        }
+        else
+        {
+            auto expr = access_chain(ops[2], &ops[3], length, true);
+            emit_op(result_type, id, expr, should_forward(ops[2]), false,
+                    !expression_is_forwarded(ops[2]));
+        }
+        break;
+    }
+
+    case OpCompositeInsert:
+    {
+        uint32_t result_type = ops[0];
+        uint32_t id = ops[1];
+        uint32_t obj = ops[2];
+        uint32_t composite = ops[3];
+        const auto *elems = &ops[4];
+        length -= 4;
+
+        flush_variable_declaration(composite);
+
+        auto *expr = maybe_get<SPIRExpression>(id);
+        if ((expr && expr->used_while_invalidated) || !should_forward(composite))
+        {
+            // Make a copy, then use access chain to store the variable.
+            statement(declare_temporary(result_type, id), to_expression(composite), ";");
+            set<SPIRExpression>(id, to_name(id), result_type, true);
+            auto chain = access_chain(id, elems, length, true);
+            statement(chain, " = ", to_expression(obj), ";");
+        }
+        else
+        {
+            auto chain = access_chain(composite, elems, length, true);
+            statement(chain, " = ", to_expression(obj), ";");
+            set<SPIRExpression>(id, to_expression(composite), result_type, true);
+
+            register_write(composite);
+            register_read(id, composite, true);
+            // Invalidate the old expression we inserted into.
+            invalid_expressions.insert(composite);
+        }
+        break;
+    }
+
+    case OpCopyObject:
+    {
+        uint32_t result_type = ops[0];
+        uint32_t id = ops[1];
+        uint32_t rhs = ops[2];
+        if (expression_is_lvalue(rhs))
+        {
+            // Need a copy.
+            statement(declare_temporary(result_type, id), to_expression(rhs), ";");
+            set<SPIRExpression>(id, to_name(id), result_type, true);
+        }
+        else
+        {
+            // RHS expression is immutable, so just forward it.
+            // Copying these things really makes no sense, but
+            // it seems to be allowed anyway.
+            set<SPIRExpression>(id, to_expression(rhs), result_type, true);
+        }
+        break;
+    }
+
+    case OpVectorShuffle:
+    {
+        uint32_t result_type = ops[0];
+        uint32_t id = ops[1];
+        uint32_t vec0 = ops[2];
+        uint32_t vec1 = ops[3];
+        const auto *elems = &ops[4];
+        length -= 4;
+
+        auto &type0 = expression_type(vec0);
+
+        bool shuffle = false;
+        for (uint32_t i = 0; i < length; i++)
+            if (elems[i] >= type0.vecsize)
+                shuffle = true;
+
+        string expr;
+        bool trivial_forward;
+
+        if (shuffle)
+        {
+            trivial_forward = !expression_is_forwarded(vec0) && !expression_is_forwarded(vec1);
+
+            // Constructor style and shuffling from two different vectors.
+            vector<string> args;
+            for (uint32_t i = 0; i < length; i++)
+            {
+                if (elems[i] >= type0.vecsize)
+                    args.push_back(join(to_expression(vec1), ".", index_to_swizzle(elems[i] - type0.vecsize)));
+                else
+                    args.push_back(join(to_expression(vec0), ".", index_to_swizzle(elems[i])));
+            }
+            expr += join(type_to_glsl_constructor(get<SPIRType>(result_type)), "(", merge(args), ")");
+        }
+        else
+        {
+            trivial_forward = !expression_is_forwarded(vec0);
+
+            // We only source from the first vector, so we can use a plain swizzle.
+            expr += to_expression(vec0);
+            expr += ".";
+            for (uint32_t i = 0; i < length; i++)
+                expr += index_to_swizzle(elems[i]);
+            if (backend.swizzle_is_function && length > 1)
+                expr += "()";
+        }
+
+        // A shuffle is trivial in that it doesn't actually *do* anything.
+        // We inherit the forwardedness from our arguments to avoid flushing out to temporaries when it's not really needed.
+
+        emit_op(result_type, id, expr, should_forward(vec0) && should_forward(vec1), false, trivial_forward);
+        break;
+    }
+
+    // ALU
+    case OpIsNan:
+        UFOP(isnan);
+        break;
+
+    case OpIsInf:
+        UFOP(isinf);
+        break;
+
+    case OpSNegate:
+    case OpFNegate:
+        UOP(-);
+        break;
+
+    case OpIAdd:
+    case OpFAdd:
+        BOP(+);
+        break;
+
+    case OpISub:
+    case OpFSub:
+        BOP(-);
+        break;
+
+    case OpIMul:
+    case OpFMul:
+    case OpMatrixTimesVector:
+    case OpMatrixTimesScalar:
+    case OpVectorTimesScalar:
+    case OpVectorTimesMatrix:
+    case OpMatrixTimesMatrix:
+        BOP(*);
+        break;
+
+    case OpOuterProduct:
+        BFOP(outerProduct);
+        break;
+
+    case OpDot:
+        BFOP(dot);
+        break;
+
+    case OpTranspose:
+        UFOP(transpose);
+        break;
+
+    case OpSDiv:
+    case OpUDiv:
+    case OpFDiv:
+        BOP(/);
+        break;
+
+    // Might need a workaround if ShiftRightLogical can be used on signed types ...
+    case OpShiftRightLogical:
+    case OpShiftRightArithmetic:
+        BOP(>>);
+        break;
+
+    case OpShiftLeftLogical:
+        BOP(<<);
+        break;
+
+    case OpBitwiseOr:
+        BOP(|);
+        break;
+
+    case OpBitwiseXor:
+        BOP(^);
+        break;
+
+    case OpBitwiseAnd:
+        BOP(&);
+        break;
+
+    case OpNot:
+        UOP(~);
+        break;
+
+    case OpUMod:
+    case OpSMod:
+    case OpFMod:
+        BOP(%);
+        break;
+
+    // Relational
+    case OpAny:
+        UFOP(any);
+        break;
+
+    case OpAll:
+        UFOP(all);
+        break;
+
+    case OpSelect:
+        emit_mix_op(ops[0], ops[1], ops[4], ops[3], ops[2]);
+        break;
+
+    case OpLogicalOr:
+        BOP(||);
+        break;
+
+    case OpLogicalAnd:
+        BOP(&&);
+        break;
+
+    case OpLogicalNot:
+        UOP(!);
+        break;
+
+    case OpLogicalEqual:
+    case OpIEqual:
+    case OpFOrdEqual:
+    {
+        if (expression_type(ops[2]).vecsize > 1)
+            BFOP(equal);
+        else
+            BOP(==);
+        break;
+    }
+
+    case OpLogicalNotEqual:
+    case OpINotEqual:
+    case OpFOrdNotEqual:
+    {
+        if (expression_type(ops[2]).vecsize > 1)
+            BFOP(notEqual);
+        else
+            BOP(!=);
+        break;
+    }
+
+    case OpUGreaterThan:
+    case OpSGreaterThan:
+    case OpFOrdGreaterThan:
+    {
+        if (expression_type(ops[2]).vecsize > 1)
+            BFOP(greaterThan);
+        else
+            BOP(>);
+        break;
+    }
+
+    case OpUGreaterThanEqual:
+    case OpSGreaterThanEqual:
+    case OpFOrdGreaterThanEqual:
+    {
+        if (expression_type(ops[2]).vecsize > 1)
+            BFOP(greaterThanEqual);
+        else
+            BOP(>=);
+        break;
+    }
+
+    case OpULessThan:
+    case OpSLessThan:
+    case OpFOrdLessThan:
+    {
+        if (expression_type(ops[2]).vecsize > 1)
+            BFOP(lessThan);
+        else
+            BOP(<);
+        break;
+    }
+
+    case OpULessThanEqual:
+    case OpSLessThanEqual:
+    case OpFOrdLessThanEqual:
+    {
+        if (expression_type(ops[2]).vecsize > 1)
+            BFOP(lessThanEqual);
+        else
+            BOP(<=);
+        break;
+    }
+
+    // Conversion
+    case OpConvertFToU:
+    case OpConvertFToS:
+    case OpConvertSToF:
+    case OpConvertUToF:
+    case OpUConvert:
+    case OpSConvert:
+    case OpFConvert:
+    {
+        uint32_t result_type = ops[0];
+        uint32_t id = ops[1];
+
+        auto func = type_to_glsl_constructor(get<SPIRType>(result_type));
+        emit_unary_func_op(result_type, id, ops[2], func.c_str());
+        break;
+    }
+
+    case OpBitcast:
+    {
+        uint32_t result_type = ops[0];
+        uint32_t id = ops[1];
+        uint32_t arg = ops[2];
+
+        auto op = bitcast_glsl_op(result_type, arg);
+        emit_unary_func_op(result_type, id, arg, op.c_str());
+        break;
+    }
+
+    // Derivatives
+    case OpDPdx:
+        UFOP(dFdx);
+        break;
+
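+    // SPIR-V only allows the derivative opcodes in the Fragment execution model,
+    // so dFdx/dFdy/fwidth can be emitted directly without any stage checks.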
+ case OpDPdy: + UFOP(dFdy); + break; + + case OpFwidth: + UFOP(fwidth); + break; + + // Bitfield + case OpBitFieldInsert: + QFOP(bitfieldInsert); + break; + + case OpBitFieldSExtract: + case OpBitFieldUExtract: + QFOP(bitfieldExtract); + break; + + case OpBitReverse: + UFOP(bitfieldReverse); + break; + + case OpBitCount: + UFOP(bitCount); + break; + + // Atomics + case OpAtomicExchange: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t ptr = ops[2]; + // Ignore semantics for now, probably only relevant to CL. + uint32_t val = ops[5]; + const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange"; + forced_temporaries.insert(id); + emit_binary_func_op(result_type, id, ptr, val, op); + flush_all_atomic_capable_variables(); + break; + } + + case OpAtomicCompareExchange: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t ptr = ops[2]; + uint32_t val = ops[6]; + uint32_t comp = ops[7]; + const char *op = check_atomic_image(ptr) ? "imageAtomicCompSwap" : "atomicCompSwap"; + + forced_temporaries.insert(id); + emit_trinary_func_op(result_type, id, ptr, comp, val, op); + flush_all_atomic_capable_variables(); + break; + } + + case OpAtomicLoad: + flush_all_atomic_capable_variables(); + // FIXME: Image? + UFOP(atomicCounter); + register_read(ops[1], ops[2], should_forward(ops[2])); + break; + + // OpAtomicStore unimplemented. Not sure what would use that. + // OpAtomicLoad seems to only be relevant for atomic counters. + + case OpAtomicIIncrement: + forced_temporaries.insert(ops[1]); + // FIXME: Image? + UFOP(atomicCounterIncrement); + flush_all_atomic_capable_variables(); + register_read(ops[1], ops[2], should_forward(ops[2])); + break; + + case OpAtomicIDecrement: + forced_temporaries.insert(ops[1]); + // FIXME: Image? + UFOP(atomicCounterDecrement); + flush_all_atomic_capable_variables(); + register_read(ops[1], ops[2], should_forward(ops[2])); + break; + + case OpAtomicIAdd: + { + const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd"; + forced_temporaries.insert(ops[1]); + emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); + flush_all_atomic_capable_variables(); + register_read(ops[1], ops[2], should_forward(ops[2])); + break; + } + + case OpAtomicISub: + { + const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd"; + forced_temporaries.insert(ops[1]); + auto expr = join(op, "(", to_expression(ops[2]), ", -", to_expression(ops[5]), ")"); + emit_op(ops[0], ops[1], expr, should_forward(ops[2]) && should_forward(ops[5]), false); + flush_all_atomic_capable_variables(); + register_read(ops[1], ops[2], should_forward(ops[2])); + break; + } + + case OpAtomicSMin: + case OpAtomicUMin: + { + const char *op = check_atomic_image(ops[2]) ? "imageAtomicMin" : "atomicMin"; + forced_temporaries.insert(ops[1]); + emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); + flush_all_atomic_capable_variables(); + register_read(ops[1], ops[2], should_forward(ops[2])); + break; + } + + case OpAtomicSMax: + case OpAtomicUMax: + { + const char *op = check_atomic_image(ops[2]) ? "imageAtomicMax" : "atomicMax"; + forced_temporaries.insert(ops[1]); + emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); + flush_all_atomic_capable_variables(); + register_read(ops[1], ops[2], should_forward(ops[2])); + break; + } + + case OpAtomicAnd: + { + const char *op = check_atomic_image(ops[2]) ? 
"imageAtomicAnd" : "atomicAnd"; + forced_temporaries.insert(ops[1]); + emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); + flush_all_atomic_capable_variables(); + register_read(ops[1], ops[2], should_forward(ops[2])); + break; + } + + case OpAtomicOr: + { + const char *op = check_atomic_image(ops[2]) ? "imageAtomicOr" : "atomicOr"; + forced_temporaries.insert(ops[1]); + emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); + flush_all_atomic_capable_variables(); + register_read(ops[1], ops[2], should_forward(ops[2])); + break; + } + + case OpAtomicXor: + { + const char *op = check_atomic_image(ops[2]) ? "imageAtomicXor" : "atomicXor"; + forced_temporaries.insert(ops[1]); + emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); + flush_all_atomic_capable_variables(); + register_read(ops[1], ops[2], should_forward(ops[2])); + break; + } + + // Geometry shaders + case OpEmitVertex: + statement("EmitVertex();"); + break; + + case OpEndPrimitive: + statement("EndPrimitive();"); + break; + + case OpEmitStreamVertex: + statement("EmitStreamVertex();"); + break; + + case OpEndStreamPrimitive: + statement("EndStreamPrimitive();"); + break; + + // Textures + case OpImageSampleImplicitLod: + case OpImageSampleExplicitLod: + case OpImageSampleProjImplicitLod: + case OpImageSampleProjExplicitLod: + case OpImageSampleDrefImplicitLod: + case OpImageSampleDrefExplicitLod: + case OpImageSampleProjDrefImplicitLod: + case OpImageSampleProjDrefExplicitLod: + case OpImageFetch: + case OpImageGather: + case OpImageDrefGather: + // Gets a bit hairy, so move this to a separate instruction. + emit_texture_op(i); + break; + + case OpImage: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_op(result_type, id, to_expression(ops[2]), true, false); + break; + } + + case OpImageQuerySizeLod: + BFOP(textureSize); + break; + + // Image load/store + case OpImageRead: + { + // We added Nonreadable speculatively to the OpImage variable due to glslangValidator + // not adding the proper qualifiers. + // If it turns out we need to read the image after all, remove the qualifier and recompile. + auto *var = maybe_get_backing_variable(ops[2]); + if (var) + { + auto &flags = meta.at(var->self).decoration.decoration_flags; + if (flags & (1ull << DecorationNonReadable)) + { + flags &= ~(1ull << DecorationNonReadable); + force_recompile = true; + } + } + + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + bool pure; + string imgexpr; + auto &type = expression_type(ops[2]); + + if (var && var->remapped_variable) // PLS input, just read as-is without any op-code + { + // PLS input could have different number of components than what the SPIR expects, swizzle to + // the appropriate vector size. + auto itr = find_if(begin(pls_inputs), end(pls_inputs), + [var](const PlsRemap &pls) { return pls.id == var->self; }); + + if (itr == end(pls_inputs)) + throw CompilerError("Found PLS remap for OpImageRead, but ID is not a PLS input ..."); + + uint32_t components = pls_format_to_components(itr->format); + imgexpr = remap_swizzle(result_type, components, ops[2]); + pure = true; + } + else if (type.image.dim == DimSubpassData) + { + // Implement subpass loads via texture barrier style sampling. + // Fairly ugly, but should essentially work as a fallback for desktop. + imgexpr = join("texture(", to_expression(ops[2]), + ", gl_FragCoord.xy / vec2(textureSize(", to_expression(ops[2]), ", 0).xy))"); + pure = true; + } + else + { + // Plain image load/store. 
+            imgexpr = join("imageLoad(", to_expression(ops[2]), ", ", to_expression(ops[3]), ")");
+            pure = false;
+        }
+
+        if (var && var->forwardable)
+        {
+            auto &e = emit_op(result_type, id, imgexpr, true, false);
+
+            // We only need to track dependencies if we're reading from image load/store.
+            if (!pure)
+            {
+                e.loaded_from = var->self;
+                var->dependees.push_back(id);
+            }
+        }
+        else
+            emit_op(result_type, id, imgexpr, false, false);
+        break;
+    }
+
+    case OpImageTexelPointer:
+    {
+        uint32_t result_type = ops[0];
+        uint32_t id = ops[1];
+        auto &e = set<SPIRExpression>(id, join(to_expression(ops[2]), ", ", to_expression(ops[3])), result_type, true);
+
+        auto *var = maybe_get_backing_variable(ops[2]);
+        e.loaded_from = var ? var->self : 0;
+        break;
+    }
+
+    case OpImageWrite:
+    {
+        // We added Nonwritable speculatively to the OpImage variable due to glslangValidator
+        // not adding the proper qualifiers.
+        // If it turns out we need to write to the image after all, remove the qualifier and recompile.
+        auto *var = maybe_get_backing_variable(ops[0]);
+        if (var)
+        {
+            auto &flags = meta.at(var->self).decoration.decoration_flags;
+            if (flags & (1ull << DecorationNonWritable))
+            {
+                flags &= ~(1ull << DecorationNonWritable);
+                force_recompile = true;
+            }
+        }
+
+        statement("imageStore(",
+                  to_expression(ops[0]), ", ",
+                  to_expression(ops[1]), ", ",
+                  to_expression(ops[2]), ");");
+
+        if (var && variable_storage_is_aliased(*var))
+            flush_all_aliased_variables();
+        break;
+    }
+
+    case OpImageQuerySize:
+    {
+        auto &type = expression_type(ops[2]);
+        uint32_t result_type = ops[0];
+        uint32_t id = ops[1];
+
+        if (type.basetype == SPIRType::Image)
+        {
+            // The size of an image is always constant.
+            emit_op(result_type, id, join("imageSize(", to_expression(ops[2]), ")"),
+                    true, false);
+        }
+        else
+            throw CompilerError("Invalid type for OpImageQuerySize.");
+        break;
+    }
+
+    // Compute
+    case OpControlBarrier:
+    {
+        // Ignore execution and memory scope.
+        if (execution.model == ExecutionModelGLCompute)
+        {
+            uint32_t mem = get<SPIRConstant>(ops[2]).scalar();
+            if (mem == MemorySemanticsWorkgroupMemoryMask)
+                statement("memoryBarrierShared();");
+            else if (mem && mem != 4062) // Hacky workaround of glslangValidator which emits "all" barrier for barrier() ...
+                statement("memoryBarrier();");
+        }
+        statement("barrier();");
+        break;
+    }
+
+    case OpMemoryBarrier:
+    {
+        uint32_t mem = get<SPIRConstant>(ops[1]).scalar();
+
+        // We cannot forward any loads beyond the memory barrier.
+        if (mem)
+            flush_all_active_variables();
+
+        if (mem == MemorySemanticsWorkgroupMemoryMask)
+            statement("memoryBarrierShared();");
+        else if (mem && mem != 4062) // Hacky workaround of glslangValidator which emits "all" barrier for barrier() ...
+ statement("memoryBarrier();"); + break; + } + + case OpExtInst: + { + uint32_t set = ops[2]; + if (get(set).ext != SPIRExtension::GLSL) + { + statement("// unimplemented ext op ", i.op); + break; + } + + emit_glsl_op(ops[0], ops[1], ops[3], &ops[4], length - 4); + break; + } + + default: + statement("// unimplemented op ", i.op); + break; + } +} + +string CompilerGLSL::to_member_name(const SPIRType &type, uint32_t index) +{ + auto &memb = meta[type.self].members; + if (index < memb.size() && !memb[index].alias.empty()) + return memb[index].alias; + else + return join("_", index); +} + +string CompilerGLSL::member_decl(const SPIRType &type, const SPIRType &membertype, uint32_t index) +{ + uint64_t memberflags = 0; + auto &memb = meta[type.self].members; + if (index < memb.size()) + memberflags = memb[index].decoration_flags; + + return join(layout_for_member(type, index), + flags_to_precision_qualifiers_glsl(membertype, memberflags), + type_to_glsl(membertype), " ", to_member_name(type, index), type_to_array_glsl(membertype)); +} + +const char* CompilerGLSL::flags_to_precision_qualifiers_glsl(const SPIRType &type, uint64_t flags) +{ + if (options.es) + { + // Structs to not have precision qualifiers. + if (type.basetype != SPIRType::Float && + type.basetype != SPIRType::Int && + type.basetype != SPIRType::UInt && + type.basetype != SPIRType::Image && + type.basetype != SPIRType::SampledImage) + return ""; + + if (flags & (1ull << DecorationRelaxedPrecision)) + { + bool implied_fmediump = type.basetype == SPIRType::Float && + options.fragment.default_float_precision == Options::Mediump && + execution.model == ExecutionModelFragment; + + bool implied_imediump = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) && + options.fragment.default_int_precision == Options::Mediump && + execution.model == ExecutionModelFragment; + + return implied_fmediump || implied_imediump ? "" : "mediump "; + } + else + { + bool implied_fhighp = type.basetype == SPIRType::Float && + ((options.fragment.default_float_precision == Options::Highp && + execution.model == ExecutionModelFragment) || (execution.model != ExecutionModelFragment)); + + bool implied_ihighp = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) && + ((options.fragment.default_int_precision == Options::Highp && + execution.model == ExecutionModelFragment) || (execution.model != ExecutionModelFragment)); + + return implied_fhighp || implied_ihighp ? 
"" : "highp "; + } + } + else + return ""; +} + +const char* CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id) +{ + return flags_to_precision_qualifiers_glsl(expression_type(id), meta[id].decoration.decoration_flags); +} + +string CompilerGLSL::to_qualifiers_glsl(uint32_t id) +{ + auto flags = meta[id].decoration.decoration_flags; + string res; + + auto *var = maybe_get(id); + + if (var && var->storage == StorageClassWorkgroup && !backend.shared_is_implied) + res += "shared "; + + res += to_precision_qualifiers_glsl(id); + + //if (flags & (1ull << DecorationSmooth)) + // res += "smooth "; + if (flags & (1ull << DecorationFlat)) + res += "flat "; + if (flags & (1ull << DecorationNoPerspective)) + res += "noperspective "; + if (flags & (1ull << DecorationPatch)) + res += "patch "; + if (flags & (1ull << DecorationSample)) + res += "sample "; + if (flags & (1ull << DecorationInvariant)) + res += "invariant "; + + auto &type = expression_type(id); + if (type.image.dim != DimSubpassData) + { + if (flags & (1ull << DecorationNonWritable)) + res += "readonly "; + if (flags & (1ull << DecorationNonReadable)) + res += "writeonly "; + } + + return res; +} + +string CompilerGLSL::argument_decl(const SPIRFunction::Parameter &arg) +{ + // glslangValidator seems to make all arguments pointer no matter what which is rather bizarre ... + // Not sure if argument being pointer type should make the argument inout. + auto &type = expression_type(arg.id); + const char *direction = ""; + + if (type.pointer) + { + if (arg.write_count && arg.read_count) + direction = "inout "; + else if (arg.write_count) + direction = "out "; + } + + return join(direction, to_qualifiers_glsl(arg.id), type_to_glsl(type), " ", + to_name(arg.id), type_to_array_glsl(type)); +} + +string CompilerGLSL::variable_decl(const SPIRVariable &variable) +{ + // Ignore the pointer type since GLSL doesn't have pointers. + auto &type = get(variable.basetype); + auto res = join(to_qualifiers_glsl(variable.self), type_to_glsl(type), " ", to_name(variable.self), type_to_array_glsl(type)); + if (variable.initializer) + res += join(" = ", to_expression(variable.initializer)); + return res; +} + +const char* CompilerGLSL::to_pls_qualifiers_glsl(const SPIRVariable &variable) +{ + auto flags = meta[variable.self].decoration.decoration_flags; + if (flags & (1ull << DecorationRelaxedPrecision)) + return "mediump "; + else + return "highp "; +} + +string CompilerGLSL::pls_decl(const PlsRemap &var) +{ + auto &variable = get(var.id); + + SPIRType type; + type.vecsize = pls_format_to_components(var.format); + type.basetype = pls_format_to_basetype(var.format); + + return join(to_pls_layout(var.format), to_pls_qualifiers_glsl(variable), type_to_glsl(type), " ", to_name(variable.self)); +} + +string CompilerGLSL::type_to_array_glsl(const SPIRType &type) +{ + string res; + for (auto &size : type.array) + { + res += "["; + if (size) + res += convert_to_string(size); + res += "]"; + } + return res; +} + +string CompilerGLSL::image_type_glsl(const SPIRType &type) +{ + auto &imagetype = get(type.image.type); + string res; + + switch (imagetype.basetype) + { + case SPIRType::Int: res = "i"; break; + case SPIRType::UInt: res = "u"; break; + default: break; + } + + // If we're emulating subpassInput with samplers, force sampler2D + // so we don't have to specify format. + res += type.basetype == SPIRType::Image && type.image.dim != DimSubpassData ? 
+ "image" : "sampler"; + + switch (type.image.dim) + { + case Dim1D: res += "1D"; break; + case Dim2D: res += "2D"; break; + case Dim3D: res += "3D"; break; + case DimCube: res += "Cube"; break; + + case DimBuffer: + if (options.es && options.version < 320) + require_extension("GL_OES_texture_buffer"); + else if (!options.es && options.version < 300) + require_extension("GL_EXT_texture_buffer_object"); + res += "Buffer"; + break; + + case DimSubpassData: res += "2D"; break; + default: throw CompilerError("Only 1D, 2D, 3D, Buffer, InputTarget and Cube textures supported."); + } + + if (type.image.arrayed) + res += "Array"; + if (type.image.depth) + res += "Shadow"; + if (type.image.ms) + res += "MS"; + + return res; +} + +string CompilerGLSL::type_to_glsl_constructor(const SPIRType &type) +{ + auto e = type_to_glsl(type); + for (uint32_t i = 0; i < type.array.size(); i++) + e += "[]"; + return e; +} + +string CompilerGLSL::type_to_glsl(const SPIRType &type) +{ + // Ignore the pointer type since GLSL doesn't have pointers. + + switch (type.basetype) + { + case SPIRType::Struct: + // Need OpName lookup here to get a "sensible" name for a struct. + return to_name(type.self); + + case SPIRType::Image: + case SPIRType::SampledImage: + return image_type_glsl(type); + + case SPIRType::Sampler: + // Not really used. + return "sampler"; + + case SPIRType::Void: + return "void"; + + default: + break; + } + + if (type.vecsize == 1 && type.columns == 1) // Scalar builtin + { + switch (type.basetype) + { + case SPIRType::Bool: return "bool"; + case SPIRType::Int: return backend.basic_int_type; + case SPIRType::UInt: return backend.basic_uint_type; + case SPIRType::AtomicCounter: return "atomic_uint"; + case SPIRType::Float: return "float"; + default: return "???"; + } + } + else if (type.vecsize > 1 && type.columns == 1) // Vector builtin + { + switch (type.basetype) + { + case SPIRType::Bool: return join("bvec", type.vecsize); + case SPIRType::Int: return join("ivec", type.vecsize); + case SPIRType::UInt: return join("uvec", type.vecsize); + case SPIRType::Float: return join("vec", type.vecsize); + default: return "???"; + } + } + else if (type.vecsize == type.columns) // Simple Matrix builtin + { + switch (type.basetype) + { + case SPIRType::Bool: return join("bmat", type.vecsize); + case SPIRType::Int: return join("imat", type.vecsize); + case SPIRType::UInt: return join("umat", type.vecsize); + case SPIRType::Float: return join("mat", type.vecsize); + default: return "???"; + } + } + else + { + switch (type.basetype) + { + case SPIRType::Bool: return join("bmat", type.columns, "x", type.vecsize); + case SPIRType::Int: return join("imat", type.columns, "x", type.vecsize); + case SPIRType::UInt: return join("umat", type.columns, "x", type.vecsize); + case SPIRType::Float: return join("mat", type.columns, "x", type.vecsize); + default: return "???"; + } + } +} + +void CompilerGLSL::add_local_variable(uint32_t id) +{ + auto &name = meta[id].decoration.alias; + if (name.empty()) + return; + + // Reserved for temporaries. 
+    if (name[0] == '_')
+    {
+        name.clear();
+        return;
+    }
+
+    update_name_cache(local_variables, name);
+}
+
+void CompilerGLSL::require_extension(const string &ext)
+{
+    if (forced_extensions.find(ext) == end(forced_extensions))
+    {
+        forced_extensions.insert(ext);
+        force_recompile = true;
+    }
+}
+
+bool CompilerGLSL::check_atomic_image(uint32_t id)
+{
+    auto &type = expression_type(id);
+    if (type.storage == StorageClassImage)
+    {
+        if (options.es && options.version < 320)
+            require_extension("GL_OES_shader_image_atomic");
+
+        auto *var = maybe_get_backing_variable(id);
+        if (var)
+        {
+            auto &flags = meta.at(var->self).decoration.decoration_flags;
+            if (flags & ((1ull << DecorationNonWritable) | (1ull << DecorationNonReadable)))
+            {
+                flags &= ~(1ull << DecorationNonWritable);
+                flags &= ~(1ull << DecorationNonReadable);
+                force_recompile = true;
+            }
+        }
+        return true;
+    }
+    else
+        return false;
+}
+
+void CompilerGLSL::emit_function_prototype(SPIRFunction &func, uint64_t return_flags)
+{
+    local_variables.clear();
+    string decl;
+
+    auto &type = get<SPIRType>(func.return_type);
+    decl += flags_to_precision_qualifiers_glsl(type, return_flags);
+    decl += type_to_glsl(type);
+    decl += " ";
+
+    if (func.self == execution.entry_point)
+    {
+        decl += "main";
+        processing_entry_point = true;
+    }
+    else
+        decl += to_name(func.self);
+
+    decl += "(";
+    for (auto &arg : func.arguments)
+    {
+        // Might change the variable name if it already exists in this function.
+        // SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation
+        // to use the same name for multiple variables.
+        // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates.
+        add_local_variable(arg.id);
+
+        decl += argument_decl(arg);
+        if (&arg != &func.arguments.back())
+            decl += ", ";
+
+        // Hold a pointer to the parameter so we can invalidate the readonly field if needed.
+        auto *var = maybe_get<SPIRVariable>(arg.id);
+        if (var)
+            var->parameter = &arg;
+    }
+
+    decl += ")";
+    statement(decl);
+}
+
+void CompilerGLSL::emit_function(SPIRFunction &func, uint64_t return_flags)
+{
+    // Avoid potential cycles.
+    if (func.active)
+        return;
+    func.active = true;
+
+    // If we depend on a function, emit that function before we emit our own function.
+    for (auto block : func.blocks)
+    {
+        auto &b = get<SPIRBlock>(block);
+        for (auto &i : b.ops)
+        {
+            auto ops = stream(i.offset);
+            auto op = static_cast<Op>(i.op);
+
+            if (op == OpFunctionCall)
+            {
+                // Recursively emit functions which are called.
+                uint32_t func = ops[2];
+                emit_function(get<SPIRFunction>(func), meta[ops[1]].decoration.decoration_flags);
+            }
+        }
+    }
+
+    emit_function_prototype(func, return_flags);
+    begin_scope();
+
+    function = &func;
+
+    for (auto &v : func.local_variables)
+    {
+        auto &var = get<SPIRVariable>(v);
+        if (expression_is_lvalue(v))
+        {
+            add_local_variable(var.self);
+
+            if (var.initializer)
+                statement(variable_decl(var), ";");
+            else
+            {
+                // Don't declare the variable until first use; it declutters the GLSL output quite a lot.
+                // If we don't touch the variable before the first branch,
+                // declare it then, since variable declarations need to be in the top scope.
+                var.deferred_declaration = true;
+            }
+        }
+        else
+        {
+            // HACK: SPIRV likes to use samplers and images as local variables, but GLSL does not allow
+            // this. For these types (non-lvalue), we enforce forwarding through a shadowed variable.
+            // This means that when we OpStore to these variables, we just write in the expression ID directly.
+            // This breaks any kind of branching, since the variable must be statically assigned.
+            // Branching on samplers and images would be pretty much impossible to fake in GLSL.
+            var.statically_assigned = true;
+        }
+    }
+
+    auto &entry_block = get<SPIRBlock>(func.entry_block);
+    entry_block.loop_dominator = SPIRBlock::NoDominator;
+    emit_block_chain(entry_block);
+
+    end_scope();
+    processing_entry_point = false;
+    statement("");
+}
+
+void CompilerGLSL::emit_fixup()
+{
+    if (execution.model == ExecutionModelVertex && options.vertex.fixup_clipspace)
+    {
+        const char *suffix = backend.float_literal_suffix ? "f" : "";
+        statement("gl_Position.z = 2.0", suffix, " * gl_Position.z - gl_Position.w;");
+    }
+}
+
+bool CompilerGLSL::flush_phi_required(uint32_t from, uint32_t to)
+{
+    auto &child = get<SPIRBlock>(to);
+    for (auto &phi : child.phi_variables)
+        if (phi.parent == from)
+            return true;
+    return false;
+}
+
+void CompilerGLSL::flush_phi(uint32_t from, uint32_t to)
+{
+    auto &child = get<SPIRBlock>(to);
+
+    for (auto &phi : child.phi_variables)
+        if (phi.parent == from)
+            statement(to_expression(phi.function_variable), " = ", to_expression(phi.local_variable), ";");
+}
+
+void CompilerGLSL::branch(uint32_t from, uint32_t to)
+{
+    flush_phi(from, to);
+    flush_all_active_variables();
+
+    if (loop_block.find(to) != end(loop_block))
+    {
+        // This can happen if we had a complex continue block which was emitted.
+        // Once the continue block tries to branch to the loop header, just emit continue;
+        // and end the chain here.
+        statement("continue;");
+    }
+    else if (is_continue(to))
+    {
+        auto &continue_block = get<SPIRBlock>(to);
+        if (continue_block.complex_continue)
+        {
+            // Just emit the whole block chain as is.
+            auto usage_counts = expression_usage_counts;
+            auto invalid = invalid_expressions;
+
+            emit_block_chain(continue_block);
+
+            // Expression usage counts and invalid expressions
+            // are moot after returning from the continue block.
+            // Since we emit the same block multiple times,
+            // we don't want to invalidate ourselves.
+            expression_usage_counts = usage_counts;
+            invalid_expressions = invalid;
+        }
+        else
+        {
+            auto &block = get<SPIRBlock>(from);
+            auto &dominator = get<SPIRBlock>(block.loop_dominator);
+
+            // For non-complex continue blocks, we implicitly branch to the continue block
+            // by having the continue block be part of the loop header in for (; ; continue-block).
+            bool outside_control_flow = block_is_outside_flow_control_from_block(dominator, block);
+
+            // Some simplification for for-loops. We always end up with a useless continue;
+            // statement since we branch to a loop block.
+            // Walk the CFG: if we unconditionally execute the block calling continue (assuming we're in the loop block),
+            // we can avoid writing out an explicit continue statement.
+            // Similar optimization to return statements if we know we're outside flow control.
+            if (!outside_control_flow)
+                statement("continue;");
+        }
+    }
+    else if (is_break(to))
+        statement("break;");
+    else if (!is_conditional(to))
+        emit_block_chain(get<SPIRBlock>(to));
+}
+
+void CompilerGLSL::branch(uint32_t from, uint32_t cond, uint32_t true_block, uint32_t false_block)
+{
+    // If we branch directly to a selection merge target, we don't really need a code path.
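+    // Such merge targets are "conditional" blocks: they get emitted later by the
+    // enclosing block chain, so only the non-merge path needs a body here.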
+    bool true_sub = !is_conditional(true_block);
+    bool false_sub = !is_conditional(false_block);
+
+    if (true_sub)
+    {
+        statement("if (", to_expression(cond), ")");
+        begin_scope();
+        branch(from, true_block);
+        end_scope();
+
+        if (false_sub)
+        {
+            statement("else");
+            begin_scope();
+            branch(from, false_block);
+            end_scope();
+        }
+        else if (flush_phi_required(from, false_block))
+        {
+            statement("else");
+            begin_scope();
+            flush_phi(from, false_block);
+            end_scope();
+        }
+    }
+    else if (false_sub && !true_sub)
+    {
+        // Only need the false path, use a negated conditional.
+        statement("if (!", to_expression(cond), ")");
+        begin_scope();
+        branch(from, false_block);
+        end_scope();
+
+        if (flush_phi_required(from, true_block))
+        {
+            statement("else");
+            begin_scope();
+            flush_phi(from, true_block);
+            end_scope();
+        }
+    }
+}
+
+void CompilerGLSL::propagate_loop_dominators(const SPIRBlock &block)
+{
+    // Propagate down the loop dominator block, so that dominated blocks can back trace.
+    if (block.merge == SPIRBlock::MergeLoop || block.loop_dominator)
+    {
+        uint32_t dominator = block.merge == SPIRBlock::MergeLoop ?
+            block.self : block.loop_dominator;
+
+        auto set_dominator = [this](uint32_t self, uint32_t dominator) {
+            auto &block = this->get<SPIRBlock>(self);
+
+            // If we already have a loop dominator, we're trying to break out to merge targets
+            // which should not update the loop dominator.
+            if (!block.loop_dominator)
+                block.loop_dominator = dominator;
+        };
+
+        if (block.true_block)
+            set_dominator(block.true_block, dominator);
+        if (block.false_block)
+            set_dominator(block.false_block, dominator);
+        if (block.next_block)
+            set_dominator(block.next_block, dominator);
+        if (block.continue_block)
+            set_dominator(block.continue_block, dominator);
+        for (auto &c : block.cases)
+            set_dominator(c.block, dominator);
+
+        // After merging a loop, we inherit the loop dominator always.
+        if (block.merge_block)
+            set_dominator(block.merge_block, block.loop_dominator);
+    }
+}
+
+// FIXME: This currently cannot handle complex continue blocks
+// as in do-while.
+// This should be seen as a "trivial" continue block.
+string CompilerGLSL::emit_continue_block(uint32_t continue_block)
+{
+    auto *block = &get<SPIRBlock>(continue_block);
+
+    // While emitting the continue block, declare_temporary will check this
+    // if we have to emit temporaries.
+    current_continue_block = block;
+
+    vector<string> statements;
+
+    // Capture all statements into our list.
+    auto *old = redirect_statement;
+    redirect_statement = &statements;
+
+    // Stamp out all blocks one after the other.
+    while (loop_block.find(block->self) == end(loop_block))
+    {
+        propagate_loop_dominators(*block);
+        // Write out all instructions we have in this block.
+        for (auto &op : block->ops)
+            emit_instruction(op);
+
+        // For plain branchless for/while continue blocks.
+        if (block->next_block)
+        {
+            flush_phi(continue_block, block->next_block);
+            block = &get<SPIRBlock>(block->next_block);
+        }
+        // For do-while blocks: the last block will be a select block.
+        else if (block->true_block)
+        {
+            flush_phi(continue_block, block->true_block);
+            block = &get<SPIRBlock>(block->true_block);
+        }
+    }
+
+    // Restore the old pointer.
+    redirect_statement = old;
+
+    // Somewhat ugly, strip off the last ';' since we use ',' instead.
+    // Ideally, we should select this behavior in statement().
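+    // merge() then joins the captured statements with ", ", so e.g.
+    // { "i++;", "j += 2;" } becomes the single continue expression "i++, j += 2".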
+    for (auto &statement : statements)
+    {
+        if (!statement.empty() && statement.back() == ';')
+            statement.pop_back();
+    }
+
+    current_continue_block = nullptr;
+    return merge(statements);
+}
+
+bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method)
+{
+    SPIRBlock::ContinueBlockType continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));
+
+    if (method == SPIRBlock::MergeToSelectForLoop)
+    {
+        uint32_t current_count = statement_count;
+        // If we're trying to create a true for loop,
+        // we need to make sure that all opcodes before the branch statement do not actually emit any code.
+        // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
+        for (auto &op : block.ops)
+            emit_instruction(op);
+
+        bool condition_is_temporary = forced_temporaries.find(block.condition) == end(forced_temporaries);
+
+        // This can work! We only did trivial things which could be forwarded in the block body!
+        if (current_count == statement_count && condition_is_temporary)
+        {
+            switch (continue_type)
+            {
+            case SPIRBlock::ForLoop:
+                statement("for (; ", to_expression(block.condition), "; ",
+                          emit_continue_block(block.continue_block), ")");
+                break;
+
+            case SPIRBlock::WhileLoop:
+                statement("while (", to_expression(block.condition), ")");
+                break;
+
+            default:
+                throw CompilerError("For/while loop detected, but need while/for loop semantics.");
+            }
+
+            begin_scope();
+            return true;
+        }
+        else
+        {
+            block.disable_block_optimization = true;
+            force_recompile = true;
+            begin_scope(); // We'll see an end_scope() later.
+            return false;
+        }
+    }
+    else if (method == SPIRBlock::MergeToDirectForLoop)
+    {
+        uint32_t current_count = statement_count;
+        auto &child = get<SPIRBlock>(block.next_block);
+
+        // If we're trying to create a true for loop,
+        // we need to make sure that all opcodes before the branch statement do not actually emit any code.
+        // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
+        for (auto &op : child.ops)
+            emit_instruction(op);
+
+        bool condition_is_temporary = forced_temporaries.find(child.condition) == end(forced_temporaries);
+
+        if (current_count == statement_count && condition_is_temporary)
+        {
+            propagate_loop_dominators(child);
+
+            switch (continue_type)
+            {
+            case SPIRBlock::ForLoop:
+                statement("for (; ", to_expression(child.condition), "; ",
+                          emit_continue_block(block.continue_block), ")");
+                break;
+
+            case SPIRBlock::WhileLoop:
+                statement("while (", to_expression(child.condition), ")");
+                break;
+
+            default:
+                throw CompilerError("For/while loop detected, but need while/for loop semantics.");
+            }
+
+            begin_scope();
+            branch(child.self, child.true_block);
+            return true;
+        }
+        else
+        {
+            block.disable_block_optimization = true;
+            force_recompile = true;
+            begin_scope(); // We'll see an end_scope() later.
+            return false;
+        }
+    }
+    else
+        return false;
+}
+
+void CompilerGLSL::flush_undeclared_variables()
+{
+    // Declare undeclared variables.
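+    // These declarations were deferred in emit_function() so that first use could
+    // double as the declaration; flush them here, before control flow forks,
+    // since declarations have to live in the top scope of the function.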
+    if (function->flush_undeclared)
+    {
+        for (auto &v : function->local_variables)
+        {
+            auto &var = get<SPIRVariable>(v);
+            if (var.deferred_declaration)
+                statement(variable_decl(var), ";");
+            var.deferred_declaration = false;
+        }
+        function->flush_undeclared = false;
+    }
+}
+
+void CompilerGLSL::emit_block_chain(SPIRBlock &block)
+{
+    propagate_loop_dominators(block);
+
+    bool select_branch_to_true_block = false;
+    bool skip_direct_branch = false;
+
+    // If we need to force temporaries for certain IDs due to continue blocks, do it before starting the loop header.
+    for (auto &tmp : block.declare_temporary)
+    {
+        auto flags = meta[tmp.second].decoration.decoration_flags;
+        auto &type = get<SPIRType>(tmp.first);
+        statement(flags_to_precision_qualifiers_glsl(type, flags),
+                  type_to_glsl(type), " ", to_name(tmp.second), type_to_array_glsl(type), ";");
+    }
+
+    SPIRBlock::ContinueBlockType continue_type = SPIRBlock::ContinueNone;
+    if (block.continue_block)
+        continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));
+
+    // This is the older loop behavior in glslang which branches to the loop body directly from the loop header.
+    if (block_is_loop_candidate(block, SPIRBlock::MergeToSelectForLoop))
+    {
+        flush_undeclared_variables();
+        if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectForLoop))
+        {
+            // The body of the while loop is just the true block, so always branch there
+            // unconditionally.
+            select_branch_to_true_block = true;
+        }
+    }
+    // This is the newer loop behavior in glslang which branches from the loop header directly to
+    // a new block, which in turn has an OpBranchConditional without a selection merge.
+    else if (block_is_loop_candidate(block, SPIRBlock::MergeToDirectForLoop))
+    {
+        flush_undeclared_variables();
+        if (attempt_emit_loop_header(block, SPIRBlock::MergeToDirectForLoop))
+            skip_direct_branch = true;
+    }
+    else if (continue_type == SPIRBlock::DoWhileLoop)
+    {
+        statement("do");
+        begin_scope();
+        for (auto &op : block.ops)
+            emit_instruction(op);
+    }
+    else if (block.merge == SPIRBlock::MergeLoop)
+    {
+        flush_undeclared_variables();
+
+        // We have a generic loop without any distinguishable pattern like for, while or do-while.
+        get<SPIRBlock>(block.continue_block).complex_continue = true;
+        continue_type = SPIRBlock::ComplexLoop;
+
+        statement("for (;;)");
+        begin_scope();
+        for (auto &op : block.ops)
+            emit_instruction(op);
+    }
+    else
+    {
+        for (auto &op : block.ops)
+            emit_instruction(op);
+    }
+
+    bool emit_next_block = true;
+
+    // Handle end of block.
+    switch (block.terminator)
+    {
+    case SPIRBlock::Direct:
+        // True when emitting a complex continue block.
+        if (block.loop_dominator == block.next_block)
+        {
+            branch(block.self, block.next_block);
+            emit_next_block = false;
+        }
+        // True if MergeToDirectForLoop succeeded.
+        else if (skip_direct_branch)
+            emit_next_block = false;
+        else if (is_continue(block.next_block) ||
+                 is_break(block.next_block) ||
+                 is_conditional(block.next_block))
+        {
+            branch(block.self, block.next_block);
+            emit_next_block = false;
+        }
+        break;
+
+    case SPIRBlock::Select:
+        // True if MergeToSelectForLoop succeeded.
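+        // The loop header was already emitted as while (cond) { ... },
+        // so the loop body is simply the true block of the selection.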
+        if (select_branch_to_true_block)
+            branch(block.self, block.true_block);
+        else
+        {
+            flush_undeclared_variables();
+            branch(block.self, block.condition, block.true_block, block.false_block);
+        }
+        break;
+
+    case SPIRBlock::MultiSelect:
+    {
+        flush_undeclared_variables();
+        auto &type = expression_type(block.condition);
+        bool uint32_t_case = type.basetype == SPIRType::UInt;
+
+        statement("switch (", to_expression(block.condition), ")");
+        begin_scope();
+
+        for (auto &c : block.cases)
+        {
+            auto case_value = uint32_t_case ? convert_to_string(uint32_t(c.value)) : convert_to_string(int32_t(c.value));
+            statement("case ", case_value, ":");
+            begin_scope();
+            branch(block.self, c.block);
+            end_scope();
+        }
+
+        if (block.default_block != block.next_block)
+        {
+            statement("default:");
+            begin_scope();
+            if (is_break(block.default_block))
+                throw CompilerError("Cannot break; out of a switch statement and out of a loop at the same time ...");
+            branch(block.self, block.default_block);
+            end_scope();
+        }
+        else if (flush_phi_required(block.self, block.next_block))
+        {
+            statement("default:");
+            begin_scope();
+            flush_phi(block.self, block.next_block);
+            statement("break;");
+            end_scope();
+        }
+
+        end_scope();
+        break;
+    }
+
+    case SPIRBlock::Return:
+        if (processing_entry_point)
+            emit_fixup();
+
+        if (block.return_value)
+        {
+            // OpReturnValue can return Undef, so don't emit anything for this case.
+            if (ids.at(block.return_value).get_type() != TypeUndef)
+                statement("return ", to_expression(block.return_value), ";");
+        }
+        // If this block is the very final block and not called from control flow,
+        // we do not need an explicit return which looks out of place. Just end the function here.
+        // In the very weird case of for(;;) { return; }, executing return is unconditional,
+        // but we actually need a return here ...
+        else if (!block_is_outside_flow_control_from_block(get<SPIRBlock>(function->entry_block), block) ||
+                 block.loop_dominator != SPIRBlock::NoDominator)
+            statement("return;");
+        break;
+
+    case SPIRBlock::Kill:
+        statement("discard;");
+        break;
+
+    default:
+        throw CompilerError("Unimplemented block terminator.");
+    }
+
+    if (block.next_block && emit_next_block)
+    {
+        // If we hit this case, we're dealing with an unconditional branch, which means we will output
+        // that block after this. If we had a selection merge, we already flushed the phi variables.
+        if (block.merge != SPIRBlock::MergeSelection)
+            flush_phi(block.self, block.next_block);
+        emit_block_chain(get<SPIRBlock>(block.next_block));
+    }
+
+    if (block.merge == SPIRBlock::MergeLoop)
+    {
+        if (continue_type == SPIRBlock::DoWhileLoop)
+        {
+            // Make sure that we run the continue block to get the expressions set, but this
+            // should become an empty string.
+            // We have no fallbacks if we cannot forward everything to temporaries ...
+            auto statements = emit_continue_block(block.continue_block);
+            if (!statements.empty())
+            {
+                // The DoWhile block has side effects, force the ComplexLoop pattern next pass.
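+                // force_recompile restarts the whole compile(); on the next pass
+                // this loop is emitted via the generic for (;;) path with the
+                // continue block inlined as a complex continue.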
+                get<SPIRBlock>(block.continue_block).complex_continue = true;
+                force_recompile = true;
+            }
+
+            end_scope_decl(join("while (", to_expression(get<SPIRBlock>(block.continue_block).condition), ")"));
+        }
+        else
+            end_scope();
+
+        flush_phi(block.self, block.merge_block);
+        emit_block_chain(get<SPIRBlock>(block.merge_block));
+    }
+}
+
+void CompilerGLSL::begin_scope()
+{
+    statement("{");
+    indent++;
+}
+
+void CompilerGLSL::end_scope()
+{
+    if (!indent)
+        throw CompilerError("Popping empty indent stack.");
+    indent--;
+    statement("}");
+}
+
+void CompilerGLSL::end_scope_decl()
+{
+    if (!indent)
+        throw CompilerError("Popping empty indent stack.");
+    indent--;
+    statement("};");
+}
+
+void CompilerGLSL::end_scope_decl(const string &decl)
+{
+    if (!indent)
+        throw CompilerError("Popping empty indent stack.");
+    indent--;
+    statement("} ", decl, ";");
+}
+
diff --git a/spir2glsl.hpp b/spir2glsl.hpp
new file mode 100644
index 0000000..3b36833
--- /dev/null
+++ b/spir2glsl.hpp
@@ -0,0 +1,298 @@
+/*
+ * Copyright 2015-2016 ARM Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SPIR2GLSL
+#define SPIR2GLSL
+
+#include "spir2cross.hpp"
+#include <sstream>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+
+namespace spir2cross
+{
+    enum PlsFormat
+    {
+        PlsNone = 0,
+
+        PlsR11FG11FB10F,
+        PlsR32F,
+        PlsRG16F,
+        PlsRGB10A2,
+        PlsRGBA8,
+        PlsRG16,
+
+        PlsRGBA8I,
+        PlsRG16I,
+
+        PlsRGB10A2UI,
+        PlsRGBA8UI,
+        PlsRG16UI,
+        PlsR32UI
+    };
+
+    struct PlsRemap
+    {
+        uint32_t id;
+        PlsFormat format;
+    };
+
+    class CompilerGLSL : public Compiler
+    {
+        public:
+            struct Options
+            {
+                uint32_t version = 450;
+                bool es = false;
+                bool force_temporary = false;
+
+                enum Precision
+                {
+                    DontCare,
+                    Lowp,
+                    Mediump,
+                    Highp
+                };
+
+                struct
+                {
+                    // In vertex shaders, rewrite [0, w] depth (Vulkan/D3D style) to [-w, w] depth (GL style).
+                    bool fixup_clipspace = true;
+                } vertex;
+
+                struct
+                {
+                    // Add precision mediump float in ES targets when emitting GLES source.
+                    // Add precision highp int in ES targets when emitting GLES source.
+                    Precision default_float_precision = Mediump;
+                    Precision default_int_precision = Highp;
+                } fragment;
+            };
+
+            void remap_pixel_local_storage(std::vector<PlsRemap> inputs, std::vector<PlsRemap> outputs)
+            {
+                pls_inputs = std::move(inputs);
+                pls_outputs = std::move(outputs);
+                remap_pls_variables();
+            }
+
+            CompilerGLSL(std::vector<uint32_t> spirv) : Compiler(move(spirv))
+            {
+                if (source.known)
+                {
+                    options.es = source.es;
+                    options.version = source.version;
+                }
+            }
+
+            const Options& get_options() const { return options; }
+            void set_options(Options &opts) { options = opts; }
+            std::string compile() override;
+
+        protected:
+            void reset();
+            void emit_function(SPIRFunction &func, uint64_t return_flags);
+
+            // Virtualize methods which need to be overridden by subclass targets like C++ and such.
+            virtual void emit_function_prototype(SPIRFunction &func, uint64_t return_flags);
+            virtual void emit_header();
+
+            std::unique_ptr<std::ostringstream> buffer;
+
+            template <typename T>
+            inline void statement_inner(T&& t)
+            {
+                (*buffer) << std::forward<T>(t);
+                statement_count++;
+            }
+
+            template <typename T, typename... Ts>
+            inline void statement_inner(T&& t, Ts&&... ts)
+            {
+                (*buffer) << std::forward<T>(t);
+                statement_count++;
+                statement_inner(std::forward<Ts>(ts)...);
+            }
+
+            template <typename... Ts>
+            inline void statement(Ts&&... ts)
+            {
+                if (redirect_statement)
+                    redirect_statement->push_back(join(std::forward<Ts>(ts)...));
+                else
+                {
+                    for (uint32_t i = 0; i < indent; i++)
+                        (*buffer) << "    ";
+
+                    statement_inner(std::forward<Ts>(ts)...);
+                    (*buffer) << '\n';
+                }
+            }
+
+            template <typename... Ts>
+            inline void statement_no_indent(Ts&&... ts)
+            {
+                auto old_indent = indent;
+                indent = 0;
+                statement(std::forward<Ts>(ts)...);
+                indent = old_indent;
+            }
+
+            // Used for implementing continue blocks where
+            // we want to obtain a list of statements we can merge
+            // on a single line, separated by commas.
+            std::vector<std::string> *redirect_statement = nullptr;
+            const SPIRBlock *current_continue_block = nullptr;
+
+            void begin_scope();
+            void end_scope();
+            void end_scope_decl();
+            void end_scope_decl(const std::string &decl);
+
+            Options options;
+
+            std::string type_to_glsl(const SPIRType &type);
+            std::string type_to_array_glsl(const SPIRType &type);
+            std::string variable_decl(const SPIRVariable &variable);
+
+            void add_local_variable(uint32_t id);
+            std::unordered_set<std::string> local_variables;
+
+            bool processing_entry_point = false;
+
+            // Can be overridden by subclass backends for trivial things which
+            // shouldn't need polymorphism.
+            struct BackendVariations
+            {
+                bool float_literal_suffix = false;
+                bool uint32_t_literal_suffix = true;
+                const char *basic_int_type = "int";
+                const char *basic_uint_type = "uint";
+                bool swizzle_is_function = false;
+                bool shared_is_implied = false;
+            } backend;
+
+            void emit_struct(const SPIRType &type);
+            void emit_instruction(const Instruction &instr);
+
+        private:
+
+            void emit_resources();
+            void emit_buffer_block(const SPIRVariable &type);
+            void emit_push_constant_block(const SPIRVariable &var);
+            void emit_interface_block(const SPIRVariable &type);
+            void emit_block_chain(SPIRBlock &block);
+            std::string emit_continue_block(uint32_t continue_block);
+            bool attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method);
+            void emit_uniform(const SPIRVariable &var);
+            void propagate_loop_dominators(const SPIRBlock &block);
+
+            void branch(uint32_t from, uint32_t to);
+            void branch(uint32_t from, uint32_t cond, uint32_t true_block, uint32_t false_block);
+            void flush_phi(uint32_t from, uint32_t to);
+            bool flush_phi_required(uint32_t from, uint32_t to);
+            void flush_variable_declaration(uint32_t id);
+            void flush_undeclared_variables();
+
+            bool should_forward(uint32_t id);
+            void emit_texture_op(const Instruction &i);
+            void emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp);
+            void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args, uint32_t count);
+            void emit_quaternary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2, uint32_t op3, const char *op);
+            void emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2, const char *op);
+            void emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op);
+            void emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op);
+            void emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op);
+            void emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op);
+            bool expression_is_forwarded(uint32_t id);
+            SPIRExpression& emit_op(uint32_t result_type, uint32_t result_id, const std::string &rhs, bool forward_rhs, bool extra_parens, bool suppress_usage_tracking = false);
+            std::string access_chain(uint32_t base, const uint32_t *indices, uint32_t count, bool index_is_literal, bool chain_only = false);
+
+            const char* index_to_swizzle(uint32_t index);
+            std::string remap_swizzle(uint32_t result_type, uint32_t input_components, uint32_t expr);
+            std::string declare_temporary(uint32_t type, uint32_t id);
+            std::string to_expression(uint32_t id);
+            std::string to_member_name(const SPIRType &type, uint32_t index);
+            std::string type_to_glsl_constructor(const SPIRType &type);
+            std::string argument_decl(const SPIRFunction::Parameter &arg);
+            std::string member_decl(const SPIRType &type, const SPIRType &member_type, uint32_t member);
+            std::string image_type_glsl(const SPIRType &type);
+            std::string to_qualifiers_glsl(uint32_t id);
+            const char* to_precision_qualifiers_glsl(uint32_t id);
+            const char* flags_to_precision_qualifiers_glsl(const SPIRType &type, uint64_t flags);
+            std::string constant_expression(const SPIRConstant &c);
+            std::string constant_expression_vector(const SPIRConstant &c, uint32_t vector);
+            const char* format_to_glsl(spv::ImageFormat format);
+            std::string layout_for_member(const SPIRType &type, uint32_t index);
+            uint64_t combined_decoration_for_member(const SPIRType &type, uint32_t index);
+            std::string layout_for_variable(const SPIRVariable &variable);
+
+            bool ssbo_is_std430_packing(const SPIRType &type);
+            uint32_t type_to_std430_alignment(const SPIRType &type, uint64_t flags);
+            uint32_t type_to_std430_array_stride(const SPIRType &type, uint64_t flags);
+            uint32_t type_to_std430_size(const SPIRType &type, uint64_t flags);
+
+            std::string bitcast_glsl(uint32_t result_type, uint32_t arg);
+            std::string bitcast_glsl_op(uint32_t result_type, uint32_t arg);
+            const char* builtin_to_glsl(spv::BuiltIn builtin);
+            std::string build_composite_combiner(const uint32_t *elems, uint32_t length);
+            bool remove_duplicate_swizzle(std::string &op);
+            bool remove_unity_swizzle(uint32_t base, std::string &op);
+
+            // Can modify flags to remove readonly/writeonly if image type,
+            // and force a recompile.
+            bool check_atomic_image(uint32_t id);
+            void require_extension(const std::string &ext);
+
+            void replace_fragment_output(SPIRVariable &var);
+            void replace_fragment_outputs();
+            std::string legacy_tex_op(const std::string &op, const SPIRType &imgtype);
+
+            uint32_t indent = 0;
+            void emit_fixup();
+
+            std::unordered_set<uint32_t> emitted_functions;
+
+            // Usage tracking. If a temporary is used more than once, use the temporary instead to
+            // avoid AST explosion when SPIRV is generated with pure SSA and doesn't write stuff to variables.
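+            // forced_temporaries holds IDs which must be materialized as real
+            // temporaries (e.g. atomic results); forwarded_temporaries holds IDs
+            // whose expressions were forwarded inline instead of being declared.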
+    std::unordered_map<uint32_t, uint32_t> expression_usage_counts;
+    std::unordered_set<uint32_t> forced_temporaries;
+    std::unordered_set<uint32_t> forwarded_temporaries;
+    void track_expression_read(uint32_t id);
+
+    std::unordered_set<std::string> forced_extensions;
+
+    uint32_t statement_count;
+
+    inline bool is_legacy() const { return (options.es && options.version < 300) || (!options.es && options.version < 130); }
+
+    bool args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure);
+    void register_call_out_argument(uint32_t id);
+    void register_impure_function_call();
+
+    // GL_EXT_shader_pixel_local_storage support.
+    std::vector<PlsRemap> pls_inputs;
+    std::vector<PlsRemap> pls_outputs;
+    std::string pls_decl(const PlsRemap &variable);
+    const char* to_pls_qualifiers_glsl(const SPIRVariable &variable);
+    void emit_pls();
+    void remap_pls_variables();
+};
+}
+
+#endif
+
diff --git a/spirv.hpp b/spirv.hpp
new file mode 100644
index 0000000..526781f
--- /dev/null
+++ b/spirv.hpp
@@ -0,0 +1,879 @@
+// Copyright (c) 2014-2016 The Khronos Group Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and/or associated documentation files (the "Materials"),
+// to deal in the Materials without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Materials, and to permit persons to whom the
+// Materials are furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Materials.
+//
+// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
+// STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
+// HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/
+//
+// THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
+// IN THE MATERIALS.
+
+// This header is automatically generated by the same tool that creates
+// the Binary Section of the SPIR-V specification.
+
+// Enumeration tokens for SPIR-V, in various styles:
+//   C, C++, C++11, JSON, Lua, Python
+//
+// - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL
+// - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL
+// - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL
+// - Lua will use tables, e.g.: spv.SourceLanguage.GLSL
+// - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL']
+//
+// Some tokens act like mask values, which can be OR'd together,
+// while others are mutually exclusive. The mask-like ones have
+// "Mask" in their name, and a parallel enum that has the shift
+// amount (1 << x) for each corresponding enumerant.
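As a quick illustration of the mask/shift convention described above (a sketch, not part of this header): each *Mask value equals 1 << its parallel *Shift enumerant, and the mask-like enums combine via the operator| overloads defined near the end of this header. Assuming spirv.hpp is on the include path:

    // Illustrative only; exercises the enums and operator| from spirv.hpp.
    #include "spirv.hpp"
    #include <cassert>

    int main()
    {
        // Combine two image operand masks with the header's operator|.
        spv::ImageOperandsMask ops =
            spv::ImageOperandsBiasMask | spv::ImageOperandsOffsetMask;

        // Each *Mask value is 1 << the corresponding *Shift enumerant.
        assert(spv::ImageOperandsBiasMask == (1u << spv::ImageOperandsBiasShift));

        // Bits not OR'd in remain clear; Lod was not requested here.
        assert((unsigned(ops) & spv::ImageOperandsLodMask) == 0);
        return 0;
    }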
+ +#ifndef spirv_HPP +#define spirv_HPP + +namespace spv { + +typedef unsigned int Id; + +#define SPV_VERSION 0x10000 +#define SPV_REVISION 3 + +static const unsigned int MagicNumber = 0x07230203; +static const unsigned int Version = 0x00010000; +static const unsigned int Revision = 3; +static const unsigned int OpCodeMask = 0xffff; +static const unsigned int WordCountShift = 16; + +enum SourceLanguage { + SourceLanguageUnknown = 0, + SourceLanguageESSL = 1, + SourceLanguageGLSL = 2, + SourceLanguageOpenCL_C = 3, + SourceLanguageOpenCL_CPP = 4, +}; + +enum ExecutionModel { + ExecutionModelVertex = 0, + ExecutionModelTessellationControl = 1, + ExecutionModelTessellationEvaluation = 2, + ExecutionModelGeometry = 3, + ExecutionModelFragment = 4, + ExecutionModelGLCompute = 5, + ExecutionModelKernel = 6, +}; + +enum AddressingModel { + AddressingModelLogical = 0, + AddressingModelPhysical32 = 1, + AddressingModelPhysical64 = 2, +}; + +enum MemoryModel { + MemoryModelSimple = 0, + MemoryModelGLSL450 = 1, + MemoryModelOpenCL = 2, +}; + +enum ExecutionMode { + ExecutionModeInvocations = 0, + ExecutionModeSpacingEqual = 1, + ExecutionModeSpacingFractionalEven = 2, + ExecutionModeSpacingFractionalOdd = 3, + ExecutionModeVertexOrderCw = 4, + ExecutionModeVertexOrderCcw = 5, + ExecutionModePixelCenterInteger = 6, + ExecutionModeOriginUpperLeft = 7, + ExecutionModeOriginLowerLeft = 8, + ExecutionModeEarlyFragmentTests = 9, + ExecutionModePointMode = 10, + ExecutionModeXfb = 11, + ExecutionModeDepthReplacing = 12, + ExecutionModeDepthGreater = 14, + ExecutionModeDepthLess = 15, + ExecutionModeDepthUnchanged = 16, + ExecutionModeLocalSize = 17, + ExecutionModeLocalSizeHint = 18, + ExecutionModeInputPoints = 19, + ExecutionModeInputLines = 20, + ExecutionModeInputLinesAdjacency = 21, + ExecutionModeTriangles = 22, + ExecutionModeInputTrianglesAdjacency = 23, + ExecutionModeQuads = 24, + ExecutionModeIsolines = 25, + ExecutionModeOutputVertices = 26, + ExecutionModeOutputPoints = 27, + ExecutionModeOutputLineStrip = 28, + ExecutionModeOutputTriangleStrip = 29, + ExecutionModeVecTypeHint = 30, + ExecutionModeContractionOff = 31, +}; + +enum StorageClass { + StorageClassUniformConstant = 0, + StorageClassInput = 1, + StorageClassUniform = 2, + StorageClassOutput = 3, + StorageClassWorkgroup = 4, + StorageClassCrossWorkgroup = 5, + StorageClassPrivate = 6, + StorageClassFunction = 7, + StorageClassGeneric = 8, + StorageClassPushConstant = 9, + StorageClassAtomicCounter = 10, + StorageClassImage = 11, +}; + +enum Dim { + Dim1D = 0, + Dim2D = 1, + Dim3D = 2, + DimCube = 3, + DimRect = 4, + DimBuffer = 5, + DimSubpassData = 6, +}; + +enum SamplerAddressingMode { + SamplerAddressingModeNone = 0, + SamplerAddressingModeClampToEdge = 1, + SamplerAddressingModeClamp = 2, + SamplerAddressingModeRepeat = 3, + SamplerAddressingModeRepeatMirrored = 4, +}; + +enum SamplerFilterMode { + SamplerFilterModeNearest = 0, + SamplerFilterModeLinear = 1, +}; + +enum ImageFormat { + ImageFormatUnknown = 0, + ImageFormatRgba32f = 1, + ImageFormatRgba16f = 2, + ImageFormatR32f = 3, + ImageFormatRgba8 = 4, + ImageFormatRgba8Snorm = 5, + ImageFormatRg32f = 6, + ImageFormatRg16f = 7, + ImageFormatR11fG11fB10f = 8, + ImageFormatR16f = 9, + ImageFormatRgba16 = 10, + ImageFormatRgb10A2 = 11, + ImageFormatRg16 = 12, + ImageFormatRg8 = 13, + ImageFormatR16 = 14, + ImageFormatR8 = 15, + ImageFormatRgba16Snorm = 16, + ImageFormatRg16Snorm = 17, + ImageFormatRg8Snorm = 18, + ImageFormatR16Snorm = 19, + ImageFormatR8Snorm = 20, + 
ImageFormatRgba32i = 21, + ImageFormatRgba16i = 22, + ImageFormatRgba8i = 23, + ImageFormatR32i = 24, + ImageFormatRg32i = 25, + ImageFormatRg16i = 26, + ImageFormatRg8i = 27, + ImageFormatR16i = 28, + ImageFormatR8i = 29, + ImageFormatRgba32ui = 30, + ImageFormatRgba16ui = 31, + ImageFormatRgba8ui = 32, + ImageFormatR32ui = 33, + ImageFormatRgb10a2ui = 34, + ImageFormatRg32ui = 35, + ImageFormatRg16ui = 36, + ImageFormatRg8ui = 37, + ImageFormatR16ui = 38, + ImageFormatR8ui = 39, +}; + +enum ImageChannelOrder { + ImageChannelOrderR = 0, + ImageChannelOrderA = 1, + ImageChannelOrderRG = 2, + ImageChannelOrderRA = 3, + ImageChannelOrderRGB = 4, + ImageChannelOrderRGBA = 5, + ImageChannelOrderBGRA = 6, + ImageChannelOrderARGB = 7, + ImageChannelOrderIntensity = 8, + ImageChannelOrderLuminance = 9, + ImageChannelOrderRx = 10, + ImageChannelOrderRGx = 11, + ImageChannelOrderRGBx = 12, + ImageChannelOrderDepth = 13, + ImageChannelOrderDepthStencil = 14, + ImageChannelOrdersRGB = 15, + ImageChannelOrdersRGBx = 16, + ImageChannelOrdersRGBA = 17, + ImageChannelOrdersBGRA = 18, +}; + +enum ImageChannelDataType { + ImageChannelDataTypeSnormInt8 = 0, + ImageChannelDataTypeSnormInt16 = 1, + ImageChannelDataTypeUnormInt8 = 2, + ImageChannelDataTypeUnormInt16 = 3, + ImageChannelDataTypeUnormShort565 = 4, + ImageChannelDataTypeUnormShort555 = 5, + ImageChannelDataTypeUnormInt101010 = 6, + ImageChannelDataTypeSignedInt8 = 7, + ImageChannelDataTypeSignedInt16 = 8, + ImageChannelDataTypeSignedInt32 = 9, + ImageChannelDataTypeUnsignedInt8 = 10, + ImageChannelDataTypeUnsignedInt16 = 11, + ImageChannelDataTypeUnsignedInt32 = 12, + ImageChannelDataTypeHalfFloat = 13, + ImageChannelDataTypeFloat = 14, + ImageChannelDataTypeUnormInt24 = 15, + ImageChannelDataTypeUnormInt101010_2 = 16, +}; + +enum ImageOperandsShift { + ImageOperandsBiasShift = 0, + ImageOperandsLodShift = 1, + ImageOperandsGradShift = 2, + ImageOperandsConstOffsetShift = 3, + ImageOperandsOffsetShift = 4, + ImageOperandsConstOffsetsShift = 5, + ImageOperandsSampleShift = 6, + ImageOperandsMinLodShift = 7, +}; + +enum ImageOperandsMask { + ImageOperandsMaskNone = 0, + ImageOperandsBiasMask = 0x00000001, + ImageOperandsLodMask = 0x00000002, + ImageOperandsGradMask = 0x00000004, + ImageOperandsConstOffsetMask = 0x00000008, + ImageOperandsOffsetMask = 0x00000010, + ImageOperandsConstOffsetsMask = 0x00000020, + ImageOperandsSampleMask = 0x00000040, + ImageOperandsMinLodMask = 0x00000080, +}; + +enum FPFastMathModeShift { + FPFastMathModeNotNaNShift = 0, + FPFastMathModeNotInfShift = 1, + FPFastMathModeNSZShift = 2, + FPFastMathModeAllowRecipShift = 3, + FPFastMathModeFastShift = 4, +}; + +enum FPFastMathModeMask { + FPFastMathModeMaskNone = 0, + FPFastMathModeNotNaNMask = 0x00000001, + FPFastMathModeNotInfMask = 0x00000002, + FPFastMathModeNSZMask = 0x00000004, + FPFastMathModeAllowRecipMask = 0x00000008, + FPFastMathModeFastMask = 0x00000010, +}; + +enum FPRoundingMode { + FPRoundingModeRTE = 0, + FPRoundingModeRTZ = 1, + FPRoundingModeRTP = 2, + FPRoundingModeRTN = 3, +}; + +enum LinkageType { + LinkageTypeExport = 0, + LinkageTypeImport = 1, +}; + +enum AccessQualifier { + AccessQualifierReadOnly = 0, + AccessQualifierWriteOnly = 1, + AccessQualifierReadWrite = 2, +}; + +enum FunctionParameterAttribute { + FunctionParameterAttributeZext = 0, + FunctionParameterAttributeSext = 1, + FunctionParameterAttributeByVal = 2, + FunctionParameterAttributeSret = 3, + FunctionParameterAttributeNoAlias = 4, + FunctionParameterAttributeNoCapture = 5, + 
FunctionParameterAttributeNoWrite = 6, + FunctionParameterAttributeNoReadWrite = 7, +}; + +enum Decoration { + DecorationRelaxedPrecision = 0, + DecorationSpecId = 1, + DecorationBlock = 2, + DecorationBufferBlock = 3, + DecorationRowMajor = 4, + DecorationColMajor = 5, + DecorationArrayStride = 6, + DecorationMatrixStride = 7, + DecorationGLSLShared = 8, + DecorationGLSLPacked = 9, + DecorationCPacked = 10, + DecorationBuiltIn = 11, + DecorationNoPerspective = 13, + DecorationFlat = 14, + DecorationPatch = 15, + DecorationCentroid = 16, + DecorationSample = 17, + DecorationInvariant = 18, + DecorationRestrict = 19, + DecorationAliased = 20, + DecorationVolatile = 21, + DecorationConstant = 22, + DecorationCoherent = 23, + DecorationNonWritable = 24, + DecorationNonReadable = 25, + DecorationUniform = 26, + DecorationSaturatedConversion = 28, + DecorationStream = 29, + DecorationLocation = 30, + DecorationComponent = 31, + DecorationIndex = 32, + DecorationBinding = 33, + DecorationDescriptorSet = 34, + DecorationOffset = 35, + DecorationXfbBuffer = 36, + DecorationXfbStride = 37, + DecorationFuncParamAttr = 38, + DecorationFPRoundingMode = 39, + DecorationFPFastMathMode = 40, + DecorationLinkageAttributes = 41, + DecorationNoContraction = 42, + DecorationInputAttachmentIndex = 43, + DecorationAlignment = 44, +}; + +enum BuiltIn { + BuiltInPosition = 0, + BuiltInPointSize = 1, + BuiltInClipDistance = 3, + BuiltInCullDistance = 4, + BuiltInVertexId = 5, + BuiltInInstanceId = 6, + BuiltInPrimitiveId = 7, + BuiltInInvocationId = 8, + BuiltInLayer = 9, + BuiltInViewportIndex = 10, + BuiltInTessLevelOuter = 11, + BuiltInTessLevelInner = 12, + BuiltInTessCoord = 13, + BuiltInPatchVertices = 14, + BuiltInFragCoord = 15, + BuiltInPointCoord = 16, + BuiltInFrontFacing = 17, + BuiltInSampleId = 18, + BuiltInSamplePosition = 19, + BuiltInSampleMask = 20, + BuiltInFragDepth = 22, + BuiltInHelperInvocation = 23, + BuiltInNumWorkgroups = 24, + BuiltInWorkgroupSize = 25, + BuiltInWorkgroupId = 26, + BuiltInLocalInvocationId = 27, + BuiltInGlobalInvocationId = 28, + BuiltInLocalInvocationIndex = 29, + BuiltInWorkDim = 30, + BuiltInGlobalSize = 31, + BuiltInEnqueuedWorkgroupSize = 32, + BuiltInGlobalOffset = 33, + BuiltInGlobalLinearId = 34, + BuiltInSubgroupSize = 36, + BuiltInSubgroupMaxSize = 37, + BuiltInNumSubgroups = 38, + BuiltInNumEnqueuedSubgroups = 39, + BuiltInSubgroupId = 40, + BuiltInSubgroupLocalInvocationId = 41, + BuiltInVertexIndex = 42, + BuiltInInstanceIndex = 43, +}; + +enum SelectionControlShift { + SelectionControlFlattenShift = 0, + SelectionControlDontFlattenShift = 1, +}; + +enum SelectionControlMask { + SelectionControlMaskNone = 0, + SelectionControlFlattenMask = 0x00000001, + SelectionControlDontFlattenMask = 0x00000002, +}; + +enum LoopControlShift { + LoopControlUnrollShift = 0, + LoopControlDontUnrollShift = 1, +}; + +enum LoopControlMask { + LoopControlMaskNone = 0, + LoopControlUnrollMask = 0x00000001, + LoopControlDontUnrollMask = 0x00000002, +}; + +enum FunctionControlShift { + FunctionControlInlineShift = 0, + FunctionControlDontInlineShift = 1, + FunctionControlPureShift = 2, + FunctionControlConstShift = 3, +}; + +enum FunctionControlMask { + FunctionControlMaskNone = 0, + FunctionControlInlineMask = 0x00000001, + FunctionControlDontInlineMask = 0x00000002, + FunctionControlPureMask = 0x00000004, + FunctionControlConstMask = 0x00000008, +}; + +enum MemorySemanticsShift { + MemorySemanticsAcquireShift = 1, + MemorySemanticsReleaseShift = 2, + 
MemorySemanticsAcquireReleaseShift = 3, + MemorySemanticsSequentiallyConsistentShift = 4, + MemorySemanticsUniformMemoryShift = 6, + MemorySemanticsSubgroupMemoryShift = 7, + MemorySemanticsWorkgroupMemoryShift = 8, + MemorySemanticsCrossWorkgroupMemoryShift = 9, + MemorySemanticsAtomicCounterMemoryShift = 10, + MemorySemanticsImageMemoryShift = 11, +}; + +enum MemorySemanticsMask { + MemorySemanticsMaskNone = 0, + MemorySemanticsAcquireMask = 0x00000002, + MemorySemanticsReleaseMask = 0x00000004, + MemorySemanticsAcquireReleaseMask = 0x00000008, + MemorySemanticsSequentiallyConsistentMask = 0x00000010, + MemorySemanticsUniformMemoryMask = 0x00000040, + MemorySemanticsSubgroupMemoryMask = 0x00000080, + MemorySemanticsWorkgroupMemoryMask = 0x00000100, + MemorySemanticsCrossWorkgroupMemoryMask = 0x00000200, + MemorySemanticsAtomicCounterMemoryMask = 0x00000400, + MemorySemanticsImageMemoryMask = 0x00000800, +}; + +enum MemoryAccessShift { + MemoryAccessVolatileShift = 0, + MemoryAccessAlignedShift = 1, + MemoryAccessNontemporalShift = 2, +}; + +enum MemoryAccessMask { + MemoryAccessMaskNone = 0, + MemoryAccessVolatileMask = 0x00000001, + MemoryAccessAlignedMask = 0x00000002, + MemoryAccessNontemporalMask = 0x00000004, +}; + +enum Scope { + ScopeCrossDevice = 0, + ScopeDevice = 1, + ScopeWorkgroup = 2, + ScopeSubgroup = 3, + ScopeInvocation = 4, +}; + +enum GroupOperation { + GroupOperationReduce = 0, + GroupOperationInclusiveScan = 1, + GroupOperationExclusiveScan = 2, +}; + +enum KernelEnqueueFlags { + KernelEnqueueFlagsNoWait = 0, + KernelEnqueueFlagsWaitKernel = 1, + KernelEnqueueFlagsWaitWorkGroup = 2, +}; + +enum KernelProfilingInfoShift { + KernelProfilingInfoCmdExecTimeShift = 0, +}; + +enum KernelProfilingInfoMask { + KernelProfilingInfoMaskNone = 0, + KernelProfilingInfoCmdExecTimeMask = 0x00000001, +}; + +enum Capability { + CapabilityMatrix = 0, + CapabilityShader = 1, + CapabilityGeometry = 2, + CapabilityTessellation = 3, + CapabilityAddresses = 4, + CapabilityLinkage = 5, + CapabilityKernel = 6, + CapabilityVector16 = 7, + CapabilityFloat16Buffer = 8, + CapabilityFloat16 = 9, + CapabilityFloat64 = 10, + CapabilityInt64 = 11, + CapabilityInt64Atomics = 12, + CapabilityImageBasic = 13, + CapabilityImageReadWrite = 14, + CapabilityImageMipmap = 15, + CapabilityPipes = 17, + CapabilityGroups = 18, + CapabilityDeviceEnqueue = 19, + CapabilityLiteralSampler = 20, + CapabilityAtomicStorage = 21, + CapabilityInt16 = 22, + CapabilityTessellationPointSize = 23, + CapabilityGeometryPointSize = 24, + CapabilityImageGatherExtended = 25, + CapabilityStorageImageMultisample = 27, + CapabilityUniformBufferArrayDynamicIndexing = 28, + CapabilitySampledImageArrayDynamicIndexing = 29, + CapabilityStorageBufferArrayDynamicIndexing = 30, + CapabilityStorageImageArrayDynamicIndexing = 31, + CapabilityClipDistance = 32, + CapabilityCullDistance = 33, + CapabilityImageCubeArray = 34, + CapabilitySampleRateShading = 35, + CapabilityImageRect = 36, + CapabilitySampledRect = 37, + CapabilityGenericPointer = 38, + CapabilityInt8 = 39, + CapabilityInputAttachment = 40, + CapabilitySparseResidency = 41, + CapabilityMinLod = 42, + CapabilitySampled1D = 43, + CapabilityImage1D = 44, + CapabilitySampledCubeArray = 45, + CapabilitySampledBuffer = 46, + CapabilityImageBuffer = 47, + CapabilityImageMSArray = 48, + CapabilityStorageImageExtendedFormats = 49, + CapabilityImageQuery = 50, + CapabilityDerivativeControl = 51, + CapabilityInterpolationFunction = 52, + CapabilityTransformFeedback = 53, + 
CapabilityGeometryStreams = 54, + CapabilityStorageImageReadWithoutFormat = 55, + CapabilityStorageImageWriteWithoutFormat = 56, + CapabilityMultiViewport = 57, +}; + +enum Op { + OpNop = 0, + OpUndef = 1, + OpSourceContinued = 2, + OpSource = 3, + OpSourceExtension = 4, + OpName = 5, + OpMemberName = 6, + OpString = 7, + OpLine = 8, + OpExtension = 10, + OpExtInstImport = 11, + OpExtInst = 12, + OpMemoryModel = 14, + OpEntryPoint = 15, + OpExecutionMode = 16, + OpCapability = 17, + OpTypeVoid = 19, + OpTypeBool = 20, + OpTypeInt = 21, + OpTypeFloat = 22, + OpTypeVector = 23, + OpTypeMatrix = 24, + OpTypeImage = 25, + OpTypeSampler = 26, + OpTypeSampledImage = 27, + OpTypeArray = 28, + OpTypeRuntimeArray = 29, + OpTypeStruct = 30, + OpTypeOpaque = 31, + OpTypePointer = 32, + OpTypeFunction = 33, + OpTypeEvent = 34, + OpTypeDeviceEvent = 35, + OpTypeReserveId = 36, + OpTypeQueue = 37, + OpTypePipe = 38, + OpTypeForwardPointer = 39, + OpConstantTrue = 41, + OpConstantFalse = 42, + OpConstant = 43, + OpConstantComposite = 44, + OpConstantSampler = 45, + OpConstantNull = 46, + OpSpecConstantTrue = 48, + OpSpecConstantFalse = 49, + OpSpecConstant = 50, + OpSpecConstantComposite = 51, + OpSpecConstantOp = 52, + OpFunction = 54, + OpFunctionParameter = 55, + OpFunctionEnd = 56, + OpFunctionCall = 57, + OpVariable = 59, + OpImageTexelPointer = 60, + OpLoad = 61, + OpStore = 62, + OpCopyMemory = 63, + OpCopyMemorySized = 64, + OpAccessChain = 65, + OpInBoundsAccessChain = 66, + OpPtrAccessChain = 67, + OpArrayLength = 68, + OpGenericPtrMemSemantics = 69, + OpInBoundsPtrAccessChain = 70, + OpDecorate = 71, + OpMemberDecorate = 72, + OpDecorationGroup = 73, + OpGroupDecorate = 74, + OpGroupMemberDecorate = 75, + OpVectorExtractDynamic = 77, + OpVectorInsertDynamic = 78, + OpVectorShuffle = 79, + OpCompositeConstruct = 80, + OpCompositeExtract = 81, + OpCompositeInsert = 82, + OpCopyObject = 83, + OpTranspose = 84, + OpSampledImage = 86, + OpImageSampleImplicitLod = 87, + OpImageSampleExplicitLod = 88, + OpImageSampleDrefImplicitLod = 89, + OpImageSampleDrefExplicitLod = 90, + OpImageSampleProjImplicitLod = 91, + OpImageSampleProjExplicitLod = 92, + OpImageSampleProjDrefImplicitLod = 93, + OpImageSampleProjDrefExplicitLod = 94, + OpImageFetch = 95, + OpImageGather = 96, + OpImageDrefGather = 97, + OpImageRead = 98, + OpImageWrite = 99, + OpImage = 100, + OpImageQueryFormat = 101, + OpImageQueryOrder = 102, + OpImageQuerySizeLod = 103, + OpImageQuerySize = 104, + OpImageQueryLod = 105, + OpImageQueryLevels = 106, + OpImageQuerySamples = 107, + OpConvertFToU = 109, + OpConvertFToS = 110, + OpConvertSToF = 111, + OpConvertUToF = 112, + OpUConvert = 113, + OpSConvert = 114, + OpFConvert = 115, + OpQuantizeToF16 = 116, + OpConvertPtrToU = 117, + OpSatConvertSToU = 118, + OpSatConvertUToS = 119, + OpConvertUToPtr = 120, + OpPtrCastToGeneric = 121, + OpGenericCastToPtr = 122, + OpGenericCastToPtrExplicit = 123, + OpBitcast = 124, + OpSNegate = 126, + OpFNegate = 127, + OpIAdd = 128, + OpFAdd = 129, + OpISub = 130, + OpFSub = 131, + OpIMul = 132, + OpFMul = 133, + OpUDiv = 134, + OpSDiv = 135, + OpFDiv = 136, + OpUMod = 137, + OpSRem = 138, + OpSMod = 139, + OpFRem = 140, + OpFMod = 141, + OpVectorTimesScalar = 142, + OpMatrixTimesScalar = 143, + OpVectorTimesMatrix = 144, + OpMatrixTimesVector = 145, + OpMatrixTimesMatrix = 146, + OpOuterProduct = 147, + OpDot = 148, + OpIAddCarry = 149, + OpISubBorrow = 150, + OpUMulExtended = 151, + OpSMulExtended = 152, + OpAny = 154, + OpAll = 155, + OpIsNan = 156, + 
OpIsInf = 157, + OpIsFinite = 158, + OpIsNormal = 159, + OpSignBitSet = 160, + OpLessOrGreater = 161, + OpOrdered = 162, + OpUnordered = 163, + OpLogicalEqual = 164, + OpLogicalNotEqual = 165, + OpLogicalOr = 166, + OpLogicalAnd = 167, + OpLogicalNot = 168, + OpSelect = 169, + OpIEqual = 170, + OpINotEqual = 171, + OpUGreaterThan = 172, + OpSGreaterThan = 173, + OpUGreaterThanEqual = 174, + OpSGreaterThanEqual = 175, + OpULessThan = 176, + OpSLessThan = 177, + OpULessThanEqual = 178, + OpSLessThanEqual = 179, + OpFOrdEqual = 180, + OpFUnordEqual = 181, + OpFOrdNotEqual = 182, + OpFUnordNotEqual = 183, + OpFOrdLessThan = 184, + OpFUnordLessThan = 185, + OpFOrdGreaterThan = 186, + OpFUnordGreaterThan = 187, + OpFOrdLessThanEqual = 188, + OpFUnordLessThanEqual = 189, + OpFOrdGreaterThanEqual = 190, + OpFUnordGreaterThanEqual = 191, + OpShiftRightLogical = 194, + OpShiftRightArithmetic = 195, + OpShiftLeftLogical = 196, + OpBitwiseOr = 197, + OpBitwiseXor = 198, + OpBitwiseAnd = 199, + OpNot = 200, + OpBitFieldInsert = 201, + OpBitFieldSExtract = 202, + OpBitFieldUExtract = 203, + OpBitReverse = 204, + OpBitCount = 205, + OpDPdx = 207, + OpDPdy = 208, + OpFwidth = 209, + OpDPdxFine = 210, + OpDPdyFine = 211, + OpFwidthFine = 212, + OpDPdxCoarse = 213, + OpDPdyCoarse = 214, + OpFwidthCoarse = 215, + OpEmitVertex = 218, + OpEndPrimitive = 219, + OpEmitStreamVertex = 220, + OpEndStreamPrimitive = 221, + OpControlBarrier = 224, + OpMemoryBarrier = 225, + OpAtomicLoad = 227, + OpAtomicStore = 228, + OpAtomicExchange = 229, + OpAtomicCompareExchange = 230, + OpAtomicCompareExchangeWeak = 231, + OpAtomicIIncrement = 232, + OpAtomicIDecrement = 233, + OpAtomicIAdd = 234, + OpAtomicISub = 235, + OpAtomicSMin = 236, + OpAtomicUMin = 237, + OpAtomicSMax = 238, + OpAtomicUMax = 239, + OpAtomicAnd = 240, + OpAtomicOr = 241, + OpAtomicXor = 242, + OpPhi = 245, + OpLoopMerge = 246, + OpSelectionMerge = 247, + OpLabel = 248, + OpBranch = 249, + OpBranchConditional = 250, + OpSwitch = 251, + OpKill = 252, + OpReturn = 253, + OpReturnValue = 254, + OpUnreachable = 255, + OpLifetimeStart = 256, + OpLifetimeStop = 257, + OpGroupAsyncCopy = 259, + OpGroupWaitEvents = 260, + OpGroupAll = 261, + OpGroupAny = 262, + OpGroupBroadcast = 263, + OpGroupIAdd = 264, + OpGroupFAdd = 265, + OpGroupFMin = 266, + OpGroupUMin = 267, + OpGroupSMin = 268, + OpGroupFMax = 269, + OpGroupUMax = 270, + OpGroupSMax = 271, + OpReadPipe = 274, + OpWritePipe = 275, + OpReservedReadPipe = 276, + OpReservedWritePipe = 277, + OpReserveReadPipePackets = 278, + OpReserveWritePipePackets = 279, + OpCommitReadPipe = 280, + OpCommitWritePipe = 281, + OpIsValidReserveId = 282, + OpGetNumPipePackets = 283, + OpGetMaxPipePackets = 284, + OpGroupReserveReadPipePackets = 285, + OpGroupReserveWritePipePackets = 286, + OpGroupCommitReadPipe = 287, + OpGroupCommitWritePipe = 288, + OpEnqueueMarker = 291, + OpEnqueueKernel = 292, + OpGetKernelNDrangeSubGroupCount = 293, + OpGetKernelNDrangeMaxSubGroupSize = 294, + OpGetKernelWorkGroupSize = 295, + OpGetKernelPreferredWorkGroupSizeMultiple = 296, + OpRetainEvent = 297, + OpReleaseEvent = 298, + OpCreateUserEvent = 299, + OpIsValidEvent = 300, + OpSetUserEventStatus = 301, + OpCaptureEventProfilingInfo = 302, + OpGetDefaultQueue = 303, + OpBuildNDRange = 304, + OpImageSparseSampleImplicitLod = 305, + OpImageSparseSampleExplicitLod = 306, + OpImageSparseSampleDrefImplicitLod = 307, + OpImageSparseSampleDrefExplicitLod = 308, + OpImageSparseSampleProjImplicitLod = 309, + OpImageSparseSampleProjExplicitLod = 
310,
+    OpImageSparseSampleProjDrefImplicitLod = 311,
+    OpImageSparseSampleProjDrefExplicitLod = 312,
+    OpImageSparseFetch = 313,
+    OpImageSparseGather = 314,
+    OpImageSparseDrefGather = 315,
+    OpImageSparseTexelsResident = 316,
+    OpNoLine = 317,
+    OpAtomicFlagTestAndSet = 318,
+    OpAtomicFlagClear = 319,
+    OpImageSparseRead = 320,
+};
+
+// Overload operator| for mask bit combining
+
+inline ImageOperandsMask operator|(ImageOperandsMask a, ImageOperandsMask b) { return ImageOperandsMask(unsigned(a) | unsigned(b)); }
+inline FPFastMathModeMask operator|(FPFastMathModeMask a, FPFastMathModeMask b) { return FPFastMathModeMask(unsigned(a) | unsigned(b)); }
+inline SelectionControlMask operator|(SelectionControlMask a, SelectionControlMask b) { return SelectionControlMask(unsigned(a) | unsigned(b)); }
+inline LoopControlMask operator|(LoopControlMask a, LoopControlMask b) { return LoopControlMask(unsigned(a) | unsigned(b)); }
+inline FunctionControlMask operator|(FunctionControlMask a, FunctionControlMask b) { return FunctionControlMask(unsigned(a) | unsigned(b)); }
+inline MemorySemanticsMask operator|(MemorySemanticsMask a, MemorySemanticsMask b) { return MemorySemanticsMask(unsigned(a) | unsigned(b)); }
+inline MemoryAccessMask operator|(MemoryAccessMask a, MemoryAccessMask b) { return MemoryAccessMask(unsigned(a) | unsigned(b)); }
+inline KernelProfilingInfoMask operator|(KernelProfilingInfoMask a, KernelProfilingInfoMask b) { return KernelProfilingInfoMask(unsigned(a) | unsigned(b)); }
+
+} // end namespace spv
+
+#endif // #ifndef spirv_HPP
diff --git a/test_shaders.py b/test_shaders.py
new file mode 100755
index 0000000..4b12251
--- /dev/null
+++ b/test_shaders.py
@@ -0,0 +1,127 @@
+#!/usr/bin/env python3
+
+import sys
+import os
+import subprocess
+import tempfile
+import re
+import itertools
+import hashlib
+import shutil
+
+def parse_stats(stats):
+    m = re.search('([0-9]+) work registers', stats)
+    registers = int(m.group(1)) if m else 0
+
+    m = re.search('([0-9]+) uniform registers', stats)
+    uniform_regs = int(m.group(1)) if m else 0
+
+    m_list = re.findall('(-?[0-9]+)\s+(-?[0-9]+)\s+(-?[0-9]+)', stats)
+    alu_short = float(m_list[1][0]) if m_list else 0
+    ls_short = float(m_list[1][1]) if m_list else 0
+    tex_short = float(m_list[1][2]) if m_list else 0
+    alu_long = float(m_list[2][0]) if m_list else 0
+    ls_long = float(m_list[2][1]) if m_list else 0
+    tex_long = float(m_list[2][2]) if m_list else 0
+
+    return (registers, uniform_regs, alu_short, ls_short, tex_short, alu_long, ls_long, tex_long)
+
+def get_shader_type(shader):
+    _, ext = os.path.splitext(shader)
+    if ext == '.vert':
+        return '--vertex'
+    elif ext == '.frag':
+        return '--fragment'
+    elif ext == '.comp':
+        return '--compute'
+    elif ext == '.tesc':
+        return '--tessellation_control'
+    elif ext == '.tese':
+        return '--tessellation_evaluation'
+    elif ext == '.geom':
+        return '--geometry'
+    else:
+        return ''
+
+def get_shader_stats(shader):
+    f, path = tempfile.mkstemp()
+
+    os.close(f)
+    p = subprocess.Popen(['malisc', get_shader_type(shader), '--core', 'Mali-T760', '-V', shader], stdout = subprocess.PIPE, stderr = subprocess.PIPE)
+    stdout, stderr = p.communicate()
+    os.remove(path)
+
+    if p.returncode != 0:
+        print(stderr.decode('utf-8'))
+        raise OSError('malisc failed')
+    p.wait()
+
+    returned = stdout.decode('utf-8')
+    return parse_stats(returned)
+
+def cross_compile(shader):
+    spirv_f, spirv_path = tempfile.mkstemp()
+    glsl_f, glsl_path = tempfile.mkstemp(suffix = os.path.basename(shader))
+    os.close(spirv_f)
+    os.close(glsl_f)
+
+    subprocess.check_call(['glslangValidator', '-G', '-o', spirv_path, shader])
+    subprocess.check_call(['./spir2cross', '--output', glsl_path, spirv_path])
+    return (spirv_path, glsl_path)
+
+def md5_for_file(path):
+    md5 = hashlib.md5()
+    with open(path, 'rb') as f:
+        for chunk in iter(lambda: f.read(8192), b''):
+            md5.update(chunk)
+    return md5.digest()
+
+def make_reference_dir(path):
+    base = os.path.dirname(path)
+    if not os.path.exists(base):
+        os.makedirs(base)
+
+def regression_check(shader, glsl):
+    reference = os.path.join('./reference', shader)
+    if os.path.exists(reference):
+        if md5_for_file(glsl) != md5_for_file(reference):
+            print('Generated GLSL has changed for {}!'.format(reference))
+            if os.path.exists(reference):
+                os.remove(reference)
+            make_reference_dir(reference)
+            shutil.move(glsl, reference)
+        else:
+            os.remove(glsl)
+    else:
+        print('Found new shader {}. Placing GLSL in {}'.format(shader, reference))
+        make_reference_dir(reference)
+        shutil.move(glsl, reference)
+
+def test_shader(stats, shader):
+    print('Testing shader:', shader)
+    pristine_stats = get_shader_stats(shader)
+    spirv, glsl = cross_compile(shader)
+    cross_stats = get_shader_stats(glsl)
+
+    regression_check(shader, glsl)
+    os.remove(spirv)
+
+    a = []
+    a.append(shader)
+    for i in pristine_stats:
+        a.append(str(i))
+    for i in cross_stats:
+        a.append(str(i))
+    print(','.join(a), file = stats)
+
+def test_shaders(shader_dir):
+    with open('stats.csv', 'w') as stats:
+        print('Shader,OrigRegs,OrigUniRegs,OrigALUShort,OrigLSShort,OrigTEXShort,OrigALULong,OrigLSLong,OrigTEXLong,CrossRegs,CrossUniRegs,CrossALUShort,CrossLSShort,CrossTEXShort,CrossALULong,CrossLSLong,CrossTEXLong', file = stats)
+        for f in os.walk(os.path.join(shader_dir)):
+            for i in f[2]:
+                shader = os.path.join(f[0], i)
+                test_shader(stats, shader)
+
+if __name__ == '__main__':
+    test_shaders(sys.argv[1])
+    print('Stats in stats.csv!')
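A brief usage note on the harness above (an inference from the script, not part of the commit): it takes a single shader directory argument, e.g. ./test_shaders.py shaders, and walks it recursively; it assumes glslangValidator and malisc are on PATH and a built spir2cross binary sits in the working directory. For each shader it compiles the original and the cross-compiled GLSL through the Mali offline compiler, writes both sets of statistics to stats.csv, and keeps golden GLSL outputs under ./reference for the md5-based regression check.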