SPIRV-Cross/spirv_glsl.cpp

/*
 * Copyright 2015-2016 ARM Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "spirv_glsl.hpp"
#include "GLSL.std.450.h"
#include <algorithm>
#include <assert.h>

using namespace spv;
using namespace spirv_cross;
using namespace std;

static const char *to_pls_layout(PlsFormat format)
{
	switch (format)
	{
	case PlsR11FG11FB10F:
		return "layout(r11f_g11f_b10f) ";
	case PlsR32F:
		return "layout(r32f) ";
	case PlsRG16F:
		return "layout(rg16f) ";
	case PlsRGB10A2:
		return "layout(rgb10_a2) ";
	case PlsRGBA8:
		return "layout(rgba8) ";
	case PlsRG16:
		return "layout(rg16) ";
	case PlsRGBA8I:
		return "layout(rgba8i)";
	case PlsRG16I:
		return "layout(rg16i) ";
	case PlsRGB10A2UI:
		return "layout(rgb10_a2ui) ";
	case PlsRGBA8UI:
		return "layout(rgba8ui) ";
	case PlsRG16UI:
		return "layout(rg16ui) ";
	case PlsR32UI:
		return "layout(r32ui) ";
	default:
		return "";
	}
}

static SPIRType::BaseType pls_format_to_basetype(PlsFormat format)
{
	switch (format)
	{
	default:
	case PlsR11FG11FB10F:
	case PlsR32F:
	case PlsRG16F:
	case PlsRGB10A2:
	case PlsRGBA8:
	case PlsRG16:
		return SPIRType::Float;

	case PlsRGBA8I:
	case PlsRG16I:
		return SPIRType::Int;

	case PlsRGB10A2UI:
	case PlsRGBA8UI:
	case PlsRG16UI:
	case PlsR32UI:
		return SPIRType::UInt;
	}
}

static uint32_t pls_format_to_components(PlsFormat format)
{
	switch (format)
	{
	default:
	case PlsR32F:
	case PlsR32UI:
		return 1;

	case PlsRG16F:
	case PlsRG16:
	case PlsRG16UI:
	case PlsRG16I:
		return 2;

	case PlsR11FG11FB10F:
		return 3;

	case PlsRGB10A2:
	case PlsRGBA8:
	case PlsRGBA8I:
	case PlsRGB10A2UI:
	case PlsRGBA8UI:
		return 4;
	}
}

void CompilerGLSL::reset()
{
	// We do some speculative optimizations which should pretty much always work out,
	// but just in case the SPIR-V is rather weird, recompile until it's happy.
	// This typically only means one extra pass.
	force_recompile = false;

	// Clear invalid expression tracking.
	invalid_expressions.clear();
	current_function = nullptr;

	// Clear temporary usage tracking.
	expression_usage_counts.clear();
	forwarded_temporaries.clear();

	for (auto &id : ids)
	{
		if (id.get_type() == TypeVariable)
		{
			// Clear unflushed dependees.
			id.get<SPIRVariable>().dependees.clear();
		}
		else if (id.get_type() == TypeExpression)
		{
			// And remove all expressions.
			id.reset();
		}
		else if (id.get_type() == TypeFunction)
		{
			// Reset active state for all functions.
			id.get<SPIRFunction>().active = false;
			id.get<SPIRFunction>().flush_undeclared = true;
		}
	}

	statement_count = 0;
	indent = 0;
}

void CompilerGLSL::remap_pls_variables()
{
	for (auto &input : pls_inputs)
	{
		auto &var = get<SPIRVariable>(input.id);

		bool input_is_target = false;
		if (var.storage == StorageClassUniformConstant)
		{
			auto &type = get<SPIRType>(var.basetype);
			input_is_target = type.image.dim == DimSubpassData;
		}

		if (var.storage != StorageClassInput && !input_is_target)
			throw CompilerError("Can only use in and target variables for PLS inputs.");
		var.remapped_variable = true;
	}

	for (auto &output : pls_outputs)
	{
		auto &var = get<SPIRVariable>(output.id);
		if (var.storage != StorageClassOutput)
			throw CompilerError("Can only use out variables for PLS outputs.");
		var.remapped_variable = true;
	}
}

string CompilerGLSL::compile()
{
	uint32_t pass_count = 0;
	do
	{
		if (pass_count >= 3)
			throw CompilerError("Over 3 compilation loops detected. Must be a bug!");

		reset();

		// Move constructor for this type is broken on GCC 4.9 ...
		buffer = unique_ptr<ostringstream>(new ostringstream());

		emit_header();
		emit_resources();

		emit_function(get<SPIRFunction>(execution.entry_point), 0);

		pass_count++;
	} while (force_recompile);

	return buffer->str();
}

void CompilerGLSL::emit_header()
{
	statement("#version ", options.version, options.es && options.version > 100 ? " es" : "");

	// Needed for binding = # on UBOs, etc.
	if (!options.es && options.version < 420)
	{
		statement("#ifdef GL_ARB_shading_language_420pack");
		statement("#extension GL_ARB_shading_language_420pack : require");
		statement("#endif");
	}

	for (auto &ext : forced_extensions)
		statement("#extension ", ext, " : require");

	if (!pls_inputs.empty() || !pls_outputs.empty())
		statement("#extension GL_EXT_shader_pixel_local_storage : require");

	vector<string> inputs;
	vector<string> outputs;

	switch (execution.model)
	{
	case ExecutionModelGeometry:
		if (options.es && options.version < 320)
			statement("#extension GL_EXT_geometry_shader : require");
		if (!options.es && options.version < 320)
			statement("#extension GL_ARB_geometry_shader4 : require");
		outputs.push_back(join("max_vertices = ", execution.output_vertices));
		if (execution.flags & (1ull << ExecutionModeInvocations))
			inputs.push_back(join("invocations = ", execution.invocations));
		if (execution.flags & (1ull << ExecutionModeInputPoints))
			inputs.push_back("points");
		if (execution.flags & (1ull << ExecutionModeInputLines))
			inputs.push_back("lines");
		if (execution.flags & (1ull << ExecutionModeInputLinesAdjacency))
			inputs.push_back("lines_adjacency");
		if (execution.flags & (1ull << ExecutionModeTriangles))
			inputs.push_back("triangles");
		if (execution.flags & (1ull << ExecutionModeInputTrianglesAdjacency))
			inputs.push_back("triangles_adjacency");
		if (execution.flags & (1ull << ExecutionModeOutputTriangleStrip))
			outputs.push_back("triangle_strip");
		if (execution.flags & (1ull << ExecutionModeOutputPoints))
			outputs.push_back("points");
		if (execution.flags & (1ull << ExecutionModeOutputLineStrip))
			outputs.push_back("line_strip");
		break;

	case ExecutionModelTessellationControl:
		if (options.es && options.version < 320)
			statement("#extension GL_EXT_tessellation_shader : require");
		if (!options.es && options.version < 400)
			statement("#extension GL_ARB_tessellation_shader : require");
		if (execution.flags & (1ull << ExecutionModeOutputVertices))
			outputs.push_back(join("vertices = ", execution.output_vertices));
		break;

	case ExecutionModelTessellationEvaluation:
		if (options.es && options.version < 320)
			statement("#extension GL_EXT_tessellation_shader : require");
		if (!options.es && options.version < 400)
			statement("#extension GL_ARB_tessellation_shader : require");
		if (execution.flags & (1ull << ExecutionModeQuads))
			inputs.push_back("quads");
		if (execution.flags & (1ull << ExecutionModeIsolines))
			inputs.push_back("isolines");
		if (execution.flags & (1ull << ExecutionModePointMode))
			inputs.push_back("point_mode");
		if (execution.flags & (1ull << ExecutionModeVertexOrderCw))
			inputs.push_back("cw");
		if (execution.flags & (1ull << ExecutionModeVertexOrderCcw))
			inputs.push_back("ccw");
		if (execution.flags & (1ull << ExecutionModeSpacingFractionalEven))
			inputs.push_back("fractional_even_spacing");
		if (execution.flags & (1ull << ExecutionModeSpacingFractionalOdd))
			inputs.push_back("fractional_odd_spacing");
		if (execution.flags & (1ull << ExecutionModeSpacingEqual))
			inputs.push_back("equal_spacing");
		break;

	case ExecutionModelGLCompute:
		if (!options.es && options.version < 430)
			statement("#extension GL_ARB_compute_shader : require");
		if (options.es && options.version < 310)
			throw CompilerError("At least ESSL 3.10 required for compute shaders.");
		inputs.push_back(join("local_size_x = ", execution.workgroup_size.x));
		inputs.push_back(join("local_size_y = ", execution.workgroup_size.y));
		inputs.push_back(join("local_size_z = ", execution.workgroup_size.z));
		break;

	case ExecutionModelFragment:
		if (options.es)
		{
			switch (options.fragment.default_float_precision)
			{
			case Options::Lowp:
				statement("precision lowp float;");
				break;

			case Options::Mediump:
				statement("precision mediump float;");
				break;

			case Options::Highp:
				statement("precision highp float;");
				break;

			default:
				break;
			}

			switch (options.fragment.default_int_precision)
			{
			case Options::Lowp:
				statement("precision lowp int;");
				break;

			case Options::Mediump:
				statement("precision mediump int;");
				break;

			case Options::Highp:
				statement("precision highp int;");
				break;

			default:
				break;
			}
		}

		if (execution.flags & (1ull << ExecutionModeEarlyFragmentTests))
			inputs.push_back("early_fragment_tests");
		if (execution.flags & (1ull << ExecutionModeDepthGreater))
			inputs.push_back("depth_greater");
		if (execution.flags & (1ull << ExecutionModeDepthLess))
			inputs.push_back("depth_less");

		break;

	default:
		break;
	}

	if (!inputs.empty())
		statement("layout(", merge(inputs), ") in;");
	if (!outputs.empty())
		statement("layout(", merge(outputs), ") out;");

	statement("");
}

void CompilerGLSL::emit_struct(const SPIRType &type)
{
	// Struct types can be stamped out multiple times
	// with just different offsets, matrix layouts, etc ...
	// Type-punning with these types is legal, which complicates things
	// when we are storing struct and array types in an SSBO for example.
	if (type.type_alias != 0)
		return;

	auto name = type_to_glsl(type);

	statement("struct ", name);
	begin_scope();

	uint32_t i = 0;
	bool emitted = false;
	for (auto &member : type.member_types)
	{
		auto &membertype = get<SPIRType>(member);
		statement(member_decl(type, membertype, i), ";");
		i++;
		emitted = true;
	}
	end_scope_decl();

	if (emitted)
		statement("");
}

uint64_t CompilerGLSL::combined_decoration_for_member(const SPIRType &type, uint32_t index)
{
	uint64_t flags = 0;
	auto &memb = meta[type.self].members;
	if (index >= memb.size())
		return 0;
	auto &dec = memb[index];

	// If our type is a sturct, traverse all the members as well recursively.
	flags |= dec.decoration_flags;
	for (uint32_t i = 0; i < type.member_types.size(); i++)
		flags |= combined_decoration_for_member(get<SPIRType>(type.member_types[i]), i);

	return flags;
}

string CompilerGLSL::layout_for_member(const SPIRType &type, uint32_t index)
{
	bool is_block = (meta[type.self].decoration.decoration_flags &
	                 ((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))) != 0;
	if (!is_block)
		return "";

	auto &memb = meta[type.self].members;
	if (index >= memb.size())
		return 0;
	auto &dec = memb[index];

	vector<string> attr;

	// We can only apply layouts on members in block interfaces.
	// This is a bit problematic because in SPIR-V decorations are applied on the struct types directly.
	// This is not supported on GLSL, so we have to make the assumption that if a struct within our buffer block struct
	// has a decoration, it was originally caused by a top-level layout() qualifier in GLSL.
	//
	// We would like to go from (SPIR-V style):
	//
	// struct Foo { layout(row_major) mat4 matrix; };
	// buffer UBO { Foo foo; };
	//
	// to
	//
	// struct Foo { mat4 matrix; }; // GLSL doesn't support any layout shenanigans in raw struct declarations.
	// buffer UBO { layout(row_major) Foo foo; }; // Apply the layout on top-level.
	auto flags = combined_decoration_for_member(type, index);

	if (flags & (1ull << DecorationRowMajor))
		attr.push_back("row_major");
	// We don't emit any global layouts, so column_major is default.
	//if (flags & (1ull << DecorationColMajor))
	//    attr.push_back("column_major");

	if (dec.decoration_flags & (1ull << DecorationLocation))
		attr.push_back(join("location = ", dec.location));

	if (attr.empty())
		return "";

	string res = "layout(";
	res += merge(attr);
	res += ") ";
	return res;
}

const char *CompilerGLSL::format_to_glsl(spv::ImageFormat format)
{
	// Only handle GLES 3.1 compliant types for now ...
	switch (format)
	{
	case ImageFormatRgba32f:
		return "rgba32f";
	case ImageFormatRgba16f:
		return "rgba16f";
	case ImageFormatR32f:
		return "r32f";
	case ImageFormatRgba8:
		return "rgba8";
	case ImageFormatRgba8Snorm:
		return "rgba8_snorm";
	case ImageFormatRg32f:
		return "rg32f";
	case ImageFormatRg16f:
		return "rg16f";

	case ImageFormatRgba32i:
		return "rgba32i";
	case ImageFormatRgba16i:
		return "rgba16i";
	case ImageFormatR32i:
		return "r32i";
	case ImageFormatRgba8i:
		return "rgba8i";
	case ImageFormatRg32i:
		return "rg32i";
	case ImageFormatRg16i:
		return "rg16i";

	case ImageFormatRgba32ui:
		return "rgba32ui";
	case ImageFormatRgba16ui:
		return "rgba16ui";
	case ImageFormatR32ui:
		return "r32ui";
	case ImageFormatRgba8ui:
		return "rgba8ui";
	case ImageFormatRg32ui:
		return "rg32ui";
	case ImageFormatRg16ui:
		return "rg16ui";

	case ImageFormatUnknown:
		return nullptr;
	default:
		return "UNSUPPORTED"; // TODO: Fill in rest.
	}
}

uint32_t CompilerGLSL::type_to_std430_alignment(const SPIRType &type, uint64_t flags)
{
	// float, int and uint all take 4 bytes.
	const uint32_t base_alignment = 4;

	if (type.basetype == SPIRType::Struct)
	{
		// Rule 9. Structs alignments are maximum alignment of its members.
		uint32_t alignment = 0;
		for (uint32_t i = 0; i < type.member_types.size(); i++)
		{
			auto member_flags = meta[type.self].members.at(i).decoration_flags;
			alignment = max(alignment, type_to_std430_alignment(get<SPIRType>(type.member_types[i]), member_flags));
		}

		return alignment;
	}
	else
	{
		// From 7.6.2.2 in GL 4.5 core spec.
		// Rule 1
		if (type.vecsize == 1 && type.columns == 1)
			return base_alignment;

		// Rule 2
		if ((type.vecsize == 2 || type.vecsize == 4) && type.columns == 1)
			return type.vecsize * base_alignment;

		// Rule 3
		if (type.vecsize == 3 && type.columns == 1)
			return 4 * base_alignment;

		// Rule 4 implied. Alignment does not change in std430.

		// Rule 5. Column-major matrices are stored as arrays of
		// vectors.
		if ((flags & (1ull << DecorationColMajor)) && type.columns > 1)
		{
			if (type.vecsize == 3)
				return 4 * base_alignment;
			else
				return type.vecsize * base_alignment;
		}

		// Rule 6 implied.

		// Rule 7.
		if ((flags & (1ull << DecorationRowMajor)) && type.vecsize > 1)
		{
			if (type.columns == 3)
				return 4 * base_alignment;
			else
				return type.columns * base_alignment;
		}

		// Rule 8 implied.
	}

	throw CompilerError("Did not find suitable std430 rule for type. Bogus decorations?");
}

uint32_t CompilerGLSL::type_to_std430_array_stride(const SPIRType &type, uint64_t flags)
{
	// Array stride is equal to aligned size of the underlying type.
	SPIRType tmp = type;
	tmp.array.pop_back();
	uint32_t size = type_to_std430_size(tmp, flags);
	uint32_t alignment = type_to_std430_alignment(tmp, flags);
	return (size + alignment - 1) & ~(alignment - 1);
}

uint32_t CompilerGLSL::type_to_std430_size(const SPIRType &type, uint64_t flags)
{
	if (!type.array.empty())
		return type.array.back() * type_to_std430_array_stride(type, flags);

	// float, int and uint all take 4 bytes.
	const uint32_t base_alignment = 4;
	uint32_t size = 0;

	if (type.basetype == SPIRType::Struct)
	{
		uint32_t pad_alignment = 1;

		for (uint32_t i = 0; i < type.member_types.size(); i++)
		{
			auto member_flags = meta[type.self].members.at(i).decoration_flags;
			auto &member_type = get<SPIRType>(type.member_types[i]);

			uint32_t std430_alignment = type_to_std430_alignment(member_type, member_flags);
			uint32_t alignment = max(std430_alignment, pad_alignment);

			// The next member following a struct member is aligned to the base alignment of the struct that came before.
			// GL 4.5 spec, 7.6.2.2.
			if (member_type.basetype == SPIRType::Struct)
				pad_alignment = std430_alignment;
			else
				pad_alignment = 1;

			size = (size + alignment - 1) & ~(alignment - 1);
			size += type_to_std430_size(member_type, member_flags);
		}
	}
	else
	{
		if (type.columns == 1)
			size = type.vecsize * base_alignment;

		if ((flags & (1ull << DecorationColMajor)) && type.columns > 1)
		{
			if (type.vecsize == 3)
				size = type.columns * 4 * base_alignment;
			else
				size = type.columns * type.vecsize * base_alignment;
		}

		if ((flags & (1ull << DecorationRowMajor)) && type.vecsize > 1)
		{
			if (type.columns == 3)
				size = type.vecsize * 4 * base_alignment;
			else
				size = type.vecsize * type.columns * base_alignment;
		}
	}

	return size;
}

bool CompilerGLSL::ssbo_is_std430_packing(const SPIRType &type)
{
	// This is very tricky and error prone, but try to be exhaustive and correct here.
	// SPIR-V doesn't directly say if we're using std430 or std140.
	// SPIR-V communicates this using Offset and ArrayStride decorations (which is what really matters),
	// so we have to try to infer whether or not the original GLSL source was std140 or std430 based on this information.
	// We do not have to consider shared or packed since these layouts are not allowed in Vulkan SPIR-V (they are useless anyways, and custom offsets would do the same thing).
	//
	// It is almost certain that we're using std430, but it gets tricky with arrays in particular.
	// We will assume std430, but infer std140 if we can prove the struct is not compliant with std430.
	//
	// The only two differences between std140 and std430 are related to padding alignment/array stride
	// in arrays and structs. In std140 they take minimum vec4 alignment.
	// std430 only removes the vec4 requirement.

	uint32_t offset = 0;
	uint32_t pad_alignment = 1;

	for (uint32_t i = 0; i < type.member_types.size(); i++)
	{
		auto &memb_type = get<SPIRType>(type.member_types[i]);
		auto member_flags = meta[type.self].members.at(i).decoration_flags;

		// Verify alignment rules.
		uint32_t std430_alignment = type_to_std430_alignment(memb_type, member_flags);
		uint32_t alignment = max(std430_alignment, pad_alignment);
		offset = (offset + alignment - 1) & ~(alignment - 1);

		// The next member following a struct member is aligned to the base alignment of the struct that came before.
		// GL 4.5 spec, 7.6.2.2.
		if (memb_type.basetype == SPIRType::Struct)
			pad_alignment = std430_alignment;
		else
			pad_alignment = 1;

		uint32_t actual_offset = type_struct_member_offset(type, i);
		if (actual_offset != offset) // This cannot be std430.
			return false;

		// Verify array stride rules.
		if (!memb_type.array.empty() &&
		    type_to_std430_array_stride(memb_type, member_flags) != type_struct_member_array_stride(type, i))
			return false;

		// Verify that sub-structs also follow std430 rules.
		if (!memb_type.member_types.empty() && !ssbo_is_std430_packing(memb_type))
			return false;

		// Bump size.
		offset += type_to_std430_size(memb_type, member_flags);
	}

	return true;
}

string CompilerGLSL::layout_for_variable(const SPIRVariable &var)
{
	vector<string> attr;

	auto &dec = meta[var.self].decoration;
	auto &type = get<SPIRType>(var.basetype);
	auto flags = dec.decoration_flags;
	auto typeflags = meta[type.self].decoration.decoration_flags;

	if (options.vulkan_semantics && var.storage == StorageClassPushConstant)
		attr.push_back("push_constant");

	if (flags & (1ull << DecorationRowMajor))
		attr.push_back("row_major");
	if (flags & (1ull << DecorationColMajor))
		attr.push_back("column_major");

	if (options.vulkan_semantics)
	{
		if (flags & (1ull << DecorationInputAttachmentIndex))
			attr.push_back(join("input_attachment_index = ", dec.input_attachment));
	}

	if (flags & (1ull << DecorationLocation))
		attr.push_back(join("location = ", dec.location));

	// set = 0 is the default. Do not emit set = decoration in regular GLSL output, but
	// we should preserve it in Vulkan GLSL mode.
	if (var.storage != StorageClassPushConstant)
	{
		if ((flags & (1ull << DecorationDescriptorSet)) && (dec.set != 0 || options.vulkan_semantics))
			attr.push_back(join("set = ", dec.set));
	}

	if (flags & (1ull << DecorationBinding))
		attr.push_back(join("binding = ", dec.binding));
	if (flags & (1ull << DecorationCoherent))
		attr.push_back("coherent");
	if (flags & (1ull << DecorationOffset))
		attr.push_back(join("offset = ", dec.offset));

	// Instead of adding explicit offsets for every element here, just assume we're using std140 or std430.
	// If SPIR-V does not comply with either layout, we cannot really work around it.
	if (var.storage == StorageClassUniform && (typeflags & (1ull << DecorationBlock)))
		attr.push_back("std140");
	else if (var.storage == StorageClassUniform && (typeflags & (1ull << DecorationBufferBlock)))
		attr.push_back(ssbo_is_std430_packing(type) ? "std430" : "std140");
	else if (options.vulkan_semantics && var.storage == StorageClassPushConstant)
		attr.push_back(ssbo_is_std430_packing(type) ? "std430" : "std140");

	// For images, the type itself adds a layout qualifer.
	if (type.basetype == SPIRType::Image)
	{
		const char *fmt = format_to_glsl(type.image.format);
		if (fmt)
			attr.push_back(fmt);
	}

	if (attr.empty())
		return "";

	string res = "layout(";
	res += merge(attr);
	res += ") ";
	return res;
}

void CompilerGLSL::emit_push_constant_block(const SPIRVariable &var)
{
	if (options.vulkan_semantics)
		emit_push_constant_block_vulkan(var);
	else
		emit_push_constant_block_glsl(var);
}

void CompilerGLSL::emit_push_constant_block_vulkan(const SPIRVariable &var)
{
	emit_buffer_block(var);
}

void CompilerGLSL::emit_push_constant_block_glsl(const SPIRVariable &var)
{
	// OpenGL has no concept of push constant blocks, implement it as a uniform struct.
	auto &type = get<SPIRType>(var.basetype);

	auto &flags = meta[var.self].decoration.decoration_flags;
	flags &= ~((1ull << DecorationBinding) | (1ull << DecorationDescriptorSet));

#if 0
    if (flags & ((1ull << DecorationBinding) | (1ull << DecorationDescriptorSet)))
        throw CompilerError("Push constant blocks cannot be compiled to GLSL with Binding or Set syntax. "
                            "Remap to location with reflection API first or disable these decorations.");
#endif

	// We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily.
	// Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed.
	auto &block_flags = meta[type.self].decoration.decoration_flags;
	uint64_t block_flag = block_flags & (1ull << DecorationBlock);
	block_flags &= ~block_flag;

	emit_struct(type);

	block_flags |= block_flag;

	emit_uniform(var);
	statement("");
}

void CompilerGLSL::emit_buffer_block(const SPIRVariable &var)
{
	auto &type = get<SPIRType>(var.basetype);
	auto ssbo = meta[type.self].decoration.decoration_flags & (1ull << DecorationBufferBlock);

	// Block names should never alias.
	auto buffer_name = to_name(type.self, false);

	statement(layout_for_variable(var) + (ssbo ? "buffer " : "uniform ") + buffer_name);
	begin_scope();

	uint32_t i = 0;
	for (auto &member : type.member_types)
	{
		auto &membertype = get<SPIRType>(member);
		statement(member_decl(type, membertype, i), ";");
		i++;
	}

	end_scope_decl(to_name(var.self) + type_to_array_glsl(type));
	statement("");
}

void CompilerGLSL::emit_interface_block(const SPIRVariable &var)
{
	auto &type = get<SPIRType>(var.basetype);

	// Either make it plain in/out or in/out blocks depending on what shader is doing ...
	bool block = (meta[type.self].decoration.decoration_flags & (1ull << DecorationBlock)) != 0;

	const char *qual = nullptr;
	if (is_legacy() && execution.model == ExecutionModelVertex)
		qual = var.storage == StorageClassInput ? "attribute " : "varying ";
	else if (is_legacy() && execution.model == ExecutionModelFragment)
		qual = "varying "; // Fragment outputs are renamed so they never hit this case.
	else
		qual = var.storage == StorageClassInput ? "in " : "out ";

	if (block)
	{
		// Block names should never alias.
		statement(layout_for_variable(var), qual, to_name(type.self, false));
		begin_scope();

		uint32_t i = 0;
		for (auto &member : type.member_types)
		{
			auto &membertype = get<SPIRType>(member);
			statement(member_decl(type, membertype, i), ";");
			i++;
		}

		end_scope_decl(join(to_name(var.self), type_to_array_glsl(type)));
		statement("");
	}
	else
	{
		statement(layout_for_variable(var), qual, variable_decl(var), ";");
	}
}

void CompilerGLSL::emit_uniform(const SPIRVariable &var)
{
	auto &type = get<SPIRType>(var.basetype);
	if (type.basetype == SPIRType::Image)
	{
		if (!options.es && options.version < 420)
			require_extension("GL_ARB_shader_image_load_store");
		else if (options.es && options.version < 310)
			throw CompilerError("At least ESSL 3.10 required for shader image load store.");
	}

	statement(layout_for_variable(var), "uniform ", variable_decl(var), ";");
}

void CompilerGLSL::replace_fragment_output(SPIRVariable &var)
{
	auto &m = meta[var.self].decoration;
	uint32_t location = 0;
	if (m.decoration_flags & (1ull << DecorationLocation))
		location = m.location;

	m.alias = join("gl_FragData[", location, "]");
	var.compat_builtin = true; // We don't want to declare this variable, but use the name as-is.
}

void CompilerGLSL::replace_fragment_outputs()
{
	for (auto &id : ids)
	{
		if (id.get_type() == TypeVariable)
		{
			auto &var = id.get<SPIRVariable>();
			auto &type = get<SPIRType>(var.basetype);

			if (!is_builtin_variable(var) && !var.remapped_variable && type.pointer &&
			    var.storage == StorageClassOutput)
				replace_fragment_output(var);
		}
	}
}

string CompilerGLSL::remap_swizzle(uint32_t result_type, uint32_t input_components, uint32_t expr)
{
	auto &out_type = get<SPIRType>(result_type);

	if (out_type.vecsize == input_components)
		return to_expression(expr);
	else if (input_components == 1)
		return join(type_to_glsl(out_type), "(", to_expression(expr), ")");
	else
	{
		auto e = to_expression(expr) + ".";
		// Just clamp the swizzle index if we have more outputs than inputs.
		for (uint32_t c = 0; c < out_type.vecsize; c++)
			e += index_to_swizzle(min(c, input_components - 1));
		if (backend.swizzle_is_function && out_type.vecsize > 1)
			e += "()";
		return e;
	}
}

void CompilerGLSL::emit_pls()
{
	if (execution.model != ExecutionModelFragment)
		throw CompilerError("Pixel local storage only supported in fragment shaders.");

	if (!options.es)
		throw CompilerError("Pixel local storage only supported in OpenGL ES.");

	if (options.version < 300)
		throw CompilerError("Pixel local storage only supported in ESSL 3.0 and above.");

	if (!pls_inputs.empty())
	{
		statement("__pixel_local_inEXT _PLSIn");
		begin_scope();
		for (auto &input : pls_inputs)
			statement(pls_decl(input), ";");
		end_scope_decl();
		statement("");
	}

	if (!pls_outputs.empty())
	{
		statement("__pixel_local_outEXT _PLSOut");
		begin_scope();
		for (auto &output : pls_outputs)
			statement(pls_decl(output), ";");
		end_scope_decl();
		statement("");
	}
}

void CompilerGLSL::emit_resources()
{
	// Legacy GL uses gl_FragData[], redeclare all fragment outputs
	// with builtins.
	if (execution.model == ExecutionModelFragment && is_legacy())
		replace_fragment_outputs();

	// Emit PLS blocks if we have such variables.
	if (!pls_inputs.empty() || !pls_outputs.empty())
		emit_pls();

	// Output all basic struct types which are not Block or BufferBlock as these are declared inplace
	// when such variables are instantiated.
	for (auto &id : ids)
	{
		if (id.get_type() == TypeType)
		{
			auto &type = id.get<SPIRType>();
			if (type.basetype == SPIRType::Struct && type.array.empty() && !type.pointer &&
			    (meta[type.self].decoration.decoration_flags &
			     ((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))) == 0)
			{
				emit_struct(type);
			}
		}
	}

	// Output UBOs and SSBOs
	for (auto &id : ids)
	{
		if (id.get_type() == TypeVariable)
		{
			auto &var = id.get<SPIRVariable>();
			auto &type = get<SPIRType>(var.basetype);

			if (type.pointer && type.storage == StorageClassUniform && !is_builtin_variable(var) &&
			    (meta[type.self].decoration.decoration_flags &
			     ((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))))
			{
				emit_buffer_block(var);
			}
		}
	}

	// Output push constant blocks
	for (auto &id : ids)
	{
		if (id.get_type() == TypeVariable)
		{
			auto &var = id.get<SPIRVariable>();
			auto &type = get<SPIRType>(var.basetype);
			if (type.pointer && type.storage == StorageClassPushConstant)
				emit_push_constant_block(var);
		}
	}

	bool emitted = false;

	// Output Uniform Constants (values, samplers, images, etc).
	for (auto &id : ids)
	{
		if (id.get_type() == TypeVariable)
		{
			auto &var = id.get<SPIRVariable>();
			auto &type = get<SPIRType>(var.basetype);

			if (!is_builtin_variable(var) && !var.remapped_variable && type.pointer &&
			    (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter))
			{
				emit_uniform(var);
				emitted = true;
			}
		}
	}

	if (emitted)
		statement("");
	emitted = false;

	// Output in/out interfaces.
	for (auto &id : ids)
	{
		if (id.get_type() == TypeVariable)
		{
			auto &var = id.get<SPIRVariable>();
			auto &type = get<SPIRType>(var.basetype);

			if (!is_builtin_variable(var) && !var.remapped_variable && type.pointer &&
			    (var.storage == StorageClassInput || var.storage == StorageClassOutput))
			{
				emit_interface_block(var);
				emitted = true;
			}
			else if (is_builtin_variable(var))
			{
				// For gl_InstanceIndex emulation on GLES, the API user needs to
				// supply this uniform.
				if (meta[var.self].decoration.builtin_type == BuiltInInstanceIndex && !options.vulkan_semantics)
				{
					statement("uniform int SPIRV_Cross_BaseInstance;");
					emitted = true;
				}
			}
		}
	}

	// Global variables.
	for (auto global : global_variables)
	{
		auto &var = get<SPIRVariable>(global);
		if (var.storage != StorageClassOutput)
		{
			statement(variable_decl(var), ";");
			emitted = true;
		}
	}

	if (emitted)
		statement("");
}

string CompilerGLSL::to_expression(uint32_t id)
{
	auto itr = invalid_expressions.find(id);
	if (itr != end(invalid_expressions))
	{
		auto &expr = get<SPIRExpression>(id);

		// This expression has been invalidated in the past.
		// Be careful with this expression next pass ...
		// Used for OpCompositeInsert forwarding atm.
		expr.used_while_invalidated = true;

		// We tried to read an invalidated expression.
		// This means we need another pass at compilation, but next time, do not try to forward
		// the variables which caused invalidation to happen in the first place.
		for (auto var : expr.invalidated_by)
		{
			//fprintf(stderr, "Expression %u was invalidated due to variable %u being invalid at read time!\n", id, var);
			get<SPIRVariable>(var).forwardable = false;
		}

		if (expr.invalidated_by.empty() && expr.loaded_from)
		{
			//fprintf(stderr, "Expression %u was invalidated due to variable %u being invalid at read time!\n", id, expr.loaded_from);
			get<SPIRVariable>(expr.loaded_from).forwardable = false;
		}
		force_recompile = true;
	}

	track_expression_read(id);

	switch (ids[id].get_type())
	{
	case TypeExpression:
	{
		auto &e = get<SPIRExpression>(id);
		if (e.base_expression)
			return to_expression(e.base_expression) + e.expression;
		else
			return e.expression;
	}

	case TypeConstant:
		return constant_expression(get<SPIRConstant>(id));

	case TypeVariable:
	{
		auto &var = get<SPIRVariable>(id);
		if (var.statically_assigned)
			return to_expression(var.static_expression);
		else if (var.deferred_declaration)
		{
			var.deferred_declaration = false;
			return variable_decl(var);
		}
		else
		{
			auto &dec = meta[var.self].decoration;
			if (dec.builtin)
				return builtin_to_glsl(dec.builtin_type);
			else
				return to_name(id);
		}
	}

	default:
		return to_name(id);
	}
}

string CompilerGLSL::constant_expression(const SPIRConstant &c)
{
	if (!c.subconstants.empty())
	{
		// Handles Arrays and structures.
		string res = type_to_glsl_constructor(get<SPIRType>(c.constant_type)) + "(";
		for (auto &elem : c.subconstants)
		{
			res += constant_expression(get<SPIRConstant>(elem));
			if (&elem != &c.subconstants.back())
				res += ", ";
		}
		res += ")";
		return res;
	}
	else if (c.columns() == 1)
	{
		return constant_expression_vector(c, 0);
	}
	else
	{
		string res = type_to_glsl(get<SPIRType>(c.constant_type)) + "(";
		for (uint32_t col = 0; col < c.columns(); col++)
		{
			res += constant_expression_vector(c, col);
			if (col + 1 < c.columns())
				res += ", ";
		}
		res += ")";
		return res;
	}
}

string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t vector)
{
	auto type = get<SPIRType>(c.constant_type);
	type.columns = 1;

	string res;
	if (c.vector_size() > 1)
		res += type_to_glsl(type) + "(";

	bool splat = c.vector_size() > 1;
	if (splat)
	{
		uint32_t ident = c.scalar(vector, 0);
		for (uint32_t i = 1; i < c.vector_size(); i++)
			if (ident != c.scalar(vector, i))
				splat = false;
	}

	switch (type.basetype)
	{
	case SPIRType::Float:
		if (splat)
		{
			res += convert_to_string(c.scalar_f32(vector, 0));
			if (backend.float_literal_suffix)
				res += "f";
		}
		else
		{
			for (uint32_t i = 0; i < c.vector_size(); i++)
			{
				res += convert_to_string(c.scalar_f32(vector, i));
				if (backend.float_literal_suffix)
					res += "f";
				if (i + 1 < c.vector_size())
					res += ", ";
			}
		}
		break;

	case SPIRType::UInt:
		if (splat)
		{
			res += convert_to_string(c.scalar(vector, 0));
			if (backend.uint32_t_literal_suffix)
				res += "u";
		}
		else
		{
			for (uint32_t i = 0; i < c.vector_size(); i++)
			{
				res += convert_to_string(c.scalar(vector, i));
				if (backend.uint32_t_literal_suffix)
					res += "u";
				if (i + 1 < c.vector_size())
					res += ", ";
			}
		}
		break;

	case SPIRType::Int:
		if (splat)
			res += convert_to_string(c.scalar_i32(vector, 0));
		else
		{
			for (uint32_t i = 0; i < c.vector_size(); i++)
			{
				res += convert_to_string(c.scalar_i32(vector, i));
				if (i + 1 < c.vector_size())
					res += ", ";
			}
		}
		break;

	case SPIRType::Bool:
		if (splat)
			res += c.scalar(vector, 0) ? "true" : "false";
		else
		{
			for (uint32_t i = 0; i < c.vector_size(); i++)
			{
				res += c.scalar(vector, i) ? "true" : "false";
				if (i + 1 < c.vector_size())
					res += ", ";
			}
		}
		break;

	default:
		throw CompilerError("Invalid constant expression basetype.");
	}

	if (c.vector_size() > 1)
		res += ")";

	return res;
}

string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id)
{
	auto &type = get<SPIRType>(result_type);
	auto flags = meta[result_id].decoration.decoration_flags;

	// If we're declaring temporaries inside continue blocks,
	// we must declare the temporary in the loop header so that the continue block can avoid declaring new variables.
	if (current_continue_block)
	{
		auto &header = get<SPIRBlock>(current_continue_block->loop_dominator);
		if (find_if(begin(header.declare_temporary), end(header.declare_temporary),
		            [result_type, result_id](const pair<uint32_t, uint32_t> &tmp)
		            {
			            return tmp.first == result_type && tmp.second == result_id;
			        }) == end(header.declare_temporary))
		{
			header.declare_temporary.emplace_back(result_type, result_id);
			force_recompile = true;
		}

		return join(to_name(result_id), " = ");
	}
	else
	{
		// The result_id has not been made into an expression yet, so use flags interface.
		return join(flags_to_precision_qualifiers_glsl(type, flags), type_to_glsl(type), " ", to_name(result_id),
		            type_to_array_glsl(type), " = ");
	}
}

bool CompilerGLSL::expression_is_forwarded(uint32_t id)
{
	return forwarded_temporaries.find(id) != end(forwarded_temporaries);
}

SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id, const string &rhs, bool forwarding,
                                      bool extra_parens, bool suppress_usage_tracking)
{
	if (forwarding && (forced_temporaries.find(result_id) == end(forced_temporaries)))
	{
		// Just forward it without temporary.
		// If the forward is trivial, we do not force flushing to temporary for this expression.
		if (!suppress_usage_tracking)
			forwarded_temporaries.insert(result_id);

		if (extra_parens)
			return set<SPIRExpression>(result_id, join("(", rhs, ")"), result_type, true);
		else
			return set<SPIRExpression>(result_id, rhs, result_type, true);
	}
	else
	{
		// If expression isn't immutable, bind it to a temporary and make the new temporary immutable (they always are).
		statement(declare_temporary(result_type, result_id), rhs, ";");
		return set<SPIRExpression>(result_id, to_name(result_id), result_type, true);
	}
}

void CompilerGLSL::emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
{
	emit_op(result_type, result_id, join(op, to_expression(op0)), should_forward(op0), true);
}

void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op)
{
	emit_op(result_type, result_id, join(to_expression(op0), " ", op, " ", to_expression(op1)),
	        should_forward(op0) && should_forward(op1), true);
}

SPIRType CompilerGLSL::binary_op_bitcast_helper(string &cast_op0, string &cast_op1, SPIRType::BaseType &input_type,
                                                uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type)
{
	auto &type0 = expression_type(op0);
	auto &type1 = expression_type(op1);

	// We have to bitcast if our inputs are of different type, or if our types are not equal to expected inputs.
	// For some functions like OpIEqual and INotEqual, we don't care if inputs are of different types than expected
	// since equality test is exactly the same.
	bool cast = (type0.basetype != type1.basetype) || (!skip_cast_if_equal_type && type0.basetype != input_type);

	// Create a fake type so we can bitcast to it.
	// We only deal with regular arithmetic types here like int, uints and so on.
	SPIRType expected_type;
	expected_type.basetype = input_type;
	expected_type.vecsize = type0.vecsize;
	expected_type.columns = type0.columns;
	expected_type.width = type0.width;

	if (cast)
	{
		cast_op0 = bitcast_glsl(expected_type, op0);
		cast_op1 = bitcast_glsl(expected_type, op1);
	}
	else
	{
		// If we don't cast, our actual input type is that of the first (or second) argument.
		cast_op0 = to_expression(op0);
		cast_op1 = to_expression(op1);
		input_type = type0.basetype;
	}

	return expected_type;
}

void CompilerGLSL::emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
                                       const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type)
{
	string cast_op0, cast_op1;
	auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
	auto &out_type = get<SPIRType>(result_type);

	// We might have casted away from the result type, so bitcast again.
	// For example, arithmetic right shift with uint inputs.
	// Special case boolean outputs since relational opcodes output booleans instead of int/uint.
	bool extra_parens = true;
	string expr;
	if (out_type.basetype != input_type && out_type.basetype != SPIRType::Bool)
	{
		expected_type.basetype = input_type;
		expr = bitcast_glsl_op(out_type, expected_type);
		expr += '(';
		expr += join(cast_op0, " ", op, " ", cast_op1);
		expr += ')';
		extra_parens = false;
	}
	else
	{
		expr += join(cast_op0, " ", op, " ", cast_op1);
	}

	emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1), extra_parens);
}

void CompilerGLSL::emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
{
	emit_op(result_type, result_id, join(op, "(", to_expression(op0), ")"), should_forward(op0), false);
}

void CompilerGLSL::emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
                                       const char *op)
{
	emit_op(result_type, result_id, join(op, "(", to_expression(op0), ", ", to_expression(op1), ")"),
	        should_forward(op0) && should_forward(op1), false);
}

void CompilerGLSL::emit_binary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
                                            const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type)
{
	string cast_op0, cast_op1;
	auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
	auto &out_type = get<SPIRType>(result_type);

	// Special case boolean outputs since relational opcodes output booleans instead of int/uint.
	string expr;
	if (out_type.basetype != input_type && out_type.basetype != SPIRType::Bool)
	{
		expected_type.basetype = input_type;
		expr = bitcast_glsl_op(out_type, expected_type);
		expr += '(';
		expr += join(op, "(", cast_op0, ", ", cast_op1, ")");
		expr += ')';
	}
	else
	{
		expr += join(op, "(", cast_op0, ", ", cast_op1, ")");
	}

	emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1), false);
}

void CompilerGLSL::emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
                                        uint32_t op2, const char *op)
{
	emit_op(result_type, result_id,
	        join(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(op2), ")"),
	        should_forward(op0) && should_forward(op1) && should_forward(op2), false);
}

void CompilerGLSL::emit_quaternary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
                                           uint32_t op2, uint32_t op3, const char *op)
{
	emit_op(result_type, result_id, join(op, "(", to_expression(op0), ", ", to_expression(op1), ", ",
	                                     to_expression(op2), ", ", to_expression(op3), ")"),
	        should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3), false);
}

string CompilerGLSL::legacy_tex_op(const std::string &op, const SPIRType &imgtype)
{
	const char *type;
	switch (imgtype.image.dim)
	{
	case spv::Dim1D:
		type = "1D";
		break;
	case spv::Dim2D:
		type = "2D";
		break;
	case spv::Dim3D:
		type = "3D";
		break;
	case spv::DimCube:
		type = "Cube";
		break;
	case spv::DimBuffer:
		type = "Buffer";
		break;
	case spv::DimSubpassData:
		type = "2D";
		break;
	default:
		type = "";
		break;
	}

	if (op == "texture")
		return join("texture", type);
	else if (op == "textureLod")
		return join("texture", type, "Lod");
	else if (op == "textureProj")
		return join("texture", type, "Proj");
	else if (op == "textureProjLod")
		return join("texture", type, "ProjLod");
	else
		throw CompilerError(join("Unsupported legacy texture op: ", op));
}

void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp)
{
	auto &lerptype = expression_type(lerp);
	auto &restype = get<SPIRType>(result_type);

	bool has_boolean_mix = (options.es && options.version >= 310) || (!options.es && options.version >= 450);

	// Boolean mix not supported on desktop without extension.
	// Was added in OpenGL 4.5 with ES 3.1 compat.
	if (!has_boolean_mix && lerptype.basetype == SPIRType::Bool)
	{
		// Could use GL_EXT_shader_integer_mix on desktop at least,
		// but Apple doesn't support it. :(
		// Just implement it as ternary expressions.
		string expr;
		if (lerptype.vecsize == 1)
			expr = join(to_expression(lerp), " ? ", to_expression(right), " : ", to_expression(left));
		else
		{
			auto swiz = [this](uint32_t expression, uint32_t i)
			{
				return join(to_expression(expression), ".", index_to_swizzle(i));
			};

			expr = type_to_glsl_constructor(restype);
			expr += "(";
			for (uint32_t i = 0; i < restype.vecsize; i++)
			{
				expr += swiz(lerp, i);
				expr += " ? ";
				expr += swiz(right, i);
				expr += " : ";
				expr += swiz(left, i);
				if (i + 1 < restype.vecsize)
					expr += ", ";
			}
			expr += ")";
		}

		emit_op(result_type, id, expr, should_forward(left) && should_forward(right) && should_forward(lerp), false);
	}
	else
		emit_trinary_func_op(result_type, id, left, right, lerp, "mix");
}

void CompilerGLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id)
{
	emit_binary_func_op(result_type, result_id, image_id, samp_id, type_to_glsl(get<SPIRType>(result_type)).c_str());
}

void CompilerGLSL::emit_texture_op(const Instruction &i)
{
	auto ops = stream(i);
	auto op = static_cast<Op>(i.op);
	uint32_t length = i.length;

	if (i.offset + length > spirv.size())
		throw CompilerError("Compiler::parse() opcode out of range.");

	uint32_t result_type = ops[0];
	uint32_t id = ops[1];
	uint32_t img = ops[2];
	uint32_t coord = ops[3];
	uint32_t dref = 0;
	uint32_t comp = 0;
	bool gather = false;
	bool proj = false;
	const uint32_t *opt = nullptr;

	switch (op)
	{
	case OpImageSampleDrefImplicitLod:
	case OpImageSampleDrefExplicitLod:
		dref = ops[4];
		opt = &ops[5];
		length -= 5;
		break;

	case OpImageSampleProjDrefImplicitLod:
	case OpImageSampleProjDrefExplicitLod:
		dref = ops[4];
		proj = true;
		opt = &ops[5];
		length -= 5;
		break;

	case OpImageDrefGather:
		dref = ops[4];
		opt = &ops[5];
		gather = true;
		length -= 5;
		break;

	case OpImageGather:
		comp = ops[4];
		opt = &ops[5];
		gather = true;
		length -= 5;
		break;

	case OpImageSampleProjImplicitLod:
	case OpImageSampleProjExplicitLod:
		opt = &ops[4];
		length -= 4;
		proj = true;
		break;

	default:
		opt = &ops[4];
		length -= 4;
		break;
	}

	auto &imgtype = expression_type(img);
	uint32_t coord_components = 0;
	switch (imgtype.image.dim)
	{
	case spv::Dim1D:
		coord_components = 1;
		break;
	case spv::Dim2D:
		coord_components = 2;
		break;
	case spv::Dim3D:
		coord_components = 3;
		break;
	case spv::DimCube:
		coord_components = 3;
		break;
	case spv::DimBuffer:
		coord_components = 1;
		break;
	default:
		coord_components = 2;
		break;
	}

	if (proj)
		coord_components++;
	if (imgtype.image.arrayed)
		coord_components++;

	uint32_t bias = 0;
	uint32_t lod = 0;
	uint32_t grad_x = 0;
	uint32_t grad_y = 0;
	uint32_t coffset = 0;
	uint32_t offset = 0;
	uint32_t coffsets = 0;
	uint32_t sample = 0;
	uint32_t flags = 0;

	if (length)
	{
		flags = opt[0];
		opt++;
		length--;
	}

	auto test = [&](uint32_t &v, uint32_t flag)
	{
		if (length && (flags & flag))
		{
			v = *opt++;
			length--;
		}
	};

	test(bias, ImageOperandsBiasMask);
	test(lod, ImageOperandsLodMask);
	test(grad_x, ImageOperandsGradMask);
	test(grad_y, ImageOperandsGradMask);
	test(coffset, ImageOperandsConstOffsetMask);
	test(offset, ImageOperandsOffsetMask);
	test(coffsets, ImageOperandsConstOffsetsMask);
	test(sample, ImageOperandsSampleMask);

	string expr;
	string texop;

	if (op == OpImageFetch)
		texop += "texelFetch";
	else
	{
		texop += "texture";

		if (gather)
			texop += "Gather";
		if (coffsets)
			texop += "Offsets";
		if (proj)
			texop += "Proj";
		if (grad_x || grad_y)
			texop += "Grad";
		if (lod)
			texop += "Lod";
	}

	if (coffset || offset)
		texop += "Offset";

	if (is_legacy())
		texop = legacy_tex_op(texop, imgtype);

	expr += texop;
	expr += "(";
	expr += to_expression(img);

	bool swizz_func = backend.swizzle_is_function;
	auto swizzle = [swizz_func](uint32_t comps, uint32_t in_comps) -> const char *
	{
		if (comps == in_comps)
			return "";

		switch (comps)
		{
		case 1:
			return ".x";
		case 2:
			return swizz_func ? ".xy()" : ".xy";
		case 3:
			return swizz_func ? ".xyz()" : ".xyz";
		default:
			return "";
		}
	};

	bool forward = should_forward(coord);

	// The IR can give us more components than we need, so chop them off as needed.
	auto coord_expr = to_expression(coord) + swizzle(coord_components, expression_type(coord).vecsize);

	// TODO: implement rest ... A bit intensive.

	if (dref)
	{
		forward = forward && should_forward(dref);

		// SPIR-V splits dref and coordinate.
		if (coord_components == 4) // GLSL also splits the arguments in two.
		{
			expr += ", ";
			expr += to_expression(coord);
			expr += ", ";
			expr += to_expression(dref);
		}
		else
		{
			// Create a composite which merges coord/dref into a single vector.
			auto type = expression_type(coord);
			type.vecsize = coord_components + 1;
			expr += ", ";
			expr += type_to_glsl_constructor(type);
			expr += "(";
			expr += coord_expr;
			expr += ", ";
			expr += to_expression(dref);
			expr += ")";
		}
	}
	else
	{
		expr += ", ";
		expr += coord_expr;
	}

	if (grad_x || grad_y)
	{
		forward = forward && should_forward(grad_x);
		forward = forward && should_forward(grad_y);
		expr += ", ";
		expr += to_expression(grad_x);
		expr += ", ";
		expr += to_expression(grad_y);
	}

	if (lod)
	{
		forward = forward && should_forward(lod);
		expr += ", ";
		expr += to_expression(lod);
	}

	if (coffset)
	{
		forward = forward && should_forward(coffset);
		expr += ", ";
		expr += to_expression(coffset);
	}
	else if (offset)
	{
		forward = forward && should_forward(offset);
		expr += ", ";
		expr += to_expression(offset);
	}

	if (bias)
	{
		forward = forward && should_forward(bias);
		expr += ", ";
		expr += to_expression(bias);
	}

	if (comp)
	{
		forward = forward && should_forward(comp);
		expr += ", ";
		expr += to_expression(comp);
	}

	expr += ")";

	emit_op(result_type, id, expr, forward, false);
}

void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t)
{
	GLSLstd450 op = static_cast<GLSLstd450>(eop);

	switch (op)
	{
	// FP fiddling
	case GLSLstd450Round:
	case GLSLstd450RoundEven:
		emit_unary_func_op(result_type, id, args[0], "round");
		break;
	case GLSLstd450Trunc:
		emit_unary_func_op(result_type, id, args[0], "trunc");
		break;
	case GLSLstd450SAbs:
	case GLSLstd450FAbs:
		emit_unary_func_op(result_type, id, args[0], "abs");
		break;
	case GLSLstd450SSign:
	case GLSLstd450FSign:
		emit_unary_func_op(result_type, id, args[0], "sign");
		break;
	case GLSLstd450Floor:
		emit_unary_func_op(result_type, id, args[0], "floor");
		break;
	case GLSLstd450Ceil:
		emit_unary_func_op(result_type, id, args[0], "ceil");
		break;
	case GLSLstd450Fract:
		emit_unary_func_op(result_type, id, args[0], "fract");
		break;
	case GLSLstd450Radians:
		emit_unary_func_op(result_type, id, args[0], "radians");
		break;
	case GLSLstd450Degrees:
		emit_unary_func_op(result_type, id, args[0], "degrees");
		break;
	case GLSLstd450Fma:
		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "fma");
		break;
	case GLSLstd450Modf:
		register_call_out_argument(args[1]);
		forced_temporaries.insert(id);
		emit_binary_func_op(result_type, id, args[0], args[1], "modf");
		break;

	// Minmax
	case GLSLstd450FMin:
	case GLSLstd450UMin:
	case GLSLstd450SMin:
		emit_binary_func_op(result_type, id, args[0], args[1], "min");
		break;
	case GLSLstd450FMax:
	case GLSLstd450UMax:
	case GLSLstd450SMax:
		emit_binary_func_op(result_type, id, args[0], args[1], "max");
		break;
	case GLSLstd450FClamp:
	case GLSLstd450UClamp:
	case GLSLstd450SClamp:
		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp");
		break;

	// Trig
	case GLSLstd450Sin:
		emit_unary_func_op(result_type, id, args[0], "sin");
		break;
	case GLSLstd450Cos:
		emit_unary_func_op(result_type, id, args[0], "cos");
		break;
	case GLSLstd450Tan:
		emit_unary_func_op(result_type, id, args[0], "tan");
		break;
	case GLSLstd450Asin:
		emit_unary_func_op(result_type, id, args[0], "asin");
		break;
	case GLSLstd450Acos:
		emit_unary_func_op(result_type, id, args[0], "acos");
		break;
	case GLSLstd450Atan:
		emit_unary_func_op(result_type, id, args[0], "atan");
		break;
	case GLSLstd450Sinh:
		emit_unary_func_op(result_type, id, args[0], "sinh");
		break;
	case GLSLstd450Cosh:
		emit_unary_func_op(result_type, id, args[0], "cosh");
		break;
	case GLSLstd450Tanh:
		emit_unary_func_op(result_type, id, args[0], "tanh");
		break;
	case GLSLstd450Asinh:
		emit_unary_func_op(result_type, id, args[0], "asinh");
		break;
	case GLSLstd450Acosh:
		emit_unary_func_op(result_type, id, args[0], "acosh");
		break;
	case GLSLstd450Atanh:
		emit_unary_func_op(result_type, id, args[0], "atanh");
		break;
	case GLSLstd450Atan2:
		emit_binary_func_op(result_type, id, args[0], args[1], "atan");
		break;

	// Exponentials
	case GLSLstd450Pow:
		emit_binary_func_op(result_type, id, args[0], args[1], "pow");
		break;
	case GLSLstd450Exp:
		emit_unary_func_op(result_type, id, args[0], "exp");
		break;
	case GLSLstd450Log:
		emit_unary_func_op(result_type, id, args[0], "log");
		break;
	case GLSLstd450Exp2:
		emit_unary_func_op(result_type, id, args[0], "exp2");
		break;
	case GLSLstd450Log2:
		emit_unary_func_op(result_type, id, args[0], "log2");
		break;
	case GLSLstd450Sqrt:
		emit_unary_func_op(result_type, id, args[0], "sqrt");
		break;
	case GLSLstd450InverseSqrt:
		emit_unary_func_op(result_type, id, args[0], "inversesqrt");
		break;

	// Matrix math
	case GLSLstd450Determinant:
		emit_unary_func_op(result_type, id, args[0], "determinant");
		break;
	case GLSLstd450MatrixInverse:
		emit_unary_func_op(result_type, id, args[0], "inverse");
		break;

	// Lerping
	case GLSLstd450FMix:
	case GLSLstd450IMix:
	{
		emit_mix_op(result_type, id, args[0], args[1], args[2]);
		break;
	}
	case GLSLstd450Step:
		emit_binary_func_op(result_type, id, args[0], args[1], "step");
		break;
	case GLSLstd450SmoothStep:
		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "smoothstep");
		break;

	// Packing
	case GLSLstd450Frexp:
		register_call_out_argument(args[1]);
		forced_temporaries.insert(id);
		emit_binary_func_op(result_type, id, args[0], args[1], "frexp");
		break;
	case GLSLstd450Ldexp:
		emit_binary_func_op(result_type, id, args[0], args[1], "ldexp");
		break;
	case GLSLstd450PackSnorm4x8:
		emit_unary_func_op(result_type, id, args[0], "packSnorm4x8");
		break;
	case GLSLstd450PackUnorm4x8:
		emit_unary_func_op(result_type, id, args[0], "packUnorm4x8");
		break;
	case GLSLstd450PackSnorm2x16:
		emit_unary_func_op(result_type, id, args[0], "packSnorm2x16");
		break;
	case GLSLstd450PackUnorm2x16:
		emit_unary_func_op(result_type, id, args[0], "packUnorm2x16");
		break;
	case GLSLstd450PackHalf2x16:
		emit_unary_func_op(result_type, id, args[0], "packHalf2x16");
		break;
	case GLSLstd450UnpackSnorm4x8:
		emit_unary_func_op(result_type, id, args[0], "unpackSnorm4x8");
		break;
	case GLSLstd450UnpackUnorm4x8:
		emit_unary_func_op(result_type, id, args[0], "unpackUnorm4x8");
		break;
	case GLSLstd450UnpackSnorm2x16:
		emit_unary_func_op(result_type, id, args[0], "unpackSnorm2x16");
		break;
	case GLSLstd450UnpackUnorm2x16:
		emit_unary_func_op(result_type, id, args[0], "unpackUnorm2x16");
		break;
	case GLSLstd450UnpackHalf2x16:
		emit_unary_func_op(result_type, id, args[0], "unpackHalf2x16");
		break;

	// Vector math
	case GLSLstd450Length:
		emit_unary_func_op(result_type, id, args[0], "length");
		break;
	case GLSLstd450Distance:
		emit_binary_func_op(result_type, id, args[0], args[1], "distance");
		break;
	case GLSLstd450Cross:
		emit_binary_func_op(result_type, id, args[0], args[1], "cross");
		break;
	case GLSLstd450Normalize:
		emit_unary_func_op(result_type, id, args[0], "normalize");
		break;
	case GLSLstd450FaceForward:
		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "faceforward");
		break;
	case GLSLstd450Reflect:
		emit_binary_func_op(result_type, id, args[0], args[1], "reflect");
		break;
	case GLSLstd450Refract:
		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "refract");
		break;

	// Bit-fiddling
	case GLSLstd450FindILsb:
		emit_unary_func_op(result_type, id, args[0], "findLSB");
		break;
	case GLSLstd450FindSMsb:
	case GLSLstd450FindUMsb:
		emit_unary_func_op(result_type, id, args[0], "findMSB");
		break;

	// Multisampled varying
	case GLSLstd450InterpolateAtCentroid:
		emit_unary_func_op(result_type, id, args[0], "interpolateAtCentroid");
		break;
	case GLSLstd450InterpolateAtSample:
		emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtSample");
		break;
	case GLSLstd450InterpolateAtOffset:
		emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtOffset");
		break;

	default:
		statement("// unimplemented GLSL op ", eop);
		break;
	}
}

string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type)
{
	if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Int)
		return type_to_glsl(out_type);
	else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float)
		return "floatBitsToUint";
	else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::UInt)
		return type_to_glsl(out_type);
	else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float)
		return "floatBitsToInt";
	else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt)
		return "uintBitsToFloat";
	else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int)
		return "intBitsToFloat";
	else
		return "";
}

string CompilerGLSL::bitcast_glsl(const SPIRType &result_type, uint32_t argument)
{
	auto op = bitcast_glsl_op(result_type, expression_type(argument));
	if (op.empty())
		return to_expression(argument);
	else
		return join(op, "(", to_expression(argument), ")");
}

string CompilerGLSL::builtin_to_glsl(BuiltIn builtin)
{
	switch (builtin)
	{
	case BuiltInPosition:
		return "gl_Position";
	case BuiltInPointSize:
		return "gl_PointSize";
	case BuiltInVertexId:
		if (options.vulkan_semantics)
			throw CompilerError(
			    "Cannot implement gl_VertexID in Vulkan GLSL. This shader was created with GL semantics.");
		return "gl_VertexID";
	case BuiltInInstanceId:
		if (options.vulkan_semantics)
			throw CompilerError(
			    "Cannot implement gl_InstanceID in Vulkan GLSL. This shader was created with GL semantics.");
		return "gl_InstanceID";
	case BuiltInVertexIndex:
		if (options.vulkan_semantics)
			return "gl_VertexIndex";
		else
			return "gl_VertexID"; // gl_VertexID already has the base offset applied.
	case BuiltInInstanceIndex:
		if (options.vulkan_semantics)
			return "gl_InstanceIndex";
		else
			return "(gl_InstanceID + SPIRV_Cross_BaseInstance)"; // ... but not gl_InstanceID.
	case BuiltInPrimitiveId:
		return "gl_PrimitiveID";
	case BuiltInInvocationId:
		return "gl_InvocationID";
	case BuiltInLayer:
		return "gl_Layer";
	case BuiltInTessLevelOuter:
		return "gl_TessLevelOuter";
	case BuiltInTessLevelInner:
		return "gl_TessLevelInner";
	case BuiltInTessCoord:
		return "gl_TessCoord";
	case BuiltInFragCoord:
		return "gl_FragCoord";
	case BuiltInPointCoord:
		return "gl_PointCoord";
	case BuiltInFrontFacing:
		return "gl_FrontFacing";
	case BuiltInFragDepth:
		return "gl_FragDepth";
	case BuiltInNumWorkgroups:
		return "gl_NumWorkGroups";
	case BuiltInWorkgroupSize:
		return "gl_WorkGroupSize";
	case BuiltInWorkgroupId:
		return "gl_WorkGroupID";
	case BuiltInLocalInvocationId:
		return "gl_LocalInvocationID";
	case BuiltInGlobalInvocationId:
		return "gl_GlobalInvocationID";
	case BuiltInLocalInvocationIndex:
		return "gl_LocalInvocationIndex";
	default:
		return "gl_???";
	}
}

const char *CompilerGLSL::index_to_swizzle(uint32_t index)
{
	switch (index)
	{
	case 0:
		return "x";
	case 1:
		return "y";
	case 2:
		return "z";
	case 3:
		return "w";
	default:
		throw CompilerError("Swizzle index out of range");
	}
}

string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32_t count, bool index_is_literal,
                                  bool chain_only)
{
	string expr;
	if (!chain_only)
		expr = to_expression(base);

	const auto *type = &expression_type(base);

	// For resolving array accesses, etc, keep a local copy for poking.
	SPIRType temp;

	bool access_chain_is_arrayed = false;

	for (uint32_t i = 0; i < count; i++)
	{
		uint32_t index = indices[i];

		// Arrays
		if (!type->array.empty())
		{
			expr += "[";
			if (index_is_literal)
				expr += convert_to_string(index);
			else
				expr += to_expression(index);
			expr += "]";

			// We have to modify the type, so keep a local copy.
			if (&temp != type)
				temp = *type;
			type = &temp;
			temp.array.pop_back();

			access_chain_is_arrayed = true;
		}
		// For structs, the index refers to a constant, which indexes into the members.
		// We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
		else if (type->basetype == SPIRType::Struct)
		{
			if (!index_is_literal)
				index = get<SPIRConstant>(index).scalar();

			if (index >= type->member_types.size())
				throw CompilerError("Member index is out of bounds!");

			BuiltIn builtin;
			if (is_member_builtin(*type, index, &builtin))
			{
				// FIXME: We rely here on OpName on gl_in/gl_out to make this work properly.
				// To make this properly work by omitting all OpName opcodes,
				// we need to infer gl_in or gl_out based on the builtin, and stage.
				if (access_chain_is_arrayed)
				{
					expr += ".";
					expr += builtin_to_glsl(builtin);
				}
				else
					expr = builtin_to_glsl(builtin);
			}
			else
			{
				expr += ".";
				expr += to_member_name(*type, index);
			}
			type = &get<SPIRType>(type->member_types[index]);
		}
		// Matrix -> Vector
		else if (type->columns > 1)
		{
			expr += "[";
			if (index_is_literal)
				expr += convert_to_string(index);
			else
				expr += to_expression(index);
			expr += "]";

			// We have to modify the type, so keep a local copy.
			if (&temp != type)
				temp = *type;
			type = &temp;
			temp.columns = 1;
		}
		// Vector -> Scalar
		else if (type->vecsize > 1)
		{
			if (index_is_literal)
			{
				expr += ".";
				expr += index_to_swizzle(index);
			}
			else if (ids[index].get_type() == TypeConstant)
			{
				auto &c = get<SPIRConstant>(index);
				expr += ".";
				expr += index_to_swizzle(c.scalar());
			}
			else
			{
				expr += "[";
				expr += to_expression(index);
				expr += "]";
			}

			// We have to modify the type, so keep a local copy.
			if (&temp != type)
				temp = *type;
			type = &temp;
			temp.vecsize = 1;
		}
		else
			throw CompilerError("Cannot subdivide a scalar value!");
	}

	return expr;
}

bool CompilerGLSL::should_forward(uint32_t id)
{
	return is_immutable(id) && !options.force_temporary;
}

void CompilerGLSL::track_expression_read(uint32_t id)
{
	// If we try to read a forwarded temporary more than once we will stamp out possibly complex code twice.
	// In this case, it's better to just bind the complex expression to the temporary and read that temporary twice.
	if (expression_is_forwarded(id))
	{
		auto &v = expression_usage_counts[id];
		v++;

		if (v >= 2)
		{
			//if (v == 2)
			//    fprintf(stderr, "ID %u was forced to temporary due to more than 1 expression use!\n", id);

			forced_temporaries.insert(id);
			// Force a recompile after this pass to avoid forwarding this variable.
			force_recompile = true;
		}
	}
}

bool CompilerGLSL::args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure)
{
	if (forced_temporaries.find(id) != end(forced_temporaries))
		return false;

	for (uint32_t i = 0; i < num_args; i++)
		if (!should_forward(args[i]))
			return false;

	// We need to forward globals as well.
	if (!pure)
	{
		for (auto global : global_variables)
			if (!should_forward(global))
				return false;
		for (auto aliased : aliased_variables)
			if (!should_forward(aliased))
				return false;
	}

	return true;
}

void CompilerGLSL::register_impure_function_call()
{
	// Impure functions can modify globals and aliased variables, so invalidate them as well.
	for (auto global : global_variables)
		flush_dependees(get<SPIRVariable>(global));
	for (auto aliased : aliased_variables)
		flush_dependees(get<SPIRVariable>(aliased));
}

void CompilerGLSL::register_call_out_argument(uint32_t id)
{
	register_write(id);

	auto *var = maybe_get<SPIRVariable>(id);
	if (var)
		flush_variable_declaration(var->self);
}

void CompilerGLSL::flush_variable_declaration(uint32_t id)
{
	auto *var = maybe_get<SPIRVariable>(id);
	if (var && var->deferred_declaration)
	{
		statement(variable_decl(*var), ";");
		var->deferred_declaration = false;
	}
}

bool CompilerGLSL::remove_duplicate_swizzle(string &op)
{
	auto pos = op.find_last_of('.');
	if (pos == string::npos || pos == 0)
		return false;

	string final_swiz = op.substr(pos + 1, string::npos);

	if (backend.swizzle_is_function)
	{
		if (final_swiz.size() < 2)
			return false;

		if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()")
			final_swiz.erase(final_swiz.size() - 2, string::npos);
		else
			return false;
	}

	// Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar.
	// If so, and previous swizzle is of same length,
	// we can drop the final swizzle altogether.
	for (uint32_t i = 0; i < final_swiz.size(); i++)
	{
		static const char expected[] = { 'x', 'y', 'z', 'w' };
		if (i >= 4 || final_swiz[i] != expected[i])
			return false;
	}

	auto prevpos = op.find_last_of('.', pos - 1);
	if (prevpos == string::npos)
		return false;

	prevpos++;

	// Make sure there are only swizzles here ...
	for (auto i = prevpos; i < pos; i++)
	{
		if (op[i] < 'w' || op[i] > 'z')
		{
			// If swizzles are foo.xyz() like in C++ backend for example, check for that.
			if (backend.swizzle_is_function && i + 2 == pos && op[i] == '(' && op[i + 1] == ')')
				break;
			return false;
		}
	}

	// If original swizzle is large enough, just carve out the components we need.
	// E.g. foobar.wyx.xy will turn into foobar.wy.
	if (pos - prevpos >= final_swiz.size())
	{
		op.erase(prevpos + final_swiz.size(), string::npos);

		// Add back the function call ...
		if (backend.swizzle_is_function)
			op += "()";
	}
	return true;
}

// Optimizes away vector swizzles where we have something like
// vec3 foo;
// foo.xyz <-- swizzle expression does nothing.
// This is a very common pattern after OpCompositeCombine.
bool CompilerGLSL::remove_unity_swizzle(uint32_t base, string &op)
{
	auto pos = op.find_last_of('.');
	if (pos == string::npos || pos == 0)
		return false;

	string final_swiz = op.substr(pos + 1, string::npos);

	if (backend.swizzle_is_function)
	{
		if (final_swiz.size() < 2)
			return false;

		if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()")
			final_swiz.erase(final_swiz.size() - 2, string::npos);
		else
			return false;
	}

	// Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar.
	// If so, and previous swizzle is of same length,
	// we can drop the final swizzle altogether.
	for (uint32_t i = 0; i < final_swiz.size(); i++)
	{
		static const char expected[] = { 'x', 'y', 'z', 'w' };
		if (i >= 4 || final_swiz[i] != expected[i])
			return false;
	}

	auto &type = expression_type(base);

	// Sanity checking ...
	assert(type.columns == 1 && type.array.empty());

	if (type.vecsize == final_swiz.size())
		op.erase(pos, string::npos);
	return true;
}

string CompilerGLSL::build_composite_combiner(const uint32_t *elems, uint32_t length)
{
	uint32_t base = 0;
	bool swizzle_optimization = false;
	string op;

	for (uint32_t i = 0; i < length; i++)
	{
		auto *e = maybe_get<SPIRExpression>(elems[i]);

		// If we're merging another scalar which belongs to the same base
		// object, just merge the swizzles to avoid triggering more than 1 expression read as much as possible!
		if (e && e->base_expression && e->base_expression == base)
		{
			// Only supposed to be used for vector swizzle -> scalar.
			assert(!e->expression.empty() && e->expression.front() == '.');
			op += e->expression.substr(1, string::npos);
			swizzle_optimization = true;
		}
		else
		{
			// We'll likely end up with duplicated swizzles, e.g.
			// foobar.xyz.xyz from patterns like
			// OpVectorSwizzle
			// OpCompositeExtract x 3
			// OpCompositeConstruct 3x + other scalar.
			// Just modify op in-place.
			if (swizzle_optimization)
			{
				if (backend.swizzle_is_function)
					op += "()";

				// Don't attempt to remove unity swizzling if we managed to remove duplicate swizzles.
				// The base "foo" might be vec4, while foo.xyz is vec3 (OpVectorShuffle) and looks like a vec3 due to the .xyz tacked on.
				// We only want to remove the swizzles if we're certain that the resulting base will be the same vecsize.
				// Essentially, we can only remove one set of swizzles, since that's what we have control over ...
				// Case 1:
				//  foo.yxz.xyz: Duplicate swizzle kicks in, giving foo.yxz, we are done.
				//               foo.yxz was the result of OpVectorShuffle and we don't know the type of foo.
				// Case 2:
				//  foo.xyz: Duplicate swizzle won't kick in.
				//           If foo is vec3, we can remove xyz, giving just foo.
				if (!remove_duplicate_swizzle(op))
					remove_unity_swizzle(base, op);
				swizzle_optimization = false;
			}

			if (i)
				op += ", ";
			op += to_expression(elems[i]);
		}

		base = e ? e->base_expression : 0;
	}

	if (swizzle_optimization)
	{
		if (backend.swizzle_is_function)
			op += "()";

		if (!remove_duplicate_swizzle(op))
			remove_unity_swizzle(base, op);
	}

	return op;
}

void CompilerGLSL::emit_instruction(const Instruction &instruction)
{
	auto ops = stream(instruction);
	auto opcode = static_cast<Op>(instruction.op);
	uint32_t length = instruction.length;

#define BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op)
#define BOP_CAST(op, type, skip_cast) emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, skip_cast)
#define UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op)
#define QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op)
#define TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op)
#define BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
#define BFOP_CAST(op, type, skip_cast) emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, skip_cast)
#define BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
#define UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op)

	switch (opcode)
	{
	// Dealing with memory
	case OpLoad:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		uint32_t ptr = ops[2];

		flush_variable_declaration(ptr);

		// If we're loading from memory that cannot be changed by the shader,
		// just forward the expression directly to avoid needless temporaries.
		if (should_forward(ptr))
		{
			set<SPIRExpression>(id, to_expression(ptr), result_type, true);
			register_read(id, ptr, true);
		}
		else
		{
			// If the variable can be modified after this OpLoad, we cannot just forward the expression.
			// We must read it now and store it in a temporary.
			emit_op(result_type, id, to_expression(ptr), false, false);
			register_read(id, ptr, false);
		}
		break;
	}

	case OpInBoundsAccessChain:
	case OpAccessChain:
	{
		auto *var = maybe_get<SPIRVariable>(ops[2]);
		if (var)
			flush_variable_declaration(var->self);

		// If the base is immutable, the access chain pointer must also be.
		auto e = access_chain(ops[2], &ops[3], length - 3, false);
		auto &expr = set<SPIRExpression>(ops[1], move(e), ops[0], is_immutable(ops[2]));
		expr.loaded_from = ops[2];
		break;
	}

	case OpStore:
	{
		auto *var = maybe_get<SPIRVariable>(ops[0]);

		if (var && var->statically_assigned)
			var->static_expression = ops[1];
		else
		{
			auto lhs = to_expression(ops[0]);
			auto rhs = to_expression(ops[1]);

			// It is possible with OpLoad/OpCompositeInsert/OpStore that we get <expr> = <same-expr>.
			// For this case, we don't need to invalidate anything and emit any opcode.
			if (lhs != rhs)
			{
				register_write(ops[0]);
				statement(lhs, " = ", rhs, ";");
			}
		}
		break;
	}

	case OpArrayLength:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		auto e = access_chain(ops[2], &ops[3], length - 3, true);
		set<SPIRExpression>(id, e + ".length()", result_type, true);
		break;
	}

	// Function calls
	case OpFunctionCall:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		uint32_t func = ops[2];
		const auto *arg = &ops[3];
		length -= 3;

		auto &callee = get<SPIRFunction>(func);
		bool pure = function_is_pure(callee);

		bool callee_has_out_variables = false;

		// Invalidate out variables passed to functions since they can be OpStore'd to.
		for (uint32_t i = 0; i < length; i++)
		{
			if (callee.arguments[i].write_count)
			{
				register_call_out_argument(arg[i]);
				callee_has_out_variables = true;
			}

			flush_variable_declaration(arg[i]);
		}

		if (!pure)
			register_impure_function_call();

		string funexpr;
		funexpr += to_name(func) + "(";
		for (uint32_t i = 0; i < length; i++)
		{
			funexpr += to_expression(arg[i]);
			if (i + 1 < length)
				funexpr += ", ";
		}
		funexpr += ")";

		if (get<SPIRType>(result_type).basetype != SPIRType::Void)
		{
			// If the function actually writes to an out variable,
			// take the conservative route and do not forward.
			// The problem is that we might not read the function
			// result (and emit the function) before an out variable
			// is read (common case when return value is ignored!
			// In order to avoid start tracking invalid variables,
			// just avoid the forwarding problem altogether.
			bool forward = args_will_forward(id, arg, length, pure) && !callee_has_out_variables && pure &&
			               (forced_temporaries.find(id) == end(forced_temporaries));

			emit_op(result_type, id, funexpr, forward, false);

			// Function calls are implicit loads from all variables in question.
			// Set dependencies for them.
			for (uint32_t i = 0; i < length; i++)
				register_read(id, arg[i], forward);

			// If we're going to forward the temporary result,
			// put dependencies on every variable that must not change.
			if (forward)
				register_global_read_dependencies(callee, id);
		}
		else
			statement(funexpr, ";");

		break;
	}

	// Composite munging
	case OpCompositeConstruct:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		const auto *elems = &ops[2];
		length -= 2;

		if (!length)
			throw CompilerError("Invalid input to OpCompositeConstruct.");

		bool forward = true;
		for (uint32_t i = 0; i < length; i++)
			forward = forward && should_forward(elems[i]);

		auto &in_type = expression_type(elems[0]);
		bool splat = in_type.vecsize == 1 && in_type.columns == 1;

		if (splat)
		{
			uint32_t input = elems[0];
			for (uint32_t i = 0; i < length; i++)
				if (input != elems[i])
					splat = false;
		}

		auto constructor_op = type_to_glsl_constructor(get<SPIRType>(result_type)) + "(";
		if (splat)
			constructor_op += to_expression(elems[0]);
		else
			constructor_op += build_composite_combiner(elems, length);
		constructor_op += ")";
		emit_op(result_type, id, constructor_op, forward, false);
		break;
	}

	case OpVectorInsertDynamic:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		uint32_t vec = ops[2];
		uint32_t comp = ops[3];
		uint32_t index = ops[4];

		flush_variable_declaration(vec);

		// Make a copy, then use access chain to store the variable.
		statement(declare_temporary(result_type, id), to_expression(vec), ";");
		set<SPIRExpression>(id, to_name(id), result_type, true);
		auto chain = access_chain(id, &index, 1, false);
		statement(chain, " = ", to_expression(comp), ";");
		break;
	}

	case OpVectorExtractDynamic:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];

		auto expr = access_chain(ops[2], &ops[3], 1, false);
		emit_op(result_type, id, expr, should_forward(ops[2]), false);
		break;
	}

	case OpCompositeExtract:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		length -= 3;

		auto &type = get<SPIRType>(result_type);

		// Only apply this optimization if result is scalar.
		if (should_forward(ops[2]) && type.vecsize == 1 && type.columns == 1 && length == 1)
		{
			// We want to split the access chain from the base.
			// This is so we can later combine different CompositeExtract results
			// with CompositeConstruct without emitting code like
			//
			// vec3 temp = texture(...).xyz
			// vec4(temp.x, temp.y, temp.z, 1.0).
			//
			// when we actually wanted to emit this
			// vec4(texture(...).xyz, 1.0).
			//
			// Including the base will prevent this and would trigger multiple reads
			// from expression causing it to be forced to an actual temporary in GLSL.
			auto expr = access_chain(ops[2], &ops[3], length, true, true);
			auto &e = emit_op(result_type, id, expr, true, false, !expression_is_forwarded(ops[2]));
			e.base_expression = ops[2];
		}
		else
		{
			auto expr = access_chain(ops[2], &ops[3], length, true);
			emit_op(result_type, id, expr, should_forward(ops[2]), false, !expression_is_forwarded(ops[2]));
		}
		break;
	}

	case OpCompositeInsert:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		uint32_t obj = ops[2];
		uint32_t composite = ops[3];
		const auto *elems = &ops[4];
		length -= 4;

		flush_variable_declaration(composite);

		auto *expr = maybe_get<SPIRExpression>(id);
		if ((expr && expr->used_while_invalidated) || !should_forward(composite))
		{
			// Make a copy, then use access chain to store the variable.
			statement(declare_temporary(result_type, id), to_expression(composite), ";");
			set<SPIRExpression>(id, to_name(id), result_type, true);
			auto chain = access_chain(id, elems, length, true);
			statement(chain, " = ", to_expression(obj), ";");
		}
		else
		{
			auto chain = access_chain(composite, elems, length, true);
			statement(chain, " = ", to_expression(obj), ";");
			set<SPIRExpression>(id, to_expression(composite), result_type, true);

			register_write(composite);
			register_read(id, composite, true);
			// Invalidate the old expression we inserted into.
			invalid_expressions.insert(composite);
		}
		break;
	}

	case OpCopyObject:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		uint32_t rhs = ops[2];
		if (expression_is_lvalue(rhs))
		{
			// Need a copy.
			statement(declare_temporary(result_type, id), to_expression(rhs), ";");
			set<SPIRExpression>(id, to_name(id), result_type, true);
		}
		else
		{
			// RHS expression is immutable, so just forward it.
			// Copying these things really make no sense, but
			// seems to be allowed anyways.
			set<SPIRExpression>(id, to_expression(rhs), result_type, true);
		}
		break;
	}

	case OpVectorShuffle:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		uint32_t vec0 = ops[2];
		uint32_t vec1 = ops[3];
		const auto *elems = &ops[4];
		length -= 4;

		auto &type0 = expression_type(vec0);

		bool shuffle = false;
		for (uint32_t i = 0; i < length; i++)
			if (elems[i] >= type0.vecsize)
				shuffle = true;

		string expr;
		bool trivial_forward;

		if (shuffle)
		{
			trivial_forward = !expression_is_forwarded(vec0) && !expression_is_forwarded(vec1);

			// Constructor style and shuffling from two different vectors.
			vector<string> args;
			for (uint32_t i = 0; i < length; i++)
			{
				if (elems[i] >= type0.vecsize)
					args.push_back(join(to_expression(vec1), ".", index_to_swizzle(elems[i] - type0.vecsize)));
				else
					args.push_back(join(to_expression(vec0), ".", index_to_swizzle(elems[i])));
			}
			expr += join(type_to_glsl_constructor(get<SPIRType>(result_type)), "(", merge(args), ")");
		}
		else
		{
			trivial_forward = !expression_is_forwarded(vec0);

			// We only source from first vector, so can use swizzle.
			expr += to_expression(vec0);
			expr += ".";
			for (uint32_t i = 0; i < length; i++)
				expr += index_to_swizzle(elems[i]);
			if (backend.swizzle_is_function && length > 1)
				expr += "()";
		}

		// A shuffle is trivial in that it doesn't actually *do* anything.
		// We inherit the forwardedness from our arguments to avoid flushing out to temporaries when it's not really needed.

		emit_op(result_type, id, expr, should_forward(vec0) && should_forward(vec1), false, trivial_forward);
		break;
	}

	// ALU
	case OpIsNan:
		UFOP(isnan);
		break;

	case OpIsInf:
		UFOP(isinf);
		break;

	case OpSNegate:
	case OpFNegate:
		UOP(-);
		break;

	case OpIAdd:
	{
		// For simple arith ops, prefer the output type if there's a mismatch to avoid extra bitcasts.
		auto type = get<SPIRType>(ops[0]).basetype;
		BOP_CAST(+, type, true);
		break;
	}

	case OpFAdd:
		BOP(+);
		break;

	case OpISub:
	{
		auto type = get<SPIRType>(ops[0]).basetype;
		BOP_CAST(-, type, true);
		break;
	}

	case OpFSub:
		BOP(-);
		break;

	case OpIMul:
	{
		auto type = get<SPIRType>(ops[0]).basetype;
		BOP_CAST(*, type, true);
		break;
	}

	case OpFMul:
	case OpMatrixTimesVector:
	case OpMatrixTimesScalar:
	case OpVectorTimesScalar:
	case OpVectorTimesMatrix:
	case OpMatrixTimesMatrix:
		BOP(*);
		break;

	case OpOuterProduct:
		UFOP(outerProduct);
		break;

	case OpDot:
		BFOP(dot);
		break;

	case OpTranspose:
		UFOP(transpose);
		break;

	case OpSDiv:
		BOP_CAST(/, SPIRType::Int, false);
		break;

	case OpUDiv:
		BOP_CAST(/, SPIRType::UInt, false);
		break;

	case OpFDiv:
		BOP(/ );
		break;

	case OpShiftRightLogical:
		BOP_CAST(>>, SPIRType::UInt, false);
		break;

	case OpShiftRightArithmetic:
		BOP_CAST(>>, SPIRType::Int, false);
		break;

	case OpShiftLeftLogical:
	{
		auto type = get<SPIRType>(ops[0]).basetype;
		BOP_CAST(<<, type, true);
		break;
	}

	case OpBitwiseOr:
	{
		auto type = get<SPIRType>(ops[0]).basetype;
		BOP_CAST(|, type, true);
		break;
	}

	case OpBitwiseXor:
	{
		auto type = get<SPIRType>(ops[0]).basetype;
		BOP_CAST (^, type, true);
		break;
	}

	case OpBitwiseAnd:
	{
		auto type = get<SPIRType>(ops[0]).basetype;
		BOP_CAST(&, type, true);
		break;
	}

	case OpNot:
		UOP(~);
		break;

	case OpUMod:
		BOP_CAST(%, SPIRType::UInt, false);
		break;

	case OpSMod:
		BOP_CAST(%, SPIRType::Int, false);
		break;

	case OpFMod:
		BFOP(mod);
		break;

	// Relational
	case OpAny:
		UFOP(any);
		break;

	case OpAll:
		UFOP(all);
		break;

	case OpSelect:
		emit_mix_op(ops[0], ops[1], ops[4], ops[3], ops[2]);
		break;

	case OpLogicalOr:
		BOP(|| );
		break;

	case OpLogicalAnd:
		BOP(&&);
		break;

	case OpLogicalNot:
		UOP(!);
		break;

	case OpIEqual:
	{
		if (expression_type(ops[2]).vecsize > 1)
			BFOP_CAST(equal, SPIRType::Int, true);
		else
			BOP_CAST(==, SPIRType::Int, true);
		break;
	}

	case OpLogicalEqual:
	case OpFOrdEqual:
	{
		if (expression_type(ops[2]).vecsize > 1)
			BFOP(equal);
		else
			BOP(== );
		break;
	}

	case OpINotEqual:
	{
		if (expression_type(ops[2]).vecsize > 1)
			BFOP_CAST(notEqual, SPIRType::Int, true);
		else
			BOP_CAST(!=, SPIRType::Int, true);
		break;
	}

	case OpLogicalNotEqual:
	case OpFOrdNotEqual:
	{
		if (expression_type(ops[2]).vecsize > 1)
			BFOP(notEqual);
		else
			BOP(!= );
		break;
	}

	case OpUGreaterThan:
	case OpSGreaterThan:
	{
		auto type = opcode == OpUGreaterThan ? SPIRType::UInt : SPIRType::Int;
		if (expression_type(ops[2]).vecsize > 1)
			BFOP_CAST(greaterThan, type, false);
		else
			BOP_CAST(>, type, false);
		break;
	}

	case OpFOrdGreaterThan:
	{
		if (expression_type(ops[2]).vecsize > 1)
			BFOP(greaterThan);
		else
			BOP(> );
		break;
	}

	case OpUGreaterThanEqual:
	case OpSGreaterThanEqual:
	{
		auto type = opcode == OpUGreaterThanEqual ? SPIRType::UInt : SPIRType::Int;
		if (expression_type(ops[2]).vecsize > 1)
			BFOP_CAST(greaterThanEqual, type, false);
		else
			BOP_CAST(>=, type, false);
		break;
	}

	case OpFOrdGreaterThanEqual:
	{
		if (expression_type(ops[2]).vecsize > 1)
			BFOP(greaterThanEqual);
		else
			BOP(>= );
		break;
	}

	case OpULessThan:
	case OpSLessThan:
	{
		auto type = opcode == OpULessThan ? SPIRType::UInt : SPIRType::Int;
		if (expression_type(ops[2]).vecsize > 1)
			BFOP_CAST(lessThan, type, false);
		else
			BOP_CAST(<, type, false);
		break;
	}

	case OpFOrdLessThan:
	{
		if (expression_type(ops[2]).vecsize > 1)
			BFOP(lessThan);
		else
			BOP(< );
		break;
	}

	case OpULessThanEqual:
	case OpSLessThanEqual:
	{
		auto type = opcode == OpULessThanEqual ? SPIRType::UInt : SPIRType::Int;
		if (expression_type(ops[2]).vecsize > 1)
			BFOP_CAST(lessThanEqual, type, false);
		else
			BOP_CAST(<=, type, false);
		break;
	}

	case OpFOrdLessThanEqual:
	{
		if (expression_type(ops[2]).vecsize > 1)
			BFOP(lessThanEqual);
		else
			BOP(<= );
		break;
	}

	// Conversion
	case OpConvertFToU:
	case OpConvertFToS:
	case OpConvertSToF:
	case OpConvertUToF:
	case OpUConvert:
	case OpSConvert:
	case OpFConvert:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];

		auto func = type_to_glsl_constructor(get<SPIRType>(result_type));
		emit_unary_func_op(result_type, id, ops[2], func.c_str());
		break;
	}

	case OpBitcast:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		uint32_t arg = ops[2];

		auto op = bitcast_glsl_op(get<SPIRType>(result_type), expression_type(arg));
		emit_unary_func_op(result_type, id, arg, op.c_str());
		break;
	}

	// Derivatives
	case OpDPdx:
		UFOP(dFdx);
		break;

	case OpDPdy:
		UFOP(dFdy);
		break;

	case OpFwidth:
		UFOP(fwidth);
		break;

	// Bitfield
	case OpBitFieldInsert:
		QFOP(bitfieldInsert);
		break;

	case OpBitFieldSExtract:
	case OpBitFieldUExtract:
		QFOP(bitfieldExtract);
		break;

	case OpBitReverse:
		UFOP(bitfieldReverse);
		break;

	case OpBitCount:
		UFOP(bitCount);
		break;

	// Atomics
	case OpAtomicExchange:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		uint32_t ptr = ops[2];
		// Ignore semantics for now, probably only relevant to CL.
		uint32_t val = ops[5];
		const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange";
		forced_temporaries.insert(id);
		emit_binary_func_op(result_type, id, ptr, val, op);
		flush_all_atomic_capable_variables();
		break;
	}

	case OpAtomicCompareExchange:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		uint32_t ptr = ops[2];
		uint32_t val = ops[6];
		uint32_t comp = ops[7];
		const char *op = check_atomic_image(ptr) ? "imageAtomicCompSwap" : "atomicCompSwap";

		forced_temporaries.insert(id);
		emit_trinary_func_op(result_type, id, ptr, comp, val, op);
		flush_all_atomic_capable_variables();
		break;
	}

	case OpAtomicLoad:
		flush_all_atomic_capable_variables();
		// FIXME: Image?
		UFOP(atomicCounter);
		register_read(ops[1], ops[2], should_forward(ops[2]));
		break;

	// OpAtomicStore unimplemented. Not sure what would use that.
	// OpAtomicLoad seems to only be relevant for atomic counters.

	case OpAtomicIIncrement:
		forced_temporaries.insert(ops[1]);
		// FIXME: Image?
		UFOP(atomicCounterIncrement);
		flush_all_atomic_capable_variables();
		register_read(ops[1], ops[2], should_forward(ops[2]));
		break;

	case OpAtomicIDecrement:
		forced_temporaries.insert(ops[1]);
		// FIXME: Image?
		UFOP(atomicCounterDecrement);
		flush_all_atomic_capable_variables();
		register_read(ops[1], ops[2], should_forward(ops[2]));
		break;

	case OpAtomicIAdd:
	{
		const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd";
		forced_temporaries.insert(ops[1]);
		emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
		flush_all_atomic_capable_variables();
		register_read(ops[1], ops[2], should_forward(ops[2]));
		break;
	}

	case OpAtomicISub:
	{
		const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd";
		forced_temporaries.insert(ops[1]);
		auto expr = join(op, "(", to_expression(ops[2]), ", -", to_expression(ops[5]), ")");
		emit_op(ops[0], ops[1], expr, should_forward(ops[2]) && should_forward(ops[5]), false);
		flush_all_atomic_capable_variables();
		register_read(ops[1], ops[2], should_forward(ops[2]));
		break;
	}

	case OpAtomicSMin:
	case OpAtomicUMin:
	{
		const char *op = check_atomic_image(ops[2]) ? "imageAtomicMin" : "atomicMin";
		forced_temporaries.insert(ops[1]);
		emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
		flush_all_atomic_capable_variables();
		register_read(ops[1], ops[2], should_forward(ops[2]));
		break;
	}

	case OpAtomicSMax:
	case OpAtomicUMax:
	{
		const char *op = check_atomic_image(ops[2]) ? "imageAtomicMax" : "atomicMax";
		forced_temporaries.insert(ops[1]);
		emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
		flush_all_atomic_capable_variables();
		register_read(ops[1], ops[2], should_forward(ops[2]));
		break;
	}

	case OpAtomicAnd:
	{
		const char *op = check_atomic_image(ops[2]) ? "imageAtomicAnd" : "atomicAnd";
		forced_temporaries.insert(ops[1]);
		emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
		flush_all_atomic_capable_variables();
		register_read(ops[1], ops[2], should_forward(ops[2]));
		break;
	}

	case OpAtomicOr:
	{
		const char *op = check_atomic_image(ops[2]) ? "imageAtomicOr" : "atomicOr";
		forced_temporaries.insert(ops[1]);
		emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
		flush_all_atomic_capable_variables();
		register_read(ops[1], ops[2], should_forward(ops[2]));
		break;
	}

	case OpAtomicXor:
	{
		const char *op = check_atomic_image(ops[2]) ? "imageAtomicXor" : "atomicXor";
		forced_temporaries.insert(ops[1]);
		emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
		flush_all_atomic_capable_variables();
		register_read(ops[1], ops[2], should_forward(ops[2]));
		break;
	}

	// Geometry shaders
	case OpEmitVertex:
		statement("EmitVertex();");
		break;

	case OpEndPrimitive:
		statement("EndPrimitive();");
		break;

	case OpEmitStreamVertex:
		statement("EmitStreamVertex();");
		break;

	case OpEndStreamPrimitive:
		statement("EndStreamPrimitive();");
		break;

	// Textures
	case OpImageSampleImplicitLod:
	case OpImageSampleExplicitLod:
	case OpImageSampleProjImplicitLod:
	case OpImageSampleProjExplicitLod:
	case OpImageSampleDrefImplicitLod:
	case OpImageSampleDrefExplicitLod:
	case OpImageSampleProjDrefImplicitLod:
	case OpImageSampleProjDrefExplicitLod:
	case OpImageFetch:
	case OpImageGather:
	case OpImageDrefGather:
		// Gets a bit hairy, so move this to a separate instruction.
		emit_texture_op(instruction);
		break;

	case OpImage:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		emit_op(result_type, id, to_expression(ops[2]), true, false);
		break;
	}

	case OpSampledImage:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		emit_sampled_image_op(result_type, id, ops[2], ops[3]);
		break;
	}

	case OpImageQuerySizeLod:
		BFOP(textureSize);
		break;

	// Image load/store
	case OpImageRead:
	{
		// We added Nonreadable speculatively to the OpImage variable due to glslangValidator
		// not adding the proper qualifiers.
		// If it turns out we need to read the image after all, remove the qualifier and recompile.
		auto *var = maybe_get_backing_variable(ops[2]);
		if (var)
		{
			auto &flags = meta.at(var->self).decoration.decoration_flags;
			if (flags & (1ull << DecorationNonReadable))
			{
				flags &= ~(1ull << DecorationNonReadable);
				force_recompile = true;
			}
		}

		uint32_t result_type = ops[0];
		uint32_t id = ops[1];

		bool pure;
		string imgexpr;
		auto &type = expression_type(ops[2]);

		if (var && var->remapped_variable) // PLS input, just read as-is without any op-code
		{
			// PLS input could have different number of components than what the SPIR expects, swizzle to
			// the appropriate vector size.
			auto itr = find_if(begin(pls_inputs), end(pls_inputs), [var](const PlsRemap &pls)
			                   {
				                   return pls.id == var->self;
				               });

			if (itr == end(pls_inputs))
				throw CompilerError("Found PLS remap for OpImageRead, but ID is not a PLS input ...");

			uint32_t components = pls_format_to_components(itr->format);
			imgexpr = remap_swizzle(result_type, components, ops[2]);
			pure = true;
		}
		else if (type.image.dim == DimSubpassData)
		{
			if (options.vulkan_semantics)
			{
				// With Vulkan semantics, use the proper Vulkan GLSL construct.
				imgexpr = join("subpassLoad(", to_expression(ops[2]), ")");
			}
			else
			{
				// Implement subpass loads via texture barrier style sampling.
				imgexpr = join("texelFetch(", to_expression(ops[2]), ", ivec2(gl_FragCoord.xy), 0)");
			}
			pure = true;
		}
		else
		{
			// Plain image load/store.
			imgexpr = join("imageLoad(", to_expression(ops[2]), ", ", to_expression(ops[3]), ")");
			pure = false;
		}

		if (var && var->forwardable)
		{
			auto &e = emit_op(result_type, id, imgexpr, true, false);

			// We only need to track dependencies if we're reading from image load/store.
			if (!pure)
			{
				e.loaded_from = var->self;
				var->dependees.push_back(id);
			}
		}
		else
			emit_op(result_type, id, imgexpr, false, false);
		break;
	}

	case OpImageTexelPointer:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		auto &e = set<SPIRExpression>(id, join(to_expression(ops[2]), ", ", to_expression(ops[3])), result_type, true);

		auto *var = maybe_get_backing_variable(ops[2]);
		e.loaded_from = var ? var->self : 0;
		break;
	}

	case OpImageWrite:
	{
		// We added Nonwritable speculatively to the OpImage variable due to glslangValidator
		// not adding the proper qualifiers.
		// If it turns out we need to write to the image after all, remove the qualifier and recompile.
		auto *var = maybe_get_backing_variable(ops[0]);
		if (var)
		{
			auto &flags = meta.at(var->self).decoration.decoration_flags;
			if (flags & (1ull << DecorationNonWritable))
			{
				flags &= ~(1ull << DecorationNonWritable);
				force_recompile = true;
			}
		}

		statement("imageStore(", to_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ");");

		if (var && variable_storage_is_aliased(*var))
			flush_all_aliased_variables();
		break;
	}

	case OpImageQuerySize:
	{
		auto &type = expression_type(ops[2]);
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];

		if (type.basetype == SPIRType::Image)
		{
			// The size of an image is always constant.
			emit_op(result_type, id, join("imageSize(", to_expression(ops[2]), ")"), true, false);
		}
		else
			throw CompilerError("Invalid type for OpImageQuerySize.");
		break;
	}

	// Compute
	case OpControlBarrier:
	{
		// Ignore execution and memory scope.
		if (execution.model == ExecutionModelGLCompute)
		{
			uint32_t mem = get<SPIRConstant>(ops[2]).scalar();
			if (mem == MemorySemanticsWorkgroupMemoryMask)
				statement("memoryBarrierShared();");
			else if (mem &&
			         mem != 4062) // Hacky workaround of glslangValidator which emits "all" barrier for barrier() ...
				statement("memoryBarrier();");
		}
		statement("barrier();");
		break;
	}

	case OpMemoryBarrier:
	{
		uint32_t mem = get<SPIRConstant>(ops[1]).scalar();

		// We cannot forward any loads beyond the memory barrier.
		if (mem)
			flush_all_active_variables();

		if (mem == MemorySemanticsWorkgroupMemoryMask)
			statement("memoryBarrierShared();");
		else if (mem && mem != 4062) // Hacky workaround of glslangValidator which emits "all" barrier for barrier() ...
			statement("memoryBarrier();");
		break;
	}

	case OpExtInst:
	{
		uint32_t extension_set = ops[2];
		if (get<SPIRExtension>(extension_set).ext != SPIRExtension::GLSL)
		{
			statement("// unimplemented ext op ", instruction.op);
			break;
		}

		emit_glsl_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
		break;
	}

	default:
		statement("// unimplemented op ", instruction.op);
		break;
	}
}

string CompilerGLSL::to_member_name(const SPIRType &type, uint32_t index)
{
	auto &memb = meta[type.self].members;
	if (index < memb.size() && !memb[index].alias.empty())
		return memb[index].alias;
	else
		return join("_", index);
}

string CompilerGLSL::member_decl(const SPIRType &type, const SPIRType &membertype, uint32_t index)
{
	uint64_t memberflags = 0;
	auto &memb = meta[type.self].members;
	if (index < memb.size())
		memberflags = memb[index].decoration_flags;

	return join(layout_for_member(type, index), flags_to_precision_qualifiers_glsl(membertype, memberflags),
	            type_to_glsl(membertype), " ", to_member_name(type, index), type_to_array_glsl(membertype));
}

const char *CompilerGLSL::flags_to_precision_qualifiers_glsl(const SPIRType &type, uint64_t flags)
{
	if (options.es)
	{
		// Structs do not have precision qualifiers.
		if (type.basetype != SPIRType::Float && type.basetype != SPIRType::Int && type.basetype != SPIRType::UInt &&
		    type.basetype != SPIRType::Image && type.basetype != SPIRType::SampledImage &&
		    type.basetype != SPIRType::Sampler)
			return "";

		if (flags & (1ull << DecorationRelaxedPrecision))
		{
			bool implied_fmediump = type.basetype == SPIRType::Float &&
			                        options.fragment.default_float_precision == Options::Mediump &&
			                        execution.model == ExecutionModelFragment;

			bool implied_imediump = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) &&
			                        options.fragment.default_int_precision == Options::Mediump &&
			                        execution.model == ExecutionModelFragment;

			return implied_fmediump || implied_imediump ? "" : "mediump ";
		}
		else
		{
			bool implied_fhighp =
			    type.basetype == SPIRType::Float && ((options.fragment.default_float_precision == Options::Highp &&
			                                          execution.model == ExecutionModelFragment) ||
			                                         (execution.model != ExecutionModelFragment));

			bool implied_ihighp = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) &&
			                      ((options.fragment.default_int_precision == Options::Highp &&
			                        execution.model == ExecutionModelFragment) ||
			                       (execution.model != ExecutionModelFragment));

			return implied_fhighp || implied_ihighp ? "" : "highp ";
		}
	}
	else
		return "";
}

const char *CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id)
{
	return flags_to_precision_qualifiers_glsl(expression_type(id), meta[id].decoration.decoration_flags);
}

string CompilerGLSL::to_qualifiers_glsl(uint32_t id)
{
	auto flags = meta[id].decoration.decoration_flags;
	string res;

	auto *var = maybe_get<SPIRVariable>(id);

	if (var && var->storage == StorageClassWorkgroup && !backend.shared_is_implied)
		res += "shared ";

	res += to_precision_qualifiers_glsl(id);

	//if (flags & (1ull << DecorationSmooth))
	//    res += "smooth ";
	if (flags & (1ull << DecorationFlat))
		res += "flat ";
	if (flags & (1ull << DecorationNoPerspective))
		res += "noperspective ";
	if (flags & (1ull << DecorationPatch))
		res += "patch ";
	if (flags & (1ull << DecorationSample))
		res += "sample ";
	if (flags & (1ull << DecorationInvariant))
		res += "invariant ";

	auto &type = expression_type(id);
	if (type.image.dim != DimSubpassData && type.image.sampled == 2)
	{
		if (flags & (1ull << DecorationNonWritable))
			res += "readonly ";
		if (flags & (1ull << DecorationNonReadable))
			res += "writeonly ";
	}

	return res;
}

string CompilerGLSL::argument_decl(const SPIRFunction::Parameter &arg)
{
	// glslangValidator seems to make all arguments pointer no matter what which is rather bizarre ...
	// Not sure if argument being pointer type should make the argument inout.
	auto &type = expression_type(arg.id);
	const char *direction = "";

	if (type.pointer)
	{
		if (arg.write_count && arg.read_count)
			direction = "inout ";
		else if (arg.write_count)
			direction = "out ";
	}

	return join(direction, to_qualifiers_glsl(arg.id), type_to_glsl(type), " ", to_name(arg.id),
	            type_to_array_glsl(type));
}

string CompilerGLSL::variable_decl(const SPIRVariable &variable)
{
	// Ignore the pointer type since GLSL doesn't have pointers.
	auto &type = get<SPIRType>(variable.basetype);
	auto res = join(to_qualifiers_glsl(variable.self), type_to_glsl(type), " ", to_name(variable.self),
	                type_to_array_glsl(type));
	if (variable.initializer)
		res += join(" = ", to_expression(variable.initializer));
	return res;
}

const char *CompilerGLSL::to_pls_qualifiers_glsl(const SPIRVariable &variable)
{
	auto flags = meta[variable.self].decoration.decoration_flags;
	if (flags & (1ull << DecorationRelaxedPrecision))
		return "mediump ";
	else
		return "highp ";
}

string CompilerGLSL::pls_decl(const PlsRemap &var)
{
	auto &variable = get<SPIRVariable>(var.id);

	SPIRType type;
	type.vecsize = pls_format_to_components(var.format);
	type.basetype = pls_format_to_basetype(var.format);

	return join(to_pls_layout(var.format), to_pls_qualifiers_glsl(variable), type_to_glsl(type), " ",
	            to_name(variable.self));
}

string CompilerGLSL::type_to_array_glsl(const SPIRType &type)
{
	string res;
	for (auto &size : type.array)
	{
		res += "[";
		if (size)
		{
			res += convert_to_string(size);
		}
		else if (!backend.flexible_member_array_supported)
		{
			// For runtime-sized arrays, we can work around
			// lack of standard support for this by simply having
			// a single element array.
			//
			// Runtime length arrays must always be the last element
			// in an interface block.
			res += '1';
		}
		res += "]";
	}
	return res;
}

string CompilerGLSL::image_type_glsl(const SPIRType &type)
{
	auto &imagetype = get<SPIRType>(type.image.type);
	string res;

	switch (imagetype.basetype)
	{
	case SPIRType::Int:
		res = "i";
		break;
	case SPIRType::UInt:
		res = "u";
		break;
	default:
		break;
	}

	if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && options.vulkan_semantics)
		return res + "subpassInput";

	// If we're emulating subpassInput with samplers, force sampler2D
	// so we don't have to specify format.
	if (type.basetype == SPIRType::Image && type.image.dim != DimSubpassData)
		res += type.image.sampled == 2 ? "image" : "texture";
	else
		res += "sampler";

	switch (type.image.dim)
	{
	case Dim1D:
		res += "1D";
		break;
	case Dim2D:
		res += "2D";
		break;
	case Dim3D:
		res += "3D";
		break;
	case DimCube:
		res += "Cube";
		break;

	case DimBuffer:
		if (options.es && options.version < 320)
			require_extension("GL_OES_texture_buffer");
		else if (!options.es && options.version < 300)
			require_extension("GL_EXT_texture_buffer_object");
		res += "Buffer";
		break;

	case DimSubpassData:
		res += "2D";
		break;
	default:
		throw CompilerError("Only 1D, 2D, 3D, Buffer, InputTarget and Cube textures supported.");
	}

	if (type.image.arrayed)
		res += "Array";
	if (type.image.depth)
		res += "Shadow";
	if (type.image.ms)
		res += "MS";

	return res;
}

string CompilerGLSL::type_to_glsl_constructor(const SPIRType &type)
{
	auto e = type_to_glsl(type);
	for (uint32_t i = 0; i < type.array.size(); i++)
		e += "[]";
	return e;
}

string CompilerGLSL::type_to_glsl(const SPIRType &type)
{
	// Ignore the pointer type since GLSL doesn't have pointers.

	switch (type.basetype)
	{
	case SPIRType::Struct:
		// Need OpName lookup here to get a "sensible" name for a struct.
		return to_name(type.self);

	case SPIRType::Image:
	case SPIRType::SampledImage:
		return image_type_glsl(type);

	case SPIRType::Sampler:
		// Not really used.
		return "sampler";

	case SPIRType::Void:
		return "void";

	default:
		break;
	}

	if (type.vecsize == 1 && type.columns == 1) // Scalar builtin
	{
		switch (type.basetype)
		{
		case SPIRType::Bool:
			return "bool";
		case SPIRType::Int:
			return backend.basic_int_type;
		case SPIRType::UInt:
			return backend.basic_uint_type;
		case SPIRType::AtomicCounter:
			return "atomic_uint";
		case SPIRType::Float:
			return "float";
		default:
			return "???";
		}
	}
	else if (type.vecsize > 1 && type.columns == 1) // Vector builtin
	{
		switch (type.basetype)
		{
		case SPIRType::Bool:
			return join("bvec", type.vecsize);
		case SPIRType::Int:
			return join("ivec", type.vecsize);
		case SPIRType::UInt:
			return join("uvec", type.vecsize);
		case SPIRType::Float:
			return join("vec", type.vecsize);
		default:
			return "???";
		}
	}
	else if (type.vecsize == type.columns) // Simple Matrix builtin
	{
		switch (type.basetype)
		{
		case SPIRType::Bool:
			return join("bmat", type.vecsize);
		case SPIRType::Int:
			return join("imat", type.vecsize);
		case SPIRType::UInt:
			return join("umat", type.vecsize);
		case SPIRType::Float:
			return join("mat", type.vecsize);
		default:
			return "???";
		}
	}
	else
	{
		switch (type.basetype)
		{
		case SPIRType::Bool:
			return join("bmat", type.columns, "x", type.vecsize);
		case SPIRType::Int:
			return join("imat", type.columns, "x", type.vecsize);
		case SPIRType::UInt:
			return join("umat", type.columns, "x", type.vecsize);
		case SPIRType::Float:
			return join("mat", type.columns, "x", type.vecsize);
		default:
			return "???";
		}
	}
}

void CompilerGLSL::add_local_variable(uint32_t id)
{
	auto &name = meta[id].decoration.alias;
	if (name.empty())
		return;

	// Reserved for temporaries.
	if (name[0] == '_')
	{
		name.clear();
		return;
	}

	update_name_cache(local_variables, name);
}

void CompilerGLSL::require_extension(const string &ext)
{
	if (forced_extensions.find(ext) == end(forced_extensions))
	{
		forced_extensions.insert(ext);
		force_recompile = true;
	}
}

bool CompilerGLSL::check_atomic_image(uint32_t id)
{
	auto &type = expression_type(id);
	if (type.storage == StorageClassImage)
	{
		if (options.es && options.version < 320)
			require_extension("GL_OES_shader_image_atomic");

		auto *var = maybe_get_backing_variable(id);
		if (var)
		{
			auto &flags = meta.at(var->self).decoration.decoration_flags;
			if (flags & ((1ull << DecorationNonWritable) | (1ull << DecorationNonReadable)))
			{
				flags &= ~(1ull << DecorationNonWritable);
				flags &= ~(1ull << DecorationNonReadable);
				force_recompile = true;
			}
		}
		return true;
	}
	else
		return false;
}

void CompilerGLSL::emit_function_prototype(SPIRFunction &func, uint64_t return_flags)
{
	local_variables.clear();
	string decl;

	auto &type = get<SPIRType>(func.return_type);
	decl += flags_to_precision_qualifiers_glsl(type, return_flags);
	decl += type_to_glsl(type);
	decl += " ";

	if (func.self == execution.entry_point)
	{
		decl += "main";
		processing_entry_point = true;
	}
	else
		decl += to_name(func.self);

	decl += "(";
	for (auto &arg : func.arguments)
	{
		// Might change the variable name if it already exists in this function.
		// SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation
		// to use same name for variables.
		// Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates.
		add_local_variable(arg.id);

		decl += argument_decl(arg);
		if (&arg != &func.arguments.back())
			decl += ", ";

		// Hold a pointer to the parameter so we can invalidate the readonly field if needed.
		auto *var = maybe_get<SPIRVariable>(arg.id);
		if (var)
			var->parameter = &arg;
	}

	decl += ")";
	statement(decl);
}

void CompilerGLSL::emit_function(SPIRFunction &func, uint64_t return_flags)
{
	// Avoid potential cycles.
	if (func.active)
		return;
	func.active = true;

	// If we depend on a function, emit that function before we emit our own function.
	for (auto block : func.blocks)
	{
		auto &b = get<SPIRBlock>(block);
		for (auto &i : b.ops)
		{
			auto ops = stream(i);
			auto op = static_cast<Op>(i.op);

			if (op == OpFunctionCall)
			{
				// Recursively emit functions which are called.
				uint32_t id = ops[2];
				emit_function(get<SPIRFunction>(id), meta[ops[1]].decoration.decoration_flags);
			}
		}
	}

	emit_function_prototype(func, return_flags);
	begin_scope();

	current_function = &func;

	for (auto &v : func.local_variables)
	{
		auto &var = get<SPIRVariable>(v);
		if (expression_is_lvalue(v))
		{
			add_local_variable(var.self);

			if (var.initializer)
				statement(variable_decl(var), ";");
			else
			{
				// Don't declare variable until first use to declutter the GLSL output quite a lot.
				// If we don't touch the variable before first branch,
				// declare it then since we need variable declaration to be in top scope.
				var.deferred_declaration = true;
			}
		}
		else
		{
			// HACK: SPIRV likes to use samplers and images as local variables, but GLSL does not allow
			// this. For these types (non-lvalue), we enforce forwarding through a shadowed variable.
			// This means that when we OpStore to these variables, we just write in the expression ID directly.
			// This breaks any kind of branching, since the variable must be statically assigned.
			// Branching on samplers and images would be pretty much impossible to fake in GLSL.
			var.statically_assigned = true;
		}
	}

	auto &entry_block = get<SPIRBlock>(func.entry_block);
	entry_block.loop_dominator = SPIRBlock::NoDominator;
	emit_block_chain(entry_block);

	end_scope();
	processing_entry_point = false;
	statement("");
}

void CompilerGLSL::emit_fixup()
{
	if (execution.model == ExecutionModelVertex && options.vertex.fixup_clipspace)
	{
		const char *suffix = backend.float_literal_suffix ? "f" : "";
		statement("gl_Position.z = 2.0", suffix, " * gl_Position.z - gl_Position.w;");
	}
}

bool CompilerGLSL::flush_phi_required(uint32_t from, uint32_t to)
{
	auto &child = get<SPIRBlock>(to);
	for (auto &phi : child.phi_variables)
		if (phi.parent == from)
			return true;
	return false;
}

void CompilerGLSL::flush_phi(uint32_t from, uint32_t to)
{
	auto &child = get<SPIRBlock>(to);

	for (auto &phi : child.phi_variables)
		if (phi.parent == from)
			statement(to_expression(phi.function_variable), " = ", to_expression(phi.local_variable), ";");
}

void CompilerGLSL::branch(uint32_t from, uint32_t to)
{
	flush_phi(from, to);
	flush_all_active_variables();

	// This is only a continue if we branch to our loop dominator.
	if (loop_blocks.find(to) != end(loop_blocks) && get<SPIRBlock>(from).loop_dominator == to)
	{
		// This can happen if we had a complex continue block which was emitted.
		// Once the continue block tries to branch to the loop header, just emit continue;
		// and end the chain here.
		statement("continue;");
	}
	else if (is_continue(to))
	{
		auto &to_block = get<SPIRBlock>(to);
		if (to_block.complex_continue)
		{
			// Just emit the whole block chain as is.
			auto usage_counts = expression_usage_counts;
			auto invalid = invalid_expressions;

			emit_block_chain(to_block);

			// Expression usage counts and invalid expressions
			// are moot after returning from the continue block.
			// Since we emit the same block multiple times,
			// we don't want to invalidate ourselves.
			expression_usage_counts = usage_counts;
			invalid_expressions = invalid;
		}
		else
		{
			auto &from_block = get<SPIRBlock>(from);
			auto &dominator = get<SPIRBlock>(from_block.loop_dominator);

			// For non-complex continue blocks, we implicitly branch to the continue block
			// by having the continue block be part of the loop header in for (; ; continue-block).
			bool outside_control_flow = block_is_outside_flow_control_from_block(dominator, from_block);

			// Some simplification for for-loops. We always end up with a useless continue;
			// statement since we branch to a loop block.
			// Walk the CFG, if we uncoditionally execute the block calling continue assuming we're in the loop block,
			// we can avoid writing out an explicit continue statement.
			// Similar optimization to return statements if we know we're outside flow control.
			if (!outside_control_flow)
				statement("continue;");
		}
	}
	else if (is_break(to))
		statement("break;");
	else if (!is_conditional(to))
		emit_block_chain(get<SPIRBlock>(to));
}

void CompilerGLSL::branch(uint32_t from, uint32_t cond, uint32_t true_block, uint32_t false_block)
{
	// If we branch directly to a selection merge target, we don't really need a code path.
	bool true_sub = !is_conditional(true_block);
	bool false_sub = !is_conditional(false_block);

	if (true_sub)
	{
		statement("if (", to_expression(cond), ")");
		begin_scope();
		branch(from, true_block);
		end_scope();

		if (false_sub)
		{
			statement("else");
			begin_scope();
			branch(from, false_block);
			end_scope();
		}
		else if (flush_phi_required(from, false_block))
		{
			statement("else");
			begin_scope();
			flush_phi(from, false_block);
			end_scope();
		}
	}
	else if (false_sub && !true_sub)
	{
		// Only need false path, use negative conditional.
		statement("if (!", to_expression(cond), ")");
		begin_scope();
		branch(from, false_block);
		end_scope();

		if (flush_phi_required(from, true_block))
		{
			statement("else");
			begin_scope();
			flush_phi(from, true_block);
			end_scope();
		}
	}
}

void CompilerGLSL::propagate_loop_dominators(const SPIRBlock &block)
{
	// Propagate down the loop dominator block, so that dominated blocks can back trace.
	if (block.merge == SPIRBlock::MergeLoop || block.loop_dominator)
	{
		uint32_t dominator = block.merge == SPIRBlock::MergeLoop ? block.self : block.loop_dominator;

		auto set_dominator = [this](uint32_t self, uint32_t new_dominator)
		{
			auto &dominated_block = this->get<SPIRBlock>(self);

			// If we already have a loop dominator, we're trying to break out to merge targets
			// which should not update the loop dominator.
			if (!dominated_block.loop_dominator)
				dominated_block.loop_dominator = new_dominator;
		};

		// After merging a loop, we inherit the loop dominator always.
		if (block.merge_block)
			set_dominator(block.merge_block, block.loop_dominator);

		if (block.true_block)
			set_dominator(block.true_block, dominator);
		if (block.false_block)
			set_dominator(block.false_block, dominator);
		if (block.next_block)
			set_dominator(block.next_block, dominator);

		for (auto &c : block.cases)
			set_dominator(c.block, dominator);

		// In older glslang output continue_block can be == loop header.
		if (block.continue_block && block.continue_block != block.self)
			set_dominator(block.continue_block, dominator);
	}
}

// FIXME: This currently cannot handle complex continue blocks
// as in do-while.
// This should be seen as a "trivial" continue block.
string CompilerGLSL::emit_continue_block(uint32_t continue_block)
{
	auto *block = &get<SPIRBlock>(continue_block);

	// While emitting the continue block, declare_temporary will check this
	// if we have to emit temporaries.
	current_continue_block = block;

	vector<string> statements;

	// Capture all statements into our list.
	auto *old = redirect_statement;
	redirect_statement = &statements;

	// Stamp out all blocks one after each other.
	while (loop_blocks.find(block->self) == end(loop_blocks))
	{
		propagate_loop_dominators(*block);
		// Write out all instructions we have in this block.
		for (auto &op : block->ops)
			emit_instruction(op);

		// For plain branchless for/while continue blocks.
		if (block->next_block)
		{
			flush_phi(continue_block, block->next_block);
			block = &get<SPIRBlock>(block->next_block);
		}
		// For do while blocks. The last block will be a select block.
		else if (block->true_block)
		{
			flush_phi(continue_block, block->true_block);
			block = &get<SPIRBlock>(block->true_block);
		}
	}

	// Restore old pointer.
	redirect_statement = old;

	// Somewhat ugly, strip off the last ';' since we use ',' instead.
	// Ideally, we should select this behavior in statement().
	for (auto &s : statements)
	{
		if (!s.empty() && s.back() == ';')
			s.pop_back();
	}

	current_continue_block = nullptr;
	return merge(statements);
}

bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method)
{
	SPIRBlock::ContinueBlockType continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));

	if (method == SPIRBlock::MergeToSelectForLoop)
	{
		uint32_t current_count = statement_count;
		// If we're trying to create a true for loop,
		// we need to make sure that all opcodes before branch statement do not actually emit any code.
		// We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
		for (auto &op : block.ops)
			emit_instruction(op);

		bool condition_is_temporary = forced_temporaries.find(block.condition) == end(forced_temporaries);

		// This can work! We only did trivial things which could be forwarded in block body!
		if (current_count == statement_count && condition_is_temporary)
		{
			switch (continue_type)
			{
			case SPIRBlock::ForLoop:
				statement("for (; ", to_expression(block.condition), "; ", emit_continue_block(block.continue_block),
				          ")");
				break;

			case SPIRBlock::WhileLoop:
				statement("while (", to_expression(block.condition), ")");
				break;

			default:
				throw CompilerError("For/while loop detected, but need while/for loop semantics.");
			}

			begin_scope();
			return true;
		}
		else
		{
			block.disable_block_optimization = true;
			force_recompile = true;
			begin_scope(); // We'll see an end_scope() later.
			return false;
		}
	}
	else if (method == SPIRBlock::MergeToDirectForLoop)
	{
		uint32_t current_count = statement_count;
		auto &child = get<SPIRBlock>(block.next_block);

		// If we're trying to create a true for loop,
		// we need to make sure that all opcodes before branch statement do not actually emit any code.
		// We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
		for (auto &op : child.ops)
			emit_instruction(op);

		bool condition_is_temporary = forced_temporaries.find(child.condition) == end(forced_temporaries);

		if (current_count == statement_count && condition_is_temporary)
		{
			propagate_loop_dominators(child);

			switch (continue_type)
			{
			case SPIRBlock::ForLoop:
				statement("for (; ", to_expression(child.condition), "; ", emit_continue_block(block.continue_block),
				          ")");
				break;

			case SPIRBlock::WhileLoop:
				statement("while (", to_expression(child.condition), ")");
				break;

			default:
				throw CompilerError("For/while loop detected, but need while/for loop semantics.");
			}

			begin_scope();
			branch(child.self, child.true_block);
			return true;
		}
		else
		{
			block.disable_block_optimization = true;
			force_recompile = true;
			begin_scope(); // We'll see an end_scope() later.
			return false;
		}
	}
	else
		return false;
}

void CompilerGLSL::flush_undeclared_variables()
{
	// Declare undeclared variables.
	if (current_function->flush_undeclared)
	{
		for (auto &v : current_function->local_variables)
		{
			auto &var = get<SPIRVariable>(v);
			if (var.deferred_declaration)
				statement(variable_decl(var), ";");
			var.deferred_declaration = false;
		}
		current_function->flush_undeclared = false;
	}
}

void CompilerGLSL::emit_block_chain(SPIRBlock &block)
{
	propagate_loop_dominators(block);

	bool select_branch_to_true_block = false;
	bool skip_direct_branch = false;

	// If we need to force temporaries for certain IDs due to continue blocks, do it before starting loop header.
	for (auto &tmp : block.declare_temporary)
	{
		auto flags = meta[tmp.second].decoration.decoration_flags;
		auto &type = get<SPIRType>(tmp.first);
		statement(flags_to_precision_qualifiers_glsl(type, flags), type_to_glsl(type), " ", to_name(tmp.second),
		          type_to_array_glsl(type), ";");
	}

	SPIRBlock::ContinueBlockType continue_type = SPIRBlock::ContinueNone;
	if (block.continue_block)
		continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));

	// This is the older loop behavior in glslang which branches to loop body directly from the loop header.
	if (block_is_loop_candidate(block, SPIRBlock::MergeToSelectForLoop))
	{
		flush_undeclared_variables();
		if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectForLoop))
		{
			// The body of while, is actually just the true block, so always branch there
			// unconditionally.
			select_branch_to_true_block = true;
		}
	}
	// This is the newer loop behavior in glslang which branches from Loop header directly to
	// a new block, which in turn has a OpBranchSelection without a selection merge.
	else if (block_is_loop_candidate(block, SPIRBlock::MergeToDirectForLoop))
	{
		flush_undeclared_variables();
		if (attempt_emit_loop_header(block, SPIRBlock::MergeToDirectForLoop))
			skip_direct_branch = true;
	}
	else if (continue_type == SPIRBlock::DoWhileLoop)
	{
		statement("do");
		begin_scope();
		for (auto &op : block.ops)
			emit_instruction(op);
	}
	else if (block.merge == SPIRBlock::MergeLoop)
	{
		flush_undeclared_variables();

		// We have a generic loop without any distinguishable pattern like for, while or do while.
		get<SPIRBlock>(block.continue_block).complex_continue = true;
		continue_type = SPIRBlock::ComplexLoop;

		statement("for (;;)");
		begin_scope();
		for (auto &op : block.ops)
			emit_instruction(op);
	}
	else
	{
		for (auto &op : block.ops)
			emit_instruction(op);
	}

	bool emit_next_block = true;

	// Handle end of block.
	switch (block.terminator)
	{
	case SPIRBlock::Direct:
		// True when emitting complex continue block.
		if (block.loop_dominator == block.next_block)
		{
			branch(block.self, block.next_block);
			emit_next_block = false;
		}
		// True if MergeToDirectForLoop succeeded.
		else if (skip_direct_branch)
			emit_next_block = false;
		else if (is_continue(block.next_block) || is_break(block.next_block) || is_conditional(block.next_block))
		{
			branch(block.self, block.next_block);
			emit_next_block = false;
		}
		break;

	case SPIRBlock::Select:
		// True if MergeToSelectForLoop succeeded.
		if (select_branch_to_true_block)
			branch(block.self, block.true_block);
		else
		{
			flush_undeclared_variables();
			branch(block.self, block.condition, block.true_block, block.false_block);
		}
		break;

	case SPIRBlock::MultiSelect:
	{
		flush_undeclared_variables();
		auto &type = expression_type(block.condition);
		bool uint32_t_case = type.basetype == SPIRType::UInt;

		statement("switch (", to_expression(block.condition), ")");
		begin_scope();

		for (auto &c : block.cases)
		{
			auto case_value =
			    uint32_t_case ? convert_to_string(uint32_t(c.value)) : convert_to_string(int32_t(c.value));
			statement("case ", case_value, ":");
			begin_scope();
			branch(block.self, c.block);
			end_scope();
		}

		if (block.default_block != block.next_block)
		{
			statement("default:");
			begin_scope();
			if (is_break(block.default_block))
				throw CompilerError("Cannot break; out of a switch statement and out of a loop at the same time ...");
			branch(block.self, block.default_block);
			end_scope();
		}
		else if (flush_phi_required(block.self, block.next_block))
		{
			statement("default:");
			begin_scope();
			flush_phi(block.self, block.next_block);
			statement("break;");
			end_scope();
		}

		end_scope();
		break;
	}

	case SPIRBlock::Return:
		if (processing_entry_point)
			emit_fixup();

		if (block.return_value)
		{
			// OpReturnValue can return Undef, so don't emit anything for this case.
			if (ids.at(block.return_value).get_type() != TypeUndef)
				statement("return ", to_expression(block.return_value), ";");
		}
		// If this block is the very final block and not called from control flow,
		// we do not need an explicit return which looks out of place. Just end the function here.
		// In the very weird case of for(;;) { return; } executing return is unconditional,
		// but we actually need a return here ...
		else if (!block_is_outside_flow_control_from_block(get<SPIRBlock>(current_function->entry_block), block) ||
		         block.loop_dominator != SPIRBlock::NoDominator)
			statement("return;");
		break;

	case SPIRBlock::Kill:
		statement("discard;");
		break;

	default:
		throw CompilerError("Unimplemented block terminator.");
	}

	if (block.next_block && emit_next_block)
	{
		// If we hit this case, we're dealing with an unconditional branch, which means we will output
		// that block after this. If we had selection merge, we already flushed phi variables.
		if (block.merge != SPIRBlock::MergeSelection)
			flush_phi(block.self, block.next_block);
		emit_block_chain(get<SPIRBlock>(block.next_block));
	}

	if (block.merge == SPIRBlock::MergeLoop)
	{
		if (continue_type == SPIRBlock::DoWhileLoop)
		{
			// Make sure that we run the continue block to get the expressions set, but this
			// should become an empty string.
			// We have no fallbacks if we cannot forward everything to temporaries ...
			auto statements = emit_continue_block(block.continue_block);
			if (!statements.empty())
			{
				// The DoWhile block has side effects, force ComplexLoop pattern next pass.
				get<SPIRBlock>(block.continue_block).complex_continue = true;
				force_recompile = true;
			}

			end_scope_decl(join("while (", to_expression(get<SPIRBlock>(block.continue_block).condition), ")"));
		}
		else
			end_scope();

		flush_phi(block.self, block.merge_block);
		emit_block_chain(get<SPIRBlock>(block.merge_block));
	}
}

void CompilerGLSL::begin_scope()
{
	statement("{");
	indent++;
}

void CompilerGLSL::end_scope()
{
	if (!indent)
		throw CompilerError("Popping empty indent stack.");
	indent--;
	statement("}");
}

void CompilerGLSL::end_scope_decl()
{
	if (!indent)
		throw CompilerError("Popping empty indent stack.");
	indent--;
	statement("};");
}

void CompilerGLSL::end_scope_decl(const string &decl)
{
	if (!indent)
		throw CompilerError("Popping empty indent stack.");
	indent--;
	statement("} ", decl, ";");
}