Vulkan: Seamful cube map emulation

In GLSL, a cube texture is sampled with one of textureCube* functions. This function takes a 3D coordinate which is a vector from the center of the cube and identifies a direction to sample from. GLES2.0 has the following table that translates this 3D coordinate (Rx, Ry, Rz) to a face and ST coordinates within that face. This table can be found in Section 3.7.5 (Cube Map Texture Selection). A compiler pass is implemented in ANGLE that replaces samplerCube declarations with a sampler2DArray. The textureCube* functions are replaced with the corresponding texture* functions with the translated coordinates according to that table. Gradients provided to textureCubeGrad are translated using the same formulae, which is not precise but the spec specifies this projection to be implementation dependent. Helper invocations enabled through WQM (whole quad mode) cause a nuisance in that the extrapolated varyings used as coordinates in a textureCube call could have a different major axis (and therefore face) from the non-helper invocations that lie within the geometry. subgroupQuadSwap* operations are used in conjunction with gl_HelperInvocation to make sure the helper threads calculate texture UVs in the same face as the non-helper invocations. Bug: angleproject:3300 Bug: angleproject:3240 Bug: angleproject:3243 Bug: angleproject:3732 Change-Id: I0cb6a9b1f2e1e6a392b5baca1c7118ed1c502ccf Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/1715977 Reviewed-by: Shahbaz Youssefi <syoussefi@chromium.org> Commit-Queue: Shahbaz Youssefi <syoussefi@chromium.org>
2019-07-25 16:13:02 -04:00 · 2019-07-25 16:13:02 -04:00 · 93560ef51d
--- a/include/GLSLANG/ShaderLang.h
+++ b/include/GLSLANG/ShaderLang.h
@ -26,7 +26,7 @@

 // Version number for shader translation API.
 // It is incremented every time the API changes.
-#define ANGLE_SH_VERSION 210
+#define ANGLE_SH_VERSION 211

 enum ShShaderSpec
 {
@ -287,6 +287,11 @@ const ShCompileOptions SH_FORCE_ATOMIC_VALUE_RESOLUTION = UINT64_C(1) << 42;
 // Rewrite gl_BaseVertex and gl_BaseInstance as uniform int
 const ShCompileOptions SH_EMULATE_GL_BASE_VERTEX_BASE_INSTANCE = UINT64_C(1) << 43;

+// Emulate seamful cube map sampling for OpenGL ES2.0.  Currently only applies to the Vulkan
+// backend, as subgroup operations are used.  Once that dependency is broken, could be used with
+// the other backends as well.
+const ShCompileOptions SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING = UINT64_C(1) << 44;
+
 // Defines alternate strategies for implementing array index clamping.
 enum ShArrayIndexClampingStrategy
 {
--- a/include/platform/FeaturesVk.h
+++ b/include/platform/FeaturesVk.h
@ -194,6 +194,12 @@ struct FeaturesVk : FeatureSetBase
        "On Pixel2, keep using transient vkCommandBuffer to work around driver issue in reseting"
        "vkCommandBuffer",
        &members, "http://b/135763283"};
+
+    // Seamful cube map emulation misbehaves on the AMD windows driver, so it's disallowed.
+    Feature disallowSeamfulCubeMapEmulation = {
+        "disallow_seamful_cube_map_emulation", FeatureCategory::VulkanWorkarounds,
+        "Seamful cube map emulation misbehaves on the AMD windows driver, so it's disallowed",
+        &members, "http://anglebug.com/3243"};
 };

 inline FeaturesVk::FeaturesVk()  = default;
--- a/src/common/utilities.cpp
+++ b/src/common/utilities.cpp
@ -422,6 +422,20 @@ bool IsSamplerType(GLenum type)
    return false;
 }

+bool IsSamplerCubeType(GLenum type)
+{
+    switch (type)
+    {
+        case GL_SAMPLER_CUBE:
+        case GL_INT_SAMPLER_CUBE:
+        case GL_UNSIGNED_INT_SAMPLER_CUBE:
+        case GL_SAMPLER_CUBE_SHADOW:
+            return true;
+    }
+
+    return false;
+}
+
 bool IsImageType(GLenum type)
 {
    switch (type)
--- a/src/common/utilities.h
+++ b/src/common/utilities.h
@ -38,6 +38,7 @@ size_t VariableExternalSize(GLenum type);
 int VariableRowCount(GLenum type);
 int VariableColumnCount(GLenum type);
 bool IsSamplerType(GLenum type);
+bool IsSamplerCubeType(GLenum type);
 bool IsImageType(GLenum type);
 bool IsImage2DType(GLenum type);
 bool IsAtomicCounterType(GLenum type);
--- a/src/compiler.gni
+++ b/src/compiler.gni
@ -156,6 +156,8 @@ angle_translator_sources = [
  "src/compiler/translator/tree_ops/RewriteAtomicCounters.h",
  "src/compiler/translator/tree_ops/RewriteAtomicFunctionExpressions.cpp",
  "src/compiler/translator/tree_ops/RewriteAtomicFunctionExpressions.h",
+  "src/compiler/translator/tree_ops/RewriteCubeMapSamplersAs2DArray.cpp",
+  "src/compiler/translator/tree_ops/RewriteCubeMapSamplersAs2DArray.h",
  "src/compiler/translator/tree_ops/RewriteDfdy.cpp",
  "src/compiler/translator/tree_ops/RewriteDfdy.h",
  "src/compiler/translator/tree_ops/RewriteDoWhile.cpp",
--- a/src/compiler/translator/TranslatorVulkan.cpp
+++ b/src/compiler/translator/TranslatorVulkan.cpp
@ -18,6 +18,7 @@
 #include "compiler/translator/StaticType.h"
 #include "compiler/translator/tree_ops/NameEmbeddedUniformStructs.h"
 #include "compiler/translator/tree_ops/RewriteAtomicCounters.h"
+#include "compiler/translator/tree_ops/RewriteCubeMapSamplersAs2DArray.h"
 #include "compiler/translator/tree_ops/RewriteDfdy.h"
 #include "compiler/translator/tree_ops/RewriteStructSamplers.h"
 #include "compiler/translator/tree_util/BuiltIn_autogen.h"
@ -644,6 +645,11 @@ void TranslatorVulkan::translate(TIntermBlock *root,

    sink << "#version 450 core\n";

+    if (compileOptions & SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING)
+    {
+        sink << "#extension GL_KHR_shader_subgroup_quad : require\n";
+    }
+
    // Write out default uniforms into a uniform block assigned to a specific set/binding.
    int defaultUniformCount        = 0;
    int structTypesUsedForUniforms = 0;
@ -680,6 +686,14 @@ void TranslatorVulkan::translate(TIntermBlock *root,
        structTypesTraverser.updateTree();
    }

+    // Rewrite samplerCubes as sampler2DArrays.  This must be done after rewriting struct samplers
+    // as it doesn't expect that.
+    if (compileOptions & SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING)
+    {
+        RewriteCubeMapSamplersAs2DArray(root, &getSymbolTable(),
+                                        getShaderType() == GL_FRAGMENT_SHADER);
+    }
+
    if (defaultUniformCount > 0)
    {
        sink << "\n@@ LAYOUT-defaultUniforms(std140) @@ uniform defaultUniforms\n{\n";
--- a/src/compiler/translator/Types.h
+++ b/src/compiler/translator/Types.h
@ -329,6 +329,7 @@ class TType
    void realize();

    bool isSampler() const { return IsSampler(type); }
+    bool isSamplerCube() const { return type == EbtSamplerCube; }
    bool isAtomicCounter() const { return IsAtomicCounter(type); }

  private:
--- a/src/compiler/translator/tree_ops/RewriteCubeMapSamplersAs2DArray.cpp
+++ b/src/compiler/translator/tree_ops/RewriteCubeMapSamplersAs2DArray.cpp
@ -0,0 +1,855 @@
+//
+// Copyright 2019 The ANGLE Project Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+//
+// RewriteCubeMapSamplersAs2DArray: Change samplerCube samplers to sampler2DArray for seamful cube
+// map emulation.
+//
+
+#include "compiler/translator/tree_ops/RewriteCubeMapSamplersAs2DArray.h"
+
+#include "compiler/translator/ImmutableStringBuilder.h"
+#include "compiler/translator/StaticType.h"
+#include "compiler/translator/SymbolTable.h"
+#include "compiler/translator/tree_util/IntermNode_util.h"
+#include "compiler/translator/tree_util/IntermTraverse.h"
+#include "compiler/translator/tree_util/ReplaceVariable.h"
+
+namespace sh
+{
+namespace
+{
+constexpr ImmutableString kCoordTransformFuncName("ANGLECubeMapCoordTransform");
+
+// Retrieve a value from another invocation in the quad.  See comment in
+// declareCoordTranslationFunction.
+TIntermSymbol *GetValueFromNeighbor(TSymbolTable *symbolTable,
+                                    TIntermBlock *body,
+                                    TFunction *quadSwap,
+                                    TIntermTyped *variable,
+                                    const TType *variableType)
+{
+    TIntermTyped *neighborValue =
+        TIntermAggregate::CreateRawFunctionCall(*quadSwap, new TIntermSequence({variable}));
+
+    TIntermSymbol *neighbor = new TIntermSymbol(CreateTempVariable(symbolTable, variableType));
+    body->appendStatement(CreateTempInitDeclarationNode(&neighbor->variable(), neighborValue));
+
+    return neighbor;
+}
+
+// If this is a helper invocation, retrieve the layer index (cube map face) from another invocation
+// in the quad that is not a helper.  Get the corresponding ma value from the same invocation as
+// well.  See comment in declareCoordTranslationFunction.
+void GetLayerFromNonHelperInvocation(TSymbolTable *symbolTable,
+                                     TIntermBlock *body,
+                                     TIntermTyped *l,
+                                     TIntermTyped *ma)
+{
+    TVariable *gl_HelperInvocationVar =
+        new TVariable(symbolTable, ImmutableString("gl_HelperInvocation"),
+                      StaticType::GetBasic<EbtBool>(), SymbolType::AngleInternal);
+    TIntermSymbol *gl_HelperInvocation = new TIntermSymbol(gl_HelperInvocationVar);
+
+    const TType *boolType  = StaticType::GetBasic<EbtBool>();
+    const TType *floatType = StaticType::GetBasic<EbtFloat>();
+    TFunction *quadSwapHorizontalBool =
+        new TFunction(symbolTable, ImmutableString("subgroupQuadSwapHorizontal"),
+                      SymbolType::AngleInternal, boolType, true);
+    TFunction *quadSwapHorizontalFloat =
+        new TFunction(symbolTable, ImmutableString("subgroupQuadSwapHorizontal"),
+                      SymbolType::AngleInternal, floatType, true);
+    TFunction *quadSwapVerticalBool =
+        new TFunction(symbolTable, ImmutableString("subgroupQuadSwapVertical"),
+                      SymbolType::AngleInternal, boolType, true);
+    TFunction *quadSwapVerticalFloat =
+        new TFunction(symbolTable, ImmutableString("subgroupQuadSwapVertical"),
+                      SymbolType::AngleInternal, floatType, true);
+    TFunction *quadSwapDiagonalFloat =
+        new TFunction(symbolTable, ImmutableString("subgroupQuadSwapDiagonal"),
+                      SymbolType::AngleInternal, floatType, true);
+
+    quadSwapHorizontalBool->addParameter(CreateTempVariable(symbolTable, boolType));
+    quadSwapVerticalBool->addParameter(CreateTempVariable(symbolTable, boolType));
+    quadSwapHorizontalFloat->addParameter(CreateTempVariable(symbolTable, floatType));
+    quadSwapVerticalFloat->addParameter(CreateTempVariable(symbolTable, floatType));
+    quadSwapDiagonalFloat->addParameter(CreateTempVariable(symbolTable, floatType));
+
+    // Get the layer from the horizontal, vertical and diagonal neighbor.  These should be done
+    // outside `if`s so the non-helper thread is not turned inactive.
+    TIntermSymbol *lH =
+        GetValueFromNeighbor(symbolTable, body, quadSwapHorizontalFloat, l, floatType);
+    TIntermSymbol *lV =
+        GetValueFromNeighbor(symbolTable, body, quadSwapVerticalFloat, l->deepCopy(), floatType);
+    TIntermSymbol *lD =
+        GetValueFromNeighbor(symbolTable, body, quadSwapDiagonalFloat, l->deepCopy(), floatType);
+
+    // Get the value of ma from the neighbors.  Similarly, these should be done outside `if`s.
+    TIntermSymbol *maH =
+        GetValueFromNeighbor(symbolTable, body, quadSwapHorizontalFloat, ma, floatType);
+    TIntermSymbol *maV =
+        GetValueFromNeighbor(symbolTable, body, quadSwapVerticalFloat, ma->deepCopy(), floatType);
+    TIntermSymbol *maD =
+        GetValueFromNeighbor(symbolTable, body, quadSwapDiagonalFloat, ma->deepCopy(), floatType);
+
+    // Get the value of gl_HelperInvocation from the neighbors too.
+    TIntermSymbol *horizontalIsHelper = GetValueFromNeighbor(
+        symbolTable, body, quadSwapHorizontalBool, gl_HelperInvocation->deepCopy(), boolType);
+    TIntermSymbol *verticalIsHelper = GetValueFromNeighbor(
+        symbolTable, body, quadSwapVerticalBool, gl_HelperInvocation->deepCopy(), boolType);
+
+    // Note(syoussefi): if the sampling is done inside an if with a non-uniform condition, it's not
+    // enough to test if the neighbor is not a helper, we should also check if it's active.
+    TIntermTyped *horizontalIsNonHelper =
+        new TIntermUnary(EOpLogicalNot, horizontalIsHelper, nullptr);
+    TIntermTyped *verticalIsNonHelper = new TIntermUnary(EOpLogicalNot, verticalIsHelper, nullptr);
+
+    TIntermTyped *lVD  = new TIntermTernary(verticalIsNonHelper, lV, lD);
+    TIntermTyped *lHVD = new TIntermTernary(horizontalIsNonHelper, lH, lVD);
+
+    TIntermTyped *maVD  = new TIntermTernary(verticalIsNonHelper->deepCopy(), maV, maD);
+    TIntermTyped *maHVD = new TIntermTernary(horizontalIsNonHelper->deepCopy(), maH, maVD);
+
+    TIntermBlock *helperBody = new TIntermBlock;
+    helperBody->appendStatement(new TIntermBinary(EOpAssign, l->deepCopy(), lHVD));
+    helperBody->appendStatement(new TIntermBinary(EOpAssign, ma->deepCopy(), maHVD));
+
+    TIntermIfElse *ifHelper = new TIntermIfElse(gl_HelperInvocation, helperBody, nullptr);
+    body->appendStatement(ifHelper);
+}
+
+// Generate the common transformation in each coord transformation case.  See comment in
+// declareCoordTranslationFunction().  Called with P, dPdx and dPdy.
+void TransformXMajor(TIntermBlock *block,
+                     TIntermTyped *x,
+                     TIntermTyped *y,
+                     TIntermTyped *z,
+                     TIntermTyped *uc,
+                     TIntermTyped *vc)
+{
+    // uc = -sign(x)*z
+    // vc = -y
+    TIntermTyped *signX = new TIntermUnary(EOpSign, x->deepCopy(), nullptr);
+
+    TIntermTyped *ucValue =
+        new TIntermUnary(EOpNegative, new TIntermBinary(EOpMul, signX, z->deepCopy()), nullptr);
+    TIntermTyped *vcValue = new TIntermUnary(EOpNegative, y->deepCopy(), nullptr);
+
+    block->appendStatement(new TIntermBinary(EOpAssign, uc->deepCopy(), ucValue));
+    block->appendStatement(new TIntermBinary(EOpAssign, vc->deepCopy(), vcValue));
+}
+
+void TransformYMajor(TIntermBlock *block,
+                     TIntermTyped *x,
+                     TIntermTyped *y,
+                     TIntermTyped *z,
+                     TIntermTyped *uc,
+                     TIntermTyped *vc)
+{
+    // uc = x
+    // vc = sign(y)*z
+    TIntermTyped *signY = new TIntermUnary(EOpSign, y->deepCopy(), nullptr);
+
+    TIntermTyped *ucValue = x->deepCopy();
+    TIntermTyped *vcValue = new TIntermBinary(EOpMul, signY, z->deepCopy());
+
+    block->appendStatement(new TIntermBinary(EOpAssign, uc->deepCopy(), ucValue));
+    block->appendStatement(new TIntermBinary(EOpAssign, vc->deepCopy(), vcValue));
+}
+
+void TransformZMajor(TIntermBlock *block,
+                     TIntermTyped *x,
+                     TIntermTyped *y,
+                     TIntermTyped *z,
+                     TIntermTyped *uc,
+                     TIntermTyped *vc)
+{
+    // uc = sign(z)*x
+    // vc = -y
+    TIntermTyped *signZ = new TIntermUnary(EOpSign, z->deepCopy(), nullptr);
+
+    TIntermTyped *ucValue = new TIntermBinary(EOpMul, signZ, x->deepCopy());
+    TIntermTyped *vcValue = new TIntermUnary(EOpNegative, y->deepCopy(), nullptr);
+
+    block->appendStatement(new TIntermBinary(EOpAssign, uc->deepCopy(), ucValue));
+    block->appendStatement(new TIntermBinary(EOpAssign, vc->deepCopy(), vcValue));
+}
+
+class RewriteCubeMapSamplersAs2DArrayTraverser : public TIntermTraverser
+{
+  public:
+    RewriteCubeMapSamplersAs2DArrayTraverser(TSymbolTable *symbolTable, bool isFragmentShader)
+        : TIntermTraverser(true, true, true, symbolTable),
+          mCubeXYZToArrayUVL(nullptr),
+          mIsFragmentShader(isFragmentShader),
+          mCoordTranslationFunctionDecl(nullptr)
+    {}
+
+    bool visitDeclaration(Visit visit, TIntermDeclaration *node) override
+    {
+        if (visit != PreVisit)
+        {
+            return true;
+        }
+
+        const TIntermSequence &sequence = *(node->getSequence());
+
+        TIntermTyped *variable = sequence.front()->getAsTyped();
+        const TType &type      = variable->getType();
+        bool isSamplerCube     = type.getQualifier() == EvqUniform && type.isSamplerCube();
+
+        if (isSamplerCube)
+        {
+            // Samplers cannot have initializers, so the declaration must necessarily be a symbol.
+            TIntermSymbol *samplerVariable = variable->getAsSymbolNode();
+            ASSERT(samplerVariable != nullptr);
+
+            declareSampler2DArray(&samplerVariable->variable(), node);
+            return false;
+        }
+
+        return true;
+    }
+
+    void visitFunctionPrototype(TIntermFunctionPrototype *node) override
+    {
+        const TFunction *function = node->getFunction();
+        // Go over the parameters and replace the samplerCube arguments with a sampler2DArray.
+        mRetyper.visitFunctionPrototype();
+        for (size_t paramIndex = 0; paramIndex < function->getParamCount(); ++paramIndex)
+        {
+            const TVariable *param = function->getParam(paramIndex);
+            TVariable *replacement = convertFunctionParameter(node, param);
+            if (replacement)
+            {
+                mRetyper.replaceFunctionParam(param, replacement);
+            }
+        }
+
+        TIntermFunctionPrototype *replacementPrototype =
+            mRetyper.convertFunctionPrototype(mSymbolTable, function);
+        if (replacementPrototype)
+        {
+            queueReplacement(replacementPrototype, OriginalNode::IS_DROPPED);
+        }
+    }
+
+    bool visitAggregate(Visit visit, TIntermAggregate *node) override
+    {
+        if (visit == PreVisit)
+        {
+            mRetyper.preVisitAggregate();
+        }
+
+        if (visit != PostVisit)
+        {
+            return true;
+        }
+
+        if (node->getOp() == EOpCallBuiltInFunction)
+        {
+            convertBuiltinFunction(node);
+        }
+        else if (node->getOp() == EOpCallFunctionInAST)
+        {
+            TIntermAggregate *substituteCall = mRetyper.convertASTFunction(node);
+            if (substituteCall)
+            {
+                queueReplacement(substituteCall, OriginalNode::IS_DROPPED);
+            }
+        }
+        mRetyper.postVisitAggregate();
+
+        return true;
+    }
+
+    void visitSymbol(TIntermSymbol *symbol) override
+    {
+        if (!symbol->getType().isSamplerCube())
+        {
+            return;
+        }
+
+        const TVariable *samplerCubeVar = &symbol->variable();
+
+        TIntermTyped *sampler2DArrayVar =
+            new TIntermSymbol(mRetyper.getVariableReplacement(samplerCubeVar));
+        ASSERT(sampler2DArrayVar != nullptr);
+
+        TIntermNode *argument = symbol;
+
+        // We need to replace the whole function call argument with the symbol replaced.  The
+        // argument can either be the sampler (array) itself, or a subscript into a sampler array.
+        TIntermBinary *arrayExpression = getParentNode()->getAsBinaryNode();
+        if (arrayExpression)
+        {
+            ASSERT(arrayExpression->getOp() == EOpIndexDirect ||
+                   arrayExpression->getOp() == EOpIndexIndirect);
+
+            argument = arrayExpression;
+
+            sampler2DArrayVar = new TIntermBinary(arrayExpression->getOp(), sampler2DArrayVar,
+                                                  arrayExpression->getRight()->deepCopy());
+        }
+
+        mRetyper.replaceFunctionCallArg(argument, sampler2DArrayVar);
+    }
+
+    TIntermFunctionDefinition *getCoordTranslationFunctionDecl()
+    {
+        return mCoordTranslationFunctionDecl;
+    }
+
+  private:
+    void declareSampler2DArray(const TVariable *samplerCubeVar, TIntermDeclaration *node)
+    {
+        if (mCubeXYZToArrayUVL == nullptr)
+        {
+            // If not done yet, declare the function that transforms cube map texture sampling
+            // coordinates to face index and uv coordinates.
+            declareCoordTranslationFunction();
+        }
+
+        TType *newType = new TType(samplerCubeVar->getType());
+        newType->setBasicType(EbtSampler2DArray);
+
+        TVariable *sampler2DArrayVar =
+            new TVariable(mSymbolTable, samplerCubeVar->name(), newType, SymbolType::UserDefined);
+
+        TIntermDeclaration *sampler2DArrayDecl = new TIntermDeclaration();
+        sampler2DArrayDecl->appendDeclarator(new TIntermSymbol(sampler2DArrayVar));
+
+        TIntermSequence replacement;
+        replacement.push_back(sampler2DArrayDecl);
+        mMultiReplacements.emplace_back(getParentNode()->getAsBlock(), node, replacement);
+
+        // Remember the sampler2DArray variable.
+        mRetyper.replaceGlobalVariable(samplerCubeVar, sampler2DArrayVar);
+    }
+
+    void declareCoordTranslationFunction()
+    {
+        // GLES2.0 (as well as desktop OpenGL 2.0) define the coordinate transformation as
+        // follows.  Given xyz cube coordinates, where each channel is in [-1, 1], the following
+        // table calculates uc, vc and ma as well as the cube map face.
+        //
+        //    Major    Axis Direction Target     uc  vc  ma
+        //     +x   TEXTURE_CUBE_MAP_POSITIVE_X  −z  −y  |x|
+        //     −x   TEXTURE_CUBE_MAP_NEGATIVE_X   z  −y  |x|
+        //     +y   TEXTURE_CUBE_MAP_POSITIVE_Y   x   z  |y|
+        //     −y   TEXTURE_CUBE_MAP_NEGATIVE_Y   x  −z  |y|
+        //     +z   TEXTURE_CUBE_MAP_POSITIVE_Z   x  −y  |z|
+        //     −z   TEXTURE_CUBE_MAP_NEGATIVE_Z  −x  −y  |z|
+        //
+        // "Major" indicates the axis with the largest absolute value.  The cube map face indicates
+        // the layer to sample from.  The uv coordinates to sample from are calculated as follows,
+        // effectively transforming the uc/ma and vc/ma values from [-1, 1] to [0, 1]:
+        //
+        //     u = (1 + uc/ma) / 2
+        //     v = (1 + vc/ma) / 2
+        //
+        // The function can be implemented as 6 ifs, though it would be far from efficient.  The
+        // following calculations implement the table above in a smaller number of instructions.
+        //
+        // First, ma can be calculated as the max of the three axes.
+        //
+        //     ma = max3(|x|, |y|, |z|)
+        //
+        // We have three cases:
+        //
+        //     ma == |x|:      uc = -sign(x)*z
+        //                     vc = -y
+        //                  layer = float(x < 0)
+        //
+        //     ma == |y|:      uc = x
+        //                     vc = sign(y)*z
+        //                  layer = 2 + float(y < 0)
+        //
+        //     ma == |z|:      uc = sign(z)*x
+        //                     vc = -y
+        //                  layer = 4 + float(z < 0)
+        //
+        // This can be implemented with a number of ?: instructions or 3 ifs. ?: would require all
+        // expressions to be evaluated (vector ALU) while if would require exec mask and jumps
+        // (scalar operations).  We implement this using ifs as there would otherwise be many vector
+        // operations and not much of anything else.
+        //
+        // If textureCubeGrad is used, we also need to transform the provided dPdx and dPdy (both
+        // vec3) to a dUVdx and dUVdy.  Assume P=(r,s,t) and we are investigating dx (note the
+        // change from xyz to rst to not confuse with dx and dy):
+        //
+        //     uv = (f(r,s,t)/ma + 1)/2
+        //
+        // Where f is one of the transformations above for uc and vc.  Between two neighbors along
+        // the x axis, we have P0=(r0,s0,t0) and P1=(r1,s1,t1)
+        //
+        //     dP = (r1-r0, s1-s0, t1-t0)
+        //     dUV = (f(r1,s1,t1)/ma1 - g(r0,s0,t0)/ma0) / 2
+        //
+        // f and g may not necessarily be the same because the two points may have different major
+        // axes.  Even with the same major axis, the sign that's used in the formulas may not be
+        // the same.  Furthermore, ma0 and ma1 may not be the same.  This makes it impossible to
+        // derive dUV from dP exactly.
+        //
+        // However, gradient transformation is implementation dependent, so we will simplify and
+        // assume all the above complications are non-existent.  We therefore have:
+        //
+        //      dUV = (f(r1,s1,t1)/ma0 - f(r0,s0,t0)/ma0)/2
+        //
+        // Given that we assumed the sign functions are returning identical results for the two
+        // points, f becomes a linear transformation.  Thus:
+        //
+        //      dUV = f(r1-r0,s1-s0,t1-t0)/ma0/2
+        //
+        // In other words, we use the same formulae that transform XYZ (RST here) to UV to
+        // transform the derivatives.
+        //
+        //     ma == |x|:    dUdx = -sign(x)*dPdx.z / ma / 2
+        //                   dVdx = -dPdx.y / ma / 2
+        //
+        //     ma == |y|:    dUdx = dPdx.x / ma / 2
+        //                   dVdx = sign(y)*dPdx.z / ma / 2
+        //
+        //     ma == |z|:    dUdx = sign(z)*dPdx.x / ma / 2
+        //                   dVdx = -dPdx.y / ma / 2
+        //
+        // Similarly for dy.
+
+        // Create the function parameters: vec3 P, vec3 dPdx, vec3 dPdy,
+        //                                 out vec2 dUVdx, out vec2 dUVdy
+        const TType *vec3Type = StaticType::GetBasic<EbtFloat, 3>();
+        TVariable *pVar =
+            new TVariable(mSymbolTable, ImmutableString("P"), vec3Type, SymbolType::AngleInternal);
+        TVariable *dPdxVar = new TVariable(mSymbolTable, ImmutableString("dPdx"), vec3Type,
+                                           SymbolType::AngleInternal);
+        TVariable *dPdyVar = new TVariable(mSymbolTable, ImmutableString("dPdy"), vec3Type,
+                                           SymbolType::AngleInternal);
+
+        const TType *vec2Type = StaticType::GetBasic<EbtFloat, 2>();
+        TType *outVec2Type    = new TType(*vec2Type);
+        outVec2Type->setQualifier(EvqOut);
+
+        TVariable *dUVdxVar = new TVariable(mSymbolTable, ImmutableString("dUVdx"), outVec2Type,
+                                            SymbolType::AngleInternal);
+        TVariable *dUVdyVar = new TVariable(mSymbolTable, ImmutableString("dUVdy"), outVec2Type,
+                                            SymbolType::AngleInternal);
+
+        TIntermSymbol *p     = new TIntermSymbol(pVar);
+        TIntermSymbol *dPdx  = new TIntermSymbol(dPdxVar);
+        TIntermSymbol *dPdy  = new TIntermSymbol(dPdyVar);
+        TIntermSymbol *dUVdx = new TIntermSymbol(dUVdxVar);
+        TIntermSymbol *dUVdy = new TIntermSymbol(dUVdyVar);
+
+        // Create the function body as statements are generated.
+        TIntermBlock *body = new TIntermBlock;
+
+        // Create the swizzle nodes that will be used in multiple expressions:
+        TIntermSwizzle *x = new TIntermSwizzle(p->deepCopy(), {0});
+        TIntermSwizzle *y = new TIntermSwizzle(p->deepCopy(), {1});
+        TIntermSwizzle *z = new TIntermSwizzle(p->deepCopy(), {2});
+
+        // Create abs and "< 0" expressions from the channels.
+        const TType *floatType = StaticType::GetBasic<EbtFloat>();
+
+        TIntermTyped *isNegX = new TIntermBinary(EOpLessThan, x, CreateZeroNode(*floatType));
+        TIntermTyped *isNegY = new TIntermBinary(EOpLessThan, y, CreateZeroNode(*floatType));
+        TIntermTyped *isNegZ = new TIntermBinary(EOpLessThan, z, CreateZeroNode(*floatType));
+
+        TIntermSymbol *absX = new TIntermSymbol(CreateTempVariable(mSymbolTable, floatType));
+        TIntermSymbol *absY = new TIntermSymbol(CreateTempVariable(mSymbolTable, floatType));
+        TIntermSymbol *absZ = new TIntermSymbol(CreateTempVariable(mSymbolTable, floatType));
+
+        TIntermDeclaration *absXDecl = CreateTempInitDeclarationNode(
+            &absX->variable(), new TIntermUnary(EOpAbs, x->deepCopy(), nullptr));
+        TIntermDeclaration *absYDecl = CreateTempInitDeclarationNode(
+            &absY->variable(), new TIntermUnary(EOpAbs, y->deepCopy(), nullptr));
+        TIntermDeclaration *absZDecl = CreateTempInitDeclarationNode(
+            &absZ->variable(), new TIntermUnary(EOpAbs, z->deepCopy(), nullptr));
+
+        body->appendStatement(absXDecl);
+        body->appendStatement(absYDecl);
+        body->appendStatement(absZDecl);
+
+        // Create temporary variables for ma, uc, vc, and l (layer), as well as dUdx, dVdx, dUdy
+        // and dVdy.
+        TIntermSymbol *ma   = new TIntermSymbol(CreateTempVariable(mSymbolTable, floatType));
+        TIntermSymbol *l    = new TIntermSymbol(CreateTempVariable(mSymbolTable, floatType));
+        TIntermSymbol *uc   = new TIntermSymbol(CreateTempVariable(mSymbolTable, floatType));
+        TIntermSymbol *vc   = new TIntermSymbol(CreateTempVariable(mSymbolTable, floatType));
+        TIntermSymbol *dUdx = new TIntermSymbol(CreateTempVariable(mSymbolTable, floatType));
+        TIntermSymbol *dVdx = new TIntermSymbol(CreateTempVariable(mSymbolTable, floatType));
+        TIntermSymbol *dUdy = new TIntermSymbol(CreateTempVariable(mSymbolTable, floatType));
+        TIntermSymbol *dVdy = new TIntermSymbol(CreateTempVariable(mSymbolTable, floatType));
+
+        body->appendStatement(CreateTempDeclarationNode(&ma->variable()));
+        body->appendStatement(CreateTempDeclarationNode(&l->variable()));
+        body->appendStatement(CreateTempDeclarationNode(&uc->variable()));
+        body->appendStatement(CreateTempDeclarationNode(&vc->variable()));
+        body->appendStatement(CreateTempDeclarationNode(&dUdx->variable()));
+        body->appendStatement(CreateTempDeclarationNode(&dVdx->variable()));
+        body->appendStatement(CreateTempDeclarationNode(&dUdy->variable()));
+        body->appendStatement(CreateTempDeclarationNode(&dVdy->variable()));
+
+        // ma = max(|x|, max(|y|, |z|))
+        TIntermTyped *maxYZ = CreateBuiltInFunctionCallNode(
+            "max", new TIntermSequence({absY->deepCopy(), absZ->deepCopy()}), *mSymbolTable, 100);
+        TIntermTyped *maValue = CreateBuiltInFunctionCallNode(
+            "max", new TIntermSequence({absX->deepCopy(), maxYZ}), *mSymbolTable, 100);
+        body->appendStatement(new TIntermBinary(EOpAssign, ma, maValue));
+
+        // ma == |x| and ma == |y| expressions
+        TIntermTyped *isXMajor = new TIntermBinary(EOpEqual, ma->deepCopy(), absX->deepCopy());
+        TIntermTyped *isYMajor = new TIntermBinary(EOpEqual, ma->deepCopy(), absY->deepCopy());
+
+        // Determine the cube face:
+
+        // The case where x is major:
+        //     layer = float(x < 0)
+        TIntermTyped *xl =
+            TIntermAggregate::CreateConstructor(*floatType, new TIntermSequence({isNegX}));
+
+        TIntermBlock *calculateXL = new TIntermBlock;
+        calculateXL->appendStatement(new TIntermBinary(EOpAssign, l->deepCopy(), xl));
+
+        // The case where y is major:
+        //     layer = 2 + float(y < 0)
+        TIntermTyped *yl = new TIntermBinary(
+            EOpAdd, CreateFloatNode(2.0f),
+            TIntermAggregate::CreateConstructor(*floatType, new TIntermSequence({isNegY})));
+
+        TIntermBlock *calculateYL = new TIntermBlock;
+        calculateYL->appendStatement(new TIntermBinary(EOpAssign, l->deepCopy(), yl));
+
+        // The case where z is major:
+        //     layer = 4 + float(z < 0)
+        TIntermTyped *zl = new TIntermBinary(
+            EOpAdd, CreateFloatNode(4.0f),
+            TIntermAggregate::CreateConstructor(*floatType, new TIntermSequence({isNegZ})));
+
+        TIntermBlock *calculateZL = new TIntermBlock;
+        calculateZL->appendStatement(new TIntermBinary(EOpAssign, l->deepCopy(), zl));
+
+        // Create the if-else paths:
+        TIntermIfElse *calculateYZL     = new TIntermIfElse(isYMajor, calculateYL, calculateZL);
+        TIntermBlock *calculateYZLBlock = new TIntermBlock;
+        calculateYZLBlock->appendStatement(calculateYZL);
+        TIntermIfElse *calculateXYZL = new TIntermIfElse(isXMajor, calculateXL, calculateYZLBlock);
+        body->appendStatement(calculateXYZL);
+
+        // If the input coordinates come from a varying, they are interpolated between values
+        // provided by the vertex shader.  Say the vertex shader provides the coordinates
+        // corresponding to corners of a face.  For the sake of the argument, say this is the
+        // positive X face.  The coordinates would thus look as follows:
+        //
+        //  - (A, A, A)
+        //  - (B, B, -B)
+        //  - (C, -C, C)
+        //  - (D, -D, -D)
+        //
+        // The values A, B, C and D could be equal, but not necessarily.  All fragments inside this
+        // quad will have X as the major axis.  The transformation described in the spec works for
+        // these samples.
+        //
+        // However, WQM (Whole Quad Mode) can enable a few invocations outside the borders of the
+        // quad for the sole purpose of calculating derivatives.  These invocations will extrapolate
+        // the coordinates that are input from varyings and end up with a different major axis.  In
+        // turn, their transformed UV would correspond to a different face and while the sampling
+        // is done on the correct face (by fragments inside the quad), the derivatives would be
+        // incorrect and the wrong mip would be selected.
+        //
+        // We therefore use gl_HelperInvocation to identify these invocations and subgroupQuadSwap*
+        // operations to retrieve the layer from a non-helper invocation.  As a result, the UVs
+        // calculated for the helper invocations correspond to the same face and end up outside the
+        // [0, 1] range, but result in correct derivatives.  Indeed, sampling from any other kind of
+        // texture using varyings that range from [0, 1] would follow the same behavior (where
+        // helper invocations generate UVs out of range).
+        if (mIsFragmentShader)
+        {
+            GetLayerFromNonHelperInvocation(mSymbolTable, body, l->deepCopy(), ma->deepCopy());
+        }
+
+        // layer < 1.5 (covering faces 0 and 1, corresponding to major axis being X) and layer < 3.5
+        // (covering faces 2 and 3, corresponding to major axis being Y).  Used to determine which
+        // of the three transformations to apply.  Previously, ma == |X| and ma == |Y| was used,
+        // which is no longer correct for helper invocations.
+        isXMajor = new TIntermBinary(EOpLessThan, l->deepCopy(), CreateFloatNode(1.5f));
+        isYMajor = new TIntermBinary(EOpLessThan, l->deepCopy(), CreateFloatNode(3.5f));
+
+        TIntermSwizzle *dPdxX = new TIntermSwizzle(dPdx->deepCopy(), {0});
+        TIntermSwizzle *dPdxY = new TIntermSwizzle(dPdx->deepCopy(), {1});
+        TIntermSwizzle *dPdxZ = new TIntermSwizzle(dPdx->deepCopy(), {2});
+
+        TIntermSwizzle *dPdyX = new TIntermSwizzle(dPdy->deepCopy(), {0});
+        TIntermSwizzle *dPdyY = new TIntermSwizzle(dPdy->deepCopy(), {1});
+        TIntermSwizzle *dPdyZ = new TIntermSwizzle(dPdy->deepCopy(), {2});
+
+        TIntermBlock *calculateXUcVc = new TIntermBlock;
+        TransformXMajor(calculateXUcVc, x, y, z, uc, vc);
+        TransformXMajor(calculateXUcVc, dPdxX, dPdxY, dPdxZ, dUdx, dVdx);
+        TransformXMajor(calculateXUcVc, dPdyX, dPdyY, dPdyZ, dUdy, dVdy);
+
+        TIntermBlock *calculateYUcVc = new TIntermBlock;
+        TransformYMajor(calculateYUcVc, x, y, z, uc, vc);
+        TransformYMajor(calculateYUcVc, dPdxX, dPdxY, dPdxZ, dUdx, dVdx);
+        TransformYMajor(calculateYUcVc, dPdyX, dPdyY, dPdyZ, dUdy, dVdy);
+
+        TIntermBlock *calculateZUcVc = new TIntermBlock;
+        TransformZMajor(calculateZUcVc, x, y, z, uc, vc);
+        TransformZMajor(calculateZUcVc, dPdxX, dPdxY, dPdxZ, dUdx, dVdx);
+        TransformZMajor(calculateZUcVc, dPdyX, dPdyY, dPdyZ, dUdy, dVdy);
+
+        // Create the if-else paths:
+        TIntermIfElse *calculateYZUcVc =
+            new TIntermIfElse(isYMajor, calculateYUcVc, calculateZUcVc);
+        TIntermBlock *calculateYZUcVcBlock = new TIntermBlock;
+        calculateYZUcVcBlock->appendStatement(calculateYZUcVc);
+        TIntermIfElse *calculateXYZUcVc =
+            new TIntermIfElse(isXMajor, calculateXUcVc, calculateYZUcVcBlock);
+        body->appendStatement(calculateXYZUcVc);
+
+        // u = (1 + uc/|ma|) / 2
+        // v = (1 + vc/|ma|) / 2
+        TIntermTyped *maTimesTwo =
+            new TIntermBinary(EOpMulAssign, ma->deepCopy(), CreateFloatNode(2.0));
+        body->appendStatement(maTimesTwo);
+
+        TIntermTyped *ucDivMa     = new TIntermBinary(EOpDiv, uc, ma->deepCopy());
+        TIntermTyped *vcDivMa     = new TIntermBinary(EOpDiv, vc, ma->deepCopy());
+        TIntermTyped *uNormalized = new TIntermBinary(EOpAdd, CreateFloatNode(0.5f), ucDivMa);
+        TIntermTyped *vNormalized = new TIntermBinary(EOpAdd, CreateFloatNode(0.5f), vcDivMa);
+
+        body->appendStatement(new TIntermBinary(EOpAssign, uc->deepCopy(), uNormalized));
+        body->appendStatement(new TIntermBinary(EOpAssign, vc->deepCopy(), vNormalized));
+
+        // dUdx / (ma*2).  Similarly for dVdx, dUdy and dVdy
+        TIntermTyped *dUdxNormalized = new TIntermBinary(EOpDiv, dUdx, ma->deepCopy());
+        TIntermTyped *dVdxNormalized = new TIntermBinary(EOpDiv, dVdx, ma->deepCopy());
+        TIntermTyped *dUdyNormalized = new TIntermBinary(EOpDiv, dUdy, ma->deepCopy());
+        TIntermTyped *dVdyNormalized = new TIntermBinary(EOpDiv, dVdy, ma->deepCopy());
+
+        // dUVdx = vec2(dUdx/2ma, dVdx/2ma)
+        // dUVdy = vec2(dUdy/2ma, dVdy/2ma)
+        TIntermTyped *dUVdxValue = TIntermAggregate::CreateConstructor(
+            *vec2Type, new TIntermSequence({dUdxNormalized, dVdxNormalized}));
+        TIntermTyped *dUVdyValue = TIntermAggregate::CreateConstructor(
+            *vec2Type, new TIntermSequence({dUdyNormalized, dVdyNormalized}));
+
+        body->appendStatement(new TIntermBinary(EOpAssign, dUVdx, dUVdxValue));
+        body->appendStatement(new TIntermBinary(EOpAssign, dUVdy, dUVdyValue));
+
+        // return vec3(u, v, l)
+        TIntermBranch *returnStatement = new TIntermBranch(
+            EOpReturn, TIntermAggregate::CreateConstructor(
+                           *vec3Type, new TIntermSequence({uc->deepCopy(), vc->deepCopy(), l})));
+        body->appendStatement(returnStatement);
+
+        mCubeXYZToArrayUVL = new TFunction(mSymbolTable, kCoordTransformFuncName,
+                                           SymbolType::AngleInternal, vec3Type, true);
+        mCubeXYZToArrayUVL->addParameter(pVar);
+        mCubeXYZToArrayUVL->addParameter(dPdxVar);
+        mCubeXYZToArrayUVL->addParameter(dPdyVar);
+        mCubeXYZToArrayUVL->addParameter(dUVdxVar);
+        mCubeXYZToArrayUVL->addParameter(dUVdyVar);
+
+        mCoordTranslationFunctionDecl =
+            CreateInternalFunctionDefinitionNode(*mCubeXYZToArrayUVL, body);
+    }
+
+    // Builds a call to the coordinate transformation helper (mCubeXYZToArrayUVL).  The arguments
+    // are forwarded in the order the helper's parameters are declared: P, dPdx, dPdy, dUVdx,
+    // dUVdy.
+    TIntermTyped *createCoordTransformationCall(TIntermTyped *P,
+                                                TIntermTyped *dPdx,
+                                                TIntermTyped *dPdy,
+                                                TIntermTyped *dUVdx,
+                                                TIntermTyped *dUVdy)
+    {
+        return TIntermAggregate::CreateFunctionCall(
+            *mCubeXYZToArrayUVL, new TIntermSequence({P, dPdx, dPdy, dUVdx, dUVdy}));
+    }
+
+    // Returns a sampler2DArray replacement for a samplerCube function parameter, keeping the
+    // parameter's name and every other property of its type.  Returns nullptr when the parameter
+    // is not a samplerCube.  |parent| is not used here.
+    TVariable *convertFunctionParameter(TIntermNode *parent, const TVariable *param)
+    {
+        const TType &cubeType = param->getType();
+        if (!cubeType.isSamplerCube())
+        {
+            return nullptr;
+        }
+
+        // Copy the original type and only swap the basic type.
+        TType *arrayType = new TType(cubeType);
+        arrayType->setBasicType(EbtSampler2DArray);
+
+        return new TVariable(mSymbolTable, param->name(), arrayType, SymbolType::UserDefined);
+    }
+
+    void convertBuiltinFunction(TIntermAggregate *node)
+    {
+        const TFunction *function = node->getFunction();
+        if (!function->name().beginsWith("textureCube"))
+        {
+            return;  // Name does not start with textureCube: leave the call untouched.
+        }
+
+        // Rewrites a textureCube* call.  All textureCube* functions are in the form:
+        //
+        //     textureCube??(samplerCube, vec3, ??)
+        //
+        // They should be converted to:
+        //
+        //     texture??(sampler2DArray, convertCoords(vec3), ??)
+        //
+        // We assume the target platform supports texture() functions (currently only used in
+        // Vulkan).
+        //
+        // The intrinsics map as follows:
+        //
+        //     textureCube -> texture
+        //     textureCubeLod -> textureLod
+        //     textureCubeLodEXT -> textureLod
+        //     textureCubeGrad -> textureGrad
+        //     textureCubeGradEXT -> textureGrad
+        //
+        // Note that dPdx and dPdy in textureCubeGrad* are vec3, while the textureGrad equivalent
+        // for sampler2DArray is vec2.  The EXT_shader_texture_lod that introduces this function
+        // says:
+        //
+        // > For the "Grad" functions, dPdx is the explicit derivative of P with respect
+        // > to window x, and similarly dPdy with respect to window y. ...  For a cube map texture,
+        // > dPdx and dPdy are vec3.
+        // >
+        // > Let
+        // >
+        // >     dSdx = dPdx.s;
+        // >     dSdy = dPdy.s;
+        // >     dTdx = dPdx.t;
+        // >     dTdy = dPdy.t;
+        // >
+        // > and
+        // >
+        // >             / 0.0;    for two-dimensional texture
+        // >     dRdx = (
+        // >             \ dPdx.p; for cube map texture
+        // >
+        // >             / 0.0;    for two-dimensional texture
+        // >     dRdy = (
+        // >             \ dPdy.p; for cube map texture
+        // >
+        // > (See equation 3.12a in The OpenGL ES 2.0 Specification.)
+        //
+        // It's unclear to me what dRdx and dRdy are.  EXT_gpu_shader4 that promotes this function
+        // has the following additional information:
+        //
+        // > For the "Cube" versions, the partial
+        // > derivatives ddx and ddy are assumed to be in the coordinate system used
+        // > before texture coordinates are projected onto the appropriate cube
+        // > face. The partial derivatives of the post-projection texture coordinates,
+        // > which are used for level-of-detail and anisotropic filtering
+        // > calculations, are derived from coord, ddx and ddy in an
+        // > implementation-dependent manner.
+        //
+        // The calculation of dPdx and dPdy is declared as implementation-dependent, so we have
+        // freedom to calculate it as we see fit, even if not precisely the same as hardware might.
+
+        const char *substituteFunctionName = "texture";
+        bool isGrad                        = false;
+        if (function->name().beginsWith("textureCubeLod"))
+        {
+            substituteFunctionName = "textureLod";
+        }
+        else if (function->name().beginsWith("textureCubeGrad"))
+        {
+            substituteFunctionName = "textureGrad";
+            isGrad                 = true;
+        }
+
+        TIntermSequence *arguments = node->getSequence();
+        ASSERT(arguments->size() >= 2);
+
+        const TType *vec2Type = StaticType::GetBasic<EbtFloat, 2>();
+        const TType *vec3Type = StaticType::GetBasic<EbtFloat, 3>();
+        TIntermSymbol *uvl    = new TIntermSymbol(CreateTempVariable(mSymbolTable, vec3Type));
+        TIntermSymbol *dUVdx  = new TIntermSymbol(CreateTempVariable(mSymbolTable, vec2Type));
+        TIntermSymbol *dUVdy  = new TIntermSymbol(CreateTempVariable(mSymbolTable, vec2Type));
+
+        TIntermTyped *dPdx = nullptr;
+        TIntermTyped *dPdy = nullptr;
+        if (isGrad)
+        {
+            ASSERT(arguments->size() == 4);
+            dPdx = (*arguments)[2]->getAsTyped()->deepCopy();
+            dPdy = (*arguments)[3]->getAsTyped()->deepCopy();
+        }
+        else
+        {
+            dPdx = CreateZeroNode(*vec3Type);
+            dPdy = CreateZeroNode(*vec3Type);
+        }
+
+        // The function call to transform the coordinates, dPdx and dPdy.  If not textureCubeGrad,
+        // the driver compiler will optimize out the unnecessary calculations.
+        TIntermSequence *coordTransform = new TIntermSequence;
+        coordTransform->push_back(CreateTempDeclarationNode(&dUVdx->variable()));
+        coordTransform->push_back(CreateTempDeclarationNode(&dUVdy->variable()));
+        TIntermTyped *coordTransformCall = createCoordTransformationCall(
+            (*arguments)[1]->getAsTyped()->deepCopy(), dPdx, dPdy, dUVdx, dUVdy);
+        coordTransform->push_back(
+            CreateTempInitDeclarationNode(&uvl->variable(), coordTransformCall));
+        insertStatementsInParentBlock(*coordTransform);
+
+        TIntermSequence *substituteArguments = new TIntermSequence;
+        // Replace the first argument (samplerCube) with the sampler2DArray.
+        substituteArguments->push_back(mRetyper.getFunctionCallArgReplacement((*arguments)[0]));
+        // Replace the second argument with the transformed coordinates (u, v, layer).
+        substituteArguments->push_back(uvl->deepCopy());
+        if (isGrad)
+        {
+            substituteArguments->push_back(dUVdx->deepCopy());
+            substituteArguments->push_back(dUVdy->deepCopy());
+        }
+        else
+        {
+            // Pass the rest of the parameters as is.
+            for (size_t argIndex = 2; argIndex < arguments->size(); ++argIndex)
+            {
+                substituteArguments->push_back((*arguments)[argIndex]->getAsTyped()->deepCopy());
+            }
+        }
+
+        TIntermTyped *substituteCall = CreateBuiltInFunctionCallNode(
+            substituteFunctionName, substituteArguments, *mSymbolTable, 300);
+
+        queueReplacement(substituteCall, OriginalNode::IS_DROPPED);
+    }
+
+    RetypeOpaqueVariablesHelper mRetyper;  // Supplies sampler2DArray replacements for call args.
+
+    // A helper function to convert xyz coordinates passed to a cube map sampling function into the
+    // array layer (cube map face) and uv coordinates.
+    TFunction *mCubeXYZToArrayUVL;
+
+    bool mIsFragmentShader;  // Gates the GetLayerFromNonHelperInvocation() fix-up in the helper.
+
+    // Stored to be put before the first function after the pass.
+    TIntermFunctionDefinition *mCoordTranslationFunctionDecl;
+};
+
+}  // anonymous namespace
+
+// Entry point of the pass: runs the traverser over |root| and applies the edits it queued.  If
+// the traverser produced a coordinate translation function definition, that definition is
+// inserted at the index of the first function definition in |root|.
+void RewriteCubeMapSamplersAs2DArray(TIntermBlock *root,
+                                     TSymbolTable *symbolTable,
+                                     bool isFragmentShader)
+{
+    RewriteCubeMapSamplersAs2DArrayTraverser rewriter(symbolTable, isFragmentShader);
+    root->traverse(&rewriter);
+    rewriter.updateTree();
+
+    TIntermFunctionDefinition *helperDefinition = rewriter.getCoordTranslationFunctionDecl();
+    if (!helperDefinition)
+    {
+        // No helper was generated, so there is nothing to insert.
+        return;
+    }
+
+    size_t insertionIndex = FindFirstFunctionDefinitionIndex(root);
+    root->insertChildNodes(insertionIndex, TIntermSequence({helperDefinition}));
+}
+
+}  // namespace sh
--- a/src/compiler/translator/tree_ops/RewriteCubeMapSamplersAs2DArray.h
+++ b/src/compiler/translator/tree_ops/RewriteCubeMapSamplersAs2DArray.h
@ -0,0 +1,25 @@
+//
+// Copyright 2019 The ANGLE Project Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+//
+// RewriteCubeMapSamplersAs2DArray: Change samplerCube samplers to sampler2DArray, and the
+// textureCube* function calls to calls to helper functions that select the cube map face and sample
+// from the face as a 2D texture.  This emulates seamful cube map sampling in ES2 (or desktop GL 2)
+// and therefore only looks at samplerCube (i.e. not integer variants or cube arrays) and sampling
+// functions that are defined in ES GLSL 1.0 (i.e. not the cube overload of texture()).
+
+#ifndef COMPILER_TRANSLATOR_TREEOPS_REWRITECUBEMAPSAMPLERSAS2DARRAY_H_
+#define COMPILER_TRANSLATOR_TREEOPS_REWRITECUBEMAPSAMPLERSAS2DARRAY_H_
+
+namespace sh
+{
+class TIntermBlock;
+class TSymbolTable;
+
+void RewriteCubeMapSamplersAs2DArray(TIntermBlock *root,
+                                     TSymbolTable *symbolTable,
+                                     bool isFragmentShader);
+}  // namespace sh
+
+#endif  // COMPILER_TRANSLATOR_TREEOPS_REWRITECUBEMAPSAMPLERSAS2DARRAY_H_
--- a/src/libANGLE/renderer/vulkan/ContextVk.cpp
+++ b/src/libANGLE/renderer/vulkan/ContextVk.cpp
@ -438,6 +438,8 @@ angle::Result ContextVk::initialize()
        ANGLE_TRY(synchronizeCpuGpuTime());
    }

+    mEmulateSeamfulCubeMapSampling = shouldEmulateSeamfulCubeMapSampling();
+
    return angle::Result::Continue;
 }

@ -2890,4 +2892,30 @@ vk::DescriptorSetLayoutDesc ContextVk::getDriverUniformsDescriptorSetDesc(
    desc.update(0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1, shaderStages);
    return desc;
 }
+
+// Decides whether cube map sampling should go through the seamful emulation in this context.
+// Emulation is used only when the client major version is 2, the
+// disallowSeamfulCubeMapEmulation feature is not enabled, and the device can execute the subgroup
+// operations that the emulation emits in fragment shaders.
+bool ContextVk::shouldEmulateSeamfulCubeMapSampling() const
+{
+    if (mState.getClientMajorVersion() != 2)
+    {
+        return false;
+    }
+
+    if (mRenderer->getFeatures().disallowSeamfulCubeMapEmulation.enabled)
+    {
+        return false;
+    }
+
+    const VkPhysicalDeviceSubgroupProperties &subgroupProperties =
+        mRenderer->getPhysicalDeviceSubgroupProperties();
+
+    constexpr VkSubgroupFeatureFlags kSeamfulCubeMapSubgroupOperations =
+        VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT |
+        VK_SUBGROUP_FEATURE_QUAD_BIT;
+    const bool hasSeamfulCubeMapSubgroupOperations =
+        (subgroupProperties.supportedOperations & kSeamfulCubeMapSubgroupOperations) ==
+        kSeamfulCubeMapSubgroupOperations;
+
+    // supportedOperations only tells which operations exist; supportedStages tells in which
+    // shader stages they may be used.  Vulkan only guarantees the compute stage, while the
+    // emulation emits its subgroup operations in fragment shaders, so that stage must be
+    // advertised explicitly.
+    const bool hasFragmentStageSubgroupSupport =
+        (subgroupProperties.supportedStages & VK_SHADER_STAGE_FRAGMENT_BIT) != 0;
+
+    // Only enable seamful cube map emulation if the necessary subgroup operations are supported.
+    // Without them, we cannot remove derivative-related artifacts caused by helper invocations.
+    return hasSeamfulCubeMapSubgroupOperations && hasFragmentStageSubgroupSupport;
+}
 }  // namespace rx
--- a/src/libANGLE/renderer/vulkan/ContextVk.h
+++ b/src/libANGLE/renderer/vulkan/ContextVk.h
@ -313,6 +313,8 @@ class ContextVk : public ContextImpl, public vk::Context, public vk::RenderPassO

    void updateScissor(const gl::State &glState);

+    bool emulateSeamfulCubeMapSampling() const { return mEmulateSeamfulCubeMapSampling; }  // Cached in initialize().
+
  private:
    // Dirty bits.
    enum DirtyBitType : size_t
@ -470,6 +472,8 @@ class ContextVk : public ContextImpl, public vk::Context, public vk::RenderPassO

    void waitForSwapchainImageIfNecessary();

+    bool shouldEmulateSeamfulCubeMapSampling() const;
+
    vk::PipelineHelper *mCurrentGraphicsPipeline;
    vk::PipelineAndSerial *mCurrentComputePipeline;
    gl::PrimitiveMode mCurrentDrawMode;
@ -531,6 +535,9 @@ class ContextVk : public ContextImpl, public vk::Context, public vk::RenderPassO
    // at the end of the command buffer to make that write available to the host.
    bool mIsAnyHostVisibleBufferWritten;

+    // Whether this context should do seamful cube map sampling emulation.
+    bool mEmulateSeamfulCubeMapSampling;
+
    struct DriverUniformsDescriptorSet
    {
        vk::DynamicBuffer dynamicBuffer;
--- a/src/libANGLE/renderer/vulkan/GlslangWrapper.cpp
+++ b/src/libANGLE/renderer/vulkan/GlslangWrapper.cpp
@ -28,6 +28,7 @@ ANGLE_REENABLE_EXTRA_SEMI_WARNING
 #include "common/utilities.h"
 #include "libANGLE/Caps.h"
 #include "libANGLE/ProgramLinkedResources.h"
+#include "libANGLE/renderer/vulkan/ContextVk.h"
 #include "libANGLE/renderer/vulkan/vk_cache_utils.h"

 namespace rx
@ -935,6 +936,7 @@ void GlslangWrapper::GetShaderSource(const gl::ProgramState &programState,
 angle::Result GlslangWrapper::GetShaderCode(vk::Context *context,
                                            const gl::Caps &glCaps,
                                            bool enableLineRasterEmulation,
+                                            bool enableSeamfulCubeMapEmulation,
                                            const gl::ShaderMap<std::string> &shaderSources,
                                            gl::ShaderMap<std::vector<uint32_t>> *shaderCodeOut)
 {
@ -954,17 +956,20 @@ angle::Result GlslangWrapper::GetShaderCode(vk::Context *context,
                                               kVersionDefine, kLineRasterDefine),
                       VK_ERROR_INVALID_SHADER_NV);

-        return GetShaderCodeImpl(context, glCaps, patchedSources, shaderCodeOut);
+        return GetShaderCodeImpl(context, glCaps, enableSeamfulCubeMapEmulation, patchedSources,
+                                 shaderCodeOut);
    }
    else
    {
-        return GetShaderCodeImpl(context, glCaps, shaderSources, shaderCodeOut);
+        return GetShaderCodeImpl(context, glCaps, enableSeamfulCubeMapEmulation, shaderSources,
+                                 shaderCodeOut);
    }
 }

 // static
 angle::Result GlslangWrapper::GetShaderCodeImpl(vk::Context *context,
                                                const gl::Caps &glCaps,
+                                                bool enableSeamfulCubeMapEmulation,
                                                const gl::ShaderMap<std::string> &shaderSources,
                                                gl::ShaderMap<std::vector<uint32_t>> *shaderCodeOut)
 {
@ -1000,6 +1005,11 @@ angle::Result GlslangWrapper::GetShaderCodeImpl(vk::Context *context,
        glslang::TShader *shader = shaders[shaderType];
        shader->setStringsWithLengths(&shaderString, &shaderLength, 1);
        shader->setEntryPoint("main");
+        if (enableSeamfulCubeMapEmulation)
+        {
+            // Enable SPIR-V 1.3 if this workaround is used, as it uses subgroup operations.
+            shader->setEnvTarget(glslang::EShTargetSpv, glslang::EShTargetSpv_1_3);
+        }

        bool result = shader->parse(&builtInResources, 450, ECoreProfile, false, false, messages);
        if (!result)
--- a/src/libANGLE/renderer/vulkan/GlslangWrapper.h
+++ b/src/libANGLE/renderer/vulkan/GlslangWrapper.h
@ -29,12 +29,14 @@ class GlslangWrapper
    static angle::Result GetShaderCode(vk::Context *context,
                                       const gl::Caps &glCaps,
                                       bool enableLineRasterEmulation,
+                                       bool enableSeamfulCubeMapEmulation,
                                       const gl::ShaderMap<std::string> &shaderSources,
                                       gl::ShaderMap<std::vector<uint32_t>> *shaderCodesOut);

  private:
    static angle::Result GetShaderCodeImpl(vk::Context *context,
                                           const gl::Caps &glCaps,
+                                           bool enableSeamfulCubeMapEmulation,
                                           const gl::ShaderMap<std::string> &shaderSources,
                                           gl::ShaderMap<std::vector<uint32_t>> *shaderCodesOut);
 };
--- a/src/libANGLE/renderer/vulkan/ProgramVk.cpp
+++ b/src/libANGLE/renderer/vulkan/ProgramVk.cpp
@ -306,7 +306,8 @@ angle::Result ProgramVk::ShaderInfo::initShaders(ContextVk *contextVk,

    gl::ShaderMap<std::vector<uint32_t>> shaderCodes;
    ANGLE_TRY(GlslangWrapper::GetShaderCode(
-        contextVk, contextVk->getCaps(), enableLineRasterEmulation, shaderSources, &shaderCodes));
+        contextVk, contextVk->getCaps(), enableLineRasterEmulation,
+        contextVk->emulateSeamfulCubeMapSampling(), shaderSources, &shaderCodes));

    for (const gl::ShaderType shaderType : gl::AllShaderTypes())
    {
@ -1436,6 +1437,8 @@ angle::Result ProgramVk::updateTexturesDescriptorSet(ContextVk *contextVk)

    const gl::ActiveTextureArray<TextureVk *> &activeTextures = contextVk->getActiveTextures();

+    bool emulateSeamfulCubeMapSampling = contextVk->emulateSeamfulCubeMapSampling();
+
    for (uint32_t textureIndex = 0; textureIndex < mState.getSamplerBindings().size();
         ++textureIndex)
    {
@ -1457,6 +1460,13 @@ angle::Result ProgramVk::updateTexturesDescriptorSet(ContextVk *contextVk)
            imageInfo.imageView   = textureVk->getReadImageView().getHandle();
            imageInfo.imageLayout = image.getCurrentLayout();

+            if (emulateSeamfulCubeMapSampling)
+            {
+                // If emulating seamful cubemapping, use the fetch image view.  This is basically
+                // the same image view as read, except it's a 2DArray view for cube maps.
+                imageInfo.imageView = textureVk->getFetchImageView().getHandle();
+            }
+
            VkWriteDescriptorSet &writeInfo = writeDescriptorInfo[writeCount];

            writeInfo.sType            = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
--- a/src/libANGLE/renderer/vulkan/RendererVk.cpp
+++ b/src/libANGLE/renderer/vulkan/RendererVk.cpp
@ -490,6 +490,7 @@ RendererVk::RendererVk()
      mDebugUtilsMessenger(VK_NULL_HANDLE),
      mDebugReportCallback(VK_NULL_HANDLE),
      mPhysicalDevice(VK_NULL_HANDLE),
+      mPhysicalDeviceSubgroupProperties{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES},
      mQueue(VK_NULL_HANDLE),
      mCurrentQueueFamilyIndex(std::numeric_limits<uint32_t>::max()),
      mMaxVertexAttribDivisor(1),
@ -1005,6 +1006,15 @@ angle::Result RendererVk::initializeDevice(DisplayVk *displayVk, uint32_t queueF
        createInfo.pEnabledFeatures = &enabledFeatures.features;
    }

+    if (vkGetPhysicalDeviceProperties2KHR)
+    {
+        VkPhysicalDeviceProperties2 deviceProperties = {};
+        deviceProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+        deviceProperties.pNext = &mPhysicalDeviceSubgroupProperties;
+
+        vkGetPhysicalDeviceProperties2KHR(mPhysicalDevice, &deviceProperties);
+    }
+
    createInfo.enabledExtensionCount = static_cast<uint32_t>(enabledDeviceExtensions.size());
    createInfo.ppEnabledExtensionNames =
        enabledDeviceExtensions.empty() ? nullptr : enabledDeviceExtensions.data();
@ -1264,6 +1274,11 @@ void RendererVk::initFeatures(const ExtensionNameList &deviceExtensionNames)
        mFeatures.perFrameWindowSizeQuery.enabled = true;
    }

+    if (IsWindows() && IsAMD(mPhysicalDeviceProperties.vendorID))
+    {
+        mFeatures.disallowSeamfulCubeMapEmulation.enabled = true;
+    }
+
    if (IsAndroid() && IsQualcomm(mPhysicalDeviceProperties.vendorID))
    {
        mFeatures.forceD16TexFilter.enabled = true;
--- a/src/libANGLE/renderer/vulkan/RendererVk.h
+++ b/src/libANGLE/renderer/vulkan/RendererVk.h
@ -72,6 +72,10 @@ class RendererVk : angle::NonCopyable
    {
        return mPhysicalDeviceProperties;
    }
+    const VkPhysicalDeviceSubgroupProperties &getPhysicalDeviceSubgroupProperties() const
+    {
+        return mPhysicalDeviceSubgroupProperties;  // Queried in initializeDevice() if possible.
+    }
    const VkPhysicalDeviceFeatures &getPhysicalDeviceFeatures() const
    {
        return mPhysicalDeviceFeatures;
@ -190,6 +194,7 @@ class RendererVk : angle::NonCopyable
    VkDebugReportCallbackEXT mDebugReportCallback;
    VkPhysicalDevice mPhysicalDevice;
    VkPhysicalDeviceProperties mPhysicalDeviceProperties;
+    VkPhysicalDeviceSubgroupProperties mPhysicalDeviceSubgroupProperties;
    VkPhysicalDeviceFeatures mPhysicalDeviceFeatures;
    std::vector<VkQueueFamilyProperties> mQueueFamilyProperties;
    std::mutex mQueueMutex;
--- a/src/libANGLE/renderer/vulkan/ShaderVk.cpp
+++ b/src/libANGLE/renderer/vulkan/ShaderVk.cpp
@ -40,6 +40,11 @@ std::shared_ptr<WaitableCompileEvent> ShaderVk::compile(const gl::Context *conte
        compileOptions |= SH_CLAMP_POINT_SIZE;
    }

+    if (contextVk->emulateSeamfulCubeMapSampling())
+    {
+        compileOptions |= SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING;
+    }
+
    return compileImpl(context, compilerInstance, mData.getSource(), compileOptions | options);
 }

--- a/src/libANGLE/renderer/vulkan/TextureVk.h
+++ b/src/libANGLE/renderer/vulkan/TextureVk.h
@ -153,7 +153,8 @@ class TextureVk : public TextureImpl
    void releaseOwnershipOfImage(const gl::Context *context);

    const vk::ImageView &getReadImageView() const;
-    // A special view for cube maps as a 2D array, used with shaders that do texelFetch().
+    // A special view for cube maps as a 2D array, used with shaders that do texelFetch() and for
+    // seamful cube map emulation.
    const vk::ImageView &getFetchImageView() const;
    angle::Result getLayerLevelDrawImageView(vk::Context *context,
                                             size_t layer,
--- a/src/tests/deqp_support/deqp_gles2_test_expectations.txt
+++ b/src/tests/deqp_support/deqp_gles2_test_expectations.txt
@ -280,9 +280,6 @@
 3309 NEXUS5X GLES : dEQP-GLES2.functional.uniform_api.random.3 = FAIL
 3309 NEXUS5X GLES : dEQP-GLES2.functional.uniform_api.random.54 = FAIL

-// General Vulkan failures
-3300 VULKAN : dEQP-GLES2.functional.shaders.texture_functions.vertex.texturecubelod = FAIL
-
 // Only seen failing on Android
 3241 VULKAN ANDROID : dEQP-GLES2.functional.depth_stencil_clear.depth_scissored_masked = FAIL

@ -290,13 +287,6 @@
 3253 VULKAN : dEQP-GLES2.functional.clipping.point.wide_point_clip_viewport_center = FAIL
 3253 VULKAN : dEQP-GLES2.functional.clipping.point.wide_point_clip_viewport_corner = FAIL

-// These seem to fail on both D3D11 and Vulkan
-3243 VULKAN : dEQP-GLES2.functional.texture.mipmap.cube.basic.linear_nearest = FAIL
-3243 VULKAN : dEQP-GLES2.functional.texture.mipmap.cube.basic.linear_linear = FAIL
-3243 VULKAN : dEQP-GLES2.functional.texture.mipmap.cube.projected.linear_nearest = FAIL
-3243 VULKAN : dEQP-GLES2.functional.texture.mipmap.cube.projected.linear_linear = FAIL
-3243 VULKAN : dEQP-GLES2.functional.texture.mipmap.cube.bias.linear_nearest = FAIL
-3243 VULKAN : dEQP-GLES2.functional.texture.mipmap.cube.bias.linear_linear = FAIL
 // D3D11 AMD already covered by Line 148
 3243 D3D11 INTEL : dEQP-GLES2.functional.texture.mipmap.cube.basic.linear_nearest = FAIL
 3243 D3D11 INTEL : dEQP-GLES2.functional.texture.mipmap.cube.basic.linear_linear = FAIL
@ -312,16 +302,6 @@
 3243 D3D11 NVIDIA : dEQP-GLES2.functional.texture.mipmap.cube.bias.linear_linear = FAIL

 // Fail with very tiny pixel differences
-3240 VULKAN : dEQP-GLES2.functional.texture.vertex.cube.filtering.linear_mipmap_linear_nearest_clamp = FAIL
-3240 VULKAN : dEQP-GLES2.functional.texture.vertex.cube.filtering.linear_mipmap_linear_nearest_mirror = FAIL
-3240 VULKAN : dEQP-GLES2.functional.texture.vertex.cube.filtering.linear_mipmap_linear_linear_clamp = FAIL
-3240 VULKAN : dEQP-GLES2.functional.texture.vertex.cube.filtering.linear_mipmap_linear_linear_mirror = FAIL
-3240 VULKAN : dEQP-GLES2.functional.texture.vertex.cube.wrap.clamp_clamp = FAIL
-3240 VULKAN : dEQP-GLES2.functional.texture.vertex.cube.wrap.clamp_repeat = FAIL
-3240 VULKAN : dEQP-GLES2.functional.texture.vertex.cube.wrap.clamp_mirror = FAIL
-3240 VULKAN : dEQP-GLES2.functional.texture.vertex.cube.wrap.mirror_clamp = FAIL
-3240 VULKAN : dEQP-GLES2.functional.texture.vertex.cube.wrap.mirror_repeat = FAIL
-3240 VULKAN : dEQP-GLES2.functional.texture.vertex.cube.wrap.mirror_mirror = FAIL
 3240 D3D11 : dEQP-GLES2.functional.texture.vertex.cube.filtering.linear_mipmap_linear_nearest_clamp = FAIL
 3240 D3D11 : dEQP-GLES2.functional.texture.vertex.cube.filtering.linear_mipmap_linear_nearest_mirror = FAIL
 3240 D3D11 : dEQP-GLES2.functional.texture.vertex.cube.filtering.linear_mipmap_linear_linear_clamp = FAIL
@ -349,6 +329,44 @@
 3306 VULKAN ANDROID : dEQP-GLES2.functional.polygon_offset.fixed16_factor_1_slope = FAIL
 3307 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.projected.nearest_linear = FAIL

+// Seamful cubemap sampling failures on Android (due to missing support for subgroupQuad* operations).
+3243 VULKAN ANDROID : dEQP-GLES2.functional.shaders.texture_functions.vertex.texturecubelod = FAIL
+3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.basic.linear_nearest = FAIL
+3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.basic.linear_linear = FAIL
+3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.projected.linear_nearest = FAIL
+3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.projected.linear_linear = FAIL
+3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.bias.linear_nearest = FAIL
+3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.bias.linear_linear = FAIL
+3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.filtering.linear_mipmap_linear_nearest_clamp = FAIL
+3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.filtering.linear_mipmap_linear_nearest_mirror = FAIL
+3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.filtering.linear_mipmap_linear_linear_clamp = FAIL
+3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.filtering.linear_mipmap_linear_linear_mirror = FAIL
+3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.wrap.clamp_clamp = FAIL
+3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.wrap.clamp_repeat = FAIL
+3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.wrap.clamp_mirror = FAIL
+3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.wrap.mirror_clamp = FAIL
+3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.wrap.mirror_repeat = FAIL
+3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.wrap.mirror_mirror = FAIL
+
+// These tests also fail on the AMD Windows driver, which is not allowed to use the emulation due to errors.
+3243 VULKAN WIN AMD : dEQP-GLES2.functional.shaders.texture_functions.vertex.texturecubelod = FAIL
+3243 VULKAN WIN AMD : dEQP-GLES2.functional.texture.mipmap.cube.basic.linear_nearest = FAIL
+3243 VULKAN WIN AMD : dEQP-GLES2.functional.texture.mipmap.cube.basic.linear_linear = FAIL
+3243 VULKAN WIN AMD : dEQP-GLES2.functional.texture.mipmap.cube.projected.linear_nearest = FAIL
+3243 VULKAN WIN AMD : dEQP-GLES2.functional.texture.mipmap.cube.projected.linear_linear = FAIL
+3243 VULKAN WIN AMD : dEQP-GLES2.functional.texture.mipmap.cube.bias.linear_nearest = FAIL
+3243 VULKAN WIN AMD : dEQP-GLES2.functional.texture.mipmap.cube.bias.linear_linear = FAIL
+3243 VULKAN WIN AMD : dEQP-GLES2.functional.texture.vertex.cube.filtering.linear_mipmap_linear_nearest_clamp = FAIL
+3243 VULKAN WIN AMD : dEQP-GLES2.functional.texture.vertex.cube.filtering.linear_mipmap_linear_nearest_mirror = FAIL
+3243 VULKAN WIN AMD : dEQP-GLES2.functional.texture.vertex.cube.filtering.linear_mipmap_linear_linear_clamp = FAIL
+3243 VULKAN WIN AMD : dEQP-GLES2.functional.texture.vertex.cube.filtering.linear_mipmap_linear_linear_mirror = FAIL
+3243 VULKAN WIN AMD : dEQP-GLES2.functional.texture.vertex.cube.wrap.clamp_clamp = FAIL
+3243 VULKAN WIN AMD : dEQP-GLES2.functional.texture.vertex.cube.wrap.clamp_repeat = FAIL
+3243 VULKAN WIN AMD : dEQP-GLES2.functional.texture.vertex.cube.wrap.clamp_mirror = FAIL
+3243 VULKAN WIN AMD : dEQP-GLES2.functional.texture.vertex.cube.wrap.mirror_clamp = FAIL
+3243 VULKAN WIN AMD : dEQP-GLES2.functional.texture.vertex.cube.wrap.mirror_repeat = FAIL
+3243 VULKAN WIN AMD : dEQP-GLES2.functional.texture.vertex.cube.wrap.mirror_mirror = FAIL
+
 // Vulkan AMD drivers don't seem to support wide point clipping.
 2463 VULKAN WIN AMD : dEQP-GLES2.functional.clipping.point.wide_point_clip = FAIL

--- a/src/tests/gl_tests/CubeMapTextureTest.cpp
+++ b/src/tests/gl_tests/CubeMapTextureTest.cpp
@ -5,6 +5,7 @@
 //

 #include "test_utils/ANGLETest.h"
+#include "test_utils/gl_raii.h"

 using namespace angle;

@ -112,6 +113,154 @@ TEST_P(CubeMapTextureTest, RenderToFacesConsecutively)
    EXPECT_GL_NO_ERROR();
 }

+// Verify that cube map sampling follows the rules that map cubemap coordinates to coordinates
+// within each face.  See section 3.7.5 of GLES2.0 (Cube Map Texture Selection).
+TEST_P(CubeMapTextureTest, SampleCoordinateTransform)
+{
+    // Fails to compile the shader.  anglebug.com/3776
+    ANGLE_SKIP_TEST_IF(IsOpenGL() && IsIntel() && IsWindows());
+
+    constexpr GLsizei kCubeFaceCount            = 6;
+    constexpr GLsizei kCubeFaceSectionCount     = 4;
+    constexpr GLsizei kCubeFaceSectionCountSqrt = 2;
+
+    constexpr GLColor faceColors[kCubeFaceCount][kCubeFaceSectionCount] = {
+        {GLColor(255, 0, 0, 255), GLColor(191, 0, 0, 255), GLColor(127, 0, 0, 255),
+         GLColor(63, 0, 0, 255)},
+        {GLColor(0, 255, 0, 255), GLColor(0, 191, 0, 255), GLColor(0, 127, 0, 255),
+         GLColor(0, 63, 0, 255)},
+        {GLColor(0, 0, 255, 255), GLColor(0, 0, 191, 255), GLColor(0, 0, 127, 255),
+         GLColor(0, 0, 63, 255)},
+        {GLColor(255, 63, 0, 255), GLColor(191, 127, 0, 255), GLColor(127, 191, 0, 255),
+         GLColor(63, 255, 0, 255)},
+        {GLColor(0, 255, 63, 255), GLColor(0, 191, 127, 255), GLColor(0, 127, 191, 255),
+         GLColor(0, 63, 255, 255)},
+        {GLColor(63, 0, 255, 255), GLColor(127, 0, 191, 255), GLColor(191, 0, 127, 255),
+         GLColor(255, 0, 63, 255)},
+    };
+
+    constexpr GLsizei kTextureSize = 32;
+
+    GLTexture tex;
+    glBindTexture(GL_TEXTURE_CUBE_MAP, tex);
+    for (GLenum face = 0; face < kCubeFaceCount; face++)
+    {
+        std::vector<GLColor> faceData(kTextureSize * kTextureSize);
+
+        // Create the face with four sections, each with a solid color from |faceColors|.
+        for (size_t row = 0; row < kTextureSize / kCubeFaceSectionCountSqrt; ++row)
+        {
+            for (size_t col = 0; col < kTextureSize / kCubeFaceSectionCountSqrt; ++col)
+            {
+                for (size_t srow = 0; srow < kCubeFaceSectionCountSqrt; ++srow)
+                {
+                    for (size_t scol = 0; scol < kCubeFaceSectionCountSqrt; ++scol)
+                    {
+                        size_t r = row + srow * kTextureSize / kCubeFaceSectionCountSqrt;
+                        size_t c = col + scol * kTextureSize / kCubeFaceSectionCountSqrt;
+                        size_t s = srow * kCubeFaceSectionCountSqrt + scol;
+                        faceData[r * kTextureSize + c] = faceColors[face][s];
+                    }
+                }
+            }
+        }
+
+        glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face, 0, GL_RGBA, kTextureSize, kTextureSize,
+                     0, GL_RGBA, GL_UNSIGNED_BYTE, faceData.data());
+    }
+    glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+    glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+    EXPECT_GL_NO_ERROR();
+
+    GLTexture fboTex;
+    glBindTexture(GL_TEXTURE_2D, fboTex);
+    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, kCubeFaceCount, kCubeFaceSectionCount, 0, GL_RGBA,
+                 GL_UNSIGNED_BYTE, nullptr);
+
+    GLFramebuffer fbo;
+    glBindFramebuffer(GL_FRAMEBUFFER, fbo);
+    glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, fboTex, 0);
+    EXPECT_GL_NO_ERROR();
+
+    // Create a program that samples from 6x4 directions of the cubemap, draw and verify that the
+    // colors match the right color from |faceColors|.
+    constexpr char kFS[] = R"(precision mediump float;
+
+uniform samplerCube texCube;
+
+const mat4 coordInSection = mat4(
+    vec4(-0.5, -0.5, 0, 0),
+    vec4( 0.5, -0.5, 0, 0),
+    vec4(-0.5,  0.5, 0, 0),
+    vec4( 0.5,  0.5, 0, 0)
+);
+
+void main()
+{
+    vec3 coord;
+    if (gl_FragCoord.x < 2.0)
+    {
+        coord.x = gl_FragCoord.x < 1.0 ? 1.0 : -1.0;
+        coord.zy = coordInSection[int(gl_FragCoord.y)].xy;
+    }
+    else if (gl_FragCoord.x < 4.0)
+    {
+        coord.y = gl_FragCoord.x < 3.0 ? 1.0 : -1.0;
+        coord.xz = coordInSection[int(gl_FragCoord.y)].xy;
+    }
+    else
+    {
+        coord.z = gl_FragCoord.x < 5.0 ? 1.0 : -1.0;
+        coord.xy = coordInSection[int(gl_FragCoord.y)].xy;
+    }
+
+    gl_FragColor = textureCube(texCube, coord);
+})";
+
+    ANGLE_GL_PROGRAM(program, essl1_shaders::vs::Simple(), kFS);
+    glUseProgram(program);
+
+    GLint texCubeLocation = glGetUniformLocation(program, "texCube");
+    ASSERT_NE(-1, texCubeLocation);
+    glUniform1i(texCubeLocation, 0);
+
+    drawQuad(program, essl1_shaders::PositionAttrib(), 0.5f);
+    EXPECT_GL_NO_ERROR();
+
+    for (GLenum face = 0; face < kCubeFaceCount; face++)
+    {
+        // The following table defines the translation from textureCube coordinates to coordinates
+        // in each face.  The framebuffer has width 6 and height 4.  Every column corresponding to
+        // an x value represents one cube face.  The values in rows are samples from the four
+        // sections of the face.
+        //
+        // Major axis direction  Target                        sc   tc  ma
+        //         +rx           TEXTURE_CUBE_MAP_POSITIVE_X  -rz  -ry  rx
+        //         -rx           TEXTURE_CUBE_MAP_NEGATIVE_X   rz  -ry  rx
+        //         +ry           TEXTURE_CUBE_MAP_POSITIVE_Y   rx   rz  ry
+        //         -ry           TEXTURE_CUBE_MAP_NEGATIVE_Y   rx  -rz  ry
+        //         +rz           TEXTURE_CUBE_MAP_POSITIVE_Z   rx  -ry  rz
+        //         -rz           TEXTURE_CUBE_MAP_NEGATIVE_Z  -rx  -ry  rz
+        //
+        // This table is used only to determine the direction of growth for s and t.  The shader
+        // always generates (row,col) coordinates (0, 0), (0, 1), (1, 0), (1, 1) which is the order
+        // the data is uploaded to the faces, but based on the table above, the sample order would
+        // be different.
+        constexpr size_t faceSampledSections[kCubeFaceCount][kCubeFaceSectionCount] = {
+            {3, 2, 1, 0}, {2, 3, 0, 1}, {0, 1, 2, 3}, {2, 3, 0, 1}, {2, 3, 0, 1}, {3, 2, 1, 0},
+        };
+
+        for (size_t section = 0; section < kCubeFaceSectionCount; ++section)
+        {
+            const GLColor sectionColor = faceColors[face][faceSampledSections[face][section]];
+
+            EXPECT_PIXEL_COLOR_EQ(face, section, sectionColor)
+                << "face " << face << ", section " << section;
+        }
+    }
+    EXPECT_GL_NO_ERROR();
+}
+
 // Use this to select which configurations (e.g. which renderer, which GLES major version) these
 // tests should be run against.
 ANGLE_INSTANTIATE_TEST(CubeMapTextureTest,
@ -120,4 +269,5 @@ ANGLE_INSTANTIATE_TEST(CubeMapTextureTest,
                       ES3_OPENGL(),
                       ES2_OPENGLES(),
                       ES3_OPENGLES(),
-                       ES2_VULKAN());
+                       ES2_VULKAN(),
+                       ES3_VULKAN());
--- a/src/tests/test_utils/ANGLETest.cpp
+++ b/src/tests/test_utils/ANGLETest.cpp
@ -176,18 +176,10 @@ bool ShouldAlwaysForceNewDisplay()
 }
 }  // anonymous namespace

-GLColorRGB::GLColorRGB() : R(0), G(0), B(0) {}
-
-GLColorRGB::GLColorRGB(GLubyte r, GLubyte g, GLubyte b) : R(r), G(g), B(b) {}
-
 GLColorRGB::GLColorRGB(const Vector3 &floatColor)
    : R(ColorDenorm(floatColor.x())), G(ColorDenorm(floatColor.y())), B(ColorDenorm(floatColor.z()))
 {}

-GLColor::GLColor() : R(0), G(0), B(0), A(0) {}
-
-GLColor::GLColor(GLubyte r, GLubyte g, GLubyte b, GLubyte a) : R(r), G(g), B(b), A(a) {}
-
 GLColor::GLColor(const Vector4 &floatColor)
    : R(ColorDenorm(floatColor.x())),
      G(ColorDenorm(floatColor.y())),
--- a/src/tests/test_utils/ANGLETest.h
+++ b/src/tests/test_utils/ANGLETest.h
@ -76,8 +76,8 @@ namespace angle
 {
 struct GLColorRGB
 {
-    GLColorRGB();
-    GLColorRGB(GLubyte r, GLubyte g, GLubyte b);
+    constexpr GLColorRGB() : R(0), G(0), B(0) {}
+    constexpr GLColorRGB(GLubyte r, GLubyte g, GLubyte b) : R(r), G(g), B(b) {}
    GLColorRGB(const angle::Vector3 &floatColor);

    const GLubyte *data() const { return &R; }
@ -94,8 +94,8 @@ struct GLColorRGB

 struct GLColor
 {
-    GLColor();
-    GLColor(GLubyte r, GLubyte g, GLubyte b, GLubyte a);
+    constexpr GLColor() : R(0), G(0), B(0), A(0) {}
+    constexpr GLColor(GLubyte r, GLubyte g, GLubyte b, GLubyte a) : R(r), G(g), B(b), A(a) {}
    GLColor(const angle::Vector4 &floatColor);
    GLColor(GLuint colorValue);