From bf07e5fa7b118ca17a2a83021a2013d9f090474e Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Tue, 9 Apr 2019 11:50:45 +0200 Subject: [PATCH] MSL: Fix OpLoad of array which is forced to a temporary. --- .../op-load-forced-temporary-array.asm.frag | 37 +++++++++ .../op-load-forced-temporary-array.asm.frag | 60 ++++++++++++++ spirv_glsl.cpp | 78 +++++++++++++------ spirv_glsl.hpp | 2 + 4 files changed, 153 insertions(+), 24 deletions(-) create mode 100644 reference/shaders-msl-no-opt/asm/vert/op-load-forced-temporary-array.asm.frag create mode 100644 shaders-msl-no-opt/asm/vert/op-load-forced-temporary-array.asm.frag diff --git a/reference/shaders-msl-no-opt/asm/vert/op-load-forced-temporary-array.asm.frag b/reference/shaders-msl-no-opt/asm/vert/op-load-forced-temporary-array.asm.frag new file mode 100644 index 00000000..b77501b0 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/vert/op-load-forced-temporary-array.asm.frag @@ -0,0 +1,37 @@ +#include +#include + +using namespace metal; + +constant float _21 = {}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + float _23[2]; + for (int _25 = 0; _25 < 2; ) + { + _23[_25] = 0.0; + _25++; + continue; + } + float _31[2]; + spvArrayCopyFromStack1(_31, _23); + float _37; + if (as_type(3.0) != 0u) + { + _37 = _31[0]; + } + else + { + _37 = _21; + } + out.gl_Position = float4(0.0, 0.0, 0.0, _37); + return out; +} + diff --git a/shaders-msl-no-opt/asm/vert/op-load-forced-temporary-array.asm.frag b/shaders-msl-no-opt/asm/vert/op-load-forced-temporary-array.asm.frag new file mode 100644 index 00000000..fecc83a9 --- /dev/null +++ b/shaders-msl-no-opt/asm/vert/op-load-forced-temporary-array.asm.frag @@ -0,0 +1,60 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 39 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %vs_main "main" %gl_Position + OpSource HLSL 600 + OpName %vs_main "vs_main" + OpDecorate %gl_Position BuiltIn Position + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_2 = OpConstant %int 2 + %float = OpTypeFloat 32 + %float_0 = OpConstant %float 0 + %int_1 = OpConstant %int 1 + %float_3 = OpConstant %float 3 + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %15 = OpTypeFunction %void + %uint_2 = OpConstant %uint 2 +%_arr_float_uint_2 = OpTypeArray %float %uint_2 +%_ptr_Function__arr_float_uint_2 = OpTypePointer Function %_arr_float_uint_2 +%_ptr_Function_float = OpTypePointer Function %float + %bool = OpTypeBool +%gl_Position = OpVariable %_ptr_Output_v4float Output + %21 = OpUndef %float + %vs_main = OpFunction %void None %15 + %22 = OpLabel + %23 = OpVariable %_ptr_Function__arr_float_uint_2 Function + OpBranch %24 + %24 = OpLabel + %25 = OpPhi %int %int_0 %22 %26 %27 + %28 = OpSLessThan %bool %25 %int_2 + OpLoopMerge %29 %27 None + OpBranchConditional %28 %27 %29 + %27 = OpLabel + %30 = OpAccessChain %_ptr_Function_float %23 %25 + OpStore %30 %float_0 + %26 = OpIAdd %int %25 %int_1 + OpBranch %24 + %29 = OpLabel + %31 = OpLoad %_arr_float_uint_2 %23 + %32 = OpBitcast %uint %float_3 + %33 = OpINotEqual %bool %32 %uint_0 + OpSelectionMerge %34 None + OpBranchConditional %33 %35 %34 + %35 = OpLabel + %36 = OpCompositeExtract %float %31 0 + OpBranch %34 + %34 = OpLabel + %37 = OpPhi %float %21 %29 %36 %35 + %38 = OpCompositeConstruct %v4float %float_0 %float_0 %float_0 %37 + OpStore %gl_Position %38 + OpReturn + OpFunctionEnd diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index 88c2f95a..234aafd9 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -3581,6 +3581,41 @@ string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t return res; } +SPIRExpression &CompilerGLSL::emit_uninitialized_temporary_expression(uint32_t type, uint32_t id) +{ + forced_temporaries.insert(id); + emit_uninitialized_temporary(type, id); + return set(id, to_name(id), type, true); +} + +void CompilerGLSL::emit_uninitialized_temporary(uint32_t result_type, uint32_t result_id) +{ + // If we're declaring temporaries inside continue blocks, + // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables. + if (current_continue_block && !hoisted_temporaries.count(result_id)) + { + auto &header = get(current_continue_block->loop_dominator); + if (find_if(begin(header.declare_temporary), end(header.declare_temporary), + [result_type, result_id](const pair &tmp) { + return tmp.first == result_type && tmp.second == result_id; + }) == end(header.declare_temporary)) + { + header.declare_temporary.emplace_back(result_type, result_id); + hoisted_temporaries.insert(result_id); + force_recompile(); + } + } + else if (hoisted_temporaries.count(result_id) == 0) + { + auto &type = get(result_type); + auto &flags = ir.meta[result_id].decoration.decoration_flags; + + // The result_id has not been made into an expression yet, so use flags interface. + add_local_variable_name(result_id); + statement(flags_to_precision_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), ";"); + } +} + string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id) { auto &type = get(result_type); @@ -4841,10 +4876,7 @@ void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, { forced_temporaries.insert(id); auto &type = get(result_type); - auto &flags = ir.meta[id].decoration.decoration_flags; - statement(flags_to_precision_qualifiers_glsl(type, flags), variable_decl(type, to_name(id)), ";"); - set(id, to_name(id), result_type, true); - + emit_uninitialized_temporary_expression(result_type, id); statement(to_expression(id), ".", to_member_name(type, 0), " = ", "modf(", to_expression(args[0]), ", ", to_expression(id), ".", to_member_name(type, 1), ");"); break; @@ -4984,10 +5016,7 @@ void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, { forced_temporaries.insert(id); auto &type = get(result_type); - auto &flags = ir.meta[id].decoration.decoration_flags; - statement(flags_to_precision_qualifiers_glsl(type, flags), variable_decl(type, to_name(id)), ";"); - set(id, to_name(id), result_type, true); - + emit_uninitialized_temporary_expression(result_type, id); statement(to_expression(id), ".", to_member_name(type, 0), " = ", "frexp(", to_expression(args[0]), ", ", to_expression(id), ".", to_member_name(type, 1), ");"); break; @@ -7169,8 +7198,19 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) bool usage_tracking = ptr_expression && flattened_buffer_blocks.count(ptr_expression->loaded_from) != 0 && (type.basetype == SPIRType::Struct || (type.columns > 1)); - auto &e = emit_op(result_type, id, expr, forward, !usage_tracking); - e.need_transpose = need_transpose; + SPIRExpression *e = nullptr; + if (!backend.array_is_value_type && !type.array.empty() && !forward) + { + // Complicated load case where we need to make a copy of ptr, but we cannot, because + // it is an array, and our backend does not support arrays as value types. + // Emit the temporary, and copy it explicitly. + e = &emit_uninitialized_temporary_expression(result_type, id); + emit_array_copy(to_expression(id), ptr); + } + else + e = &emit_op(result_type, id, expr, forward, !usage_tracking); + + e->need_transpose = need_transpose; register_read(id, ptr, forward); // Pass through whether the result is of a packed type. @@ -7183,7 +7223,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) inherit_expression_dependencies(id, ptr); if (forward) - add_implied_read_expression(e, ptr); + add_implied_read_expression(*e, ptr); break; } @@ -7430,10 +7470,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) { // We cannot construct array of arrays because we cannot treat the inputs // as value types. Need to declare the array-of-arrays, and copy in elements one by one. - forced_temporaries.insert(id); - auto &flags = ir.meta[id].decoration.decoration_flags; - statement(flags_to_precision_qualifiers_glsl(out_type, flags), variable_decl(out_type, to_name(id)), ";"); - set(id, to_name(id), result_type, true); + emit_uninitialized_temporary_expression(result_type, id); for (uint32_t i = 0; i < length; i++) emit_array_copy(join(to_expression(id), "[", i, "]"), elems[i]); } @@ -7832,12 +7869,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) uint32_t result_id = ops[1]; uint32_t op0 = ops[2]; uint32_t op1 = ops[3]; - forced_temporaries.insert(result_id); auto &type = get(result_type); - auto &flags = ir.meta[result_id].decoration.decoration_flags; - statement(flags_to_precision_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), ";"); - set(result_id, to_name(result_id), result_type, true); - + emit_uninitialized_temporary_expression(result_type, result_id); const char *op = opcode == OpIAddCarry ? "uaddCarry" : "usubBorrow"; statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", op, "(", to_expression(op0), ", ", @@ -7859,10 +7892,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) uint32_t op1 = ops[3]; forced_temporaries.insert(result_id); auto &type = get(result_type); - auto &flags = ir.meta[result_id].decoration.decoration_flags; - statement(flags_to_precision_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), ";"); - set(result_id, to_name(result_id), result_type, true); - + emit_uninitialized_temporary_expression(result_type, result_id); const char *op = opcode == OpUMulExtended ? "umulExtended" : "imulExtended"; statement(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(result_id), ".", diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp index ca447cde..a9ec3aae 100644 --- a/spirv_glsl.hpp +++ b/spirv_glsl.hpp @@ -484,6 +484,8 @@ protected: const char *index_to_swizzle(uint32_t index); std::string remap_swizzle(const SPIRType &result_type, uint32_t input_components, const std::string &expr); std::string declare_temporary(uint32_t type, uint32_t id); + void emit_uninitialized_temporary(uint32_t type, uint32_t id); + SPIRExpression &emit_uninitialized_temporary_expression(uint32_t type, uint32_t id); void append_global_func_args(const SPIRFunction &func, uint32_t index, std::vector &arglist); std::string to_expression(uint32_t id, bool register_expression_read = true); std::string to_enclosed_expression(uint32_t id, bool register_expression_read = true);