Bug 1637148 - Update glslopt to fix intermittent build error. r=lsalzman

Update webrender's dependency on glslopt to 0.1.4. This includes an updated version of Mesa, which
has fixed a race condition that was causing intermittent build failures.

Differential Revision: https://phabricator.services.mozilla.com/D85254
Jamie Nicol 2020-07-29 15:12:38 +00:00
Parent 24a7f188b8
Commit 6423c052ae
119 changed files with 11094 additions and 2914 deletions

Cargo.lock (generated)

@ -2051,9 +2051,9 @@ dependencies = [
[[package]]
name = "glslopt"
version = "0.1.2"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f22b383fcf6f85c4a268af39a0758ec40970e5f9f8fe9809e4415d48409b8379"
checksum = "065c2e941ad25c18428724fd2ad0bc3967cb96242e8db92f3794eedb15c02e44"
dependencies = [
"cc",
]

gfx/wr/Cargo.lock (generated)

@ -685,7 +685,7 @@ dependencies = [
[[package]]
name = "glslopt"
version = "0.1.2"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cc 1.0.50 (registry+https://github.com/rust-lang/crates.io-index)",
@ -1809,7 +1809,7 @@ dependencies = [
"freetype 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
"fxhash 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"gleam 0.12.1 (registry+https://github.com/rust-lang/crates.io-index)",
"glslopt 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
"glslopt 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
"image 0.23.3 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.68 (registry+https://github.com/rust-lang/crates.io-index)",
@ -2131,7 +2131,7 @@ dependencies = [
"checksum gleam 0.12.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3fdef5b9df6d3a261b80a5ac55e13bf93945725df2463c1b0a2e5a527dce0d37"
"checksum gleam 0.6.19 (registry+https://github.com/rust-lang/crates.io-index)" = "cae10d7c99d0e77b4766e850a60898a17c1abaf01075531f1066f03dc7dc5fc5"
"checksum glsl 4.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "766443890761b3c4edcce86cafaac97971b200662fbdd0446eb7c6b99b4401ea"
"checksum glslopt 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f22b383fcf6f85c4a268af39a0758ec40970e5f9f8fe9809e4415d48409b8379"
"checksum glslopt 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "065c2e941ad25c18428724fd2ad0bc3967cb96242e8db92f3794eedb15c02e44"
"checksum glutin 0.21.2 (registry+https://github.com/rust-lang/crates.io-index)" = "5371b35b309dace06be1b81b5f6adb1c9de578b7dbe1e74bf7e4ef762cf6febd"
"checksum glutin_egl_sys 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "772edef3b28b8ad41e4ea202748e65eefe8e5ffd1f4535f1219793dbb20b3d4c"
"checksum glutin_emscripten_sys 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "80de4146df76e8a6c32b03007bc764ff3249dcaeb4f675d68a06caf1bac363f1"


@ -22,7 +22,7 @@ leak_checks = []
[build-dependencies]
build-parallel = "0.1.1"
glslopt = "0.1.2"
glslopt = "0.1.4"
webrender_build = { version = "0.0.1", path = "../webrender_build" }
[dependencies]

File diff suppressed because one or more lines are too long

third_party/rust/glslopt/Cargo.toml (vendored)

@ -13,7 +13,7 @@
[package]
edition = "2018"
name = "glslopt"
version = "0.1.2"
version = "0.1.4"
authors = ["Jamie Nicol <jnicol@mozilla.com>"]
description = "Optimizes GLSL shader code"
keywords = ["opengl", "gl", "gles", "glsl", "shader"]

third_party/rust/glslopt/README.md (vendored, new file)

@ -0,0 +1,20 @@
# glslopt-rs
Rust bindings to [glsl-optimizer](https://github.com/jamienicol/glsl-optimizer).
## Updating glsl-optimizer
To update the version of glsl-optimizer, update the git submodule:
```sh
git submodule update --remote glsl-optimizer
```
Then, if required, regenerate the bindings:
```sh
cargo install bindgen
bindgen wrapper.hpp -o src/bindings.rs
```
Then commit the changes.

third_party/rust/glslopt/build.rs (vendored)

@ -7,6 +7,7 @@ use std::env;
fn configure(build: &mut cc::Build) -> &mut cc::Build {
build.define("__STDC_FORMAT_MACROS", None);
if cfg!(target_os = "linux") {
build.define("_GNU_SOURCE", None);
build.define("HAVE_ENDIAN_H", None);
}
if cfg!(target_os = "windows") {
@ -48,12 +49,14 @@ fn main() {
.file("glsl-optimizer/src/util/half_float.c")
.file("glsl-optimizer/src/util/hash_table.c")
.file("glsl-optimizer/src/util/mesa-sha1.c")
.file("glsl-optimizer/src/util/os_misc.c")
.file("glsl-optimizer/src/util/ralloc.c")
.file("glsl-optimizer/src/util/set.c")
.file("glsl-optimizer/src/util/sha1/sha1.c")
.file("glsl-optimizer/src/util/softfloat.c")
.file("glsl-optimizer/src/util/string_buffer.c")
.file("glsl-optimizer/src/util/strtod.c")
.file("glsl-optimizer/src/util/u_debug.c")
.compile("glcpp");
configure(&mut cc::Build::new())
@ -70,7 +73,6 @@ fn main() {
.file("glsl-optimizer/src/mesa/program/dummy_errors.c")
.file("glsl-optimizer/src/mesa/program/symbol_table.c")
.file("glsl-optimizer/src/mesa/main/extensions_table.c")
.file("glsl-optimizer/src/mesa/main/imports.c")
.file("glsl-optimizer/src/compiler/shader_enums.c")
.compile("mesa");
@ -137,6 +139,7 @@ fn main() {
.file("glsl-optimizer/src/compiler/glsl/loop_unroll.cpp")
.file("glsl-optimizer/src/compiler/glsl/lower_blend_equation_advanced.cpp")
.file("glsl-optimizer/src/compiler/glsl/lower_buffer_access.cpp")
.file("glsl-optimizer/src/compiler/glsl/lower_builtins.cpp")
.file("glsl-optimizer/src/compiler/glsl/lower_const_arrays_to_uniforms.cpp")
.file("glsl-optimizer/src/compiler/glsl/lower_cs_derived.cpp")
.file("glsl-optimizer/src/compiler/glsl/lower_discard_flow.cpp")
@ -148,11 +151,11 @@ fn main() {
.file("glsl-optimizer/src/compiler/glsl/lower_jumps.cpp")
.file("glsl-optimizer/src/compiler/glsl/lower_mat_op_to_vec.cpp")
.file("glsl-optimizer/src/compiler/glsl/lower_named_interface_blocks.cpp")
.file("glsl-optimizer/src/compiler/glsl/lower_noise.cpp")
.file("glsl-optimizer/src/compiler/glsl/lower_offset_array.cpp")
.file("glsl-optimizer/src/compiler/glsl/lower_output_reads.cpp")
.file("glsl-optimizer/src/compiler/glsl/lower_packed_varyings.cpp")
.file("glsl-optimizer/src/compiler/glsl/lower_packing_builtins.cpp")
.file("glsl-optimizer/src/compiler/glsl/lower_precision.cpp")
.file("glsl-optimizer/src/compiler/glsl/lower_shared_reference.cpp")
.file("glsl-optimizer/src/compiler/glsl/lower_subroutine.cpp")
.file("glsl-optimizer/src/compiler/glsl/lower_tess_level.cpp")
@ -165,6 +168,7 @@ fn main() {
.file("glsl-optimizer/src/compiler/glsl/lower_vector_insert.cpp")
.file("glsl-optimizer/src/compiler/glsl/lower_vector.cpp")
.file("glsl-optimizer/src/compiler/glsl/lower_vertex_id.cpp")
.file("glsl-optimizer/src/compiler/glsl/lower_xfb_varying.cpp")
.file("glsl-optimizer/src/compiler/glsl/opt_algebraic.cpp")
.file("glsl-optimizer/src/compiler/glsl/opt_array_splitting.cpp")
.file("glsl-optimizer/src/compiler/glsl/opt_conditional_discard.cpp")


@ -4,6 +4,8 @@ project(glsl_optimizer VERSION 0.1
DESCRIPTION "GLSL Optimizer"
LANGUAGES C CXX)
set(CMAKE_CXX_STANDARD 11)
include_directories(include)
include_directories(src/mesa)
include_directories(src/mapi)
@ -16,6 +18,7 @@ include_directories(src/util)
add_definitions(-D__STDC_FORMAT_MACROS)
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux")
add_definitions(-D_GNU_SOURCE)
add_definitions(-DHAVE_ENDIAN_H)
endif()
if(${CMAKE_SYSTEM_NAME} MATCHES "Windows")
@ -25,17 +28,6 @@ else()
add_definitions(-DHAVE_TIMESPEC_GET)
endif()
option (DEBUG "Enable debugging" FALSE)
if(${DEBUG} MATCHES "on")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -O0")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Os -DNDEBUG")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Os -DNDEBUG")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -s")
endif()
add_library(glcpp STATIC "src/compiler/glsl/glcpp/glcpp-lex.c"
"src/compiler/glsl/glcpp/glcpp-parse.c"
"src/compiler/glsl/glcpp/glcpp.h"
@ -45,17 +37,18 @@ add_library(glcpp STATIC "src/compiler/glsl/glcpp/glcpp-lex.c"
"src/util/half_float.c"
"src/util/hash_table.c"
"src/util/mesa-sha1.c"
"src/util/os_misc.c"
"src/util/ralloc.c"
"src/util/set.c"
"src/util/sha1/sha1.c"
"src/util/softfloat.c"
"src/util/string_buffer.c"
"src/util/strtod.c")
"src/util/strtod.c"
"src/util/u_debug.c")
add_library(mesa STATIC "src/mesa/program/dummy_errors.c"
"src/mesa/program/symbol_table.c"
"src/mesa/main/extensions_table.c"
"src/mesa/main/imports.c")
"src/mesa/main/extensions_table.c")
add_library(glsl_optimizer STATIC "src/compiler/glsl_types.cpp"
"src/compiler/glsl/ast_array_index.cpp"
@ -111,6 +104,7 @@ add_library(glsl_optimizer STATIC "src/compiler/glsl_types.cpp"
"src/compiler/glsl/loop_unroll.cpp"
"src/compiler/glsl/lower_blend_equation_advanced.cpp"
"src/compiler/glsl/lower_buffer_access.cpp"
"src/compiler/glsl/lower_builtins.cpp"
"src/compiler/glsl/lower_const_arrays_to_uniforms.cpp"
"src/compiler/glsl/lower_cs_derived.cpp"
"src/compiler/glsl/lower_discard_flow.cpp"
@ -122,11 +116,11 @@ add_library(glsl_optimizer STATIC "src/compiler/glsl_types.cpp"
"src/compiler/glsl/lower_jumps.cpp"
"src/compiler/glsl/lower_mat_op_to_vec.cpp"
"src/compiler/glsl/lower_named_interface_blocks.cpp"
"src/compiler/glsl/lower_noise.cpp"
"src/compiler/glsl/lower_offset_array.cpp"
"src/compiler/glsl/lower_output_reads.cpp"
"src/compiler/glsl/lower_packed_varyings.cpp"
"src/compiler/glsl/lower_packing_builtins.cpp"
"src/compiler/glsl/lower_precision.cpp"
"src/compiler/glsl/lower_shared_reference.cpp"
"src/compiler/glsl/lower_subroutine.cpp"
"src/compiler/glsl/lower_tess_level.cpp"
@ -139,6 +133,7 @@ add_library(glsl_optimizer STATIC "src/compiler/glsl_types.cpp"
"src/compiler/glsl/lower_vector_insert.cpp"
"src/compiler/glsl/lower_vector.cpp"
"src/compiler/glsl/lower_vertex_id.cpp"
"src/compiler/glsl/lower_xfb_varying.cpp"
"src/compiler/glsl/opt_algebraic.cpp"
"src/compiler/glsl/opt_array_splitting.cpp"
"src/compiler/glsl/opt_conditional_discard.cpp"


@ -220,7 +220,7 @@ Q: What is the file naming convention in this directory?
Initially, there really wasn't one. We have since adopted one:
- Files that implement code lowering passes should be named lower_*
(e.g., lower_noise.cpp).
(e.g., lower_builtins.cpp).
- Files that implement optimization passes should be named opt_*.
- Files that implement a class that is used throughout the code should
take the name of that class (e.g., ir_hierarchical_visitor.cpp).


@ -663,6 +663,12 @@ struct ast_type_qualifier {
/** \{ */
unsigned derivative_group:1;
/** \} */
/**
* Flag set if GL_NV_viewport_array2 viewport_relative layout
* qualifier is used.
*/
unsigned viewport_relative:1;
}
/** \brief Set of flags, accessed by name. */
q;
@ -773,7 +779,7 @@ struct ast_type_qualifier {
* \note
* This field is only valid if \c explicit_image_format is set.
*/
GLenum image_format;
enum pipe_format image_format;
/**
* Arrangement of invocations used to calculate derivatives in a compute


@ -49,6 +49,13 @@ process_parameters(exec_list *instructions, exec_list *actual_parameters,
ast->set_is_lhs(true);
ir_rvalue *result = ast->hir(instructions, state);
/* Error happened processing function parameter */
if (!result) {
actual_parameters->push_tail(ir_rvalue::error_value(mem_ctx));
count++;
continue;
}
ir_constant *const constant =
result->constant_expression_value(mem_ctx);
@ -612,11 +619,6 @@ generate_call(exec_list *instructions, ir_function_signature *sig,
ir_call *call = new(ctx) ir_call(sig, deref,
actual_parameters, sub_var, array_idx);
instructions->push_tail(call);
if (sig->is_builtin()) {
/* inline immediately */
call->generate_inline(call);
call->remove();
}
/* Also emit any necessary out-parameter conversions. */
instructions->append_list(&post_call_conversions);


@ -1702,8 +1702,10 @@ ast_expression::do_hir(exec_list *instructions,
/* Break out if operand types were not parsed successfully. */
if ((op[0]->type == glsl_type::error_type ||
op[1]->type == glsl_type::error_type))
op[1]->type == glsl_type::error_type)) {
error_emitted = true;
break;
}
type = arithmetic_result_type(op[0], op[1],
(this->oper == ast_mul_assign),
@ -2144,7 +2146,7 @@ ast_expression::do_hir(exec_list *instructions,
}
}
type = NULL; /* use result->type, not type. */
assert(result != NULL || !needs_rvalue);
assert(error_emitted || (result != NULL || !needs_rvalue));
if (result && result->type->is_error() && !error_emitted)
_mesa_glsl_error(& loc, state, "type mismatch");
@ -3510,7 +3512,7 @@ apply_image_qualifier_to_variable(const struct ast_type_qualifier *qual,
"`writeonly' must have a format layout qualifier");
}
}
var->data.image_format = GL_NONE;
var->data.image_format = PIPE_FORMAT_NONE;
}
/* From page 70 of the GLSL ES 3.1 specification:
@ -3520,9 +3522,9 @@ apply_image_qualifier_to_variable(const struct ast_type_qualifier *qual,
* readonly or the memory qualifier writeonly."
*/
if (state->es_shader &&
var->data.image_format != GL_R32F &&
var->data.image_format != GL_R32I &&
var->data.image_format != GL_R32UI &&
var->data.image_format != PIPE_FORMAT_R32_FLOAT &&
var->data.image_format != PIPE_FORMAT_R32_SINT &&
var->data.image_format != PIPE_FORMAT_R32_UINT &&
!var->data.memory_read_only &&
!var->data.memory_write_only) {
_mesa_glsl_error(loc, state, "image variables of format other than r32f, "
@ -3559,6 +3561,16 @@ is_conflicting_fragcoord_redeclaration(struct _mesa_glsl_parse_state *state,
return false;
}
static inline bool
is_conflicting_layer_redeclaration(struct _mesa_glsl_parse_state *state,
const struct ast_type_qualifier *qual)
{
if (state->redeclares_gl_layer) {
return state->layer_viewport_relative != qual->flags.q.viewport_relative;
}
return false;
}
static inline void
validate_array_dimensions(const glsl_type *t,
struct _mesa_glsl_parse_state *state,
@ -3948,6 +3960,21 @@ apply_layout_qualifier_to_variable(const struct ast_type_qualifier *qual,
"sample_interlock_ordered and sample_interlock_unordered, "
"only valid in fragment shader input layout declaration.");
}
if (var->name != NULL && strcmp(var->name, "gl_Layer") == 0) {
if (is_conflicting_layer_redeclaration(state, qual)) {
_mesa_glsl_error(loc, state, "gl_Layer redeclaration with "
"different viewport_relative setting than earlier");
}
state->redeclares_gl_layer = 1;
if (qual->flags.q.viewport_relative) {
state->layer_viewport_relative = 1;
}
} else if (qual->flags.q.viewport_relative) {
_mesa_glsl_error(loc, state,
"viewport_relative qualifier "
"can only be applied to gl_Layer.");
}
}
static void
@ -4389,6 +4416,11 @@ get_variable_being_redeclared(ir_variable **var_ptr, YYLTYPE loc,
earlier->data.precision = var->data.precision;
earlier->data.memory_coherent = var->data.memory_coherent;
} else if (state->NV_viewport_array2_enable &&
strcmp(var->name, "gl_Layer") == 0 &&
earlier->data.how_declared == ir_var_declared_implicitly) {
/* No need to do anything, just allow it. Qualifier is stored in state */
} else if ((earlier->data.how_declared == ir_var_declared_implicitly &&
state->allow_builtin_variable_redeclaration) ||
allow_all_redeclarations) {
@ -4960,12 +4992,50 @@ ast_declarator_list::hir(exec_list *instructions,
* size4x32 rgba32f rgba32i rgba32ui"
*/
if (strncmp(this->type->specifier->type_name, "image", strlen("image")) == 0) {
this->type->qualifier.image_format = GL_R8 +
this->type->qualifier.image_format - GL_R8I;
switch (this->type->qualifier.image_format) {
case PIPE_FORMAT_R8_SINT:
/* No valid qualifier in this case, driver will need to look at
* the underlying image's format (just like no qualifier being
* present).
*/
this->type->qualifier.image_format = PIPE_FORMAT_NONE;
break;
case PIPE_FORMAT_R16_SINT:
this->type->qualifier.image_format = PIPE_FORMAT_R16_FLOAT;
break;
case PIPE_FORMAT_R32_SINT:
this->type->qualifier.image_format = PIPE_FORMAT_R32_FLOAT;
break;
case PIPE_FORMAT_R32G32_SINT:
this->type->qualifier.image_format = PIPE_FORMAT_R32G32_FLOAT;
break;
case PIPE_FORMAT_R32G32B32A32_SINT:
this->type->qualifier.image_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
break;
default:
unreachable("Unknown image format");
}
this->type->qualifier.image_base_type = GLSL_TYPE_FLOAT;
} else if (strncmp(this->type->specifier->type_name, "uimage", strlen("uimage")) == 0) {
this->type->qualifier.image_format = GL_R8UI +
this->type->qualifier.image_format - GL_R8I;
switch (this->type->qualifier.image_format) {
case PIPE_FORMAT_R8_SINT:
this->type->qualifier.image_format = PIPE_FORMAT_R8_UINT;
break;
case PIPE_FORMAT_R16_SINT:
this->type->qualifier.image_format = PIPE_FORMAT_R16_UINT;
break;
case PIPE_FORMAT_R32_SINT:
this->type->qualifier.image_format = PIPE_FORMAT_R32_UINT;
break;
case PIPE_FORMAT_R32G32_SINT:
this->type->qualifier.image_format = PIPE_FORMAT_R32G32_UINT;
break;
case PIPE_FORMAT_R32G32B32A32_SINT:
this->type->qualifier.image_format = PIPE_FORMAT_R32G32B32A32_UINT;
break;
default:
unreachable("Unknown image format");
}
this->type->qualifier.image_base_type = GLSL_TYPE_UINT;
} else if (strncmp(this->type->specifier->type_name, "iimage", strlen("iimage")) == 0) {
this->type->qualifier.image_base_type = GLSL_TYPE_INT;
@ -7649,7 +7719,7 @@ ast_process_struct_or_iface_block_members(exec_list *instructions,
"qualifier");
}
fields[i].image_format = GL_NONE;
fields[i].image_format = PIPE_FORMAT_NONE;
}
}
}
@ -8213,6 +8283,21 @@ ast_interface_block::hir(exec_list *instructions,
}
ir_typedecl_statement* stmt = new(state) ir_typedecl_statement(block_type);
/* Push the interface declarations to the top.
* However, do not insert declarations before default precision
* statements or other declarations
*/
ir_instruction* before_node = (ir_instruction*)instructions->get_head();
while (before_node &&
(before_node->ir_type == ir_type_precision ||
before_node->ir_type == ir_type_typedecl))
before_node = (ir_instruction*)before_node->next;
if (before_node)
before_node->insert_before(stmt);
else
instructions->push_head(stmt);
/* Page 39 (page 45 of the PDF) of section 4.3.7 in the GLSL ES 3.00 spec
* says:
*


@ -6650,103 +6650,52 @@ builtin_builder::_fwidthFine(const glsl_type *type)
ir_function_signature *
builtin_builder::_noise1(const glsl_type *type)
{
return unop(v110, ir_unop_noise, glsl_type::float_type, type);
/* From the GLSL 4.60 specification:
*
* "The noise functions noise1, noise2, noise3, and noise4 have been
* deprecated starting with version 4.4 of GLSL. When not generating
* SPIR-V they are defined to return the value 0.0 or a vector whose
* components are all 0.0. When generating SPIR-V the noise functions
* are not declared and may not be used."
*
* Earlier versions of the GLSL specification attempted to define some
* sort of statistical noise function. However, the function's
* characteristics have always been such that always returning 0 is
* valid and Mesa has always returned 0 for noise on most drivers.
*/
ir_variable *p = in_var(type, "p");
MAKE_SIG(glsl_type::float_type, v110, 1, p);
body.emit(ret(imm(glsl_type::float_type, ir_constant_data())));
return sig;
}
ir_function_signature *
builtin_builder::_noise2(const glsl_type *type)
{
/* See builtin_builder::_noise1 */
ir_variable *p = in_var(type, "p");
MAKE_SIG(glsl_type::vec2_type, v110, 1, p);
ir_constant_data b_offset;
b_offset.f[0] = 601.0f;
b_offset.f[1] = 313.0f;
b_offset.f[2] = 29.0f;
b_offset.f[3] = 277.0f;
ir_variable *a = body.make_temp(glsl_type::float_type, "a");
ir_variable *b = body.make_temp(glsl_type::float_type, "b");
ir_variable *t = body.make_temp(glsl_type::vec2_type, "t");
body.emit(assign(a, expr(ir_unop_noise, p)));
body.emit(assign(b, expr(ir_unop_noise, add(p, imm(type, b_offset)))));
body.emit(assign(t, a, WRITEMASK_X));
body.emit(assign(t, b, WRITEMASK_Y));
body.emit(ret(t));
body.emit(ret(imm(glsl_type::vec2_type, ir_constant_data())));
return sig;
}
ir_function_signature *
builtin_builder::_noise3(const glsl_type *type)
{
/* See builtin_builder::_noise1 */
ir_variable *p = in_var(type, "p");
MAKE_SIG(glsl_type::vec3_type, v110, 1, p);
ir_constant_data b_offset;
b_offset.f[0] = 601.0f;
b_offset.f[1] = 313.0f;
b_offset.f[2] = 29.0f;
b_offset.f[3] = 277.0f;
ir_constant_data c_offset;
c_offset.f[0] = 1559.0f;
c_offset.f[1] = 113.0f;
c_offset.f[2] = 1861.0f;
c_offset.f[3] = 797.0f;
ir_variable *a = body.make_temp(glsl_type::float_type, "a");
ir_variable *b = body.make_temp(glsl_type::float_type, "b");
ir_variable *c = body.make_temp(glsl_type::float_type, "c");
ir_variable *t = body.make_temp(glsl_type::vec3_type, "t");
body.emit(assign(a, expr(ir_unop_noise, p)));
body.emit(assign(b, expr(ir_unop_noise, add(p, imm(type, b_offset)))));
body.emit(assign(c, expr(ir_unop_noise, add(p, imm(type, c_offset)))));
body.emit(assign(t, a, WRITEMASK_X));
body.emit(assign(t, b, WRITEMASK_Y));
body.emit(assign(t, c, WRITEMASK_Z));
body.emit(ret(t));
body.emit(ret(imm(glsl_type::vec3_type, ir_constant_data())));
return sig;
}
ir_function_signature *
builtin_builder::_noise4(const glsl_type *type)
{
/* See builtin_builder::_noise1 */
ir_variable *p = in_var(type, "p");
MAKE_SIG(glsl_type::vec4_type, v110, 1, p);
ir_variable *_p = body.make_temp(type, "_p");
ir_constant_data p_offset;
p_offset.f[0] = 1559.0f;
p_offset.f[1] = 113.0f;
p_offset.f[2] = 1861.0f;
p_offset.f[3] = 797.0f;
body.emit(assign(_p, add(p, imm(type, p_offset))));
ir_constant_data offset;
offset.f[0] = 601.0f;
offset.f[1] = 313.0f;
offset.f[2] = 29.0f;
offset.f[3] = 277.0f;
ir_variable *a = body.make_temp(glsl_type::float_type, "a");
ir_variable *b = body.make_temp(glsl_type::float_type, "b");
ir_variable *c = body.make_temp(glsl_type::float_type, "c");
ir_variable *d = body.make_temp(glsl_type::float_type, "d");
ir_variable *t = body.make_temp(glsl_type::vec4_type, "t");
body.emit(assign(a, expr(ir_unop_noise, p)));
body.emit(assign(b, expr(ir_unop_noise, add(p, imm(type, offset)))));
body.emit(assign(c, expr(ir_unop_noise, _p)));
body.emit(assign(d, expr(ir_unop_noise, add(_p, imm(type, offset)))));
body.emit(assign(t, a, WRITEMASK_X));
body.emit(assign(t, b, WRITEMASK_Y));
body.emit(assign(t, c, WRITEMASK_Z));
body.emit(assign(t, d, WRITEMASK_W));
body.emit(ret(t));
body.emit(ret(imm(glsl_type::vec4_type, ir_constant_data())));
return sig;
}


@ -351,7 +351,7 @@ per_vertex_accumulator::add_field(int slot, const glsl_type *type,
this->fields[this->num_fields].memory_coherent = 0;
this->fields[this->num_fields].memory_volatile = 0;
this->fields[this->num_fields].memory_restrict = 0;
this->fields[this->num_fields].image_format = 0;
this->fields[this->num_fields].image_format = PIPE_FORMAT_NONE;
this->fields[this->num_fields].explicit_xfb_buffer = 0;
this->fields[this->num_fields].xfb_buffer = -1;
this->fields[this->num_fields].xfb_stride = -1;
@ -1084,8 +1084,13 @@ builtin_variable_generator::generate_vs_special_vars()
add_system_value(SYSTEM_VALUE_BASE_INSTANCE, int_t, "gl_BaseInstance");
add_system_value(SYSTEM_VALUE_DRAW_ID, int_t, "gl_DrawID");
}
if (state->EXT_draw_instanced_enable && state->is_version(0, 100))
add_system_value(SYSTEM_VALUE_INSTANCE_ID, int_t, GLSL_PRECISION_HIGH,
"gl_InstanceIDEXT");
if (state->ARB_draw_instanced_enable)
add_system_value(SYSTEM_VALUE_INSTANCE_ID, int_t, "gl_InstanceIDARB");
if (state->ARB_draw_instanced_enable || state->is_version(140, 300) ||
state->EXT_gpu_shader4_enable) {
add_system_value(SYSTEM_VALUE_INSTANCE_ID, int_t, GLSL_PRECISION_HIGH,
@ -1097,15 +1102,32 @@ builtin_variable_generator::generate_vs_special_vars()
add_system_value(SYSTEM_VALUE_DRAW_ID, int_t, "gl_DrawIDARB");
}
if (state->AMD_vertex_shader_layer_enable ||
state->ARB_shader_viewport_layer_array_enable) {
state->ARB_shader_viewport_layer_array_enable ||
state->NV_viewport_array2_enable) {
var = add_output(VARYING_SLOT_LAYER, int_t, "gl_Layer");
var->data.interpolation = INTERP_MODE_FLAT;
}
if (state->AMD_vertex_shader_viewport_index_enable ||
state->ARB_shader_viewport_layer_array_enable) {
state->ARB_shader_viewport_layer_array_enable ||
state->NV_viewport_array2_enable) {
var = add_output(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex");
var->data.interpolation = INTERP_MODE_FLAT;
}
if (state->NV_viewport_array2_enable) {
/* From the NV_viewport_array2 specification:
*
* "The variable gl_ViewportMask[] is available as an output variable
* in the VTG languages. The array has ceil(v/32) elements where v is
* the maximum number of viewports supported by the implementation."
*
* Since no drivers expose more than 16 viewports, we can simply set the
* array size to 1 rather than computing it and dealing with varying
* slot complication.
*/
var = add_output(VARYING_SLOT_VIEWPORT_MASK, array(int_t, 1),
"gl_ViewportMask");
var->data.interpolation = INTERP_MODE_FLAT;
}
if (compatibility) {
add_input(VERT_ATTRIB_POS, vec4_t, "gl_Vertex");
add_input(VERT_ATTRIB_NORMAL, vec3_t, "gl_Normal");
@ -1155,6 +1177,17 @@ builtin_variable_generator::generate_tcs_special_vars()
add_output(bbox_slot, array(vec4_t, 2), GLSL_PRECISION_HIGH,
"gl_BoundingBox")->data.patch = 1;
}
/* NOTE: These are completely pointless. Writing these will never go
* anywhere. But the spec demands it. So we add them with a slot of -1,
* which makes the data go nowhere.
*/
if (state->NV_viewport_array2_enable) {
add_output(-1, int_t, "gl_Layer");
add_output(-1, int_t, "gl_ViewportIndex");
add_output(-1, array(int_t, 1), "gl_ViewportMask");
}
}
@ -1183,12 +1216,18 @@ builtin_variable_generator::generate_tes_special_vars()
add_system_value(SYSTEM_VALUE_TESS_LEVEL_INNER, array(float_t, 2),
GLSL_PRECISION_HIGH, "gl_TessLevelInner");
}
if (state->ARB_shader_viewport_layer_array_enable) {
if (state->ARB_shader_viewport_layer_array_enable ||
state->NV_viewport_array2_enable) {
var = add_output(VARYING_SLOT_LAYER, int_t, "gl_Layer");
var->data.interpolation = INTERP_MODE_FLAT;
var = add_output(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex");
var->data.interpolation = INTERP_MODE_FLAT;
}
if (state->NV_viewport_array2_enable) {
var = add_output(VARYING_SLOT_VIEWPORT_MASK, array(int_t, 1),
"gl_ViewportMask");
var->data.interpolation = INTERP_MODE_FLAT;
}
}
@ -1208,6 +1247,11 @@ builtin_variable_generator::generate_gs_special_vars()
"gl_ViewportIndex");
var->data.interpolation = INTERP_MODE_FLAT;
}
if (state->NV_viewport_array2_enable) {
var = add_output(VARYING_SLOT_VIEWPORT_MASK, array(int_t, 1),
"gl_ViewportMask");
var->data.interpolation = INTERP_MODE_FLAT;
}
if (state->is_version(400, 320) || state->ARB_gpu_shader5_enable ||
state->OES_geometry_shader_enable || state->EXT_geometry_shader_enable) {
add_system_value(SYSTEM_VALUE_INVOCATION_ID, int_t, GLSL_PRECISION_HIGH,


@ -59,6 +59,11 @@
#define FLOAT_ROUND_UP 3
#define FLOAT_ROUNDING_MODE FLOAT_ROUND_NEAREST_EVEN
/* Relax propagation of NaN. Binary operations with a NaN source will still
* produce a NaN result, but it won't follow strict IEEE rules.
*/
#define RELAXED_NAN_PROPAGATION
/* Absolute value of a Float64 :
* Clear the sign bit
*/
@ -88,10 +93,7 @@ uint64_t
__fneg64(uint64_t __a)
{
uvec2 a = unpackUint2x32(__a);
uint t = a.y;
t ^= (1u << 31);
a.y = mix(t, a.y, __is_nan(__a));
a.y ^= (1u << 31);
return packUint2x32(a);
}
@ -165,17 +167,17 @@ __fne64(uint64_t a, uint64_t b)
uint
__extractFloat64Sign(uint64_t a)
{
return unpackUint2x32(a).y >> 31;
return unpackUint2x32(a).y & 0x80000000u;
}
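The sign is now kept in place as a bit-31 mask instead of being shifted down to bit 0, which lets later code OR it straight into packed results (see the `__packFloat64` change further down). A minimal C++ sketch of the two encodings, not part of the commit:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
    uint64_t bits = 0xC000000000000000ull;      // -2.0 as raw IEEE-754 bits
    uint32_t hi = uint32_t(bits >> 32);         // high word, as in unpackUint2x32
    uint32_t sign_bit  = hi >> 31;              // old encoding: 0 or 1
    uint32_t sign_mask = hi & 0x80000000u;      // new encoding: 0 or 0x80000000
    // Both encodings agree on negativity; the mask form needs no shift when
    // it is OR-ed back into a packed high word.
    std::printf("%u %08x\n", sign_bit, sign_mask);  // prints: 1 80000000
}
```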
/* Returns true if the 64-bit value formed by concatenating `a0' and `a1' is less
* than the 64-bit value formed by concatenating `b0' and `b1'. Otherwise,
* returns false.
/* Returns true if the signed 64-bit value formed by concatenating `a0' and
* `a1' is less than the signed 64-bit value formed by concatenating `b0' and
* `b1'. Otherwise, returns false.
*/
bool
lt64(uint a0, uint a1, uint b0, uint b1)
ilt64(uint a0, uint a1, uint b0, uint b1)
{
return (a0 < b0) || ((a0 == b0) && (a1 < b1));
return (int(a0) < int(b0)) || ((a0 == b0) && (a1 < b1));
}
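A small C++ transcription (a sketch, not part of the commit) showing that `ilt64` on two 32-bit halves matches a native signed 64-bit comparison:

```cpp
#include <cstdint>
#include <cstdio>

// a0/b0 are the high words (compared as signed), a1/b1 the low words
// (compared as unsigned), mirroring the GLSL helper above.
static bool ilt64(uint32_t a0, uint32_t a1, uint32_t b0, uint32_t b1) {
    return (int32_t(a0) < int32_t(b0)) || ((a0 == b0) && (a1 < b1));
}

int main() {
    int64_t a = -5, b = 3;
    bool got = ilt64(uint32_t(uint64_t(a) >> 32), uint32_t(a),
                     uint32_t(uint64_t(b) >> 32), uint32_t(b));
    std::printf("%d %d\n", int(a < b), int(got));  // prints: 1 1
}
```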
bool
@ -183,12 +185,42 @@ __flt64_nonnan(uint64_t __a, uint64_t __b)
{
uvec2 a = unpackUint2x32(__a);
uvec2 b = unpackUint2x32(__b);
uint aSign = __extractFloat64Sign(__a);
uint bSign = __extractFloat64Sign(__b);
if (aSign != bSign)
return (aSign != 0u) && ((((a.y | b.y)<<1) | a.x | b.x) != 0u);
return mix(lt64(a.y, a.x, b.y, b.x), lt64(b.y, b.x, a.y, a.x), aSign != 0u);
/* IEEE 754 floating point numbers are specifically designed so that, with
* two exceptions, values can be compared by bit-casting to signed integers
* with the same number of bits.
*
* From https://en.wikipedia.org/wiki/IEEE_754-1985#Comparing_floating-point_numbers:
*
* When comparing as 2's-complement integers: If the sign bits differ,
* the negative number precedes the positive number, so 2's complement
* gives the correct result (except that negative zero and positive zero
* should be considered equal). If both values are positive, the 2's
* complement comparison again gives the correct result. Otherwise (two
* negative numbers), the correct FP ordering is the opposite of the 2's
* complement ordering.
*
* The logic implied by the above quotation is:
*
* !both_are_zero(a, b) && (both_negative(a, b) ? a > b : a < b)
*
* This is equivalent to
*
* fne(a, b) && (both_negative(a, b) ? a >= b : a < b)
*
* fne(a, b) && (both_negative(a, b) ? !(a < b) : a < b)
*
* fne(a, b) && ((both_negative(a, b) && !(a < b)) ||
* (!both_negative(a, b) && (a < b)))
*
* (A!|B)&(A|!B) is (A xor B) which is implemented here using !=.
*
* fne(a, b) && (both_negative(a, b) != (a < b))
*/
bool lt = ilt64(a.y, a.x, b.y, b.x);
bool both_negative = (a.y & b.y & 0x80000000u) != 0;
return !__feq64_nonnan(__a, __b) && (lt != both_negative);
}
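The rule quoted in the comment is easy to check on the CPU. A C++ sketch (not part of the commit) applying the same ordering logic to native doubles via their raw bits:

```cpp
#include <cstdint>
#include <cstring>
#include <cstdio>

// lt via 2's-complement order of the raw bits; "fne" via bit inequality
// with +0.0/-0.0 treated as equal, as described above.
static bool flt_nonnan(double x, double y) {
    uint64_t a, b;
    std::memcpy(&a, &x, 8);
    std::memcpy(&b, &y, 8);
    bool lt = int64_t(a) < int64_t(b);
    bool both_negative = (a & b & 0x8000000000000000ull) != 0;
    bool fne = (a != b) && (((a | b) << 1) != 0);   // excludes the two zeros
    return fne && (lt != both_negative);
}

int main() {
    std::printf("%d %d %d\n",
                int(flt_nonnan(-2.0, -1.0)),   // 1: -2 < -1
                int(flt_nonnan(0.0, -0.0)),    // 0: signed zeros compare equal
                int(flt_nonnan(1.0, 1.5)));    // 1
}
```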
/* Returns true if the double-precision floating-point value `a' is less than
@ -198,10 +230,15 @@ __flt64_nonnan(uint64_t __a, uint64_t __b)
bool
__flt64(uint64_t a, uint64_t b)
{
if (__is_nan(a) || __is_nan(b))
return false;
/* This weird layout matters. Doing the "obvious" thing results in extra
* flow control being inserted to implement the short-circuit evaluation
* rules. Flow control is bad!
*/
bool x = !__is_nan(a);
bool y = !__is_nan(b);
bool z = __flt64_nonnan(a, b);
return __flt64_nonnan(a, b);
return (x && y && z);
}
/* Returns true if the double-precision floating-point value `a' is greater
@ -212,19 +249,45 @@ __flt64(uint64_t a, uint64_t b)
bool
__fge64(uint64_t a, uint64_t b)
{
if (__is_nan(a) || __is_nan(b))
return false;
/* This weird layout matters. Doing the "obvious" thing results in extra
* flow control being inserted to implement the short-circuit evaluation
* rules. Flow control is bad!
*/
bool x = !__is_nan(a);
bool y = !__is_nan(b);
bool z = !__flt64_nonnan(a, b);
return !__flt64_nonnan(a, b);
return (x && y && z);
}
uint64_t
__fsat64(uint64_t __a)
{
if (__flt64(__a, 0ul))
uvec2 a = unpackUint2x32(__a);
/* fsat(NaN) should be zero. */
if (__is_nan(__a) || int(a.y) < 0)
return 0ul;
if (__fge64(__a, 0x3FF0000000000000ul /* 1.0 */))
/* IEEE 754 floating point numbers are specifically designed so that, with
* two exceptions, values can be compared by bit-casting to signed integers
* with the same number of bits.
*
* From https://en.wikipedia.org/wiki/IEEE_754-1985#Comparing_floating-point_numbers:
*
* When comparing as 2's-complement integers: If the sign bits differ,
* the negative number precedes the positive number, so 2's complement
* gives the correct result (except that negative zero and positive zero
* should be considered equal). If both values are positive, the 2's
* complement comparison again gives the correct result. Otherwise (two
* negative numbers), the correct FP ordering is the opposite of the 2's
* complement ordering.
*
* We know that both values are not negative, and we know that at least one
* value is not zero. Therefore, we can just use the 2's complement
* comparison ordering.
*/
if (ilt64(0x3FF00000, 0x00000000, a.y, a.x))
return 0x3FF0000000000000ul;
return __a;
@ -376,7 +439,7 @@ __packFloat64(uint zSign, int zExp, uint zFrac0, uint zFrac1)
{
uvec2 z;
z.y = (zSign << 31) + (uint(zExp) << 20) + zFrac0;
z.y = zSign + (uint(zExp) << 20) + zFrac0;
z.x = zFrac1;
return packUint2x32(z);
}
@ -437,23 +500,25 @@ __roundAndPackFloat64(uint zSign,
}
return __packFloat64(zSign, 0x7FF, 0u, 0u);
}
if (zExp < 0) {
__shift64ExtraRightJamming(
zFrac0, zFrac1, zFrac2, -zExp, zFrac0, zFrac1, zFrac2);
zExp = 0;
if (roundNearestEven) {
increment = zFrac2 < 0u;
}
if (zExp < 0) {
__shift64ExtraRightJamming(
zFrac0, zFrac1, zFrac2, -zExp, zFrac0, zFrac1, zFrac2);
zExp = 0;
if (roundNearestEven) {
increment = zFrac2 < 0u;
} else {
if (zSign != 0u) {
increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) &&
(zFrac2 != 0u);
} else {
if (zSign != 0u) {
increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) &&
(zFrac2 != 0u);
} else {
increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) &&
(zFrac2 != 0u);
}
increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) &&
(zFrac2 != 0u);
}
}
}
if (increment) {
__add64(zFrac0, zFrac1, 0u, 1u, zFrac0, zFrac1);
zFrac1 &= ~((zFrac2 + uint(zFrac2 == 0u)) & uint(roundNearestEven));
@ -492,7 +557,7 @@ __roundAndPackUInt64(uint zSign, uint zFrac0, uint zFrac1, uint zFrac2)
zFrac1 &= ~(1u) + uint(zFrac2 == 0u) & uint(roundNearestEven);
}
return mix(packUint2x32(uvec2(zFrac1, zFrac0)), default_nan,
(zSign !=0u && (zFrac0 | zFrac1) != 0u));
(zSign != 0u && (zFrac0 | zFrac1) != 0u));
}
int64_t
@ -526,9 +591,9 @@ __roundAndPackInt64(uint zSign, uint zFrac0, uint zFrac1, uint zFrac2)
int64_t absZ = mix(int64_t(packUint2x32(uvec2(zFrac1, zFrac0))),
-int64_t(packUint2x32(uvec2(zFrac1, zFrac0))),
(zSign != 0u));
int64_t nan = mix(default_PosNaN, default_NegNaN, bool(zSign));
return mix(absZ, nan, bool(zSign ^ uint(absZ < 0)) && bool(absZ));
zSign != 0u);
int64_t nan = mix(default_PosNaN, default_NegNaN, zSign != 0u);
return mix(absZ, nan, ((zSign != 0u) != (absZ < 0)) && bool(absZ));
}
/* Returns the number of leading 0 bits before the most-significant 1 bit of
@ -537,9 +602,7 @@ __roundAndPackInt64(uint zSign, uint zFrac0, uint zFrac1, uint zFrac2)
int
__countLeadingZeros32(uint a)
{
int shiftCount;
shiftCount = mix(31 - findMSB(a), 32, a == 0u);
return shiftCount;
return 31 - findMSB(a);
}
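The deleted `mix()` was redundant because GLSL defines `findMSB(0u)` as -1, so `31 - findMSB(0u)` already evaluates to 32. A C++20 sketch of the same semantics (`std::countl_zero` standing in for the hardware instruction; not part of the commit):

```cpp
#include <bit>
#include <cstdint>
#include <cstdio>

static int findMSB(uint32_t a) {                 // GLSL semantics: findMSB(0u) == -1
    return a == 0u ? -1 : 31 - std::countl_zero(a);
}

static int countLeadingZeros32(uint32_t a) {
    return 31 - findMSB(a);                      // the simplified form above
}

int main() {
    std::printf("%d %d %d\n",
                countLeadingZeros32(0u),            // 32
                countLeadingZeros32(1u),            // 31
                countLeadingZeros32(0x80000000u));  // 0
}
```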
/* Takes an abstract floating-point value having sign `zSign', exponent `zExp',
@ -583,6 +646,12 @@ __normalizeRoundAndPackFloat64(uint zSign,
uint64_t
__propagateFloat64NaN(uint64_t __a, uint64_t __b)
{
#if defined RELAXED_NAN_PROPAGATION
uvec2 a = unpackUint2x32(__a);
uvec2 b = unpackUint2x32(__b);
return packUint2x32(uvec2(a.x | b.x, a.y | b.y));
#else
bool aIsNaN = __is_nan(__a);
bool bIsNaN = __is_nan(__b);
uvec2 a = unpackUint2x32(__a);
@ -591,8 +660,20 @@ __propagateFloat64NaN(uint64_t __a, uint64_t __b)
b.y |= 0x00080000u;
return packUint2x32(mix(b, mix(a, b, bvec2(bIsNaN, bIsNaN)), bvec2(aIsNaN, aIsNaN)));
#endif
}
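Under `RELAXED_NAN_PROPAGATION` the two inputs are simply OR-ed: if either input is a NaN, its all-ones exponent and nonzero mantissa survive the OR, so the result is still *a* NaN, just not the exact payload IEEE prescribes. A C++ check, not part of the commit:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
    uint64_t nan = 0x7FF8000000000001ull;  // a quiet NaN
    uint64_t two = 0x4000000000000000ull;  // 2.0
    uint64_t r = nan | two;
    // NaN test: exponent all ones, mantissa nonzero.
    bool is_nan = ((r >> 52) & 0x7FFu) == 0x7FFu &&
                  (r & 0xFFFFFFFFFFFFFull) != 0;
    std::printf("%d\n", int(is_nan));      // prints: 1
}
```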
/* If a shader is in the soft-fp64 path, it almost certainly has register
* pressure problems. Choose a method to exchange two values that does not
* require a temporary.
*/
#define EXCHANGE(a, b) \
do { \
a ^= b; \
b ^= a; \
a ^= b; \
} while (false)
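The macro is the classic XOR swap. One caveat worth noting (it cannot occur above, where the operands are always distinct locals): if both arguments alias the same object, the sequence zeroes it. A minimal C++ sketch, not part of the commit:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
    uint32_t a = 0x12345678u, b = 0x9ABCDEF0u;
    a ^= b; b ^= a; a ^= b;                    // the EXCHANGE sequence
    std::printf("%08x %08x\n", a, b);          // prints: 9abcdef0 12345678
}
```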
/* Returns the result of adding the double-precision floating-point values
* `a' and `b'. The operation is performed according to the IEEE Standard for
* Floating-Point Arithmetic.
@ -608,17 +689,16 @@ __fadd64(uint64_t a, uint64_t b)
uint bFracHi = __extractFloat64FracHi(b);
int aExp = __extractFloat64Exp(a);
int bExp = __extractFloat64Exp(b);
uint zFrac0 = 0u;
uint zFrac1 = 0u;
int expDiff = aExp - bExp;
if (aSign == bSign) {
uint zFrac2 = 0u;
uint zFrac0;
uint zFrac1;
uint zFrac2;
int zExp;
bool orig_exp_diff_is_zero = (expDiff == 0);
if (orig_exp_diff_is_zero) {
if (expDiff == 0) {
if (aExp == 0x7FF) {
bool propagate = (aFracHi | aFracLo | bFracHi | bFracLo) != 0u;
bool propagate = ((aFracHi | bFracHi) | (aFracLo| bFracLo)) != 0u;
return mix(a, __propagateFloat64NaN(a, b), propagate);
}
__add64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);
@ -629,29 +709,24 @@ __fadd64(uint64_t a, uint64_t b)
zExp = aExp;
__shift64ExtraRightJamming(
zFrac0, zFrac1, zFrac2, 1, zFrac0, zFrac1, zFrac2);
} else if (0 < expDiff) {
if (aExp == 0x7FF) {
bool propagate = (aFracHi | aFracLo) != 0u;
return mix(a, __propagateFloat64NaN(a, b), propagate);
} else {
if (expDiff < 0) {
EXCHANGE(aFracHi, bFracHi);
EXCHANGE(aFracLo, bFracLo);
EXCHANGE(aExp, bExp);
}
expDiff = mix(expDiff, expDiff - 1, bExp == 0);
if (aExp == 0x7FF) {
bool propagate = (aFracHi | aFracLo) != 0u;
return mix(__packFloat64(aSign, 0x7ff, 0u, 0u), __propagateFloat64NaN(a, b), propagate);
}
expDiff = mix(abs(expDiff), abs(expDiff) - 1, bExp == 0);
bFracHi = mix(bFracHi | 0x00100000u, bFracHi, bExp == 0);
__shift64ExtraRightJamming(
bFracHi, bFracLo, 0u, expDiff, bFracHi, bFracLo, zFrac2);
zExp = aExp;
} else if (expDiff < 0) {
if (bExp == 0x7FF) {
bool propagate = (bFracHi | bFracLo) != 0u;
return mix(__packFloat64(aSign, 0x7ff, 0u, 0u), __propagateFloat64NaN(a, b), propagate);
}
expDiff = mix(expDiff, expDiff + 1, aExp == 0);
aFracHi = mix(aFracHi | 0x00100000u, aFracHi, aExp == 0);
__shift64ExtraRightJamming(
aFracHi, aFracLo, 0u, - expDiff, aFracHi, aFracLo, zFrac2);
zExp = bExp;
}
if (!orig_exp_diff_is_zero) {
aFracHi |= 0x00100000u;
__add64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);
--zExp;
@ -667,12 +742,23 @@ __fadd64(uint64_t a, uint64_t b)
__shortShift64Left(aFracHi, aFracLo, 10, aFracHi, aFracLo);
__shortShift64Left(bFracHi, bFracLo, 10, bFracHi, bFracLo);
if (0 < expDiff) {
if (expDiff != 0) {
uint zFrac0;
uint zFrac1;
if (expDiff < 0) {
EXCHANGE(aFracHi, bFracHi);
EXCHANGE(aFracLo, bFracLo);
EXCHANGE(aExp, bExp);
aSign ^= 0x80000000u;
}
if (aExp == 0x7FF) {
bool propagate = (aFracHi | aFracLo) != 0u;
return mix(a, __propagateFloat64NaN(a, b), propagate);
return mix(__packFloat64(aSign, 0x7ff, 0u, 0u), __propagateFloat64NaN(a, b), propagate);
}
expDiff = mix(expDiff, expDiff - 1, bExp == 0);
expDiff = mix(abs(expDiff), abs(expDiff) - 1, bExp == 0);
bFracHi = mix(bFracHi | 0x40000000u, bFracHi, bExp == 0);
__shift64RightJamming(bFracHi, bFracLo, expDiff, bFracHi, bFracLo);
aFracHi |= 0x40000000u;
@ -681,79 +767,39 @@ __fadd64(uint64_t a, uint64_t b)
--zExp;
return __normalizeRoundAndPackFloat64(aSign, zExp - 10, zFrac0, zFrac1);
}
if (expDiff < 0) {
if (bExp == 0x7FF) {
bool propagate = (bFracHi | bFracLo) != 0u;
return mix(__packFloat64(aSign ^ 1u, 0x7ff, 0u, 0u), __propagateFloat64NaN(a, b), propagate);
}
expDiff = mix(expDiff, expDiff + 1, aExp == 0);
aFracHi = mix(aFracHi | 0x40000000u, aFracHi, aExp == 0);
__shift64RightJamming(aFracHi, aFracLo, - expDiff, aFracHi, aFracLo);
bFracHi |= 0x40000000u;
__sub64(bFracHi, bFracLo, aFracHi, aFracLo, zFrac0, zFrac1);
zExp = bExp;
aSign ^= 1u;
--zExp;
return __normalizeRoundAndPackFloat64(aSign, zExp - 10, zFrac0, zFrac1);
}
if (aExp == 0x7FF) {
bool propagate = (aFracHi | aFracLo | bFracHi | bFracLo) != 0u;
bool propagate = ((aFracHi | bFracHi) | (aFracLo | bFracLo)) != 0u;
return mix(0xFFFFFFFFFFFFFFFFUL, __propagateFloat64NaN(a, b), propagate);
}
bExp = mix(bExp, 1, aExp == 0);
aExp = mix(aExp, 1, aExp == 0);
bool zexp_normal = false;
bool blta = true;
uint zFrac0;
uint zFrac1;
uint sign_of_difference = 0;
if (bFracHi < aFracHi) {
__sub64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);
zexp_normal = true;
}
else if (aFracHi < bFracHi) {
__sub64(bFracHi, bFracLo, aFracHi, aFracLo, zFrac0, zFrac1);
blta = false;
zexp_normal = true;
sign_of_difference = 0x80000000;
}
else if (bFracLo < aFracLo) {
else if (bFracLo <= aFracLo) {
/* It is possible that zFrac0 and zFrac1 may be zero after this. */
__sub64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);
zexp_normal = true;
}
else if (aFracLo < bFracLo) {
else {
__sub64(bFracHi, bFracLo, aFracHi, aFracLo, zFrac0, zFrac1);
blta = false;
zexp_normal = true;
sign_of_difference = 0x80000000;
}
zExp = mix(bExp, aExp, blta);
aSign = mix(aSign ^ 1u, aSign, blta);
uint64_t retval_0 = __packFloat64(uint(FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN), 0, 0u, 0u);
zExp = mix(bExp, aExp, sign_of_difference == 0u);
aSign ^= sign_of_difference;
uint64_t retval_0 = __packFloat64(uint(FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) << 31, 0, 0u, 0u);
uint64_t retval_1 = __normalizeRoundAndPackFloat64(aSign, zExp - 11, zFrac0, zFrac1);
return mix(retval_0, retval_1, zexp_normal);
return mix(retval_0, retval_1, zFrac0 != 0u || zFrac1 != 0u);
}
}
/* Multiplies `a' by `b' to obtain a 64-bit product. The product is broken
* into two 32-bit pieces which are stored at the locations pointed to by
* `z0Ptr' and `z1Ptr'.
*/
void
__mul32To64(uint a, uint b, out uint z0Ptr, out uint z1Ptr)
{
uint aLow = a & 0x0000FFFFu;
uint aHigh = a>>16;
uint bLow = b & 0x0000FFFFu;
uint bHigh = b>>16;
uint z1 = aLow * bLow;
uint zMiddleA = aLow * bHigh;
uint zMiddleB = aHigh * bLow;
uint z0 = aHigh * bHigh;
zMiddleA += zMiddleB;
z0 += ((uint(zMiddleA < zMiddleB)) << 16) + (zMiddleA >> 16);
zMiddleA <<= 16;
z1 += zMiddleA;
z0 += uint(z1 < zMiddleA);
z1Ptr = z1;
z0Ptr = z0;
}
/* Multiplies the 64-bit value formed by concatenating `a0' and `a1' to the
* 64-bit value formed by concatenating `b0' and `b1' to obtain a 128-bit
* product. The product is broken into four 32-bit pieces which are stored at
@ -773,12 +819,12 @@ __mul64To128(uint a0, uint a1, uint b0, uint b1,
uint more1 = 0u;
uint more2 = 0u;
__mul32To64(a1, b1, z2, z3);
__mul32To64(a1, b0, z1, more2);
umulExtended(a1, b1, z2, z3);
umulExtended(a1, b0, z1, more2);
__add64(z1, more2, 0u, z2, z1, z2);
__mul32To64(a0, b0, z0, more1);
umulExtended(a0, b0, z0, more1);
__add64(z0, more1, 0u, z1, z0, z1);
__mul32To64(a0, b1, more1, more2);
umulExtended(a0, b1, more1, more2);
__add64(more1, more2, 0u, z2, more1, z2);
__add64(z0, z1, 0u, more1, z0, z1);
z3Ptr = z3;
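The deleted `__mul32To64` helper is replaced by GLSL's built-in `umulExtended()`, which produces the full 32x32 -> 64-bit product in one operation. Its semantics, sketched in C++ (not part of the commit):

```cpp
#include <cstdint>
#include <cstdio>

// Reference semantics of umulExtended: full 32x32 -> 64-bit product,
// split into high (msb) and low (lsb) words.
static void umulExtended(uint32_t a, uint32_t b, uint32_t &msb, uint32_t &lsb) {
    uint64_t p = uint64_t(a) * uint64_t(b);
    msb = uint32_t(p >> 32);
    lsb = uint32_t(p);
}

int main() {
    uint32_t hi, lo;
    umulExtended(0xFFFFFFFFu, 0xFFFFFFFFu, hi, lo);
    std::printf("%08x %08x\n", hi, lo);  // prints: fffffffe 00000001
}
```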
@ -847,8 +893,13 @@ __fmul64(uint64_t a, uint64_t b)
return __packFloat64(zSign, 0x7FF, 0u, 0u);
}
if (bExp == 0x7FF) {
/* a cannot be NaN, but is b NaN? */
if ((bFracHi | bFracLo) != 0u)
#if defined RELAXED_NAN_PROPAGATION
return b;
#else
return __propagateFloat64NaN(a, b);
#endif
if ((uint(aExp) | aFracHi | aFracLo) == 0u)
return 0xFFFFFFFFFFFFFFFFUL;
return __packFloat64(zSign, 0x7FF, 0u, 0u);
@ -934,13 +985,13 @@ __fp64_to_uint(uint64_t a)
__shift64RightJamming(aFracHi, aFracLo, shiftDist, aFracHi, aFracLo);
if ((aFracHi & 0xFFFFF000u) != 0u)
return mix(~0u, 0u, (aSign != 0u));
return mix(~0u, 0u, aSign != 0u);
uint z = 0u;
uint zero = 0u;
__shift64Right(aFracHi, aFracLo, 12, zero, z);
uint expt = mix(~0u, 0u, (aSign != 0u));
uint expt = mix(~0u, 0u, aSign != 0u);
return mix(z, expt, (aSign != 0u) && (z != 0u));
}
@ -1047,7 +1098,7 @@ __fp32_to_uint64(float f)
uint a = floatBitsToUint(f);
uint aFrac = a & 0x007FFFFFu;
int aExp = int((a>>23) & 0xFFu);
uint aSign = a>>31;
uint aSign = a & 0x80000000u;
uint zFrac0 = 0u;
uint zFrac1 = 0u;
uint zFrac2 = 0u;
@ -1076,7 +1127,7 @@ __fp32_to_int64(float f)
uint a = floatBitsToUint(f);
uint aFrac = a & 0x007FFFFFu;
int aExp = int((a>>23) & 0xFFu);
uint aSign = a>>31;
uint aSign = a & 0x80000000u;
uint zFrac0 = 0u;
uint zFrac1 = 0u;
uint zFrac2 = 0u;
@ -1110,10 +1161,10 @@ __int64_to_fp64(int64_t a)
uint64_t absA = mix(uint64_t(a), uint64_t(-a), a < 0);
uint aFracHi = __extractFloat64FracHi(absA);
uvec2 aFrac = unpackUint2x32(absA);
uint zSign = uint(a < 0);
uint zSign = uint(unpackInt2x32(a).y) & 0x80000000u;
if ((aFracHi & 0x80000000u) != 0u) {
return mix(0ul, __packFloat64(1, 0x434, 0u, 0u), a < 0);
return mix(0ul, __packFloat64(0x80000000u, 0x434, 0u, 0u), a < 0);
}
return __normalizeRoundAndPackFloat64(zSign, 0x432, aFrac.y, aFrac.x);
@ -1143,7 +1194,7 @@ __fp64_to_int(uint64_t a)
if (0x41E < aExp) {
if ((aExp == 0x7FF) && bool(aFracHi | aFracLo))
aSign = 0u;
return mix(0x7FFFFFFF, 0x80000000, bool(aSign));
return mix(0x7FFFFFFF, 0x80000000, aSign != 0u);
}
__shortShift64Left(aFracHi | 0x00100000u, aFracLo, shiftCount, absZ, aFracExtra);
} else {
@ -1155,9 +1206,9 @@ __fp64_to_int(uint64_t a)
absZ = aFracHi >> (- shiftCount);
}
int z = mix(int(absZ), -int(absZ), (aSign != 0u));
int nan = mix(0x7FFFFFFF, 0x80000000, bool(aSign));
return mix(z, nan, bool(aSign ^ uint(z < 0)) && bool(z));
int z = mix(int(absZ), -int(absZ), aSign != 0u);
int nan = mix(0x7FFFFFFF, 0x80000000, aSign != 0u);
return mix(z, nan, ((aSign != 0u) != (z < 0)) && bool(z));
}
/* Returns the result of converting the 32-bit two's complement integer `a'
@ -1171,7 +1222,7 @@ __int_to_fp64(int a)
uint zFrac1 = 0u;
if (a==0)
return __packFloat64(0u, 0, 0u, 0u);
uint zSign = uint(a < 0);
uint zSign = uint(a) & 0x80000000u;
uint absA = mix(uint(a), uint(-a), a < 0);
int shiftCount = __countLeadingZeros32(absA) - 11;
if (0 <= shiftCount) {
@ -1192,7 +1243,7 @@ __fp64_to_bool(uint64_t a)
uint64_t
__bool_to_fp64(bool a)
{
return __int_to_fp64(int(a));
return packUint2x32(uvec2(0x00000000u, uint(-int(a) & 0x3ff00000)));
}
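The new encoding avoids a full int-to-double conversion: `-int(a)` is all ones for true, so the mask keeps `0x3FF00000`, the high word of 1.0, and both words are zero for false. A C++ check, not part of the commit:

```cpp
#include <cstdint>
#include <cstring>
#include <cstdio>

int main() {
    for (int a = 0; a <= 1; ++a) {
        uint32_t hi = uint32_t(-a) & 0x3FF00000u;  // all-ones mask when true
        uint64_t bits = uint64_t(hi) << 32;        // low word is zero either way
        double d;
        std::memcpy(&d, &bits, 8);
        std::printf("%g\n", d);                    // prints: 0 then 1
    }
}
```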
/* Packs the sign `zSign', exponent `zExp', and significand `zFrac' into a
@ -1207,7 +1258,7 @@ __bool_to_fp64(bool a)
float
__packFloat32(uint zSign, int zExp, uint zFrac)
{
return uintBitsToFloat((zSign<<31) + (uint(zExp)<<23) + zFrac);
return uintBitsToFloat(zSign + (uint(zExp)<<23) + zFrac);
}
/* Takes an abstract floating-point value having sign `zSign', exponent `zExp',
@ -1287,7 +1338,7 @@ __fp64_to_fp32(uint64_t __a)
uint aSign = __extractFloat64Sign(__a);
if (aExp == 0x7FF) {
__shortShift64Left(a.y, a.x, 12, a.y, a.x);
float rval = uintBitsToFloat((aSign<<31) | 0x7FC00000u | (a.y>>9));
float rval = uintBitsToFloat(aSign | 0x7FC00000u | (a.y>>9));
rval = mix(__packFloat32(aSign, 0xFF, 0u), rval, (aFracHi | aFracLo) != 0u);
return rval;
}
@ -1315,7 +1366,7 @@ __uint64_to_fp32(uint64_t __a)
float
__int64_to_fp32(int64_t __a)
{
uint aSign = uint(__a < 0);
uint aSign = uint(unpackInt2x32(__a).y) & 0x80000000u;
uint64_t absA = mix(uint64_t(__a), uint64_t(-__a), __a < 0);
uvec2 aFrac = unpackUint2x32(absA);
int shiftCount = mix(__countLeadingZeros32(aFrac.y) - 33,
@ -1339,7 +1390,7 @@ __fp32_to_fp64(float f)
uint a = floatBitsToUint(f);
uint aFrac = a & 0x007FFFFFu;
int aExp = int((a>>23) & 0xFFu);
uint aSign = a>>31;
uint aSign = a & 0x80000000u;
uint zFrac0 = 0u;
uint zFrac1 = 0u;
@ -1348,7 +1399,7 @@ __fp32_to_fp64(float f)
uint nanLo = 0u;
uint nanHi = a<<9;
__shift64Right(nanHi, nanLo, 12, nanHi, nanLo);
nanHi |= ((aSign<<31) | 0x7FF80000u);
nanHi |= aSign | 0x7FF80000u;
return packUint2x32(uvec2(nanLo, nanHi));
}
return __packFloat64(aSign, 0x7FF, 0u, 0u);
@ -1442,7 +1493,7 @@ __estimateDiv64To32(uint a0, uint a1, uint b)
return 0xFFFFFFFFu;
b0 = b>>16;
z = (b0<<16 <= a0) ? 0xFFFF0000u : (a0 / b0)<<16;
__mul32To64(b, z, term0, term1);
umulExtended(b, z, term0, term1);
__sub64(a0, a1, term0, term1, rem0, rem1);
while (int(rem0) < 0) {
z -= 0x10000u;
@ -1612,7 +1663,7 @@ __fsqrt64(uint64_t a)
zFrac0 = 0x7FFFFFFFu;
doubleZFrac0 = zFrac0 + zFrac0;
__shortShift64Left(aFracHi, aFracLo, 9 - (aExp & 1), aFracHi, aFracLo);
__mul32To64(zFrac0, zFrac0, term0, term1);
umulExtended(zFrac0, zFrac0, term0, term1);
__sub64(aFracHi, aFracLo, term0, term1, rem0, rem1);
while (int(rem0) < 0) {
--zFrac0;
@ -1623,9 +1674,9 @@ __fsqrt64(uint64_t a)
if ((zFrac1 & 0x1FFu) <= 5u) {
if (zFrac1 == 0u)
zFrac1 = 1u;
__mul32To64(doubleZFrac0, zFrac1, term1, term2);
umulExtended(doubleZFrac0, zFrac1, term1, term2);
__sub64(rem1, 0u, term1, term2, rem1, rem2);
__mul32To64(zFrac1, zFrac1, term2, term3);
umulExtended(zFrac1, zFrac1, term2, term3);
__sub96(rem1, rem2, 0u, 0u, term2, term3, rem1, rem2, rem3);
while (int(rem1) < 0) {
--zFrac1;
@ -1665,7 +1716,19 @@ __ftrunc64(uint64_t __a)
uint64_t
__ffloor64(uint64_t a)
{
bool is_positive = __fge64(a, 0ul);
/* The big assumption is that when 'a' is NaN, __ftrunc(a) returns a. Based

* on that assumption, NaN values that don't have the sign bit will safely
* return NaN (identity). This is guarded by RELAXED_NAN_PROPAGATION
* because otherwise the NaN should have the "signal" bit set. The
* __fadd64 will ensure that occurs.
*/
bool is_positive =
#if defined RELAXED_NAN_PROPAGATION
int(unpackUint2x32(a).y) >= 0
#else
__fge64(a, 0ul)
#endif
;
uint64_t tr = __ftrunc64(a);
if (is_positive || __feq64(tr, a)) {
@ -1723,21 +1786,29 @@ __fround64(uint64_t __a)
uint64_t
__fmin64(uint64_t a, uint64_t b)
{
if (__is_nan(a)) return b;
if (__is_nan(b)) return a;
/* This weird layout matters. Doing the "obvious" thing results in extra
* flow control being inserted to implement the short-circuit evaluation
* rules. Flow control is bad!
*/
bool b_nan = __is_nan(b);
bool a_lt_b = __flt64_nonnan(a, b);
bool a_nan = __is_nan(a);
if (__flt64_nonnan(a, b)) return a;
return b;
return (b_nan || a_lt_b) && !a_nan ? a : b;
}
uint64_t
__fmax64(uint64_t a, uint64_t b)
{
if (__is_nan(a)) return b;
if (__is_nan(b)) return a;
/* This weird layout matters. Doing the "obvious" thing results in extra
* flow control being inserted to implement the short-circuit evaluation
* rules. Flow control is bad!
*/
bool b_nan = __is_nan(b);
bool a_lt_b = __flt64_nonnan(a, b);
bool a_nan = __is_nan(a);
if (__flt64_nonnan(a, b)) return b;
return a;
return (b_nan || a_lt_b) && !a_nan ? b : a;
}
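The same selection logic in plain C++ (a sketch, not part of the commit; `std::isnan` and `<` stand in for `__is_nan` and `__flt64_nonnan`): all three predicates are computed unconditionally and combined once, so no short-circuit control flow has to be materialized.

```cpp
#include <cmath>
#include <cstdio>

static double fmin64_like(double a, double b) {
    bool b_nan  = std::isnan(b);      // stands in for __is_nan(b)
    bool a_lt_b = a < b;              // stands in for __flt64_nonnan(a, b)
    bool a_nan  = std::isnan(a);
    return (b_nan || a_lt_b) && !a_nan ? a : b;
}

int main() {
    std::printf("%g %g %g\n",
                fmin64_like(1.0, 2.0),   // 1
                fmin64_like(NAN, 2.0),   // 2: NaN operand is ignored
                fmin64_like(2.0, NAN));  // 2
}
```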
uint64_t

File diff not shown because of its large size


@ -1,4 +1,4 @@
/* A Bison parser, made by GNU Bison 3.4.1. */
/* A Bison parser, made by GNU Bison 3.5. */
/* Bison interface for Yacc-like parsers in C


@ -173,8 +173,8 @@ add_builtin_define(glcpp_parser_t *parser, const char *name, int value);
%}
%pure-parser
%error-verbose
%define api.pure
%define parse.error verbose
%locations
%initial-action {
@ -1187,6 +1187,9 @@ _token_list_equal_ignoring_space(token_list_t *a, token_list_t *b)
node_b = node_b->next;
}
if (node_a == NULL && node_b == NULL)
break;
if (node_b == NULL && node_a->token->type == SPACE) {
while (node_a && node_a->token->type == SPACE)
node_a = node_a->next;


@ -18,33 +18,21 @@
#include "program/program.h"
static void
init_gl_program(struct gl_program *prog, bool is_arb_asm, GLenum target)
init_gl_program(struct gl_program *prog, bool is_arb_asm, gl_shader_stage stage)
{
prog->RefCount = 1;
prog->Format = GL_PROGRAM_FORMAT_ASCII_ARB;
prog->is_arb_asm = is_arb_asm;
prog->info.stage = (gl_shader_stage)_mesa_program_enum_to_shader_stage(target);
prog->info.stage = stage;
}
static struct gl_program *
new_program(UNUSED struct gl_context *ctx, GLenum target,
new_program(UNUSED struct gl_context *ctx, gl_shader_stage stage,
UNUSED GLuint id, bool is_arb_asm)
{
switch (target) {
case GL_VERTEX_PROGRAM_ARB: /* == GL_VERTEX_PROGRAM_NV */
case GL_GEOMETRY_PROGRAM_NV:
case GL_TESS_CONTROL_PROGRAM_NV:
case GL_TESS_EVALUATION_PROGRAM_NV:
case GL_FRAGMENT_PROGRAM_ARB:
case GL_COMPUTE_PROGRAM_NV: {
struct gl_program *prog = rzalloc(NULL, struct gl_program);
init_gl_program(prog, is_arb_asm, target);
return prog;
}
default:
printf("bad target in new_program\n");
return NULL;
}
struct gl_program *prog = rzalloc(NULL, struct gl_program);
init_gl_program(prog, is_arb_asm, stage);
return prog;
}
static void
@ -84,7 +72,7 @@ initialize_mesa_context(struct gl_context *ctx, glslopt_target api)
ctx->Extensions.EXT_shader_framebuffer_fetch = true;
break;
case kGlslTargetOpenGLES30:
ctx->Extensions.ARB_ES3_compatibility = true;
ctx->Extensions.ARB_ES3_1_compatibility = true;
ctx->Extensions.EXT_shader_framebuffer_fetch = true;
break;
case kGlslTargetMetal:
@ -677,7 +665,10 @@ glslopt_shader* glslopt_optimize (glslopt_ctx* ctx, glslopt_shader_type type, co
validate_ir_tree(ir);
shader->rawOutput = _mesa_print_ir_glsl(ir, state, ralloc_strdup(shader, ""), printMode);
}
// Lower builtin functions prior to linking.
lower_builtins(ir);
// Link built-in functions
shader->shader->symbols = state->symbols;

File diff not shown because of its large size


@ -1,4 +1,4 @@
/* A Bison parser, made by GNU Bison 3.4.1. */
/* A Bison parser, made by GNU Bison 3.5. */
/* Bison interface for Yacc-like parsers in C
@ -192,7 +192,7 @@ extern int _mesa_glsl_debug;
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
union YYSTYPE
{
#line 100 "src/compiler/glsl/glsl_parser.yy"
#line 101 "src/compiler/glsl/glsl_parser.yy"
int n;
int64_t n64;


@ -34,6 +34,7 @@
#include "compiler/glsl_types.h"
#include "main/context.h"
#include "util/u_string.h"
#include "util/format/u_format.h"
#ifdef _MSC_VER
#pragma warning( disable : 4065 ) // switch statement contains 'default' but no 'case' labels
@ -81,8 +82,8 @@ static bool match_layout_qualifier(const char *s1, const char *s2,
%expect 0
%pure-parser
%error-verbose
%define api.pure
%define parse.error verbose
%locations
%initial-action {
@ -1336,7 +1337,7 @@ layout_qualifier_id:
if (!$$.flags.i) {
static const struct {
const char *name;
GLenum format;
enum pipe_format format;
glsl_base_type base_type;
/** Minimum desktop GLSL version required for the image
* format. Use 130 if already present in the original
@ -1349,54 +1350,54 @@ layout_qualifier_id:
bool nv_image_formats;
bool ext_qualifiers;
} map[] = {
{ "rgba32f", GL_RGBA32F, GLSL_TYPE_FLOAT, 130, 310, false, false },
{ "rgba16f", GL_RGBA16F, GLSL_TYPE_FLOAT, 130, 310, false, false },
{ "rg32f", GL_RG32F, GLSL_TYPE_FLOAT, 130, 0, true, false },
{ "rg16f", GL_RG16F, GLSL_TYPE_FLOAT, 130, 0, true, false },
{ "r11f_g11f_b10f", GL_R11F_G11F_B10F, GLSL_TYPE_FLOAT, 130, 0, true, false },
{ "r32f", GL_R32F, GLSL_TYPE_FLOAT, 130, 310, false, false },
{ "r16f", GL_R16F, GLSL_TYPE_FLOAT, 130, 0, true, false },
{ "rgba32ui", GL_RGBA32UI, GLSL_TYPE_UINT, 130, 310, false, false },
{ "rgba16ui", GL_RGBA16UI, GLSL_TYPE_UINT, 130, 310, false, false },
{ "rgb10_a2ui", GL_RGB10_A2UI, GLSL_TYPE_UINT, 130, 0, true, false },
{ "rgba8ui", GL_RGBA8UI, GLSL_TYPE_UINT, 130, 310, false, false },
{ "rg32ui", GL_RG32UI, GLSL_TYPE_UINT, 130, 0, true, false },
{ "rg16ui", GL_RG16UI, GLSL_TYPE_UINT, 130, 0, true, false },
{ "rg8ui", GL_RG8UI, GLSL_TYPE_UINT, 130, 0, true, false },
{ "r32ui", GL_R32UI, GLSL_TYPE_UINT, 130, 310, false, false },
{ "r16ui", GL_R16UI, GLSL_TYPE_UINT, 130, 0, true, false },
{ "r8ui", GL_R8UI, GLSL_TYPE_UINT, 130, 0, true, false },
{ "rgba32i", GL_RGBA32I, GLSL_TYPE_INT, 130, 310, false, false },
{ "rgba16i", GL_RGBA16I, GLSL_TYPE_INT, 130, 310, false, false },
{ "rgba8i", GL_RGBA8I, GLSL_TYPE_INT, 130, 310, false, false },
{ "rg32i", GL_RG32I, GLSL_TYPE_INT, 130, 0, true, false },
{ "rg16i", GL_RG16I, GLSL_TYPE_INT, 130, 0, true, false },
{ "rg8i", GL_RG8I, GLSL_TYPE_INT, 130, 0, true, false },
{ "r32i", GL_R32I, GLSL_TYPE_INT, 130, 310, false, false },
{ "r16i", GL_R16I, GLSL_TYPE_INT, 130, 0, true, false },
{ "r8i", GL_R8I, GLSL_TYPE_INT, 130, 0, true, false },
{ "rgba16", GL_RGBA16, GLSL_TYPE_FLOAT, 130, 0, true, false },
{ "rgb10_a2", GL_RGB10_A2, GLSL_TYPE_FLOAT, 130, 0, true, false },
{ "rgba8", GL_RGBA8, GLSL_TYPE_FLOAT, 130, 310, false, false },
{ "rg16", GL_RG16, GLSL_TYPE_FLOAT, 130, 0, true, false },
{ "rg8", GL_RG8, GLSL_TYPE_FLOAT, 130, 0, true, false },
{ "r16", GL_R16, GLSL_TYPE_FLOAT, 130, 0, true, false },
{ "r8", GL_R8, GLSL_TYPE_FLOAT, 130, 0, true, false },
{ "rgba16_snorm", GL_RGBA16_SNORM, GLSL_TYPE_FLOAT, 130, 0, true, false },
{ "rgba8_snorm", GL_RGBA8_SNORM, GLSL_TYPE_FLOAT, 130, 310, false, false },
{ "rg16_snorm", GL_RG16_SNORM, GLSL_TYPE_FLOAT, 130, 0, true, false },
{ "rg8_snorm", GL_RG8_SNORM, GLSL_TYPE_FLOAT, 130, 0, true, false },
{ "r16_snorm", GL_R16_SNORM, GLSL_TYPE_FLOAT, 130, 0, true, false },
{ "r8_snorm", GL_R8_SNORM, GLSL_TYPE_FLOAT, 130, 0, true, false },
{ "rgba32f", PIPE_FORMAT_R32G32B32A32_FLOAT, GLSL_TYPE_FLOAT, 130, 310, false, false },
{ "rgba16f", PIPE_FORMAT_R16G16B16A16_FLOAT, GLSL_TYPE_FLOAT, 130, 310, false, false },
{ "rg32f", PIPE_FORMAT_R32G32_FLOAT, GLSL_TYPE_FLOAT, 130, 0, true, false },
{ "rg16f", PIPE_FORMAT_R16G16_FLOAT, GLSL_TYPE_FLOAT, 130, 0, true, false },
{ "r11f_g11f_b10f", PIPE_FORMAT_R11G11B10_FLOAT, GLSL_TYPE_FLOAT, 130, 0, true, false },
{ "r32f", PIPE_FORMAT_R32_FLOAT, GLSL_TYPE_FLOAT, 130, 310, false, false },
{ "r16f", PIPE_FORMAT_R16_FLOAT, GLSL_TYPE_FLOAT, 130, 0, true, false },
{ "rgba32ui", PIPE_FORMAT_R32G32B32A32_UINT, GLSL_TYPE_UINT, 130, 310, false, false },
{ "rgba16ui", PIPE_FORMAT_R16G16B16A16_UINT, GLSL_TYPE_UINT, 130, 310, false, false },
{ "rgb10_a2ui", PIPE_FORMAT_R10G10B10A2_UINT, GLSL_TYPE_UINT, 130, 0, true, false },
{ "rgba8ui", PIPE_FORMAT_R8G8B8A8_UINT, GLSL_TYPE_UINT, 130, 310, false, false },
{ "rg32ui", PIPE_FORMAT_R32G32_UINT, GLSL_TYPE_UINT, 130, 0, true, false },
{ "rg16ui", PIPE_FORMAT_R16G16_UINT, GLSL_TYPE_UINT, 130, 0, true, false },
{ "rg8ui", PIPE_FORMAT_R8G8_UINT, GLSL_TYPE_UINT, 130, 0, true, false },
{ "r32ui", PIPE_FORMAT_R32_UINT, GLSL_TYPE_UINT, 130, 310, false, false },
{ "r16ui", PIPE_FORMAT_R16_UINT, GLSL_TYPE_UINT, 130, 0, true, false },
{ "r8ui", PIPE_FORMAT_R8_UINT, GLSL_TYPE_UINT, 130, 0, true, false },
{ "rgba32i", PIPE_FORMAT_R32G32B32A32_SINT, GLSL_TYPE_INT, 130, 310, false, false },
{ "rgba16i", PIPE_FORMAT_R16G16B16A16_SINT, GLSL_TYPE_INT, 130, 310, false, false },
{ "rgba8i", PIPE_FORMAT_R8G8B8A8_SINT, GLSL_TYPE_INT, 130, 310, false, false },
{ "rg32i", PIPE_FORMAT_R32G32_SINT, GLSL_TYPE_INT, 130, 0, true, false },
{ "rg16i", PIPE_FORMAT_R16G16_SINT, GLSL_TYPE_INT, 130, 0, true, false },
{ "rg8i", PIPE_FORMAT_R8G8_SINT, GLSL_TYPE_INT, 130, 0, true, false },
{ "r32i", PIPE_FORMAT_R32_SINT, GLSL_TYPE_INT, 130, 310, false, false },
{ "r16i", PIPE_FORMAT_R16_SINT, GLSL_TYPE_INT, 130, 0, true, false },
{ "r8i", PIPE_FORMAT_R8_SINT, GLSL_TYPE_INT, 130, 0, true, false },
{ "rgba16", PIPE_FORMAT_R16G16B16A16_UNORM, GLSL_TYPE_FLOAT, 130, 0, true, false },
{ "rgb10_a2", PIPE_FORMAT_R10G10B10A2_UNORM, GLSL_TYPE_FLOAT, 130, 0, true, false },
{ "rgba8", PIPE_FORMAT_R8G8B8A8_UNORM, GLSL_TYPE_FLOAT, 130, 310, false, false },
{ "rg16", PIPE_FORMAT_R16G16_UNORM, GLSL_TYPE_FLOAT, 130, 0, true, false },
{ "rg8", PIPE_FORMAT_R8G8_UNORM, GLSL_TYPE_FLOAT, 130, 0, true, false },
{ "r16", PIPE_FORMAT_R16_UNORM, GLSL_TYPE_FLOAT, 130, 0, true, false },
{ "r8", PIPE_FORMAT_R8_UNORM, GLSL_TYPE_FLOAT, 130, 0, true, false },
{ "rgba16_snorm", PIPE_FORMAT_R16G16B16A16_SNORM, GLSL_TYPE_FLOAT, 130, 0, true, false },
{ "rgba8_snorm", PIPE_FORMAT_R8G8B8A8_SNORM, GLSL_TYPE_FLOAT, 130, 310, false, false },
{ "rg16_snorm", PIPE_FORMAT_R16G16_SNORM, GLSL_TYPE_FLOAT, 130, 0, true, false },
{ "rg8_snorm", PIPE_FORMAT_R8G8_SNORM, GLSL_TYPE_FLOAT, 130, 0, true, false },
{ "r16_snorm", PIPE_FORMAT_R16_SNORM, GLSL_TYPE_FLOAT, 130, 0, true, false },
{ "r8_snorm", PIPE_FORMAT_R8_SNORM, GLSL_TYPE_FLOAT, 130, 0, true, false },
/* From GL_EXT_shader_image_load_store: */
/* base_type is incorrect but it'll be patched later when we know
* the variable type. See ast_to_hir.cpp */
{ "size1x8", GL_R8I, GLSL_TYPE_VOID, 130, 0, false, true },
{ "size1x16", GL_R16I, GLSL_TYPE_VOID, 130, 0, false, true },
{ "size1x32", GL_R32I, GLSL_TYPE_VOID, 130, 0, false, true },
{ "size2x32", GL_RG32I, GLSL_TYPE_VOID, 130, 0, false, true },
{ "size4x32", GL_RGBA32I, GLSL_TYPE_VOID, 130, 0, false, true },
{ "size1x8", PIPE_FORMAT_R8_SINT, GLSL_TYPE_VOID, 130, 0, false, true },
{ "size1x16", PIPE_FORMAT_R16_SINT, GLSL_TYPE_VOID, 130, 0, false, true },
{ "size1x32", PIPE_FORMAT_R32_SINT, GLSL_TYPE_VOID, 130, 0, false, true },
{ "size2x32", PIPE_FORMAT_R32G32_SINT, GLSL_TYPE_VOID, 130, 0, false, true },
{ "size4x32", PIPE_FORMAT_R32G32B32A32_SINT, GLSL_TYPE_VOID, 130, 0, false, true },
};
for (unsigned i = 0; i < ARRAY_SIZE(map); i++) {
@ -1708,6 +1709,25 @@ layout_qualifier_id:
}
}
/* Layout qualifier for NV_viewport_array2. */
if (!$$.flags.i && state->stage != MESA_SHADER_FRAGMENT) {
if (match_layout_qualifier($1, "viewport_relative", state) == 0) {
$$.flags.q.viewport_relative = 1;
}
if ($$.flags.i && !state->NV_viewport_array2_enable) {
_mesa_glsl_error(& @1, state,
"qualifier `%s' requires "
"GL_NV_viewport_array2", $1);
}
if ($$.flags.i && state->NV_viewport_array2_warn) {
_mesa_glsl_warning(& @1, state,
"GL_NV_viewport_array2 layout "
"identifier `%s' used", $1);
}
}
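/* Illustrative usage (assumption, not part of this change): with
 * GL_NV_viewport_array2 enabled, a vertex or geometry shader can mark
 * gl_Layer as viewport-relative, which is the qualifier matched above:
 *
 *    #extension GL_NV_viewport_array2 : require
 *    layout(viewport_relative) out highp int gl_Layer;
 */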
if (!$$.flags.i) {
_mesa_glsl_error(& @1, state, "unrecognized layout identifier "
"`%s'", $1);


@ -723,6 +723,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
EXT(EXT_demote_to_helper_invocation),
EXT(EXT_frag_depth),
EXT(EXT_draw_buffers),
EXT(EXT_draw_instanced),
EXT(EXT_clip_cull_distance),
EXT(EXT_geometry_point_size),
EXT_AEP(EXT_geometry_shader),
@ -753,6 +754,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
EXT(NV_fragment_shader_interlock),
EXT(NV_image_formats),
EXT(NV_shader_atomic_float),
EXT(NV_viewport_array2),
};
#undef EXT
@ -1931,6 +1933,8 @@ set_shader_inout_layout(struct gl_shader *shader,
shader->bindless_image = state->bindless_image_specified;
shader->bound_sampler = state->bound_sampler_specified;
shader->bound_image = state->bound_image_specified;
shader->redeclares_gl_layer = state->redeclares_gl_layer;
shader->layer_viewport_relative = state->layer_viewport_relative;
}
/* src can be NULL if only the symbols found in the exec_list should be
@ -2234,7 +2238,13 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader,
shader->Version = state->language_version;
shader->IsES = state->es_shader;
struct gl_shader_compiler_options *options =
&ctx->Const.ShaderCompilerOptions[shader->Stage];
if (!state->error && !shader->ir->is_empty()) {
if (options->LowerPrecision)
lower_precision(shader->ir);
lower_builtins(shader->ir);
assign_subroutine_indexes(state);
lower_subroutine(shader->ir, state);
opt_shader_and_create_symbol_table(ctx, state->symbols, shader);
@ -2347,7 +2357,20 @@ do_common_optimization(exec_list *ir, bool linked,
OPT(lower_vector_insert, ir, false);
OPT(optimize_swizzles, ir);
OPT(optimize_split_arrays, ir, linked);
/* Some drivers only call do_common_optimization() once rather than in a
* loop, and split arrays causes each element of a constant array to
* dereference its own copy of the entire array initializer. This IR is not
* something that can be generated manually in a shader and is not
* accounted for by NIR optimisations; the result is an exponential
* slowdown in compilation speed as a constant array's element count grows.
* To avoid that, here we make sure to always clean up the mess split
* arrays causes to constant arrays.
*/
bool array_split = optimize_split_arrays(ir, linked);
if (array_split)
do_constant_propagation(ir);
progress |= array_split;
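/* Illustrative case (not from the patch): given
*
*    const float kernel[3] = float[3](0.1, 0.8, 0.1);
*
* optimize_split_arrays() rewrites each element as its own scalar
* variable initialized from a private copy of the whole float[3]
* initializer; running do_constant_propagation() right away folds those
* copies before the next optimization pass can multiply them. */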
OPT(optimize_redundant_jumps, ir);
if (options->MaxUnrollIterations) {


@ -823,6 +823,8 @@ struct _mesa_glsl_parse_state {
bool EXT_demote_to_helper_invocation_warn;
bool EXT_draw_buffers_enable;
bool EXT_draw_buffers_warn;
bool EXT_draw_instanced_enable;
bool EXT_draw_instanced_warn;
bool EXT_frag_depth_enable;
bool EXT_frag_depth_warn;
bool EXT_geometry_point_size_enable;
@ -883,6 +885,8 @@ struct _mesa_glsl_parse_state {
bool NV_image_formats_warn;
bool NV_shader_atomic_float_enable;
bool NV_shader_atomic_float_warn;
bool NV_viewport_array2_enable;
bool NV_viewport_array2_warn;
/*@}*/
/** Extensions supported by the OpenGL implementation. */
@ -925,6 +929,10 @@ struct _mesa_glsl_parse_state {
/** Atomic counter offsets by binding */
unsigned atomic_counter_offsets[MAX_COMBINED_ATOMIC_BUFFERS];
/** Whether gl_Layer output is viewport-relative. */
bool redeclares_gl_layer;
bool layer_viewport_relative;
bool allow_extension_directive_midshader;
bool allow_builtin_variable_redeclaration;
bool allow_layout_qualifier_on_function_parameter;


@ -22,6 +22,7 @@
*/
#include <string.h>
#include "ir.h"
#include "util/half_float.h"
#include "compiler/glsl_types.h"
#include "glsl_parser_extras.h"
@ -282,6 +283,7 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
case ir_unop_i2f:
case ir_unop_u2f:
case ir_unop_d2f:
case ir_unop_f162f:
case ir_unop_bitcast_i2f:
case ir_unop_bitcast_u2f:
case ir_unop_i642f:
@ -290,9 +292,17 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
op0->type->vector_elements, 1);
break;
case ir_unop_f2f16:
case ir_unop_f2fmp:
case ir_unop_b2f16:
this->type = glsl_type::get_instance(GLSL_TYPE_FLOAT16,
op0->type->vector_elements, 1);
break;
case ir_unop_f2b:
case ir_unop_i2b:
case ir_unop_d2b:
case ir_unop_f162b:
case ir_unop_i642b:
this->type = glsl_type::get_instance(GLSL_TYPE_BOOL,
op0->type->vector_elements, 1);
@ -335,9 +345,6 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
this->type = glsl_type::get_instance(GLSL_TYPE_UINT64,
op0->type->vector_elements, 1);
break;
case ir_unop_noise:
this->type = glsl_type::float_type;
break;
case ir_unop_unpack_double_2x32:
case ir_unop_unpack_uint_2x32:
@ -682,6 +689,19 @@ ir_constant::ir_constant(const struct glsl_type *type,
memcpy(& this->value, data, sizeof(this->value));
}
ir_constant::ir_constant(float16_t f16, unsigned vector_elements)
: ir_rvalue(ir_type_constant)
{
assert(vector_elements <= 4);
this->type = glsl_type::get_instance(GLSL_TYPE_FLOAT16, vector_elements, 1);
for (unsigned i = 0; i < vector_elements; i++) {
this->value.f16[i] = f16.bits;
}
for (unsigned i = vector_elements; i < 16; i++) {
this->value.f[i] = 0;
}
}
ir_constant::ir_constant(float f, unsigned vector_elements)
: ir_rvalue(ir_type_constant)
{
@ -783,6 +803,7 @@ ir_constant::ir_constant(const ir_constant *c, unsigned i)
case GLSL_TYPE_UINT: this->value.u[0] = c->value.u[i]; break;
case GLSL_TYPE_INT: this->value.i[0] = c->value.i[i]; break;
case GLSL_TYPE_FLOAT: this->value.f[0] = c->value.f[i]; break;
case GLSL_TYPE_FLOAT16: this->value.f16[0] = c->value.f16[i]; break;
case GLSL_TYPE_BOOL: this->value.b[0] = c->value.b[i]; break;
case GLSL_TYPE_DOUBLE: this->value.d[0] = c->value.d[i]; break;
default: assert(!"Should not get here."); break;
@ -828,14 +849,23 @@ ir_constant::ir_constant(const struct glsl_type *type, exec_list *value_list)
if (value->type->is_scalar() && value->next->is_tail_sentinel()) {
if (type->is_matrix()) {
/* Matrix - fill diagonal (rest is already set to 0) */
assert(type->is_float() || type->is_double());
for (unsigned i = 0; i < type->matrix_columns; i++) {
if (type->is_float())
switch (type->base_type) {
case GLSL_TYPE_FLOAT:
this->value.f[i * type->vector_elements + i] =
value->value.f[0];
else
break;
case GLSL_TYPE_DOUBLE:
this->value.d[i * type->vector_elements + i] =
value->value.d[0];
break;
case GLSL_TYPE_FLOAT16:
this->value.f16[i * type->vector_elements + i] =
value->value.f16[0];
break;
default:
assert(!"unexpected matrix base type");
}
}
} else {
/* Vector or scalar - fill all components */
@ -849,6 +879,10 @@ ir_constant::ir_constant(const struct glsl_type *type, exec_list *value_list)
for (unsigned i = 0; i < type->components(); i++)
this->value.f[i] = value->value.f[0];
break;
case GLSL_TYPE_FLOAT16:
for (unsigned i = 0; i < type->components(); i++)
this->value.f16[i] = value->value.f16[0];
break;
case GLSL_TYPE_DOUBLE:
for (unsigned i = 0; i < type->components(); i++)
this->value.d[i] = value->value.d[0];
@ -918,6 +952,9 @@ ir_constant::ir_constant(const struct glsl_type *type, exec_list *value_list)
case GLSL_TYPE_FLOAT:
this->value.f[i] = value->get_float_component(j);
break;
case GLSL_TYPE_FLOAT16:
this->value.f16[i] = value->get_float16_component(j);
break;
case GLSL_TYPE_BOOL:
this->value.b[i] = value->get_bool_component(j);
break;
@ -983,6 +1020,7 @@ ir_constant::get_bool_component(unsigned i) const
case GLSL_TYPE_UINT: return this->value.u[i] != 0;
case GLSL_TYPE_INT: return this->value.i[i] != 0;
case GLSL_TYPE_FLOAT: return ((int)this->value.f[i]) != 0;
case GLSL_TYPE_FLOAT16: return ((int)_mesa_half_to_float(this->value.f16[i])) != 0;
case GLSL_TYPE_BOOL: return this->value.b[i];
case GLSL_TYPE_DOUBLE: return this->value.d[i] != 0.0;
case GLSL_TYPE_SAMPLER:
@ -1005,6 +1043,7 @@ ir_constant::get_float_component(unsigned i) const
case GLSL_TYPE_UINT: return (float) this->value.u[i];
case GLSL_TYPE_INT: return (float) this->value.i[i];
case GLSL_TYPE_FLOAT: return this->value.f[i];
case GLSL_TYPE_FLOAT16: return _mesa_half_to_float(this->value.f16[i]);
case GLSL_TYPE_BOOL: return this->value.b[i] ? 1.0f : 0.0f;
case GLSL_TYPE_DOUBLE: return (float) this->value.d[i];
case GLSL_TYPE_SAMPLER:
@ -1020,6 +1059,15 @@ ir_constant::get_float_component(unsigned i) const
return 0.0;
}
uint16_t
ir_constant::get_float16_component(unsigned i) const
{
if (this->type->base_type == GLSL_TYPE_FLOAT16)
return this->value.f16[i];
else
return _mesa_float_to_half(get_float_component(i));
}
double
ir_constant::get_double_component(unsigned i) const
{
@ -1027,6 +1075,7 @@ ir_constant::get_double_component(unsigned i) const
case GLSL_TYPE_UINT: return (double) this->value.u[i];
case GLSL_TYPE_INT: return (double) this->value.i[i];
case GLSL_TYPE_FLOAT: return (double) this->value.f[i];
case GLSL_TYPE_FLOAT16: return (double) _mesa_half_to_float(this->value.f16[i]);
case GLSL_TYPE_BOOL: return this->value.b[i] ? 1.0 : 0.0;
case GLSL_TYPE_DOUBLE: return this->value.d[i];
case GLSL_TYPE_SAMPLER:
@ -1049,6 +1098,7 @@ ir_constant::get_int_component(unsigned i) const
case GLSL_TYPE_UINT: return this->value.u[i];
case GLSL_TYPE_INT: return this->value.i[i];
case GLSL_TYPE_FLOAT: return (int) this->value.f[i];
case GLSL_TYPE_FLOAT16: return (int) _mesa_half_to_float(this->value.f16[i]);
case GLSL_TYPE_BOOL: return this->value.b[i] ? 1 : 0;
case GLSL_TYPE_DOUBLE: return (int) this->value.d[i];
case GLSL_TYPE_SAMPLER:
@ -1071,6 +1121,7 @@ ir_constant::get_uint_component(unsigned i) const
case GLSL_TYPE_UINT: return this->value.u[i];
case GLSL_TYPE_INT: return this->value.i[i];
case GLSL_TYPE_FLOAT: return (unsigned) this->value.f[i];
case GLSL_TYPE_FLOAT16: return (unsigned) _mesa_half_to_float(this->value.f16[i]);
case GLSL_TYPE_BOOL: return this->value.b[i] ? 1 : 0;
case GLSL_TYPE_DOUBLE: return (unsigned) this->value.d[i];
case GLSL_TYPE_SAMPLER:
@ -1093,6 +1144,7 @@ ir_constant::get_int64_component(unsigned i) const
case GLSL_TYPE_UINT: return this->value.u[i];
case GLSL_TYPE_INT: return this->value.i[i];
case GLSL_TYPE_FLOAT: return (int64_t) this->value.f[i];
case GLSL_TYPE_FLOAT16: return (int64_t) _mesa_half_to_float(this->value.f16[i]);
case GLSL_TYPE_BOOL: return this->value.b[i] ? 1 : 0;
case GLSL_TYPE_DOUBLE: return (int64_t) this->value.d[i];
case GLSL_TYPE_SAMPLER:
@ -1115,6 +1167,7 @@ ir_constant::get_uint64_component(unsigned i) const
case GLSL_TYPE_UINT: return this->value.u[i];
case GLSL_TYPE_INT: return this->value.i[i];
case GLSL_TYPE_FLOAT: return (uint64_t) this->value.f[i];
case GLSL_TYPE_FLOAT16: return (uint64_t) _mesa_half_to_float(this->value.f16[i]);
case GLSL_TYPE_BOOL: return this->value.b[i] ? 1 : 0;
case GLSL_TYPE_DOUBLE: return (uint64_t) this->value.d[i];
case GLSL_TYPE_SAMPLER:
@ -1169,6 +1222,7 @@ ir_constant::copy_offset(ir_constant *src, int offset)
case GLSL_TYPE_UINT:
case GLSL_TYPE_INT:
case GLSL_TYPE_FLOAT:
case GLSL_TYPE_FLOAT16:
case GLSL_TYPE_DOUBLE:
case GLSL_TYPE_SAMPLER:
case GLSL_TYPE_IMAGE:
@ -1188,6 +1242,9 @@ ir_constant::copy_offset(ir_constant *src, int offset)
case GLSL_TYPE_FLOAT:
value.f[i+offset] = src->get_float_component(i);
break;
case GLSL_TYPE_FLOAT16:
value.f16[i+offset] = src->get_float16_component(i);
break;
case GLSL_TYPE_BOOL:
value.b[i+offset] = src->get_bool_component(i);
break;
@ -1247,6 +1304,9 @@ ir_constant::copy_masked_offset(ir_constant *src, int offset, unsigned int mask)
case GLSL_TYPE_FLOAT:
value.f[i+offset] = src->get_float_component(id++);
break;
case GLSL_TYPE_FLOAT16:
value.f16[i+offset] = src->get_float16_component(id++);
break;
case GLSL_TYPE_BOOL:
value.b[i+offset] = src->get_bool_component(id++);
break;
@ -1297,6 +1357,12 @@ ir_constant::has_value(const ir_constant *c) const
if (this->value.f[i] != c->value.f[i])
return false;
break;
case GLSL_TYPE_FLOAT16:
/* Convert to float to make sure NaN and ±0.0 compare correctly */
if (_mesa_half_to_float(this->value.f16[i]) !=
_mesa_half_to_float(c->value.f16[i]))
return false;
break;
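/* E.g. (illustrative): the half bit patterns 0x0000 (+0.0) and 0x8000
 * (-0.0) differ as raw uint16_t values but compare equal after
 * conversion, while a NaN pattern such as 0x7e00 never compares equal,
 * matching the semantics of the float path above. */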
case GLSL_TYPE_BOOL:
if (this->value.b[i] != c->value.b[i])
return false;
@ -1340,6 +1406,10 @@ ir_constant::is_value(float f, int i) const
if (this->value.f[c] != f)
return false;
break;
case GLSL_TYPE_FLOAT16:
if (_mesa_half_to_float(this->value.f16[c]) != f)
return false;
break;
case GLSL_TYPE_INT:
if (this->value.i[c] != i)
return false;
@ -1820,7 +1890,7 @@ ir_variable::ir_variable(const struct glsl_type *type, const char *name,
this->data.fb_fetch_output = false;
this->data.bindless = false;
this->data.bound = false;
this->data.image_format = GL_NONE;
this->data.image_format = PIPE_FORMAT_NONE;
this->data._num_state_slots = 0;
this->data.param_index = 0;
this->data.stream = 0;


@ -29,6 +29,8 @@
#include <stdlib.h>
#include "util/ralloc.h"
#include "util/format/u_format.h"
#include "util/half_float.h"
#include "compiler/glsl_types.h"
#include "list.h"
#include "ir_visitor.h"
@ -767,6 +769,13 @@ public:
*/
unsigned is_unmatched_generic_inout:1;
/**
* Is this varying used by transform feedback?
*
* This is used by the linker to decide if it's safe to pack the varying.
*/
unsigned is_xfb:1;
/**
* Is this varying used only by transform feedback?
*
@ -887,8 +896,11 @@ public:
uint8_t warn_extension_index;
public:
/** Image internal format if specified explicitly, otherwise GL_NONE. */
uint16_t image_format;
/**
* Image internal format if specified explicitly, otherwise
* PIPE_FORMAT_NONE.
*/
enum pipe_format image_format;
private:
/**
@ -2039,6 +2051,12 @@ public:
*/
virtual ir_variable *variable_referenced() const = 0;
/**
* Get the precision. This can either come from the eventual variable that
* is dereferenced, or from a record member.
*/
virtual int precision() const = 0;
protected:
ir_dereference(enum ir_node_type t)
: ir_rvalue(t)
@ -2068,6 +2086,11 @@ public:
return this->var;
}
virtual int precision() const
{
return this->var->data.precision;
}
virtual ir_variable *whole_variable_referenced()
{
/* ir_dereference_variable objects always dereference the entire
@ -2116,6 +2139,16 @@ public:
return this->array->variable_referenced();
}
virtual int precision() const
{
ir_dereference *deref = this->array->as_dereference();
if (deref == NULL)
return GLSL_PRECISION_NONE;
else
return deref->precision();
}
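/* E.g. (illustrative): given "mediump float a[4];", the dereference
 * a[i] reports the precision of the underlying variable 'a',
 * i.e. GLSL_PRECISION_MEDIUM, while an array rvalue that is not itself
 * a dereference falls back to GLSL_PRECISION_NONE. */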
virtual void accept(ir_visitor *v)
{
v->visit(this);
@ -2151,6 +2184,13 @@ public:
return this->record->variable_referenced();
}
virtual int precision() const
{
glsl_struct_field *field = record->type->fields.structure + field_idx;
return field->precision;
}
virtual void accept(ir_visitor *v)
{
v->visit(this);
@ -2172,6 +2212,7 @@ union ir_constant_data {
float f[16];
bool b[16];
double d[16];
uint16_t f16[16];
uint64_t u64[16];
int64_t i64[16];
};
@ -2183,6 +2224,7 @@ public:
ir_constant(bool b, unsigned vector_elements=1);
ir_constant(unsigned int u, unsigned vector_elements=1);
ir_constant(int i, unsigned vector_elements=1);
ir_constant(float16_t f16, unsigned vector_elements=1);
ir_constant(float f, unsigned vector_elements=1);
ir_constant(double d, unsigned vector_elements=1);
ir_constant(uint64_t u64, unsigned vector_elements=1);
@ -2235,6 +2277,7 @@ public:
/*@{*/
bool get_bool_component(unsigned i) const;
float get_float_component(unsigned i) const;
uint16_t get_float16_component(unsigned i) const;
double get_double_component(unsigned i) const;
int get_int_component(unsigned i) const;
unsigned get_uint_component(unsigned i) const;


@ -75,54 +75,6 @@ ir_array_refcount_entry::~ir_array_refcount_entry()
delete [] bits;
}
void
ir_array_refcount_entry::mark_array_elements_referenced(const array_deref_range *dr,
unsigned count)
{
if (count != array_depth)
return;
mark_array_elements_referenced(dr, count, 1, 0);
}
void
ir_array_refcount_entry::mark_array_elements_referenced(const array_deref_range *dr,
unsigned count,
unsigned scale,
unsigned linearized_index)
{
/* Walk through the list of array dereferences in least- to
* most-significant order. Along the way, accumulate the current
* linearized offset and the scale factor for each array-of-.
*/
for (unsigned i = 0; i < count; i++) {
if (dr[i].index < dr[i].size) {
linearized_index += dr[i].index * scale;
scale *= dr[i].size;
} else {
/* For each element in the current array, update the count and
* offset, then recurse to process the remaining arrays.
*
* There is some inefficiency here if the last element in the
* array_deref_range list specifies the entire array. In that case,
* the loop will make recursive calls with count == 0. In the call,
* all that will happen is the bit will be set.
*/
for (unsigned j = 0; j < dr[i].size; j++) {
mark_array_elements_referenced(&dr[i + 1],
count - (i + 1),
scale * dr[i].size,
linearized_index + (j * scale));
}
return;
}
}
BITSET_SET(bits, linearized_index);
}
ir_array_refcount_entry *
ir_array_refcount_visitor::get_variable_entry(ir_variable *var)
{
@ -224,7 +176,9 @@ ir_array_refcount_visitor::visit_enter(ir_dereference_array *ir)
if (entry == NULL)
return visit_stop;
entry->mark_array_elements_referenced(derefs, num_derefs);
link_util_mark_array_elements_referenced(derefs, num_derefs,
entry->array_depth,
entry->bits);
return visit_continue;
}


@ -32,26 +32,10 @@
#include "ir.h"
#include "ir_visitor.h"
#include "linker_util.h"
#include "compiler/glsl_types.h"
#include "util/bitset.h"
/**
* Describes an access of an array element or an access of the whole array
*/
struct array_deref_range {
/**
* Index that was accessed.
*
* All valid array indices are less than the size of the array. If index
* is equal to the size of the array, this means the entire array has been
* accessed (e.g., due to use of a non-constant index).
*/
unsigned index;
/** Size of the array. Used for offset calculations. */
unsigned size;
};
class ir_array_refcount_entry
{
public:
@ -63,33 +47,11 @@ public:
/** Has the variable been referenced? */
bool is_referenced;
/**
* Mark a set of array elements as accessed.
*
* If every \c array_deref_range is for a single index, only a single
* element will be marked. If any \c array_deref_range is for an entire
* array-of-, then multiple elements will be marked.
*
* Items in the \c array_deref_range list appear in least- to
* most-significant order. This is the \b opposite order the indices
* appear in the GLSL shader text. An array access like
*
* x = y[1][i][3];
*
* would appear as
*
* { { 3, n }, { m, m }, { 1, p } }
*
* where n, m, and p are the sizes of the arrays-of-arrays.
*
* The set of marked array elements can later be queried by
* \c ::is_linearized_index_referenced.
*
* \param dr List of array_deref_range elements to be processed.
* \param count Number of array_deref_range elements to be processed.
*/
void mark_array_elements_referenced(const array_deref_range *dr,
unsigned count);
/** Count of nested arrays in the type. */
unsigned array_depth;
/** Set of bit-flags to note which array elements have been accessed. */
BITSET_WORD *bits;
/** Has a linearized array index been referenced? */
bool is_linearized_index_referenced(unsigned linearized_index) const
@ -101,8 +63,6 @@ public:
}
private:
/** Set of bit-flags to note which array elements have been accessed. */
BITSET_WORD *bits;
/**
* Total number of bits referenced by \c bits.
@ -111,27 +71,6 @@ private:
*/
unsigned num_bits;
/** Count of nested arrays in the type. */
unsigned array_depth;
/**
* Recursive part of the public mark_array_elements_referenced method.
*
* The recursion occurs when an entire array-of- is accessed. See the
* implementation for more details.
*
* \param dr List of array_deref_range elements to be
* processed.
* \param count Number of array_deref_range elements to be
* processed.
* \param scale Current offset scale.
* \param linearized_index Current accumulated linearized array index.
*/
void mark_array_elements_referenced(const array_deref_range *dr,
unsigned count,
unsigned scale,
unsigned linearized_index);
friend class array_refcount_test;
};


@ -22,7 +22,7 @@
*/
#include <string.h>
#include "main/compiler.h"
#include "util/compiler.h"
#include "ir.h"
#include "compiler/glsl_types.h"
#include "util/hash_table.h"


@ -452,6 +452,21 @@ isub64_saturate(int64_t a, int64_t b)
return a - b;
}
static uint64_t
pack_2x32(uint32_t a, uint32_t b)
{
uint64_t v = a;
v |= (uint64_t)b << 32;
return v;
}
static void
unpack_2x32(uint64_t p, uint32_t *a, uint32_t *b)
{
*a = p & 0xffffffff;
*b = (p >> 32);
}
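/* Illustrative round trip (assumption, not part of the change):
 *
 *    uint32_t lo, hi;
 *    unpack_2x32(pack_2x32(0xdeadbeefu, 0x12345678u), &lo, &hi);
 *    assert(lo == 0xdeadbeefu && hi == 0x12345678u);
 *
 * i.e. the first argument occupies the low 32 bits of the result. */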
/**
* Get the constant that is ultimately referenced by an r-value, in a constant
* expression evaluation context.
@ -692,6 +707,23 @@ ir_expression::constant_expression_value(void *mem_ctx,
return NULL;
}
for (unsigned operand = 0; operand < this->num_operands; operand++) {
if (op[operand]->type->base_type == GLSL_TYPE_FLOAT16) {
const struct glsl_type *float_type =
glsl_type::get_instance(GLSL_TYPE_FLOAT,
op[operand]->type->vector_elements,
op[operand]->type->matrix_columns,
op[operand]->type->explicit_stride,
op[operand]->type->interface_row_major);
ir_constant_data f;
for (unsigned i = 0; i < ARRAY_SIZE(f.f); i++)
f.f[i] = _mesa_half_to_float(op[operand]->value.f16[i]);
op[operand] = new(mem_ctx) ir_constant(float_type, &f);
}
}
if (op[1] != NULL)
switch (this->operation) {
case ir_binop_lshift:
@ -740,6 +772,15 @@ ir_expression::constant_expression_value(void *mem_ctx,
#include "ir_expression_operation_constant.h"
if (this->type->base_type == GLSL_TYPE_FLOAT16) {
ir_constant_data f;
for (unsigned i = 0; i < ARRAY_SIZE(f.f16); i++)
f.f16[i] = _mesa_float_to_half(data.f[i]);
return new(mem_ctx) ir_constant(this->type, &f);
}
return new(mem_ctx) ir_constant(this->type, &data);
}
@ -773,6 +814,7 @@ ir_swizzle::constant_expression_value(void *mem_ctx,
case GLSL_TYPE_UINT:
case GLSL_TYPE_INT: data.u[i] = v->value.u[swiz_idx[i]]; break;
case GLSL_TYPE_FLOAT: data.f[i] = v->value.f[swiz_idx[i]]; break;
case GLSL_TYPE_FLOAT16: data.f16[i] = v->value.f16[swiz_idx[i]]; break;
case GLSL_TYPE_BOOL: data.b[i] = v->value.b[swiz_idx[i]]; break;
case GLSL_TYPE_DOUBLE:data.d[i] = v->value.d[swiz_idx[i]]; break;
case GLSL_TYPE_UINT64:data.u64[i] = v->value.u64[swiz_idx[i]]; break;
@ -1056,10 +1098,16 @@ ir_function_signature::constant_expression_value(void *mem_ctx,
/*
* Of the builtin functions, only the texture lookups and the noise
* ones must not be used in constant expressions. They all include
* specific opcodes so they don't need to be special-cased at this
* point.
* ones must not be used in constant expressions. Texture instructions
* include special ir_texture opcodes which can't be constant-folded (see
* ir_texture::constant_expression_value). Noise functions, however, we
* have to special case here.
*/
if (strcmp(this->function_name(), "noise1") == 0 ||
strcmp(this->function_name(), "noise2") == 0 ||
strcmp(this->function_name(), "noise3") == 0 ||
strcmp(this->function_name(), "noise4") == 0)
return NULL;
/* Initialize the table of dereferencable names with the function
* parameters. Verify their const-ness on the way.


@ -39,6 +39,7 @@ enum ir_expression_operation {
ir_unop_i2f,
ir_unop_f2b,
ir_unop_b2f,
ir_unop_b2f16,
ir_unop_i2b,
ir_unop_b2i,
ir_unop_u2f,
@ -46,11 +47,15 @@ enum ir_expression_operation {
ir_unop_u2i,
ir_unop_d2f,
ir_unop_f2d,
ir_unop_f2f16,
ir_unop_f2fmp,
ir_unop_f162f,
ir_unop_d2i,
ir_unop_i2d,
ir_unop_d2u,
ir_unop_u2d,
ir_unop_d2b,
ir_unop_f162b,
ir_unop_bitcast_i2f,
ir_unop_bitcast_f2i,
ir_unop_bitcast_u2f,
@ -117,7 +122,6 @@ enum ir_expression_operation {
ir_unop_unpack_image_2x32,
ir_unop_frexp_sig,
ir_unop_frexp_exp,
ir_unop_noise,
ir_unop_subroutine_to_int,
ir_unop_interpolate_at_centroid,
ir_unop_get_buffer_size,


@ -438,6 +438,8 @@ ir_expression_operation = [
operation("f2b", 1, source_types=(float_type,), dest_type=bool_type, c_expression="{src0} != 0.0F ? true : false"),
# Boolean-to-float conversion
operation("b2f", 1, source_types=(bool_type,), dest_type=float_type, c_expression="{src0} ? 1.0F : 0.0F"),
# Boolean-to-float16 conversion
operation("b2f16", 1, source_types=(bool_type,), dest_type=float_type, c_expression="{src0} ? 1.0F : 0.0F"),
# int-to-boolean conversion
operation("i2b", 1, source_types=(uint_type, int_type), dest_type=bool_type, c_expression="{src0} ? true : false"),
# Boolean-to-int conversion
@ -452,6 +454,13 @@ ir_expression_operation = [
operation("d2f", 1, source_types=(double_type,), dest_type=float_type, c_expression="{src0}"),
# Float-to-double conversion.
operation("f2d", 1, source_types=(float_type,), dest_type=double_type, c_expression="{src0}"),
# Half-float conversions. These all operate on and return float types,
# since the framework expands half to full float before calling in. We
# still have to handle them here so that we can constant propagate through
# them, but they are no-ops.
operation("f2f16", 1, source_types=(float_type,), dest_type=float_type, c_expression="{src0}"),
operation("f2fmp", 1, source_types=(float_type,), dest_type=float_type, c_expression="{src0}"),
operation("f162f", 1, source_types=(float_type,), dest_type=float_type, c_expression="{src0}"),
# Double-to-integer conversion.
operation("d2i", 1, source_types=(double_type,), dest_type=int_type, c_expression="{src0}"),
# Integer-to-double conversion.
@ -462,6 +471,8 @@ ir_expression_operation = [
operation("u2d", 1, source_types=(uint_type,), dest_type=double_type, c_expression="{src0}"),
# Double-to-boolean conversion.
operation("d2b", 1, source_types=(double_type,), dest_type=bool_type, c_expression="{src0} != 0.0"),
# Float16-to-boolean conversion.
operation("f162b", 1, source_types=(float_type,), dest_type=bool_type, c_expression="{src0} != 0.0"),
# Bit-identical int-to-float "conversion"
operation("bitcast_i2f", 1, source_types=(int_type,), dest_type=float_type, c_expression="bitcast_u2f({src0})"),
# Bit-identical float-to-int "conversion"
@ -544,20 +555,18 @@ ir_expression_operation = [
operation("saturate", 1, printable_name="sat", source_types=(float_type,), c_expression="CLAMP({src0}, 0.0f, 1.0f)"),
# Double packing, part of ARB_gpu_shader_fp64.
operation("pack_double_2x32", 1, printable_name="packDouble2x32", source_types=(uint_type,), dest_type=double_type, c_expression="memcpy(&data.d[0], &op[0]->value.u[0], sizeof(double))", flags=frozenset((horizontal_operation, non_assign_operation))),
operation("unpack_double_2x32", 1, printable_name="unpackDouble2x32", source_types=(double_type,), dest_type=uint_type, c_expression="memcpy(&data.u[0], &op[0]->value.d[0], sizeof(double))", flags=frozenset((horizontal_operation, non_assign_operation))),
operation("pack_double_2x32", 1, printable_name="packDouble2x32", source_types=(uint_type,), dest_type=double_type, c_expression="data.u64[0] = pack_2x32(op[0]->value.u[0], op[0]->value.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
operation("unpack_double_2x32", 1, printable_name="unpackDouble2x32", source_types=(double_type,), dest_type=uint_type, c_expression="unpack_2x32(op[0]->value.u64[0], &data.u[0], &data.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
# Sampler/Image packing, part of ARB_bindless_texture.
operation("pack_sampler_2x32", 1, printable_name="packSampler2x32", source_types=(uint_type,), dest_type=uint64_type, c_expression="memcpy(&data.u64[0], &op[0]->value.u[0], sizeof(uint64_t))", flags=frozenset((horizontal_operation, non_assign_operation))),
operation("pack_image_2x32", 1, printable_name="packImage2x32", source_types=(uint_type,), dest_type=uint64_type, c_expression="memcpy(&data.u64[0], &op[0]->value.u[0], sizeof(uint64_t))", flags=frozenset((horizontal_operation, non_assign_operation))),
operation("unpack_sampler_2x32", 1, printable_name="unpackSampler2x32", source_types=(uint64_type,), dest_type=uint_type, c_expression="memcpy(&data.u[0], &op[0]->value.u64[0], sizeof(uint64_t))", flags=frozenset((horizontal_operation, non_assign_operation))),
operation("unpack_image_2x32", 1, printable_name="unpackImage2x32", source_types=(uint64_type,), dest_type=uint_type, c_expression="memcpy(&data.u[0], &op[0]->value.u64[0], sizeof(uint64_t))", flags=frozenset((horizontal_operation, non_assign_operation))),
operation("pack_sampler_2x32", 1, printable_name="packSampler2x32", source_types=(uint_type,), dest_type=uint64_type, c_expression="data.u64[0] = pack_2x32(op[0]->value.u[0], op[0]->value.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
operation("pack_image_2x32", 1, printable_name="packImage2x32", source_types=(uint_type,), dest_type=uint64_type, c_expression="data.u64[0] = pack_2x32(op[0]->value.u[0], op[0]->value.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
operation("unpack_sampler_2x32", 1, printable_name="unpackSampler2x32", source_types=(uint64_type,), dest_type=uint_type, c_expression="unpack_2x32(op[0]->value.u64[0], &data.u[0], &data.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
operation("unpack_image_2x32", 1, printable_name="unpackImage2x32", source_types=(uint64_type,), dest_type=uint_type, c_expression="unpack_2x32(op[0]->value.u64[0], &data.u[0], &data.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
operation("frexp_sig", 1),
operation("frexp_exp", 1),
operation("noise", 1),
operation("subroutine_to_int", 1),
# Interpolate fs input at centroid
@ -578,10 +587,10 @@ ir_expression_operation = [
operation("ssbo_unsized_array_length", 1),
# 64-bit integer packing ops.
operation("pack_int_2x32", 1, printable_name="packInt2x32", source_types=(int_type,), dest_type=int64_type, c_expression="memcpy(&data.i64[0], &op[0]->value.i[0], sizeof(int64_t))", flags=frozenset((horizontal_operation, non_assign_operation))),
operation("pack_uint_2x32", 1, printable_name="packUint2x32", source_types=(uint_type,), dest_type=uint64_type, c_expression="memcpy(&data.u64[0], &op[0]->value.u[0], sizeof(uint64_t))", flags=frozenset((horizontal_operation, non_assign_operation))),
operation("unpack_int_2x32", 1, printable_name="unpackInt2x32", source_types=(int64_type,), dest_type=int_type, c_expression="memcpy(&data.i[0], &op[0]->value.i64[0], sizeof(int64_t))", flags=frozenset((horizontal_operation, non_assign_operation))),
operation("unpack_uint_2x32", 1, printable_name="unpackUint2x32", source_types=(uint64_type,), dest_type=uint_type, c_expression="memcpy(&data.u[0], &op[0]->value.u64[0], sizeof(uint64_t))", flags=frozenset((horizontal_operation, non_assign_operation))),
operation("pack_int_2x32", 1, printable_name="packInt2x32", source_types=(int_type,), dest_type=int64_type, c_expression="data.u64[0] = pack_2x32(op[0]->value.u[0], op[0]->value.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
operation("pack_uint_2x32", 1, printable_name="packUint2x32", source_types=(uint_type,), dest_type=uint64_type, c_expression="data.u64[0] = pack_2x32(op[0]->value.u[0], op[0]->value.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
operation("unpack_int_2x32", 1, printable_name="unpackInt2x32", source_types=(int64_type,), dest_type=int_type, c_expression="unpack_2x32(op[0]->value.u64[0], &data.u[0], &data.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
operation("unpack_uint_2x32", 1, printable_name="unpackUint2x32", source_types=(uint64_type,), dest_type=uint_type, c_expression="unpack_2x32(op[0]->value.u64[0], &data.u[0], &data.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
operation("add", 2, printable_name="+", source_types=numeric_types, c_expression="{src0} + {src1}", flags=vector_scalar_operation),
operation("sub", 2, printable_name="-", source_types=numeric_types, c_expression="{src0} - {src1}", flags=vector_scalar_operation),


@ -254,6 +254,18 @@
}
break;
case ir_unop_b2f16:
for (unsigned c = 0; c < op[0]->type->components(); c++) {
switch (op[0]->type->base_type) {
case GLSL_TYPE_BOOL:
data.f[c] = op[0]->value.b[c] ? 1.0F : 0.0F;
break;
default:
unreachable("invalid type");
}
}
break;
case ir_unop_i2b:
for (unsigned c = 0; c < op[0]->type->components(); c++) {
switch (op[0]->type->base_type) {
@ -341,6 +353,42 @@
}
break;
case ir_unop_f2f16:
for (unsigned c = 0; c < op[0]->type->components(); c++) {
switch (op[0]->type->base_type) {
case GLSL_TYPE_FLOAT:
data.f[c] = op[0]->value.f[c];
break;
default:
unreachable("invalid type");
}
}
break;
case ir_unop_f2fmp:
for (unsigned c = 0; c < op[0]->type->components(); c++) {
switch (op[0]->type->base_type) {
case GLSL_TYPE_FLOAT:
data.f[c] = op[0]->value.f[c];
break;
default:
unreachable("invalid type");
}
}
break;
case ir_unop_f162f:
for (unsigned c = 0; c < op[0]->type->components(); c++) {
switch (op[0]->type->base_type) {
case GLSL_TYPE_FLOAT:
data.f[c] = op[0]->value.f[c];
break;
default:
unreachable("invalid type");
}
}
break;
case ir_unop_d2i:
for (unsigned c = 0; c < op[0]->type->components(); c++) {
switch (op[0]->type->base_type) {
@ -401,6 +449,18 @@
}
break;
case ir_unop_f162b:
for (unsigned c = 0; c < op[0]->type->components(); c++) {
switch (op[0]->type->base_type) {
case GLSL_TYPE_FLOAT:
data.b[c] = op[0]->value.f[c] != 0.0;
break;
default:
unreachable("invalid type");
}
}
break;
case ir_unop_bitcast_i2f:
for (unsigned c = 0; c < op[0]->type->components(); c++) {
switch (op[0]->type->base_type) {
@ -1075,43 +1135,43 @@
break;
case ir_unop_pack_double_2x32:
memcpy(&data.d[0], &op[0]->value.u[0], sizeof(double));
data.u64[0] = pack_2x32(op[0]->value.u[0], op[0]->value.u[1]);
break;
case ir_unop_unpack_double_2x32:
memcpy(&data.u[0], &op[0]->value.d[0], sizeof(double));
unpack_2x32(op[0]->value.u64[0], &data.u[0], &data.u[1]);
break;
case ir_unop_pack_sampler_2x32:
memcpy(&data.u64[0], &op[0]->value.u[0], sizeof(uint64_t));
data.u64[0] = pack_2x32(op[0]->value.u[0], op[0]->value.u[1]);
break;
case ir_unop_pack_image_2x32:
memcpy(&data.u64[0], &op[0]->value.u[0], sizeof(uint64_t));
data.u64[0] = pack_2x32(op[0]->value.u[0], op[0]->value.u[1]);
break;
case ir_unop_unpack_sampler_2x32:
memcpy(&data.u[0], &op[0]->value.u64[0], sizeof(uint64_t));
unpack_2x32(op[0]->value.u64[0], &data.u[0], &data.u[1]);
break;
case ir_unop_unpack_image_2x32:
memcpy(&data.u[0], &op[0]->value.u64[0], sizeof(uint64_t));
unpack_2x32(op[0]->value.u64[0], &data.u[0], &data.u[1]);
break;
case ir_unop_pack_int_2x32:
memcpy(&data.i64[0], &op[0]->value.i[0], sizeof(int64_t));
data.u64[0] = pack_2x32(op[0]->value.u[0], op[0]->value.u[1]);
break;
case ir_unop_pack_uint_2x32:
memcpy(&data.u64[0], &op[0]->value.u[0], sizeof(uint64_t));
data.u64[0] = pack_2x32(op[0]->value.u[0], op[0]->value.u[1]);
break;
case ir_unop_unpack_int_2x32:
memcpy(&data.i[0], &op[0]->value.i64[0], sizeof(int64_t));
unpack_2x32(op[0]->value.u64[0], &data.u[0], &data.u[1]);
break;
case ir_unop_unpack_uint_2x32:
memcpy(&data.u[0], &op[0]->value.u64[0], sizeof(uint64_t));
unpack_2x32(op[0]->value.u64[0], &data.u[0], &data.u[1]);
break;
case ir_binop_add:


@ -39,6 +39,7 @@ const char *const ir_expression_operation_strings[] = {
"i2f",
"f2b",
"b2f",
"b2f16",
"i2b",
"b2i",
"u2f",
@ -46,11 +47,15 @@ const char *const ir_expression_operation_strings[] = {
"u2i",
"d2f",
"f2d",
"f2f16",
"f2fmp",
"f162f",
"d2i",
"i2d",
"d2u",
"u2d",
"d2b",
"f162b",
"bitcast_i2f",
"bitcast_f2i",
"bitcast_u2f",
@ -117,7 +122,6 @@ const char *const ir_expression_operation_strings[] = {
"unpackImage2x32",
"frexp_sig",
"frexp_exp",
"noise",
"subroutine_to_int",
"interpolate_at_centroid",
"get_buffer_size",
@ -191,6 +195,7 @@ const char *const ir_expression_operation_enum_strings[] = {
"i2f",
"f2b",
"b2f",
"b2f16",
"i2b",
"b2i",
"u2f",
@ -198,11 +203,15 @@ const char *const ir_expression_operation_enum_strings[] = {
"u2i",
"d2f",
"f2d",
"f2f16",
"f2fmp",
"f162f",
"d2i",
"i2d",
"d2u",
"u2d",
"d2b",
"f162b",
"bitcast_i2f",
"bitcast_f2i",
"bitcast_u2f",
@ -269,7 +278,6 @@ const char *const ir_expression_operation_enum_strings[] = {
"unpack_image_2x32",
"frexp_sig",
"frexp_exp",
"noise",
"subroutine_to_int",
"interpolate_at_centroid",
"get_buffer_size",


@ -37,8 +37,7 @@ ir_hierarchical_visitor::ir_hierarchical_visitor()
ir_visitor_status
ir_hierarchical_visitor::visit(ir_rvalue *ir)
{
if (this->callback_enter != NULL)
this->callback_enter(ir, this->data_enter);
call_enter_leave_callbacks(ir);
return visit_continue;
}
@ -46,8 +45,7 @@ ir_hierarchical_visitor::visit(ir_rvalue *ir)
ir_visitor_status
ir_hierarchical_visitor::visit(ir_variable *ir)
{
if (this->callback_enter != NULL)
this->callback_enter(ir, this->data_enter);
call_enter_leave_callbacks(ir);
return visit_continue;
}
@ -55,8 +53,7 @@ ir_hierarchical_visitor::visit(ir_variable *ir)
ir_visitor_status
ir_hierarchical_visitor::visit(ir_constant *ir)
{
if (this->callback_enter != NULL)
this->callback_enter(ir, this->data_enter);
call_enter_leave_callbacks(ir);
return visit_continue;
}
@ -64,8 +61,7 @@ ir_hierarchical_visitor::visit(ir_constant *ir)
ir_visitor_status
ir_hierarchical_visitor::visit(ir_loop_jump *ir)
{
if (this->callback_enter != NULL)
this->callback_enter(ir, this->data_enter);
call_enter_leave_callbacks(ir);
return visit_continue;
}
@ -91,8 +87,7 @@ ir_hierarchical_visitor::visit(ir_typedecl_statement *ir)
ir_visitor_status
ir_hierarchical_visitor::visit(ir_dereference_variable *ir)
{
if (this->callback_enter != NULL)
this->callback_enter(ir, this->data_enter);
call_enter_leave_callbacks(ir);
return visit_continue;
}
@ -100,8 +95,7 @@ ir_hierarchical_visitor::visit(ir_dereference_variable *ir)
ir_visitor_status
ir_hierarchical_visitor::visit(ir_barrier *ir)
{
if (this->callback_enter != NULL)
this->callback_enter(ir, this->data_enter);
call_enter_leave_callbacks(ir);
return visit_continue;
}
@ -400,6 +394,14 @@ ir_hierarchical_visitor::run(exec_list *instructions)
visit_list_elements(this, instructions);
}
void
ir_hierarchical_visitor::call_enter_leave_callbacks(class ir_instruction *ir)
{
if (this->callback_enter != NULL)
this->callback_enter(ir, this->data_enter);
if (this->callback_leave != NULL)
this->callback_leave(ir, this->data_leave);
}
void
visit_tree(ir_instruction *ir,


@ -151,6 +151,12 @@ public:
*/
void run(struct exec_list *instructions);
/**
* Utility function to call both the leave and enter callback functions.
* This is used for leaf nodes.
*/
void call_enter_leave_callbacks(class ir_instruction *ir);
/* Some visitors may need to insert new variable declarations and
* assignments for portions of a subtree, which means they need a
* pointer to the current instruction in the stream, not just their


@ -135,7 +135,6 @@ bool do_vec_index_to_swizzle(exec_list *instructions);
bool lower_discard(exec_list *instructions);
void lower_discard_flow(exec_list *instructions);
bool lower_instructions(exec_list *instructions, unsigned what_to_lower);
bool lower_noise(exec_list *instructions);
bool lower_variable_index_to_cond_assign(gl_shader_stage stage,
exec_list *instructions, bool lower_input, bool lower_output,
bool lower_temp, bool lower_uniform);
@ -143,6 +142,9 @@ bool lower_quadop_vector(exec_list *instructions, bool dont_lower_swz);
bool lower_const_arrays_to_uniforms(exec_list *instructions, unsigned stage, unsigned max_uniform_components);
bool lower_clip_cull_distance(struct gl_shader_program *prog,
gl_linked_shader *shader);
ir_variable * lower_xfb_varying(void *mem_ctx,
gl_linked_shader *shader,
const char *old_var_name);
void lower_output_reads(unsigned stage, exec_list *instructions);
bool lower_packing_builtins(exec_list *instructions, int op_mask);
void lower_shared_reference(struct gl_context *ctx,
@ -156,7 +158,9 @@ void lower_packed_varyings(void *mem_ctx,
ir_variable_mode mode,
unsigned gs_input_vertices,
gl_linked_shader *shader,
bool disable_varying_packing, bool xfb_enabled);
bool disable_varying_packing,
bool disable_xfb_packing,
bool xfb_enabled);
bool lower_vector_insert(exec_list *instructions, bool lower_nonconstant_index);
bool lower_vector_derefs(gl_linked_shader *shader);
void lower_named_interface_blocks(void *mem_ctx, gl_linked_shader *shader);
@ -171,6 +175,7 @@ bool lower_vertex_id(gl_linked_shader *shader);
bool lower_cs_derived(gl_linked_shader *shader);
bool lower_blend_equation_advanced(gl_linked_shader *shader, bool coherent);
bool lower_builtins(exec_list *instructions);
bool lower_subroutine(exec_list *instructions, struct _mesa_glsl_parse_state *state);
void propagate_invariance(exec_list *instructions);
@ -183,4 +188,6 @@ ir_variable *compare_index_block(ir_builder::ir_factory &body,
bool lower_64bit_integer_instructions(exec_list *instructions,
unsigned what_to_lower);
bool lower_precision(exec_list *instructions);
#endif /* GLSL_IR_OPTIMIZATION_H */


@ -274,6 +274,8 @@ _mesa_print_ir_glsl(exec_list *instructions,
str.asprintf_append ("#extension GL_EXT_blend_func_extended : enable\n");
if (state->OES_EGL_image_external_essl3_enable)
str.asprintf_append ("#extension GL_OES_EGL_image_external_essl3 : enable\n");
if (state->ARB_shader_storage_buffer_object_enable)
str.asprintf_append ("#extension GL_ARB_shader_storage_buffer_object : enable\n");
// TODO: support other blend specifiers besides "all"
@ -464,14 +466,25 @@ static void print_type(string_buffer& buffer, const glsl_type *t, bool arraySize
static void print_type_post(string_buffer& buffer, const glsl_type *t, bool arraySize)
{
if (t->base_type == GLSL_TYPE_ARRAY) {
if (!arraySize)
buffer.asprintf_append ("[%u]", t->length);
if (!arraySize) {
if (t->length) {
buffer.asprintf_append ("[%u]", t->length);
} else {
buffer.asprintf_append ("[]");
}
}
}
}
void ir_print_glsl_visitor::visit(ir_variable *ir)
{
// Variables that are declared as or part of interface blocks will be printed by the block declaration.
if (ir->is_in_buffer_block()) {
skipped_this_ir = true;
return;
}
const char *const cent = (ir->data.centroid) ? "centroid " : "";
const char *const inv = (ir->data.invariant) ? "invariant " : "";
const char *const mode[3][ir_var_mode_count] =
@ -644,174 +657,194 @@ void ir_print_glsl_visitor::visit(ir_function *ir)
indent();
}
static const char *const operator_glsl_strs[] = {
"~", // ir_unop_bit_not,
"!", // ir_unop_logic_not,
"-", // ir_unop_neg,
"abs", // ir_unop_abs,
"sign", // ir_unop_sign,
"1.0/", // ir_unop_rcp,
"inversesqrt", // ir_unop_rsq,
"sqrt", // ir_unop_sqrt,
"exp", // ir_unop_exp,
"log", // ir_unop_log,
"exp2", // ir_unop_exp2,
"log2", // ir_unop_log2,
"int", // ir_unop_f2i,
"int", // ir_unop_f2u,
"float", // ir_unop_i2f,
"bool", // ir_unop_f2b,
"float", // ir_unop_b2f,
"bool", // ir_unop_i2b,
"int", // ir_unop_b2i,
"float", // ir_unop_u2f,
"int", // ir_unop_i2u,
"int", // ir_unop_u2i,
"float", // ir_unop_d2f,
"f2d", // ir_unop_f2d,
"d2i", // ir_unop_d2i,
"i2d", // ir_unop_i2d,
"d2u", // ir_unop_d2u,
"u2d", // ir_unop_u2d,
"d2b", // ir_unop_d2b,
"intBitsToFloat", // ir_unop_bitcast_i2f,
"floatBitsToInt", // ir_unop_bitcast_f2i,
"uintBitsToFloat", // ir_unop_bitcast_u2f,
"floatBitsToUint", // ir_unop_bitcast_f2u,
"uint64BitsToDouble", // ir_unop_bitcast_u642d,
"int64BitsToDouble", // ir_unop_bitcast_i642d,
"doubleBitsToUint64", // ir_unop_bitcast_d2u64,
"doubleBitsToInt64", // ir_unop_bitcast_d2i64,
"int", // ir_unop_i642i,
"int", // ir_unop_u642i,
"uint", // ir_unop_i642u,
"uint", // ir_unop_u642u,
"bool", // ir_unop_i642b,
"float", // ir_unop_i642f,
"float", // ir_unop_u642f,
"double", // ir_unop_i642d,
"double", // ir_unop_u642d,
"int64_t", // ir_unop_i2i64,
"int64_t", // ir_unop_u2i64,
"int64_t", // ir_unop_b2i64,
"int64_t", // ir_unop_f2i64,
"int64_t", // ir_unop_d2i64,
"uint64_t", // ir_unop_i2u64,
"uint64_t", // ir_unop_u2u64,
"uint64_t", // ir_unop_f2u64,
"uint64_t", // ir_unop_d2u64,
"int64_t", // ir_unop_u642i64,
"uint64_t", // ir_unop_i642u64,
"trunc", // ir_unop_trunc,
"ceil", // ir_unop_ceil,
"floor", // ir_unop_floor,
"fract", // ir_unop_fract,
"roundEven", // ir_unop_round_even,
"sin", // ir_unop_sin,
"cos", // ir_unop_cos,
"atan", // ir_unop_atan,
"dFdx", // ir_unop_dFdx,
"dFdxCoarse", // ir_unop_dFdx_coarse,
"dFdxFine", // ir_unop_dFdx_fine,
"dFdy", // ir_unop_dFdy,
"dFdyCoarse", // ir_unop_dFdy_coarse,
"dFdyFine", // ir_unop_dFdy_fine,
"packSnorm2x16", // ir_unop_pack_snorm_2x16,
"packSnorm4x8", // ir_unop_pack_snorm_4x8,
"packUnorm2x16", // ir_unop_pack_unorm_2x16,
"packUnorm4x8", // ir_unop_pack_unorm_4x8,
"packHalf2x16", // ir_unop_pack_half_2x16,
"unpackSnorm2x16", // ir_unop_unpack_snorm_2x16,
"unpackSnorm4x8", // ir_unop_unpack_snorm_4x8,
"unpackUnorm2x16", // ir_unop_unpack_unorm_2x16,
"unpackUnorm4x8", // ir_unop_unpack_unorm_4x8,
"unpackHalf2x16", // ir_unop_unpack_half_2x16,
"bitfieldReverse", // ir_unop_bitfield_reverse,
"bitCount", // ir_unop_bit_count,
"findMSB", // ir_unop_find_msb,
"findLSB", // ir_unop_find_lsb,
"clz_TODO", // ir_unop_clz,
"saturate", // ir_unop_saturate,
"packDouble2x32", // ir_unop_pack_double_2x32,
"unpackDouble2x32", // ir_unop_unpack_double_2x32,
"packSampler2x32", // ir_unop_pack_sampler_2x32,
"packImage2x32", // ir_unop_pack_image_2x32,
"unpackSampler2x32", // ir_unop_unpack_sampler_2x32,
"unpackImage2x32", // ir_unop_unpack_image_2x32,
"frexp_sig_TODO", // ir_unop_frexp_sig,
"frexp_exp_TODO", // ir_unop_frexp_exp,
"noise", // ir_unop_noise,
"subroutine_to_int_TODO", // ir_unop_subroutine_to_int,
"interpolateAtCentroid", // ir_unop_interpolate_at_centroid,
"get_buffer_size_TODO", // ir_unop_get_buffer_size,
"ssbo_unsized_array_length_TODO", // ir_unop_ssbo_unsized_array_length,
"packInt2x32", // ir_unop_pack_int_2x32,
"packUint2x32", // ir_unop_pack_uint_2x32,
"unpackInt2x32", // ir_unop_unpack_int_2x32,
"unpackUint2x32", // ir_unop_unpack_uint_2x32,
"+", // ir_binop_add,
"-", // ir_binop_sub,
"add_sat_TODO", // ir_binop_add_sat,
"sub_sat_TODO", // ir_binop_sub_sat,
"abs_sub_TODO", // ir_binop_abs_sub,
"avg_TODO", // ir_binop_avg,
"avg_round_TODO", // ir_binop_avg_round,
"*", // ir_binop_mul,
"mul_32x16_TODO", // ir_binop_mul_32x16,
"imul_high_TODO", // ir_binop_imul_high,
"/", // ir_binop_div,
"carry_TODO", // ir_binop_carry,
"borrow_TODO", // ir_binop_borrow,
"mod", // ir_binop_mod,
"<", // ir_binop_less,
">=", // ir_binop_gequal,
"==", // ir_binop_equal,
"!=", // ir_binop_nequal,
"==", // ir_binop_all_equal,
"!=", // ir_binop_any_nequal,
"<<", // ir_binop_lshift,
">>", // ir_binop_rshift,
"&", // ir_binop_bit_and,
"^", // ir_binop_bit_xor,
"|", // ir_binop_bit_or,
"&&", // ir_binop_logic_and,
"^^", // ir_binop_logic_xor,
"||", // ir_binop_logic_or,
"dot", // ir_binop_dot,
"min", // ir_binop_min,
"max", // ir_binop_max,
"pow", // ir_binop_pow,
"uboload_TODO", // ir_binop_ubo_load,
"ldexp_TODO", // ir_binop_ldexp,
"vectorExtract_TODO", // ir_binop_vector_extract,
"interpolateAtOffset", // ir_binop_interpolate_at_offset,
"interpolateAtSample", // ir_binop_interpolate_at_sample,
"atan", // ir_binop_atan2,
"fma", // ir_triop_fma,
"mix", // ir_triop_lrp,
"csel_TODO", // ir_triop_csel,
"bitfield_extract_TODO", // ir_triop_bitfield_extract,
"vector_insert_TODO", // ir_triop_vector_insert,
"bitfield_insert_TODO", // ir_quadop_bitfield_insert,
"vector_TODO", // ir_quadop_vector,
};
static const char *const operator_vec_glsl_strs[] = {
"lessThan",
"greaterThanEqual",
"equal",
"notEqual",
};
static const char* operator_glsl_str(ir_expression_operation op, const glsl_type* type) {
switch (op) {
case ir_unop_bit_not:
return "~";
case ir_unop_logic_not:
return "!";
case ir_unop_neg:
return "-";
case ir_unop_abs:
return "abs";
case ir_unop_sign:
return "sign";
case ir_unop_rsq:
return "inversesqrt";
case ir_unop_sqrt:
return "sqrt";
case ir_unop_exp:
return "exp";
case ir_unop_log:
return "log";
case ir_unop_exp2:
return "exp2";
case ir_unop_log2:
return "log2";
case ir_unop_trunc:
return "trunc";
case ir_unop_ceil:
return "ceil";
case ir_unop_floor:
return "floor";
case ir_unop_fract:
return "fract";
case ir_unop_round_even:
return "roundEven";
case ir_unop_sin:
return "sin";
case ir_unop_cos:
return "cos";
case ir_unop_atan:
return "atan";
case ir_unop_dFdx:
return "dFdx";
case ir_unop_dFdx_coarse:
return "dFdxCoarse";
case ir_unop_dFdx_fine:
return "dFdxFine";
case ir_unop_dFdy:
return "dFdy";
case ir_unop_dFdy_coarse:
return "dFdyCoarse";
case ir_unop_dFdy_fine:
return "dFdyFine";
case ir_unop_pack_snorm_2x16:
return "packSnorm2x16";
case ir_unop_pack_snorm_4x8:
return "packSnorm4x8";
case ir_unop_pack_unorm_2x16:
return "packUnorm2x16";
case ir_unop_pack_unorm_4x8:
return "packUnorm4x8";
case ir_unop_pack_half_2x16:
return "packHalf2x16";
case ir_unop_unpack_snorm_2x16:
return "unpackSnorm2x16";
case ir_unop_unpack_snorm_4x8:
return "unpackSnorm4x8";
case ir_unop_unpack_unorm_2x16:
return "unpackUnorm2x16";
case ir_unop_unpack_unorm_4x8:
return "unpackUnorm4x8";
case ir_unop_unpack_half_2x16:
return "unpackHalf2x16";
case ir_unop_bitfield_reverse:
return "bitfieldReverse";
case ir_unop_bit_count:
return "bitCount";
case ir_unop_find_msb:
return "findMSB";
case ir_unop_find_lsb:
return "findLSB";
case ir_unop_saturate:
return "saturate";
case ir_unop_pack_double_2x32:
return "packDouble2x32";
case ir_unop_unpack_double_2x32:
return "unpackDouble2x32";
case ir_unop_pack_sampler_2x32:
return "packSampler2x32";
case ir_unop_pack_image_2x32:
return "packImage2x32";
case ir_unop_unpack_sampler_2x32:
return "unpackSampler2x32";
case ir_unop_unpack_image_2x32:
return "unpackImage2x32";
case ir_unop_interpolate_at_centroid:
return "interpolateAtCentroid";
case ir_unop_pack_int_2x32:
return "packInt2x32";
case ir_unop_pack_uint_2x32:
return "packUint2x32";
case ir_unop_unpack_int_2x32:
return "unpackInt2x32";
case ir_unop_unpack_uint_2x32:
return "unpackUint2x32";
case ir_binop_add:
return "+";
case ir_binop_sub:
return "-";
case ir_binop_mul:
return "*";
case ir_binop_div:
return "/";
case ir_binop_mod:
if (type->is_integer())
return "%";
else
return "mod";
case ir_binop_less:
if (type->is_vector())
return "lessThan";
else
return "<";
case ir_binop_gequal:
if (type->is_vector())
return "greaterThanEqual";
else
return ">=";
case ir_binop_equal:
if (type->is_vector())
return "equal";
else
return "==";
case ir_binop_nequal:
if (type->is_vector())
return "notEqual";
else
return "!=";
case ir_binop_all_equal:
return "==";
case ir_binop_any_nequal:
return "!=";
case ir_binop_lshift:
return "<<";
case ir_binop_rshift:
return ">>";
case ir_binop_bit_and:
return "&";
case ir_binop_bit_xor:
return "^";
case ir_binop_bit_or:
return "|";
case ir_binop_logic_and:
return "&&";
case ir_binop_logic_xor:
return "^^";
case ir_binop_logic_or:
return "||";
case ir_binop_dot:
return "dot";
case ir_binop_min:
return "min";
case ir_binop_max:
return "max";
case ir_binop_pow:
return "pow";
case ir_binop_interpolate_at_offset:
return "interpolateAtOffset";
case ir_binop_interpolate_at_sample:
return "interpolateAtSample";
case ir_binop_atan2:
return "atan";
case ir_triop_fma:
return "fma";
case ir_triop_lrp:
return "mix";
default:
unreachable("Unexpected operator in operator_glsl_str");
return "UNIMPLEMENTED";
}
}
static bool is_binop_func_like(ir_expression_operation op, const glsl_type* type)
{
if (op == ir_binop_mod ||
(op >= ir_binop_dot && op <= ir_binop_pow) ||
op == ir_binop_atan2)
if (op == ir_binop_mod && !type->is_integer()) {
return true;
if (type->is_vector() && (op >= ir_binop_less && op <= ir_binop_nequal))
{
} else if ((op >= ir_binop_dot && op <= ir_binop_pow) || op == ir_binop_atan2) {
return true;
} else if (type->is_vector() && (op >= ir_binop_less && op <= ir_binop_nequal)) {
return true;
}
return false;
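/* Consequence (illustrative): integer mod now prints through the operator
 * path as "(a % b)", while float mod still prints as the function call
 * "mod(a, b)"; in GLSL ES, mod() is defined only for floats. */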
@ -829,7 +862,7 @@ void ir_print_glsl_visitor::visit(ir_expression *ir)
} else if (ir->operation == ir_unop_rcp) {
buffer.asprintf_append ("(1.0/(");
} else {
buffer.asprintf_append ("%s(", operator_glsl_strs[ir->operation]);
buffer.asprintf_append ("%s(", operator_glsl_str(ir->operation, ir->type));
}
if (ir->operands[0])
ir->operands[0]->accept(this);
@ -862,23 +895,6 @@ void ir_print_glsl_visitor::visit(ir_expression *ir)
ir->operands[1]->accept(this);
buffer.asprintf_append ("]");
}
else if (ir->operation == ir_binop_mod && ir->operands[0]->type->is_integer())
{
// In GLES, mod() is only a func for floats,
// and we must use the % operator for ints.
assert(ir->num_operands == 2);
assert(ir->operands[1]->type->is_integer());
buffer.asprintf_append ("(");
if (ir->operands[0])
ir->operands[0]->accept(this);
buffer.asprintf_append (" %s ", "%");
if (ir->operands[1])
ir->operands[1]->accept(this);
buffer.asprintf_append (")");
}
else if (is_binop_func_like(ir->operation, ir->type))
{
if (ir->operation == ir_binop_mod)
@ -887,10 +903,7 @@ void ir_print_glsl_visitor::visit(ir_expression *ir)
print_type(buffer, ir->type, true);
buffer.asprintf_append ("(");
}
if (ir->type->is_vector() && (ir->operation >= ir_binop_less && ir->operation <= ir_binop_nequal))
buffer.asprintf_append ("%s (", operator_vec_glsl_strs[ir->operation-ir_binop_less]);
else
buffer.asprintf_append ("%s (", operator_glsl_strs[ir->operation]);
buffer.asprintf_append ("%s (", operator_glsl_str(ir->operation, ir->type));
if (ir->operands[0])
ir->operands[0]->accept(this);
@ -907,7 +920,7 @@ void ir_print_glsl_visitor::visit(ir_expression *ir)
if (ir->operands[0])
ir->operands[0]->accept(this);
buffer.asprintf_append (" %s ", operator_glsl_strs[ir->operation]);
buffer.asprintf_append (" %s ", operator_glsl_str(ir->operation, ir->type));
if (ir->operands[1])
ir->operands[1]->accept(this);
@ -916,7 +929,7 @@ void ir_print_glsl_visitor::visit(ir_expression *ir)
else
{
// ternary op
buffer.asprintf_append ("%s (", operator_glsl_strs[ir->operation]);
buffer.asprintf_append ("%s (", operator_glsl_str(ir->operation, ir->type));
if (ir->operands[0])
ir->operands[0]->accept(this);
buffer.asprintf_append (", ");
@ -1863,12 +1876,68 @@ ir_print_glsl_visitor::visit(ir_precision_statement *ir)
buffer.asprintf_append ("%s", ir->precision_statement);
}
// FIXME
static const char*
interface_packing_string(enum glsl_interface_packing packing)
{
switch (packing) {
case GLSL_INTERFACE_PACKING_STD140:
return "std140";
case GLSL_INTERFACE_PACKING_SHARED:
return "shared";
case GLSL_INTERFACE_PACKING_PACKED:
return "packed";
case GLSL_INTERFACE_PACKING_STD430:
return "std430";
default:
unreachable("Unexpected interface packing");
return "UNKNOWN";
}
}
static const char*
interface_variable_mode_string(enum ir_variable_mode mode)
{
switch (mode) {
case ir_var_uniform:
return "uniform";
case ir_var_shader_storage:
return "buffer";
default:
unreachable("Unexpected interface variable mode");
return "UNKOWN";
}
}
void
ir_print_glsl_visitor::visit(ir_typedecl_statement *ir)
{
const glsl_type *const s = ir->type_decl;
buffer.asprintf_append ("struct %s {\n", s->name);
ir_variable* interface_var = NULL;
if (s->is_struct()) {
buffer.asprintf_append ("struct %s {\n", s->name);
} else if (s->is_interface()) {
const char* packing = interface_packing_string(s->get_interface_packing());
// Find a variable defined by this interface, as it holds some necessary data.
exec_node* n = ir;
while ((n = n->get_next())) {
ir_variable* v = ((ir_instruction *)n)->as_variable();
if (v != NULL && v->get_interface_type() == ir->type_decl) {
interface_var = v;
break;
}
}
const char* mode = interface_variable_mode_string((enum ir_variable_mode)interface_var->data.mode);
if (interface_var->data.explicit_binding) {
uint16_t binding = interface_var->data.binding;
buffer.asprintf_append ("layout(%s, binding=%" PRIu16 ") %s %s {\n", packing, binding, mode, s->name);
} else {
buffer.asprintf_append ("layout(%s) %s %s {\n", packing, mode, s->name);
}
}
for (unsigned j = 0; j < s->length; j++) {
buffer.asprintf_append (" ");
@ -1881,6 +1950,11 @@ ir_print_glsl_visitor::visit(ir_typedecl_statement *ir)
buffer.asprintf_append (";\n");
}
buffer.asprintf_append ("}");
if (interface_var && interface_var->is_interface_instance()) {
buffer.asprintf_append(" ");
print_var_name(interface_var);
}
}
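For context, a hedged sketch of the GLSL this new interface branch would emit (block name, member and instance name below are hypothetical): a std430 storage block with an explicit binding now prints as a real interface block instead of a struct:

	layout(std430, binding=3) buffer Outputs {
		vec4 data[];
	} outputs;

The trailing instance name comes from the new is_interface_instance() check at the end of the function.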
void


@ -28,6 +28,7 @@
#include "main/macros.h"
#include "util/hash_table.h"
#include "util/u_string.h"
#include "util/half_float.h"
static void print_type(FILE *f, const glsl_type *t);
@ -460,6 +461,19 @@ void ir_print_visitor::visit(ir_assignment *ir)
fprintf(f, ") ");
}
static void
print_float_constant(FILE *f, float val)
{
if (val == 0.0f)
/* 0.0 == -0.0, so print with %f to get the proper sign. */
fprintf(f, "%f", val);
else if (fabs(val) < 0.000001f)
fprintf(f, "%a", val);
else if (fabs(val) > 1000000.0f)
fprintf(f, "%e", val);
else
fprintf(f, "%f", val);
}
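A quick sanity check of the factored-out helper, assuming standard printf semantics: 0.0f and -0.0f both compare equal to 0.0f and take the "%f" branch, which preserves the sign ("0.000000" vs "-0.000000"); a tiny magnitude such as 1e-7f falls under the 0.000001f threshold and prints exactly as a hex float via "%a" (roughly "0x1.ad7f2ap-24"); a large magnitude such as 5e7f exceeds 1000000.0f and uses "%e" ("5.000000e+07"); everything else is plain "%f". The new GLSL_TYPE_FLOAT16 case widens to a 32-bit float first, so half constants print through the same branches.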
void ir_print_visitor::visit(ir_constant *ir)
{
@ -484,15 +498,10 @@ void ir_print_visitor::visit(ir_constant *ir)
case GLSL_TYPE_UINT: fprintf(f, "%u", ir->value.u[i]); break;
case GLSL_TYPE_INT: fprintf(f, "%d", ir->value.i[i]); break;
case GLSL_TYPE_FLOAT:
if (ir->value.f[i] == 0.0f)
/* 0.0 == -0.0, so print with %f to get the proper sign. */
fprintf(f, "%f", ir->value.f[i]);
else if (fabs(ir->value.f[i]) < 0.000001f)
fprintf(f, "%a", ir->value.f[i]);
else if (fabs(ir->value.f[i]) > 1000000.0f)
fprintf(f, "%e", ir->value.f[i]);
else
fprintf(f, "%f", ir->value.f[i]);
print_float_constant(f, ir->value.f[i]);
break;
case GLSL_TYPE_FLOAT16:
print_float_constant(f, _mesa_half_to_float(ir->value.f16[i]));
break;
case GLSL_TYPE_SAMPLER:
case GLSL_TYPE_IMAGE:


@ -49,7 +49,7 @@ public:
virtual ir_visitor_status visit(ir_typedecl_statement* ir)
{
if (!used_structs->has_struct_entry(ir->type_decl))
if (ir->type_decl->is_struct() && !used_structs->has_struct_entry(ir->type_decl))
{
ir->remove();
}


@ -260,8 +260,7 @@ ir_validate::visit_leave(ir_expression *ir)
case ir_unop_abs:
case ir_unop_sign:
assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT ||
ir->operands[0]->type->is_float() ||
ir->operands[0]->type->is_double() ||
ir->operands[0]->type->is_float_16_32_64() ||
ir->operands[0]->type->base_type == GLSL_TYPE_INT64);
assert(ir->type == ir->operands[0]->type);
break;
@ -269,8 +268,7 @@ ir_validate::visit_leave(ir_expression *ir)
case ir_unop_rcp:
case ir_unop_rsq:
case ir_unop_sqrt:
assert(ir->type->is_float() ||
ir->type->is_double());
assert(ir->type->is_float_16_32_64());
assert(ir->type == ir->operands[0]->type);
break;
@ -279,7 +277,7 @@ ir_validate::visit_leave(ir_expression *ir)
case ir_unop_exp2:
case ir_unop_log2:
case ir_unop_saturate:
assert(ir->operands[0]->type->is_float());
assert(ir->operands[0]->type->is_float_16_32());
assert(ir->type == ir->operands[0]->type);
break;
@ -299,10 +297,19 @@ ir_validate::visit_leave(ir_expression *ir)
assert(ir->operands[0]->type->is_float());
assert(ir->type->is_boolean());
break;
case ir_unop_f162b:
assert(ir->operands[0]->type->base_type ==
GLSL_TYPE_FLOAT16);
assert(ir->type->is_boolean());
break;
case ir_unop_b2f:
assert(ir->operands[0]->type->is_boolean());
assert(ir->type->is_float());
break;
case ir_unop_b2f16:
assert(ir->operands[0]->type->is_boolean());
assert(ir->type->base_type == GLSL_TYPE_FLOAT16);
break;
case ir_unop_i2b:
assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT);
assert(ir->type->is_boolean());
@ -441,8 +448,7 @@ ir_validate::visit_leave(ir_expression *ir)
case ir_unop_ceil:
case ir_unop_floor:
case ir_unop_fract:
assert(ir->operands[0]->type->is_float() ||
ir->operands[0]->type->is_double());
assert(ir->operands[0]->type->is_float_16_32_64());
assert(ir->operands[0]->type == ir->type);
break;
case ir_unop_sin:
@ -453,7 +459,7 @@ ir_validate::visit_leave(ir_expression *ir)
case ir_unop_dFdy:
case ir_unop_dFdy_coarse:
case ir_unop_dFdy_fine:
assert(ir->operands[0]->type->is_float());
assert(ir->operands[0]->type->is_float_16_32());
assert(ir->operands[0]->type == ir->type);
break;
@ -551,13 +557,9 @@ ir_validate::visit_leave(ir_expression *ir)
assert(ir->type->base_type == GLSL_TYPE_UINT);
break;
case ir_unop_noise:
/* XXX what can we assert here? */
break;
case ir_unop_interpolate_at_centroid:
assert(ir->operands[0]->type == ir->type);
assert(ir->operands[0]->type->is_float());
assert(ir->operands[0]->type->is_float_16_32());
break;
case ir_unop_get_buffer_size:
@ -579,6 +581,15 @@ ir_validate::visit_leave(ir_expression *ir)
assert(ir->operands[0]->type->is_float());
assert(ir->type->is_double());
break;
case ir_unop_f162f:
assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT16);
assert(ir->type->is_float());
break;
case ir_unop_f2f16:
case ir_unop_f2fmp:
assert(ir->operands[0]->type->is_float());
assert(ir->type->base_type == GLSL_TYPE_FLOAT16);
break;
case ir_unop_d2i:
assert(ir->operands[0]->type->is_double());
assert(ir->type->base_type == GLSL_TYPE_INT);
@ -601,13 +612,11 @@ ir_validate::visit_leave(ir_expression *ir)
break;
case ir_unop_frexp_sig:
assert(ir->operands[0]->type->is_float() ||
ir->operands[0]->type->is_double());
assert(ir->operands[0]->type->is_float_16_32_64());
assert(ir->type->is_double());
break;
case ir_unop_frexp_exp:
assert(ir->operands[0]->type->is_float() ||
ir->operands[0]->type->is_double());
assert(ir->operands[0]->type->is_float_16_32_64());
assert(ir->type->base_type == GLSL_TYPE_INT);
break;
case ir_unop_subroutine_to_int:
@ -616,8 +625,7 @@ ir_validate::visit_leave(ir_expression *ir)
break;
case ir_unop_atan:
assert(ir->operands[0]->type->is_float() ||
ir->operands[0]->type->is_double());
assert(ir->operands[0]->type->is_float_16_32_64());
assert(ir->type == ir->operands[0]->type);
break;
@ -750,9 +758,9 @@ ir_validate::visit_leave(ir_expression *ir)
case ir_binop_dot:
assert(ir->type == glsl_type::float_type ||
ir->type == glsl_type::double_type);
assert(ir->operands[0]->type->is_float() ||
ir->operands[0]->type->is_double());
ir->type == glsl_type::double_type ||
ir->type == glsl_type::float16_t_type);
assert(ir->operands[0]->type->is_float_16_32_64());
assert(ir->operands[0]->type->is_vector());
assert(ir->operands[0]->type == ir->operands[1]->type);
break;
@ -765,8 +773,7 @@ ir_validate::visit_leave(ir_expression *ir)
case ir_binop_ldexp:
assert(ir->operands[0]->type == ir->type);
assert(ir->operands[0]->type->is_float() ||
ir->operands[0]->type->is_double());
assert(ir->operands[0]->type->is_float_16_32_64());
assert(ir->operands[1]->type->base_type == GLSL_TYPE_INT);
assert(ir->operands[0]->type->components() ==
ir->operands[1]->type->components());
@ -792,27 +799,25 @@ ir_validate::visit_leave(ir_expression *ir)
break;
case ir_binop_atan2:
assert(ir->operands[0]->type->is_float() ||
ir->operands[0]->type->is_double());
assert(ir->operands[0]->type->is_float_16_32_64());
assert(ir->operands[1]->type == ir->operands[0]->type);
assert(ir->type == ir->operands[0]->type);
break;
case ir_triop_fma:
assert(ir->type->is_float() ||
ir->type->is_double());
assert(ir->type->is_float_16_32_64());
assert(ir->type == ir->operands[0]->type);
assert(ir->type == ir->operands[1]->type);
assert(ir->type == ir->operands[2]->type);
break;
case ir_triop_lrp:
assert(ir->operands[0]->type->is_float() ||
ir->operands[0]->type->is_double());
assert(ir->operands[0]->type->is_float_16_32_64());
assert(ir->operands[0]->type == ir->operands[1]->type);
assert(ir->operands[2]->type == ir->operands[0]->type ||
ir->operands[2]->type == glsl_type::float_type ||
ir->operands[2]->type == glsl_type::double_type);
ir->operands[2]->type == glsl_type::double_type ||
ir->operands[2]->type == glsl_type::float16_t_type);
break;
case ir_triop_csel:


@ -1373,6 +1373,21 @@ tfeedback_decl::find_candidate(gl_shader_program *prog,
return this->matched_candidate;
}
/**
* Force a candidate over the previously matched one. This happens when a new
* varying needs to be created to match the xfb declaration, for example
* to fulfill an alignment criterion.
*/
void
tfeedback_decl::set_lowered_candidate(const tfeedback_candidate *candidate)
{
this->matched_candidate = candidate;
/* The subscript part is no longer relevant */
this->is_subscripted = false;
this->array_subscript = 0;
}
/**
* Parse all the transform feedback declarations that were passed to
@ -1590,7 +1605,9 @@ namespace {
class varying_matches
{
public:
varying_matches(bool disable_varying_packing, bool xfb_enabled,
varying_matches(bool disable_varying_packing,
bool disable_xfb_packing,
bool xfb_enabled,
bool enhanced_layouts_enabled,
gl_shader_stage producer_stage,
gl_shader_stage consumer_stage);
@ -1616,11 +1633,17 @@ private:
*/
const bool disable_varying_packing;
/**
* If true, this driver disables packing for varyings used by transform
* feedback.
*/
const bool disable_xfb_packing;
/**
* If true, this driver has transform feedback enabled. The transform
* feedback code requires at least some packing be done even when varying
* packing is disabled, fortunately where transform feedback requires
* packing it's safe to override the disabled setting. See
* feedback code usually requires at least some packing be done even
* when varying packing is disabled, fortunately where transform feedback
* requires packing it's safe to override the disabled setting. See
* is_varying_packing_safe().
*/
const bool xfb_enabled;
@ -1647,6 +1670,7 @@ private:
static packing_order_enum compute_packing_order(const ir_variable *var);
static int match_comparator(const void *x_generic, const void *y_generic);
static int xfb_comparator(const void *x_generic, const void *y_generic);
static int not_xfb_comparator(const void *x_generic, const void *y_generic);
/**
* Structure recording the relationship between a single producer output
@ -1702,11 +1726,13 @@ private:
} /* anonymous namespace */
varying_matches::varying_matches(bool disable_varying_packing,
bool disable_xfb_packing,
bool xfb_enabled,
bool enhanced_layouts_enabled,
gl_shader_stage producer_stage,
gl_shader_stage consumer_stage)
: disable_varying_packing(disable_varying_packing),
disable_xfb_packing(disable_xfb_packing),
xfb_enabled(xfb_enabled),
enhanced_layouts_enabled(enhanced_layouts_enabled),
producer_stage(producer_stage),
@ -1785,6 +1811,7 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var)
producer_var->type->contains_double());
if (!disable_varying_packing &&
(!disable_xfb_packing || producer_var == NULL || !producer_var->data.is_xfb) &&
(needs_flat_qualifier ||
(consumer_stage != MESA_SHADER_NONE && consumer_stage != MESA_SHADER_FRAGMENT))) {
/* Since this varying is not being consumed by the fragment shader, its
@ -1850,6 +1877,7 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var)
this->matches[this->num_matches].packing_order
= this->compute_packing_order(var);
if ((this->disable_varying_packing && !is_varying_packing_safe(type, var)) ||
(this->disable_xfb_packing && var->data.is_xfb) ||
var->data.must_be_shader_input) {
unsigned slots = type->count_attribute_slots(false);
this->matches[this->num_matches].num_components = slots * 4;
@ -1890,19 +1918,29 @@ varying_matches::assign_locations(struct gl_shader_program *prog,
* When packing is disabled the sort orders varyings used by transform
* feedback first, but also depends on *undefined behaviour* of qsort to
* reverse the order of the varyings. See: xfb_comparator().
*
* If packing is only disabled for xfb varyings (mutually exclusive with
* disable_varying_packing), we then group varyings depending on if they
* are captured for transform feedback. The same *undefined behaviour* is
* taken advantage of.
*/
if (!this->disable_varying_packing) {
/* Sort varying matches into an order that makes them easy to pack. */
qsort(this->matches, this->num_matches, sizeof(*this->matches),
&varying_matches::match_comparator);
} else {
if (this->disable_varying_packing) {
/* Only sort varyings that are only used by transform feedback. */
qsort(this->matches, this->num_matches, sizeof(*this->matches),
&varying_matches::xfb_comparator);
} else if (this->disable_xfb_packing) {
/* Only sort varyings that are NOT used by transform feedback. */
qsort(this->matches, this->num_matches, sizeof(*this->matches),
&varying_matches::not_xfb_comparator);
} else {
/* Sort varying matches into an order that makes them easy to pack. */
qsort(this->matches, this->num_matches, sizeof(*this->matches),
&varying_matches::match_comparator);
}
unsigned generic_location = 0;
unsigned generic_patch_location = MAX_VARYING*4;
bool previous_var_xfb = false;
bool previous_var_xfb_only = false;
unsigned previous_packing_class = ~0u;
@ -1939,6 +1977,9 @@ varying_matches::assign_locations(struct gl_shader_program *prog,
* class than the previous one, and we're not already on a slot
* boundary.
*
* Also advance if varying packing is disabled for transform feedback,
* and previous or current varying is used for transform feedback.
*
* Also advance to the next slot if packing is disabled. This makes sure
* we don't assign varyings the same locations which is possible
* because we still pack individual arrays, records and matrices even
@ -1947,6 +1988,8 @@ varying_matches::assign_locations(struct gl_shader_program *prog,
* feedback.
*/
if (var->data.must_be_shader_input ||
(this->disable_xfb_packing &&
(previous_var_xfb || var->data.is_xfb)) ||
(this->disable_varying_packing &&
!(previous_var_xfb_only && var->data.is_xfb_only)) ||
(previous_packing_class != this->matches[i].packing_class) ||
@ -1955,6 +1998,7 @@ varying_matches::assign_locations(struct gl_shader_program *prog,
*location = ALIGN(*location, 4);
}
previous_var_xfb = var->data.is_xfb;
previous_var_xfb_only = var->data.is_xfb_only;
previous_packing_class = this->matches[i].packing_class;
@ -2051,7 +2095,7 @@ varying_matches::store_locations() const
const glsl_type *type =
get_varying_type(producer_var, producer_stage);
if (type->is_array() || type->is_matrix() || type->is_struct() ||
type->is_double()) {
type->is_64bit()) {
unsigned comp_slots = type->component_slots() + offset;
unsigned slots = comp_slots / 4;
if (comp_slots % 4)
@ -2211,6 +2255,32 @@ varying_matches::xfb_comparator(const void *x_generic, const void *y_generic)
}
/**
* Comparison function passed to qsort() to sort varyings NOT used by
* transform feedback when packing of xfb varyings is disabled.
*/
int
varying_matches::not_xfb_comparator(const void *x_generic, const void *y_generic)
{
const match *x = (const match *) x_generic;
if (x->producer_var != NULL && !x->producer_var->data.is_xfb)
return match_comparator(x_generic, y_generic);
/* FIXME: When the comparator returns 0 it means the elements being
* compared are equivalent. However the qsort documentation says:
*
* "The order of equivalent elements is undefined."
*
* In practice the sort ends up reversing the order of the varyings which
* means locations are also assigned in this reversed order and happens to
* be what we want. This is also what's happening in
* varying_matches::match_comparator().
*/
return 0;
}
/**
* Is the given variable a varying variable to be counted against the
* limit in ctx->Const.MaxVarying?
@ -2558,11 +2628,17 @@ assign_varying_locations(struct gl_context *ctx,
/* Transform feedback code assumes varying arrays are packed, so if the
* driver has disabled varying packing, make sure to at least enable
* packing required by transform feedback.
* packing required by transform feedback. See below for exception.
*/
bool xfb_enabled =
ctx->Extensions.EXT_transform_feedback && !unpackable_tess;
/* Some drivers actually require packing to be explicitly disabled
* for varyings used by transform feedback.
*/
bool disable_xfb_packing =
ctx->Const.DisableTransformFeedbackPacking;
/* Disable packing on outward facing interfaces for SSO because in ES we
* need to retain the unpacked varying information for draw time
* validation.
@ -2577,7 +2653,9 @@ assign_varying_locations(struct gl_context *ctx,
if (prog->SeparateShader && (producer == NULL || consumer == NULL))
disable_varying_packing = true;
varying_matches matches(disable_varying_packing, xfb_enabled,
varying_matches matches(disable_varying_packing,
disable_xfb_packing,
xfb_enabled,
ctx->Extensions.ARB_enhanced_layouts,
producer ? producer->Stage : MESA_SHADER_NONE,
consumer ? consumer->Stage : MESA_SHADER_NONE);
@ -2716,6 +2794,52 @@ assign_varying_locations(struct gl_context *ctx,
return false;
}
/* There are two situations where a new output varying is needed:
*
* - If varying packing is disabled for xfb and the current declaration
* is not aligned within the top level varying (e.g. vec3_arr[1]).
*
* - If a builtin variable needs to be copied to a new variable
* before its content is modified by another lowering pass (e.g.
* \c gl_Position is transformed by \c nir_lower_viewport_transform).
*/
const unsigned dmul =
matched_candidate->type->without_array()->is_64bit() ? 2 : 1;
const bool lowered =
(disable_xfb_packing &&
!tfeedback_decls[i].is_aligned(dmul, matched_candidate->offset)) ||
(matched_candidate->toplevel_var->data.explicit_location &&
matched_candidate->toplevel_var->data.location < VARYING_SLOT_VAR0 &&
(ctx->Const.ShaderCompilerOptions[producer->Stage].LowerBuiltinVariablesXfb &
BITFIELD_BIT(matched_candidate->toplevel_var->data.location)));
if (lowered) {
ir_variable *new_var;
tfeedback_candidate *new_candidate = NULL;
new_var = lower_xfb_varying(mem_ctx, producer, tfeedback_decls[i].name());
if (new_var == NULL) {
ralloc_free(hash_table_ctx);
return false;
}
/* Create new candidate and replace matched_candidate */
new_candidate = rzalloc(mem_ctx, tfeedback_candidate);
new_candidate->toplevel_var = new_var;
new_candidate->toplevel_var->data.is_unmatched_generic_inout = 1;
new_candidate->type = new_var->type;
new_candidate->offset = 0;
_mesa_hash_table_insert(tfeedback_candidates,
ralloc_strdup(mem_ctx, new_var->name),
new_candidate);
tfeedback_decls[i].set_lowered_candidate(new_candidate);
matched_candidate = new_candidate;
}
/* Mark as xfb varying */
matched_candidate->toplevel_var->data.is_xfb = 1;
/* Mark xfb varyings as always active */
matched_candidate->toplevel_var->data.always_active_io = 1;
@ -2732,8 +2856,10 @@ assign_varying_locations(struct gl_context *ctx,
consumer_inputs,
consumer_interface_inputs,
consumer_inputs_with_locations);
if (input_var)
if (input_var) {
input_var->data.is_xfb = 1;
input_var->data.always_active_io = 1;
}
if (matched_candidate->toplevel_var->data.is_unmatched_generic_inout) {
matched_candidate->toplevel_var->data.is_xfb_only = 1;
@ -2804,13 +2930,13 @@ assign_varying_locations(struct gl_context *ctx,
if (producer) {
lower_packed_varyings(mem_ctx, slots_used, components, ir_var_shader_out,
0, producer, disable_varying_packing,
xfb_enabled);
disable_xfb_packing, xfb_enabled);
}
if (consumer) {
lower_packed_varyings(mem_ctx, slots_used, components, ir_var_shader_in,
consumer_vertices, consumer,
disable_varying_packing, xfb_enabled);
consumer_vertices, consumer, disable_varying_packing,
disable_xfb_packing, xfb_enabled);
}
return true;


@ -104,6 +104,7 @@ public:
const void *mem_ctx) const;
const tfeedback_candidate *find_candidate(gl_shader_program *prog,
hash_table *tfeedback_candidates);
void set_lowered_candidate(const tfeedback_candidate *candidate);
bool is_next_buffer_separator() const
{
@ -123,6 +124,11 @@ public:
return !this->next_buffer_separator && !this->skip_components;
}
bool is_aligned(unsigned dmul, unsigned offset) const
{
return (dmul * (this->array_subscript + offset)) % 4 == 0;
}
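Working the new helper through by hand: for a double-based declaration dmul is 2, so array_subscript == 1 with offset == 1 gives (2 * (1 + 1)) % 4 == 0 and counts as aligned, while a float declaration such as vec3_arr[1] with array_subscript == 1 and offset == 0 gives (1 * (1 + 0)) % 4 == 1 and fails, which is what makes the lowered-candidate path in link_varyings.cpp above kick in.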
const char *name() const
{
return this->orig_name;


@ -86,7 +86,7 @@
#include "util/u_string.h"
#include "util/u_math.h"
#include "main/imports.h"
#include "main/shaderobj.h"
#include "main/enums.h"
#include "main/mtypes.h"
@ -260,6 +260,8 @@ public:
class array_resize_visitor : public deref_type_updater {
public:
using deref_type_updater::visit;
unsigned num_vertices;
gl_shader_program *prog;
gl_shader_stage stage;
@ -1511,6 +1513,8 @@ move_non_declarations(exec_list *instructions, exec_node *last,
*/
class array_sizing_visitor : public deref_type_updater {
public:
using deref_type_updater::visit;
array_sizing_visitor()
: mem_ctx(ralloc_context(NULL)),
unnamed_interfaces(_mesa_pointer_hash_table_create(NULL))
@ -1817,6 +1821,40 @@ link_bindless_layout_qualifiers(struct gl_shader_program *prog,
}
}
/**
* Check for conflicting viewport_relative settings across shaders, and sets
* the value for the linked shader.
*/
static void
link_layer_viewport_relative_qualifier(struct gl_shader_program *prog,
struct gl_program *gl_prog,
struct gl_shader **shader_list,
unsigned num_shaders)
{
unsigned i;
/* Find first shader with explicit layer declaration */
for (i = 0; i < num_shaders; i++) {
if (shader_list[i]->redeclares_gl_layer) {
gl_prog->info.layer_viewport_relative =
shader_list[i]->layer_viewport_relative;
break;
}
}
/* Now make sure that each subsequent shader's explicit layer declaration
* matches the first one's.
*/
for (; i < num_shaders; i++) {
if (shader_list[i]->redeclares_gl_layer &&
shader_list[i]->layer_viewport_relative !=
gl_prog->info.layer_viewport_relative) {
linker_error(prog, "all gl_Layer redeclarations must have identical "
"viewport_relative settings");
}
}
}
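For reference, the redeclaration being cross-validated here is the viewport_relative layout qualifier on gl_Layer (NV_viewport_array2-style); a stage opts in with something like the following, and the linker_error fires when one stage's redeclaration carries the qualifier and another's does not:

	layout(viewport_relative) out highp int gl_Layer;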
/**
* Performs the cross-validation of tessellation control shader vertices and
* layout qualifiers for the attached tessellation control shaders,
@ -2434,9 +2472,7 @@ link_intrastage_shaders(void *mem_ctx,
/* Create program and attach it to the linked shader */
struct gl_program *gl_prog =
ctx->Driver.NewProgram(ctx,
_mesa_shader_stage_to_program(shader_list[0]->Stage),
prog->Name, false);
ctx->Driver.NewProgram(ctx, shader_list[0]->Stage, prog->Name, false);
if (!gl_prog) {
prog->data->LinkStatus = LINKING_FAILURE;
_mesa_delete_linked_shader(ctx, linked);
@ -2462,6 +2498,8 @@ link_intrastage_shaders(void *mem_ctx,
link_bindless_layout_qualifiers(prog, shader_list, num_shaders);
link_layer_viewport_relative_qualifier(prog, gl_prog, shader_list, num_shaders);
populate_symbol_table(linked, shader_list[0]->symbols);
/* The pointer to the main function in the final linked shader (i.e., the
@ -4406,12 +4444,13 @@ link_and_validate_uniforms(struct gl_context *ctx,
struct gl_shader_program *prog)
{
update_array_sizes(prog);
link_assign_uniform_locations(prog, ctx);
if (prog->data->LinkStatus == LINKING_FAILURE)
return;
if (!ctx->Const.UseNIRGLSLLinker) {
link_assign_uniform_locations(prog, ctx);
if (prog->data->LinkStatus == LINKING_FAILURE)
return;
link_util_calculate_subroutine_compat(prog);
link_util_check_uniform_resources(ctx, prog);
link_util_check_subroutine_resources(prog);


@ -287,3 +287,90 @@ link_util_calculate_subroutine_compat(struct gl_shader_program *prog)
}
}
}
/**
* Recursive part of the public mark_array_elements_referenced function.
*
* The recursion occurs when an entire array-of- is accessed. See the
* implementation for more details.
*
* \param dr List of array_deref_range elements to be
* processed.
* \param count Number of array_deref_range elements to be
* processed.
* \param scale Current offset scale.
* \param linearized_index Current accumulated linearized array index.
*/
void
_mark_array_elements_referenced(const struct array_deref_range *dr,
unsigned count, unsigned scale,
unsigned linearized_index,
BITSET_WORD *bits)
{
/* Walk through the list of array dereferences in least- to
* most-significant order. Along the way, accumulate the current
* linearized offset and the scale factor for each array-of-.
*/
for (unsigned i = 0; i < count; i++) {
if (dr[i].index < dr[i].size) {
linearized_index += dr[i].index * scale;
scale *= dr[i].size;
} else {
/* For each element in the current array, update the count and
* offset, then recurse to process the remaining arrays.
*
* There is some inefficiency here if the last element in the
* array_deref_range list specifies the entire array. In that case,
* the loop will make recursive calls with count == 0. In the call,
* all that will happen is the bit will be set.
*/
for (unsigned j = 0; j < dr[i].size; j++) {
_mark_array_elements_referenced(&dr[i + 1],
count - (i + 1),
scale * dr[i].size,
linearized_index + (j * scale),
bits);
}
return;
}
}
BITSET_SET(bits, linearized_index);
}
/**
* Mark a set of array elements as accessed.
*
* If every \c array_deref_range is for a single index, only a single
* element will be marked. If any \c array_deref_range is for an entire
* array-of-, then multiple elements will be marked.
*
* Items in the \c array_deref_range list appear in least- to
* most-significant order. This is the \b opposite of the order the indices
* appear in the GLSL shader text. An array access like
*
* x = y[1][i][3];
*
* would appear as
*
* { { 3, n }, { m, m }, { 1, p } }
*
* where n, m, and p are the sizes of the arrays-of-arrays.
*
* The set of marked array elements can later be queried by
* \c ::is_linearized_index_referenced.
*
* \param dr List of array_deref_range elements to be processed.
* \param count Number of array_deref_range elements to be processed.
*/
void
link_util_mark_array_elements_referenced(const struct array_deref_range *dr,
unsigned count, unsigned array_depth,
BITSET_WORD *bits)
{
if (count != array_depth)
return;
_mark_array_elements_referenced(dr, count, 1, 0, bits);
}
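Working the doc-comment example above through this recursion: for y[1][i][3] with dr = { { 3, n }, { m, m }, { 1, p } }, the first entry has index 3 < n, so linearized_index becomes 3 and scale becomes n; the second entry has index == size (the non-constant i), so the loop recurses once per j in [0, m) with linearized_index 3 + j*n and scale n*m; each recursive call consumes { 1, p } and sets bit 3 + j*n + n*m. In total m bits are marked, one per possible value of i.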


@ -24,6 +24,8 @@
#ifndef GLSL_LINKER_UTIL_H
#define GLSL_LINKER_UTIL_H
#include "util/bitset.h"
struct gl_context;
struct gl_shader_program;
struct gl_uniform_storage;
@ -45,6 +47,23 @@ struct empty_uniform_block {
unsigned slots;
};
/**
* Describes an access of an array element or an access of the whole array
*/
struct array_deref_range {
/**
* Index that was accessed.
*
* All valid array indices are less than the size of the array. If index
* is equal to the size of the array, this means the entire array has been
* accessed (e.g., due to use of a non-constant index).
*/
unsigned index;
/** Size of the array. Used for offset calculations. */
unsigned size;
};
void
linker_error(struct gl_shader_program *prog, const char *fmt, ...);
@ -81,6 +100,11 @@ link_util_check_uniform_resources(struct gl_context *ctx,
void
link_util_calculate_subroutine_compat(struct gl_shader_program *prog);
void
link_util_mark_array_elements_referenced(const struct array_deref_range *dr,
unsigned count, unsigned array_depth,
BITSET_WORD *bits);
#ifdef __cplusplus
}
#endif


@ -281,7 +281,7 @@ inline bool exec_node::is_head_sentinel() const
* \param field Name of the field in \c type that is the embedded \c exec_node
*/
#define exec_node_data(type, node, field) \
((type *) (((char *) node) - exec_list_offsetof(type, field, node)))
((type *) (((uintptr_t) node) - exec_list_offsetof(type, field, node)))
#ifdef __cplusplus
struct exec_node;
@ -679,36 +679,44 @@ inline void exec_node::insert_before(exec_list *before)
}
#endif
#define foreach_in_list(__type, __inst, __list) \
for (__type *__inst = (__type *)(__list)->head_sentinel.next; \
!(__inst)->is_tail_sentinel(); \
(__inst) = (__type *)(__inst)->next)
#define exec_node_typed_forward(__node, __type) \
(!exec_node_is_tail_sentinel(__node) ? (__type) (__node) : NULL)
#define foreach_in_list_reverse(__type, __inst, __list) \
for (__type *__inst = (__type *)(__list)->tail_sentinel.prev; \
!(__inst)->is_head_sentinel(); \
(__inst) = (__type *)(__inst)->prev)
#define exec_node_typed_backward(__node, __type) \
(!exec_node_is_head_sentinel(__node) ? (__type) (__node) : NULL)
#define foreach_in_list(__type, __inst, __list) \
for (__type *__inst = exec_node_typed_forward((__list)->head_sentinel.next, __type *); \
(__inst) != NULL; \
(__inst) = exec_node_typed_forward((__inst)->next, __type *))
#define foreach_in_list_reverse(__type, __inst, __list) \
for (__type *__inst = exec_node_typed_backward((__list)->tail_sentinel.prev, __type *); \
(__inst) != NULL; \
(__inst) = exec_node_typed_backward((__inst)->prev, __type *))
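A minimal usage sketch (list contents hypothetical) showing what the rewrite buys: the loop variable is now NULL-checked rather than cast first and sentinel-checked after, so the sentinel node is never accessed through a __type pointer:

	exec_list instructions;
	/* ... populate the list with ir_instruction nodes ... */
	foreach_in_list(ir_instruction, inst, &instructions) {
	   inst->print();   /* inst is a real node here, never the tail sentinel */
	}

The old expansion cast head_sentinel.next to __type * and then called is_tail_sentinel() through that pointer; exec_node_typed_forward() performs the sentinel test on the plain exec_node and yields NULL instead.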
/**
* This version is safe even if the current node is removed.
*/
#define foreach_in_list_safe(__type, __node, __list) \
for (__type *__node = (__type *)(__list)->head_sentinel.next, \
*__next = (__type *)__node->next; \
__next != NULL; \
__node = __next, __next = (__type *)__next->next)
#define foreach_in_list_reverse_safe(__type, __node, __list) \
for (__type *__node = (__type *)(__list)->tail_sentinel.prev, \
*__prev = (__type *)__node->prev; \
__prev != NULL; \
__node = __prev, __prev = (__type *)__prev->prev)
#define foreach_in_list_safe(__type, __node, __list) \
for (__type *__node = exec_node_typed_forward((__list)->head_sentinel.next, __type *), \
*__next = (__node) ? exec_node_typed_forward((__list)->head_sentinel.next->next, __type *) : NULL; \
(__node) != NULL; \
(__node) = __next, __next = __next ? exec_node_typed_forward(__next->next, __type *) : NULL)
#define foreach_in_list_reverse_safe(__type, __node, __list) \
for (__type *__node = exec_node_typed_backward((__list)->tail_sentinel.prev, __type *), \
*__prev = (__node) ? exec_node_typed_backward((__list)->tail_sentinel.prev->prev, __type *) : NULL; \
(__node) != NULL; \
(__node) = __prev, __prev = __prev ? exec_node_typed_backward(__prev->prev, __type *) : NULL)
#define foreach_in_list_use_after(__type, __inst, __list) \
__type *__inst; \
for ((__inst) = exec_node_typed_forward((__list)->head_sentinel.next, __type *); \
(__inst) != NULL; \
(__inst) = exec_node_typed_forward((__inst)->next, __type *))
#define foreach_in_list_use_after(__type, __inst, __list) \
__type *__inst; \
for ((__inst) = (__type *)(__list)->head_sentinel.next; \
!(__inst)->is_tail_sentinel(); \
(__inst) = (__type *)(__inst)->next)
/**
* Iterate through two lists at once. Stops at the end of the shorter list.
*
@ -725,39 +733,45 @@ inline void exec_node::insert_before(exec_list *before)
__next1 = __next1->next, \
__next2 = __next2->next)
#define foreach_list_typed(__type, __node, __field, __list) \
for (__type * __node = \
exec_node_data(__type, (__list)->head_sentinel.next, __field); \
(__node)->__field.next != NULL; \
(__node) = exec_node_data(__type, (__node)->__field.next, __field))
#define exec_node_data_forward(type, node, field) \
(!exec_node_is_tail_sentinel(node) ? exec_node_data(type, node, field) : NULL)
#define foreach_list_typed_from(__type, __node, __field, __list, __start) \
for (__type * __node = exec_node_data(__type, (__start), __field); \
(__node)->__field.next != NULL; \
(__node) = exec_node_data(__type, (__node)->__field.next, __field))
#define exec_node_data_backward(type, node, field) \
(!exec_node_is_head_sentinel(node) ? exec_node_data(type, node, field) : NULL)
#define foreach_list_typed_reverse(__type, __node, __field, __list) \
for (__type * __node = \
exec_node_data(__type, (__list)->tail_sentinel.prev, __field); \
(__node)->__field.prev != NULL; \
(__node) = exec_node_data(__type, (__node)->__field.prev, __field))
#define foreach_list_typed(__type, __node, __field, __list) \
for (__type * __node = \
exec_node_data_forward(__type, (__list)->head_sentinel.next, __field); \
(__node) != NULL; \
(__node) = exec_node_data_forward(__type, (__node)->__field.next, __field))
#define foreach_list_typed_safe(__type, __node, __field, __list) \
for (__type * __node = \
exec_node_data(__type, (__list)->head_sentinel.next, __field), \
* __next = \
exec_node_data(__type, (__node)->__field.next, __field); \
(__node)->__field.next != NULL; \
__node = __next, __next = \
exec_node_data(__type, (__next)->__field.next, __field))
#define foreach_list_typed_from(__type, __node, __field, __list, __start) \
for (__type * __node = exec_node_data_forward(__type, (__start), __field); \
(__node) != NULL; \
(__node) = exec_node_data_forward(__type, (__node)->__field.next, __field))
#define foreach_list_typed_reverse_safe(__type, __node, __field, __list) \
for (__type * __node = \
exec_node_data(__type, (__list)->tail_sentinel.prev, __field), \
* __prev = \
exec_node_data(__type, (__node)->__field.prev, __field); \
(__node)->__field.prev != NULL; \
__node = __prev, __prev = \
exec_node_data(__type, (__prev)->__field.prev, __field))
#define foreach_list_typed_reverse(__type, __node, __field, __list) \
for (__type * __node = \
exec_node_data_backward(__type, (__list)->tail_sentinel.prev, __field); \
(__node) != NULL; \
(__node) = exec_node_data_backward(__type, (__node)->__field.prev, __field))
#define foreach_list_typed_safe(__type, __node, __field, __list) \
for (__type * __node = \
exec_node_data_forward(__type, (__list)->head_sentinel.next, __field), \
* __next = (__node) ? \
exec_node_data_forward(__type, (__node)->__field.next, __field) : NULL; \
(__node) != NULL; \
(__node) = __next, __next = (__next && (__next)->__field.next) ? \
exec_node_data_forward(__type, (__next)->__field.next, __field) : NULL)
#define foreach_list_typed_reverse_safe(__type, __node, __field, __list) \
for (__type * __node = \
exec_node_data_backward(__type, (__list)->tail_sentinel.prev, __field), \
* __prev = (__node) ? \
exec_node_data_backward(__type, (__node)->__field.prev, __field) : NULL; \
(__node) != NULL; \
(__node) = __prev, __prev = (__prev && (__prev)->__field.prev) ? \
exec_node_data_backward(__type, (__prev)->__field.prev, __field) : NULL)
#endif /* LIST_CONTAINER_H */


@ -1,5 +1,5 @@
/*
* Copyright © 2010 Intel Corporation
* Copyright © 2019 Google, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@ -22,50 +22,43 @@
*/
/**
* \file lower_noise.cpp
* IR lower pass to remove noise opcodes.
* \file lower_builtins.cpp
*
* \author Ian Romanick <ian.d.romanick@intel.com>
* Inline calls to builtin functions.
*/
#include "ir.h"
#include "ir_rvalue_visitor.h"
#include "ir_optimization.h"
class lower_noise_visitor : public ir_rvalue_visitor {
namespace {
class lower_builtins_visitor : public ir_hierarchical_visitor {
public:
lower_noise_visitor() : progress(false)
{
/* empty */
}
void handle_rvalue(ir_rvalue **rvalue)
{
if (!*rvalue)
return;
ir_expression *expr = (*rvalue)->as_expression();
if (!expr)
return;
/* In the future, ir_unop_noise may be replaced by a call to a function
* that implements noise. No hardware has a noise instruction.
*/
if (expr->operation == ir_unop_noise) {
*rvalue = ir_constant::zero(ralloc_parent(expr), expr->type);
this->progress = true;
}
}
lower_builtins_visitor() : progress(false) { }
ir_visitor_status visit_leave(ir_call *);
bool progress;
};
}
bool
lower_noise(exec_list *instructions)
lower_builtins(exec_list *instructions)
{
lower_noise_visitor v;
lower_builtins_visitor v;
visit_list_elements(&v, instructions);
return v.progress;
}
ir_visitor_status
lower_builtins_visitor::visit_leave(ir_call *ir)
{
if (!ir->callee->is_builtin())
return visit_continue;
ir->generate_inline(ir);
ir->remove();
this->progress = true;
return visit_continue;
}
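A hedged sketch of a call site for the new pass (surrounding code hypothetical): like the other IR lowering passes it reports progress, and since visit_leave() splices the callee body in with generate_inline() before removing the call, looping until fixpoint picks up builtins that inline to further builtin calls:

	bool progress;
	do {
	   progress = lower_builtins(shader->ir);
	} while (progress);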


@ -63,7 +63,8 @@
* reciprocal. By breaking the operation down, constant reciprocals
* can get constant folded.
*
* FDIV_TO_MUL_RCP only lowers single-precision floating point division;
* FDIV_TO_MUL_RCP lowers single-precision and half-precision
* floating point division;
* DDIV_TO_MUL_RCP only lowers double-precision floating point division.
* DIV_TO_MUL_RCP is a convenience macro that sets both flags.
* INT_DIV_TO_MUL_RCP handles the integer case, converting to and from floating
@ -123,6 +124,7 @@
#include "ir.h"
#include "ir_builder.h"
#include "ir_optimization.h"
#include "util/half_float.h"
using namespace ir_builder;
@ -172,6 +174,11 @@ private:
void mul64_to_mul_and_mul_high(ir_expression *ir);
ir_expression *_carry(operand a, operand b);
static ir_constant *_imm_fp(void *mem_ctx,
const glsl_type *type,
double f,
unsigned vector_elements=1);
};
} /* anonymous namespace */
@ -203,7 +210,7 @@ lower_instructions_visitor::sub_to_add_neg(ir_expression *ir)
void
lower_instructions_visitor::div_to_mul_rcp(ir_expression *ir)
{
assert(ir->operands[1]->type->is_float() || ir->operands[1]->type->is_double());
assert(ir->operands[1]->type->is_float_16_32_64());
/* New expression for the 1.0 / op1 */
ir_rvalue *expr;
@ -273,7 +280,7 @@ lower_instructions_visitor::int_div_to_mul_rcp(ir_expression *ir)
void
lower_instructions_visitor::exp_to_exp2(ir_expression *ir)
{
ir_constant *log2_e = new(ir) ir_constant(float(M_LOG2E));
ir_constant *log2_e = _imm_fp(ir, ir->type, M_LOG2E);
ir->operation = ir_unop_exp2;
ir->init_num_operands();
@ -304,7 +311,7 @@ lower_instructions_visitor::log_to_log2(ir_expression *ir)
ir->init_num_operands();
ir->operands[0] = new(ir) ir_expression(ir_unop_log2, ir->operands[0]->type,
ir->operands[0], NULL);
ir->operands[1] = new(ir) ir_constant(float(1.0 / M_LOG2E));
ir->operands[1] = _imm_fp(ir, ir->operands[0]->type, 1.0 / M_LOG2E);
this->progress = true;
}
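The identities behind these two rewrites, for reference: exp(x) == exp2(x * log2(e)) and log(x) == log2(x) / log2(e), with M_LOG2E == log2(e) ~= 1.442695 and 1.0 / M_LOG2E == ln(2) ~= 0.693147. The functional change is only that _imm_fp() now builds the constant at the operand's float width (float16, float or double) instead of always emitting a 32-bit float.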
@ -336,7 +343,7 @@ lower_instructions_visitor::mod_to_floor(ir_expression *ir)
/* Don't generate new IR that would need to be lowered in an additional
* pass.
*/
if ((lowering(FDIV_TO_MUL_RCP) && ir->type->is_float()) ||
if ((lowering(FDIV_TO_MUL_RCP) && ir->type->is_float_16_32()) ||
(lowering(DDIV_TO_MUL_RCP) && ir->type->is_double()))
div_to_mul_rcp(div_expr);
@ -837,10 +844,11 @@ lower_instructions_visitor::sat_to_clamp(ir_expression *ir)
ir->operation = ir_binop_min;
ir->init_num_operands();
ir_constant *zero = _imm_fp(ir, ir->operands[0]->type, 0.0);
ir->operands[0] = new(ir) ir_expression(ir_binop_max, ir->operands[0]->type,
ir->operands[0],
new(ir) ir_constant(0.0f));
ir->operands[1] = new(ir) ir_constant(1.0f);
ir->operands[0], zero);
ir->operands[1] = _imm_fp(ir, ir->operands[0]->type, 1.0);
this->progress = true;
}
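Spelled out, the rewrite turns saturate(x) into min(max(x, 0.0), 1.0). The reason for routing the immediates through _imm_fp() is that for a float16 operand the 0.0 and 1.0 constants are now emitted as float16_t values rather than 32-bit 0.0f/1.0f, keeping the min/max operand types equal, which the ir_validate changes elsewhere in this commit assert.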
@ -1515,6 +1523,25 @@ lower_instructions_visitor::_carry(operand a, operand b)
return carry(a, b);
}
ir_constant *
lower_instructions_visitor::_imm_fp(void *mem_ctx,
const glsl_type *type,
double f,
unsigned vector_elements)
{
switch (type->base_type) {
case GLSL_TYPE_FLOAT:
return new(mem_ctx) ir_constant((float) f, vector_elements);
case GLSL_TYPE_DOUBLE:
return new(mem_ctx) ir_constant((double) f, vector_elements);
case GLSL_TYPE_FLOAT16:
return new(mem_ctx) ir_constant(float16_t(f), vector_elements);
default:
assert(!"unknown float type for immediate");
return NULL;
}
}
void
lower_instructions_visitor::imul_high_to_mul(ir_expression *ir)
{
@ -1747,7 +1774,7 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
case ir_binop_div:
if (ir->operands[1]->type->is_integer_32() && lowering(INT_DIV_TO_MUL_RCP))
int_div_to_mul_rcp(ir);
else if ((ir->operands[1]->type->is_float() && lowering(FDIV_TO_MUL_RCP)) ||
else if ((ir->operands[1]->type->is_float_16_32() && lowering(FDIV_TO_MUL_RCP)) ||
(ir->operands[1]->type->is_double() && lowering(DDIV_TO_MUL_RCP)))
div_to_mul_rcp(ir);
break;
@ -1763,7 +1790,7 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
break;
case ir_binop_mod:
if (lowering(MOD_TO_FLOOR) && (ir->type->is_float() || ir->type->is_double()))
if (lowering(MOD_TO_FLOOR) && ir->type->is_float_16_32_64())
mod_to_floor(ir);
break;


@ -268,6 +268,8 @@ struct ir_lower_jumps_visitor : public ir_control_flow_visitor {
* contains the jump.
*/
using ir_control_flow_visitor::visit;
bool progress;
struct function_record function;


@ -360,6 +360,9 @@ ir_mat_op_to_vec_visitor::visit_leave(ir_assignment *orig_assign)
switch (orig_expr->operation) {
case ir_unop_d2f:
case ir_unop_f2d:
case ir_unop_f2f16:
case ir_unop_f2fmp:
case ir_unop_f162f:
case ir_unop_neg: {
/* Apply the operation to each column.*/
for (i = 0; i < matrix_columns; i++) {


@ -173,6 +173,7 @@ public:
exec_list *out_instructions,
exec_list *out_variables,
bool disable_varying_packing,
bool disable_xfb_packing,
bool xfb_enabled);
void run(struct gl_linked_shader *shader);
@ -240,6 +241,7 @@ private:
exec_list *out_variables;
bool disable_varying_packing;
bool disable_xfb_packing;
bool xfb_enabled;
};
@ -250,7 +252,7 @@ lower_packed_varyings_visitor::lower_packed_varyings_visitor(
ir_variable_mode mode,
unsigned gs_input_vertices, exec_list *out_instructions,
exec_list *out_variables, bool disable_varying_packing,
bool xfb_enabled)
bool disable_xfb_packing, bool xfb_enabled)
: mem_ctx(mem_ctx),
locations_used(locations_used),
components(components),
@ -262,6 +264,7 @@ lower_packed_varyings_visitor::lower_packed_varyings_visitor(
out_instructions(out_instructions),
out_variables(out_variables),
disable_varying_packing(disable_varying_packing),
disable_xfb_packing(disable_xfb_packing),
xfb_enabled(xfb_enabled)
{
}
@ -769,12 +772,21 @@ lower_packed_varyings_visitor::needs_lowering(ir_variable *var)
if (var->data.explicit_location || var->data.must_be_shader_input)
return false;
const glsl_type *type = var->type;
/* Some drivers (e.g. panfrost) don't support packing of transform
* feedback varyings.
*/
if (disable_xfb_packing && var->data.is_xfb &&
!(type->is_array() || type->is_struct() || type->is_matrix()) &&
xfb_enabled)
return false;
/* Override disable_varying_packing if the var is only used by transform
* feedback. Also override it if transform feedback is enabled and the
* variable is an array, struct or matrix as the elements of these types
* will always have the same interpolation and therefore are safe to pack.
*/
const glsl_type *type = var->type;
if (disable_varying_packing && !var->data.is_xfb_only &&
!((type->is_array() || type->is_struct() || type->is_matrix()) &&
xfb_enabled))
@ -874,7 +886,7 @@ lower_packed_varyings(void *mem_ctx, unsigned locations_used,
const uint8_t *components,
ir_variable_mode mode, unsigned gs_input_vertices,
gl_linked_shader *shader, bool disable_varying_packing,
bool xfb_enabled)
bool disable_xfb_packing, bool xfb_enabled)
{
exec_list *instructions = shader->ir;
ir_function *main_func = shader->symbols->get_function("main");
@ -890,6 +902,7 @@ lower_packed_varyings(void *mem_ctx, unsigned locations_used,
&new_instructions,
&new_variables,
disable_varying_packing,
disable_xfb_packing,
xfb_enabled);
visitor.run(shader);
if (mode == ir_var_shader_out) {


@ -0,0 +1,721 @@
/*
* Copyright © 2019 Google, Inc
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* \file lower_precision.cpp
*/
#include "main/macros.h"
#include "compiler/glsl_types.h"
#include "ir.h"
#include "ir_builder.h"
#include "ir_optimization.h"
#include "ir_rvalue_visitor.h"
#include "util/half_float.h"
#include "util/set.h"
#include "util/hash_table.h"
#include <vector>
namespace {
class find_precision_visitor : public ir_rvalue_enter_visitor {
public:
find_precision_visitor();
~find_precision_visitor();
virtual void handle_rvalue(ir_rvalue **rvalue);
virtual ir_visitor_status visit_enter(ir_call *ir);
ir_function_signature *map_builtin(ir_function_signature *sig);
bool progress;
/* Set of rvalues that can be lowered. This will be filled in by
* find_lowerable_rvalues_visitor. Only the root node of a lowerable section
* will be added to this set.
*/
struct set *lowerable_rvalues;
/**
* A mapping of builtin signature functions to lowered versions. This is
* filled in lazily when a lowered version is needed.
*/
struct hash_table *lowered_builtins;
/**
* A temporary hash table only used in order to clone functions.
*/
struct hash_table *clone_ht;
void *lowered_builtin_mem_ctx;
};
class find_lowerable_rvalues_visitor : public ir_hierarchical_visitor {
public:
enum can_lower_state {
UNKNOWN,
CANT_LOWER,
SHOULD_LOWER,
};
enum parent_relation {
/* The parent performs a further operation involving the result from the
* child and can be lowered along with it.
*/
COMBINED_OPERATION,
/* The parent instruction's operation is independent of the child type, so
* the child should be lowered separately.
*/
INDEPENDENT_OPERATION,
};
struct stack_entry {
ir_instruction *instr;
enum can_lower_state state;
/* List of child rvalues that can be lowered. When this stack entry is
* popped, if this node itself can't be lowered then all of the children
* are root nodes to lower, so we will add them to lowerable_rvalues.
* Otherwise if this node can also be lowered then we won't add the
* children because we only want to add the topmost lowerable nodes to
* lowerable_rvalues and the children will be lowered as part of lowering
* this node.
*/
std::vector<ir_instruction *> lowerable_children;
};
find_lowerable_rvalues_visitor(struct set *result);
static void stack_enter(class ir_instruction *ir, void *data);
static void stack_leave(class ir_instruction *ir, void *data);
virtual ir_visitor_status visit(ir_constant *ir);
virtual ir_visitor_status visit(ir_dereference_variable *ir);
virtual ir_visitor_status visit_enter(ir_dereference_record *ir);
virtual ir_visitor_status visit_enter(ir_dereference_array *ir);
virtual ir_visitor_status visit_enter(ir_texture *ir);
virtual ir_visitor_status visit_enter(ir_expression *ir);
virtual ir_visitor_status visit_leave(ir_assignment *ir);
virtual ir_visitor_status visit_leave(ir_call *ir);
static can_lower_state handle_precision(const glsl_type *type,
int precision);
static parent_relation get_parent_relation(ir_instruction *parent,
ir_instruction *child);
std::vector<stack_entry> stack;
struct set *lowerable_rvalues;
void pop_stack_entry();
void add_lowerable_children(const stack_entry &entry);
};
class lower_precision_visitor : public ir_rvalue_visitor {
public:
virtual void handle_rvalue(ir_rvalue **rvalue);
virtual ir_visitor_status visit_enter(ir_dereference_array *);
virtual ir_visitor_status visit_enter(ir_dereference_record *);
virtual ir_visitor_status visit_enter(ir_call *ir);
virtual ir_visitor_status visit_enter(ir_texture *ir);
virtual ir_visitor_status visit_leave(ir_expression *);
};
bool
can_lower_type(const glsl_type *type)
{
/* Don't lower any expressions involving non-float types except bool and
* texture samplers. This will rule out operations that change the type, such
* as conversions to int. Instead it will end up lowering the arguments
* and adding a final conversion to float32. We want to handle
* boolean types so that it will do comparisons as 16-bit.
*/
switch (type->base_type) {
case GLSL_TYPE_FLOAT:
case GLSL_TYPE_BOOL:
case GLSL_TYPE_SAMPLER:
return true;
default:
return false;
}
}
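In shader terms (snippet hypothetical), this is why an expression such as

	mediump int i = int(texcoord.x * 8.0);

is not lowered as a whole: its result type is int, so the int conversion stays at 32 bits, while the float multiply feeding it is a lowerable candidate of its own, matching the "lowering the arguments and adding a final conversion" behaviour described in the comment above.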
find_lowerable_rvalues_visitor::find_lowerable_rvalues_visitor(struct set *res)
{
lowerable_rvalues = res;
callback_enter = stack_enter;
callback_leave = stack_leave;
data_enter = this;
data_leave = this;
}
void
find_lowerable_rvalues_visitor::stack_enter(class ir_instruction *ir,
void *data)
{
find_lowerable_rvalues_visitor *state =
(find_lowerable_rvalues_visitor *) data;
/* Add a new stack entry for this instruction */
stack_entry entry;
entry.instr = ir;
entry.state = state->in_assignee ? CANT_LOWER : UNKNOWN;
state->stack.push_back(entry);
}
void
find_lowerable_rvalues_visitor::add_lowerable_children(const stack_entry &entry)
{
/* We can't lower this node, so if there were any pending children then they
* are all root lowerable nodes and we should add them to the set.
*/
for (auto &it : entry.lowerable_children)
_mesa_set_add(lowerable_rvalues, it);
}
void
find_lowerable_rvalues_visitor::pop_stack_entry()
{
const stack_entry &entry = stack.back();
if (stack.size() >= 2) {
/* Combine this state into the parent state, unless the parent operation
* doesn't have any relation to the child operations.
*/
stack_entry &parent = stack.end()[-2];
parent_relation rel = get_parent_relation(parent.instr, entry.instr);
if (rel == COMBINED_OPERATION) {
switch (entry.state) {
case CANT_LOWER:
parent.state = CANT_LOWER;
break;
case SHOULD_LOWER:
if (parent.state == UNKNOWN)
parent.state = SHOULD_LOWER;
break;
case UNKNOWN:
break;
}
}
}
if (entry.state == SHOULD_LOWER) {
ir_rvalue *rv = entry.instr->as_rvalue();
if (rv == NULL) {
add_lowerable_children(entry);
} else if (stack.size() >= 2) {
stack_entry &parent = stack.end()[-2];
switch (get_parent_relation(parent.instr, rv)) {
case COMBINED_OPERATION:
/* We only want to add the toplevel lowerable instructions to the
* lowerable set. Therefore if there is a parent then instead of
* adding this instruction to the set we will queue depending on
* the result of the parent instruction.
*/
parent.lowerable_children.push_back(entry.instr);
break;
case INDEPENDENT_OPERATION:
_mesa_set_add(lowerable_rvalues, rv);
break;
}
} else {
/* This is a toplevel node so add it directly to the lowerable
* set.
*/
_mesa_set_add(lowerable_rvalues, rv);
}
} else if (entry.state == CANT_LOWER) {
add_lowerable_children(entry);
}
stack.pop_back();
}
void
find_lowerable_rvalues_visitor::stack_leave(class ir_instruction *ir,
void *data)
{
find_lowerable_rvalues_visitor *state =
(find_lowerable_rvalues_visitor *) data;
state->pop_stack_entry();
}
enum find_lowerable_rvalues_visitor::can_lower_state
find_lowerable_rvalues_visitor::handle_precision(const glsl_type *type,
int precision)
{
if (!can_lower_type(type))
return CANT_LOWER;
switch (precision) {
case GLSL_PRECISION_NONE:
return UNKNOWN;
case GLSL_PRECISION_HIGH:
return CANT_LOWER;
case GLSL_PRECISION_MEDIUM:
case GLSL_PRECISION_LOW:
return SHOULD_LOWER;
}
return CANT_LOWER;
}
enum find_lowerable_rvalues_visitor::parent_relation
find_lowerable_rvalues_visitor::get_parent_relation(ir_instruction *parent,
ir_instruction *child)
{
/* If the parent is a dereference instruction then the only child could be
* for example an array dereference and that should be lowered independently
* of the parent.
*/
if (parent->as_dereference())
return INDEPENDENT_OPERATION;
/* The precision of texture sampling depends on the precision of the sampler.
* The rest of the arguments don't matter, so we can treat it as an
* independent operation.
*/
if (parent->as_texture())
return INDEPENDENT_OPERATION;
return COMBINED_OPERATION;
}
ir_visitor_status
find_lowerable_rvalues_visitor::visit(ir_constant *ir)
{
stack_enter(ir, this);
if (!can_lower_type(ir->type))
stack.back().state = CANT_LOWER;
stack_leave(ir, this);
return visit_continue;
}
ir_visitor_status
find_lowerable_rvalues_visitor::visit(ir_dereference_variable *ir)
{
stack_enter(ir, this);
if (stack.back().state == UNKNOWN)
stack.back().state = handle_precision(ir->type, ir->precision());
stack_leave(ir, this);
return visit_continue;
}
ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_dereference_record *ir)
{
ir_hierarchical_visitor::visit_enter(ir);
if (stack.back().state == UNKNOWN)
stack.back().state = handle_precision(ir->type, ir->precision());
return visit_continue;
}
ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_dereference_array *ir)
{
ir_hierarchical_visitor::visit_enter(ir);
if (stack.back().state == UNKNOWN)
stack.back().state = handle_precision(ir->type, ir->precision());
return visit_continue;
}
ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_texture *ir)
{
ir_hierarchical_visitor::visit_enter(ir);
if (stack.back().state == UNKNOWN) {
/* The precision of the sample value depends on the precision of the
* sampler.
*/
stack.back().state = handle_precision(ir->type,
ir->sampler->precision());
}
return visit_continue;
}
ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_expression *ir)
{
ir_hierarchical_visitor::visit_enter(ir);
if (!can_lower_type(ir->type))
stack.back().state = CANT_LOWER;
/* Don't lower precision for derivative calculations */
if (ir->operation == ir_unop_dFdx ||
ir->operation == ir_unop_dFdx_coarse ||
ir->operation == ir_unop_dFdx_fine ||
ir->operation == ir_unop_dFdy ||
ir->operation == ir_unop_dFdy_coarse ||
ir->operation == ir_unop_dFdy_fine) {
stack.back().state = CANT_LOWER;
}
return visit_continue;
}
static bool
is_lowerable_builtin(ir_call *ir,
const struct set *lowerable_rvalues)
{
if (!ir->callee->is_builtin())
return false;
assert(ir->callee->return_precision == GLSL_PRECISION_NONE);
foreach_in_list(ir_rvalue, param, &ir->actual_parameters) {
if (!param->as_constant() &&
_mesa_set_search(lowerable_rvalues, param) == NULL)
return false;
}
return true;
}
ir_visitor_status
find_lowerable_rvalues_visitor::visit_leave(ir_call *ir)
{
ir_hierarchical_visitor::visit_leave(ir);
/* Special case for handling temporary variables generated by the compiler
* for function calls. If we assign to one of these using a function call
* that has a lowerable return type then we can assume the temporary
* variable should have a medium precision too.
*/
/* Do nothing if the return type is void. */
if (!ir->return_deref)
return visit_continue;
ir_variable *var = ir->return_deref->variable_referenced();
assert(var->data.mode == ir_var_temporary);
unsigned return_precision = ir->callee->return_precision;
/* If the call is to a builtin, then the function won't have a return
* precision and we should determine it from the precision of the arguments.
*/
if (is_lowerable_builtin(ir, lowerable_rvalues))
return_precision = GLSL_PRECISION_MEDIUM;
can_lower_state lower_state =
handle_precision(var->type, return_precision);
if (lower_state == SHOULD_LOWER) {
/* There probably shouldn't be any situations where multiple ir_call
* instructions write to the same temporary?
*/
assert(var->data.precision == GLSL_PRECISION_NONE);
var->data.precision = GLSL_PRECISION_MEDIUM;
} else {
var->data.precision = GLSL_PRECISION_HIGH;
}
return visit_continue;
}
ir_visitor_status
find_lowerable_rvalues_visitor::visit_leave(ir_assignment *ir)
{
ir_hierarchical_visitor::visit_leave(ir);
/* Special case for handling temporary variables generated by the compiler.
* If we assign to one of these using a lowered precision then we can assume
* the temporary variable should have a medium precision too.
*/
ir_variable *var = ir->lhs->variable_referenced();
if (var->data.mode == ir_var_temporary) {
if (_mesa_set_search(lowerable_rvalues, ir->rhs)) {
/* Only override the precision if this is the first assignment. For
* temporaries such as the ones generated for the ?: operator there
* can be multiple assignments with different precisions. This way we
* get the highest precision of all of the assignments.
*/
if (var->data.precision == GLSL_PRECISION_NONE)
var->data.precision = GLSL_PRECISION_MEDIUM;
} else if (!ir->rhs->as_constant()) {
var->data.precision = GLSL_PRECISION_HIGH;
}
}
return visit_continue;
}
void
find_lowerable_rvalues(exec_list *instructions,
struct set *result)
{
find_lowerable_rvalues_visitor v(result);
visit_list_elements(&v, instructions);
assert(v.stack.empty());
}
static ir_rvalue *
convert_precision(int op, ir_rvalue *ir)
{
unsigned base_type = (op == ir_unop_f2fmp ?
GLSL_TYPE_FLOAT16 : GLSL_TYPE_FLOAT);
const glsl_type *desired_type;
desired_type = glsl_type::get_instance(base_type,
ir->type->vector_elements,
ir->type->matrix_columns);
void *mem_ctx = ralloc_parent(ir);
return new(mem_ctx) ir_expression(op, desired_type, ir, NULL);
}
void
lower_precision_visitor::handle_rvalue(ir_rvalue **rvalue)
{
ir_rvalue *ir = *rvalue;
if (ir == NULL)
return;
if (ir->as_dereference()) {
if (!ir->type->is_boolean())
*rvalue = convert_precision(ir_unop_f2fmp, ir);
} else if (ir->type->is_float()) {
ir->type = glsl_type::get_instance(GLSL_TYPE_FLOAT16,
ir->type->vector_elements,
ir->type->matrix_columns,
ir->type->explicit_stride,
ir->type->interface_row_major);
ir_constant *const_ir = ir->as_constant();
if (const_ir) {
ir_constant_data value;
for (unsigned i = 0; i < ARRAY_SIZE(value.f16); i++)
value.f16[i] = _mesa_float_to_half(const_ir->value.f[i]);
const_ir->value = value;
}
}
}
ir_visitor_status
lower_precision_visitor::visit_enter(ir_dereference_record *ir)
{
/* We don't want to lower the variable */
return visit_continue_with_parent;
}
ir_visitor_status
lower_precision_visitor::visit_enter(ir_dereference_array *ir)
{
/* We don't want to convert the array index or the variable. If the array
* index itself is lowerable that will be handled separately.
*/
return visit_continue_with_parent;
}
ir_visitor_status
lower_precision_visitor::visit_enter(ir_call *ir)
{
/* We don't want to convert the arguments. These will be handled separately.
*/
return visit_continue_with_parent;
}
ir_visitor_status
lower_precision_visitor::visit_enter(ir_texture *ir)
{
/* We don't want to convert the arguments. These will be handled separately.
*/
return visit_continue_with_parent;
}
ir_visitor_status
lower_precision_visitor::visit_leave(ir_expression *ir)
{
ir_rvalue_visitor::visit_leave(ir);
/* If the expression is a conversion operation to or from bool then fix the
* operation.
*/
switch (ir->operation) {
case ir_unop_b2f:
ir->operation = ir_unop_b2f16;
break;
case ir_unop_f2b:
ir->operation = ir_unop_f162b;
break;
default:
break;
}
return visit_continue;
}
void
find_precision_visitor::handle_rvalue(ir_rvalue **rvalue)
{
/* find_lowerable_rvalues_visitor has already determined which rvalues can
* have their precision lowered. If this rvalue is one of them, use
* lower_precision_visitor to rewrite it and add the f2fmp/f162f
* conversions around it.
*/
if (*rvalue == NULL)
return;
struct set_entry *entry = _mesa_set_search(lowerable_rvalues, *rvalue);
if (!entry)
return;
_mesa_set_remove(lowerable_rvalues, entry);
/* If the entire expression is just a variable dereference then trying to
* lower it will just directly add pointless to and from conversions without
* any actual operation in-between. Although these will eventually get
* optimised out, avoiding generating them here also avoids breaking inout
* parameters to functions.
*/
if ((*rvalue)->as_dereference())
return;
lower_precision_visitor v;
(*rvalue)->accept(&v);
v.handle_rvalue(rvalue);
/* We don't need to add the final conversion if the final type has been
* converted to bool
*/
if ((*rvalue)->type->base_type != GLSL_TYPE_BOOL)
*rvalue = convert_precision(ir_unop_f162f, *rvalue);
progress = true;
}
ir_visitor_status
find_precision_visitor::visit_enter(ir_call *ir)
{
ir_rvalue_enter_visitor::visit_enter(ir);
/* If this is a call to a builtin and the find_lowerable_rvalues_visitor
* overrode the precision of the temporary return variable, then we can
* replace the builtin implementation with a lowered version.
*/
if (!ir->callee->is_builtin() ||
ir->return_deref == NULL ||
ir->return_deref->variable_referenced()->data.precision !=
GLSL_PRECISION_MEDIUM)
return visit_continue;
ir->callee = map_builtin(ir->callee);
ir->generate_inline(ir);
ir->remove();
return visit_continue_with_parent;
}
ir_function_signature *
find_precision_visitor::map_builtin(ir_function_signature *sig)
{
if (lowered_builtins == NULL) {
lowered_builtins = _mesa_pointer_hash_table_create(NULL);
clone_ht = _mesa_pointer_hash_table_create(NULL);
lowered_builtin_mem_ctx = ralloc_context(NULL);
} else {
struct hash_entry *entry = _mesa_hash_table_search(lowered_builtins, sig);
if (entry)
return (ir_function_signature *) entry->data;
}
ir_function_signature *lowered_sig =
sig->clone(lowered_builtin_mem_ctx, clone_ht);
foreach_in_list(ir_variable, param, &lowered_sig->parameters) {
param->data.precision = GLSL_PRECISION_MEDIUM;
}
lower_precision(&lowered_sig->body);
_mesa_hash_table_clear(clone_ht, NULL);
_mesa_hash_table_insert(lowered_builtins, sig, lowered_sig);
return lowered_sig;
}
find_precision_visitor::find_precision_visitor()
: progress(false),
lowerable_rvalues(_mesa_pointer_set_create(NULL)),
lowered_builtins(NULL),
clone_ht(NULL),
lowered_builtin_mem_ctx(NULL)
{
}
find_precision_visitor::~find_precision_visitor()
{
_mesa_set_destroy(lowerable_rvalues, NULL);
if (lowered_builtins) {
_mesa_hash_table_destroy(lowered_builtins, NULL);
_mesa_hash_table_destroy(clone_ht, NULL);
ralloc_free(lowered_builtin_mem_ctx);
}
}
}
bool
lower_precision(exec_list *instructions)
{
find_precision_visitor v;
find_lowerable_rvalues(instructions, v.lowerable_rvalues);
visit_list_elements(&v, instructions);
return v.progress;
}
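/* Illustrative sketch, not part of the source: a caller would typically
 * gate this pass on the LowerPrecision compiler option. Re-running until
 * no further progress is made is defensive; one pass normally suffices.
 */
static void
maybe_lower_precision(const struct gl_shader_compiler_options *opts,
                      exec_list *ir)
{
   if (!opts->LowerPrecision)
      return;
   while (lower_precision(ir))
      ;
}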

View file

@ -0,0 +1,222 @@
/*
* Copyright © 2019 Collabora Ltd.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* \file lower_xfb_varying.cpp
*
*/
#include "ir.h"
#include "main/mtypes.h"
#include "glsl_symbol_table.h"
#include "util/strndup.h"
namespace {
/**
* Visitor that splices varying packing code before every return.
*/
class lower_xfb_var_splicer : public ir_hierarchical_visitor
{
public:
explicit lower_xfb_var_splicer(void *mem_ctx,
const exec_list *instructions);
virtual ir_visitor_status visit_leave(ir_return *ret);
virtual ir_visitor_status visit_leave(ir_function_signature *sig);
private:
/**
* Memory context used to allocate new instructions for the shader.
*/
void * const mem_ctx;
/**
* Instructions that should be spliced into place before each return.
*/
const exec_list *instructions;
};
} /* anonymous namespace */
lower_xfb_var_splicer::lower_xfb_var_splicer(void *mem_ctx, const exec_list *instructions)
: mem_ctx(mem_ctx), instructions(instructions)
{
}
ir_visitor_status
lower_xfb_var_splicer::visit_leave(ir_return *ret)
{
foreach_in_list(ir_instruction, ir, this->instructions) {
ret->insert_before(ir->clone(this->mem_ctx, NULL));
}
return visit_continue;
}
/** Insert a copy-back assignment at the end of the main() function */
ir_visitor_status
lower_xfb_var_splicer::visit_leave(ir_function_signature *sig)
{
if (strcmp(sig->function_name(), "main") != 0)
return visit_continue;
if (((ir_instruction*)sig->body.get_tail())->ir_type == ir_type_return)
return visit_continue;
foreach_in_list(ir_instruction, ir, this->instructions) {
sig->body.push_tail(ir->clone(this->mem_ctx, NULL));
}
return visit_continue;
}
static char*
get_field_name(const char *name)
{
const char *first_dot = strchr(name, '.');
const char *first_square_bracket = strchr(name, '[');
int name_size = 0;
if (!first_square_bracket && !first_dot)
name_size = strlen(name);
else if ((!first_square_bracket ||
(first_dot && first_dot < first_square_bracket)))
name_size = first_dot - name;
else
name_size = first_square_bracket - name;
return strndup(name, name_size);
}
/* Generate a new name given the old xfb declaration string by replacing dots
* with '_', brackets with '@' and appending "-xfb" */
static char *
generate_new_name(void *mem_ctx, const char *name)
{
char *new_name;
unsigned i = 0;
new_name = ralloc_strdup(mem_ctx, name);
while (new_name[i]) {
if (new_name[i] == '.') {
new_name[i] = '_';
} else if (new_name[i] == '[' || new_name[i] == ']') {
new_name[i] = '@';
}
i++;
}
if (!ralloc_strcat(&new_name, "-xfb")) {
ralloc_free(new_name);
return NULL;
}
return new_name;
}
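/* Illustrative self-check, not part of the source: under the rule above,
 * "s.field[2]" becomes "s_field@2@-xfb".
 */
static void
check_generate_new_name(void)
{
   void *ctx = ralloc_context(NULL);
   char *name = generate_new_name(ctx, "s.field[2]");
   assert(strcmp(name, "s_field@2@-xfb") == 0);
   ralloc_free(ctx);
}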
/* Get the dereference for the given variable name. The method is called
* recursively to parse array indices and struct members. */
static bool
get_deref(void *ctx,
const char *name,
struct gl_linked_shader *shader,
ir_dereference **deref,
const glsl_type **type)
{
if (name[0] == '\0') {
/* End */
return (*deref != NULL);
} else if (name[0] == '[') {
/* Array index */
char *endptr = NULL;
unsigned index;
index = strtol(name + 1, &endptr, 10);
assert(*type != NULL && (*type)->is_array() && endptr[0] == ']');
*deref = new(ctx) ir_dereference_array(*deref, new(ctx) ir_constant(index));
*type = (*type)->without_array();
return get_deref(ctx, endptr + 1, shader, deref, type);
} else if (name[0] == '.') {
/* Struct member */
char *field = get_field_name(name + 1);
assert(*type != NULL && (*type)->is_struct() && field != NULL);
*deref = new(ctx) ir_dereference_record(*deref, field);
*type = (*type)->field_type(field);
assert(*type != glsl_type::error_type);
name += 1 + strlen(field);
free(field);
return get_deref(ctx, name, shader, deref, type);
} else {
/* Top level variable */
char *field = get_field_name(name);
ir_variable *toplevel_var;
toplevel_var = shader->symbols->get_variable(field);
name += strlen(field);
free(field);
if (toplevel_var == NULL) {
return false;
}
*deref = new (ctx) ir_dereference_variable(toplevel_var);
*type = toplevel_var->type;
return get_deref(ctx, name, shader, deref, type);
}
}
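/* Illustrative use, not part of the source: assuming the shader declares
 * a variable "s" whose type is an array of structs with a "color" member,
 * this resolves the xfb string "s[1].color" to a dereference chain.
 */
static void
example_get_deref(void *mem_ctx, struct gl_linked_shader *shader)
{
   ir_dereference *deref = NULL;
   const glsl_type *type = NULL;
   if (get_deref(mem_ctx, "s[1].color", shader, &deref, &type)) {
      /* deref is a record deref of "color" applied to an array deref of
       * "s" at index 1; type is the glsl_type of that field. */
   }
}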
ir_variable *
lower_xfb_varying(void *mem_ctx,
struct gl_linked_shader *shader,
const char *old_var_name)
{
exec_list new_instructions;
char *new_var_name;
ir_dereference *deref = NULL;
const glsl_type *type = NULL;
if (!get_deref(mem_ctx, old_var_name, shader, &deref, &type)) {
if (deref) {
delete deref;
}
return NULL;
}
new_var_name = generate_new_name(mem_ctx, old_var_name);
ir_variable *new_variable
= new(mem_ctx) ir_variable(type, new_var_name, ir_var_shader_out);
new_variable->data.assigned = true;
new_variable->data.used = true;
shader->ir->push_head(new_variable);
ralloc_free(new_var_name);
ir_dereference *lhs = new(mem_ctx) ir_dereference_variable(new_variable);
ir_assignment *new_assignment = new(mem_ctx) ir_assignment(lhs, deref);
new_instructions.push_tail(new_assignment);
lower_xfb_var_splicer splicer(mem_ctx, &new_instructions);
visit_list_elements(&splicer, shader->ir);
return new_variable;
}

View file

@ -46,6 +46,7 @@ const struct option compiler_opts[] = {
{ "dump-builder", no_argument, &options.dump_builder, 1 },
{ "link", no_argument, &options.do_link, 1 },
{ "just-log", no_argument, &options.just_log, 1 },
{ "lower-precision", no_argument, &options.lower_precision, 1 },
{ "version", required_argument, NULL, 'v' },
{ NULL, 0, NULL, 0 }
};

View file

@ -578,7 +578,8 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
ir_rvalue *y_operand = inner_add->operands[1 - neg_pos];
ir_rvalue *a_operand = mul->operands[1 - inner_add_pos];
if (!x_operand->type->is_float_16_32_64() ||
x_operand->type != y_operand->type ||
x_operand->type != a_operand->type)
continue;
@ -983,6 +984,9 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
ir_constant *one;
switch (ir->type->base_type) {
case GLSL_TYPE_FLOAT16:
one = new(mem_ctx) ir_constant(float16_t::one(), op2_components);
break;
case GLSL_TYPE_FLOAT:
one = new(mem_ctx) ir_constant(1.0f, op2_components);
break;

View file

@ -208,6 +208,9 @@ ir_constant_propagation_visitor::constant_propagation(ir_rvalue **rvalue) {
case GLSL_TYPE_FLOAT:
data.f[i] = found->constant->value.f[rhs_channel];
break;
case GLSL_TYPE_FLOAT16:
data.f16[i] = found->constant->value.f16[rhs_channel];
break;
case GLSL_TYPE_DOUBLE:
data.d[i] = found->constant->value.d[rhs_channel];
break;

View file

@ -49,6 +49,9 @@ struct assignment_entry {
class ir_constant_variable_visitor : public ir_hierarchical_visitor {
public:
using ir_hierarchical_visitor::visit;
using ir_hierarchical_visitor::visit_enter;
virtual ir_visitor_status visit_enter(ir_dereference_variable *);
virtual ir_visitor_status visit(ir_variable *);
virtual ir_visitor_status visit_enter(ir_assignment *);
@ -162,6 +165,15 @@ ir_constant_variable_visitor::visit_enter(ir_call *ir)
entry = get_assignment_entry(var, this->ht);
entry->assignment_count++;
}
/* We don't know if the variable passed to this function has been
* assigned a value or if it is undefined, so for now we always assume
* it has been assigned a value. Once functions have been inlined any
* further potential optimisations will be taken care of.
*/
struct assignment_entry *entry;
entry = get_assignment_entry(param, this->ht);
entry->assignment_count++;
}
/* Mark the return storage as having been assigned to */

View file

@ -66,6 +66,8 @@ public:
class kill_for_derefs_visitor : public ir_hierarchical_visitor {
public:
using ir_hierarchical_visitor::visit;
kill_for_derefs_visitor(exec_list *assignments)
{
this->assignments = assignments;

View file

@ -38,6 +38,7 @@
#include "program/prog_instruction.h"
#include "compiler/glsl_types.h"
#include "main/macros.h"
#include "util/half_float.h"
using namespace ir_builder;
@ -125,6 +126,17 @@ compare_components(ir_constant *a, ir_constant *b)
else
foundequal = true;
break;
case GLSL_TYPE_FLOAT16: {
float af = _mesa_half_to_float(a->value.f16[c0]);
float bf = _mesa_half_to_float(b->value.f16[c1]);
if (af < bf)
foundless = true;
else if (af > bf)
foundgreater = true;
else
foundequal = true;
break;
}
case GLSL_TYPE_FLOAT:
if (a->value.f[c0] < b->value.f[c1])
foundless = true;
@ -181,6 +193,13 @@ combine_constant(bool ismin, ir_constant *a, ir_constant *b)
(!ismin && b->value.i[i] > c->value.i[i]))
c->value.i[i] = b->value.i[i];
break;
case GLSL_TYPE_FLOAT16: {
float bf = _mesa_half_to_float(b->value.f16[i]);
float cf = _mesa_half_to_float(c->value.f16[i]);
if ((ismin && bf < cf) || (!ismin && bf > cf))
c->value.f16[i] = b->value.f16[i];
break;
}
case GLSL_TYPE_FLOAT:
if ((ismin && b->value.f[i] < c->value.f[i]) ||
(!ismin && b->value.f[i] > c->value.f[i]))

View file

@ -1222,8 +1222,7 @@ create_linked_shader_and_program(struct gl_context *ctx,
struct gl_linked_shader *linked = rzalloc(NULL, struct gl_linked_shader);
linked->Stage = stage;
glprog = ctx->Driver.NewProgram(ctx, stage, prog->Name, false);
glprog->info.stage = stage;
linked->Program = glprog;
@ -1256,6 +1255,7 @@ serialize_glsl_program(struct blob *blob, struct gl_context *ctx,
write_hash_tables(blob, prog);
blob_write_uint32(blob, prog->data->Version);
blob_write_uint32(blob, prog->IsES);
blob_write_uint32(blob, prog->data->linked_stages);
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
@ -1314,6 +1314,7 @@ deserialize_glsl_program(struct blob_reader *blob, struct gl_context *ctx,
read_hash_tables(blob, prog);
prog->data->Version = blob_read_uint32(blob);
prog->IsES = blob_read_uint32(blob);
prog->data->linked_stages = blob_read_uint32(blob);
unsigned mask = prog->data->linked_stages;

View file

@ -99,33 +99,21 @@ private:
};
static void
init_gl_program(struct gl_program *prog, bool is_arb_asm, gl_shader_stage stage)
{
prog->RefCount = 1;
prog->Format = GL_PROGRAM_FORMAT_ASCII_ARB;
prog->is_arb_asm = is_arb_asm;
prog->info.stage = stage;
}
static struct gl_program *
new_program(UNUSED struct gl_context *ctx, gl_shader_stage stage,
UNUSED GLuint id, bool is_arb_asm)
{
struct gl_program *prog = rzalloc(NULL, struct gl_program);
init_gl_program(prog, is_arb_asm, stage);
return prog;
}
static const struct standalone_options *options;
@ -446,6 +434,14 @@ standalone_compile_shader(const struct standalone_options *_options,
initialize_context(ctx, options->glsl_version > 130 ? API_OPENGL_CORE : API_OPENGL_COMPAT);
}
if (options->lower_precision) {
for (unsigned i = MESA_SHADER_VERTEX; i <= MESA_SHADER_FRAGMENT; i++) {
struct gl_shader_compiler_options *options =
&ctx->Const.ShaderCompilerOptions[i];
options->LowerPrecision = true;
}
}
struct gl_shader_program *whole_program;
whole_program = rzalloc (NULL, struct gl_shader_program);

View file

@ -36,6 +36,7 @@ struct standalone_options {
int dump_builder;
int do_link;
int just_log;
int lower_precision;
};
struct gl_shader_program;

View file

@ -203,6 +203,7 @@ void initialize_context_to_defaults(struct gl_context *ctx, gl_api api)
ctx->Extensions.ARB_shader_bit_encoding = true;
ctx->Extensions.ARB_shader_draw_parameters = true;
ctx->Extensions.ARB_shader_stencil_export = true;
ctx->Extensions.ARB_shader_storage_buffer_object = true;
ctx->Extensions.ARB_shader_texture_lod = true;
ctx->Extensions.ARB_shading_language_420pack = true;
ctx->Extensions.ARB_shading_language_packing = true;
@ -265,6 +266,16 @@ void initialize_context_to_defaults(struct gl_context *ctx, gl_api api)
ctx->Const.Program[MESA_SHADER_COMPUTE].MaxInputComponents = 0; /* not used */
ctx->Const.Program[MESA_SHADER_COMPUTE].MaxOutputComponents = 0; /* not used */
ctx->Const.MaxVertexStreams = 4;
ctx->Const.MaxTransformFeedbackBuffers = 4;
ctx->Const.MaxShaderStorageBufferBindings = 4;
ctx->Const.MaxShaderStorageBlockSize = 4096;
ctx->Const.Program[MESA_SHADER_VERTEX].MaxShaderStorageBlocks = 8;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxShaderStorageBlocks = 8;
ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformBlocks = 12;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformBlocks = 12;
/* Set up default shader compiler options. */
struct gl_shader_compiler_options options;
memset(&options, 0, sizeof(options));

View file

@ -462,6 +462,16 @@ const glsl_type *glsl_type::get_bare_type() const
unreachable("Invalid base type");
}
const glsl_type *glsl_type::get_float16_type() const
{
assert(this->base_type == GLSL_TYPE_FLOAT);
return get_instance(GLSL_TYPE_FLOAT16,
this->vector_elements,
this->matrix_columns,
this->explicit_stride,
this->interface_row_major);
}
static void
hash_free_type_function(struct hash_entry *entry)
@ -663,9 +673,11 @@ glsl_type::get_instance(unsigned base_type, unsigned rows, unsigned columns,
assert(((glsl_type *) entry->data)->matrix_columns == columns);
assert(((glsl_type *) entry->data)->explicit_stride == explicit_stride);
const glsl_type *t = (const glsl_type *) entry->data;
mtx_unlock(&glsl_type::hash_mutex);
return t;
}
assert(!row_major);
@ -1024,9 +1036,11 @@ glsl_type::get_array_instance(const glsl_type *base,
assert(((glsl_type *) entry->data)->length == array_size);
assert(((glsl_type *) entry->data)->fields.array == base);
glsl_type *t = (glsl_type *) entry->data;
mtx_unlock(&glsl_type::hash_mutex);
return t;
}
bool
@ -1225,9 +1239,11 @@ glsl_type::get_struct_instance(const glsl_struct_field *fields,
assert(strcmp(((glsl_type *) entry->data)->name, name) == 0);
assert(((glsl_type *) entry->data)->packed == packed);
glsl_type *t = (glsl_type *) entry->data;
mtx_unlock(&glsl_type::hash_mutex);
return t;
}
@ -1261,9 +1277,11 @@ glsl_type::get_interface_instance(const glsl_struct_field *fields,
assert(((glsl_type *) entry->data)->length == num_fields);
assert(strcmp(((glsl_type *) entry->data)->name, block_name) == 0);
glsl_type *t = (glsl_type *) entry->data;
mtx_unlock(&glsl_type::hash_mutex);
return t;
}
const glsl_type *
@ -1290,9 +1308,11 @@ glsl_type::get_subroutine_instance(const char *subroutine_name)
assert(((glsl_type *) entry->data)->base_type == GLSL_TYPE_SUBROUTINE);
assert(strcmp(((glsl_type *) entry->data)->name, subroutine_name) == 0);
glsl_type *t = (glsl_type *) entry->data;
mtx_unlock(&glsl_type::hash_mutex);
return t;
}
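/* Illustrative pattern, names hypothetical: the lookups above now copy
 * the entry's payload while the mutex is still held. Returning
 * entry->data after mtx_unlock() could race with a concurrent insert
 * that rehashes the table.
 */
static mtx_t cache_mutex;

static const glsl_type *
lookup_cached_type(struct hash_table *ht, const void *key)
{
   mtx_lock(&cache_mutex);
   const struct hash_entry *entry = _mesa_hash_table_search(ht, key);
   const glsl_type *t = entry ? (const glsl_type *) entry->data : NULL;
   mtx_unlock(&cache_mutex);
   return t;
}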
@ -2572,29 +2592,8 @@ glsl_type::count_dword_slots(bool is_bindless) const
int
glsl_type::coordinate_components() const
{
enum glsl_sampler_dim dim = (enum glsl_sampler_dim)sampler_dimensionality;
int size = glsl_get_sampler_dim_coordinate_components(dim);
/* Array textures need an additional component for the array index, except
* for cubemap array images that behave like a 2D array of interleaved
@ -2927,3 +2926,29 @@ glsl_type::cl_size() const
}
return 1;
}
extern "C" {
int
glsl_get_sampler_dim_coordinate_components(enum glsl_sampler_dim dim)
{
switch (dim) {
case GLSL_SAMPLER_DIM_1D:
case GLSL_SAMPLER_DIM_BUF:
return 1;
case GLSL_SAMPLER_DIM_2D:
case GLSL_SAMPLER_DIM_RECT:
case GLSL_SAMPLER_DIM_MS:
case GLSL_SAMPLER_DIM_EXTERNAL:
case GLSL_SAMPLER_DIM_SUBPASS:
case GLSL_SAMPLER_DIM_SUBPASS_MS:
return 2;
case GLSL_SAMPLER_DIM_3D:
case GLSL_SAMPLER_DIM_CUBE:
return 3;
default:
unreachable("Unknown sampler dim");
}
}
}
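/* Illustrative checks, not part of the source: the values follow directly
 * from the switch above. */
static void
check_coordinate_components(void)
{
   assert(glsl_get_sampler_dim_coordinate_components(GLSL_SAMPLER_DIM_BUF) == 1);
   assert(glsl_get_sampler_dim_coordinate_components(GLSL_SAMPLER_DIM_RECT) == 2);
   assert(glsl_get_sampler_dim_coordinate_components(GLSL_SAMPLER_DIM_CUBE) == 3);
}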

View file

@ -31,6 +31,7 @@
#include "shader_enums.h"
#include "c11/threads.h"
#include "util/blob.h"
#include "util/format/u_format.h"
#include "util/macros.h"
#ifdef __cplusplus
@ -59,10 +60,6 @@ void encode_type_to_blob(struct blob *blob, const struct glsl_type *type);
const struct glsl_type *decode_type_from_blob(struct blob_reader *blob);
#ifdef __cplusplus
}
#endif
typedef void (*glsl_type_size_align_func)(const struct glsl_type *type,
unsigned *size, unsigned *align);
@ -230,6 +227,9 @@ enum glsl_sampler_dim {
GLSL_SAMPLER_DIM_SUBPASS_MS, /* for multisampled vulkan input attachments */
};
int
glsl_get_sampler_dim_coordinate_components(enum glsl_sampler_dim dim);
enum glsl_matrix_layout {
/**
* The layout of the matrix is inherited from the object containing the
@ -259,6 +259,8 @@ enum {
};
#ifdef __cplusplus
} /* extern "C" */
#include "GL/gl.h"
#include "util/ralloc.h"
#include "main/menums.h" /* for gl_texture_index, C++'s enum rules are broken */
@ -393,6 +395,11 @@ public:
*/
const glsl_type *get_bare_type() const;
/**
* Gets the float16 version of this type.
*/
const glsl_type *get_float16_type() const;
/**
* Get the instance of a built-in scalar, vector, or matrix type
*/
@ -754,6 +761,22 @@ public:
return base_type == GLSL_TYPE_FLOAT;
}
/**
* Query whether or not a type is a half-float or float type
*/
bool is_float_16_32() const
{
return base_type == GLSL_TYPE_FLOAT16 || is_float();
}
/**
* Query whether or not a type is a half-float, float or double
*/
bool is_float_16_32_64() const
{
return base_type == GLSL_TYPE_FLOAT16 || is_float() || is_double();
}
/**
* Query whether or not a type is a double type
*/
@ -1297,7 +1320,7 @@ struct glsl_struct_field {
/**
* Layout format, applicable to image variables only.
*/
unsigned image_format:16;
enum pipe_format image_format;
/**
* Any of the xfb_* qualifiers trigger the shader to be in transform
@ -1314,7 +1337,8 @@ struct glsl_struct_field {
sample(0), matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED), patch(0), \
precision(_precision), memory_read_only(0), \
memory_write_only(0), memory_coherent(0), memory_volatile(0), \
memory_restrict(0), image_format(0), explicit_xfb_buffer(0), \
memory_restrict(0), image_format(PIPE_FORMAT_NONE), \
explicit_xfb_buffer(0), \
implicit_sized_array(0)
glsl_struct_field(const struct glsl_type *_type,

View file

@ -165,6 +165,7 @@ gl_varying_slot_name(gl_varying_slot slot)
ENUM(VARYING_SLOT_BOUNDING_BOX0),
ENUM(VARYING_SLOT_BOUNDING_BOX1),
ENUM(VARYING_SLOT_VIEW_INDEX),
ENUM(VARYING_SLOT_VIEWPORT_MASK),
ENUM(VARYING_SLOT_VAR0),
ENUM(VARYING_SLOT_VAR1),
ENUM(VARYING_SLOT_VAR2),

View file

@ -261,6 +261,7 @@ typedef enum
VARYING_SLOT_BOUNDING_BOX0, /* Only appears as TCS output. */
VARYING_SLOT_BOUNDING_BOX1, /* Only appears as TCS output. */
VARYING_SLOT_VIEW_INDEX,
VARYING_SLOT_VIEWPORT_MASK, /* Does not appear in FS */
VARYING_SLOT_VAR0, /* First generic varying slot */
/* the remaining are simply for the benefit of gl_varying_slot_name()
* and not to be construed as an upper bound:
@ -343,6 +344,7 @@ const char *gl_varying_slot_name(gl_varying_slot slot);
#define VARYING_BIT_TESS_LEVEL_INNER BITFIELD64_BIT(VARYING_SLOT_TESS_LEVEL_INNER)
#define VARYING_BIT_BOUNDING_BOX0 BITFIELD64_BIT(VARYING_SLOT_BOUNDING_BOX0)
#define VARYING_BIT_BOUNDING_BOX1 BITFIELD64_BIT(VARYING_SLOT_BOUNDING_BOX1)
#define VARYING_BIT_VIEWPORT_MASK BITFIELD64_BIT(VARYING_SLOT_VIEWPORT_MASK)
#define VARYING_BIT_VAR(V) BITFIELD64_BIT(VARYING_SLOT_VAR0 + (V))
/*@}*/

View file

@ -134,12 +134,24 @@ typedef struct shader_info {
/* Which patch outputs are read */
uint32_t patch_outputs_read;
/* Which inputs are read indirectly (subset of inputs_read) */
uint64_t inputs_read_indirectly;
/* Which outputs are read or written indirectly */
uint64_t outputs_accessed_indirectly;
/* Which patch inputs are read indirectly (subset of patch_inputs_read) */
uint64_t patch_inputs_read_indirectly;
/* Which patch outputs are read or written indirectly */
uint64_t patch_outputs_accessed_indirectly;
/** Bitfield of which textures are used */
uint32_t textures_used;
/** Bitfield of which textures are used by texelFetch() */
uint32_t textures_used_by_txf;
/** Bitfield of which images are used */
uint32_t images_used;
/* SPV_KHR_float_controls: execution mode for floating point ops */
uint16_t float_controls_execution_mode;
@ -176,6 +188,12 @@ typedef struct shader_info {
/* Whether flrp has been lowered. */
bool flrp_lowered:1;
/* Whether the shader writes memory, including transform feedback. */
bool writes_memory:1;
/* Whether gl_Layer is viewport-relative */
bool layer_viewport_relative:1;
union {
struct {
/* Which inputs are doubles */
@ -217,6 +235,7 @@ typedef struct shader_info {
struct {
bool uses_discard:1;
bool uses_demote:1;
/**
* True if this fragment shader requires helper invocations. This
@ -282,6 +301,7 @@ typedef struct shader_info {
struct {
uint16_t local_size[3];
uint16_t max_variable_local_size;
bool local_size_variable:1;
uint8_t user_data_components_amd:3;
@ -317,6 +337,16 @@ typedef struct shader_info {
/** Is the vertex order counterclockwise? */
bool ccw:1;
bool point_mode:1;
/* Bit mask of TCS per-vertex inputs (VS outputs) that are used
* with a vertex index that is NOT the invocation id
*/
uint64_t tcs_cross_invocation_inputs_read;
/* Bit mask of TCS per-vertex outputs that are used
* with a vertex index that is NOT the invocation id
*/
uint64_t tcs_cross_invocation_outputs_read;
} tess;
};
} shader_info;

View file

@ -30,6 +30,7 @@
#include "pipe/p_compiler.h"
#include "util/u_math.h"
#include "util/half_float.h"
#ifdef __cplusplus
extern "C" {
@ -45,6 +46,12 @@ extern "C" {
static inline uint16_t
util_float_to_half(float f)
{
return _mesa_float_to_half(f);
}
static inline uint16_t
util_float_to_half_rtz(float f)
{
uint32_t sign_mask = 0x80000000;
uint32_t round_mask = ~0xfff;

View file

@ -128,7 +128,7 @@ typedef unsigned char boolean;
/* See http://gcc.gnu.org/onlinedocs/gcc-4.4.2/gcc/Variable-Attributes.html */
#define PIPE_ALIGN_VAR(_alignment) __attribute__((aligned(_alignment)))
#if defined(__GNUC__) && defined(PIPE_ARCH_X86)
#define PIPE_ALIGN_STACK __attribute__((force_align_arg_pointer))
#else
#define PIPE_ALIGN_STACK

The file diff is not shown because it is too large. Load diff

View file

@ -560,6 +560,26 @@ enum pipe_video_chroma_format
PIPE_VIDEO_CHROMA_FORMAT_NONE
};
static inline enum pipe_video_chroma_format
pipe_format_to_chroma_format(enum pipe_format format)
{
switch (format) {
case PIPE_FORMAT_NV12:
case PIPE_FORMAT_NV21:
case PIPE_FORMAT_YV12:
case PIPE_FORMAT_IYUV:
case PIPE_FORMAT_P010:
case PIPE_FORMAT_P016:
return PIPE_VIDEO_CHROMA_FORMAT_420;
case PIPE_FORMAT_UYVY:
case PIPE_FORMAT_YUYV:
case PIPE_FORMAT_YV16:
return PIPE_VIDEO_CHROMA_FORMAT_422;
default:
return PIPE_VIDEO_CHROMA_FORMAT_NONE;
}
}
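/* Illustrative check, not part of the source: planar NV12 maps to 4:2:0
 * and packed YUYV maps to 4:2:2 under the helper above. */
static void
check_chroma_format(void)
{
   assert(pipe_format_to_chroma_format(PIPE_FORMAT_NV12) ==
          PIPE_VIDEO_CHROMA_FORMAT_420);
   assert(pipe_format_to_chroma_format(PIPE_FORMAT_YUYV) ==
          PIPE_VIDEO_CHROMA_FORMAT_422);
}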
#ifdef __cplusplus
}
#endif

View file

@ -0,0 +1,980 @@
/**************************************************************************
*
* Copyright 2007 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
/**
* @file
*
* Abstract graphics pipe state objects.
*
* Basic notes:
* 1. Want compact representations, so we use bitfields.
* 2. Put bitfields before other (GLfloat) fields.
* 3. enum bitfields need to be at least one bit extra in size so the most
* significant bit is zero. MSVC treats enums as signed so if the high
* bit is set, the value will be interpreted as a negative number.
* That causes trouble in various places.
*/
#ifndef PIPE_STATE_H
#define PIPE_STATE_H
#include "p_compiler.h"
#include "p_defines.h"
#include "p_format.h"
#ifdef __cplusplus
extern "C" {
#endif
/**
* Implementation limits
*/
#define PIPE_MAX_ATTRIBS 32
#define PIPE_MAX_CLIP_PLANES 8
#define PIPE_MAX_COLOR_BUFS 8
#define PIPE_MAX_CONSTANT_BUFFERS 32
#define PIPE_MAX_SAMPLERS 32
#define PIPE_MAX_SHADER_INPUTS 80 /* 32 GENERIC + 32 PATCH + 16 others */
#define PIPE_MAX_SHADER_OUTPUTS 80 /* 32 GENERIC + 32 PATCH + 16 others */
#define PIPE_MAX_SHADER_SAMPLER_VIEWS 128
#define PIPE_MAX_SHADER_BUFFERS 32
#define PIPE_MAX_SHADER_IMAGES 32
#define PIPE_MAX_TEXTURE_LEVELS 16
#define PIPE_MAX_SO_BUFFERS 4
#define PIPE_MAX_SO_OUTPUTS 64
#define PIPE_MAX_VIEWPORTS 16
#define PIPE_MAX_CLIP_OR_CULL_DISTANCE_COUNT 8
#define PIPE_MAX_CLIP_OR_CULL_DISTANCE_ELEMENT_COUNT 2
#define PIPE_MAX_WINDOW_RECTANGLES 8
#define PIPE_MAX_SAMPLE_LOCATION_GRID_SIZE 4
#define PIPE_MAX_HW_ATOMIC_BUFFERS 32
#define PIPE_MAX_VERTEX_STREAMS 4
struct pipe_reference
{
int32_t count; /* atomic */
};
/**
* Primitive (point/line/tri) rasterization info
*/
struct pipe_rasterizer_state
{
unsigned flatshade:1;
unsigned light_twoside:1;
unsigned clamp_vertex_color:1;
unsigned clamp_fragment_color:1;
unsigned front_ccw:1;
unsigned cull_face:2; /**< PIPE_FACE_x */
unsigned fill_front:2; /**< PIPE_POLYGON_MODE_x */
unsigned fill_back:2; /**< PIPE_POLYGON_MODE_x */
unsigned offset_point:1;
unsigned offset_line:1;
unsigned offset_tri:1;
unsigned scissor:1;
unsigned poly_smooth:1;
unsigned poly_stipple_enable:1;
unsigned point_smooth:1;
unsigned sprite_coord_mode:1; /**< PIPE_SPRITE_COORD_ */
unsigned point_quad_rasterization:1; /** points rasterized as quads or points */
unsigned point_tri_clip:1; /** large points clipped as tris or points */
unsigned point_size_per_vertex:1; /**< size computed in vertex shader */
unsigned multisample:1; /* XXX maybe more ms state in future */
unsigned force_persample_interp:1;
unsigned line_smooth:1;
unsigned line_stipple_enable:1;
unsigned line_last_pixel:1;
unsigned conservative_raster_mode:2; /**< PIPE_CONSERVATIVE_RASTER_x */
/**
* Use the first vertex of a primitive as the provoking vertex for
* flat shading.
*/
unsigned flatshade_first:1;
unsigned half_pixel_center:1;
unsigned bottom_edge_rule:1;
/*
* Conservative rasterization subpixel precision bias in bits
*/
unsigned subpixel_precision_x:4;
unsigned subpixel_precision_y:4;
/**
* When true, rasterization is disabled and no pixels are written.
* This only makes sense with the Stream Out functionality.
*/
unsigned rasterizer_discard:1;
/**
* Exposed by PIPE_CAP_TILE_RASTER_ORDER. When true,
* tile_raster_order_increasing_* indicate the order that the rasterizer
* should render tiles, to meet the requirements of
* GL_MESA_tile_raster_order.
*/
unsigned tile_raster_order_fixed:1;
unsigned tile_raster_order_increasing_x:1;
unsigned tile_raster_order_increasing_y:1;
/**
* When false, depth clipping is disabled and the depth value will be
* clamped later at the per-pixel level before depth testing.
* This depends on PIPE_CAP_DEPTH_CLIP_DISABLE.
*
* If PIPE_CAP_DEPTH_CLIP_DISABLE_SEPARATE is unsupported, depth_clip_near
* is equal to depth_clip_far.
*/
unsigned depth_clip_near:1;
unsigned depth_clip_far:1;
/**
* When true clip space in the z axis goes from [0..1] (D3D). When false
* [-1, 1] (GL).
*
* NOTE: D3D will always use depth clamping.
*/
unsigned clip_halfz:1;
/**
* When true do not scale offset_units and use same rules for unorm and
* float depth buffers (D3D9). When false use GL/D3D1X behaviour.
* This depends on PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED.
*/
unsigned offset_units_unscaled:1;
/**
* Enable bits for clipping half-spaces.
* This applies to both user clip planes and shader clip distances.
* Note that if the bound shader exports any clip distances, these
* replace all user clip planes, and clip half-spaces enabled here
* but not written by the shader count as disabled.
*/
unsigned clip_plane_enable:PIPE_MAX_CLIP_PLANES;
unsigned line_stipple_factor:8; /**< [1..256] actually */
unsigned line_stipple_pattern:16;
/**
* Replace the given TEXCOORD inputs with point coordinates, max. 8 inputs.
* If TEXCOORD (including PCOORD) are unsupported, replace GENERIC inputs
* instead. Max. 9 inputs: 8x GENERIC to emulate TEXCOORD, and 1x GENERIC
* to emulate PCOORD.
*/
uint16_t sprite_coord_enable; /* 0-7: TEXCOORD/GENERIC, 8: PCOORD */
float line_width;
float point_size; /**< used when no per-vertex size */
float offset_units;
float offset_scale;
float offset_clamp;
float conservative_raster_dilate;
};
struct pipe_poly_stipple
{
unsigned stipple[32];
};
struct pipe_viewport_state
{
float scale[3];
float translate[3];
enum pipe_viewport_swizzle swizzle_x:3;
enum pipe_viewport_swizzle swizzle_y:3;
enum pipe_viewport_swizzle swizzle_z:3;
enum pipe_viewport_swizzle swizzle_w:3;
};
struct pipe_scissor_state
{
unsigned minx:16;
unsigned miny:16;
unsigned maxx:16;
unsigned maxy:16;
};
struct pipe_clip_state
{
float ucp[PIPE_MAX_CLIP_PLANES][4];
};
/**
* A single output for vertex transform feedback.
*/
struct pipe_stream_output
{
unsigned register_index:6; /**< 0 to 63 (OUT index) */
unsigned start_component:2; /** 0 to 3 */
unsigned num_components:3; /** 1 to 4 */
unsigned output_buffer:3; /**< 0 to PIPE_MAX_SO_BUFFERS */
unsigned dst_offset:16; /**< offset into the buffer in dwords */
unsigned stream:2; /**< 0 to 3 */
};
/**
* Stream output for vertex transform feedback.
*/
struct pipe_stream_output_info
{
unsigned num_outputs;
/** stride for an entire vertex for each buffer in dwords */
uint16_t stride[PIPE_MAX_SO_BUFFERS];
/**
* Array of stream outputs, in the order they are to be written in.
* Selected components are tightly packed into the output buffer.
*/
struct pipe_stream_output output[PIPE_MAX_SO_OUTPUTS];
};
/**
* The 'type' parameter identifies whether the shader state contains TGSI
* tokens, etc. If the driver returns 'PIPE_SHADER_IR_TGSI' for the
* 'PIPE_SHADER_CAP_PREFERRED_IR' shader param, the ir will *always* be
* 'PIPE_SHADER_IR_TGSI' and the tokens ptr will be valid. If the driver
* requests a different 'pipe_shader_ir' type, then it must check the 'type'
* enum to see if it is getting TGSI tokens or its preferred IR.
*
* TODO pipe_compute_state should probably get similar treatment to handle
* multiple IRs in a cleaner way.
*
* NOTE: since it is expected that the consumer will want to perform
* additional passes on the nir_shader, the driver takes ownership of
* the nir_shader. If state trackers need to hang on to the IR (for
* example, variant management), it should use nir_shader_clone().
*/
struct pipe_shader_state
{
enum pipe_shader_ir type;
/* TODO move tokens into union. */
const struct tgsi_token *tokens;
union {
void *native;
void *nir;
} ir;
struct pipe_stream_output_info stream_output;
};
static inline void
pipe_shader_state_from_tgsi(struct pipe_shader_state *state,
const struct tgsi_token *tokens)
{
state->type = PIPE_SHADER_IR_TGSI;
state->tokens = tokens;
memset(&state->stream_output, 0, sizeof(state->stream_output));
}
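/* Illustrative usage, names hypothetical: wrap existing TGSI tokens in a
 * pipe_shader_state before creating a vertex shader on a context. */
static void *
create_vs_from_tokens(struct pipe_context *pipe,
                      const struct tgsi_token *tokens)
{
   struct pipe_shader_state state;
   pipe_shader_state_from_tgsi(&state, tokens);
   return pipe->create_vs_state(pipe, &state);
}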
struct pipe_depth_state
{
unsigned enabled:1; /**< depth test enabled? */
unsigned writemask:1; /**< allow depth buffer writes? */
unsigned func:3; /**< depth test func (PIPE_FUNC_x) */
unsigned bounds_test:1; /**< depth bounds test enabled? */
float bounds_min; /**< minimum depth bound */
float bounds_max; /**< maximum depth bound */
};
struct pipe_stencil_state
{
unsigned enabled:1; /**< stencil[0]: stencil enabled, stencil[1]: two-side enabled */
unsigned func:3; /**< PIPE_FUNC_x */
unsigned fail_op:3; /**< PIPE_STENCIL_OP_x */
unsigned zpass_op:3; /**< PIPE_STENCIL_OP_x */
unsigned zfail_op:3; /**< PIPE_STENCIL_OP_x */
unsigned valuemask:8;
unsigned writemask:8;
};
struct pipe_alpha_state
{
unsigned enabled:1;
unsigned func:3; /**< PIPE_FUNC_x */
float ref_value; /**< reference value */
};
struct pipe_depth_stencil_alpha_state
{
struct pipe_depth_state depth;
struct pipe_stencil_state stencil[2]; /**< [0] = front, [1] = back */
struct pipe_alpha_state alpha;
};
struct pipe_rt_blend_state
{
unsigned blend_enable:1;
unsigned rgb_func:3; /**< PIPE_BLEND_x */
unsigned rgb_src_factor:5; /**< PIPE_BLENDFACTOR_x */
unsigned rgb_dst_factor:5; /**< PIPE_BLENDFACTOR_x */
unsigned alpha_func:3; /**< PIPE_BLEND_x */
unsigned alpha_src_factor:5; /**< PIPE_BLENDFACTOR_x */
unsigned alpha_dst_factor:5; /**< PIPE_BLENDFACTOR_x */
unsigned colormask:4; /**< bitmask of PIPE_MASK_R/G/B/A */
};
struct pipe_blend_state
{
unsigned independent_blend_enable:1;
unsigned logicop_enable:1;
unsigned logicop_func:4; /**< PIPE_LOGICOP_x */
unsigned dither:1;
unsigned alpha_to_coverage:1;
unsigned alpha_to_coverage_dither:1;
unsigned alpha_to_one:1;
unsigned max_rt:3; /* index of max rt, i.e. # of cbufs minus 1 */
struct pipe_rt_blend_state rt[PIPE_MAX_COLOR_BUFS];
};
struct pipe_blend_color
{
float color[4];
};
struct pipe_stencil_ref
{
ubyte ref_value[2];
};
/**
* Note that pipe_surfaces are "texture views for rendering"
* and so in the case of ARB_framebuffer_no_attachment there
* is no pipe_surface state available such that we may
* extract the number of samples and layers.
*/
struct pipe_framebuffer_state
{
uint16_t width, height;
uint16_t layers; /**< Number of layers in a no-attachment framebuffer */
ubyte samples; /**< Number of samples in a no-attachment framebuffer */
/** multiple color buffers for multiple render targets */
ubyte nr_cbufs;
struct pipe_surface *cbufs[PIPE_MAX_COLOR_BUFS];
struct pipe_surface *zsbuf; /**< Z/stencil buffer */
};
/**
* Texture sampler state.
*/
struct pipe_sampler_state
{
unsigned wrap_s:3; /**< PIPE_TEX_WRAP_x */
unsigned wrap_t:3; /**< PIPE_TEX_WRAP_x */
unsigned wrap_r:3; /**< PIPE_TEX_WRAP_x */
unsigned min_img_filter:1; /**< PIPE_TEX_FILTER_x */
unsigned min_mip_filter:2; /**< PIPE_TEX_MIPFILTER_x */
unsigned mag_img_filter:1; /**< PIPE_TEX_FILTER_x */
unsigned compare_mode:1; /**< PIPE_TEX_COMPARE_x */
unsigned compare_func:3; /**< PIPE_FUNC_x */
unsigned normalized_coords:1; /**< Are coords normalized to [0,1]? */
unsigned max_anisotropy:5;
unsigned seamless_cube_map:1;
float lod_bias; /**< LOD/lambda bias */
float min_lod, max_lod; /**< LOD clamp range, after bias */
union pipe_color_union border_color;
};
union pipe_surface_desc {
struct {
unsigned level;
unsigned first_layer:16;
unsigned last_layer:16;
} tex;
struct {
unsigned first_element;
unsigned last_element;
} buf;
};
/**
* A view into a texture that can be bound to a color render target /
* depth stencil attachment point.
*/
struct pipe_surface
{
struct pipe_reference reference;
enum pipe_format format:16;
unsigned writable:1; /**< writable shader resource */
struct pipe_resource *texture; /**< resource into which this is a view */
struct pipe_context *context; /**< context this surface belongs to */
/* XXX width/height should be removed */
uint16_t width; /**< logical width in pixels */
uint16_t height; /**< logical height in pixels */
/**
* Number of samples for the surface. This will be 0 if rendering
* should use the resource's nr_samples, or another value if the resource
* is bound using FramebufferTexture2DMultisampleEXT.
*/
unsigned nr_samples:8;
union pipe_surface_desc u;
};
/**
* A view into a texture that can be bound to a shader stage.
*/
struct pipe_sampler_view
{
struct pipe_reference reference;
enum pipe_format format:15; /**< typed PIPE_FORMAT_x */
enum pipe_texture_target target:5; /**< PIPE_TEXTURE_x */
unsigned swizzle_r:3; /**< PIPE_SWIZZLE_x for red component */
unsigned swizzle_g:3; /**< PIPE_SWIZZLE_x for green component */
unsigned swizzle_b:3; /**< PIPE_SWIZZLE_x for blue component */
unsigned swizzle_a:3; /**< PIPE_SWIZZLE_x for alpha component */
struct pipe_resource *texture; /**< texture into which this is a view */
struct pipe_context *context; /**< context this view belongs to */
union {
struct {
unsigned first_layer:16; /**< first layer to use for array textures */
unsigned last_layer:16; /**< last layer to use for array textures */
unsigned first_level:8; /**< first mipmap level to use */
unsigned last_level:8; /**< last mipmap level to use */
} tex;
struct {
unsigned offset; /**< offset in bytes */
unsigned size; /**< size of the readable sub-range in bytes */
} buf;
} u;
};
/**
* A description of a buffer or texture image that can be bound to a shader
* stage.
*/
struct pipe_image_view
{
struct pipe_resource *resource; /**< resource into which this is a view */
enum pipe_format format; /**< typed PIPE_FORMAT_x */
uint16_t access; /**< PIPE_IMAGE_ACCESS_x */
uint16_t shader_access; /**< PIPE_IMAGE_ACCESS_x */
union {
struct {
unsigned first_layer:16; /**< first layer to use for array textures */
unsigned last_layer:16; /**< last layer to use for array textures */
unsigned level:8; /**< mipmap level to use */
} tex;
struct {
unsigned offset; /**< offset in bytes */
unsigned size; /**< size of the accessible sub-range in bytes */
} buf;
} u;
};
/**
* Subregion of 1D/2D/3D image resource.
*/
struct pipe_box
{
/* Fields only used by textures use int16_t instead of int.
* x and width are used by buffers, so they need the full 32-bit range.
*/
int x;
int16_t y;
int16_t z;
int width;
int16_t height;
int16_t depth;
};
/**
* A memory object/resource such as a vertex buffer or texture.
*/
struct pipe_resource
{
struct pipe_reference reference;
unsigned width0; /**< Used by both buffers and textures. */
uint16_t height0; /* Textures: The maximum height/depth/array_size is 16k. */
uint16_t depth0;
uint16_t array_size;
enum pipe_format format:16; /**< PIPE_FORMAT_x */
enum pipe_texture_target target:8; /**< PIPE_TEXTURE_x */
unsigned last_level:8; /**< Index of last mipmap level present/defined */
/** Number of samples determining quality, driving rasterizer, shading,
* and framebuffer.
*/
unsigned nr_samples:8;
/** Multiple samples within a pixel can have the same value.
* nr_storage_samples determines how many slots for different values
* there are per pixel. Only color buffers can set this lower than
* nr_samples.
*/
unsigned nr_storage_samples:8;
unsigned usage:8; /**< PIPE_USAGE_x (not a bitmask) */
unsigned bind; /**< bitmask of PIPE_BIND_x */
unsigned flags; /**< bitmask of PIPE_RESOURCE_FLAG_x */
/**
* For planar images, i.e. YUV EGLImage external, etc., pointer to the
* next plane.
*/
struct pipe_resource *next;
/* The screen pointer should be last for optimal structure packing. */
struct pipe_screen *screen; /**< screen that this texture belongs to */
};
/**
* Transfer object. For data transfer to/from a resource.
*/
struct pipe_transfer
{
struct pipe_resource *resource; /**< resource to transfer to/from */
unsigned level; /**< texture mipmap level */
enum pipe_transfer_usage usage;
struct pipe_box box; /**< region of the resource to access */
unsigned stride; /**< row stride in bytes */
unsigned layer_stride; /**< image/layer stride in bytes */
};
/**
* A vertex buffer. Typically, all the vertex data/attributes for
* drawing something will be in one buffer. But it's also possible, for
* example, to put colors in one buffer and texcoords in another.
*/
struct pipe_vertex_buffer
{
uint16_t stride; /**< stride to same attrib in next vertex, in bytes */
bool is_user_buffer;
unsigned buffer_offset; /**< offset to start of data in buffer, in bytes */
union {
struct pipe_resource *resource; /**< the actual buffer */
const void *user; /**< pointer to a user buffer */
} buffer;
};
/**
* A constant buffer. A subrange of an existing buffer can be set
* as a constant buffer.
*/
struct pipe_constant_buffer
{
struct pipe_resource *buffer; /**< the actual buffer */
unsigned buffer_offset; /**< offset to start of data in buffer, in bytes */
unsigned buffer_size; /**< how much data can be read in shader */
const void *user_buffer; /**< pointer to a user buffer if buffer == NULL */
};
/**
* An untyped shader buffer supporting loads, stores, and atomics.
*/
struct pipe_shader_buffer {
struct pipe_resource *buffer; /**< the actual buffer */
unsigned buffer_offset; /**< offset to start of data in buffer, in bytes */
unsigned buffer_size; /**< how much data can be read in shader */
};
/**
* A stream output target. The structure specifies the range vertices can
* be written to.
*
* In addition to that, the structure should internally maintain the offset
* into the buffer, which should be incremented every time something is written
* (appended) to it. The internal offset is buffer_offset + how many bytes
* have been written. The internal offset can be stored on the device
* and the CPU actually doesn't have to query it.
*
* Note that the buffer_size variable is actually specifying the available
* space in the buffer, not the size of the attached buffer.
* In other words, in the majority of cases buffer_size would simply be
* 'buffer->width0 - buffer_offset', so buffer_size refers to the size
* of the buffer left, after accounting for buffer offset, for stream output
* to write to.
*
* Use PIPE_QUERY_SO_STATISTICS to know how many primitives have
* actually been written.
*/
struct pipe_stream_output_target
{
struct pipe_reference reference;
struct pipe_resource *buffer; /**< the output buffer */
struct pipe_context *context; /**< context this SO target belongs to */
unsigned buffer_offset; /**< offset where data should be written, in bytes */
unsigned buffer_size; /**< how much data is allowed to be written */
};
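/* Illustrative helper, not part of the source: point a stream-output
 * target at the tail of a buffer, using the common buffer_size
 * computation described above. */
static void
so_target_use_tail(struct pipe_stream_output_target *t,
                   struct pipe_resource *buf, unsigned offset)
{
   t->buffer = buf;
   t->buffer_offset = offset;
   t->buffer_size = buf->width0 - offset;
}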
/**
* Information to describe a vertex attribute (position, color, etc)
*/
struct pipe_vertex_element
{
/** Offset of this attribute, in bytes, from the start of the vertex */
unsigned src_offset:16;
/** Which vertex_buffer (as given to pipe->set_vertex_buffer()) does
* this attribute live in?
*/
unsigned vertex_buffer_index:5;
enum pipe_format src_format:11;
/** Instance data rate divisor. 0 means this is per-vertex data,
* n means per-instance data used for n consecutive instances (n > 0).
*/
unsigned instance_divisor;
};
struct pipe_draw_indirect_info
{
unsigned offset; /**< must be 4 byte aligned */
unsigned stride; /**< must be 4 byte aligned */
unsigned draw_count; /**< number of indirect draws */
unsigned indirect_draw_count_offset; /**< must be 4 byte aligned */
/* Indirect draw parameters resource is laid out as follows:
*
* if using indexed drawing:
* struct {
* uint32_t count;
* uint32_t instance_count;
* uint32_t start;
* int32_t index_bias;
* uint32_t start_instance;
* };
* otherwise:
* struct {
* uint32_t count;
* uint32_t instance_count;
* uint32_t start;
* uint32_t start_instance;
* };
*/
struct pipe_resource *buffer;
/* Indirect draw count resource: If not NULL, contains a 32-bit value which
* is to be used as the real draw_count.
*/
struct pipe_resource *indirect_draw_count;
};
/**
* Information to describe a draw_vbo call.
*/
struct pipe_draw_info
{
ubyte index_size; /**< if 0, the draw is not indexed. */
enum pipe_prim_type mode:8; /**< the mode of the primitive */
unsigned primitive_restart:1;
unsigned has_user_indices:1; /**< if true, use index.user_buffer */
ubyte vertices_per_patch; /**< the number of vertices per patch */
/**
* Direct draws: start is the index of the first vertex
* Non-indexed indirect draws: not used
* Indexed indirect draws: start is added to the indirect start.
*/
unsigned start;
unsigned count; /**< number of vertices */
unsigned start_instance; /**< first instance id */
unsigned instance_count; /**< number of instances */
unsigned drawid; /**< id of this draw in a multidraw */
/**
* For indexed drawing, these fields apply after index lookup.
*/
int index_bias; /**< a bias to be added to each index */
unsigned min_index; /**< the min index */
unsigned max_index; /**< the max index */
/**
* Primitive restart enable/index (only applies to indexed drawing)
*/
unsigned restart_index;
/* Pointers must be at the end for an optimal structure layout on 64-bit. */
/**
* An index buffer. When an index buffer is bound, all indices to vertices
* will be looked up from the buffer.
*
* If has_user_indices, use index.user, else use index.resource.
*/
union {
struct pipe_resource *resource; /**< real buffer */
const void *user; /**< pointer to a user buffer */
} index;
struct pipe_draw_indirect_info *indirect; /**< Indirect draw. */
/**
* Stream output target. If not NULL, it's used to provide the 'count'
* parameter based on the number of vertices captured by the stream output
* stage. (or generally, based on the number of bytes captured)
*
* Only 'mode', 'start_instance', and 'instance_count' are taken into
* account, all the other variables from pipe_draw_info are ignored.
*
* 'start' is implicitly 0 and 'count' is set as discussed above.
* The draw command is non-indexed.
*
* Note that this only provides the count. The vertex buffers must
* be set via set_vertex_buffers manually.
*/
struct pipe_stream_output_target *count_from_stream_output;
};
/**
* Information to describe a blit call.
*/
struct pipe_blit_info
{
struct {
struct pipe_resource *resource;
unsigned level;
struct pipe_box box; /**< negative width, height only legal for src */
/* For pipe_surface-like format casting: */
enum pipe_format format; /**< must be supported for sampling (src)
or rendering (dst), ZS is always supported */
} dst, src;
unsigned mask; /**< bitmask of PIPE_MASK_R/G/B/A/Z/S */
unsigned filter; /**< PIPE_TEX_FILTER_* */
bool scissor_enable;
struct pipe_scissor_state scissor;
/* Window rectangles can either be inclusive or exclusive. */
bool window_rectangle_include;
unsigned num_window_rectangles;
struct pipe_scissor_state window_rectangles[PIPE_MAX_WINDOW_RECTANGLES];
bool render_condition_enable; /**< whether the blit should honor the
current render condition */
bool alpha_blend; /* dst.rgb = src.rgb * src.a + dst.rgb * (1 - src.a) */
};
/**
* Information to describe a launch_grid call.
*/
struct pipe_grid_info
{
/**
* For drivers that use PIPE_SHADER_IR_NATIVE as their preferred IR, this
* value will be the index of the kernel in the opencl.kernels metadata
* list.
*/
uint32_t pc;
/**
* Will be used to initialize the INPUT resource, and it should point to a
* buffer of at least pipe_compute_state::req_input_mem bytes.
*/
void *input;
/**
* Grid number of dimensions, 1-3, e.g. the work_dim parameter passed to
* clEnqueueNDRangeKernel. Note block[] and grid[] must be padded with
* 1 for unused dimensions.
*/
uint work_dim;
/**
* Determine the layout of the working block (in thread units) to be used.
*/
uint block[3];
/**
* last_block allows disabling threads at the farthermost grid boundary.
* Full blocks as specified by "block" are launched, but the threads
* outside of "last_block" dimensions are disabled.
*
* If a block touches the grid boundary in the i-th axis, threads with
* THREAD_ID[i] >= last_block[i] are disabled.
*
* If last_block[i] is 0, it has the same behavior as last_block[i] = block[i],
* meaning no effect.
*
* It's equivalent to doing this at the beginning of the compute shader:
*
* for (i = 0; i < 3; i++) {
* if (block_id[i] == grid[i] - 1 &&
* last_block[i] && thread_id[i] >= last_block[i])
* return;
* }
*/
uint last_block[3];
/**
* Determine the layout of the grid (in block units) to be used.
*/
uint grid[3];
/* Indirect compute parameters resource: If not NULL, block sizes are taken
* from this buffer instead, which is laid out as follows:
*
* struct {
* uint32_t num_blocks_x;
* uint32_t num_blocks_y;
* uint32_t num_blocks_z;
* };
*/
struct pipe_resource *indirect;
unsigned indirect_offset; /**< must be 4 byte aligned */
};
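/* Illustrative worked example, names hypothetical: dispatch exactly 100
 * threads with a block size of 64. Two full blocks are launched, but
 * threads with thread_id.x >= 36 in the boundary block are disabled, so
 * 64 + 36 = 100 threads run. */
static void
dispatch_100_threads(struct pipe_context *pipe)
{
   struct pipe_grid_info info = {0};
   info.work_dim = 1;
   info.block[0] = 64; info.block[1] = 1; info.block[2] = 1;
   info.grid[0] = 2;   info.grid[1] = 1;  info.grid[2] = 1;
   info.last_block[0] = 36;
   pipe->launch_grid(pipe, &info);
}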
/**
* Structure used as a header for serialized compute programs.
*/
struct pipe_binary_program_header
{
uint32_t num_bytes; /**< Number of bytes in the LLVM bytecode program. */
char blob[];
};
struct pipe_compute_state
{
enum pipe_shader_ir ir_type; /**< IR type contained in prog. */
const void *prog; /**< Compute program to be executed. */
unsigned req_local_mem; /**< Required size of the LOCAL resource. */
unsigned req_private_mem; /**< Required size of the PRIVATE resource. */
unsigned req_input_mem; /**< Required size of the INPUT resource. */
};
/**
* Structure that contains a callback for debug messages from the driver back
* to the state tracker.
*/
struct pipe_debug_callback
{
/**
* When set to \c true, the callback may be called asynchronously from a
* driver-created thread.
*/
bool async;
/**
* Callback for the driver to report debug/performance/etc information back
* to the state tracker.
*
* \param data user-supplied data pointer
* \param id message type identifier, if pointed value is 0, then a
* new id is assigned
* \param type PIPE_DEBUG_TYPE_*
* \param format printf-style format string
* \param args args for format string
*/
void (*debug_message)(void *data,
unsigned *id,
enum pipe_debug_type type,
const char *fmt,
va_list args);
void *data;
};
/**
* Structure that contains a callback for device reset messages from the driver
* back to the state tracker.
*
* The callback must not be called from driver-created threads.
*/
struct pipe_device_reset_callback
{
/**
* Callback for the driver to report when a device reset is detected.
*
* \param data user-supplied data pointer
* \param status PIPE_*_RESET
*/
void (*reset)(void *data, enum pipe_reset_status status);
void *data;
};
/**
* Information about memory usage. All sizes are in kilobytes.
*/
struct pipe_memory_info
{
unsigned total_device_memory; /**< size of device memory, e.g. VRAM */
unsigned avail_device_memory; /**< free device memory at the moment */
unsigned total_staging_memory; /**< size of staging memory, e.g. GART */
unsigned avail_staging_memory; /**< free staging memory at the moment */
unsigned device_memory_evicted; /**< size of memory evicted (monotonic counter) */
unsigned nr_device_memory_evictions; /**< # of evictions (monotonic counter) */
};
/**
* Structure that contains information about external memory
*/
struct pipe_memory_object
{
bool dedicated;
};
#ifdef __cplusplus
}
#endif
#endif


@ -77,7 +77,7 @@
#include "glheader.h"
#include "imports.h"
#include "accum.h"
#include "api_exec.h"
#include "api_loopback.h"
@ -152,6 +152,7 @@
#include "compiler/glsl/builtin_functions.h"
#include "compiler/glsl/glsl_parser_extras.h"
#include <stdbool.h>
#include "util/u_memory.h"
#ifndef MESA_VERBOSE
@ -181,7 +182,7 @@ _mesa_notifySwapBuffers(struct gl_context *ctx)
{
if (MESA_VERBOSE & VERBOSE_SWAPBUFFERS)
_mesa_debug(ctx, "SwapBuffers\n");
FLUSH_CURRENT( ctx, 0 );
FLUSH_VERTICES(ctx, 0);
if (ctx->Driver.Flush) {
ctx->Driver.Flush(ctx);
}
@ -367,15 +368,15 @@ one_time_fini(void)
*
* \sa _math_init().
*/
static void
one_time_init( struct gl_context *ctx )
void
_mesa_initialize(void)
{
static GLbitfield api_init_mask = 0x0;
static bool initialized;
mtx_lock(&OneTimeLock);
/* truly one-time init */
if (!api_init_mask) {
if (!initialized) {
GLuint i;
STATIC_ASSERT(sizeof(GLbyte) == 1);
@ -387,7 +388,7 @@ one_time_init( struct gl_context *ctx )
_mesa_locale_init();
_mesa_one_time_init_extension_overrides(ctx);
_mesa_one_time_init_extension_overrides();
_mesa_get_cpu_features();
@ -399,7 +400,7 @@ one_time_init( struct gl_context *ctx )
#if defined(DEBUG)
if (MESA_VERBOSE != 0) {
_mesa_debug(ctx, "Mesa " PACKAGE_VERSION " DEBUG build" MESA_GIT_SHA1 "\n");
_mesa_debug(NULL, "Mesa " PACKAGE_VERSION " DEBUG build" MESA_GIT_SHA1 "\n");
}
#endif
@ -407,14 +408,11 @@ one_time_init( struct gl_context *ctx )
* unnecessary creation/destruction of glsl types.
*/
glsl_type_singleton_init_or_ref();
}
/* per-API one-time init */
if (!(api_init_mask & (1 << ctx->API))) {
_mesa_init_remap_table();
}
api_init_mask |= 1 << ctx->API;
initialized = true;
mtx_unlock(&OneTimeLock);
}
@ -754,6 +752,8 @@ _mesa_init_constants(struct gl_constants *consts, gl_api api)
consts->ConservativeRasterDilateRange[0] = 0.0;
consts->ConservativeRasterDilateRange[1] = 0.0;
consts->ConservativeRasterDilateGranularity = 0.0;
consts->glBeginEndBufferSize = 512 * 1024;
}
@ -1204,7 +1204,7 @@ _mesa_initialize_context(struct gl_context *ctx,
_mesa_override_gl_version(ctx);
/* misc one-time initializations */
one_time_init(ctx);
_mesa_initialize();
/* Plug in driver functions and context pointer here.
* This is important because when we call alloc_shared_state() below
@ -1763,6 +1763,7 @@ _mesa_make_current( struct gl_context *newCtx,
* changed since the last time this FBO was bound).
*/
_mesa_update_draw_buffers(newCtx);
_mesa_update_allow_draw_out_of_order(newCtx);
}
if (!newCtx->ReadBuffer || _mesa_is_winsys_fbo(newCtx->ReadBuffer)) {
_mesa_reference_framebuffer(&newCtx->ReadBuffer, readBuffer);
@ -1874,7 +1875,6 @@ void
_mesa_flush(struct gl_context *ctx)
{
FLUSH_VERTICES( ctx, 0 );
FLUSH_CURRENT( ctx, 0 );
if (ctx->Driver.Flush) {
ctx->Driver.Flush(ctx);
}
@ -1895,7 +1895,6 @@ _mesa_Finish(void)
ASSERT_OUTSIDE_BEGIN_END(ctx);
FLUSH_VERTICES(ctx, 0);
FLUSH_CURRENT(ctx, 0);
if (ctx->Driver.Finish) {
ctx->Driver.Finish(ctx);


@ -50,7 +50,7 @@
#include "errors.h"
#include "imports.h"
#include "extensions.h"
#include "mtypes.h"
#include "vbo/vbo.h"
@ -66,7 +66,7 @@ struct _glapi_table;
/** \name Visual-related functions */
/*@{*/
extern struct gl_config *
_mesa_create_visual( GLboolean dbFlag,
GLboolean stereoFlag,
@ -107,6 +107,9 @@ _mesa_destroy_visual( struct gl_config *vis );
/** \name Context-related functions */
/*@{*/
extern void
_mesa_initialize(void);
extern GLboolean
_mesa_initialize_context( struct gl_context *ctx,
gl_api api,
@ -244,14 +247,20 @@ do { \
do { \
if (MESA_VERBOSE & VERBOSE_STATE) \
_mesa_debug(ctx, "FLUSH_FOR_DRAW in %s\n", __func__); \
if (ctx->Driver.NeedFlush) \
vbo_exec_FlushVertices(ctx, ctx->Driver.NeedFlush); \
if (ctx->Driver.NeedFlush) { \
if (ctx->_AllowDrawOutOfOrder) { \
if (ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT) \
vbo_exec_FlushVertices(ctx, FLUSH_UPDATE_CURRENT); \
} else { \
vbo_exec_FlushVertices(ctx, ctx->Driver.NeedFlush); \
} \
} \
} while (0)
/**
* Macro to assert that the API call was made outside the
* glBegin()/glEnd() pair, with return value.
*
* \param ctx GL context.
* \param retval value to return in case the assertion fails.
*/
@ -266,7 +275,7 @@ do { \
/**
* Macro to assert that the API call was made outside the
* glBegin()/glEnd() pair.
*
* \param ctx GL context.
*/
#define ASSERT_OUTSIDE_BEGIN_END(ctx) \


@ -34,6 +34,7 @@
#include "glheader.h"
#include "formats.h"
#include "menums.h"
#include "compiler/shader_enums.h"
struct gl_bitmap_atlas;
struct gl_buffer_object;
@ -70,6 +71,9 @@ struct _mesa_index_buffer;
*/
#define MESA_MAP_NOWAIT_BIT 0x4000
/* Mapping a buffer is allowed from any thread. */
#define MESA_MAP_THREAD_SAFE_BIT 0x8000
/**
* Device driver function table.
@ -450,7 +454,8 @@ struct dd_function_table {
*/
/*@{*/
/** Allocate a new program */
struct gl_program * (*NewProgram)(struct gl_context *ctx, GLenum target,
struct gl_program * (*NewProgram)(struct gl_context *ctx,
gl_shader_stage stage,
GLuint id, bool is_arb_asm);
/** Delete a program */
void (*DeleteProgram)(struct gl_context *ctx, struct gl_program *prog);
@ -528,6 +533,8 @@ struct dd_function_table {
* \param index_bounds_valid are min_index and max_index valid?
* \param min_index lowest vertex index used
* \param max_index highest vertex index used
* \param num_instances instance count from ARB_draw_instanced
* \param base_instance base instance from ARB_base_instance
* \param tfb_vertcount if non-null, indicates which transform feedback
* object has the vertex count.
* \param tfb_stream If called via DrawTransformFeedbackStream, specifies
@ -542,8 +549,9 @@ struct dd_function_table {
const struct _mesa_index_buffer *ib,
GLboolean index_bounds_valid,
GLuint min_index, GLuint max_index,
GLuint num_instances, GLuint base_instance,
struct gl_transform_feedback_object *tfb_vertcount,
unsigned tfb_stream, struct gl_buffer_object *indirect);
unsigned tfb_stream);
/**


@ -42,21 +42,26 @@ struct gl_context;
struct _mesa_prim
{
GLuint mode:8; /**< GL_POINTS, GL_LINES, GL_QUAD_STRIP, etc */
GLuint indexed:1;
GLuint begin:1;
GLuint end:1;
GLuint is_indirect:1;
GLuint pad:20;
GLubyte mode; /**< GL_POINTS, GL_LINES, GL_QUAD_STRIP, etc */
/**
* tnl: If true, line stipple emulation will reset the pattern walker.
* vbo: If false and the primitive is a line loop, the first vertex is
* the beginning of the line loop and it won't be drawn.
* Instead, it will be moved to the end.
*/
bool begin;
/**
* tnl: If true and the primitive is a line loop, it will be closed.
* vbo: Same as tnl.
*/
bool end;
GLuint start;
GLuint count;
GLint basevertex;
GLuint num_instances;
GLuint base_instance;
GLuint draw_id;
GLsizeiptr indirect_offset;
};
/* Would like to call this a "vbo_index_buffer", but this would be
@ -66,7 +71,7 @@ struct _mesa_prim
struct _mesa_index_buffer
{
GLuint count;
unsigned index_size;
uint8_t index_size_shift; /* logbase2(index_size) */
struct gl_buffer_object *obj;
const void *ptr;
};
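The shift encoding replaces the old byte count: index_size_shift is 0, 1 or 2 for GL_UNSIGNED_BYTE, GL_UNSIGNED_SHORT and GL_UNSIGNED_INT, so per-index byte math becomes shifts. A sketch (ib and i are hypothetical):

unsigned index_size  = 1u << ib->index_size_shift;          /* 1, 2 or 4 */
size_t   byte_offset = (size_t) i << ib->index_size_shift;  /* == i * index_size */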
@ -76,25 +81,88 @@ void
_mesa_initialize_exec_dispatch(const struct gl_context *ctx,
struct _glapi_table *exec);
void GLAPIENTRY
_mesa_EvalMesh1(GLenum mode, GLint i1, GLint i2);
void
_mesa_draw_indirect(struct gl_context *ctx, GLuint mode,
struct gl_buffer_object *indirect_data,
GLsizeiptr indirect_offset, unsigned draw_count,
unsigned stride,
struct gl_buffer_object *indirect_draw_count_buffer,
GLsizeiptr indirect_draw_count_offset,
const struct _mesa_index_buffer *ib);
void GLAPIENTRY
_mesa_EvalMesh2(GLenum mode, GLint i1, GLint i2, GLint j1, GLint j2);
void GLAPIENTRY
_mesa_DrawElementsInstancedARB(GLenum mode, GLsizei count, GLenum type,
const GLvoid * indices, GLsizei numInstances);
void GLAPIENTRY
_mesa_DrawArraysInstancedBaseInstance(GLenum mode, GLint first,
GLsizei count, GLsizei numInstances,
GLuint baseInstance);
void GLAPIENTRY
_mesa_DrawElementsInstancedBaseVertex(GLenum mode, GLsizei count,
GLenum type, const GLvoid * indices,
GLsizei numInstances,
GLint basevertex);
void GLAPIENTRY
_mesa_DrawElementsInstancedBaseInstance(GLenum mode, GLsizei count,
GLenum type,
const GLvoid *indices,
GLsizei numInstances,
GLuint baseInstance);
void GLAPIENTRY
_mesa_DrawTransformFeedbackStream(GLenum mode, GLuint name, GLuint stream);
void GLAPIENTRY
_mesa_DrawTransformFeedbackInstanced(GLenum mode, GLuint name,
GLsizei primcount);
void GLAPIENTRY
_mesa_DrawTransformFeedbackStreamInstanced(GLenum mode, GLuint name,
GLuint stream,
GLsizei primcount);
void GLAPIENTRY
_mesa_DrawArraysIndirect(GLenum mode, const GLvoid *indirect);
void GLAPIENTRY
_mesa_DrawElementsIndirect(GLenum mode, GLenum type, const GLvoid *indirect);
void GLAPIENTRY
_mesa_MultiDrawArraysIndirect(GLenum mode, const GLvoid *indirect,
GLsizei primcount, GLsizei stride);
void GLAPIENTRY
_mesa_MultiDrawElementsIndirect(GLenum mode, GLenum type,
const GLvoid *indirect,
GLsizei primcount, GLsizei stride);
void GLAPIENTRY
_mesa_MultiDrawArraysIndirectCountARB(GLenum mode, GLintptr indirect,
GLintptr drawcount_offset,
GLsizei maxdrawcount, GLsizei stride);
void GLAPIENTRY
_mesa_MultiDrawElementsIndirectCountARB(GLenum mode, GLenum type,
GLintptr indirect,
GLintptr drawcount_offset,
GLsizei maxdrawcount, GLsizei stride);
void GLAPIENTRY
_mesa_DrawArrays(GLenum mode, GLint first, GLsizei count);
void GLAPIENTRY
_mesa_DrawArraysInstanced(GLenum mode, GLint first, GLsizei count,
GLsizei primcount);
_mesa_DrawArraysInstancedARB(GLenum mode, GLint first, GLsizei count,
GLsizei primcount);
void GLAPIENTRY
_mesa_DrawElementsInstancedBaseVertexBaseInstance(GLenum mode,
GLsizei count,
GLenum type,
const GLvoid *indices,
GLsizei numInstances,
GLint basevertex,
GLuint baseInstance);
void GLAPIENTRY
_mesa_DrawElements(GLenum mode, GLsizei count, GLenum type,


@ -47,7 +47,7 @@ struct gl_extensions;
extern void _mesa_enable_sw_extensions(struct gl_context *ctx);
extern void _mesa_one_time_init_extension_overrides(struct gl_context *ctx);
extern void _mesa_one_time_init_extension_overrides(void);
extern void _mesa_init_extensions(struct gl_extensions *extentions);


@ -234,7 +234,7 @@ EXT(EXT_draw_buffers , dummy_true
EXT(EXT_draw_buffers2 , EXT_draw_buffers2 , GLL, GLC, x , x , 2006)
EXT(EXT_draw_buffers_indexed , ARB_draw_buffers_blend , x , x , x , 30, 2014)
EXT(EXT_draw_elements_base_vertex , ARB_draw_elements_base_vertex , x , x , x , ES2, 2014)
EXT(EXT_draw_instanced , ARB_draw_instanced , GLL, GLC, x , x , 2006)
EXT(EXT_draw_instanced , ARB_draw_instanced , GLL, GLC, x , ES2 , 2006)
EXT(EXT_draw_range_elements , dummy_true , GLL, x , x , x , 1997)
EXT(EXT_float_blend , EXT_float_blend , x , x , x , 30, 2015)
EXT(EXT_fog_coord , dummy_true , GLL, x , x , x , 1999)
@ -343,6 +343,7 @@ EXT(IBM_texture_mirrored_repeat , dummy_true
EXT(INGR_blend_func_separate , EXT_blend_func_separate , GLL, x , x , x , 1999)
EXT(INTEL_blackhole_render , INTEL_blackhole_render , 30, 30, x , ES2, 2018)
EXT(INTEL_conservative_rasterization , INTEL_conservative_rasterization , x , GLC, x , 31, 2013)
EXT(INTEL_performance_query , INTEL_performance_query , GLL, GLC, x , ES2, 2013)
EXT(INTEL_shader_atomic_float_minmax , INTEL_shader_atomic_float_minmax , GLL, GLC, x , x , 2018)
@ -370,6 +371,7 @@ EXT(MESA_ycbcr_texture , MESA_ycbcr_texture
EXT(NVX_gpu_memory_info , NVX_gpu_memory_info , GLL, GLC, x , x , 2013)
EXT(NV_alpha_to_coverage_dither_control , NV_alpha_to_coverage_dither_control , GLL, GLC, x , ES2, 2017)
EXT(NV_blend_square , dummy_true , GLL, x , x , x , 1999)
EXT(NV_compute_shader_derivatives , NV_compute_shader_derivatives , GLL, GLC, x , 32, 2018)
EXT(NV_conditional_render , NV_conditional_render , GLL, GLC, x , ES2, 2008)
@ -377,6 +379,7 @@ EXT(NV_conservative_raster , NV_conservative_raster
EXT(NV_conservative_raster_dilate , NV_conservative_raster_dilate , GLL, GLC, ES1, ES2, 2015)
EXT(NV_conservative_raster_pre_snap , NV_conservative_raster_pre_snap , GLL, GLC, ES1, ES2, 2017)
EXT(NV_conservative_raster_pre_snap_triangles, NV_conservative_raster_pre_snap_triangles, GLL, GLC, ES1, ES2, 2015)
EXT(NV_copy_image , NV_copy_image , GLL, GLC, x , x, 2009)
EXT(NV_depth_clamp , ARB_depth_clamp , GLL, GLC, x , x , 2001)
EXT(NV_draw_buffers , dummy_true , x , x , x , ES2, 2011)
EXT(NV_fbo_color_attachments , dummy_true , x , x , x , ES2, 2010)
@ -386,6 +389,7 @@ EXT(NV_fragment_shader_interlock , ARB_fragment_shader_interlock
EXT(NV_image_formats , ARB_shader_image_load_store , x , x , x , 31, 2014)
EXT(NV_light_max_exponent , dummy_true , GLL, x , x , x , 1999)
EXT(NV_packed_depth_stencil , dummy_true , GLL, GLC, x , x , 2000)
EXT(NV_pixel_buffer_object , EXT_pixel_buffer_object , x , x , x , ES2, 2012)
EXT(NV_point_sprite , NV_point_sprite , GLL, GLC, x , x , 2001)
EXT(NV_primitive_restart , NV_primitive_restart , GLL, x , x , x , 2002)
EXT(NV_read_buffer , dummy_true , x , x , x , ES2, 2011)
@ -399,6 +403,8 @@ EXT(NV_texture_barrier , NV_texture_barrier
EXT(NV_texture_env_combine4 , NV_texture_env_combine4 , GLL, x , x , x , 1999)
EXT(NV_texture_rectangle , NV_texture_rectangle , GLL, x , x , x , 2000)
EXT(NV_vdpau_interop , NV_vdpau_interop , GLL, GLC, x , x , 2010)
EXT(NV_viewport_array2 , NV_viewport_array2 , GLL, GLC, x , 31, 2015)
EXT(NV_viewport_swizzle , NV_viewport_swizzle , GLL, GLC, x , 31, 2015)
EXT(OES_EGL_image , OES_EGL_image , GLL, GLC, ES1, ES2, 2006) /* FIXME: Mesa expects GL_OES_EGL_image to be available in OpenGL contexts. */
EXT(OES_EGL_image_external , OES_EGL_image_external , x , x , ES1, ES2, 2010)


@ -619,8 +619,12 @@ typedef enum pipe_format mesa_format;
/* Packed to array format adapters */
#if UTIL_ARCH_LITTLE_ENDIAN
#define MESA_FORMAT_RGBA_UINT8 MESA_FORMAT_R8G8B8A8_UINT
#define MESA_FORMAT_RGBA_UNORM8 MESA_FORMAT_R8G8B8A8_UNORM
#define MESA_FORMAT_RGBA_SNORM8 MESA_FORMAT_R8G8B8A8_SNORM
#else
#define MESA_FORMAT_RGBA_UINT8 MESA_FORMAT_A8B8G8R8_UINT
#define MESA_FORMAT_RGBA_UNORM8 MESA_FORMAT_A8B8G8R8_UNORM
#define MESA_FORMAT_RGBA_SNORM8 MESA_FORMAT_A8B8G8R8_SNORM
#endif
extern const char *

third_party/rust/glslopt/glsl-optimizer/src/mesa/main/glthread.h (vendored, new file, 142 lines)

@ -0,0 +1,142 @@
/*
* Copyright © 2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef _GLTHREAD_H
#define _GLTHREAD_H
/* The size of one batch and the maximum size of one call.
*
* This should be as low as possible, so that:
* - multiple synchronizations within a frame don't slow us down much
* - a smaller number of calls per frame can still get decent parallelism
* - the memory footprint of the queue is low, and with that comes a lower
* chance of experiencing CPU cache thrashing
* but it should be high enough so that u_queue overhead remains negligible.
*/
#define MARSHAL_MAX_CMD_SIZE (8 * 1024)
/* The number of batch slots in memory.
*
* One batch is being executed, one batch is being filled, the rest are
* waiting batches. There must be at least 1 slot for a waiting batch,
* so the minimum number of batches is 3.
*/
#define MARSHAL_MAX_BATCHES 8
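Taken together, the two limits bound the ring at MARSHAL_MAX_BATCHES * MARSHAL_MAX_CMD_SIZE = 8 * 8 KiB = 64 KiB of command storage per context, small enough to stay cache-friendly while still leaving waiting batches beyond the one executing and the one being filled.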
#include <inttypes.h>
#include <stdbool.h>
#include "util/u_queue.h"
#include "GL/gl.h"
#include "compiler/shader_enums.h"
struct gl_context;
struct _mesa_HashTable;
struct glthread_vao {
GLuint Name;
GLuint CurrentElementBufferName;
GLbitfield Enabled;
GLbitfield UserPointerMask;
};
/** A single batch of commands queued up for execution. */
struct glthread_batch
{
/** Batch fence for waiting for the execution to finish. */
struct util_queue_fence fence;
/** The worker thread will access the context with this. */
struct gl_context *ctx;
/** Amount of data used by batch commands, in bytes. */
int used;
/** Data contained in the command buffer. */
#ifdef _MSC_VER
__declspec(align(8))
#else
__attribute__((aligned(8)))
#endif
uint8_t buffer[MARSHAL_MAX_CMD_SIZE];
};
struct glthread_state
{
/** Multithreaded queue. */
struct util_queue queue;
/** This is sent to the driver for framebuffer overlay / HUD. */
struct util_queue_monitoring stats;
/** Whether GLThread is enabled. */
bool enabled;
/** The ring of batches in memory. */
struct glthread_batch batches[MARSHAL_MAX_BATCHES];
/** Pointer to the batch currently being filled. */
struct glthread_batch *next_batch;
/** Index of the last submitted batch. */
unsigned last;
/** Index of the batch being filled and about to be submitted. */
unsigned next;
/** Vertex Array objects tracked by glthread independently of Mesa. */
struct _mesa_HashTable *VAOs;
struct glthread_vao *CurrentVAO;
struct glthread_vao *LastLookedUpVAO;
struct glthread_vao DefaultVAO;
int ClientActiveTexture;
/** Currently-bound buffer object IDs. */
GLuint CurrentArrayBufferName;
GLuint CurrentDrawIndirectBufferName;
};
void _mesa_glthread_init(struct gl_context *ctx);
void _mesa_glthread_destroy(struct gl_context *ctx);
void _mesa_glthread_restore_dispatch(struct gl_context *ctx, const char *func);
void _mesa_glthread_disable(struct gl_context *ctx, const char *func);
void _mesa_glthread_flush_batch(struct gl_context *ctx);
void _mesa_glthread_finish(struct gl_context *ctx);
void _mesa_glthread_finish_before(struct gl_context *ctx, const char *func);
void _mesa_glthread_BindBuffer(struct gl_context *ctx, GLenum target,
GLuint buffer);
void _mesa_glthread_DeleteBuffers(struct gl_context *ctx, GLsizei n,
const GLuint *buffers);
void _mesa_glthread_BindVertexArray(struct gl_context *ctx, GLuint id);
void _mesa_glthread_DeleteVertexArrays(struct gl_context *ctx,
GLsizei n, const GLuint *ids);
void _mesa_glthread_GenVertexArrays(struct gl_context *ctx,
GLsizei n, GLuint *arrays);
void _mesa_glthread_ClientState(struct gl_context *ctx, GLuint *vaobj,
gl_vert_attrib attrib, bool enable);
void _mesa_glthread_AttribPointer(struct gl_context *ctx,
gl_vert_attrib attrib);
#endif /* _GLTHREAD_H*/


@ -1,6 +1,6 @@
/**
* \file hash.h
* Generic hash table.
*/
/*
@ -32,8 +32,10 @@
#define HASH_H
#include <stdbool.h>
#include <stdint.h>
#include "glheader.h"
#include "imports.h"
#include "c11/threads.h"
/**


@ -1,234 +0,0 @@
/**
* \file imports.c
* Standard C library function wrappers.
*
* Imports are services which the device driver or window system or
* operating system provides to the core renderer. The core renderer (Mesa)
* will call these functions in order to do memory allocation, simple I/O,
* etc.
*
* Some drivers will want to override/replace this file with something
* specialized, but that'll be rare.
*
* Eventually, I want to roll the glheader.h file into this.
*
* \todo Functions still needed:
* - scanf
* - qsort
* - rand and RAND_MAX
*/
/*
* Mesa 3-D graphics library
*
* Copyright (C) 1999-2007 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include <stdio.h>
#include <stdarg.h>
#include "c99_math.h"
#include "imports.h"
#include "context.h"
#include "version.h"
#ifdef _GNU_SOURCE
#include <locale.h>
#ifdef __APPLE__
#include <xlocale.h>
#endif
#endif
#ifdef _WIN32
#define vsnprintf _vsnprintf
#elif defined(__IBMC__) || defined(__IBMCPP__)
extern int vsnprintf(char *str, size_t count, const char *fmt, va_list arg);
#endif
/**********************************************************************/
/** \name Memory */
/*@{*/
/**
* Allocate aligned memory.
*
* \param bytes number of bytes to allocate.
* \param alignment alignment (must be greater than zero).
*
* Allocates extra memory to accommodate rounding up the address for
* alignment and to record the real malloc address.
*
* \sa _mesa_align_free().
*/
void *
_mesa_align_malloc(size_t bytes, unsigned long alignment)
{
#if defined(HAVE_POSIX_MEMALIGN)
void *mem;
int err = posix_memalign(& mem, alignment, bytes);
if (err)
return NULL;
return mem;
#elif defined(_WIN32)
return _aligned_malloc(bytes, alignment);
#else
uintptr_t ptr, buf;
assert( alignment > 0 );
ptr = (uintptr_t)malloc(bytes + alignment + sizeof(void *));
if (!ptr)
return NULL;
buf = (ptr + alignment + sizeof(void *)) & ~(uintptr_t)(alignment - 1);
*(uintptr_t *)(buf - sizeof(void *)) = ptr;
#ifndef NDEBUG
/* mark the non-aligned area */
while ( ptr < buf - sizeof(void *) ) {
*(unsigned long *)ptr = 0xcdcdcdcd;
ptr += sizeof(unsigned long);
}
#endif
return (void *) buf;
#endif /* defined(HAVE_POSIX_MEMALIGN) */
}
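A worked example of the fallback path (values chosen for illustration): if malloc returns ptr = 0x1003 with alignment = 16 and sizeof(void *) = 8, then buf = (0x1003 + 16 + 8) & ~15 = 0x101B & ~0xF = 0x1010, which is 16-byte aligned, and the real address 0x1003 is stored at buf - 8 = 0x1008 for _mesa_align_free() to recover.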
/**
* Same as _mesa_align_malloc(), but using calloc(1, ) instead of
* malloc()
*/
void *
_mesa_align_calloc(size_t bytes, unsigned long alignment)
{
#if defined(HAVE_POSIX_MEMALIGN)
void *mem;
mem = _mesa_align_malloc(bytes, alignment);
if (mem != NULL) {
(void) memset(mem, 0, bytes);
}
return mem;
#elif defined(_WIN32)
void *mem;
mem = _aligned_malloc(bytes, alignment);
if (mem != NULL) {
(void) memset(mem, 0, bytes);
}
return mem;
#else
uintptr_t ptr, buf;
assert( alignment > 0 );
ptr = (uintptr_t)calloc(1, bytes + alignment + sizeof(void *));
if (!ptr)
return NULL;
buf = (ptr + alignment + sizeof(void *)) & ~(uintptr_t)(alignment - 1);
*(uintptr_t *)(buf - sizeof(void *)) = ptr;
#ifndef NDEBUG
/* mark the non-aligned area */
while ( ptr < buf - sizeof(void *) ) {
*(unsigned long *)ptr = 0xcdcdcdcd;
ptr += sizeof(unsigned long);
}
#endif
return (void *)buf;
#endif /* defined(HAVE_POSIX_MEMALIGN) */
}
/**
* Free memory which was allocated with either _mesa_align_malloc()
* or _mesa_align_calloc().
* \param ptr pointer to the memory to be freed.
* The actual address to free is stored in the word immediately before the
* address the client sees.
* Note that it is legal to pass a NULL pointer to this function; it will
* be handled accordingly.
*/
void
_mesa_align_free(void *ptr)
{
#if defined(HAVE_POSIX_MEMALIGN)
free(ptr);
#elif defined(_WIN32)
_aligned_free(ptr);
#else
if (ptr) {
void **cubbyHole = (void **) ((char *) ptr - sizeof(void *));
void *realAddr = *cubbyHole;
free(realAddr);
}
#endif /* defined(HAVE_POSIX_MEMALIGN) */
}
/**
* Reallocate memory, with alignment.
*/
void *
_mesa_align_realloc(void *oldBuffer, size_t oldSize, size_t newSize,
unsigned long alignment)
{
#if defined(_WIN32)
(void) oldSize;
return _aligned_realloc(oldBuffer, newSize, alignment);
#else
const size_t copySize = (oldSize < newSize) ? oldSize : newSize;
void *newBuf = _mesa_align_malloc(newSize, alignment);
if (newBuf && oldBuffer && copySize > 0) {
memcpy(newBuf, oldBuffer, copySize);
}
_mesa_align_free(oldBuffer);
return newBuf;
#endif
}
/*@}*/
/** Needed due to #ifdef's, above. */
int
_mesa_vsnprintf(char *str, size_t size, const char *fmt, va_list args)
{
return vsnprintf( str, size, fmt, args);
}
/** Wrapper around vsnprintf() */
int
_mesa_snprintf( char *str, size_t size, const char *fmt, ... )
{
int r;
va_list args;
va_start( args, fmt );
r = vsnprintf( str, size, fmt, args );
va_end( args );
return r;
}


@ -1,321 +0,0 @@
/*
* Mesa 3-D graphics library
*
* Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
* \file imports.h
* Standard C library function wrappers.
*
* This file provides wrappers for all the standard C library functions
* like malloc(), free(), printf(), getenv(), etc.
*/
#ifndef IMPORTS_H
#define IMPORTS_H
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include "compiler.h"
#include "glheader.h"
#include "util/bitscan.h"
#ifdef __cplusplus
extern "C" {
#endif
/**********************************************************************/
/** Memory macros */
/*@{*/
/** Allocate a structure of type \p T */
#define MALLOC_STRUCT(T) (struct T *) malloc(sizeof(struct T))
/** Allocate and zero a structure of type \p T */
#define CALLOC_STRUCT(T) (struct T *) calloc(1, sizeof(struct T))
/*@}*/
/*
* For GL_ARB_vertex_buffer_object we need to treat vertex array pointers
* as offsets into buffer stores. Since the vertex array pointer and
* buffer store pointer are both pointers and we need to add them, we use
* this macro.
* Both pointers/offsets are expressed in bytes.
*/
#define ADD_POINTERS(A, B) ( (GLubyte *) (A) + (uintptr_t) (B) )
/**
* Sometimes we treat GLfloats as GLints. On x86 systems, moving a float
* as an int (thereby using integer registers instead of FP registers) is
* a performance win. Typically, this can be done with ordinary casts.
* But with gcc's -fstrict-aliasing flag (which defaults to on in gcc 3.0)
* these casts generate warnings.
* The following union typedef is used to solve that.
*/
typedef union { GLfloat f; GLint i; GLuint u; } fi_type;
/*@}*/
/***
*** LOG2: Log base 2 of float
***/
static inline GLfloat LOG2(GLfloat x)
{
#if 0
/* This is pretty fast, but not accurate enough (only 2 fractional bits).
* Based on code from http://www.stereopsis.com/log2.html
*/
const GLfloat y = x * x * x * x;
const GLuint ix = *((GLuint *) &y);
const GLuint exp = (ix >> 23) & 0xFF;
const GLint log2 = ((GLint) exp) - 127;
return (GLfloat) log2 * (1.0 / 4.0); /* 4, because of x^4 above */
#endif
/* Pretty fast, and accurate.
* Based on code from http://www.flipcode.com/totd/
*/
fi_type num;
GLint log_2;
num.f = x;
log_2 = ((num.i >> 23) & 255) - 128;
num.i &= ~(255 << 23);
num.i += 127 << 23;
num.f = ((-1.0f/3) * num.f + 2) * num.f - 2.0f/3;
return num.f + log_2;
}
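A quick check of the polynomial path (illustrative): for x = 8.0f the stored exponent is 130, so log_2 = 130 - 128 = 2; rewriting the exponent to 127 leaves num.f = 1.0, the polynomial evaluates to ((-1/3)*1 + 2)*1 - 2/3 = 1.0, and the function returns 1.0 + 2 = 3.0, which is exactly log2(8).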
/**
* finite macro.
*/
#if defined(_MSC_VER)
# define finite _finite
#endif
/***
*** IS_INF_OR_NAN: test if float is infinite or NaN
***/
#if defined(isfinite)
#define IS_INF_OR_NAN(x) (!isfinite(x))
#elif defined(finite)
#define IS_INF_OR_NAN(x) (!finite(x))
#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#define IS_INF_OR_NAN(x) (!isfinite(x))
#else
#define IS_INF_OR_NAN(x) (!finite(x))
#endif
/**
* Convert float to int by rounding to nearest integer, away from zero.
*/
static inline int IROUND(float f)
{
return (int) ((f >= 0.0F) ? (f + 0.5F) : (f - 0.5F));
}
/**
* Convert double to int by rounding to nearest integer, away from zero.
*/
static inline int IROUNDD(double d)
{
return (int) ((d >= 0.0) ? (d + 0.5) : (d - 0.5));
}
/**
* Convert float to int64 by rounding to nearest integer, away from zero.
*/
static inline GLint64 IROUND64(float f)
{
return (GLint64) ((f >= 0.0F) ? (f + 0.5F) : (f - 0.5F));
}
/**
* Convert positive float to int by rounding to nearest integer.
*/
static inline int IROUND_POS(float f)
{
assert(f >= 0.0F);
return (int) (f + 0.5F);
}
/** Return (as an integer) floor of float */
static inline int IFLOOR(float f)
{
#if defined(USE_X86_ASM) && defined(__GNUC__) && defined(__i386__)
/*
* IEEE floor for computers that round to nearest or even.
* 'f' must be between -4194304 and 4194303.
* This floor operation is done by "(iround(f + .5) + iround(f - .5)) >> 1",
* but uses some IEEE specific tricks for better speed.
* Contributed by Josh Vanderhoof
*/
int ai, bi;
double af, bf;
af = (3 << 22) + 0.5 + (double)f;
bf = (3 << 22) + 0.5 - (double)f;
/* GCC generates an extra fstp/fld without this. */
__asm__ ("fstps %0" : "=m" (ai) : "t" (af) : "st");
__asm__ ("fstps %0" : "=m" (bi) : "t" (bf) : "st");
return (ai - bi) >> 1;
#else
int ai, bi;
double af, bf;
fi_type u;
af = (3 << 22) + 0.5 + (double)f;
bf = (3 << 22) + 0.5 - (double)f;
u.f = (float) af; ai = u.i;
u.f = (float) bf; bi = u.i;
return (ai - bi) >> 1;
#endif
}
/**
* Is x a power of two?
*/
static inline int
_mesa_is_pow_two(int x)
{
return !(x & (x - 1));
}
/**
* Round the given integer up to the next higher power of two.
* If X is zero result is undefined.
*
* Source for the fallback implementation is
* Sean Eron Anderson's webpage "Bit Twiddling Hacks"
* http://graphics.stanford.edu/~seander/bithacks.html
*
* When using the builtin function we have to do some work for the
* case when the value 1 is passed, to prevent hitting an undefined
* result from __builtin_clz. The undefined result would differ
* depending on the optimization level used for the build.
*/
static inline int32_t
_mesa_next_pow_two_32(uint32_t x)
{
#ifdef HAVE___BUILTIN_CLZ
uint32_t y = (x != 1);
return (1 + y) << ((__builtin_clz(x - y) ^ 31) );
#else
x--;
x |= x >> 1;
x |= x >> 2;
x |= x >> 4;
x |= x >> 8;
x |= x >> 16;
x++;
return x;
#endif
}
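Worked example (illustrative): for x = 17 the builtin path computes y = 1, __builtin_clz(17 - 1) = __builtin_clz(16) = 27, 27 ^ 31 = 4, and (1 + 1) << 4 = 32. The fallback path decrements to 16, smears the bits down to 31 through the shifts, and the final increment produces the same 32.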
static inline int64_t
_mesa_next_pow_two_64(uint64_t x)
{
#ifdef HAVE___BUILTIN_CLZLL
uint64_t y = (x != 1);
STATIC_ASSERT(sizeof(x) == sizeof(long long));
return (1 + y) << ((__builtin_clzll(x - y) ^ 63));
#else
x--;
x |= x >> 1;
x |= x >> 2;
x |= x >> 4;
x |= x >> 8;
x |= x >> 16;
x |= x >> 32;
x++;
return x;
#endif
}
/*
* Returns the floor of the binary logarithm of a 32-bit integer.
*/
static inline GLuint
_mesa_logbase2(GLuint n)
{
#ifdef HAVE___BUILTIN_CLZ
return (31 - __builtin_clz(n | 1));
#else
GLuint pos = 0;
if (n >= 1<<16) { n >>= 16; pos += 16; }
if (n >= 1<< 8) { n >>= 8; pos += 8; }
if (n >= 1<< 4) { n >>= 4; pos += 4; }
if (n >= 1<< 2) { n >>= 2; pos += 2; }
if (n >= 1<< 1) { pos += 1; }
return pos;
#endif
}
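For example (illustrative), _mesa_logbase2(1000) returns 9: __builtin_clz(1000 | 1) = 22 and 31 - 22 = 9, consistent with 2^9 = 512 <= 1000 < 1024 = 2^10; the fallback loop accumulates the same result as 8 + 1.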
/**********************************************************************
* Functions
*/
extern void *
_mesa_align_malloc( size_t bytes, unsigned long alignment );
extern void *
_mesa_align_calloc( size_t bytes, unsigned long alignment );
extern void
_mesa_align_free( void *ptr );
extern void *
_mesa_align_realloc(void *oldBuffer, size_t oldSize, size_t newSize,
unsigned long alignment);
extern int
_mesa_snprintf( char *str, size_t size, const char *fmt, ... ) PRINTFLIKE(3, 4);
extern int
_mesa_vsnprintf(char *str, size_t size, const char *fmt, va_list arg);
#if defined(_WIN32) && !defined(HAVE_STRTOK_R)
#define strtok_r strtok_s
#endif
#ifdef __cplusplus
}
#endif
#endif /* IMPORTS_H */


@ -34,7 +34,9 @@
#include "util/macros.h"
#include "util/u_math.h"
#include "util/rounding.h"
#include "imports.h"
#include "util/compiler.h"
#include "main/glheader.h"
#include "mesa_private.h"
/**
@ -194,6 +196,30 @@ static inline fi_type FLOAT_AS_UNION(GLfloat f)
return tmp;
}
static inline uint64_t DOUBLE_AS_UINT64(double d)
{
union {
double d;
uint64_t u64;
} tmp;
tmp.d = d;
return tmp.u64;
}
static inline double UINT64_AS_DOUBLE(uint64_t u)
{
union {
double d;
uint64_t u64;
} tmp;
tmp.u64 = u;
return tmp.d;
}
/* First sign-extend x, then return uint32_t. */
#define INT_AS_UINT(x) ((uint32_t)((int32_t)(x)))
#define FLOAT_AS_UINT(x) (FLOAT_AS_UNION(x).u)
/**
* Convert a floating point value to an unsigned fixed point value.
*
@ -666,52 +692,6 @@ minify(unsigned value, unsigned levels)
return MAX2(1, value >> levels);
}
/**
* Align a value up to an alignment value
*
* If \c value is not already aligned to the requested alignment value, it
* will be rounded up.
*
* \param value Value to be rounded
* \param alignment Alignment value to be used. This must be a power of two.
*
* \sa ROUND_DOWN_TO()
*/
static inline uintptr_t
ALIGN(uintptr_t value, int32_t alignment)
{
assert((alignment > 0) && _mesa_is_pow_two(alignment));
return (((value) + (alignment) - 1) & ~((alignment) - 1));
}
/**
* Like ALIGN(), but works with a non-power-of-two alignment.
*/
static inline uintptr_t
ALIGN_NPOT(uintptr_t value, int32_t alignment)
{
assert(alignment > 0);
return (value + alignment - 1) / alignment * alignment;
}
/**
* Align a value down to an alignment value
*
* If \c value is not already aligned to the requested alignment value, it
* will be rounded down.
*
* \param value Value to be rounded
* \param alignment Alignment value to be used. This must be a power of two.
*
* \sa ALIGN()
*/
static inline uintptr_t
ROUND_DOWN_TO(uintptr_t value, int32_t alignment)
{
assert((alignment > 0) && _mesa_is_pow_two(alignment));
return ((value) & ~(alignment - 1));
}
/** Cross product of two 3-element vectors */
static inline void
@ -808,4 +788,13 @@ DIFFERENT_SIGNS(GLfloat x, GLfloat y)
/* Stringify */
#define STRINGIFY(x) #x
/*
* For GL_ARB_vertex_buffer_object we need to treat vertex array pointers
* as offsets into buffer stores. Since the vertex array pointer and
* buffer store pointer are both pointers and we need to add them, we use
* this macro.
* Both pointers/offsets are expressed in bytes.
*/
#define ADD_POINTERS(A, B) ( (GLubyte *) (A) + (uintptr_t) (B) )
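A usage sketch (hypothetical values): with a buffer object bound, the attribute "pointer" recorded by the GL is really a byte offset, so the effective address is formed as:

const GLubyte *base  = buffer_map;        /* assumed mapping of the store */
const void *offset   = (const void *) 16; /* as passed to glVertexAttribPointer */
const GLubyte *addr  = ADD_POINTERS(base, offset); /* base + 16 */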
#endif


@ -0,0 +1,56 @@
/*
* Mesa 3-D graphics library
*
* Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
* \file mesa_private.h
* Contains mesa internal values
*
*/
#ifndef MESA_PRIVATE_H
#define MESA_PRIVATE_H
#include "glheader.h"
#ifdef __cplusplus
extern "C" {
#endif
/**
* Sometimes we treat floats as ints. On x86 systems, moving a float
* as an int (thereby using integer registers instead of FP registers) is
* a performance win. Typically, this can be done with ordinary casts.
* But with gcc's -fstrict-aliasing flag (which defaults to on in gcc 3.0)
* these casts generate warnings.
* The following union typedef is used to solve that.
*/
typedef union { float f; int i; unsigned u; } fi_type;
#ifdef __cplusplus
}
#endif
#endif /* MESA_PRIVATE_H */


@ -39,6 +39,7 @@
#include "c11/threads.h"
#include "main/glheader.h"
#include "main/glthread.h"
#include "main/menums.h"
#include "main/config.h"
#include "glapi/glapi.h"
@ -104,6 +105,7 @@ _mesa_varying_slot_in_fs(gl_varying_slot slot)
case VARYING_SLOT_TESS_LEVEL_INNER:
case VARYING_SLOT_BOUNDING_BOX0:
case VARYING_SLOT_BOUNDING_BOX1:
case VARYING_SLOT_VIEWPORT_MASK:
return GL_FALSE;
default:
return GL_TRUE;
@ -459,6 +461,7 @@ struct gl_vertex_format
{
GLenum16 Type; /**< datatype: GL_FLOAT, GL_INT, etc */
GLenum16 Format; /**< default: GL_RGBA, but may be GL_BGRA */
enum pipe_format _PipeFormat:16; /**< pipe_format for Gallium */
GLubyte Size:5; /**< components per element (1,2,3,4) */
GLubyte Normalized:1; /**< GL_ARB_vertex_program */
GLubyte Integer:1; /**< Integer-valued? */
@ -686,6 +689,9 @@ struct gl_multisample_attrib
/** The GL spec defines this as an array but >32x MSAA is madness */
GLbitfield SampleMaskValue;
/* NV_alpha_to_coverage_dither_control */
GLenum SampleAlphaToCoverageDitherControl;
};
@ -1310,6 +1316,9 @@ struct gl_viewport_attrib
GLfloat X, Y; /**< position */
GLfloat Width, Height; /**< size */
GLfloat Near, Far; /**< Depth buffer range */
/**< GL_NV_viewport_swizzle */
GLenum16 SwizzleX, SwizzleY, SwizzleZ, SwizzleW;
};
@ -1546,6 +1555,9 @@ struct gl_vertex_array_object
/** Mask indicating which vertex arrays have vertex buffer associated. */
GLbitfield VertexAttribBufferMask;
/** Mask indicating which vertex arrays have a non-zero instance divisor. */
GLbitfield NonZeroDivisorMask;
/** Mask of VERT_BIT_* values indicating which arrays are enabled */
GLbitfield Enabled;
@ -1558,6 +1570,9 @@ struct gl_vertex_array_object
*/
GLbitfield _EffEnabledVBO;
/** Same as _EffEnabledVBO, but for instance divisors. */
GLbitfield _EffEnabledNonZeroDivisor;
/** Denotes the way the position/generic0 attribute is mapped */
gl_attribute_map_mode _AttributeMapMode;
@ -1583,6 +1598,9 @@ struct gl_array_attrib
/** The last VAO accessed by a DSA function */
struct gl_vertex_array_object *LastLookedUpVAO;
/** These contents are copied to newly created VAOs. */
struct gl_vertex_array_object DefaultVAOState;
/** Array objects (GL_ARB_vertex_array_object) */
struct _mesa_HashTable *Objects;
@ -1601,6 +1619,7 @@ struct gl_array_attrib
GLboolean PrimitiveRestartFixedIndex;
GLboolean _PrimitiveRestart;
GLuint RestartIndex;
GLuint _RestartIndex[4]; /**< Restart indices for index_size - 1. */
/*@}*/
/* GL_ARB_vertex_buffer_object */
@ -2658,6 +2677,12 @@ struct gl_shader
bool bound_sampler;
bool bound_image;
/**
* Whether layer output is viewport-relative.
*/
bool redeclares_gl_layer;
bool layer_viewport_relative;
/** Global xfb_stride out qualifier if any */
GLuint TransformFeedbackBufferStride[MAX_FEEDBACK_BUFFERS];
@ -3167,6 +3192,15 @@ struct gl_shader_compiler_options
* gl_CullDistance together from
* float[8] to vec4[2]
**/
GLbitfield LowerBuiltinVariablesXfb; /**< Which builtin variables should
* be lowered for transform feedback
**/
/**
* If we can lower the precision of variables based on precision
* qualifiers
*/
GLboolean LowerPrecision;
/**
* \name Forms of indirect addressing the driver cannot do.
@ -3293,9 +3327,6 @@ struct gl_shared_state
GLuint TextureStateStamp; /**< state notification for shared tex */
/*@}*/
/** Default buffer object for vertex arrays that aren't in VBOs */
struct gl_buffer_object *NullBufferObj;
/**
* \name Vertex/geometry/fragment programs
*/
@ -3835,6 +3866,11 @@ struct gl_constants
*/
GLboolean GLSLZeroInit;
/**
* Treat integer textures using GL_LINEAR filters as GL_NEAREST.
*/
GLboolean ForceIntegerTexNearest;
/**
* Does the driver support real 32-bit integers? (Otherwise, integers are
* simulated via floats.)
@ -3967,6 +4003,15 @@ struct gl_constants
*/
GLboolean DisableVaryingPacking;
/**
* Disable varying packing if used for transform feedback. This is needed
* for some drivers (e.g. Panfrost) where transform feedback requires
* unpacked varyings.
*
* This variable is mutually exclusive with DisableVaryingPacking.
*/
GLboolean DisableTransformFeedbackPacking;
/**
* UBOs and SSBOs can be packed tightly by the OpenGL implementation when
* layout is set as shared (the default) or packed. However most Mesa drivers
@ -4026,51 +4071,6 @@ struct gl_constants
} SupportedMultisampleModes[40];
GLint NumSupportedMultisampleModes;
/**
* GL_EXT_texture_multisample_blit_scaled implementation assumes that
* samples are laid out in a rectangular grid roughly corresponding to
* sample locations within a pixel. Below SampleMap{2,4,8}x variables
* are used to map indices of rectangular grid to sample numbers within
* a pixel. This mapping of indices to sample numbers must be initialized
* by the driver for the target hardware. For example, if we have the 8X
* MSAA sample number layout (sample positions) for XYZ hardware:
*
* sample indices layout sample number layout
* --------- ---------
* | 0 | 1 | | a | b |
* --------- ---------
* | 2 | 3 | | c | d |
* --------- ---------
* | 4 | 5 | | e | f |
* --------- ---------
* | 6 | 7 | | g | h |
* --------- ---------
*
* Where a,b,c,d,e,f,g,h are integers between [0-7].
*
* Then, initialize the SampleMap8x variable for XYZ hardware as shown
* below:
* SampleMap8x = {a, b, c, d, e, f, g, h};
*
* Follow the logic for sample counts 2-8.
*
* For 16x, the sample indices are laid out as a 4x4 grid as follows:
*
* -----------------
* | 0 | 1 | 2 | 3 |
* -----------------
* | 4 | 5 | 6 | 7 |
* -----------------
* | 8 | 9 |10 |11 |
* -----------------
* |12 |13 |14 |15 |
* -----------------
*/
uint8_t SampleMap2x[2];
uint8_t SampleMap4x[4];
uint8_t SampleMap8x[8];
uint8_t SampleMap16x[16];
/** GL_ARB_shader_atomic_counters */
GLuint MaxAtomicBufferBindings;
GLuint MaxAtomicBufferSize;
@ -4131,6 +4131,12 @@ struct gl_constants
/** When drivers are OK with mapped buffers during draw and other calls. */
bool AllowMappedBuffersDuringExecution;
/**
* Whether buffer creation, unsynchronized mapping, unmapping, and
* deletion is thread-safe.
*/
bool BufferCreateMapUnsynchronizedThreadSafe;
/** GL_ARB_get_program_binary */
GLuint NumProgramBinaryFormats;
@ -4150,6 +4156,15 @@ struct gl_constants
/** Whether or not glBitmap uses red textures rather than alpha */
bool BitmapUsesRed;
/** Whether the vertex buffer offset is a signed 32-bit integer. */
bool VertexBufferOffsetIsInt32;
/** Whether the driver can handle MultiDrawElements with non-VBO indices. */
bool MultiDrawWithUserIndices;
/** Whether out-of-order draw (Begin/End) optimizations are allowed. */
bool AllowDrawOutOfOrder;
/** GL_ARB_gl_spirv */
struct spirv_supported_capabilities SpirVCapabilities;
@ -4157,6 +4172,9 @@ struct gl_constants
struct spirv_supported_extensions *SpirVExtensions;
char *VendorOverride;
/** Buffer size used to upload vertices from glBegin/glEnd. */
unsigned glBeginEndBufferSize;
};
@ -4365,6 +4383,7 @@ struct gl_extensions
GLboolean ATI_texture_env_combine3;
GLboolean ATI_fragment_shader;
GLboolean GREMEDY_string_marker;
GLboolean INTEL_blackhole_render;
GLboolean INTEL_conservative_rasterization;
GLboolean INTEL_performance_query;
GLboolean INTEL_shader_atomic_float_minmax;
@ -4382,8 +4401,10 @@ struct gl_extensions
GLboolean EXT_shader_framebuffer_fetch_non_coherent;
GLboolean MESA_shader_integer_functions;
GLboolean MESA_ycbcr_texture;
GLboolean NV_alpha_to_coverage_dither_control;
GLboolean NV_compute_shader_derivatives;
GLboolean NV_conditional_render;
GLboolean NV_copy_image;
GLboolean NV_fill_rectangle;
GLboolean NV_fog_distance;
GLboolean NV_point_sprite;
@ -4397,6 +4418,8 @@ struct gl_extensions
GLboolean NV_conservative_raster_dilate;
GLboolean NV_conservative_raster_pre_snap_triangles;
GLboolean NV_conservative_raster_pre_snap;
GLboolean NV_viewport_array2;
GLboolean NV_viewport_swizzle;
GLboolean NVX_gpu_memory_info;
GLboolean TDFX_texture_compression_FXT1;
GLboolean OES_EGL_image;
@ -4421,12 +4444,6 @@ struct gl_extensions
* while meta is in progress.
*/
GLubyte Version;
/**
* Force-enabled, yet unrecognized, extensions.
* See _mesa_one_time_init_extension_overrides()
*/
#define MAX_UNRECOGNIZED_EXTENSIONS 16
const char *unrecognized_extensions[MAX_UNRECOGNIZED_EXTENSIONS];
};
@ -4470,7 +4487,7 @@ struct gl_matrix_stack
#define _NEW_TEXTURE_MATRIX (1u << 2) /**< gl_context::TextureMatrix */
#define _NEW_COLOR (1u << 3) /**< gl_context::Color */
#define _NEW_DEPTH (1u << 4) /**< gl_context::Depth */
#define _NEW_EVAL (1u << 5) /**< gl_context::Eval, EvalMap */
/* gap */
#define _NEW_FOG (1u << 6) /**< gl_context::Fog */
#define _NEW_HINT (1u << 7) /**< gl_context::Hint */
#define _NEW_LIGHT (1u << 8) /**< gl_context::Light */
@ -4557,7 +4574,7 @@ struct gl_dlist_state
GLvertexformat ListVtxfmt;
GLubyte ActiveAttribSize[VERT_ATTRIB_MAX];
GLfloat CurrentAttrib[VERT_ATTRIB_MAX][8];
uint32_t CurrentAttrib[VERT_ATTRIB_MAX][8];
GLubyte ActiveMaterialSize[MAT_ATTRIB_MAX];
GLfloat CurrentMaterial[MAT_ATTRIB_MAX][4];
@ -4877,7 +4894,7 @@ struct gl_context
/*@}*/
struct glthread_state *GLThread;
struct glthread_state GLThread;
struct gl_config Visual;
struct gl_framebuffer *DrawBuffer; /**< buffer for writing */
@ -5107,6 +5124,7 @@ struct gl_context
struct gl_driver_flags DriverFlags;
GLboolean ViewportInitialized; /**< has viewport size been initialized? */
GLboolean _AllowDrawOutOfOrder;
GLbitfield varying_vp_inputs; /**< mask of VERT_BIT_* flags */
@ -5141,6 +5159,8 @@ struct gl_context
GLfloat ConservativeRasterDilate;
GLenum16 ConservativeRasterMode;
GLboolean IntelBlackholeRender; /**< GL_INTEL_blackhole_render */
/** Does glVertexAttrib(0) alias glVertex()? */
bool _AttribZeroAliasesVertex;


@ -31,6 +31,8 @@
#ifndef PROG_PARAMETER_H
#define PROG_PARAMETER_H
#include <stdbool.h>
#include <stdint.h>
#include "prog_statevars.h"
#include <string.h>


@ -63,11 +63,11 @@ extern void
_mesa_set_program_error(struct gl_context *ctx, GLint pos, const char *string);
extern struct gl_program *
_mesa_init_gl_program(struct gl_program *prog, GLenum target, GLuint id,
bool is_arb_asm);
_mesa_init_gl_program(struct gl_program *prog, gl_shader_stage stage,
GLuint id, bool is_arb_asm);
extern struct gl_program *
_mesa_new_program(struct gl_context *ctx, GLenum target, GLuint id,
_mesa_new_program(struct gl_context *ctx, gl_shader_stage stage, GLuint id,
bool is_arb_asm);
extern void


@ -21,10 +21,10 @@
* DEALINGS IN THE SOFTWARE.
*/
#include "main/imports.h"
#include "main/errors.h"
#include "symbol_table.h"
#include "../../util/hash_table.h"
#include "util/hash_table.h"
#include "util/u_string.h"
struct symbol {
@ -63,7 +63,7 @@ struct symbol {
struct scope_level {
/** Link to next (inner) scope level. */
struct scope_level *next;
/** Linked list of symbols with the same scope. */
struct symbol *symbols;
};


@ -42,13 +42,13 @@ extern "C" {
struct gl_context;
GLboolean
_vbo_CreateContext(struct gl_context *ctx);
_vbo_CreateContext(struct gl_context *ctx, bool use_buffer_objects);
void
_vbo_DestroyContext(struct gl_context *ctx);
void
vbo_exec_invalidate_state(struct gl_context *ctx);
vbo_exec_update_eval_maps(struct gl_context *ctx);
void
_vbo_install_exec_vtxfmt(struct gl_context *ctx);
@ -87,23 +87,25 @@ vbo_save_EndCallList(struct gl_context *ctx);
void
vbo_delete_minmax_cache(struct gl_buffer_object *bufferObj);
void
vbo_get_minmax_index_mapped(unsigned count, unsigned index_size,
unsigned restartIndex, bool restart,
const void *indices,
unsigned *min_index, unsigned *max_index);
void
vbo_get_minmax_indices(struct gl_context *ctx, const struct _mesa_prim *prim,
const struct _mesa_index_buffer *ib,
GLuint *min_index, GLuint *max_index, GLuint nr_prims);
void
vbo_use_buffer_objects(struct gl_context *ctx);
void
vbo_always_unmap_buffers(struct gl_context *ctx);
void
vbo_sw_primitive_restart(struct gl_context *ctx,
const struct _mesa_prim *prim,
GLuint nr_prims,
const struct _mesa_index_buffer *ib,
struct gl_buffer_object *indirect);
GLuint num_instances, GLuint base_instance,
struct gl_buffer_object *indirect,
GLsizeiptr indirect_offset);
const struct gl_array_attributes*


@ -23,8 +23,8 @@
#include <string.h>
#include "main/macros.h"
#include "blob.h"
#include "u_math.h"
#ifdef HAVE_VALGRIND
#include <valgrind.h>
@ -85,7 +85,7 @@ grow_to_fit(struct blob *blob, size_t additional)
static bool
align_blob(struct blob *blob, size_t alignment)
{
const size_t new_size = ALIGN(blob->size, alignment);
const size_t new_size = align64(blob->size, alignment);
if (blob->size < new_size) {
if (!grow_to_fit(blob, new_size - blob->size))
@ -102,7 +102,7 @@ align_blob(struct blob *blob, size_t alignment)
static void
align_blob_reader(struct blob_reader *blob, size_t alignment)
{
blob->current = blob->data + ALIGN(blob->current - blob->data, alignment);
blob->current = blob->data + align64(blob->current - blob->data, alignment);
}
void
@ -162,7 +162,7 @@ blob_write_bytes(struct blob *blob, const void *bytes, size_t to_write)
VG(VALGRIND_CHECK_MEM_IS_DEFINED(bytes, to_write));
if (blob->data)
if (blob->data && to_write > 0)
memcpy(blob->data + blob->size, bytes, to_write);
blob->size += to_write;
@ -212,7 +212,16 @@ BLOB_WRITE_TYPE(blob_write_uint64, uint64_t)
BLOB_WRITE_TYPE(blob_write_intptr, intptr_t)
#define ASSERT_ALIGNED(_offset, _align) \
assert(ALIGN((_offset), (_align)) == (_offset))
assert(align64((_offset), (_align)) == (_offset))
bool
blob_overwrite_uint8 (struct blob *blob,
size_t offset,
uint8_t value)
{
ASSERT_ALIGNED(offset, sizeof(value));
return blob_overwrite_bytes(blob, offset, &value, sizeof(value));
}
bool
blob_overwrite_uint32 (struct blob *blob,
@ -286,7 +295,7 @@ blob_copy_bytes(struct blob_reader *blob, void *dest, size_t size)
const void *bytes;
bytes = blob_read_bytes(blob, size);
if (bytes == NULL)
if (bytes == NULL || size == 0)
return;
memcpy(dest, bytes, size);


@ -183,6 +183,21 @@ blob_overwrite_bytes(struct blob *blob,
bool
blob_write_uint8(struct blob *blob, uint8_t value);
/**
* Overwrite a uint8_t previously written to the blob.
*
* Writes a uint8_t value to an existing portion of the blob at an offset of
* \offset. This data range must have previously been written to the blob by
* one of the blob_write_* calls.
*
* \return True unless the requested position or position+to_write lie outside
* the current blob's size.
*/
bool
blob_overwrite_uint8(struct blob *blob,
size_t offset,
uint8_t value);
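A sketch of the intended write-then-patch pattern (assumes blob->size is the current write offset; everything except the blob_* calls is hypothetical):

size_t count_offset = blob->size;  /* remember where the placeholder goes */
blob_write_uint8(blob, 0);         /* reserve one byte */
uint8_t n = 0;
/* ... blob_write_* one entry at a time, incrementing n ... */
blob_overwrite_uint8(blob, count_offset, n);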
/**
* Add a uint16_t to a blob.
*


@ -23,7 +23,6 @@
#include <errno.h>
#include <string.h>
#include "main/macros.h"
#include "debug.h"
#include "u_string.h"


@ -51,8 +51,7 @@
#include "util/u_queue.h"
#include "util/mesa-sha1.h"
#include "util/ralloc.h"
#include "main/compiler.h"
#include "main/errors.h"
#include "util/compiler.h"
#include "disk_cache.h"

third_party/rust/glslopt/glsl-optimizer/src/util/format/u_format.h (vendored, new file, 1665 lines)

Diff not shown because of its size.


@ -25,6 +25,7 @@
#define UTIL_FUTEX_H
#if defined(HAVE_LINUX_FUTEX_H)
#define UTIL_FUTEX_SUPPORTED 1
#include <limits.h>
#include <stdint.h>
@ -52,6 +53,7 @@ static inline int futex_wait(uint32_t *addr, int32_t value, const struct timespe
}
#elif defined(__FreeBSD__)
#define UTIL_FUTEX_SUPPORTED 1
#include <assert.h>
#include <errno.h>
@ -86,6 +88,7 @@ static inline int futex_wait(uint32_t *addr, int32_t value, struct timespec *tim
}
#elif defined(__OpenBSD__)
#define UTIL_FUTEX_SUPPORTED 1
#include <sys/time.h>
#include <sys/futex.h>
@ -103,6 +106,8 @@ static inline int futex_wait(uint32_t *addr, int32_t value, const struct timespe
return futex(addr, FUTEX_WAIT, value, &tsrel, NULL);
}
#else
#define UTIL_FUTEX_SUPPORTED 0
#endif
#endif /* UTIL_FUTEX_H */


@ -33,8 +33,8 @@
extern "C" {
#endif
#define FP16_ONE 0x3C00
#define FP16_ZERO 0
#define FP16_ONE ((uint16_t) 0x3c00)
#define FP16_ZERO ((uint16_t) 0)
uint16_t _mesa_float_to_half(float val);
float _mesa_half_to_float(uint16_t val);
@ -62,6 +62,22 @@ _mesa_half_is_negative(uint16_t h)
}
#ifdef __cplusplus
/* Helper class for disambiguating fp16 from uint16_t in C++ overloads */
struct float16_t {
uint16_t bits;
float16_t(float f) : bits(_mesa_float_to_half(f)) {}
float16_t(double d) : bits(_mesa_float_to_half(d)) {}
float16_t(uint16_t bits) : bits(bits) {}
static float16_t one() { return float16_t(FP16_ONE); }
static float16_t zero() { return float16_t(FP16_ZERO); }
};
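As a quick sanity check (illustrative, assumes <assert.h>): half-precision 1.0 is sign 0, biased exponent 15 (01111), mantissa 0, which is exactly 0x3c00, so:

assert(_mesa_float_to_half(1.0f) == FP16_ONE);  /* 0x3c00 */
assert(_mesa_half_to_float(FP16_ZERO) == 0.0f);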
#endif
#ifdef __cplusplus
} /* extern C */
#endif

Some files were not shown because too many files changed in this diff.