зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1637148
- Update glslopt to fix intermittent build error. r=lsalzman
Update webrender's dependency on glslopt to 0.1.4. This includes an updated version of Mesa, which has fixed a race condition that was causing intermittent build failures. Differential Revision: https://phabricator.services.mozilla.com/D85254
This commit is contained in:
Родитель
24a7f188b8
Коммит
6423c052ae
|
@ -2051,9 +2051,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "glslopt"
|
||||
version = "0.1.2"
|
||||
version = "0.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f22b383fcf6f85c4a268af39a0758ec40970e5f9f8fe9809e4415d48409b8379"
|
||||
checksum = "065c2e941ad25c18428724fd2ad0bc3967cb96242e8db92f3794eedb15c02e44"
|
||||
dependencies = [
|
||||
"cc",
|
||||
]
|
||||
|
|
|
@ -685,7 +685,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "glslopt"
|
||||
version = "0.1.2"
|
||||
version = "0.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"cc 1.0.50 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
|
@ -1809,7 +1809,7 @@ dependencies = [
|
|||
"freetype 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"fxhash 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"gleam 0.12.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"glslopt 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"glslopt 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"image 0.23.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.68 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
|
@ -2131,7 +2131,7 @@ dependencies = [
|
|||
"checksum gleam 0.12.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3fdef5b9df6d3a261b80a5ac55e13bf93945725df2463c1b0a2e5a527dce0d37"
|
||||
"checksum gleam 0.6.19 (registry+https://github.com/rust-lang/crates.io-index)" = "cae10d7c99d0e77b4766e850a60898a17c1abaf01075531f1066f03dc7dc5fc5"
|
||||
"checksum glsl 4.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "766443890761b3c4edcce86cafaac97971b200662fbdd0446eb7c6b99b4401ea"
|
||||
"checksum glslopt 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f22b383fcf6f85c4a268af39a0758ec40970e5f9f8fe9809e4415d48409b8379"
|
||||
"checksum glslopt 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "065c2e941ad25c18428724fd2ad0bc3967cb96242e8db92f3794eedb15c02e44"
|
||||
"checksum glutin 0.21.2 (registry+https://github.com/rust-lang/crates.io-index)" = "5371b35b309dace06be1b81b5f6adb1c9de578b7dbe1e74bf7e4ef762cf6febd"
|
||||
"checksum glutin_egl_sys 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "772edef3b28b8ad41e4ea202748e65eefe8e5ffd1f4535f1219793dbb20b3d4c"
|
||||
"checksum glutin_emscripten_sys 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "80de4146df76e8a6c32b03007bc764ff3249dcaeb4f675d68a06caf1bac363f1"
|
||||
|
|
|
@ -22,7 +22,7 @@ leak_checks = []
|
|||
|
||||
[build-dependencies]
|
||||
build-parallel = "0.1.1"
|
||||
glslopt = "0.1.2"
|
||||
glslopt = "0.1.4"
|
||||
webrender_build = { version = "0.0.1", path = "../webrender_build" }
|
||||
|
||||
[dependencies]
|
||||
|
|
Различия файлов скрыты, потому что одна или несколько строк слишком длинны
|
@ -13,7 +13,7 @@
|
|||
[package]
|
||||
edition = "2018"
|
||||
name = "glslopt"
|
||||
version = "0.1.2"
|
||||
version = "0.1.4"
|
||||
authors = ["Jamie Nicol <jnicol@mozilla.com>"]
|
||||
description = "Optimizes GLSL shader code"
|
||||
keywords = ["opengl", "gl", "gles", "glsl", "shader"]
|
||||
|
|
|
@ -0,0 +1,20 @@
|
|||
# glslopt-rs
|
||||
|
||||
Rust bindings to [glsl-optimizer](https://github.com/jamienicol/glsl-optimizer).
|
||||
|
||||
## Updating glsl-optimizer
|
||||
|
||||
To update the version of glsl-optimizer, update the git submodule:
|
||||
|
||||
```sh
|
||||
git submodule update --remote glsl-optimizer
|
||||
```
|
||||
|
||||
Then, if required, regenerate the bindings:
|
||||
|
||||
```sh
|
||||
cargo install bindgen
|
||||
bindgen wrapper.hpp -o src/bindings.rs
|
||||
```
|
||||
|
||||
Then commit the changes.
|
|
@ -7,6 +7,7 @@ use std::env;
|
|||
fn configure(build: &mut cc::Build) -> &mut cc::Build {
|
||||
build.define("__STDC_FORMAT_MACROS", None);
|
||||
if cfg!(target_os = "linux") {
|
||||
build.define("_GNU_SOURCE", None);
|
||||
build.define("HAVE_ENDIAN_H", None);
|
||||
}
|
||||
if cfg!(target_os = "windows") {
|
||||
|
@ -48,12 +49,14 @@ fn main() {
|
|||
.file("glsl-optimizer/src/util/half_float.c")
|
||||
.file("glsl-optimizer/src/util/hash_table.c")
|
||||
.file("glsl-optimizer/src/util/mesa-sha1.c")
|
||||
.file("glsl-optimizer/src/util/os_misc.c")
|
||||
.file("glsl-optimizer/src/util/ralloc.c")
|
||||
.file("glsl-optimizer/src/util/set.c")
|
||||
.file("glsl-optimizer/src/util/sha1/sha1.c")
|
||||
.file("glsl-optimizer/src/util/softfloat.c")
|
||||
.file("glsl-optimizer/src/util/string_buffer.c")
|
||||
.file("glsl-optimizer/src/util/strtod.c")
|
||||
.file("glsl-optimizer/src/util/u_debug.c")
|
||||
.compile("glcpp");
|
||||
|
||||
configure(&mut cc::Build::new())
|
||||
|
@ -70,7 +73,6 @@ fn main() {
|
|||
.file("glsl-optimizer/src/mesa/program/dummy_errors.c")
|
||||
.file("glsl-optimizer/src/mesa/program/symbol_table.c")
|
||||
.file("glsl-optimizer/src/mesa/main/extensions_table.c")
|
||||
.file("glsl-optimizer/src/mesa/main/imports.c")
|
||||
.file("glsl-optimizer/src/compiler/shader_enums.c")
|
||||
.compile("mesa");
|
||||
|
||||
|
@ -137,6 +139,7 @@ fn main() {
|
|||
.file("glsl-optimizer/src/compiler/glsl/loop_unroll.cpp")
|
||||
.file("glsl-optimizer/src/compiler/glsl/lower_blend_equation_advanced.cpp")
|
||||
.file("glsl-optimizer/src/compiler/glsl/lower_buffer_access.cpp")
|
||||
.file("glsl-optimizer/src/compiler/glsl/lower_builtins.cpp")
|
||||
.file("glsl-optimizer/src/compiler/glsl/lower_const_arrays_to_uniforms.cpp")
|
||||
.file("glsl-optimizer/src/compiler/glsl/lower_cs_derived.cpp")
|
||||
.file("glsl-optimizer/src/compiler/glsl/lower_discard_flow.cpp")
|
||||
|
@ -148,11 +151,11 @@ fn main() {
|
|||
.file("glsl-optimizer/src/compiler/glsl/lower_jumps.cpp")
|
||||
.file("glsl-optimizer/src/compiler/glsl/lower_mat_op_to_vec.cpp")
|
||||
.file("glsl-optimizer/src/compiler/glsl/lower_named_interface_blocks.cpp")
|
||||
.file("glsl-optimizer/src/compiler/glsl/lower_noise.cpp")
|
||||
.file("glsl-optimizer/src/compiler/glsl/lower_offset_array.cpp")
|
||||
.file("glsl-optimizer/src/compiler/glsl/lower_output_reads.cpp")
|
||||
.file("glsl-optimizer/src/compiler/glsl/lower_packed_varyings.cpp")
|
||||
.file("glsl-optimizer/src/compiler/glsl/lower_packing_builtins.cpp")
|
||||
.file("glsl-optimizer/src/compiler/glsl/lower_precision.cpp")
|
||||
.file("glsl-optimizer/src/compiler/glsl/lower_shared_reference.cpp")
|
||||
.file("glsl-optimizer/src/compiler/glsl/lower_subroutine.cpp")
|
||||
.file("glsl-optimizer/src/compiler/glsl/lower_tess_level.cpp")
|
||||
|
@ -165,6 +168,7 @@ fn main() {
|
|||
.file("glsl-optimizer/src/compiler/glsl/lower_vector_insert.cpp")
|
||||
.file("glsl-optimizer/src/compiler/glsl/lower_vector.cpp")
|
||||
.file("glsl-optimizer/src/compiler/glsl/lower_vertex_id.cpp")
|
||||
.file("glsl-optimizer/src/compiler/glsl/lower_xfb_varying.cpp")
|
||||
.file("glsl-optimizer/src/compiler/glsl/opt_algebraic.cpp")
|
||||
.file("glsl-optimizer/src/compiler/glsl/opt_array_splitting.cpp")
|
||||
.file("glsl-optimizer/src/compiler/glsl/opt_conditional_discard.cpp")
|
||||
|
|
|
@ -4,6 +4,8 @@ project(glsl_optimizer VERSION 0.1
|
|||
DESCRIPTION "GLSL Optimizer"
|
||||
LANGUAGES C CXX)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 11)
|
||||
|
||||
include_directories(include)
|
||||
include_directories(src/mesa)
|
||||
include_directories(src/mapi)
|
||||
|
@ -16,6 +18,7 @@ include_directories(src/util)
|
|||
|
||||
add_definitions(-D__STDC_FORMAT_MACROS)
|
||||
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux")
|
||||
add_definitions(-D_GNU_SOURCE)
|
||||
add_definitions(-DHAVE_ENDIAN_H)
|
||||
endif()
|
||||
if(${CMAKE_SYSTEM_NAME} MATCHES "Windows")
|
||||
|
@ -25,17 +28,6 @@ else()
|
|||
add_definitions(-DHAVE_TIMESPEC_GET)
|
||||
endif()
|
||||
|
||||
option (DEBUG "Enable debugging" FALSE)
|
||||
|
||||
if(${DEBUG} MATCHES "on")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -O0")
|
||||
else()
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Os -DNDEBUG")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Os -DNDEBUG")
|
||||
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -s")
|
||||
endif()
|
||||
|
||||
add_library(glcpp STATIC "src/compiler/glsl/glcpp/glcpp-lex.c"
|
||||
"src/compiler/glsl/glcpp/glcpp-parse.c"
|
||||
"src/compiler/glsl/glcpp/glcpp.h"
|
||||
|
@ -45,17 +37,18 @@ add_library(glcpp STATIC "src/compiler/glsl/glcpp/glcpp-lex.c"
|
|||
"src/util/half_float.c"
|
||||
"src/util/hash_table.c"
|
||||
"src/util/mesa-sha1.c"
|
||||
"src/util/os_misc.c"
|
||||
"src/util/ralloc.c"
|
||||
"src/util/set.c"
|
||||
"src/util/sha1/sha1.c"
|
||||
"src/util/softfloat.c"
|
||||
"src/util/string_buffer.c"
|
||||
"src/util/strtod.c")
|
||||
"src/util/strtod.c"
|
||||
"src/util/u_debug.c")
|
||||
|
||||
add_library(mesa STATIC "src/mesa/program/dummy_errors.c"
|
||||
"src/mesa/program/symbol_table.c"
|
||||
"src/mesa/main/extensions_table.c"
|
||||
"src/mesa/main/imports.c")
|
||||
"src/mesa/main/extensions_table.c")
|
||||
|
||||
add_library(glsl_optimizer STATIC "src/compiler/glsl_types.cpp"
|
||||
"src/compiler/glsl/ast_array_index.cpp"
|
||||
|
@ -111,6 +104,7 @@ add_library(glsl_optimizer STATIC "src/compiler/glsl_types.cpp"
|
|||
"src/compiler/glsl/loop_unroll.cpp"
|
||||
"src/compiler/glsl/lower_blend_equation_advanced.cpp"
|
||||
"src/compiler/glsl/lower_buffer_access.cpp"
|
||||
"src/compiler/glsl/lower_builtins.cpp"
|
||||
"src/compiler/glsl/lower_const_arrays_to_uniforms.cpp"
|
||||
"src/compiler/glsl/lower_cs_derived.cpp"
|
||||
"src/compiler/glsl/lower_discard_flow.cpp"
|
||||
|
@ -122,11 +116,11 @@ add_library(glsl_optimizer STATIC "src/compiler/glsl_types.cpp"
|
|||
"src/compiler/glsl/lower_jumps.cpp"
|
||||
"src/compiler/glsl/lower_mat_op_to_vec.cpp"
|
||||
"src/compiler/glsl/lower_named_interface_blocks.cpp"
|
||||
"src/compiler/glsl/lower_noise.cpp"
|
||||
"src/compiler/glsl/lower_offset_array.cpp"
|
||||
"src/compiler/glsl/lower_output_reads.cpp"
|
||||
"src/compiler/glsl/lower_packed_varyings.cpp"
|
||||
"src/compiler/glsl/lower_packing_builtins.cpp"
|
||||
"src/compiler/glsl/lower_precision.cpp"
|
||||
"src/compiler/glsl/lower_shared_reference.cpp"
|
||||
"src/compiler/glsl/lower_subroutine.cpp"
|
||||
"src/compiler/glsl/lower_tess_level.cpp"
|
||||
|
@ -139,6 +133,7 @@ add_library(glsl_optimizer STATIC "src/compiler/glsl_types.cpp"
|
|||
"src/compiler/glsl/lower_vector_insert.cpp"
|
||||
"src/compiler/glsl/lower_vector.cpp"
|
||||
"src/compiler/glsl/lower_vertex_id.cpp"
|
||||
"src/compiler/glsl/lower_xfb_varying.cpp"
|
||||
"src/compiler/glsl/opt_algebraic.cpp"
|
||||
"src/compiler/glsl/opt_array_splitting.cpp"
|
||||
"src/compiler/glsl/opt_conditional_discard.cpp"
|
||||
|
|
|
@ -220,7 +220,7 @@ Q: What is the file naming convention in this directory?
|
|||
Initially, there really wasn't one. We have since adopted one:
|
||||
|
||||
- Files that implement code lowering passes should be named lower_*
|
||||
(e.g., lower_noise.cpp).
|
||||
(e.g., lower_builtins.cpp).
|
||||
- Files that implement optimization passes should be named opt_*.
|
||||
- Files that implement a class that is used throught the code should
|
||||
take the name of that class (e.g., ir_hierarchical_visitor.cpp).
|
||||
|
|
|
@ -663,6 +663,12 @@ struct ast_type_qualifier {
|
|||
/** \{ */
|
||||
unsigned derivative_group:1;
|
||||
/** \} */
|
||||
|
||||
/**
|
||||
* Flag set if GL_NV_viewport_array2 viewport_relative layout
|
||||
* qualifier is used.
|
||||
*/
|
||||
unsigned viewport_relative:1;
|
||||
}
|
||||
/** \brief Set of flags, accessed by name. */
|
||||
q;
|
||||
|
@ -773,7 +779,7 @@ struct ast_type_qualifier {
|
|||
* \note
|
||||
* This field is only valid if \c explicit_image_format is set.
|
||||
*/
|
||||
GLenum image_format;
|
||||
enum pipe_format image_format;
|
||||
|
||||
/**
|
||||
* Arrangement of invocations used to calculate derivatives in a compute
|
||||
|
|
|
@ -49,6 +49,13 @@ process_parameters(exec_list *instructions, exec_list *actual_parameters,
|
|||
ast->set_is_lhs(true);
|
||||
ir_rvalue *result = ast->hir(instructions, state);
|
||||
|
||||
/* Error happened processing function parameter */
|
||||
if (!result) {
|
||||
actual_parameters->push_tail(ir_rvalue::error_value(mem_ctx));
|
||||
count++;
|
||||
continue;
|
||||
}
|
||||
|
||||
ir_constant *const constant =
|
||||
result->constant_expression_value(mem_ctx);
|
||||
|
||||
|
@ -612,11 +619,6 @@ generate_call(exec_list *instructions, ir_function_signature *sig,
|
|||
ir_call *call = new(ctx) ir_call(sig, deref,
|
||||
actual_parameters, sub_var, array_idx);
|
||||
instructions->push_tail(call);
|
||||
if (sig->is_builtin()) {
|
||||
/* inline immediately */
|
||||
call->generate_inline(call);
|
||||
call->remove();
|
||||
}
|
||||
|
||||
/* Also emit any necessary out-parameter conversions. */
|
||||
instructions->append_list(&post_call_conversions);
|
||||
|
|
|
@ -1702,8 +1702,10 @@ ast_expression::do_hir(exec_list *instructions,
|
|||
|
||||
/* Break out if operand types were not parsed successfully. */
|
||||
if ((op[0]->type == glsl_type::error_type ||
|
||||
op[1]->type == glsl_type::error_type))
|
||||
op[1]->type == glsl_type::error_type)) {
|
||||
error_emitted = true;
|
||||
break;
|
||||
}
|
||||
|
||||
type = arithmetic_result_type(op[0], op[1],
|
||||
(this->oper == ast_mul_assign),
|
||||
|
@ -2144,7 +2146,7 @@ ast_expression::do_hir(exec_list *instructions,
|
|||
}
|
||||
}
|
||||
type = NULL; /* use result->type, not type. */
|
||||
assert(result != NULL || !needs_rvalue);
|
||||
assert(error_emitted || (result != NULL || !needs_rvalue));
|
||||
|
||||
if (result && result->type->is_error() && !error_emitted)
|
||||
_mesa_glsl_error(& loc, state, "type mismatch");
|
||||
|
@ -3510,7 +3512,7 @@ apply_image_qualifier_to_variable(const struct ast_type_qualifier *qual,
|
|||
"`writeonly' must have a format layout qualifier");
|
||||
}
|
||||
}
|
||||
var->data.image_format = GL_NONE;
|
||||
var->data.image_format = PIPE_FORMAT_NONE;
|
||||
}
|
||||
|
||||
/* From page 70 of the GLSL ES 3.1 specification:
|
||||
|
@ -3520,9 +3522,9 @@ apply_image_qualifier_to_variable(const struct ast_type_qualifier *qual,
|
|||
* readonly or the memory qualifier writeonly."
|
||||
*/
|
||||
if (state->es_shader &&
|
||||
var->data.image_format != GL_R32F &&
|
||||
var->data.image_format != GL_R32I &&
|
||||
var->data.image_format != GL_R32UI &&
|
||||
var->data.image_format != PIPE_FORMAT_R32_FLOAT &&
|
||||
var->data.image_format != PIPE_FORMAT_R32_SINT &&
|
||||
var->data.image_format != PIPE_FORMAT_R32_UINT &&
|
||||
!var->data.memory_read_only &&
|
||||
!var->data.memory_write_only) {
|
||||
_mesa_glsl_error(loc, state, "image variables of format other than r32f, "
|
||||
|
@ -3559,6 +3561,16 @@ is_conflicting_fragcoord_redeclaration(struct _mesa_glsl_parse_state *state,
|
|||
return false;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
is_conflicting_layer_redeclaration(struct _mesa_glsl_parse_state *state,
|
||||
const struct ast_type_qualifier *qual)
|
||||
{
|
||||
if (state->redeclares_gl_layer) {
|
||||
return state->layer_viewport_relative != qual->flags.q.viewport_relative;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline void
|
||||
validate_array_dimensions(const glsl_type *t,
|
||||
struct _mesa_glsl_parse_state *state,
|
||||
|
@ -3948,6 +3960,21 @@ apply_layout_qualifier_to_variable(const struct ast_type_qualifier *qual,
|
|||
"sample_interlock_ordered and sample_interlock_unordered, "
|
||||
"only valid in fragment shader input layout declaration.");
|
||||
}
|
||||
|
||||
if (var->name != NULL && strcmp(var->name, "gl_Layer") == 0) {
|
||||
if (is_conflicting_layer_redeclaration(state, qual)) {
|
||||
_mesa_glsl_error(loc, state, "gl_Layer redeclaration with "
|
||||
"different viewport_relative setting than earlier");
|
||||
}
|
||||
state->redeclares_gl_layer = 1;
|
||||
if (qual->flags.q.viewport_relative) {
|
||||
state->layer_viewport_relative = 1;
|
||||
}
|
||||
} else if (qual->flags.q.viewport_relative) {
|
||||
_mesa_glsl_error(loc, state,
|
||||
"viewport_relative qualifier "
|
||||
"can only be applied to gl_Layer.");
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -4389,6 +4416,11 @@ get_variable_being_redeclared(ir_variable **var_ptr, YYLTYPE loc,
|
|||
earlier->data.precision = var->data.precision;
|
||||
earlier->data.memory_coherent = var->data.memory_coherent;
|
||||
|
||||
} else if (state->NV_viewport_array2_enable &&
|
||||
strcmp(var->name, "gl_Layer") == 0 &&
|
||||
earlier->data.how_declared == ir_var_declared_implicitly) {
|
||||
/* No need to do anything, just allow it. Qualifier is stored in state */
|
||||
|
||||
} else if ((earlier->data.how_declared == ir_var_declared_implicitly &&
|
||||
state->allow_builtin_variable_redeclaration) ||
|
||||
allow_all_redeclarations) {
|
||||
|
@ -4960,12 +4992,50 @@ ast_declarator_list::hir(exec_list *instructions,
|
|||
* size4x32 rgba32f rgba32i rgba32ui"
|
||||
*/
|
||||
if (strncmp(this->type->specifier->type_name, "image", strlen("image")) == 0) {
|
||||
this->type->qualifier.image_format = GL_R8 +
|
||||
this->type->qualifier.image_format - GL_R8I;
|
||||
switch (this->type->qualifier.image_format) {
|
||||
case PIPE_FORMAT_R8_SINT:
|
||||
/* No valid qualifier in this case, driver will need to look at
|
||||
* the underlying image's format (just like no qualifier being
|
||||
* present).
|
||||
*/
|
||||
this->type->qualifier.image_format = PIPE_FORMAT_NONE;
|
||||
break;
|
||||
case PIPE_FORMAT_R16_SINT:
|
||||
this->type->qualifier.image_format = PIPE_FORMAT_R16_FLOAT;
|
||||
break;
|
||||
case PIPE_FORMAT_R32_SINT:
|
||||
this->type->qualifier.image_format = PIPE_FORMAT_R32_FLOAT;
|
||||
break;
|
||||
case PIPE_FORMAT_R32G32_SINT:
|
||||
this->type->qualifier.image_format = PIPE_FORMAT_R32G32_FLOAT;
|
||||
break;
|
||||
case PIPE_FORMAT_R32G32B32A32_SINT:
|
||||
this->type->qualifier.image_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
|
||||
break;
|
||||
default:
|
||||
unreachable("Unknown image format");
|
||||
}
|
||||
this->type->qualifier.image_base_type = GLSL_TYPE_FLOAT;
|
||||
} else if (strncmp(this->type->specifier->type_name, "uimage", strlen("uimage")) == 0) {
|
||||
this->type->qualifier.image_format = GL_R8UI +
|
||||
this->type->qualifier.image_format - GL_R8I;
|
||||
switch (this->type->qualifier.image_format) {
|
||||
case PIPE_FORMAT_R8_SINT:
|
||||
this->type->qualifier.image_format = PIPE_FORMAT_R8_UINT;
|
||||
break;
|
||||
case PIPE_FORMAT_R16_SINT:
|
||||
this->type->qualifier.image_format = PIPE_FORMAT_R16_UINT;
|
||||
break;
|
||||
case PIPE_FORMAT_R32_SINT:
|
||||
this->type->qualifier.image_format = PIPE_FORMAT_R32_UINT;
|
||||
break;
|
||||
case PIPE_FORMAT_R32G32_SINT:
|
||||
this->type->qualifier.image_format = PIPE_FORMAT_R32G32_UINT;
|
||||
break;
|
||||
case PIPE_FORMAT_R32G32B32A32_SINT:
|
||||
this->type->qualifier.image_format = PIPE_FORMAT_R32G32B32A32_UINT;
|
||||
break;
|
||||
default:
|
||||
unreachable("Unknown image format");
|
||||
}
|
||||
this->type->qualifier.image_base_type = GLSL_TYPE_UINT;
|
||||
} else if (strncmp(this->type->specifier->type_name, "iimage", strlen("iimage")) == 0) {
|
||||
this->type->qualifier.image_base_type = GLSL_TYPE_INT;
|
||||
|
@ -7649,7 +7719,7 @@ ast_process_struct_or_iface_block_members(exec_list *instructions,
|
|||
"qualifier");
|
||||
}
|
||||
|
||||
fields[i].image_format = GL_NONE;
|
||||
fields[i].image_format = PIPE_FORMAT_NONE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -8213,6 +8283,21 @@ ast_interface_block::hir(exec_list *instructions,
|
|||
}
|
||||
|
||||
|
||||
ir_typedecl_statement* stmt = new(state) ir_typedecl_statement(block_type);
|
||||
/* Push the interface declarations to the top.
|
||||
* However, do not insert declarations before default precision
|
||||
* statements or other declarations
|
||||
*/
|
||||
ir_instruction* before_node = (ir_instruction*)instructions->get_head();
|
||||
while (before_node &&
|
||||
(before_node->ir_type == ir_type_precision ||
|
||||
before_node->ir_type == ir_type_typedecl))
|
||||
before_node = (ir_instruction*)before_node->next;
|
||||
if (before_node)
|
||||
before_node->insert_before(stmt);
|
||||
else
|
||||
instructions->push_head(stmt);
|
||||
|
||||
/* Page 39 (page 45 of the PDF) of section 4.3.7 in the GLSL ES 3.00 spec
|
||||
* says:
|
||||
*
|
||||
|
|
|
@ -6650,103 +6650,52 @@ builtin_builder::_fwidthFine(const glsl_type *type)
|
|||
ir_function_signature *
|
||||
builtin_builder::_noise1(const glsl_type *type)
|
||||
{
|
||||
return unop(v110, ir_unop_noise, glsl_type::float_type, type);
|
||||
/* From the GLSL 4.60 specification:
|
||||
*
|
||||
* "The noise functions noise1, noise2, noise3, and noise4 have been
|
||||
* deprecated starting with version 4.4 of GLSL. When not generating
|
||||
* SPIR-V they are defined to return the value 0.0 or a vector whose
|
||||
* components are all 0.0. When generating SPIR-V the noise functions
|
||||
* are not declared and may not be used."
|
||||
*
|
||||
* In earlier versions of the GLSL specification attempt to define some
|
||||
* sort of statistical noise function. However, the function's
|
||||
* characteristics have always been such that always returning 0 is
|
||||
* valid and Mesa has always returned 0 for noise on most drivers.
|
||||
*/
|
||||
ir_variable *p = in_var(type, "p");
|
||||
MAKE_SIG(glsl_type::float_type, v110, 1, p);
|
||||
body.emit(ret(imm(glsl_type::float_type, ir_constant_data())));
|
||||
return sig;
|
||||
}
|
||||
|
||||
ir_function_signature *
|
||||
builtin_builder::_noise2(const glsl_type *type)
|
||||
{
|
||||
/* See builtin_builder::_noise1 */
|
||||
ir_variable *p = in_var(type, "p");
|
||||
MAKE_SIG(glsl_type::vec2_type, v110, 1, p);
|
||||
|
||||
ir_constant_data b_offset;
|
||||
b_offset.f[0] = 601.0f;
|
||||
b_offset.f[1] = 313.0f;
|
||||
b_offset.f[2] = 29.0f;
|
||||
b_offset.f[3] = 277.0f;
|
||||
|
||||
ir_variable *a = body.make_temp(glsl_type::float_type, "a");
|
||||
ir_variable *b = body.make_temp(glsl_type::float_type, "b");
|
||||
ir_variable *t = body.make_temp(glsl_type::vec2_type, "t");
|
||||
body.emit(assign(a, expr(ir_unop_noise, p)));
|
||||
body.emit(assign(b, expr(ir_unop_noise, add(p, imm(type, b_offset)))));
|
||||
body.emit(assign(t, a, WRITEMASK_X));
|
||||
body.emit(assign(t, b, WRITEMASK_Y));
|
||||
body.emit(ret(t));
|
||||
|
||||
body.emit(ret(imm(glsl_type::vec2_type, ir_constant_data())));
|
||||
return sig;
|
||||
}
|
||||
|
||||
ir_function_signature *
|
||||
builtin_builder::_noise3(const glsl_type *type)
|
||||
{
|
||||
/* See builtin_builder::_noise1 */
|
||||
ir_variable *p = in_var(type, "p");
|
||||
MAKE_SIG(glsl_type::vec3_type, v110, 1, p);
|
||||
|
||||
ir_constant_data b_offset;
|
||||
b_offset.f[0] = 601.0f;
|
||||
b_offset.f[1] = 313.0f;
|
||||
b_offset.f[2] = 29.0f;
|
||||
b_offset.f[3] = 277.0f;
|
||||
|
||||
ir_constant_data c_offset;
|
||||
c_offset.f[0] = 1559.0f;
|
||||
c_offset.f[1] = 113.0f;
|
||||
c_offset.f[2] = 1861.0f;
|
||||
c_offset.f[3] = 797.0f;
|
||||
|
||||
ir_variable *a = body.make_temp(glsl_type::float_type, "a");
|
||||
ir_variable *b = body.make_temp(glsl_type::float_type, "b");
|
||||
ir_variable *c = body.make_temp(glsl_type::float_type, "c");
|
||||
ir_variable *t = body.make_temp(glsl_type::vec3_type, "t");
|
||||
body.emit(assign(a, expr(ir_unop_noise, p)));
|
||||
body.emit(assign(b, expr(ir_unop_noise, add(p, imm(type, b_offset)))));
|
||||
body.emit(assign(c, expr(ir_unop_noise, add(p, imm(type, c_offset)))));
|
||||
body.emit(assign(t, a, WRITEMASK_X));
|
||||
body.emit(assign(t, b, WRITEMASK_Y));
|
||||
body.emit(assign(t, c, WRITEMASK_Z));
|
||||
body.emit(ret(t));
|
||||
|
||||
body.emit(ret(imm(glsl_type::vec3_type, ir_constant_data())));
|
||||
return sig;
|
||||
}
|
||||
|
||||
ir_function_signature *
|
||||
builtin_builder::_noise4(const glsl_type *type)
|
||||
{
|
||||
/* See builtin_builder::_noise1 */
|
||||
ir_variable *p = in_var(type, "p");
|
||||
MAKE_SIG(glsl_type::vec4_type, v110, 1, p);
|
||||
|
||||
ir_variable *_p = body.make_temp(type, "_p");
|
||||
|
||||
ir_constant_data p_offset;
|
||||
p_offset.f[0] = 1559.0f;
|
||||
p_offset.f[1] = 113.0f;
|
||||
p_offset.f[2] = 1861.0f;
|
||||
p_offset.f[3] = 797.0f;
|
||||
|
||||
body.emit(assign(_p, add(p, imm(type, p_offset))));
|
||||
|
||||
ir_constant_data offset;
|
||||
offset.f[0] = 601.0f;
|
||||
offset.f[1] = 313.0f;
|
||||
offset.f[2] = 29.0f;
|
||||
offset.f[3] = 277.0f;
|
||||
|
||||
ir_variable *a = body.make_temp(glsl_type::float_type, "a");
|
||||
ir_variable *b = body.make_temp(glsl_type::float_type, "b");
|
||||
ir_variable *c = body.make_temp(glsl_type::float_type, "c");
|
||||
ir_variable *d = body.make_temp(glsl_type::float_type, "d");
|
||||
ir_variable *t = body.make_temp(glsl_type::vec4_type, "t");
|
||||
body.emit(assign(a, expr(ir_unop_noise, p)));
|
||||
body.emit(assign(b, expr(ir_unop_noise, add(p, imm(type, offset)))));
|
||||
body.emit(assign(c, expr(ir_unop_noise, _p)));
|
||||
body.emit(assign(d, expr(ir_unop_noise, add(_p, imm(type, offset)))));
|
||||
body.emit(assign(t, a, WRITEMASK_X));
|
||||
body.emit(assign(t, b, WRITEMASK_Y));
|
||||
body.emit(assign(t, c, WRITEMASK_Z));
|
||||
body.emit(assign(t, d, WRITEMASK_W));
|
||||
body.emit(ret(t));
|
||||
|
||||
body.emit(ret(imm(glsl_type::vec4_type, ir_constant_data())));
|
||||
return sig;
|
||||
}
|
||||
|
||||
|
|
|
@ -351,7 +351,7 @@ per_vertex_accumulator::add_field(int slot, const glsl_type *type,
|
|||
this->fields[this->num_fields].memory_coherent = 0;
|
||||
this->fields[this->num_fields].memory_volatile = 0;
|
||||
this->fields[this->num_fields].memory_restrict = 0;
|
||||
this->fields[this->num_fields].image_format = 0;
|
||||
this->fields[this->num_fields].image_format = PIPE_FORMAT_NONE;
|
||||
this->fields[this->num_fields].explicit_xfb_buffer = 0;
|
||||
this->fields[this->num_fields].xfb_buffer = -1;
|
||||
this->fields[this->num_fields].xfb_stride = -1;
|
||||
|
@ -1084,8 +1084,13 @@ builtin_variable_generator::generate_vs_special_vars()
|
|||
add_system_value(SYSTEM_VALUE_BASE_INSTANCE, int_t, "gl_BaseInstance");
|
||||
add_system_value(SYSTEM_VALUE_DRAW_ID, int_t, "gl_DrawID");
|
||||
}
|
||||
if (state->EXT_draw_instanced_enable && state->is_version(0, 100))
|
||||
add_system_value(SYSTEM_VALUE_INSTANCE_ID, int_t, GLSL_PRECISION_HIGH,
|
||||
"gl_InstanceIDEXT");
|
||||
|
||||
if (state->ARB_draw_instanced_enable)
|
||||
add_system_value(SYSTEM_VALUE_INSTANCE_ID, int_t, "gl_InstanceIDARB");
|
||||
|
||||
if (state->ARB_draw_instanced_enable || state->is_version(140, 300) ||
|
||||
state->EXT_gpu_shader4_enable) {
|
||||
add_system_value(SYSTEM_VALUE_INSTANCE_ID, int_t, GLSL_PRECISION_HIGH,
|
||||
|
@ -1097,15 +1102,32 @@ builtin_variable_generator::generate_vs_special_vars()
|
|||
add_system_value(SYSTEM_VALUE_DRAW_ID, int_t, "gl_DrawIDARB");
|
||||
}
|
||||
if (state->AMD_vertex_shader_layer_enable ||
|
||||
state->ARB_shader_viewport_layer_array_enable) {
|
||||
state->ARB_shader_viewport_layer_array_enable ||
|
||||
state->NV_viewport_array2_enable) {
|
||||
var = add_output(VARYING_SLOT_LAYER, int_t, "gl_Layer");
|
||||
var->data.interpolation = INTERP_MODE_FLAT;
|
||||
}
|
||||
if (state->AMD_vertex_shader_viewport_index_enable ||
|
||||
state->ARB_shader_viewport_layer_array_enable) {
|
||||
state->ARB_shader_viewport_layer_array_enable ||
|
||||
state->NV_viewport_array2_enable) {
|
||||
var = add_output(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex");
|
||||
var->data.interpolation = INTERP_MODE_FLAT;
|
||||
}
|
||||
if (state->NV_viewport_array2_enable) {
|
||||
/* From the NV_viewport_array2 specification:
|
||||
*
|
||||
* "The variable gl_ViewportMask[] is available as an output variable
|
||||
* in the VTG languages. The array has ceil(v/32) elements where v is
|
||||
* the maximum number of viewports supported by the implementation."
|
||||
*
|
||||
* Since no drivers expose more than 16 viewports, we can simply set the
|
||||
* array size to 1 rather than computing it and dealing with varying
|
||||
* slot complication.
|
||||
*/
|
||||
var = add_output(VARYING_SLOT_VIEWPORT_MASK, array(int_t, 1),
|
||||
"gl_ViewportMask");
|
||||
var->data.interpolation = INTERP_MODE_FLAT;
|
||||
}
|
||||
if (compatibility) {
|
||||
add_input(VERT_ATTRIB_POS, vec4_t, "gl_Vertex");
|
||||
add_input(VERT_ATTRIB_NORMAL, vec3_t, "gl_Normal");
|
||||
|
@ -1155,6 +1177,17 @@ builtin_variable_generator::generate_tcs_special_vars()
|
|||
add_output(bbox_slot, array(vec4_t, 2), GLSL_PRECISION_HIGH,
|
||||
"gl_BoundingBox")->data.patch = 1;
|
||||
}
|
||||
|
||||
/* NOTE: These are completely pointless. Writing these will never go
|
||||
* anywhere. But the specs demands it. So we add them with a slot of -1,
|
||||
* which makes the data go nowhere.
|
||||
*/
|
||||
if (state->NV_viewport_array2_enable) {
|
||||
add_output(-1, int_t, "gl_Layer");
|
||||
add_output(-1, int_t, "gl_ViewportIndex");
|
||||
add_output(-1, array(int_t, 1), "gl_ViewportMask");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
@ -1183,12 +1216,18 @@ builtin_variable_generator::generate_tes_special_vars()
|
|||
add_system_value(SYSTEM_VALUE_TESS_LEVEL_INNER, array(float_t, 2),
|
||||
GLSL_PRECISION_HIGH, "gl_TessLevelInner");
|
||||
}
|
||||
if (state->ARB_shader_viewport_layer_array_enable) {
|
||||
if (state->ARB_shader_viewport_layer_array_enable ||
|
||||
state->NV_viewport_array2_enable) {
|
||||
var = add_output(VARYING_SLOT_LAYER, int_t, "gl_Layer");
|
||||
var->data.interpolation = INTERP_MODE_FLAT;
|
||||
var = add_output(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex");
|
||||
var->data.interpolation = INTERP_MODE_FLAT;
|
||||
}
|
||||
if (state->NV_viewport_array2_enable) {
|
||||
var = add_output(VARYING_SLOT_VIEWPORT_MASK, array(int_t, 1),
|
||||
"gl_ViewportMask");
|
||||
var->data.interpolation = INTERP_MODE_FLAT;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -1208,6 +1247,11 @@ builtin_variable_generator::generate_gs_special_vars()
|
|||
"gl_ViewportIndex");
|
||||
var->data.interpolation = INTERP_MODE_FLAT;
|
||||
}
|
||||
if (state->NV_viewport_array2_enable) {
|
||||
var = add_output(VARYING_SLOT_VIEWPORT_MASK, array(int_t, 1),
|
||||
"gl_ViewportMask");
|
||||
var->data.interpolation = INTERP_MODE_FLAT;
|
||||
}
|
||||
if (state->is_version(400, 320) || state->ARB_gpu_shader5_enable ||
|
||||
state->OES_geometry_shader_enable || state->EXT_geometry_shader_enable) {
|
||||
add_system_value(SYSTEM_VALUE_INVOCATION_ID, int_t, GLSL_PRECISION_HIGH,
|
||||
|
|
|
@ -59,6 +59,11 @@
|
|||
#define FLOAT_ROUND_UP 3
|
||||
#define FLOAT_ROUNDING_MODE FLOAT_ROUND_NEAREST_EVEN
|
||||
|
||||
/* Relax propagation of NaN. Binary operations with a NaN source will still
|
||||
* produce a NaN result, but it won't follow strict IEEE rules.
|
||||
*/
|
||||
#define RELAXED_NAN_PROPAGATION
|
||||
|
||||
/* Absolute value of a Float64 :
|
||||
* Clear the sign bit
|
||||
*/
|
||||
|
@ -88,10 +93,7 @@ uint64_t
|
|||
__fneg64(uint64_t __a)
|
||||
{
|
||||
uvec2 a = unpackUint2x32(__a);
|
||||
uint t = a.y;
|
||||
|
||||
t ^= (1u << 31);
|
||||
a.y = mix(t, a.y, __is_nan(__a));
|
||||
a.y ^= (1u << 31);
|
||||
return packUint2x32(a);
|
||||
}
|
||||
|
||||
|
@ -165,17 +167,17 @@ __fne64(uint64_t a, uint64_t b)
|
|||
uint
|
||||
__extractFloat64Sign(uint64_t a)
|
||||
{
|
||||
return unpackUint2x32(a).y >> 31;
|
||||
return unpackUint2x32(a).y & 0x80000000u;
|
||||
}
|
||||
|
||||
/* Returns true if the 64-bit value formed by concatenating `a0' and `a1' is less
|
||||
* than the 64-bit value formed by concatenating `b0' and `b1'. Otherwise,
|
||||
* returns false.
|
||||
/* Returns true if the signed 64-bit value formed by concatenating `a0' and
|
||||
* `a1' is less than the signed 64-bit value formed by concatenating `b0' and
|
||||
* `b1'. Otherwise, returns false.
|
||||
*/
|
||||
bool
|
||||
lt64(uint a0, uint a1, uint b0, uint b1)
|
||||
ilt64(uint a0, uint a1, uint b0, uint b1)
|
||||
{
|
||||
return (a0 < b0) || ((a0 == b0) && (a1 < b1));
|
||||
return (int(a0) < int(b0)) || ((a0 == b0) && (a1 < b1));
|
||||
}
|
||||
|
||||
bool
|
||||
|
@ -183,12 +185,42 @@ __flt64_nonnan(uint64_t __a, uint64_t __b)
|
|||
{
|
||||
uvec2 a = unpackUint2x32(__a);
|
||||
uvec2 b = unpackUint2x32(__b);
|
||||
uint aSign = __extractFloat64Sign(__a);
|
||||
uint bSign = __extractFloat64Sign(__b);
|
||||
if (aSign != bSign)
|
||||
return (aSign != 0u) && ((((a.y | b.y)<<1) | a.x | b.x) != 0u);
|
||||
|
||||
return mix(lt64(a.y, a.x, b.y, b.x), lt64(b.y, b.x, a.y, a.x), aSign != 0u);
|
||||
/* IEEE 754 floating point numbers are specifically designed so that, with
|
||||
* two exceptions, values can be compared by bit-casting to signed integers
|
||||
* with the same number of bits.
|
||||
*
|
||||
* From https://en.wikipedia.org/wiki/IEEE_754-1985#Comparing_floating-point_numbers:
|
||||
*
|
||||
* When comparing as 2's-complement integers: If the sign bits differ,
|
||||
* the negative number precedes the positive number, so 2's complement
|
||||
* gives the correct result (except that negative zero and positive zero
|
||||
* should be considered equal). If both values are positive, the 2's
|
||||
* complement comparison again gives the correct result. Otherwise (two
|
||||
* negative numbers), the correct FP ordering is the opposite of the 2's
|
||||
* complement ordering.
|
||||
*
|
||||
* The logic implied by the above quotation is:
|
||||
*
|
||||
* !both_are_zero(a, b) && (both_negative(a, b) ? a > b : a < b)
|
||||
*
|
||||
* This is equivalent to
|
||||
*
|
||||
* fne(a, b) && (both_negative(a, b) ? a >= b : a < b)
|
||||
*
|
||||
* fne(a, b) && (both_negative(a, b) ? !(a < b) : a < b)
|
||||
*
|
||||
* fne(a, b) && ((both_negative(a, b) && !(a < b)) ||
|
||||
* (!both_negative(a, b) && (a < b)))
|
||||
*
|
||||
* (A!|B)&(A|!B) is (A xor B) which is implemented here using !=.
|
||||
*
|
||||
* fne(a, b) && (both_negative(a, b) != (a < b))
|
||||
*/
|
||||
bool lt = ilt64(a.y, a.x, b.y, b.x);
|
||||
bool both_negative = (a.y & b.y & 0x80000000u) != 0;
|
||||
|
||||
return !__feq64_nonnan(__a, __b) && (lt != both_negative);
|
||||
}
|
||||
|
||||
/* Returns true if the double-precision floating-point value `a' is less than
|
||||
|
@ -198,10 +230,15 @@ __flt64_nonnan(uint64_t __a, uint64_t __b)
|
|||
bool
|
||||
__flt64(uint64_t a, uint64_t b)
|
||||
{
|
||||
if (__is_nan(a) || __is_nan(b))
|
||||
return false;
|
||||
/* This weird layout matters. Doing the "obvious" thing results in extra
|
||||
* flow control being inserted to implement the short-circuit evaluation
|
||||
* rules. Flow control is bad!
|
||||
*/
|
||||
bool x = !__is_nan(a);
|
||||
bool y = !__is_nan(b);
|
||||
bool z = __flt64_nonnan(a, b);
|
||||
|
||||
return __flt64_nonnan(a, b);
|
||||
return (x && y && z);
|
||||
}
|
||||
|
||||
/* Returns true if the double-precision floating-point value `a' is greater
|
||||
|
@ -212,19 +249,45 @@ __flt64(uint64_t a, uint64_t b)
|
|||
bool
|
||||
__fge64(uint64_t a, uint64_t b)
|
||||
{
|
||||
if (__is_nan(a) || __is_nan(b))
|
||||
return false;
|
||||
/* This weird layout matters. Doing the "obvious" thing results in extra
|
||||
* flow control being inserted to implement the short-circuit evaluation
|
||||
* rules. Flow control is bad!
|
||||
*/
|
||||
bool x = !__is_nan(a);
|
||||
bool y = !__is_nan(b);
|
||||
bool z = !__flt64_nonnan(a, b);
|
||||
|
||||
return !__flt64_nonnan(a, b);
|
||||
return (x && y && z);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
__fsat64(uint64_t __a)
|
||||
{
|
||||
if (__flt64(__a, 0ul))
|
||||
uvec2 a = unpackUint2x32(__a);
|
||||
|
||||
/* fsat(NaN) should be zero. */
|
||||
if (__is_nan(__a) || int(a.y) < 0)
|
||||
return 0ul;
|
||||
|
||||
if (__fge64(__a, 0x3FF0000000000000ul /* 1.0 */))
|
||||
/* IEEE 754 floating point numbers are specifically designed so that, with
|
||||
* two exceptions, values can be compared by bit-casting to signed integers
|
||||
* with the same number of bits.
|
||||
*
|
||||
* From https://en.wikipedia.org/wiki/IEEE_754-1985#Comparing_floating-point_numbers:
|
||||
*
|
||||
* When comparing as 2's-complement integers: If the sign bits differ,
|
||||
* the negative number precedes the positive number, so 2's complement
|
||||
* gives the correct result (except that negative zero and positive zero
|
||||
* should be considered equal). If both values are positive, the 2's
|
||||
* complement comparison again gives the correct result. Otherwise (two
|
||||
* negative numbers), the correct FP ordering is the opposite of the 2's
|
||||
* complement ordering.
|
||||
*
|
||||
* We know that both values are not negative, and we know that at least one
|
||||
* value is not zero. Therefore, we can just use the 2's complement
|
||||
* comparison ordering.
|
||||
*/
|
||||
if (ilt64(0x3FF00000, 0x00000000, a.y, a.x))
|
||||
return 0x3FF0000000000000ul;
|
||||
|
||||
return __a;
|
||||
|
@ -376,7 +439,7 @@ __packFloat64(uint zSign, int zExp, uint zFrac0, uint zFrac1)
|
|||
{
|
||||
uvec2 z;
|
||||
|
||||
z.y = (zSign << 31) + (uint(zExp) << 20) + zFrac0;
|
||||
z.y = zSign + (uint(zExp) << 20) + zFrac0;
|
||||
z.x = zFrac1;
|
||||
return packUint2x32(z);
|
||||
}
|
||||
|
@ -437,23 +500,25 @@ __roundAndPackFloat64(uint zSign,
|
|||
}
|
||||
return __packFloat64(zSign, 0x7FF, 0u, 0u);
|
||||
}
|
||||
if (zExp < 0) {
|
||||
__shift64ExtraRightJamming(
|
||||
zFrac0, zFrac1, zFrac2, -zExp, zFrac0, zFrac1, zFrac2);
|
||||
zExp = 0;
|
||||
if (roundNearestEven) {
|
||||
increment = zFrac2 < 0u;
|
||||
}
|
||||
|
||||
if (zExp < 0) {
|
||||
__shift64ExtraRightJamming(
|
||||
zFrac0, zFrac1, zFrac2, -zExp, zFrac0, zFrac1, zFrac2);
|
||||
zExp = 0;
|
||||
if (roundNearestEven) {
|
||||
increment = zFrac2 < 0u;
|
||||
} else {
|
||||
if (zSign != 0u) {
|
||||
increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) &&
|
||||
(zFrac2 != 0u);
|
||||
} else {
|
||||
if (zSign != 0u) {
|
||||
increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) &&
|
||||
(zFrac2 != 0u);
|
||||
} else {
|
||||
increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) &&
|
||||
(zFrac2 != 0u);
|
||||
}
|
||||
increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) &&
|
||||
(zFrac2 != 0u);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (increment) {
|
||||
__add64(zFrac0, zFrac1, 0u, 1u, zFrac0, zFrac1);
|
||||
zFrac1 &= ~((zFrac2 + uint(zFrac2 == 0u)) & uint(roundNearestEven));
|
||||
|
@ -492,7 +557,7 @@ __roundAndPackUInt64(uint zSign, uint zFrac0, uint zFrac1, uint zFrac2)
|
|||
zFrac1 &= ~(1u) + uint(zFrac2 == 0u) & uint(roundNearestEven);
|
||||
}
|
||||
return mix(packUint2x32(uvec2(zFrac1, zFrac0)), default_nan,
|
||||
(zSign !=0u && (zFrac0 | zFrac1) != 0u));
|
||||
(zSign != 0u && (zFrac0 | zFrac1) != 0u));
|
||||
}
|
||||
|
||||
int64_t
|
||||
|
@ -526,9 +591,9 @@ __roundAndPackInt64(uint zSign, uint zFrac0, uint zFrac1, uint zFrac2)
|
|||
|
||||
int64_t absZ = mix(int64_t(packUint2x32(uvec2(zFrac1, zFrac0))),
|
||||
-int64_t(packUint2x32(uvec2(zFrac1, zFrac0))),
|
||||
(zSign != 0u));
|
||||
int64_t nan = mix(default_PosNaN, default_NegNaN, bool(zSign));
|
||||
return mix(absZ, nan, bool(zSign ^ uint(absZ < 0)) && bool(absZ));
|
||||
zSign != 0u);
|
||||
int64_t nan = mix(default_PosNaN, default_NegNaN, zSign != 0u);
|
||||
return mix(absZ, nan, ((zSign != 0u) != (absZ < 0)) && bool(absZ));
|
||||
}
|
||||
|
||||
/* Returns the number of leading 0 bits before the most-significant 1 bit of
|
||||
|
@ -537,9 +602,7 @@ __roundAndPackInt64(uint zSign, uint zFrac0, uint zFrac1, uint zFrac2)
|
|||
int
|
||||
__countLeadingZeros32(uint a)
|
||||
{
|
||||
int shiftCount;
|
||||
shiftCount = mix(31 - findMSB(a), 32, a == 0u);
|
||||
return shiftCount;
|
||||
return 31 - findMSB(a);
|
||||
}
|
||||
|
||||
/* Takes an abstract floating-point value having sign `zSign', exponent `zExp',
|
||||
|
@ -583,6 +646,12 @@ __normalizeRoundAndPackFloat64(uint zSign,
|
|||
uint64_t
|
||||
__propagateFloat64NaN(uint64_t __a, uint64_t __b)
|
||||
{
|
||||
#if defined RELAXED_NAN_PROPAGATION
|
||||
uvec2 a = unpackUint2x32(__a);
|
||||
uvec2 b = unpackUint2x32(__b);
|
||||
|
||||
return packUint2x32(uvec2(a.x | b.x, a.y | b.y));
|
||||
#else
|
||||
bool aIsNaN = __is_nan(__a);
|
||||
bool bIsNaN = __is_nan(__b);
|
||||
uvec2 a = unpackUint2x32(__a);
|
||||
|
@ -591,8 +660,20 @@ __propagateFloat64NaN(uint64_t __a, uint64_t __b)
|
|||
b.y |= 0x00080000u;
|
||||
|
||||
return packUint2x32(mix(b, mix(a, b, bvec2(bIsNaN, bIsNaN)), bvec2(aIsNaN, aIsNaN)));
|
||||
#endif
|
||||
}
|
||||
|
||||
/* If a shader is in the soft-fp64 path, it almost certainly has register
|
||||
* pressure problems. Choose a method to exchange two values that does not
|
||||
* require a temporary.
|
||||
*/
|
||||
#define EXCHANGE(a, b) \
|
||||
do { \
|
||||
a ^= b; \
|
||||
b ^= a; \
|
||||
a ^= b; \
|
||||
} while (false)
|
||||
|
||||
/* Returns the result of adding the double-precision floating-point values
|
||||
* `a' and `b'. The operation is performed according to the IEEE Standard for
|
||||
* Floating-Point Arithmetic.
|
||||
|
@ -608,17 +689,16 @@ __fadd64(uint64_t a, uint64_t b)
|
|||
uint bFracHi = __extractFloat64FracHi(b);
|
||||
int aExp = __extractFloat64Exp(a);
|
||||
int bExp = __extractFloat64Exp(b);
|
||||
uint zFrac0 = 0u;
|
||||
uint zFrac1 = 0u;
|
||||
int expDiff = aExp - bExp;
|
||||
if (aSign == bSign) {
|
||||
uint zFrac2 = 0u;
|
||||
uint zFrac0;
|
||||
uint zFrac1;
|
||||
uint zFrac2;
|
||||
int zExp;
|
||||
bool orig_exp_diff_is_zero = (expDiff == 0);
|
||||
|
||||
if (orig_exp_diff_is_zero) {
|
||||
if (expDiff == 0) {
|
||||
if (aExp == 0x7FF) {
|
||||
bool propagate = (aFracHi | aFracLo | bFracHi | bFracLo) != 0u;
|
||||
bool propagate = ((aFracHi | bFracHi) | (aFracLo| bFracLo)) != 0u;
|
||||
return mix(a, __propagateFloat64NaN(a, b), propagate);
|
||||
}
|
||||
__add64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);
|
||||
|
@ -629,29 +709,24 @@ __fadd64(uint64_t a, uint64_t b)
|
|||
zExp = aExp;
|
||||
__shift64ExtraRightJamming(
|
||||
zFrac0, zFrac1, zFrac2, 1, zFrac0, zFrac1, zFrac2);
|
||||
} else if (0 < expDiff) {
|
||||
if (aExp == 0x7FF) {
|
||||
bool propagate = (aFracHi | aFracLo) != 0u;
|
||||
return mix(a, __propagateFloat64NaN(a, b), propagate);
|
||||
} else {
|
||||
if (expDiff < 0) {
|
||||
EXCHANGE(aFracHi, bFracHi);
|
||||
EXCHANGE(aFracLo, bFracLo);
|
||||
EXCHANGE(aExp, bExp);
|
||||
}
|
||||
|
||||
expDiff = mix(expDiff, expDiff - 1, bExp == 0);
|
||||
if (aExp == 0x7FF) {
|
||||
bool propagate = (aFracHi | aFracLo) != 0u;
|
||||
return mix(__packFloat64(aSign, 0x7ff, 0u, 0u), __propagateFloat64NaN(a, b), propagate);
|
||||
}
|
||||
|
||||
expDiff = mix(abs(expDiff), abs(expDiff) - 1, bExp == 0);
|
||||
bFracHi = mix(bFracHi | 0x00100000u, bFracHi, bExp == 0);
|
||||
__shift64ExtraRightJamming(
|
||||
bFracHi, bFracLo, 0u, expDiff, bFracHi, bFracLo, zFrac2);
|
||||
zExp = aExp;
|
||||
} else if (expDiff < 0) {
|
||||
if (bExp == 0x7FF) {
|
||||
bool propagate = (bFracHi | bFracLo) != 0u;
|
||||
return mix(__packFloat64(aSign, 0x7ff, 0u, 0u), __propagateFloat64NaN(a, b), propagate);
|
||||
}
|
||||
expDiff = mix(expDiff, expDiff + 1, aExp == 0);
|
||||
aFracHi = mix(aFracHi | 0x00100000u, aFracHi, aExp == 0);
|
||||
__shift64ExtraRightJamming(
|
||||
aFracHi, aFracLo, 0u, - expDiff, aFracHi, aFracLo, zFrac2);
|
||||
zExp = bExp;
|
||||
}
|
||||
if (!orig_exp_diff_is_zero) {
|
||||
|
||||
aFracHi |= 0x00100000u;
|
||||
__add64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);
|
||||
--zExp;
|
||||
|
@ -667,12 +742,23 @@ __fadd64(uint64_t a, uint64_t b)
|
|||
|
||||
__shortShift64Left(aFracHi, aFracLo, 10, aFracHi, aFracLo);
|
||||
__shortShift64Left(bFracHi, bFracLo, 10, bFracHi, bFracLo);
|
||||
if (0 < expDiff) {
|
||||
if (expDiff != 0) {
|
||||
uint zFrac0;
|
||||
uint zFrac1;
|
||||
|
||||
if (expDiff < 0) {
|
||||
EXCHANGE(aFracHi, bFracHi);
|
||||
EXCHANGE(aFracLo, bFracLo);
|
||||
EXCHANGE(aExp, bExp);
|
||||
aSign ^= 0x80000000u;
|
||||
}
|
||||
|
||||
if (aExp == 0x7FF) {
|
||||
bool propagate = (aFracHi | aFracLo) != 0u;
|
||||
return mix(a, __propagateFloat64NaN(a, b), propagate);
|
||||
return mix(__packFloat64(aSign, 0x7ff, 0u, 0u), __propagateFloat64NaN(a, b), propagate);
|
||||
}
|
||||
expDiff = mix(expDiff, expDiff - 1, bExp == 0);
|
||||
|
||||
expDiff = mix(abs(expDiff), abs(expDiff) - 1, bExp == 0);
|
||||
bFracHi = mix(bFracHi | 0x40000000u, bFracHi, bExp == 0);
|
||||
__shift64RightJamming(bFracHi, bFracLo, expDiff, bFracHi, bFracLo);
|
||||
aFracHi |= 0x40000000u;
|
||||
|
@ -681,79 +767,39 @@ __fadd64(uint64_t a, uint64_t b)
|
|||
--zExp;
|
||||
return __normalizeRoundAndPackFloat64(aSign, zExp - 10, zFrac0, zFrac1);
|
||||
}
|
||||
if (expDiff < 0) {
|
||||
if (bExp == 0x7FF) {
|
||||
bool propagate = (bFracHi | bFracLo) != 0u;
|
||||
return mix(__packFloat64(aSign ^ 1u, 0x7ff, 0u, 0u), __propagateFloat64NaN(a, b), propagate);
|
||||
}
|
||||
expDiff = mix(expDiff, expDiff + 1, aExp == 0);
|
||||
aFracHi = mix(aFracHi | 0x40000000u, aFracHi, aExp == 0);
|
||||
__shift64RightJamming(aFracHi, aFracLo, - expDiff, aFracHi, aFracLo);
|
||||
bFracHi |= 0x40000000u;
|
||||
__sub64(bFracHi, bFracLo, aFracHi, aFracLo, zFrac0, zFrac1);
|
||||
zExp = bExp;
|
||||
aSign ^= 1u;
|
||||
--zExp;
|
||||
return __normalizeRoundAndPackFloat64(aSign, zExp - 10, zFrac0, zFrac1);
|
||||
}
|
||||
if (aExp == 0x7FF) {
|
||||
bool propagate = (aFracHi | aFracLo | bFracHi | bFracLo) != 0u;
|
||||
bool propagate = ((aFracHi | bFracHi) | (aFracLo | bFracLo)) != 0u;
|
||||
return mix(0xFFFFFFFFFFFFFFFFUL, __propagateFloat64NaN(a, b), propagate);
|
||||
}
|
||||
bExp = mix(bExp, 1, aExp == 0);
|
||||
aExp = mix(aExp, 1, aExp == 0);
|
||||
bool zexp_normal = false;
|
||||
bool blta = true;
|
||||
|
||||
uint zFrac0;
|
||||
uint zFrac1;
|
||||
uint sign_of_difference = 0;
|
||||
if (bFracHi < aFracHi) {
|
||||
__sub64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);
|
||||
zexp_normal = true;
|
||||
}
|
||||
else if (aFracHi < bFracHi) {
|
||||
__sub64(bFracHi, bFracLo, aFracHi, aFracLo, zFrac0, zFrac1);
|
||||
blta = false;
|
||||
zexp_normal = true;
|
||||
sign_of_difference = 0x80000000;
|
||||
}
|
||||
else if (bFracLo < aFracLo) {
|
||||
else if (bFracLo <= aFracLo) {
|
||||
/* It is possible that zFrac0 and zFrac1 may be zero after this. */
|
||||
__sub64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);
|
||||
zexp_normal = true;
|
||||
}
|
||||
else if (aFracLo < bFracLo) {
|
||||
else {
|
||||
__sub64(bFracHi, bFracLo, aFracHi, aFracLo, zFrac0, zFrac1);
|
||||
blta = false;
|
||||
zexp_normal = true;
|
||||
sign_of_difference = 0x80000000;
|
||||
}
|
||||
zExp = mix(bExp, aExp, blta);
|
||||
aSign = mix(aSign ^ 1u, aSign, blta);
|
||||
uint64_t retval_0 = __packFloat64(uint(FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN), 0, 0u, 0u);
|
||||
zExp = mix(bExp, aExp, sign_of_difference == 0u);
|
||||
aSign ^= sign_of_difference;
|
||||
uint64_t retval_0 = __packFloat64(uint(FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) << 31, 0, 0u, 0u);
|
||||
uint64_t retval_1 = __normalizeRoundAndPackFloat64(aSign, zExp - 11, zFrac0, zFrac1);
|
||||
return mix(retval_0, retval_1, zexp_normal);
|
||||
return mix(retval_0, retval_1, zFrac0 != 0u || zFrac1 != 0u);
|
||||
}
|
||||
}
|
||||
|
||||
/* Multiplies `a' by `b' to obtain a 64-bit product. The product is broken
|
||||
* into two 32-bit pieces which are stored at the locations pointed to by
|
||||
* `z0Ptr' and `z1Ptr'.
|
||||
*/
|
||||
void
|
||||
__mul32To64(uint a, uint b, out uint z0Ptr, out uint z1Ptr)
|
||||
{
|
||||
uint aLow = a & 0x0000FFFFu;
|
||||
uint aHigh = a>>16;
|
||||
uint bLow = b & 0x0000FFFFu;
|
||||
uint bHigh = b>>16;
|
||||
uint z1 = aLow * bLow;
|
||||
uint zMiddleA = aLow * bHigh;
|
||||
uint zMiddleB = aHigh * bLow;
|
||||
uint z0 = aHigh * bHigh;
|
||||
zMiddleA += zMiddleB;
|
||||
z0 += ((uint(zMiddleA < zMiddleB)) << 16) + (zMiddleA >> 16);
|
||||
zMiddleA <<= 16;
|
||||
z1 += zMiddleA;
|
||||
z0 += uint(z1 < zMiddleA);
|
||||
z1Ptr = z1;
|
||||
z0Ptr = z0;
|
||||
}
|
||||
|
||||
/* Multiplies the 64-bit value formed by concatenating `a0' and `a1' to the
|
||||
* 64-bit value formed by concatenating `b0' and `b1' to obtain a 128-bit
|
||||
* product. The product is broken into four 32-bit pieces which are stored at
|
||||
|
@ -773,12 +819,12 @@ __mul64To128(uint a0, uint a1, uint b0, uint b1,
|
|||
uint more1 = 0u;
|
||||
uint more2 = 0u;
|
||||
|
||||
__mul32To64(a1, b1, z2, z3);
|
||||
__mul32To64(a1, b0, z1, more2);
|
||||
umulExtended(a1, b1, z2, z3);
|
||||
umulExtended(a1, b0, z1, more2);
|
||||
__add64(z1, more2, 0u, z2, z1, z2);
|
||||
__mul32To64(a0, b0, z0, more1);
|
||||
umulExtended(a0, b0, z0, more1);
|
||||
__add64(z0, more1, 0u, z1, z0, z1);
|
||||
__mul32To64(a0, b1, more1, more2);
|
||||
umulExtended(a0, b1, more1, more2);
|
||||
__add64(more1, more2, 0u, z2, more1, z2);
|
||||
__add64(z0, z1, 0u, more1, z0, z1);
|
||||
z3Ptr = z3;
|
||||
|
@ -847,8 +893,13 @@ __fmul64(uint64_t a, uint64_t b)
|
|||
return __packFloat64(zSign, 0x7FF, 0u, 0u);
|
||||
}
|
||||
if (bExp == 0x7FF) {
|
||||
/* a cannot be NaN, but is b NaN? */
|
||||
if ((bFracHi | bFracLo) != 0u)
|
||||
#if defined RELAXED_NAN_PROPAGATION
|
||||
return b;
|
||||
#else
|
||||
return __propagateFloat64NaN(a, b);
|
||||
#endif
|
||||
if ((uint(aExp) | aFracHi | aFracLo) == 0u)
|
||||
return 0xFFFFFFFFFFFFFFFFUL;
|
||||
return __packFloat64(zSign, 0x7FF, 0u, 0u);
|
||||
|
@ -934,13 +985,13 @@ __fp64_to_uint(uint64_t a)
|
|||
__shift64RightJamming(aFracHi, aFracLo, shiftDist, aFracHi, aFracLo);
|
||||
|
||||
if ((aFracHi & 0xFFFFF000u) != 0u)
|
||||
return mix(~0u, 0u, (aSign != 0u));
|
||||
return mix(~0u, 0u, aSign != 0u);
|
||||
|
||||
uint z = 0u;
|
||||
uint zero = 0u;
|
||||
__shift64Right(aFracHi, aFracLo, 12, zero, z);
|
||||
|
||||
uint expt = mix(~0u, 0u, (aSign != 0u));
|
||||
uint expt = mix(~0u, 0u, aSign != 0u);
|
||||
|
||||
return mix(z, expt, (aSign != 0u) && (z != 0u));
|
||||
}
|
||||
|
@ -1047,7 +1098,7 @@ __fp32_to_uint64(float f)
|
|||
uint a = floatBitsToUint(f);
|
||||
uint aFrac = a & 0x007FFFFFu;
|
||||
int aExp = int((a>>23) & 0xFFu);
|
||||
uint aSign = a>>31;
|
||||
uint aSign = a & 0x80000000u;
|
||||
uint zFrac0 = 0u;
|
||||
uint zFrac1 = 0u;
|
||||
uint zFrac2 = 0u;
|
||||
|
@ -1076,7 +1127,7 @@ __fp32_to_int64(float f)
|
|||
uint a = floatBitsToUint(f);
|
||||
uint aFrac = a & 0x007FFFFFu;
|
||||
int aExp = int((a>>23) & 0xFFu);
|
||||
uint aSign = a>>31;
|
||||
uint aSign = a & 0x80000000u;
|
||||
uint zFrac0 = 0u;
|
||||
uint zFrac1 = 0u;
|
||||
uint zFrac2 = 0u;
|
||||
|
@ -1110,10 +1161,10 @@ __int64_to_fp64(int64_t a)
|
|||
uint64_t absA = mix(uint64_t(a), uint64_t(-a), a < 0);
|
||||
uint aFracHi = __extractFloat64FracHi(absA);
|
||||
uvec2 aFrac = unpackUint2x32(absA);
|
||||
uint zSign = uint(a < 0);
|
||||
uint zSign = uint(unpackInt2x32(a).y) & 0x80000000u;
|
||||
|
||||
if ((aFracHi & 0x80000000u) != 0u) {
|
||||
return mix(0ul, __packFloat64(1, 0x434, 0u, 0u), a < 0);
|
||||
return mix(0ul, __packFloat64(0x80000000u, 0x434, 0u, 0u), a < 0);
|
||||
}
|
||||
|
||||
return __normalizeRoundAndPackFloat64(zSign, 0x432, aFrac.y, aFrac.x);
|
||||
|
@ -1143,7 +1194,7 @@ __fp64_to_int(uint64_t a)
|
|||
if (0x41E < aExp) {
|
||||
if ((aExp == 0x7FF) && bool(aFracHi | aFracLo))
|
||||
aSign = 0u;
|
||||
return mix(0x7FFFFFFF, 0x80000000, bool(aSign));
|
||||
return mix(0x7FFFFFFF, 0x80000000, aSign != 0u);
|
||||
}
|
||||
__shortShift64Left(aFracHi | 0x00100000u, aFracLo, shiftCount, absZ, aFracExtra);
|
||||
} else {
|
||||
|
@ -1155,9 +1206,9 @@ __fp64_to_int(uint64_t a)
|
|||
absZ = aFracHi >> (- shiftCount);
|
||||
}
|
||||
|
||||
int z = mix(int(absZ), -int(absZ), (aSign != 0u));
|
||||
int nan = mix(0x7FFFFFFF, 0x80000000, bool(aSign));
|
||||
return mix(z, nan, bool(aSign ^ uint(z < 0)) && bool(z));
|
||||
int z = mix(int(absZ), -int(absZ), aSign != 0u);
|
||||
int nan = mix(0x7FFFFFFF, 0x80000000, aSign != 0u);
|
||||
return mix(z, nan, ((aSign != 0u) != (z < 0)) && bool(z));
|
||||
}
|
||||
|
||||
/* Returns the result of converting the 32-bit two's complement integer `a'
|
||||
|
@ -1171,7 +1222,7 @@ __int_to_fp64(int a)
|
|||
uint zFrac1 = 0u;
|
||||
if (a==0)
|
||||
return __packFloat64(0u, 0, 0u, 0u);
|
||||
uint zSign = uint(a < 0);
|
||||
uint zSign = uint(a) & 0x80000000u;
|
||||
uint absA = mix(uint(a), uint(-a), a < 0);
|
||||
int shiftCount = __countLeadingZeros32(absA) - 11;
|
||||
if (0 <= shiftCount) {
|
||||
|
@ -1192,7 +1243,7 @@ __fp64_to_bool(uint64_t a)
|
|||
uint64_t
|
||||
__bool_to_fp64(bool a)
|
||||
{
|
||||
return __int_to_fp64(int(a));
|
||||
return packUint2x32(uvec2(0x00000000u, uint(-int(a) & 0x3ff00000)));
|
||||
}
|
||||
|
||||
/* Packs the sign `zSign', exponent `zExp', and significand `zFrac' into a
|
||||
|
@ -1207,7 +1258,7 @@ __bool_to_fp64(bool a)
|
|||
float
|
||||
__packFloat32(uint zSign, int zExp, uint zFrac)
|
||||
{
|
||||
return uintBitsToFloat((zSign<<31) + (uint(zExp)<<23) + zFrac);
|
||||
return uintBitsToFloat(zSign + (uint(zExp)<<23) + zFrac);
|
||||
}
|
||||
|
||||
/* Takes an abstract floating-point value having sign `zSign', exponent `zExp',
|
||||
|
@ -1287,7 +1338,7 @@ __fp64_to_fp32(uint64_t __a)
|
|||
uint aSign = __extractFloat64Sign(__a);
|
||||
if (aExp == 0x7FF) {
|
||||
__shortShift64Left(a.y, a.x, 12, a.y, a.x);
|
||||
float rval = uintBitsToFloat((aSign<<31) | 0x7FC00000u | (a.y>>9));
|
||||
float rval = uintBitsToFloat(aSign | 0x7FC00000u | (a.y>>9));
|
||||
rval = mix(__packFloat32(aSign, 0xFF, 0u), rval, (aFracHi | aFracLo) != 0u);
|
||||
return rval;
|
||||
}
|
||||
|
@ -1315,7 +1366,7 @@ __uint64_to_fp32(uint64_t __a)
|
|||
float
|
||||
__int64_to_fp32(int64_t __a)
|
||||
{
|
||||
uint aSign = uint(__a < 0);
|
||||
uint aSign = uint(unpackInt2x32(__a).y) & 0x80000000u;
|
||||
uint64_t absA = mix(uint64_t(__a), uint64_t(-__a), __a < 0);
|
||||
uvec2 aFrac = unpackUint2x32(absA);
|
||||
int shiftCount = mix(__countLeadingZeros32(aFrac.y) - 33,
|
||||
|
@ -1339,7 +1390,7 @@ __fp32_to_fp64(float f)
|
|||
uint a = floatBitsToUint(f);
|
||||
uint aFrac = a & 0x007FFFFFu;
|
||||
int aExp = int((a>>23) & 0xFFu);
|
||||
uint aSign = a>>31;
|
||||
uint aSign = a & 0x80000000u;
|
||||
uint zFrac0 = 0u;
|
||||
uint zFrac1 = 0u;
|
||||
|
||||
|
@ -1348,7 +1399,7 @@ __fp32_to_fp64(float f)
|
|||
uint nanLo = 0u;
|
||||
uint nanHi = a<<9;
|
||||
__shift64Right(nanHi, nanLo, 12, nanHi, nanLo);
|
||||
nanHi |= ((aSign<<31) | 0x7FF80000u);
|
||||
nanHi |= aSign | 0x7FF80000u;
|
||||
return packUint2x32(uvec2(nanLo, nanHi));
|
||||
}
|
||||
return __packFloat64(aSign, 0x7FF, 0u, 0u);
|
||||
|
@ -1442,7 +1493,7 @@ __estimateDiv64To32(uint a0, uint a1, uint b)
|
|||
return 0xFFFFFFFFu;
|
||||
b0 = b>>16;
|
||||
z = (b0<<16 <= a0) ? 0xFFFF0000u : (a0 / b0)<<16;
|
||||
__mul32To64(b, z, term0, term1);
|
||||
umulExtended(b, z, term0, term1);
|
||||
__sub64(a0, a1, term0, term1, rem0, rem1);
|
||||
while (int(rem0) < 0) {
|
||||
z -= 0x10000u;
|
||||
|
@ -1612,7 +1663,7 @@ __fsqrt64(uint64_t a)
|
|||
zFrac0 = 0x7FFFFFFFu;
|
||||
doubleZFrac0 = zFrac0 + zFrac0;
|
||||
__shortShift64Left(aFracHi, aFracLo, 9 - (aExp & 1), aFracHi, aFracLo);
|
||||
__mul32To64(zFrac0, zFrac0, term0, term1);
|
||||
umulExtended(zFrac0, zFrac0, term0, term1);
|
||||
__sub64(aFracHi, aFracLo, term0, term1, rem0, rem1);
|
||||
while (int(rem0) < 0) {
|
||||
--zFrac0;
|
||||
|
@ -1623,9 +1674,9 @@ __fsqrt64(uint64_t a)
|
|||
if ((zFrac1 & 0x1FFu) <= 5u) {
|
||||
if (zFrac1 == 0u)
|
||||
zFrac1 = 1u;
|
||||
__mul32To64(doubleZFrac0, zFrac1, term1, term2);
|
||||
umulExtended(doubleZFrac0, zFrac1, term1, term2);
|
||||
__sub64(rem1, 0u, term1, term2, rem1, rem2);
|
||||
__mul32To64(zFrac1, zFrac1, term2, term3);
|
||||
umulExtended(zFrac1, zFrac1, term2, term3);
|
||||
__sub96(rem1, rem2, 0u, 0u, term2, term3, rem1, rem2, rem3);
|
||||
while (int(rem1) < 0) {
|
||||
--zFrac1;
|
||||
|
@ -1665,7 +1716,19 @@ __ftrunc64(uint64_t __a)
|
|||
uint64_t
|
||||
__ffloor64(uint64_t a)
|
||||
{
|
||||
bool is_positive = __fge64(a, 0ul);
|
||||
/* The big assumtion is that when 'a' is NaN, __ftrunc(a) returns a. Based
|
||||
* on that assumption, NaN values that don't have the sign bit will safely
|
||||
* return NaN (identity). This is guarded by RELAXED_NAN_PROPAGATION
|
||||
* because otherwise the NaN should have the "signal" bit set. The
|
||||
* __fadd64 will ensure that occurs.
|
||||
*/
|
||||
bool is_positive =
|
||||
#if defined RELAXED_NAN_PROPAGATION
|
||||
int(unpackUint2x32(a).y) >= 0
|
||||
#else
|
||||
__fge64(a, 0ul)
|
||||
#endif
|
||||
;
|
||||
uint64_t tr = __ftrunc64(a);
|
||||
|
||||
if (is_positive || __feq64(tr, a)) {
|
||||
|
@ -1723,21 +1786,29 @@ __fround64(uint64_t __a)
|
|||
uint64_t
|
||||
__fmin64(uint64_t a, uint64_t b)
|
||||
{
|
||||
if (__is_nan(a)) return b;
|
||||
if (__is_nan(b)) return a;
|
||||
/* This weird layout matters. Doing the "obvious" thing results in extra
|
||||
* flow control being inserted to implement the short-circuit evaluation
|
||||
* rules. Flow control is bad!
|
||||
*/
|
||||
bool b_nan = __is_nan(b);
|
||||
bool a_lt_b = __flt64_nonnan(a, b);
|
||||
bool a_nan = __is_nan(a);
|
||||
|
||||
if (__flt64_nonnan(a, b)) return a;
|
||||
return b;
|
||||
return (b_nan || a_lt_b) && !a_nan ? a : b;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
__fmax64(uint64_t a, uint64_t b)
|
||||
{
|
||||
if (__is_nan(a)) return b;
|
||||
if (__is_nan(b)) return a;
|
||||
/* This weird layout matters. Doing the "obvious" thing results in extra
|
||||
* flow control being inserted to implement the short-circuit evaluation
|
||||
* rules. Flow control is bad!
|
||||
*/
|
||||
bool b_nan = __is_nan(b);
|
||||
bool a_lt_b = __flt64_nonnan(a, b);
|
||||
bool a_nan = __is_nan(a);
|
||||
|
||||
if (__flt64_nonnan(a, b)) return b;
|
||||
return a;
|
||||
return (b_nan || a_lt_b) && !a_nan ? b : a;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -1,4 +1,4 @@
|
|||
/* A Bison parser, made by GNU Bison 3.4.1. */
|
||||
/* A Bison parser, made by GNU Bison 3.5. */
|
||||
|
||||
/* Bison interface for Yacc-like parsers in C
|
||||
|
||||
|
|
|
@ -173,8 +173,8 @@ add_builtin_define(glcpp_parser_t *parser, const char *name, int value);
|
|||
|
||||
%}
|
||||
|
||||
%pure-parser
|
||||
%error-verbose
|
||||
%define api.pure
|
||||
%define parse.error verbose
|
||||
|
||||
%locations
|
||||
%initial-action {
|
||||
|
@ -1187,6 +1187,9 @@ _token_list_equal_ignoring_space(token_list_t *a, token_list_t *b)
|
|||
node_b = node_b->next;
|
||||
}
|
||||
|
||||
if (node_a == NULL && node_b == NULL)
|
||||
break;
|
||||
|
||||
if (node_b == NULL && node_a->token->type == SPACE) {
|
||||
while (node_a && node_a->token->type == SPACE)
|
||||
node_a = node_a->next;
|
||||
|
|
|
@ -18,33 +18,21 @@
|
|||
#include "program/program.h"
|
||||
|
||||
static void
|
||||
init_gl_program(struct gl_program *prog, bool is_arb_asm, GLenum target)
|
||||
init_gl_program(struct gl_program *prog, bool is_arb_asm, gl_shader_stage stage)
|
||||
{
|
||||
prog->RefCount = 1;
|
||||
prog->Format = GL_PROGRAM_FORMAT_ASCII_ARB;
|
||||
prog->is_arb_asm = is_arb_asm;
|
||||
prog->info.stage = (gl_shader_stage)_mesa_program_enum_to_shader_stage(target);
|
||||
prog->info.stage = stage;
|
||||
}
|
||||
|
||||
static struct gl_program *
|
||||
new_program(UNUSED struct gl_context *ctx, GLenum target,
|
||||
new_program(UNUSED struct gl_context *ctx, gl_shader_stage stage,
|
||||
UNUSED GLuint id, bool is_arb_asm)
|
||||
{
|
||||
switch (target) {
|
||||
case GL_VERTEX_PROGRAM_ARB: /* == GL_VERTEX_PROGRAM_NV */
|
||||
case GL_GEOMETRY_PROGRAM_NV:
|
||||
case GL_TESS_CONTROL_PROGRAM_NV:
|
||||
case GL_TESS_EVALUATION_PROGRAM_NV:
|
||||
case GL_FRAGMENT_PROGRAM_ARB:
|
||||
case GL_COMPUTE_PROGRAM_NV: {
|
||||
struct gl_program *prog = rzalloc(NULL, struct gl_program);
|
||||
init_gl_program(prog, is_arb_asm, target);
|
||||
return prog;
|
||||
}
|
||||
default:
|
||||
printf("bad target in new_program\n");
|
||||
return NULL;
|
||||
}
|
||||
struct gl_program *prog = rzalloc(NULL, struct gl_program);
|
||||
init_gl_program(prog, is_arb_asm, stage);
|
||||
return prog;
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -84,7 +72,7 @@ initialize_mesa_context(struct gl_context *ctx, glslopt_target api)
|
|||
ctx->Extensions.EXT_shader_framebuffer_fetch = true;
|
||||
break;
|
||||
case kGlslTargetOpenGLES30:
|
||||
ctx->Extensions.ARB_ES3_compatibility = true;
|
||||
ctx->Extensions.ARB_ES3_1_compatibility = true;
|
||||
ctx->Extensions.EXT_shader_framebuffer_fetch = true;
|
||||
break;
|
||||
case kGlslTargetMetal:
|
||||
|
@ -677,7 +665,10 @@ glslopt_shader* glslopt_optimize (glslopt_ctx* ctx, glslopt_shader_type type, co
|
|||
validate_ir_tree(ir);
|
||||
shader->rawOutput = _mesa_print_ir_glsl(ir, state, ralloc_strdup(shader, ""), printMode);
|
||||
}
|
||||
|
||||
|
||||
// Lower builtin functions prior to linking.
|
||||
lower_builtins(ir);
|
||||
|
||||
// Link built-in functions
|
||||
shader->shader->symbols = state->symbols;
|
||||
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -1,4 +1,4 @@
|
|||
/* A Bison parser, made by GNU Bison 3.4.1. */
|
||||
/* A Bison parser, made by GNU Bison 3.5. */
|
||||
|
||||
/* Bison interface for Yacc-like parsers in C
|
||||
|
||||
|
@ -192,7 +192,7 @@ extern int _mesa_glsl_debug;
|
|||
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
|
||||
union YYSTYPE
|
||||
{
|
||||
#line 100 "src/compiler/glsl/glsl_parser.yy"
|
||||
#line 101 "src/compiler/glsl/glsl_parser.yy"
|
||||
|
||||
int n;
|
||||
int64_t n64;
|
||||
|
|
|
@ -34,6 +34,7 @@
|
|||
#include "compiler/glsl_types.h"
|
||||
#include "main/context.h"
|
||||
#include "util/u_string.h"
|
||||
#include "util/format/u_format.h"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning( disable : 4065 ) // switch statement contains 'default' but no 'case' labels
|
||||
|
@ -81,8 +82,8 @@ static bool match_layout_qualifier(const char *s1, const char *s2,
|
|||
|
||||
%expect 0
|
||||
|
||||
%pure-parser
|
||||
%error-verbose
|
||||
%define api.pure
|
||||
%define parse.error verbose
|
||||
|
||||
%locations
|
||||
%initial-action {
|
||||
|
@ -1336,7 +1337,7 @@ layout_qualifier_id:
|
|||
if (!$$.flags.i) {
|
||||
static const struct {
|
||||
const char *name;
|
||||
GLenum format;
|
||||
enum pipe_format format;
|
||||
glsl_base_type base_type;
|
||||
/** Minimum desktop GLSL version required for the image
|
||||
* format. Use 130 if already present in the original
|
||||
|
@ -1349,54 +1350,54 @@ layout_qualifier_id:
|
|||
bool nv_image_formats;
|
||||
bool ext_qualifiers;
|
||||
} map[] = {
|
||||
{ "rgba32f", GL_RGBA32F, GLSL_TYPE_FLOAT, 130, 310, false, false },
|
||||
{ "rgba16f", GL_RGBA16F, GLSL_TYPE_FLOAT, 130, 310, false, false },
|
||||
{ "rg32f", GL_RG32F, GLSL_TYPE_FLOAT, 130, 0, true, false },
|
||||
{ "rg16f", GL_RG16F, GLSL_TYPE_FLOAT, 130, 0, true, false },
|
||||
{ "r11f_g11f_b10f", GL_R11F_G11F_B10F, GLSL_TYPE_FLOAT, 130, 0, true, false },
|
||||
{ "r32f", GL_R32F, GLSL_TYPE_FLOAT, 130, 310, false, false },
|
||||
{ "r16f", GL_R16F, GLSL_TYPE_FLOAT, 130, 0, true, false },
|
||||
{ "rgba32ui", GL_RGBA32UI, GLSL_TYPE_UINT, 130, 310, false, false },
|
||||
{ "rgba16ui", GL_RGBA16UI, GLSL_TYPE_UINT, 130, 310, false, false },
|
||||
{ "rgb10_a2ui", GL_RGB10_A2UI, GLSL_TYPE_UINT, 130, 0, true, false },
|
||||
{ "rgba8ui", GL_RGBA8UI, GLSL_TYPE_UINT, 130, 310, false, false },
|
||||
{ "rg32ui", GL_RG32UI, GLSL_TYPE_UINT, 130, 0, true, false },
|
||||
{ "rg16ui", GL_RG16UI, GLSL_TYPE_UINT, 130, 0, true, false },
|
||||
{ "rg8ui", GL_RG8UI, GLSL_TYPE_UINT, 130, 0, true, false },
|
||||
{ "r32ui", GL_R32UI, GLSL_TYPE_UINT, 130, 310, false, false },
|
||||
{ "r16ui", GL_R16UI, GLSL_TYPE_UINT, 130, 0, true, false },
|
||||
{ "r8ui", GL_R8UI, GLSL_TYPE_UINT, 130, 0, true, false },
|
||||
{ "rgba32i", GL_RGBA32I, GLSL_TYPE_INT, 130, 310, false, false },
|
||||
{ "rgba16i", GL_RGBA16I, GLSL_TYPE_INT, 130, 310, false, false },
|
||||
{ "rgba8i", GL_RGBA8I, GLSL_TYPE_INT, 130, 310, false, false },
|
||||
{ "rg32i", GL_RG32I, GLSL_TYPE_INT, 130, 0, true, false },
|
||||
{ "rg16i", GL_RG16I, GLSL_TYPE_INT, 130, 0, true, false },
|
||||
{ "rg8i", GL_RG8I, GLSL_TYPE_INT, 130, 0, true, false },
|
||||
{ "r32i", GL_R32I, GLSL_TYPE_INT, 130, 310, false, false },
|
||||
{ "r16i", GL_R16I, GLSL_TYPE_INT, 130, 0, true, false },
|
||||
{ "r8i", GL_R8I, GLSL_TYPE_INT, 130, 0, true, false },
|
||||
{ "rgba16", GL_RGBA16, GLSL_TYPE_FLOAT, 130, 0, true, false },
|
||||
{ "rgb10_a2", GL_RGB10_A2, GLSL_TYPE_FLOAT, 130, 0, true, false },
|
||||
{ "rgba8", GL_RGBA8, GLSL_TYPE_FLOAT, 130, 310, false, false },
|
||||
{ "rg16", GL_RG16, GLSL_TYPE_FLOAT, 130, 0, true, false },
|
||||
{ "rg8", GL_RG8, GLSL_TYPE_FLOAT, 130, 0, true, false },
|
||||
{ "r16", GL_R16, GLSL_TYPE_FLOAT, 130, 0, true, false },
|
||||
{ "r8", GL_R8, GLSL_TYPE_FLOAT, 130, 0, true, false },
|
||||
{ "rgba16_snorm", GL_RGBA16_SNORM, GLSL_TYPE_FLOAT, 130, 0, true, false },
|
||||
{ "rgba8_snorm", GL_RGBA8_SNORM, GLSL_TYPE_FLOAT, 130, 310, false, false },
|
||||
{ "rg16_snorm", GL_RG16_SNORM, GLSL_TYPE_FLOAT, 130, 0, true, false },
|
||||
{ "rg8_snorm", GL_RG8_SNORM, GLSL_TYPE_FLOAT, 130, 0, true, false },
|
||||
{ "r16_snorm", GL_R16_SNORM, GLSL_TYPE_FLOAT, 130, 0, true, false },
|
||||
{ "r8_snorm", GL_R8_SNORM, GLSL_TYPE_FLOAT, 130, 0, true, false },
|
||||
{ "rgba32f", PIPE_FORMAT_R32G32B32A32_FLOAT, GLSL_TYPE_FLOAT, 130, 310, false, false },
|
||||
{ "rgba16f", PIPE_FORMAT_R16G16B16A16_FLOAT, GLSL_TYPE_FLOAT, 130, 310, false, false },
|
||||
{ "rg32f", PIPE_FORMAT_R32G32_FLOAT, GLSL_TYPE_FLOAT, 130, 0, true, false },
|
||||
{ "rg16f", PIPE_FORMAT_R16G16_FLOAT, GLSL_TYPE_FLOAT, 130, 0, true, false },
|
||||
{ "r11f_g11f_b10f", PIPE_FORMAT_R11G11B10_FLOAT, GLSL_TYPE_FLOAT, 130, 0, true, false },
|
||||
{ "r32f", PIPE_FORMAT_R32_FLOAT, GLSL_TYPE_FLOAT, 130, 310, false, false },
|
||||
{ "r16f", PIPE_FORMAT_R16_FLOAT, GLSL_TYPE_FLOAT, 130, 0, true, false },
|
||||
{ "rgba32ui", PIPE_FORMAT_R32G32B32A32_UINT, GLSL_TYPE_UINT, 130, 310, false, false },
|
||||
{ "rgba16ui", PIPE_FORMAT_R16G16B16A16_UINT, GLSL_TYPE_UINT, 130, 310, false, false },
|
||||
{ "rgb10_a2ui", PIPE_FORMAT_R10G10B10A2_UINT, GLSL_TYPE_UINT, 130, 0, true, false },
|
||||
{ "rgba8ui", PIPE_FORMAT_R8G8B8A8_UINT, GLSL_TYPE_UINT, 130, 310, false, false },
|
||||
{ "rg32ui", PIPE_FORMAT_R32G32_UINT, GLSL_TYPE_UINT, 130, 0, true, false },
|
||||
{ "rg16ui", PIPE_FORMAT_R16G16_UINT, GLSL_TYPE_UINT, 130, 0, true, false },
|
||||
{ "rg8ui", PIPE_FORMAT_R8G8_UINT, GLSL_TYPE_UINT, 130, 0, true, false },
|
||||
{ "r32ui", PIPE_FORMAT_R32_UINT, GLSL_TYPE_UINT, 130, 310, false, false },
|
||||
{ "r16ui", PIPE_FORMAT_R16_UINT, GLSL_TYPE_UINT, 130, 0, true, false },
|
||||
{ "r8ui", PIPE_FORMAT_R8_UINT, GLSL_TYPE_UINT, 130, 0, true, false },
|
||||
{ "rgba32i", PIPE_FORMAT_R32G32B32A32_SINT, GLSL_TYPE_INT, 130, 310, false, false },
|
||||
{ "rgba16i", PIPE_FORMAT_R16G16B16A16_SINT, GLSL_TYPE_INT, 130, 310, false, false },
|
||||
{ "rgba8i", PIPE_FORMAT_R8G8B8A8_SINT, GLSL_TYPE_INT, 130, 310, false, false },
|
||||
{ "rg32i", PIPE_FORMAT_R32G32_SINT, GLSL_TYPE_INT, 130, 0, true, false },
|
||||
{ "rg16i", PIPE_FORMAT_R16G16_SINT, GLSL_TYPE_INT, 130, 0, true, false },
|
||||
{ "rg8i", PIPE_FORMAT_R8G8_SINT, GLSL_TYPE_INT, 130, 0, true, false },
|
||||
{ "r32i", PIPE_FORMAT_R32_SINT, GLSL_TYPE_INT, 130, 310, false, false },
|
||||
{ "r16i", PIPE_FORMAT_R16_SINT, GLSL_TYPE_INT, 130, 0, true, false },
|
||||
{ "r8i", PIPE_FORMAT_R8_SINT, GLSL_TYPE_INT, 130, 0, true, false },
|
||||
{ "rgba16", PIPE_FORMAT_R16G16B16A16_UNORM, GLSL_TYPE_FLOAT, 130, 0, true, false },
|
||||
{ "rgb10_a2", PIPE_FORMAT_R10G10B10A2_UNORM, GLSL_TYPE_FLOAT, 130, 0, true, false },
|
||||
{ "rgba8", PIPE_FORMAT_R8G8B8A8_UNORM, GLSL_TYPE_FLOAT, 130, 310, false, false },
|
||||
{ "rg16", PIPE_FORMAT_R16G16_UNORM, GLSL_TYPE_FLOAT, 130, 0, true, false },
|
||||
{ "rg8", PIPE_FORMAT_R8G8_UNORM, GLSL_TYPE_FLOAT, 130, 0, true, false },
|
||||
{ "r16", PIPE_FORMAT_R16_UNORM, GLSL_TYPE_FLOAT, 130, 0, true, false },
|
||||
{ "r8", PIPE_FORMAT_R8_UNORM, GLSL_TYPE_FLOAT, 130, 0, true, false },
|
||||
{ "rgba16_snorm", PIPE_FORMAT_R16G16B16A16_SNORM, GLSL_TYPE_FLOAT, 130, 0, true, false },
|
||||
{ "rgba8_snorm", PIPE_FORMAT_R8G8B8A8_SNORM, GLSL_TYPE_FLOAT, 130, 310, false, false },
|
||||
{ "rg16_snorm", PIPE_FORMAT_R16G16_SNORM, GLSL_TYPE_FLOAT, 130, 0, true, false },
|
||||
{ "rg8_snorm", PIPE_FORMAT_R8G8_SNORM, GLSL_TYPE_FLOAT, 130, 0, true, false },
|
||||
{ "r16_snorm", PIPE_FORMAT_R16_SNORM, GLSL_TYPE_FLOAT, 130, 0, true, false },
|
||||
{ "r8_snorm", PIPE_FORMAT_R8_SNORM, GLSL_TYPE_FLOAT, 130, 0, true, false },
|
||||
|
||||
/* From GL_EXT_shader_image_load_store: */
|
||||
/* base_type is incorrect but it'll be patched later when we know
|
||||
* the variable type. See ast_to_hir.cpp */
|
||||
{ "size1x8", GL_R8I, GLSL_TYPE_VOID, 130, 0, false, true },
|
||||
{ "size1x16", GL_R16I, GLSL_TYPE_VOID, 130, 0, false, true },
|
||||
{ "size1x32", GL_R32I, GLSL_TYPE_VOID, 130, 0, false, true },
|
||||
{ "size2x32", GL_RG32I, GLSL_TYPE_VOID, 130, 0, false, true },
|
||||
{ "size4x32", GL_RGBA32I, GLSL_TYPE_VOID, 130, 0, false, true },
|
||||
{ "size1x8", PIPE_FORMAT_R8_SINT, GLSL_TYPE_VOID, 130, 0, false, true },
|
||||
{ "size1x16", PIPE_FORMAT_R16_SINT, GLSL_TYPE_VOID, 130, 0, false, true },
|
||||
{ "size1x32", PIPE_FORMAT_R32_SINT, GLSL_TYPE_VOID, 130, 0, false, true },
|
||||
{ "size2x32", PIPE_FORMAT_R32G32_SINT, GLSL_TYPE_VOID, 130, 0, false, true },
|
||||
{ "size4x32", PIPE_FORMAT_R32G32B32A32_SINT, GLSL_TYPE_VOID, 130, 0, false, true },
|
||||
};
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(map); i++) {
|
||||
|
@ -1708,6 +1709,25 @@ layout_qualifier_id:
|
|||
}
|
||||
}
|
||||
|
||||
/* Layout qualifier for NV_viewport_array2. */
|
||||
if (!$$.flags.i && state->stage != MESA_SHADER_FRAGMENT) {
|
||||
if (match_layout_qualifier($1, "viewport_relative", state) == 0) {
|
||||
$$.flags.q.viewport_relative = 1;
|
||||
}
|
||||
|
||||
if ($$.flags.i && !state->NV_viewport_array2_enable) {
|
||||
_mesa_glsl_error(& @1, state,
|
||||
"qualifier `%s' requires "
|
||||
"GL_NV_viewport_array2", $1);
|
||||
}
|
||||
|
||||
if ($$.flags.i && state->NV_viewport_array2_warn) {
|
||||
_mesa_glsl_warning(& @1, state,
|
||||
"GL_NV_viewport_array2 layout "
|
||||
"identifier `%s' used", $1);
|
||||
}
|
||||
}
|
||||
|
||||
if (!$$.flags.i) {
|
||||
_mesa_glsl_error(& @1, state, "unrecognized layout identifier "
|
||||
"`%s'", $1);
|
||||
|
|
|
@ -723,6 +723,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
|
|||
EXT(EXT_demote_to_helper_invocation),
|
||||
EXT(EXT_frag_depth),
|
||||
EXT(EXT_draw_buffers),
|
||||
EXT(EXT_draw_instanced),
|
||||
EXT(EXT_clip_cull_distance),
|
||||
EXT(EXT_geometry_point_size),
|
||||
EXT_AEP(EXT_geometry_shader),
|
||||
|
@ -753,6 +754,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
|
|||
EXT(NV_fragment_shader_interlock),
|
||||
EXT(NV_image_formats),
|
||||
EXT(NV_shader_atomic_float),
|
||||
EXT(NV_viewport_array2),
|
||||
};
|
||||
|
||||
#undef EXT
|
||||
|
@ -1931,6 +1933,8 @@ set_shader_inout_layout(struct gl_shader *shader,
|
|||
shader->bindless_image = state->bindless_image_specified;
|
||||
shader->bound_sampler = state->bound_sampler_specified;
|
||||
shader->bound_image = state->bound_image_specified;
|
||||
shader->redeclares_gl_layer = state->redeclares_gl_layer;
|
||||
shader->layer_viewport_relative = state->layer_viewport_relative;
|
||||
}
|
||||
|
||||
/* src can be NULL if only the symbols found in the exec_list should be
|
||||
|
@ -2234,7 +2238,13 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader,
|
|||
shader->Version = state->language_version;
|
||||
shader->IsES = state->es_shader;
|
||||
|
||||
struct gl_shader_compiler_options *options =
|
||||
&ctx->Const.ShaderCompilerOptions[shader->Stage];
|
||||
|
||||
if (!state->error && !shader->ir->is_empty()) {
|
||||
if (options->LowerPrecision)
|
||||
lower_precision(shader->ir);
|
||||
lower_builtins(shader->ir);
|
||||
assign_subroutine_indexes(state);
|
||||
lower_subroutine(shader->ir, state);
|
||||
opt_shader_and_create_symbol_table(ctx, state->symbols, shader);
|
||||
|
@ -2347,7 +2357,20 @@ do_common_optimization(exec_list *ir, bool linked,
|
|||
OPT(lower_vector_insert, ir, false);
|
||||
OPT(optimize_swizzles, ir);
|
||||
|
||||
OPT(optimize_split_arrays, ir, linked);
|
||||
/* Some drivers only call do_common_optimization() once rather than in a
|
||||
* loop, and split arrays causes each element of a constant array to
|
||||
* dereference is own copy of the entire array initilizer. This IR is not
|
||||
* something that can be generated manually in a shader and is not
|
||||
* accounted for by NIR optimisations, the result is an exponential slow
|
||||
* down in compilation speed as a constant arrays element count grows. To
|
||||
* avoid that here we make sure to always clean up the mess split arrays
|
||||
* causes to constant arrays.
|
||||
*/
|
||||
bool array_split = optimize_split_arrays(ir, linked);
|
||||
if (array_split)
|
||||
do_constant_propagation(ir);
|
||||
progress |= array_split;
|
||||
|
||||
OPT(optimize_redundant_jumps, ir);
|
||||
|
||||
if (options->MaxUnrollIterations) {
|
||||
|
|
|
@ -823,6 +823,8 @@ struct _mesa_glsl_parse_state {
|
|||
bool EXT_demote_to_helper_invocation_warn;
|
||||
bool EXT_draw_buffers_enable;
|
||||
bool EXT_draw_buffers_warn;
|
||||
bool EXT_draw_instanced_enable;
|
||||
bool EXT_draw_instanced_warn;
|
||||
bool EXT_frag_depth_enable;
|
||||
bool EXT_frag_depth_warn;
|
||||
bool EXT_geometry_point_size_enable;
|
||||
|
@ -883,6 +885,8 @@ struct _mesa_glsl_parse_state {
|
|||
bool NV_image_formats_warn;
|
||||
bool NV_shader_atomic_float_enable;
|
||||
bool NV_shader_atomic_float_warn;
|
||||
bool NV_viewport_array2_enable;
|
||||
bool NV_viewport_array2_warn;
|
||||
/*@}*/
|
||||
|
||||
/** Extensions supported by the OpenGL implementation. */
|
||||
|
@ -925,6 +929,10 @@ struct _mesa_glsl_parse_state {
|
|||
/** Atomic counter offsets by binding */
|
||||
unsigned atomic_counter_offsets[MAX_COMBINED_ATOMIC_BUFFERS];
|
||||
|
||||
/** Whether gl_Layer output is viewport-relative. */
|
||||
bool redeclares_gl_layer;
|
||||
bool layer_viewport_relative;
|
||||
|
||||
bool allow_extension_directive_midshader;
|
||||
bool allow_builtin_variable_redeclaration;
|
||||
bool allow_layout_qualifier_on_function_parameter;
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
*/
|
||||
#include <string.h>
|
||||
#include "ir.h"
|
||||
#include "util/half_float.h"
|
||||
#include "compiler/glsl_types.h"
|
||||
#include "glsl_parser_extras.h"
|
||||
|
||||
|
@ -282,6 +283,7 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
|
|||
case ir_unop_i2f:
|
||||
case ir_unop_u2f:
|
||||
case ir_unop_d2f:
|
||||
case ir_unop_f162f:
|
||||
case ir_unop_bitcast_i2f:
|
||||
case ir_unop_bitcast_u2f:
|
||||
case ir_unop_i642f:
|
||||
|
@ -290,9 +292,17 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
|
|||
op0->type->vector_elements, 1);
|
||||
break;
|
||||
|
||||
case ir_unop_f2f16:
|
||||
case ir_unop_f2fmp:
|
||||
case ir_unop_b2f16:
|
||||
this->type = glsl_type::get_instance(GLSL_TYPE_FLOAT16,
|
||||
op0->type->vector_elements, 1);
|
||||
break;
|
||||
|
||||
case ir_unop_f2b:
|
||||
case ir_unop_i2b:
|
||||
case ir_unop_d2b:
|
||||
case ir_unop_f162b:
|
||||
case ir_unop_i642b:
|
||||
this->type = glsl_type::get_instance(GLSL_TYPE_BOOL,
|
||||
op0->type->vector_elements, 1);
|
||||
|
@ -335,9 +345,6 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
|
|||
this->type = glsl_type::get_instance(GLSL_TYPE_UINT64,
|
||||
op0->type->vector_elements, 1);
|
||||
break;
|
||||
case ir_unop_noise:
|
||||
this->type = glsl_type::float_type;
|
||||
break;
|
||||
|
||||
case ir_unop_unpack_double_2x32:
|
||||
case ir_unop_unpack_uint_2x32:
|
||||
|
@ -682,6 +689,19 @@ ir_constant::ir_constant(const struct glsl_type *type,
|
|||
memcpy(& this->value, data, sizeof(this->value));
|
||||
}
|
||||
|
||||
ir_constant::ir_constant(float16_t f16, unsigned vector_elements)
|
||||
: ir_rvalue(ir_type_constant)
|
||||
{
|
||||
assert(vector_elements <= 4);
|
||||
this->type = glsl_type::get_instance(GLSL_TYPE_FLOAT16, vector_elements, 1);
|
||||
for (unsigned i = 0; i < vector_elements; i++) {
|
||||
this->value.f16[i] = f16.bits;
|
||||
}
|
||||
for (unsigned i = vector_elements; i < 16; i++) {
|
||||
this->value.f[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
ir_constant::ir_constant(float f, unsigned vector_elements)
|
||||
: ir_rvalue(ir_type_constant)
|
||||
{
|
||||
|
@ -783,6 +803,7 @@ ir_constant::ir_constant(const ir_constant *c, unsigned i)
|
|||
case GLSL_TYPE_UINT: this->value.u[0] = c->value.u[i]; break;
|
||||
case GLSL_TYPE_INT: this->value.i[0] = c->value.i[i]; break;
|
||||
case GLSL_TYPE_FLOAT: this->value.f[0] = c->value.f[i]; break;
|
||||
case GLSL_TYPE_FLOAT16: this->value.f16[0] = c->value.f16[i]; break;
|
||||
case GLSL_TYPE_BOOL: this->value.b[0] = c->value.b[i]; break;
|
||||
case GLSL_TYPE_DOUBLE: this->value.d[0] = c->value.d[i]; break;
|
||||
default: assert(!"Should not get here."); break;
|
||||
|
@ -828,14 +849,23 @@ ir_constant::ir_constant(const struct glsl_type *type, exec_list *value_list)
|
|||
if (value->type->is_scalar() && value->next->is_tail_sentinel()) {
|
||||
if (type->is_matrix()) {
|
||||
/* Matrix - fill diagonal (rest is already set to 0) */
|
||||
assert(type->is_float() || type->is_double());
|
||||
for (unsigned i = 0; i < type->matrix_columns; i++) {
|
||||
if (type->is_float())
|
||||
switch (type->base_type) {
|
||||
case GLSL_TYPE_FLOAT:
|
||||
this->value.f[i * type->vector_elements + i] =
|
||||
value->value.f[0];
|
||||
else
|
||||
break;
|
||||
case GLSL_TYPE_DOUBLE:
|
||||
this->value.d[i * type->vector_elements + i] =
|
||||
value->value.d[0];
|
||||
break;
|
||||
case GLSL_TYPE_FLOAT16:
|
||||
this->value.f16[i * type->vector_elements + i] =
|
||||
value->value.f16[0];
|
||||
break;
|
||||
default:
|
||||
assert(!"unexpected matrix base type");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* Vector or scalar - fill all components */
|
||||
|
@ -849,6 +879,10 @@ ir_constant::ir_constant(const struct glsl_type *type, exec_list *value_list)
|
|||
for (unsigned i = 0; i < type->components(); i++)
|
||||
this->value.f[i] = value->value.f[0];
|
||||
break;
|
||||
case GLSL_TYPE_FLOAT16:
|
||||
for (unsigned i = 0; i < type->components(); i++)
|
||||
this->value.f16[i] = value->value.f16[0];
|
||||
break;
|
||||
case GLSL_TYPE_DOUBLE:
|
||||
for (unsigned i = 0; i < type->components(); i++)
|
||||
this->value.d[i] = value->value.d[0];
|
||||
|
@ -918,6 +952,9 @@ ir_constant::ir_constant(const struct glsl_type *type, exec_list *value_list)
|
|||
case GLSL_TYPE_FLOAT:
|
||||
this->value.f[i] = value->get_float_component(j);
|
||||
break;
|
||||
case GLSL_TYPE_FLOAT16:
|
||||
this->value.f16[i] = value->get_float16_component(j);
|
||||
break;
|
||||
case GLSL_TYPE_BOOL:
|
||||
this->value.b[i] = value->get_bool_component(j);
|
||||
break;
|
||||
|
@ -983,6 +1020,7 @@ ir_constant::get_bool_component(unsigned i) const
|
|||
case GLSL_TYPE_UINT: return this->value.u[i] != 0;
|
||||
case GLSL_TYPE_INT: return this->value.i[i] != 0;
|
||||
case GLSL_TYPE_FLOAT: return ((int)this->value.f[i]) != 0;
|
||||
case GLSL_TYPE_FLOAT16: return ((int)_mesa_half_to_float(this->value.f16[i])) != 0;
|
||||
case GLSL_TYPE_BOOL: return this->value.b[i];
|
||||
case GLSL_TYPE_DOUBLE: return this->value.d[i] != 0.0;
|
||||
case GLSL_TYPE_SAMPLER:
|
||||
|
@ -1005,6 +1043,7 @@ ir_constant::get_float_component(unsigned i) const
|
|||
case GLSL_TYPE_UINT: return (float) this->value.u[i];
|
||||
case GLSL_TYPE_INT: return (float) this->value.i[i];
|
||||
case GLSL_TYPE_FLOAT: return this->value.f[i];
|
||||
case GLSL_TYPE_FLOAT16: return _mesa_half_to_float(this->value.f16[i]);
|
||||
case GLSL_TYPE_BOOL: return this->value.b[i] ? 1.0f : 0.0f;
|
||||
case GLSL_TYPE_DOUBLE: return (float) this->value.d[i];
|
||||
case GLSL_TYPE_SAMPLER:
|
||||
|
@ -1020,6 +1059,15 @@ ir_constant::get_float_component(unsigned i) const
|
|||
return 0.0;
|
||||
}
|
||||
|
||||
uint16_t
|
||||
ir_constant::get_float16_component(unsigned i) const
|
||||
{
|
||||
if (this->type->base_type == GLSL_TYPE_FLOAT16)
|
||||
return this->value.f16[i];
|
||||
else
|
||||
return _mesa_float_to_half(get_float_component(i));
|
||||
}
|
||||
|
||||
double
|
||||
ir_constant::get_double_component(unsigned i) const
|
||||
{
|
||||
|
@ -1027,6 +1075,7 @@ ir_constant::get_double_component(unsigned i) const
|
|||
case GLSL_TYPE_UINT: return (double) this->value.u[i];
|
||||
case GLSL_TYPE_INT: return (double) this->value.i[i];
|
||||
case GLSL_TYPE_FLOAT: return (double) this->value.f[i];
|
||||
case GLSL_TYPE_FLOAT16: return (double) _mesa_half_to_float(this->value.f16[i]);
|
||||
case GLSL_TYPE_BOOL: return this->value.b[i] ? 1.0 : 0.0;
|
||||
case GLSL_TYPE_DOUBLE: return this->value.d[i];
|
||||
case GLSL_TYPE_SAMPLER:
|
||||
|
@ -1049,6 +1098,7 @@ ir_constant::get_int_component(unsigned i) const
|
|||
case GLSL_TYPE_UINT: return this->value.u[i];
|
||||
case GLSL_TYPE_INT: return this->value.i[i];
|
||||
case GLSL_TYPE_FLOAT: return (int) this->value.f[i];
|
||||
case GLSL_TYPE_FLOAT16: return (int) _mesa_half_to_float(this->value.f16[i]);
|
||||
case GLSL_TYPE_BOOL: return this->value.b[i] ? 1 : 0;
|
||||
case GLSL_TYPE_DOUBLE: return (int) this->value.d[i];
|
||||
case GLSL_TYPE_SAMPLER:
|
||||
|
@ -1071,6 +1121,7 @@ ir_constant::get_uint_component(unsigned i) const
|
|||
case GLSL_TYPE_UINT: return this->value.u[i];
|
||||
case GLSL_TYPE_INT: return this->value.i[i];
|
||||
case GLSL_TYPE_FLOAT: return (unsigned) this->value.f[i];
|
||||
case GLSL_TYPE_FLOAT16: return (unsigned) _mesa_half_to_float(this->value.f16[i]);
|
||||
case GLSL_TYPE_BOOL: return this->value.b[i] ? 1 : 0;
|
||||
case GLSL_TYPE_DOUBLE: return (unsigned) this->value.d[i];
|
||||
case GLSL_TYPE_SAMPLER:
|
||||
|
@ -1093,6 +1144,7 @@ ir_constant::get_int64_component(unsigned i) const
|
|||
case GLSL_TYPE_UINT: return this->value.u[i];
|
||||
case GLSL_TYPE_INT: return this->value.i[i];
|
||||
case GLSL_TYPE_FLOAT: return (int64_t) this->value.f[i];
|
||||
case GLSL_TYPE_FLOAT16: return (int64_t) _mesa_half_to_float(this->value.f16[i]);
|
||||
case GLSL_TYPE_BOOL: return this->value.b[i] ? 1 : 0;
|
||||
case GLSL_TYPE_DOUBLE: return (int64_t) this->value.d[i];
|
||||
case GLSL_TYPE_SAMPLER:
|
||||
|
@ -1115,6 +1167,7 @@ ir_constant::get_uint64_component(unsigned i) const
|
|||
case GLSL_TYPE_UINT: return this->value.u[i];
|
||||
case GLSL_TYPE_INT: return this->value.i[i];
|
||||
case GLSL_TYPE_FLOAT: return (uint64_t) this->value.f[i];
|
||||
case GLSL_TYPE_FLOAT16: return (uint64_t) _mesa_half_to_float(this->value.f16[i]);
|
||||
case GLSL_TYPE_BOOL: return this->value.b[i] ? 1 : 0;
|
||||
case GLSL_TYPE_DOUBLE: return (uint64_t) this->value.d[i];
|
||||
case GLSL_TYPE_SAMPLER:
|
||||
|
@ -1169,6 +1222,7 @@ ir_constant::copy_offset(ir_constant *src, int offset)
|
|||
case GLSL_TYPE_UINT:
|
||||
case GLSL_TYPE_INT:
|
||||
case GLSL_TYPE_FLOAT:
|
||||
case GLSL_TYPE_FLOAT16:
|
||||
case GLSL_TYPE_DOUBLE:
|
||||
case GLSL_TYPE_SAMPLER:
|
||||
case GLSL_TYPE_IMAGE:
|
||||
|
@ -1188,6 +1242,9 @@ ir_constant::copy_offset(ir_constant *src, int offset)
|
|||
case GLSL_TYPE_FLOAT:
|
||||
value.f[i+offset] = src->get_float_component(i);
|
||||
break;
|
||||
case GLSL_TYPE_FLOAT16:
|
||||
value.f16[i+offset] = src->get_float16_component(i);
|
||||
break;
|
||||
case GLSL_TYPE_BOOL:
|
||||
value.b[i+offset] = src->get_bool_component(i);
|
||||
break;
|
||||
|
@ -1247,6 +1304,9 @@ ir_constant::copy_masked_offset(ir_constant *src, int offset, unsigned int mask)
|
|||
case GLSL_TYPE_FLOAT:
|
||||
value.f[i+offset] = src->get_float_component(id++);
|
||||
break;
|
||||
case GLSL_TYPE_FLOAT16:
|
||||
value.f16[i+offset] = src->get_float16_component(id++);
|
||||
break;
|
||||
case GLSL_TYPE_BOOL:
|
||||
value.b[i+offset] = src->get_bool_component(id++);
|
||||
break;
|
||||
|
@ -1297,6 +1357,12 @@ ir_constant::has_value(const ir_constant *c) const
|
|||
if (this->value.f[i] != c->value.f[i])
|
||||
return false;
|
||||
break;
|
||||
case GLSL_TYPE_FLOAT16:
|
||||
/* Convert to float to make sure NaN and ±0.0 compares correctly */
|
||||
if (_mesa_half_to_float(this->value.f16[i]) !=
|
||||
_mesa_half_to_float(c->value.f16[i]))
|
||||
return false;
|
||||
break;
|
||||
case GLSL_TYPE_BOOL:
|
||||
if (this->value.b[i] != c->value.b[i])
|
||||
return false;
|
||||
|
@ -1340,6 +1406,10 @@ ir_constant::is_value(float f, int i) const
|
|||
if (this->value.f[c] != f)
|
||||
return false;
|
||||
break;
|
||||
case GLSL_TYPE_FLOAT16:
|
||||
if (_mesa_half_to_float(this->value.f16[c]) != f)
|
||||
return false;
|
||||
break;
|
||||
case GLSL_TYPE_INT:
|
||||
if (this->value.i[c] != i)
|
||||
return false;
|
||||
|
@ -1820,7 +1890,7 @@ ir_variable::ir_variable(const struct glsl_type *type, const char *name,
|
|||
this->data.fb_fetch_output = false;
|
||||
this->data.bindless = false;
|
||||
this->data.bound = false;
|
||||
this->data.image_format = GL_NONE;
|
||||
this->data.image_format = PIPE_FORMAT_NONE;
|
||||
this->data._num_state_slots = 0;
|
||||
this->data.param_index = 0;
|
||||
this->data.stream = 0;
|
||||
|
|
|
@ -29,6 +29,8 @@
|
|||
#include <stdlib.h>
|
||||
|
||||
#include "util/ralloc.h"
|
||||
#include "util/format/u_format.h"
|
||||
#include "util/half_float.h"
|
||||
#include "compiler/glsl_types.h"
|
||||
#include "list.h"
|
||||
#include "ir_visitor.h"
|
||||
|
@ -767,6 +769,13 @@ public:
|
|||
*/
|
||||
unsigned is_unmatched_generic_inout:1;
|
||||
|
||||
/**
|
||||
* Is this varying used by transform feedback?
|
||||
*
|
||||
* This is used by the linker to decide if it's safe to pack the varying.
|
||||
*/
|
||||
unsigned is_xfb:1;
|
||||
|
||||
/**
|
||||
* Is this varying used only by transform feedback?
|
||||
*
|
||||
|
@ -887,8 +896,11 @@ public:
|
|||
uint8_t warn_extension_index;
|
||||
|
||||
public:
|
||||
/** Image internal format if specified explicitly, otherwise GL_NONE. */
|
||||
uint16_t image_format;
|
||||
/**
|
||||
* Image internal format if specified explicitly, otherwise
|
||||
* PIPE_FORMAT_NONE.
|
||||
*/
|
||||
enum pipe_format image_format;
|
||||
|
||||
private:
|
||||
/**
|
||||
|
@ -2039,6 +2051,12 @@ public:
|
|||
*/
|
||||
virtual ir_variable *variable_referenced() const = 0;
|
||||
|
||||
/**
|
||||
* Get the precision. This can either come from the eventual variable that
|
||||
* is dereferenced, or from a record member.
|
||||
*/
|
||||
virtual int precision() const = 0;
|
||||
|
||||
protected:
|
||||
ir_dereference(enum ir_node_type t)
|
||||
: ir_rvalue(t)
|
||||
|
@ -2068,6 +2086,11 @@ public:
|
|||
return this->var;
|
||||
}
|
||||
|
||||
virtual int precision() const
|
||||
{
|
||||
return this->var->data.precision;
|
||||
}
|
||||
|
||||
virtual ir_variable *whole_variable_referenced()
|
||||
{
|
||||
/* ir_dereference_variable objects always dereference the entire
|
||||
|
@ -2116,6 +2139,16 @@ public:
|
|||
return this->array->variable_referenced();
|
||||
}
|
||||
|
||||
virtual int precision() const
|
||||
{
|
||||
ir_dereference *deref = this->array->as_dereference();
|
||||
|
||||
if (deref == NULL)
|
||||
return GLSL_PRECISION_NONE;
|
||||
else
|
||||
return deref->precision();
|
||||
}
|
||||
|
||||
virtual void accept(ir_visitor *v)
|
||||
{
|
||||
v->visit(this);
|
||||
|
@ -2151,6 +2184,13 @@ public:
|
|||
return this->record->variable_referenced();
|
||||
}
|
||||
|
||||
virtual int precision() const
|
||||
{
|
||||
glsl_struct_field *field = record->type->fields.structure + field_idx;
|
||||
|
||||
return field->precision;
|
||||
}
|
||||
|
||||
virtual void accept(ir_visitor *v)
|
||||
{
|
||||
v->visit(this);
|
||||
|
@ -2172,6 +2212,7 @@ union ir_constant_data {
|
|||
float f[16];
|
||||
bool b[16];
|
||||
double d[16];
|
||||
uint16_t f16[16];
|
||||
uint64_t u64[16];
|
||||
int64_t i64[16];
|
||||
};
|
||||
|
@ -2183,6 +2224,7 @@ public:
|
|||
ir_constant(bool b, unsigned vector_elements=1);
|
||||
ir_constant(unsigned int u, unsigned vector_elements=1);
|
||||
ir_constant(int i, unsigned vector_elements=1);
|
||||
ir_constant(float16_t f16, unsigned vector_elements=1);
|
||||
ir_constant(float f, unsigned vector_elements=1);
|
||||
ir_constant(double d, unsigned vector_elements=1);
|
||||
ir_constant(uint64_t u64, unsigned vector_elements=1);
|
||||
|
@ -2235,6 +2277,7 @@ public:
|
|||
/*@{*/
|
||||
bool get_bool_component(unsigned i) const;
|
||||
float get_float_component(unsigned i) const;
|
||||
uint16_t get_float16_component(unsigned i) const;
|
||||
double get_double_component(unsigned i) const;
|
||||
int get_int_component(unsigned i) const;
|
||||
unsigned get_uint_component(unsigned i) const;
|
||||
|
|
|
@ -75,54 +75,6 @@ ir_array_refcount_entry::~ir_array_refcount_entry()
|
|||
delete [] bits;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
ir_array_refcount_entry::mark_array_elements_referenced(const array_deref_range *dr,
|
||||
unsigned count)
|
||||
{
|
||||
if (count != array_depth)
|
||||
return;
|
||||
|
||||
mark_array_elements_referenced(dr, count, 1, 0);
|
||||
}
|
||||
|
||||
void
|
||||
ir_array_refcount_entry::mark_array_elements_referenced(const array_deref_range *dr,
|
||||
unsigned count,
|
||||
unsigned scale,
|
||||
unsigned linearized_index)
|
||||
{
|
||||
/* Walk through the list of array dereferences in least- to
|
||||
* most-significant order. Along the way, accumulate the current
|
||||
* linearized offset and the scale factor for each array-of-.
|
||||
*/
|
||||
for (unsigned i = 0; i < count; i++) {
|
||||
if (dr[i].index < dr[i].size) {
|
||||
linearized_index += dr[i].index * scale;
|
||||
scale *= dr[i].size;
|
||||
} else {
|
||||
/* For each element in the current array, update the count and
|
||||
* offset, then recurse to process the remaining arrays.
|
||||
*
|
||||
* There is some inefficency here if the last element in the
|
||||
* array_deref_range list specifies the entire array. In that case,
|
||||
* the loop will make recursive calls with count == 0. In the call,
|
||||
* all that will happen is the bit will be set.
|
||||
*/
|
||||
for (unsigned j = 0; j < dr[i].size; j++) {
|
||||
mark_array_elements_referenced(&dr[i + 1],
|
||||
count - (i + 1),
|
||||
scale * dr[i].size,
|
||||
linearized_index + (j * scale));
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
BITSET_SET(bits, linearized_index);
|
||||
}
|
||||
|
||||
ir_array_refcount_entry *
|
||||
ir_array_refcount_visitor::get_variable_entry(ir_variable *var)
|
||||
{
|
||||
|
@ -224,7 +176,9 @@ ir_array_refcount_visitor::visit_enter(ir_dereference_array *ir)
|
|||
if (entry == NULL)
|
||||
return visit_stop;
|
||||
|
||||
entry->mark_array_elements_referenced(derefs, num_derefs);
|
||||
link_util_mark_array_elements_referenced(derefs, num_derefs,
|
||||
entry->array_depth,
|
||||
entry->bits);
|
||||
|
||||
return visit_continue;
|
||||
}
|
||||
|
|
|
@ -32,26 +32,10 @@
|
|||
|
||||
#include "ir.h"
|
||||
#include "ir_visitor.h"
|
||||
#include "linker_util.h"
|
||||
#include "compiler/glsl_types.h"
|
||||
#include "util/bitset.h"
|
||||
|
||||
/**
|
||||
* Describes an access of an array element or an access of the whole array
|
||||
*/
|
||||
struct array_deref_range {
|
||||
/**
|
||||
* Index that was accessed.
|
||||
*
|
||||
* All valid array indices are less than the size of the array. If index
|
||||
* is equal to the size of the array, this means the entire array has been
|
||||
* accessed (e.g., due to use of a non-constant index).
|
||||
*/
|
||||
unsigned index;
|
||||
|
||||
/** Size of the array. Used for offset calculations. */
|
||||
unsigned size;
|
||||
};
|
||||
|
||||
class ir_array_refcount_entry
|
||||
{
|
||||
public:
|
||||
|
@ -63,33 +47,11 @@ public:
|
|||
/** Has the variable been referenced? */
|
||||
bool is_referenced;
|
||||
|
||||
/**
|
||||
* Mark a set of array elements as accessed.
|
||||
*
|
||||
* If every \c array_deref_range is for a single index, only a single
|
||||
* element will be marked. If any \c array_deref_range is for an entire
|
||||
* array-of-, then multiple elements will be marked.
|
||||
*
|
||||
* Items in the \c array_deref_range list appear in least- to
|
||||
* most-significant order. This is the \b opposite order the indices
|
||||
* appear in the GLSL shader text. An array access like
|
||||
*
|
||||
* x = y[1][i][3];
|
||||
*
|
||||
* would appear as
|
||||
*
|
||||
* { { 3, n }, { m, m }, { 1, p } }
|
||||
*
|
||||
* where n, m, and p are the sizes of the arrays-of-arrays.
|
||||
*
|
||||
* The set of marked array elements can later be queried by
|
||||
* \c ::is_linearized_index_referenced.
|
||||
*
|
||||
* \param dr List of array_deref_range elements to be processed.
|
||||
* \param count Number of array_deref_range elements to be processed.
|
||||
*/
|
||||
void mark_array_elements_referenced(const array_deref_range *dr,
|
||||
unsigned count);
|
||||
/** Count of nested arrays in the type. */
|
||||
unsigned array_depth;
|
||||
|
||||
/** Set of bit-flags to note which array elements have been accessed. */
|
||||
BITSET_WORD *bits;
|
||||
|
||||
/** Has a linearized array index been referenced? */
|
||||
bool is_linearized_index_referenced(unsigned linearized_index) const
|
||||
|
@ -101,8 +63,6 @@ public:
|
|||
}
|
||||
|
||||
private:
|
||||
/** Set of bit-flags to note which array elements have been accessed. */
|
||||
BITSET_WORD *bits;
|
||||
|
||||
/**
|
||||
* Total number of bits referenced by \c bits.
|
||||
|
@ -111,27 +71,6 @@ private:
|
|||
*/
|
||||
unsigned num_bits;
|
||||
|
||||
/** Count of nested arrays in the type. */
|
||||
unsigned array_depth;
|
||||
|
||||
/**
|
||||
* Recursive part of the public mark_array_elements_referenced method.
|
||||
*
|
||||
* The recursion occurs when an entire array-of- is accessed. See the
|
||||
* implementation for more details.
|
||||
*
|
||||
* \param dr List of array_deref_range elements to be
|
||||
* processed.
|
||||
* \param count Number of array_deref_range elements to be
|
||||
* processed.
|
||||
* \param scale Current offset scale.
|
||||
* \param linearized_index Current accumulated linearized array index.
|
||||
*/
|
||||
void mark_array_elements_referenced(const array_deref_range *dr,
|
||||
unsigned count,
|
||||
unsigned scale,
|
||||
unsigned linearized_index);
|
||||
|
||||
friend class array_refcount_test;
|
||||
};
|
||||
|
||||
|
|
|
@ -22,7 +22,7 @@
|
|||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "main/compiler.h"
|
||||
#include "util/compiler.h"
|
||||
#include "ir.h"
|
||||
#include "compiler/glsl_types.h"
|
||||
#include "util/hash_table.h"
|
||||
|
|
|
@ -452,6 +452,21 @@ isub64_saturate(int64_t a, int64_t b)
|
|||
return a - b;
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
pack_2x32(uint32_t a, uint32_t b)
|
||||
{
|
||||
uint64_t v = a;
|
||||
v |= (uint64_t)b << 32;
|
||||
return v;
|
||||
}
|
||||
|
||||
static void
|
||||
unpack_2x32(uint64_t p, uint32_t *a, uint32_t *b)
|
||||
{
|
||||
*a = p & 0xffffffff;
|
||||
*b = (p >> 32);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the constant that is ultimately referenced by an r-value, in a constant
|
||||
* expression evaluation context.
|
||||
|
@ -692,6 +707,23 @@ ir_expression::constant_expression_value(void *mem_ctx,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
for (unsigned operand = 0; operand < this->num_operands; operand++) {
|
||||
if (op[operand]->type->base_type == GLSL_TYPE_FLOAT16) {
|
||||
const struct glsl_type *float_type =
|
||||
glsl_type::get_instance(GLSL_TYPE_FLOAT,
|
||||
op[operand]->type->vector_elements,
|
||||
op[operand]->type->matrix_columns,
|
||||
op[operand]->type->explicit_stride,
|
||||
op[operand]->type->interface_row_major);
|
||||
|
||||
ir_constant_data f;
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(f.f); i++)
|
||||
f.f[i] = _mesa_half_to_float(op[operand]->value.f16[i]);
|
||||
|
||||
op[operand] = new(mem_ctx) ir_constant(float_type, &f);
|
||||
}
|
||||
}
|
||||
|
||||
if (op[1] != NULL)
|
||||
switch (this->operation) {
|
||||
case ir_binop_lshift:
|
||||
|
@ -740,6 +772,15 @@ ir_expression::constant_expression_value(void *mem_ctx,
|
|||
|
||||
#include "ir_expression_operation_constant.h"
|
||||
|
||||
if (this->type->base_type == GLSL_TYPE_FLOAT16) {
|
||||
ir_constant_data f;
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(f.f16); i++)
|
||||
f.f16[i] = _mesa_float_to_half(data.f[i]);
|
||||
|
||||
return new(mem_ctx) ir_constant(this->type, &f);
|
||||
}
|
||||
|
||||
|
||||
return new(mem_ctx) ir_constant(this->type, &data);
|
||||
}
|
||||
|
||||
|
@ -773,6 +814,7 @@ ir_swizzle::constant_expression_value(void *mem_ctx,
|
|||
case GLSL_TYPE_UINT:
|
||||
case GLSL_TYPE_INT: data.u[i] = v->value.u[swiz_idx[i]]; break;
|
||||
case GLSL_TYPE_FLOAT: data.f[i] = v->value.f[swiz_idx[i]]; break;
|
||||
case GLSL_TYPE_FLOAT16: data.f16[i] = v->value.f16[swiz_idx[i]]; break;
|
||||
case GLSL_TYPE_BOOL: data.b[i] = v->value.b[swiz_idx[i]]; break;
|
||||
case GLSL_TYPE_DOUBLE:data.d[i] = v->value.d[swiz_idx[i]]; break;
|
||||
case GLSL_TYPE_UINT64:data.u64[i] = v->value.u64[swiz_idx[i]]; break;
|
||||
|
@ -1056,10 +1098,16 @@ ir_function_signature::constant_expression_value(void *mem_ctx,
|
|||
|
||||
/*
|
||||
* Of the builtin functions, only the texture lookups and the noise
|
||||
* ones must not be used in constant expressions. They all include
|
||||
* specific opcodes so they don't need to be special-cased at this
|
||||
* point.
|
||||
* ones must not be used in constant expressions. Texture instructions
|
||||
* include special ir_texture opcodes which can't be constant-folded (see
|
||||
* ir_texture::constant_expression_value). Noise functions, however, we
|
||||
* have to special case here.
|
||||
*/
|
||||
if (strcmp(this->function_name(), "noise1") == 0 ||
|
||||
strcmp(this->function_name(), "noise2") == 0 ||
|
||||
strcmp(this->function_name(), "noise3") == 0 ||
|
||||
strcmp(this->function_name(), "noise4") == 0)
|
||||
return NULL;
|
||||
|
||||
/* Initialize the table of dereferencable names with the function
|
||||
* parameters. Verify their const-ness on the way.
|
||||
|
|
|
@ -39,6 +39,7 @@ enum ir_expression_operation {
|
|||
ir_unop_i2f,
|
||||
ir_unop_f2b,
|
||||
ir_unop_b2f,
|
||||
ir_unop_b2f16,
|
||||
ir_unop_i2b,
|
||||
ir_unop_b2i,
|
||||
ir_unop_u2f,
|
||||
|
@ -46,11 +47,15 @@ enum ir_expression_operation {
|
|||
ir_unop_u2i,
|
||||
ir_unop_d2f,
|
||||
ir_unop_f2d,
|
||||
ir_unop_f2f16,
|
||||
ir_unop_f2fmp,
|
||||
ir_unop_f162f,
|
||||
ir_unop_d2i,
|
||||
ir_unop_i2d,
|
||||
ir_unop_d2u,
|
||||
ir_unop_u2d,
|
||||
ir_unop_d2b,
|
||||
ir_unop_f162b,
|
||||
ir_unop_bitcast_i2f,
|
||||
ir_unop_bitcast_f2i,
|
||||
ir_unop_bitcast_u2f,
|
||||
|
@ -117,7 +122,6 @@ enum ir_expression_operation {
|
|||
ir_unop_unpack_image_2x32,
|
||||
ir_unop_frexp_sig,
|
||||
ir_unop_frexp_exp,
|
||||
ir_unop_noise,
|
||||
ir_unop_subroutine_to_int,
|
||||
ir_unop_interpolate_at_centroid,
|
||||
ir_unop_get_buffer_size,
|
||||
|
|
|
@ -438,6 +438,8 @@ ir_expression_operation = [
|
|||
operation("f2b", 1, source_types=(float_type,), dest_type=bool_type, c_expression="{src0} != 0.0F ? true : false"),
|
||||
# Boolean-to-float conversion
|
||||
operation("b2f", 1, source_types=(bool_type,), dest_type=float_type, c_expression="{src0} ? 1.0F : 0.0F"),
|
||||
# Boolean-to-float16 conversion
|
||||
operation("b2f16", 1, source_types=(bool_type,), dest_type=float_type, c_expression="{src0} ? 1.0F : 0.0F"),
|
||||
# int-to-boolean conversion
|
||||
operation("i2b", 1, source_types=(uint_type, int_type), dest_type=bool_type, c_expression="{src0} ? true : false"),
|
||||
# Boolean-to-int conversion
|
||||
|
@ -452,6 +454,13 @@ ir_expression_operation = [
|
|||
operation("d2f", 1, source_types=(double_type,), dest_type=float_type, c_expression="{src0}"),
|
||||
# Float-to-double conversion.
|
||||
operation("f2d", 1, source_types=(float_type,), dest_type=double_type, c_expression="{src0}"),
|
||||
# Half-float conversions. These all operate on and return float types,
|
||||
# since the framework expands half to full float before calling in. We
|
||||
# still have to handle them here so that we can constant propagate through
|
||||
# them, but they are no-ops.
|
||||
operation("f2f16", 1, source_types=(float_type,), dest_type=float_type, c_expression="{src0}"),
|
||||
operation("f2fmp", 1, source_types=(float_type,), dest_type=float_type, c_expression="{src0}"),
|
||||
operation("f162f", 1, source_types=(float_type,), dest_type=float_type, c_expression="{src0}"),
|
||||
# Double-to-integer conversion.
|
||||
operation("d2i", 1, source_types=(double_type,), dest_type=int_type, c_expression="{src0}"),
|
||||
# Integer-to-double conversion.
|
||||
|
@ -462,6 +471,8 @@ ir_expression_operation = [
|
|||
operation("u2d", 1, source_types=(uint_type,), dest_type=double_type, c_expression="{src0}"),
|
||||
# Double-to-boolean conversion.
|
||||
operation("d2b", 1, source_types=(double_type,), dest_type=bool_type, c_expression="{src0} != 0.0"),
|
||||
# Float16-to-boolean conversion.
|
||||
operation("f162b", 1, source_types=(float_type,), dest_type=bool_type, c_expression="{src0} != 0.0"),
|
||||
# 'Bit-identical int-to-float "conversion"
|
||||
operation("bitcast_i2f", 1, source_types=(int_type,), dest_type=float_type, c_expression="bitcast_u2f({src0})"),
|
||||
# 'Bit-identical float-to-int "conversion"
|
||||
|
@ -544,20 +555,18 @@ ir_expression_operation = [
|
|||
operation("saturate", 1, printable_name="sat", source_types=(float_type,), c_expression="CLAMP({src0}, 0.0f, 1.0f)"),
|
||||
|
||||
# Double packing, part of ARB_gpu_shader_fp64.
|
||||
operation("pack_double_2x32", 1, printable_name="packDouble2x32", source_types=(uint_type,), dest_type=double_type, c_expression="memcpy(&data.d[0], &op[0]->value.u[0], sizeof(double))", flags=frozenset((horizontal_operation, non_assign_operation))),
|
||||
operation("unpack_double_2x32", 1, printable_name="unpackDouble2x32", source_types=(double_type,), dest_type=uint_type, c_expression="memcpy(&data.u[0], &op[0]->value.d[0], sizeof(double))", flags=frozenset((horizontal_operation, non_assign_operation))),
|
||||
operation("pack_double_2x32", 1, printable_name="packDouble2x32", source_types=(uint_type,), dest_type=double_type, c_expression="data.u64[0] = pack_2x32(op[0]->value.u[0], op[0]->value.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
|
||||
operation("unpack_double_2x32", 1, printable_name="unpackDouble2x32", source_types=(double_type,), dest_type=uint_type, c_expression="unpack_2x32(op[0]->value.u64[0], &data.u[0], &data.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
|
||||
|
||||
# Sampler/Image packing, part of ARB_bindless_texture.
|
||||
operation("pack_sampler_2x32", 1, printable_name="packSampler2x32", source_types=(uint_type,), dest_type=uint64_type, c_expression="memcpy(&data.u64[0], &op[0]->value.u[0], sizeof(uint64_t))", flags=frozenset((horizontal_operation, non_assign_operation))),
|
||||
operation("pack_image_2x32", 1, printable_name="packImage2x32", source_types=(uint_type,), dest_type=uint64_type, c_expression="memcpy(&data.u64[0], &op[0]->value.u[0], sizeof(uint64_t))", flags=frozenset((horizontal_operation, non_assign_operation))),
|
||||
operation("unpack_sampler_2x32", 1, printable_name="unpackSampler2x32", source_types=(uint64_type,), dest_type=uint_type, c_expression="memcpy(&data.u[0], &op[0]->value.u64[0], sizeof(uint64_t))", flags=frozenset((horizontal_operation, non_assign_operation))),
|
||||
operation("unpack_image_2x32", 1, printable_name="unpackImage2x32", source_types=(uint64_type,), dest_type=uint_type, c_expression="memcpy(&data.u[0], &op[0]->value.u64[0], sizeof(uint64_t))", flags=frozenset((horizontal_operation, non_assign_operation))),
|
||||
operation("pack_sampler_2x32", 1, printable_name="packSampler2x32", source_types=(uint_type,), dest_type=uint64_type, c_expression="data.u64[0] = pack_2x32(op[0]->value.u[0], op[0]->value.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
|
||||
operation("pack_image_2x32", 1, printable_name="packImage2x32", source_types=(uint_type,), dest_type=uint64_type, c_expression="data.u64[0] = pack_2x32(op[0]->value.u[0], op[0]->value.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
|
||||
operation("unpack_sampler_2x32", 1, printable_name="unpackSampler2x32", source_types=(uint64_type,), dest_type=uint_type, c_expression="unpack_2x32(op[0]->value.u64[0], &data.u[0], &data.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
|
||||
operation("unpack_image_2x32", 1, printable_name="unpackImage2x32", source_types=(uint64_type,), dest_type=uint_type, c_expression="unpack_2x32(op[0]->value.u64[0], &data.u[0], &data.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
|
||||
|
||||
operation("frexp_sig", 1),
|
||||
operation("frexp_exp", 1),
|
||||
|
||||
operation("noise", 1),
|
||||
|
||||
operation("subroutine_to_int", 1),
|
||||
|
||||
# Interpolate fs input at centroid
|
||||
|
@ -578,10 +587,10 @@ ir_expression_operation = [
|
|||
operation("ssbo_unsized_array_length", 1),
|
||||
|
||||
# 64-bit integer packing ops.
|
||||
operation("pack_int_2x32", 1, printable_name="packInt2x32", source_types=(int_type,), dest_type=int64_type, c_expression="memcpy(&data.i64[0], &op[0]->value.i[0], sizeof(int64_t))", flags=frozenset((horizontal_operation, non_assign_operation))),
|
||||
operation("pack_uint_2x32", 1, printable_name="packUint2x32", source_types=(uint_type,), dest_type=uint64_type, c_expression="memcpy(&data.u64[0], &op[0]->value.u[0], sizeof(uint64_t))", flags=frozenset((horizontal_operation, non_assign_operation))),
|
||||
operation("unpack_int_2x32", 1, printable_name="unpackInt2x32", source_types=(int64_type,), dest_type=int_type, c_expression="memcpy(&data.i[0], &op[0]->value.i64[0], sizeof(int64_t))", flags=frozenset((horizontal_operation, non_assign_operation))),
|
||||
operation("unpack_uint_2x32", 1, printable_name="unpackUint2x32", source_types=(uint64_type,), dest_type=uint_type, c_expression="memcpy(&data.u[0], &op[0]->value.u64[0], sizeof(uint64_t))", flags=frozenset((horizontal_operation, non_assign_operation))),
|
||||
operation("pack_int_2x32", 1, printable_name="packInt2x32", source_types=(int_type,), dest_type=int64_type, c_expression="data.u64[0] = pack_2x32(op[0]->value.u[0], op[0]->value.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
|
||||
operation("pack_uint_2x32", 1, printable_name="packUint2x32", source_types=(uint_type,), dest_type=uint64_type, c_expression="data.u64[0] = pack_2x32(op[0]->value.u[0], op[0]->value.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
|
||||
operation("unpack_int_2x32", 1, printable_name="unpackInt2x32", source_types=(int64_type,), dest_type=int_type, c_expression="unpack_2x32(op[0]->value.u64[0], &data.u[0], &data.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
|
||||
operation("unpack_uint_2x32", 1, printable_name="unpackUint2x32", source_types=(uint64_type,), dest_type=uint_type, c_expression="unpack_2x32(op[0]->value.u64[0], &data.u[0], &data.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
|
||||
|
||||
operation("add", 2, printable_name="+", source_types=numeric_types, c_expression="{src0} + {src1}", flags=vector_scalar_operation),
|
||||
operation("sub", 2, printable_name="-", source_types=numeric_types, c_expression="{src0} - {src1}", flags=vector_scalar_operation),
|
||||
|
|
|
@ -254,6 +254,18 @@
|
|||
}
|
||||
break;
|
||||
|
||||
case ir_unop_b2f16:
|
||||
for (unsigned c = 0; c < op[0]->type->components(); c++) {
|
||||
switch (op[0]->type->base_type) {
|
||||
case GLSL_TYPE_BOOL:
|
||||
data.f[c] = op[0]->value.b[c] ? 1.0F : 0.0F;
|
||||
break;
|
||||
default:
|
||||
unreachable("invalid type");
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case ir_unop_i2b:
|
||||
for (unsigned c = 0; c < op[0]->type->components(); c++) {
|
||||
switch (op[0]->type->base_type) {
|
||||
|
@ -341,6 +353,42 @@
|
|||
}
|
||||
break;
|
||||
|
||||
case ir_unop_f2f16:
|
||||
for (unsigned c = 0; c < op[0]->type->components(); c++) {
|
||||
switch (op[0]->type->base_type) {
|
||||
case GLSL_TYPE_FLOAT:
|
||||
data.f[c] = op[0]->value.f[c];
|
||||
break;
|
||||
default:
|
||||
unreachable("invalid type");
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case ir_unop_f2fmp:
|
||||
for (unsigned c = 0; c < op[0]->type->components(); c++) {
|
||||
switch (op[0]->type->base_type) {
|
||||
case GLSL_TYPE_FLOAT:
|
||||
data.f[c] = op[0]->value.f[c];
|
||||
break;
|
||||
default:
|
||||
unreachable("invalid type");
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case ir_unop_f162f:
|
||||
for (unsigned c = 0; c < op[0]->type->components(); c++) {
|
||||
switch (op[0]->type->base_type) {
|
||||
case GLSL_TYPE_FLOAT:
|
||||
data.f[c] = op[0]->value.f[c];
|
||||
break;
|
||||
default:
|
||||
unreachable("invalid type");
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case ir_unop_d2i:
|
||||
for (unsigned c = 0; c < op[0]->type->components(); c++) {
|
||||
switch (op[0]->type->base_type) {
|
||||
|
@ -401,6 +449,18 @@
|
|||
}
|
||||
break;
|
||||
|
||||
case ir_unop_f162b:
|
||||
for (unsigned c = 0; c < op[0]->type->components(); c++) {
|
||||
switch (op[0]->type->base_type) {
|
||||
case GLSL_TYPE_FLOAT:
|
||||
data.b[c] = op[0]->value.f[c] != 0.0;
|
||||
break;
|
||||
default:
|
||||
unreachable("invalid type");
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case ir_unop_bitcast_i2f:
|
||||
for (unsigned c = 0; c < op[0]->type->components(); c++) {
|
||||
switch (op[0]->type->base_type) {
|
||||
|
@ -1075,43 +1135,43 @@
|
|||
break;
|
||||
|
||||
case ir_unop_pack_double_2x32:
|
||||
memcpy(&data.d[0], &op[0]->value.u[0], sizeof(double));
|
||||
data.u64[0] = pack_2x32(op[0]->value.u[0], op[0]->value.u[1]);
|
||||
break;
|
||||
|
||||
case ir_unop_unpack_double_2x32:
|
||||
memcpy(&data.u[0], &op[0]->value.d[0], sizeof(double));
|
||||
unpack_2x32(op[0]->value.u64[0], &data.u[0], &data.u[1]);
|
||||
break;
|
||||
|
||||
case ir_unop_pack_sampler_2x32:
|
||||
memcpy(&data.u64[0], &op[0]->value.u[0], sizeof(uint64_t));
|
||||
data.u64[0] = pack_2x32(op[0]->value.u[0], op[0]->value.u[1]);
|
||||
break;
|
||||
|
||||
case ir_unop_pack_image_2x32:
|
||||
memcpy(&data.u64[0], &op[0]->value.u[0], sizeof(uint64_t));
|
||||
data.u64[0] = pack_2x32(op[0]->value.u[0], op[0]->value.u[1]);
|
||||
break;
|
||||
|
||||
case ir_unop_unpack_sampler_2x32:
|
||||
memcpy(&data.u[0], &op[0]->value.u64[0], sizeof(uint64_t));
|
||||
unpack_2x32(op[0]->value.u64[0], &data.u[0], &data.u[1]);
|
||||
break;
|
||||
|
||||
case ir_unop_unpack_image_2x32:
|
||||
memcpy(&data.u[0], &op[0]->value.u64[0], sizeof(uint64_t));
|
||||
unpack_2x32(op[0]->value.u64[0], &data.u[0], &data.u[1]);
|
||||
break;
|
||||
|
||||
case ir_unop_pack_int_2x32:
|
||||
memcpy(&data.i64[0], &op[0]->value.i[0], sizeof(int64_t));
|
||||
data.u64[0] = pack_2x32(op[0]->value.u[0], op[0]->value.u[1]);
|
||||
break;
|
||||
|
||||
case ir_unop_pack_uint_2x32:
|
||||
memcpy(&data.u64[0], &op[0]->value.u[0], sizeof(uint64_t));
|
||||
data.u64[0] = pack_2x32(op[0]->value.u[0], op[0]->value.u[1]);
|
||||
break;
|
||||
|
||||
case ir_unop_unpack_int_2x32:
|
||||
memcpy(&data.i[0], &op[0]->value.i64[0], sizeof(int64_t));
|
||||
unpack_2x32(op[0]->value.u64[0], &data.u[0], &data.u[1]);
|
||||
break;
|
||||
|
||||
case ir_unop_unpack_uint_2x32:
|
||||
memcpy(&data.u[0], &op[0]->value.u64[0], sizeof(uint64_t));
|
||||
unpack_2x32(op[0]->value.u64[0], &data.u[0], &data.u[1]);
|
||||
break;
|
||||
|
||||
case ir_binop_add:
|
||||
|
|
|
@ -39,6 +39,7 @@ const char *const ir_expression_operation_strings[] = {
|
|||
"i2f",
|
||||
"f2b",
|
||||
"b2f",
|
||||
"b2f16",
|
||||
"i2b",
|
||||
"b2i",
|
||||
"u2f",
|
||||
|
@ -46,11 +47,15 @@ const char *const ir_expression_operation_strings[] = {
|
|||
"u2i",
|
||||
"d2f",
|
||||
"f2d",
|
||||
"f2f16",
|
||||
"f2fmp",
|
||||
"f162f",
|
||||
"d2i",
|
||||
"i2d",
|
||||
"d2u",
|
||||
"u2d",
|
||||
"d2b",
|
||||
"f162b",
|
||||
"bitcast_i2f",
|
||||
"bitcast_f2i",
|
||||
"bitcast_u2f",
|
||||
|
@ -117,7 +122,6 @@ const char *const ir_expression_operation_strings[] = {
|
|||
"unpackImage2x32",
|
||||
"frexp_sig",
|
||||
"frexp_exp",
|
||||
"noise",
|
||||
"subroutine_to_int",
|
||||
"interpolate_at_centroid",
|
||||
"get_buffer_size",
|
||||
|
@ -191,6 +195,7 @@ const char *const ir_expression_operation_enum_strings[] = {
|
|||
"i2f",
|
||||
"f2b",
|
||||
"b2f",
|
||||
"b2f16",
|
||||
"i2b",
|
||||
"b2i",
|
||||
"u2f",
|
||||
|
@ -198,11 +203,15 @@ const char *const ir_expression_operation_enum_strings[] = {
|
|||
"u2i",
|
||||
"d2f",
|
||||
"f2d",
|
||||
"f2f16",
|
||||
"f2fmp",
|
||||
"f162f",
|
||||
"d2i",
|
||||
"i2d",
|
||||
"d2u",
|
||||
"u2d",
|
||||
"d2b",
|
||||
"f162b",
|
||||
"bitcast_i2f",
|
||||
"bitcast_f2i",
|
||||
"bitcast_u2f",
|
||||
|
@ -269,7 +278,6 @@ const char *const ir_expression_operation_enum_strings[] = {
|
|||
"unpack_image_2x32",
|
||||
"frexp_sig",
|
||||
"frexp_exp",
|
||||
"noise",
|
||||
"subroutine_to_int",
|
||||
"interpolate_at_centroid",
|
||||
"get_buffer_size",
|
||||
|
|
|
@ -37,8 +37,7 @@ ir_hierarchical_visitor::ir_hierarchical_visitor()
|
|||
ir_visitor_status
|
||||
ir_hierarchical_visitor::visit(ir_rvalue *ir)
|
||||
{
|
||||
if (this->callback_enter != NULL)
|
||||
this->callback_enter(ir, this->data_enter);
|
||||
call_enter_leave_callbacks(ir);
|
||||
|
||||
return visit_continue;
|
||||
}
|
||||
|
@ -46,8 +45,7 @@ ir_hierarchical_visitor::visit(ir_rvalue *ir)
|
|||
ir_visitor_status
|
||||
ir_hierarchical_visitor::visit(ir_variable *ir)
|
||||
{
|
||||
if (this->callback_enter != NULL)
|
||||
this->callback_enter(ir, this->data_enter);
|
||||
call_enter_leave_callbacks(ir);
|
||||
|
||||
return visit_continue;
|
||||
}
|
||||
|
@ -55,8 +53,7 @@ ir_hierarchical_visitor::visit(ir_variable *ir)
|
|||
ir_visitor_status
|
||||
ir_hierarchical_visitor::visit(ir_constant *ir)
|
||||
{
|
||||
if (this->callback_enter != NULL)
|
||||
this->callback_enter(ir, this->data_enter);
|
||||
call_enter_leave_callbacks(ir);
|
||||
|
||||
return visit_continue;
|
||||
}
|
||||
|
@ -64,8 +61,7 @@ ir_hierarchical_visitor::visit(ir_constant *ir)
|
|||
ir_visitor_status
|
||||
ir_hierarchical_visitor::visit(ir_loop_jump *ir)
|
||||
{
|
||||
if (this->callback_enter != NULL)
|
||||
this->callback_enter(ir, this->data_enter);
|
||||
call_enter_leave_callbacks(ir);
|
||||
|
||||
return visit_continue;
|
||||
}
|
||||
|
@ -91,8 +87,7 @@ ir_hierarchical_visitor::visit(ir_typedecl_statement *ir)
|
|||
ir_visitor_status
|
||||
ir_hierarchical_visitor::visit(ir_dereference_variable *ir)
|
||||
{
|
||||
if (this->callback_enter != NULL)
|
||||
this->callback_enter(ir, this->data_enter);
|
||||
call_enter_leave_callbacks(ir);
|
||||
|
||||
return visit_continue;
|
||||
}
|
||||
|
@ -100,8 +95,7 @@ ir_hierarchical_visitor::visit(ir_dereference_variable *ir)
|
|||
ir_visitor_status
|
||||
ir_hierarchical_visitor::visit(ir_barrier *ir)
|
||||
{
|
||||
if (this->callback_enter != NULL)
|
||||
this->callback_enter(ir, this->data_enter);
|
||||
call_enter_leave_callbacks(ir);
|
||||
|
||||
return visit_continue;
|
||||
}
|
||||
|
@ -400,6 +394,14 @@ ir_hierarchical_visitor::run(exec_list *instructions)
|
|||
visit_list_elements(this, instructions);
|
||||
}
|
||||
|
||||
void
|
||||
ir_hierarchical_visitor::call_enter_leave_callbacks(class ir_instruction *ir)
|
||||
{
|
||||
if (this->callback_enter != NULL)
|
||||
this->callback_enter(ir, this->data_enter);
|
||||
if (this->callback_leave != NULL)
|
||||
this->callback_leave(ir, this->data_leave);
|
||||
}
|
||||
|
||||
void
|
||||
visit_tree(ir_instruction *ir,
|
||||
|
|
|
@ -151,6 +151,12 @@ public:
|
|||
*/
|
||||
void run(struct exec_list *instructions);
|
||||
|
||||
/**
|
||||
* Utility function to call both the leave and enter callback functions.
|
||||
* This is used for leaf nodes.
|
||||
*/
|
||||
void call_enter_leave_callbacks(class ir_instruction *ir);
|
||||
|
||||
/* Some visitors may need to insert new variable declarations and
|
||||
* assignments for portions of a subtree, which means they need a
|
||||
* pointer to the current instruction in the stream, not just their
|
||||
|
|
|
@ -135,7 +135,6 @@ bool do_vec_index_to_swizzle(exec_list *instructions);
|
|||
bool lower_discard(exec_list *instructions);
|
||||
void lower_discard_flow(exec_list *instructions);
|
||||
bool lower_instructions(exec_list *instructions, unsigned what_to_lower);
|
||||
bool lower_noise(exec_list *instructions);
|
||||
bool lower_variable_index_to_cond_assign(gl_shader_stage stage,
|
||||
exec_list *instructions, bool lower_input, bool lower_output,
|
||||
bool lower_temp, bool lower_uniform);
|
||||
|
@ -143,6 +142,9 @@ bool lower_quadop_vector(exec_list *instructions, bool dont_lower_swz);
|
|||
bool lower_const_arrays_to_uniforms(exec_list *instructions, unsigned stage, unsigned max_uniform_components);
|
||||
bool lower_clip_cull_distance(struct gl_shader_program *prog,
|
||||
gl_linked_shader *shader);
|
||||
ir_variable * lower_xfb_varying(void *mem_ctx,
|
||||
gl_linked_shader *shader,
|
||||
const char *old_var_name);
|
||||
void lower_output_reads(unsigned stage, exec_list *instructions);
|
||||
bool lower_packing_builtins(exec_list *instructions, int op_mask);
|
||||
void lower_shared_reference(struct gl_context *ctx,
|
||||
|
@ -156,7 +158,9 @@ void lower_packed_varyings(void *mem_ctx,
|
|||
ir_variable_mode mode,
|
||||
unsigned gs_input_vertices,
|
||||
gl_linked_shader *shader,
|
||||
bool disable_varying_packing, bool xfb_enabled);
|
||||
bool disable_varying_packing,
|
||||
bool disable_xfb_packing,
|
||||
bool xfb_enabled);
|
||||
bool lower_vector_insert(exec_list *instructions, bool lower_nonconstant_index);
|
||||
bool lower_vector_derefs(gl_linked_shader *shader);
|
||||
void lower_named_interface_blocks(void *mem_ctx, gl_linked_shader *shader);
|
||||
|
@ -171,6 +175,7 @@ bool lower_vertex_id(gl_linked_shader *shader);
|
|||
bool lower_cs_derived(gl_linked_shader *shader);
|
||||
bool lower_blend_equation_advanced(gl_linked_shader *shader, bool coherent);
|
||||
|
||||
bool lower_builtins(exec_list *instructions);
|
||||
bool lower_subroutine(exec_list *instructions, struct _mesa_glsl_parse_state *state);
|
||||
void propagate_invariance(exec_list *instructions);
|
||||
|
||||
|
@ -183,4 +188,6 @@ ir_variable *compare_index_block(ir_builder::ir_factory &body,
|
|||
bool lower_64bit_integer_instructions(exec_list *instructions,
|
||||
unsigned what_to_lower);
|
||||
|
||||
bool lower_precision(exec_list *instructions);
|
||||
|
||||
#endif /* GLSL_IR_OPTIMIZATION_H */
|
||||
|
|
|
@ -274,6 +274,8 @@ _mesa_print_ir_glsl(exec_list *instructions,
|
|||
str.asprintf_append ("#extension GL_EXT_blend_func_extended : enable\n");
|
||||
if (state->OES_EGL_image_external_essl3_enable)
|
||||
str.asprintf_append ("#extension GL_OES_EGL_image_external_essl3 : enable\n");
|
||||
if (state->ARB_shader_storage_buffer_object_enable)
|
||||
str.asprintf_append ("#extension GL_ARB_shader_storage_buffer_object : enable\n");
|
||||
|
||||
|
||||
// TODO: support other blend specifiers besides "all"
|
||||
|
@ -464,14 +466,25 @@ static void print_type(string_buffer& buffer, const glsl_type *t, bool arraySize
|
|||
static void print_type_post(string_buffer& buffer, const glsl_type *t, bool arraySize)
|
||||
{
|
||||
if (t->base_type == GLSL_TYPE_ARRAY) {
|
||||
if (!arraySize)
|
||||
buffer.asprintf_append ("[%u]", t->length);
|
||||
if (!arraySize) {
|
||||
if (t->length) {
|
||||
buffer.asprintf_append ("[%u]", t->length);
|
||||
} else {
|
||||
buffer.asprintf_append ("[]");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void ir_print_glsl_visitor::visit(ir_variable *ir)
|
||||
{
|
||||
// Variables that are declared as or part of interface blocks will be printed by the block declaration.
|
||||
if (ir->is_in_buffer_block()) {
|
||||
skipped_this_ir = true;
|
||||
return;
|
||||
}
|
||||
|
||||
const char *const cent = (ir->data.centroid) ? "centroid " : "";
|
||||
const char *const inv = (ir->data.invariant) ? "invariant " : "";
|
||||
const char *const mode[3][ir_var_mode_count] =
|
||||
|
@ -644,174 +657,194 @@ void ir_print_glsl_visitor::visit(ir_function *ir)
|
|||
indent();
|
||||
}
|
||||
|
||||
static const char *const operator_glsl_strs[] = {
|
||||
"~", // ir_unop_bit_not,
|
||||
"!", // ir_unop_logic_not,
|
||||
"-", // ir_unop_neg,
|
||||
"abs", // ir_unop_abs,
|
||||
"sign", // ir_unop_sign,
|
||||
"1.0/", // ir_unop_rcp,
|
||||
"inversesqrt", // ir_unop_rsq,
|
||||
"sqrt", // ir_unop_sqrt,
|
||||
"exp", // ir_unop_exp,
|
||||
"log", // ir_unop_log,
|
||||
"exp2", // ir_unop_exp2,
|
||||
"log2", // ir_unop_log2,
|
||||
"int", // ir_unop_f2i,
|
||||
"int", // ir_unop_f2u,
|
||||
"float", // ir_unop_i2f,
|
||||
"bool", // ir_unop_f2b,
|
||||
"float", // ir_unop_b2f,
|
||||
"bool", // ir_unop_i2b,
|
||||
"int", // ir_unop_b2i,
|
||||
"float", // ir_unop_u2f,
|
||||
"int", // ir_unop_i2u,
|
||||
"int", // ir_unop_u2i,
|
||||
"float", // ir_unop_d2f,
|
||||
"f2d", // ir_unop_f2d,
|
||||
"d2i", // ir_unop_d2i,
|
||||
"i2d", // ir_unop_i2d,
|
||||
"d2u", // ir_unop_d2u,
|
||||
"u2d", // ir_unop_u2d,
|
||||
"d2b", // ir_unop_d2b,
|
||||
"intBitsToFloat", // ir_unop_bitcast_i2f,
|
||||
"floatBitsToInt", // ir_unop_bitcast_f2i,
|
||||
"uintBitsToFloat", // ir_unop_bitcast_u2f,
|
||||
"floatBitsToUint", // ir_unop_bitcast_f2u,
|
||||
"uint64BitsToDouble", // ir_unop_bitcast_u642d,
|
||||
"int64BitsToDouble", // ir_unop_bitcast_i642d,
|
||||
"doubleBitsToUint64", // ir_unop_bitcast_d2u64,
|
||||
"doubleBitsToInt64", // ir_unop_bitcast_d2i64,
|
||||
"int", // ir_unop_i642i,
|
||||
"int", // ir_unop_u642i,
|
||||
"uint", // ir_unop_i642u,
|
||||
"uint", // ir_unop_u642u,
|
||||
"bool", // ir_unop_i642b,
|
||||
"float", // ir_unop_i642f,
|
||||
"float", // ir_unop_u642f,
|
||||
"double", // ir_unop_i642d,
|
||||
"double", // ir_unop_u642d,
|
||||
"int64_t", // ir_unop_i2i64,
|
||||
"int64_t", // ir_unop_u2i64,
|
||||
"int64_t", // ir_unop_b2i64,
|
||||
"int64_t", // ir_unop_f2i64,
|
||||
"int64_t", // ir_unop_d2i64,
|
||||
"uint64_t", // ir_unop_i2u64,
|
||||
"uint64_t", // ir_unop_u2u64,
|
||||
"uint64_t", // ir_unop_f2u64,
|
||||
"uint64_t", // ir_unop_d2u64,
|
||||
"int64_t", // ir_unop_u642i64,
|
||||
"uint64_t", // ir_unop_i642u64,
|
||||
"trunc", // ir_unop_trunc,
|
||||
"ceil", // ir_unop_ceil,
|
||||
"floor", // ir_unop_floor,
|
||||
"fract", // ir_unop_fract,
|
||||
"roundEven", // ir_unop_round_even,
|
||||
"sin", // ir_unop_sin,
|
||||
"cos", // ir_unop_cos,
|
||||
"atan", // ir_unop_atan,
|
||||
"dFdx", // ir_unop_dFdx,
|
||||
"dFdxCoarse", // ir_unop_dFdx_coarse,
|
||||
"dFdxFine", // ir_unop_dFdx_fine,
|
||||
"dFdy", // ir_unop_dFdy,
|
||||
"dFdyCoarse", // ir_unop_dFdy_coarse,
|
||||
"dFdyFine", // ir_unop_dFdy_fine,
|
||||
"packSnorm2x16", // ir_unop_pack_snorm_2x16,
|
||||
"packSnorm4x8", // ir_unop_pack_snorm_4x8,
|
||||
"packUnorm2x16", // ir_unop_pack_unorm_2x16,
|
||||
"packUnorm4x8", // ir_unop_pack_unorm_4x8,
|
||||
"packHalf2x16", // ir_unop_pack_half_2x16,
|
||||
"unpackSnorm2x16", // ir_unop_unpack_snorm_2x16,
|
||||
"unpackSnorm4x8", // ir_unop_unpack_snorm_4x8,
|
||||
"unpackUnorm2x16", // ir_unop_unpack_unorm_2x16,
|
||||
"unpackUnorm4x8", // ir_unop_unpack_unorm_4x8,
|
||||
"unpackHalf2x16", // ir_unop_unpack_half_2x16,
|
||||
"bitfieldReverse", // ir_unop_bitfield_reverse,
|
||||
"bitCount", // ir_unop_bit_count,
|
||||
"findMSB", // ir_unop_find_msb,
|
||||
"findLSB", // ir_unop_find_lsb,
|
||||
"clz_TODO", // ir_unop_clz,
|
||||
"saturate", // ir_unop_saturate,
|
||||
"packDouble2x32", // ir_unop_pack_double_2x32,
|
||||
"unpackDouble2x32", // ir_unop_unpack_double_2x32,
|
||||
"packSampler2x32", // ir_unop_pack_sampler_2x32,
|
||||
"packImage2x32", // ir_unop_pack_image_2x32,
|
||||
"unpackSampler2x32", // ir_unop_unpack_sampler_2x32,
|
||||
"unpackImage2x32", // ir_unop_unpack_image_2x32,
|
||||
"frexp_sig_TODO", // ir_unop_frexp_sig,
|
||||
"frexp_exp_TODO", // ir_unop_frexp_exp,
|
||||
"noise", // ir_unop_noise,
|
||||
"subroutine_to_int_TODO", // ir_unop_subroutine_to_int,
|
||||
"interpolateAtCentroid", // ir_unop_interpolate_at_centroid,
|
||||
"get_buffer_size_TODO", // ir_unop_get_buffer_size,
|
||||
"ssbo_unsized_array_length_TODO", // ir_unop_ssbo_unsized_array_length,
|
||||
"packInt2x32", // ir_unop_pack_int_2x32,
|
||||
"packUint2x32", // ir_unop_pack_uint_2x32,
|
||||
"unpackInt2x32", // ir_unop_unpack_int_2x32,
|
||||
"unpackUint2x32", // ir_unop_unpack_uint_2x32,
|
||||
"+", // ir_binop_add,
|
||||
"-", // ir_binop_sub,
|
||||
"add_sat_TODO", // ir_binop_add_sat,
|
||||
"sub_sat_TODO", // ir_binop_sub_sat,
|
||||
"abs_sub_TODO", // ir_binop_abs_sub,
|
||||
"avg_TODO", // ir_binop_avg,
|
||||
"avg_round_TODO", // ir_binop_avg_round,
|
||||
"*", // ir_binop_mul,
|
||||
"mul_32x16_TODO", // ir_binop_mul_32x16,
|
||||
"imul_high_TODO", // ir_binop_imul_high,
|
||||
"/", // ir_binop_div,
|
||||
"carry_TODO", // ir_binop_carry,
|
||||
"borrow_TODO", // ir_binop_borrow,
|
||||
"mod", // ir_binop_mod,
|
||||
"<", // ir_binop_less,
|
||||
">=", // ir_binop_gequal,
|
||||
"==", // ir_binop_equal,
|
||||
"!=", // ir_binop_nequal,
|
||||
"==", // ir_binop_all_equal,
|
||||
"!=", // ir_binop_any_nequal,
|
||||
"<<", // ir_binop_lshift,
|
||||
">>", // ir_binop_rshift,
|
||||
"&", // ir_binop_bit_and,
|
||||
"^", // ir_binop_bit_xor,
|
||||
"|", // ir_binop_bit_or,
|
||||
"&&", // ir_binop_logic_and,
|
||||
"^^", // ir_binop_logic_xor,
|
||||
"||", // ir_binop_logic_or,
|
||||
"dot", // ir_binop_dot,
|
||||
"min", // ir_binop_min,
|
||||
"max", // ir_binop_max,
|
||||
"pow", // ir_binop_pow,
|
||||
"uboload_TODO", // ir_binop_ubo_load,
|
||||
"ldexp_TODO", // ir_binop_ldexp,
|
||||
"vectorExtract_TODO", // ir_binop_vector_extract,
|
||||
"interpolateAtOffset", // ir_binop_interpolate_at_offset,
|
||||
"interpolateAtSample", // ir_binop_interpolate_at_sample,
|
||||
"atan", // ir_binop_atan2,
|
||||
"fma", // ir_triop_fma,
|
||||
"mix", // ir_triop_lrp,
|
||||
"csel_TODO", // ir_triop_csel,
|
||||
"bitfield_extract_TODO", // ir_triop_bitfield_extract,
|
||||
"vector_insert_TODO", // ir_triop_vector_insert,
|
||||
"bitfield_insert_TODO", // ir_quadop_bitfield_insert,
|
||||
"vector_TODO", // ir_quadop_vector,
|
||||
};
|
||||
|
||||
static const char *const operator_vec_glsl_strs[] = {
|
||||
"lessThan",
|
||||
"greaterThanEqual",
|
||||
"equal",
|
||||
"notEqual",
|
||||
};
|
||||
|
||||
static const char* operator_glsl_str(ir_expression_operation op, const glsl_type* type) {
|
||||
switch (op) {
|
||||
case ir_unop_bit_not:
|
||||
return "~";
|
||||
case ir_unop_logic_not:
|
||||
return "!";
|
||||
case ir_unop_neg:
|
||||
return "-";
|
||||
case ir_unop_abs:
|
||||
return "abs";
|
||||
case ir_unop_sign:
|
||||
return "sign";
|
||||
case ir_unop_rsq:
|
||||
return "inversesqrt";
|
||||
case ir_unop_sqrt:
|
||||
return "sqrt";
|
||||
case ir_unop_exp:
|
||||
return "exp";
|
||||
case ir_unop_log:
|
||||
return "log";
|
||||
case ir_unop_exp2:
|
||||
return "exp2";
|
||||
case ir_unop_log2:
|
||||
return "log2";
|
||||
case ir_unop_trunc:
|
||||
return "trunc";
|
||||
case ir_unop_ceil:
|
||||
return "ceil";
|
||||
case ir_unop_floor:
|
||||
return "floor";
|
||||
case ir_unop_fract:
|
||||
return "fract";
|
||||
case ir_unop_round_even:
|
||||
return "roundEven";
|
||||
case ir_unop_sin:
|
||||
return "sin";
|
||||
case ir_unop_cos:
|
||||
return "cos";
|
||||
case ir_unop_atan:
|
||||
return "atan";
|
||||
case ir_unop_dFdx:
|
||||
return "dFdx";
|
||||
case ir_unop_dFdx_coarse:
|
||||
return "dFdxCoarse";
|
||||
case ir_unop_dFdx_fine:
|
||||
return "dFdxFine";
|
||||
case ir_unop_dFdy:
|
||||
return "dFdy";
|
||||
case ir_unop_dFdy_coarse:
|
||||
return "dFdyCoarse";
|
||||
case ir_unop_dFdy_fine:
|
||||
return "dFdyFine";
|
||||
case ir_unop_pack_snorm_2x16:
|
||||
return "packSnorm2x16";
|
||||
case ir_unop_pack_snorm_4x8:
|
||||
return "packSnorm4x8";
|
||||
case ir_unop_pack_unorm_2x16:
|
||||
return "packUnorm2x16";
|
||||
case ir_unop_pack_unorm_4x8:
|
||||
return "packUnorm4x8";
|
||||
case ir_unop_pack_half_2x16:
|
||||
return "packHalf2x16";
|
||||
case ir_unop_unpack_snorm_2x16:
|
||||
return "unpackSnorm2x16";
|
||||
case ir_unop_unpack_snorm_4x8:
|
||||
return "unpackSnorm4x8";
|
||||
case ir_unop_unpack_unorm_2x16:
|
||||
return "unpackUnorm2x16";
|
||||
case ir_unop_unpack_unorm_4x8:
|
||||
return "unpackUnorm4x8";
|
||||
case ir_unop_unpack_half_2x16:
|
||||
return "unpackHalf2x16";
|
||||
case ir_unop_bitfield_reverse:
|
||||
return "bitfieldReverse";
|
||||
case ir_unop_bit_count:
|
||||
return "bitCount";
|
||||
case ir_unop_find_msb:
|
||||
return "findMSB";
|
||||
case ir_unop_find_lsb:
|
||||
return "findLSB";
|
||||
case ir_unop_saturate:
|
||||
return "saturate";
|
||||
case ir_unop_pack_double_2x32:
|
||||
return "packDouble2x32";
|
||||
case ir_unop_unpack_double_2x32:
|
||||
return "unpackDouble2x32";
|
||||
case ir_unop_pack_sampler_2x32:
|
||||
return "packSampler2x32";
|
||||
case ir_unop_pack_image_2x32:
|
||||
return "packImage2x32";
|
||||
case ir_unop_unpack_sampler_2x32:
|
||||
return "unpackSampler2x32";
|
||||
case ir_unop_unpack_image_2x32:
|
||||
return "unpackImage2x32";
|
||||
case ir_unop_interpolate_at_centroid:
|
||||
return "interpolateAtCentroid";
|
||||
case ir_unop_pack_int_2x32:
|
||||
return "packInt2x32";
|
||||
case ir_unop_pack_uint_2x32:
|
||||
return "packUint2x32";
|
||||
case ir_unop_unpack_int_2x32:
|
||||
return "unpackInt2x32";
|
||||
case ir_unop_unpack_uint_2x32:
|
||||
return "unpackUint2x32";
|
||||
case ir_binop_add:
|
||||
return "+";
|
||||
case ir_binop_sub:
|
||||
return "-";
|
||||
case ir_binop_mul:
|
||||
return "*";
|
||||
case ir_binop_div:
|
||||
return "/";
|
||||
case ir_binop_mod:
|
||||
if (type->is_integer())
|
||||
return "%";
|
||||
else
|
||||
return "mod";
|
||||
case ir_binop_less:
|
||||
if (type->is_vector())
|
||||
return "lessThan";
|
||||
else
|
||||
return "<";
|
||||
case ir_binop_gequal:
|
||||
if (type->is_vector())
|
||||
return "greaterThanEqual";
|
||||
else
|
||||
return ">=";
|
||||
case ir_binop_equal:
|
||||
if (type->is_vector())
|
||||
return "equal";
|
||||
else
|
||||
return "==";
|
||||
case ir_binop_nequal:
|
||||
if (type->is_vector())
|
||||
return "notEqual";
|
||||
else
|
||||
return "!=";
|
||||
case ir_binop_all_equal:
|
||||
return "==";
|
||||
case ir_binop_any_nequal:
|
||||
return "!=";
|
||||
case ir_binop_lshift:
|
||||
return "<<";
|
||||
case ir_binop_rshift:
|
||||
return ">>";
|
||||
case ir_binop_bit_and:
|
||||
return "&";
|
||||
case ir_binop_bit_xor:
|
||||
return "^";
|
||||
case ir_binop_bit_or:
|
||||
return "|";
|
||||
case ir_binop_logic_and:
|
||||
return "&&";
|
||||
case ir_binop_logic_xor:
|
||||
return "^^";
|
||||
case ir_binop_logic_or:
|
||||
return "||";
|
||||
case ir_binop_dot:
|
||||
return "dot";
|
||||
case ir_binop_min:
|
||||
return "min";
|
||||
case ir_binop_max:
|
||||
return "max";
|
||||
case ir_binop_pow:
|
||||
return "pow";
|
||||
case ir_binop_interpolate_at_offset:
|
||||
return "interpolateAtOffset";
|
||||
case ir_binop_interpolate_at_sample:
|
||||
return "interpolateAtSample";
|
||||
case ir_binop_atan2:
|
||||
return "atan";
|
||||
case ir_triop_fma:
|
||||
return "fma";
|
||||
case ir_triop_lrp:
|
||||
return "mix";
|
||||
default:
|
||||
unreachable("Unexpected operator in operator_glsl_str");
|
||||
return "UNIMPLEMENTED";
|
||||
}
|
||||
}
|
||||
|
||||
static bool is_binop_func_like(ir_expression_operation op, const glsl_type* type)
|
||||
{
|
||||
if (op == ir_binop_mod ||
|
||||
(op >= ir_binop_dot && op <= ir_binop_pow) ||
|
||||
op == ir_binop_atan2)
|
||||
if (op == ir_binop_mod && !type->is_integer()) {
|
||||
return true;
|
||||
if (type->is_vector() && (op >= ir_binop_less && op <= ir_binop_nequal))
|
||||
{
|
||||
} else if ((op >= ir_binop_dot && op <= ir_binop_pow) || op == ir_binop_atan2) {
|
||||
return true;
|
||||
} else if (type->is_vector() && (op >= ir_binop_less && op <= ir_binop_nequal)) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
@ -829,7 +862,7 @@ void ir_print_glsl_visitor::visit(ir_expression *ir)
|
|||
} else if (ir->operation == ir_unop_rcp) {
|
||||
buffer.asprintf_append ("(1.0/(");
|
||||
} else {
|
||||
buffer.asprintf_append ("%s(", operator_glsl_strs[ir->operation]);
|
||||
buffer.asprintf_append ("%s(", operator_glsl_str(ir->operation, ir->type));
|
||||
}
|
||||
if (ir->operands[0])
|
||||
ir->operands[0]->accept(this);
|
||||
|
@ -862,23 +895,6 @@ void ir_print_glsl_visitor::visit(ir_expression *ir)
|
|||
ir->operands[1]->accept(this);
|
||||
buffer.asprintf_append ("]");
|
||||
}
|
||||
else if (ir->operation == ir_binop_mod && ir->operands[0]->type->is_integer())
|
||||
{
|
||||
// In GLES, mod() is only a func for floats,
|
||||
// and we must use the % operator for ints.
|
||||
assert(ir->num_operands == 2);
|
||||
assert(ir->operands[1]->type->is_integer());
|
||||
|
||||
buffer.asprintf_append ("(");
|
||||
if (ir->operands[0])
|
||||
ir->operands[0]->accept(this);
|
||||
|
||||
buffer.asprintf_append (" %s ", "%");
|
||||
|
||||
if (ir->operands[1])
|
||||
ir->operands[1]->accept(this);
|
||||
buffer.asprintf_append (")");
|
||||
}
|
||||
else if (is_binop_func_like(ir->operation, ir->type))
|
||||
{
|
||||
if (ir->operation == ir_binop_mod)
|
||||
|
@ -887,10 +903,7 @@ void ir_print_glsl_visitor::visit(ir_expression *ir)
|
|||
print_type(buffer, ir->type, true);
|
||||
buffer.asprintf_append ("(");
|
||||
}
|
||||
if (ir->type->is_vector() && (ir->operation >= ir_binop_less && ir->operation <= ir_binop_nequal))
|
||||
buffer.asprintf_append ("%s (", operator_vec_glsl_strs[ir->operation-ir_binop_less]);
|
||||
else
|
||||
buffer.asprintf_append ("%s (", operator_glsl_strs[ir->operation]);
|
||||
buffer.asprintf_append ("%s (", operator_glsl_str(ir->operation, ir->type));
|
||||
|
||||
if (ir->operands[0])
|
||||
ir->operands[0]->accept(this);
|
||||
|
@ -907,7 +920,7 @@ void ir_print_glsl_visitor::visit(ir_expression *ir)
|
|||
if (ir->operands[0])
|
||||
ir->operands[0]->accept(this);
|
||||
|
||||
buffer.asprintf_append (" %s ", operator_glsl_strs[ir->operation]);
|
||||
buffer.asprintf_append (" %s ", operator_glsl_str(ir->operation, ir->type));
|
||||
|
||||
if (ir->operands[1])
|
||||
ir->operands[1]->accept(this);
|
||||
|
@ -916,7 +929,7 @@ void ir_print_glsl_visitor::visit(ir_expression *ir)
|
|||
else
|
||||
{
|
||||
// ternary op
|
||||
buffer.asprintf_append ("%s (", operator_glsl_strs[ir->operation]);
|
||||
buffer.asprintf_append ("%s (", operator_glsl_str(ir->operation, ir->type));
|
||||
if (ir->operands[0])
|
||||
ir->operands[0]->accept(this);
|
||||
buffer.asprintf_append (", ");
|
||||
|
@ -1863,12 +1876,68 @@ ir_print_glsl_visitor::visit(ir_precision_statement *ir)
|
|||
buffer.asprintf_append ("%s", ir->precision_statement);
|
||||
}
|
||||
|
||||
// FIXME
|
||||
static const char*
|
||||
interface_packing_string(enum glsl_interface_packing packing)
|
||||
{
|
||||
switch (packing) {
|
||||
case GLSL_INTERFACE_PACKING_STD140:
|
||||
return "std140";
|
||||
case GLSL_INTERFACE_PACKING_SHARED:
|
||||
return "shared";
|
||||
case GLSL_INTERFACE_PACKING_PACKED:
|
||||
return "packed";
|
||||
case GLSL_INTERFACE_PACKING_STD430:
|
||||
return "std430";
|
||||
default:
|
||||
unreachable("Unexpected interface packing");
|
||||
return "UNKNOWN";
|
||||
}
|
||||
}
|
||||
|
||||
static const char*
|
||||
interface_variable_mode_string(enum ir_variable_mode mode)
|
||||
{
|
||||
switch (mode) {
|
||||
case ir_var_uniform:
|
||||
return "uniform";
|
||||
case ir_var_shader_storage:
|
||||
return "buffer";
|
||||
default:
|
||||
unreachable("Unexpected interface variable mode");
|
||||
return "UNKOWN";
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ir_print_glsl_visitor::visit(ir_typedecl_statement *ir)
|
||||
{
|
||||
const glsl_type *const s = ir->type_decl;
|
||||
buffer.asprintf_append ("struct %s {\n", s->name);
|
||||
|
||||
ir_variable* interface_var = NULL;
|
||||
|
||||
if (s->is_struct()) {
|
||||
buffer.asprintf_append ("struct %s {\n", s->name);
|
||||
} else if (s->is_interface()) {
|
||||
const char* packing = interface_packing_string(s->get_interface_packing());
|
||||
|
||||
// Find a variable defined by this interface, as it holds some necessary data.
|
||||
exec_node* n = ir;
|
||||
while ((n = n->get_next())) {
|
||||
ir_variable* v = ((ir_instruction *)n)->as_variable();
|
||||
if (v != NULL && v->get_interface_type() == ir->type_decl) {
|
||||
interface_var = v;
|
||||
break;
|
||||
}
|
||||
}
|
||||
const char* mode = interface_variable_mode_string((enum ir_variable_mode)interface_var->data.mode);
|
||||
if (interface_var->data.explicit_binding) {
|
||||
uint16_t binding = interface_var->data.binding;
|
||||
buffer.asprintf_append ("layout(%s, binding=%" PRIu16 ") %s %s {\n", packing, binding, mode, s->name);
|
||||
} else {
|
||||
buffer.asprintf_append ("layout(%s) %s %s {\n", packing, mode, s->name);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
for (unsigned j = 0; j < s->length; j++) {
|
||||
buffer.asprintf_append (" ");
|
||||
|
@ -1881,6 +1950,11 @@ ir_print_glsl_visitor::visit(ir_typedecl_statement *ir)
|
|||
buffer.asprintf_append (";\n");
|
||||
}
|
||||
buffer.asprintf_append ("}");
|
||||
|
||||
if (interface_var && interface_var->is_interface_instance()) {
|
||||
buffer.asprintf_append(" ");
|
||||
print_var_name(interface_var);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
#include "main/macros.h"
|
||||
#include "util/hash_table.h"
|
||||
#include "util/u_string.h"
|
||||
#include "util/half_float.h"
|
||||
|
||||
static void print_type(FILE *f, const glsl_type *t);
|
||||
|
||||
|
@ -460,6 +461,19 @@ void ir_print_visitor::visit(ir_assignment *ir)
|
|||
fprintf(f, ") ");
|
||||
}
|
||||
|
||||
static void
|
||||
print_float_constant(FILE *f, float val)
|
||||
{
|
||||
if (val == 0.0f)
|
||||
/* 0.0 == -0.0, so print with %f to get the proper sign. */
|
||||
fprintf(f, "%f", val);
|
||||
else if (fabs(val) < 0.000001f)
|
||||
fprintf(f, "%a", val);
|
||||
else if (fabs(val) > 1000000.0f)
|
||||
fprintf(f, "%e", val);
|
||||
else
|
||||
fprintf(f, "%f", val);
|
||||
}
|
||||
|
||||
void ir_print_visitor::visit(ir_constant *ir)
|
||||
{
|
||||
|
@ -484,15 +498,10 @@ void ir_print_visitor::visit(ir_constant *ir)
|
|||
case GLSL_TYPE_UINT: fprintf(f, "%u", ir->value.u[i]); break;
|
||||
case GLSL_TYPE_INT: fprintf(f, "%d", ir->value.i[i]); break;
|
||||
case GLSL_TYPE_FLOAT:
|
||||
if (ir->value.f[i] == 0.0f)
|
||||
/* 0.0 == -0.0, so print with %f to get the proper sign. */
|
||||
fprintf(f, "%f", ir->value.f[i]);
|
||||
else if (fabs(ir->value.f[i]) < 0.000001f)
|
||||
fprintf(f, "%a", ir->value.f[i]);
|
||||
else if (fabs(ir->value.f[i]) > 1000000.0f)
|
||||
fprintf(f, "%e", ir->value.f[i]);
|
||||
else
|
||||
fprintf(f, "%f", ir->value.f[i]);
|
||||
print_float_constant(f, ir->value.f[i]);
|
||||
break;
|
||||
case GLSL_TYPE_FLOAT16:
|
||||
print_float_constant(f, _mesa_half_to_float(ir->value.f16[i]));
|
||||
break;
|
||||
case GLSL_TYPE_SAMPLER:
|
||||
case GLSL_TYPE_IMAGE:
|
||||
|
|
|
@ -49,7 +49,7 @@ public:
|
|||
|
||||
virtual ir_visitor_status visit(ir_typedecl_statement* ir)
|
||||
{
|
||||
if (!used_structs->has_struct_entry(ir->type_decl))
|
||||
if (ir->type_decl->is_struct() && !used_structs->has_struct_entry(ir->type_decl))
|
||||
{
|
||||
ir->remove();
|
||||
}
|
||||
|
|
|
@ -260,8 +260,7 @@ ir_validate::visit_leave(ir_expression *ir)
|
|||
case ir_unop_abs:
|
||||
case ir_unop_sign:
|
||||
assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT ||
|
||||
ir->operands[0]->type->is_float() ||
|
||||
ir->operands[0]->type->is_double() ||
|
||||
ir->operands[0]->type->is_float_16_32_64() ||
|
||||
ir->operands[0]->type->base_type == GLSL_TYPE_INT64);
|
||||
assert(ir->type == ir->operands[0]->type);
|
||||
break;
|
||||
|
@ -269,8 +268,7 @@ ir_validate::visit_leave(ir_expression *ir)
|
|||
case ir_unop_rcp:
|
||||
case ir_unop_rsq:
|
||||
case ir_unop_sqrt:
|
||||
assert(ir->type->is_float() ||
|
||||
ir->type->is_double());
|
||||
assert(ir->type->is_float_16_32_64());
|
||||
assert(ir->type == ir->operands[0]->type);
|
||||
break;
|
||||
|
||||
|
@ -279,7 +277,7 @@ ir_validate::visit_leave(ir_expression *ir)
|
|||
case ir_unop_exp2:
|
||||
case ir_unop_log2:
|
||||
case ir_unop_saturate:
|
||||
assert(ir->operands[0]->type->is_float());
|
||||
assert(ir->operands[0]->type->is_float_16_32());
|
||||
assert(ir->type == ir->operands[0]->type);
|
||||
break;
|
||||
|
||||
|
@ -299,10 +297,19 @@ ir_validate::visit_leave(ir_expression *ir)
|
|||
assert(ir->operands[0]->type->is_float());
|
||||
assert(ir->type->is_boolean());
|
||||
break;
|
||||
case ir_unop_f162b:
|
||||
assert(ir->operands[0]->type->base_type ==
|
||||
GLSL_TYPE_FLOAT16);
|
||||
assert(ir->type->is_boolean());
|
||||
break;
|
||||
case ir_unop_b2f:
|
||||
assert(ir->operands[0]->type->is_boolean());
|
||||
assert(ir->type->is_float());
|
||||
break;
|
||||
case ir_unop_b2f16:
|
||||
assert(ir->operands[0]->type->is_boolean());
|
||||
assert(ir->type->base_type == GLSL_TYPE_FLOAT16);
|
||||
break;
|
||||
case ir_unop_i2b:
|
||||
assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT);
|
||||
assert(ir->type->is_boolean());
|
||||
|
@ -441,8 +448,7 @@ ir_validate::visit_leave(ir_expression *ir)
|
|||
case ir_unop_ceil:
|
||||
case ir_unop_floor:
|
||||
case ir_unop_fract:
|
||||
assert(ir->operands[0]->type->is_float() ||
|
||||
ir->operands[0]->type->is_double());
|
||||
assert(ir->operands[0]->type->is_float_16_32_64());
|
||||
assert(ir->operands[0]->type == ir->type);
|
||||
break;
|
||||
case ir_unop_sin:
|
||||
|
@ -453,7 +459,7 @@ ir_validate::visit_leave(ir_expression *ir)
|
|||
case ir_unop_dFdy:
|
||||
case ir_unop_dFdy_coarse:
|
||||
case ir_unop_dFdy_fine:
|
||||
assert(ir->operands[0]->type->is_float());
|
||||
assert(ir->operands[0]->type->is_float_16_32());
|
||||
assert(ir->operands[0]->type == ir->type);
|
||||
break;
|
||||
|
||||
|
@ -551,13 +557,9 @@ ir_validate::visit_leave(ir_expression *ir)
|
|||
assert(ir->type->base_type == GLSL_TYPE_UINT);
|
||||
break;
|
||||
|
||||
case ir_unop_noise:
|
||||
/* XXX what can we assert here? */
|
||||
break;
|
||||
|
||||
case ir_unop_interpolate_at_centroid:
|
||||
assert(ir->operands[0]->type == ir->type);
|
||||
assert(ir->operands[0]->type->is_float());
|
||||
assert(ir->operands[0]->type->is_float_16_32());
|
||||
break;
|
||||
|
||||
case ir_unop_get_buffer_size:
|
||||
|
@ -579,6 +581,15 @@ ir_validate::visit_leave(ir_expression *ir)
|
|||
assert(ir->operands[0]->type->is_float());
|
||||
assert(ir->type->is_double());
|
||||
break;
|
||||
case ir_unop_f162f:
|
||||
assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT16);
|
||||
assert(ir->type->is_float());
|
||||
break;
|
||||
case ir_unop_f2f16:
|
||||
case ir_unop_f2fmp:
|
||||
assert(ir->operands[0]->type->is_float());
|
||||
assert(ir->type->base_type == GLSL_TYPE_FLOAT16);
|
||||
break;
|
||||
case ir_unop_d2i:
|
||||
assert(ir->operands[0]->type->is_double());
|
||||
assert(ir->type->base_type == GLSL_TYPE_INT);
|
||||
|
@ -601,13 +612,11 @@ ir_validate::visit_leave(ir_expression *ir)
|
|||
break;
|
||||
|
||||
case ir_unop_frexp_sig:
|
||||
assert(ir->operands[0]->type->is_float() ||
|
||||
ir->operands[0]->type->is_double());
|
||||
assert(ir->operands[0]->type->is_float_16_32_64());
|
||||
assert(ir->type->is_double());
|
||||
break;
|
||||
case ir_unop_frexp_exp:
|
||||
assert(ir->operands[0]->type->is_float() ||
|
||||
ir->operands[0]->type->is_double());
|
||||
assert(ir->operands[0]->type->is_float_16_32_64());
|
||||
assert(ir->type->base_type == GLSL_TYPE_INT);
|
||||
break;
|
||||
case ir_unop_subroutine_to_int:
|
||||
|
@ -616,8 +625,7 @@ ir_validate::visit_leave(ir_expression *ir)
|
|||
break;
|
||||
|
||||
case ir_unop_atan:
|
||||
assert(ir->operands[0]->type->is_float() ||
|
||||
ir->operands[0]->type->is_double());
|
||||
assert(ir->operands[0]->type->is_float_16_32_64());
|
||||
assert(ir->type == ir->operands[0]->type);
|
||||
break;
|
||||
|
||||
|
@ -750,9 +758,9 @@ ir_validate::visit_leave(ir_expression *ir)
|
|||
|
||||
case ir_binop_dot:
|
||||
assert(ir->type == glsl_type::float_type ||
|
||||
ir->type == glsl_type::double_type);
|
||||
assert(ir->operands[0]->type->is_float() ||
|
||||
ir->operands[0]->type->is_double());
|
||||
ir->type == glsl_type::double_type ||
|
||||
ir->type == glsl_type::float16_t_type);
|
||||
assert(ir->operands[0]->type->is_float_16_32_64());
|
||||
assert(ir->operands[0]->type->is_vector());
|
||||
assert(ir->operands[0]->type == ir->operands[1]->type);
|
||||
break;
|
||||
|
@ -765,8 +773,7 @@ ir_validate::visit_leave(ir_expression *ir)
|
|||
|
||||
case ir_binop_ldexp:
|
||||
assert(ir->operands[0]->type == ir->type);
|
||||
assert(ir->operands[0]->type->is_float() ||
|
||||
ir->operands[0]->type->is_double());
|
||||
assert(ir->operands[0]->type->is_float_16_32_64());
|
||||
assert(ir->operands[1]->type->base_type == GLSL_TYPE_INT);
|
||||
assert(ir->operands[0]->type->components() ==
|
||||
ir->operands[1]->type->components());
|
||||
|
@ -792,27 +799,25 @@ ir_validate::visit_leave(ir_expression *ir)
|
|||
break;
|
||||
|
||||
case ir_binop_atan2:
|
||||
assert(ir->operands[0]->type->is_float() ||
|
||||
ir->operands[0]->type->is_double());
|
||||
assert(ir->operands[0]->type->is_float_16_32_64());
|
||||
assert(ir->operands[1]->type == ir->operands[0]->type);
|
||||
assert(ir->type == ir->operands[0]->type);
|
||||
break;
|
||||
|
||||
case ir_triop_fma:
|
||||
assert(ir->type->is_float() ||
|
||||
ir->type->is_double());
|
||||
assert(ir->type->is_float_16_32_64());
|
||||
assert(ir->type == ir->operands[0]->type);
|
||||
assert(ir->type == ir->operands[1]->type);
|
||||
assert(ir->type == ir->operands[2]->type);
|
||||
break;
|
||||
|
||||
case ir_triop_lrp:
|
||||
assert(ir->operands[0]->type->is_float() ||
|
||||
ir->operands[0]->type->is_double());
|
||||
assert(ir->operands[0]->type->is_float_16_32_64());
|
||||
assert(ir->operands[0]->type == ir->operands[1]->type);
|
||||
assert(ir->operands[2]->type == ir->operands[0]->type ||
|
||||
ir->operands[2]->type == glsl_type::float_type ||
|
||||
ir->operands[2]->type == glsl_type::double_type);
|
||||
ir->operands[2]->type == glsl_type::double_type ||
|
||||
ir->operands[2]->type == glsl_type::float16_t_type);
|
||||
break;
|
||||
|
||||
case ir_triop_csel:
|
||||
|
|
|
@ -1373,6 +1373,21 @@ tfeedback_decl::find_candidate(gl_shader_program *prog,
|
|||
return this->matched_candidate;
|
||||
}
|
||||
|
||||
/**
|
||||
* Force a candidate over the previously matched one. It happens when a new
|
||||
* varying needs to be created to match the xfb declaration, for example,
|
||||
* to fullfil an alignment criteria.
|
||||
*/
|
||||
void
|
||||
tfeedback_decl::set_lowered_candidate(const tfeedback_candidate *candidate)
|
||||
{
|
||||
this->matched_candidate = candidate;
|
||||
|
||||
/* The subscript part is no longer relevant */
|
||||
this->is_subscripted = false;
|
||||
this->array_subscript = 0;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Parse all the transform feedback declarations that were passed to
|
||||
|
@ -1590,7 +1605,9 @@ namespace {
|
|||
class varying_matches
|
||||
{
|
||||
public:
|
||||
varying_matches(bool disable_varying_packing, bool xfb_enabled,
|
||||
varying_matches(bool disable_varying_packing,
|
||||
bool disable_xfb_packing,
|
||||
bool xfb_enabled,
|
||||
bool enhanced_layouts_enabled,
|
||||
gl_shader_stage producer_stage,
|
||||
gl_shader_stage consumer_stage);
|
||||
|
@ -1616,11 +1633,17 @@ private:
|
|||
*/
|
||||
const bool disable_varying_packing;
|
||||
|
||||
/**
|
||||
* If true, this driver disables packing for varyings used by transform
|
||||
* feedback.
|
||||
*/
|
||||
const bool disable_xfb_packing;
|
||||
|
||||
/**
|
||||
* If true, this driver has transform feedback enabled. The transform
|
||||
* feedback code requires at least some packing be done even when varying
|
||||
* packing is disabled, fortunately where transform feedback requires
|
||||
* packing it's safe to override the disabled setting. See
|
||||
* feedback code usually requires at least some packing be done even
|
||||
* when varying packing is disabled, fortunately where transform feedback
|
||||
* requires packing it's safe to override the disabled setting. See
|
||||
* is_varying_packing_safe().
|
||||
*/
|
||||
const bool xfb_enabled;
|
||||
|
@ -1647,6 +1670,7 @@ private:
|
|||
static packing_order_enum compute_packing_order(const ir_variable *var);
|
||||
static int match_comparator(const void *x_generic, const void *y_generic);
|
||||
static int xfb_comparator(const void *x_generic, const void *y_generic);
|
||||
static int not_xfb_comparator(const void *x_generic, const void *y_generic);
|
||||
|
||||
/**
|
||||
* Structure recording the relationship between a single producer output
|
||||
|
@ -1702,11 +1726,13 @@ private:
|
|||
} /* anonymous namespace */
|
||||
|
||||
varying_matches::varying_matches(bool disable_varying_packing,
|
||||
bool disable_xfb_packing,
|
||||
bool xfb_enabled,
|
||||
bool enhanced_layouts_enabled,
|
||||
gl_shader_stage producer_stage,
|
||||
gl_shader_stage consumer_stage)
|
||||
: disable_varying_packing(disable_varying_packing),
|
||||
disable_xfb_packing(disable_xfb_packing),
|
||||
xfb_enabled(xfb_enabled),
|
||||
enhanced_layouts_enabled(enhanced_layouts_enabled),
|
||||
producer_stage(producer_stage),
|
||||
|
@ -1785,6 +1811,7 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var)
|
|||
producer_var->type->contains_double());
|
||||
|
||||
if (!disable_varying_packing &&
|
||||
(!disable_xfb_packing || producer_var == NULL || !producer_var->data.is_xfb) &&
|
||||
(needs_flat_qualifier ||
|
||||
(consumer_stage != MESA_SHADER_NONE && consumer_stage != MESA_SHADER_FRAGMENT))) {
|
||||
/* Since this varying is not being consumed by the fragment shader, its
|
||||
|
@ -1850,6 +1877,7 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var)
|
|||
this->matches[this->num_matches].packing_order
|
||||
= this->compute_packing_order(var);
|
||||
if ((this->disable_varying_packing && !is_varying_packing_safe(type, var)) ||
|
||||
(this->disable_xfb_packing && var->data.is_xfb) ||
|
||||
var->data.must_be_shader_input) {
|
||||
unsigned slots = type->count_attribute_slots(false);
|
||||
this->matches[this->num_matches].num_components = slots * 4;
|
||||
|
@ -1890,19 +1918,29 @@ varying_matches::assign_locations(struct gl_shader_program *prog,
|
|||
* When packing is disabled the sort orders varyings used by transform
|
||||
* feedback first, but also depends on *undefined behaviour* of qsort to
|
||||
* reverse the order of the varyings. See: xfb_comparator().
|
||||
*
|
||||
* If packing is only disabled for xfb varyings (mutually exclusive with
|
||||
* disable_varying_packing), we then group varyings depending on if they
|
||||
* are captured for transform feedback. The same *undefined behaviour* is
|
||||
* taken advantage of.
|
||||
*/
|
||||
if (!this->disable_varying_packing) {
|
||||
/* Sort varying matches into an order that makes them easy to pack. */
|
||||
qsort(this->matches, this->num_matches, sizeof(*this->matches),
|
||||
&varying_matches::match_comparator);
|
||||
} else {
|
||||
if (this->disable_varying_packing) {
|
||||
/* Only sort varyings that are only used by transform feedback. */
|
||||
qsort(this->matches, this->num_matches, sizeof(*this->matches),
|
||||
&varying_matches::xfb_comparator);
|
||||
} else if (this->disable_xfb_packing) {
|
||||
/* Only sort varyings that are NOT used by transform feedback. */
|
||||
qsort(this->matches, this->num_matches, sizeof(*this->matches),
|
||||
&varying_matches::not_xfb_comparator);
|
||||
} else {
|
||||
/* Sort varying matches into an order that makes them easy to pack. */
|
||||
qsort(this->matches, this->num_matches, sizeof(*this->matches),
|
||||
&varying_matches::match_comparator);
|
||||
}
|
||||
|
||||
unsigned generic_location = 0;
|
||||
unsigned generic_patch_location = MAX_VARYING*4;
|
||||
bool previous_var_xfb = false;
|
||||
bool previous_var_xfb_only = false;
|
||||
unsigned previous_packing_class = ~0u;
|
||||
|
||||
|
@ -1939,6 +1977,9 @@ varying_matches::assign_locations(struct gl_shader_program *prog,
|
|||
* class than the previous one, and we're not already on a slot
|
||||
* boundary.
|
||||
*
|
||||
* Also advance if varying packing is disabled for transform feedback,
|
||||
* and previous or current varying is used for transform feedback.
|
||||
*
|
||||
* Also advance to the next slot if packing is disabled. This makes sure
|
||||
* we don't assign varyings the same locations which is possible
|
||||
* because we still pack individual arrays, records and matrices even
|
||||
|
@ -1947,6 +1988,8 @@ varying_matches::assign_locations(struct gl_shader_program *prog,
|
|||
* feedback.
|
||||
*/
|
||||
if (var->data.must_be_shader_input ||
|
||||
(this->disable_xfb_packing &&
|
||||
(previous_var_xfb || var->data.is_xfb)) ||
|
||||
(this->disable_varying_packing &&
|
||||
!(previous_var_xfb_only && var->data.is_xfb_only)) ||
|
||||
(previous_packing_class != this->matches[i].packing_class) ||
|
||||
|
@ -1955,6 +1998,7 @@ varying_matches::assign_locations(struct gl_shader_program *prog,
|
|||
*location = ALIGN(*location, 4);
|
||||
}
|
||||
|
||||
previous_var_xfb = var->data.is_xfb;
|
||||
previous_var_xfb_only = var->data.is_xfb_only;
|
||||
previous_packing_class = this->matches[i].packing_class;
|
||||
|
||||
|
@ -2051,7 +2095,7 @@ varying_matches::store_locations() const
|
|||
const glsl_type *type =
|
||||
get_varying_type(producer_var, producer_stage);
|
||||
if (type->is_array() || type->is_matrix() || type->is_struct() ||
|
||||
type->is_double()) {
|
||||
type->is_64bit()) {
|
||||
unsigned comp_slots = type->component_slots() + offset;
|
||||
unsigned slots = comp_slots / 4;
|
||||
if (comp_slots % 4)
|
||||
|
@ -2211,6 +2255,32 @@ varying_matches::xfb_comparator(const void *x_generic, const void *y_generic)
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* Comparison function passed to qsort() to sort varyings NOT used by
|
||||
* transform feedback when packing of xfb varyings is disabled.
|
||||
*/
|
||||
int
|
||||
varying_matches::not_xfb_comparator(const void *x_generic, const void *y_generic)
|
||||
{
|
||||
const match *x = (const match *) x_generic;
|
||||
|
||||
if (x->producer_var != NULL && !x->producer_var->data.is_xfb)
|
||||
return match_comparator(x_generic, y_generic);
|
||||
|
||||
/* FIXME: When the comparator returns 0 it means the elements being
|
||||
* compared are equivalent. However the qsort documentation says:
|
||||
*
|
||||
* "The order of equivalent elements is undefined."
|
||||
*
|
||||
* In practice the sort ends up reversing the order of the varyings which
|
||||
* means locations are also assigned in this reversed order and happens to
|
||||
* be what we want. This is also whats happening in
|
||||
* varying_matches::match_comparator().
|
||||
*/
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Is the given variable a varying variable to be counted against the
|
||||
* limit in ctx->Const.MaxVarying?
|
||||
|
@ -2558,11 +2628,17 @@ assign_varying_locations(struct gl_context *ctx,
|
|||
|
||||
/* Transform feedback code assumes varying arrays are packed, so if the
|
||||
* driver has disabled varying packing, make sure to at least enable
|
||||
* packing required by transform feedback.
|
||||
* packing required by transform feedback. See below for exception.
|
||||
*/
|
||||
bool xfb_enabled =
|
||||
ctx->Extensions.EXT_transform_feedback && !unpackable_tess;
|
||||
|
||||
/* Some drivers actually requires packing to be explicitly disabled
|
||||
* for varyings used by transform feedback.
|
||||
*/
|
||||
bool disable_xfb_packing =
|
||||
ctx->Const.DisableTransformFeedbackPacking;
|
||||
|
||||
/* Disable packing on outward facing interfaces for SSO because in ES we
|
||||
* need to retain the unpacked varying information for draw time
|
||||
* validation.
|
||||
|
@ -2577,7 +2653,9 @@ assign_varying_locations(struct gl_context *ctx,
|
|||
if (prog->SeparateShader && (producer == NULL || consumer == NULL))
|
||||
disable_varying_packing = true;
|
||||
|
||||
varying_matches matches(disable_varying_packing, xfb_enabled,
|
||||
varying_matches matches(disable_varying_packing,
|
||||
disable_xfb_packing,
|
||||
xfb_enabled,
|
||||
ctx->Extensions.ARB_enhanced_layouts,
|
||||
producer ? producer->Stage : MESA_SHADER_NONE,
|
||||
consumer ? consumer->Stage : MESA_SHADER_NONE);
|
||||
|
@ -2716,6 +2794,52 @@ assign_varying_locations(struct gl_context *ctx,
|
|||
return false;
|
||||
}
|
||||
|
||||
/* There are two situations where a new output varying is needed:
|
||||
*
|
||||
* - If varying packing is disabled for xfb and the current declaration
|
||||
* is not aligned within the top level varying (e.g. vec3_arr[1]).
|
||||
*
|
||||
* - If a builtin variable needs to be copied to a new variable
|
||||
* before its content is modified by another lowering pass (e.g.
|
||||
* \c gl_Position is transformed by \c nir_lower_viewport_transform).
|
||||
*/
|
||||
const unsigned dmul =
|
||||
matched_candidate->type->without_array()->is_64bit() ? 2 : 1;
|
||||
const bool lowered =
|
||||
(disable_xfb_packing &&
|
||||
!tfeedback_decls[i].is_aligned(dmul, matched_candidate->offset)) ||
|
||||
(matched_candidate->toplevel_var->data.explicit_location &&
|
||||
matched_candidate->toplevel_var->data.location < VARYING_SLOT_VAR0 &&
|
||||
(ctx->Const.ShaderCompilerOptions[producer->Stage].LowerBuiltinVariablesXfb &
|
||||
BITFIELD_BIT(matched_candidate->toplevel_var->data.location)));
|
||||
|
||||
if (lowered) {
|
||||
ir_variable *new_var;
|
||||
tfeedback_candidate *new_candidate = NULL;
|
||||
|
||||
new_var = lower_xfb_varying(mem_ctx, producer, tfeedback_decls[i].name());
|
||||
if (new_var == NULL) {
|
||||
ralloc_free(hash_table_ctx);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Create new candidate and replace matched_candidate */
|
||||
new_candidate = rzalloc(mem_ctx, tfeedback_candidate);
|
||||
new_candidate->toplevel_var = new_var;
|
||||
new_candidate->toplevel_var->data.is_unmatched_generic_inout = 1;
|
||||
new_candidate->type = new_var->type;
|
||||
new_candidate->offset = 0;
|
||||
_mesa_hash_table_insert(tfeedback_candidates,
|
||||
ralloc_strdup(mem_ctx, new_var->name),
|
||||
new_candidate);
|
||||
|
||||
tfeedback_decls[i].set_lowered_candidate(new_candidate);
|
||||
matched_candidate = new_candidate;
|
||||
}
|
||||
|
||||
/* Mark as xfb varying */
|
||||
matched_candidate->toplevel_var->data.is_xfb = 1;
|
||||
|
||||
/* Mark xfb varyings as always active */
|
||||
matched_candidate->toplevel_var->data.always_active_io = 1;
|
||||
|
||||
|
@ -2732,8 +2856,10 @@ assign_varying_locations(struct gl_context *ctx,
|
|||
consumer_inputs,
|
||||
consumer_interface_inputs,
|
||||
consumer_inputs_with_locations);
|
||||
if (input_var)
|
||||
if (input_var) {
|
||||
input_var->data.is_xfb = 1;
|
||||
input_var->data.always_active_io = 1;
|
||||
}
|
||||
|
||||
if (matched_candidate->toplevel_var->data.is_unmatched_generic_inout) {
|
||||
matched_candidate->toplevel_var->data.is_xfb_only = 1;
|
||||
|
@ -2804,13 +2930,13 @@ assign_varying_locations(struct gl_context *ctx,
|
|||
if (producer) {
|
||||
lower_packed_varyings(mem_ctx, slots_used, components, ir_var_shader_out,
|
||||
0, producer, disable_varying_packing,
|
||||
xfb_enabled);
|
||||
disable_xfb_packing, xfb_enabled);
|
||||
}
|
||||
|
||||
if (consumer) {
|
||||
lower_packed_varyings(mem_ctx, slots_used, components, ir_var_shader_in,
|
||||
consumer_vertices, consumer,
|
||||
disable_varying_packing, xfb_enabled);
|
||||
consumer_vertices, consumer, disable_varying_packing,
|
||||
disable_xfb_packing, xfb_enabled);
|
||||
}
|
||||
|
||||
return true;
|
||||
|
|
|
@ -104,6 +104,7 @@ public:
|
|||
const void *mem_ctx) const;
|
||||
const tfeedback_candidate *find_candidate(gl_shader_program *prog,
|
||||
hash_table *tfeedback_candidates);
|
||||
void set_lowered_candidate(const tfeedback_candidate *candidate);
|
||||
|
||||
bool is_next_buffer_separator() const
|
||||
{
|
||||
|
@ -123,6 +124,11 @@ public:
|
|||
return !this->next_buffer_separator && !this->skip_components;
|
||||
}
|
||||
|
||||
bool is_aligned(unsigned dmul, unsigned offset) const
|
||||
{
|
||||
return (dmul * (this->array_subscript + offset)) % 4 == 0;
|
||||
}
|
||||
|
||||
const char *name() const
|
||||
{
|
||||
return this->orig_name;
|
||||
|
|
|
@ -86,7 +86,7 @@
|
|||
#include "util/u_string.h"
|
||||
#include "util/u_math.h"
|
||||
|
||||
#include "main/imports.h"
|
||||
|
||||
#include "main/shaderobj.h"
|
||||
#include "main/enums.h"
|
||||
#include "main/mtypes.h"
|
||||
|
@ -260,6 +260,8 @@ public:
|
|||
|
||||
class array_resize_visitor : public deref_type_updater {
|
||||
public:
|
||||
using deref_type_updater::visit;
|
||||
|
||||
unsigned num_vertices;
|
||||
gl_shader_program *prog;
|
||||
gl_shader_stage stage;
|
||||
|
@ -1511,6 +1513,8 @@ move_non_declarations(exec_list *instructions, exec_node *last,
|
|||
*/
|
||||
class array_sizing_visitor : public deref_type_updater {
|
||||
public:
|
||||
using deref_type_updater::visit;
|
||||
|
||||
array_sizing_visitor()
|
||||
: mem_ctx(ralloc_context(NULL)),
|
||||
unnamed_interfaces(_mesa_pointer_hash_table_create(NULL))
|
||||
|
@ -1817,6 +1821,40 @@ link_bindless_layout_qualifiers(struct gl_shader_program *prog,
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check for conflicting viewport_relative settings across shaders, and sets
|
||||
* the value for the linked shader.
|
||||
*/
|
||||
static void
|
||||
link_layer_viewport_relative_qualifier(struct gl_shader_program *prog,
|
||||
struct gl_program *gl_prog,
|
||||
struct gl_shader **shader_list,
|
||||
unsigned num_shaders)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
/* Find first shader with explicit layer declaration */
|
||||
for (i = 0; i < num_shaders; i++) {
|
||||
if (shader_list[i]->redeclares_gl_layer) {
|
||||
gl_prog->info.layer_viewport_relative =
|
||||
shader_list[i]->layer_viewport_relative;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Now make sure that each subsequent shader's explicit layer declaration
|
||||
* matches the first one's.
|
||||
*/
|
||||
for (; i < num_shaders; i++) {
|
||||
if (shader_list[i]->redeclares_gl_layer &&
|
||||
shader_list[i]->layer_viewport_relative !=
|
||||
gl_prog->info.layer_viewport_relative) {
|
||||
linker_error(prog, "all gl_Layer redeclarations must have identical "
|
||||
"viewport_relative settings");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Performs the cross-validation of tessellation control shader vertices and
|
||||
* layout qualifiers for the attached tessellation control shaders,
|
||||
|
@ -2434,9 +2472,7 @@ link_intrastage_shaders(void *mem_ctx,
|
|||
|
||||
/* Create program and attach it to the linked shader */
|
||||
struct gl_program *gl_prog =
|
||||
ctx->Driver.NewProgram(ctx,
|
||||
_mesa_shader_stage_to_program(shader_list[0]->Stage),
|
||||
prog->Name, false);
|
||||
ctx->Driver.NewProgram(ctx, shader_list[0]->Stage, prog->Name, false);
|
||||
if (!gl_prog) {
|
||||
prog->data->LinkStatus = LINKING_FAILURE;
|
||||
_mesa_delete_linked_shader(ctx, linked);
|
||||
|
@ -2462,6 +2498,8 @@ link_intrastage_shaders(void *mem_ctx,
|
|||
|
||||
link_bindless_layout_qualifiers(prog, shader_list, num_shaders);
|
||||
|
||||
link_layer_viewport_relative_qualifier(prog, gl_prog, shader_list, num_shaders);
|
||||
|
||||
populate_symbol_table(linked, shader_list[0]->symbols);
|
||||
|
||||
/* The pointer to the main function in the final linked shader (i.e., the
|
||||
|
@ -4406,12 +4444,13 @@ link_and_validate_uniforms(struct gl_context *ctx,
|
|||
struct gl_shader_program *prog)
|
||||
{
|
||||
update_array_sizes(prog);
|
||||
link_assign_uniform_locations(prog, ctx);
|
||||
|
||||
if (prog->data->LinkStatus == LINKING_FAILURE)
|
||||
return;
|
||||
|
||||
if (!ctx->Const.UseNIRGLSLLinker) {
|
||||
link_assign_uniform_locations(prog, ctx);
|
||||
|
||||
if (prog->data->LinkStatus == LINKING_FAILURE)
|
||||
return;
|
||||
|
||||
link_util_calculate_subroutine_compat(prog);
|
||||
link_util_check_uniform_resources(ctx, prog);
|
||||
link_util_check_subroutine_resources(prog);
|
||||
|
|
|
@ -287,3 +287,90 @@ link_util_calculate_subroutine_compat(struct gl_shader_program *prog)
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Recursive part of the public mark_array_elements_referenced function.
|
||||
*
|
||||
* The recursion occurs when an entire array-of- is accessed. See the
|
||||
* implementation for more details.
|
||||
*
|
||||
* \param dr List of array_deref_range elements to be
|
||||
* processed.
|
||||
* \param count Number of array_deref_range elements to be
|
||||
* processed.
|
||||
* \param scale Current offset scale.
|
||||
* \param linearized_index Current accumulated linearized array index.
|
||||
*/
|
||||
void
|
||||
_mark_array_elements_referenced(const struct array_deref_range *dr,
|
||||
unsigned count, unsigned scale,
|
||||
unsigned linearized_index,
|
||||
BITSET_WORD *bits)
|
||||
{
|
||||
/* Walk through the list of array dereferences in least- to
|
||||
* most-significant order. Along the way, accumulate the current
|
||||
* linearized offset and the scale factor for each array-of-.
|
||||
*/
|
||||
for (unsigned i = 0; i < count; i++) {
|
||||
if (dr[i].index < dr[i].size) {
|
||||
linearized_index += dr[i].index * scale;
|
||||
scale *= dr[i].size;
|
||||
} else {
|
||||
/* For each element in the current array, update the count and
|
||||
* offset, then recurse to process the remaining arrays.
|
||||
*
|
||||
* There is some inefficency here if the last eBITSET_WORD *bitslement in the
|
||||
* array_deref_range list specifies the entire array. In that case,
|
||||
* the loop will make recursive calls with count == 0. In the call,
|
||||
* all that will happen is the bit will be set.
|
||||
*/
|
||||
for (unsigned j = 0; j < dr[i].size; j++) {
|
||||
_mark_array_elements_referenced(&dr[i + 1],
|
||||
count - (i + 1),
|
||||
scale * dr[i].size,
|
||||
linearized_index + (j * scale),
|
||||
bits);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
BITSET_SET(bits, linearized_index);
|
||||
}
|
||||
|
||||
/**
|
||||
* Mark a set of array elements as accessed.
|
||||
*
|
||||
* If every \c array_deref_range is for a single index, only a single
|
||||
* element will be marked. If any \c array_deref_range is for an entire
|
||||
* array-of-, then multiple elements will be marked.
|
||||
*
|
||||
* Items in the \c array_deref_range list appear in least- to
|
||||
* most-significant order. This is the \b opposite order the indices
|
||||
* appear in the GLSL shader text. An array access like
|
||||
*
|
||||
* x = y[1][i][3];
|
||||
*
|
||||
* would appear as
|
||||
*
|
||||
* { { 3, n }, { m, m }, { 1, p } }
|
||||
*
|
||||
* where n, m, and p are the sizes of the arrays-of-arrays.
|
||||
*
|
||||
* The set of marked array elements can later be queried by
|
||||
* \c ::is_linearized_index_referenced.
|
||||
*
|
||||
* \param dr List of array_deref_range elements to be processed.
|
||||
* \param count Number of array_deref_range elements to be processed.
|
||||
*/
|
||||
void
|
||||
link_util_mark_array_elements_referenced(const struct array_deref_range *dr,
|
||||
unsigned count, unsigned array_depth,
|
||||
BITSET_WORD *bits)
|
||||
{
|
||||
if (count != array_depth)
|
||||
return;
|
||||
|
||||
_mark_array_elements_referenced(dr, count, 1, 0, bits);
|
||||
}
|
||||
|
|
|
@ -24,6 +24,8 @@
|
|||
#ifndef GLSL_LINKER_UTIL_H
|
||||
#define GLSL_LINKER_UTIL_H
|
||||
|
||||
#include "util/bitset.h"
|
||||
|
||||
struct gl_context;
|
||||
struct gl_shader_program;
|
||||
struct gl_uniform_storage;
|
||||
|
@ -45,6 +47,23 @@ struct empty_uniform_block {
|
|||
unsigned slots;
|
||||
};
|
||||
|
||||
/**
|
||||
* Describes an access of an array element or an access of the whole array
|
||||
*/
|
||||
struct array_deref_range {
|
||||
/**
|
||||
* Index that was accessed.
|
||||
*
|
||||
* All valid array indices are less than the size of the array. If index
|
||||
* is equal to the size of the array, this means the entire array has been
|
||||
* accessed (e.g., due to use of a non-constant index).
|
||||
*/
|
||||
unsigned index;
|
||||
|
||||
/** Size of the array. Used for offset calculations. */
|
||||
unsigned size;
|
||||
};
|
||||
|
||||
void
|
||||
linker_error(struct gl_shader_program *prog, const char *fmt, ...);
|
||||
|
||||
|
@ -81,6 +100,11 @@ link_util_check_uniform_resources(struct gl_context *ctx,
|
|||
void
|
||||
link_util_calculate_subroutine_compat(struct gl_shader_program *prog);
|
||||
|
||||
void
|
||||
link_util_mark_array_elements_referenced(const struct array_deref_range *dr,
|
||||
unsigned count, unsigned array_depth,
|
||||
BITSET_WORD *bits);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -281,7 +281,7 @@ inline bool exec_node::is_head_sentinel() const
|
|||
* \param field Name of the field in \c type that is the embedded \c exec_node
|
||||
*/
|
||||
#define exec_node_data(type, node, field) \
|
||||
((type *) (((char *) node) - exec_list_offsetof(type, field, node)))
|
||||
((type *) (((uintptr_t) node) - exec_list_offsetof(type, field, node)))
|
||||
|
||||
#ifdef __cplusplus
|
||||
struct exec_node;
|
||||
|
@ -679,36 +679,44 @@ inline void exec_node::insert_before(exec_list *before)
|
|||
}
|
||||
#endif
|
||||
|
||||
#define foreach_in_list(__type, __inst, __list) \
|
||||
for (__type *__inst = (__type *)(__list)->head_sentinel.next; \
|
||||
!(__inst)->is_tail_sentinel(); \
|
||||
(__inst) = (__type *)(__inst)->next)
|
||||
#define exec_node_typed_forward(__node, __type) \
|
||||
(!exec_node_is_tail_sentinel(__node) ? (__type) (__node) : NULL)
|
||||
|
||||
#define foreach_in_list_reverse(__type, __inst, __list) \
|
||||
for (__type *__inst = (__type *)(__list)->tail_sentinel.prev; \
|
||||
!(__inst)->is_head_sentinel(); \
|
||||
(__inst) = (__type *)(__inst)->prev)
|
||||
#define exec_node_typed_backward(__node, __type) \
|
||||
(!exec_node_is_head_sentinel(__node) ? (__type) (__node) : NULL)
|
||||
|
||||
#define foreach_in_list(__type, __inst, __list) \
|
||||
for (__type *__inst = exec_node_typed_forward((__list)->head_sentinel.next, __type *); \
|
||||
(__inst) != NULL; \
|
||||
(__inst) = exec_node_typed_forward((__inst)->next, __type *))
|
||||
|
||||
#define foreach_in_list_reverse(__type, __inst, __list) \
|
||||
for (__type *__inst = exec_node_typed_backward((__list)->tail_sentinel.prev, __type *); \
|
||||
(__inst) != NULL; \
|
||||
(__inst) = exec_node_typed_backward((__inst)->prev, __type *))
|
||||
|
||||
/**
|
||||
* This version is safe even if the current node is removed.
|
||||
*/
|
||||
#define foreach_in_list_safe(__type, __node, __list) \
|
||||
for (__type *__node = (__type *)(__list)->head_sentinel.next, \
|
||||
*__next = (__type *)__node->next; \
|
||||
__next != NULL; \
|
||||
__node = __next, __next = (__type *)__next->next)
|
||||
*/
|
||||
|
||||
#define foreach_in_list_reverse_safe(__type, __node, __list) \
|
||||
for (__type *__node = (__type *)(__list)->tail_sentinel.prev, \
|
||||
*__prev = (__type *)__node->prev; \
|
||||
__prev != NULL; \
|
||||
__node = __prev, __prev = (__type *)__prev->prev)
|
||||
#define foreach_in_list_safe(__type, __node, __list) \
|
||||
for (__type *__node = exec_node_typed_forward((__list)->head_sentinel.next, __type *), \
|
||||
*__next = (__node) ? exec_node_typed_forward((__list)->head_sentinel.next->next, __type *) : NULL; \
|
||||
(__node) != NULL; \
|
||||
(__node) = __next, __next = __next ? exec_node_typed_forward(__next->next, __type *) : NULL)
|
||||
|
||||
#define foreach_in_list_reverse_safe(__type, __node, __list) \
|
||||
for (__type *__node = exec_node_typed_backward((__list)->tail_sentinel.prev, __type *), \
|
||||
*__prev = (__node) ? exec_node_typed_backward((__list)->tail_sentinel.prev->prev, __type *) : NULL; \
|
||||
(__node) != NULL; \
|
||||
(__node) = __prev, __prev = __prev ? exec_node_typed_backward(__prev->prev, __type *) : NULL)
|
||||
|
||||
#define foreach_in_list_use_after(__type, __inst, __list) \
|
||||
__type *__inst; \
|
||||
for ((__inst) = exec_node_typed_forward((__list)->head_sentinel.next, __type *); \
|
||||
(__inst) != NULL; \
|
||||
(__inst) = exec_node_typed_forward((__inst)->next, __type *))
|
||||
|
||||
#define foreach_in_list_use_after(__type, __inst, __list) \
|
||||
__type *__inst; \
|
||||
for ((__inst) = (__type *)(__list)->head_sentinel.next; \
|
||||
!(__inst)->is_tail_sentinel(); \
|
||||
(__inst) = (__type *)(__inst)->next)
|
||||
/**
|
||||
* Iterate through two lists at once. Stops at the end of the shorter list.
|
||||
*
|
||||
|
@ -725,39 +733,45 @@ inline void exec_node::insert_before(exec_list *before)
|
|||
__next1 = __next1->next, \
|
||||
__next2 = __next2->next)
|
||||
|
||||
#define foreach_list_typed(__type, __node, __field, __list) \
|
||||
for (__type * __node = \
|
||||
exec_node_data(__type, (__list)->head_sentinel.next, __field); \
|
||||
(__node)->__field.next != NULL; \
|
||||
(__node) = exec_node_data(__type, (__node)->__field.next, __field))
|
||||
#define exec_node_data_forward(type, node, field) \
|
||||
(!exec_node_is_tail_sentinel(node) ? exec_node_data(type, node, field) : NULL)
|
||||
|
||||
#define foreach_list_typed_from(__type, __node, __field, __list, __start) \
|
||||
for (__type * __node = exec_node_data(__type, (__start), __field); \
|
||||
(__node)->__field.next != NULL; \
|
||||
(__node) = exec_node_data(__type, (__node)->__field.next, __field))
|
||||
#define exec_node_data_backward(type, node, field) \
|
||||
(!exec_node_is_head_sentinel(node) ? exec_node_data(type, node, field) : NULL)
|
||||
|
||||
#define foreach_list_typed_reverse(__type, __node, __field, __list) \
|
||||
for (__type * __node = \
|
||||
exec_node_data(__type, (__list)->tail_sentinel.prev, __field); \
|
||||
(__node)->__field.prev != NULL; \
|
||||
(__node) = exec_node_data(__type, (__node)->__field.prev, __field))
|
||||
#define foreach_list_typed(__type, __node, __field, __list) \
|
||||
for (__type * __node = \
|
||||
exec_node_data_forward(__type, (__list)->head_sentinel.next, __field); \
|
||||
(__node) != NULL; \
|
||||
(__node) = exec_node_data_forward(__type, (__node)->__field.next, __field))
|
||||
|
||||
#define foreach_list_typed_safe(__type, __node, __field, __list) \
|
||||
for (__type * __node = \
|
||||
exec_node_data(__type, (__list)->head_sentinel.next, __field), \
|
||||
* __next = \
|
||||
exec_node_data(__type, (__node)->__field.next, __field); \
|
||||
(__node)->__field.next != NULL; \
|
||||
__node = __next, __next = \
|
||||
exec_node_data(__type, (__next)->__field.next, __field))
|
||||
#define foreach_list_typed_from(__type, __node, __field, __list, __start) \
|
||||
for (__type * __node = exec_node_data_forward(__type, (__start), __field); \
|
||||
(__node) != NULL; \
|
||||
(__node) = exec_node_data_forward(__type, (__node)->__field.next, __field))
|
||||
|
||||
#define foreach_list_typed_reverse_safe(__type, __node, __field, __list) \
|
||||
for (__type * __node = \
|
||||
exec_node_data(__type, (__list)->tail_sentinel.prev, __field), \
|
||||
* __prev = \
|
||||
exec_node_data(__type, (__node)->__field.prev, __field); \
|
||||
(__node)->__field.prev != NULL; \
|
||||
__node = __prev, __prev = \
|
||||
exec_node_data(__type, (__prev)->__field.prev, __field))
|
||||
#define foreach_list_typed_reverse(__type, __node, __field, __list) \
|
||||
for (__type * __node = \
|
||||
exec_node_data_backward(__type, (__list)->tail_sentinel.prev, __field); \
|
||||
(__node) != NULL; \
|
||||
(__node) = exec_node_data_backward(__type, (__node)->__field.prev, __field))
|
||||
|
||||
#define foreach_list_typed_safe(__type, __node, __field, __list) \
|
||||
for (__type * __node = \
|
||||
exec_node_data_forward(__type, (__list)->head_sentinel.next, __field), \
|
||||
* __next = (__node) ? \
|
||||
exec_node_data_forward(__type, (__node)->__field.next, __field) : NULL; \
|
||||
(__node) != NULL; \
|
||||
(__node) = __next, __next = (__next && (__next)->__field.next) ? \
|
||||
exec_node_data_forward(__type, (__next)->__field.next, __field) : NULL)
|
||||
|
||||
#define foreach_list_typed_reverse_safe(__type, __node, __field, __list) \
|
||||
for (__type * __node = \
|
||||
exec_node_data_backward(__type, (__list)->tail_sentinel.prev, __field), \
|
||||
* __prev = (__node) ? \
|
||||
exec_node_data_backward(__type, (__node)->__field.prev, __field) : NULL; \
|
||||
(__node) != NULL; \
|
||||
(__node) = __prev, __prev = (__prev && (__prev)->__field.prev) ? \
|
||||
exec_node_data_backward(__type, (__prev)->__field.prev, __field) : NULL)
|
||||
|
||||
#endif /* LIST_CONTAINER_H */
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright © 2010 Intel Corporation
|
||||
* Copyright © 2019 Google, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
|
@ -22,50 +22,43 @@
|
|||
*/
|
||||
|
||||
/**
|
||||
* \file lower_noise.cpp
|
||||
* IR lower pass to remove noise opcodes.
|
||||
* \file lower_builtins.cpp
|
||||
*
|
||||
* \author Ian Romanick <ian.d.romanick@intel.com>
|
||||
* Inline calls to builtin functions.
|
||||
*/
|
||||
|
||||
#include "ir.h"
|
||||
#include "ir_rvalue_visitor.h"
|
||||
#include "ir_optimization.h"
|
||||
|
||||
class lower_noise_visitor : public ir_rvalue_visitor {
|
||||
namespace {
|
||||
|
||||
class lower_builtins_visitor : public ir_hierarchical_visitor {
|
||||
public:
|
||||
lower_noise_visitor() : progress(false)
|
||||
{
|
||||
/* empty */
|
||||
}
|
||||
|
||||
void handle_rvalue(ir_rvalue **rvalue)
|
||||
{
|
||||
if (!*rvalue)
|
||||
return;
|
||||
|
||||
ir_expression *expr = (*rvalue)->as_expression();
|
||||
if (!expr)
|
||||
return;
|
||||
|
||||
/* In the future, ir_unop_noise may be replaced by a call to a function
|
||||
* that implements noise. No hardware has a noise instruction.
|
||||
*/
|
||||
if (expr->operation == ir_unop_noise) {
|
||||
*rvalue = ir_constant::zero(ralloc_parent(expr), expr->type);
|
||||
this->progress = true;
|
||||
}
|
||||
}
|
||||
|
||||
lower_builtins_visitor() : progress(false) { }
|
||||
ir_visitor_status visit_leave(ir_call *);
|
||||
bool progress;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
bool
|
||||
lower_noise(exec_list *instructions)
|
||||
lower_builtins(exec_list *instructions)
|
||||
{
|
||||
lower_noise_visitor v;
|
||||
|
||||
lower_builtins_visitor v;
|
||||
visit_list_elements(&v, instructions);
|
||||
|
||||
return v.progress;
|
||||
}
|
||||
|
||||
ir_visitor_status
|
||||
lower_builtins_visitor::visit_leave(ir_call *ir)
|
||||
{
|
||||
if (!ir->callee->is_builtin())
|
||||
return visit_continue;
|
||||
|
||||
ir->generate_inline(ir);
|
||||
ir->remove();
|
||||
|
||||
this->progress = true;
|
||||
|
||||
return visit_continue;
|
||||
}
|
|
@ -63,7 +63,8 @@
|
|||
* reciprocal. By breaking the operation down, constant reciprocals
|
||||
* can get constant folded.
|
||||
*
|
||||
* FDIV_TO_MUL_RCP only lowers single-precision floating point division;
|
||||
* FDIV_TO_MUL_RCP lowers single-precision and half-precision
|
||||
* floating point division;
|
||||
* DDIV_TO_MUL_RCP only lowers double-precision floating point division.
|
||||
* DIV_TO_MUL_RCP is a convenience macro that sets both flags.
|
||||
* INT_DIV_TO_MUL_RCP handles the integer case, converting to and from floating
|
||||
|
@ -123,6 +124,7 @@
|
|||
#include "ir.h"
|
||||
#include "ir_builder.h"
|
||||
#include "ir_optimization.h"
|
||||
#include "util/half_float.h"
|
||||
|
||||
using namespace ir_builder;
|
||||
|
||||
|
@ -172,6 +174,11 @@ private:
|
|||
void mul64_to_mul_and_mul_high(ir_expression *ir);
|
||||
|
||||
ir_expression *_carry(operand a, operand b);
|
||||
|
||||
static ir_constant *_imm_fp(void *mem_ctx,
|
||||
const glsl_type *type,
|
||||
double f,
|
||||
unsigned vector_elements=1);
|
||||
};
|
||||
|
||||
} /* anonymous namespace */
|
||||
|
@ -203,7 +210,7 @@ lower_instructions_visitor::sub_to_add_neg(ir_expression *ir)
|
|||
void
|
||||
lower_instructions_visitor::div_to_mul_rcp(ir_expression *ir)
|
||||
{
|
||||
assert(ir->operands[1]->type->is_float() || ir->operands[1]->type->is_double());
|
||||
assert(ir->operands[1]->type->is_float_16_32_64());
|
||||
|
||||
/* New expression for the 1.0 / op1 */
|
||||
ir_rvalue *expr;
|
||||
|
@ -273,7 +280,7 @@ lower_instructions_visitor::int_div_to_mul_rcp(ir_expression *ir)
|
|||
void
|
||||
lower_instructions_visitor::exp_to_exp2(ir_expression *ir)
|
||||
{
|
||||
ir_constant *log2_e = new(ir) ir_constant(float(M_LOG2E));
|
||||
ir_constant *log2_e = _imm_fp(ir, ir->type, M_LOG2E);
|
||||
|
||||
ir->operation = ir_unop_exp2;
|
||||
ir->init_num_operands();
|
||||
|
@ -304,7 +311,7 @@ lower_instructions_visitor::log_to_log2(ir_expression *ir)
|
|||
ir->init_num_operands();
|
||||
ir->operands[0] = new(ir) ir_expression(ir_unop_log2, ir->operands[0]->type,
|
||||
ir->operands[0], NULL);
|
||||
ir->operands[1] = new(ir) ir_constant(float(1.0 / M_LOG2E));
|
||||
ir->operands[1] = _imm_fp(ir, ir->operands[0]->type, 1.0 / M_LOG2E);
|
||||
this->progress = true;
|
||||
}
|
||||
|
||||
|
@ -336,7 +343,7 @@ lower_instructions_visitor::mod_to_floor(ir_expression *ir)
|
|||
/* Don't generate new IR that would need to be lowered in an additional
|
||||
* pass.
|
||||
*/
|
||||
if ((lowering(FDIV_TO_MUL_RCP) && ir->type->is_float()) ||
|
||||
if ((lowering(FDIV_TO_MUL_RCP) && ir->type->is_float_16_32()) ||
|
||||
(lowering(DDIV_TO_MUL_RCP) && ir->type->is_double()))
|
||||
div_to_mul_rcp(div_expr);
|
||||
|
||||
|
@ -837,10 +844,11 @@ lower_instructions_visitor::sat_to_clamp(ir_expression *ir)
|
|||
|
||||
ir->operation = ir_binop_min;
|
||||
ir->init_num_operands();
|
||||
|
||||
ir_constant *zero = _imm_fp(ir, ir->operands[0]->type, 0.0);
|
||||
ir->operands[0] = new(ir) ir_expression(ir_binop_max, ir->operands[0]->type,
|
||||
ir->operands[0],
|
||||
new(ir) ir_constant(0.0f));
|
||||
ir->operands[1] = new(ir) ir_constant(1.0f);
|
||||
ir->operands[0], zero);
|
||||
ir->operands[1] = _imm_fp(ir, ir->operands[0]->type, 1.0);
|
||||
|
||||
this->progress = true;
|
||||
}
|
||||
|
@ -1515,6 +1523,25 @@ lower_instructions_visitor::_carry(operand a, operand b)
|
|||
return carry(a, b);
|
||||
}
|
||||
|
||||
ir_constant *
|
||||
lower_instructions_visitor::_imm_fp(void *mem_ctx,
|
||||
const glsl_type *type,
|
||||
double f,
|
||||
unsigned vector_elements)
|
||||
{
|
||||
switch (type->base_type) {
|
||||
case GLSL_TYPE_FLOAT:
|
||||
return new(mem_ctx) ir_constant((float) f, vector_elements);
|
||||
case GLSL_TYPE_DOUBLE:
|
||||
return new(mem_ctx) ir_constant((double) f, vector_elements);
|
||||
case GLSL_TYPE_FLOAT16:
|
||||
return new(mem_ctx) ir_constant(float16_t(f), vector_elements);
|
||||
default:
|
||||
assert(!"unknown float type for immediate");
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
lower_instructions_visitor::imul_high_to_mul(ir_expression *ir)
|
||||
{
|
||||
|
@ -1747,7 +1774,7 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
|
|||
case ir_binop_div:
|
||||
if (ir->operands[1]->type->is_integer_32() && lowering(INT_DIV_TO_MUL_RCP))
|
||||
int_div_to_mul_rcp(ir);
|
||||
else if ((ir->operands[1]->type->is_float() && lowering(FDIV_TO_MUL_RCP)) ||
|
||||
else if ((ir->operands[1]->type->is_float_16_32() && lowering(FDIV_TO_MUL_RCP)) ||
|
||||
(ir->operands[1]->type->is_double() && lowering(DDIV_TO_MUL_RCP)))
|
||||
div_to_mul_rcp(ir);
|
||||
break;
|
||||
|
@ -1763,7 +1790,7 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
|
|||
break;
|
||||
|
||||
case ir_binop_mod:
|
||||
if (lowering(MOD_TO_FLOOR) && (ir->type->is_float() || ir->type->is_double()))
|
||||
if (lowering(MOD_TO_FLOOR) && ir->type->is_float_16_32_64())
|
||||
mod_to_floor(ir);
|
||||
break;
|
||||
|
||||
|
|
|
@ -268,6 +268,8 @@ struct ir_lower_jumps_visitor : public ir_control_flow_visitor {
|
|||
* contains the jump.
|
||||
*/
|
||||
|
||||
using ir_control_flow_visitor::visit;
|
||||
|
||||
bool progress;
|
||||
|
||||
struct function_record function;
|
||||
|
|
|
@ -360,6 +360,9 @@ ir_mat_op_to_vec_visitor::visit_leave(ir_assignment *orig_assign)
|
|||
switch (orig_expr->operation) {
|
||||
case ir_unop_d2f:
|
||||
case ir_unop_f2d:
|
||||
case ir_unop_f2f16:
|
||||
case ir_unop_f2fmp:
|
||||
case ir_unop_f162f:
|
||||
case ir_unop_neg: {
|
||||
/* Apply the operation to each column.*/
|
||||
for (i = 0; i < matrix_columns; i++) {
|
||||
|
|
|
@ -173,6 +173,7 @@ public:
|
|||
exec_list *out_instructions,
|
||||
exec_list *out_variables,
|
||||
bool disable_varying_packing,
|
||||
bool disable_xfb_packing,
|
||||
bool xfb_enabled);
|
||||
|
||||
void run(struct gl_linked_shader *shader);
|
||||
|
@ -240,6 +241,7 @@ private:
|
|||
exec_list *out_variables;
|
||||
|
||||
bool disable_varying_packing;
|
||||
bool disable_xfb_packing;
|
||||
bool xfb_enabled;
|
||||
};
|
||||
|
||||
|
@ -250,7 +252,7 @@ lower_packed_varyings_visitor::lower_packed_varyings_visitor(
|
|||
ir_variable_mode mode,
|
||||
unsigned gs_input_vertices, exec_list *out_instructions,
|
||||
exec_list *out_variables, bool disable_varying_packing,
|
||||
bool xfb_enabled)
|
||||
bool disable_xfb_packing, bool xfb_enabled)
|
||||
: mem_ctx(mem_ctx),
|
||||
locations_used(locations_used),
|
||||
components(components),
|
||||
|
@ -262,6 +264,7 @@ lower_packed_varyings_visitor::lower_packed_varyings_visitor(
|
|||
out_instructions(out_instructions),
|
||||
out_variables(out_variables),
|
||||
disable_varying_packing(disable_varying_packing),
|
||||
disable_xfb_packing(disable_xfb_packing),
|
||||
xfb_enabled(xfb_enabled)
|
||||
{
|
||||
}
|
||||
|
@ -769,12 +772,21 @@ lower_packed_varyings_visitor::needs_lowering(ir_variable *var)
|
|||
if (var->data.explicit_location || var->data.must_be_shader_input)
|
||||
return false;
|
||||
|
||||
const glsl_type *type = var->type;
|
||||
|
||||
/* Some drivers (e.g. panfrost) don't support packing of transform
|
||||
* feedback varyings.
|
||||
*/
|
||||
if (disable_xfb_packing && var->data.is_xfb &&
|
||||
!(type->is_array() || type->is_struct() || type->is_matrix()) &&
|
||||
xfb_enabled)
|
||||
return false;
|
||||
|
||||
/* Override disable_varying_packing if the var is only used by transform
|
||||
* feedback. Also override it if transform feedback is enabled and the
|
||||
* variable is an array, struct or matrix as the elements of these types
|
||||
* will always have the same interpolation and therefore are safe to pack.
|
||||
*/
|
||||
const glsl_type *type = var->type;
|
||||
if (disable_varying_packing && !var->data.is_xfb_only &&
|
||||
!((type->is_array() || type->is_struct() || type->is_matrix()) &&
|
||||
xfb_enabled))
|
||||
|
@ -874,7 +886,7 @@ lower_packed_varyings(void *mem_ctx, unsigned locations_used,
|
|||
const uint8_t *components,
|
||||
ir_variable_mode mode, unsigned gs_input_vertices,
|
||||
gl_linked_shader *shader, bool disable_varying_packing,
|
||||
bool xfb_enabled)
|
||||
bool disable_xfb_packing, bool xfb_enabled)
|
||||
{
|
||||
exec_list *instructions = shader->ir;
|
||||
ir_function *main_func = shader->symbols->get_function("main");
|
||||
|
@ -890,6 +902,7 @@ lower_packed_varyings(void *mem_ctx, unsigned locations_used,
|
|||
&new_instructions,
|
||||
&new_variables,
|
||||
disable_varying_packing,
|
||||
disable_xfb_packing,
|
||||
xfb_enabled);
|
||||
visitor.run(shader);
|
||||
if (mode == ir_var_shader_out) {
|
||||
|
|
721
third_party/rust/glslopt/glsl-optimizer/src/compiler/glsl/lower_precision.cpp
поставляемый
Normal file
721
third_party/rust/glslopt/glsl-optimizer/src/compiler/glsl/lower_precision.cpp
поставляемый
Normal file
|
@ -0,0 +1,721 @@
|
|||
/*
|
||||
* Copyright © 2019 Google, Inc
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file lower_precision.cpp
|
||||
*/
|
||||
|
||||
#include "main/macros.h"
|
||||
#include "compiler/glsl_types.h"
|
||||
#include "ir.h"
|
||||
#include "ir_builder.h"
|
||||
#include "ir_optimization.h"
|
||||
#include "ir_rvalue_visitor.h"
|
||||
#include "util/half_float.h"
|
||||
#include "util/set.h"
|
||||
#include "util/hash_table.h"
|
||||
#include <vector>
|
||||
|
||||
namespace {
|
||||
|
||||
class find_precision_visitor : public ir_rvalue_enter_visitor {
|
||||
public:
|
||||
find_precision_visitor();
|
||||
~find_precision_visitor();
|
||||
|
||||
virtual void handle_rvalue(ir_rvalue **rvalue);
|
||||
virtual ir_visitor_status visit_enter(ir_call *ir);
|
||||
|
||||
ir_function_signature *map_builtin(ir_function_signature *sig);
|
||||
|
||||
bool progress;
|
||||
|
||||
/* Set of rvalues that can be lowered. This will be filled in by
|
||||
* find_lowerable_rvalues_visitor. Only the root node of a lowerable section
|
||||
* will be added to this set.
|
||||
*/
|
||||
struct set *lowerable_rvalues;
|
||||
|
||||
/**
|
||||
* A mapping of builtin signature functions to lowered versions. This is
|
||||
* filled in lazily when a lowered version is needed.
|
||||
*/
|
||||
struct hash_table *lowered_builtins;
|
||||
/**
|
||||
* A temporary hash table only used in order to clone functions.
|
||||
*/
|
||||
struct hash_table *clone_ht;
|
||||
|
||||
void *lowered_builtin_mem_ctx;
|
||||
};
|
||||
|
||||
/* Analysis pass: walks the IR keeping a stack mirroring the visitor's
 * enter/leave nesting, and decides for each rvalue subtree whether it can be
 * lowered to reduced precision. Only the topmost node of each lowerable
 * subtree ends up in the result set.
 */
class find_lowerable_rvalues_visitor : public ir_hierarchical_visitor {
public:
   /* Tri-state lowering verdict for a node; UNKNOWN means no precision
    * information has been seen yet (e.g. GLSL_PRECISION_NONE).
    */
   enum can_lower_state {
      UNKNOWN,
      CANT_LOWER,
      SHOULD_LOWER,
   };

   enum parent_relation {
      /* The parent performs a further operation involving the result from the
       * child and can be lowered along with it.
       */
      COMBINED_OPERATION,
      /* The parent instruction's operation is independent of the child type so
       * the child should be lowered separately.
       */
      INDEPENDENT_OPERATION,
   };

   /* One entry per instruction currently being visited. */
   struct stack_entry {
      ir_instruction *instr;
      enum can_lower_state state;
      /* List of child rvalues that can be lowered. When this stack entry is
       * popped, if this node itself can't be lowered than all of the children
       * are root nodes to lower so we will add them to lowerable_rvalues.
       * Otherwise if this node can also be lowered then we won't add the
       * children because we only want to add the topmost lowerable nodes to
       * lowerable_rvalues and the children will be lowered as part of lowering
       * this node.
       */
      std::vector<ir_instruction *> lowerable_children;
   };

   find_lowerable_rvalues_visitor(struct set *result);

   /* Enter/leave callbacks installed on the hierarchical visitor; they push
    * and pop stack entries around each visited instruction.
    */
   static void stack_enter(class ir_instruction *ir, void *data);
   static void stack_leave(class ir_instruction *ir, void *data);

   virtual ir_visitor_status visit(ir_constant *ir);
   virtual ir_visitor_status visit(ir_dereference_variable *ir);

   virtual ir_visitor_status visit_enter(ir_dereference_record *ir);
   virtual ir_visitor_status visit_enter(ir_dereference_array *ir);
   virtual ir_visitor_status visit_enter(ir_texture *ir);
   virtual ir_visitor_status visit_enter(ir_expression *ir);

   virtual ir_visitor_status visit_leave(ir_assignment *ir);
   virtual ir_visitor_status visit_leave(ir_call *ir);

   /* Maps a (type, GLSL precision qualifier) pair to a lowering verdict. */
   static can_lower_state handle_precision(const glsl_type *type,
                                           int precision);

   static parent_relation get_parent_relation(ir_instruction *parent,
                                              ir_instruction *child);

   /* Mirrors the visitor's current enter/leave nesting. */
   std::vector<stack_entry> stack;
   /* Output set (owned by the caller) of root lowerable rvalues. */
   struct set *lowerable_rvalues;

   void pop_stack_entry();
   void add_lowerable_children(const stack_entry &entry);
};
|
||||
|
||||
/* Rewriting pass: applied to a single rvalue subtree that the analysis pass
 * decided is lowerable; converts dereferences to float16 via f2fmp and
 * mutates expression/constant types in place.
 */
class lower_precision_visitor : public ir_rvalue_visitor {
public:
   virtual void handle_rvalue(ir_rvalue **rvalue);
   virtual ir_visitor_status visit_enter(ir_dereference_array *);
   virtual ir_visitor_status visit_enter(ir_dereference_record *);
   virtual ir_visitor_status visit_enter(ir_call *ir);
   virtual ir_visitor_status visit_enter(ir_texture *ir);
   virtual ir_visitor_status visit_leave(ir_expression *);
};
|
||||
|
||||
bool
|
||||
can_lower_type(const glsl_type *type)
|
||||
{
|
||||
/* Don’t lower any expressions involving non-float types except bool and
|
||||
* texture samplers. This will rule out operations that change the type such
|
||||
* as conversion to ints. Instead it will end up lowering the arguments
|
||||
* instead and adding a final conversion to float32. We want to handle
|
||||
* boolean types so that it will do comparisons as 16-bit.
|
||||
*/
|
||||
|
||||
switch (type->base_type) {
|
||||
case GLSL_TYPE_FLOAT:
|
||||
case GLSL_TYPE_BOOL:
|
||||
case GLSL_TYPE_SAMPLER:
|
||||
return true;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/* Hooks the hierarchical visitor's enter/leave callbacks so every visited
 * instruction pushes/pops a stack_entry; the verdicts flow between entries
 * when they are popped.
 */
find_lowerable_rvalues_visitor::find_lowerable_rvalues_visitor(struct set *res)
{
   lowerable_rvalues = res;
   callback_enter = stack_enter;
   callback_leave = stack_leave;
   data_enter = this;
   data_leave = this;
}
|
||||
|
||||
void
|
||||
find_lowerable_rvalues_visitor::stack_enter(class ir_instruction *ir,
|
||||
void *data)
|
||||
{
|
||||
find_lowerable_rvalues_visitor *state =
|
||||
(find_lowerable_rvalues_visitor *) data;
|
||||
|
||||
/* Add a new stack entry for this instruction */
|
||||
stack_entry entry;
|
||||
|
||||
entry.instr = ir;
|
||||
entry.state = state->in_assignee ? CANT_LOWER : UNKNOWN;
|
||||
|
||||
state->stack.push_back(entry);
|
||||
}
|
||||
|
||||
void
|
||||
find_lowerable_rvalues_visitor::add_lowerable_children(const stack_entry &entry)
|
||||
{
|
||||
/* We can’t lower this node so if there were any pending children then they
|
||||
* are all root lowerable nodes and we should add them to the set.
|
||||
*/
|
||||
for (auto &it : entry.lowerable_children)
|
||||
_mesa_set_add(lowerable_rvalues, it);
|
||||
}
|
||||
|
||||
/* Called when the visitor leaves an instruction. Propagates this entry's
 * verdict into its parent (when they form one combined operation), then
 * decides whether this node — or its queued children — become root entries
 * in lowerable_rvalues.
 */
void
find_lowerable_rvalues_visitor::pop_stack_entry()
{
   const stack_entry &entry = stack.back();

   if (stack.size() >= 2) {
      /* Combine this state into the parent state, unless the parent operation
       * doesn't have any relation to the child operations
       */
      stack_entry &parent = stack.end()[-2];
      parent_relation rel = get_parent_relation(parent.instr, entry.instr);

      if (rel == COMBINED_OPERATION) {
         switch (entry.state) {
         case CANT_LOWER:
            /* One unlowerable operand poisons the whole combined tree. */
            parent.state = CANT_LOWER;
            break;
         case SHOULD_LOWER:
            /* Only upgrade the parent; never override a CANT_LOWER. */
            if (parent.state == UNKNOWN)
               parent.state = SHOULD_LOWER;
            break;
         case UNKNOWN:
            break;
         }
      }
   }

   if (entry.state == SHOULD_LOWER) {
      ir_rvalue *rv = entry.instr->as_rvalue();

      if (rv == NULL) {
         /* Not an rvalue (e.g. a statement); its lowerable children are
          * each independent roots.
          */
         add_lowerable_children(entry);
      } else if (stack.size() >= 2) {
         stack_entry &parent = stack.end()[-2];

         switch (get_parent_relation(parent.instr, rv)) {
         case COMBINED_OPERATION:
            /* We only want to add the toplevel lowerable instructions to the
             * lowerable set. Therefore if there is a parent then instead of
             * adding this instruction to the set we will queue depending on
             * the result of the parent instruction.
             */
            parent.lowerable_children.push_back(entry.instr);
            break;
         case INDEPENDENT_OPERATION:
            _mesa_set_add(lowerable_rvalues, rv);
            break;
         }
      } else {
         /* This is a toplevel node so add it directly to the lowerable
          * set.
          */
         _mesa_set_add(lowerable_rvalues, rv);
      }
   } else if (entry.state == CANT_LOWER) {
      add_lowerable_children(entry);
   }

   stack.pop_back();
}
|
||||
|
||||
void
|
||||
find_lowerable_rvalues_visitor::stack_leave(class ir_instruction *ir,
|
||||
void *data)
|
||||
{
|
||||
find_lowerable_rvalues_visitor *state =
|
||||
(find_lowerable_rvalues_visitor *) data;
|
||||
|
||||
state->pop_stack_entry();
|
||||
}
|
||||
|
||||
enum find_lowerable_rvalues_visitor::can_lower_state
|
||||
find_lowerable_rvalues_visitor::handle_precision(const glsl_type *type,
|
||||
int precision)
|
||||
{
|
||||
if (!can_lower_type(type))
|
||||
return CANT_LOWER;
|
||||
|
||||
switch (precision) {
|
||||
case GLSL_PRECISION_NONE:
|
||||
return UNKNOWN;
|
||||
case GLSL_PRECISION_HIGH:
|
||||
return CANT_LOWER;
|
||||
case GLSL_PRECISION_MEDIUM:
|
||||
case GLSL_PRECISION_LOW:
|
||||
return SHOULD_LOWER;
|
||||
}
|
||||
|
||||
return CANT_LOWER;
|
||||
}
|
||||
|
||||
enum find_lowerable_rvalues_visitor::parent_relation
|
||||
find_lowerable_rvalues_visitor::get_parent_relation(ir_instruction *parent,
|
||||
ir_instruction *child)
|
||||
{
|
||||
/* If the parent is a dereference instruction then the only child could be
|
||||
* for example an array dereference and that should be lowered independently
|
||||
* of the parent.
|
||||
*/
|
||||
if (parent->as_dereference())
|
||||
return INDEPENDENT_OPERATION;
|
||||
|
||||
/* The precision of texture sampling depend on the precision of the sampler.
|
||||
* The rest of the arguments don’t matter so we can treat it as an
|
||||
* independent operation.
|
||||
*/
|
||||
if (parent->as_texture())
|
||||
return INDEPENDENT_OPERATION;
|
||||
|
||||
return COMBINED_OPERATION;
|
||||
}
|
||||
|
||||
ir_visitor_status
find_lowerable_rvalues_visitor::visit(ir_constant *ir)
{
   /* Leaf node: push and pop an entry explicitly since the hierarchical
    * visitor's enter/leave callbacks don't wrap plain visit()s.
    */
   stack_enter(ir, this);

   stack_entry &top = stack.back();
   if (!can_lower_type(ir->type))
      top.state = CANT_LOWER;

   stack_leave(ir, this);

   return visit_continue;
}
|
||||
|
||||
ir_visitor_status
find_lowerable_rvalues_visitor::visit(ir_dereference_variable *ir)
{
   /* Leaf node: push/pop an entry explicitly (see visit(ir_constant)). */
   stack_enter(ir, this);

   stack_entry &top = stack.back();
   if (top.state == UNKNOWN)
      top.state = handle_precision(ir->type, ir->precision());

   stack_leave(ir, this);

   return visit_continue;
}
|
||||
|
||||
ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_dereference_record *ir)
{
   ir_hierarchical_visitor::visit_enter(ir);

   /* Seed the verdict from the dereferenced member's declared precision. */
   stack_entry &top = stack.back();
   if (top.state == UNKNOWN)
      top.state = handle_precision(ir->type, ir->precision());

   return visit_continue;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_dereference_array *ir)
{
   ir_hierarchical_visitor::visit_enter(ir);

   /* Seed the verdict from the dereferenced element's declared precision. */
   stack_entry &top = stack.back();
   if (top.state == UNKNOWN)
      top.state = handle_precision(ir->type, ir->precision());

   return visit_continue;
}
|
||||
|
||||
ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_texture *ir)
{
   ir_hierarchical_visitor::visit_enter(ir);

   /* The precision of the sample value depends on the precision of the
    * sampler, not on the coordinate arguments.
    */
   stack_entry &top = stack.back();
   if (top.state == UNKNOWN)
      top.state = handle_precision(ir->type, ir->sampler->precision());

   return visit_continue;
}
|
||||
|
||||
ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_expression *ir)
{
   ir_hierarchical_visitor::visit_enter(ir);

   if (!can_lower_type(ir->type))
      stack.back().state = CANT_LOWER;

   /* Don't lower precision for derivative calculations */
   switch (ir->operation) {
   case ir_unop_dFdx:
   case ir_unop_dFdx_coarse:
   case ir_unop_dFdx_fine:
   case ir_unop_dFdy:
   case ir_unop_dFdy_coarse:
   case ir_unop_dFdy_fine:
      stack.back().state = CANT_LOWER;
      break;
   default:
      break;
   }

   return visit_continue;
}
|
||||
|
||||
static bool
|
||||
is_lowerable_builtin(ir_call *ir,
|
||||
const struct set *lowerable_rvalues)
|
||||
{
|
||||
if (!ir->callee->is_builtin())
|
||||
return false;
|
||||
|
||||
assert(ir->callee->return_precision == GLSL_PRECISION_NONE);
|
||||
|
||||
foreach_in_list(ir_rvalue, param, &ir->actual_parameters) {
|
||||
if (!param->as_constant() &&
|
||||
_mesa_set_search(lowerable_rvalues, param) == NULL)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
ir_visitor_status
find_lowerable_rvalues_visitor::visit_leave(ir_call *ir)
{
   ir_hierarchical_visitor::visit_leave(ir);

   /* Special case for handling temporary variables generated by the compiler
    * for function calls. If we assign to one of these using a function call
    * that has a lowerable return type then we can assume the temporary
    * variable should have a medium precision too.
    */

   /* Do nothing if the return type is void. */
   if (!ir->return_deref)
      return visit_continue;

   /* The call writes into a compiler-generated temporary. */
   ir_variable *var = ir->return_deref->variable_referenced();

   assert(var->data.mode == ir_var_temporary);

   unsigned return_precision = ir->callee->return_precision;

   /* If the call is to a builtin, then the function won't have a return
    * precision and we should determine it from the precision of the arguments.
    */
   if (is_lowerable_builtin(ir, lowerable_rvalues))
      return_precision = GLSL_PRECISION_MEDIUM;

   can_lower_state lower_state =
      handle_precision(var->type, return_precision);

   if (lower_state == SHOULD_LOWER) {
      /* There probably shouldn't be any situations where multiple ir_call
       * instructions write to the same temporary?
       */
      assert(var->data.precision == GLSL_PRECISION_NONE);
      var->data.precision = GLSL_PRECISION_MEDIUM;
   } else {
      /* Pin the temporary to highp so later passes won't lower it. */
      var->data.precision = GLSL_PRECISION_HIGH;
   }

   return visit_continue;
}
|
||||
|
||||
ir_visitor_status
find_lowerable_rvalues_visitor::visit_leave(ir_assignment *ir)
{
   ir_hierarchical_visitor::visit_leave(ir);

   /* Special case for handling temporary variables generated by the compiler.
    * If we assign to one of these using a lowered precision then we can assume
    * the temporary variable should have a medium precision too.
    */
   ir_variable *var = ir->lhs->variable_referenced();

   if (var->data.mode == ir_var_temporary) {
      if (_mesa_set_search(lowerable_rvalues, ir->rhs)) {
         /* Only override the precision if this is the first assignment. For
          * temporaries such as the ones generated for the ?: operator there
          * can be multiple assignments with different precisions. This way we
          * get the highest precision of all of the assignments.
          */
         if (var->data.precision == GLSL_PRECISION_NONE)
            var->data.precision = GLSL_PRECISION_MEDIUM;
      } else if (!ir->rhs->as_constant()) {
         /* A non-lowerable, non-constant RHS forces the temporary to highp.
          * Constants are ignored here because they carry no precision of
          * their own.
          */
         var->data.precision = GLSL_PRECISION_HIGH;
      }
   }

   return visit_continue;
}
|
||||
|
||||
void
|
||||
find_lowerable_rvalues(exec_list *instructions,
|
||||
struct set *result)
|
||||
{
|
||||
find_lowerable_rvalues_visitor v(result);
|
||||
|
||||
visit_list_elements(&v, instructions);
|
||||
|
||||
assert(v.stack.empty());
|
||||
}
|
||||
|
||||
/* Wraps `ir` in a conversion expression: f2fmp lowers to float16, anything
 * else converts back up to float32. The vector/matrix shape is preserved.
 */
static ir_rvalue *
convert_precision(int op, ir_rvalue *ir)
{
   const unsigned base_type = (op == ir_unop_f2fmp)
      ? GLSL_TYPE_FLOAT16 : GLSL_TYPE_FLOAT;
   const glsl_type *desired_type =
      glsl_type::get_instance(base_type,
                              ir->type->vector_elements,
                              ir->type->matrix_columns);

   /* Allocate the new node out of the same ralloc context as the operand. */
   void *mem_ctx = ralloc_parent(ir);
   return new(mem_ctx) ir_expression(op, desired_type, ir, NULL);
}
|
||||
|
||||
/* Lowers a single rvalue inside a lowerable subtree: variable reads get an
 * explicit f2fmp conversion, while expressions and constants have their type
 * rewritten to float16 in place (constants also get their payload converted).
 */
void
lower_precision_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   ir_rvalue *ir = *rvalue;

   if (ir == NULL)
      return;

   if (ir->as_dereference()) {
      /* Booleans need no conversion; they are handled via the b2f16/f162b
       * rewrites in visit_leave(ir_expression).
       */
      if (!ir->type->is_boolean())
         *rvalue = convert_precision(ir_unop_f2fmp, ir);
   } else if (ir->type->is_float()) {
      /* Retype the node itself to the float16 variant of its current shape. */
      ir->type = glsl_type::get_instance(GLSL_TYPE_FLOAT16,
                                         ir->type->vector_elements,
                                         ir->type->matrix_columns,
                                         ir->type->explicit_stride,
                                         ir->type->interface_row_major);

      ir_constant *const_ir = ir->as_constant();

      if (const_ir) {
         /* Convert the stored float payload to half floats to match the new
          * type.
          */
         ir_constant_data value;

         for (unsigned i = 0; i < ARRAY_SIZE(value.f16); i++)
            value.f16[i] = _mesa_float_to_half(const_ir->value.f[i]);

         const_ir->value = value;
      }
   }
}
|
||||
|
||||
ir_visitor_status
lower_precision_visitor::visit_enter(ir_dereference_record *ir)
{
   /* Never descend into the variable being dereferenced. */
   return visit_continue_with_parent;
}

ir_visitor_status
lower_precision_visitor::visit_enter(ir_dereference_array *ir)
{
   /* Never descend into the array index or the variable; a lowerable index
    * is handled as its own independent subtree.
    */
   return visit_continue_with_parent;
}

ir_visitor_status
lower_precision_visitor::visit_enter(ir_call *ir)
{
   /* Call arguments are lowered as their own independent subtrees. */
   return visit_continue_with_parent;
}

ir_visitor_status
lower_precision_visitor::visit_enter(ir_texture *ir)
{
   /* Texture arguments are lowered as their own independent subtrees. */
   return visit_continue_with_parent;
}
|
||||
|
||||
ir_visitor_status
lower_precision_visitor::visit_leave(ir_expression *ir)
{
   ir_rvalue_visitor::visit_leave(ir);

   /* Now that the float operands have been lowered, retarget bool<->float
    * conversions at their float16 equivalents.
    */
   if (ir->operation == ir_unop_b2f)
      ir->operation = ir_unop_b2f16;
   else if (ir->operation == ir_unop_f2b)
      ir->operation = ir_unop_f162b;

   return visit_continue;
}
|
||||
|
||||
void
find_precision_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   /* Checking the precision of rvalue can be lowered first throughout
    * find_lowerable_rvalues_visitor.
    * Once it found the precision of rvalue can be lowered, then we can
    * add conversion f2fmp through lower_precision_visitor.
    */
   if (*rvalue == NULL)
      return;

   struct set_entry *entry = _mesa_set_search(lowerable_rvalues, *rvalue);

   /* Only root nodes of lowerable subtrees are in the set. */
   if (!entry)
      return;

   /* Consume the entry so nested handle_rvalue calls don't re-lower it. */
   _mesa_set_remove(lowerable_rvalues, entry);

   /* If the entire expression is just a variable dereference then trying to
    * lower it will just directly add pointless to and from conversions without
    * any actual operation in-between. Although these will eventually get
    * optimised out, avoiding generating them here also avoids breaking inout
    * parameters to functions.
    */
   if ((*rvalue)->as_dereference())
      return;

   /* Rewrite the whole subtree to float16, then handle the root itself. */
   lower_precision_visitor v;

   (*rvalue)->accept(&v);
   v.handle_rvalue(rvalue);

   /* We don't need to add the final conversion if the final type has been
    * converted to bool
    */
   if ((*rvalue)->type->base_type != GLSL_TYPE_BOOL)
      *rvalue = convert_precision(ir_unop_f162f, *rvalue);

   progress = true;
}
|
||||
|
||||
ir_visitor_status
find_precision_visitor::visit_enter(ir_call *ir)
{
   ir_rvalue_enter_visitor::visit_enter(ir);

   /* If this is a call to a builtin and the find_lowerable_rvalues_visitor
    * overrode the precision of the temporary return variable, then we can
    * replace the builtin implementation with a lowered version.
    */

   if (!ir->callee->is_builtin() ||
       ir->return_deref == NULL ||
       ir->return_deref->variable_referenced()->data.precision !=
       GLSL_PRECISION_MEDIUM)
      return visit_continue;

   /* Swap in the cached mediump clone of the builtin, inline it at the call
    * site, and remove the now-redundant call instruction.
    */
   ir->callee = map_builtin(ir->callee);
   ir->generate_inline(ir);
   ir->remove();

   /* The call has been replaced by inlined instructions; don't revisit. */
   return visit_continue_with_parent;
}
|
||||
|
||||
/* Returns a lowered-precision clone of a builtin signature, creating and
 * caching it on first use. The cache and ralloc context are created lazily
 * so shaders that never lower a builtin pay nothing.
 */
ir_function_signature *
find_precision_visitor::map_builtin(ir_function_signature *sig)
{
   if (lowered_builtins == NULL) {
      lowered_builtins = _mesa_pointer_hash_table_create(NULL);
      clone_ht =_mesa_pointer_hash_table_create(NULL);
      lowered_builtin_mem_ctx = ralloc_context(NULL);
   } else {
      /* Reuse a previously lowered clone if we have one. */
      struct hash_entry *entry = _mesa_hash_table_search(lowered_builtins, sig);
      if (entry)
         return (ir_function_signature *) entry->data;
   }

   ir_function_signature *lowered_sig =
      sig->clone(lowered_builtin_mem_ctx, clone_ht);

   /* Mark every parameter mediump so the recursive lower_precision pass will
    * lower the cloned body.
    */
   foreach_in_list(ir_variable, param, &lowered_sig->parameters) {
      param->data.precision = GLSL_PRECISION_MEDIUM;
   }

   lower_precision(&lowered_sig->body);

   /* clone_ht is only scratch state for clone(); reset it for the next use. */
   _mesa_hash_table_clear(clone_ht, NULL);

   _mesa_hash_table_insert(lowered_builtins, sig, lowered_sig);

   return lowered_sig;
}
|
||||
|
||||
/* The builtin cache members stay NULL until map_builtin() first needs them. */
find_precision_visitor::find_precision_visitor()
   : progress(false),
     lowerable_rvalues(_mesa_pointer_set_create(NULL)),
     lowered_builtins(NULL),
     clone_ht(NULL),
     lowered_builtin_mem_ctx(NULL)
{
}
|
||||
|
||||
find_precision_visitor::~find_precision_visitor()
{
   _mesa_set_destroy(lowerable_rvalues, NULL);

   /* The builtin cache is created lazily, so it may never have existed. */
   if (lowered_builtins == NULL)
      return;

   _mesa_hash_table_destroy(lowered_builtins, NULL);
   _mesa_hash_table_destroy(clone_ht, NULL);
   ralloc_free(lowered_builtin_mem_ctx);
}
|
||||
|
||||
}
|
||||
|
||||
bool
|
||||
lower_precision(exec_list *instructions)
|
||||
{
|
||||
find_precision_visitor v;
|
||||
|
||||
find_lowerable_rvalues(instructions, v.lowerable_rvalues);
|
||||
|
||||
visit_list_elements(&v, instructions);
|
||||
|
||||
return v.progress;
|
||||
}
|
222
third_party/rust/glslopt/glsl-optimizer/src/compiler/glsl/lower_xfb_varying.cpp
поставляемый
Normal file
222
third_party/rust/glslopt/glsl-optimizer/src/compiler/glsl/lower_xfb_varying.cpp
поставляемый
Normal file
|
@ -0,0 +1,222 @@
|
|||
/*
|
||||
* Copyright ©2019 Collabora Ltd.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file lower_xfb_varying.cpp
|
||||
*
|
||||
*/
|
||||
|
||||
#include "ir.h"
|
||||
#include "main/mtypes.h"
|
||||
#include "glsl_symbol_table.h"
|
||||
#include "util/strndup.h"
|
||||
|
||||
namespace {

/**
 * Visitor that splices varying packing code before every return.
 *
 * The instruction list passed to the constructor is cloned (not moved) at
 * each splice point, so the same list can be inserted before every return
 * in main() as well as at the end of main()'s body.
 */
class lower_xfb_var_splicer : public ir_hierarchical_visitor
{
public:
   explicit lower_xfb_var_splicer(void *mem_ctx,
                                  const exec_list *instructions);

   virtual ir_visitor_status visit_leave(ir_return *ret);
   virtual ir_visitor_status visit_leave(ir_function_signature *sig);

private:
   /**
    * Memory context used to allocate new instructions for the shader.
    */
   void * const mem_ctx;

   /**
    * Instructions that should be spliced into place before each return.
    */
   const exec_list *instructions;
};

} /* anonymous namespace */
|
||||
|
||||
|
||||
/* Both members are borrowed from the caller; the splicer owns nothing. */
lower_xfb_var_splicer::lower_xfb_var_splicer(void *mem_ctx, const exec_list *instructions)
   : mem_ctx(mem_ctx), instructions(instructions)
{
}
|
||||
|
||||
ir_visitor_status
lower_xfb_var_splicer::visit_leave(ir_return *ret)
{
   /* Clone the copy-back assignments in front of this return statement. */
   foreach_in_list(ir_instruction, ir, this->instructions)
      ret->insert_before(ir->clone(this->mem_ctx, NULL));

   return visit_continue;
}
|
||||
|
||||
/** Insert a copy-back assignment at the end of the main() function */
ir_visitor_status
lower_xfb_var_splicer::visit_leave(ir_function_signature *sig)
{
   if (strcmp(sig->function_name(), "main") != 0)
      return visit_continue;

   /* If main's body already ends in a return, visit_leave(ir_return) has
    * spliced the copies before it; appending them here would put them after
    * the return where they could never execute.
    */
   if (((ir_instruction*)sig->body.get_tail())->ir_type == ir_type_return)
      return visit_continue;

   foreach_in_list(ir_instruction, ir, this->instructions) {
      sig->body.push_tail(ir->clone(this->mem_ctx, NULL));
   }

   return visit_continue;
}
|
||||
|
||||
/* Returns a malloc'ed copy of the leading identifier of `name`: everything
 * up to the first '.' or '[' separator, or the whole string if neither
 * occurs. The caller frees the result.
 */
static char*
get_field_name(const char *name)
{
   const char *dot = strchr(name, '.');
   const char *bracket = strchr(name, '[');
   size_t len;

   if (dot && (!bracket || dot < bracket))
      len = dot - name;
   else if (bracket)
      len = bracket - name;
   else
      len = strlen(name);

   return strndup(name, len);
}
|
||||
|
||||
/* Generate a new name given the old xfb declaration string by replacing dots
|
||||
* with '_', brackets with '@' and appending "-xfb" */
|
||||
static char *
|
||||
generate_new_name(void *mem_ctx, const char *name)
|
||||
{
|
||||
char *new_name;
|
||||
unsigned i = 0;
|
||||
|
||||
new_name = ralloc_strdup(mem_ctx, name);
|
||||
while (new_name[i]) {
|
||||
if (new_name[i] == '.') {
|
||||
new_name[i] = '_';
|
||||
} else if (new_name[i] == '[' || new_name[i] == ']') {
|
||||
new_name[i] = '@';
|
||||
}
|
||||
i++;
|
||||
}
|
||||
|
||||
if (!ralloc_strcat(&new_name, "-xfb")) {
|
||||
ralloc_free(new_name);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return new_name;
|
||||
}
|
||||
|
||||
/* Get the dereference for the given variable name. The method is called
 * recursively to parse array indices and struct members.
 *
 * `name` is a declaration string such as "var[2].field"; on success *deref
 * holds the built dereference chain and *type the type it resolves to.
 * Returns false if the top-level variable cannot be found in the shader's
 * symbol table (or the string is empty).
 */
static bool
get_deref(void *ctx,
          const char *name,
          struct gl_linked_shader *shader,
          ir_dereference **deref,
          const glsl_type **type)
{
   if (name[0] == '\0') {
      /* End */
      return (*deref != NULL);
   } else if (name[0] == '[') {
      /* Array index */
      char *endptr = NULL;
      unsigned index;

      index = strtol(name + 1, &endptr, 10);
      assert(*type != NULL && (*type)->is_array() && endptr[0] == ']');
      /* Wrap the chain built so far in an array dereference. */
      *deref = new(ctx) ir_dereference_array(*deref, new(ctx) ir_constant(index));
      *type = (*type)->without_array();
      /* endptr points at ']'; continue after it. */
      return get_deref(ctx, endptr + 1, shader, deref, type);
   } else if (name[0] == '.') {
      /* Struct member */
      char *field = get_field_name(name + 1);

      assert(*type != NULL && (*type)->is_struct() && field != NULL);
      *deref = new(ctx) ir_dereference_record(*deref, field);
      *type = (*type)->field_type(field);
      assert(*type != glsl_type::error_type);
      /* Skip past ".field"; get_field_name's result is malloc'ed. */
      name += 1 + strlen(field);
      free(field);
      return get_deref(ctx, name, shader, deref, type);
   } else {
      /* Top level variable */
      char *field = get_field_name(name);
      ir_variable *toplevel_var;

      toplevel_var = shader->symbols->get_variable(field);
      name += strlen(field);
      free(field);
      if (toplevel_var == NULL) {
         return false;
      }

      *deref = new (ctx) ir_dereference_variable(toplevel_var);
      *type = toplevel_var->type;
      return get_deref(ctx, name, shader, deref, type);
   }
}
|
||||
|
||||
/* Creates a new flat shader output for the transform-feedback declaration
 * `old_var_name` (e.g. "s.field[1]"), and splices an assignment copying the
 * original value into it before every return in main() and at the end of
 * main(). Returns the new variable, or NULL if the name cannot be resolved.
 */
ir_variable *
lower_xfb_varying(void *mem_ctx,
                  struct gl_linked_shader *shader,
                  const char *old_var_name)
{
   exec_list new_instructions;
   char *new_var_name;
   ir_dereference *deref = NULL;
   const glsl_type *type = NULL;

   if (!get_deref(mem_ctx, old_var_name, shader, &deref, &type)) {
      if (deref) {
         /* NOTE(review): deref was placement-new'ed into the ralloc context
          * in get_deref(); plain `delete` here looks inconsistent with that
          * allocation scheme — confirm against ir_instruction's operator
          * delete semantics.
          */
         delete deref;
      }
      return NULL;
   }

   /* Declare the replacement output variable on the shader. */
   new_var_name = generate_new_name(mem_ctx, old_var_name);
   ir_variable *new_variable
      = new(mem_ctx) ir_variable(type, new_var_name, ir_var_shader_out);
   new_variable->data.assigned = true;
   new_variable->data.used = true;
   shader->ir->push_head(new_variable);
   ralloc_free(new_var_name);

   /* Build "new_variable = <old deref>" and splice it before each return. */
   ir_dereference *lhs = new(mem_ctx) ir_dereference_variable(new_variable);
   ir_assignment *new_assignment = new(mem_ctx) ir_assignment(lhs, deref);
   new_instructions.push_tail(new_assignment);

   lower_xfb_var_splicer splicer(mem_ctx, &new_instructions);
   visit_list_elements(&splicer, shader->ir);

   return new_variable;
}
|
|
@ -46,6 +46,7 @@ const struct option compiler_opts[] = {
|
|||
{ "dump-builder", no_argument, &options.dump_builder, 1 },
|
||||
{ "link", no_argument, &options.do_link, 1 },
|
||||
{ "just-log", no_argument, &options.just_log, 1 },
|
||||
{ "lower-precision", no_argument, &options.lower_precision, 1 },
|
||||
{ "version", required_argument, NULL, 'v' },
|
||||
{ NULL, 0, NULL, 0 }
|
||||
};
|
||||
|
|
|
@ -578,7 +578,8 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
|
|||
ir_rvalue *y_operand = inner_add->operands[1 - neg_pos];
|
||||
ir_rvalue *a_operand = mul->operands[1 - inner_add_pos];
|
||||
|
||||
if (x_operand->type != y_operand->type ||
|
||||
if (!x_operand->type->is_float_16_32_64() ||
|
||||
x_operand->type != y_operand->type ||
|
||||
x_operand->type != a_operand->type)
|
||||
continue;
|
||||
|
||||
|
@ -983,6 +984,9 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
|
|||
ir_constant *one;
|
||||
|
||||
switch (ir->type->base_type) {
|
||||
case GLSL_TYPE_FLOAT16:
|
||||
one = new(mem_ctx) ir_constant(float16_t::one(), op2_components);
|
||||
break;
|
||||
case GLSL_TYPE_FLOAT:
|
||||
one = new(mem_ctx) ir_constant(1.0f, op2_components);
|
||||
break;
|
||||
|
|
|
@ -208,6 +208,9 @@ ir_constant_propagation_visitor::constant_propagation(ir_rvalue **rvalue) {
|
|||
case GLSL_TYPE_FLOAT:
|
||||
data.f[i] = found->constant->value.f[rhs_channel];
|
||||
break;
|
||||
case GLSL_TYPE_FLOAT16:
|
||||
data.f16[i] = found->constant->value.f16[rhs_channel];
|
||||
break;
|
||||
case GLSL_TYPE_DOUBLE:
|
||||
data.d[i] = found->constant->value.d[rhs_channel];
|
||||
break;
|
||||
|
|
|
@ -49,6 +49,9 @@ struct assignment_entry {
|
|||
|
||||
class ir_constant_variable_visitor : public ir_hierarchical_visitor {
|
||||
public:
|
||||
using ir_hierarchical_visitor::visit;
|
||||
using ir_hierarchical_visitor::visit_enter;
|
||||
|
||||
virtual ir_visitor_status visit_enter(ir_dereference_variable *);
|
||||
virtual ir_visitor_status visit(ir_variable *);
|
||||
virtual ir_visitor_status visit_enter(ir_assignment *);
|
||||
|
@ -162,6 +165,15 @@ ir_constant_variable_visitor::visit_enter(ir_call *ir)
|
|||
entry = get_assignment_entry(var, this->ht);
|
||||
entry->assignment_count++;
|
||||
}
|
||||
|
||||
/* We don't know if the variable passed to this function has been
|
||||
* assigned a value or if it is undefined, so for now we always assume
|
||||
* it has been assigned a value. Once functions have been inlined any
|
||||
* further potential optimisations will be taken care of.
|
||||
*/
|
||||
struct assignment_entry *entry;
|
||||
entry = get_assignment_entry(param, this->ht);
|
||||
entry->assignment_count++;
|
||||
}
|
||||
|
||||
/* Mark the return storage as having been assigned to */
|
||||
|
|
|
@ -66,6 +66,8 @@ public:
|
|||
|
||||
class kill_for_derefs_visitor : public ir_hierarchical_visitor {
|
||||
public:
|
||||
using ir_hierarchical_visitor::visit;
|
||||
|
||||
kill_for_derefs_visitor(exec_list *assignments)
|
||||
{
|
||||
this->assignments = assignments;
|
||||
|
|
|
@ -38,6 +38,7 @@
|
|||
#include "program/prog_instruction.h"
|
||||
#include "compiler/glsl_types.h"
|
||||
#include "main/macros.h"
|
||||
#include "util/half_float.h"
|
||||
|
||||
using namespace ir_builder;
|
||||
|
||||
|
@ -125,6 +126,17 @@ compare_components(ir_constant *a, ir_constant *b)
|
|||
else
|
||||
foundequal = true;
|
||||
break;
|
||||
case GLSL_TYPE_FLOAT16: {
|
||||
float af = _mesa_half_to_float(a->value.f16[c0]);
|
||||
float bf = _mesa_half_to_float(b->value.f16[c1]);
|
||||
if (af < bf)
|
||||
foundless = true;
|
||||
else if (af > bf)
|
||||
foundgreater = true;
|
||||
else
|
||||
foundequal = true;
|
||||
break;
|
||||
}
|
||||
case GLSL_TYPE_FLOAT:
|
||||
if (a->value.f[c0] < b->value.f[c1])
|
||||
foundless = true;
|
||||
|
@ -181,6 +193,13 @@ combine_constant(bool ismin, ir_constant *a, ir_constant *b)
|
|||
(!ismin && b->value.i[i] > c->value.i[i]))
|
||||
c->value.i[i] = b->value.i[i];
|
||||
break;
|
||||
case GLSL_TYPE_FLOAT16: {
|
||||
float bf = _mesa_half_to_float(b->value.f16[i]);
|
||||
float cf = _mesa_half_to_float(c->value.f16[i]);
|
||||
if ((ismin && bf < cf) || (!ismin && bf > cf))
|
||||
c->value.f16[i] = b->value.f16[i];
|
||||
break;
|
||||
}
|
||||
case GLSL_TYPE_FLOAT:
|
||||
if ((ismin && b->value.f[i] < c->value.f[i]) ||
|
||||
(!ismin && b->value.f[i] > c->value.f[i]))
|
||||
|
|
|
@ -1222,8 +1222,7 @@ create_linked_shader_and_program(struct gl_context *ctx,
|
|||
struct gl_linked_shader *linked = rzalloc(NULL, struct gl_linked_shader);
|
||||
linked->Stage = stage;
|
||||
|
||||
glprog = ctx->Driver.NewProgram(ctx, _mesa_shader_stage_to_program(stage),
|
||||
prog->Name, false);
|
||||
glprog = ctx->Driver.NewProgram(ctx, stage, prog->Name, false);
|
||||
glprog->info.stage = stage;
|
||||
linked->Program = glprog;
|
||||
|
||||
|
@ -1256,6 +1255,7 @@ serialize_glsl_program(struct blob *blob, struct gl_context *ctx,
|
|||
write_hash_tables(blob, prog);
|
||||
|
||||
blob_write_uint32(blob, prog->data->Version);
|
||||
blob_write_uint32(blob, prog->IsES);
|
||||
blob_write_uint32(blob, prog->data->linked_stages);
|
||||
|
||||
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
|
||||
|
@ -1314,6 +1314,7 @@ deserialize_glsl_program(struct blob_reader *blob, struct gl_context *ctx,
|
|||
read_hash_tables(blob, prog);
|
||||
|
||||
prog->data->Version = blob_read_uint32(blob);
|
||||
prog->IsES = blob_read_uint32(blob);
|
||||
prog->data->linked_stages = blob_read_uint32(blob);
|
||||
|
||||
unsigned mask = prog->data->linked_stages;
|
||||
|
|
|
@ -99,33 +99,21 @@ private:
|
|||
};
|
||||
|
||||
static void
|
||||
init_gl_program(struct gl_program *prog, bool is_arb_asm, GLenum target)
|
||||
init_gl_program(struct gl_program *prog, bool is_arb_asm, gl_shader_stage stage)
|
||||
{
|
||||
prog->RefCount = 1;
|
||||
prog->Format = GL_PROGRAM_FORMAT_ASCII_ARB;
|
||||
prog->is_arb_asm = is_arb_asm;
|
||||
prog->info.stage = (gl_shader_stage)_mesa_program_enum_to_shader_stage(target);
|
||||
prog->info.stage = stage;
|
||||
}
|
||||
|
||||
static struct gl_program *
|
||||
new_program(UNUSED struct gl_context *ctx, GLenum target,
|
||||
new_program(UNUSED struct gl_context *ctx, gl_shader_stage stage,
|
||||
UNUSED GLuint id, bool is_arb_asm)
|
||||
{
|
||||
switch (target) {
|
||||
case GL_VERTEX_PROGRAM_ARB: /* == GL_VERTEX_PROGRAM_NV */
|
||||
case GL_GEOMETRY_PROGRAM_NV:
|
||||
case GL_TESS_CONTROL_PROGRAM_NV:
|
||||
case GL_TESS_EVALUATION_PROGRAM_NV:
|
||||
case GL_FRAGMENT_PROGRAM_ARB:
|
||||
case GL_COMPUTE_PROGRAM_NV: {
|
||||
struct gl_program *prog = rzalloc(NULL, struct gl_program);
|
||||
init_gl_program(prog, is_arb_asm, target);
|
||||
return prog;
|
||||
}
|
||||
default:
|
||||
printf("bad target in new_program\n");
|
||||
return NULL;
|
||||
}
|
||||
struct gl_program *prog = rzalloc(NULL, struct gl_program);
|
||||
init_gl_program(prog, is_arb_asm, stage);
|
||||
return prog;
|
||||
}
|
||||
|
||||
static const struct standalone_options *options;
|
||||
|
@ -446,6 +434,14 @@ standalone_compile_shader(const struct standalone_options *_options,
|
|||
initialize_context(ctx, options->glsl_version > 130 ? API_OPENGL_CORE : API_OPENGL_COMPAT);
|
||||
}
|
||||
|
||||
if (options->lower_precision) {
|
||||
for (unsigned i = MESA_SHADER_VERTEX; i <= MESA_SHADER_FRAGMENT; i++) {
|
||||
struct gl_shader_compiler_options *options =
|
||||
&ctx->Const.ShaderCompilerOptions[i];
|
||||
options->LowerPrecision = true;
|
||||
}
|
||||
}
|
||||
|
||||
struct gl_shader_program *whole_program;
|
||||
|
||||
whole_program = rzalloc (NULL, struct gl_shader_program);
|
||||
|
|
|
@ -36,6 +36,7 @@ struct standalone_options {
|
|||
int dump_builder;
|
||||
int do_link;
|
||||
int just_log;
|
||||
int lower_precision;
|
||||
};
|
||||
|
||||
struct gl_shader_program;
|
||||
|
|
|
@ -203,6 +203,7 @@ void initialize_context_to_defaults(struct gl_context *ctx, gl_api api)
|
|||
ctx->Extensions.ARB_shader_bit_encoding = true;
|
||||
ctx->Extensions.ARB_shader_draw_parameters = true;
|
||||
ctx->Extensions.ARB_shader_stencil_export = true;
|
||||
ctx->Extensions.ARB_shader_storage_buffer_object = true;
|
||||
ctx->Extensions.ARB_shader_texture_lod = true;
|
||||
ctx->Extensions.ARB_shading_language_420pack = true;
|
||||
ctx->Extensions.ARB_shading_language_packing = true;
|
||||
|
@ -265,6 +266,16 @@ void initialize_context_to_defaults(struct gl_context *ctx, gl_api api)
|
|||
ctx->Const.Program[MESA_SHADER_COMPUTE].MaxInputComponents = 0; /* not used */
|
||||
ctx->Const.Program[MESA_SHADER_COMPUTE].MaxOutputComponents = 0; /* not used */
|
||||
|
||||
ctx->Const.MaxVertexStreams = 4;
|
||||
ctx->Const.MaxTransformFeedbackBuffers = 4;
|
||||
ctx->Const.MaxShaderStorageBufferBindings = 4;
|
||||
ctx->Const.MaxShaderStorageBlockSize = 4096;
|
||||
ctx->Const.Program[MESA_SHADER_VERTEX].MaxShaderStorageBlocks = 8;
|
||||
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxShaderStorageBlocks = 8;
|
||||
|
||||
ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformBlocks = 12;
|
||||
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformBlocks = 12;
|
||||
|
||||
/* Set up default shader compiler options. */
|
||||
struct gl_shader_compiler_options options;
|
||||
memset(&options, 0, sizeof(options));
|
||||
|
|
|
@ -462,6 +462,16 @@ const glsl_type *glsl_type::get_bare_type() const
|
|||
unreachable("Invalid base type");
|
||||
}
|
||||
|
||||
const glsl_type *glsl_type::get_float16_type() const
|
||||
{
|
||||
assert(this->base_type == GLSL_TYPE_FLOAT);
|
||||
|
||||
return get_instance(GLSL_TYPE_FLOAT16,
|
||||
this->vector_elements,
|
||||
this->matrix_columns,
|
||||
this->explicit_stride,
|
||||
this->interface_row_major);
|
||||
}
|
||||
|
||||
static void
|
||||
hash_free_type_function(struct hash_entry *entry)
|
||||
|
@ -663,9 +673,11 @@ glsl_type::get_instance(unsigned base_type, unsigned rows, unsigned columns,
|
|||
assert(((glsl_type *) entry->data)->matrix_columns == columns);
|
||||
assert(((glsl_type *) entry->data)->explicit_stride == explicit_stride);
|
||||
|
||||
const glsl_type *t = (const glsl_type *) entry->data;
|
||||
|
||||
mtx_unlock(&glsl_type::hash_mutex);
|
||||
|
||||
return (const glsl_type *) entry->data;
|
||||
return t;
|
||||
}
|
||||
|
||||
assert(!row_major);
|
||||
|
@ -1024,9 +1036,11 @@ glsl_type::get_array_instance(const glsl_type *base,
|
|||
assert(((glsl_type *) entry->data)->length == array_size);
|
||||
assert(((glsl_type *) entry->data)->fields.array == base);
|
||||
|
||||
glsl_type *t = (glsl_type *) entry->data;
|
||||
|
||||
mtx_unlock(&glsl_type::hash_mutex);
|
||||
|
||||
return (glsl_type *) entry->data;
|
||||
return t;
|
||||
}
|
||||
|
||||
bool
|
||||
|
@ -1225,9 +1239,11 @@ glsl_type::get_struct_instance(const glsl_struct_field *fields,
|
|||
assert(strcmp(((glsl_type *) entry->data)->name, name) == 0);
|
||||
assert(((glsl_type *) entry->data)->packed == packed);
|
||||
|
||||
glsl_type *t = (glsl_type *) entry->data;
|
||||
|
||||
mtx_unlock(&glsl_type::hash_mutex);
|
||||
|
||||
return (glsl_type *) entry->data;
|
||||
return t;
|
||||
}
|
||||
|
||||
|
||||
|
@ -1261,9 +1277,11 @@ glsl_type::get_interface_instance(const glsl_struct_field *fields,
|
|||
assert(((glsl_type *) entry->data)->length == num_fields);
|
||||
assert(strcmp(((glsl_type *) entry->data)->name, block_name) == 0);
|
||||
|
||||
glsl_type *t = (glsl_type *) entry->data;
|
||||
|
||||
mtx_unlock(&glsl_type::hash_mutex);
|
||||
|
||||
return (glsl_type *) entry->data;
|
||||
return t;
|
||||
}
|
||||
|
||||
const glsl_type *
|
||||
|
@ -1290,9 +1308,11 @@ glsl_type::get_subroutine_instance(const char *subroutine_name)
|
|||
assert(((glsl_type *) entry->data)->base_type == GLSL_TYPE_SUBROUTINE);
|
||||
assert(strcmp(((glsl_type *) entry->data)->name, subroutine_name) == 0);
|
||||
|
||||
glsl_type *t = (glsl_type *) entry->data;
|
||||
|
||||
mtx_unlock(&glsl_type::hash_mutex);
|
||||
|
||||
return (glsl_type *) entry->data;
|
||||
return t;
|
||||
}
|
||||
|
||||
|
||||
|
@ -2572,29 +2592,8 @@ glsl_type::count_dword_slots(bool is_bindless) const
|
|||
int
|
||||
glsl_type::coordinate_components() const
|
||||
{
|
||||
int size;
|
||||
|
||||
switch (sampler_dimensionality) {
|
||||
case GLSL_SAMPLER_DIM_1D:
|
||||
case GLSL_SAMPLER_DIM_BUF:
|
||||
size = 1;
|
||||
break;
|
||||
case GLSL_SAMPLER_DIM_2D:
|
||||
case GLSL_SAMPLER_DIM_RECT:
|
||||
case GLSL_SAMPLER_DIM_MS:
|
||||
case GLSL_SAMPLER_DIM_EXTERNAL:
|
||||
case GLSL_SAMPLER_DIM_SUBPASS:
|
||||
size = 2;
|
||||
break;
|
||||
case GLSL_SAMPLER_DIM_3D:
|
||||
case GLSL_SAMPLER_DIM_CUBE:
|
||||
size = 3;
|
||||
break;
|
||||
default:
|
||||
assert(!"Should not get here.");
|
||||
size = 1;
|
||||
break;
|
||||
}
|
||||
enum glsl_sampler_dim dim = (enum glsl_sampler_dim)sampler_dimensionality;
|
||||
int size = glsl_get_sampler_dim_coordinate_components(dim);
|
||||
|
||||
/* Array textures need an additional component for the array index, except
|
||||
* for cubemap array images that behave like a 2D array of interleaved
|
||||
|
@ -2927,3 +2926,29 @@ glsl_type::cl_size() const
|
|||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
|
||||
int
|
||||
glsl_get_sampler_dim_coordinate_components(enum glsl_sampler_dim dim)
|
||||
{
|
||||
switch (dim) {
|
||||
case GLSL_SAMPLER_DIM_1D:
|
||||
case GLSL_SAMPLER_DIM_BUF:
|
||||
return 1;
|
||||
case GLSL_SAMPLER_DIM_2D:
|
||||
case GLSL_SAMPLER_DIM_RECT:
|
||||
case GLSL_SAMPLER_DIM_MS:
|
||||
case GLSL_SAMPLER_DIM_EXTERNAL:
|
||||
case GLSL_SAMPLER_DIM_SUBPASS:
|
||||
case GLSL_SAMPLER_DIM_SUBPASS_MS:
|
||||
return 2;
|
||||
case GLSL_SAMPLER_DIM_3D:
|
||||
case GLSL_SAMPLER_DIM_CUBE:
|
||||
return 3;
|
||||
default:
|
||||
unreachable("Unknown sampler dim");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
#include "shader_enums.h"
|
||||
#include "c11/threads.h"
|
||||
#include "util/blob.h"
|
||||
#include "util/format/u_format.h"
|
||||
#include "util/macros.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@ -59,10 +60,6 @@ void encode_type_to_blob(struct blob *blob, const struct glsl_type *type);
|
|||
|
||||
const struct glsl_type *decode_type_from_blob(struct blob_reader *blob);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
typedef void (*glsl_type_size_align_func)(const struct glsl_type *type,
|
||||
unsigned *size, unsigned *align);
|
||||
|
||||
|
@ -230,6 +227,9 @@ enum glsl_sampler_dim {
|
|||
GLSL_SAMPLER_DIM_SUBPASS_MS, /* for multisampled vulkan input attachments */
|
||||
};
|
||||
|
||||
int
|
||||
glsl_get_sampler_dim_coordinate_components(enum glsl_sampler_dim dim);
|
||||
|
||||
enum glsl_matrix_layout {
|
||||
/**
|
||||
* The layout of the matrix is inherited from the object containing the
|
||||
|
@ -259,6 +259,8 @@ enum {
|
|||
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
|
||||
#include "GL/gl.h"
|
||||
#include "util/ralloc.h"
|
||||
#include "main/menums.h" /* for gl_texture_index, C++'s enum rules are broken */
|
||||
|
@ -393,6 +395,11 @@ public:
|
|||
*/
|
||||
const glsl_type *get_bare_type() const;
|
||||
|
||||
/**
|
||||
* Gets the float16 version of this type.
|
||||
*/
|
||||
const glsl_type *get_float16_type() const;
|
||||
|
||||
/**
|
||||
* Get the instance of a built-in scalar, vector, or matrix type
|
||||
*/
|
||||
|
@ -754,6 +761,22 @@ public:
|
|||
return base_type == GLSL_TYPE_FLOAT;
|
||||
}
|
||||
|
||||
/**
|
||||
* Query whether or not a type is a half-float or float type
|
||||
*/
|
||||
bool is_float_16_32() const
|
||||
{
|
||||
return base_type == GLSL_TYPE_FLOAT16 || is_float();
|
||||
}
|
||||
|
||||
/**
|
||||
* Query whether or not a type is a half-float, float or double
|
||||
*/
|
||||
bool is_float_16_32_64() const
|
||||
{
|
||||
return base_type == GLSL_TYPE_FLOAT16 || is_float() || is_double();
|
||||
}
|
||||
|
||||
/**
|
||||
* Query whether or not a type is a double type
|
||||
*/
|
||||
|
@ -1297,7 +1320,7 @@ struct glsl_struct_field {
|
|||
/**
|
||||
* Layout format, applicable to image variables only.
|
||||
*/
|
||||
unsigned image_format:16;
|
||||
enum pipe_format image_format;
|
||||
|
||||
/**
|
||||
* Any of the xfb_* qualifiers trigger the shader to be in transform
|
||||
|
@ -1314,7 +1337,8 @@ struct glsl_struct_field {
|
|||
sample(0), matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED), patch(0), \
|
||||
precision(_precision), memory_read_only(0), \
|
||||
memory_write_only(0), memory_coherent(0), memory_volatile(0), \
|
||||
memory_restrict(0), image_format(0), explicit_xfb_buffer(0), \
|
||||
memory_restrict(0), image_format(PIPE_FORMAT_NONE), \
|
||||
explicit_xfb_buffer(0), \
|
||||
implicit_sized_array(0)
|
||||
|
||||
glsl_struct_field(const struct glsl_type *_type,
|
||||
|
|
|
@ -165,6 +165,7 @@ gl_varying_slot_name(gl_varying_slot slot)
|
|||
ENUM(VARYING_SLOT_BOUNDING_BOX0),
|
||||
ENUM(VARYING_SLOT_BOUNDING_BOX1),
|
||||
ENUM(VARYING_SLOT_VIEW_INDEX),
|
||||
ENUM(VARYING_SLOT_VIEWPORT_MASK),
|
||||
ENUM(VARYING_SLOT_VAR0),
|
||||
ENUM(VARYING_SLOT_VAR1),
|
||||
ENUM(VARYING_SLOT_VAR2),
|
||||
|
|
|
@ -261,6 +261,7 @@ typedef enum
|
|||
VARYING_SLOT_BOUNDING_BOX0, /* Only appears as TCS output. */
|
||||
VARYING_SLOT_BOUNDING_BOX1, /* Only appears as TCS output. */
|
||||
VARYING_SLOT_VIEW_INDEX,
|
||||
VARYING_SLOT_VIEWPORT_MASK, /* Does not appear in FS */
|
||||
VARYING_SLOT_VAR0, /* First generic varying slot */
|
||||
/* the remaining are simply for the benefit of gl_varying_slot_name()
|
||||
* and not to be construed as an upper bound:
|
||||
|
@ -343,6 +344,7 @@ const char *gl_varying_slot_name(gl_varying_slot slot);
|
|||
#define VARYING_BIT_TESS_LEVEL_INNER BITFIELD64_BIT(VARYING_SLOT_TESS_LEVEL_INNER)
|
||||
#define VARYING_BIT_BOUNDING_BOX0 BITFIELD64_BIT(VARYING_SLOT_BOUNDING_BOX0)
|
||||
#define VARYING_BIT_BOUNDING_BOX1 BITFIELD64_BIT(VARYING_SLOT_BOUNDING_BOX1)
|
||||
#define VARYING_BIT_VIEWPORT_MASK BITFIELD64_BIT(VARYING_SLOT_VIEWPORT_MASK)
|
||||
#define VARYING_BIT_VAR(V) BITFIELD64_BIT(VARYING_SLOT_VAR0 + (V))
|
||||
/*@}*/
|
||||
|
||||
|
|
|
@ -134,12 +134,24 @@ typedef struct shader_info {
|
|||
/* Which patch outputs are read */
|
||||
uint32_t patch_outputs_read;
|
||||
|
||||
/* Which inputs are read indirectly (subset of inputs_read) */
|
||||
uint64_t inputs_read_indirectly;
|
||||
/* Which outputs are read or written indirectly */
|
||||
uint64_t outputs_accessed_indirectly;
|
||||
/* Which patch inputs are read indirectly (subset of patch_inputs_read) */
|
||||
uint64_t patch_inputs_read_indirectly;
|
||||
/* Which patch outputs are read or written indirectly */
|
||||
uint64_t patch_outputs_accessed_indirectly;
|
||||
|
||||
/** Bitfield of which textures are used */
|
||||
uint32_t textures_used;
|
||||
|
||||
/** Bitfield of which textures are used by texelFetch() */
|
||||
uint32_t textures_used_by_txf;
|
||||
|
||||
/** Bitfield of which images are used */
|
||||
uint32_t images_used;
|
||||
|
||||
/* SPV_KHR_float_controls: execution mode for floating point ops */
|
||||
uint16_t float_controls_execution_mode;
|
||||
|
||||
|
@ -176,6 +188,12 @@ typedef struct shader_info {
|
|||
/* Whether flrp has been lowered. */
|
||||
bool flrp_lowered:1;
|
||||
|
||||
/* Whether the shader writes memory, including transform feedback. */
|
||||
bool writes_memory:1;
|
||||
|
||||
/* Whether gl_Layer is viewport-relative */
|
||||
bool layer_viewport_relative:1;
|
||||
|
||||
union {
|
||||
struct {
|
||||
/* Which inputs are doubles */
|
||||
|
@ -217,6 +235,7 @@ typedef struct shader_info {
|
|||
|
||||
struct {
|
||||
bool uses_discard:1;
|
||||
bool uses_demote:1;
|
||||
|
||||
/**
|
||||
* True if this fragment shader requires helper invocations. This
|
||||
|
@ -282,6 +301,7 @@ typedef struct shader_info {
|
|||
|
||||
struct {
|
||||
uint16_t local_size[3];
|
||||
uint16_t max_variable_local_size;
|
||||
|
||||
bool local_size_variable:1;
|
||||
uint8_t user_data_components_amd:3;
|
||||
|
@ -317,6 +337,16 @@ typedef struct shader_info {
|
|||
/** Is the vertex order counterclockwise? */
|
||||
bool ccw:1;
|
||||
bool point_mode:1;
|
||||
|
||||
/* Bit mask of TCS per-vertex inputs (VS outputs) that are used
|
||||
* with a vertex index that is NOT the invocation id
|
||||
*/
|
||||
uint64_t tcs_cross_invocation_inputs_read;
|
||||
|
||||
/* Bit mask of TCS per-vertex outputs that are used
|
||||
* with a vertex index that is NOT the invocation id
|
||||
*/
|
||||
uint64_t tcs_cross_invocation_outputs_read;
|
||||
} tess;
|
||||
};
|
||||
} shader_info;
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
|
||||
#include "pipe/p_compiler.h"
|
||||
#include "util/u_math.h"
|
||||
#include "util/half_float.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
|
@ -45,6 +46,12 @@ extern "C" {
|
|||
|
||||
static inline uint16_t
|
||||
util_float_to_half(float f)
|
||||
{
|
||||
return _mesa_float_to_half(f);
|
||||
}
|
||||
|
||||
static inline uint16_t
|
||||
util_float_to_half_rtz(float f)
|
||||
{
|
||||
uint32_t sign_mask = 0x80000000;
|
||||
uint32_t round_mask = ~0xfff;
|
||||
|
|
|
@ -128,7 +128,7 @@ typedef unsigned char boolean;
|
|||
/* See http://gcc.gnu.org/onlinedocs/gcc-4.4.2/gcc/Variable-Attributes.html */
|
||||
#define PIPE_ALIGN_VAR(_alignment) __attribute__((aligned(_alignment)))
|
||||
|
||||
#if defined(__GNUC__) && !defined(PIPE_ARCH_X86_64)
|
||||
#if defined(__GNUC__) && defined(PIPE_ARCH_X86)
|
||||
#define PIPE_ALIGN_STACK __attribute__((force_align_arg_pointer))
|
||||
#else
|
||||
#define PIPE_ALIGN_STACK
|
||||
|
|
1305
third_party/rust/glslopt/glsl-optimizer/src/gallium/include/pipe/p_defines.h
поставляемый
Normal file
1305
third_party/rust/glslopt/glsl-optimizer/src/gallium/include/pipe/p_defines.h
поставляемый
Normal file
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -560,6 +560,26 @@ enum pipe_video_chroma_format
|
|||
PIPE_VIDEO_CHROMA_FORMAT_NONE
|
||||
};
|
||||
|
||||
static inline enum pipe_video_chroma_format
|
||||
pipe_format_to_chroma_format(enum pipe_format format)
|
||||
{
|
||||
switch (format) {
|
||||
case PIPE_FORMAT_NV12:
|
||||
case PIPE_FORMAT_NV21:
|
||||
case PIPE_FORMAT_YV12:
|
||||
case PIPE_FORMAT_IYUV:
|
||||
case PIPE_FORMAT_P010:
|
||||
case PIPE_FORMAT_P016:
|
||||
return PIPE_VIDEO_CHROMA_FORMAT_420;
|
||||
case PIPE_FORMAT_UYVY:
|
||||
case PIPE_FORMAT_YUYV:
|
||||
case PIPE_FORMAT_YV16:
|
||||
return PIPE_VIDEO_CHROMA_FORMAT_422;
|
||||
default:
|
||||
return PIPE_VIDEO_CHROMA_FORMAT_NONE;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
980
third_party/rust/glslopt/glsl-optimizer/src/gallium/include/pipe/p_state.h
поставляемый
Normal file
980
third_party/rust/glslopt/glsl-optimizer/src/gallium/include/pipe/p_state.h
поставляемый
Normal file
|
@ -0,0 +1,980 @@
|
|||
/**************************************************************************
|
||||
*
|
||||
* Copyright 2007 VMware, Inc.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
|
||||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
|
||||
/**
|
||||
* @file
|
||||
*
|
||||
* Abstract graphics pipe state objects.
|
||||
*
|
||||
* Basic notes:
|
||||
* 1. Want compact representations, so we use bitfields.
|
||||
* 2. Put bitfields before other (GLfloat) fields.
|
||||
* 3. enum bitfields need to be at least one bit extra in size so the most
|
||||
* significant bit is zero. MSVC treats enums as signed so if the high
|
||||
* bit is set, the value will be interpreted as a negative number.
|
||||
* That causes trouble in various places.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef PIPE_STATE_H
|
||||
#define PIPE_STATE_H
|
||||
|
||||
#include "p_compiler.h"
|
||||
#include "p_defines.h"
|
||||
#include "p_format.h"
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* Implementation limits
|
||||
*/
|
||||
#define PIPE_MAX_ATTRIBS 32
|
||||
#define PIPE_MAX_CLIP_PLANES 8
|
||||
#define PIPE_MAX_COLOR_BUFS 8
|
||||
#define PIPE_MAX_CONSTANT_BUFFERS 32
|
||||
#define PIPE_MAX_SAMPLERS 32
|
||||
#define PIPE_MAX_SHADER_INPUTS 80 /* 32 GENERIC + 32 PATCH + 16 others */
|
||||
#define PIPE_MAX_SHADER_OUTPUTS 80 /* 32 GENERIC + 32 PATCH + 16 others */
|
||||
#define PIPE_MAX_SHADER_SAMPLER_VIEWS 128
|
||||
#define PIPE_MAX_SHADER_BUFFERS 32
|
||||
#define PIPE_MAX_SHADER_IMAGES 32
|
||||
#define PIPE_MAX_TEXTURE_LEVELS 16
|
||||
#define PIPE_MAX_SO_BUFFERS 4
|
||||
#define PIPE_MAX_SO_OUTPUTS 64
|
||||
#define PIPE_MAX_VIEWPORTS 16
|
||||
#define PIPE_MAX_CLIP_OR_CULL_DISTANCE_COUNT 8
|
||||
#define PIPE_MAX_CLIP_OR_CULL_DISTANCE_ELEMENT_COUNT 2
|
||||
#define PIPE_MAX_WINDOW_RECTANGLES 8
|
||||
#define PIPE_MAX_SAMPLE_LOCATION_GRID_SIZE 4
|
||||
|
||||
#define PIPE_MAX_HW_ATOMIC_BUFFERS 32
|
||||
#define PIPE_MAX_VERTEX_STREAMS 4
|
||||
|
||||
struct pipe_reference
|
||||
{
|
||||
int32_t count; /* atomic */
|
||||
};
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Primitive (point/line/tri) rasterization info
|
||||
*/
|
||||
struct pipe_rasterizer_state
|
||||
{
|
||||
unsigned flatshade:1;
|
||||
unsigned light_twoside:1;
|
||||
unsigned clamp_vertex_color:1;
|
||||
unsigned clamp_fragment_color:1;
|
||||
unsigned front_ccw:1;
|
||||
unsigned cull_face:2; /**< PIPE_FACE_x */
|
||||
unsigned fill_front:2; /**< PIPE_POLYGON_MODE_x */
|
||||
unsigned fill_back:2; /**< PIPE_POLYGON_MODE_x */
|
||||
unsigned offset_point:1;
|
||||
unsigned offset_line:1;
|
||||
unsigned offset_tri:1;
|
||||
unsigned scissor:1;
|
||||
unsigned poly_smooth:1;
|
||||
unsigned poly_stipple_enable:1;
|
||||
unsigned point_smooth:1;
|
||||
unsigned sprite_coord_mode:1; /**< PIPE_SPRITE_COORD_ */
|
||||
unsigned point_quad_rasterization:1; /** points rasterized as quads or points */
|
||||
unsigned point_tri_clip:1; /** large points clipped as tris or points */
|
||||
unsigned point_size_per_vertex:1; /**< size computed in vertex shader */
|
||||
unsigned multisample:1; /* XXX maybe more ms state in future */
|
||||
unsigned force_persample_interp:1;
|
||||
unsigned line_smooth:1;
|
||||
unsigned line_stipple_enable:1;
|
||||
unsigned line_last_pixel:1;
|
||||
unsigned conservative_raster_mode:2; /**< PIPE_CONSERVATIVE_RASTER_x */
|
||||
|
||||
/**
|
||||
* Use the first vertex of a primitive as the provoking vertex for
|
||||
* flat shading.
|
||||
*/
|
||||
unsigned flatshade_first:1;
|
||||
|
||||
unsigned half_pixel_center:1;
|
||||
unsigned bottom_edge_rule:1;
|
||||
|
||||
/*
|
||||
* Conservative rasterization subpixel precision bias in bits
|
||||
*/
|
||||
unsigned subpixel_precision_x:4;
|
||||
unsigned subpixel_precision_y:4;
|
||||
|
||||
/**
|
||||
* When true, rasterization is disabled and no pixels are written.
|
||||
* This only makes sense with the Stream Out functionality.
|
||||
*/
|
||||
unsigned rasterizer_discard:1;
|
||||
|
||||
/**
|
||||
* Exposed by PIPE_CAP_TILE_RASTER_ORDER. When true,
|
||||
* tile_raster_order_increasing_* indicate the order that the rasterizer
|
||||
* should render tiles, to meet the requirements of
|
||||
* GL_MESA_tile_raster_order.
|
||||
*/
|
||||
unsigned tile_raster_order_fixed:1;
|
||||
unsigned tile_raster_order_increasing_x:1;
|
||||
unsigned tile_raster_order_increasing_y:1;
|
||||
|
||||
/**
|
||||
* When false, depth clipping is disabled and the depth value will be
|
||||
* clamped later at the per-pixel level before depth testing.
|
||||
* This depends on PIPE_CAP_DEPTH_CLIP_DISABLE.
|
||||
*
|
||||
* If PIPE_CAP_DEPTH_CLIP_DISABLE_SEPARATE is unsupported, depth_clip_near
|
||||
* is equal to depth_clip_far.
|
||||
*/
|
||||
unsigned depth_clip_near:1;
|
||||
unsigned depth_clip_far:1;
|
||||
|
||||
/**
|
||||
* When true clip space in the z axis goes from [0..1] (D3D). When false
|
||||
* [-1, 1] (GL).
|
||||
*
|
||||
* NOTE: D3D will always use depth clamping.
|
||||
*/
|
||||
unsigned clip_halfz:1;
|
||||
|
||||
/**
|
||||
* When true do not scale offset_units and use same rules for unorm and
|
||||
* float depth buffers (D3D9). When false use GL/D3D1X behaviour.
|
||||
* This depends on PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED.
|
||||
*/
|
||||
unsigned offset_units_unscaled:1;
|
||||
|
||||
/**
|
||||
* Enable bits for clipping half-spaces.
|
||||
* This applies to both user clip planes and shader clip distances.
|
||||
* Note that if the bound shader exports any clip distances, these
|
||||
* replace all user clip planes, and clip half-spaces enabled here
|
||||
* but not written by the shader count as disabled.
|
||||
*/
|
||||
unsigned clip_plane_enable:PIPE_MAX_CLIP_PLANES;
|
||||
|
||||
unsigned line_stipple_factor:8; /**< [1..256] actually */
|
||||
unsigned line_stipple_pattern:16;
|
||||
|
||||
/**
|
||||
* Replace the given TEXCOORD inputs with point coordinates, max. 8 inputs.
|
||||
* If TEXCOORD (including PCOORD) are unsupported, replace GENERIC inputs
|
||||
* instead. Max. 9 inputs: 8x GENERIC to emulate TEXCOORD, and 1x GENERIC
|
||||
* to emulate PCOORD.
|
||||
*/
|
||||
uint16_t sprite_coord_enable; /* 0-7: TEXCOORD/GENERIC, 8: PCOORD */
|
||||
|
||||
float line_width;
|
||||
float point_size; /**< used when no per-vertex size */
|
||||
float offset_units;
|
||||
float offset_scale;
|
||||
float offset_clamp;
|
||||
float conservative_raster_dilate;
|
||||
};
|
||||
|
||||
|
||||
struct pipe_poly_stipple
|
||||
{
|
||||
unsigned stipple[32];
|
||||
};
|
||||
|
||||
|
||||
struct pipe_viewport_state
|
||||
{
|
||||
float scale[3];
|
||||
float translate[3];
|
||||
enum pipe_viewport_swizzle swizzle_x:3;
|
||||
enum pipe_viewport_swizzle swizzle_y:3;
|
||||
enum pipe_viewport_swizzle swizzle_z:3;
|
||||
enum pipe_viewport_swizzle swizzle_w:3;
|
||||
};
|
||||
|
||||
|
||||
struct pipe_scissor_state
|
||||
{
|
||||
unsigned minx:16;
|
||||
unsigned miny:16;
|
||||
unsigned maxx:16;
|
||||
unsigned maxy:16;
|
||||
};
|
||||
|
||||
|
||||
struct pipe_clip_state
|
||||
{
|
||||
float ucp[PIPE_MAX_CLIP_PLANES][4];
|
||||
};
|
||||
|
||||
/**
|
||||
* A single output for vertex transform feedback.
|
||||
*/
|
||||
struct pipe_stream_output
|
||||
{
|
||||
unsigned register_index:6; /**< 0 to 63 (OUT index) */
|
||||
unsigned start_component:2; /** 0 to 3 */
|
||||
unsigned num_components:3; /** 1 to 4 */
|
||||
unsigned output_buffer:3; /**< 0 to PIPE_MAX_SO_BUFFERS */
|
||||
unsigned dst_offset:16; /**< offset into the buffer in dwords */
|
||||
unsigned stream:2; /**< 0 to 3 */
|
||||
};
|
||||
|
||||
/**
|
||||
* Stream output for vertex transform feedback.
|
||||
*/
|
||||
struct pipe_stream_output_info
|
||||
{
|
||||
unsigned num_outputs;
|
||||
/** stride for an entire vertex for each buffer in dwords */
|
||||
uint16_t stride[PIPE_MAX_SO_BUFFERS];
|
||||
|
||||
/**
|
||||
* Array of stream outputs, in the order they are to be written in.
|
||||
* Selected components are tightly packed into the output buffer.
|
||||
*/
|
||||
struct pipe_stream_output output[PIPE_MAX_SO_OUTPUTS];
|
||||
};
|
||||
|
||||
/**
|
||||
* The 'type' parameter identifies whether the shader state contains TGSI
|
||||
* tokens, etc. If the driver returns 'PIPE_SHADER_IR_TGSI' for the
|
||||
* 'PIPE_SHADER_CAP_PREFERRED_IR' shader param, the ir will *always* be
|
||||
* 'PIPE_SHADER_IR_TGSI' and the tokens ptr will be valid. If the driver
|
||||
* requests a different 'pipe_shader_ir' type, then it must check the 'type'
|
||||
* enum to see if it is getting TGSI tokens or its preferred IR.
|
||||
*
|
||||
* TODO pipe_compute_state should probably get similar treatment to handle
|
||||
* multiple IR's in a cleaner way..
|
||||
*
|
||||
* NOTE: since it is expected that the consumer will want to perform
|
||||
* additional passes on the nir_shader, the driver takes ownership of
|
||||
* the nir_shader. If state trackers need to hang on to the IR (for
|
||||
* example, variant management), it should use nir_shader_clone().
|
||||
*/
|
||||
struct pipe_shader_state
|
||||
{
|
||||
enum pipe_shader_ir type;
|
||||
/* TODO move tokens into union. */
|
||||
const struct tgsi_token *tokens;
|
||||
union {
|
||||
void *native;
|
||||
void *nir;
|
||||
} ir;
|
||||
struct pipe_stream_output_info stream_output;
|
||||
};
|
||||
|
||||
static inline void
|
||||
pipe_shader_state_from_tgsi(struct pipe_shader_state *state,
|
||||
const struct tgsi_token *tokens)
|
||||
{
|
||||
state->type = PIPE_SHADER_IR_TGSI;
|
||||
state->tokens = tokens;
|
||||
memset(&state->stream_output, 0, sizeof(state->stream_output));
|
||||
}
|
||||
|
||||
struct pipe_depth_state
|
||||
{
|
||||
unsigned enabled:1; /**< depth test enabled? */
|
||||
unsigned writemask:1; /**< allow depth buffer writes? */
|
||||
unsigned func:3; /**< depth test func (PIPE_FUNC_x) */
|
||||
unsigned bounds_test:1; /**< depth bounds test enabled? */
|
||||
float bounds_min; /**< minimum depth bound */
|
||||
float bounds_max; /**< maximum depth bound */
|
||||
};
|
||||
|
||||
|
||||
struct pipe_stencil_state
|
||||
{
|
||||
unsigned enabled:1; /**< stencil[0]: stencil enabled, stencil[1]: two-side enabled */
|
||||
unsigned func:3; /**< PIPE_FUNC_x */
|
||||
unsigned fail_op:3; /**< PIPE_STENCIL_OP_x */
|
||||
unsigned zpass_op:3; /**< PIPE_STENCIL_OP_x */
|
||||
unsigned zfail_op:3; /**< PIPE_STENCIL_OP_x */
|
||||
unsigned valuemask:8;
|
||||
unsigned writemask:8;
|
||||
};
|
||||
|
||||
|
||||
struct pipe_alpha_state
|
||||
{
|
||||
unsigned enabled:1;
|
||||
unsigned func:3; /**< PIPE_FUNC_x */
|
||||
float ref_value; /**< reference value */
|
||||
};
|
||||
|
||||
|
||||
struct pipe_depth_stencil_alpha_state
|
||||
{
|
||||
struct pipe_depth_state depth;
|
||||
struct pipe_stencil_state stencil[2]; /**< [0] = front, [1] = back */
|
||||
struct pipe_alpha_state alpha;
|
||||
};
|
||||
|
||||
|
||||
struct pipe_rt_blend_state
|
||||
{
|
||||
unsigned blend_enable:1;
|
||||
|
||||
unsigned rgb_func:3; /**< PIPE_BLEND_x */
|
||||
unsigned rgb_src_factor:5; /**< PIPE_BLENDFACTOR_x */
|
||||
unsigned rgb_dst_factor:5; /**< PIPE_BLENDFACTOR_x */
|
||||
|
||||
unsigned alpha_func:3; /**< PIPE_BLEND_x */
|
||||
unsigned alpha_src_factor:5; /**< PIPE_BLENDFACTOR_x */
|
||||
unsigned alpha_dst_factor:5; /**< PIPE_BLENDFACTOR_x */
|
||||
|
||||
unsigned colormask:4; /**< bitmask of PIPE_MASK_R/G/B/A */
|
||||
};
|
||||
|
||||
|
||||
struct pipe_blend_state
|
||||
{
|
||||
unsigned independent_blend_enable:1;
|
||||
unsigned logicop_enable:1;
|
||||
unsigned logicop_func:4; /**< PIPE_LOGICOP_x */
|
||||
unsigned dither:1;
|
||||
unsigned alpha_to_coverage:1;
|
||||
unsigned alpha_to_coverage_dither:1;
|
||||
unsigned alpha_to_one:1;
|
||||
unsigned max_rt:3; /* index of max rt, Ie. # of cbufs minus 1 */
|
||||
struct pipe_rt_blend_state rt[PIPE_MAX_COLOR_BUFS];
|
||||
};
|
||||
|
||||
|
||||
struct pipe_blend_color
|
||||
{
|
||||
float color[4];
|
||||
};
|
||||
|
||||
|
||||
struct pipe_stencil_ref
|
||||
{
|
||||
ubyte ref_value[2];
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Note that pipe_surfaces are "texture views for rendering"
|
||||
* and so in the case of ARB_framebuffer_no_attachment there
|
||||
* is no pipe_surface state available such that we may
|
||||
* extract the number of samples and layers.
|
||||
*/
|
||||
struct pipe_framebuffer_state
|
||||
{
|
||||
uint16_t width, height;
|
||||
uint16_t layers; /**< Number of layers in a no-attachment framebuffer */
|
||||
ubyte samples; /**< Number of samples in a no-attachment framebuffer */
|
||||
|
||||
/** multiple color buffers for multiple render targets */
|
||||
ubyte nr_cbufs;
|
||||
struct pipe_surface *cbufs[PIPE_MAX_COLOR_BUFS];
|
||||
|
||||
struct pipe_surface *zsbuf; /**< Z/stencil buffer */
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Texture sampler state.
|
||||
*/
|
||||
struct pipe_sampler_state
|
||||
{
|
||||
unsigned wrap_s:3; /**< PIPE_TEX_WRAP_x */
|
||||
unsigned wrap_t:3; /**< PIPE_TEX_WRAP_x */
|
||||
unsigned wrap_r:3; /**< PIPE_TEX_WRAP_x */
|
||||
unsigned min_img_filter:1; /**< PIPE_TEX_FILTER_x */
|
||||
unsigned min_mip_filter:2; /**< PIPE_TEX_MIPFILTER_x */
|
||||
unsigned mag_img_filter:1; /**< PIPE_TEX_FILTER_x */
|
||||
unsigned compare_mode:1; /**< PIPE_TEX_COMPARE_x */
|
||||
unsigned compare_func:3; /**< PIPE_FUNC_x */
|
||||
unsigned normalized_coords:1; /**< Are coords normalized to [0,1]? */
|
||||
unsigned max_anisotropy:5;
|
||||
unsigned seamless_cube_map:1;
|
||||
float lod_bias; /**< LOD/lambda bias */
|
||||
float min_lod, max_lod; /**< LOD clamp range, after bias */
|
||||
union pipe_color_union border_color;
|
||||
};
|
||||
|
||||
union pipe_surface_desc {
|
||||
struct {
|
||||
unsigned level;
|
||||
unsigned first_layer:16;
|
||||
unsigned last_layer:16;
|
||||
} tex;
|
||||
struct {
|
||||
unsigned first_element;
|
||||
unsigned last_element;
|
||||
} buf;
|
||||
};
|
||||
|
||||
/**
|
||||
* A view into a texture that can be bound to a color render target /
|
||||
* depth stencil attachment point.
|
||||
*/
|
||||
struct pipe_surface
|
||||
{
|
||||
struct pipe_reference reference;
|
||||
enum pipe_format format:16;
|
||||
unsigned writable:1; /**< writable shader resource */
|
||||
struct pipe_resource *texture; /**< resource into which this is a view */
|
||||
struct pipe_context *context; /**< context this surface belongs to */
|
||||
|
||||
/* XXX width/height should be removed */
|
||||
uint16_t width; /**< logical width in pixels */
|
||||
uint16_t height; /**< logical height in pixels */
|
||||
|
||||
/**
|
||||
* Number of samples for the surface. This will be 0 if rendering
|
||||
* should use the resource's nr_samples, or another value if the resource
|
||||
* is bound using FramebufferTexture2DMultisampleEXT.
|
||||
*/
|
||||
unsigned nr_samples:8;
|
||||
|
||||
union pipe_surface_desc u;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* A view into a texture that can be bound to a shader stage.
|
||||
*/
|
||||
struct pipe_sampler_view
|
||||
{
|
||||
struct pipe_reference reference;
|
||||
enum pipe_format format:15; /**< typed PIPE_FORMAT_x */
|
||||
enum pipe_texture_target target:5; /**< PIPE_TEXTURE_x */
|
||||
unsigned swizzle_r:3; /**< PIPE_SWIZZLE_x for red component */
|
||||
unsigned swizzle_g:3; /**< PIPE_SWIZZLE_x for green component */
|
||||
unsigned swizzle_b:3; /**< PIPE_SWIZZLE_x for blue component */
|
||||
unsigned swizzle_a:3; /**< PIPE_SWIZZLE_x for alpha component */
|
||||
struct pipe_resource *texture; /**< texture into which this is a view */
|
||||
struct pipe_context *context; /**< context this view belongs to */
|
||||
union {
|
||||
struct {
|
||||
unsigned first_layer:16; /**< first layer to use for array textures */
|
||||
unsigned last_layer:16; /**< last layer to use for array textures */
|
||||
unsigned first_level:8; /**< first mipmap level to use */
|
||||
unsigned last_level:8; /**< last mipmap level to use */
|
||||
} tex;
|
||||
struct {
|
||||
unsigned offset; /**< offset in bytes */
|
||||
unsigned size; /**< size of the readable sub-range in bytes */
|
||||
} buf;
|
||||
} u;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* A description of a buffer or texture image that can be bound to a shader
|
||||
* stage.
|
||||
*/
|
||||
struct pipe_image_view
|
||||
{
|
||||
struct pipe_resource *resource; /**< resource into which this is a view */
|
||||
enum pipe_format format; /**< typed PIPE_FORMAT_x */
|
||||
uint16_t access; /**< PIPE_IMAGE_ACCESS_x */
|
||||
uint16_t shader_access; /**< PIPE_IMAGE_ACCESS_x */
|
||||
|
||||
union {
|
||||
struct {
|
||||
unsigned first_layer:16; /**< first layer to use for array textures */
|
||||
unsigned last_layer:16; /**< last layer to use for array textures */
|
||||
unsigned level:8; /**< mipmap level to use */
|
||||
} tex;
|
||||
struct {
|
||||
unsigned offset; /**< offset in bytes */
|
||||
unsigned size; /**< size of the accessible sub-range in bytes */
|
||||
} buf;
|
||||
} u;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Subregion of 1D/2D/3D image resource.
|
||||
*/
|
||||
struct pipe_box
|
||||
{
|
||||
/* Fields only used by textures use int16_t instead of int.
|
||||
* x and width are used by buffers, so they need the full 32-bit range.
|
||||
*/
|
||||
int x;
|
||||
int16_t y;
|
||||
int16_t z;
|
||||
int width;
|
||||
int16_t height;
|
||||
int16_t depth;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* A memory object/resource such as a vertex buffer or texture.
|
||||
*/
|
||||
struct pipe_resource
|
||||
{
|
||||
struct pipe_reference reference;
|
||||
|
||||
unsigned width0; /**< Used by both buffers and textures. */
|
||||
uint16_t height0; /* Textures: The maximum height/depth/array_size is 16k. */
|
||||
uint16_t depth0;
|
||||
uint16_t array_size;
|
||||
|
||||
enum pipe_format format:16; /**< PIPE_FORMAT_x */
|
||||
enum pipe_texture_target target:8; /**< PIPE_TEXTURE_x */
|
||||
unsigned last_level:8; /**< Index of last mipmap level present/defined */
|
||||
|
||||
/** Number of samples determining quality, driving rasterizer, shading,
|
||||
* and framebuffer.
|
||||
*/
|
||||
unsigned nr_samples:8;
|
||||
|
||||
/** Multiple samples within a pixel can have the same value.
|
||||
* nr_storage_samples determines how many slots for different values
|
||||
* there are per pixel. Only color buffers can set this lower than
|
||||
* nr_samples.
|
||||
*/
|
||||
unsigned nr_storage_samples:8;
|
||||
|
||||
unsigned usage:8; /**< PIPE_USAGE_x (not a bitmask) */
|
||||
unsigned bind; /**< bitmask of PIPE_BIND_x */
|
||||
unsigned flags; /**< bitmask of PIPE_RESOURCE_FLAG_x */
|
||||
|
||||
/**
|
||||
* For planar images, ie. YUV EGLImage external, etc, pointer to the
|
||||
* next plane.
|
||||
*/
|
||||
struct pipe_resource *next;
|
||||
/* The screen pointer should be last for optimal structure packing. */
|
||||
struct pipe_screen *screen; /**< screen that this texture belongs to */
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Transfer object. For data transfer to/from a resource.
|
||||
*/
|
||||
struct pipe_transfer
|
||||
{
|
||||
struct pipe_resource *resource; /**< resource to transfer to/from */
|
||||
unsigned level; /**< texture mipmap level */
|
||||
enum pipe_transfer_usage usage;
|
||||
struct pipe_box box; /**< region of the resource to access */
|
||||
unsigned stride; /**< row stride in bytes */
|
||||
unsigned layer_stride; /**< image/layer stride in bytes */
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* A vertex buffer. Typically, all the vertex data/attributes for
|
||||
* drawing something will be in one buffer. But it's also possible, for
|
||||
* example, to put colors in one buffer and texcoords in another.
|
||||
*/
|
||||
struct pipe_vertex_buffer
|
||||
{
|
||||
uint16_t stride; /**< stride to same attrib in next vertex, in bytes */
|
||||
bool is_user_buffer;
|
||||
unsigned buffer_offset; /**< offset to start of data in buffer, in bytes */
|
||||
|
||||
union {
|
||||
struct pipe_resource *resource; /**< the actual buffer */
|
||||
const void *user; /**< pointer to a user buffer */
|
||||
} buffer;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* A constant buffer. A subrange of an existing buffer can be set
|
||||
* as a constant buffer.
|
||||
*/
|
||||
struct pipe_constant_buffer
|
||||
{
|
||||
struct pipe_resource *buffer; /**< the actual buffer */
|
||||
unsigned buffer_offset; /**< offset to start of data in buffer, in bytes */
|
||||
unsigned buffer_size; /**< how much data can be read in shader */
|
||||
const void *user_buffer; /**< pointer to a user buffer if buffer == NULL */
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* An untyped shader buffer supporting loads, stores, and atomics.
|
||||
*/
|
||||
struct pipe_shader_buffer {
|
||||
struct pipe_resource *buffer; /**< the actual buffer */
|
||||
unsigned buffer_offset; /**< offset to start of data in buffer, in bytes */
|
||||
unsigned buffer_size; /**< how much data can be read in shader */
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* A stream output target. The structure specifies the range vertices can
|
||||
* be written to.
|
||||
*
|
||||
* In addition to that, the structure should internally maintain the offset
|
||||
* into the buffer, which should be incremented everytime something is written
|
||||
* (appended) to it. The internal offset is buffer_offset + how many bytes
|
||||
* have been written. The internal offset can be stored on the device
|
||||
* and the CPU actually doesn't have to query it.
|
||||
*
|
||||
* Note that the buffer_size variable is actually specifying the available
|
||||
* space in the buffer, not the size of the attached buffer.
|
||||
* In other words in majority of cases buffer_size would simply be
|
||||
* 'buffer->width0 - buffer_offset', so buffer_size refers to the size
|
||||
* of the buffer left, after accounting for buffer offset, for stream output
|
||||
* to write to.
|
||||
*
|
||||
* Use PIPE_QUERY_SO_STATISTICS to know how many primitives have
|
||||
* actually been written.
|
||||
*/
|
||||
struct pipe_stream_output_target
|
||||
{
|
||||
struct pipe_reference reference;
|
||||
struct pipe_resource *buffer; /**< the output buffer */
|
||||
struct pipe_context *context; /**< context this SO target belongs to */
|
||||
|
||||
unsigned buffer_offset; /**< offset where data should be written, in bytes */
|
||||
unsigned buffer_size; /**< how much data is allowed to be written */
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Information to describe a vertex attribute (position, color, etc)
|
||||
*/
|
||||
struct pipe_vertex_element
|
||||
{
|
||||
/** Offset of this attribute, in bytes, from the start of the vertex */
|
||||
unsigned src_offset:16;
|
||||
|
||||
/** Which vertex_buffer (as given to pipe->set_vertex_buffer()) does
|
||||
* this attribute live in?
|
||||
*/
|
||||
unsigned vertex_buffer_index:5;
|
||||
|
||||
enum pipe_format src_format:11;
|
||||
|
||||
/** Instance data rate divisor. 0 means this is per-vertex data,
|
||||
* n means per-instance data used for n consecutive instances (n > 0).
|
||||
*/
|
||||
unsigned instance_divisor;
|
||||
};
|
||||
|
||||
|
||||
struct pipe_draw_indirect_info
|
||||
{
|
||||
unsigned offset; /**< must be 4 byte aligned */
|
||||
unsigned stride; /**< must be 4 byte aligned */
|
||||
unsigned draw_count; /**< number of indirect draws */
|
||||
unsigned indirect_draw_count_offset; /**< must be 4 byte aligned */
|
||||
|
||||
/* Indirect draw parameters resource is laid out as follows:
|
||||
*
|
||||
* if using indexed drawing:
|
||||
* struct {
|
||||
* uint32_t count;
|
||||
* uint32_t instance_count;
|
||||
* uint32_t start;
|
||||
* int32_t index_bias;
|
||||
* uint32_t start_instance;
|
||||
* };
|
||||
* otherwise:
|
||||
* struct {
|
||||
* uint32_t count;
|
||||
* uint32_t instance_count;
|
||||
* uint32_t start;
|
||||
* uint32_t start_instance;
|
||||
* };
|
||||
*/
|
||||
struct pipe_resource *buffer;
|
||||
|
||||
/* Indirect draw count resource: If not NULL, contains a 32-bit value which
|
||||
* is to be used as the real draw_count.
|
||||
*/
|
||||
struct pipe_resource *indirect_draw_count;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Information to describe a draw_vbo call.
|
||||
*/
|
||||
struct pipe_draw_info
|
||||
{
|
||||
ubyte index_size; /**< if 0, the draw is not indexed. */
|
||||
enum pipe_prim_type mode:8; /**< the mode of the primitive */
|
||||
unsigned primitive_restart:1;
|
||||
unsigned has_user_indices:1; /**< if true, use index.user_buffer */
|
||||
ubyte vertices_per_patch; /**< the number of vertices per patch */
|
||||
|
||||
/**
|
||||
* Direct draws: start is the index of the first vertex
|
||||
* Non-indexed indirect draws: not used
|
||||
* Indexed indirect draws: start is added to the indirect start.
|
||||
*/
|
||||
unsigned start;
|
||||
unsigned count; /**< number of vertices */
|
||||
|
||||
unsigned start_instance; /**< first instance id */
|
||||
unsigned instance_count; /**< number of instances */
|
||||
|
||||
unsigned drawid; /**< id of this draw in a multidraw */
|
||||
|
||||
/**
|
||||
* For indexed drawing, these fields apply after index lookup.
|
||||
*/
|
||||
int index_bias; /**< a bias to be added to each index */
|
||||
unsigned min_index; /**< the min index */
|
||||
unsigned max_index; /**< the max index */
|
||||
|
||||
/**
|
||||
* Primitive restart enable/index (only applies to indexed drawing)
|
||||
*/
|
||||
unsigned restart_index;
|
||||
|
||||
/* Pointers must be at the end for an optimal structure layout on 64-bit. */
|
||||
|
||||
/**
|
||||
* An index buffer. When an index buffer is bound, all indices to vertices
|
||||
* will be looked up from the buffer.
|
||||
*
|
||||
* If has_user_indices, use index.user, else use index.resource.
|
||||
*/
|
||||
union {
|
||||
struct pipe_resource *resource; /**< real buffer */
|
||||
const void *user; /**< pointer to a user buffer */
|
||||
} index;
|
||||
|
||||
struct pipe_draw_indirect_info *indirect; /**< Indirect draw. */
|
||||
|
||||
/**
|
||||
* Stream output target. If not NULL, it's used to provide the 'count'
|
||||
* parameter based on the number vertices captured by the stream output
|
||||
* stage. (or generally, based on the number of bytes captured)
|
||||
*
|
||||
* Only 'mode', 'start_instance', and 'instance_count' are taken into
|
||||
* account, all the other variables from pipe_draw_info are ignored.
|
||||
*
|
||||
* 'start' is implicitly 0 and 'count' is set as discussed above.
|
||||
* The draw command is non-indexed.
|
||||
*
|
||||
* Note that this only provides the count. The vertex buffers must
|
||||
* be set via set_vertex_buffers manually.
|
||||
*/
|
||||
struct pipe_stream_output_target *count_from_stream_output;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Information to describe a blit call.
|
||||
*/
|
||||
struct pipe_blit_info
|
||||
{
|
||||
struct {
|
||||
struct pipe_resource *resource;
|
||||
unsigned level;
|
||||
struct pipe_box box; /**< negative width, height only legal for src */
|
||||
/* For pipe_surface-like format casting: */
|
||||
enum pipe_format format; /**< must be supported for sampling (src)
|
||||
or rendering (dst), ZS is always supported */
|
||||
} dst, src;
|
||||
|
||||
unsigned mask; /**< bitmask of PIPE_MASK_R/G/B/A/Z/S */
|
||||
unsigned filter; /**< PIPE_TEX_FILTER_* */
|
||||
|
||||
bool scissor_enable;
|
||||
struct pipe_scissor_state scissor;
|
||||
|
||||
/* Window rectangles can either be inclusive or exclusive. */
|
||||
bool window_rectangle_include;
|
||||
unsigned num_window_rectangles;
|
||||
struct pipe_scissor_state window_rectangles[PIPE_MAX_WINDOW_RECTANGLES];
|
||||
|
||||
bool render_condition_enable; /**< whether the blit should honor the
|
||||
current render condition */
|
||||
bool alpha_blend; /* dst.rgb = src.rgb * src.a + dst.rgb * (1 - src.a) */
|
||||
};
|
||||
|
||||
/**
|
||||
* Information to describe a launch_grid call.
|
||||
*/
|
||||
struct pipe_grid_info
|
||||
{
|
||||
/**
|
||||
* For drivers that use PIPE_SHADER_IR_NATIVE as their prefered IR, this
|
||||
* value will be the index of the kernel in the opencl.kernels metadata
|
||||
* list.
|
||||
*/
|
||||
uint32_t pc;
|
||||
|
||||
/**
|
||||
* Will be used to initialize the INPUT resource, and it should point to a
|
||||
* buffer of at least pipe_compute_state::req_input_mem bytes.
|
||||
*/
|
||||
void *input;
|
||||
|
||||
/**
|
||||
* Grid number of dimensions, 1-3, e.g. the work_dim parameter passed to
|
||||
* clEnqueueNDRangeKernel. Note block[] and grid[] must be padded with
|
||||
* 1 for non-used dimensions.
|
||||
*/
|
||||
uint work_dim;
|
||||
|
||||
/**
|
||||
* Determine the layout of the working block (in thread units) to be used.
|
||||
*/
|
||||
uint block[3];
|
||||
|
||||
/**
|
||||
* last_block allows disabling threads at the farthermost grid boundary.
|
||||
* Full blocks as specified by "block" are launched, but the threads
|
||||
* outside of "last_block" dimensions are disabled.
|
||||
*
|
||||
* If a block touches the grid boundary in the i-th axis, threads with
|
||||
* THREAD_ID[i] >= last_block[i] are disabled.
|
||||
*
|
||||
* If last_block[i] is 0, it has the same behavior as last_block[i] = block[i],
|
||||
* meaning no effect.
|
||||
*
|
||||
* It's equivalent to doing this at the beginning of the compute shader:
|
||||
*
|
||||
* for (i = 0; i < 3; i++) {
|
||||
* if (block_id[i] == grid[i] - 1 &&
|
||||
* last_block[i] && thread_id[i] >= last_block[i])
|
||||
* return;
|
||||
* }
|
||||
*/
|
||||
uint last_block[3];
|
||||
|
||||
/**
|
||||
* Determine the layout of the grid (in block units) to be used.
|
||||
*/
|
||||
uint grid[3];
|
||||
|
||||
/* Indirect compute parameters resource: If not NULL, block sizes are taken
|
||||
* from this buffer instead, which is laid out as follows:
|
||||
*
|
||||
* struct {
|
||||
* uint32_t num_blocks_x;
|
||||
* uint32_t num_blocks_y;
|
||||
* uint32_t num_blocks_z;
|
||||
* };
|
||||
*/
|
||||
struct pipe_resource *indirect;
|
||||
unsigned indirect_offset; /**< must be 4 byte aligned */
|
||||
};
|
||||
|
||||
/**
|
||||
* Structure used as a header for serialized compute programs.
|
||||
*/
|
||||
struct pipe_binary_program_header
|
||||
{
|
||||
uint32_t num_bytes; /**< Number of bytes in the LLVM bytecode program. */
|
||||
char blob[];
|
||||
};
|
||||
|
||||
struct pipe_compute_state
|
||||
{
|
||||
enum pipe_shader_ir ir_type; /**< IR type contained in prog. */
|
||||
const void *prog; /**< Compute program to be executed. */
|
||||
unsigned req_local_mem; /**< Required size of the LOCAL resource. */
|
||||
unsigned req_private_mem; /**< Required size of the PRIVATE resource. */
|
||||
unsigned req_input_mem; /**< Required size of the INPUT resource. */
|
||||
};
|
||||
|
||||
/**
|
||||
* Structure that contains a callback for debug messages from the driver back
|
||||
* to the state tracker.
|
||||
*/
|
||||
struct pipe_debug_callback
|
||||
{
|
||||
/**
|
||||
* When set to \c true, the callback may be called asynchronously from a
|
||||
* driver-created thread.
|
||||
*/
|
||||
bool async;
|
||||
|
||||
/**
|
||||
* Callback for the driver to report debug/performance/etc information back
|
||||
* to the state tracker.
|
||||
*
|
||||
* \param data user-supplied data pointer
|
||||
* \param id message type identifier, if pointed value is 0, then a
|
||||
* new id is assigned
|
||||
* \param type PIPE_DEBUG_TYPE_*
|
||||
* \param format printf-style format string
|
||||
* \param args args for format string
|
||||
*/
|
||||
void (*debug_message)(void *data,
|
||||
unsigned *id,
|
||||
enum pipe_debug_type type,
|
||||
const char *fmt,
|
||||
va_list args);
|
||||
void *data;
|
||||
};
|
||||
|
||||
/**
|
||||
* Structure that contains a callback for device reset messages from the driver
|
||||
* back to the state tracker.
|
||||
*
|
||||
* The callback must not be called from driver-created threads.
|
||||
*/
|
||||
struct pipe_device_reset_callback
|
||||
{
|
||||
/**
|
||||
* Callback for the driver to report when a device reset is detected.
|
||||
*
|
||||
* \param data user-supplied data pointer
|
||||
* \param status PIPE_*_RESET
|
||||
*/
|
||||
void (*reset)(void *data, enum pipe_reset_status status);
|
||||
|
||||
void *data;
|
||||
};
|
||||
|
||||
/**
|
||||
* Information about memory usage. All sizes are in kilobytes.
|
||||
*/
|
||||
struct pipe_memory_info
|
||||
{
|
||||
unsigned total_device_memory; /**< size of device memory, e.g. VRAM */
|
||||
unsigned avail_device_memory; /**< free device memory at the moment */
|
||||
unsigned total_staging_memory; /**< size of staging memory, e.g. GART */
|
||||
unsigned avail_staging_memory; /**< free staging memory at the moment */
|
||||
unsigned device_memory_evicted; /**< size of memory evicted (monotonic counter) */
|
||||
unsigned nr_device_memory_evictions; /**< # of evictions (monotonic counter) */
|
||||
};
|
||||
|
||||
/**
|
||||
* Structure that contains information about external memory
|
||||
*/
|
||||
struct pipe_memory_object
|
||||
{
|
||||
bool dedicated;
|
||||
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -77,7 +77,7 @@
|
|||
|
||||
|
||||
#include "glheader.h"
|
||||
#include "imports.h"
|
||||
|
||||
#include "accum.h"
|
||||
#include "api_exec.h"
|
||||
#include "api_loopback.h"
|
||||
|
@ -152,6 +152,7 @@
|
|||
#include "compiler/glsl/builtin_functions.h"
|
||||
#include "compiler/glsl/glsl_parser_extras.h"
|
||||
#include <stdbool.h>
|
||||
#include "util/u_memory.h"
|
||||
|
||||
|
||||
#ifndef MESA_VERBOSE
|
||||
|
@ -181,7 +182,7 @@ _mesa_notifySwapBuffers(struct gl_context *ctx)
|
|||
{
|
||||
if (MESA_VERBOSE & VERBOSE_SWAPBUFFERS)
|
||||
_mesa_debug(ctx, "SwapBuffers\n");
|
||||
FLUSH_CURRENT( ctx, 0 );
|
||||
FLUSH_VERTICES(ctx, 0);
|
||||
if (ctx->Driver.Flush) {
|
||||
ctx->Driver.Flush(ctx);
|
||||
}
|
||||
|
@ -367,15 +368,15 @@ one_time_fini(void)
|
|||
*
|
||||
* \sa _math_init().
|
||||
*/
|
||||
static void
|
||||
one_time_init( struct gl_context *ctx )
|
||||
void
|
||||
_mesa_initialize(void)
|
||||
{
|
||||
static GLbitfield api_init_mask = 0x0;
|
||||
static bool initialized;
|
||||
|
||||
mtx_lock(&OneTimeLock);
|
||||
|
||||
/* truly one-time init */
|
||||
if (!api_init_mask) {
|
||||
if (!initialized) {
|
||||
GLuint i;
|
||||
|
||||
STATIC_ASSERT(sizeof(GLbyte) == 1);
|
||||
|
@ -387,7 +388,7 @@ one_time_init( struct gl_context *ctx )
|
|||
|
||||
_mesa_locale_init();
|
||||
|
||||
_mesa_one_time_init_extension_overrides(ctx);
|
||||
_mesa_one_time_init_extension_overrides();
|
||||
|
||||
_mesa_get_cpu_features();
|
||||
|
||||
|
@ -399,7 +400,7 @@ one_time_init( struct gl_context *ctx )
|
|||
|
||||
#if defined(DEBUG)
|
||||
if (MESA_VERBOSE != 0) {
|
||||
_mesa_debug(ctx, "Mesa " PACKAGE_VERSION " DEBUG build" MESA_GIT_SHA1 "\n");
|
||||
_mesa_debug(NULL, "Mesa " PACKAGE_VERSION " DEBUG build" MESA_GIT_SHA1 "\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -407,14 +408,11 @@ one_time_init( struct gl_context *ctx )
|
|||
* unecessary creation/destruction of glsl types.
|
||||
*/
|
||||
glsl_type_singleton_init_or_ref();
|
||||
}
|
||||
|
||||
/* per-API one-time init */
|
||||
if (!(api_init_mask & (1 << ctx->API))) {
|
||||
_mesa_init_remap_table();
|
||||
}
|
||||
|
||||
api_init_mask |= 1 << ctx->API;
|
||||
initialized = true;
|
||||
|
||||
mtx_unlock(&OneTimeLock);
|
||||
}
|
||||
|
@ -754,6 +752,8 @@ _mesa_init_constants(struct gl_constants *consts, gl_api api)
|
|||
consts->ConservativeRasterDilateRange[0] = 0.0;
|
||||
consts->ConservativeRasterDilateRange[1] = 0.0;
|
||||
consts->ConservativeRasterDilateGranularity = 0.0;
|
||||
|
||||
consts->glBeginEndBufferSize = 512 * 1024;
|
||||
}
|
||||
|
||||
|
||||
|
@ -1204,7 +1204,7 @@ _mesa_initialize_context(struct gl_context *ctx,
|
|||
_mesa_override_gl_version(ctx);
|
||||
|
||||
/* misc one-time initializations */
|
||||
one_time_init(ctx);
|
||||
_mesa_initialize();
|
||||
|
||||
/* Plug in driver functions and context pointer here.
|
||||
* This is important because when we call alloc_shared_state() below
|
||||
|
@ -1763,6 +1763,7 @@ _mesa_make_current( struct gl_context *newCtx,
|
|||
* changed since the last time this FBO was bound).
|
||||
*/
|
||||
_mesa_update_draw_buffers(newCtx);
|
||||
_mesa_update_allow_draw_out_of_order(newCtx);
|
||||
}
|
||||
if (!newCtx->ReadBuffer || _mesa_is_winsys_fbo(newCtx->ReadBuffer)) {
|
||||
_mesa_reference_framebuffer(&newCtx->ReadBuffer, readBuffer);
|
||||
|
@ -1874,7 +1875,6 @@ void
|
|||
_mesa_flush(struct gl_context *ctx)
|
||||
{
|
||||
FLUSH_VERTICES( ctx, 0 );
|
||||
FLUSH_CURRENT( ctx, 0 );
|
||||
if (ctx->Driver.Flush) {
|
||||
ctx->Driver.Flush(ctx);
|
||||
}
|
||||
|
@ -1895,7 +1895,6 @@ _mesa_Finish(void)
|
|||
ASSERT_OUTSIDE_BEGIN_END(ctx);
|
||||
|
||||
FLUSH_VERTICES(ctx, 0);
|
||||
FLUSH_CURRENT(ctx, 0);
|
||||
|
||||
if (ctx->Driver.Finish) {
|
||||
ctx->Driver.Finish(ctx);
|
||||
|
|
|
@ -50,7 +50,7 @@
|
|||
|
||||
|
||||
#include "errors.h"
|
||||
#include "imports.h"
|
||||
|
||||
#include "extensions.h"
|
||||
#include "mtypes.h"
|
||||
#include "vbo/vbo.h"
|
||||
|
@ -66,7 +66,7 @@ struct _glapi_table;
|
|||
|
||||
/** \name Visual-related functions */
|
||||
/*@{*/
|
||||
|
||||
|
||||
extern struct gl_config *
|
||||
_mesa_create_visual( GLboolean dbFlag,
|
||||
GLboolean stereoFlag,
|
||||
|
@ -107,6 +107,9 @@ _mesa_destroy_visual( struct gl_config *vis );
|
|||
/** \name Context-related functions */
|
||||
/*@{*/
|
||||
|
||||
extern void
|
||||
_mesa_initialize(void);
|
||||
|
||||
extern GLboolean
|
||||
_mesa_initialize_context( struct gl_context *ctx,
|
||||
gl_api api,
|
||||
|
@ -244,14 +247,20 @@ do { \
|
|||
do { \
|
||||
if (MESA_VERBOSE & VERBOSE_STATE) \
|
||||
_mesa_debug(ctx, "FLUSH_FOR_DRAW in %s\n", __func__); \
|
||||
if (ctx->Driver.NeedFlush) \
|
||||
vbo_exec_FlushVertices(ctx, ctx->Driver.NeedFlush); \
|
||||
if (ctx->Driver.NeedFlush) { \
|
||||
if (ctx->_AllowDrawOutOfOrder) { \
|
||||
if (ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT) \
|
||||
vbo_exec_FlushVertices(ctx, FLUSH_UPDATE_CURRENT); \
|
||||
} else { \
|
||||
vbo_exec_FlushVertices(ctx, ctx->Driver.NeedFlush); \
|
||||
} \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/**
|
||||
* Macro to assert that the API call was made outside the
|
||||
* glBegin()/glEnd() pair, with return value.
|
||||
*
|
||||
*
|
||||
* \param ctx GL context.
|
||||
* \param retval value to return in case the assertion fails.
|
||||
*/
|
||||
|
@ -266,7 +275,7 @@ do { \
|
|||
/**
|
||||
* Macro to assert that the API call was made outside the
|
||||
* glBegin()/glEnd() pair.
|
||||
*
|
||||
*
|
||||
* \param ctx GL context.
|
||||
*/
|
||||
#define ASSERT_OUTSIDE_BEGIN_END(ctx) \
|
||||
|
|
|
@ -34,6 +34,7 @@
|
|||
#include "glheader.h"
|
||||
#include "formats.h"
|
||||
#include "menums.h"
|
||||
#include "compiler/shader_enums.h"
|
||||
|
||||
struct gl_bitmap_atlas;
|
||||
struct gl_buffer_object;
|
||||
|
@ -70,6 +71,9 @@ struct _mesa_index_buffer;
|
|||
*/
|
||||
#define MESA_MAP_NOWAIT_BIT 0x4000
|
||||
|
||||
/* Mapping a buffer is allowed from any thread. */
|
||||
#define MESA_MAP_THREAD_SAFE_BIT 0x8000
|
||||
|
||||
|
||||
/**
|
||||
* Device driver function table.
|
||||
|
@ -450,7 +454,8 @@ struct dd_function_table {
|
|||
*/
|
||||
/*@{*/
|
||||
/** Allocate a new program */
|
||||
struct gl_program * (*NewProgram)(struct gl_context *ctx, GLenum target,
|
||||
struct gl_program * (*NewProgram)(struct gl_context *ctx,
|
||||
gl_shader_stage stage,
|
||||
GLuint id, bool is_arb_asm);
|
||||
/** Delete a program */
|
||||
void (*DeleteProgram)(struct gl_context *ctx, struct gl_program *prog);
|
||||
|
@ -528,6 +533,8 @@ struct dd_function_table {
|
|||
* \param index_bounds_valid are min_index and max_index valid?
|
||||
* \param min_index lowest vertex index used
|
||||
* \param max_index highest vertex index used
|
||||
* \param num_instances instance count from ARB_draw_instanced
|
||||
* \param base_instance base instance from ARB_base_instance
|
||||
* \param tfb_vertcount if non-null, indicates which transform feedback
|
||||
* object has the vertex count.
|
||||
* \param tfb_stream If called via DrawTransformFeedbackStream, specifies
|
||||
|
@ -542,8 +549,9 @@ struct dd_function_table {
|
|||
const struct _mesa_index_buffer *ib,
|
||||
GLboolean index_bounds_valid,
|
||||
GLuint min_index, GLuint max_index,
|
||||
GLuint num_instances, GLuint base_instance,
|
||||
struct gl_transform_feedback_object *tfb_vertcount,
|
||||
unsigned tfb_stream, struct gl_buffer_object *indirect);
|
||||
unsigned tfb_stream);
|
||||
|
||||
|
||||
/**
|
||||
|
|
|
@ -42,21 +42,26 @@ struct gl_context;
|
|||
|
||||
struct _mesa_prim
|
||||
{
|
||||
GLuint mode:8; /**< GL_POINTS, GL_LINES, GL_QUAD_STRIP, etc */
|
||||
GLuint indexed:1;
|
||||
GLuint begin:1;
|
||||
GLuint end:1;
|
||||
GLuint is_indirect:1;
|
||||
GLuint pad:20;
|
||||
GLubyte mode; /**< GL_POINTS, GL_LINES, GL_QUAD_STRIP, etc */
|
||||
|
||||
/**
|
||||
* tnl: If true, line stipple emulation will reset the pattern walker.
|
||||
* vbo: If false and the primitive is a line loop, the first vertex is
|
||||
* the beginning of the line loop and it won't be drawn.
|
||||
* Instead, it will be moved to the end.
|
||||
*/
|
||||
bool begin;
|
||||
|
||||
/**
|
||||
* tnl: If true and the primitive is a line loop, it will be closed.
|
||||
* vbo: Same as tnl.
|
||||
*/
|
||||
bool end;
|
||||
|
||||
GLuint start;
|
||||
GLuint count;
|
||||
GLint basevertex;
|
||||
GLuint num_instances;
|
||||
GLuint base_instance;
|
||||
GLuint draw_id;
|
||||
|
||||
GLsizeiptr indirect_offset;
|
||||
};
|
||||
|
||||
/* Would like to call this a "vbo_index_buffer", but this would be
|
||||
|
@ -66,7 +71,7 @@ struct _mesa_prim
|
|||
struct _mesa_index_buffer
|
||||
{
|
||||
GLuint count;
|
||||
unsigned index_size;
|
||||
uint8_t index_size_shift; /* logbase2(index_size) */
|
||||
struct gl_buffer_object *obj;
|
||||
const void *ptr;
|
||||
};
|
||||
|
@ -76,25 +81,88 @@ void
|
|||
_mesa_initialize_exec_dispatch(const struct gl_context *ctx,
|
||||
struct _glapi_table *exec);
|
||||
|
||||
void GLAPIENTRY
|
||||
_mesa_EvalMesh1(GLenum mode, GLint i1, GLint i2);
|
||||
|
||||
void
|
||||
_mesa_draw_indirect(struct gl_context *ctx, GLuint mode,
|
||||
struct gl_buffer_object *indirect_data,
|
||||
GLsizeiptr indirect_offset, unsigned draw_count,
|
||||
unsigned stride,
|
||||
struct gl_buffer_object *indirect_draw_count_buffer,
|
||||
GLsizeiptr indirect_draw_count_offset,
|
||||
const struct _mesa_index_buffer *ib);
|
||||
void GLAPIENTRY
|
||||
_mesa_EvalMesh2(GLenum mode, GLint i1, GLint i2, GLint j1, GLint j2);
|
||||
|
||||
void GLAPIENTRY
|
||||
_mesa_DrawElementsInstancedARB(GLenum mode, GLsizei count, GLenum type,
|
||||
const GLvoid * indices, GLsizei numInstances);
|
||||
|
||||
void GLAPIENTRY
|
||||
_mesa_DrawArraysInstancedBaseInstance(GLenum mode, GLint first,
|
||||
GLsizei count, GLsizei numInstances,
|
||||
GLuint baseInstance);
|
||||
|
||||
void GLAPIENTRY
|
||||
_mesa_DrawElementsInstancedBaseVertex(GLenum mode, GLsizei count,
|
||||
GLenum type, const GLvoid * indices,
|
||||
GLsizei numInstances,
|
||||
GLint basevertex);
|
||||
|
||||
void GLAPIENTRY
|
||||
_mesa_DrawElementsInstancedBaseInstance(GLenum mode, GLsizei count,
|
||||
GLenum type,
|
||||
const GLvoid *indices,
|
||||
GLsizei numInstances,
|
||||
GLuint baseInstance);
|
||||
|
||||
void GLAPIENTRY
|
||||
_mesa_DrawTransformFeedbackStream(GLenum mode, GLuint name, GLuint stream);
|
||||
|
||||
void GLAPIENTRY
|
||||
_mesa_DrawTransformFeedbackInstanced(GLenum mode, GLuint name,
|
||||
GLsizei primcount);
|
||||
|
||||
void GLAPIENTRY
|
||||
_mesa_DrawTransformFeedbackStreamInstanced(GLenum mode, GLuint name,
|
||||
GLuint stream,
|
||||
GLsizei primcount);
|
||||
|
||||
void GLAPIENTRY
|
||||
_mesa_DrawArraysIndirect(GLenum mode, const GLvoid *indirect);
|
||||
|
||||
void GLAPIENTRY
|
||||
_mesa_DrawElementsIndirect(GLenum mode, GLenum type, const GLvoid *indirect);
|
||||
|
||||
void GLAPIENTRY
|
||||
_mesa_MultiDrawArraysIndirect(GLenum mode, const GLvoid *indirect,
|
||||
GLsizei primcount, GLsizei stride);
|
||||
|
||||
void GLAPIENTRY
|
||||
_mesa_MultiDrawElementsIndirect(GLenum mode, GLenum type,
|
||||
const GLvoid *indirect,
|
||||
GLsizei primcount, GLsizei stride);
|
||||
|
||||
void GLAPIENTRY
|
||||
_mesa_MultiDrawArraysIndirectCountARB(GLenum mode, GLintptr indirect,
|
||||
GLintptr drawcount_offset,
|
||||
GLsizei maxdrawcount, GLsizei stride);
|
||||
|
||||
void GLAPIENTRY
|
||||
_mesa_MultiDrawElementsIndirectCountARB(GLenum mode, GLenum type,
|
||||
GLintptr indirect,
|
||||
GLintptr drawcount_offset,
|
||||
GLsizei maxdrawcount, GLsizei stride);
|
||||
|
||||
void GLAPIENTRY
|
||||
_mesa_DrawArrays(GLenum mode, GLint first, GLsizei count);
|
||||
|
||||
|
||||
void GLAPIENTRY
|
||||
_mesa_DrawArraysInstanced(GLenum mode, GLint first, GLsizei count,
|
||||
GLsizei primcount);
|
||||
_mesa_DrawArraysInstancedARB(GLenum mode, GLint first, GLsizei count,
|
||||
GLsizei primcount);
|
||||
|
||||
void GLAPIENTRY
|
||||
_mesa_DrawElementsInstancedBaseVertexBaseInstance(GLenum mode,
|
||||
GLsizei count,
|
||||
GLenum type,
|
||||
const GLvoid *indices,
|
||||
GLsizei numInstances,
|
||||
GLint basevertex,
|
||||
GLuint baseInstance);
|
||||
|
||||
void GLAPIENTRY
|
||||
_mesa_DrawElements(GLenum mode, GLsizei count, GLenum type,
|
||||
|
|
|
@ -47,7 +47,7 @@ struct gl_extensions;
|
|||
|
||||
extern void _mesa_enable_sw_extensions(struct gl_context *ctx);
|
||||
|
||||
extern void _mesa_one_time_init_extension_overrides(struct gl_context *ctx);
|
||||
extern void _mesa_one_time_init_extension_overrides(void);
|
||||
|
||||
extern void _mesa_init_extensions(struct gl_extensions *extentions);
|
||||
|
||||
|
|
|
@ -234,7 +234,7 @@ EXT(EXT_draw_buffers , dummy_true
|
|||
EXT(EXT_draw_buffers2 , EXT_draw_buffers2 , GLL, GLC, x , x , 2006)
|
||||
EXT(EXT_draw_buffers_indexed , ARB_draw_buffers_blend , x , x , x , 30, 2014)
|
||||
EXT(EXT_draw_elements_base_vertex , ARB_draw_elements_base_vertex , x , x , x , ES2, 2014)
|
||||
EXT(EXT_draw_instanced , ARB_draw_instanced , GLL, GLC, x , x , 2006)
|
||||
EXT(EXT_draw_instanced , ARB_draw_instanced , GLL, GLC, x , ES2 , 2006)
|
||||
EXT(EXT_draw_range_elements , dummy_true , GLL, x , x , x , 1997)
|
||||
EXT(EXT_float_blend , EXT_float_blend , x , x , x , 30, 2015)
|
||||
EXT(EXT_fog_coord , dummy_true , GLL, x , x , x , 1999)
|
||||
|
@ -343,6 +343,7 @@ EXT(IBM_texture_mirrored_repeat , dummy_true
|
|||
|
||||
EXT(INGR_blend_func_separate , EXT_blend_func_separate , GLL, x , x , x , 1999)
|
||||
|
||||
EXT(INTEL_blackhole_render , INTEL_blackhole_render , 30, 30, x , ES2, 2018)
|
||||
EXT(INTEL_conservative_rasterization , INTEL_conservative_rasterization , x , GLC, x , 31, 2013)
|
||||
EXT(INTEL_performance_query , INTEL_performance_query , GLL, GLC, x , ES2, 2013)
|
||||
EXT(INTEL_shader_atomic_float_minmax , INTEL_shader_atomic_float_minmax , GLL, GLC, x , x , 2018)
|
||||
|
@ -370,6 +371,7 @@ EXT(MESA_ycbcr_texture , MESA_ycbcr_texture
|
|||
|
||||
EXT(NVX_gpu_memory_info , NVX_gpu_memory_info , GLL, GLC, x , x , 2013)
|
||||
|
||||
EXT(NV_alpha_to_coverage_dither_control , NV_alpha_to_coverage_dither_control , GLL, GLC, x , ES2, 2017)
|
||||
EXT(NV_blend_square , dummy_true , GLL, x , x , x , 1999)
|
||||
EXT(NV_compute_shader_derivatives , NV_compute_shader_derivatives , GLL, GLC, x , 32, 2018)
|
||||
EXT(NV_conditional_render , NV_conditional_render , GLL, GLC, x , ES2, 2008)
|
||||
|
@ -377,6 +379,7 @@ EXT(NV_conservative_raster , NV_conservative_raster
|
|||
EXT(NV_conservative_raster_dilate , NV_conservative_raster_dilate , GLL, GLC, ES1, ES2, 2015)
|
||||
EXT(NV_conservative_raster_pre_snap , NV_conservative_raster_pre_snap , GLL, GLC, ES1, ES2, 2017)
|
||||
EXT(NV_conservative_raster_pre_snap_triangles, NV_conservative_raster_pre_snap_triangles, GLL, GLC, ES1, ES2, 2015)
|
||||
EXT(NV_copy_image , NV_copy_image , GLL, GLC, x , x, 2009)
|
||||
EXT(NV_depth_clamp , ARB_depth_clamp , GLL, GLC, x , x , 2001)
|
||||
EXT(NV_draw_buffers , dummy_true , x , x , x , ES2, 2011)
|
||||
EXT(NV_fbo_color_attachments , dummy_true , x , x , x , ES2, 2010)
|
||||
|
@ -386,6 +389,7 @@ EXT(NV_fragment_shader_interlock , ARB_fragment_shader_interlock
|
|||
EXT(NV_image_formats , ARB_shader_image_load_store , x , x , x , 31, 2014)
|
||||
EXT(NV_light_max_exponent , dummy_true , GLL, x , x , x , 1999)
|
||||
EXT(NV_packed_depth_stencil , dummy_true , GLL, GLC, x , x , 2000)
|
||||
EXT(NV_pixel_buffer_object , EXT_pixel_buffer_object , x , x , x , ES2, 2012)
|
||||
EXT(NV_point_sprite , NV_point_sprite , GLL, GLC, x , x , 2001)
|
||||
EXT(NV_primitive_restart , NV_primitive_restart , GLL, x , x , x , 2002)
|
||||
EXT(NV_read_buffer , dummy_true , x , x , x , ES2, 2011)
|
||||
|
@ -399,6 +403,8 @@ EXT(NV_texture_barrier , NV_texture_barrier
|
|||
EXT(NV_texture_env_combine4 , NV_texture_env_combine4 , GLL, x , x , x , 1999)
|
||||
EXT(NV_texture_rectangle , NV_texture_rectangle , GLL, x , x , x , 2000)
|
||||
EXT(NV_vdpau_interop , NV_vdpau_interop , GLL, GLC, x , x , 2010)
|
||||
EXT(NV_viewport_array2 , NV_viewport_array2 , GLL, GLC, x , 31, 2015)
|
||||
EXT(NV_viewport_swizzle , NV_viewport_swizzle , GLL, GLC, x , 31, 2015)
|
||||
|
||||
EXT(OES_EGL_image , OES_EGL_image , GLL, GLC, ES1, ES2, 2006) /* FIXME: Mesa expects GL_OES_EGL_image to be available in OpenGL contexts. */
|
||||
EXT(OES_EGL_image_external , OES_EGL_image_external , x , x , ES1, ES2, 2010)
|
||||
|
|
|
@ -619,8 +619,12 @@ typedef enum pipe_format mesa_format;
|
|||
/* Packed to array format adapters */
|
||||
#if UTIL_ARCH_LITTLE_ENDIAN
|
||||
#define MESA_FORMAT_RGBA_UINT8 MESA_FORMAT_R8G8B8A8_UINT
|
||||
#define MESA_FORMAT_RGBA_UNORM8 MESA_FORMAT_R8G8B8A8_UNORM
|
||||
#define MESA_FORMAT_RGBA_SNORM8 MESA_FORMAT_R8G8B8A8_SNORM
|
||||
#else
|
||||
#define MESA_FORMAT_RGBA_UINT8 MESA_FORMAT_A8B8G8R8_UINT
|
||||
#define MESA_FORMAT_RGBA_UNORM8 MESA_FORMAT_A8B8G8R8_UNORM
|
||||
#define MESA_FORMAT_RGBA_SNORM8 MESA_FORMAT_A8B8G8R8_SNORM
|
||||
#endif
|
||||
|
||||
extern const char *
|
||||
|
|
|
@ -0,0 +1,142 @@
|
|||
/*
|
||||
* Copyright © 2012 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef _GLTHREAD_H
|
||||
#define _GLTHREAD_H
|
||||
|
||||
/* The size of one batch and the maximum size of one call.
|
||||
*
|
||||
* This should be as low as possible, so that:
|
||||
* - multiple synchronizations within a frame don't slow us down much
|
||||
* - a smaller number of calls per frame can still get decent parallelism
|
||||
* - the memory footprint of the queue is low, and with that comes a lower
|
||||
* chance of experiencing CPU cache thrashing
|
||||
* but it should be high enough so that u_queue overhead remains negligible.
|
||||
*/
|
||||
#define MARSHAL_MAX_CMD_SIZE (8 * 1024)
|
||||
|
||||
/* The number of batch slots in memory.
|
||||
*
|
||||
* One batch is being executed, one batch is being filled, the rest are
|
||||
* waiting batches. There must be at least 1 slot for a waiting batch,
|
||||
* so the minimum number of batches is 3.
|
||||
*/
|
||||
#define MARSHAL_MAX_BATCHES 8
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdbool.h>
|
||||
#include "util/u_queue.h"
|
||||
#include "GL/gl.h"
|
||||
#include "compiler/shader_enums.h"
|
||||
|
||||
struct gl_context;
|
||||
struct _mesa_HashTable;
|
||||
|
||||
struct glthread_vao {
|
||||
GLuint Name;
|
||||
GLuint CurrentElementBufferName;
|
||||
GLbitfield Enabled;
|
||||
GLbitfield UserPointerMask;
|
||||
};
|
||||
|
||||
/** A single batch of commands queued up for execution. */
|
||||
struct glthread_batch
|
||||
{
|
||||
/** Batch fence for waiting for the execution to finish. */
|
||||
struct util_queue_fence fence;
|
||||
|
||||
/** The worker thread will access the context with this. */
|
||||
struct gl_context *ctx;
|
||||
|
||||
/** Amount of data used by batch commands, in bytes. */
|
||||
int used;
|
||||
|
||||
/** Data contained in the command buffer. */
|
||||
#ifdef _MSC_VER
|
||||
__declspec(align(8))
|
||||
#else
|
||||
__attribute__((aligned(8)))
|
||||
#endif
|
||||
uint8_t buffer[MARSHAL_MAX_CMD_SIZE];
|
||||
};
|
||||
|
||||
struct glthread_state
|
||||
{
|
||||
/** Multithreaded queue. */
|
||||
struct util_queue queue;
|
||||
|
||||
/** This is sent to the driver for framebuffer overlay / HUD. */
|
||||
struct util_queue_monitoring stats;
|
||||
|
||||
/** Whether GLThread is enabled. */
|
||||
bool enabled;
|
||||
|
||||
/** The ring of batches in memory. */
|
||||
struct glthread_batch batches[MARSHAL_MAX_BATCHES];
|
||||
|
||||
/** Pointer to the batch currently being filled. */
|
||||
struct glthread_batch *next_batch;
|
||||
|
||||
/** Index of the last submitted batch. */
|
||||
unsigned last;
|
||||
|
||||
/** Index of the batch being filled and about to be submitted. */
|
||||
unsigned next;
|
||||
|
||||
/** Vertex Array objects tracked by glthread independently of Mesa. */
|
||||
struct _mesa_HashTable *VAOs;
|
||||
struct glthread_vao *CurrentVAO;
|
||||
struct glthread_vao *LastLookedUpVAO;
|
||||
struct glthread_vao DefaultVAO;
|
||||
int ClientActiveTexture;
|
||||
|
||||
/** Currently-bound buffer object IDs. */
|
||||
GLuint CurrentArrayBufferName;
|
||||
GLuint CurrentDrawIndirectBufferName;
|
||||
};
|
||||
|
||||
void _mesa_glthread_init(struct gl_context *ctx);
|
||||
void _mesa_glthread_destroy(struct gl_context *ctx);
|
||||
|
||||
void _mesa_glthread_restore_dispatch(struct gl_context *ctx, const char *func);
|
||||
void _mesa_glthread_disable(struct gl_context *ctx, const char *func);
|
||||
void _mesa_glthread_flush_batch(struct gl_context *ctx);
|
||||
void _mesa_glthread_finish(struct gl_context *ctx);
|
||||
void _mesa_glthread_finish_before(struct gl_context *ctx, const char *func);
|
||||
|
||||
void _mesa_glthread_BindBuffer(struct gl_context *ctx, GLenum target,
|
||||
GLuint buffer);
|
||||
void _mesa_glthread_DeleteBuffers(struct gl_context *ctx, GLsizei n,
|
||||
const GLuint *buffers);
|
||||
|
||||
void _mesa_glthread_BindVertexArray(struct gl_context *ctx, GLuint id);
|
||||
void _mesa_glthread_DeleteVertexArrays(struct gl_context *ctx,
|
||||
GLsizei n, const GLuint *ids);
|
||||
void _mesa_glthread_GenVertexArrays(struct gl_context *ctx,
|
||||
GLsizei n, GLuint *arrays);
|
||||
void _mesa_glthread_ClientState(struct gl_context *ctx, GLuint *vaobj,
|
||||
gl_vert_attrib attrib, bool enable);
|
||||
void _mesa_glthread_AttribPointer(struct gl_context *ctx,
|
||||
gl_vert_attrib attrib);
|
||||
|
||||
#endif /* _GLTHREAD_H*/
|
|
@ -1,6 +1,6 @@
|
|||
/**
|
||||
* \file hash.h
|
||||
* Generic hash table.
|
||||
* Generic hash table.
|
||||
*/
|
||||
|
||||
/*
|
||||
|
@ -32,8 +32,10 @@
|
|||
#define HASH_H
|
||||
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include "glheader.h"
|
||||
#include "imports.h"
|
||||
|
||||
#include "c11/threads.h"
|
||||
|
||||
/**
|
||||
|
|
|
@ -1,234 +0,0 @@
|
|||
/**
|
||||
* \file imports.c
|
||||
* Standard C library function wrappers.
|
||||
*
|
||||
* Imports are services which the device driver or window system or
|
||||
* operating system provides to the core renderer. The core renderer (Mesa)
|
||||
* will call these functions in order to do memory allocation, simple I/O,
|
||||
* etc.
|
||||
*
|
||||
* Some drivers will want to override/replace this file with something
|
||||
* specialized, but that'll be rare.
|
||||
*
|
||||
* Eventually, I want to move roll the glheader.h file into this.
|
||||
*
|
||||
* \todo Functions still needed:
|
||||
* - scanf
|
||||
* - qsort
|
||||
* - rand and RAND_MAX
|
||||
*/
|
||||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
*
|
||||
* Copyright (C) 1999-2007 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdarg.h>
|
||||
#include "c99_math.h"
|
||||
#include "imports.h"
|
||||
#include "context.h"
|
||||
#include "version.h"
|
||||
|
||||
#ifdef _GNU_SOURCE
|
||||
#include <locale.h>
|
||||
#ifdef __APPLE__
|
||||
#include <xlocale.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef _WIN32
|
||||
#define vsnprintf _vsnprintf
|
||||
#elif defined(__IBMC__) || defined(__IBMCPP__)
|
||||
extern int vsnprintf(char *str, size_t count, const char *fmt, va_list arg);
|
||||
#endif
|
||||
|
||||
/**********************************************************************/
|
||||
/** \name Memory */
|
||||
/*@{*/
|
||||
|
||||
/**
|
||||
* Allocate aligned memory.
|
||||
*
|
||||
* \param bytes number of bytes to allocate.
|
||||
* \param alignment alignment (must be greater than zero).
|
||||
*
|
||||
* Allocates extra memory to accommodate rounding up the address for
|
||||
* alignment and to record the real malloc address.
|
||||
*
|
||||
* \sa _mesa_align_free().
|
||||
*/
|
||||
void *
|
||||
_mesa_align_malloc(size_t bytes, unsigned long alignment)
|
||||
{
|
||||
#if defined(HAVE_POSIX_MEMALIGN)
|
||||
void *mem;
|
||||
int err = posix_memalign(& mem, alignment, bytes);
|
||||
if (err)
|
||||
return NULL;
|
||||
return mem;
|
||||
#elif defined(_WIN32)
|
||||
return _aligned_malloc(bytes, alignment);
|
||||
#else
|
||||
uintptr_t ptr, buf;
|
||||
|
||||
assert( alignment > 0 );
|
||||
|
||||
ptr = (uintptr_t)malloc(bytes + alignment + sizeof(void *));
|
||||
if (!ptr)
|
||||
return NULL;
|
||||
|
||||
buf = (ptr + alignment + sizeof(void *)) & ~(uintptr_t)(alignment - 1);
|
||||
*(uintptr_t *)(buf - sizeof(void *)) = ptr;
|
||||
|
||||
#ifndef NDEBUG
|
||||
/* mark the non-aligned area */
|
||||
while ( ptr < buf - sizeof(void *) ) {
|
||||
*(unsigned long *)ptr = 0xcdcdcdcd;
|
||||
ptr += sizeof(unsigned long);
|
||||
}
|
||||
#endif
|
||||
|
||||
return (void *) buf;
|
||||
#endif /* defined(HAVE_POSIX_MEMALIGN) */
|
||||
}
|
||||
|
||||
/**
|
||||
* Same as _mesa_align_malloc(), but using calloc(1, ) instead of
|
||||
* malloc()
|
||||
*/
|
||||
void *
|
||||
_mesa_align_calloc(size_t bytes, unsigned long alignment)
|
||||
{
|
||||
#if defined(HAVE_POSIX_MEMALIGN)
|
||||
void *mem;
|
||||
|
||||
mem = _mesa_align_malloc(bytes, alignment);
|
||||
if (mem != NULL) {
|
||||
(void) memset(mem, 0, bytes);
|
||||
}
|
||||
|
||||
return mem;
|
||||
#elif defined(_WIN32)
|
||||
void *mem;
|
||||
|
||||
mem = _aligned_malloc(bytes, alignment);
|
||||
if (mem != NULL) {
|
||||
(void) memset(mem, 0, bytes);
|
||||
}
|
||||
|
||||
return mem;
|
||||
#else
|
||||
uintptr_t ptr, buf;
|
||||
|
||||
assert( alignment > 0 );
|
||||
|
||||
ptr = (uintptr_t)calloc(1, bytes + alignment + sizeof(void *));
|
||||
if (!ptr)
|
||||
return NULL;
|
||||
|
||||
buf = (ptr + alignment + sizeof(void *)) & ~(uintptr_t)(alignment - 1);
|
||||
*(uintptr_t *)(buf - sizeof(void *)) = ptr;
|
||||
|
||||
#ifndef NDEBUG
|
||||
/* mark the non-aligned area */
|
||||
while ( ptr < buf - sizeof(void *) ) {
|
||||
*(unsigned long *)ptr = 0xcdcdcdcd;
|
||||
ptr += sizeof(unsigned long);
|
||||
}
|
||||
#endif
|
||||
|
||||
return (void *)buf;
|
||||
#endif /* defined(HAVE_POSIX_MEMALIGN) */
|
||||
}
|
||||
|
||||
/**
|
||||
* Free memory which was allocated with either _mesa_align_malloc()
|
||||
* or _mesa_align_calloc().
|
||||
* \param ptr pointer to the memory to be freed.
|
||||
* The actual address to free is stored in the word immediately before the
|
||||
* address the client sees.
|
||||
* Note that it is legal to pass NULL pointer to this function and will be
|
||||
* handled accordingly.
|
||||
*/
|
||||
void
|
||||
_mesa_align_free(void *ptr)
|
||||
{
|
||||
#if defined(HAVE_POSIX_MEMALIGN)
|
||||
free(ptr);
|
||||
#elif defined(_WIN32)
|
||||
_aligned_free(ptr);
|
||||
#else
|
||||
if (ptr) {
|
||||
void **cubbyHole = (void **) ((char *) ptr - sizeof(void *));
|
||||
void *realAddr = *cubbyHole;
|
||||
free(realAddr);
|
||||
}
|
||||
#endif /* defined(HAVE_POSIX_MEMALIGN) */
|
||||
}
|
||||
|
||||
/**
|
||||
* Reallocate memory, with alignment.
|
||||
*/
|
||||
void *
|
||||
_mesa_align_realloc(void *oldBuffer, size_t oldSize, size_t newSize,
|
||||
unsigned long alignment)
|
||||
{
|
||||
#if defined(_WIN32)
|
||||
(void) oldSize;
|
||||
return _aligned_realloc(oldBuffer, newSize, alignment);
|
||||
#else
|
||||
const size_t copySize = (oldSize < newSize) ? oldSize : newSize;
|
||||
void *newBuf = _mesa_align_malloc(newSize, alignment);
|
||||
if (newBuf && oldBuffer && copySize > 0) {
|
||||
memcpy(newBuf, oldBuffer, copySize);
|
||||
}
|
||||
|
||||
_mesa_align_free(oldBuffer);
|
||||
return newBuf;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*@}*/
|
||||
|
||||
|
||||
/** Needed due to #ifdef's, above. */
|
||||
int
|
||||
_mesa_vsnprintf(char *str, size_t size, const char *fmt, va_list args)
|
||||
{
|
||||
return vsnprintf( str, size, fmt, args);
|
||||
}
|
||||
|
||||
/** Wrapper around vsnprintf() */
|
||||
int
|
||||
_mesa_snprintf( char *str, size_t size, const char *fmt, ... )
|
||||
{
|
||||
int r;
|
||||
va_list args;
|
||||
va_start( args, fmt );
|
||||
r = vsnprintf( str, size, fmt, args );
|
||||
va_end( args );
|
||||
return r;
|
||||
}
|
||||
|
||||
|
|
@ -1,321 +0,0 @@
|
|||
/*
|
||||
* Mesa 3-D graphics library
|
||||
*
|
||||
* Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* \file imports.h
|
||||
* Standard C library function wrappers.
|
||||
*
|
||||
* This file provides wrappers for all the standard C library functions
|
||||
* like malloc(), free(), printf(), getenv(), etc.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef IMPORTS_H
|
||||
#define IMPORTS_H
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdarg.h>
|
||||
#include <string.h>
|
||||
#include "compiler.h"
|
||||
#include "glheader.h"
|
||||
#include "util/bitscan.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/**********************************************************************/
|
||||
/** Memory macros */
|
||||
/*@{*/
|
||||
|
||||
/** Allocate a structure of type \p T */
|
||||
#define MALLOC_STRUCT(T) (struct T *) malloc(sizeof(struct T))
|
||||
/** Allocate and zero a structure of type \p T */
|
||||
#define CALLOC_STRUCT(T) (struct T *) calloc(1, sizeof(struct T))
|
||||
|
||||
/*@}*/
|
||||
|
||||
|
||||
/*
|
||||
* For GL_ARB_vertex_buffer_object we need to treat vertex array pointers
|
||||
* as offsets into buffer stores. Since the vertex array pointer and
|
||||
* buffer store pointer are both pointers and we need to add them, we use
|
||||
* this macro.
|
||||
* Both pointers/offsets are expressed in bytes.
|
||||
*/
|
||||
#define ADD_POINTERS(A, B) ( (GLubyte *) (A) + (uintptr_t) (B) )
|
||||
|
||||
|
||||
/**
|
||||
* Sometimes we treat GLfloats as GLints. On x86 systems, moving a float
|
||||
* as an int (thereby using integer registers instead of FP registers) is
|
||||
* a performance win. Typically, this can be done with ordinary casts.
|
||||
* But with gcc's -fstrict-aliasing flag (which defaults to on in gcc 3.0)
|
||||
* these casts generate warnings.
|
||||
* The following union typedef is used to solve that.
|
||||
*/
|
||||
typedef union { GLfloat f; GLint i; GLuint u; } fi_type;
|
||||
|
||||
|
||||
|
||||
/*@}*/
|
||||
|
||||
|
||||
/***
|
||||
*** LOG2: Log base 2 of float
|
||||
***/
|
||||
static inline GLfloat LOG2(GLfloat x)
|
||||
{
|
||||
#if 0
|
||||
/* This is pretty fast, but not accurate enough (only 2 fractional bits).
|
||||
* Based on code from http://www.stereopsis.com/log2.html
|
||||
*/
|
||||
const GLfloat y = x * x * x * x;
|
||||
const GLuint ix = *((GLuint *) &y);
|
||||
const GLuint exp = (ix >> 23) & 0xFF;
|
||||
const GLint log2 = ((GLint) exp) - 127;
|
||||
return (GLfloat) log2 * (1.0 / 4.0); /* 4, because of x^4 above */
|
||||
#endif
|
||||
/* Pretty fast, and accurate.
|
||||
* Based on code from http://www.flipcode.com/totd/
|
||||
*/
|
||||
fi_type num;
|
||||
GLint log_2;
|
||||
num.f = x;
|
||||
log_2 = ((num.i >> 23) & 255) - 128;
|
||||
num.i &= ~(255 << 23);
|
||||
num.i += 127 << 23;
|
||||
num.f = ((-1.0f/3) * num.f + 2) * num.f - 2.0f/3;
|
||||
return num.f + log_2;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* finite macro.
|
||||
*/
|
||||
#if defined(_MSC_VER)
|
||||
# define finite _finite
|
||||
#endif
|
||||
|
||||
|
||||
/***
|
||||
*** IS_INF_OR_NAN: test if float is infinite or NaN
|
||||
***/
|
||||
#if defined(isfinite)
|
||||
#define IS_INF_OR_NAN(x) (!isfinite(x))
|
||||
#elif defined(finite)
|
||||
#define IS_INF_OR_NAN(x) (!finite(x))
|
||||
#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
|
||||
#define IS_INF_OR_NAN(x) (!isfinite(x))
|
||||
#else
|
||||
#define IS_INF_OR_NAN(x) (!finite(x))
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* Convert float to int by rounding to nearest integer, away from zero.
|
||||
*/
|
||||
static inline int IROUND(float f)
|
||||
{
|
||||
return (int) ((f >= 0.0F) ? (f + 0.5F) : (f - 0.5F));
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert double to int by rounding to nearest integer, away from zero.
|
||||
*/
|
||||
static inline int IROUNDD(double d)
|
||||
{
|
||||
return (int) ((d >= 0.0) ? (d + 0.5) : (d - 0.5));
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert float to int64 by rounding to nearest integer.
|
||||
*/
|
||||
static inline GLint64 IROUND64(float f)
|
||||
{
|
||||
return (GLint64) ((f >= 0.0F) ? (f + 0.5F) : (f - 0.5F));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Convert positive float to int by rounding to nearest integer.
|
||||
*/
|
||||
static inline int IROUND_POS(float f)
|
||||
{
|
||||
assert(f >= 0.0F);
|
||||
return (int) (f + 0.5F);
|
||||
}
|
||||
|
||||
/** Return (as an integer) floor of float */
|
||||
static inline int IFLOOR(float f)
|
||||
{
|
||||
#if defined(USE_X86_ASM) && defined(__GNUC__) && defined(__i386__)
|
||||
/*
|
||||
* IEEE floor for computers that round to nearest or even.
|
||||
* 'f' must be between -4194304 and 4194303.
|
||||
* This floor operation is done by "(iround(f + .5) + iround(f - .5)) >> 1",
|
||||
* but uses some IEEE specific tricks for better speed.
|
||||
* Contributed by Josh Vanderhoof
|
||||
*/
|
||||
int ai, bi;
|
||||
double af, bf;
|
||||
af = (3 << 22) + 0.5 + (double)f;
|
||||
bf = (3 << 22) + 0.5 - (double)f;
|
||||
/* GCC generates an extra fstp/fld without this. */
|
||||
__asm__ ("fstps %0" : "=m" (ai) : "t" (af) : "st");
|
||||
__asm__ ("fstps %0" : "=m" (bi) : "t" (bf) : "st");
|
||||
return (ai - bi) >> 1;
|
||||
#else
|
||||
int ai, bi;
|
||||
double af, bf;
|
||||
fi_type u;
|
||||
af = (3 << 22) + 0.5 + (double)f;
|
||||
bf = (3 << 22) + 0.5 - (double)f;
|
||||
u.f = (float) af; ai = u.i;
|
||||
u.f = (float) bf; bi = u.i;
|
||||
return (ai - bi) >> 1;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Is x a power of two?
|
||||
*/
|
||||
static inline int
|
||||
_mesa_is_pow_two(int x)
|
||||
{
|
||||
return !(x & (x - 1));
|
||||
}
|
||||
|
||||
/**
|
||||
* Round given integer to next higer power of two
|
||||
* If X is zero result is undefined.
|
||||
*
|
||||
* Source for the fallback implementation is
|
||||
* Sean Eron Anderson's webpage "Bit Twiddling Hacks"
|
||||
* http://graphics.stanford.edu/~seander/bithacks.html
|
||||
*
|
||||
* When using builtin function have to do some work
|
||||
* for case when passed values 1 to prevent hiting
|
||||
* undefined result from __builtin_clz. Undefined
|
||||
* results would be different depending on optimization
|
||||
* level used for build.
|
||||
*/
|
||||
static inline int32_t
|
||||
_mesa_next_pow_two_32(uint32_t x)
|
||||
{
|
||||
#ifdef HAVE___BUILTIN_CLZ
|
||||
uint32_t y = (x != 1);
|
||||
return (1 + y) << ((__builtin_clz(x - y) ^ 31) );
|
||||
#else
|
||||
x--;
|
||||
x |= x >> 1;
|
||||
x |= x >> 2;
|
||||
x |= x >> 4;
|
||||
x |= x >> 8;
|
||||
x |= x >> 16;
|
||||
x++;
|
||||
return x;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline int64_t
|
||||
_mesa_next_pow_two_64(uint64_t x)
|
||||
{
|
||||
#ifdef HAVE___BUILTIN_CLZLL
|
||||
uint64_t y = (x != 1);
|
||||
STATIC_ASSERT(sizeof(x) == sizeof(long long));
|
||||
return (1 + y) << ((__builtin_clzll(x - y) ^ 63));
|
||||
#else
|
||||
x--;
|
||||
x |= x >> 1;
|
||||
x |= x >> 2;
|
||||
x |= x >> 4;
|
||||
x |= x >> 8;
|
||||
x |= x >> 16;
|
||||
x |= x >> 32;
|
||||
x++;
|
||||
return x;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Returns the floor form of binary logarithm for a 32-bit integer.
|
||||
*/
|
||||
static inline GLuint
|
||||
_mesa_logbase2(GLuint n)
|
||||
{
|
||||
#ifdef HAVE___BUILTIN_CLZ
|
||||
return (31 - __builtin_clz(n | 1));
|
||||
#else
|
||||
GLuint pos = 0;
|
||||
if (n >= 1<<16) { n >>= 16; pos += 16; }
|
||||
if (n >= 1<< 8) { n >>= 8; pos += 8; }
|
||||
if (n >= 1<< 4) { n >>= 4; pos += 4; }
|
||||
if (n >= 1<< 2) { n >>= 2; pos += 2; }
|
||||
if (n >= 1<< 1) { pos += 1; }
|
||||
return pos;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* Functions
|
||||
*/
|
||||
|
||||
extern void *
|
||||
_mesa_align_malloc( size_t bytes, unsigned long alignment );
|
||||
|
||||
extern void *
|
||||
_mesa_align_calloc( size_t bytes, unsigned long alignment );
|
||||
|
||||
extern void
|
||||
_mesa_align_free( void *ptr );
|
||||
|
||||
extern void *
|
||||
_mesa_align_realloc(void *oldBuffer, size_t oldSize, size_t newSize,
|
||||
unsigned long alignment);
|
||||
|
||||
extern int
|
||||
_mesa_snprintf( char *str, size_t size, const char *fmt, ... ) PRINTFLIKE(3, 4);
|
||||
|
||||
extern int
|
||||
_mesa_vsnprintf(char *str, size_t size, const char *fmt, va_list arg);
|
||||
|
||||
|
||||
#if defined(_WIN32) && !defined(HAVE_STRTOK_R)
|
||||
#define strtok_r strtok_s
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* IMPORTS_H */
|
|
@ -34,7 +34,9 @@
|
|||
#include "util/macros.h"
|
||||
#include "util/u_math.h"
|
||||
#include "util/rounding.h"
|
||||
#include "imports.h"
|
||||
#include "util/compiler.h"
|
||||
#include "main/glheader.h"
|
||||
#include "mesa_private.h"
|
||||
|
||||
|
||||
/**
|
||||
|
@ -194,6 +196,30 @@ static inline fi_type FLOAT_AS_UNION(GLfloat f)
|
|||
return tmp;
|
||||
}
|
||||
|
||||
static inline uint64_t DOUBLE_AS_UINT64(double d)
|
||||
{
|
||||
union {
|
||||
double d;
|
||||
uint64_t u64;
|
||||
} tmp;
|
||||
tmp.d = d;
|
||||
return tmp.u64;
|
||||
}
|
||||
|
||||
static inline double UINT64_AS_DOUBLE(uint64_t u)
|
||||
{
|
||||
union {
|
||||
double d;
|
||||
uint64_t u64;
|
||||
} tmp;
|
||||
tmp.u64 = u;
|
||||
return tmp.d;
|
||||
}
|
||||
|
||||
/* First sign-extend x, then return uint32_t. */
|
||||
#define INT_AS_UINT(x) ((uint32_t)((int32_t)(x)))
|
||||
#define FLOAT_AS_UINT(x) (FLOAT_AS_UNION(x).u)
|
||||
|
||||
/**
|
||||
* Convert a floating point value to an unsigned fixed point value.
|
||||
*
|
||||
|
@ -666,52 +692,6 @@ minify(unsigned value, unsigned levels)
|
|||
return MAX2(1, value >> levels);
|
||||
}
|
||||
|
||||
/**
|
||||
* Align a value up to an alignment value
|
||||
*
|
||||
* If \c value is not already aligned to the requested alignment value, it
|
||||
* will be rounded up.
|
||||
*
|
||||
* \param value Value to be rounded
|
||||
* \param alignment Alignment value to be used. This must be a power of two.
|
||||
*
|
||||
* \sa ROUND_DOWN_TO()
|
||||
*/
|
||||
static inline uintptr_t
|
||||
ALIGN(uintptr_t value, int32_t alignment)
|
||||
{
|
||||
assert((alignment > 0) && _mesa_is_pow_two(alignment));
|
||||
return (((value) + (alignment) - 1) & ~((alignment) - 1));
|
||||
}
|
||||
|
||||
/**
|
||||
* Like ALIGN(), but works with a non-power-of-two alignment.
|
||||
*/
|
||||
static inline uintptr_t
|
||||
ALIGN_NPOT(uintptr_t value, int32_t alignment)
|
||||
{
|
||||
assert(alignment > 0);
|
||||
return (value + alignment - 1) / alignment * alignment;
|
||||
}
|
||||
|
||||
/**
|
||||
* Align a value down to an alignment value
|
||||
*
|
||||
* If \c value is not already aligned to the requested alignment value, it
|
||||
* will be rounded down.
|
||||
*
|
||||
* \param value Value to be rounded
|
||||
* \param alignment Alignment value to be used. This must be a power of two.
|
||||
*
|
||||
* \sa ALIGN()
|
||||
*/
|
||||
static inline uintptr_t
|
||||
ROUND_DOWN_TO(uintptr_t value, int32_t alignment)
|
||||
{
|
||||
assert((alignment > 0) && _mesa_is_pow_two(alignment));
|
||||
return ((value) & ~(alignment - 1));
|
||||
}
|
||||
|
||||
|
||||
/** Cross product of two 3-element vectors */
|
||||
static inline void
|
||||
|
@ -808,4 +788,13 @@ DIFFERENT_SIGNS(GLfloat x, GLfloat y)
|
|||
/* Stringify */
|
||||
#define STRINGIFY(x) #x
|
||||
|
||||
/*
|
||||
* For GL_ARB_vertex_buffer_object we need to treat vertex array pointers
|
||||
* as offsets into buffer stores. Since the vertex array pointer and
|
||||
* buffer store pointer are both pointers and we need to add them, we use
|
||||
* this macro.
|
||||
* Both pointers/offsets are expressed in bytes.
|
||||
*/
|
||||
#define ADD_POINTERS(A, B) ( (GLubyte *) (A) + (uintptr_t) (B) )
|
||||
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,56 @@
|
|||
/*
|
||||
* Mesa 3-D graphics library
|
||||
*
|
||||
* Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file mesa_private.h
|
||||
* Contains mesa internal values
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef MESA_PRIVATE_H
|
||||
#define MESA_PRIVATE_H
|
||||
|
||||
#include "glheader.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* Sometimes we treat floats as ints. On x86 systems, moving a float
|
||||
* as an int (thereby using integer registers instead of FP registers) is
|
||||
* a performance win. Typically, this can be done with ordinary casts.
|
||||
* But with gcc's -fstrict-aliasing flag (which defaults to on in gcc 3.0)
|
||||
* these casts generate warnings.
|
||||
* The following union typedef is used to solve that.
|
||||
*/
|
||||
typedef union { float f; int i; unsigned u; } fi_type;
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* MESA_PRIVATE_H */
|
|
@ -39,6 +39,7 @@
|
|||
#include "c11/threads.h"
|
||||
|
||||
#include "main/glheader.h"
|
||||
#include "main/glthread.h"
|
||||
#include "main/menums.h"
|
||||
#include "main/config.h"
|
||||
#include "glapi/glapi.h"
|
||||
|
@ -104,6 +105,7 @@ _mesa_varying_slot_in_fs(gl_varying_slot slot)
|
|||
case VARYING_SLOT_TESS_LEVEL_INNER:
|
||||
case VARYING_SLOT_BOUNDING_BOX0:
|
||||
case VARYING_SLOT_BOUNDING_BOX1:
|
||||
case VARYING_SLOT_VIEWPORT_MASK:
|
||||
return GL_FALSE;
|
||||
default:
|
||||
return GL_TRUE;
|
||||
|
@ -459,6 +461,7 @@ struct gl_vertex_format
|
|||
{
|
||||
GLenum16 Type; /**< datatype: GL_FLOAT, GL_INT, etc */
|
||||
GLenum16 Format; /**< default: GL_RGBA, but may be GL_BGRA */
|
||||
enum pipe_format _PipeFormat:16; /**< pipe_format for Gallium */
|
||||
GLubyte Size:5; /**< components per element (1,2,3,4) */
|
||||
GLubyte Normalized:1; /**< GL_ARB_vertex_program */
|
||||
GLubyte Integer:1; /**< Integer-valued? */
|
||||
|
@ -686,6 +689,9 @@ struct gl_multisample_attrib
|
|||
|
||||
/** The GL spec defines this as an array but >32x MSAA is madness */
|
||||
GLbitfield SampleMaskValue;
|
||||
|
||||
/* NV_alpha_to_coverage_dither_control */
|
||||
GLenum SampleAlphaToCoverageDitherControl;
|
||||
};
|
||||
|
||||
|
||||
|
@ -1310,6 +1316,9 @@ struct gl_viewport_attrib
|
|||
GLfloat X, Y; /**< position */
|
||||
GLfloat Width, Height; /**< size */
|
||||
GLfloat Near, Far; /**< Depth buffer range */
|
||||
|
||||
/**< GL_NV_viewport_swizzle */
|
||||
GLenum16 SwizzleX, SwizzleY, SwizzleZ, SwizzleW;
|
||||
};
|
||||
|
||||
|
||||
|
@ -1546,6 +1555,9 @@ struct gl_vertex_array_object
|
|||
/** Mask indicating which vertex arrays have vertex buffer associated. */
|
||||
GLbitfield VertexAttribBufferMask;
|
||||
|
||||
/** Mask indicating which vertex arrays have a non-zero instance divisor. */
|
||||
GLbitfield NonZeroDivisorMask;
|
||||
|
||||
/** Mask of VERT_BIT_* values indicating which arrays are enabled */
|
||||
GLbitfield Enabled;
|
||||
|
||||
|
@ -1558,6 +1570,9 @@ struct gl_vertex_array_object
|
|||
*/
|
||||
GLbitfield _EffEnabledVBO;
|
||||
|
||||
/** Same as _EffEnabledVBO, but for instance divisors. */
|
||||
GLbitfield _EffEnabledNonZeroDivisor;
|
||||
|
||||
/** Denotes the way the position/generic0 attribute is mapped */
|
||||
gl_attribute_map_mode _AttributeMapMode;
|
||||
|
||||
|
@ -1583,6 +1598,9 @@ struct gl_array_attrib
|
|||
/** The last VAO accessed by a DSA function */
|
||||
struct gl_vertex_array_object *LastLookedUpVAO;
|
||||
|
||||
/** These contents are copied to newly created VAOs. */
|
||||
struct gl_vertex_array_object DefaultVAOState;
|
||||
|
||||
/** Array objects (GL_ARB_vertex_array_object) */
|
||||
struct _mesa_HashTable *Objects;
|
||||
|
||||
|
@ -1601,6 +1619,7 @@ struct gl_array_attrib
|
|||
GLboolean PrimitiveRestartFixedIndex;
|
||||
GLboolean _PrimitiveRestart;
|
||||
GLuint RestartIndex;
|
||||
GLuint _RestartIndex[4]; /**< Restart indices for index_size - 1. */
|
||||
/*@}*/
|
||||
|
||||
/* GL_ARB_vertex_buffer_object */
|
||||
|
@ -2658,6 +2677,12 @@ struct gl_shader
|
|||
bool bound_sampler;
|
||||
bool bound_image;
|
||||
|
||||
/**
|
||||
* Whether layer output is viewport-relative.
|
||||
*/
|
||||
bool redeclares_gl_layer;
|
||||
bool layer_viewport_relative;
|
||||
|
||||
/** Global xfb_stride out qualifier if any */
|
||||
GLuint TransformFeedbackBufferStride[MAX_FEEDBACK_BUFFERS];
|
||||
|
||||
|
@ -3167,6 +3192,15 @@ struct gl_shader_compiler_options
|
|||
* gl_CullDistance together from
|
||||
* float[8] to vec4[2]
|
||||
**/
|
||||
GLbitfield LowerBuiltinVariablesXfb; /**< Which builtin variables should
|
||||
* be lowered for transform feedback
|
||||
**/
|
||||
|
||||
/**
|
||||
* If we can lower the precision of variables based on precision
|
||||
* qualifiers
|
||||
*/
|
||||
GLboolean LowerPrecision;
|
||||
|
||||
/**
|
||||
* \name Forms of indirect addressing the driver cannot do.
|
||||
|
@ -3293,9 +3327,6 @@ struct gl_shared_state
|
|||
GLuint TextureStateStamp; /**< state notification for shared tex */
|
||||
/*@}*/
|
||||
|
||||
/** Default buffer object for vertex arrays that aren't in VBOs */
|
||||
struct gl_buffer_object *NullBufferObj;
|
||||
|
||||
/**
|
||||
* \name Vertex/geometry/fragment programs
|
||||
*/
|
||||
|
@ -3835,6 +3866,11 @@ struct gl_constants
|
|||
*/
|
||||
GLboolean GLSLZeroInit;
|
||||
|
||||
/**
|
||||
* Treat integer textures using GL_LINEAR filters as GL_NEAREST.
|
||||
*/
|
||||
GLboolean ForceIntegerTexNearest;
|
||||
|
||||
/**
|
||||
* Does the driver support real 32-bit integers? (Otherwise, integers are
|
||||
* simulated via floats.)
|
||||
|
@ -3967,6 +4003,15 @@ struct gl_constants
|
|||
*/
|
||||
GLboolean DisableVaryingPacking;
|
||||
|
||||
/**
|
||||
* Disable varying packing if used for transform feedback. This is needed
|
||||
* for some drivers (e.g. Panfrost) where transform feedback requires
|
||||
* unpacked varyings.
|
||||
*
|
||||
* This variable is mutually exlusive with DisableVaryingPacking.
|
||||
*/
|
||||
GLboolean DisableTransformFeedbackPacking;
|
||||
|
||||
/**
|
||||
* UBOs and SSBOs can be packed tightly by the OpenGL implementation when
|
||||
* layout is set as shared (the default) or packed. However most Mesa drivers
|
||||
|
@ -4026,51 +4071,6 @@ struct gl_constants
|
|||
} SupportedMultisampleModes[40];
|
||||
GLint NumSupportedMultisampleModes;
|
||||
|
||||
/**
|
||||
* GL_EXT_texture_multisample_blit_scaled implementation assumes that
|
||||
* samples are laid out in a rectangular grid roughly corresponding to
|
||||
* sample locations within a pixel. Below SampleMap{2,4,8}x variables
|
||||
* are used to map indices of rectangular grid to sample numbers within
|
||||
* a pixel. This mapping of indices to sample numbers must be initialized
|
||||
* by the driver for the target hardware. For example, if we have the 8X
|
||||
* MSAA sample number layout (sample positions) for XYZ hardware:
|
||||
*
|
||||
* sample indices layout sample number layout
|
||||
* --------- ---------
|
||||
* | 0 | 1 | | a | b |
|
||||
* --------- ---------
|
||||
* | 2 | 3 | | c | d |
|
||||
* --------- ---------
|
||||
* | 4 | 5 | | e | f |
|
||||
* --------- ---------
|
||||
* | 6 | 7 | | g | h |
|
||||
* --------- ---------
|
||||
*
|
||||
* Where a,b,c,d,e,f,g,h are integers between [0-7].
|
||||
*
|
||||
* Then, initialize the SampleMap8x variable for XYZ hardware as shown
|
||||
* below:
|
||||
* SampleMap8x = {a, b, c, d, e, f, g, h};
|
||||
*
|
||||
* Follow the logic for sample counts 2-8.
|
||||
*
|
||||
* For 16x the sample indices layout as a 4x4 grid as follows:
|
||||
*
|
||||
* -----------------
|
||||
* | 0 | 1 | 2 | 3 |
|
||||
* -----------------
|
||||
* | 4 | 5 | 6 | 7 |
|
||||
* -----------------
|
||||
* | 8 | 9 |10 |11 |
|
||||
* -----------------
|
||||
* |12 |13 |14 |15 |
|
||||
* -----------------
|
||||
*/
|
||||
uint8_t SampleMap2x[2];
|
||||
uint8_t SampleMap4x[4];
|
||||
uint8_t SampleMap8x[8];
|
||||
uint8_t SampleMap16x[16];
|
||||
|
||||
/** GL_ARB_shader_atomic_counters */
|
||||
GLuint MaxAtomicBufferBindings;
|
||||
GLuint MaxAtomicBufferSize;
|
||||
|
@ -4131,6 +4131,12 @@ struct gl_constants
|
|||
/** When drivers are OK with mapped buffers during draw and other calls. */
|
||||
bool AllowMappedBuffersDuringExecution;
|
||||
|
||||
/**
|
||||
* Whether buffer creation, unsynchronized mapping, unmapping, and
|
||||
* deletion is thread-safe.
|
||||
*/
|
||||
bool BufferCreateMapUnsynchronizedThreadSafe;
|
||||
|
||||
/** GL_ARB_get_program_binary */
|
||||
GLuint NumProgramBinaryFormats;
|
||||
|
||||
|
@ -4150,6 +4156,15 @@ struct gl_constants
|
|||
/** Wether or not glBitmap uses red textures rather than alpha */
|
||||
bool BitmapUsesRed;
|
||||
|
||||
/** Whether the vertex buffer offset is a signed 32-bit integer. */
|
||||
bool VertexBufferOffsetIsInt32;
|
||||
|
||||
/** Whether the driver can handle MultiDrawElements with non-VBO indices. */
|
||||
bool MultiDrawWithUserIndices;
|
||||
|
||||
/** Whether out-of-order draw (Begin/End) optimizations are allowed. */
|
||||
bool AllowDrawOutOfOrder;
|
||||
|
||||
/** GL_ARB_gl_spirv */
|
||||
struct spirv_supported_capabilities SpirVCapabilities;
|
||||
|
||||
|
@ -4157,6 +4172,9 @@ struct gl_constants
|
|||
struct spirv_supported_extensions *SpirVExtensions;
|
||||
|
||||
char *VendorOverride;
|
||||
|
||||
/** Buffer size used to upload vertices from glBegin/glEnd. */
|
||||
unsigned glBeginEndBufferSize;
|
||||
};
|
||||
|
||||
|
||||
|
@ -4365,6 +4383,7 @@ struct gl_extensions
|
|||
GLboolean ATI_texture_env_combine3;
|
||||
GLboolean ATI_fragment_shader;
|
||||
GLboolean GREMEDY_string_marker;
|
||||
GLboolean INTEL_blackhole_render;
|
||||
GLboolean INTEL_conservative_rasterization;
|
||||
GLboolean INTEL_performance_query;
|
||||
GLboolean INTEL_shader_atomic_float_minmax;
|
||||
|
@ -4382,8 +4401,10 @@ struct gl_extensions
|
|||
GLboolean EXT_shader_framebuffer_fetch_non_coherent;
|
||||
GLboolean MESA_shader_integer_functions;
|
||||
GLboolean MESA_ycbcr_texture;
|
||||
GLboolean NV_alpha_to_coverage_dither_control;
|
||||
GLboolean NV_compute_shader_derivatives;
|
||||
GLboolean NV_conditional_render;
|
||||
GLboolean NV_copy_image;
|
||||
GLboolean NV_fill_rectangle;
|
||||
GLboolean NV_fog_distance;
|
||||
GLboolean NV_point_sprite;
|
||||
|
@ -4397,6 +4418,8 @@ struct gl_extensions
|
|||
GLboolean NV_conservative_raster_dilate;
|
||||
GLboolean NV_conservative_raster_pre_snap_triangles;
|
||||
GLboolean NV_conservative_raster_pre_snap;
|
||||
GLboolean NV_viewport_array2;
|
||||
GLboolean NV_viewport_swizzle;
|
||||
GLboolean NVX_gpu_memory_info;
|
||||
GLboolean TDFX_texture_compression_FXT1;
|
||||
GLboolean OES_EGL_image;
|
||||
|
@ -4421,12 +4444,6 @@ struct gl_extensions
|
|||
* while meta is in progress.
|
||||
*/
|
||||
GLubyte Version;
|
||||
/**
|
||||
* Force-enabled, yet unrecognized, extensions.
|
||||
* See _mesa_one_time_init_extension_overrides()
|
||||
*/
|
||||
#define MAX_UNRECOGNIZED_EXTENSIONS 16
|
||||
const char *unrecognized_extensions[MAX_UNRECOGNIZED_EXTENSIONS];
|
||||
};
|
||||
|
||||
|
||||
|
@ -4470,7 +4487,7 @@ struct gl_matrix_stack
|
|||
#define _NEW_TEXTURE_MATRIX (1u << 2) /**< gl_context::TextureMatrix */
|
||||
#define _NEW_COLOR (1u << 3) /**< gl_context::Color */
|
||||
#define _NEW_DEPTH (1u << 4) /**< gl_context::Depth */
|
||||
#define _NEW_EVAL (1u << 5) /**< gl_context::Eval, EvalMap */
|
||||
/* gap */
|
||||
#define _NEW_FOG (1u << 6) /**< gl_context::Fog */
|
||||
#define _NEW_HINT (1u << 7) /**< gl_context::Hint */
|
||||
#define _NEW_LIGHT (1u << 8) /**< gl_context::Light */
|
||||
|
@ -4557,7 +4574,7 @@ struct gl_dlist_state
|
|||
GLvertexformat ListVtxfmt;
|
||||
|
||||
GLubyte ActiveAttribSize[VERT_ATTRIB_MAX];
|
||||
GLfloat CurrentAttrib[VERT_ATTRIB_MAX][8];
|
||||
uint32_t CurrentAttrib[VERT_ATTRIB_MAX][8];
|
||||
|
||||
GLubyte ActiveMaterialSize[MAT_ATTRIB_MAX];
|
||||
GLfloat CurrentMaterial[MAT_ATTRIB_MAX][4];
|
||||
|
@ -4877,7 +4894,7 @@ struct gl_context
|
|||
|
||||
/*@}*/
|
||||
|
||||
struct glthread_state *GLThread;
|
||||
struct glthread_state GLThread;
|
||||
|
||||
struct gl_config Visual;
|
||||
struct gl_framebuffer *DrawBuffer; /**< buffer for writing */
|
||||
|
@ -5107,6 +5124,7 @@ struct gl_context
|
|||
struct gl_driver_flags DriverFlags;
|
||||
|
||||
GLboolean ViewportInitialized; /**< has viewport size been initialized? */
|
||||
GLboolean _AllowDrawOutOfOrder;
|
||||
|
||||
GLbitfield varying_vp_inputs; /**< mask of VERT_BIT_* flags */
|
||||
|
||||
|
@ -5141,6 +5159,8 @@ struct gl_context
|
|||
GLfloat ConservativeRasterDilate;
|
||||
GLenum16 ConservativeRasterMode;
|
||||
|
||||
GLboolean IntelBlackholeRender; /**< GL_INTEL_blackhole_render */
|
||||
|
||||
/** Does glVertexAttrib(0) alias glVertex()? */
|
||||
bool _AttribZeroAliasesVertex;
|
||||
|
||||
|
|
|
@ -31,6 +31,8 @@
|
|||
#ifndef PROG_PARAMETER_H
|
||||
#define PROG_PARAMETER_H
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include "prog_statevars.h"
|
||||
|
||||
#include <string.h>
|
||||
|
|
|
@ -63,11 +63,11 @@ extern void
|
|||
_mesa_set_program_error(struct gl_context *ctx, GLint pos, const char *string);
|
||||
|
||||
extern struct gl_program *
|
||||
_mesa_init_gl_program(struct gl_program *prog, GLenum target, GLuint id,
|
||||
bool is_arb_asm);
|
||||
_mesa_init_gl_program(struct gl_program *prog, gl_shader_stage stage,
|
||||
GLuint id, bool is_arb_asm);
|
||||
|
||||
extern struct gl_program *
|
||||
_mesa_new_program(struct gl_context *ctx, GLenum target, GLuint id,
|
||||
_mesa_new_program(struct gl_context *ctx, gl_shader_stage stage, GLuint id,
|
||||
bool is_arb_asm);
|
||||
|
||||
extern void
|
||||
|
|
|
@ -21,10 +21,10 @@
|
|||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "main/imports.h"
|
||||
|
||||
#include "main/errors.h"
|
||||
#include "symbol_table.h"
|
||||
#include "../../util/hash_table.h"
|
||||
#include "util/hash_table.h"
|
||||
#include "util/u_string.h"
|
||||
|
||||
struct symbol {
|
||||
|
@ -63,7 +63,7 @@ struct symbol {
|
|||
struct scope_level {
|
||||
/** Link to next (inner) scope level. */
|
||||
struct scope_level *next;
|
||||
|
||||
|
||||
/** Linked list of symbols with the same scope. */
|
||||
struct symbol *symbols;
|
||||
};
|
||||
|
|
|
@ -42,13 +42,13 @@ extern "C" {
|
|||
struct gl_context;
|
||||
|
||||
GLboolean
|
||||
_vbo_CreateContext(struct gl_context *ctx);
|
||||
_vbo_CreateContext(struct gl_context *ctx, bool use_buffer_objects);
|
||||
|
||||
void
|
||||
_vbo_DestroyContext(struct gl_context *ctx);
|
||||
|
||||
void
|
||||
vbo_exec_invalidate_state(struct gl_context *ctx);
|
||||
vbo_exec_update_eval_maps(struct gl_context *ctx);
|
||||
|
||||
void
|
||||
_vbo_install_exec_vtxfmt(struct gl_context *ctx);
|
||||
|
@ -87,23 +87,25 @@ vbo_save_EndCallList(struct gl_context *ctx);
|
|||
void
|
||||
vbo_delete_minmax_cache(struct gl_buffer_object *bufferObj);
|
||||
|
||||
void
|
||||
vbo_get_minmax_index_mapped(unsigned count, unsigned index_size,
|
||||
unsigned restartIndex, bool restart,
|
||||
const void *indices,
|
||||
unsigned *min_index, unsigned *max_index);
|
||||
|
||||
void
|
||||
vbo_get_minmax_indices(struct gl_context *ctx, const struct _mesa_prim *prim,
|
||||
const struct _mesa_index_buffer *ib,
|
||||
GLuint *min_index, GLuint *max_index, GLuint nr_prims);
|
||||
|
||||
void
|
||||
vbo_use_buffer_objects(struct gl_context *ctx);
|
||||
|
||||
void
|
||||
vbo_always_unmap_buffers(struct gl_context *ctx);
|
||||
|
||||
void
|
||||
vbo_sw_primitive_restart(struct gl_context *ctx,
|
||||
const struct _mesa_prim *prim,
|
||||
GLuint nr_prims,
|
||||
const struct _mesa_index_buffer *ib,
|
||||
struct gl_buffer_object *indirect);
|
||||
GLuint num_instances, GLuint base_instance,
|
||||
struct gl_buffer_object *indirect,
|
||||
GLsizeiptr indirect_offset);
|
||||
|
||||
|
||||
const struct gl_array_attributes*
|
||||
|
|
|
@ -23,8 +23,8 @@
|
|||
|
||||
#include <string.h>
|
||||
|
||||
#include "main/macros.h"
|
||||
#include "blob.h"
|
||||
#include "u_math.h"
|
||||
|
||||
#ifdef HAVE_VALGRIND
|
||||
#include <valgrind.h>
|
||||
|
@ -85,7 +85,7 @@ grow_to_fit(struct blob *blob, size_t additional)
|
|||
static bool
|
||||
align_blob(struct blob *blob, size_t alignment)
|
||||
{
|
||||
const size_t new_size = ALIGN(blob->size, alignment);
|
||||
const size_t new_size = align64(blob->size, alignment);
|
||||
|
||||
if (blob->size < new_size) {
|
||||
if (!grow_to_fit(blob, new_size - blob->size))
|
||||
|
@ -102,7 +102,7 @@ align_blob(struct blob *blob, size_t alignment)
|
|||
static void
|
||||
align_blob_reader(struct blob_reader *blob, size_t alignment)
|
||||
{
|
||||
blob->current = blob->data + ALIGN(blob->current - blob->data, alignment);
|
||||
blob->current = blob->data + align64(blob->current - blob->data, alignment);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -162,7 +162,7 @@ blob_write_bytes(struct blob *blob, const void *bytes, size_t to_write)
|
|||
|
||||
VG(VALGRIND_CHECK_MEM_IS_DEFINED(bytes, to_write));
|
||||
|
||||
if (blob->data)
|
||||
if (blob->data && to_write > 0)
|
||||
memcpy(blob->data + blob->size, bytes, to_write);
|
||||
blob->size += to_write;
|
||||
|
||||
|
@ -212,7 +212,16 @@ BLOB_WRITE_TYPE(blob_write_uint64, uint64_t)
|
|||
BLOB_WRITE_TYPE(blob_write_intptr, intptr_t)
|
||||
|
||||
#define ASSERT_ALIGNED(_offset, _align) \
|
||||
assert(ALIGN((_offset), (_align)) == (_offset))
|
||||
assert(align64((_offset), (_align)) == (_offset))
|
||||
|
||||
bool
|
||||
blob_overwrite_uint8 (struct blob *blob,
|
||||
size_t offset,
|
||||
uint8_t value)
|
||||
{
|
||||
ASSERT_ALIGNED(offset, sizeof(value));
|
||||
return blob_overwrite_bytes(blob, offset, &value, sizeof(value));
|
||||
}
|
||||
|
||||
bool
|
||||
blob_overwrite_uint32 (struct blob *blob,
|
||||
|
@ -286,7 +295,7 @@ blob_copy_bytes(struct blob_reader *blob, void *dest, size_t size)
|
|||
const void *bytes;
|
||||
|
||||
bytes = blob_read_bytes(blob, size);
|
||||
if (bytes == NULL)
|
||||
if (bytes == NULL || size == 0)
|
||||
return;
|
||||
|
||||
memcpy(dest, bytes, size);
|
||||
|
|
|
@ -183,6 +183,21 @@ blob_overwrite_bytes(struct blob *blob,
|
|||
bool
|
||||
blob_write_uint8(struct blob *blob, uint8_t value);
|
||||
|
||||
/**
|
||||
* Overwrite a uint8_t previously written to the blob.
|
||||
*
|
||||
* Writes a uint8_t value to an existing portion of the blob at an offset of
|
||||
* \offset. This data range must have previously been written to the blob by
|
||||
* one of the blob_write_* calls.
|
||||
*
|
||||
* \return True unless the requested position or position+to_write lie outside
|
||||
* the current blob's size.
|
||||
*/
|
||||
bool
|
||||
blob_overwrite_uint8(struct blob *blob,
|
||||
size_t offset,
|
||||
uint8_t value);
|
||||
|
||||
/**
|
||||
* Add a uint16_t to a blob.
|
||||
*
|
||||
|
|
|
@ -23,7 +23,6 @@
|
|||
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include "main/macros.h"
|
||||
#include "debug.h"
|
||||
#include "u_string.h"
|
||||
|
||||
|
|
|
@ -51,8 +51,7 @@
|
|||
#include "util/u_queue.h"
|
||||
#include "util/mesa-sha1.h"
|
||||
#include "util/ralloc.h"
|
||||
#include "main/compiler.h"
|
||||
#include "main/errors.h"
|
||||
#include "util/compiler.h"
|
||||
|
||||
#include "disk_cache.h"
|
||||
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -25,6 +25,7 @@
|
|||
#define UTIL_FUTEX_H
|
||||
|
||||
#if defined(HAVE_LINUX_FUTEX_H)
|
||||
#define UTIL_FUTEX_SUPPORTED 1
|
||||
|
||||
#include <limits.h>
|
||||
#include <stdint.h>
|
||||
|
@ -52,6 +53,7 @@ static inline int futex_wait(uint32_t *addr, int32_t value, const struct timespe
|
|||
}
|
||||
|
||||
#elif defined(__FreeBSD__)
|
||||
#define UTIL_FUTEX_SUPPORTED 1
|
||||
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
|
@ -86,6 +88,7 @@ static inline int futex_wait(uint32_t *addr, int32_t value, struct timespec *tim
|
|||
}
|
||||
|
||||
#elif defined(__OpenBSD__)
|
||||
#define UTIL_FUTEX_SUPPORTED 1
|
||||
|
||||
#include <sys/time.h>
|
||||
#include <sys/futex.h>
|
||||
|
@ -103,6 +106,8 @@ static inline int futex_wait(uint32_t *addr, int32_t value, const struct timespe
|
|||
return futex(addr, FUTEX_WAIT, value, &tsrel, NULL);
|
||||
}
|
||||
|
||||
#else
|
||||
#define UTIL_FUTEX_SUPPORTED 0
|
||||
#endif
|
||||
|
||||
#endif /* UTIL_FUTEX_H */
|
||||
|
|
|
@ -33,8 +33,8 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define FP16_ONE 0x3C00
|
||||
#define FP16_ZERO 0
|
||||
#define FP16_ONE ((uint16_t) 0x3c00)
|
||||
#define FP16_ZERO ((uint16_t) 0)
|
||||
|
||||
uint16_t _mesa_float_to_half(float val);
|
||||
float _mesa_half_to_float(uint16_t val);
|
||||
|
@ -62,6 +62,22 @@ _mesa_half_is_negative(uint16_t h)
|
|||
}
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
/* Helper class for disambiguating fp16 from uint16_t in C++ overloads */
|
||||
|
||||
struct float16_t {
|
||||
uint16_t bits;
|
||||
float16_t(float f) : bits(_mesa_float_to_half(f)) {}
|
||||
float16_t(double d) : bits(_mesa_float_to_half(d)) {}
|
||||
float16_t(uint16_t bits) : bits(bits) {}
|
||||
static float16_t one() { return float16_t(FP16_ONE); }
|
||||
static float16_t zero() { return float16_t(FP16_ZERO); }
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern C */
|
||||
#endif
|
||||
|
|
Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше
Загрузка…
Ссылка в новой задаче