зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1674524 - implement KHR_blend_equation_advanced in SWGL. r=bradwerth
This patch has a few moving parts. First, we have to tell WR that it is actually allowed to use the extension when it detects it. We have to make the glsl-to-cxx translator accept the blend_support_all_equations layout qualifier. We have to enable generation of advanced-blend-equation shader variants in the SWGL build setup. Then we report the actual extension inside SWGL. Finally, we add all the necessary blend equation enums, hash them down to a blend key, and implement all the blend modes therein. Differential Revision: https://phabricator.services.mozilla.com/D103804
This commit is contained in:
Родитель
a79879c249
Коммит
7b87580cdf
|
@ -1660,6 +1660,9 @@ pub extern "C" fn wr_window_new(
|
|||
// SWGL doesn't support the GL_ALWAYS depth comparison function used by
|
||||
// `clear_caches_with_quads`, but scissored clears work well.
|
||||
clear_caches_with_quads: !software && !allow_scissored_cache_clears,
|
||||
// SWGL supports KHR_blend_equation_advanced safely, but we haven't yet
|
||||
// tested other HW platforms to determine if it is safe to allow them.
|
||||
allow_advanced_blend_equation: software,
|
||||
start_debug_server,
|
||||
surface_origin_is_top_left,
|
||||
compositor_config,
|
||||
|
|
|
@ -1892,11 +1892,33 @@ fn translate_declaration(
|
|||
syntax::Declaration::FunctionPrototype(p) => {
|
||||
Declaration::FunctionPrototype(translate_function_prototype(state, p))
|
||||
}
|
||||
syntax::Declaration::Global(_ty, _ids) => {
|
||||
panic!();
|
||||
// glsl non-es supports requalifying variables
|
||||
// we don't right now
|
||||
//Declaration::Global(..)
|
||||
syntax::Declaration::Global(ty, ids) => {
|
||||
// glsl non-es supports requalifying variables, but we don't yet.
|
||||
// However, we still want to allow global layout qualifiers for
|
||||
// KHR_blend_equation_advanced.
|
||||
if !ids.is_empty() {
|
||||
panic!();
|
||||
}
|
||||
let _ = for qual in &ty.qualifiers {
|
||||
match qual {
|
||||
syntax::TypeQualifierSpec::Layout(l) => {
|
||||
for id in &l.ids {
|
||||
match id {
|
||||
syntax::LayoutQualifierSpec::Identifier(key, _) => {
|
||||
match key.as_str() {
|
||||
"blend_support_all_equations" => (),
|
||||
_ => panic!(),
|
||||
}
|
||||
}
|
||||
_ => panic!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
syntax::TypeQualifierSpec::Storage(syntax::StorageQualifier::Out) => (),
|
||||
_ => panic!(),
|
||||
}
|
||||
};
|
||||
Declaration::Global(lift_type_qualifier_for_declaration(state, &Some(ty.clone())).unwrap(), ids.clone())
|
||||
}
|
||||
syntax::Declaration::InitDeclaratorList(dl) => {
|
||||
translate_init_declarator_list(state, dl, default_run_class)
|
||||
|
|
|
@ -2313,19 +2313,22 @@ pub fn show_declaration(state: &mut OutputState, d: &hir::Declaration) {
|
|||
//state.write(";\n");
|
||||
}
|
||||
hir::Declaration::Global(ref qual, ref identifiers) => {
|
||||
show_type_qualifier(state, &qual);
|
||||
// We only want to output GLSL layout qualifiers if not C++
|
||||
if !state.output_cxx {
|
||||
show_type_qualifier(state, &qual);
|
||||
|
||||
if !identifiers.is_empty() {
|
||||
let mut iter = identifiers.iter();
|
||||
let first = iter.next().unwrap();
|
||||
show_identifier(state, first);
|
||||
if !identifiers.is_empty() {
|
||||
let mut iter = identifiers.iter();
|
||||
let first = iter.next().unwrap();
|
||||
show_identifier(state, first);
|
||||
|
||||
for identifier in iter {
|
||||
let _ = write!(state, ", {}", identifier);
|
||||
for identifier in iter {
|
||||
let _ = write!(state, ", {}", identifier);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
state.write(";\n");
|
||||
state.write(";\n");
|
||||
}
|
||||
}
|
||||
hir::Declaration::StructDefinition(ref sym) => {
|
||||
show_sym_decl(state, sym);
|
||||
|
|
|
@ -108,6 +108,7 @@ fn main() {
|
|||
let shader_flags =
|
||||
ShaderFeatureFlags::GL |
|
||||
ShaderFeatureFlags::DUAL_SOURCE_BLENDING |
|
||||
ShaderFeatureFlags::ADVANCED_BLEND_EQUATION |
|
||||
ShaderFeatureFlags::DEBUG;
|
||||
let mut shaders: Vec<String> = Vec::new();
|
||||
for (name, features) in get_shader_features(shader_flags) {
|
||||
|
|
|
@ -596,13 +596,13 @@ static ALWAYS_INLINE V8<int16_t> linearRowTapsR8(S sampler, I32 ix,
|
|||
auto b0 = unaligned_load<V2<uint8_t>>(&buf[ix.y]);
|
||||
auto c0 = unaligned_load<V2<uint8_t>>(&buf[ix.z]);
|
||||
auto d0 = unaligned_load<V2<uint8_t>>(&buf[ix.w]);
|
||||
auto abcd0 = CONVERT(combine(combine(a0, b0), combine(c0, d0)), V8<int16_t>);
|
||||
auto abcd0 = CONVERT(combine(a0, b0, c0, d0), V8<int16_t>);
|
||||
buf += stridey;
|
||||
auto a1 = unaligned_load<V2<uint8_t>>(&buf[ix.x]);
|
||||
auto b1 = unaligned_load<V2<uint8_t>>(&buf[ix.y]);
|
||||
auto c1 = unaligned_load<V2<uint8_t>>(&buf[ix.z]);
|
||||
auto d1 = unaligned_load<V2<uint8_t>>(&buf[ix.w]);
|
||||
auto abcd1 = CONVERT(combine(combine(a1, b1), combine(c1, d1)), V8<int16_t>);
|
||||
auto abcd1 = CONVERT(combine(a1, b1, c1, d1), V8<int16_t>);
|
||||
abcd0 += ((abcd1 - abcd0) * fracy) >> 7;
|
||||
return abcd0;
|
||||
}
|
||||
|
|
|
@ -762,10 +762,13 @@ struct Program {
|
|||
};
|
||||
|
||||
// clang-format off
|
||||
// for GL defines to fully expand
|
||||
// Fully-expand GL defines while ignoring more than 4 suffixes
|
||||
#define CONCAT_KEY(prefix, x, y, z, w, ...) prefix##x##y##z##w
|
||||
#define BLEND_KEY(...) CONCAT_KEY(BLEND_, __VA_ARGS__, 0, 0)
|
||||
#define MASK_BLEND_KEY(...) CONCAT_KEY(MASK_BLEND_, __VA_ARGS__, 0, 0)
|
||||
// Generate a blend key enum symbol
|
||||
#define BLEND_KEY(...) CONCAT_KEY(BLEND_, __VA_ARGS__, 0, 0, 0)
|
||||
// Generate a blend key symbol for a clip-mask variation
|
||||
#define MASK_BLEND_KEY(...) CONCAT_KEY(MASK_BLEND_, __VA_ARGS__, 0, 0, 0)
|
||||
// Utility macro to easily generate similar code for all implemented blend modes
|
||||
#define FOR_EACH_BLEND_KEY(macro) \
|
||||
macro(GL_ONE, GL_ZERO, 0, 0) \
|
||||
macro(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ONE_MINUS_SRC_ALPHA) \
|
||||
|
@ -778,7 +781,24 @@ struct Program {
|
|||
macro(GL_ONE, GL_ONE, GL_ONE, GL_ONE_MINUS_SRC_ALPHA) \
|
||||
macro(GL_ONE_MINUS_DST_ALPHA, GL_ONE, GL_ZERO, GL_ONE) \
|
||||
macro(GL_CONSTANT_COLOR, GL_ONE_MINUS_SRC_COLOR, 0, 0) \
|
||||
macro(GL_ONE, GL_ONE_MINUS_SRC1_COLOR, 0, 0)
|
||||
macro(GL_ONE, GL_ONE_MINUS_SRC1_COLOR, 0, 0) \
|
||||
macro(GL_MIN, 0, 0, 0) \
|
||||
macro(GL_MAX, 0, 0, 0) \
|
||||
macro(GL_MULTIPLY_KHR, 0, 0, 0) \
|
||||
macro(GL_SCREEN_KHR, 0, 0, 0) \
|
||||
macro(GL_OVERLAY_KHR, 0, 0, 0) \
|
||||
macro(GL_DARKEN_KHR, 0, 0, 0) \
|
||||
macro(GL_LIGHTEN_KHR, 0, 0, 0) \
|
||||
macro(GL_COLORDODGE_KHR, 0, 0, 0) \
|
||||
macro(GL_COLORBURN_KHR, 0, 0, 0) \
|
||||
macro(GL_HARDLIGHT_KHR, 0, 0, 0) \
|
||||
macro(GL_SOFTLIGHT_KHR, 0, 0, 0) \
|
||||
macro(GL_DIFFERENCE_KHR, 0, 0, 0) \
|
||||
macro(GL_EXCLUSION_KHR, 0, 0, 0) \
|
||||
macro(GL_HSL_HUE_KHR, 0, 0, 0) \
|
||||
macro(GL_HSL_SATURATION_KHR, 0, 0, 0) \
|
||||
macro(GL_HSL_COLOR_KHR, 0, 0, 0) \
|
||||
macro(GL_HSL_LUMINOSITY_KHR, 0, 0, 0)
|
||||
|
||||
#define DEFINE_BLEND_KEY(...) BLEND_KEY(__VA_ARGS__),
|
||||
#define DEFINE_MASK_BLEND_KEY(...) MASK_BLEND_KEY(__VA_ARGS__),
|
||||
|
@ -1299,6 +1319,8 @@ static const char* const extensions[] = {
|
|||
"GL_ARB_invalidate_subdata",
|
||||
"GL_ARB_texture_storage",
|
||||
"GL_EXT_timer_query",
|
||||
"GL_KHR_blend_equation_advanced",
|
||||
"GL_KHR_blend_equation_advanced_coherent",
|
||||
"GL_APPLE_rgb_422",
|
||||
};
|
||||
|
||||
|
@ -1437,6 +1459,37 @@ GLenum remap_blendfunc(GLenum rgb, GLenum a) {
|
|||
return a;
|
||||
}
|
||||
|
||||
// Generate a hashed blend key based on blend func and equation state. This
|
||||
// allows all the blend state to be processed down to a blend key that can be
|
||||
// dealt with inside a single switch statement.
|
||||
static void hash_blend_key() {
|
||||
GLenum srgb = ctx->blendfunc_srgb;
|
||||
GLenum drgb = ctx->blendfunc_drgb;
|
||||
GLenum sa = ctx->blendfunc_sa;
|
||||
GLenum da = ctx->blendfunc_da;
|
||||
GLenum equation = ctx->blend_equation;
|
||||
#define HASH_BLEND_KEY(x, y, z, w) ((x << 4) | (y) | (z << 24) | (w << 20))
|
||||
// Basic non-separate blend funcs used the two argument form
|
||||
int hash = HASH_BLEND_KEY(srgb, drgb, 0, 0);
|
||||
// Separate alpha blend funcs use the 4 argument hash
|
||||
if (srgb != sa || drgb != da) hash |= HASH_BLEND_KEY(0, 0, sa, da);
|
||||
// Any other blend equation than the default func_add ignores the func and
|
||||
// instead generates a one-argument hash based on the equation
|
||||
if (equation != GL_FUNC_ADD) hash = HASH_BLEND_KEY(equation, 0, 0, 0);
|
||||
switch (hash) {
|
||||
#define MAP_BLEND_KEY(...) \
|
||||
case HASH_BLEND_KEY(__VA_ARGS__): \
|
||||
ctx->blend_key = BLEND_KEY(__VA_ARGS__); \
|
||||
break;
|
||||
FOR_EACH_BLEND_KEY(MAP_BLEND_KEY)
|
||||
default:
|
||||
debugf("blendfunc: %x, %x, separate: %x, %x, equation: %x\n", srgb, drgb,
|
||||
sa, da, equation);
|
||||
assert(false);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void BlendFunc(GLenum srgb, GLenum drgb, GLenum sa, GLenum da) {
|
||||
ctx->blendfunc_srgb = srgb;
|
||||
ctx->blendfunc_drgb = drgb;
|
||||
|
@ -1445,20 +1498,7 @@ void BlendFunc(GLenum srgb, GLenum drgb, GLenum sa, GLenum da) {
|
|||
ctx->blendfunc_sa = sa;
|
||||
ctx->blendfunc_da = da;
|
||||
|
||||
#define HASH_BLEND_KEY(x, y, z, w) ((x << 4) | (y) | (z << 24) | (w << 20))
|
||||
int hash = HASH_BLEND_KEY(srgb, drgb, 0, 0);
|
||||
if (srgb != sa || drgb != da) hash |= HASH_BLEND_KEY(0, 0, sa, da);
|
||||
switch (hash) {
|
||||
#define MAP_BLEND_KEY(...) \
|
||||
case HASH_BLEND_KEY(__VA_ARGS__): \
|
||||
ctx->blend_key = BLEND_KEY(__VA_ARGS__); \
|
||||
break;
|
||||
FOR_EACH_BLEND_KEY(MAP_BLEND_KEY)
|
||||
default:
|
||||
debugf("blendfunc: %x, %x, separate: %x, %x\n", srgb, drgb, sa, da);
|
||||
assert(false);
|
||||
break;
|
||||
}
|
||||
hash_blend_key();
|
||||
}
|
||||
|
||||
void BlendColor(GLfloat r, GLfloat g, GLfloat b, GLfloat a) {
|
||||
|
@ -1467,8 +1507,12 @@ void BlendColor(GLfloat r, GLfloat g, GLfloat b, GLfloat a) {
|
|||
}
|
||||
|
||||
void BlendEquation(GLenum mode) {
|
||||
assert(mode == GL_FUNC_ADD);
|
||||
ctx->blend_equation = mode;
|
||||
assert(mode == GL_FUNC_ADD || mode == GL_MIN || mode == GL_MAX ||
|
||||
(mode >= GL_MULTIPLY_KHR && mode <= GL_HSL_LUMINOSITY_KHR));
|
||||
if (mode != ctx->blend_equation) {
|
||||
ctx->blend_equation = mode;
|
||||
hash_blend_key();
|
||||
}
|
||||
}
|
||||
|
||||
void DepthMask(GLboolean flag) { ctx->depthmask = flag; }
|
||||
|
@ -2990,8 +3034,9 @@ static ALWAYS_INLINE HalfRGBA8 packRGBA8(I32 a, I32 b) {
|
|||
#endif
|
||||
}
|
||||
|
||||
static ALWAYS_INLINE WideRGBA8 pack_pixels_RGBA8(const vec4& v) {
|
||||
ivec4 i = round_pixel(v);
|
||||
static ALWAYS_INLINE WideRGBA8 pack_pixels_RGBA8(const vec4& v,
|
||||
float maxval = 1.0f) {
|
||||
ivec4 i = round_pixel(v, maxval);
|
||||
HalfRGBA8 xz = packRGBA8(i.z, i.x);
|
||||
HalfRGBA8 yw = packRGBA8(i.y, i.w);
|
||||
HalfRGBA8 xyzwl = zipLow(xz, yw);
|
||||
|
@ -3011,6 +3056,12 @@ static ALWAYS_INLINE WideRGBA8 pack_pixels_RGBA8() {
|
|||
return pack_pixels_RGBA8(fragment_shader->gl_FragColor);
|
||||
}
|
||||
|
||||
// Round a WideRGBA32F vector to integer pixel values and pack it as
// WideRGBA8. The vector is reinterpreted as a vec4 without transposing, and
// `maxval` is forwarded to round_pixel as the value that maps to full
// intensity.
static ALWAYS_INLINE WideRGBA8 pack_pixels_RGBA8(WideRGBA32F v,
                                                 float maxval = 1.0f) {
  ivec4 rounded = round_pixel(bit_cast<vec4>(v), maxval);
  HalfRGBA8 lo = packRGBA8(rounded.x, rounded.y);
  HalfRGBA8 hi = packRGBA8(rounded.z, rounded.w);
  return combine(lo, hi);
}
|
||||
|
||||
// Load a partial span > 0 and < 4 pixels.
|
||||
template <typename V, typename P>
|
||||
static ALWAYS_INLINE V partial_load_span(const P* src, int span) {
|
||||
|
@ -3079,10 +3130,128 @@ static ALWAYS_INLINE T addlow(T x, T y) {
|
|||
return bit_cast<T>(bit_cast<bytes>(x) + bit_cast<bytes>(y));
|
||||
}
|
||||
|
||||
static ALWAYS_INLINE WideRGBA8 alphas(WideRGBA8 c) {
|
||||
// Replace color components of each pixel with the pixel's alpha values.
|
||||
template <typename T>
|
||||
static ALWAYS_INLINE T alphas(T c) {
|
||||
return SHUFFLE(c, c, 3, 3, 3, 3, 7, 7, 7, 7, 11, 11, 11, 11, 15, 15, 15, 15);
|
||||
}
|
||||
|
||||
// Replace the alpha values of the first vector with alpha values from the
|
||||
// second, while leaving the color components unmodified.
|
||||
template <typename T>
|
||||
static ALWAYS_INLINE T set_alphas(T c, T a) {
|
||||
return SHUFFLE(c, a, 0, 1, 2, 19, 4, 5, 6, 23, 8, 9, 10, 27, 12, 13, 14, 31);
|
||||
}
|
||||
|
||||
// Miscellaneous helper functions for working with packed RGBA8 data.
|
||||
static ALWAYS_INLINE HalfRGBA8 if_then_else(V8<int16_t> c, HalfRGBA8 t,
|
||||
HalfRGBA8 e) {
|
||||
return bit_cast<HalfRGBA8>((c & t) | (~c & e));
|
||||
}
|
||||
|
||||
template <typename T, typename C, int N>
|
||||
static ALWAYS_INLINE VectorType<T, N> if_then_else(VectorType<C, N> c,
|
||||
VectorType<T, N> t,
|
||||
VectorType<T, N> e) {
|
||||
return combine(if_then_else(lowHalf(c), lowHalf(t), lowHalf(e)),
|
||||
if_then_else(highHalf(c), highHalf(t), highHalf(e)));
|
||||
}
|
||||
|
||||
// Per-lane minimum of two HalfRGBA8 vectors.
static ALWAYS_INLINE HalfRGBA8 min(HalfRGBA8 x, HalfRGBA8 y) {
#if USE_SSE2
  // _mm_min_epi16 compares lanes as signed 16-bit values.
  // NOTE(review): presumably fine because lanes hold widened 8-bit channel
  // data that stays in the non-negative int16 range - confirm.
  return bit_cast<HalfRGBA8>(
      _mm_min_epi16(bit_cast<V8<int16_t>>(x), bit_cast<V8<int16_t>>(y)));
#elif USE_NEON
  return vminq_u16(x, y);
#else
  // Portable fallback: select on the comparison mask. This must use the
  // parameters x and y; the names a and b are not declared here.
  return if_then_else(x < y, x, y);
#endif
}
|
||||
|
||||
template <typename T, int N>
|
||||
static ALWAYS_INLINE VectorType<T, N> min(VectorType<T, N> x,
|
||||
VectorType<T, N> y) {
|
||||
return combine(min(lowHalf(x), lowHalf(y)), min(highHalf(x), highHalf(y)));
|
||||
}
|
||||
|
||||
// Per-lane maximum of two HalfRGBA8 vectors.
static ALWAYS_INLINE HalfRGBA8 max(HalfRGBA8 x, HalfRGBA8 y) {
#if USE_SSE2
  // _mm_max_epi16 compares lanes as signed 16-bit values.
  // NOTE(review): presumably fine because lanes hold widened 8-bit channel
  // data that stays in the non-negative int16 range - confirm.
  return bit_cast<HalfRGBA8>(
      _mm_max_epi16(bit_cast<V8<int16_t>>(x), bit_cast<V8<int16_t>>(y)));
#elif USE_NEON
  return vmaxq_u16(x, y);
#else
  // Portable fallback: select on the comparison mask. This must use the
  // parameters x and y; the names a and b are not declared here.
  return if_then_else(x > y, x, y);
#endif
}
|
||||
|
||||
template <typename T, int N>
|
||||
static ALWAYS_INLINE VectorType<T, N> max(VectorType<T, N> x,
|
||||
VectorType<T, N> y) {
|
||||
return combine(max(lowHalf(x), lowHalf(y)), max(highHalf(x), highHalf(y)));
|
||||
}
|
||||
|
||||
template <typename T, int N>
|
||||
static ALWAYS_INLINE VectorType<T, N> recip(VectorType<T, N> v) {
|
||||
return combine(recip(lowHalf(v)), recip(highHalf(v)));
|
||||
}
|
||||
|
||||
// Helper to get the reciprocal if the value is non-zero, or otherwise default
|
||||
// to the supplied fallback value.
|
||||
template <typename V>
|
||||
static ALWAYS_INLINE V recip_or(V v, float f) {
|
||||
return if_then_else(v != V(0.0f), recip(v), V(f));
|
||||
}
|
||||
|
||||
template <typename T, int N>
|
||||
static ALWAYS_INLINE VectorType<T, N> inversesqrt(VectorType<T, N> v) {
|
||||
return combine(inversesqrt(lowHalf(v)), inversesqrt(highHalf(v)));
|
||||
}
|
||||
|
||||
// Extract the alpha components so that we can cheaply calculate the reciprocal
|
||||
// on a single SIMD register. Then multiply the duplicated alpha reciprocal with
|
||||
// the pixel data. 0 alpha is treated as transparent black.
|
||||
// Divide every component of each pixel by that pixel's alpha. The four alpha
// lanes (3, 7, 11, 15) are gathered into one Float so that a single reciprocal
// covers all pixels; a zero alpha yields a zero scale, so such pixels come out
// as all zeros.
static ALWAYS_INLINE WideRGBA32F unpremultiply(WideRGBA32F v) {
  Float alpha = (Float){v[3], v[7], v[11], v[15]};
  Float inv = recip_or(alpha, 0.0f);
  // Broadcast each pixel's reciprocal alpha across that pixel's four lanes.
  auto scale = combine(inv.xxxx, inv.yyyy, inv.zzzz, inv.wwww);
  return v * scale;
}
|
||||
|
||||
// Packed RGBA32F data is AoS in BGRA order. Transpose it to SoA and swizzle to
|
||||
// RGBA to unpack.
|
||||
static ALWAYS_INLINE vec4 unpack(PackedRGBA32F c) {
|
||||
return bit_cast<vec4>(
|
||||
SHUFFLE(c, c, 2, 6, 10, 14, 1, 5, 9, 13, 0, 4, 8, 12, 3, 7, 11, 15));
|
||||
}
|
||||
|
||||
// The following lum/sat functions mostly follow the KHR_blend_equation_advanced
|
||||
// specification but are rearranged to work on premultiplied data.
|
||||
// Weighted sum of the three components using weights 0.30, 0.59 and 0.11.
static ALWAYS_INLINE Float lumv3(vec3 v) {
  Float wx = v.x * 0.30f;
  Float wy = v.y * 0.59f;
  Float wz = v.z * 0.11f;
  return wx + wy + wz;
}
|
||||
|
||||
static ALWAYS_INLINE Float minv3(vec3 v) { return min(min(v.x, v.y), v.z); }
|
||||
|
||||
static ALWAYS_INLINE Float maxv3(vec3 v) { return max(max(v.x, v.y), v.z); }
|
||||
|
||||
// Rescale the color offsets `v` around luminosity `lum` and add `lum` back.
// The scale is lum * (alpha - lum) / (mincol * maxcol), falling back to 0 when
// that denominator is zero.
static inline vec3 clip_color(vec3 v, Float lum, Float alpha) {
  Float headroom = alpha - lum;
  Float mincol = max(-minv3(v), lum);
  Float maxcol = max(maxv3(v), headroom);
  Float scale = lum * headroom * recip_or(mincol * maxcol, 0.0f);
  return lum + v * scale;
}
|
||||
|
||||
// Subtract `base`'s own luminosity from it, then clip the result against the
// luminosity of `ref` and the given alpha.
static inline vec3 set_lum(vec3 base, vec3 ref, Float alpha) {
  vec3 offsets = base - lumv3(base);
  Float target = lumv3(ref);
  return clip_color(offsets, target, alpha);
}
|
||||
|
||||
// Rescale `base`'s component range (max - min) to match the range of `sref`,
// then apply the luminosity of `lref` via set_lum.
static inline vec3 set_lum_sat(vec3 base, vec3 sref, vec3 lref, Float alpha) {
  // Shift base so that its smallest component is zero; its largest component
  // is then the extent of the range.
  vec3 shifted = base - minv3(base);
  Float extent = maxv3(shifted);
  Float target = maxv3(sref) - minv3(sref);
  // A zero extent gives a zero reciprocal, so the rescaled color is black, as
  // per specification.
  vec3 rescaled = shifted * target * recip_or(extent, 0.0f);
  return set_lum(rescaled, lref, alpha);
}
|
||||
|
||||
// A pointer into the color buffer for the start of the span.
|
||||
static void* swgl_SpanBuf = nullptr;
|
||||
// A pointer into the clip mask for the start of the span.
|
||||
|
@ -3173,6 +3342,182 @@ static ALWAYS_INLINE WideRGBA8 blend_pixels(uint32_t* buf, PackedRGBA8 pdst,
|
|||
return muldiv255(src, mask) + dst -
|
||||
muldiv255(dst, muldiv255(secondary, mask));
|
||||
}
|
||||
case BLEND_CASE(GL_MIN):
|
||||
return min(src, dst);
|
||||
case BLEND_CASE(GL_MAX):
|
||||
return max(src, dst);
|
||||
|
||||
// clang-format off
|
||||
// The KHR_blend_equation_advanced spec describes the blend equations such
|
||||
// that the unpremultiplied values Cs, Cd, As, Ad and function f combine to
|
||||
// the result:
|
||||
// Cr = f(Cs,Cd)*As*Ad + Cs*As*(1-Ad) + Cd*Ad*(1-As)
|
||||
// Ar = As*Ad + As*(1-Ad) + Ad*(1-As)
|
||||
// However, working with unpremultiplied values requires expensive math to
|
||||
// unpremultiply and premultiply again during blending. We can use the fact
|
||||
// that premultiplied value P = C*A and simplify the equations such that no
|
||||
// unpremultiplied colors are necessary, allowing us to stay with integer
|
||||
// math that avoids floating-point conversions in the common case. Some of
|
||||
// the blend modes require division or sqrt, in which case we do convert
|
||||
// to (possibly transposed/unpacked) floating-point to implement the mode.
|
||||
// However, most common modes can still use cheaper premultiplied integer
|
||||
// math. As an example, the multiply mode f(Cs,Cd) = Cs*Cd is simplified
|
||||
// to:
|
||||
// Cr = Cs*Cd*As*Ad + Cs*As*(1-Ad) + Cd*Ad*(1-As)
|
||||
// .. Pr = Ps*Pd + Ps - Ps*Ad + Pd - Pd*As
|
||||
// Ar = As*Ad + As - As*Ad + Ad - Ad*As
|
||||
// .. Ar = As + Ad - As*Ad
|
||||
// Note that the alpha equation is the same for all blend equations, such
|
||||
// that so long as the implementation results in As + Ad - As*Ad, we can
|
||||
// avoid using separate instructions to compute the alpha result, which is
|
||||
// dependent on the math used to implement each blend mode. The exact
|
||||
// reductions used to get the final math for every blend mode are too
|
||||
// involved to show here in comments, but mostly follows from replacing
|
||||
// Cs*As and Cd*Ad with Ps and Pd while factoring out as many common terms
|
||||
// as possible.
|
||||
// clang-format on
|
||||
|
||||
case BLEND_CASE(GL_MULTIPLY_KHR): {
|
||||
WideRGBA8 diff = muldiv255(alphas(src) - (src & RGB_MASK),
|
||||
alphas(dst) - (dst & RGB_MASK));
|
||||
return src + dst + (diff & RGB_MASK) - alphas(diff);
|
||||
}
|
||||
case BLEND_CASE(GL_SCREEN_KHR):
|
||||
return src + dst - muldiv255(src, dst);
|
||||
case BLEND_CASE(GL_OVERLAY_KHR): {
|
||||
WideRGBA8 srcA = alphas(src);
|
||||
WideRGBA8 dstA = alphas(dst);
|
||||
WideRGBA8 diff = muldiv255(src, dst) + muldiv255(srcA - src, dstA - dst);
|
||||
return src + dst +
|
||||
if_then_else(dst * 2 <= dstA, (diff & RGB_MASK) - alphas(diff),
|
||||
-diff);
|
||||
}
|
||||
case BLEND_CASE(GL_DARKEN_KHR):
|
||||
return src + dst -
|
||||
max(muldiv255(src, alphas(dst)), muldiv255(dst, alphas(src)));
|
||||
case BLEND_CASE(GL_LIGHTEN_KHR):
|
||||
return src + dst -
|
||||
min(muldiv255(src, alphas(dst)), muldiv255(dst, alphas(src)));
|
||||
|
||||
case BLEND_CASE(GL_COLORDODGE_KHR): {
|
||||
// Color-dodge and color-burn require division, so we convert to FP math
|
||||
// here, but avoid transposing to a vec4.
|
||||
WideRGBA32F srcF = CONVERT(src, WideRGBA32F);
|
||||
WideRGBA32F srcA = alphas(srcF);
|
||||
WideRGBA32F dstF = CONVERT(dst, WideRGBA32F);
|
||||
WideRGBA32F dstA = alphas(dstF);
|
||||
return pack_pixels_RGBA8(
|
||||
srcA * set_alphas(
|
||||
min(dstA, dstF * srcA * recip_or(srcA - srcF, 255.0f)),
|
||||
dstF) +
|
||||
srcF * (255.0f - dstA) + dstF * (255.0f - srcA),
|
||||
255.0f * 255.0f);
|
||||
}
|
||||
case BLEND_CASE(GL_COLORBURN_KHR): {
|
||||
WideRGBA32F srcF = CONVERT(src, WideRGBA32F);
|
||||
WideRGBA32F srcA = alphas(srcF);
|
||||
WideRGBA32F dstF = CONVERT(dst, WideRGBA32F);
|
||||
WideRGBA32F dstA = alphas(dstF);
|
||||
return pack_pixels_RGBA8(
|
||||
srcA * set_alphas((dstA - min(dstA, (dstA - dstF) * srcA *
|
||||
recip_or(srcF, 255.0f))),
|
||||
dstF) +
|
||||
srcF * (255.0f - dstA) + dstF * (255.0f - srcA),
|
||||
255.0f * 255.0f);
|
||||
}
|
||||
case BLEND_CASE(GL_HARDLIGHT_KHR): {
|
||||
WideRGBA8 srcA = alphas(src);
|
||||
WideRGBA8 dstA = alphas(dst);
|
||||
WideRGBA8 diff = muldiv255(src, dst) + muldiv255(srcA - src, dstA - dst);
|
||||
return src + dst +
|
||||
if_then_else(src * 2 <= srcA, (diff & RGB_MASK) - alphas(diff),
|
||||
-diff);
|
||||
}
|
||||
case BLEND_CASE(GL_SOFTLIGHT_KHR): {
|
||||
// Soft-light requires an unpremultiply that can't be factored out as
|
||||
// well as a sqrt, so we convert to FP math here, but avoid transposing
|
||||
// to a vec4.
|
||||
WideRGBA32F srcF = CONVERT(src, WideRGBA32F);
|
||||
WideRGBA32F srcA = alphas(srcF);
|
||||
WideRGBA32F dstF = CONVERT(dst, WideRGBA32F);
|
||||
WideRGBA32F dstA = alphas(dstF);
|
||||
WideRGBA32F dstU = unpremultiply(dstF);
|
||||
WideRGBA32F scale = srcF + srcF - srcA;
|
||||
return pack_pixels_RGBA8(
|
||||
dstF * (255.0f +
|
||||
set_alphas(
|
||||
scale *
|
||||
if_then_else(scale < 0.0f, 1.0f - dstU,
|
||||
min((16.0f * dstU - 12.0f) * dstU + 3.0f,
|
||||
inversesqrt(dstU) - 1.0f)),
|
||||
WideRGBA32F(0.0f))) +
|
||||
srcF * (255.0f - dstA),
|
||||
255.0f * 255.0f);
|
||||
}
|
||||
case BLEND_CASE(GL_DIFFERENCE_KHR): {
|
||||
WideRGBA8 diff =
|
||||
min(muldiv255(dst, alphas(src)), muldiv255(src, alphas(dst)));
|
||||
return src + dst - diff - (diff & RGB_MASK);
|
||||
}
|
||||
case BLEND_CASE(GL_EXCLUSION_KHR): {
|
||||
WideRGBA8 diff = muldiv255(src, dst);
|
||||
return src + dst - diff - (diff & RGB_MASK);
|
||||
}
|
||||
case BLEND_CASE(GL_HSL_HUE_KHR): {
|
||||
// The HSL blend modes are non-separable and require complicated use of
|
||||
// division. It is advantageous to convert to FP and transpose to vec4
|
||||
// math to more easily manipulate the individual color components.
|
||||
vec4 srcV = unpack(CONVERT(src, PackedRGBA32F));
|
||||
vec4 dstV = unpack(CONVERT(dst, PackedRGBA32F));
|
||||
Float srcA = srcV.w * (1.0f / 255.0f);
|
||||
Float dstA = dstV.w * (1.0f / 255.0f);
|
||||
Float srcDstA = srcV.w * dstA;
|
||||
vec3 srcC = vec3(srcV) * dstA;
|
||||
vec3 dstC = vec3(dstV) * srcA;
|
||||
return pack_pixels_RGBA8(vec4(set_lum_sat(srcC, dstC, dstC, srcDstA) +
|
||||
vec3(srcV) - srcC + vec3(dstV) - dstC,
|
||||
srcV.w + dstV.w - srcDstA),
|
||||
255.0f);
|
||||
}
|
||||
case BLEND_CASE(GL_HSL_SATURATION_KHR): {
|
||||
vec4 srcV = unpack(CONVERT(src, PackedRGBA32F));
|
||||
vec4 dstV = unpack(CONVERT(dst, PackedRGBA32F));
|
||||
Float srcA = srcV.w * (1.0f / 255.0f);
|
||||
Float dstA = dstV.w * (1.0f / 255.0f);
|
||||
Float srcDstA = srcV.w * dstA;
|
||||
vec3 srcC = vec3(srcV) * dstA;
|
||||
vec3 dstC = vec3(dstV) * srcA;
|
||||
return pack_pixels_RGBA8(vec4(set_lum_sat(dstC, srcC, dstC, srcDstA) +
|
||||
vec3(srcV) - srcC + vec3(dstV) - dstC,
|
||||
srcV.w + dstV.w - srcDstA),
|
||||
255.0f);
|
||||
}
|
||||
case BLEND_CASE(GL_HSL_COLOR_KHR): {
|
||||
vec4 srcV = unpack(CONVERT(src, PackedRGBA32F));
|
||||
vec4 dstV = unpack(CONVERT(dst, PackedRGBA32F));
|
||||
Float srcA = srcV.w * (1.0f / 255.0f);
|
||||
Float dstA = dstV.w * (1.0f / 255.0f);
|
||||
Float srcDstA = srcV.w * dstA;
|
||||
vec3 srcC = vec3(srcV) * dstA;
|
||||
vec3 dstC = vec3(dstV) * srcA;
|
||||
return pack_pixels_RGBA8(vec4(set_lum(srcC, dstC, srcDstA) + vec3(srcV) -
|
||||
srcC + vec3(dstV) - dstC,
|
||||
srcV.w + dstV.w - srcDstA),
|
||||
255.0f);
|
||||
}
|
||||
case BLEND_CASE(GL_HSL_LUMINOSITY_KHR): {
|
||||
vec4 srcV = unpack(CONVERT(src, PackedRGBA32F));
|
||||
vec4 dstV = unpack(CONVERT(dst, PackedRGBA32F));
|
||||
Float srcA = srcV.w * (1.0f / 255.0f);
|
||||
Float dstA = dstV.w * (1.0f / 255.0f);
|
||||
Float srcDstA = srcV.w * dstA;
|
||||
vec3 srcC = vec3(srcV) * dstA;
|
||||
vec3 dstC = vec3(dstV) * srcA;
|
||||
return pack_pixels_RGBA8(vec4(set_lum(dstC, srcC, srcDstA) + vec3(srcV) -
|
||||
srcC + vec3(dstV) - dstC,
|
||||
srcV.w + dstV.w - srcDstA),
|
||||
255.0f);
|
||||
}
|
||||
default:
|
||||
UNREACHABLE;
|
||||
// return src;
|
||||
|
|
|
@ -155,6 +155,8 @@ typedef intptr_t GLintptr;
|
|||
#define GL_ONE_MINUS_SRC1_ALPHA 0x88FB
|
||||
|
||||
#define GL_FUNC_ADD 0x8006
|
||||
#define GL_MIN 0x8007
|
||||
#define GL_MAX 0x8008
|
||||
|
||||
#define GL_NEVER 0x0200
|
||||
#define GL_LESS 0x0201
|
||||
|
@ -192,3 +194,19 @@ typedef intptr_t GLintptr;
|
|||
#define GL_UNSIGNED_SHORT_8_8_APPLE 0x85BA
|
||||
#define GL_UNSIGNED_SHORT_8_8_REV_APPLE 0x85BB
|
||||
#define GL_RGB_RAW_422_APPLE 0x8A51
|
||||
|
||||
#define GL_MULTIPLY_KHR 0x9294
|
||||
#define GL_SCREEN_KHR 0x9295
|
||||
#define GL_OVERLAY_KHR 0x9296
|
||||
#define GL_DARKEN_KHR 0x9297
|
||||
#define GL_LIGHTEN_KHR 0x9298
|
||||
#define GL_COLORDODGE_KHR 0x9299
|
||||
#define GL_COLORBURN_KHR 0x929A
|
||||
#define GL_HARDLIGHT_KHR 0x929B
|
||||
#define GL_SOFTLIGHT_KHR 0x929C
|
||||
#define GL_DIFFERENCE_KHR 0x929E
|
||||
#define GL_EXCLUSION_KHR 0x92A0
|
||||
#define GL_HSL_HUE_KHR 0x92AD
|
||||
#define GL_HSL_SATURATION_KHR 0x92AE
|
||||
#define GL_HSL_COLOR_KHR 0x92AF
|
||||
#define GL_HSL_LUMINOSITY_KHR 0x92B0
|
||||
|
|
|
@ -215,6 +215,24 @@ SI Float sqrt(Float v) {
|
|||
#endif
|
||||
}
|
||||
|
||||
SI float recip(float x) { return 1.0f / x; }
|
||||
|
||||
// Use a fast vector reciprocal approximation when available. This should only
|
||||
// be used in cases where it is okay that the approximation is imprecise -
|
||||
// essentially visually correct but numerically wrong. Otherwise just rely on
|
||||
// however the compiler would implement slower division if the platform doesn't
|
||||
// provide a convenient intrinsic.
|
||||
SI Float recip(Float v) {
#if USE_SSE2
  // Hardware reciprocal approximation.
  return _mm_rcp_ps(v);
#elif USE_NEON
  // Start from the hardware estimate and refine it with one reciprocal step.
  Float estimate = vrecpeq_f32(v);
  Float correction = vrecpsq_f32(v, estimate);
  return correction * estimate;
#else
  // No intrinsic available: plain division.
  return 1.0f / v;
#endif
}
|
||||
|
||||
SI float inversesqrt(float x) { return 1.0f / sqrtf(x); }
|
||||
|
||||
SI Float inversesqrt(Float v) {
|
||||
|
@ -648,8 +666,8 @@ SI I32 roundfast(Float v, Float scale) {
|
|||
}
|
||||
|
||||
template <typename T>
|
||||
SI auto round_pixel(T v) {
|
||||
return roundfast(v, 255.0f);
|
||||
SI auto round_pixel(T v, float maxval = 1.0f) {
|
||||
return roundfast(v, (255.0f / maxval));
|
||||
}
|
||||
|
||||
#define round __glsl_round
|
||||
|
@ -1335,6 +1353,7 @@ struct vec3 {
|
|||
IMPLICIT constexpr vec3(Float a) : x(a), y(a), z(a) {}
|
||||
constexpr vec3(Float x, Float y, Float z) : x(x), y(y), z(z) {}
|
||||
vec3(vec2 a, Float z) : x(a.x), y(a.y), z(z) {}
|
||||
explicit vec3(vec4);
|
||||
IMPLICIT constexpr vec3(vec3_scalar s) : x(s.x), y(s.y), z(s.z) {}
|
||||
constexpr vec3(vec3_scalar s0, vec3_scalar s1, vec3_scalar s2, vec3_scalar s3)
|
||||
: x(Float{s0.x, s1.x, s2.x, s3.x}),
|
||||
|
@ -1828,6 +1847,8 @@ vec4 make_vec4(const X& x, const Y& y, const Z& z, const W& w) {
|
|||
return vec4(x, y, z, w);
|
||||
}
|
||||
|
||||
ALWAYS_INLINE vec3::vec3(vec4 v) : x(v.x), y(v.y), z(v.z) {}
|
||||
|
||||
SI ivec4 roundfast(vec4 v, Float scale) {
|
||||
return ivec4(roundfast(v.x, scale), roundfast(v.y, scale),
|
||||
roundfast(v.z, scale), roundfast(v.w, scale));
|
||||
|
|
|
@ -2287,7 +2287,7 @@ impl Gl for Context {
|
|||
|
||||
// GL_KHR_blend_equation_advanced
|
||||
fn blend_barrier_khr(&self) {
|
||||
panic!();
|
||||
// No barrier required, so nothing to do
|
||||
}
|
||||
|
||||
// GL_CHROMIUM_copy_texture
|
||||
|
|
|
@ -589,13 +589,13 @@ static inline U16 textureLinearUnpackedR8(S sampler, ivec2 i,
|
|||
auto b0 = unaligned_load<V2<uint8_t>>(&buf[row0.y]);
|
||||
auto c0 = unaligned_load<V2<uint8_t>>(&buf[row0.z]);
|
||||
auto d0 = unaligned_load<V2<uint8_t>>(&buf[row0.w]);
|
||||
auto abcd0 = CONVERT(combine(combine(a0, b0), combine(c0, d0)), V8<int16_t>);
|
||||
auto abcd0 = CONVERT(combine(a0, b0, c0, d0), V8<int16_t>);
|
||||
|
||||
auto a1 = unaligned_load<V2<uint8_t>>(&buf[row1.x]);
|
||||
auto b1 = unaligned_load<V2<uint8_t>>(&buf[row1.y]);
|
||||
auto c1 = unaligned_load<V2<uint8_t>>(&buf[row1.z]);
|
||||
auto d1 = unaligned_load<V2<uint8_t>>(&buf[row1.w]);
|
||||
auto abcd1 = CONVERT(combine(combine(a1, b1), combine(c1, d1)), V8<int16_t>);
|
||||
auto abcd1 = CONVERT(combine(a1, b1, c1, d1), V8<int16_t>);
|
||||
|
||||
abcd0 += ((abcd1 - abcd0) * fracy.xxyyzzww) >> 7;
|
||||
|
||||
|
@ -709,15 +709,13 @@ static inline I16 textureLinearUnpackedR16(S sampler, ivec2 i,
|
|||
auto b0 = unaligned_load<V2<uint16_t>>(&buf[row0.y]);
|
||||
auto c0 = unaligned_load<V2<uint16_t>>(&buf[row0.z]);
|
||||
auto d0 = unaligned_load<V2<uint16_t>>(&buf[row0.w]);
|
||||
auto abcd0 =
|
||||
CONVERT(combine(combine(a0, b0), combine(c0, d0)) >> 1, V8<int16_t>);
|
||||
auto abcd0 = CONVERT(combine(a0, b0, c0, d0) >> 1, V8<int16_t>);
|
||||
|
||||
auto a1 = unaligned_load<V2<uint16_t>>(&buf[row1.x]);
|
||||
auto b1 = unaligned_load<V2<uint16_t>>(&buf[row1.y]);
|
||||
auto c1 = unaligned_load<V2<uint16_t>>(&buf[row1.z]);
|
||||
auto d1 = unaligned_load<V2<uint16_t>>(&buf[row1.w]);
|
||||
auto abcd1 =
|
||||
CONVERT(combine(combine(a1, b1), combine(c1, d1)) >> 1, V8<int16_t>);
|
||||
auto abcd1 = CONVERT(combine(a1, b1, c1, d1) >> 1, V8<int16_t>);
|
||||
|
||||
// The samples occupy 15 bits and the fraction occupies 15 bits, so that when
|
||||
// they are multiplied together, the new scaled sample will fit in the high
|
||||
|
@ -767,6 +765,9 @@ vec4 textureLinearR16(S sampler, vec2 P, int32_t zoffset = 0) {
|
|||
return vec4(r * (1.0f / 32767.0f), 0.0f, 0.0f, 1.0f);
|
||||
}
|
||||
|
||||
using PackedRGBA32F = V16<float>;
|
||||
using WideRGBA32F = V16<float>;
|
||||
|
||||
template <typename S>
|
||||
vec4 textureLinearRGBA32F(S sampler, vec2 P, int32_t zoffset = 0) {
|
||||
assert(sampler->format == TextureFormat::RGBA32F);
|
||||
|
|
|
@ -315,6 +315,10 @@ struct VectorType {
|
|||
return VectorType<T, N * 2>::wrap(data, high.data);
|
||||
}
|
||||
|
||||
# define xxxx swizzle(0, 0, 0, 0)
|
||||
# define yyyy swizzle(1, 1, 1, 1)
|
||||
# define zzzz swizzle(2, 2, 2, 2)
|
||||
# define wwww swizzle(3, 3, 3, 3)
|
||||
# define xyxy swizzle(0, 1, 0, 1)
|
||||
# define zwzw swizzle(2, 3, 2, 3)
|
||||
# define zwxy swizzle(2, 3, 0, 1)
|
||||
|
@ -388,6 +392,12 @@ SI VectorType<T, N * 2> expand(VectorType<T, N> a) {
|
|||
}
|
||||
#endif
|
||||
|
||||
template <typename T, int N>
|
||||
SI VectorType<T, N * 4> combine(VectorType<T, N> a, VectorType<T, N> b,
|
||||
VectorType<T, N> c, VectorType<T, N> d) {
|
||||
return combine(combine(a, b), combine(c, d));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
SI VectorType<T, 4> zipLow(VectorType<T, 4> a, VectorType<T, 4> b) {
|
||||
return SHUFFLE(a, b, 0, 4, 1, 5);
|
||||
|
|
|
@ -20,14 +20,14 @@ fuzzy-if(azureSkiaGL,0-2,0-7174) == background-blending-image-gradient.html back
|
|||
== background-blending-color-burn.html background-blending-color-burn-ref.svg
|
||||
== background-blending-color-dodge.html background-blending-color-dodge-ref.svg
|
||||
# need to investigate why these tests are fuzzy - first suspect is a possible color space conversion on some platforms; same for mix-blend-mode tests
|
||||
fuzzy-if(azureSkia||gtkWidget,0-2,0-9600) fuzzy-if(d2d,0-1,0-8000) fuzzy-if(swgl,1-1,9600-9600) == background-blending-color.html background-blending-color-ref.svg
|
||||
fuzzy-if(azureSkia||gtkWidget,0-2,0-9600) fuzzy-if(d2d,0-1,0-8000) fuzzy-if(swgl,1-1,8000-9600) == background-blending-color.html background-blending-color-ref.svg
|
||||
== background-blending-darken.html background-blending-darken-ref.svg
|
||||
== background-blending-difference.html background-blending-difference-ref.svg
|
||||
fuzzy-if(/^Windows\x20NT\x2010\.0/.test(http.oscpu)||skiaContent,0-1,0-1600) == background-blending-exclusion.html background-blending-exclusion-ref.svg
|
||||
fuzzy-if(cocoaWidget||d2d,0-1,0-1600) == background-blending-hard-light.html background-blending-hard-light-ref.svg
|
||||
fuzzy-if(cocoaWidget||d2d||swgl,0-1,0-1600) == background-blending-hard-light.html background-blending-hard-light-ref.svg
|
||||
fuzzy-if(d2d,0-1,0-9600) fuzzy-if(azureSkia||gtkWidget,0-1,0-11200) fuzzy-if(webrender&&!geckoview,1-1,9600-11240) == background-blending-hue.html background-blending-hue-ref.svg
|
||||
== background-blending-lighten.html background-blending-lighten-ref.svg
|
||||
fuzzy-if(d2d,0-1,0-8000) fuzzy-if(azureSkia||gtkWidget,0-2,0-9600) fuzzy-if(swgl,1-1,9600-9600) == background-blending-luminosity.html background-blending-luminosity-ref.svg
|
||||
fuzzy-if(d2d,0-1,0-8000) fuzzy-if(azureSkia||gtkWidget,0-2,0-9600) fuzzy-if(swgl,1-1,8000-9600) == background-blending-luminosity.html background-blending-luminosity-ref.svg
|
||||
fuzzy-if(skiaContent,0-1,0-1600) == background-blending-multiply.html background-blending-multiply-ref.svg
|
||||
== background-blending-normal.html background-blending-normal-ref.svg
|
||||
fuzzy-if(/^Windows\x20NT\x2010\.0/.test(http.oscpu)||azureSkia||gtkWidget,0-1,0-1600) == background-blending-overlay.html background-blending-overlay-ref.svg
|
||||
|
@ -41,7 +41,7 @@ fuzzy-if(azureSkia||d2d||gtkWidget,0-1,0-40000) == background-blending-image-col
|
|||
fuzzy(0-65,0-53) fuzzy-if(geckoview&&webrender&&device,63-64,163-328) == mix-blend-mode-952051.html mix-blend-mode-952051-ref.html
|
||||
|
||||
fuzzy-if(d3d11,0-49,0-200) == mix-blend-mode-and-filter.html mix-blend-mode-and-filter-ref.html
|
||||
fuzzy-if(d3d11,0-1,0-6) == mix-blend-mode-and-filter.svg mix-blend-mode-and-filter-ref.svg
|
||||
fuzzy-if(d3d11,0-1,0-6) fuzzy-if(swgl,171-171,2980-2980) == mix-blend-mode-and-filter.svg mix-blend-mode-and-filter-ref.svg
|
||||
|
||||
fuzzy(0-2,0-14400) fuzzy-if(geckoview&&webrender&&device,3-3,700-700) == mix-blend-mode-child-of-blended-has-opacity.html mix-blend-mode-child-of-blended-has-opacity-ref.html
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче