- streamline the testing down to just byte multiplies
    (that's always where the blend algorithms vary)
  - add another approximate multiply (x*y+255)>>8
  - add another variant of the perfect multiply, ((x*y+128)*257)>>16

I've realized ((x*y+128)*257)>>16 might be just as fast in SSE/NEON
as our current (x*y+x)>>8 approximation.  Good to be testing it here.

BUG=skia:

Review URL: https://codereview.chromium.org/1453043005
This commit is contained in:
mtklein 2015-11-17 08:39:01 -08:00 коммит произвёл Commit bot
Родитель f3182ebc72
Коммит 195fe08421
1 изменённых файлов: 48 добавлений и 247 удалений

Просмотреть файл

@ -10,263 +10,64 @@
#include "SkColorPriv.h" #include "SkColorPriv.h"
#include "SkTaskGroup.h" #include "SkTaskGroup.h"
#include "SkXfermode.h" #include "SkXfermode.h"
#include <functional>
#define ASSERT(x) REPORTER_ASSERT(r, x) struct Results { int diffs, diffs_0x00, diffs_0xff, diffs_by_1; };
static uint8_t double_to_u8(double d) { static bool acceptable(const Results& r) {
SkASSERT(d >= 0); #if 0
SkASSERT(d < 256); SkDebugf("%d diffs, %d at 0x00, %d at 0xff, %d off by 1, all out of 65536\n",
return uint8_t(d); r.diffs, r.diffs_0x00, r.diffs_0xff, r.diffs_by_1);
#endif
return r.diffs_by_1 == r.diffs // never off by more than 1
&& r.diffs_0x00 == 0 // transparent must stay transparent
&& r.diffs_0xff == 0; // opaque must stay opaque
} }
// All algorithms we're testing have this interface. template <typename Fn>
// We want a single channel blend, src over dst, assuming src is premultiplied by srcAlpha. static Results test(Fn&& multiply) {
typedef uint8_t(*Blend)(uint8_t dst, uint8_t src, uint8_t srcAlpha); Results r = { 0,0,0,0 };
for (int x = 0; x < 256; x++) {
// This is our golden algorithm. for (int y = 0; y < 256; y++) {
static uint8_t blend_double_round(uint8_t dst, uint8_t src, uint8_t srcAlpha) { int p = multiply(x, y),
SkASSERT(src <= srcAlpha); ideal = (x*y+127)/255;
return double_to_u8(0.5 + src + dst * (255.0 - srcAlpha) / 255.0); if (p != ideal) {
} r.diffs++;
if (x == 0x00 || y == 0x00) { r.diffs_0x00++; }
static uint8_t abs_diff(uint8_t a, uint8_t b) { if (x == 0xff || y == 0xff) { r.diffs_0xff++; }
const int diff = a - b; if (SkTAbs(ideal - p) == 1) { r.diffs_by_1++; }
return diff > 0 ? diff : -diff;
}
static void test(skiatest::Reporter* r, int maxDiff, Blend algorithm,
uint8_t dst, uint8_t src, uint8_t alpha) {
const uint8_t golden = blend_double_round(dst, src, alpha);
const uint8_t blend = algorithm(dst, src, alpha);
if (abs_diff(blend, golden) > maxDiff) {
SkDebugf("dst %02x, src %02x, alpha %02x, |%02x - %02x| > %d\n",
dst, src, alpha, blend, golden, maxDiff);
ASSERT(abs_diff(blend, golden) <= maxDiff);
}
}
// Exhaustively compare an algorithm against our golden, for a given alpha.
static void test_alpha(skiatest::Reporter* r, uint8_t alpha, int maxDiff, Blend algorithm) {
SkASSERT(maxDiff >= 0);
for (unsigned src = 0; src <= alpha; src++) {
for (unsigned dst = 0; dst < 256; dst++) {
test(r, maxDiff, algorithm, dst, src, alpha);
} }
} }}
return r;
} }
// Exhaustively compare an algorithm against our golden, for a given dst. DEF_TEST(Blend_byte_multiply, r) {
static void test_dst(skiatest::Reporter* r, uint8_t dst, int maxDiff, Blend algorithm) { // These are all temptingly close but fundamentally broken.
SkASSERT(maxDiff >= 0); int (*broken[])(int, int) = {
[](int x, int y) { return (x*y)>>8; },
[](int x, int y) { return (x*y+128)>>8; },
[](int x, int y) { y += y>>7; return (x*y)>>8; },
};
for (auto multiply : broken) { REPORTER_ASSERT(r, !acceptable(test(multiply))); }
for (unsigned alpha = 0; alpha < 256; alpha++) { // These are fine to use, but not perfect.
for (unsigned src = 0; src <= alpha; src++) { int (*fine[])(int, int) = {
test(r, maxDiff, algorithm, dst, src, alpha); [](int x, int y) { return (x*y+x)>>8; },
} [](int x, int y) { return (x*y+y)>>8; },
} [](int x, int y) { return (x*y+255)>>8; },
[](int x, int y) { y += y>>7; return (x*y+128)>>8; },
};
for (auto multiply : fine) { REPORTER_ASSERT(r, acceptable(test(multiply))); }
// These are perfect.
int (*perfect[])(int, int) = {
[](int x, int y) { return (x*y+127)/255; }, // Duh.
[](int x, int y) { int p = (x*y+128); return (p+(p>>8))>>8; },
[](int x, int y) { return ((x*y+128)*257)>>16; },
};
for (auto multiply : perfect) { REPORTER_ASSERT(r, test(multiply).diffs == 0); }
} }
// Src-over blend of one channel computed in double precision with no rounding
// term: the result of src + dst * (255 - srcAlpha) / 255 is handed straight to
// double_to_u8 for conversion to a byte.
static uint8_t blend_double_trunc(uint8_t dst, uint8_t src, uint8_t srcAlpha) {
return double_to_u8(src + dst * (255.0 - srcAlpha) / 255.0);
}
// Same formula as blend_double_trunc, but written with float literals so the
// arithmetic is expressed in single precision; the float result is then passed
// to double_to_u8 for conversion to a byte. No rounding term is added.
static uint8_t blend_float_trunc(uint8_t dst, uint8_t src, uint8_t srcAlpha) {
return double_to_u8(src + dst * (255.0f - srcAlpha) / 255.0f);
}
// Float-literal variant of the src-over blend that adds 0.5f before the value
// is passed to double_to_u8, i.e. src + dst * (255 - srcAlpha) / 255 with a
// half-unit bias applied ahead of the byte conversion.
static uint8_t blend_float_round(uint8_t dst, uint8_t src, uint8_t srcAlpha) {
return double_to_u8(0.5f + src + dst * (255.0f - srcAlpha) / 255.0f);
}
// Integer src-over blend: weights dst by (255 - srcAlpha), then divides by 256
// with a plain right shift (no rounding) before adding src.
static uint8_t blend_255_trunc(uint8_t dst, uint8_t src, uint8_t srcAlpha) {
    const unsigned weight = 255u - srcAlpha;   // inverse alpha, 0..255
    const unsigned scaled = dst * weight;      // at most 255*255, fits in 16 bits
    return static_cast<uint8_t>(src + (scaled >> 8));
}
// Integer src-over blend: weights dst by (255 - srcAlpha), adds 128 so the
// following shift by 8 rounds instead of truncating, then adds src.
static uint8_t blend_255_round(uint8_t dst, uint8_t src, uint8_t srcAlpha) {
    const unsigned weight = 255u - srcAlpha;       // inverse alpha, 0..255
    const unsigned biased = dst * weight + 128u;   // at most 65153, fits in 16 bits
    return static_cast<uint8_t>(src + (biased >> 8));
}
// Integer src-over blend using a 0..256 alpha scale: srcAlpha is bumped by its
// top bit (so 255 becomes 256) before being inverted, and the weighted dst is
// divided by 256 with a truncating shift.
static uint8_t blend_256_trunc(uint8_t dst, uint8_t src, uint8_t srcAlpha) {
    const unsigned alpha256 = srcAlpha + (srcAlpha >> 7);   // 0..256
    const unsigned weight   = 256u - alpha256;              // 0..256
    const unsigned scaled   = dst * weight;                 // at most 255*256
    return static_cast<uint8_t>(src + (scaled >> 8));
}
// Integer src-over blend using a 0..256 alpha scale (srcAlpha plus its top bit,
// inverted), with 128 added before the shift by 8 so the division rounds.
static uint8_t blend_256_round(uint8_t dst, uint8_t src, uint8_t srcAlpha) {
    const unsigned alpha256 = srcAlpha + (srcAlpha >> 7);   // 0..256
    const unsigned weight   = 256u - alpha256;              // 0..256
    const unsigned biased   = dst * weight + 128u;          // at most 65408
    return static_cast<uint8_t>(src + (biased >> 8));
}
// Variant of the rounding 0..256 blend that inverts alpha first (255 - srcAlpha)
// and only then stretches it to 0..256 by adding its own top bit.
static uint8_t blend_256_round_alt(uint8_t dst, uint8_t src, uint8_t srcAlpha) {
    const unsigned inverse255 = 255u - srcAlpha;                  // 0..255
    const unsigned weight     = inverse255 + (inverse255 >> 7);   // 0..256
    const unsigned biased     = dst * weight + 128u;              // at most 65408
    return static_cast<uint8_t>(src + (biased >> 8));
}
// Integer src-over blend that maps alpha to a 1..256 scale by always adding 1,
// inverts it against 256, and divides the weighted dst by 256 with truncation.
static uint8_t blend_256_plus1_trunc(uint8_t dst, uint8_t src, uint8_t srcAlpha) {
    const unsigned alpha256 = srcAlpha + 1u;       // 1..256
    const unsigned weight   = 256u - alpha256;     // 0..255
    const unsigned scaled   = dst * weight;        // at most 255*255
    return static_cast<uint8_t>(src + (scaled >> 8));
}
// Integer src-over blend that maps alpha to a 1..256 scale by always adding 1,
// inverts it against 256, and rounds the divide-by-256 by adding 128 first.
static uint8_t blend_256_plus1_round(uint8_t dst, uint8_t src, uint8_t srcAlpha) {
    const unsigned alpha256 = srcAlpha + 1u;           // 1..256
    const unsigned weight   = 256u - alpha256;         // 0..255
    const unsigned biased   = dst * weight + 128u;     // at most 65153
    return static_cast<uint8_t>(src + (biased >> 8));
}
// Integer src-over blend that divides by 255 rather than 256: after biasing the
// product by 128, it folds the product's own high byte back in before the final
// shift, i.e. (p + (p >> 8)) >> 8 with p = dst * (255 - srcAlpha) + 128.
static uint8_t blend_perfect(uint8_t dst, uint8_t src, uint8_t srcAlpha) {
    const unsigned weight   = 255u - srcAlpha;            // inverse alpha, 0..255
    const unsigned biased   = dst * weight + 128u;        // at most 65153
    const unsigned quotient = (biased + (biased >> 8)) >> 8;
    return static_cast<uint8_t>(src + quotient);
}
// A fully transparent src should leave dst untouched, so the ideal tolerance
// here is 0. Each row pairs an algorithm with the tolerance it is held to.
DEF_TEST(Blend_alpha_0x00, r) {
    const uint8_t alpha = 0x00;

    const struct { Blend algorithm; int maxDiff; } cases[] = {
        // GOOD: exact at alpha == 0x00.
        { blend_256_round,       0 },
        { blend_256_round_alt,   0 },
        { blend_256_trunc,       0 },
        { blend_double_trunc,    0 },
        { blend_float_round,     0 },
        { blend_float_trunc,     0 },
        { blend_perfect,         0 },
        // BAD: only held to a diff of 1.
        { blend_255_round,       1 },
        { blend_255_trunc,       1 },
        { blend_256_plus1_round, 1 },
        { blend_256_plus1_trunc, 1 },
    };

    for (const auto& c : cases) {
        test_alpha(r, alpha, c.maxDiff, c.algorithm);
    }
}
// With dst == 0 the blend should reduce to src exactly, so every algorithm is
// held to a tolerance of 0.
DEF_TEST(Blend_dst_0x00, r) {
    const uint8_t dst = 0x00;

    // GOOD: all of them. (There are no BAD entries for this case.)
    const Blend exact[] = {
        blend_255_round,
        blend_255_trunc,
        blend_256_plus1_round,
        blend_256_plus1_trunc,
        blend_256_round,
        blend_256_round_alt,
        blend_256_trunc,
        blend_double_trunc,
        blend_float_round,
        blend_float_trunc,
        blend_perfect,
    };

    for (Blend algorithm : exact) {
        test_dst(r, dst, 0, algorithm);
    }
}
// A fully opaque src should replace dst exactly, so every algorithm is held to
// a tolerance of 0.
DEF_TEST(Blend_alpha_0xFF, r) {
    const uint8_t alpha = 0xFF;

    // GOOD: all of them. (There are no BAD entries for this case.)
    const Blend exact[] = {
        blend_255_round,
        blend_255_trunc,
        blend_256_plus1_round,
        blend_256_plus1_trunc,
        blend_256_round,
        blend_256_round_alt,
        blend_256_trunc,
        blend_double_trunc,
        blend_float_round,
        blend_float_trunc,
        blend_perfect,
    };

    for (Blend algorithm : exact) {
        test_alpha(r, alpha, 0, algorithm);
    }
}
// With dst == 0xFF the ideal tolerance is 0. Each row pairs an algorithm with
// the tolerance it is held to.
DEF_TEST(Blend_dst_0xFF, r) {
    const uint8_t dst = 0xFF;

    const struct { Blend algorithm; int maxDiff; } cases[] = {
        // GOOD: exact at dst == 0xFF.
        { blend_256_round,       0 },
        { blend_256_round_alt,   0 },
        { blend_double_trunc,    0 },
        { blend_float_round,     0 },
        { blend_float_trunc,     0 },
        { blend_perfect,         0 },
        // BAD: only held to a diff of 1.
        { blend_255_round,       1 },
        { blend_255_trunc,       1 },
        { blend_256_plus1_round, 1 },
        { blend_256_plus1_trunc, 1 },
        { blend_256_trunc,       1 },
    };

    for (const auto& c : cases) {
        test_dst(r, dst, c.maxDiff, c.algorithm);
    }
}
// Sweeps every alpha value and checks each algorithm against the tolerance it
// is expected to meet; the goal is a diff of at most 1 everywhere.
DEF_TEST(Blend_alpha_Exhaustive, r) {
    const struct { Blend algorithm; int maxDiff; } cases[] = {
        // PERFECT: never differs from the golden result.
        { blend_float_round,     0 },
        { blend_perfect,         0 },
        // GOOD: off by at most 1.
        { blend_255_round,       1 },
        { blend_256_plus1_round, 1 },
        { blend_256_round,       1 },
        { blend_256_round_alt,   1 },
        { blend_256_trunc,       1 },
        { blend_double_trunc,    1 },
        { blend_float_trunc,     1 },
        // BAD: only held to a diff of 2.
        { blend_255_trunc,       2 },
        { blend_256_plus1_trunc, 2 },
    };

    for (unsigned alpha = 0; alpha < 256; alpha++) {
        for (const auto& c : cases) {
            test_alpha(r, alpha, c.maxDiff, c.algorithm);
        }
    }
}
// Sweeps every dst value and checks each algorithm against the tolerance it is
// expected to meet; the goal is a diff of at most 1 everywhere.
DEF_TEST(Blend_dst_Exhaustive, r) {
    const struct { Blend algorithm; int maxDiff; } cases[] = {
        // PERFECT: never differs from the golden result.
        { blend_float_round,     0 },
        { blend_perfect,         0 },
        // GOOD: off by at most 1.
        { blend_255_round,       1 },
        { blend_256_plus1_round, 1 },
        { blend_256_round,       1 },
        { blend_256_round_alt,   1 },
        { blend_256_trunc,       1 },
        { blend_double_trunc,    1 },
        { blend_float_trunc,     1 },
        // BAD: only held to a diff of 2.
        { blend_255_trunc,       2 },
        { blend_256_plus1_trunc, 2 },
    };

    for (unsigned dst = 0; dst < 256; dst++) {
        for (const auto& c : cases) {
            test_dst(r, dst, c.maxDiff, c.algorithm);
        }
    }
}
// Overall summary:
// PERFECT
// blend_double_round
// blend_float_round
// blend_perfect
// GOOD ENOUGH
// blend_double_trunc
// blend_float_trunc
// blend_256_round
// blend_256_round_alt
// NOT GOOD ENOUGH
// all others
//
// Algorithms that make sense to use in Skia: blend_256_round, blend_256_round_alt, blend_perfect
DEF_TEST(Blend_premul_begets_premul, r) { DEF_TEST(Blend_premul_begets_premul, r) {
// This test is quite slow, even if you have enough cores to run each mode in parallel. // This test is quite slow, even if you have enough cores to run each mode in parallel.
if (!r->allowExtendedTest()) { if (!r->allowExtendedTest()) {