зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1622220 - Add UnpremultiplyRow and extend SwizzleRow. r=lsalzman
UnpremultiplyRow will be used in the image encoders to reverse premultiplication. SwizzleRow needs to support copying (no swizzling) and swapping RGB/BGR. Differential Revision: https://phabricator.services.mozilla.com/D66743 --HG-- extra : moz-landing-system : lando
This commit is contained in:
Родитель
28816fc161
Коммит
acdd6810d2
|
@ -134,6 +134,14 @@ void Unpremultiply_SSE2(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize);
|
|||
FORMAT_CASE(aSrcFormat, aDstFormat, \
|
||||
Unpremultiply_SSE2<ShouldSwapRB(aSrcFormat, aDstFormat)>)
|
||||
|
||||
template <bool aSwapRB>
|
||||
void UnpremultiplyRow_SSE2(const uint8_t*, uint8_t*, int32_t);
|
||||
|
||||
# define UNPREMULTIPLY_ROW_SSE2(aSrcFormat, aDstFormat) \
|
||||
FORMAT_CASE_ROW( \
|
||||
aSrcFormat, aDstFormat, \
|
||||
UnpremultiplyRow_SSE2<ShouldSwapRB(aSrcFormat, aDstFormat)>)
|
||||
|
||||
template <bool aSwapRB, bool aOpaqueAlpha>
|
||||
void Swizzle_SSE2(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize);
|
||||
|
||||
|
@ -198,6 +206,14 @@ void Unpremultiply_NEON(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize);
|
|||
FORMAT_CASE(aSrcFormat, aDstFormat, \
|
||||
Unpremultiply_NEON<ShouldSwapRB(aSrcFormat, aDstFormat)>)
|
||||
|
||||
template <bool aSwapRB>
|
||||
void UnpremultiplyRow_NEON(const uint8_t*, uint8_t*, int32_t);
|
||||
|
||||
# define UNPREMULTIPLY_ROW_NEON(aSrcFormat, aDstFormat) \
|
||||
FORMAT_CASE_ROW( \
|
||||
aSrcFormat, aDstFormat, \
|
||||
UnpremultiplyRow_NEON<ShouldSwapRB(aSrcFormat, aDstFormat)>)
|
||||
|
||||
template <bool aSwapRB, bool aOpaqueAlpha>
|
||||
void Swizzle_NEON(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize);
|
||||
|
||||
|
@ -491,32 +507,47 @@ static const uint32_t sUnpremultiplyTable[256] = {0,
|
|||
// implementation also accesses color components using individual byte accesses
|
||||
// as this profiles faster than accessing the pixel as a uint32_t and
|
||||
// shifting/masking to access components.
|
||||
template <bool aSwapRB, uint32_t aSrcRGBIndex, uint32_t aSrcAIndex,
|
||||
uint32_t aDstRGBIndex, uint32_t aDstAIndex>
|
||||
static void UnpremultiplyChunkFallback(const uint8_t*& aSrc, uint8_t*& aDst,
|
||||
int32_t aLength) {
|
||||
const uint8_t* end = aSrc + 4 * aLength;
|
||||
do {
|
||||
uint8_t r = aSrc[aSrcRGBIndex + (aSwapRB ? 2 : 0)];
|
||||
uint8_t g = aSrc[aSrcRGBIndex + 1];
|
||||
uint8_t b = aSrc[aSrcRGBIndex + (aSwapRB ? 0 : 2)];
|
||||
uint8_t a = aSrc[aSrcAIndex];
|
||||
|
||||
// Access the 8.16 reciprocal from the table based on alpha. Multiply by
|
||||
// the reciprocal and shift off the fraction bits to approximate the
|
||||
// division by alpha.
|
||||
uint32_t q = sUnpremultiplyTable[a];
|
||||
aDst[aDstRGBIndex + 0] = (r * q) >> 16;
|
||||
aDst[aDstRGBIndex + 1] = (g * q) >> 16;
|
||||
aDst[aDstRGBIndex + 2] = (b * q) >> 16;
|
||||
aDst[aDstAIndex] = a;
|
||||
|
||||
aSrc += 4;
|
||||
aDst += 4;
|
||||
} while (aSrc < end);
|
||||
}
|
||||
|
||||
template <bool aSwapRB, uint32_t aSrcRGBIndex, uint32_t aSrcAIndex,
|
||||
uint32_t aDstRGBIndex, uint32_t aDstAIndex>
|
||||
static void UnpremultiplyRowFallback(const uint8_t* aSrc, uint8_t* aDst,
|
||||
int32_t aLength) {
|
||||
UnpremultiplyChunkFallback<aSwapRB, aSrcRGBIndex, aSrcAIndex, aDstRGBIndex,
|
||||
aDstAIndex>(aSrc, aDst, aLength);
|
||||
}
|
||||
|
||||
template <bool aSwapRB, uint32_t aSrcRGBIndex, uint32_t aSrcAIndex,
|
||||
uint32_t aDstRGBIndex, uint32_t aDstAIndex>
|
||||
static void UnpremultiplyFallback(const uint8_t* aSrc, int32_t aSrcGap,
|
||||
uint8_t* aDst, int32_t aDstGap,
|
||||
IntSize aSize) {
|
||||
for (int32_t height = aSize.height; height > 0; height--) {
|
||||
const uint8_t* end = aSrc + 4 * aSize.width;
|
||||
do {
|
||||
uint8_t r = aSrc[aSrcRGBIndex + (aSwapRB ? 2 : 0)];
|
||||
uint8_t g = aSrc[aSrcRGBIndex + 1];
|
||||
uint8_t b = aSrc[aSrcRGBIndex + (aSwapRB ? 0 : 2)];
|
||||
uint8_t a = aSrc[aSrcAIndex];
|
||||
|
||||
// Access the 8.16 reciprocal from the table based on alpha. Multiply by
|
||||
// the reciprocal and shift off the fraction bits to approximate the
|
||||
// division by alpha.
|
||||
uint32_t q = sUnpremultiplyTable[a];
|
||||
aDst[aDstRGBIndex + 0] = (r * q) >> 16;
|
||||
aDst[aDstRGBIndex + 1] = (g * q) >> 16;
|
||||
aDst[aDstRGBIndex + 2] = (b * q) >> 16;
|
||||
aDst[aDstAIndex] = a;
|
||||
|
||||
aSrc += 4;
|
||||
aDst += 4;
|
||||
} while (aSrc < end);
|
||||
|
||||
UnpremultiplyChunkFallback<aSwapRB, aSrcRGBIndex, aSrcAIndex, aDstRGBIndex,
|
||||
aDstAIndex>(aSrc, aDst, aSize.width);
|
||||
aSrc += aSrcGap;
|
||||
aDst += aDstGap;
|
||||
}
|
||||
|
@ -534,6 +565,18 @@ static void UnpremultiplyFallback(const uint8_t* aSrc, int32_t aSrcGap,
|
|||
UNPREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::R8G8B8A8) \
|
||||
UNPREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::A8R8G8B8)
|
||||
|
||||
#define UNPREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, aDstFormat) \
|
||||
FORMAT_CASE_ROW(aSrcFormat, aDstFormat, \
|
||||
UnpremultiplyRowFallback< \
|
||||
ShouldSwapRB(aSrcFormat, aDstFormat), \
|
||||
RGBByteIndex(aSrcFormat), AlphaByteIndex(aSrcFormat), \
|
||||
RGBByteIndex(aDstFormat), AlphaByteIndex(aDstFormat)>)
|
||||
|
||||
#define UNPREMULTIPLY_ROW_FALLBACK(aSrcFormat) \
|
||||
UNPREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::B8G8R8A8) \
|
||||
UNPREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::R8G8B8A8) \
|
||||
UNPREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::A8R8G8B8)
|
||||
|
||||
bool UnpremultiplyData(const uint8_t* aSrc, int32_t aSrcStride,
|
||||
SurfaceFormat aSrcFormat, uint8_t* aDst,
|
||||
int32_t aDstStride, SurfaceFormat aDstFormat,
|
||||
|
@ -588,6 +631,42 @@ bool UnpremultiplyData(const uint8_t* aSrc, int32_t aSrcStride,
|
|||
return false;
|
||||
}
|
||||
|
||||
/**
 * Returns a row function that unpremultiplies pixels of aSrcFormat into
 * aDstFormat. The SSE2 and NEON implementations are tried first when they are
 * compiled in and supported at runtime; they only cover the BGRA/RGBA
 * combinations listed below. Anything they do not handle falls through to the
 * scalar fallback table. Asserts and returns nullptr when no implementation
 * matches the format pair.
 */
SwizzleRowFn UnpremultiplyRow(SurfaceFormat aSrcFormat,
                              SurfaceFormat aDstFormat) {
#ifdef USE_SSE2
  if (mozilla::supports_sse2()) {
    switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
      UNPREMULTIPLY_ROW_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8)
      UNPREMULTIPLY_ROW_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
      UNPREMULTIPLY_ROW_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8A8)
      UNPREMULTIPLY_ROW_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
      default:
        break;
    }
  }
#endif

#ifdef USE_NEON
  if (mozilla::supports_neon()) {
    switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
      UNPREMULTIPLY_ROW_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8)
      UNPREMULTIPLY_ROW_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
      UNPREMULTIPLY_ROW_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8A8)
      UNPREMULTIPLY_ROW_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
      default:
        break;
    }
  }
#endif

  // Scalar implementations, keyed by source format; each entry expands to the
  // supported destination formats.
  switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
    UNPREMULTIPLY_ROW_FALLBACK(SurfaceFormat::B8G8R8A8)
    UNPREMULTIPLY_ROW_FALLBACK(SurfaceFormat::R8G8B8A8)
    UNPREMULTIPLY_ROW_FALLBACK(SurfaceFormat::A8R8G8B8)
    default:
      break;
  }

  MOZ_ASSERT_UNREACHABLE("Unsupported unpremultiply formats");
  return nullptr;
}
|
||||
|
||||
/**
|
||||
* Swizzling
|
||||
*/
|
||||
|
@ -663,6 +742,15 @@ static void SwizzleFallback(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
|
|||
RGBBitShift(aSrcFormat), AlphaBitShift(aSrcFormat), \
|
||||
RGBBitShift(aDstFormat), AlphaBitShift(aDstFormat)>)
|
||||
|
||||
// Row fast-path for matching formats: no channel reordering is needed, so the
// row is copied byte-for-byte (aLength pixels of aBytesPerPixel bytes each).
template <int32_t aBytesPerPixel>
static void SwizzleRowCopy(const uint8_t* aSrc, uint8_t* aDst,
                           int32_t aLength) {
  // Converting a buffer onto itself requires no work.
  if (aSrc == aDst) {
    return;
  }
  memcpy(aDst, aSrc, aLength * aBytesPerPixel);
}
|
||||
|
||||
// Fast-path for matching formats.
|
||||
static void SwizzleCopy(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
|
||||
int32_t aDstGap, IntSize aSize, int32_t aBPP) {
|
||||
|
@ -727,6 +815,41 @@ static void SwizzleSwap(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
|
|||
SwizzleRowSwap<ShouldForceOpaque(aSrcFormat, aDstFormat), \
|
||||
AlphaBitShift(aSrcFormat), AlphaBitShift(aDstFormat)>)
|
||||
|
||||
// Swaps the first and third byte of each of aLength 3-byte pixels while
// copying from aSrc to aDst (RGB <-> BGR). aSrc and aDst are references and
// are left pointing just past the last pixel processed.
static void SwizzleChunkSwapRGB24(const uint8_t*& aSrc, uint8_t*& aDst,
                                  int32_t aLength) {
  // The do-while below unconditionally handles one pixel; an empty run must
  // not read or write anything.
  if (aLength <= 0) {
    return;
  }

  const uint8_t* end = aSrc + 3 * aLength;
  do {
    // Load the whole pixel before storing so in-place swaps work.
    uint8_t r = aSrc[0];
    uint8_t g = aSrc[1];
    uint8_t b = aSrc[2];
    aDst[0] = b;
    aDst[1] = g;
    aDst[2] = r;
    aSrc += 3;
    aDst += 3;
  } while (aSrc < end);
}
|
||||
|
||||
// Single-row RGB <-> BGR swap for 24-bit pixels. Forwards to the chunk helper
// using local cursors, since the helper advances the pointers it is given.
static void SwizzleRowSwapRGB24(const uint8_t* aSrc, uint8_t* aDst,
                                int32_t aLength) {
  const uint8_t* srcCursor = aSrc;
  uint8_t* dstCursor = aDst;
  SwizzleChunkSwapRGB24(srcCursor, dstCursor, aLength);
}
|
||||
|
||||
static void SwizzleSwapRGB24(const uint8_t* aSrc, int32_t aSrcGap,
|
||||
uint8_t* aDst, int32_t aDstGap, IntSize aSize) {
|
||||
for (int32_t height = aSize.height; height > 0; height--) {
|
||||
SwizzleChunkSwapRGB24(aSrc, aDst, aSize.width);
|
||||
aSrc += aSrcGap;
|
||||
aDst += aDstGap;
|
||||
}
|
||||
}
|
||||
|
||||
// Dispatch case for swapping R and B between two 24-bit formats over a whole
// surface. SwizzleSwapRGB24 takes gaps and a size rather than a row length, so
// it is dispatched with FORMAT_CASE like the other whole-surface functions;
// FORMAT_CASE_ROW is for the row functions.
#define SWIZZLE_SWAP_RGB24(aSrcFormat, aDstFormat) \
  FORMAT_CASE(aSrcFormat, aDstFormat, SwizzleSwapRGB24)
|
||||
|
||||
#define SWIZZLE_ROW_SWAP_RGB24(aSrcFormat, aDstFormat) \
|
||||
FORMAT_CASE_ROW(aSrcFormat, aDstFormat, SwizzleRowSwapRGB24)
|
||||
|
||||
// Fast-path for conversions that force alpha to opaque.
|
||||
template <uint32_t aDstAShift>
|
||||
static void SwizzleChunkOpaqueUpdate(uint8_t*& aBuffer, int32_t aLength) {
|
||||
|
@ -823,24 +946,36 @@ static void PackToRGB565(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
|
|||
}
|
||||
|
||||
// Packing of 32-bit formats to 24-bit formats.
//
// Copies the three color bytes of each of aLength 4-byte source pixels into
// 3-byte destination pixels, swapping the first and third color byte when
// aSwapRB is set. aSrcRGBIndex is the byte offset of the color triple inside
// the source pixel. aSrcRGBShift is not used by this byte-wise implementation.
// aSrc and aDst are references and are left pointing just past the last pixel
// processed.
template <bool aSwapRB, uint32_t aSrcRGBShift, uint32_t aSrcRGBIndex>
static void PackChunkToRGB24(const uint8_t*& aSrc, uint8_t*& aDst,
                             int32_t aLength) {
  // The do-while below unconditionally handles one pixel; an empty run must
  // not read or write anything.
  if (aLength <= 0) {
    return;
  }

  const uint8_t* end = aSrc + 4 * aLength;
  do {
    uint8_t r = aSrc[aSrcRGBIndex + (aSwapRB ? 2 : 0)];
    uint8_t g = aSrc[aSrcRGBIndex + 1];
    uint8_t b = aSrc[aSrcRGBIndex + (aSwapRB ? 0 : 2)];

    aDst[0] = r;
    aDst[1] = g;
    aDst[2] = b;

    aSrc += 4;
    aDst += 3;
  } while (aSrc < end);
}
|
||||
|
||||
template <bool aSwapRB, uint32_t aSrcRGBShift, uint32_t aSrcRGBIndex>
|
||||
static void PackRowToRGB24(const uint8_t* aSrc, uint8_t* aDst,
|
||||
int32_t aLength) {
|
||||
PackChunkToRGB24<aSwapRB, aSrcRGBShift, aSrcRGBIndex>(aSrc, aDst, aLength);
|
||||
}
|
||||
|
||||
template <bool aSwapRB, uint32_t aSrcRGBShift, uint32_t aSrcRGBIndex>
|
||||
static void PackToRGB24(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
|
||||
int32_t aDstGap, IntSize aSize) {
|
||||
for (int32_t height = aSize.height; height > 0; height--) {
|
||||
const uint8_t* end = aSrc + 4 * aSize.width;
|
||||
do {
|
||||
uint8_t r = aSrc[aSrcRGBIndex + (aSwapRB ? 2 : 0)];
|
||||
uint8_t g = aSrc[aSrcRGBIndex + 1];
|
||||
uint8_t b = aSrc[aSrcRGBIndex + (aSwapRB ? 0 : 2)];
|
||||
|
||||
aDst[0] = r;
|
||||
aDst[1] = g;
|
||||
aDst[2] = b;
|
||||
|
||||
aSrc += 4;
|
||||
aDst += 3;
|
||||
} while (aSrc < end);
|
||||
|
||||
PackChunkToRGB24<aSwapRB, aSrcRGBShift, aSrcRGBIndex>(aSrc, aDst,
|
||||
aSize.width);
|
||||
aSrc += aSrcGap;
|
||||
aDst += aDstGap;
|
||||
}
|
||||
|
@ -859,6 +994,20 @@ static void PackToRGB24(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
|
|||
PACK_RGB_CASE(SurfaceFormat::A8R8G8B8, aDstFormat, aPackFunc) \
|
||||
PACK_RGB_CASE(SurfaceFormat::X8R8G8B8, aDstFormat, aPackFunc)
|
||||
|
||||
#define PACK_ROW_RGB_CASE(aSrcFormat, aDstFormat, aPackFunc) \
|
||||
FORMAT_CASE_ROW( \
|
||||
aSrcFormat, aDstFormat, \
|
||||
aPackFunc<ShouldSwapRB(aSrcFormat, aDstFormat), RGBBitShift(aSrcFormat), \
|
||||
RGBByteIndex(aSrcFormat)>)
|
||||
|
||||
#define PACK_ROW_RGB(aDstFormat, aPackFunc) \
|
||||
PACK_ROW_RGB_CASE(SurfaceFormat::B8G8R8A8, aDstFormat, aPackFunc) \
|
||||
PACK_ROW_RGB_CASE(SurfaceFormat::B8G8R8X8, aDstFormat, aPackFunc) \
|
||||
PACK_ROW_RGB_CASE(SurfaceFormat::R8G8B8A8, aDstFormat, aPackFunc) \
|
||||
PACK_ROW_RGB_CASE(SurfaceFormat::R8G8B8X8, aDstFormat, aPackFunc) \
|
||||
PACK_ROW_RGB_CASE(SurfaceFormat::A8R8G8B8, aDstFormat, aPackFunc) \
|
||||
PACK_ROW_RGB_CASE(SurfaceFormat::X8R8G8B8, aDstFormat, aPackFunc)
|
||||
|
||||
// Packing of 32-bit formats to A8.
|
||||
template <uint32_t aSrcAIndex>
|
||||
static void PackToA8(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
|
||||
|
@ -1006,6 +1155,9 @@ bool SwizzleData(const uint8_t* aSrc, int32_t aSrcStride,
|
|||
SWIZZLE_SWAP(SurfaceFormat::X8R8G8B8, SurfaceFormat::B8G8R8X8)
|
||||
SWIZZLE_SWAP(SurfaceFormat::X8R8G8B8, SurfaceFormat::B8G8R8A8)
|
||||
|
||||
SWIZZLE_SWAP_RGB24(SurfaceFormat::R8G8B8, SurfaceFormat::B8G8R8)
|
||||
SWIZZLE_SWAP_RGB24(SurfaceFormat::B8G8R8, SurfaceFormat::R8G8B8)
|
||||
|
||||
SWIZZLE_OPAQUE(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8X8)
|
||||
SWIZZLE_OPAQUE(SurfaceFormat::B8G8R8X8, SurfaceFormat::B8G8R8A8)
|
||||
SWIZZLE_OPAQUE(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8X8)
|
||||
|
@ -1121,6 +1273,9 @@ SwizzleRowFn SwizzleRow(SurfaceFormat aSrcFormat, SurfaceFormat aDstFormat) {
|
|||
SWIZZLE_ROW_SWAP(SurfaceFormat::X8R8G8B8, SurfaceFormat::B8G8R8X8)
|
||||
SWIZZLE_ROW_SWAP(SurfaceFormat::X8R8G8B8, SurfaceFormat::B8G8R8A8)
|
||||
|
||||
SWIZZLE_ROW_SWAP_RGB24(SurfaceFormat::R8G8B8, SurfaceFormat::B8G8R8)
|
||||
SWIZZLE_ROW_SWAP_RGB24(SurfaceFormat::B8G8R8, SurfaceFormat::R8G8B8)
|
||||
|
||||
UNPACK_ROW_RGB(SurfaceFormat::R8G8B8X8)
|
||||
UNPACK_ROW_RGB(SurfaceFormat::R8G8B8A8)
|
||||
UNPACK_ROW_RGB(SurfaceFormat::B8G8R8X8)
|
||||
|
@ -1128,10 +1283,24 @@ SwizzleRowFn SwizzleRow(SurfaceFormat aSrcFormat, SurfaceFormat aDstFormat) {
|
|||
UNPACK_ROW_RGB_TO_ARGB(SurfaceFormat::A8R8G8B8)
|
||||
UNPACK_ROW_RGB_TO_ARGB(SurfaceFormat::X8R8G8B8)
|
||||
|
||||
PACK_ROW_RGB(SurfaceFormat::R8G8B8, PackRowToRGB24)
|
||||
PACK_ROW_RGB(SurfaceFormat::B8G8R8, PackRowToRGB24)
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (aSrcFormat == aDstFormat) {
|
||||
switch (BytesPerPixel(aSrcFormat)) {
|
||||
case 4:
|
||||
return &SwizzleRowCopy<4>;
|
||||
case 3:
|
||||
return &SwizzleRowCopy<3>;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
MOZ_ASSERT_UNREACHABLE("Unsupported swizzle formats");
|
||||
return nullptr;
|
||||
}
|
||||
|
|
|
@ -54,6 +54,12 @@ typedef void (*SwizzleRowFn)(const uint8_t* aSrc, uint8_t* aDst,
|
|||
GFX2D_API SwizzleRowFn PremultiplyRow(SurfaceFormat aSrcFormat,
|
||||
SurfaceFormat aDstFormat);
|
||||
|
||||
/**
|
||||
* Get a function pointer to perform unpremultiplication between two formats.
|
||||
*/
|
||||
GFX2D_API SwizzleRowFn UnpremultiplyRow(SurfaceFormat aSrcFormat,
|
||||
SurfaceFormat aDstFormat);
|
||||
|
||||
/**
|
||||
* Get a function pointer to perform swizzling between two formats.
|
||||
*/
|
||||
|
|
|
@ -243,6 +243,36 @@ UnpremultiplyVector_NEON(const uint16x8_t& aSrc) {
|
|||
vsliq_n_u16(rb, ga, 8));
|
||||
}
|
||||
|
||||
template <bool aSwapRB>
|
||||
static MOZ_ALWAYS_INLINE void UnpremultiplyChunk_NEON(const uint8_t*& aSrc,
|
||||
uint8_t*& aDst,
|
||||
int32_t aAlignedRow,
|
||||
int32_t aRemainder) {
|
||||
// Process all 4-pixel chunks as one vector.
|
||||
for (const uint8_t* end = aSrc + aAlignedRow; aSrc < end;) {
|
||||
uint16x8_t px = vld1q_u16(reinterpret_cast<const uint16_t*>(aSrc));
|
||||
px = UnpremultiplyVector_NEON<aSwapRB>(px);
|
||||
vst1q_u16(reinterpret_cast<uint16_t*>(aDst), px);
|
||||
aSrc += 4 * 4;
|
||||
aDst += 4 * 4;
|
||||
}
|
||||
|
||||
// Handle any 1-3 remaining pixels.
|
||||
if (aRemainder) {
|
||||
uint16x8_t px = LoadRemainder_NEON(aSrc, remainder);
|
||||
px = UnpremultiplyVector_NEON<aSwapRB>(px);
|
||||
StoreRemainder_NEON(aDst, remainder, px);
|
||||
}
|
||||
}
|
||||
|
||||
template <bool aSwapRB>
|
||||
void UnpremultiplyRow_NEON(const uint8_t* aSrc, uint8_t* aDst,
|
||||
int32_t aLength) {
|
||||
int32_t alignedRow = 4 * (aLength & ~3);
|
||||
int32_t remainder = aLength & 3;
|
||||
UnpremultiplyChunk_NEON<aSwapRB>(aSrc, aDst, alignedRow, remainder);
|
||||
}
|
||||
|
||||
template <bool aSwapRB>
|
||||
void Unpremultiply_NEON(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
|
||||
int32_t aDstGap, IntSize aSize) {
|
||||
|
@ -253,28 +283,15 @@ void Unpremultiply_NEON(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
|
|||
aDstGap += 4 * remainder;
|
||||
|
||||
for (int32_t height = aSize.height; height > 0; height--) {
|
||||
// Process all 4-pixel chunks as one vector.
|
||||
for (const uint8_t* end = aSrc + alignedRow; aSrc < end;) {
|
||||
uint16x8_t px = vld1q_u16(reinterpret_cast<const uint16_t*>(aSrc));
|
||||
px = UnpremultiplyVector_NEON<aSwapRB>(px);
|
||||
vst1q_u16(reinterpret_cast<uint16_t*>(aDst), px);
|
||||
aSrc += 4 * 4;
|
||||
aDst += 4 * 4;
|
||||
}
|
||||
|
||||
// Handle any 1-3 remaining pixels.
|
||||
if (remainder) {
|
||||
uint16x8_t px = LoadRemainder_NEON(aSrc, remainder);
|
||||
px = UnpremultiplyVector_NEON<aSwapRB>(px);
|
||||
StoreRemainder_NEON(aDst, remainder, px);
|
||||
}
|
||||
|
||||
UnpremultiplyChunk_NEON<aSwapRB>(aSrc, aDst, alignedRow, remainder);
|
||||
aSrc += aSrcGap;
|
||||
aDst += aDstGap;
|
||||
}
|
||||
}
|
||||
|
||||
// Force instantiation of unpremultiply variants here.
|
||||
template void UnpremultiplyRow_NEON<false>(const uint8_t*, uint8_t*, int32_t);
|
||||
template void UnpremultiplyRow_NEON<true>(const uint8_t*, uint8_t*, int32_t);
|
||||
template void Unpremultiply_NEON<false>(const uint8_t*, int32_t, uint8_t*,
|
||||
int32_t, IntSize);
|
||||
template void Unpremultiply_NEON<true>(const uint8_t*, int32_t, uint8_t*,
|
||||
|
|
|
@ -242,6 +242,36 @@ static MOZ_ALWAYS_INLINE __m128i UnpremultiplyVector_SSE2(const __m128i& aSrc) {
|
|||
return _mm_or_si128(rb, ga);
|
||||
}
|
||||
|
||||
template <bool aSwapRB>
|
||||
static MOZ_ALWAYS_INLINE void UnpremultiplyChunk_SSE2(const uint8_t*& aSrc,
|
||||
uint8_t*& aDst,
|
||||
int32_t aAlignedRow,
|
||||
int32_t aRemainder) {
|
||||
// Process all 4-pixel chunks as one vector.
|
||||
for (const uint8_t* end = aSrc + aAlignedRow; aSrc < end;) {
|
||||
__m128i px = _mm_loadu_si128(reinterpret_cast<const __m128i*>(aSrc));
|
||||
px = UnpremultiplyVector_SSE2<aSwapRB>(px);
|
||||
_mm_storeu_si128(reinterpret_cast<__m128i*>(aDst), px);
|
||||
aSrc += 4 * 4;
|
||||
aDst += 4 * 4;
|
||||
}
|
||||
|
||||
// Handle any 1-3 remaining pixels.
|
||||
if (aRemainder) {
|
||||
__m128i px = LoadRemainder_SSE2(aSrc, aRemainder);
|
||||
px = UnpremultiplyVector_SSE2<aSwapRB>(px);
|
||||
StoreRemainder_SSE2(aDst, aRemainder, px);
|
||||
}
|
||||
}
|
||||
|
||||
template <bool aSwapRB>
|
||||
void UnpremultiplyRow_SSE2(const uint8_t* aSrc, uint8_t* aDst,
|
||||
int32_t aLength) {
|
||||
int32_t alignedRow = 4 * (aLength & ~3);
|
||||
int32_t remainder = aLength & 3;
|
||||
UnpremultiplyChunk_SSE2<aSwapRB>(aSrc, aDst, alignedRow, remainder);
|
||||
}
|
||||
|
||||
template <bool aSwapRB>
|
||||
void Unpremultiply_SSE2(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
|
||||
int32_t aDstGap, IntSize aSize) {
|
||||
|
@ -252,28 +282,15 @@ void Unpremultiply_SSE2(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
|
|||
aDstGap += 4 * remainder;
|
||||
|
||||
for (int32_t height = aSize.height; height > 0; height--) {
|
||||
// Process all 4-pixel chunks as one vector.
|
||||
for (const uint8_t* end = aSrc + alignedRow; aSrc < end;) {
|
||||
__m128i px = _mm_loadu_si128(reinterpret_cast<const __m128i*>(aSrc));
|
||||
px = UnpremultiplyVector_SSE2<aSwapRB>(px);
|
||||
_mm_storeu_si128(reinterpret_cast<__m128i*>(aDst), px);
|
||||
aSrc += 4 * 4;
|
||||
aDst += 4 * 4;
|
||||
}
|
||||
|
||||
// Handle any 1-3 remaining pixels.
|
||||
if (remainder) {
|
||||
__m128i px = LoadRemainder_SSE2(aSrc, remainder);
|
||||
px = UnpremultiplyVector_SSE2<aSwapRB>(px);
|
||||
StoreRemainder_SSE2(aDst, remainder, px);
|
||||
}
|
||||
|
||||
UnpremultiplyChunk_SSE2<aSwapRB>(aSrc, aDst, alignedRow, remainder);
|
||||
aSrc += aSrcGap;
|
||||
aDst += aDstGap;
|
||||
}
|
||||
}
|
||||
|
||||
// Force instantiation of unpremultiply variants here.
|
||||
template void UnpremultiplyRow_SSE2<false>(const uint8_t*, uint8_t*, int32_t);
|
||||
template void UnpremultiplyRow_SSE2<true>(const uint8_t*, uint8_t*, int32_t);
|
||||
template void Unpremultiply_SSE2<false>(const uint8_t*, int32_t, uint8_t*,
|
||||
int32_t, IntSize);
|
||||
template void Unpremultiply_SSE2<true>(const uint8_t*, int32_t, uint8_t*,
|
||||
|
|
|
@ -62,6 +62,9 @@ TEST(Moz2D, PremultiplyRow)
|
|||
const uint8_t check_rgba[5 * 4] = {
|
||||
0, 255, 255, 255, 255, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 128,
|
||||
};
|
||||
const uint8_t check_argb[5 * 4] = {
|
||||
255, 0, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 128,
|
||||
};
|
||||
|
||||
SwizzleRowFn func =
|
||||
PremultiplyRow(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8);
|
||||
|
@ -71,6 +74,10 @@ TEST(Moz2D, PremultiplyRow)
|
|||
func = PremultiplyRow(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8);
|
||||
func(in_bgra, out, 5);
|
||||
EXPECT_TRUE(ArrayEqual(out, check_rgba));
|
||||
|
||||
func = PremultiplyRow(SurfaceFormat::B8G8R8A8, SurfaceFormat::A8R8G8B8);
|
||||
func(in_bgra, out, 5);
|
||||
EXPECT_TRUE(ArrayEqual(out, check_argb));
|
||||
}
|
||||
|
||||
TEST(Moz2D, UnpremultiplyData)
|
||||
|
@ -107,6 +114,41 @@ TEST(Moz2D, UnpremultiplyData)
|
|||
EXPECT_TRUE(ArrayEqual(out, check_argb));
|
||||
}
|
||||
|
||||
// Runs UnpremultiplyRow over one 5-pixel BGRA row and checks the result for
// same-format output, R/B-swapped output, and ARGB output.
TEST(Moz2D, UnpremultiplyRow)
{
  // One pixel per line, in B, G, R, A byte order.
  const uint8_t in_bgra[5 * 4] = {
      255, 255, 0,   255,  // verify 255 alpha leaves RGB unchanged
      0,   0,   255, 255,  //
      0,   0,   0,   0,    // verify 0 alpha leaves RGB at 0
      0,   0,   0,   64,   // verify 0 RGB stays 0 with non-zero alpha
      128, 0,   0,   128,  // verify that RGB == alpha maps to 255
  };
  uint8_t out[5 * 4];

  // Expected output in the source byte order.
  const uint8_t check_bgra[5 * 4] = {
      255, 255, 0,   255,  //
      0,   0,   255, 255,  //
      0,   0,   0,   0,    //
      0,   0,   0,   64,   //
      255, 0,   0,   128,  //
  };
  // check swizzled output
  const uint8_t check_rgba[5 * 4] = {
      0,   255, 255, 255,  //
      255, 0,   0,   255,  //
      0,   0,   0,   0,    //
      0,   0,   0,   64,   //
      0,   0,   255, 128,  //
  };
  const uint8_t check_argb[5 * 4] = {
      255, 0,   255, 255,  //
      255, 255, 0,   0,    //
      0,   0,   0,   0,    //
      64,  0,   0,   0,    //
      128, 0,   0,   255,  //
  };

  SwizzleRowFn toBgra =
      UnpremultiplyRow(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8);
  toBgra(in_bgra, out, 5);
  EXPECT_TRUE(ArrayEqual(out, check_bgra));

  SwizzleRowFn toRgba =
      UnpremultiplyRow(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8);
  toRgba(in_bgra, out, 5);
  EXPECT_TRUE(ArrayEqual(out, check_rgba));

  SwizzleRowFn toArgb =
      UnpremultiplyRow(SurfaceFormat::B8G8R8A8, SurfaceFormat::A8R8G8B8);
  toArgb(in_bgra, out, 5);
  EXPECT_TRUE(ArrayEqual(out, check_argb));
}
|
||||
|
||||
TEST(Moz2D, SwizzleData)
|
||||
{
|
||||
const uint8_t in_bgra[5 * 4] = {
|
||||
|
@ -200,6 +242,13 @@ TEST(Moz2D, SwizzleRow)
|
|||
0, 254, 253, 255, 255, 0, 0, 255, 0, 0,
|
||||
0, 255, 3, 2, 1, 255, 9, 0, 127, 255,
|
||||
};
|
||||
// check packing
|
||||
uint8_t out24[5 * 3];
|
||||
const uint8_t check_bgr[5 * 3] = {253, 254, 0, 0, 0, 255, 0, 0,
|
||||
0, 1, 2, 3, 127, 0, 9};
|
||||
const uint8_t check_rgb[5 * 3] = {
|
||||
0, 254, 253, 255, 0, 0, 0, 0, 0, 3, 2, 1, 9, 0, 127,
|
||||
};
|
||||
// check unpacking
|
||||
uint8_t out_unpack[16 * 4];
|
||||
const uint8_t in_rgb[16 * 3] = {
|
||||
|
@ -235,6 +284,18 @@ TEST(Moz2D, SwizzleRow)
|
|||
func(in_bgra, out, 5);
|
||||
EXPECT_TRUE(ArrayEqual(out, check_rgbx));
|
||||
|
||||
func = SwizzleRow(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8);
|
||||
func(in_bgra, out, 5);
|
||||
EXPECT_TRUE(ArrayEqual(out, in_bgra));
|
||||
|
||||
func = SwizzleRow(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8);
|
||||
func(in_bgra, out24, 5);
|
||||
EXPECT_TRUE(ArrayEqual(out24, check_bgr));
|
||||
|
||||
func = SwizzleRow(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8);
|
||||
func(in_bgra, out24, 5);
|
||||
EXPECT_TRUE(ArrayEqual(out24, check_rgb));
|
||||
|
||||
func = SwizzleRow(SurfaceFormat::R8G8B8, SurfaceFormat::B8G8R8X8);
|
||||
func(in_rgb, out_unpack, 16);
|
||||
EXPECT_TRUE(ArrayEqual(out_unpack, check_unpack_bgrx));
|
||||
|
|
Загрузка…
Ссылка в новой задаче