зеркало из https://github.com/mozilla/pjs.git
Bug 413143: trunk is broken (SIGBUS) on SPARC since 20071221. r/sr=pavlov, a=beltzner
Unaligned 32-bit memory reads cause a SIGBUS on SPARC. It turns out that fixing this also gives a small performance boost on other platforms, because we are now mostly doing aligned 32-bit reads.
This commit is contained in:
Родитель
c96a0cd675
Коммит
ad19ea8cb7
|
@ -44,7 +44,32 @@
|
|||
|
||||
#include "gfxTypes.h"
|
||||
|
||||
// Read the i-th 32-bit word of the buffer at pbptr (pbptr is cast to PRUint32*).
// NOTE: pbptr should be 32-bit aligned; unaligned 32-bit reads raise SIGBUS on SPARC.
#define GFX_UINT32_FROM_BPTR(pbptr,i) (((PRUint32*)(pbptr))[i])
|
||||
|
||||
#if defined(IS_BIG_ENDIAN)
|
||||
#define GFX_NTOHL(x) (x)
|
||||
#define GFX_HAVE_CHEAP_NTOHL
|
||||
#elif defined(_WIN32)
|
||||
#if (_MSC_VER >= 1300) // also excludes MinGW
|
||||
#include <stdlib.h>
|
||||
#pragma intrinsic(_byteswap_ulong)
|
||||
#define GFX_NTOHL(x) _byteswap_ulong(x)
|
||||
#define GFX_HAVE_CHEAP_NTOHL
|
||||
#else
|
||||
// Generic little-endian fallback: reverses the four bytes of x using two rotates and masks (note: evaluates x twice).
|
||||
#define GFX_NTOHL(x) \
|
||||
( (PR_ROTATE_RIGHT32((x),8) & 0xFF00FF00) | \
|
||||
(PR_ROTATE_LEFT32((x),8) & 0x00FF00FF) )
|
||||
#endif
|
||||
#else
|
||||
#include "prio.h" // for ntohl
|
||||
#define GFX_NTOHL(x) ntohl(x)
|
||||
#define GFX_HAVE_CHEAP_NTOHL
|
||||
#endif
|
||||
|
||||
/**
|
||||
* GFX_0XFF_PPIXEL_FROM_BPTR(x)
|
||||
*
|
||||
* Avoid tortured construction of 32-bit ARGB pixel from 3 individual bytes
|
||||
* of memory plus constant 0xFF. RGB bytes are already contiguous!
|
||||
* Equivalent to: GFX_PACKED_PIXEL(0xff,r,g,b)
|
||||
|
@ -52,28 +77,42 @@
|
|||
* Attempt to use fast byte-swapping instruction(s), e.g. bswap on x86, in
|
||||
* preference to a sequence of shift/or operations.
|
||||
*/
|
||||
#if defined(_WIN32)
|
||||
#if defined(IS_BIG_ENDIAN)
|
||||
#define GFX_0XFF_PPIXEL_FROM_BPTR(pbptr) \
|
||||
( (*((PRUint32 *)(pbptr)) >> 8) | (0xFF << 24) )
|
||||
#elif (_MSC_VER >= 1300) // also excludes MinGW
|
||||
#include <stdlib.h>
|
||||
#pragma intrinsic(_byteswap_ulong)
|
||||
#define GFX_0XFF_PPIXEL_FROM_BPTR(pbptr) \
|
||||
( (_byteswap_ulong(*((PRUint32 *)(pbptr))) >> 8) | (0xFF << 24) )
|
||||
#else
|
||||
// A reasonably fast generic implementation.
|
||||
#define GFX_BYTESWAP24FF(x) \
|
||||
( ((((x) << 16) | ((x) >> 16)) | 0xFF00FF00) & ((x) | 0xFFFF00FF) )
|
||||
#define GFX_0XFF_PPIXEL_FROM_BPTR(pbptr) \
|
||||
( GFX_BYTESWAP24FF(*((PRUint32 *)(pbptr))) )
|
||||
#endif
|
||||
#if defined(GFX_HAVE_CHEAP_NTOHL)
|
||||
#define GFX_0XFF_PPIXEL_FROM_UINT32(x) \
|
||||
( (GFX_NTOHL(x) >> 8) | (0xFF << 24) )
|
||||
#else
|
||||
#include "prio.h" // for ntohl
|
||||
#define GFX_0XFF_PPIXEL_FROM_BPTR(pbptr) \
|
||||
( (ntohl(*((PRUint32 *)(pbptr))) >> 8) | (0xFF << 24) )
|
||||
// Generic little-endian fallback: a 16-bit rotate swaps the R and B bytes, G stays in place, and the top byte is forced to 0xFF (note: evaluates x twice).
|
||||
#define GFX_0XFF_PPIXEL_FROM_UINT32(x) \
|
||||
( (PR_ROTATE_LEFT32((x),16) | 0xFF00FF00) & ((x) | 0xFFFF00FF) )
|
||||
#endif
|
||||
|
||||
#define GFX_0XFF_PPIXEL_FROM_BPTR(x) \
|
||||
( GFX_0XFF_PPIXEL_FROM_UINT32(GFX_UINT32_FROM_BPTR((x),0)) )
|
||||
|
||||
/**
|
||||
* GFX_BLOCK_RGB_TO_FRGB(from,to)
|
||||
* sizeof(*from) == sizeof(char)
|
||||
* sizeof(*to) == sizeof(PRUint32)
|
||||
*
|
||||
* Copy 4 pixels at a time, reading blocks of 12 bytes (RGB x4)
|
||||
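* and writing blocks of 16 bytes (FRGB x4).
* 'from' is read as three 32-bit words, so it should be 32-bit aligned
* (unaligned 32-bit reads raise SIGBUS on SPARC).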
|
||||
*/
|
||||
#define GFX_BLOCK_RGB_TO_FRGB(from,to) \
|
||||
PR_BEGIN_MACRO \
|
||||
PRUint32 m0 = GFX_UINT32_FROM_BPTR(from,0), \
|
||||
m1 = GFX_UINT32_FROM_BPTR(from,1), \
|
||||
m2 = GFX_UINT32_FROM_BPTR(from,2), \
|
||||
rgbr = GFX_NTOHL(m0), \
|
||||
gbrg = GFX_NTOHL(m1), \
|
||||
brgb = GFX_NTOHL(m2), \
|
||||
p0, p1, p2, p3; \
|
||||
p0 = 0xFF000000 | ((rgbr) >> 8); \
|
||||
p1 = 0xFF000000 | ((rgbr) << 16) | ((gbrg) >> 16); \
|
||||
p2 = 0xFF000000 | ((gbrg) << 8) | ((brgb) >> 24); \
|
||||
p3 = 0xFF000000 | (brgb); \
|
||||
to[0] = p0; to[1] = p1; to[2] = p2; to[3] = p3; \
|
||||
PR_END_MACRO
|
||||
|
||||
/**
|
||||
* Fast approximate division by 255. It has the property that
|
||||
* for all 0 <= n <= 255*255, FAST_DIVIDE_BY_255(n) == n/255.
|
||||
|
|
|
@ -689,28 +689,26 @@ static void ConvertColormap(PRUint32 *aColormap, PRUint32 aColors)
|
|||
if (!aColors) return;
|
||||
PRUint32 c = aColors;
|
||||
|
||||
// copy 1st pixel as bytes to avoid reading past end of buffer
|
||||
*--to = GFX_PACKED_PIXEL(0xFF, from[-3], from[-2], from[-1]);
|
||||
from -= 3; c--;
|
||||
// copy as bytes until source pointer is 32-bit-aligned
|
||||
// NB: can't use 32-bit reads, they might read off the end of the buffer
|
||||
while ((NS_PTR_TO_UINT32(from) & 0x3) && c--) {
|
||||
from -= 3;
|
||||
*--to = GFX_PACKED_PIXEL(0xFF, from[0], from[1], from[2]);
|
||||
}
|
||||
|
||||
// bulk copy of pixels.
|
||||
while (c >= 4) {
|
||||
PRUint32 p0, p1, p2, p3; // to avoid back-to-back register stalls
|
||||
from -= 12;
|
||||
to -= 4;
|
||||
c -= 4;
|
||||
p0 = GFX_0XFF_PPIXEL_FROM_BPTR(from+9);
|
||||
p1 = GFX_0XFF_PPIXEL_FROM_BPTR(from+6);
|
||||
p2 = GFX_0XFF_PPIXEL_FROM_BPTR(from+3);
|
||||
p3 = GFX_0XFF_PPIXEL_FROM_BPTR(from+0);
|
||||
to[3] = p0; to[2] = p1;
|
||||
to[1] = p2; to[0] = p3;
|
||||
GFX_BLOCK_RGB_TO_FRGB(from,to);
|
||||
}
|
||||
|
||||
// copy remaining pixel(s)
|
||||
// NB: can't use 32-bit reads, they might read off the end of the buffer
|
||||
while (c--) {
|
||||
from -= 3;
|
||||
*--to = GFX_0XFF_PPIXEL_FROM_BPTR(from);
|
||||
*--to = GFX_PACKED_PIXEL(0xFF, from[0], from[1], from[2]);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -730,15 +730,15 @@ nsJPEGDecoder::OutputScanlines()
|
|||
// counter for while() loops below
|
||||
PRUint32 idx = mInfo.output_width;
|
||||
|
||||
// bulk copy of pixels.
|
||||
while (idx > 4) { // >4 to avoid last 3 bytes in buffer
|
||||
PRUint32 p0, p1, p2, p3; // to avoid back-to-back register stalls
|
||||
p0 = GFX_0XFF_PPIXEL_FROM_BPTR(sampleRow+0);
|
||||
p1 = GFX_0XFF_PPIXEL_FROM_BPTR(sampleRow+3);
|
||||
p2 = GFX_0XFF_PPIXEL_FROM_BPTR(sampleRow+6);
|
||||
p3 = GFX_0XFF_PPIXEL_FROM_BPTR(sampleRow+9);
|
||||
imageRow[0] = p0; imageRow[1] = p1;
|
||||
imageRow[2] = p2; imageRow[3] = p3;
|
||||
// copy as bytes until source pointer is 32-bit-aligned
|
||||
while ((NS_PTR_TO_UINT32(sampleRow) & 0x3) && idx--) {
|
||||
*imageRow++ = GFX_PACKED_PIXEL(0xFF, sampleRow[0], sampleRow[1], sampleRow[2]);
|
||||
sampleRow += 3;
|
||||
}
|
||||
|
||||
// copy pixels in blocks of 4
|
||||
while (idx >= 4) {
|
||||
GFX_BLOCK_RGB_TO_FRGB(sampleRow, imageRow);
|
||||
idx -= 4;
|
||||
sampleRow += 12;
|
||||
imageRow += 4;
|
||||
|
|
|
@ -735,15 +735,15 @@ row_callback(png_structp png_ptr, png_bytep new_row,
|
|||
// counter for while() loops below
|
||||
PRUint32 idx = iwidth;
|
||||
|
||||
// bulk copy of pixels.
|
||||
while (idx > 4) { // >4 to avoid last 3 bytes in buffer
|
||||
PRUint32 p0, p1, p2, p3; // to avoid back-to-back register stalls
|
||||
p0 = GFX_0XFF_PPIXEL_FROM_BPTR(line+0);
|
||||
p1 = GFX_0XFF_PPIXEL_FROM_BPTR(line+3);
|
||||
p2 = GFX_0XFF_PPIXEL_FROM_BPTR(line+6);
|
||||
p3 = GFX_0XFF_PPIXEL_FROM_BPTR(line+9);
|
||||
cptr32[0] = p0; cptr32[1] = p1;
|
||||
cptr32[2] = p2; cptr32[3] = p3;
|
||||
// copy as bytes until source pointer is 32-bit-aligned
|
||||
while ((NS_PTR_TO_UINT32(line) & 0x3) && idx--) {
|
||||
*cptr32++ = GFX_PACKED_PIXEL(0xFF, line[0], line[1], line[2]);
|
||||
line += 3;
|
||||
}
|
||||
|
||||
// copy pixels in blocks of 4
|
||||
while (idx >= 4) {
|
||||
GFX_BLOCK_RGB_TO_FRGB(line, cptr32);
|
||||
idx -= 4;
|
||||
line += 12;
|
||||
cptr32 += 4;
|
||||
|
|
Загрузка…
Ссылка в новой задаче