Merge pull request #3772 from akallabeth/sse_test_fixes

Sse test fixes
This commit is contained in:
David Fort 2017-02-16 16:37:27 +01:00 коммит произвёл GitHub
Родитель af4034b68b c50e5ba57e
Коммит c0f4b6bcad
10 изменённых файлов: 508 добавлений и 272 удалений

Просмотреть файл

@ -375,7 +375,7 @@ static BOOL xf_sw_desktop_resize(rdpContext* context)
}
if (!(xfc->image = XCreateImage(xfc->display, xfc->visual, xfc->depth, ZPixmap,
0, gdi->primary_buffer, gdi->width,
0, (char*)gdi->primary_buffer, gdi->width,
gdi->height, xfc->scanline_pad, gdi->stride)))
{
goto out;

Просмотреть файл

@ -437,7 +437,6 @@ static BOOL xf_gdi_opaque_rect(rdpContext* context,
const OPAQUE_RECT_ORDER* opaque_rect)
{
XColor color;
rdpGdi* gdi = context->gdi;
xfContext* xfc = (xfContext*) context;
BOOL ret = TRUE;
@ -467,7 +466,6 @@ static BOOL xf_gdi_multi_opaque_rect(rdpContext* context,
UINT32 i;
xfContext* xfc = (xfContext*) context;
BOOL ret = TRUE;
rdpGdi* gdi = context->gdi;
XColor color;
if (!xf_decode_color(xfc, multi_opaque_rect->color, &color))

Просмотреть файл

@ -625,7 +625,6 @@ static BOOL freerdp_client_parse_rdp_file_buffer_unicode(rdpFile* file, const BY
if (length > 1)
{
const WCHAR* beg = line;
const WCHAR* end = &line[length - 1];
if (!freerdp_client_parse_rdp_file_add_line_unicode(file, line, index))
goto fail;

Просмотреть файл

@ -459,8 +459,6 @@
int freerdp_bitmap_compress(const char* srcData, int width, int height,
wStream* s, int bpp, int byte_limit, int start_line, wStream* temp_s, int e)
{
char *line;
char *last_line;
char fom_mask[8192]; /* good for up to 64K bitmap */
int lines_sent;
int pixel;
@ -485,7 +483,6 @@ int freerdp_bitmap_compress(const char* srcData, int width, int height,
Stream_SetPosition(temp_s, 0);
fom_mask_len = 0;
last_line = 0;
lines_sent = 0;
end = width + e;
count = 0;
@ -502,9 +499,10 @@ int freerdp_bitmap_compress(const char* srcData, int width, int height,
if ((bpp == 15) || (bpp == 16))
{
const char* line = srcData + width * start_line * 2;
const char *last_line = NULL;
mix = (bpp == 15) ? 0xBA1F : 0xFFFF;
out_count = end * 2;
line = srcData + width * start_line * 2;
while (start_line >= 0 && out_count < 32768)
{
@ -794,9 +792,10 @@ int freerdp_bitmap_compress(const char* srcData, int width, int height,
}
else if (bpp == 24)
{
const char* line = srcData + width * start_line * 4;
const char *last_line = NULL;
mix = 0xFFFFFF;
out_count = end * 3;
line = srcData + width * start_line * 4;
while (start_line >= 0 && out_count < 32768)
{

Просмотреть файл

@ -207,28 +207,22 @@ static pstatus_t sse2_yCbCrToRGB_16s8u_P3AC4R_BGRX(
BYTE* pDst, UINT32 dstStep,
const prim_size_t* roi) /* region of interest */
{
__m128i zero, max, r_cr, g_cb, g_cr, b_cb, c4096;
const __m128i* y_buf, *cb_buf, *cr_buf;
__m128i* d_buf;
int srcbump, dstbump, yp, imax;
size_t dstPad, yPad, cbPad, crPad;
zero = _mm_setzero_si128();
max = _mm_set1_epi16(255);
y_buf = (const __m128i*)(pSrc[0]);
cb_buf = (const __m128i*)(pSrc[1]);
cr_buf = (const __m128i*)(pSrc[2]);
d_buf = (__m128i*)pDst;
r_cr = _mm_set1_epi16(22986); /* 1.403 << 14 */
g_cb = _mm_set1_epi16(-5636); /* -0.344 << 14 */
g_cr = _mm_set1_epi16(-11698); /* -0.714 << 14 */
b_cb = _mm_set1_epi16(28999); /* 1.770 << 14 */
c4096 = _mm_set1_epi16(4096);
srcbump = srcStep / sizeof(__m128i);
dstbump = dstStep / sizeof(__m128i);
dstPad = (dstStep - roi->width * 4) / sizeof(__m128i);
yPad = 0;
cbPad = 0;
crPad = 0;
const __m128i zero = _mm_setzero_si128();
const __m128i max = _mm_set1_epi16(255);
const __m128i r_cr = _mm_set1_epi16(22986); /* 1.403 << 14 */
const __m128i g_cb = _mm_set1_epi16(-5636); /* -0.344 << 14 */
const __m128i g_cr = _mm_set1_epi16(-11698); /* -0.714 << 14 */
const __m128i b_cb = _mm_set1_epi16(28999); /* 1.770 << 14 */
const __m128i c4096 = _mm_set1_epi16(4096);
const INT16* y_buf = (INT16*)pSrc[0];
const INT16* cb_buf = (INT16*)pSrc[1];
const INT16* cr_buf = (INT16*)pSrc[2];
const UINT32 pad = roi->width % 16;
const UINT32 step = sizeof(__m128i) / sizeof(INT16);
const UINT32 imax = (roi->width - pad) * sizeof(INT16) / sizeof(__m128i);
BYTE* d_buf = pDst;
int yp;
const size_t dstPad = (dstStep - roi->width * 4);
#ifdef DO_PREFETCH
/* Prefetch Y's, Cb's, and Cr's. */
@ -236,28 +230,27 @@ static pstatus_t sse2_yCbCrToRGB_16s8u_P3AC4R_BGRX(
{
int i;
for (i = 0; i < roi->width * sizeof(INT16) / sizeof(__m128i);
for (i = 0; i < imax;
i += (CACHE_LINE_BYTES / sizeof(__m128i)))
{
_mm_prefetch((char*)(&y_buf[i]), _MM_HINT_NTA);
_mm_prefetch((char*)(&cb_buf[i]), _MM_HINT_NTA);
_mm_prefetch((char*)(&cr_buf[i]), _MM_HINT_NTA);
_mm_prefetch((char*)(&((__m128i*)y_buf)[i]), _MM_HINT_NTA);
_mm_prefetch((char*)(&((__m128i*)cb_buf)[i]), _MM_HINT_NTA);
_mm_prefetch((char*)(&((__m128i*)cr_buf)[i]), _MM_HINT_NTA);
}
y_buf += srcbump;
cb_buf += srcbump;
cr_buf += srcbump;
y_buf += srcStep / sizeof(INT16);
cb_buf += srcStep / sizeof(INT16);
cr_buf += srcStep / sizeof(INT16);
}
y_buf = (__m128i*)(pSrc[0]);
cb_buf = (__m128i*)(pSrc[1]);
cr_buf = (__m128i*)(pSrc[2]);
y_buf = (INT16*)pSrc[0];
cb_buf = (INT16*)pSrc[1];
cr_buf = (INT16*)pSrc[2];
#endif /* DO_PREFETCH */
imax = roi->width * sizeof(INT16) / sizeof(__m128i);
for (yp = 0; yp < roi->height; ++yp)
{
int i;
UINT32 i;
for (i = 0; i < imax; i += 2)
{
@ -282,13 +275,16 @@ static pstatus_t sse2_yCbCrToRGB_16s8u_P3AC4R_BGRX(
*/
/* y = (y_r_buf[i] + 4096) >> 2 */
__m128i y1, y2, cb1, cb2, cr1, cr2, r1, r2, g1, g2, b1, b2;
y1 = _mm_load_si128(y_buf++);
y1 = _mm_load_si128((__m128i*)y_buf);
y_buf += step;
y1 = _mm_add_epi16(y1, c4096);
y1 = _mm_srai_epi16(y1, 2);
/* cb = cb_g_buf[i]; */
cb1 = _mm_load_si128(cb_buf++);
cb1 = _mm_load_si128((__m128i*)cb_buf);
cb_buf += step;
/* cr = cr_b_buf[i]; */
cr1 = _mm_load_si128(cr_buf++);
cr1 = _mm_load_si128((__m128i*)cr_buf);
cr_buf += step;
/* (y + HIWORD(cr*22986)) >> 3 */
r1 = _mm_add_epi16(y1, _mm_mulhi_epi16(cr1, r_cr));
r1 = _mm_srai_epi16(r1, 3);
@ -305,13 +301,16 @@ static pstatus_t sse2_yCbCrToRGB_16s8u_P3AC4R_BGRX(
b1 = _mm_srai_epi16(b1, 3);
/* b_buf[i] = CLIP(b); */
_mm_between_epi16(b1, zero, max);
y2 = _mm_load_si128(y_buf++);
y2 = _mm_load_si128((__m128i*)y_buf);
y_buf += step;
y2 = _mm_add_epi16(y2, c4096);
y2 = _mm_srai_epi16(y2, 2);
/* cb = cb_g_buf[i]; */
cb2 = _mm_load_si128(cb_buf++);
cb2 = _mm_load_si128((__m128i*)cb_buf);
cb_buf += step;
/* cr = cr_b_buf[i]; */
cr2 = _mm_load_si128(cr_buf++);
cr2 = _mm_load_si128((__m128i*)cr_buf);
cr_buf += step;
/* (y + HIWORD(cr*22986)) >> 3 */
r2 = _mm_add_epi16(y2, _mm_mulhi_epi16(cr2, r_cr));
r2 = _mm_srai_epi16(r2, 3);
@ -355,16 +354,36 @@ static pstatus_t sse2_yCbCrToRGB_16s8u_P3AC4R_BGRX(
R2 = R3; /* R2 = R3 */
R2 = _mm_unpacklo_epi16(R1, R2); /* R2 = B5G5R5FFB4G4R4FF */
R3 = _mm_unpackhi_epi16(R1, R3); /* R3 = B7G7R7FFB6G6R6FF */
_mm_store_si128(d_buf++, R0); /* B1G1R1FFB0G0R0FF */
_mm_store_si128(d_buf++, R4); /* B3G3R3FFB2G2R2FF */
_mm_store_si128(d_buf++, R2); /* B5G5R5FFB4G4R4FF */
_mm_store_si128(d_buf++, R3); /* B7G7R7FFB6G6R6FF */
_mm_store_si128((__m128i*)d_buf, R0); /* B1G1R1FFB0G0R0FF */
d_buf += sizeof(__m128i);
_mm_store_si128((__m128i*)d_buf, R4); /* B3G3R3FFB2G2R2FF */
d_buf += sizeof(__m128i);
_mm_store_si128((__m128i*)d_buf, R2); /* B5G5R5FFB4G4R4FF */
d_buf += sizeof(__m128i);
_mm_store_si128((__m128i*)d_buf, R3); /* B7G7R7FFB6G6R6FF */
d_buf += sizeof(__m128i);
}
}
y_buf += yPad;
cb_buf += cbPad;
cr_buf += crPad;
for (i = 0; i < pad; i++)
{
const INT32 divisor = 16;
const INT32 Y = ((*y_buf++) + 4096) << divisor;
const INT32 Cb = (*cb_buf++);
const INT32 Cr = (*cr_buf++);
const INT32 CrR = Cr * (INT32)(1.402525f * (1 << divisor));
const INT32 CrG = Cr * (INT32)(0.714401f * (1 << divisor));
const INT32 CbG = Cb * (INT32)(0.343730f * (1 << divisor));
const INT32 CbB = Cb * (INT32)(1.769905f * (1 << divisor));
const INT16 R = ((INT16)((CrR + Y) >> divisor) >> 5);
const INT16 G = ((INT16)((Y - CbG - CrG) >> divisor) >> 5);
const INT16 B = ((INT16)((CbB + Y) >> divisor) >> 5);
*d_buf++ = CLIP(B);
*d_buf++ = CLIP(G);
*d_buf++ = CLIP(R);
*d_buf++ = 0xFF;
}
d_buf += dstPad;
}
@ -377,28 +396,22 @@ static pstatus_t sse2_yCbCrToRGB_16s8u_P3AC4R_RGBX(
BYTE* pDst, UINT32 dstStep,
const prim_size_t* roi) /* region of interest */
{
__m128i zero, max, r_cr, g_cb, g_cr, b_cb, c4096;
const __m128i* y_buf, *cb_buf, *cr_buf;
__m128i* d_buf;
int srcbump, dstbump, yp, imax;
size_t dstPad, yPad, cbPad, crPad;
zero = _mm_setzero_si128();
max = _mm_set1_epi16(255);
y_buf = (const __m128i*)(pSrc[0]);
cb_buf = (const __m128i*)(pSrc[1]);
cr_buf = (const __m128i*)(pSrc[2]);
d_buf = (__m128i*)pDst;
r_cr = _mm_set1_epi16(22986); /* 1.403 << 14 */
g_cb = _mm_set1_epi16(-5636); /* -0.344 << 14 */
g_cr = _mm_set1_epi16(-11698); /* -0.714 << 14 */
b_cb = _mm_set1_epi16(28999); /* 1.770 << 14 */
c4096 = _mm_set1_epi16(4096);
srcbump = srcStep / sizeof(__m128i);
dstbump = dstStep / sizeof(__m128i);
dstPad = (dstStep - roi->width * 4) / sizeof(__m128i);
yPad = 0;
cbPad = 0;
crPad = 0;
const __m128i zero = _mm_setzero_si128();
const __m128i max = _mm_set1_epi16(255);
const __m128i r_cr = _mm_set1_epi16(22986); /* 1.403 << 14 */
const __m128i g_cb = _mm_set1_epi16(-5636); /* -0.344 << 14 */
const __m128i g_cr = _mm_set1_epi16(-11698); /* -0.714 << 14 */
const __m128i b_cb = _mm_set1_epi16(28999); /* 1.770 << 14 */
const __m128i c4096 = _mm_set1_epi16(4096);
const INT16* y_buf = (INT16*)pSrc[0];
const INT16* cb_buf = (INT16*)pSrc[1];
const INT16* cr_buf = (INT16*)pSrc[2];
const UINT32 pad = roi->width % 16;
const UINT32 step = sizeof(__m128i) / sizeof(INT16);
const UINT32 imax = (roi->width - pad) * sizeof(INT16) / sizeof(__m128i);
BYTE* d_buf = pDst;
int yp;
const size_t dstPad = (dstStep - roi->width * 4);
#ifdef DO_PREFETCH
/* Prefetch Y's, Cb's, and Cr's. */
@ -406,28 +419,27 @@ static pstatus_t sse2_yCbCrToRGB_16s8u_P3AC4R_RGBX(
{
int i;
for (i = 0; i < roi->width * sizeof(INT16) / sizeof(__m128i);
for (i = 0; i < imax;
i += (CACHE_LINE_BYTES / sizeof(__m128i)))
{
_mm_prefetch((char*)(&y_buf[i]), _MM_HINT_NTA);
_mm_prefetch((char*)(&cb_buf[i]), _MM_HINT_NTA);
_mm_prefetch((char*)(&cr_buf[i]), _MM_HINT_NTA);
_mm_prefetch((char*)(&((__m128i*)y_buf)[i]), _MM_HINT_NTA);
_mm_prefetch((char*)(&((__m128i*)cb_buf)[i]), _MM_HINT_NTA);
_mm_prefetch((char*)(&((__m128i*)cr_buf)[i]), _MM_HINT_NTA);
}
y_buf += srcbump;
cb_buf += srcbump;
cr_buf += srcbump;
y_buf += srcStep / sizeof(INT16);
cb_buf += srcStep / sizeof(INT16);
cr_buf += srcStep / sizeof(INT16);
}
y_buf = (__m128i*)(pSrc[0]);
cb_buf = (__m128i*)(pSrc[1]);
cr_buf = (__m128i*)(pSrc[2]);
y_buf = (INT16*)(pSrc[0]);
cb_buf = (INT16*)(pSrc[1]);
cr_buf = (INT16*)(pSrc[2]);
#endif /* DO_PREFETCH */
imax = roi->width * sizeof(INT16) / sizeof(__m128i);
for (yp = 0; yp < roi->height; ++yp)
{
int i;
UINT32 i;
for (i = 0; i < imax; i += 2)
{
@ -452,13 +464,16 @@ static pstatus_t sse2_yCbCrToRGB_16s8u_P3AC4R_RGBX(
*/
/* y = (y_r_buf[i] + 4096) >> 2 */
__m128i y1, y2, cb1, cb2, cr1, cr2, r1, r2, g1, g2, b1, b2;
y1 = _mm_load_si128(y_buf++);
y1 = _mm_load_si128((__m128i*)y_buf);
y_buf += step;
y1 = _mm_add_epi16(y1, c4096);
y1 = _mm_srai_epi16(y1, 2);
/* cb = cb_g_buf[i]; */
cb1 = _mm_load_si128(cb_buf++);
cb1 = _mm_load_si128((__m128i*)cb_buf);
cb_buf += step;
/* cr = cr_b_buf[i]; */
cr1 = _mm_load_si128(cr_buf++);
cr1 = _mm_load_si128((__m128i*)cr_buf);
cr_buf += step;
/* (y + HIWORD(cr*22986)) >> 3 */
r1 = _mm_add_epi16(y1, _mm_mulhi_epi16(cr1, r_cr));
r1 = _mm_srai_epi16(r1, 3);
@ -475,13 +490,16 @@ static pstatus_t sse2_yCbCrToRGB_16s8u_P3AC4R_RGBX(
b1 = _mm_srai_epi16(b1, 3);
/* b_buf[i] = CLIP(b); */
_mm_between_epi16(b1, zero, max);
y2 = _mm_load_si128(y_buf++);
y2 = _mm_load_si128((__m128i*)y_buf);
y_buf += step;
y2 = _mm_add_epi16(y2, c4096);
y2 = _mm_srai_epi16(y2, 2);
/* cb = cb_g_buf[i]; */
cb2 = _mm_load_si128(cb_buf++);
cb2 = _mm_load_si128((__m128i*)cb_buf);
cb_buf += step;
/* cr = cr_b_buf[i]; */
cr2 = _mm_load_si128(cr_buf++);
cr2 = _mm_load_si128((__m128i*)cr_buf);
cr_buf += step;
/* (y + HIWORD(cr*22986)) >> 3 */
r2 = _mm_add_epi16(y2, _mm_mulhi_epi16(cr2, r_cr));
r2 = _mm_srai_epi16(r2, 3);
@ -525,16 +543,36 @@ static pstatus_t sse2_yCbCrToRGB_16s8u_P3AC4R_RGBX(
R2 = R3; /* R2 = R3 */
R2 = _mm_unpacklo_epi16(R1, R2); /* R2 = R5G5B5FFR4G4B4FF */
R3 = _mm_unpackhi_epi16(R1, R3); /* R3 = R7G7B7FFR6G6B6FF */
_mm_store_si128(d_buf++, R0); /* R1G1B1FFR0G0B0FF */
_mm_store_si128(d_buf++, R4); /* R3G3B3FFR2G2B2FF */
_mm_store_si128(d_buf++, R2); /* R5G5B5FFR4G4B4FF */
_mm_store_si128(d_buf++, R3); /* R7G7B7FFR6G6B6FF */
_mm_store_si128((__m128i*)d_buf, R0); /* R1G1B1FFR0G0B0FF */
d_buf += sizeof(__m128i);
_mm_store_si128((__m128i*)d_buf, R4); /* R3G3B3FFR2G2B2FF */
d_buf += sizeof(__m128i);
_mm_store_si128((__m128i*)d_buf, R2); /* R5G5B5FFR4G4B4FF */
d_buf += sizeof(__m128i);
_mm_store_si128((__m128i*)d_buf, R3); /* R7G7B7FFR6G6B6FF */
d_buf += sizeof(__m128i);
}
}
y_buf += yPad;
cb_buf += cbPad;
cr_buf += crPad;
for (i = 0; i < pad; i++)
{
const INT32 divisor = 16;
const INT32 Y = ((*y_buf++) + 4096) << divisor;
const INT32 Cb = (*cb_buf++);
const INT32 Cr = (*cr_buf++);
const INT32 CrR = Cr * (INT32)(1.402525f * (1 << divisor));
const INT32 CrG = Cr * (INT32)(0.714401f * (1 << divisor));
const INT32 CbG = Cb * (INT32)(0.343730f * (1 << divisor));
const INT32 CbB = Cb * (INT32)(1.769905f * (1 << divisor));
const INT16 R = ((INT16)((CrR + Y) >> divisor) >> 5);
const INT16 G = ((INT16)((Y - CbG - CrG) >> divisor) >> 5);
const INT16 B = ((INT16)((CbB + Y) >> divisor) >> 5);
*d_buf++ = CLIP(R);
*d_buf++ = CLIP(G);
*d_buf++ = CLIP(B);
*d_buf++ = 0xFF;
}
d_buf += dstPad;
}
@ -550,9 +588,8 @@ static pstatus_t sse2_yCbCrToRGB_16s8u_P3AC4R(
|| ((ULONG_PTR)(pSrc[1]) & 0x0f)
|| ((ULONG_PTR)(pSrc[2]) & 0x0f)
|| ((ULONG_PTR)(pDst) & 0x0f)
|| (roi->width & 0x07)
|| (srcStep & 127)
|| (dstStep & 127))
|| (srcStep & 0x0f)
|| (dstStep & 0x0f))
{
/* We can't maintain 16-byte alignment. */
return generic->yCbCrToRGB_16s8u_P3AC4R(pSrc, srcStep,
@ -718,6 +755,7 @@ static pstatus_t sse2_RGBToRGB_16s8u_P3AC4R_BGRX(
const UINT16* pr = (const UINT16*)(pSrc[0]);
const UINT16* pg = (const UINT16*)(pSrc[1]);
const UINT16* pb = (const UINT16*)(pSrc[2]);
const UINT32 pad = roi->width % 16;
const __m128i a = _mm_set1_epi32(0xFFFFFFFFU);
BYTE* out;
UINT32 srcbump, dstbump, y;
@ -729,7 +767,7 @@ static pstatus_t sse2_RGBToRGB_16s8u_P3AC4R_BGRX(
{
UINT32 x;
for (x = 0; x < roi->width; x += 16)
for (x = 0; x < roi->width - pad; x += 16)
{
__m128i r, g, b;
/* The comments below pretend these are 8-byte registers
@ -737,25 +775,25 @@ static pstatus_t sse2_RGBToRGB_16s8u_P3AC4R_BGRX(
*/
{
__m128i R0, R1;
R0 = _mm_loadu_si128((__m128i*)pb);
R0 = _mm_load_si128((__m128i*)pb);
pb += 8; /* R0 = 00B300B200B100B0 */
R1 = _mm_loadu_si128((__m128i*)pb);
R1 = _mm_load_si128((__m128i*)pb);
pb += 8; /* R1 = 00B700B600B500B4 */
b = _mm_packus_epi16(R0, R1); /* b = B7B6B5B4B3B2B1B0 */
}
{
__m128i R0, R1;
R0 = _mm_loadu_si128((__m128i*)pg);
R0 = _mm_load_si128((__m128i*)pg);
pg += 8; /* R1 = 00G300G200G100G0 */
R1 = _mm_loadu_si128((__m128i*)pg);
R1 = _mm_load_si128((__m128i*)pg);
pg += 8; /* R2 = 00G700G600G500G4 */
g = _mm_packus_epi16(R0, R1); /* g = G7G6G5G4G3G2G1G0 */
}
{
__m128i R0, R1;
R0 = _mm_loadu_si128((__m128i*)pr);
R0 = _mm_load_si128((__m128i*)pr);
pr += 8; /* R0 = 00R300R200R100R0 */
R1 = _mm_loadu_si128((__m128i*)pr);
R1 = _mm_load_si128((__m128i*)pr);
pr += 8; /* R3 = 00R700R600R500R4 */
r = _mm_packus_epi16(R0, R1); /* r = R7R6R5R4R3R2R1R0 */
}
@ -790,6 +828,17 @@ static pstatus_t sse2_RGBToRGB_16s8u_P3AC4R_BGRX(
}
}
for (x = 0; x < pad; x++)
{
const BYTE R = CLIP(*pr++);
const BYTE G = CLIP(*pg++);
const BYTE B = CLIP(*pb++);
*out++ = B;
*out++ = G;
*out++ = R;
*out++ = 0xFF;
}
/* Jump to next row. */
pr += srcbump;
pg += srcbump;
@ -810,6 +859,7 @@ static pstatus_t sse2_RGBToRGB_16s8u_P3AC4R_RGBX(
const UINT16* pr = (const UINT16*)(pSrc[0]);
const UINT16* pg = (const UINT16*)(pSrc[1]);
const UINT16* pb = (const UINT16*)(pSrc[2]);
const UINT32 pad = roi->width % 16;
const __m128i a = _mm_set1_epi32(0xFFFFFFFFU);
BYTE* out;
UINT32 srcbump, dstbump, y;
@ -821,7 +871,7 @@ static pstatus_t sse2_RGBToRGB_16s8u_P3AC4R_RGBX(
{
UINT32 x;
for (x = 0; x < roi->width; x += 16)
for (x = 0; x < roi->width - pad; x += 16)
{
__m128i r, g, b;
/* The comments below pretend these are 8-byte registers
@ -829,25 +879,25 @@ static pstatus_t sse2_RGBToRGB_16s8u_P3AC4R_RGBX(
*/
{
__m128i R0, R1;
R0 = _mm_loadu_si128((__m128i*)pb);
R0 = _mm_load_si128((__m128i*)pb);
pb += 8; /* R0 = 00B300B200B100B0 */
R1 = _mm_loadu_si128((__m128i*)pb);
R1 = _mm_load_si128((__m128i*)pb);
pb += 8; /* R1 = 00B700B600B500B4 */
b = _mm_packus_epi16(R0, R1); /* b = B7B6B5B4B3B2B1B0 */
}
{
__m128i R0, R1;
R0 = _mm_loadu_si128((__m128i*)pg);
R0 = _mm_load_si128((__m128i*)pg);
pg += 8; /* R1 = 00G300G200G100G0 */
R1 = _mm_loadu_si128((__m128i*)pg);
R1 = _mm_load_si128((__m128i*)pg);
pg += 8; /* R2 = 00G700G600G500G4 */
g = _mm_packus_epi16(R0, R1); /* g = G7G6G5G4G3G2G1G0 */
}
{
__m128i R0, R1;
R0 = _mm_loadu_si128((__m128i*)pr);
R0 = _mm_load_si128((__m128i*)pr);
pr += 8; /* R0 = 00R300R200R100R0 */
R1 = _mm_loadu_si128((__m128i*)pr);
R1 = _mm_load_si128((__m128i*)pr);
pr += 8; /* R3 = 00R700R600R500R4 */
r = _mm_packus_epi16(R0, R1); /* r = R7R6R5R4R3R2R1R0 */
}
@ -882,6 +932,17 @@ static pstatus_t sse2_RGBToRGB_16s8u_P3AC4R_RGBX(
}
}
for (x = 0; x < pad; x++)
{
const BYTE R = CLIP(*pr++);
const BYTE G = CLIP(*pg++);
const BYTE B = CLIP(*pb++);
*out++ = R;
*out++ = G;
*out++ = B;
*out++ = 0xFF;
}
/* Jump to next row. */
pr += srcbump;
pg += srcbump;
@ -902,6 +963,7 @@ static pstatus_t sse2_RGBToRGB_16s8u_P3AC4R_XBGR(
const UINT16* pr = (const UINT16*)(pSrc[0]);
const UINT16* pg = (const UINT16*)(pSrc[1]);
const UINT16* pb = (const UINT16*)(pSrc[2]);
const UINT32 pad = roi->width % 16;
const __m128i a = _mm_set1_epi32(0xFFFFFFFFU);
BYTE* out;
UINT32 srcbump, dstbump, y;
@ -913,7 +975,7 @@ static pstatus_t sse2_RGBToRGB_16s8u_P3AC4R_XBGR(
{
UINT32 x;
for (x = 0; x < roi->width; x += 16)
for (x = 0; x < roi->width - pad; x += 16)
{
__m128i r, g, b;
/* The comments below pretend these are 8-byte registers
@ -921,25 +983,25 @@ static pstatus_t sse2_RGBToRGB_16s8u_P3AC4R_XBGR(
*/
{
__m128i R0, R1;
R0 = _mm_loadu_si128((__m128i*)pb);
R0 = _mm_load_si128((__m128i*)pb);
pb += 8; /* R0 = 00B300B200B100B0 */
R1 = _mm_loadu_si128((__m128i*)pb);
R1 = _mm_load_si128((__m128i*)pb);
pb += 8; /* R1 = 00B700B600B500B4 */
b = _mm_packus_epi16(R0, R1); /* b = B7B6B5B4B3B2B1B0 */
}
{
__m128i R0, R1;
R0 = _mm_loadu_si128((__m128i*)pg);
R0 = _mm_load_si128((__m128i*)pg);
pg += 8; /* R1 = 00G300G200G100G0 */
R1 = _mm_loadu_si128((__m128i*)pg);
R1 = _mm_load_si128((__m128i*)pg);
pg += 8; /* R2 = 00G700G600G500G4 */
g = _mm_packus_epi16(R0, R1); /* g = G7G6G5G4G3G2G1G0 */
}
{
__m128i R0, R1;
R0 = _mm_loadu_si128((__m128i*)pr);
R0 = _mm_load_si128((__m128i*)pr);
pr += 8; /* R0 = 00R300R200R100R0 */
R1 = _mm_loadu_si128((__m128i*)pr);
R1 = _mm_load_si128((__m128i*)pr);
pr += 8; /* R3 = 00R700R600R500R4 */
r = _mm_packus_epi16(R0, R1); /* r = R7R6R5R4R3R2R1R0 */
}
@ -974,6 +1036,17 @@ static pstatus_t sse2_RGBToRGB_16s8u_P3AC4R_XBGR(
}
}
for (x = 0; x < pad; x++)
{
const BYTE R = CLIP(*pr++);
const BYTE G = CLIP(*pg++);
const BYTE B = CLIP(*pb++);
*out++ = 0xFF;
*out++ = B;
*out++ = G;
*out++ = R;
}
/* Jump to next row. */
pr += srcbump;
pg += srcbump;
@ -995,6 +1068,7 @@ static pstatus_t sse2_RGBToRGB_16s8u_P3AC4R_XRGB(
const UINT16* pg = (const UINT16*)(pSrc[1]);
const UINT16* pb = (const UINT16*)(pSrc[2]);
const __m128i a = _mm_set1_epi32(0xFFFFFFFFU);
const UINT32 pad = roi->width % 16;
BYTE* out;
UINT32 srcbump, dstbump, y;
out = (BYTE*) pDst;
@ -1005,7 +1079,7 @@ static pstatus_t sse2_RGBToRGB_16s8u_P3AC4R_XRGB(
{
UINT32 x;
for (x = 0; x < roi->width; x += 16)
for (x = 0; x < roi->width - pad; x += 16)
{
__m128i r, g, b;
/* The comments below pretend these are 8-byte registers
@ -1013,25 +1087,25 @@ static pstatus_t sse2_RGBToRGB_16s8u_P3AC4R_XRGB(
*/
{
__m128i R0, R1;
R0 = _mm_loadu_si128((__m128i*)pb);
R0 = _mm_load_si128((__m128i*)pb);
pb += 8; /* R0 = 00B300B200B100B0 */
R1 = _mm_loadu_si128((__m128i*)pb);
R1 = _mm_load_si128((__m128i*)pb);
pb += 8; /* R1 = 00B700B600B500B4 */
b = _mm_packus_epi16(R0, R1); /* b = B7B6B5B4B3B2B1B0 */
}
{
__m128i R0, R1;
R0 = _mm_loadu_si128((__m128i*)pg);
R0 = _mm_load_si128((__m128i*)pg);
pg += 8; /* R1 = 00G300G200G100G0 */
R1 = _mm_loadu_si128((__m128i*)pg);
R1 = _mm_load_si128((__m128i*)pg);
pg += 8; /* R2 = 00G700G600G500G4 */
g = _mm_packus_epi16(R0, R1); /* g = G7G6G5G4G3G2G1G0 */
}
{
__m128i R0, R1;
R0 = _mm_loadu_si128((__m128i*)pr);
R0 = _mm_load_si128((__m128i*)pr);
pr += 8; /* R0 = 00R300R200R100R0 */
R1 = _mm_loadu_si128((__m128i*)pr);
R1 = _mm_load_si128((__m128i*)pr);
pr += 8; /* R3 = 00R700R600R500R4 */
r = _mm_packus_epi16(R0, R1); /* r = R7R6R5R4R3R2R1R0 */
}
@ -1066,6 +1140,17 @@ static pstatus_t sse2_RGBToRGB_16s8u_P3AC4R_XRGB(
}
}
for (x = 0; x < pad; x++)
{
const BYTE R = CLIP(*pr++);
const BYTE G = CLIP(*pg++);
const BYTE B = CLIP(*pb++);
*out++ = 0xFF;
*out++ = R;
*out++ = G;
*out++ = B;
}
/* Jump to next row. */
pr += srcbump;
pg += srcbump;
@ -1084,6 +1169,10 @@ static pstatus_t sse2_RGBToRGB_16s8u_P3AC4R(
UINT32 DstFormat,
const prim_size_t* roi)
{
/* The SSE2 variants read every source plane with aligned loads
 * (_mm_load_si128), so each of the three plane pointers must be checked,
 * not just the first one. If any plane, the destination or either stride
 * is not 16-byte aligned, fall back to the generic implementation.
 * ULONG_PTR is used for the pointer-to-integer casts, matching the
 * alignment check in sse2_yCbCrToRGB_16s8u_P3AC4R. */
if (((ULONG_PTR)pSrc[0] & 0x0f) || ((ULONG_PTR)pSrc[1] & 0x0f) || ((ULONG_PTR)pSrc[2] & 0x0f) ||
(srcStep & 0x0f) || ((ULONG_PTR)pDst & 0x0f) || (dstStep & 0x0f))
return generic->RGBToRGB_16s8u_P3AC4R(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
switch (DstFormat)
{
case PIXEL_FORMAT_BGRA32:
@ -1427,7 +1516,7 @@ static pstatus_t neon_RGBToRGB_16s8u_P3AC4R(
return neon_RGBToRGB_16s8u_P3AC4R_X(pSrc, srcStep, pDst, dstStep, roi, 3, 2, 1, 0);
default:
return generic->yCbCrToRGB_16s8u_P3AC4R(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
return generic->RGBToRGB_16s8u_P3AC4R(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
}
}
#endif /* WITH_NEON */

Просмотреть файл

@ -2155,34 +2155,55 @@ static int test_bmp_cmp_dump(const BYTE* actual, const BYTE* expected, int size,
return count;
}
static int test_PrimitivesYCbCr(const primitives_t* prims, UINT32 format, prim_size_t roi)
static int test_PrimitivesYCbCr(const primitives_t* prims, UINT32 format, prim_size_t roi,
BOOL compare)
{
pstatus_t status = -1;
int cnt[3];
float err[3];
BYTE* actual;
BYTE* actual1;
BYTE* expected;
int margin = 1;
const INT16* pYCbCr[3];
/* Start as NULL: the early "goto fail" taken when an output buffer
 * allocation fails runs before these planes are allocated, and the
 * cleanup code frees all three entries unconditionally. */
INT16* pYCbCr[3] = { NULL, NULL, NULL };
const UINT32 srcStride = roi.width * 2;
const UINT32 dstStride = roi.width * GetBytesPerPixel(format);
const UINT32 srcSize = srcStride * roi.height;
const UINT32 dstSize = dstStride * roi.height;
PROFILER_DEFINE(prof);
PROFILER_DEFINE(prof1);
PROFILER_DEFINE(prof2);
//return test_YCbCr_pixels();
expected = (BYTE*) TEST_XRGB_IMAGE;
actual = _aligned_malloc(dstSize, 16);
PROFILER_CREATE(prof, "YCbCr");
actual1 = _aligned_malloc(dstSize, 16);
PROFILER_CREATE(prof, "yCbCrToRGB_16s8u");
PROFILER_CREATE(prof1, "yCbCrToRGB16s16s");
PROFILER_CREATE(prof2, "RGBToRGB_16s8u");
if (!actual)
if (!actual || !actual1)
goto fail;
ZeroMemory(actual, dstSize);
pYCbCr[0] = TEST_Y_COMPONENT;
pYCbCr[1] = TEST_CB_COMPONENT;
pYCbCr[2] = TEST_CR_COMPONENT;
ZeroMemory(actual1, dstSize);
pYCbCr[0] = _aligned_malloc(srcSize, 16);
pYCbCr[1] = _aligned_malloc(srcSize, 16);
pYCbCr[2] = _aligned_malloc(srcSize, 16);
if (!pYCbCr[0] || !pYCbCr[1] || !pYCbCr[2])
goto fail;
winpr_RAND((BYTE*)pYCbCr[0], srcSize);
winpr_RAND((BYTE*)pYCbCr[1], srcSize);
winpr_RAND((BYTE*)pYCbCr[2], srcSize);
if (compare)
{
memcpy(pYCbCr[0], TEST_Y_COMPONENT, srcSize);
memcpy(pYCbCr[1], TEST_CB_COMPONENT, srcSize);
memcpy(pYCbCr[2], TEST_CR_COMPONENT, srcSize);
}
if (1)
{
PROFILER_ENTER(prof);
status = prims->yCbCrToRGB_16s8u_P3AC4R((const INT16**) pYCbCr, srcStride,
@ -2190,7 +2211,7 @@ static int test_PrimitivesYCbCr(const primitives_t* prims, UINT32 format, prim_s
&roi);
PROFILER_EXIT(prof);
}
else
{
INT16* pSrcDst[3];
pSrcDst[0] = _aligned_malloc(srcSize, 16);
@ -2199,16 +2220,18 @@ static int test_PrimitivesYCbCr(const primitives_t* prims, UINT32 format, prim_s
CopyMemory(pSrcDst[0], pYCbCr[0], srcSize);
CopyMemory(pSrcDst[1], pYCbCr[1], srcSize);
CopyMemory(pSrcDst[2], pYCbCr[2], srcSize);
PROFILER_ENTER(prof);
PROFILER_ENTER(prof1);
status = prims->yCbCrToRGB_16s16s_P3P3((const INT16**) pSrcDst, srcStride,
pSrcDst, srcStride, &roi);
PROFILER_EXIT(prof1);
if (status != PRIMITIVES_SUCCESS)
goto fail2;
PROFILER_ENTER(prof2);
status = prims->RGBToRGB_16s8u_P3AC4R((const INT16**) pSrcDst, srcStride,
actual, dstStride, format, &roi);
PROFILER_EXIT(prof);
actual1, dstStride, format, &roi);
PROFILER_EXIT(prof2);
fail2:
_aligned_free(pSrcDst[0]);
_aligned_free(pSrcDst[1]);
@ -2218,32 +2241,65 @@ static int test_PrimitivesYCbCr(const primitives_t* prims, UINT32 format, prim_s
goto fail;
}
cnt[2] = test_bmp_cmp_count(actual, expected, dstSize, 2, margin); /* red */
err[2] = ((float) cnt[2]) / ((float) dstSize / 4) * 100.0f;
cnt[1] = test_bmp_cmp_count(actual, expected, dstSize, 1, margin); /* green */
err[1] = ((float) cnt[1]) / ((float) dstSize / 4) * 100.0f;
cnt[0] = test_bmp_cmp_count(actual, expected, dstSize, 0, margin); /* blue */
err[0] = ((float) cnt[0]) / ((float) dstSize / 4) * 100.0f;
if (cnt[0] || cnt[1] || cnt[2])
if (compare)
{
printf("Red Error Dump:\n");
test_bmp_cmp_dump(actual, expected, dstSize, 2, margin); /* red */
printf("Green Error Dump:\n");
test_bmp_cmp_dump(actual, expected, dstSize, 1, margin); /* green */
printf("Blue Error Dump:\n");
test_bmp_cmp_dump(actual, expected, dstSize, 0, margin); /* blue */
printf("R: diff: %d (%f%%)\n", cnt[2], err[2]);
printf("G: diff: %d (%f%%)\n", cnt[1], err[1]);
printf("B: diff: %d (%f%%)\n", cnt[0], err[0]);
cnt[2] = test_bmp_cmp_count(actual, expected, dstSize, 2, margin); /* red */
err[2] = ((float) cnt[2]) / ((float) dstSize / 4) * 100.0f;
cnt[1] = test_bmp_cmp_count(actual, expected, dstSize, 1, margin); /* green */
err[1] = ((float) cnt[1]) / ((float) dstSize / 4) * 100.0f;
cnt[0] = test_bmp_cmp_count(actual, expected, dstSize, 0, margin); /* blue */
err[0] = ((float) cnt[0]) / ((float) dstSize / 4) * 100.0f;
if (cnt[0] || cnt[1] || cnt[2])
{
printf("Summary information yCbCrToRGB_16s8u_P3AC4R\n");
printf("Red Error Dump:\n");
test_bmp_cmp_dump(actual, expected, dstSize, 2, margin); /* red */
printf("Green Error Dump:\n");
test_bmp_cmp_dump(actual, expected, dstSize, 1, margin); /* green */
printf("Blue Error Dump:\n");
test_bmp_cmp_dump(actual, expected, dstSize, 0, margin); /* blue */
printf("R: diff: %d (%f%%)\n", cnt[2], err[2]);
printf("G: diff: %d (%f%%)\n", cnt[1], err[1]);
printf("B: diff: %d (%f%%)\n", cnt[0], err[0]);
}
cnt[2] = test_bmp_cmp_count(actual1, expected, dstSize, 2, margin); /* red */
err[2] = ((float) cnt[2]) / ((float) dstSize / 4) * 100.0f;
cnt[1] = test_bmp_cmp_count(actual1, expected, dstSize, 1, margin); /* green */
err[1] = ((float) cnt[1]) / ((float) dstSize / 4) * 100.0f;
cnt[0] = test_bmp_cmp_count(actual1, expected, dstSize, 0, margin); /* blue */
err[0] = ((float) cnt[0]) / ((float) dstSize / 4) * 100.0f;
if (cnt[0] || cnt[1] || cnt[2])
{
printf("Summary information yCbCrToRGB_16s16s_P3P3 & RGBToRGB_16s8u_P3AC4R\n");
printf("Red Error Dump:\n");
test_bmp_cmp_dump(actual1, expected, dstSize, 2, margin); /* red */
printf("Green Error Dump:\n");
test_bmp_cmp_dump(actual1, expected, dstSize, 1, margin); /* green */
printf("Blue Error Dump:\n");
test_bmp_cmp_dump(actual1, expected, dstSize, 0, margin); /* blue */
printf("R: diff: %d (%f%%)\n", cnt[2], err[2]);
printf("G: diff: %d (%f%%)\n", cnt[1], err[1]);
printf("B: diff: %d (%f%%)\n", cnt[0], err[0]);
}
}
PROFILER_PRINT_HEADER;
PROFILER_PRINT(prof);
PROFILER_PRINT(prof1);
PROFILER_PRINT(prof2);
PROFILER_PRINT_FOOTER;
fail:
_aligned_free((BYTE*)pYCbCr[0]);
_aligned_free((BYTE*)pYCbCr[1]);
_aligned_free((BYTE*)pYCbCr[2]);
_aligned_free(actual);
_aligned_free(actual1);
PROFILER_FREE(prof);
PROFILER_FREE(prof1);
PROFILER_FREE(prof2);
return status;
}
@ -2262,30 +2318,105 @@ int TestPrimitivesYCbCr(int argc, char* argv[])
};
const primitives_t* prims = primitives_get();
const primitives_t* generics = primitives_get_generic();
prim_size_t roi = { 64, 64 };
UINT32 x;
for (x = 0; x < sizeof(formats) / sizeof(formats[0]); x++)
if (argc < 2)
{
int rc;
printf("----------------------- GENERIC %s -------------------\n",
GetColorFormatName(formats[x]));
rc = test_PrimitivesYCbCr(generics, formats[x], roi);
{
/* Do content comparison. */
for (x = 0; x < sizeof(formats) / sizeof(formats[0]); x++)
{
prim_size_t roi = { 64, 64 };
int rc;
printf("----------------------- GENERIC %s [%"PRIu32"x%"PRIu32"] COMPARE CONTENT ----\n",
GetColorFormatName(formats[x]), roi.width, roi.height);
rc = test_PrimitivesYCbCr(generics, formats[x], roi, TRUE);
if (rc != PRIMITIVES_SUCCESS)
return rc;
if (rc != PRIMITIVES_SUCCESS)
return rc;
printf("------------------------- END %s ----------------------\n",
GetColorFormatName(formats[x]));
printf("---------------------- OPTIMIZED %s -------------------\n",
GetColorFormatName(formats[x]));
rc = test_PrimitivesYCbCr(prims, formats[x], roi);
printf("------------------------- END %s ----------------------\n",
GetColorFormatName(formats[x]));
printf("---------------------- OPTIMIZED %s [%"PRIu32"x%"PRIu32"] COMPARE CONTENT ----\n",
GetColorFormatName(formats[x]), roi.width, roi.height);
rc = test_PrimitivesYCbCr(prims, formats[x], roi, TRUE);
if (rc != PRIMITIVES_SUCCESS)
return rc;
if (rc != PRIMITIVES_SUCCESS)
return rc;
printf("------------------------- END %s ----------------------\n",
GetColorFormatName(formats[x]));
printf("------------------------- END %s ----------------------\n",
GetColorFormatName(formats[x]));
}
}
/* Do random data conversion with random sizes */
{
prim_size_t roi;
do
{
winpr_RAND((BYTE*)&roi.width, sizeof(roi.width));
roi.width %= 2048;
}
while (roi.width < 16);
do
{
winpr_RAND((BYTE*)&roi.height, sizeof(roi.height));
roi.height %= 2048;
}
while (roi.height < 16);
for (x = 0; x < sizeof(formats) / sizeof(formats[0]); x++)
{
int rc;
printf("----------------------- GENERIC %s [%"PRIu32"x%"PRIu32"] COMPARE CONTENT ----\n",
GetColorFormatName(formats[x]), roi.width, roi.height);
rc = test_PrimitivesYCbCr(generics, formats[x], roi, FALSE);
if (rc != PRIMITIVES_SUCCESS)
return rc;
printf("------------------------- END %s ----------------------\n",
GetColorFormatName(formats[x]));
printf("---------------------- OPTIMIZED %s [%"PRIu32"x%"PRIu32"] COMPARE CONTENT ----\n",
GetColorFormatName(formats[x]), roi.width, roi.height);
rc = test_PrimitivesYCbCr(prims, formats[x], roi, FALSE);
if (rc != PRIMITIVES_SUCCESS)
return rc;
printf("------------------------- END %s ----------------------\n",
GetColorFormatName(formats[x]));
}
}
}
/* Do a performance run with full HD */
else
{
prim_size_t roi = { 1928, 1080 };
for (x = 0; x < sizeof(formats) / sizeof(formats[0]); x++)
{
int rc;
printf("----------------------- GENERIC %s [%"PRIu32"x%"PRIu32"] COMPARE CONTENT ----\n",
GetColorFormatName(formats[x]), roi.width, roi.height);
rc = test_PrimitivesYCbCr(generics, formats[x], roi, FALSE);
if (rc != PRIMITIVES_SUCCESS)
return rc;
printf("------------------------- END %s ----------------------\n",
GetColorFormatName(formats[x]));
printf("---------------------- OPTIMIZED %s [%"PRIu32"x%"PRIu32"] COMPARE CONTENT ----\n",
GetColorFormatName(formats[x]), roi.width, roi.height);
rc = test_PrimitivesYCbCr(prims, formats[x], roi, FALSE);
if (rc != PRIMITIVES_SUCCESS)
return rc;
printf("------------------------- END %s ----------------------\n",
GetColorFormatName(formats[x]));
}
}
return 0;

Просмотреть файл

@ -27,7 +27,7 @@
/* ------------------------------------------------------------------------- */
static BOOL test_YCoCgRToRGB_8u_AC4R_func(UINT32 width, UINT32 height)
{
pstatus_t status;
pstatus_t status = -1;
BYTE* out_sse = NULL;
BYTE* in = NULL;
BYTE* out_c = NULL;
@ -45,8 +45,6 @@ static BOOL test_YCoCgRToRGB_8u_AC4R_func(UINT32 width, UINT32 height)
};
PROFILER_DEFINE(genericProf);
PROFILER_DEFINE(optProf);
PROFILER_CREATE(genericProf, "YCoCgRToRGB_8u_AC4R-GENERIC");
PROFILER_CREATE(optProf, "YCoCgRToRGB_8u_AC4R-OPT");
in = _aligned_malloc(size, 16);
out_c = _aligned_malloc(size, 16);
out_sse = _aligned_malloc(size, 16);
@ -61,6 +59,8 @@ static BOOL test_YCoCgRToRGB_8u_AC4R_func(UINT32 width, UINT32 height)
const UINT32 format = formats[x];
const UINT32 dstStride = width * GetBytesPerPixel(format);
const char* formatName = GetColorFormatName(format);
PROFILER_CREATE(genericProf, "YCoCgRToRGB_8u_AC4R-GENERIC");
PROFILER_CREATE(optProf, "YCoCgRToRGB_8u_AC4R-OPT");
PROFILER_ENTER(genericProf);
status = generic->YCoCgToRGB_8u_AC4R(
in, srcStride,
@ -68,7 +68,7 @@ static BOOL test_YCoCgRToRGB_8u_AC4R_func(UINT32 width, UINT32 height)
PROFILER_EXIT(genericProf);
if (status != PRIMITIVES_SUCCESS)
goto fail;
goto loop_fail;
PROFILER_ENTER(optProf);
status = optimized->YCoCgToRGB_8u_AC4R(
@ -77,7 +77,7 @@ static BOOL test_YCoCgRToRGB_8u_AC4R_func(UINT32 width, UINT32 height)
PROFILER_EXIT(optProf);
if (status != PRIMITIVES_SUCCESS)
goto fail;
goto loop_fail;
if (memcmp(out_c, out_sse, dstStride * height) != 0)
{
@ -95,15 +95,21 @@ static BOOL test_YCoCgRToRGB_8u_AC4R_func(UINT32 width, UINT32 height)
}
}
printf("--------------------------- [%s] [%"PRIu32"x%"PRIu32"] ---------------------------\n",
formatName, width, height);
PROFILER_PRINT_HEADER;
PROFILER_PRINT(genericProf);
PROFILER_PRINT(optProf);
PROFILER_PRINT_FOOTER;
loop_fail:
PROFILER_FREE(genericProf);
PROFILER_FREE(optProf);
if (status != PRIMITIVES_SUCCESS)
goto fail;
}
fail:
PROFILER_FREE(genericProf);
PROFILER_FREE(optProf);
_aligned_free(in);
_aligned_free(out_c);
_aligned_free(out_sse);
@ -114,6 +120,35 @@ int TestPrimitivesYCoCg(int argc, char* argv[])
{
prim_test_setup(FALSE);
/* Random resolution tests */
if (argc < 2)
{
UINT32 x;
for (x = 0; x < 10; x++)
{
UINT32 w, h;
do
{
winpr_RAND((BYTE*)&w, sizeof(w));
w %= 2048;
}
while (w < 16);
do
{
winpr_RAND((BYTE*)&h, sizeof(h));
h %= 2048;
}
while (h < 16);
if (!test_YCoCgRToRGB_8u_AC4R_func(w, h))
return 1;
}
}
/* Test once with full HD */
if (!test_YCoCgRToRGB_8u_AC4R_func(1920, 1080))
return 1;

Просмотреть файл

@ -254,9 +254,6 @@ static BOOL TestPrimitiveYUVCombine(primitives_t* prims, prim_size_t roi)
}
PROFILER_EXIT(yuvCombine);
PROFILER_PRINT_HEADER;
PROFILER_PRINT(yuvCombine);
PROFILER_PRINT_FOOTER;
for (x = 0; x < 3; x++)
{
@ -284,9 +281,6 @@ static BOOL TestPrimitiveYUVCombine(primitives_t* prims, prim_size_t roi)
}
PROFILER_EXIT(yuvSplit);
PROFILER_PRINT_HEADER;
PROFILER_PRINT(yuvSplit);
PROFILER_PRINT_FOOTER;
for (x = 0; x < 3; x++)
{
@ -346,6 +340,10 @@ static BOOL TestPrimitiveYUVCombine(primitives_t* prims, prim_size_t roi)
}
}
PROFILER_PRINT_HEADER;
PROFILER_PRINT(yuvSplit);
PROFILER_PRINT(yuvCombine);
PROFILER_PRINT_FOOTER;
rc = TRUE;
fail:
PROFILER_FREE(yuvCombine);
@ -375,7 +373,7 @@ static BOOL TestPrimitiveYUV(primitives_t* prims, prim_size_t roi, BOOL use444)
size_t size;
size_t uvsize, uvwidth;
size_t padding = 100 * 16;
size_t stride;
UINT32 stride;
const UINT32 formats[] =
{
PIXEL_FORMAT_XRGB32,
@ -396,10 +394,6 @@ static BOOL TestPrimitiveYUV(primitives_t* prims, prim_size_t roi, BOOL use444)
aheight = roi.height + 16 - roi.height % 16;
stride = awidth * sizeof(UINT32);
size = awidth * aheight;
PROFILER_CREATE(rgbToYUV420, "RGBToYUV420");
PROFILER_CREATE(rgbToYUV444, "RGBToYUV444");
PROFILER_CREATE(yuv420ToRGB, "YUV420ToRGB");
PROFILER_CREATE(yuv444ToRGB, "YUV444ToRGB");
if (use444)
{
@ -456,22 +450,25 @@ static BOOL TestPrimitiveYUV(primitives_t* prims, prim_size_t roi, BOOL use444)
for (x = 0; x < sizeof(formats) / sizeof(formats[0]); x++)
{
pstatus_t rc;
const UINT32 DstFormat = formats[x];
printf("Testing destination color format %s\n", GetColorFormatName(DstFormat));
PROFILER_CREATE(rgbToYUV420, "RGBToYUV420");
PROFILER_CREATE(rgbToYUV444, "RGBToYUV444");
PROFILER_CREATE(yuv420ToRGB, "YUV420ToRGB");
PROFILER_CREATE(yuv444ToRGB, "YUV444ToRGB");
if (use444)
{
PROFILER_ENTER(rgbToYUV444);
if (prims->RGBToYUV444_8u_P3AC4R(rgb, DstFormat,
stride, yuv, yuv_step,
&roi) != PRIMITIVES_SUCCESS)
{
PROFILER_EXIT(rgbToYUV444);
goto fail;
}
rc = prims->RGBToYUV444_8u_P3AC4R(rgb, DstFormat,
stride, yuv, yuv_step,
&roi);
PROFILER_EXIT(rgbToYUV444);
if (rc != PRIMITIVES_SUCCESS)
goto loop_fail;
PROFILER_PRINT_HEADER;
PROFILER_PRINT(rgbToYUV444);
PROFILER_PRINT_FOOTER;
@ -479,45 +476,52 @@ static BOOL TestPrimitiveYUV(primitives_t* prims, prim_size_t roi, BOOL use444)
else
{
PROFILER_ENTER(rgbToYUV420);
if (prims->RGBToYUV420_8u_P3AC4R(rgb, DstFormat,
stride, yuv, yuv_step,
&roi) != PRIMITIVES_SUCCESS)
{
PROFILER_EXIT(rgbToYUV420);
goto fail;
}
rc = prims->RGBToYUV420_8u_P3AC4R(rgb, DstFormat,
stride, yuv, yuv_step,
&roi);
PROFILER_EXIT(rgbToYUV420);
if (rc != PRIMITIVES_SUCCESS)
goto loop_fail;
PROFILER_PRINT_HEADER;
PROFILER_PRINT(rgbToYUV420);
PROFILER_PRINT_FOOTER;
}
if (!check_padding(rgb, size * sizeof(UINT32), padding, "rgb"))
goto fail;
{
rc = -1;
goto loop_fail;
}
if ((!check_padding(yuv[0], size, padding, "Y")) ||
(!check_padding(yuv[1], uvsize, padding, "U")) ||
(!check_padding(yuv[2], uvsize, padding, "V")))
goto fail;
{
rc = -1;
goto loop_fail;
}
if (use444)
{
PROFILER_ENTER(yuv444ToRGB);
rc = prims->YUV444ToRGB_8u_P3AC4R((const BYTE**)yuv, yuv_step, rgb_dst, stride,
DstFormat,
&roi);
PROFILER_EXIT(yuv444ToRGB);
if (prims->YUV444ToRGB_8u_P3AC4R((const BYTE**)yuv, yuv_step, rgb_dst, stride,
DstFormat,
&roi) != PRIMITIVES_SUCCESS)
{
PROFILER_EXIT(yuv444ToRGB);
goto fail;
}
if (rc != PRIMITIVES_SUCCESS)
goto loop_fail;
loop_fail:
PROFILER_EXIT(yuv444ToRGB);
PROFILER_PRINT_HEADER;
PROFILER_PRINT(yuv444ToRGB);
PROFILER_PRINT_FOOTER;
if (rc != PRIMITIVES_SUCCESS)
goto fail;
}
else
{
@ -552,14 +556,15 @@ static BOOL TestPrimitiveYUV(primitives_t* prims, prim_size_t roi, BOOL use444)
if (!similarRGB(srgb, drgb, roi.width, DstFormat))
goto fail;
}
PROFILER_FREE(rgbToYUV420);
PROFILER_FREE(rgbToYUV444);
PROFILER_FREE(yuv420ToRGB);
PROFILER_FREE(yuv444ToRGB);
}
rc = TRUE;
fail:
PROFILER_FREE(rgbToYUV420);
PROFILER_FREE(rgbToYUV444);
PROFILER_FREE(yuv420ToRGB);
PROFILER_FREE(yuv444ToRGB);
free_padding(rgb, padding);
free_padding(rgb_dst, padding);
free_padding(yuv[0], padding);

Просмотреть файл

@ -47,7 +47,6 @@ static SecurityFunctionTableA* g_SspiA = NULL;
static BOOL ShouldUseNativeSspi(void);
static BOOL InitializeSspiModule_Native(void);
#endif
static void InitializeSspiModule(DWORD flags);
#if defined(WITH_NATIVE_SSPI)
BOOL ShouldUseNativeSspi(void)
@ -146,12 +145,6 @@ static BOOL CALLBACK InitializeSspiModuleInt(PINIT_ONCE once, PVOID param, PVOID
return TRUE;
}
void InitializeSspiModule(DWORD flags)
{
BOOL status = FALSE;
InitOnceExecuteOnce(&g_Initialized, InitializeSspiModuleInt, &flags, NULL);
}
const char* GetSecurityStatusString(SECURITY_STATUS status)
{
switch (status)

Просмотреть файл

@ -6,15 +6,11 @@
int TestLinkedList(int argc, char* argv[])
{
int count;
int number;
wLinkedList* list;
list = LinkedList_New();
LinkedList_AddFirst(list, (void*) (size_t) 1);
LinkedList_AddLast(list, (void*) (size_t) 2);
LinkedList_AddLast(list, (void*) (size_t) 3);
LinkedList_AddFirst(list, (void*)(size_t) 1);
LinkedList_AddLast(list, (void*)(size_t) 2);
LinkedList_AddLast(list, (void*)(size_t) 3);
count = LinkedList_Count(list);
if (count != 3)
@ -29,14 +25,12 @@ int TestLinkedList(int argc, char* argv[])
{
printf("\t%"PRIuz"\n", (size_t) LinkedList_Enumerator_Current(list));
}
printf("\n");
printf("LinkedList First: %"PRIuz" Last: %"PRIuz"\n",
(size_t) LinkedList_First(list), (size_t) LinkedList_Last(list));
(size_t) LinkedList_First(list), (size_t) LinkedList_Last(list));
LinkedList_RemoveFirst(list);
LinkedList_RemoveLast(list);
count = LinkedList_Count(list);
if (count != 1)
@ -51,14 +45,12 @@ int TestLinkedList(int argc, char* argv[])
{
printf("\t%"PRIuz"\n", (size_t) LinkedList_Enumerator_Current(list));
}
printf("\n");
printf("LinkedList First: %"PRIuz" Last: %"PRIuz"\n",
(size_t) LinkedList_First(list), (size_t) LinkedList_Last(list));
(size_t) LinkedList_First(list), (size_t) LinkedList_Last(list));
LinkedList_RemoveFirst(list);
LinkedList_RemoveLast(list);
count = LinkedList_Count(list);
if (count != 0)
@ -67,10 +59,9 @@ int TestLinkedList(int argc, char* argv[])
return -1;
}
LinkedList_AddFirst(list, (void*) (size_t) 4);
LinkedList_AddLast(list, (void*) (size_t) 5);
LinkedList_AddLast(list, (void*) (size_t) 6);
LinkedList_AddFirst(list, (void*)(size_t) 4);
LinkedList_AddLast(list, (void*)(size_t) 5);
LinkedList_AddLast(list, (void*)(size_t) 6);
count = LinkedList_Count(list);
if (count != 3)
@ -85,51 +76,47 @@ int TestLinkedList(int argc, char* argv[])
{
printf("\t%"PRIuz"\n", (size_t) LinkedList_Enumerator_Current(list));
}
printf("\n");
printf("LinkedList First: %"PRIuz" Last: %"PRIuz"\n",
(size_t) LinkedList_First(list), (size_t) LinkedList_Last(list));
LinkedList_Remove(list, (void*) (size_t) 5);
(size_t) LinkedList_First(list), (size_t) LinkedList_Last(list));
LinkedList_Remove(list, (void*)(size_t) 5);
LinkedList_Enumerator_Reset(list);
while (LinkedList_Enumerator_MoveNext(list))
{
printf("\t%"PRIuz"\n", (size_t) LinkedList_Enumerator_Current(list));
}
printf("\n");
printf("LinkedList First: %"PRIuz" Last: %"PRIuz"\n",
(size_t) LinkedList_First(list), (size_t) LinkedList_Last(list));
(size_t) LinkedList_First(list), (size_t) LinkedList_Last(list));
LinkedList_Free(list);
/* Test enumerator robustness */
/* enumerator on an empty list */
list = LinkedList_New();
LinkedList_Enumerator_Reset(list);
while (LinkedList_Enumerator_MoveNext(list))
{
printf("\terror: %"PRIuz"\n", (size_t) LinkedList_Enumerator_Current(list));
}
printf("\n");
LinkedList_Free(list);
/* Use an enumerator without reset */
list = LinkedList_New();
LinkedList_AddFirst(list, (void*) (size_t) 4);
LinkedList_AddLast(list, (void*) (size_t) 5);
LinkedList_AddLast(list, (void*) (size_t) 6);
LinkedList_AddFirst(list, (void*)(size_t) 4);
LinkedList_AddLast(list, (void*)(size_t) 5);
LinkedList_AddLast(list, (void*)(size_t) 6);
while (LinkedList_Enumerator_MoveNext(list))
{
printf("\t%"PRIuz"\n", (size_t) LinkedList_Enumerator_Current(list));
}
printf("\n");
LinkedList_Free(list);
return 0;
}