/* Source listing: pjs/jpeg/jdcolor.c (600 lines, 17 KiB, C) */

/*
* jdcolor.c
*
* This file contains an Optimized Routine for YCbCr->RGB Color Space Conversion
*
* Copyright (C) 1991-1996, Thomas G. Lane.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
* This file contains output colorspace conversion routines.
*
*/
#define JPEG_INTERNALS
#include "jinclude.h"
#include "jpeglib.h"
#include "xp_core.h"
/*
 * Private subobject: the color deconverter state for this module.
 * The public method table comes first so that a pointer to this struct can
 * be stored in cinfo->cconvert and cast back (see jinit_color_deconverter).
 */
typedef struct {
struct jpeg_color_deconverter pub; /* public fields */
/* Private state for YCC->RGB conversion.
 * All four tables are allocated and filled by build_ycc_rgb_table and are
 * indexed by the raw chroma sample value (0..MAXJSAMPLE).
 */
int * Cr_r_tab; /* => table for Cr to R conversion (already rounded to int) */
int * Cb_b_tab; /* => table for Cb to B conversion (already rounded to int) */
INT32 * Cr_g_tab; /* => table for Cr to G conversion (still scaled by 2^SCALEBITS) */
INT32 * Cb_g_tab; /* => table for Cb to G conversion (scaled; includes ONE_HALF) */
} my_color_deconverter;
/* Convenience pointer type used when casting cinfo->cconvert. */
typedef my_color_deconverter * my_cconvert_ptr;
#ifdef XP_WIN32
/* Info Added for MMX(TM) Technology Optimization */
/*
 * Prototype of the MMX conversion routine defined at the end of this file.
 * It converts `columns` pixels from three separate Y/U(Cb)/V(Cr) byte rows
 * into packed 3-byte-per-pixel output at outRGB.
 */
extern void MMXYCbCr2RGB(
int columns,
unsigned char *inY,
unsigned char *inU,
unsigned char *inV,
unsigned char *outRGB);
/*
These constants correspond to CCIR 601-1, expressed as integers scaled by 256:
R = [256*Y + 359*(Cr-128)] / 256
G = [256*Y - 88*(Cb-128) - 183*(Cr-128)] / 256
B = [256*Y + 454*(Cb-128)] / 256
Conventional floating point equations (Cb and Cr here are already centered,
i.e. the incoming sample minus 128):
R = Y + 1.40200 * Cr
G = Y - 0.34414 * Cb - 0.71414 * Cr
B = Y + 1.77200 * Cb
*/
/* The same multipliers as 16-bit hexadecimal words (0100 = 256, 0167 = 359,
 * FFA8 = -88, FF49 = -183, 01C6 = 454):
 */
/*Ry=0100 Ru=0000 Rv=0167*/
/*Gy=0100 Gu=FFA8 Gv=FF49*/
/*By=0100 Bu=01C6 Bv=0000*/
/* constants for YCbCr->RGB and YCbCrA->RGBA*/
/* Each 64-bit constant is used as four 16-bit words by the MMX code. */
static __int64 const_0 = 0x0000000000000000; /* all-zero operand for unpack instructions */
static __int64 const_sub128 = 0x0080008000800080; /* 128 in every word: centers U and V */
static __int64 const_VUmul = 0xFF49FFA8FF49FFA8; /* word pairs (Gv, Gu) for the green term */
static __int64 const_YVmul = 0x0100016701000167; /* word pairs (Ry, Rv) for the red term */
static __int64 const_YUmul = 0x010001C6010001C6; /* word pairs (By, Bu) for the blue term */
static __int64 mask_highd = 0xFFFFFFFF00000000; /* keeps only the upper 32 bits */
static __int64 const_invert = 0x00FFFFFF00FFFFFF; /* not referenced by MMXYCbCr2RGB */
/* End of added info */
#endif
/**************** YCbCr -> RGB conversion: most common case **************/
/*
* YCbCr is defined per CCIR 601-1, except that Cb and Cr are
* normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5.
* The conversion equations to be implemented are therefore
* R = Y + 1.40200 * Cr
* G = Y - 0.34414 * Cb - 0.71414 * Cr
* B = Y + 1.77200 * Cb
* where Cb and Cr represent the incoming values less CENTERJSAMPLE.
* (These numbers are derived from TIFF 6.0 section 21, dated 3-June-92.)
*
* To avoid floating-point arithmetic, we represent the fractional constants
* as integers scaled up by 2^16 (about 4 digits precision); we have to divide
* the products by 2^16, with appropriate rounding, to get the correct answer.
* Notice that Y, being an integral input, does not contribute any fraction
* so it need not participate in the rounding.
*
* For even more speed, we avoid doing any multiplications in the inner loop
* by precalculating the constants times Cb and Cr for all possible values.
* For 8-bit JSAMPLEs this is very reasonable (only 256 entries per table);
* for 12-bit samples it is still acceptable. It's not very reasonable for
* 16-bit samples, but if you want lossless storage you shouldn't be changing
* colorspace anyway.
* The Cr=>R and Cb=>B values can be rounded to integers in advance; the
* values for the G calculation are left scaled up, since we must add them
* together before rounding.
*/
/* Number of fractional bits used by the fixed-point constants and tables. */
#define SCALEBITS 16 /* speediest right-shift on some machines */
/* The value 0.5 in SCALEBITS fixed point; added before a right shift by
 * SCALEBITS so that the result is rounded rather than truncated.
 */
#define ONE_HALF ((INT32) 1 << (SCALEBITS-1))
/* Convert a non-negative floating-point constant to SCALEBITS fixed point,
 * rounding to the nearest integer.
 */
#define FIX(x) ((INT32) ((x) * (1L<<SCALEBITS) + 0.5))
/*
 * Allocate and fill the four lookup tables used by YCC->RGB conversion.
 *
 * Each table has MAXJSAMPLE+1 entries, is allocated from the JPOOL_IMAGE
 * pool, and is indexed by the raw chroma sample value.  The Cr=>R and
 * Cb=>B tables hold finished integers; the two green tables stay scaled by
 * 2^SCALEBITS because they are summed before the final shift, and the
 * Cb=>G table carries the ONE_HALF rounding term for that sum.
 */
LOCAL void
build_ycc_rgb_table (j_decompress_ptr cinfo)
{
  my_cconvert_ptr tables = (my_cconvert_ptr) cinfo->cconvert;
  int sample;		/* raw input value, 0..MAXJSAMPLE */
  INT32 chroma;		/* the same value centered on zero */
  SHIFT_TEMPS

  tables->Cr_r_tab = (int *)
    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
				(MAXJSAMPLE+1) * SIZEOF(int));
  tables->Cb_b_tab = (int *)
    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
				(MAXJSAMPLE+1) * SIZEOF(int));
  tables->Cr_g_tab = (INT32 *)
    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
				(MAXJSAMPLE+1) * SIZEOF(INT32));
  tables->Cb_g_tab = (INT32 *)
    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
				(MAXJSAMPLE+1) * SIZEOF(INT32));

  for (sample = 0; sample <= MAXJSAMPLE; sample++) {
    chroma = (INT32) sample - CENTERJSAMPLE;

    /* Red and blue contributions: nearest integer to 1.40200 * chroma
     * and 1.77200 * chroma respectively.
     */
    tables->Cr_r_tab[sample] = (int)
      RIGHT_SHIFT(FIX(1.40200) * chroma + ONE_HALF, SCALEBITS);
    tables->Cb_b_tab[sample] = (int)
      RIGHT_SHIFT(FIX(1.77200) * chroma + ONE_HALF, SCALEBITS);

    /* Green contributions stay scaled up: -0.71414 * chroma from Cr and
     * -0.34414 * chroma from Cb.  ONE_HALF is folded into the Cb entry so
     * the inner conversion loop does not have to add it.
     */
    tables->Cr_g_tab[sample] = (- FIX(0.71414)) * chroma;
    tables->Cb_g_tab[sample] = (- FIX(0.34414)) * chroma + ONE_HALF;
  }
}
/*
 * Convert some rows of samples from YCbCr to the RGB output colorspace.
 *
 * cinfo       decompressor object; supplies output_width, the range-limit
 *             table and the lookup tables stored in cinfo->cconvert.
 * input_buf   component planes; planes 0, 1 and 2 are read as Y, Cb, Cr.
 * input_row   index of the first row to read in each plane.
 * output_buf  array of output row pointers, advanced one row per iteration.
 * num_rows    number of rows to convert.
 *
 * Note that we change from noninterleaved, one-plane-per-component format
 * to interleaved-pixel format.  The output buffer is therefore three times
 * as wide as the input buffer.
 * A starting row offset is provided only for the input buffer.  The caller
 * can easily adjust the passed output_buf value to accommodate any row
 * offset required on that side.
 *
 * On XP_WIN32 builds with MMX available, the leading multiple-of-8 columns
 * of each row are converted by MMXYCbCr2RGB and only the remaining columns
 * go through the table-driven loop.  The MMX routine must not be entered
 * with a zero column count: its loop decrements the counter before testing
 * it, so a count of zero would wrap around instead of terminating.  Rows
 * narrower than 8 pixels are therefore handled entirely by the scalar loop.
 */
METHODDEF void
ycc_rgb_convert (j_decompress_ptr cinfo,
		 JSAMPIMAGE input_buf, JDIMENSION input_row,
		 JSAMPARRAY output_buf, int num_rows)
{
  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
  register int y, cb, cr;
  register JSAMPROW outptr;
  register JSAMPROW inptr0, inptr1, inptr2;
  register JDIMENSION col;
  JDIMENSION num_cols = cinfo->output_width;
#ifdef XP_WIN32
  /* Columns handled by the MMX routine: largest multiple of 8 <= num_cols. */
  JDIMENSION mmx_cols = num_cols & ~7;
  /* Use MMX only when the CPU supports it AND there is work for it. */
  int use_mmx = (MMXAvailable && mmx_cols > 0);
#endif
  /* copy these pointers into registers if possible */
  register JSAMPLE * range_limit = cinfo->sample_range_limit;
  register int * Crrtab = cconvert->Cr_r_tab;
  register int * Cbbtab = cconvert->Cb_b_tab;
  register INT32 * Crgtab = cconvert->Cr_g_tab;
  register INT32 * Cbgtab = cconvert->Cb_g_tab;
  SHIFT_TEMPS

  while (--num_rows >= 0) {
    inptr0 = input_buf[0][input_row];
    inptr1 = input_buf[1][input_row];
    inptr2 = input_buf[2][input_row];
    input_row++;
    outptr = *output_buf++;
    col = 0;
#ifdef XP_WIN32
    if (use_mmx) {
      /* Bulk-convert the leading columns; the routine writes 3 bytes per
       * pixel, so skip that many output bytes before the scalar tail.
       */
      MMXYCbCr2RGB(mmx_cols, inptr0, inptr1, inptr2, outptr);
      outptr += 3*mmx_cols;
      col = mmx_cols;
    }
#endif
    /* Table-driven conversion of whatever columns remain (all of them when
     * the MMX path is not taken).
     */
    for (; col < num_cols; col++) {
      y  = GETJSAMPLE(inptr0[col]);
      cb = GETJSAMPLE(inptr1[col]);
      cr = GETJSAMPLE(inptr2[col]);
      /* Range-limiting is essential due to noise introduced by DCT losses. */
      outptr[RGB_RED] =   range_limit[y + Crrtab[cr]];
      outptr[RGB_GREEN] = range_limit[y +
			      ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
						 SCALEBITS))];
      outptr[RGB_BLUE] =  range_limit[y + Cbbtab[cb]];
      outptr += RGB_PIXELSIZE;
    }
  }
#ifdef XP_WIN32
  if (use_mmx) {
    /* MMXYCbCr2RGB leaves the MMX state active; clear it once, here. */
    __asm emms
  }
#endif
}
/**************** Cases other than YCbCr -> RGB **************/
/*
 * Color conversion for the case where the colorspace does not change.
 * The samples are copied as-is; the only work done is interleaving the
 * separate component planes into pixel-interleaved output rows.
 */
METHODDEF void
null_convert (j_decompress_ptr cinfo,
	      JSAMPIMAGE input_buf, JDIMENSION input_row,
	      JSAMPARRAY output_buf, int num_rows)
{
  register JSAMPROW src, dst;
  register JDIMENSION col;
  register int stride = cinfo->num_components;	/* samples per output pixel */
  JDIMENSION width = cinfo->output_width;
  int comp;
  int row;

  for (row = 0; row < num_rows; row++, input_row++) {
    for (comp = 0; comp < stride; comp++) {
      src = input_buf[comp][input_row];
      /* Component `comp` lands at offset `comp` within every pixel. */
      dst = output_buf[row] + comp;
      for (col = 0; col < width; col++) {
	*dst = src[col];	/* raw copy; GETJSAMPLE() is not needed */
	dst += stride;
      }
    }
  }
}
/*
 * Color conversion for grayscale output: copy component 0 unchanged.
 * The same routine serves YCbCr -> grayscale, where component 0 is the
 * Y (luminance) plane and the chrominance planes are simply ignored.
 */
METHODDEF void
grayscale_convert (j_decompress_ptr cinfo,
		   JSAMPIMAGE input_buf, JDIMENSION input_row,
		   JSAMPARRAY output_buf, int num_rows)
{
  JSAMPARRAY first_plane = input_buf[0];
  JDIMENSION width = cinfo->output_width;

  jcopy_sample_rows(first_plane, (int) input_row, output_buf, 0,
		    num_rows, width);
}
/*
 * Adobe-style YCCK->CMYK conversion.
 * The Y/Cb/Cr planes are converted with the same tables as YCbCr->RGB and
 * each result is subtracted from MAXJSAMPLE, which yields C, M and Y as the
 * complements of R, G and B.  The fourth plane (K, black) is copied through
 * untouched.
 * The lookup tables must already have been set up by build_ycc_rgb_table.
 */
METHODDEF void
ycck_cmyk_convert (j_decompress_ptr cinfo,
		   JSAMPIMAGE input_buf, JDIMENSION input_row,
		   JSAMPARRAY output_buf, int num_rows)
{
  my_cconvert_ptr tables = (my_cconvert_ptr) cinfo->cconvert;
  /* keep the hot pointers in registers where the compiler allows it */
  register JSAMPLE * clamp = cinfo->sample_range_limit;
  register int * cr_to_r = tables->Cr_r_tab;
  register int * cb_to_b = tables->Cb_b_tab;
  register INT32 * cr_to_g = tables->Cr_g_tab;
  register INT32 * cb_to_g = tables->Cb_g_tab;
  register JSAMPROW y_row, cb_row, cr_row, k_row;
  register JSAMPROW dst;
  register JDIMENSION x;
  register int luma, cb, cr;
  int red, green, blue;
  JDIMENSION width = cinfo->output_width;
  SHIFT_TEMPS

  for (; num_rows > 0; num_rows--, input_row++) {
    y_row  = input_buf[0][input_row];
    cb_row = input_buf[1][input_row];
    cr_row = input_buf[2][input_row];
    k_row  = input_buf[3][input_row];
    dst = *output_buf++;

    for (x = 0; x < width; x++, dst += 4) {
      luma = GETJSAMPLE(y_row[x]);
      cb   = GETJSAMPLE(cb_row[x]);
      cr   = GETJSAMPLE(cr_row[x]);

      red   = luma + cr_to_r[cr];
      green = luma + ((int) RIGHT_SHIFT(cb_to_g[cb] + cr_to_g[cr],
					SCALEBITS));
      blue  = luma + cb_to_b[cb];

      /* Range-limiting is essential due to noise introduced by DCT losses. */
      dst[0] = clamp[MAXJSAMPLE - red];
      dst[1] = clamp[MAXJSAMPLE - green];
      dst[2] = clamp[MAXJSAMPLE - blue];
      /* K passes through unchanged; GETJSAMPLE is not needed for a copy */
      dst[3] = k_row[x];
    }
  }
}
/*
 * start_pass method for this module.
 * Color deconversion needs no per-pass setup, so this does nothing.
 */
METHODDEF void
start_pass_dcolor (j_decompress_ptr cinfo)
{
  (void) cinfo;		/* intentionally unused */
}
/*
 * Module initialization routine for output colorspace conversion.
 *
 * Allocates the private deconverter object, installs it in cinfo->cconvert,
 * checks that num_components is consistent with jpeg_color_space, then picks
 * the conversion routine and out_color_components for the requested
 * out_color_space.  Unsupported combinations are reported through ERREXIT
 * with JERR_BAD_J_COLORSPACE or JERR_CONVERSION_NOTIMPL.
 */
GLOBAL void
jinit_color_deconverter (j_decompress_ptr cinfo)
{
  my_cconvert_ptr cconvert;
  int ci;
  int count_mismatch;

  cconvert = (my_cconvert_ptr)
    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
				SIZEOF(my_color_deconverter));
  cconvert->pub.start_pass = start_pass_dcolor;
  cinfo->cconvert = (struct jpeg_color_deconverter *) cconvert;

  /* Make sure num_components agrees with jpeg_color_space */
  switch (cinfo->jpeg_color_space) {
  case JCS_GRAYSCALE:
    count_mismatch = (cinfo->num_components != 1);
    break;

  case JCS_RGB:
  case JCS_YCbCr:
    count_mismatch = (cinfo->num_components != 3);
    break;

  case JCS_CMYK:
  case JCS_YCCK:
    count_mismatch = (cinfo->num_components != 4);
    break;

  default:			/* JCS_UNKNOWN can be anything */
    count_mismatch = (cinfo->num_components < 1);
    break;
  }
  if (count_mismatch)
    ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);

  /* Choose out_color_components and the conversion method for the
   * requested output space.  Components that the chosen conversion never
   * reads have component_needed cleared so that earlier pipeline stages
   * can skip them.
   */
  switch (cinfo->out_color_space) {
  case JCS_GRAYSCALE:
    cinfo->out_color_components = 1;
    switch (cinfo->jpeg_color_space) {
    case JCS_GRAYSCALE:
    case JCS_YCbCr:
      cconvert->pub.color_convert = grayscale_convert;
      /* Only component 0 (Y) is needed for color->grayscale conversion */
      for (ci = cinfo->num_components - 1; ci >= 1; ci--)
	cinfo->comp_info[ci].component_needed = FALSE;
      break;
    default:
      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
      break;
    }
    break;

  case JCS_RGB:
    cinfo->out_color_components = RGB_PIXELSIZE;
    switch (cinfo->jpeg_color_space) {
    case JCS_YCbCr:
      cconvert->pub.color_convert = ycc_rgb_convert;
      build_ycc_rgb_table(cinfo);
      break;
    case JCS_RGB:
      /* Straight copy is only valid for 3-sample RGB pixels */
      if (RGB_PIXELSIZE == 3)
	cconvert->pub.color_convert = null_convert;
      else
	ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
      break;
    default:
      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
      break;
    }
    break;

  case JCS_CMYK:
    cinfo->out_color_components = 4;
    switch (cinfo->jpeg_color_space) {
    case JCS_YCCK:
      cconvert->pub.color_convert = ycck_cmyk_convert;
      build_ycc_rgb_table(cinfo);
      break;
    case JCS_CMYK:
      cconvert->pub.color_convert = null_convert;
      break;
    default:
      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
      break;
    }
    break;

  default:
    /* Any other space is allowed only as a null conversion to itself */
    if (cinfo->out_color_space != cinfo->jpeg_color_space) {
      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
    } else {
      cinfo->out_color_components = cinfo->num_components;
      cconvert->pub.color_convert = null_convert;
    }
    break;
  }

  /* Colormapped output carries a single index component per pixel */
  cinfo->output_components =
    cinfo->quantize_colors ? 1 : cinfo->out_color_components;
}
#ifdef XP_WIN32
// MMX(tm) technology assembly code additions begin here
/*
 * Convert one row of planar Y/U(Cb)/V(Cr) bytes to packed 3-byte pixels
 * using MMX instructions.
 *
 * columns  number of pixels to convert.  Pixels are processed in groups of
 *          four (columns/4 iterations); any remainder is NOT converted and
 *          must be handled by the caller.  Values below 4 do nothing.
 * inY      luminance samples, one byte per pixel.
 * inU      Cb samples, one byte per pixel.
 * inV      Cr samples, one byte per pixel.
 * outRGB   destination; 12 bytes (4 pixels in R,G,B byte order) are stored
 *          per iteration.
 *
 * The arithmetic uses the 8-bit-scaled multipliers held in const_VUmul,
 * const_YVmul and const_YUmul.  The routine does not execute emms; the
 * caller is expected to do that once it has finished calling this function.
 */
void MMXYCbCr2RGB(
	int columns,
	unsigned char *inY,
	unsigned char *inU,
	unsigned char *inV,
	unsigned char *outRGB)
{
	/* The loop below is post-tested (dec edi / jnz), so an iteration count
	 * of zero would wrap around and run past the end of every buffer.
	 * Refuse to enter it unless at least one full group of 4 pixels exists.
	 */
	if (columns < 4)
		return;

	//; This program will compile with Microsoft Visual C++ 4.1 or greater.
	//; Use the /GM compile switch to allow the compilation of MMX(tm) Technology
	//; instructions as inline assembly
	__asm {
		// Initialize all the pointers, loop variables
		mov eax, inY
		mov ecx, inV
		mov edi, columns
		mov ebx, inU
		shr edi, 2 ; number of loops = cols/4
		mov edx, outRGB
		// Main Loop to process 12 bytes
	YUVtoRGB:
		movd mm0, [eax] ; 0/0/0/0/Y3/Y2/Y1/Y0
		pxor mm7, mm7 ; use mm7 as const_0 to achieve better pairing at start
		movd mm2, [ebx] ; 0/0/0/0/U3/U2/U1/U0
		punpcklbw mm0, mm7 ; Y3/Y2/Y1/Y0
		movd mm3, [ecx] ; 0/0/0/0/V3/V2/V1/V0
		punpcklbw mm2, mm7 ; U3/U2/U1/U0
		psubsw mm2, const_sub128 ; U3'/U2'/U1'/U0'
		punpcklbw mm3, mm7 ; V3/V2/V1/V0
		psubsw mm3, const_sub128 ; V3'/V2'/V1'/V0'
		movq mm4, mm2
		punpcklwd mm2, mm3 ; V1'/U1'/V0'/U0'
		movq mm1, mm0
		pmaddwd mm2, const_VUmul ; gvV1'+guU1'/gvV0'+guU0'
		psllw mm1, 8 ; Y3*256/Y2*256/Y1*256/Y0*256
		movq mm6, mm1
		punpcklwd mm1, mm7 ; Y1*256/Y0*256
		punpckhwd mm6, mm7 ; Y3*256/Y2*256
		movq mm5, mm4
		punpckhwd mm5, mm3 ; V3'/U3'/V2'/U2'
		paddd mm2, mm1 ; G1*256/G0*256 (mm1 free)
		pmaddwd mm5, const_VUmul ; gvV3'+guU3'/gvV2'+guU2'
		movq mm1, mm3 ; (using mm1)
		punpcklwd mm3, mm0 ; Y1/V1'/Y0/V0'
		movq mm7, mm4 ; This wipes out the zero constant
		pmaddwd mm3, const_YVmul ; ryY1+rvV1'/ryY0+rvV0'
		psrad mm2, 8 ; G1/G0
		paddd mm5, mm6 ; G3*256/G2*256 (mm6 free)
		punpcklwd mm4, mm0 ; Y1/U1'/Y0/U0'
		pmaddwd mm4, const_YUmul ; // "byY1+buU1'/byY0'+buU0'"
		psrad mm5, 8 ; G3/G2
		psrad mm3, 8 ; R1/R0
		punpckhwd mm7 , mm0 ; Y3/U3'/Y2/U2'
		psrad mm4, 8 ; B1/B0
		movq mm6, mm3
		pmaddwd mm7, const_YUmul ; // "byY3+buU3'/byY2'+buU2'"
		punpckhwd mm1, mm0 ; Y3/V3'/Y2/V2'
		pmaddwd mm1, const_YVmul ; ryY3+rvV3'/ryY2+rvV2'
		punpckldq mm3, mm2 ; G0/R0
		punpckhdq mm6, mm2 ; G1/R1 (mm2 free)
		movq mm0, mm4
		psrad mm7, 8 ; B3/B2
		punpckldq mm4, const_0 ; 0/B0
		punpckhdq mm0, const_0 ; 0/B1
		psrad mm1, 8 ; R3/R2
		packssdw mm3, mm4 ; 0/B0/G0/R0 (mm4 free)
		movq mm2, mm1
		packssdw mm6, mm0 ; 0/B1/G1/R1 (mm0 free)
		packuswb mm3, mm6 ; 0/B1/G1/R1/0/B0/G0/R0 (mm6 free)
		punpckldq mm2, mm5 ; G2/R2
		movq mm4, mm7
		punpckhdq mm1, mm5 ; G3/R3 (mm5 done)
		punpckldq mm7, const_0 ; 0/B2 (change this line for alpha code)
		punpckhdq mm4, const_0 ; 0/B3 (change this line for alpha code)
		movq mm0, mm3
		packssdw mm2, mm7 ; 0/B2/G2/R2
		pand mm3, mask_highd ; 0/B1/G1/R1/0/0/0/0
		packssdw mm1, mm4 ; 0/B3/G3/R3
		psrlq mm3, 8 ; 0/0/B1/G1/R1/0/0/0
		add edx, 12
		por mm0, mm3 ; 0/0/?/?/R1/B0/G0/R0
		packuswb mm2, mm1 ; 0/B3/G3/R3/0/B2/G2/R2
		psrlq mm3, 32 ; 0/0/0/0/0/0/B1/G1
		add eax, 4
		movd [edx][-12], mm0 ; correct for add
		punpcklwd mm3, mm2 ; 0/B2/0/0/G2/R2/B1/G1
		psrlq mm2, 24 ; 0/0/0/0/B3/G3/R3/0
		add ecx, 4
		movd [edx][-8], mm3 ; correct for previous add
		psrlq mm3, 48 ; 0/0/0/0/0/0/0/B2
		por mm2, mm3 ; 0/0/0/0/B3/G3/R3/B2
		add ebx, 4
		movd [edx][-4], mm2 ; correct for previous add
		dec edi
		jnz YUVtoRGB ; Do 12 more bytes if not zero
		//emms // "commented out since it is done at the end of the caller's loop"
	} // end of __asm
}
#endif