зеркало из https://github.com/mozilla/pjs.git
Optimized SSE2 assembly for LCMS - part of bug 445552. r=vlad
This commit is contained in:
Родитель
b22606db84
Коммит
47b7be01c1
|
@ -1571,6 +1571,7 @@ void cdecl MAT3eval(LPVEC3 r, LPMAT3 a, LPVEC3 v);
|
|||
void cdecl MAT3evalF(LPFVEC3 r, LPFMAT3 a, LPFVEC3 v);
|
||||
void cdecl MAT3toFix(LPWMAT3 r, LPMAT3 v);
|
||||
void cdecl MAT3toFloat(LPFMAT3 r, LPMAT3 v);
|
||||
void cdecl MAT3toFloatTranspose(LPFMAT3 r, LPMAT3 v);
|
||||
void cdecl MAT3fromFix(LPMAT3 r, LPWMAT3 v);
|
||||
void cdecl MAT3evalW(LPWVEC3 r, LPWMAT3 a, LPWVEC3 v);
|
||||
LCMSBOOL cdecl MAT3isIdentity(LPWMAT3 a, double Tolerance);
|
||||
|
@ -1862,8 +1863,16 @@ typedef struct {
|
|||
union {
|
||||
WMAT3 W;
|
||||
FMAT3A FA; // This is not a matrix proper - use FA.F to access the matrix pointer
|
||||
// Moreover, we store the transpose of the matrix instead, so the first
|
||||
// vector corresponds to the first column instead of the first row.
|
||||
} Matrix;
|
||||
|
||||
FLOAT clampMax; // SSE2 doesn't have an efficient way to clamp using integers, so we have
|
||||
// to clamp in the float domain. Unfortunately, since we eventually want
|
||||
// our integer values clamped to 2^16 - 1, we need to clamp with a very
|
||||
// precise value in the float domain. We let the CPU take care of it by calculating
|
||||
// it at transform creation time rather than trusting the compiler.
|
||||
|
||||
L16PARAMS p16; // Primary curve
|
||||
LPWORD L[3];
|
||||
LPLCMSPRECACHE L_Precache;
|
||||
|
@ -1880,7 +1889,6 @@ LPMATSHAPER cdecl cmsAllocMatShaper2(LPMAT3 matrix, LPGAMMATABLE In[], LPLCMSPRE
|
|||
|
||||
void cdecl cmsFreeMatShaper(LPMATSHAPER MatShaper);
|
||||
void cdecl cmsEvalMatShaper(LPMATSHAPER MatShaper, WORD In[], WORD Out[]);
|
||||
void cdecl cmsEvalMatShaperFloat(LPMATSHAPER MatShaper, BYTE In[], BYTE Out[]);
|
||||
|
||||
LCMSBOOL cdecl cmsReadICCMatrixRGB2XYZ(LPMAT3 r, cmsHPROFILE hProfile);
|
||||
|
||||
|
|
|
@ -103,9 +103,15 @@ LPMATSHAPER cmsAllocMatShaper2(LPMAT3 Matrix, LPGAMMATABLE In[], LPLCMSPRECACHE
|
|||
// Fill matrix part
|
||||
if (Behaviour & MATSHAPER_FLOATMAT) {
|
||||
FMAT3ASetup(&NewMatShaper->Matrix.FA);
|
||||
MAT3toFloat(NewMatShaper -> Matrix.FA.F, Matrix);
|
||||
MAT3toFloatTranspose(NewMatShaper -> Matrix.FA.F, Matrix);
|
||||
if (!FMAT3isIdentity(NewMatShaper -> Matrix.FA.F, 0.00001f))
|
||||
NewMatShaper -> dwFlags |= MATSHAPER_HASMATRIX;
|
||||
|
||||
// This needs to be calculated by the CPU or a very precise
|
||||
// compiler. If it's too big (like 1.0), values are clamped
|
||||
// to 65536 instead of 65535, and we either have an overflow of
|
||||
// the precache bounds or scary downcasting.
|
||||
NewMatShaper -> clampMax = ((FLOAT) (65536 - 1)) / 65536.0f;
|
||||
}
|
||||
else {
|
||||
MAT3toFix(&NewMatShaper -> Matrix.W, Matrix);
|
||||
|
@ -397,76 +403,6 @@ void OutputBehaviour(LPMATSHAPER MatShaper, WORD In[], WORD Out[])
|
|||
|
||||
}
|
||||
|
||||
// Run one 8-bit RGB pixel through the float matrix-shaper.
//
//   MatShaper - shaper whose Matrix.FA holds the float matrix plus a scratch
//               vector (slot v[3]) used as working storage here
//   In        - three 8-bit input samples
//   Out       - receives three 8-bit output samples
//
// Pipeline: input curves (precache, LUT interpolation, or none) -> optional
// matrix -> clamp to a 16-bit word -> output curves (precache, LUT
// interpolation, or none) -> 8 bit.
//
// NOTE(review): the MATSHAPER comments in this change say Matrix.FA.F now
// holds the transpose of the matrix; confirm MAT3evalF expects that layout.
void cmsEvalMatShaperFloat(LPMATSHAPER MatShaper, BYTE In[], BYTE Out[])
{
    WORD tmp[3];
    FVEC3 OutVect;
    LPFVEC3 FloatVals = &MatShaper -> Matrix.FA.F->v[3]; // Access our secret aligned temp buffer

    // Stage 1: input curves, producing float-domain values.
    if (MatShaper -> dwFlags & MATSHAPER_HASINPSHAPER)
    {
        if (MatShaper->L2_Precache != NULL)
        {
            // Precomputed 8 bit -> float table, indexed directly by the sample.
            FloatVals->n[VX] = MatShaper->L2_Precache->Impl.LI16F_FORWARD.Cache[0][In[0]];
            FloatVals->n[VY] = MatShaper->L2_Precache->Impl.LI16F_FORWARD.Cache[1][In[1]];
            FloatVals->n[VZ] = MatShaper->L2_Precache->Impl.LI16F_FORWARD.Cache[2][In[2]];
        }
        else
        {
            FloatVals->n[VX] = ToFloatDomain(cmsLinearInterpLUT16(RGB_8_TO_16(In[0]), MatShaper -> L2[0], &MatShaper -> p2_16));
            FloatVals->n[VY] = ToFloatDomain(cmsLinearInterpLUT16(RGB_8_TO_16(In[1]), MatShaper -> L2[1], &MatShaper -> p2_16));
            FloatVals->n[VZ] = ToFloatDomain(cmsLinearInterpLUT16(RGB_8_TO_16(In[2]), MatShaper -> L2[2], &MatShaper -> p2_16));
        }
    }
    else
    {
        // No input curves. The samples still have to be widened to 16 bits:
        // every other path hands ToFloatDomain a 16-bit value and the result
        // is narrowed with RGB_16_TO_8 at the end, so feeding raw 8-bit
        // samples here would scale the pixel down by roughly 256.
        FloatVals->n[VX] = ToFloatDomain(RGB_8_TO_16(In[0]));
        FloatVals->n[VY] = ToFloatDomain(RGB_8_TO_16(In[1]));
        FloatVals->n[VZ] = ToFloatDomain(RGB_8_TO_16(In[2]));
    }

    // Stage 2: matrix, or a straight copy when no matrix is flagged.
    if (MatShaper -> dwFlags & MATSHAPER_HASMATRIX)
    {
        MAT3evalF(&OutVect, MatShaper -> Matrix.FA.F, FloatVals);
    }
    else
    {
        OutVect.n[VX] = FloatVals->n[VX];
        OutVect.n[VY] = FloatVals->n[VY];
        OutVect.n[VZ] = FloatVals->n[VZ];
    }

    // Stage 3: back to integers, clamped to the WORD range.
    tmp[0] = _cmsClampWord(FromFloatDomain(OutVect.n[VX]));
    tmp[1] = _cmsClampWord(FromFloatDomain(OutVect.n[VY]));
    tmp[2] = _cmsClampWord(FromFloatDomain(OutVect.n[VZ]));

    // Stage 4: output curves and narrowing to 8 bits.
    if (MatShaper -> dwFlags & MATSHAPER_HASSHAPER)
    {
        if (MatShaper->L_Precache != NULL)
        {
            // Precomputed 16 bit -> 8 bit table, indexed by the clamped word.
            Out[0] = MatShaper->L_Precache->Impl.LI168_REVERSE.Cache[0][tmp[0]];
            Out[1] = MatShaper->L_Precache->Impl.LI168_REVERSE.Cache[1][tmp[1]];
            Out[2] = MatShaper->L_Precache->Impl.LI168_REVERSE.Cache[2][tmp[2]];
        }
        else
        {
            Out[0] = RGB_16_TO_8(cmsLinearInterpLUT16(tmp[0], MatShaper -> L[0], &MatShaper -> p16));
            Out[1] = RGB_16_TO_8(cmsLinearInterpLUT16(tmp[1], MatShaper -> L[1], &MatShaper -> p16));
            Out[2] = RGB_16_TO_8(cmsLinearInterpLUT16(tmp[2], MatShaper -> L[2], &MatShaper -> p16));
        }
    }
    else
    {
        Out[0] = RGB_16_TO_8(tmp[0]);
        Out[1] = RGB_16_TO_8(tmp[1]);
        Out[2] = RGB_16_TO_8(tmp[2]);
    }
}
|
||||
|
||||
// Master on evaluating shapers, 3 different behaviours
|
||||
|
||||
|
|
|
@ -51,6 +51,7 @@ double cdecl MAT3det(LPMAT3 m);
|
|||
void cdecl MAT3eval(LPVEC3 r, LPMAT3 a, LPVEC3 v);
|
||||
void cdecl MAT3toFix(LPWMAT3 r, LPMAT3 v);
|
||||
void cdecl MAT3toFloat(LPFMAT3 r, LPMAT3 v);
|
||||
void cdecl MAT3toFloatTranspose(LPFMAT3 r, LPMAT3 v);
|
||||
void cdecl MAT3evalW(LPWVEC3 r, LPWMAT3 a, LPWVEC3 v);
|
||||
void cdecl MAT3perK(LPMAT3 r, LPMAT3 v, double d);
|
||||
void cdecl MAT3scaleAndCut(LPWMAT3 r, LPMAT3 v, double d);
|
||||
|
@ -861,6 +862,20 @@ void MAT3toFloat(LPFMAT3 r, LPMAT3 v)
|
|||
VEC3toFloat(&r -> v[2], &v -> v[2]);
|
||||
}
|
||||
|
||||
// Store the transpose of v in r, converting each element with
// DOUBLE_TO_FLOAT: element (row, col) of v lands at (col, row) of r.
void MAT3toFloatTranspose(LPFMAT3 r, LPMAT3 v)
{
    unsigned srcRow;

    for (srcRow = 0; srcRow < 3; ++srcRow) {

        /* Source row `srcRow` becomes destination column `srcRow`;
           the three elements of the row are handled one by one. */
        r -> v[0].n[srcRow] = DOUBLE_TO_FLOAT(v -> v[srcRow].n[0]);
        r -> v[1].n[srcRow] = DOUBLE_TO_FLOAT(v -> v[srcRow].n[1]);
        r -> v[2].n[srcRow] = DOUBLE_TO_FLOAT(v -> v[srcRow].n[2]);
    }
}
|
||||
|
||||
void MAT3fromFix(LPMAT3 r, LPWMAT3 v)
|
||||
{
|
||||
VEC3fromFix(&r -> v[0], &v -> v[0]);
|
||||
|
|
|
@ -58,7 +58,61 @@ void LCMSEXPORT cmsSetAlarmCodes(int r, int g, int b);
|
|||
LCMSBOOL LCMSEXPORT cmsIsIntentSupported(cmsHPROFILE hProfile,
|
||||
int Intent, int UsedDirection);
|
||||
|
||||
// Determine if we can build with SSE2 (this was partly copied from jmorecfg.h in
|
||||
// mozilla/jpeg)
|
||||
// -------------------------------------------------------------------------
|
||||
#if defined(_M_IX86) && !defined(__GNUC__)
|
||||
|
||||
/* Get us a CPUID function. Avoid clobbering EBX because sometimes it's the PIC
|
||||
register - I'm not sure if that ever happens on windows, but cpuid isn't
|
||||
on the critical path so we just preserve the register to be safe and to be
|
||||
consistent with the non-windows version. */
|
||||
/* Execute CPUID with EAX = fxn and return the resulting EAX, EBX, ECX and EDX
   through a, b, c and d respectively.

   The first xchg parks the caller's EBX in ESI, the second one puts it back
   after CPUID's EBX has been copied out, so EBX is unchanged on exit. */
LCMS_INLINE void LCMSCPUID(DWORD fxn, LPDWORD a, LPDWORD b, LPDWORD c, LPDWORD d) {
    DWORD outEax, outEbx, outEcx, outEdx;

    ASM {
        xchg    ebx, esi
        mov     eax, fxn
        cpuid
        mov     outEax, eax
        mov     outEbx, ebx
        mov     outEcx, ecx
        mov     outEdx, edx
        xchg    ebx, esi
    }

    /* Copy the captured registers out to the caller. */
    *a = outEax;
    *b = outEbx;
    *c = outEcx;
    *d = outEdx;
}
|
||||
|
||||
#define HAVE_MMX_INTEL_MNEMONICS
|
||||
|
||||
/* SSE2 code appears broken for some cpus (bug 247437) */
|
||||
#define HAVE_SSE2_INTEL_MNEMONICS
|
||||
#define HAVE_SSE2_INTRINSICS
|
||||
#endif
|
||||
|
||||
#if defined(__GNUC__) && defined(__i386__)
|
||||
|
||||
/* Get us a CPUID function. We can't use ebx because it's the PIC register on
|
||||
some platforms, so we use ESI instead and save ebx to avoid clobbering it. */
|
||||
/* Execute CPUID with EAX = fxn and return the resulting EAX, EBX, ECX and EDX
   through a, b, c and d respectively.

   EBX is never named as an operand: it is swapped with ESI around the CPUID
   instruction, so CPUID's EBX result is delivered in ESI (the "=S" output)
   and the original EBX is back in place when the statement ends. */
LCMS_INLINE void LCMSCPUID(DWORD fxn, LPDWORD a, LPDWORD b, LPDWORD c, LPDWORD d) {

    DWORD outEax, outEbx, outEcx, outEdx;

    __asm__ __volatile__ (
        "xchgl %%ebx, %%esi;"
        " cpuid;"
        " xchgl %%ebx, %%esi;"
        : "=a" (outEax), "=S" (outEbx), "=c" (outEcx), "=d" (outEdx)
        : "a" (fxn));

    /* Copy the captured registers out to the caller. */
    *a = outEax;
    *b = outEbx;
    *c = outEcx;
    *d = outEdx;
}
|
||||
|
||||
#define HAVE_SSE2_INTRINSICS
|
||||
/* XXX - the below wasn't in jpeg/jmorecfg.h - why? */
|
||||
#define HAVE_SSE2_INTEL_MNEMONICS
|
||||
#endif /* GNUC && i386 */
|
||||
|
||||
|
||||
|
||||
// Alarm RGB codes
|
||||
|
@ -89,6 +143,33 @@ static icTagSignature Preview[] = {icSigPreview0Tag,
|
|||
|
||||
static volatile double GlobalAdaptationState = 0;
|
||||
|
||||
// -------------------------Runtime SSE2 Detection-----------------------------
|
||||
|
||||
#define SSE2_EDX_MASK (1UL << 26)
|
||||
static LCMSBOOL SSE2Available() {
|
||||
|
||||
static int isAvailable = -1;
|
||||
DWORD a, b, c, d;
|
||||
DWORD function = 0x00000001;
|
||||
|
||||
if (isAvailable == -1) {
|
||||
|
||||
// If we don't have compile-time support, we don't have runtime support
|
||||
#ifndef HAVE_SSE2_INTEL_MNEMONICS
|
||||
isAvailable = 0;
|
||||
#else
|
||||
/* We have CPUID macros defined if we have sse2 mnemonics. */
|
||||
LCMSCPUID(function, &a, &b, &c, &d);
|
||||
if (d & SSE2_EDX_MASK)
|
||||
isAvailable = 1;
|
||||
else
|
||||
isAvailable = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
return (isAvailable) ? TRUE : FALSE;
|
||||
}
|
||||
|
||||
// --------------------------------Stages--------------------------------------
|
||||
|
||||
// The following routines implement several kinds of steps inside
|
||||
|
@ -501,8 +582,8 @@ void CachedXFORMGamutCheck(_LPcmsTRANSFORM p,
|
|||
|
||||
static
|
||||
void MatrixShaperXFORM(_LPcmsTRANSFORM p,
|
||||
LPVOID in,
|
||||
LPVOID out, unsigned int Size)
|
||||
LPVOID in,
|
||||
LPVOID out, unsigned int Size)
|
||||
{
|
||||
register LPBYTE accum;
|
||||
register LPBYTE output;
|
||||
|
@ -522,25 +603,166 @@ void MatrixShaperXFORM(_LPcmsTRANSFORM p,
|
|||
}
|
||||
}
|
||||
|
||||
static const FLOAT floatScale = 65536.0f;
|
||||
static const FLOAT * const floatScaleAddr = &floatScale; // Win32 ASM doesn't know how to take addressOf inline
|
||||
|
||||
#ifdef HAVE_SSE2_INTEL_MNEMONICS
|
||||
static
|
||||
void MatrixShaperXFORMFloat(_LPcmsTRANSFORM p,
|
||||
LPVOID in,
|
||||
LPVOID out, unsigned int Size)
|
||||
{
|
||||
register LPBYTE input, output;
|
||||
register LPBYTE In, Out;
|
||||
register unsigned int i;
|
||||
LPMATSHAPER MatShaper;
|
||||
|
||||
|
||||
input = (LPBYTE) in;
|
||||
output = (LPBYTE) out;
|
||||
In = (LPBYTE) in;
|
||||
Out = (LPBYTE) out;
|
||||
MatShaper = p -> SmeltMatShaper;
|
||||
|
||||
for (i=0; i < Size; i++)
|
||||
{
|
||||
cmsEvalMatShaperFloat(p -> SmeltMatShaper, input, output);
|
||||
input += 3;
|
||||
output += 3;
|
||||
|
||||
LPFVEC3 FloatVals = &MatShaper -> Matrix.FA.F->v[3]; // Access our secret aligned temp buffer
|
||||
LPFVEC3 MatPtr = MatShaper -> Matrix.FA.F->v; // Matrix
|
||||
LPFLOAT clampMax = &MatShaper -> clampMax;
|
||||
LPDWORD tmp = (LPDWORD) FloatVals;
|
||||
|
||||
if (MatShaper -> dwFlags & MATSHAPER_HASINPSHAPER)
|
||||
{
|
||||
if (MatShaper->L2_Precache != NULL)
|
||||
{
|
||||
FloatVals->n[VX] = MatShaper->L2_Precache->Impl.LI16F_FORWARD.Cache[0][In[0]];
|
||||
FloatVals->n[VY] = MatShaper->L2_Precache->Impl.LI16F_FORWARD.Cache[1][In[1]];
|
||||
FloatVals->n[VZ] = MatShaper->L2_Precache->Impl.LI16F_FORWARD.Cache[2][In[2]];
|
||||
}
|
||||
else
|
||||
{
|
||||
FloatVals->n[VX] = ToFloatDomain(cmsLinearInterpLUT16(RGB_8_TO_16(In[0]), MatShaper -> L2[0], &MatShaper -> p2_16));
|
||||
FloatVals->n[VY] = ToFloatDomain(cmsLinearInterpLUT16(RGB_8_TO_16(In[1]), MatShaper -> L2[1], &MatShaper -> p2_16));
|
||||
FloatVals->n[VZ] = ToFloatDomain(cmsLinearInterpLUT16(RGB_8_TO_16(In[2]), MatShaper -> L2[2], &MatShaper -> p2_16));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
FloatVals->n[VX] = ToFloatDomain(In[0]);
|
||||
FloatVals->n[VY] = ToFloatDomain(In[1]);
|
||||
FloatVals->n[VZ] = ToFloatDomain(In[2]);
|
||||
}
|
||||
|
||||
if (MatShaper -> dwFlags & MATSHAPER_HASMATRIX)
|
||||
{
|
||||
#ifdef __GNUC__
|
||||
__asm(
|
||||
"movaps (%0), %%xmm1;\n\t" // Move the first matrix column to xmm1
|
||||
"movaps 16(%0), %%xmm2;\n\t" // Move the second matrix column to xmm2
|
||||
"movaps 32(%0), %%xmm3;\n\t" // move the third matrix column to xmm3
|
||||
"movaps 48(%0), %%xmm0;\n\t" // Move the vector to xmm0
|
||||
|
||||
// Note - We have to copy and then shuffle because of the weird
|
||||
// semantics of shufps
|
||||
//
|
||||
"movaps %%xmm0, %%xmm4;\n\t" // Copy the vector to xmm4
|
||||
"shufps $0, %%xmm4, %%xmm4;\n\t" // Shuffle to repeat the first vector element repeated 4 times
|
||||
"mulps %%xmm4, %%xmm1;\n\t" // Multiply the first vector element by the first matrix column
|
||||
"movaps %%xmm0, %%xmm5; \n\t" // Copy the vector to xmm5
|
||||
"shufps $0x55, %%xmm5, %%xmm5;\n\t" // Shuffle to repeat the second vector element repeated 4 times
|
||||
"mulps %%xmm5, %%xmm2;\n\t" // Multiply the second vector element by the seccond matrix column
|
||||
"movaps %%xmm0, %%xmm6;\n\t" // Copy the vector to xmm6
|
||||
"shufps $0xAA, %%xmm6, %%xmm6;\n\t" // Shuffle to repeat the third vector element repeated 4 times
|
||||
"mulps %%xmm6, %%xmm3;\n\t" // Multiply the third vector element by the third matrix column
|
||||
|
||||
"addps %%xmm3, %%xmm2;\n\t" // Sum (second + third) columns
|
||||
"addps %%xmm2, %%xmm1;\n\t" // Sum ((second + third) + first) columns
|
||||
|
||||
"movss (%1), %%xmm7;\n\t" // load the floating point representation of 65535/65536
|
||||
"shufps $0, %%xmm7, %%xmm7;\n\t" // move it into all of the four slots
|
||||
"minps %%xmm7, %%xmm1;\n\t" // clamp the vector to 1.0 max
|
||||
"xorps %%xmm6, %%xmm6;\n\t" // get us cleared bitpatern, which is 0.0f
|
||||
"maxps %%xmm6, %%xmm1;\n\t" // clamp the vector to 0.0 min
|
||||
"movss (%2), %%xmm5;\n\t" // load the floating point scale factor
|
||||
"shufps $0, %%xmm5, %%xmm5;\n\t" // put it in all four slots
|
||||
"mulps %%xmm5, %%xmm1;\n\t" // multiply by the scale factor
|
||||
"cvtps2dq %%xmm1, %%xmm1;\n\t" // convert to integers
|
||||
"movdqa %%xmm1, 48(%0);\n\t" // store
|
||||
|
||||
:
|
||||
: "r" (MatPtr), "r" (clampMax), "r" (&floatScale)
|
||||
: "memory"
|
||||
);
|
||||
#else
|
||||
ASM {
|
||||
mov eax, MatPtr
|
||||
mov ecx, clampMax
|
||||
mov edx, floatScaleAddr
|
||||
|
||||
movaps xmm1, [eax]
|
||||
movaps xmm2, [eax + 16]
|
||||
movaps xmm3, [eax + 32]
|
||||
movaps xmm0, [eax + 48]
|
||||
|
||||
movaps xmm4, xmm0
|
||||
shufps xmm4, xmm4, 0
|
||||
mulps xmm1, xmm4
|
||||
movaps xmm5, xmm0
|
||||
shufps xmm5, xmm5, 0x55
|
||||
mulps xmm2, xmm5
|
||||
movaps xmm6, xmm0
|
||||
shufps xmm6, xmm6, 0xAA
|
||||
mulps xmm3, xmm6
|
||||
|
||||
addps xmm2, xmm3
|
||||
addps xmm1, xmm2
|
||||
|
||||
movss xmm7, [ecx]
|
||||
shufps xmm7, xmm7, 0
|
||||
minps xmm1, xmm7
|
||||
xorps xmm6, xmm6
|
||||
maxps xmm1, xmm6
|
||||
movss xmm5, [edx]
|
||||
shufps xmm5, xmm5, 0
|
||||
mulps xmm1, xmm5
|
||||
cvtps2dq xmm1, xmm1
|
||||
movdqa [eax + 48], xmm1
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
tmp[0] = _cmsClampWord(FromFloatDomain(FloatVals->n[VX]));
|
||||
tmp[1] = _cmsClampWord(FromFloatDomain(FloatVals->n[VY]));
|
||||
tmp[2] = _cmsClampWord(FromFloatDomain(FloatVals->n[VZ]));
|
||||
}
|
||||
|
||||
if (MatShaper -> dwFlags & MATSHAPER_HASSHAPER)
|
||||
{
|
||||
if (MatShaper->L_Precache != NULL)
|
||||
{
|
||||
Out[0] = MatShaper->L_Precache->Impl.LI168_REVERSE.Cache[0][tmp[0]];
|
||||
Out[1] = MatShaper->L_Precache->Impl.LI168_REVERSE.Cache[1][tmp[1]];
|
||||
Out[2] = MatShaper->L_Precache->Impl.LI168_REVERSE.Cache[2][tmp[2]];
|
||||
}
|
||||
else
|
||||
{
|
||||
Out[0] = RGB_16_TO_8(cmsLinearInterpLUT16((WORD)tmp[0], MatShaper -> L[0], &MatShaper -> p16));
|
||||
Out[1] = RGB_16_TO_8(cmsLinearInterpLUT16((WORD)tmp[1], MatShaper -> L[1], &MatShaper -> p16));
|
||||
Out[2] = RGB_16_TO_8(cmsLinearInterpLUT16((WORD)tmp[2], MatShaper -> L[2], &MatShaper -> p16));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
Out[0] = RGB_16_TO_8((WORD)tmp[0]);
|
||||
Out[1] = RGB_16_TO_8((WORD)tmp[1]);
|
||||
Out[2] = RGB_16_TO_8((WORD)tmp[2]);
|
||||
}
|
||||
|
||||
In += 3;
|
||||
Out += 3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
// Using Named color input table
|
||||
|
@ -1296,8 +1518,29 @@ _LPcmsTRANSFORM PickTransformRoutine(_LPcmsTRANSFORM p,
|
|||
(p -> ExitColorSpace == icSigRgbData) &&
|
||||
!(p -> dwOriginalFlags & cmsFLAGS_BLACKPOINTCOMPENSATION)) {
|
||||
|
||||
|
||||
// If the floating point path is requested, see if we support it
|
||||
if (p -> dwOriginalFlags & cmsFLAGS_FLOATSHAPER)
|
||||
{
|
||||
|
||||
#ifndef HAVE_SSE2_INTEL_MNEMONICS
|
||||
// Turn it off if we can't compile it
|
||||
p -> dwOriginalFlags &= ~cmsFLAGS_FLOATSHAPER;
|
||||
#else
|
||||
// Turn it off if we don't have it at runtime
|
||||
if (!SSE2Available())
|
||||
p -> dwOriginalFlags &= ~cmsFLAGS_FLOATSHAPER;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Yes... try to smelt matrix-shapers
|
||||
|
||||
#ifndef HAVE_SSE2_INTEL_MNEMONICS
|
||||
p -> xform = MatrixShaperXFORM;
|
||||
#else
|
||||
p -> xform = (p -> dwOriginalFlags & cmsFLAGS_FLOATSHAPER) ? MatrixShaperXFORMFloat : MatrixShaperXFORM;
|
||||
#endif
|
||||
|
||||
p -> dwOriginalFlags |= cmsFLAGS_NOTPRECALC;
|
||||
|
||||
if (!cmsBuildSmeltMatShaper(p))
|
||||
|
|
Загрузка…
Ссылка в новой задаче