Optimize software radial gradients (remove branches from the inner loop where

we can, because an entire row is conservatively either outside or inside the
gradient.) Change the gradient benchmark to capture both cases, and add new
gm to detect errors in these optimized paths.



git-svn-id: http://skia.googlecode.com/svn/trunk@2327 2bbb7eff-a529-9590-31e7-b0007b416f81
This commit is contained in:
tomhudson@google.com 2011-09-26 15:03:55 +00:00
Родитель b01ced0042
Коммит 5ea050f662
3 изменённых файлов: 286 добавлений и 120 удалений

Просмотреть файл

@ -38,23 +38,29 @@ static const GradData gGradData[] = {
{ 5, gColors, gPos2 }
};
/// Ignores scale
static SkShader* MakeLinear(const SkPoint pts[2], const GradData& data,
SkShader::TileMode tm, SkUnitMapper* mapper) {
SkShader::TileMode tm, SkUnitMapper* mapper,
float scale) {
return SkGradientShader::CreateLinear(pts, data.fColors, data.fPos,
data.fCount, tm, mapper);
}
static SkShader* MakeRadial(const SkPoint pts[2], const GradData& data,
SkShader::TileMode tm, SkUnitMapper* mapper) {
SkShader::TileMode tm, SkUnitMapper* mapper,
float scale) {
SkPoint center;
center.set(SkScalarAve(pts[0].fX, pts[1].fX),
SkScalarAve(pts[0].fY, pts[1].fY));
return SkGradientShader::CreateRadial(center, center.fX, data.fColors,
return SkGradientShader::CreateRadial(center, center.fX * scale,
data.fColors,
data.fPos, data.fCount, tm, mapper);
}
/// Ignores scale
static SkShader* MakeSweep(const SkPoint pts[2], const GradData& data,
SkShader::TileMode tm, SkUnitMapper* mapper) {
SkShader::TileMode tm, SkUnitMapper* mapper,
float scale) {
SkPoint center;
center.set(SkScalarAve(pts[0].fX, pts[1].fX),
SkScalarAve(pts[0].fY, pts[1].fY));
@ -62,8 +68,10 @@ static SkShader* MakeSweep(const SkPoint pts[2], const GradData& data,
data.fPos, data.fCount, mapper);
}
/// Ignores scale
static SkShader* Make2Radial(const SkPoint pts[2], const GradData& data,
SkShader::TileMode tm, SkUnitMapper* mapper) {
SkShader::TileMode tm, SkUnitMapper* mapper,
float scale) {
SkPoint center0, center1;
center0.set(SkScalarAve(pts[0].fX, pts[1].fX),
SkScalarAve(pts[0].fY, pts[1].fY));
@ -76,7 +84,8 @@ static SkShader* Make2Radial(const SkPoint pts[2], const GradData& data,
}
typedef SkShader* (*GradMaker)(const SkPoint pts[2], const GradData& data,
SkShader::TileMode tm, SkUnitMapper* mapper);
SkShader::TileMode tm, SkUnitMapper* mapper,
float scale);
static const struct {
GradMaker fMaker;
@ -96,6 +105,11 @@ enum GradType { // these must match the order in gGrads
kRadial2_GradType
};
// Geometry that GradientBench fills with the gradient shader.
enum GeomType {
// Filled with SkCanvas::drawRect; the default, adds no suffix to the bench name.
kRect_GeomType,
// Filled with SkCanvas::drawOval; the bench name gets an "_oval" suffix.
kOval_GeomType
};
static const char* tilemodename(SkShader::TileMode tm) {
switch (tm) {
case SkShader::kClamp_TileMode:
@ -110,6 +124,18 @@ static const char* tilemodename(SkShader::TileMode tm) {
}
}
// Returns the human-readable label for a GeomType; GradientBench appends it
// to the benchmark name for non-rectangle geometry.
// An unrecognized value asserts in debug builds and yields "error".
static const char* geomtypename(GeomType gt) {
    if (kRect_GeomType == gt) {
        return "rectangle";
    }
    if (kOval_GeomType == gt) {
        return "oval";
    }
    SkASSERT(!"unknown geometry type");
    return "error";
}
///////////////////////////////////////////////////////////////////////////////
class GradientBench : public SkBenchmark {
@ -122,17 +148,26 @@ class GradientBench : public SkBenchmark {
N = 1
};
public:
GradientBench(void* param, GradType gt,
SkShader::TileMode tm = SkShader::kClamp_TileMode) : INHERITED(param) {
fName.printf("gradient_%s_%s", gGrads[gt].fName, tilemodename(tm));
GradientBench(void* param, GradType gradType,
SkShader::TileMode tm = SkShader::kClamp_TileMode,
GeomType geomType = kRect_GeomType,
float scale = 1.0f)
: INHERITED(param) {
fName.printf("gradient_%s_%s", gGrads[gradType].fName,
tilemodename(tm));
if (geomType != kRect_GeomType) {
fName.append("_");
fName.append(geomtypename(geomType));
}
const SkPoint pts[2] = {
{ 0, 0 },
{ SkIntToScalar(W), SkIntToScalar(H) }
};
fCount = N * gGrads[gt].fRepeat;
fShader = gGrads[gt].fMaker(pts, gGradData[0], tm, NULL);
fCount = N * gGrads[gradType].fRepeat;
fShader = gGrads[gradType].fMaker(pts, gGradData[0], tm, NULL, scale);
fGeomType = geomType;
}
virtual ~GradientBench() {
@ -152,12 +187,21 @@ protected:
SkRect r = { 0, 0, SkIntToScalar(W), SkIntToScalar(H) };
for (int i = 0; i < fCount; i++) {
switch (fGeomType) {
case kRect_GeomType:
canvas->drawRect(r, paint);
break;
case kOval_GeomType:
canvas->drawOval(r, paint);
break;
}
}
}
private:
typedef SkBenchmark INHERITED;
GeomType fGeomType;
};
class Gradient2Bench : public SkBenchmark {
@ -181,7 +225,10 @@ protected:
for (int i = 0; i < 1000; i++) {
const int a = i % 256;
SkColor colors[] = { SK_ColorBLACK, SkColorSetARGB(a, a, a, a), SK_ColorWHITE };
SkColor colors[] = {
SK_ColorBLACK,
SkColorSetARGB(a, a, a, a),
SK_ColorWHITE };
SkShader* s = SkGradientShader::CreateLinear(pts, colors, NULL,
SK_ARRAY_COUNT(colors),
SkShader::kClamp_TileMode);
@ -196,7 +243,15 @@ private:
static SkBenchmark* Fact0(void* p) { return new GradientBench(p, kLinear_GradType); }
static SkBenchmark* Fact01(void* p) { return new GradientBench(p, kLinear_GradType, SkShader::kMirror_TileMode); }
static SkBenchmark* Fact1(void* p) { return new GradientBench(p, kRadial_GradType); }
// Draw a radial gradient of radius 1/2 on a rectangle; half the lines should
// be completely pinned, the other half should be partially pinned
static SkBenchmark* Fact1(void* p) { return new GradientBench(p, kRadial_GradType, SkShader::kClamp_TileMode, kRect_GeomType, 0.5f); }
// Draw a radial gradient on a circle of equal size; all the lines should
// hit the unpinned fast path (so long as GradientBench.W == H)
static SkBenchmark* Fact1o(void* p) { return new GradientBench(p, kRadial_GradType, SkShader::kClamp_TileMode, kOval_GeomType); }
static SkBenchmark* Fact11(void* p) { return new GradientBench(p, kRadial_GradType, SkShader::kMirror_TileMode); }
static SkBenchmark* Fact2(void* p) { return new GradientBench(p, kSweep_GradType); }
static SkBenchmark* Fact3(void* p) { return new GradientBench(p, kRadial2_GradType); }
@ -207,6 +262,7 @@ static SkBenchmark* Fact4(void* p) { return new Gradient2Bench(p); }
static BenchRegistry gReg0(Fact0);
static BenchRegistry gReg01(Fact01);
static BenchRegistry gReg1(Fact1);
static BenchRegistry gReg1o(Fact1o);
static BenchRegistry gReg11(Fact11);
static BenchRegistry gReg2(Fact2);
static BenchRegistry gReg3(Fact3);

Просмотреть файл

@ -178,6 +178,45 @@ private:
typedef GM INHERITED;
};
/// Tests correctness of *optimized* codepaths in gradients.
///
/// Draws a single clamped radial gradient (radius 200, centered on the
/// bottom-left corner of a 100 x 300 rectangle). In the rectangle's local
/// coordinates the rows more than 200 units above the center lie entirely
/// outside the gradient radius, while the rows closer to the center cross
/// it, so one draw covers both whole-row and per-pixel clamping.
class ClampedGradientsGM : public GM {
public:
    ClampedGradientsGM() {}

protected:
    SkString onShortName() { return SkString("clamped_gradients"); }

    virtual SkISize onISize() { return make_isize(640, 510); }

    // Fills the canvas with a light grey background.
    void drawBG(SkCanvas* canvas) {
        canvas->drawColor(0xFFDDDDDD);
    }

    virtual void onDraw(SkCanvas* canvas) {
        this->drawBG(canvas);

        SkRect r = { 0, 0, SkIntToScalar(100), SkIntToScalar(300) };
        SkPaint paint;
        paint.setAntiAlias(true);

        SkPoint center;
        center.iset(0, 300);
        canvas->translate(SkIntToScalar(20), SkIntToScalar(20));

        // The radius goes through SkIntToScalar like every other scalar in
        // this GM, so it keeps its intended value of 200 even when SkScalar
        // is not a floating-point type.
        SkShader* shader = SkGradientShader::CreateRadial(
            SkPoint(center),
            SkIntToScalar(200), gColors, NULL, 5,
            SkShader::kClamp_TileMode, NULL);
        paint.setShader(shader);
        canvas->drawRect(r, paint);
        // The paint holds its own reference; release the one from Create.
        shader->unref();
    }

private:
    typedef GM INHERITED;
};
///////////////////////////////////////////////////////////////////////////////
static GM* MyFactory(void*) { return new GradientsGM; }
@ -186,5 +225,8 @@ static GMRegistry reg(MyFactory);
static GM* MyFactory2(void*) { return new GradientsDegenrate2PointGM; }
static GMRegistry reg2(MyFactory2);
static GM* MyFactory3(void*) { return new ClampedGradientsGM; }
static GMRegistry reg3(MyFactory3);
}

Просмотреть файл

@ -1151,109 +1151,7 @@ public:
rad_to_unit_matrix(center, radius, &fPtsToUnit);
}
virtual void shadeSpan(int x, int y, SkPMColor* SK_RESTRICT dstC, int count) {
SkASSERT(count > 0);
SkPoint srcPt;
SkMatrix::MapXYProc dstProc = fDstToIndexProc;
TileProc proc = fTileProc;
const SkPMColor* SK_RESTRICT cache = this->getCache32();
if (fDstToIndexClass != kPerspective_MatrixClass) {
dstProc(fDstToIndex, SkIntToScalar(x) + SK_ScalarHalf,
SkIntToScalar(y) + SK_ScalarHalf, &srcPt);
SkFixed dx, fx = SkScalarToFixed(srcPt.fX);
SkFixed dy, fy = SkScalarToFixed(srcPt.fY);
#ifdef SK_USE_FLOAT_SQRT
float fdx, fdy;
#endif
if (fDstToIndexClass == kFixedStepInX_MatrixClass) {
SkFixed storage[2];
(void)fDstToIndex.fixedStepInX(SkIntToScalar(y), &storage[0], &storage[1]);
dx = storage[0];
dy = storage[1];
#ifdef SK_USE_FLOAT_SQRT
fdx = SkFixedToFloat(storage[0]);
fdy = SkFixedToFloat(storage[1]);
#endif
} else {
SkASSERT(fDstToIndexClass == kLinear_MatrixClass);
dx = SkScalarToFixed(fDstToIndex.getScaleX());
dy = SkScalarToFixed(fDstToIndex.getSkewY());
#ifdef SK_USE_FLOAT_SQRT
fdx = fDstToIndex.getScaleX();
fdy = fDstToIndex.getSkewY();
#endif
}
if (proc == clamp_tileproc) {
const uint8_t* SK_RESTRICT sqrt_table = gSqrt8Table;
fx >>= 1;
dx >>= 1;
fy >>= 1;
dy >>= 1;
do {
unsigned xx = SkPin32(fx, -0xFFFF >> 1, 0xFFFF >> 1);
unsigned fi = SkPin32(fy, -0xFFFF >> 1, 0xFFFF >> 1);
fi = (xx * xx + fi * fi) >> (14 + 16 - kSQRT_TABLE_BITS);
fi = SkFastMin32(fi, 0xFFFF >> (16 - kSQRT_TABLE_BITS));
*dstC++ = cache[sqrt_table[fi] >> (8 - kCache32Bits)];
fx += dx;
fy += dy;
} while (--count != 0);
} else if (proc == mirror_tileproc) {
#ifdef SK_USE_FLOAT_SQRT
float ffx = srcPt.fX;
float ffy = srcPt.fY;
do {
float fdist = sk_float_sqrt(ffx*ffx + ffy*ffy);
unsigned fi = mirror_tileproc(SkFloatToFixed(fdist));
SkASSERT(fi <= 0xFFFF);
*dstC++ = cache[fi >> (16 - kCache32Bits)];
ffx += fdx;
ffy += fdy;
} while (--count != 0);
#else
do {
SkFixed magnitudeSquared = SkFixedSquare(fx) + SkFixedSquare(fy);
if (magnitudeSquared < 0) // Overflow.
magnitudeSquared = SK_FixedMax;
SkFixed dist = SkFixedSqrt(magnitudeSquared);
unsigned fi = mirror_tileproc(dist);
SkASSERT(fi <= 0xFFFF);
*dstC++ = cache[fi >> (16 - kCache32Bits)];
fx += dx;
fy += dy;
} while (--count != 0);
#endif
} else {
SkASSERT(proc == repeat_tileproc);
do {
SkFixed magnitudeSquared = SkFixedSquare(fx) + SkFixedSquare(fy);
if (magnitudeSquared < 0) // Overflow.
magnitudeSquared = SK_FixedMax;
SkFixed dist = SkFixedSqrt(magnitudeSquared);
unsigned fi = repeat_tileproc(dist);
SkASSERT(fi <= 0xFFFF);
*dstC++ = cache[fi >> (16 - kCache32Bits)];
fx += dx;
fy += dy;
} while (--count != 0);
}
} else { // perspective case
SkScalar dstX = SkIntToScalar(x);
SkScalar dstY = SkIntToScalar(y);
do {
dstProc(fDstToIndex, dstX, dstY, &srcPt);
unsigned fi = proc(SkScalarToFixed(srcPt.length()));
SkASSERT(fi <= 0xFFFF);
*dstC++ = cache[fi >> (16 - kCache32Bits)];
dstX += SK_Scalar1;
} while (--count != 0);
}
}
virtual void shadeSpan(int x, int y, SkPMColor* SK_RESTRICT dstC, int count);
virtual void shadeSpan16(int x, int y, uint16_t* SK_RESTRICT dstC, int count) {
SkASSERT(count > 0);
@ -1406,6 +1304,176 @@ private:
const SkScalar fRadius;
};
// Cheap, deliberately conservative test for "every sample of this span is
// clamped": it tests against the unit square rather than the unit circle.
// The span starts at (fx, fy) and advances by (dx, dy) per pixel; it is
// reported as pinned when it starts at or beyond one edge of the square and
// never moves back toward it. radial_clamp() calls this after halving its
// fixed-point inputs, which is why the edge is compared to SK_FixedHalf.
static inline bool radial_completely_pinned(int fx, int dx, int fy, int dy) {
    const bool beyondRight = (dx >= 0) && (fx >= SK_FixedHalf);
    const bool beyondLeft  = (dx <= 0) && (fx <= -SK_FixedHalf);
    if (beyondRight || beyondLeft) {
        return true;
    }

    const bool beyondBottom = (dy >= 0) && (fy >= SK_FixedHalf);
    const bool beyondTop    = (dy <= 0) && (fy <= -SK_FixedHalf);
    return beyondBottom || beyondTop;
}
// Return true if every sample (fx, fy) of the span is inside the unit circle,
// i.e. no per-pixel pinning is required.
//
// The span starts at (fx, fy) and advances by (dx, dy) for 'count' samples.
// Values are in the halved fixed-point domain used by radial_clamp(), where
// 0x7FFF is the radius of the circle. All samples lie on one line segment and
// a disc is convex, so it is enough to test the first and the last sample.
//
// SkPin32 is expensive, but so are the multiplies in this test,
// so it shouldn't be run if count is small.
static inline bool no_need_for_radial_pin(int fx, int dx,
                                          int fy, int dy, int count) {
    SkASSERT(count > 0);
    // Range-check first so the 32-bit squares below cannot overflow.
    if (SkAbs32(fx) > 0x7FFF || SkAbs32(fy) > 0x7FFF) {
        return false;
    }
    if (fx*fx + fy*fy > 0x7FFF*0x7FFF) {
        return false;
    }

    // Compute the last sample in 64 bits: (count - 1) * dx can exceed the
    // range of int for steep gradients or long spans, and a wrapped result
    // could land back inside the circle and wrongly enable the unpinned path.
    const int64_t lastX = fx + static_cast<int64_t>(count - 1) * dx;
    const int64_t lastY = fy + static_cast<int64_t>(count - 1) * dy;
    if (lastX > 0x7FFF || lastX < -0x7FFF ||
        lastY > 0x7FFF || lastY < -0x7FFF) {
        return false;
    }
    return lastX*lastX + lastY*lastY <= 0x7FFF*0x7FFF;
}
// Emits one gradient sample with no clamping: turns the squared distance
// fx*fx + fy*fy into a sqrt-table index, stores the looked-up cache color
// through dstC, then advances (fx, fy) by (dx, dy).
// Expects fi, fx, fy, dx, dy, dstC, cache and sqrt_table to be in scope at the
// point of use. Because nothing is pinned here, the caller must already know
// the sample is in range (see no_need_for_radial_pin()).
#define UNPINNED_RADIAL_STEP \
fi = (fx * fx + fy * fy) >> (14 + 16 - kSQRT_TABLE_BITS); \
*dstC++ = cache[sqrt_table[fi] >> (8 - kCache32Bits)]; \
fx += dx; \
fy += dy;
// Fills 'count' pixels at dstC with a clamp-tiled radial gradient.
//   fx, fy  - fixed-point position of the first sample
//   dx, dy  - fixed-point step between consecutive samples
//   cache   - 32-bit color table indexed by the scaled sqrt-table result
//   kCache32Bits, kCache32Count - size of 'cache' in bits / entries, passed
//             in by the caller (Radial_Gradient::shadeSpan)
// Picks one of three paths: a single fill when the whole span is pinned, an
// unrolled loop without pinning when the whole span is inside the gradient,
// and otherwise the general loop that pins every sample.
// On Linux, this is faster with SkPMColor[] params than SkPMColor* SK_RESTRICT
static void radial_clamp(SkFixed fx, SkFixed fy, SkFixed dx, SkFixed dy,
SkPMColor* dstC, int count, const SkPMColor* cache,
const int kCache32Bits, const int kCache32Count) {
// Floating point seems to be slower than fixed point,
// even when we have float hardware.
const uint8_t* sqrt_table = gSqrt8Table;
// Halve position and step; all tests and loops below work on halved values.
fx >>= 1;
dx >>= 1;
fy >>= 1;
dy >>= 1;
// The span-wide tests are only attempted for spans longer than 4 pixels.
if ((count > 4) && radial_completely_pinned(fx, dx, fy, dy)) {
// Every sample is clamped: fill the span with the last cache entry.
sk_memset32(dstC, cache[kCache32Count - 1], count);
} else if ((count > 4) &&
no_need_for_radial_pin(fx, dx, fy, dy, count)) {
// Every sample is in range: skip the per-pixel SkPin32 / SkFastMin32.
unsigned fi;
// 4x unroll appears to be no faster than 2x unroll on Linux
while (count > 1) {
UNPINNED_RADIAL_STEP;
UNPINNED_RADIAL_STEP;
count -= 2;
}
// Odd count: one sample is left over after the 2x-unrolled loop.
if (count) {
UNPINNED_RADIAL_STEP;
}
}
else {
// General path: pin each coordinate and the table index for every pixel.
do {
unsigned xx = SkPin32(fx, -0xFFFF >> 1, 0xFFFF >> 1);
unsigned fi = SkPin32(fy, -0xFFFF >> 1, 0xFFFF >> 1);
fi = (xx * xx + fi * fi) >> (14 + 16 - kSQRT_TABLE_BITS);
fi = SkFastMin32(fi, 0xFFFF >> (16 - kSQRT_TABLE_BITS));
*dstC++ = cache[sqrt_table[fi] >> (8 - kCache32Bits)];
fx += dx;
fy += dy;
} while (--count != 0);
}
}
// Writes 'count' 32-bit gradient colors to dstC for the horizontal span that
// starts at device pixel (x, y). count must be positive.
// For non-perspective matrices the start point is mapped once and then
// stepped by a constant (dx, dy), with a separate loop per tile mode; for
// perspective matrices every pixel is mapped individually.
void Radial_Gradient::shadeSpan(int x, int y,
SkPMColor* SK_RESTRICT dstC, int count) {
SkASSERT(count > 0);
SkPoint srcPt;
SkMatrix::MapXYProc dstProc = fDstToIndexProc;
TileProc proc = fTileProc;
const SkPMColor* cache = this->getCache32();
if (fDstToIndexClass != kPerspective_MatrixClass) {
// Map the point half a pixel in from (x, y) on both axes.
dstProc(fDstToIndex, SkIntToScalar(x) + SK_ScalarHalf,
SkIntToScalar(y) + SK_ScalarHalf, &srcPt);
SkFixed dx, fx = SkScalarToFixed(srcPt.fX);
SkFixed dy, fy = SkScalarToFixed(srcPt.fY);
#ifdef SK_USE_FLOAT_SQRT
// Float copies of the step, used only by the float mirror loop below.
float fdx, fdy;
#endif
if (fDstToIndexClass == kFixedStepInX_MatrixClass) {
// Ask the matrix for the per-pixel step along this row.
SkFixed storage[2];
(void)fDstToIndex.fixedStepInX(SkIntToScalar(y), &storage[0], &storage[1]);
dx = storage[0];
dy = storage[1];
#ifdef SK_USE_FLOAT_SQRT
fdx = SkFixedToFloat(storage[0]);
fdy = SkFixedToFloat(storage[1]);
#endif
} else {
// Linear matrix: the step is read straight from the matrix entries.
SkASSERT(fDstToIndexClass == kLinear_MatrixClass);
dx = SkScalarToFixed(fDstToIndex.getScaleX());
dy = SkScalarToFixed(fDstToIndex.getSkewY());
#ifdef SK_USE_FLOAT_SQRT
fdx = fDstToIndex.getScaleX();
fdy = fDstToIndex.getSkewY();
#endif
}
if (proc == clamp_tileproc) {
// Clamp mode is handled by radial_clamp().
radial_clamp(fx, fy, dx, dy, dstC, count, cache,
kCache32Bits, kCache32Count);
} else if (proc == mirror_tileproc) {
#ifdef SK_USE_FLOAT_SQRT
// Float variant: distance via sk_float_sqrt, then mirrored in fixed point.
float ffx = srcPt.fX;
float ffy = srcPt.fY;
do {
float fdist = sk_float_sqrt(ffx*ffx + ffy*ffy);
unsigned fi = mirror_tileproc(SkFloatToFixed(fdist));
SkASSERT(fi <= 0xFFFF);
// Reduce the 16-bit tile result to a cache index.
*dstC++ = cache[fi >> (16 - kCache32Bits)];
ffx += fdx;
ffy += fdy;
} while (--count != 0);
#else
// Fixed-point variant: distance via SkFixedSqrt of the squared magnitude.
do {
SkFixed magnitudeSquared = SkFixedSquare(fx) +
SkFixedSquare(fy);
if (magnitudeSquared < 0) // Overflow.
magnitudeSquared = SK_FixedMax;
SkFixed dist = SkFixedSqrt(magnitudeSquared);
unsigned fi = mirror_tileproc(dist);
SkASSERT(fi <= 0xFFFF);
*dstC++ = cache[fi >> (16 - kCache32Bits)];
fx += dx;
fy += dy;
} while (--count != 0);
#endif
} else {
// Repeat mode: same fixed-point loop as mirror, with repeat_tileproc.
SkASSERT(proc == repeat_tileproc);
do {
SkFixed magnitudeSquared = SkFixedSquare(fx) +
SkFixedSquare(fy);
if (magnitudeSquared < 0) // Overflow.
magnitudeSquared = SK_FixedMax;
SkFixed dist = SkFixedSqrt(magnitudeSquared);
unsigned fi = repeat_tileproc(dist);
SkASSERT(fi <= 0xFFFF);
*dstC++ = cache[fi >> (16 - kCache32Bits)];
fx += dx;
fy += dy;
} while (--count != 0);
}
} else { // perspective case
// No constant step exists: map each pixel and tile its distance.
// NOTE(review): unlike the branch above, no SK_ScalarHalf offset is
// added to the coordinates here - confirm this is intentional.
SkScalar dstX = SkIntToScalar(x);
SkScalar dstY = SkIntToScalar(y);
do {
dstProc(fDstToIndex, dstX, dstY, &srcPt);
unsigned fi = proc(SkScalarToFixed(srcPt.length()));
SkASSERT(fi <= 0xFFFF);
*dstC++ = cache[fi >> (16 - kCache32Bits)];
dstX += SK_Scalar1;
} while (--count != 0);
}
}
/* Two-point radial gradients are specified by two circles, each with a center
point and radius. The gradient can be considered to be a series of
concentric circles, with the color interpolated from the start circle