Bug 577743 - Scale videos at YCbCr to RGB conversion time - r=roc a=blocking

This commit is contained in:
Chris Double 2010-08-23 13:47:33 +12:00
Родитель b4efbff0b5
Коммит d2b3e251e9
14 изменённых файлов: 1769 добавлений и 19 удалений

Просмотреть файл

@ -50,7 +50,6 @@
#include "nsAutoLock.h"
#include "nsIRenderingContext.h"
#include "gfxContext.h"
#include "gfxImageSurface.h"
#include "nsPresContext.h"
#include "nsDOMError.h"
#include "nsDisplayList.h"
@ -58,10 +57,6 @@
#include "nsSVGEffects.h"
#endif
#if defined(XP_MACOSX)
#include "gfxQuartzImageSurface.h"
#endif
// Number of milliseconds between progress events as defined by spec
#define PROGRESS_MS 350

Просмотреть файл

@ -114,6 +114,7 @@ class THEBES_API ImageContainer {
THEBES_INLINE_DECL_THREADSAFE_REFCOUNTING(ImageContainer)
public:
ImageContainer() {}
virtual ~ImageContainer() {}
/**
@ -179,6 +180,13 @@ public:
*/
virtual PRBool SetLayerManager(LayerManager *aManager) = 0;
/**
* Sets a size that the image is expected to be rendered at.
* This is a hint for image backends to optimize scaling.
* Default implementation in this class is to ignore the hint.
*
* @param aScaleHint the expected rendering size. The parameter name is
*        commented out in the signature because this default
*        implementation has an empty body and never reads it.
*/
virtual void SetScaleHint(const gfxIntSize& /* aScaleHint */) { }
protected:
LayerManager* mManager;

Просмотреть файл

@ -104,8 +104,13 @@ protected:
*/
class BasicPlanarYCbCrImage : public PlanarYCbCrImage, public BasicImageImplData {
public:
BasicPlanarYCbCrImage() :
PlanarYCbCrImage(static_cast<BasicImageImplData*>(this))
/**
* aScaleHint is a size that the image is expected to be rendered at.
* This is a hint for image backends to optimize scaling.
*/
BasicPlanarYCbCrImage(const gfxIntSize& aScaleHint) :
PlanarYCbCrImage(static_cast<BasicImageImplData*>(this)),
mScaleHint(aScaleHint)
{}
virtual void SetData(const Data& aData);
@ -115,6 +120,7 @@ public:
protected:
nsAutoArrayPtr<PRUint8> mBuffer;
nsCountedRef<nsMainThreadSurfaceRef> mSurface;
gfxIntSize mScaleHint;
};
void
@ -125,8 +131,13 @@ BasicPlanarYCbCrImage::SetData(const Data& aData)
NS_ERROR("Illegal width or height");
return;
}
size_t size = aData.mPicSize.width*aData.mPicSize.height*4;
mBuffer = new PRUint8[size];
// 'prescale' is true if the scaling is to be done as part of the
// YCbCr to RGB conversion rather than on the RGB data when rendered.
PRBool prescale = mScaleHint.width > 0 && mScaleHint.height > 0;
gfxIntSize size(prescale ? mScaleHint.width : aData.mPicSize.width,
prescale ? mScaleHint.height : aData.mPicSize.height);
mBuffer = new PRUint8[size.width * size.height * 4];
if (!mBuffer) {
// out of memory
return;
@ -149,20 +160,37 @@ BasicPlanarYCbCrImage::SetData(const Data& aData)
NS_ERROR("YCbCr format not supported");
}
// Convert from YCbCr to RGB now
gfx::ConvertYCbCrToRGB32(aData.mYChannel,
// Convert from YCbCr to RGB now, scaling the image if needed.
if (size != aData.mPicSize) {
gfx::ScaleYCbCrToRGB32(aData.mYChannel,
aData.mCbChannel,
aData.mCrChannel,
mBuffer,
aData.mPicX,
aData.mPicY,
aData.mPicSize.width,
aData.mPicSize.height,
size.width,
size.height,
aData.mYStride,
aData.mCbCrStride,
aData.mPicSize.width*4,
type);
mSize = aData.mPicSize;
size.width*4,
type,
gfx::ROTATE_0);
}
else {
gfx::ConvertYCbCrToRGB32(aData.mYChannel,
aData.mCbChannel,
aData.mCrChannel,
mBuffer,
aData.mPicX,
aData.mPicY,
aData.mPicSize.width,
aData.mPicSize.height,
aData.mYStride,
aData.mCbCrStride,
aData.mPicSize.width*4,
type);
}
mSize = size;
}
static cairo_user_data_key_t imageSurfaceDataKey;
@ -218,7 +246,8 @@ BasicPlanarYCbCrImage::GetAsSurface()
class BasicImageContainer : public ImageContainer {
public:
BasicImageContainer(BasicLayerManager* aManager) :
ImageContainer(aManager), mMonitor("BasicImageContainer")
ImageContainer(aManager), mMonitor("BasicImageContainer"),
mScaleHint(-1, -1)
{}
virtual already_AddRefed<Image> CreateImage(const Image::Format* aFormats,
PRUint32 aNumFormats);
@ -227,10 +256,12 @@ public:
virtual already_AddRefed<gfxASurface> GetCurrentAsSurface(gfxIntSize* aSize);
virtual gfxIntSize GetCurrentSize();
virtual PRBool SetLayerManager(LayerManager *aManager);
virtual void SetScaleHint(const gfxIntSize& aScaleHint);
protected:
Monitor mMonitor;
nsRefPtr<Image> mImage;
gfxIntSize mScaleHint;
};
/**
@ -257,7 +288,8 @@ BasicImageContainer::CreateImage(const Image::Format* aFormats,
if (FormatInList(aFormats, aNumFormats, Image::CAIRO_SURFACE)) {
image = new BasicCairoImage();
} else if (FormatInList(aFormats, aNumFormats, Image::PLANAR_YCBCR)) {
image = new BasicPlanarYCbCrImage();
MonitorAutoEnter mon(mMonitor);
image = new BasicPlanarYCbCrImage(mScaleHint);
}
return image.forget();
}
@ -303,6 +335,12 @@ BasicImageContainer::GetCurrentSize()
return !mImage ? gfxIntSize(0,0) : ToImageData(mImage)->GetSize();
}
// Remembers the size the image is expected to be rendered at. The stored
// value is handed to each BasicPlanarYCbCrImage built by CreateImage.
void
BasicImageContainer::SetScaleHint(const gfxIntSize& aScaleHint)
{
  // Hold the container's monitor while the hint is being replaced.
  MonitorAutoEnter autoEnter(mMonitor);
  mScaleHint = aScaleHint;
}
PRBool
BasicImageContainer::SetLayerManager(LayerManager *aManager)
{

Просмотреть файл

@ -21,3 +21,4 @@ yv24.patch: Adds YCbCr 4:4:4 support
row_c_fix.patch: Fix broken C fallback code (See bug 561385).
bug572034_mac_64bit.patch: Fix x86_64 linux code so it works on OS X.
solaris.patch: Adds Solaris support, fallback to C implementation on SPARC
add_scale.patch: re-adds Chromium scaling code

953
gfx/ycbcr/add_scale.patch Normal file
Просмотреть файл

@ -0,0 +1,953 @@
diff --git a/gfx/ycbcr/yuv_convert.cpp b/gfx/ycbcr/yuv_convert.cpp
index 40ce10f..7d46629 100644
--- a/gfx/ycbcr/yuv_convert.cpp
+++ b/gfx/ycbcr/yuv_convert.cpp
@@ -82,10 +82,139 @@ NS_GFX_(void) ConvertYCbCrToRGB32(const uint8* y_buf,
#ifdef ARCH_CPU_X86_FAMILY
// MMX used for FastConvertYUVToRGB32Row requires emms instruction.
if (has_mmx)
EMMS();
#endif
}
+// Scale a frame of YUV to 32 bit ARGB.
+void ScaleYCbCrToRGB32(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int height,
+ int scaled_width,
+ int scaled_height,
+ int y_pitch,
+ int uv_pitch,
+ int rgb_pitch,
+ YUVType yuv_type,
+ Rotate view_rotate) {
+ unsigned int y_shift = yuv_type == YV12 ? 1 : 0;
+ unsigned int x_shift = yuv_type == YV24 ? 0 : 1;
+ bool has_mmx = supports_mmx();
+ // Diagram showing origin and direction of source sampling.
+ // ->0 4<-
+ // 7 3
+ //
+ // 6 5
+ // ->1 2<-
+ // Rotations that start at right side of image.
+ if ((view_rotate == ROTATE_180) ||
+ (view_rotate == ROTATE_270) ||
+ (view_rotate == MIRROR_ROTATE_0) ||
+ (view_rotate == MIRROR_ROTATE_90)) {
+ y_buf += width - 1;
+ u_buf += width / 2 - 1;
+ v_buf += width / 2 - 1;
+ width = -width;
+ }
+ // Rotations that start at bottom of image.
+ if ((view_rotate == ROTATE_90) ||
+ (view_rotate == ROTATE_180) ||
+ (view_rotate == MIRROR_ROTATE_90) ||
+ (view_rotate == MIRROR_ROTATE_180)) {
+ y_buf += (height - 1) * y_pitch;
+ u_buf += ((height >> y_shift) - 1) * uv_pitch;
+ v_buf += ((height >> y_shift) - 1) * uv_pitch;
+ height = -height;
+ }
+
+ // Handle zero sized destination.
+ if (scaled_width == 0 || scaled_height == 0)
+ return;
+ int scaled_dx = width * 16 / scaled_width;
+ int scaled_dy = height * 16 / scaled_height;
+
+ int scaled_dx_uv = scaled_dx;
+
+ if ((view_rotate == ROTATE_90) ||
+ (view_rotate == ROTATE_270)) {
+ int tmp = scaled_height;
+ scaled_height = scaled_width;
+ scaled_width = tmp;
+ tmp = height;
+ height = width;
+ width = tmp;
+ int original_dx = scaled_dx;
+ int original_dy = scaled_dy;
+ scaled_dx = ((original_dy >> 4) * y_pitch) << 4;
+ scaled_dx_uv = ((original_dy >> 4) * uv_pitch) << 4;
+ scaled_dy = original_dx;
+ if (view_rotate == ROTATE_90) {
+ y_pitch = -1;
+ uv_pitch = -1;
+ height = -height;
+ } else {
+ y_pitch = 1;
+ uv_pitch = 1;
+ }
+ }
+
+ for (int y = 0; y < scaled_height; ++y) {
+ uint8* dest_pixel = rgb_buf + y * rgb_pitch;
+ int scaled_y = (y * height / scaled_height);
+ const uint8* y_ptr = y_buf + scaled_y * y_pitch;
+ const uint8* u_ptr = u_buf + (scaled_y >> y_shift) * uv_pitch;
+ const uint8* v_ptr = v_buf + (scaled_y >> y_shift) * uv_pitch;
+
+#if defined(_MSC_VER)
+ if (scaled_width == (width * 2)) {
+ DoubleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
+ dest_pixel, scaled_width);
+ } else if ((scaled_dx & 15) == 0) { // Scaling by integer scale factor.
+ if (scaled_dx_uv == scaled_dx) { // Not rotated.
+ if (scaled_dx == 16) { // Not scaled
+ if (has_mmx)
+ FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
+ dest_pixel, scaled_width);
+ else
+ FastConvertYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
+ dest_pixel, scaled_width, x_shift);
+ } else { // Simple scale down. ie half
+ ConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
+ dest_pixel, scaled_width, scaled_dx >> 4);
+ }
+ } else {
+ RotateConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
+ dest_pixel, scaled_width,
+ scaled_dx >> 4, scaled_dx_uv >> 4);
+ }
+#else
+ if (scaled_dx == 16) { // Not scaled
+ if (has_mmx)
+ FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
+ dest_pixel, scaled_width);
+ else
+ FastConvertYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
+ dest_pixel, scaled_width, x_shift);
+#endif
+ } else {
+ if (has_mmx)
+ ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
+ dest_pixel, scaled_width, scaled_dx);
+ else
+ ScaleYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
+ dest_pixel, scaled_width, scaled_dx, x_shift);
+
+ }
+ }
+
+ // MMX used for FastConvertYUVToRGB32Row requires emms instruction.
+ if (has_mmx)
+ EMMS();
+}
+
} // namespace gfx
} // namespace mozilla
diff --git a/gfx/ycbcr/yuv_convert.h b/gfx/ycbcr/yuv_convert.h
index c0b678d..a7e5b68 100644
--- a/gfx/ycbcr/yuv_convert.h
+++ b/gfx/ycbcr/yuv_convert.h
@@ -15,27 +15,56 @@ namespace gfx {
// Type of YUV surface.
// The value of these enums matter as they are used to shift vertical indices.
enum YUVType {
YV12 = 0, // YV12 is half width and half height chroma channels.
YV16 = 1, // YV16 is half width and full height chroma channels.
YV24 = 2 // YV24 is full width and full height chroma channels.
};
+// Mirror means flip the image horizontally, as in looking in a mirror.
+// Rotate happens after mirroring.
+enum Rotate {
+ ROTATE_0, // Rotation off.
+ ROTATE_90, // Rotate clockwise.
+ ROTATE_180, // Rotate upside down.
+ ROTATE_270, // Rotate counter clockwise.
+ MIRROR_ROTATE_0, // Mirror horizontally.
+ MIRROR_ROTATE_90, // Mirror then Rotate clockwise.
+ MIRROR_ROTATE_180, // Mirror vertically.
+ MIRROR_ROTATE_270 // Transpose.
+};
+
// Convert a frame of YUV to 32 bit ARGB.
// Pass in YV16/YV12 depending on source format
NS_GFX_(void) ConvertYCbCrToRGB32(const uint8* yplane,
const uint8* uplane,
const uint8* vplane,
uint8* rgbframe,
int pic_x,
int pic_y,
int pic_width,
int pic_height,
int ystride,
int uvstride,
int rgbstride,
YUVType yuv_type);
+// Scale a frame of YUV to 32 bit ARGB.
+// Supports rotation and mirroring.
+void ScaleYCbCrToRGB32(const uint8* yplane,
+ const uint8* uplane,
+ const uint8* vplane,
+ uint8* rgbframe,
+ int frame_width,
+ int frame_height,
+ int scaled_width,
+ int scaled_height,
+ int ystride,
+ int uvstride,
+ int rgbstride,
+ YUVType yuv_type,
+ Rotate view_rotate);
+
} // namespace gfx
} // namespace mozilla
#endif // MEDIA_BASE_YUV_CONVERT_H_
diff --git a/gfx/ycbcr/yuv_row.h b/gfx/ycbcr/yuv_row.h
index 8519008..96969ec 100644
--- a/gfx/ycbcr/yuv_row.h
+++ b/gfx/ycbcr/yuv_row.h
@@ -24,16 +24,64 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
void FastConvertYUVToRGB32Row_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
unsigned int x_shift);
+// Can do 1x, half size or any scale down by an integer amount.
+// Step can be negative (mirroring, rotate 180).
+// This is the third fastest of the scalers.
+void ConvertYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int step);
+
+// Rotate is like Convert, but applies different step to Y versus U and V.
+// This allows rotation by 90 or 270, by stepping by stride.
+// This is the fourth fastest of the scalers.
+void RotateConvertYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int ystep,
+ int uvstep);
+
+// Doubler does 4 pixels at a time. Each pixel is replicated.
+// This is the fastest of the scalers.
+void DoubleYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width);
+
+// Handles arbitrary scaling up or down.
+// Mirroring is supported, but not 90 or 270 degree rotation.
+// Chroma is under sampled every 2 pixels for performance.
+// This is the slowest of the scalers.
+void ScaleYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int scaled_dx);
+
+void ScaleYUVToRGB32Row_C(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int scaled_dx,
+ unsigned int x_shift);
+
} // extern "C"
// x64 uses MMX2 (SSE) so emms is not required.
#if defined(ARCH_CPU_X86)
#if defined(_MSC_VER)
#define EMMS() __asm emms
#else
#define EMMS() asm("emms")
diff --git a/gfx/ycbcr/yuv_row_c.cpp b/gfx/ycbcr/yuv_row_c.cpp
index b5c0018..49eced2 100644
--- a/gfx/ycbcr/yuv_row_c.cpp
+++ b/gfx/ycbcr/yuv_row_c.cpp
@@ -172,10 +172,31 @@ void FastConvertYUVToRGB32Row_C(const uint8* y_buf,
v = v_buf[x + 1];
}
YuvPixel(y1, u, v, rgb_buf + 4);
}
rgb_buf += 8; // Advance 2 pixels.
}
}
+// 28.4 fixed point is used. A shift by 4 isolates the integer.
+// A shift by 5 is used to further subsample the chrominance channels.
+// & 15 isolates the fixed point fraction. >> 2 to get the upper 2 bits,
+// for 1/4 pixel accurate interpolation.
+void ScaleYUVToRGB32Row_C(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int scaled_dx,
+ unsigned int x_shift) {
+ int scaled_x = 0;
+ for (int x = 0; x < width; ++x) {
+ uint8 u = u_buf[scaled_x >> (4 + x_shift)];
+ uint8 v = v_buf[scaled_x >> (4 + x_shift)];
+ uint8 y0 = y_buf[scaled_x >> 4];
+ YuvPixel(y0, u, v, rgb_buf);
+ rgb_buf += 4;
+ scaled_x += scaled_dx;
+ }
+}
} // extern "C"
diff --git a/gfx/ycbcr/yuv_row_linux.cpp b/gfx/ycbcr/yuv_row_linux.cpp
index 9f7625c..bff02b3 100644
--- a/gfx/ycbcr/yuv_row_linux.cpp
+++ b/gfx/ycbcr/yuv_row_linux.cpp
@@ -16,16 +16,24 @@ extern "C" {
void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
}
+void ScaleYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int scaled_dx) {
+ ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx, 1);
+}
#else
#define RGBY(i) { \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
0 \
}
@@ -365,16 +373,86 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi
"r"(u_buf), // %1
"r"(v_buf), // %2
"r"(rgb_buf), // %3
"r"(width), // %4
"r" (kCoefficientsRgbY) // %5
: "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
);
}
+
+void ScaleYUVToRGB32Row(const uint8* y_buf, // rdi
+ const uint8* u_buf, // rsi
+ const uint8* v_buf, // rdx
+ uint8* rgb_buf, // rcx
+ int width, // r8
+ int scaled_dx) { // r9
+ asm(
+ "xor %%r11,%%r11\n"
+ "sub $0x2,%4\n"
+ "js scalenext\n"
+
+"scaleloop:"
+ "mov %%r11,%%r10\n"
+ "sar $0x5,%%r10\n"
+ "movzb (%1,%%r10,1),%%rax\n"
+ "movq 2048(%5,%%rax,8),%%xmm0\n"
+ "movzb (%2,%%r10,1),%%rax\n"
+ "movq 4096(%5,%%rax,8),%%xmm1\n"
+ "lea (%%r11,%6),%%r10\n"
+ "sar $0x4,%%r11\n"
+ "movzb (%0,%%r11,1),%%rax\n"
+ "paddsw %%xmm1,%%xmm0\n"
+ "movq (%5,%%rax,8),%%xmm1\n"
+ "lea (%%r10,%6),%%r11\n"
+ "sar $0x4,%%r10\n"
+ "movzb (%0,%%r10,1),%%rax\n"
+ "movq (%5,%%rax,8),%%xmm2\n"
+ "paddsw %%xmm0,%%xmm1\n"
+ "paddsw %%xmm0,%%xmm2\n"
+ "shufps $0x44,%%xmm2,%%xmm1\n"
+ "psraw $0x6,%%xmm1\n"
+ "packuswb %%xmm1,%%xmm1\n"
+ "movq %%xmm1,0x0(%3)\n"
+ "add $0x8,%3\n"
+ "sub $0x2,%4\n"
+ "jns scaleloop\n"
+
+"scalenext:"
+ "add $0x1,%4\n"
+ "js scaledone\n"
+
+ "mov %%r11,%%r10\n"
+ "sar $0x5,%%r10\n"
+ "movzb (%1,%%r10,1),%%rax\n"
+ "movq 2048(%5,%%rax,8),%%xmm0\n"
+ "movzb (%2,%%r10,1),%%rax\n"
+ "movq 4096(%5,%%rax,8),%%xmm1\n"
+ "paddsw %%xmm1,%%xmm0\n"
+ "sar $0x4,%%r11\n"
+ "movzb (%0,%%r11,1),%%rax\n"
+ "movq (%5,%%rax,8),%%xmm1\n"
+ "paddsw %%xmm0,%%xmm1\n"
+ "psraw $0x6,%%xmm1\n"
+ "packuswb %%xmm1,%%xmm1\n"
+ "movd %%xmm1,0x0(%3)\n"
+
+"scaledone:"
+ :
+ : "r"(y_buf), // %0
+ "r"(u_buf), // %1
+ "r"(v_buf), // %2
+ "r"(rgb_buf), // %3
+ "r"(width), // %4
+ "r" (kCoefficientsRgbY), // %5
+ "r"(static_cast<long>(scaled_dx)) // %6
+ : "memory", "r10", "r11", "rax", "xmm0", "xmm1", "xmm2"
+);
+}
+
#endif // __SUNPRO_CC
#else // ARCH_CPU_X86_64
#ifdef __SUNPRO_CC
void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
@@ -493,13 +571,87 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
"packuswb %mm1,%mm1\n"
"movd %mm1,0x0(%ebp)\n"
"2:"
"popa\n"
"ret\n"
".previous\n"
);
+void ScaleYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int scaled_dx);
+
+ asm(
+ ".global ScaleYUVToRGB32Row\n"
+"ScaleYUVToRGB32Row:\n"
+ "pusha\n"
+ "mov 0x24(%esp),%edx\n"
+ "mov 0x28(%esp),%edi\n"
+ "mov 0x2c(%esp),%esi\n"
+ "mov 0x30(%esp),%ebp\n"
+ "mov 0x34(%esp),%ecx\n"
+ "xor %ebx,%ebx\n"
+ "jmp scaleend\n"
+
+"scaleloop:"
+ "mov %ebx,%eax\n"
+ "sar $0x5,%eax\n"
+ "movzbl (%edi,%eax,1),%eax\n"
+ "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
+ "mov %ebx,%eax\n"
+ "sar $0x5,%eax\n"
+ "movzbl (%esi,%eax,1),%eax\n"
+ "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
+ "mov %ebx,%eax\n"
+ "add 0x38(%esp),%ebx\n"
+ "sar $0x4,%eax\n"
+ "movzbl (%edx,%eax,1),%eax\n"
+ "movq kCoefficientsRgbY(,%eax,8),%mm1\n"
+ "mov %ebx,%eax\n"
+ "add 0x38(%esp),%ebx\n"
+ "sar $0x4,%eax\n"
+ "movzbl (%edx,%eax,1),%eax\n"
+ "movq kCoefficientsRgbY(,%eax,8),%mm2\n"
+ "paddsw %mm0,%mm1\n"
+ "paddsw %mm0,%mm2\n"
+ "psraw $0x6,%mm1\n"
+ "psraw $0x6,%mm2\n"
+ "packuswb %mm2,%mm1\n"
+ "movntq %mm1,0x0(%ebp)\n"
+ "add $0x8,%ebp\n"
+"scaleend:"
+ "sub $0x2,%ecx\n"
+ "jns scaleloop\n"
+
+ "and $0x1,%ecx\n"
+ "je scaledone\n"
+
+ "mov %ebx,%eax\n"
+ "sar $0x5,%eax\n"
+ "movzbl (%edi,%eax,1),%eax\n"
+ "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
+ "mov %ebx,%eax\n"
+ "sar $0x5,%eax\n"
+ "movzbl (%esi,%eax,1),%eax\n"
+ "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
+ "mov %ebx,%eax\n"
+ "sar $0x4,%eax\n"
+ "movzbl (%edx,%eax,1),%eax\n"
+ "movq kCoefficientsRgbY(,%eax,8),%mm1\n"
+ "paddsw %mm0,%mm1\n"
+ "psraw $0x6,%mm1\n"
+ "packuswb %mm1,%mm1\n"
+ "movd %mm1,0x0(%ebp)\n"
+
+"scaledone:"
+ "popa\n"
+ "ret\n"
+);
+
#endif // __SUNPRO_CC
#endif // ARCH_CPU_X86_64
#endif // !ARCH_CPU_X86_FAMILY
} // extern "C"
diff --git a/gfx/ycbcr/yuv_row_mac.cpp b/gfx/ycbcr/yuv_row_mac.cpp
index a1d0058..5acf825 100644
--- a/gfx/ycbcr/yuv_row_mac.cpp
+++ b/gfx/ycbcr/yuv_row_mac.cpp
@@ -16,16 +16,24 @@ extern "C" {
void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
}
+void ScaleYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int scaled_dx) {
+ ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx, 1);
+}
#else
#define RGBY(i) { \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
0 \
}
@@ -313,11 +321,96 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
MacConvertYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width,
&kCoefficientsRgbY[0][0]);
}
+extern void MacScaleYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int scaled_dx,
+ int16 *kCoefficientsRgbY);
+
+ __asm__(
+"_MacScaleYUVToRGB32Row:\n"
+ "pusha\n"
+ "mov 0x24(%esp),%edx\n"
+ "mov 0x28(%esp),%edi\n"
+ "mov 0x2c(%esp),%esi\n"
+ "mov 0x30(%esp),%ebp\n"
+ "mov 0x3c(%esp),%ecx\n"
+ "xor %ebx,%ebx\n"
+ "jmp Lscaleend\n"
+
+"Lscaleloop:"
+ "mov %ebx,%eax\n"
+ "sar $0x5,%eax\n"
+ "movzbl (%edi,%eax,1),%eax\n"
+ "movq 2048(%ecx,%eax,8),%mm0\n"
+ "mov %ebx,%eax\n"
+ "sar $0x5,%eax\n"
+ "movzbl (%esi,%eax,1),%eax\n"
+ "paddsw 4096(%ecx,%eax,8),%mm0\n"
+ "mov %ebx,%eax\n"
+ "add 0x38(%esp),%ebx\n"
+ "sar $0x4,%eax\n"
+ "movzbl (%edx,%eax,1),%eax\n"
+ "movq 0(%ecx,%eax,8),%mm1\n"
+ "mov %ebx,%eax\n"
+ "add 0x38(%esp),%ebx\n"
+ "sar $0x4,%eax\n"
+ "movzbl (%edx,%eax,1),%eax\n"
+ "movq 0(%ecx,%eax,8),%mm2\n"
+ "paddsw %mm0,%mm1\n"
+ "paddsw %mm0,%mm2\n"
+ "psraw $0x6,%mm1\n"
+ "psraw $0x6,%mm2\n"
+ "packuswb %mm2,%mm1\n"
+ "movntq %mm1,0x0(%ebp)\n"
+ "add $0x8,%ebp\n"
+"Lscaleend:"
+ "sub $0x2,0x34(%esp)\n"
+ "jns Lscaleloop\n"
+
+ "and $0x1,0x34(%esp)\n"
+ "je Lscaledone\n"
+
+ "mov %ebx,%eax\n"
+ "sar $0x5,%eax\n"
+ "movzbl (%edi,%eax,1),%eax\n"
+ "movq 2048(%ecx,%eax,8),%mm0\n"
+ "mov %ebx,%eax\n"
+ "sar $0x5,%eax\n"
+ "movzbl (%esi,%eax,1),%eax\n"
+ "paddsw 4096(%ecx,%eax,8),%mm0\n"
+ "mov %ebx,%eax\n"
+ "sar $0x4,%eax\n"
+ "movzbl (%edx,%eax,1),%eax\n"
+ "movq 0(%ecx,%eax,8),%mm1\n"
+ "paddsw %mm0,%mm1\n"
+ "psraw $0x6,%mm1\n"
+ "packuswb %mm1,%mm1\n"
+ "movd %mm1,0x0(%ebp)\n"
+
+"Lscaledone:"
+ "popa\n"
+ "ret\n"
+);
+
+void ScaleYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int scaled_dx) {
+
+ MacScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx,
+ &kCoefficientsRgbY[0][0]);
+}
+
#endif // ARCH_CPU_PPC || ARCH_CPU_64_BITS
} // extern "C"
diff --git a/gfx/ycbcr/yuv_row_win.cpp b/gfx/ycbcr/yuv_row_win.cpp
index 699ac77..a1700fc 100644
--- a/gfx/ycbcr/yuv_row_win.cpp
+++ b/gfx/ycbcr/yuv_row_win.cpp
@@ -11,17 +11,26 @@ extern "C" {
// PPC implementation uses C fallback
void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
}
-
+
+void ScaleYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int scaled_dx) {
+ ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx, 1);
+}
+
#else
#define RGBY(i) { \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
0 \
@@ -307,11 +316,280 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
movd [ebp], mm1
convertdone :
popad
ret
}
}
+__declspec(naked)
+void ConvertYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int step) {
+ __asm {
+ pushad
+ mov edx, [esp + 32 + 4] // Y
+ mov edi, [esp + 32 + 8] // U
+ mov esi, [esp + 32 + 12] // V
+ mov ebp, [esp + 32 + 16] // rgb
+ mov ecx, [esp + 32 + 20] // width
+ mov ebx, [esp + 32 + 24] // step
+ jmp wend
+
+ wloop :
+ movzx eax, byte ptr [edi]
+ add edi, ebx
+ movq mm0, [kCoefficientsRgbU + 8 * eax]
+ movzx eax, byte ptr [esi]
+ add esi, ebx
+ paddsw mm0, [kCoefficientsRgbV + 8 * eax]
+ movzx eax, byte ptr [edx]
+ add edx, ebx
+ movq mm1, [kCoefficientsRgbY + 8 * eax]
+ movzx eax, byte ptr [edx]
+ add edx, ebx
+ movq mm2, [kCoefficientsRgbY + 8 * eax]
+ paddsw mm1, mm0
+ paddsw mm2, mm0
+ psraw mm1, 6
+ psraw mm2, 6
+ packuswb mm1, mm2
+ movntq [ebp], mm1
+ add ebp, 8
+ wend :
+ sub ecx, 2
+ jns wloop
+
+ and ecx, 1 // odd number of pixels?
+ jz wdone
+
+ movzx eax, byte ptr [edi]
+ movq mm0, [kCoefficientsRgbU + 8 * eax]
+ movzx eax, byte ptr [esi]
+ paddsw mm0, [kCoefficientsRgbV + 8 * eax]
+ movzx eax, byte ptr [edx]
+ movq mm1, [kCoefficientsRgbY + 8 * eax]
+ paddsw mm1, mm0
+ psraw mm1, 6
+ packuswb mm1, mm1
+ movd [ebp], mm1
+ wdone :
+
+ popad
+ ret
+ }
+}
+
+__declspec(naked)
+void RotateConvertYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int ystep,
+ int uvstep) {
+ __asm {
+ pushad
+ mov edx, [esp + 32 + 4] // Y
+ mov edi, [esp + 32 + 8] // U
+ mov esi, [esp + 32 + 12] // V
+ mov ebp, [esp + 32 + 16] // rgb
+ mov ecx, [esp + 32 + 20] // width
+ jmp wend
+
+ wloop :
+ movzx eax, byte ptr [edi]
+ mov ebx, [esp + 32 + 28] // uvstep
+ add edi, ebx
+ movq mm0, [kCoefficientsRgbU + 8 * eax]
+ movzx eax, byte ptr [esi]
+ add esi, ebx
+ paddsw mm0, [kCoefficientsRgbV + 8 * eax]
+ movzx eax, byte ptr [edx]
+ mov ebx, [esp + 32 + 24] // ystep
+ add edx, ebx
+ movq mm1, [kCoefficientsRgbY + 8 * eax]
+ movzx eax, byte ptr [edx]
+ add edx, ebx
+ movq mm2, [kCoefficientsRgbY + 8 * eax]
+ paddsw mm1, mm0
+ paddsw mm2, mm0
+ psraw mm1, 6
+ psraw mm2, 6
+ packuswb mm1, mm2
+ movntq [ebp], mm1
+ add ebp, 8
+ wend :
+ sub ecx, 2
+ jns wloop
+
+ and ecx, 1 // odd number of pixels?
+ jz wdone
+
+ movzx eax, byte ptr [edi]
+ movq mm0, [kCoefficientsRgbU + 8 * eax]
+ movzx eax, byte ptr [esi]
+ paddsw mm0, [kCoefficientsRgbV + 8 * eax]
+ movzx eax, byte ptr [edx]
+ movq mm1, [kCoefficientsRgbY + 8 * eax]
+ paddsw mm1, mm0
+ psraw mm1, 6
+ packuswb mm1, mm1
+ movd [ebp], mm1
+ wdone :
+
+ popad
+ ret
+ }
+}
+
+__declspec(naked)
+void DoubleYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width) {
+ __asm {
+ pushad
+ mov edx, [esp + 32 + 4] // Y
+ mov edi, [esp + 32 + 8] // U
+ mov esi, [esp + 32 + 12] // V
+ mov ebp, [esp + 32 + 16] // rgb
+ mov ecx, [esp + 32 + 20] // width
+ jmp wend
+
+ wloop :
+ movzx eax, byte ptr [edi]
+ add edi, 1
+ movzx ebx, byte ptr [esi]
+ add esi, 1
+ movq mm0, [kCoefficientsRgbU + 8 * eax]
+ movzx eax, byte ptr [edx]
+ paddsw mm0, [kCoefficientsRgbV + 8 * ebx]
+ movq mm1, [kCoefficientsRgbY + 8 * eax]
+ paddsw mm1, mm0
+ psraw mm1, 6
+ packuswb mm1, mm1
+ punpckldq mm1, mm1
+ movntq [ebp], mm1
+
+ movzx ebx, byte ptr [edx + 1]
+ add edx, 2
+ paddsw mm0, [kCoefficientsRgbY + 8 * ebx]
+ psraw mm0, 6
+ packuswb mm0, mm0
+ punpckldq mm0, mm0
+ movntq [ebp+8], mm0
+ add ebp, 16
+ wend :
+ sub ecx, 4
+ jns wloop
+
+ add ecx, 4
+ jz wdone
+
+ movzx eax, byte ptr [edi]
+ movq mm0, [kCoefficientsRgbU + 8 * eax]
+ movzx eax, byte ptr [esi]
+ paddsw mm0, [kCoefficientsRgbV + 8 * eax]
+ movzx eax, byte ptr [edx]
+ movq mm1, [kCoefficientsRgbY + 8 * eax]
+ paddsw mm1, mm0
+ psraw mm1, 6
+ packuswb mm1, mm1
+ jmp wend1
+
+ wloop1 :
+ movd [ebp], mm1
+ add ebp, 4
+ wend1 :
+ sub ecx, 1
+ jns wloop1
+ wdone :
+ popad
+ ret
+ }
+}
+
+// This version does general purpose scaling by any amount, up or down.
+// The only thing it cannot do is rotation by 90 or 270.
+// For performance the chroma is under sampled, reducing cost of a 3x
+// 1080p scale from 8.4 ms to 5.4 ms.
+__declspec(naked)
+void ScaleYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int dx) {
+ __asm {
+ pushad
+ mov edx, [esp + 32 + 4] // Y
+ mov edi, [esp + 32 + 8] // U
+ mov esi, [esp + 32 + 12] // V
+ mov ebp, [esp + 32 + 16] // rgb
+ mov ecx, [esp + 32 + 20] // width
+ xor ebx, ebx // x
+ jmp scaleend
+
+ scaleloop :
+ mov eax, ebx
+ sar eax, 5
+ movzx eax, byte ptr [edi + eax]
+ movq mm0, [kCoefficientsRgbU + 8 * eax]
+ mov eax, ebx
+ sar eax, 5
+ movzx eax, byte ptr [esi + eax]
+ paddsw mm0, [kCoefficientsRgbV + 8 * eax]
+ mov eax, ebx
+ add ebx, [esp + 32 + 24] // x += dx
+ sar eax, 4
+ movzx eax, byte ptr [edx + eax]
+ movq mm1, [kCoefficientsRgbY + 8 * eax]
+ mov eax, ebx
+ add ebx, [esp + 32 + 24] // x += dx
+ sar eax, 4
+ movzx eax, byte ptr [edx + eax]
+ movq mm2, [kCoefficientsRgbY + 8 * eax]
+ paddsw mm1, mm0
+ paddsw mm2, mm0
+ psraw mm1, 6
+ psraw mm2, 6
+ packuswb mm1, mm2
+ movntq [ebp], mm1
+ add ebp, 8
+ scaleend :
+ sub ecx, 2
+ jns scaleloop
+
+ and ecx, 1 // odd number of pixels?
+ jz scaledone
+
+ mov eax, ebx
+ sar eax, 5
+ movzx eax, byte ptr [edi + eax]
+ movq mm0, [kCoefficientsRgbU + 8 * eax]
+ mov eax, ebx
+ sar eax, 5
+ movzx eax, byte ptr [esi + eax]
+ paddsw mm0, [kCoefficientsRgbV + 8 * eax]
+ mov eax, ebx
+ sar eax, 4
+ movzx eax, byte ptr [edx + eax]
+ movq mm1, [kCoefficientsRgbY + 8 * eax]
+ paddsw mm1, mm0
+ psraw mm1, 6
+ packuswb mm1, mm1
+ movd [ebp], mm1
+
+ scaledone :
+ popad
+ ret
+ }
+}
+
#endif // ARCH_CPU_64_BITS
} // extern "C"

Просмотреть файл

@ -15,3 +15,4 @@ patch -p3 <yv24.patch
patch -p3 <row_c_fix.patch
patch -p3 <bug572034_mac_64bit.patch
patch -p3 <bug577645_movntq.patch
patch -p3 <add_scale.patch

Просмотреть файл

@ -89,5 +89,134 @@ NS_GFX_(void) ConvertYCbCrToRGB32(const uint8* y_buf,
#endif
}
// Scale a frame of YUV to 32 bit ARGB.
//
// Reads a planar YCbCr frame (separate y_buf / u_buf / v_buf planes) and
// writes converted pixels to rgb_buf, one destination row every rgb_pitch
// bytes. view_rotate optionally mirrors and/or rotates the source while it
// is being sampled.
//
//   y_buf, u_buf, v_buf   start of the luma plane and the two chroma planes
//   rgb_buf               destination buffer
//   width, height         source size, measured on the Y plane
//   scaled_width,
//   scaled_height         destination size; nothing is written if either is
//                         0. For ROTATE_90 / ROTATE_270 the two values are
//                         swapped internally before the row loop runs.
//   y_pitch, uv_pitch     distance between successive rows of the Y plane
//                         and of the U/V planes
//   rgb_pitch             distance between successive destination rows
//   yuv_type              chroma layout of the source (YV12, YV16 or YV24)
//   view_rotate           orientation to produce
//
// NOTE(review): unlike ConvertYCbCrToRGB32 this function takes no
// pic_x / pic_y, so sampling always starts at the plane pointers passed in.
// Confirm that callers with a non-zero picture offset account for that.
void ScaleYCbCrToRGB32(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int height,
int scaled_width,
int scaled_height,
int y_pitch,
int uv_pitch,
int rgb_pitch,
YUVType yuv_type,
Rotate view_rotate) {
// y_shift converts a luma row index into a chroma row index: only YV12 has
// half-height chroma planes.
unsigned int y_shift = yuv_type == YV12 ? 1 : 0;
// x_shift is the horizontal equivalent (0 only for YV24). In this function
// it is only forwarded to the *_C row routines.
unsigned int x_shift = yuv_type == YV24 ? 0 : 1;
// Selects between the assembly row routines and their C fallbacks below.
bool has_mmx = supports_mmx();
// Diagram showing origin and direction of source sampling.
// ->0   4<-
// 7       3
//
// 6       5
// ->1   2<-
// Rotations that start at right side of image.
// The plane pointers are moved to the last column and width is negated, so
// the horizontal step computed from it further down comes out negative.
// NOTE(review): the chroma offset hard-codes width / 2 instead of using
// x_shift; for YV24 (full-width chroma) that looks wrong -- confirm.
if ((view_rotate == ROTATE_180) ||
(view_rotate == ROTATE_270) ||
(view_rotate == MIRROR_ROTATE_0) ||
(view_rotate == MIRROR_ROTATE_90)) {
y_buf += width - 1;
u_buf += width / 2 - 1;
v_buf += width / 2 - 1;
width = -width;
}
// Rotations that start at bottom of image.
// Same idea vertically: point at the last row of each plane and negate
// height so that the per-row source index walks upwards.
if ((view_rotate == ROTATE_90) ||
(view_rotate == ROTATE_180) ||
(view_rotate == MIRROR_ROTATE_90) ||
(view_rotate == MIRROR_ROTATE_180)) {
y_buf += (height - 1) * y_pitch;
u_buf += ((height >> y_shift) - 1) * uv_pitch;
v_buf += ((height >> y_shift) - 1) * uv_pitch;
height = -height;
}
// Handle zero sized destination.
// This also guards the two divisions that follow.
if (scaled_width == 0 || scaled_height == 0)
return;
// Source step per destination pixel / row, scaled by 16 (four fractional
// bits): a value of 16 means exactly one source pixel per output pixel.
int scaled_dx = width * 16 / scaled_width;
int scaled_dy = height * 16 / scaled_height;
// Horizontal step for the chroma planes; differs from scaled_dx only when
// rotating by 90 or 270 degrees.
int scaled_dx_uv = scaled_dx;
// For quarter-turn rotations an output row walks down a source column:
// swap the roles of width and height, make the horizontal step a whole
// number of source rows (pitch), and make the per-row "pitch" a single
// byte.
if ((view_rotate == ROTATE_90) ||
(view_rotate == ROTATE_270)) {
int tmp = scaled_height;
scaled_height = scaled_width;
scaled_width = tmp;
tmp = height;
height = width;
width = tmp;
int original_dx = scaled_dx;
int original_dy = scaled_dy;
// Only the integer part of the vertical step is kept here.
scaled_dx = ((original_dy >> 4) * y_pitch) << 4;
scaled_dx_uv = ((original_dy >> 4) * uv_pitch) << 4;
// NOTE(review): scaled_dy is assigned here but not read again anywhere in
// this function; the row loop derives its source row from height instead.
scaled_dy = original_dx;
if (view_rotate == ROTATE_90) {
y_pitch = -1;
uv_pitch = -1;
height = -height;
} else {
y_pitch = 1;
uv_pitch = 1;
}
}
// Produce the destination one row at a time.
for (int y = 0; y < scaled_height; ++y) {
uint8* dest_pixel = rgb_buf + y * rgb_pitch;
// Source row for this destination row, chosen by truncating integer
// division.
int scaled_y = (y * height / scaled_height);
const uint8* y_ptr = y_buf + scaled_y * y_pitch;
const uint8* u_ptr = u_buf + (scaled_y >> y_shift) * uv_pitch;
const uint8* v_ptr = v_buf + (scaled_y >> y_shift) * uv_pitch;
// Row dispatch. The MSVC build can use the dedicated doubling, integer
// step and rotating routines; every other build only distinguishes
// "unscaled" from "general scale". Both variants share the final else
// branch after the #endif, which is why the braces straddle the
// preprocessor blocks.
#if defined(_MSC_VER)
if (scaled_width == (width * 2)) {
DoubleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
dest_pixel, scaled_width);
} else if ((scaled_dx & 15) == 0) { // Scaling by integer scale factor.
if (scaled_dx_uv == scaled_dx) { // Not rotated.
if (scaled_dx == 16) { // Not scaled
if (has_mmx)
FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
dest_pixel, scaled_width);
else
FastConvertYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
dest_pixel, scaled_width, x_shift);
} else { // Simple scale down. ie half
ConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
dest_pixel, scaled_width, scaled_dx >> 4);
}
} else {
RotateConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
dest_pixel, scaled_width,
scaled_dx >> 4, scaled_dx_uv >> 4);
}
#else
if (scaled_dx == 16) { // Not scaled
if (has_mmx)
FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
dest_pixel, scaled_width);
else
FastConvertYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
dest_pixel, scaled_width, x_shift);
#endif
} else {
// General case: arbitrary (possibly negative) fixed-point step.
// NOTE(review): ScaleYUVToRGB32Row takes no x_shift argument, so the
// has_mmx path cannot be told about YV24's full-width chroma -- confirm
// that YV24 input never reaches it.
if (has_mmx)
ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
dest_pixel, scaled_width, scaled_dx);
else
ScaleYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
dest_pixel, scaled_width, scaled_dx, x_shift);
}
}
// MMX used for FastConvertYUVToRGB32Row requires emms instruction.
if (has_mmx)
EMMS();
}
} // namespace gfx
} // namespace mozilla

Просмотреть файл

@ -20,6 +20,19 @@ enum YUVType {
YV24 = 2 // YV24 is full width and full height chroma channels.
};
// View transform selector; ScaleYCbCrToRGB32 takes one of these as its
// view_rotate argument.
// Mirror means flip the image horizontally, as in looking in a mirror.
// When both are requested, mirroring is applied first and rotation second.
enum Rotate {
ROTATE_0, // Rotation off.
ROTATE_90, // Rotate clockwise.
ROTATE_180, // Rotate upside down.
ROTATE_270, // Rotate counter clockwise.
MIRROR_ROTATE_0, // Mirror horizontally.
MIRROR_ROTATE_90, // Mirror, then rotate clockwise.
MIRROR_ROTATE_180, // Mirror vertically.
MIRROR_ROTATE_270 // Transpose.
};
// Convert a frame of YUV to 32 bit ARGB.
// Pass in YV16/YV12 depending on source format
NS_GFX_(void) ConvertYCbCrToRGB32(const uint8* yplane,
@ -35,6 +48,22 @@ NS_GFX_(void) ConvertYCbCrToRGB32(const uint8* yplane,
int rgbstride,
YUVType yuv_type);
// Scale a frame of YUV to 32 bit ARGB.
// Supports rotation and mirroring (selected by view_rotate).
//   yplane, uplane, vplane    - source Y, U and V planes.
//   rgbframe                  - destination buffer for the 32 bit pixels.
//   frame_width, frame_height - size of the source frame.
//   scaled_width, scaled_height - size of the converted output.
//   ystride, uvstride, rgbstride - row pitches of the Y plane, the U/V planes
//                               and the output (presumably in bytes - confirm
//                               against the definition).
//   yuv_type                  - chroma subsampling layout of the source.
//   view_rotate               - mirror/rotation to apply, see enum Rotate.
void ScaleYCbCrToRGB32(const uint8* yplane,
const uint8* uplane,
const uint8* vplane,
uint8* rgbframe,
int frame_width,
int frame_height,
int scaled_width,
int scaled_height,
int ystride,
int uvstride,
int rgbstride,
YUVType yuv_type,
Rotate view_rotate);
} // namespace gfx
} // namespace mozilla

Просмотреть файл

@ -29,6 +29,54 @@ void FastConvertYUVToRGB32Row_C(const uint8* y_buf,
unsigned int x_shift);
// Can do 1x, half size or any scale down by an integer amount.
// Step can be negative (mirroring, rotate 180).
// This is the third fastest of the scalers.
// width is the number of output pixels written to rgb_buf. In the MSVC
// assembly implementation, step is added to the Y pointer after every output
// pixel and to the U and V pointers once per pair of output pixels.
void ConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int step);
// Rotate is like Convert, but applies different step to Y versus U and V.
// This allows rotation by 90 or 270, by stepping by stride.
// This is the fourth fastest of the scalers.
// In the MSVC assembly implementation, ystep is added to the Y pointer after
// every output pixel and uvstep is added to the U and V pointers once per
// pair of output pixels.
void RotateConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int ystep,
int uvstep);
// Doubler does 4 pixels at a time. Each pixel is replicated.
// This is the fastest of the scalers.
// width counts output pixels: each loop iteration of the MSVC implementation
// reads two Y samples plus one U and one V sample and writes four pixels.
void DoubleYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
// Handles arbitrary scaling up or down.
// Mirroring is supported, but not 90 or 270 degree rotation.
// Chroma is under sampled every 2 pixels for performance.
// This is the slowest of the scalers.
// scaled_dx is the source step per output pixel in fixed point with 4
// fractional bits: the assembly implementations index Y with (x >> 4) and
// U/V with (x >> 5), where x starts at 0 and grows by scaled_dx per pixel.
void ScaleYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int scaled_dx);
// Portable C version of ScaleYUVToRGB32Row.
// x_shift is the extra right shift applied when indexing the U and V rows:
// chroma samples are read at x >> (4 + x_shift), luma samples at x >> 4.
void ScaleYUVToRGB32Row_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int scaled_dx,
unsigned int x_shift);
} // extern "C"
// x64 uses MMX2 (SSE) so emms is not required.

Просмотреть файл

@ -177,5 +177,26 @@ void FastConvertYUVToRGB32Row_C(const uint8* y_buf,
}
}
// 28.4 fixed point is used. A shift by 4 isolates the integer.
// A shift by 5 is used to further subsample the chrominence channels.
// & 15 isolates the fixed point fraction. >> 2 to get the upper 2 bits,
// for 1/4 pixel accurate interpolation.
void ScaleYUVToRGB32Row_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int scaled_dx,
unsigned int x_shift) {
int scaled_x = 0;
for (int x = 0; x < width; ++x) {
uint8 u = u_buf[scaled_x >> (4 + x_shift)];
uint8 v = v_buf[scaled_x >> (4 + x_shift)];
uint8 y0 = y_buf[scaled_x >> 4];
YuvPixel(y0, u, v, rgb_buf);
rgb_buf += 4;
scaled_x += scaled_dx;
}
}
} // extern "C"

Просмотреть файл

@ -21,6 +21,14 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
}
// Fallback used when no assembly scaler is built: hands the row to the
// portable C implementation with the chroma x_shift fixed at 1.
void ScaleYUVToRGB32Row(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
                        int width,
                        int scaled_dx) {
  const unsigned int chroma_x_shift = 1;
  ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf,
                       width, scaled_dx, chroma_x_shift);
}
#else
#define RGBY(i) { \
@ -370,6 +378,76 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi
: "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
);
}
// x86-64 SSE2 row scaler: converts one row of YUV to 32 bit pixels with an
// arbitrary horizontal scale.
//   y_buf, u_buf, v_buf - source rows.
//   rgb_buf             - destination, 4 bytes per output pixel.
//   width               - number of output pixels.
//   scaled_dx           - source step per output pixel, 4 fractional bits.
// r11 holds the running fixed point source position x. Luma is read at
// x >> 4 and chroma at x >> 5; one U/V pair is shared by two output pixels.
// kCoefficientsRgbY is indexed directly for Y, and at byte offsets 2048 and
// 4096 for U and V.
//
// rgb_buf and width are advanced/decremented by the assembly, so they are
// declared as read-write ("+r") operands; modifying input-only operands is
// not permitted. Because the outputs are never read afterwards the statement
// must be volatile or it could be discarded. Numeric local labels are used so
// the statement stays valid if the compiler emits it more than once.
void ScaleYUVToRGB32Row(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
                        int width,
                        int scaled_dx) {
  asm volatile(
  "xor %%r11,%%r11\n"                          // x = 0
  "sub $0x2,%[width]\n"
  "js 1f\n"                                    // fewer than 2 pixels

  // Main loop: two output pixels per iteration.
"0:\n"
  "mov %%r11,%%r10\n"
  "sar $0x5,%%r10\n"                           // chroma index = x >> 5
  "movzb (%[u_buf],%%r10,1),%%rax\n"
  "movq 2048(%[coeff],%%rax,8),%%xmm0\n"
  "movzb (%[v_buf],%%r10,1),%%rax\n"
  "movq 4096(%[coeff],%%rax,8),%%xmm1\n"
  "lea (%%r11,%[dx]),%%r10\n"                  // r10 = x + dx
  "sar $0x4,%%r11\n"                           // luma index of pixel 0
  "movzb (%[y_buf],%%r11,1),%%rax\n"
  "paddsw %%xmm1,%%xmm0\n"                     // xmm0 = U + V contribution
  "movq (%[coeff],%%rax,8),%%xmm1\n"
  "lea (%%r10,%[dx]),%%r11\n"                  // r11 = x + 2 * dx
  "sar $0x4,%%r10\n"                           // luma index of pixel 1
  "movzb (%[y_buf],%%r10,1),%%rax\n"
  "movq (%[coeff],%%rax,8),%%xmm2\n"
  "paddsw %%xmm0,%%xmm1\n"
  "paddsw %%xmm0,%%xmm2\n"
  "shufps $0x44,%%xmm2,%%xmm1\n"               // combine both pixels
  "psraw $0x6,%%xmm1\n"
  "packuswb %%xmm1,%%xmm1\n"
  "movq %%xmm1,0x0(%[rgb_buf])\n"              // store 2 pixels (8 bytes)
  "add $0x8,%[rgb_buf]\n"
  "sub $0x2,%[width]\n"
  "jns 0b\n"

  // Tail: one remaining pixel when width is odd.
"1:\n"
  "add $0x1,%[width]\n"
  "js 2f\n"
  "mov %%r11,%%r10\n"
  "sar $0x5,%%r10\n"
  "movzb (%[u_buf],%%r10,1),%%rax\n"
  "movq 2048(%[coeff],%%rax,8),%%xmm0\n"
  "movzb (%[v_buf],%%r10,1),%%rax\n"
  "movq 4096(%[coeff],%%rax,8),%%xmm1\n"
  "paddsw %%xmm1,%%xmm0\n"
  "sar $0x4,%%r11\n"
  "movzb (%[y_buf],%%r11,1),%%rax\n"
  "movq (%[coeff],%%rax,8),%%xmm1\n"
  "paddsw %%xmm0,%%xmm1\n"
  "psraw $0x6,%%xmm1\n"
  "packuswb %%xmm1,%%xmm1\n"
  "movd %%xmm1,0x0(%[rgb_buf])\n"              // store 1 pixel (4 bytes)

"2:\n"
  : [rgb_buf] "+r"(rgb_buf),
    [width] "+r"(width)
  : [y_buf] "r"(y_buf),
    [u_buf] "r"(u_buf),
    [v_buf] "r"(v_buf),
    [coeff] "r"(kCoefficientsRgbY),
    [dx] "r"(static_cast<long>(scaled_dx))
  : "memory", "cc", "r10", "r11", "rax", "xmm0", "xmm1", "xmm2"
);
}
#endif // __SUNPRO_CC
#else // ARCH_CPU_X86_64
@ -498,6 +576,80 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
".previous\n"
);
// 32-bit x86 MMX row scaler, implemented as file-scope assembly.
// Converts one row of YUV to 32 bit pixels with an arbitrary horizontal
// scale. scaled_dx is the source step per output pixel with 4 fractional
// bits: luma is read at x >> 4 and chroma at x >> 5, so one U/V pair is
// shared by two output pixels.
//
// After "pusha" (32 bytes) and the return address (4 bytes) the arguments
// live at 0x24(%esp) onwards: y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx.
// Register use: edx = Y row, edi = U row, esi = V row, ebp = output pointer,
// ecx = remaining width, ebx = fixed point source position x.
//
// MMX registers are left dirty; no emms is issued here.
//
// The routine is explicitly placed in .text and the previous section is
// restored afterwards, so it does not depend on whichever section happens to
// be current where the compiler emits this top-level asm.
void ScaleYUVToRGB32Row(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
                        int width,
                        int scaled_dx);
  asm(
  ".text\n"
  ".global ScaleYUVToRGB32Row\n"
"ScaleYUVToRGB32Row:\n"
  "pusha\n"
  "mov 0x24(%esp),%edx\n"                      // y_buf
  "mov 0x28(%esp),%edi\n"                      // u_buf
  "mov 0x2c(%esp),%esi\n"                      // v_buf
  "mov 0x30(%esp),%ebp\n"                      // rgb_buf
  "mov 0x34(%esp),%ecx\n"                      // width
  "xor %ebx,%ebx\n"                            // x = 0
  "jmp scaleend\n"

  // Main loop: two output pixels per iteration.
"scaleloop:"
  "mov %ebx,%eax\n"
  "sar $0x5,%eax\n"                            // chroma index = x >> 5
  "movzbl (%edi,%eax,1),%eax\n"
  "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
  "mov %ebx,%eax\n"
  "sar $0x5,%eax\n"
  "movzbl (%esi,%eax,1),%eax\n"
  "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
  "mov %ebx,%eax\n"
  "add 0x38(%esp),%ebx\n"                      // x += scaled_dx
  "sar $0x4,%eax\n"                            // luma index of pixel 0
  "movzbl (%edx,%eax,1),%eax\n"
  "movq kCoefficientsRgbY(,%eax,8),%mm1\n"
  "mov %ebx,%eax\n"
  "add 0x38(%esp),%ebx\n"                      // x += scaled_dx
  "sar $0x4,%eax\n"                            // luma index of pixel 1
  "movzbl (%edx,%eax,1),%eax\n"
  "movq kCoefficientsRgbY(,%eax,8),%mm2\n"
  "paddsw %mm0,%mm1\n"
  "paddsw %mm0,%mm2\n"
  "psraw $0x6,%mm1\n"
  "psraw $0x6,%mm2\n"
  "packuswb %mm2,%mm1\n"
  "movntq %mm1,0x0(%ebp)\n"                    // store 2 pixels (8 bytes)
  "add $0x8,%ebp\n"
"scaleend:"
  "sub $0x2,%ecx\n"
  "jns scaleloop\n"

  // Tail: one remaining pixel when width is odd.
  "and $0x1,%ecx\n"
  "je scaledone\n"

  "mov %ebx,%eax\n"
  "sar $0x5,%eax\n"
  "movzbl (%edi,%eax,1),%eax\n"
  "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
  "mov %ebx,%eax\n"
  "sar $0x5,%eax\n"
  "movzbl (%esi,%eax,1),%eax\n"
  "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
  "mov %ebx,%eax\n"
  "sar $0x4,%eax\n"
  "movzbl (%edx,%eax,1),%eax\n"
  "movq kCoefficientsRgbY(,%eax,8),%mm1\n"
  "paddsw %mm0,%mm1\n"
  "psraw $0x6,%mm1\n"
  "packuswb %mm1,%mm1\n"
  "movd %mm1,0x0(%ebp)\n"                      // store 1 pixel (4 bytes)

"scaledone:"
  "popa\n"
  "ret\n"
  ".previous\n"
);
#endif // __SUNPRO_CC
#endif // ARCH_CPU_X86_64
#endif // !ARCH_CPU_X86_FAMILY

Просмотреть файл

@ -21,6 +21,14 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
}
// Fallback used when no assembly scaler is built: hands the row to the
// portable C implementation with the chroma x_shift fixed at 1.
void ScaleYUVToRGB32Row(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
                        int width,
                        int scaled_dx) {
  const unsigned int chroma_x_shift = 1;
  ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf,
                       width, scaled_dx, chroma_x_shift);
}
#else
#define RGBY(i) { \
@ -318,6 +326,91 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
&kCoefficientsRgbY[0][0]);
}
// Mac x86-32 MMX row scaler, implemented as file-scope assembly.
// Converts one row of YUV to 32 bit pixels with an arbitrary horizontal
// scale. Unlike a direct symbol reference, the coefficient table address is
// received as the last argument and kept in ecx.
//
// After "pusha" (32 bytes) and the return address (4 bytes) the arguments
// live at 0x24(%esp) onwards: y_buf, u_buf, v_buf, rgb_buf, width (0x34),
// scaled_dx (0x38), kCoefficientsRgbY (0x3c).
// Register use: edx = Y row, edi = U row, esi = V row, ebp = output pointer,
// ecx = coefficient table, ebx = fixed point source position x.
// The remaining width is counted down in place in its stack slot.
// Luma is read at x >> 4 and chroma at x >> 5; the table is indexed directly
// for Y and at byte offsets 2048 and 4096 for U and V.
extern void MacScaleYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int scaled_dx,
int16 *kCoefficientsRgbY);
__asm__(
"_MacScaleYUVToRGB32Row:\n"
"pusha\n"
"mov 0x24(%esp),%edx\n" // y_buf
"mov 0x28(%esp),%edi\n" // u_buf
"mov 0x2c(%esp),%esi\n" // v_buf
"mov 0x30(%esp),%ebp\n" // rgb_buf
"mov 0x3c(%esp),%ecx\n" // coefficient table
"xor %ebx,%ebx\n" // x = 0
"jmp Lscaleend\n"
// Main loop: two output pixels per iteration, sharing one U/V pair.
"Lscaleloop:"
"mov %ebx,%eax\n"
"sar $0x5,%eax\n" // chroma index = x >> 5
"movzbl (%edi,%eax,1),%eax\n"
"movq 2048(%ecx,%eax,8),%mm0\n"
"mov %ebx,%eax\n"
"sar $0x5,%eax\n"
"movzbl (%esi,%eax,1),%eax\n"
"paddsw 4096(%ecx,%eax,8),%mm0\n"
"mov %ebx,%eax\n"
"add 0x38(%esp),%ebx\n" // x += scaled_dx
"sar $0x4,%eax\n" // luma index of pixel 0
"movzbl (%edx,%eax,1),%eax\n"
"movq 0(%ecx,%eax,8),%mm1\n"
"mov %ebx,%eax\n"
"add 0x38(%esp),%ebx\n" // x += scaled_dx
"sar $0x4,%eax\n" // luma index of pixel 1
"movzbl (%edx,%eax,1),%eax\n"
"movq 0(%ecx,%eax,8),%mm2\n"
"paddsw %mm0,%mm1\n"
"paddsw %mm0,%mm2\n"
"psraw $0x6,%mm1\n"
"psraw $0x6,%mm2\n"
"packuswb %mm2,%mm1\n"
"movntq %mm1,0x0(%ebp)\n" // store 2 pixels (8 bytes)
"add $0x8,%ebp\n"
"Lscaleend:"
"sub $0x2,0x34(%esp)\n" // width -= 2, in the argument's stack slot
"jns Lscaleloop\n"
// Tail: one remaining pixel when width is odd.
"and $0x1,0x34(%esp)\n"
"je Lscaledone\n"
"mov %ebx,%eax\n"
"sar $0x5,%eax\n"
"movzbl (%edi,%eax,1),%eax\n"
"movq 2048(%ecx,%eax,8),%mm0\n"
"mov %ebx,%eax\n"
"sar $0x5,%eax\n"
"movzbl (%esi,%eax,1),%eax\n"
"paddsw 4096(%ecx,%eax,8),%mm0\n"
"mov %ebx,%eax\n"
"sar $0x4,%eax\n"
"movzbl (%edx,%eax,1),%eax\n"
"movq 0(%ecx,%eax,8),%mm1\n"
"paddsw %mm0,%mm1\n"
"psraw $0x6,%mm1\n"
"packuswb %mm1,%mm1\n"
"movd %mm1,0x0(%ebp)\n" // store 1 pixel (4 bytes)
"Lscaledone:"
"popa\n"
"ret\n"
);
void ScaleYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int scaled_dx) {
MacScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx,
&kCoefficientsRgbY[0][0]);
}
#endif // ARCH_CPU_PPC || ARCH_CPU_64_BITS
} // extern "C"

Просмотреть файл

@ -16,7 +16,16 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
int width) {
FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
}
// Fallback used when no assembly scaler is built: hands the row to the
// portable C implementation with the chroma x_shift fixed at 1.
void ScaleYUVToRGB32Row(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
                        int width,
                        int scaled_dx) {
  const unsigned int chroma_x_shift = 1;
  ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf,
                       width, scaled_dx, chroma_x_shift);
}
#else
@ -312,6 +321,275 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
}
}
// MSVC x86 MMX row converter that walks the source with a fixed integer step.
// Naked function: arguments are read straight off the stack. After pushad
// (32 bytes) and the return address (4 bytes) the first argument is at
// [esp + 32 + 4].
// Register use: edx = Y pointer, edi = U pointer, esi = V pointer,
// ebp = output pointer, ecx = remaining width, ebx = step.
// Each loop iteration reads one U and one V sample, two Y samples, and writes
// two 32 bit pixels. The U/V pointers advance by step once per iteration and
// the Y pointer advances by step after each Y read.
// MMX registers are left dirty; no emms is issued here.
__declspec(naked)
void ConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int step) {
__asm {
pushad
mov edx, [esp + 32 + 4] // Y
mov edi, [esp + 32 + 8] // U
mov esi, [esp + 32 + 12] // V
mov ebp, [esp + 32 + 16] // rgb
mov ecx, [esp + 32 + 20] // width
mov ebx, [esp + 32 + 24] // step
jmp wend
// Main loop: two output pixels per iteration.
wloop :
movzx eax, byte ptr [edi]
add edi, ebx
movq mm0, [kCoefficientsRgbU + 8 * eax]
movzx eax, byte ptr [esi]
add esi, ebx
paddsw mm0, [kCoefficientsRgbV + 8 * eax] // mm0 = U + V contribution
movzx eax, byte ptr [edx]
add edx, ebx
movq mm1, [kCoefficientsRgbY + 8 * eax]
movzx eax, byte ptr [edx]
add edx, ebx
movq mm2, [kCoefficientsRgbY + 8 * eax]
paddsw mm1, mm0
paddsw mm2, mm0
psraw mm1, 6
psraw mm2, 6
packuswb mm1, mm2
movntq [ebp], mm1 // store 2 pixels (8 bytes)
add ebp, 8
wend :
sub ecx, 2
jns wloop
// Tail: one remaining pixel when width is odd.
and ecx, 1 // odd number of pixels?
jz wdone
movzx eax, byte ptr [edi]
movq mm0, [kCoefficientsRgbU + 8 * eax]
movzx eax, byte ptr [esi]
paddsw mm0, [kCoefficientsRgbV + 8 * eax]
movzx eax, byte ptr [edx]
movq mm1, [kCoefficientsRgbY + 8 * eax]
paddsw mm1, mm0
psraw mm1, 6
packuswb mm1, mm1
movd [ebp], mm1 // store 1 pixel (4 bytes)
wdone :
popad
ret
}
}
// MSVC x86 MMX row converter with independent steps for luma and chroma.
// Naked function: arguments are read straight off the stack. After pushad
// (32 bytes) and the return address (4 bytes) the first argument is at
// [esp + 32 + 4].
// Register use: edx = Y pointer, edi = U pointer, esi = V pointer,
// ebp = output pointer, ecx = remaining width. ebx is reloaded inside the
// loop, first with uvstep and then with ystep.
// Each loop iteration reads one U and one V sample, two Y samples, and writes
// two 32 bit pixels. The U/V pointers advance by uvstep once per iteration
// and the Y pointer advances by ystep after each Y read.
// MMX registers are left dirty; no emms is issued here.
__declspec(naked)
void RotateConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int ystep,
int uvstep) {
__asm {
pushad
mov edx, [esp + 32 + 4] // Y
mov edi, [esp + 32 + 8] // U
mov esi, [esp + 32 + 12] // V
mov ebp, [esp + 32 + 16] // rgb
mov ecx, [esp + 32 + 20] // width
jmp wend
// Main loop: two output pixels per iteration.
wloop :
movzx eax, byte ptr [edi]
mov ebx, [esp + 32 + 28] // uvstep
add edi, ebx
movq mm0, [kCoefficientsRgbU + 8 * eax]
movzx eax, byte ptr [esi]
add esi, ebx
paddsw mm0, [kCoefficientsRgbV + 8 * eax] // mm0 = U + V contribution
movzx eax, byte ptr [edx]
mov ebx, [esp + 32 + 24] // ystep
add edx, ebx
movq mm1, [kCoefficientsRgbY + 8 * eax]
movzx eax, byte ptr [edx]
add edx, ebx
movq mm2, [kCoefficientsRgbY + 8 * eax]
paddsw mm1, mm0
paddsw mm2, mm0
psraw mm1, 6
psraw mm2, 6
packuswb mm1, mm2
movntq [ebp], mm1 // store 2 pixels (8 bytes)
add ebp, 8
wend :
sub ecx, 2
jns wloop
// Tail: one remaining pixel when width is odd.
and ecx, 1 // odd number of pixels?
jz wdone
movzx eax, byte ptr [edi]
movq mm0, [kCoefficientsRgbU + 8 * eax]
movzx eax, byte ptr [esi]
paddsw mm0, [kCoefficientsRgbV + 8 * eax]
movzx eax, byte ptr [edx]
movq mm1, [kCoefficientsRgbY + 8 * eax]
paddsw mm1, mm0
psraw mm1, 6
packuswb mm1, mm1
movd [ebp], mm1 // store 1 pixel (4 bytes)
wdone :
popad
ret
}
}
// MSVC x86 MMX row converter that doubles the row horizontally: every
// converted pixel is written twice.
// Naked function: arguments are read straight off the stack. After pushad
// (32 bytes) and the return address (4 bytes) the first argument is at
// [esp + 32 + 4].
// Register use: edx = Y pointer, edi = U pointer, esi = V pointer,
// ebp = output pointer, ecx = remaining width (counted in output pixels).
// Each loop iteration reads one U, one V and two Y samples and writes four
// output pixels (16 bytes).
// MMX registers are left dirty; no emms is issued here.
__declspec(naked)
void DoubleYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
__asm {
pushad
mov edx, [esp + 32 + 4] // Y
mov edi, [esp + 32 + 8] // U
mov esi, [esp + 32 + 12] // V
mov ebp, [esp + 32 + 16] // rgb
mov ecx, [esp + 32 + 20] // width
jmp wend
// Main loop: four output pixels per iteration.
wloop :
movzx eax, byte ptr [edi]
add edi, 1
movzx ebx, byte ptr [esi]
add esi, 1
movq mm0, [kCoefficientsRgbU + 8 * eax]
movzx eax, byte ptr [edx]
paddsw mm0, [kCoefficientsRgbV + 8 * ebx] // mm0 = U + V contribution
movq mm1, [kCoefficientsRgbY + 8 * eax]
paddsw mm1, mm0
psraw mm1, 6
packuswb mm1, mm1
punpckldq mm1, mm1 // duplicate the pixel into both halves
movntq [ebp], mm1 // first source pixel, written twice
movzx ebx, byte ptr [edx + 1]
add edx, 2
paddsw mm0, [kCoefficientsRgbY + 8 * ebx] // mm0 is reused for the 2nd pixel
psraw mm0, 6
packuswb mm0, mm0
punpckldq mm0, mm0
movntq [ebp+8], mm0 // second source pixel, written twice
add ebp, 16
wend :
sub ecx, 4
jns wloop
// Tail: 1 to 3 output pixels remain; a single pixel is converted and then
// stored that many times.
add ecx, 4
jz wdone
movzx eax, byte ptr [edi]
movq mm0, [kCoefficientsRgbU + 8 * eax]
movzx eax, byte ptr [esi]
paddsw mm0, [kCoefficientsRgbV + 8 * eax]
movzx eax, byte ptr [edx]
movq mm1, [kCoefficientsRgbY + 8 * eax]
paddsw mm1, mm0
psraw mm1, 6
packuswb mm1, mm1
jmp wend1
wloop1 :
movd [ebp], mm1 // store 1 pixel (4 bytes)
add ebp, 4
wend1 :
sub ecx, 1
jns wloop1
wdone :
popad
ret
}
}
// This version does general purpose scaling by any amount, up or down.
// The only thing it cannot do is rotation by 90 or 270.
// For performance the chroma is under sampled, reducing cost of a 3x
// 1080p scale from 8.4 ms to 5.4 ms.
//
// Naked function: arguments are read straight off the stack. After pushad
// (32 bytes) and the return address (4 bytes) the first argument is at
// [esp + 32 + 4]; dx is re-read from [esp + 32 + 24] on every step.
// Register use: edx = Y row, edi = U row, esi = V row, ebp = output pointer,
// ecx = remaining width, ebx = fixed point source position x.
// x carries 4 fractional bits: luma is read at x >> 4 and chroma at x >> 5,
// so one U/V pair is shared by the two pixels of each loop iteration.
// MMX registers are left dirty; no emms is issued here.
__declspec(naked)
void ScaleYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int dx) {
__asm {
pushad
mov edx, [esp + 32 + 4] // Y
mov edi, [esp + 32 + 8] // U
mov esi, [esp + 32 + 12] // V
mov ebp, [esp + 32 + 16] // rgb
mov ecx, [esp + 32 + 20] // width
xor ebx, ebx // x
jmp scaleend
// Main loop: two output pixels per iteration.
scaleloop :
mov eax, ebx
sar eax, 5 // chroma index = x >> 5
movzx eax, byte ptr [edi + eax]
movq mm0, [kCoefficientsRgbU + 8 * eax]
mov eax, ebx
sar eax, 5
movzx eax, byte ptr [esi + eax]
paddsw mm0, [kCoefficientsRgbV + 8 * eax] // mm0 = U + V contribution
mov eax, ebx
add ebx, [esp + 32 + 24] // x += dx
sar eax, 4 // luma index of pixel 0
movzx eax, byte ptr [edx + eax]
movq mm1, [kCoefficientsRgbY + 8 * eax]
mov eax, ebx
add ebx, [esp + 32 + 24] // x += dx
sar eax, 4 // luma index of pixel 1
movzx eax, byte ptr [edx + eax]
movq mm2, [kCoefficientsRgbY + 8 * eax]
paddsw mm1, mm0
paddsw mm2, mm0
psraw mm1, 6
psraw mm2, 6
packuswb mm1, mm2
movntq [ebp], mm1 // store 2 pixels (8 bytes)
add ebp, 8
scaleend :
sub ecx, 2
jns scaleloop
// Tail: one remaining pixel when width is odd.
and ecx, 1 // odd number of pixels?
jz scaledone
mov eax, ebx
sar eax, 5
movzx eax, byte ptr [edi + eax]
movq mm0, [kCoefficientsRgbU + 8 * eax]
mov eax, ebx
sar eax, 5
movzx eax, byte ptr [esi + eax]
paddsw mm0, [kCoefficientsRgbV + 8 * eax]
mov eax, ebx
sar eax, 4
movzx eax, byte ptr [edx + eax]
movq mm1, [kCoefficientsRgbY + 8 * eax]
paddsw mm1, mm0
psraw mm1, 6
packuswb mm1, mm1
movd [ebp], mm1 // store 1 pixel (4 bytes)
scaledone :
popad
ret
}
}
#endif // ARCH_CPU_64_BITS
} // extern "C"

Просмотреть файл

@ -253,6 +253,10 @@ nsVideoFrame::BuildLayer(nsDisplayListBuilder* aBuilder,
presContext->AppUnitsToGfxUnits(area.width),
presContext->AppUnitsToGfxUnits(area.height));
r = CorrectForAspectRatio(r, videoSize);
r.Round();
gfxIntSize scaleHint(static_cast<PRInt32>(r.Width()),
static_cast<PRInt32>(r.Height()));
container->SetScaleHint(scaleHint);
nsRefPtr<ImageLayer> layer = static_cast<ImageLayer*>
(aBuilder->LayerBuilder()->GetLeafLayerFor(aBuilder, aManager, aItem));