Bug 577645 - Y'CbCr assembly uses movntq which is SSE, not MMX. Change runtime CPU detection to match. r=tterribe

2010-08-02 15:32:14 +12:00 · 2010-08-02 15:32:14 +12:00 · c547b0c0c0
--- a/gfx/ycbcr/bug577645_movntq.patch
+++ b/gfx/ycbcr/bug577645_movntq.patch
@ -0,0 +1,63 @@
+diff --git a/gfx/ycbcr/yuv_convert.cpp b/gfx/ycbcr/yuv_convert.cpp
+--- a/gfx/ycbcr/yuv_convert.cpp
+++ b/gfx/ycbcr/yuv_convert.cpp
+@@ -36,19 +36,21 @@ NS_GFX_(void) ConvertYCbCrToRGB32(const 
+                                   int pic_width,
+                                   int pic_height,
+                                   int y_pitch,
+                                   int uv_pitch,
+                                   int rgb_pitch,
+                                   YUVType yuv_type) {
+   unsigned int y_shift = yuv_type == YV12 ? 1 : 0;
+   unsigned int x_shift = yuv_type == YV24 ? 0 : 1;
+-  // There is no optimized YV24 MMX routine so we check for this and
+  // Test for SSE because the optimized code uses movntq, which is not part of MMX.
+  bool has_sse = supports_mmx() && supports_sse();
+  // There is no optimized YV24 SSE routine so we check for this and
+   // fall back to the C code.
+-  bool has_mmx = supports_mmx() && yuv_type != YV24;
+  has_sse &= yuv_type != YV24;
+   bool odd_pic_x = yuv_type != YV24 && pic_x % 2 != 0;
+   int x_width = odd_pic_x ? pic_width - 1 : pic_width;
+ 
+   for (int y = pic_y; y < pic_height + pic_y; ++y) {
+     uint8* rgb_row = rgb_buf + (y - pic_y) * rgb_pitch;
+     const uint8* y_ptr = y_buf + y * y_pitch + pic_x;
+     const uint8* u_ptr = u_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift);
+     const uint8* v_ptr = v_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift);
+@@ -60,32 +62,32 @@ NS_GFX_(void) ConvertYCbCrToRGB32(const 
+                                  u_ptr++,
+                                  v_ptr++,
+                                  rgb_row,
+                                  1,
+                                  x_shift);
+       rgb_row += 4;
+     }
+ 
+-    if (has_mmx)
+    if (has_sse)
+       FastConvertYUVToRGB32Row(y_ptr,
+                                u_ptr,
+                                v_ptr,
+                                rgb_row,
+                                x_width);
+     else
+       FastConvertYUVToRGB32Row_C(y_ptr,
+                                  u_ptr,
+                                  v_ptr,
+                                  rgb_row,
+                                  x_width,
+                                  x_shift);
+   }
+ 
+ #ifdef ARCH_CPU_X86_FAMILY
+-  // MMX used for FastConvertYUVToRGB32Row requires emms instruction.
+-  if (has_mmx)
+  // SSE used for FastConvertYUVToRGB32Row requires emms instruction.
+  if (has_sse)
+     EMMS();
+ #endif
+ }
+ 
+ }  // namespace gfx
+ }  // namespace mozilla
--- a/gfx/ycbcr/update.sh
+++ b/gfx/ycbcr/update.sh
@ -14,3 +14,4 @@ patch -p3 <win64_mac64.patch
 patch -p3 <yv24.patch
 patch -p3 <row_c_fix.patch
 patch -p3 <bug572034_mac_64bit.patch
+patch -p3 <bug577645_movntq.patch
--- a/gfx/ycbcr/yuv_convert.cpp
+++ b/gfx/ycbcr/yuv_convert.cpp
@ -41,9 +41,11 @@ NS_GFX_(void) ConvertYCbCrToRGB32(const uint8* y_buf,
                                  YUVType yuv_type) {
  unsigned int y_shift = yuv_type == YV12 ? 1 : 0;
  unsigned int x_shift = yuv_type == YV24 ? 0 : 1;
-  // There is no optimized YV24 MMX routine so we check for this and
+  // Test for SSE because the optimized code uses movntq, which is not part of MMX.
+  bool has_sse = supports_mmx() && supports_sse();
+  // There is no optimized YV24 SSE routine so we check for this and
  // fall back to the C code.
-  bool has_mmx = supports_mmx() && yuv_type != YV24;
+  has_sse &= yuv_type != YV24;
  bool odd_pic_x = yuv_type != YV24 && pic_x % 2 != 0;
  int x_width = odd_pic_x ? pic_width - 1 : pic_width;

@ -65,7 +67,7 @@ NS_GFX_(void) ConvertYCbCrToRGB32(const uint8* y_buf,
      rgb_row += 4;
    }

-    if (has_mmx)
+    if (has_sse)
      FastConvertYUVToRGB32Row(y_ptr,
                               u_ptr,
                               v_ptr,
@ -81,8 +83,8 @@ NS_GFX_(void) ConvertYCbCrToRGB32(const uint8* y_buf,
  }

 #ifdef ARCH_CPU_X86_FAMILY
-  // MMX used for FastConvertYUVToRGB32Row requires emms instruction.
-  if (has_mmx)
+  // SSE used for FastConvertYUVToRGB32Row requires emms instruction.
+  if (has_sse)
    EMMS();
 #endif
 }