Bug 1414507 - Fix aom compile errors with VS2015. r=rillian, a=me

Import BUG=aomedia:900 Change-Id: Ifb6c437d8d2f309d0717fb50c402618475fe021f Change-Id: I9ca596f677e174ccd12a6e4eb927733d25e76f58 --HG-- extra : amend_source : 19b6118abd6dfc641d780b560f428b6ce2d8040b
2017-11-04 08:03:00 -04:00 · 2017-11-04 08:03:00 -04:00 · 6484a0c1c6
--- a/third_party/aom/aom_dsp/x86/highbd_intrapred_avx2.c
+++ b/third_party/aom/aom_dsp/x86/highbd_intrapred_avx2.c
@ -11,6 +11,7 @@

 #include <immintrin.h>

+#include "aom_ports/msvc.h"
 #include "./aom_dsp_rtcd.h"

 // -----------------------------------------------------------------------------
--- a/third_party/aom/aom_ports/msvc.h
+++ b/third_party/aom/aom_ports/msvc.h
@ -43,5 +43,25 @@ static INLINE long lroundf(float x) {
 }
 #endif  // _MSC_VER < 1800

+#if HAVE_AVX
+#include <immintrin.h>
+// _mm256_insert_epi16 is only provided by VS2017 and later, so for VS2015
+// and earlier we emulate it with intrinsics documented for VS2015:
+// https://msdn.microsoft.com/en-us/library/hh977022.aspx
+// Parameters:
+//   a:    __m256i source vector. Expanded twice by this macro, so the
+//         argument must be free of side effects.
+//   d:    int16_t value to insert.
+//   indx: imm8 lane index (0 - 15).
+// Every argument is parenthesized so expressions such as `i & 15` are safe.
+#if _MSC_VER <= 1900
+#define _mm256_insert_epi16(a, d, indx)                                      \
+  _mm256_insertf128_si256(                                                   \
+      (a), _mm_insert_epi16(_mm256_extractf128_si256((a), (indx) >> 3),      \
+                            (d), (indx) % 8),                                \
+      (indx) >> 3)
+#endif  // _MSC_VER <= 1900
+#endif  // HAVE_AVX
+
 #endif  // _MSC_VER
 #endif  // AOM_PORTS_MSVC_H_
--- a/third_party/aom/av1/common/reconinter.c
+++ b/third_party/aom/av1/common/reconinter.c
@ -1728,9 +1728,9 @@ void av1_build_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
  av1_build_inter_predictors_sbuv(cm, xd, mi_row, mi_col, ctx, bsize);
 }

-void av1_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE],
-                          BLOCK_SIZE bsize, const YV12_BUFFER_CONFIG *src,
-                          int mi_row, int mi_col) {
+void av1_setup_dst_planes(struct macroblockd_plane *planes, BLOCK_SIZE bsize,
+                          const YV12_BUFFER_CONFIG *src, int mi_row,
+                          int mi_col) {
  const int widths[MAX_MB_PLANE] = { src->y_crop_width, src->uv_crop_width,
                                     src->uv_crop_width };
  const int heights[MAX_MB_PLANE] = { src->y_crop_height, src->uv_crop_height,
--- a/third_party/aom/av1/common/reconinter.h
+++ b/third_party/aom/av1/common/reconinter.h
@ -446,9 +446,9 @@ static INLINE void setup_pred_plane(struct buf_2d *dst, BLOCK_SIZE bsize,
  dst->stride = stride;
 }

-void av1_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE],
-                          BLOCK_SIZE bsize, const YV12_BUFFER_CONFIG *src,
-                          int mi_row, int mi_col);
+void av1_setup_dst_planes(struct macroblockd_plane *planes, BLOCK_SIZE bsize,
+                          const YV12_BUFFER_CONFIG *src, int mi_row,
+                          int mi_col);

 void av1_setup_pre_planes(MACROBLOCKD *xd, int idx,
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
--- a/third_party/aom/av1/common/thread_common.c
+++ b/third_party/aom/av1/common/thread_common.c
@ -86,7 +86,7 @@ static INLINE void sync_write(AV1LfSync *const lf_sync, int r, int c,

 #if !CONFIG_EXT_PARTITION_TYPES
 static INLINE enum lf_path get_loop_filter_path(
-    int y_only, struct macroblockd_plane planes[MAX_MB_PLANE]) {
+    int y_only, struct macroblockd_plane *planes) {
  if (y_only)
    return LF_PATH_444;
  else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1)
@ -98,7 +98,7 @@ static INLINE enum lf_path get_loop_filter_path(
 }

 static INLINE void loop_filter_block_plane_ver(
-    AV1_COMMON *cm, struct macroblockd_plane planes[MAX_MB_PLANE], int plane,
+    AV1_COMMON *cm, struct macroblockd_plane *planes, int plane,
    MODE_INFO **mi, int mi_row, int mi_col, enum lf_path path,
    LOOP_FILTER_MASK *lfm) {
  if (plane == 0) {
@ -120,7 +120,7 @@ static INLINE void loop_filter_block_plane_ver(
 }

 static INLINE void loop_filter_block_plane_hor(
-    AV1_COMMON *cm, struct macroblockd_plane planes[MAX_MB_PLANE], int plane,
+    AV1_COMMON *cm, struct macroblockd_plane *planes, int plane,
    MODE_INFO **mi, int mi_row, int mi_col, enum lf_path path,
    LOOP_FILTER_MASK *lfm) {
  if (plane == 0) {
@ -286,10 +286,9 @@ static int loop_filter_row_worker(AV1LfSync *const lf_sync,
 #endif  //  CONFIG_PARALLEL_DEBLOCKING

 static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
-                                struct macroblockd_plane planes[MAX_MB_PLANE],
-                                int start, int stop, int y_only,
-                                AVxWorker *workers, int nworkers,
-                                AV1LfSync *lf_sync) {
+                                struct macroblockd_plane *planes, int start,
+                                int stop, int y_only, AVxWorker *workers,
+                                int nworkers, AV1LfSync *lf_sync) {
 #if CONFIG_EXT_PARTITION
  printf(
      "STOPPING: This code has not been modified to work with the "
@ -415,7 +414,7 @@ static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
 }

 void av1_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
-                              struct macroblockd_plane planes[MAX_MB_PLANE],
+                              struct macroblockd_plane *planes,
                              int frame_filter_level,
 #if CONFIG_LOOPFILTER_LEVEL
                              int frame_filter_level_r,
--- a/third_party/aom/av1/common/thread_common.h
+++ b/third_party/aom/av1/common/thread_common.h
@ -49,7 +49,7 @@ void av1_loop_filter_dealloc(AV1LfSync *lf_sync);

 // Multi-threaded loopfilter that uses the tile threads.
 void av1_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, struct AV1Common *cm,
-                              struct macroblockd_plane planes[MAX_MB_PLANE],
+                              struct macroblockd_plane *planes,
                              int frame_filter_level,
 #if CONFIG_LOOPFILTER_LEVEL
                              int frame_filter_level_r,
--- a/third_party/aom/av1/common/x86/selfguided_sse4.c
+++ b/third_party/aom/av1/common/x86/selfguided_sse4.c
@ -10,9 +10,11 @@
   av1_selfguided_restoration)
 */
 static void calc_block(__m128i sum, __m128i sum_sq, __m128i n,
-                       __m128i one_over_n, __m128i s, int bit_depth, int idx,
-                       int32_t *A, int32_t *B) {
+                       __m128i *one_over_n_, __m128i *s_, int bit_depth,
+                       int idx, int32_t *A, int32_t *B) {
  __m128i a, b, p;
+  __m128i one_over_n = *one_over_n_;
+  __m128i s = *s_;
 #if CONFIG_HIGHBITDEPTH
  if (bit_depth > 8) {
    __m128i rounding_a = _mm_set1_epi32((1 << (2 * (bit_depth - 8))) >> 1);
@ -147,7 +149,7 @@ static void selfguided_restoration_1_h(int32_t *A, int32_t *B, int width,
    __m128i s = _mm_set_epi32(
        sgrproj_mtable[eps - 1][3 * h - 1], sgrproj_mtable[eps - 1][3 * h - 1],
        sgrproj_mtable[eps - 1][3 * h - 1], sgrproj_mtable[eps - 1][2 * h - 1]);
-    calc_block(sum_, sum_sq_, n, one_over_n, s, bit_depth, i * buf_stride, A,
+    calc_block(sum_, sum_sq_, n, &one_over_n, &s, bit_depth, i * buf_stride, A,
               B);

    n = _mm_set1_epi32(3 * h);
@ -178,8 +180,8 @@ static void selfguided_restoration_1_h(int32_t *A, int32_t *B, int width,
                                             _mm_alignr_epi8(b2, b1, 8)));
      sum_sq_ = _mm_add_epi32(a1, _mm_add_epi32(_mm_alignr_epi8(a2, a1, 4),
                                                _mm_alignr_epi8(a2, a1, 8)));
-      calc_block(sum_, sum_sq_, n, one_over_n, s, bit_depth, i * buf_stride + j,
-                 A, B);
+      calc_block(sum_, sum_sq_, n, &one_over_n, &s, bit_depth,
+                 i * buf_stride + j, A, B);
    }
    __m128i a3 = _mm_loadu_si128((__m128i *)&A[i * buf_stride + j + 3]);
    __m128i b3 = _mm_loadu_si128((__m128i *)&B[i * buf_stride + j + 3]);
@ -227,7 +229,7 @@ static void selfguided_restoration_1_h(int32_t *A, int32_t *B, int width,
    s = _mm_set_epi32(
        sgrproj_mtable[eps - 1][2 * h - 1], sgrproj_mtable[eps - 1][3 * h - 1],
        sgrproj_mtable[eps - 1][3 * h - 1], sgrproj_mtable[eps - 1][3 * h - 1]);
-    calc_block(sum_, sum_sq_, n, one_over_n, s, bit_depth, i * buf_stride + j,
+    calc_block(sum_, sum_sq_, n, &one_over_n, &s, bit_depth, i * buf_stride + j,
               A, B);
  }
 }
@ -342,7 +344,7 @@ static void selfguided_restoration_2_h(int32_t *A, int32_t *B, int width,
    __m128i s = _mm_set_epi32(
        sgrproj_mtable[eps - 1][5 * h - 1], sgrproj_mtable[eps - 1][5 * h - 1],
        sgrproj_mtable[eps - 1][4 * h - 1], sgrproj_mtable[eps - 1][3 * h - 1]);
-    calc_block(sum_, sum_sq_, n, one_over_n, s, bit_depth, i * buf_stride, A,
+    calc_block(sum_, sum_sq_, n, &one_over_n, &s, bit_depth, i * buf_stride, A,
               B);

    // Re-align a1 and b1 so that they start at index i * buf_stride + 2
@ -372,8 +374,8 @@ static void selfguided_restoration_2_h(int32_t *A, int32_t *B, int width,
                                          _mm_alignr_epi8(a2, a1, 8))),
          _mm_add_epi32(_mm_alignr_epi8(a2, a1, 12), a2));

-      calc_block(sum_, sum_sq_, n, one_over_n, s, bit_depth, i * buf_stride + j,
-                 A, B);
+      calc_block(sum_, sum_sq_, n, &one_over_n, &s, bit_depth,
+                 i * buf_stride + j, A, B);
    }
    // If the width is not a multiple of 4, we need to reset j to width - 4
    // and adjust a1, a2, b1, b2 so that the loop invariant above is maintained
@ -428,7 +430,7 @@ static void selfguided_restoration_2_h(int32_t *A, int32_t *B, int width,
    s = _mm_set_epi32(
        sgrproj_mtable[eps - 1][3 * h - 1], sgrproj_mtable[eps - 1][4 * h - 1],
        sgrproj_mtable[eps - 1][5 * h - 1], sgrproj_mtable[eps - 1][5 * h - 1]);
-    calc_block(sum_, sum_sq_, n, one_over_n, s, bit_depth, i * buf_stride + j,
+    calc_block(sum_, sum_sq_, n, &one_over_n, &s, bit_depth, i * buf_stride + j,
               A, B);
  }
 }
@ -562,7 +564,7 @@ static void selfguided_restoration_3_h(int32_t *A, int32_t *B, int width,
    __m128i s = _mm_set_epi32(
        sgrproj_mtable[eps - 1][7 * h - 1], sgrproj_mtable[eps - 1][6 * h - 1],
        sgrproj_mtable[eps - 1][5 * h - 1], sgrproj_mtable[eps - 1][4 * h - 1]);
-    calc_block(sum_, sum_sq_, n, one_over_n, s, bit_depth, i * buf_stride, A,
+    calc_block(sum_, sum_sq_, n, &one_over_n, &s, bit_depth, i * buf_stride, A,
               B);

    // Re-align a1 and b1 so that they start at index i * buf_stride + 1
@ -599,8 +601,8 @@ static void selfguided_restoration_3_h(int32_t *A, int32_t *B, int width,
          _mm_add_epi32(_mm_add_epi32(a2, _mm_alignr_epi8(a3, a2, 4)),
                        _mm_alignr_epi8(a3, a2, 8)));

-      calc_block(sum_, sum_sq_, n, one_over_n, s, bit_depth, i * buf_stride + j,
-                 A, B);
+      calc_block(sum_, sum_sq_, n, &one_over_n, &s, bit_depth,
+                 i * buf_stride + j, A, B);
    }
    __m128i a3 = _mm_loadu_si128((__m128i *)&A[i * buf_stride + j + 1]);
    __m128i b3 = _mm_loadu_si128((__m128i *)&B[i * buf_stride + j + 1]);
@ -657,7 +659,7 @@ static void selfguided_restoration_3_h(int32_t *A, int32_t *B, int width,
    s = _mm_set_epi32(
        sgrproj_mtable[eps - 1][4 * h - 1], sgrproj_mtable[eps - 1][5 * h - 1],
        sgrproj_mtable[eps - 1][6 * h - 1], sgrproj_mtable[eps - 1][7 * h - 1]);
-    calc_block(sum_, sum_sq_, n, one_over_n, s, bit_depth, i * buf_stride + j,
+    calc_block(sum_, sum_sq_, n, &one_over_n, &s, bit_depth, i * buf_stride + j,
               A, B);
  }
 }