Merge "Fix visual studio 2013 compiler warnings"

2014-11-05 15:34:13 -08:00 · 2014-11-05 15:34:13 -08:00 · 7e491de1c4
--- a/vp9/common/vp9_idct.c
+++ b/vp9/common/vp9_idct.c
@ -34,7 +34,7 @@
 // bd of x uses trans_low with 8+x bits, need to remove 24-x bits
 #define WRAPLOW(x, bd) ((((int32_t)(x)) << (24 - bd)) >> (24 - bd))
 #else
-#define WRAPLOW(x, bd) (x)
+#define WRAPLOW(x, bd) ((int32_t)(x))
 #endif  // CONFIG_EMULATE_HARDWARE

 #if CONFIG_VP9_HIGHBITDEPTH
@ -367,14 +367,14 @@ static void iadst8(const tran_low_t *input, tran_low_t *output) {
  }

  // stage 1
-  s0 = cospi_2_64  * x0 + cospi_30_64 * x1;
-  s1 = cospi_30_64 * x0 - cospi_2_64  * x1;
-  s2 = cospi_10_64 * x2 + cospi_22_64 * x3;
-  s3 = cospi_22_64 * x2 - cospi_10_64 * x3;
-  s4 = cospi_18_64 * x4 + cospi_14_64 * x5;
-  s5 = cospi_14_64 * x4 - cospi_18_64 * x5;
-  s6 = cospi_26_64 * x6 + cospi_6_64  * x7;
-  s7 = cospi_6_64  * x6 - cospi_26_64 * x7;
+  s0 = (int)(cospi_2_64  * x0 + cospi_30_64 * x1);
+  s1 = (int)(cospi_30_64 * x0 - cospi_2_64  * x1);
+  s2 = (int)(cospi_10_64 * x2 + cospi_22_64 * x3);
+  s3 = (int)(cospi_22_64 * x2 - cospi_10_64 * x3);
+  s4 = (int)(cospi_18_64 * x4 + cospi_14_64 * x5);
+  s5 = (int)(cospi_14_64 * x4 - cospi_18_64 * x5);
+  s6 = (int)(cospi_26_64 * x6 + cospi_6_64  * x7);
+  s7 = (int)(cospi_6_64  * x6 - cospi_26_64 * x7);

  x0 = WRAPLOW(dct_const_round_shift(s0 + s4), 8);
  x1 = WRAPLOW(dct_const_round_shift(s1 + s5), 8);
@ -386,14 +386,14 @@ static void iadst8(const tran_low_t *input, tran_low_t *output) {
  x7 = WRAPLOW(dct_const_round_shift(s3 - s7), 8);

  // stage 2
-  s0 = x0;
-  s1 = x1;
-  s2 = x2;
-  s3 = x3;
-  s4 =  cospi_8_64  * x4 + cospi_24_64 * x5;
-  s5 =  cospi_24_64 * x4 - cospi_8_64  * x5;
-  s6 = -cospi_24_64 * x6 + cospi_8_64  * x7;
-  s7 =  cospi_8_64  * x6 + cospi_24_64 * x7;
+  s0 = (int)x0;
+  s1 = (int)x1;
+  s2 = (int)x2;
+  s3 = (int)x3;
+  s4 = (int)(cospi_8_64 * x4 + cospi_24_64 * x5);
+  s5 = (int)(cospi_24_64 * x4 - cospi_8_64 * x5);
+  s6 = (int)(-cospi_24_64 * x6 + cospi_8_64 * x7);
+  s7 = (int)(cospi_8_64 * x6 + cospi_24_64 * x7);

  x0 = WRAPLOW(s0 + s2, 8);
  x1 = WRAPLOW(s1 + s3, 8);
@ -405,10 +405,10 @@ static void iadst8(const tran_low_t *input, tran_low_t *output) {
  x7 = WRAPLOW(dct_const_round_shift(s5 - s7), 8);

  // stage 3
-  s2 = cospi_16_64 * (x2 + x3);
-  s3 = cospi_16_64 * (x2 - x3);
-  s6 = cospi_16_64 * (x6 + x7);
-  s7 = cospi_16_64 * (x6 - x7);
+  s2 = (int)(cospi_16_64 * (x2 + x3));
+  s3 = (int)(cospi_16_64 * (x2 - x3));
+  s6 = (int)(cospi_16_64 * (x6 + x7));
+  s7 = (int)(cospi_16_64 * (x6 - x7));

  x2 = WRAPLOW(dct_const_round_shift(s2), 8);
  x3 = WRAPLOW(dct_const_round_shift(s3), 8);
--- a/vp9/common/vp9_idct.h
+++ b/vp9/common/vp9_idct.h
@ -29,10 +29,12 @@ extern "C" {
 #define UNIT_QUANT_FACTOR (1 << UNIT_QUANT_SHIFT)

 #define pair_set_epi16(a, b) \
-  _mm_set_epi16(b, a, b, a, b, a, b, a)
+  _mm_set_epi16((int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a), \
+                (int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a))

 #define dual_set_epi16(a, b) \
-  _mm_set_epi16(b, b, b, b, a, a, a, a)
+  _mm_set_epi16((int16_t)(b), (int16_t)(b), (int16_t)(b), (int16_t)(b), \
+                (int16_t)(a), (int16_t)(a), (int16_t)(a), (int16_t)(a))

 // Constants:
 //  for (int i = 1; i< 32; ++i)
--- a/vp9/common/x86/vp9_idct_intrin_sse2.c
+++ b/vp9/common/x86/vp9_idct_intrin_sse2.c
@ -216,7 +216,7 @@ static void iadst4_sse2(__m128i *in) {
  const __m128i k__sinpi_p03_p02 = pair_set_epi16(sinpi_3_9, sinpi_2_9);
  const __m128i k__sinpi_p02_m01 = pair_set_epi16(sinpi_2_9, -sinpi_1_9);
  const __m128i k__sinpi_p03_m04 = pair_set_epi16(sinpi_3_9, -sinpi_4_9);
-  const __m128i k__sinpi_p03_p03 = _mm_set1_epi16(sinpi_3_9);
+  const __m128i k__sinpi_p03_p03 = _mm_set1_epi16((int16_t)sinpi_3_9);
  const __m128i kZero = _mm_set1_epi16(0);
  const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
  __m128i u[8], v[8], in7;
@ -641,7 +641,7 @@ static void iadst8_sse2(__m128i *in) {
  const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64);
  const __m128i k__cospi_m24_p08 = pair_set_epi16(-cospi_24_64, cospi_8_64);
  const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
-  const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);
+  const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
  const __m128i k__const_0 = _mm_set1_epi16(0);
  const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);

@ -1530,8 +1530,8 @@ static void iadst16_8col(__m128i *in) {
  const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64);
  const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64);
  const __m128i k__cospi_m24_p08 = pair_set_epi16(-cospi_24_64, cospi_8_64);
-  const __m128i k__cospi_m16_m16 = _mm_set1_epi16(-cospi_16_64);
-  const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);
+  const __m128i k__cospi_m16_m16 = _mm_set1_epi16((int16_t)-cospi_16_64);
+  const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
  const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
  const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64);
  const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
@ -1985,7 +1985,7 @@ static void idct16_8col(__m128i *in) {
  const __m128i k__cospi_p04_p28 = pair_set_epi16(cospi_4_64, cospi_28_64);
  const __m128i k__cospi_p12_m20 = pair_set_epi16(cospi_12_64, -cospi_20_64);
  const __m128i k__cospi_p20_p12 = pair_set_epi16(cospi_20_64, cospi_12_64);
-  const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);
+  const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
  const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
  const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64);
  const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64);
--- a/vp9/common/x86/vp9_idct_intrin_ssse3.c
+++ b/vp9/common/x86/vp9_idct_intrin_ssse3.c
@ -36,7 +36,7 @@ static void idct16_8col(__m128i *in, int round) {
  const __m128i k__cospi_m24_m08 = pair_set_epi16(-cospi_24_64, -cospi_8_64);
  const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
  const __m128i k__cospi_p16_p16_x2 = pair_set_epi16(23170, 23170);
-  const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);
+  const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
  const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64);

  __m128i v[16], u[16], s[16], t[16];
--- a/vp9/encoder/vp9_dct.c
+++ b/vp9/encoder/vp9_dct.c
@ -37,12 +37,12 @@ static void fdct4(const tran_low_t *input, tran_low_t *output) {

  temp1 = (step[0] + step[1]) * cospi_16_64;
  temp2 = (step[0] - step[1]) * cospi_16_64;
-  output[0] = fdct_round_shift(temp1);
-  output[2] = fdct_round_shift(temp2);
+  output[0] = (tran_low_t)fdct_round_shift(temp1);
+  output[2] = (tran_low_t)fdct_round_shift(temp2);
  temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64;
  temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64;
-  output[1] = fdct_round_shift(temp1);
-  output[3] = fdct_round_shift(temp2);
+  output[1] = (tran_low_t)fdct_round_shift(temp1);
+  output[3] = (tran_low_t)fdct_round_shift(temp2);
 }

 void vp9_fdct4x4_1_c(const int16_t *input, tran_low_t *output, int stride) {
@ -98,12 +98,12 @@ void vp9_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride) {
      step[3] = input[0] - input[3];
      temp1 = (step[0] + step[1]) * cospi_16_64;
      temp2 = (step[0] - step[1]) * cospi_16_64;
-      out[0] = fdct_round_shift(temp1);
-      out[2] = fdct_round_shift(temp2);
+      out[0] = (tran_low_t)fdct_round_shift(temp1);
+      out[2] = (tran_low_t)fdct_round_shift(temp2);
      temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64;
      temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64;
-      out[1] = fdct_round_shift(temp1);
-      out[3] = fdct_round_shift(temp2);
+      out[1] = (tran_low_t)fdct_round_shift(temp1);
+      out[3] = (tran_low_t)fdct_round_shift(temp2);
      // Do next column (which is a transposed row in second/horizontal pass)
      in_pass0++;
      in++;
@ -157,10 +157,10 @@ static void fadst4(const tran_low_t *input, tran_low_t *output) {
  s3 = x2 - x0 + x3;

  // 1-D transform scaling factor is sqrt(2).
-  output[0] = fdct_round_shift(s0);
-  output[1] = fdct_round_shift(s1);
-  output[2] = fdct_round_shift(s2);
-  output[3] = fdct_round_shift(s3);
+  output[0] = (tran_low_t)fdct_round_shift(s0);
+  output[1] = (tran_low_t)fdct_round_shift(s1);
+  output[2] = (tran_low_t)fdct_round_shift(s2);
+  output[3] = (tran_low_t)fdct_round_shift(s3);
 }

 static const transform_2d FHT_4[] = {
@ -227,16 +227,16 @@ static void fdct8(const tran_low_t *input, tran_low_t *output) {
  t1 = (x0 - x1) * cospi_16_64;
  t2 =  x2 * cospi_24_64 + x3 *  cospi_8_64;
  t3 = -x2 * cospi_8_64  + x3 * cospi_24_64;
-  output[0] = fdct_round_shift(t0);
-  output[2] = fdct_round_shift(t2);
-  output[4] = fdct_round_shift(t1);
-  output[6] = fdct_round_shift(t3);
+  output[0] = (tran_low_t)fdct_round_shift(t0);
+  output[2] = (tran_low_t)fdct_round_shift(t2);
+  output[4] = (tran_low_t)fdct_round_shift(t1);
+  output[6] = (tran_low_t)fdct_round_shift(t3);

  // Stage 2
  t0 = (s6 - s5) * cospi_16_64;
  t1 = (s6 + s5) * cospi_16_64;
-  t2 = fdct_round_shift(t0);
-  t3 = fdct_round_shift(t1);
+  t2 = (tran_low_t)fdct_round_shift(t0);
+  t3 = (tran_low_t)fdct_round_shift(t1);

  // Stage 3
  x0 = s4 + t2;
@ -249,10 +249,10 @@ static void fdct8(const tran_low_t *input, tran_low_t *output) {
  t1 = x1 * cospi_12_64 + x2 *  cospi_20_64;
  t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
  t3 = x3 * cospi_28_64 + x0 *  -cospi_4_64;
-  output[1] = fdct_round_shift(t0);
-  output[3] = fdct_round_shift(t2);
-  output[5] = fdct_round_shift(t1);
-  output[7] = fdct_round_shift(t3);
+  output[1] = (tran_low_t)fdct_round_shift(t0);
+  output[3] = (tran_low_t)fdct_round_shift(t2);
+  output[5] = (tran_low_t)fdct_round_shift(t1);
+  output[7] = (tran_low_t)fdct_round_shift(t3);
 }

 void vp9_fdct8x8_1_c(const int16_t *input, tran_low_t *output, int stride) {
@ -298,10 +298,10 @@ void vp9_fdct8x8_c(const int16_t *input, tran_low_t *final_output, int stride) {
      t1 = (x0 - x1) * cospi_16_64;
      t2 =  x2 * cospi_24_64 + x3 *  cospi_8_64;
      t3 = -x2 * cospi_8_64  + x3 * cospi_24_64;
-      output[0 * 8] = fdct_round_shift(t0);
-      output[2 * 8] = fdct_round_shift(t2);
-      output[4 * 8] = fdct_round_shift(t1);
-      output[6 * 8] = fdct_round_shift(t3);
+      output[0 * 8] = (tran_low_t)fdct_round_shift(t0);
+      output[2 * 8] = (tran_low_t)fdct_round_shift(t2);
+      output[4 * 8] = (tran_low_t)fdct_round_shift(t1);
+      output[6 * 8] = (tran_low_t)fdct_round_shift(t3);

      // Stage 2
      t0 = (s6 - s5) * cospi_16_64;
@ -320,10 +320,10 @@ void vp9_fdct8x8_c(const int16_t *input, tran_low_t *final_output, int stride) {
      t1 = x1 * cospi_12_64 + x2 *  cospi_20_64;
      t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
      t3 = x3 * cospi_28_64 + x0 *  -cospi_4_64;
-      output[1 * 8] = fdct_round_shift(t0);
-      output[3 * 8] = fdct_round_shift(t2);
-      output[5 * 8] = fdct_round_shift(t1);
-      output[7 * 8] = fdct_round_shift(t3);
+      output[1 * 8] = (tran_low_t)fdct_round_shift(t0);
+      output[3 * 8] = (tran_low_t)fdct_round_shift(t2);
+      output[5 * 8] = (tran_low_t)fdct_round_shift(t1);
+      output[7 * 8] = (tran_low_t)fdct_round_shift(t3);
      input++;
      output++;
    }
@ -434,10 +434,10 @@ void vp9_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) {
        t1 = (x0 - x1) * cospi_16_64;
        t2 = x3 * cospi_8_64  + x2 * cospi_24_64;
        t3 = x3 * cospi_24_64 - x2 * cospi_8_64;
-        out[0] = fdct_round_shift(t0);
-        out[4] = fdct_round_shift(t2);
-        out[8] = fdct_round_shift(t1);
-        out[12] = fdct_round_shift(t3);
+        out[0] = (tran_low_t)fdct_round_shift(t0);
+        out[4] = (tran_low_t)fdct_round_shift(t2);
+        out[8] = (tran_low_t)fdct_round_shift(t1);
+        out[12] = (tran_low_t)fdct_round_shift(t3);

        // Stage 2
        t0 = (s6 - s5) * cospi_16_64;
@ -456,10 +456,10 @@ void vp9_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) {
        t1 = x1 * cospi_12_64 + x2 *  cospi_20_64;
        t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
        t3 = x3 * cospi_28_64 + x0 *  -cospi_4_64;
-        out[2] = fdct_round_shift(t0);
-        out[6] = fdct_round_shift(t2);
-        out[10] = fdct_round_shift(t1);
-        out[14] = fdct_round_shift(t3);
+        out[2] = (tran_low_t)fdct_round_shift(t0);
+        out[6] = (tran_low_t)fdct_round_shift(t2);
+        out[10] = (tran_low_t)fdct_round_shift(t1);
+        out[14] = (tran_low_t)fdct_round_shift(t3);
      }
      // Work on the next eight values; step1 -> odd_results
      {
@ -502,20 +502,20 @@ void vp9_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) {
        // step 6
        temp1 = step1[0] * cospi_30_64 + step1[7] *  cospi_2_64;
        temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64;
-        out[1] = fdct_round_shift(temp1);
-        out[9] = fdct_round_shift(temp2);
+        out[1] = (tran_low_t)fdct_round_shift(temp1);
+        out[9] = (tran_low_t)fdct_round_shift(temp2);
        temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64;
        temp2 = step1[3] *  cospi_6_64 + step1[4] * cospi_26_64;
-        out[5] = fdct_round_shift(temp1);
-        out[13] = fdct_round_shift(temp2);
+        out[5] = (tran_low_t)fdct_round_shift(temp1);
+        out[13] = (tran_low_t)fdct_round_shift(temp2);
        temp1 = step1[3] * -cospi_26_64 + step1[4] *  cospi_6_64;
        temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64;
-        out[3] = fdct_round_shift(temp1);
-        out[11] = fdct_round_shift(temp2);
+        out[3] = (tran_low_t)fdct_round_shift(temp1);
+        out[11] = (tran_low_t)fdct_round_shift(temp2);
        temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64;
        temp2 = step1[0] *  -cospi_2_64 + step1[7] * cospi_30_64;
-        out[7] = fdct_round_shift(temp1);
-        out[15] = fdct_round_shift(temp2);
+        out[7] = (tran_low_t)fdct_round_shift(temp1);
+        out[15] = (tran_low_t)fdct_round_shift(temp2);
      }
      // Do next column (which is a transposed row in second/horizontal pass)
      in++;
@ -589,14 +589,14 @@ static void fadst8(const tran_low_t *input, tran_low_t *output) {
  x6 = fdct_round_shift(s6);
  x7 = fdct_round_shift(s7);

-  output[0] =   x0;
-  output[1] = - x4;
-  output[2] =   x6;
-  output[3] = - x2;
-  output[4] =   x3;
-  output[5] = - x7;
-  output[6] =   x5;
-  output[7] = - x1;
+  output[0] = (tran_low_t)x0;
+  output[1] = (tran_low_t)-x4;
+  output[2] = (tran_low_t)x6;
+  output[3] = (tran_low_t)-x2;
+  output[4] = (tran_low_t)x3;
+  output[5] = (tran_low_t)-x7;
+  output[6] = (tran_low_t)x5;
+  output[7] = (tran_low_t)-x1;
 }

 static const transform_2d FHT_8[] = {
@ -659,10 +659,10 @@ void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride) {
    c1 = e1 - c1;
    a1 -= c1;
    d1 += b1;
-    op[0] = a1;
-    op[4] = c1;
-    op[8] = d1;
-    op[12] = b1;
+    op[0] = (tran_low_t)a1;
+    op[4] = (tran_low_t)c1;
+    op[8] = (tran_low_t)d1;
+    op[12] = (tran_low_t)b1;

    ip_pass0++;
    op++;
@ -683,10 +683,10 @@ void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride) {
    c1 = e1 - c1;
    a1 -= c1;
    d1 += b1;
-    op[0] = a1 * UNIT_QUANT_FACTOR;
-    op[1] = c1 * UNIT_QUANT_FACTOR;
-    op[2] = d1 * UNIT_QUANT_FACTOR;
-    op[3] = b1 * UNIT_QUANT_FACTOR;
+    op[0] = (tran_low_t)(a1 * UNIT_QUANT_FACTOR);
+    op[1] = (tran_low_t)(c1 * UNIT_QUANT_FACTOR);
+    op[2] = (tran_low_t)(d1 * UNIT_QUANT_FACTOR);
+    op[3] = (tran_low_t)(b1 * UNIT_QUANT_FACTOR);

    ip += 4;
    op += 4;
@ -745,10 +745,10 @@ static void fdct16(const tran_low_t in[16], tran_low_t out[16]) {
    t1 = (x0 - x1) * cospi_16_64;
    t2 = x3 * cospi_8_64  + x2 * cospi_24_64;
    t3 = x3 * cospi_24_64 - x2 * cospi_8_64;
-    out[0] = fdct_round_shift(t0);
-    out[4] = fdct_round_shift(t2);
-    out[8] = fdct_round_shift(t1);
-    out[12] = fdct_round_shift(t3);
+    out[0] = (tran_low_t)fdct_round_shift(t0);
+    out[4] = (tran_low_t)fdct_round_shift(t2);
+    out[8] = (tran_low_t)fdct_round_shift(t1);
+    out[12] = (tran_low_t)fdct_round_shift(t3);

    // Stage 2
    t0 = (s6 - s5) * cospi_16_64;
@ -767,10 +767,10 @@ static void fdct16(const tran_low_t in[16], tran_low_t out[16]) {
    t1 = x1 * cospi_12_64 + x2 *  cospi_20_64;
    t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
    t3 = x3 * cospi_28_64 + x0 *  -cospi_4_64;
-    out[2] = fdct_round_shift(t0);
-    out[6] = fdct_round_shift(t2);
-    out[10] = fdct_round_shift(t1);
-    out[14] = fdct_round_shift(t3);
+    out[2] = (tran_low_t)fdct_round_shift(t0);
+    out[6] = (tran_low_t)fdct_round_shift(t2);
+    out[10] = (tran_low_t)fdct_round_shift(t1);
+    out[14] = (tran_low_t)fdct_round_shift(t3);
  }

  // step 2
@ -816,23 +816,23 @@ static void fdct16(const tran_low_t in[16], tran_low_t out[16]) {
  // step 6
  temp1 = step1[0] * cospi_30_64 + step1[7] *  cospi_2_64;
  temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64;
-  out[1] = fdct_round_shift(temp1);
-  out[9] = fdct_round_shift(temp2);
+  out[1] = (tran_low_t)fdct_round_shift(temp1);
+  out[9] = (tran_low_t)fdct_round_shift(temp2);

  temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64;
  temp2 = step1[3] *  cospi_6_64 + step1[4] * cospi_26_64;
-  out[5] = fdct_round_shift(temp1);
-  out[13] = fdct_round_shift(temp2);
+  out[5] = (tran_low_t)fdct_round_shift(temp1);
+  out[13] = (tran_low_t)fdct_round_shift(temp2);

  temp1 = step1[3] * -cospi_26_64 + step1[4] *  cospi_6_64;
  temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64;
-  out[3] = fdct_round_shift(temp1);
-  out[11] = fdct_round_shift(temp2);
+  out[3] = (tran_low_t)fdct_round_shift(temp1);
+  out[11] = (tran_low_t)fdct_round_shift(temp2);

  temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64;
  temp2 = step1[0] *  -cospi_2_64 + step1[7] * cospi_30_64;
-  out[7] = fdct_round_shift(temp1);
-  out[15] = fdct_round_shift(temp2);
+  out[7] = (tran_low_t)fdct_round_shift(temp1);
+  out[15] = (tran_low_t)fdct_round_shift(temp2);
 }

 static void fadst16(const tran_low_t *input, tran_low_t *output) {
@ -980,22 +980,22 @@ static void fadst16(const tran_low_t *input, tran_low_t *output) {
  x14 = fdct_round_shift(s14);
  x15 = fdct_round_shift(s15);

-  output[0] = x0;
-  output[1] = - x8;
-  output[2] = x12;
-  output[3] = - x4;
-  output[4] = x6;
-  output[5] = x14;
-  output[6] = x10;
-  output[7] = x2;
-  output[8] = x3;
-  output[9] =  x11;
-  output[10] = x15;
-  output[11] = x7;
-  output[12] = x5;
-  output[13] = - x13;
-  output[14] = x9;
-  output[15] = - x1;
+  output[0] = (tran_low_t)x0;
+  output[1] = (tran_low_t)-x8;
+  output[2] = (tran_low_t)x12;
+  output[3] = (tran_low_t)-x4;
+  output[4] = (tran_low_t)x6;
+  output[5] = (tran_low_t)x14;
+  output[6] = (tran_low_t)x10;
+  output[7] = (tran_low_t)x2;
+  output[8] = (tran_low_t)x3;
+  output[9] = (tran_low_t)x11;
+  output[10] = (tran_low_t)x15;
+  output[11] = (tran_low_t)x7;
+  output[12] = (tran_low_t)x5;
+  output[13] = (tran_low_t)-x13;
+  output[14] = (tran_low_t)x9;
+  output[15] = (tran_low_t)-x1;
 }

 static const transform_2d FHT_16[] = {
@ -1404,7 +1404,8 @@ void vp9_fdct32x32_c(const int16_t *input, tran_low_t *out, int stride) {
      temp_in[j] = output[j + i * 32];
    fdct32(temp_in, temp_out, 0);
    for (j = 0; j < 32; ++j)
-      out[j + i * 32] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
+      out[j + i * 32] =
+          (tran_low_t)((temp_out[j] + 1 + (temp_out[j] < 0)) >> 2);
  }
 }

@ -1435,7 +1436,7 @@ void vp9_fdct32x32_rd_c(const int16_t *input, tran_low_t *out, int stride) {
      temp_in[j] = output[j + i * 32];
    fdct32(temp_in, temp_out, 1);
    for (j = 0; j < 32; ++j)
-      out[j + i * 32] = temp_out[j];
+      out[j + i * 32] = (tran_low_t)temp_out[j];
  }
 }

--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@ -56,7 +56,7 @@ void vp9_highbd_quantize_dc(const tran_low_t *coeff_ptr, int skip_block,
    const int64_t tmp =
        (clamp(abs_coeff + round_ptr[rc != 0], INT32_MIN, INT32_MAX) *
         quant) >> 16;
-    qcoeff_ptr[rc]  = (tmp ^ coeff_sign) - coeff_sign;
+    qcoeff_ptr[rc] = (tran_low_t)((tmp ^ coeff_sign) - coeff_sign);
    dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr;
    if (tmp)
      eob = 0;
@ -107,7 +107,7 @@ void vp9_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr,
    const int64_t tmp =
        (clamp(abs_coeff + round_ptr[rc != 0], INT32_MIN, INT32_MAX) *
         quant) >> 15;
-    qcoeff_ptr[rc]  = (tmp ^ coeff_sign) - coeff_sign;
+    qcoeff_ptr[rc] = (tran_low_t)((tmp ^ coeff_sign) - coeff_sign);
    dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr / 2;
    if (tmp)
      eob = 0;
@ -197,7 +197,7 @@ void vp9_highbd_quantize_fp_c(const tran_low_t *coeff_ptr,
          (clamp(abs_coeff + round_ptr[rc != 0], INT32_MIN, INT32_MAX) *
           quant_ptr[rc != 0]) >> 16;

-      qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
+      qcoeff_ptr[rc] = (tran_low_t)((tmp ^ coeff_sign) - coeff_sign);
      dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];

      if (tmp)
@ -284,7 +284,7 @@ void vp9_highbd_quantize_fp_32x32_c(const tran_low_t *coeff_ptr,
        tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1),
                    INT32_MIN, INT32_MAX);
        tmp = (tmp * quant_ptr[rc != 0]) >> 15;
-        qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
+        qcoeff_ptr[rc] = (tran_low_t)((tmp ^ coeff_sign) - coeff_sign);
        dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
      }

@ -393,7 +393,7 @@ void vp9_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                            INT32_MIN, INT32_MAX);
        tmp = ((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) *
                  quant_shift_ptr[rc != 0]) >> 16;  // quantization
-        qcoeff_ptr[rc]  = (tmp ^ coeff_sign) - coeff_sign;
+        qcoeff_ptr[rc]  = (tran_low_t)((tmp ^ coeff_sign) - coeff_sign);
        dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];

        if (tmp)
@ -510,7 +510,7 @@ void vp9_highbd_quantize_b_32x32_c(const tran_low_t *coeff_ptr,
      tmp = ((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) *
               quant_shift_ptr[rc != 0]) >> 15;

-      qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
+      qcoeff_ptr[rc] = (tran_low_t)((tmp ^ coeff_sign) - coeff_sign);
      dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;

      if (tmp)
--- a/vp9/encoder/vp9_variance.c
+++ b/vp9/encoder/vp9_variance.c
@ -298,8 +298,8 @@ void highbd_variance(const uint8_t *a8, int  a_stride,
  uint64_t sse_long = 0;
  uint64_t sum_long = 0;
  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
-  *sse = sse_long;
-  *sum = sum_long;
+  *sse = (unsigned int)sse_long;
+  *sum = (int)sum_long;
 }

 void highbd_10_variance(const uint8_t *a8, int  a_stride,
@ -309,8 +309,8 @@ void highbd_10_variance(const uint8_t *a8, int  a_stride,
  uint64_t sse_long = 0;
  uint64_t sum_long = 0;
  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
-  *sum = ROUND_POWER_OF_TWO(sum_long, 2);
-  *sse = ROUND_POWER_OF_TWO(sse_long, 4);
+  *sum = (int)ROUND_POWER_OF_TWO(sum_long, 2);
+  *sse = (unsigned int)ROUND_POWER_OF_TWO(sse_long, 4);
 }

 void highbd_12_variance(const uint8_t *a8, int  a_stride,
@ -320,8 +320,8 @@ void highbd_12_variance(const uint8_t *a8, int  a_stride,
  uint64_t sse_long = 0;
  uint64_t sum_long = 0;
  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
-  *sum = ROUND_POWER_OF_TWO(sum_long, 4);
-  *sse = ROUND_POWER_OF_TWO(sse_long, 8);
+  *sum = (int)ROUND_POWER_OF_TWO(sum_long, 4);
+  *sse = (unsigned int)ROUND_POWER_OF_TWO(sse_long, 8);
 }

 static void highbd_var_filter_block2d_bil_first_pass(
--- a/vp9/encoder/x86/vp9_dct32x32_avx2.c
+++ b/vp9/encoder/x86/vp9_dct32x32_avx2.c
@ -13,13 +13,14 @@
 #include "vpx_ports/mem.h"

 #define pair256_set_epi16(a, b) \
-  _mm256_set_epi16(b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a)
+  _mm256_set_epi16((int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a), \
+                   (int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a), \
+                   (int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a), \
+                   (int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a))

 #define pair256_set_epi32(a, b) \
-  _mm256_set_epi32(b, a, b, a, b, a, b, a)
-
-
-
+  _mm256_set_epi32((int)(b), (int)(a), (int)(b), (int)(a), \
+                   (int)(b), (int)(a), (int)(b), (int)(a))

 #if FDCT32x32_HIGH_PRECISION
 static INLINE __m256i k_madd_epi32_avx2(__m256i a, __m256i b) {
@ -50,7 +51,7 @@ void FDCT32x32_2D_AVX2(const int16_t *input,
  //    When we use them, in one case, they are all the same. In all others
  //    it's a pair of them that we need to repeat four times. This is done
  //    by constructing the 32 bit constant corresponding to that pair.
-  const __m256i k__cospi_p16_p16 = _mm256_set1_epi16(+cospi_16_64);
+  const __m256i k__cospi_p16_p16 = _mm256_set1_epi16((int16_t)cospi_16_64);
  const __m256i k__cospi_p16_m16 = pair256_set_epi16(+cospi_16_64, -cospi_16_64);
  const __m256i k__cospi_m08_p24 = pair256_set_epi16(-cospi_8_64,   cospi_24_64);
  const __m256i k__cospi_m24_m08 = pair256_set_epi16(-cospi_24_64, -cospi_8_64);
--- a/vp9/encoder/x86/vp9_dct32x32_sse2.c
+++ b/vp9/encoder/x86/vp9_dct32x32_sse2.c
@ -13,7 +13,7 @@
 #include "vpx_ports/mem.h"

 #define pair_set_epi32(a, b) \
-  _mm_set_epi32(b, a, b, a)
+  _mm_set_epi32((int)(b), (int)(a), (int)(b), (int)(a))

 #if FDCT32x32_HIGH_PRECISION
 static INLINE __m128i k_madd_epi32(__m128i a, __m128i b) {
@ -44,7 +44,7 @@ void FDCT32x32_2D(const int16_t *input,
  //    When we use them, in one case, they are all the same. In all others
  //    it's a pair of them that we need to repeat four times. This is done
  //    by constructing the 32 bit constant corresponding to that pair.
-  const __m128i k__cospi_p16_p16 = _mm_set1_epi16(+cospi_16_64);
+  const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
  const __m128i k__cospi_p16_m16 = pair_set_epi16(+cospi_16_64, -cospi_16_64);
  const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64,   cospi_24_64);
  const __m128i k__cospi_m24_m08 = pair_set_epi16(-cospi_24_64, -cospi_8_64);
--- a/vp9/encoder/x86/vp9_dct_sse2.c
+++ b/vp9/encoder/x86/vp9_dct_sse2.c
@ -58,38 +58,38 @@ void vp9_fdct4x4_sse2(const int16_t *input, int16_t *output, int stride) {
  // These are the coefficients used for the multiplies.
  // In the comments, pN means cos(N pi /64) and mN is -cos(N pi /64),
  // where cospi_N_64 = cos(N pi /64)
-  const __m128i k__cospi_A = _mm_setr_epi16(cospi_16_64, cospi_16_64,
-                                            cospi_16_64, cospi_16_64,
-                                            cospi_16_64, -cospi_16_64,
-                                            cospi_16_64, -cospi_16_64);
-  const __m128i k__cospi_B = _mm_setr_epi16(cospi_16_64, -cospi_16_64,
-                                            cospi_16_64, -cospi_16_64,
-                                            cospi_16_64, cospi_16_64,
-                                            cospi_16_64, cospi_16_64);
-  const __m128i k__cospi_C = _mm_setr_epi16(cospi_8_64, cospi_24_64,
-                                            cospi_8_64, cospi_24_64,
-                                            cospi_24_64, -cospi_8_64,
-                                            cospi_24_64, -cospi_8_64);
-  const __m128i k__cospi_D = _mm_setr_epi16(cospi_24_64, -cospi_8_64,
-                                            cospi_24_64, -cospi_8_64,
-                                            cospi_8_64, cospi_24_64,
-                                            cospi_8_64, cospi_24_64);
-  const __m128i k__cospi_E = _mm_setr_epi16(cospi_16_64, cospi_16_64,
-                                            cospi_16_64, cospi_16_64,
-                                            cospi_16_64, cospi_16_64,
-                                            cospi_16_64, cospi_16_64);
-  const __m128i k__cospi_F = _mm_setr_epi16(cospi_16_64, -cospi_16_64,
-                                            cospi_16_64, -cospi_16_64,
-                                            cospi_16_64, -cospi_16_64,
-                                            cospi_16_64, -cospi_16_64);
-  const __m128i k__cospi_G = _mm_setr_epi16(cospi_8_64, cospi_24_64,
-                                            cospi_8_64, cospi_24_64,
-                                            -cospi_8_64, -cospi_24_64,
-                                            -cospi_8_64, -cospi_24_64);
-  const __m128i k__cospi_H = _mm_setr_epi16(cospi_24_64, -cospi_8_64,
-                                            cospi_24_64, -cospi_8_64,
-                                            -cospi_24_64, cospi_8_64,
-                                            -cospi_24_64, cospi_8_64);
+  const __m128i k__cospi_A = _mm_setr_epi16((int16_t)cospi_16_64, (int16_t)cospi_16_64,
+                                            (int16_t)cospi_16_64, (int16_t)cospi_16_64,
+                                            (int16_t)cospi_16_64, (int16_t)-cospi_16_64,
+                                            (int16_t)cospi_16_64, (int16_t)-cospi_16_64);
+  const __m128i k__cospi_B = _mm_setr_epi16((int16_t)cospi_16_64, (int16_t)-cospi_16_64,
+                                            (int16_t)cospi_16_64, (int16_t)-cospi_16_64,
+                                            (int16_t)cospi_16_64, (int16_t)cospi_16_64,
+                                            (int16_t)cospi_16_64, (int16_t)cospi_16_64);
+  const __m128i k__cospi_C = _mm_setr_epi16((int16_t)cospi_8_64, (int16_t)cospi_24_64,
+                                            (int16_t)cospi_8_64, (int16_t)cospi_24_64,
+                                            (int16_t)cospi_24_64, (int16_t)-cospi_8_64,
+                                            (int16_t)cospi_24_64, (int16_t)-cospi_8_64);
+  const __m128i k__cospi_D = _mm_setr_epi16((int16_t)cospi_24_64, (int16_t)-cospi_8_64,
+                                            (int16_t)cospi_24_64, (int16_t)-cospi_8_64,
+                                            (int16_t)cospi_8_64, (int16_t)cospi_24_64,
+                                            (int16_t)cospi_8_64, (int16_t)cospi_24_64);
+  const __m128i k__cospi_E = _mm_setr_epi16((int16_t)cospi_16_64, (int16_t)cospi_16_64,
+                                            (int16_t)cospi_16_64, (int16_t)cospi_16_64,
+                                            (int16_t)cospi_16_64, (int16_t)cospi_16_64,
+                                            (int16_t)cospi_16_64, (int16_t)cospi_16_64);
+  const __m128i k__cospi_F = _mm_setr_epi16((int16_t)cospi_16_64, (int16_t)-cospi_16_64,
+                                            (int16_t)cospi_16_64, (int16_t)-cospi_16_64,
+                                            (int16_t)cospi_16_64, (int16_t)-cospi_16_64,
+                                            (int16_t)cospi_16_64, (int16_t)-cospi_16_64);
+  const __m128i k__cospi_G = _mm_setr_epi16((int16_t)cospi_8_64, (int16_t)cospi_24_64,
+                                            (int16_t)cospi_8_64, (int16_t)cospi_24_64,
+                                            (int16_t)-cospi_8_64, (int16_t)-cospi_24_64,
+                                            (int16_t)-cospi_8_64, (int16_t)-cospi_24_64);
+  const __m128i k__cospi_H = _mm_setr_epi16((int16_t)cospi_24_64, (int16_t)-cospi_8_64,
+                                            (int16_t)cospi_24_64, (int16_t)-cospi_8_64,
+                                            (int16_t)-cospi_24_64, (int16_t)cospi_8_64,
+                                            (int16_t)-cospi_24_64, (int16_t)cospi_8_64);

  const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
  // This second rounding constant saves doing some extra adds at the end
@ -296,7 +296,7 @@ static INLINE void transpose_4x4(__m128i *res) {
 }

 void fdct4_sse2(__m128i *in) {
-  const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);
+  const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
  const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
  const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64);
  const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64);
@ -333,7 +333,7 @@ void fadst4_sse2(__m128i *in) {
  const __m128i k__sinpi_p04_m01 = pair_set_epi16(sinpi_4_9, -sinpi_1_9);
  const __m128i k__sinpi_p03_p04 = pair_set_epi16(sinpi_3_9, sinpi_4_9);
  const __m128i k__sinpi_m03_p02 = pair_set_epi16(-sinpi_3_9, sinpi_2_9);
-  const __m128i k__sinpi_p03_p03 = _mm_set1_epi16(sinpi_3_9);
+  const __m128i k__sinpi_p03_p03 = _mm_set1_epi16((int16_t)sinpi_3_9);
  const __m128i kZero = _mm_set1_epi16(0);
  const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
  __m128i u[8], v[8];
@ -454,7 +454,7 @@ void vp9_fdct8x8_sse2(const int16_t *input, int16_t *output, int stride) {
  //    When we use them, in one case, they are all the same. In all others
  //    it's a pair of them that we need to repeat four times. This is done
  //    by constructing the 32 bit constant corresponding to that pair.
-  const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);
+  const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
  const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
  const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64);
  const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64);
@ -784,7 +784,7 @@ static INLINE void write_buffer_8x8(int16_t *output, __m128i *res, int stride) {

 void fdct8_sse2(__m128i *in) {
  // constants
-  const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);
+  const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
  const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
  const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64);
  const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64);
@ -936,7 +936,7 @@ void fadst8_sse2(__m128i *in) {
  const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64);
  const __m128i k__cospi_m24_p08 = pair_set_epi16(-cospi_24_64, cospi_8_64);
  const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
-  const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);
+  const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
  const __m128i k__const_0 = _mm_set1_epi16(0);
  const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);

@ -1271,7 +1271,7 @@ void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride) {
  //    When we use them, in one case, they are all the same. In all others
  //    it's a pair of them that we need to repeat four times. This is done
  //    by constructing the 32 bit constant corresponding to that pair.
-  const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);
+  const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
  const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
  const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64);
  const __m128i k__cospi_p08_m24 = pair_set_epi16(cospi_8_64, -cospi_24_64);
@ -1914,7 +1914,7 @@ static INLINE void right_shift_16x16(__m128i *res0, __m128i *res1) {
 void fdct16_8col(__m128i *in) {
  // perform 16x16 1-D DCT for 8 columns
  __m128i i[8], s[8], p[8], t[8], u[16], v[16];
-  const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);
+  const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
  const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
  const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64);
  const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64);
@ -2261,8 +2261,8 @@ void fadst16_8col(__m128i *in) {
  const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64);
  const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64);
  const __m128i k__cospi_m24_p08 = pair_set_epi16(-cospi_24_64, cospi_8_64);
-  const __m128i k__cospi_m16_m16 = _mm_set1_epi16(-cospi_16_64);
-  const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);
+  const __m128i k__cospi_m16_m16 = _mm_set1_epi16((int16_t)-cospi_16_64);
+  const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
  const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
  const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64);
  const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);