Clean up dead forward transform functions

Clean up related wrappers and unit tests. Change-Id: I2d37a8c80de63dbeaef584e3d5fa842c0b2ee6db
2017-06-08 11:20:33 +02:00 · 2017-06-08 11:20:33 +02:00 · d405f8a627
--- a/aom_dsp/aom_dsp_rtcd_defs.pl
+++ b/aom_dsp/aom_dsp_rtcd_defs.pl
@ -341,24 +341,15 @@ if ((aom_config("CONFIG_AV1_ENCODER") eq "yes") || (aom_config("CONFIG_PVQ") eq
    # Forward DCT entries. Each add_proto line gives the return type, function
    # name and argument list; the specialize line that follows names the
    # instruction-set variants listed for that function (sse2, avx2, ...).
    # The aom_fdct8x8 entry also appends the value of $ssse3_x86_64.
    add_proto qw/void aom_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct8x8 sse2/, "$ssse3_x86_64";
    add_proto qw/void aom_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct8x8_1 sse2/;
    add_proto qw/void aom_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct16x16 sse2/;
    add_proto qw/void aom_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct16x16_1 sse2 avx2/;
    add_proto qw/void aom_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct32x32 sse2 avx2/;
    add_proto qw/void aom_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct32x32_rd sse2 avx2/;
    add_proto qw/void aom_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct32x32_1 sse2 avx2/;
    # High bit depth
    add_proto qw/void aom_highbd_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_highbd_fdct4x4 sse2/;
@ -366,20 +357,15 @@ if ((aom_config("CONFIG_AV1_ENCODER") eq "yes") || (aom_config("CONFIG_PVQ") eq
    # aom_highbd_* forward DCT entries. They share the argument list of the
    # non-highbd entries. The *_1 entries below have an add_proto line but no
    # specialize line; the remaining entries list only sse2.
    add_proto qw/void aom_highbd_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_highbd_fdct8x8 sse2/;
    add_proto qw/void aom_highbd_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
    add_proto qw/void aom_highbd_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_highbd_fdct16x16 sse2/;
    add_proto qw/void aom_highbd_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
    add_proto qw/void aom_highbd_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_highbd_fdct32x32 sse2/;
    add_proto qw/void aom_highbd_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_highbd_fdct32x32_rd sse2/;
    add_proto qw/void aom_highbd_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
  } else {
    # 4x4 forward DCT entry for this branch; sse2 and msa variants are listed.
    add_proto qw/void aom_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct4x4 sse2 msa/;
@ -390,23 +376,14 @@ if ((aom_config("CONFIG_AV1_ENCODER") eq "yes") || (aom_config("CONFIG_PVQ") eq
    # Forward DCT entries whose specialize lines list msa (and, for the 8x8
    # entries, neon) variants in addition to the sse2/avx2 ones. The argument
    # list is the same for every entry.
    add_proto qw/void aom_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct8x8 sse2 neon msa/, "$ssse3_x86_64";
    add_proto qw/void aom_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct8x8_1 sse2 neon msa/;
    add_proto qw/void aom_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct16x16 sse2 msa/;
    add_proto qw/void aom_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct16x16_1 sse2 avx2 msa/;
    add_proto qw/void aom_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct32x32 sse2 avx2 msa/;
    add_proto qw/void aom_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct32x32_rd sse2 avx2 msa/;
    add_proto qw/void aom_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct32x32_1 sse2 avx2 msa/;
  }  # CONFIG_HIGHBITDEPTH
 }  # CONFIG_AV1_ENCODER
--- a/aom_dsp/fwd_txfm.c
+++ b/aom_dsp/fwd_txfm.c
@ -172,15 +172,6 @@ void aom_fdct8x8_c(const int16_t *input, tran_low_t *final_output, int stride) {
  }
 }
 // DC-only 8x8 forward transform: output[0] receives the plain sum of the 64
 // samples input[r * stride + c] for r, c in [0, 8). Nothing else is written.
 void aom_fdct8x8_1_c(const int16_t *input, tran_low_t *output, int stride) {
  tran_low_t acc = 0;
  int row;
  for (row = 0; row < 8; ++row) {
    const int16_t *const src = input + row * stride;
    int col = 0;
    while (col < 8) acc += src[col++];
  }
  output[0] = acc;
 }
 void aom_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) {
  // The 2D transform is done with two passes which are actually pretty
  // similar. In the first one, we transform the columns and transpose
@ -361,15 +352,6 @@ void aom_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) {
  }
 }
 // DC-only 16x16 forward transform: sums the 256 samples
 // input[r * stride + c] for r, c in [0, 16) into an int and stores the sum
 // shifted right by one bit in output[0].
 void aom_fdct16x16_1_c(const int16_t *input, tran_low_t *output, int stride) {
  int total = 0;
  int row;
  for (row = 0; row < 16; ++row) {
    const int16_t *const src = input + row * stride;
    int col;
    for (col = 0; col < 16; ++col) total += src[col];
  }
  total >>= 1;
  output[0] = (tran_low_t)total;
 }
 static INLINE tran_high_t dct_32_round(tran_high_t input) {
  tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
  // TODO(debargha, peter.derivaz): Find new bounds for this assert,
@ -758,15 +740,6 @@ void aom_fdct32x32_rd_c(const int16_t *input, tran_low_t *out, int stride) {
  }
 }
 // DC-only 32x32 forward transform: sums the 1024 samples
 // input[r * stride + c] for r, c in [0, 32) into an int and stores the sum
 // shifted right by three bits in output[0].
 void aom_fdct32x32_1_c(const int16_t *input, tran_low_t *output, int stride) {
  int total = 0;
  int i;
  // Walk the block in raster order: i >> 5 is the row, i & 31 the column.
  for (i = 0; i < 32 * 32; ++i) total += input[(i >> 5) * stride + (i & 31)];
  output[0] = (tran_low_t)(total >> 3);
 }
 #if CONFIG_HIGHBITDEPTH
 void aom_highbd_fdct4x4_c(const int16_t *input, tran_low_t *output,
                          int stride) {
@ -778,32 +751,17 @@ void aom_highbd_fdct8x8_c(const int16_t *input, tran_low_t *final_output,
  aom_fdct8x8_c(input, final_output, stride);
 }
 // High-bit-depth entry point for the DC-only 8x8 transform; forwards its
 // arguments unchanged to aom_fdct8x8_1_c.
 void aom_highbd_fdct8x8_1_c(const int16_t *input, tran_low_t *final_output,
                            int stride) {
  aom_fdct8x8_1_c(input, final_output, stride);
 }
 // High-bit-depth entry point for the 16x16 forward DCT; forwards its
 // arguments unchanged to aom_fdct16x16_c.
 void aom_highbd_fdct16x16_c(const int16_t *input, tran_low_t *output,
                            int stride) {
  aom_fdct16x16_c(input, output, stride);
 }
 // High-bit-depth entry point for the DC-only 16x16 transform; forwards its
 // arguments unchanged to aom_fdct16x16_1_c.
 void aom_highbd_fdct16x16_1_c(const int16_t *input, tran_low_t *output,
                              int stride) {
  aom_fdct16x16_1_c(input, output, stride);
 }
 // High-bit-depth entry point for the 32x32 forward DCT; forwards its
 // arguments unchanged to aom_fdct32x32_c.
 void aom_highbd_fdct32x32_c(const int16_t *input, tran_low_t *out, int stride) {
  aom_fdct32x32_c(input, out, stride);
 }
 // High-bit-depth entry point for the "_rd" 32x32 forward DCT; forwards its
 // arguments unchanged to aom_fdct32x32_rd_c.
 void aom_highbd_fdct32x32_rd_c(const int16_t *input, tran_low_t *out,
                               int stride) {
  aom_fdct32x32_rd_c(input, out, stride);
 }
 // High-bit-depth entry point for the DC-only 32x32 transform; forwards its
 // arguments unchanged to aom_fdct32x32_1_c.
 void aom_highbd_fdct32x32_1_c(const int16_t *input, tran_low_t *out,
                              int stride) {
  aom_fdct32x32_1_c(input, out, stride);
 }
 #endif  // CONFIG_HIGHBITDEPTH
--- a/aom_dsp/mips/fwd_dct32x32_msa.c
+++ b/aom_dsp/mips/fwd_dct32x32_msa.c
@ -926,23 +926,3 @@ void aom_fdct32x32_rd_msa(const int16_t *input, int16_t *out,
                       out + (8 * i * 32));
  }
 }
 // DC-only 32x32 forward transform (MSA): sums the block as a 4x4 grid of
 // tiles via LD_HADD and stores the sum shifted right by three bits in out[0].
 //   input  - top-left sample of the block; rows are `stride` samples apart.
 //   out    - only out[0] is written.
 //   stride - row pitch of `input`, in samples.
 // NOTE(review): assumes LD_HADD(p, stride) adds up an 8-row by 8-column tile
 // starting at p -- confirm against the macro definition.
 // Row bands are addressed with `stride` rather than a literal 32, so the sum
 // matches aom_fdct32x32_1_c (which indexes input[r * stride + c]) for any
 // stride; for stride == 32 the addresses are the same as before.
 void aom_fdct32x32_1_msa(const int16_t *input, int16_t *out, int32_t stride) {
  int sum = 0;
  int band, tile;
  for (band = 0; band < 4; ++band) {
    // First sample of the band that starts 8 * band rows below the top.
    const int16_t *src = input + band * 8 * stride;
    for (tile = 0; tile < 4; ++tile) sum += LD_HADD(src + 8 * tile, stride);
  }
  out[0] = (int16_t)(sum >> 3);
 }
--- a/aom_dsp/mips/fwd_txfm_msa.c
+++ b/aom_dsp/mips/fwd_txfm_msa.c
@ -236,11 +236,3 @@ void aom_fdct16x16_msa(const int16_t *input, int16_t *output,
    fdct16x8_1d_row((&tmp_buf[0] + (128 * i)), (output + (128 * i)));
  }
 }
 // DC-only 16x16 forward transform (MSA): sums the block as a 2x2 grid of
 // tiles via LD_HADD and stores the sum shifted right by one bit in out[0].
 //   input  - top-left sample of the block; rows are `stride` samples apart.
 //   out    - only out[0] is written.
 //   stride - row pitch of `input`, in samples.
 // NOTE(review): assumes LD_HADD(p, stride) adds up an 8-row by 8-column tile
 // starting at p -- confirm against the macro definition.
 // The lower tiles are addressed with `8 * stride` rather than a literal
 // `16 * 8`, so the sum matches aom_fdct16x16_1_c (which indexes
 // input[r * stride + c]) for any stride; for stride == 16 the addresses are
 // the same as before.
 void aom_fdct16x16_1_msa(const int16_t *input, int16_t *out, int32_t stride) {
  const int16_t *lower = input + 8 * stride;
  int sum = LD_HADD(input, stride);
  sum += LD_HADD(input + 8, stride);
  sum += LD_HADD(lower, stride);
  sum += LD_HADD(lower + 8, stride);
  out[0] = (int16_t)(sum >> 1);
 }
--- a/aom_dsp/x86/fwd_txfm_sse2.c
+++ b/aom_dsp/x86/fwd_txfm_sse2.c
@ -85,147 +85,6 @@ void aom_fdct8x8_1_sse2(const int16_t *input, tran_low_t *output, int stride) {
  output[0] = (tran_low_t)_mm_cvtsi128_si32(in1);
 }
 // DC-only forward transform of a 16x16 block, SSE2 version: adds up all 256
 // input samples and writes the sum, arithmetically shifted right by 1, to
 // output[0].
 //   input  - top-left sample; rows are `stride` samples apart. Rows are read
 //            with _mm_load_si128, which requires 16-byte-aligned addresses.
 //   output - only output[0] is written.
 // Partial sums are kept in eight 16-bit lanes (_mm_add_epi16, which wraps on
 // overflow) until the widening step after the loop; each lane accumulates
 // 32 samples.
 void aom_fdct16x16_1_sse2(const int16_t *input, tran_low_t *output,
                          int stride) {
  __m128i in0, in1, in2, in3;
  __m128i u0, u1;
  __m128i sum = _mm_setzero_si128();
  int i;
  // Two passes of 8 rows each; every row is two 8-sample vectors. The loads
  // for the next pair of rows are placed between the adds for the previous
  // pair.
  for (i = 0; i < 2; ++i) {
    in0 = _mm_load_si128((const __m128i *)(input + 0 * stride + 0));
    in1 = _mm_load_si128((const __m128i *)(input + 0 * stride + 8));
    in2 = _mm_load_si128((const __m128i *)(input + 1 * stride + 0));
    in3 = _mm_load_si128((const __m128i *)(input + 1 * stride + 8));
    u0 = _mm_add_epi16(in0, in1);
    u1 = _mm_add_epi16(in2, in3);
    sum = _mm_add_epi16(sum, u0);
    in0 = _mm_load_si128((const __m128i *)(input + 2 * stride + 0));
    in1 = _mm_load_si128((const __m128i *)(input + 2 * stride + 8));
    in2 = _mm_load_si128((const __m128i *)(input + 3 * stride + 0));
    in3 = _mm_load_si128((const __m128i *)(input + 3 * stride + 8));
    sum = _mm_add_epi16(sum, u1);
    u0 = _mm_add_epi16(in0, in1);
    u1 = _mm_add_epi16(in2, in3);
    sum = _mm_add_epi16(sum, u0);
    in0 = _mm_load_si128((const __m128i *)(input + 4 * stride + 0));
    in1 = _mm_load_si128((const __m128i *)(input + 4 * stride + 8));
    in2 = _mm_load_si128((const __m128i *)(input + 5 * stride + 0));
    in3 = _mm_load_si128((const __m128i *)(input + 5 * stride + 8));
    sum = _mm_add_epi16(sum, u1);
    u0 = _mm_add_epi16(in0, in1);
    u1 = _mm_add_epi16(in2, in3);
    sum = _mm_add_epi16(sum, u0);
    in0 = _mm_load_si128((const __m128i *)(input + 6 * stride + 0));
    in1 = _mm_load_si128((const __m128i *)(input + 6 * stride + 8));
    in2 = _mm_load_si128((const __m128i *)(input + 7 * stride + 0));
    in3 = _mm_load_si128((const __m128i *)(input + 7 * stride + 8));
    sum = _mm_add_epi16(sum, u1);
    u0 = _mm_add_epi16(in0, in1);
    u1 = _mm_add_epi16(in2, in3);
    sum = _mm_add_epi16(sum, u0);
    sum = _mm_add_epi16(sum, u1);
    // Advance to the next group of 8 rows.
    input += 8 * stride;
  }
  // Sign-extend the eight 16-bit partial sums to 32 bits: interleaving with
  // zero puts each value in the upper half of a 32-bit lane, and the
  // arithmetic shift right by 16 moves it back down with its sign.
  u0 = _mm_setzero_si128();
  in0 = _mm_unpacklo_epi16(u0, sum);
  in1 = _mm_unpackhi_epi16(u0, sum);
  in0 = _mm_srai_epi32(in0, 16);
  in1 = _mm_srai_epi32(in1, 16);
  sum = _mm_add_epi32(in0, in1);
  // Reduce the four 32-bit partial sums so that lane 0 holds the total.
  in0 = _mm_unpacklo_epi32(sum, u0);
  in1 = _mm_unpackhi_epi32(sum, u0);
  sum = _mm_add_epi32(in0, in1);
  in0 = _mm_srli_si128(sum, 8);
  in1 = _mm_add_epi32(sum, in0);
  // Arithmetic shift right by 1, then store the low 32-bit lane.
  in1 = _mm_srai_epi32(in1, 1);
  output[0] = (tran_low_t)_mm_cvtsi128_si32(in1);
 }
 // DC-only forward transform of a 32x32 block, SSE2 version: adds up all 1024
 // input samples and writes the sum, arithmetically shifted right by 3, to
 // output[0].
 //   input  - top-left sample; rows are `stride` samples apart. Rows are read
 //            with _mm_load_si128, which requires 16-byte-aligned addresses.
 //   output - only output[0] is written.
 // Partial sums are kept in eight 16-bit lanes (_mm_add_epi16, which wraps on
 // overflow) until the widening step after the loop; each lane accumulates
 // 128 samples.
 void aom_fdct32x32_1_sse2(const int16_t *input, tran_low_t *output,
                          int stride) {
  __m128i in0, in1, in2, in3;
  __m128i u0, u1;
  __m128i sum = _mm_setzero_si128();
  int i;
  // Eight passes of 4 rows each; every row is four 8-sample vectors. `input`
  // is advanced by one row right after each row's loads, and the adds for a
  // row are placed around the loads of the following row.
  for (i = 0; i < 8; ++i) {
    in0 = _mm_load_si128((const __m128i *)(input + 0));
    in1 = _mm_load_si128((const __m128i *)(input + 8));
    in2 = _mm_load_si128((const __m128i *)(input + 16));
    in3 = _mm_load_si128((const __m128i *)(input + 24));
    input += stride;
    u0 = _mm_add_epi16(in0, in1);
    u1 = _mm_add_epi16(in2, in3);
    sum = _mm_add_epi16(sum, u0);
    in0 = _mm_load_si128((const __m128i *)(input + 0));
    in1 = _mm_load_si128((const __m128i *)(input + 8));
    in2 = _mm_load_si128((const __m128i *)(input + 16));
    in3 = _mm_load_si128((const __m128i *)(input + 24));
    input += stride;
    sum = _mm_add_epi16(sum, u1);
    u0 = _mm_add_epi16(in0, in1);
    u1 = _mm_add_epi16(in2, in3);
    sum = _mm_add_epi16(sum, u0);
    in0 = _mm_load_si128((const __m128i *)(input + 0));
    in1 = _mm_load_si128((const __m128i *)(input + 8));
    in2 = _mm_load_si128((const __m128i *)(input + 16));
    in3 = _mm_load_si128((const __m128i *)(input + 24));
    input += stride;
    sum = _mm_add_epi16(sum, u1);
    u0 = _mm_add_epi16(in0, in1);
    u1 = _mm_add_epi16(in2, in3);
    sum = _mm_add_epi16(sum, u0);
    in0 = _mm_load_si128((const __m128i *)(input + 0));
    in1 = _mm_load_si128((const __m128i *)(input + 8));
    in2 = _mm_load_si128((const __m128i *)(input + 16));
    in3 = _mm_load_si128((const __m128i *)(input + 24));
    input += stride;
    sum = _mm_add_epi16(sum, u1);
    u0 = _mm_add_epi16(in0, in1);
    u1 = _mm_add_epi16(in2, in3);
    sum = _mm_add_epi16(sum, u0);
    sum = _mm_add_epi16(sum, u1);
  }
  // Sign-extend the eight 16-bit partial sums to 32 bits: interleaving with
  // zero puts each value in the upper half of a 32-bit lane, and the
  // arithmetic shift right by 16 moves it back down with its sign.
  u0 = _mm_setzero_si128();
  in0 = _mm_unpacklo_epi16(u0, sum);
  in1 = _mm_unpackhi_epi16(u0, sum);
  in0 = _mm_srai_epi32(in0, 16);
  in1 = _mm_srai_epi32(in1, 16);
  sum = _mm_add_epi32(in0, in1);
  // Reduce the four 32-bit partial sums so that lane 0 holds the total.
  in0 = _mm_unpacklo_epi32(sum, u0);
  in1 = _mm_unpackhi_epi32(sum, u0);
  sum = _mm_add_epi32(in0, in1);
  in0 = _mm_srli_si128(sum, 8);
  in1 = _mm_add_epi32(sum, in0);
  // Arithmetic shift right by 3, then store the low 32-bit lane.
  in1 = _mm_srai_epi32(in1, 3);
  output[0] = (tran_low_t)_mm_cvtsi128_si32(in1);
 }
 #define DCT_HIGH_BIT_DEPTH 0
 #define FDCT4x4_2D aom_fdct4x4_sse2
 #define FDCT8x8_2D aom_fdct8x8_sse2
--- a/av1/encoder/x86/hybrid_fwd_txfm_avx2.c
+++ b/av1/encoder/x86/hybrid_fwd_txfm_avx2.c
@ -18,51 +18,6 @@
 #include "aom_dsp/txfm_common.h"
 #include "aom_dsp/x86/txfm_common_avx2.h"
 // Returns the sum of the 16x16 block of samples starting at `input`, whose
 // rows are `stride` samples apart. Each row is fetched with one unaligned
 // 256-bit load (16 samples). Sums are accumulated in 16-bit lanes
 // (_mm256_add_epi16, which wraps on overflow) and only widened to 32 bits
 // after the loop; each lane accumulates 16 samples.
 static int32_t get_16x16_sum(const int16_t *input, int stride) {
  __m256i r0, r1, r2, r3, u0, u1;
  __m256i zero = _mm256_setzero_si256();
  __m256i sum = _mm256_setzero_si256();
  // First sample past the last row: input + 16 * stride.
  const int16_t *blockBound = input + (stride << 4);
  __m128i v0, v1;
  // Four rows per iteration until `input` reaches blockBound.
  while (input < blockBound) {
    r0 = _mm256_loadu_si256((__m256i const *)input);
    r1 = _mm256_loadu_si256((__m256i const *)(input + stride));
    r2 = _mm256_loadu_si256((__m256i const *)(input + 2 * stride));
    r3 = _mm256_loadu_si256((__m256i const *)(input + 3 * stride));
    u0 = _mm256_add_epi16(r0, r1);
    u1 = _mm256_add_epi16(r2, r3);
    sum = _mm256_add_epi16(sum, u0);
    sum = _mm256_add_epi16(sum, u1);
    input += stride << 2;
  }
  // unpack 16 int16_t into 2x8 int32_t
  // (interleaving with zero puts each value in the upper half of a 32-bit
  // lane; the arithmetic shift right by 16 then sign-extends it)
  u0 = _mm256_unpacklo_epi16(zero, sum);
  u1 = _mm256_unpackhi_epi16(zero, sum);
  u0 = _mm256_srai_epi32(u0, 16);
  u1 = _mm256_srai_epi32(u1, 16);
  sum = _mm256_add_epi32(u0, u1);
  // Fold within each 128-bit half: after this add, 32-bit lanes 0 and 1 of
  // each half hold that half's pairwise sums.
  u0 = _mm256_srli_si256(sum, 8);
  u1 = _mm256_add_epi32(sum, u0);
  // Add the upper 128-bit half to the lower one, then fold lane 1 into
  // lane 0 so that lane 0 holds the total.
  v0 = _mm_add_epi32(_mm256_extracti128_si256(u1, 1),
                     _mm256_castsi256_si128(u1));
  v1 = _mm_srli_si128(v0, 4);
  v0 = _mm_add_epi32(v0, v1);
  return (int32_t)_mm_extract_epi32(v0, 0);
 }
 // DC-only forward transform of a 16x16 block, AVX2 version: writes the block
 // sum returned by get_16x16_sum, shifted right by 1, to output[0]. Only
 // output[0] is written.
 void aom_fdct16x16_1_avx2(const int16_t *input, tran_low_t *output,
                          int stride) {
  int32_t dc = get_16x16_sum(input, stride);
  output[0] = (tran_low_t)(dc >> 1);
  // Clear the upper halves of the YMM registers before returning
  // (presumably to avoid AVX/SSE transition penalties in the caller).
  _mm256_zeroupper();
 }
 static INLINE void load_buffer_16x16(const int16_t *input, int stride,
                                     int flipud, int fliplr, __m256i *in) {
  if (!flipud) {
@ -1084,22 +1039,6 @@ void av1_fht16x16_avx2(const int16_t *input, tran_low_t *output, int stride,
  _mm256_zeroupper();
 }
 // DC-only forward transform of a 32x32 block, AVX2 version. The block is
 // treated as a 2x2 grid of 16x16 quadrants, each summed by get_16x16_sum;
 // the combined sum shifted right by 3 is written to output[0].
 void aom_fdct32x32_1_avx2(const int16_t *input, tran_low_t *output,
                          int stride) {
  const int16_t *const top = input;
  const int16_t *const bottom = input + (stride << 4);  // 16 rows down
  int32_t total = 0;
  total += get_16x16_sum(top, stride);          // top-left quadrant
  total += get_16x16_sum(top + 16, stride);     // top-right quadrant
  total += get_16x16_sum(bottom, stride);       // bottom-left quadrant
  total += get_16x16_sum(bottom + 16, stride);  // bottom-right quadrant
  output[0] = (tran_low_t)(total >> 3);
  _mm256_zeroupper();
 }
 static void mm256_vectors_swap(__m256i *a0, __m256i *a1, const int size) {
  int i = 0;
  __m256i temp;
--- a/test/dct16x16_test.cc
+++ b/test/dct16x16_test.cc
@ -796,11 +796,6 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c, 1, AOM_BITS_8),
        make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c, 2, AOM_BITS_8),
        make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c, 3, AOM_BITS_8)));
 // "C" instances of PartialTrans16x16Test: aom_highbd_fdct16x16_1_c paired
 // with AOM_BITS_8, AOM_BITS_10 and AOM_BITS_12.
 INSTANTIATE_TEST_CASE_P(
    C, PartialTrans16x16Test,
    ::testing::Values(make_tuple(&aom_highbd_fdct16x16_1_c, AOM_BITS_8),
                      make_tuple(&aom_highbd_fdct16x16_1_c, AOM_BITS_10),
                      make_tuple(&aom_highbd_fdct16x16_1_c, AOM_BITS_12)));
 #else
 INSTANTIATE_TEST_CASE_P(
    C, Trans16x16HT,
@ -809,9 +804,6 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c, 1, AOM_BITS_8),
        make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c, 2, AOM_BITS_8),
        make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c, 3, AOM_BITS_8)));
 // "C" instance of PartialTrans16x16Test: aom_fdct16x16_1_c with AOM_BITS_8.
 INSTANTIATE_TEST_CASE_P(C, PartialTrans16x16Test,
                        ::testing::Values(make_tuple(&aom_fdct16x16_1_c,
                                                     AOM_BITS_8)));
 #endif  // CONFIG_HIGHBITDEPTH
 #if HAVE_NEON_ASM && !CONFIG_HIGHBITDEPTH
@ -836,17 +828,8 @@ INSTANTIATE_TEST_CASE_P(
                                 2, AOM_BITS_8),
                      make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2,
                                 3, AOM_BITS_8)));
 // "SSE2" instance of PartialTrans16x16Test: aom_fdct16x16_1_sse2 with
 // AOM_BITS_8.
 INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans16x16Test,
                        ::testing::Values(make_tuple(&aom_fdct16x16_1_sse2,
                                                     AOM_BITS_8)));
 #endif  // HAVE_SSE2 && !CONFIG_HIGHBITDEPTH
 #if HAVE_AVX2 && !CONFIG_HIGHBITDEPTH
 // "AVX2" instance of PartialTrans16x16Test: aom_fdct16x16_1_avx2 with
 // AOM_BITS_8.
 INSTANTIATE_TEST_CASE_P(AVX2, PartialTrans16x16Test,
                        ::testing::Values(make_tuple(&aom_fdct16x16_1_avx2,
                                                     AOM_BITS_8)));
 #endif  // HAVE_AVX2 && !CONFIG_HIGHBITDEPTH
 #if HAVE_SSE2 && CONFIG_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(SSE2, Trans16x16DCT,
                        ::testing::Values(make_tuple(&aom_fdct16x16_sse2,
@ -860,14 +843,6 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_c, 2, AOM_BITS_8),
        make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_c, 3,
                   AOM_BITS_8)));
 // TODO(luoyi):
 // This test case should exercise aom_highbd_fdct16x16_1_sse2, but that
 // function is not available yet. If we mistakenly test aom_fdct16x16_1_sse2
 // instead, it can only pass AOM_BITS_8/AOM_BITS_10 but not AOM_BITS_12.
 INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans16x16Test,
                        ::testing::Values(make_tuple(&aom_fdct16x16_1_sse2,
                                                     AOM_BITS_8)));
 #endif  // HAVE_SSE2 && CONFIG_HIGHBITDEPTH
 #if HAVE_MSA && !CONFIG_HIGHBITDEPTH
@ -886,8 +861,5 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&av1_fht16x16_msa, &av1_iht16x16_256_add_msa, 3,
                   AOM_BITS_8)));
 #endif  // !CONFIG_EXT_TX
 // "MSA" instance of PartialTrans16x16Test: aom_fdct16x16_1_msa with
 // AOM_BITS_8.
 INSTANTIATE_TEST_CASE_P(MSA, PartialTrans16x16Test,
                        ::testing::Values(make_tuple(&aom_fdct16x16_1_msa,
                                                     AOM_BITS_8)));
 #endif  // HAVE_MSA && !CONFIG_HIGHBITDEPTH
 }  // namespace
--- a/test/dct32x32_test.cc
+++ b/test/dct32x32_test.cc
@ -350,11 +350,6 @@ INSTANTIATE_TEST_CASE_P(
                                 AOM_BITS_8),
                      make_tuple(&aom_fdct32x32_rd_c, &aom_idct32x32_1024_add_c,
                                 1, AOM_BITS_8)));
 // "C" instances of PartialTrans32x32Test: aom_highbd_fdct32x32_1_c paired
 // with AOM_BITS_8, AOM_BITS_10 and AOM_BITS_12.
 INSTANTIATE_TEST_CASE_P(
    C, PartialTrans32x32Test,
    ::testing::Values(make_tuple(&aom_highbd_fdct32x32_1_c, AOM_BITS_8),
                      make_tuple(&aom_highbd_fdct32x32_1_c, AOM_BITS_10),
                      make_tuple(&aom_highbd_fdct32x32_1_c, AOM_BITS_12)));
 #else
 INSTANTIATE_TEST_CASE_P(
    C, Trans32x32Test,
@ -362,9 +357,6 @@ INSTANTIATE_TEST_CASE_P(
                                 AOM_BITS_8),
                      make_tuple(&aom_fdct32x32_rd_c, &aom_idct32x32_1024_add_c,
                                 1, AOM_BITS_8)));
 // "C" instance of PartialTrans32x32Test: aom_fdct32x32_1_c with AOM_BITS_8.
 INSTANTIATE_TEST_CASE_P(C, PartialTrans32x32Test,
                        ::testing::Values(make_tuple(&aom_fdct32x32_1_c,
                                                     AOM_BITS_8)));
 #endif  // CONFIG_HIGHBITDEPTH
 #if HAVE_NEON && !CONFIG_HIGHBITDEPTH
@ -383,17 +375,8 @@ INSTANTIATE_TEST_CASE_P(
                                 &aom_idct32x32_1024_add_sse2, 0, AOM_BITS_8),
                      make_tuple(&aom_fdct32x32_rd_sse2,
                                 &aom_idct32x32_1024_add_sse2, 1, AOM_BITS_8)));
 // "SSE2" instance of PartialTrans32x32Test: aom_fdct32x32_1_sse2 with
 // AOM_BITS_8.
 INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans32x32Test,
                        ::testing::Values(make_tuple(&aom_fdct32x32_1_sse2,
                                                     AOM_BITS_8)));
 #endif  // HAVE_SSE2 && !CONFIG_HIGHBITDEPTH
 #if HAVE_AVX2 && !CONFIG_HIGHBITDEPTH
 // "AVX2" instance of PartialTrans32x32Test: aom_fdct32x32_1_avx2 with
 // AOM_BITS_8.
 INSTANTIATE_TEST_CASE_P(AVX2, PartialTrans32x32Test,
                        ::testing::Values(make_tuple(&aom_fdct32x32_1_avx2,
                                                     AOM_BITS_8)));
 #endif  // HAVE_AVX2 && !CONFIG_HIGHBITDEPTH
 #if HAVE_SSE2 && CONFIG_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans32x32Test,
@ -401,9 +384,6 @@ INSTANTIATE_TEST_CASE_P(
                                 0, AOM_BITS_8),
                      make_tuple(&aom_fdct32x32_rd_sse2,
                                 &aom_idct32x32_1024_add_c, 1, AOM_BITS_8)));
 // "SSE2" instance of PartialTrans32x32Test in the
 // HAVE_SSE2 && CONFIG_HIGHBITDEPTH section: aom_fdct32x32_1_sse2 with
 // AOM_BITS_8.
 INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans32x32Test,
                        ::testing::Values(make_tuple(&aom_fdct32x32_1_sse2,
                                                     AOM_BITS_8)));
 #endif  // HAVE_SSE2 && CONFIG_HIGHBITDEPTH
 #if HAVE_AVX2 && !CONFIG_HIGHBITDEPTH
@ -431,8 +411,5 @@ INSTANTIATE_TEST_CASE_P(
                                 &aom_idct32x32_1024_add_msa, 0, AOM_BITS_8),
                      make_tuple(&aom_fdct32x32_rd_msa,
                                 &aom_idct32x32_1024_add_msa, 1, AOM_BITS_8)));
 // "MSA" instance of PartialTrans32x32Test: aom_fdct32x32_1_msa with
 // AOM_BITS_8.
 INSTANTIATE_TEST_CASE_P(MSA, PartialTrans32x32Test,
                        ::testing::Values(make_tuple(&aom_fdct32x32_1_msa,
                                                     AOM_BITS_8)));
 #endif  // HAVE_MSA && !CONFIG_HIGHBITDEPTH
 }  // namespace