aom/aom_dsp/mips/avg_msa.c

/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include "./aom_dsp_rtcd.h"
#include "aom_dsp/mips/macros_msa.h"

uint32_t aom_avg_8x8_msa(const uint8_t *src, int32_t src_stride) {
  uint32_t sum_out;
  v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
  v8u16 sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7;
  v4u32 sum = { 0 };

  LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
  HADD_UB4_UH(src0, src1, src2, src3, sum0, sum1, sum2, sum3);
  HADD_UB4_UH(src4, src5, src6, src7, sum4, sum5, sum6, sum7);
  ADD4(sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum0, sum2, sum4, sum6);
  ADD2(sum0, sum2, sum4, sum6, sum0, sum4);
  sum0 += sum4;

  sum = __msa_hadd_u_w(sum0, sum0);
  sum0 = (v8u16)__msa_pckev_h((v8i16)sum, (v8i16)sum);
  sum = __msa_hadd_u_w(sum0, sum0);
  sum = (v4u32)__msa_srari_w((v4i32)sum, 6);
  sum_out = __msa_copy_u_w((v4i32)sum, 0);

  return sum_out;
}

uint32_t aom_avg_4x4_msa(const uint8_t *src, int32_t src_stride) {
  uint32_t sum_out;
  uint32_t src0, src1, src2, src3;
  v16u8 vec = { 0 };
  v8u16 sum0;
  v4u32 sum1;
  v2u64 sum2;

  LW4(src, src_stride, src0, src1, src2, src3);
  INSERT_W4_UB(src0, src1, src2, src3, vec);

  sum0 = __msa_hadd_u_h(vec, vec);
  sum1 = __msa_hadd_u_w(sum0, sum0);
  sum0 = (v8u16)__msa_pckev_h((v8i16)sum1, (v8i16)sum1);
  sum1 = __msa_hadd_u_w(sum0, sum0);
  sum2 = __msa_hadd_u_d(sum1, sum1);
  sum1 = (v4u32)__msa_srari_w((v4i32)sum2, 4);
  sum_out = __msa_copy_u_w((v4i32)sum1, 0);

  return sum_out;
}
Fork VP9 and VP10 codebase This commit folks the VP9 and VP10 codebase and makes libvpx support VP8, VP9, and VP10. Change-Id: I81782e0b809acb3c9844bee8c8ec8f4d5e8fa356 2015-08-06 05:00:31 +03:00			`/*`
Change to use aom copyright notice This minimize code differences between AOM master and nextgenv2 Change-Id: If144865bdf3ef0818e7aac11018b9e786444c550 2016-09-02 00:32:49 +03:00			`* Copyright (c) 2016, Alliance for Open Media. All rights reserved`
Fork VP9 and VP10 codebase This commit folks the VP9 and VP10 codebase and makes libvpx support VP8, VP9, and VP10. Change-Id: I81782e0b809acb3c9844bee8c8ec8f4d5e8fa356 2015-08-06 05:00:31 +03:00			`*`
Change to use aom copyright notice This minimize code differences between AOM master and nextgenv2 Change-Id: If144865bdf3ef0818e7aac11018b9e786444c550 2016-09-02 00:32:49 +03:00			`* This source code is subject to the terms of the BSD 2 Clause License and`
			`* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License`
			`* was not distributed with this source code in the LICENSE file, you can`
			`* obtain it at www.aomedia.org/license/software. If the Alliance for Open`
			`* Media Patent License 1.0 was not distributed with this source code in the`
			`* PATENTS file, you can obtain it at www.aomedia.org/license/patent.`
Fork VP9 and VP10 codebase This commit folks the VP9 and VP10 codebase and makes libvpx support VP8, VP9, and VP10. Change-Id: I81782e0b809acb3c9844bee8c8ec8f4d5e8fa356 2015-08-06 05:00:31 +03:00			`*/`

Port renaming changes from AOMedia Cherry-Picked the following commits: 0defd8f Changed "WebM" to "AOMedia" & "webm" to "aomedia" 54e6676 Replace "VPx" by "AVx" 5082a36 Change "Vpx" to "Avx" 7df44f1 Replace "Vp9" w/ "Av1" 967f722 Remove kVp9CodecId 828f30c Change "Vp8" to "AOM" 030b5ff AUTHORS regenerated 2524cae Add ref-mv experimental flag 016762b Change copyright notice to AOMedia form 81e5526 Replace vp9 w/ av1 9b94565 Add missing files fa8ca9f Change "vp9" to "av1" ec838b7 Convert "vp8" to "aom" 80edfa0 Change "VP9" to "AV1" d1a11fb Change "vp8" to "aom" 7b58251 Point to WebM test data dd1a5c8 Replace "VP8" with "AOM" ff00fc0 Change "VPX" to "AOM" 01dee0b Change "vp10" to "av1" in source code cebe6f0 Convert "vpx" to "aom" 17b0567 rename vp10.mk to av1_.mk fe5f8a8 rename files vp10_* to av1_* Change-Id: I6fc3d18eb11fc171e46140c836ad5339cf6c9419 2016-08-31 00:01:10 +03:00			`#include "./aom_dsp_rtcd.h"`
Port folder renaming changes from AOM Manually cherry-picked commits: ceef058 libvpx->libaom part2 3d26d91 libvpx -> libaom cfea7dd vp10/ -> av1/ 3a8eff7 Fix a build issue for a test bf4202e Rename vpx to aom Change-Id: I1b0eb5a40796e3aaf41c58984b4229a439a597dc 2016-08-23 02:08:15 +03:00			`#include "aom_dsp/mips/macros_msa.h"`
Fork VP9 and VP10 codebase This commit folks the VP9 and VP10 codebase and makes libvpx support VP8, VP9, and VP10. Change-Id: I81782e0b809acb3c9844bee8c8ec8f4d5e8fa356 2015-08-06 05:00:31 +03:00
Port renaming changes from AOMedia Cherry-Picked the following commits: 0defd8f Changed "WebM" to "AOMedia" & "webm" to "aomedia" 54e6676 Replace "VPx" by "AVx" 5082a36 Change "Vpx" to "Avx" 7df44f1 Replace "Vp9" w/ "Av1" 967f722 Remove kVp9CodecId 828f30c Change "Vp8" to "AOM" 030b5ff AUTHORS regenerated 2524cae Add ref-mv experimental flag 016762b Change copyright notice to AOMedia form 81e5526 Replace vp9 w/ av1 9b94565 Add missing files fa8ca9f Change "vp9" to "av1" ec838b7 Convert "vp8" to "aom" 80edfa0 Change "VP9" to "AV1" d1a11fb Change "vp8" to "aom" 7b58251 Point to WebM test data dd1a5c8 Replace "VP8" with "AOM" ff00fc0 Change "VPX" to "AOM" 01dee0b Change "vp10" to "av1" in source code cebe6f0 Convert "vpx" to "aom" 17b0567 rename vp10.mk to av1_.mk fe5f8a8 rename files vp10_* to av1_* Change-Id: I6fc3d18eb11fc171e46140c836ad5339cf6c9419 2016-08-31 00:01:10 +03:00			`uint32_t aom_avg_8x8_msa(const uint8_t *src, int32_t src_stride) {`
Fork VP9 and VP10 codebase This commit folks the VP9 and VP10 codebase and makes libvpx support VP8, VP9, and VP10. Change-Id: I81782e0b809acb3c9844bee8c8ec8f4d5e8fa356 2015-08-06 05:00:31 +03:00			`uint32_t sum_out;`
			`v16u8 src0, src1, src2, src3, src4, src5, src6, src7;`
			`v8u16 sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7;`
			`v4u32 sum = { 0 };`

			`LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);`
			`HADD_UB4_UH(src0, src1, src2, src3, sum0, sum1, sum2, sum3);`
			`HADD_UB4_UH(src4, src5, src6, src7, sum4, sum5, sum6, sum7);`
			`ADD4(sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum0, sum2, sum4, sum6);`
			`ADD2(sum0, sum2, sum4, sum6, sum0, sum4);`
			`sum0 += sum4;`

			`sum = __msa_hadd_u_w(sum0, sum0);`
			`sum0 = (v8u16)__msa_pckev_h((v8i16)sum, (v8i16)sum);`
			`sum = __msa_hadd_u_w(sum0, sum0);`
			`sum = (v4u32)__msa_srari_w((v4i32)sum, 6);`
			`sum_out = __msa_copy_u_w((v4i32)sum, 0);`

			`return sum_out;`
			`}`

Port renaming changes from AOMedia Cherry-Picked the following commits: 0defd8f Changed "WebM" to "AOMedia" & "webm" to "aomedia" 54e6676 Replace "VPx" by "AVx" 5082a36 Change "Vpx" to "Avx" 7df44f1 Replace "Vp9" w/ "Av1" 967f722 Remove kVp9CodecId 828f30c Change "Vp8" to "AOM" 030b5ff AUTHORS regenerated 2524cae Add ref-mv experimental flag 016762b Change copyright notice to AOMedia form 81e5526 Replace vp9 w/ av1 9b94565 Add missing files fa8ca9f Change "vp9" to "av1" ec838b7 Convert "vp8" to "aom" 80edfa0 Change "VP9" to "AV1" d1a11fb Change "vp8" to "aom" 7b58251 Point to WebM test data dd1a5c8 Replace "VP8" with "AOM" ff00fc0 Change "VPX" to "AOM" 01dee0b Change "vp10" to "av1" in source code cebe6f0 Convert "vpx" to "aom" 17b0567 rename vp10.mk to av1_.mk fe5f8a8 rename files vp10_* to av1_* Change-Id: I6fc3d18eb11fc171e46140c836ad5339cf6c9419 2016-08-31 00:01:10 +03:00			`uint32_t aom_avg_4x4_msa(const uint8_t *src, int32_t src_stride) {`
Fork VP9 and VP10 codebase This commit folks the VP9 and VP10 codebase and makes libvpx support VP8, VP9, and VP10. Change-Id: I81782e0b809acb3c9844bee8c8ec8f4d5e8fa356 2015-08-06 05:00:31 +03:00			`uint32_t sum_out;`
			`uint32_t src0, src1, src2, src3;`
			`v16u8 vec = { 0 };`
			`v8u16 sum0;`
			`v4u32 sum1;`
			`v2u64 sum2;`

			`LW4(src, src_stride, src0, src1, src2, src3);`
			`INSERT_W4_UB(src0, src1, src2, src3, vec);`

			`sum0 = __msa_hadd_u_h(vec, vec);`
			`sum1 = __msa_hadd_u_w(sum0, sum0);`
			`sum0 = (v8u16)__msa_pckev_h((v8i16)sum1, (v8i16)sum1);`
			`sum1 = __msa_hadd_u_w(sum0, sum0);`
			`sum2 = __msa_hadd_u_d(sum1, sum1);`
			`sum1 = (v4u32)__msa_srari_w((v4i32)sum2, 4);`
			`sum_out = __msa_copy_u_w((v4i32)sum1, 0);`

			`return sum_out;`
			`}`