Scale the normalization factor depending on the block size

Change-Id: I0a26994bf65ea224e496b09af2ce71e1a4210433
This commit is contained in:
Jingning Han 2015-03-02 10:28:12 -08:00
Родитель 6cf7b3b240
Коммит a521008201
2 изменённых файла: 16 добавлений и 5 удалений

Просмотреть файл

@@ -32,12 +32,13 @@ unsigned int vp9_avg_4x4_c(const uint8_t *s, int p) {
void vp9_int_pro_row_c(int16_t *hbuf, uint8_t const *ref, void vp9_int_pro_row_c(int16_t *hbuf, uint8_t const *ref,
const int ref_stride, const int height) { const int ref_stride, const int height) {
int idx; int idx;
const int norm_factor = MAX(8, height >> 1);
for (idx = 0; idx < 16; ++idx) { for (idx = 0; idx < 16; ++idx) {
int i; int i;
hbuf[idx] = 0; hbuf[idx] = 0;
for (i = 0; i < height; ++i) for (i = 0; i < height; ++i)
hbuf[idx] += ref[i * ref_stride]; hbuf[idx] += ref[i * ref_stride];
hbuf[idx] /= 32; hbuf[idx] /= norm_factor;
++ref; ++ref;
} }
} }
@@ -45,9 +46,10 @@ void vp9_int_pro_row_c(int16_t *hbuf, uint8_t const *ref,
int16_t vp9_int_pro_col_c(uint8_t const *ref, const int width) { int16_t vp9_int_pro_col_c(uint8_t const *ref, const int width) {
int idx; int idx;
int16_t sum = 0; int16_t sum = 0;
const int norm_factor = MAX(8, width >> 1);
for (idx = 0; idx < width; ++idx) for (idx = 0; idx < width; ++idx)
sum += ref[idx]; sum += ref[idx];
return sum / 32; return sum / norm_factor;
} }
int vp9_vector_var_c(int16_t const *ref, int16_t const *src, int vp9_vector_var_c(int16_t const *ref, int16_t const *src,

Просмотреть файл

@@ -90,8 +90,16 @@ void vp9_int_pro_row_sse2(int16_t *hbuf, uint8_t const*ref,
s0 = _mm_adds_epu16(s0, t0); s0 = _mm_adds_epu16(s0, t0);
s1 = _mm_adds_epu16(s1, t1); s1 = _mm_adds_epu16(s1, t1);
if (height == 64) {
s0 = _mm_srai_epi16(s0, 5); s0 = _mm_srai_epi16(s0, 5);
s1 = _mm_srai_epi16(s1, 5); s1 = _mm_srai_epi16(s1, 5);
} else if (height == 32) {
s0 = _mm_srai_epi16(s0, 4);
s1 = _mm_srai_epi16(s1, 4);
} else {
s0 = _mm_srai_epi16(s0, 3);
s1 = _mm_srai_epi16(s1, 3);
}
_mm_store_si128((__m128i *)hbuf, s0); _mm_store_si128((__m128i *)hbuf, s0);
hbuf += 8; hbuf += 8;
@@ -104,6 +112,7 @@ int16_t vp9_int_pro_col_sse2(uint8_t const *ref, const int width) {
__m128i s0 = _mm_sad_epu8(src_line, zero); __m128i s0 = _mm_sad_epu8(src_line, zero);
__m128i s1; __m128i s1;
int i; int i;
const int norm_factor = 3 + (width >> 5);
for (i = 16; i < width; i += 16) { for (i = 16; i < width; i += 16) {
ref += 16; ref += 16;
@@ -115,7 +124,7 @@ int16_t vp9_int_pro_col_sse2(uint8_t const *ref, const int width) {
s1 = _mm_srli_si128(s0, 8); s1 = _mm_srli_si128(s0, 8);
s0 = _mm_adds_epu16(s0, s1); s0 = _mm_adds_epu16(s0, s1);
return (_mm_extract_epi16(s0, 0)) >> 5; return _mm_extract_epi16(s0, 0) >> norm_factor;
} }
int vp9_vector_var_sse2(int16_t const *ref, int16_t const *src, int vp9_vector_var_sse2(int16_t const *ref, int16_t const *src,