Merge "Simplify rounding in vp10_[fwd/inv]_txfm[1/2]d_#x#" into nextgenv2
This commit is contained in:
Коммит
1b755039c6
|
@ -31,7 +31,7 @@ static int8_t cos_bit[12] = {14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14};
|
|||
static int8_t range_bit[12] = {32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32};
|
||||
|
||||
TEST(vp10_fwd_txfm1d, round_shift) {
|
||||
EXPECT_EQ(round_shift(7, 1), 3);
|
||||
EXPECT_EQ(round_shift(7, 1), 4);
|
||||
EXPECT_EQ(round_shift(-7, 1), -3);
|
||||
|
||||
EXPECT_EQ(round_shift(7, 2), 2);
|
||||
|
@ -46,17 +46,6 @@ TEST(vp10_fwd_txfm1d, get_max_bit) {
|
|||
EXPECT_EQ(max_bit, 3);
|
||||
}
|
||||
|
||||
TEST(vp10_fwd_txfm1d, half_btf) {
|
||||
int32_t max = (1 << 15) - 1;
|
||||
int32_t w0 = max;
|
||||
int32_t in0 = max;
|
||||
int32_t w1 = max;
|
||||
int32_t in1 = max;
|
||||
int32_t result_32 = half_btf(w0, in0, w1, in1, 0);
|
||||
int64_t result_64 = (int64_t)w0 * (int64_t)in0 + (int64_t)w1 * (int64_t)in1;
|
||||
EXPECT_EQ(result_32, result_64);
|
||||
}
|
||||
|
||||
TEST(vp10_fwd_txfm1d, cospi_arr) {
|
||||
for (int i = 0; i < 7; i++) {
|
||||
for (int j = 0; j < 64; j++) {
|
||||
|
|
|
@ -81,23 +81,7 @@ static const int32_t cospi_arr[7][64] =
|
|||
12785, 11204, 9616, 8022, 6424, 4821, 3216, 1608}};
|
||||
|
||||
static INLINE int32_t round_shift(int32_t value, int bit) {
|
||||
// For value >= 0,
|
||||
// there are two versions of rounding
|
||||
// 1) (value + (1 << (bit - 1)) - 1) >> bit
|
||||
// 2) (value + (1 << (bit - 1))) >> bit
|
||||
// both methods are nearly unbiased
|
||||
// however, the first version has a slight advantage because
|
||||
// it rounds halfway cases toward zero.
|
||||
// For value < 0, we also choose the version that rounds halfway cases
|
||||
// toward zero.
|
||||
if (bit > 0) {
|
||||
if (value >= 0)
|
||||
return (value + (1 << (bit - 1)) - 1) >> bit;
|
||||
else
|
||||
return ((value - (1 << (bit - 1))) >> bit) + 1;
|
||||
} else {
|
||||
return value << (-bit);
|
||||
}
|
||||
return (value + (1 << (bit - 1))) >> bit;
|
||||
}
|
||||
|
||||
static INLINE void round_shift_array(int32_t *arr, int size, int bit) {
|
||||
|
@ -105,8 +89,14 @@ static INLINE void round_shift_array(int32_t *arr, int size, int bit) {
|
|||
if (bit == 0) {
|
||||
return;
|
||||
} else {
|
||||
for (i = 0; i < size; i++) {
|
||||
arr[i] = round_shift(arr[i], bit);
|
||||
if (bit > 0) {
|
||||
for (i = 0; i < size; i++) {
|
||||
arr[i] = round_shift(arr[i], bit);
|
||||
}
|
||||
} else {
|
||||
for (i = 0; i < size; i++) {
|
||||
arr[i] = arr[i] << (-bit);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -81,32 +81,20 @@ static INLINE void transpose_32(int txfm_size, const __m128i* input,
|
|||
}
|
||||
}
|
||||
|
||||
#define mullo_epi32(a, b) \
|
||||
({ \
|
||||
#define mullo_epi32(a, b) \
|
||||
({ \
|
||||
__m128i tmp1 = _mm_mul_epu32(a, b); \
|
||||
__m128i tmp2 = _mm_mul_epu32(_mm_srli_si128(a, 4), _mm_srli_si128(b, 4)); \
|
||||
_mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, _MM_SHUFFLE(0, 0, 2, 0)), \
|
||||
_mm_shuffle_epi32(tmp2, _MM_SHUFFLE(0, 0, 2, 0))); \
|
||||
_mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, _MM_SHUFFLE(0, 0, 2, 0)), \
|
||||
_mm_shuffle_epi32(tmp2, _MM_SHUFFLE(0, 0, 2, 0))); \
|
||||
})
|
||||
|
||||
#define round_shift_32_simple_sse2(input, bit) \
|
||||
({ \
|
||||
__m128i round = _mm_set1_epi32((1 << (bit - 1)) - 1); \
|
||||
__m128i tmp1 = _mm_add_epi32(input, round); \
|
||||
_mm_srai_epi32(tmp1, bit); \
|
||||
})
|
||||
|
||||
#define round_shift_32_sse2(vec, bit) \
|
||||
({ \
|
||||
__m128i sign, tmp, round; \
|
||||
sign = _mm_srai_epi32(vec, 31); \
|
||||
tmp = _mm_add_epi32(vec, sign); \
|
||||
tmp = _mm_xor_si128(tmp, sign); \
|
||||
round = _mm_set1_epi32((1 << (bit - 1)) - 1); \
|
||||
tmp = _mm_add_epi32(tmp, round); \
|
||||
tmp = _mm_srli_epi32(tmp, bit); \
|
||||
tmp = _mm_xor_si128(tmp, sign); \
|
||||
_mm_sub_epi32(tmp, sign); \
|
||||
#define round_shift_32_sse2(vec, bit) \
|
||||
({ \
|
||||
__m128i tmp, round; \
|
||||
round = _mm_set1_epi32(1 << (bit - 1)); \
|
||||
tmp = _mm_add_epi32(vec, round); \
|
||||
_mm_srai_epi32(tmp, bit); \
|
||||
})
|
||||
|
||||
#define round_shift_array_32_sse2(input, output, size, bit) \
|
||||
|
@ -128,7 +116,7 @@ static INLINE void transpose_32(int txfm_size, const __m128i* input,
|
|||
// out1 = -in1*w0 + in0*w1
|
||||
#define btf_32_sse2_type0(w0, w1, in0, in1, out0, out1, bit) \
|
||||
({ \
|
||||
__m128i ww0, ww1, in0_w0, in1_w1, in0_w1, in1_w0; \
|
||||
__m128i ww0, ww1, in0_w0, in1_w1, in0_w1, in1_w0; \
|
||||
ww0 = _mm_set1_epi32(w0); \
|
||||
ww1 = _mm_set1_epi32(w1); \
|
||||
in0_w0 = mullo_epi32(in0, ww0); \
|
||||
|
@ -145,7 +133,7 @@ static INLINE void transpose_32(int txfm_size, const __m128i* input,
|
|||
// out1 = in1*w0 - in0*w1
|
||||
#define btf_32_sse2_type1(w0, w1, in0, in1, out0, out1, bit) \
|
||||
({ \
|
||||
__m128i ww0, ww1, in0_w0, in1_w1, in0_w1, in1_w0; \
|
||||
__m128i ww0, ww1, in0_w0, in1_w1, in0_w1, in1_w0; \
|
||||
ww0 = _mm_set1_epi32(w0); \
|
||||
ww1 = _mm_set1_epi32(w1); \
|
||||
in0_w0 = mullo_epi32(in0, ww0); \
|
||||
|
|
Загрузка…
Ссылка в новой задаче