Using stride (# of elements) instead of pitch (bytes) in fdct32x32.
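This makes fdct consistent with the iht/idct/fht functions, which all take a stride (# of elements) as their input argument. Callers that previously passed a pitch in bytes (e.g. `bw * 8`, or 64 in the tests) now pass the stride in int16_t elements (`bw * 4`, or 32).

Change-Id: Id623c5113262655fa50f7c9d6cec9a91fcb20bb4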
This commit is contained in:
Parent
518fc282f4
Commit
e05412fc23
|
@ -113,8 +113,7 @@ TEST_P(Trans32x32Test, AccuracyCheck) {
|
|||
test_input_block[j] = src[j] - dst[j];
|
||||
}
|
||||
|
||||
const int pitch = 64;
|
||||
REGISTER_STATE_CHECK(fwd_txfm_(test_input_block, test_temp_block, pitch));
|
||||
REGISTER_STATE_CHECK(fwd_txfm_(test_input_block, test_temp_block, 32));
|
||||
REGISTER_STATE_CHECK(inv_txfm_(test_temp_block, dst, 32));
|
||||
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
|
@ -150,9 +149,9 @@ TEST_P(Trans32x32Test, CoeffCheck) {
|
|||
for (int j = 0; j < kNumCoeffs; ++j)
|
||||
input_block[j] = rnd.Rand8() - rnd.Rand8();
|
||||
|
||||
const int pitch = 64;
|
||||
vp9_short_fdct32x32_c(input_block, output_ref_block, pitch);
|
||||
REGISTER_STATE_CHECK(fwd_txfm_(input_block, output_block, pitch));
|
||||
const int stride = 32;
|
||||
vp9_short_fdct32x32_c(input_block, output_ref_block, stride);
|
||||
REGISTER_STATE_CHECK(fwd_txfm_(input_block, output_block, stride));
|
||||
|
||||
if (version_ == 0) {
|
||||
for (int j = 0; j < kNumCoeffs; ++j)
|
||||
|
@ -188,9 +187,9 @@ TEST_P(Trans32x32Test, MemCheck) {
|
|||
for (int j = 0; j < kNumCoeffs; ++j)
|
||||
input_extreme_block[j] = -255;
|
||||
|
||||
const int pitch = 64;
|
||||
vp9_short_fdct32x32_c(input_extreme_block, output_ref_block, pitch);
|
||||
REGISTER_STATE_CHECK(fwd_txfm_(input_extreme_block, output_block, pitch));
|
||||
const int stride = 32;
|
||||
vp9_short_fdct32x32_c(input_extreme_block, output_ref_block, stride);
|
||||
REGISTER_STATE_CHECK(fwd_txfm_(input_extreme_block, output_block, stride));
|
||||
|
||||
// The minimum quant value is 4.
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
|
|
|
@ -701,10 +701,10 @@ specialize vp9_short_fdct8x8 sse2
|
|||
prototype void vp9_short_fdct4x4 "int16_t *InputData, int16_t *OutputData, int pitch"
|
||||
specialize vp9_short_fdct4x4 sse2
|
||||
|
||||
prototype void vp9_short_fdct32x32 "int16_t *InputData, int16_t *OutputData, int pitch"
|
||||
prototype void vp9_short_fdct32x32 "int16_t *InputData, int16_t *OutputData, int stride"
|
||||
specialize vp9_short_fdct32x32 sse2
|
||||
|
||||
prototype void vp9_short_fdct32x32_rd "int16_t *InputData, int16_t *OutputData, int pitch"
|
||||
prototype void vp9_short_fdct32x32_rd "int16_t *InputData, int16_t *OutputData, int stride"
|
||||
specialize vp9_short_fdct32x32_rd sse2
|
||||
|
||||
prototype void vp9_short_fdct16x16 "int16_t *InputData, int16_t *OutputData, int pitch"
|
||||
|
|
|
@ -1315,8 +1315,7 @@ static void dct32_1d(const int *input, int *output, int round) {
|
|||
output[31] = dct_32_round(step[31] * cospi_31_64 + step[16] * -cospi_1_64);
|
||||
}
|
||||
|
||||
void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch) {
|
||||
int shortpitch = pitch >> 1;
|
||||
void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int stride) {
|
||||
int i, j;
|
||||
int output[32 * 32];
|
||||
|
||||
|
@ -1324,7 +1323,7 @@ void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch) {
|
|||
for (i = 0; i < 32; ++i) {
|
||||
int temp_in[32], temp_out[32];
|
||||
for (j = 0; j < 32; ++j)
|
||||
temp_in[j] = input[j * shortpitch + i] * 4;
|
||||
temp_in[j] = input[j * stride + i] * 4;
|
||||
dct32_1d(temp_in, temp_out, 0);
|
||||
for (j = 0; j < 32; ++j)
|
||||
output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;
|
||||
|
@ -1344,8 +1343,7 @@ void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch) {
|
|||
// Note that although we use dct_32_round in dct32_1d computation flow,
|
||||
// this 2d fdct32x32 for rate-distortion optimization loop is operating
|
||||
// within 16 bits precision.
|
||||
void vp9_short_fdct32x32_rd_c(int16_t *input, int16_t *out, int pitch) {
|
||||
int shortpitch = pitch >> 1;
|
||||
void vp9_short_fdct32x32_rd_c(int16_t *input, int16_t *out, int stride) {
|
||||
int i, j;
|
||||
int output[32 * 32];
|
||||
|
||||
|
@ -1353,7 +1351,7 @@ void vp9_short_fdct32x32_rd_c(int16_t *input, int16_t *out, int pitch) {
|
|||
for (i = 0; i < 32; ++i) {
|
||||
int temp_in[32], temp_out[32];
|
||||
for (j = 0; j < 32; ++j)
|
||||
temp_in[j] = input[j * shortpitch + i] * 4;
|
||||
temp_in[j] = input[j * stride + i] * 4;
|
||||
dct32_1d(temp_in, temp_out, 0);
|
||||
for (j = 0; j < 32; ++j)
|
||||
// TODO(cd): see quality impact of only doing
|
||||
|
|
|
@ -365,9 +365,9 @@ void vp9_xform_quant(int plane, int block, BLOCK_SIZE plane_bsize,
|
|||
yoff = 32 * (block >> twl);
|
||||
src_diff = p->src_diff + 4 * bw * yoff + xoff;
|
||||
if (x->use_lp32x32fdct)
|
||||
vp9_short_fdct32x32_rd(src_diff, coeff, bw * 8);
|
||||
vp9_short_fdct32x32_rd(src_diff, coeff, bw * 4);
|
||||
else
|
||||
vp9_short_fdct32x32(src_diff, coeff, bw * 8);
|
||||
vp9_short_fdct32x32(src_diff, coeff, bw * 4);
|
||||
vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
|
||||
p->quant, p->quant_shift, qcoeff, dqcoeff,
|
||||
pd->dequant, p->zbin_extra, eob, scan, iscan);
|
||||
|
@ -532,9 +532,9 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
|
|||
vp9_subtract_block(32, 32, src_diff, bw * 4,
|
||||
src, p->src.stride, dst, pd->dst.stride);
|
||||
if (x->use_lp32x32fdct)
|
||||
vp9_short_fdct32x32_rd(src_diff, coeff, bw * 8);
|
||||
vp9_short_fdct32x32_rd(src_diff, coeff, bw * 4);
|
||||
else
|
||||
vp9_short_fdct32x32(src_diff, coeff, bw * 8);
|
||||
vp9_short_fdct32x32(src_diff, coeff, bw * 4);
|
||||
vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
|
||||
p->quant, p->quant_shift, qcoeff, dqcoeff,
|
||||
pd->dequant, p->zbin_extra, eob, scan, iscan);
|
||||
|
|
|
@ -30,11 +30,11 @@ static INLINE __m128i k_packs_epi64(__m128i a, __m128i b) {
|
|||
#endif
|
||||
|
||||
void FDCT32x32_2D(int16_t *input,
|
||||
int16_t *output_org, int pitch) {
|
||||
int16_t *output_org, int stride) {
|
||||
// Calculate pre-multiplied strides
|
||||
const int str1 = pitch >> 1;
|
||||
const int str2 = pitch;
|
||||
const int str3 = pitch + str1;
|
||||
const int str1 = stride;
|
||||
const int str2 = 2 * stride;
|
||||
const int str3 = 2 * stride + str1;
|
||||
// We need an intermediate buffer between passes.
|
||||
DECLARE_ALIGNED(16, int16_t, intermediate[32 * 32]);
|
||||
// Constants
|
||||
|
|
Loading…
Reference in new issue