Merge changes Ibbfa68d6,Idb76a0e2 into experimental
* changes:
  - Move EOB to per-plane data
  - Move qcoeff, dqcoeff from BLOCKD to per-plane data
This commit is contained in:
Commit
fa135d7b9e
|
@ -249,8 +249,6 @@ typedef struct {
|
|||
} MODE_INFO;
|
||||
|
||||
typedef struct blockd {
|
||||
int16_t *qcoeff;
|
||||
int16_t *dqcoeff;
|
||||
uint8_t *predictor;
|
||||
int16_t *diff;
|
||||
int16_t *dequant;
|
||||
|
@ -284,15 +282,28 @@ struct scale_factors {
|
|||
#endif
|
||||
};
|
||||
|
||||
enum { MAX_MB_PLANE = 3 };
|
||||
|
||||
struct mb_plane {
|
||||
DECLARE_ALIGNED(16, int16_t, qcoeff[64 * 64]);
|
||||
DECLARE_ALIGNED(16, int16_t, dqcoeff[64 * 64]);
|
||||
DECLARE_ALIGNED(16, uint16_t, eobs[256]);
|
||||
};
|
||||
|
||||
/* Pointer to sub-block number i inside buffer x, where each sub-block
 * occupies n consecutive entries (plain pointer arithmetic: x + i * n). */
#define BLOCK_OFFSET(x, i, n) ((x) + (i) * (n))

/* Resolve block index i to a position inside the per-plane array `field`
 * of x (x must be a pointer to something with a plane[] member):
 *   i < 16        -> plane[0], sub-block i
 *   16 <= i < 20  -> plane[1], sub-block i - 16
 *   otherwise     -> plane[2], sub-block i - 20
 * Each sub-block is 16 entries wide.
 * Caution: i is evaluated more than once, so do not pass an expression
 * with side effects. */
#define MB_SUBBLOCK_FIELD(x, field, i)                                  \
  (((i) < 16)   ? BLOCK_OFFSET((x)->plane[0].field, (i), 16)            \
   : ((i) < 20) ? BLOCK_OFFSET((x)->plane[1].field, ((i) - 16), 16)     \
                : BLOCK_OFFSET((x)->plane[2].field, ((i) - 20), 16))
|
||||
|
||||
typedef struct macroblockd {
|
||||
DECLARE_ALIGNED(16, int16_t, diff[64*64+32*32*2]); /* from idct diff */
|
||||
DECLARE_ALIGNED(16, uint8_t, predictor[384]); // unused for superblocks
|
||||
DECLARE_ALIGNED(16, int16_t, qcoeff[64*64+32*32*2]);
|
||||
DECLARE_ALIGNED(16, int16_t, dqcoeff[64*64+32*32*2]);
|
||||
DECLARE_ALIGNED(16, uint16_t, eobs[256+64*2]);
|
||||
#if CONFIG_CODE_NONZEROCOUNT
|
||||
DECLARE_ALIGNED(16, uint16_t, nzcs[256+64*2]);
|
||||
#endif
|
||||
struct mb_plane plane[MAX_MB_PLANE];
|
||||
|
||||
/* 16 Y blocks, 4 U, 4 V, each with 16 entries. */
|
||||
BLOCKD block[24];
|
||||
|
@ -372,8 +383,8 @@ typedef struct macroblockd {
|
|||
void (*itxm_add_y_block)(int16_t *q, const int16_t *dq,
|
||||
uint8_t *pre, uint8_t *dst, int stride, struct macroblockd *xd);
|
||||
void (*itxm_add_uv_block)(int16_t *q, const int16_t *dq,
|
||||
uint8_t *pre, uint8_t *dst_u, uint8_t *dst_v, int stride,
|
||||
struct macroblockd *xd);
|
||||
uint8_t *pre, uint8_t *dst, int stride,
|
||||
uint16_t *eobs);
|
||||
|
||||
struct subpix_fn_table subpix;
|
||||
|
||||
|
@ -669,4 +680,32 @@ static int get_nzc_used(TX_SIZE tx_size) {
|
|||
return (tx_size >= TX_16X16);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Result of splitting a combined block index: the plane number (0, 1 or 2)
 * and the block index relative to the start of that plane. */
struct plane_block_idx {
  int plane;
  int block;
};

// TODO(jkoleszar): returning a struct so it can be used in a const context,
// expect to refactor this further later.
/* Split the combined block index b_idx into (plane, block-within-plane).
 *
 * y_blocks is the number of blocks in plane 0. Plane 1 starts at index
 * y_blocks and plane 2 starts at y_blocks * 5 / 4; the total number of
 * valid indices is y_blocks * 3 / 2 (checked by assert for plane 2 only).
 */
static INLINE struct plane_block_idx plane_block_idx(int y_blocks,
                                                     int b_idx) {
  const int plane2_start = y_blocks * 5 / 4;
  struct plane_block_idx where;

  /* Plane 0 occupies [0, y_blocks). */
  if (b_idx < y_blocks) {
    where.plane = 0;
    where.block = b_idx;
    return where;
  }

  /* Plane 1 occupies [y_blocks, plane2_start). */
  if (b_idx < plane2_start) {
    where.plane = 1;
    where.block = b_idx - y_blocks;
    return where;
  }

  /* Everything else is plane 2; it must still be inside the index range. */
  assert(b_idx < y_blocks * 3 / 2);
  where.plane = 2;
  where.block = b_idx - plane2_start;
  return where;
}
|
||||
|
||||
|
||||
#endif // VP9_COMMON_VP9_BLOCKD_H_
|
||||
|
|
|
@ -26,9 +26,12 @@ void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd) {
|
|||
for (i = 0; i < 16; i++) {
|
||||
TX_TYPE tx_type = get_tx_type_4x4(xd, i);
|
||||
if (tx_type != DCT_DCT) {
|
||||
vp9_short_iht4x4(xd->block[i].dqcoeff, xd->block[i].diff, 16, tx_type);
|
||||
vp9_short_iht4x4(BLOCK_OFFSET(xd->plane[0].dqcoeff, i, 16),
|
||||
xd->block[i].diff, 16, tx_type);
|
||||
} else {
|
||||
vp9_inverse_transform_b_4x4(xd, xd->eobs[i], xd->block[i].dqcoeff,
|
||||
vp9_inverse_transform_b_4x4(xd,
|
||||
xd->plane[0].eobs[i],
|
||||
BLOCK_OFFSET(xd->plane[0].dqcoeff, i, 16),
|
||||
xd->block[i].diff, 32);
|
||||
}
|
||||
}
|
||||
|
@ -37,8 +40,14 @@ void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd) {
|
|||
void vp9_inverse_transform_mbuv_4x4(MACROBLOCKD *xd) {
|
||||
int i;
|
||||
|
||||
for (i = 16; i < 24; i++) {
|
||||
vp9_inverse_transform_b_4x4(xd, xd->eobs[i], xd->block[i].dqcoeff,
|
||||
for (i = 16; i < 20; i++) {
|
||||
vp9_inverse_transform_b_4x4(xd, xd->plane[1].eobs[i - 16],
|
||||
BLOCK_OFFSET(xd->plane[1].dqcoeff, i - 16, 16),
|
||||
xd->block[i].diff, 16);
|
||||
}
|
||||
for (i = 20; i < 24; i++) {
|
||||
vp9_inverse_transform_b_4x4(xd, xd->plane[2].eobs[i - 20],
|
||||
BLOCK_OFFSET(xd->plane[2].dqcoeff, i - 20, 16),
|
||||
xd->block[i].diff, 16);
|
||||
}
|
||||
}
|
||||
|
@ -60,19 +69,20 @@ void vp9_inverse_transform_mby_8x8(MACROBLOCKD *xd) {
|
|||
for (i = 0; i < 9; i += 8) {
|
||||
TX_TYPE tx_type = get_tx_type_8x8(xd, i);
|
||||
if (tx_type != DCT_DCT) {
|
||||
vp9_short_iht8x8(xd->block[i].dqcoeff, xd->block[i].diff, 16, tx_type);
|
||||
vp9_short_iht8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, i, 16),
|
||||
xd->block[i].diff, 16, tx_type);
|
||||
} else {
|
||||
vp9_inverse_transform_b_8x8(&blockd[i].dqcoeff[0],
|
||||
vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, i, 16),
|
||||
&blockd[i].diff[0], 32);
|
||||
}
|
||||
}
|
||||
for (i = 2; i < 11; i += 8) {
|
||||
TX_TYPE tx_type = get_tx_type_8x8(xd, i);
|
||||
if (tx_type != DCT_DCT) {
|
||||
vp9_short_iht8x8(xd->block[i + 2].dqcoeff, xd->block[i].diff,
|
||||
16, tx_type);
|
||||
vp9_short_iht8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, i + 2, 16),
|
||||
xd->block[i].diff, 16, tx_type);
|
||||
} else {
|
||||
vp9_inverse_transform_b_8x8(&blockd[i + 2].dqcoeff[0],
|
||||
vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, i + 2, 16),
|
||||
&blockd[i].diff[0], 32);
|
||||
}
|
||||
}
|
||||
|
@ -82,8 +92,12 @@ void vp9_inverse_transform_mbuv_8x8(MACROBLOCKD *xd) {
|
|||
int i;
|
||||
BLOCKD *blockd = xd->block;
|
||||
|
||||
for (i = 16; i < 24; i += 4) {
|
||||
vp9_inverse_transform_b_8x8(&blockd[i].dqcoeff[0],
|
||||
for (i = 16; i < 20; i += 4) {
|
||||
vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[1].dqcoeff, i - 16, 16),
|
||||
&blockd[i].diff[0], 16);
|
||||
}
|
||||
for (i = 20; i < 24; i += 4) {
|
||||
vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[2].dqcoeff, i - 20, 16),
|
||||
&blockd[i].diff[0], 16);
|
||||
}
|
||||
}
|
||||
|
@ -102,9 +116,10 @@ void vp9_inverse_transform_mby_16x16(MACROBLOCKD *xd) {
|
|||
BLOCKD *bd = &xd->block[0];
|
||||
TX_TYPE tx_type = get_tx_type_16x16(xd, 0);
|
||||
if (tx_type != DCT_DCT) {
|
||||
vp9_short_iht16x16(bd->dqcoeff, bd->diff, 16, tx_type);
|
||||
vp9_short_iht16x16(BLOCK_OFFSET(xd->plane[0].dqcoeff, 0, 16),
|
||||
bd->diff, 16, tx_type);
|
||||
} else {
|
||||
vp9_inverse_transform_b_16x16(&xd->block[0].dqcoeff[0],
|
||||
vp9_inverse_transform_b_16x16(BLOCK_OFFSET(xd->plane[0].dqcoeff, 0, 16),
|
||||
&xd->block[0].diff[0], 32);
|
||||
}
|
||||
}
|
||||
|
@ -115,7 +130,7 @@ void vp9_inverse_transform_mb_16x16(MACROBLOCKD *xd) {
|
|||
}
|
||||
|
||||
void vp9_inverse_transform_sby_32x32(MACROBLOCKD *xd) {
|
||||
vp9_short_idct32x32(xd->dqcoeff, xd->diff, 64);
|
||||
vp9_short_idct32x32(BLOCK_OFFSET(xd->plane[0].dqcoeff, 0, 16), xd->diff, 64);
|
||||
}
|
||||
|
||||
void vp9_inverse_transform_sby_16x16(MACROBLOCKD *xd) {
|
||||
|
@ -126,11 +141,11 @@ void vp9_inverse_transform_sby_16x16(MACROBLOCKD *xd) {
|
|||
const TX_TYPE tx_type = get_tx_type_16x16(xd, (y_idx * 8 + x_idx) * 4);
|
||||
|
||||
if (tx_type == DCT_DCT) {
|
||||
vp9_inverse_transform_b_16x16(xd->dqcoeff + n * 256,
|
||||
vp9_inverse_transform_b_16x16(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 256),
|
||||
xd->diff + x_idx * 16 + y_idx * 32 * 16,
|
||||
64);
|
||||
} else {
|
||||
vp9_short_iht16x16(xd->dqcoeff + n * 256,
|
||||
vp9_short_iht16x16(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 256),
|
||||
xd->diff + x_idx * 16 + y_idx * 32 * 16, 32, tx_type);
|
||||
}
|
||||
}
|
||||
|
@ -144,10 +159,10 @@ void vp9_inverse_transform_sby_8x8(MACROBLOCKD *xd) {
|
|||
const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 8 + x_idx) * 2);
|
||||
|
||||
if (tx_type == DCT_DCT) {
|
||||
vp9_inverse_transform_b_8x8(xd->dqcoeff + n * 64,
|
||||
vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 64),
|
||||
xd->diff + x_idx * 8 + y_idx * 32 * 8, 64);
|
||||
} else {
|
||||
vp9_short_iht8x8(xd->dqcoeff + n * 64,
|
||||
vp9_short_iht8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 64),
|
||||
xd->diff + x_idx * 8 + y_idx * 32 * 8, 32, tx_type);
|
||||
}
|
||||
}
|
||||
|
@ -161,19 +176,20 @@ void vp9_inverse_transform_sby_4x4(MACROBLOCKD *xd) {
|
|||
const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 8 + x_idx);
|
||||
|
||||
if (tx_type == DCT_DCT) {
|
||||
vp9_inverse_transform_b_4x4(xd, xd->eobs[n], xd->dqcoeff + n * 16,
|
||||
vp9_inverse_transform_b_4x4(xd, xd->plane[0].eobs[n],
|
||||
BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 16),
|
||||
xd->diff + x_idx * 4 + y_idx * 4 * 32, 64);
|
||||
} else {
|
||||
vp9_short_iht4x4(xd->dqcoeff + n * 16,
|
||||
vp9_short_iht4x4(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 16),
|
||||
xd->diff + x_idx * 4 + y_idx * 4 * 32, 32, tx_type);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void vp9_inverse_transform_sbuv_16x16(MACROBLOCKD *xd) {
|
||||
vp9_inverse_transform_b_16x16(xd->dqcoeff + 1024,
|
||||
vp9_inverse_transform_b_16x16(xd->plane[1].dqcoeff,
|
||||
xd->diff + 1024, 32);
|
||||
vp9_inverse_transform_b_16x16(xd->dqcoeff + 1280,
|
||||
vp9_inverse_transform_b_16x16(xd->plane[2].dqcoeff,
|
||||
xd->diff + 1280, 32);
|
||||
}
|
||||
|
||||
|
@ -183,10 +199,10 @@ void vp9_inverse_transform_sbuv_8x8(MACROBLOCKD *xd) {
|
|||
for (n = 0; n < 4; n++) {
|
||||
const int x_idx = n & 1, y_idx = n >> 1;
|
||||
|
||||
vp9_inverse_transform_b_8x8(xd->dqcoeff + 1024 + n * 64,
|
||||
vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[1].dqcoeff, n, 64),
|
||||
xd->diff + 1024 + x_idx * 8 + y_idx * 16 * 8,
|
||||
32);
|
||||
vp9_inverse_transform_b_8x8(xd->dqcoeff + 1280 + n * 64,
|
||||
vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[2].dqcoeff, n, 64),
|
||||
xd->diff + 1280 + x_idx * 8 + y_idx * 16 * 8,
|
||||
32);
|
||||
}
|
||||
|
@ -198,12 +214,12 @@ void vp9_inverse_transform_sbuv_4x4(MACROBLOCKD *xd) {
|
|||
for (n = 0; n < 16; n++) {
|
||||
const int x_idx = n & 3, y_idx = n >> 2;
|
||||
|
||||
vp9_inverse_transform_b_4x4(xd, xd->eobs[64 + n],
|
||||
xd->dqcoeff + 1024 + n * 16,
|
||||
vp9_inverse_transform_b_4x4(xd, xd->plane[1].eobs[n],
|
||||
BLOCK_OFFSET(xd->plane[1].dqcoeff, n, 16),
|
||||
xd->diff + 1024 + x_idx * 4 + y_idx * 16 * 4,
|
||||
32);
|
||||
vp9_inverse_transform_b_4x4(xd, xd->eobs[64 + 16 + n],
|
||||
xd->dqcoeff + 1280 + n * 16,
|
||||
vp9_inverse_transform_b_4x4(xd, xd->plane[2].eobs[n],
|
||||
BLOCK_OFFSET(xd->plane[2].dqcoeff, n, 16),
|
||||
xd->diff + 1280 + x_idx * 4 + y_idx * 16 * 4,
|
||||
32);
|
||||
}
|
||||
|
@ -215,7 +231,7 @@ void vp9_inverse_transform_sb64y_32x32(MACROBLOCKD *xd) {
|
|||
for (n = 0; n < 4; n++) {
|
||||
const int x_idx = n & 1, y_idx = n >> 1;
|
||||
|
||||
vp9_short_idct32x32(xd->dqcoeff + n * 1024,
|
||||
vp9_short_idct32x32(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 1024),
|
||||
xd->diff + x_idx * 32 + y_idx * 32 * 64, 128);
|
||||
}
|
||||
}
|
||||
|
@ -228,11 +244,11 @@ void vp9_inverse_transform_sb64y_16x16(MACROBLOCKD *xd) {
|
|||
const TX_TYPE tx_type = get_tx_type_16x16(xd, (y_idx * 16 + x_idx) * 4);
|
||||
|
||||
if (tx_type == DCT_DCT) {
|
||||
vp9_inverse_transform_b_16x16(xd->dqcoeff + n * 256,
|
||||
vp9_inverse_transform_b_16x16(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 256),
|
||||
xd->diff + x_idx * 16 + y_idx * 64 * 16,
|
||||
128);
|
||||
} else {
|
||||
vp9_short_iht16x16(xd->dqcoeff + n * 256,
|
||||
vp9_short_iht16x16(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 256),
|
||||
xd->diff + x_idx * 16 + y_idx * 64 * 16, 64, tx_type);
|
||||
}
|
||||
}
|
||||
|
@ -246,10 +262,10 @@ void vp9_inverse_transform_sb64y_8x8(MACROBLOCKD *xd) {
|
|||
const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 16 + x_idx) * 2);
|
||||
|
||||
if (tx_type == DCT_DCT) {
|
||||
vp9_inverse_transform_b_8x8(xd->dqcoeff + n * 64,
|
||||
vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 64),
|
||||
xd->diff + x_idx * 8 + y_idx * 64 * 8, 128);
|
||||
} else {
|
||||
vp9_short_iht8x8(xd->dqcoeff + n * 64,
|
||||
vp9_short_iht8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 64),
|
||||
xd->diff + x_idx * 8 + y_idx * 64 * 8, 64, tx_type);
|
||||
}
|
||||
}
|
||||
|
@ -263,19 +279,20 @@ void vp9_inverse_transform_sb64y_4x4(MACROBLOCKD *xd) {
|
|||
const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 16 + x_idx);
|
||||
|
||||
if (tx_type == DCT_DCT) {
|
||||
vp9_inverse_transform_b_4x4(xd, xd->eobs[n], xd->dqcoeff + n * 16,
|
||||
vp9_inverse_transform_b_4x4(xd, xd->plane[0].eobs[n],
|
||||
BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 16),
|
||||
xd->diff + x_idx * 4 + y_idx * 4 * 64, 128);
|
||||
} else {
|
||||
vp9_short_iht4x4(xd->dqcoeff + n * 16,
|
||||
vp9_short_iht4x4(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 16),
|
||||
xd->diff + x_idx * 4 + y_idx * 4 * 64, 64, tx_type);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void vp9_inverse_transform_sb64uv_32x32(MACROBLOCKD *xd) {
|
||||
vp9_short_idct32x32(xd->dqcoeff + 4096,
|
||||
vp9_short_idct32x32(xd->plane[1].dqcoeff,
|
||||
xd->diff + 4096, 64);
|
||||
vp9_short_idct32x32(xd->dqcoeff + 4096 + 1024,
|
||||
vp9_short_idct32x32(xd->plane[2].dqcoeff,
|
||||
xd->diff + 4096 + 1024, 64);
|
||||
}
|
||||
|
||||
|
@ -285,9 +302,9 @@ void vp9_inverse_transform_sb64uv_16x16(MACROBLOCKD *xd) {
|
|||
for (n = 0; n < 4; n++) {
|
||||
const int x_idx = n & 1, y_idx = n >> 1, off = x_idx * 16 + y_idx * 32 * 16;
|
||||
|
||||
vp9_inverse_transform_b_16x16(xd->dqcoeff + 4096 + n * 256,
|
||||
vp9_inverse_transform_b_16x16(BLOCK_OFFSET(xd->plane[1].dqcoeff, n, 256),
|
||||
xd->diff + 4096 + off, 64);
|
||||
vp9_inverse_transform_b_16x16(xd->dqcoeff + 4096 + 1024 + n * 256,
|
||||
vp9_inverse_transform_b_16x16(BLOCK_OFFSET(xd->plane[2].dqcoeff, n, 256),
|
||||
xd->diff + 4096 + 1024 + off, 64);
|
||||
}
|
||||
}
|
||||
|
@ -298,9 +315,9 @@ void vp9_inverse_transform_sb64uv_8x8(MACROBLOCKD *xd) {
|
|||
for (n = 0; n < 16; n++) {
|
||||
const int x_idx = n & 3, y_idx = n >> 2, off = x_idx * 8 + y_idx * 32 * 8;
|
||||
|
||||
vp9_inverse_transform_b_8x8(xd->dqcoeff + 4096 + n * 64,
|
||||
vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[1].dqcoeff, n, 64),
|
||||
xd->diff + 4096 + off, 64);
|
||||
vp9_inverse_transform_b_8x8(xd->dqcoeff + 4096 + 1024 + n * 64,
|
||||
vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[2].dqcoeff, n, 64),
|
||||
xd->diff + 4096 + 1024 + off, 64);
|
||||
}
|
||||
}
|
||||
|
@ -311,11 +328,11 @@ void vp9_inverse_transform_sb64uv_4x4(MACROBLOCKD *xd) {
|
|||
for (n = 0; n < 64; n++) {
|
||||
const int x_idx = n & 7, y_idx = n >> 3, off = x_idx * 4 + y_idx * 32 * 4;
|
||||
|
||||
vp9_inverse_transform_b_4x4(xd, xd->eobs[256 + n],
|
||||
xd->dqcoeff + 4096 + n * 16,
|
||||
vp9_inverse_transform_b_4x4(xd, xd->plane[1].eobs[n],
|
||||
BLOCK_OFFSET(xd->plane[1].dqcoeff, n, 16),
|
||||
xd->diff + 4096 + off, 64);
|
||||
vp9_inverse_transform_b_4x4(xd, xd->eobs[256 + 64 + n],
|
||||
xd->dqcoeff + 4096 + 1024 + n * 16,
|
||||
vp9_inverse_transform_b_4x4(xd, xd->plane[2].eobs[n],
|
||||
BLOCK_OFFSET(xd->plane[2].dqcoeff, n, 16),
|
||||
xd->diff + 4096 + 1024 + off, 64);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -99,11 +99,6 @@ void vp9_setup_block_dptrs(MACROBLOCKD *mb) {
|
|||
blockd[to].predictor = &mb->predictor[from];
|
||||
}
|
||||
}
|
||||
|
||||
for (r = 0; r < 24; r++) {
|
||||
blockd[r].qcoeff = &mb->qcoeff[r * 16];
|
||||
blockd[r].dqcoeff = &mb->dqcoeff[r * 16];
|
||||
}
|
||||
}
|
||||
|
||||
void vp9_build_block_doffsets(MACROBLOCKD *mb) {
|
||||
|
|
|
@ -29,9 +29,6 @@ forward_decls vp9_common_forward_decls
|
|||
prototype void vp9_dequant_idct_add_y_block_8x8 "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int stride, struct macroblockd *xd"
|
||||
specialize vp9_dequant_idct_add_y_block_8x8
|
||||
|
||||
prototype void vp9_dequant_idct_add_uv_block_8x8 "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dstu, uint8_t *dstv, int stride, struct macroblockd *xd"
|
||||
specialize vp9_dequant_idct_add_uv_block_8x8
|
||||
|
||||
prototype void vp9_dequant_idct_add_16x16 "int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride, int eob"
|
||||
specialize vp9_dequant_idct_add_16x16
|
||||
|
||||
|
@ -44,15 +41,12 @@ specialize vp9_dequant_idct_add
|
|||
prototype void vp9_dequant_idct_add_y_block "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int stride, struct macroblockd *xd"
|
||||
specialize vp9_dequant_idct_add_y_block
|
||||
|
||||
prototype void vp9_dequant_idct_add_uv_block "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dstu, uint8_t *dstv, int stride, struct macroblockd *xd"
|
||||
prototype void vp9_dequant_idct_add_uv_block "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int stride, uint16_t *eobs"
|
||||
specialize vp9_dequant_idct_add_uv_block
|
||||
|
||||
prototype void vp9_dequant_idct_add_32x32 "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int pitch, int stride, int eob"
|
||||
specialize vp9_dequant_idct_add_32x32
|
||||
|
||||
prototype void vp9_dequant_idct_add_uv_block_16x16 "int16_t *q, const int16_t *dq, uint8_t *dstu, uint8_t *dstv, int stride, struct macroblockd *xd"
|
||||
specialize vp9_dequant_idct_add_uv_block_16x16
|
||||
|
||||
#
|
||||
# RECON
|
||||
#
|
||||
|
@ -606,8 +600,7 @@ prototype void vp9_subtract_b "struct block *be, struct blockd *bd, int pitch"
|
|||
specialize vp9_subtract_b mmx sse2
|
||||
|
||||
prototype int vp9_mbuverror "struct macroblock *mb"
|
||||
specialize vp9_mbuverror mmx sse2
|
||||
vp9_mbuverror_sse2=vp9_mbuverror_xmm
|
||||
specialize vp9_mbuverror
|
||||
|
||||
prototype void vp9_subtract_b "struct block *be, struct blockd *bd, int pitch"
|
||||
specialize vp9_subtract_b mmx sse2
|
||||
|
|
|
@ -245,19 +245,23 @@ static void decode_16x16(VP9D_COMP *pbi, MACROBLOCKD *xd,
|
|||
}
|
||||
#endif
|
||||
if (tx_type != DCT_DCT) {
|
||||
vp9_ht_dequant_idct_add_16x16_c(tx_type, xd->qcoeff,
|
||||
vp9_ht_dequant_idct_add_16x16_c(tx_type, xd->plane[0].qcoeff,
|
||||
xd->block[0].dequant, xd->predictor,
|
||||
xd->dst.y_buffer, 16, xd->dst.y_stride,
|
||||
xd->eobs[0]);
|
||||
xd->plane[0].eobs[0]);
|
||||
} else {
|
||||
vp9_dequant_idct_add_16x16(xd->qcoeff, xd->block[0].dequant,
|
||||
vp9_dequant_idct_add_16x16(xd->plane[0].qcoeff, xd->block[0].dequant,
|
||||
xd->predictor, xd->dst.y_buffer,
|
||||
16, xd->dst.y_stride, xd->eobs[0]);
|
||||
16, xd->dst.y_stride, xd->plane[0].eobs[0]);
|
||||
}
|
||||
vp9_dequant_idct_add_uv_block_8x8(
|
||||
xd->qcoeff + 16 * 16, xd->block[16].dequant,
|
||||
xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer,
|
||||
xd->dst.uv_stride, xd);
|
||||
|
||||
vp9_dequant_idct_add_8x8(xd->plane[1].qcoeff, xd->block[16].dequant,
|
||||
xd->predictor + 16 * 16, xd->dst.u_buffer, 8,
|
||||
xd->dst.uv_stride, xd->plane[1].eobs[0]);
|
||||
|
||||
vp9_dequant_idct_add_8x8(xd->plane[2].qcoeff, xd->block[16].dequant,
|
||||
xd->predictor + 16 * 16 + 64, xd->dst.v_buffer, 8,
|
||||
xd->dst.uv_stride, xd->plane[2].eobs[0]);
|
||||
}
|
||||
|
||||
static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd,
|
||||
|
@ -281,7 +285,7 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd,
|
|||
for (i = 0; i < 4; i++) {
|
||||
int ib = vp9_i8x8_block[i];
|
||||
int idx = (ib & 0x02) ? (ib + 2) : ib;
|
||||
int16_t *q = xd->block[idx].qcoeff;
|
||||
int16_t *q = BLOCK_OFFSET(xd->plane[0].qcoeff, idx, 16);
|
||||
int16_t *dq = xd->block[0].dequant;
|
||||
uint8_t *pre = xd->block[ib].predictor;
|
||||
uint8_t *dst = *(xd->block[ib].base_dst) + xd->block[ib].dst;
|
||||
|
@ -294,14 +298,14 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd,
|
|||
tx_type = get_tx_type_8x8(xd, ib);
|
||||
if (tx_type != DCT_DCT) {
|
||||
vp9_ht_dequant_idct_add_8x8_c(tx_type, q, dq, pre, dst, 16, stride,
|
||||
xd->eobs[idx]);
|
||||
xd->plane[0].eobs[idx]);
|
||||
} else {
|
||||
vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride,
|
||||
xd->eobs[idx]);
|
||||
xd->plane[0].eobs[idx]);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
vp9_dequant_idct_add_y_block_8x8(xd->qcoeff,
|
||||
vp9_dequant_idct_add_y_block_8x8(xd->plane[0].qcoeff,
|
||||
xd->block[0].dequant,
|
||||
xd->predictor,
|
||||
xd->dst.y_buffer,
|
||||
|
@ -319,23 +323,33 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd,
|
|||
|
||||
b = &xd->block[16 + i];
|
||||
vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor);
|
||||
xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
|
||||
*(b->base_dst) + b->dst, 8, b->dst_stride, xd->eobs[16 + i]);
|
||||
xd->itxm_add(BLOCK_OFFSET(xd->plane[1].qcoeff, i, 16),
|
||||
b->dequant, b->predictor,
|
||||
*(b->base_dst) + b->dst, 8, b->dst_stride,
|
||||
xd->plane[1].eobs[i]);
|
||||
|
||||
b = &xd->block[20 + i];
|
||||
vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor);
|
||||
xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
|
||||
*(b->base_dst) + b->dst, 8, b->dst_stride, xd->eobs[20 + i]);
|
||||
xd->itxm_add(BLOCK_OFFSET(xd->plane[2].qcoeff, i, 16),
|
||||
b->dequant, b->predictor,
|
||||
*(b->base_dst) + b->dst, 8, b->dst_stride,
|
||||
xd->plane[2].eobs[i]);
|
||||
}
|
||||
} else if (xd->mode_info_context->mbmi.mode == SPLITMV) {
|
||||
xd->itxm_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant,
|
||||
xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer,
|
||||
xd->dst.uv_stride, xd);
|
||||
xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->block[16].dequant,
|
||||
xd->predictor + 16 * 16, xd->dst.u_buffer,
|
||||
xd->dst.uv_stride, xd->plane[1].eobs);
|
||||
xd->itxm_add_uv_block(xd->plane[2].qcoeff, xd->block[16].dequant,
|
||||
xd->predictor + 16 * 16 + 64, xd->dst.v_buffer,
|
||||
xd->dst.uv_stride, xd->plane[2].eobs);
|
||||
} else {
|
||||
vp9_dequant_idct_add_uv_block_8x8
|
||||
(xd->qcoeff + 16 * 16, xd->block[16].dequant,
|
||||
xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer,
|
||||
xd->dst.uv_stride, xd);
|
||||
vp9_dequant_idct_add_8x8(xd->plane[1].qcoeff, xd->block[16].dequant,
|
||||
xd->predictor + 16 * 16, xd->dst.u_buffer, 8,
|
||||
xd->dst.uv_stride, xd->plane[1].eobs[0]);
|
||||
|
||||
vp9_dequant_idct_add_8x8(xd->plane[2].qcoeff, xd->block[16].dequant,
|
||||
xd->predictor + 16 * 16 + 64, xd->dst.v_buffer, 8,
|
||||
xd->dst.uv_stride, xd->plane[2].eobs[0]);
|
||||
}
|
||||
#if 0 // def DEC_DEBUG
|
||||
if (dec_debug) {
|
||||
|
@ -378,24 +392,31 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
|
|||
b = &xd->block[ib + iblock[j]];
|
||||
tx_type = get_tx_type_4x4(xd, ib + iblock[j]);
|
||||
if (tx_type != DCT_DCT) {
|
||||
vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff,
|
||||
vp9_ht_dequant_idct_add_c(tx_type,
|
||||
BLOCK_OFFSET(xd->plane[0].qcoeff, ib + iblock[j], 16),
|
||||
b->dequant, b->predictor,
|
||||
*(b->base_dst) + b->dst, 16,
|
||||
b->dst_stride, xd->eobs[ib + iblock[j]]);
|
||||
b->dst_stride,
|
||||
xd->plane[0].eobs[ib + iblock[j]]);
|
||||
} else {
|
||||
xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
|
||||
xd->itxm_add(BLOCK_OFFSET(xd->plane[0].qcoeff, ib + iblock[j], 16),
|
||||
b->dequant, b->predictor,
|
||||
*(b->base_dst) + b->dst, 16, b->dst_stride,
|
||||
xd->eobs[ib + iblock[j]]);
|
||||
xd->plane[0].eobs[ib + iblock[j]]);
|
||||
}
|
||||
}
|
||||
b = &xd->block[16 + i];
|
||||
vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor);
|
||||
xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
|
||||
*(b->base_dst) + b->dst, 8, b->dst_stride, xd->eobs[16 + i]);
|
||||
xd->itxm_add(BLOCK_OFFSET(xd->plane[1].qcoeff, i, 16),
|
||||
b->dequant, b->predictor,
|
||||
*(b->base_dst) + b->dst, 8, b->dst_stride,
|
||||
xd->plane[1].eobs[i]);
|
||||
b = &xd->block[20 + i];
|
||||
vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor);
|
||||
xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
|
||||
*(b->base_dst) + b->dst, 8, b->dst_stride, xd->eobs[20 + i]);
|
||||
xd->itxm_add(BLOCK_OFFSET(xd->plane[2].qcoeff, i, 16),
|
||||
b->dequant, b->predictor,
|
||||
*(b->base_dst) + b->dst, 8, b->dst_stride,
|
||||
xd->plane[2].eobs[i]);
|
||||
}
|
||||
} else if (mode == B_PRED) {
|
||||
for (i = 0; i < 16; i++) {
|
||||
|
@ -410,13 +431,16 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
|
|||
vp9_intra4x4_predict(xd, b, b_mode, b->predictor);
|
||||
tx_type = get_tx_type_4x4(xd, i);
|
||||
if (tx_type != DCT_DCT) {
|
||||
vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff,
|
||||
vp9_ht_dequant_idct_add_c(tx_type,
|
||||
BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16),
|
||||
b->dequant, b->predictor,
|
||||
*(b->base_dst) + b->dst, 16, b->dst_stride,
|
||||
xd->eobs[i]);
|
||||
xd->plane[0].eobs[i]);
|
||||
} else {
|
||||
xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
|
||||
*(b->base_dst) + b->dst, 16, b->dst_stride, xd->eobs[i]);
|
||||
xd->itxm_add(BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16),
|
||||
b->dequant, b->predictor,
|
||||
*(b->base_dst) + b->dst, 16, b->dst_stride,
|
||||
xd->plane[0].eobs[i]);
|
||||
}
|
||||
}
|
||||
#if CONFIG_NEWBINTRAMODES
|
||||
|
@ -424,27 +448,25 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
|
|||
vp9_decode_mb_tokens_4x4_uv(pbi, xd, bc);
|
||||
#endif
|
||||
vp9_build_intra_predictors_mbuv(xd);
|
||||
xd->itxm_add_uv_block(xd->qcoeff + 16 * 16,
|
||||
xd->block[16].dequant,
|
||||
xd->predictor + 16 * 16,
|
||||
xd->dst.u_buffer,
|
||||
xd->dst.v_buffer,
|
||||
xd->dst.uv_stride,
|
||||
xd);
|
||||
xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->block[16].dequant,
|
||||
xd->predictor + 16 * 16, xd->dst.u_buffer,
|
||||
xd->dst.uv_stride, xd->plane[1].eobs);
|
||||
xd->itxm_add_uv_block(xd->plane[2].qcoeff, xd->block[16].dequant,
|
||||
xd->predictor + 16 * 16 + 64, xd->dst.v_buffer,
|
||||
xd->dst.uv_stride, xd->plane[2].eobs);
|
||||
} else if (mode == SPLITMV || get_tx_type_4x4(xd, 0) == DCT_DCT) {
|
||||
xd->itxm_add_y_block(xd->qcoeff,
|
||||
xd->itxm_add_y_block(xd->plane[0].qcoeff,
|
||||
xd->block[0].dequant,
|
||||
xd->predictor,
|
||||
xd->dst.y_buffer,
|
||||
xd->dst.y_stride,
|
||||
xd);
|
||||
xd->itxm_add_uv_block(xd->qcoeff + 16 * 16,
|
||||
xd->block[16].dequant,
|
||||
xd->predictor + 16 * 16,
|
||||
xd->dst.u_buffer,
|
||||
xd->dst.v_buffer,
|
||||
xd->dst.uv_stride,
|
||||
xd);
|
||||
xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->block[16].dequant,
|
||||
xd->predictor + 16 * 16, xd->dst.u_buffer,
|
||||
xd->dst.uv_stride, xd->plane[1].eobs);
|
||||
xd->itxm_add_uv_block(xd->plane[2].qcoeff, xd->block[16].dequant,
|
||||
xd->predictor + 16 * 16 + 64, xd->dst.v_buffer,
|
||||
xd->dst.uv_stride, xd->plane[2].eobs);
|
||||
} else {
|
||||
#if 0 // def DEC_DEBUG
|
||||
if (dec_debug) {
|
||||
|
@ -467,22 +489,24 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
|
|||
BLOCKD *b = &xd->block[i];
|
||||
tx_type = get_tx_type_4x4(xd, i);
|
||||
if (tx_type != DCT_DCT) {
|
||||
vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff,
|
||||
vp9_ht_dequant_idct_add_c(tx_type,
|
||||
BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16),
|
||||
b->dequant, b->predictor,
|
||||
*(b->base_dst) + b->dst, 16,
|
||||
b->dst_stride, xd->eobs[i]);
|
||||
b->dst_stride, xd->plane[0].eobs[i]);
|
||||
} else {
|
||||
xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
|
||||
*(b->base_dst) + b->dst, 16, b->dst_stride, xd->eobs[i]);
|
||||
xd->itxm_add(BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16),
|
||||
b->dequant, b->predictor,
|
||||
*(b->base_dst) + b->dst, 16, b->dst_stride,
|
||||
xd->plane[0].eobs[i]);
|
||||
}
|
||||
}
|
||||
xd->itxm_add_uv_block(xd->qcoeff + 16 * 16,
|
||||
xd->block[16].dequant,
|
||||
xd->predictor + 16 * 16,
|
||||
xd->dst.u_buffer,
|
||||
xd->dst.v_buffer,
|
||||
xd->dst.uv_stride,
|
||||
xd);
|
||||
xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->block[16].dequant,
|
||||
xd->predictor + 16 * 16, xd->dst.u_buffer,
|
||||
xd->dst.uv_stride, xd->plane[1].eobs);
|
||||
xd->itxm_add_uv_block(xd->plane[2].qcoeff, xd->block[16].dequant,
|
||||
xd->predictor + 16 * 16 + 64, xd->dst.v_buffer,
|
||||
xd->dst.uv_stride, xd->plane[2].eobs);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -490,11 +514,6 @@ static void decode_sb_16x16(MACROBLOCKD *mb, int y_size) {
|
|||
const int y_count = y_size * y_size;
|
||||
const int uv_size = y_size / 2;
|
||||
const int uv_count = uv_size * uv_size;
|
||||
|
||||
const int u_qcoeff_offset = (16 * 16) * y_count;
|
||||
const int v_qcoeff_offset = u_qcoeff_offset + (16 * 16) * uv_count;
|
||||
const int u_eob_offset = 16 * y_count;
|
||||
const int v_eob_offset = u_eob_offset + 16 * uv_count;
|
||||
int n;
|
||||
|
||||
for (n = 0; n < y_count; n++) {
|
||||
|
@ -504,20 +523,20 @@ static void decode_sb_16x16(MACROBLOCKD *mb, int y_size) {
|
|||
const TX_TYPE tx_type = get_tx_type_16x16(mb,
|
||||
(y_idx * (4 * y_size) + x_idx) * 4);
|
||||
if (tx_type == DCT_DCT) {
|
||||
vp9_dequant_idct_add_16x16(mb->qcoeff + n * 16 * 16,
|
||||
vp9_dequant_idct_add_16x16(BLOCK_OFFSET(mb->plane[0].qcoeff, n, 256),
|
||||
mb->block[0].dequant ,
|
||||
mb->dst.y_buffer + y_offset,
|
||||
mb->dst.y_buffer + y_offset,
|
||||
mb->dst.y_stride, mb->dst.y_stride,
|
||||
mb->eobs[n * 16]);
|
||||
mb->plane[0].eobs[n * 16]);
|
||||
} else {
|
||||
vp9_ht_dequant_idct_add_16x16_c(tx_type,
|
||||
mb->qcoeff + n * 16 * 16,
|
||||
BLOCK_OFFSET(mb->plane[0].qcoeff, n, 256),
|
||||
mb->block[0].dequant,
|
||||
mb->dst.y_buffer + y_offset,
|
||||
mb->dst.y_buffer + y_offset,
|
||||
mb->dst.y_stride, mb->dst.y_stride,
|
||||
mb->eobs[n * 16]);
|
||||
mb->plane[0].eobs[n * 16]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -525,54 +544,49 @@ static void decode_sb_16x16(MACROBLOCKD *mb, int y_size) {
|
|||
const int x_idx = n % uv_size;
|
||||
const int y_idx = n / uv_size;
|
||||
const int uv_offset = (y_idx * 16) * mb->dst.uv_stride + (x_idx * 16);
|
||||
vp9_dequant_idct_add_16x16(mb->qcoeff + u_qcoeff_offset + n * 16 * 16,
|
||||
vp9_dequant_idct_add_16x16(BLOCK_OFFSET(mb->plane[1].qcoeff, n, 256),
|
||||
mb->block[16].dequant,
|
||||
mb->dst.u_buffer + uv_offset,
|
||||
mb->dst.u_buffer + uv_offset,
|
||||
mb->dst.uv_stride, mb->dst.uv_stride,
|
||||
mb->eobs[u_eob_offset + n * 16]);
|
||||
vp9_dequant_idct_add_16x16(mb->qcoeff + v_qcoeff_offset + n * 16 * 16,
|
||||
mb->plane[1].eobs[n * 16]);
|
||||
vp9_dequant_idct_add_16x16(BLOCK_OFFSET(mb->plane[2].qcoeff, n, 256),
|
||||
mb->block[20].dequant,
|
||||
mb->dst.v_buffer + uv_offset,
|
||||
mb->dst.v_buffer + uv_offset,
|
||||
mb->dst.uv_stride, mb->dst.uv_stride,
|
||||
mb->eobs[v_eob_offset + n * 16]);
|
||||
mb->plane[2].eobs[n * 16]);
|
||||
}
|
||||
}
|
||||
|
||||
static void decode_sb_8x8(MACROBLOCKD *mb, int y_size) {
|
||||
static INLINE void decode_sb_8x8(MACROBLOCKD *xd, int y_size) {
|
||||
const int y_count = y_size * y_size;
|
||||
const int uv_size = y_size / 2;
|
||||
const int uv_count = uv_size * uv_size;
|
||||
|
||||
const int u_qcoeff_offset = (8 * 8) * y_count;
|
||||
const int v_qcoeff_offset = u_qcoeff_offset + (8 * 8) * uv_count;
|
||||
const int u_eob_offset = 4 * y_count;
|
||||
const int v_eob_offset = u_eob_offset + 4 * uv_count;
|
||||
int n;
|
||||
|
||||
// luma
|
||||
for (n = 0; n < y_count; n++) {
|
||||
const int x_idx = n % y_size;
|
||||
const int y_idx = n / y_size;
|
||||
const int y_offset = (y_idx * 8) * mb->dst.y_stride + (x_idx * 8);
|
||||
const TX_TYPE tx_type = get_tx_type_8x8(mb,
|
||||
const int y_offset = (y_idx * 8) * xd->dst.y_stride + (x_idx * 8);
|
||||
const TX_TYPE tx_type = get_tx_type_8x8(xd,
|
||||
(y_idx * (2 * y_size) + x_idx) * 2);
|
||||
if (tx_type == DCT_DCT) {
|
||||
vp9_dequant_idct_add_8x8_c(mb->qcoeff + n * 8 * 8,
|
||||
mb->block[0].dequant,
|
||||
mb->dst.y_buffer + y_offset,
|
||||
mb->dst.y_buffer + y_offset,
|
||||
mb->dst.y_stride, mb->dst.y_stride,
|
||||
mb->eobs[n * 4]);
|
||||
vp9_dequant_idct_add_8x8_c(BLOCK_OFFSET(xd->plane[0].qcoeff, n, 64),
|
||||
xd->block[0].dequant,
|
||||
xd->dst.y_buffer + y_offset,
|
||||
xd->dst.y_buffer + y_offset,
|
||||
xd->dst.y_stride, xd->dst.y_stride,
|
||||
xd->plane[0].eobs[n * 4]);
|
||||
} else {
|
||||
vp9_ht_dequant_idct_add_8x8_c(tx_type,
|
||||
mb->qcoeff + n * 8 * 8,
|
||||
mb->block[0].dequant,
|
||||
mb->dst.y_buffer + y_offset,
|
||||
mb->dst.y_buffer + y_offset,
|
||||
mb->dst.y_stride, mb->dst.y_stride,
|
||||
mb->eobs[n * 4]);
|
||||
BLOCK_OFFSET(xd->plane[0].qcoeff, n, 64),
|
||||
xd->block[0].dequant,
|
||||
xd->dst.y_buffer + y_offset,
|
||||
xd->dst.y_buffer + y_offset,
|
||||
xd->dst.y_stride, xd->dst.y_stride,
|
||||
xd->plane[0].eobs[n * 4]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -580,73 +594,67 @@ static void decode_sb_8x8(MACROBLOCKD *mb, int y_size) {
|
|||
for (n = 0; n < uv_count; n++) {
|
||||
const int x_idx = n % uv_size;
|
||||
const int y_idx = n / uv_size;
|
||||
const int uv_offset = (y_idx * 8) * mb->dst.uv_stride + (x_idx * 8);
|
||||
vp9_dequant_idct_add_8x8_c(mb->qcoeff + u_qcoeff_offset + n * 8 * 8,
|
||||
mb->block[16].dequant,
|
||||
mb->dst.u_buffer + uv_offset,
|
||||
mb->dst.u_buffer + uv_offset,
|
||||
mb->dst.uv_stride, mb->dst.uv_stride,
|
||||
mb->eobs[u_eob_offset + n * 4]);
|
||||
vp9_dequant_idct_add_8x8_c(mb->qcoeff + v_qcoeff_offset + n * 8 * 8,
|
||||
mb->block[20].dequant,
|
||||
mb->dst.v_buffer + uv_offset,
|
||||
mb->dst.v_buffer + uv_offset,
|
||||
mb->dst.uv_stride, mb->dst.uv_stride,
|
||||
mb->eobs[v_eob_offset + n * 4]);
|
||||
const int uv_offset = (y_idx * 8) * xd->dst.uv_stride + (x_idx * 8);
|
||||
vp9_dequant_idct_add_8x8_c(BLOCK_OFFSET(xd->plane[1].qcoeff, n, 64),
|
||||
xd->block[16].dequant,
|
||||
xd->dst.u_buffer + uv_offset,
|
||||
xd->dst.u_buffer + uv_offset,
|
||||
xd->dst.uv_stride, xd->dst.uv_stride,
|
||||
xd->plane[1].eobs[n * 4]);
|
||||
vp9_dequant_idct_add_8x8_c(BLOCK_OFFSET(xd->plane[2].qcoeff, n, 64),
|
||||
xd->block[20].dequant,
|
||||
xd->dst.v_buffer + uv_offset,
|
||||
xd->dst.v_buffer + uv_offset,
|
||||
xd->dst.uv_stride, xd->dst.uv_stride,
|
||||
xd->plane[2].eobs[n * 4]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void decode_sb_4x4(MACROBLOCKD *mb, int y_size) {
|
||||
static void decode_sb_4x4(MACROBLOCKD *xd, int y_size) {
|
||||
const int y_count = y_size * y_size;
|
||||
const int uv_size = y_size / 2;
|
||||
const int uv_count = uv_size * uv_size;
|
||||
|
||||
const int u_qcoeff_offset = (4 * 4) * y_count;
|
||||
const int v_qcoeff_offset = u_qcoeff_offset + (4 * 4) * uv_count;
|
||||
const int u_eob_offset = y_count;
|
||||
const int v_eob_offset = u_eob_offset + uv_count;
|
||||
int n;
|
||||
|
||||
for (n = 0; n < y_count; n++) {
|
||||
const int x_idx = n % y_size;
|
||||
const int y_idx = n / y_size;
|
||||
const int y_offset = (y_idx * 4) * mb->dst.y_stride + (x_idx * 4);
|
||||
const TX_TYPE tx_type = get_tx_type_4x4(mb, y_idx * y_size + x_idx);
|
||||
const int y_offset = (y_idx * 4) * xd->dst.y_stride + (x_idx * 4);
|
||||
const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * y_size + x_idx);
|
||||
if (tx_type == DCT_DCT) {
|
||||
mb->itxm_add(mb->qcoeff + n * 4 * 4,
|
||||
mb->block[0].dequant,
|
||||
mb->dst.y_buffer + y_offset,
|
||||
mb->dst.y_buffer + y_offset,
|
||||
mb->dst.y_stride, mb->dst.y_stride,
|
||||
mb->eobs[n]);
|
||||
xd->itxm_add(BLOCK_OFFSET(xd->plane[0].qcoeff, n, 16),
|
||||
xd->block[0].dequant,
|
||||
xd->dst.y_buffer + y_offset,
|
||||
xd->dst.y_buffer + y_offset,
|
||||
xd->dst.y_stride, xd->dst.y_stride,
|
||||
xd->plane[0].eobs[n]);
|
||||
} else {
|
||||
vp9_ht_dequant_idct_add_c(tx_type,
|
||||
mb->qcoeff + n * 4 * 4,
|
||||
mb->block[0].dequant,
|
||||
mb->dst.y_buffer + y_offset,
|
||||
mb->dst.y_buffer + y_offset,
|
||||
mb->dst.y_stride, mb->dst.y_stride,
|
||||
mb->eobs[n]);
|
||||
BLOCK_OFFSET(xd->plane[0].qcoeff, n, 16),
|
||||
xd->block[0].dequant,
|
||||
xd->dst.y_buffer + y_offset,
|
||||
xd->dst.y_buffer + y_offset,
|
||||
xd->dst.y_stride,
|
||||
xd->dst.y_stride,
|
||||
xd->plane[0].eobs[n]);
|
||||
}
|
||||
}
|
||||
|
||||
for (n = 0; n < uv_count; n++) {
|
||||
const int x_idx = n % uv_size;
|
||||
const int y_idx = n / uv_size;
|
||||
const int uv_offset = (y_idx * 4) * mb->dst.uv_stride + (x_idx * 4);
|
||||
mb->itxm_add(mb->qcoeff + u_qcoeff_offset + n * 4 * 4,
|
||||
mb->block[16].dequant,
|
||||
mb->dst.u_buffer + uv_offset,
|
||||
mb->dst.u_buffer + uv_offset,
|
||||
mb->dst.uv_stride, mb->dst.uv_stride,
|
||||
mb->eobs[u_eob_offset + n]);
|
||||
mb->itxm_add(mb->qcoeff + v_qcoeff_offset + n * 4 * 4,
|
||||
mb->block[20].dequant,
|
||||
mb->dst.v_buffer + uv_offset,
|
||||
mb->dst.v_buffer + uv_offset,
|
||||
mb->dst.uv_stride, mb->dst.uv_stride,
|
||||
mb->eobs[v_eob_offset + n]);
|
||||
const int uv_offset = (y_idx * 4) * xd->dst.uv_stride + (x_idx * 4);
|
||||
xd->itxm_add(BLOCK_OFFSET(xd->plane[1].qcoeff, n, 16),
|
||||
xd->block[16].dequant,
|
||||
xd->dst.u_buffer + uv_offset,
|
||||
xd->dst.u_buffer + uv_offset,
|
||||
xd->dst.uv_stride, xd->dst.uv_stride, xd->plane[1].eobs[n]);
|
||||
xd->itxm_add(BLOCK_OFFSET(xd->plane[2].qcoeff, n, 16),
|
||||
xd->block[20].dequant,
|
||||
xd->dst.v_buffer + uv_offset,
|
||||
xd->dst.v_buffer + uv_offset,
|
||||
xd->dst.uv_stride, xd->dst.uv_stride, xd->plane[2].eobs[n]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -698,18 +706,18 @@ static void decode_sb64(VP9D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col,
|
|||
for (n = 0; n < 4; n++) {
|
||||
const int x_idx = n & 1, y_idx = n >> 1;
|
||||
const int y_offset = x_idx * 32 + y_idx * xd->dst.y_stride * 32;
|
||||
vp9_dequant_idct_add_32x32(xd->qcoeff + n * 1024,
|
||||
vp9_dequant_idct_add_32x32(BLOCK_OFFSET(xd->plane[0].qcoeff, n, 1024),
|
||||
xd->block[0].dequant,
|
||||
xd->dst.y_buffer + y_offset,
|
||||
xd->dst.y_buffer + y_offset,
|
||||
xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 64]);
|
||||
xd->dst.y_stride, xd->dst.y_stride, xd->plane[0].eobs[n * 64]);
|
||||
}
|
||||
vp9_dequant_idct_add_32x32(xd->qcoeff + 4096,
|
||||
vp9_dequant_idct_add_32x32(xd->plane[1].qcoeff,
|
||||
xd->block[16].dequant, xd->dst.u_buffer, xd->dst.u_buffer,
|
||||
xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[256]);
|
||||
vp9_dequant_idct_add_32x32(xd->qcoeff + 4096 + 1024,
|
||||
xd->dst.uv_stride, xd->dst.uv_stride, xd->plane[1].eobs[0]);
|
||||
vp9_dequant_idct_add_32x32(xd->plane[2].qcoeff,
|
||||
xd->block[20].dequant, xd->dst.v_buffer, xd->dst.v_buffer,
|
||||
xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[320]);
|
||||
xd->dst.uv_stride, xd->dst.uv_stride, xd->plane[2].eobs[0]);
|
||||
break;
|
||||
case TX_16X16:
|
||||
decode_sb_16x16(xd, 4);
|
||||
|
@ -776,15 +784,18 @@ static void decode_sb32(VP9D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col,
|
|||
} else {
|
||||
switch (xd->mode_info_context->mbmi.txfm_size) {
|
||||
case TX_32X32:
|
||||
vp9_dequant_idct_add_32x32(xd->qcoeff, xd->block[0].dequant,
|
||||
vp9_dequant_idct_add_32x32(xd->plane[0].qcoeff, xd->block[0].dequant,
|
||||
xd->dst.y_buffer, xd->dst.y_buffer,
|
||||
xd->dst.y_stride, xd->dst.y_stride,
|
||||
xd->eobs[0]);
|
||||
vp9_dequant_idct_add_uv_block_16x16_c(xd->qcoeff + 1024,
|
||||
xd->block[16].dequant,
|
||||
xd->dst.u_buffer,
|
||||
xd->dst.v_buffer,
|
||||
xd->dst.uv_stride, xd);
|
||||
xd->plane[0].eobs[0]);
|
||||
vp9_dequant_idct_add_16x16(xd->plane[1].qcoeff, xd->block[16].dequant,
|
||||
xd->dst.u_buffer, xd->dst.u_buffer,
|
||||
xd->dst.uv_stride, xd->dst.uv_stride,
|
||||
xd->plane[1].eobs[0]);
|
||||
vp9_dequant_idct_add_16x16(xd->plane[2].qcoeff, xd->block[16].dequant,
|
||||
xd->dst.v_buffer, xd->dst.v_buffer,
|
||||
xd->dst.uv_stride, xd->dst.uv_stride,
|
||||
xd->plane[2].eobs[0]);
|
||||
break;
|
||||
case TX_16X16:
|
||||
decode_sb_16x16(xd, 2);
|
||||
|
@ -1852,7 +1863,9 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) {
|
|||
vp9_build_block_doffsets(xd);
|
||||
|
||||
// clear out the coeff buffer
|
||||
vpx_memset(xd->qcoeff, 0, sizeof(xd->qcoeff));
|
||||
vpx_memset(xd->plane[0].qcoeff, 0, sizeof(xd->plane[0].qcoeff));
|
||||
vpx_memset(xd->plane[1].qcoeff, 0, sizeof(xd->plane[1].qcoeff));
|
||||
vpx_memset(xd->plane[2].qcoeff, 0, sizeof(xd->plane[2].qcoeff));
|
||||
|
||||
// Read the mb_no_coeff_skip flag
|
||||
pc->mb_no_coeff_skip = vp9_read_bit(&header_bc);
|
||||
|
|
|
@ -388,14 +388,3 @@ void vp9_dequant_idct_add_32x32_c(int16_t *input, const int16_t *dq,
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
void vp9_dequant_idct_add_uv_block_16x16_c(int16_t *q, const int16_t *dq,
|
||||
uint8_t *dstu,
|
||||
uint8_t *dstv,
|
||||
int stride,
|
||||
MACROBLOCKD *xd) {
|
||||
vp9_dequant_idct_add_16x16_c(q, dq, dstu, dstu, stride, stride,
|
||||
xd->eobs[64]);
|
||||
vp9_dequant_idct_add_16x16_c(q + 256, dq, dstv, dstv, stride, stride,
|
||||
xd->eobs[80]);
|
||||
}
|
||||
|
|
|
@ -40,10 +40,9 @@ void vp9_dequant_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dq,
|
|||
|
||||
void vp9_dequant_idct_add_uv_block_lossless_c(int16_t *q, const int16_t *dq,
|
||||
unsigned char *pre,
|
||||
unsigned char *dst_u,
|
||||
unsigned char *dst_v,
|
||||
unsigned char *dst,
|
||||
int stride,
|
||||
struct macroblockd *xd);
|
||||
uint16_t *eobs);
|
||||
|
||||
void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, int16_t *input, const int16_t *dq,
|
||||
unsigned char *pred, unsigned char *dest,
|
||||
|
|
|
@ -389,19 +389,32 @@ static INLINE int decode_sb(VP9D_COMP* const pbi,
|
|||
const int seg_eob = get_eob(xd, segment_id, eob_max);
|
||||
int i, eobtotal = 0;
|
||||
|
||||
assert(count == offset * 3 / 2);
|
||||
|
||||
// luma blocks
|
||||
for (i = 0; i < offset; i += inc) {
|
||||
const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC, seg_eob,
|
||||
xd->qcoeff + i * 16, tx_size);
|
||||
xd->eobs[i] = c;
|
||||
BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16),
|
||||
tx_size);
|
||||
xd->plane[0].eobs[i] = c;
|
||||
eobtotal += c;
|
||||
}
|
||||
|
||||
// chroma blocks
|
||||
for (i = offset; i < count; i += inc) {
|
||||
for (i = offset; i < offset * 5 / 4; i += inc) {
|
||||
const int b = i - offset;
|
||||
const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, seg_eob,
|
||||
xd->qcoeff + i * 16, tx_size);
|
||||
xd->eobs[i] = c;
|
||||
BLOCK_OFFSET(xd->plane[1].qcoeff, b, 16),
|
||||
tx_size);
|
||||
xd->plane[1].eobs[b] = c;
|
||||
eobtotal += c;
|
||||
}
|
||||
for (i = offset * 5 / 4; i < count; i += inc) {
|
||||
const int b = i - offset * 5 / 4;
|
||||
const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, seg_eob,
|
||||
BLOCK_OFFSET(xd->plane[2].qcoeff, b, 16),
|
||||
tx_size);
|
||||
xd->plane[2].eobs[b] = c;
|
||||
eobtotal += c;
|
||||
}
|
||||
|
||||
|
@ -415,20 +428,24 @@ int vp9_decode_sb_tokens(VP9D_COMP* const pbi,
|
|||
case TX_32X32: {
|
||||
// 32x32 luma block
|
||||
const int segment_id = xd->mode_info_context->mbmi.segment_id;
|
||||
int i, eobtotal = 0, seg_eob;
|
||||
int eobtotal = 0, seg_eob;
|
||||
int c = decode_coefs(pbi, xd, bc, 0, PLANE_TYPE_Y_WITH_DC,
|
||||
get_eob(xd, segment_id, 1024), xd->qcoeff, TX_32X32);
|
||||
xd->eobs[0] = c;
|
||||
get_eob(xd, segment_id, 1024),
|
||||
xd->plane[0].qcoeff, TX_32X32);
|
||||
xd->plane[0].eobs[0] = c;
|
||||
eobtotal += c;
|
||||
|
||||
// 16x16 chroma blocks
|
||||
seg_eob = get_eob(xd, segment_id, 256);
|
||||
for (i = 64; i < 96; i += 16) {
|
||||
c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, seg_eob,
|
||||
xd->qcoeff + i * 16, TX_16X16);
|
||||
xd->eobs[i] = c;
|
||||
eobtotal += c;
|
||||
}
|
||||
|
||||
c = decode_coefs(pbi, xd, bc, 64, PLANE_TYPE_UV, seg_eob,
|
||||
xd->plane[1].qcoeff, TX_16X16);
|
||||
xd->plane[1].eobs[0] = c;
|
||||
eobtotal += c;
|
||||
c = decode_coefs(pbi, xd, bc, 80, PLANE_TYPE_UV, seg_eob,
|
||||
xd->plane[2].qcoeff, TX_16X16);
|
||||
xd->plane[2].eobs[0] = c;
|
||||
eobtotal += c;
|
||||
return eobtotal;
|
||||
}
|
||||
case TX_16X16:
|
||||
|
@ -465,22 +482,26 @@ static int vp9_decode_mb_tokens_16x16(VP9D_COMP* const pbi,
|
|||
MACROBLOCKD* const xd,
|
||||
BOOL_DECODER* const bc) {
|
||||
const int segment_id = xd->mode_info_context->mbmi.segment_id;
|
||||
int i, eobtotal = 0, seg_eob;
|
||||
int eobtotal = 0, seg_eob;
|
||||
|
||||
// Luma block
|
||||
int c = decode_coefs(pbi, xd, bc, 0, PLANE_TYPE_Y_WITH_DC,
|
||||
get_eob(xd, segment_id, 256), xd->qcoeff, TX_16X16);
|
||||
xd->eobs[0] = c;
|
||||
get_eob(xd, segment_id, 256),
|
||||
xd->plane[0].qcoeff, TX_16X16);
|
||||
xd->plane[0].eobs[0] = c;
|
||||
eobtotal += c;
|
||||
|
||||
// 8x8 chroma blocks
|
||||
seg_eob = get_eob(xd, segment_id, 64);
|
||||
for (i = 16; i < 24; i += 4) {
|
||||
c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV,
|
||||
seg_eob, xd->block[i].qcoeff, TX_8X8);
|
||||
xd->eobs[i] = c;
|
||||
eobtotal += c;
|
||||
}
|
||||
|
||||
c = decode_coefs(pbi, xd, bc, 16, PLANE_TYPE_UV,
|
||||
seg_eob, xd->plane[1].qcoeff, TX_8X8);
|
||||
xd->plane[1].eobs[0] = c;
|
||||
eobtotal += c;
|
||||
c = decode_coefs(pbi, xd, bc, 20, PLANE_TYPE_UV,
|
||||
seg_eob, xd->plane[2].qcoeff, TX_8X8);
|
||||
xd->plane[2].eobs[0] = c;
|
||||
eobtotal += c;
|
||||
return eobtotal;
|
||||
}
|
||||
|
||||
|
@ -493,9 +514,10 @@ static int vp9_decode_mb_tokens_8x8(VP9D_COMP* const pbi,
|
|||
// luma blocks
|
||||
int seg_eob = get_eob(xd, segment_id, 64);
|
||||
for (i = 0; i < 16; i += 4) {
|
||||
const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC,
|
||||
seg_eob, xd->block[i].qcoeff, TX_8X8);
|
||||
xd->eobs[i] = c;
|
||||
const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC, seg_eob,
|
||||
BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16),
|
||||
TX_8X8);
|
||||
xd->plane[0].eobs[i] = c;
|
||||
eobtotal += c;
|
||||
}
|
||||
|
||||
|
@ -504,19 +526,31 @@ static int vp9_decode_mb_tokens_8x8(VP9D_COMP* const pbi,
|
|||
xd->mode_info_context->mbmi.mode == SPLITMV) {
|
||||
// use 4x4 transform for U, V components in I8X8/splitmv prediction mode
|
||||
seg_eob = get_eob(xd, segment_id, 16);
|
||||
for (i = 16; i < 24; i++) {
|
||||
const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV,
|
||||
seg_eob, xd->block[i].qcoeff, TX_4X4);
|
||||
xd->eobs[i] = c;
|
||||
for (i = 16; i < 20; i++) {
|
||||
const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, seg_eob,
|
||||
BLOCK_OFFSET(xd->plane[1].qcoeff, i - 16, 16),
|
||||
TX_4X4);
|
||||
xd->plane[1].eobs[i - 16] = c;
|
||||
eobtotal += c;
|
||||
}
|
||||
for (i = 20; i < 24; i++) {
|
||||
const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, seg_eob,
|
||||
BLOCK_OFFSET(xd->plane[2].qcoeff, i - 20, 16),
|
||||
TX_4X4);
|
||||
xd->plane[2].eobs[i - 20] = c;
|
||||
eobtotal += c;
|
||||
}
|
||||
} else {
|
||||
for (i = 16; i < 24; i += 4) {
|
||||
const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV,
|
||||
seg_eob, xd->block[i].qcoeff, TX_8X8);
|
||||
xd->eobs[i] = c;
|
||||
eobtotal += c;
|
||||
}
|
||||
int c;
|
||||
|
||||
c = decode_coefs(pbi, xd, bc, 16, PLANE_TYPE_UV, seg_eob,
|
||||
xd->plane[1].qcoeff, TX_8X8);
|
||||
xd->plane[1].eobs[0] = c;
|
||||
eobtotal += c;
|
||||
c = decode_coefs(pbi, xd, bc, 20, PLANE_TYPE_UV, seg_eob,
|
||||
xd->plane[2].qcoeff, TX_8X8);
|
||||
xd->plane[2].eobs[0] = c;
|
||||
eobtotal += c;
|
||||
}
|
||||
|
||||
return eobtotal;
|
||||
|
@ -525,9 +559,10 @@ static int vp9_decode_mb_tokens_8x8(VP9D_COMP* const pbi,
|
|||
static int decode_coefs_4x4(VP9D_COMP *dx, MACROBLOCKD *xd,
|
||||
BOOL_DECODER* const bc,
|
||||
PLANE_TYPE type, int i, int seg_eob) {
|
||||
const struct plane_block_idx pb_idx = plane_block_idx(16, i);
|
||||
const int c = decode_coefs(dx, xd, bc, i, type, seg_eob,
|
||||
xd->block[i].qcoeff, TX_4X4);
|
||||
xd->eobs[i] = c;
|
||||
BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, pb_idx.block, 16), TX_4X4);
|
||||
xd->plane[pb_idx.plane].eobs[pb_idx.block] = c;
|
||||
return c;
|
||||
}
|
||||
|
||||
|
|
|
@ -20,7 +20,8 @@ void vp9_dequant_idct_add_y_block_c(int16_t *q, const int16_t *dq,
|
|||
|
||||
for (i = 0; i < 4; i++) {
|
||||
for (j = 0; j < 4; j++) {
|
||||
vp9_dequant_idct_add(q, dq, pre, dst, 16, stride, xd->eobs[i * 4 + j]);
|
||||
vp9_dequant_idct_add(q, dq, pre, dst, 16, stride,
|
||||
xd->plane[0].eobs[i * 4 + j]);
|
||||
q += 16;
|
||||
pre += 4;
|
||||
dst += 4;
|
||||
|
@ -32,35 +33,20 @@ void vp9_dequant_idct_add_y_block_c(int16_t *q, const int16_t *dq,
|
|||
}
|
||||
|
||||
void vp9_dequant_idct_add_uv_block_c(int16_t *q, const int16_t *dq,
|
||||
uint8_t *pre, uint8_t *dstu,
|
||||
uint8_t *dstv, int stride,
|
||||
MACROBLOCKD *xd) {
|
||||
uint8_t *pre, uint8_t *dst,
|
||||
int stride, uint16_t *eobs) {
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < 2; i++) {
|
||||
for (j = 0; j < 2; j++) {
|
||||
vp9_dequant_idct_add(q, dq, pre, dstu, 8, stride,
|
||||
xd->eobs[16 + i * 2 + j]);
|
||||
q += 16;
|
||||
pre += 4;
|
||||
dstu += 4;
|
||||
vp9_dequant_idct_add(q, dq, pre, dst, 8, stride, eobs[i * 2 + j]);
|
||||
q += 16;
|
||||
pre += 4;
|
||||
dst += 4;
|
||||
}
|
||||
|
||||
pre += 32 - 8;
|
||||
dstu += 4 * stride - 8;
|
||||
}
|
||||
|
||||
for (i = 0; i < 2; i++) {
|
||||
for (j = 0; j < 2; j++) {
|
||||
vp9_dequant_idct_add(q, dq, pre, dstv, 8, stride,
|
||||
xd->eobs[20 + i * 2 + j]);
|
||||
q += 16;
|
||||
pre += 4;
|
||||
dstv += 4;
|
||||
}
|
||||
|
||||
pre += 32 - 8;
|
||||
dstv += 4 * stride - 8;
|
||||
dst += 4 * stride - 8;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -71,28 +57,17 @@ void vp9_dequant_idct_add_y_block_8x8_c(int16_t *q, const int16_t *dq,
|
|||
uint8_t *origdest = dst;
|
||||
uint8_t *origpred = pre;
|
||||
|
||||
vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride, xd->eobs[0]);
|
||||
vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride,
|
||||
xd->plane[0].eobs[0]);
|
||||
vp9_dequant_idct_add_8x8_c(&q[64], dq, origpred + 8,
|
||||
origdest + 8, 16, stride, xd->eobs[4]);
|
||||
origdest + 8, 16, stride,
|
||||
xd->plane[0].eobs[4]);
|
||||
vp9_dequant_idct_add_8x8_c(&q[128], dq, origpred + 8 * 16,
|
||||
origdest + 8 * stride, 16, stride,
|
||||
xd->eobs[8]);
|
||||
xd->plane[0].eobs[8]);
|
||||
vp9_dequant_idct_add_8x8_c(&q[192], dq, origpred + 8 * 16 + 8,
|
||||
origdest + 8 * stride + 8, 16, stride,
|
||||
xd->eobs[12]);
|
||||
}
|
||||
|
||||
void vp9_dequant_idct_add_uv_block_8x8_c(int16_t *q, const int16_t *dq,
|
||||
uint8_t *pre,
|
||||
uint8_t *dstu,
|
||||
uint8_t *dstv,
|
||||
int stride, MACROBLOCKD *xd) {
|
||||
vp9_dequant_idct_add_8x8_c(q, dq, pre, dstu, 8, stride, xd->eobs[16]);
|
||||
|
||||
q += 64;
|
||||
pre += 64;
|
||||
|
||||
vp9_dequant_idct_add_8x8_c(q, dq, pre, dstv, 8, stride, xd->eobs[20]);
|
||||
xd->plane[0].eobs[12]);
|
||||
}
|
||||
|
||||
void vp9_dequant_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dq,
|
||||
|
@ -104,7 +79,7 @@ void vp9_dequant_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dq,
|
|||
for (i = 0; i < 4; i++) {
|
||||
for (j = 0; j < 4; j++) {
|
||||
vp9_dequant_idct_add_lossless_c(q, dq, pre, dst, 16, stride,
|
||||
xd->eobs[i * 4 + j]);
|
||||
xd->plane[0].eobs[i * 4 + j]);
|
||||
q += 16;
|
||||
pre += 4;
|
||||
dst += 4;
|
||||
|
@ -117,36 +92,22 @@ void vp9_dequant_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dq,
|
|||
|
||||
void vp9_dequant_idct_add_uv_block_lossless_c(int16_t *q, const int16_t *dq,
|
||||
uint8_t *pre,
|
||||
uint8_t *dstu,
|
||||
uint8_t *dstv,
|
||||
uint8_t *dst,
|
||||
int stride,
|
||||
MACROBLOCKD *xd) {
|
||||
uint16_t *eobs) {
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < 2; i++) {
|
||||
for (j = 0; j < 2; j++) {
|
||||
vp9_dequant_idct_add_lossless_c(q, dq, pre, dstu, 8, stride,
|
||||
xd->eobs[16 + i * 2 + j]);
|
||||
q += 16;
|
||||
pre += 4;
|
||||
dstu += 4;
|
||||
vp9_dequant_idct_add_lossless_c(q, dq, pre, dst, 8, stride,
|
||||
eobs[i * 2 + j]);
|
||||
q += 16;
|
||||
pre += 4;
|
||||
dst += 4;
|
||||
}
|
||||
|
||||
pre += 32 - 8;
|
||||
dstu += 4 * stride - 8;
|
||||
}
|
||||
|
||||
for (i = 0; i < 2; i++) {
|
||||
for (j = 0; j < 2; j++) {
|
||||
vp9_dequant_idct_add_lossless_c(q, dq, pre, dstv, 8, stride,
|
||||
xd->eobs[20 + i * 2 + j]);
|
||||
q += 16;
|
||||
pre += 4;
|
||||
dstv += 4;
|
||||
}
|
||||
|
||||
pre += 32 - 8;
|
||||
dstv += 4 * stride - 8;
|
||||
pre += 32 - 8;
|
||||
dst += 4 * stride - 8;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -29,9 +29,7 @@ DEFINE(vp9_block_zbin_extra, offsetof(BLOCK, zbin_extra));
|
|||
DEFINE(vp9_block_zrun_zbin_boost, offsetof(BLOCK, zrun_zbin_boost));
|
||||
DEFINE(vp9_block_quant_shift, offsetof(BLOCK, quant_shift));
|
||||
|
||||
DEFINE(vp9_blockd_qcoeff, offsetof(BLOCKD, qcoeff));
|
||||
DEFINE(vp9_blockd_dequant, offsetof(BLOCKD, dequant));
|
||||
DEFINE(vp9_blockd_dqcoeff, offsetof(BLOCKD, dqcoeff));
|
||||
|
||||
END
|
||||
|
||||
|
|
|
@ -171,10 +171,13 @@ struct macroblock {
|
|||
void (*fwd_txm8x4)(int16_t *input, int16_t *output, int pitch);
|
||||
void (*fwd_txm8x8)(int16_t *input, int16_t *output, int pitch);
|
||||
void (*fwd_txm16x16)(int16_t *input, int16_t *output, int pitch);
|
||||
void (*quantize_b_4x4)(MACROBLOCK *x, int b_idx);
|
||||
void (*quantize_b_4x4_pair)(MACROBLOCK *x, int b_idx1, int b_idx2);
|
||||
void (*quantize_b_16x16)(MACROBLOCK *x, int b_idx, TX_TYPE tx_type);
|
||||
void (*quantize_b_8x8)(MACROBLOCK *x, int b_idx, TX_TYPE tx_type);
|
||||
void (*quantize_b_4x4)(MACROBLOCK *x, int b_idx, int y_blocks);
|
||||
void (*quantize_b_4x4_pair)(MACROBLOCK *x, int b_idx1, int b_idx2,
|
||||
int y_blocks);
|
||||
void (*quantize_b_16x16)(MACROBLOCK *x, int b_idx, TX_TYPE tx_type,
|
||||
int y_blocks);
|
||||
void (*quantize_b_8x8)(MACROBLOCK *x, int b_idx, TX_TYPE tx_type,
|
||||
int y_blocks);
|
||||
};
|
||||
|
||||
#endif // VP9_ENCODER_VP9_BLOCK_H_
|
||||
|
|
|
@ -16,6 +16,8 @@
|
|||
#include "vp9/common/vp9_invtrans.h"
|
||||
#include "vp9/encoder/vp9_encodeintra.h"
|
||||
|
||||
static void encode_intra4x4block(MACROBLOCK *x, int ib);
|
||||
|
||||
int vp9_encode_intra(VP9_COMP *cpi, MACROBLOCK *x, int use_16x16_pred) {
|
||||
MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
|
||||
(void) cpi;
|
||||
|
@ -31,18 +33,21 @@ int vp9_encode_intra(VP9_COMP *cpi, MACROBLOCK *x, int use_16x16_pred) {
|
|||
|
||||
for (i = 0; i < 16; i++) {
|
||||
x->e_mbd.block[i].bmi.as_mode.first = B_DC_PRED;
|
||||
vp9_encode_intra4x4block(x, i);
|
||||
encode_intra4x4block(x, i);
|
||||
}
|
||||
}
|
||||
|
||||
return vp9_get_mb_ss(x->src_diff);
|
||||
}
|
||||
|
||||
void vp9_encode_intra4x4block(MACROBLOCK *x, int ib) {
|
||||
static void encode_intra4x4block(MACROBLOCK *x, int ib) {
|
||||
BLOCKD *b = &x->e_mbd.block[ib];
|
||||
BLOCK *be = &x->block[ib];
|
||||
MACROBLOCKD * const xd = &x->e_mbd;
|
||||
TX_TYPE tx_type;
|
||||
|
||||
assert(ib < 16);
|
||||
|
||||
#if CONFIG_NEWBINTRAMODES
|
||||
b->bmi.as_mode.context = vp9_find_bpred_context(&x->e_mbd, b);
|
||||
#endif
|
||||
|
@ -54,12 +59,14 @@ void vp9_encode_intra4x4block(MACROBLOCK *x, int ib) {
|
|||
if (tx_type != DCT_DCT) {
|
||||
vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type);
|
||||
vp9_ht_quantize_b_4x4(x, ib, tx_type);
|
||||
vp9_short_iht4x4(b->dqcoeff, b->diff, 16, tx_type);
|
||||
vp9_short_iht4x4(BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16),
|
||||
b->diff, 16, tx_type);
|
||||
} else {
|
||||
x->fwd_txm4x4(be->src_diff, be->coeff, 32);
|
||||
x->quantize_b_4x4(x, ib);
|
||||
vp9_inverse_transform_b_4x4(&x->e_mbd, x->e_mbd.eobs[ib],
|
||||
b->dqcoeff, b->diff, 32);
|
||||
x->quantize_b_4x4(x, ib, 16);
|
||||
vp9_inverse_transform_b_4x4(&x->e_mbd, xd->plane[0].eobs[ib],
|
||||
BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16),
|
||||
b->diff, 32);
|
||||
}
|
||||
|
||||
vp9_recon_b(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
|
||||
|
@ -69,7 +76,7 @@ void vp9_encode_intra4x4mby(MACROBLOCK *mb) {
|
|||
int i;
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
vp9_encode_intra4x4block(mb, i);
|
||||
encode_intra4x4block(mb, i);
|
||||
}
|
||||
|
||||
void vp9_encode_intra16x16mby(VP9_COMMON *const cm, MACROBLOCK *x) {
|
||||
|
@ -151,41 +158,47 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) {
|
|||
|
||||
if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) {
|
||||
int idx = (ib & 0x02) ? (ib + 2) : ib;
|
||||
int16_t * const dqcoeff = BLOCK_OFFSET(xd->plane[0].dqcoeff, idx, 16);
|
||||
|
||||
assert(idx < 16);
|
||||
tx_type = get_tx_type_8x8(xd, ib);
|
||||
if (tx_type != DCT_DCT) {
|
||||
vp9_short_fht8x8(be->src_diff, (x->block + idx)->coeff, 16, tx_type);
|
||||
x->quantize_b_8x8(x, idx, tx_type);
|
||||
vp9_short_iht8x8(xd->block[idx].dqcoeff, xd->block[ib].diff,
|
||||
x->quantize_b_8x8(x, idx, tx_type, 16);
|
||||
vp9_short_iht8x8(dqcoeff, xd->block[ib].diff,
|
||||
16, tx_type);
|
||||
} else {
|
||||
x->fwd_txm8x8(be->src_diff, (x->block + idx)->coeff, 32);
|
||||
x->quantize_b_8x8(x, idx, DCT_DCT);
|
||||
vp9_short_idct8x8(xd->block[idx].dqcoeff, xd->block[ib].diff, 32);
|
||||
x->quantize_b_8x8(x, idx, DCT_DCT, 16);
|
||||
vp9_short_idct8x8(dqcoeff, xd->block[ib].diff, 32);
|
||||
}
|
||||
} else {
|
||||
for (i = 0; i < 4; i++) {
|
||||
int idx = ib + iblock[i];
|
||||
int16_t * const dqcoeff = BLOCK_OFFSET(xd->plane[0].dqcoeff, idx, 16);
|
||||
|
||||
assert(idx < 16);
|
||||
b = &xd->block[ib + iblock[i]];
|
||||
be = &x->block[ib + iblock[i]];
|
||||
tx_type = get_tx_type_4x4(xd, ib + iblock[i]);
|
||||
if (tx_type != DCT_DCT) {
|
||||
vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type);
|
||||
vp9_ht_quantize_b_4x4(x, ib + iblock[i], tx_type);
|
||||
vp9_short_iht4x4(b->dqcoeff, b->diff, 16, tx_type);
|
||||
vp9_short_iht4x4(dqcoeff, b->diff, 16, tx_type);
|
||||
} else if (!(i & 1) &&
|
||||
get_tx_type_4x4(xd, ib + iblock[i] + 1) == DCT_DCT) {
|
||||
x->fwd_txm8x4(be->src_diff, be->coeff, 32);
|
||||
x->quantize_b_4x4_pair(x, ib + iblock[i], ib + iblock[i] + 1);
|
||||
vp9_inverse_transform_b_4x4(xd, xd->eobs[ib + iblock[i]],
|
||||
b->dqcoeff, b->diff, 32);
|
||||
vp9_inverse_transform_b_4x4(xd, xd->eobs[ib + iblock[i] + 1],
|
||||
(b + 1)->dqcoeff, (b + 1)->diff, 32);
|
||||
x->quantize_b_4x4_pair(x, ib + iblock[i], ib + iblock[i] + 1, 16);
|
||||
vp9_inverse_transform_b_4x4(xd, xd->plane[0].eobs[ib + iblock[i]],
|
||||
dqcoeff, b->diff, 32);
|
||||
vp9_inverse_transform_b_4x4(xd, xd->plane[0].eobs[ib + iblock[i] + 1],
|
||||
dqcoeff + 16, (b + 1)->diff, 32);
|
||||
i++;
|
||||
} else {
|
||||
x->fwd_txm4x4(be->src_diff, be->coeff, 32);
|
||||
x->quantize_b_4x4(x, ib + iblock[i]);
|
||||
vp9_inverse_transform_b_4x4(xd, xd->eobs[ib + iblock[i]],
|
||||
b->dqcoeff, b->diff, 32);
|
||||
x->quantize_b_4x4(x, ib + iblock[i], 16);
|
||||
vp9_inverse_transform_b_4x4(xd, xd->plane[0].eobs[ib + iblock[i]],
|
||||
dqcoeff, b->diff, 32);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -206,17 +219,22 @@ void vp9_encode_intra8x8mby(MACROBLOCK *x) {
|
|||
}
|
||||
|
||||
static void encode_intra_uv4x4(MACROBLOCK *x, int ib, int mode) {
|
||||
MACROBLOCKD * const xd = &x->e_mbd;
|
||||
BLOCKD *b = &x->e_mbd.block[ib];
|
||||
BLOCK *be = &x->block[ib];
|
||||
int16_t * const dqcoeff = MB_SUBBLOCK_FIELD(xd, dqcoeff, ib);
|
||||
const int plane = ib < 20 ? 1 : 2;
|
||||
const int block = ib < 20 ? ib - 16 : ib - 20;
|
||||
|
||||
assert(ib >= 16 && ib < 24);
|
||||
vp9_intra_uv4x4_predict(&x->e_mbd, b, mode, b->predictor);
|
||||
|
||||
vp9_subtract_b(be, b, 8);
|
||||
|
||||
x->fwd_txm4x4(be->src_diff, be->coeff, 16);
|
||||
x->quantize_b_4x4(x, ib);
|
||||
vp9_inverse_transform_b_4x4(&x->e_mbd, x->e_mbd.eobs[ib],
|
||||
b->dqcoeff, b->diff, 16);
|
||||
x->quantize_b_4x4(x, ib, 16);
|
||||
vp9_inverse_transform_b_4x4(&x->e_mbd, xd->plane[plane].eobs[block],
|
||||
dqcoeff, b->diff, 16);
|
||||
|
||||
vp9_recon_uv_b_c(b->predictor, b->diff, *(b->base_dst) + b->dst,
|
||||
b->dst_stride);
|
||||
|
|
|
@ -17,7 +17,6 @@ int vp9_encode_intra(VP9_COMP *cpi, MACROBLOCK *x, int use_16x16_pred);
|
|||
void vp9_encode_intra16x16mby(VP9_COMMON *const cm, MACROBLOCK *x);
|
||||
void vp9_encode_intra16x16mbuv(VP9_COMMON *const cm, MACROBLOCK *x);
|
||||
void vp9_encode_intra4x4mby(MACROBLOCK *mb);
|
||||
void vp9_encode_intra4x4block(MACROBLOCK *x, int ib);
|
||||
void vp9_encode_intra8x8mby(MACROBLOCK *x);
|
||||
void vp9_encode_intra8x8mbuv(MACROBLOCK *x);
|
||||
void vp9_encode_intra8x8(MACROBLOCK *x, int ib);
|
||||
|
|
|
@ -544,15 +544,16 @@ static void optimize_b(VP9_COMMON *const cm,
|
|||
MACROBLOCK *mb, int ib, PLANE_TYPE type,
|
||||
const int16_t *dequant_ptr,
|
||||
ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
|
||||
int tx_size) {
|
||||
int tx_size, int y_blocks) {
|
||||
const int ref = mb->e_mbd.mode_info_context->mbmi.ref_frame != INTRA_FRAME;
|
||||
MACROBLOCKD *const xd = &mb->e_mbd;
|
||||
vp9_token_state tokens[1025][2];
|
||||
unsigned best_index[1025][2];
|
||||
const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, ib);
|
||||
const int16_t *coeff_ptr = mb->coeff + ib * 16;
|
||||
int16_t *qcoeff_ptr = xd->qcoeff + ib * 16;
|
||||
int16_t *dqcoeff_ptr = xd->dqcoeff + ib * 16;
|
||||
int eob = xd->eobs[ib], final_eob, sz = 0;
|
||||
int16_t *qcoeff_ptr;
|
||||
int16_t *dqcoeff_ptr;
|
||||
int eob = xd->plane[pb_idx.plane].eobs[pb_idx.block], final_eob, sz = 0;
|
||||
const int i0 = 0;
|
||||
int rc, x, next, i;
|
||||
int64_t rdmult, rddiv, rd_cost0, rd_cost1;
|
||||
|
@ -582,6 +583,9 @@ static void optimize_b(VP9_COMMON *const cm,
|
|||
nzc0 = nzc1 = nzc;
|
||||
#endif
|
||||
|
||||
assert((!type && !pb_idx.plane) || (type && pb_idx.plane));
|
||||
dqcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, pb_idx.block, 16);
|
||||
qcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, pb_idx.block, 16);
|
||||
switch (tx_size) {
|
||||
default:
|
||||
case TX_4X4: {
|
||||
|
@ -641,6 +645,7 @@ static void optimize_b(VP9_COMMON *const cm,
|
|||
#endif
|
||||
break;
|
||||
}
|
||||
assert(eob <= default_eob);
|
||||
|
||||
/* Now set up a Viterbi trellis to evaluate alternative roundings. */
|
||||
rdmult = mb->rdmult * err_mult;
|
||||
|
@ -838,7 +843,7 @@ static void optimize_b(VP9_COMMON *const cm,
|
|||
}
|
||||
final_eob++;
|
||||
|
||||
xd->eobs[ib] = final_eob;
|
||||
xd->plane[pb_idx.plane].eobs[pb_idx.block] = final_eob;
|
||||
*a = *l = (final_eob > 0);
|
||||
#if CONFIG_CODE_NONZEROCOUNT
|
||||
assert(final_nzc == final_nzc_exp);
|
||||
|
@ -864,7 +869,7 @@ void vp9_optimize_mby_4x4(VP9_COMMON *const cm, MACROBLOCK *x) {
|
|||
for (b = 0; b < 16; b++) {
|
||||
optimize_b(cm, x, b, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[b].dequant,
|
||||
ta + vp9_block2above[TX_4X4][b],
|
||||
tl + vp9_block2left[TX_4X4][b], TX_4X4);
|
||||
tl + vp9_block2left[TX_4X4][b], TX_4X4, 16);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -886,7 +891,7 @@ void vp9_optimize_mbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x) {
|
|||
for (b = 16; b < 24; b++) {
|
||||
optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[b].dequant,
|
||||
ta + vp9_block2above[TX_4X4][b],
|
||||
tl + vp9_block2left[TX_4X4][b], TX_4X4);
|
||||
tl + vp9_block2left[TX_4X4][b], TX_4X4, 16);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -915,7 +920,7 @@ void vp9_optimize_mby_8x8(VP9_COMMON *const cm, MACROBLOCK *x) {
|
|||
ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0;
|
||||
ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0;
|
||||
optimize_b(cm, x, b, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[b].dequant,
|
||||
&above_ec, &left_ec, TX_8X8);
|
||||
&above_ec, &left_ec, TX_8X8, 16);
|
||||
a[1] = a[0] = above_ec;
|
||||
l[1] = l[0] = left_ec;
|
||||
}
|
||||
|
@ -935,7 +940,7 @@ void vp9_optimize_mbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x) {
|
|||
ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0;
|
||||
ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0;
|
||||
optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[b].dequant,
|
||||
&above_ec, &left_ec, TX_8X8);
|
||||
&above_ec, &left_ec, TX_8X8, 16);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -955,7 +960,7 @@ void vp9_optimize_mby_16x16(VP9_COMMON *const cm, MACROBLOCK *x) {
|
|||
ta = (t_above->y1[0] + t_above->y1[1] + t_above->y1[2] + t_above->y1[3]) != 0;
|
||||
tl = (t_left->y1[0] + t_left->y1[1] + t_left->y1[2] + t_left->y1[3]) != 0;
|
||||
optimize_b(cm, x, 0, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
|
||||
&ta, &tl, TX_16X16);
|
||||
&ta, &tl, TX_16X16, 16);
|
||||
}
|
||||
|
||||
static void optimize_mb_16x16(VP9_COMMON *const cm, MACROBLOCK *x) {
|
||||
|
@ -973,7 +978,7 @@ void vp9_optimize_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x) {
|
|||
ta = (a[0] + a[1] + a[2] + a[3] + a1[0] + a1[1] + a1[2] + a1[3]) != 0;
|
||||
tl = (l[0] + l[1] + l[2] + l[3] + l1[0] + l1[1] + l1[2] + l1[3]) != 0;
|
||||
optimize_b(cm, x, 0, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
|
||||
&ta, &tl, TX_32X32);
|
||||
&ta, &tl, TX_32X32, 64);
|
||||
}
|
||||
|
||||
void vp9_optimize_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x) {
|
||||
|
@ -992,7 +997,7 @@ void vp9_optimize_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x) {
|
|||
const int x_idx = n & 1, y_idx = n >> 1;
|
||||
|
||||
optimize_b(cm, x, n * 16, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
|
||||
ta + x_idx, tl + y_idx, TX_16X16);
|
||||
ta + x_idx, tl + y_idx, TX_16X16, 64);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1016,7 +1021,7 @@ void vp9_optimize_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x) {
|
|||
const int x_idx = n & 3, y_idx = n >> 2;
|
||||
|
||||
optimize_b(cm, x, n * 4, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
|
||||
ta + x_idx, tl + y_idx, TX_8X8);
|
||||
ta + x_idx, tl + y_idx, TX_8X8, 64);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1032,7 +1037,7 @@ void vp9_optimize_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x) {
|
|||
const int x_idx = n & 7, y_idx = n >> 3;
|
||||
|
||||
optimize_b(cm, x, n, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
|
||||
ta + x_idx, tl + y_idx, TX_4X4);
|
||||
ta + x_idx, tl + y_idx, TX_4X4, 64);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1051,7 +1056,7 @@ void vp9_optimize_sbuv_16x16(VP9_COMMON *const cm, MACROBLOCK *x) {
|
|||
above_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0;
|
||||
left_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0;
|
||||
optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
|
||||
&above_ec, &left_ec, TX_16X16);
|
||||
&above_ec, &left_ec, TX_16X16, 64);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1071,7 +1076,7 @@ void vp9_optimize_sbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x) {
|
|||
above_ec = (a[0] + a[1]) != 0;
|
||||
left_ec = (l[0] + l[1]) != 0;
|
||||
optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
|
||||
&above_ec, &left_ec, TX_8X8);
|
||||
&above_ec, &left_ec, TX_8X8, 64);
|
||||
a[0] = a[1] = above_ec;
|
||||
l[0] = l[1] = left_ec;
|
||||
}
|
||||
|
@ -1091,7 +1096,7 @@ void vp9_optimize_sbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x) {
|
|||
a = ta + vp9_block2above_sb[TX_4X4][b];
|
||||
l = tl + vp9_block2left_sb[TX_4X4][b];
|
||||
optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
|
||||
a, l, TX_4X4);
|
||||
a, l, TX_4X4, 64);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1115,7 +1120,7 @@ void vp9_optimize_sb64y_32x32(VP9_COMMON *const cm, MACROBLOCK *x) {
|
|||
const int x_idx = n & 1, y_idx = n >> 1;
|
||||
|
||||
optimize_b(cm, x, n * 64, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
|
||||
ta + x_idx, tl + y_idx, TX_32X32);
|
||||
ta + x_idx, tl + y_idx, TX_32X32, 256);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1143,7 +1148,7 @@ void vp9_optimize_sb64y_16x16(VP9_COMMON *const cm, MACROBLOCK *x) {
|
|||
const int x_idx = n & 3, y_idx = n >> 2;
|
||||
|
||||
optimize_b(cm, x, n * 16, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
|
||||
ta + x_idx, tl + y_idx, TX_16X16);
|
||||
ta + x_idx, tl + y_idx, TX_16X16, 256);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1179,7 +1184,7 @@ void vp9_optimize_sb64y_8x8(VP9_COMMON *const cm, MACROBLOCK *x) {
|
|||
const int x_idx = n & 7, y_idx = n >> 3;
|
||||
|
||||
optimize_b(cm, x, n * 4, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
|
||||
ta + x_idx, tl + y_idx, TX_8X8);
|
||||
ta + x_idx, tl + y_idx, TX_8X8, 256);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1199,7 +1204,7 @@ void vp9_optimize_sb64y_4x4(VP9_COMMON *const cm, MACROBLOCK *x) {
|
|||
const int x_idx = n & 15, y_idx = n >> 4;
|
||||
|
||||
optimize_b(cm, x, n, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
|
||||
ta + x_idx, tl + y_idx, TX_4X4);
|
||||
ta + x_idx, tl + y_idx, TX_4X4, 256);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1222,7 +1227,7 @@ void vp9_optimize_sb64uv_32x32(VP9_COMMON *const cm, MACROBLOCK *x) {
|
|||
a_ec = (a[0] + a[1] + a1[0] + a1[1] + a2[0] + a2[1] + a3[0] + a3[1]) != 0;
|
||||
l_ec = (l[0] + l[1] + l1[0] + l1[1] + l2[0] + l2[1] + l3[0] + l3[1]) != 0;
|
||||
optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
|
||||
&a_ec, &l_ec, TX_32X32);
|
||||
&a_ec, &l_ec, TX_32X32, 256);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1244,7 +1249,7 @@ void vp9_optimize_sb64uv_16x16(VP9_COMMON *const cm, MACROBLOCK *x) {
|
|||
above_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0;
|
||||
left_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0;
|
||||
optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
|
||||
&above_ec, &left_ec, TX_16X16);
|
||||
&above_ec, &left_ec, TX_16X16, 256);
|
||||
a[0] = a[1] = a1[0] = a1[1] = above_ec;
|
||||
l[0] = l[1] = l1[0] = l1[1] = left_ec;
|
||||
}
|
||||
|
@ -1266,7 +1271,7 @@ void vp9_optimize_sb64uv_8x8(VP9_COMMON *const cm, MACROBLOCK *x) {
|
|||
above_ec = (a[0] + a[1]) != 0;
|
||||
left_ec = (l[0] + l[1]) != 0;
|
||||
optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
|
||||
&above_ec, &left_ec, TX_8X8);
|
||||
&above_ec, &left_ec, TX_8X8, 256);
|
||||
a[0] = a[1] = above_ec;
|
||||
l[0] = l[1] = left_ec;
|
||||
}
|
||||
|
@ -1286,7 +1291,7 @@ void vp9_optimize_sb64uv_4x4(VP9_COMMON *const cm, MACROBLOCK *x) {
|
|||
a = ta + vp9_block2above_sb64[TX_4X4][b];
|
||||
l = tl + vp9_block2left_sb64[TX_4X4][b];
|
||||
optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
|
||||
a, l, TX_4X4);
|
||||
a, l, TX_4X4, 256);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -21,14 +21,9 @@
|
|||
extern int enc_debug;
|
||||
#endif
|
||||
|
||||
static INLINE int plane_idx(MACROBLOCKD *xd, int b_idx) {
|
||||
const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
|
||||
if (b_idx < (16 << (sb_type * 2)))
|
||||
return 0; // Y
|
||||
else if (b_idx < (20 << (sb_type * 2)))
|
||||
return 16; // U
|
||||
assert(b_idx < (24 << (sb_type * 2)));
|
||||
return 20; // V
|
||||
static INLINE int plane_idx(int plane) {
|
||||
return plane == 0 ? 0 :
|
||||
plane == 1 ? 16 : 20;
|
||||
}
|
||||
|
||||
void vp9_ht_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) {
|
||||
|
@ -39,8 +34,9 @@ void vp9_ht_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) {
|
|||
int zbin;
|
||||
int x, y, z, sz;
|
||||
int16_t *coeff_ptr = mb->coeff + b_idx * 16;
|
||||
int16_t *qcoeff_ptr = xd->qcoeff + b_idx * 16;
|
||||
int16_t *dqcoeff_ptr = xd->dqcoeff + b_idx * 16;
|
||||
// ht is luma-only
|
||||
int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[0].qcoeff, b_idx, 16);
|
||||
int16_t *dqcoeff_ptr = BLOCK_OFFSET(xd->plane[0].dqcoeff, b_idx, 16);
|
||||
int16_t *zbin_boost_ptr = b->zrun_zbin_boost;
|
||||
int16_t *zbin_ptr = b->zbin;
|
||||
int16_t *round_ptr = b->round;
|
||||
|
@ -53,7 +49,6 @@ void vp9_ht_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) {
|
|||
int nzc = 0;
|
||||
#endif
|
||||
|
||||
assert(plane_idx(xd, b_idx) == 0);
|
||||
switch (tx_type) {
|
||||
case ADST_DCT:
|
||||
pt_scan = vp9_row_scan_4x4;
|
||||
|
@ -101,23 +96,26 @@ void vp9_ht_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) {
|
|||
}
|
||||
}
|
||||
|
||||
xd->eobs[b_idx] = eob + 1;
|
||||
xd->plane[0].eobs[b_idx] = eob + 1;
|
||||
#if CONFIG_CODE_NONZEROCOUNT
|
||||
xd->nzcs[b_idx] = nzc;
|
||||
#endif
|
||||
}
|
||||
|
||||
void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx) {
|
||||
void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx, int y_blocks) {
|
||||
MACROBLOCKD *const xd = &mb->e_mbd;
|
||||
const int c_idx = plane_idx(xd, b_idx);
|
||||
const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx);
|
||||
const int c_idx = plane_idx(pb_idx.plane);
|
||||
BLOCK *const b = &mb->block[c_idx];
|
||||
BLOCKD *const d = &xd->block[c_idx];
|
||||
int i, rc, eob;
|
||||
int zbin;
|
||||
int x, y, z, sz;
|
||||
int16_t *coeff_ptr = mb->coeff + b_idx * 16;
|
||||
int16_t *qcoeff_ptr = xd->qcoeff + b_idx * 16;
|
||||
int16_t *dqcoeff_ptr = xd->dqcoeff + b_idx * 16;
|
||||
int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff,
|
||||
pb_idx.block, 16);
|
||||
int16_t *dqcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff,
|
||||
pb_idx.block, 16);
|
||||
int16_t *zbin_boost_ptr = b->zrun_zbin_boost;
|
||||
int16_t *zbin_ptr = b->zbin;
|
||||
int16_t *round_ptr = b->round;
|
||||
|
@ -129,6 +127,9 @@ void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx) {
|
|||
int nzc = 0;
|
||||
#endif
|
||||
|
||||
if (c_idx == 0) assert(pb_idx.plane == 0);
|
||||
if (c_idx == 16) assert(pb_idx.plane == 1);
|
||||
if (c_idx == 20) assert(pb_idx.plane == 2);
|
||||
vpx_memset(qcoeff_ptr, 0, 32);
|
||||
vpx_memset(dqcoeff_ptr, 0, 32);
|
||||
|
||||
|
@ -165,7 +166,7 @@ void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx) {
|
|||
}
|
||||
}
|
||||
|
||||
xd->eobs[b_idx] = eob + 1;
|
||||
xd->plane[pb_idx.plane].eobs[pb_idx.block] = eob + 1;
|
||||
#if CONFIG_CODE_NONZEROCOUNT
|
||||
xd->nzcs[b_idx] = nzc;
|
||||
#endif
|
||||
|
@ -179,16 +180,20 @@ void vp9_quantize_mby_4x4(MACROBLOCK *x) {
|
|||
if (tx_type != DCT_DCT) {
|
||||
vp9_ht_quantize_b_4x4(x, i, tx_type);
|
||||
} else {
|
||||
x->quantize_b_4x4(x, i);
|
||||
x->quantize_b_4x4(x, i, 16);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void vp9_quantize_mbuv_4x4(MACROBLOCK *x) {
|
||||
int i;
|
||||
const MACROBLOCKD * const xd = &x->e_mbd;
|
||||
const BLOCK_SIZE_TYPE real_sb_type = xd->mode_info_context->mbmi.sb_type;
|
||||
xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_MB16X16;
|
||||
|
||||
for (i = 16; i < 24; i++)
|
||||
x->quantize_b_4x4(x, i);
|
||||
x->quantize_b_4x4(x, i, 16);
|
||||
xd->mode_info_context->mbmi.sb_type = real_sb_type;
|
||||
}
|
||||
|
||||
void vp9_quantize_mb_4x4(MACROBLOCK *x) {
|
||||
|
@ -196,11 +201,15 @@ void vp9_quantize_mb_4x4(MACROBLOCK *x) {
|
|||
vp9_quantize_mbuv_4x4(x);
|
||||
}
|
||||
|
||||
void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) {
|
||||
void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type,
|
||||
int y_blocks) {
|
||||
MACROBLOCKD *const xd = &mb->e_mbd;
|
||||
int16_t *qcoeff_ptr = xd->qcoeff + 16 * b_idx;
|
||||
int16_t *dqcoeff_ptr = xd->dqcoeff + 16 * b_idx;
|
||||
const int c_idx = plane_idx(xd, b_idx);
|
||||
const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx);
|
||||
const int c_idx = plane_idx(pb_idx.plane);
|
||||
int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff,
|
||||
pb_idx.block, 16);
|
||||
int16_t *dqcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff,
|
||||
pb_idx.block, 16);
|
||||
BLOCK *const b = &mb->block[c_idx];
|
||||
BLOCKD *const d = &xd->block[c_idx];
|
||||
const int *pt_scan;
|
||||
|
@ -217,6 +226,9 @@ void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) {
|
|||
break;
|
||||
}
|
||||
|
||||
if (c_idx == 0) assert(pb_idx.plane == 0);
|
||||
if (c_idx == 16) assert(pb_idx.plane == 1);
|
||||
if (c_idx == 20) assert(pb_idx.plane == 2);
|
||||
vpx_memset(qcoeff_ptr, 0, 64 * sizeof(int16_t));
|
||||
vpx_memset(dqcoeff_ptr, 0, 64 * sizeof(int16_t));
|
||||
|
||||
|
@ -295,12 +307,12 @@ void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) {
|
|||
}
|
||||
}
|
||||
}
|
||||
xd->eobs[b_idx] = eob + 1;
|
||||
xd->plane[pb_idx.plane].eobs[pb_idx.block] = eob + 1;
|
||||
#if CONFIG_CODE_NONZEROCOUNT
|
||||
xd->nzcs[b_idx] = nzc;
|
||||
#endif
|
||||
} else {
|
||||
xd->eobs[b_idx] = 0;
|
||||
xd->plane[pb_idx.plane].eobs[pb_idx.block] = 0;
|
||||
#if CONFIG_CODE_NONZEROCOUNT
|
||||
xd->nzcs[b_idx] = 0;
|
||||
#endif
|
||||
|
@ -317,12 +329,15 @@ void vp9_quantize_mby_8x8(MACROBLOCK *x) {
|
|||
#endif
|
||||
for (i = 0; i < 16; i += 4) {
|
||||
TX_TYPE tx_type = get_tx_type_8x8(&x->e_mbd, (i & 8) + ((i & 4) >> 1));
|
||||
x->quantize_b_8x8(x, i, tx_type);
|
||||
x->quantize_b_8x8(x, i, tx_type, 16);
|
||||
}
|
||||
}
|
||||
|
||||
void vp9_quantize_mbuv_8x8(MACROBLOCK *x) {
|
||||
int i;
|
||||
const MACROBLOCKD * const xd = &x->e_mbd;
|
||||
const BLOCK_SIZE_TYPE real_sb_type = xd->mode_info_context->mbmi.sb_type;
|
||||
xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_MB16X16;
|
||||
|
||||
#if CONFIG_CODE_NONZEROCOUNT
|
||||
for (i = 16; i < 24; i ++) {
|
||||
|
@ -330,7 +345,8 @@ void vp9_quantize_mbuv_8x8(MACROBLOCK *x) {
|
|||
}
|
||||
#endif
|
||||
for (i = 16; i < 24; i += 4)
|
||||
x->quantize_b_8x8(x, i, DCT_DCT);
|
||||
x->quantize_b_8x8(x, i, DCT_DCT, 16);
|
||||
xd->mode_info_context->mbmi.sb_type = real_sb_type;
|
||||
}
|
||||
|
||||
void vp9_quantize_mb_8x8(MACROBLOCK *x) {
|
||||
|
@ -346,7 +362,7 @@ void vp9_quantize_mby_16x16(MACROBLOCK *x) {
|
|||
x->e_mbd.nzcs[i] = 0;
|
||||
}
|
||||
#endif
|
||||
x->quantize_b_16x16(x, 0, tx_type);
|
||||
x->quantize_b_16x16(x, 0, tx_type, 16);
|
||||
}
|
||||
|
||||
void vp9_quantize_mb_16x16(MACROBLOCK *x) {
|
||||
|
@ -415,9 +431,11 @@ static void quantize(int16_t *zbin_boost_orig_ptr,
|
|||
#endif
|
||||
}
|
||||
|
||||
void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) {
|
||||
void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type,
|
||||
int y_blocks) {
|
||||
MACROBLOCKD *const xd = &mb->e_mbd;
|
||||
const int c_idx = plane_idx(xd, b_idx);
|
||||
const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx);
|
||||
const int c_idx = plane_idx(pb_idx.plane);
|
||||
BLOCK *const b = &mb->block[c_idx];
|
||||
BLOCKD *const d = &xd->block[c_idx];
|
||||
const int *pt_scan;
|
||||
|
@ -434,37 +452,44 @@ void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) {
|
|||
break;
|
||||
}
|
||||
|
||||
if (c_idx == 0) assert(pb_idx.plane == 0);
|
||||
if (c_idx == 16) assert(pb_idx.plane == 1);
|
||||
if (c_idx == 20) assert(pb_idx.plane == 2);
|
||||
quantize(b->zrun_zbin_boost,
|
||||
mb->coeff + 16 * b_idx,
|
||||
256, b->skip_block,
|
||||
b->zbin, b->round, b->quant, b->quant_shift,
|
||||
xd->qcoeff + 16 * b_idx,
|
||||
xd->dqcoeff + 16 * b_idx,
|
||||
BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, pb_idx.block, 16),
|
||||
BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, pb_idx.block, 16),
|
||||
d->dequant,
|
||||
b->zbin_extra,
|
||||
&xd->eobs[b_idx],
|
||||
&xd->plane[pb_idx.plane].eobs[pb_idx.block],
|
||||
#if CONFIG_CODE_NONZEROCOUNT
|
||||
&xd->nzcs[b_idx],
|
||||
#endif
|
||||
pt_scan, 1);
|
||||
}
|
||||
|
||||
void vp9_regular_quantize_b_32x32(MACROBLOCK *mb, int b_idx) {
|
||||
void vp9_regular_quantize_b_32x32(MACROBLOCK *mb, int b_idx, int y_blocks) {
|
||||
MACROBLOCKD *const xd = &mb->e_mbd;
|
||||
const int c_idx = plane_idx(xd, b_idx);
|
||||
const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx);
|
||||
const int c_idx = plane_idx(pb_idx.plane);
|
||||
BLOCK *const b = &mb->block[c_idx];
|
||||
BLOCKD *const d = &xd->block[c_idx];
|
||||
|
||||
if (c_idx == 0) assert(pb_idx.plane == 0);
|
||||
if (c_idx == 16) assert(pb_idx.plane == 1);
|
||||
if (c_idx == 20) assert(pb_idx.plane == 2);
|
||||
quantize(b->zrun_zbin_boost,
|
||||
mb->coeff + b_idx * 16,
|
||||
1024, b->skip_block,
|
||||
b->zbin,
|
||||
b->round, b->quant, b->quant_shift,
|
||||
xd->qcoeff + b_idx * 16,
|
||||
xd->dqcoeff + b_idx * 16,
|
||||
BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, pb_idx.block, 16),
|
||||
BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, pb_idx.block, 16),
|
||||
d->dequant,
|
||||
b->zbin_extra,
|
||||
&xd->eobs[b_idx],
|
||||
&xd->plane[pb_idx.plane].eobs[pb_idx.block],
|
||||
#if CONFIG_CODE_NONZEROCOUNT
|
||||
&xd->nzcs[b_idx],
|
||||
#endif
|
||||
|
@ -472,7 +497,7 @@ void vp9_regular_quantize_b_32x32(MACROBLOCK *mb, int b_idx) {
|
|||
}
|
||||
|
||||
void vp9_quantize_sby_32x32(MACROBLOCK *x) {
|
||||
vp9_regular_quantize_b_32x32(x, 0);
|
||||
vp9_regular_quantize_b_32x32(x, 0, 64);
|
||||
}
|
||||
|
||||
void vp9_quantize_sby_16x16(MACROBLOCK *x) {
|
||||
|
@ -481,7 +506,7 @@ void vp9_quantize_sby_16x16(MACROBLOCK *x) {
|
|||
for (n = 0; n < 4; n++) {
|
||||
TX_TYPE tx_type = get_tx_type_16x16(&x->e_mbd,
|
||||
(16 * (n & 2)) + ((n & 1) * 4));
|
||||
x->quantize_b_16x16(x, n * 16, tx_type);
|
||||
x->quantize_b_16x16(x, n * 16, tx_type, 64);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -491,7 +516,7 @@ void vp9_quantize_sby_8x8(MACROBLOCK *x) {
|
|||
for (n = 0; n < 16; n++) {
|
||||
TX_TYPE tx_type = get_tx_type_8x8(&x->e_mbd,
|
||||
(4 * (n & 12)) + ((n & 3) * 2));
|
||||
x->quantize_b_8x8(x, n * 4, tx_type);
|
||||
x->quantize_b_8x8(x, n * 4, tx_type, 64);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -504,35 +529,35 @@ void vp9_quantize_sby_4x4(MACROBLOCK *x) {
|
|||
if (tx_type != DCT_DCT) {
|
||||
vp9_ht_quantize_b_4x4(x, n, tx_type);
|
||||
} else {
|
||||
x->quantize_b_4x4(x, n);
|
||||
x->quantize_b_4x4(x, n, 64);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void vp9_quantize_sbuv_16x16(MACROBLOCK *x) {
|
||||
x->quantize_b_16x16(x, 64, DCT_DCT);
|
||||
x->quantize_b_16x16(x, 80, DCT_DCT);
|
||||
x->quantize_b_16x16(x, 64, DCT_DCT, 64);
|
||||
x->quantize_b_16x16(x, 80, DCT_DCT, 64);
|
||||
}
|
||||
|
||||
void vp9_quantize_sbuv_8x8(MACROBLOCK *x) {
|
||||
int i;
|
||||
|
||||
for (i = 64; i < 96; i += 4)
|
||||
x->quantize_b_8x8(x, i, DCT_DCT);
|
||||
x->quantize_b_8x8(x, i, DCT_DCT, 64);
|
||||
}
|
||||
|
||||
void vp9_quantize_sbuv_4x4(MACROBLOCK *x) {
|
||||
int i;
|
||||
|
||||
for (i = 64; i < 96; i++)
|
||||
x->quantize_b_4x4(x, i);
|
||||
x->quantize_b_4x4(x, i, 64);
|
||||
}
|
||||
|
||||
void vp9_quantize_sb64y_32x32(MACROBLOCK *x) {
|
||||
int n;
|
||||
|
||||
for (n = 0; n < 4; n++)
|
||||
vp9_regular_quantize_b_32x32(x, n * 64);
|
||||
vp9_regular_quantize_b_32x32(x, n * 64, 256);
|
||||
}
|
||||
|
||||
void vp9_quantize_sb64y_16x16(MACROBLOCK *x) {
|
||||
|
@ -541,7 +566,7 @@ void vp9_quantize_sb64y_16x16(MACROBLOCK *x) {
|
|||
for (n = 0; n < 16; n++) {
|
||||
TX_TYPE tx_type = get_tx_type_16x16(&x->e_mbd,
|
||||
(16 * (n & 12)) + ((n & 3) * 4));
|
||||
x->quantize_b_16x16(x, n * 16, tx_type);
|
||||
x->quantize_b_16x16(x, n * 16, tx_type, 256);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -551,7 +576,7 @@ void vp9_quantize_sb64y_8x8(MACROBLOCK *x) {
|
|||
for (n = 0; n < 64; n++) {
|
||||
TX_TYPE tx_type = get_tx_type_8x8(&x->e_mbd,
|
||||
(4 * (n & 56)) + ((n & 7) * 2));
|
||||
x->quantize_b_8x8(x, n * 4, tx_type);
|
||||
x->quantize_b_8x8(x, n * 4, tx_type, 256);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -564,44 +589,45 @@ void vp9_quantize_sb64y_4x4(MACROBLOCK *x) {
|
|||
if (tx_type != DCT_DCT) {
|
||||
vp9_ht_quantize_b_4x4(x, n, tx_type);
|
||||
} else {
|
||||
x->quantize_b_4x4(x, n);
|
||||
x->quantize_b_4x4(x, n, 256);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void vp9_quantize_sb64uv_32x32(MACROBLOCK *x) {
|
||||
vp9_regular_quantize_b_32x32(x, 256);
|
||||
vp9_regular_quantize_b_32x32(x, 320);
|
||||
vp9_regular_quantize_b_32x32(x, 256, 256);
|
||||
vp9_regular_quantize_b_32x32(x, 320, 256);
|
||||
}
|
||||
|
||||
void vp9_quantize_sb64uv_16x16(MACROBLOCK *x) {
|
||||
int i;
|
||||
|
||||
for (i = 256; i < 384; i += 16)
|
||||
x->quantize_b_16x16(x, i, DCT_DCT);
|
||||
x->quantize_b_16x16(x, i, DCT_DCT, 256);
|
||||
}
|
||||
|
||||
void vp9_quantize_sb64uv_8x8(MACROBLOCK *x) {
|
||||
int i;
|
||||
|
||||
for (i = 256; i < 384; i += 4)
|
||||
x->quantize_b_8x8(x, i, DCT_DCT);
|
||||
x->quantize_b_8x8(x, i, DCT_DCT, 256);
|
||||
}
|
||||
|
||||
void vp9_quantize_sb64uv_4x4(MACROBLOCK *x) {
|
||||
int i;
|
||||
|
||||
for (i = 256; i < 384; i++)
|
||||
x->quantize_b_4x4(x, i);
|
||||
x->quantize_b_4x4(x, i, 256);
|
||||
}
|
||||
|
||||
/* quantize_b_pair function pointer in MACROBLOCK structure is set to one of
|
||||
* these two C functions if corresponding optimized routine is not available.
|
||||
* NEON optimized version implements currently the fast quantization for pair
|
||||
* of blocks. */
|
||||
void vp9_regular_quantize_b_4x4_pair(MACROBLOCK *x, int b_idx1, int b_idx2) {
|
||||
vp9_regular_quantize_b_4x4(x, b_idx1);
|
||||
vp9_regular_quantize_b_4x4(x, b_idx2);
|
||||
void vp9_regular_quantize_b_4x4_pair(MACROBLOCK *x, int b_idx1, int b_idx2,
|
||||
int y_blocks) {
|
||||
vp9_regular_quantize_b_4x4(x, b_idx1, y_blocks);
|
||||
vp9_regular_quantize_b_4x4(x, b_idx2, y_blocks);
|
||||
}
|
||||
|
||||
static void invert_quant(int16_t *quant, uint8_t *shift, int d) {
|
||||
|
|
|
@ -27,11 +27,15 @@
|
|||
#endif
|
||||
|
||||
void vp9_ht_quantize_b_4x4(MACROBLOCK *mb, int b_ix, TX_TYPE type);
|
||||
void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx);
|
||||
void vp9_regular_quantize_b_4x4_pair(MACROBLOCK *mb, int b_idx1, int b_idx2);
|
||||
void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type);
|
||||
void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type);
|
||||
void vp9_regular_quantize_b_32x32(MACROBLOCK *mb, int b_idx);
|
||||
void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx, int y_blocks);
|
||||
void vp9_regular_quantize_b_4x4_pair(MACROBLOCK *mb, int b_idx1, int b_idx2,
|
||||
int y_blocks);
|
||||
void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type,
|
||||
int y_blocks);
|
||||
void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type,
|
||||
int y_blocks);
|
||||
void vp9_regular_quantize_b_32x32(MACROBLOCK *mb, int b_idx,
|
||||
int y_blocks);
|
||||
|
||||
void vp9_quantize_mb_4x4(MACROBLOCK *x);
|
||||
void vp9_quantize_mb_8x8(MACROBLOCK *x);
|
||||
|
|
|
@ -348,35 +348,36 @@ int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) {
|
|||
}
|
||||
|
||||
int vp9_mbblock_error_c(MACROBLOCK *mb) {
|
||||
MACROBLOCKD * const xd = &mb->e_mbd;
|
||||
BLOCK *be;
|
||||
BLOCKD *bd;
|
||||
int i, j;
|
||||
int berror, error = 0;
|
||||
int i;
|
||||
int error = 0;
|
||||
|
||||
for (i = 0; i < 16; i++) {
|
||||
be = &mb->block[i];
|
||||
bd = &mb->e_mbd.block[i];
|
||||
berror = 0;
|
||||
for (j = 0; j < 16; j++) {
|
||||
int this_diff = be->coeff[j] - bd->dqcoeff[j];
|
||||
berror += this_diff * this_diff;
|
||||
}
|
||||
error += berror;
|
||||
error += vp9_block_error(be->coeff,
|
||||
BLOCK_OFFSET(xd->plane[0].dqcoeff, i, 16), 16);
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
int vp9_mbuverror_c(MACROBLOCK *mb) {
|
||||
MACROBLOCKD * const xd = &mb->e_mbd;
|
||||
BLOCK *be;
|
||||
BLOCKD *bd;
|
||||
|
||||
int i, error = 0;
|
||||
|
||||
for (i = 16; i < 24; i++) {
|
||||
for (i = 16; i < 20; i++) {
|
||||
be = &mb->block[i];
|
||||
bd = &mb->e_mbd.block[i];
|
||||
|
||||
error += vp9_block_error_c(be->coeff, bd->dqcoeff, 16);
|
||||
error += vp9_block_error(be->coeff,
|
||||
BLOCK_OFFSET(xd->plane[1].dqcoeff, i - 16, 16),
|
||||
16);
|
||||
}
|
||||
for (i = 20; i < 24; i++) {
|
||||
be = &mb->block[i];
|
||||
error += vp9_block_error(be->coeff,
|
||||
BLOCK_OFFSET(xd->plane[2].dqcoeff, i - 20, 16),
|
||||
16);
|
||||
}
|
||||
|
||||
return error;
|
||||
|
@ -430,15 +431,18 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
|
|||
int ib, PLANE_TYPE type,
|
||||
ENTROPY_CONTEXT *a,
|
||||
ENTROPY_CONTEXT *l,
|
||||
TX_SIZE tx_size) {
|
||||
TX_SIZE tx_size,
|
||||
int y_blocks) {
|
||||
MACROBLOCKD *const xd = &mb->e_mbd;
|
||||
MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
|
||||
int pt;
|
||||
const int eob = xd->eobs[ib];
|
||||
int c = 0;
|
||||
int cost = 0, pad;
|
||||
const int *scan, *nb;
|
||||
const int16_t *qcoeff_ptr = xd->qcoeff + ib * 16;
|
||||
const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, ib);
|
||||
const int eob = xd->plane[pb_idx.plane].eobs[pb_idx.block];
|
||||
const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff,
|
||||
pb_idx.block, 16);
|
||||
const int ref = mbmi->ref_frame != INTRA_FRAME;
|
||||
unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
|
||||
mb->token_costs[tx_size][type][ref];
|
||||
|
@ -460,6 +464,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
|
|||
uint8_t token_cache[1024];
|
||||
|
||||
// Check for consistency of tx_size with mode info
|
||||
assert((!type && !pb_idx.plane) || (type && pb_idx.plane));
|
||||
if (type == PLANE_TYPE_Y_WITH_DC) {
|
||||
assert(xd->mode_info_context->mbmi.txfm_size == tx_size);
|
||||
} else {
|
||||
|
@ -562,6 +567,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
|
|||
abort();
|
||||
break;
|
||||
}
|
||||
assert(eob <= seg_eob);
|
||||
|
||||
VP9_COMBINEENTROPYCONTEXTS(pt, a_ec, l_ec);
|
||||
nb = vp9_get_coef_neighbors_handle(scan, &pad);
|
||||
|
@ -644,7 +650,7 @@ static int rdcost_mby_4x4(VP9_COMMON *const cm, MACROBLOCK *mb) {
|
|||
cost += cost_coeffs(cm, mb, b, PLANE_TYPE_Y_WITH_DC,
|
||||
ta + vp9_block2above[TX_4X4][b],
|
||||
tl + vp9_block2left[TX_4X4][b],
|
||||
TX_4X4);
|
||||
TX_4X4, 16);
|
||||
|
||||
return cost;
|
||||
}
|
||||
|
@ -680,7 +686,7 @@ static int rdcost_mby_8x8(VP9_COMMON *const cm, MACROBLOCK *mb) {
|
|||
cost += cost_coeffs(cm, mb, b, PLANE_TYPE_Y_WITH_DC,
|
||||
ta + vp9_block2above[TX_8X8][b],
|
||||
tl + vp9_block2left[TX_8X8][b],
|
||||
TX_8X8);
|
||||
TX_8X8, 16);
|
||||
|
||||
return cost;
|
||||
}
|
||||
|
@ -710,7 +716,7 @@ static int rdcost_mby_16x16(VP9_COMMON *const cm, MACROBLOCK *mb) {
|
|||
vpx_memcpy(&t_above, xd->above_context, sizeof(t_above));
|
||||
vpx_memcpy(&t_left, xd->left_context, sizeof(t_left));
|
||||
|
||||
return cost_coeffs(cm, mb, 0, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_16X16);
|
||||
return cost_coeffs(cm, mb, 0, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_16X16, 16);
|
||||
}
|
||||
|
||||
static void macro_block_yrd_16x16(VP9_COMMON *const cm, MACROBLOCK *mb,
|
||||
|
@ -858,6 +864,26 @@ static int vp9_sb_block_error_c(int16_t *coeff, int16_t *dqcoeff,
|
|||
return error > INT_MAX ? INT_MAX : (int)error;
|
||||
}
|
||||
|
||||
static int vp9_sb_uv_block_error_c(int16_t *coeff,
|
||||
int16_t *dqcoeff0, int16_t *dqcoeff1,
|
||||
int block_size, int shift) {
|
||||
int i;
|
||||
int64_t error = 0;
|
||||
|
||||
for (i = 0; i < block_size / 2; i++) {
|
||||
unsigned int this_diff = coeff[i] - dqcoeff0[i];
|
||||
error += this_diff * this_diff;
|
||||
}
|
||||
coeff += block_size / 2;
|
||||
for (i = 0; i < block_size / 2; i++) {
|
||||
unsigned int this_diff = coeff[i] - dqcoeff1[i];
|
||||
error += this_diff * this_diff;
|
||||
}
|
||||
error >>= shift;
|
||||
|
||||
return error > INT_MAX ? INT_MAX : (int)error;
|
||||
}
|
||||
|
||||
static int rdcost_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x) {
|
||||
int cost = 0, b;
|
||||
MACROBLOCKD *const xd = &x->e_mbd;
|
||||
|
@ -871,7 +897,7 @@ static int rdcost_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x) {
|
|||
for (b = 0; b < 64; b++)
|
||||
cost += cost_coeffs(cm, x, b, PLANE_TYPE_Y_WITH_DC,
|
||||
ta + vp9_block2above_sb[TX_4X4][b],
|
||||
tl + vp9_block2left_sb[TX_4X4][b], TX_4X4);
|
||||
tl + vp9_block2left_sb[TX_4X4][b], TX_4X4, 64);
|
||||
|
||||
return cost;
|
||||
}
|
||||
|
@ -884,7 +910,7 @@ static void super_block_yrd_4x4(VP9_COMMON *const cm, MACROBLOCK *x,
|
|||
vp9_transform_sby_4x4(x);
|
||||
vp9_quantize_sby_4x4(x);
|
||||
|
||||
*distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 1024, 2);
|
||||
*distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 1024, 2);
|
||||
*rate = rdcost_sby_4x4(cm, x);
|
||||
*skippable = vp9_sby_is_skippable_4x4(xd);
|
||||
}
|
||||
|
@ -902,7 +928,7 @@ static int rdcost_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x) {
|
|||
for (b = 0; b < 64; b += 4)
|
||||
cost += cost_coeffs(cm, x, b, PLANE_TYPE_Y_WITH_DC,
|
||||
ta + vp9_block2above_sb[TX_8X8][b],
|
||||
tl + vp9_block2left_sb[TX_8X8][b], TX_8X8);
|
||||
tl + vp9_block2left_sb[TX_8X8][b], TX_8X8, 64);
|
||||
|
||||
return cost;
|
||||
}
|
||||
|
@ -915,7 +941,7 @@ static void super_block_yrd_8x8(VP9_COMMON *const cm, MACROBLOCK *x,
|
|||
vp9_transform_sby_8x8(x);
|
||||
vp9_quantize_sby_8x8(x);
|
||||
|
||||
*distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 1024, 2);
|
||||
*distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 1024, 2);
|
||||
*rate = rdcost_sby_8x8(cm, x);
|
||||
*skippable = vp9_sby_is_skippable_8x8(xd);
|
||||
}
|
||||
|
@ -933,7 +959,7 @@ static int rdcost_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x) {
|
|||
for (b = 0; b < 64; b += 16)
|
||||
cost += cost_coeffs(cm, x, b, PLANE_TYPE_Y_WITH_DC,
|
||||
ta + vp9_block2above_sb[TX_16X16][b],
|
||||
tl + vp9_block2left_sb[TX_16X16][b], TX_16X16);
|
||||
tl + vp9_block2left_sb[TX_16X16][b], TX_16X16, 64);
|
||||
|
||||
return cost;
|
||||
}
|
||||
|
@ -946,7 +972,7 @@ static void super_block_yrd_16x16(VP9_COMMON *const cm, MACROBLOCK *x,
|
|||
vp9_transform_sby_16x16(x);
|
||||
vp9_quantize_sby_16x16(x);
|
||||
|
||||
*distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 1024, 2);
|
||||
*distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 1024, 2);
|
||||
*rate = rdcost_sby_16x16(cm, x);
|
||||
*skippable = vp9_sby_is_skippable_16x16(xd);
|
||||
}
|
||||
|
@ -960,7 +986,7 @@ static int rdcost_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x) {
|
|||
vpx_memcpy(&t_above, xd->above_context, sizeof(t_above));
|
||||
vpx_memcpy(&t_left, xd->left_context, sizeof(t_left));
|
||||
|
||||
return cost_coeffs(cm, x, 0, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_32X32);
|
||||
return cost_coeffs(cm, x, 0, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_32X32, 64);
|
||||
}
|
||||
|
||||
static void super_block_yrd_32x32(VP9_COMMON *const cm, MACROBLOCK *x,
|
||||
|
@ -971,7 +997,7 @@ static void super_block_yrd_32x32(VP9_COMMON *const cm, MACROBLOCK *x,
|
|||
vp9_transform_sby_32x32(x);
|
||||
vp9_quantize_sby_32x32(x);
|
||||
|
||||
*distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 1024, 0);
|
||||
*distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 1024, 0);
|
||||
*rate = rdcost_sby_32x32(cm, x);
|
||||
*skippable = vp9_sby_is_skippable_32x32(xd);
|
||||
}
|
||||
|
@ -1009,7 +1035,7 @@ static int rdcost_sb64y_4x4(VP9_COMMON *const cm, MACROBLOCK *x) {
|
|||
for (b = 0; b < 256; b++)
|
||||
cost += cost_coeffs(cm, x, b, PLANE_TYPE_Y_WITH_DC,
|
||||
ta + vp9_block2above_sb64[TX_4X4][b],
|
||||
tl + vp9_block2left_sb64[TX_4X4][b], TX_4X4);
|
||||
tl + vp9_block2left_sb64[TX_4X4][b], TX_4X4, 256);
|
||||
|
||||
return cost;
|
||||
}
|
||||
|
@ -1022,7 +1048,7 @@ static void super_block64_yrd_4x4(VP9_COMMON *const cm, MACROBLOCK *x,
|
|||
vp9_transform_sb64y_4x4(x);
|
||||
vp9_quantize_sb64y_4x4(x);
|
||||
|
||||
*distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 4096, 2);
|
||||
*distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 4096, 2);
|
||||
*rate = rdcost_sb64y_4x4(cm, x);
|
||||
*skippable = vp9_sb64y_is_skippable_4x4(xd);
|
||||
}
|
||||
|
@ -1040,7 +1066,7 @@ static int rdcost_sb64y_8x8(VP9_COMMON *const cm, MACROBLOCK *x) {
|
|||
for (b = 0; b < 256; b += 4)
|
||||
cost += cost_coeffs(cm, x, b, PLANE_TYPE_Y_WITH_DC,
|
||||
ta + vp9_block2above_sb64[TX_8X8][b],
|
||||
tl + vp9_block2left_sb64[TX_8X8][b], TX_8X8);
|
||||
tl + vp9_block2left_sb64[TX_8X8][b], TX_8X8, 256);
|
||||
|
||||
return cost;
|
||||
}
|
||||
|
@ -1053,7 +1079,7 @@ static void super_block64_yrd_8x8(VP9_COMMON *const cm, MACROBLOCK *x,
|
|||
vp9_transform_sb64y_8x8(x);
|
||||
vp9_quantize_sb64y_8x8(x);
|
||||
|
||||
*distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 4096, 2);
|
||||
*distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 4096, 2);
|
||||
*rate = rdcost_sb64y_8x8(cm, x);
|
||||
*skippable = vp9_sb64y_is_skippable_8x8(xd);
|
||||
}
|
||||
|
@ -1071,7 +1097,7 @@ static int rdcost_sb64y_16x16(VP9_COMMON *const cm, MACROBLOCK *x) {
|
|||
for (b = 0; b < 256; b += 16)
|
||||
cost += cost_coeffs(cm, x, b, PLANE_TYPE_Y_WITH_DC,
|
||||
ta + vp9_block2above_sb64[TX_16X16][b],
|
||||
tl + vp9_block2left_sb64[TX_16X16][b], TX_16X16);
|
||||
tl + vp9_block2left_sb64[TX_16X16][b], TX_16X16, 256);
|
||||
|
||||
return cost;
|
||||
}
|
||||
|
@ -1085,7 +1111,7 @@ static void super_block64_yrd_16x16(VP9_COMMON *const cm, MACROBLOCK *x,
|
|||
vp9_transform_sb64y_16x16(x);
|
||||
vp9_quantize_sb64y_16x16(x);
|
||||
|
||||
*distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 4096, 2);
|
||||
*distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 4096, 2);
|
||||
*rate = rdcost_sb64y_16x16(cm, x);
|
||||
*skippable = vp9_sb64y_is_skippable_16x16(xd);
|
||||
}
|
||||
|
@ -1103,7 +1129,7 @@ static int rdcost_sb64y_32x32(VP9_COMMON *const cm, MACROBLOCK *x) {
|
|||
for (b = 0; b < 256; b += 64)
|
||||
cost += cost_coeffs(cm, x, b, PLANE_TYPE_Y_WITH_DC,
|
||||
ta + vp9_block2above_sb64[TX_32X32][b],
|
||||
tl + vp9_block2left_sb64[TX_32X32][b], TX_32X32);
|
||||
tl + vp9_block2left_sb64[TX_32X32][b], TX_32X32, 256);
|
||||
|
||||
return cost;
|
||||
}
|
||||
|
@ -1117,7 +1143,7 @@ static void super_block64_yrd_32x32(VP9_COMMON *const cm, MACROBLOCK *x,
|
|||
vp9_transform_sb64y_32x32(x);
|
||||
vp9_quantize_sb64y_32x32(x);
|
||||
|
||||
*distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 4096, 0);
|
||||
*distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 4096, 0);
|
||||
*rate = rdcost_sb64y_32x32(cm, x);
|
||||
*skippable = vp9_sb64y_is_skippable_32x32(xd);
|
||||
}
|
||||
|
@ -1163,8 +1189,8 @@ static void copy_predictor_8x8(uint8_t *dst, const uint8_t *predictor) {
|
|||
d[29] = p[29];
|
||||
}
|
||||
|
||||
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be,
|
||||
BLOCKD *b, B_PREDICTION_MODE *best_mode,
|
||||
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
|
||||
B_PREDICTION_MODE *best_mode,
|
||||
int *bmode_costs,
|
||||
ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
|
||||
int *bestrate, int *bestratey,
|
||||
|
@ -1175,6 +1201,8 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be,
|
|||
int rate = 0;
|
||||
int distortion;
|
||||
VP9_COMMON *const cm = &cpi->common;
|
||||
BLOCK *be = x->block + ib;
|
||||
BLOCKD *b = xd->block + ib;
|
||||
|
||||
ENTROPY_CONTEXT ta = *a, tempa = *a;
|
||||
ENTROPY_CONTEXT tl = *l, templ = *l;
|
||||
|
@ -1188,6 +1216,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be,
|
|||
DECLARE_ALIGNED_ARRAY(16, uint8_t, best_predictor, 16 * 4);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, best_dqcoeff, 16);
|
||||
|
||||
assert(ib < 16);
|
||||
#if CONFIG_NEWBINTRAMODES
|
||||
b->bmi.as_mode.context = vp9_find_bpred_context(xd, b);
|
||||
#endif
|
||||
|
@ -1224,16 +1253,18 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be,
|
|||
vp9_ht_quantize_b_4x4(x, be - x->block, tx_type);
|
||||
} else {
|
||||
x->fwd_txm4x4(be->src_diff, be->coeff, 32);
|
||||
x->quantize_b_4x4(x, be - x->block);
|
||||
x->quantize_b_4x4(x, be - x->block, 16);
|
||||
}
|
||||
|
||||
tempa = ta;
|
||||
templ = tl;
|
||||
|
||||
ratey = cost_coeffs(cm, x, b - xd->block,
|
||||
PLANE_TYPE_Y_WITH_DC, &tempa, &templ, TX_4X4);
|
||||
PLANE_TYPE_Y_WITH_DC, &tempa, &templ, TX_4X4, 16);
|
||||
rate += ratey;
|
||||
distortion = vp9_block_error(be->coeff, b->dqcoeff, 16) >> 2;
|
||||
distortion = vp9_block_error(be->coeff,
|
||||
BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16),
|
||||
16) >> 2;
|
||||
|
||||
this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
|
||||
|
||||
|
@ -1247,7 +1278,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be,
|
|||
*a = tempa;
|
||||
*l = templ;
|
||||
copy_predictor(best_predictor, b->predictor);
|
||||
vpx_memcpy(best_dqcoeff, b->dqcoeff, 32);
|
||||
vpx_memcpy(best_dqcoeff, BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16), 32);
|
||||
}
|
||||
}
|
||||
b->bmi.as_mode.first = (B_PREDICTION_MODE)(*best_mode);
|
||||
|
@ -1304,7 +1335,7 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
|
|||
#endif
|
||||
|
||||
total_rd += rd_pick_intra4x4block(
|
||||
cpi, mb, mb->block + i, xd->block + i, &best_mode,
|
||||
cpi, mb, i, &best_mode,
|
||||
bmode_costs, ta + vp9_block2above[TX_4X4][i],
|
||||
tl + vp9_block2left[TX_4X4][i], &r, &ry, &d);
|
||||
|
||||
|
@ -1504,6 +1535,7 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
|
|||
// note the input and output index mapping
|
||||
int idx = (ib & 0x02) ? (ib + 2) : ib;
|
||||
|
||||
assert(ib < 16);
|
||||
for (mode = DC_PRED; mode <= TM_PRED; mode++) {
|
||||
int64_t this_rd;
|
||||
int rate_t = 0;
|
||||
|
@ -1522,11 +1554,11 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
|
|||
vp9_short_fht8x8(be->src_diff, (x->block + idx)->coeff, 16, tx_type);
|
||||
else
|
||||
x->fwd_txm8x8(be->src_diff, (x->block + idx)->coeff, 32);
|
||||
x->quantize_b_8x8(x, idx, tx_type);
|
||||
x->quantize_b_8x8(x, idx, tx_type, 16);
|
||||
|
||||
// compute quantization mse of 8x8 block
|
||||
distortion = vp9_block_error_c((x->block + idx)->coeff,
|
||||
(xd->block + idx)->dqcoeff, 64);
|
||||
BLOCK_OFFSET(xd->plane[0].dqcoeff, idx, 16), 64);
|
||||
|
||||
vpx_memcpy(&ta, a, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
vpx_memcpy(&tl, l, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
|
@ -1537,7 +1569,7 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
|
|||
tl1 = tl0 + 1;
|
||||
|
||||
rate_t = cost_coeffs(cm, x, idx, PLANE_TYPE_Y_WITH_DC,
|
||||
ta0, tl0, TX_8X8);
|
||||
ta0, tl0, TX_8X8, 16);
|
||||
|
||||
rate += rate_t;
|
||||
} else {
|
||||
|
@ -1563,21 +1595,23 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
|
|||
} else if (!(i & 1) &&
|
||||
get_tx_type_4x4(xd, ib + iblock[i] + 1) == DCT_DCT) {
|
||||
x->fwd_txm8x4(be->src_diff, be->coeff, 32);
|
||||
x->quantize_b_4x4_pair(x, ib + iblock[i], ib + iblock[i] + 1);
|
||||
x->quantize_b_4x4_pair(x, ib + iblock[i], ib + iblock[i] + 1, 16);
|
||||
do_two = 1;
|
||||
} else {
|
||||
x->fwd_txm4x4(be->src_diff, be->coeff, 32);
|
||||
x->quantize_b_4x4(x, ib + iblock[i]);
|
||||
x->quantize_b_4x4(x, ib + iblock[i], 16);
|
||||
}
|
||||
distortion += vp9_block_error_c(be->coeff, b->dqcoeff, 16 << do_two);
|
||||
distortion += vp9_block_error_c(be->coeff,
|
||||
BLOCK_OFFSET(xd->plane[0].dqcoeff, ib + iblock[i], 16),
|
||||
16 << do_two);
|
||||
rate_t += cost_coeffs(cm, x, ib + iblock[i], PLANE_TYPE_Y_WITH_DC,
|
||||
i&1 ? ta1 : ta0, i&2 ? tl1 : tl0,
|
||||
TX_4X4);
|
||||
TX_4X4, 16);
|
||||
if (do_two) {
|
||||
i++;
|
||||
rate_t += cost_coeffs(cm, x, ib + iblock[i], PLANE_TYPE_Y_WITH_DC,
|
||||
i&1 ? ta1 : ta0, i&2 ? tl1 : tl0,
|
||||
TX_4X4);
|
||||
TX_4X4, 16);
|
||||
}
|
||||
}
|
||||
b = &xd->block[ib];
|
||||
|
@ -1598,8 +1632,10 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
|
|||
best_rd = this_rd;
|
||||
*best_mode = mode;
|
||||
copy_predictor_8x8(best_predictor, b->predictor);
|
||||
vpx_memcpy(best_dqcoeff, b->dqcoeff, 64);
|
||||
vpx_memcpy(best_dqcoeff + 32, b->dqcoeff + 64, 64);
|
||||
vpx_memcpy(best_dqcoeff,
|
||||
BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16), 64);
|
||||
vpx_memcpy(best_dqcoeff + 32,
|
||||
BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16) + 64, 64);
|
||||
}
|
||||
}
|
||||
b->bmi.as_mode.first = (*best_mode);
|
||||
|
@ -1758,7 +1794,7 @@ static int rd_cost_mbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *mb, int backup) {
|
|||
cost += cost_coeffs(cm, mb, b, PLANE_TYPE_UV,
|
||||
ta + vp9_block2above[TX_4X4][b],
|
||||
tl + vp9_block2left[TX_4X4][b],
|
||||
TX_4X4);
|
||||
TX_4X4, 16);
|
||||
|
||||
return cost;
|
||||
}
|
||||
|
@ -1798,7 +1834,7 @@ static int rd_cost_mbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *mb, int backup) {
|
|||
for (b = 16; b < 24; b += 4)
|
||||
cost += cost_coeffs(cm, mb, b, PLANE_TYPE_UV,
|
||||
ta + vp9_block2above[TX_8X8][b],
|
||||
tl + vp9_block2left[TX_8X8][b], TX_8X8);
|
||||
tl + vp9_block2left[TX_8X8][b], TX_8X8, 16);
|
||||
|
||||
return cost;
|
||||
}
|
||||
|
@ -1837,7 +1873,7 @@ static int rd_cost_sbuv_16x16(VP9_COMMON *const cm, MACROBLOCK *x, int backup) {
|
|||
for (b = 16; b < 24; b += 4)
|
||||
cost += cost_coeffs(cm, x, b * 4, PLANE_TYPE_UV,
|
||||
ta + vp9_block2above[TX_8X8][b],
|
||||
tl + vp9_block2left[TX_8X8][b], TX_16X16);
|
||||
tl + vp9_block2left[TX_8X8][b], TX_16X16, 64);
|
||||
|
||||
return cost;
|
||||
}
|
||||
|
@ -1851,8 +1887,9 @@ static void rd_inter32x32_uv_16x16(VP9_COMMON *const cm, MACROBLOCK *x,
|
|||
vp9_quantize_sbuv_16x16(x);
|
||||
|
||||
*rate = rd_cost_sbuv_16x16(cm, x, backup);
|
||||
*distortion = vp9_sb_block_error_c(x->coeff + 1024,
|
||||
xd->dqcoeff + 1024, 512, 2);
|
||||
*distortion = vp9_sb_uv_block_error_c(x->coeff + 1024,
|
||||
xd->plane[1].dqcoeff,
|
||||
xd->plane[2].dqcoeff, 512, 2);
|
||||
*skip = vp9_sbuv_is_skippable_16x16(xd);
|
||||
}
|
||||
|
||||
|
@ -2113,7 +2150,7 @@ static int rd_cost_sb64uv_32x32(VP9_COMMON *const cm, MACROBLOCK *x,
|
|||
for (b = 16; b < 24; b += 4)
|
||||
cost += cost_coeffs(cm, x, b * 16, PLANE_TYPE_UV,
|
||||
ta + vp9_block2above[TX_8X8][b],
|
||||
tl + vp9_block2left[TX_8X8][b], TX_32X32);
|
||||
tl + vp9_block2left[TX_8X8][b], TX_32X32, 256);
|
||||
|
||||
return cost;
|
||||
}
|
||||
|
@ -2127,8 +2164,9 @@ static void rd_inter64x64_uv_32x32(VP9_COMMON *const cm, MACROBLOCK *x,
|
|||
vp9_quantize_sb64uv_32x32(x);
|
||||
|
||||
*rate = rd_cost_sb64uv_32x32(cm, x, backup);
|
||||
*distortion = vp9_sb_block_error_c(x->coeff + 4096,
|
||||
xd->dqcoeff + 4096, 2048, 0);
|
||||
*distortion = vp9_sb_uv_block_error_c(x->coeff + 4096,
|
||||
xd->plane[1].dqcoeff,
|
||||
xd->plane[2].dqcoeff, 2048, 0);
|
||||
*skip = vp9_sb64uv_is_skippable_32x32(xd);
|
||||
}
|
||||
|
||||
|
@ -2465,12 +2503,13 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm,
|
|||
|
||||
vp9_subtract_b(be, bd, 16);
|
||||
x->fwd_txm4x4(be->src_diff, be->coeff, 32);
|
||||
x->quantize_b_4x4(x, i);
|
||||
thisdistortion = vp9_block_error(be->coeff, bd->dqcoeff, 16);
|
||||
x->quantize_b_4x4(x, i, 16);
|
||||
thisdistortion = vp9_block_error(be->coeff,
|
||||
BLOCK_OFFSET(xd->plane[0].dqcoeff, i, 16), 16);
|
||||
*distortion += thisdistortion;
|
||||
*labelyrate += cost_coeffs(cm, x, i, PLANE_TYPE_Y_WITH_DC,
|
||||
ta + vp9_block2above[TX_4X4][i],
|
||||
tl + vp9_block2left[TX_4X4][i], TX_4X4);
|
||||
tl + vp9_block2left[TX_4X4][i], TX_4X4, 16);
|
||||
}
|
||||
}
|
||||
*distortion >>= 2;
|
||||
|
@ -2508,11 +2547,12 @@ static int64_t encode_inter_mb_segment_8x8(VP9_COMMON *const cm,
|
|||
const int use_second_ref =
|
||||
xd->mode_info_context->mbmi.second_ref_frame > 0;
|
||||
int which_mv;
|
||||
int idx = (ib & 8) + ((ib & 2) << 1);
|
||||
BLOCKD *bd = &xd->block[ib], *bd2 = &xd->block[idx];
|
||||
const int idx = (ib & 8) + ((ib & 2) << 1);
|
||||
BLOCKD *bd = &xd->block[ib];
|
||||
BLOCK *be = &x->block[ib], *be2 = &x->block[idx];
|
||||
int thisdistortion;
|
||||
|
||||
assert(idx < 16);
|
||||
for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
|
||||
uint8_t **base_pre = which_mv ? bd->base_second_pre : bd->base_pre;
|
||||
|
||||
|
@ -2531,66 +2571,70 @@ static int64_t encode_inter_mb_segment_8x8(VP9_COMMON *const cm,
|
|||
if (xd->mode_info_context->mbmi.txfm_size == TX_4X4) {
|
||||
if (otherrd) {
|
||||
x->fwd_txm8x8(be->src_diff, be2->coeff, 32);
|
||||
x->quantize_b_8x8(x, idx, DCT_DCT);
|
||||
thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64);
|
||||
x->quantize_b_8x8(x, idx, DCT_DCT, 16);
|
||||
thisdistortion = vp9_block_error_c(be2->coeff,
|
||||
BLOCK_OFFSET(xd->plane[0].dqcoeff, idx, 16), 64);
|
||||
otherdist += thisdistortion;
|
||||
xd->mode_info_context->mbmi.txfm_size = TX_8X8;
|
||||
othercost += cost_coeffs(cm, x, idx, PLANE_TYPE_Y_WITH_DC,
|
||||
tacp + vp9_block2above[TX_8X8][idx],
|
||||
tlcp + vp9_block2left[TX_8X8][idx],
|
||||
TX_8X8);
|
||||
TX_8X8, 16);
|
||||
xd->mode_info_context->mbmi.txfm_size = TX_4X4;
|
||||
}
|
||||
for (j = 0; j < 4; j += 2) {
|
||||
bd = &xd->block[ib + iblock[j]];
|
||||
be = &x->block[ib + iblock[j]];
|
||||
x->fwd_txm8x4(be->src_diff, be->coeff, 32);
|
||||
x->quantize_b_4x4_pair(x, ib + iblock[j], ib + iblock[j] + 1);
|
||||
thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32);
|
||||
x->quantize_b_4x4_pair(x, ib + iblock[j], ib + iblock[j] + 1, 16);
|
||||
thisdistortion = vp9_block_error_c(be->coeff,
|
||||
BLOCK_OFFSET(xd->plane[0].dqcoeff, ib + iblock[j], 16), 32);
|
||||
*distortion += thisdistortion;
|
||||
*labelyrate +=
|
||||
cost_coeffs(cm, x, ib + iblock[j], PLANE_TYPE_Y_WITH_DC,
|
||||
ta + vp9_block2above[TX_4X4][ib + iblock[j]],
|
||||
tl + vp9_block2left[TX_4X4][ib + iblock[j]],
|
||||
TX_4X4);
|
||||
TX_4X4, 16);
|
||||
*labelyrate +=
|
||||
cost_coeffs(cm, x, ib + iblock[j] + 1,
|
||||
PLANE_TYPE_Y_WITH_DC,
|
||||
ta + vp9_block2above[TX_4X4][ib + iblock[j] + 1],
|
||||
tl + vp9_block2left[TX_4X4][ib + iblock[j]],
|
||||
TX_4X4);
|
||||
TX_4X4, 16);
|
||||
}
|
||||
} else /* 8x8 */ {
|
||||
if (otherrd) {
|
||||
for (j = 0; j < 4; j += 2) {
|
||||
BLOCKD *bd = &xd->block[ib + iblock[j]];
|
||||
BLOCK *be = &x->block[ib + iblock[j]];
|
||||
x->fwd_txm8x4(be->src_diff, be->coeff, 32);
|
||||
x->quantize_b_4x4_pair(x, ib + iblock[j], ib + iblock[j] + 1);
|
||||
thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32);
|
||||
x->quantize_b_4x4_pair(x, ib + iblock[j], ib + iblock[j] + 1, 16);
|
||||
thisdistortion = vp9_block_error_c(be->coeff,
|
||||
BLOCK_OFFSET(xd->plane[0].dqcoeff, ib + iblock[j], 16), 32);
|
||||
otherdist += thisdistortion;
|
||||
xd->mode_info_context->mbmi.txfm_size = TX_4X4;
|
||||
othercost +=
|
||||
cost_coeffs(cm, x, ib + iblock[j], PLANE_TYPE_Y_WITH_DC,
|
||||
tacp + vp9_block2above[TX_4X4][ib + iblock[j]],
|
||||
tlcp + vp9_block2left[TX_4X4][ib + iblock[j]],
|
||||
TX_4X4);
|
||||
TX_4X4, 16);
|
||||
othercost +=
|
||||
cost_coeffs(cm, x, ib + iblock[j] + 1,
|
||||
PLANE_TYPE_Y_WITH_DC,
|
||||
tacp + vp9_block2above[TX_4X4][ib + iblock[j] + 1],
|
||||
tlcp + vp9_block2left[TX_4X4][ib + iblock[j]],
|
||||
TX_4X4);
|
||||
TX_4X4, 16);
|
||||
xd->mode_info_context->mbmi.txfm_size = TX_8X8;
|
||||
}
|
||||
}
|
||||
x->fwd_txm8x8(be->src_diff, be2->coeff, 32);
|
||||
x->quantize_b_8x8(x, idx, DCT_DCT);
|
||||
thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64);
|
||||
x->quantize_b_8x8(x, idx, DCT_DCT, 16);
|
||||
thisdistortion = vp9_block_error_c(be2->coeff,
|
||||
BLOCK_OFFSET(xd->plane[0].dqcoeff, idx, 16), 64);
|
||||
*distortion += thisdistortion;
|
||||
*labelyrate += cost_coeffs(cm, x, idx, PLANE_TYPE_Y_WITH_DC,
|
||||
ta + vp9_block2above[TX_8X8][idx],
|
||||
tl + vp9_block2left[TX_8X8][idx], TX_8X8);
|
||||
tl + vp9_block2left[TX_8X8][idx], TX_8X8,
|
||||
16);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2850,13 +2894,13 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
|
|||
if (x->e_mbd.mode_info_context->mbmi.txfm_size == TX_4X4) {
|
||||
for (j = 0; j < 16; j++)
|
||||
if (labels[j] == i)
|
||||
best_eobs[j] = x->e_mbd.eobs[j];
|
||||
best_eobs[j] = x->e_mbd.plane[0].eobs[j];
|
||||
} else {
|
||||
for (j = 0; j < 4; j++) {
|
||||
int ib = vp9_i8x8_block[j], idx = j * 4;
|
||||
|
||||
if (labels[ib] == i)
|
||||
best_eobs[idx] = x->e_mbd.eobs[idx];
|
||||
best_eobs[idx] = x->e_mbd.plane[0].eobs[idx];
|
||||
}
|
||||
}
|
||||
if (other_rd < best_other_rd)
|
||||
|
@ -3131,7 +3175,7 @@ static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
|
|||
bd->bmi.as_mv[0].as_int = bsi.mvs[i].as_int;
|
||||
if (mbmi->second_ref_frame > 0)
|
||||
bd->bmi.as_mv[1].as_int = bsi.second_mvs[i].as_int;
|
||||
x->e_mbd.eobs[i] = bsi.eobs[i];
|
||||
x->e_mbd.plane[0].eobs[i] = bsi.eobs[i];
|
||||
}
|
||||
|
||||
*returntotrate = bsi.r;
|
||||
|
|
|
@ -117,13 +117,16 @@ static void tokenize_b(VP9_COMP *cpi,
|
|||
TOKENEXTRA **tp,
|
||||
PLANE_TYPE type,
|
||||
TX_SIZE tx_size,
|
||||
int y_blocks,
|
||||
int dry_run) {
|
||||
MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
|
||||
int pt; /* near block/prev token context index */
|
||||
int c = 0;
|
||||
const int eob = xd->eobs[ib]; /* one beyond last nonzero coeff */
|
||||
TOKENEXTRA *t = *tp; /* store tokens starting here */
|
||||
int16_t *qcoeff_ptr = xd->qcoeff + 16 * ib;
|
||||
const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, ib);
|
||||
const int eob = xd->plane[pb_idx.plane].eobs[pb_idx.block];
|
||||
const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff,
|
||||
pb_idx.block, 16);
|
||||
int seg_eob, default_eob, pad;
|
||||
const int segment_id = mbmi->segment_id;
|
||||
const BLOCK_SIZE_TYPE sb_type = mbmi->sb_type;
|
||||
|
@ -140,6 +143,7 @@ static void tokenize_b(VP9_COMP *cpi,
|
|||
assert(xd->nzcs[ib] == 0);
|
||||
#endif
|
||||
|
||||
assert((!type && !pb_idx.plane) || (type && pb_idx.plane));
|
||||
if (sb_type == BLOCK_SIZE_SB64X64) {
|
||||
a = (ENTROPY_CONTEXT *)xd->above_context +
|
||||
vp9_block2above_sb64[tx_size][ib];
|
||||
|
@ -338,7 +342,7 @@ int vp9_mby_is_skippable_4x4(MACROBLOCKD *xd) {
|
|||
int i = 0;
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
skip &= (!xd->eobs[i]);
|
||||
skip &= (!xd->plane[0].eobs[i]);
|
||||
|
||||
return skip;
|
||||
}
|
||||
|
@ -347,8 +351,10 @@ int vp9_mbuv_is_skippable_4x4(MACROBLOCKD *xd) {
|
|||
int skip = 1;
|
||||
int i;
|
||||
|
||||
for (i = 16; i < 24; i++)
|
||||
skip &= (!xd->eobs[i]);
|
||||
for (i = 0; i < 4; i++)
|
||||
skip &= (!xd->plane[1].eobs[i]);
|
||||
for (i = 0; i < 4; i++)
|
||||
skip &= (!xd->plane[2].eobs[i]);
|
||||
return skip;
|
||||
}
|
||||
|
||||
|
@ -362,13 +368,13 @@ int vp9_mby_is_skippable_8x8(MACROBLOCKD *xd) {
|
|||
int i = 0;
|
||||
|
||||
for (i = 0; i < 16; i += 4)
|
||||
skip &= (!xd->eobs[i]);
|
||||
skip &= (!xd->plane[0].eobs[i]);
|
||||
|
||||
return skip;
|
||||
}
|
||||
|
||||
int vp9_mbuv_is_skippable_8x8(MACROBLOCKD *xd) {
|
||||
return (!xd->eobs[16]) & (!xd->eobs[20]);
|
||||
return (!xd->plane[1].eobs[0]) & (!xd->plane[2].eobs[0]);
|
||||
}
|
||||
|
||||
static int mb_is_skippable_8x8(MACROBLOCKD *xd) {
|
||||
|
@ -382,7 +388,7 @@ static int mb_is_skippable_8x8_4x4uv(MACROBLOCKD *xd) {
|
|||
}
|
||||
|
||||
int vp9_mby_is_skippable_16x16(MACROBLOCKD *xd) {
|
||||
return (!xd->eobs[0]);
|
||||
return (!xd->plane[0].eobs[0]);
|
||||
}
|
||||
|
||||
static int mb_is_skippable_16x16(MACROBLOCKD *xd) {
|
||||
|
@ -390,11 +396,11 @@ static int mb_is_skippable_16x16(MACROBLOCKD *xd) {
|
|||
}
|
||||
|
||||
int vp9_sby_is_skippable_32x32(MACROBLOCKD *xd) {
|
||||
return (!xd->eobs[0]);
|
||||
return (!xd->plane[0].eobs[0]);
|
||||
}
|
||||
|
||||
int vp9_sbuv_is_skippable_16x16(MACROBLOCKD *xd) {
|
||||
return (!xd->eobs[64]) & (!xd->eobs[80]);
|
||||
return (!xd->plane[1].eobs[0]) & (!xd->plane[2].eobs[0]);
|
||||
}
|
||||
|
||||
static int sb_is_skippable_32x32(MACROBLOCKD *xd) {
|
||||
|
@ -407,7 +413,7 @@ int vp9_sby_is_skippable_16x16(MACROBLOCKD *xd) {
|
|||
int i = 0;
|
||||
|
||||
for (i = 0; i < 64; i += 16)
|
||||
skip &= (!xd->eobs[i]);
|
||||
skip &= (!xd->plane[0].eobs[i]);
|
||||
|
||||
return skip;
|
||||
}
|
||||
|
@ -421,7 +427,7 @@ int vp9_sby_is_skippable_8x8(MACROBLOCKD *xd) {
|
|||
int i = 0;
|
||||
|
||||
for (i = 0; i < 64; i += 4)
|
||||
skip &= (!xd->eobs[i]);
|
||||
skip &= (!xd->plane[0].eobs[i]);
|
||||
|
||||
return skip;
|
||||
}
|
||||
|
@ -430,8 +436,10 @@ int vp9_sbuv_is_skippable_8x8(MACROBLOCKD *xd) {
|
|||
int skip = 1;
|
||||
int i = 0;
|
||||
|
||||
for (i = 64; i < 96; i += 4)
|
||||
skip &= (!xd->eobs[i]);
|
||||
for (i = 0; i < 16; i += 4)
|
||||
skip &= (!xd->plane[1].eobs[i]);
|
||||
for (i = 0; i < 16; i += 4)
|
||||
skip &= (!xd->plane[2].eobs[i]);
|
||||
|
||||
return skip;
|
||||
}
|
||||
|
@ -445,7 +453,7 @@ int vp9_sby_is_skippable_4x4(MACROBLOCKD *xd) {
|
|||
int i = 0;
|
||||
|
||||
for (i = 0; i < 64; i++)
|
||||
skip &= (!xd->eobs[i]);
|
||||
skip &= (!xd->plane[0].eobs[i]);
|
||||
|
||||
return skip;
|
||||
}
|
||||
|
@ -454,8 +462,10 @@ int vp9_sbuv_is_skippable_4x4(MACROBLOCKD *xd) {
|
|||
int skip = 1;
|
||||
int i = 0;
|
||||
|
||||
for (i = 64; i < 96; i++)
|
||||
skip &= (!xd->eobs[i]);
|
||||
for (i = 0; i < 16; i++)
|
||||
skip &= (!xd->plane[1].eobs[i]);
|
||||
for (i = 0; i < 16; i++)
|
||||
skip &= (!xd->plane[2].eobs[i]);
|
||||
|
||||
return skip;
|
||||
}
|
||||
|
@ -511,34 +521,34 @@ void vp9_tokenize_sb(VP9_COMP *cpi,
|
|||
switch (mbmi->txfm_size) {
|
||||
case TX_32X32:
|
||||
tokenize_b(cpi, xd, 0, t, PLANE_TYPE_Y_WITH_DC,
|
||||
TX_32X32, dry_run);
|
||||
TX_32X32, 64, dry_run);
|
||||
for (b = 64; b < 96; b += 16)
|
||||
tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV,
|
||||
TX_16X16, dry_run);
|
||||
TX_16X16, 64, dry_run);
|
||||
break;
|
||||
case TX_16X16:
|
||||
for (b = 0; b < 64; b += 16)
|
||||
tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC,
|
||||
TX_16X16, dry_run);
|
||||
TX_16X16, 64, dry_run);
|
||||
for (b = 64; b < 96; b += 16)
|
||||
tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV,
|
||||
TX_16X16, dry_run);
|
||||
TX_16X16, 64, dry_run);
|
||||
break;
|
||||
case TX_8X8:
|
||||
for (b = 0; b < 64; b += 4)
|
||||
tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC,
|
||||
TX_8X8, dry_run);
|
||||
TX_8X8, 64, dry_run);
|
||||
for (b = 64; b < 96; b += 4)
|
||||
tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV,
|
||||
TX_8X8, dry_run);
|
||||
TX_8X8, 64, dry_run);
|
||||
break;
|
||||
case TX_4X4:
|
||||
for (b = 0; b < 64; b++)
|
||||
tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC,
|
||||
TX_4X4, dry_run);
|
||||
TX_4X4, 64, dry_run);
|
||||
for (b = 64; b < 96; b++)
|
||||
tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV,
|
||||
TX_4X4, dry_run);
|
||||
TX_4X4, 64, dry_run);
|
||||
break;
|
||||
default: assert(0);
|
||||
}
|
||||
|
@ -552,13 +562,13 @@ int vp9_sb64y_is_skippable_32x32(MACROBLOCKD *xd) {
|
|||
int i = 0;
|
||||
|
||||
for (i = 0; i < 256; i += 64)
|
||||
skip &= (!xd->eobs[i]);
|
||||
skip &= (!xd->plane[0].eobs[i]);
|
||||
|
||||
return skip;
|
||||
}
|
||||
|
||||
int vp9_sb64uv_is_skippable_32x32(MACROBLOCKD *xd) {
|
||||
return (!xd->eobs[256]) & (!xd->eobs[320]);
|
||||
return (!xd->plane[1].eobs[0]) & (!xd->plane[2].eobs[0]);
|
||||
}
|
||||
|
||||
static int sb64_is_skippable_32x32(MACROBLOCKD *xd) {
|
||||
|
@ -570,7 +580,7 @@ int vp9_sb64y_is_skippable_16x16(MACROBLOCKD *xd) {
|
|||
int i = 0;
|
||||
|
||||
for (i = 0; i < 256; i += 16)
|
||||
skip &= (!xd->eobs[i]);
|
||||
skip &= (!xd->plane[0].eobs[i]);
|
||||
|
||||
return skip;
|
||||
}
|
||||
|
@ -579,8 +589,10 @@ int vp9_sb64uv_is_skippable_16x16(MACROBLOCKD *xd) {
|
|||
int skip = 1;
|
||||
int i = 0;
|
||||
|
||||
for (i = 256; i < 384; i += 16)
|
||||
skip &= (!xd->eobs[i]);
|
||||
for (i = 0; i < 64; i += 16)
|
||||
skip &= (!xd->plane[1].eobs[i]);
|
||||
for (i = 0; i < 64; i += 16)
|
||||
skip &= (!xd->plane[2].eobs[i]);
|
||||
|
||||
return skip;
|
||||
}
|
||||
|
@ -594,7 +606,7 @@ int vp9_sb64y_is_skippable_8x8(MACROBLOCKD *xd) {
|
|||
int i = 0;
|
||||
|
||||
for (i = 0; i < 256; i += 4)
|
||||
skip &= (!xd->eobs[i]);
|
||||
skip &= (!xd->plane[0].eobs[i]);
|
||||
|
||||
return skip;
|
||||
}
|
||||
|
@ -603,8 +615,10 @@ int vp9_sb64uv_is_skippable_8x8(MACROBLOCKD *xd) {
|
|||
int skip = 1;
|
||||
int i = 0;
|
||||
|
||||
for (i = 256; i < 384; i += 4)
|
||||
skip &= (!xd->eobs[i]);
|
||||
for (i = 0; i < 64; i += 4)
|
||||
skip &= (!xd->plane[1].eobs[i]);
|
||||
for (i = 0; i < 64; i += 4)
|
||||
skip &= (!xd->plane[2].eobs[i]);
|
||||
|
||||
return skip;
|
||||
}
|
||||
|
@ -618,7 +632,7 @@ int vp9_sb64y_is_skippable_4x4(MACROBLOCKD *xd) {
|
|||
int i = 0;
|
||||
|
||||
for (i = 0; i < 256; i++)
|
||||
skip &= (!xd->eobs[i]);
|
||||
skip &= (!xd->plane[0].eobs[i]);
|
||||
|
||||
return skip;
|
||||
}
|
||||
|
@ -627,8 +641,10 @@ int vp9_sb64uv_is_skippable_4x4(MACROBLOCKD *xd) {
|
|||
int skip = 1;
|
||||
int i = 0;
|
||||
|
||||
for (i = 256; i < 384; i++)
|
||||
skip &= (!xd->eobs[i]);
|
||||
for (i = 0; i < 64; i++)
|
||||
skip &= (!xd->plane[1].eobs[i]);
|
||||
for (i = 0; i < 64; i++)
|
||||
skip &= (!xd->plane[2].eobs[i]);
|
||||
|
||||
return skip;
|
||||
}
|
||||
|
@ -685,34 +701,34 @@ void vp9_tokenize_sb64(VP9_COMP *cpi,
|
|||
case TX_32X32:
|
||||
for (b = 0; b < 256; b += 64)
|
||||
tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC,
|
||||
TX_32X32, dry_run);
|
||||
TX_32X32, 256, dry_run);
|
||||
for (b = 256; b < 384; b += 64)
|
||||
tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV,
|
||||
TX_32X32, dry_run);
|
||||
TX_32X32, 256, dry_run);
|
||||
break;
|
||||
case TX_16X16:
|
||||
for (b = 0; b < 256; b += 16)
|
||||
tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC,
|
||||
TX_16X16, dry_run);
|
||||
TX_16X16, 256, dry_run);
|
||||
for (b = 256; b < 384; b += 16)
|
||||
tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV,
|
||||
TX_16X16, dry_run);
|
||||
TX_16X16, 256, dry_run);
|
||||
break;
|
||||
case TX_8X8:
|
||||
for (b = 0; b < 256; b += 4)
|
||||
tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC,
|
||||
TX_8X8, dry_run);
|
||||
TX_8X8, 256, dry_run);
|
||||
for (b = 256; b < 384; b += 4)
|
||||
tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV,
|
||||
TX_8X8, dry_run);
|
||||
TX_8X8, 256, dry_run);
|
||||
break;
|
||||
case TX_4X4:
|
||||
for (b = 0; b < 256; b++)
|
||||
tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC,
|
||||
TX_4X4, dry_run);
|
||||
TX_4X4, 256, dry_run);
|
||||
for (b = 256; b < 384; b++)
|
||||
tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV,
|
||||
TX_4X4, dry_run);
|
||||
TX_4X4, 256, dry_run);
|
||||
break;
|
||||
default: assert(0);
|
||||
}
|
||||
|
@ -780,29 +796,29 @@ void vp9_tokenize_mb(VP9_COMP *cpi,
|
|||
cpi->skip_false_count[mb_skip_context] += skip_inc;
|
||||
|
||||
if (tx_size == TX_16X16) {
|
||||
tokenize_b(cpi, xd, 0, t, PLANE_TYPE_Y_WITH_DC, TX_16X16, dry_run);
|
||||
tokenize_b(cpi, xd, 0, t, PLANE_TYPE_Y_WITH_DC, TX_16X16, 16, dry_run);
|
||||
for (b = 16; b < 24; b += 4) {
|
||||
tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_8X8, dry_run);
|
||||
tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_8X8, 16, dry_run);
|
||||
}
|
||||
} else if (tx_size == TX_8X8) {
|
||||
for (b = 0; b < 16; b += 4) {
|
||||
tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_8X8, dry_run);
|
||||
tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_8X8, 16, dry_run);
|
||||
}
|
||||
if (xd->mode_info_context->mbmi.mode == I8X8_PRED ||
|
||||
xd->mode_info_context->mbmi.mode == SPLITMV) {
|
||||
for (b = 16; b < 24; b++) {
|
||||
tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_4X4, dry_run);
|
||||
tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_4X4, 16, dry_run);
|
||||
}
|
||||
} else {
|
||||
for (b = 16; b < 24; b += 4) {
|
||||
tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_8X8, dry_run);
|
||||
tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_8X8, 16, dry_run);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (b = 0; b < 16; b++)
|
||||
tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_4X4, dry_run);
|
||||
tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_4X4, 16, dry_run);
|
||||
for (b = 16; b < 24; b++)
|
||||
tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_4X4, dry_run);
|
||||
tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_4X4, 16, dry_run);
|
||||
}
|
||||
if (dry_run)
|
||||
*t = t_backup;
|
||||
|
|
|
@ -260,117 +260,3 @@ sym(vp9_mbblock_error_xmm_impl):
|
|||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;int vp9_mbuverror_mmx_impl(short *s_ptr, short *d_ptr);
|
||||
global sym(vp9_mbuverror_mmx_impl) PRIVATE
|
||||
sym(vp9_mbuverror_mmx_impl):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 2
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
|
||||
mov rsi, arg(0) ;s_ptr
|
||||
mov rdi, arg(1) ;d_ptr
|
||||
|
||||
mov rcx, 16
|
||||
pxor mm7, mm7
|
||||
|
||||
.mbuverror_loop_mmx:
|
||||
|
||||
movq mm1, [rsi]
|
||||
movq mm2, [rdi]
|
||||
|
||||
psubw mm1, mm2
|
||||
pmaddwd mm1, mm1
|
||||
|
||||
|
||||
movq mm3, [rsi+8]
|
||||
movq mm4, [rdi+8]
|
||||
|
||||
psubw mm3, mm4
|
||||
pmaddwd mm3, mm3
|
||||
|
||||
|
||||
paddd mm7, mm1
|
||||
paddd mm7, mm3
|
||||
|
||||
|
||||
add rsi, 16
|
||||
add rdi, 16
|
||||
|
||||
dec rcx
|
||||
jnz .mbuverror_loop_mmx
|
||||
|
||||
movq mm0, mm7
|
||||
psrlq mm7, 32
|
||||
|
||||
paddd mm0, mm7
|
||||
movq rax, mm0
|
||||
|
||||
pop rdi
|
||||
pop rsi
|
||||
; begin epilog
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;int vp9_mbuverror_xmm_impl(short *s_ptr, short *d_ptr);
|
||||
global sym(vp9_mbuverror_xmm_impl) PRIVATE
|
||||
sym(vp9_mbuverror_xmm_impl):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 2
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
|
||||
mov rsi, arg(0) ;s_ptr
|
||||
mov rdi, arg(1) ;d_ptr
|
||||
|
||||
mov rcx, 16
|
||||
pxor xmm3, xmm3
|
||||
|
||||
.mbuverror_loop:
|
||||
|
||||
movdqa xmm1, [rsi]
|
||||
movdqa xmm2, [rdi]
|
||||
|
||||
psubw xmm1, xmm2
|
||||
pmaddwd xmm1, xmm1
|
||||
|
||||
paddd xmm3, xmm1
|
||||
|
||||
add rsi, 16
|
||||
add rdi, 16
|
||||
|
||||
dec rcx
|
||||
jnz .mbuverror_loop
|
||||
|
||||
pxor xmm0, xmm0
|
||||
movdqa xmm1, xmm3
|
||||
|
||||
movdqa xmm2, xmm1
|
||||
punpckldq xmm1, xmm0
|
||||
|
||||
punpckhdq xmm2, xmm0
|
||||
paddd xmm1, xmm2
|
||||
|
||||
movdqa xmm2, xmm1
|
||||
|
||||
psrldq xmm1, 8
|
||||
paddd xmm1, xmm2
|
||||
|
||||
movq rax, xmm1
|
||||
|
||||
pop rdi
|
||||
pop rsi
|
||||
; begin epilog
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
|
|
@ -26,17 +26,10 @@ void vp9_short_fdct8x4_mmx(short *input, short *output, int pitch) {
|
|||
int vp9_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr);
|
||||
int vp9_mbblock_error_mmx(MACROBLOCK *mb) {
|
||||
short *coeff_ptr = mb->block[0].coeff;
|
||||
short *dcoef_ptr = mb->e_mbd.block[0].dqcoeff;
|
||||
short *dcoef_ptr = mb->e_mbd.plane[0].dqcoeff;
|
||||
return vp9_mbblock_error_mmx_impl(coeff_ptr, dcoef_ptr);
|
||||
}
|
||||
|
||||
int vp9_mbuverror_mmx_impl(short *s_ptr, short *d_ptr);
|
||||
int vp9_mbuverror_mmx(MACROBLOCK *mb) {
|
||||
short *s_ptr = &mb->coeff[256];
|
||||
short *d_ptr = &mb->e_mbd.dqcoeff[256];
|
||||
return vp9_mbuverror_mmx_impl(s_ptr, d_ptr);
|
||||
}
|
||||
|
||||
void vp9_subtract_b_mmx_impl(unsigned char *z, int src_stride,
|
||||
short *diff, unsigned char *predictor,
|
||||
int pitch);
|
||||
|
@ -54,17 +47,10 @@ void vp9_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch) {
|
|||
int vp9_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr);
|
||||
int vp9_mbblock_error_xmm(MACROBLOCK *mb) {
|
||||
short *coeff_ptr = mb->block[0].coeff;
|
||||
short *dcoef_ptr = mb->e_mbd.block[0].dqcoeff;
|
||||
short *dcoef_ptr = mb->e_mbd.plane[0].dqcoeff;
|
||||
return vp9_mbblock_error_xmm_impl(coeff_ptr, dcoef_ptr);
|
||||
}
|
||||
|
||||
int vp9_mbuverror_xmm_impl(short *s_ptr, short *d_ptr);
|
||||
int vp9_mbuverror_xmm(MACROBLOCK *mb) {
|
||||
short *s_ptr = &mb->coeff[256];
|
||||
short *d_ptr = &mb->e_mbd.dqcoeff[256];
|
||||
return vp9_mbuverror_xmm_impl(s_ptr, d_ptr);
|
||||
}
|
||||
|
||||
void vp9_subtract_b_sse2_impl(unsigned char *z, int src_stride,
|
||||
short *diff, unsigned char *predictor,
|
||||
int pitch);
|
||||
|
|
Загрузка…
Ссылка в новой задаче