32x32 transform for superblocks.

This adds Debargha's DCT/DWT hybrid and a regular 32x32 DCT, and adds
code all over the place to wrap that in the bitstream/encoder/decoder/RD.

Some implementation notes (these probably need careful review):
- token range is extended by 1 bit, since the value range out of this
  transform is [-16384,16383].
- the coefficients coming out of the FDCT are manually scaled back by
  1 bit, or else they won't fit in int16_t (they are 17 bits). Because
  of this, the RD error scoring does not right-shift the MSE score by
  two (unlike for 4x4/8x8/16x16).
- to compensate for this loss in precision, the quantizer is also
  halved. This is currently a little hacky; see the sketch after
  these notes.
- FDCT and IDCT are double-only right now; they need a fixed-point
  implementation.
- There are no default probabilities for the 32x32 transform yet; I'm
  simply using the 16x16 luma ones. A future commit will add newly
  generated probabilities for all transforms.
- No ADST version. I don't think we'll add one for this level; if an
  ADST is desired, transform-size selection can scale back to 16x16
  or lower, and use an ADST at that level.
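
As a rough illustration of the coefficient scaling and quantizer halving
described above (a sketch only -- neither helper exists in the tree; the
real logic lives in the FDCT and quantizer changes below):

    /* A full-precision 32x32 FDCT coefficient needs 17 bits, so drop one
     * bit to fit int16_t; the value range becomes [-16384, 16383]. */
    static int16_t fit_coeff_in_int16(int coeff_17bit) {
      return (int16_t)(coeff_17bit >> 1);
    }

    /* Halve the quantizer step so that coeff / Q stays roughly unchanged
     * despite the halved coefficients. */
    static int quantize_halved(int16_t coeff, int q_step) {
      return coeff / (q_step >> 1);
    }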

Additional notes specific to Debargha's DWT/DCT hybrid:
- the coefficient scale differs between the top-left 16x16 (DCT-over-DWT)
  block and the rest of the block (DWT pixel differences). RD error
  therefore cannot be scaled cleanly between the coefficient and pixel
  domains, so, unfortunately, we compute the RD distortion in the pixel
  domain (sketched below) until we figure out how to scale these
  appropriately.
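
A minimal sketch of the pixel-domain distortion scoring mentioned above
(the helper and buffer names are made up; only vp9_short_idct32x32_c is
part of this change):

    /* Inverse-transform the dequantized coefficients back into the
     * residual domain, then measure SSE against the source residual.
     * (src - pred) vs. (recon - pred) gives the same SSE as comparing
     * source and reconstruction directly. */
    static int64_t sb_pixel_domain_sse(const short *src_diff, short *dqcoeff,
                                       short *recon_diff) {
      int64_t sse = 0;
      int i;
      vp9_short_idct32x32_c(dqcoeff, recon_diff, 64);
      for (i = 0; i < 32 * 32; ++i) {
        const int d = src_diff[i] - recon_diff[i];
        sse += (int64_t)d * d;
      }
      return sse;
    }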

Change-Id: I00386f20f35d7fabb19aba94c8162f8aee64ef2b
Ronald S. Bultje 2012-12-07 14:45:05 -08:00
Parent a36d9a4a15
Commit c456b35fdf
34 changed files with 2512 additions and 156 deletions

configure (vendored)

@ -247,6 +247,8 @@ EXPERIMENT_LIST="
implicit_segmentation
newbintramodes
comp_interintra_pred
tx32x32
dwt32x32hybrid
"
CONFIG_LIST="
external_build

test/dct32x32_test.cc (new file)

@ -0,0 +1,189 @@
/*
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include "third_party/googletest/src/include/gtest/gtest.h"
extern "C" {
#include "vp9/common/vp9_entropy.h"
#include "./vp9_rtcd.h"
void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch);
void vp9_short_idct32x32_c(short *input, short *output, int pitch);
}
#include "test/acm_random.h"
#include "vpx/vpx_integer.h"
using libvpx_test::ACMRandom;
namespace {
#if !CONFIG_DWT32X32HYBRID
static const double kPi = 3.141592653589793238462643383279502884;
static void reference2_32x32_idct_2d(double *input, double *output) {
double x;
for (int l = 0; l < 32; ++l) {
for (int k = 0; k < 32; ++k) {
double s = 0;
for (int i = 0; i < 32; ++i) {
for (int j = 0; j < 32; ++j) {
x = cos(kPi * j * (l + 0.5) / 32.0) *
cos(kPi * i * (k + 0.5) / 32.0) * input[i * 32 + j] / 1024;
if (i != 0)
x *= sqrt(2.0);
if (j != 0)
x *= sqrt(2.0);
s += x;
}
}
output[k * 32 + l] = s / 4;
}
}
}
static void reference_32x32_dct_1d(double in[32], double out[32], int stride) {
const double kInvSqrt2 = 0.707106781186547524400844362104;
for (int k = 0; k < 32; k++) {
out[k] = 0.0;
for (int n = 0; n < 32; n++)
out[k] += in[n] * cos(kPi * (2 * n + 1) * k / 64.0);
if (k == 0)
out[k] = out[k] * kInvSqrt2;
}
}
static void reference_32x32_dct_2d(int16_t input[32*32], double output[32*32]) {
// First transform columns
for (int i = 0; i < 32; ++i) {
double temp_in[32], temp_out[32];
for (int j = 0; j < 32; ++j)
temp_in[j] = input[j*32 + i];
reference_32x32_dct_1d(temp_in, temp_out, 1);
for (int j = 0; j < 32; ++j)
output[j * 32 + i] = temp_out[j];
}
// Then transform rows
for (int i = 0; i < 32; ++i) {
double temp_in[32], temp_out[32];
for (int j = 0; j < 32; ++j)
temp_in[j] = output[j + i*32];
reference_32x32_dct_1d(temp_in, temp_out, 1);
// Scale by some magic number
for (int j = 0; j < 32; ++j)
output[j + i * 32] = temp_out[j] / 4;
}
}
TEST(VP9Idct32x32Test, AccuracyCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 1000;
for (int i = 0; i < count_test_block; ++i) {
int16_t in[1024], coeff[1024];
int16_t out_c[1024];
double out_r[1024];
// Initialize a test block with input range [-255, 255].
for (int j = 0; j < 1024; ++j)
in[j] = rnd.Rand8() - rnd.Rand8();
reference_32x32_dct_2d(in, out_r);
for (int j = 0; j < 1024; j++)
coeff[j] = round(out_r[j]);
vp9_short_idct32x32_c(coeff, out_c, 64);
for (int j = 0; j < 1024; ++j) {
const int diff = out_c[j] - in[j];
const int error = diff * diff;
EXPECT_GE(1, error)
<< "Error: 3x32 IDCT has error " << error
<< " at index " << j;
}
vp9_short_fdct32x32_c(in, out_c, 64);
for (int j = 0; j < 1024; ++j) {
const double diff = coeff[j] - out_c[j];
const double error = diff * diff;
EXPECT_GE(1.0, error)
<< "Error: 32x32 FDCT has error " << error
<< " at index " << j;
}
}
}
#else // CONFIG_DWT32X32HYBRID
// TODO(rbultje/debargha): add DWT-specific tests
#endif // CONFIG_DWT32X32HYBRID
TEST(VP9Fdct32x32Test, AccuracyCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
unsigned int max_error = 0;
int64_t total_error = 0;
const int count_test_block = 1000;
for (int i = 0; i < count_test_block; ++i) {
int16_t test_input_block[1024];
int16_t test_temp_block[1024];
int16_t test_output_block[1024];
// Initialize a test block with input range [-255, 255].
for (int j = 0; j < 1024; ++j)
test_input_block[j] = rnd.Rand8() - rnd.Rand8();
const int pitch = 64;
vp9_short_fdct32x32_c(test_input_block, test_temp_block, pitch);
vp9_short_idct32x32_c(test_temp_block, test_output_block, pitch);
for (int j = 0; j < 1024; ++j) {
const unsigned diff = test_input_block[j] - test_output_block[j];
const unsigned error = diff * diff;
if (max_error < error)
max_error = error;
total_error += error;
}
}
EXPECT_GE(1u, max_error)
<< "Error: 32x32 FDCT/IDCT has an individual roundtrip error > 1";
EXPECT_GE(count_test_block/10, total_error)
<< "Error: 32x32 FDCT/IDCT has average roundtrip error > 1/10 per block";
}
TEST(VP9Fdct32x32Test, CoeffSizeCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 1000;
for (int i = 0; i < count_test_block; ++i) {
int16_t input_block[1024], input_extreme_block[1024];
int16_t output_block[1024], output_extreme_block[1024];
// Initialize a test block with input range [-255, 255].
for (int j = 0; j < 1024; ++j) {
input_block[j] = rnd.Rand8() - rnd.Rand8();
input_extreme_block[j] = rnd.Rand8() % 2 ? 255 : -255;
}
if (i == 0)
for (int j = 0; j < 1024; ++j)
input_extreme_block[j] = 255;
const int pitch = 32;
vp9_short_fdct32x32_c(input_block, output_block, pitch);
vp9_short_fdct32x32_c(input_extreme_block, output_extreme_block, pitch);
// The minimum quant value is 4.
for (int j = 0; j < 1024; ++j) {
EXPECT_GE(4*DCT_MAX_VALUE, abs(output_block[j]))
<< "Error: 32x32 FDCT has coefficient larger than 4*DCT_MAX_VALUE";
EXPECT_GE(4*DCT_MAX_VALUE, abs(output_extreme_block[j]))
<< "Error: 32x32 FDCT extreme has coefficient larger than "
"4*DCT_MAX_VALUE";
}
}
}
} // namespace


@ -64,6 +64,9 @@ endif
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
#LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct16x16_test.cc
ifeq ($(CONFIG_VP9_ENCODER)$(CONFIG_TX32X32),yesyes)
LIBVPX_TEST_SRCS-yes += dct32x32_test.cc
endif
LIBVPX_TEST_SRCS-yes += idct8x8_test.cc
LIBVPX_TEST_SRCS-yes += variance_test.cc
endif # VP9


@ -129,7 +129,13 @@ typedef enum {
TX_4X4, // 4x4 dct transform
TX_8X8, // 8x8 dct transform
TX_16X16, // 16x16 dct transform
TX_SIZE_MAX // Number of different transforms available
TX_SIZE_MAX_MB, // Number of transforms available to MBs
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
TX_32X32 = TX_SIZE_MAX_MB, // 32x32 dct transform
TX_SIZE_MAX_SB, // Number of transforms available to SBs
#else
TX_SIZE_MAX_SB = TX_SIZE_MAX_MB,
#endif
} TX_SIZE;
typedef enum {
@ -302,6 +308,15 @@ typedef struct blockd {
union b_mode_info bmi;
} BLOCKD;
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
typedef struct superblockd {
/* 32x32 Y and 16x16 U/V. No 2nd order transform yet. */
DECLARE_ALIGNED(16, short, diff[32*32+16*16*2]);
DECLARE_ALIGNED(16, short, qcoeff[32*32+16*16*2]);
DECLARE_ALIGNED(16, short, dqcoeff[32*32+16*16*2]);
} SUPERBLOCKD;
#endif
typedef struct macroblockd {
DECLARE_ALIGNED(16, short, diff[400]); /* from idct diff */
DECLARE_ALIGNED(16, unsigned char, predictor[384]);
@ -309,6 +324,10 @@ typedef struct macroblockd {
DECLARE_ALIGNED(16, short, dqcoeff[400]);
DECLARE_ALIGNED(16, unsigned short, eobs[25]);
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
SUPERBLOCKD sb_coeff_data;
#endif
/* 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries. */
BLOCKD block[25];
int fullpixel_mask;


@ -1375,3 +1375,5 @@ static const vp9_prob
}
}
};
#define default_coef_probs_32x32 default_coef_probs_16x16


@ -132,6 +132,109 @@ DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_16x16[256]) = {
250, 251, 236, 221, 206, 191, 207, 222, 237, 252, 253, 238, 223, 239, 254, 255,
};
DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]) = {
0, 1, 2, 3, 5, 4, 4, 5, 5, 3, 6, 3, 5, 4, 6, 6,
6, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
};
DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]) = {
0, 1, 32, 64, 33, 2, 3, 34, 65, 96, 128, 97, 66, 35, 4, 5, 36, 67, 98, 129, 160, 192, 161, 130, 99, 68, 37, 6, 7, 38, 69, 100,
131, 162, 193, 224, 256, 225, 194, 163, 132, 101, 70, 39, 8, 9, 40, 71, 102, 133, 164, 195, 226, 257, 288, 320, 289, 258, 227, 196, 165, 134, 103, 72,
41, 10, 11, 42, 73, 104, 135, 166, 197, 228, 259, 290, 321, 352, 384, 353, 322, 291, 260, 229, 198, 167, 136, 105, 74, 43, 12, 13, 44, 75, 106, 137,
168, 199, 230, 261, 292, 323, 354, 385, 416, 448, 417, 386, 355, 324, 293, 262, 231, 200, 169, 138, 107, 76, 45, 14, 15, 46, 77, 108, 139, 170, 201, 232,
263, 294, 325, 356, 387, 418, 449, 480, 512, 481, 450, 419, 388, 357, 326, 295, 264, 233, 202, 171, 140, 109, 78, 47, 16, 17, 48, 79, 110, 141, 172, 203,
234, 265, 296, 327, 358, 389, 420, 451, 482, 513, 544, 576, 545, 514, 483, 452, 421, 390, 359, 328, 297, 266, 235, 204, 173, 142, 111, 80, 49, 18, 19, 50,
81, 112, 143, 174, 205, 236, 267, 298, 329, 360, 391, 422, 453, 484, 515, 546, 577, 608, 640, 609, 578, 547, 516, 485, 454, 423, 392, 361, 330, 299, 268, 237,
206, 175, 144, 113, 82, 51, 20, 21, 52, 83, 114, 145, 176, 207, 238, 269, 300, 331, 362, 393, 424, 455, 486, 517, 548, 579, 610, 641, 672, 704, 673, 642,
611, 580, 549, 518, 487, 456, 425, 394, 363, 332, 301, 270, 239, 208, 177, 146, 115, 84, 53, 22, 23, 54, 85, 116, 147, 178, 209, 240, 271, 302, 333, 364,
395, 426, 457, 488, 519, 550, 581, 612, 643, 674, 705, 736, 768, 737, 706, 675, 644, 613, 582, 551, 520, 489, 458, 427, 396, 365, 334, 303, 272, 241, 210, 179,
148, 117, 86, 55, 24, 25, 56, 87, 118, 149, 180, 211, 242, 273, 304, 335, 366, 397, 428, 459, 490, 521, 552, 583, 614, 645, 676, 707, 738, 769, 800, 832,
801, 770, 739, 708, 677, 646, 615, 584, 553, 522, 491, 460, 429, 398, 367, 336, 305, 274, 243, 212, 181, 150, 119, 88, 57, 26, 27, 58, 89, 120, 151, 182,
213, 244, 275, 306, 337, 368, 399, 430, 461, 492, 523, 554, 585, 616, 647, 678, 709, 740, 771, 802, 833, 864, 896, 865, 834, 803, 772, 741, 710, 679, 648, 617,
586, 555, 524, 493, 462, 431, 400, 369, 338, 307, 276, 245, 214, 183, 152, 121, 90, 59, 28, 29, 60, 91, 122, 153, 184, 215, 246, 277, 308, 339, 370, 401,
432, 463, 494, 525, 556, 587, 618, 649, 680, 711, 742, 773, 804, 835, 866, 897, 928, 960, 929, 898, 867, 836, 805, 774, 743, 712, 681, 650, 619, 588, 557, 526,
495, 464, 433, 402, 371, 340, 309, 278, 247, 216, 185, 154, 123, 92, 61, 30, 31, 62, 93, 124, 155, 186, 217, 248, 279, 310, 341, 372, 403, 434, 465, 496,
527, 558, 589, 620, 651, 682, 713, 744, 775, 806, 837, 868, 899, 930, 961, 992, 993, 962, 931, 900, 869, 838, 807, 776, 745, 714, 683, 652, 621, 590, 559, 528,
497, 466, 435, 404, 373, 342, 311, 280, 249, 218, 187, 156, 125, 94, 63, 95, 126, 157, 188, 219, 250, 281, 312, 343, 374, 405, 436, 467, 498, 529, 560, 591,
622, 653, 684, 715, 746, 777, 808, 839, 870, 901, 932, 963, 994, 995, 964, 933, 902, 871, 840, 809, 778, 747, 716, 685, 654, 623, 592, 561, 530, 499, 468, 437,
406, 375, 344, 313, 282, 251, 220, 189, 158, 127, 159, 190, 221, 252, 283, 314, 345, 376, 407, 438, 469, 500, 531, 562, 593, 624, 655, 686, 717, 748, 779, 810,
841, 872, 903, 934, 965, 996, 997, 966, 935, 904, 873, 842, 811, 780, 749, 718, 687, 656, 625, 594, 563, 532, 501, 470, 439, 408, 377, 346, 315, 284, 253, 222,
191, 223, 254, 285, 316, 347, 378, 409, 440, 471, 502, 533, 564, 595, 626, 657, 688, 719, 750, 781, 812, 843, 874, 905, 936, 967, 998, 999, 968, 937, 906, 875,
844, 813, 782, 751, 720, 689, 658, 627, 596, 565, 534, 503, 472, 441, 410, 379, 348, 317, 286, 255, 287, 318, 349, 380, 411, 442, 473, 504, 535, 566, 597, 628,
659, 690, 721, 752, 783, 814, 845, 876, 907, 938, 969, 1000, 1001, 970, 939, 908, 877, 846, 815, 784, 753, 722, 691, 660, 629, 598, 567, 536, 505, 474, 443, 412,
381, 350, 319, 351, 382, 413, 444, 475, 506, 537, 568, 599, 630, 661, 692, 723, 754, 785, 816, 847, 878, 909, 940, 971, 1002, 1003, 972, 941, 910, 879, 848, 817,
786, 755, 724, 693, 662, 631, 600, 569, 538, 507, 476, 445, 414, 383, 415, 446, 477, 508, 539, 570, 601, 632, 663, 694, 725, 756, 787, 818, 849, 880, 911, 942,
973, 1004, 1005, 974, 943, 912, 881, 850, 819, 788, 757, 726, 695, 664, 633, 602, 571, 540, 509, 478, 447, 479, 510, 541, 572, 603, 634, 665, 696, 727, 758, 789,
820, 851, 882, 913, 944, 975, 1006, 1007, 976, 945, 914, 883, 852, 821, 790, 759, 728, 697, 666, 635, 604, 573, 542, 511, 543, 574, 605, 636, 667, 698, 729, 760,
791, 822, 853, 884, 915, 946, 977, 1008, 1009, 978, 947, 916, 885, 854, 823, 792, 761, 730, 699, 668, 637, 606, 575, 607, 638, 669, 700, 731, 762, 793, 824, 855,
886, 917, 948, 979, 1010, 1011, 980, 949, 918, 887, 856, 825, 794, 763, 732, 701, 670, 639, 671, 702, 733, 764, 795, 826, 857, 888, 919, 950, 981, 1012, 1013, 982,
951, 920, 889, 858, 827, 796, 765, 734, 703, 735, 766, 797, 828, 859, 890, 921, 952, 983, 1014, 1015, 984, 953, 922, 891, 860, 829, 798, 767, 799, 830, 861, 892,
923, 954, 985, 1016, 1017, 986, 955, 924, 893, 862, 831, 863, 894, 925, 956, 987, 1018, 1019, 988, 957, 926, 895, 927, 958, 989, 1020, 1021, 990, 959, 991, 1022, 1023,
};
/* Array indices are identical to previously-existing CONTEXT_NODE indices */
@ -160,10 +263,11 @@ static const Prob Pcat2[] = { 165, 145};
static const Prob Pcat3[] = { 173, 148, 140};
static const Prob Pcat4[] = { 176, 155, 140, 135};
static const Prob Pcat5[] = { 180, 157, 141, 134, 130};
static const Prob Pcat6[] =
{ 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129};
static const Prob Pcat6[] = {
254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129
};
static vp9_tree_index cat1[2], cat2[4], cat3[6], cat4[8], cat5[10], cat6[26];
static vp9_tree_index cat1[2], cat2[4], cat3[6], cat4[8], cat5[10], cat6[28];
static void init_bit_tree(vp9_tree_index *p, int n) {
int i = 0;
@ -182,7 +286,7 @@ static void init_bit_trees() {
init_bit_tree(cat3, 3);
init_bit_tree(cat4, 4);
init_bit_tree(cat5, 5);
init_bit_tree(cat6, 13);
init_bit_tree(cat6, 14);
}
vp9_extra_bit_struct vp9_extra_bits[12] = {
@ -196,7 +300,7 @@ vp9_extra_bit_struct vp9_extra_bits[12] = {
{ cat3, Pcat3, 3, 11},
{ cat4, Pcat4, 4, 19},
{ cat5, Pcat5, 5, 35},
{ cat6, Pcat6, 13, 67},
{ cat6, Pcat6, 14, 67},
{ 0, 0, 0, 0}
};
@ -218,6 +322,11 @@ void vp9_default_coef_probs(VP9_COMMON *pc) {
vpx_memcpy(pc->fc.hybrid_coef_probs_16x16,
default_hybrid_coef_probs_16x16,
sizeof(pc->fc.hybrid_coef_probs_16x16));
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
vpx_memcpy(pc->fc.coef_probs_32x32, default_coef_probs_32x32,
sizeof(pc->fc.coef_probs_32x32));
#endif
}
void vp9_coef_tree_initialize() {
@ -444,4 +553,28 @@ void vp9_adapt_coef_probs(VP9_COMMON *cm) {
else cm->fc.hybrid_coef_probs_16x16[i][j][k][t] = prob;
}
}
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
for (i = 0; i < BLOCK_TYPES_32X32; ++i)
for (j = 0; j < COEF_BANDS; ++j)
for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0)))
continue;
vp9_tree_probs_from_distribution(
MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree,
coef_probs, branch_ct, cm->fc.coef_counts_32x32[i][j][k], 256, 1);
for (t = 0; t < ENTROPY_NODES; ++t) {
int prob;
count = branch_ct[t][0] + branch_ct[t][1];
count = count > count_sat ? count_sat : count;
factor = (update_factor * count / count_sat);
prob = ((int)cm->fc.pre_coef_probs_32x32[i][j][k][t] *
(256 - factor) +
(int)coef_probs[t] * factor + 128) >> 8;
if (prob <= 0) cm->fc.coef_probs_32x32[i][j][k][t] = 1;
else if (prob > 255) cm->fc.coef_probs_32x32[i][j][k][t] = 255;
else cm->fc.coef_probs_32x32[i][j][k][t] = prob;
}
}
#endif
}


@ -55,7 +55,7 @@ extern vp9_extra_bit_struct vp9_extra_bits[12]; /* indexed by token value */
#define PROB_UPDATE_BASELINE_COST 7
#define MAX_PROB 255
#define DCT_MAX_VALUE 8192
#define DCT_MAX_VALUE 16384
/* Coefficients are predicted via a 3-dimensional probability table. */
@ -66,6 +66,10 @@ extern vp9_extra_bit_struct vp9_extra_bits[12]; /* indexed by token value */
#define BLOCK_TYPES_16X16 4
#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32
#define BLOCK_TYPES_32X32 4
#endif
/* Middle dimension is a coarsening of the coefficient's
position within the 4x4 DCT. */
@ -73,6 +77,9 @@ extern vp9_extra_bit_struct vp9_extra_bits[12]; /* indexed by token value */
extern DECLARE_ALIGNED(16, const int, vp9_coef_bands[16]);
extern DECLARE_ALIGNED(64, const int, vp9_coef_bands_8x8[64]);
extern DECLARE_ALIGNED(16, const int, vp9_coef_bands_16x16[256]);
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
extern DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]);
#endif
/* Inside dimension is 3-valued measure of nearby complexity, that is,
the extent to which nearby coefficients are nonzero. For the first
@ -106,9 +113,13 @@ extern DECLARE_ALIGNED(16, const int, vp9_col_scan[16]);
extern DECLARE_ALIGNED(16, const int, vp9_row_scan[16]);
extern DECLARE_ALIGNED(64, const int, vp9_default_zig_zag1d_8x8[64]);
extern DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_16x16[256]);
#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32
extern DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]);
#endif
void vp9_coef_tree_initialize(void);
extern DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_16x16[256]);
void vp9_adapt_coef_probs(struct VP9Common *);
#endif


@ -1774,3 +1774,465 @@ void vp9_short_idct10_16x16_c(int16_t *input, int16_t *output, int pitch) {
#undef RIGHT_SHIFT
#undef RIGHT_ROUNDING
#endif
#if CONFIG_TX32X32
#if !CONFIG_DWT32X32HYBRID
#define DownshiftMultiplyBy2(x) x * 2
#define DownshiftMultiply(x) x
static void idct16(double *input, double *output, int stride) {
static const double C1 = 0.995184726672197;
static const double C2 = 0.98078528040323;
static const double C3 = 0.956940335732209;
static const double C4 = 0.923879532511287;
static const double C5 = 0.881921264348355;
static const double C6 = 0.831469612302545;
static const double C7 = 0.773010453362737;
static const double C8 = 0.707106781186548;
static const double C9 = 0.634393284163646;
static const double C10 = 0.555570233019602;
static const double C11 = 0.471396736825998;
static const double C12 = 0.38268343236509;
static const double C13 = 0.290284677254462;
static const double C14 = 0.195090322016128;
static const double C15 = 0.098017140329561;
double step[16];
double intermediate[16];
double temp1, temp2;
// step 1 and 2
step[ 0] = input[stride*0] + input[stride*8];
step[ 1] = input[stride*0] - input[stride*8];
temp1 = input[stride*4]*C12;
temp2 = input[stride*12]*C4;
temp1 -= temp2;
temp1 = DownshiftMultiply(temp1);
temp1 *= C8;
step[ 2] = DownshiftMultiplyBy2(temp1);
temp1 = input[stride*4]*C4;
temp2 = input[stride*12]*C12;
temp1 += temp2;
temp1 = DownshiftMultiply(temp1);
temp1 *= C8;
step[ 3] = DownshiftMultiplyBy2(temp1);
temp1 = input[stride*2]*C8;
temp1 = DownshiftMultiplyBy2(temp1);
temp2 = input[stride*6] + input[stride*10];
step[ 4] = temp1 + temp2;
step[ 5] = temp1 - temp2;
temp1 = input[stride*14]*C8;
temp1 = DownshiftMultiplyBy2(temp1);
temp2 = input[stride*6] - input[stride*10];
step[ 6] = temp2 - temp1;
step[ 7] = temp2 + temp1;
// for odd input
temp1 = input[stride*3]*C12;
temp2 = input[stride*13]*C4;
temp1 += temp2;
temp1 = DownshiftMultiply(temp1);
temp1 *= C8;
intermediate[ 8] = DownshiftMultiplyBy2(temp1);
temp1 = input[stride*3]*C4;
temp2 = input[stride*13]*C12;
temp2 -= temp1;
temp2 = DownshiftMultiply(temp2);
temp2 *= C8;
intermediate[ 9] = DownshiftMultiplyBy2(temp2);
intermediate[10] = DownshiftMultiplyBy2(input[stride*9]*C8);
intermediate[11] = input[stride*15] - input[stride*1];
intermediate[12] = input[stride*15] + input[stride*1];
intermediate[13] = DownshiftMultiplyBy2((input[stride*7]*C8));
temp1 = input[stride*11]*C12;
temp2 = input[stride*5]*C4;
temp2 -= temp1;
temp2 = DownshiftMultiply(temp2);
temp2 *= C8;
intermediate[14] = DownshiftMultiplyBy2(temp2);
temp1 = input[stride*11]*C4;
temp2 = input[stride*5]*C12;
temp1 += temp2;
temp1 = DownshiftMultiply(temp1);
temp1 *= C8;
intermediate[15] = DownshiftMultiplyBy2(temp1);
step[ 8] = intermediate[ 8] + intermediate[14];
step[ 9] = intermediate[ 9] + intermediate[15];
step[10] = intermediate[10] + intermediate[11];
step[11] = intermediate[10] - intermediate[11];
step[12] = intermediate[12] + intermediate[13];
step[13] = intermediate[12] - intermediate[13];
step[14] = intermediate[ 8] - intermediate[14];
step[15] = intermediate[ 9] - intermediate[15];
// step 3
output[stride*0] = step[ 0] + step[ 3];
output[stride*1] = step[ 1] + step[ 2];
output[stride*2] = step[ 1] - step[ 2];
output[stride*3] = step[ 0] - step[ 3];
temp1 = step[ 4]*C14;
temp2 = step[ 7]*C2;
temp1 -= temp2;
output[stride*4] = DownshiftMultiply(temp1);
temp1 = step[ 4]*C2;
temp2 = step[ 7]*C14;
temp1 += temp2;
output[stride*7] = DownshiftMultiply(temp1);
temp1 = step[ 5]*C10;
temp2 = step[ 6]*C6;
temp1 -= temp2;
output[stride*5] = DownshiftMultiply(temp1);
temp1 = step[ 5]*C6;
temp2 = step[ 6]*C10;
temp1 += temp2;
output[stride*6] = DownshiftMultiply(temp1);
output[stride*8] = step[ 8] + step[11];
output[stride*9] = step[ 9] + step[10];
output[stride*10] = step[ 9] - step[10];
output[stride*11] = step[ 8] - step[11];
output[stride*12] = step[12] + step[15];
output[stride*13] = step[13] + step[14];
output[stride*14] = step[13] - step[14];
output[stride*15] = step[12] - step[15];
// output 4
step[ 0] = output[stride*0] + output[stride*7];
step[ 1] = output[stride*1] + output[stride*6];
step[ 2] = output[stride*2] + output[stride*5];
step[ 3] = output[stride*3] + output[stride*4];
step[ 4] = output[stride*3] - output[stride*4];
step[ 5] = output[stride*2] - output[stride*5];
step[ 6] = output[stride*1] - output[stride*6];
step[ 7] = output[stride*0] - output[stride*7];
temp1 = output[stride*8]*C7;
temp2 = output[stride*15]*C9;
temp1 -= temp2;
step[ 8] = DownshiftMultiply(temp1);
temp1 = output[stride*9]*C11;
temp2 = output[stride*14]*C5;
temp1 += temp2;
step[ 9] = DownshiftMultiply(temp1);
temp1 = output[stride*10]*C3;
temp2 = output[stride*13]*C13;
temp1 -= temp2;
step[10] = DownshiftMultiply(temp1);
temp1 = output[stride*11]*C15;
temp2 = output[stride*12]*C1;
temp1 += temp2;
step[11] = DownshiftMultiply(temp1);
temp1 = output[stride*11]*C1;
temp2 = output[stride*12]*C15;
temp2 -= temp1;
step[12] = DownshiftMultiply(temp2);
temp1 = output[stride*10]*C13;
temp2 = output[stride*13]*C3;
temp1 += temp2;
step[13] = DownshiftMultiply(temp1);
temp1 = output[stride*9]*C5;
temp2 = output[stride*14]*C11;
temp2 -= temp1;
step[14] = DownshiftMultiply(temp2);
temp1 = output[stride*8]*C9;
temp2 = output[stride*15]*C7;
temp1 += temp2;
step[15] = DownshiftMultiply(temp1);
// step 5
output[stride*0] = step[0] + step[15];
output[stride*1] = step[1] + step[14];
output[stride*2] = step[2] + step[13];
output[stride*3] = step[3] + step[12];
output[stride*4] = step[4] + step[11];
output[stride*5] = step[5] + step[10];
output[stride*6] = step[6] + step[ 9];
output[stride*7] = step[7] + step[ 8];
output[stride*15] = step[0] - step[15];
output[stride*14] = step[1] - step[14];
output[stride*13] = step[2] - step[13];
output[stride*12] = step[3] - step[12];
output[stride*11] = step[4] - step[11];
output[stride*10] = step[5] - step[10];
output[stride*9] = step[6] - step[ 9];
output[stride*8] = step[7] - step[ 8];
}
static void butterfly_32_idct_1d(double *input, double *output, int stride) {
static const double C1 = 0.998795456205; // cos(pi * 1 / 64)
static const double C3 = 0.989176509965; // cos(pi * 3 / 64)
static const double C5 = 0.970031253195; // cos(pi * 5 / 64)
static const double C7 = 0.941544065183; // cos(pi * 7 / 64)
static const double C9 = 0.903989293123; // cos(pi * 9 / 64)
static const double C11 = 0.857728610000; // cos(pi * 11 / 64)
static const double C13 = 0.803207531481; // cos(pi * 13 / 64)
static const double C15 = 0.740951125355; // cos(pi * 15 / 64)
static const double C16 = 0.707106781187; // cos(pi * 16 / 64)
static const double C17 = 0.671558954847; // cos(pi * 17 / 64)
static const double C19 = 0.595699304492; // cos(pi * 19 / 64)
static const double C21 = 0.514102744193; // cos(pi * 21 / 64)
static const double C23 = 0.427555093430; // cos(pi * 23 / 64)
static const double C25 = 0.336889853392; // cos(pi * 25 / 64)
static const double C27 = 0.242980179903; // cos(pi * 27 / 64)
static const double C29 = 0.146730474455; // cos(pi * 29 / 64)
static const double C31 = 0.049067674327; // cos(pi * 31 / 64)
double step1[32];
double step2[32];
step1[ 0] = input[stride*0];
step1[ 1] = input[stride*2];
step1[ 2] = input[stride*4];
step1[ 3] = input[stride*6];
step1[ 4] = input[stride*8];
step1[ 5] = input[stride*10];
step1[ 6] = input[stride*12];
step1[ 7] = input[stride*14];
step1[ 8] = input[stride*16];
step1[ 9] = input[stride*18];
step1[10] = input[stride*20];
step1[11] = input[stride*22];
step1[12] = input[stride*24];
step1[13] = input[stride*26];
step1[14] = input[stride*28];
step1[15] = input[stride*30];
step1[16] = DownshiftMultiplyBy2(input[stride*1]*C16);
step1[17] = (input[stride*3] + input[stride*1]);
step1[18] = (input[stride*5] + input[stride*3]);
step1[19] = (input[stride*7] + input[stride*5]);
step1[20] = (input[stride*9] + input[stride*7]);
step1[21] = (input[stride*11] + input[stride*9]);
step1[22] = (input[stride*13] + input[stride*11]);
step1[23] = (input[stride*15] + input[stride*13]);
step1[24] = (input[stride*17] + input[stride*15]);
step1[25] = (input[stride*19] + input[stride*17]);
step1[26] = (input[stride*21] + input[stride*19]);
step1[27] = (input[stride*23] + input[stride*21]);
step1[28] = (input[stride*25] + input[stride*23]);
step1[29] = (input[stride*27] + input[stride*25]);
step1[30] = (input[stride*29] + input[stride*27]);
step1[31] = (input[stride*31] + input[stride*29]);
idct16(step1, step2, 1);
idct16(step1 + 16, step2 + 16, 1);
step2[16] = DownshiftMultiply(step2[16] / (2*C1));
step2[17] = DownshiftMultiply(step2[17] / (2*C3));
step2[18] = DownshiftMultiply(step2[18] / (2*C5));
step2[19] = DownshiftMultiply(step2[19] / (2*C7));
step2[20] = DownshiftMultiply(step2[20] / (2*C9));
step2[21] = DownshiftMultiply(step2[21] / (2*C11));
step2[22] = DownshiftMultiply(step2[22] / (2*C13));
step2[23] = DownshiftMultiply(step2[23] / (2*C15));
step2[24] = DownshiftMultiply(step2[24] / (2*C17));
step2[25] = DownshiftMultiply(step2[25] / (2*C19));
step2[26] = DownshiftMultiply(step2[26] / (2*C21));
step2[27] = DownshiftMultiply(step2[27] / (2*C23));
step2[28] = DownshiftMultiply(step2[28] / (2*C25));
step2[29] = DownshiftMultiply(step2[29] / (2*C27));
step2[30] = DownshiftMultiply(step2[30] / (2*C29));
step2[31] = DownshiftMultiply(step2[31] / (2*C31));
output[stride* 0] = step2[ 0] + step2[16];
output[stride* 1] = step2[ 1] + step2[17];
output[stride* 2] = step2[ 2] + step2[18];
output[stride* 3] = step2[ 3] + step2[19];
output[stride* 4] = step2[ 4] + step2[20];
output[stride* 5] = step2[ 5] + step2[21];
output[stride* 6] = step2[ 6] + step2[22];
output[stride* 7] = step2[ 7] + step2[23];
output[stride* 8] = step2[ 8] + step2[24];
output[stride* 9] = step2[ 9] + step2[25];
output[stride*10] = step2[10] + step2[26];
output[stride*11] = step2[11] + step2[27];
output[stride*12] = step2[12] + step2[28];
output[stride*13] = step2[13] + step2[29];
output[stride*14] = step2[14] + step2[30];
output[stride*15] = step2[15] + step2[31];
output[stride*16] = step2[15] - step2[(31 - 0)];
output[stride*17] = step2[14] - step2[(31 - 1)];
output[stride*18] = step2[13] - step2[(31 - 2)];
output[stride*19] = step2[12] - step2[(31 - 3)];
output[stride*20] = step2[11] - step2[(31 - 4)];
output[stride*21] = step2[10] - step2[(31 - 5)];
output[stride*22] = step2[ 9] - step2[(31 - 6)];
output[stride*23] = step2[ 8] - step2[(31 - 7)];
output[stride*24] = step2[ 7] - step2[(31 - 8)];
output[stride*25] = step2[ 6] - step2[(31 - 9)];
output[stride*26] = step2[ 5] - step2[(31 - 10)];
output[stride*27] = step2[ 4] - step2[(31 - 11)];
output[stride*28] = step2[ 3] - step2[(31 - 12)];
output[stride*29] = step2[ 2] - step2[(31 - 13)];
output[stride*30] = step2[ 1] - step2[(31 - 14)];
output[stride*31] = step2[ 0] - step2[(31 - 15)];
}
void vp9_short_idct32x32_c(short *input, short *output, int pitch) {
vp9_clear_system_state(); // Make it simd safe : __asm emms;
{
double out[32*32], out2[32*32];
const int short_pitch = pitch >> 1;
int i, j;
// First transform rows
for (i = 0; i < 32; ++i) {
double temp_in[32], temp_out[32];
for (j = 0; j < 32; ++j)
temp_in[j] = input[j + i*short_pitch];
butterfly_32_idct_1d(temp_in, temp_out, 1);
for (j = 0; j < 32; ++j)
out[j + i*32] = temp_out[j];
}
// Then transform columns
for (i = 0; i < 32; ++i) {
double temp_in[32], temp_out[32];
for (j = 0; j < 32; ++j)
temp_in[j] = out[j*32 + i];
butterfly_32_idct_1d(temp_in, temp_out, 1);
for (j = 0; j < 32; ++j)
out2[j*32 + i] = temp_out[j];
}
for (i = 0; i < 32*32; ++i)
output[i] = round(out2[i]/128);
}
vp9_clear_system_state(); // Make it simd safe : __asm emms;
}
#else // CONFIG_DWT32X32HYBRID
#define MAX_BLOCK_LENGTH 64
#define ENH_PRECISION_BITS 1
#define ENH_PRECISION_RND ((1 << ENH_PRECISION_BITS) / 2)
// Note: block length must be even for this implementation
static void synthesis_53_row(int length, short *lowpass, short *highpass,
short *x) {
short r, * a, * b;
int n;
n = length >> 1;
b = highpass;
a = lowpass;
r = *highpass;
while (n--) {
*a++ -= (r + (*b) + 1) >> 1;
r = *b++;
}
n = length >> 1;
b = highpass;
a = lowpass;
while (--n) {
*x++ = ((r = *a++) + 1) >> 1;
*x++ = *b++ + ((r + (*a) + 2) >> 2);
}
*x++ = ((r = *a) + 1)>>1;
*x++ = *b + ((r+1)>>1);
}
static void synthesis_53_col(int length, short *lowpass, short *highpass,
short *x) {
short r, * a, * b;
int n;
n = length >> 1;
b = highpass;
a = lowpass;
r = *highpass;
while (n--) {
*a++ -= (r + (*b) + 1) >> 1;
r = *b++;
}
n = length >> 1;
b = highpass;
a = lowpass;
while (--n) {
*x++ = r = *a++;
*x++ = ((*b++) << 1) + ((r + (*a) + 1) >> 1);
}
*x++ = r = *a;
*x++ = ((*b) << 1) + r;
}
// NOTE: Using a 5/3 integer wavelet for now. Explore using a wavelet
// with a better response later
void dyadic_synthesize(int levels, int width, int height, short *c, int pitch_c,
short *x, int pitch_x) {
int th[16], tw[16], lv, i, j, nh, nw, hh = height, hw = width;
short buffer[2 * MAX_BLOCK_LENGTH];
th[0] = hh;
tw[0] = hw;
for (i = 1; i <= levels; i++) {
th[i] = (th[i - 1] + 1) >> 1;
tw[i] = (tw[i - 1] + 1) >> 1;
}
for (lv = levels - 1; lv >= 0; lv--) {
nh = th[lv];
nw = tw[lv];
hh = th[lv + 1];
hw = tw[lv + 1];
if ((nh < 2) || (nw < 2)) continue;
for (j = 0; j < nw; j++) {
for (i = 0; i < nh; i++)
buffer[i] = c[i * pitch_c + j];
synthesis_53_col(nh, buffer, buffer + hh, buffer + nh);
for (i = 0; i < nh; i++)
c[i * pitch_c + j] = buffer[i + nh];
}
for (i = 0; i < nh; i++) {
memcpy(buffer, &c[i * pitch_c], nw * sizeof(short));
synthesis_53_row(nw, buffer, buffer + hw, &c[i * pitch_c]);
}
}
for (i = 0; i < height; i++)
for (j = 0; j < width; j++)
x[i * pitch_x + j] = (c[i * pitch_c + j] + ENH_PRECISION_RND) >>
ENH_PRECISION_BITS;
}
void vp9_short_idct32x32_c(short *input, short *output, int pitch) {
// assume out is a 32x32 buffer
short buffer[16 * 16];
short buffer2[32 * 32];
const int short_pitch = pitch >> 1;
int i;
// TODO(debargha): Implement more efficiently by adding output pitch
// argument to the idct16x16 function
vp9_short_idct16x16_c(input, buffer, pitch);
for (i = 0; i < 16; ++i) {
vpx_memcpy(buffer2 + i * 32, buffer + i * 16, sizeof(short) * 16);
vpx_memcpy(buffer2 + i * 32 + 16, input + i * short_pitch + 16,
sizeof(short) * 16);
}
for (; i < 32; ++i) {
vpx_memcpy(buffer2 + i * 32, input + i * short_pitch,
sizeof(short) * 32);
}
dyadic_synthesize(1, 32, 32, buffer2, 32, output, 32);
}
#endif // CONFIG_DWT32X32HYBRID
#endif // CONFIG_TX32X32


@ -143,3 +143,16 @@ void vp9_inverse_transform_mb_16x16(MACROBLOCKD *xd) {
vp9_inverse_transform_mby_16x16(xd);
vp9_inverse_transform_mbuv_8x8(xd);
}
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
void vp9_inverse_transform_sby_32x32(SUPERBLOCKD *xd_sb) {
vp9_short_idct32x32(xd_sb->dqcoeff, xd_sb->diff, 64);
}
void vp9_inverse_transform_sbuv_16x16(SUPERBLOCKD *xd_sb) {
vp9_inverse_transform_b_16x16(xd_sb->dqcoeff + 1024,
xd_sb->diff + 1024, 32);
vp9_inverse_transform_b_16x16(xd_sb->dqcoeff + 1280,
xd_sb->diff + 1280, 32);
}
#endif


@ -38,4 +38,9 @@ extern void vp9_inverse_transform_mb_16x16(MACROBLOCKD *xd);
extern void vp9_inverse_transform_mby_16x16(MACROBLOCKD *xd);
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
extern void vp9_inverse_transform_sby_32x32(SUPERBLOCKD *xd_sb);
extern void vp9_inverse_transform_sbuv_16x16(SUPERBLOCKD *xd_sb);
#endif
#endif // __INC_INVTRANS_H


@ -192,6 +192,9 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd) {
/* Point at base of Mb MODE_INFO list */
const MODE_INFO *mode_info_context = cm->mi;
#if CONFIG_SUPERBLOCKS
const int mis = cm->mode_info_stride;
#endif
/* Initialize the loop filter for this frame. */
vp9_loop_filter_frame_init(cm, xd, cm->filter_level);
@ -226,14 +229,18 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd) {
if (mb_col > 0
#if CONFIG_SUPERBLOCKS
&& !((mb_col & 1) && mode_info_context->mbmi.encoded_as_sb &&
mode_info_context[0].mbmi.mb_skip_coeff &&
mode_info_context[-1].mbmi.mb_skip_coeff)
((mode_info_context[0].mbmi.mb_skip_coeff &&
mode_info_context[-1].mbmi.mb_skip_coeff)
#if CONFIG_TX32X32
|| mode_info_context[-1].mbmi.txfm_size == TX_32X32
#endif
))
#endif
)
vp9_loop_filter_mbv(y_ptr, u_ptr, v_ptr, post->y_stride,
post->uv_stride, &lfi);
if (!skip_lf && tx_type != TX_16X16) {
if (!skip_lf && tx_type < TX_16X16) {
if (tx_type == TX_8X8)
vp9_loop_filter_bv8x8(y_ptr, u_ptr, v_ptr, post->y_stride,
post->uv_stride, &lfi);
@ -247,14 +254,18 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd) {
if (mb_row > 0
#if CONFIG_SUPERBLOCKS
&& !((mb_row & 1) && mode_info_context->mbmi.encoded_as_sb &&
mode_info_context[0].mbmi.mb_skip_coeff &&
mode_info_context[-cm->mode_info_stride].mbmi.mb_skip_coeff)
((mode_info_context[0].mbmi.mb_skip_coeff &&
mode_info_context[-mis].mbmi.mb_skip_coeff)
#if CONFIG_TX32X32
|| mode_info_context[-mis].mbmi.txfm_size == TX_32X32
#endif
))
#endif
)
vp9_loop_filter_mbh(y_ptr, u_ptr, v_ptr, post->y_stride,
post->uv_stride, &lfi);
if (!skip_lf && tx_type != TX_16X16) {
if (!skip_lf && tx_type < TX_16X16) {
if (tx_type == TX_8X8)
vp9_loop_filter_bh8x8(y_ptr, u_ptr, v_ptr, post->y_stride,
post->uv_stride, &lfi);


@ -58,6 +58,9 @@ typedef struct frame_contexts {
vp9_prob hybrid_coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
vp9_prob coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
vp9_prob hybrid_coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
vp9_prob coef_probs_32x32 [BLOCK_TYPES_32X32] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
#endif
nmv_context nmvc;
nmv_context pre_nmvc;
@ -95,6 +98,11 @@ typedef struct frame_contexts {
vp9_prob pre_hybrid_coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS]
[PREV_COEF_CONTEXTS] [ENTROPY_NODES];
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
vp9_prob pre_coef_probs_32x32 [BLOCK_TYPES_32X32] [COEF_BANDS]
[PREV_COEF_CONTEXTS] [ENTROPY_NODES];
#endif
unsigned int coef_counts [BLOCK_TYPES] [COEF_BANDS]
[PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
unsigned int hybrid_coef_counts [BLOCK_TYPES] [COEF_BANDS]
@ -110,6 +118,11 @@ typedef struct frame_contexts {
unsigned int hybrid_coef_counts_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS]
[PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
unsigned int coef_counts_32x32 [BLOCK_TYPES_32X32] [COEF_BANDS]
[PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
#endif
nmv_context_counts NMVcount;
vp9_prob switchable_interp_prob[VP9_SWITCHABLE_FILTERS + 1]
[VP9_SWITCHABLE_FILTERS - 1];
@ -139,8 +152,11 @@ typedef enum {
ONLY_4X4 = 0,
ALLOW_8X8 = 1,
ALLOW_16X16 = 2,
TX_MODE_SELECT = 3,
NB_TXFM_MODES = 4,
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
ALLOW_32X32 = 3,
#endif
TX_MODE_SELECT = 3 + (CONFIG_TX32X32 && CONFIG_SUPERBLOCKS),
NB_TXFM_MODES = 4 + (CONFIG_TX32X32 && CONFIG_SUPERBLOCKS),
} TXFM_MODE;
typedef struct VP9Common {
@ -268,7 +284,7 @@ typedef struct VP9Common {
vp9_prob prob_comppred[COMP_PRED_CONTEXTS];
// FIXME contextualize
vp9_prob prob_tx[TX_SIZE_MAX - 1];
vp9_prob prob_tx[TX_SIZE_MAX_SB - 1];
vp9_prob mbskip_pred_probs[MBSKIP_CONTEXTS];


@ -168,6 +168,53 @@ void vp9_recon_mbuv_s_c(MACROBLOCKD *xd, uint8_t *udst, uint8_t *vdst) {
}
}
}
#if CONFIG_TX32X32
void vp9_recon_sby_s_c(MACROBLOCKD *xd, uint8_t *dst) {
int x, y, stride = xd->block[0].dst_stride;
short *diff = xd->sb_coeff_data.diff;
for (y = 0; y < 32; y++) {
for (x = 0; x < 32; x++) {
int a = dst[x] + diff[x];
if (a < 0)
a = 0;
else if (a > 255)
a = 255;
dst[x] = a;
}
dst += stride;
diff += 32;
}
}
void vp9_recon_sbuv_s_c(MACROBLOCKD *xd, uint8_t *udst, uint8_t *vdst) {
int x, y, stride = xd->block[16].dst_stride;
short *udiff = xd->sb_coeff_data.diff + 1024;
short *vdiff = xd->sb_coeff_data.diff + 1280;
for (y = 0; y < 16; y++) {
for (x = 0; x < 16; x++) {
int u = udst[x] + udiff[x];
int v = vdst[x] + vdiff[x];
if (u < 0)
u = 0;
else if (u > 255)
u = 255;
if (v < 0)
v = 0;
else if (v > 255)
v = 255;
udst[x] = u;
vdst[x] = v;
}
udst += stride;
vdst += stride;
udiff += 16;
vdiff += 16;
}
}
#endif
#endif
void vp9_recon_mby_c(MACROBLOCKD *xd) {


@ -361,6 +361,9 @@ specialize vp9_short_idct16x16
prototype void vp9_short_idct10_16x16 "short *input, short *output, int pitch"
specialize vp9_short_idct10_16x16
prototype void vp9_short_idct32x32 "short *input, short *output, int pitch"
specialize vp9_short_idct32x32
prototype void vp9_ihtllm "const short *input, short *output, int pitch, int tx_type, int tx_dim"
specialize vp9_ihtllm
@ -640,6 +643,9 @@ specialize vp9_short_fdct8x4
prototype void vp9_short_walsh4x4 "short *InputData, short *OutputData, int pitch"
specialize vp9_short_walsh4x4
prototype void vp9_short_fdct32x32 "short *InputData, short *OutputData, int pitch"
specialize vp9_short_fdct32x32
prototype void vp9_short_fdct16x16 "short *InputData, short *OutputData, int pitch"
specialize vp9_short_fdct16x16


@ -14,7 +14,7 @@
static const int segfeaturedata_signed[SEG_LVL_MAX] = { 1, 1, 0, 0, 0, 0 };
static const int seg_feature_data_max[SEG_LVL_MAX] =
{ MAXQ, 63, 0xf, MB_MODE_COUNT - 1, 255, TX_SIZE_MAX - 1};
{ MAXQ, 63, 0xf, MB_MODE_COUNT - 1, 255, TX_SIZE_MAX_SB - 1};
// These functions provide access to new segment level features.
// Eventually these function may be "optimized out" but for the moment,


@ -209,8 +209,17 @@ static void kfread_modes(VP9D_COMP *pbi,
m->mbmi.mode <= I8X8_PRED) {
// FIXME(rbultje) code ternary symbol once all experiments are merged
m->mbmi.txfm_size = vp9_read(bc, cm->prob_tx[0]);
if (m->mbmi.txfm_size != TX_4X4 && m->mbmi.mode != I8X8_PRED)
if (m->mbmi.txfm_size != TX_4X4 && m->mbmi.mode != I8X8_PRED) {
m->mbmi.txfm_size += vp9_read(bc, cm->prob_tx[1]);
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
if (m->mbmi.txfm_size != TX_8X8 && m->mbmi.encoded_as_sb)
m->mbmi.txfm_size += vp9_read(bc, cm->prob_tx[2]);
#endif
}
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
} else if (cm->txfm_mode >= ALLOW_32X32 && m->mbmi.encoded_as_sb) {
m->mbmi.txfm_size = TX_32X32;
#endif
} else if (cm->txfm_mode >= ALLOW_16X16 && m->mbmi.mode <= TM_PRED) {
m->mbmi.txfm_size = TX_16X16;
} else if (cm->txfm_mode >= ALLOW_8X8 && m->mbmi.mode != B_PRED) {
@ -1219,8 +1228,17 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
// FIXME(rbultje) code ternary symbol once all experiments are merged
mbmi->txfm_size = vp9_read(bc, cm->prob_tx[0]);
if (mbmi->txfm_size != TX_4X4 && mbmi->mode != I8X8_PRED &&
mbmi->mode != SPLITMV)
mbmi->mode != SPLITMV) {
mbmi->txfm_size += vp9_read(bc, cm->prob_tx[1]);
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
if (mbmi->encoded_as_sb && mbmi->txfm_size != TX_8X8)
mbmi->txfm_size += vp9_read(bc, cm->prob_tx[2]);
#endif
}
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
} else if (mbmi->encoded_as_sb && cm->txfm_mode >= ALLOW_32X32) {
mbmi->txfm_size = TX_32X32;
#endif
} else if (cm->txfm_mode >= ALLOW_16X16 &&
((mbmi->ref_frame == INTRA_FRAME && mbmi->mode <= TM_PRED) ||
(mbmi->ref_frame != INTRA_FRAME && mbmi->mode != SPLITMV))) {


@ -693,6 +693,7 @@ static void decode_superblock(VP9D_COMP *pbi, MACROBLOCKD *xd,
TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size;
VP9_COMMON *const pc = &pbi->common;
MODE_INFO *orig_mi = xd->mode_info_context;
const int mis = pc->mode_info_stride;
assert(xd->mode_info_context->mbmi.encoded_as_sb);
@ -733,6 +734,30 @@ static void decode_superblock(VP9D_COMP *pbi, MACROBLOCKD *xd,
}
/* dequantization and idct */
#if CONFIG_TX32X32
if (xd->mode_info_context->mbmi.txfm_size == TX_32X32) {
eobtotal = vp9_decode_sb_tokens(pbi, xd, bc);
if (eobtotal == 0) { // skip loopfilter
xd->mode_info_context->mbmi.mb_skip_coeff = 1;
if (mb_col + 1 < pc->mb_cols)
xd->mode_info_context[1].mbmi.mb_skip_coeff = 1;
if (mb_row + 1 < pc->mb_rows) {
xd->mode_info_context[mis].mbmi.mb_skip_coeff = 1;
if (mb_col + 1 < pc->mb_cols)
xd->mode_info_context[mis + 1].mbmi.mb_skip_coeff = 1;
}
} else {
vp9_dequant_idct_add_32x32(xd->sb_coeff_data.qcoeff, xd->block[0].dequant,
xd->dst.y_buffer, xd->dst.y_buffer,
xd->dst.y_stride, xd->dst.y_stride,
xd->eobs[0]);
vp9_dequant_idct_add_uv_block_16x16_c(xd->sb_coeff_data.qcoeff + 1024,
xd->block[16].dequant,
xd->dst.u_buffer, xd->dst.v_buffer,
xd->dst.uv_stride, xd->eobs + 16);
}
} else {
#endif
for (n = 0; n < 4; n++) {
int x_idx = n & 1, y_idx = n >> 1;
@ -742,7 +767,7 @@ static void decode_superblock(VP9D_COMP *pbi, MACROBLOCKD *xd,
xd->above_context = pc->above_context + mb_col + x_idx;
xd->left_context = pc->left_context + y_idx;
xd->mode_info_context = orig_mi + x_idx + y_idx * pc->mode_info_stride;
xd->mode_info_context = orig_mi + x_idx + y_idx * mis;
for (i = 0; i < 25; i++) {
xd->block[i].eob = 0;
xd->eobs[i] = 0;
@ -766,6 +791,9 @@ static void decode_superblock(VP9D_COMP *pbi, MACROBLOCKD *xd,
xd->above_context = pc->above_context + mb_col;
xd->left_context = pc->left_context;
xd->mode_info_context = orig_mi;
#if CONFIG_TX32X32
}
#endif
}
#endif
@ -1244,6 +1272,11 @@ static void read_coef_probs(VP9D_COMP *pbi, BOOL_DECODER* const bc) {
read_coef_probs_common(bc, pc->fc.coef_probs_16x16);
read_coef_probs_common(bc, pc->fc.hybrid_coef_probs_16x16);
}
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
if (pbi->common.txfm_mode > ALLOW_16X16) {
read_coef_probs_common(bc, pc->fc.coef_probs_32x32);
}
#endif
}
int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) {
@ -1433,9 +1466,16 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) {
/* Read the loop filter level and type */
pc->txfm_mode = vp9_read_literal(&header_bc, 2);
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
if (pc->txfm_mode == 3)
pc->txfm_mode += vp9_read_bit(&header_bc);
#endif
if (pc->txfm_mode == TX_MODE_SELECT) {
pc->prob_tx[0] = vp9_read_literal(&header_bc, 8);
pc->prob_tx[1] = vp9_read_literal(&header_bc, 8);
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
pc->prob_tx[2] = vp9_read_literal(&header_bc, 8);
#endif
}
pc->filter_type = (LOOPFILTERTYPE) vp9_read_bit(&header_bc);
@ -1591,6 +1631,10 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) {
pbi->common.fc.coef_probs_16x16);
vp9_copy(pbi->common.fc.pre_hybrid_coef_probs_16x16,
pbi->common.fc.hybrid_coef_probs_16x16);
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
vp9_copy(pbi->common.fc.pre_coef_probs_32x32,
pbi->common.fc.coef_probs_32x32);
#endif
vp9_copy(pbi->common.fc.pre_ymode_prob, pbi->common.fc.ymode_prob);
#if CONFIG_SUPERBLOCKS
vp9_copy(pbi->common.fc.pre_sb_ymode_prob, pbi->common.fc.sb_ymode_prob);
@ -1610,6 +1654,9 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) {
vp9_zero(pbi->common.fc.hybrid_coef_counts_8x8);
vp9_zero(pbi->common.fc.coef_counts_16x16);
vp9_zero(pbi->common.fc.hybrid_coef_counts_16x16);
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
vp9_zero(pbi->common.fc.coef_counts_32x32);
#endif
vp9_zero(pbi->common.fc.ymode_counts);
#if CONFIG_SUPERBLOCKS
vp9_zero(pbi->common.fc.sb_ymode_counts);


@ -352,3 +352,30 @@ void vp9_dequant_idct_add_16x16_c(int16_t *input, const int16_t *dq,
add_residual(diff_ptr, pred, pitch, dest, stride, 16, 16);
}
}
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
void vp9_dequant_idct_add_32x32(int16_t *input, const int16_t *dq,
uint8_t *pred, uint8_t *dest, int pitch,
int stride, uint16_t eobs) {
short output[1024];
int i;
input[0]= input[0] * dq[0] / 2;
for (i = 1; i < 1024; i++)
input[i] = input[i] * dq[1] / 2;
vp9_short_idct32x32_c(input, output, 64);
vpx_memset(input, 0, 2048);
add_residual(output, pred, pitch, dest, stride, 32, 32);
}
void vp9_dequant_idct_add_uv_block_16x16_c(short *q, const short *dq,
unsigned char *dstu,
unsigned char *dstv,
int stride,
unsigned short *eobs) {
vp9_dequant_idct_add_16x16_c(q, dq, dstu, dstu, stride, stride, eobs[0]);
vp9_dequant_idct_add_16x16_c(q + 256, dq,
dstv, dstv, stride, stride, eobs[4]);
}
#endif


@ -55,8 +55,9 @@
#define CAT5_PROB3 157
#define CAT5_PROB4 180
static const unsigned char cat6_prob[14] =
{ 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0 };
static const unsigned char cat6_prob[15] = {
254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0
};
void vp9_reset_mb_tokens_context(MACROBLOCKD* const xd) {
/* Clear entropy contexts */
@ -161,6 +162,12 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd,
coef_counts = fc->hybrid_coef_counts_16x16[type];
}
break;
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
case TX_32X32:
coef_probs = fc->coef_probs_32x32[type];
coef_counts = fc->coef_counts_32x32[type];
break;
#endif
}
VP9_COMBINEENTROPYCONTEXTS(pt, *a, *l);
@ -256,6 +263,54 @@ static int get_eob(MACROBLOCKD* const xd, int segment_id, int eob_max) {
return eob;
}
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
int vp9_decode_sb_tokens(VP9D_COMP* const pbi,
MACROBLOCKD* const xd,
BOOL_DECODER* const bc) {
ENTROPY_CONTEXT* const A = (ENTROPY_CONTEXT *)xd->above_context;
ENTROPY_CONTEXT* const L = (ENTROPY_CONTEXT *)xd->left_context;
unsigned short* const eobs = xd->eobs;
const int segment_id = xd->mode_info_context->mbmi.segment_id;
int c, i, eobtotal = 0, seg_eob;
// Luma block
eobs[0] = c = decode_coefs(pbi, xd, bc, A, L, PLANE_TYPE_Y_WITH_DC,
DCT_DCT, get_eob(xd, segment_id, 1024),
xd->sb_coeff_data.qcoeff,
vp9_default_zig_zag1d_32x32,
TX_32X32, vp9_coef_bands_32x32);
A[1] = A[2] = A[3] = A[0];
L[1] = L[2] = L[3] = L[0];
eobtotal += c;
// 16x16 chroma blocks
seg_eob = get_eob(xd, segment_id, 256);
for (i = 16; i < 24; i += 4) {
ENTROPY_CONTEXT* const a = A + vp9_block2above_8x8[i];
ENTROPY_CONTEXT* const l = L + vp9_block2left_8x8[i];
eobs[i] = c = decode_coefs(pbi, xd, bc, a, l, PLANE_TYPE_UV,
DCT_DCT, seg_eob,
xd->sb_coeff_data.qcoeff + 1024 + (i - 16) * 64,
vp9_default_zig_zag1d_16x16,
TX_16X16, vp9_coef_bands_16x16);
a[1] = a[0];
l[1] = l[0];
eobtotal += c;
}
// no Y2 block
vpx_memset(&A[8], 0, sizeof(A[8]));
vpx_memset(&L[8], 0, sizeof(L[8]));
vpx_memcpy(xd->above_context + 1, xd->above_context,
sizeof(ENTROPY_CONTEXT_PLANES));
vpx_memcpy(xd->left_context + 1, xd->left_context,
sizeof(ENTROPY_CONTEXT_PLANES));
return eobtotal;
}
#endif
static int vp9_decode_mb_tokens_16x16(VP9D_COMP* const pbi,
MACROBLOCKD* const xd,


@ -23,6 +23,12 @@ int vp9_decode_coefs_4x4(VP9D_COMP *dx, MACROBLOCKD *xd,
int vp9_decode_mb_tokens(VP9D_COMP* const, MACROBLOCKD* const,
BOOL_DECODER* const);
#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32
int vp9_decode_sb_tokens(VP9D_COMP* const pbi,
MACROBLOCKD* const xd,
BOOL_DECODER* const bc);
#endif
int vp9_decode_mb_tokens_4x4_uv(VP9D_COMP* const dx, MACROBLOCKD* const xd,
BOOL_DECODER* const bc);


@ -1200,8 +1200,13 @@ static void pack_inter_mode_mvs(VP9_COMP *const cpi, vp9_writer *const bc) {
TX_SIZE sz = mi->txfm_size;
// FIXME(rbultje) code ternary symbol once all experiments are merged
vp9_write(bc, sz != TX_4X4, pc->prob_tx[0]);
if (sz != TX_4X4 && mode != I8X8_PRED && mode != SPLITMV)
if (sz != TX_4X4 && mode != I8X8_PRED && mode != SPLITMV) {
vp9_write(bc, sz != TX_8X8, pc->prob_tx[1]);
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
if (mi->encoded_as_sb && sz != TX_8X8)
vp9_write(bc, sz != TX_16X16, pc->prob_tx[2]);
#endif
}
}
#ifdef ENTROPY_STATS
@ -1337,8 +1342,13 @@ static void write_mb_modes_kf(const VP9_COMMON *c,
TX_SIZE sz = m->mbmi.txfm_size;
// FIXME(rbultje) code ternary symbol once all experiments are merged
vp9_write(bc, sz != TX_4X4, c->prob_tx[0]);
if (sz != TX_4X4 && ym <= TM_PRED)
if (sz != TX_4X4 && ym <= TM_PRED) {
vp9_write(bc, sz != TX_8X8, c->prob_tx[1]);
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
if (m->mbmi.encoded_as_sb && sz != TX_8X8)
vp9_write(bc, sz != TX_16X16, c->prob_tx[2]);
#endif
}
}
}
@ -1551,25 +1561,50 @@ static void build_coeff_contexts(VP9_COMP *cpi) {
}
}
}
}
for (i = 0; i < BLOCK_TYPES_16X16; ++i) {
for (j = 0; j < COEF_BANDS; ++j) {
for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0)))
continue;
vp9_tree_probs_from_distribution(
MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree,
cpi->frame_hybrid_coef_probs_16x16[i][j][k],
cpi->frame_hybrid_branch_ct_16x16[i][j][k],
cpi->hybrid_coef_counts_16x16[i][j][k], 256, 1);
for (i = 0; i < BLOCK_TYPES_16X16; ++i) {
for (j = 0; j < COEF_BANDS; ++j) {
for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0)))
continue;
vp9_tree_probs_from_distribution(
MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree,
cpi->frame_hybrid_coef_probs_16x16[i][j][k],
cpi->frame_hybrid_branch_ct_16x16[i][j][k],
cpi->hybrid_coef_counts_16x16[i][j][k], 256, 1);
#ifdef ENTROPY_STATS
if (!cpi->dummy_packing)
for (t = 0; t < MAX_ENTROPY_TOKENS; ++t)
hybrid_context_counters_16x16[i][j][k][t] += cpi->hybrid_coef_counts_16x16[i][j][k][t];
if (!cpi->dummy_packing)
for (t = 0; t < MAX_ENTROPY_TOKENS; ++t)
hybrid_context_counters_16x16[i][j][k][t] +=
cpi->hybrid_coef_counts_16x16[i][j][k][t];
#endif
}
}
}
}
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
if (cpi->common.txfm_mode > ALLOW_16X16) {
for (i = 0; i < BLOCK_TYPES_32X32; ++i) {
for (j = 0; j < COEF_BANDS; ++j) {
for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0)))
continue;
vp9_tree_probs_from_distribution(
MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree,
cpi->frame_coef_probs_32x32[i][j][k],
cpi->frame_branch_ct_32x32[i][j][k],
cpi->coef_counts_32x32[i][j][k], 256, 1);
#ifdef ENTROPY_STATS
if (!cpi->dummy_packing)
for (t = 0; t < MAX_ENTROPY_TOKENS; ++t)
context_counters_32x32[i][j][k][t] +=
cpi->coef_counts_32x32[i][j][k][t];
#endif
}
}
}
}
#endif
}
static void update_coef_probs_common(
@ -1714,6 +1749,15 @@ static void update_coef_probs(VP9_COMP* const cpi, vp9_writer* const bc) {
cpi->common.fc.hybrid_coef_probs_16x16,
cpi->frame_hybrid_branch_ct_16x16);
}
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
if (cpi->common.txfm_mode > ALLOW_16X16) {
update_coef_probs_common(bc,
cpi->frame_coef_probs_32x32,
cpi->common.fc.coef_probs_32x32,
cpi->frame_branch_ct_32x32);
}
#endif
}
#ifdef PACKET_TESTING
@ -1955,18 +1999,53 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
{
if (pc->txfm_mode == TX_MODE_SELECT) {
pc->prob_tx[0] = get_prob(cpi->txfm_count[0] + cpi->txfm_count_8x8p[0],
cpi->txfm_count[0] + cpi->txfm_count[1] + cpi->txfm_count[2] +
cpi->txfm_count_8x8p[0] + cpi->txfm_count_8x8p[1]);
pc->prob_tx[1] = get_prob(cpi->txfm_count[1], cpi->txfm_count[1] + cpi->txfm_count[2]);
pc->prob_tx[0] = get_prob(cpi->txfm_count_32x32p[TX_4X4] +
cpi->txfm_count_16x16p[TX_4X4] +
cpi->txfm_count_8x8p[TX_4X4],
cpi->txfm_count_32x32p[TX_4X4] +
cpi->txfm_count_32x32p[TX_8X8] +
cpi->txfm_count_32x32p[TX_16X16] +
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
cpi->txfm_count_32x32p[TX_32X32] +
#endif
cpi->txfm_count_16x16p[TX_4X4] +
cpi->txfm_count_16x16p[TX_8X8] +
cpi->txfm_count_16x16p[TX_16X16] +
cpi->txfm_count_8x8p[TX_4X4] +
cpi->txfm_count_8x8p[TX_8X8]);
pc->prob_tx[1] = get_prob(cpi->txfm_count_32x32p[TX_8X8] +
cpi->txfm_count_16x16p[TX_8X8],
cpi->txfm_count_32x32p[TX_8X8] +
cpi->txfm_count_32x32p[TX_16X16] +
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
cpi->txfm_count_32x32p[TX_32X32] +
#endif
cpi->txfm_count_16x16p[TX_8X8] +
cpi->txfm_count_16x16p[TX_16X16]);
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
pc->prob_tx[2] = get_prob(cpi->txfm_count_32x32p[TX_16X16],
cpi->txfm_count_32x32p[TX_16X16] +
cpi->txfm_count_32x32p[TX_32X32]);
#endif
} else {
pc->prob_tx[0] = 128;
pc->prob_tx[1] = 128;
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
pc->prob_tx[2] = 128;
#endif
}
vp9_write_literal(&header_bc, pc->txfm_mode, 2);
vp9_write_literal(&header_bc, pc->txfm_mode <= 3 ? pc->txfm_mode : 3, 2);
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
if (pc->txfm_mode > ALLOW_16X16) {
vp9_write_bit(&header_bc, pc->txfm_mode == TX_MODE_SELECT);
}
#endif
if (pc->txfm_mode == TX_MODE_SELECT) {
vp9_write_literal(&header_bc, pc->prob_tx[0], 8);
vp9_write_literal(&header_bc, pc->prob_tx[1], 8);
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
vp9_write_literal(&header_bc, pc->prob_tx[2], 8);
#endif
}
}
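For orientation, the write order above implies a matching read order on the decoder side: a 2-bit literal carrying min(txfm_mode, 3), an extra bit separating ALLOW_32X32 from TX_MODE_SELECT when the 32x32 experiment is enabled, and three 8-bit probabilities if the mode is TX_MODE_SELECT. A toy sketch under assumptions (enum ordering ONLY_4X4=0 .. ALLOW_32X32=3, TX_MODE_SELECT=4; the toy_* helpers are placeholders for illustration, not the real bool-decoder API):

/* Toy bit source standing in for the bool decoder (illustration only). */
typedef struct { const unsigned char *bits; int pos; } toy_reader;
static int toy_bit(toy_reader *r) { return r->bits[r->pos++]; }
static int toy_literal(toy_reader *r, int n) {
  int v = 0;
  while (n--) v = (v << 1) | toy_bit(r);
  return v;
}
/* Mirror of the txfm_mode writes above (enum values assumed, see lead-in). */
static int toy_read_txfm_mode(toy_reader *r) {
  int mode = toy_literal(r, 2);   /* writer sent min(txfm_mode, 3)         */
  if (mode == 3 && toy_bit(r))    /* extra bit separates 32x32 from SELECT */
    mode = 4;                     /* TX_MODE_SELECT; 3 stays ALLOW_32X32   */
  return mode;                    /* if SELECT, three 8-bit probs follow   */
}
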
@ -2150,6 +2229,10 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
vp9_copy(cpi->common.fc.pre_hybrid_coef_probs_8x8, cpi->common.fc.hybrid_coef_probs_8x8);
vp9_copy(cpi->common.fc.pre_coef_probs_16x16, cpi->common.fc.coef_probs_16x16);
vp9_copy(cpi->common.fc.pre_hybrid_coef_probs_16x16, cpi->common.fc.hybrid_coef_probs_16x16);
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
vp9_copy(cpi->common.fc.pre_coef_probs_32x32,
cpi->common.fc.coef_probs_32x32);
#endif
#if CONFIG_SUPERBLOCKS
vp9_copy(cpi->common.fc.pre_sb_ymode_prob, cpi->common.fc.sb_ymode_prob);
#endif


@ -36,9 +36,15 @@ typedef struct block {
short *zbin;
short *zbin_8x8;
short *zbin_16x16;
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
short *zbin_32x32;
#endif
short *zrun_zbin_boost;
short *zrun_zbin_boost_8x8;
short *zrun_zbin_boost_16x16;
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
short *zrun_zbin_boost_32x32;
#endif
short *round;
// Zbin Over Quant value
@ -52,6 +58,9 @@ typedef struct block {
int eob_max_offset;
int eob_max_offset_8x8;
int eob_max_offset_16x16;
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
int eob_max_offset_32x32;
#endif
} BLOCK;
typedef struct {
@ -83,6 +92,13 @@ typedef struct {
int64_t txfm_rd_diff[NB_TXFM_MODES];
} PICK_MODE_CONTEXT;
#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32
typedef struct superblock {
DECLARE_ALIGNED(16, short, src_diff[32*32+16*16*2]);
DECLARE_ALIGNED(16, short, coeff[32*32+16*16*2]);
} SUPERBLOCK;
#endif
typedef struct macroblock {
DECLARE_ALIGNED(16, short, src_diff[400]); // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y
DECLARE_ALIGNED(16, short, coeff[400]); // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y
@ -95,6 +111,10 @@ typedef struct macroblock {
// 1 DC 2nd order block each with 16 entries
BLOCK block[25];
#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32
SUPERBLOCK sb_coeff_data;
#endif
YV12_BUFFER_CONFIG src;
MACROBLOCKD e_mbd;
@ -153,9 +173,9 @@ typedef struct macroblock {
unsigned char *active_ptr;
unsigned int token_costs[TX_SIZE_MAX][BLOCK_TYPES][COEF_BANDS]
unsigned int token_costs[TX_SIZE_MAX_SB][BLOCK_TYPES][COEF_BANDS]
[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS];
unsigned int hybrid_token_costs[TX_SIZE_MAX][BLOCK_TYPES][COEF_BANDS]
unsigned int hybrid_token_costs[TX_SIZE_MAX_SB][BLOCK_TYPES][COEF_BANDS]
[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS];
int optimize;


@ -1330,3 +1330,461 @@ void vp9_short_fdct16x16_c(int16_t *input, int16_t *out, int pitch) {
#undef RIGHT_SHIFT
#undef ROUNDING
#endif
#if CONFIG_TX32X32
#if !CONFIG_DWT32X32HYBRID
static void dct32_1d(double *input, double *output, int stride) {
static const double C1 = 0.998795456205; // cos(pi * 1 / 64)
static const double C2 = 0.995184726672; // cos(pi * 2 / 64)
static const double C3 = 0.989176509965; // cos(pi * 3 / 64)
static const double C4 = 0.980785280403; // cos(pi * 4 / 64)
static const double C5 = 0.970031253195; // cos(pi * 5 / 64)
static const double C6 = 0.956940335732; // cos(pi * 6 / 64)
static const double C7 = 0.941544065183; // cos(pi * 7 / 64)
static const double C8 = 0.923879532511; // cos(pi * 8 / 64)
static const double C9 = 0.903989293123; // cos(pi * 9 / 64)
static const double C10 = 0.881921264348; // cos(pi * 10 / 64)
static const double C11 = 0.857728610000; // cos(pi * 11 / 64)
static const double C12 = 0.831469612303; // cos(pi * 12 / 64)
static const double C13 = 0.803207531481; // cos(pi * 13 / 64)
static const double C14 = 0.773010453363; // cos(pi * 14 / 64)
static const double C15 = 0.740951125355; // cos(pi * 15 / 64)
static const double C16 = 0.707106781187; // cos(pi * 16 / 64)
static const double C17 = 0.671558954847; // cos(pi * 17 / 64)
static const double C18 = 0.634393284164; // cos(pi * 18 / 64)
static const double C19 = 0.595699304492; // cos(pi * 19 / 64)
static const double C20 = 0.555570233020; // cos(pi * 20 / 64)
static const double C21 = 0.514102744193; // cos(pi * 21 / 64)
static const double C22 = 0.471396736826; // cos(pi * 22 / 64)
static const double C23 = 0.427555093430; // cos(pi * 23 / 64)
static const double C24 = 0.382683432365; // cos(pi * 24 / 64)
static const double C25 = 0.336889853392; // cos(pi * 25 / 64)
static const double C26 = 0.290284677254; // cos(pi * 26 / 64)
static const double C27 = 0.242980179903; // cos(pi * 27 / 64)
static const double C28 = 0.195090322016; // cos(pi * 28 / 64)
static const double C29 = 0.146730474455; // cos(pi * 29 / 64)
static const double C30 = 0.098017140330; // cos(pi * 30 / 64)
static const double C31 = 0.049067674327; // cos(pi * 31 / 64)
double step[32];
// Stage 1
step[0] = input[stride*0] + input[stride*(32 - 1)];
step[1] = input[stride*1] + input[stride*(32 - 2)];
step[2] = input[stride*2] + input[stride*(32 - 3)];
step[3] = input[stride*3] + input[stride*(32 - 4)];
step[4] = input[stride*4] + input[stride*(32 - 5)];
step[5] = input[stride*5] + input[stride*(32 - 6)];
step[6] = input[stride*6] + input[stride*(32 - 7)];
step[7] = input[stride*7] + input[stride*(32 - 8)];
step[8] = input[stride*8] + input[stride*(32 - 9)];
step[9] = input[stride*9] + input[stride*(32 - 10)];
step[10] = input[stride*10] + input[stride*(32 - 11)];
step[11] = input[stride*11] + input[stride*(32 - 12)];
step[12] = input[stride*12] + input[stride*(32 - 13)];
step[13] = input[stride*13] + input[stride*(32 - 14)];
step[14] = input[stride*14] + input[stride*(32 - 15)];
step[15] = input[stride*15] + input[stride*(32 - 16)];
step[16] = -input[stride*16] + input[stride*(32 - 17)];
step[17] = -input[stride*17] + input[stride*(32 - 18)];
step[18] = -input[stride*18] + input[stride*(32 - 19)];
step[19] = -input[stride*19] + input[stride*(32 - 20)];
step[20] = -input[stride*20] + input[stride*(32 - 21)];
step[21] = -input[stride*21] + input[stride*(32 - 22)];
step[22] = -input[stride*22] + input[stride*(32 - 23)];
step[23] = -input[stride*23] + input[stride*(32 - 24)];
step[24] = -input[stride*24] + input[stride*(32 - 25)];
step[25] = -input[stride*25] + input[stride*(32 - 26)];
step[26] = -input[stride*26] + input[stride*(32 - 27)];
step[27] = -input[stride*27] + input[stride*(32 - 28)];
step[28] = -input[stride*28] + input[stride*(32 - 29)];
step[29] = -input[stride*29] + input[stride*(32 - 30)];
step[30] = -input[stride*30] + input[stride*(32 - 31)];
step[31] = -input[stride*31] + input[stride*(32 - 32)];
// Stage 2
output[stride*0] = step[0] + step[16 - 1];
output[stride*1] = step[1] + step[16 - 2];
output[stride*2] = step[2] + step[16 - 3];
output[stride*3] = step[3] + step[16 - 4];
output[stride*4] = step[4] + step[16 - 5];
output[stride*5] = step[5] + step[16 - 6];
output[stride*6] = step[6] + step[16 - 7];
output[stride*7] = step[7] + step[16 - 8];
output[stride*8] = -step[8] + step[16 - 9];
output[stride*9] = -step[9] + step[16 - 10];
output[stride*10] = -step[10] + step[16 - 11];
output[stride*11] = -step[11] + step[16 - 12];
output[stride*12] = -step[12] + step[16 - 13];
output[stride*13] = -step[13] + step[16 - 14];
output[stride*14] = -step[14] + step[16 - 15];
output[stride*15] = -step[15] + step[16 - 16];
output[stride*16] = step[16];
output[stride*17] = step[17];
output[stride*18] = step[18];
output[stride*19] = step[19];
output[stride*20] = (-step[20] + step[27])*C16;
output[stride*21] = (-step[21] + step[26])*C16;
output[stride*22] = (-step[22] + step[25])*C16;
output[stride*23] = (-step[23] + step[24])*C16;
output[stride*24] = (step[24] + step[23])*C16;
output[stride*25] = (step[25] + step[22])*C16;
output[stride*26] = (step[26] + step[21])*C16;
output[stride*27] = (step[27] + step[20])*C16;
output[stride*28] = step[28];
output[stride*29] = step[29];
output[stride*30] = step[30];
output[stride*31] = step[31];
// Stage 3
step[0] = output[stride*0] + output[stride*(8 - 1)];
step[1] = output[stride*1] + output[stride*(8 - 2)];
step[2] = output[stride*2] + output[stride*(8 - 3)];
step[3] = output[stride*3] + output[stride*(8 - 4)];
step[4] = -output[stride*4] + output[stride*(8 - 5)];
step[5] = -output[stride*5] + output[stride*(8 - 6)];
step[6] = -output[stride*6] + output[stride*(8 - 7)];
step[7] = -output[stride*7] + output[stride*(8 - 8)];
step[8] = output[stride*8];
step[9] = output[stride*9];
step[10] = (-output[stride*10] + output[stride*13])*C16;
step[11] = (-output[stride*11] + output[stride*12])*C16;
step[12] = (output[stride*12] + output[stride*11])*C16;
step[13] = (output[stride*13] + output[stride*10])*C16;
step[14] = output[stride*14];
step[15] = output[stride*15];
step[16] = output[stride*16] + output[stride*23];
step[17] = output[stride*17] + output[stride*22];
step[18] = output[stride*18] + output[stride*21];
step[19] = output[stride*19] + output[stride*20];
step[20] = -output[stride*20] + output[stride*19];
step[21] = -output[stride*21] + output[stride*18];
step[22] = -output[stride*22] + output[stride*17];
step[23] = -output[stride*23] + output[stride*16];
step[24] = -output[stride*24] + output[stride*31];
step[25] = -output[stride*25] + output[stride*30];
step[26] = -output[stride*26] + output[stride*29];
step[27] = -output[stride*27] + output[stride*28];
step[28] = output[stride*28] + output[stride*27];
step[29] = output[stride*29] + output[stride*26];
step[30] = output[stride*30] + output[stride*25];
step[31] = output[stride*31] + output[stride*24];
// Stage 4
output[stride*0] = step[0] + step[3];
output[stride*1] = step[1] + step[2];
output[stride*2] = -step[2] + step[1];
output[stride*3] = -step[3] + step[0];
output[stride*4] = step[4];
output[stride*5] = (-step[5] + step[6])*C16;
output[stride*6] = (step[6] + step[5])*C16;
output[stride*7] = step[7];
output[stride*8] = step[8] + step[11];
output[stride*9] = step[9] + step[10];
output[stride*10] = -step[10] + step[9];
output[stride*11] = -step[11] + step[8];
output[stride*12] = -step[12] + step[15];
output[stride*13] = -step[13] + step[14];
output[stride*14] = step[14] + step[13];
output[stride*15] = step[15] + step[12];
output[stride*16] = step[16];
output[stride*17] = step[17];
output[stride*18] = step[18]*-C8 + step[29]*C24;
output[stride*19] = step[19]*-C8 + step[28]*C24;
output[stride*20] = step[20]*-C24 + step[27]*-C8;
output[stride*21] = step[21]*-C24 + step[26]*-C8;
output[stride*22] = step[22];
output[stride*23] = step[23];
output[stride*24] = step[24];
output[stride*25] = step[25];
output[stride*26] = step[26]*C24 + step[21]*-C8;
output[stride*27] = step[27]*C24 + step[20]*-C8;
output[stride*28] = step[28]*C8 + step[19]*C24;
output[stride*29] = step[29]*C8 + step[18]*C24;
output[stride*30] = step[30];
output[stride*31] = step[31];
// Stage 5
step[0] = (output[stride*0] + output[stride*1]) * C16;
step[1] = (-output[stride*1] + output[stride*0]) * C16;
step[2] = output[stride*2]*C24 + output[stride*3] * C8;
step[3] = output[stride*3]*C24 - output[stride*2] * C8;
step[4] = output[stride*4] + output[stride*5];
step[5] = -output[stride*5] + output[stride*4];
step[6] = -output[stride*6] + output[stride*7];
step[7] = output[stride*7] + output[stride*6];
step[8] = output[stride*8];
step[9] = output[stride*9]*-C8 + output[stride*14]*C24;
step[10] = output[stride*10]*-C24 + output[stride*13]*-C8;
step[11] = output[stride*11];
step[12] = output[stride*12];
step[13] = output[stride*13]*C24 + output[stride*10]*-C8;
step[14] = output[stride*14]*C8 + output[stride*9]*C24;
step[15] = output[stride*15];
step[16] = output[stride*16] + output[stride*19];
step[17] = output[stride*17] + output[stride*18];
step[18] = -output[stride*18] + output[stride*17];
step[19] = -output[stride*19] + output[stride*16];
step[20] = -output[stride*20] + output[stride*23];
step[21] = -output[stride*21] + output[stride*22];
step[22] = output[stride*22] + output[stride*21];
step[23] = output[stride*23] + output[stride*20];
step[24] = output[stride*24] + output[stride*27];
step[25] = output[stride*25] + output[stride*26];
step[26] = -output[stride*26] + output[stride*25];
step[27] = -output[stride*27] + output[stride*24];
step[28] = -output[stride*28] + output[stride*31];
step[29] = -output[stride*29] + output[stride*30];
step[30] = output[stride*30] + output[stride*29];
step[31] = output[stride*31] + output[stride*28];
// Stage 6
output[stride*0] = step[0];
output[stride*1] = step[1];
output[stride*2] = step[2];
output[stride*3] = step[3];
output[stride*4] = step[4]*C28 + step[7]*C4;
output[stride*5] = step[5]*C12 + step[6]*C20;
output[stride*6] = step[6]*C12 + step[5]*-C20;
output[stride*7] = step[7]*C28 + step[4]*-C4;
output[stride*8] = step[8] + step[9];
output[stride*9] = -step[9] + step[8];
output[stride*10] = -step[10] + step[11];
output[stride*11] = step[11] + step[10];
output[stride*12] = step[12] + step[13];
output[stride*13] = -step[13] + step[12];
output[stride*14] = -step[14] + step[15];
output[stride*15] = step[15] + step[14];
output[stride*16] = step[16];
output[stride*17] = step[17]*-C4 + step[30]*C28;
output[stride*18] = step[18]*-C28 + step[29]*-C4;
output[stride*19] = step[19];
output[stride*20] = step[20];
output[stride*21] = step[21]*-C20 + step[26]*C12;
output[stride*22] = step[22]*-C12 + step[25]*-C20;
output[stride*23] = step[23];
output[stride*24] = step[24];
output[stride*25] = step[25]*C12 + step[22]*-C20;
output[stride*26] = step[26]*C20 + step[21]*C12;
output[stride*27] = step[27];
output[stride*28] = step[28];
output[stride*29] = step[29]*C28 + step[18]*-C4;
output[stride*30] = step[30]*C4 + step[17]*C28;
output[stride*31] = step[31];
// Stage 7
step[0] = output[stride*0];
step[1] = output[stride*1];
step[2] = output[stride*2];
step[3] = output[stride*3];
step[4] = output[stride*4];
step[5] = output[stride*5];
step[6] = output[stride*6];
step[7] = output[stride*7];
step[8] = output[stride*8]*C30 + output[stride*15]*C2;
step[9] = output[stride*9]*C14 + output[stride*14]*C18;
step[10] = output[stride*10]*C22 + output[stride*13]*C10;
step[11] = output[stride*11]*C6 + output[stride*12]*C26;
step[12] = output[stride*12]*C6 + output[stride*11]*-C26;
step[13] = output[stride*13]*C22 + output[stride*10]*-C10;
step[14] = output[stride*14]*C14 + output[stride*9]*-C18;
step[15] = output[stride*15]*C30 + output[stride*8]*-C2;
step[16] = output[stride*16] + output[stride*17];
step[17] = -output[stride*17] + output[stride*16];
step[18] = -output[stride*18] + output[stride*19];
step[19] = output[stride*19] + output[stride*18];
step[20] = output[stride*20] + output[stride*21];
step[21] = -output[stride*21] + output[stride*20];
step[22] = -output[stride*22] + output[stride*23];
step[23] = output[stride*23] + output[stride*22];
step[24] = output[stride*24] + output[stride*25];
step[25] = -output[stride*25] + output[stride*24];
step[26] = -output[stride*26] + output[stride*27];
step[27] = output[stride*27] + output[stride*26];
step[28] = output[stride*28] + output[stride*29];
step[29] = -output[stride*29] + output[stride*28];
step[30] = -output[stride*30] + output[stride*31];
step[31] = output[stride*31] + output[stride*30];
// Final stage --- outputs indices are bit-reversed.
output[stride*0] = step[0];
output[stride*16] = step[1];
output[stride*8] = step[2];
output[stride*24] = step[3];
output[stride*4] = step[4];
output[stride*20] = step[5];
output[stride*12] = step[6];
output[stride*28] = step[7];
output[stride*2] = step[8];
output[stride*18] = step[9];
output[stride*10] = step[10];
output[stride*26] = step[11];
output[stride*6] = step[12];
output[stride*22] = step[13];
output[stride*14] = step[14];
output[stride*30] = step[15];
output[stride*1] = step[16]*C31 + step[31]*C1;
output[stride*17] = step[17]*C15 + step[30]*C17;
output[stride*9] = step[18]*C23 + step[29]*C9;
output[stride*25] = step[19]*C7 + step[28]*C25;
output[stride*5] = step[20]*C27 + step[27]*C5;
output[stride*21] = step[21]*C11 + step[26]*C21;
output[stride*13] = step[22]*C19 + step[25]*C13;
output[stride*29] = step[23]*C3 + step[24]*C29;
output[stride*3] = step[24]*C3 + step[23]*-C29;
output[stride*19] = step[25]*C19 + step[22]*-C13;
output[stride*11] = step[26]*C11 + step[21]*-C21;
output[stride*27] = step[27]*C27 + step[20]*-C5;
output[stride*7] = step[28]*C7 + step[19]*-C25;
output[stride*23] = step[29]*C23 + step[18]*-C9;
output[stride*15] = step[30]*C15 + step[17]*-C17;
output[stride*31] = step[31]*C31 + step[16]*-C1;
}
void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch) {
vp9_clear_system_state(); // Make it simd safe : __asm emms;
{
int shortpitch = pitch >> 1;
int i, j;
double output[1024];
// First transform columns
for (i = 0; i < 32; i++) {
double temp_in[32], temp_out[32];
for (j = 0; j < 32; j++)
temp_in[j] = input[j*shortpitch + i];
dct32_1d(temp_in, temp_out, 1);
for (j = 0; j < 32; j++)
output[j*32 + i] = temp_out[j];
}
// Then transform rows
for (i = 0; i < 32; ++i) {
double temp_in[32], temp_out[32];
for (j = 0; j < 32; ++j)
temp_in[j] = output[j + i*32];
dct32_1d(temp_in, temp_out, 1);
for (j = 0; j < 32; ++j)
output[j + i*32] = temp_out[j];
}
// Scale by some magic number
for (i = 0; i < 1024; i++) {
out[i] = (short)round(output[i]/4);
}
}
vp9_clear_system_state(); // Make it simd safe : __asm emms;
}
#else // CONFIG_DWT32X32HYBRID
#define MAX_BLOCK_LENGTH 64
#define ENH_PRECISION_BITS 1
#define ENH_PRECISION_RND ((1 << ENH_PRECISION_BITS) / 2)
// Note: block length must be even for this implementation
static void analysis_53_row(int length, short *x,
short *lowpass, short *highpass) {
int n;
short r, * a, * b;
n = length >> 1;
b = highpass;
a = lowpass;
while (--n) {
*a++ = (r = *x++) << 1;
*b++ = *x - ((r + x[1] + 1) >> 1);
x++;
}
*a = (r = *x++) << 1;
*b = *x - r;
n = length >> 1;
b = highpass;
a = lowpass;
r = *highpass;
while (n--) {
*a++ += (r + (*b) + 1) >> 1;
r = *b++;
}
}
static void analysis_53_col(int length, short *x,
short *lowpass, short *highpass) {
int n;
short r, * a, * b;
n = length >> 1;
b = highpass;
a = lowpass;
while (--n) {
*a++ = (r = *x++);
*b++ = (((*x) << 1) - (r + x[1]) + 2) >> 2;
x++;
}
*a = (r = *x++);
*b = (*x - r + 1) >> 1;
n = length >> 1;
b = highpass;
a = lowpass;
r = *highpass;
while (n--) {
*a++ += (r + (*b) + 1) >> 1;
r = *b++;
}
}
// NOTE: Using a 5/3 integer wavelet for now. Explore using a wavelet
// with a better response later
static void dyadic_analyze(int levels, int width, int height,
short *x, int pitch_x, short *c, int pitch_c) {
int lv, i, j, nh, nw, hh = height, hw = width;
short buffer[2 * MAX_BLOCK_LENGTH];
for (i = 0; i < height; i++) {
for (j = 0; j < width; j++) {
c[i * pitch_c + j] = x[i * pitch_x + j] << ENH_PRECISION_BITS;
}
}
for (lv = 0; lv < levels; lv++) {
nh = hh;
hh = (hh + 1) >> 1;
nw = hw;
hw = (hw + 1) >> 1;
if ((nh < 2) || (nw < 2)) return;
for (i = 0; i < nh; i++) {
memcpy(buffer, &c[i * pitch_c], nw * sizeof(short));
analysis_53_row(nw, buffer, &c[i * pitch_c], &c[i * pitch_c] + hw);
}
for (j = 0; j < nw; j++) {
for (i = 0; i < nh; i++)
buffer[i + nh] = c[i * pitch_c + j];
analysis_53_col(nh, buffer + nh, buffer, buffer + hh);
for (i = 0; i < nh; i++)
c[i * pitch_c + j] = buffer[i];
}
}
}
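The analysis_53_row/analysis_53_col pair above performs one level of a 5/3 (LeGall) integer wavelet via lifting, folding extra scaling and rounding into the row and column passes. As a point of reference only, a minimal standalone sketch of the textbook lifting steps with symmetric edge handling (an assumption-level illustration, not the exact arithmetic used here):

#include <stddef.h>

/* One level of 5/3 lifting on a 1-D signal of even length n: predict the
 * odd samples from their even neighbours, then update the even samples
 * from the new detail coefficients. */
static void lift53_1d(const int *x, size_t n, int *low, int *high) {
  size_t i, half = n / 2;
  for (i = 0; i < half; i++) {
    int right = (2 * i + 2 < n) ? x[2 * i + 2] : x[2 * i];  /* mirror edge */
    high[i] = x[2 * i + 1] - ((x[2 * i] + right) >> 1);     /* predict */
  }
  for (i = 0; i < half; i++) {
    int left = (i > 0) ? high[i - 1] : high[0];             /* mirror edge */
    low[i] = x[2 * i] + ((left + high[i] + 2) >> 2);        /* update */
  }
}
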
void vp9_short_fdct32x32_c(short *input, short *out, int pitch) {
// assume out is a 32x32 buffer
short buffer[16 * 16];
int i;
const int short_pitch = pitch >> 1;
dyadic_analyze(1, 32, 32, input, short_pitch, out, 32);
// TODO(debargha): Implement more efficiently by adding output pitch
// argument to the dct16x16 function
vp9_short_fdct16x16_c(out, buffer, 64);
for (i = 0; i < 16; ++i)
vpx_memcpy(out + i * 32, buffer + i * 16, sizeof(short) * 16);
}
#endif // CONFIG_DWT32X32HYBRID
#endif // CONFIG_TX32X32


@ -456,6 +456,10 @@ static void update_state(VP9_COMP *cpi, MACROBLOCK *x,
if (xd->mb_to_right_edge >= 0)
vpx_memcpy(xd->mode_info_context + mis + 1, mi, sizeof(MODE_INFO));
}
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
} else {
ctx->txfm_rd_diff[ALLOW_32X32] = ctx->txfm_rd_diff[ALLOW_16X16];
#endif
}
#endif
@ -1487,6 +1491,9 @@ static void encode_frame_internal(VP9_COMP *cpi) {
vp9_zero(cpi->hybrid_coef_counts_8x8);
vp9_zero(cpi->coef_counts_16x16);
vp9_zero(cpi->hybrid_coef_counts_16x16);
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
vp9_zero(cpi->coef_counts_32x32);
#endif
vp9_frame_init_quantizer(cpi);
@ -1507,7 +1514,8 @@ static void encode_frame_internal(VP9_COMP *cpi) {
vpx_memset(cpi->rd_comp_pred_diff, 0, sizeof(cpi->rd_comp_pred_diff));
vpx_memset(cpi->single_pred_count, 0, sizeof(cpi->single_pred_count));
vpx_memset(cpi->comp_pred_count, 0, sizeof(cpi->comp_pred_count));
vpx_memset(cpi->txfm_count, 0, sizeof(cpi->txfm_count));
vpx_memset(cpi->txfm_count_32x32p, 0, sizeof(cpi->txfm_count_32x32p));
vpx_memset(cpi->txfm_count_16x16p, 0, sizeof(cpi->txfm_count_16x16p));
vpx_memset(cpi->txfm_count_8x8p, 0, sizeof(cpi->txfm_count_8x8p));
vpx_memset(cpi->rd_tx_select_diff, 0, sizeof(cpi->rd_tx_select_diff));
{
@ -1700,7 +1708,11 @@ void vp9_encode_frame(VP9_COMP *cpi) {
* keyframe's probabilities as an estimate of what the current keyframe's
* coefficient cost distributions may look like. */
if (frame_type == 0) {
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
txfm_type = ALLOW_32X32;
#else
txfm_type = ALLOW_16X16;
#endif
} else
#if 0
/* FIXME (rbultje)
@ -1731,9 +1743,15 @@ void vp9_encode_frame(VP9_COMP *cpi) {
} else
txfm_type = ALLOW_8X8;
#else
txfm_type = cpi->rd_tx_select_threshes[frame_type][ALLOW_16X16] >=
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
txfm_type = cpi->rd_tx_select_threshes[frame_type][ALLOW_32X32] >=
cpi->rd_tx_select_threshes[frame_type][TX_MODE_SELECT] ?
ALLOW_32X32 : TX_MODE_SELECT;
#else
txfm_type = cpi->rd_tx_select_threshes[frame_type][ALLOW_16X16] >=
cpi->rd_tx_select_threshes[frame_type][TX_MODE_SELECT] ?
ALLOW_16X16 : TX_MODE_SELECT;
#endif
#endif
cpi->common.txfm_mode = txfm_type;
if (txfm_type != TX_MODE_SELECT) {
@ -1753,7 +1771,8 @@ void vp9_encode_frame(VP9_COMP *cpi) {
int64_t pd = cpi->rd_tx_select_diff[i];
int diff;
if (i == TX_MODE_SELECT)
pd -= RDCOST(cpi->mb.rdmult, cpi->mb.rddiv, 2048 * (TX_SIZE_MAX - 1), 0);
pd -= RDCOST(cpi->mb.rdmult, cpi->mb.rddiv,
2048 * (TX_SIZE_MAX_SB - 1), 0);
diff = (int)(pd / cpi->common.MBs);
cpi->rd_tx_select_threshes[frame_type][i] += diff;
cpi->rd_tx_select_threshes[frame_type][i] /= 2;
@ -1776,19 +1795,37 @@ void vp9_encode_frame(VP9_COMP *cpi) {
}
if (cpi->common.txfm_mode == TX_MODE_SELECT) {
const int count4x4 = cpi->txfm_count[TX_4X4] + cpi->txfm_count_8x8p[TX_4X4];
const int count8x8 = cpi->txfm_count[TX_8X8];
const int count4x4 = cpi->txfm_count_16x16p[TX_4X4] +
cpi->txfm_count_32x32p[TX_4X4] +
cpi->txfm_count_8x8p[TX_4X4];
const int count8x8_lp = cpi->txfm_count_32x32p[TX_8X8] +
cpi->txfm_count_16x16p[TX_8X8];
const int count8x8_8x8p = cpi->txfm_count_8x8p[TX_8X8];
const int count16x16 = cpi->txfm_count[TX_16X16];
const int count16x16_16x16p = cpi->txfm_count_16x16p[TX_16X16];
const int count16x16_lp = cpi->txfm_count_32x32p[TX_16X16];
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
const int count32x32 = cpi->txfm_count_32x32p[TX_32X32];
#else
const int count32x32 = 0;
#endif
if (count4x4 == 0 && count16x16 == 0) {
if (count4x4 == 0 && count16x16_lp == 0 && count16x16_16x16p == 0 &&
count32x32 == 0) {
cpi->common.txfm_mode = ALLOW_8X8;
reset_skip_txfm_size(cpi, TX_8X8);
} else if (count8x8 == 0 && count16x16 == 0 && count8x8_8x8p == 0) {
} else if (count8x8_8x8p == 0 && count16x16_16x16p == 0 &&
count8x8_lp == 0 && count16x16_lp == 0 && count32x32 == 0) {
cpi->common.txfm_mode = ONLY_4X4;
reset_skip_txfm_size(cpi, TX_4X4);
} else if (count8x8 == 0 && count4x4 == 0) {
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
} else if (count8x8_lp == 0 && count16x16_lp == 0 && count4x4 == 0) {
cpi->common.txfm_mode = ALLOW_32X32;
#endif
} else if (count32x32 == 0 && count8x8_lp == 0 && count4x4 == 0) {
cpi->common.txfm_mode = ALLOW_16X16;
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
reset_skip_txfm_size(cpi, TX_16X16);
#endif
}
}
} else {
@ -2087,6 +2124,7 @@ static void encode_macroblock(VP9_COMP *cpi, MACROBLOCK *x,
vp9_set_pred_flag(xd, PRED_REF, ref_pred_flag);
}
assert(mbmi->txfm_size <= TX_16X16);
if (mbmi->ref_frame == INTRA_FRAME) {
#ifdef ENC_DEBUG
if (enc_debug) {
@ -2266,7 +2304,7 @@ static void encode_macroblock(VP9_COMP *cpi, MACROBLOCK *x,
vp9_get_segdata(&x->e_mbd, segment_id, SEG_LVL_EOB) == 0))) {
if (mbmi->mode != B_PRED && mbmi->mode != I8X8_PRED &&
mbmi->mode != SPLITMV) {
cpi->txfm_count[mbmi->txfm_size]++;
cpi->txfm_count_16x16p[mbmi->txfm_size]++;
} else if (mbmi->mode == I8X8_PRED ||
(mbmi->mode == SPLITMV &&
mbmi->partitioning != PARTITIONING_4X4)) {
@ -2308,6 +2346,7 @@ static void encode_superblock(VP9_COMP *cpi, MACROBLOCK *x,
MODE_INFO *mi = x->e_mbd.mode_info_context;
unsigned int segment_id = mi->mbmi.segment_id;
ENTROPY_CONTEXT_PLANES ta[4], tl[4];
const int mis = cm->mode_info_stride;
x->skip = 0;
@ -2397,6 +2436,53 @@ static void encode_superblock(VP9_COMP *cpi, MACROBLOCK *x,
xd->dst.y_stride, xd->dst.uv_stride);
}
#if CONFIG_TX32X32
if (xd->mode_info_context->mbmi.txfm_size == TX_32X32) {
vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff, src, src_y_stride,
dst, dst_y_stride);
vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff,
usrc, vsrc, src_uv_stride,
udst, vdst, dst_uv_stride);
vp9_transform_sby_32x32(x);
vp9_transform_sbuv_16x16(x);
vp9_quantize_sby_32x32(x);
vp9_quantize_sbuv_16x16(x);
// TODO(rbultje): trellis optimize
vp9_inverse_transform_sbuv_16x16(&x->e_mbd.sb_coeff_data);
vp9_inverse_transform_sby_32x32(&x->e_mbd.sb_coeff_data);
vp9_recon_sby_s_c(&x->e_mbd, dst);
vp9_recon_sbuv_s_c(&x->e_mbd, udst, vdst);
if (!x->skip) {
vp9_tokenize_sb(cpi, &x->e_mbd, t, 0);
} else {
int mb_skip_context =
cpi->common.mb_no_coeff_skip ?
(mi - 1)->mbmi.mb_skip_coeff +
(mi - mis)->mbmi.mb_skip_coeff :
0;
mi->mbmi.mb_skip_coeff = 1;
if (cm->mb_no_coeff_skip) {
cpi->skip_true_count[mb_skip_context]++;
vp9_fix_contexts_sb(xd);
} else {
vp9_stuff_sb(cpi, xd, t, 0);
cpi->skip_false_count[mb_skip_context]++;
}
}
// copy skip flag on all mb_mode_info contexts in this SB
// if this was a skip at this txfm size
if (mb_col < cm->mb_cols - 1)
mi[1].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff;
if (mb_row < cm->mb_rows - 1) {
mi[mis].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff;
if (mb_col < cm->mb_cols - 1)
mi[mis + 1].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff;
}
skip[0] = skip[2] = skip[1] = skip[3] = mi->mbmi.mb_skip_coeff;
} else {
#endif
for (n = 0; n < 4; n++) {
int x_idx = n & 1, y_idx = n >> 1;
@ -2405,7 +2491,7 @@ static void encode_superblock(VP9_COMP *cpi, MACROBLOCK *x,
memcpy(&ta[n], xd->above_context, sizeof(ta[n]));
memcpy(&tl[n], xd->left_context, sizeof(tl[n]));
tp[n] = *t;
xd->mode_info_context = mi + x_idx + y_idx * cm->mode_info_stride;
xd->mode_info_context = mi + x_idx + y_idx * mis;
vp9_subtract_mby_s_c(x->src_diff,
src + x_idx * 16 + y_idx * 16 * src_y_stride,
@ -2433,7 +2519,7 @@ static void encode_superblock(VP9_COMP *cpi, MACROBLOCK *x,
int mb_skip_context =
cpi->common.mb_no_coeff_skip ?
(x->e_mbd.mode_info_context - 1)->mbmi.mb_skip_coeff +
(x->e_mbd.mode_info_context - cpi->common.mode_info_stride)->mbmi.mb_skip_coeff :
(x->e_mbd.mode_info_context - mis)->mbmi.mb_skip_coeff :
0;
xd->mode_info_context->mbmi.mb_skip_coeff = skip[n] = 1;
if (cpi->common.mb_no_coeff_skip) {
@ -2450,20 +2536,29 @@ static void encode_superblock(VP9_COMP *cpi, MACROBLOCK *x,
xd->mode_info_context = mi;
update_sb_skip_coeff_state(cpi, x, ta, tl, tp, t, skip);
#if CONFIG_TX32X32
}
#endif
if (cm->txfm_mode == TX_MODE_SELECT &&
!((cm->mb_no_coeff_skip && skip[0] && skip[1] && skip[2] && skip[3]) ||
(vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) &&
vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0))) {
cpi->txfm_count[mi->mbmi.txfm_size]++;
cpi->txfm_count_32x32p[mi->mbmi.txfm_size]++;
} else {
TX_SIZE sz = (cm->txfm_mode == TX_MODE_SELECT) ? TX_16X16 : cm->txfm_mode;
TX_SIZE sz = (cm->txfm_mode == TX_MODE_SELECT) ?
#if CONFIG_TX32X32
TX_32X32 :
#else
TX_16X16 :
#endif
cm->txfm_mode;
mi->mbmi.txfm_size = sz;
if (mb_col < cm->mb_cols - 1)
mi[1].mbmi.txfm_size = sz;
if (mb_row < cm->mb_rows - 1) {
mi[cm->mode_info_stride].mbmi.txfm_size = sz;
mi[mis].mbmi.txfm_size = sz;
if (mb_col < cm->mb_cols - 1)
mi[cm->mode_info_stride + 1].mbmi.txfm_size = sz;
mi[mis + 1].mbmi.txfm_size = sz;
}
}
}


@ -108,6 +108,52 @@ void vp9_subtract_mby_s_c(short *diff, const unsigned char *src, int src_stride,
}
}
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
void vp9_subtract_sby_s_c(short *diff, const unsigned char *src, int src_stride,
const unsigned char *pred, int dst_stride) {
int r, c;
for (r = 0; r < 32; r++) {
for (c = 0; c < 32; c++) {
diff[c] = src[c] - pred[c];
}
diff += 32;
pred += dst_stride;
src += src_stride;
}
}
void vp9_subtract_sbuv_s_c(short *diff, const unsigned char *usrc,
const unsigned char *vsrc, int src_stride,
const unsigned char *upred,
const unsigned char *vpred, int dst_stride) {
short *udiff = diff + 1024;
short *vdiff = diff + 1024 + 256;
int r, c;
for (r = 0; r < 16; r++) {
for (c = 0; c < 16; c++) {
udiff[c] = usrc[c] - upred[c];
}
udiff += 16;
upred += dst_stride;
usrc += src_stride;
}
for (r = 0; r < 16; r++) {
for (c = 0; c < 16; c++) {
vdiff[c] = vsrc[c] - vpred[c];
}
vdiff += 16;
vpred += dst_stride;
vsrc += src_stride;
}
}
#endif
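These helpers define the layout of the superblock residual buffer: the 32x32 luma residual first, then the two 16x16 chroma residuals. The offsets, written out with illustrative names (the constants below are not part of the patch):

/* Offsets into sb_coeff_data.src_diff / .coeff implied by the code above. */
enum {
  SB_Y_OFFSET  = 0,                      /* 32x32 luma residual           */
  SB_U_OFFSET  = 32 * 32,                /* = 1024, 16x16 U residual      */
  SB_V_OFFSET  = 32 * 32 + 16 * 16,      /* = 1280, 16x16 V residual      */
  SB_DIFF_SIZE = 32 * 32 + 2 * 16 * 16   /* = 1536 shorts in total        */
};
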
void vp9_subtract_mby_c(short *diff, unsigned char *src,
unsigned char *pred, int stride) {
vp9_subtract_mby_s_c(diff, src, stride, pred, 16);
@ -265,6 +311,22 @@ void vp9_transform_mb_16x16(MACROBLOCK *x) {
vp9_transform_mbuv_8x8(x);
}
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
void vp9_transform_sby_32x32(MACROBLOCK *x) {
SUPERBLOCK * const x_sb = &x->sb_coeff_data;
vp9_short_fdct32x32(x_sb->src_diff, x_sb->coeff, 64);
}
void vp9_transform_sbuv_16x16(MACROBLOCK *x) {
SUPERBLOCK * const x_sb = &x->sb_coeff_data;
vp9_clear_system_state();
x->vp9_short_fdct16x16(x_sb->src_diff + 1024,
x_sb->coeff + 1024, 32);
x->vp9_short_fdct16x16(x_sb->src_diff + 1280,
x_sb->coeff + 1280, 32);
}
#endif
#define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF )
#define RDTRUNC_8x8(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF )
typedef struct vp9_token_state vp9_token_state;


@ -47,6 +47,11 @@ void vp9_transform_mb_16x16(MACROBLOCK *mb);
void vp9_transform_mby_16x16(MACROBLOCK *x);
void vp9_optimize_mby_16x16(MACROBLOCK *x);
#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32
void vp9_transform_sby_32x32(MACROBLOCK *x);
void vp9_transform_sbuv_16x16(MACROBLOCK *x);
#endif
void vp9_fidct_mb(MACROBLOCK *x);
void vp9_subtract_4b_c(BLOCK *be, BLOCKD *bd, int pitch);
@ -59,6 +64,14 @@ void vp9_subtract_mbuv_s_c(short *diff, const unsigned char *usrc,
void vp9_subtract_mby_s_c(short *diff, const unsigned char *src,
int src_stride, const unsigned char *pred,
int dst_stride);
#if CONFIG_TX32X32
void vp9_subtract_sby_s_c(short *diff, const unsigned char *src, int src_stride,
const unsigned char *pred, int dst_stride);
void vp9_subtract_sbuv_s_c(short *diff, const unsigned char *usrc,
const unsigned char *vsrc, int src_stride,
const unsigned char *upred,
const unsigned char *vpred, int dst_stride);
#endif
#endif
#endif


@ -1810,7 +1810,7 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
#endif
for (i = 0; i < COMP_PRED_CONTEXTS; i++)
cm->prob_comppred[i] = 128;
for (i = 0; i < TX_SIZE_MAX - 1; i++)
for (i = 0; i < TX_SIZE_MAX_SB - 1; i++)
cm->prob_tx[i] = 128;
// Prime the recent reference frame usage counters.
@ -3698,6 +3698,9 @@ static void encode_frame_to_data_rate
vp9_copy(cpi->common.fc.coef_counts_16x16, cpi->coef_counts_16x16);
vp9_copy(cpi->common.fc.hybrid_coef_counts_16x16,
cpi->hybrid_coef_counts_16x16);
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
vp9_copy(cpi->common.fc.coef_counts_32x32, cpi->coef_counts_32x32);
#endif
vp9_adapt_coef_probs(&cpi->common);
if (cpi->common.frame_type != KEY_FRAME) {
#if CONFIG_SUPERBLOCKS


@ -109,6 +109,11 @@ typedef struct {
vp9_prob hybrid_coef_probs_16x16[BLOCK_TYPES_16X16]
[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
vp9_prob coef_probs_32x32[BLOCK_TYPES_32X32]
[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
#endif
#if CONFIG_SUPERBLOCKS
vp9_prob sb_ymode_prob[VP9_I32X32_MODES - 1];
#endif
@ -435,6 +440,15 @@ typedef struct VP9_COMP {
DECLARE_ALIGNED(16, short, zrun_zbin_boost_y2_16x16[QINDEX_RANGE][256]);
DECLARE_ALIGNED(16, short, zrun_zbin_boost_uv_16x16[QINDEX_RANGE][256]);
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
DECLARE_ALIGNED(16, short, Y1zbin_32x32[QINDEX_RANGE][1024]);
DECLARE_ALIGNED(16, short, Y2zbin_32x32[QINDEX_RANGE][1024]);
DECLARE_ALIGNED(16, short, UVzbin_32x32[QINDEX_RANGE][1024]);
DECLARE_ALIGNED(16, short, zrun_zbin_boost_y1_32x32[QINDEX_RANGE][1024]);
DECLARE_ALIGNED(16, short, zrun_zbin_boost_y2_32x32[QINDEX_RANGE][1024]);
DECLARE_ALIGNED(16, short, zrun_zbin_boost_uv_32x32[QINDEX_RANGE][1024]);
#endif
MACROBLOCK mb;
VP9_COMMON common;
VP9_CONFIG oxcf;
@ -483,8 +497,9 @@ typedef struct VP9_COMP {
int comp_pred_count[COMP_PRED_CONTEXTS];
int single_pred_count[COMP_PRED_CONTEXTS];
// FIXME contextualize
int txfm_count[TX_SIZE_MAX];
int txfm_count_8x8p[TX_SIZE_MAX - 1];
int txfm_count_32x32p[TX_SIZE_MAX_SB];
int txfm_count_16x16p[TX_SIZE_MAX_MB];
int txfm_count_8x8p[TX_SIZE_MAX_MB - 1];
int64_t rd_tx_select_diff[NB_TXFM_MODES];
int rd_tx_select_threshes[4][NB_TXFM_MODES];
@ -604,6 +619,12 @@ typedef struct VP9_COMP {
vp9_prob frame_hybrid_coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
unsigned int frame_hybrid_branch_ct_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2];
#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32
unsigned int coef_counts_32x32 [BLOCK_TYPES_32X32] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */
vp9_prob frame_coef_probs_32x32 [BLOCK_TYPES_32X32] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
unsigned int frame_branch_ct_32x32 [BLOCK_TYPES_32X32] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2];
#endif
int gfu_boost;
int last_boost;
int kf_boost;


@ -323,28 +323,25 @@ void vp9_quantize_mb_16x16(MACROBLOCK *x) {
vp9_quantize_mbuv_8x8(x);
}
void vp9_regular_quantize_b_16x16(BLOCK *b, BLOCKD *d) {
static void quantize(short *zbin_boost_orig_ptr,
short *coeff_ptr, int n_coeffs, int max_coeffs,
short *zbin_ptr, short *round_ptr, short *quant_ptr,
unsigned char *quant_shift_ptr,
short *qcoeff_ptr, short *dqcoeff_ptr,
short *dequant_ptr, short zbin_oq_value,
int *eob_ptr, const int *scan, int mul) {
int i, rc, eob;
int zbin;
int x, y, z, sz;
short *zbin_boost_ptr = b->zrun_zbin_boost_16x16;
short *coeff_ptr = b->coeff;
short *zbin_ptr = b->zbin_16x16;
short *round_ptr = b->round;
short *quant_ptr = b->quant;
unsigned char *quant_shift_ptr = b->quant_shift;
short *qcoeff_ptr = d->qcoeff;
short *dqcoeff_ptr = d->dqcoeff;
short *dequant_ptr = d->dequant;
short zbin_oq_value = b->zbin_extra;
short *zbin_boost_ptr = zbin_boost_orig_ptr;
vpx_memset(qcoeff_ptr, 0, 256*sizeof(short));
vpx_memset(dqcoeff_ptr, 0, 256*sizeof(short));
vpx_memset(qcoeff_ptr, 0, n_coeffs*sizeof(short));
vpx_memset(dqcoeff_ptr, 0, n_coeffs*sizeof(short));
eob = -1;
for (i = 0; i < b->eob_max_offset_16x16; i++) {
rc = vp9_default_zig_zag1d_16x16[i];
z = coeff_ptr[rc];
for (i = 0; i < max_coeffs; i++) {
rc = scan[i];
z = coeff_ptr[rc] * mul;
zbin = (zbin_ptr[rc!=0] + *zbin_boost_ptr + zbin_oq_value);
zbin_boost_ptr ++;
@ -354,22 +351,70 @@ void vp9_regular_quantize_b_16x16(BLOCK *b, BLOCKD *d) {
if (x >= zbin) {
x += (round_ptr[rc!=0]);
y = ((int)(((int)(x * quant_ptr[rc!=0]) >> 16) + x))
y = ((int)(((int)(x * quant_ptr[rc != 0]) >> 16) + x))
>> quant_shift_ptr[rc!=0]; // quantize (x)
x = (y ^ sz) - sz; // get the sign back
qcoeff_ptr[rc] = x; // write to destination
dqcoeff_ptr[rc] = x * dequant_ptr[rc!=0]; // dequantized value
dqcoeff_ptr[rc] = x * dequant_ptr[rc != 0] / mul; // dequantized value
if (y) {
eob = i; // last nonzero coeffs
zbin_boost_ptr = b->zrun_zbin_boost_16x16;
zbin_boost_ptr = zbin_boost_orig_ptr;
}
}
}
d->eob = eob + 1;
*eob_ptr = eob + 1;
}
void vp9_regular_quantize_b_16x16(BLOCK *b, BLOCKD *d) {
quantize(b->zrun_zbin_boost_16x16,
b->coeff,
256, b->eob_max_offset_16x16,
b->zbin_16x16, b->round, b->quant, b->quant_shift,
d->qcoeff,
d->dqcoeff,
d->dequant,
b->zbin_extra,
&d->eob, vp9_default_zig_zag1d_16x16, 1);
}
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
void vp9_quantize_sby_32x32(MACROBLOCK *x) {
x->e_mbd.block[0].eob = 0;
quantize(x->block[0].zrun_zbin_boost_32x32,
x->sb_coeff_data.coeff,
1024, x->block[0].eob_max_offset_32x32,
x->block[0].zbin_32x32,
x->block[0].round, x->block[0].quant, x->block[0].quant_shift,
x->e_mbd.sb_coeff_data.qcoeff,
x->e_mbd.sb_coeff_data.dqcoeff,
x->e_mbd.block[0].dequant,
x->block[0].zbin_extra,
&x->e_mbd.block[0].eob,
vp9_default_zig_zag1d_32x32, 2);
}
void vp9_quantize_sbuv_16x16(MACROBLOCK *x) {
int i;
x->e_mbd.block[16].eob = 0;
x->e_mbd.block[20].eob = 0;
for (i = 16; i < 24; i += 4)
quantize(x->block[i].zrun_zbin_boost_16x16,
x->sb_coeff_data.coeff + 1024 + (i - 16) * 64,
256, x->block[i].eob_max_offset_16x16,
x->block[i].zbin_16x16,
x->block[i].round, x->block[0].quant, x->block[i].quant_shift,
x->e_mbd.sb_coeff_data.qcoeff + 1024 + (i - 16) * 64,
x->e_mbd.sb_coeff_data.dqcoeff + 1024 + (i - 16) * 64,
x->e_mbd.block[i].dequant,
x->block[i].zbin_extra,
&x->e_mbd.block[i].eob,
vp9_default_zig_zag1d_16x16, 1);
}
#endif
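The 32x32 luma path calls the shared quantize() helper with mul = 2 (see vp9_quantize_sby_32x32 above), so each coefficient is doubled before the zero-bin test and quantization, and the dequantized value is divided back down. A reduced sketch of where that factor enters and leaves, assuming a simplified quantization step rather than the zbin/round/quant_shift pipeline above:

/* Toy model of the mul factor only; not the real quantizer. */
static int toy_quantize(int coeff, int dequant, int mul, int *dqcoeff) {
  int z = coeff * mul;                   /* mul == 2 on the 32x32 luma path */
  int q = (z >= 0 ? z : -z) / dequant;   /* simplified quantization step    */
  if (z < 0) q = -q;                     /* restore the sign                */
  *dqcoeff = q * dequant / mul;          /* dequantized value, scaled back  */
  return q;
}
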
/* quantize_b_pair function pointer in MACROBLOCK structure is set to one of
* these two C functions if corresponding optimized routine is not available.
* NEON optimized version implements currently the fast quantization for pair
@ -427,6 +472,74 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
};
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
static const int zbin_boost_32x32[1024] = {
0, 0, 0, 8, 8, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28,
30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
};
#endif
int qrounding_factor = 48;
@ -454,7 +567,13 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
cpi->zrun_zbin_boost_y1_8x8[Q][0] =
((quant_val * zbin_boost_8x8[0]) + 64) >> 7;
cpi->zrun_zbin_boost_y1_16x16[Q][0] = ((quant_val * zbin_boost_16x16[0]) + 64) >> 7;
cpi->zrun_zbin_boost_y1_16x16[Q][0] =
((quant_val * zbin_boost_16x16[0]) + 64) >> 7;
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
cpi->Y1zbin_32x32[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
cpi->zrun_zbin_boost_y1_32x32[Q][0] =
((quant_val * zbin_boost_32x32[0]) + 64) >> 7;
#endif
quant_val = vp9_dc2quant(Q, cpi->common.y2dc_delta_q);
@ -468,7 +587,8 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
cpi->zrun_zbin_boost_y2_8x8[Q][0] =
((quant_val * zbin_boost_8x8[0]) + 64) >> 7;
cpi->zrun_zbin_boost_y2_16x16[Q][0] = ((quant_val * zbin_boost_16x16[0]) + 64) >> 7;
cpi->zrun_zbin_boost_y2_16x16[Q][0] =
((quant_val * zbin_boost_16x16[0]) + 64) >> 7;
quant_val = vp9_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
invert_quant(cpi->UVquant[Q] + 0,
@ -481,7 +601,8 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;
cpi->zrun_zbin_boost_uv_8x8[Q][0] =
((quant_val * zbin_boost_8x8[0]) + 64) >> 7;
cpi->zrun_zbin_boost_uv_16x16[Q][0] = ((quant_val * zbin_boost_16x16[0]) + 64) >> 7;
cpi->zrun_zbin_boost_uv_16x16[Q][0] =
((quant_val * zbin_boost_16x16[0]) + 64) >> 7;
// all the 4x4 ac values =;
for (i = 1; i < 16; i++) {
@ -543,16 +664,30 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
quant_val = vp9_ac_yquant(Q);
cpi->Y1zbin_16x16[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7;
cpi->zrun_zbin_boost_y1_16x16[Q][i] = ((quant_val * zbin_boost_16x16[i]) + 64) >> 7;
cpi->zrun_zbin_boost_y1_16x16[Q][i] =
((quant_val * zbin_boost_16x16[i]) + 64) >> 7;
quant_val = vp9_ac2quant(Q, cpi->common.y2ac_delta_q);
cpi->Y2zbin_16x16[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7;
cpi->zrun_zbin_boost_y2_16x16[Q][i] = ((quant_val * zbin_boost_16x16[i]) + 64) >> 7;
cpi->zrun_zbin_boost_y2_16x16[Q][i] =
((quant_val * zbin_boost_16x16[i]) + 64) >> 7;
quant_val = vp9_ac_uv_quant(Q, cpi->common.uvac_delta_q);
cpi->UVzbin_16x16[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7;
cpi->zrun_zbin_boost_uv_16x16[Q][i] = ((quant_val * zbin_boost_16x16[i]) + 64) >> 7;
cpi->zrun_zbin_boost_uv_16x16[Q][i] =
((quant_val * zbin_boost_16x16[i]) + 64) >> 7;
}
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
// 32x32 structures. Same comment above applies.
for (i = 1; i < 1024; i++) {
int rc = vp9_default_zig_zag1d_32x32[i];
quant_val = vp9_ac_yquant(Q);
cpi->Y1zbin_32x32[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7;
cpi->zrun_zbin_boost_y1_32x32[Q][i] =
((quant_val * zbin_boost_32x32[i]) + 64) >> 7;
}
#endif
}
}
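The 1024-entry zbin_boost_32x32 table above ramps from 0 to 48 over its first 26 entries and stays at 48 for the remainder. An equivalent init-time construction, shown only as a sketch (the patch keeps the literal table):

/* Builds the same values as the zbin_boost_32x32 literal above. */
static void build_zbin_boost_32x32(int boost[1024]) {
  int i;
  for (i = 0; i < 1024; i++) {
    if (i < 3)        boost[i] = 0;
    else if (i < 6)   boost[i] = 8;
    else if (i < 26)  boost[i] = 2 * i - 2;   /* 10, 12, ..., 48 */
    else              boost[i] = 48;
  }
}
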
@ -592,11 +727,17 @@ void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) {
x->block[i].zbin = cpi->Y1zbin[QIndex];
x->block[i].zbin_8x8 = cpi->Y1zbin_8x8[QIndex];
x->block[i].zbin_16x16 = cpi->Y1zbin_16x16[QIndex];
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
x->block[i].zbin_32x32 = cpi->Y1zbin_32x32[QIndex];
#endif
x->block[i].round = cpi->Y1round[QIndex];
x->e_mbd.block[i].dequant = cpi->common.Y1dequant[QIndex];
x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_y1[QIndex];
x->block[i].zrun_zbin_boost_8x8 = cpi->zrun_zbin_boost_y1_8x8[QIndex];
x->block[i].zrun_zbin_boost_16x16 = cpi->zrun_zbin_boost_y1_16x16[QIndex];
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
x->block[i].zrun_zbin_boost_32x32 = cpi->zrun_zbin_boost_y1_32x32[QIndex];
#endif
x->block[i].zbin_extra = (short)zbin_extra;
// Segment max eob offset feature.
@ -607,10 +748,17 @@ void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) {
vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
x->block[i].eob_max_offset_16x16 =
vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
x->block[i].eob_max_offset_32x32 =
vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
#endif
} else {
x->block[i].eob_max_offset = 16;
x->block[i].eob_max_offset_8x8 = 64;
x->block[i].eob_max_offset_16x16 = 256;
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
x->block[i].eob_max_offset_32x32 = 1024;
#endif
}
}
@ -640,9 +788,12 @@ void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) {
vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
x->block[i].eob_max_offset_8x8 =
vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
x->block[i].eob_max_offset_16x16 =
vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
} else {
x->block[i].eob_max_offset = 16;
x->block[i].eob_max_offset_8x8 = 64;
x->block[i].eob_max_offset_16x16 = 256;
}
}


@ -78,6 +78,11 @@ void vp9_quantize_mb_16x16(MACROBLOCK *x);
extern prototype_quantize_block(vp9_quantize_quantb_16x16);
extern prototype_quantize_mb(vp9_quantize_mby_16x16);
#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32
void vp9_quantize_sby_32x32(MACROBLOCK *x);
void vp9_quantize_sbuv_16x16(MACROBLOCK *x);
#endif
struct VP9_COMP;
extern void vp9_set_quantizer(struct VP9_COMP *cpi, int Q);


@ -175,6 +175,9 @@ void vp9_save_coding_context(VP9_COMP *cpi) {
vp9_copy(cc->hybrid_coef_probs_8x8, cm->fc.hybrid_coef_probs_8x8);
vp9_copy(cc->coef_probs_16x16, cm->fc.coef_probs_16x16);
vp9_copy(cc->hybrid_coef_probs_16x16, cm->fc.hybrid_coef_probs_16x16);
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
vp9_copy(cc->coef_probs_32x32, cm->fc.coef_probs_32x32);
#endif
vp9_copy(cc->switchable_interp_prob, cm->fc.switchable_interp_prob);
#if CONFIG_COMP_INTERINTRA_PRED
cc->interintra_prob = cm->fc.interintra_prob;
@ -234,6 +237,9 @@ void vp9_restore_coding_context(VP9_COMP *cpi) {
vp9_copy(cm->fc.hybrid_coef_probs_8x8, cc->hybrid_coef_probs_8x8);
vp9_copy(cm->fc.coef_probs_16x16, cc->coef_probs_16x16);
vp9_copy(cm->fc.hybrid_coef_probs_16x16, cc->hybrid_coef_probs_16x16);
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
vp9_copy(cm->fc.coef_probs_32x32, cc->coef_probs_32x32);
#endif
vp9_copy(cm->fc.switchable_interp_prob, cc->switchable_interp_prob);
#if CONFIG_COMP_INTERINTRA_PRED
cm->fc.interintra_prob = cc->interintra_prob;


@ -400,12 +400,18 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int QIndex) {
cpi->common.fc.hybrid_coef_probs_16x16,
BLOCK_TYPES_16X16);
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
fill_token_costs(
cpi->mb.token_costs[TX_32X32],
(const vp9_prob(*)[8][PREV_COEF_CONTEXTS][11]) cpi->common.fc.coef_probs_32x32,
BLOCK_TYPES_32X32);
#endif
/*rough estimate for costing*/
cpi->common.kf_ymode_probs_index = cpi->common.base_qindex >> 4;
vp9_init_mode_costs(cpi);
if (cpi->common.frame_type != KEY_FRAME)
{
if (cpi->common.frame_type != KEY_FRAME) {
vp9_build_nmv_cost_table(
cpi->mb.nmvjointcost,
cpi->mb.e_mbd.allow_high_precision_mv ?
@ -556,7 +562,7 @@ static int cost_coeffs_2x2(MACROBLOCK *mb,
static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type,
ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
int tx_size) {
TX_SIZE tx_size) {
const int eob = b->eob;
int c = (type == PLANE_TYPE_Y_NO_DC); /* start at coef 0, unless Y with Y2 */
int cost = 0, default_eob, seg_eob;
@ -613,9 +619,24 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type,
default_eob = 256;
if (type == PLANE_TYPE_Y_WITH_DC) {
tx_type = get_tx_type_16x16(xd, b);
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
} else if (type == PLANE_TYPE_UV) {
int ib = (int)(b - xd->block) - 16;
qcoeff_ptr = xd->sb_coeff_data.qcoeff + 1024 + 64 * ib;
#endif
}
break;
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
case TX_32X32:
scan = vp9_default_zig_zag1d_32x32;
band = vp9_coef_bands_32x32;
default_eob = 1024;
qcoeff_ptr = xd->sb_coeff_data.qcoeff;
break;
#endif
default:
abort();
break;
}
if (vp9_segfeature_active(&mb->e_mbd, segment_id, SEG_LVL_EOB))
@ -813,23 +834,28 @@ static void macro_block_yrd_16x16(MACROBLOCK *mb, int *Rate, int *Distortion,
}
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
int r[2][TX_SIZE_MAX], int *rate,
int d[TX_SIZE_MAX], int *distortion,
int s[TX_SIZE_MAX], int *skip,
int64_t txfm_cache[NB_TXFM_MODES]) {
int (*r)[2], int *rate,
int *d, int *distortion,
int *s, int *skip,
int64_t txfm_cache[NB_TXFM_MODES],
TX_SIZE max_txfm_size) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
vp9_prob skip_prob = cm->mb_no_coeff_skip ?
vp9_get_pred_prob(cm, xd, PRED_MBSKIP) : 128;
int64_t rd[2][TX_SIZE_MAX];
int n;
int64_t rd[TX_SIZE_MAX_SB][2];
int n, m;
r[1][TX_16X16] = r[0][TX_16X16] + vp9_cost_one(cm->prob_tx[0]) +
vp9_cost_one(cm->prob_tx[1]);
r[1][TX_8X8] = r[0][TX_8X8] + vp9_cost_one(cm->prob_tx[0]) +
vp9_cost_zero(cm->prob_tx[1]);
r[1][TX_4X4] = r[0][TX_4X4] + vp9_cost_zero(cm->prob_tx[0]);
for (n = TX_4X4; n <= max_txfm_size; n++) {
r[n][1] = r[n][0];
for (m = 0; m <= n - (n == max_txfm_size); m++) {
if (m == n)
r[n][1] += vp9_cost_zero(cm->prob_tx[m]);
else
r[n][1] += vp9_cost_one(cm->prob_tx[m]);
}
}
if (cm->mb_no_coeff_skip) {
int s0, s1;
@ -838,64 +864,82 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
s0 = vp9_cost_bit(skip_prob, 0);
s1 = vp9_cost_bit(skip_prob, 1);
for (n = TX_4X4; n <= TX_16X16; n++) {
for (n = TX_4X4; n <= max_txfm_size; n++) {
if (s[n]) {
rd[0][n] = rd[1][n] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
} else {
rd[0][n] = RDCOST(x->rdmult, x->rddiv, r[0][n] + s0, d[n]);
rd[1][n] = RDCOST(x->rdmult, x->rddiv, r[1][n] + s0, d[n]);
rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
}
}
} else {
for (n = TX_4X4; n <= TX_16X16; n++) {
rd[0][n] = RDCOST(x->rdmult, x->rddiv, r[0][n], d[n]);
rd[1][n] = RDCOST(x->rdmult, x->rddiv, r[1][n], d[n]);
for (n = TX_4X4; n <= max_txfm_size; n++) {
rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0], d[n]);
rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1], d[n]);
}
}
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
if (max_txfm_size == TX_32X32 &&
(cm->txfm_mode == ALLOW_32X32 ||
(cm->txfm_mode == TX_MODE_SELECT &&
rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
rd[TX_32X32][1] < rd[TX_4X4][1]))) {
mbmi->txfm_size = TX_32X32;
} else
#endif
if ( cm->txfm_mode == ALLOW_16X16 ||
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
(max_txfm_size == TX_16X16 && cm->txfm_mode == ALLOW_32X32) ||
#endif
(cm->txfm_mode == TX_MODE_SELECT &&
rd[1][TX_16X16] < rd[1][TX_8X8] && rd[1][TX_16X16] < rd[1][TX_4X4])) {
rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1])) {
mbmi->txfm_size = TX_16X16;
} else if (cm->txfm_mode == ALLOW_8X8 ||
(cm->txfm_mode == TX_MODE_SELECT && rd[1][TX_8X8] < rd[1][TX_4X4])) {
(cm->txfm_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) {
mbmi->txfm_size = TX_8X8;
} else {
assert(cm->txfm_mode == ONLY_4X4 ||
(cm->txfm_mode == TX_MODE_SELECT && rd[1][TX_4X4] <= rd[1][TX_8X8]));
assert(cm->txfm_mode == ONLY_4X4 || cm->txfm_mode == TX_MODE_SELECT);
mbmi->txfm_size = TX_4X4;
}
*distortion = d[mbmi->txfm_size];
*rate = r[cm->txfm_mode == TX_MODE_SELECT][mbmi->txfm_size];
*rate = r[mbmi->txfm_size][cm->txfm_mode == TX_MODE_SELECT];
*skip = s[mbmi->txfm_size];
txfm_cache[ONLY_4X4] = rd[0][TX_4X4];
txfm_cache[ALLOW_8X8] = rd[0][TX_8X8];
txfm_cache[ALLOW_16X16] = rd[0][TX_16X16];
if (rd[1][TX_16X16] < rd[1][TX_8X8] && rd[1][TX_16X16] < rd[1][TX_4X4])
txfm_cache[TX_MODE_SELECT] = rd[1][TX_16X16];
txfm_cache[ONLY_4X4] = rd[TX_4X4][0];
txfm_cache[ALLOW_8X8] = rd[TX_8X8][0];
txfm_cache[ALLOW_16X16] = rd[TX_16X16][0];
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
txfm_cache[ALLOW_32X32] = rd[max_txfm_size][0];
if (max_txfm_size == TX_32X32 &&
rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
rd[TX_32X32][1] < rd[TX_4X4][1])
txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
else
txfm_cache[TX_MODE_SELECT] = rd[1][TX_4X4] < rd[1][TX_8X8] ?
rd[1][TX_4X4] : rd[1][TX_8X8];
#endif
if (rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1])
txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
else
txfm_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ?
rd[TX_4X4][1] : rd[TX_8X8][1];
}
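The nested loop near the top of this function replaces the hard-coded per-size rate offsets: size n is signalled with one vp9_cost_one(cm->prob_tx[m]) for each smaller size m and a final vp9_cost_zero, the latter omitted when n is already the largest selectable size. A standalone sketch of that signalling cost, assuming cost_zero/cost_one hold the precomputed bit costs for prob_tx[0..2]:

/* Extra rate for signalling transform size n under TX_MODE_SELECT
 * (mirrors the r[n][1] accumulation above). */
static int tx_size_signal_cost(int n, int max_txfm_size,
                               const int cost_zero[3],
                               const int cost_one[3]) {
  int m, cost = 0;
  for (m = 0; m <= n - (n == max_txfm_size); m++)
    cost += (m == n) ? cost_zero[m] : cost_one[m];
  return cost;
}
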
static void macro_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
int *distortion, int *skippable,
int64_t txfm_cache[NB_TXFM_MODES]) {
MACROBLOCKD *const xd = &x->e_mbd;
int r[2][TX_SIZE_MAX], d[TX_SIZE_MAX], s[TX_SIZE_MAX];
int r[TX_SIZE_MAX_MB][2], d[TX_SIZE_MAX_MB], s[TX_SIZE_MAX_MB];
vp9_subtract_mby(x->src_diff, *(x->block[0].base_src), xd->predictor,
x->block[0].src_stride);
macro_block_yrd_16x16(x, &r[0][TX_16X16], &d[TX_16X16],
&s[TX_16X16], 1);
macro_block_yrd_8x8(x, &r[0][TX_8X8], &d[TX_8X8], &s[TX_8X8], 1);
macro_block_yrd_4x4(x, &r[0][TX_4X4], &d[TX_4X4], &s[TX_4X4], 1);
macro_block_yrd_16x16(x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16], 1);
macro_block_yrd_8x8(x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], 1);
macro_block_yrd_4x4(x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4], 1);
choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skippable,
txfm_cache);
txfm_cache, TX_16X16);
}
static void copy_predictor(unsigned char *dst, const unsigned char *predictor) {
@ -908,25 +952,91 @@ static void copy_predictor(unsigned char *dst, const unsigned char *predictor) {
}
#if CONFIG_SUPERBLOCKS
#if CONFIG_TX32X32
static int rdcost_sby_32x32(MACROBLOCK *x) {
MACROBLOCKD * const xd = &x->e_mbd;
ENTROPY_CONTEXT_PLANES t_above, t_left;
ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) &t_above,
*tl = (ENTROPY_CONTEXT *) &t_left;
vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
return cost_coeffs(x, xd->block, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_32X32);
}
static int vp9_sb_block_error_c(short *coeff, short *dqcoeff, int block_size) {
int i;
int64_t error = 0;
for (i = 0; i < block_size; i++) {
unsigned int this_diff = coeff[i] - dqcoeff[i];
error += this_diff * this_diff;
}
return error > INT_MAX ? INT_MAX : error;
}
#define DEBUG_ERROR 0
static void super_block_yrd_32x32(MACROBLOCK *x,
int *rate, int *distortion, int *skippable) {
SUPERBLOCK * const x_sb = &x->sb_coeff_data;
MACROBLOCKD * const xd = &x->e_mbd;
SUPERBLOCKD * const xd_sb = &xd->sb_coeff_data;
#if DEBUG_ERROR || CONFIG_DWT32X32HYBRID
short out[1024];
#endif
vp9_transform_sby_32x32(x);
vp9_quantize_sby_32x32(x);
#if DEBUG_ERROR || CONFIG_DWT32X32HYBRID
vp9_short_idct32x32(xd_sb->dqcoeff, out, 64);
#endif
#if !CONFIG_DWT32X32HYBRID
*distortion = vp9_sb_block_error_c(x_sb->coeff, xd_sb->dqcoeff, 1024);
#else
*distortion = vp9_block_error_c(x_sb->src_diff, out, 1024) << 4;
#endif
#if DEBUG_ERROR
printf("IDCT/FDCT error 32x32: %d (d: %d)\n",
vp9_block_error_c(x_sb->src_diff, out, 1024), *distortion);
#endif
*rate = rdcost_sby_32x32(x);
*skippable = vp9_sby_is_skippable_32x32(&x->e_mbd);
}
#endif
static void super_block_yrd(VP9_COMP *cpi,
MACROBLOCK *x, int *rate, int *distortion,
int *skip,
int64_t txfm_cache[NB_TXFM_MODES]) {
MACROBLOCKD *const xd = &x->e_mbd;
int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB], n;
const uint8_t *src = x->src.y_buffer, *dst = xd->dst.y_buffer;
int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride;
ENTROPY_CONTEXT_PLANES t_above[TX_SIZE_MAX_MB][2],
*orig_above = xd->above_context;
ENTROPY_CONTEXT_PLANES t_left[TX_SIZE_MAX_MB][2],
*orig_left = xd->left_context;
for (n = TX_4X4; n < TX_SIZE_MAX_MB; n++) {
vpx_memcpy(t_above[n], xd->above_context, sizeof(t_above[n]));
vpx_memcpy(t_left[n], xd->left_context, sizeof(t_left[n]));
r[n][0] = 0;
d[n] = 0;
s[n] = 1;
}
#if CONFIG_TX32X32
vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff, src, src_y_stride,
dst, dst_y_stride);
super_block_yrd_32x32(x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32]);
#endif
#if DEBUG_ERROR
int err[3] = { 0, 0, 0 };
#endif
for (n = 0; n < 4; n++) {
int x_idx = n & 1, y_idx = n >> 1;
int r_tmp, d_tmp, s_tmp;
@ -941,25 +1051,42 @@ static void super_block_yrd(VP9_COMP *cpi,
xd->left_context = &t_left[TX_16X16][y_idx];
macro_block_yrd_16x16(x, &r_tmp, &d_tmp, &s_tmp, 0);
d[TX_16X16] += d_tmp;
r[TX_16X16][0] += r_tmp;
s[TX_16X16] = s[TX_16X16] && s_tmp;
#if DEBUG_ERROR
vp9_inverse_transform_mby_16x16(xd);
err[2] += vp9_block_error_c(xd->diff, x->src_diff, 256);
#endif
xd->above_context = &t_above[TX_4X4][x_idx];
xd->left_context = &t_left[TX_4X4][y_idx];
macro_block_yrd_4x4(x, &r_tmp, &d_tmp, &s_tmp, 0);
d[TX_4X4] += d_tmp;
r[TX_4X4][0] += r_tmp;
s[TX_4X4] = s[TX_4X4] && s_tmp;
#if DEBUG_ERROR
vp9_inverse_transform_mby_4x4(xd);
err[0] += vp9_block_error_c(xd->diff, x->src_diff, 256);
#endif
xd->above_context = &t_above[TX_8X8][x_idx];
xd->left_context = &t_left[TX_8X8][y_idx];
macro_block_yrd_8x8(x, &r_tmp, &d_tmp, &s_tmp, 0);
d[TX_8X8] += d_tmp;
r[TX_8X8][0] += r_tmp;
s[TX_8X8] = s[TX_8X8] && s_tmp;
#if DEBUG_ERROR
vp9_inverse_transform_mby_8x8(xd);
err[1] += vp9_block_error_c(xd->diff, x->src_diff, 256);
#endif
}
#if DEBUG_ERROR
printf("IDCT/FDCT error 16x16: %d (d: %d)\n", err[2], d[2]);
printf("IDCT/FDCT error 8x8: %d (d: %d)\n", err[1], d[1]);
printf("IDCT/FDCT error 4x4: %d (d: %d)\n", err[0], d[0]);
#endif
choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache,
TX_SIZE_MAX_SB - 1);
xd->above_context = orig_above;
xd->left_context = orig_left;
@ -1632,14 +1759,59 @@ static int64_t rd_inter16x16_uv_8x8(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
}
#if CONFIG_SUPERBLOCKS
#if CONFIG_TX32X32
static int rd_cost_sbuv_16x16(MACROBLOCK *x) {
int b;
int cost = 0;
MACROBLOCKD *const xd = &x->e_mbd;
ENTROPY_CONTEXT_PLANES t_above, t_left;
ENTROPY_CONTEXT *ta, *tl;
vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
ta = (ENTROPY_CONTEXT *) &t_above;
tl = (ENTROPY_CONTEXT *) &t_left;
for (b = 16; b < 24; b += 4)
cost += cost_coeffs(x, xd->block + b, PLANE_TYPE_UV,
ta + vp9_block2above_8x8[b],
tl + vp9_block2left_8x8[b], TX_16X16);
return cost;
}
static void rd_inter32x32_uv_16x16(MACROBLOCK *x, int *rate,
int *distortion, int *skip) {
MACROBLOCKD *const xd = &x->e_mbd;
vp9_transform_sbuv_16x16(x);
vp9_quantize_sbuv_16x16(x);
*rate = rd_cost_sbuv_16x16(x);
*distortion = vp9_block_error_c(x->sb_coeff_data.coeff + 1024,
xd->sb_coeff_data.dqcoeff + 1024, 512) >> 2;
*skip = vp9_sbuv_is_skippable_16x16(xd);
}
#endif
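(Editorial note.) The "+ 1024" offsets above, together with the "+ 1024 + 256 * uv_idx" offset in the tokenizer changes further down, suggest that the superblock coefficient buffers are laid out as 1024 luma coefficients followed by 256 U and 256 V coefficients. A small sketch of that assumed layout; the macro and function names are illustrative only.

#include <stdint.h>

/* Assumed layout of a 32x32 superblock coefficient buffer:
 * [0, 1024)    32x32 luma coefficients
 * [1024, 1280) 16x16 U coefficients
 * [1280, 1536) 16x16 V coefficients
 * (matches the "+ 1024" and "+ 1024 + 256 * uv_idx" offsets seen in this diff) */
#define SB_Y_OFFSET   0
#define SB_Y_COEFFS   1024
#define SB_UV_OFFSET  1024
#define SB_UV_COEFFS  256

static int16_t *sb_plane_coeffs(int16_t *coeff, int plane /* 0=Y, 1=U, 2=V */) {
  return plane == 0 ? coeff + SB_Y_OFFSET
                    : coeff + SB_UV_OFFSET + (plane - 1) * SB_UV_COEFFS;
}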
static int64_t rd_inter32x32_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
int *distortion, int fullpixel, int *skip) {
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
const uint8_t *usrc = x->src.u_buffer, *udst = xd->dst.u_buffer;
const uint8_t *vsrc = x->src.v_buffer, *vdst = xd->dst.v_buffer;
int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride;
#if CONFIG_TX32X32
if (mbmi->txfm_size == TX_32X32) {
vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff,
usrc, vsrc, src_uv_stride,
udst, vdst, dst_uv_stride);
rd_inter32x32_uv_16x16(x, rate, distortion, skip);
} else {
#endif
int n, r = 0, d = 0;
int skippable = 1;
ENTROPY_CONTEXT_PLANES t_above[2], t_left[2];
ENTROPY_CONTEXT_PLANES *ta = xd->above_context;
@ -1680,8 +1852,11 @@ static int64_t rd_inter32x32_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
xd->above_context = ta;
memcpy(xd->above_context, t_above, sizeof(t_above));
memcpy(xd->left_context, t_left, sizeof(t_left));
#if CONFIG_TX32X32
}
#endif
return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
}
#endif
@ -1818,15 +1993,26 @@ static void rd_pick_intra_mbuv_mode_8x8(VP9_COMP *cpi,
}
#if CONFIG_SUPERBLOCKS
// TODO(rbultje) very similar to rd_inter32x32_uv(), merge?
static void super_block_uvrd(MACROBLOCK *x,
int *rate,
int *distortion,
int *skippable) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
const uint8_t *usrc = x->src.u_buffer, *udst = xd->dst.u_buffer;
const uint8_t *vsrc = x->src.v_buffer, *vdst = xd->dst.v_buffer;
int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride;
#if CONFIG_TX32X32
if (mbmi->txfm_size == TX_32X32) {
vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff,
usrc, vsrc, src_uv_stride,
udst, vdst, dst_uv_stride);
rd_inter32x32_uv_16x16(x, rate, distortion, skippable);
} else {
#endif
int d = 0, r = 0, n, s = 1;
ENTROPY_CONTEXT_PLANES t_above[2], t_left[2];
ENTROPY_CONTEXT_PLANES *ta = xd->above_context;
ENTROPY_CONTEXT_PLANES *tl = xd->left_context;
@ -1844,9 +2030,15 @@ static void super_block_uvrd_8x8(MACROBLOCK *x,
udst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
dst_uv_stride);
if (mbmi->txfm_size == TX_4X4) {
vp9_transform_mbuv_4x4(x);
vp9_quantize_mbuv_4x4(x);
s &= vp9_mbuv_is_skippable_4x4(xd);
} else {
vp9_transform_mbuv_8x8(x);
vp9_quantize_mbuv_8x8(x);
s &= vp9_mbuv_is_skippable_8x8(xd);
}
d += vp9_mbuverror(x) >> 2;
xd->above_context = ta + x_idx;
@ -1864,6 +2056,9 @@ static void super_block_uvrd_8x8(MACROBLOCK *x,
xd->above_context = ta;
memcpy(xd->above_context, t_above, sizeof(t_above));
memcpy(xd->left_context, t_left, sizeof(t_left));
#if CONFIG_TX32X32
}
#endif
}
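(Editorial summary of the branches above.) The chroma transform size follows the luma size: 32x32 luma takes the whole-superblock 16x16 chroma path, 4x4 luma uses 4x4 chroma, and the remaining sizes use 8x8 chroma. A one-function sketch of that mapping; the enum and function names are stand-ins, not identifiers from the commit.

/* Stand-in transform-size enum for this sketch. */
typedef enum { SK_TX_4X4, SK_TX_8X8, SK_TX_16X16, SK_TX_32X32 } sk_tx_size;

/* Chroma transform size implied by the luma transform size in this commit:
 * 32x32 luma -> 16x16 chroma (whole-superblock path),
 * 4x4 luma   -> 4x4 chroma,
 * 8x8/16x16  -> 8x8 chroma. */
static sk_tx_size uv_tx_size_for(sk_tx_size y_tx) {
  if (y_tx == SK_TX_32X32) return SK_TX_16X16;
  if (y_tx == SK_TX_4X4)   return SK_TX_4X4;
  return SK_TX_8X8;
}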
static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi,
@ -1882,8 +2077,8 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi,
x->e_mbd.mode_info_context->mbmi.uv_mode = mode;
vp9_build_intra_predictors_sbuv_s(&x->e_mbd);
super_block_uvrd(x, &this_rate_tokenonly,
&this_distortion, &s);
this_rate = this_rate_tokenonly +
x->intra_uv_mode_cost[x->e_mbd.frame_type][mode];
this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
@ -4141,8 +4336,6 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
int y_skip, uv_skip;
int64_t txfm_cache[NB_TXFM_MODES];
xd->mode_info_context->mbmi.txfm_size = TX_8X8;
error_y = rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
&dist_y, &y_skip, txfm_cache);
error_uv = rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
@ -4362,6 +4555,11 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
int dist_uv_4x4 = 0, dist_uv_8x8 = 0, uv_skip_4x4 = 0, uv_skip_8x8 = 0;
MB_PREDICTION_MODE mode_uv_4x4 = NEARESTMV, mode_uv_8x8 = NEARESTMV;
int switchable_filter_index = 0;
#if CONFIG_TX32X32
int rate_uv_16x16 = 0, rate_uv_tokenonly_16x16 = 0;
int dist_uv_16x16 = 0, uv_skip_16x16 = 0;
MB_PREDICTION_MODE mode_uv_16x16;
#endif
x->skip = 0;
xd->mode_info_context->mbmi.segment_id = segment_id;
@ -4397,6 +4595,14 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
&dist_uv_8x8, &uv_skip_8x8);
mode_uv_8x8 = mbmi->uv_mode;
}
#if CONFIG_TX32X32
if (cm->txfm_mode >= ALLOW_32X32) {
mbmi->txfm_size = TX_32X32;
rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_16x16, &rate_uv_tokenonly_16x16,
&dist_uv_16x16, &uv_skip_16x16);
mode_uv_16x16 = mbmi->uv_mode;
}
#endif
for (mode_index = 0; mode_index < MAX_MODES;
mode_index += (!switchable_filter_index)) {
@ -4524,6 +4730,13 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
distortion_uv = dist_uv_4x4;
skippable = skippable && uv_skip_4x4;
mbmi->uv_mode = mode_uv_4x4;
#if CONFIG_TX32X32
} else if (mbmi->txfm_size == TX_32X32) {
rate_uv = rate_uv_16x16;
distortion_uv = dist_uv_16x16;
skippable = skippable && uv_skip_16x16;
mbmi->uv_mode = mode_uv_16x16;
#endif
} else {
rate_uv = rate_uv_8x8;
distortion_uv = dist_uv_8x8;

View file

@ -117,7 +117,7 @@ static void tokenize_b(VP9_COMP *cpi,
int dry_run) {
int pt; /* near block/prev token context index */
int c = (type == PLANE_TYPE_Y_NO_DC) ? 1 : 0;
int eob = b->eob; /* one beyond last nonzero coeff */
TOKENEXTRA *t = *tp; /* store tokens starting here */
const short *qcoeff_ptr = b->qcoeff;
int seg_eob;
@ -177,7 +177,23 @@ static void tokenize_b(VP9_COMP *cpi,
counts = cpi->coef_counts_16x16;
probs = cpi->common.fc.coef_probs_16x16;
}
#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32
if (type == PLANE_TYPE_UV) {
int uv_idx = (((int) (b - xd->block)) - 16) >> 2;
qcoeff_ptr = xd->sb_coeff_data.qcoeff + 1024 + 256 * uv_idx;
}
#endif
break;
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
case TX_32X32:
seg_eob = 1024;
bands = vp9_coef_bands_32x32;
scan = vp9_default_zig_zag1d_32x32;
counts = cpi->coef_counts_32x32;
probs = cpi->common.fc.coef_probs_32x32;
qcoeff_ptr = xd->sb_coeff_data.qcoeff;
break;
#endif
}
if (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB))
@ -283,6 +299,79 @@ static int mb_is_skippable_16x16(MACROBLOCKD *xd) {
return (vp9_mby_is_skippable_16x16(xd) & vp9_mbuv_is_skippable_8x8(xd));
}
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
int vp9_sby_is_skippable_32x32(MACROBLOCKD *xd) {
int skip = 1;
skip &= !xd->block[0].eob;
return skip;
}
int vp9_sbuv_is_skippable_16x16(MACROBLOCKD *xd) {
return (!xd->block[16].eob) & (!xd->block[20].eob);
}
static int sb_is_skippable_32x32(MACROBLOCKD *xd) {
return vp9_sby_is_skippable_32x32(xd) &&
vp9_sbuv_is_skippable_16x16(xd);
}
void vp9_tokenize_sb(VP9_COMP *cpi,
MACROBLOCKD *xd,
TOKENEXTRA **t,
int dry_run) {
VP9_COMMON * const cm = &cpi->common;
MB_MODE_INFO * const mbmi = &xd->mode_info_context->mbmi;
TOKENEXTRA *t_backup = *t;
ENTROPY_CONTEXT *A[2] = { (ENTROPY_CONTEXT *) (xd->above_context + 0),
(ENTROPY_CONTEXT *) (xd->above_context + 1), };
ENTROPY_CONTEXT *L[2] = { (ENTROPY_CONTEXT *) (xd->left_context + 0),
(ENTROPY_CONTEXT *) (xd->left_context + 1), };
const int mb_skip_context = vp9_get_pred_context(cm, xd, PRED_MBSKIP);
const int segment_id = mbmi->segment_id;
const int skip_inc = !vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) ||
(vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) != 0);
int b;
mbmi->mb_skip_coeff = sb_is_skippable_32x32(xd);
if (mbmi->mb_skip_coeff) {
if (!dry_run)
cpi->skip_true_count[mb_skip_context] += skip_inc;
if (!cm->mb_no_coeff_skip) {
vp9_stuff_sb(cpi, xd, t, dry_run);
} else {
vp9_fix_contexts_sb(xd);
}
if (dry_run)
*t = t_backup;
return;
}
if (!dry_run)
cpi->skip_false_count[mb_skip_context] += skip_inc;
tokenize_b(cpi, xd, xd->block, t, PLANE_TYPE_Y_WITH_DC,
A[0], L[0], TX_32X32, dry_run);
A[0][1] = A[0][2] = A[0][3] = A[0][0];
L[0][1] = L[0][2] = L[0][3] = L[0][0];
for (b = 16; b < 24; b += 4) {
tokenize_b(cpi, xd, xd->block + b, t, PLANE_TYPE_UV,
A[0] + vp9_block2above_8x8[b], L[0] + vp9_block2left_8x8[b],
TX_16X16, dry_run);
A[0][vp9_block2above_8x8[b] + 1] = A[0][vp9_block2above_8x8[b]];
L[0][vp9_block2left_8x8[b] + 1] = L[0][vp9_block2left_8x8[b]];
}
vpx_memset(&A[0][8], 0, sizeof(A[0][8]));
vpx_memset(&L[0][8], 0, sizeof(L[0][8]));
vpx_memcpy(A[1], A[0], sizeof(ENTROPY_CONTEXT_PLANES));
vpx_memcpy(L[1], L[0], sizeof(ENTROPY_CONTEXT_PLANES));
if (dry_run)
*t = t_backup;
}
#endif
void vp9_tokenize_mb(VP9_COMP *cpi,
MACROBLOCKD *xd,
TOKENEXTRA **t,
@ -717,6 +806,13 @@ static __inline void stuff_b(VP9_COMP *cpi,
probs = cpi->common.fc.coef_probs_16x16;
}
break;
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
case TX_32X32:
bands = vp9_coef_bands_32x32;
counts = cpi->coef_counts_32x32;
probs = cpi->common.fc.coef_probs_32x32;
break;
#endif
}
band = bands[(type == PLANE_TYPE_Y_NO_DC) ? 1 : 0];
t->Token = DCT_EOB_TOKEN;
@ -775,7 +871,8 @@ static void stuff_mb_16x16(VP9_COMP *cpi, MACROBLOCKD *xd,
A[1] = A[2] = A[3] = A[0];
L[1] = L[2] = L[3] = L[0];
for (b = 16; b < 24; b += 4) {
stuff_b(cpi, xd, xd->block + b, t, PLANE_TYPE_UV,
A + vp9_block2above_8x8[b],
L + vp9_block2left_8x8[b], TX_8X8, dry_run);
A[vp9_block2above_8x8[b] + 1] = A[vp9_block2above_8x8[b]];
L[vp9_block2left_8x8[b] + 1] = L[vp9_block2left_8x8[b]];
@ -869,6 +966,43 @@ void vp9_stuff_mb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) {
}
}
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
static void stuff_sb_32x32(VP9_COMP *cpi, MACROBLOCKD *xd,
TOKENEXTRA **t, int dry_run) {
ENTROPY_CONTEXT *A[2] = { (ENTROPY_CONTEXT *) (xd->above_context + 0),
(ENTROPY_CONTEXT *) (xd->above_context + 1), };
ENTROPY_CONTEXT *L[2] = { (ENTROPY_CONTEXT *) (xd->left_context + 0),
(ENTROPY_CONTEXT *) (xd->left_context + 1), };
int b;
stuff_b(cpi, xd, xd->block, t, PLANE_TYPE_Y_WITH_DC,
A[0], L[0], TX_32X32, dry_run);
A[0][1] = A[0][2] = A[0][3] = A[0][0];
L[0][1] = L[0][2] = L[0][3] = L[0][0];
for (b = 16; b < 24; b += 4) {
stuff_b(cpi, xd, xd->block + b, t, PLANE_TYPE_UV,
A[0] + vp9_block2above_8x8[b],
L[0] + vp9_block2left_8x8[b], TX_16X16, dry_run);
A[0][vp9_block2above_8x8[b] + 1] = A[0][vp9_block2above_8x8[b]];
L[0][vp9_block2left_8x8[b] + 1] = L[0][vp9_block2left_8x8[b]];
}
vpx_memset(&A[0][8], 0, sizeof(A[0][8]));
vpx_memset(&L[0][8], 0, sizeof(L[0][8]));
vpx_memcpy(A[1], A[0], sizeof(ENTROPY_CONTEXT_PLANES));
vpx_memcpy(L[1], L[0], sizeof(ENTROPY_CONTEXT_PLANES));
}
void vp9_stuff_sb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) {
TOKENEXTRA * const t_backup = *t;
stuff_sb_32x32(cpi, xd, t, dry_run);
if (dry_run) {
*t = t_backup;
}
}
#endif
void vp9_fix_contexts(MACROBLOCKD *xd) {
/* Clear entropy contexts for blocks */
if ((xd->mode_info_context->mbmi.mode != B_PRED
@ -885,3 +1019,10 @@ void vp9_fix_contexts(MACROBLOCKD *xd) {
xd->left_context->y2 = 1;
}
}
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
void vp9_fix_contexts_sb(MACROBLOCKD *xd) {
vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 2);
vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 2);
}
#endif

View file

@ -34,16 +34,29 @@ extern int vp9_mbuv_is_skippable_4x4(MACROBLOCKD *xd);
extern int vp9_mby_is_skippable_8x8(MACROBLOCKD *xd, int has_y2_block);
extern int vp9_mbuv_is_skippable_8x8(MACROBLOCKD *xd);
extern int vp9_mby_is_skippable_16x16(MACROBLOCKD *xd);
#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32
extern int vp9_sby_is_skippable_32x32(MACROBLOCKD *xd);
extern int vp9_sbuv_is_skippable_16x16(MACROBLOCKD *xd);
#endif
struct VP9_COMP;
extern void vp9_tokenize_mb(struct VP9_COMP *cpi, MACROBLOCKD *xd,
TOKENEXTRA **t, int dry_run);
extern void vp9_tokenize_sb(struct VP9_COMP *cpi, MACROBLOCKD *xd,
TOKENEXTRA **t, int dry_run);
extern void vp9_stuff_mb(struct VP9_COMP *cpi, MACROBLOCKD *xd,
TOKENEXTRA **t, int dry_run);
#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32
extern void vp9_stuff_sb(struct VP9_COMP *cpi, MACROBLOCKD *xd,
TOKENEXTRA **t, int dry_run);
#endif
extern void vp9_fix_contexts(MACROBLOCKD *xd);
#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32
extern void vp9_fix_contexts_sb(MACROBLOCKD *xd);
#endif
#ifdef ENTROPY_STATS
void init_context_counters();