vp10/encoder: apply clang-format

Change-Id: I58a42ced5b8a4338524434ff3356850b89aa705a
clang-format 2016-08-11 20:13:14 -07:00 committed by James Zern
Parent 7feae8e84e
Commit d9f9a34bb1
83 changed files with 9018 additions and 11726 deletions

View File

@@ -19,25 +19,27 @@
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_ports/system_state.h"
#define AQ_C_SEGMENTS 5
#define DEFAULT_AQ2_SEG 3 // Neutral Q segment
#define AQ_C_SEGMENTS 5
#define DEFAULT_AQ2_SEG 3 // Neutral Q segment
#define AQ_C_STRENGTHS 3
static const double aq_c_q_adj_factor[AQ_C_STRENGTHS][AQ_C_SEGMENTS] =
{ {1.75, 1.25, 1.05, 1.00, 0.90},
{2.00, 1.50, 1.15, 1.00, 0.85},
{2.50, 1.75, 1.25, 1.00, 0.80} };
static const double aq_c_transitions[AQ_C_STRENGTHS][AQ_C_SEGMENTS] =
{ {0.15, 0.30, 0.55, 2.00, 100.0},
{0.20, 0.40, 0.65, 2.00, 100.0},
{0.25, 0.50, 0.75, 2.00, 100.0} };
static const double aq_c_var_thresholds[AQ_C_STRENGTHS][AQ_C_SEGMENTS] =
{ {-4.0, -3.0, -2.0, 100.00, 100.0},
{-3.5, -2.5, -1.5, 100.00, 100.0},
{-3.0, -2.0, -1.0, 100.00, 100.0} };
static const double aq_c_q_adj_factor[AQ_C_STRENGTHS][AQ_C_SEGMENTS] = {
{ 1.75, 1.25, 1.05, 1.00, 0.90 },
{ 2.00, 1.50, 1.15, 1.00, 0.85 },
{ 2.50, 1.75, 1.25, 1.00, 0.80 }
};
static const double aq_c_transitions[AQ_C_STRENGTHS][AQ_C_SEGMENTS] = {
{ 0.15, 0.30, 0.55, 2.00, 100.0 },
{ 0.20, 0.40, 0.65, 2.00, 100.0 },
{ 0.25, 0.50, 0.75, 2.00, 100.0 }
};
static const double aq_c_var_thresholds[AQ_C_STRENGTHS][AQ_C_SEGMENTS] = {
{ -4.0, -3.0, -2.0, 100.00, 100.0 },
{ -3.5, -2.5, -1.5, 100.00, 100.0 },
{ -3.0, -2.0, -1.0, 100.00, 100.0 }
};
#define DEFAULT_COMPLEXITY 64
static int get_aq_c_strength(int q_index, vpx_bit_depth_t bit_depth) {
// Approximate base quantizer (truncated to int)
const int base_quant = vp10_ac_quant(q_index, 0, bit_depth) / 4;
@@ -81,14 +83,11 @@ void vp10_setup_in_frame_q_adj(VP10_COMP *cpi) {
for (segment = 0; segment < AQ_C_SEGMENTS; ++segment) {
int qindex_delta;
if (segment == DEFAULT_AQ2_SEG)
continue;
qindex_delta =
vp10_compute_qdelta_by_rate(&cpi->rc, cm->frame_type, cm->base_qindex,
aq_c_q_adj_factor[aq_strength][segment],
cm->bit_depth);
if (segment == DEFAULT_AQ2_SEG) continue;
qindex_delta = vp10_compute_qdelta_by_rate(
&cpi->rc, cm->frame_type, cm->base_qindex,
aq_c_q_adj_factor[aq_strength][segment], cm->bit_depth);
// For AQ complexity mode, we don't allow Q0 in a segment if the base
// Q is not 0. Q0 (lossless) implies 4x4 only and in AQ mode 2 a segment
@@ -112,7 +111,7 @@ void vp10_setup_in_frame_q_adj(VP10_COMP *cpi) {
// The choice of segment for a block depends on the ratio of the projected
// bits for the block vs a target average and its spatial complexity.
void vp10_caq_select_segment(VP10_COMP *cpi, MACROBLOCK *mb, BLOCK_SIZE bs,
int mi_row, int mi_col, int projected_rate) {
int mi_row, int mi_col, int projected_rate) {
VP10_COMMON *const cm = &cpi->common;
const int mi_offset = mi_row * cm->mi_cols + mi_col;
@@ -134,19 +133,18 @@ void vp10_caq_select_segment(VP10_COMP *cpi, MACROBLOCK *mb, BLOCK_SIZE bs,
const int aq_strength = get_aq_c_strength(cm->base_qindex, cm->bit_depth);
vpx_clear_system_state();
low_var_thresh = (cpi->oxcf.pass == 2)
? VPXMAX(cpi->twopass.mb_av_energy, MIN_DEFAULT_LV_THRESH)
: DEFAULT_LV_THRESH;
low_var_thresh = (cpi->oxcf.pass == 2) ? VPXMAX(cpi->twopass.mb_av_energy,
MIN_DEFAULT_LV_THRESH)
: DEFAULT_LV_THRESH;
vp10_setup_src_planes(mb, cpi->Source, mi_row, mi_col);
logvar = vp10_log_block_var(cpi, mb, bs);
segment = AQ_C_SEGMENTS - 1; // Just in case no break out below.
segment = AQ_C_SEGMENTS - 1; // Just in case no break out below.
for (i = 0; i < AQ_C_SEGMENTS; ++i) {
// Test rate against a threshold value and variance against a threshold.
// Increasing segment number (higher variance and complexity) = higher Q.
if ((projected_rate <
target_rate * aq_c_transitions[aq_strength][i]) &&
if ((projected_rate < target_rate * aq_c_transitions[aq_strength][i]) &&
(logvar < (low_var_thresh + aq_c_var_thresholds[aq_strength][i]))) {
segment = i;
break;

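The selection loop above stops at the first segment whose rate and variance tests both pass, falling back to the highest segment. A minimal standalone sketch of that rule, using one strength row of the tables above (target_rate and low_var_thresh are illustrative stand-ins for the encoder state):

#include <stdio.h>

#define AQ_C_SEGMENTS 5

/* One strength row of aq_c_transitions / aq_c_var_thresholds above. */
static const double transitions[AQ_C_SEGMENTS] = { 0.15, 0.30, 0.55, 2.00,
                                                   100.0 };
static const double var_thresholds[AQ_C_SEGMENTS] = { -4.0, -3.0, -2.0,
                                                      100.00, 100.0 };

/* Return the first segment passing both tests; default to the last one. */
static int select_segment(int projected_rate, int target_rate, double logvar,
                          double low_var_thresh) {
  int i;
  for (i = 0; i < AQ_C_SEGMENTS; ++i) {
    if (projected_rate < target_rate * transitions[i] &&
        logvar < low_var_thresh + var_thresholds[i])
      return i;
  }
  return AQ_C_SEGMENTS - 1;
}

int main(void) {
  /* A cheap, flat block passes the first thresholds and lands in segment 0,
     the segment with the largest rate factor in aq_c_q_adj_factor. */
  printf("%d\n", select_segment(100, 1000, 1.0, 8.0)); /* prints 0 */
  return 0;
}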
View File

@@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP10_ENCODER_AQ_COMPLEXITY_H_
#define VP10_ENCODER_AQ_COMPLEXITY_H_
@@ -23,8 +22,8 @@ struct macroblock;
// Select a segment for the current Block.
void vp10_caq_select_segment(struct VP10_COMP *cpi, struct macroblock *,
BLOCK_SIZE bs,
int mi_row, int mi_col, int projected_rate);
BLOCK_SIZE bs, int mi_row, int mi_col,
int projected_rate);
// This function sets up a set of segments with delta Q values around
// the baseline frame quantizer.

View File

@@ -59,8 +59,7 @@ struct CYCLIC_REFRESH {
CYCLIC_REFRESH *vp10_cyclic_refresh_alloc(int mi_rows, int mi_cols) {
size_t last_coded_q_map_size;
CYCLIC_REFRESH *const cr = vpx_calloc(1, sizeof(*cr));
if (cr == NULL)
return NULL;
if (cr == NULL) return NULL;
cr->map = vpx_calloc(mi_rows * mi_cols, sizeof(*cr->map));
if (cr->map == NULL) {
@@ -94,7 +93,7 @@ static int apply_cyclic_refresh_bitrate(const VP10_COMMON *cm,
// Average bits available per frame = avg_frame_bandwidth
// Number of (8x8) blocks in frame = mi_rows * mi_cols;
const float factor = 0.25;
const int number_blocks = cm->mi_rows * cm->mi_cols;
const int number_blocks = cm->mi_rows * cm->mi_cols;
// The condition below corresponds to turning off at target bitrates:
// (at 30fps), ~12kbps for CIF, 36kbps for VGA, 100kbps for HD/720p.
// Also turn off at very small frame sizes, to avoid too large fraction of
@@ -111,10 +110,8 @@
// size of the coding block (i.e., below min_block size rejected), coding
// mode, and rate/distortion.
static int candidate_refresh_aq(const CYCLIC_REFRESH *cr,
const MB_MODE_INFO *mbmi,
int64_t rate,
int64_t dist,
int bsize) {
const MB_MODE_INFO *mbmi, int64_t rate,
int64_t dist, int bsize) {
MV mv = mbmi->mv[0].as_mv;
// Reject the block for lower-qp coding if projected distortion
// is above the threshold, and any of the following is true:
@@ -126,11 +123,9 @@ static int candidate_refresh_aq(const CYCLIC_REFRESH *cr,
mv.col > cr->motion_thresh || mv.col < -cr->motion_thresh ||
!is_inter_block(mbmi)))
return CR_SEGMENT_ID_BASE;
else if (bsize >= BLOCK_16X16 &&
rate < cr->thresh_rate_sb &&
is_inter_block(mbmi) &&
mbmi->mv[0].as_int == 0 &&
cr->rate_boost_fac > 10)
else if (bsize >= BLOCK_16X16 && rate < cr->thresh_rate_sb &&
is_inter_block(mbmi) && mbmi->mv[0].as_int == 0 &&
cr->rate_boost_fac > 10)
// More aggressive delta-q for bigger blocks with zero motion.
return CR_SEGMENT_ID_BOOST2;
else
@@ -141,9 +136,8 @@ static int candidate_refresh_aq(const CYCLIC_REFRESH *cr,
static int compute_deltaq(const VP10_COMP *cpi, int q, double rate_factor) {
const CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
const RATE_CONTROL *const rc = &cpi->rc;
int deltaq = vp10_compute_qdelta_by_rate(rc, cpi->common.frame_type,
q, rate_factor,
cpi->common.bit_depth);
int deltaq = vp10_compute_qdelta_by_rate(rc, cpi->common.frame_type, q,
rate_factor, cpi->common.bit_depth);
if ((-deltaq) > cr->max_qdelta_perc * q / 100) {
deltaq = -cr->max_qdelta_perc * q / 100;
}
@@ -155,7 +149,7 @@ static int compute_deltaq(const VP10_COMP *cpi, int q, double rate_factor) {
// (with different delta-q). Note this function is called in the postencode
// (called from rc_update_rate_correction_factors()).
int vp10_cyclic_refresh_estimate_bits_at_q(const VP10_COMP *cpi,
double correction_factor) {
double correction_factor) {
const VP10_COMMON *const cm = &cpi->common;
const CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
int estimated_bits;
@@ -166,17 +160,18 @@ int vp10_cyclic_refresh_estimate_bits_at_q(const VP10_COMP *cpi,
double weight_segment1 = (double)cr->actual_num_seg1_blocks / num8x8bl;
double weight_segment2 = (double)cr->actual_num_seg2_blocks / num8x8bl;
// Take segment weighted average for estimated bits.
estimated_bits = (int)((1.0 - weight_segment1 - weight_segment2) *
vp10_estimate_bits_at_q(cm->frame_type, cm->base_qindex, mbs,
correction_factor, cm->bit_depth) +
weight_segment1 *
vp10_estimate_bits_at_q(cm->frame_type,
cm->base_qindex + cr->qindex_delta[1], mbs,
correction_factor, cm->bit_depth) +
weight_segment2 *
vp10_estimate_bits_at_q(cm->frame_type,
cm->base_qindex + cr->qindex_delta[2], mbs,
correction_factor, cm->bit_depth));
estimated_bits =
(int)((1.0 - weight_segment1 - weight_segment2) *
vp10_estimate_bits_at_q(cm->frame_type, cm->base_qindex, mbs,
correction_factor, cm->bit_depth) +
weight_segment1 *
vp10_estimate_bits_at_q(cm->frame_type,
cm->base_qindex + cr->qindex_delta[1],
mbs, correction_factor, cm->bit_depth) +
weight_segment2 *
vp10_estimate_bits_at_q(cm->frame_type,
cm->base_qindex + cr->qindex_delta[2],
mbs, correction_factor, cm->bit_depth));
return estimated_bits;
}
@@ -186,24 +181,28 @@ int vp10_cyclic_refresh_estimate_bits_at_q(const VP10_COMP *cpi,
// Note: the segment map is set to either 0/CR_SEGMENT_ID_BASE (no refresh) or
// to 1/CR_SEGMENT_ID_BOOST1 (refresh) for each superblock, prior to encoding.
int vp10_cyclic_refresh_rc_bits_per_mb(const VP10_COMP *cpi, int i,
double correction_factor) {
double correction_factor) {
const VP10_COMMON *const cm = &cpi->common;
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
int bits_per_mb;
int num8x8bl = cm->MBs << 2;
// Weight for segment prior to encoding: take the average of the target
// number for the frame to be encoded and the actual from the previous frame.
double weight_segment = (double)((cr->target_num_seg_blocks +
cr->actual_num_seg1_blocks + cr->actual_num_seg2_blocks) >> 1) /
double weight_segment =
(double)((cr->target_num_seg_blocks + cr->actual_num_seg1_blocks +
cr->actual_num_seg2_blocks) >>
1) /
num8x8bl;
// Compute delta-q corresponding to qindex i.
int deltaq = compute_deltaq(cpi, i, cr->rate_ratio_qdelta);
// Take segment weighted average for bits per mb.
bits_per_mb = (int)((1.0 - weight_segment) *
vp10_rc_bits_per_mb(cm->frame_type, i, correction_factor, cm->bit_depth) +
weight_segment *
vp10_rc_bits_per_mb(cm->frame_type, i + deltaq, correction_factor,
cm->bit_depth));
bits_per_mb =
(int)((1.0 - weight_segment) * vp10_rc_bits_per_mb(cm->frame_type, i,
correction_factor,
cm->bit_depth) +
weight_segment * vp10_rc_bits_per_mb(cm->frame_type, i + deltaq,
correction_factor,
cm->bit_depth));
return bits_per_mb;
}
@@ -211,12 +210,9 @@ int vp10_cyclic_refresh_rc_bits_per_mb(const VP10_COMP *cpi, int i,
// check if we should reset the segment_id, and update the cyclic_refresh map
// and segmentation map.
void vp10_cyclic_refresh_update_segment(VP10_COMP *const cpi,
MB_MODE_INFO *const mbmi,
int mi_row, int mi_col,
BLOCK_SIZE bsize,
int64_t rate,
int64_t dist,
int skip) {
MB_MODE_INFO *const mbmi, int mi_row,
int mi_col, BLOCK_SIZE bsize,
int64_t rate, int64_t dist, int skip) {
const VP10_COMMON *const cm = &cpi->common;
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
const int bw = num_8x8_blocks_wide_lookup[bsize];
@@ -224,19 +220,19 @@ void vp10_cyclic_refresh_update_segment(VP10_COMP *const cpi,
const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
const int ymis = VPXMIN(cm->mi_rows - mi_row, bh);
const int block_index = mi_row * cm->mi_cols + mi_col;
const int refresh_this_block = candidate_refresh_aq(cr, mbmi, rate, dist,
bsize);
const int refresh_this_block =
candidate_refresh_aq(cr, mbmi, rate, dist, bsize);
// Default is to not update the refresh map.
int new_map_value = cr->map[block_index];
int x = 0; int y = 0;
int x = 0;
int y = 0;
// If this block is labeled for refresh, check if we should reset the
// segment_id.
if (cyclic_refresh_segment_id_boosted(mbmi->segment_id)) {
mbmi->segment_id = refresh_this_block;
// Reset segment_id if will be skipped.
if (skip)
mbmi->segment_id = CR_SEGMENT_ID_BASE;
if (skip) mbmi->segment_id = CR_SEGMENT_ID_BASE;
}
// Update the cyclic refresh map, to be used for setting segmentation map
@@ -249,8 +245,7 @@ void vp10_cyclic_refresh_update_segment(VP10_COMP *const cpi,
// Else if it is accepted as candidate for refresh, and has not already
// been refreshed (marked as 1) then mark it as a candidate for cleanup
// for future time (marked as 0), otherwise don't update it.
if (cr->map[block_index] == 1)
new_map_value = 0;
if (cr->map[block_index] == 1) new_map_value = 0;
} else {
// Leave it marked as block that is not candidate for refresh.
new_map_value = 1;
@@ -291,11 +286,12 @@ void vp10_cyclic_refresh_postencode(VP10_COMP *const cpi) {
cr->actual_num_seg2_blocks = 0;
for (mi_row = 0; mi_row < cm->mi_rows; mi_row++)
for (mi_col = 0; mi_col < cm->mi_cols; mi_col++) {
if (cyclic_refresh_segment_id(
seg_map[mi_row * cm->mi_cols + mi_col]) == CR_SEGMENT_ID_BOOST1)
if (cyclic_refresh_segment_id(seg_map[mi_row * cm->mi_cols + mi_col]) ==
CR_SEGMENT_ID_BOOST1)
cr->actual_num_seg1_blocks++;
else if (cyclic_refresh_segment_id(
seg_map[mi_row * cm->mi_cols + mi_col]) == CR_SEGMENT_ID_BOOST2)
seg_map[mi_row * cm->mi_cols + mi_col]) ==
CR_SEGMENT_ID_BOOST2)
cr->actual_num_seg2_blocks++;
}
}
@@ -334,22 +330,22 @@ void vp10_cyclic_refresh_check_golden_update(VP10_COMP *const cpi) {
mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
for (mi_col = 0; mi_col < cols; mi_col++) {
int16_t abs_mvr = mi[0]->mbmi.mv[0].as_mv.row >= 0 ?
mi[0]->mbmi.mv[0].as_mv.row : -1 * mi[0]->mbmi.mv[0].as_mv.row;
int16_t abs_mvc = mi[0]->mbmi.mv[0].as_mv.col >= 0 ?
mi[0]->mbmi.mv[0].as_mv.col : -1 * mi[0]->mbmi.mv[0].as_mv.col;
int16_t abs_mvr = mi[0]->mbmi.mv[0].as_mv.row >= 0
? mi[0]->mbmi.mv[0].as_mv.row
: -1 * mi[0]->mbmi.mv[0].as_mv.row;
int16_t abs_mvc = mi[0]->mbmi.mv[0].as_mv.col >= 0
? mi[0]->mbmi.mv[0].as_mv.col
: -1 * mi[0]->mbmi.mv[0].as_mv.col;
// Calculate the motion of the background.
if (abs_mvr <= 16 && abs_mvc <= 16) {
cnt1++;
if (abs_mvr == 0 && abs_mvc == 0)
cnt2++;
if (abs_mvr == 0 && abs_mvc == 0) cnt2++;
}
mi++;
// Accumulate low_content_frame.
if (cr->map[mi_row * cols + mi_col] < 1)
low_content_frame++;
if (cr->map[mi_row * cols + mi_col] < 1) low_content_frame++;
}
}
@@ -359,7 +355,7 @@ void vp10_cyclic_refresh_check_golden_update(VP10_COMP *const cpi) {
// Also, force this frame as a golden update frame if this frame will change
// the resolution (resize_pending != 0).
if (cpi->resize_pending != 0 ||
(cnt1 * 10 > (70 * rows * cols) && cnt2 * 20 < cnt1)) {
(cnt1 * 10 > (70 * rows * cols) && cnt2 * 20 < cnt1)) {
vp10_cyclic_refresh_set_golden_update(cpi);
rc->frames_till_gf_update_due = rc->baseline_gf_interval;
@@ -369,8 +365,7 @@ void vp10_cyclic_refresh_check_golden_update(VP10_COMP *const cpi) {
force_gf_refresh = 1;
}
fraction_low =
(double)low_content_frame / (rows * cols);
fraction_low = (double)low_content_frame / (rows * cols);
// Update average.
cr->low_content_avg = (fraction_low + 3 * cr->low_content_avg) / 4;
if (!force_gf_refresh && cpi->refresh_golden_frame == 1) {
@@ -432,8 +427,7 @@ static void cyclic_refresh_update_map(VP10_COMP *const cpi) {
// for possible boost/refresh (segment 1). The segment id may get
// reset to 0 later if block gets coded anything other than ZEROMV.
if (cr->map[bl_index2] == 0) {
if (cr->last_coded_q_map[bl_index2] > qindex_thresh)
sum_map++;
if (cr->last_coded_q_map[bl_index2] > qindex_thresh) sum_map++;
} else if (cr->map[bl_index2] < 0) {
cr->map[bl_index2]++;
}
@@ -466,14 +460,12 @@ void vp10_cyclic_refresh_update_parameters(VP10_COMP *const cpi) {
cr->time_for_refresh = 0;
// Use larger delta-qp (increase rate_ratio_qdelta) for first few (~4)
// periods of the refresh cycle, after a key frame.
if (rc->frames_since_key < 4 * cr->percent_refresh)
if (rc->frames_since_key < 4 * cr->percent_refresh)
cr->rate_ratio_qdelta = 3.0;
else
cr->rate_ratio_qdelta = 2.0;
// Adjust some parameters for low resolutions at low bitrates.
if (cm->width <= 352 &&
cm->height <= 288 &&
rc->avg_frame_bandwidth < 3400) {
if (cm->width <= 352 && cm->height <= 288 && rc->avg_frame_bandwidth < 3400) {
cr->motion_thresh = 4;
cr->rate_boost_fac = 10;
} else {
@@ -488,9 +480,8 @@ void vp10_cyclic_refresh_setup(VP10_COMP *const cpi) {
const RATE_CONTROL *const rc = &cpi->rc;
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
struct segmentation *const seg = &cm->seg;
const int apply_cyclic_refresh = apply_cyclic_refresh_bitrate(cm, rc);
if (cm->current_video_frame == 0)
cr->low_content_avg = 0.0;
const int apply_cyclic_refresh = apply_cyclic_refresh_bitrate(cm, rc);
if (cm->current_video_frame == 0) cr->low_content_avg = 0.0;
// Don't apply refresh on key frame or enhancement layer frames.
if (!apply_cyclic_refresh || cm->frame_type == KEY_FRAME) {
// Set segmentation map to 0 and disable.
@@ -524,7 +515,8 @@ void vp10_cyclic_refresh_setup(VP10_COMP *const cpi) {
seg->abs_delta = SEGMENT_DELTADATA;
// Note: setting temporal_update has no effect, as the seg-map coding method
// (temporal or spatial) is determined in vp10_choose_segmap_coding_method(),
// (temporal or spatial) is determined in
// vp10_choose_segmap_coding_method(),
// based on the coding cost of each method. For error_resilient mode on the
// last_frame_seg_map is set to 0, so if temporal coding is used, it is
// relative to 0 previous map.

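The rate-control hooks above blend per-segment estimates by the fraction of blocks in each segment, e.g. bits_per_mb = (1 - w) * bpm(q) + w * bpm(q + deltaq) in vp10_cyclic_refresh_rc_bits_per_mb(). A toy sketch of that blend, with a made-up rate model standing in for vp10_rc_bits_per_mb():

#include <stdio.h>

/* Stand-in for vp10_rc_bits_per_mb(); the real model also depends on the
   frame type and a correction factor. */
static int bits_per_mb_at_q(int q) { return 6000 / (q + 8); }

int main(void) {
  const int q = 40, deltaq = -12;     /* the boosted segment runs at lower q */
  const double weight_segment = 0.15; /* fraction of blocks being refreshed */
  const int blended =
      (int)((1.0 - weight_segment) * bits_per_mb_at_q(q) +
            weight_segment * bits_per_mb_at_q(q + deltaq));
  printf("%d\n", blended); /* 131, between the per-segment rates 125 and 166 */
  return 0;
}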
View File

@@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP10_ENCODER_AQ_CYCLICREFRESH_H_
#define VP10_ENCODER_AQ_CYCLICREFRESH_H_
@@ -20,9 +19,9 @@ extern "C" {
// The segment ids used in cyclic refresh: from base (no boost) to increasing
// boost (higher delta-qp).
#define CR_SEGMENT_ID_BASE 0
#define CR_SEGMENT_ID_BOOST1 1
#define CR_SEGMENT_ID_BOOST2 2
#define CR_SEGMENT_ID_BASE 0
#define CR_SEGMENT_ID_BOOST1 1
#define CR_SEGMENT_ID_BOOST2 2
// Maximum rate target ratio for setting segment delta-qp.
#define CR_MAX_RATE_TARGET_RATIO 4.0
@@ -39,20 +38,20 @@ void vp10_cyclic_refresh_free(CYCLIC_REFRESH *cr);
// Estimate the bits, incorporating the delta-q from segment 1, after encoding
// the frame.
int vp10_cyclic_refresh_estimate_bits_at_q(const struct VP10_COMP *cpi,
double correction_factor);
double correction_factor);
// Estimate the bits per mb, for a given q = i and a corresponding delta-q
// (for segment 1), prior to encoding the frame.
int vp10_cyclic_refresh_rc_bits_per_mb(const struct VP10_COMP *cpi, int i,
double correction_factor);
double correction_factor);
// Prior to coding a given prediction block, of size bsize at (mi_row, mi_col),
// check if we should reset the segment_id, and update the cyclic_refresh map
// and segmentation map.
void vp10_cyclic_refresh_update_segment(struct VP10_COMP *const cpi,
MB_MODE_INFO *const mbmi,
int mi_row, int mi_col, BLOCK_SIZE bsize,
int64_t rate, int64_t dist, int skip);
MB_MODE_INFO *const mbmi, int mi_row,
int mi_col, BLOCK_SIZE bsize,
int64_t rate, int64_t dist, int skip);
// Update the segmentation map, and related quantities: cyclic refresh map,
// refresh sb_index, and target number of blocks to be refreshed.

View File

@@ -22,21 +22,20 @@
#define ENERGY_MIN (-4)
#define ENERGY_MAX (1)
#define ENERGY_SPAN (ENERGY_MAX - ENERGY_MIN + 1)
#define ENERGY_IN_BOUNDS(energy)\
#define ENERGY_SPAN (ENERGY_MAX - ENERGY_MIN + 1)
#define ENERGY_IN_BOUNDS(energy) \
assert((energy) >= ENERGY_MIN && (energy) <= ENERGY_MAX)
static const double rate_ratio[MAX_SEGMENTS] =
{2.5, 2.0, 1.5, 1.0, 0.75, 1.0, 1.0, 1.0};
static const int segment_id[ENERGY_SPAN] = {0, 1, 1, 2, 3, 4};
static const double rate_ratio[MAX_SEGMENTS] = { 2.5, 2.0, 1.5, 1.0,
0.75, 1.0, 1.0, 1.0 };
static const int segment_id[ENERGY_SPAN] = { 0, 1, 1, 2, 3, 4 };
#define SEGMENT_ID(i) segment_id[(i) - ENERGY_MIN]
#define SEGMENT_ID(i) segment_id[(i)-ENERGY_MIN]
DECLARE_ALIGNED(16, static const uint8_t,
vp10_all_zeros[MAX_SB_SIZE]) = {0};
DECLARE_ALIGNED(16, static const uint8_t, vp10_all_zeros[MAX_SB_SIZE]) = { 0 };
#if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED(16, static const uint16_t,
vp10_highbd_all_zeros[MAX_SB_SIZE]) = {0};
vp10_highbd_all_zeros[MAX_SB_SIZE]) = { 0 };
#endif
unsigned int vp10_vaq_segment_id(int energy) {
@@ -64,7 +63,7 @@ void vp10_vaq_frame_setup(VP10_COMP *cpi) {
for (i = 0; i < MAX_SEGMENTS; ++i) {
int qindex_delta =
vp10_compute_qdelta_by_rate(&cpi->rc, cm->frame_type, cm->base_qindex,
rate_ratio[i], cm->bit_depth);
rate_ratio[i], cm->bit_depth);
// We don't allow qindex 0 in a segment if the base value is not 0.
// Q index 0 (lossless) implies 4x4 encoding only and in AQ mode a segment
@@ -88,9 +87,9 @@ void vp10_vaq_frame_setup(VP10_COMP *cpi) {
/* TODO(agrange, paulwilkins): The block_variance calls the unoptimized versions
* of variance() and highbd_8_variance(). It should not.
*/
static void aq_variance(const uint8_t *a, int a_stride,
const uint8_t *b, int b_stride,
int w, int h, unsigned int *sse, int *sum) {
static void aq_variance(const uint8_t *a, int a_stride, const uint8_t *b,
int b_stride, int w, int h, unsigned int *sse,
int *sum) {
int i, j;
*sum = 0;
@@ -109,9 +108,9 @@ static void aq_variance(const uint8_t *a, int a_stride,
}
#if CONFIG_VP9_HIGHBITDEPTH
static void aq_highbd_variance64(const uint8_t *a8, int a_stride,
const uint8_t *b8, int b_stride,
int w, int h, uint64_t *sse, uint64_t *sum) {
static void aq_highbd_variance64(const uint8_t *a8, int a_stride,
const uint8_t *b8, int b_stride, int w, int h,
uint64_t *sse, uint64_t *sum) {
int i, j;
uint16_t *a = CONVERT_TO_SHORTPTR(a8);
@@ -130,9 +129,9 @@ static void aq_highbd_variance64(const uint8_t *a8, int a_stride,
}
}
static void aq_highbd_8_variance(const uint8_t *a8, int a_stride,
const uint8_t *b8, int b_stride,
int w, int h, unsigned int *sse, int *sum) {
static void aq_highbd_8_variance(const uint8_t *a8, int a_stride,
const uint8_t *b8, int b_stride, int w, int h,
unsigned int *sse, int *sum) {
uint64_t sse_long = 0;
uint64_t sum_long = 0;
aq_highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
@@ -145,10 +144,10 @@ static unsigned int block_variance(VP10_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bs) {
MACROBLOCKD *xd = &x->e_mbd;
unsigned int var, sse;
int right_overflow = (xd->mb_to_right_edge < 0) ?
((-xd->mb_to_right_edge) >> 3) : 0;
int bottom_overflow = (xd->mb_to_bottom_edge < 0) ?
((-xd->mb_to_bottom_edge) >> 3) : 0;
int right_overflow =
(xd->mb_to_right_edge < 0) ? ((-xd->mb_to_right_edge) >> 3) : 0;
int bottom_overflow =
(xd->mb_to_bottom_edge < 0) ? ((-xd->mb_to_bottom_edge) >> 3) : 0;
if (right_overflow || bottom_overflow) {
const int bw = 8 * num_8x8_blocks_wide_lookup[bs] - right_overflow;
@@ -162,30 +161,27 @@ static unsigned int block_variance(VP10_COMP *cpi, MACROBLOCK *x,
sse >>= 2 * (xd->bd - 8);
avg >>= (xd->bd - 8);
} else {
aq_variance(x->plane[0].src.buf, x->plane[0].src.stride,
vp10_all_zeros, 0, bw, bh, &sse, &avg);
aq_variance(x->plane[0].src.buf, x->plane[0].src.stride, vp10_all_zeros,
0, bw, bh, &sse, &avg);
}
#else
aq_variance(x->plane[0].src.buf, x->plane[0].src.stride,
vp10_all_zeros, 0, bw, bh, &sse, &avg);
aq_variance(x->plane[0].src.buf, x->plane[0].src.stride, vp10_all_zeros, 0,
bw, bh, &sse, &avg);
#endif // CONFIG_VP9_HIGHBITDEPTH
var = sse - (((int64_t)avg * avg) / (bw * bh));
return (256 * var) / (bw * bh);
} else {
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf,
x->plane[0].src.stride,
CONVERT_TO_BYTEPTR(vp10_highbd_all_zeros),
0, &sse);
var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf, x->plane[0].src.stride,
CONVERT_TO_BYTEPTR(vp10_highbd_all_zeros), 0,
&sse);
} else {
var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf,
x->plane[0].src.stride,
var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf, x->plane[0].src.stride,
vp10_all_zeros, 0, &sse);
}
#else
var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf,
x->plane[0].src.stride,
var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf, x->plane[0].src.stride,
vp10_all_zeros, 0, &sse);
#endif // CONFIG_VP9_HIGHBITDEPTH
return (256 * var) >> num_pels_log2_lookup[bs];
@@ -204,7 +200,7 @@ int vp10_block_energy(VP10_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs) {
double energy_midpoint;
vpx_clear_system_state();
energy_midpoint =
(cpi->oxcf.pass == 2) ? cpi->twopass.mb_av_energy : DEFAULT_E_MIDPOINT;
(cpi->oxcf.pass == 2) ? cpi->twopass.mb_av_energy : DEFAULT_E_MIDPOINT;
energy = vp10_log_block_var(cpi, x, bs) - energy_midpoint;
return clamp((int)round(energy), ENERGY_MIN, ENERGY_MAX);
}

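block_variance() above measures the source against an all-zero reference, so the result reduces to the identity var = sse - sum^2 / n, scaled to a 256-pixel basis. A small self-contained sketch of that computation:

#include <stdint.h>
#include <stdio.h>

/* Variance of an 8-bit block against a zero reference, scaled by
   256 / (bw * bh) as in block_variance() above. */
static unsigned int zero_ref_variance(const uint8_t *buf, int stride, int bw,
                                      int bh) {
  unsigned int sse = 0;
  int sum = 0, i, j;
  for (i = 0; i < bh; ++i)
    for (j = 0; j < bw; ++j) {
      sum += buf[i * stride + j];
      sse += buf[i * stride + j] * buf[i * stride + j];
    }
  return (unsigned int)((256 * (sse - ((int64_t)sum * sum) / (bw * bh))) /
                        (bw * bh));
}

int main(void) {
  const uint8_t block[4] = { 0, 0, 255, 255 }; /* 2x2: half dark, half bright */
  printf("%u\n", zero_ref_variance(block, 2, 2, 2)); /* prints 4161600 */
  return 0;
}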
View File

@@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP10_ENCODER_AQ_VARIANCE_H_
#define VP10_ENCODER_AQ_VARIANCE_H_

View File

@@ -17,20 +17,17 @@
#include "vp10/common/blockd.h"
#include "vpx_dsp/txfm_common.h"
void vp10_fdct8x8_quant_neon(const int16_t *input, int stride,
int16_t* coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t* zbin_ptr,
const int16_t* round_ptr, const int16_t* quant_ptr,
const int16_t* quant_shift_ptr,
int16_t* qcoeff_ptr, int16_t* dqcoeff_ptr,
const int16_t* dequant_ptr, uint16_t* eob_ptr,
const int16_t* scan_ptr,
const int16_t* iscan_ptr) {
void vp10_fdct8x8_quant_neon(
const int16_t *input, int stride, int16_t *coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr,
const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr,
uint16_t *eob_ptr, const int16_t *scan_ptr, const int16_t *iscan_ptr) {
int16_t temp_buffer[64];
(void)coeff_ptr;
vpx_fdct8x8_neon(input, temp_buffer, stride);
vp10_quantize_fp_neon(temp_buffer, n_coeffs, skip_block, zbin_ptr, round_ptr,
quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
dequant_ptr, eob_ptr, scan_ptr, iscan_ptr);
quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
dequant_ptr, eob_ptr, scan_ptr, iscan_ptr);
}

View File

@@ -14,7 +14,7 @@
#include "./vp10_rtcd.h"
int64_t vp10_block_error_fp_neon(const int16_t *coeff, const int16_t *dqcoeff,
int block_size) {
int block_size) {
int64x2_t error = vdupq_n_s64(0);
assert(block_size >= 8);

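vp10_block_error_fp_neon() above accumulates the coefficient error in 64-bit lanes. A scalar reference for the same quantity, assuming the usual definition of the fp block error (sum of squared differences between original and dequantized coefficients):

#include <stdint.h>
#include <stdio.h>

/* Scalar form of the error the NEON kernel above vectorizes. */
static int64_t block_error_fp(const int16_t *coeff, const int16_t *dqcoeff,
                              int block_size) {
  int64_t error = 0;
  int i;
  for (i = 0; i < block_size; ++i) {
    const int diff = coeff[i] - dqcoeff[i];
    error += (int64_t)diff * diff;
  }
  return error;
}

int main(void) {
  const int16_t c[8] = { 10, -4, 3, 0, 0, 0, 0, 0 };
  const int16_t d[8] = { 9, -4, 0, 0, 0, 0, 0, 0 };
  printf("%lld\n", (long long)block_error_fp(c, d, 8)); /* 1 + 9 = 10 */
  return 0;
}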
View File

@@ -22,12 +22,12 @@
#include "vp10/encoder/rd.h"
void vp10_quantize_fp_neon(const int16_t *coeff_ptr, intptr_t count,
int skip_block, const int16_t *zbin_ptr,
const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr,
int16_t *dqcoeff_ptr, const int16_t *dequant_ptr,
uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
int skip_block, const int16_t *zbin_ptr,
const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr,
int16_t *dqcoeff_ptr, const int16_t *dequant_ptr,
uint16_t *eob_ptr, const int16_t *scan,
const int16_t *iscan) {
// TODO(jingning) Decide the need of these arguments after the
// quantization process is completed.
(void)zbin_ptr;
@@ -54,12 +54,12 @@ void vp10_quantize_fp_neon(const int16_t *coeff_ptr, intptr_t count,
const int16x8_t v_coeff = vld1q_s16(&coeff_ptr[0]);
const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15);
const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero);
const int32x4_t v_tmp_lo = vmull_s16(vget_low_s16(v_tmp),
vget_low_s16(v_quant));
const int32x4_t v_tmp_hi = vmull_s16(vget_high_s16(v_tmp),
vget_high_s16(v_quant));
const int16x8_t v_tmp2 = vcombine_s16(vshrn_n_s32(v_tmp_lo, 16),
vshrn_n_s32(v_tmp_hi, 16));
const int32x4_t v_tmp_lo =
vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant));
const int32x4_t v_tmp_hi =
vmull_s16(vget_high_s16(v_tmp), vget_high_s16(v_quant));
const int16x8_t v_tmp2 =
vcombine_s16(vshrn_n_s32(v_tmp_lo, 16), vshrn_n_s32(v_tmp_hi, 16));
const uint16x8_t v_nz_mask = vceqq_s16(v_tmp2, v_zero);
const int16x8_t v_iscan_plus1 = vaddq_s16(v_iscan, v_one);
const int16x8_t v_nz_iscan = vbslq_s16(v_nz_mask, v_zero, v_iscan_plus1);
@@ -79,12 +79,12 @@ void vp10_quantize_fp_neon(const int16_t *coeff_ptr, intptr_t count,
const int16x8_t v_coeff = vld1q_s16(&coeff_ptr[i]);
const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15);
const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero);
const int32x4_t v_tmp_lo = vmull_s16(vget_low_s16(v_tmp),
vget_low_s16(v_quant));
const int32x4_t v_tmp_hi = vmull_s16(vget_high_s16(v_tmp),
vget_high_s16(v_quant));
const int16x8_t v_tmp2 = vcombine_s16(vshrn_n_s32(v_tmp_lo, 16),
vshrn_n_s32(v_tmp_hi, 16));
const int32x4_t v_tmp_lo =
vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant));
const int32x4_t v_tmp_hi =
vmull_s16(vget_high_s16(v_tmp), vget_high_s16(v_quant));
const int16x8_t v_tmp2 =
vcombine_s16(vshrn_n_s32(v_tmp_lo, 16), vshrn_n_s32(v_tmp_hi, 16));
const uint16x8_t v_nz_mask = vceqq_s16(v_tmp2, v_zero);
const int16x8_t v_iscan_plus1 = vaddq_s16(v_iscan, v_one);
const int16x8_t v_nz_iscan = vbslq_s16(v_nz_mask, v_zero, v_iscan_plus1);
@@ -96,9 +96,8 @@ void vp10_quantize_fp_neon(const int16_t *coeff_ptr, intptr_t count,
vst1q_s16(&dqcoeff_ptr[i], v_dqcoeff);
}
{
const int16x4_t v_eobmax_3210 =
vmax_s16(vget_low_s16(v_eobmax_76543210),
vget_high_s16(v_eobmax_76543210));
const int16x4_t v_eobmax_3210 = vmax_s16(
vget_low_s16(v_eobmax_76543210), vget_high_s16(v_eobmax_76543210));
const int64x1_t v_eobmax_xx32 =
vshr_n_s64(vreinterpret_s64_s16(v_eobmax_3210), 32);
const int16x4_t v_eobmax_tmp =

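Per coefficient, the kernel above computes (|coeff| + round) * quant >> 16 (the vabaq/vmull/vshrn sequence), folds the sign back in, and forms dqcoeff = qcoeff * dequant while tracking the highest nonzero iscan position for eob. A one-coefficient scalar sketch of that step (the constants are illustrative, not real quantizer tables):

#include <stdint.h>
#include <stdio.h>

/* Scalar equivalent of one lane of the vp10_quantize_fp_neon inner loop. */
static void quantize_one(int16_t coeff, int16_t round, int16_t quant,
                         int16_t dequant, int16_t *qcoeff, int16_t *dqcoeff) {
  const int abs_coeff = coeff < 0 ? -coeff : coeff;
  const int tmp = ((abs_coeff + round) * quant) >> 16; /* vmull + vshrn */
  *qcoeff = (int16_t)(coeff < 0 ? -tmp : tmp);
  *dqcoeff = (int16_t)(*qcoeff * dequant);
}

int main(void) {
  int16_t q, dq;
  quantize_one(-100, 8, 22000, 180, &q, &dq);
  printf("%d %d\n", q, dq); /* -36 -6480 */
  return 0;
}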
The file diff is not shown because it is too large. Load Diff

View File

@@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP10_ENCODER_BITSTREAM_H_
#define VP10_ENCODER_BITSTREAM_H_

View File

@@ -51,7 +51,7 @@ typedef struct macroblock_plane {
/* The [2] dimension is for whether we skip the EOB node (i.e. if previous
* coefficient in this block was zero) or not. */
typedef unsigned int vp10_coeff_cost[PLANE_TYPES][REF_TYPES][COEF_BANDS][2]
[COEFF_CONTEXTS][ENTROPY_TOKENS];
[COEFF_CONTEXTS][ENTROPY_TOKENS];
typedef struct {
int_mv ref_mvs[MODE_CTX_REF_FRAMES][MAX_MV_REF_CANDIDATES];
@@ -93,8 +93,8 @@ struct macroblock {
int rddiv;
int rdmult;
int mb_energy;
int * m_search_count_ptr;
int * ex_search_count_ptr;
int *m_search_count_ptr;
int *ex_search_count_ptr;
// These are set to their default values at the beginning, and then adjusted
// further in the encoding process.

View File

@@ -70,9 +70,9 @@ static int blockiness_vertical(const uint8_t *s, int sp, const uint8_t *r,
s_blockiness += horizontal_filter(s);
r_blockiness += horizontal_filter(r);
sum_0 += s[0];
sum_sq_0 += s[0]*s[0];
sum_sq_0 += s[0] * s[0];
sum_1 += s[-1];
sum_sq_1 += s[-1]*s[-1];
sum_sq_1 += s[-1] * s[-1];
}
var_0 = variance(sum_0, sum_sq_0, size);
var_1 = variance(sum_1, sum_sq_1, size);
@@ -120,19 +120,19 @@ static int blockiness_horizontal(const uint8_t *s, int sp, const uint8_t *r,
// This function returns the blockiness for the entire frame currently by
// looking at all borders in steps of 4.
double vp10_get_blockiness(const unsigned char *img1, int img1_pitch,
const unsigned char *img2, int img2_pitch,
int width, int height ) {
const unsigned char *img2, int img2_pitch, int width,
int height) {
double blockiness = 0;
int i, j;
vpx_clear_system_state();
for (i = 0; i < height; i += 4, img1 += img1_pitch * 4,
img2 += img2_pitch * 4) {
for (i = 0; i < height;
i += 4, img1 += img1_pitch * 4, img2 += img2_pitch * 4) {
for (j = 0; j < width; j += 4) {
if (i > 0 && i < height && j > 0 && j < width) {
blockiness += blockiness_vertical(img1 + j, img1_pitch,
img2 + j, img2_pitch, 4);
blockiness += blockiness_horizontal(img1 + j, img1_pitch,
img2 + j, img2_pitch, 4);
blockiness +=
blockiness_vertical(img1 + j, img1_pitch, img2 + j, img2_pitch, 4);
blockiness += blockiness_horizontal(img1 + j, img1_pitch, img2 + j,
img2_pitch, 4);
}
}
}

View File

@@ -27,10 +27,10 @@ extern "C" {
#define ANS_METHOD_RANS 1
struct buffered_ans_symbol {
uint8_t method; // one of ANS_METHOD_UABS or ANS_METHOD_RANS
uint8_t method; // one of ANS_METHOD_UABS or ANS_METHOD_RANS
// TODO(aconverse): Should be possible to write this in terms of start for ABS
AnsP10 val_start; // Boolean value for ABS, start in symbol cycle for Rans
AnsP10 prob; // Probability of this symbol
AnsP10 prob; // Probability of this symbol
};
struct BufAnsCoder {
@@ -51,8 +51,8 @@ static INLINE void buf_ans_write_reset(struct BufAnsCoder *const c) {
c->offset = 0;
}
static INLINE void buf_uabs_write(struct BufAnsCoder *const c,
uint8_t val, AnsP8 prob) {
static INLINE void buf_uabs_write(struct BufAnsCoder *const c, uint8_t val,
AnsP8 prob) {
assert(c->offset <= c->size);
if (c->offset == c->size) {
vp10_buf_ans_grow(c);
@@ -95,8 +95,8 @@ static INLINE void buf_uabs_write_bit(struct BufAnsCoder *c, int bit) {
buf_uabs_write(c, bit, 128);
}
static INLINE void buf_uabs_write_literal(struct BufAnsCoder *c,
int literal, int bits) {
static INLINE void buf_uabs_write_literal(struct BufAnsCoder *c, int literal,
int bits) {
int bit;
assert(bits < 31);

View File

@@ -12,10 +12,7 @@
#include "vp10/encoder/encoder.h"
static const BLOCK_SIZE square[MAX_SB_SIZE_LOG2 - 2] = {
BLOCK_8X8,
BLOCK_16X16,
BLOCK_32X32,
BLOCK_64X64,
BLOCK_8X8, BLOCK_16X16, BLOCK_32X32, BLOCK_64X64,
#if CONFIG_EXT_PARTITION
BLOCK_128X128,
#endif // CONFIG_EXT_PARTITION
@@ -36,8 +33,7 @@ static void alloc_mode_context(VP10_COMMON *cm, int num_4x4_blk,
for (i = 0; i < MAX_MB_PLANE; ++i) {
#if CONFIG_VAR_TX
CHECK_MEM_ERROR(cm, ctx->blk_skip[i],
vpx_calloc(num_blk, sizeof(uint8_t)));
CHECK_MEM_ERROR(cm, ctx->blk_skip[i], vpx_calloc(num_blk, sizeof(uint8_t)));
#endif
for (k = 0; k < 3; ++k) {
CHECK_MEM_ERROR(cm, ctx->coeff[i][k],
@@ -48,18 +44,18 @@ static void alloc_mode_context(VP10_COMMON *cm, int num_4x4_blk,
vpx_memalign(32, num_pix * sizeof(*ctx->dqcoeff[i][k])));
CHECK_MEM_ERROR(cm, ctx->eobs[i][k],
vpx_memalign(32, num_blk * sizeof(*ctx->eobs[i][k])));
ctx->coeff_pbuf[i][k] = ctx->coeff[i][k];
ctx->qcoeff_pbuf[i][k] = ctx->qcoeff[i][k];
ctx->coeff_pbuf[i][k] = ctx->coeff[i][k];
ctx->qcoeff_pbuf[i][k] = ctx->qcoeff[i][k];
ctx->dqcoeff_pbuf[i][k] = ctx->dqcoeff[i][k];
ctx->eobs_pbuf[i][k] = ctx->eobs[i][k];
ctx->eobs_pbuf[i][k] = ctx->eobs[i][k];
}
}
if (cm->allow_screen_content_tools) {
for (i = 0; i < 2; ++i) {
CHECK_MEM_ERROR(cm, ctx->color_index_map[i],
vpx_memalign(32,
num_pix * sizeof(*ctx->color_index_map[i])));
for (i = 0; i < 2; ++i) {
CHECK_MEM_ERROR(
cm, ctx->color_index_map[i],
vpx_memalign(32, num_pix * sizeof(*ctx->color_index_map[i])));
}
}
}
@@ -93,29 +89,35 @@ static void alloc_tree_contexts(VP10_COMMON *cm, PC_TREE *tree,
int num_4x4_blk) {
#if CONFIG_EXT_PARTITION_TYPES
alloc_mode_context(cm, num_4x4_blk, PARTITION_NONE, &tree->none);
alloc_mode_context(cm, num_4x4_blk/2, PARTITION_HORZ, &tree->horizontal[0]);
alloc_mode_context(cm, num_4x4_blk/2, PARTITION_VERT, &tree->vertical[0]);
alloc_mode_context(cm, num_4x4_blk/2, PARTITION_VERT, &tree->horizontal[1]);
alloc_mode_context(cm, num_4x4_blk/2, PARTITION_VERT, &tree->vertical[1]);
alloc_mode_context(cm, num_4x4_blk / 2, PARTITION_HORZ, &tree->horizontal[0]);
alloc_mode_context(cm, num_4x4_blk / 2, PARTITION_VERT, &tree->vertical[0]);
alloc_mode_context(cm, num_4x4_blk / 2, PARTITION_VERT, &tree->horizontal[1]);
alloc_mode_context(cm, num_4x4_blk / 2, PARTITION_VERT, &tree->vertical[1]);
alloc_mode_context(cm, num_4x4_blk/4, PARTITION_HORZ_A,
alloc_mode_context(cm, num_4x4_blk / 4, PARTITION_HORZ_A,
&tree->horizontala[0]);
alloc_mode_context(cm, num_4x4_blk/4, PARTITION_HORZ_A,
alloc_mode_context(cm, num_4x4_blk / 4, PARTITION_HORZ_A,
&tree->horizontala[1]);
alloc_mode_context(cm, num_4x4_blk/2, PARTITION_HORZ_A,
alloc_mode_context(cm, num_4x4_blk / 2, PARTITION_HORZ_A,
&tree->horizontala[2]);
alloc_mode_context(cm, num_4x4_blk/2, PARTITION_HORZ_B,
alloc_mode_context(cm, num_4x4_blk / 2, PARTITION_HORZ_B,
&tree->horizontalb[0]);
alloc_mode_context(cm, num_4x4_blk/4, PARTITION_HORZ_B,
alloc_mode_context(cm, num_4x4_blk / 4, PARTITION_HORZ_B,
&tree->horizontalb[1]);
alloc_mode_context(cm, num_4x4_blk/4, PARTITION_HORZ_B,
alloc_mode_context(cm, num_4x4_blk / 4, PARTITION_HORZ_B,
&tree->horizontalb[2]);
alloc_mode_context(cm, num_4x4_blk/4, PARTITION_VERT_A, &tree->verticala[0]);
alloc_mode_context(cm, num_4x4_blk/4, PARTITION_VERT_A, &tree->verticala[1]);
alloc_mode_context(cm, num_4x4_blk/2, PARTITION_VERT_A, &tree->verticala[2]);
alloc_mode_context(cm, num_4x4_blk/2, PARTITION_VERT_B, &tree->verticalb[0]);
alloc_mode_context(cm, num_4x4_blk/4, PARTITION_VERT_B, &tree->verticalb[1]);
alloc_mode_context(cm, num_4x4_blk/4, PARTITION_VERT_B, &tree->verticalb[2]);
alloc_mode_context(cm, num_4x4_blk / 4, PARTITION_VERT_A,
&tree->verticala[0]);
alloc_mode_context(cm, num_4x4_blk / 4, PARTITION_VERT_A,
&tree->verticala[1]);
alloc_mode_context(cm, num_4x4_blk / 2, PARTITION_VERT_A,
&tree->verticala[2]);
alloc_mode_context(cm, num_4x4_blk / 2, PARTITION_VERT_B,
&tree->verticalb[0]);
alloc_mode_context(cm, num_4x4_blk / 4, PARTITION_VERT_B,
&tree->verticalb[1]);
alloc_mode_context(cm, num_4x4_blk / 4, PARTITION_VERT_B,
&tree->verticalb[2]);
#ifdef CONFIG_SUPERTX
alloc_mode_context(cm, num_4x4_blk, PARTITION_HORZ,
&tree->horizontal_supertx);
@@ -132,8 +134,8 @@ static void alloc_tree_contexts(VP10_COMMON *cm, PC_TREE *tree,
#endif // CONFIG_SUPERTX
#else
alloc_mode_context(cm, num_4x4_blk, &tree->none);
alloc_mode_context(cm, num_4x4_blk/2, &tree->horizontal[0]);
alloc_mode_context(cm, num_4x4_blk/2, &tree->vertical[0]);
alloc_mode_context(cm, num_4x4_blk / 2, &tree->horizontal[0]);
alloc_mode_context(cm, num_4x4_blk / 2, &tree->vertical[0]);
#ifdef CONFIG_SUPERTX
alloc_mode_context(cm, num_4x4_blk, &tree->horizontal_supertx);
alloc_mode_context(cm, num_4x4_blk, &tree->vertical_supertx);
@@ -141,8 +143,8 @@ static void alloc_tree_contexts(VP10_COMMON *cm, PC_TREE *tree,
#endif
if (num_4x4_blk > 4) {
alloc_mode_context(cm, num_4x4_blk/2, &tree->horizontal[1]);
alloc_mode_context(cm, num_4x4_blk/2, &tree->vertical[1]);
alloc_mode_context(cm, num_4x4_blk / 2, &tree->horizontal[1]);
alloc_mode_context(cm, num_4x4_blk / 2, &tree->vertical[1]);
} else {
memset(&tree->horizontal[1], 0, sizeof(tree->horizontal[1]));
memset(&tree->vertical[1], 0, sizeof(tree->vertical[1]));
@@ -198,11 +200,11 @@ void vp10_setup_pc_tree(VP10_COMMON *cm, ThreadData *td) {
int nodes;
vpx_free(td->leaf_tree);
CHECK_MEM_ERROR(cm, td->leaf_tree, vpx_calloc(leaf_nodes,
sizeof(*td->leaf_tree)));
CHECK_MEM_ERROR(cm, td->leaf_tree,
vpx_calloc(leaf_nodes, sizeof(*td->leaf_tree)));
vpx_free(td->pc_tree);
CHECK_MEM_ERROR(cm, td->pc_tree, vpx_calloc(tree_nodes,
sizeof(*td->pc_tree)));
CHECK_MEM_ERROR(cm, td->pc_tree,
vpx_calloc(tree_nodes, sizeof(*td->pc_tree)));
this_pc = &td->pc_tree[0];
this_leaf = &td->leaf_tree[0];
@@ -223,8 +225,7 @@ void vp10_setup_pc_tree(VP10_COMMON *cm, ThreadData *td) {
tree->block_size = square[0];
alloc_tree_contexts(cm, tree, 4);
tree->leaf_split[0] = this_leaf++;
for (j = 1; j < 4; j++)
tree->leaf_split[j] = tree->leaf_split[0];
for (j = 1; j < 4; j++) tree->leaf_split[j] = tree->leaf_split[0];
}
// Each node has 4 leaf nodes, fill each block_size level of the tree
@@ -234,8 +235,7 @@ void vp10_setup_pc_tree(VP10_COMMON *cm, ThreadData *td) {
PC_TREE *const tree = &td->pc_tree[pc_tree_index];
alloc_tree_contexts(cm, tree, 4 << (2 * square_index));
tree->block_size = square[square_index];
for (j = 0; j < 4; j++)
tree->split[j] = this_pc++;
for (j = 0; j < 4; j++) tree->split[j] = this_pc++;
++pc_tree_index;
}
++square_index;
@@ -247,7 +247,7 @@ void vp10_setup_pc_tree(VP10_COMMON *cm, ThreadData *td) {
td->pc_root[i]->none.best_mode_index = 2;
// Set up the root nodes for the rest of the possible superblock sizes
while (--i >= 0) {
td->pc_root[i] = td->pc_root[i+1]->split[0];
td->pc_root[i] = td->pc_root[i + 1]->split[0];
td->pc_root[i]->none.best_mode_index = 2;
}
}
@@ -263,12 +263,10 @@ void vp10_free_pc_tree(ThreadData *td) {
int i;
// Set up all 4x4 mode contexts
for (i = 0; i < leaf_nodes; ++i)
free_mode_context(&td->leaf_tree[i]);
for (i = 0; i < leaf_nodes; ++i) free_mode_context(&td->leaf_tree[i]);
// Sets up all the leaf nodes in the tree.
for (i = 0; i < tree_nodes; ++i)
free_tree_contexts(&td->pc_tree[i]);
for (i = 0; i < tree_nodes; ++i) free_tree_contexts(&td->pc_tree[i]);
vpx_free(td->pc_tree);
td->pc_tree = NULL;

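vp10_setup_pc_tree() above lays the partition tree out as flat arrays and wires the split pointers level by level; with the four square sizes in square[] (no CONFIG_EXT_PARTITION) that comes to 64 + 16 + 4 + 1 tree nodes. A sketch of that count as a geometric sum:

#include <stdio.h>

/* Node count of a 4-ary partition tree with the given number of levels:
   1 + 4 + 16 + ... */
static int pc_tree_nodes(int levels) {
  int n = 0, per_level = 1, i;
  for (i = 0; i < levels; ++i) {
    n += per_level;
    per_level *= 4;
  }
  return n;
}

int main(void) {
  printf("%d\n", pc_tree_nodes(4)); /* 85 = 64 + 16 + 4 + 1 */
  return 0;
}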
View File

@@ -18,122 +18,115 @@
/* round(-log2(i/256.) * (1 << VP9_PROB_COST_SHIFT))
Begins with a bogus entry for simpler addressing. */
const uint16_t vp10_prob_cost[256] = {
4096, 4096, 3584, 3284, 3072, 2907, 2772, 2659, 2560, 2473, 2395, 2325,
2260, 2201, 2147, 2096, 2048, 2003, 1961, 1921, 1883, 1847, 1813, 1780,
1748, 1718, 1689, 1661, 1635, 1609, 1584, 1559, 1536, 1513, 1491, 1470,
1449, 1429, 1409, 1390, 1371, 1353, 1335, 1318, 1301, 1284, 1268, 1252,
1236, 1221, 1206, 1192, 1177, 1163, 1149, 1136, 1123, 1110, 1097, 1084,
1072, 1059, 1047, 1036, 1024, 1013, 1001, 990, 979, 968, 958, 947,
937, 927, 917, 907, 897, 887, 878, 868, 859, 850, 841, 832,
823, 814, 806, 797, 789, 780, 772, 764, 756, 748, 740, 732,
724, 717, 709, 702, 694, 687, 680, 673, 665, 658, 651, 644,
637, 631, 624, 617, 611, 604, 598, 591, 585, 578, 572, 566,
560, 554, 547, 541, 535, 530, 524, 518, 512, 506, 501, 495,
489, 484, 478, 473, 467, 462, 456, 451, 446, 441, 435, 430,
425, 420, 415, 410, 405, 400, 395, 390, 385, 380, 375, 371,
366, 361, 356, 352, 347, 343, 338, 333, 329, 324, 320, 316,
311, 307, 302, 298, 294, 289, 285, 281, 277, 273, 268, 264,
260, 256, 252, 248, 244, 240, 236, 232, 228, 224, 220, 216,
212, 209, 205, 201, 197, 194, 190, 186, 182, 179, 175, 171,
168, 164, 161, 157, 153, 150, 146, 143, 139, 136, 132, 129,
125, 122, 119, 115, 112, 109, 105, 102, 99, 95, 92, 89,
86, 82, 79, 76, 73, 70, 66, 63, 60, 57, 54, 51,
48, 45, 42, 38, 35, 32, 29, 26, 23, 20, 18, 15,
12, 9, 6, 3};
4096, 4096, 3584, 3284, 3072, 2907, 2772, 2659, 2560, 2473, 2395, 2325, 2260,
2201, 2147, 2096, 2048, 2003, 1961, 1921, 1883, 1847, 1813, 1780, 1748, 1718,
1689, 1661, 1635, 1609, 1584, 1559, 1536, 1513, 1491, 1470, 1449, 1429, 1409,
1390, 1371, 1353, 1335, 1318, 1301, 1284, 1268, 1252, 1236, 1221, 1206, 1192,
1177, 1163, 1149, 1136, 1123, 1110, 1097, 1084, 1072, 1059, 1047, 1036, 1024,
1013, 1001, 990, 979, 968, 958, 947, 937, 927, 917, 907, 897, 887,
878, 868, 859, 850, 841, 832, 823, 814, 806, 797, 789, 780, 772,
764, 756, 748, 740, 732, 724, 717, 709, 702, 694, 687, 680, 673,
665, 658, 651, 644, 637, 631, 624, 617, 611, 604, 598, 591, 585,
578, 572, 566, 560, 554, 547, 541, 535, 530, 524, 518, 512, 506,
501, 495, 489, 484, 478, 473, 467, 462, 456, 451, 446, 441, 435,
430, 425, 420, 415, 410, 405, 400, 395, 390, 385, 380, 375, 371,
366, 361, 356, 352, 347, 343, 338, 333, 329, 324, 320, 316, 311,
307, 302, 298, 294, 289, 285, 281, 277, 273, 268, 264, 260, 256,
252, 248, 244, 240, 236, 232, 228, 224, 220, 216, 212, 209, 205,
201, 197, 194, 190, 186, 182, 179, 175, 171, 168, 164, 161, 157,
153, 150, 146, 143, 139, 136, 132, 129, 125, 122, 119, 115, 112,
109, 105, 102, 99, 95, 92, 89, 86, 82, 79, 76, 73, 70,
66, 63, 60, 57, 54, 51, 48, 45, 42, 38, 35, 32, 29,
26, 23, 20, 18, 15, 12, 9, 6, 3
};
#if CONFIG_ANS
// round(-log2(i/1024.) * (1 << VP9_PROB_COST_SHIFT))
static const uint16_t vp10_prob_cost10[1024] = {
5120, 5120, 4608, 4308, 4096, 3931, 3796, 3683, 3584, 3497, 3419, 3349,
3284, 3225, 3171, 3120, 3072, 3027, 2985, 2945, 2907, 2871, 2837, 2804,
2772, 2742, 2713, 2685, 2659, 2633, 2608, 2583, 2560, 2537, 2515, 2494,
2473, 2453, 2433, 2414, 2395, 2377, 2359, 2342, 2325, 2308, 2292, 2276,
2260, 2245, 2230, 2216, 2201, 2187, 2173, 2160, 2147, 2134, 2121, 2108,
2096, 2083, 2071, 2060, 2048, 2037, 2025, 2014, 2003, 1992, 1982, 1971,
1961, 1951, 1941, 1931, 1921, 1911, 1902, 1892, 1883, 1874, 1865, 1856,
1847, 1838, 1830, 1821, 1813, 1804, 1796, 1788, 1780, 1772, 1764, 1756,
1748, 1741, 1733, 1726, 1718, 1711, 1704, 1697, 1689, 1682, 1675, 1668,
1661, 1655, 1648, 1641, 1635, 1628, 1622, 1615, 1609, 1602, 1596, 1590,
1584, 1578, 1571, 1565, 1559, 1554, 1548, 1542, 1536, 1530, 1525, 1519,
1513, 1508, 1502, 1497, 1491, 1486, 1480, 1475, 1470, 1465, 1459, 1454,
1449, 1444, 1439, 1434, 1429, 1424, 1419, 1414, 1409, 1404, 1399, 1395,
1390, 1385, 1380, 1376, 1371, 1367, 1362, 1357, 1353, 1348, 1344, 1340,
1335, 1331, 1326, 1322, 1318, 1313, 1309, 1305, 1301, 1297, 1292, 1288,
1284, 1280, 1276, 1272, 1268, 1264, 1260, 1256, 1252, 1248, 1244, 1240,
1236, 1233, 1229, 1225, 1221, 1218, 1214, 1210, 1206, 1203, 1199, 1195,
1192, 1188, 1185, 1181, 1177, 1174, 1170, 1167, 1163, 1160, 1156, 1153,
1149, 1146, 1143, 1139, 1136, 1133, 1129, 1126, 1123, 1119, 1116, 1113,
1110, 1106, 1103, 1100, 1097, 1094, 1090, 1087, 1084, 1081, 1078, 1075,
1072, 1069, 1066, 1062, 1059, 1056, 1053, 1050, 1047, 1044, 1042, 1039,
1036, 1033, 1030, 1027, 1024, 1021, 1018, 1015, 1013, 1010, 1007, 1004,
1001, 998, 996, 993, 990, 987, 985, 982, 979, 977, 974, 971,
968, 966, 963, 960, 958, 955, 953, 950, 947, 945, 942, 940,
937, 934, 932, 929, 927, 924, 922, 919, 917, 914, 912, 909,
907, 904, 902, 899, 897, 895, 892, 890, 887, 885, 883, 880,
878, 876, 873, 871, 868, 866, 864, 861, 859, 857, 855, 852,
850, 848, 845, 843, 841, 839, 836, 834, 832, 830, 828, 825,
823, 821, 819, 817, 814, 812, 810, 808, 806, 804, 801, 799,
797, 795, 793, 791, 789, 787, 785, 783, 780, 778, 776, 774,
772, 770, 768, 766, 764, 762, 760, 758, 756, 754, 752, 750,
748, 746, 744, 742, 740, 738, 736, 734, 732, 730, 728, 726,
724, 723, 721, 719, 717, 715, 713, 711, 709, 707, 706, 704,
702, 700, 698, 696, 694, 693, 691, 689, 687, 685, 683, 682,
680, 678, 676, 674, 673, 671, 669, 667, 665, 664, 662, 660,
658, 657, 655, 653, 651, 650, 648, 646, 644, 643, 641, 639,
637, 636, 634, 632, 631, 629, 627, 626, 624, 622, 621, 619,
617, 616, 614, 612, 611, 609, 607, 606, 604, 602, 601, 599,
598, 596, 594, 593, 591, 590, 588, 586, 585, 583, 582, 580,
578, 577, 575, 574, 572, 571, 569, 567, 566, 564, 563, 561,
560, 558, 557, 555, 554, 552, 550, 549, 547, 546, 544, 543,
541, 540, 538, 537, 535, 534, 532, 531, 530, 528, 527, 525,
524, 522, 521, 519, 518, 516, 515, 513, 512, 511, 509, 508,
506, 505, 503, 502, 501, 499, 498, 496, 495, 493, 492, 491,
489, 488, 486, 485, 484, 482, 481, 480, 478, 477, 475, 474,
473, 471, 470, 469, 467, 466, 465, 463, 462, 460, 459, 458,
456, 455, 454, 452, 451, 450, 448, 447, 446, 444, 443, 442,
441, 439, 438, 437, 435, 434, 433, 431, 430, 429, 428, 426,
425, 424, 422, 421, 420, 419, 417, 416, 415, 414, 412, 411,
410, 409, 407, 406, 405, 404, 402, 401, 400, 399, 397, 396,
395, 394, 392, 391, 390, 389, 387, 386, 385, 384, 383, 381,
380, 379, 378, 377, 375, 374, 373, 372, 371, 369, 368, 367,
366, 365, 364, 362, 361, 360, 359, 358, 356, 355, 354, 353,
352, 351, 349, 348, 347, 346, 345, 344, 343, 341, 340, 339,
338, 337, 336, 335, 333, 332, 331, 330, 329, 328, 327, 326,
324, 323, 322, 321, 320, 319, 318, 317, 316, 314, 313, 312,
311, 310, 309, 308, 307, 306, 305, 303, 302, 301, 300, 299,
298, 297, 296, 295, 294, 293, 292, 291, 289, 288, 287, 286,
285, 284, 283, 282, 281, 280, 279, 278, 277, 276, 275, 274,
273, 272, 271, 269, 268, 267, 266, 265, 264, 263, 262, 261,
260, 259, 258, 257, 256, 255, 254, 253, 252, 251, 250, 249,
248, 247, 246, 245, 244, 243, 242, 241, 240, 239, 238, 237,
236, 235, 234, 233, 232, 231, 230, 229, 228, 227, 226, 225,
224, 223, 222, 221, 220, 219, 218, 217, 216, 215, 214, 213,
212, 212, 211, 210, 209, 208, 207, 206, 205, 204, 203, 202,
201, 200, 199, 198, 197, 196, 195, 194, 194, 193, 192, 191,
190, 189, 188, 187, 186, 185, 184, 183, 182, 181, 181, 180,
179, 178, 177, 176, 175, 174, 173, 172, 171, 170, 170, 169,
168, 167, 166, 165, 164, 163, 162, 161, 161, 160, 159, 158,
157, 156, 155, 154, 153, 152, 152, 151, 150, 149, 148, 147,
146, 145, 145, 144, 143, 142, 141, 140, 139, 138, 138, 137,
136, 135, 134, 133, 132, 132, 131, 130, 129, 128, 127, 126,
125, 125, 124, 123, 122, 121, 120, 120, 119, 118, 117, 116,
115, 114, 114, 113, 112, 111, 110, 109, 109, 108, 107, 106,
105, 104, 104, 103, 102, 101, 100, 99, 99, 98, 97, 96,
95, 95, 94, 93, 92, 91, 90, 90, 89, 88, 87, 86,
86, 85, 84, 83, 82, 82, 81, 80, 79, 78, 78, 77,
76, 75, 74, 74, 73, 72, 71, 70, 70, 69, 68, 67,
66, 66, 65, 64, 63, 62, 62, 61, 60, 59, 59, 58,
57, 56, 55, 55, 54, 53, 52, 52, 51, 50, 49, 48,
48, 47, 46, 45, 45, 44, 43, 42, 42, 41, 40, 39,
38, 38, 37, 36, 35, 35, 34, 33, 32, 32, 31, 30,
29, 29, 28, 27, 26, 26, 25, 24, 23, 23, 22, 21,
20, 20, 19, 18, 18, 17, 16, 15, 15, 14, 13, 12,
12, 11, 10, 9, 9, 8, 7, 7, 6, 5, 4, 4,
3, 2, 1, 1};
5120, 5120, 4608, 4308, 4096, 3931, 3796, 3683, 3584, 3497, 3419, 3349, 3284,
3225, 3171, 3120, 3072, 3027, 2985, 2945, 2907, 2871, 2837, 2804, 2772, 2742,
2713, 2685, 2659, 2633, 2608, 2583, 2560, 2537, 2515, 2494, 2473, 2453, 2433,
2414, 2395, 2377, 2359, 2342, 2325, 2308, 2292, 2276, 2260, 2245, 2230, 2216,
2201, 2187, 2173, 2160, 2147, 2134, 2121, 2108, 2096, 2083, 2071, 2060, 2048,
2037, 2025, 2014, 2003, 1992, 1982, 1971, 1961, 1951, 1941, 1931, 1921, 1911,
1902, 1892, 1883, 1874, 1865, 1856, 1847, 1838, 1830, 1821, 1813, 1804, 1796,
1788, 1780, 1772, 1764, 1756, 1748, 1741, 1733, 1726, 1718, 1711, 1704, 1697,
1689, 1682, 1675, 1668, 1661, 1655, 1648, 1641, 1635, 1628, 1622, 1615, 1609,
1602, 1596, 1590, 1584, 1578, 1571, 1565, 1559, 1554, 1548, 1542, 1536, 1530,
1525, 1519, 1513, 1508, 1502, 1497, 1491, 1486, 1480, 1475, 1470, 1465, 1459,
1454, 1449, 1444, 1439, 1434, 1429, 1424, 1419, 1414, 1409, 1404, 1399, 1395,
1390, 1385, 1380, 1376, 1371, 1367, 1362, 1357, 1353, 1348, 1344, 1340, 1335,
1331, 1326, 1322, 1318, 1313, 1309, 1305, 1301, 1297, 1292, 1288, 1284, 1280,
1276, 1272, 1268, 1264, 1260, 1256, 1252, 1248, 1244, 1240, 1236, 1233, 1229,
1225, 1221, 1218, 1214, 1210, 1206, 1203, 1199, 1195, 1192, 1188, 1185, 1181,
1177, 1174, 1170, 1167, 1163, 1160, 1156, 1153, 1149, 1146, 1143, 1139, 1136,
1133, 1129, 1126, 1123, 1119, 1116, 1113, 1110, 1106, 1103, 1100, 1097, 1094,
1090, 1087, 1084, 1081, 1078, 1075, 1072, 1069, 1066, 1062, 1059, 1056, 1053,
1050, 1047, 1044, 1042, 1039, 1036, 1033, 1030, 1027, 1024, 1021, 1018, 1015,
1013, 1010, 1007, 1004, 1001, 998, 996, 993, 990, 987, 985, 982, 979,
977, 974, 971, 968, 966, 963, 960, 958, 955, 953, 950, 947, 945,
942, 940, 937, 934, 932, 929, 927, 924, 922, 919, 917, 914, 912,
909, 907, 904, 902, 899, 897, 895, 892, 890, 887, 885, 883, 880,
878, 876, 873, 871, 868, 866, 864, 861, 859, 857, 855, 852, 850,
848, 845, 843, 841, 839, 836, 834, 832, 830, 828, 825, 823, 821,
819, 817, 814, 812, 810, 808, 806, 804, 801, 799, 797, 795, 793,
791, 789, 787, 785, 783, 780, 778, 776, 774, 772, 770, 768, 766,
764, 762, 760, 758, 756, 754, 752, 750, 748, 746, 744, 742, 740,
738, 736, 734, 732, 730, 728, 726, 724, 723, 721, 719, 717, 715,
713, 711, 709, 707, 706, 704, 702, 700, 698, 696, 694, 693, 691,
689, 687, 685, 683, 682, 680, 678, 676, 674, 673, 671, 669, 667,
665, 664, 662, 660, 658, 657, 655, 653, 651, 650, 648, 646, 644,
643, 641, 639, 637, 636, 634, 632, 631, 629, 627, 626, 624, 622,
621, 619, 617, 616, 614, 612, 611, 609, 607, 606, 604, 602, 601,
599, 598, 596, 594, 593, 591, 590, 588, 586, 585, 583, 582, 580,
578, 577, 575, 574, 572, 571, 569, 567, 566, 564, 563, 561, 560,
558, 557, 555, 554, 552, 550, 549, 547, 546, 544, 543, 541, 540,
538, 537, 535, 534, 532, 531, 530, 528, 527, 525, 524, 522, 521,
519, 518, 516, 515, 513, 512, 511, 509, 508, 506, 505, 503, 502,
501, 499, 498, 496, 495, 493, 492, 491, 489, 488, 486, 485, 484,
482, 481, 480, 478, 477, 475, 474, 473, 471, 470, 469, 467, 466,
465, 463, 462, 460, 459, 458, 456, 455, 454, 452, 451, 450, 448,
447, 446, 444, 443, 442, 441, 439, 438, 437, 435, 434, 433, 431,
430, 429, 428, 426, 425, 424, 422, 421, 420, 419, 417, 416, 415,
414, 412, 411, 410, 409, 407, 406, 405, 404, 402, 401, 400, 399,
397, 396, 395, 394, 392, 391, 390, 389, 387, 386, 385, 384, 383,
381, 380, 379, 378, 377, 375, 374, 373, 372, 371, 369, 368, 367,
366, 365, 364, 362, 361, 360, 359, 358, 356, 355, 354, 353, 352,
351, 349, 348, 347, 346, 345, 344, 343, 341, 340, 339, 338, 337,
336, 335, 333, 332, 331, 330, 329, 328, 327, 326, 324, 323, 322,
321, 320, 319, 318, 317, 316, 314, 313, 312, 311, 310, 309, 308,
307, 306, 305, 303, 302, 301, 300, 299, 298, 297, 296, 295, 294,
293, 292, 291, 289, 288, 287, 286, 285, 284, 283, 282, 281, 280,
279, 278, 277, 276, 275, 274, 273, 272, 271, 269, 268, 267, 266,
265, 264, 263, 262, 261, 260, 259, 258, 257, 256, 255, 254, 253,
252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240,
239, 238, 237, 236, 235, 234, 233, 232, 231, 230, 229, 228, 227,
226, 225, 224, 223, 222, 221, 220, 219, 218, 217, 216, 215, 214,
213, 212, 212, 211, 210, 209, 208, 207, 206, 205, 204, 203, 202,
201, 200, 199, 198, 197, 196, 195, 194, 194, 193, 192, 191, 190,
189, 188, 187, 186, 185, 184, 183, 182, 181, 181, 180, 179, 178,
177, 176, 175, 174, 173, 172, 171, 170, 170, 169, 168, 167, 166,
165, 164, 163, 162, 161, 161, 160, 159, 158, 157, 156, 155, 154,
153, 152, 152, 151, 150, 149, 148, 147, 146, 145, 145, 144, 143,
142, 141, 140, 139, 138, 138, 137, 136, 135, 134, 133, 132, 132,
131, 130, 129, 128, 127, 126, 125, 125, 124, 123, 122, 121, 120,
120, 119, 118, 117, 116, 115, 114, 114, 113, 112, 111, 110, 109,
109, 108, 107, 106, 105, 104, 104, 103, 102, 101, 100, 99, 99,
98, 97, 96, 95, 95, 94, 93, 92, 91, 90, 90, 89, 88,
87, 86, 86, 85, 84, 83, 82, 82, 81, 80, 79, 78, 78,
77, 76, 75, 74, 74, 73, 72, 71, 70, 70, 69, 68, 67,
66, 66, 65, 64, 63, 62, 62, 61, 60, 59, 59, 58, 57,
56, 55, 55, 54, 53, 52, 52, 51, 50, 49, 48, 48, 47,
46, 45, 45, 44, 43, 42, 42, 41, 40, 39, 38, 38, 37,
36, 35, 35, 34, 33, 32, 32, 31, 30, 29, 29, 28, 27,
26, 26, 25, 24, 23, 23, 22, 21, 20, 20, 19, 18, 18,
17, 16, 15, 15, 14, 13, 12, 12, 11, 10, 9, 9, 8,
7, 7, 6, 5, 4, 4, 3, 2, 1, 1
};
#endif // CONFIG_ANS
static void cost(int *costs, vpx_tree tree, const vpx_prob *probs,
int i, int c) {
static void cost(int *costs, vpx_tree tree, const vpx_prob *probs, int i,
int c) {
const vpx_prob prob = probs[i / 2];
int b;
@@ -155,8 +148,7 @@ void vp10_cost_tokens_ans(int *costs, const vpx_prob *tree_probs,
int c_tree = 0; // Cost of the "tree" nodes EOB and ZERO.
int i;
costs[EOB_TOKEN] = vp10_cost_bit(tree_probs[0], 0);
if (!skip_eob)
c_tree = vp10_cost_bit(tree_probs[0], 1);
if (!skip_eob) c_tree = vp10_cost_bit(tree_probs[0], 1);
for (i = ZERO_TOKEN; i <= CATEGORY6_TOKEN; ++i) {
const int p = token_cdf[i + 1] - token_cdf[i];
costs[i] = c_tree + vp10_prob_cost10[p];

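The tables above are round(-log2(i/N) * (1 << VP9_PROB_COST_SHIFT)) for N = 256 and 1024, i.e. bit costs in fixed point. A sketch that regenerates vp10_prob_cost, assuming the shift is 9 (consistent with vp10_prob_cost[128] == 512, exactly one bit):

#include <math.h>
#include <stdio.h>

#define PROB_COST_SHIFT 9 /* assumed value of VP9_PROB_COST_SHIFT */

int main(void) {
  int i;
  for (i = 0; i < 256; ++i) {
    /* Entry 0 is the bogus duplicate of entry 1 noted in the comment above. */
    const int p = i ? i : 1;
    printf("%d%s", (int)lrint(-log2(p / 256.) * (1 << PROB_COST_SHIFT)),
           i == 255 ? "\n" : ", ");
  }
  /* Output begins 4096, 4096, 3584, 3284, ... matching the table above. */
  return 0;
}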
View File

@@ -30,8 +30,7 @@ extern const uint16_t vp10_prob_cost[256];
#define vp10_cost_one(prob) vp10_cost_zero(256 - (prob))
#define vp10_cost_bit(prob, bit) vp10_cost_zero((bit) ? 256 - (prob) \
: (prob))
#define vp10_cost_bit(prob, bit) vp10_cost_zero((bit) ? 256 - (prob) : (prob))
// Cost of coding an n bit literal, using 128 (i.e. 50%) probability
// for each bit.
@@ -42,8 +41,8 @@ static INLINE unsigned int cost_branch256(const unsigned int ct[2],
return ct[0] * vp10_cost_zero(p) + ct[1] * vp10_cost_one(p);
}
static INLINE int treed_cost(vpx_tree tree, const vpx_prob *probs,
int bits, int len) {
static INLINE int treed_cost(vpx_tree tree, const vpx_prob *probs, int bits,
int len) {
int cost = 0;
vpx_tree_index i = 0;

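vp10_cost_bit above reads the cost of a one-bit decision straight from vp10_prob_cost, using the complement of the probability for a set bit, and cost_branch256 weights the two branch costs by observed counts. A tiny numeric sketch, with the two entries copied from the table in cost.c:

#include <stdio.h>

/* Copied from vp10_prob_cost in cost.c. */
#define COST_192 212 /* vp10_prob_cost[192]: cost of the likely zero branch */
#define COST_64 1024 /* vp10_prob_cost[64] == vp10_prob_cost[256 - 192] */

int main(void) {
  /* For prob = 192/256, vp10_cost_bit(prob, 0) is COST_192 and
     vp10_cost_bit(prob, 1) is COST_64; cost_branch256 weights them. */
  const unsigned int ct[2] = { 30, 10 }; /* observed zero/one counts */
  printf("%u\n", ct[0] * COST_192 + ct[1] * COST_64); /* 16600 */
  return 0;
}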
View File

@@ -777,14 +777,14 @@ static void fadst8(const tran_low_t *input, tran_low_t *output) {
tran_high_t x7 = input[6];
// stage 1
s0 = cospi_2_64 * x0 + cospi_30_64 * x1;
s1 = cospi_30_64 * x0 - cospi_2_64 * x1;
s0 = cospi_2_64 * x0 + cospi_30_64 * x1;
s1 = cospi_30_64 * x0 - cospi_2_64 * x1;
s2 = cospi_10_64 * x2 + cospi_22_64 * x3;
s3 = cospi_22_64 * x2 - cospi_10_64 * x3;
s4 = cospi_18_64 * x4 + cospi_14_64 * x5;
s5 = cospi_14_64 * x4 - cospi_18_64 * x5;
s6 = cospi_26_64 * x6 + cospi_6_64 * x7;
s7 = cospi_6_64 * x6 - cospi_26_64 * x7;
s6 = cospi_26_64 * x6 + cospi_6_64 * x7;
s7 = cospi_6_64 * x6 - cospi_26_64 * x7;
x0 = fdct_round_shift(s0 + s4);
x1 = fdct_round_shift(s1 + s5);
@@ -800,10 +800,10 @@ static void fadst8(const tran_low_t *input, tran_low_t *output) {
s1 = x1;
s2 = x2;
s3 = x3;
s4 = cospi_8_64 * x4 + cospi_24_64 * x5;
s5 = cospi_24_64 * x4 - cospi_8_64 * x5;
s6 = - cospi_24_64 * x6 + cospi_8_64 * x7;
s7 = cospi_8_64 * x6 + cospi_24_64 * x7;
s4 = cospi_8_64 * x4 + cospi_24_64 * x5;
s5 = cospi_24_64 * x4 - cospi_8_64 * x5;
s6 = -cospi_24_64 * x6 + cospi_8_64 * x7;
s7 = cospi_8_64 * x6 + cospi_24_64 * x7;
x0 = s0 + s2;
x1 = s1 + s3;
@@ -857,11 +857,11 @@ static void fadst16(const tran_low_t *input, tran_low_t *output) {
tran_high_t x15 = input[14];
// stage 1
s0 = x0 * cospi_1_64 + x1 * cospi_31_64;
s0 = x0 * cospi_1_64 + x1 * cospi_31_64;
s1 = x0 * cospi_31_64 - x1 * cospi_1_64;
s2 = x2 * cospi_5_64 + x3 * cospi_27_64;
s2 = x2 * cospi_5_64 + x3 * cospi_27_64;
s3 = x2 * cospi_27_64 - x3 * cospi_5_64;
s4 = x4 * cospi_9_64 + x5 * cospi_23_64;
s4 = x4 * cospi_9_64 + x5 * cospi_23_64;
s5 = x4 * cospi_23_64 - x5 * cospi_9_64;
s6 = x6 * cospi_13_64 + x7 * cospi_19_64;
s7 = x6 * cospi_19_64 - x7 * cospi_13_64;
@@ -870,9 +870,9 @@ static void fadst16(const tran_low_t *input, tran_low_t *output) {
s10 = x10 * cospi_21_64 + x11 * cospi_11_64;
s11 = x10 * cospi_11_64 - x11 * cospi_21_64;
s12 = x12 * cospi_25_64 + x13 * cospi_7_64;
s13 = x12 * cospi_7_64 - x13 * cospi_25_64;
s13 = x12 * cospi_7_64 - x13 * cospi_25_64;
s14 = x14 * cospi_29_64 + x15 * cospi_3_64;
s15 = x14 * cospi_3_64 - x15 * cospi_29_64;
s15 = x14 * cospi_3_64 - x15 * cospi_29_64;
x0 = fdct_round_shift(s0 + s8);
x1 = fdct_round_shift(s1 + s9);
@ -882,8 +882,8 @@ static void fadst16(const tran_low_t *input, tran_low_t *output) {
x5 = fdct_round_shift(s5 + s13);
x6 = fdct_round_shift(s6 + s14);
x7 = fdct_round_shift(s7 + s15);
x8 = fdct_round_shift(s0 - s8);
x9 = fdct_round_shift(s1 - s9);
x8 = fdct_round_shift(s0 - s8);
x9 = fdct_round_shift(s1 - s9);
x10 = fdct_round_shift(s2 - s10);
x11 = fdct_round_shift(s3 - s11);
x12 = fdct_round_shift(s4 - s12);
@ -900,14 +900,14 @@ static void fadst16(const tran_low_t *input, tran_low_t *output) {
s5 = x5;
s6 = x6;
s7 = x7;
s8 = x8 * cospi_4_64 + x9 * cospi_28_64;
s9 = x8 * cospi_28_64 - x9 * cospi_4_64;
s10 = x10 * cospi_20_64 + x11 * cospi_12_64;
s11 = x10 * cospi_12_64 - x11 * cospi_20_64;
s12 = - x12 * cospi_28_64 + x13 * cospi_4_64;
s13 = x12 * cospi_4_64 + x13 * cospi_28_64;
s14 = - x14 * cospi_12_64 + x15 * cospi_20_64;
s15 = x14 * cospi_20_64 + x15 * cospi_12_64;
s8 = x8 * cospi_4_64 + x9 * cospi_28_64;
s9 = x8 * cospi_28_64 - x9 * cospi_4_64;
s10 = x10 * cospi_20_64 + x11 * cospi_12_64;
s11 = x10 * cospi_12_64 - x11 * cospi_20_64;
s12 = -x12 * cospi_28_64 + x13 * cospi_4_64;
s13 = x12 * cospi_4_64 + x13 * cospi_28_64;
s14 = -x14 * cospi_12_64 + x15 * cospi_20_64;
s15 = x14 * cospi_20_64 + x15 * cospi_12_64;
x0 = s0 + s4;
x1 = s1 + s5;
@ -931,18 +931,18 @@ static void fadst16(const tran_low_t *input, tran_low_t *output) {
s1 = x1;
s2 = x2;
s3 = x3;
s4 = x4 * cospi_8_64 + x5 * cospi_24_64;
s4 = x4 * cospi_8_64 + x5 * cospi_24_64;
s5 = x4 * cospi_24_64 - x5 * cospi_8_64;
s6 = - x6 * cospi_24_64 + x7 * cospi_8_64;
s7 = x6 * cospi_8_64 + x7 * cospi_24_64;
s6 = -x6 * cospi_24_64 + x7 * cospi_8_64;
s7 = x6 * cospi_8_64 + x7 * cospi_24_64;
s8 = x8;
s9 = x9;
s10 = x10;
s11 = x11;
s12 = x12 * cospi_8_64 + x13 * cospi_24_64;
s12 = x12 * cospi_8_64 + x13 * cospi_24_64;
s13 = x12 * cospi_24_64 - x13 * cospi_8_64;
s14 = - x14 * cospi_24_64 + x15 * cospi_8_64;
s15 = x14 * cospi_8_64 + x15 * cospi_24_64;
s14 = -x14 * cospi_24_64 + x15 * cospi_8_64;
s15 = x14 * cospi_8_64 + x15 * cospi_24_64;
x0 = s0 + s2;
x1 = s1 + s3;
@ -962,13 +962,13 @@ static void fadst16(const tran_low_t *input, tran_low_t *output) {
x15 = fdct_round_shift(s13 - s15);
// stage 4
s2 = (- cospi_16_64) * (x2 + x3);
s2 = (-cospi_16_64) * (x2 + x3);
s3 = cospi_16_64 * (x2 - x3);
s6 = cospi_16_64 * (x6 + x7);
s7 = cospi_16_64 * (- x6 + x7);
s7 = cospi_16_64 * (-x6 + x7);
s10 = cospi_16_64 * (x10 + x11);
s11 = cospi_16_64 * (- x10 + x11);
s14 = (- cospi_16_64) * (x14 + x15);
s11 = cospi_16_64 * (-x10 + x11);
s14 = (-cospi_16_64) * (x14 + x15);
s15 = cospi_16_64 * (x14 - x15);
x2 = fdct_round_shift(s2);
@ -1007,8 +1007,7 @@ static void fidtx4(const tran_low_t *input, tran_low_t *output) {
static void fidtx8(const tran_low_t *input, tran_low_t *output) {
int i;
for (i = 0; i < 8; ++i)
output[i] = input[i] * 2;
for (i = 0; i < 8; ++i) output[i] = input[i] * 2;
}
static void fidtx16(const tran_low_t *input, tran_low_t *output) {
@ -1019,8 +1018,7 @@ static void fidtx16(const tran_low_t *input, tran_low_t *output) {
static void fidtx32(const tran_low_t *input, tran_low_t *output) {
int i;
for (i = 0; i < 32; ++i)
output[i] = input[i] * 4;
for (i = 0; i < 32; ++i) output[i] = input[i] * 4;
}
// For use in lieu of ADST
@ -1042,8 +1040,7 @@ static void copy_block(const int16_t *src, int src_stride, int l, int w,
int16_t *dest, int dest_stride) {
int i;
for (i = 0; i < l; ++i) {
memcpy(dest + dest_stride * i, src + src_stride * i,
w * sizeof(int16_t));
memcpy(dest + dest_stride * i, src + src_stride * i, w * sizeof(int16_t));
}
}
@ -1080,29 +1077,25 @@ static void fliplrud(int16_t *dest, int stride, int l, int w) {
}
}
static void copy_fliplr(const int16_t *src, int src_stride,
int l, int w,
static void copy_fliplr(const int16_t *src, int src_stride, int l, int w,
int16_t *dest, int dest_stride) {
copy_block(src, src_stride, l, w, dest, dest_stride);
fliplr(dest, dest_stride, l, w);
}
static void copy_flipud(const int16_t *src, int src_stride,
int l, int w,
static void copy_flipud(const int16_t *src, int src_stride, int l, int w,
int16_t *dest, int dest_stride) {
copy_block(src, src_stride, l, w, dest, dest_stride);
flipud(dest, dest_stride, l, w);
}
static void copy_fliplrud(const int16_t *src, int src_stride,
int l, int w,
static void copy_fliplrud(const int16_t *src, int src_stride, int l, int w,
int16_t *dest, int dest_stride) {
copy_block(src, src_stride, l, w, dest, dest_stride);
fliplrud(dest, dest_stride, l, w);
}
static void maybe_flip_input(const int16_t **src, int *src_stride,
int l, int w,
static void maybe_flip_input(const int16_t **src, int *src_stride, int l, int w,
int16_t *buff, int tx_type) {
switch (tx_type) {
case DCT_DCT:
@ -1113,8 +1106,7 @@ static void maybe_flip_input(const int16_t **src, int *src_stride,
case V_DCT:
case H_DCT:
case V_ADST:
case H_ADST:
break;
case H_ADST: break;
case FLIPADST_DCT:
case FLIPADST_ADST:
case V_FLIPADST:
@ -1134,109 +1126,107 @@ static void maybe_flip_input(const int16_t **src, int *src_stride,
*src = buff;
*src_stride = w;
break;
default:
assert(0);
break;
default: assert(0); break;
}
}
#endif // CONFIG_EXT_TX
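maybe_flip_input() is what lets the FLIPADST transform types reuse the plain forward ADST kernels: the block is mirrored into a scratch buffer first, and the ordinary {cols, rows} pair from the FHT_* tables below is then applied to the flipped copy. A usage sketch (buffer size and surrounding variables are illustrative assumptions):

int16_t flipped[32 * 32]; /* scratch large enough for any supported tx */
const int16_t *src = input;
int src_stride = stride;
maybe_flip_input(&src, &src_stride, 8, 8, flipped, FLIPADST_DCT);
/* src/src_stride now reference the vertically flipped 8x8 block */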
static const transform_2d FHT_4[] = {
{ fdct4, fdct4 }, // DCT_DCT
{ fadst4, fdct4 }, // ADST_DCT
{ fdct4, fadst4 }, // DCT_ADST
{ fdct4, fdct4 }, // DCT_DCT
{ fadst4, fdct4 }, // ADST_DCT
{ fdct4, fadst4 }, // DCT_ADST
{ fadst4, fadst4 }, // ADST_ADST
#if CONFIG_EXT_TX
{ fadst4, fdct4 }, // FLIPADST_DCT
{ fdct4, fadst4 }, // DCT_FLIPADST
{ fadst4, fdct4 }, // FLIPADST_DCT
{ fdct4, fadst4 }, // DCT_FLIPADST
{ fadst4, fadst4 }, // FLIPADST_FLIPADST
{ fadst4, fadst4 }, // ADST_FLIPADST
{ fadst4, fadst4 }, // FLIPADST_ADST
{ fidtx4, fidtx4 }, // IDTX
{ fdct4, fidtx4 }, // V_DCT
{ fidtx4, fdct4 }, // H_DCT
{ fdct4, fidtx4 }, // V_DCT
{ fidtx4, fdct4 }, // H_DCT
{ fadst4, fidtx4 }, // V_ADST
{ fidtx4, fadst4 }, // H_ADST
{ fadst4, fidtx4 }, // V_FLIPADST
{ fidtx4, fadst4 }, // H_FLIPADST
#endif // CONFIG_EXT_TX
#endif // CONFIG_EXT_TX
};
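Each FHT_* entry pairs a column kernel with a row kernel, and tx_type indexes the table directly: ADST_DCT, for example, runs fadst4 down the columns and fdct4 across the rows. The hybrid-transform functions further below dispatch through it as:

const transform_2d ht = FHT_4[tx_type]; /* e.g. ADST_DCT -> {fadst4, fdct4} */
ht.cols(temp_in, temp_out); /* vertical pass */
ht.rows(temp_in, temp_out); /* horizontal pass */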
static const transform_2d FHT_8[] = {
{ fdct8, fdct8 }, // DCT_DCT
{ fadst8, fdct8 }, // ADST_DCT
{ fdct8, fadst8 }, // DCT_ADST
{ fdct8, fdct8 }, // DCT_DCT
{ fadst8, fdct8 }, // ADST_DCT
{ fdct8, fadst8 }, // DCT_ADST
{ fadst8, fadst8 }, // ADST_ADST
#if CONFIG_EXT_TX
{ fadst8, fdct8 }, // FLIPADST_DCT
{ fdct8, fadst8 }, // DCT_FLIPADST
{ fadst8, fdct8 }, // FLIPADST_DCT
{ fdct8, fadst8 }, // DCT_FLIPADST
{ fadst8, fadst8 }, // FLIPADST_FLIPADST
{ fadst8, fadst8 }, // ADST_FLIPADST
{ fadst8, fadst8 }, // FLIPADST_ADST
{ fidtx8, fidtx8 }, // IDTX
{ fdct8, fidtx8 }, // V_DCT
{ fidtx8, fdct8 }, // H_DCT
{ fdct8, fidtx8 }, // V_DCT
{ fidtx8, fdct8 }, // H_DCT
{ fadst8, fidtx8 }, // V_ADST
{ fidtx8, fadst8 }, // H_ADST
{ fadst8, fidtx8 }, // V_FLIPADST
{ fidtx8, fadst8 }, // H_FLIPADST
#endif // CONFIG_EXT_TX
#endif // CONFIG_EXT_TX
};
static const transform_2d FHT_16[] = {
{ fdct16, fdct16 }, // DCT_DCT
{ fadst16, fdct16 }, // ADST_DCT
{ fdct16, fadst16 }, // DCT_ADST
{ fdct16, fdct16 }, // DCT_DCT
{ fadst16, fdct16 }, // ADST_DCT
{ fdct16, fadst16 }, // DCT_ADST
{ fadst16, fadst16 }, // ADST_ADST
#if CONFIG_EXT_TX
{ fadst16, fdct16 }, // FLIPADST_DCT
{ fdct16, fadst16 }, // DCT_FLIPADST
{ fadst16, fdct16 }, // FLIPADST_DCT
{ fdct16, fadst16 }, // DCT_FLIPADST
{ fadst16, fadst16 }, // FLIPADST_FLIPADST
{ fadst16, fadst16 }, // ADST_FLIPADST
{ fadst16, fadst16 }, // FLIPADST_ADST
{ fidtx16, fidtx16 }, // IDTX
{ fdct16, fidtx16 }, // V_DCT
{ fidtx16, fdct16 }, // H_DCT
{ fdct16, fidtx16 }, // V_DCT
{ fidtx16, fdct16 }, // H_DCT
{ fadst16, fidtx16 }, // V_ADST
{ fidtx16, fadst16 }, // H_ADST
{ fadst16, fidtx16 }, // V_FLIPADST
{ fidtx16, fadst16 }, // H_FLIPADST
#endif // CONFIG_EXT_TX
#endif // CONFIG_EXT_TX
};
#if CONFIG_EXT_TX
static const transform_2d FHT_32[] = {
{ fdct32, fdct32 }, // DCT_DCT
{ fhalfright32, fdct32 }, // ADST_DCT
{ fdct32, fhalfright32 }, // DCT_ADST
{ fhalfright32, fhalfright32 }, // ADST_ADST
{ fhalfright32, fdct32 }, // FLIPADST_DCT
{ fdct32, fhalfright32 }, // DCT_FLIPADST
{ fhalfright32, fhalfright32 }, // FLIPADST_FLIPADST
{ fhalfright32, fhalfright32 }, // ADST_FLIPADST
{ fhalfright32, fhalfright32 }, // FLIPADST_ADST
{ fidtx32, fidtx32 }, // IDTX
{ fdct32, fidtx32 }, // V_DCT
{ fidtx32, fdct32 }, // H_DCT
{ fhalfright32, fidtx32 }, // V_ADST
{ fidtx32, fhalfright32 }, // H_ADST
{ fhalfright32, fidtx32 }, // V_FLIPADST
{ fidtx32, fhalfright32 }, // H_FLIPADST
{ fdct32, fdct32 }, // DCT_DCT
{ fhalfright32, fdct32 }, // ADST_DCT
{ fdct32, fhalfright32 }, // DCT_ADST
{ fhalfright32, fhalfright32 }, // ADST_ADST
{ fhalfright32, fdct32 }, // FLIPADST_DCT
{ fdct32, fhalfright32 }, // DCT_FLIPADST
{ fhalfright32, fhalfright32 }, // FLIPADST_FLIPADST
{ fhalfright32, fhalfright32 }, // ADST_FLIPADST
{ fhalfright32, fhalfright32 }, // FLIPADST_ADST
{ fidtx32, fidtx32 }, // IDTX
{ fdct32, fidtx32 }, // V_DCT
{ fidtx32, fdct32 }, // H_DCT
{ fhalfright32, fidtx32 }, // V_ADST
{ fidtx32, fhalfright32 }, // H_ADST
{ fhalfright32, fidtx32 }, // V_FLIPADST
{ fidtx32, fhalfright32 }, // H_FLIPADST
};
static const transform_2d FHT_4x8[] = {
{ fdct8, fdct4 }, // DCT_DCT
{ fadst8, fdct4 }, // ADST_DCT
{ fdct8, fadst4 }, // DCT_ADST
{ fdct8, fdct4 }, // DCT_DCT
{ fadst8, fdct4 }, // ADST_DCT
{ fdct8, fadst4 }, // DCT_ADST
{ fadst8, fadst4 }, // ADST_ADST
{ fadst8, fdct4 }, // FLIPADST_DCT
{ fdct8, fadst4 }, // DCT_FLIPADST
{ fadst8, fdct4 }, // FLIPADST_DCT
{ fdct8, fadst4 }, // DCT_FLIPADST
{ fadst8, fadst4 }, // FLIPADST_FLIPADST
{ fadst8, fadst4 }, // ADST_FLIPADST
{ fadst8, fadst4 }, // FLIPADST_ADST
{ fidtx8, fidtx4 }, // IDTX
{ fdct8, fidtx4 }, // V_DCT
{ fidtx8, fdct4 }, // H_DCT
{ fdct8, fidtx4 }, // V_DCT
{ fidtx8, fdct4 }, // H_DCT
{ fadst8, fidtx4 }, // V_ADST
{ fidtx8, fadst4 }, // H_ADST
{ fadst8, fidtx4 }, // V_FLIPADST
@ -1244,18 +1234,18 @@ static const transform_2d FHT_4x8[] = {
};
static const transform_2d FHT_8x4[] = {
{ fdct4, fdct8 }, // DCT_DCT
{ fadst4, fdct8 }, // ADST_DCT
{ fdct4, fadst8 }, // DCT_ADST
{ fdct4, fdct8 }, // DCT_DCT
{ fadst4, fdct8 }, // ADST_DCT
{ fdct4, fadst8 }, // DCT_ADST
{ fadst4, fadst8 }, // ADST_ADST
{ fadst4, fdct8 }, // FLIPADST_DCT
{ fdct4, fadst8 }, // DCT_FLIPADST
{ fadst4, fdct8 }, // FLIPADST_DCT
{ fdct4, fadst8 }, // DCT_FLIPADST
{ fadst4, fadst8 }, // FLIPADST_FLIPADST
{ fadst4, fadst8 }, // ADST_FLIPADST
{ fadst4, fadst8 }, // FLIPADST_ADST
{ fidtx4, fidtx8 }, // IDTX
{ fdct4, fidtx8 }, // V_DCT
{ fidtx4, fdct8 }, // H_DCT
{ fdct4, fidtx8 }, // V_DCT
{ fidtx4, fdct8 }, // H_DCT
{ fadst4, fidtx8 }, // V_ADST
{ fidtx4, fadst8 }, // H_ADST
{ fadst4, fidtx8 }, // V_FLIPADST
@ -1263,8 +1253,8 @@ static const transform_2d FHT_8x4[] = {
};
#endif // CONFIG_EXT_TX
void vp10_fht4x4_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
void vp10_fht4x4_c(const int16_t *input, tran_low_t *output, int stride,
int tx_type) {
if (tx_type == DCT_DCT) {
vpx_fdct4x4_c(input, output, stride);
} else {
@ -1280,29 +1270,24 @@ void vp10_fht4x4_c(const int16_t *input, tran_low_t *output,
// Columns
for (i = 0; i < 4; ++i) {
for (j = 0; j < 4; ++j)
temp_in[j] = input[j * stride + i] * 16;
if (i == 0 && temp_in[0])
temp_in[0] += 1;
for (j = 0; j < 4; ++j) temp_in[j] = input[j * stride + i] * 16;
if (i == 0 && temp_in[0]) temp_in[0] += 1;
ht.cols(temp_in, temp_out);
for (j = 0; j < 4; ++j)
out[j * 4 + i] = temp_out[j];
for (j = 0; j < 4; ++j) out[j * 4 + i] = temp_out[j];
}
// Rows
for (i = 0; i < 4; ++i) {
for (j = 0; j < 4; ++j)
temp_in[j] = out[j + i * 4];
for (j = 0; j < 4; ++j) temp_in[j] = out[j + i * 4];
ht.rows(temp_in, temp_out);
for (j = 0; j < 4; ++j)
output[j + i * 4] = (temp_out[j] + 1) >> 2;
for (j = 0; j < 4; ++j) output[j + i * 4] = (temp_out[j] + 1) >> 2;
}
}
}
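In vp10_fht4x4_c the column pass scales the residual up by 16 for precision headroom and the row pass rounds back down with (x + 1) >> 2; the temp_in[0] += 1 nudge appears to bias DC rounding to keep the hybrid path consistent with vpx_fdct4x4_c (an inference, not documented in this diff). A usage sketch with assumed buffers:

int16_t residual[4 * 4] = { 0 }; /* source minus prediction */
tran_low_t coeffs[4 * 4];
vp10_fht4x4_c(residual, coeffs, 4 /* stride */, ADST_DCT);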
#if CONFIG_EXT_TX
void vp10_fht4x8_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
void vp10_fht4x8_c(const int16_t *input, tran_low_t *output, int stride,
int tx_type) {
const int n = 4;
const int n2 = 8;
tran_low_t out[8 * 4];
@ -1314,8 +1299,7 @@ void vp10_fht4x8_c(const int16_t *input, tran_low_t *output,
// Columns
for (i = 0; i < n; ++i) {
for (j = 0; j < n2; ++j)
temp_in[j] = input[j * stride + i] * 8;
for (j = 0; j < n2; ++j) temp_in[j] = input[j * stride + i] * 8;
ht.cols(temp_in, temp_out);
for (j = 0; j < n2; ++j)
out[j * n + i] = (tran_low_t)fdct_round_shift(temp_out[j] * Sqrt2);
@ -1323,17 +1307,15 @@ void vp10_fht4x8_c(const int16_t *input, tran_low_t *output,
// Rows
for (i = 0; i < n2; ++i) {
for (j = 0; j < n; ++j)
temp_in[j] = out[j + i * n];
for (j = 0; j < n; ++j) temp_in[j] = out[j + i * n];
ht.rows(temp_in, temp_out);
for (j = 0; j < n; ++j)
output[j + i * n] = (temp_out[j] + 1) >> 2;
for (j = 0; j < n; ++j) output[j + i * n] = (temp_out[j] + 1) >> 2;
}
// Note: overall scale factor of transform is 8 times unitary
}
void vp10_fht8x4_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
void vp10_fht8x4_c(const int16_t *input, tran_low_t *output, int stride,
int tx_type) {
const int n = 4;
const int n2 = 8;
tran_low_t out[8 * 4];
@ -1345,8 +1327,7 @@ void vp10_fht8x4_c(const int16_t *input, tran_low_t *output,
// Columns
for (i = 0; i < n2; ++i) {
for (j = 0; j < n; ++j)
temp_in[j] = input[j * stride + i] * 8;
for (j = 0; j < n; ++j) temp_in[j] = input[j * stride + i] * 8;
ht.cols(temp_in, temp_out);
for (j = 0; j < n; ++j)
out[j * n2 + i] = (tran_low_t)fdct_round_shift(temp_out[j] * Sqrt2);
@ -1354,11 +1335,9 @@ void vp10_fht8x4_c(const int16_t *input, tran_low_t *output,
// Rows
for (i = 0; i < n; ++i) {
for (j = 0; j < n2; ++j)
temp_in[j] = out[j + i * n2];
for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2];
ht.rows(temp_in, temp_out);
for (j = 0; j < n2; ++j)
output[j + i * n2] = (temp_out[j] + 1) >> 2;
for (j = 0; j < n2; ++j) output[j + i * n2] = (temp_out[j] + 1) >> 2;
}
// Note: overall scale factor of transform is 8 times unitary
}
@ -1366,13 +1345,11 @@ void vp10_fht8x4_c(const int16_t *input, tran_low_t *output,
void vp10_fdct8x8_quant_c(const int16_t *input, int stride,
tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block,
const int16_t *zbin_ptr, const int16_t *round_ptr,
const int16_t *quant_ptr,
int skip_block, const int16_t *zbin_ptr,
const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr,
uint16_t *eob_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
int eob = -1;
@ -1405,8 +1382,8 @@ void vp10_fdct8x8_quant_c(const int16_t *input, int stride,
x3 = s0 - s3;
t0 = (x0 + x1) * cospi_16_64;
t1 = (x0 - x1) * cospi_16_64;
t2 = x2 * cospi_24_64 + x3 * cospi_8_64;
t3 = -x2 * cospi_8_64 + x3 * cospi_24_64;
t2 = x2 * cospi_24_64 + x3 * cospi_8_64;
t3 = -x2 * cospi_8_64 + x3 * cospi_24_64;
output[0 * 8] = (tran_low_t)fdct_round_shift(t0);
output[2 * 8] = (tran_low_t)fdct_round_shift(t2);
output[4 * 8] = (tran_low_t)fdct_round_shift(t1);
@ -1425,10 +1402,10 @@ void vp10_fdct8x8_quant_c(const int16_t *input, int stride,
x3 = s7 + t3;
// stage 4
t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
output[1 * 8] = (tran_low_t)fdct_round_shift(t0);
output[3 * 8] = (tran_low_t)fdct_round_shift(t2);
output[5 * 8] = (tran_low_t)fdct_round_shift(t1);
@ -1441,8 +1418,7 @@ void vp10_fdct8x8_quant_c(const int16_t *input, int stride,
// Rows
for (i = 0; i < 8; ++i) {
fdct8(&intermediate[i * 8], &coeff_ptr[i * 8]);
for (j = 0; j < 8; ++j)
coeff_ptr[j + i * 8] /= 2;
for (j = 0; j < 8; ++j) coeff_ptr[j + i * 8] /= 2;
}
// TODO(jingning) Decide the need of these arguments after the
@ -1469,15 +1445,14 @@ void vp10_fdct8x8_quant_c(const int16_t *input, int stride,
qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
if (tmp)
eob = i;
if (tmp) eob = i;
}
}
*eob_ptr = eob + 1;
}
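The quantization loop above restores the sign without a branch: coeff_sign is 0 for a non-negative coefficient and -1 (all ones) for a negative one, so (tmp ^ coeff_sign) - coeff_sign negates tmp exactly when the source coefficient was negative. A standalone illustration:

static int apply_sign(int magnitude, int coeff_sign) {
  return (magnitude ^ coeff_sign) - coeff_sign;
}
/* apply_sign(5, 0) == 5;  apply_sign(5, -1) == -5 */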
void vp10_fht8x8_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
void vp10_fht8x8_c(const int16_t *input, tran_low_t *output, int stride,
int tx_type) {
if (tx_type == DCT_DCT) {
vpx_fdct8x8_c(input, output, stride);
} else {
@ -1493,17 +1468,14 @@ void vp10_fht8x8_c(const int16_t *input, tran_low_t *output,
// Columns
for (i = 0; i < 8; ++i) {
for (j = 0; j < 8; ++j)
temp_in[j] = input[j * stride + i] * 4;
for (j = 0; j < 8; ++j) temp_in[j] = input[j * stride + i] * 4;
ht.cols(temp_in, temp_out);
for (j = 0; j < 8; ++j)
out[j * 8 + i] = temp_out[j];
for (j = 0; j < 8; ++j) out[j * 8 + i] = temp_out[j];
}
// Rows
for (i = 0; i < 8; ++i) {
for (j = 0; j < 8; ++j)
temp_in[j] = out[j + i * 8];
for (j = 0; j < 8; ++j) temp_in[j] = out[j + i * 8];
ht.rows(temp_in, temp_out);
for (j = 0; j < 8; ++j)
output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1;
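The closing idiom (temp_out[j] + (temp_out[j] < 0)) >> 1 halves with rounding toward zero rather than toward minus infinity:

static tran_low_t halve_toward_zero(tran_high_t x) {
  return (tran_low_t)((x + (x < 0)) >> 1);
  /* x =  7 ->  7 >> 1       =  3
     x = -7 -> (-7 + 1) >> 1 = -3  (a plain -7 >> 1 would give -4) */
}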
@ -1567,8 +1539,8 @@ void vp10_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride) {
}
}
void vp10_fht16x16_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
void vp10_fht16x16_c(const int16_t *input, tran_low_t *output, int stride,
int tx_type) {
if (tx_type == DCT_DCT) {
vpx_fdct16x16_c(input, output, stride);
} else {
@ -1584,8 +1556,7 @@ void vp10_fht16x16_c(const int16_t *input, tran_low_t *output,
// Columns
for (i = 0; i < 16; ++i) {
for (j = 0; j < 16; ++j)
temp_in[j] = input[j * stride + i] * 4;
for (j = 0; j < 16; ++j) temp_in[j] = input[j * stride + i] * 4;
ht.cols(temp_in, temp_out);
for (j = 0; j < 16; ++j)
out[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
@ -1593,35 +1564,33 @@ void vp10_fht16x16_c(const int16_t *input, tran_low_t *output,
// Rows
for (i = 0; i < 16; ++i) {
for (j = 0; j < 16; ++j)
temp_in[j] = out[j + i * 16];
for (j = 0; j < 16; ++j) temp_in[j] = out[j + i * 16];
ht.rows(temp_in, temp_out);
for (j = 0; j < 16; ++j)
output[j + i * 16] = temp_out[j];
for (j = 0; j < 16; ++j) output[j + i * 16] = temp_out[j];
}
}
}
#if CONFIG_VP9_HIGHBITDEPTH
void vp10_highbd_fht4x4_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
void vp10_highbd_fht4x4_c(const int16_t *input, tran_low_t *output, int stride,
int tx_type) {
vp10_fht4x4_c(input, output, stride, tx_type);
}
#if CONFIG_EXT_TX
void vp10_highbd_fht8x4_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
void vp10_highbd_fht8x4_c(const int16_t *input, tran_low_t *output, int stride,
int tx_type) {
vp10_fht8x4_c(input, output, stride, tx_type);
}
void vp10_highbd_fht4x8_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
void vp10_highbd_fht4x8_c(const int16_t *input, tran_low_t *output, int stride,
int tx_type) {
vp10_fht4x8_c(input, output, stride, tx_type);
}
#endif // CONFIG_EXT_TX
void vp10_highbd_fht8x8_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
void vp10_highbd_fht8x8_c(const int16_t *input, tran_low_t *output, int stride,
int tx_type) {
vp10_fht8x8_c(input, output, stride, tx_type);
}
@ -1631,14 +1600,14 @@ void vp10_highbd_fwht4x4_c(const int16_t *input, tran_low_t *output,
}
void vp10_highbd_fht16x16_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
int stride, int tx_type) {
vp10_fht16x16_c(input, output, stride, tx_type);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
#if CONFIG_EXT_TX
void vp10_fht32x32_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
void vp10_fht32x32_c(const int16_t *input, tran_low_t *output, int stride,
int tx_type) {
if (tx_type == DCT_DCT) {
vpx_fdct32x32_c(input, output, stride);
} else {
@ -1652,8 +1621,7 @@ void vp10_fht32x32_c(const int16_t *input, tran_low_t *output,
// Columns
for (i = 0; i < 32; ++i) {
for (j = 0; j < 32; ++j)
temp_in[j] = input[j * stride + i] * 4;
for (j = 0; j < 32; ++j) temp_in[j] = input[j * stride + i] * 4;
ht.cols(temp_in, temp_out);
for (j = 0; j < 32; ++j)
out[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;
@ -1661,8 +1629,7 @@ void vp10_fht32x32_c(const int16_t *input, tran_low_t *output,
// Rows
for (i = 0; i < 32; ++i) {
for (j = 0; j < 32; ++j)
temp_in[j] = out[j + i * 32];
for (j = 0; j < 32; ++j) temp_in[j] = out[j + i * 32];
ht.rows(temp_in, temp_out);
for (j = 0; j < 32; ++j)
output[j + i * 32] =
@ -1672,8 +1639,7 @@ void vp10_fht32x32_c(const int16_t *input, tran_low_t *output,
}
// Forward identity transform.
void vp10_fwd_idtx_c(const int16_t *src_diff,
tran_low_t *coeff, int stride,
void vp10_fwd_idtx_c(const int16_t *src_diff, tran_low_t *coeff, int stride,
int bs, int tx_type) {
int r, c;
const int shift = bs < 32 ? 3 : 2;

The diff for this file is not shown because it is too large.

View file

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP10_ENCODER_ENCODEFRAME_H_
#define VP10_ENCODER_ENCODEFRAME_H_
@ -31,14 +30,14 @@ struct ThreadData;
#define VAR_HIST_SMALL_CUT_OFF 45
void vp10_setup_src_planes(struct macroblock *x,
const struct yv12_buffer_config *src,
int mi_row, int mi_col);
const struct yv12_buffer_config *src, int mi_row,
int mi_col);
void vp10_encode_frame(struct VP10_COMP *cpi);
void vp10_init_tile_data(struct VP10_COMP *cpi);
void vp10_encode_tile(struct VP10_COMP *cpi, struct ThreadData *td,
int tile_row, int tile_col);
int tile_row, int tile_col);
void vp10_set_variance_partition_thresholds(struct VP10_COMP *cpi, int q);

View file

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "./vp10_rtcd.h"
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
@ -48,27 +47,27 @@ void vp10_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
}
typedef struct vp10_token_state {
int rate;
int64_t error;
int next;
int16_t token;
tran_low_t qc;
tran_low_t dqc;
int rate;
int64_t error;
int next;
int16_t token;
tran_low_t qc;
tran_low_t dqc;
} vp10_token_state;
// These numbers are empirically obtained.
static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
{10, 6}, {8, 5},
{ 10, 6 }, { 8, 5 },
};
#define UPDATE_RD_COST()\
{\
rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);\
rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);\
}
#define UPDATE_RD_COST() \
{ \
rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0); \
rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1); \
}
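UPDATE_RD_COST evaluates the usual Lagrangian J = lambda * rate + distortion for the two trellis candidates. RDCOST itself is not shown in this diff; its conventional fixed-point shape in this code family is roughly the following (an assumption for orientation, not a quote):

/* rdmult carries lambda in fixed point; rddiv scales the distortion. */
#define RDCOST_SKETCH(RM, DM, R, D) \
  ((((int64_t)(R) * (RM) + 128) >> 8) + ((int64_t)(D) << (DM)))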
int vp10_optimize_b(MACROBLOCK *mb, int plane, int block,
TX_SIZE tx_size, int ctx) {
int vp10_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
int ctx) {
MACROBLOCKD *const xd = &mb->e_mbd;
struct macroblock_plane *const p = &mb->plane[plane];
struct macroblockd_plane *const pd = &xd->plane[plane];
@ -82,13 +81,13 @@ int vp10_optimize_b(MACROBLOCK *mb, int plane, int block,
const int eob = p->eobs[block];
const PLANE_TYPE type = pd->plane_type;
const int default_eob = get_tx2d_size(tx_size);
const int16_t* const dequant_ptr = pd->dequant;
const uint8_t* const band_translate = get_band_translate(tx_size);
const int16_t *const dequant_ptr = pd->dequant;
const uint8_t *const band_translate = get_band_translate(tx_size);
TX_TYPE tx_type = get_tx_type(type, xd, block, tx_size);
const scan_order* const so =
const scan_order *const so =
get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
const int16_t* const scan = so->scan;
const int16_t* const nb = so->neighbors;
const int16_t *const scan = so->scan;
const int16_t *const nb = so->neighbors;
const int shift = get_tx_scale(xd, tx_type, tx_size);
#if CONFIG_NEW_QUANT
int dq = get_dq_profile_from_ctx(ctx);
@ -103,16 +102,16 @@ int vp10_optimize_b(MACROBLOCK *mb, int plane, int block,
int rate0, rate1;
int64_t error0, error1;
int16_t t0, t1;
int best, band = (eob < default_eob) ?
band_translate[eob] : band_translate[eob - 1];
int best, band = (eob < default_eob) ? band_translate[eob]
: band_translate[eob - 1];
int pt, i, final_eob;
#if CONFIG_VP9_HIGHBITDEPTH
const int *cat6_high_cost = vp10_get_high_cost_table(xd->bd);
#else
const int *cat6_high_cost = vp10_get_high_cost_table(8);
#endif
unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
mb->token_costs[txsize_sqr_map[tx_size]][type][ref];
unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
mb->token_costs[txsize_sqr_map[tx_size]][type][ref];
const uint16_t *band_counts = &band_count_table[tx_size][band];
uint16_t band_left = eob - band_cum_count_table[tx_size][band] + 1;
int shortcut = 0;
@ -195,21 +194,20 @@ int vp10_optimize_b(MACROBLOCK *mb, int plane, int block,
shortcut = 0;
} else {
#if CONFIG_NEW_QUANT
shortcut = (
(vp10_dequant_abscoeff_nuq(
abs(x), dequant_ptr[rc != 0],
dequant_val[band_translate[i]]) > (abs(coeff[rc]) << shift)) &&
(vp10_dequant_abscoeff_nuq(
abs(x) - 1, dequant_ptr[rc != 0],
dequant_val[band_translate[i]]) < (abs(coeff[rc]) << shift)));
shortcut = ((vp10_dequant_abscoeff_nuq(abs(x), dequant_ptr[rc != 0],
dequant_val[band_translate[i]]) >
(abs(coeff[rc]) << shift)) &&
(vp10_dequant_abscoeff_nuq(abs(x) - 1, dequant_ptr[rc != 0],
dequant_val[band_translate[i]]) <
(abs(coeff[rc]) << shift)));
#else // CONFIG_NEW_QUANT
if ((abs(x) * dequant_ptr[rc != 0] > (abs(coeff[rc]) << shift)) &&
(abs(x) * dequant_ptr[rc != 0] < (abs(coeff[rc]) << shift) +
dequant_ptr[rc != 0]))
(abs(x) * dequant_ptr[rc != 0] <
(abs(coeff[rc]) << shift) + dequant_ptr[rc != 0]))
shortcut = 1;
else
shortcut = 0;
#endif // CONFIG_NEW_QUANT
#endif // CONFIG_NEW_QUANT
}
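In the non-NEW_QUANT branch, the second inequality is algebraically abs(x) * dq - dq < (abs(coeff[rc]) << shift), so the shortcut fires exactly when the scaled source coefficient lies strictly between the reconstructions of |x| - 1 and |x|, i.e. dropping the magnitude by one is the single alternative worth costing:

/* dq: dequantizer step; scaled = abs(coeff[rc]) << shift */
static int one_step_between(int absx, int dq, int scaled) {
  return absx * dq > scaled &&     /* |x| reconstructs above the source */
         (absx - 1) * dq < scaled; /* |x| - 1 reconstructs below it */
}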
if (shortcut) {
@ -269,9 +267,9 @@ int vp10_optimize_b(MACROBLOCK *mb, int plane, int block,
}
#if CONFIG_NEW_QUANT
dx = vp10_dequant_coeff_nuq(
x, dequant_ptr[rc != 0],
dequant_val[band_translate[i]]) - (coeff[rc] << shift);
dx = vp10_dequant_coeff_nuq(x, dequant_ptr[rc != 0],
dequant_val[band_translate[i]]) -
(coeff[rc] << shift);
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
dx >>= xd->bd - 8;
@ -300,18 +298,16 @@ int vp10_optimize_b(MACROBLOCK *mb, int plane, int block,
#if CONFIG_NEW_QUANT
tokens[i][1].dqc = vp10_dequant_abscoeff_nuq(
abs(x), dequant_ptr[rc != 0], dequant_val[band_translate[i]]);
tokens[i][1].dqc = shift ?
ROUND_POWER_OF_TWO(tokens[i][1].dqc, shift) : tokens[i][1].dqc;
if (sz)
tokens[i][1].dqc = -tokens[i][1].dqc;
tokens[i][1].dqc = shift ? ROUND_POWER_OF_TWO(tokens[i][1].dqc, shift)
: tokens[i][1].dqc;
if (sz) tokens[i][1].dqc = -tokens[i][1].dqc;
#else
tran_low_t offset = dq_step[rc != 0];
// The 32x32 transform coefficient uses half quantization step size.
// Account for the rounding difference in the dequantized coefficient
// value when the quantization index is dropped from an even number
// to an odd number.
if (shift & x)
offset += (dequant_ptr[rc != 0] & 0x01);
if (shift & x) offset += (dequant_ptr[rc != 0] & 0x01);
if (sz == 0)
tokens[i][1].dqc = dqcoeff[rc] - offset;
@ -394,10 +390,11 @@ typedef enum QUANT_FUNC {
static VP10_QUANT_FACADE
quant_func_list[VP10_XFORM_QUANT_LAST][QUANT_FUNC_LAST] = {
{vp10_quantize_fp_facade, vp10_highbd_quantize_fp_facade},
{vp10_quantize_b_facade, vp10_highbd_quantize_b_facade},
{vp10_quantize_dc_facade, vp10_highbd_quantize_dc_facade},
{NULL, NULL}};
{ vp10_quantize_fp_facade, vp10_highbd_quantize_fp_facade },
{ vp10_quantize_b_facade, vp10_highbd_quantize_b_facade },
{ vp10_quantize_dc_facade, vp10_highbd_quantize_dc_facade },
{ NULL, NULL }
};
#else
typedef enum QUANT_FUNC {
@ -407,15 +404,16 @@ typedef enum QUANT_FUNC {
static VP10_QUANT_FACADE
quant_func_list[VP10_XFORM_QUANT_LAST][QUANT_FUNC_LAST] = {
{vp10_quantize_fp_facade},
{vp10_quantize_b_facade},
{vp10_quantize_dc_facade},
{NULL}};
{ vp10_quantize_fp_facade },
{ vp10_quantize_b_facade },
{ vp10_quantize_dc_facade },
{ NULL }
};
#endif
static FWD_TXFM_OPT fwd_txfm_opt_list[VP10_XFORM_QUANT_LAST] = {
FWD_TXFM_OPT_NORMAL, FWD_TXFM_OPT_NORMAL, FWD_TXFM_OPT_DC,
FWD_TXFM_OPT_NORMAL};
FWD_TXFM_OPT_NORMAL, FWD_TXFM_OPT_NORMAL, FWD_TXFM_OPT_DC, FWD_TXFM_OPT_NORMAL
};
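quant_func_list is indexed first by the VP10_XFORM_QUANT_* mode and then by bit depth, so the callers below reduce to one table lookup; the SKIP_QUANT row is NULL because skipping bypasses quantization entirely. The call shape, mirroring vp10_xform_quant below:

if (xform_quant_idx != VP10_XFORM_QUANT_SKIP_QUANT)
  quant_func_list[xform_quant_idx][QUANT_FUNC_LOWBD](
      coeff, tx2d_size, p, qcoeff, pd, dqcoeff, eob, scan_order, &qparam);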
void vp10_xform_quant(MACROBLOCK *x, int plane, int block, int blk_row,
int blk_col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
@ -454,8 +452,7 @@ void vp10_xform_quant(MACROBLOCK *x, int plane, int block, int blk_row,
if (xform_quant_idx != VP10_XFORM_QUANT_SKIP_QUANT) {
if (LIKELY(!x->skip_block)) {
quant_func_list[xform_quant_idx][QUANT_FUNC_HIGHBD](
coeff, tx2d_size, p, qcoeff, pd, dqcoeff, eob,
scan_order, &qparam);
coeff, tx2d_size, p, qcoeff, pd, dqcoeff, eob, scan_order, &qparam);
} else {
vp10_quantize_skip(tx2d_size, qcoeff, dqcoeff, eob);
}
@ -468,8 +465,7 @@ void vp10_xform_quant(MACROBLOCK *x, int plane, int block, int blk_row,
if (xform_quant_idx != VP10_XFORM_QUANT_SKIP_QUANT) {
if (LIKELY(!x->skip_block)) {
quant_func_list[xform_quant_idx][QUANT_FUNC_LOWBD](
coeff, tx2d_size, p, qcoeff, pd, dqcoeff, eob,
scan_order, &qparam);
coeff, tx2d_size, p, qcoeff, pd, dqcoeff, eob, scan_order, &qparam);
} else {
vp10_quantize_skip(tx2d_size, qcoeff, dqcoeff, eob);
}
@ -478,8 +474,8 @@ void vp10_xform_quant(MACROBLOCK *x, int plane, int block, int blk_row,
#if CONFIG_NEW_QUANT
void vp10_xform_quant_nuq(MACROBLOCK *x, int plane, int block, int blk_row,
int blk_col, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, int ctx) {
int blk_col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
int ctx) {
MACROBLOCKD *const xd = &x->e_mbd;
const struct macroblock_plane *const p = &x->plane[plane];
const struct macroblockd_plane *const pd = &xd->plane[plane];
@ -494,7 +490,7 @@ void vp10_xform_quant_nuq(MACROBLOCK *x, int plane, int block, int blk_row,
uint16_t *const eob = &p->eobs[block];
const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
const int16_t *src_diff;
const uint8_t* band = get_band_translate(tx_size);
const uint8_t *band = get_band_translate(tx_size);
FWD_TXFM_PARAM fwd_txfm_param;
@ -513,22 +509,18 @@ void vp10_xform_quant_nuq(MACROBLOCK *x, int plane, int block, int blk_row,
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
highbd_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
if (tx_size == TX_32X32) {
highbd_quantize_32x32_nuq(coeff, get_tx2d_size(tx_size), x->skip_block,
p->quant, p->quant_shift, pd->dequant,
(const cuml_bins_type_nuq *)
p->cuml_bins_nuq[dq],
(const dequant_val_type_nuq *)
pd->dequant_val_nuq[dq],
qcoeff, dqcoeff, eob,
scan_order->scan, band);
highbd_quantize_32x32_nuq(
coeff, get_tx2d_size(tx_size), x->skip_block, p->quant,
p->quant_shift, pd->dequant,
(const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq],
(const dequant_val_type_nuq *)pd->dequant_val_nuq[dq], qcoeff,
dqcoeff, eob, scan_order->scan, band);
} else {
highbd_quantize_nuq(coeff, get_tx2d_size(tx_size), x->skip_block,
p->quant, p->quant_shift, pd->dequant,
(const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq],
(const dequant_val_type_nuq *)
pd->dequant_val_nuq[dq],
qcoeff, dqcoeff, eob,
scan_order->scan, band);
(const dequant_val_type_nuq *)pd->dequant_val_nuq[dq],
qcoeff, dqcoeff, eob, scan_order->scan, band);
}
return;
}
@ -536,20 +528,17 @@ void vp10_xform_quant_nuq(MACROBLOCK *x, int plane, int block, int blk_row,
fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
if (tx_size == TX_32X32) {
quantize_32x32_nuq(coeff, 1024, x->skip_block,
p->quant, p->quant_shift, pd->dequant,
quantize_32x32_nuq(coeff, 1024, x->skip_block, p->quant, p->quant_shift,
pd->dequant,
(const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq],
(const dequant_val_type_nuq *)
pd->dequant_val_nuq[dq],
qcoeff, dqcoeff, eob,
scan_order->scan, band);
(const dequant_val_type_nuq *)pd->dequant_val_nuq[dq],
qcoeff, dqcoeff, eob, scan_order->scan, band);
} else {
quantize_nuq(coeff, get_tx2d_size(tx_size), x->skip_block,
p->quant, p->quant_shift, pd->dequant,
quantize_nuq(coeff, get_tx2d_size(tx_size), x->skip_block, p->quant,
p->quant_shift, pd->dequant,
(const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq],
(const dequant_val_type_nuq *)pd->dequant_val_nuq[dq],
qcoeff, dqcoeff, eob,
scan_order->scan, band);
(const dequant_val_type_nuq *)pd->dequant_val_nuq[dq], qcoeff,
dqcoeff, eob, scan_order->scan, band);
}
}
@ -570,7 +559,7 @@ void vp10_xform_quant_fp_nuq(MACROBLOCK *x, int plane, int block, int blk_row,
uint16_t *const eob = &p->eobs[block];
const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
const int16_t *src_diff;
const uint8_t* band = get_band_translate(tx_size);
const uint8_t *band = get_band_translate(tx_size);
FWD_TXFM_PARAM fwd_txfm_param;
@ -589,23 +578,17 @@ void vp10_xform_quant_fp_nuq(MACROBLOCK *x, int plane, int block, int blk_row,
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
highbd_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
if (tx_size == TX_32X32) {
highbd_quantize_32x32_fp_nuq(coeff, get_tx2d_size(tx_size), x->skip_block,
p->quant_fp, pd->dequant,
(const cuml_bins_type_nuq *)
p->cuml_bins_nuq[dq],
(const dequant_val_type_nuq *)
pd->dequant_val_nuq[dq],
qcoeff, dqcoeff, eob,
scan_order->scan, band);
highbd_quantize_32x32_fp_nuq(
coeff, get_tx2d_size(tx_size), x->skip_block, p->quant_fp,
pd->dequant, (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq],
(const dequant_val_type_nuq *)pd->dequant_val_nuq[dq], qcoeff,
dqcoeff, eob, scan_order->scan, band);
} else {
highbd_quantize_fp_nuq(coeff, get_tx2d_size(tx_size), x->skip_block,
p->quant_fp, pd->dequant,
(const cuml_bins_type_nuq *)
p->cuml_bins_nuq[dq],
(const dequant_val_type_nuq *)
pd->dequant_val_nuq[dq],
qcoeff, dqcoeff, eob,
scan_order->scan, band);
highbd_quantize_fp_nuq(
coeff, get_tx2d_size(tx_size), x->skip_block, p->quant_fp,
pd->dequant, (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq],
(const dequant_val_type_nuq *)pd->dequant_val_nuq[dq], qcoeff,
dqcoeff, eob, scan_order->scan, band);
}
return;
}
@ -615,21 +598,15 @@ void vp10_xform_quant_fp_nuq(MACROBLOCK *x, int plane, int block, int blk_row,
if (tx_size == TX_32X32) {
quantize_32x32_fp_nuq(coeff, get_tx2d_size(tx_size), x->skip_block,
p->quant_fp, pd->dequant,
(const cuml_bins_type_nuq *)
p->cuml_bins_nuq[dq],
(const dequant_val_type_nuq *)
pd->dequant_val_nuq[dq],
qcoeff, dqcoeff, eob,
scan_order->scan, band);
(const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq],
(const dequant_val_type_nuq *)pd->dequant_val_nuq[dq],
qcoeff, dqcoeff, eob, scan_order->scan, band);
} else {
quantize_fp_nuq(coeff, get_tx2d_size(tx_size), x->skip_block,
p->quant_fp, pd->dequant,
(const cuml_bins_type_nuq *)
p->cuml_bins_nuq[dq],
(const dequant_val_type_nuq *)
pd->dequant_val_nuq[dq],
qcoeff, dqcoeff, eob,
scan_order->scan, band);
quantize_fp_nuq(coeff, get_tx2d_size(tx_size), x->skip_block, p->quant_fp,
pd->dequant,
(const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq],
(const dequant_val_type_nuq *)pd->dequant_val_nuq[dq],
qcoeff, dqcoeff, eob, scan_order->scan, band);
}
}
@ -666,19 +643,15 @@ void vp10_xform_quant_dc_nuq(MACROBLOCK *x, int plane, int block, int blk_row,
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
highbd_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
if (tx_size == TX_32X32) {
highbd_quantize_dc_32x32_nuq(coeff, get_tx2d_size(tx_size), x->skip_block,
p->quant[0], p->quant_shift[0],
pd->dequant[0],
p->cuml_bins_nuq[dq][0],
pd->dequant_val_nuq[dq][0],
qcoeff, dqcoeff, eob);
highbd_quantize_dc_32x32_nuq(
coeff, get_tx2d_size(tx_size), x->skip_block, p->quant[0],
p->quant_shift[0], pd->dequant[0], p->cuml_bins_nuq[dq][0],
pd->dequant_val_nuq[dq][0], qcoeff, dqcoeff, eob);
} else {
highbd_quantize_dc_nuq(coeff, get_tx2d_size(tx_size), x->skip_block,
p->quant[0], p->quant_shift[0],
pd->dequant[0],
p->quant[0], p->quant_shift[0], pd->dequant[0],
p->cuml_bins_nuq[dq][0],
pd->dequant_val_nuq[dq][0],
qcoeff, dqcoeff, eob);
pd->dequant_val_nuq[dq][0], qcoeff, dqcoeff, eob);
}
return;
}
@ -688,15 +661,12 @@ void vp10_xform_quant_dc_nuq(MACROBLOCK *x, int plane, int block, int blk_row,
if (tx_size == TX_32X32) {
quantize_dc_32x32_nuq(coeff, get_tx2d_size(tx_size), x->skip_block,
p->quant[0], p->quant_shift[0], pd->dequant[0],
p->cuml_bins_nuq[dq][0],
pd->dequant_val_nuq[dq][0],
p->cuml_bins_nuq[dq][0], pd->dequant_val_nuq[dq][0],
qcoeff, dqcoeff, eob);
} else {
quantize_dc_nuq(coeff, get_tx2d_size(tx_size), x->skip_block,
p->quant[0], p->quant_shift[0], pd->dequant[0],
p->cuml_bins_nuq[dq][0],
pd->dequant_val_nuq[dq][0],
qcoeff, dqcoeff, eob);
quantize_dc_nuq(coeff, get_tx2d_size(tx_size), x->skip_block, p->quant[0],
p->quant_shift[0], pd->dequant[0], p->cuml_bins_nuq[dq][0],
pd->dequant_val_nuq[dq][0], qcoeff, dqcoeff, eob);
}
}
@ -734,18 +704,15 @@ void vp10_xform_quant_dc_fp_nuq(MACROBLOCK *x, int plane, int block,
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
highbd_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
if (tx_size == TX_32X32) {
highbd_quantize_dc_32x32_fp_nuq(coeff, get_tx2d_size(tx_size),
x->skip_block,
p->quant_fp[0], pd->dequant[0],
p->cuml_bins_nuq[dq][0],
pd->dequant_val_nuq[dq][0],
qcoeff, dqcoeff, eob);
highbd_quantize_dc_32x32_fp_nuq(
coeff, get_tx2d_size(tx_size), x->skip_block, p->quant_fp[0],
pd->dequant[0], p->cuml_bins_nuq[dq][0], pd->dequant_val_nuq[dq][0],
qcoeff, dqcoeff, eob);
} else {
highbd_quantize_dc_fp_nuq(coeff, get_tx2d_size(tx_size), x->skip_block,
p->quant_fp[0], pd->dequant[0],
p->cuml_bins_nuq[dq][0],
pd->dequant_val_nuq[dq][0],
qcoeff, dqcoeff, eob);
highbd_quantize_dc_fp_nuq(
coeff, get_tx2d_size(tx_size), x->skip_block, p->quant_fp[0],
pd->dequant[0], p->cuml_bins_nuq[dq][0], pd->dequant_val_nuq[dq][0],
qcoeff, dqcoeff, eob);
}
return;
}
@ -756,21 +723,17 @@ void vp10_xform_quant_dc_fp_nuq(MACROBLOCK *x, int plane, int block,
quantize_dc_32x32_fp_nuq(coeff, get_tx2d_size(tx_size), x->skip_block,
p->quant_fp[0], pd->dequant[0],
p->cuml_bins_nuq[dq][0],
pd->dequant_val_nuq[dq][0],
qcoeff, dqcoeff, eob);
pd->dequant_val_nuq[dq][0], qcoeff, dqcoeff, eob);
} else {
quantize_dc_fp_nuq(coeff, get_tx2d_size(tx_size), x->skip_block,
p->quant_fp[0], pd->dequant[0],
p->cuml_bins_nuq[dq][0],
pd->dequant_val_nuq[dq][0],
qcoeff, dqcoeff, eob);
p->quant_fp[0], pd->dequant[0], p->cuml_bins_nuq[dq][0],
pd->dequant_val_nuq[dq][0], qcoeff, dqcoeff, eob);
}
}
#endif // CONFIG_NEW_QUANT
static void encode_block(int plane, int block, int blk_row, int blk_col,
BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) {
BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
struct encode_b_args *const args = arg;
MACROBLOCK *const x = args->x;
MACROBLOCKD *const xd = &x->e_mbd;
@ -806,8 +769,8 @@ static void encode_block(int plane, int block, int blk_row, int blk_col,
vp10_xform_quant_fp_nuq(x, plane, block, blk_row, blk_col, plane_bsize,
tx_size, ctx);
#else
vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize,
tx_size, VP10_XFORM_QUANT_FP);
vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
VP10_XFORM_QUANT_FP);
#endif // CONFIG_NEW_QUANT
}
#if CONFIG_VAR_TX
@ -831,11 +794,9 @@ static void encode_block(int plane, int block, int blk_row, int blk_col,
}
#endif
if (p->eobs[block])
*(args->skip) = 0;
if (p->eobs[block]) *(args->skip) = 0;
if (p->eobs[block] == 0)
return;
if (p->eobs[block] == 0) return;
// inverse transform parameters
inv_txfm_param.tx_type = get_tx_type(pd->plane_type, xd, block, tx_size);
@ -865,9 +826,10 @@ static void encode_block_inter(int plane, int block, int blk_row, int blk_col,
const struct macroblockd_plane *const pd = &xd->plane[plane];
const int tx_row = blk_row >> (1 - pd->subsampling_y);
const int tx_col = blk_col >> (1 - pd->subsampling_x);
const TX_SIZE plane_tx_size = plane ?
get_uv_tx_size_impl(mbmi->inter_tx_size[tx_row][tx_col], bsize, 0, 0) :
mbmi->inter_tx_size[tx_row][tx_col];
const TX_SIZE plane_tx_size =
plane ? get_uv_tx_size_impl(mbmi->inter_tx_size[tx_row][tx_col], bsize, 0,
0)
: mbmi->inter_tx_size[tx_row][tx_col];
int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
@ -877,12 +839,10 @@ static void encode_block_inter(int plane, int block, int blk_row, int blk_col,
if (xd->mb_to_right_edge < 0)
max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
return;
if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
if (tx_size == plane_tx_size) {
encode_block(plane, block, blk_row, blk_col, plane_bsize,
tx_size, arg);
encode_block(plane, block, blk_row, blk_col, plane_bsize, tx_size, arg);
} else {
int bsl = b_width_log2_lookup[bsize];
int i;
@ -899,19 +859,18 @@ static void encode_block_inter(int plane, int block, int blk_row, int blk_col,
const int offsetc = blk_col + ((i & 0x01) << bsl);
int step = num_4x4_blocks_txsize_lookup[tx_size - 1];
if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide)
continue;
if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
encode_block_inter(plane, block + i * step, offsetr, offsetc,
plane_bsize, tx_size - 1, arg);
encode_block_inter(plane, block + i * step, offsetr, offsetc, plane_bsize,
tx_size - 1, arg);
}
}
}
#endif
static void encode_block_pass1(int plane, int block, int blk_row, int blk_col,
BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) {
BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
void *arg) {
MACROBLOCK *const x = (MACROBLOCK *)arg;
MACROBLOCKD *const xd = &x->e_mbd;
struct macroblock_plane *const p = &x->plane[plane];
@ -928,19 +887,19 @@ static void encode_block_pass1(int plane, int block, int blk_row, int blk_col,
vp10_xform_quant_fp_nuq(x, plane, block, blk_row, blk_col, plane_bsize,
tx_size, ctx);
#else
vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize,
tx_size, VP10_XFORM_QUANT_B);
vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
VP10_XFORM_QUANT_B);
#endif // CONFIG_NEW_QUANT
if (p->eobs[block] > 0) {
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
vp10_highbd_iwht4x4_add(dqcoeff, dst, pd->dst.stride,
p->eobs[block], xd->bd);
vp10_highbd_iwht4x4_add(dqcoeff, dst, pd->dst.stride, p->eobs[block],
xd->bd);
} else {
vp10_highbd_idct4x4_add(dqcoeff, dst, pd->dst.stride,
p->eobs[block], xd->bd);
vp10_highbd_idct4x4_add(dqcoeff, dst, pd->dst.stride, p->eobs[block],
xd->bd);
}
return;
}
@ -963,13 +922,12 @@ void vp10_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
MACROBLOCKD *const xd = &x->e_mbd;
struct optimize_ctx ctx;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
struct encode_b_args arg = {x, &ctx, &mbmi->skip, NULL, NULL, 1};
struct encode_b_args arg = { x, &ctx, &mbmi->skip, NULL, NULL, 1 };
int plane;
mbmi->skip = 1;
if (x->skip)
return;
if (x->skip) return;
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
#if CONFIG_VAR_TX
@ -986,7 +944,7 @@ void vp10_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
int step = num_4x4_blocks_txsize_lookup[max_tx_size];
vp10_get_entropy_contexts(bsize, TX_4X4, pd, ctx.ta[plane], ctx.tl[plane]);
#else
const struct macroblockd_plane* const pd = &xd->plane[plane];
const struct macroblockd_plane *const pd = &xd->plane[plane];
const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi, pd) : mbmi->tx_size;
vp10_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane], ctx.tl[plane]);
#endif
@ -997,8 +955,8 @@ void vp10_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
#if CONFIG_VAR_TX
for (idy = 0; idy < mi_height; idy += bh) {
for (idx = 0; idx < mi_width; idx += bh) {
encode_block_inter(plane, block, idy, idx, plane_bsize,
max_tx_size, &arg);
encode_block_inter(plane, block, idy, idx, plane_bsize, max_tx_size,
&arg);
block += step;
}
}
@ -1014,23 +972,21 @@ void vp10_encode_sb_supertx(MACROBLOCK *x, BLOCK_SIZE bsize) {
MACROBLOCKD *const xd = &x->e_mbd;
struct optimize_ctx ctx;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
struct encode_b_args arg = {x, &ctx, &mbmi->skip, NULL, NULL, 1};
struct encode_b_args arg = { x, &ctx, &mbmi->skip, NULL, NULL, 1 };
int plane;
mbmi->skip = 1;
if (x->skip)
return;
if (x->skip) return;
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
const struct macroblockd_plane* const pd = &xd->plane[plane];
const struct macroblockd_plane *const pd = &xd->plane[plane];
#if CONFIG_VAR_TX
const TX_SIZE tx_size = TX_4X4;
#else
const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi, pd) : mbmi->tx_size;
#endif
vp10_subtract_plane(x, bsize, plane);
vp10_get_entropy_contexts(bsize, tx_size, pd,
ctx.ta[plane], ctx.tl[plane]);
vp10_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane], ctx.tl[plane]);
arg.ta = ctx.ta[plane];
arg.tl = ctx.tl[plane];
vp10_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block,
@ -1040,9 +996,9 @@ void vp10_encode_sb_supertx(MACROBLOCK *x, BLOCK_SIZE bsize) {
#endif // CONFIG_SUPERTX
void vp10_encode_block_intra(int plane, int block, int blk_row, int blk_col,
BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) {
struct encode_b_args* const args = arg;
BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
void *arg) {
struct encode_b_args *const args = arg;
MACROBLOCK *const x = args->x;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
@ -1097,7 +1053,7 @@ void vp10_encode_block_intra(int plane, int block, int blk_row, int blk_col,
#if CONFIG_NEW_QUANT
vp10_xform_quant_fp_nuq(x, plane, block, blk_row, blk_col, plane_bsize,
tx_size, ctx);
#else // CONFIG_NEW_QUANT
#else // CONFIG_NEW_QUANT
vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
VP10_XFORM_QUANT_FP);
#endif // CONFIG_NEW_QUANT
@ -1139,12 +1095,12 @@ void vp10_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane,
ENTROPY_CONTEXT ta[2 * MAX_MIB_SIZE];
ENTROPY_CONTEXT tl[2 * MAX_MIB_SIZE];
struct encode_b_args arg = {x, NULL, &xd->mi[0]->mbmi.skip,
ta, tl, enable_optimize_b};
struct encode_b_args arg = { x, NULL, &xd->mi[0]->mbmi.skip,
ta, tl, enable_optimize_b };
if (enable_optimize_b) {
const struct macroblockd_plane* const pd = &xd->plane[plane];
const TX_SIZE tx_size = plane ? get_uv_tx_size(&xd->mi[0]->mbmi, pd) :
xd->mi[0]->mbmi.tx_size;
const struct macroblockd_plane *const pd = &xd->plane[plane];
const TX_SIZE tx_size =
plane ? get_uv_tx_size(&xd->mi[0]->mbmi, pd) : xd->mi[0]->mbmi.tx_size;
vp10_get_entropy_contexts(bsize, tx_size, pd, ta, tl);
}
vp10_foreach_transformed_block_in_plane(xd, bsize, plane,

View file

@ -45,14 +45,13 @@ void vp10_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize);
void vp10_encode_sb_supertx(MACROBLOCK *x, BLOCK_SIZE bsize);
#endif // CONFIG_SUPERTX
void vp10_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize);
void vp10_xform_quant(MACROBLOCK *x, int plane, int block,
int blk_row, int blk_col,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
void vp10_xform_quant(MACROBLOCK *x, int plane, int block, int blk_row,
int blk_col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
VP10_XFORM_QUANT xform_quant_idx);
#if CONFIG_NEW_QUANT
void vp10_xform_quant_nuq(MACROBLOCK *x, int plane, int block, int blk_row,
int blk_col, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, int ctx);
int blk_col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
int ctx);
void vp10_xform_quant_dc_nuq(MACROBLOCK *x, int plane, int block, int blk_row,
int blk_col, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, int ctx);
@ -65,14 +64,14 @@ void vp10_xform_quant_dc_fp_nuq(MACROBLOCK *x, int plane, int block,
int ctx);
#endif
int vp10_optimize_b(MACROBLOCK *mb, int plane, int block,
TX_SIZE tx_size, int ctx);
int vp10_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
int ctx);
void vp10_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane);
void vp10_encode_block_intra(int plane, int block, int blk_row, int blk_col,
BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg);
BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
void *arg);
void vp10_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane,
int enable_optimize_b);

View file

@ -31,15 +31,15 @@ void vp10_entropy_mv_init(void) {
vp10_tokens_from_tree(mv_fp_encodings, vp10_mv_fp_tree);
}
static void encode_mv_component(vp10_writer* w, int comp,
const nmv_component* mvcomp, int usehp) {
static void encode_mv_component(vp10_writer *w, int comp,
const nmv_component *mvcomp, int usehp) {
int offset;
const int sign = comp < 0;
const int mag = sign ? -comp : comp;
const int mv_class = vp10_get_mv_class(mag - 1, &offset);
const int d = offset >> 3; // int mv data
const int fr = (offset >> 1) & 3; // fractional mv data
const int hp = offset & 1; // high precision mv data
const int d = offset >> 3; // int mv data
const int fr = (offset >> 1) & 3; // fractional mv data
const int hp = offset & 1; // high precision mv data
assert(comp != 0);
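A motion-vector component is coded as sign, magnitude class, and class offset; the shifts above unpack the offset into integer bits d, a 2-bit quarter-pel fraction fr, and a high-precision eighth-pel bit hp. The inverse packing, implied by the decomposition (a sketch, not code from this diff):

static int pack_mv_offset(int d, int fr, int hp) {
  return (d << 3) | (fr << 1) | hp; /* magnitude = class base + offset + 1 */
}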
@ -48,33 +48,30 @@ static void encode_mv_component(vp10_writer* w, int comp,
// Class
vp10_write_token(w, vp10_mv_class_tree, mvcomp->classes,
&mv_class_encodings[mv_class]);
&mv_class_encodings[mv_class]);
// Integer bits
if (mv_class == MV_CLASS_0) {
vp10_write_token(w, vp10_mv_class0_tree, mvcomp->class0,
&mv_class0_encodings[d]);
&mv_class0_encodings[d]);
} else {
int i;
const int n = mv_class + CLASS0_BITS - 1; // number of bits
for (i = 0; i < n; ++i)
vp10_write(w, (d >> i) & 1, mvcomp->bits[i]);
for (i = 0; i < n; ++i) vp10_write(w, (d >> i) & 1, mvcomp->bits[i]);
}
// Fractional bits
vp10_write_token(w, vp10_mv_fp_tree,
mv_class == MV_CLASS_0 ? mvcomp->class0_fp[d] : mvcomp->fp,
&mv_fp_encodings[fr]);
mv_class == MV_CLASS_0 ? mvcomp->class0_fp[d] : mvcomp->fp,
&mv_fp_encodings[fr]);
// High precision bit
if (usehp)
vp10_write(w, hp,
mv_class == MV_CLASS_0 ? mvcomp->class0_hp : mvcomp->hp);
vp10_write(w, hp, mv_class == MV_CLASS_0 ? mvcomp->class0_hp : mvcomp->hp);
}
static void build_nmv_component_cost_table(int *mvcost,
const nmv_component* const mvcomp,
const nmv_component *const mvcomp,
int usehp) {
int i, v;
int sign_cost[2], class_cost[MV_CLASSES], class0_cost[CLASS0_SIZE];
@ -107,16 +104,15 @@ static void build_nmv_component_cost_table(int *mvcost,
z = v - 1;
c = vp10_get_mv_class(z, &o);
cost += class_cost[c];
d = (o >> 3); /* int mv data */
f = (o >> 1) & 3; /* fractional pel mv data */
e = (o & 1); /* high precision mv data */
d = (o >> 3); /* int mv data */
f = (o >> 1) & 3; /* fractional pel mv data */
e = (o & 1); /* high precision mv data */
if (c == MV_CLASS_0) {
cost += class0_cost[d];
} else {
int i, b;
b = c + CLASS0_BITS - 1; /* number of bits */
for (i = 0; i < b; ++i)
cost += bits_cost[i][((d >> i) & 1)];
b = c + CLASS0_BITS - 1; /* number of bits */
for (i = 0; i < b; ++i) cost += bits_cost[i][((d >> i) & 1)];
}
if (c == MV_CLASS_0) {
cost += class0_fp_cost[d][f];
@ -137,14 +133,14 @@ static void build_nmv_component_cost_table(int *mvcost,
static void update_mv(vp10_writer *w, const unsigned int ct[2], vpx_prob *cur_p,
vpx_prob upd_p) {
(void) upd_p;
(void)upd_p;
vp10_cond_prob_diff_update(w, cur_p, ct);
}
static void write_mv_update(const vpx_tree_index *tree,
vpx_prob probs[/*n - 1*/],
const unsigned int counts[/*n - 1*/],
int n, vp10_writer *w) {
const unsigned int counts[/*n - 1*/], int n,
vp10_writer *w) {
int i;
unsigned int branch_ct[32][2];
@ -164,8 +160,8 @@ void vp10_write_nmv_probs(VP10_COMMON *cm, int usehp, vp10_writer *w,
for (nmv_ctx = 0; nmv_ctx < NMV_CONTEXTS; ++nmv_ctx) {
nmv_context *const mvc = &cm->fc->nmvc[nmv_ctx];
nmv_context_counts *const counts = &nmv_counts[nmv_ctx];
write_mv_update(vp10_mv_joint_tree, mvc->joints, counts->joints,
MV_JOINTS, w);
write_mv_update(vp10_mv_joint_tree, mvc->joints, counts->joints, MV_JOINTS,
w);
vp10_cond_prob_diff_update(w, &mvc->zero_rmv, counts->zero_rmv);
@ -203,7 +199,8 @@ void vp10_write_nmv_probs(VP10_COMMON *cm, int usehp, vp10_writer *w,
nmv_context *const mvc = &cm->fc->nmvc;
nmv_context_counts *const counts = nmv_counts;
write_mv_update(vp10_mv_joint_tree, mvc->joints, counts->joints, MV_JOINTS, w);
write_mv_update(vp10_mv_joint_tree, mvc->joints, counts->joints, MV_JOINTS,
w);
for (i = 0; i < 2; ++i) {
nmv_component *comp = &mvc->comps[i];
@ -237,25 +234,21 @@ void vp10_write_nmv_probs(VP10_COMMON *cm, int usehp, vp10_writer *w,
#endif
}
void vp10_encode_mv(VP10_COMP* cpi, vp10_writer* w,
const MV* mv, const MV* ref,
void vp10_encode_mv(VP10_COMP *cpi, vp10_writer *w, const MV *mv, const MV *ref,
#if CONFIG_REF_MV
int is_compound,
int is_compound,
#endif
const nmv_context* mvctx, int usehp) {
const MV diff = {mv->row - ref->row,
mv->col - ref->col};
const nmv_context *mvctx, int usehp) {
const MV diff = { mv->row - ref->row, mv->col - ref->col };
const MV_JOINT_TYPE j = vp10_get_mv_joint(&diff);
usehp = usehp && vp10_use_mv_hp(ref);
#if CONFIG_REF_MV && !CONFIG_EXT_INTER
if (is_compound) {
vp10_write(w, (j == MV_JOINT_ZERO), mvctx->zero_rmv);
if (j == MV_JOINT_ZERO)
return;
if (j == MV_JOINT_ZERO) return;
} else {
if (j == MV_JOINT_ZERO)
assert(0);
if (j == MV_JOINT_ZERO) assert(0);
}
#endif
@ -263,7 +256,8 @@ void vp10_encode_mv(VP10_COMP* cpi, vp10_writer* w,
(void)is_compound;
#endif
vp10_write_token(w, vp10_mv_joint_tree, mvctx->joints, &mv_joint_encodings[j]);
vp10_write_token(w, vp10_mv_joint_tree, mvctx->joints,
&mv_joint_encodings[j]);
if (mv_joint_vertical(j))
encode_mv_component(w, diff.row, &mvctx->comps[0], usehp);
@ -279,7 +273,7 @@ void vp10_encode_mv(VP10_COMP* cpi, vp10_writer* w,
}
void vp10_build_nmv_cost_table(int *mvjoint, int *mvcost[2],
const nmv_context* ctx, int usehp) {
const nmv_context *ctx, int usehp) {
vp10_cost_tokens(mvjoint, ctx->joints, vp10_mv_joint_tree);
build_nmv_component_cost_table(mvcost[0], &ctx->comps[0], usehp);
build_nmv_component_cost_table(mvcost[1], &ctx->comps[1], usehp);
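Once built, pricing a candidate vector is three lookups: the joint class plus one per-component cost indexed by the (possibly negative) difference from the prediction. A hedged usage sketch; it assumes mvcost[0]/mvcost[1] point at the centers of their tables so negative indices are valid, as is conventional in this code:

const MV diff = { mv.row - ref.row, mv.col - ref.col };
const int cost = mvjoint[vp10_get_mv_joint(&diff)] +
                 mvcost[0][diff.row] + mvcost[1][diff.col];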
@ -302,8 +296,8 @@ static void inc_mvs(const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT *mbmi_ext,
if (mode == NEWMV || mode == NEWFROMNEARMV || mode == NEW_NEWMV) {
for (i = 0; i < 1 + has_second_ref(mbmi); ++i) {
const MV *ref = &mbmi_ext->ref_mvs[mbmi->ref_frame[i]][mv_idx].as_mv;
const MV diff = {mvs[i].as_mv.row - ref->row,
mvs[i].as_mv.col - ref->col};
const MV diff = { mvs[i].as_mv.row - ref->row,
mvs[i].as_mv.col - ref->col };
#if CONFIG_REF_MV
int nmv_ctx = vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[i]],
mbmi_ext->ref_mv_stack[mbmi->ref_frame[i]]);
@ -314,8 +308,8 @@ static void inc_mvs(const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT *mbmi_ext,
}
} else if (mode == NEAREST_NEWMV || mode == NEAR_NEWMV) {
const MV *ref = &mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0].as_mv;
const MV diff = {mvs[1].as_mv.row - ref->row,
mvs[1].as_mv.col - ref->col};
const MV diff = { mvs[1].as_mv.row - ref->row,
mvs[1].as_mv.col - ref->col };
#if CONFIG_REF_MV
int nmv_ctx = vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[1]],
mbmi_ext->ref_mv_stack[mbmi->ref_frame[1]]);
@ -324,8 +318,8 @@ static void inc_mvs(const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT *mbmi_ext,
vp10_inc_mv(&diff, counts, vp10_use_mv_hp(ref));
} else if (mode == NEW_NEARESTMV || mode == NEW_NEARMV) {
const MV *ref = &mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0].as_mv;
const MV diff = {mvs[0].as_mv.row - ref->row,
mvs[0].as_mv.col - ref->col};
const MV diff = { mvs[0].as_mv.row - ref->row,
mvs[0].as_mv.col - ref->col };
#if CONFIG_REF_MV
int nmv_ctx = vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[0]],
mbmi_ext->ref_mv_stack[mbmi->ref_frame[0]]);
@ -335,9 +329,7 @@ static void inc_mvs(const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT *mbmi_ext,
}
}
static void inc_mvs_sub8x8(const MODE_INFO *mi,
int block,
const int_mv mvs[2],
static void inc_mvs_sub8x8(const MODE_INFO *mi, int block, const int_mv mvs[2],
#if CONFIG_REF_MV
const MB_MODE_INFO_EXT *mbmi_ext,
#endif
@ -353,8 +345,8 @@ static void inc_mvs_sub8x8(const MODE_INFO *mi,
if (mode == NEWMV || mode == NEWFROMNEARMV || mode == NEW_NEWMV) {
for (i = 0; i < 1 + has_second_ref(&mi->mbmi); ++i) {
const MV *ref = &mi->bmi[block].ref_mv[i].as_mv;
const MV diff = {mvs[i].as_mv.row - ref->row,
mvs[i].as_mv.col - ref->col};
const MV diff = { mvs[i].as_mv.row - ref->row,
mvs[i].as_mv.col - ref->col };
#if CONFIG_REF_MV
int nmv_ctx = vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[i]],
mbmi_ext->ref_mv_stack[mbmi->ref_frame[i]]);
@ -364,8 +356,8 @@ static void inc_mvs_sub8x8(const MODE_INFO *mi,
}
} else if (mode == NEAREST_NEWMV || mode == NEAR_NEWMV) {
const MV *ref = &mi->bmi[block].ref_mv[1].as_mv;
const MV diff = {mvs[1].as_mv.row - ref->row,
mvs[1].as_mv.col - ref->col};
const MV diff = { mvs[1].as_mv.row - ref->row,
mvs[1].as_mv.col - ref->col };
#if CONFIG_REF_MV
int nmv_ctx = vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[1]],
mbmi_ext->ref_mv_stack[mbmi->ref_frame[1]]);
@ -374,8 +366,8 @@ static void inc_mvs_sub8x8(const MODE_INFO *mi,
vp10_inc_mv(&diff, counts, vp10_use_mv_hp(ref));
} else if (mode == NEW_NEARESTMV || mode == NEW_NEARMV) {
const MV *ref = &mi->bmi[block].ref_mv[0].as_mv;
const MV diff = {mvs[0].as_mv.row - ref->row,
mvs[0].as_mv.col - ref->col};
const MV diff = { mvs[0].as_mv.row - ref->row,
mvs[0].as_mv.col - ref->col };
#if CONFIG_REF_MV
int nmv_ctx = vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[0]],
mbmi_ext->ref_mv_stack[mbmi->ref_frame[0]]);
@ -405,8 +397,8 @@ static void inc_mvs(const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT *mbmi_ext,
#else
const MV *ref = &mbmi_ext->ref_mvs[mbmi->ref_frame[i]][0].as_mv;
#endif
const MV diff = {mvs[i].as_mv.row - ref->row,
mvs[i].as_mv.col - ref->col};
const MV diff = { mvs[i].as_mv.row - ref->row,
mvs[i].as_mv.col - ref->col };
vp10_inc_mv(&diff, counts, vp10_use_mv_hp(ref));
}
}
@ -431,8 +423,7 @@ void vp10_update_mv_count(ThreadData *td) {
if (have_newmv_in_inter_mode(mi->bmi[i].as_mode))
inc_mvs_sub8x8(mi, i, mi->bmi[i].as_mv,
#if CONFIG_REF_MV
mbmi_ext,
td->counts->mv);
mbmi_ext, td->counts->mv);
#else
&td->counts->mv);
#endif
@ -440,8 +431,7 @@ void vp10_update_mv_count(ThreadData *td) {
if (mi->bmi[i].as_mode == NEWMV)
inc_mvs(mbmi, mbmi_ext, mi->bmi[i].as_mv,
#if CONFIG_REF_MV
mi->bmi[i].pred_mv_s8,
td->counts->mv);
mi->bmi[i].pred_mv_s8, td->counts->mv);
#else
&td->counts->mv);
#endif
@ -456,11 +446,9 @@ void vp10_update_mv_count(ThreadData *td) {
#endif // CONFIG_EXT_INTER
inc_mvs(mbmi, mbmi_ext, mbmi->mv,
#if CONFIG_REF_MV
mbmi->pred_mv,
td->counts->mv);
mbmi->pred_mv, td->counts->mv);
#else
&td->counts->mv);
#endif
}
}
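For reference, the hunks above reflow the MV-vs-predictor difference that feeds the entropy counters. A minimal standalone sketch of that computation, in the brace-initializer spacing this commit applies (the values and the reduced struct are hypothetical, not from the tree):
#include <stdio.h>
typedef struct MV {
  int row, col;
} MV;
int main(void) {
  const MV ref = { 12, -7 };  /* hypothetical predictor */
  const MV mv = { 15, -4 };   /* hypothetical coded motion vector */
  /* Component-wise difference, as counted by inc_mvs() above. */
  const MV diff = { mv.row - ref.row, mv.col - ref.col };
  printf("diff = (%d, %d)\n", diff.row, diff.col);
  return 0;
}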

View file

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP10_ENCODER_ENCODEMV_H_
#define VP10_ENCODER_ENCODEMV_H_
@ -23,14 +22,14 @@ void vp10_entropy_mv_init(void);
void vp10_write_nmv_probs(VP10_COMMON *cm, int usehp, vp10_writer *w,
nmv_context_counts *const counts);
void vp10_encode_mv(VP10_COMP *cpi, vp10_writer* w, const MV* mv, const MV* ref,
void vp10_encode_mv(VP10_COMP *cpi, vp10_writer *w, const MV *mv, const MV *ref,
#if CONFIG_REF_MV
int is_compound,
#endif
const nmv_context* mvctx, int usehp);
const nmv_context *mvctx, int usehp);
void vp10_build_nmv_cost_table(int *mvjoint, int *mvcost[2],
const nmv_context* mvctx, int usehp);
const nmv_context *mvctx, int usehp);
void vp10_update_mv_count(ThreadData *td);

File diff not shown because of its large size. Load diff

View file

@ -94,10 +94,10 @@ typedef enum {
} ENCODE_BREAKOUT_TYPE;
typedef enum {
NORMAL = 0,
FOURFIVE = 1,
THREEFIVE = 2,
ONETWO = 3
NORMAL = 0,
FOURFIVE = 1,
THREEFIVE = 2,
ONETWO = 3
} VPX_SCALING;
typedef enum {
@ -117,7 +117,7 @@ typedef enum {
} MODE;
typedef enum {
FRAMEFLAGS_KEY = 1 << 0,
FRAMEFLAGS_KEY = 1 << 0,
FRAMEFLAGS_GOLDEN = 1 << 1,
#if CONFIG_EXT_REFS
FRAMEFLAGS_BWDREF = 1 << 2,
@ -144,14 +144,14 @@ typedef enum {
typedef struct VP10EncoderConfig {
BITSTREAM_PROFILE profile;
vpx_bit_depth_t bit_depth; // Codec bit-depth.
int width; // width of data passed to the compressor
int height; // height of data passed to the compressor
int width; // width of data passed to the compressor
int height; // height of data passed to the compressor
unsigned int input_bit_depth; // Input bit depth.
double init_framerate; // set to passed in framerate
int64_t target_bandwidth; // bandwidth to be used in bits per second
double init_framerate; // set to passed in framerate
int64_t target_bandwidth; // bandwidth to be used in bits per second
int noise_sensitivity; // pre processing blur: recommendation 0
int sharpness; // sharpening output: recommendation 0:
int sharpness; // sharpening output: recommendation 0:
int speed;
// maximum allowed bitrate for any intra frame in % of bitrate target.
unsigned int rc_max_intra_bitrate_pct;
@ -203,7 +203,7 @@ typedef struct VP10EncoderConfig {
int frame_periodic_boost;
// two pass datarate control
int two_pass_vbrbias; // two pass datarate control tweaks
int two_pass_vbrbias; // two pass datarate control tweaks
int two_pass_vbrmin_section;
int two_pass_vbrmax_section;
// END DATARATE CONTROL OPTIONS
@ -212,7 +212,7 @@ typedef struct VP10EncoderConfig {
int enable_auto_arf;
#if CONFIG_EXT_REFS
int enable_auto_brf; // (b)ackward (r)ef (f)rame
#endif // CONFIG_EXT_REFS
#endif // CONFIG_EXT_REFS
int encode_breakout; // early breakout : for video conf recommend 800
@ -300,15 +300,10 @@ typedef struct ActiveMap {
unsigned char *map;
} ActiveMap;
typedef enum {
Y,
U,
V,
ALL
} STAT_TYPE;
typedef enum { Y, U, V, ALL } STAT_TYPE;
typedef struct IMAGE_STAT {
double stat[ALL+1];
double stat[ALL + 1];
double worst;
} ImageStat;
@ -319,13 +314,10 @@ typedef struct {
#if CONFIG_ENTROPY
typedef struct SUBFRAME_STATS {
vp10_coeff_probs_model
coef_probs_buf[COEF_PROBS_BUFS][TX_SIZES][PLANE_TYPES];
vp10_coeff_count
coef_counts_buf[COEF_PROBS_BUFS][TX_SIZES][PLANE_TYPES];
unsigned int
eob_counts_buf[COEF_PROBS_BUFS]
[TX_SIZES][PLANE_TYPES][REF_TYPES][COEF_BANDS][COEFF_CONTEXTS];
vp10_coeff_probs_model coef_probs_buf[COEF_PROBS_BUFS][TX_SIZES][PLANE_TYPES];
vp10_coeff_count coef_counts_buf[COEF_PROBS_BUFS][TX_SIZES][PLANE_TYPES];
unsigned int eob_counts_buf[COEF_PROBS_BUFS][TX_SIZES][PLANE_TYPES][REF_TYPES]
[COEF_BANDS][COEFF_CONTEXTS];
vp10_coeff_probs_model enc_starting_coef_probs[TX_SIZES][PLANE_TYPES];
} SUBFRAME_STATS;
#endif // CONFIG_ENTROPY
@ -349,8 +341,8 @@ typedef struct VP10_COMP {
#endif // CONFIG_NEW_QUANT
VP10_COMMON common;
VP10EncoderConfig oxcf;
struct lookahead_ctx *lookahead;
struct lookahead_entry *alt_ref_source;
struct lookahead_ctx *lookahead;
struct lookahead_entry *alt_ref_source;
YV12_BUFFER_CONFIG *Source;
YV12_BUFFER_CONFIG *Last_Source; // NULL for first frame and alt_ref frames
@ -378,7 +370,7 @@ typedef struct VP10_COMP {
int gld_fb_idx;
#if CONFIG_EXT_REFS
int bwd_fb_idx; // BWD_REF_FRAME
#endif // CONFIG_EXT_REFS
#endif // CONFIG_EXT_REFS
int alt_fb_idx;
int last_show_frame_buf_idx; // last show frame buffer index
@ -432,11 +424,11 @@ typedef struct VP10_COMP {
// sufficient space to the size of the maximum possible number of frames.
int interp_filter_selected[REF_FRAMES + 1][SWITCHABLE];
struct vpx_codec_pkt_list *output_pkt_list;
struct vpx_codec_pkt_list *output_pkt_list;
MBGRAPH_FRAME_STATS mbgraph_stats[MAX_LAG_BUFFERS];
int mbgraph_n_frames; // number of frames filled in the above
int static_mb_pct; // % forced skip mbs by segmentation
int mbgraph_n_frames; // number of frames filled in the above
int static_mb_pct; // % forced skip mbs by segmentation
int ref_frame_flags;
SPEED_FEATURES sf;
@ -456,7 +448,7 @@ typedef struct VP10_COMP {
uint8_t *segmentation_map;
// segment threshold for encode breakout
int segment_encode_breakout[MAX_SEGMENTS];
int segment_encode_breakout[MAX_SEGMENTS];
CYCLIC_REFRESH *cyclic_refresh;
ActiveMap active_map;
@ -481,7 +473,7 @@ typedef struct VP10_COMP {
#if CONFIG_INTERNAL_STATS
unsigned int mode_chosen_counts[MAX_MODES];
int count;
int count;
uint64_t total_sq_error;
uint64_t total_samples;
ImageStat psnr;
@ -489,7 +481,7 @@ typedef struct VP10_COMP {
double total_blockiness;
double worst_blockiness;
int bytes;
int bytes;
double summed_quality;
double summed_weights;
unsigned int tot_recode_hits;
@ -540,8 +532,8 @@ typedef struct VP10_COMP {
unsigned int inter_mode_cost[INTER_MODE_CONTEXTS][INTER_MODES];
#if CONFIG_EXT_INTER
unsigned int inter_compound_mode_cost[INTER_MODE_CONTEXTS]
[INTER_COMPOUND_MODES];
unsigned int
inter_compound_mode_cost[INTER_MODE_CONTEXTS][INTER_COMPOUND_MODES];
unsigned int interintra_mode_cost[BLOCK_SIZE_GROUPS][INTERINTRA_MODES];
#endif // CONFIG_EXT_INTER
#if CONFIG_OBMC || CONFIG_WARPED_MOTION
@ -557,15 +549,15 @@ typedef struct VP10_COMP {
#endif
int palette_y_size_cost[PALETTE_BLOCK_SIZES][PALETTE_SIZES];
int palette_uv_size_cost[PALETTE_BLOCK_SIZES][PALETTE_SIZES];
int palette_y_color_cost[PALETTE_MAX_SIZE - 1][PALETTE_COLOR_CONTEXTS]
[PALETTE_COLORS];
int palette_uv_color_cost[PALETTE_MAX_SIZE - 1][PALETTE_COLOR_CONTEXTS]
[PALETTE_COLORS];
int palette_y_color_cost[PALETTE_MAX_SIZE -
1][PALETTE_COLOR_CONTEXTS][PALETTE_COLORS];
int palette_uv_color_cost[PALETTE_MAX_SIZE -
1][PALETTE_COLOR_CONTEXTS][PALETTE_COLORS];
int tx_size_cost[TX_SIZES - 1][TX_SIZE_CONTEXTS][TX_SIZES];
#if CONFIG_EXT_TX
int inter_tx_type_costs[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES];
int intra_tx_type_costs[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES]
[TX_TYPES];
[TX_TYPES];
#else
int intra_tx_type_costs[EXT_TX_SIZES][TX_TYPES][TX_TYPES];
int inter_tx_type_costs[EXT_TX_SIZES][TX_TYPES];
@ -635,20 +627,20 @@ typedef struct VP10_COMP {
void vp10_initialize_enc(void);
struct VP10_COMP *vp10_create_compressor(VP10EncoderConfig *oxcf,
BufferPool *const pool);
BufferPool *const pool);
void vp10_remove_compressor(VP10_COMP *cpi);
void vp10_change_config(VP10_COMP *cpi, const VP10EncoderConfig *oxcf);
// Receive a frame's worth of data. The caller can assume that a copy of
// this frame is made and not just a copy of the pointer.
// Receive a frame's worth of data. The caller can assume that a copy of
// this frame is made and not just a copy of the pointer.
int vp10_receive_raw_frame(VP10_COMP *cpi, unsigned int frame_flags,
YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
int64_t end_time_stamp);
YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
int64_t end_time_stamp);
int vp10_get_compressed_data(VP10_COMP *cpi, unsigned int *frame_flags,
size_t *size, uint8_t *dest,
int64_t *time_stamp, int64_t *time_end, int flush);
size_t *size, uint8_t *dest, int64_t *time_stamp,
int64_t *time_end, int flush);
int vp10_get_preview_raw_frame(VP10_COMP *cpi, YV12_BUFFER_CONFIG *dest);
@ -659,10 +651,10 @@ int vp10_use_as_reference(VP10_COMP *cpi, int ref_frame_flags);
void vp10_update_reference(VP10_COMP *cpi, int ref_frame_flags);
int vp10_copy_reference_enc(VP10_COMP *cpi, VPX_REFFRAME ref_frame_flag,
YV12_BUFFER_CONFIG *sd);
YV12_BUFFER_CONFIG *sd);
int vp10_set_reference_enc(VP10_COMP *cpi, VPX_REFFRAME ref_frame_flag,
YV12_BUFFER_CONFIG *sd);
YV12_BUFFER_CONFIG *sd);
int vp10_update_entropy(VP10_COMP *cpi, int update);
@ -670,11 +662,11 @@ int vp10_set_active_map(VP10_COMP *cpi, unsigned char *map, int rows, int cols);
int vp10_get_active_map(VP10_COMP *cpi, unsigned char *map, int rows, int cols);
int vp10_set_internal_size(VP10_COMP *cpi,
VPX_SCALING horiz_mode, VPX_SCALING vert_mode);
int vp10_set_internal_size(VP10_COMP *cpi, VPX_SCALING horiz_mode,
VPX_SCALING vert_mode);
int vp10_set_size_literal(VP10_COMP *cpi, unsigned int width,
unsigned int height);
unsigned int height);
int vp10_get_quantizer(struct VP10_COMP *cpi);
@ -682,8 +674,7 @@ void vp10_full_to_model_counts(vp10_coeff_count_model *model_count,
vp10_coeff_count *full_count);
static INLINE int frame_is_kf_gf_arf(const VP10_COMP *cpi) {
return frame_is_intra_only(&cpi->common) ||
cpi->refresh_alt_ref_frame ||
return frame_is_intra_only(&cpi->common) || cpi->refresh_alt_ref_frame ||
(cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref);
}
@ -693,8 +684,7 @@ static INLINE int get_ref_frame_map_idx(const VP10_COMP *cpi,
if (ref_frame >= LAST_FRAME && ref_frame <= LAST3_FRAME)
return cpi->lst_fb_idxes[ref_frame - 1];
#else
if (ref_frame == LAST_FRAME)
return cpi->lst_fb_idx;
if (ref_frame == LAST_FRAME) return cpi->lst_fb_idx;
#endif // CONFIG_EXT_REFS
else if (ref_frame == GOLDEN_FRAME)
return cpi->gld_fb_idx;
@ -717,8 +707,8 @@ static INLINE YV12_BUFFER_CONFIG *get_ref_frame_buffer(
VP10_COMP *cpi, MV_REFERENCE_FRAME ref_frame) {
VP10_COMMON *const cm = &cpi->common;
const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
return
buf_idx != INVALID_IDX ? &cm->buffer_pool->frame_bufs[buf_idx].buf : NULL;
return buf_idx != INVALID_IDX ? &cm->buffer_pool->frame_bufs[buf_idx].buf
: NULL;
}
static INLINE const YV12_BUFFER_CONFIG *get_upsampled_ref(
@ -770,12 +760,12 @@ void vp10_update_reference_frames(VP10_COMP *cpi);
void vp10_set_high_precision_mv(VP10_COMP *cpi, int allow_high_precision_mv);
YV12_BUFFER_CONFIG *vp10_scale_if_required_fast(VP10_COMMON *cm,
YV12_BUFFER_CONFIG *unscaled,
YV12_BUFFER_CONFIG *scaled);
YV12_BUFFER_CONFIG *unscaled,
YV12_BUFFER_CONFIG *scaled);
YV12_BUFFER_CONFIG *vp10_scale_if_required(VP10_COMMON *cm,
YV12_BUFFER_CONFIG *unscaled,
YV12_BUFFER_CONFIG *scaled);
YV12_BUFFER_CONFIG *unscaled,
YV12_BUFFER_CONFIG *scaled);
void vp10_apply_encoding_flags(VP10_COMP *cpi, vpx_enc_frame_flags_t flags);
@ -797,10 +787,10 @@ static INLINE int is_bwdref_enabled(const VP10_COMP *const cpi) {
static INLINE void set_ref_ptrs(VP10_COMMON *cm, MACROBLOCKD *xd,
MV_REFERENCE_FRAME ref0,
MV_REFERENCE_FRAME ref1) {
xd->block_refs[0] = &cm->frame_refs[ref0 >= LAST_FRAME ? ref0 - LAST_FRAME
: 0];
xd->block_refs[1] = &cm->frame_refs[ref1 >= LAST_FRAME ? ref1 - LAST_FRAME
: 0];
xd->block_refs[0] =
&cm->frame_refs[ref0 >= LAST_FRAME ? ref0 - LAST_FRAME : 0];
xd->block_refs[1] =
&cm->frame_refs[ref1 >= LAST_FRAME ? ref1 - LAST_FRAME : 0];
}
static INLINE int get_chessboard_index(const int frame_index) {
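set_ref_ptrs() above maps a reference-frame enum onto the frame_refs array with a clamped offset. A standalone sketch of that mapping; the enum values are assumptions written out for illustration:
/* Assumed enum values, for illustration only. */
#define INTRA_FRAME 0
#define LAST_FRAME 1
/* A valid inter reference (>= LAST_FRAME) selects
 * frame_refs[ref - LAST_FRAME]; anything below falls back to
 * slot 0, matching the ternary in set_ref_ptrs(). */
static int ref_to_frame_refs_idx(int ref) {
  return ref >= LAST_FRAME ? ref - LAST_FRAME : 0;
}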

View file

@ -28,7 +28,6 @@ static void accumulate_rd_opt(ThreadData *td, ThreadData *td_t) {
td->rd_counts.coef_counts[i][j][k][l][m][n] +=
td_t->rd_counts.coef_counts[i][j][k][l][m][n];
// Counts of all motion searches and exhaustive mesh searches.
td->rd_counts.m_search_count += td_t->rd_counts.m_search_count;
td->rd_counts.ex_search_count += td_t->rd_counts.ex_search_count;
@ -41,10 +40,10 @@ static int enc_worker_hook(EncWorkerData *const thread_data, void *unused) {
const int tile_rows = cm->tile_rows;
int t;
(void) unused;
(void)unused;
for (t = thread_data->start; t < tile_rows * tile_cols;
t += cpi->num_workers) {
t += cpi->num_workers) {
int tile_row = t / tile_cols;
int tile_col = t % tile_cols;
@ -69,8 +68,7 @@ void vp10_encode_tiles_mt(VP10_COMP *cpi) {
vpx_malloc(num_workers * sizeof(*cpi->workers)));
CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
vpx_calloc(num_workers,
sizeof(*cpi->tile_thr_data)));
vpx_calloc(num_workers, sizeof(*cpi->tile_thr_data)));
for (i = 0; i < num_workers; i++) {
VPxWorker *const worker = &cpi->workers[i];
@ -82,7 +80,6 @@ void vp10_encode_tiles_mt(VP10_COMP *cpi) {
thread_data->cpi = cpi;
if (i < num_workers - 1) {
// Allocate thread data.
CHECK_MEM_ERROR(cm, thread_data->td,
vpx_memalign(32, sizeof(*thread_data->td)));
@ -121,7 +118,7 @@ void vp10_encode_tiles_mt(VP10_COMP *cpi) {
worker->hook = (VPxWorkerHook)enc_worker_hook;
worker->data1 = &cpi->tile_thr_data[i];
worker->data2 = NULL;
thread_data = (EncWorkerData*)worker->data1;
thread_data = (EncWorkerData *)worker->data1;
// Before encoding a frame, copy the thread data from cpi.
if (thread_data->td != &cpi->td) {
@ -135,16 +132,16 @@ void vp10_encode_tiles_mt(VP10_COMP *cpi) {
// Allocate buffers used by palette coding mode.
if (cpi->common.allow_screen_content_tools && i < num_workers - 1) {
MACROBLOCK *x = &thread_data->td->mb;
CHECK_MEM_ERROR(cm, x->palette_buffer,
vpx_memalign(16, sizeof(*x->palette_buffer)));
MACROBLOCK *x = &thread_data->td->mb;
CHECK_MEM_ERROR(cm, x->palette_buffer,
vpx_memalign(16, sizeof(*x->palette_buffer)));
}
}
// Encode a frame
for (i = 0; i < num_workers; i++) {
VPxWorker *const worker = &cpi->workers[i];
EncWorkerData *const thread_data = (EncWorkerData*)worker->data1;
EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;
// Set the starting tile for each thread.
thread_data->start = i;
@ -163,7 +160,7 @@ void vp10_encode_tiles_mt(VP10_COMP *cpi) {
for (i = 0; i < num_workers; i++) {
VPxWorker *const worker = &cpi->workers[i];
EncWorkerData *const thread_data = (EncWorkerData*)worker->data1;
EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;
// Accumulate counters.
if (i < cpi->num_workers - 1) {

View file

@ -16,8 +16,7 @@
#include "vp10/encoder/extend.h"
static void copy_and_extend_plane(const uint8_t *src, int src_pitch,
uint8_t *dst, int dst_pitch,
int w, int h,
uint8_t *dst, int dst_pitch, int w, int h,
int extend_top, int extend_left,
int extend_bottom, int extend_right) {
int i, linesize;
@ -43,7 +42,7 @@ static void copy_and_extend_plane(const uint8_t *src, int src_pitch,
src_ptr1 = dst - extend_left;
src_ptr2 = dst + dst_pitch * (h - 1) - extend_left;
dst_ptr1 = dst + dst_pitch * (-extend_top) - extend_left;
dst_ptr2 = dst + dst_pitch * (h) - extend_left;
dst_ptr2 = dst + dst_pitch * (h)-extend_left;
linesize = extend_left + extend_right + w;
for (i = 0; i < extend_top; i++) {
@ -59,9 +58,8 @@ static void copy_and_extend_plane(const uint8_t *src, int src_pitch,
#if CONFIG_VP9_HIGHBITDEPTH
static void highbd_copy_and_extend_plane(const uint8_t *src8, int src_pitch,
uint8_t *dst8, int dst_pitch,
int w, int h,
int extend_top, int extend_left,
uint8_t *dst8, int dst_pitch, int w,
int h, int extend_top, int extend_left,
int extend_bottom, int extend_right) {
int i, linesize;
uint16_t *src = CONVERT_TO_SHORTPTR(src8);
@ -88,7 +86,7 @@ static void highbd_copy_and_extend_plane(const uint8_t *src8, int src_pitch,
src_ptr1 = dst - extend_left;
src_ptr2 = dst + dst_pitch * (h - 1) - extend_left;
dst_ptr1 = dst + dst_pitch * (-extend_top) - extend_left;
dst_ptr2 = dst + dst_pitch * (h) - extend_left;
dst_ptr2 = dst + dst_pitch * (h)-extend_left;
linesize = extend_left + extend_right + w;
for (i = 0; i < extend_top; i++) {
@ -104,7 +102,7 @@ static void highbd_copy_and_extend_plane(const uint8_t *src8, int src_pitch,
#endif // CONFIG_VP9_HIGHBITDEPTH
void vp10_copy_and_extend_frame(const YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *dst) {
YV12_BUFFER_CONFIG *dst) {
// Extend src frame in buffer
// Altref filtering assumes 16 pixel extension
const int et_y = 16;
@ -127,51 +125,46 @@ void vp10_copy_and_extend_frame(const YV12_BUFFER_CONFIG *src,
#if CONFIG_VP9_HIGHBITDEPTH
if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
highbd_copy_and_extend_plane(src->y_buffer, src->y_stride,
dst->y_buffer, dst->y_stride,
src->y_crop_width, src->y_crop_height,
et_y, el_y, eb_y, er_y);
highbd_copy_and_extend_plane(src->y_buffer, src->y_stride, dst->y_buffer,
dst->y_stride, src->y_crop_width,
src->y_crop_height, et_y, el_y, eb_y, er_y);
highbd_copy_and_extend_plane(src->u_buffer, src->uv_stride,
dst->u_buffer, dst->uv_stride,
src->uv_crop_width, src->uv_crop_height,
et_uv, el_uv, eb_uv, er_uv);
highbd_copy_and_extend_plane(
src->u_buffer, src->uv_stride, dst->u_buffer, dst->uv_stride,
src->uv_crop_width, src->uv_crop_height, et_uv, el_uv, eb_uv, er_uv);
highbd_copy_and_extend_plane(src->v_buffer, src->uv_stride,
dst->v_buffer, dst->uv_stride,
src->uv_crop_width, src->uv_crop_height,
et_uv, el_uv, eb_uv, er_uv);
highbd_copy_and_extend_plane(
src->v_buffer, src->uv_stride, dst->v_buffer, dst->uv_stride,
src->uv_crop_width, src->uv_crop_height, et_uv, el_uv, eb_uv, er_uv);
return;
}
#endif // CONFIG_VP9_HIGHBITDEPTH
copy_and_extend_plane(src->y_buffer, src->y_stride,
dst->y_buffer, dst->y_stride,
src->y_crop_width, src->y_crop_height,
copy_and_extend_plane(src->y_buffer, src->y_stride, dst->y_buffer,
dst->y_stride, src->y_crop_width, src->y_crop_height,
et_y, el_y, eb_y, er_y);
copy_and_extend_plane(src->u_buffer, src->uv_stride,
dst->u_buffer, dst->uv_stride,
src->uv_crop_width, src->uv_crop_height,
copy_and_extend_plane(src->u_buffer, src->uv_stride, dst->u_buffer,
dst->uv_stride, src->uv_crop_width, src->uv_crop_height,
et_uv, el_uv, eb_uv, er_uv);
copy_and_extend_plane(src->v_buffer, src->uv_stride,
dst->v_buffer, dst->uv_stride,
src->uv_crop_width, src->uv_crop_height,
copy_and_extend_plane(src->v_buffer, src->uv_stride, dst->v_buffer,
dst->uv_stride, src->uv_crop_width, src->uv_crop_height,
et_uv, el_uv, eb_uv, er_uv);
}
void vp10_copy_and_extend_frame_with_rect(const YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *dst,
int srcy, int srcx,
int srch, int srcw) {
YV12_BUFFER_CONFIG *dst, int srcy,
int srcx, int srch, int srcw) {
// If the side is not touching the border then don't extend.
const int et_y = srcy ? 0 : dst->border;
const int el_y = srcx ? 0 : dst->border;
const int eb_y = srcy + srch != src->y_height ? 0 :
dst->border + dst->y_height - src->y_height;
const int er_y = srcx + srcw != src->y_width ? 0 :
dst->border + dst->y_width - src->y_width;
const int eb_y = srcy + srch != src->y_height
? 0
: dst->border + dst->y_height - src->y_height;
const int er_y = srcx + srcw != src->y_width
? 0
: dst->border + dst->y_width - src->y_width;
const int src_y_offset = srcy * src->y_stride + srcx;
const int dst_y_offset = srcy * dst->y_stride + srcx;
@ -185,17 +178,14 @@ void vp10_copy_and_extend_frame_with_rect(const YV12_BUFFER_CONFIG *src,
const int srcw_uv = ROUND_POWER_OF_TWO(srcw, 1);
copy_and_extend_plane(src->y_buffer + src_y_offset, src->y_stride,
dst->y_buffer + dst_y_offset, dst->y_stride,
srcw, srch,
dst->y_buffer + dst_y_offset, dst->y_stride, srcw, srch,
et_y, el_y, eb_y, er_y);
copy_and_extend_plane(src->u_buffer + src_uv_offset, src->uv_stride,
dst->u_buffer + dst_uv_offset, dst->uv_stride,
srcw_uv, srch_uv,
et_uv, el_uv, eb_uv, er_uv);
dst->u_buffer + dst_uv_offset, dst->uv_stride, srcw_uv,
srch_uv, et_uv, el_uv, eb_uv, er_uv);
copy_and_extend_plane(src->v_buffer + src_uv_offset, src->uv_stride,
dst->v_buffer + dst_uv_offset, dst->uv_stride,
srcw_uv, srch_uv,
et_uv, el_uv, eb_uv, er_uv);
dst->v_buffer + dst_uv_offset, dst->uv_stride, srcw_uv,
srch_uv, et_uv, el_uv, eb_uv, er_uv);
}
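Per output row, copy_and_extend_plane() replicates the first and last pixels into the side borders with two memset() calls. A runnable standalone sketch of that one-row step (buffer sizes are hypothetical):
#include <stdio.h>
#include <string.h>
static void extend_row(unsigned char *row, int w, int ext_left, int ext_right) {
  memset(row - ext_left, row[0], ext_left);  /* replicate left edge */
  memset(row + w, row[w - 1], ext_right);    /* replicate right edge */
}
int main(void) {
  unsigned char buf[8] = { 0, 0, 10, 20, 30, 40, 0, 0 };
  extend_row(buf + 2, 4, 2, 2);  /* 4-pixel row, 2-pixel borders */
  for (int i = 0; i < 8; ++i) printf("%d ", buf[i]);  /* 10 10 10 20 30 40 40 40 */
  printf("\n");
  return 0;
}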

View file

@ -18,14 +18,12 @@
extern "C" {
#endif
void vp10_copy_and_extend_frame(const YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *dst);
YV12_BUFFER_CONFIG *dst);
void vp10_copy_and_extend_frame_with_rect(const YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *dst,
int srcy, int srcx,
int srch, int srcw);
YV12_BUFFER_CONFIG *dst, int srcy,
int srcx, int srch, int srcw);
#ifdef __cplusplus
} // extern "C"
#endif

File diff not shown because of its large size. Load diff

View file

@ -43,7 +43,7 @@ typedef struct {
// Length of the bi-predictive frame group (BFG)
// NOTE: Currently each BFG contains one backward ref (BWF) frame plus a certain
// number of bi-predictive frames.
#define BFG_INTERVAL 2
#define BFG_INTERVAL 2
#endif // CONFIG_EXT_REFS
#define VLOW_MOTION_THRESHOLD 950
@ -80,9 +80,9 @@ typedef enum {
ARF_UPDATE = 3,
OVERLAY_UPDATE = 4,
#if CONFIG_EXT_REFS
BRF_UPDATE = 5, // Backward Reference Frame
BRF_UPDATE = 5, // Backward Reference Frame
LAST_BIPRED_UPDATE = 6, // Last Bi-predictive Frame
BIPRED_UPDATE = 7, // Bi-predictive Frame, but not the last one
BIPRED_UPDATE = 7, // Bi-predictive Frame, but not the last one
FRAME_UPDATE_TYPES = 8
#else
FRAME_UPDATE_TYPES = 5
@ -173,9 +173,8 @@ void vp10_twopass_postencode_update(struct VP10_COMP *cpi);
void vp10_init_subsampling(struct VP10_COMP *cpi);
void vp10_calculate_coded_size(struct VP10_COMP *cpi,
int *scaled_frame_width,
int *scaled_frame_height);
void vp10_calculate_coded_size(struct VP10_COMP *cpi, int *scaled_frame_width,
int *scaled_frame_height);
#ifdef __cplusplus
} // extern "C"

View file

@ -24,11 +24,11 @@ int compute_global_motion_feature_based(struct VP10_COMP *cpi,
YV12_BUFFER_CONFIG *frm,
YV12_BUFFER_CONFIG *ref,
double inlier_prob, double *H) {
(void) cpi;
(void) type;
(void) frm;
(void) ref;
(void) inlier_prob;
(void) H;
(void)cpi;
(void)type;
(void)frm;
(void)ref;
(void)inlier_prob;
(void)H;
return 0;
}
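The casts reformatted above are the standard idiom for marking a stub's parameters as deliberately unused; a minimal sketch:
/* (void)-casting a parameter tells the compiler (and the reader)
 * that it is intentionally unused, silencing -Wunused-parameter. */
static int stub_hook(void *ctx, int flags) {
  (void)ctx;
  (void)flags;
  return 0;
}
int main(void) { return stub_hook(0, 0); }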

View file

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP10_ENCODER_GLOBAL_MOTION_H_
#define VP10_ENCODER_GLOBAL_MOTION_H_
@ -27,4 +26,3 @@ int compute_global_motion_feature_based(struct VP10_COMP *cpi,
} // extern "C"
#endif
#endif // VP10_ENCODER_GLOBAL_MOTION_H_

View file

@ -35,9 +35,7 @@ static void fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
case DCT_DCT:
case ADST_DCT:
case DCT_ADST:
case ADST_ADST:
vp10_fht4x4(src_diff, coeff, diff_stride, tx_type);
break;
case ADST_ADST: vp10_fht4x4(src_diff, coeff, diff_stride, tx_type); break;
#if CONFIG_EXT_TX
case FLIPADST_DCT:
case DCT_FLIPADST:
@ -49,15 +47,10 @@ static void fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
case V_ADST:
case H_ADST:
case V_FLIPADST:
case H_FLIPADST:
vp10_fht4x4(src_diff, coeff, diff_stride, tx_type);
break;
case IDTX:
vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 4, tx_type);
break;
case H_FLIPADST: vp10_fht4x4(src_diff, coeff, diff_stride, tx_type); break;
case IDTX: vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 4, tx_type); break;
#endif // CONFIG_EXT_TX
default:
assert(0);
default: assert(0);
}
}
@ -65,14 +58,14 @@ static void fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
static void fwd_txfm_8x4(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt) {
(void) fwd_txfm_opt;
(void)fwd_txfm_opt;
vp10_fht8x4(src_diff, coeff, diff_stride, tx_type);
}
static void fwd_txfm_4x8(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt) {
(void) fwd_txfm_opt;
(void)fwd_txfm_opt;
vp10_fht4x8(src_diff, coeff, diff_stride, tx_type);
}
#endif // CONFIG_EXT_TX
@ -101,15 +94,10 @@ static void fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
case V_ADST:
case H_ADST:
case V_FLIPADST:
case H_FLIPADST:
vp10_fht8x8(src_diff, coeff, diff_stride, tx_type);
break;
case IDTX:
vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 8, tx_type);
break;
case H_FLIPADST: vp10_fht8x8(src_diff, coeff, diff_stride, tx_type); break;
case IDTX: vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 8, tx_type); break;
#endif // CONFIG_EXT_TX
default:
assert(0);
default: assert(0);
}
}
@ -144,8 +132,7 @@ static void fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff,
vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 16, tx_type);
break;
#endif // CONFIG_EXT_TX
default:
assert(0);
default: assert(0);
}
}
@ -182,9 +169,7 @@ static void fwd_txfm_32x32(int rd_transform, const int16_t *src_diff,
vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 32, tx_type);
break;
#endif // CONFIG_EXT_TX
default:
assert(0);
break;
default: assert(0); break;
}
}
@ -221,12 +206,9 @@ static void highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
case H_FLIPADST:
vp10_highbd_fht4x4_c(src_diff, coeff, diff_stride, tx_type);
break;
case IDTX:
vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 4, tx_type);
break;
case IDTX: vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 4, tx_type); break;
#endif // CONFIG_EXT_TX
default:
assert(0);
default: assert(0);
}
}
@ -234,16 +216,16 @@ static void highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
static void highbd_fwd_txfm_8x4(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
(void) fwd_txfm_opt;
(void) bd;
(void)fwd_txfm_opt;
(void)bd;
vp10_highbd_fht8x4(src_diff, coeff, diff_stride, tx_type);
}
static void highbd_fwd_txfm_4x8(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
(void) fwd_txfm_opt;
(void) bd;
(void)fwd_txfm_opt;
(void)bd;
vp10_highbd_fht4x8(src_diff, coeff, diff_stride, tx_type);
}
#endif // CONFIG_EXT_TX
@ -276,12 +258,9 @@ static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
// Use C version since DST exists only in C
vp10_highbd_fht8x8_c(src_diff, coeff, diff_stride, tx_type);
break;
case IDTX:
vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 8, tx_type);
break;
case IDTX: vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 8, tx_type); break;
#endif // CONFIG_EXT_TX
default:
assert(0);
default: assert(0);
}
}
@ -317,8 +296,7 @@ static void highbd_fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff,
vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 16, tx_type);
break;
#endif // CONFIG_EXT_TX
default:
assert(0);
default: assert(0);
}
}
@ -353,9 +331,7 @@ static void highbd_fwd_txfm_32x32(int rd_transform, const int16_t *src_diff,
vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 32, tx_type);
break;
#endif // CONFIG_EXT_TX
default:
assert(0);
break;
default: assert(0); break;
}
}
#endif // CONFIG_VP9_HIGHBITDEPTH
@ -389,9 +365,7 @@ void fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride,
case TX_4X4:
fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, lossless);
break;
default:
assert(0);
break;
default: assert(0); break;
}
}
@ -410,30 +384,27 @@ void highbd_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff,
fwd_txfm_opt, bd);
break;
case TX_16X16:
highbd_fwd_txfm_16x16(src_diff, coeff, diff_stride, tx_type,
fwd_txfm_opt, bd);
highbd_fwd_txfm_16x16(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt,
bd);
break;
case TX_8X8:
highbd_fwd_txfm_8x8(src_diff, coeff, diff_stride, tx_type,
fwd_txfm_opt, bd);
highbd_fwd_txfm_8x8(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt,
bd);
break;
#if CONFIG_EXT_TX
case TX_4X8:
highbd_fwd_txfm_4x8(src_diff, coeff, diff_stride, tx_type,
fwd_txfm_opt, bd);
highbd_fwd_txfm_4x8(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt,
bd);
break;
case TX_8X4:
highbd_fwd_txfm_8x4(src_diff, coeff, diff_stride, tx_type,
fwd_txfm_opt, bd);
highbd_fwd_txfm_8x4(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt,
bd);
break;
#endif // CONFIG_EXT_TX
case TX_4X4:
highbd_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type,
lossless, bd);
break;
default:
assert(0);
highbd_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, lossless, bd);
break;
default: assert(0); break;
}
}
#endif // CONFIG_VP9_HIGHBITDEPTH
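Many hunks in this file collapse trivial case bodies onto a single line. A hypothetical dispatcher showing the resulting shape (names are illustrative, not from the tree):
static const char *tx_type_name(int tx_type) {
  switch (tx_type) {
    case 0: return "DCT_DCT";  /* short cases now fit on one line */
    case 1: return "ADST_DCT";
    case 2: return "DCT_ADST";
    default: return "unknown";
  }
}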

View file

@ -19,33 +19,28 @@
#include "vp10/encoder/lookahead.h"
/* Return the buffer at the given absolute index and increment the index */
static struct lookahead_entry *pop(struct lookahead_ctx *ctx,
int *idx) {
static struct lookahead_entry *pop(struct lookahead_ctx *ctx, int *idx) {
int index = *idx;
struct lookahead_entry *buf = ctx->buf + index;
assert(index < ctx->max_sz);
if (++index >= ctx->max_sz)
index -= ctx->max_sz;
if (++index >= ctx->max_sz) index -= ctx->max_sz;
*idx = index;
return buf;
}
void vp10_lookahead_destroy(struct lookahead_ctx *ctx) {
if (ctx) {
if (ctx->buf) {
int i;
for (i = 0; i < ctx->max_sz; i++)
vpx_free_frame_buffer(&ctx->buf[i].img);
for (i = 0; i < ctx->max_sz; i++) vpx_free_frame_buffer(&ctx->buf[i].img);
free(ctx->buf);
}
free(ctx);
}
}
struct lookahead_ctx *vp10_lookahead_init(unsigned int width,
unsigned int height,
unsigned int subsampling_x,
@ -69,32 +64,30 @@ struct lookahead_ctx *vp10_lookahead_init(unsigned int width,
unsigned int i;
ctx->max_sz = depth;
ctx->buf = calloc(depth, sizeof(*ctx->buf));
if (!ctx->buf)
goto bail;
if (!ctx->buf) goto bail;
for (i = 0; i < depth; i++)
if (vpx_alloc_frame_buffer(&ctx->buf[i].img,
width, height, subsampling_x, subsampling_y,
if (vpx_alloc_frame_buffer(
&ctx->buf[i].img, width, height, subsampling_x, subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
use_highbitdepth,
use_highbitdepth,
#endif
VPX_ENC_BORDER_IN_PIXELS,
legacy_byte_alignment))
VPX_ENC_BORDER_IN_PIXELS, legacy_byte_alignment))
goto bail;
}
return ctx;
bail:
bail:
vp10_lookahead_destroy(ctx);
return NULL;
}
#define USE_PARTIAL_COPY 0
int vp10_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src,
int64_t ts_start, int64_t ts_end,
int vp10_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src,
int64_t ts_start, int64_t ts_end,
#if CONFIG_VP9_HIGHBITDEPTH
int use_highbitdepth,
int use_highbitdepth,
#endif
unsigned int flags) {
unsigned int flags) {
struct lookahead_entry *buf;
#if USE_PARTIAL_COPY
int row, col, active_end;
@ -109,8 +102,7 @@ int vp10_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src,
int subsampling_y = src->subsampling_y;
int larger_dimensions, new_dimensions;
if (ctx->sz + 1 + MAX_PRE_FRAMES > ctx->max_sz)
return 1;
if (ctx->sz + 1 + MAX_PRE_FRAMES > ctx->max_sz) return 1;
ctx->sz++;
buf = pop(ctx, &ctx->write_idx);
@ -118,8 +110,7 @@ int vp10_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src,
height != buf->img.y_crop_height ||
uv_width != buf->img.uv_crop_width ||
uv_height != buf->img.uv_crop_height;
larger_dimensions = width > buf->img.y_width ||
height > buf->img.y_height ||
larger_dimensions = width > buf->img.y_width || height > buf->img.y_height ||
uv_width > buf->img.uv_width ||
uv_height > buf->img.uv_height;
assert(!larger_dimensions || new_dimensions);
@ -139,27 +130,22 @@ int vp10_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src,
while (1) {
// Find the first active macroblock in this row.
for (; col < mb_cols; ++col) {
if (active_map[col])
break;
if (active_map[col]) break;
}
// No more active macroblocks in this row.
if (col == mb_cols)
break;
if (col == mb_cols) break;
// Find the end of the active region in this row.
active_end = col;
for (; active_end < mb_cols; ++active_end) {
if (!active_map[active_end])
break;
if (!active_map[active_end]) break;
}
// Only copy this active region.
vp10_copy_and_extend_frame_with_rect(src, &buf->img,
row << 4,
col << 4, 16,
(active_end - col) << 4);
vp10_copy_and_extend_frame_with_rect(src, &buf->img, row << 4, col << 4,
16, (active_end - col) << 4);
// Start again from the end of this active region.
col = active_end;
@ -172,14 +158,13 @@ int vp10_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src,
if (larger_dimensions) {
YV12_BUFFER_CONFIG new_img;
memset(&new_img, 0, sizeof(new_img));
if (vpx_alloc_frame_buffer(&new_img,
width, height, subsampling_x, subsampling_y,
if (vpx_alloc_frame_buffer(&new_img, width, height, subsampling_x,
subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
use_highbitdepth,
#endif
VPX_ENC_BORDER_IN_PIXELS,
0))
return 1;
VPX_ENC_BORDER_IN_PIXELS, 0))
return 1;
vpx_free_frame_buffer(&buf->img);
buf->img = new_img;
} else if (new_dimensions) {
@ -202,9 +187,8 @@ int vp10_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src,
return 0;
}
struct lookahead_entry *vp10_lookahead_pop(struct lookahead_ctx *ctx,
int drain) {
int drain) {
struct lookahead_entry *buf = NULL;
if (ctx && ctx->sz && (drain || ctx->sz == ctx->max_sz - MAX_PRE_FRAMES)) {
@ -214,25 +198,22 @@ struct lookahead_entry *vp10_lookahead_pop(struct lookahead_ctx *ctx,
return buf;
}
struct lookahead_entry *vp10_lookahead_peek(struct lookahead_ctx *ctx,
int index) {
int index) {
struct lookahead_entry *buf = NULL;
if (index >= 0) {
// Forward peek
if (index < ctx->sz) {
index += ctx->read_idx;
if (index >= ctx->max_sz)
index -= ctx->max_sz;
if (index >= ctx->max_sz) index -= ctx->max_sz;
buf = ctx->buf + index;
}
} else if (index < 0) {
// Backward peek
if (-index <= MAX_PRE_FRAMES) {
index += ctx->read_idx;
if (index < 0)
index += ctx->max_sz;
if (index < 0) index += ctx->max_sz;
buf = ctx->buf + index;
}
}
@ -240,6 +221,4 @@ struct lookahead_entry *vp10_lookahead_peek(struct lookahead_ctx *ctx,
return buf;
}
unsigned int vp10_lookahead_depth(struct lookahead_ctx *ctx) {
return ctx->sz;
}
unsigned int vp10_lookahead_depth(struct lookahead_ctx *ctx) { return ctx->sz; }
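The lookahead queue above is a circular buffer, and because its indices move by at most max_sz per step, pop() and peek() can wrap with a single conditional add or subtract instead of a modulo. A standalone sketch of both directions:
/* Forward wrap, as in pop() and forward peeks. */
static int wrap_forward(int index, int max_sz) {
  if (index >= max_sz) index -= max_sz;
  return index;
}
/* Backward wrap, as in backward peeks. */
static int wrap_backward(int index, int max_sz) {
  if (index < 0) index += max_sz;
  return index;
}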

View file

@ -21,10 +21,10 @@ extern "C" {
#define MAX_LAG_BUFFERS 25
struct lookahead_entry {
YV12_BUFFER_CONFIG img;
int64_t ts_start;
int64_t ts_end;
unsigned int flags;
YV12_BUFFER_CONFIG img;
int64_t ts_start;
int64_t ts_end;
unsigned int flags;
};
// The maximum number of past frames we want to keep in the queue.
@ -44,20 +44,18 @@ struct lookahead_ctx {
* may be done when buffers are enqueued.
*/
struct lookahead_ctx *vp10_lookahead_init(unsigned int width,
unsigned int height,
unsigned int subsampling_x,
unsigned int subsampling_y,
unsigned int height,
unsigned int subsampling_x,
unsigned int subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
int use_highbitdepth,
int use_highbitdepth,
#endif
unsigned int depth);
unsigned int depth);
/**\brief Destroys the lookahead stage
*/
void vp10_lookahead_destroy(struct lookahead_ctx *ctx);
/**\brief Enqueue a source buffer
*
* This function will copy the source image into a new framebuffer with
@ -74,12 +72,11 @@ void vp10_lookahead_destroy(struct lookahead_ctx *ctx);
* \param[in] active_map Map that specifies which macroblock is active
*/
int vp10_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src,
int64_t ts_start, int64_t ts_end,
int64_t ts_start, int64_t ts_end,
#if CONFIG_VP9_HIGHBITDEPTH
int use_highbitdepth,
int use_highbitdepth,
#endif
unsigned int flags);
unsigned int flags);
/**\brief Get the next source buffer to encode
*
@ -92,8 +89,7 @@ int vp10_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src,
* \retval NULL, if drain not set and queue not of the configured depth
*/
struct lookahead_entry *vp10_lookahead_pop(struct lookahead_ctx *ctx,
int drain);
int drain);
/**\brief Get a future source buffer to encode
*
@ -103,8 +99,7 @@ struct lookahead_entry *vp10_lookahead_pop(struct lookahead_ctx *ctx,
* \retval NULL, if no buffer exists at the specified index
*/
struct lookahead_entry *vp10_lookahead_peek(struct lookahead_ctx *ctx,
int index);
int index);
/**\brief Get the number of frames currently in the lookahead queue
*

View file

@ -22,11 +22,8 @@
#include "vp10/common/reconinter.h"
#include "vp10/common/reconintra.h"
static unsigned int do_16x16_motion_iteration(VP10_COMP *cpi,
const MV *ref_mv,
int mb_row,
int mb_col) {
static unsigned int do_16x16_motion_iteration(VP10_COMP *cpi, const MV *ref_mv,
int mb_row, int mb_col) {
MACROBLOCK *const x = &cpi->td.mb;
MACROBLOCKD *const xd = &x->e_mbd;
const MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
@ -57,12 +54,11 @@ static unsigned int do_16x16_motion_iteration(VP10_COMP *cpi,
{
int distortion;
unsigned int sse;
cpi->find_fractional_mv_step(
x, ref_mv, cpi->common.allow_high_precision_mv, x->errorperbit,
&v_fn_ptr, 0, mv_sf->subpel_iters_per_step,
cond_cost_list(cpi, cost_list),
NULL, NULL,
&distortion, &sse, NULL, 0, 0, 0);
cpi->find_fractional_mv_step(x, ref_mv, cpi->common.allow_high_precision_mv,
x->errorperbit, &v_fn_ptr, 0,
mv_sf->subpel_iters_per_step,
cond_cost_list(cpi, cost_list), NULL, NULL,
&distortion, &sse, NULL, 0, 0, 0);
}
#if CONFIG_EXT_INTER
@ -70,7 +66,7 @@ static unsigned int do_16x16_motion_iteration(VP10_COMP *cpi,
xd->mi[0]->mbmi.mode = NEW_NEWMV;
else
#endif // CONFIG_EXT_INTER
xd->mi[0]->mbmi.mode = NEWMV;
xd->mi[0]->mbmi.mode = NEWMV;
xd->mi[0]->mbmi.mv[0] = x->best_mv;
#if CONFIG_EXT_INTER
@ -89,8 +85,8 @@ static unsigned int do_16x16_motion_iteration(VP10_COMP *cpi,
xd->plane[0].dst.buf, xd->plane[0].dst.stride);
}
static int do_16x16_motion_search(VP10_COMP *cpi, const MV *ref_mv,
int mb_row, int mb_col) {
static int do_16x16_motion_search(VP10_COMP *cpi, const MV *ref_mv, int mb_row,
int mb_col) {
MACROBLOCK *const x = &cpi->td.mb;
MACROBLOCKD *const xd = &x->e_mbd;
unsigned int err, tmp_err;
@ -114,7 +110,7 @@ static int do_16x16_motion_search(VP10_COMP *cpi, const MV *ref_mv,
// based search as well.
if (ref_mv->row != 0 || ref_mv->col != 0) {
unsigned int tmp_err;
MV zero_ref_mv = {0, 0};
MV zero_ref_mv = { 0, 0 };
tmp_err = do_16x16_motion_iteration(cpi, &zero_ref_mv, mb_row, mb_col);
if (tmp_err < err) {
@ -142,7 +138,7 @@ static int do_16x16_zerozero_search(VP10_COMP *cpi, int_mv *dst_mv) {
return err;
}
static int find_best_16x16_intra(VP10_COMP *cpi, PREDICTION_MODE *pbest_mode) {
MACROBLOCK *const x = &cpi->td.mb;
MACROBLOCK *const x = &cpi->td.mb;
MACROBLOCKD *const xd = &x->e_mbd;
PREDICTION_MODE best_mode = -1, mode;
unsigned int best_err = INT_MAX;
@ -153,38 +149,30 @@ static int find_best_16x16_intra(VP10_COMP *cpi, PREDICTION_MODE *pbest_mode) {
unsigned int err;
xd->mi[0]->mbmi.mode = mode;
vp10_predict_intra_block(xd, 2, 2, TX_16X16, mode,
x->plane[0].src.buf, x->plane[0].src.stride,
xd->plane[0].dst.buf, xd->plane[0].dst.stride,
0, 0, 0);
vp10_predict_intra_block(xd, 2, 2, TX_16X16, mode, x->plane[0].src.buf,
x->plane[0].src.stride, xd->plane[0].dst.buf,
xd->plane[0].dst.stride, 0, 0, 0);
err = vpx_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
xd->plane[0].dst.buf, xd->plane[0].dst.stride);
// find best
if (err < best_err) {
best_err = err;
best_err = err;
best_mode = mode;
}
}
if (pbest_mode)
*pbest_mode = best_mode;
if (pbest_mode) *pbest_mode = best_mode;
return best_err;
}
static void update_mbgraph_mb_stats
(
VP10_COMP *cpi,
MBGRAPH_MB_STATS *stats,
YV12_BUFFER_CONFIG *buf,
int mb_y_offset,
YV12_BUFFER_CONFIG *golden_ref,
const MV *prev_golden_ref_mv,
YV12_BUFFER_CONFIG *alt_ref,
int mb_row,
int mb_col
) {
static void update_mbgraph_mb_stats(VP10_COMP *cpi, MBGRAPH_MB_STATS *stats,
YV12_BUFFER_CONFIG *buf, int mb_y_offset,
YV12_BUFFER_CONFIG *golden_ref,
const MV *prev_golden_ref_mv,
YV12_BUFFER_CONFIG *alt_ref, int mb_row,
int mb_col) {
MACROBLOCK *const x = &cpi->td.mb;
MACROBLOCKD *const xd = &x->e_mbd;
int intra_error;
@ -198,10 +186,8 @@ static void update_mbgraph_mb_stats
xd->plane[0].dst.stride = get_frame_new_buffer(cm)->y_stride;
// do intra 16x16 prediction
intra_error = find_best_16x16_intra(cpi,
&stats->ref[INTRA_FRAME].m.mode);
if (intra_error <= 0)
intra_error = 1;
intra_error = find_best_16x16_intra(cpi, &stats->ref[INTRA_FRAME].m.mode);
if (intra_error <= 0) intra_error = 1;
stats->ref[INTRA_FRAME].err = intra_error;
// Golden frame MV search, if it exists and is different than last frame
@ -209,9 +195,8 @@ static void update_mbgraph_mb_stats
int g_motion_error;
xd->plane[0].pre[0].buf = golden_ref->y_buffer + mb_y_offset;
xd->plane[0].pre[0].stride = golden_ref->y_stride;
g_motion_error = do_16x16_motion_search(cpi,
prev_golden_ref_mv,
mb_row, mb_col);
g_motion_error =
do_16x16_motion_search(cpi, prev_golden_ref_mv, mb_row, mb_col);
stats->ref[GOLDEN_FRAME].m.mv = x->best_mv;
stats->ref[GOLDEN_FRAME].err = g_motion_error;
} else {
@ -225,8 +210,8 @@ static void update_mbgraph_mb_stats
int a_motion_error;
xd->plane[0].pre[0].buf = alt_ref->y_buffer + mb_y_offset;
xd->plane[0].pre[0].stride = alt_ref->y_stride;
a_motion_error = do_16x16_zerozero_search(cpi,
&stats->ref[ALTREF_FRAME].m.mv);
a_motion_error =
do_16x16_zerozero_search(cpi, &stats->ref[ALTREF_FRAME].m.mv);
stats->ref[ALTREF_FRAME].err = a_motion_error;
} else {
@ -246,17 +231,17 @@ static void update_mbgraph_frame_stats(VP10_COMP *cpi,
int mb_col, mb_row, offset = 0;
int mb_y_offset = 0, arf_y_offset = 0, gld_y_offset = 0;
MV gld_top_mv = {0, 0};
MV gld_top_mv = { 0, 0 };
MODE_INFO mi_local;
vp10_zero(mi_local);
// Set up limit values for motion vectors to prevent them extending outside
// the UMV borders.
x->mv_row_min = -BORDER_MV_PIXELS_B16;
x->mv_row_max = (cm->mb_rows - 1) * 8 + BORDER_MV_PIXELS_B16;
xd->up_available = 0;
xd->plane[0].dst.stride = buf->y_stride;
xd->plane[0].pre[0].stride = buf->y_stride;
x->mv_row_min = -BORDER_MV_PIXELS_B16;
x->mv_row_max = (cm->mb_rows - 1) * 8 + BORDER_MV_PIXELS_B16;
xd->up_available = 0;
xd->plane[0].dst.stride = buf->y_stride;
xd->plane[0].pre[0].stride = buf->y_stride;
xd->plane[1].dst.stride = buf->uv_stride;
xd->mi[0] = &mi_local;
mi_local.mbmi.sb_type = BLOCK_16X16;
@ -265,41 +250,39 @@ static void update_mbgraph_frame_stats(VP10_COMP *cpi,
for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) {
MV gld_left_mv = gld_top_mv;
int mb_y_in_offset = mb_y_offset;
int mb_y_in_offset = mb_y_offset;
int arf_y_in_offset = arf_y_offset;
int gld_y_in_offset = gld_y_offset;
// Set up limit values for motion vectors to prevent them extending outside
// the UMV borders.
x->mv_col_min = -BORDER_MV_PIXELS_B16;
x->mv_col_max = (cm->mb_cols - 1) * 8 + BORDER_MV_PIXELS_B16;
x->mv_col_min = -BORDER_MV_PIXELS_B16;
x->mv_col_max = (cm->mb_cols - 1) * 8 + BORDER_MV_PIXELS_B16;
xd->left_available = 0;
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
MBGRAPH_MB_STATS *mb_stats = &stats->mb_stats[offset + mb_col];
update_mbgraph_mb_stats(cpi, mb_stats, buf, mb_y_in_offset,
golden_ref, &gld_left_mv, alt_ref,
mb_row, mb_col);
update_mbgraph_mb_stats(cpi, mb_stats, buf, mb_y_in_offset, golden_ref,
&gld_left_mv, alt_ref, mb_row, mb_col);
gld_left_mv = mb_stats->ref[GOLDEN_FRAME].m.mv.as_mv;
if (mb_col == 0) {
gld_top_mv = gld_left_mv;
}
xd->left_available = 1;
mb_y_in_offset += 16;
gld_y_in_offset += 16;
arf_y_in_offset += 16;
x->mv_col_min -= 16;
x->mv_col_max -= 16;
mb_y_in_offset += 16;
gld_y_in_offset += 16;
arf_y_in_offset += 16;
x->mv_col_min -= 16;
x->mv_col_max -= 16;
}
xd->up_available = 1;
mb_y_offset += buf->y_stride * 16;
gld_y_offset += golden_ref->y_stride * 16;
if (alt_ref)
arf_y_offset += alt_ref->y_stride * 16;
x->mv_row_min -= 16;
x->mv_row_max -= 16;
offset += cm->mb_cols;
mb_y_offset += buf->y_stride * 16;
gld_y_offset += golden_ref->y_stride * 16;
if (alt_ref) arf_y_offset += alt_ref->y_stride * 16;
x->mv_row_min -= 16;
x->mv_row_max -= 16;
offset += cm->mb_cols;
}
}
@ -313,9 +296,9 @@ static void separate_arf_mbs(VP10_COMP *cpi) {
int *arf_not_zz;
CHECK_MEM_ERROR(cm, arf_not_zz,
vpx_calloc(cm->mb_rows * cm->mb_cols * sizeof(*arf_not_zz),
1));
CHECK_MEM_ERROR(
cm, arf_not_zz,
vpx_calloc(cm->mb_rows * cm->mb_cols * sizeof(*arf_not_zz), 1));
// We are not interested in results beyond the alt ref itself.
if (n_frames > cpi->rc.frames_till_gf_update_due)
@ -331,12 +314,11 @@ static void separate_arf_mbs(VP10_COMP *cpi) {
MBGRAPH_MB_STATS *mb_stats = &frame_stats->mb_stats[offset + mb_col];
int altref_err = mb_stats->ref[ALTREF_FRAME].err;
int intra_err = mb_stats->ref[INTRA_FRAME ].err;
int intra_err = mb_stats->ref[INTRA_FRAME].err;
int golden_err = mb_stats->ref[GOLDEN_FRAME].err;
// Test for altref vs intra and gf and that its mv was 0,0.
if (altref_err > 1000 ||
altref_err > intra_err ||
if (altref_err > 1000 || altref_err > intra_err ||
altref_err > golden_err) {
arf_not_zz[offset + mb_col]++;
}
@ -391,11 +373,9 @@ void vp10_update_mbgraph_stats(VP10_COMP *cpi) {
// we need to look ahead beyond where the ARF transitions into
// being a GF - so exit if we don't look ahead beyond that
if (n_frames <= cpi->rc.frames_till_gf_update_due)
return;
if (n_frames <= cpi->rc.frames_till_gf_update_due) return;
if (n_frames > MAX_LAG_BUFFERS)
n_frames = MAX_LAG_BUFFERS;
if (n_frames > MAX_LAG_BUFFERS) n_frames = MAX_LAG_BUFFERS;
cpi->mbgraph_n_frames = n_frames;
for (i = 0; i < n_frames; i++) {
@ -414,8 +394,8 @@ void vp10_update_mbgraph_stats(VP10_COMP *cpi) {
assert(q_cur != NULL);
update_mbgraph_frame_stats(cpi, frame_stats, &q_cur->img,
golden_ref, cpi->Source);
update_mbgraph_frame_stats(cpi, frame_stats, &q_cur->img, golden_ref,
cpi->Source);
}
vpx_clear_system_state();

View file

@ -25,9 +25,7 @@ typedef struct {
} ref[TOTAL_REFS_PER_FRAME];
} MBGRAPH_MB_STATS;
typedef struct {
MBGRAPH_MB_STATS *mb_stats;
} MBGRAPH_FRAME_STATS;
typedef struct { MBGRAPH_MB_STATS *mb_stats; } MBGRAPH_FRAME_STATS;
struct VP10_COMP;

File diff not shown because of its large size. Load diff

View file

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP10_ENCODER_MCOMP_H_
#define VP10_ENCODER_MCOMP_H_
@ -26,7 +25,7 @@ extern "C" {
// Enable the use of motion vector in range [-1023, 1023].
#define MAX_FULL_PEL_VAL ((1 << (MAX_MVSEARCH_STEPS - 1)) - 1)
// Maximum size of the first step in full pel units
#define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS-1))
#define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS - 1))
// Allowed motion vector pixel distance outside image border
// for Block_16x16
#define BORDER_MV_PIXELS_B16 (16 + VPX_INTERP_EXTEND)
@ -44,94 +43,71 @@ typedef struct search_site_config {
} search_site_config;
void vp10_init_dsmotion_compensation(search_site_config *cfg, int stride);
void vp10_init3smotion_compensation(search_site_config *cfg, int stride);
void vp10_init3smotion_compensation(search_site_config *cfg, int stride);
void vp10_set_mv_search_range(MACROBLOCK *x, const MV *mv);
int vp10_mv_bit_cost(const MV *mv, const MV *ref,
const int *mvjcost, int *mvcost[2], int weight);
int vp10_mv_bit_cost(const MV *mv, const MV *ref, const int *mvjcost,
int *mvcost[2], int weight);
// Utility to compute variance + MV rate cost for a given MV
int vp10_get_mvpred_var(const MACROBLOCK *x,
const MV *best_mv, const MV *center_mv,
const vpx_variance_fn_ptr_t *vfp,
int use_mvcost);
int vp10_get_mvpred_av_var(const MACROBLOCK *x,
const MV *best_mv, const MV *center_mv,
const uint8_t *second_pred,
const vpx_variance_fn_ptr_t *vfp,
int use_mvcost);
int vp10_get_mvpred_var(const MACROBLOCK *x, const MV *best_mv,
const MV *center_mv, const vpx_variance_fn_ptr_t *vfp,
int use_mvcost);
int vp10_get_mvpred_av_var(const MACROBLOCK *x, const MV *best_mv,
const MV *center_mv, const uint8_t *second_pred,
const vpx_variance_fn_ptr_t *vfp, int use_mvcost);
struct VP10_COMP;
struct SPEED_FEATURES;
int vp10_init_search_range(int size);
int vp10_refining_search_sad(struct macroblock *x,
struct mv *ref_mv,
int vp10_refining_search_sad(struct macroblock *x, struct mv *ref_mv,
int sad_per_bit, int distance,
const vpx_variance_fn_ptr_t *fn_ptr,
const struct mv *center_mv);
// Runs sequence of diamond searches in smaller steps for RD.
int vp10_full_pixel_diamond(const struct VP10_COMP *cpi, MACROBLOCK *x,
MV *mvp_full, int step_param,
int sadpb, int further_steps, int do_refine,
int *cost_list,
const vpx_variance_fn_ptr_t *fn_ptr,
const MV *ref_mv, MV *dst_mv);
MV *mvp_full, int step_param, int sadpb,
int further_steps, int do_refine, int *cost_list,
const vpx_variance_fn_ptr_t *fn_ptr,
const MV *ref_mv, MV *dst_mv);
// Perform integral projection based motion estimation.
unsigned int vp10_int_pro_motion_estimation(const struct VP10_COMP *cpi,
MACROBLOCK *x,
BLOCK_SIZE bsize,
MACROBLOCK *x, BLOCK_SIZE bsize,
int mi_row, int mi_col);
int vp10_hex_search(MACROBLOCK *x,
MV *start_mv,
int search_param,
int sad_per_bit,
int do_init_search,
int *cost_list,
const vpx_variance_fn_ptr_t *vfp,
int use_mvcost,
int vp10_hex_search(MACROBLOCK *x, MV *start_mv, int search_param,
int sad_per_bit, int do_init_search, int *cost_list,
const vpx_variance_fn_ptr_t *vfp, int use_mvcost,
const MV *center_mv);
typedef int (fractional_mv_step_fp) (
MACROBLOCK *x,
const MV *ref_mv,
int allow_hp,
int error_per_bit,
typedef int(fractional_mv_step_fp)(
MACROBLOCK *x, const MV *ref_mv, int allow_hp, int error_per_bit,
const vpx_variance_fn_ptr_t *vfp,
int forced_stop, // 0 - full, 1 - qtr only, 2 - half only
int iters_per_step,
int *cost_list,
int *mvjcost, int *mvcost[2],
int *distortion, unsigned int *sse1,
const uint8_t *second_pred,
int w, int h, int use_upsampled_ref);
int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
int *distortion, unsigned int *sse1, const uint8_t *second_pred, int w,
int h, int use_upsampled_ref);
extern fractional_mv_step_fp vp10_find_best_sub_pixel_tree;
extern fractional_mv_step_fp vp10_find_best_sub_pixel_tree_pruned;
extern fractional_mv_step_fp vp10_find_best_sub_pixel_tree_pruned_more;
extern fractional_mv_step_fp vp10_find_best_sub_pixel_tree_pruned_evenmore;
typedef int (*vp10_full_search_fn_t)(const MACROBLOCK *x,
const MV *ref_mv, int sad_per_bit,
int distance,
typedef int (*vp10_full_search_fn_t)(const MACROBLOCK *x, const MV *ref_mv,
int sad_per_bit, int distance,
const vpx_variance_fn_ptr_t *fn_ptr,
const MV *center_mv, MV *best_mv);
typedef int (*vp10_diamond_search_fn_t)(MACROBLOCK *x,
const search_site_config *cfg,
MV *ref_mv, MV *best_mv,
int search_param, int sad_per_bit,
int *num00,
const vpx_variance_fn_ptr_t *fn_ptr,
const MV *center_mv);
typedef int (*vp10_diamond_search_fn_t)(
MACROBLOCK *x, const search_site_config *cfg, MV *ref_mv, MV *best_mv,
int search_param, int sad_per_bit, int *num00,
const vpx_variance_fn_ptr_t *fn_ptr, const MV *center_mv);
int vp10_refining_search_8p_c(MACROBLOCK *x,
int error_per_bit,
int vp10_refining_search_8p_c(MACROBLOCK *x, int error_per_bit,
int search_range,
const vpx_variance_fn_ptr_t *fn_ptr,
const MV *center_mv, const uint8_t *second_pred);
@ -139,70 +115,44 @@ int vp10_refining_search_8p_c(MACROBLOCK *x,
struct VP10_COMP;
int vp10_full_pixel_search(struct VP10_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, MV *mvp_full,
int step_param, int error_per_bit,
int *cost_list, const MV *ref_mv,
BLOCK_SIZE bsize, MV *mvp_full, int step_param,
int error_per_bit, int *cost_list, const MV *ref_mv,
int var_max, int rd);
#if CONFIG_EXT_INTER
int vp10_find_best_masked_sub_pixel_tree(const MACROBLOCK *x,
const uint8_t *mask, int mask_stride,
MV *bestmv, const MV *ref_mv,
int allow_hp,
int error_per_bit,
const vpx_variance_fn_ptr_t *vfp,
int forced_stop,
int iters_per_step,
int *mvjcost, int *mvcost[2],
int *distortion,
unsigned int *sse1,
int is_second);
int vp10_find_best_masked_sub_pixel_tree_up(struct VP10_COMP *cpi,
MACROBLOCK *x,
const uint8_t *mask,
int mask_stride,
int mi_row, int mi_col,
MV *bestmv, const MV *ref_mv,
int allow_hp,
int error_per_bit,
const vpx_variance_fn_ptr_t *vfp,
int forced_stop,
int iters_per_step,
int *mvjcost, int *mvcost[2],
int *distortion,
unsigned int *sse1,
int is_second,
int use_upsampled_ref);
int vp10_find_best_masked_sub_pixel_tree(
const MACROBLOCK *x, const uint8_t *mask, int mask_stride, MV *bestmv,
const MV *ref_mv, int allow_hp, int error_per_bit,
const vpx_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1,
int is_second);
int vp10_find_best_masked_sub_pixel_tree_up(
struct VP10_COMP *cpi, MACROBLOCK *x, const uint8_t *mask, int mask_stride,
int mi_row, int mi_col, MV *bestmv, const MV *ref_mv, int allow_hp,
int error_per_bit, const vpx_variance_fn_ptr_t *vfp, int forced_stop,
int iters_per_step, int *mvjcost, int *mvcost[2], int *distortion,
unsigned int *sse1, int is_second, int use_upsampled_ref);
int vp10_masked_full_pixel_diamond(const struct VP10_COMP *cpi, MACROBLOCK *x,
const uint8_t *mask, int mask_stride,
MV *mvp_full, int step_param,
int sadpb, int further_steps, int do_refine,
MV *mvp_full, int step_param, int sadpb,
int further_steps, int do_refine,
const vpx_variance_fn_ptr_t *fn_ptr,
const MV *ref_mv, MV *dst_mv,
int is_second);
const MV *ref_mv, MV *dst_mv, int is_second);
#endif // CONFIG_EXT_INTER
#if CONFIG_OBMC
int vp10_obmc_full_pixel_diamond(const struct VP10_COMP *cpi, MACROBLOCK *x,
const int32_t *wsrc,
const int32_t *mask,
MV *mvp_full, int step_param,
int sadpb, int further_steps, int do_refine,
const int32_t *wsrc, const int32_t *mask,
MV *mvp_full, int step_param, int sadpb,
int further_steps, int do_refine,
const vpx_variance_fn_ptr_t *fn_ptr,
const MV *ref_mv, MV *dst_mv,
int is_second);
int vp10_find_best_obmc_sub_pixel_tree_up(struct VP10_COMP *cpi, MACROBLOCK *x,
const int32_t *wsrc,
const int32_t *mask,
int mi_row, int mi_col,
MV *bestmv, const MV *ref_mv,
int allow_hp, int error_per_bit,
const vpx_variance_fn_ptr_t *vfp,
int forced_stop, int iters_per_step,
int *mvjcost, int *mvcost[2],
int *distortion, unsigned int *sse1,
int is_second,
int use_upsampled_ref);
const MV *ref_mv, MV *dst_mv, int is_second);
int vp10_find_best_obmc_sub_pixel_tree_up(
struct VP10_COMP *cpi, MACROBLOCK *x, const int32_t *wsrc,
const int32_t *mask, int mi_row, int mi_col, MV *bestmv, const MV *ref_mv,
int allow_hp, int error_per_bit, const vpx_variance_fn_ptr_t *vfp,
int forced_stop, int iters_per_step, int *mvjcost, int *mvcost[2],
int *distortion, unsigned int *sse1, int is_second, int use_upsampled_ref);
#endif // CONFIG_OBMC
#ifdef __cplusplus
} // extern "C"
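The fractional_mv_step_fp declarations above rely on a function-type typedef (not a pointer typedef): each search variant is then declared with a single extern line, and callers pick an implementation at runtime. A reduced sketch of the pattern, with stub names standing in for the real searches:
/* Function-type typedef: search_fn names a signature, not a pointer. */
typedef int search_fn(int start, int range);
/* Hypothetical variants sharing that signature. */
static int coarse_search(int start, int range) { return start + range; }
static int fine_search(int start, int range) {
  return start > range ? start - range : 0;
}
/* Callers hold a pointer to the chosen variant. */
static int run_search(search_fn *fn, int start, int range) {
  return fn(start, range);
}
int main(void) { return run_search(fine_search, 4, 4); }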

View file

@ -11,100 +11,93 @@
#include "./vp10_rtcd.h"
#include "vpx_dsp/mips/macros_msa.h"
#define BLOCK_ERROR_BLOCKSIZE_MSA(BSize) \
static int64_t block_error_##BSize##size_msa(const int16_t *coeff_ptr, \
const int16_t *dq_coeff_ptr, \
int64_t *ssz) { \
int64_t err = 0; \
uint32_t loop_cnt; \
v8i16 coeff, dq_coeff, coeff_r_h, coeff_l_h; \
v4i32 diff_r, diff_l, coeff_r_w, coeff_l_w; \
v2i64 sq_coeff_r, sq_coeff_l; \
v2i64 err0, err_dup0, err1, err_dup1; \
\
coeff = LD_SH(coeff_ptr); \
dq_coeff = LD_SH(dq_coeff_ptr); \
UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
DOTP_SW2_SD(coeff_r_w, coeff_l_w, coeff_r_w, coeff_l_w, \
sq_coeff_r, sq_coeff_l); \
DOTP_SW2_SD(diff_r, diff_l, diff_r, diff_l, err0, err1); \
\
coeff = LD_SH(coeff_ptr + 8); \
dq_coeff = LD_SH(dq_coeff_ptr + 8); \
UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
\
coeff_ptr += 16; \
dq_coeff_ptr += 16; \
\
for (loop_cnt = ((BSize >> 4) - 1); loop_cnt--;) { \
coeff = LD_SH(coeff_ptr); \
dq_coeff = LD_SH(dq_coeff_ptr); \
UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
\
coeff = LD_SH(coeff_ptr + 8); \
dq_coeff = LD_SH(dq_coeff_ptr + 8); \
UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
\
coeff_ptr += 16; \
dq_coeff_ptr += 16; \
} \
\
err_dup0 = __msa_splati_d(sq_coeff_r, 1); \
err_dup1 = __msa_splati_d(sq_coeff_l, 1); \
sq_coeff_r += err_dup0; \
sq_coeff_l += err_dup1; \
*ssz = __msa_copy_s_d(sq_coeff_r, 0); \
*ssz += __msa_copy_s_d(sq_coeff_l, 0); \
\
err_dup0 = __msa_splati_d(err0, 1); \
err_dup1 = __msa_splati_d(err1, 1); \
err0 += err_dup0; \
err1 += err_dup1; \
err = __msa_copy_s_d(err0, 0); \
err += __msa_copy_s_d(err1, 0); \
\
return err; \
}
#define BLOCK_ERROR_BLOCKSIZE_MSA(BSize) \
static int64_t block_error_##BSize##size_msa( \
const int16_t *coeff_ptr, const int16_t *dq_coeff_ptr, int64_t *ssz) { \
int64_t err = 0; \
uint32_t loop_cnt; \
v8i16 coeff, dq_coeff, coeff_r_h, coeff_l_h; \
v4i32 diff_r, diff_l, coeff_r_w, coeff_l_w; \
v2i64 sq_coeff_r, sq_coeff_l; \
v2i64 err0, err_dup0, err1, err_dup1; \
\
coeff = LD_SH(coeff_ptr); \
dq_coeff = LD_SH(dq_coeff_ptr); \
UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
DOTP_SW2_SD(coeff_r_w, coeff_l_w, coeff_r_w, coeff_l_w, sq_coeff_r, \
sq_coeff_l); \
DOTP_SW2_SD(diff_r, diff_l, diff_r, diff_l, err0, err1); \
\
coeff = LD_SH(coeff_ptr + 8); \
dq_coeff = LD_SH(dq_coeff_ptr + 8); \
UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
\
coeff_ptr += 16; \
dq_coeff_ptr += 16; \
\
for (loop_cnt = ((BSize >> 4) - 1); loop_cnt--;) { \
coeff = LD_SH(coeff_ptr); \
dq_coeff = LD_SH(dq_coeff_ptr); \
UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
\
coeff = LD_SH(coeff_ptr + 8); \
dq_coeff = LD_SH(dq_coeff_ptr + 8); \
UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
\
coeff_ptr += 16; \
dq_coeff_ptr += 16; \
} \
\
err_dup0 = __msa_splati_d(sq_coeff_r, 1); \
err_dup1 = __msa_splati_d(sq_coeff_l, 1); \
sq_coeff_r += err_dup0; \
sq_coeff_l += err_dup1; \
*ssz = __msa_copy_s_d(sq_coeff_r, 0); \
*ssz += __msa_copy_s_d(sq_coeff_l, 0); \
\
err_dup0 = __msa_splati_d(err0, 1); \
err_dup1 = __msa_splati_d(err1, 1); \
err0 += err_dup0; \
err1 += err_dup1; \
err = __msa_copy_s_d(err0, 0); \
err += __msa_copy_s_d(err1, 0); \
\
return err; \
}
BLOCK_ERROR_BLOCKSIZE_MSA(16);
BLOCK_ERROR_BLOCKSIZE_MSA(64);
BLOCK_ERROR_BLOCKSIZE_MSA(256);
BLOCK_ERROR_BLOCKSIZE_MSA(1024);
/* clang-format off */
BLOCK_ERROR_BLOCKSIZE_MSA(16)
BLOCK_ERROR_BLOCKSIZE_MSA(64)
BLOCK_ERROR_BLOCKSIZE_MSA(256)
BLOCK_ERROR_BLOCKSIZE_MSA(1024)
/* clang-format on */
int64_t vp10_block_error_msa(const tran_low_t *coeff_ptr,
const tran_low_t *dq_coeff_ptr,
intptr_t blk_size, int64_t *ssz) {
const tran_low_t *dq_coeff_ptr, intptr_t blk_size,
int64_t *ssz) {
int64_t err;
const int16_t *coeff = (const int16_t *)coeff_ptr;
const int16_t *dq_coeff = (const int16_t *)dq_coeff_ptr;
switch (blk_size) {
case 16:
err = block_error_16size_msa(coeff, dq_coeff, ssz);
break;
case 64:
err = block_error_64size_msa(coeff, dq_coeff, ssz);
break;
case 256:
err = block_error_256size_msa(coeff, dq_coeff, ssz);
break;
case 1024:
err = block_error_1024size_msa(coeff, dq_coeff, ssz);
break;
case 16: err = block_error_16size_msa(coeff, dq_coeff, ssz); break;
case 64: err = block_error_64size_msa(coeff, dq_coeff, ssz); break;
case 256: err = block_error_256size_msa(coeff, dq_coeff, ssz); break;
case 1024: err = block_error_1024size_msa(coeff, dq_coeff, ssz); break;
default:
err = vp10_block_error_c(coeff_ptr, dq_coeff_ptr, blk_size, ssz);
break;

@ -159,8 +159,8 @@ static void fadst16_transpose_postproc_msa(int16_t *input, int16_t *out) {
/* load input data */
LD_SH8(input, 16, l0, l1, l2, l3, l4, l5, l6, l7);
TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7,
r0, r1, r2, r3, r4, r5, r6, r7);
TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7, r0, r1, r2, r3, r4, r5, r6,
r7);
FDCT_POSTPROC_2V_NEG_H(r0, r1);
FDCT_POSTPROC_2V_NEG_H(r2, r3);
FDCT_POSTPROC_2V_NEG_H(r4, r5);
@ -169,8 +169,8 @@ static void fadst16_transpose_postproc_msa(int16_t *input, int16_t *out) {
out += 64;
LD_SH8(input + 8, 16, l8, l9, l10, l11, l12, l13, l14, l15);
TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15,
r8, r9, r10, r11, r12, r13, r14, r15);
TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15, r8, r9, r10, r11,
r12, r13, r14, r15);
FDCT_POSTPROC_2V_NEG_H(r8, r9);
FDCT_POSTPROC_2V_NEG_H(r10, r11);
FDCT_POSTPROC_2V_NEG_H(r12, r13);
@ -181,8 +181,8 @@ static void fadst16_transpose_postproc_msa(int16_t *input, int16_t *out) {
/* load input data */
input += 128;
LD_SH8(input, 16, l0, l1, l2, l3, l4, l5, l6, l7);
TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7,
r0, r1, r2, r3, r4, r5, r6, r7);
TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7, r0, r1, r2, r3, r4, r5, r6,
r7);
FDCT_POSTPROC_2V_NEG_H(r0, r1);
FDCT_POSTPROC_2V_NEG_H(r2, r3);
FDCT_POSTPROC_2V_NEG_H(r4, r5);
@ -191,8 +191,8 @@ static void fadst16_transpose_postproc_msa(int16_t *input, int16_t *out) {
out += 64;
LD_SH8(input + 8, 16, l8, l9, l10, l11, l12, l13, l14, l15);
TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15,
r8, r9, r10, r11, r12, r13, r14, r15);
TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15, r8, r9, r10, r11,
r12, r13, r14, r15);
FDCT_POSTPROC_2V_NEG_H(r8, r9);
FDCT_POSTPROC_2V_NEG_H(r10, r11);
FDCT_POSTPROC_2V_NEG_H(r12, r13);
@ -339,24 +339,24 @@ static void fadst16_transpose_msa(int16_t *input, int16_t *out) {
v8i16 l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, l13, l14, l15;
/* load input data */
LD_SH16(input, 8, l0, l8, l1, l9, l2, l10, l3, l11,
l4, l12, l5, l13, l6, l14, l7, l15);
TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7,
r0, r1, r2, r3, r4, r5, r6, r7);
TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15,
r8, r9, r10, r11, r12, r13, r14, r15);
LD_SH16(input, 8, l0, l8, l1, l9, l2, l10, l3, l11, l4, l12, l5, l13, l6, l14,
l7, l15);
TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7, r0, r1, r2, r3, r4, r5, r6,
r7);
TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15, r8, r9, r10, r11,
r12, r13, r14, r15);
ST_SH8(r0, r8, r1, r9, r2, r10, r3, r11, out, 8);
ST_SH8(r4, r12, r5, r13, r6, r14, r7, r15, (out + 64), 8);
out += 16 * 8;
/* load input data */
input += 128;
LD_SH16(input, 8, l0, l8, l1, l9, l2, l10, l3, l11,
l4, l12, l5, l13, l6, l14, l7, l15);
TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7,
r0, r1, r2, r3, r4, r5, r6, r7);
TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15,
r8, r9, r10, r11, r12, r13, r14, r15);
LD_SH16(input, 8, l0, l8, l1, l9, l2, l10, l3, l11, l4, l12, l5, l13, l6, l14,
l7, l15);
TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7, r0, r1, r2, r3, r4, r5, r6,
r7);
TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15, r8, r9, r10, r11,
r12, r13, r14, r15);
ST_SH8(r0, r8, r1, r9, r2, r10, r3, r11, out, 8);
ST_SH8(r4, r12, r5, r13, r6, r14, r7, r15, (out + 64), 8);
}
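(Aside: TRANSPOSE8x8_SH_SH used throughout is an ordinary 8x8 transpose of 16-bit values kept in eight v8i16 registers. A plain-C equivalent of one such tile transpose:)

/* Plain-C equivalent of one TRANSPOSE8x8_SH_SH tile operation. */
static void transpose_8x8_s16(const int16_t in[8][8], int16_t out[8][8]) {
  int r, c;
  for (r = 0; r < 8; ++r)
    for (c = 0; c < 8; ++c) out[c][r] = in[r][c];
}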
@ -371,10 +371,10 @@ static void postproc_fdct16x8_1d_row(int16_t *intermediate, int16_t *output) {
LD_SH8(temp, 16, in0, in1, in2, in3, in4, in5, in6, in7);
temp = intermediate + 8;
LD_SH8(temp, 16, in8, in9, in10, in11, in12, in13, in14, in15);
TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
TRANSPOSE8x8_SH_SH(in8, in9, in10, in11, in12, in13, in14, in15,
in8, in9, in10, in11, in12, in13, in14, in15);
TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
in4, in5, in6, in7);
TRANSPOSE8x8_SH_SH(in8, in9, in10, in11, in12, in13, in14, in15, in8, in9,
in10, in11, in12, in13, in14, in15);
FDCT_POSTPROC_2V_NEG_H(in0, in1);
FDCT_POSTPROC_2V_NEG_H(in2, in3);
FDCT_POSTPROC_2V_NEG_H(in4, in5);
@ -383,29 +383,28 @@ static void postproc_fdct16x8_1d_row(int16_t *intermediate, int16_t *output) {
FDCT_POSTPROC_2V_NEG_H(in10, in11);
FDCT_POSTPROC_2V_NEG_H(in12, in13);
FDCT_POSTPROC_2V_NEG_H(in14, in15);
BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7,
in8, in9, in10, in11, in12, in13, in14, in15,
tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7,
in8, in9, in10, in11, in12, in13, in14, in15);
BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, in10, in11,
in12, in13, in14, in15, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6,
tmp7, in8, in9, in10, in11, in12, in13, in14, in15);
temp = intermediate;
ST_SH8(in8, in9, in10, in11, in12, in13, in14, in15, temp, 16);
FDCT8x16_EVEN(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7,
tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7);
FDCT8x16_EVEN(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp0, tmp1,
tmp2, tmp3, tmp4, tmp5, tmp6, tmp7);
temp = intermediate;
LD_SH8(temp, 16, in8, in9, in10, in11, in12, in13, in14, in15);
FDCT8x16_ODD(in8, in9, in10, in11, in12, in13, in14, in15,
in0, in1, in2, in3, in4, in5, in6, in7);
TRANSPOSE8x8_SH_SH(tmp0, in0, tmp1, in1, tmp2, in2, tmp3, in3,
tmp0, in0, tmp1, in1, tmp2, in2, tmp3, in3);
FDCT8x16_ODD(in8, in9, in10, in11, in12, in13, in14, in15, in0, in1, in2, in3,
in4, in5, in6, in7);
TRANSPOSE8x8_SH_SH(tmp0, in0, tmp1, in1, tmp2, in2, tmp3, in3, tmp0, in0,
tmp1, in1, tmp2, in2, tmp3, in3);
ST_SH8(tmp0, in0, tmp1, in1, tmp2, in2, tmp3, in3, out, 16);
TRANSPOSE8x8_SH_SH(tmp4, in4, tmp5, in5, tmp6, in6, tmp7, in7,
tmp4, in4, tmp5, in5, tmp6, in6, tmp7, in7);
TRANSPOSE8x8_SH_SH(tmp4, in4, tmp5, in5, tmp6, in6, tmp7, in7, tmp4, in4,
tmp5, in5, tmp6, in6, tmp7, in7);
out = output + 8;
ST_SH8(tmp4, in4, tmp5, in5, tmp6, in6, tmp7, in7, out, 16);
}
void vp10_fht16x16_msa(const int16_t *input, int16_t *output,
int32_t stride, int32_t tx_type) {
void vp10_fht16x16_msa(const int16_t *input, int16_t *output, int32_t stride,
int32_t tx_type) {
DECLARE_ALIGNED(32, int16_t, tmp[256]);
DECLARE_ALIGNED(32, int16_t, trans_buf[256]);
DECLARE_ALIGNED(32, int16_t, tmp_buf[128]);
@ -413,35 +412,31 @@ void vp10_fht16x16_msa(const int16_t *input, int16_t *output,
int16_t *ptmpbuf = &tmp_buf[0];
int16_t *trans = &trans_buf[0];
const int32_t const_arr[29 * 4] = {
52707308, 52707308, 52707308, 52707308,
-1072430300, -1072430300, -1072430300, -1072430300,
795618043, 795618043, 795618043, 795618043,
-721080468, -721080468, -721080468, -721080468,
459094491, 459094491, 459094491, 459094491,
-970646691, -970646691, -970646691, -970646691,
1010963856, 1010963856, 1010963856, 1010963856,
-361743294, -361743294, -361743294, -361743294,
209469125, 209469125, 209469125, 209469125,
-1053094788, -1053094788, -1053094788, -1053094788,
1053160324, 1053160324, 1053160324, 1053160324,
639644520, 639644520, 639644520, 639644520,
-862444000, -862444000, -862444000, -862444000,
1062144356, 1062144356, 1062144356, 1062144356,
-157532337, -157532337, -157532337, -157532337,
260914709, 260914709, 260914709, 260914709,
-1041559667, -1041559667, -1041559667, -1041559667,
920985831, 920985831, 920985831, 920985831,
-551995675, -551995675, -551995675, -551995675,
596522295, 596522295, 596522295, 596522295,
892853362, 892853362, 892853362, 892853362,
-892787826, -892787826, -892787826, -892787826,
410925857, 410925857, 410925857, 410925857,
-992012162, -992012162, -992012162, -992012162,
992077698, 992077698, 992077698, 992077698,
759246145, 759246145, 759246145, 759246145,
-759180609, -759180609, -759180609, -759180609,
-759222975, -759222975, -759222975, -759222975,
759288511, 759288511, 759288511, 759288511 };
52707308, 52707308, 52707308, 52707308, -1072430300,
-1072430300, -1072430300, -1072430300, 795618043, 795618043,
795618043, 795618043, -721080468, -721080468, -721080468,
-721080468, 459094491, 459094491, 459094491, 459094491,
-970646691, -970646691, -970646691, -970646691, 1010963856,
1010963856, 1010963856, 1010963856, -361743294, -361743294,
-361743294, -361743294, 209469125, 209469125, 209469125,
209469125, -1053094788, -1053094788, -1053094788, -1053094788,
1053160324, 1053160324, 1053160324, 1053160324, 639644520,
639644520, 639644520, 639644520, -862444000, -862444000,
-862444000, -862444000, 1062144356, 1062144356, 1062144356,
1062144356, -157532337, -157532337, -157532337, -157532337,
260914709, 260914709, 260914709, 260914709, -1041559667,
-1041559667, -1041559667, -1041559667, 920985831, 920985831,
920985831, 920985831, -551995675, -551995675, -551995675,
-551995675, 596522295, 596522295, 596522295, 596522295,
892853362, 892853362, 892853362, 892853362, -892787826,
-892787826, -892787826, -892787826, 410925857, 410925857,
410925857, 410925857, -992012162, -992012162, -992012162,
-992012162, 992077698, 992077698, 992077698, 992077698,
759246145, 759246145, 759246145, 759246145, -759180609,
-759180609, -759180609, -759180609, -759222975, -759222975,
-759222975, -759222975, 759288511, 759288511, 759288511,
759288511
};
switch (tx_type) {
case DCT_DCT:
@ -500,8 +495,6 @@ void vp10_fht16x16_msa(const int16_t *input, int16_t *output,
fadst16_transpose_msa(tmp, output);
break;
default:
assert(0);
break;
default: assert(0); break;
}
}

@ -14,7 +14,7 @@
#include "vp10/encoder/mips/msa/fdct_msa.h"
void vp10_fwht4x4_msa(const int16_t *input, int16_t *output,
int32_t src_stride) {
int32_t src_stride) {
v8i16 in0, in1, in2, in3, in4;
LD_SH4(input, src_stride, in0, in1, in2, in3);
@ -46,7 +46,7 @@ void vp10_fwht4x4_msa(const int16_t *input, int16_t *output,
}
void vp10_fht4x4_msa(const int16_t *input, int16_t *output, int32_t stride,
int32_t tx_type) {
int32_t tx_type) {
v8i16 in0, in1, in2, in3;
LD_SH4(input, stride, in0, in1, in2, in3);
@ -86,9 +86,7 @@ void vp10_fht4x4_msa(const int16_t *input, int16_t *output, int32_t stride,
TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
VPX_FADST4(in0, in1, in2, in3, in0, in1, in2, in3);
break;
default:
assert(0);
break;
default: assert(0); break;
}
TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);

@ -14,7 +14,7 @@
#include "vp10/encoder/mips/msa/fdct_msa.h"
void vp10_fht8x8_msa(const int16_t *input, int16_t *output, int32_t stride,
int32_t tx_type) {
int32_t tx_type) {
v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
LD_SH8(input, stride, in0, in1, in2, in3, in4, in5, in6, in7);
@ -23,44 +23,42 @@ void vp10_fht8x8_msa(const int16_t *input, int16_t *output, int32_t stride,
switch (tx_type) {
case DCT_DCT:
VPX_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
VPX_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
VPX_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
in5, in6, in7);
TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
in3, in4, in5, in6, in7);
VPX_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
in5, in6, in7);
break;
case ADST_DCT:
VPX_ADST8(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
VPX_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
VPX_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
in5, in6, in7);
TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
in3, in4, in5, in6, in7);
VPX_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
in5, in6, in7);
break;
case DCT_ADST:
VPX_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
VPX_ADST8(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
VPX_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
in5, in6, in7);
TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
in3, in4, in5, in6, in7);
VPX_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
in5, in6, in7);
break;
case ADST_ADST:
VPX_ADST8(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
VPX_ADST8(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
break;
default:
assert(0);
VPX_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
in5, in6, in7);
TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
in3, in4, in5, in6, in7);
VPX_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
in5, in6, in7);
break;
default: assert(0); break;
}
TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
in4, in5, in6, in7);
SRLI_AVE_S_4V_H(in0, in1, in2, in3, in4, in5, in6, in7);
ST_SH8(in0, in1, in2, in3, in4, in5, in6, in7, output, 8);
}
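All the tx_type cases above share one separable pattern: a 1-D kernel over rows, an in-register 8x8 transpose, a 1-D kernel over columns (plus a final transpose and rounded averaging), so DCT_ADST and friends fall out of the kernel choice. A floating-point sketch of that structure only; the real kernels (VPX_FDCT8/VPX_ADST8) are fixed-point and in-register:

#include <string.h>

/* Illustrative row/transpose/column flow of the hybrid transforms above.
 * tx1d_fn stands in for a 1-D kernel such as a DCT or ADST; this float
 * version ignores the codec's fixed-point rounding and final averaging. */
typedef void (*tx1d_fn)(const double in[8], double out[8]);

static void fht8x8_sketch(const double in[8][8], double out[8][8],
                          tx1d_fn row_tx, tx1d_fn col_tx) {
  double tmp[8][8];
  int r, c;
  for (r = 0; r < 8; ++r) row_tx(in[r], tmp[r]); /* 1-D over rows */
  for (r = 0; r < 8; ++r)                        /* transpose */
    for (c = 0; c < 8; ++c) out[c][r] = tmp[r][c];
  memcpy(tmp, out, sizeof(tmp));
  for (r = 0; r < 8; ++r) col_tx(tmp[r], out[r]); /* 1-D over columns */
}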

@ -15,103 +15,102 @@
#include "vpx_dsp/mips/txfm_macros_msa.h"
#include "vpx_ports/mem.h"
#define VPX_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, \
out0, out1, out2, out3, out4, out5, out6, out7) { \
v8i16 cnst0_m, cnst1_m, cnst2_m, cnst3_m, cnst4_m; \
v8i16 vec0_m, vec1_m, vec2_m, vec3_m, s0_m, s1_m; \
v8i16 coeff0_m = { cospi_2_64, cospi_6_64, cospi_10_64, cospi_14_64, \
cospi_18_64, cospi_22_64, cospi_26_64, cospi_30_64 }; \
v8i16 coeff1_m = { cospi_8_64, -cospi_8_64, cospi_16_64, -cospi_16_64, \
cospi_24_64, -cospi_24_64, 0, 0 }; \
\
SPLATI_H2_SH(coeff0_m, 0, 7, cnst0_m, cnst1_m); \
cnst2_m = -cnst0_m; \
ILVEV_H2_SH(cnst0_m, cnst1_m, cnst1_m, cnst2_m, cnst0_m, cnst1_m); \
SPLATI_H2_SH(coeff0_m, 4, 3, cnst2_m, cnst3_m); \
cnst4_m = -cnst2_m; \
ILVEV_H2_SH(cnst2_m, cnst3_m, cnst3_m, cnst4_m, cnst2_m, cnst3_m); \
\
ILVRL_H2_SH(in0, in7, vec1_m, vec0_m); \
ILVRL_H2_SH(in4, in3, vec3_m, vec2_m); \
DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, \
cnst1_m, cnst2_m, cnst3_m, in7, in0, \
in4, in3); \
\
SPLATI_H2_SH(coeff0_m, 2, 5, cnst0_m, cnst1_m); \
cnst2_m = -cnst0_m; \
ILVEV_H2_SH(cnst0_m, cnst1_m, cnst1_m, cnst2_m, cnst0_m, cnst1_m); \
SPLATI_H2_SH(coeff0_m, 6, 1, cnst2_m, cnst3_m); \
cnst4_m = -cnst2_m; \
ILVEV_H2_SH(cnst2_m, cnst3_m, cnst3_m, cnst4_m, cnst2_m, cnst3_m); \
\
ILVRL_H2_SH(in2, in5, vec1_m, vec0_m); \
ILVRL_H2_SH(in6, in1, vec3_m, vec2_m); \
\
DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, \
cnst1_m, cnst2_m, cnst3_m, in5, in2, \
in6, in1); \
BUTTERFLY_4(in7, in0, in2, in5, s1_m, s0_m, in2, in5); \
out7 = -s0_m; \
out0 = s1_m; \
\
SPLATI_H4_SH(coeff1_m, 0, 4, 1, 5, cnst0_m, cnst1_m, cnst2_m, cnst3_m); \
\
ILVEV_H2_SH(cnst3_m, cnst0_m, cnst1_m, cnst2_m, cnst3_m, cnst2_m); \
cnst0_m = __msa_ilvev_h(cnst1_m, cnst0_m); \
cnst1_m = cnst0_m; \
\
ILVRL_H2_SH(in4, in3, vec1_m, vec0_m); \
ILVRL_H2_SH(in6, in1, vec3_m, vec2_m); \
DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, \
cnst2_m, cnst3_m, cnst1_m, out1, out6, \
s0_m, s1_m); \
\
SPLATI_H2_SH(coeff1_m, 2, 3, cnst0_m, cnst1_m); \
cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \
\
ILVRL_H2_SH(in2, in5, vec1_m, vec0_m); \
ILVRL_H2_SH(s0_m, s1_m, vec3_m, vec2_m); \
out3 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \
out4 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst1_m); \
out2 = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst0_m); \
out5 = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst1_m); \
\
out1 = -out1; \
out3 = -out3; \
out5 = -out5; \
}
#define VPX_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, \
out3, out4, out5, out6, out7) \
{ \
v8i16 cnst0_m, cnst1_m, cnst2_m, cnst3_m, cnst4_m; \
v8i16 vec0_m, vec1_m, vec2_m, vec3_m, s0_m, s1_m; \
v8i16 coeff0_m = { cospi_2_64, cospi_6_64, cospi_10_64, cospi_14_64, \
cospi_18_64, cospi_22_64, cospi_26_64, cospi_30_64 }; \
v8i16 coeff1_m = { cospi_8_64, -cospi_8_64, cospi_16_64, -cospi_16_64, \
cospi_24_64, -cospi_24_64, 0, 0 }; \
\
SPLATI_H2_SH(coeff0_m, 0, 7, cnst0_m, cnst1_m); \
cnst2_m = -cnst0_m; \
ILVEV_H2_SH(cnst0_m, cnst1_m, cnst1_m, cnst2_m, cnst0_m, cnst1_m); \
SPLATI_H2_SH(coeff0_m, 4, 3, cnst2_m, cnst3_m); \
cnst4_m = -cnst2_m; \
ILVEV_H2_SH(cnst2_m, cnst3_m, cnst3_m, cnst4_m, cnst2_m, cnst3_m); \
\
ILVRL_H2_SH(in0, in7, vec1_m, vec0_m); \
ILVRL_H2_SH(in4, in3, vec3_m, vec2_m); \
DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, cnst1_m, \
cnst2_m, cnst3_m, in7, in0, in4, in3); \
\
SPLATI_H2_SH(coeff0_m, 2, 5, cnst0_m, cnst1_m); \
cnst2_m = -cnst0_m; \
ILVEV_H2_SH(cnst0_m, cnst1_m, cnst1_m, cnst2_m, cnst0_m, cnst1_m); \
SPLATI_H2_SH(coeff0_m, 6, 1, cnst2_m, cnst3_m); \
cnst4_m = -cnst2_m; \
ILVEV_H2_SH(cnst2_m, cnst3_m, cnst3_m, cnst4_m, cnst2_m, cnst3_m); \
\
ILVRL_H2_SH(in2, in5, vec1_m, vec0_m); \
ILVRL_H2_SH(in6, in1, vec3_m, vec2_m); \
\
DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, cnst1_m, \
cnst2_m, cnst3_m, in5, in2, in6, in1); \
BUTTERFLY_4(in7, in0, in2, in5, s1_m, s0_m, in2, in5); \
out7 = -s0_m; \
out0 = s1_m; \
\
SPLATI_H4_SH(coeff1_m, 0, 4, 1, 5, cnst0_m, cnst1_m, cnst2_m, cnst3_m); \
\
ILVEV_H2_SH(cnst3_m, cnst0_m, cnst1_m, cnst2_m, cnst3_m, cnst2_m); \
cnst0_m = __msa_ilvev_h(cnst1_m, cnst0_m); \
cnst1_m = cnst0_m; \
\
ILVRL_H2_SH(in4, in3, vec1_m, vec0_m); \
ILVRL_H2_SH(in6, in1, vec3_m, vec2_m); \
DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, cnst2_m, \
cnst3_m, cnst1_m, out1, out6, s0_m, s1_m); \
\
SPLATI_H2_SH(coeff1_m, 2, 3, cnst0_m, cnst1_m); \
cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \
\
ILVRL_H2_SH(in2, in5, vec1_m, vec0_m); \
ILVRL_H2_SH(s0_m, s1_m, vec3_m, vec2_m); \
out3 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \
out4 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst1_m); \
out2 = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst0_m); \
out5 = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst1_m); \
\
out1 = -out1; \
out3 = -out3; \
out5 = -out5; \
}
#define VPX_FADST4(in0, in1, in2, in3, out0, out1, out2, out3) { \
v4i32 s0_m, s1_m, s2_m, s3_m, constant_m; \
v4i32 in0_r_m, in1_r_m, in2_r_m, in3_r_m; \
\
UNPCK_R_SH_SW(in0, in0_r_m); \
UNPCK_R_SH_SW(in1, in1_r_m); \
UNPCK_R_SH_SW(in2, in2_r_m); \
UNPCK_R_SH_SW(in3, in3_r_m); \
\
constant_m = __msa_fill_w(sinpi_4_9); \
MUL2(in0_r_m, constant_m, in3_r_m, constant_m, s1_m, s0_m); \
\
constant_m = __msa_fill_w(sinpi_1_9); \
s0_m += in0_r_m * constant_m; \
s1_m -= in1_r_m * constant_m; \
\
constant_m = __msa_fill_w(sinpi_2_9); \
s0_m += in1_r_m * constant_m; \
s1_m += in3_r_m * constant_m; \
\
s2_m = in0_r_m + in1_r_m - in3_r_m; \
\
constant_m = __msa_fill_w(sinpi_3_9); \
MUL2(in2_r_m, constant_m, s2_m, constant_m, s3_m, in1_r_m); \
\
in0_r_m = s0_m + s3_m; \
s2_m = s1_m - s3_m; \
s3_m = s1_m - s0_m + s3_m; \
\
SRARI_W4_SW(in0_r_m, in1_r_m, s2_m, s3_m, DCT_CONST_BITS); \
PCKEV_H4_SH(in0_r_m, in0_r_m, in1_r_m, in1_r_m, s2_m, s2_m, \
s3_m, s3_m, out0, out1, out2, out3); \
}
#endif /* VP10_ENCODER_MIPS_MSA_VP10_FDCT_MSA_H_ */
#define VPX_FADST4(in0, in1, in2, in3, out0, out1, out2, out3) \
{ \
v4i32 s0_m, s1_m, s2_m, s3_m, constant_m; \
v4i32 in0_r_m, in1_r_m, in2_r_m, in3_r_m; \
\
UNPCK_R_SH_SW(in0, in0_r_m); \
UNPCK_R_SH_SW(in1, in1_r_m); \
UNPCK_R_SH_SW(in2, in2_r_m); \
UNPCK_R_SH_SW(in3, in3_r_m); \
\
constant_m = __msa_fill_w(sinpi_4_9); \
MUL2(in0_r_m, constant_m, in3_r_m, constant_m, s1_m, s0_m); \
\
constant_m = __msa_fill_w(sinpi_1_9); \
s0_m += in0_r_m * constant_m; \
s1_m -= in1_r_m * constant_m; \
\
constant_m = __msa_fill_w(sinpi_2_9); \
s0_m += in1_r_m * constant_m; \
s1_m += in3_r_m * constant_m; \
\
s2_m = in0_r_m + in1_r_m - in3_r_m; \
\
constant_m = __msa_fill_w(sinpi_3_9); \
MUL2(in2_r_m, constant_m, s2_m, constant_m, s3_m, in1_r_m); \
\
in0_r_m = s0_m + s3_m; \
s2_m = s1_m - s3_m; \
s3_m = s1_m - s0_m + s3_m; \
\
SRARI_W4_SW(in0_r_m, in1_r_m, s2_m, s3_m, DCT_CONST_BITS); \
PCKEV_H4_SH(in0_r_m, in0_r_m, in1_r_m, in1_r_m, s2_m, s2_m, s3_m, s3_m, \
out0, out1, out2, out3); \
}
#endif // VP10_ENCODER_MIPS_MSA_VP10_FDCT_MSA_H_
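In scalar form, the VPX_FADST4 data flow above computes the following per column (a sketch; sinpi_*_9 and DCT_CONST_BITS are the fixed-point sine constants and rounding shift, assumed to come from vpx_dsp's txfm_common.h):

#include <stdint.h>

/* SRARI-style rounded arithmetic shift, matching SRARI_W4_SW above. */
#define SRARI_SCALAR(x, n) ((int16_t)(((x) + (1LL << ((n)-1))) >> (n)))

/* Scalar sketch of one column of VPX_FADST4, following the macro's
 * MUL2/fill/accumulate sequence; not the code actually built. */
static void fadst4_scalar(const int16_t in[4], int16_t out[4]) {
  const int64_t x0 = in[0], x1 = in[1], x2 = in[2], x3 = in[3];
  const int64_t s0 = sinpi_1_9 * x0 + sinpi_2_9 * x1 + sinpi_4_9 * x3;
  const int64_t s1 = sinpi_4_9 * x0 - sinpi_1_9 * x1 + sinpi_2_9 * x3;
  const int64_t t = sinpi_3_9 * (x0 + x1 - x3);
  const int64_t u = sinpi_3_9 * x2;
  out[0] = SRARI_SCALAR(s0 + u, DCT_CONST_BITS);
  out[1] = SRARI_SCALAR(t, DCT_CONST_BITS);
  out[2] = SRARI_SCALAR(s1 - u, DCT_CONST_BITS);
  out[3] = SRARI_SCALAR(s1 - s0 + u, DCT_CONST_BITS);
}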

@ -11,12 +11,9 @@
#include "./vp10_rtcd.h"
#include "vpx_dsp/mips/macros_msa.h"
static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr,
uint32_t stride,
uint8_t *frm2_ptr,
int32_t filt_sth,
int32_t filt_wgt,
uint32_t *acc,
static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr, uint32_t stride,
uint8_t *frm2_ptr, int32_t filt_sth,
int32_t filt_wgt, uint32_t *acc,
uint16_t *cnt) {
uint32_t row;
uint64_t f0, f1, f2, f3;
@ -54,10 +51,10 @@ static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr,
HSUB_UB2_SH(frm_r, frm_l, diff0, diff1);
UNPCK_SH_SW(diff0, diff0_r, diff0_l);
UNPCK_SH_SW(diff1, diff1_r, diff1_l);
MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l,
diff1_l, mod0_w, mod1_w, mod2_w, mod3_w);
MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3,
MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l,
mod0_w, mod1_w, mod2_w, mod3_w);
MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, mod0_w,
mod1_w, mod2_w, mod3_w);
SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength);
diff0_r = (mod0_w < cnst16);
@ -65,8 +62,8 @@ static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr,
diff1_r = (mod2_w < cnst16);
diff1_l = (mod3_w < cnst16);
SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w,
mod0_w, mod1_w, mod2_w, mod3_w);
SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, mod0_w,
mod1_w, mod2_w, mod3_w);
mod0_w = diff0_r & mod0_w;
mod1_w = diff0_l & mod1_w;
@ -85,8 +82,8 @@ static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr,
UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll);
MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll,
mod0_w, mod1_w, mod2_w, mod3_w);
ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3,
mod0_w, mod1_w, mod2_w, mod3_w);
ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w,
mod2_w, mod3_w);
ST_SW2(mod0_w, mod1_w, acc, 4);
acc += 8;
@ -101,10 +98,10 @@ static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr,
HSUB_UB2_SH(frm_r, frm_l, diff0, diff1);
UNPCK_SH_SW(diff0, diff0_r, diff0_l);
UNPCK_SH_SW(diff1, diff1_r, diff1_l);
MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l,
diff1_l, mod0_w, mod1_w, mod2_w, mod3_w);
MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3,
MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l,
mod0_w, mod1_w, mod2_w, mod3_w);
MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, mod0_w,
mod1_w, mod2_w, mod3_w);
SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength);
diff0_r = (mod0_w < cnst16);
@ -112,8 +109,8 @@ static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr,
diff1_r = (mod2_w < cnst16);
diff1_l = (mod3_w < cnst16);
SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w,
mod0_w, mod1_w, mod2_w, mod3_w);
SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, mod0_w,
mod1_w, mod2_w, mod3_w);
mod0_w = diff0_r & mod0_w;
mod1_w = diff0_l & mod1_w;
@ -131,8 +128,8 @@ static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr,
UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll);
MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll,
mod0_w, mod1_w, mod2_w, mod3_w);
ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3,
mod0_w, mod1_w, mod2_w, mod3_w);
ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w,
mod2_w, mod3_w);
ST_SW2(mod0_w, mod1_w, acc, 4);
acc += 8;
@ -141,13 +138,10 @@ static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr,
}
}
static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr,
uint32_t stride,
static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr, uint32_t stride,
uint8_t *frm2_ptr,
int32_t filt_sth,
int32_t filt_wgt,
uint32_t *acc,
uint16_t *cnt) {
int32_t filt_sth, int32_t filt_wgt,
uint32_t *acc, uint16_t *cnt) {
uint32_t row;
v16i8 frm1, frm2, frm3, frm4;
v16u8 frm_r, frm_l;
@ -183,8 +177,8 @@ static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr,
UNPCK_SH_SW(diff1, diff1_r, diff1_l);
MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l,
mod0_w, mod1_w, mod2_w, mod3_w);
MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3,
mod0_w, mod1_w, mod2_w, mod3_w);
MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, mod0_w,
mod1_w, mod2_w, mod3_w);
SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength);
diff0_r = (mod0_w < cnst16);
@ -192,8 +186,8 @@ static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr,
diff1_r = (mod2_w < cnst16);
diff1_l = (mod3_w < cnst16);
SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w,
mod0_w, mod1_w, mod2_w, mod3_w);
SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, mod0_w,
mod1_w, mod2_w, mod3_w);
mod0_w = diff0_r & mod0_w;
mod1_w = diff0_l & mod1_w;
@ -212,8 +206,8 @@ static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr,
UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll);
MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll,
mod0_w, mod1_w, mod2_w, mod3_w);
ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3,
mod0_w, mod1_w, mod2_w, mod3_w);
ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w,
mod2_w, mod3_w);
ST_SW2(mod0_w, mod1_w, acc, 4);
acc += 8;
@ -230,8 +224,8 @@ static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr,
UNPCK_SH_SW(diff1, diff1_r, diff1_l);
MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l,
mod0_w, mod1_w, mod2_w, mod3_w);
MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3,
mod0_w, mod1_w, mod2_w, mod3_w);
MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, mod0_w,
mod1_w, mod2_w, mod3_w);
SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength);
diff0_r = (mod0_w < cnst16);
@ -239,8 +233,8 @@ static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr,
diff1_r = (mod2_w < cnst16);
diff1_l = (mod3_w < cnst16);
SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w,
mod0_w, mod1_w, mod2_w, mod3_w);
SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, mod0_w,
mod1_w, mod2_w, mod3_w);
mod0_w = diff0_r & mod0_w;
mod1_w = diff0_l & mod1_w;
@ -259,8 +253,8 @@ static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr,
UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll);
MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll,
mod0_w, mod1_w, mod2_w, mod3_w);
ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3,
mod0_w, mod1_w, mod2_w, mod3_w);
ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w,
mod2_w, mod3_w);
ST_SW2(mod0_w, mod1_w, acc, 4);
acc += 8;
ST_SW2(mod2_w, mod3_w, acc, 4);
@ -272,18 +266,18 @@ static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr,
}
void vp10_temporal_filter_apply_msa(uint8_t *frame1_ptr, uint32_t stride,
uint8_t *frame2_ptr, uint32_t blk_w,
uint32_t blk_h, int32_t strength,
int32_t filt_wgt, uint32_t *accu,
uint16_t *cnt) {
uint8_t *frame2_ptr, uint32_t blk_w,
uint32_t blk_h, int32_t strength,
int32_t filt_wgt, uint32_t *accu,
uint16_t *cnt) {
if (8 == (blk_w * blk_h)) {
temporal_filter_apply_8size_msa(frame1_ptr, stride, frame2_ptr,
strength, filt_wgt, accu, cnt);
temporal_filter_apply_8size_msa(frame1_ptr, stride, frame2_ptr, strength,
filt_wgt, accu, cnt);
} else if (16 == (blk_w * blk_h)) {
temporal_filter_apply_16size_msa(frame1_ptr, stride, frame2_ptr,
strength, filt_wgt, accu, cnt);
temporal_filter_apply_16size_msa(frame1_ptr, stride, frame2_ptr, strength,
filt_wgt, accu, cnt);
} else {
vp10_temporal_filter_apply_c(frame1_ptr, stride, frame2_ptr, blk_w, blk_h,
strength, filt_wgt, accu, cnt);
strength, filt_wgt, accu, cnt);
}
}
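Per pixel, both MSA kernels above implement the same weighting rule as the C fallback they defer to: square the frame difference, scale by 3, round-shift by the filter strength, cap at 16, and invert into a blending weight. A hedged scalar sketch:

#include <stdint.h>

/* Scalar sketch of the per-pixel temporal-filter update above; the
 * rounding mirrors the SRAR (round-to-nearest) shift in the MSA code. */
static void temporal_filter_pixel(uint8_t f1, uint8_t f2, int strength,
                                  int filt_wgt, uint32_t *acc,
                                  uint16_t *cnt) {
  const int diff = f1 - f2;
  const int rounding = strength > 0 ? 1 << (strength - 1) : 0;
  int mod = (diff * diff * 3 + rounding) >> strength;
  if (mod > 16) mod = 16;
  mod = (16 - mod) * filt_wgt;
  *cnt += (uint16_t)mod;
  *acc += (uint32_t)mod * f2;
}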

@ -68,20 +68,19 @@ static void calc_centroids(const float *data, float *centroids,
for (i = 0; i < k; ++i) {
if (count[i] == 0) {
memcpy(centroids + i * dim, data + (lcg_rand16(&rand_state) % n) * dim,
sizeof(centroids[0]) * dim);
sizeof(centroids[0]) * dim);
} else {
const float norm = 1.0f / count[i];
for (j = 0; j < dim; ++j)
centroids[i * dim + j] *= norm;
for (j = 0; j < dim; ++j) centroids[i * dim + j] *= norm;
}
}
}
static float calc_total_dist(const float *data, const float *centroids,
const uint8_t *indices, int n, int k, int dim) {
const uint8_t *indices, int n, int k, int dim) {
float dist = 0;
int i;
(void) k;
(void)k;
for (i = 0; i < n; ++i)
dist += calc_dist(data + i * dim, centroids + indices[i] * dim, dim);
@ -188,5 +187,3 @@ int vp10_count_colors_highbd(const uint8_t *src8, int stride, int rows,
return n;
}
#endif // CONFIG_VP9_HIGHBITDEPTH
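calc_centroids() and calc_total_dist() above are the centroid-update (M) step and objective of Lloyd's k-means as used for palette selection; the random re-seed handles empty clusters. For context, a sketch of the matching assignment (E) step, with hypothetical helper names:

#include <float.h>
#include <stdint.h>

/* Squared Euclidean distance; calc_dist() in this file plays this role. */
static float dist2(const float *a, const float *b, int dim) {
  float d = 0.0f;
  int j;
  for (j = 0; j < dim; ++j) d += (a[j] - b[j]) * (a[j] - b[j]);
  return d;
}

/* Hypothetical E-step: assign each point to its nearest centroid; one
 * Lloyd iteration is this followed by calc_centroids() above. */
static void assign_indices(const float *data, const float *centroids,
                           uint8_t *indices, int n, int k, int dim) {
  int i, c;
  for (i = 0; i < n; ++i) {
    float best = FLT_MAX;
    for (c = 0; c < k; ++c) {
      const float d = dist2(data + i * dim, centroids + c * dim, dim);
      if (d < best) {
        best = d;
        indices[i] = (uint8_t)c;
      }
    }
  }
}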

@ -36,8 +36,8 @@ int vp10_get_max_filter_level(const VP10_COMP *cpi) {
}
static int64_t try_filter_frame(const YV12_BUFFER_CONFIG *sd,
VP10_COMP *const cpi,
int filt_level, int partial_frame) {
VP10_COMP *const cpi, int filt_level,
int partial_frame) {
VP10_COMMON *const cm = &cpi->common;
int64_t filt_err;
@ -47,9 +47,8 @@ static int64_t try_filter_frame(const YV12_BUFFER_CONFIG *sd,
#else
if (cpi->num_workers > 1)
vp10_loop_filter_frame_mt(cm->frame_to_show, cm, cpi->td.mb.e_mbd.plane,
filt_level, 1, partial_frame,
cpi->workers, cpi->num_workers,
&cpi->lf_row_sync);
filt_level, 1, partial_frame, cpi->workers,
cpi->num_workers, &cpi->lf_row_sync);
else
vp10_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, filt_level,
1, partial_frame);
@ -110,8 +109,7 @@ int vp10_search_filter_level(const YV12_BUFFER_CONFIG *sd, VP10_COMP *cpi,
bias = (bias * cpi->twopass.section_intra_rating) / 20;
// yx, bias less for large block size
if (cm->tx_mode != ONLY_4X4)
bias >>= 1;
if (cm->tx_mode != ONLY_4X4) bias >>= 1;
if (filt_direction <= 0 && filt_low != filt_mid) {
// Get Low filter error score
@ -162,21 +160,20 @@ int vp10_search_filter_level(const YV12_BUFFER_CONFIG *sd, VP10_COMP *cpi,
#if !CONFIG_LOOP_RESTORATION
void vp10_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP10_COMP *cpi,
LPF_PICK_METHOD method) {
LPF_PICK_METHOD method) {
VP10_COMMON *const cm = &cpi->common;
struct loopfilter *const lf = &cm->lf;
lf->sharpness_level = cm->frame_type == KEY_FRAME ? 0
: cpi->oxcf.sharpness;
lf->sharpness_level = cm->frame_type == KEY_FRAME ? 0 : cpi->oxcf.sharpness;
if (method == LPF_PICK_MINIMAL_LPF && lf->filter_level) {
lf->filter_level = 0;
lf->filter_level = 0;
} else if (method >= LPF_PICK_FROM_Q) {
const int min_filter_level = 0;
const int max_filter_level = vp10_get_max_filter_level(cpi);
const int q = vp10_ac_quant(cm->base_qindex, 0, cm->bit_depth);
// These values were determined by linear fitting the result of the
// searched level, filt_guess = q * 0.316206 + 3.87252
// These values were determined by linear fitting the result of the
// searched level, filt_guess = q * 0.316206 + 3.87252
#if CONFIG_VP9_HIGHBITDEPTH
int filt_guess;
switch (cm->bit_depth) {
@ -190,15 +187,15 @@ void vp10_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP10_COMP *cpi,
filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 16242526, 22);
break;
default:
assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 "
"or VPX_BITS_12");
assert(0 &&
"bit_depth should be VPX_BITS_8, VPX_BITS_10 "
"or VPX_BITS_12");
return;
}
#else
int filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 1015158, 18);
#endif // CONFIG_VP9_HIGHBITDEPTH
if (cm->frame_type == KEY_FRAME)
filt_guess -= 4;
if (cm->frame_type == KEY_FRAME) filt_guess -= 4;
lf->filter_level = clamp(filt_guess, min_filter_level, max_filter_level);
} else {
lf->filter_level = vp10_search_filter_level(

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP10_ENCODER_PICKLPF_H_
#define VP10_ENCODER_PICKLPF_H_

@ -29,13 +29,11 @@
#include "vp10/encoder/pickrst.h"
static int64_t try_restoration_frame(const YV12_BUFFER_CONFIG *sd,
VP10_COMP *const cpi,
RestorationInfo *rsi,
VP10_COMP *const cpi, RestorationInfo *rsi,
int partial_frame) {
VP10_COMMON *const cm = &cpi->common;
int64_t filt_err;
vp10_loop_restoration_frame(cm->frame_to_show, cm,
rsi, 1, partial_frame);
vp10_loop_restoration_frame(cm->frame_to_show, cm, rsi, 1, partial_frame);
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) {
filt_err = vpx_highbd_get_y_sse(sd, cm->frame_to_show);
@ -51,8 +49,7 @@ static int64_t try_restoration_frame(const YV12_BUFFER_CONFIG *sd,
return filt_err;
}
static int search_bilateral_level(const YV12_BUFFER_CONFIG *sd,
VP10_COMP *cpi,
static int search_bilateral_level(const YV12_BUFFER_CONFIG *sd, VP10_COMP *cpi,
int filter_level, int partial_frame,
double *best_cost_ret) {
VP10_COMMON *const cm = &cpi->common;
@ -76,8 +73,8 @@ static int search_bilateral_level(const YV12_BUFFER_CONFIG *sd,
rsi.restoration_type = RESTORE_NONE;
err = try_restoration_frame(sd, cpi, &rsi, partial_frame);
bits = 0;
best_cost = RDCOST_DBL(x->rdmult, x->rddiv,
(bits << (VP9_PROB_COST_SHIFT - 4)), err);
best_cost =
RDCOST_DBL(x->rdmult, x->rddiv, (bits << (VP9_PROB_COST_SHIFT - 4)), err);
for (i = 0; i < restoration_levels; ++i) {
rsi.restoration_type = RESTORE_BILATERAL;
rsi.restoration_level = i;
@ -86,8 +83,8 @@ static int search_bilateral_level(const YV12_BUFFER_CONFIG *sd,
// when RDCOST is used. However below we just scale both in the correct
// ratios appropriately but not exactly by these values.
bits = restoration_level_bits;
cost = RDCOST_DBL(x->rdmult, x->rddiv,
(bits << (VP9_PROB_COST_SHIFT - 4)), err);
cost = RDCOST_DBL(x->rdmult, x->rddiv, (bits << (VP9_PROB_COST_SHIFT - 4)),
err);
if (cost < best_cost) {
restoration_best = i;
best_cost = cost;
@ -99,8 +96,7 @@ static int search_bilateral_level(const YV12_BUFFER_CONFIG *sd,
}
static int search_filter_bilateral_level(const YV12_BUFFER_CONFIG *sd,
VP10_COMP *cpi,
int partial_frame,
VP10_COMP *cpi, int partial_frame,
int *restoration_level,
double *best_cost_ret) {
const VP10_COMMON *const cm = &cpi->common;
@ -120,11 +116,10 @@ static int search_filter_bilateral_level(const YV12_BUFFER_CONFIG *sd,
double ss_err[MAX_LOOP_FILTER + 1];
// Set each entry to -1
for (i = 0; i <= MAX_LOOP_FILTER; ++i)
ss_err[i] = -1.0;
for (i = 0; i <= MAX_LOOP_FILTER; ++i) ss_err[i] = -1.0;
bilateral_lev = search_bilateral_level(sd, cpi, filt_mid,
partial_frame, &best_err);
bilateral_lev =
search_bilateral_level(sd, cpi, filt_mid, partial_frame, &best_err);
filt_best = filt_mid;
restoration_best = bilateral_lev;
ss_err[filt_mid] = best_err;
@ -140,14 +135,13 @@ static int search_filter_bilateral_level(const YV12_BUFFER_CONFIG *sd,
bias = (bias * cpi->twopass.section_intra_rating) / 20;
// yx, bias less for large block size
if (cm->tx_mode != ONLY_4X4)
bias /= 2;
if (cm->tx_mode != ONLY_4X4) bias /= 2;
if (filt_direction <= 0 && filt_low != filt_mid) {
// Get Low filter error score
if (ss_err[filt_low] < 0) {
bilateral_lev = search_bilateral_level(
sd, cpi, filt_low, partial_frame, &ss_err[filt_low]);
bilateral_lev = search_bilateral_level(sd, cpi, filt_low, partial_frame,
&ss_err[filt_low]);
}
// If value is close to the best so far then bias towards a lower loop
// filter value.
@ -199,15 +193,14 @@ static double find_average(uint8_t *src, int width, int height, int stride) {
double avg = 0;
int i, j;
for (i = 0; i < height; i++)
for (j = 0; j < width; j++)
sum += src[i * stride + j];
for (j = 0; j < width; j++) sum += src[i * stride + j];
avg = (double)sum / (height * width);
return avg;
}
static void compute_stats(uint8_t *dgd, uint8_t *src, int width, int height,
int dgd_stride, int src_stride,
double *M, double *H) {
int dgd_stride, int src_stride, double *M,
double *H) {
int i, j, k, l;
double Y[RESTORATION_WIN2];
const double avg = find_average(dgd, width, height, dgd_stride);
@ -238,21 +231,20 @@ static void compute_stats(uint8_t *dgd, uint8_t *src, int width, int height,
}
#if CONFIG_VP9_HIGHBITDEPTH
static double find_average_highbd(uint16_t *src,
int width, int height, int stride) {
static double find_average_highbd(uint16_t *src, int width, int height,
int stride) {
uint64_t sum = 0;
double avg = 0;
int i, j;
for (i = 0; i < height; i++)
for (j = 0; j < width; j++)
sum += src[i * stride + j];
for (j = 0; j < width; j++) sum += src[i * stride + j];
avg = (double)sum / (height * width);
return avg;
}
static void compute_stats_highbd(
uint8_t *dgd8, uint8_t *src8, int width, int height,
int dgd_stride, int src_stride, double *M, double *H) {
static void compute_stats_highbd(uint8_t *dgd8, uint8_t *src8, int width,
int height, int dgd_stride, int src_stride,
double *M, double *H) {
int i, j, k, l;
double Y[RESTORATION_WIN2];
uint16_t *src = CONVERT_TO_SHORTPTR(src8);
@ -306,18 +298,15 @@ static int linsolve(int n, double *A, int stride, double *b, double *x) {
for (k = 0; k < n - 1; k++) {
for (i = k; i < n - 1; i++) {
c = A[(i + 1) * stride + k] / A[k * stride + k];
for (j = 0; j < n; j++)
A[(i + 1) * stride + j] -= c * A[k * stride + j];
for (j = 0; j < n; j++) A[(i + 1) * stride + j] -= c * A[k * stride + j];
b[i + 1] -= c * b[k];
}
}
// Backward substitution
for (i = n - 1; i >= 0; i--) {
if (fabs(A[i * stride + i]) < 1e-10)
return 0;
if (fabs(A[i * stride + i]) < 1e-10) return 0;
c = 0;
for (j = i + 1; j <= n - 1; j++)
c += A[i * stride + j] * x[j];
for (j = i + 1; j <= n - 1; j++) c += A[i * stride + j] * x[j];
x[i] = (b[i] - c) / A[i * stride + i];
}
return 1;
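A quick usage sketch (hypothetical, not in the tree) for the Gaussian-elimination helper above:

/* Solve { x + 2y = 5, 3x + 4y = 11 }; expected solution x = 1, y = 2.
 * Note linsolve() eliminates in place, so A and b are clobbered, and it
 * returns 0 when a pivot is near zero. */
static void linsolve_example(void) {
  double A[4] = { 1.0, 2.0, 3.0, 4.0 }; /* row-major, stride 2 */
  double b[2] = { 5.0, 11.0 };
  double x[2];
  if (linsolve(2, A, 2, b, x)) {
    /* x[0] == 1.0, x[1] == 2.0 */
  }
}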
@ -335,23 +324,23 @@ static void update_a_sep_sym(double **Mc, double **Hc, double *a, double *b) {
int w, w2;
memset(A, 0, sizeof(A));
memset(B, 0, sizeof(B));
for (i = 0; i < RESTORATION_WIN; i ++) {
for (i = 0; i < RESTORATION_WIN; i++) {
int j;
for (j = 0; j < RESTORATION_WIN; ++j) {
const int jj = wrap_index(j);
A[jj] += Mc[i][j] * b[i];
}
}
for (i = 0; i < RESTORATION_WIN; i ++) {
for (j = 0; j < RESTORATION_WIN; j ++) {
for (i = 0; i < RESTORATION_WIN; i++) {
for (j = 0; j < RESTORATION_WIN; j++) {
int k, l;
for (k = 0; k < RESTORATION_WIN; ++k)
for (l = 0; l < RESTORATION_WIN; ++l) {
const int kk = wrap_index(k);
const int ll = wrap_index(l);
B[ll * RESTORATION_HALFWIN1 + kk] +=
Hc[j * RESTORATION_WIN + i][k * RESTORATION_WIN2 + l] *
b[i] * b[j];
Hc[j * RESTORATION_WIN + i][k * RESTORATION_WIN2 + l] * b[i] *
b[j];
}
}
}
@ -359,8 +348,8 @@ static void update_a_sep_sym(double **Mc, double **Hc, double *a, double *b) {
w = RESTORATION_WIN;
w2 = (w >> 1) + 1;
for (i = 0; i < w2 - 1; ++i)
A[i] -= A[w2 - 1] * 2 + B[i * w2 + w2 - 1]
- 2 * B[(w2 - 1) * w2 + (w2 - 1)];
A[i] -=
A[w2 - 1] * 2 + B[i * w2 + w2 - 1] - 2 * B[(w2 - 1) * w2 + (w2 - 1)];
for (i = 0; i < w2 - 1; ++i)
for (j = 0; j < w2 - 1; ++j)
B[i * w2 + j] -= 2 * (B[i * w2 + (w2 - 1)] + B[(w2 - 1) * w2 + j] -
@ -383,11 +372,10 @@ static void update_b_sep_sym(double **Mc, double **Hc, double *a, double *b) {
int w, w2;
memset(A, 0, sizeof(A));
memset(B, 0, sizeof(B));
for (i = 0; i < RESTORATION_WIN; i ++) {
for (i = 0; i < RESTORATION_WIN; i++) {
int j;
const int ii = wrap_index(i);
for (j = 0; j < RESTORATION_WIN; j ++)
A[ii] += Mc[i][j] * a[j];
for (j = 0; j < RESTORATION_WIN; j++) A[ii] += Mc[i][j] * a[j];
}
for (i = 0; i < RESTORATION_WIN; i++) {
@ -398,16 +386,16 @@ static void update_b_sep_sym(double **Mc, double **Hc, double *a, double *b) {
for (k = 0; k < RESTORATION_WIN; ++k)
for (l = 0; l < RESTORATION_WIN; ++l)
B[jj * RESTORATION_HALFWIN1 + ii] +=
Hc[i * RESTORATION_WIN + j][k * RESTORATION_WIN2 + l] *
a[k] * a[l];
Hc[i * RESTORATION_WIN + j][k * RESTORATION_WIN2 + l] * a[k] *
a[l];
}
}
// Normalization enforcement in the system of equations itself
w = RESTORATION_WIN;
w2 = RESTORATION_HALFWIN1;
for (i = 0; i < w2 - 1; ++i)
A[i] -= A[w2 - 1] * 2 + B[i * w2 + w2 - 1]
- 2 * B[(w2 - 1) * w2 + (w2 - 1)];
A[i] -=
A[w2 - 1] * 2 + B[i * w2 + w2 - 1] - 2 * B[(w2 - 1) * w2 + (w2 - 1)];
for (i = 0; i < w2 - 1; ++i)
for (j = 0; j < w2 - 1; ++j)
B[i * w2 + j] -= 2 * (B[i * w2 + (w2 - 1)] + B[(w2 - 1) * w2 + j] -
@ -422,10 +410,10 @@ static void update_b_sep_sym(double **Mc, double **Hc, double *a, double *b) {
}
}
static int wiener_decompose_sep_sym(double *M, double *H,
double *a, double *b) {
static int wiener_decompose_sep_sym(double *M, double *H, double *a,
double *b) {
static const double init_filt[RESTORATION_WIN] = {
0.035623, -0.127154, 0.211436, 0.760190, 0.211436, -0.127154, 0.035623,
0.035623, -0.127154, 0.211436, 0.760190, 0.211436, -0.127154, 0.035623,
};
int i, j, iter;
double *Hc[RESTORATION_WIN2];
@ -452,7 +440,7 @@ static int wiener_decompose_sep_sym(double *M, double *H,
// Computes the function x'*A*x - x'*b for the learned filters, and compares
// against identity filters; Final score is defined as the difference between
// the function values
static double compute_score(double *M, double *H, int *vfilt, int *hfilt) {
static double compute_score(double *M, double *H, int *vfilt, int *hfilt) {
double ab[RESTORATION_WIN * RESTORATION_WIN];
int i, k, l;
double P = 0, Q = 0;
@ -463,10 +451,10 @@ static int wiener_decompose_sep_sym(double *M, double *H,
w = RESTORATION_WIN;
a[RESTORATION_HALFWIN] = b[RESTORATION_HALFWIN] = 1.0;
for (i = 0; i < RESTORATION_HALFWIN; ++i) {
a[i] = a[RESTORATION_WIN - i - 1 ] =
(double) vfilt[i] / RESTORATION_FILT_STEP;
b[i] = b[RESTORATION_WIN - i - 1 ] =
(double) hfilt[i] / RESTORATION_FILT_STEP;
a[i] = a[RESTORATION_WIN - i - 1] =
(double)vfilt[i] / RESTORATION_FILT_STEP;
b[i] = b[RESTORATION_WIN - i - 1] =
(double)hfilt[i] / RESTORATION_FILT_STEP;
a[RESTORATION_HALFWIN] -= 2 * a[i];
b[RESTORATION_HALFWIN] -= 2 * b[i];
}
@ -477,8 +465,7 @@ static int wiener_decompose_sep_sym(double *M, double *H,
}
for (k = 0; k < w * w; ++k) {
P += ab[k] * M[k];
for (l = 0; l < w * w; ++l)
Q += ab[k] * H[k * w * w + l] * ab[l];
for (l = 0; l < w * w; ++l) Q += ab[k] * H[k * w * w + l] * ab[l];
}
Score = Q - 2 * P;
@ -490,7 +477,7 @@ static int wiener_decompose_sep_sym(double *M, double *H,
}
#define CLIP(x, lo, hi) ((x) < (lo) ? (lo) : (x) > (hi) ? (hi) : (x))
#define RINT(x) ((x) < 0 ? (int)((x) - 0.5) : (int)((x) + 0.5))
#define RINT(x) ((x) < 0 ? (int)((x)-0.5) : (int)((x) + 0.5))
static void quantize_sym_filter(double *f, int *fi) {
int i;
@ -503,10 +490,8 @@ static void quantize_sym_filter(double *f, int *fi) {
fi[2] = CLIP(fi[2], WIENER_FILT_TAP2_MINV, WIENER_FILT_TAP2_MAXV);
}
static int search_wiener_filter(const YV12_BUFFER_CONFIG *src,
VP10_COMP *cpi,
int filter_level,
int partial_frame,
static int search_wiener_filter(const YV12_BUFFER_CONFIG *src, VP10_COMP *cpi,
int filter_level, int partial_frame,
int *vfilter, int *hfilter,
double *best_cost_ret) {
VP10_COMMON *const cm = &cpi->common;
@ -539,8 +524,8 @@ static int search_wiener_filter(const YV12_BUFFER_CONFIG *src,
rsi.restoration_type = RESTORE_NONE;
err = try_restoration_frame(src, cpi, &rsi, partial_frame);
bits = 0;
cost_norestore = RDCOST_DBL(x->rdmult, x->rddiv,
(bits << (VP9_PROB_COST_SHIFT - 4)), err);
cost_norestore =
RDCOST_DBL(x->rdmult, x->rddiv, (bits << (VP9_PROB_COST_SHIFT - 4)), err);
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth)
@ -548,8 +533,8 @@ static int search_wiener_filter(const YV12_BUFFER_CONFIG *src,
dgd_stride, src_stride, M, H);
else
#endif // CONFIG_VP9_HIGHBITDEPTH
compute_stats(dgd->y_buffer, src->y_buffer, width, height,
dgd_stride, src_stride, M, H);
compute_stats(dgd->y_buffer, src->y_buffer, width, height, dgd_stride,
src_stride, M, H);
if (!wiener_decompose_sep_sym(M, H, vfilterd, hfilterd)) {
*best_cost_ret = DBL_MAX;
@ -564,8 +549,7 @@ static int search_wiener_filter(const YV12_BUFFER_CONFIG *src,
score = compute_score(M, H, vfilter, hfilter);
if (score > 0.0) {
int i;
for (i = 0; i < RESTORATION_HALFWIN; ++i)
vfilter[i] = hfilter[i] = 0;
for (i = 0; i < RESTORATION_HALFWIN; ++i) vfilter[i] = hfilter[i] = 0;
rsi.restoration_type = RESTORE_NONE;
if (best_cost_ret) *best_cost_ret = cost_norestore;
vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);
@ -577,8 +561,8 @@ static int search_wiener_filter(const YV12_BUFFER_CONFIG *src,
memcpy(rsi.hfilter, hfilter, sizeof(rsi.hfilter));
err = try_restoration_frame(src, cpi, &rsi, partial_frame);
bits = WIENER_FILT_BITS;
cost_wiener = RDCOST_DBL(x->rdmult, x->rddiv,
(bits << (VP9_PROB_COST_SHIFT - 4)), err);
cost_wiener =
RDCOST_DBL(x->rdmult, x->rddiv, (bits << (VP9_PROB_COST_SHIFT - 4)), err);
vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);
@ -591,8 +575,8 @@ static int search_wiener_filter(const YV12_BUFFER_CONFIG *src,
}
}
void vp10_pick_filter_restoration(
const YV12_BUFFER_CONFIG *sd, VP10_COMP *cpi, LPF_PICK_METHOD method) {
void vp10_pick_filter_restoration(const YV12_BUFFER_CONFIG *sd, VP10_COMP *cpi,
LPF_PICK_METHOD method) {
VP10_COMMON *const cm = &cpi->common;
struct loopfilter *const lf = &cm->lf;
int wiener_success = 0;
@ -600,18 +584,17 @@ void vp10_pick_filter_restoration(
double cost_wiener = DBL_MAX;
double cost_norestore = DBL_MAX;
lf->sharpness_level =
cm->frame_type == KEY_FRAME ? 0 : cpi->oxcf.sharpness;
lf->sharpness_level = cm->frame_type == KEY_FRAME ? 0 : cpi->oxcf.sharpness;
if (method == LPF_PICK_MINIMAL_LPF && lf->filter_level) {
lf->filter_level = 0;
cm->rst_info.restoration_type = RESTORE_NONE;
lf->filter_level = 0;
cm->rst_info.restoration_type = RESTORE_NONE;
} else if (method >= LPF_PICK_FROM_Q) {
const int min_filter_level = 0;
const int max_filter_level = vp10_get_max_filter_level(cpi);
const int q = vp10_ac_quant(cm->base_qindex, 0, cm->bit_depth);
// These values were determined by linear fitting the result of the
// searched level, filt_guess = q * 0.316206 + 3.87252
// These values were determined by linear fitting the result of the
// searched level, filt_guess = q * 0.316206 + 3.87252
#if CONFIG_VP9_HIGHBITDEPTH
int filt_guess;
switch (cm->bit_depth) {
@ -625,15 +608,15 @@ void vp10_pick_filter_restoration(
filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 16242526, 22);
break;
default:
assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 "
"or VPX_BITS_12");
assert(0 &&
"bit_depth should be VPX_BITS_8, VPX_BITS_10 "
"or VPX_BITS_12");
return;
}
#else
int filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 1015158, 18);
#endif // CONFIG_VP9_HIGHBITDEPTH
if (cm->frame_type == KEY_FRAME)
filt_guess -= 4;
if (cm->frame_type == KEY_FRAME) filt_guess -= 4;
lf->filter_level = clamp(filt_guess, min_filter_level, max_filter_level);
cm->rst_info.restoration_level = search_bilateral_level(
sd, cpi, lf->filter_level, method == LPF_PICK_FROM_SUBIMAGE,

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP10_ENCODER_PICKRST_H_
#define VP10_ENCODER_PICKRST_H_
@ -21,8 +20,8 @@ extern "C" {
struct yv12_buffer_config;
struct VP10_COMP;
void vp10_pick_filter_restoration(
const YV12_BUFFER_CONFIG *sd, VP10_COMP *cpi, LPF_PICK_METHOD method);
void vp10_pick_filter_restoration(const YV12_BUFFER_CONFIG *sd, VP10_COMP *cpi,
LPF_PICK_METHOD method);
#ifdef __cplusplus
} // extern "C"

Diff for this file not shown because of its large size.

@ -19,9 +19,7 @@
extern "C" {
#endif
typedef struct QUANT_PARAM {
int log_scale;
} QUANT_PARAM;
typedef struct QUANT_PARAM { int log_scale; } QUANT_PARAM;
typedef void (*VP10_QUANT_FACADE)(const tran_low_t *coeff_ptr,
intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
@ -33,12 +31,12 @@ typedef void (*VP10_QUANT_FACADE)(const tran_low_t *coeff_ptr,
typedef struct {
#if CONFIG_NEW_QUANT
DECLARE_ALIGNED(16, tran_low_t,
y_cuml_bins_nuq[QUANT_PROFILES][QINDEX_RANGE][COEF_BANDS]
[NUQ_KNOTS]);
DECLARE_ALIGNED(16, tran_low_t,
uv_cuml_bins_nuq[QUANT_PROFILES][QINDEX_RANGE][COEF_BANDS]
[NUQ_KNOTS]);
DECLARE_ALIGNED(
16, tran_low_t,
y_cuml_bins_nuq[QUANT_PROFILES][QINDEX_RANGE][COEF_BANDS][NUQ_KNOTS]);
DECLARE_ALIGNED(
16, tran_low_t,
uv_cuml_bins_nuq[QUANT_PROFILES][QINDEX_RANGE][COEF_BANDS][NUQ_KNOTS]);
#endif // CONFIG_NEW_QUANT
// 0: dc 1: ac 2-8: ac repeated to SIMD width
DECLARE_ALIGNED(16, int16_t, y_quant[QINDEX_RANGE][8]);
@ -97,47 +95,30 @@ void vp10_quantize_dc_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
const scan_order *sc, const QUANT_PARAM *qparam);
#if CONFIG_NEW_QUANT
void quantize_dc_nuq(const tran_low_t *coeff_ptr,
intptr_t n_coeffs,
int skip_block,
const int16_t quant,
const int16_t quant_shift,
const int16_t dequant,
void quantize_dc_nuq(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t quant,
const int16_t quant_shift, const int16_t dequant,
const tran_low_t *cuml_bins_ptr,
const tran_low_t *dequant_val,
tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr,
uint16_t *eob_ptr);
void quantize_dc_32x32_nuq(const tran_low_t *coeff_ptr,
intptr_t n_coeffs,
int skip_block,
const int16_t quant,
const int16_t quant_shift,
const int16_t dequant,
const tran_low_t *dequant_val, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr);
void quantize_dc_32x32_nuq(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t quant,
const int16_t quant_shift, const int16_t dequant,
const tran_low_t *cuml_bins_ptr,
const tran_low_t *dequant_val,
tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
uint16_t *eob_ptr);
void quantize_dc_fp_nuq(const tran_low_t *coeff_ptr,
intptr_t n_coeffs,
int skip_block,
const int16_t quant,
const int16_t dequant,
const tran_low_t *cuml_bins_ptr,
const tran_low_t *dequant_val,
tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr,
uint16_t *eob_ptr);
void quantize_dc_32x32_fp_nuq(const tran_low_t *coeff_ptr,
intptr_t n_coeffs,
int skip_block,
const int16_t quant,
void quantize_dc_fp_nuq(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t quant,
const int16_t dequant, const tran_low_t *cuml_bins_ptr,
const tran_low_t *dequant_val, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr);
void quantize_dc_32x32_fp_nuq(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t quant,
const int16_t dequant,
const tran_low_t *cuml_bins_ptr,
const tran_low_t *dequant_val,
tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
uint16_t *eob_ptr);
#endif // CONFIG_NEW_QUANT
@ -162,55 +143,36 @@ void vp10_highbd_quantize_dc_facade(
tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const scan_order *sc,
const QUANT_PARAM *qparam);
void vp10_highbd_quantize_dc(const tran_low_t *coeff_ptr,
int n_coeffs, int skip_block,
const int16_t *round_ptr, const int16_t quant,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr, uint16_t *eob_ptr,
const int log_scale);
void vp10_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs,
int skip_block, const int16_t *round_ptr,
const int16_t quant, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr,
uint16_t *eob_ptr, const int log_scale);
#if CONFIG_NEW_QUANT
void highbd_quantize_dc_nuq(const tran_low_t *coeff_ptr,
intptr_t n_coeffs,
int skip_block,
const int16_t quant,
const int16_t quant_shift,
const int16_t dequant,
void highbd_quantize_dc_nuq(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t quant,
const int16_t quant_shift, const int16_t dequant,
const tran_low_t *cuml_bins_ptr,
const tran_low_t *dequant_val,
tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
uint16_t *eob_ptr);
void highbd_quantize_dc_32x32_nuq(const tran_low_t *coeff_ptr,
intptr_t n_coeffs,
int skip_block,
const int16_t quant,
const int16_t quant_shift,
const int16_t dequant,
const tran_low_t *cuml_bins_ptr,
const tran_low_t *dequant_val,
tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr,
uint16_t *eob_ptr);
void highbd_quantize_dc_fp_nuq(const tran_low_t *coeff_ptr,
intptr_t n_coeffs,
int skip_block,
const int16_t quant,
void highbd_quantize_dc_32x32_nuq(
const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
const int16_t quant, const int16_t quant_shift, const int16_t dequant,
const tran_low_t *cuml_bins_ptr, const tran_low_t *dequant_val,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr);
void highbd_quantize_dc_fp_nuq(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t quant,
const int16_t dequant,
const tran_low_t *cuml_bins_ptr,
const tran_low_t *dequant_val,
tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
uint16_t *eob_ptr);
void highbd_quantize_dc_32x32_fp_nuq(const tran_low_t *coeff_ptr,
intptr_t n_coeffs,
int skip_block,
const int16_t quant,
const int16_t dequant,
const tran_low_t *cuml_bins_ptr,
const tran_low_t *dequant_val,
tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr,
uint16_t *eob_ptr);
void highbd_quantize_dc_32x32_fp_nuq(
const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
const int16_t quant, const int16_t dequant, const tran_low_t *cuml_bins_ptr,
const tran_low_t *dequant_val, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr);
#endif // CONFIG_NEW_QUANT
#endif // CONFIG_VP9_HIGHBITDEPTH

Diff for this file not shown because of its large size.

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP10_ENCODER_RATECTRL_H_
#define VP10_ENCODER_RATECTRL_H_
@ -22,11 +21,11 @@ extern "C" {
#endif
// Bits Per MB at different Q (Multiplied by 512)
#define BPER_MB_NORMBITS 9
#define BPER_MB_NORMBITS 9
#define MIN_GF_INTERVAL 4
#define MAX_GF_INTERVAL 16
#define FIXED_GF_INTERVAL 8 // Used in some testing modes only
#define MIN_GF_INTERVAL 4
#define MAX_GF_INTERVAL 16
#define FIXED_GF_INTERVAL 8 // Used in some testing modes only
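As the comment above says, the bits-per-MB tables keyed by BPER_MB_NORMBITS store rates multiplied by 512 (1 << 9) so that fractional bits per macroblock survive integer arithmetic. A minimal sketch of the conversion back to a whole-frame budget; the helper name and inputs are illustrative, not taken from the encoder:

/* Hedged sketch: convert a Q9 fixed-point bits-per-MB value into a frame
 * bit budget. bits_per_mb_q9 and num_mbs are made-up example inputs. */
#include <stdint.h>
#define BPER_MB_NORMBITS 9
static int frame_target_bits(int bits_per_mb_q9, int num_mbs) {
  /* Multiply first, then shift the 512x scaling back out. */
  return (int)(((int64_t)bits_per_mb_q9 * num_mbs) >> BPER_MB_NORMBITS);
}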
#if CONFIG_EXT_REFS
typedef enum {
@ -61,25 +60,25 @@ typedef enum {
// e.g. 24 => 16/24 = 2/3 of native size. The restriction to 1/16th is
// intended to match the capabilities of the normative scaling filters,
// giving precedence to the up-scaling accuracy.
static const int frame_scale_factor[FRAME_SCALE_STEPS] = {16, 24};
static const int frame_scale_factor[FRAME_SCALE_STEPS] = { 16, 24 };
// Multiplier of the target rate to be used as threshold for triggering scaling.
static const double rate_thresh_mult[FRAME_SCALE_STEPS] = {1.0, 2.0};
static const double rate_thresh_mult[FRAME_SCALE_STEPS] = { 1.0, 2.0 };
// Scale dependent Rate Correction Factor multipliers. Compensates for the
// greater number of bits per pixel generated in down-scaled frames.
static const double rcf_mult[FRAME_SCALE_STEPS] = {1.0, 2.0};
static const double rcf_mult[FRAME_SCALE_STEPS] = { 1.0, 2.0 };
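Each entry of frame_scale_factor above is the denominator of a 16/n scaling ratio, as the preceding comment explains. A small sketch of how such a factor maps to a scaled frame dimension; the helper and its rounding are illustrative assumptions, not the encoder's resize code:

/* Illustrative only: maps a native dimension through the 16/n ratio.
 * frame_scale_factor[1] == 24 gives 16/24 == 2/3 of native size. */
static int scaled_dim(int native_dim, int scale_factor) {
  return (native_dim * 16 + scale_factor / 2) / scale_factor; /* rounded */
}
/* scaled_dim(1920, 16) == 1920; scaled_dim(1920, 24) == 1280. */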
typedef struct {
// Rate targeting variables
int base_frame_target; // A baseline frame target before adjustment
// for previous under or over shoot.
int this_frame_target; // Actual frame target after rc adjustment.
int base_frame_target; // A baseline frame target before adjustment
// for previous under or over shoot.
int this_frame_target; // Actual frame target after rc adjustment.
int projected_frame_size;
int sb64_target_rate;
int last_q[FRAME_TYPES]; // Separate values for Intra/Inter
int last_boosted_qindex; // Last boosted GF/KF/ARF q
int last_kf_qindex; // Q index of the last key frame coded.
int last_q[FRAME_TYPES]; // Separate values for Intra/Inter
int last_boosted_qindex; // Last boosted GF/KF/ARF q
int last_kf_qindex; // Q index of the last key frame coded.
int gfu_boost;
int last_boost;
@ -172,17 +171,18 @@ struct VP10_COMP;
struct VP10EncoderConfig;
void vp10_rc_init(const struct VP10EncoderConfig *oxcf, int pass,
RATE_CONTROL *rc);
RATE_CONTROL *rc);
int vp10_estimate_bits_at_q(FRAME_TYPE frame_kind, int q, int mbs,
double correction_factor,
vpx_bit_depth_t bit_depth);
double correction_factor,
vpx_bit_depth_t bit_depth);
double vp10_convert_qindex_to_q(int qindex, vpx_bit_depth_t bit_depth);
void vp10_rc_init_minq_luts(void);
int vp10_rc_get_default_min_gf_interval(int width, int height, double framerate);
int vp10_rc_get_default_min_gf_interval(int width, int height,
double framerate);
// Note vp10_rc_get_default_max_gf_interval() requires the min_gf_interval to
// be passed in to ensure that the max_gf_interval returned is at least as big
// as that.
@ -230,28 +230,27 @@ int vp10_rc_drop_frame(struct VP10_COMP *cpi);
// Computes frame size bounds.
void vp10_rc_compute_frame_size_bounds(const struct VP10_COMP *cpi,
int this_frame_target,
int *frame_under_shoot_limit,
int *frame_over_shoot_limit);
int this_frame_target,
int *frame_under_shoot_limit,
int *frame_over_shoot_limit);
// Picks q and q bounds given the target for bits
int vp10_rc_pick_q_and_bounds(const struct VP10_COMP *cpi,
int *bottom_index,
int *top_index);
int vp10_rc_pick_q_and_bounds(const struct VP10_COMP *cpi, int *bottom_index,
int *top_index);
// Estimates q to achieve a target bits per frame
int vp10_rc_regulate_q(const struct VP10_COMP *cpi, int target_bits_per_frame,
int active_best_quality, int active_worst_quality);
int active_best_quality, int active_worst_quality);
// Estimates bits per mb for a given qindex and correction factor.
int vp10_rc_bits_per_mb(FRAME_TYPE frame_type, int qindex,
double correction_factor, vpx_bit_depth_t bit_depth);
double correction_factor, vpx_bit_depth_t bit_depth);
// Clamping utilities for bitrate targets for iframes and pframes.
int vp10_rc_clamp_iframe_target_size(const struct VP10_COMP *const cpi,
int target);
int target);
int vp10_rc_clamp_pframe_target_size(const struct VP10_COMP *const cpi,
int target);
int target);
// Utility to set frame_target into the RATE_CONTROL structure
// This function is called only from the vp10_rc_get_..._params() functions.
void vp10_rc_set_frame_target(struct VP10_COMP *cpi, int target);
@ -259,20 +258,20 @@ void vp10_rc_set_frame_target(struct VP10_COMP *cpi, int target);
// Computes a q delta (in "q index" terms) to get from a starting q value
// to a target q value
int vp10_compute_qdelta(const RATE_CONTROL *rc, double qstart, double qtarget,
vpx_bit_depth_t bit_depth);
vpx_bit_depth_t bit_depth);
// Computes a q delta (in "q index" terms) to get from a starting q value
// to a value that should equate to the given rate ratio.
int vp10_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type,
int qindex, double rate_target_ratio,
vpx_bit_depth_t bit_depth);
int qindex, double rate_target_ratio,
vpx_bit_depth_t bit_depth);
int vp10_frame_type_qdelta(const struct VP10_COMP *cpi, int rf_level, int q);
void vp10_rc_update_framerate(struct VP10_COMP *cpi);
void vp10_rc_set_gf_interval_range(const struct VP10_COMP *const cpi,
RATE_CONTROL *const rc);
RATE_CONTROL *const rc);
void vp10_set_target_rate(struct VP10_COMP *cpi);

View file: vp10/encoder/rd.c

@ -40,7 +40,7 @@
#include "vp10/encoder/rd.h"
#include "vp10/encoder/tokenize.h"
#define RD_THRESH_POW 1.25
#define RD_THRESH_POW 1.25
// Factor to weigh the rate for switchable interp filters.
#define SWITCHABLE_INTERP_RATE_FACTOR 1
@ -62,7 +62,7 @@ void vp10_rd_cost_init(RD_COST *rd_cost) {
// This table is used to correct for block size.
// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = {
2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32,
2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32,
#if CONFIG_EXT_PARTITION
48, 48, 64
#endif // CONFIG_EXT_PARTITION
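The "<< 2" convention in the comment means each factor carries two fractional bits; the consumer divides by 4 to undo it, which is visible later in set_block_thresholds ("thresh_mult[i] * t / 4"). A quick sketch of the encoding, with made-up numbers:

/* rd_thresh_block_size_factor stores multipliers in Q2 fixed point:
 * stored 2 -> 2/4 = x0.5, stored 32 -> 32/4 = x8. Values illustrative. */
static int apply_bsize_factor(int thresh, int factor_q2) {
  return thresh * factor_q2 / 4;
}
/* apply_bsize_factor(1000, 2) == 500; apply_bsize_factor(1000, 32) == 8000. */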
@ -75,19 +75,20 @@ static void fill_mode_costs(VP10_COMP *cpi) {
for (i = 0; i < INTRA_MODES; ++i)
for (j = 0; j < INTRA_MODES; ++j)
vp10_cost_tokens(cpi->y_mode_costs[i][j], vp10_kf_y_mode_prob[i][j],
vp10_intra_mode_tree);
vp10_intra_mode_tree);
for (i = 0; i < BLOCK_SIZE_GROUPS; ++i)
vp10_cost_tokens(cpi->mbmode_cost[i], fc->y_mode_prob[i],
vp10_intra_mode_tree);
for (i = 0; i < INTRA_MODES; ++i)
vp10_cost_tokens(cpi->intra_uv_mode_cost[i],
fc->uv_mode_prob[i], vp10_intra_mode_tree);
vp10_cost_tokens(cpi->intra_uv_mode_cost[i], fc->uv_mode_prob[i],
vp10_intra_mode_tree);
for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
vp10_cost_tokens(cpi->switchable_interp_costs[i],
fc->switchable_interp_prob[i], vp10_switchable_interp_tree);
fc->switchable_interp_prob[i],
vp10_switchable_interp_tree);
for (i = 0; i < PALETTE_BLOCK_SIZES; ++i) {
vp10_cost_tokens(cpi->palette_y_size_cost[i],
@ -136,12 +137,10 @@ static void fill_mode_costs(VP10_COMP *cpi) {
for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
for (j = 0; j < TX_TYPES; ++j)
vp10_cost_tokens(cpi->intra_tx_type_costs[i][j],
fc->intra_ext_tx_prob[i][j],
vp10_ext_tx_tree);
fc->intra_ext_tx_prob[i][j], vp10_ext_tx_tree);
}
for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
vp10_cost_tokens(cpi->inter_tx_type_costs[i],
fc->inter_ext_tx_prob[i],
vp10_cost_tokens(cpi->inter_tx_type_costs[i], fc->inter_ext_tx_prob[i],
vp10_ext_tx_tree);
}
#endif // CONFIG_EXT_TX
@ -173,10 +172,9 @@ void vp10_fill_token_costs(vp10_coeff_cost *c,
#else
vpx_prob probs[ENTROPY_NODES];
vp10_model_to_full_probs(p[t][i][j][k][l], probs);
vp10_cost_tokens((int *)c[t][i][j][k][0][l], probs,
vp10_coef_tree);
vp10_cost_tokens((int *)c[t][i][j][k][0][l], probs, vp10_coef_tree);
vp10_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
vp10_coef_tree);
vp10_coef_tree);
#endif // CONFIG_ANS
assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
c[t][i][j][k][1][l][EOB_TOKEN]);
@ -218,10 +216,8 @@ void vp10_init_me_luts(void) {
#endif
}
static const int rd_boost_factor[16] = {
64, 32, 32, 32, 24, 16, 12, 12,
8, 8, 4, 4, 2, 2, 1, 0
};
static const int rd_boost_factor[16] = { 64, 32, 32, 32, 24, 16, 12, 12,
8, 8, 4, 4, 2, 2, 1, 0 };
static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = {
128, 144, 128, 128, 144,
#if CONFIG_EXT_REFS
@ -235,15 +231,9 @@ int vp10_compute_rd_mult(const VP10_COMP *cpi, int qindex) {
#if CONFIG_VP9_HIGHBITDEPTH
int64_t rdmult = 0;
switch (cpi->common.bit_depth) {
case VPX_BITS_8:
rdmult = 88 * q * q / 24;
break;
case VPX_BITS_10:
rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 4);
break;
case VPX_BITS_12:
rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 8);
break;
case VPX_BITS_8: rdmult = 88 * q * q / 24; break;
case VPX_BITS_10: rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 4); break;
case VPX_BITS_12: rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 8); break;
default:
assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
return -1;
@ -259,8 +249,7 @@ int vp10_compute_rd_mult(const VP10_COMP *cpi, int qindex) {
rdmult = (rdmult * rd_frame_type_factor[frame_type]) >> 7;
rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
}
if (rdmult < 1)
rdmult = 1;
if (rdmult < 1) rdmult = 1;
return (int)rdmult;
}
@ -268,21 +257,15 @@ static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) {
double q;
#if CONFIG_VP9_HIGHBITDEPTH
switch (bit_depth) {
case VPX_BITS_8:
q = vp10_dc_quant(qindex, 0, VPX_BITS_8) / 4.0;
break;
case VPX_BITS_10:
q = vp10_dc_quant(qindex, 0, VPX_BITS_10) / 16.0;
break;
case VPX_BITS_12:
q = vp10_dc_quant(qindex, 0, VPX_BITS_12) / 64.0;
break;
case VPX_BITS_8: q = vp10_dc_quant(qindex, 0, VPX_BITS_8) / 4.0; break;
case VPX_BITS_10: q = vp10_dc_quant(qindex, 0, VPX_BITS_10) / 16.0; break;
case VPX_BITS_12: q = vp10_dc_quant(qindex, 0, VPX_BITS_12) / 64.0; break;
default:
assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
return -1;
}
#else
(void) bit_depth;
(void)bit_depth;
q = vp10_dc_quant(qindex, 0, VPX_BITS_8) / 4.0;
#endif // CONFIG_VP9_HIGHBITDEPTH
// TODO(debargha): Adjust the function below.
@ -321,7 +304,8 @@ static void set_block_thresholds(const VP10_COMMON *cm, RD_OPT *rd) {
for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
const int qindex =
clamp(vp10_get_qindex(&cm->seg, segment_id, cm->base_qindex) +
cm->y_dc_delta_q, 0, MAXQ);
cm->y_dc_delta_q,
0, MAXQ);
const int q = compute_rd_thresh_factor(qindex, cm->bit_depth);
for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
@ -332,10 +316,9 @@ static void set_block_thresholds(const VP10_COMMON *cm, RD_OPT *rd) {
if (bsize >= BLOCK_8X8) {
for (i = 0; i < MAX_MODES; ++i)
rd->threshes[segment_id][bsize][i] =
rd->thresh_mult[i] < thresh_max
? rd->thresh_mult[i] * t / 4
: INT_MAX;
rd->threshes[segment_id][bsize][i] = rd->thresh_mult[i] < thresh_max
? rd->thresh_mult[i] * t / 4
: INT_MAX;
} else {
for (i = 0; i < MAX_REFS; ++i)
rd->threshes[segment_id][bsize][i] =
@ -357,8 +340,8 @@ void vp10_set_mvcost(MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame) {
x->mvsadcost = x->mvcost;
x->nmvjointsadcost = x->nmvjointcost;
x->nmv_vec_cost[nmv_ctx][MV_JOINT_ZERO] =
x->zero_rmv_cost[nmv_ctx][1] - x->zero_rmv_cost[nmv_ctx][0];
x->nmv_vec_cost[nmv_ctx][MV_JOINT_ZERO] =
x->zero_rmv_cost[nmv_ctx][1] - x->zero_rmv_cost[nmv_ctx][0];
}
#endif
@ -475,8 +458,8 @@ void vp10_initialize_rd_consts(VP10_COMP *cpi) {
#endif // CONFIG_EXT_INTER
#if CONFIG_OBMC || CONFIG_WARPED_MOTION
for (i = BLOCK_8X8; i < BLOCK_SIZES; i++) {
vp10_cost_tokens((int *)cpi->motvar_cost[i],
cm->fc->motvar_prob[i], vp10_motvar_tree);
vp10_cost_tokens((int *)cpi->motvar_cost[i], cm->fc->motvar_prob[i],
vp10_motvar_tree);
}
#endif // CONFIG_OBMC || CONFIG_WARPED_MOTION
}
@ -497,19 +480,15 @@ static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
// where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
// and H(x) is the binary entropy function.
static const int rate_tab_q10[] = {
65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651,
4553, 4389, 4255, 4142, 4044, 3958, 3881, 3811,
3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186,
3133, 3037, 2952, 2877, 2809, 2747, 2690, 2638,
2589, 2501, 2423, 2353, 2290, 2232, 2179, 2130,
2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651,
1608, 1530, 1460, 1398, 1342, 1290, 1243, 1199,
1159, 1086, 1021, 963, 911, 864, 821, 781,
745, 680, 623, 574, 530, 490, 455, 424,
395, 345, 304, 269, 239, 213, 190, 171,
154, 126, 104, 87, 73, 61, 52, 44,
38, 28, 21, 16, 12, 10, 8, 6,
5, 3, 2, 1, 1, 1, 0, 0,
65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651, 4553, 4389, 4255, 4142,
4044, 3958, 3881, 3811, 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186,
3133, 3037, 2952, 2877, 2809, 2747, 2690, 2638, 2589, 2501, 2423, 2353,
2290, 2232, 2179, 2130, 2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651,
1608, 1530, 1460, 1398, 1342, 1290, 1243, 1199, 1159, 1086, 1021, 963,
911, 864, 821, 781, 745, 680, 623, 574, 530, 490, 455, 424,
395, 345, 304, 269, 239, 213, 190, 171, 154, 126, 104, 87,
73, 61, 52, 44, 38, 28, 21, 16, 12, 10, 8, 6,
5, 3, 2, 1, 1, 1, 0, 0,
};
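Restated in conventional notation, the quantities named in the comment above are (the binary entropy definition is the standard one, added here for completeness; the rest of the closed form lies outside this hunk):

r = e^{-\sqrt{2}\,x}, \qquad
x = \frac{q_{\mathrm{step}}}{\sqrt{\sigma^2}}, \qquad
H(p) = -p \log_2 p - (1-p)\log_2(1-p)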
// Normalized distortion:
// This table models the normalized distortion for a Laplacian source
@ -519,34 +498,29 @@ static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
// where x = qpstep / sqrt(variance).
// Note the actual distortion is Dn * variance.
static const int dist_tab_q10[] = {
0, 0, 1, 1, 1, 2, 2, 2,
3, 3, 4, 5, 5, 6, 7, 7,
8, 9, 11, 12, 13, 15, 16, 17,
18, 21, 24, 26, 29, 31, 34, 36,
39, 44, 49, 54, 59, 64, 69, 73,
78, 88, 97, 106, 115, 124, 133, 142,
151, 167, 184, 200, 215, 231, 245, 260,
274, 301, 327, 351, 375, 397, 418, 439,
458, 495, 528, 559, 587, 613, 637, 659,
680, 717, 749, 777, 801, 823, 842, 859,
874, 899, 919, 936, 949, 960, 969, 977,
983, 994, 1001, 1006, 1010, 1013, 1015, 1017,
1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 5,
5, 6, 7, 7, 8, 9, 11, 12, 13, 15, 16, 17,
18, 21, 24, 26, 29, 31, 34, 36, 39, 44, 49, 54,
59, 64, 69, 73, 78, 88, 97, 106, 115, 124, 133, 142,
151, 167, 184, 200, 215, 231, 245, 260, 274, 301, 327, 351,
375, 397, 418, 439, 458, 495, 528, 559, 587, 613, 637, 659,
680, 717, 749, 777, 801, 823, 842, 859, 874, 899, 919, 936,
949, 960, 969, 977, 983, 994, 1001, 1006, 1010, 1013, 1015, 1017,
1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
};
static const int xsq_iq_q10[] = {
0, 4, 8, 12, 16, 20, 24, 28,
32, 40, 48, 56, 64, 72, 80, 88,
96, 112, 128, 144, 160, 176, 192, 208,
224, 256, 288, 320, 352, 384, 416, 448,
480, 544, 608, 672, 736, 800, 864, 928,
992, 1120, 1248, 1376, 1504, 1632, 1760, 1888,
2016, 2272, 2528, 2784, 3040, 3296, 3552, 3808,
4064, 4576, 5088, 5600, 6112, 6624, 7136, 7648,
8160, 9184, 10208, 11232, 12256, 13280, 14304, 15328,
16352, 18400, 20448, 22496, 24544, 26592, 28640, 30688,
32736, 36832, 40928, 45024, 49120, 53216, 57312, 61408,
65504, 73696, 81888, 90080, 98272, 106464, 114656, 122848,
131040, 147424, 163808, 180192, 196576, 212960, 229344, 245728,
0, 4, 8, 12, 16, 20, 24, 28, 32,
40, 48, 56, 64, 72, 80, 88, 96, 112,
128, 144, 160, 176, 192, 208, 224, 256, 288,
320, 352, 384, 416, 448, 480, 544, 608, 672,
736, 800, 864, 928, 992, 1120, 1248, 1376, 1504,
1632, 1760, 1888, 2016, 2272, 2528, 2784, 3040, 3296,
3552, 3808, 4064, 4576, 5088, 5600, 6112, 6624, 7136,
7648, 8160, 9184, 10208, 11232, 12256, 13280, 14304, 15328,
16352, 18400, 20448, 22496, 24544, 26592, 28640, 30688, 32736,
36832, 40928, 45024, 49120, 53216, 57312, 61408, 65504, 73696,
81888, 90080, 98272, 106464, 114656, 122848, 131040, 147424, 163808,
180192, 196576, 212960, 229344, 245728,
};
const int tmp = (xsq_q10 >> 2) + 8;
const int k = get_msb(tmp) - 3;
@ -627,9 +601,7 @@ static void get_entropy_contexts_plane(
for (i = 0; i < num_4x4_h; i += 8)
t_left[i] = !!*(const uint64_t *)&left[i];
break;
default:
assert(0 && "Invalid transform size.");
break;
default: assert(0 && "Invalid transform size."); break;
}
}
@ -641,9 +613,8 @@ void vp10_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
get_entropy_contexts_plane(plane_bsize, tx_size, pd, t_above, t_left);
}
void vp10_mv_pred(VP10_COMP *cpi, MACROBLOCK *x,
uint8_t *ref_y_buffer, int ref_y_stride,
int ref_frame, BLOCK_SIZE block_size) {
void vp10_mv_pred(VP10_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
int ref_y_stride, int ref_frame, BLOCK_SIZE block_size) {
int i;
int zero_seen = 0;
int best_index = 0;
@ -653,9 +624,9 @@ void vp10_mv_pred(VP10_COMP *cpi, MACROBLOCK *x,
int near_same_nearest;
uint8_t *src_y_ptr = x->plane[0].src.buf;
uint8_t *ref_y_ptr;
const int num_mv_refs = MAX_MV_REF_CANDIDATES +
(cpi->sf.adaptive_motion_search &&
block_size < x->max_partition_size);
const int num_mv_refs =
MAX_MV_REF_CANDIDATES +
(cpi->sf.adaptive_motion_search && block_size < x->max_partition_size);
MV pred_mv[3];
pred_mv[0] = x->mbmi_ext->ref_mvs[ref_frame][0].as_mv;
@ -663,25 +634,22 @@ void vp10_mv_pred(VP10_COMP *cpi, MACROBLOCK *x,
pred_mv[2] = x->pred_mv[ref_frame];
assert(num_mv_refs <= (int)(sizeof(pred_mv) / sizeof(pred_mv[0])));
near_same_nearest =
x->mbmi_ext->ref_mvs[ref_frame][0].as_int ==
x->mbmi_ext->ref_mvs[ref_frame][1].as_int;
near_same_nearest = x->mbmi_ext->ref_mvs[ref_frame][0].as_int ==
x->mbmi_ext->ref_mvs[ref_frame][1].as_int;
// Get the sad for each candidate reference mv.
for (i = 0; i < num_mv_refs; ++i) {
const MV *this_mv = &pred_mv[i];
int fp_row, fp_col;
if (i == 1 && near_same_nearest)
continue;
if (i == 1 && near_same_nearest) continue;
fp_row = (this_mv->row + 3 + (this_mv->row >= 0)) >> 3;
fp_col = (this_mv->col + 3 + (this_mv->col >= 0)) >> 3;
max_mv = VPXMAX(max_mv, VPXMAX(abs(this_mv->row), abs(this_mv->col)) >> 3);
if (fp_row ==0 && fp_col == 0 && zero_seen)
continue;
zero_seen |= (fp_row ==0 && fp_col == 0);
if (fp_row == 0 && fp_col == 0 && zero_seen) continue;
zero_seen |= (fp_row == 0 && fp_col == 0);
ref_y_ptr =&ref_y_buffer[ref_y_stride * fp_row + fp_col];
ref_y_ptr = &ref_y_buffer[ref_y_stride * fp_row + fp_col];
// Find sad for current vector.
this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
ref_y_ptr, ref_y_stride);
@ -699,11 +667,10 @@ void vp10_mv_pred(VP10_COMP *cpi, MACROBLOCK *x,
}
void vp10_setup_pred_block(const MACROBLOCKD *xd,
struct buf_2d dst[MAX_MB_PLANE],
const YV12_BUFFER_CONFIG *src,
int mi_row, int mi_col,
const struct scale_factors *scale,
const struct scale_factors *scale_uv) {
struct buf_2d dst[MAX_MB_PLANE],
const YV12_BUFFER_CONFIG *src, int mi_row,
int mi_col, const struct scale_factors *scale,
const struct scale_factors *scale_uv) {
int i;
dst[0].buf = src->y_buffer;
@ -716,34 +683,33 @@ void vp10_setup_pred_block(const MACROBLOCKD *xd,
setup_pred_plane(dst + i, dst[i].buf,
i ? src->uv_crop_width : src->y_crop_width,
i ? src->uv_crop_height : src->y_crop_height,
dst[i].stride, mi_row, mi_col,
i ? scale_uv : scale,
dst[i].stride, mi_row, mi_col, i ? scale_uv : scale,
xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
}
}
int vp10_raster_block_offset(BLOCK_SIZE plane_bsize,
int raster_block, int stride) {
int vp10_raster_block_offset(BLOCK_SIZE plane_bsize, int raster_block,
int stride) {
const int bw = b_width_log2_lookup[plane_bsize];
const int y = 4 * (raster_block >> bw);
const int x = 4 * (raster_block & ((1 << bw) - 1));
return y * stride + x;
}
int16_t* vp10_raster_block_offset_int16(BLOCK_SIZE plane_bsize,
int raster_block, int16_t *base) {
int16_t *vp10_raster_block_offset_int16(BLOCK_SIZE plane_bsize,
int raster_block, int16_t *base) {
const int stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
return base + vp10_raster_block_offset(plane_bsize, raster_block, stride);
}
YV12_BUFFER_CONFIG *vp10_get_scaled_ref_frame(const VP10_COMP *cpi,
int ref_frame) {
int ref_frame) {
const VP10_COMMON *const cm = &cpi->common;
const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
const int ref_idx = get_ref_frame_buf_idx(cpi, ref_frame);
return
(scaled_idx != ref_idx && scaled_idx != INVALID_IDX) ?
&cm->buffer_pool->frame_bufs[scaled_idx].buf : NULL;
return (scaled_idx != ref_idx && scaled_idx != INVALID_IDX)
? &cm->buffer_pool->frame_bufs[scaled_idx].buf
: NULL;
}
#if CONFIG_DUAL_FILTER
@ -773,7 +739,7 @@ int vp10_get_switchable_rate(const VP10_COMP *cpi,
if (!vp10_is_interp_needed(xd)) return 0;
#endif // CONFIG_EXT_INTERP
return SWITCHABLE_INTERP_RATE_FACTOR *
cpi->switchable_interp_costs[ctx][mbmi->interp_filter];
cpi->switchable_interp_costs[ctx][mbmi->interp_filter];
}
#endif
@ -1013,52 +979,52 @@ void vp10_set_rd_speed_thresholds(VP10_COMP *cpi) {
rd->thresh_mult[THR_D153_PRED] += 2500;
rd->thresh_mult[THR_D63_PRED] += 2500;
rd->thresh_mult[THR_D117_PRED] += 2500;
rd->thresh_mult[THR_D45_PRED ] += 2500;
rd->thresh_mult[THR_D45_PRED] += 2500;
#if CONFIG_EXT_INTER
rd->thresh_mult[THR_COMP_INTERINTRA_ZEROL ] += 1500;
rd->thresh_mult[THR_COMP_INTERINTRA_ZEROL] += 1500;
rd->thresh_mult[THR_COMP_INTERINTRA_NEARESTL] += 1500;
rd->thresh_mult[THR_COMP_INTERINTRA_NEARL ] += 1500;
rd->thresh_mult[THR_COMP_INTERINTRA_NEWL ] += 2000;
rd->thresh_mult[THR_COMP_INTERINTRA_NEARL] += 1500;
rd->thresh_mult[THR_COMP_INTERINTRA_NEWL] += 2000;
#if CONFIG_EXT_REFS
rd->thresh_mult[THR_COMP_INTERINTRA_ZEROL2 ] += 1500;
rd->thresh_mult[THR_COMP_INTERINTRA_ZEROL2] += 1500;
rd->thresh_mult[THR_COMP_INTERINTRA_NEARESTL2] += 1500;
rd->thresh_mult[THR_COMP_INTERINTRA_NEARL2 ] += 1500;
rd->thresh_mult[THR_COMP_INTERINTRA_NEWL2 ] += 2000;
rd->thresh_mult[THR_COMP_INTERINTRA_NEARL2] += 1500;
rd->thresh_mult[THR_COMP_INTERINTRA_NEWL2] += 2000;
rd->thresh_mult[THR_COMP_INTERINTRA_ZEROL3 ] += 1500;
rd->thresh_mult[THR_COMP_INTERINTRA_ZEROL3] += 1500;
rd->thresh_mult[THR_COMP_INTERINTRA_NEARESTL3] += 1500;
rd->thresh_mult[THR_COMP_INTERINTRA_NEARL3 ] += 1500;
rd->thresh_mult[THR_COMP_INTERINTRA_NEWL3 ] += 2000;
rd->thresh_mult[THR_COMP_INTERINTRA_NEARL3] += 1500;
rd->thresh_mult[THR_COMP_INTERINTRA_NEWL3] += 2000;
#endif // CONFIG_EXT_REFS
rd->thresh_mult[THR_COMP_INTERINTRA_ZEROG ] += 1500;
rd->thresh_mult[THR_COMP_INTERINTRA_ZEROG] += 1500;
rd->thresh_mult[THR_COMP_INTERINTRA_NEARESTG] += 1500;
rd->thresh_mult[THR_COMP_INTERINTRA_NEARG ] += 1500;
rd->thresh_mult[THR_COMP_INTERINTRA_NEWG ] += 2000;
rd->thresh_mult[THR_COMP_INTERINTRA_NEARG] += 1500;
rd->thresh_mult[THR_COMP_INTERINTRA_NEWG] += 2000;
#if CONFIG_EXT_REFS
rd->thresh_mult[THR_COMP_INTERINTRA_ZEROB ] += 1500;
rd->thresh_mult[THR_COMP_INTERINTRA_ZEROB] += 1500;
rd->thresh_mult[THR_COMP_INTERINTRA_NEARESTB] += 1500;
rd->thresh_mult[THR_COMP_INTERINTRA_NEARB ] += 1500;
rd->thresh_mult[THR_COMP_INTERINTRA_NEWB ] += 2000;
rd->thresh_mult[THR_COMP_INTERINTRA_NEARB] += 1500;
rd->thresh_mult[THR_COMP_INTERINTRA_NEWB] += 2000;
#endif // CONFIG_EXT_REFS
rd->thresh_mult[THR_COMP_INTERINTRA_ZEROA ] += 1500;
rd->thresh_mult[THR_COMP_INTERINTRA_ZEROA] += 1500;
rd->thresh_mult[THR_COMP_INTERINTRA_NEARESTA] += 1500;
rd->thresh_mult[THR_COMP_INTERINTRA_NEARA ] += 1500;
rd->thresh_mult[THR_COMP_INTERINTRA_NEWA ] += 2000;
rd->thresh_mult[THR_COMP_INTERINTRA_NEARA] += 1500;
rd->thresh_mult[THR_COMP_INTERINTRA_NEWA] += 2000;
#endif // CONFIG_EXT_INTER
}
void vp10_set_rd_speed_thresholds_sub8x8(VP10_COMP *cpi) {
static const int thresh_mult[2][MAX_REFS] = {
#if CONFIG_EXT_REFS
{ 2500, 2500, 2500, 2500, 2500, 2500, 4500, 4500,
4500, 4500, 4500, 4500, 4500, 4500, 2500 },
{ 2000, 2000, 2000, 2000, 2000, 2000, 4000, 4000,
4000, 4000, 4000, 4000, 4000, 4000, 2000 }
{ 2500, 2500, 2500, 2500, 2500, 2500, 4500, 4500, 4500, 4500, 4500, 4500,
4500, 4500, 2500 },
{ 2000, 2000, 2000, 2000, 2000, 2000, 4000, 4000, 4000, 4000, 4000, 4000,
4000, 4000, 2000 }
#else
{ 2500, 2500, 2500, 4500, 4500, 2500 },
{ 2000, 2000, 2000, 4000, 4000, 2000 }
@ -1092,16 +1058,13 @@ void vp10_update_rd_thresh_fact(const VP10_COMMON *const cm,
}
int vp10_get_intra_cost_penalty(int qindex, int qdelta,
vpx_bit_depth_t bit_depth) {
vpx_bit_depth_t bit_depth) {
const int q = vp10_dc_quant(qindex, qdelta, bit_depth);
#if CONFIG_VP9_HIGHBITDEPTH
switch (bit_depth) {
case VPX_BITS_8:
return 20 * q;
case VPX_BITS_10:
return 5 * q;
case VPX_BITS_12:
return ROUND_POWER_OF_TWO(5 * q, 2);
case VPX_BITS_8: return 20 * q;
case VPX_BITS_10: return 5 * q;
case VPX_BITS_12: return ROUND_POWER_OF_TWO(5 * q, 2);
default:
assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
return -1;
@ -1110,4 +1073,3 @@ int vp10_get_intra_cost_penalty(int qindex, int qdelta,
return 20 * q;
#endif // CONFIG_VP9_HIGHBITDEPTH
}

View file: vp10/encoder/rd.h

@ -26,8 +26,8 @@
extern "C" {
#endif
#define RDDIV_BITS 7
#define RD_EPB_SHIFT 6
#define RDDIV_BITS 7
#define RD_EPB_SHIFT 6
#define RDCOST(RM, DM, R, D) \
(ROUND_POWER_OF_TWO(((int64_t)R) * (RM), VP9_PROB_COST_SHIFT) + (D << DM))
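RDCOST combines a rate term scaled by the rate multiplier RM (with VP9_PROB_COST_SHIFT bits of precision rounded away) and a distortion term scaled up by DM bits. A self-contained sketch of the same arithmetic, assuming VP9_PROB_COST_SHIFT is 9 as defined elsewhere in libvpx and using made-up inputs:

/* Hedged sketch of the RDCOST computation; all inputs are illustrative. */
#include <stdint.h>
#define VP9_PROB_COST_SHIFT 9
#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n)-1))) >> (n))
static int64_t rdcost(int rdmult, int rddiv_bits, int rate, int64_t dist) {
  return ROUND_POWER_OF_TWO((int64_t)rate * rdmult, VP9_PROB_COST_SHIFT) +
         (dist << rddiv_bits);
}
/* Example: rdcost(250, 7, 1200, 3500), where 7 plays the role of RDDIV_BITS;
 * a coding mode is preferred when this combined cost is lower. */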
@ -36,10 +36,10 @@ extern "C" {
(((((double)(R)) * (RM)) / (double)(1 << VP9_PROB_COST_SHIFT)) + \
((double)(D) * (1 << (DM))))
#define QIDX_SKIP_THRESH 115
#define QIDX_SKIP_THRESH 115
#define MV_COST_WEIGHT 108
#define MV_COST_WEIGHT_SUB 120
#define MV_COST_WEIGHT 108
#define MV_COST_WEIGHT_SUB 120
#define INVALID_MV 0x80008000
@ -62,13 +62,13 @@ extern "C" {
#endif // CONFIG_EXT_REFS
#if CONFIG_EXT_REFS
#define MAX_REFS 15
#define MAX_REFS 15
#else
#define MAX_REFS 6
#define MAX_REFS 6
#endif // CONFIG_EXT_REFS
#define RD_THRESH_MAX_FACT 64
#define RD_THRESH_INC 1
#define RD_THRESH_INC 1
// This enumerator type needs to be kept aligned with the mode order in
// const MODE_DEFINITION vp10_mode_order[MAX_MODES] used in the rd code.
@ -391,8 +391,8 @@ int vp10_compute_rd_mult(const struct VP10_COMP *cpi, int qindex);
void vp10_initialize_rd_consts(struct VP10_COMP *cpi);
void vp10_initialize_me_consts(const struct VP10_COMP *cpi,
MACROBLOCK *x, int qindex);
void vp10_initialize_me_consts(const struct VP10_COMP *cpi, MACROBLOCK *x,
int qindex);
void vp10_model_rd_from_var_lapndz(int64_t var, unsigned int n,
unsigned int qstep, int *rate,
@ -401,10 +401,10 @@ void vp10_model_rd_from_var_lapndz(int64_t var, unsigned int n,
int vp10_get_switchable_rate(const struct VP10_COMP *cpi,
const MACROBLOCKD *const xd);
int vp10_raster_block_offset(BLOCK_SIZE plane_bsize,
int raster_block, int stride);
int vp10_raster_block_offset(BLOCK_SIZE plane_bsize, int raster_block,
int stride);
int16_t* vp10_raster_block_offset_int16(BLOCK_SIZE plane_bsize,
int16_t *vp10_raster_block_offset_int16(BLOCK_SIZE plane_bsize,
int raster_block, int16_t *base);
YV12_BUFFER_CONFIG *vp10_get_scaled_ref_frame(const struct VP10_COMP *cpi,
@ -437,12 +437,11 @@ void vp10_fill_token_costs(vp10_coeff_cost *c,
static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh,
int thresh_fact) {
return best_rd < ((int64_t)thresh * thresh_fact >> 5) || thresh == INT_MAX;
return best_rd < ((int64_t)thresh * thresh_fact >> 5) || thresh == INT_MAX;
}
void vp10_mv_pred(struct VP10_COMP *cpi, MACROBLOCK *x,
uint8_t *ref_y_buffer, int ref_y_stride,
int ref_frame, BLOCK_SIZE block_size);
void vp10_mv_pred(struct VP10_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
int ref_y_stride, int ref_frame, BLOCK_SIZE block_size);
static INLINE void set_error_per_bit(MACROBLOCK *x, int rdmult) {
x->errorperbit = rdmult >> RD_EPB_SHIFT;
@ -451,9 +450,8 @@ static INLINE void set_error_per_bit(MACROBLOCK *x, int rdmult) {
void vp10_setup_pred_block(const MACROBLOCKD *xd,
struct buf_2d dst[MAX_MB_PLANE],
const YV12_BUFFER_CONFIG *src,
int mi_row, int mi_col,
const struct scale_factors *scale,
const YV12_BUFFER_CONFIG *src, int mi_row,
int mi_col, const struct scale_factors *scale,
const struct scale_factors *scale_uv);
int vp10_get_intra_cost_penalty(int qindex, int qdelta,

The diff between files is not shown because of its large size.

View file: vp10/encoder/rdopt.h

@ -26,36 +26,32 @@ struct macroblock;
struct RD_COST;
void vp10_rd_pick_intra_mode_sb(struct VP10_COMP *cpi, struct macroblock *x,
struct RD_COST *rd_cost, BLOCK_SIZE bsize,
PICK_MODE_CONTEXT *ctx, int64_t best_rd);
struct RD_COST *rd_cost, BLOCK_SIZE bsize,
PICK_MODE_CONTEXT *ctx, int64_t best_rd);
unsigned int vp10_get_sby_perpixel_variance(VP10_COMP *cpi,
const struct buf_2d *ref,
BLOCK_SIZE bs);
const struct buf_2d *ref,
BLOCK_SIZE bs);
#if CONFIG_VP9_HIGHBITDEPTH
unsigned int vp10_high_get_sby_perpixel_variance(VP10_COMP *cpi,
const struct buf_2d *ref,
BLOCK_SIZE bs, int bd);
const struct buf_2d *ref,
BLOCK_SIZE bs, int bd);
#endif
void vp10_rd_pick_inter_mode_sb(struct VP10_COMP *cpi,
struct TileDataEnc *tile_data,
struct macroblock *x,
int mi_row, int mi_col,
struct RD_COST *rd_cost,
struct TileDataEnc *tile_data,
struct macroblock *x, int mi_row, int mi_col,
struct RD_COST *rd_cost,
#if CONFIG_SUPERTX
int *returnrate_nocoef,
int *returnrate_nocoef,
#endif // CONFIG_SUPERTX
BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
int64_t best_rd_so_far);
BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
int64_t best_rd_so_far);
void vp10_rd_pick_inter_mode_sb_seg_skip(struct VP10_COMP *cpi,
struct TileDataEnc *tile_data,
struct macroblock *x,
struct RD_COST *rd_cost,
BLOCK_SIZE bsize,
PICK_MODE_CONTEXT *ctx,
int64_t best_rd_so_far);
void vp10_rd_pick_inter_mode_sb_seg_skip(
struct VP10_COMP *cpi, struct TileDataEnc *tile_data, struct macroblock *x,
struct RD_COST *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
int64_t best_rd_so_far);
int vp10_internal_image_edge(struct VP10_COMP *cpi);
int vp10_active_h_edge(struct VP10_COMP *cpi, int mi_row, int mi_step);
@ -64,9 +60,8 @@ int vp10_active_edge_sb(struct VP10_COMP *cpi, int mi_row, int mi_col);
void vp10_rd_pick_inter_mode_sub8x8(struct VP10_COMP *cpi,
struct TileDataEnc *tile_data,
struct macroblock *x,
int mi_row, int mi_col,
struct RD_COST *rd_cost,
struct macroblock *x, int mi_row,
int mi_col, struct RD_COST *rd_cost,
#if CONFIG_SUPERTX
int *returnrate_nocoef,
#endif // CONFIG_SUPERTX
@ -77,12 +72,11 @@ void vp10_rd_pick_inter_mode_sub8x8(struct VP10_COMP *cpi,
#if CONFIG_VAR_TX
void vp10_tx_block_rd_b(const VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
int blk_row, int blk_col, int plane, int block,
int plane_bsize, int coeff_ctx,
int *rate, int64_t *dist, int64_t *bsse, int *skip);
int plane_bsize, int coeff_ctx, int *rate,
int64_t *dist, int64_t *bsse, int *skip);
#endif
void vp10_txfm_rd_in_plane_supertx(MACROBLOCK *x,
const VP10_COMP *cpi,
void vp10_txfm_rd_in_plane_supertx(MACROBLOCK *x, const VP10_COMP *cpi,
int *rate, int64_t *distortion,
int *skippable, int64_t *sse,
int64_t ref_best_rd, int plane,

View file: vp10/encoder/resize.c

@ -22,198 +22,118 @@
#include "vp10/common/common.h"
#include "vp10/encoder/resize.h"
#define FILTER_BITS 7
#define FILTER_BITS 7
#define INTERP_TAPS 8
#define SUBPEL_BITS 5
#define SUBPEL_MASK ((1 << SUBPEL_BITS) - 1)
#define INTERP_PRECISION_BITS 32
#define INTERP_TAPS 8
#define SUBPEL_BITS 5
#define SUBPEL_MASK ((1 << SUBPEL_BITS) - 1)
#define INTERP_PRECISION_BITS 32
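These constants define a 32-bit fixed-point source coordinate: the bits above INTERP_PRECISION_BITS give the integer pel, and the SUBPEL_BITS just below them select one of the 32 filter phases. A minimal sketch of that decomposition, matching the int_pel/sub_pel handling in the interpolate() loops below (the helper name is illustrative):

/* Illustrative decomposition of a Q32 source position 'y'. */
#include <stdint.h>
#define SUBPEL_BITS 5
#define SUBPEL_MASK ((1 << SUBPEL_BITS) - 1)
#define INTERP_PRECISION_BITS 32
static void split_position(int64_t y, int *int_pel, int *sub_pel) {
  *int_pel = (int)(y >> INTERP_PRECISION_BITS);
  *sub_pel = (int)(y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK;
}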
typedef int16_t interp_kernel[INTERP_TAPS];
// Filters for interpolation (0.5-band) - note this also filters integer pels.
static const interp_kernel filteredinterp_filters500[(1 << SUBPEL_BITS)] = {
{-3, 0, 35, 64, 35, 0, -3, 0},
{-3, -1, 34, 64, 36, 1, -3, 0},
{-3, -1, 32, 64, 38, 1, -3, 0},
{-2, -2, 31, 63, 39, 2, -3, 0},
{-2, -2, 29, 63, 41, 2, -3, 0},
{-2, -2, 28, 63, 42, 3, -4, 0},
{-2, -3, 27, 63, 43, 4, -4, 0},
{-2, -3, 25, 62, 45, 5, -4, 0},
{-2, -3, 24, 62, 46, 5, -4, 0},
{-2, -3, 23, 61, 47, 6, -4, 0},
{-2, -3, 21, 60, 49, 7, -4, 0},
{-1, -4, 20, 60, 50, 8, -4, -1},
{-1, -4, 19, 59, 51, 9, -4, -1},
{-1, -4, 17, 58, 52, 10, -4, 0},
{-1, -4, 16, 57, 53, 12, -4, -1},
{-1, -4, 15, 56, 54, 13, -4, -1},
{-1, -4, 14, 55, 55, 14, -4, -1},
{-1, -4, 13, 54, 56, 15, -4, -1},
{-1, -4, 12, 53, 57, 16, -4, -1},
{0, -4, 10, 52, 58, 17, -4, -1},
{-1, -4, 9, 51, 59, 19, -4, -1},
{-1, -4, 8, 50, 60, 20, -4, -1},
{0, -4, 7, 49, 60, 21, -3, -2},
{0, -4, 6, 47, 61, 23, -3, -2},
{0, -4, 5, 46, 62, 24, -3, -2},
{0, -4, 5, 45, 62, 25, -3, -2},
{0, -4, 4, 43, 63, 27, -3, -2},
{0, -4, 3, 42, 63, 28, -2, -2},
{0, -3, 2, 41, 63, 29, -2, -2},
{0, -3, 2, 39, 63, 31, -2, -2},
{0, -3, 1, 38, 64, 32, -1, -3},
{0, -3, 1, 36, 64, 34, -1, -3}
{ -3, 0, 35, 64, 35, 0, -3, 0 }, { -3, -1, 34, 64, 36, 1, -3, 0 },
{ -3, -1, 32, 64, 38, 1, -3, 0 }, { -2, -2, 31, 63, 39, 2, -3, 0 },
{ -2, -2, 29, 63, 41, 2, -3, 0 }, { -2, -2, 28, 63, 42, 3, -4, 0 },
{ -2, -3, 27, 63, 43, 4, -4, 0 }, { -2, -3, 25, 62, 45, 5, -4, 0 },
{ -2, -3, 24, 62, 46, 5, -4, 0 }, { -2, -3, 23, 61, 47, 6, -4, 0 },
{ -2, -3, 21, 60, 49, 7, -4, 0 }, { -1, -4, 20, 60, 50, 8, -4, -1 },
{ -1, -4, 19, 59, 51, 9, -4, -1 }, { -1, -4, 17, 58, 52, 10, -4, 0 },
{ -1, -4, 16, 57, 53, 12, -4, -1 }, { -1, -4, 15, 56, 54, 13, -4, -1 },
{ -1, -4, 14, 55, 55, 14, -4, -1 }, { -1, -4, 13, 54, 56, 15, -4, -1 },
{ -1, -4, 12, 53, 57, 16, -4, -1 }, { 0, -4, 10, 52, 58, 17, -4, -1 },
{ -1, -4, 9, 51, 59, 19, -4, -1 }, { -1, -4, 8, 50, 60, 20, -4, -1 },
{ 0, -4, 7, 49, 60, 21, -3, -2 }, { 0, -4, 6, 47, 61, 23, -3, -2 },
{ 0, -4, 5, 46, 62, 24, -3, -2 }, { 0, -4, 5, 45, 62, 25, -3, -2 },
{ 0, -4, 4, 43, 63, 27, -3, -2 }, { 0, -4, 3, 42, 63, 28, -2, -2 },
{ 0, -3, 2, 41, 63, 29, -2, -2 }, { 0, -3, 2, 39, 63, 31, -2, -2 },
{ 0, -3, 1, 38, 64, 32, -1, -3 }, { 0, -3, 1, 36, 64, 34, -1, -3 }
};
// Filters for interpolation (0.625-band) - note this also filters integer pels.
static const interp_kernel filteredinterp_filters625[(1 << SUBPEL_BITS)] = {
{-1, -8, 33, 80, 33, -8, -1, 0},
{-1, -8, 30, 80, 35, -8, -1, 1},
{-1, -8, 28, 80, 37, -7, -2, 1},
{0, -8, 26, 79, 39, -7, -2, 1},
{0, -8, 24, 79, 41, -7, -2, 1},
{0, -8, 22, 78, 43, -6, -2, 1},
{0, -8, 20, 78, 45, -5, -3, 1},
{0, -8, 18, 77, 48, -5, -3, 1},
{0, -8, 16, 76, 50, -4, -3, 1},
{0, -8, 15, 75, 52, -3, -4, 1},
{0, -7, 13, 74, 54, -3, -4, 1},
{0, -7, 11, 73, 56, -2, -4, 1},
{0, -7, 10, 71, 58, -1, -4, 1},
{1, -7, 8, 70, 60, 0, -5, 1},
{1, -6, 6, 68, 62, 1, -5, 1},
{1, -6, 5, 67, 63, 2, -5, 1},
{1, -6, 4, 65, 65, 4, -6, 1},
{1, -5, 2, 63, 67, 5, -6, 1},
{1, -5, 1, 62, 68, 6, -6, 1},
{1, -5, 0, 60, 70, 8, -7, 1},
{1, -4, -1, 58, 71, 10, -7, 0},
{1, -4, -2, 56, 73, 11, -7, 0},
{1, -4, -3, 54, 74, 13, -7, 0},
{1, -4, -3, 52, 75, 15, -8, 0},
{1, -3, -4, 50, 76, 16, -8, 0},
{1, -3, -5, 48, 77, 18, -8, 0},
{1, -3, -5, 45, 78, 20, -8, 0},
{1, -2, -6, 43, 78, 22, -8, 0},
{1, -2, -7, 41, 79, 24, -8, 0},
{1, -2, -7, 39, 79, 26, -8, 0},
{1, -2, -7, 37, 80, 28, -8, -1},
{1, -1, -8, 35, 80, 30, -8, -1},
{ -1, -8, 33, 80, 33, -8, -1, 0 }, { -1, -8, 30, 80, 35, -8, -1, 1 },
{ -1, -8, 28, 80, 37, -7, -2, 1 }, { 0, -8, 26, 79, 39, -7, -2, 1 },
{ 0, -8, 24, 79, 41, -7, -2, 1 }, { 0, -8, 22, 78, 43, -6, -2, 1 },
{ 0, -8, 20, 78, 45, -5, -3, 1 }, { 0, -8, 18, 77, 48, -5, -3, 1 },
{ 0, -8, 16, 76, 50, -4, -3, 1 }, { 0, -8, 15, 75, 52, -3, -4, 1 },
{ 0, -7, 13, 74, 54, -3, -4, 1 }, { 0, -7, 11, 73, 56, -2, -4, 1 },
{ 0, -7, 10, 71, 58, -1, -4, 1 }, { 1, -7, 8, 70, 60, 0, -5, 1 },
{ 1, -6, 6, 68, 62, 1, -5, 1 }, { 1, -6, 5, 67, 63, 2, -5, 1 },
{ 1, -6, 4, 65, 65, 4, -6, 1 }, { 1, -5, 2, 63, 67, 5, -6, 1 },
{ 1, -5, 1, 62, 68, 6, -6, 1 }, { 1, -5, 0, 60, 70, 8, -7, 1 },
{ 1, -4, -1, 58, 71, 10, -7, 0 }, { 1, -4, -2, 56, 73, 11, -7, 0 },
{ 1, -4, -3, 54, 74, 13, -7, 0 }, { 1, -4, -3, 52, 75, 15, -8, 0 },
{ 1, -3, -4, 50, 76, 16, -8, 0 }, { 1, -3, -5, 48, 77, 18, -8, 0 },
{ 1, -3, -5, 45, 78, 20, -8, 0 }, { 1, -2, -6, 43, 78, 22, -8, 0 },
{ 1, -2, -7, 41, 79, 24, -8, 0 }, { 1, -2, -7, 39, 79, 26, -8, 0 },
{ 1, -2, -7, 37, 80, 28, -8, -1 }, { 1, -1, -8, 35, 80, 30, -8, -1 },
};
// Filters for interpolation (0.75-band) - note this also filters integer pels.
static const interp_kernel filteredinterp_filters750[(1 << SUBPEL_BITS)] = {
{2, -11, 25, 96, 25, -11, 2, 0},
{2, -11, 22, 96, 28, -11, 2, 0},
{2, -10, 19, 95, 31, -11, 2, 0},
{2, -10, 17, 95, 34, -12, 2, 0},
{2, -9, 14, 94, 37, -12, 2, 0},
{2, -8, 12, 93, 40, -12, 1, 0},
{2, -8, 9, 92, 43, -12, 1, 1},
{2, -7, 7, 91, 46, -12, 1, 0},
{2, -7, 5, 90, 49, -12, 1, 0},
{2, -6, 3, 88, 52, -12, 0, 1},
{2, -5, 1, 86, 55, -12, 0, 1},
{2, -5, -1, 84, 58, -11, 0, 1},
{2, -4, -2, 82, 61, -11, -1, 1},
{2, -4, -4, 80, 64, -10, -1, 1},
{1, -3, -5, 77, 67, -9, -1, 1},
{1, -3, -6, 75, 70, -8, -2, 1},
{1, -2, -7, 72, 72, -7, -2, 1},
{1, -2, -8, 70, 75, -6, -3, 1},
{1, -1, -9, 67, 77, -5, -3, 1},
{1, -1, -10, 64, 80, -4, -4, 2},
{1, -1, -11, 61, 82, -2, -4, 2},
{1, 0, -11, 58, 84, -1, -5, 2},
{1, 0, -12, 55, 86, 1, -5, 2},
{1, 0, -12, 52, 88, 3, -6, 2},
{0, 1, -12, 49, 90, 5, -7, 2},
{0, 1, -12, 46, 91, 7, -7, 2},
{1, 1, -12, 43, 92, 9, -8, 2},
{0, 1, -12, 40, 93, 12, -8, 2},
{0, 2, -12, 37, 94, 14, -9, 2},
{0, 2, -12, 34, 95, 17, -10, 2},
{0, 2, -11, 31, 95, 19, -10, 2},
{0, 2, -11, 28, 96, 22, -11, 2}
{ 2, -11, 25, 96, 25, -11, 2, 0 }, { 2, -11, 22, 96, 28, -11, 2, 0 },
{ 2, -10, 19, 95, 31, -11, 2, 0 }, { 2, -10, 17, 95, 34, -12, 2, 0 },
{ 2, -9, 14, 94, 37, -12, 2, 0 }, { 2, -8, 12, 93, 40, -12, 1, 0 },
{ 2, -8, 9, 92, 43, -12, 1, 1 }, { 2, -7, 7, 91, 46, -12, 1, 0 },
{ 2, -7, 5, 90, 49, -12, 1, 0 }, { 2, -6, 3, 88, 52, -12, 0, 1 },
{ 2, -5, 1, 86, 55, -12, 0, 1 }, { 2, -5, -1, 84, 58, -11, 0, 1 },
{ 2, -4, -2, 82, 61, -11, -1, 1 }, { 2, -4, -4, 80, 64, -10, -1, 1 },
{ 1, -3, -5, 77, 67, -9, -1, 1 }, { 1, -3, -6, 75, 70, -8, -2, 1 },
{ 1, -2, -7, 72, 72, -7, -2, 1 }, { 1, -2, -8, 70, 75, -6, -3, 1 },
{ 1, -1, -9, 67, 77, -5, -3, 1 }, { 1, -1, -10, 64, 80, -4, -4, 2 },
{ 1, -1, -11, 61, 82, -2, -4, 2 }, { 1, 0, -11, 58, 84, -1, -5, 2 },
{ 1, 0, -12, 55, 86, 1, -5, 2 }, { 1, 0, -12, 52, 88, 3, -6, 2 },
{ 0, 1, -12, 49, 90, 5, -7, 2 }, { 0, 1, -12, 46, 91, 7, -7, 2 },
{ 1, 1, -12, 43, 92, 9, -8, 2 }, { 0, 1, -12, 40, 93, 12, -8, 2 },
{ 0, 2, -12, 37, 94, 14, -9, 2 }, { 0, 2, -12, 34, 95, 17, -10, 2 },
{ 0, 2, -11, 31, 95, 19, -10, 2 }, { 0, 2, -11, 28, 96, 22, -11, 2 }
};
// Filters for interpolation (0.875-band) - note this also filters integer pels.
static const interp_kernel filteredinterp_filters875[(1 << SUBPEL_BITS)] = {
{3, -8, 13, 112, 13, -8, 3, 0},
{3, -7, 10, 112, 17, -9, 3, -1},
{2, -6, 7, 111, 21, -9, 3, -1},
{2, -5, 4, 111, 24, -10, 3, -1},
{2, -4, 1, 110, 28, -11, 3, -1},
{1, -3, -1, 108, 32, -12, 4, -1},
{1, -2, -3, 106, 36, -13, 4, -1},
{1, -1, -6, 105, 40, -14, 4, -1},
{1, -1, -7, 102, 44, -14, 4, -1},
{1, 0, -9, 100, 48, -15, 4, -1},
{1, 1, -11, 97, 53, -16, 4, -1},
{0, 1, -12, 95, 57, -16, 4, -1},
{0, 2, -13, 91, 61, -16, 4, -1},
{0, 2, -14, 88, 65, -16, 4, -1},
{0, 3, -15, 84, 69, -17, 4, 0},
{0, 3, -16, 81, 73, -16, 3, 0},
{0, 3, -16, 77, 77, -16, 3, 0},
{0, 3, -16, 73, 81, -16, 3, 0},
{0, 4, -17, 69, 84, -15, 3, 0},
{-1, 4, -16, 65, 88, -14, 2, 0},
{-1, 4, -16, 61, 91, -13, 2, 0},
{-1, 4, -16, 57, 95, -12, 1, 0},
{-1, 4, -16, 53, 97, -11, 1, 1},
{-1, 4, -15, 48, 100, -9, 0, 1},
{-1, 4, -14, 44, 102, -7, -1, 1},
{-1, 4, -14, 40, 105, -6, -1, 1},
{-1, 4, -13, 36, 106, -3, -2, 1},
{-1, 4, -12, 32, 108, -1, -3, 1},
{-1, 3, -11, 28, 110, 1, -4, 2},
{-1, 3, -10, 24, 111, 4, -5, 2},
{-1, 3, -9, 21, 111, 7, -6, 2},
{-1, 3, -9, 17, 112, 10, -7, 3}
{ 3, -8, 13, 112, 13, -8, 3, 0 }, { 3, -7, 10, 112, 17, -9, 3, -1 },
{ 2, -6, 7, 111, 21, -9, 3, -1 }, { 2, -5, 4, 111, 24, -10, 3, -1 },
{ 2, -4, 1, 110, 28, -11, 3, -1 }, { 1, -3, -1, 108, 32, -12, 4, -1 },
{ 1, -2, -3, 106, 36, -13, 4, -1 }, { 1, -1, -6, 105, 40, -14, 4, -1 },
{ 1, -1, -7, 102, 44, -14, 4, -1 }, { 1, 0, -9, 100, 48, -15, 4, -1 },
{ 1, 1, -11, 97, 53, -16, 4, -1 }, { 0, 1, -12, 95, 57, -16, 4, -1 },
{ 0, 2, -13, 91, 61, -16, 4, -1 }, { 0, 2, -14, 88, 65, -16, 4, -1 },
{ 0, 3, -15, 84, 69, -17, 4, 0 }, { 0, 3, -16, 81, 73, -16, 3, 0 },
{ 0, 3, -16, 77, 77, -16, 3, 0 }, { 0, 3, -16, 73, 81, -16, 3, 0 },
{ 0, 4, -17, 69, 84, -15, 3, 0 }, { -1, 4, -16, 65, 88, -14, 2, 0 },
{ -1, 4, -16, 61, 91, -13, 2, 0 }, { -1, 4, -16, 57, 95, -12, 1, 0 },
{ -1, 4, -16, 53, 97, -11, 1, 1 }, { -1, 4, -15, 48, 100, -9, 0, 1 },
{ -1, 4, -14, 44, 102, -7, -1, 1 }, { -1, 4, -14, 40, 105, -6, -1, 1 },
{ -1, 4, -13, 36, 106, -3, -2, 1 }, { -1, 4, -12, 32, 108, -1, -3, 1 },
{ -1, 3, -11, 28, 110, 1, -4, 2 }, { -1, 3, -10, 24, 111, 4, -5, 2 },
{ -1, 3, -9, 21, 111, 7, -6, 2 }, { -1, 3, -9, 17, 112, 10, -7, 3 }
};
// Filters for interpolation (full-band) - no filtering for integer pixels
static const interp_kernel filteredinterp_filters1000[(1 << SUBPEL_BITS)] = {
{0, 0, 0, 128, 0, 0, 0, 0},
{0, 1, -3, 128, 3, -1, 0, 0},
{-1, 2, -6, 127, 7, -2, 1, 0},
{-1, 3, -9, 126, 12, -4, 1, 0},
{-1, 4, -12, 125, 16, -5, 1, 0},
{-1, 4, -14, 123, 20, -6, 2, 0},
{-1, 5, -15, 120, 25, -8, 2, 0},
{-1, 5, -17, 118, 30, -9, 3, -1},
{-1, 6, -18, 114, 35, -10, 3, -1},
{-1, 6, -19, 111, 41, -12, 3, -1},
{-1, 6, -20, 107, 46, -13, 4, -1},
{-1, 6, -21, 103, 52, -14, 4, -1},
{-1, 6, -21, 99, 57, -16, 5, -1},
{-1, 6, -21, 94, 63, -17, 5, -1},
{-1, 6, -20, 89, 68, -18, 5, -1},
{-1, 6, -20, 84, 73, -19, 6, -1},
{-1, 6, -20, 79, 79, -20, 6, -1},
{-1, 6, -19, 73, 84, -20, 6, -1},
{-1, 5, -18, 68, 89, -20, 6, -1},
{-1, 5, -17, 63, 94, -21, 6, -1},
{-1, 5, -16, 57, 99, -21, 6, -1},
{-1, 4, -14, 52, 103, -21, 6, -1},
{-1, 4, -13, 46, 107, -20, 6, -1},
{-1, 3, -12, 41, 111, -19, 6, -1},
{-1, 3, -10, 35, 114, -18, 6, -1},
{-1, 3, -9, 30, 118, -17, 5, -1},
{0, 2, -8, 25, 120, -15, 5, -1},
{0, 2, -6, 20, 123, -14, 4, -1},
{0, 1, -5, 16, 125, -12, 4, -1},
{0, 1, -4, 12, 126, -9, 3, -1},
{0, 1, -2, 7, 127, -6, 2, -1},
{0, 0, -1, 3, 128, -3, 1, 0}
{ 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 1, -3, 128, 3, -1, 0, 0 },
{ -1, 2, -6, 127, 7, -2, 1, 0 }, { -1, 3, -9, 126, 12, -4, 1, 0 },
{ -1, 4, -12, 125, 16, -5, 1, 0 }, { -1, 4, -14, 123, 20, -6, 2, 0 },
{ -1, 5, -15, 120, 25, -8, 2, 0 }, { -1, 5, -17, 118, 30, -9, 3, -1 },
{ -1, 6, -18, 114, 35, -10, 3, -1 }, { -1, 6, -19, 111, 41, -12, 3, -1 },
{ -1, 6, -20, 107, 46, -13, 4, -1 }, { -1, 6, -21, 103, 52, -14, 4, -1 },
{ -1, 6, -21, 99, 57, -16, 5, -1 }, { -1, 6, -21, 94, 63, -17, 5, -1 },
{ -1, 6, -20, 89, 68, -18, 5, -1 }, { -1, 6, -20, 84, 73, -19, 6, -1 },
{ -1, 6, -20, 79, 79, -20, 6, -1 }, { -1, 6, -19, 73, 84, -20, 6, -1 },
{ -1, 5, -18, 68, 89, -20, 6, -1 }, { -1, 5, -17, 63, 94, -21, 6, -1 },
{ -1, 5, -16, 57, 99, -21, 6, -1 }, { -1, 4, -14, 52, 103, -21, 6, -1 },
{ -1, 4, -13, 46, 107, -20, 6, -1 }, { -1, 3, -12, 41, 111, -19, 6, -1 },
{ -1, 3, -10, 35, 114, -18, 6, -1 }, { -1, 3, -9, 30, 118, -17, 5, -1 },
{ 0, 2, -8, 25, 120, -15, 5, -1 }, { 0, 2, -6, 20, 123, -14, 4, -1 },
{ 0, 1, -5, 16, 125, -12, 4, -1 }, { 0, 1, -4, 12, 126, -9, 3, -1 },
{ 0, 1, -2, 7, 127, -6, 2, -1 }, { 0, 0, -1, 3, 128, -3, 1, 0 }
};
// Filters for factor of 2 downsampling.
static const int16_t vp10_down2_symeven_half_filter[] = {56, 12, -3, -1};
static const int16_t vp10_down2_symodd_half_filter[] = {64, 35, 0, -3};
static const int16_t vp10_down2_symeven_half_filter[] = { 56, 12, -3, -1 };
static const int16_t vp10_down2_symodd_half_filter[] = { 64, 35, 0, -3 };
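Only half of each symmetric down-sampling kernel is stored. Mirroring the even half-filter yields the 8-tap kernel {-1, -3, 12, 56, 56, 12, -3, -1}, and the odd half-filter expands to the 7-tap {-3, 0, 35, 64, 35, 0, -3}; both sum to 128 = 1 << FILTER_BITS, so the final >> FILTER_BITS leaves flat areas unchanged. A quick, purely illustrative check of that invariant:

/* Sanity check: the expanded symmetric kernels are DC-neutral. */
#include <assert.h>
static void check_down2_filters(void) {
  const int even_half[4] = { 56, 12, -3, -1 }, odd_half[4] = { 64, 35, 0, -3 };
  int i, even_sum = 0, odd_sum = odd_half[0];
  for (i = 0; i < 4; ++i) even_sum += 2 * even_half[i];
  for (i = 1; i < 4; ++i) odd_sum += 2 * odd_half[i];
  assert(even_sum == 128 && odd_sum == 128); /* 1 << FILTER_BITS */
}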
static const interp_kernel *choose_interp_filter(int inlength, int outlength) {
int outlength16 = outlength * 16;
@ -231,11 +151,14 @@ static const interp_kernel *choose_interp_filter(int inlength, int outlength) {
static void interpolate(const uint8_t *const input, int inlength,
uint8_t *output, int outlength) {
const int64_t delta = (((uint64_t)inlength << 32) + outlength / 2) /
outlength;
const int64_t offset = inlength > outlength ?
(((int64_t)(inlength - outlength) << 31) + outlength / 2) / outlength :
-(((int64_t)(outlength - inlength) << 31) + outlength / 2) / outlength;
const int64_t delta =
(((uint64_t)inlength << 32) + outlength / 2) / outlength;
const int64_t offset =
inlength > outlength
? (((int64_t)(inlength - outlength) << 31) + outlength / 2) /
outlength
: -(((int64_t)(outlength - inlength) << 31) + outlength / 2) /
outlength;
uint8_t *optr = output;
int x, x1, x2, sum, k, int_pel, sub_pel;
int64_t y;
@ -252,8 +175,8 @@ static void interpolate(const uint8_t *const input, int inlength,
x1 = x;
x = outlength - 1;
y = delta * x + offset;
while ((y >> INTERP_PRECISION_BITS) +
(int64_t)(INTERP_TAPS / 2) >= inlength) {
while ((y >> INTERP_PRECISION_BITS) + (int64_t)(INTERP_TAPS / 2) >=
inlength) {
x--;
y -= delta;
}
@ -267,8 +190,8 @@ static void interpolate(const uint8_t *const input, int inlength,
sum = 0;
for (k = 0; k < INTERP_TAPS; ++k) {
const int pk = int_pel - INTERP_TAPS / 2 + 1 + k;
sum += filter[k] * input[(pk < 0 ? 0 :
(pk >= inlength ? inlength - 1 : pk))];
sum += filter[k] *
input[(pk < 0 ? 0 : (pk >= inlength ? inlength - 1 : pk))];
}
*optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
}
@ -281,9 +204,9 @@ static void interpolate(const uint8_t *const input, int inlength,
filter = interp_filters[sub_pel];
sum = 0;
for (k = 0; k < INTERP_TAPS; ++k)
sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k < 0 ?
0 :
int_pel - INTERP_TAPS / 2 + 1 + k)];
sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k < 0
? 0
: int_pel - INTERP_TAPS / 2 + 1 + k)];
*optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
}
// Middle part.
@ -305,9 +228,9 @@ static void interpolate(const uint8_t *const input, int inlength,
filter = interp_filters[sub_pel];
sum = 0;
for (k = 0; k < INTERP_TAPS; ++k)
sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k >=
inlength ? inlength - 1 :
int_pel - INTERP_TAPS / 2 + 1 + k)];
sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k >= inlength
? inlength - 1
: int_pel - INTERP_TAPS / 2 + 1 + k)];
*optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
}
}
@ -331,7 +254,7 @@ static void down2_symeven(const uint8_t *const input, int length,
for (j = 0; j < filter_len_half; ++j) {
sum += (input[(i - j < 0 ? 0 : i - j)] +
input[(i + 1 + j >= length ? length - 1 : i + 1 + j)]) *
filter[j];
filter[j];
}
sum >>= FILTER_BITS;
*optr++ = clip_pixel(sum);
@ -361,7 +284,7 @@ static void down2_symeven(const uint8_t *const input, int length,
for (j = 0; j < filter_len_half; ++j) {
sum += (input[i - j] +
input[(i + 1 + j >= length ? length - 1 : i + 1 + j)]) *
filter[j];
filter[j];
}
sum >>= FILTER_BITS;
*optr++ = clip_pixel(sum);
@ -387,7 +310,7 @@ static void down2_symodd(const uint8_t *const input, int length,
for (j = 1; j < filter_len_half; ++j) {
sum += (input[(i - j < 0 ? 0 : i - j)] +
input[(i + j >= length ? length - 1 : i + j)]) *
filter[j];
filter[j];
}
sum >>= FILTER_BITS;
*optr++ = clip_pixel(sum);
@ -416,7 +339,7 @@ static void down2_symodd(const uint8_t *const input, int length,
int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0];
for (j = 1; j < filter_len_half; ++j) {
sum += (input[i - j] + input[(i + j >= length ? length - 1 : i + j)]) *
filter[j];
filter[j];
}
sum >>= FILTER_BITS;
*optr++ = clip_pixel(sum);
@ -426,8 +349,7 @@ static void down2_symodd(const uint8_t *const input, int length,
static int get_down2_length(int length, int steps) {
int s;
for (s = 0; s < steps; ++s)
length = (length + 1) >> 1;
for (s = 0; s < steps; ++s) length = (length + 1) >> 1;
return length;
}
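Each step halves the length with round-up, so get_down2_length(37, 2) evaluates 37 -> 19 -> 10. A tiny check, assuming the get_down2_length() shown above is in scope:

/* Illustrative check of the halving-with-round-up behaviour:
 * each step computes (length + 1) >> 1. */
#include <assert.h>
static void check_get_down2_length(void) {
  assert(get_down2_length(37, 1) == 19);
  assert(get_down2_length(37, 2) == 10);
}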
@ -441,11 +363,8 @@ static int get_down2_steps(int in_length, int out_length) {
return steps;
}
static void resize_multistep(const uint8_t *const input,
int length,
uint8_t *output,
int olength,
uint8_t *otmp) {
static void resize_multistep(const uint8_t *const input, int length,
uint8_t *output, int olength, uint8_t *otmp) {
int steps;
if (length == olength) {
memcpy(output, input, sizeof(output[0]) * length);
@ -500,37 +419,31 @@ static void fill_arr_to_col(uint8_t *img, int stride, int len, uint8_t *arr) {
}
}
void vp10_resize_plane(const uint8_t *const input,
int height,
int width,
int in_stride,
uint8_t *output,
int height2,
int width2,
int out_stride) {
void vp10_resize_plane(const uint8_t *const input, int height, int width,
int in_stride, uint8_t *output, int height2, int width2,
int out_stride) {
int i;
uint8_t *intbuf = (uint8_t *)malloc(sizeof(uint8_t) * width2 * height);
uint8_t *tmpbuf = (uint8_t *)malloc(sizeof(uint8_t) *
(width < height ? height : width));
uint8_t *tmpbuf =
(uint8_t *)malloc(sizeof(uint8_t) * (width < height ? height : width));
uint8_t *arrbuf = (uint8_t *)malloc(sizeof(uint8_t) * height);
uint8_t *arrbuf2 = (uint8_t *)malloc(sizeof(uint8_t) * height2);
if (intbuf == NULL || tmpbuf == NULL ||
arrbuf == NULL || arrbuf2 == NULL)
if (intbuf == NULL || tmpbuf == NULL || arrbuf == NULL || arrbuf2 == NULL)
goto Error;
assert(width > 0);
assert(height > 0);
assert(width2 > 0);
assert(height2 > 0);
for (i = 0; i < height; ++i)
resize_multistep(input + in_stride * i, width,
intbuf + width2 * i, width2, tmpbuf);
resize_multistep(input + in_stride * i, width, intbuf + width2 * i, width2,
tmpbuf);
for (i = 0; i < width2; ++i) {
fill_col_to_arr(intbuf + i, width2, height, arrbuf);
resize_multistep(arrbuf, height, arrbuf2, height2, tmpbuf);
fill_arr_to_col(output + i, out_stride, height2, arrbuf2);
}
Error:
Error:
free(intbuf);
free(tmpbuf);
free(arrbuf);
@ -542,9 +455,12 @@ static void highbd_interpolate(const uint16_t *const input, int inlength,
uint16_t *output, int outlength, int bd) {
const int64_t delta =
(((uint64_t)inlength << 32) + outlength / 2) / outlength;
const int64_t offset = inlength > outlength ?
(((int64_t)(inlength - outlength) << 31) + outlength / 2) / outlength :
-(((int64_t)(outlength - inlength) << 31) + outlength / 2) / outlength;
const int64_t offset =
inlength > outlength
? (((int64_t)(inlength - outlength) << 31) + outlength / 2) /
outlength
: -(((int64_t)(outlength - inlength) << 31) + outlength / 2) /
outlength;
uint16_t *optr = output;
int x, x1, x2, sum, k, int_pel, sub_pel;
int64_t y;
@ -561,8 +477,8 @@ static void highbd_interpolate(const uint16_t *const input, int inlength,
x1 = x;
x = outlength - 1;
y = delta * x + offset;
while ((y >> INTERP_PRECISION_BITS) +
(int64_t)(INTERP_TAPS / 2) >= inlength) {
while ((y >> INTERP_PRECISION_BITS) + (int64_t)(INTERP_TAPS / 2) >=
inlength) {
x--;
y -= delta;
}
@ -577,7 +493,7 @@ static void highbd_interpolate(const uint16_t *const input, int inlength,
for (k = 0; k < INTERP_TAPS; ++k) {
const int pk = int_pel - INTERP_TAPS / 2 + 1 + k;
sum += filter[k] *
input[(pk < 0 ? 0 : (pk >= inlength ? inlength - 1 : pk))];
input[(pk < 0 ? 0 : (pk >= inlength ? inlength - 1 : pk))];
}
*optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
}
@ -590,9 +506,9 @@ static void highbd_interpolate(const uint16_t *const input, int inlength,
filter = interp_filters[sub_pel];
sum = 0;
for (k = 0; k < INTERP_TAPS; ++k)
sum += filter[k] *
input[(int_pel - INTERP_TAPS / 2 + 1 + k < 0 ?
0 : int_pel - INTERP_TAPS / 2 + 1 + k)];
sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k < 0
? 0
: int_pel - INTERP_TAPS / 2 + 1 + k)];
*optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
}
// Middle part.
@ -614,9 +530,9 @@ static void highbd_interpolate(const uint16_t *const input, int inlength,
filter = interp_filters[sub_pel];
sum = 0;
for (k = 0; k < INTERP_TAPS; ++k)
sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k >=
inlength ? inlength - 1 :
int_pel - INTERP_TAPS / 2 + 1 + k)];
sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k >= inlength
? inlength - 1
: int_pel - INTERP_TAPS / 2 + 1 + k)];
*optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
}
}
@ -640,7 +556,7 @@ static void highbd_down2_symeven(const uint16_t *const input, int length,
for (j = 0; j < filter_len_half; ++j) {
sum += (input[(i - j < 0 ? 0 : i - j)] +
input[(i + 1 + j >= length ? length - 1 : i + 1 + j)]) *
filter[j];
filter[j];
}
sum >>= FILTER_BITS;
*optr++ = clip_pixel_highbd(sum, bd);
@ -670,7 +586,7 @@ static void highbd_down2_symeven(const uint16_t *const input, int length,
for (j = 0; j < filter_len_half; ++j) {
sum += (input[i - j] +
input[(i + 1 + j >= length ? length - 1 : i + 1 + j)]) *
filter[j];
filter[j];
}
sum >>= FILTER_BITS;
*optr++ = clip_pixel_highbd(sum, bd);
@ -679,7 +595,7 @@ static void highbd_down2_symeven(const uint16_t *const input, int length,
}
static void highbd_down2_symodd(const uint16_t *const input, int length,
uint16_t *output, int bd) {
uint16_t *output, int bd) {
// Actual filter len = 2 * filter_len_half - 1.
static const int16_t *filter = vp10_down2_symodd_half_filter;
const int filter_len_half = sizeof(vp10_down2_symodd_half_filter) / 2;
@ -696,7 +612,7 @@ static void highbd_down2_symodd(const uint16_t *const input, int length,
for (j = 1; j < filter_len_half; ++j) {
sum += (input[(i - j < 0 ? 0 : i - j)] +
input[(i + j >= length ? length - 1 : i + j)]) *
filter[j];
filter[j];
}
sum >>= FILTER_BITS;
*optr++ = clip_pixel_highbd(sum, bd);
@ -725,7 +641,7 @@ static void highbd_down2_symodd(const uint16_t *const input, int length,
int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0];
for (j = 1; j < filter_len_half; ++j) {
sum += (input[i - j] + input[(i + j >= length ? length - 1 : i + j)]) *
filter[j];
filter[j];
}
sum >>= FILTER_BITS;
*optr++ = clip_pixel_highbd(sum, bd);
@ -733,12 +649,9 @@ static void highbd_down2_symodd(const uint16_t *const input, int length,
}
}
static void highbd_resize_multistep(const uint16_t *const input,
int length,
uint16_t *output,
int olength,
uint16_t *otmp,
int bd) {
static void highbd_resize_multistep(const uint16_t *const input, int length,
uint16_t *output, int olength,
uint16_t *otmp, int bd) {
int steps;
if (length == olength) {
memcpy(output, input, sizeof(output[0]) * length);
@ -795,36 +708,29 @@ static void highbd_fill_arr_to_col(uint16_t *img, int stride, int len,
}
}
void vp10_highbd_resize_plane(const uint8_t *const input,
int height,
int width,
int in_stride,
uint8_t *output,
int height2,
int width2,
int out_stride,
int bd) {
void vp10_highbd_resize_plane(const uint8_t *const input, int height, int width,
int in_stride, uint8_t *output, int height2,
int width2, int out_stride, int bd) {
int i;
uint16_t *intbuf = (uint16_t *)malloc(sizeof(uint16_t) * width2 * height);
uint16_t *tmpbuf = (uint16_t *)malloc(sizeof(uint16_t) *
(width < height ? height : width));
uint16_t *tmpbuf =
(uint16_t *)malloc(sizeof(uint16_t) * (width < height ? height : width));
uint16_t *arrbuf = (uint16_t *)malloc(sizeof(uint16_t) * height);
uint16_t *arrbuf2 = (uint16_t *)malloc(sizeof(uint16_t) * height2);
if (intbuf == NULL || tmpbuf == NULL ||
arrbuf == NULL || arrbuf2 == NULL) goto Error;
if (intbuf == NULL || tmpbuf == NULL || arrbuf == NULL || arrbuf2 == NULL)
goto Error;
for (i = 0; i < height; ++i) {
highbd_resize_multistep(CONVERT_TO_SHORTPTR(input + in_stride * i), width,
intbuf + width2 * i, width2, tmpbuf, bd);
}
for (i = 0; i < width2; ++i) {
highbd_fill_col_to_arr(intbuf + i, width2, height, arrbuf);
highbd_resize_multistep(arrbuf, height, arrbuf2, height2, tmpbuf,
bd);
highbd_resize_multistep(arrbuf, height, arrbuf2, height2, tmpbuf, bd);
highbd_fill_arr_to_col(CONVERT_TO_SHORTPTR(output + i), out_stride, height2,
arrbuf2);
}
Error:
Error:
free(intbuf);
free(tmpbuf);
free(arrbuf);
@ -832,96 +738,82 @@ void vp10_highbd_resize_plane(const uint8_t *const input,
}
#endif // CONFIG_VP9_HIGHBITDEPTH
void vp10_resize_frame420(const uint8_t *const y,
int y_stride,
const uint8_t *const u, const uint8_t *const v,
int uv_stride,
int height, int width,
uint8_t *oy, int oy_stride,
uint8_t *ou, uint8_t *ov, int ouv_stride,
int oheight, int owidth) {
vp10_resize_plane(y, height, width, y_stride,
oy, oheight, owidth, oy_stride);
vp10_resize_plane(u, height / 2, width / 2, uv_stride,
ou, oheight / 2, owidth / 2, ouv_stride);
vp10_resize_plane(v, height / 2, width / 2, uv_stride,
ov, oheight / 2, owidth / 2, ouv_stride);
void vp10_resize_frame420(const uint8_t *const y, int y_stride,
const uint8_t *const u, const uint8_t *const v,
int uv_stride, int height, int width, uint8_t *oy,
int oy_stride, uint8_t *ou, uint8_t *ov,
int ouv_stride, int oheight, int owidth) {
vp10_resize_plane(y, height, width, y_stride, oy, oheight, owidth, oy_stride);
vp10_resize_plane(u, height / 2, width / 2, uv_stride, ou, oheight / 2,
owidth / 2, ouv_stride);
vp10_resize_plane(v, height / 2, width / 2, uv_stride, ov, oheight / 2,
owidth / 2, ouv_stride);
}
void vp10_resize_frame422(const uint8_t *const y, int y_stride,
const uint8_t *const u, const uint8_t *const v,
int uv_stride,
int height, int width,
uint8_t *oy, int oy_stride,
uint8_t *ou, uint8_t *ov, int ouv_stride,
int oheight, int owidth) {
vp10_resize_plane(y, height, width, y_stride,
oy, oheight, owidth, oy_stride);
vp10_resize_plane(u, height, width / 2, uv_stride,
ou, oheight, owidth / 2, ouv_stride);
vp10_resize_plane(v, height, width / 2, uv_stride,
ov, oheight, owidth / 2, ouv_stride);
const uint8_t *const u, const uint8_t *const v,
int uv_stride, int height, int width, uint8_t *oy,
int oy_stride, uint8_t *ou, uint8_t *ov,
int ouv_stride, int oheight, int owidth) {
vp10_resize_plane(y, height, width, y_stride, oy, oheight, owidth, oy_stride);
vp10_resize_plane(u, height, width / 2, uv_stride, ou, oheight, owidth / 2,
ouv_stride);
vp10_resize_plane(v, height, width / 2, uv_stride, ov, oheight, owidth / 2,
ouv_stride);
}
void vp10_resize_frame444(const uint8_t *const y, int y_stride,
const uint8_t *const u, const uint8_t *const v,
int uv_stride,
int height, int width,
uint8_t *oy, int oy_stride,
uint8_t *ou, uint8_t *ov, int ouv_stride,
int oheight, int owidth) {
vp10_resize_plane(y, height, width, y_stride,
oy, oheight, owidth, oy_stride);
vp10_resize_plane(u, height, width, uv_stride,
ou, oheight, owidth, ouv_stride);
vp10_resize_plane(v, height, width, uv_stride,
ov, oheight, owidth, ouv_stride);
const uint8_t *const u, const uint8_t *const v,
int uv_stride, int height, int width, uint8_t *oy,
int oy_stride, uint8_t *ou, uint8_t *ov,
int ouv_stride, int oheight, int owidth) {
vp10_resize_plane(y, height, width, y_stride, oy, oheight, owidth, oy_stride);
vp10_resize_plane(u, height, width, uv_stride, ou, oheight, owidth,
ouv_stride);
vp10_resize_plane(v, height, width, uv_stride, ov, oheight, owidth,
ouv_stride);
}
#if CONFIG_VP9_HIGHBITDEPTH
void vp10_highbd_resize_frame420(const uint8_t *const y,
int y_stride,
const uint8_t *const u, const uint8_t *const v,
int uv_stride,
int height, int width,
uint8_t *oy, int oy_stride,
uint8_t *ou, uint8_t *ov, int ouv_stride,
int oheight, int owidth, int bd) {
vp10_highbd_resize_plane(y, height, width, y_stride,
oy, oheight, owidth, oy_stride, bd);
vp10_highbd_resize_plane(u, height / 2, width / 2, uv_stride,
ou, oheight / 2, owidth / 2, ouv_stride, bd);
vp10_highbd_resize_plane(v, height / 2, width / 2, uv_stride,
ov, oheight / 2, owidth / 2, ouv_stride, bd);
void vp10_highbd_resize_frame420(const uint8_t *const y, int y_stride,
const uint8_t *const u, const uint8_t *const v,
int uv_stride, int height, int width,
uint8_t *oy, int oy_stride, uint8_t *ou,
uint8_t *ov, int ouv_stride, int oheight,
int owidth, int bd) {
vp10_highbd_resize_plane(y, height, width, y_stride, oy, oheight, owidth,
oy_stride, bd);
vp10_highbd_resize_plane(u, height / 2, width / 2, uv_stride, ou, oheight / 2,
owidth / 2, ouv_stride, bd);
vp10_highbd_resize_plane(v, height / 2, width / 2, uv_stride, ov, oheight / 2,
owidth / 2, ouv_stride, bd);
}
void vp10_highbd_resize_frame422(const uint8_t *const y, int y_stride,
const uint8_t *const u, const uint8_t *const v,
int uv_stride,
int height, int width,
uint8_t *oy, int oy_stride,
uint8_t *ou, uint8_t *ov, int ouv_stride,
int oheight, int owidth, int bd) {
vp10_highbd_resize_plane(y, height, width, y_stride,
oy, oheight, owidth, oy_stride, bd);
vp10_highbd_resize_plane(u, height, width / 2, uv_stride,
ou, oheight, owidth / 2, ouv_stride, bd);
vp10_highbd_resize_plane(v, height, width / 2, uv_stride,
ov, oheight, owidth / 2, ouv_stride, bd);
const uint8_t *const u, const uint8_t *const v,
int uv_stride, int height, int width,
uint8_t *oy, int oy_stride, uint8_t *ou,
uint8_t *ov, int ouv_stride, int oheight,
int owidth, int bd) {
vp10_highbd_resize_plane(y, height, width, y_stride, oy, oheight, owidth,
oy_stride, bd);
vp10_highbd_resize_plane(u, height, width / 2, uv_stride, ou, oheight,
owidth / 2, ouv_stride, bd);
vp10_highbd_resize_plane(v, height, width / 2, uv_stride, ov, oheight,
owidth / 2, ouv_stride, bd);
}
void vp10_highbd_resize_frame444(const uint8_t *const y, int y_stride,
const uint8_t *const u, const uint8_t *const v,
int uv_stride,
int height, int width,
uint8_t *oy, int oy_stride,
uint8_t *ou, uint8_t *ov, int ouv_stride,
int oheight, int owidth, int bd) {
vp10_highbd_resize_plane(y, height, width, y_stride,
oy, oheight, owidth, oy_stride, bd);
vp10_highbd_resize_plane(u, height, width, uv_stride,
ou, oheight, owidth, ouv_stride, bd);
vp10_highbd_resize_plane(v, height, width, uv_stride,
ov, oheight, owidth, ouv_stride, bd);
const uint8_t *const u, const uint8_t *const v,
int uv_stride, int height, int width,
uint8_t *oy, int oy_stride, uint8_t *ou,
uint8_t *ov, int ouv_stride, int oheight,
int owidth, int bd) {
vp10_highbd_resize_plane(y, height, width, y_stride, oy, oheight, owidth,
oy_stride, bd);
vp10_highbd_resize_plane(u, height, width, uv_stride, ou, oheight, owidth,
ouv_stride, bd);
vp10_highbd_resize_plane(v, height, width, uv_stride, ov, oheight, owidth,
ouv_stride, bd);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
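As a usage sketch only (not part of this change): the frame-level wrappers above just call vp10_resize_plane once per plane with the appropriately subsampled dimensions. A hypothetical caller halving a 4:2:0 frame, with all buffer names and sizes illustrative:

// Illustrative caller; real code passes YV12_BUFFER_CONFIG-backed pointers.
static void halve_420_sketch(const uint8_t *y, const uint8_t *u,
                             const uint8_t *v, int height, int width,
                             int y_stride, int uv_stride, uint8_t *oy,
                             uint8_t *ou, uint8_t *ov, int oy_stride,
                             int ouv_stride) {
  // Source (height, width, stride) comes first, then the output frame
  // at half the size in each dimension.
  vp10_resize_frame420(y, y_stride, u, v, uv_stride, height, width, oy,
                       oy_stride, ou, ov, ouv_stride, height / 2, width / 2);
}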

vp10/encoder/resize.h

@@ -18,116 +18,51 @@
extern "C" {
#endif
void vp10_resize_plane(const uint8_t *const input,
int height,
int width,
int in_stride,
uint8_t *output,
int height2,
int width2,
int out_stride);
void vp10_resize_frame420(const uint8_t *const y,
int y_stride,
const uint8_t *const u,
const uint8_t *const v,
int uv_stride,
int height,
int width,
uint8_t *oy,
int oy_stride,
uint8_t *ou,
uint8_t *ov,
int ouv_stride,
int oheight,
int owidth);
void vp10_resize_frame422(const uint8_t *const y,
int y_stride,
const uint8_t *const u,
const uint8_t *const v,
int uv_stride,
int height,
int width,
uint8_t *oy,
int oy_stride,
uint8_t *ou,
uint8_t *ov,
int ouv_stride,
int oheight,
int owidth);
void vp10_resize_frame444(const uint8_t *const y,
int y_stride,
const uint8_t *const u,
const uint8_t *const v,
int uv_stride,
int height,
int width,
uint8_t *oy,
int oy_stride,
uint8_t *ou,
uint8_t *ov,
int ouv_stride,
int oheight,
int owidth);
void vp10_resize_plane(const uint8_t *const input, int height, int width,
int in_stride, uint8_t *output, int height2, int width2,
int out_stride);
void vp10_resize_frame420(const uint8_t *const y, int y_stride,
const uint8_t *const u, const uint8_t *const v,
int uv_stride, int height, int width, uint8_t *oy,
int oy_stride, uint8_t *ou, uint8_t *ov,
int ouv_stride, int oheight, int owidth);
void vp10_resize_frame422(const uint8_t *const y, int y_stride,
const uint8_t *const u, const uint8_t *const v,
int uv_stride, int height, int width, uint8_t *oy,
int oy_stride, uint8_t *ou, uint8_t *ov,
int ouv_stride, int oheight, int owidth);
void vp10_resize_frame444(const uint8_t *const y, int y_stride,
const uint8_t *const u, const uint8_t *const v,
int uv_stride, int height, int width, uint8_t *oy,
int oy_stride, uint8_t *ou, uint8_t *ov,
int ouv_stride, int oheight, int owidth);
#if CONFIG_VP9_HIGHBITDEPTH
void vp10_highbd_resize_plane(const uint8_t *const input,
int height,
int width,
int in_stride,
uint8_t *output,
int height2,
int width2,
int out_stride,
int bd);
void vp10_highbd_resize_frame420(const uint8_t *const y,
int y_stride,
const uint8_t *const u,
const uint8_t *const v,
int uv_stride,
int height,
int width,
uint8_t *oy,
int oy_stride,
uint8_t *ou,
uint8_t *ov,
int ouv_stride,
int oheight,
int owidth,
int bd);
void vp10_highbd_resize_frame422(const uint8_t *const y,
int y_stride,
const uint8_t *const u,
const uint8_t *const v,
int uv_stride,
int height,
int width,
uint8_t *oy,
int oy_stride,
uint8_t *ou,
uint8_t *ov,
int ouv_stride,
int oheight,
int owidth,
int bd);
void vp10_highbd_resize_frame444(const uint8_t *const y,
int y_stride,
const uint8_t *const u,
const uint8_t *const v,
int uv_stride,
int height,
int width,
uint8_t *oy,
int oy_stride,
uint8_t *ou,
uint8_t *ov,
int ouv_stride,
int oheight,
int owidth,
int bd);
#endif // CONFIG_VP9_HIGHBITDEPTH
void vp10_highbd_resize_plane(const uint8_t *const input, int height, int width,
int in_stride, uint8_t *output, int height2,
int width2, int out_stride, int bd);
void vp10_highbd_resize_frame420(const uint8_t *const y, int y_stride,
const uint8_t *const u, const uint8_t *const v,
int uv_stride, int height, int width,
uint8_t *oy, int oy_stride, uint8_t *ou,
uint8_t *ov, int ouv_stride, int oheight,
int owidth, int bd);
void vp10_highbd_resize_frame422(const uint8_t *const y, int y_stride,
const uint8_t *const u, const uint8_t *const v,
int uv_stride, int height, int width,
uint8_t *oy, int oy_stride, uint8_t *ou,
uint8_t *ov, int ouv_stride, int oheight,
int owidth, int bd);
void vp10_highbd_resize_frame444(const uint8_t *const y, int y_stride,
const uint8_t *const u, const uint8_t *const v,
int uv_stride, int height, int width,
uint8_t *oy, int oy_stride, uint8_t *ou,
uint8_t *ov, int ouv_stride, int oheight,
int owidth, int bd);
#endif // CONFIG_VP9_HIGHBITDEPTH
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP10_ENCODER_RESIZE_H_
#endif // VP10_ENCODER_RESIZE_H_

vp10/encoder/segmentation.c

@@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include <limits.h>
#include "vpx_mem/vpx_mem.h"
@@ -32,31 +31,31 @@ void vp10_disable_segmentation(struct segmentation *seg) {
seg->update_data = 0;
}
void vp10_set_segment_data(struct segmentation *seg,
signed char *feature_data,
unsigned char abs_delta) {
void vp10_set_segment_data(struct segmentation *seg, signed char *feature_data,
unsigned char abs_delta) {
seg->abs_delta = abs_delta;
memcpy(seg->feature_data, feature_data, sizeof(seg->feature_data));
}
void vp10_disable_segfeature(struct segmentation *seg, int segment_id,
SEG_LVL_FEATURES feature_id) {
SEG_LVL_FEATURES feature_id) {
seg->feature_mask[segment_id] &= ~(1 << feature_id);
}
void vp10_clear_segdata(struct segmentation *seg, int segment_id,
SEG_LVL_FEATURES feature_id) {
SEG_LVL_FEATURES feature_id) {
seg->feature_data[segment_id][feature_id] = 0;
}
// Based on a set of segment counts, calculate a probability tree
static void calc_segtree_probs(unsigned *segcounts,
vpx_prob *segment_tree_probs, const vpx_prob *cur_tree_probs) {
vpx_prob *segment_tree_probs,
const vpx_prob *cur_tree_probs) {
// Work out probabilities of each segment
const unsigned cc[4] = {
segcounts[0] + segcounts[1], segcounts[2] + segcounts[3],
segcounts[4] + segcounts[5], segcounts[6] + segcounts[7]
};
const unsigned cc[4] = { segcounts[0] + segcounts[1],
segcounts[2] + segcounts[3],
segcounts[4] + segcounts[5],
segcounts[6] + segcounts[7] };
const unsigned ccc[2] = { cc[0] + cc[1], cc[2] + cc[3] };
int i;
@@ -69,10 +68,10 @@ static void calc_segtree_probs(unsigned *segcounts,
segment_tree_probs[6] = get_binary_prob(segcounts[6], segcounts[7]);
for (i = 0; i < 7; i++) {
const unsigned *ct = i == 0 ? ccc : i < 3 ? cc + (i & 2)
: segcounts + (i - 3) * 2;
vp10_prob_diff_update_savings_search(ct,
cur_tree_probs[i], &segment_tree_probs[i], DIFF_UPDATE_PROB);
const unsigned *ct =
i == 0 ? ccc : i < 3 ? cc + (i & 2) : segcounts + (i - 3) * 2;
vp10_prob_diff_update_savings_search(
ct, cur_tree_probs[i], &segment_tree_probs[i], DIFF_UPDATE_PROB);
}
}
@@ -86,13 +85,11 @@ static int cost_segmap(unsigned *segcounts, vpx_prob *probs) {
const int c4567 = c45 + c67;
// Cost the top node of the tree
int cost = c0123 * vp10_cost_zero(probs[0]) +
c4567 * vp10_cost_one(probs[0]);
int cost = c0123 * vp10_cost_zero(probs[0]) + c4567 * vp10_cost_one(probs[0]);
// Cost subsequent levels
if (c0123 > 0) {
cost += c01 * vp10_cost_zero(probs[1]) +
c23 * vp10_cost_one(probs[1]);
cost += c01 * vp10_cost_zero(probs[1]) + c23 * vp10_cost_one(probs[1]);
if (c01 > 0)
cost += segcounts[0] * vp10_cost_zero(probs[3]) +
@@ -103,8 +100,7 @@ static int cost_segmap(unsigned *segcounts, vpx_prob *probs) {
}
if (c4567 > 0) {
cost += c45 * vp10_cost_zero(probs[2]) +
c67 * vp10_cost_one(probs[2]);
cost += c45 * vp10_cost_zero(probs[2]) + c67 * vp10_cost_one(probs[2]);
if (c45 > 0)
cost += segcounts[4] * vp10_cost_zero(probs[5]) +
@@ -121,12 +117,11 @@ static void count_segs(const VP10_COMMON *cm, MACROBLOCKD *xd,
const TileInfo *tile, MODE_INFO **mi,
unsigned *no_pred_segcounts,
unsigned (*temporal_predictor_count)[2],
unsigned *t_unpred_seg_counts,
int bw, int bh, int mi_row, int mi_col) {
unsigned *t_unpred_seg_counts, int bw, int bh,
int mi_row, int mi_col) {
int segment_id;
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
return;
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
xd->mi = mi;
segment_id = xd->mi[0]->mbmi.segment_id;
@@ -140,8 +135,8 @@ static void count_segs(const VP10_COMMON *cm, MACROBLOCKD *xd,
if (cm->frame_type != KEY_FRAME) {
const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
// Test to see if the segment id matches the predicted value.
const int pred_segment_id = get_segment_id(cm, cm->last_frame_seg_map,
bsize, mi_row, mi_col);
const int pred_segment_id =
get_segment_id(cm, cm->last_frame_seg_map, bsize, mi_row, mi_col);
const int pred_flag = pred_segment_id == segment_id;
const int pred_context = vp10_get_pred_context_seg_id(xd);
@@ -151,8 +146,7 @@ static void count_segs(const VP10_COMMON *cm, MACROBLOCKD *xd,
temporal_predictor_count[pred_context][pred_flag]++;
// Update the "unpredicted" segment count
if (!pred_flag)
t_unpred_seg_counts[segment_id]++;
if (!pred_flag) t_unpred_seg_counts[segment_id]++;
}
}
@@ -160,8 +154,7 @@ static void count_segs_sb(const VP10_COMMON *cm, MACROBLOCKD *xd,
const TileInfo *tile, MODE_INFO **mi,
unsigned *no_pred_segcounts,
unsigned (*temporal_predictor_count)[2],
unsigned *t_unpred_seg_counts,
int mi_row, int mi_col,
unsigned *t_unpred_seg_counts, int mi_row, int mi_col,
BLOCK_SIZE bsize) {
const int mis = cm->mi_stride;
const int bs = num_8x8_blocks_wide_lookup[bsize], hbs = bs / 2;
@@ -171,8 +164,7 @@ static void count_segs_sb(const VP10_COMMON *cm, MACROBLOCKD *xd,
int bw, bh;
#endif // CONFIG_EXT_PARTITION_TYPES
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
return;
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
#if CONFIG_EXT_PARTITION_TYPES
if (bsize == BLOCK_8X8)
@@ -194,9 +186,9 @@ static void count_segs_sb(const VP10_COMMON *cm, MACROBLOCKD *xd,
case PARTITION_VERT:
count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count,
t_unpred_seg_counts, hbs, bs, mi_row, mi_col);
count_segs(cm, xd, tile, mi + hbs,
no_pred_segcounts, temporal_predictor_count,
t_unpred_seg_counts, hbs, bs, mi_row, mi_col + hbs);
count_segs(cm, xd, tile, mi + hbs, no_pred_segcounts,
temporal_predictor_count, t_unpred_seg_counts, hbs, bs, mi_row,
mi_col + hbs);
break;
case PARTITION_HORZ_A:
count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count,
@@ -224,41 +216,37 @@ static void count_segs_sb(const VP10_COMMON *cm, MACROBLOCKD *xd,
count_segs(cm, xd, tile, mi + hbs * mis, no_pred_segcounts,
temporal_predictor_count, t_unpred_seg_counts, hbs, hbs,
mi_row + hbs, mi_col);
count_segs(cm, xd, tile, mi + hbs,
no_pred_segcounts, temporal_predictor_count,
t_unpred_seg_counts, hbs, bs, mi_row, mi_col + hbs);
count_segs(cm, xd, tile, mi + hbs, no_pred_segcounts,
temporal_predictor_count, t_unpred_seg_counts, hbs, bs, mi_row,
mi_col + hbs);
break;
case PARTITION_VERT_B:
count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count,
t_unpred_seg_counts, hbs, bs, mi_row, mi_col);
count_segs(cm, xd, tile, mi + hbs,
no_pred_segcounts, temporal_predictor_count,
t_unpred_seg_counts, hbs, hbs, mi_row, mi_col + hbs);
count_segs(cm, xd, tile, mi + hbs + hbs * mis,
no_pred_segcounts, temporal_predictor_count,
t_unpred_seg_counts, hbs, hbs, mi_row + hbs, mi_col + hbs);
count_segs(cm, xd, tile, mi + hbs, no_pred_segcounts,
temporal_predictor_count, t_unpred_seg_counts, hbs, hbs,
mi_row, mi_col + hbs);
count_segs(cm, xd, tile, mi + hbs + hbs * mis, no_pred_segcounts,
temporal_predictor_count, t_unpred_seg_counts, hbs, hbs,
mi_row + hbs, mi_col + hbs);
break;
case PARTITION_SPLIT:
{
const BLOCK_SIZE subsize = subsize_lookup[PARTITION_SPLIT][bsize];
int n;
case PARTITION_SPLIT: {
const BLOCK_SIZE subsize = subsize_lookup[PARTITION_SPLIT][bsize];
int n;
assert(num_8x8_blocks_wide_lookup[mi[0]->mbmi.sb_type] < bs &&
num_8x8_blocks_high_lookup[mi[0]->mbmi.sb_type] < bs);
assert(num_8x8_blocks_wide_lookup[mi[0]->mbmi.sb_type] < bs &&
num_8x8_blocks_high_lookup[mi[0]->mbmi.sb_type] < bs);
for (n = 0; n < 4; n++) {
const int mi_dc = hbs * (n & 1);
const int mi_dr = hbs * (n >> 1);
for (n = 0; n < 4; n++) {
const int mi_dc = hbs * (n & 1);
const int mi_dr = hbs * (n >> 1);
count_segs_sb(cm, xd, tile, &mi[mi_dr * mis + mi_dc],
no_pred_segcounts, temporal_predictor_count,
t_unpred_seg_counts,
mi_row + mi_dr, mi_col + mi_dc, subsize);
}
count_segs_sb(cm, xd, tile, &mi[mi_dr * mis + mi_dc], no_pred_segcounts,
temporal_predictor_count, t_unpred_seg_counts,
mi_row + mi_dr, mi_col + mi_dc, subsize);
}
break;
default:
assert(0);
} break;
default: assert(0);
}
#else
bw = num_8x8_blocks_wide_lookup[mi[0]->mbmi.sb_type];
@@ -276,9 +264,9 @@ static void count_segs_sb(const VP10_COMMON *cm, MACROBLOCKD *xd,
} else if (bw < bs && bh == bs) {
count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count,
t_unpred_seg_counts, hbs, bs, mi_row, mi_col);
count_segs(cm, xd, tile, mi + hbs,
no_pred_segcounts, temporal_predictor_count, t_unpred_seg_counts,
hbs, bs, mi_row, mi_col + hbs);
count_segs(cm, xd, tile, mi + hbs, no_pred_segcounts,
temporal_predictor_count, t_unpred_seg_counts, hbs, bs, mi_row,
mi_col + hbs);
} else {
const BLOCK_SIZE subsize = subsize_lookup[PARTITION_SPLIT][bsize];
int n;
@@ -289,9 +277,8 @@ static void count_segs_sb(const VP10_COMMON *cm, MACROBLOCKD *xd,
const int mi_dc = hbs * (n & 1);
const int mi_dr = hbs * (n >> 1);
count_segs_sb(cm, xd, tile, &mi[mi_dr * mis + mi_dc],
no_pred_segcounts, temporal_predictor_count,
t_unpred_seg_counts,
count_segs_sb(cm, xd, tile, &mi[mi_dr * mis + mi_dc], no_pred_segcounts,
temporal_predictor_count, t_unpred_seg_counts,
mi_row + mi_dr, mi_col + mi_dc, subsize);
}
}
@@ -307,7 +294,7 @@ void vp10_choose_segmap_coding_method(VP10_COMMON *cm, MACROBLOCKD *xd) {
int i, tile_col, tile_row, mi_row, mi_col;
unsigned (*temporal_predictor_count)[2] = cm->counts.seg.pred;
unsigned(*temporal_predictor_count)[2] = cm->counts.seg.pred;
unsigned *no_pred_segcounts = cm->counts.seg.tree_total;
unsigned *t_unpred_seg_counts = cm->counts.seg.tree_mispred;
@@ -315,7 +302,7 @@ void vp10_choose_segmap_coding_method(VP10_COMMON *cm, MACROBLOCKD *xd) {
vpx_prob t_pred_tree[SEG_TREE_PROBS];
vpx_prob t_nopred_prob[PREDICTION_PROBS];
(void) xd;
(void)xd;
// We are about to recompute all the segment counts, so zero the accumulators.
vp10_zero(cm->counts.seg);
@@ -329,21 +316,20 @@ void vp10_choose_segmap_coding_method(VP10_COMMON *cm, MACROBLOCKD *xd) {
MODE_INFO **mi_ptr;
vp10_tile_set_col(&tile_info, cm, tile_col);
mi_ptr = cm->mi_grid_visible + tile_info.mi_row_start * cm->mi_stride +
tile_info.mi_col_start;
tile_info.mi_col_start;
for (mi_row = tile_info.mi_row_start; mi_row < tile_info.mi_row_end;
mi_row += cm->mib_size, mi_ptr += cm->mib_size * cm->mi_stride) {
MODE_INFO **mi = mi_ptr;
for (mi_col = tile_info.mi_col_start; mi_col < tile_info.mi_col_end;
mi_col += cm->mib_size, mi += cm->mib_size) {
count_segs_sb(cm, xd, &tile_info, mi, no_pred_segcounts,
temporal_predictor_count, t_unpred_seg_counts,
mi_row, mi_col, cm->sb_size);
temporal_predictor_count, t_unpred_seg_counts, mi_row,
mi_col, cm->sb_size);
}
}
}
}
// Work out probability tree for coding segments without prediction
// and the cost.
calc_segtree_probs(no_pred_segcounts, no_pred_tree, segp->tree_probs);
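For intuition about calc_segtree_probs above (a sketch, not the library code): the eight segment counts are folded pairwise into a 7-node binary tree, and each node probability is just the frequency of taking the left branch, scaled into the 1..255 range. binary_prob() below is a stand-in for the library's get_binary_prob(); its exact rounding is an assumption.

// Sketch of the segment-tree layout used by calc_segtree_probs above.
static unsigned char binary_prob(unsigned n0, unsigned n1) {
  const unsigned den = n0 + n1;
  unsigned p;
  if (den == 0) return 128;                // no observations: even split
  p = (255 * n0 + den / 2) / den;          // rounded n0 / (n0 + n1)
  return (unsigned char)(p == 0 ? 1 : p);  // probabilities live in 1..255
}

static void segtree_probs_sketch(const unsigned c[8], unsigned char p[7]) {
  p[0] = binary_prob(c[0] + c[1] + c[2] + c[3], c[4] + c[5] + c[6] + c[7]);
  p[1] = binary_prob(c[0] + c[1], c[2] + c[3]);  // left subtree split
  p[2] = binary_prob(c[4] + c[5], c[6] + c[7]);  // right subtree split
  p[3] = binary_prob(c[0], c[1]);
  p[4] = binary_prob(c[2], c[3]);
  p[5] = binary_prob(c[4], c[5]);
  p[6] = binary_prob(c[6], c[7]);
}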

vp10/encoder/segmentation.h

@@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP10_ENCODER_SEGMENTATION_H_
#define VP10_ENCODER_SEGMENTATION_H_
@@ -22,12 +21,10 @@ extern "C" {
void vp10_enable_segmentation(struct segmentation *seg);
void vp10_disable_segmentation(struct segmentation *seg);
void vp10_disable_segfeature(struct segmentation *seg,
int segment_id,
SEG_LVL_FEATURES feature_id);
void vp10_clear_segdata(struct segmentation *seg,
int segment_id,
SEG_LVL_FEATURES feature_id);
void vp10_disable_segfeature(struct segmentation *seg, int segment_id,
SEG_LVL_FEATURES feature_id);
void vp10_clear_segdata(struct segmentation *seg, int segment_id,
SEG_LVL_FEATURES feature_id);
// The values given for each segment can be either deltas (from the default
// value chosen for the frame) or absolute values.
@@ -40,7 +37,7 @@ void vp10_clear_segdata(struct segmentation *seg,
// abs_delta = SEGMENT_DELTADATA (deltas) abs_delta = SEGMENT_ABSDATA (use
// the absolute values given).
void vp10_set_segment_data(struct segmentation *seg, signed char *feature_data,
unsigned char abs_delta);
unsigned char abs_delta);
void vp10_choose_segmap_coding_method(VP10_COMMON *cm, MACROBLOCKD *xd);
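A hypothetical setup sequence using these entry points (illustrative only; the segment id is a placeholder, and SEGMENT_ABSDATA is the absolute-values mode described in the comment above):

// Sketch: turn segmentation on, install absolute per-segment data,
// then clear one feature on segment 0. 'seg' comes from common state.
static void segmentation_setup_sketch(struct segmentation *seg,
                                      signed char *feature_data,
                                      SEG_LVL_FEATURES feature_id) {
  vp10_enable_segmentation(seg);
  vp10_set_segment_data(seg, feature_data, SEGMENT_ABSDATA);
  vp10_clear_segdata(seg, 0 /* segment_id */, feature_id);
}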

vp10/encoder/speed_features.c

@@ -17,21 +17,23 @@
#include "vpx_dsp/vpx_dsp_common.h"
// Mesh search patterns for various speed settings
static MESH_PATTERN best_quality_mesh_pattern[MAX_MESH_STEP] =
{{64, 4}, {28, 2}, {15, 1}, {7, 1}};
static MESH_PATTERN best_quality_mesh_pattern[MAX_MESH_STEP] = {
{ 64, 4 }, { 28, 2 }, { 15, 1 }, { 7, 1 }
};
#define MAX_MESH_SPEED 5 // Max speed setting for mesh motion method
static MESH_PATTERN good_quality_mesh_patterns[MAX_MESH_SPEED + 1]
[MAX_MESH_STEP] =
{{{64, 8}, {28, 4}, {15, 1}, {7, 1}},
{{64, 8}, {28, 4}, {15, 1}, {7, 1}},
{{64, 8}, {14, 2}, {7, 1}, {7, 1}},
{{64, 16}, {24, 8}, {12, 4}, {7, 1}},
{{64, 16}, {24, 8}, {12, 4}, {7, 1}},
{{64, 16}, {24, 8}, {12, 4}, {7, 1}},
static MESH_PATTERN
good_quality_mesh_patterns[MAX_MESH_SPEED + 1][MAX_MESH_STEP] = {
{ { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } },
{ { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } },
{ { 64, 8 }, { 14, 2 }, { 7, 1 }, { 7, 1 } },
{ { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
{ { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
{ { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
};
static unsigned char good_quality_max_mesh_pct[MAX_MESH_SPEED + 1] =
{50, 25, 15, 5, 1, 1};
static unsigned char good_quality_max_mesh_pct[MAX_MESH_SPEED + 1] = {
50, 25, 15, 5, 1, 1
};
// Intra only frames, golden frames (except alt ref overlays) and
// alt ref frames tend to be coded at a higher than ambient quality
@@ -68,8 +70,8 @@ static void set_good_speed_feature_framesize_dependent(VP10_COMP *cpi,
if (speed >= 1) {
if (VPXMIN(cm->width, cm->height) >= 720) {
sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT
: DISABLE_ALL_INTER_SPLIT;
sf->disable_split_mask =
cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
sf->partition_search_breakout_dist_thr = (1 << 23);
} else {
sf->disable_split_mask = DISABLE_COMPOUND_SPLIT;
@@ -79,8 +81,8 @@ static void set_good_speed_feature_framesize_dependent(VP10_COMP *cpi,
if (speed >= 2) {
if (VPXMIN(cm->width, cm->height) >= 720) {
sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT
: DISABLE_ALL_INTER_SPLIT;
sf->disable_split_mask =
cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
sf->adaptive_pred_interp_filter = 0;
sf->partition_search_breakout_dist_thr = (1 << 24);
sf->partition_search_breakout_rate_thr = 120;
@@ -138,7 +140,7 @@ static void set_good_speed_feature(VP10_COMP *cpi, VP10_COMMON *cm,
sf->use_square_partition_only = !frame_is_intra_only(cm);
}
sf->less_rectangular_check = 1;
sf->less_rectangular_check = 1;
sf->use_rd_breakout = 1;
sf->adaptive_motion_search = 1;
@@ -169,13 +171,13 @@ static void set_good_speed_feature(VP10_COMP *cpi, VP10_COMMON *cm,
}
if (speed >= 2) {
sf->tx_size_search_method = frame_is_boosted(cpi) ? USE_FULL_RD
: USE_LARGESTALL;
sf->mode_search_skip_flags = (cm->frame_type == KEY_FRAME) ? 0 :
FLAG_SKIP_INTRA_DIRMISMATCH |
FLAG_SKIP_INTRA_BESTINTER |
FLAG_SKIP_COMP_BESTINTRA |
FLAG_SKIP_INTRA_LOWVAR;
sf->tx_size_search_method =
frame_is_boosted(cpi) ? USE_FULL_RD : USE_LARGESTALL;
sf->mode_search_skip_flags =
(cm->frame_type == KEY_FRAME) ? 0 : FLAG_SKIP_INTRA_DIRMISMATCH |
FLAG_SKIP_INTRA_BESTINTER |
FLAG_SKIP_COMP_BESTINTRA |
FLAG_SKIP_INTRA_LOWVAR;
sf->disable_filter_search_var_thresh = 100;
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
@@ -189,8 +191,8 @@ static void set_good_speed_feature(VP10_COMP *cpi, VP10_COMMON *cm,
if (speed >= 3) {
sf->use_square_partition_only = !frame_is_intra_only(cm);
sf->tx_size_search_method = frame_is_intra_only(cm) ? USE_FULL_RD
: USE_LARGESTALL;
sf->tx_size_search_method =
frame_is_intra_only(cm) ? USE_FULL_RD : USE_LARGESTALL;
sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED;
sf->adaptive_pred_interp_filter = 0;
sf->adaptive_mode_search = 1;
@@ -236,12 +238,13 @@ static void set_good_speed_feature(VP10_COMP *cpi, VP10_COMMON *cm,
}
static void set_rt_speed_feature_framesize_dependent(VP10_COMP *cpi,
SPEED_FEATURES *sf, int speed) {
SPEED_FEATURES *sf,
int speed) {
VP10_COMMON *const cm = &cpi->common;
if (speed >= 1) {
if (VPXMIN(cm->width, cm->height) >= 720) {
sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT
: DISABLE_ALL_INTER_SPLIT;
sf->disable_split_mask =
cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
} else {
sf->disable_split_mask = DISABLE_COMPOUND_SPLIT;
}
@@ -249,8 +252,8 @@ static void set_rt_speed_feature_framesize_dependent(VP10_COMP *cpi,
if (speed >= 2) {
if (VPXMIN(cm->width, cm->height) >= 720) {
sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT
: DISABLE_ALL_INTER_SPLIT;
sf->disable_split_mask =
cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
} else {
sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY;
}
@@ -265,13 +268,13 @@ static void set_rt_speed_feature_framesize_dependent(VP10_COMP *cpi,
}
if (speed >= 7) {
sf->encode_breakout_thresh = (VPXMIN(cm->width, cm->height) >= 720) ?
800 : 300;
sf->encode_breakout_thresh =
(VPXMIN(cm->width, cm->height) >= 720) ? 800 : 300;
}
}
static void set_rt_speed_feature(VP10_COMP *cpi, SPEED_FEATURES *sf,
int speed, vpx_tune_content content) {
static void set_rt_speed_feature(VP10_COMP *cpi, SPEED_FEATURES *sf, int speed,
vpx_tune_content content) {
VP10_COMMON *const cm = &cpi->common;
const int is_keyframe = cm->frame_type == KEY_FRAME;
const int frames_since_key = is_keyframe ? 0 : cpi->rc.frames_since_key;
@@ -293,8 +296,8 @@ static void set_rt_speed_feature(VP10_COMP *cpi, SPEED_FEATURES *sf,
if (speed >= 1) {
sf->use_square_partition_only = !frame_is_intra_only(cm);
sf->less_rectangular_check = 1;
sf->tx_size_search_method = frame_is_intra_only(cm) ? USE_FULL_RD
: USE_LARGESTALL;
sf->tx_size_search_method =
frame_is_intra_only(cm) ? USE_FULL_RD : USE_LARGESTALL;
sf->use_rd_breakout = 1;
@@ -307,13 +310,12 @@ static void set_rt_speed_feature(VP10_COMP *cpi, SPEED_FEATURES *sf,
sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
}
if (speed >= 2) {
sf->mode_search_skip_flags = (cm->frame_type == KEY_FRAME) ? 0 :
FLAG_SKIP_INTRA_DIRMISMATCH |
FLAG_SKIP_INTRA_BESTINTER |
FLAG_SKIP_COMP_BESTINTRA |
FLAG_SKIP_INTRA_LOWVAR;
sf->mode_search_skip_flags =
(cm->frame_type == KEY_FRAME) ? 0 : FLAG_SKIP_INTRA_DIRMISMATCH |
FLAG_SKIP_INTRA_BESTINTER |
FLAG_SKIP_COMP_BESTINTRA |
FLAG_SKIP_INTRA_LOWVAR;
sf->adaptive_pred_interp_filter = 2;
sf->disable_filter_search_var_thresh = 50;
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
@@ -345,8 +347,8 @@ static void set_rt_speed_feature(VP10_COMP *cpi, SPEED_FEATURES *sf,
sf->use_fast_coef_costing = 0;
sf->auto_min_max_partition_size = STRICT_NEIGHBORING_MIN_MAX;
sf->adjust_partitioning_from_last_frame =
cm->last_frame_type != cm->frame_type || (0 ==
(frames_since_key + 1) % sf->last_partitioning_redo_frequency);
cm->last_frame_type != cm->frame_type ||
(0 == (frames_since_key + 1) % sf->last_partitioning_redo_frequency);
sf->mv.subpel_force_stop = 1;
for (i = 0; i < TX_SIZES; i++) {
sf->intra_y_mode_mask[i] = INTRA_DC_H_V;
@@ -369,11 +371,12 @@ static void set_rt_speed_feature(VP10_COMP *cpi, SPEED_FEATURES *sf,
}
if (speed >= 5) {
sf->auto_min_max_partition_size = is_keyframe ? RELAXED_NEIGHBORING_MIN_MAX
: STRICT_NEIGHBORING_MIN_MAX;
sf->auto_min_max_partition_size =
is_keyframe ? RELAXED_NEIGHBORING_MIN_MAX : STRICT_NEIGHBORING_MIN_MAX;
sf->default_max_partition_size = BLOCK_32X32;
sf->default_min_partition_size = BLOCK_8X8;
sf->force_frame_boost = is_keyframe ||
sf->force_frame_boost =
is_keyframe ||
(frames_since_key % (sf->last_partitioning_redo_frequency << 1) == 1);
sf->max_delta_qindex = is_keyframe ? 20 : 15;
sf->partition_search_type = REFERENCE_PARTITION;
@@ -530,8 +533,7 @@ void vp10_set_speed_features_framesize_independent(VP10_COMP *cpi) {
sf->use_fast_coef_costing = 0;
sf->mode_skip_start = MAX_MODES; // Mode index at which mode skip mask set
sf->schedule_mode_search = 0;
for (i = 0; i < BLOCK_SIZES; ++i)
sf->inter_mode_mask[i] = INTER_ALL;
for (i = 0; i < BLOCK_SIZES; ++i) sf->inter_mode_mask[i] = INTER_ALL;
sf->max_intra_bsize = BLOCK_LARGEST;
sf->reuse_inter_pred_sby = 0;
// This setting only takes effect when partition_search_type is set
@@ -547,7 +549,7 @@ void vp10_set_speed_features_framesize_independent(VP10_COMP *cpi) {
sf->partition_search_breakout_rate_thr = 0;
sf->simple_model_rd_from_var = 0;
// Set this at the appropriate speed levels
// Set this at the appropriate speed levels
#if CONFIG_EXT_TILE
sf->use_transform_domain_distortion = 1;
#else
@@ -590,8 +592,7 @@ void vp10_set_speed_features_framesize_independent(VP10_COMP *cpi) {
sf->exhaustive_searches_thresh = sf->exhaustive_searches_thresh << 1;
for (i = 0; i < MAX_MESH_STEP; ++i) {
sf->mesh_patterns[i].range =
good_quality_mesh_patterns[speed][i].range;
sf->mesh_patterns[i].range = good_quality_mesh_patterns[speed][i].range;
sf->mesh_patterns[i].interval =
good_quality_mesh_patterns[speed][i].interval;
}
@@ -599,8 +600,7 @@ void vp10_set_speed_features_framesize_independent(VP10_COMP *cpi) {
// Slow quant, dct and trellis not worthwhile for first pass
// so make sure they are always turned off.
if (oxcf->pass == 1)
sf->optimize_coefficients = 0;
if (oxcf->pass == 1) sf->optimize_coefficients = 0;
// No recode for 1 pass.
if (oxcf->pass == 0) {
@@ -615,7 +615,8 @@ void vp10_set_speed_features_framesize_independent(VP10_COMP *cpi) {
} else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED_MORE) {
cpi->find_fractional_mv_step = vp10_find_best_sub_pixel_tree_pruned_more;
} else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED_EVENMORE) {
cpi->find_fractional_mv_step = vp10_find_best_sub_pixel_tree_pruned_evenmore;
cpi->find_fractional_mv_step =
vp10_find_best_sub_pixel_tree_pruned_evenmore;
}
x->optimize = sf->optimize_coefficients == 1 && oxcf->pass != 1;
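To see how the {range, interval} tables above are meant to be consumed, here is a sketch of a generic mesh search, under the assumption that each stage scans a grid of the given range and spacing and then recenters on the best point found so far (the real search lives in the motion-search code and also honors good_quality_max_mesh_pct):

// Sketch of a mesh-pattern scan; check_point() stands in for the
// encoder's error metric at a candidate (row, col).
typedef struct { int range, interval; } mesh_step_sketch;

static void mesh_search_sketch(const mesh_step_sketch *steps, int nsteps,
                               int *best_r, int *best_c,
                               long (*check_point)(int r, int c)) {
  int s, dr, dc;
  long best = check_point(*best_r, *best_c);
  for (s = 0; s < nsteps; ++s) {
    const int cr = *best_r, cc = *best_c;  // recenter each stage
    for (dr = -steps[s].range; dr <= steps[s].range; dr += steps[s].interval) {
      for (dc = -steps[s].range; dc <= steps[s].range;
           dc += steps[s].interval) {
        const long err = check_point(cr + dr, cc + dc);
        if (err < best) {
          best = err;
          *best_r = cr + dr;
          *best_c = cc + dc;
        }
      }
    }
  }
}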

vp10/encoder/speed_features.h

@@ -18,28 +18,24 @@ extern "C" {
#endif
enum {
INTRA_ALL = (1 << DC_PRED) |
(1 << V_PRED) | (1 << H_PRED) |
(1 << D45_PRED) | (1 << D135_PRED) |
(1 << D117_PRED) | (1 << D153_PRED) |
(1 << D207_PRED) | (1 << D63_PRED) |
(1 << TM_PRED),
INTRA_DC = (1 << DC_PRED),
INTRA_DC_TM = (1 << DC_PRED) | (1 << TM_PRED),
INTRA_DC_H_V = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED),
INTRA_DC_TM_H_V = (1 << DC_PRED) | (1 << TM_PRED) | (1 << V_PRED) |
(1 << H_PRED)
INTRA_ALL = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED) | (1 << D45_PRED) |
(1 << D135_PRED) | (1 << D117_PRED) | (1 << D153_PRED) |
(1 << D207_PRED) | (1 << D63_PRED) | (1 << TM_PRED),
INTRA_DC = (1 << DC_PRED),
INTRA_DC_TM = (1 << DC_PRED) | (1 << TM_PRED),
INTRA_DC_H_V = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED),
INTRA_DC_TM_H_V =
(1 << DC_PRED) | (1 << TM_PRED) | (1 << V_PRED) | (1 << H_PRED)
};
#if CONFIG_EXT_INTER
enum {
INTER_ALL =
(1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) |
(1 << NEWMV) | (1 << NEWFROMNEARMV) |
(1 << NEAREST_NEARESTMV) | (1 << NEAR_NEARMV) | (1 << NEAREST_NEARMV) |
(1 << NEAR_NEARESTMV) | (1 << NEW_NEWMV) | (1 << NEAREST_NEWMV) |
(1 << NEAR_NEWMV) | (1 << NEW_NEARMV) | (1 << NEW_NEARESTMV) |
(1 << ZERO_ZEROMV),
INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) | (1 << NEWMV) |
(1 << NEWFROMNEARMV) | (1 << NEAREST_NEARESTMV) |
(1 << NEAR_NEARMV) | (1 << NEAREST_NEARMV) |
(1 << NEAR_NEARESTMV) | (1 << NEW_NEWMV) | (1 << NEAREST_NEWMV) |
(1 << NEAR_NEWMV) | (1 << NEW_NEARMV) | (1 << NEW_NEARESTMV) |
(1 << ZERO_ZEROMV),
INTER_NEAREST = (1 << NEARESTMV) | (1 << NEAREST_NEARESTMV) |
(1 << NEAREST_NEARMV) | (1 << NEAR_NEARESTMV) |
(1 << NEW_NEARESTMV) | (1 << NEAREST_NEWMV),
@@ -55,20 +51,17 @@ enum {
INTER_NEAREST_NEW_ZERO =
(1 << NEARESTMV) | (1 << ZEROMV) | (1 << NEWMV) | (1 << NEWFROMNEARMV) |
(1 << NEAREST_NEARESTMV) | (1 << ZERO_ZEROMV) | (1 << NEW_NEWMV) |
(1 << NEAREST_NEARMV) | (1 << NEAR_NEARESTMV) |
(1 << NEW_NEARESTMV) | (1 << NEAREST_NEWMV) |
(1 << NEW_NEARMV) | (1 << NEAR_NEWMV),
(1 << NEAREST_NEARMV) | (1 << NEAR_NEARESTMV) | (1 << NEW_NEARESTMV) |
(1 << NEAREST_NEWMV) | (1 << NEW_NEARMV) | (1 << NEAR_NEWMV),
INTER_NEAREST_NEAR_NEW =
(1 << NEARESTMV) | (1 << NEARMV) | (1 << NEWMV) | (1 << NEWFROMNEARMV) |
(1 << NEAREST_NEARESTMV) | (1 << NEW_NEWMV) |
(1 << NEAREST_NEARMV) | (1 << NEAR_NEARESTMV) |
(1 << NEW_NEARESTMV) | (1 << NEAREST_NEWMV) |
(1 << NEAREST_NEARESTMV) | (1 << NEW_NEWMV) | (1 << NEAREST_NEARMV) |
(1 << NEAR_NEARESTMV) | (1 << NEW_NEARESTMV) | (1 << NEAREST_NEWMV) |
(1 << NEW_NEARMV) | (1 << NEAR_NEWMV) | (1 << NEAR_NEARMV),
INTER_NEAREST_NEAR_ZERO =
(1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) |
(1 << NEAREST_NEARESTMV) | (1 << ZERO_ZEROMV) |
(1 << NEAREST_NEARMV) | (1 << NEAR_NEARESTMV) |
(1 << NEAREST_NEWMV) | (1 << NEW_NEARESTMV) |
(1 << NEAREST_NEARESTMV) | (1 << ZERO_ZEROMV) | (1 << NEAREST_NEARMV) |
(1 << NEAR_NEARESTMV) | (1 << NEAREST_NEWMV) | (1 << NEW_NEARESTMV) |
(1 << NEW_NEARMV) | (1 << NEAR_NEWMV) | (1 << NEAR_NEARMV),
};
#else
@@ -84,20 +77,15 @@
#endif // CONFIG_EXT_INTER
enum {
DISABLE_ALL_INTER_SPLIT = (1 << THR_COMP_GA) |
(1 << THR_COMP_LA) |
(1 << THR_ALTR) |
(1 << THR_GOLD) |
(1 << THR_LAST),
DISABLE_ALL_INTER_SPLIT = (1 << THR_COMP_GA) | (1 << THR_COMP_LA) |
(1 << THR_ALTR) | (1 << THR_GOLD) | (1 << THR_LAST),
DISABLE_ALL_SPLIT = (1 << THR_INTRA) | DISABLE_ALL_INTER_SPLIT,
DISABLE_ALL_SPLIT = (1 << THR_INTRA) | DISABLE_ALL_INTER_SPLIT,
DISABLE_COMPOUND_SPLIT = (1 << THR_COMP_GA) | (1 << THR_COMP_LA),
DISABLE_COMPOUND_SPLIT = (1 << THR_COMP_GA) | (1 << THR_COMP_LA),
LAST_AND_INTRA_SPLIT_ONLY = (1 << THR_COMP_GA) |
(1 << THR_COMP_LA) |
(1 << THR_ALTR) |
(1 << THR_GOLD)
LAST_AND_INTRA_SPLIT_ONLY = (1 << THR_COMP_GA) | (1 << THR_COMP_LA) |
(1 << THR_ALTR) | (1 << THR_GOLD)
};
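These enums are plain bitmasks over the THR_* indices; a mask is consulted by testing the candidate's bit, as in this hypothetical helper:

// Hypothetical helper: a split candidate is disabled when its bit is set,
// e.g. split_disabled_sketch(DISABLE_COMPOUND_SPLIT, THR_COMP_GA) != 0.
static int split_disabled_sketch(unsigned mask, int thr_index) {
  return (mask & (1u << thr_index)) != 0;
}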
typedef enum {

vp10/encoder/subexp.c

@@ -14,25 +14,23 @@
#include "vp10/encoder/cost.h"
#include "vp10/encoder/subexp.h"
#define vp10_cost_upd256 ((int)(vp10_cost_one(upd) - vp10_cost_zero(upd)))
#define vp10_cost_upd256 ((int)(vp10_cost_one(upd) - vp10_cost_zero(upd)))
static const uint8_t update_bits[255] = {
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 0,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 0,
};
static int recenter_nonneg(int v, int m) {
@@ -49,23 +47,23 @@ static int remap_prob(int v, int m) {
static const uint8_t map_table[MAX_PROB - 1] = {
// generated by:
// map_table[j] = split_index(j, MAX_PROB - 1, MODULUS_PARAM);
20, 21, 22, 23, 24, 25, 0, 26, 27, 28, 29, 30, 31, 32, 33,
34, 35, 36, 37, 1, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 2, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
3, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 4, 74,
75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 5, 86, 87, 88,
89, 90, 91, 92, 93, 94, 95, 96, 97, 6, 98, 99, 100, 101, 102,
103, 104, 105, 106, 107, 108, 109, 7, 110, 111, 112, 113, 114, 115, 116,
117, 118, 119, 120, 121, 8, 122, 123, 124, 125, 126, 127, 128, 129, 130,
131, 132, 133, 9, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144,
145, 10, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 11,
158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 12, 170, 171,
172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 13, 182, 183, 184, 185,
186, 187, 188, 189, 190, 191, 192, 193, 14, 194, 195, 196, 197, 198, 199,
200, 201, 202, 203, 204, 205, 15, 206, 207, 208, 209, 210, 211, 212, 213,
214, 215, 216, 217, 16, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227,
228, 229, 17, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241,
18, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 19,
20, 21, 22, 23, 24, 25, 0, 26, 27, 28, 29, 30, 31, 32, 33,
34, 35, 36, 37, 1, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 2, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
3, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 4, 74,
75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 5, 86, 87, 88,
89, 90, 91, 92, 93, 94, 95, 96, 97, 6, 98, 99, 100, 101, 102,
103, 104, 105, 106, 107, 108, 109, 7, 110, 111, 112, 113, 114, 115, 116,
117, 118, 119, 120, 121, 8, 122, 123, 124, 125, 126, 127, 128, 129, 130,
131, 132, 133, 9, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144,
145, 10, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 11,
158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 12, 170, 171,
172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 13, 182, 183, 184, 185,
186, 187, 188, 189, 190, 191, 192, 193, 14, 194, 195, 196, 197, 198, 199,
200, 201, 202, 203, 204, 205, 15, 206, 207, 208, 209, 210, 211, 212, 213,
214, 215, 216, 217, 16, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227,
228, 229, 17, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241,
18, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 19,
};
v--;
m--;
@@ -116,9 +114,8 @@ void vp10_write_prob_diff_update(vp10_writer *w, vpx_prob newp, vpx_prob oldp) {
encode_term_subexp(w, delp);
}
int vp10_prob_diff_update_savings_search(const unsigned int *ct,
vpx_prob oldp, vpx_prob *bestp,
vpx_prob upd) {
int vp10_prob_diff_update_savings_search(const unsigned int *ct, vpx_prob oldp,
vpx_prob *bestp, vpx_prob upd) {
const int old_b = cost_branch256(ct, oldp);
int bestsavings = 0;
vpx_prob newp, bestnewp = oldp;
@@ -138,10 +135,9 @@ int vp10_prob_diff_update_savings_search(const unsigned int *ct,
}
int vp10_prob_diff_update_savings_search_model(const unsigned int *ct,
const vpx_prob *oldp,
vpx_prob *bestp,
vpx_prob upd,
int stepsize) {
const vpx_prob *oldp,
vpx_prob *bestp, vpx_prob upd,
int stepsize) {
int i, old_b, new_b, update_b, savings, bestsavings;
int newp;
const int step_sign = *bestp > oldp[PIVOT_NODE] ? -1 : 1;
@@ -158,17 +154,14 @@ int vp10_prob_diff_update_savings_search_model(const unsigned int *ct,
assert(stepsize > 0);
for (newp = *bestp; (newp - oldp[PIVOT_NODE]) * step_sign < 0;
newp += step) {
if (newp < 1 || newp > 255)
continue;
for (newp = *bestp; (newp - oldp[PIVOT_NODE]) * step_sign < 0; newp += step) {
if (newp < 1 || newp > 255) continue;
newplist[PIVOT_NODE] = newp;
vp10_model_to_full_probs(newplist, newplist);
for (i = UNCONSTRAINED_NODES, new_b = 0; i < ENTROPY_NODES; ++i)
new_b += cost_branch256(ct + 2 * i, newplist[i]);
new_b += cost_branch256(ct + 2 * PIVOT_NODE, newplist[PIVOT_NODE]);
update_b = prob_diff_update_cost(newp, oldp[PIVOT_NODE]) +
vp10_cost_upd256;
update_b = prob_diff_update_cost(newp, oldp[PIVOT_NODE]) + vp10_cost_upd256;
savings = old_b - new_b - update_b;
if (savings > bestsavings) {
bestsavings = savings;
@@ -183,7 +176,7 @@ int vp10_prob_diff_update_savings_search_model(const unsigned int *ct,
#if CONFIG_ENTROPY
static int get_cost(unsigned int ct[][2], vpx_prob p, int n) {
int i, p0 = p;
unsigned int total_ct[2] = {0 , 0};
unsigned int total_ct[2] = { 0, 0 };
int cost = 0;
for (i = 0; i <= n; ++i) {
@@ -191,15 +184,14 @@ static int get_cost(unsigned int ct[][2], vpx_prob p, int n) {
total_ct[0] += ct[i][0];
total_ct[1] += ct[i][1];
if (i < n)
p = vp10_merge_probs(p0, total_ct,
COEF_COUNT_SAT_BITS, COEF_MAX_UPDATE_FACTOR_BITS);
p = vp10_merge_probs(p0, total_ct, COEF_COUNT_SAT_BITS,
COEF_MAX_UPDATE_FACTOR_BITS);
}
return cost;
}
int vp10_prob_update_search_subframe(unsigned int ct[][2],
vpx_prob oldp, vpx_prob *bestp,
vpx_prob upd, int n) {
int vp10_prob_update_search_subframe(unsigned int ct[][2], vpx_prob oldp,
vpx_prob *bestp, vpx_prob upd, int n) {
const int old_b = get_cost(ct, oldp, n);
int bestsavings = 0;
vpx_prob newp, bestnewp = oldp;
@@ -218,11 +210,9 @@ int vp10_prob_update_search_subframe(unsigned int ct[][2],
return bestsavings;
}
int vp10_prob_update_search_model_subframe(unsigned int ct[ENTROPY_NODES]
[COEF_PROBS_BUFS][2],
const vpx_prob *oldp,
vpx_prob *bestp, vpx_prob upd,
int stepsize, int n) {
int vp10_prob_update_search_model_subframe(
unsigned int ct[ENTROPY_NODES][COEF_PROBS_BUFS][2], const vpx_prob *oldp,
vpx_prob *bestp, vpx_prob upd, int stepsize, int n) {
int i, old_b, new_b, update_b, savings, bestsavings;
int newp;
const int step_sign = *bestp > oldp[PIVOT_NODE] ? -1 : 1;
@@ -239,17 +229,14 @@ int vp10_prob_update_search_model_subframe(unsigned int ct[ENTROPY_NODES]
assert(stepsize > 0);
for (newp = *bestp; (newp - oldp[PIVOT_NODE]) * step_sign < 0;
newp += step) {
if (newp < 1 || newp > 255)
continue;
for (newp = *bestp; (newp - oldp[PIVOT_NODE]) * step_sign < 0; newp += step) {
if (newp < 1 || newp > 255) continue;
newplist[PIVOT_NODE] = newp;
vp10_model_to_full_probs(newplist, newplist);
for (i = UNCONSTRAINED_NODES, new_b = 0; i < ENTROPY_NODES; ++i)
new_b += get_cost(ct[i], newplist[i], n);
new_b += get_cost(ct[PIVOT_NODE], newplist[PIVOT_NODE], n);
update_b = prob_diff_update_cost(newp, oldp[PIVOT_NODE]) +
vp10_cost_upd256;
update_b = prob_diff_update_cost(newp, oldp[PIVOT_NODE]) + vp10_cost_upd256;
savings = old_b - new_b - update_b;
if (savings > bestsavings) {
bestsavings = savings;
@@ -263,11 +250,11 @@
#endif // CONFIG_ENTROPY
void vp10_cond_prob_diff_update(vp10_writer *w, vpx_prob *oldp,
const unsigned int ct[2]) {
const unsigned int ct[2]) {
const vpx_prob upd = DIFF_UPDATE_PROB;
vpx_prob newp = get_binary_prob(ct[0], ct[1]);
const int savings = vp10_prob_diff_update_savings_search(ct, *oldp, &newp,
upd);
const int savings =
vp10_prob_diff_update_savings_search(ct, *oldp, &newp, upd);
assert(newp >= 1);
if (savings > 0) {
vp10_write(w, 1, upd);
@@ -282,13 +269,13 @@ int vp10_cond_prob_diff_update_savings(vpx_prob *oldp,
const unsigned int ct[2]) {
const vpx_prob upd = DIFF_UPDATE_PROB;
vpx_prob newp = get_binary_prob(ct[0], ct[1]);
const int savings = vp10_prob_diff_update_savings_search(ct, *oldp, &newp,
upd);
const int savings =
vp10_prob_diff_update_savings_search(ct, *oldp, &newp, upd);
return savings;
}
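Both functions above follow the same pattern: search for the probability with the best bit savings, then decide whether those savings pay for an explicit update flag plus a subexponentially coded delta. A condensed restatement (the writer calls are the real entry points; the framing here is a sketch):

// Condensed restatement of vp10_cond_prob_diff_update's control flow.
static void cond_update_sketch(vp10_writer *w, vpx_prob *oldp,
                               const unsigned int ct[2]) {
  const vpx_prob upd = DIFF_UPDATE_PROB;
  vpx_prob newp = get_binary_prob(ct[0], ct[1]);
  if (vp10_prob_diff_update_savings_search(ct, *oldp, &newp, upd) > 0) {
    vp10_write(w, 1, upd);                        // signal "update follows"
    vp10_write_prob_diff_update(w, newp, *oldp);  // subexponential delta
    *oldp = newp;
  } else {
    vp10_write(w, 0, upd);                        // keep the old probability
  }
}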
void vp10_write_primitive_symmetric(vp10_writer *w, int word,
unsigned int abs_bits) {
unsigned int abs_bits) {
if (word == 0) {
vp10_write_bit(w, 0);
} else {

vp10/encoder/subexp.h

@@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP10_ENCODER_SUBEXP_H_
#define VP10_ENCODER_SUBEXP_H_
@@ -20,34 +19,28 @@ extern "C" {
struct vp10_writer;
void vp10_write_prob_diff_update(struct vp10_writer *w,
vpx_prob newp, vpx_prob oldp);
void vp10_write_prob_diff_update(struct vp10_writer *w, vpx_prob newp,
vpx_prob oldp);
void vp10_cond_prob_diff_update(struct vp10_writer *w, vpx_prob *oldp,
const unsigned int ct[2]);
int vp10_prob_diff_update_savings_search(const unsigned int *ct,
vpx_prob oldp, vpx_prob *bestp,
vpx_prob upd);
const unsigned int ct[2]);
int vp10_prob_diff_update_savings_search(const unsigned int *ct, vpx_prob oldp,
vpx_prob *bestp, vpx_prob upd);
int vp10_prob_diff_update_savings_search_model(const unsigned int *ct,
const vpx_prob *oldp,
vpx_prob *bestp,
vpx_prob upd,
int stepsize);
const vpx_prob *oldp,
vpx_prob *bestp, vpx_prob upd,
int stepsize);
int vp10_cond_prob_diff_update_savings(vpx_prob *oldp,
const unsigned int ct[2]);
#if CONFIG_ENTROPY
int vp10_prob_update_search_subframe(unsigned int ct[][2],
vpx_prob oldp, vpx_prob *bestp,
vpx_prob upd, int n);
int vp10_prob_update_search_model_subframe(unsigned int ct[ENTROPY_NODES]
[COEF_PROBS_BUFS][2],
const vpx_prob *oldp,
vpx_prob *bestp, vpx_prob upd,
int stepsize, int n);
int vp10_prob_update_search_subframe(unsigned int ct[][2], vpx_prob oldp,
vpx_prob *bestp, vpx_prob upd, int n);
int vp10_prob_update_search_model_subframe(
unsigned int ct[ENTROPY_NODES][COEF_PROBS_BUFS][2], const vpx_prob *oldp,
vpx_prob *bestp, vpx_prob upd, int stepsize, int n);
#endif // CONFIG_ENTROPY
//
@@ -56,7 +49,7 @@ int vp10_prob_update_search_model_subframe(unsigned int ct[ENTROPY_NODES]
// indicate 0 or non-zero, mag_bits bits are used to indicate magnitude
// and 1 more bit for the sign if non-zero.
void vp10_write_primitive_symmetric(vp10_writer *w, int word,
unsigned int mag_bits);
unsigned int mag_bits);
#ifdef __cplusplus
} // extern "C"
#endif

vp10/encoder/temporal_filter.c

@@ -31,18 +31,10 @@
static int fixed_divide[512];
static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd,
uint8_t *y_mb_ptr,
uint8_t *u_mb_ptr,
uint8_t *v_mb_ptr,
int stride,
int uv_block_width,
int uv_block_height,
int mv_row,
int mv_col,
uint8_t *pred,
struct scale_factors *scale,
int x, int y) {
static void temporal_filter_predictors_mb_c(
MACROBLOCKD *xd, uint8_t *y_mb_ptr, uint8_t *u_mb_ptr, uint8_t *v_mb_ptr,
int stride, int uv_block_width, int uv_block_height, int mv_row, int mv_col,
uint8_t *pred, struct scale_factors *scale, int x, int y) {
const int which_mv = 0;
const MV mv = { mv_row, mv_col };
enum mv_precision mv_precision_uv;
@@ -50,10 +42,10 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd,
#if USE_TEMPORALFILTER_12TAP
#if CONFIG_DUAL_FILTER
const INTERP_FILTER interp_filter[4] = {
TEMPORALFILTER_12TAP, TEMPORALFILTER_12TAP,
TEMPORALFILTER_12TAP, TEMPORALFILTER_12TAP
};
const INTERP_FILTER interp_filter[4] = { TEMPORALFILTER_12TAP,
TEMPORALFILTER_12TAP,
TEMPORALFILTER_12TAP,
TEMPORALFILTER_12TAP };
#else
const INTERP_FILTER interp_filter = TEMPORALFILTER_12TAP;
#endif
@@ -72,76 +64,45 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd,
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
vp10_highbd_build_inter_predictor(y_mb_ptr, stride,
&pred[0], 16,
&mv,
scale,
16, 16,
which_mv,
interp_filter,
MV_PRECISION_Q3, x, y, xd->bd);
vp10_highbd_build_inter_predictor(y_mb_ptr, stride, &pred[0], 16, &mv,
scale, 16, 16, which_mv, interp_filter,
MV_PRECISION_Q3, x, y, xd->bd);
vp10_highbd_build_inter_predictor(u_mb_ptr, uv_stride,
&pred[256], uv_block_width,
&mv,
scale,
uv_block_width, uv_block_height,
which_mv,
interp_filter,
mv_precision_uv, x, y, xd->bd);
vp10_highbd_build_inter_predictor(
u_mb_ptr, uv_stride, &pred[256], uv_block_width, &mv, scale,
uv_block_width, uv_block_height, which_mv, interp_filter,
mv_precision_uv, x, y, xd->bd);
vp10_highbd_build_inter_predictor(v_mb_ptr, uv_stride,
&pred[512], uv_block_width,
&mv,
scale,
uv_block_width, uv_block_height,
which_mv,
interp_filter,
mv_precision_uv, x, y, xd->bd);
vp10_highbd_build_inter_predictor(
v_mb_ptr, uv_stride, &pred[512], uv_block_width, &mv, scale,
uv_block_width, uv_block_height, which_mv, interp_filter,
mv_precision_uv, x, y, xd->bd);
return;
}
#endif // CONFIG_VP9_HIGHBITDEPTH
vp10_build_inter_predictor(y_mb_ptr, stride,
&pred[0], 16,
&mv,
scale,
16, 16,
which_mv,
interp_filter, MV_PRECISION_Q3, x, y);
vp10_build_inter_predictor(y_mb_ptr, stride, &pred[0], 16, &mv, scale, 16, 16,
which_mv, interp_filter, MV_PRECISION_Q3, x, y);
vp10_build_inter_predictor(u_mb_ptr, uv_stride,
&pred[256], uv_block_width,
&mv,
scale,
uv_block_width, uv_block_height,
which_mv,
interp_filter, mv_precision_uv, x, y);
vp10_build_inter_predictor(u_mb_ptr, uv_stride, &pred[256], uv_block_width,
&mv, scale, uv_block_width, uv_block_height,
which_mv, interp_filter, mv_precision_uv, x, y);
vp10_build_inter_predictor(v_mb_ptr, uv_stride,
&pred[512], uv_block_width,
&mv,
scale,
uv_block_width, uv_block_height,
which_mv,
interp_filter, mv_precision_uv, x, y);
vp10_build_inter_predictor(v_mb_ptr, uv_stride, &pred[512], uv_block_width,
&mv, scale, uv_block_width, uv_block_height,
which_mv, interp_filter, mv_precision_uv, x, y);
}
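Note the fixed layout of the predictor scratch buffer filled above: Y occupies the first 256 samples (16x16), U starts at offset 256 and V at offset 512, each chroma block sized uv_block_width x uv_block_height. The enum names below are illustrative; the code indexes pred directly:

// Layout of the per-macroblock predictor buffer used throughout this file.
enum { PRED_Y_OFFSET = 0, PRED_U_OFFSET = 256, PRED_V_OFFSET = 512 };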
void vp10_temporal_filter_init(void) {
int i;
fixed_divide[0] = 0;
for (i = 1; i < 512; ++i)
fixed_divide[i] = 0x80000 / i;
for (i = 1; i < 512; ++i) fixed_divide[i] = 0x80000 / i;
}
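fixed_divide[] holds Q19 reciprocals (0x80000 is 2^19), so the filter can replace a divide-by-count with a multiply and a shift. The intended identity, as a sketch (where it is applied elsewhere in the filter is assumed, not shown here):

// Approximate x / d via the reciprocal table built above.
// Valid for 1 <= d < 512; accuracy degrades as x grows toward overflow.
static unsigned int fixed_div_sketch(unsigned int x, unsigned int d) {
  return (x * fixed_divide[d]) >> 19;  // fixed_divide[d] == 0x80000 / d
}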
void vp10_temporal_filter_apply_c(uint8_t *frame1,
unsigned int stride,
uint8_t *frame2,
unsigned int block_width,
unsigned int block_height,
int strength,
int filter_weight,
unsigned int *accumulator,
void vp10_temporal_filter_apply_c(uint8_t *frame1, unsigned int stride,
uint8_t *frame2, unsigned int block_width,
unsigned int block_height, int strength,
int filter_weight, unsigned int *accumulator,
uint16_t *count) {
unsigned int i, j, k;
int modifier;
@@ -161,10 +122,10 @@ void vp10_temporal_filter_apply_c(uint8_t *frame1,
int row = i + idy;
int col = j + idx;
if (row >= 0 && row < (int)block_height &&
col >= 0 && col < (int)block_width) {
if (row >= 0 && row < (int)block_height && col >= 0 &&
col < (int)block_width) {
int diff = frame1[byte + idy * (int)stride + idx] -
frame2[idy * (int)block_width + idx];
frame2[idy * (int)block_width + idx];
diff_sse[index] = diff * diff;
++index;
}
@@ -174,19 +135,17 @@ void vp10_temporal_filter_apply_c(uint8_t *frame1,
assert(index > 0);
modifier = 0;
for (idx = 0; idx < 9; ++idx)
modifier += diff_sse[idx];
for (idx = 0; idx < 9; ++idx) modifier += diff_sse[idx];
modifier *= 3;
modifier /= index;
++frame2;
modifier += rounding;
modifier += rounding;
modifier >>= strength;
if (modifier > 16)
modifier = 16;
if (modifier > 16) modifier = 16;
modifier = 16 - modifier;
modifier *= filter_weight;
@@ -202,15 +161,10 @@ void vp10_temporal_filter_apply_c(uint8_t *frame1,
}
#if CONFIG_VP9_HIGHBITDEPTH
void vp10_highbd_temporal_filter_apply_c(uint8_t *frame1_8,
unsigned int stride,
uint8_t *frame2_8,
unsigned int block_width,
unsigned int block_height,
int strength,
int filter_weight,
unsigned int *accumulator,
uint16_t *count) {
void vp10_highbd_temporal_filter_apply_c(
uint8_t *frame1_8, unsigned int stride, uint8_t *frame2_8,
unsigned int block_width, unsigned int block_height, int strength,
int filter_weight, unsigned int *accumulator, uint16_t *count) {
uint16_t *frame1 = CONVERT_TO_SHORTPTR(frame1_8);
uint16_t *frame2 = CONVERT_TO_SHORTPTR(frame2_8);
unsigned int i, j, k;
@@ -231,10 +185,10 @@ void vp10_highbd_temporal_filter_apply_c(uint8_t *frame1_8,
int row = i + idy;
int col = j + idx;
if (row >= 0 && row < (int)block_height &&
col >= 0 && col < (int)block_width) {
if (row >= 0 && row < (int)block_height && col >= 0 &&
col < (int)block_width) {
int diff = frame1[byte + idy * (int)stride + idx] -
frame2[idy * (int)block_width + idx];
frame2[idy * (int)block_width + idx];
diff_sse[index] = diff * diff;
++index;
}
@@ -244,8 +198,7 @@ void vp10_highbd_temporal_filter_apply_c(uint8_t *frame1_8,
assert(index > 0);
modifier = 0;
for (idx = 0; idx < 9; ++idx)
modifier += diff_sse[idx];
for (idx = 0; idx < 9; ++idx) modifier += diff_sse[idx];
modifier *= 3;
modifier /= index;
@@ -255,8 +208,7 @@ void vp10_highbd_temporal_filter_apply_c(uint8_t *frame1_8,
modifier += rounding;
modifier >>= strength;
if (modifier > 16)
modifier = 16;
if (modifier > 16) modifier = 16;
modifier = 16 - modifier;
modifier *= filter_weight;
@@ -286,7 +238,7 @@ static int temporal_filter_find_matching_mb_c(VP10_COMP *cpi,
unsigned int sse;
int cost_list[5];
MV best_ref_mv1 = {0, 0};
MV best_ref_mv1 = { 0, 0 };
MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
// Save input state
@@ -314,18 +266,15 @@ static int temporal_filter_find_matching_mb_c(VP10_COMP *cpi,
// Ignore mv costing by sending NULL pointer instead of cost arrays
vp10_hex_search(x, &best_ref_mv1_full, step_param, sadpb, 1,
cond_cost_list(cpi, cost_list),
&cpi->fn_ptr[BLOCK_16X16], 0, &best_ref_mv1);
cond_cost_list(cpi, cost_list), &cpi->fn_ptr[BLOCK_16X16], 0,
&best_ref_mv1);
// Ignore mv costing by sending NULL pointer instead of cost array
bestsme = cpi->find_fractional_mv_step(x, &best_ref_mv1,
cpi->common.allow_high_precision_mv,
x->errorperbit,
&cpi->fn_ptr[BLOCK_16X16],
0, mv_sf->subpel_iters_per_step,
cond_cost_list(cpi, cost_list),
NULL, NULL,
&distortion, &sse, NULL, 0, 0, 0);
bestsme = cpi->find_fractional_mv_step(
x, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit,
&cpi->fn_ptr[BLOCK_16X16], 0, mv_sf->subpel_iters_per_step,
cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0,
0);
x->e_mbd.mi[0]->bmi[0].as_mv[0] = x->best_mv;
@@ -338,8 +287,7 @@ static int temporal_filter_find_matching_mb_c(VP10_COMP *cpi,
static void temporal_filter_iterate_c(VP10_COMP *cpi,
YV12_BUFFER_CONFIG **frames,
int frame_count,
int alt_ref_index,
int frame_count, int alt_ref_index,
int strength,
struct scale_factors *scale) {
int byte;
@@ -356,17 +304,17 @@ static void temporal_filter_iterate_c(VP10_COMP *cpi,
YV12_BUFFER_CONFIG *f = frames[alt_ref_index];
uint8_t *dst1, *dst2;
#if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED(16, uint16_t, predictor16[16 * 16 * 3]);
DECLARE_ALIGNED(16, uint8_t, predictor8[16 * 16 * 3]);
DECLARE_ALIGNED(16, uint16_t, predictor16[16 * 16 * 3]);
DECLARE_ALIGNED(16, uint8_t, predictor8[16 * 16 * 3]);
uint8_t *predictor;
#else
DECLARE_ALIGNED(16, uint8_t, predictor[16 * 16 * 3]);
DECLARE_ALIGNED(16, uint8_t, predictor[16 * 16 * 3]);
#endif
const int mb_uv_height = 16 >> mbd->plane[1].subsampling_y;
const int mb_uv_width = 16 >> mbd->plane[1].subsampling_x;
const int mb_uv_width = 16 >> mbd->plane[1].subsampling_x;
// Save input state
uint8_t* input_buffer[MAX_MB_PLANE];
uint8_t *input_buffer[MAX_MB_PLANE];
int i;
#if CONFIG_VP9_HIGHBITDEPTH
if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -376,8 +324,7 @@ static void temporal_filter_iterate_c(VP10_COMP *cpi,
}
#endif
for (i = 0; i < MAX_MB_PLANE; i++)
input_buffer[i] = mbd->plane[i].pre[0].buf;
for (i = 0; i < MAX_MB_PLANE; i++) input_buffer[i] = mbd->plane[i].pre[0].buf;
for (mb_row = 0; mb_row < mb_rows; mb_row++) {
// Source frames are extended to 16 pixels. This is different than
@@ -392,8 +339,8 @@ static void temporal_filter_iterate_c(VP10_COMP *cpi,
// To keep the mv in play for both Y and UV planes the max that it
// can be on a border is therefore 16 - (2*VPX_INTERP_EXTEND+1).
cpi->td.mb.mv_row_min = -((mb_row * 16) + (17 - 2 * VPX_INTERP_EXTEND));
cpi->td.mb.mv_row_max = ((mb_rows - 1 - mb_row) * 16)
+ (17 - 2 * VPX_INTERP_EXTEND);
cpi->td.mb.mv_row_max =
((mb_rows - 1 - mb_row) * 16) + (17 - 2 * VPX_INTERP_EXTEND);
for (mb_col = 0; mb_col < mb_cols; mb_col++) {
int i, j, k;
@@ -403,15 +350,14 @@ static void temporal_filter_iterate_c(VP10_COMP *cpi,
memset(count, 0, 16 * 16 * 3 * sizeof(count[0]));
cpi->td.mb.mv_col_min = -((mb_col * 16) + (17 - 2 * VPX_INTERP_EXTEND));
cpi->td.mb.mv_col_max = ((mb_cols - 1 - mb_col) * 16)
+ (17 - 2 * VPX_INTERP_EXTEND);
cpi->td.mb.mv_col_max =
((mb_cols - 1 - mb_col) * 16) + (17 - 2 * VPX_INTERP_EXTEND);
for (frame = 0; frame < frame_count; frame++) {
const int thresh_low = 10000;
const int thresh_low = 10000;
const int thresh_high = 20000;
if (frames[frame] == NULL)
continue;
if (frames[frame] == NULL) continue;
mbd->mi[0]->bmi[0].as_mv[0].as_mv.row = 0;
mbd->mi[0]->bmi[0].as_mv[0].as_mv.col = 0;
@@ -420,84 +366,68 @@ static void temporal_filter_iterate_c(VP10_COMP *cpi,
filter_weight = 2;
} else {
// Find best match in this frame by MC
int err = temporal_filter_find_matching_mb_c(cpi,
frames[alt_ref_index]->y_buffer + mb_y_offset,
frames[frame]->y_buffer + mb_y_offset,
frames[frame]->y_stride);
int err = temporal_filter_find_matching_mb_c(
cpi, frames[alt_ref_index]->y_buffer + mb_y_offset,
frames[frame]->y_buffer + mb_y_offset, frames[frame]->y_stride);
// Assign a higher weight to the matching MB if its error
// score is lower. If MC is not applied, the default behavior
// is to weight all MBs equally.
filter_weight = err < thresh_low
? 2 : err < thresh_high ? 1 : 0;
filter_weight = err < thresh_low ? 2 : err < thresh_high ? 1 : 0;
}
if (filter_weight != 0) {
// Construct the predictors
temporal_filter_predictors_mb_c(mbd,
frames[frame]->y_buffer + mb_y_offset,
temporal_filter_predictors_mb_c(
mbd, frames[frame]->y_buffer + mb_y_offset,
frames[frame]->u_buffer + mb_uv_offset,
frames[frame]->v_buffer + mb_uv_offset,
frames[frame]->y_stride,
mb_uv_width, mb_uv_height,
mbd->mi[0]->bmi[0].as_mv[0].as_mv.row,
mbd->mi[0]->bmi[0].as_mv[0].as_mv.col,
predictor, scale,
frames[frame]->v_buffer + mb_uv_offset, frames[frame]->y_stride,
mb_uv_width, mb_uv_height, mbd->mi[0]->bmi[0].as_mv[0].as_mv.row,
mbd->mi[0]->bmi[0].as_mv[0].as_mv.col, predictor, scale,
mb_col * 16, mb_row * 16);
#if CONFIG_VP9_HIGHBITDEPTH
if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
int adj_strength = strength + 2 * (mbd->bd - 8);
// Apply the filter (YUV)
vp10_highbd_temporal_filter_apply(f->y_buffer + mb_y_offset,
f->y_stride,
predictor, 16, 16, adj_strength,
filter_weight,
accumulator, count);
vp10_highbd_temporal_filter_apply(f->u_buffer + mb_uv_offset,
f->uv_stride, predictor + 256,
mb_uv_width, mb_uv_height,
adj_strength,
filter_weight, accumulator + 256,
count + 256);
vp10_highbd_temporal_filter_apply(f->v_buffer + mb_uv_offset,
f->uv_stride, predictor + 512,
mb_uv_width, mb_uv_height,
adj_strength, filter_weight,
accumulator + 512, count + 512);
vp10_highbd_temporal_filter_apply(
f->y_buffer + mb_y_offset, f->y_stride, predictor, 16, 16,
adj_strength, filter_weight, accumulator, count);
vp10_highbd_temporal_filter_apply(
f->u_buffer + mb_uv_offset, f->uv_stride, predictor + 256,
mb_uv_width, mb_uv_height, adj_strength, filter_weight,
accumulator + 256, count + 256);
vp10_highbd_temporal_filter_apply(
f->v_buffer + mb_uv_offset, f->uv_stride, predictor + 512,
mb_uv_width, mb_uv_height, adj_strength, filter_weight,
accumulator + 512, count + 512);
} else {
// Apply the filter (YUV)
vp10_temporal_filter_apply_c(f->y_buffer + mb_y_offset, f->y_stride,
predictor, 16, 16,
strength, filter_weight,
accumulator, count);
vp10_temporal_filter_apply_c(f->u_buffer + mb_uv_offset,
f->uv_stride, predictor + 256,
mb_uv_width, mb_uv_height, strength,
filter_weight, accumulator + 256,
count + 256);
vp10_temporal_filter_apply_c(f->v_buffer + mb_uv_offset,
f->uv_stride, predictor + 512,
mb_uv_width, mb_uv_height, strength,
filter_weight, accumulator + 512,
count + 512);
predictor, 16, 16, strength,
filter_weight, accumulator, count);
vp10_temporal_filter_apply_c(
f->u_buffer + mb_uv_offset, f->uv_stride, predictor + 256,
mb_uv_width, mb_uv_height, strength, filter_weight,
accumulator + 256, count + 256);
vp10_temporal_filter_apply_c(
f->v_buffer + mb_uv_offset, f->uv_stride, predictor + 512,
mb_uv_width, mb_uv_height, strength, filter_weight,
accumulator + 512, count + 512);
}
#else
// Apply the filter (YUV)
vp10_temporal_filter_apply_c(f->y_buffer + mb_y_offset, f->y_stride,
predictor, 16, 16,
strength, filter_weight,
accumulator, count);
predictor, 16, 16, strength,
filter_weight, accumulator, count);
vp10_temporal_filter_apply_c(f->u_buffer + mb_uv_offset, f->uv_stride,
predictor + 256,
mb_uv_width, mb_uv_height, strength,
filter_weight, accumulator + 256,
count + 256);
predictor + 256, mb_uv_width,
mb_uv_height, strength, filter_weight,
accumulator + 256, count + 256);
vp10_temporal_filter_apply_c(f->v_buffer + mb_uv_offset, f->uv_stride,
predictor + 512,
mb_uv_width, mb_uv_height, strength,
filter_weight, accumulator + 512,
count + 512);
predictor + 512, mb_uv_width,
mb_uv_height, strength, filter_weight,
accumulator + 512, count + 512);
#endif // CONFIG_VP9_HIGHBITDEPTH
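// For reference: predictor[] holds the 16x16 Y block at offset 0, U at
// offset 256, and V at offset 512 (16 * 16 = 256 samples per plane slot),
// which is why the chroma calls above pass predictor + 256 and predictor + 512.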
}
}
@@ -652,13 +582,11 @@ static void temporal_filter_iterate_c(VP10_COMP *cpi,
}
// Restore input state
for (i = 0; i < MAX_MB_PLANE; i++)
mbd->plane[i].pre[0].buf = input_buffer[i];
for (i = 0; i < MAX_MB_PLANE; i++) mbd->plane[i].pre[0].buf = input_buffer[i];
}
// Apply buffer limits and context-specific adjustments to the arnr filter.
static void adjust_arnr_filter(VP10_COMP *cpi,
int distance, int group_boost,
static void adjust_arnr_filter(VP10_COMP *cpi, int distance, int group_boost,
int *arnr_frames, int *arnr_strength) {
const VP10EncoderConfig *const oxcf = &cpi->oxcf;
const int frames_after_arf =
@@ -668,34 +596,30 @@ static void adjust_arnr_filter(VP10_COMP *cpi,
int q, frames, strength;
// Define the forward and backwards filter limits for this arnr group.
if (frames_fwd > frames_after_arf)
frames_fwd = frames_after_arf;
if (frames_fwd > distance)
frames_fwd = distance;
if (frames_fwd > frames_after_arf) frames_fwd = frames_after_arf;
if (frames_fwd > distance) frames_fwd = distance;
frames_bwd = frames_fwd;
// For an even-length filter there is one more frame backward
// than forward: e.g. len=6 ==> bbbAff, len=7 ==> bbbAfff.
if (frames_bwd < distance)
frames_bwd += (oxcf->arnr_max_frames + 1) & 0x1;
if (frames_bwd < distance) frames_bwd += (oxcf->arnr_max_frames + 1) & 0x1;
// Set the baseline active filter size.
frames = frames_bwd + 1 + frames_fwd;
// Adjust the strength based on active max q.
if (cpi->common.current_video_frame > 1)
q = ((int)vp10_convert_qindex_to_q(
cpi->rc.avg_frame_qindex[INTER_FRAME], cpi->common.bit_depth));
q = ((int)vp10_convert_qindex_to_q(cpi->rc.avg_frame_qindex[INTER_FRAME],
cpi->common.bit_depth));
else
q = ((int)vp10_convert_qindex_to_q(
cpi->rc.avg_frame_qindex[KEY_FRAME], cpi->common.bit_depth));
q = ((int)vp10_convert_qindex_to_q(cpi->rc.avg_frame_qindex[KEY_FRAME],
cpi->common.bit_depth));
if (q > 16) {
strength = oxcf->arnr_strength;
} else {
strength = oxcf->arnr_strength - ((16 - q) / 2);
if (strength < 0)
strength = 0;
if (strength < 0) strength = 0;
}
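// Worked example of the clamp above: with q = 10 the reduction is
// (16 - 10) / 2 = 3, so an oxcf->arnr_strength of 2 would clamp to 0.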
// Adjust number of frames in filter and strength based on gf boost level.
@@ -729,7 +653,7 @@ void vp10_temporal_filter(VP10_COMP *cpi, int distance) {
int frames_to_blur_backward;
int frames_to_blur_forward;
struct scale_factors sf;
YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS] = {NULL};
YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS] = { NULL };
// Apply context specific adjustments to the arnr filter parameters.
adjust_arnr_filter(cpi, distance, rc->gfu_boost, &frames_to_blur, &strength);
@@ -746,28 +670,24 @@ void vp10_temporal_filter(VP10_COMP *cpi, int distance) {
// Set up frame pointers; NULL indicates a frame is not included in the filter.
for (frame = 0; frame < frames_to_blur; ++frame) {
const int which_buffer = start_frame - frame;
struct lookahead_entry *buf = vp10_lookahead_peek(cpi->lookahead,
which_buffer);
struct lookahead_entry *buf =
vp10_lookahead_peek(cpi->lookahead, which_buffer);
frames[frames_to_blur - 1 - frame] = &buf->img;
}
if (frames_to_blur > 0) {
// Setup scaling factors. Scaling on each of the arnr frames is not
// supported.
// ARF is produced at the native frame size and resized when coded.
#if CONFIG_VP9_HIGHBITDEPTH
vp10_setup_scale_factors_for_frame(&sf,
frames[0]->y_crop_width,
frames[0]->y_crop_height,
frames[0]->y_crop_width,
frames[0]->y_crop_height,
cpi->common.use_highbitdepth);
vp10_setup_scale_factors_for_frame(
&sf, frames[0]->y_crop_width, frames[0]->y_crop_height,
frames[0]->y_crop_width, frames[0]->y_crop_height,
cpi->common.use_highbitdepth);
#else
vp10_setup_scale_factors_for_frame(&sf,
frames[0]->y_crop_width,
frames[0]->y_crop_height,
frames[0]->y_crop_width,
frames[0]->y_crop_height);
vp10_setup_scale_factors_for_frame(
&sf, frames[0]->y_crop_width, frames[0]->y_crop_height,
frames[0]->y_crop_width, frames[0]->y_crop_height);
#endif // CONFIG_VP9_HIGHBITDEPTH
}

View file

@@ -25,62 +25,54 @@
#include "vp10/encoder/tokenize.h"
static const TOKENVALUE dct_cat_lt_10_value_tokens[] = {
{9, 63}, {9, 61}, {9, 59}, {9, 57}, {9, 55}, {9, 53}, {9, 51}, {9, 49},
{9, 47}, {9, 45}, {9, 43}, {9, 41}, {9, 39}, {9, 37}, {9, 35}, {9, 33},
{9, 31}, {9, 29}, {9, 27}, {9, 25}, {9, 23}, {9, 21}, {9, 19}, {9, 17},
{9, 15}, {9, 13}, {9, 11}, {9, 9}, {9, 7}, {9, 5}, {9, 3}, {9, 1},
{8, 31}, {8, 29}, {8, 27}, {8, 25}, {8, 23}, {8, 21},
{8, 19}, {8, 17}, {8, 15}, {8, 13}, {8, 11}, {8, 9},
{8, 7}, {8, 5}, {8, 3}, {8, 1},
{7, 15}, {7, 13}, {7, 11}, {7, 9}, {7, 7}, {7, 5}, {7, 3}, {7, 1},
{6, 7}, {6, 5}, {6, 3}, {6, 1}, {5, 3}, {5, 1},
{4, 1}, {3, 1}, {2, 1}, {1, 1}, {0, 0},
{1, 0}, {2, 0}, {3, 0}, {4, 0},
{5, 0}, {5, 2}, {6, 0}, {6, 2}, {6, 4}, {6, 6},
{7, 0}, {7, 2}, {7, 4}, {7, 6}, {7, 8}, {7, 10}, {7, 12}, {7, 14},
{8, 0}, {8, 2}, {8, 4}, {8, 6}, {8, 8}, {8, 10}, {8, 12},
{8, 14}, {8, 16}, {8, 18}, {8, 20}, {8, 22}, {8, 24},
{8, 26}, {8, 28}, {8, 30}, {9, 0}, {9, 2},
{9, 4}, {9, 6}, {9, 8}, {9, 10}, {9, 12}, {9, 14}, {9, 16},
{9, 18}, {9, 20}, {9, 22}, {9, 24}, {9, 26}, {9, 28},
{9, 30}, {9, 32}, {9, 34}, {9, 36}, {9, 38}, {9, 40},
{9, 42}, {9, 44}, {9, 46}, {9, 48}, {9, 50}, {9, 52},
{9, 54}, {9, 56}, {9, 58}, {9, 60}, {9, 62}
{ 9, 63 }, { 9, 61 }, { 9, 59 }, { 9, 57 }, { 9, 55 }, { 9, 53 }, { 9, 51 },
{ 9, 49 }, { 9, 47 }, { 9, 45 }, { 9, 43 }, { 9, 41 }, { 9, 39 }, { 9, 37 },
{ 9, 35 }, { 9, 33 }, { 9, 31 }, { 9, 29 }, { 9, 27 }, { 9, 25 }, { 9, 23 },
{ 9, 21 }, { 9, 19 }, { 9, 17 }, { 9, 15 }, { 9, 13 }, { 9, 11 }, { 9, 9 },
{ 9, 7 }, { 9, 5 }, { 9, 3 }, { 9, 1 }, { 8, 31 }, { 8, 29 }, { 8, 27 },
{ 8, 25 }, { 8, 23 }, { 8, 21 }, { 8, 19 }, { 8, 17 }, { 8, 15 }, { 8, 13 },
{ 8, 11 }, { 8, 9 }, { 8, 7 }, { 8, 5 }, { 8, 3 }, { 8, 1 }, { 7, 15 },
{ 7, 13 }, { 7, 11 }, { 7, 9 }, { 7, 7 }, { 7, 5 }, { 7, 3 }, { 7, 1 },
{ 6, 7 }, { 6, 5 }, { 6, 3 }, { 6, 1 }, { 5, 3 }, { 5, 1 }, { 4, 1 },
{ 3, 1 }, { 2, 1 }, { 1, 1 }, { 0, 0 }, { 1, 0 }, { 2, 0 }, { 3, 0 },
{ 4, 0 }, { 5, 0 }, { 5, 2 }, { 6, 0 }, { 6, 2 }, { 6, 4 }, { 6, 6 },
{ 7, 0 }, { 7, 2 }, { 7, 4 }, { 7, 6 }, { 7, 8 }, { 7, 10 }, { 7, 12 },
{ 7, 14 }, { 8, 0 }, { 8, 2 }, { 8, 4 }, { 8, 6 }, { 8, 8 }, { 8, 10 },
{ 8, 12 }, { 8, 14 }, { 8, 16 }, { 8, 18 }, { 8, 20 }, { 8, 22 }, { 8, 24 },
{ 8, 26 }, { 8, 28 }, { 8, 30 }, { 9, 0 }, { 9, 2 }, { 9, 4 }, { 9, 6 },
{ 9, 8 }, { 9, 10 }, { 9, 12 }, { 9, 14 }, { 9, 16 }, { 9, 18 }, { 9, 20 },
{ 9, 22 }, { 9, 24 }, { 9, 26 }, { 9, 28 }, { 9, 30 }, { 9, 32 }, { 9, 34 },
{ 9, 36 }, { 9, 38 }, { 9, 40 }, { 9, 42 }, { 9, 44 }, { 9, 46 }, { 9, 48 },
{ 9, 50 }, { 9, 52 }, { 9, 54 }, { 9, 56 }, { 9, 58 }, { 9, 60 }, { 9, 62 }
};
const TOKENVALUE *vp10_dct_cat_lt_10_value_tokens = dct_cat_lt_10_value_tokens +
(sizeof(dct_cat_lt_10_value_tokens) / sizeof(*dct_cat_lt_10_value_tokens))
/ 2;
const TOKENVALUE *vp10_dct_cat_lt_10_value_tokens =
dct_cat_lt_10_value_tokens +
(sizeof(dct_cat_lt_10_value_tokens) / sizeof(*dct_cat_lt_10_value_tokens)) /
2;
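// The pointer above is deliberately anchored at the middle entry of the table,
// so it can be indexed directly with a signed coefficient value. A minimal
// sketch of that use (the helper name is an assumption):
static INLINE TOKENVALUE small_value_token(int v) {
  // Valid for -CAT6_MIN_VAL < v < CAT6_MIN_VAL; negative v selects the
  // mirrored entries stored before the { 0, 0 } center entry.
  return vp10_dct_cat_lt_10_value_tokens[v];
}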
// The corresponding costs of the extrabits for the tokens in the above table
// are stored in the table below. The values are obtained by looking up the
// entry for the specified extrabits in the table corresponding to the token
// (as defined in the cost element of vp10_extra_bits),
// e.g. {9, 63} maps to cat5_cost[63 >> 1] and {1, 1} maps to sign_cost[1 >> 1].
static const int dct_cat_lt_10_value_cost[] = {
3773, 3750, 3704, 3681, 3623, 3600, 3554, 3531,
3432, 3409, 3363, 3340, 3282, 3259, 3213, 3190,
3136, 3113, 3067, 3044, 2986, 2963, 2917, 2894,
2795, 2772, 2726, 2703, 2645, 2622, 2576, 2553,
3197, 3116, 3058, 2977, 2881, 2800,
2742, 2661, 2615, 2534, 2476, 2395,
2299, 2218, 2160, 2079,
2566, 2427, 2334, 2195, 2023, 1884, 1791, 1652,
1893, 1696, 1453, 1256, 1229, 864,
512, 512, 512, 512, 0,
512, 512, 512, 512,
864, 1229, 1256, 1453, 1696, 1893,
1652, 1791, 1884, 2023, 2195, 2334, 2427, 2566,
2079, 2160, 2218, 2299, 2395, 2476, 2534, 2615,
2661, 2742, 2800, 2881, 2977, 3058, 3116, 3197,
2553, 2576, 2622, 2645, 2703, 2726, 2772, 2795,
2894, 2917, 2963, 2986, 3044, 3067, 3113, 3136,
3190, 3213, 3259, 3282, 3340, 3363, 3409, 3432,
3531, 3554, 3600, 3623, 3681, 3704, 3750, 3773,
3773, 3750, 3704, 3681, 3623, 3600, 3554, 3531, 3432, 3409, 3363, 3340, 3282,
3259, 3213, 3190, 3136, 3113, 3067, 3044, 2986, 2963, 2917, 2894, 2795, 2772,
2726, 2703, 2645, 2622, 2576, 2553, 3197, 3116, 3058, 2977, 2881, 2800, 2742,
2661, 2615, 2534, 2476, 2395, 2299, 2218, 2160, 2079, 2566, 2427, 2334, 2195,
2023, 1884, 1791, 1652, 1893, 1696, 1453, 1256, 1229, 864, 512, 512, 512,
512, 0, 512, 512, 512, 512, 864, 1229, 1256, 1453, 1696, 1893, 1652,
1791, 1884, 2023, 2195, 2334, 2427, 2566, 2079, 2160, 2218, 2299, 2395, 2476,
2534, 2615, 2661, 2742, 2800, 2881, 2977, 3058, 3116, 3197, 2553, 2576, 2622,
2645, 2703, 2726, 2772, 2795, 2894, 2917, 2963, 2986, 3044, 3067, 3113, 3136,
3190, 3213, 3259, 3282, 3340, 3363, 3409, 3432, 3531, 3554, 3600, 3623, 3681,
3704, 3750, 3773,
};
const int *vp10_dct_cat_lt_10_value_cost = dct_cat_lt_10_value_cost +
(sizeof(dct_cat_lt_10_value_cost) / sizeof(*dct_cat_lt_10_value_cost))
/ 2;
const int *vp10_dct_cat_lt_10_value_cost =
dct_cat_lt_10_value_cost +
(sizeof(dct_cat_lt_10_value_cost) / sizeof(*dct_cat_lt_10_value_cost)) / 2;
// Array indices are identical to previously-existing CONTEXT_NODE indices
/* clang-format off */
const vpx_tree_index vp10_coef_tree[TREE_SIZE(ENTROPY_TOKENS)] = {
-EOB_TOKEN, 2, // 0 = EOB
-ZERO_TOKEN, 4, // 1 = ZERO
@@ -94,251 +86,258 @@ const vpx_tree_index vp10_coef_tree[TREE_SIZE(ENTROPY_TOKENS)] = {
-CATEGORY3_TOKEN, -CATEGORY4_TOKEN, // 9 = CAT_THREE
-CATEGORY5_TOKEN, -CATEGORY6_TOKEN // 10 = CAT_FIVE
};
/* clang-format on */
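// For reference, a vpx_tree_index array stores a binary tree as pairs of
// entries: entry (i + bit) is either non-positive, a leaf holding the negated
// token, or the positive index of the next pair. A decoder-side sketch of that
// walk (vpx_reader and vpx_read are assumed from vpx_dsp/bitreader.h):
static INLINE int read_coef_token(vpx_reader *r, const vpx_prob *probs) {
  vpx_tree_index i = 0;
  while ((i = vp10_coef_tree[i + vpx_read(r, probs[i >> 1])]) > 0) continue;
  return -i;  // e.g. a single 0 bit at the root yields EOB_TOKEN
}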
static const vpx_tree_index cat1[2] = {0, 0};
static const vpx_tree_index cat2[4] = {2, 2, 0, 0};
static const vpx_tree_index cat3[6] = {2, 2, 4, 4, 0, 0};
static const vpx_tree_index cat4[8] = {2, 2, 4, 4, 6, 6, 0, 0};
static const vpx_tree_index cat5[10] = {2, 2, 4, 4, 6, 6, 8, 8, 0, 0};
static const vpx_tree_index cat6[28] = {2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12,
14, 14, 16, 16, 18, 18, 20, 20, 22, 22, 24, 24, 26, 26, 0, 0};
static const vpx_tree_index cat1[2] = { 0, 0 };
static const vpx_tree_index cat2[4] = { 2, 2, 0, 0 };
static const vpx_tree_index cat3[6] = { 2, 2, 4, 4, 0, 0 };
static const vpx_tree_index cat4[8] = { 2, 2, 4, 4, 6, 6, 0, 0 };
static const vpx_tree_index cat5[10] = { 2, 2, 4, 4, 6, 6, 8, 8, 0, 0 };
static const vpx_tree_index cat6[28] = { 2, 2, 4, 4, 6, 6, 8, 8, 10, 10,
12, 12, 14, 14, 16, 16, 18, 18, 20, 20,
22, 22, 24, 24, 26, 26, 0, 0 };
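// Note the shape of the catN tables above: both entries of every pair point at
// the next pair, so each bit coded along the "tree" is raw payload rather than
// a branch choice, and the terminal { 0, 0 } pair ends the walk. A sketch of
// emitting a category token's extra bits this way (vp10_extra_bit field names
// are inferred from the initializers below; vp10_write_tree is assumed from
// treewriter.h):
static void write_cat_extra_bits(tree_writer *w, const vp10_extra_bit *b,
                                 int extra_bits) {
  // b->len payload bits, most significant first, each with its own probability.
  vp10_write_tree(w, b->tree, b->prob, extra_bits, b->len, 0);
}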
static const int16_t zero_cost[] = {0};
static const int16_t sign_cost[1] = {512};
static const int16_t cat1_cost[1 << 1] = {864, 1229};
static const int16_t cat2_cost[1 << 2] = {1256, 1453, 1696, 1893};
static const int16_t cat3_cost[1 << 3] = {1652, 1791, 1884, 2023,
2195, 2334, 2427, 2566};
static const int16_t cat4_cost[1 << 4] = {2079, 2160, 2218, 2299, 2395, 2476,
2534, 2615, 2661, 2742, 2800, 2881,
2977, 3058, 3116, 3197};
static const int16_t zero_cost[] = { 0 };
static const int16_t sign_cost[1] = { 512 };
static const int16_t cat1_cost[1 << 1] = { 864, 1229 };
static const int16_t cat2_cost[1 << 2] = { 1256, 1453, 1696, 1893 };
static const int16_t cat3_cost[1 << 3] = { 1652, 1791, 1884, 2023,
2195, 2334, 2427, 2566 };
static const int16_t cat4_cost[1 << 4] = { 2079, 2160, 2218, 2299, 2395, 2476,
2534, 2615, 2661, 2742, 2800, 2881,
2977, 3058, 3116, 3197 };
static const int16_t cat5_cost[1 << 5] = {
2553, 2576, 2622, 2645, 2703, 2726, 2772, 2795, 2894, 2917, 2963,
2986, 3044, 3067, 3113, 3136, 3190, 3213, 3259, 3282, 3340, 3363,
3409, 3432, 3531, 3554, 3600, 3623, 3681, 3704, 3750, 3773};
2553, 2576, 2622, 2645, 2703, 2726, 2772, 2795, 2894, 2917, 2963,
2986, 3044, 3067, 3113, 3136, 3190, 3213, 3259, 3282, 3340, 3363,
3409, 3432, 3531, 3554, 3600, 3623, 3681, 3704, 3750, 3773
};
const int16_t vp10_cat6_low_cost[256] = {
3378, 3390, 3401, 3413, 3435, 3447, 3458, 3470, 3517, 3529, 3540, 3552,
3574, 3586, 3597, 3609, 3671, 3683, 3694, 3706, 3728, 3740, 3751, 3763,
3810, 3822, 3833, 3845, 3867, 3879, 3890, 3902, 3973, 3985, 3996, 4008,
4030, 4042, 4053, 4065, 4112, 4124, 4135, 4147, 4169, 4181, 4192, 4204,
4266, 4278, 4289, 4301, 4323, 4335, 4346, 4358, 4405, 4417, 4428, 4440,
4462, 4474, 4485, 4497, 4253, 4265, 4276, 4288, 4310, 4322, 4333, 4345,
4392, 4404, 4415, 4427, 4449, 4461, 4472, 4484, 4546, 4558, 4569, 4581,
4603, 4615, 4626, 4638, 4685, 4697, 4708, 4720, 4742, 4754, 4765, 4777,
4848, 4860, 4871, 4883, 4905, 4917, 4928, 4940, 4987, 4999, 5010, 5022,
5044, 5056, 5067, 5079, 5141, 5153, 5164, 5176, 5198, 5210, 5221, 5233,
5280, 5292, 5303, 5315, 5337, 5349, 5360, 5372, 4988, 5000, 5011, 5023,
5045, 5057, 5068, 5080, 5127, 5139, 5150, 5162, 5184, 5196, 5207, 5219,
5281, 5293, 5304, 5316, 5338, 5350, 5361, 5373, 5420, 5432, 5443, 5455,
5477, 5489, 5500, 5512, 5583, 5595, 5606, 5618, 5640, 5652, 5663, 5675,
5722, 5734, 5745, 5757, 5779, 5791, 5802, 5814, 5876, 5888, 5899, 5911,
5933, 5945, 5956, 5968, 6015, 6027, 6038, 6050, 6072, 6084, 6095, 6107,
5863, 5875, 5886, 5898, 5920, 5932, 5943, 5955, 6002, 6014, 6025, 6037,
6059, 6071, 6082, 6094, 6156, 6168, 6179, 6191, 6213, 6225, 6236, 6248,
6295, 6307, 6318, 6330, 6352, 6364, 6375, 6387, 6458, 6470, 6481, 6493,
6515, 6527, 6538, 6550, 6597, 6609, 6620, 6632, 6654, 6666, 6677, 6689,
6751, 6763, 6774, 6786, 6808, 6820, 6831, 6843, 6890, 6902, 6913, 6925,
6947, 6959, 6970, 6982};
3378, 3390, 3401, 3413, 3435, 3447, 3458, 3470, 3517, 3529, 3540, 3552, 3574,
3586, 3597, 3609, 3671, 3683, 3694, 3706, 3728, 3740, 3751, 3763, 3810, 3822,
3833, 3845, 3867, 3879, 3890, 3902, 3973, 3985, 3996, 4008, 4030, 4042, 4053,
4065, 4112, 4124, 4135, 4147, 4169, 4181, 4192, 4204, 4266, 4278, 4289, 4301,
4323, 4335, 4346, 4358, 4405, 4417, 4428, 4440, 4462, 4474, 4485, 4497, 4253,
4265, 4276, 4288, 4310, 4322, 4333, 4345, 4392, 4404, 4415, 4427, 4449, 4461,
4472, 4484, 4546, 4558, 4569, 4581, 4603, 4615, 4626, 4638, 4685, 4697, 4708,
4720, 4742, 4754, 4765, 4777, 4848, 4860, 4871, 4883, 4905, 4917, 4928, 4940,
4987, 4999, 5010, 5022, 5044, 5056, 5067, 5079, 5141, 5153, 5164, 5176, 5198,
5210, 5221, 5233, 5280, 5292, 5303, 5315, 5337, 5349, 5360, 5372, 4988, 5000,
5011, 5023, 5045, 5057, 5068, 5080, 5127, 5139, 5150, 5162, 5184, 5196, 5207,
5219, 5281, 5293, 5304, 5316, 5338, 5350, 5361, 5373, 5420, 5432, 5443, 5455,
5477, 5489, 5500, 5512, 5583, 5595, 5606, 5618, 5640, 5652, 5663, 5675, 5722,
5734, 5745, 5757, 5779, 5791, 5802, 5814, 5876, 5888, 5899, 5911, 5933, 5945,
5956, 5968, 6015, 6027, 6038, 6050, 6072, 6084, 6095, 6107, 5863, 5875, 5886,
5898, 5920, 5932, 5943, 5955, 6002, 6014, 6025, 6037, 6059, 6071, 6082, 6094,
6156, 6168, 6179, 6191, 6213, 6225, 6236, 6248, 6295, 6307, 6318, 6330, 6352,
6364, 6375, 6387, 6458, 6470, 6481, 6493, 6515, 6527, 6538, 6550, 6597, 6609,
6620, 6632, 6654, 6666, 6677, 6689, 6751, 6763, 6774, 6786, 6808, 6820, 6831,
6843, 6890, 6902, 6913, 6925, 6947, 6959, 6970, 6982
};
const int vp10_cat6_high_cost[64] = {
88, 2251, 2727, 4890, 3148, 5311, 5787, 7950, 3666, 5829, 6305,
8468, 6726, 8889, 9365, 11528, 3666, 5829, 6305, 8468, 6726, 8889,
9365, 11528, 7244, 9407, 9883, 12046, 10304, 12467, 12943, 15106, 3666,
5829, 6305, 8468, 6726, 8889, 9365, 11528, 7244, 9407, 9883, 12046,
10304, 12467, 12943, 15106, 7244, 9407, 9883, 12046, 10304, 12467, 12943,
15106, 10822, 12985, 13461, 15624, 13882, 16045, 16521, 18684};
88, 2251, 2727, 4890, 3148, 5311, 5787, 7950, 3666, 5829, 6305,
8468, 6726, 8889, 9365, 11528, 3666, 5829, 6305, 8468, 6726, 8889,
9365, 11528, 7244, 9407, 9883, 12046, 10304, 12467, 12943, 15106, 3666,
5829, 6305, 8468, 6726, 8889, 9365, 11528, 7244, 9407, 9883, 12046,
10304, 12467, 12943, 15106, 7244, 9407, 9883, 12046, 10304, 12467, 12943,
15106, 10822, 12985, 13461, 15624, 13882, 16045, 16521, 18684
};
#if CONFIG_VP9_HIGHBITDEPTH
const int vp10_cat6_high10_high_cost[256] = {
94, 2257, 2733, 4896, 3154, 5317, 5793, 7956, 3672, 5835, 6311,
8474, 6732, 8895, 9371, 11534, 3672, 5835, 6311, 8474, 6732, 8895,
9371, 11534, 7250, 9413, 9889, 12052, 10310, 12473, 12949, 15112, 3672,
5835, 6311, 8474, 6732, 8895, 9371, 11534, 7250, 9413, 9889, 12052,
10310, 12473, 12949, 15112, 7250, 9413, 9889, 12052, 10310, 12473, 12949,
15112, 10828, 12991, 13467, 15630, 13888, 16051, 16527, 18690, 4187, 6350,
6826, 8989, 7247, 9410, 9886, 12049, 7765, 9928, 10404, 12567, 10825,
12988, 13464, 15627, 7765, 9928, 10404, 12567, 10825, 12988, 13464, 15627,
11343, 13506, 13982, 16145, 14403, 16566, 17042, 19205, 7765, 9928, 10404,
12567, 10825, 12988, 13464, 15627, 11343, 13506, 13982, 16145, 14403, 16566,
17042, 19205, 11343, 13506, 13982, 16145, 14403, 16566, 17042, 19205, 14921,
17084, 17560, 19723, 17981, 20144, 20620, 22783, 4187, 6350, 6826, 8989,
7247, 9410, 9886, 12049, 7765, 9928, 10404, 12567, 10825, 12988, 13464,
15627, 7765, 9928, 10404, 12567, 10825, 12988, 13464, 15627, 11343, 13506,
13982, 16145, 14403, 16566, 17042, 19205, 7765, 9928, 10404, 12567, 10825,
12988, 13464, 15627, 11343, 13506, 13982, 16145, 14403, 16566, 17042, 19205,
11343, 13506, 13982, 16145, 14403, 16566, 17042, 19205, 14921, 17084, 17560,
19723, 17981, 20144, 20620, 22783, 8280, 10443, 10919, 13082, 11340, 13503,
13979, 16142, 11858, 14021, 14497, 16660, 14918, 17081, 17557, 19720, 11858,
14021, 14497, 16660, 14918, 17081, 17557, 19720, 15436, 17599, 18075, 20238,
18496, 20659, 21135, 23298, 11858, 14021, 14497, 16660, 14918, 17081, 17557,
19720, 15436, 17599, 18075, 20238, 18496, 20659, 21135, 23298, 15436, 17599,
18075, 20238, 18496, 20659, 21135, 23298, 19014, 21177, 21653, 23816, 22074,
24237, 24713, 26876};
94, 2257, 2733, 4896, 3154, 5317, 5793, 7956, 3672, 5835, 6311,
8474, 6732, 8895, 9371, 11534, 3672, 5835, 6311, 8474, 6732, 8895,
9371, 11534, 7250, 9413, 9889, 12052, 10310, 12473, 12949, 15112, 3672,
5835, 6311, 8474, 6732, 8895, 9371, 11534, 7250, 9413, 9889, 12052,
10310, 12473, 12949, 15112, 7250, 9413, 9889, 12052, 10310, 12473, 12949,
15112, 10828, 12991, 13467, 15630, 13888, 16051, 16527, 18690, 4187, 6350,
6826, 8989, 7247, 9410, 9886, 12049, 7765, 9928, 10404, 12567, 10825,
12988, 13464, 15627, 7765, 9928, 10404, 12567, 10825, 12988, 13464, 15627,
11343, 13506, 13982, 16145, 14403, 16566, 17042, 19205, 7765, 9928, 10404,
12567, 10825, 12988, 13464, 15627, 11343, 13506, 13982, 16145, 14403, 16566,
17042, 19205, 11343, 13506, 13982, 16145, 14403, 16566, 17042, 19205, 14921,
17084, 17560, 19723, 17981, 20144, 20620, 22783, 4187, 6350, 6826, 8989,
7247, 9410, 9886, 12049, 7765, 9928, 10404, 12567, 10825, 12988, 13464,
15627, 7765, 9928, 10404, 12567, 10825, 12988, 13464, 15627, 11343, 13506,
13982, 16145, 14403, 16566, 17042, 19205, 7765, 9928, 10404, 12567, 10825,
12988, 13464, 15627, 11343, 13506, 13982, 16145, 14403, 16566, 17042, 19205,
11343, 13506, 13982, 16145, 14403, 16566, 17042, 19205, 14921, 17084, 17560,
19723, 17981, 20144, 20620, 22783, 8280, 10443, 10919, 13082, 11340, 13503,
13979, 16142, 11858, 14021, 14497, 16660, 14918, 17081, 17557, 19720, 11858,
14021, 14497, 16660, 14918, 17081, 17557, 19720, 15436, 17599, 18075, 20238,
18496, 20659, 21135, 23298, 11858, 14021, 14497, 16660, 14918, 17081, 17557,
19720, 15436, 17599, 18075, 20238, 18496, 20659, 21135, 23298, 15436, 17599,
18075, 20238, 18496, 20659, 21135, 23298, 19014, 21177, 21653, 23816, 22074,
24237, 24713, 26876
};
const int vp10_cat6_high12_high_cost[1024] = {
100, 2263, 2739, 4902, 3160, 5323, 5799, 7962, 3678, 5841, 6317,
8480, 6738, 8901, 9377, 11540, 3678, 5841, 6317, 8480, 6738, 8901,
9377, 11540, 7256, 9419, 9895, 12058, 10316, 12479, 12955, 15118, 3678,
5841, 6317, 8480, 6738, 8901, 9377, 11540, 7256, 9419, 9895, 12058,
10316, 12479, 12955, 15118, 7256, 9419, 9895, 12058, 10316, 12479, 12955,
15118, 10834, 12997, 13473, 15636, 13894, 16057, 16533, 18696, 4193, 6356,
6832, 8995, 7253, 9416, 9892, 12055, 7771, 9934, 10410, 12573, 10831,
12994, 13470, 15633, 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633,
11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211, 7771, 9934, 10410,
12573, 10831, 12994, 13470, 15633, 11349, 13512, 13988, 16151, 14409, 16572,
17048, 19211, 11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211, 14927,
17090, 17566, 19729, 17987, 20150, 20626, 22789, 4193, 6356, 6832, 8995,
7253, 9416, 9892, 12055, 7771, 9934, 10410, 12573, 10831, 12994, 13470,
15633, 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633, 11349, 13512,
13988, 16151, 14409, 16572, 17048, 19211, 7771, 9934, 10410, 12573, 10831,
12994, 13470, 15633, 11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211,
11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211, 14927, 17090, 17566,
19729, 17987, 20150, 20626, 22789, 8286, 10449, 10925, 13088, 11346, 13509,
13985, 16148, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 11864,
14027, 14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244,
18502, 20665, 21141, 23304, 11864, 14027, 14503, 16666, 14924, 17087, 17563,
19726, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 15442, 17605,
18081, 20244, 18502, 20665, 21141, 23304, 19020, 21183, 21659, 23822, 22080,
24243, 24719, 26882, 4193, 6356, 6832, 8995, 7253, 9416, 9892, 12055,
7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633, 7771, 9934, 10410,
12573, 10831, 12994, 13470, 15633, 11349, 13512, 13988, 16151, 14409, 16572,
17048, 19211, 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633, 11349,
13512, 13988, 16151, 14409, 16572, 17048, 19211, 11349, 13512, 13988, 16151,
14409, 16572, 17048, 19211, 14927, 17090, 17566, 19729, 17987, 20150, 20626,
22789, 8286, 10449, 10925, 13088, 11346, 13509, 13985, 16148, 11864, 14027,
14503, 16666, 14924, 17087, 17563, 19726, 11864, 14027, 14503, 16666, 14924,
17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304,
11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081,
20244, 18502, 20665, 21141, 23304, 15442, 17605, 18081, 20244, 18502, 20665,
21141, 23304, 19020, 21183, 21659, 23822, 22080, 24243, 24719, 26882, 8286,
10449, 10925, 13088, 11346, 13509, 13985, 16148, 11864, 14027, 14503, 16666,
14924, 17087, 17563, 19726, 11864, 14027, 14503, 16666, 14924, 17087, 17563,
19726, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 11864, 14027,
14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502,
20665, 21141, 23304, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304,
19020, 21183, 21659, 23822, 22080, 24243, 24719, 26882, 12379, 14542, 15018,
17181, 15439, 17602, 18078, 20241, 15957, 18120, 18596, 20759, 19017, 21180,
21656, 23819, 15957, 18120, 18596, 20759, 19017, 21180, 21656, 23819, 19535,
21698, 22174, 24337, 22595, 24758, 25234, 27397, 15957, 18120, 18596, 20759,
19017, 21180, 21656, 23819, 19535, 21698, 22174, 24337, 22595, 24758, 25234,
27397, 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397, 23113, 25276,
25752, 27915, 26173, 28336, 28812, 30975, 4193, 6356, 6832, 8995, 7253,
9416, 9892, 12055, 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633,
7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633, 11349, 13512, 13988,
16151, 14409, 16572, 17048, 19211, 7771, 9934, 10410, 12573, 10831, 12994,
13470, 15633, 11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211, 11349,
13512, 13988, 16151, 14409, 16572, 17048, 19211, 14927, 17090, 17566, 19729,
17987, 20150, 20626, 22789, 8286, 10449, 10925, 13088, 11346, 13509, 13985,
16148, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 11864, 14027,
14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502,
20665, 21141, 23304, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726,
15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 15442, 17605, 18081,
20244, 18502, 20665, 21141, 23304, 19020, 21183, 21659, 23822, 22080, 24243,
24719, 26882, 8286, 10449, 10925, 13088, 11346, 13509, 13985, 16148, 11864,
14027, 14503, 16666, 14924, 17087, 17563, 19726, 11864, 14027, 14503, 16666,
14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502, 20665, 21141,
23304, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605,
18081, 20244, 18502, 20665, 21141, 23304, 15442, 17605, 18081, 20244, 18502,
20665, 21141, 23304, 19020, 21183, 21659, 23822, 22080, 24243, 24719, 26882,
12379, 14542, 15018, 17181, 15439, 17602, 18078, 20241, 15957, 18120, 18596,
20759, 19017, 21180, 21656, 23819, 15957, 18120, 18596, 20759, 19017, 21180,
21656, 23819, 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397, 15957,
18120, 18596, 20759, 19017, 21180, 21656, 23819, 19535, 21698, 22174, 24337,
22595, 24758, 25234, 27397, 19535, 21698, 22174, 24337, 22595, 24758, 25234,
27397, 23113, 25276, 25752, 27915, 26173, 28336, 28812, 30975, 8286, 10449,
10925, 13088, 11346, 13509, 13985, 16148, 11864, 14027, 14503, 16666, 14924,
17087, 17563, 19726, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726,
15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 11864, 14027, 14503,
16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502, 20665,
21141, 23304, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 19020,
21183, 21659, 23822, 22080, 24243, 24719, 26882, 12379, 14542, 15018, 17181,
15439, 17602, 18078, 20241, 15957, 18120, 18596, 20759, 19017, 21180, 21656,
23819, 15957, 18120, 18596, 20759, 19017, 21180, 21656, 23819, 19535, 21698,
22174, 24337, 22595, 24758, 25234, 27397, 15957, 18120, 18596, 20759, 19017,
21180, 21656, 23819, 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397,
19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397, 23113, 25276, 25752,
27915, 26173, 28336, 28812, 30975, 12379, 14542, 15018, 17181, 15439, 17602,
18078, 20241, 15957, 18120, 18596, 20759, 19017, 21180, 21656, 23819, 15957,
18120, 18596, 20759, 19017, 21180, 21656, 23819, 19535, 21698, 22174, 24337,
22595, 24758, 25234, 27397, 15957, 18120, 18596, 20759, 19017, 21180, 21656,
23819, 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397, 19535, 21698,
22174, 24337, 22595, 24758, 25234, 27397, 23113, 25276, 25752, 27915, 26173,
28336, 28812, 30975, 16472, 18635, 19111, 21274, 19532, 21695, 22171, 24334,
20050, 22213, 22689, 24852, 23110, 25273, 25749, 27912, 20050, 22213, 22689,
24852, 23110, 25273, 25749, 27912, 23628, 25791, 26267, 28430, 26688, 28851,
29327, 31490, 20050, 22213, 22689, 24852, 23110, 25273, 25749, 27912, 23628,
25791, 26267, 28430, 26688, 28851, 29327, 31490, 23628, 25791, 26267, 28430,
26688, 28851, 29327, 31490, 27206, 29369, 29845, 32008, 30266, 32429, 32905,
35068};
100, 2263, 2739, 4902, 3160, 5323, 5799, 7962, 3678, 5841, 6317,
8480, 6738, 8901, 9377, 11540, 3678, 5841, 6317, 8480, 6738, 8901,
9377, 11540, 7256, 9419, 9895, 12058, 10316, 12479, 12955, 15118, 3678,
5841, 6317, 8480, 6738, 8901, 9377, 11540, 7256, 9419, 9895, 12058,
10316, 12479, 12955, 15118, 7256, 9419, 9895, 12058, 10316, 12479, 12955,
15118, 10834, 12997, 13473, 15636, 13894, 16057, 16533, 18696, 4193, 6356,
6832, 8995, 7253, 9416, 9892, 12055, 7771, 9934, 10410, 12573, 10831,
12994, 13470, 15633, 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633,
11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211, 7771, 9934, 10410,
12573, 10831, 12994, 13470, 15633, 11349, 13512, 13988, 16151, 14409, 16572,
17048, 19211, 11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211, 14927,
17090, 17566, 19729, 17987, 20150, 20626, 22789, 4193, 6356, 6832, 8995,
7253, 9416, 9892, 12055, 7771, 9934, 10410, 12573, 10831, 12994, 13470,
15633, 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633, 11349, 13512,
13988, 16151, 14409, 16572, 17048, 19211, 7771, 9934, 10410, 12573, 10831,
12994, 13470, 15633, 11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211,
11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211, 14927, 17090, 17566,
19729, 17987, 20150, 20626, 22789, 8286, 10449, 10925, 13088, 11346, 13509,
13985, 16148, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 11864,
14027, 14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244,
18502, 20665, 21141, 23304, 11864, 14027, 14503, 16666, 14924, 17087, 17563,
19726, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 15442, 17605,
18081, 20244, 18502, 20665, 21141, 23304, 19020, 21183, 21659, 23822, 22080,
24243, 24719, 26882, 4193, 6356, 6832, 8995, 7253, 9416, 9892, 12055,
7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633, 7771, 9934, 10410,
12573, 10831, 12994, 13470, 15633, 11349, 13512, 13988, 16151, 14409, 16572,
17048, 19211, 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633, 11349,
13512, 13988, 16151, 14409, 16572, 17048, 19211, 11349, 13512, 13988, 16151,
14409, 16572, 17048, 19211, 14927, 17090, 17566, 19729, 17987, 20150, 20626,
22789, 8286, 10449, 10925, 13088, 11346, 13509, 13985, 16148, 11864, 14027,
14503, 16666, 14924, 17087, 17563, 19726, 11864, 14027, 14503, 16666, 14924,
17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304,
11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081,
20244, 18502, 20665, 21141, 23304, 15442, 17605, 18081, 20244, 18502, 20665,
21141, 23304, 19020, 21183, 21659, 23822, 22080, 24243, 24719, 26882, 8286,
10449, 10925, 13088, 11346, 13509, 13985, 16148, 11864, 14027, 14503, 16666,
14924, 17087, 17563, 19726, 11864, 14027, 14503, 16666, 14924, 17087, 17563,
19726, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 11864, 14027,
14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502,
20665, 21141, 23304, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304,
19020, 21183, 21659, 23822, 22080, 24243, 24719, 26882, 12379, 14542, 15018,
17181, 15439, 17602, 18078, 20241, 15957, 18120, 18596, 20759, 19017, 21180,
21656, 23819, 15957, 18120, 18596, 20759, 19017, 21180, 21656, 23819, 19535,
21698, 22174, 24337, 22595, 24758, 25234, 27397, 15957, 18120, 18596, 20759,
19017, 21180, 21656, 23819, 19535, 21698, 22174, 24337, 22595, 24758, 25234,
27397, 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397, 23113, 25276,
25752, 27915, 26173, 28336, 28812, 30975, 4193, 6356, 6832, 8995, 7253,
9416, 9892, 12055, 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633,
7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633, 11349, 13512, 13988,
16151, 14409, 16572, 17048, 19211, 7771, 9934, 10410, 12573, 10831, 12994,
13470, 15633, 11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211, 11349,
13512, 13988, 16151, 14409, 16572, 17048, 19211, 14927, 17090, 17566, 19729,
17987, 20150, 20626, 22789, 8286, 10449, 10925, 13088, 11346, 13509, 13985,
16148, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 11864, 14027,
14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502,
20665, 21141, 23304, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726,
15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 15442, 17605, 18081,
20244, 18502, 20665, 21141, 23304, 19020, 21183, 21659, 23822, 22080, 24243,
24719, 26882, 8286, 10449, 10925, 13088, 11346, 13509, 13985, 16148, 11864,
14027, 14503, 16666, 14924, 17087, 17563, 19726, 11864, 14027, 14503, 16666,
14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502, 20665, 21141,
23304, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605,
18081, 20244, 18502, 20665, 21141, 23304, 15442, 17605, 18081, 20244, 18502,
20665, 21141, 23304, 19020, 21183, 21659, 23822, 22080, 24243, 24719, 26882,
12379, 14542, 15018, 17181, 15439, 17602, 18078, 20241, 15957, 18120, 18596,
20759, 19017, 21180, 21656, 23819, 15957, 18120, 18596, 20759, 19017, 21180,
21656, 23819, 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397, 15957,
18120, 18596, 20759, 19017, 21180, 21656, 23819, 19535, 21698, 22174, 24337,
22595, 24758, 25234, 27397, 19535, 21698, 22174, 24337, 22595, 24758, 25234,
27397, 23113, 25276, 25752, 27915, 26173, 28336, 28812, 30975, 8286, 10449,
10925, 13088, 11346, 13509, 13985, 16148, 11864, 14027, 14503, 16666, 14924,
17087, 17563, 19726, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726,
15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 11864, 14027, 14503,
16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502, 20665,
21141, 23304, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 19020,
21183, 21659, 23822, 22080, 24243, 24719, 26882, 12379, 14542, 15018, 17181,
15439, 17602, 18078, 20241, 15957, 18120, 18596, 20759, 19017, 21180, 21656,
23819, 15957, 18120, 18596, 20759, 19017, 21180, 21656, 23819, 19535, 21698,
22174, 24337, 22595, 24758, 25234, 27397, 15957, 18120, 18596, 20759, 19017,
21180, 21656, 23819, 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397,
19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397, 23113, 25276, 25752,
27915, 26173, 28336, 28812, 30975, 12379, 14542, 15018, 17181, 15439, 17602,
18078, 20241, 15957, 18120, 18596, 20759, 19017, 21180, 21656, 23819, 15957,
18120, 18596, 20759, 19017, 21180, 21656, 23819, 19535, 21698, 22174, 24337,
22595, 24758, 25234, 27397, 15957, 18120, 18596, 20759, 19017, 21180, 21656,
23819, 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397, 19535, 21698,
22174, 24337, 22595, 24758, 25234, 27397, 23113, 25276, 25752, 27915, 26173,
28336, 28812, 30975, 16472, 18635, 19111, 21274, 19532, 21695, 22171, 24334,
20050, 22213, 22689, 24852, 23110, 25273, 25749, 27912, 20050, 22213, 22689,
24852, 23110, 25273, 25749, 27912, 23628, 25791, 26267, 28430, 26688, 28851,
29327, 31490, 20050, 22213, 22689, 24852, 23110, 25273, 25749, 27912, 23628,
25791, 26267, 28430, 26688, 28851, 29327, 31490, 23628, 25791, 26267, 28430,
26688, 28851, 29327, 31490, 27206, 29369, 29845, 32008, 30266, 32429, 32905,
35068
};
#endif
#if CONFIG_VP9_HIGHBITDEPTH
static const vpx_tree_index cat1_high10[2] = {0, 0};
static const vpx_tree_index cat2_high10[4] = {2, 2, 0, 0};
static const vpx_tree_index cat3_high10[6] = {2, 2, 4, 4, 0, 0};
static const vpx_tree_index cat4_high10[8] = {2, 2, 4, 4, 6, 6, 0, 0};
static const vpx_tree_index cat5_high10[10] = {2, 2, 4, 4, 6, 6, 8, 8, 0, 0};
static const vpx_tree_index cat6_high10[32] = {2, 2, 4, 4, 6, 6, 8, 8, 10, 10,
12, 12, 14, 14, 16, 16, 18, 18, 20, 20, 22, 22, 24, 24, 26, 26, 28, 28,
30, 30, 0, 0};
static const vpx_tree_index cat1_high12[2] = {0, 0};
static const vpx_tree_index cat2_high12[4] = {2, 2, 0, 0};
static const vpx_tree_index cat3_high12[6] = {2, 2, 4, 4, 0, 0};
static const vpx_tree_index cat4_high12[8] = {2, 2, 4, 4, 6, 6, 0, 0};
static const vpx_tree_index cat5_high12[10] = {2, 2, 4, 4, 6, 6, 8, 8, 0, 0};
static const vpx_tree_index cat6_high12[36] = {2, 2, 4, 4, 6, 6, 8, 8, 10, 10,
12, 12, 14, 14, 16, 16, 18, 18, 20, 20, 22, 22, 24, 24, 26, 26, 28, 28,
30, 30, 32, 32, 34, 34, 0, 0};
static const vpx_tree_index cat1_high10[2] = { 0, 0 };
static const vpx_tree_index cat2_high10[4] = { 2, 2, 0, 0 };
static const vpx_tree_index cat3_high10[6] = { 2, 2, 4, 4, 0, 0 };
static const vpx_tree_index cat4_high10[8] = { 2, 2, 4, 4, 6, 6, 0, 0 };
static const vpx_tree_index cat5_high10[10] = { 2, 2, 4, 4, 6, 6, 8, 8, 0, 0 };
static const vpx_tree_index cat6_high10[32] = { 2, 2, 4, 4, 6, 6, 8, 8,
10, 10, 12, 12, 14, 14, 16, 16,
18, 18, 20, 20, 22, 22, 24, 24,
26, 26, 28, 28, 30, 30, 0, 0 };
static const vpx_tree_index cat1_high12[2] = { 0, 0 };
static const vpx_tree_index cat2_high12[4] = { 2, 2, 0, 0 };
static const vpx_tree_index cat3_high12[6] = { 2, 2, 4, 4, 0, 0 };
static const vpx_tree_index cat4_high12[8] = { 2, 2, 4, 4, 6, 6, 0, 0 };
static const vpx_tree_index cat5_high12[10] = { 2, 2, 4, 4, 6, 6, 8, 8, 0, 0 };
static const vpx_tree_index cat6_high12[36] = {
2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14, 16, 16, 18, 18,
20, 20, 22, 22, 24, 24, 26, 26, 28, 28, 30, 30, 32, 32, 34, 34, 0, 0
};
#endif
const vp10_extra_bit vp10_extra_bits[ENTROPY_TOKENS] = {
{0, 0, 0, 0, zero_cost}, // ZERO_TOKEN
{0, 0, 0, 1, sign_cost}, // ONE_TOKEN
{0, 0, 0, 2, sign_cost}, // TWO_TOKEN
{0, 0, 0, 3, sign_cost}, // THREE_TOKEN
{0, 0, 0, 4, sign_cost}, // FOUR_TOKEN
{cat1, vp10_cat1_prob, 1, CAT1_MIN_VAL, cat1_cost}, // CATEGORY1_TOKEN
{cat2, vp10_cat2_prob, 2, CAT2_MIN_VAL, cat2_cost}, // CATEGORY2_TOKEN
{cat3, vp10_cat3_prob, 3, CAT3_MIN_VAL, cat3_cost}, // CATEGORY3_TOKEN
{cat4, vp10_cat4_prob, 4, CAT4_MIN_VAL, cat4_cost}, // CATEGORY4_TOKEN
{cat5, vp10_cat5_prob, 5, CAT5_MIN_VAL, cat5_cost}, // CATEGORY5_TOKEN
{cat6, vp10_cat6_prob, 14, CAT6_MIN_VAL, 0}, // CATEGORY6_TOKEN
{0, 0, 0, 0, zero_cost} // EOB_TOKEN
{ 0, 0, 0, 0, zero_cost }, // ZERO_TOKEN
{ 0, 0, 0, 1, sign_cost }, // ONE_TOKEN
{ 0, 0, 0, 2, sign_cost }, // TWO_TOKEN
{ 0, 0, 0, 3, sign_cost }, // THREE_TOKEN
{ 0, 0, 0, 4, sign_cost }, // FOUR_TOKEN
{ cat1, vp10_cat1_prob, 1, CAT1_MIN_VAL, cat1_cost }, // CATEGORY1_TOKEN
{ cat2, vp10_cat2_prob, 2, CAT2_MIN_VAL, cat2_cost }, // CATEGORY2_TOKEN
{ cat3, vp10_cat3_prob, 3, CAT3_MIN_VAL, cat3_cost }, // CATEGORY3_TOKEN
{ cat4, vp10_cat4_prob, 4, CAT4_MIN_VAL, cat4_cost }, // CATEGORY4_TOKEN
{ cat5, vp10_cat5_prob, 5, CAT5_MIN_VAL, cat5_cost }, // CATEGORY5_TOKEN
{ cat6, vp10_cat6_prob, 14, CAT6_MIN_VAL, 0 }, // CATEGORY6_TOKEN
{ 0, 0, 0, 0, zero_cost } // EOB_TOKEN
};
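// A sketch of how one descriptor above is consumed for a CATEGORY1..5 value v
// (the field names tree/prob/len/base_val/cost are inferred from the
// initializer order and from vp10_get_cost() in tokenize.h):
static INLINE int category_payload(const vp10_extra_bit *b, int v) {
  return v - b->base_val;  // b->len of these bits are then coded along b->tree
}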
#if CONFIG_VP9_HIGHBITDEPTH
const vp10_extra_bit vp10_extra_bits_high10[ENTROPY_TOKENS] = {
{0, 0, 0, 0, zero_cost}, // ZERO
{0, 0, 0, 1, sign_cost}, // ONE
{0, 0, 0, 2, sign_cost}, // TWO
{0, 0, 0, 3, sign_cost}, // THREE
{0, 0, 0, 4, sign_cost}, // FOUR
{cat1_high10, vp10_cat1_prob_high10, 1, CAT1_MIN_VAL, cat1_cost}, // CAT1
{cat2_high10, vp10_cat2_prob_high10, 2, CAT2_MIN_VAL, cat2_cost}, // CAT2
{cat3_high10, vp10_cat3_prob_high10, 3, CAT3_MIN_VAL, cat3_cost}, // CAT3
{cat4_high10, vp10_cat4_prob_high10, 4, CAT4_MIN_VAL, cat4_cost}, // CAT4
{cat5_high10, vp10_cat5_prob_high10, 5, CAT5_MIN_VAL, cat5_cost}, // CAT5
{cat6_high10, vp10_cat6_prob_high10, 16, CAT6_MIN_VAL, 0}, // CAT6
{0, 0, 0, 0, zero_cost} // EOB
{ 0, 0, 0, 0, zero_cost }, // ZERO
{ 0, 0, 0, 1, sign_cost }, // ONE
{ 0, 0, 0, 2, sign_cost }, // TWO
{ 0, 0, 0, 3, sign_cost }, // THREE
{ 0, 0, 0, 4, sign_cost }, // FOUR
{ cat1_high10, vp10_cat1_prob_high10, 1, CAT1_MIN_VAL, cat1_cost }, // CAT1
{ cat2_high10, vp10_cat2_prob_high10, 2, CAT2_MIN_VAL, cat2_cost }, // CAT2
{ cat3_high10, vp10_cat3_prob_high10, 3, CAT3_MIN_VAL, cat3_cost }, // CAT3
{ cat4_high10, vp10_cat4_prob_high10, 4, CAT4_MIN_VAL, cat4_cost }, // CAT4
{ cat5_high10, vp10_cat5_prob_high10, 5, CAT5_MIN_VAL, cat5_cost }, // CAT5
{ cat6_high10, vp10_cat6_prob_high10, 16, CAT6_MIN_VAL, 0 }, // CAT6
{ 0, 0, 0, 0, zero_cost } // EOB
};
const vp10_extra_bit vp10_extra_bits_high12[ENTROPY_TOKENS] = {
{0, 0, 0, 0, zero_cost}, // ZERO
{0, 0, 0, 1, sign_cost}, // ONE
{0, 0, 0, 2, sign_cost}, // TWO
{0, 0, 0, 3, sign_cost}, // THREE
{0, 0, 0, 4, sign_cost}, // FOUR
{cat1_high12, vp10_cat1_prob_high12, 1, CAT1_MIN_VAL, cat1_cost}, // CAT1
{cat2_high12, vp10_cat2_prob_high12, 2, CAT2_MIN_VAL, cat2_cost}, // CAT2
{cat3_high12, vp10_cat3_prob_high12, 3, CAT3_MIN_VAL, cat3_cost}, // CAT3
{cat4_high12, vp10_cat4_prob_high12, 4, CAT4_MIN_VAL, cat4_cost}, // CAT4
{cat5_high12, vp10_cat5_prob_high12, 5, CAT5_MIN_VAL, cat5_cost}, // CAT5
{cat6_high12, vp10_cat6_prob_high12, 18, CAT6_MIN_VAL, 0}, // CAT6
{0, 0, 0, 0, zero_cost} // EOB
{ 0, 0, 0, 0, zero_cost }, // ZERO
{ 0, 0, 0, 1, sign_cost }, // ONE
{ 0, 0, 0, 2, sign_cost }, // TWO
{ 0, 0, 0, 3, sign_cost }, // THREE
{ 0, 0, 0, 4, sign_cost }, // FOUR
{ cat1_high12, vp10_cat1_prob_high12, 1, CAT1_MIN_VAL, cat1_cost }, // CAT1
{ cat2_high12, vp10_cat2_prob_high12, 2, CAT2_MIN_VAL, cat2_cost }, // CAT2
{ cat3_high12, vp10_cat3_prob_high12, 3, CAT3_MIN_VAL, cat3_cost }, // CAT3
{ cat4_high12, vp10_cat4_prob_high12, 4, CAT4_MIN_VAL, cat4_cost }, // CAT4
{ cat5_high12, vp10_cat5_prob_high12, 5, CAT5_MIN_VAL, cat5_cost }, // CAT5
{ cat6_high12, vp10_cat6_prob_high12, 18, CAT6_MIN_VAL, 0 }, // CAT6
{ 0, 0, 0, 0, zero_cost } // EOB
};
#endif
#if !CONFIG_ANS
const struct vp10_token vp10_coef_encodings[ENTROPY_TOKENS] = {
{2, 2}, {6, 3}, {28, 5}, {58, 6}, {59, 6}, {60, 6}, {61, 6}, {124, 7},
{125, 7}, {126, 7}, {127, 7}, {0, 1}
{ 2, 2 }, { 6, 3 }, { 28, 5 }, { 58, 6 }, { 59, 6 }, { 60, 6 },
{ 61, 6 }, { 124, 7 }, { 125, 7 }, { 126, 7 }, { 127, 7 }, { 0, 1 }
};
#endif // !CONFIG_ANS
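// The { value, len } pairs above are exactly the per-token bit patterns
// implied by vp10_coef_tree. A minimal sketch of rebuilding them at runtime
// with the helper declared in treewriter.h:
static void rebuild_coef_encodings(struct vp10_token out[ENTROPY_TOKENS]) {
  vp10_tokens_from_tree(out, vp10_coef_tree);  // matches vp10_coef_encodings
}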
@@ -348,18 +347,17 @@ struct tokenize_b_args {
TOKENEXTRA **tp;
};
static void set_entropy_context_b(int plane, int block,
int blk_row, int blk_col,
BLOCK_SIZE plane_bsize,
static void set_entropy_context_b(int plane, int block, int blk_row,
int blk_col, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) {
struct tokenize_b_args* const args = arg;
struct tokenize_b_args *const args = arg;
ThreadData *const td = args->td;
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
struct macroblock_plane *p = &x->plane[plane];
struct macroblockd_plane *pd = &xd->plane[plane];
vp10_set_contexts(xd, pd, plane_bsize, tx_size, p->eobs[block] > 0,
blk_col, blk_row);
vp10_set_contexts(xd, pd, plane_bsize, tx_size, p->eobs[block] > 0, blk_col,
blk_row);
}
static INLINE void add_token(TOKENEXTRA **t, const vpx_prob *context_tree,
@@ -381,8 +379,7 @@ static INLINE void add_token(TOKENEXTRA **t, const vpx_prob *context_tree,
static INLINE void add_token_no_extra(TOKENEXTRA **t,
const vpx_prob *context_tree,
uint8_t token,
uint8_t skip_eob_node,
uint8_t token, uint8_t skip_eob_node,
unsigned int *counts) {
(*t)->token = token;
(*t)->context_tree = context_tree;
@@ -397,9 +394,8 @@ static INLINE int get_tx_eob(const struct segmentation *seg, int segment_id,
return segfeature_active(seg, segment_id, SEG_LVL_SKIP) ? 0 : eob_max;
}
void vp10_tokenize_palette_sb(struct ThreadData *const td,
BLOCK_SIZE bsize, int plane,
TOKENEXTRA **t) {
void vp10_tokenize_palette_sb(struct ThreadData *const td, BLOCK_SIZE bsize,
int plane, TOKENEXTRA **t) {
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
@@ -409,17 +405,17 @@ void vp10_tokenize_palette_sb(struct ThreadData *const td,
int i, j, k;
int color_new_idx = -1, color_ctx, color_order[PALETTE_MAX_SIZE];
const int rows = (4 * num_4x4_blocks_high_lookup[bsize]) >>
(xd->plane[plane != 0].subsampling_y);
const int cols = (4 * num_4x4_blocks_wide_lookup[bsize]) >>
(xd->plane[plane != 0].subsampling_x);
const vpx_prob (* const probs)[PALETTE_COLOR_CONTEXTS][PALETTE_COLORS - 1] =
plane == 0 ? vp10_default_palette_y_color_prob :
vp10_default_palette_uv_color_prob;
(xd->plane[plane != 0].subsampling_x);
const vpx_prob (*const probs)[PALETTE_COLOR_CONTEXTS][PALETTE_COLORS - 1] =
plane == 0 ? vp10_default_palette_y_color_prob
: vp10_default_palette_uv_color_prob;
for (i = 0; i < rows; ++i) {
for (j = (i == 0 ? 1 : 0); j < cols; ++j) {
color_ctx = vp10_get_palette_color_context(color_map, cols, i, j, n,
color_order);
color_ctx =
vp10_get_palette_color_context(color_map, cols, i, j, n, color_order);
for (k = 0; k < n; ++k)
if (color_map[i * cols + j] == color_order[k]) {
color_new_idx = k;
@@ -435,9 +431,8 @@ void vp10_tokenize_palette_sb(struct ThreadData *const td,
}
static void tokenize_b(int plane, int block, int blk_row, int blk_col,
BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) {
struct tokenize_b_args* const args = arg;
BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
struct tokenize_b_args *const args = arg;
VP10_COMP *cpi = args->cpi;
ThreadData *const td = args->td;
MACROBLOCK *const x = &td->mb;
@@ -449,7 +444,7 @@ static void tokenize_b(int plane, int block, int blk_row, int blk_col,
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
int pt; /* near block/prev token context index */
int c;
TOKENEXTRA *t = *tp; /* store tokens starting here */
int eob = p->eobs[block];
const PLANE_TYPE type = pd->plane_type;
const tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
@@ -465,7 +460,7 @@ static void tokenize_b(int plane, int block, int blk_row, int blk_col,
unsigned int (*const counts)[COEFF_CONTEXTS][ENTROPY_TOKENS] =
td->rd_counts.coef_counts[txsize_sqr_map[tx_size]][type][ref];
#if CONFIG_ENTROPY
vpx_prob (*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] =
vpx_prob(*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] =
cpi->subframe_stats.coef_probs_buf[cpi->common.coef_probs_update_idx]
[txsize_sqr_map[tx_size]][type][ref];
#else
@@ -497,7 +492,7 @@ static void tokenize_b(int plane, int block, int blk_row, int blk_col,
add_token(&t, coef_probs[band[c]][pt],
#if CONFIG_ANS
(const rans_dec_lut*)&coef_cdfs[band[c]][pt],
(const rans_dec_lut *)&coef_cdfs[band[c]][pt],
#endif // CONFIG_ANS
extra, (uint8_t)token, (uint8_t)skip_eob, counts[band[c]][pt]);
@@ -522,8 +517,7 @@ struct is_skippable_args {
int *skippable;
};
static void is_skippable(int plane, int block, int blk_row, int blk_col,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
void *argv) {
BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *argv) {
struct is_skippable_args *args = argv;
(void)plane;
(void)plane_bsize;
@@ -537,7 +531,7 @@ static void is_skippable(int plane, int block, int blk_row, int blk_col,
// vp10_foreach_transform_block() and simplify is_skippable().
int vp10_is_skippable_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
int result = 1;
struct is_skippable_args args = {x->plane[plane].eobs, &result};
struct is_skippable_args args = { x->plane[plane].eobs, &result };
vp10_foreach_transformed_block_in_plane(&x->e_mbd, bsize, plane, is_skippable,
&args);
return result;
@@ -548,27 +542,26 @@ static void has_high_freq_coeff(int plane, int block, int blk_row, int blk_col,
void *argv) {
struct is_skippable_args *args = argv;
int eobs = (tx_size == TX_4X4) ? 3 : 10;
(void) plane;
(void) plane_bsize;
(void) blk_row;
(void) blk_col;
(void)plane;
(void)plane_bsize;
(void)blk_row;
(void)blk_col;
*(args->skippable) |= (args->eobs[block] > eobs);
}
int vp10_has_high_freq_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
int result = 0;
struct is_skippable_args args = {x->plane[plane].eobs, &result};
struct is_skippable_args args = { x->plane[plane].eobs, &result };
vp10_foreach_transformed_block_in_plane(&x->e_mbd, bsize, plane,
has_high_freq_coeff, &args);
return result;
}
#if CONFIG_VAR_TX
void tokenize_tx(ThreadData *td, TOKENEXTRA **t,
int dry_run, TX_SIZE tx_size, BLOCK_SIZE plane_bsize,
int blk_row, int blk_col, int block, int plane,
void *arg) {
void tokenize_tx(ThreadData *td, TOKENEXTRA **t, int dry_run, TX_SIZE tx_size,
BLOCK_SIZE plane_bsize, int blk_row, int blk_col, int block,
int plane, void *arg) {
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
@@ -576,9 +569,10 @@ void tokenize_tx(ThreadData *td, TOKENEXTRA **t,
const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
const int tx_row = blk_row >> (1 - pd->subsampling_y);
const int tx_col = blk_col >> (1 - pd->subsampling_x);
const TX_SIZE plane_tx_size = plane ?
get_uv_tx_size_impl(mbmi->inter_tx_size[tx_row][tx_col], bsize, 0, 0) :
mbmi->inter_tx_size[tx_row][tx_col];
const TX_SIZE plane_tx_size =
plane ? get_uv_tx_size_impl(mbmi->inter_tx_size[tx_row][tx_col], bsize, 0,
0)
: mbmi->inter_tx_size[tx_row][tx_col];
int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
@@ -590,8 +584,7 @@ void tokenize_tx(ThreadData *td, TOKENEXTRA **t,
if (xd->mb_to_right_edge < 0)
max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
return;
if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
if (tx_size == plane_tx_size) {
const struct macroblockd_plane *const pd = &xd->plane[plane];
@@ -599,8 +592,8 @@ void tokenize_tx(ThreadData *td, TOKENEXTRA **t,
if (!dry_run)
tokenize_b(plane, block, blk_row, blk_col, plane_bsize, tx_size, arg);
else
set_entropy_context_b(plane, block, blk_row, blk_col,
plane_bsize, tx_size, arg);
set_entropy_context_b(plane, block, blk_row, blk_col, plane_bsize,
tx_size, arg);
} else {
int bsl = b_width_log2_lookup[bsize];
int i;
@@ -613,11 +606,10 @@ void tokenize_tx(ThreadData *td, TOKENEXTRA **t,
const int offsetc = blk_col + ((i & 0x01) << bsl);
int step = num_4x4_blocks_txsize_lookup[tx_size - 1];
if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide)
continue;
if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
tokenize_tx(td, t, dry_run, tx_size - 1, plane_bsize,
offsetr, offsetc, block + i * step, plane, arg);
tokenize_tx(td, t, dry_run, tx_size - 1, plane_bsize, offsetr, offsetc,
block + i * step, plane, arg);
}
}
}
@@ -631,19 +623,16 @@ void vp10_tokenize_sb_inter(VP10_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
TOKENEXTRA *t_backup = *t;
const int ctx = vp10_get_skip_context(xd);
const int skip_inc = !segfeature_active(&cm->seg, mbmi->segment_id,
SEG_LVL_SKIP);
struct tokenize_b_args arg = {cpi, td, t};
const int skip_inc =
!segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP);
struct tokenize_b_args arg = { cpi, td, t };
int plane;
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
return;
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
if (mbmi->skip) {
if (!dry_run)
td->counts->skip[ctx][1] += skip_inc;
if (!dry_run) td->counts->skip[ctx][1] += skip_inc;
reset_skip_context(xd, bsize);
if (dry_run)
*t = t_backup;
if (dry_run) *t = t_backup;
return;
}
@@ -665,8 +654,8 @@ void vp10_tokenize_sb_inter(VP10_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
int step = num_4x4_blocks_txsize_lookup[max_tx_size];
for (idy = 0; idy < mi_height; idy += bh) {
for (idx = 0; idx < mi_width; idx += bh) {
tokenize_tx(td, t, dry_run, max_tx_size, plane_bsize, idy, idx,
block, plane, &arg);
tokenize_tx(td, t, dry_run, max_tx_size, plane_bsize, idy, idx, block,
plane, &arg);
block += step;
}
}
@@ -680,18 +669,17 @@ void vp10_tokenize_sb_inter(VP10_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
#endif // CONFIG_VAR_TX
void vp10_tokenize_sb(VP10_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
int dry_run, BLOCK_SIZE bsize) {
VP10_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
const int ctx = vp10_get_skip_context(xd);
const int skip_inc = !segfeature_active(&cm->seg, mbmi->segment_id,
SEG_LVL_SKIP);
struct tokenize_b_args arg = {cpi, td, t};
const int skip_inc =
!segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP);
struct tokenize_b_args arg = { cpi, td, t };
if (mbmi->skip) {
if (!dry_run)
td->counts->skip[ctx][1] += skip_inc;
if (!dry_run) td->counts->skip[ctx][1] += skip_inc;
reset_skip_context(xd, bsize);
return;
}
@@ -720,15 +708,13 @@ void vp10_tokenize_sb_supertx(VP10_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
TOKENEXTRA *t_backup = *t;
const int ctx = vp10_get_skip_context(xd);
const int skip_inc = !segfeature_active(&cm->seg, mbmi->segment_id_supertx,
SEG_LVL_SKIP);
struct tokenize_b_args arg = {cpi, td, t};
const int skip_inc =
!segfeature_active(&cm->seg, mbmi->segment_id_supertx, SEG_LVL_SKIP);
struct tokenize_b_args arg = { cpi, td, t };
if (mbmi->skip) {
if (!dry_run)
td->counts->skip[ctx][1] += skip_inc;
if (!dry_run) td->counts->skip[ctx][1] += skip_inc;
reset_skip_context(xd, bsize);
if (dry_run)
*t = t_backup;
if (dry_run) *t = t_backup;
return;
}

View file

@@ -20,15 +20,14 @@
extern "C" {
#endif
#define EOSB_TOKEN 127 // Not signalled, encoder only
#if CONFIG_VP9_HIGHBITDEPTH
typedef int32_t EXTRABIT;
#else
typedef int16_t EXTRABIT;
#endif
typedef struct {
int16_t token;
EXTRABIT extra;
@@ -62,11 +61,10 @@ void vp10_tokenize_sb_inter(struct VP10_COMP *cpi, struct ThreadData *td,
BLOCK_SIZE bsize);
#endif
void vp10_tokenize_palette_sb(struct ThreadData *const td,
BLOCK_SIZE bsize, int plane,
TOKENEXTRA **t);
void vp10_tokenize_palette_sb(struct ThreadData *const td, BLOCK_SIZE bsize,
int plane, TOKENEXTRA **t);
void vp10_tokenize_sb(struct VP10_COMP *cpi, struct ThreadData *td,
TOKENEXTRA **t, int dry_run, BLOCK_SIZE bsize);
#if CONFIG_SUPERTX
void vp10_tokenize_sb_supertx(struct VP10_COMP *cpi, struct ThreadData *td,
TOKENEXTRA **t, int dry_run, BLOCK_SIZE bsize);
@@ -85,27 +83,28 @@ extern const int vp10_cat6_high_cost[64];
extern const int vp10_cat6_high10_high_cost[256];
extern const int vp10_cat6_high12_high_cost[1024];
static INLINE int vp10_get_cost(int16_t token, EXTRABIT extrabits,
const int *cat6_high_table) {
if (token != CATEGORY6_TOKEN)
return vp10_extra_bits[token].cost[extrabits >> 1];
return vp10_cat6_low_cost[(extrabits >> 1) & 0xff]
+ cat6_high_table[extrabits >> 9];
return vp10_cat6_low_cost[(extrabits >> 1) & 0xff] +
cat6_high_table[extrabits >> 9];
}
#if CONFIG_VP9_HIGHBITDEPTH
static INLINE const int* vp10_get_high_cost_table(int bit_depth) {
static INLINE const int *vp10_get_high_cost_table(int bit_depth) {
return bit_depth == 8 ? vp10_cat6_high_cost
: (bit_depth == 10 ? vp10_cat6_high10_high_cost :
vp10_cat6_high12_high_cost);
: (bit_depth == 10 ? vp10_cat6_high10_high_cost
: vp10_cat6_high12_high_cost);
}
#else
static INLINE const int* vp10_get_high_cost_table(int bit_depth) {
(void) bit_depth;
static INLINE const int *vp10_get_high_cost_table(int bit_depth) {
(void)bit_depth;
return vp10_cat6_high_cost;
}
#endif // CONFIG_VP9_HIGHBITDEPTH
static INLINE void vp10_get_token_extra(int v, int16_t *token, EXTRABIT *extra) {
static INLINE void vp10_get_token_extra(int v, int16_t *token,
EXTRABIT *extra) {
if (v >= CAT6_MIN_VAL || v <= -CAT6_MIN_VAL) {
*token = CATEGORY6_TOKEN;
if (v >= CAT6_MIN_VAL)
@@ -118,19 +117,18 @@ static INLINE void vp10_get_token_extra(int v, int16_t *token, EXTRABIT *extra)
*extra = vp10_dct_cat_lt_10_value_tokens[v].extra;
}
static INLINE int16_t vp10_get_token(int v) {
if (v >= CAT6_MIN_VAL || v <= -CAT6_MIN_VAL)
return 10;
if (v >= CAT6_MIN_VAL || v <= -CAT6_MIN_VAL) return 10;
return vp10_dct_cat_lt_10_value_tokens[v].token;
}
static INLINE int vp10_get_token_cost(int v, int16_t *token,
const int *cat6_high_table) {
const int *cat6_high_table) {
if (v >= CAT6_MIN_VAL || v <= -CAT6_MIN_VAL) {
EXTRABIT extrabits;
*token = CATEGORY6_TOKEN;
extrabits = abs(v) - CAT6_MIN_VAL;
return vp10_cat6_low_cost[extrabits & 0xff]
+ cat6_high_table[extrabits >> 8];
return vp10_cat6_low_cost[extrabits & 0xff] +
cat6_high_table[extrabits >> 8];
}
*token = vp10_dct_cat_lt_10_value_tokens[v].token;
return vp10_dct_cat_lt_10_value_cost[v];

View file

@ -27,7 +27,7 @@ static void tree2tok(struct vp10_token *tokens, const vpx_tree_index *tree,
}
void vp10_tokens_from_tree(struct vp10_token *tokens,
const vpx_tree_index *tree) {
const vpx_tree_index *tree) {
tree2tok(tokens, tree, 0, 0, 0);
}
@ -52,7 +52,7 @@ static unsigned int convert_distribution(unsigned int i, vpx_tree tree,
}
void vp10_tree_probs_from_distribution(vpx_tree tree,
unsigned int branch_ct[/* n-1 */][2],
const unsigned int num_events[/* n */]) {
unsigned int branch_ct[/* n-1 */][2],
const unsigned int num_events[/* n */]) {
convert_distribution(0, tree, branch_ct, num_events);
}

View file

@ -26,19 +26,19 @@ extern "C" {
#endif
void vp10_tree_probs_from_distribution(vpx_tree tree,
unsigned int branch_ct[ /* n - 1 */ ][2],
const unsigned int num_events[ /* n */ ]);
unsigned int branch_ct[/* n - 1 */][2],
const unsigned int num_events[/* n */]);
struct vp10_token {
int value;
int len;
};
void vp10_tokens_from_tree(struct vp10_token*, const vpx_tree_index *);
void vp10_tokens_from_tree(struct vp10_token *, const vpx_tree_index *);
static INLINE void vp10_write_tree(tree_writer *w, const vpx_tree_index *tree,
const vpx_prob *probs, int bits, int len,
vpx_tree_index i) {
const vpx_prob *probs, int bits, int len,
vpx_tree_index i) {
do {
const int bit = (bits >> --len) & 1;
tree_bit_write(w, bit, probs[i >> 1]);
@ -47,8 +47,8 @@ static INLINE void vp10_write_tree(tree_writer *w, const vpx_tree_index *tree,
}
static INLINE void vp10_write_token(tree_writer *w, const vpx_tree_index *tree,
const vpx_prob *probs,
const struct vp10_token *token) {
const vpx_prob *probs,
const struct vp10_token *token) {
vp10_write_tree(w, tree, probs, token->value, token->len, 0);
}
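
For readers new to the vpx tree coder: the bits of token->value are written most-significant first, and each written bit selects the next tree index, with probs[i >> 1] selecting the node probability (tree indices advance in twos). A minimal standalone sketch of that walk, with a caller-supplied bit writer standing in for tree_bit_write and a placeholder index type:

typedef signed char tree_index_sketch;  // stands in for vpx_tree_index
static void write_tree_sketch(const tree_index_sketch *tree, int bits, int len,
                              void (*write_bit)(int bit, int node)) {
  tree_index_sketch i = 0;
  do {
    const int bit = (bits >> --len) & 1;  // most-significant bit first
    write_bit(bit, i >> 1);               // node probability index
    i = tree[i + bit];                    // descend to the chosen child
  } while (len);
}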

View file

@ -11,8 +11,6 @@
#include "vp10/encoder/variance_tree.h"
#include "vp10/encoder/encoder.h"
void vp10_setup_var_tree(struct VP10Common *cm, ThreadData *td) {
int i, j;
#if CONFIG_EXT_PARTITION
@ -27,8 +25,8 @@ void vp10_setup_var_tree(struct VP10Common *cm, ThreadData *td) {
int nodes;
vpx_free(td->var_tree);
CHECK_MEM_ERROR(cm, td->var_tree, vpx_calloc(tree_nodes,
sizeof(*td->var_tree)));
CHECK_MEM_ERROR(cm, td->var_tree,
vpx_calloc(tree_nodes, sizeof(*td->var_tree)));
this_var = &td->var_tree[0];
@ -43,8 +41,7 @@ void vp10_setup_var_tree(struct VP10Common *cm, ThreadData *td) {
for (nodes = leaf_nodes >> 2; nodes > 0; nodes >>= 2) {
for (i = 0; i < nodes; ++i, ++index) {
VAR_TREE *const node = &td->var_tree[index];
for (j = 0; j < 4; j++)
node->split[j] = this_var++;
for (j = 0; j < 4; j++) node->split[j] = this_var++;
}
}
@ -53,7 +50,7 @@ void vp10_setup_var_tree(struct VP10Common *cm, ThreadData *td) {
td->var_root[i] = &td->var_tree[tree_nodes - 1];
// Set up the root nodes for the rest of the possible superblock sizes
while (--i >= 0) {
td->var_root[i] = td->var_root[i+1]->split[0];
td->var_root[i] = td->var_root[i + 1]->split[0];
}
}

View file

@ -63,8 +63,10 @@ static INLINE void fill_variance(int64_t s2, int64_t s, int c, var *v) {
v->sum_square_error = s2;
v->sum_error = s;
v->log2_count = c;
v->variance = (int)(256 * (v->sum_square_error -
((v->sum_error * v->sum_error) >> v->log2_count)) >> v->log2_count);
v->variance =
(int)(256 * (v->sum_square_error -
((v->sum_error * v->sum_error) >> v->log2_count)) >>
v->log2_count);
}
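
A quick sanity check of the fixed-point formula, reading log2_count as the log2 of the sample count:

/* 16 samples (log2_count = 4), sum_square_error = 1000, sum_error = 80:
 * (80 * 80) >> 4 = 400, so variance = (256 * (1000 - 400)) >> 4 = 9600,
 * i.e. 256 times the population variance (1000 - 6400 / 16) / 16 = 37.5. */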
static INLINE void sum_2_variances(const var *a, const var *b, var *r) {
@ -74,20 +76,15 @@ static INLINE void sum_2_variances(const var *a, const var *b, var *r) {
}
static INLINE void fill_variance_node(VAR_TREE *vt) {
sum_2_variances(&vt->split[0]->variances.none,
&vt->split[1]->variances.none,
sum_2_variances(&vt->split[0]->variances.none, &vt->split[1]->variances.none,
&vt->variances.horz[0]);
sum_2_variances(&vt->split[2]->variances.none,
&vt->split[3]->variances.none,
sum_2_variances(&vt->split[2]->variances.none, &vt->split[3]->variances.none,
&vt->variances.horz[1]);
sum_2_variances(&vt->split[0]->variances.none,
&vt->split[2]->variances.none,
sum_2_variances(&vt->split[0]->variances.none, &vt->split[2]->variances.none,
&vt->variances.vert[0]);
sum_2_variances(&vt->split[1]->variances.none,
&vt->split[3]->variances.none,
sum_2_variances(&vt->split[1]->variances.none, &vt->split[3]->variances.none,
&vt->variances.vert[1]);
sum_2_variances(&vt->variances.vert[0],
&vt->variances.vert[1],
sum_2_variances(&vt->variances.vert[0], &vt->variances.vert[1],
&vt->variances.none);
}

View file

@ -18,7 +18,7 @@
#include "vp10/common/reconinter.h"
#define MAX_MASK_VALUE (1 << WEDGE_WEIGHT_BITS)
#define MAX_MASK_VALUE (1 << WEDGE_WEIGHT_BITS)
/**
* Computes SSE of a compound predictor constructed from 2 fundamental
@ -48,17 +48,15 @@
* holds for 8 bit input, and on real input, it should hold practically always,
* as residuals are expected to be small.
*/
uint64_t vp10_wedge_sse_from_residuals_c(const int16_t *r1,
const int16_t *d,
const uint8_t *m,
int N) {
uint64_t vp10_wedge_sse_from_residuals_c(const int16_t *r1, const int16_t *d,
const uint8_t *m, int N) {
uint64_t csse = 0;
int i;
assert(N % 64 == 0);
for (i = 0 ; i < N ; i++) {
int32_t t = MAX_MASK_VALUE*r1[i] + m[i]*d[i];
for (i = 0; i < N; i++) {
int32_t t = MAX_MASK_VALUE * r1[i] + m[i] * d[i];
t = clamp(t, INT16_MIN, INT16_MAX);
csse += t*t;
csse += t * t;
}
return ROUND_POWER_OF_TWO(csse, 2 * WEDGE_WEIGHT_BITS);
}
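
One mask-weighted term, assuming WEDGE_WEIGHT_BITS is 6 so MAX_MASK_VALUE is 64, illustrates why the clamp is rarely active on real residuals:

/* r1[i] = 20, m[i] = 48, d[i] = -5:
 * t = 64 * 20 + 48 * (-5) = 1040, well inside [INT16_MIN, INT16_MAX],
 * contributing 1040 * 1040 = 1081600 to csse before the final rounding
 * right shift by 2 * WEDGE_WEIGHT_BITS = 12 bits. */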
@ -94,9 +92,7 @@ uint64_t vp10_wedge_sse_from_residuals_c(const int16_t *r1,
* Note that for efficiency, ds is stored in 16 bits. Real input residuals
* being small, this should not cause a noticeable issue.
*/
int vp10_wedge_sign_from_residuals_c(const int16_t *ds,
const uint8_t *m,
int N,
int vp10_wedge_sign_from_residuals_c(const int16_t *ds, const uint8_t *m, int N,
int64_t limit) {
int64_t acc = 0;
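
Only the accumulator declaration survives the hunk, so for orientation, a plausible scalar reference consistent with the docstring — accumulate the mask-weighted delta-squares and compare against the supplied limit (a sketch, not necessarily the exact library code):

static int wedge_sign_sketch(const int16_t *ds, const uint8_t *m, int N,
                             int64_t limit) {
  int64_t acc = 0;
  int i;
  for (i = 0; i < N; i++) acc += (int64_t)m[i] * ds[i];  // mask-weighted sum
  return acc > limit;  // sign decision for the wedge predictor pair
}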
@ -121,15 +117,12 @@ int vp10_wedge_sign_from_residuals_c(const int16_t *ds,
*
* The result is saturated to signed 16 bits.
*/
void vp10_wedge_compute_delta_squares_c(int16_t *d,
const int16_t *a,
const int16_t *b,
int N) {
void vp10_wedge_compute_delta_squares_c(int16_t *d, const int16_t *a,
const int16_t *b, int N) {
int i;
assert(N % 64 == 0);
for (i = 0 ; i < N ; i++)
d[i] = clamp(a[i]*a[i] - b[i]*b[i], INT16_MIN, INT16_MAX);
for (i = 0; i < N; i++)
d[i] = clamp(a[i] * a[i] - b[i] * b[i], INT16_MIN, INT16_MAX);
}
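
The saturation mentioned above kicks in quickly for large residuals, for example:

/* a[i] = 200, b[i] = 10: a*a - b*b = 40000 - 100 = 39900, which exceeds
 * INT16_MAX (32767) and is therefore stored clamped to 32767. */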

View file

@ -92,8 +92,8 @@ static void fdct4_sse2(__m128i *in) {
const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
__m128i u[4], v[4];
u[0]=_mm_unpacklo_epi16(in[0], in[1]);
u[1]=_mm_unpacklo_epi16(in[3], in[2]);
u[0] = _mm_unpacklo_epi16(in[0], in[1]);
u[1] = _mm_unpacklo_epi16(in[3], in[2]);
v[0] = _mm_add_epi16(u[0], u[1]);
v[1] = _mm_sub_epi16(u[0], u[1]);
@ -200,14 +200,12 @@ static void fidtx4_sse2(__m128i *in) {
}
#endif // CONFIG_EXT_TX
void vp10_fht4x4_sse2(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
void vp10_fht4x4_sse2(const int16_t *input, tran_low_t *output, int stride,
int tx_type) {
__m128i in[4];
switch (tx_type) {
case DCT_DCT:
vpx_fdct4x4_sse2(input, output, stride);
break;
case DCT_DCT: vpx_fdct4x4_sse2(input, output, stride); break;
case ADST_DCT:
load_buffer_4x4(input, in, stride, 0, 0);
fadst4_sse2(in);
@ -294,20 +292,16 @@ void vp10_fht4x4_sse2(const int16_t *input, tran_low_t *output,
write_buffer_4x4(output, in);
break;
#endif // CONFIG_EXT_TX
default:
assert(0);
default: assert(0);
}
}
void vp10_fdct8x8_quant_sse2(const int16_t *input, int stride,
int16_t* coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t* zbin_ptr,
const int16_t* round_ptr, const int16_t* quant_ptr,
const int16_t* quant_shift_ptr, int16_t* qcoeff_ptr,
int16_t* dqcoeff_ptr, const int16_t* dequant_ptr,
uint16_t* eob_ptr,
const int16_t* scan_ptr,
const int16_t* iscan_ptr) {
void vp10_fdct8x8_quant_sse2(
const int16_t *input, int stride, int16_t *coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr,
const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr,
uint16_t *eob_ptr, const int16_t *scan_ptr, const int16_t *iscan_ptr) {
__m128i zero;
int pass;
// Constants
@ -324,14 +318,14 @@ void vp10_fdct8x8_quant_sse2(const int16_t *input, int stride,
const __m128i k__cospi_m20_p12 = pair_set_epi16(-cospi_20_64, cospi_12_64);
const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
// Load input
__m128i in0 = _mm_load_si128((const __m128i *)(input + 0 * stride));
__m128i in1 = _mm_load_si128((const __m128i *)(input + 1 * stride));
__m128i in2 = _mm_load_si128((const __m128i *)(input + 2 * stride));
__m128i in3 = _mm_load_si128((const __m128i *)(input + 3 * stride));
__m128i in4 = _mm_load_si128((const __m128i *)(input + 4 * stride));
__m128i in5 = _mm_load_si128((const __m128i *)(input + 5 * stride));
__m128i in6 = _mm_load_si128((const __m128i *)(input + 6 * stride));
__m128i in7 = _mm_load_si128((const __m128i *)(input + 7 * stride));
__m128i in0 = _mm_load_si128((const __m128i *)(input + 0 * stride));
__m128i in1 = _mm_load_si128((const __m128i *)(input + 1 * stride));
__m128i in2 = _mm_load_si128((const __m128i *)(input + 2 * stride));
__m128i in3 = _mm_load_si128((const __m128i *)(input + 3 * stride));
__m128i in4 = _mm_load_si128((const __m128i *)(input + 4 * stride));
__m128i in5 = _mm_load_si128((const __m128i *)(input + 5 * stride));
__m128i in6 = _mm_load_si128((const __m128i *)(input + 6 * stride));
__m128i in7 = _mm_load_si128((const __m128i *)(input + 7 * stride));
__m128i *in[8];
int index = 0;
@ -585,9 +579,9 @@ void vp10_fdct8x8_quant_sse2(const int16_t *input, int stride,
// Setup global values
{
round = _mm_load_si128((const __m128i*)round_ptr);
quant = _mm_load_si128((const __m128i*)quant_ptr);
dequant = _mm_load_si128((const __m128i*)dequant_ptr);
round = _mm_load_si128((const __m128i *)round_ptr);
quant = _mm_load_si128((const __m128i *)quant_ptr);
dequant = _mm_load_si128((const __m128i *)dequant_ptr);
}
{
@ -619,15 +613,15 @@ void vp10_fdct8x8_quant_sse2(const int16_t *input, int stride,
qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0);
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), qcoeff0);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
dequant = _mm_unpackhi_epi64(dequant, dequant);
coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), coeff0);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
}
{
@ -640,8 +634,8 @@ void vp10_fdct8x8_quant_sse2(const int16_t *input, int stride,
zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
iscan0 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs));
iscan1 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs) + 1);
iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
@ -684,14 +678,14 @@ void vp10_fdct8x8_quant_sse2(const int16_t *input, int stride,
qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0);
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), qcoeff0);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), coeff0);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
}
{
@ -704,8 +698,8 @@ void vp10_fdct8x8_quant_sse2(const int16_t *input, int stride,
zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
iscan0 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs));
iscan1 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs) + 1);
iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
// Add one to convert from indices to counts
iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
@ -731,10 +725,10 @@ void vp10_fdct8x8_quant_sse2(const int16_t *input, int stride,
}
} else {
do {
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero);
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, zero);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, zero);
n_coeffs += 8 * 2;
} while (n_coeffs < 0);
*eob_ptr = 0;
@ -745,23 +739,23 @@ void vp10_fdct8x8_quant_sse2(const int16_t *input, int stride,
static INLINE void load_buffer_8x8(const int16_t *input, __m128i *in,
int stride, int flipud, int fliplr) {
if (!flipud) {
in[0] = _mm_load_si128((const __m128i *)(input + 0 * stride));
in[1] = _mm_load_si128((const __m128i *)(input + 1 * stride));
in[2] = _mm_load_si128((const __m128i *)(input + 2 * stride));
in[3] = _mm_load_si128((const __m128i *)(input + 3 * stride));
in[4] = _mm_load_si128((const __m128i *)(input + 4 * stride));
in[5] = _mm_load_si128((const __m128i *)(input + 5 * stride));
in[6] = _mm_load_si128((const __m128i *)(input + 6 * stride));
in[7] = _mm_load_si128((const __m128i *)(input + 7 * stride));
in[0] = _mm_load_si128((const __m128i *)(input + 0 * stride));
in[1] = _mm_load_si128((const __m128i *)(input + 1 * stride));
in[2] = _mm_load_si128((const __m128i *)(input + 2 * stride));
in[3] = _mm_load_si128((const __m128i *)(input + 3 * stride));
in[4] = _mm_load_si128((const __m128i *)(input + 4 * stride));
in[5] = _mm_load_si128((const __m128i *)(input + 5 * stride));
in[6] = _mm_load_si128((const __m128i *)(input + 6 * stride));
in[7] = _mm_load_si128((const __m128i *)(input + 7 * stride));
} else {
in[0] = _mm_load_si128((const __m128i *)(input + 7 * stride));
in[1] = _mm_load_si128((const __m128i *)(input + 6 * stride));
in[2] = _mm_load_si128((const __m128i *)(input + 5 * stride));
in[3] = _mm_load_si128((const __m128i *)(input + 4 * stride));
in[4] = _mm_load_si128((const __m128i *)(input + 3 * stride));
in[5] = _mm_load_si128((const __m128i *)(input + 2 * stride));
in[6] = _mm_load_si128((const __m128i *)(input + 1 * stride));
in[7] = _mm_load_si128((const __m128i *)(input + 0 * stride));
in[0] = _mm_load_si128((const __m128i *)(input + 7 * stride));
in[1] = _mm_load_si128((const __m128i *)(input + 6 * stride));
in[2] = _mm_load_si128((const __m128i *)(input + 5 * stride));
in[3] = _mm_load_si128((const __m128i *)(input + 4 * stride));
in[4] = _mm_load_si128((const __m128i *)(input + 3 * stride));
in[5] = _mm_load_si128((const __m128i *)(input + 2 * stride));
in[6] = _mm_load_si128((const __m128i *)(input + 1 * stride));
in[7] = _mm_load_si128((const __m128i *)(input + 0 * stride));
}
if (fliplr) {
@ -1068,14 +1062,14 @@ static void fadst8_sse2(__m128i *in) {
__m128i in0, in1, in2, in3, in4, in5, in6, in7;
// properly aligned for butterfly input
in0 = in[7];
in1 = in[0];
in2 = in[5];
in3 = in[2];
in4 = in[3];
in5 = in[4];
in6 = in[1];
in7 = in[6];
in0 = in[7];
in1 = in[0];
in2 = in[5];
in3 = in[2];
in4 = in[3];
in5 = in[4];
in6 = in[1];
in7 = in[6];
// column transformation
// stage 1
@ -1288,14 +1282,12 @@ static void fidtx8_sse2(__m128i *in) {
}
#endif // CONFIG_EXT_TX
void vp10_fht8x8_sse2(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
void vp10_fht8x8_sse2(const int16_t *input, tran_low_t *output, int stride,
int tx_type) {
__m128i in[8];
switch (tx_type) {
case DCT_DCT:
vpx_fdct8x8_sse2(input, output, stride);
break;
case DCT_DCT: vpx_fdct8x8_sse2(input, output, stride); break;
case ADST_DCT:
load_buffer_8x8(input, in, stride, 0, 0);
fadst8_sse2(in);
@ -1396,14 +1388,13 @@ void vp10_fht8x8_sse2(const int16_t *input, tran_low_t *output,
write_buffer_8x8(output, in, 8);
break;
#endif // CONFIG_EXT_TX
default:
assert(0);
default: assert(0);
}
}
static INLINE void load_buffer_16x16(const int16_t* input, __m128i *in0,
__m128i *in1, int stride,
int flipud, int fliplr) {
static INLINE void load_buffer_16x16(const int16_t *input, __m128i *in0,
__m128i *in1, int stride, int flipud,
int fliplr) {
// Load 4 8x8 blocks
const int16_t *topL = input;
const int16_t *topR = input + 8;
@ -1414,24 +1405,32 @@ static INLINE void load_buffer_16x16(const int16_t* input, __m128i *in0,
if (flipud) {
// Swap left columns
tmp = topL; topL = botL; botL = tmp;
tmp = topL;
topL = botL;
botL = tmp;
// Swap right columns
tmp = topR; topR = botR; botR = tmp;
tmp = topR;
topR = botR;
botR = tmp;
}
if (fliplr) {
// Swap top rows
tmp = topL; topL = topR; topR = tmp;
tmp = topL;
topL = topR;
topR = tmp;
// Swap bottom rows
tmp = botL; botL = botR; botR = tmp;
tmp = botL;
botL = botR;
botR = tmp;
}
// load first 8 columns
load_buffer_8x8(topL, in0, stride, flipud, fliplr);
load_buffer_8x8(topL, in0, stride, flipud, fliplr);
load_buffer_8x8(botL, in0 + 8, stride, flipud, fliplr);
// load second 8 columns
load_buffer_8x8(topR, in1, stride, flipud, fliplr);
load_buffer_8x8(topR, in1, stride, flipud, fliplr);
load_buffer_8x8(botR, in1 + 8, stride, flipud, fliplr);
}
@ -1783,13 +1782,13 @@ static void fdct16_8col(__m128i *in) {
v[14] = _mm_srai_epi32(u[14], DCT_CONST_BITS);
v[15] = _mm_srai_epi32(u[15], DCT_CONST_BITS);
in[1] = _mm_packs_epi32(v[0], v[1]);
in[9] = _mm_packs_epi32(v[2], v[3]);
in[5] = _mm_packs_epi32(v[4], v[5]);
in[1] = _mm_packs_epi32(v[0], v[1]);
in[9] = _mm_packs_epi32(v[2], v[3]);
in[5] = _mm_packs_epi32(v[4], v[5]);
in[13] = _mm_packs_epi32(v[6], v[7]);
in[3] = _mm_packs_epi32(v[8], v[9]);
in[3] = _mm_packs_epi32(v[8], v[9]);
in[11] = _mm_packs_epi32(v[10], v[11]);
in[7] = _mm_packs_epi32(v[12], v[13]);
in[7] = _mm_packs_epi32(v[12], v[13]);
in[15] = _mm_packs_epi32(v[14], v[15]);
}
@ -2473,14 +2472,12 @@ static void fidtx16_sse2(__m128i *in0, __m128i *in1) {
}
#endif // CONFIG_EXT_TX
void vp10_fht16x16_sse2(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
void vp10_fht16x16_sse2(const int16_t *input, tran_low_t *output, int stride,
int tx_type) {
__m128i in0[16], in1[16];
switch (tx_type) {
case DCT_DCT:
vpx_fdct16x16_sse2(input, output, stride);
break;
case DCT_DCT: vpx_fdct16x16_sse2(input, output, stride); break;
case ADST_DCT:
load_buffer_16x16(input, in0, in1, stride, 0, 0);
fadst16_sse2(in0, in1);
@ -2581,8 +2578,6 @@ void vp10_fht16x16_sse2(const int16_t *input, tran_low_t *output,
write_buffer_16x16(output, in0, in1, 16);
break;
#endif // CONFIG_EXT_TX
default:
assert(0);
break;
default: assert(0); break;
}
}

View file

@ -20,16 +20,12 @@
#include "vpx_dsp/x86/inv_txfm_sse2.h"
#include "vpx_dsp/x86/txfm_common_sse2.h"
void vp10_fdct8x8_quant_ssse3(const int16_t *input, int stride,
int16_t* coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t* zbin_ptr,
const int16_t* round_ptr, const int16_t* quant_ptr,
const int16_t* quant_shift_ptr,
int16_t* qcoeff_ptr,
int16_t* dqcoeff_ptr, const int16_t* dequant_ptr,
uint16_t* eob_ptr,
const int16_t* scan_ptr,
const int16_t* iscan_ptr) {
void vp10_fdct8x8_quant_ssse3(
const int16_t *input, int stride, int16_t *coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr,
const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr,
uint16_t *eob_ptr, const int16_t *scan_ptr, const int16_t *iscan_ptr) {
__m128i zero;
int pass;
// Constants
@ -47,14 +43,14 @@ void vp10_fdct8x8_quant_ssse3(const int16_t *input, int stride,
const __m128i k__cospi_m20_p12 = pair_set_epi16(-cospi_20_64, cospi_12_64);
const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
// Load input
__m128i in0 = _mm_load_si128((const __m128i *)(input + 0 * stride));
__m128i in1 = _mm_load_si128((const __m128i *)(input + 1 * stride));
__m128i in2 = _mm_load_si128((const __m128i *)(input + 2 * stride));
__m128i in3 = _mm_load_si128((const __m128i *)(input + 3 * stride));
__m128i in4 = _mm_load_si128((const __m128i *)(input + 4 * stride));
__m128i in5 = _mm_load_si128((const __m128i *)(input + 5 * stride));
__m128i in6 = _mm_load_si128((const __m128i *)(input + 6 * stride));
__m128i in7 = _mm_load_si128((const __m128i *)(input + 7 * stride));
__m128i in0 = _mm_load_si128((const __m128i *)(input + 0 * stride));
__m128i in1 = _mm_load_si128((const __m128i *)(input + 1 * stride));
__m128i in2 = _mm_load_si128((const __m128i *)(input + 2 * stride));
__m128i in3 = _mm_load_si128((const __m128i *)(input + 3 * stride));
__m128i in4 = _mm_load_si128((const __m128i *)(input + 4 * stride));
__m128i in5 = _mm_load_si128((const __m128i *)(input + 5 * stride));
__m128i in6 = _mm_load_si128((const __m128i *)(input + 6 * stride));
__m128i in7 = _mm_load_si128((const __m128i *)(input + 7 * stride));
__m128i *in[8];
int index = 0;
@ -303,9 +299,9 @@ void vp10_fdct8x8_quant_ssse3(const int16_t *input, int stride,
// Setup global values
{
round = _mm_load_si128((const __m128i*)round_ptr);
quant = _mm_load_si128((const __m128i*)quant_ptr);
dequant = _mm_load_si128((const __m128i*)dequant_ptr);
round = _mm_load_si128((const __m128i *)round_ptr);
quant = _mm_load_si128((const __m128i *)quant_ptr);
dequant = _mm_load_si128((const __m128i *)dequant_ptr);
}
{
@ -337,15 +333,15 @@ void vp10_fdct8x8_quant_ssse3(const int16_t *input, int stride,
qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0);
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), qcoeff0);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
dequant = _mm_unpackhi_epi64(dequant, dequant);
coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), coeff0);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
}
{
@ -358,8 +354,8 @@ void vp10_fdct8x8_quant_ssse3(const int16_t *input, int stride,
zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
iscan0 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs));
iscan1 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs) + 1);
iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
// Add one to convert from indices to counts
iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
@ -393,7 +389,7 @@ void vp10_fdct8x8_quant_ssse3(const int16_t *input, int stride,
qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
nzflag = _mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff0, thr)) |
_mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff1, thr));
_mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff1, thr));
if (nzflag) {
qcoeff0 = _mm_adds_epi16(qcoeff0, round);
@ -407,20 +403,20 @@ void vp10_fdct8x8_quant_ssse3(const int16_t *input, int stride,
qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0);
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), qcoeff0);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), coeff0);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
} else {
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, zero);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, zero);
}
}
@ -434,8 +430,8 @@ void vp10_fdct8x8_quant_ssse3(const int16_t *input, int stride,
zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
iscan0 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs));
iscan1 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs) + 1);
iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
// Add one to convert from indices to counts
iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
@ -461,10 +457,10 @@ void vp10_fdct8x8_quant_ssse3(const int16_t *input, int stride,
}
} else {
do {
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero);
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, zero);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, zero);
n_coeffs += 8 * 2;
} while (n_coeffs < 0);
*eob_ptr = 0;

View file

@ -13,10 +13,8 @@
#include "./vp10_rtcd.h"
#include "vpx/vpx_integer.h"
int64_t vp10_block_error_avx2(const int16_t *coeff,
const int16_t *dqcoeff,
intptr_t block_size,
int64_t *ssz) {
int64_t vp10_block_error_avx2(const int16_t *coeff, const int16_t *dqcoeff,
intptr_t block_size, int64_t *ssz) {
__m256i sse_reg, ssz_reg, coeff_reg, dqcoeff_reg;
__m256i exp_dqcoeff_lo, exp_dqcoeff_hi, exp_coeff_lo, exp_coeff_hi;
__m256i sse_reg_64hi, ssz_reg_64hi;
@ -29,7 +27,7 @@ int64_t vp10_block_error_avx2(const int16_t *coeff,
sse_reg = _mm256_set1_epi16(0);
ssz_reg = _mm256_set1_epi16(0);
for (i = 0 ; i < block_size ; i+= 16) {
for (i = 0; i < block_size; i += 16) {
// load 32 bytes from coeff and dqcoeff
coeff_reg = _mm256_loadu_si256((const __m256i *)(coeff + i));
dqcoeff_reg = _mm256_loadu_si256((const __m256i *)(dqcoeff + i));
@ -66,8 +64,8 @@ int64_t vp10_block_error_avx2(const int16_t *coeff,
_mm256_extractf128_si256(ssz_reg, 1));
// store the results
_mm_storel_epi64((__m128i*)(&sse), sse_reg128);
_mm_storel_epi64((__m128i *)(&sse), sse_reg128);
_mm_storel_epi64((__m128i*)(ssz), ssz_reg128);
_mm_storel_epi64((__m128i *)(ssz), ssz_reg128);
return sse;
}
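
As a reference point for the vectorized accumulation above, a straightforward scalar version of the same computation (a sketch mirroring what the C fallback is expected to do):

static int64_t block_error_sketch(const int16_t *coeff, const int16_t *dqcoeff,
                                  intptr_t block_size, int64_t *ssz) {
  int64_t sse = 0, sqc = 0;
  intptr_t i;
  for (i = 0; i < block_size; i++) {
    const int64_t diff = coeff[i] - dqcoeff[i];
    sse += diff * diff;                   // squared quantization error
    sqc += (int64_t)coeff[i] * coeff[i];  // squared source energy
  }
  *ssz = sqc;
  return sse;
}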

View file

@ -14,8 +14,8 @@
#include "vp10/common/common.h"
int64_t vp10_highbd_block_error_sse2(tran_low_t *coeff, tran_low_t *dqcoeff,
intptr_t block_size, int64_t *ssz,
int bps) {
intptr_t block_size, int64_t *ssz,
int bps) {
int i, j, test;
uint32_t temp[4];
__m128i max, min, cmp0, cmp1, cmp2, cmp3;
@ -23,41 +23,41 @@ int64_t vp10_highbd_block_error_sse2(tran_low_t *coeff, tran_low_t *dqcoeff,
const int shift = 2 * (bps - 8);
const int rounding = shift > 0 ? 1 << (shift - 1) : 0;
for (i = 0; i < block_size; i+=8) {
for (i = 0; i < block_size; i += 8) {
// Load the data into xmm registers
__m128i mm_coeff = _mm_load_si128((__m128i*) (coeff + i));
__m128i mm_coeff2 = _mm_load_si128((__m128i*) (coeff + i + 4));
__m128i mm_dqcoeff = _mm_load_si128((__m128i*) (dqcoeff + i));
__m128i mm_dqcoeff2 = _mm_load_si128((__m128i*) (dqcoeff + i + 4));
__m128i mm_coeff = _mm_load_si128((__m128i *)(coeff + i));
__m128i mm_coeff2 = _mm_load_si128((__m128i *)(coeff + i + 4));
__m128i mm_dqcoeff = _mm_load_si128((__m128i *)(dqcoeff + i));
__m128i mm_dqcoeff2 = _mm_load_si128((__m128i *)(dqcoeff + i + 4));
// Check if any values require more than 15 bit
max = _mm_set1_epi32(0x3fff);
min = _mm_set1_epi32(0xffffc000);
cmp0 = _mm_xor_si128(_mm_cmpgt_epi32(mm_coeff, max),
_mm_cmplt_epi32(mm_coeff, min));
_mm_cmplt_epi32(mm_coeff, min));
cmp1 = _mm_xor_si128(_mm_cmpgt_epi32(mm_coeff2, max),
_mm_cmplt_epi32(mm_coeff2, min));
_mm_cmplt_epi32(mm_coeff2, min));
cmp2 = _mm_xor_si128(_mm_cmpgt_epi32(mm_dqcoeff, max),
_mm_cmplt_epi32(mm_dqcoeff, min));
_mm_cmplt_epi32(mm_dqcoeff, min));
cmp3 = _mm_xor_si128(_mm_cmpgt_epi32(mm_dqcoeff2, max),
_mm_cmplt_epi32(mm_dqcoeff2, min));
test = _mm_movemask_epi8(_mm_or_si128(_mm_or_si128(cmp0, cmp1),
_mm_or_si128(cmp2, cmp3)));
_mm_cmplt_epi32(mm_dqcoeff2, min));
test = _mm_movemask_epi8(
_mm_or_si128(_mm_or_si128(cmp0, cmp1), _mm_or_si128(cmp2, cmp3)));
if (!test) {
__m128i mm_diff, error_sse2, sqcoeff_sse2;;
__m128i mm_diff, error_sse2, sqcoeff_sse2;
mm_coeff = _mm_packs_epi32(mm_coeff, mm_coeff2);
mm_dqcoeff = _mm_packs_epi32(mm_dqcoeff, mm_dqcoeff2);
mm_diff = _mm_sub_epi16(mm_coeff, mm_dqcoeff);
error_sse2 = _mm_madd_epi16(mm_diff, mm_diff);
sqcoeff_sse2 = _mm_madd_epi16(mm_coeff, mm_coeff);
_mm_storeu_si128((__m128i*)temp, error_sse2);
_mm_storeu_si128((__m128i *)temp, error_sse2);
error = error + temp[0] + temp[1] + temp[2] + temp[3];
_mm_storeu_si128((__m128i*)temp, sqcoeff_sse2);
_mm_storeu_si128((__m128i *)temp, sqcoeff_sse2);
sqcoeff += temp[0] + temp[1] + temp[2] + temp[3];
} else {
for (j = 0; j < 8; j++) {
const int64_t diff = coeff[i + j] - dqcoeff[i + j];
error += diff * diff;
error += diff * diff;
sqcoeff += (int64_t)coeff[i + j] * (int64_t)coeff[i + j];
}
}
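
The bounds 0x3fff / 0xffffc000 are exactly the gate that keeps the packed 16-bit path overflow-free:

/* Values in [-16384, 16383] give |coeff - dqcoeff| <= 32767, so the
 * difference still fits int16_t after _mm_packs_epi32, and each
 * _mm_madd_epi16 lane sums two squares: at most 2 * 32767 * 32767 =
 * 2147352578, just under INT32_MAX (2147483647).  Anything wider falls
 * back to the 64-bit scalar loop. */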

View file

@ -277,8 +277,7 @@ void vp10_fwd_txfm2d_4x4_sse4_1(const int16_t *input, int32_t *coeff,
write_buffer_4x4(in, coeff);
break;
#endif
default:
assert(0);
default: assert(0);
}
(void)bd;
}
@ -288,23 +287,23 @@ static INLINE void load_buffer_8x8(const int16_t *input, __m128i *in,
int shift) {
__m128i u;
if (!flipud) {
in[0] = _mm_load_si128((const __m128i *)(input + 0 * stride));
in[1] = _mm_load_si128((const __m128i *)(input + 1 * stride));
in[2] = _mm_load_si128((const __m128i *)(input + 2 * stride));
in[3] = _mm_load_si128((const __m128i *)(input + 3 * stride));
in[4] = _mm_load_si128((const __m128i *)(input + 4 * stride));
in[5] = _mm_load_si128((const __m128i *)(input + 5 * stride));
in[6] = _mm_load_si128((const __m128i *)(input + 6 * stride));
in[7] = _mm_load_si128((const __m128i *)(input + 7 * stride));
in[0] = _mm_load_si128((const __m128i *)(input + 0 * stride));
in[1] = _mm_load_si128((const __m128i *)(input + 1 * stride));
in[2] = _mm_load_si128((const __m128i *)(input + 2 * stride));
in[3] = _mm_load_si128((const __m128i *)(input + 3 * stride));
in[4] = _mm_load_si128((const __m128i *)(input + 4 * stride));
in[5] = _mm_load_si128((const __m128i *)(input + 5 * stride));
in[6] = _mm_load_si128((const __m128i *)(input + 6 * stride));
in[7] = _mm_load_si128((const __m128i *)(input + 7 * stride));
} else {
in[0] = _mm_load_si128((const __m128i *)(input + 7 * stride));
in[1] = _mm_load_si128((const __m128i *)(input + 6 * stride));
in[2] = _mm_load_si128((const __m128i *)(input + 5 * stride));
in[3] = _mm_load_si128((const __m128i *)(input + 4 * stride));
in[4] = _mm_load_si128((const __m128i *)(input + 3 * stride));
in[5] = _mm_load_si128((const __m128i *)(input + 2 * stride));
in[6] = _mm_load_si128((const __m128i *)(input + 1 * stride));
in[7] = _mm_load_si128((const __m128i *)(input + 0 * stride));
in[0] = _mm_load_si128((const __m128i *)(input + 7 * stride));
in[1] = _mm_load_si128((const __m128i *)(input + 6 * stride));
in[2] = _mm_load_si128((const __m128i *)(input + 5 * stride));
in[3] = _mm_load_si128((const __m128i *)(input + 4 * stride));
in[4] = _mm_load_si128((const __m128i *)(input + 3 * stride));
in[5] = _mm_load_si128((const __m128i *)(input + 2 * stride));
in[6] = _mm_load_si128((const __m128i *)(input + 1 * stride));
in[7] = _mm_load_si128((const __m128i *)(input + 0 * stride));
}
if (fliplr) {
@ -452,7 +451,7 @@ static void fdct8x8_sse4_1(__m128i *in, __m128i *out, int bit) {
u[2] = _mm_add_epi32(in[4], in[10]);
u[5] = _mm_sub_epi32(in[4], in[10]);
u[3] = _mm_add_epi32(in[6], in[8]);
v[4] = _mm_sub_epi32(in[6], in[8]); // v[4]
v[4] = _mm_sub_epi32(in[6], in[8]); // v[4]
// stage 2
v[0] = _mm_add_epi32(u[0], u[3]);
@ -508,7 +507,7 @@ static void fdct8x8_sse4_1(__m128i *in, __m128i *out, int bit) {
v[1] = _mm_mullo_epi32(u[7], cospi8);
v[0] = _mm_add_epi32(v[0], v[1]);
v[0] = _mm_add_epi32(v[0], rnding);
out[2] = _mm_srai_epi32(v[0], bit); // buf0[4]
out[2] = _mm_srai_epi32(v[0], bit); // buf0[4]
v[0] = _mm_mullo_epi32(u[4], cospi8);
v[1] = _mm_mullo_epi32(u[7], cospi56);
@ -526,7 +525,7 @@ static void fdct8x8_sse4_1(__m128i *in, __m128i *out, int bit) {
v[1] = _mm_mullo_epi32(u[6], cospi24);
v[0] = _mm_sub_epi32(v[1], v[0]);
v[0] = _mm_add_epi32(v[0], rnding);
out[6] = _mm_srai_epi32(v[0], bit); // buf0[6]
out[6] = _mm_srai_epi32(v[0], bit); // buf0[6]
out[0] = u[0]; // buf0[0]
out[8] = u[1]; // buf0[1]
@ -543,7 +542,7 @@ static void fdct8x8_sse4_1(__m128i *in, __m128i *out, int bit) {
u[2] = _mm_add_epi32(in[5], in[11]);
u[5] = _mm_sub_epi32(in[5], in[11]);
u[3] = _mm_add_epi32(in[7], in[9]);
v[4] = _mm_sub_epi32(in[7], in[9]); // v[4]
v[4] = _mm_sub_epi32(in[7], in[9]); // v[4]
// stage 2
v[0] = _mm_add_epi32(u[0], u[3]);
@ -599,7 +598,7 @@ static void fdct8x8_sse4_1(__m128i *in, __m128i *out, int bit) {
v[1] = _mm_mullo_epi32(u[7], cospi8);
v[0] = _mm_add_epi32(v[0], v[1]);
v[0] = _mm_add_epi32(v[0], rnding);
out[3] = _mm_srai_epi32(v[0], bit); // buf0[4]
out[3] = _mm_srai_epi32(v[0], bit); // buf0[4]
v[0] = _mm_mullo_epi32(u[4], cospi8);
v[1] = _mm_mullo_epi32(u[7], cospi56);
@ -617,7 +616,7 @@ static void fdct8x8_sse4_1(__m128i *in, __m128i *out, int bit) {
v[1] = _mm_mullo_epi32(u[6], cospi24);
v[0] = _mm_sub_epi32(v[1], v[0]);
v[0] = _mm_add_epi32(v[0], rnding);
out[7] = _mm_srai_epi32(v[0], bit); // buf0[6]
out[7] = _mm_srai_epi32(v[0], bit); // buf0[6]
out[1] = u[0]; // buf0[0]
out[9] = u[1]; // buf0[1]
@ -1026,8 +1025,7 @@ void vp10_fwd_txfm2d_8x8_sse4_1(const int16_t *input, int32_t *coeff,
write_buffer_8x8(in, coeff);
break;
#endif // CONFIG_EXT_TX
default:
assert(0);
default: assert(0);
}
(void)bd;
}
@ -1063,7 +1061,7 @@ static INLINE void convert_8x8_to_16x16(const __m128i *in, __m128i *out) {
} while (row_index < 16);
}
static INLINE void load_buffer_16x16(const int16_t* input, __m128i *out,
static INLINE void load_buffer_16x16(const int16_t *input, __m128i *out,
int stride, int flipud, int fliplr,
int shift) {
__m128i in[64];
@ -1077,20 +1075,28 @@ static INLINE void load_buffer_16x16(const int16_t* input, __m128i *out,
if (flipud) {
// Swap left columns
tmp = topL; topL = botL; botL = tmp;
tmp = topL;
topL = botL;
botL = tmp;
// Swap right columns
tmp = topR; topR = botR; botR = tmp;
tmp = topR;
topR = botR;
botR = tmp;
}
if (fliplr) {
// Swap top rows
tmp = topL; topL = topR; topR = tmp;
tmp = topL;
topL = topR;
topR = tmp;
// Swap bottom rows
tmp = botL; botL = botR; botR = tmp;
tmp = botL;
botL = botR;
botR = tmp;
}
// load first 8 columns
load_buffer_8x8(topL, &in[0], stride, flipud, fliplr, shift);
load_buffer_8x8(topL, &in[0], stride, flipud, fliplr, shift);
load_buffer_8x8(botL, &in[32], stride, flipud, fliplr, shift);
// load second 8 columns
@ -1129,22 +1135,22 @@ static void fdct16x16_sse4_1(__m128i *in, __m128i *out, int bit) {
for (col = 0; col < col_num; ++col) {
// stage 0
// stage 1
u[0] = _mm_add_epi32(in[0 * col_num + col], in[15 * col_num + col]);
u[0] = _mm_add_epi32(in[0 * col_num + col], in[15 * col_num + col]);
u[15] = _mm_sub_epi32(in[0 * col_num + col], in[15 * col_num + col]);
u[1] = _mm_add_epi32(in[1 * col_num + col], in[14 * col_num + col]);
u[1] = _mm_add_epi32(in[1 * col_num + col], in[14 * col_num + col]);
u[14] = _mm_sub_epi32(in[1 * col_num + col], in[14 * col_num + col]);
u[2] = _mm_add_epi32(in[2 * col_num + col], in[13 * col_num + col]);
u[2] = _mm_add_epi32(in[2 * col_num + col], in[13 * col_num + col]);
u[13] = _mm_sub_epi32(in[2 * col_num + col], in[13 * col_num + col]);
u[3] = _mm_add_epi32(in[3 * col_num + col], in[12 * col_num + col]);
u[3] = _mm_add_epi32(in[3 * col_num + col], in[12 * col_num + col]);
u[12] = _mm_sub_epi32(in[3 * col_num + col], in[12 * col_num + col]);
u[4] = _mm_add_epi32(in[4 * col_num + col], in[11 * col_num + col]);
u[4] = _mm_add_epi32(in[4 * col_num + col], in[11 * col_num + col]);
u[11] = _mm_sub_epi32(in[4 * col_num + col], in[11 * col_num + col]);
u[5] = _mm_add_epi32(in[5 * col_num + col], in[10 * col_num + col]);
u[5] = _mm_add_epi32(in[5 * col_num + col], in[10 * col_num + col]);
u[10] = _mm_sub_epi32(in[5 * col_num + col], in[10 * col_num + col]);
u[6] = _mm_add_epi32(in[6 * col_num + col], in[9 * col_num + col]);
u[9] = _mm_sub_epi32(in[6 * col_num + col], in[9 * col_num + col]);
u[7] = _mm_add_epi32(in[7 * col_num + col], in[8 * col_num + col]);
u[8] = _mm_sub_epi32(in[7 * col_num + col], in[8 * col_num + col]);
u[6] = _mm_add_epi32(in[6 * col_num + col], in[9 * col_num + col]);
u[9] = _mm_sub_epi32(in[6 * col_num + col], in[9 * col_num + col]);
u[7] = _mm_add_epi32(in[7 * col_num + col], in[8 * col_num + col]);
u[8] = _mm_sub_epi32(in[7 * col_num + col], in[8 * col_num + col]);
// stage 2
v[0] = _mm_add_epi32(u[0], u[7]);
@ -1204,9 +1210,9 @@ static void fdct16x16_sse4_1(__m128i *in, __m128i *out, int bit) {
u[6] = _mm_srai_epi32(u[6], bit);
u[7] = v[7];
u[8] = _mm_add_epi32(v[8], v[11]);
u[8] = _mm_add_epi32(v[8], v[11]);
u[11] = _mm_sub_epi32(v[8], v[11]);
u[9] = _mm_add_epi32(v[9], v[10]);
u[9] = _mm_add_epi32(v[9], v[10]);
u[10] = _mm_sub_epi32(v[9], v[10]);
u[12] = _mm_sub_epi32(v[15], v[12]);
u[15] = _mm_add_epi32(v[15], v[12]);
@ -1883,8 +1889,7 @@ void vp10_fwd_txfm2d_16x16_sse4_1(const int16_t *input, int32_t *coeff,
write_buffer_16x16(in, coeff);
break;
#endif // CONFIG_EXT_TX
default:
assert(0);
default: assert(0);
}
(void)bd;
}

View file

@ -14,14 +14,13 @@
#include "./vp10_rtcd.h"
#include "vpx/vpx_integer.h"
void vp10_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t* zbin_ptr,
const int16_t* round_ptr, const int16_t* quant_ptr,
const int16_t* quant_shift_ptr, int16_t* qcoeff_ptr,
int16_t* dqcoeff_ptr, const int16_t* dequant_ptr,
uint16_t* eob_ptr,
const int16_t* scan_ptr,
const int16_t* iscan_ptr) {
void vp10_quantize_fp_sse2(const int16_t *coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t *zbin_ptr,
const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr,
int16_t *dqcoeff_ptr, const int16_t *dequant_ptr,
uint16_t *eob_ptr, const int16_t *scan_ptr,
const int16_t *iscan_ptr) {
__m128i zero;
__m128i thr;
int16_t nzflag;
@ -44,9 +43,9 @@ void vp10_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
// Setup global values
{
round = _mm_load_si128((const __m128i*)round_ptr);
quant = _mm_load_si128((const __m128i*)quant_ptr);
dequant = _mm_load_si128((const __m128i*)dequant_ptr);
round = _mm_load_si128((const __m128i *)round_ptr);
quant = _mm_load_si128((const __m128i *)quant_ptr);
dequant = _mm_load_si128((const __m128i *)dequant_ptr);
}
{
@ -54,8 +53,8 @@ void vp10_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
__m128i qcoeff0, qcoeff1;
__m128i qtmp0, qtmp1;
// Do DC and first 15 AC
coeff0 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs));
coeff1 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs) + 1);
coeff0 = _mm_load_si128((const __m128i *)(coeff_ptr + n_coeffs));
coeff1 = _mm_load_si128((const __m128i *)(coeff_ptr + n_coeffs) + 1);
// Poor man's sign extract
coeff0_sign = _mm_srai_epi16(coeff0, 15);
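
The "poor man's sign extract" pairs _mm_srai_epi16(x, 15) with an xor/subtract to take absolute values without a dedicated abs instruction. The scalar identity, as a sketch (two's complement assumed; INT16_MIN maps to itself, as in the SIMD code):

static int16_t abs16_sketch(int16_t x) {
  const int16_t s = (int16_t)(x >> 15);  // 0 if x >= 0, -1 if x < 0
  return (int16_t)((x ^ s) - s);         // flips bits and adds 1 when negative
}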
@ -78,15 +77,15 @@ void vp10_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0);
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), qcoeff0);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
dequant = _mm_unpackhi_epi64(dequant, dequant);
coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), coeff0);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
}
{
@ -99,8 +98,8 @@ void vp10_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
iscan0 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs));
iscan1 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs) + 1);
iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
// Add one to convert from indices to counts
iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
@ -121,8 +120,8 @@ void vp10_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
__m128i qcoeff0, qcoeff1;
__m128i qtmp0, qtmp1;
coeff0 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs));
coeff1 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs) + 1);
coeff0 = _mm_load_si128((const __m128i *)(coeff_ptr + n_coeffs));
coeff1 = _mm_load_si128((const __m128i *)(coeff_ptr + n_coeffs) + 1);
// Poor man's sign extract
coeff0_sign = _mm_srai_epi16(coeff0, 15);
@ -133,7 +132,7 @@ void vp10_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
nzflag = _mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff0, thr)) |
_mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff1, thr));
_mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff1, thr));
if (nzflag) {
qcoeff0 = _mm_adds_epi16(qcoeff0, round);
@ -147,20 +146,20 @@ void vp10_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0);
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), qcoeff0);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), coeff0);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
} else {
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, zero);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, zero);
}
}
@ -174,8 +173,8 @@ void vp10_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
iscan0 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs));
iscan1 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs) + 1);
iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
// Add one to convert from indices to counts
iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
@ -200,10 +199,10 @@ void vp10_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
}
} else {
do {
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero);
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, zero);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, zero);
n_coeffs += 8 * 2;
} while (n_coeffs < 0);
*eob_ptr = 0;

View file

@ -106,20 +106,12 @@ static INLINE uint16_t get_accumulated_eob(__m128i *eob) {
return eobValue;
}
void vp10_highbd_quantize_fp_sse4_1(const tran_low_t *coeff_ptr,
intptr_t count,
int skip_block,
const int16_t *zbin_ptr,
const int16_t *round_ptr,
const int16_t *quant_ptr,
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr,
uint16_t *eob_ptr,
const int16_t *scan,
const int16_t *iscan,
int log_scale) {
void vp10_highbd_quantize_fp_sse4_1(
const tran_low_t *coeff_ptr, intptr_t count, int skip_block,
const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan, int log_scale) {
__m128i coeff[2], qcoeff[2], dequant[2], qparam[3], coeff_sign;
__m128i eob = _mm_setzero_si128();
const tran_low_t *src = coeff_ptr;
@ -139,14 +131,14 @@ void vp10_highbd_quantize_fp_sse4_1(const tran_low_t *coeff_ptr,
if (!skip_block) {
coeff[0] = _mm_loadu_si128((__m128i const *)src);
qparam[0] = _mm_set_epi32(round_ptr[1], round_ptr[1], round_ptr[1],
round_ptr[0]);
qparam[0] =
_mm_set_epi32(round_ptr[1], round_ptr[1], round_ptr[1], round_ptr[0]);
qparam[1] = _mm_set_epi64x(quant_ptr[1], quant_ptr[0]);
qparam[2] = _mm_set_epi64x(dequant_ptr[1], dequant_ptr[0]);
// DC and first 3 AC
quantize_coeff_phase1(&coeff[0], qparam, shift, log_scale,
qcoeff, dequant, &coeff_sign);
quantize_coeff_phase1(&coeff[0], qparam, shift, log_scale, qcoeff, dequant,
&coeff_sign);
// update round/quan/dquan for AC
qparam[0] = _mm_unpackhi_epi64(qparam[0], qparam[0]);
@ -158,8 +150,8 @@ void vp10_highbd_quantize_fp_sse4_1(const tran_low_t *coeff_ptr,
// next 4 AC
coeff[1] = _mm_loadu_si128((__m128i const *)(src + coeff_stride));
quantize_coeff_phase1(&coeff[1], qparam, shift, log_scale,
qcoeff, dequant, &coeff_sign);
quantize_coeff_phase1(&coeff[1], qparam, shift, log_scale, qcoeff, dequant,
&coeff_sign);
quantize_coeff_phase2(qcoeff, dequant, &coeff_sign, qparam, shift,
log_scale, quanAddr + quan_stride,
dquanAddr + quan_stride);
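
A hedged reading of the qparam staging above: the DC round/quant/dequant values sit in the low lane with the AC values in the rest, so a single unpackhi re-broadcast switches the whole register to AC-only for every later group:

/* qparam[0] = { round[DC], round[AC], round[AC], round[AC] } (low to high)
 * for the first four coefficients; _mm_unpackhi_epi64(qparam[0], qparam[0])
 * then replicates the AC half so all subsequent groups quantize with AC
 * parameters only. */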

View file

@ -17,15 +17,13 @@
#include "vp10/common/reconinter.h"
#define MAX_MASK_VALUE (1 << WEDGE_WEIGHT_BITS)
#define MAX_MASK_VALUE (1 << WEDGE_WEIGHT_BITS)
/**
* See vp10_wedge_sse_from_residuals_c
*/
uint64_t vp10_wedge_sse_from_residuals_sse2(const int16_t *r1,
const int16_t *d,
const uint8_t *m,
int N) {
uint64_t vp10_wedge_sse_from_residuals_sse2(const int16_t *r1, const int16_t *d,
const uint8_t *m, int N) {
int n = -N;
int n8 = n + 8;
@ -98,10 +96,8 @@ uint64_t vp10_wedge_sse_from_residuals_sse2(const int16_t *r1,
/**
* See vp10_wedge_sign_from_residuals_c
*/
int vp10_wedge_sign_from_residuals_sse2(const int16_t *ds,
const uint8_t *m,
int N,
int64_t limit) {
int vp10_wedge_sign_from_residuals_sse2(const int16_t *ds, const uint8_t *m,
int N, int64_t limit) {
int64_t acc;
__m128i v_sign_d;
@ -167,11 +163,11 @@ int vp10_wedge_sign_from_residuals_sse2(const int16_t *ds,
v_sign_d = _mm_cmplt_epi32(v_acc0_d, _mm_setzero_si128());
v_acc0_d = _mm_add_epi64(_mm_unpacklo_epi32(v_acc0_d, v_sign_d),
_mm_unpackhi_epi32(v_acc0_d, v_sign_d));
_mm_unpackhi_epi32(v_acc0_d, v_sign_d));
v_sign_d = _mm_cmplt_epi32(v_acc1_d, _mm_setzero_si128());
v_acc1_d = _mm_add_epi64(_mm_unpacklo_epi32(v_acc1_d, v_sign_d),
_mm_unpackhi_epi32(v_acc1_d, v_sign_d));
_mm_unpackhi_epi32(v_acc1_d, v_sign_d));
v_acc_q = _mm_add_epi64(v_acc0_d, v_acc1_d);
@ -194,12 +190,10 @@ static INLINE __m128i negm_epi16(__m128i v_v_w, __m128i v_mask_w) {
/**
* See vp10_wedge_compute_delta_squares_c
*/
void vp10_wedge_compute_delta_squares_sse2(int16_t *d,
const int16_t *a,
const int16_t *b,
int N) {
const __m128i v_neg_w = _mm_set_epi16(0xffff, 0, 0xffff, 0,
0xffff, 0, 0xffff, 0);
void vp10_wedge_compute_delta_squares_sse2(int16_t *d, const int16_t *a,
const int16_t *b, int N) {
const __m128i v_neg_w =
_mm_set_epi16(0xffff, 0, 0xffff, 0, 0xffff, 0, 0xffff, 0);
assert(N % 64 == 0);
@ -257,4 +251,3 @@ void vp10_wedge_compute_delta_squares_sse2(int16_t *d,
N -= 32;
} while (N);
}