Refactor hbd txfm configurations to be 1D

The hbd transform configurations were originally written for all possible
2D transforms. Now that there are many more possible 2D transforms
due to EXT_TX and RECT_TX, it is simpler to write the configuration for the
four 1D transform types and compose them to make all the new possible
transform types. This will allow for an easier integration of the identity
transform for EXT_TX and rectangular transforms for RECT_TX into the current
hbd transform codepath and facilitate the removal of obsolete transforms.
This has no impact on performance.

BUG=aomedia:524

Change-Id: I1e217bcd217fd637b1df94fae62d9c59a0523c1a
This commit is contained in:
Sarah Parker 2017-05-15 20:49:22 -07:00
Родитель bb6e13432e
Коммит eec47e65bb
19 изменённых файлов: 1113 добавлений и 1296 удалений

Просмотреть файл

@ -16,11 +16,11 @@ set(AOM_AV1_COMMON_SOURCES
"${AOM_ROOT}/av1/common/av1_fwd_txfm1d.c"
"${AOM_ROOT}/av1/common/av1_fwd_txfm1d.h"
"${AOM_ROOT}/av1/common/av1_fwd_txfm2d.c"
"${AOM_ROOT}/av1/common/av1_fwd_txfm2d_cfg.h"
"${AOM_ROOT}/av1/common/av1_fwd_txfm1d_cfg.h"
"${AOM_ROOT}/av1/common/av1_inv_txfm1d.c"
"${AOM_ROOT}/av1/common/av1_inv_txfm1d.h"
"${AOM_ROOT}/av1/common/av1_inv_txfm2d.c"
"${AOM_ROOT}/av1/common/av1_inv_txfm2d_cfg.h"
"${AOM_ROOT}/av1/common/av1_inv_txfm1d_cfg.h"
"${AOM_ROOT}/av1/common/av1_loopfilter.c"
"${AOM_ROOT}/av1/common/av1_loopfilter.h"
"${AOM_ROOT}/av1/common/av1_txfm.h"

Просмотреть файл

@ -69,9 +69,9 @@ AV1_COMMON_SRCS-yes += common/av1_fwd_txfm1d.c
AV1_COMMON_SRCS-yes += common/av1_inv_txfm1d.h
AV1_COMMON_SRCS-yes += common/av1_inv_txfm1d.c
AV1_COMMON_SRCS-yes += common/av1_fwd_txfm2d.c
AV1_COMMON_SRCS-yes += common/av1_fwd_txfm2d_cfg.h
AV1_COMMON_SRCS-yes += common/av1_fwd_txfm1d_cfg.h
AV1_COMMON_SRCS-yes += common/av1_inv_txfm2d.c
AV1_COMMON_SRCS-yes += common/av1_inv_txfm2d_cfg.h
AV1_COMMON_SRCS-yes += common/av1_inv_txfm1d_cfg.h
AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/av1_convolve_ssse3.c
ifeq ($(CONFIG_HIGHBITDEPTH),yes)
AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/av1_highbd_convolve_sse4.c

Просмотреть файл

@ -0,0 +1,314 @@
/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#ifndef AV1_FWD_TXFM2D_CFG_H_
#define AV1_FWD_TXFM2D_CFG_H_
#include "av1/common/enums.h"
#include "av1/common/av1_fwd_txfm1d.h"
// ---------------- size-4 1D constants -----------------------
// Tables referenced by the size-4 TXFM_1D_CFG structs further down. Each
// stage_range / cos_bit array has one entry per transform stage, so its length
// equals the .stage_num of the config that points at it (4 for DCT, 6 for
// ADST).
// shift: three values shared by every size-4 config (row and column alike).
// NOTE(review): how the three entries are applied is decided by the 2D driver,
// which is not fully visible here -- confirm before changing them.
static const int8_t fwd_shift_4[3] = { 2, 0, 0 };
// stage range: separate tables for the column pass and the row pass.
static const int8_t fwd_stage_range_col_dct_4[4] = { 15, 16, 17, 17 };
static const int8_t fwd_stage_range_row_dct_4[4] = { 17, 18, 18, 18 };
static const int8_t fwd_stage_range_col_adst_4[6] = { 15, 15, 16, 17, 17, 17 };
static const int8_t fwd_stage_range_row_adst_4[6] = { 17, 17, 17, 18, 18, 18 };
// cos bit: every size-4 stage uses 13.
static const int8_t fwd_cos_bit_col_dct_4[4] = { 13, 13, 13, 13 };
static const int8_t fwd_cos_bit_row_dct_4[4] = { 13, 13, 13, 13 };
static const int8_t fwd_cos_bit_col_adst_4[6] = { 13, 13, 13, 13, 13, 13 };
static const int8_t fwd_cos_bit_row_adst_4[6] = { 13, 13, 13, 13, 13, 13 };
// ---------------- size-8 1D constants -----------------------
// Tables referenced by the size-8 TXFM_1D_CFG structs further down. Array
// lengths equal the .stage_num of the referencing config (6 for DCT, 8 for
// ADST).
// shift: three values shared by every size-8 config (row and column alike).
static const int8_t fwd_shift_8[3] = { 2, -1, 0 };
// stage range: separate tables for the column pass and the row pass.
static const int8_t fwd_stage_range_col_dct_8[6] = { 15, 16, 17, 18, 18, 18 };
static const int8_t fwd_stage_range_row_dct_8[6] = { 17, 18, 19, 19, 19, 19 };
static const int8_t fwd_stage_range_col_adst_8[8] = { 15, 15, 16, 17,
17, 18, 18, 18 };
static const int8_t fwd_stage_range_row_adst_8[8] = { 17, 17, 17, 18,
18, 19, 19, 19 };
// cos bit: every size-8 stage uses 13.
static const int8_t fwd_cos_bit_col_dct_8[6] = { 13, 13, 13, 13, 13, 13 };
static const int8_t fwd_cos_bit_row_dct_8[6] = { 13, 13, 13, 13, 13, 13 };
static const int8_t fwd_cos_bit_col_adst_8[8] = {
13, 13, 13, 13, 13, 13, 13, 13
};
static const int8_t fwd_cos_bit_row_adst_8[8] = {
13, 13, 13, 13, 13, 13, 13, 13
};
// ---------------- size-16 1D constants -----------------------
// Tables referenced by the size-16 TXFM_1D_CFG structs further down. Array
// lengths equal the .stage_num of the referencing config (8 for DCT, 10 for
// ADST).
// shift: three values shared by every size-16 config (row and column alike).
static const int8_t fwd_shift_16[3] = { 2, -2, 0 };
// stage range: separate tables for the column pass and the row pass.
static const int8_t fwd_stage_range_col_dct_16[8] = { 15, 16, 17, 18,
19, 19, 19, 19 };
static const int8_t fwd_stage_range_row_dct_16[8] = { 17, 18, 19, 20,
20, 20, 20, 20 };
static const int8_t fwd_stage_range_col_adst_16[10] = { 15, 15, 16, 17, 17,
18, 18, 19, 19, 19 };
static const int8_t fwd_stage_range_row_adst_16[10] = { 17, 17, 17, 18, 18,
19, 19, 20, 20, 20 };
// cos bit: note the asymmetry at this size -- column tables use 13, row
// tables use 12.
static const int8_t fwd_cos_bit_col_dct_16[8] = {
13, 13, 13, 13, 13, 13, 13, 13
};
static const int8_t fwd_cos_bit_row_dct_16[8] = {
12, 12, 12, 12, 12, 12, 12, 12
};
static const int8_t fwd_cos_bit_col_adst_16[10] = { 13, 13, 13, 13, 13,
13, 13, 13, 13, 13 };
static const int8_t fwd_cos_bit_row_adst_16[10] = { 12, 12, 12, 12, 12,
12, 12, 12, 12, 12 };
// ---------------- size-32 1D constants -----------------------
// Tables referenced by the size-32 TXFM_1D_CFG structs further down. Array
// lengths equal the .stage_num of the referencing config (10 for DCT, 12 for
// ADST).
// shift: three values shared by every size-32 config (row and column alike).
static const int8_t fwd_shift_32[3] = { 2, -4, 0 };
// stage range: separate tables for the column pass and the row pass.
static const int8_t fwd_stage_range_col_dct_32[10] = { 15, 16, 17, 18, 19,
20, 20, 20, 20, 20 };
static const int8_t fwd_stage_range_row_dct_32[10] = { 16, 17, 18, 19, 20,
20, 20, 20, 20, 20 };
static const int8_t fwd_stage_range_col_adst_32[12] = {
15, 15, 16, 17, 17, 18, 18, 19, 19, 20, 20, 20
};
static const int8_t fwd_stage_range_row_adst_32[12] = {
16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 20
};
// cos bit: every size-32 stage uses 12, for both passes.
static const int8_t fwd_cos_bit_col_dct_32[10] = { 12, 12, 12, 12, 12,
12, 12, 12, 12, 12 };
static const int8_t fwd_cos_bit_row_dct_32[10] = { 12, 12, 12, 12, 12,
12, 12, 12, 12, 12 };
static const int8_t fwd_cos_bit_col_adst_32[12] = { 12, 12, 12, 12, 12, 12,
12, 12, 12, 12, 12, 12 };
static const int8_t fwd_cos_bit_row_adst_32[12] = { 12, 12, 12, 12, 12, 12,
12, 12, 12, 12, 12, 12 };
// ---------------- size-64 1D constants -----------------------
// Tables referenced by the two size-64 TXFM_1D_CFG structs further down. Only
// DCT tables are defined at this size; each has 12 entries, matching the
// .stage_num of the size-64 configs.
// shift: three values shared by the size-64 row and column configs.
static const int8_t fwd_shift_64[3] = { 0, -2, -2 };
// stage range: separate tables for the column pass and the row pass.
static const int8_t fwd_stage_range_col_dct_64[12] = { 13, 14, 15, 16, 17, 18,
19, 19, 19, 19, 19, 19 };
static const int8_t fwd_stage_range_row_dct_64[12] = { 17, 18, 19, 20, 21, 22,
22, 22, 22, 22, 22, 22 };
// cos bit: unlike the smaller sizes, the value varies from stage to stage.
static const int8_t fwd_cos_bit_col_dct_64[12] = { 15, 15, 15, 15, 15, 14,
13, 13, 13, 13, 13, 13 };
static const int8_t fwd_cos_bit_row_dct_64[12] = { 15, 14, 13, 12, 11, 10,
10, 10, 10, 10, 10, 10 };
// ---------------- row config fwd_dct_4 ----------------
// Row-pass configuration for the forward size-4 DCT. Initializers are
// positional, so their order must follow the TXFM_1D_CFG declaration; the
// trailing comments name the field each value fills.
static const TXFM_1D_CFG fwd_txfm_1d_row_cfg_dct_4 = {
4, // .txfm_size
4, // .stage_num (length of the stage_range / cos_bit arrays below)
// 0, // .log_scale
fwd_shift_4, // .shift (shared with the size-4 column configs)
fwd_stage_range_row_dct_4, // .stage_range
fwd_cos_bit_row_dct_4, // .cos_bit
TXFM_TYPE_DCT4 // .txfm_type
};
// ---------------- row config fwd_dct_8 ----------------
// Row-pass configuration for the forward size-8 DCT. Initializers are
// positional; the trailing comments name the field each value fills.
static const TXFM_1D_CFG fwd_txfm_1d_row_cfg_dct_8 = {
8, // .txfm_size
6, // .stage_num (length of the stage_range / cos_bit arrays below)
// 0, // .log_scale
fwd_shift_8, // .shift (shared with the size-8 column configs)
fwd_stage_range_row_dct_8, // .stage_range
fwd_cos_bit_row_dct_8, // .cos_bit
TXFM_TYPE_DCT8 // .txfm_type
};
// ---------------- row config fwd_dct_16 ----------------
// Row-pass configuration for the forward size-16 DCT. Initializers are
// positional; the trailing comments name the field each value fills.
static const TXFM_1D_CFG fwd_txfm_1d_row_cfg_dct_16 = {
16, // .txfm_size
8, // .stage_num (length of the stage_range / cos_bit arrays below)
// 0, // .log_scale
fwd_shift_16, // .shift (shared with the size-16 column configs)
fwd_stage_range_row_dct_16, // .stage_range
fwd_cos_bit_row_dct_16, // .cos_bit (12 here; the column config uses 13)
TXFM_TYPE_DCT16 // .txfm_type
};
// ---------------- row config fwd_dct_32 ----------------
// Row-pass configuration for the forward size-32 DCT. Initializers are
// positional; the trailing comments name the field each value fills.
static const TXFM_1D_CFG fwd_txfm_1d_row_cfg_dct_32 = {
32, // .txfm_size
10, // .stage_num (length of the stage_range / cos_bit arrays below)
// 1, // .log_scale
fwd_shift_32, // .shift (shared with the size-32 column configs)
fwd_stage_range_row_dct_32, // .stage_range
fwd_cos_bit_row_dct_32, // .cos_bit
TXFM_TYPE_DCT32 // .txfm_type
};
// ---------------- row config fwd_dct_64 ----------------
// Row-pass configuration for the forward size-64 DCT, the only transform type
// with size-64 tables in this header. Initializers are positional; the
// trailing comments name the field each value fills.
static const TXFM_1D_CFG fwd_txfm_1d_row_cfg_dct_64 = {
64, // .txfm_size
12, // .stage_num (length of the stage_range / cos_bit arrays below)
fwd_shift_64, // .shift (shared with the size-64 column config)
fwd_stage_range_row_dct_64, // .stage_range
fwd_cos_bit_row_dct_64, // .cos_bit
TXFM_TYPE_DCT64, // .txfm_type
};
// ---------------- row config fwd_adst_4 ----------------
// Row-pass configuration for the forward size-4 ADST. Initializers are
// positional; the trailing comments name the field each value fills.
static const TXFM_1D_CFG fwd_txfm_1d_row_cfg_adst_4 = {
4, // .txfm_size
6, // .stage_num (length of the stage_range / cos_bit arrays below)
// 0, // .log_scale
fwd_shift_4, // .shift (shared with the size-4 column configs)
fwd_stage_range_row_adst_4, // .stage_range
fwd_cos_bit_row_adst_4, // .cos_bit
TXFM_TYPE_ADST4, // .txfm_type
};
// ---------------- row config fwd_adst_8 ----------------
// Row-pass configuration for the forward size-8 ADST. Initializers are
// positional; the trailing comments name the field each value fills.
static const TXFM_1D_CFG fwd_txfm_1d_row_cfg_adst_8 = {
8, // .txfm_size
8, // .stage_num (length of the stage_range / cos_bit arrays below)
// 0, // .log_scale
fwd_shift_8, // .shift (shared with the size-8 column configs)
fwd_stage_range_row_adst_8, // .stage_range
fwd_cos_bit_row_adst_8, // .cos_bit
TXFM_TYPE_ADST8, // .txfm_type
};
// ---------------- row config fwd_adst_16 ----------------
// Row-pass configuration for the forward size-16 ADST. Initializers are
// positional; the trailing comments name the field each value fills.
static const TXFM_1D_CFG fwd_txfm_1d_row_cfg_adst_16 = {
16, // .txfm_size
10, // .stage_num (length of the stage_range / cos_bit arrays below)
// 0, // .log_scale
fwd_shift_16, // .shift (shared with the size-16 column configs)
fwd_stage_range_row_adst_16, // .stage_range
fwd_cos_bit_row_adst_16, // .cos_bit (12 here; the column config uses 13)
TXFM_TYPE_ADST16, // .txfm_type
};
// ---------------- row config fwd_adst_32 ----------------
// Row-pass configuration for the forward size-32 ADST. Initializers are
// positional; the trailing comments name the field each value fills.
static const TXFM_1D_CFG fwd_txfm_1d_row_cfg_adst_32 = {
32, // .txfm_size
12, // .stage_num (length of the stage_range / cos_bit arrays below)
// 1, // .log_scale
fwd_shift_32, // .shift (shared with the size-32 column configs)
fwd_stage_range_row_adst_32, // .stage_range
fwd_cos_bit_row_adst_32, // .cos_bit
TXFM_TYPE_ADST32, // .txfm_type
};
// ---------------- col config fwd_dct_4 ----------------
// Column-pass configuration for the forward size-4 DCT. Initializers are
// positional, so their order must follow the TXFM_1D_CFG declaration; the
// trailing comments name the field each value fills.
static const TXFM_1D_CFG fwd_txfm_1d_col_cfg_dct_4 = {
4, // .txfm_size
4, // .stage_num (length of the stage_range / cos_bit arrays below)
// 0, // .log_scale
fwd_shift_4, // .shift (shared with the size-4 row configs)
fwd_stage_range_col_dct_4, // .stage_range
fwd_cos_bit_col_dct_4, // .cos_bit
TXFM_TYPE_DCT4 // .txfm_type
};
// ---------------- col config fwd_dct_8 ----------------
// Column-pass configuration for the forward size-8 DCT. Initializers are
// positional; the trailing comments name the field each value fills.
static const TXFM_1D_CFG fwd_txfm_1d_col_cfg_dct_8 = {
8, // .txfm_size
6, // .stage_num (length of the stage_range / cos_bit arrays below)
// 0, // .log_scale
fwd_shift_8, // .shift (shared with the size-8 row configs)
fwd_stage_range_col_dct_8, // .stage_range
fwd_cos_bit_col_dct_8, // .cos_bit
TXFM_TYPE_DCT8 // .txfm_type
};
// ---------------- col config fwd_dct_16 ----------------
// Column-pass configuration for the forward size-16 DCT. Initializers are
// positional; the trailing comments name the field each value fills.
static const TXFM_1D_CFG fwd_txfm_1d_col_cfg_dct_16 = {
16, // .txfm_size
8, // .stage_num (length of the stage_range / cos_bit arrays below)
// 0, // .log_scale
fwd_shift_16, // .shift (shared with the size-16 row configs)
fwd_stage_range_col_dct_16, // .stage_range
fwd_cos_bit_col_dct_16, // .cos_bit (13 here; the row config uses 12)
TXFM_TYPE_DCT16 // .txfm_type
};
// ---------------- col config fwd_dct_32 ----------------
// Column-pass configuration for the forward size-32 DCT. Initializers are
// positional; the trailing comments name the field each value fills.
static const TXFM_1D_CFG fwd_txfm_1d_col_cfg_dct_32 = {
32, // .txfm_size
10, // .stage_num (length of the stage_range / cos_bit arrays below)
// 1, // .log_scale
fwd_shift_32, // .shift (shared with the size-32 row configs)
fwd_stage_range_col_dct_32, // .stage_range
fwd_cos_bit_col_dct_32, // .cos_bit
TXFM_TYPE_DCT32 // .txfm_type
};
// ---------------- col config fwd_dct_64 ----------------
// Column-pass configuration for the forward size-64 DCT, the only transform
// type with size-64 tables in this header. Initializers are positional; the
// trailing comments name the field each value fills.
static const TXFM_1D_CFG fwd_txfm_1d_col_cfg_dct_64 = {
64, // .txfm_size
12, // .stage_num (length of the stage_range / cos_bit arrays below)
fwd_shift_64, // .shift (shared with the size-64 row config)
fwd_stage_range_col_dct_64, // .stage_range
fwd_cos_bit_col_dct_64, // .cos_bit
TXFM_TYPE_DCT64, // .txfm_type
};
// ---------------- col config fwd_adst_4 ----------------
// Column-pass configuration for the forward size-4 ADST. Initializers are
// positional; the trailing comments name the field each value fills.
static const TXFM_1D_CFG fwd_txfm_1d_col_cfg_adst_4 = {
4, // .txfm_size
6, // .stage_num (length of the stage_range / cos_bit arrays below)
// 0, // .log_scale
fwd_shift_4, // .shift (shared with the size-4 row configs)
fwd_stage_range_col_adst_4, // .stage_range
fwd_cos_bit_col_adst_4, // .cos_bit
TXFM_TYPE_ADST4, // .txfm_type
};
// ---------------- col config fwd_adst_8 ----------------
// Column-pass configuration for the forward size-8 ADST. Initializers are
// positional; the trailing comments name the field each value fills.
static const TXFM_1D_CFG fwd_txfm_1d_col_cfg_adst_8 = {
8, // .txfm_size
8, // .stage_num (length of the stage_range / cos_bit arrays below)
// 0, // .log_scale
fwd_shift_8, // .shift (shared with the size-8 row configs)
fwd_stage_range_col_adst_8, // .stage_range
fwd_cos_bit_col_adst_8, // .cos_bit
TXFM_TYPE_ADST8, // .txfm_type
};
// ---------------- col config fwd_adst_16 ----------------
// Column-pass configuration for the forward size-16 ADST. Initializers are
// positional; the trailing comments name the field each value fills.
static const TXFM_1D_CFG fwd_txfm_1d_col_cfg_adst_16 = {
16, // .txfm_size
10, // .stage_num (length of the stage_range / cos_bit arrays below)
// 0, // .log_scale
fwd_shift_16, // .shift (shared with the size-16 row configs)
fwd_stage_range_col_adst_16, // .stage_range
fwd_cos_bit_col_adst_16, // .cos_bit (13 here; the row config uses 12)
TXFM_TYPE_ADST16, // .txfm_type
};
// ---------------- col config fwd_adst_32 ----------------
// Column-pass configuration for the forward size-32 ADST. Initializers are
// positional; the trailing comments name the field each value fills.
static const TXFM_1D_CFG fwd_txfm_1d_col_cfg_adst_32 = {
32, // .txfm_size
12, // .stage_num (length of the stage_range / cos_bit arrays below)
// 1, // .log_scale
fwd_shift_32, // .shift (shared with the size-32 row configs)
fwd_stage_range_col_adst_32, // .stage_range
fwd_cos_bit_col_adst_32, // .cos_bit
TXFM_TYPE_ADST32, // .txfm_type
};
#endif // AV1_FWD_TXFM2D_CFG_H_

Просмотреть файл

@ -14,7 +14,7 @@
#include "./av1_rtcd.h"
#include "av1/common/enums.h"
#include "av1/common/av1_fwd_txfm1d.h"
#include "av1/common/av1_fwd_txfm2d_cfg.h"
#include "av1/common/av1_fwd_txfm1d_cfg.h"
#include "av1/common/av1_txfm.h"
static INLINE TxfmFunc fwd_txfm_type_to_func(TXFM_TYPE txfm_type) {
@ -35,14 +35,15 @@ static INLINE void fwd_txfm2d_c(const int16_t *input, int32_t *output,
const int stride, const TXFM_2D_FLIP_CFG *cfg,
int32_t *buf) {
int c, r;
const int txfm_size = cfg->cfg->txfm_size;
const int8_t *shift = cfg->cfg->shift;
const int8_t *stage_range_col = cfg->cfg->stage_range_col;
const int8_t *stage_range_row = cfg->cfg->stage_range_row;
const int8_t *cos_bit_col = cfg->cfg->cos_bit_col;
const int8_t *cos_bit_row = cfg->cfg->cos_bit_row;
const TxfmFunc txfm_func_col = fwd_txfm_type_to_func(cfg->cfg->txfm_type_col);
const TxfmFunc txfm_func_row = fwd_txfm_type_to_func(cfg->cfg->txfm_type_row);
// TODO(sarahparker) must correct for rectangular transforms in follow up
const int txfm_size = cfg->row_cfg->txfm_size;
const int8_t *shift = cfg->row_cfg->shift;
const int8_t *stage_range_col = cfg->col_cfg->stage_range;
const int8_t *stage_range_row = cfg->row_cfg->stage_range;
const int8_t *cos_bit_col = cfg->col_cfg->cos_bit;
const int8_t *cos_bit_row = cfg->row_cfg->cos_bit;
const TxfmFunc txfm_func_col = fwd_txfm_type_to_func(cfg->col_cfg->txfm_type);
const TxfmFunc txfm_func_row = fwd_txfm_type_to_func(cfg->row_cfg->txfm_type);
// use output buffer as temp buffer
int32_t *temp_in = output;
@ -117,69 +118,79 @@ void av1_fwd_txfm2d_64x64_c(const int16_t *input, int32_t *output, int stride,
fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf);
}
static const TXFM_2D_CFG *fwd_txfm_cfg_ls[TX_TYPES][TX_SIZES] = {
static const TXFM_1D_CFG *fwd_txfm_col_cfg_ls[TX_TYPES_1D][TX_SIZES] = {
// DCT
{
#if CONFIG_CB4X4
NULL,
#endif
&fwd_txfm_2d_cfg_dct_dct_4, &fwd_txfm_2d_cfg_dct_dct_8,
&fwd_txfm_2d_cfg_dct_dct_16, &fwd_txfm_2d_cfg_dct_dct_32 },
&fwd_txfm_1d_col_cfg_dct_4, &fwd_txfm_1d_col_cfg_dct_8,
&fwd_txfm_1d_col_cfg_dct_16, &fwd_txfm_1d_col_cfg_dct_32 },
// ADST
{
#if CONFIG_CB4X4
NULL,
#endif
&fwd_txfm_2d_cfg_adst_dct_4, &fwd_txfm_2d_cfg_adst_dct_8,
&fwd_txfm_2d_cfg_adst_dct_16, &fwd_txfm_2d_cfg_adst_dct_32 },
{
#if CONFIG_CB4X4
NULL,
#endif
&fwd_txfm_2d_cfg_dct_adst_4, &fwd_txfm_2d_cfg_dct_adst_8,
&fwd_txfm_2d_cfg_dct_adst_16, &fwd_txfm_2d_cfg_dct_adst_32 },
{
#if CONFIG_CB4X4
NULL,
#endif
&fwd_txfm_2d_cfg_adst_adst_4, &fwd_txfm_2d_cfg_adst_adst_8,
&fwd_txfm_2d_cfg_adst_adst_16, &fwd_txfm_2d_cfg_adst_adst_32 },
&fwd_txfm_1d_col_cfg_adst_4, &fwd_txfm_1d_col_cfg_adst_8,
&fwd_txfm_1d_col_cfg_adst_16, &fwd_txfm_1d_col_cfg_adst_32 },
#if CONFIG_EXT_TX
// FLIPADST
{
#if CONFIG_CB4X4
NULL,
#endif
&fwd_txfm_2d_cfg_adst_dct_4, &fwd_txfm_2d_cfg_adst_dct_8,
&fwd_txfm_2d_cfg_adst_dct_16, &fwd_txfm_2d_cfg_adst_dct_32 },
&fwd_txfm_1d_col_cfg_adst_4, &fwd_txfm_1d_col_cfg_adst_8,
&fwd_txfm_1d_col_cfg_adst_16, &fwd_txfm_1d_col_cfg_adst_32 },
// IDENTITY PLACEHOLDER
{
#if CONFIG_CB4X4
NULL,
#endif
&fwd_txfm_2d_cfg_dct_adst_4, &fwd_txfm_2d_cfg_dct_adst_8,
&fwd_txfm_2d_cfg_dct_adst_16, &fwd_txfm_2d_cfg_dct_adst_32 },
&fwd_txfm_1d_col_cfg_adst_4, &fwd_txfm_1d_col_cfg_adst_8,
&fwd_txfm_1d_col_cfg_adst_16, &fwd_txfm_1d_col_cfg_adst_32 },
#endif // CONFIG_EXT_TX
};
static const TXFM_1D_CFG *fwd_txfm_row_cfg_ls[TX_TYPES_1D][TX_SIZES] = {
// DCT
{
#if CONFIG_CB4X4
NULL,
#endif
&fwd_txfm_2d_cfg_adst_adst_4, &fwd_txfm_2d_cfg_adst_adst_8,
&fwd_txfm_2d_cfg_adst_adst_16, &fwd_txfm_2d_cfg_adst_adst_32 },
&fwd_txfm_1d_row_cfg_dct_4, &fwd_txfm_1d_row_cfg_dct_8,
&fwd_txfm_1d_row_cfg_dct_16, &fwd_txfm_1d_row_cfg_dct_32 },
// ADST
{
#if CONFIG_CB4X4
NULL,
#endif
&fwd_txfm_2d_cfg_adst_adst_4, &fwd_txfm_2d_cfg_adst_adst_8,
&fwd_txfm_2d_cfg_adst_adst_16, &fwd_txfm_2d_cfg_adst_adst_32 },
&fwd_txfm_1d_row_cfg_adst_4, &fwd_txfm_1d_row_cfg_adst_8,
&fwd_txfm_1d_row_cfg_adst_16, &fwd_txfm_1d_row_cfg_adst_32 },
#if CONFIG_EXT_TX
// FLIPADST
{
#if CONFIG_CB4X4
NULL,
#endif
&fwd_txfm_2d_cfg_adst_adst_4, &fwd_txfm_2d_cfg_adst_adst_8,
&fwd_txfm_2d_cfg_adst_adst_16, &fwd_txfm_2d_cfg_adst_adst_32 },
&fwd_txfm_1d_row_cfg_adst_4, &fwd_txfm_1d_row_cfg_adst_8,
&fwd_txfm_1d_row_cfg_adst_16, &fwd_txfm_1d_row_cfg_adst_32 },
// IDENTITY PLACEHOLDER
{
#if CONFIG_CB4X4
NULL,
#endif
&fwd_txfm_1d_row_cfg_adst_4, &fwd_txfm_1d_row_cfg_adst_8,
&fwd_txfm_1d_row_cfg_adst_16, &fwd_txfm_1d_row_cfg_adst_32 },
#endif // CONFIG_EXT_TX
};
TXFM_2D_FLIP_CFG av1_get_fwd_txfm_cfg(int tx_type, int tx_size) {
TXFM_2D_FLIP_CFG cfg;
set_flip_cfg(tx_type, &cfg);
cfg.cfg = fwd_txfm_cfg_ls[tx_type][tx_size];
int tx_type_col = vtx_tab[tx_type];
int tx_type_row = htx_tab[tx_type];
cfg.col_cfg = fwd_txfm_col_cfg_ls[tx_type_col][tx_size];
cfg.row_cfg = fwd_txfm_row_cfg_ls[tx_type_row][tx_size];
return cfg;
}
@ -187,13 +198,11 @@ TXFM_2D_FLIP_CFG av1_get_fwd_txfm_64x64_cfg(int tx_type) {
TXFM_2D_FLIP_CFG cfg;
switch (tx_type) {
case DCT_DCT:
cfg.cfg = &fwd_txfm_2d_cfg_dct_dct_64;
cfg.col_cfg = &fwd_txfm_1d_col_cfg_dct_64;
cfg.row_cfg = &fwd_txfm_1d_row_cfg_dct_64;
cfg.ud_flip = 0;
cfg.lr_flip = 0;
break;
case ADST_DCT:
case DCT_ADST:
case ADST_ADST:
default:
cfg.ud_flip = 0;
cfg.lr_flip = 0;

Просмотреть файл

@ -1,444 +0,0 @@
/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#ifndef AV1_FWD_TXFM2D_CFG_H_
#define AV1_FWD_TXFM2D_CFG_H_
#include "av1/common/enums.h"
#include "av1/common/av1_fwd_txfm1d.h"
// ---------------- config fwd_dct_dct_4 ----------------
static const int8_t fwd_shift_dct_dct_4[3] = { 2, 0, 0 };
static const int8_t fwd_stage_range_col_dct_dct_4[4] = { 15, 16, 17, 17 };
static const int8_t fwd_stage_range_row_dct_dct_4[4] = { 17, 18, 18, 18 };
static const int8_t fwd_cos_bit_col_dct_dct_4[4] = { 13, 13, 13, 13 };
static const int8_t fwd_cos_bit_row_dct_dct_4[4] = { 13, 13, 13, 13 };
static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_dct_4 = {
4, // .txfm_size
4, // .stage_num_col
4, // .stage_num_row
// 0, // .log_scale
fwd_shift_dct_dct_4, // .shift
fwd_stage_range_col_dct_dct_4, // .stage_range_col
fwd_stage_range_row_dct_dct_4, // .stage_range_row
fwd_cos_bit_col_dct_dct_4, // .cos_bit_col
fwd_cos_bit_row_dct_dct_4, // .cos_bit_row
TXFM_TYPE_DCT4, // .txfm_type_col
TXFM_TYPE_DCT4
}; // .txfm_type_row
// ---------------- config fwd_dct_dct_8 ----------------
static const int8_t fwd_shift_dct_dct_8[3] = { 2, -1, 0 };
static const int8_t fwd_stage_range_col_dct_dct_8[6] = {
15, 16, 17, 18, 18, 18
};
static const int8_t fwd_stage_range_row_dct_dct_8[6] = {
17, 18, 19, 19, 19, 19
};
static const int8_t fwd_cos_bit_col_dct_dct_8[6] = { 13, 13, 13, 13, 13, 13 };
static const int8_t fwd_cos_bit_row_dct_dct_8[6] = { 13, 13, 13, 13, 13, 13 };
static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_dct_8 = {
8, // .txfm_size
6, // .stage_num_col
6, // .stage_num_row
// 0, // .log_scale
fwd_shift_dct_dct_8, // .shift
fwd_stage_range_col_dct_dct_8, // .stage_range_col
fwd_stage_range_row_dct_dct_8, // .stage_range_row
fwd_cos_bit_col_dct_dct_8, // .cos_bit_col
fwd_cos_bit_row_dct_dct_8, // .cos_bit_row
TXFM_TYPE_DCT8, // .txfm_type_col
TXFM_TYPE_DCT8
}; // .txfm_type_row
// ---------------- config fwd_dct_dct_16 ----------------
static const int8_t fwd_shift_dct_dct_16[3] = { 2, -2, 0 };
static const int8_t fwd_stage_range_col_dct_dct_16[8] = { 15, 16, 17, 18,
19, 19, 19, 19 };
static const int8_t fwd_stage_range_row_dct_dct_16[8] = { 17, 18, 19, 20,
20, 20, 20, 20 };
static const int8_t fwd_cos_bit_col_dct_dct_16[8] = { 13, 13, 13, 13,
13, 13, 13, 13 };
static const int8_t fwd_cos_bit_row_dct_dct_16[8] = { 12, 12, 12, 12,
12, 12, 12, 12 };
static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_dct_16 = {
16, // .txfm_size
8, // .stage_num_col
8, // .stage_num_row
// 0, // .log_scale
fwd_shift_dct_dct_16, // .shift
fwd_stage_range_col_dct_dct_16, // .stage_range_col
fwd_stage_range_row_dct_dct_16, // .stage_range_row
fwd_cos_bit_col_dct_dct_16, // .cos_bit_col
fwd_cos_bit_row_dct_dct_16, // .cos_bit_row
TXFM_TYPE_DCT16, // .txfm_type_col
TXFM_TYPE_DCT16
}; // .txfm_type_row
// ---------------- config fwd_dct_dct_32 ----------------
static const int8_t fwd_shift_dct_dct_32[3] = { 2, -4, 0 };
static const int8_t fwd_stage_range_col_dct_dct_32[10] = { 15, 16, 17, 18, 19,
20, 20, 20, 20, 20 };
static const int8_t fwd_stage_range_row_dct_dct_32[10] = { 16, 17, 18, 19, 20,
20, 20, 20, 20, 20 };
static const int8_t fwd_cos_bit_col_dct_dct_32[10] = { 12, 12, 12, 12, 12,
12, 12, 12, 12, 12 };
static const int8_t fwd_cos_bit_row_dct_dct_32[10] = { 12, 12, 12, 12, 12,
12, 12, 12, 12, 12 };
static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_dct_32 = {
32, // .txfm_size
10, // .stage_num_col
10, // .stage_num_row
// 1, // .log_scale
fwd_shift_dct_dct_32, // .shift
fwd_stage_range_col_dct_dct_32, // .stage_range_col
fwd_stage_range_row_dct_dct_32, // .stage_range_row
fwd_cos_bit_col_dct_dct_32, // .cos_bit_col
fwd_cos_bit_row_dct_dct_32, // .cos_bit_row
TXFM_TYPE_DCT32, // .txfm_type_col
TXFM_TYPE_DCT32
}; // .txfm_type_row
// ---------------- config fwd_dct_dct_64 ----------------
static const int8_t fwd_shift_dct_dct_64[3] = { 0, -2, -2 };
static const int8_t fwd_stage_range_col_dct_dct_64[12] = {
13, 14, 15, 16, 17, 18, 19, 19, 19, 19, 19, 19
};
static const int8_t fwd_stage_range_row_dct_dct_64[12] = {
17, 18, 19, 20, 21, 22, 22, 22, 22, 22, 22, 22
};
static const int8_t fwd_cos_bit_col_dct_dct_64[12] = { 15, 15, 15, 15, 15, 14,
13, 13, 13, 13, 13, 13 };
static const int8_t fwd_cos_bit_row_dct_dct_64[12] = { 15, 14, 13, 12, 11, 10,
10, 10, 10, 10, 10, 10 };
static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_dct_64 = {
64, // .txfm_size
12, // .stage_num_col
12, // .stage_num_row
fwd_shift_dct_dct_64, // .shift
fwd_stage_range_col_dct_dct_64, // .stage_range_col
fwd_stage_range_row_dct_dct_64, // .stage_range_row
fwd_cos_bit_col_dct_dct_64, // .cos_bit_col
fwd_cos_bit_row_dct_dct_64, // .cos_bit_row
TXFM_TYPE_DCT64, // .txfm_type_col
TXFM_TYPE_DCT64
}; // .txfm_type_row
// ---------------- config fwd_dct_adst_4 ----------------
static const int8_t fwd_shift_dct_adst_4[3] = { 2, 0, 0 };
static const int8_t fwd_stage_range_col_dct_adst_4[4] = { 15, 16, 17, 17 };
static const int8_t fwd_stage_range_row_dct_adst_4[6] = {
17, 17, 17, 18, 18, 18
};
static const int8_t fwd_cos_bit_col_dct_adst_4[4] = { 13, 13, 13, 13 };
static const int8_t fwd_cos_bit_row_dct_adst_4[6] = { 13, 13, 13, 13, 13, 13 };
static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_adst_4 = {
4, // .txfm_size
4, // .stage_num_col
6, // .stage_num_row
// 0, // .log_scale
fwd_shift_dct_adst_4, // .shift
fwd_stage_range_col_dct_adst_4, // .stage_range_col
fwd_stage_range_row_dct_adst_4, // .stage_range_row
fwd_cos_bit_col_dct_adst_4, // .cos_bit_col
fwd_cos_bit_row_dct_adst_4, // .cos_bit_row
TXFM_TYPE_DCT4, // .txfm_type_col
TXFM_TYPE_ADST4
}; // .txfm_type_row
// ---------------- config fwd_dct_adst_8 ----------------
static const int8_t fwd_shift_dct_adst_8[3] = { 2, -1, 0 };
static const int8_t fwd_stage_range_col_dct_adst_8[6] = {
15, 16, 17, 18, 18, 18
};
static const int8_t fwd_stage_range_row_dct_adst_8[8] = { 17, 17, 17, 18,
18, 19, 19, 19 };
static const int8_t fwd_cos_bit_col_dct_adst_8[6] = { 13, 13, 13, 13, 13, 13 };
static const int8_t fwd_cos_bit_row_dct_adst_8[8] = { 13, 13, 13, 13,
13, 13, 13, 13 };
static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_adst_8 = {
8, // .txfm_size
6, // .stage_num_col
8, // .stage_num_row
// 0, // .log_scale
fwd_shift_dct_adst_8, // .shift
fwd_stage_range_col_dct_adst_8, // .stage_range_col
fwd_stage_range_row_dct_adst_8, // .stage_range_row
fwd_cos_bit_col_dct_adst_8, // .cos_bit_col
fwd_cos_bit_row_dct_adst_8, // .cos_bit_row
TXFM_TYPE_DCT8, // .txfm_type_col
TXFM_TYPE_ADST8
}; // .txfm_type_row
// ---------------- config fwd_dct_adst_16 ----------------
static const int8_t fwd_shift_dct_adst_16[3] = { 2, -2, 0 };
static const int8_t fwd_stage_range_col_dct_adst_16[8] = { 15, 16, 17, 18,
19, 19, 19, 19 };
static const int8_t fwd_stage_range_row_dct_adst_16[10] = {
17, 17, 17, 18, 18, 19, 19, 20, 20, 20
};
static const int8_t fwd_cos_bit_col_dct_adst_16[8] = { 13, 13, 13, 13,
13, 13, 13, 13 };
static const int8_t fwd_cos_bit_row_dct_adst_16[10] = { 12, 12, 12, 12, 12,
12, 12, 12, 12, 12 };
static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_adst_16 = {
16, // .txfm_size
8, // .stage_num_col
10, // .stage_num_row
// 0, // .log_scale
fwd_shift_dct_adst_16, // .shift
fwd_stage_range_col_dct_adst_16, // .stage_range_col
fwd_stage_range_row_dct_adst_16, // .stage_range_row
fwd_cos_bit_col_dct_adst_16, // .cos_bit_col
fwd_cos_bit_row_dct_adst_16, // .cos_bit_row
TXFM_TYPE_DCT16, // .txfm_type_col
TXFM_TYPE_ADST16
}; // .txfm_type_row
// ---------------- config fwd_dct_adst_32 ----------------
static const int8_t fwd_shift_dct_adst_32[3] = { 2, -4, 0 };
static const int8_t fwd_stage_range_col_dct_adst_32[10] = {
15, 16, 17, 18, 19, 20, 20, 20, 20, 20
};
static const int8_t fwd_stage_range_row_dct_adst_32[12] = {
16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 20
};
static const int8_t fwd_cos_bit_col_dct_adst_32[10] = { 12, 12, 12, 12, 12,
12, 12, 12, 12, 12 };
static const int8_t fwd_cos_bit_row_dct_adst_32[12] = {
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12
};
static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_adst_32 = {
32, // .txfm_size
10, // .stage_num_col
12, // .stage_num_row
// 1, // .log_scale
fwd_shift_dct_adst_32, // .shift
fwd_stage_range_col_dct_adst_32, // .stage_range_col
fwd_stage_range_row_dct_adst_32, // .stage_range_row
fwd_cos_bit_col_dct_adst_32, // .cos_bit_col
fwd_cos_bit_row_dct_adst_32, // .cos_bit_row
TXFM_TYPE_DCT32, // .txfm_type_col
TXFM_TYPE_ADST32
}; // .txfm_type_row
// ---------------- config fwd_adst_adst_4 ----------------
static const int8_t fwd_shift_adst_adst_4[3] = { 2, 0, 0 };
static const int8_t fwd_stage_range_col_adst_adst_4[6] = { 15, 15, 16,
17, 17, 17 };
static const int8_t fwd_stage_range_row_adst_adst_4[6] = { 17, 17, 17,
18, 18, 18 };
static const int8_t fwd_cos_bit_col_adst_adst_4[6] = { 13, 13, 13, 13, 13, 13 };
static const int8_t fwd_cos_bit_row_adst_adst_4[6] = { 13, 13, 13, 13, 13, 13 };
static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_adst_4 = {
4, // .txfm_size
6, // .stage_num_col
6, // .stage_num_row
// 0, // .log_scale
fwd_shift_adst_adst_4, // .shift
fwd_stage_range_col_adst_adst_4, // .stage_range_col
fwd_stage_range_row_adst_adst_4, // .stage_range_row
fwd_cos_bit_col_adst_adst_4, // .cos_bit_col
fwd_cos_bit_row_adst_adst_4, // .cos_bit_row
TXFM_TYPE_ADST4, // .txfm_type_col
TXFM_TYPE_ADST4
}; // .txfm_type_row
// ---------------- config fwd_adst_adst_8 ----------------
static const int8_t fwd_shift_adst_adst_8[3] = { 2, -1, 0 };
static const int8_t fwd_stage_range_col_adst_adst_8[8] = { 15, 15, 16, 17,
17, 18, 18, 18 };
static const int8_t fwd_stage_range_row_adst_adst_8[8] = { 17, 17, 17, 18,
18, 19, 19, 19 };
static const int8_t fwd_cos_bit_col_adst_adst_8[8] = { 13, 13, 13, 13,
13, 13, 13, 13 };
static const int8_t fwd_cos_bit_row_adst_adst_8[8] = { 13, 13, 13, 13,
13, 13, 13, 13 };
static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_adst_8 = {
8, // .txfm_size
8, // .stage_num_col
8, // .stage_num_row
// 0, // .log_scale
fwd_shift_adst_adst_8, // .shift
fwd_stage_range_col_adst_adst_8, // .stage_range_col
fwd_stage_range_row_adst_adst_8, // .stage_range_row
fwd_cos_bit_col_adst_adst_8, // .cos_bit_col
fwd_cos_bit_row_adst_adst_8, // .cos_bit_row
TXFM_TYPE_ADST8, // .txfm_type_col
TXFM_TYPE_ADST8
}; // .txfm_type_row
// ---------------- config fwd_adst_adst_16 ----------------
static const int8_t fwd_shift_adst_adst_16[3] = { 2, -2, 0 };
static const int8_t fwd_stage_range_col_adst_adst_16[10] = {
15, 15, 16, 17, 17, 18, 18, 19, 19, 19
};
static const int8_t fwd_stage_range_row_adst_adst_16[10] = {
17, 17, 17, 18, 18, 19, 19, 20, 20, 20
};
static const int8_t fwd_cos_bit_col_adst_adst_16[10] = { 13, 13, 13, 13, 13,
13, 13, 13, 13, 13 };
static const int8_t fwd_cos_bit_row_adst_adst_16[10] = { 12, 12, 12, 12, 12,
12, 12, 12, 12, 12 };
static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_adst_16 = {
16, // .txfm_size
10, // .stage_num_col
10, // .stage_num_row
// 0, // .log_scale
fwd_shift_adst_adst_16, // .shift
fwd_stage_range_col_adst_adst_16, // .stage_range_col
fwd_stage_range_row_adst_adst_16, // .stage_range_row
fwd_cos_bit_col_adst_adst_16, // .cos_bit_col
fwd_cos_bit_row_adst_adst_16, // .cos_bit_row
TXFM_TYPE_ADST16, // .txfm_type_col
TXFM_TYPE_ADST16
}; // .txfm_type_row
// ---------------- config fwd_adst_adst_32 ----------------
static const int8_t fwd_shift_adst_adst_32[3] = { 2, -4, 0 };
static const int8_t fwd_stage_range_col_adst_adst_32[12] = {
15, 15, 16, 17, 17, 18, 18, 19, 19, 20, 20, 20
};
static const int8_t fwd_stage_range_row_adst_adst_32[12] = {
16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 20
};
static const int8_t fwd_cos_bit_col_adst_adst_32[12] = {
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12
};
static const int8_t fwd_cos_bit_row_adst_adst_32[12] = {
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12
};
static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_adst_32 = {
32, // .txfm_size
12, // .stage_num_col
12, // .stage_num_row
// 1, // .log_scale
fwd_shift_adst_adst_32, // .shift
fwd_stage_range_col_adst_adst_32, // .stage_range_col
fwd_stage_range_row_adst_adst_32, // .stage_range_row
fwd_cos_bit_col_adst_adst_32, // .cos_bit_col
fwd_cos_bit_row_adst_adst_32, // .cos_bit_row
TXFM_TYPE_ADST32, // .txfm_type_col
TXFM_TYPE_ADST32
}; // .txfm_type_row
// ---------------- config fwd_adst_dct_4 ----------------
static const int8_t fwd_shift_adst_dct_4[3] = { 2, 0, 0 };
static const int8_t fwd_stage_range_col_adst_dct_4[6] = {
15, 15, 16, 17, 17, 17
};
static const int8_t fwd_stage_range_row_adst_dct_4[4] = { 17, 18, 18, 18 };
static const int8_t fwd_cos_bit_col_adst_dct_4[6] = { 13, 13, 13, 13, 13, 13 };
static const int8_t fwd_cos_bit_row_adst_dct_4[4] = { 13, 13, 13, 13 };
static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_dct_4 = {
4, // .txfm_size
6, // .stage_num_col
4, // .stage_num_row
// 0, // .log_scale
fwd_shift_adst_dct_4, // .shift
fwd_stage_range_col_adst_dct_4, // .stage_range_col
fwd_stage_range_row_adst_dct_4, // .stage_range_row
fwd_cos_bit_col_adst_dct_4, // .cos_bit_col
fwd_cos_bit_row_adst_dct_4, // .cos_bit_row
TXFM_TYPE_ADST4, // .txfm_type_col
TXFM_TYPE_DCT4
}; // .txfm_type_row
// ---------------- config fwd_adst_dct_8 ----------------
static const int8_t fwd_shift_adst_dct_8[3] = { 2, -1, 0 };
static const int8_t fwd_stage_range_col_adst_dct_8[8] = { 15, 15, 16, 17,
17, 18, 18, 18 };
static const int8_t fwd_stage_range_row_adst_dct_8[6] = {
17, 18, 19, 19, 19, 19
};
static const int8_t fwd_cos_bit_col_adst_dct_8[8] = { 13, 13, 13, 13,
13, 13, 13, 13 };
static const int8_t fwd_cos_bit_row_adst_dct_8[6] = { 13, 13, 13, 13, 13, 13 };
static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_dct_8 = {
8, // .txfm_size
8, // .stage_num_col
6, // .stage_num_row
// 0, // .log_scale
fwd_shift_adst_dct_8, // .shift
fwd_stage_range_col_adst_dct_8, // .stage_range_col
fwd_stage_range_row_adst_dct_8, // .stage_range_row
fwd_cos_bit_col_adst_dct_8, // .cos_bit_col
fwd_cos_bit_row_adst_dct_8, // .cos_bit_row
TXFM_TYPE_ADST8, // .txfm_type_col
TXFM_TYPE_DCT8
}; // .txfm_type_row
// ---------------- config fwd_adst_dct_16 ----------------
// Forward 2D config, txfm_size 16: ADST16 on columns (10 stages), DCT16 on
// rows (8 stages).
// Column-pass tables.
static const int8_t fwd_stage_range_col_adst_dct_16[10] = {
  15, 15, 16, 17, 17, 18, 18, 19, 19, 19,
};
static const int8_t fwd_cos_bit_col_adst_dct_16[10] = {
  13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
};
// Row-pass tables.
static const int8_t fwd_stage_range_row_adst_dct_16[8] = {
  17, 18, 19, 20, 20, 20, 20, 20,
};
static const int8_t fwd_cos_bit_row_adst_dct_16[8] = {
  12, 12, 12, 12, 12, 12, 12, 12,
};
// Shift table referenced by the .shift field.
static const int8_t fwd_shift_adst_dct_16[3] = {
  2, -2, 0,
};
static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_dct_16 = {
  16,                               // .txfm_size
  10,                               // .stage_num_col
  8,                                // .stage_num_row
  fwd_shift_adst_dct_16,            // .shift
  fwd_stage_range_col_adst_dct_16,  // .stage_range_col
  fwd_stage_range_row_adst_dct_16,  // .stage_range_row
  fwd_cos_bit_col_adst_dct_16,      // .cos_bit_col
  fwd_cos_bit_row_adst_dct_16,      // .cos_bit_row
  TXFM_TYPE_ADST16,                 // .txfm_type_col
  TXFM_TYPE_DCT16,                  // .txfm_type_row
};
// ---------------- config fwd_adst_dct_32 ----------------
// Forward 2D config, txfm_size 32: ADST32 on columns (12 stages), DCT32 on
// rows (10 stages).
// Column-pass tables.
static const int8_t fwd_stage_range_col_adst_dct_32[12] = {
  15, 15, 16, 17, 17, 18, 18, 19, 19, 20, 20, 20,
};
static const int8_t fwd_cos_bit_col_adst_dct_32[12] = {
  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
};
// Row-pass tables.
static const int8_t fwd_stage_range_row_adst_dct_32[10] = {
  16, 17, 18, 19, 20, 20, 20, 20, 20, 20,
};
static const int8_t fwd_cos_bit_row_adst_dct_32[10] = {
  12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
};
// Shift table referenced by the .shift field.
static const int8_t fwd_shift_adst_dct_32[3] = {
  2, -4, 0,
};
static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_dct_32 = {
  32,                               // .txfm_size
  12,                               // .stage_num_col
  10,                               // .stage_num_row
  fwd_shift_adst_dct_32,            // .shift
  fwd_stage_range_col_adst_dct_32,  // .stage_range_col
  fwd_stage_range_row_adst_dct_32,  // .stage_range_row
  fwd_cos_bit_col_adst_dct_32,      // .cos_bit_col
  fwd_cos_bit_row_adst_dct_32,      // .cos_bit_row
  TXFM_TYPE_ADST32,                 // .txfm_type_col
  TXFM_TYPE_DCT32,                  // .txfm_type_row
};
#endif // AV1_FWD_TXFM2D_CFG_H_

Просмотреть файл

@ -0,0 +1,312 @@
/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#ifndef AV1_INV_TXFM2D_CFG_H_
#define AV1_INV_TXFM2D_CFG_H_
#include "av1/common/av1_inv_txfm1d.h"
// ---------------- 4x4 1D constants -----------------------
// Shift table shared by every size-4 inverse 1D config.
static const int8_t inv_shift_4[2] = {
  0, -4,
};
// DCT4 (4 stages): stage range then cos bit, column pass before row pass.
static const int8_t inv_stage_range_col_dct_4[4] = {
  18, 18, 17, 17,
};
static const int8_t inv_stage_range_row_dct_4[4] = {
  18, 18, 18, 18,
};
static const int8_t inv_cos_bit_col_dct_4[4] = {
  13, 13, 13, 13,
};
static const int8_t inv_cos_bit_row_dct_4[4] = {
  13, 13, 13, 13,
};
// ADST4 (6 stages): stage range then cos bit, column pass before row pass.
static const int8_t inv_stage_range_col_adst_4[6] = {
  18, 18, 18, 18, 17, 17,
};
static const int8_t inv_stage_range_row_adst_4[6] = {
  18, 18, 18, 18, 18, 18,
};
static const int8_t inv_cos_bit_col_adst_4[6] = {
  13, 13, 13, 13, 13, 13,
};
static const int8_t inv_cos_bit_row_adst_4[6] = {
  13, 13, 13, 13, 13, 13,
};
// ---------------- 8x8 1D constants -----------------------
// Shift table shared by every size-8 inverse 1D config.
static const int8_t inv_shift_8[2] = {
  0, -5,
};
// DCT8 (6 stages): stage range then cos bit, column pass before row pass.
static const int8_t inv_stage_range_col_dct_8[6] = {
  19, 19, 19, 19, 18, 18,
};
static const int8_t inv_stage_range_row_dct_8[6] = {
  19, 19, 19, 19, 19, 19,
};
static const int8_t inv_cos_bit_col_dct_8[6] = {
  13, 13, 13, 13, 13, 13,
};
static const int8_t inv_cos_bit_row_dct_8[6] = {
  13, 13, 13, 13, 13, 13,
};
// ADST8 (8 stages): stage range then cos bit, column pass before row pass.
static const int8_t inv_stage_range_col_adst_8[8] = {
  19, 19, 19, 19, 19, 19, 18, 18,
};
static const int8_t inv_stage_range_row_adst_8[8] = {
  19, 19, 19, 19, 19, 19, 19, 19,
};
static const int8_t inv_cos_bit_col_adst_8[8] = {
  13, 13, 13, 13, 13, 13, 13, 13,
};
static const int8_t inv_cos_bit_row_adst_8[8] = {
  13, 13, 13, 13, 13, 13, 13, 13,
};
// ---------------- 16x16 1D constants -----------------------
// Shift table shared by every size-16 inverse 1D config.
static const int8_t inv_shift_16[2] = {
  -1, -5,
};
// DCT16 (8 stages): stage range then cos bit, column pass before row pass.
static const int8_t inv_stage_range_col_dct_16[8] = {
  19, 19, 19, 19, 19, 19, 18, 18,
};
static const int8_t inv_stage_range_row_dct_16[8] = {
  20, 20, 20, 20, 20, 20, 20, 20,
};
static const int8_t inv_cos_bit_col_dct_16[8] = {
  13, 13, 13, 13, 13, 13, 13, 13,
};
static const int8_t inv_cos_bit_row_dct_16[8] = {
  12, 12, 12, 12, 12, 12, 12, 12,
};
// ADST16 (10 stages): stage range then cos bit, column pass before row pass.
static const int8_t inv_stage_range_col_adst_16[10] = {
  19, 19, 19, 19, 19, 19, 19, 19, 18, 18,
};
static const int8_t inv_stage_range_row_adst_16[10] = {
  20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
};
static const int8_t inv_cos_bit_col_adst_16[10] = {
  13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
};
static const int8_t inv_cos_bit_row_adst_16[10] = {
  12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
};
// ---------------- 32x32 1D constants -----------------------
// Shift table shared by every size-32 inverse 1D config.
static const int8_t inv_shift_32[2] = {
  -1, -5,
};
// DCT32 (10 stages): stage range then cos bit, column pass before row pass.
static const int8_t inv_stage_range_col_dct_32[10] = {
  19, 19, 19, 19, 19, 19, 19, 19, 18, 18,
};
static const int8_t inv_stage_range_row_dct_32[10] = {
  20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
};
static const int8_t inv_cos_bit_col_dct_32[10] = {
  13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
};
static const int8_t inv_cos_bit_row_dct_32[10] = {
  12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
};
// ADST32 (12 stages): stage range then cos bit, column pass before row pass.
static const int8_t inv_stage_range_col_adst_32[12] = {
  19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18,
};
static const int8_t inv_stage_range_row_adst_32[12] = {
  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
};
static const int8_t inv_cos_bit_col_adst_32[12] = {
  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
};
static const int8_t inv_cos_bit_row_adst_32[12] = {
  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
};
// ---------------- 64x64 1D constants -----------------------
// Shift table shared by the size-64 inverse 1D configs.
static const int8_t inv_shift_64[2] = {
  -1, -7,
};
// DCT64 (12 stages): column-pass tables.
static const int8_t inv_stage_range_col_dct_64[12] = {
  19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18,
};
static const int8_t inv_cos_bit_col_dct_64[12] = {
  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
};
// DCT64 (12 stages): row-pass tables.
static const int8_t inv_stage_range_row_dct_64[12] = {
  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
};
static const int8_t inv_cos_bit_row_dct_64[12] = {
  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
};
// ---------------- row config inv_dct_4 ----------------
// Row-pass 1D config for the size-4 inverse DCT.
static const TXFM_1D_CFG inv_txfm_1d_row_cfg_dct_4 = {
  4,                          // .txfm_size
  4,                          // .stage_num
  inv_shift_4,                // .shift
  inv_stage_range_row_dct_4,  // .stage_range
  inv_cos_bit_row_dct_4,      // .cos_bit
  TXFM_TYPE_DCT4,             // .txfm_type
};
// ---------------- row config inv_dct_8 ----------------
// Row-pass 1D config for the size-8 inverse DCT.
static const TXFM_1D_CFG inv_txfm_1d_row_cfg_dct_8 = {
  8,                          // .txfm_size
  6,                          // .stage_num
  inv_shift_8,                // .shift
  inv_stage_range_row_dct_8,  // .stage_range
  inv_cos_bit_row_dct_8,      // .cos_bit
  TXFM_TYPE_DCT8,             // .txfm_type
};
// ---------------- row config inv_dct_16 ----------------
// Row-pass 1D config for the size-16 inverse DCT.
static const TXFM_1D_CFG inv_txfm_1d_row_cfg_dct_16 = {
  16,                          // .txfm_size
  8,                           // .stage_num
  inv_shift_16,                // .shift
  inv_stage_range_row_dct_16,  // .stage_range
  inv_cos_bit_row_dct_16,      // .cos_bit
  TXFM_TYPE_DCT16,             // .txfm_type
};
// ---------------- row config inv_dct_32 ----------------
// Row-pass 1D config for the size-32 inverse DCT.
static const TXFM_1D_CFG inv_txfm_1d_row_cfg_dct_32 = {
  32,                          // .txfm_size
  10,                          // .stage_num
  inv_shift_32,                // .shift
  inv_stage_range_row_dct_32,  // .stage_range
  inv_cos_bit_row_dct_32,      // .cos_bit
  TXFM_TYPE_DCT32,             // .txfm_type
};
// ---------------- row config inv_dct_64 ----------------
// Row-pass 1D config for the size-64 inverse DCT.
static const TXFM_1D_CFG inv_txfm_1d_row_cfg_dct_64 = {
  64,                          // .txfm_size
  12,                          // .stage_num
  inv_shift_64,                // .shift
  inv_stage_range_row_dct_64,  // .stage_range
  inv_cos_bit_row_dct_64,      // .cos_bit
  TXFM_TYPE_DCT64,             // .txfm_type
};
// ---------------- row config inv_adst_4 ----------------
// Row-pass 1D config for the size-4 inverse ADST.
static const TXFM_1D_CFG inv_txfm_1d_row_cfg_adst_4 = {
  4,                           // .txfm_size
  6,                           // .stage_num
  inv_shift_4,                 // .shift
  inv_stage_range_row_adst_4,  // .stage_range
  inv_cos_bit_row_adst_4,      // .cos_bit
  TXFM_TYPE_ADST4,             // .txfm_type
};
// ---------------- row config inv_adst_8 ----------------
// Row-pass 1D config for the size-8 inverse ADST.
static const TXFM_1D_CFG inv_txfm_1d_row_cfg_adst_8 = {
  8,                           // .txfm_size
  8,                           // .stage_num
  inv_shift_8,                 // .shift
  inv_stage_range_row_adst_8,  // .stage_range
  inv_cos_bit_row_adst_8,      // .cos_bit
  TXFM_TYPE_ADST8,             // .txfm_type
};
// ---------------- row config inv_adst_16 ----------------
// Row-pass 1D config for the size-16 inverse ADST.
static const TXFM_1D_CFG inv_txfm_1d_row_cfg_adst_16 = {
  16,                           // .txfm_size
  10,                           // .stage_num
  inv_shift_16,                 // .shift
  inv_stage_range_row_adst_16,  // .stage_range
  inv_cos_bit_row_adst_16,      // .cos_bit
  TXFM_TYPE_ADST16,             // .txfm_type
};
// ---------------- row config inv_adst_32 ----------------
// Row-pass 1D config for the size-32 inverse ADST.
static const TXFM_1D_CFG inv_txfm_1d_row_cfg_adst_32 = {
  32,                           // .txfm_size
  12,                           // .stage_num
  inv_shift_32,                 // .shift
  inv_stage_range_row_adst_32,  // .stage_range
  inv_cos_bit_row_adst_32,      // .cos_bit
  TXFM_TYPE_ADST32,             // .txfm_type
};
// ---------------- col config inv_dct_4 ----------------
// Column-pass 1D config for the size-4 inverse DCT.
static const TXFM_1D_CFG inv_txfm_1d_col_cfg_dct_4 = {
  4,                          // .txfm_size
  4,                          // .stage_num
  inv_shift_4,                // .shift
  inv_stage_range_col_dct_4,  // .stage_range
  inv_cos_bit_col_dct_4,      // .cos_bit
  TXFM_TYPE_DCT4,             // .txfm_type
};
// ---------------- col config inv_dct_8 ----------------
// Column-pass 1D config for the size-8 inverse DCT.
static const TXFM_1D_CFG inv_txfm_1d_col_cfg_dct_8 = {
  8,                          // .txfm_size
  6,                          // .stage_num
  inv_shift_8,                // .shift
  inv_stage_range_col_dct_8,  // .stage_range
  inv_cos_bit_col_dct_8,      // .cos_bit
  TXFM_TYPE_DCT8,             // .txfm_type
};
// ---------------- col config inv_dct_16 ----------------
// Column-pass 1D config for the size-16 inverse DCT.
static const TXFM_1D_CFG inv_txfm_1d_col_cfg_dct_16 = {
  16,                          // .txfm_size
  8,                           // .stage_num
  inv_shift_16,                // .shift
  inv_stage_range_col_dct_16,  // .stage_range
  inv_cos_bit_col_dct_16,      // .cos_bit
  TXFM_TYPE_DCT16,             // .txfm_type
};
// ---------------- col config inv_dct_32 ----------------
// Column-pass 1D config for the size-32 inverse DCT.
static const TXFM_1D_CFG inv_txfm_1d_col_cfg_dct_32 = {
  32,                          // .txfm_size
  10,                          // .stage_num
  inv_shift_32,                // .shift
  inv_stage_range_col_dct_32,  // .stage_range
  inv_cos_bit_col_dct_32,      // .cos_bit
  TXFM_TYPE_DCT32,             // .txfm_type
};
// ---------------- col config inv_dct_64 ----------------
// Column-pass 1D config for the size-64 inverse DCT.
static const TXFM_1D_CFG inv_txfm_1d_col_cfg_dct_64 = {
  64,                          // .txfm_size
  12,                          // .stage_num
  inv_shift_64,                // .shift
  inv_stage_range_col_dct_64,  // .stage_range
  inv_cos_bit_col_dct_64,      // .cos_bit
  TXFM_TYPE_DCT64,             // .txfm_type
};
// ---------------- col config inv_adst_4 ----------------
// Column-pass 1D config for the size-4 inverse ADST.
static const TXFM_1D_CFG inv_txfm_1d_col_cfg_adst_4 = {
  4,                           // .txfm_size
  6,                           // .stage_num
  inv_shift_4,                 // .shift
  inv_stage_range_col_adst_4,  // .stage_range
  inv_cos_bit_col_adst_4,      // .cos_bit
  TXFM_TYPE_ADST4,             // .txfm_type
};
// ---------------- col config inv_adst_8 ----------------
// Column-pass 1D config for the size-8 inverse ADST.
static const TXFM_1D_CFG inv_txfm_1d_col_cfg_adst_8 = {
  8,                           // .txfm_size
  8,                           // .stage_num
  inv_shift_8,                 // .shift
  inv_stage_range_col_adst_8,  // .stage_range
  inv_cos_bit_col_adst_8,      // .cos_bit
  TXFM_TYPE_ADST8,             // .txfm_type
};
// ---------------- col config inv_adst_16 ----------------
// Column-pass 1D config for the size-16 inverse ADST.
static const TXFM_1D_CFG inv_txfm_1d_col_cfg_adst_16 = {
  16,                           // .txfm_size
  10,                           // .stage_num
  inv_shift_16,                 // .shift
  inv_stage_range_col_adst_16,  // .stage_range
  inv_cos_bit_col_adst_16,      // .cos_bit
  TXFM_TYPE_ADST16,             // .txfm_type
};
// ---------------- col config inv_adst_32 ----------------
// Column-pass 1D config for the size-32 inverse ADST.
static const TXFM_1D_CFG inv_txfm_1d_col_cfg_adst_32 = {
  32,                           // .txfm_size
  12,                           // .stage_num
  inv_shift_32,                 // .shift
  inv_stage_range_col_adst_32,  // .stage_range
  inv_cos_bit_col_adst_32,      // .cos_bit
  TXFM_TYPE_ADST32,             // .txfm_type
};
#endif // AV1_INV_TXFM2D_CFG_H_

Просмотреть файл

@ -13,7 +13,7 @@
#include "av1/common/enums.h"
#include "av1/common/av1_txfm.h"
#include "av1/common/av1_inv_txfm1d.h"
#include "av1/common/av1_inv_txfm2d_cfg.h"
#include "av1/common/av1_inv_txfm1d_cfg.h"
static INLINE TxfmFunc inv_txfm_type_to_func(TXFM_TYPE txfm_type) {
switch (txfm_type) {
@ -29,77 +29,90 @@ static INLINE TxfmFunc inv_txfm_type_to_func(TXFM_TYPE txfm_type) {
}
}
static const TXFM_2D_CFG *inv_txfm_cfg_ls[TX_TYPES][TX_SIZES] = {
static const TXFM_1D_CFG *inv_txfm_col_cfg_ls[TX_TYPES_1D][TX_SIZES] = {
// DCT
{
#if CONFIG_CB4X4
NULL,
#endif
&inv_txfm_2d_cfg_dct_dct_4, &inv_txfm_2d_cfg_dct_dct_8,
&inv_txfm_2d_cfg_dct_dct_16, &inv_txfm_2d_cfg_dct_dct_32 },
&inv_txfm_1d_col_cfg_dct_4, &inv_txfm_1d_col_cfg_dct_8,
&inv_txfm_1d_col_cfg_dct_16, &inv_txfm_1d_col_cfg_dct_32 },
// ADST
{
#if CONFIG_CB4X4
NULL,
#endif
&inv_txfm_2d_cfg_adst_dct_4, &inv_txfm_2d_cfg_adst_dct_8,
&inv_txfm_2d_cfg_adst_dct_16, &inv_txfm_2d_cfg_adst_dct_32 },
{
#if CONFIG_CB4X4
NULL,
#endif
&inv_txfm_2d_cfg_dct_adst_4, &inv_txfm_2d_cfg_dct_adst_8,
&inv_txfm_2d_cfg_dct_adst_16, &inv_txfm_2d_cfg_dct_adst_32 },
{
#if CONFIG_CB4X4
NULL,
#endif
&inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
&inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32 },
&inv_txfm_1d_col_cfg_adst_4, &inv_txfm_1d_col_cfg_adst_8,
&inv_txfm_1d_col_cfg_adst_16, &inv_txfm_1d_col_cfg_adst_32 },
#if CONFIG_EXT_TX
// FLIPADST
{
#if CONFIG_CB4X4
NULL,
#endif
&inv_txfm_2d_cfg_adst_dct_4, &inv_txfm_2d_cfg_adst_dct_8,
&inv_txfm_2d_cfg_adst_dct_16, &inv_txfm_2d_cfg_adst_dct_32 },
&inv_txfm_1d_col_cfg_adst_4, &inv_txfm_1d_col_cfg_adst_8,
&inv_txfm_1d_col_cfg_adst_16, &inv_txfm_1d_col_cfg_adst_32 },
// IDENTITY PLACEHOLDER
{
#if CONFIG_CB4X4
NULL,
#endif
&inv_txfm_2d_cfg_dct_adst_4, &inv_txfm_2d_cfg_dct_adst_8,
&inv_txfm_2d_cfg_dct_adst_16, &inv_txfm_2d_cfg_dct_adst_32 },
&inv_txfm_1d_col_cfg_adst_4, &inv_txfm_1d_col_cfg_adst_8,
&inv_txfm_1d_col_cfg_adst_16, &inv_txfm_1d_col_cfg_adst_32 },
#endif // CONFIG_EXT_TX
};
static const TXFM_1D_CFG *inv_txfm_row_cfg_ls[TX_TYPES_1D][TX_SIZES] = {
// DCT
{
#if CONFIG_CB4X4
NULL,
#endif
&inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
&inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32 },
&inv_txfm_1d_row_cfg_dct_4, &inv_txfm_1d_row_cfg_dct_8,
&inv_txfm_1d_row_cfg_dct_16, &inv_txfm_1d_row_cfg_dct_32 },
// ADST
{
#if CONFIG_CB4X4
NULL,
#endif
&inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
&inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32 },
&inv_txfm_1d_row_cfg_adst_4, &inv_txfm_1d_row_cfg_adst_8,
&inv_txfm_1d_row_cfg_adst_16, &inv_txfm_1d_row_cfg_adst_32 },
#if CONFIG_EXT_TX
// FLIPADST
{
#if CONFIG_CB4X4
NULL,
#endif
&inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
&inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32 },
&inv_txfm_1d_row_cfg_adst_4, &inv_txfm_1d_row_cfg_adst_8,
&inv_txfm_1d_row_cfg_adst_16, &inv_txfm_1d_row_cfg_adst_32 },
// IDENTITY PLACEHOLDER
{
#if CONFIG_CB4X4
NULL,
#endif
&inv_txfm_1d_row_cfg_adst_4, &inv_txfm_1d_row_cfg_adst_8,
&inv_txfm_1d_row_cfg_adst_16, &inv_txfm_1d_row_cfg_adst_32 },
#endif // CONFIG_EXT_TX
};
// Returns the inverse 2D transform configuration for tx_type / tx_size.
// set_flip_cfg() is applied to the result first, then the column and row 1D
// configs are looked up independently from the per-direction tables.
// NOTE(review): this span comes from a diff view that shows old and new lines
// together; the `cfg.cfg = ...` assignment looks like the pre-refactor line
// and should be confirmed as removed in the final file.
TXFM_2D_FLIP_CFG av1_get_inv_txfm_cfg(int tx_type, int tx_size) {
TXFM_2D_FLIP_CFG cfg;
set_flip_cfg(tx_type, &cfg);
cfg.cfg = inv_txfm_cfg_ls[tx_type][tx_size];
// Map the 2D tx_type to a 1D type per direction (presumably vtx_tab is the
// vertical/column type and htx_tab the horizontal/row type -- verify).
int tx_type_col = vtx_tab[tx_type];
int tx_type_row = htx_tab[tx_type];
// TODO(sarahparker) this is currently only implemented for
// square transforms
// Both lookups use the same tx_size index, hence the square-only restriction.
cfg.col_cfg = inv_txfm_col_cfg_ls[tx_type_col][tx_size];
cfg.row_cfg = inv_txfm_row_cfg_ls[tx_type_row][tx_size];
return cfg;
}
TXFM_2D_FLIP_CFG av1_get_inv_txfm_64x64_cfg(int tx_type) {
TXFM_2D_FLIP_CFG cfg = { 0, 0, NULL };
TXFM_2D_FLIP_CFG cfg = { 0, 0, NULL, NULL };
switch (tx_type) {
case DCT_DCT:
cfg.cfg = &inv_txfm_2d_cfg_dct_dct_64;
cfg.col_cfg = &inv_txfm_1d_col_cfg_dct_64;
cfg.row_cfg = &inv_txfm_1d_row_cfg_dct_64;
set_flip_cfg(tx_type, &cfg);
break;
default: assert(0);
@ -110,14 +123,15 @@ TXFM_2D_FLIP_CFG av1_get_inv_txfm_64x64_cfg(int tx_type) {
static INLINE void inv_txfm2d_add_c(const int32_t *input, int16_t *output,
int stride, TXFM_2D_FLIP_CFG *cfg,
int32_t *txfm_buf) {
const int txfm_size = cfg->cfg->txfm_size;
const int8_t *shift = cfg->cfg->shift;
const int8_t *stage_range_col = cfg->cfg->stage_range_col;
const int8_t *stage_range_row = cfg->cfg->stage_range_row;
const int8_t *cos_bit_col = cfg->cfg->cos_bit_col;
const int8_t *cos_bit_row = cfg->cfg->cos_bit_row;
const TxfmFunc txfm_func_col = inv_txfm_type_to_func(cfg->cfg->txfm_type_col);
const TxfmFunc txfm_func_row = inv_txfm_type_to_func(cfg->cfg->txfm_type_row);
// TODO(sarahparker) must correct for rectangular transforms in follow up
const int txfm_size = cfg->row_cfg->txfm_size;
const int8_t *shift = cfg->row_cfg->shift;
const int8_t *stage_range_col = cfg->col_cfg->stage_range;
const int8_t *stage_range_row = cfg->row_cfg->stage_range;
const int8_t *cos_bit_col = cfg->col_cfg->cos_bit;
const int8_t *cos_bit_row = cfg->row_cfg->cos_bit;
const TxfmFunc txfm_func_col = inv_txfm_type_to_func(cfg->col_cfg->txfm_type);
const TxfmFunc txfm_func_row = inv_txfm_type_to_func(cfg->row_cfg->txfm_type);
// txfm_buf's length is txfm_size * txfm_size + 2 * txfm_size
// it is used for intermediate data buffering
@ -165,7 +179,11 @@ static INLINE void inv_txfm2d_add_facade(const int32_t *input, uint16_t *output,
// int16_t*
TXFM_2D_FLIP_CFG cfg = av1_get_inv_txfm_cfg(tx_type, tx_size);
inv_txfm2d_add_c(input, (int16_t *)output, stride, &cfg, txfm_buf);
clamp_block((int16_t *)output, cfg.cfg->txfm_size, stride, 0, (1 << bd) - 1);
// TODO(sarahparker) just using row_cfg->txfm_size for now because
// we are assuming this is only used for square transforms. This will
// be adjusted in a follow up
clamp_block((int16_t *)output, cfg.row_cfg->txfm_size, stride, 0,
(1 << bd) - 1);
}
void av1_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output,

Просмотреть файл

@ -1,445 +0,0 @@
/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#ifndef AV1_INV_TXFM2D_CFG_H_
#define AV1_INV_TXFM2D_CFG_H_
#include "av1/common/av1_inv_txfm1d.h"
// ---------------- config inv_dct_dct_4 ----------------
// Inverse 2D config, txfm_size 4: DCT4 on columns (4 stages), DCT4 on rows
// (4 stages).
// Column-pass tables.
static const int8_t inv_stage_range_col_dct_dct_4[4] = {
  18, 18, 17, 17,
};
static const int8_t inv_cos_bit_col_dct_dct_4[4] = {
  13, 13, 13, 13,
};
// Row-pass tables.
static const int8_t inv_stage_range_row_dct_dct_4[4] = {
  18, 18, 18, 18,
};
static const int8_t inv_cos_bit_row_dct_dct_4[4] = {
  13, 13, 13, 13,
};
// Shift table referenced by the .shift field.
static const int8_t inv_shift_dct_dct_4[2] = {
  0, -4,
};
static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_4 = {
  4,                              // .txfm_size
  4,                              // .stage_num_col
  4,                              // .stage_num_row
  inv_shift_dct_dct_4,            // .shift
  inv_stage_range_col_dct_dct_4,  // .stage_range_col
  inv_stage_range_row_dct_dct_4,  // .stage_range_row
  inv_cos_bit_col_dct_dct_4,      // .cos_bit_col
  inv_cos_bit_row_dct_dct_4,      // .cos_bit_row
  TXFM_TYPE_DCT4,                 // .txfm_type_col
  TXFM_TYPE_DCT4,                 // .txfm_type_row
};
// ---------------- config inv_dct_dct_8 ----------------
// Inverse 2D config, txfm_size 8: DCT8 on columns (6 stages), DCT8 on rows
// (6 stages).
// Column-pass tables.
static const int8_t inv_stage_range_col_dct_dct_8[6] = {
  19, 19, 19, 19, 18, 18,
};
static const int8_t inv_cos_bit_col_dct_dct_8[6] = {
  13, 13, 13, 13, 13, 13,
};
// Row-pass tables.
static const int8_t inv_stage_range_row_dct_dct_8[6] = {
  19, 19, 19, 19, 19, 19,
};
static const int8_t inv_cos_bit_row_dct_dct_8[6] = {
  13, 13, 13, 13, 13, 13,
};
// Shift table referenced by the .shift field.
static const int8_t inv_shift_dct_dct_8[2] = {
  0, -5,
};
static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_8 = {
  8,                              // .txfm_size
  6,                              // .stage_num_col
  6,                              // .stage_num_row
  inv_shift_dct_dct_8,            // .shift
  inv_stage_range_col_dct_dct_8,  // .stage_range_col
  inv_stage_range_row_dct_dct_8,  // .stage_range_row
  inv_cos_bit_col_dct_dct_8,      // .cos_bit_col
  inv_cos_bit_row_dct_dct_8,      // .cos_bit_row
  TXFM_TYPE_DCT8,                 // .txfm_type_col
  TXFM_TYPE_DCT8,                 // .txfm_type_row
};
// ---------------- config inv_dct_dct_16 ----------------
// Inverse 2D config, txfm_size 16: DCT16 on columns (8 stages), DCT16 on rows
// (8 stages).
// Column-pass tables.
static const int8_t inv_stage_range_col_dct_dct_16[8] = {
  19, 19, 19, 19, 19, 19, 18, 18,
};
static const int8_t inv_cos_bit_col_dct_dct_16[8] = {
  13, 13, 13, 13, 13, 13, 13, 13,
};
// Row-pass tables.
static const int8_t inv_stage_range_row_dct_dct_16[8] = {
  20, 20, 20, 20, 20, 20, 20, 20,
};
static const int8_t inv_cos_bit_row_dct_dct_16[8] = {
  12, 12, 12, 12, 12, 12, 12, 12,
};
// Shift table referenced by the .shift field.
static const int8_t inv_shift_dct_dct_16[2] = {
  -1, -5,
};
static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_16 = {
  16,                              // .txfm_size
  8,                               // .stage_num_col
  8,                               // .stage_num_row
  inv_shift_dct_dct_16,            // .shift
  inv_stage_range_col_dct_dct_16,  // .stage_range_col
  inv_stage_range_row_dct_dct_16,  // .stage_range_row
  inv_cos_bit_col_dct_dct_16,      // .cos_bit_col
  inv_cos_bit_row_dct_dct_16,      // .cos_bit_row
  TXFM_TYPE_DCT16,                 // .txfm_type_col
  TXFM_TYPE_DCT16,                 // .txfm_type_row
};
// ---------------- config inv_dct_dct_32 ----------------
// Inverse 2D config, txfm_size 32: DCT32 on columns (10 stages), DCT32 on
// rows (10 stages).
// Column-pass tables.
static const int8_t inv_stage_range_col_dct_dct_32[10] = {
  19, 19, 19, 19, 19, 19, 19, 19, 18, 18,
};
static const int8_t inv_cos_bit_col_dct_dct_32[10] = {
  13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
};
// Row-pass tables.
static const int8_t inv_stage_range_row_dct_dct_32[10] = {
  20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
};
static const int8_t inv_cos_bit_row_dct_dct_32[10] = {
  12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
};
// Shift table referenced by the .shift field.
static const int8_t inv_shift_dct_dct_32[2] = {
  -1, -5,
};
static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_32 = {
  32,                              // .txfm_size
  10,                              // .stage_num_col
  10,                              // .stage_num_row
  inv_shift_dct_dct_32,            // .shift
  inv_stage_range_col_dct_dct_32,  // .stage_range_col
  inv_stage_range_row_dct_dct_32,  // .stage_range_row
  inv_cos_bit_col_dct_dct_32,      // .cos_bit_col
  inv_cos_bit_row_dct_dct_32,      // .cos_bit_row
  TXFM_TYPE_DCT32,                 // .txfm_type_col
  TXFM_TYPE_DCT32,                 // .txfm_type_row
};
// ---------------- config inv_dct_dct_64 ----------------
// Inverse 2D config, txfm_size 64: DCT64 on columns (12 stages), DCT64 on
// rows (12 stages).
// Column-pass tables.
static const int8_t inv_stage_range_col_dct_dct_64[12] = {
  19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18,
};
static const int8_t inv_cos_bit_col_dct_dct_64[12] = {
  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
};
// Row-pass tables.
static const int8_t inv_stage_range_row_dct_dct_64[12] = {
  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
};
static const int8_t inv_cos_bit_row_dct_dct_64[12] = {
  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
};
// Shift table referenced by the .shift field.
static const int8_t inv_shift_dct_dct_64[2] = {
  -1, -7,
};
static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_64 = {
  64,                              // .txfm_size
  12,                              // .stage_num_col
  12,                              // .stage_num_row
  inv_shift_dct_dct_64,            // .shift
  inv_stage_range_col_dct_dct_64,  // .stage_range_col
  inv_stage_range_row_dct_dct_64,  // .stage_range_row
  inv_cos_bit_col_dct_dct_64,      // .cos_bit_col
  inv_cos_bit_row_dct_dct_64,      // .cos_bit_row
  TXFM_TYPE_DCT64,                 // .txfm_type_col
  TXFM_TYPE_DCT64,                 // .txfm_type_row
};
// ---------------- config inv_dct_adst_4 ----------------
// Inverse 2D config, txfm_size 4: DCT4 on columns (4 stages), ADST4 on rows
// (6 stages).
// Column-pass tables.
static const int8_t inv_stage_range_col_dct_adst_4[4] = {
  18, 18, 17, 17,
};
static const int8_t inv_cos_bit_col_dct_adst_4[4] = {
  13, 13, 13, 13,
};
// Row-pass tables.
static const int8_t inv_stage_range_row_dct_adst_4[6] = {
  18, 18, 18, 18, 18, 18,
};
static const int8_t inv_cos_bit_row_dct_adst_4[6] = {
  13, 13, 13, 13, 13, 13,
};
// Shift table referenced by the .shift field.
static const int8_t inv_shift_dct_adst_4[2] = {
  0, -4,
};
static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_adst_4 = {
  4,                               // .txfm_size
  4,                               // .stage_num_col
  6,                               // .stage_num_row
  inv_shift_dct_adst_4,            // .shift
  inv_stage_range_col_dct_adst_4,  // .stage_range_col
  inv_stage_range_row_dct_adst_4,  // .stage_range_row
  inv_cos_bit_col_dct_adst_4,      // .cos_bit_col
  inv_cos_bit_row_dct_adst_4,      // .cos_bit_row
  TXFM_TYPE_DCT4,                  // .txfm_type_col
  TXFM_TYPE_ADST4,                 // .txfm_type_row
};
// ---------------- config inv_dct_adst_8 ----------------
// Inverse 2D config, txfm_size 8: DCT8 on columns (6 stages), ADST8 on rows
// (8 stages).
// Column-pass tables.
static const int8_t inv_stage_range_col_dct_adst_8[6] = {
  19, 19, 19, 19, 18, 18,
};
static const int8_t inv_cos_bit_col_dct_adst_8[6] = {
  13, 13, 13, 13, 13, 13,
};
// Row-pass tables.
static const int8_t inv_stage_range_row_dct_adst_8[8] = {
  19, 19, 19, 19, 19, 19, 19, 19,
};
static const int8_t inv_cos_bit_row_dct_adst_8[8] = {
  13, 13, 13, 13, 13, 13, 13, 13,
};
// Shift table referenced by the .shift field.
static const int8_t inv_shift_dct_adst_8[2] = {
  0, -5,
};
static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_adst_8 = {
  8,                               // .txfm_size
  6,                               // .stage_num_col
  8,                               // .stage_num_row
  inv_shift_dct_adst_8,            // .shift
  inv_stage_range_col_dct_adst_8,  // .stage_range_col
  inv_stage_range_row_dct_adst_8,  // .stage_range_row
  inv_cos_bit_col_dct_adst_8,      // .cos_bit_col
  inv_cos_bit_row_dct_adst_8,      // .cos_bit_row
  TXFM_TYPE_DCT8,                  // .txfm_type_col
  TXFM_TYPE_ADST8,                 // .txfm_type_row
};
// ---------------- config inv_dct_adst_16 ----------------
// Inverse 2D config, txfm_size 16: DCT16 on columns (8 stages), ADST16 on
// rows (10 stages).
// Column-pass tables.
static const int8_t inv_stage_range_col_dct_adst_16[8] = {
  19, 19, 19, 19, 19, 19, 18, 18,
};
static const int8_t inv_cos_bit_col_dct_adst_16[8] = {
  13, 13, 13, 13, 13, 13, 13, 13,
};
// Row-pass tables.
static const int8_t inv_stage_range_row_dct_adst_16[10] = {
  20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
};
static const int8_t inv_cos_bit_row_dct_adst_16[10] = {
  12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
};
// Shift table referenced by the .shift field.
static const int8_t inv_shift_dct_adst_16[2] = {
  -1, -5,
};
static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_adst_16 = {
  16,                               // .txfm_size
  8,                                // .stage_num_col
  10,                               // .stage_num_row
  inv_shift_dct_adst_16,            // .shift
  inv_stage_range_col_dct_adst_16,  // .stage_range_col
  inv_stage_range_row_dct_adst_16,  // .stage_range_row
  inv_cos_bit_col_dct_adst_16,      // .cos_bit_col
  inv_cos_bit_row_dct_adst_16,      // .cos_bit_row
  TXFM_TYPE_DCT16,                  // .txfm_type_col
  TXFM_TYPE_ADST16,                 // .txfm_type_row
};
// ---------------- config inv_dct_adst_32 ----------------
// Inverse 2D config, txfm_size 32: DCT32 on columns (10 stages), ADST32 on
// rows (12 stages).
// Column-pass tables.
static const int8_t inv_stage_range_col_dct_adst_32[10] = {
  19, 19, 19, 19, 19, 19, 19, 19, 18, 18,
};
static const int8_t inv_cos_bit_col_dct_adst_32[10] = {
  13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
};
// Row-pass tables.
static const int8_t inv_stage_range_row_dct_adst_32[12] = {
  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
};
static const int8_t inv_cos_bit_row_dct_adst_32[12] = {
  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
};
// Shift table referenced by the .shift field.
static const int8_t inv_shift_dct_adst_32[2] = {
  -1, -5,
};
static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_adst_32 = {
  32,                               // .txfm_size
  10,                               // .stage_num_col
  12,                               // .stage_num_row
  inv_shift_dct_adst_32,            // .shift
  inv_stage_range_col_dct_adst_32,  // .stage_range_col
  inv_stage_range_row_dct_adst_32,  // .stage_range_row
  inv_cos_bit_col_dct_adst_32,      // .cos_bit_col
  inv_cos_bit_row_dct_adst_32,      // .cos_bit_row
  TXFM_TYPE_DCT32,                  // .txfm_type_col
  TXFM_TYPE_ADST32,                 // .txfm_type_row
};
// ---------------- config inv_adst_adst_4 ----------------
// Inverse 2D config, txfm_size 4: ADST4 on columns (6 stages), ADST4 on rows
// (6 stages).
// Column-pass tables.
static const int8_t inv_stage_range_col_adst_adst_4[6] = {
  18, 18, 18, 18, 17, 17,
};
static const int8_t inv_cos_bit_col_adst_adst_4[6] = {
  13, 13, 13, 13, 13, 13,
};
// Row-pass tables.
static const int8_t inv_stage_range_row_adst_adst_4[6] = {
  18, 18, 18, 18, 18, 18,
};
static const int8_t inv_cos_bit_row_adst_adst_4[6] = {
  13, 13, 13, 13, 13, 13,
};
// Shift table referenced by the .shift field.
static const int8_t inv_shift_adst_adst_4[2] = {
  0, -4,
};
static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_adst_4 = {
  4,                                // .txfm_size
  6,                                // .stage_num_col
  6,                                // .stage_num_row
  inv_shift_adst_adst_4,            // .shift
  inv_stage_range_col_adst_adst_4,  // .stage_range_col
  inv_stage_range_row_adst_adst_4,  // .stage_range_row
  inv_cos_bit_col_adst_adst_4,      // .cos_bit_col
  inv_cos_bit_row_adst_adst_4,      // .cos_bit_row
  TXFM_TYPE_ADST4,                  // .txfm_type_col
  TXFM_TYPE_ADST4,                  // .txfm_type_row
};
// ---------------- config inv_adst_adst_8 ----------------
// Inverse 2D config, txfm_size 8: ADST8 on columns (8 stages), ADST8 on rows
// (8 stages).
// Column-pass tables.
static const int8_t inv_stage_range_col_adst_adst_8[8] = {
  19, 19, 19, 19, 19, 19, 18, 18,
};
static const int8_t inv_cos_bit_col_adst_adst_8[8] = {
  13, 13, 13, 13, 13, 13, 13, 13,
};
// Row-pass tables.
static const int8_t inv_stage_range_row_adst_adst_8[8] = {
  19, 19, 19, 19, 19, 19, 19, 19,
};
static const int8_t inv_cos_bit_row_adst_adst_8[8] = {
  13, 13, 13, 13, 13, 13, 13, 13,
};
// Shift table referenced by the .shift field.
static const int8_t inv_shift_adst_adst_8[2] = {
  0, -5,
};
static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_adst_8 = {
  8,                                // .txfm_size
  8,                                // .stage_num_col
  8,                                // .stage_num_row
  inv_shift_adst_adst_8,            // .shift
  inv_stage_range_col_adst_adst_8,  // .stage_range_col
  inv_stage_range_row_adst_adst_8,  // .stage_range_row
  inv_cos_bit_col_adst_adst_8,      // .cos_bit_col
  inv_cos_bit_row_adst_adst_8,      // .cos_bit_row
  TXFM_TYPE_ADST8,                  // .txfm_type_col
  TXFM_TYPE_ADST8,                  // .txfm_type_row
};
// ---------------- config inv_adst_adst_16 ----------------
// Inverse 2D config, txfm_size 16: ADST16 on columns (10 stages), ADST16 on
// rows (10 stages).
// Column-pass tables.
static const int8_t inv_stage_range_col_adst_adst_16[10] = {
  19, 19, 19, 19, 19, 19, 19, 19, 18, 18,
};
static const int8_t inv_cos_bit_col_adst_adst_16[10] = {
  13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
};
// Row-pass tables.
static const int8_t inv_stage_range_row_adst_adst_16[10] = {
  20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
};
static const int8_t inv_cos_bit_row_adst_adst_16[10] = {
  12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
};
// Shift table referenced by the .shift field.
static const int8_t inv_shift_adst_adst_16[2] = {
  -1, -5,
};
static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_adst_16 = {
  16,                                // .txfm_size
  10,                                // .stage_num_col
  10,                                // .stage_num_row
  inv_shift_adst_adst_16,            // .shift
  inv_stage_range_col_adst_adst_16,  // .stage_range_col
  inv_stage_range_row_adst_adst_16,  // .stage_range_row
  inv_cos_bit_col_adst_adst_16,      // .cos_bit_col
  inv_cos_bit_row_adst_adst_16,      // .cos_bit_row
  TXFM_TYPE_ADST16,                  // .txfm_type_col
  TXFM_TYPE_ADST16,                  // .txfm_type_row
};
// ---------------- config inv_adst_adst_32 ----------------
// Inverse 2D config, txfm_size 32: ADST32 on columns (12 stages), ADST32 on
// rows (12 stages).
// Column-pass tables.
static const int8_t inv_stage_range_col_adst_adst_32[12] = {
  19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18,
};
static const int8_t inv_cos_bit_col_adst_adst_32[12] = {
  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
};
// Row-pass tables.
static const int8_t inv_stage_range_row_adst_adst_32[12] = {
  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
};
static const int8_t inv_cos_bit_row_adst_adst_32[12] = {
  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
};
// Shift table referenced by the .shift field.
static const int8_t inv_shift_adst_adst_32[2] = {
  -1, -5,
};
static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_adst_32 = {
  32,                                // .txfm_size
  12,                                // .stage_num_col
  12,                                // .stage_num_row
  inv_shift_adst_adst_32,            // .shift
  inv_stage_range_col_adst_adst_32,  // .stage_range_col
  inv_stage_range_row_adst_adst_32,  // .stage_range_row
  inv_cos_bit_col_adst_adst_32,      // .cos_bit_col
  inv_cos_bit_row_adst_adst_32,      // .cos_bit_row
  TXFM_TYPE_ADST32,                  // .txfm_type_col
  TXFM_TYPE_ADST32,                  // .txfm_type_row
};
// ---------------- config inv_adst_dct_4 ----------------
// Inverse 2D config, txfm_size 4: ADST4 on columns (6 stages), DCT4 on rows
// (4 stages).
// Column-pass tables.
static const int8_t inv_stage_range_col_adst_dct_4[6] = {
  18, 18, 18, 18, 17, 17,
};
static const int8_t inv_cos_bit_col_adst_dct_4[6] = {
  13, 13, 13, 13, 13, 13,
};
// Row-pass tables.
static const int8_t inv_stage_range_row_adst_dct_4[4] = {
  18, 18, 18, 18,
};
static const int8_t inv_cos_bit_row_adst_dct_4[4] = {
  13, 13, 13, 13,
};
// Shift table referenced by the .shift field.
static const int8_t inv_shift_adst_dct_4[2] = {
  0, -4,
};
static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_dct_4 = {
  4,                               // .txfm_size
  6,                               // .stage_num_col
  4,                               // .stage_num_row
  inv_shift_adst_dct_4,            // .shift
  inv_stage_range_col_adst_dct_4,  // .stage_range_col
  inv_stage_range_row_adst_dct_4,  // .stage_range_row
  inv_cos_bit_col_adst_dct_4,      // .cos_bit_col
  inv_cos_bit_row_adst_dct_4,      // .cos_bit_row
  TXFM_TYPE_ADST4,                 // .txfm_type_col
  TXFM_TYPE_DCT4,                  // .txfm_type_row
};
// ---------------- config inv_adst_dct_8 ----------------
// Inverse 2D config, txfm_size 8: ADST8 on columns (8 stages), DCT8 on rows
// (6 stages).
// Column-pass tables.
static const int8_t inv_stage_range_col_adst_dct_8[8] = {
  19, 19, 19, 19, 19, 19, 18, 18,
};
static const int8_t inv_cos_bit_col_adst_dct_8[8] = {
  13, 13, 13, 13, 13, 13, 13, 13,
};
// Row-pass tables.
static const int8_t inv_stage_range_row_adst_dct_8[6] = {
  19, 19, 19, 19, 19, 19,
};
static const int8_t inv_cos_bit_row_adst_dct_8[6] = {
  13, 13, 13, 13, 13, 13,
};
// Shift table referenced by the .shift field.
static const int8_t inv_shift_adst_dct_8[2] = {
  0, -5,
};
static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_dct_8 = {
  8,                               // .txfm_size
  8,                               // .stage_num_col
  6,                               // .stage_num_row
  inv_shift_adst_dct_8,            // .shift
  inv_stage_range_col_adst_dct_8,  // .stage_range_col
  inv_stage_range_row_adst_dct_8,  // .stage_range_row
  inv_cos_bit_col_adst_dct_8,      // .cos_bit_col
  inv_cos_bit_row_adst_dct_8,      // .cos_bit_row
  TXFM_TYPE_ADST8,                 // .txfm_type_col
  TXFM_TYPE_DCT8,                  // .txfm_type_row
};
// ---------------- config inv_adst_dct_16 ----------------
// Tables for the 16-point inverse ADST (column) / DCT (row) configuration:
// two shift entries, then per-stage range and cos_bit values -- 10 stages for
// the column pass and 8 for the row pass.
static const int8_t inv_shift_adst_dct_16[2] = { -1, -5 };

static const int8_t inv_stage_range_col_adst_dct_16[10] = { 19, 19, 19, 19, 19,
                                                            19, 19, 19, 18, 18 };
static const int8_t inv_stage_range_row_adst_dct_16[8] = {
  20, 20, 20, 20, 20, 20, 20, 20
};

static const int8_t inv_cos_bit_col_adst_dct_16[10] = {
  13, 13, 13, 13, 13, 13, 13, 13, 13, 13
};
static const int8_t inv_cos_bit_row_adst_dct_16[8] = {
  12, 12, 12, 12, 12, 12, 12, 12
};
// Inverse 2D transform configuration: 16-point ADST on columns, DCT on rows.
// The initializer is positional, so every value is tagged with the
// TXFM_2D_CFG field it fills.
static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_dct_16 = {
  16,                               // .txfm_size
  10,                               // .stage_num_col
  8,                                // .stage_num_row
  // 0,                             // .log_scale
  inv_shift_adst_dct_16,            // .shift
  inv_stage_range_col_adst_dct_16,  // .stage_range_col
  inv_stage_range_row_adst_dct_16,  // .stage_range_row
  inv_cos_bit_col_adst_dct_16,      // .cos_bit_col
  inv_cos_bit_row_adst_dct_16,      // .cos_bit_row
  TXFM_TYPE_ADST16,                 // .txfm_type_col
  TXFM_TYPE_DCT16                   // .txfm_type_row
};
// ---------------- config inv_adst_dct_32 ----------------
// Tables for the 32-point inverse ADST (column) / DCT (row) configuration:
// two shift entries, then per-stage range and cos_bit values -- 12 stages for
// the column pass and 10 for the row pass.
static const int8_t inv_shift_adst_dct_32[2] = { -1, -5 };

static const int8_t inv_stage_range_col_adst_dct_32[12] = { 19, 19, 19, 19,
                                                            19, 19, 19, 19,
                                                            19, 19, 18, 18 };
static const int8_t inv_stage_range_row_adst_dct_32[10] = { 20, 20, 20, 20, 20,
                                                            20, 20, 20, 20, 20 };

static const int8_t inv_cos_bit_col_adst_dct_32[12] = { 13, 13, 13, 13,
                                                        13, 13, 13, 13,
                                                        13, 13, 13, 13 };
static const int8_t inv_cos_bit_row_adst_dct_32[10] = {
  12, 12, 12, 12, 12, 12, 12, 12, 12, 12
};
// Inverse 2D transform configuration: 32-point ADST on columns, DCT on rows.
// The initializer is positional, so every value is tagged with the
// TXFM_2D_CFG field it fills.
static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_dct_32 = {
  32,                               // .txfm_size
  12,                               // .stage_num_col
  10,                               // .stage_num_row
  // 1,                             // .log_scale
  inv_shift_adst_dct_32,            // .shift
  inv_stage_range_col_adst_dct_32,  // .stage_range_col
  inv_stage_range_row_adst_dct_32,  // .stage_range_row
  inv_cos_bit_col_adst_dct_32,      // .cos_bit_col
  inv_cos_bit_row_adst_dct_32,      // .cos_bit_row
  TXFM_TYPE_ADST32,                 // .txfm_type_col
  TXFM_TYPE_DCT32                   // .txfm_type_row
};
#endif // AV1_INV_TXFM2D_CFG_H_

Просмотреть файл

@ -145,24 +145,21 @@ typedef enum TXFM_TYPE {
TXFM_TYPE_ADST32,
} TXFM_TYPE;
typedef struct TXFM_2D_CFG {
typedef struct TXFM_1D_CFG {
const int txfm_size;
const int stage_num_col;
const int stage_num_row;
const int stage_num;
const int8_t *shift;
const int8_t *stage_range_col;
const int8_t *stage_range_row;
const int8_t *cos_bit_col;
const int8_t *cos_bit_row;
const TXFM_TYPE txfm_type_col;
const TXFM_TYPE txfm_type_row;
} TXFM_2D_CFG;
const int8_t *stage_range;
const int8_t *cos_bit;
const TXFM_TYPE txfm_type;
} TXFM_1D_CFG;
typedef struct TXFM_2D_FLIP_CFG {
int ud_flip; // flip upside down
int lr_flip; // flip left to right
const TXFM_2D_CFG *cfg;
const TXFM_1D_CFG *col_cfg;
const TXFM_1D_CFG *row_cfg;
} TXFM_2D_FLIP_CFG;
static INLINE void set_flip_cfg(int tx_type, TXFM_2D_FLIP_CFG *cfg) {
@ -176,10 +173,12 @@ static INLINE void set_flip_cfg(int tx_type, TXFM_2D_FLIP_CFG *cfg) {
break;
#if CONFIG_EXT_TX
case FLIPADST_DCT:
case FLIPADST_ADST:
cfg->ud_flip = 1;
cfg->lr_flip = 0;
break;
case DCT_FLIPADST:
case ADST_FLIPADST:
cfg->ud_flip = 0;
cfg->lr_flip = 1;
break;
@ -187,14 +186,6 @@ static INLINE void set_flip_cfg(int tx_type, TXFM_2D_FLIP_CFG *cfg) {
cfg->ud_flip = 1;
cfg->lr_flip = 1;
break;
case ADST_FLIPADST:
cfg->ud_flip = 0;
cfg->lr_flip = 1;
break;
case FLIPADST_ADST:
cfg->ud_flip = 1;
cfg->lr_flip = 0;
break;
#endif // CONFIG_EXT_TX
default:
cfg->ud_flip = 0;

Просмотреть файл

@ -491,6 +491,22 @@ static const TX_SIZE max_txsize_rect_lookup[BLOCK_SIZES] = {
#define max_txsize_rect_lookup max_txsize_lookup
#endif // CONFIG_RECT_TX && (CONFIG_EXT_TX || CONFIG_VAR_TX)
// 1D transform kind for each of the TX_TYPES table slots; the last twelve
// entries exist only when CONFIG_EXT_TX is enabled.
// NOTE(review): presumably indexed by a 2D TX_TYPE value and giving its
// vertical (column) component -- confirm against the TX_TYPE enum order.
static const TX_TYPE_1D vtx_tab[TX_TYPES] = {
  DCT_1D,
  ADST_1D,
  DCT_1D,
  ADST_1D,
#if CONFIG_EXT_TX
  FLIPADST_1D,
  DCT_1D,
  FLIPADST_1D,
  ADST_1D,
  FLIPADST_1D,
  IDTX_1D,
  DCT_1D,
  IDTX_1D,
  ADST_1D,
  IDTX_1D,
  FLIPADST_1D,
  IDTX_1D,
#endif  // CONFIG_EXT_TX
};
// 1D transform kind for each of the TX_TYPES table slots; the last twelve
// entries exist only when CONFIG_EXT_TX is enabled.
// NOTE(review): presumably indexed by a 2D TX_TYPE value and giving its
// horizontal (row) component -- confirm against the TX_TYPE enum order.
static const TX_TYPE_1D htx_tab[TX_TYPES] = {
  DCT_1D,
  DCT_1D,
  ADST_1D,
  ADST_1D,
#if CONFIG_EXT_TX
  DCT_1D,
  FLIPADST_1D,
  FLIPADST_1D,
  FLIPADST_1D,
  ADST_1D,
  IDTX_1D,
  IDTX_1D,
  DCT_1D,
  IDTX_1D,
  ADST_1D,
  IDTX_1D,
  FLIPADST_1D,
#endif  // CONFIG_EXT_TX
};
#if CONFIG_RECT_TX && (CONFIG_EXT_TX || CONFIG_VAR_TX)
// Same as "max_txsize_lookup[bsize] - TX_8X8", except for rectangular
// block which may use a rectangular transform, in which case it is

Просмотреть файл

@ -15,7 +15,7 @@
#include "./av1_rtcd.h"
#include "aom_dsp/inv_txfm.h"
#include "aom_ports/mem.h"
#include "av1/common/av1_inv_txfm2d_cfg.h"
#include "av1/common/av1_inv_txfm1d_cfg.h"
#include "av1/common/blockd.h"
#include "av1/common/enums.h"
#include "av1/common/idct.h"
@ -85,8 +85,7 @@ static void idct64_col_c(const tran_low_t *input, tran_low_t *output) {
int32_t in[64], out[64];
int i;
for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
av1_idct64_new(in, out, inv_cos_bit_col_dct_dct_64,
inv_stage_range_col_dct_dct_64);
av1_idct64_new(in, out, inv_cos_bit_col_dct_64, inv_stage_range_col_dct_64);
for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
}
@ -94,8 +93,7 @@ static void idct64_row_c(const tran_low_t *input, tran_low_t *output) {
int32_t in[64], out[64];
int i;
for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
av1_idct64_new(in, out, inv_cos_bit_row_dct_dct_64,
inv_stage_range_row_dct_dct_64);
av1_idct64_new(in, out, inv_cos_bit_row_dct_64, inv_stage_range_row_dct_64);
for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
}
@ -196,8 +194,7 @@ static void highbd_idct64_col_c(const tran_low_t *input, tran_low_t *output,
int i;
(void)bd;
for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
av1_idct64_new(in, out, inv_cos_bit_col_dct_dct_64,
inv_stage_range_col_dct_dct_64);
av1_idct64_new(in, out, inv_cos_bit_col_dct_64, inv_stage_range_col_dct_64);
for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
}
@ -207,8 +204,7 @@ static void highbd_idct64_row_c(const tran_low_t *input, tran_low_t *output,
int i;
(void)bd;
for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
av1_idct64_new(in, out, inv_cos_bit_row_dct_dct_64,
inv_stage_range_row_dct_dct_64);
av1_idct64_new(in, out, inv_cos_bit_row_dct_64, inv_stage_range_row_dct_64);
for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
}
#endif // CONFIG_TX64X64

Просмотреть файл

@ -37,16 +37,20 @@ static INLINE TxfmFuncSSE2 fwd_txfm_type_to_func(TXFM_TYPE txfm_type) {
}
static INLINE void fwd_txfm2d_sse4_1(const int16_t *input, int32_t *output,
const int stride, const TXFM_2D_CFG *cfg,
const int stride,
const TXFM_2D_FLIP_CFG *cfg,
int32_t *txfm_buf) {
const int txfm_size = cfg->txfm_size;
const int8_t *shift = cfg->shift;
const int8_t *stage_range_col = cfg->stage_range_col;
const int8_t *stage_range_row = cfg->stage_range_row;
const int8_t *cos_bit_col = cfg->cos_bit_col;
const int8_t *cos_bit_row = cfg->cos_bit_row;
const TxfmFuncSSE2 txfm_func_col = fwd_txfm_type_to_func(cfg->txfm_type_col);
const TxfmFuncSSE2 txfm_func_row = fwd_txfm_type_to_func(cfg->txfm_type_row);
// TODO(sarahparker) must correct for rectangular transforms in follow up
const int txfm_size = cfg->row_cfg->txfm_size;
const int8_t *shift = cfg->row_cfg->shift;
const int8_t *stage_range_col = cfg->col_cfg->stage_range;
const int8_t *stage_range_row = cfg->row_cfg->stage_range;
const int8_t *cos_bit_col = cfg->col_cfg->cos_bit;
const int8_t *cos_bit_row = cfg->row_cfg->cos_bit;
const TxfmFuncSSE2 txfm_func_col =
fwd_txfm_type_to_func(cfg->col_cfg->txfm_type);
const TxfmFuncSSE2 txfm_func_row =
fwd_txfm_type_to_func(cfg->row_cfg->txfm_type);
__m128i *buf_128 = (__m128i *)txfm_buf;
__m128i *out_128 = (__m128i *)output;
@ -69,7 +73,7 @@ void av1_fwd_txfm2d_32x32_sse4_1(const int16_t *input, int32_t *output,
DECLARE_ALIGNED(16, int32_t, txfm_buf[1024]);
TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_32X32);
(void)bd;
fwd_txfm2d_sse4_1(input, output, stride, cfg.cfg, txfm_buf);
fwd_txfm2d_sse4_1(input, output, stride, &cfg, txfm_buf);
}
void av1_fwd_txfm2d_64x64_sse4_1(const int16_t *input, int32_t *output,
@ -77,5 +81,5 @@ void av1_fwd_txfm2d_64x64_sse4_1(const int16_t *input, int32_t *output,
DECLARE_ALIGNED(16, int32_t, txfm_buf[4096]);
TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_64x64_cfg(tx_type);
(void)bd;
fwd_txfm2d_sse4_1(input, output, stride, cfg.cfg, txfm_buf);
fwd_txfm2d_sse4_1(input, output, stride, &cfg, txfm_buf);
}

Просмотреть файл

@ -13,7 +13,7 @@
#include "./av1_rtcd.h"
#include "./aom_config.h"
#include "av1/common/av1_inv_txfm2d_cfg.h"
#include "av1/common/av1_inv_txfm1d_cfg.h"
// Note:
// Total 32x4 registers to represent 32x32 block coefficients.
@ -601,18 +601,20 @@ static void idct32_avx2(__m256i *in, __m256i *out, int bit) {
void av1_inv_txfm2d_add_32x32_avx2(const int32_t *coeff, uint16_t *output,
int stride, int tx_type, int bd) {
__m256i in[128], out[128];
const TXFM_2D_CFG *cfg = NULL;
const TXFM_1D_CFG *row_cfg = NULL;
const TXFM_1D_CFG *col_cfg = NULL;
switch (tx_type) {
case DCT_DCT:
cfg = &inv_txfm_2d_cfg_dct_dct_32;
row_cfg = &inv_txfm_1d_row_cfg_dct_32;
col_cfg = &inv_txfm_1d_col_cfg_dct_32;
load_buffer_32x32(coeff, in);
transpose_32x32(in, out);
idct32_avx2(out, in, cfg->cos_bit_row[2]);
round_shift_32x32(in, -cfg->shift[0]);
idct32_avx2(out, in, row_cfg->cos_bit[2]);
round_shift_32x32(in, -row_cfg->shift[0]);
transpose_32x32(in, out);
idct32_avx2(out, in, cfg->cos_bit_col[2]);
write_buffer_32x32(in, output, stride, 0, 0, -cfg->shift[1], bd);
idct32_avx2(out, in, col_cfg->cos_bit[2]);
write_buffer_32x32(in, output, stride, 0, 0, -row_cfg->shift[1], bd);
break;
default: assert(0);
}

Просмотреть файл

@ -13,7 +13,7 @@
#include "./av1_rtcd.h"
#include "./aom_config.h"
#include "av1/common/av1_inv_txfm2d_cfg.h"
#include "av1/common/av1_inv_txfm1d_cfg.h"
#include "av1/common/x86/highbd_txfm_utility_sse4.h"
static INLINE void load_buffer_4x4(const int32_t *coeff, __m128i *in) {
@ -232,72 +232,82 @@ static void write_buffer_4x4(__m128i *in, uint16_t *output, int stride,
void av1_inv_txfm2d_add_4x4_sse4_1(const int32_t *coeff, uint16_t *output,
int stride, int tx_type, int bd) {
__m128i in[4];
const TXFM_2D_CFG *cfg = NULL;
const TXFM_1D_CFG *row_cfg = NULL;
const TXFM_1D_CFG *col_cfg = NULL;
switch (tx_type) {
case DCT_DCT:
cfg = &inv_txfm_2d_cfg_dct_dct_4;
row_cfg = &inv_txfm_1d_row_cfg_dct_4;
col_cfg = &inv_txfm_1d_col_cfg_dct_4;
load_buffer_4x4(coeff, in);
idct4x4_sse4_1(in, cfg->cos_bit_row[2]);
idct4x4_sse4_1(in, cfg->cos_bit_col[2]);
write_buffer_4x4(in, output, stride, 0, 0, -cfg->shift[1], bd);
idct4x4_sse4_1(in, row_cfg->cos_bit[2]);
idct4x4_sse4_1(in, col_cfg->cos_bit[2]);
write_buffer_4x4(in, output, stride, 0, 0, -row_cfg->shift[1], bd);
break;
case ADST_DCT:
cfg = &inv_txfm_2d_cfg_adst_dct_4;
row_cfg = &inv_txfm_1d_row_cfg_dct_4;
col_cfg = &inv_txfm_1d_col_cfg_adst_4;
load_buffer_4x4(coeff, in);
idct4x4_sse4_1(in, cfg->cos_bit_row[2]);
iadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
write_buffer_4x4(in, output, stride, 0, 0, -cfg->shift[1], bd);
idct4x4_sse4_1(in, row_cfg->cos_bit[2]);
iadst4x4_sse4_1(in, col_cfg->cos_bit[2]);
write_buffer_4x4(in, output, stride, 0, 0, -row_cfg->shift[1], bd);
break;
case DCT_ADST:
cfg = &inv_txfm_2d_cfg_dct_adst_4;
row_cfg = &inv_txfm_1d_row_cfg_adst_4;
col_cfg = &inv_txfm_1d_col_cfg_dct_4;
load_buffer_4x4(coeff, in);
iadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
idct4x4_sse4_1(in, cfg->cos_bit_col[2]);
write_buffer_4x4(in, output, stride, 0, 0, -cfg->shift[1], bd);
iadst4x4_sse4_1(in, row_cfg->cos_bit[2]);
idct4x4_sse4_1(in, col_cfg->cos_bit[2]);
write_buffer_4x4(in, output, stride, 0, 0, -row_cfg->shift[1], bd);
break;
case ADST_ADST:
cfg = &inv_txfm_2d_cfg_adst_adst_4;
row_cfg = &inv_txfm_1d_row_cfg_adst_4;
col_cfg = &inv_txfm_1d_col_cfg_adst_4;
load_buffer_4x4(coeff, in);
iadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
iadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
write_buffer_4x4(in, output, stride, 0, 0, -cfg->shift[1], bd);
iadst4x4_sse4_1(in, row_cfg->cos_bit[2]);
iadst4x4_sse4_1(in, col_cfg->cos_bit[2]);
write_buffer_4x4(in, output, stride, 0, 0, -row_cfg->shift[1], bd);
break;
#if CONFIG_EXT_TX
case FLIPADST_DCT:
cfg = &inv_txfm_2d_cfg_adst_dct_4;
row_cfg = &inv_txfm_1d_row_cfg_dct_4;
col_cfg = &inv_txfm_1d_col_cfg_adst_4;
load_buffer_4x4(coeff, in);
idct4x4_sse4_1(in, cfg->cos_bit_row[2]);
iadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
write_buffer_4x4(in, output, stride, 0, 1, -cfg->shift[1], bd);
idct4x4_sse4_1(in, row_cfg->cos_bit[2]);
iadst4x4_sse4_1(in, col_cfg->cos_bit[2]);
write_buffer_4x4(in, output, stride, 0, 1, -row_cfg->shift[1], bd);
break;
case DCT_FLIPADST:
cfg = &inv_txfm_2d_cfg_dct_adst_4;
row_cfg = &inv_txfm_1d_row_cfg_adst_4;
col_cfg = &inv_txfm_1d_col_cfg_dct_4;
load_buffer_4x4(coeff, in);
iadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
idct4x4_sse4_1(in, cfg->cos_bit_col[2]);
write_buffer_4x4(in, output, stride, 1, 0, -cfg->shift[1], bd);
iadst4x4_sse4_1(in, row_cfg->cos_bit[2]);
idct4x4_sse4_1(in, col_cfg->cos_bit[2]);
write_buffer_4x4(in, output, stride, 1, 0, -row_cfg->shift[1], bd);
break;
case FLIPADST_FLIPADST:
cfg = &inv_txfm_2d_cfg_adst_adst_4;
row_cfg = &inv_txfm_1d_row_cfg_adst_4;
col_cfg = &inv_txfm_1d_col_cfg_adst_4;
load_buffer_4x4(coeff, in);
iadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
iadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
write_buffer_4x4(in, output, stride, 1, 1, -cfg->shift[1], bd);
iadst4x4_sse4_1(in, row_cfg->cos_bit[2]);
iadst4x4_sse4_1(in, col_cfg->cos_bit[2]);
write_buffer_4x4(in, output, stride, 1, 1, -row_cfg->shift[1], bd);
break;
case ADST_FLIPADST:
cfg = &inv_txfm_2d_cfg_adst_adst_4;
row_cfg = &inv_txfm_1d_row_cfg_adst_4;
col_cfg = &inv_txfm_1d_col_cfg_adst_4;
load_buffer_4x4(coeff, in);
iadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
iadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
write_buffer_4x4(in, output, stride, 1, 0, -cfg->shift[1], bd);
iadst4x4_sse4_1(in, row_cfg->cos_bit[2]);
iadst4x4_sse4_1(in, col_cfg->cos_bit[2]);
write_buffer_4x4(in, output, stride, 1, 0, -row_cfg->shift[1], bd);
break;
case FLIPADST_ADST:
cfg = &inv_txfm_2d_cfg_adst_adst_4;
row_cfg = &inv_txfm_1d_row_cfg_adst_4;
col_cfg = &inv_txfm_1d_col_cfg_adst_4;
load_buffer_4x4(coeff, in);
iadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
iadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
write_buffer_4x4(in, output, stride, 0, 1, -cfg->shift[1], bd);
iadst4x4_sse4_1(in, row_cfg->cos_bit[2]);
iadst4x4_sse4_1(in, col_cfg->cos_bit[2]);
write_buffer_4x4(in, output, stride, 0, 1, -row_cfg->shift[1], bd);
break;
#endif // CONFIG_EXT_TX
default: assert(0);
@ -698,90 +708,100 @@ static void write_buffer_8x8(__m128i *in, uint16_t *output, int stride,
void av1_inv_txfm2d_add_8x8_sse4_1(const int32_t *coeff, uint16_t *output,
int stride, int tx_type, int bd) {
__m128i in[16], out[16];
const TXFM_2D_CFG *cfg = NULL;
const TXFM_1D_CFG *row_cfg = NULL;
const TXFM_1D_CFG *col_cfg = NULL;
switch (tx_type) {
case DCT_DCT:
cfg = &inv_txfm_2d_cfg_dct_dct_8;
row_cfg = &inv_txfm_1d_row_cfg_dct_8;
col_cfg = &inv_txfm_1d_col_cfg_dct_8;
load_buffer_8x8(coeff, in);
transpose_8x8(in, out);
idct8x8_sse4_1(out, in, cfg->cos_bit_row[2]);
idct8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
transpose_8x8(in, out);
idct8x8_sse4_1(out, in, cfg->cos_bit_col[2]);
write_buffer_8x8(in, output, stride, 0, 0, -cfg->shift[1], bd);
idct8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
write_buffer_8x8(in, output, stride, 0, 0, -row_cfg->shift[1], bd);
break;
case DCT_ADST:
cfg = &inv_txfm_2d_cfg_dct_adst_8;
row_cfg = &inv_txfm_1d_row_cfg_adst_8;
col_cfg = &inv_txfm_1d_col_cfg_dct_8;
load_buffer_8x8(coeff, in);
transpose_8x8(in, out);
iadst8x8_sse4_1(out, in, cfg->cos_bit_row[2]);
iadst8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
transpose_8x8(in, out);
idct8x8_sse4_1(out, in, cfg->cos_bit_col[2]);
write_buffer_8x8(in, output, stride, 0, 0, -cfg->shift[1], bd);
idct8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
write_buffer_8x8(in, output, stride, 0, 0, -row_cfg->shift[1], bd);
break;
case ADST_DCT:
cfg = &inv_txfm_2d_cfg_adst_dct_8;
row_cfg = &inv_txfm_1d_row_cfg_dct_8;
col_cfg = &inv_txfm_1d_col_cfg_adst_8;
load_buffer_8x8(coeff, in);
transpose_8x8(in, out);
idct8x8_sse4_1(out, in, cfg->cos_bit_row[2]);
idct8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
transpose_8x8(in, out);
iadst8x8_sse4_1(out, in, cfg->cos_bit_col[2]);
write_buffer_8x8(in, output, stride, 0, 0, -cfg->shift[1], bd);
iadst8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
write_buffer_8x8(in, output, stride, 0, 0, -row_cfg->shift[1], bd);
break;
case ADST_ADST:
cfg = &inv_txfm_2d_cfg_adst_adst_8;
row_cfg = &inv_txfm_1d_row_cfg_adst_8;
col_cfg = &inv_txfm_1d_col_cfg_adst_8;
load_buffer_8x8(coeff, in);
transpose_8x8(in, out);
iadst8x8_sse4_1(out, in, cfg->cos_bit_row[2]);
iadst8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
transpose_8x8(in, out);
iadst8x8_sse4_1(out, in, cfg->cos_bit_col[2]);
write_buffer_8x8(in, output, stride, 0, 0, -cfg->shift[1], bd);
iadst8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
write_buffer_8x8(in, output, stride, 0, 0, -row_cfg->shift[1], bd);
break;
#if CONFIG_EXT_TX
case FLIPADST_DCT:
cfg = &inv_txfm_2d_cfg_adst_dct_8;
row_cfg = &inv_txfm_1d_row_cfg_dct_8;
col_cfg = &inv_txfm_1d_col_cfg_adst_8;
load_buffer_8x8(coeff, in);
transpose_8x8(in, out);
idct8x8_sse4_1(out, in, cfg->cos_bit_row[2]);
idct8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
transpose_8x8(in, out);
iadst8x8_sse4_1(out, in, cfg->cos_bit_col[2]);
write_buffer_8x8(in, output, stride, 0, 1, -cfg->shift[1], bd);
iadst8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
write_buffer_8x8(in, output, stride, 0, 1, -row_cfg->shift[1], bd);
break;
case DCT_FLIPADST:
cfg = &inv_txfm_2d_cfg_dct_adst_8;
row_cfg = &inv_txfm_1d_row_cfg_adst_8;
col_cfg = &inv_txfm_1d_col_cfg_dct_8;
load_buffer_8x8(coeff, in);
transpose_8x8(in, out);
iadst8x8_sse4_1(out, in, cfg->cos_bit_row[2]);
iadst8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
transpose_8x8(in, out);
idct8x8_sse4_1(out, in, cfg->cos_bit_col[2]);
write_buffer_8x8(in, output, stride, 1, 0, -cfg->shift[1], bd);
idct8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
write_buffer_8x8(in, output, stride, 1, 0, -row_cfg->shift[1], bd);
break;
case ADST_FLIPADST:
cfg = &inv_txfm_2d_cfg_adst_adst_8;
row_cfg = &inv_txfm_1d_row_cfg_adst_8;
col_cfg = &inv_txfm_1d_col_cfg_adst_8;
load_buffer_8x8(coeff, in);
transpose_8x8(in, out);
iadst8x8_sse4_1(out, in, cfg->cos_bit_row[2]);
iadst8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
transpose_8x8(in, out);
iadst8x8_sse4_1(out, in, cfg->cos_bit_col[2]);
write_buffer_8x8(in, output, stride, 1, 0, -cfg->shift[1], bd);
iadst8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
write_buffer_8x8(in, output, stride, 1, 0, -row_cfg->shift[1], bd);
break;
case FLIPADST_FLIPADST:
cfg = &inv_txfm_2d_cfg_adst_adst_8;
row_cfg = &inv_txfm_1d_row_cfg_adst_8;
col_cfg = &inv_txfm_1d_col_cfg_adst_8;
load_buffer_8x8(coeff, in);
transpose_8x8(in, out);
iadst8x8_sse4_1(out, in, cfg->cos_bit_row[2]);
iadst8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
transpose_8x8(in, out);
iadst8x8_sse4_1(out, in, cfg->cos_bit_col[2]);
write_buffer_8x8(in, output, stride, 1, 1, -cfg->shift[1], bd);
iadst8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
write_buffer_8x8(in, output, stride, 1, 1, -row_cfg->shift[1], bd);
break;
case FLIPADST_ADST:
cfg = &inv_txfm_2d_cfg_adst_adst_8;
row_cfg = &inv_txfm_1d_row_cfg_adst_8;
col_cfg = &inv_txfm_1d_col_cfg_adst_8;
load_buffer_8x8(coeff, in);
transpose_8x8(in, out);
iadst8x8_sse4_1(out, in, cfg->cos_bit_row[2]);
iadst8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
transpose_8x8(in, out);
iadst8x8_sse4_1(out, in, cfg->cos_bit_col[2]);
write_buffer_8x8(in, output, stride, 0, 1, -cfg->shift[1], bd);
iadst8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
write_buffer_8x8(in, output, stride, 0, 1, -row_cfg->shift[1], bd);
break;
#endif // CONFIG_EXT_TX
default: assert(0);
@ -1298,99 +1318,109 @@ static void round_shift_16x16(__m128i *in, int shift) {
void av1_inv_txfm2d_add_16x16_sse4_1(const int32_t *coeff, uint16_t *output,
int stride, int tx_type, int bd) {
__m128i in[64], out[64];
const TXFM_2D_CFG *cfg = NULL;
const TXFM_1D_CFG *row_cfg = NULL;
const TXFM_1D_CFG *col_cfg = NULL;
switch (tx_type) {
case DCT_DCT:
cfg = &inv_txfm_2d_cfg_dct_dct_16;
row_cfg = &inv_txfm_1d_row_cfg_dct_16;
col_cfg = &inv_txfm_1d_col_cfg_dct_16;
load_buffer_16x16(coeff, in);
transpose_16x16(in, out);
idct16x16_sse4_1(out, in, cfg->cos_bit_row[2]);
round_shift_16x16(in, -cfg->shift[0]);
idct16x16_sse4_1(out, in, row_cfg->cos_bit[2]);
round_shift_16x16(in, -row_cfg->shift[0]);
transpose_16x16(in, out);
idct16x16_sse4_1(out, in, cfg->cos_bit_col[2]);
write_buffer_16x16(in, output, stride, 0, 0, -cfg->shift[1], bd);
idct16x16_sse4_1(out, in, col_cfg->cos_bit[2]);
write_buffer_16x16(in, output, stride, 0, 0, -row_cfg->shift[1], bd);
break;
case DCT_ADST:
cfg = &inv_txfm_2d_cfg_dct_adst_16;
row_cfg = &inv_txfm_1d_row_cfg_adst_16;
col_cfg = &inv_txfm_1d_col_cfg_dct_16;
load_buffer_16x16(coeff, in);
transpose_16x16(in, out);
iadst16x16_sse4_1(out, in, cfg->cos_bit_row[2]);
round_shift_16x16(in, -cfg->shift[0]);
iadst16x16_sse4_1(out, in, row_cfg->cos_bit[2]);
round_shift_16x16(in, -row_cfg->shift[0]);
transpose_16x16(in, out);
idct16x16_sse4_1(out, in, cfg->cos_bit_col[2]);
write_buffer_16x16(in, output, stride, 0, 0, -cfg->shift[1], bd);
idct16x16_sse4_1(out, in, col_cfg->cos_bit[2]);
write_buffer_16x16(in, output, stride, 0, 0, -row_cfg->shift[1], bd);
break;
case ADST_DCT:
cfg = &inv_txfm_2d_cfg_adst_dct_16;
row_cfg = &inv_txfm_1d_row_cfg_dct_16;
col_cfg = &inv_txfm_1d_col_cfg_adst_16;
load_buffer_16x16(coeff, in);
transpose_16x16(in, out);
idct16x16_sse4_1(out, in, cfg->cos_bit_row[2]);
round_shift_16x16(in, -cfg->shift[0]);
idct16x16_sse4_1(out, in, row_cfg->cos_bit[2]);
round_shift_16x16(in, -row_cfg->shift[0]);
transpose_16x16(in, out);
iadst16x16_sse4_1(out, in, cfg->cos_bit_col[2]);
write_buffer_16x16(in, output, stride, 0, 0, -cfg->shift[1], bd);
iadst16x16_sse4_1(out, in, col_cfg->cos_bit[2]);
write_buffer_16x16(in, output, stride, 0, 0, -row_cfg->shift[1], bd);
break;
case ADST_ADST:
cfg = &inv_txfm_2d_cfg_adst_adst_16;
row_cfg = &inv_txfm_1d_row_cfg_adst_16;
col_cfg = &inv_txfm_1d_col_cfg_adst_16;
load_buffer_16x16(coeff, in);
transpose_16x16(in, out);
iadst16x16_sse4_1(out, in, cfg->cos_bit_row[2]);
round_shift_16x16(in, -cfg->shift[0]);
iadst16x16_sse4_1(out, in, row_cfg->cos_bit[2]);
round_shift_16x16(in, -row_cfg->shift[0]);
transpose_16x16(in, out);
iadst16x16_sse4_1(out, in, cfg->cos_bit_col[2]);
write_buffer_16x16(in, output, stride, 0, 0, -cfg->shift[1], bd);
iadst16x16_sse4_1(out, in, col_cfg->cos_bit[2]);
write_buffer_16x16(in, output, stride, 0, 0, -row_cfg->shift[1], bd);
break;
#if CONFIG_EXT_TX
case FLIPADST_DCT:
cfg = &inv_txfm_2d_cfg_adst_dct_16;
row_cfg = &inv_txfm_1d_row_cfg_dct_16;
col_cfg = &inv_txfm_1d_col_cfg_adst_16;
load_buffer_16x16(coeff, in);
transpose_16x16(in, out);
idct16x16_sse4_1(out, in, cfg->cos_bit_row[2]);
round_shift_16x16(in, -cfg->shift[0]);
idct16x16_sse4_1(out, in, row_cfg->cos_bit[2]);
round_shift_16x16(in, -row_cfg->shift[0]);
transpose_16x16(in, out);
iadst16x16_sse4_1(out, in, cfg->cos_bit_col[2]);
write_buffer_16x16(in, output, stride, 0, 1, -cfg->shift[1], bd);
iadst16x16_sse4_1(out, in, col_cfg->cos_bit[2]);
write_buffer_16x16(in, output, stride, 0, 1, -row_cfg->shift[1], bd);
break;
case DCT_FLIPADST:
cfg = &inv_txfm_2d_cfg_dct_adst_16;
row_cfg = &inv_txfm_1d_row_cfg_adst_16;
col_cfg = &inv_txfm_1d_col_cfg_dct_16;
load_buffer_16x16(coeff, in);
transpose_16x16(in, out);
iadst16x16_sse4_1(out, in, cfg->cos_bit_row[2]);
round_shift_16x16(in, -cfg->shift[0]);
iadst16x16_sse4_1(out, in, row_cfg->cos_bit[2]);
round_shift_16x16(in, -row_cfg->shift[0]);
transpose_16x16(in, out);
idct16x16_sse4_1(out, in, cfg->cos_bit_col[2]);
write_buffer_16x16(in, output, stride, 1, 0, -cfg->shift[1], bd);
idct16x16_sse4_1(out, in, col_cfg->cos_bit[2]);
write_buffer_16x16(in, output, stride, 1, 0, -row_cfg->shift[1], bd);
break;
case ADST_FLIPADST:
cfg = &inv_txfm_2d_cfg_adst_adst_16;
row_cfg = &inv_txfm_1d_row_cfg_adst_16;
col_cfg = &inv_txfm_1d_col_cfg_adst_16;
load_buffer_16x16(coeff, in);
transpose_16x16(in, out);
iadst16x16_sse4_1(out, in, cfg->cos_bit_row[2]);
round_shift_16x16(in, -cfg->shift[0]);
iadst16x16_sse4_1(out, in, row_cfg->cos_bit[2]);
round_shift_16x16(in, -row_cfg->shift[0]);
transpose_16x16(in, out);
iadst16x16_sse4_1(out, in, cfg->cos_bit_col[2]);
write_buffer_16x16(in, output, stride, 1, 0, -cfg->shift[1], bd);
iadst16x16_sse4_1(out, in, col_cfg->cos_bit[2]);
write_buffer_16x16(in, output, stride, 1, 0, -row_cfg->shift[1], bd);
break;
case FLIPADST_FLIPADST:
cfg = &inv_txfm_2d_cfg_adst_adst_16;
row_cfg = &inv_txfm_1d_row_cfg_adst_16;
col_cfg = &inv_txfm_1d_col_cfg_adst_16;
load_buffer_16x16(coeff, in);
transpose_16x16(in, out);
iadst16x16_sse4_1(out, in, cfg->cos_bit_row[2]);
round_shift_16x16(in, -cfg->shift[0]);
iadst16x16_sse4_1(out, in, row_cfg->cos_bit[2]);
round_shift_16x16(in, -row_cfg->shift[0]);
transpose_16x16(in, out);
iadst16x16_sse4_1(out, in, cfg->cos_bit_col[2]);
write_buffer_16x16(in, output, stride, 1, 1, -cfg->shift[1], bd);
iadst16x16_sse4_1(out, in, col_cfg->cos_bit[2]);
write_buffer_16x16(in, output, stride, 1, 1, -row_cfg->shift[1], bd);
break;
case FLIPADST_ADST:
cfg = &inv_txfm_2d_cfg_adst_adst_16;
row_cfg = &inv_txfm_1d_row_cfg_adst_16;
col_cfg = &inv_txfm_1d_col_cfg_adst_16;
load_buffer_16x16(coeff, in);
transpose_16x16(in, out);
iadst16x16_sse4_1(out, in, cfg->cos_bit_row[2]);
round_shift_16x16(in, -cfg->shift[0]);
iadst16x16_sse4_1(out, in, row_cfg->cos_bit[2]);
round_shift_16x16(in, -row_cfg->shift[0]);
transpose_16x16(in, out);
iadst16x16_sse4_1(out, in, cfg->cos_bit_col[2]);
write_buffer_16x16(in, output, stride, 0, 1, -cfg->shift[1], bd);
iadst16x16_sse4_1(out, in, col_cfg->cos_bit[2]);
write_buffer_16x16(in, output, stride, 0, 1, -row_cfg->shift[1], bd);
break;
#endif
default: assert(0);

Просмотреть файл

@ -19,7 +19,7 @@
#include "aom_ports/mem.h"
#include "av1/common/blockd.h"
#include "av1/common/av1_fwd_txfm1d.h"
#include "av1/common/av1_fwd_txfm2d_cfg.h"
#include "av1/common/av1_fwd_txfm1d_cfg.h"
#include "av1/common/idct.h"
static INLINE void range_check(const tran_low_t *input, const int size,
@ -2133,8 +2133,7 @@ static void fdct64_col(const tran_low_t *input, tran_low_t *output) {
int32_t in[64], out[64];
int i;
for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
av1_fdct64_new(in, out, fwd_cos_bit_col_dct_dct_64,
fwd_stage_range_col_dct_dct_64);
av1_fdct64_new(in, out, fwd_cos_bit_col_dct_64, fwd_stage_range_col_dct_64);
for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
}
@ -2142,8 +2141,7 @@ static void fdct64_row(const tran_low_t *input, tran_low_t *output) {
int32_t in[64], out[64];
int i;
for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
av1_fdct64_new(in, out, fwd_cos_bit_row_dct_dct_64,
fwd_stage_range_row_dct_dct_64);
av1_fdct64_new(in, out, fwd_cos_bit_row_dct_64, fwd_stage_range_row_dct_64);
for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
}

Просмотреть файл

@ -418,22 +418,6 @@ static INLINE int write_uniform_cost(int n, int v) {
#define FAST_EXT_TX_CORR_MARGIN 0.5
#define FAST_EXT_TX_EDST_MARGIN 0.3
// 1D transform kind for each of the TX_TYPES table slots; the last twelve
// entries exist only when CONFIG_EXT_TX is enabled.
// NOTE(review): presumably indexed by a 2D TX_TYPE value and giving its
// vertical (column) component -- confirm against the TX_TYPE enum order.
static const TX_TYPE_1D vtx_tab[TX_TYPES] = {
  DCT_1D,
  ADST_1D,
  DCT_1D,
  ADST_1D,
#if CONFIG_EXT_TX
  FLIPADST_1D,
  DCT_1D,
  FLIPADST_1D,
  ADST_1D,
  FLIPADST_1D,
  IDTX_1D,
  DCT_1D,
  IDTX_1D,
  ADST_1D,
  IDTX_1D,
  FLIPADST_1D,
  IDTX_1D,
#endif  // CONFIG_EXT_TX
};
// 1D transform kind for each of the TX_TYPES table slots; the last twelve
// entries exist only when CONFIG_EXT_TX is enabled.
// NOTE(review): presumably indexed by a 2D TX_TYPE value and giving its
// horizontal (row) component -- confirm against the TX_TYPE enum order.
static const TX_TYPE_1D htx_tab[TX_TYPES] = {
  DCT_1D,
  DCT_1D,
  ADST_1D,
  ADST_1D,
#if CONFIG_EXT_TX
  DCT_1D,
  FLIPADST_1D,
  FLIPADST_1D,
  FLIPADST_1D,
  ADST_1D,
  IDTX_1D,
  IDTX_1D,
  DCT_1D,
  IDTX_1D,
  ADST_1D,
  IDTX_1D,
  FLIPADST_1D,
#endif  // CONFIG_EXT_TX
};
#if CONFIG_DAALA_DIST
static int od_compute_var_4x4(od_coeff *x, int stride) {
int sum;

Просмотреть файл

@ -13,7 +13,7 @@
#include "./av1_rtcd.h"
#include "./aom_config.h"
#include "av1/common/av1_fwd_txfm2d_cfg.h"
#include "av1/common/av1_fwd_txfm1d_cfg.h"
#include "av1/common/av1_txfm.h"
#include "av1/common/x86/highbd_txfm_utility_sse4.h"
#include "aom_dsp/txfm_common.h"
@ -209,71 +209,81 @@ static void fadst4x4_sse4_1(__m128i *in, int bit) {
void av1_fwd_txfm2d_4x4_sse4_1(const int16_t *input, int32_t *coeff,
int input_stride, int tx_type, int bd) {
__m128i in[4];
const TXFM_2D_CFG *cfg = NULL;
const TXFM_1D_CFG *row_cfg = NULL;
const TXFM_1D_CFG *col_cfg = NULL;
switch (tx_type) {
case DCT_DCT:
cfg = &fwd_txfm_2d_cfg_dct_dct_4;
load_buffer_4x4(input, in, input_stride, 0, 0, cfg->shift[0]);
fdct4x4_sse4_1(in, cfg->cos_bit_col[2]);
fdct4x4_sse4_1(in, cfg->cos_bit_row[2]);
row_cfg = &fwd_txfm_1d_row_cfg_dct_4;
col_cfg = &fwd_txfm_1d_col_cfg_dct_4;
load_buffer_4x4(input, in, input_stride, 0, 0, row_cfg->shift[0]);
fdct4x4_sse4_1(in, col_cfg->cos_bit[2]);
fdct4x4_sse4_1(in, row_cfg->cos_bit[2]);
write_buffer_4x4(in, coeff);
break;
case ADST_DCT:
cfg = &fwd_txfm_2d_cfg_adst_dct_4;
load_buffer_4x4(input, in, input_stride, 0, 0, cfg->shift[0]);
fadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
fdct4x4_sse4_1(in, cfg->cos_bit_row[2]);
row_cfg = &fwd_txfm_1d_row_cfg_dct_4;
col_cfg = &fwd_txfm_1d_col_cfg_adst_4;
load_buffer_4x4(input, in, input_stride, 0, 0, row_cfg->shift[0]);
fadst4x4_sse4_1(in, col_cfg->cos_bit[2]);
fdct4x4_sse4_1(in, row_cfg->cos_bit[2]);
write_buffer_4x4(in, coeff);
break;
case DCT_ADST:
cfg = &fwd_txfm_2d_cfg_dct_adst_4;
load_buffer_4x4(input, in, input_stride, 0, 0, cfg->shift[0]);
fdct4x4_sse4_1(in, cfg->cos_bit_col[2]);
fadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
row_cfg = &fwd_txfm_1d_row_cfg_adst_4;
col_cfg = &fwd_txfm_1d_col_cfg_dct_4;
load_buffer_4x4(input, in, input_stride, 0, 0, row_cfg->shift[0]);
fdct4x4_sse4_1(in, col_cfg->cos_bit[2]);
fadst4x4_sse4_1(in, row_cfg->cos_bit[2]);
write_buffer_4x4(in, coeff);
break;
case ADST_ADST:
cfg = &fwd_txfm_2d_cfg_adst_adst_4;
load_buffer_4x4(input, in, input_stride, 0, 0, cfg->shift[0]);
fadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
fadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
row_cfg = &fwd_txfm_1d_row_cfg_adst_4;
col_cfg = &fwd_txfm_1d_col_cfg_adst_4;
load_buffer_4x4(input, in, input_stride, 0, 0, row_cfg->shift[0]);
fadst4x4_sse4_1(in, col_cfg->cos_bit[2]);
fadst4x4_sse4_1(in, row_cfg->cos_bit[2]);
write_buffer_4x4(in, coeff);
break;
#if CONFIG_EXT_TX
case FLIPADST_DCT:
cfg = &fwd_txfm_2d_cfg_adst_dct_4;
load_buffer_4x4(input, in, input_stride, 1, 0, cfg->shift[0]);
fadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
fdct4x4_sse4_1(in, cfg->cos_bit_row[2]);
row_cfg = &fwd_txfm_1d_row_cfg_dct_4;
col_cfg = &fwd_txfm_1d_col_cfg_adst_4;
load_buffer_4x4(input, in, input_stride, 1, 0, row_cfg->shift[0]);
fadst4x4_sse4_1(in, col_cfg->cos_bit[2]);
fdct4x4_sse4_1(in, row_cfg->cos_bit[2]);
write_buffer_4x4(in, coeff);
break;
case DCT_FLIPADST:
cfg = &fwd_txfm_2d_cfg_dct_adst_4;
load_buffer_4x4(input, in, input_stride, 0, 1, cfg->shift[0]);
fdct4x4_sse4_1(in, cfg->cos_bit_col[2]);
fadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
row_cfg = &fwd_txfm_1d_row_cfg_adst_4;
col_cfg = &fwd_txfm_1d_col_cfg_dct_4;
load_buffer_4x4(input, in, input_stride, 0, 1, row_cfg->shift[0]);
fdct4x4_sse4_1(in, col_cfg->cos_bit[2]);
fadst4x4_sse4_1(in, row_cfg->cos_bit[2]);
write_buffer_4x4(in, coeff);
break;
case FLIPADST_FLIPADST:
cfg = &fwd_txfm_2d_cfg_adst_adst_4;
load_buffer_4x4(input, in, input_stride, 1, 1, cfg->shift[0]);
fadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
fadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
row_cfg = &fwd_txfm_1d_row_cfg_adst_4;
col_cfg = &fwd_txfm_1d_col_cfg_adst_4;
load_buffer_4x4(input, in, input_stride, 1, 1, row_cfg->shift[0]);
fadst4x4_sse4_1(in, col_cfg->cos_bit[2]);
fadst4x4_sse4_1(in, row_cfg->cos_bit[2]);
write_buffer_4x4(in, coeff);
break;
case ADST_FLIPADST:
cfg = &fwd_txfm_2d_cfg_adst_adst_4;
load_buffer_4x4(input, in, input_stride, 0, 1, cfg->shift[0]);
fadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
fadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
row_cfg = &fwd_txfm_1d_row_cfg_adst_4;
col_cfg = &fwd_txfm_1d_col_cfg_adst_4;
load_buffer_4x4(input, in, input_stride, 0, 1, row_cfg->shift[0]);
fadst4x4_sse4_1(in, col_cfg->cos_bit[2]);
fadst4x4_sse4_1(in, row_cfg->cos_bit[2]);
write_buffer_4x4(in, coeff);
break;
case FLIPADST_ADST:
cfg = &fwd_txfm_2d_cfg_adst_adst_4;
load_buffer_4x4(input, in, input_stride, 1, 0, cfg->shift[0]);
fadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
fadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
row_cfg = &fwd_txfm_1d_row_cfg_adst_4;
col_cfg = &fwd_txfm_1d_col_cfg_adst_4;
load_buffer_4x4(input, in, input_stride, 1, 0, row_cfg->shift[0]);
fadst4x4_sse4_1(in, col_cfg->cos_bit[2]);
fadst4x4_sse4_1(in, row_cfg->cos_bit[2]);
write_buffer_4x4(in, coeff);
break;
#endif
@ -930,97 +940,107 @@ static void fadst8x8_sse4_1(__m128i *in, __m128i *out, int bit) {
void av1_fwd_txfm2d_8x8_sse4_1(const int16_t *input, int32_t *coeff, int stride,
int tx_type, int bd) {
__m128i in[16], out[16];
const TXFM_2D_CFG *cfg = NULL;
const TXFM_1D_CFG *row_cfg = NULL;
const TXFM_1D_CFG *col_cfg = NULL;
switch (tx_type) {
case DCT_DCT:
cfg = &fwd_txfm_2d_cfg_dct_dct_8;
load_buffer_8x8(input, in, stride, 0, 0, cfg->shift[0]);
fdct8x8_sse4_1(in, out, cfg->cos_bit_col[2]);
col_txfm_8x8_rounding(out, -cfg->shift[1]);
row_cfg = &fwd_txfm_1d_row_cfg_dct_8;
col_cfg = &fwd_txfm_1d_col_cfg_dct_8;
load_buffer_8x8(input, in, stride, 0, 0, row_cfg->shift[0]);
fdct8x8_sse4_1(in, out, col_cfg->cos_bit[2]);
col_txfm_8x8_rounding(out, -row_cfg->shift[1]);
transpose_8x8(out, in);
fdct8x8_sse4_1(in, out, cfg->cos_bit_row[2]);
fdct8x8_sse4_1(in, out, row_cfg->cos_bit[2]);
transpose_8x8(out, in);
write_buffer_8x8(in, coeff);
break;
case ADST_DCT:
cfg = &fwd_txfm_2d_cfg_adst_dct_8;
load_buffer_8x8(input, in, stride, 0, 0, cfg->shift[0]);
fadst8x8_sse4_1(in, out, cfg->cos_bit_col[2]);
col_txfm_8x8_rounding(out, -cfg->shift[1]);
row_cfg = &fwd_txfm_1d_row_cfg_dct_8;
col_cfg = &fwd_txfm_1d_col_cfg_adst_8;
load_buffer_8x8(input, in, stride, 0, 0, row_cfg->shift[0]);
fadst8x8_sse4_1(in, out, col_cfg->cos_bit[2]);
col_txfm_8x8_rounding(out, -row_cfg->shift[1]);
transpose_8x8(out, in);
fdct8x8_sse4_1(in, out, cfg->cos_bit_row[2]);
fdct8x8_sse4_1(in, out, row_cfg->cos_bit[2]);
transpose_8x8(out, in);
write_buffer_8x8(in, coeff);
break;
case DCT_ADST:
cfg = &fwd_txfm_2d_cfg_dct_adst_8;
load_buffer_8x8(input, in, stride, 0, 0, cfg->shift[0]);
fdct8x8_sse4_1(in, out, cfg->cos_bit_col[2]);
col_txfm_8x8_rounding(out, -cfg->shift[1]);
row_cfg = &fwd_txfm_1d_row_cfg_adst_8;
col_cfg = &fwd_txfm_1d_col_cfg_dct_8;
load_buffer_8x8(input, in, stride, 0, 0, row_cfg->shift[0]);
fdct8x8_sse4_1(in, out, col_cfg->cos_bit[2]);
col_txfm_8x8_rounding(out, -row_cfg->shift[1]);
transpose_8x8(out, in);
fadst8x8_sse4_1(in, out, cfg->cos_bit_row[2]);
fadst8x8_sse4_1(in, out, row_cfg->cos_bit[2]);
transpose_8x8(out, in);
write_buffer_8x8(in, coeff);
break;
case ADST_ADST:
cfg = &fwd_txfm_2d_cfg_adst_adst_8;
load_buffer_8x8(input, in, stride, 0, 0, cfg->shift[0]);
fadst8x8_sse4_1(in, out, cfg->cos_bit_col[2]);
col_txfm_8x8_rounding(out, -cfg->shift[1]);
row_cfg = &fwd_txfm_1d_row_cfg_adst_8;
col_cfg = &fwd_txfm_1d_col_cfg_adst_8;
load_buffer_8x8(input, in, stride, 0, 0, row_cfg->shift[0]);
fadst8x8_sse4_1(in, out, col_cfg->cos_bit[2]);
col_txfm_8x8_rounding(out, -row_cfg->shift[1]);
transpose_8x8(out, in);
fadst8x8_sse4_1(in, out, cfg->cos_bit_row[2]);
fadst8x8_sse4_1(in, out, row_cfg->cos_bit[2]);
transpose_8x8(out, in);
write_buffer_8x8(in, coeff);
break;
#if CONFIG_EXT_TX
case FLIPADST_DCT:
cfg = &fwd_txfm_2d_cfg_adst_dct_8;
load_buffer_8x8(input, in, stride, 1, 0, cfg->shift[0]);
fadst8x8_sse4_1(in, out, cfg->cos_bit_col[2]);
col_txfm_8x8_rounding(out, -cfg->shift[1]);
row_cfg = &fwd_txfm_1d_row_cfg_dct_8;
col_cfg = &fwd_txfm_1d_col_cfg_adst_8;
load_buffer_8x8(input, in, stride, 1, 0, row_cfg->shift[0]);
fadst8x8_sse4_1(in, out, col_cfg->cos_bit[2]);
col_txfm_8x8_rounding(out, -row_cfg->shift[1]);
transpose_8x8(out, in);
fdct8x8_sse4_1(in, out, cfg->cos_bit_row[2]);
fdct8x8_sse4_1(in, out, row_cfg->cos_bit[2]);
transpose_8x8(out, in);
write_buffer_8x8(in, coeff);
break;
case DCT_FLIPADST:
cfg = &fwd_txfm_2d_cfg_dct_adst_8;
load_buffer_8x8(input, in, stride, 0, 1, cfg->shift[0]);
fdct8x8_sse4_1(in, out, cfg->cos_bit_col[2]);
col_txfm_8x8_rounding(out, -cfg->shift[1]);
row_cfg = &fwd_txfm_1d_row_cfg_adst_8;
col_cfg = &fwd_txfm_1d_col_cfg_dct_8;
load_buffer_8x8(input, in, stride, 0, 1, row_cfg->shift[0]);
fdct8x8_sse4_1(in, out, col_cfg->cos_bit[2]);
col_txfm_8x8_rounding(out, -row_cfg->shift[1]);
transpose_8x8(out, in);
fadst8x8_sse4_1(in, out, cfg->cos_bit_row[2]);
fadst8x8_sse4_1(in, out, row_cfg->cos_bit[2]);
transpose_8x8(out, in);
write_buffer_8x8(in, coeff);
break;
case FLIPADST_FLIPADST:
cfg = &fwd_txfm_2d_cfg_adst_adst_8;
load_buffer_8x8(input, in, stride, 1, 1, cfg->shift[0]);
fadst8x8_sse4_1(in, out, cfg->cos_bit_col[2]);
col_txfm_8x8_rounding(out, -cfg->shift[1]);
row_cfg = &fwd_txfm_1d_row_cfg_adst_8;
col_cfg = &fwd_txfm_1d_col_cfg_adst_8;
load_buffer_8x8(input, in, stride, 1, 1, row_cfg->shift[0]);
fadst8x8_sse4_1(in, out, col_cfg->cos_bit[2]);
col_txfm_8x8_rounding(out, -row_cfg->shift[1]);
transpose_8x8(out, in);
fadst8x8_sse4_1(in, out, cfg->cos_bit_row[2]);
fadst8x8_sse4_1(in, out, row_cfg->cos_bit[2]);
transpose_8x8(out, in);
write_buffer_8x8(in, coeff);
break;
case ADST_FLIPADST:
cfg = &fwd_txfm_2d_cfg_adst_adst_8;
load_buffer_8x8(input, in, stride, 0, 1, cfg->shift[0]);
fadst8x8_sse4_1(in, out, cfg->cos_bit_col[2]);
col_txfm_8x8_rounding(out, -cfg->shift[1]);
row_cfg = &fwd_txfm_1d_row_cfg_adst_8;
col_cfg = &fwd_txfm_1d_col_cfg_adst_8;
load_buffer_8x8(input, in, stride, 0, 1, row_cfg->shift[0]);
fadst8x8_sse4_1(in, out, col_cfg->cos_bit[2]);
col_txfm_8x8_rounding(out, -row_cfg->shift[1]);
transpose_8x8(out, in);
fadst8x8_sse4_1(in, out, cfg->cos_bit_row[2]);
fadst8x8_sse4_1(in, out, row_cfg->cos_bit[2]);
transpose_8x8(out, in);
write_buffer_8x8(in, coeff);
break;
case FLIPADST_ADST:
cfg = &fwd_txfm_2d_cfg_adst_adst_8;
load_buffer_8x8(input, in, stride, 1, 0, cfg->shift[0]);
fadst8x8_sse4_1(in, out, cfg->cos_bit_col[2]);
col_txfm_8x8_rounding(out, -cfg->shift[1]);
row_cfg = &fwd_txfm_1d_row_cfg_adst_8;
col_cfg = &fwd_txfm_1d_col_cfg_adst_8;
load_buffer_8x8(input, in, stride, 1, 0, row_cfg->shift[0]);
fadst8x8_sse4_1(in, out, col_cfg->cos_bit[2]);
col_txfm_8x8_rounding(out, -row_cfg->shift[1]);
transpose_8x8(out, in);
fadst8x8_sse4_1(in, out, cfg->cos_bit_row[2]);
fadst8x8_sse4_1(in, out, row_cfg->cos_bit[2]);
transpose_8x8(out, in);
write_buffer_8x8(in, coeff);
break;
@ -1794,97 +1814,107 @@ static void write_buffer_16x16(const __m128i *in, tran_low_t *output) {
void av1_fwd_txfm2d_16x16_sse4_1(const int16_t *input, int32_t *coeff,
int stride, int tx_type, int bd) {
__m128i in[64], out[64];
const TXFM_2D_CFG *cfg = NULL;
const TXFM_1D_CFG *row_cfg = NULL;
const TXFM_1D_CFG *col_cfg = NULL;
switch (tx_type) {
case DCT_DCT:
cfg = &fwd_txfm_2d_cfg_dct_dct_16;
load_buffer_16x16(input, in, stride, 0, 0, cfg->shift[0]);
fdct16x16_sse4_1(in, out, cfg->cos_bit_col[0]);
col_txfm_16x16_rounding(out, -cfg->shift[1]);
row_cfg = &fwd_txfm_1d_row_cfg_dct_16;
col_cfg = &fwd_txfm_1d_col_cfg_dct_16;
load_buffer_16x16(input, in, stride, 0, 0, row_cfg->shift[0]);
fdct16x16_sse4_1(in, out, col_cfg->cos_bit[0]);
col_txfm_16x16_rounding(out, -row_cfg->shift[1]);
transpose_16x16(out, in);
fdct16x16_sse4_1(in, out, cfg->cos_bit_row[0]);
fdct16x16_sse4_1(in, out, row_cfg->cos_bit[0]);
transpose_16x16(out, in);
write_buffer_16x16(in, coeff);
break;
case ADST_DCT:
cfg = &fwd_txfm_2d_cfg_adst_dct_16;
load_buffer_16x16(input, in, stride, 0, 0, cfg->shift[0]);
fadst16x16_sse4_1(in, out, cfg->cos_bit_col[0]);
col_txfm_16x16_rounding(out, -cfg->shift[1]);
row_cfg = &fwd_txfm_1d_row_cfg_dct_16;
col_cfg = &fwd_txfm_1d_col_cfg_adst_16;
load_buffer_16x16(input, in, stride, 0, 0, row_cfg->shift[0]);
fadst16x16_sse4_1(in, out, col_cfg->cos_bit[0]);
col_txfm_16x16_rounding(out, -row_cfg->shift[1]);
transpose_16x16(out, in);
fdct16x16_sse4_1(in, out, cfg->cos_bit_row[0]);
fdct16x16_sse4_1(in, out, row_cfg->cos_bit[0]);
transpose_16x16(out, in);
write_buffer_16x16(in, coeff);
break;
case DCT_ADST:
cfg = &fwd_txfm_2d_cfg_dct_adst_16;
load_buffer_16x16(input, in, stride, 0, 0, cfg->shift[0]);
fdct16x16_sse4_1(in, out, cfg->cos_bit_col[0]);
col_txfm_16x16_rounding(out, -cfg->shift[1]);
row_cfg = &fwd_txfm_1d_row_cfg_adst_16;
col_cfg = &fwd_txfm_1d_col_cfg_dct_16;
load_buffer_16x16(input, in, stride, 0, 0, row_cfg->shift[0]);
fdct16x16_sse4_1(in, out, col_cfg->cos_bit[0]);
col_txfm_16x16_rounding(out, -row_cfg->shift[1]);
transpose_16x16(out, in);
fadst16x16_sse4_1(in, out, cfg->cos_bit_row[0]);
fadst16x16_sse4_1(in, out, row_cfg->cos_bit[0]);
transpose_16x16(out, in);
write_buffer_16x16(in, coeff);
break;
case ADST_ADST:
cfg = &fwd_txfm_2d_cfg_adst_adst_16;
load_buffer_16x16(input, in, stride, 0, 0, cfg->shift[0]);
fadst16x16_sse4_1(in, out, cfg->cos_bit_col[0]);
col_txfm_16x16_rounding(out, -cfg->shift[1]);
row_cfg = &fwd_txfm_1d_row_cfg_adst_16;
col_cfg = &fwd_txfm_1d_col_cfg_adst_16;
load_buffer_16x16(input, in, stride, 0, 0, row_cfg->shift[0]);
fadst16x16_sse4_1(in, out, col_cfg->cos_bit[0]);
col_txfm_16x16_rounding(out, -row_cfg->shift[1]);
transpose_16x16(out, in);
fadst16x16_sse4_1(in, out, cfg->cos_bit_row[0]);
fadst16x16_sse4_1(in, out, row_cfg->cos_bit[0]);
transpose_16x16(out, in);
write_buffer_16x16(in, coeff);
break;
#if CONFIG_EXT_TX
case FLIPADST_DCT:
cfg = &fwd_txfm_2d_cfg_adst_dct_16;
load_buffer_16x16(input, in, stride, 1, 0, cfg->shift[0]);
fadst16x16_sse4_1(in, out, cfg->cos_bit_col[0]);
col_txfm_16x16_rounding(out, -cfg->shift[1]);
row_cfg = &fwd_txfm_1d_row_cfg_dct_16;
col_cfg = &fwd_txfm_1d_col_cfg_adst_16;
load_buffer_16x16(input, in, stride, 1, 0, row_cfg->shift[0]);
fadst16x16_sse4_1(in, out, col_cfg->cos_bit[0]);
col_txfm_16x16_rounding(out, -row_cfg->shift[1]);
transpose_16x16(out, in);
fdct16x16_sse4_1(in, out, cfg->cos_bit_row[0]);
fdct16x16_sse4_1(in, out, row_cfg->cos_bit[0]);
transpose_16x16(out, in);
write_buffer_16x16(in, coeff);
break;
case DCT_FLIPADST:
cfg = &fwd_txfm_2d_cfg_dct_adst_16;
load_buffer_16x16(input, in, stride, 0, 1, cfg->shift[0]);
fdct16x16_sse4_1(in, out, cfg->cos_bit_col[0]);
col_txfm_16x16_rounding(out, -cfg->shift[1]);
row_cfg = &fwd_txfm_1d_row_cfg_adst_16;
col_cfg = &fwd_txfm_1d_col_cfg_dct_16;
load_buffer_16x16(input, in, stride, 0, 1, row_cfg->shift[0]);
fdct16x16_sse4_1(in, out, col_cfg->cos_bit[0]);
col_txfm_16x16_rounding(out, -row_cfg->shift[1]);
transpose_16x16(out, in);
fadst16x16_sse4_1(in, out, cfg->cos_bit_row[0]);
fadst16x16_sse4_1(in, out, row_cfg->cos_bit[0]);
transpose_16x16(out, in);
write_buffer_16x16(in, coeff);
break;
case FLIPADST_FLIPADST:
cfg = &fwd_txfm_2d_cfg_adst_adst_16;
load_buffer_16x16(input, in, stride, 1, 1, cfg->shift[0]);
fadst16x16_sse4_1(in, out, cfg->cos_bit_col[0]);
col_txfm_16x16_rounding(out, -cfg->shift[1]);
row_cfg = &fwd_txfm_1d_row_cfg_adst_16;
col_cfg = &fwd_txfm_1d_col_cfg_adst_16;
load_buffer_16x16(input, in, stride, 1, 1, row_cfg->shift[0]);
fadst16x16_sse4_1(in, out, col_cfg->cos_bit[0]);
col_txfm_16x16_rounding(out, -row_cfg->shift[1]);
transpose_16x16(out, in);
fadst16x16_sse4_1(in, out, cfg->cos_bit_row[0]);
fadst16x16_sse4_1(in, out, row_cfg->cos_bit[0]);
transpose_16x16(out, in);
write_buffer_16x16(in, coeff);
break;
case ADST_FLIPADST:
cfg = &fwd_txfm_2d_cfg_adst_adst_16;
load_buffer_16x16(input, in, stride, 0, 1, cfg->shift[0]);
fadst16x16_sse4_1(in, out, cfg->cos_bit_col[0]);
col_txfm_16x16_rounding(out, -cfg->shift[1]);
row_cfg = &fwd_txfm_1d_row_cfg_adst_16;
col_cfg = &fwd_txfm_1d_col_cfg_adst_16;
load_buffer_16x16(input, in, stride, 0, 1, row_cfg->shift[0]);
fadst16x16_sse4_1(in, out, col_cfg->cos_bit[0]);
col_txfm_16x16_rounding(out, -row_cfg->shift[1]);
transpose_16x16(out, in);
fadst16x16_sse4_1(in, out, cfg->cos_bit_row[0]);
fadst16x16_sse4_1(in, out, row_cfg->cos_bit[0]);
transpose_16x16(out, in);
write_buffer_16x16(in, coeff);
break;
case FLIPADST_ADST:
cfg = &fwd_txfm_2d_cfg_adst_adst_16;
load_buffer_16x16(input, in, stride, 1, 0, cfg->shift[0]);
fadst16x16_sse4_1(in, out, cfg->cos_bit_col[0]);
col_txfm_16x16_rounding(out, -cfg->shift[1]);
row_cfg = &fwd_txfm_1d_row_cfg_adst_16;
col_cfg = &fwd_txfm_1d_col_cfg_adst_16;
load_buffer_16x16(input, in, stride, 1, 0, row_cfg->shift[0]);
fadst16x16_sse4_1(in, out, col_cfg->cos_bit[0]);
col_txfm_16x16_rounding(out, -row_cfg->shift[1]);
transpose_16x16(out, in);
fadst16x16_sse4_1(in, out, cfg->cos_bit_row[0]);
fadst16x16_sse4_1(in, out, row_cfg->cos_bit[0]);
transpose_16x16(out, in);
write_buffer_16x16(in, coeff);
break;

Просмотреть файл

@ -41,9 +41,11 @@ class AV1FwdTxfm2d : public ::testing::TestWithParam<AV1FwdTxfm2dParam> {
count_ = 500;
TXFM_2D_FLIP_CFG fwd_txfm_flip_cfg =
av1_get_fwd_txfm_cfg(tx_type_, tx_size_);
const TXFM_2D_CFG *fwd_txfm_cfg = fwd_txfm_flip_cfg.cfg;
int amplify_bit = fwd_txfm_cfg->shift[0] + fwd_txfm_cfg->shift[1] +
fwd_txfm_cfg->shift[2];
// TODO(sarahparker) this test will need to be updated when these
// functions are extended to support rectangular transforms
int amplify_bit = fwd_txfm_flip_cfg.row_cfg->shift[0] +
fwd_txfm_flip_cfg.row_cfg->shift[1] +
fwd_txfm_flip_cfg.row_cfg->shift[2];
ud_flip_ = fwd_txfm_flip_cfg.ud_flip;
lr_flip_ = fwd_txfm_flip_cfg.lr_flip;
amplify_factor_ =

Просмотреть файл

@ -17,7 +17,7 @@
#include "test/acm_random.h"
#include "test/util.h"
#include "test/av1_txfm_test.h"
#include "av1/common/av1_inv_txfm2d_cfg.h"
#include "av1/common/av1_inv_txfm1d_cfg.h"
using libaom_test::ACMRandom;
using libaom_test::input_base;