aom/vp10/common/vp10_inv_txfm2d.c

/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "vp10/common/enums.h"
#include "vp10/common/vp10_txfm.h"
#include "vp10/common/vp10_inv_txfm1d.h"
#include "vp10/common/vp10_inv_txfm2d_cfg.h"

// Maps a 1D transform type to the matching inverse 1D transform routine.
// Unknown types trigger assert(0); when asserts are compiled out the function
// returns NULL instead.
static INLINE TxfmFunc inv_txfm_type_to_func(TXFM_TYPE txfm_type) {
  TxfmFunc selected = NULL;

  switch (txfm_type) {
    // Inverse DCT variants.
    case TXFM_TYPE_DCT4: selected = vp10_idct4_new; break;
    case TXFM_TYPE_DCT8: selected = vp10_idct8_new; break;
    case TXFM_TYPE_DCT16: selected = vp10_idct16_new; break;
    case TXFM_TYPE_DCT32: selected = vp10_idct32_new; break;
    case TXFM_TYPE_DCT64: selected = vp10_idct64_new; break;
    // Inverse ADST variants.
    case TXFM_TYPE_ADST4: selected = vp10_iadst4_new; break;
    case TXFM_TYPE_ADST8: selected = vp10_iadst8_new; break;
    case TXFM_TYPE_ADST16: selected = vp10_iadst16_new; break;
    case TXFM_TYPE_ADST32: selected = vp10_iadst32_new; break;
    default:
      // No inverse transform is registered for this type.
      assert(0);
      break;
  }

  return selected;
}

// Lookup table of inverse 2D transform configurations, indexed as
// inv_txfm_cfg_ls[tx_type][tx_size] (see vp10_get_inv_txfm_cfg()).
// Each row lists the 4-, 8-, 16- and 32-point configs of one transform pair.
// NOTE(review): the row order must match the numeric order of the tx_type
// enum and the column order must match TX_4X4..TX_32X32 -- confirm against
// vp10/common/enums.h. The 64-point config is not part of this table; it is
// served by vp10_get_inv_txfm_64x64_cfg().
#if CONFIG_EXT_TX
// With CONFIG_EXT_TX the table has FLIPADST_ADST + 1 rows. Rows 4..8 reuse the
// ADST-based configs of rows 1..3.
// NOTE(review): presumably rows 4..8 are the flipped-ADST types and the flip
// itself is carried by the flags filled in by set_flip_cfg() -- confirm.
static const TXFM_2D_CFG* inv_txfm_cfg_ls[FLIPADST_ADST + 1][TX_SIZES] = {
    {&inv_txfm_2d_cfg_dct_dct_4  , &inv_txfm_2d_cfg_dct_dct_8,
     &inv_txfm_2d_cfg_dct_dct_16  , &inv_txfm_2d_cfg_dct_dct_32},
    {&inv_txfm_2d_cfg_adst_dct_4 , &inv_txfm_2d_cfg_adst_dct_8,
     &inv_txfm_2d_cfg_adst_dct_16 , &inv_txfm_2d_cfg_adst_dct_32},
    {&inv_txfm_2d_cfg_dct_adst_4 , &inv_txfm_2d_cfg_dct_adst_8,
     &inv_txfm_2d_cfg_dct_adst_16 , &inv_txfm_2d_cfg_dct_adst_32},
    {&inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
     &inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32},
    {&inv_txfm_2d_cfg_adst_dct_4 , &inv_txfm_2d_cfg_adst_dct_8,
     &inv_txfm_2d_cfg_adst_dct_16 , &inv_txfm_2d_cfg_adst_dct_32},
    {&inv_txfm_2d_cfg_dct_adst_4 , &inv_txfm_2d_cfg_dct_adst_8,
     &inv_txfm_2d_cfg_dct_adst_16 , &inv_txfm_2d_cfg_dct_adst_32},
    {&inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
     &inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32},
    {&inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
     &inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32},
    {&inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
     &inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32},
};
#else
// Without CONFIG_EXT_TX the table has TX_TYPES rows; four rows are
// initialized here (dct_dct, adst_dct, dct_adst, adst_adst).
static const TXFM_2D_CFG* inv_txfm_cfg_ls[TX_TYPES][TX_SIZES] = {
    {&inv_txfm_2d_cfg_dct_dct_4  , &inv_txfm_2d_cfg_dct_dct_8,
      &inv_txfm_2d_cfg_dct_dct_16  , &inv_txfm_2d_cfg_dct_dct_32},
    {&inv_txfm_2d_cfg_adst_dct_4 , &inv_txfm_2d_cfg_adst_dct_8,
      &inv_txfm_2d_cfg_adst_dct_16 , &inv_txfm_2d_cfg_adst_dct_32},
    {&inv_txfm_2d_cfg_dct_adst_4 , &inv_txfm_2d_cfg_dct_adst_8,
      &inv_txfm_2d_cfg_dct_adst_16 , &inv_txfm_2d_cfg_dct_adst_32},
    {&inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
      &inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32},
};
#endif

// Builds the inverse 2D transform configuration for the given transform type
// and size.
//
// tx_type: row index into inv_txfm_cfg_ls; also passed to set_flip_cfg().
// tx_size: column index into inv_txfm_cfg_ls.
//
// Returns a TXFM_2D_FLIP_CFG whose cfg member points at the selected table
// entry. Both indices are checked against the table dimensions in builds with
// asserts enabled; an out-of-range index would otherwise read past the table.
TXFM_2D_FLIP_CFG vp10_get_inv_txfm_cfg(int tx_type, int tx_size) {
  TXFM_2D_FLIP_CFG cfg;
  // Derive the limits from the table itself so the checks stay correct for
  // both the CONFIG_EXT_TX and the non-EXT_TX table layouts.
  assert(tx_type >= 0 &&
         tx_type <
             (int)(sizeof(inv_txfm_cfg_ls) / sizeof(inv_txfm_cfg_ls[0])));
  assert(tx_size >= 0 &&
         tx_size <
             (int)(sizeof(inv_txfm_cfg_ls[0]) / sizeof(inv_txfm_cfg_ls[0][0])));
  set_flip_cfg(tx_type, &cfg);
  cfg.cfg = inv_txfm_cfg_ls[tx_type][tx_size];
  return cfg;
}

// Builds the inverse 2D transform configuration for 64x64 blocks.
//
// tx_type: transform type; only DCT_DCT is handled here.
//
// Returns the 64-point DCT/DCT configuration with its flip flags set by
// set_flip_cfg(). Any other tx_type triggers assert(0); when asserts are
// compiled out the function returns a zero-initialized configuration (cfg
// member is a null pointer) rather than an indeterminate one.
TXFM_2D_FLIP_CFG vp10_get_inv_txfm_64x64_cfg(int tx_type) {
  // Zero-initialize so the default branch never returns uninitialized data.
  TXFM_2D_FLIP_CFG cfg = { 0 };
  switch (tx_type) {
    case DCT_DCT:
      cfg.cfg = &inv_txfm_2d_cfg_dct_dct_64;
      set_flip_cfg(tx_type, &cfg);
      break;
    default:
      assert(0);
      break;
  }
  return cfg;
}

// Applies a square inverse 2D transform to `input` and accumulates the result
// into `output`.
//
// input:    txfm_size * txfm_size coefficients, read one row at a time.
// output:   destination block with row pitch `stride`; each result sample is
//           added to the value already stored there.
// cfg:      transform configuration plus left/right and up/down flip flags.
// txfm_buf: scratch space of txfm_size * txfm_size + 2 * txfm_size entries,
//           laid out as [column input | column output | row-pass results].
static INLINE void inv_txfm2d_add_c(const int32_t *input, int16_t *output,
                                    int stride, TXFM_2D_FLIP_CFG *cfg,
                                    int32_t *txfm_buf) {
  const TXFM_2D_CFG *const params = cfg->cfg;
  const int n = params->txfm_size;
  const int8_t *const shift = params->shift;
  const TxfmFunc col_txfm = inv_txfm_type_to_func(params->txfm_type_col);
  const TxfmFunc row_txfm = inv_txfm_type_to_func(params->txfm_type_row);
  const int flip_lr = cfg->lr_flip != 0;
  const int flip_ud = cfg->ud_flip != 0;

  // Carve the scratch buffer into its three regions.
  int32_t *const col_in = txfm_buf;
  int32_t *const col_out = col_in + n;
  int32_t *const row_pass = col_out + n;
  int row, col;

  // Row pass: transform every input row into row_pass, then apply the first
  // rounding shift.
  for (row = 0; row < n; ++row) {
    int32_t *const dst = row_pass + row * n;
    row_txfm(input + row * n, dst, params->cos_bit_row,
             params->stage_range_row);
    round_shift_array(dst, n, -shift[0]);
  }

  // Column pass: gather one column at a time, transform it, apply the second
  // rounding shift and add it to the output.
  for (col = 0; col < n; ++col) {
    // A left/right flip reads the mirrored column of the row-pass result.
    const int src_col = flip_lr ? (n - col - 1) : col;
    for (row = 0; row < n; ++row) {
      col_in[row] = row_pass[row * n + src_col];
    }

    col_txfm(col_in, col_out, params->cos_bit_col, params->stage_range_col);
    round_shift_array(col_out, n, -shift[1]);

    for (row = 0; row < n; ++row) {
      // An up/down flip reads the transformed column bottom-to-top.
      const int src_row = flip_ud ? (n - row - 1) : row;
      output[row * stride + col] += col_out[src_row];
    }
  }
}

// Inverse 4x4 2D transform: transforms `input` and adds the result to the
// block at `output`.
//
// input:   4 * 4 transform coefficients.
// output:  destination block with row pitch `stride`; updated in place.
// tx_type: transform type used to select the configuration.
// bd:      bit depth; clamp_block() is called with the range
//          [0, (1 << bd) - 1].
void vp10_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output,
                               int stride, int tx_type,
                               int bd) {
  // Scratch space for inv_txfm2d_add_c(): size * size + 2 * size entries.
  // Declared as int32_t to match the int32_t * parameter it is passed to.
  int32_t txfm_buf[4 * 4 + 4 + 4];
  // output contains the prediction signal which is always positive and smaller
  // than (1 << bd) - 1
  // since bd < 16-1, therefore we can treat the uint16_t* output buffer as an
  // int16_t*
  TXFM_2D_FLIP_CFG cfg = vp10_get_inv_txfm_cfg(tx_type, TX_4X4);
  inv_txfm2d_add_c(input, (int16_t *)output, stride, &cfg, txfm_buf);
  clamp_block((int16_t *)output, 4, stride, 0, (1 << bd) - 1);
}

// Inverse 8x8 2D transform: transforms `input` and adds the result to the
// block at `output`.
//
// input:   8 * 8 transform coefficients.
// output:  destination block with row pitch `stride`; updated in place.
// tx_type: transform type used to select the configuration.
// bd:      bit depth; clamp_block() is called with the range
//          [0, (1 << bd) - 1].
void vp10_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output,
                               int stride, int tx_type,
                               int bd) {
  // Scratch space for inv_txfm2d_add_c(): size * size + 2 * size entries.
  // Declared as int32_t to match the int32_t * parameter it is passed to.
  int32_t txfm_buf[8 * 8 + 8 + 8];
  // output contains the prediction signal which is always positive and smaller
  // than (1 << bd) - 1
  // since bd < 16-1, therefore we can treat the uint16_t* output buffer as an
  // int16_t*
  TXFM_2D_FLIP_CFG cfg = vp10_get_inv_txfm_cfg(tx_type, TX_8X8);
  inv_txfm2d_add_c(input, (int16_t *)output, stride, &cfg, txfm_buf);
  clamp_block((int16_t *)output, 8, stride, 0, (1 << bd) - 1);
}

// Inverse 16x16 2D transform: transforms `input` and adds the result to the
// block at `output`.
//
// input:   16 * 16 transform coefficients.
// output:  destination block with row pitch `stride`; updated in place.
// tx_type: transform type used to select the configuration.
// bd:      bit depth; clamp_block() is called with the range
//          [0, (1 << bd) - 1].
void vp10_inv_txfm2d_add_16x16_c(const int32_t *input, uint16_t *output,
                                 int stride, int tx_type,
                                 int bd) {
  // Scratch space for inv_txfm2d_add_c(): size * size + 2 * size entries.
  // Declared as int32_t to match the int32_t * parameter it is passed to.
  int32_t txfm_buf[16 * 16 + 16 + 16];
  // output contains the prediction signal which is always positive and smaller
  // than (1 << bd) - 1
  // since bd < 16-1, therefore we can treat the uint16_t* output buffer as an
  // int16_t*
  TXFM_2D_FLIP_CFG cfg = vp10_get_inv_txfm_cfg(tx_type, TX_16X16);
  inv_txfm2d_add_c(input, (int16_t *)output, stride, &cfg, txfm_buf);
  clamp_block((int16_t *)output, 16, stride, 0, (1 << bd) - 1);
}

// Inverse 32x32 2D transform: transforms `input` and adds the result to the
// block at `output`.
//
// input:   32 * 32 transform coefficients.
// output:  destination block with row pitch `stride`; updated in place.
// tx_type: transform type used to select the configuration.
// bd:      bit depth; clamp_block() is called with the range
//          [0, (1 << bd) - 1].
void vp10_inv_txfm2d_add_32x32_c(const int32_t *input, uint16_t *output,
                                 int stride, int tx_type,
                                 int bd) {
  // Scratch space for inv_txfm2d_add_c(): size * size + 2 * size entries.
  // Declared as int32_t to match the int32_t * parameter it is passed to.
  int32_t txfm_buf[32 * 32 + 32 + 32];
  // output contains the prediction signal which is always positive and smaller
  // than (1 << bd) - 1
  // since bd < 16-1, therefore we can treat the uint16_t* output buffer as an
  // int16_t*
  TXFM_2D_FLIP_CFG cfg = vp10_get_inv_txfm_cfg(tx_type, TX_32X32);
  inv_txfm2d_add_c(input, (int16_t *)output, stride, &cfg, txfm_buf);
  clamp_block((int16_t *)output, 32, stride, 0, (1 << bd) - 1);
}

// Inverse 64x64 2D transform: transforms `input` and adds the result to the
// block at `output`.
//
// input:   64 * 64 transform coefficients.
// output:  destination block with row pitch `stride`; updated in place.
// tx_type: transform type; the configuration comes from
//          vp10_get_inv_txfm_64x64_cfg(), which only handles DCT_DCT.
// bd:      bit depth; clamp_block() is called with the range
//          [0, (1 << bd) - 1].
void vp10_inv_txfm2d_add_64x64_c(const int32_t *input, uint16_t *output,
                                 int stride, int tx_type,
                                 int bd) {
  // Scratch space for inv_txfm2d_add_c(): size * size + 2 * size entries.
  // Declared as int32_t to match the int32_t * parameter it is passed to.
  int32_t txfm_buf[64 * 64 + 64 + 64];
  // output contains the prediction signal which is always positive and smaller
  // than (1 << bd) - 1
  // since bd < 16-1, therefore we can treat the uint16_t* output buffer as an
  // int16_t*
  TXFM_2D_FLIP_CFG cfg = vp10_get_inv_txfm_64x64_cfg(tx_type);
  inv_txfm2d_add_c(input, (int16_t *)output, stride, &cfg, txfm_buf);
  clamp_block((int16_t *)output, 64, stride, 0, (1 << bd) - 1);
}
Add vp10_inv_txfm2d Change-Id: Ib63062a52c688e65bae5eb0052ce69d73d96c9c5 2015-11-06 22:15:54 +03:00			`/*`
			`* Copyright (c) 2015 The WebM project authors. All Rights Reserved.`
			`*`
			`* Use of this source code is governed by a BSD-style license`
			`* that can be found in the LICENSE file in the root of the source`
			`* tree. An additional intellectual property rights grant can be found`
			`* in the file PATENTS. All contributing project authors may`
			`* be found in the AUTHORS file in the root of the source tree.`
			`*/`

Change inverse HT function argument from TXFM_2D_CFG* to int This change has no performance impact. It prepares the proper function interface for better performance optimization. Change-Id: I12e2f2deaf7f3adc603de0a74852116468c762f6 2016-05-10 04:34:16 +03:00			`#include "vp10/common/enums.h"`
Add vp10_inv_txfm2d Change-Id: Ib63062a52c688e65bae5eb0052ce69d73d96c9c5 2015-11-06 22:15:54 +03:00			`#include "vp10/common/vp10_txfm.h"`
Passing TXFM_TYPE instead of func pointer This is to facilitate sse2 implementation Change-Id: Id2f53e83c5508c4445d9b1bba00a649cb4da6b74 2016-03-17 03:15:27 +03:00			`#include "vp10/common/vp10_inv_txfm1d.h"`
Change inverse HT function argument from TXFM_2D_CFG* to int This change has no performance impact. It prepares the proper function interface for better performance optimization. Change-Id: I12e2f2deaf7f3adc603de0a74852116468c762f6 2016-05-10 04:34:16 +03:00			`#include "vp10/common/vp10_inv_txfm2d_cfg.h"`
Add vp10_inv_txfm2d Change-Id: Ib63062a52c688e65bae5eb0052ce69d73d96c9c5 2015-11-06 22:15:54 +03:00
Replace inline with INLINE This fixes build issues under MSVC Change-Id: I6db6a43cba2e8ddb099b676f1ae019fe2742f366 2016-05-06 04:28:04 +03:00			`static INLINE TxfmFunc inv_txfm_type_to_func(TXFM_TYPE txfm_type) {`
Passing TXFM_TYPE instead of func pointer This is to facilitate sse2 implementation Change-Id: Id2f53e83c5508c4445d9b1bba00a649cb4da6b74 2016-03-17 03:15:27 +03:00			`switch (txfm_type) {`
			`case TXFM_TYPE_DCT4:`
			`return vp10_idct4_new;`
			`case TXFM_TYPE_DCT8:`
			`return vp10_idct8_new;`
			`case TXFM_TYPE_DCT16:`
			`return vp10_idct16_new;`
			`case TXFM_TYPE_DCT32:`
			`return vp10_idct32_new;`
			`case TXFM_TYPE_DCT64:`
			`return vp10_idct64_new;`
			`case TXFM_TYPE_ADST4:`
			`return vp10_iadst4_new;`
			`case TXFM_TYPE_ADST8:`
			`return vp10_iadst8_new;`
			`case TXFM_TYPE_ADST16:`
			`return vp10_iadst16_new;`
			`case TXFM_TYPE_ADST32:`
			`return vp10_iadst32_new;`
			`default:`
			`assert(0);`
			`return NULL;`
			`}`
			`}`

Add flip feature to vp10_inv_txfm2d.c Change-Id: Id5f0fade42749d2bed5553eda0d690af22b6c5b1 2016-05-13 01:21:01 +03:00			`#if CONFIG_EXT_TX`
			`static const TXFM_2D_CFG* inv_txfm_cfg_ls[FLIPADST_ADST + 1][TX_SIZES] = {`
			`{&inv_txfm_2d_cfg_dct_dct_4 , &inv_txfm_2d_cfg_dct_dct_8,`
			`&inv_txfm_2d_cfg_dct_dct_16 , &inv_txfm_2d_cfg_dct_dct_32},`
			`{&inv_txfm_2d_cfg_adst_dct_4 , &inv_txfm_2d_cfg_adst_dct_8,`
			`&inv_txfm_2d_cfg_adst_dct_16 , &inv_txfm_2d_cfg_adst_dct_32},`
			`{&inv_txfm_2d_cfg_dct_adst_4 , &inv_txfm_2d_cfg_dct_adst_8,`
			`&inv_txfm_2d_cfg_dct_adst_16 , &inv_txfm_2d_cfg_dct_adst_32},`
			`{&inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,`
			`&inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32},`
			`{&inv_txfm_2d_cfg_adst_dct_4 , &inv_txfm_2d_cfg_adst_dct_8,`
			`&inv_txfm_2d_cfg_adst_dct_16 , &inv_txfm_2d_cfg_adst_dct_32},`
			`{&inv_txfm_2d_cfg_dct_adst_4 , &inv_txfm_2d_cfg_dct_adst_8,`
			`&inv_txfm_2d_cfg_dct_adst_16 , &inv_txfm_2d_cfg_dct_adst_32},`
			`{&inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,`
			`&inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32},`
			`{&inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,`
			`&inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32},`
			`{&inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,`
			`&inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32},`
			`};`
			`#else`
			`static const TXFM_2D_CFG* inv_txfm_cfg_ls[TX_TYPES][TX_SIZES] = {`
			`{&inv_txfm_2d_cfg_dct_dct_4 , &inv_txfm_2d_cfg_dct_dct_8,`
			`&inv_txfm_2d_cfg_dct_dct_16 , &inv_txfm_2d_cfg_dct_dct_32},`
			`{&inv_txfm_2d_cfg_adst_dct_4 , &inv_txfm_2d_cfg_adst_dct_8,`
			`&inv_txfm_2d_cfg_adst_dct_16 , &inv_txfm_2d_cfg_adst_dct_32},`
			`{&inv_txfm_2d_cfg_dct_adst_4 , &inv_txfm_2d_cfg_dct_adst_8,`
			`&inv_txfm_2d_cfg_dct_adst_16 , &inv_txfm_2d_cfg_dct_adst_32},`
			`{&inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,`
			`&inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32},`
			`};`
			`#endif`

			`TXFM_2D_FLIP_CFG vp10_get_inv_txfm_cfg(int tx_type, int tx_size) {`
			`TXFM_2D_FLIP_CFG cfg;`
			`set_flip_cfg(tx_type, &cfg);`
			`cfg.cfg = inv_txfm_cfg_ls[tx_type][tx_size];`
Change inverse HT function argument from TXFM_2D_CFG* to int This change has no performance impact. It prepares the proper function interface for better performance optimization. Change-Id: I12e2f2deaf7f3adc603de0a74852116468c762f6 2016-05-10 04:34:16 +03:00			`return cfg;`
			`}`

Add flip feature to vp10_inv_txfm2d.c Change-Id: Id5f0fade42749d2bed5553eda0d690af22b6c5b1 2016-05-13 01:21:01 +03:00			`TXFM_2D_FLIP_CFG vp10_get_inv_txfm_64x64_cfg(int tx_type) {`
			`TXFM_2D_FLIP_CFG cfg;`
Change inverse HT function argument from TXFM_2D_CFG* to int This change has no performance impact. It prepares the proper function interface for better performance optimization. Change-Id: I12e2f2deaf7f3adc603de0a74852116468c762f6 2016-05-10 04:34:16 +03:00			`switch (tx_type) {`
			`case DCT_DCT:`
Add flip feature to vp10_inv_txfm2d.c Change-Id: Id5f0fade42749d2bed5553eda0d690af22b6c5b1 2016-05-13 01:21:01 +03:00			`cfg.cfg = &inv_txfm_2d_cfg_dct_dct_64;`
			`set_flip_cfg(tx_type, &cfg);`
Change inverse HT function argument from TXFM_2D_CFG* to int This change has no performance impact. It prepares the proper function interface for better performance optimization. Change-Id: I12e2f2deaf7f3adc603de0a74852116468c762f6 2016-05-10 04:34:16 +03:00			`break;`
			`default:`
			`assert(0);`
			`}`
			`return cfg;`
			`}`

Replace inline with INLINE This fixes build issues under MSVC Change-Id: I6db6a43cba2e8ddb099b676f1ae019fe2742f366 2016-05-06 04:28:04 +03:00			`static INLINE void inv_txfm2d_add_c(const int32_t input, int16_t output,`
Add flip feature to vp10_inv_txfm2d.c Change-Id: Id5f0fade42749d2bed5553eda0d690af22b6c5b1 2016-05-13 01:21:01 +03:00			`int stride, TXFM_2D_FLIP_CFG *cfg,`
Add vp10_inv_txfm2d Change-Id: Ib63062a52c688e65bae5eb0052ce69d73d96c9c5 2015-11-06 22:15:54 +03:00			`int32_t *txfm_buf) {`
Add flip feature to vp10_inv_txfm2d.c Change-Id: Id5f0fade42749d2bed5553eda0d690af22b6c5b1 2016-05-13 01:21:01 +03:00			`const int txfm_size = cfg->cfg->txfm_size;`
			`const int8_t *shift = cfg->cfg->shift;`
			`const int8_t *stage_range_col = cfg->cfg->stage_range_col;`
			`const int8_t *stage_range_row = cfg->cfg->stage_range_row;`
			`const int8_t *cos_bit_col = cfg->cfg->cos_bit_col;`
			`const int8_t *cos_bit_row = cfg->cfg->cos_bit_row;`
			`const TxfmFunc txfm_func_col = inv_txfm_type_to_func(cfg->cfg->txfm_type_col);`
			`const TxfmFunc txfm_func_row = inv_txfm_type_to_func(cfg->cfg->txfm_type_row);`
Add vp10_inv_txfm2d Change-Id: Ib63062a52c688e65bae5eb0052ce69d73d96c9c5 2015-11-06 22:15:54 +03:00
			`// txfm_buf's length is txfm_size * txfm_size + 2 * txfm_size`
			`// it is used for intermediate data buffering`
			`int32_t *temp_in = txfm_buf;`
			`int32_t *temp_out = temp_in + txfm_size;`
			`int32_t *buf = temp_out + txfm_size;`
			`int32_t *buf_ptr = buf;`
Add flip feature to vp10_inv_txfm2d.c Change-Id: Id5f0fade42749d2bed5553eda0d690af22b6c5b1 2016-05-13 01:21:01 +03:00			`int c, r;`
Add vp10_inv_txfm2d Change-Id: Ib63062a52c688e65bae5eb0052ce69d73d96c9c5 2015-11-06 22:15:54 +03:00
			`// Rows`
Add flip feature to vp10_inv_txfm2d.c Change-Id: Id5f0fade42749d2bed5553eda0d690af22b6c5b1 2016-05-13 01:21:01 +03:00			`for (r = 0; r < txfm_size; ++r) {`
Add vp10_inv_txfm2d Change-Id: Ib63062a52c688e65bae5eb0052ce69d73d96c9c5 2015-11-06 22:15:54 +03:00			`txfm_func_row(input, buf_ptr, cos_bit_row, stage_range_row);`
			`round_shift_array(buf_ptr, txfm_size, -shift[0]);`
			`input += txfm_size;`
			`buf_ptr += txfm_size;`
			`}`

			`// Columns`
Add flip feature to vp10_inv_txfm2d.c Change-Id: Id5f0fade42749d2bed5553eda0d690af22b6c5b1 2016-05-13 01:21:01 +03:00			`for (c = 0; c < txfm_size; ++c) {`
			`if (cfg->lr_flip == 0) {`
			`for (r = 0; r < txfm_size; ++r)`
			`temp_in[r] = buf[r * txfm_size + c];`
			`} else {`
			`// flip left right`
			`for (r = 0; r < txfm_size; ++r)`
			`temp_in[r] = buf[r * txfm_size + (txfm_size - c - 1)];`
			`}`
Add vp10_inv_txfm2d Change-Id: Ib63062a52c688e65bae5eb0052ce69d73d96c9c5 2015-11-06 22:15:54 +03:00			`txfm_func_col(temp_in, temp_out, cos_bit_col, stage_range_col);`
			`round_shift_array(temp_out, txfm_size, -shift[1]);`
Add flip feature to vp10_inv_txfm2d.c Change-Id: Id5f0fade42749d2bed5553eda0d690af22b6c5b1 2016-05-13 01:21:01 +03:00			`if (cfg->ud_flip == 0) {`
			`for (r = 0; r < txfm_size; ++r)`
			`output[r * stride + c] += temp_out[r];`
			`} else {`
			`// flip upside down`
			`for (r = 0; r < txfm_size; ++r)`
			`output[r * stride + c] += temp_out[txfm_size - r - 1];`
			`}`
Add vp10_inv_txfm2d Change-Id: Ib63062a52c688e65bae5eb0052ce69d73d96c9c5 2015-11-06 22:15:54 +03:00			`}`
			`}`

mv vp10_fwd_txfm2d_#x# into vp10_rtcd.h Change-Id: Iad7352698786791b0fd7c005a7edfd1724b71599 2016-03-19 00:09:06 +03:00			`void vp10_inv_txfm2d_add_4x4_c(const int32_t input, uint16_t output,`
Change inverse HT function argument from TXFM_2D_CFG* to int This change has no performance impact. It prepares the proper function interface for better performance optimization. Change-Id: I12e2f2deaf7f3adc603de0a74852116468c762f6 2016-05-10 04:34:16 +03:00			`int stride, int tx_type,`
			`int bd) {`
Add vp10_inv_txfm2d Change-Id: Ib63062a52c688e65bae5eb0052ce69d73d96c9c5 2015-11-06 22:15:54 +03:00			`int txfm_buf[4 * 4 + 4 + 4];`
			`// output contains the prediction signal which is always positive and smaller`
			`// than (1 << bd) - 1`
			`// since bd < 16-1, therefore we can treat the uint16_t* output buffer as an`
			`// int16_t*`
Add flip feature to vp10_inv_txfm2d.c Change-Id: Id5f0fade42749d2bed5553eda0d690af22b6c5b1 2016-05-13 01:21:01 +03:00			`TXFM_2D_FLIP_CFG cfg = vp10_get_inv_txfm_cfg(tx_type, TX_4X4);`
			`inv_txfm2d_add_c(input, (int16_t *)output, stride, &cfg, txfm_buf);`
Add vp10_inv_txfm2d Change-Id: Ib63062a52c688e65bae5eb0052ce69d73d96c9c5 2015-11-06 22:15:54 +03:00			`clamp_block((int16_t *)output, 4, stride, 0, (1 << bd) - 1);`
			`}`

mv vp10_fwd_txfm2d_#x# into vp10_rtcd.h Change-Id: Iad7352698786791b0fd7c005a7edfd1724b71599 2016-03-19 00:09:06 +03:00			`void vp10_inv_txfm2d_add_8x8_c(const int32_t input, uint16_t output,`
Change inverse HT function argument from TXFM_2D_CFG* to int This change has no performance impact. It prepares the proper function interface for better performance optimization. Change-Id: I12e2f2deaf7f3adc603de0a74852116468c762f6 2016-05-10 04:34:16 +03:00			`int stride, int tx_type,`
			`int bd) {`
Add vp10_inv_txfm2d Change-Id: Ib63062a52c688e65bae5eb0052ce69d73d96c9c5 2015-11-06 22:15:54 +03:00			`int txfm_buf[8 * 8 + 8 + 8];`
			`// output contains the prediction signal which is always positive and smaller`
			`// than (1 << bd) - 1`
			`// since bd < 16-1, therefore we can treat the uint16_t* output buffer as an`
			`// int16_t*`
Add flip feature to vp10_inv_txfm2d.c Change-Id: Id5f0fade42749d2bed5553eda0d690af22b6c5b1 2016-05-13 01:21:01 +03:00			`TXFM_2D_FLIP_CFG cfg = vp10_get_inv_txfm_cfg(tx_type, TX_8X8);`
			`inv_txfm2d_add_c(input, (int16_t *)output, stride, &cfg, txfm_buf);`
Add vp10_inv_txfm2d Change-Id: Ib63062a52c688e65bae5eb0052ce69d73d96c9c5 2015-11-06 22:15:54 +03:00			`clamp_block((int16_t *)output, 8, stride, 0, (1 << bd) - 1);`
			`}`

mv vp10_fwd_txfm2d_#x# into vp10_rtcd.h Change-Id: Iad7352698786791b0fd7c005a7edfd1724b71599 2016-03-19 00:09:06 +03:00			`void vp10_inv_txfm2d_add_16x16_c(const int32_t input, uint16_t output,`
Change inverse HT function argument from TXFM_2D_CFG* to int This change has no performance impact. It prepares the proper function interface for better performance optimization. Change-Id: I12e2f2deaf7f3adc603de0a74852116468c762f6 2016-05-10 04:34:16 +03:00			`int stride, int tx_type,`
			`int bd) {`
Add vp10_inv_txfm2d Change-Id: Ib63062a52c688e65bae5eb0052ce69d73d96c9c5 2015-11-06 22:15:54 +03:00			`int txfm_buf[16 * 16 + 16 + 16];`
			`// output contains the prediction signal which is always positive and smaller`
			`// than (1 << bd) - 1`
			`// since bd < 16-1, therefore we can treat the uint16_t* output buffer as an`
			`// int16_t*`
Add flip feature to vp10_inv_txfm2d.c Change-Id: Id5f0fade42749d2bed5553eda0d690af22b6c5b1 2016-05-13 01:21:01 +03:00			`TXFM_2D_FLIP_CFG cfg = vp10_get_inv_txfm_cfg(tx_type, TX_16X16);`
			`inv_txfm2d_add_c(input, (int16_t *)output, stride, &cfg, txfm_buf);`
Add vp10_inv_txfm2d Change-Id: Ib63062a52c688e65bae5eb0052ce69d73d96c9c5 2015-11-06 22:15:54 +03:00			`clamp_block((int16_t *)output, 16, stride, 0, (1 << bd) - 1);`
			`}`

mv vp10_fwd_txfm2d_#x# into vp10_rtcd.h Change-Id: Iad7352698786791b0fd7c005a7edfd1724b71599 2016-03-19 00:09:06 +03:00			`void vp10_inv_txfm2d_add_32x32_c(const int32_t input, uint16_t output,`
Change inverse HT function argument from TXFM_2D_CFG* to int This change has no performance impact. It prepares the proper function interface for better performance optimization. Change-Id: I12e2f2deaf7f3adc603de0a74852116468c762f6 2016-05-10 04:34:16 +03:00			`int stride, int tx_type,`
			`int bd) {`
Add vp10_inv_txfm2d Change-Id: Ib63062a52c688e65bae5eb0052ce69d73d96c9c5 2015-11-06 22:15:54 +03:00			`int txfm_buf[32 * 32 + 32 + 32];`
			`// output contains the prediction signal which is always positive and smaller`
			`// than (1 << bd) - 1`
			`// since bd < 16-1, therefore we can treat the uint16_t* output buffer as an`
			`// int16_t*`
Add flip feature to vp10_inv_txfm2d.c Change-Id: Id5f0fade42749d2bed5553eda0d690af22b6c5b1 2016-05-13 01:21:01 +03:00			`TXFM_2D_FLIP_CFG cfg = vp10_get_inv_txfm_cfg(tx_type, TX_32X32);`
			`inv_txfm2d_add_c(input, (int16_t *)output, stride, &cfg, txfm_buf);`
Add vp10_inv_txfm2d Change-Id: Ib63062a52c688e65bae5eb0052ce69d73d96c9c5 2015-11-06 22:15:54 +03:00			`clamp_block((int16_t *)output, 32, stride, 0, (1 << bd) - 1);`
			`}`
add dct 64x64 transform Change-Id: I131c4d1216cd156e520b8a91c4438c2d3c6602cb 2016-03-14 22:02:27 +03:00
mv vp10_fwd_txfm2d_#x# into vp10_rtcd.h Change-Id: Iad7352698786791b0fd7c005a7edfd1724b71599 2016-03-19 00:09:06 +03:00			`void vp10_inv_txfm2d_add_64x64_c(const int32_t input, uint16_t output,`
Change inverse HT function argument from TXFM_2D_CFG* to int This change has no performance impact. It prepares the proper function interface for better performance optimization. Change-Id: I12e2f2deaf7f3adc603de0a74852116468c762f6 2016-05-10 04:34:16 +03:00			`int stride, int tx_type,`
			`int bd) {`
add dct 64x64 transform Change-Id: I131c4d1216cd156e520b8a91c4438c2d3c6602cb 2016-03-14 22:02:27 +03:00			`int txfm_buf[64 * 64 + 64 + 64];`
			`// output contains the prediction signal which is always positive and smaller`
			`// than (1 << bd) - 1`
			`// since bd < 16-1, therefore we can treat the uint16_t* output buffer as an`
			`// int16_t*`
Add flip feature to vp10_inv_txfm2d.c Change-Id: Id5f0fade42749d2bed5553eda0d690af22b6c5b1 2016-05-13 01:21:01 +03:00			`TXFM_2D_FLIP_CFG cfg = vp10_get_inv_txfm_64x64_cfg(tx_type);`
			`inv_txfm2d_add_c(input, (int16_t *)output, stride, &cfg, txfm_buf);`
add dct 64x64 transform Change-Id: I131c4d1216cd156e520b8a91c4438c2d3c6602cb 2016-03-14 22:02:27 +03:00			`clamp_block((int16_t *)output, 64, stride, 0, (1 << bd) - 1);`
			`}`