Refactor hbd txfm configurations to be 1D

The hbd transform configurations were originally written out for every possible 2D transform. Now that EXT_TX and RECT_TX add many more possible 2D transforms, it is simpler to write a cfg for each of the 4 1D transform types and compose them to build every possible 2D transform type. This allows for an easier integration of the identity transform for EXT_TX and of rectangular transforms for RECT_TX into the current hbd transform codepath, and facilitates the removal of obsolete transforms. This change has no impact on performance.

BUG=aomedia:524
Change-Id: I1e217bcd217fd637b1df94fae62d9c59a0523c1a
2017-05-15 20:49:22 -07:00 · 2017-05-15 20:49:22 -07:00 · eec47e65bb
--- a/av1/av1.cmake
+++ b/av1/av1.cmake
@ -16,11 +16,11 @@ set(AOM_AV1_COMMON_SOURCES
    "${AOM_ROOT}/av1/common/av1_fwd_txfm1d.c"
    "${AOM_ROOT}/av1/common/av1_fwd_txfm1d.h"
    "${AOM_ROOT}/av1/common/av1_fwd_txfm2d.c"
-    "${AOM_ROOT}/av1/common/av1_fwd_txfm2d_cfg.h"
+    "${AOM_ROOT}/av1/common/av1_fwd_txfm1d_cfg.h"
    "${AOM_ROOT}/av1/common/av1_inv_txfm1d.c"
    "${AOM_ROOT}/av1/common/av1_inv_txfm1d.h"
    "${AOM_ROOT}/av1/common/av1_inv_txfm2d.c"
-    "${AOM_ROOT}/av1/common/av1_inv_txfm2d_cfg.h"
+    "${AOM_ROOT}/av1/common/av1_inv_txfm1d_cfg.h"
    "${AOM_ROOT}/av1/common/av1_loopfilter.c"
    "${AOM_ROOT}/av1/common/av1_loopfilter.h"
    "${AOM_ROOT}/av1/common/av1_txfm.h"
--- a/av1/av1_common.mk
+++ b/av1/av1_common.mk
@ -69,9 +69,9 @@ AV1_COMMON_SRCS-yes += common/av1_fwd_txfm1d.c
 AV1_COMMON_SRCS-yes += common/av1_inv_txfm1d.h
 AV1_COMMON_SRCS-yes += common/av1_inv_txfm1d.c
 AV1_COMMON_SRCS-yes += common/av1_fwd_txfm2d.c
-AV1_COMMON_SRCS-yes += common/av1_fwd_txfm2d_cfg.h
+AV1_COMMON_SRCS-yes += common/av1_fwd_txfm1d_cfg.h
 AV1_COMMON_SRCS-yes += common/av1_inv_txfm2d.c
-AV1_COMMON_SRCS-yes += common/av1_inv_txfm2d_cfg.h
+AV1_COMMON_SRCS-yes += common/av1_inv_txfm1d_cfg.h
 AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/av1_convolve_ssse3.c
 ifeq ($(CONFIG_HIGHBITDEPTH),yes)
 AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/av1_highbd_convolve_sse4.c
--- a/av1/common/av1_fwd_txfm1d_cfg.h
+++ b/av1/common/av1_fwd_txfm1d_cfg.h
@ -0,0 +1,314 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AV1_FWD_TXFM2D_CFG_H_  // NOTE(review): guard says TXFM2D, file is av1_fwd_txfm1d_cfg.h
+#define AV1_FWD_TXFM2D_CFG_H_
+#include "av1/common/enums.h"
+#include "av1/common/av1_fwd_txfm1d.h"
+//  ---------------- 1D constants for 4-point transforms ----------------
+// shift: 3 entries, shared by the row and column cfgs of this size
+static const int8_t fwd_shift_4[3] = { 2, 0, 0 };
+
+// stage range: one entry per stage (DCT4 cfgs use 4 stages, ADST4 cfgs use 6)
+static const int8_t fwd_stage_range_col_dct_4[4] = { 15, 16, 17, 17 };
+static const int8_t fwd_stage_range_row_dct_4[4] = { 17, 18, 18, 18 };
+static const int8_t fwd_stage_range_col_adst_4[6] = { 15, 15, 16, 17, 17, 17 };
+static const int8_t fwd_stage_range_row_adst_4[6] = { 17, 17, 17, 18, 18, 18 };
+// cos bit: one entry per stage, same lengths as the stage range tables
+static const int8_t fwd_cos_bit_col_dct_4[4] = { 13, 13, 13, 13 };
+static const int8_t fwd_cos_bit_row_dct_4[4] = { 13, 13, 13, 13 };
+static const int8_t fwd_cos_bit_col_adst_4[6] = { 13, 13, 13, 13, 13, 13 };
+static const int8_t fwd_cos_bit_row_adst_4[6] = { 13, 13, 13, 13, 13, 13 };
+
+//  ---------------- 1D constants for 8-point transforms ----------------
+// shift: 3 entries, shared by the row and column cfgs of this size
+static const int8_t fwd_shift_8[3] = { 2, -1, 0 };
+
+// stage range: one entry per stage (DCT8 cfgs use 6 stages, ADST8 cfgs use 8)
+static const int8_t fwd_stage_range_col_dct_8[6] = { 15, 16, 17, 18, 18, 18 };
+static const int8_t fwd_stage_range_row_dct_8[6] = { 17, 18, 19, 19, 19, 19 };
+static const int8_t fwd_stage_range_col_adst_8[8] = { 15, 15, 16, 17,
+                                                      17, 18, 18, 18 };
+static const int8_t fwd_stage_range_row_adst_8[8] = { 17, 17, 17, 18,
+                                                      18, 19, 19, 19 };
+
+// cos bit: one entry per stage, same lengths as the stage range tables
+static const int8_t fwd_cos_bit_col_dct_8[6] = { 13, 13, 13, 13, 13, 13 };
+static const int8_t fwd_cos_bit_row_dct_8[6] = { 13, 13, 13, 13, 13, 13 };
+static const int8_t fwd_cos_bit_col_adst_8[8] = {
+  13, 13, 13, 13, 13, 13, 13, 13
+};
+static const int8_t fwd_cos_bit_row_adst_8[8] = {
+  13, 13, 13, 13, 13, 13, 13, 13
+};
+
+//  ---------------- 1D constants for 16-point transforms ----------------
+// shift: 3 entries, shared by the row and column cfgs of this size
+static const int8_t fwd_shift_16[3] = { 2, -2, 0 };
+
+// stage range: one entry per stage (DCT16 cfgs use 8 stages, ADST16 cfgs use 10)
+static const int8_t fwd_stage_range_col_dct_16[8] = { 15, 16, 17, 18,
+                                                      19, 19, 19, 19 };
+static const int8_t fwd_stage_range_row_dct_16[8] = { 17, 18, 19, 20,
+                                                      20, 20, 20, 20 };
+static const int8_t fwd_stage_range_col_adst_16[10] = { 15, 15, 16, 17, 17,
+                                                        18, 18, 19, 19, 19 };
+static const int8_t fwd_stage_range_row_adst_16[10] = { 17, 17, 17, 18, 18,
+                                                        19, 19, 20, 20, 20 };
+
+// cos bit: one entry per stage; note the row tables use 12 while the column tables use 13
+static const int8_t fwd_cos_bit_col_dct_16[8] = {
+  13, 13, 13, 13, 13, 13, 13, 13
+};
+static const int8_t fwd_cos_bit_row_dct_16[8] = {
+  12, 12, 12, 12, 12, 12, 12, 12
+};
+static const int8_t fwd_cos_bit_col_adst_16[10] = { 13, 13, 13, 13, 13,
+                                                    13, 13, 13, 13, 13 };
+static const int8_t fwd_cos_bit_row_adst_16[10] = { 12, 12, 12, 12, 12,
+                                                    12, 12, 12, 12, 12 };
+
+//  ---------------- 1D constants for 32-point transforms ----------------
+// shift: 3 entries, shared by the row and column cfgs of this size
+static const int8_t fwd_shift_32[3] = { 2, -4, 0 };
+
+// stage range: one entry per stage (DCT32 cfgs use 10 stages, ADST32 cfgs use 12)
+static const int8_t fwd_stage_range_col_dct_32[10] = { 15, 16, 17, 18, 19,
+                                                       20, 20, 20, 20, 20 };
+static const int8_t fwd_stage_range_row_dct_32[10] = { 16, 17, 18, 19, 20,
+                                                       20, 20, 20, 20, 20 };
+static const int8_t fwd_stage_range_col_adst_32[12] = {
+  15, 15, 16, 17, 17, 18, 18, 19, 19, 20, 20, 20
+};
+static const int8_t fwd_stage_range_row_adst_32[12] = {
+  16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 20
+};
+
+// cos bit: one entry per stage, same lengths as the stage range tables
+static const int8_t fwd_cos_bit_col_dct_32[10] = { 12, 12, 12, 12, 12,
+                                                   12, 12, 12, 12, 12 };
+static const int8_t fwd_cos_bit_row_dct_32[10] = { 12, 12, 12, 12, 12,
+                                                   12, 12, 12, 12, 12 };
+static const int8_t fwd_cos_bit_col_adst_32[12] = { 12, 12, 12, 12, 12, 12,
+                                                    12, 12, 12, 12, 12, 12 };
+static const int8_t fwd_cos_bit_row_adst_32[12] = { 12, 12, 12, 12, 12, 12,
+                                                    12, 12, 12, 12, 12, 12 };
+
+//  ---------------- 1D constants for 64-point transforms (DCT only) ----------------
+// shift: 3 entries, shared by the row and column cfgs of this size
+static const int8_t fwd_shift_64[3] = { 0, -2, -2 };
+
+// stage range: one entry per stage (DCT64 cfgs use 12 stages)
+static const int8_t fwd_stage_range_col_dct_64[12] = { 13, 14, 15, 16, 17, 18,
+                                                       19, 19, 19, 19, 19, 19 };
+static const int8_t fwd_stage_range_row_dct_64[12] = { 17, 18, 19, 20, 21, 22,
+                                                       22, 22, 22, 22, 22, 22 };
+
+// cos bit: one entry per stage; unlike the smaller sizes the values vary by stage
+static const int8_t fwd_cos_bit_col_dct_64[12] = { 15, 15, 15, 15, 15, 14,
+                                                   13, 13, 13, 13, 13, 13 };
+static const int8_t fwd_cos_bit_row_dct_64[12] = { 15, 14, 13, 12, 11, 10,
+                                                   10, 10, 10, 10, 10, 10 };
+
+//  -------- row config fwd_dct_4: 4-point DCT, 4 stages --------
+static const TXFM_1D_CFG fwd_txfm_1d_row_cfg_dct_4 = {
+  4,  // .txfm_size
+  4,  // .stage_num
+  // 0,  // .log_scale
+  fwd_shift_4,                // .shift
+  fwd_stage_range_row_dct_4,  // .stage_range
+  fwd_cos_bit_row_dct_4,      // .cos_bit
+  TXFM_TYPE_DCT4              // .txfm_type
+};
+
+//  -------- row config fwd_dct_8: 8-point DCT, 6 stages --------
+static const TXFM_1D_CFG fwd_txfm_1d_row_cfg_dct_8 = {
+  8,  // .txfm_size
+  6,  // .stage_num
+  // 0,  // .log_scale
+  fwd_shift_8,                // .shift
+  fwd_stage_range_row_dct_8,  // .stage_range
+  fwd_cos_bit_row_dct_8,      // .cos_bit
+  TXFM_TYPE_DCT8              // .txfm_type
+};
+//  -------- row config fwd_dct_16: 16-point DCT, 8 stages --------
+static const TXFM_1D_CFG fwd_txfm_1d_row_cfg_dct_16 = {
+  16,  // .txfm_size
+  8,   // .stage_num
+  // 0,  // .log_scale
+  fwd_shift_16,                // .shift
+  fwd_stage_range_row_dct_16,  // .stage_range
+  fwd_cos_bit_row_dct_16,      // .cos_bit
+  TXFM_TYPE_DCT16              // .txfm_type
+};
+
+//  -------- row config fwd_dct_32: 32-point DCT, 10 stages --------
+static const TXFM_1D_CFG fwd_txfm_1d_row_cfg_dct_32 = {
+  32,  // .txfm_size
+  10,  // .stage_num
+  // 1,  // .log_scale
+  fwd_shift_32,                // .shift
+  fwd_stage_range_row_dct_32,  // .stage_range
+  fwd_cos_bit_row_dct_32,      // .cos_bit
+  TXFM_TYPE_DCT32              // .txfm_type
+};
+
+//  -------- row config fwd_dct_64: 64-point DCT, 12 stages --------
+static const TXFM_1D_CFG fwd_txfm_1d_row_cfg_dct_64 = {
+  64,                          // .txfm_size
+  12,                          // .stage_num
+  fwd_shift_64,                // .shift
+  fwd_stage_range_row_dct_64,  // .stage_range
+  fwd_cos_bit_row_dct_64,      // .cos_bit
+  TXFM_TYPE_DCT64,             // .txfm_type
+};
+
+//  -------- row config fwd_adst_4: 4-point ADST, 6 stages --------
+static const TXFM_1D_CFG fwd_txfm_1d_row_cfg_adst_4 = {
+  4,  // .txfm_size
+  6,  // .stage_num
+  // 0,  // .log_scale
+  fwd_shift_4,                 // .shift
+  fwd_stage_range_row_adst_4,  // .stage_range
+  fwd_cos_bit_row_adst_4,      // .cos_bit
+  TXFM_TYPE_ADST4,             // .txfm_type
+};
+
+//  -------- row config fwd_adst_8: 8-point ADST, 8 stages --------
+static const TXFM_1D_CFG fwd_txfm_1d_row_cfg_adst_8 = {
+  8,  // .txfm_size
+  8,  // .stage_num
+  // 0,  // .log_scale
+  fwd_shift_8,                 // .shift
+  fwd_stage_range_row_adst_8,  // .stage_range
+  fwd_cos_bit_row_adst_8,      // .cos_bit
+  TXFM_TYPE_ADST8,             // .txfm_type
+};
+
+//  -------- row config fwd_adst_16: 16-point ADST, 10 stages --------
+static const TXFM_1D_CFG fwd_txfm_1d_row_cfg_adst_16 = {
+  16,  // .txfm_size
+  10,  // .stage_num
+  // 0,  // .log_scale
+  fwd_shift_16,                 // .shift
+  fwd_stage_range_row_adst_16,  // .stage_range
+  fwd_cos_bit_row_adst_16,      // .cos_bit
+  TXFM_TYPE_ADST16,             // .txfm_type
+};
+
+//  -------- row config fwd_adst_32: 32-point ADST, 12 stages --------
+static const TXFM_1D_CFG fwd_txfm_1d_row_cfg_adst_32 = {
+  32,  // .txfm_size
+  12,  // .stage_num
+  // 1,  // .log_scale
+  fwd_shift_32,                 // .shift
+  fwd_stage_range_row_adst_32,  // .stage_range
+  fwd_cos_bit_row_adst_32,      // .cos_bit
+  TXFM_TYPE_ADST32,             // .txfm_type
+};
+
+//  -------- col config fwd_dct_4: 4-point DCT, 4 stages --------
+static const TXFM_1D_CFG fwd_txfm_1d_col_cfg_dct_4 = {
+  4,  // .txfm_size
+  4,  // .stage_num
+  // 0,  // .log_scale
+  fwd_shift_4,                // .shift
+  fwd_stage_range_col_dct_4,  // .stage_range
+  fwd_cos_bit_col_dct_4,      // .cos_bit
+  TXFM_TYPE_DCT4              // .txfm_type
+};
+
+//  -------- col config fwd_dct_8: 8-point DCT, 6 stages --------
+static const TXFM_1D_CFG fwd_txfm_1d_col_cfg_dct_8 = {
+  8,  // .txfm_size
+  6,  // .stage_num
+  // 0,  // .log_scale
+  fwd_shift_8,                // .shift
+  fwd_stage_range_col_dct_8,  // .stage_range
+  fwd_cos_bit_col_dct_8,      // .cos_bit
+  TXFM_TYPE_DCT8              // .txfm_type
+};
+//  -------- col config fwd_dct_16: 16-point DCT, 8 stages --------
+static const TXFM_1D_CFG fwd_txfm_1d_col_cfg_dct_16 = {
+  16,  // .txfm_size
+  8,   // .stage_num
+  // 0,  // .log_scale
+  fwd_shift_16,                // .shift
+  fwd_stage_range_col_dct_16,  // .stage_range
+  fwd_cos_bit_col_dct_16,      // .cos_bit
+  TXFM_TYPE_DCT16              // .txfm_type
+};
+
+//  -------- col config fwd_dct_32: 32-point DCT, 10 stages --------
+static const TXFM_1D_CFG fwd_txfm_1d_col_cfg_dct_32 = {
+  32,  // .txfm_size
+  10,  // .stage_num
+  // 1,  // .log_scale
+  fwd_shift_32,                // .shift
+  fwd_stage_range_col_dct_32,  // .stage_range
+  fwd_cos_bit_col_dct_32,      // .cos_bit
+  TXFM_TYPE_DCT32              // .txfm_type
+};
+
+//  -------- col config fwd_dct_64: 64-point DCT, 12 stages --------
+static const TXFM_1D_CFG fwd_txfm_1d_col_cfg_dct_64 = {
+  64,                          // .txfm_size
+  12,                          // .stage_num
+  fwd_shift_64,                // .shift
+  fwd_stage_range_col_dct_64,  // .stage_range
+  fwd_cos_bit_col_dct_64,      // .cos_bit
+  TXFM_TYPE_DCT64,             // .txfm_type
+};
+
+//  -------- col config fwd_adst_4: 4-point ADST, 6 stages --------
+static const TXFM_1D_CFG fwd_txfm_1d_col_cfg_adst_4 = {
+  4,  // .txfm_size
+  6,  // .stage_num
+  // 0,  // .log_scale
+  fwd_shift_4,                 // .shift
+  fwd_stage_range_col_adst_4,  // .stage_range
+  fwd_cos_bit_col_adst_4,      // .cos_bit
+  TXFM_TYPE_ADST4,             // .txfm_type
+};
+
+//  -------- col config fwd_adst_8: 8-point ADST, 8 stages --------
+static const TXFM_1D_CFG fwd_txfm_1d_col_cfg_adst_8 = {
+  8,  // .txfm_size
+  8,  // .stage_num
+  // 0,  // .log_scale
+  fwd_shift_8,                 // .shift
+  fwd_stage_range_col_adst_8,  // .stage_range
+  fwd_cos_bit_col_adst_8,      // .cos_bit
+  TXFM_TYPE_ADST8,             // .txfm_type
+};
+
+//  -------- col config fwd_adst_16: 16-point ADST, 10 stages --------
+static const TXFM_1D_CFG fwd_txfm_1d_col_cfg_adst_16 = {
+  16,  // .txfm_size
+  10,  // .stage_num
+  // 0,  // .log_scale
+  fwd_shift_16,                 // .shift
+  fwd_stage_range_col_adst_16,  // .stage_range
+  fwd_cos_bit_col_adst_16,      // .cos_bit
+  TXFM_TYPE_ADST16,             // .txfm_type
+};
+
+//  -------- col config fwd_adst_32: 32-point ADST, 12 stages --------
+static const TXFM_1D_CFG fwd_txfm_1d_col_cfg_adst_32 = {
+  32,  // .txfm_size
+  12,  // .stage_num
+  // 1,  // .log_scale
+  fwd_shift_32,                 // .shift
+  fwd_stage_range_col_adst_32,  // .stage_range
+  fwd_cos_bit_col_adst_32,      // .cos_bit
+  TXFM_TYPE_ADST32,             // .txfm_type
+};
+#endif  // AV1_FWD_TXFM2D_CFG_H_
--- a/av1/common/av1_fwd_txfm2d.c
+++ b/av1/common/av1_fwd_txfm2d.c
@ -14,7 +14,7 @@
 #include "./av1_rtcd.h"
 #include "av1/common/enums.h"
 #include "av1/common/av1_fwd_txfm1d.h"
-#include "av1/common/av1_fwd_txfm2d_cfg.h"
+#include "av1/common/av1_fwd_txfm1d_cfg.h"
 #include "av1/common/av1_txfm.h"

 static INLINE TxfmFunc fwd_txfm_type_to_func(TXFM_TYPE txfm_type) {
@ -35,14 +35,15 @@ static INLINE void fwd_txfm2d_c(const int16_t *input, int32_t *output,
                                const int stride, const TXFM_2D_FLIP_CFG *cfg,
                                int32_t *buf) {
  int c, r;
-  const int txfm_size = cfg->cfg->txfm_size;
-  const int8_t *shift = cfg->cfg->shift;
-  const int8_t *stage_range_col = cfg->cfg->stage_range_col;
-  const int8_t *stage_range_row = cfg->cfg->stage_range_row;
-  const int8_t *cos_bit_col = cfg->cfg->cos_bit_col;
-  const int8_t *cos_bit_row = cfg->cfg->cos_bit_row;
-  const TxfmFunc txfm_func_col = fwd_txfm_type_to_func(cfg->cfg->txfm_type_col);
-  const TxfmFunc txfm_func_row = fwd_txfm_type_to_func(cfg->cfg->txfm_type_row);
+  // TODO(sarahparker) must correct for rectangular transforms in follow up
+  const int txfm_size = cfg->row_cfg->txfm_size;
+  const int8_t *shift = cfg->row_cfg->shift;
+  const int8_t *stage_range_col = cfg->col_cfg->stage_range;
+  const int8_t *stage_range_row = cfg->row_cfg->stage_range;
+  const int8_t *cos_bit_col = cfg->col_cfg->cos_bit;
+  const int8_t *cos_bit_row = cfg->row_cfg->cos_bit;
+  const TxfmFunc txfm_func_col = fwd_txfm_type_to_func(cfg->col_cfg->txfm_type);
+  const TxfmFunc txfm_func_row = fwd_txfm_type_to_func(cfg->row_cfg->txfm_type);

  // use output buffer as temp buffer
  int32_t *temp_in = output;
@ -117,69 +118,79 @@ void av1_fwd_txfm2d_64x64_c(const int16_t *input, int32_t *output, int stride,
  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf);
 }

-static const TXFM_2D_CFG *fwd_txfm_cfg_ls[TX_TYPES][TX_SIZES] = {
+static const TXFM_1D_CFG *fwd_txfm_col_cfg_ls[TX_TYPES_1D][TX_SIZES] = {
+  // DCT: column cfgs for the 4-, 8-, 16- and 32-point DCT
  {
 #if CONFIG_CB4X4
      NULL,
 #endif
-      &fwd_txfm_2d_cfg_dct_dct_4, &fwd_txfm_2d_cfg_dct_dct_8,
-      &fwd_txfm_2d_cfg_dct_dct_16, &fwd_txfm_2d_cfg_dct_dct_32 },
+      &fwd_txfm_1d_col_cfg_dct_4, &fwd_txfm_1d_col_cfg_dct_8,
+      &fwd_txfm_1d_col_cfg_dct_16, &fwd_txfm_1d_col_cfg_dct_32 },
+  // ADST: column cfgs for the 4-, 8-, 16- and 32-point ADST
  {
 #if CONFIG_CB4X4
      NULL,
 #endif
-      &fwd_txfm_2d_cfg_adst_dct_4, &fwd_txfm_2d_cfg_adst_dct_8,
-      &fwd_txfm_2d_cfg_adst_dct_16, &fwd_txfm_2d_cfg_adst_dct_32 },
-  {
-#if CONFIG_CB4X4
-      NULL,
-#endif
-      &fwd_txfm_2d_cfg_dct_adst_4, &fwd_txfm_2d_cfg_dct_adst_8,
-      &fwd_txfm_2d_cfg_dct_adst_16, &fwd_txfm_2d_cfg_dct_adst_32 },
-  {
-#if CONFIG_CB4X4
-      NULL,
-#endif
-      &fwd_txfm_2d_cfg_adst_adst_4, &fwd_txfm_2d_cfg_adst_adst_8,
-      &fwd_txfm_2d_cfg_adst_adst_16, &fwd_txfm_2d_cfg_adst_adst_32 },
+      &fwd_txfm_1d_col_cfg_adst_4, &fwd_txfm_1d_col_cfg_adst_8,
+      &fwd_txfm_1d_col_cfg_adst_16, &fwd_txfm_1d_col_cfg_adst_32 },
 #if CONFIG_EXT_TX
+  // FLIPADST: reuses the ADST column cfgs
  {
 #if CONFIG_CB4X4
      NULL,
 #endif
-      &fwd_txfm_2d_cfg_adst_dct_4, &fwd_txfm_2d_cfg_adst_dct_8,
-      &fwd_txfm_2d_cfg_adst_dct_16, &fwd_txfm_2d_cfg_adst_dct_32 },
+      &fwd_txfm_1d_col_cfg_adst_4, &fwd_txfm_1d_col_cfg_adst_8,
+      &fwd_txfm_1d_col_cfg_adst_16, &fwd_txfm_1d_col_cfg_adst_32 },
+  // IDENTITY PLACEHOLDER: no identity cfg here; the ADST column cfgs fill the row
  {
 #if CONFIG_CB4X4
      NULL,
 #endif
-      &fwd_txfm_2d_cfg_dct_adst_4, &fwd_txfm_2d_cfg_dct_adst_8,
-      &fwd_txfm_2d_cfg_dct_adst_16, &fwd_txfm_2d_cfg_dct_adst_32 },
+      &fwd_txfm_1d_col_cfg_adst_4, &fwd_txfm_1d_col_cfg_adst_8,
+      &fwd_txfm_1d_col_cfg_adst_16, &fwd_txfm_1d_col_cfg_adst_32 },
+#endif  // CONFIG_EXT_TX
+};
+
+static const TXFM_1D_CFG *fwd_txfm_row_cfg_ls[TX_TYPES_1D][TX_SIZES] = {
+  // DCT: row cfgs for the 4-, 8-, 16- and 32-point DCT
  {
 #if CONFIG_CB4X4
      NULL,
 #endif
-      &fwd_txfm_2d_cfg_adst_adst_4, &fwd_txfm_2d_cfg_adst_adst_8,
-      &fwd_txfm_2d_cfg_adst_adst_16, &fwd_txfm_2d_cfg_adst_adst_32 },
+      &fwd_txfm_1d_row_cfg_dct_4, &fwd_txfm_1d_row_cfg_dct_8,
+      &fwd_txfm_1d_row_cfg_dct_16, &fwd_txfm_1d_row_cfg_dct_32 },
+  // ADST: row cfgs for the 4-, 8-, 16- and 32-point ADST
  {
 #if CONFIG_CB4X4
      NULL,
 #endif
-      &fwd_txfm_2d_cfg_adst_adst_4, &fwd_txfm_2d_cfg_adst_adst_8,
-      &fwd_txfm_2d_cfg_adst_adst_16, &fwd_txfm_2d_cfg_adst_adst_32 },
+      &fwd_txfm_1d_row_cfg_adst_4, &fwd_txfm_1d_row_cfg_adst_8,
+      &fwd_txfm_1d_row_cfg_adst_16, &fwd_txfm_1d_row_cfg_adst_32 },
+#if CONFIG_EXT_TX
+  // FLIPADST: reuses the ADST row cfgs
  {
 #if CONFIG_CB4X4
      NULL,
 #endif
-      &fwd_txfm_2d_cfg_adst_adst_4, &fwd_txfm_2d_cfg_adst_adst_8,
-      &fwd_txfm_2d_cfg_adst_adst_16, &fwd_txfm_2d_cfg_adst_adst_32 },
+      &fwd_txfm_1d_row_cfg_adst_4, &fwd_txfm_1d_row_cfg_adst_8,
+      &fwd_txfm_1d_row_cfg_adst_16, &fwd_txfm_1d_row_cfg_adst_32 },
+  // IDENTITY PLACEHOLDER: no identity cfg here; the ADST row cfgs fill the row
+  {
+#if CONFIG_CB4X4
+      NULL,
+#endif
+      &fwd_txfm_1d_row_cfg_adst_4, &fwd_txfm_1d_row_cfg_adst_8,
+      &fwd_txfm_1d_row_cfg_adst_16, &fwd_txfm_1d_row_cfg_adst_32 },
 #endif  // CONFIG_EXT_TX
 };

 TXFM_2D_FLIP_CFG av1_get_fwd_txfm_cfg(int tx_type, int tx_size) {
  TXFM_2D_FLIP_CFG cfg;
  set_flip_cfg(tx_type, &cfg);
-  cfg.cfg = fwd_txfm_cfg_ls[tx_type][tx_size];
+  int tx_type_col = vtx_tab[tx_type];  // 1D type that indexes the column cfg table
+  int tx_type_row = htx_tab[tx_type];  // 1D type that indexes the row cfg table
+  cfg.col_cfg = fwd_txfm_col_cfg_ls[tx_type_col][tx_size];  // table slot 0 is NULL under CONFIG_CB4X4
+  cfg.row_cfg = fwd_txfm_row_cfg_ls[tx_type_row][tx_size];  // table slot 0 is NULL under CONFIG_CB4X4
  return cfg;
 }

@ -187,13 +198,11 @@ TXFM_2D_FLIP_CFG av1_get_fwd_txfm_64x64_cfg(int tx_type) {
  TXFM_2D_FLIP_CFG cfg;
  switch (tx_type) {
    case DCT_DCT:
-      cfg.cfg = &fwd_txfm_2d_cfg_dct_dct_64;
+      cfg.col_cfg = &fwd_txfm_1d_col_cfg_dct_64;
+      cfg.row_cfg = &fwd_txfm_1d_row_cfg_dct_64;
      cfg.ud_flip = 0;
      cfg.lr_flip = 0;
      break;
-    case ADST_DCT:
-    case DCT_ADST:
-    case ADST_ADST:
    default:
      cfg.ud_flip = 0;
      cfg.lr_flip = 0;
--- a/av1/common/av1_fwd_txfm2d_cfg.h
+++ b/av1/common/av1_fwd_txfm2d_cfg.h
@ -1,444 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AV1_FWD_TXFM2D_CFG_H_
-#define AV1_FWD_TXFM2D_CFG_H_
-#include "av1/common/enums.h"
-#include "av1/common/av1_fwd_txfm1d.h"
-//  ---------------- config fwd_dct_dct_4 ----------------
-static const int8_t fwd_shift_dct_dct_4[3] = { 2, 0, 0 };
-static const int8_t fwd_stage_range_col_dct_dct_4[4] = { 15, 16, 17, 17 };
-static const int8_t fwd_stage_range_row_dct_dct_4[4] = { 17, 18, 18, 18 };
-static const int8_t fwd_cos_bit_col_dct_dct_4[4] = { 13, 13, 13, 13 };
-static const int8_t fwd_cos_bit_row_dct_dct_4[4] = { 13, 13, 13, 13 };
-
-static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_dct_4 = {
-  4,  // .txfm_size
-  4,  // .stage_num_col
-  4,  // .stage_num_row
-  // 0,  // .log_scale
-  fwd_shift_dct_dct_4,            // .shift
-  fwd_stage_range_col_dct_dct_4,  // .stage_range_col
-  fwd_stage_range_row_dct_dct_4,  // .stage_range_row
-  fwd_cos_bit_col_dct_dct_4,      // .cos_bit_col
-  fwd_cos_bit_row_dct_dct_4,      // .cos_bit_row
-  TXFM_TYPE_DCT4,                 // .txfm_type_col
-  TXFM_TYPE_DCT4
-};  // .txfm_type_row
-
-//  ---------------- config fwd_dct_dct_8 ----------------
-static const int8_t fwd_shift_dct_dct_8[3] = { 2, -1, 0 };
-static const int8_t fwd_stage_range_col_dct_dct_8[6] = {
-  15, 16, 17, 18, 18, 18
-};
-static const int8_t fwd_stage_range_row_dct_dct_8[6] = {
-  17, 18, 19, 19, 19, 19
-};
-static const int8_t fwd_cos_bit_col_dct_dct_8[6] = { 13, 13, 13, 13, 13, 13 };
-static const int8_t fwd_cos_bit_row_dct_dct_8[6] = { 13, 13, 13, 13, 13, 13 };
-
-static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_dct_8 = {
-  8,  // .txfm_size
-  6,  // .stage_num_col
-  6,  // .stage_num_row
-  // 0,  // .log_scale
-  fwd_shift_dct_dct_8,            // .shift
-  fwd_stage_range_col_dct_dct_8,  // .stage_range_col
-  fwd_stage_range_row_dct_dct_8,  // .stage_range_row
-  fwd_cos_bit_col_dct_dct_8,      // .cos_bit_col
-  fwd_cos_bit_row_dct_dct_8,      // .cos_bit_row
-  TXFM_TYPE_DCT8,                 // .txfm_type_col
-  TXFM_TYPE_DCT8
-};  // .txfm_type_row
-
-//  ---------------- config fwd_dct_dct_16 ----------------
-static const int8_t fwd_shift_dct_dct_16[3] = { 2, -2, 0 };
-static const int8_t fwd_stage_range_col_dct_dct_16[8] = { 15, 16, 17, 18,
-                                                          19, 19, 19, 19 };
-static const int8_t fwd_stage_range_row_dct_dct_16[8] = { 17, 18, 19, 20,
-                                                          20, 20, 20, 20 };
-static const int8_t fwd_cos_bit_col_dct_dct_16[8] = { 13, 13, 13, 13,
-                                                      13, 13, 13, 13 };
-static const int8_t fwd_cos_bit_row_dct_dct_16[8] = { 12, 12, 12, 12,
-                                                      12, 12, 12, 12 };
-
-static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_dct_16 = {
-  16,  // .txfm_size
-  8,   // .stage_num_col
-  8,   // .stage_num_row
-  // 0,  // .log_scale
-  fwd_shift_dct_dct_16,            // .shift
-  fwd_stage_range_col_dct_dct_16,  // .stage_range_col
-  fwd_stage_range_row_dct_dct_16,  // .stage_range_row
-  fwd_cos_bit_col_dct_dct_16,      // .cos_bit_col
-  fwd_cos_bit_row_dct_dct_16,      // .cos_bit_row
-  TXFM_TYPE_DCT16,                 // .txfm_type_col
-  TXFM_TYPE_DCT16
-};  // .txfm_type_row
-
-//  ---------------- config fwd_dct_dct_32 ----------------
-static const int8_t fwd_shift_dct_dct_32[3] = { 2, -4, 0 };
-static const int8_t fwd_stage_range_col_dct_dct_32[10] = { 15, 16, 17, 18, 19,
-                                                           20, 20, 20, 20, 20 };
-static const int8_t fwd_stage_range_row_dct_dct_32[10] = { 16, 17, 18, 19, 20,
-                                                           20, 20, 20, 20, 20 };
-static const int8_t fwd_cos_bit_col_dct_dct_32[10] = { 12, 12, 12, 12, 12,
-                                                       12, 12, 12, 12, 12 };
-static const int8_t fwd_cos_bit_row_dct_dct_32[10] = { 12, 12, 12, 12, 12,
-                                                       12, 12, 12, 12, 12 };
-
-static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_dct_32 = {
-  32,  // .txfm_size
-  10,  // .stage_num_col
-  10,  // .stage_num_row
-  // 1,  // .log_scale
-  fwd_shift_dct_dct_32,            // .shift
-  fwd_stage_range_col_dct_dct_32,  // .stage_range_col
-  fwd_stage_range_row_dct_dct_32,  // .stage_range_row
-  fwd_cos_bit_col_dct_dct_32,      // .cos_bit_col
-  fwd_cos_bit_row_dct_dct_32,      // .cos_bit_row
-  TXFM_TYPE_DCT32,                 // .txfm_type_col
-  TXFM_TYPE_DCT32
-};  // .txfm_type_row
-
-//  ---------------- config fwd_dct_dct_64 ----------------
-static const int8_t fwd_shift_dct_dct_64[3] = { 0, -2, -2 };
-static const int8_t fwd_stage_range_col_dct_dct_64[12] = {
-  13, 14, 15, 16, 17, 18, 19, 19, 19, 19, 19, 19
-};
-static const int8_t fwd_stage_range_row_dct_dct_64[12] = {
-  17, 18, 19, 20, 21, 22, 22, 22, 22, 22, 22, 22
-};
-static const int8_t fwd_cos_bit_col_dct_dct_64[12] = { 15, 15, 15, 15, 15, 14,
-                                                       13, 13, 13, 13, 13, 13 };
-static const int8_t fwd_cos_bit_row_dct_dct_64[12] = { 15, 14, 13, 12, 11, 10,
-                                                       10, 10, 10, 10, 10, 10 };
-
-static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_dct_64 = {
-  64,                              // .txfm_size
-  12,                              // .stage_num_col
-  12,                              // .stage_num_row
-  fwd_shift_dct_dct_64,            // .shift
-  fwd_stage_range_col_dct_dct_64,  // .stage_range_col
-  fwd_stage_range_row_dct_dct_64,  // .stage_range_row
-  fwd_cos_bit_col_dct_dct_64,      // .cos_bit_col
-  fwd_cos_bit_row_dct_dct_64,      // .cos_bit_row
-  TXFM_TYPE_DCT64,                 // .txfm_type_col
-  TXFM_TYPE_DCT64
-};  // .txfm_type_row
-
-//  ---------------- config fwd_dct_adst_4 ----------------
-static const int8_t fwd_shift_dct_adst_4[3] = { 2, 0, 0 };
-static const int8_t fwd_stage_range_col_dct_adst_4[4] = { 15, 16, 17, 17 };
-static const int8_t fwd_stage_range_row_dct_adst_4[6] = {
-  17, 17, 17, 18, 18, 18
-};
-static const int8_t fwd_cos_bit_col_dct_adst_4[4] = { 13, 13, 13, 13 };
-static const int8_t fwd_cos_bit_row_dct_adst_4[6] = { 13, 13, 13, 13, 13, 13 };
-
-static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_adst_4 = {
-  4,  // .txfm_size
-  4,  // .stage_num_col
-  6,  // .stage_num_row
-  // 0,  // .log_scale
-  fwd_shift_dct_adst_4,            // .shift
-  fwd_stage_range_col_dct_adst_4,  // .stage_range_col
-  fwd_stage_range_row_dct_adst_4,  // .stage_range_row
-  fwd_cos_bit_col_dct_adst_4,      // .cos_bit_col
-  fwd_cos_bit_row_dct_adst_4,      // .cos_bit_row
-  TXFM_TYPE_DCT4,                  // .txfm_type_col
-  TXFM_TYPE_ADST4
-};  // .txfm_type_row
-
-//  ---------------- config fwd_dct_adst_8 ----------------
-static const int8_t fwd_shift_dct_adst_8[3] = { 2, -1, 0 };
-static const int8_t fwd_stage_range_col_dct_adst_8[6] = {
-  15, 16, 17, 18, 18, 18
-};
-static const int8_t fwd_stage_range_row_dct_adst_8[8] = { 17, 17, 17, 18,
-                                                          18, 19, 19, 19 };
-static const int8_t fwd_cos_bit_col_dct_adst_8[6] = { 13, 13, 13, 13, 13, 13 };
-static const int8_t fwd_cos_bit_row_dct_adst_8[8] = { 13, 13, 13, 13,
-                                                      13, 13, 13, 13 };
-
-static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_adst_8 = {
-  8,  // .txfm_size
-  6,  // .stage_num_col
-  8,  // .stage_num_row
-  // 0,  // .log_scale
-  fwd_shift_dct_adst_8,            // .shift
-  fwd_stage_range_col_dct_adst_8,  // .stage_range_col
-  fwd_stage_range_row_dct_adst_8,  // .stage_range_row
-  fwd_cos_bit_col_dct_adst_8,      // .cos_bit_col
-  fwd_cos_bit_row_dct_adst_8,      // .cos_bit_row
-  TXFM_TYPE_DCT8,                  // .txfm_type_col
-  TXFM_TYPE_ADST8
-};  // .txfm_type_row
-
-//  ---------------- config fwd_dct_adst_16 ----------------
-static const int8_t fwd_shift_dct_adst_16[3] = { 2, -2, 0 };
-static const int8_t fwd_stage_range_col_dct_adst_16[8] = { 15, 16, 17, 18,
-                                                           19, 19, 19, 19 };
-static const int8_t fwd_stage_range_row_dct_adst_16[10] = {
-  17, 17, 17, 18, 18, 19, 19, 20, 20, 20
-};
-static const int8_t fwd_cos_bit_col_dct_adst_16[8] = { 13, 13, 13, 13,
-                                                       13, 13, 13, 13 };
-static const int8_t fwd_cos_bit_row_dct_adst_16[10] = { 12, 12, 12, 12, 12,
-                                                        12, 12, 12, 12, 12 };
-
-static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_adst_16 = {
-  16,  // .txfm_size
-  8,   // .stage_num_col
-  10,  // .stage_num_row
-  // 0,  // .log_scale
-  fwd_shift_dct_adst_16,            // .shift
-  fwd_stage_range_col_dct_adst_16,  // .stage_range_col
-  fwd_stage_range_row_dct_adst_16,  // .stage_range_row
-  fwd_cos_bit_col_dct_adst_16,      // .cos_bit_col
-  fwd_cos_bit_row_dct_adst_16,      // .cos_bit_row
-  TXFM_TYPE_DCT16,                  // .txfm_type_col
-  TXFM_TYPE_ADST16
-};  // .txfm_type_row
-
-//  ---------------- config fwd_dct_adst_32 ----------------
-static const int8_t fwd_shift_dct_adst_32[3] = { 2, -4, 0 };
-static const int8_t fwd_stage_range_col_dct_adst_32[10] = {
-  15, 16, 17, 18, 19, 20, 20, 20, 20, 20
-};
-static const int8_t fwd_stage_range_row_dct_adst_32[12] = {
-  16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 20
-};
-static const int8_t fwd_cos_bit_col_dct_adst_32[10] = { 12, 12, 12, 12, 12,
-                                                        12, 12, 12, 12, 12 };
-static const int8_t fwd_cos_bit_row_dct_adst_32[12] = {
-  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12
-};
-
-static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_adst_32 = {
-  32,  // .txfm_size
-  10,  // .stage_num_col
-  12,  // .stage_num_row
-  // 1,  // .log_scale
-  fwd_shift_dct_adst_32,            // .shift
-  fwd_stage_range_col_dct_adst_32,  // .stage_range_col
-  fwd_stage_range_row_dct_adst_32,  // .stage_range_row
-  fwd_cos_bit_col_dct_adst_32,      // .cos_bit_col
-  fwd_cos_bit_row_dct_adst_32,      // .cos_bit_row
-  TXFM_TYPE_DCT32,                  // .txfm_type_col
-  TXFM_TYPE_ADST32
-};  // .txfm_type_row
-//  ---------------- config fwd_adst_adst_4 ----------------
-static const int8_t fwd_shift_adst_adst_4[3] = { 2, 0, 0 };
-static const int8_t fwd_stage_range_col_adst_adst_4[6] = { 15, 15, 16,
-                                                           17, 17, 17 };
-static const int8_t fwd_stage_range_row_adst_adst_4[6] = { 17, 17, 17,
-                                                           18, 18, 18 };
-static const int8_t fwd_cos_bit_col_adst_adst_4[6] = { 13, 13, 13, 13, 13, 13 };
-static const int8_t fwd_cos_bit_row_adst_adst_4[6] = { 13, 13, 13, 13, 13, 13 };
-
-static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_adst_4 = {
-  4,  // .txfm_size
-  6,  // .stage_num_col
-  6,  // .stage_num_row
-  // 0,  // .log_scale
-  fwd_shift_adst_adst_4,            // .shift
-  fwd_stage_range_col_adst_adst_4,  // .stage_range_col
-  fwd_stage_range_row_adst_adst_4,  // .stage_range_row
-  fwd_cos_bit_col_adst_adst_4,      // .cos_bit_col
-  fwd_cos_bit_row_adst_adst_4,      // .cos_bit_row
-  TXFM_TYPE_ADST4,                  // .txfm_type_col
-  TXFM_TYPE_ADST4
-};  // .txfm_type_row
-
-//  ---------------- config fwd_adst_adst_8 ----------------
-static const int8_t fwd_shift_adst_adst_8[3] = { 2, -1, 0 };
-static const int8_t fwd_stage_range_col_adst_adst_8[8] = { 15, 15, 16, 17,
-                                                           17, 18, 18, 18 };
-static const int8_t fwd_stage_range_row_adst_adst_8[8] = { 17, 17, 17, 18,
-                                                           18, 19, 19, 19 };
-static const int8_t fwd_cos_bit_col_adst_adst_8[8] = { 13, 13, 13, 13,
-                                                       13, 13, 13, 13 };
-static const int8_t fwd_cos_bit_row_adst_adst_8[8] = { 13, 13, 13, 13,
-                                                       13, 13, 13, 13 };
-
-static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_adst_8 = {
-  8,  // .txfm_size
-  8,  // .stage_num_col
-  8,  // .stage_num_row
-  // 0,  // .log_scale
-  fwd_shift_adst_adst_8,            // .shift
-  fwd_stage_range_col_adst_adst_8,  // .stage_range_col
-  fwd_stage_range_row_adst_adst_8,  // .stage_range_row
-  fwd_cos_bit_col_adst_adst_8,      // .cos_bit_col
-  fwd_cos_bit_row_adst_adst_8,      // .cos_bit_row
-  TXFM_TYPE_ADST8,                  // .txfm_type_col
-  TXFM_TYPE_ADST8
-};  // .txfm_type_row
-
-//  ---------------- config fwd_adst_adst_16 ----------------
-static const int8_t fwd_shift_adst_adst_16[3] = { 2, -2, 0 };
-static const int8_t fwd_stage_range_col_adst_adst_16[10] = {
-  15, 15, 16, 17, 17, 18, 18, 19, 19, 19
-};
-static const int8_t fwd_stage_range_row_adst_adst_16[10] = {
-  17, 17, 17, 18, 18, 19, 19, 20, 20, 20
-};
-static const int8_t fwd_cos_bit_col_adst_adst_16[10] = { 13, 13, 13, 13, 13,
-                                                         13, 13, 13, 13, 13 };
-static const int8_t fwd_cos_bit_row_adst_adst_16[10] = { 12, 12, 12, 12, 12,
-                                                         12, 12, 12, 12, 12 };
-
-static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_adst_16 = {
-  16,  // .txfm_size
-  10,  // .stage_num_col
-  10,  // .stage_num_row
-  // 0,  // .log_scale
-  fwd_shift_adst_adst_16,            // .shift
-  fwd_stage_range_col_adst_adst_16,  // .stage_range_col
-  fwd_stage_range_row_adst_adst_16,  // .stage_range_row
-  fwd_cos_bit_col_adst_adst_16,      // .cos_bit_col
-  fwd_cos_bit_row_adst_adst_16,      // .cos_bit_row
-  TXFM_TYPE_ADST16,                  // .txfm_type_col
-  TXFM_TYPE_ADST16
-};  // .txfm_type_row
-
-//  ---------------- config fwd_adst_adst_32 ----------------
-static const int8_t fwd_shift_adst_adst_32[3] = { 2, -4, 0 };
-static const int8_t fwd_stage_range_col_adst_adst_32[12] = {
-  15, 15, 16, 17, 17, 18, 18, 19, 19, 20, 20, 20
-};
-static const int8_t fwd_stage_range_row_adst_adst_32[12] = {
-  16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 20
-};
-static const int8_t fwd_cos_bit_col_adst_adst_32[12] = {
-  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12
-};
-static const int8_t fwd_cos_bit_row_adst_adst_32[12] = {
-  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12
-};
-
-static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_adst_32 = {
-  32,  // .txfm_size
-  12,  // .stage_num_col
-  12,  // .stage_num_row
-  // 1,  // .log_scale
-  fwd_shift_adst_adst_32,            // .shift
-  fwd_stage_range_col_adst_adst_32,  // .stage_range_col
-  fwd_stage_range_row_adst_adst_32,  // .stage_range_row
-  fwd_cos_bit_col_adst_adst_32,      // .cos_bit_col
-  fwd_cos_bit_row_adst_adst_32,      // .cos_bit_row
-  TXFM_TYPE_ADST32,                  // .txfm_type_col
-  TXFM_TYPE_ADST32
-};  // .txfm_type_row
-
-//  ---------------- config fwd_adst_dct_4 ----------------
-static const int8_t fwd_shift_adst_dct_4[3] = { 2, 0, 0 };
-static const int8_t fwd_stage_range_col_adst_dct_4[6] = {
-  15, 15, 16, 17, 17, 17
-};
-static const int8_t fwd_stage_range_row_adst_dct_4[4] = { 17, 18, 18, 18 };
-static const int8_t fwd_cos_bit_col_adst_dct_4[6] = { 13, 13, 13, 13, 13, 13 };
-static const int8_t fwd_cos_bit_row_adst_dct_4[4] = { 13, 13, 13, 13 };
-
-static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_dct_4 = {
-  4,  // .txfm_size
-  6,  // .stage_num_col
-  4,  // .stage_num_row
-  // 0,  // .log_scale
-  fwd_shift_adst_dct_4,            // .shift
-  fwd_stage_range_col_adst_dct_4,  // .stage_range_col
-  fwd_stage_range_row_adst_dct_4,  // .stage_range_row
-  fwd_cos_bit_col_adst_dct_4,      // .cos_bit_col
-  fwd_cos_bit_row_adst_dct_4,      // .cos_bit_row
-  TXFM_TYPE_ADST4,                 // .txfm_type_col
-  TXFM_TYPE_DCT4
-};  // .txfm_type_row
-
-//  ---------------- config fwd_adst_dct_8 ----------------
-static const int8_t fwd_shift_adst_dct_8[3] = { 2, -1, 0 };
-static const int8_t fwd_stage_range_col_adst_dct_8[8] = { 15, 15, 16, 17,
-                                                          17, 18, 18, 18 };
-static const int8_t fwd_stage_range_row_adst_dct_8[6] = {
-  17, 18, 19, 19, 19, 19
-};
-static const int8_t fwd_cos_bit_col_adst_dct_8[8] = { 13, 13, 13, 13,
-                                                      13, 13, 13, 13 };
-static const int8_t fwd_cos_bit_row_adst_dct_8[6] = { 13, 13, 13, 13, 13, 13 };
-
-static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_dct_8 = {
-  8,  // .txfm_size
-  8,  // .stage_num_col
-  6,  // .stage_num_row
-  // 0,  // .log_scale
-  fwd_shift_adst_dct_8,            // .shift
-  fwd_stage_range_col_adst_dct_8,  // .stage_range_col
-  fwd_stage_range_row_adst_dct_8,  // .stage_range_row
-  fwd_cos_bit_col_adst_dct_8,      // .cos_bit_col
-  fwd_cos_bit_row_adst_dct_8,      // .cos_bit_row
-  TXFM_TYPE_ADST8,                 // .txfm_type_col
-  TXFM_TYPE_DCT8
-};  // .txfm_type_row
-
-//  ---------------- config fwd_adst_dct_16 ----------------
-static const int8_t fwd_shift_adst_dct_16[3] = { 2, -2, 0 };
-static const int8_t fwd_stage_range_col_adst_dct_16[10] = {
-  15, 15, 16, 17, 17, 18, 18, 19, 19, 19
-};
-static const int8_t fwd_stage_range_row_adst_dct_16[8] = { 17, 18, 19, 20,
-                                                           20, 20, 20, 20 };
-static const int8_t fwd_cos_bit_col_adst_dct_16[10] = { 13, 13, 13, 13, 13,
-                                                        13, 13, 13, 13, 13 };
-static const int8_t fwd_cos_bit_row_adst_dct_16[8] = { 12, 12, 12, 12,
-                                                       12, 12, 12, 12 };
-
-static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_dct_16 = {
-  16,  // .txfm_size
-  10,  // .stage_num_col
-  8,   // .stage_num_row
-  // 0,  // .log_scale
-  fwd_shift_adst_dct_16,            // .shift
-  fwd_stage_range_col_adst_dct_16,  // .stage_range_col
-  fwd_stage_range_row_adst_dct_16,  // .stage_range_row
-  fwd_cos_bit_col_adst_dct_16,      // .cos_bit_col
-  fwd_cos_bit_row_adst_dct_16,      // .cos_bit_row
-  TXFM_TYPE_ADST16,                 // .txfm_type_col
-  TXFM_TYPE_DCT16
-};  // .txfm_type_row
-
-//  ---------------- config fwd_adst_dct_32 ----------------
-static const int8_t fwd_shift_adst_dct_32[3] = { 2, -4, 0 };
-static const int8_t fwd_stage_range_col_adst_dct_32[12] = {
-  15, 15, 16, 17, 17, 18, 18, 19, 19, 20, 20, 20
-};
-static const int8_t fwd_stage_range_row_adst_dct_32[10] = {
-  16, 17, 18, 19, 20, 20, 20, 20, 20, 20
-};
-static const int8_t fwd_cos_bit_col_adst_dct_32[12] = {
-  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12
-};
-static const int8_t fwd_cos_bit_row_adst_dct_32[10] = { 12, 12, 12, 12, 12,
-                                                        12, 12, 12, 12, 12 };
-
-static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_dct_32 = {
-  32,  // .txfm_size
-  12,  // .stage_num_col
-  10,  // .stage_num_row
-  // 1,  // .log_scale
-  fwd_shift_adst_dct_32,            // .shift
-  fwd_stage_range_col_adst_dct_32,  // .stage_range_col
-  fwd_stage_range_row_adst_dct_32,  // .stage_range_row
-  fwd_cos_bit_col_adst_dct_32,      // .cos_bit_col
-  fwd_cos_bit_row_adst_dct_32,      // .cos_bit_row
-  TXFM_TYPE_ADST32,                 // .txfm_type_col
-  TXFM_TYPE_DCT32
-};      // .txfm_type_row
-#endif  // AV1_FWD_TXFM2D_CFG_H_
--- a/av1/common/av1_inv_txfm1d_cfg.h
+++ b/av1/common/av1_inv_txfm1d_cfg.h
@ -0,0 +1,312 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AV1_INV_TXFM2D_CFG_H_
+#define AV1_INV_TXFM2D_CFG_H_
+#include "av1/common/av1_inv_txfm1d.h"
+//  ---------------- 4x4 1D constants -----------------------
+// shift
+static const int8_t inv_shift_4[2] = { 0, -4 };
+
+// stage range
+static const int8_t inv_stage_range_col_dct_4[4] = { 18, 18, 17, 17 };
+static const int8_t inv_stage_range_row_dct_4[4] = { 18, 18, 18, 18 };
+static const int8_t inv_stage_range_col_adst_4[6] = { 18, 18, 18, 18, 17, 17 };
+static const int8_t inv_stage_range_row_adst_4[6] = { 18, 18, 18, 18, 18, 18 };
+// cos bit
+static const int8_t inv_cos_bit_col_dct_4[4] = { 13, 13, 13, 13 };
+static const int8_t inv_cos_bit_row_dct_4[4] = { 13, 13, 13, 13 };
+static const int8_t inv_cos_bit_col_adst_4[6] = { 13, 13, 13, 13, 13, 13 };
+static const int8_t inv_cos_bit_row_adst_4[6] = { 13, 13, 13, 13, 13, 13 };
+
+//  ---------------- 8x8 1D constants -----------------------
+// shift
+static const int8_t inv_shift_8[2] = { 0, -5 };
+
+// stage range
+static const int8_t inv_stage_range_col_dct_8[6] = { 19, 19, 19, 19, 18, 18 };
+static const int8_t inv_stage_range_row_dct_8[6] = { 19, 19, 19, 19, 19, 19 };
+static const int8_t inv_stage_range_col_adst_8[8] = { 19, 19, 19, 19,
+                                                      19, 19, 18, 18 };
+static const int8_t inv_stage_range_row_adst_8[8] = { 19, 19, 19, 19,
+                                                      19, 19, 19, 19 };
+// cos bit
+static const int8_t inv_cos_bit_col_dct_8[6] = { 13, 13, 13, 13, 13, 13 };
+static const int8_t inv_cos_bit_row_dct_8[6] = { 13, 13, 13, 13, 13, 13 };
+static const int8_t inv_cos_bit_col_adst_8[8] = {
+  13, 13, 13, 13, 13, 13, 13, 13
+};
+static const int8_t inv_cos_bit_row_adst_8[8] = {
+  13, 13, 13, 13, 13, 13, 13, 13
+};
+
+//  ---------------- 16x16 1D constants -----------------------
+// shift
+static const int8_t inv_shift_16[2] = { -1, -5 };
+
+// stage range
+static const int8_t inv_stage_range_col_dct_16[8] = { 19, 19, 19, 19,
+                                                      19, 19, 18, 18 };
+static const int8_t inv_stage_range_row_dct_16[8] = { 20, 20, 20, 20,
+                                                      20, 20, 20, 20 };
+static const int8_t inv_stage_range_col_adst_16[10] = { 19, 19, 19, 19, 19,
+                                                        19, 19, 19, 18, 18 };
+static const int8_t inv_stage_range_row_adst_16[10] = { 20, 20, 20, 20, 20,
+                                                        20, 20, 20, 20, 20 };
+
+// cos bit
+static const int8_t inv_cos_bit_col_dct_16[8] = {
+  13, 13, 13, 13, 13, 13, 13, 13
+};
+static const int8_t inv_cos_bit_row_dct_16[8] = {
+  12, 12, 12, 12, 12, 12, 12, 12
+};
+static const int8_t inv_cos_bit_col_adst_16[10] = { 13, 13, 13, 13, 13,
+                                                    13, 13, 13, 13, 13 };
+static const int8_t inv_cos_bit_row_adst_16[10] = { 12, 12, 12, 12, 12,
+                                                    12, 12, 12, 12, 12 };
+
+//  ---------------- 32x32 1D constants -----------------------
+// shift
+static const int8_t inv_shift_32[2] = { -1, -5 };
+
+// stage range
+static const int8_t inv_stage_range_col_dct_32[10] = { 19, 19, 19, 19, 19,
+                                                       19, 19, 19, 18, 18 };
+static const int8_t inv_stage_range_row_dct_32[10] = { 20, 20, 20, 20, 20,
+                                                       20, 20, 20, 20, 20 };
+static const int8_t inv_stage_range_col_adst_32[12] = {
+  19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18
+};
+static const int8_t inv_stage_range_row_adst_32[12] = {
+  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
+};
+
+// cos bit
+static const int8_t inv_cos_bit_col_dct_32[10] = { 13, 13, 13, 13, 13,
+                                                   13, 13, 13, 13, 13 };
+static const int8_t inv_cos_bit_row_dct_32[10] = { 12, 12, 12, 12, 12,
+                                                   12, 12, 12, 12, 12 };
+static const int8_t inv_cos_bit_col_adst_32[12] = { 13, 13, 13, 13, 13, 13,
+                                                    13, 13, 13, 13, 13, 13 };
+static const int8_t inv_cos_bit_row_adst_32[12] = { 12, 12, 12, 12, 12, 12,
+                                                    12, 12, 12, 12, 12, 12 };
+
+//  ---------------- 64x64 1D constants -----------------------
+// shift
+static const int8_t inv_shift_64[2] = { -1, -7 };
+
+// stage range
+static const int8_t inv_stage_range_col_dct_64[12] = { 19, 19, 19, 19, 19, 19,
+                                                       19, 19, 19, 19, 18, 18 };
+static const int8_t inv_stage_range_row_dct_64[12] = { 20, 20, 20, 20, 20, 20,
+                                                       20, 20, 20, 20, 20, 20 };
+
+// cos bit
+static const int8_t inv_cos_bit_col_dct_64[12] = { 13, 13, 13, 13, 13, 13,
+                                                   13, 13, 13, 13, 13, 13 };
+static const int8_t inv_cos_bit_row_dct_64[12] = { 12, 12, 12, 12, 12, 12,
+                                                   12, 12, 12, 12, 12, 12 };
+
+//  ---------------- row config inv_dct_4 ----------------
+static const TXFM_1D_CFG inv_txfm_1d_row_cfg_dct_4 = {
+  4,  // .txfm_size
+  4,  // .stage_num
+  // 0,  // .log_scale
+  inv_shift_4,                // .shift
+  inv_stage_range_row_dct_4,  // .stage_range
+  inv_cos_bit_row_dct_4,      // .cos_bit
+  TXFM_TYPE_DCT4              // .txfm_type
+};
+
+//  ---------------- row config inv_dct_8 ----------------
+static const TXFM_1D_CFG inv_txfm_1d_row_cfg_dct_8 = {
+  8,  // .txfm_size
+  6,  // .stage_num
+  // 0,  // .log_scale
+  inv_shift_8,                // .shift
+  inv_stage_range_row_dct_8,  // .stage_range
+  inv_cos_bit_row_dct_8,      // .cos_bit
+  TXFM_TYPE_DCT8              // .txfm_type
+};
+//  ---------------- row config inv_dct_16 ----------------
+static const TXFM_1D_CFG inv_txfm_1d_row_cfg_dct_16 = {
+  16,  // .txfm_size
+  8,   // .stage_num
+  // 0,  // .log_scale
+  inv_shift_16,                // .shift
+  inv_stage_range_row_dct_16,  // .stage_range
+  inv_cos_bit_row_dct_16,      // .cos_bit
+  TXFM_TYPE_DCT16              // .txfm_type
+};
+
+//  ---------------- row config inv_dct_32 ----------------
+static const TXFM_1D_CFG inv_txfm_1d_row_cfg_dct_32 = {
+  32,  // .txfm_size
+  10,  // .stage_num
+  // 1,  // .log_scale
+  inv_shift_32,                // .shift
+  inv_stage_range_row_dct_32,  // .stage_range
+  inv_cos_bit_row_dct_32,      // .cos_bit
+  TXFM_TYPE_DCT32              // .txfm_type
+};
+
+//  ---------------- row config inv_dct_64 ----------------
+static const TXFM_1D_CFG inv_txfm_1d_row_cfg_dct_64 = {
+  64,                          // .txfm_size
+  12,                          // .stage_num
+  inv_shift_64,                // .shift
+  inv_stage_range_row_dct_64,  // .stage_range
+  inv_cos_bit_row_dct_64,      // .cos_bit
+  TXFM_TYPE_DCT64,             // .txfm_type
+};
+
+//  ---------------- row config inv_adst_4 ----------------
+static const TXFM_1D_CFG inv_txfm_1d_row_cfg_adst_4 = {
+  4,  // .txfm_size
+  6,  // .stage_num
+  // 0,  // .log_scale
+  inv_shift_4,                 // .shift
+  inv_stage_range_row_adst_4,  // .stage_range
+  inv_cos_bit_row_adst_4,      // .cos_bit
+  TXFM_TYPE_ADST4,             // .txfm_type
+};
+
+//  ---------------- row config inv_adst_8 ----------------
+static const TXFM_1D_CFG inv_txfm_1d_row_cfg_adst_8 = {
+  8,  // .txfm_size
+  8,  // .stage_num
+  // 0,  // .log_scale
+  inv_shift_8,                 // .shift
+  inv_stage_range_row_adst_8,  // .stage_range
+  inv_cos_bit_row_adst_8,      // .cos_bit
+  TXFM_TYPE_ADST8,             // .txfm_type
+};
+
+//  ---------------- row config inv_adst_16 ----------------
+static const TXFM_1D_CFG inv_txfm_1d_row_cfg_adst_16 = {
+  16,  // .txfm_size
+  10,  // .stage_num
+  // 0,  // .log_scale
+  inv_shift_16,                 // .shift
+  inv_stage_range_row_adst_16,  // .stage_range
+  inv_cos_bit_row_adst_16,      // .cos_bit
+  TXFM_TYPE_ADST16,             // .txfm_type
+};
+
+//  ---------------- row config inv_adst_32 ----------------
+static const TXFM_1D_CFG inv_txfm_1d_row_cfg_adst_32 = {
+  32,  // .txfm_size
+  12,  // .stage_num
+  // 1,  // .log_scale
+  inv_shift_32,                 // .shift
+  inv_stage_range_row_adst_32,  // .stage_range
+  inv_cos_bit_row_adst_32,      // .cos_bit
+  TXFM_TYPE_ADST32,             // .txfm_type
+};
+
+//  ---------------- col config inv_dct_4 ----------------
+static const TXFM_1D_CFG inv_txfm_1d_col_cfg_dct_4 = {
+  4,  // .txfm_size
+  4,  // .stage_num
+  // 0,  // .log_scale
+  inv_shift_4,                // .shift
+  inv_stage_range_col_dct_4,  // .stage_range
+  inv_cos_bit_col_dct_4,      // .cos_bit
+  TXFM_TYPE_DCT4              // .txfm_type
+};
+
+//  ---------------- col config inv_dct_8 ----------------
+static const TXFM_1D_CFG inv_txfm_1d_col_cfg_dct_8 = {
+  8,  // .txfm_size
+  6,  // .stage_num
+  // 0,  // .log_scale
+  inv_shift_8,                // .shift
+  inv_stage_range_col_dct_8,  // .stage_range
+  inv_cos_bit_col_dct_8,      // .cos_bit
+  TXFM_TYPE_DCT8              // .txfm_type
+};
+//  ---------------- col config inv_dct_16 ----------------
+static const TXFM_1D_CFG inv_txfm_1d_col_cfg_dct_16 = {
+  16,  // .txfm_size
+  8,   // .stage_num
+  // 0,  // .log_scale
+  inv_shift_16,                // .shift
+  inv_stage_range_col_dct_16,  // .stage_range
+  inv_cos_bit_col_dct_16,      // .cos_bit
+  TXFM_TYPE_DCT16              // .txfm_type
+};
+
+//  ---------------- col config inv_dct_32 ----------------
+static const TXFM_1D_CFG inv_txfm_1d_col_cfg_dct_32 = {
+  32,  // .txfm_size
+  10,  // .stage_num
+  // 1,  // .log_scale
+  inv_shift_32,                // .shift
+  inv_stage_range_col_dct_32,  // .stage_range
+  inv_cos_bit_col_dct_32,      // .cos_bit
+  TXFM_TYPE_DCT32              // .txfm_type
+};
+
+//  ---------------- col config inv_dct_64 ----------------
+static const TXFM_1D_CFG inv_txfm_1d_col_cfg_dct_64 = {
+  64,                          // .txfm_size
+  12,                          // .stage_num
+  inv_shift_64,                // .shift
+  inv_stage_range_col_dct_64,  // .stage_range
+  inv_cos_bit_col_dct_64,      // .cos_bit
+  TXFM_TYPE_DCT64,             // .txfm_type
+};
+
+//  ---------------- col config inv_adst_4 ----------------
+static const TXFM_1D_CFG inv_txfm_1d_col_cfg_adst_4 = {
+  4,  // .txfm_size
+  6,  // .stage_num
+  // 0,  // .log_scale
+  inv_shift_4,                 // .shift
+  inv_stage_range_col_adst_4,  // .stage_range
+  inv_cos_bit_col_adst_4,      // .cos_bit
+  TXFM_TYPE_ADST4,             // .txfm_type
+};
+
+//  ---------------- col config inv_adst_8 ----------------
+static const TXFM_1D_CFG inv_txfm_1d_col_cfg_adst_8 = {
+  8,  // .txfm_size
+  8,  // .stage_num
+  // 0,  // .log_scale
+  inv_shift_8,                 // .shift
+  inv_stage_range_col_adst_8,  // .stage_range
+  inv_cos_bit_col_adst_8,      // .cos_bit
+  TXFM_TYPE_ADST8,             // .txfm_type
+};
+
+//  ---------------- col config inv_adst_16 ----------------
+static const TXFM_1D_CFG inv_txfm_1d_col_cfg_adst_16 = {
+  16,  // .txfm_size
+  10,  // .stage_num
+  // 0,  // .log_scale
+  inv_shift_16,                 // .shift
+  inv_stage_range_col_adst_16,  // .stage_range
+  inv_cos_bit_col_adst_16,      // .cos_bit
+  TXFM_TYPE_ADST16,             // .txfm_type
+};
+
+//  ---------------- col config inv_adst_32 ----------------
+static const TXFM_1D_CFG inv_txfm_1d_col_cfg_adst_32 = {
+  32,  // .txfm_size
+  12,  // .stage_num
+  // 1,  // .log_scale
+  inv_shift_32,                 // .shift
+  inv_stage_range_col_adst_32,  // .stage_range
+  inv_cos_bit_col_adst_32,      // .cos_bit
+  TXFM_TYPE_ADST32,             // .txfm_type
+};
+#endif  // AV1_INV_TXFM2D_CFG_H_
--- a/av1/common/av1_inv_txfm2d.c
+++ b/av1/common/av1_inv_txfm2d.c
@ -13,7 +13,7 @@
 #include "av1/common/enums.h"
 #include "av1/common/av1_txfm.h"
 #include "av1/common/av1_inv_txfm1d.h"
-#include "av1/common/av1_inv_txfm2d_cfg.h"
+#include "av1/common/av1_inv_txfm1d_cfg.h"

 static INLINE TxfmFunc inv_txfm_type_to_func(TXFM_TYPE txfm_type) {
  switch (txfm_type) {
@ -29,77 +29,90 @@ static INLINE TxfmFunc inv_txfm_type_to_func(TXFM_TYPE txfm_type) {
  }
 }

-static const TXFM_2D_CFG *inv_txfm_cfg_ls[TX_TYPES][TX_SIZES] = {
+static const TXFM_1D_CFG *inv_txfm_col_cfg_ls[TX_TYPES_1D][TX_SIZES] = {
+  // DCT
  {
 #if CONFIG_CB4X4
      NULL,
 #endif
-      &inv_txfm_2d_cfg_dct_dct_4, &inv_txfm_2d_cfg_dct_dct_8,
-      &inv_txfm_2d_cfg_dct_dct_16, &inv_txfm_2d_cfg_dct_dct_32 },
+      &inv_txfm_1d_col_cfg_dct_4, &inv_txfm_1d_col_cfg_dct_8,
+      &inv_txfm_1d_col_cfg_dct_16, &inv_txfm_1d_col_cfg_dct_32 },
+  // ADST
  {
 #if CONFIG_CB4X4
      NULL,
 #endif
-      &inv_txfm_2d_cfg_adst_dct_4, &inv_txfm_2d_cfg_adst_dct_8,
-      &inv_txfm_2d_cfg_adst_dct_16, &inv_txfm_2d_cfg_adst_dct_32 },
-  {
-#if CONFIG_CB4X4
-      NULL,
-#endif
-      &inv_txfm_2d_cfg_dct_adst_4, &inv_txfm_2d_cfg_dct_adst_8,
-      &inv_txfm_2d_cfg_dct_adst_16, &inv_txfm_2d_cfg_dct_adst_32 },
-  {
-#if CONFIG_CB4X4
-      NULL,
-#endif
-      &inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
-      &inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32 },
+      &inv_txfm_1d_col_cfg_adst_4, &inv_txfm_1d_col_cfg_adst_8,
+      &inv_txfm_1d_col_cfg_adst_16, &inv_txfm_1d_col_cfg_adst_32 },
 #if CONFIG_EXT_TX
+  // FLIPADST
  {
 #if CONFIG_CB4X4
      NULL,
 #endif
-      &inv_txfm_2d_cfg_adst_dct_4, &inv_txfm_2d_cfg_adst_dct_8,
-      &inv_txfm_2d_cfg_adst_dct_16, &inv_txfm_2d_cfg_adst_dct_32 },
+      &inv_txfm_1d_col_cfg_adst_4, &inv_txfm_1d_col_cfg_adst_8,
+      &inv_txfm_1d_col_cfg_adst_16, &inv_txfm_1d_col_cfg_adst_32 },
+  // IDENTITY (placeholder: reuses the ADST column configs)
  {
 #if CONFIG_CB4X4
      NULL,
 #endif
-      &inv_txfm_2d_cfg_dct_adst_4, &inv_txfm_2d_cfg_dct_adst_8,
-      &inv_txfm_2d_cfg_dct_adst_16, &inv_txfm_2d_cfg_dct_adst_32 },
+      &inv_txfm_1d_col_cfg_adst_4, &inv_txfm_1d_col_cfg_adst_8,
+      &inv_txfm_1d_col_cfg_adst_16, &inv_txfm_1d_col_cfg_adst_32 },
+#endif  // CONFIG_EXT_TX
+};
+
+static const TXFM_1D_CFG *inv_txfm_row_cfg_ls[TX_TYPES_1D][TX_SIZES] = {
+  // DCT
  {
 #if CONFIG_CB4X4
      NULL,
 #endif
-      &inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
-      &inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32 },
+      &inv_txfm_1d_row_cfg_dct_4, &inv_txfm_1d_row_cfg_dct_8,
+      &inv_txfm_1d_row_cfg_dct_16, &inv_txfm_1d_row_cfg_dct_32 },
+  // ADST
  {
 #if CONFIG_CB4X4
      NULL,
 #endif
-      &inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
-      &inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32 },
+      &inv_txfm_1d_row_cfg_adst_4, &inv_txfm_1d_row_cfg_adst_8,
+      &inv_txfm_1d_row_cfg_adst_16, &inv_txfm_1d_row_cfg_adst_32 },
+#if CONFIG_EXT_TX
+  // FLIPADST
  {
 #if CONFIG_CB4X4
      NULL,
 #endif
-      &inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
-      &inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32 },
+      &inv_txfm_1d_row_cfg_adst_4, &inv_txfm_1d_row_cfg_adst_8,
+      &inv_txfm_1d_row_cfg_adst_16, &inv_txfm_1d_row_cfg_adst_32 },
+  // IDENTITY (placeholder: reuses the ADST row configs)
+  {
+#if CONFIG_CB4X4
+      NULL,
+#endif
+      &inv_txfm_1d_row_cfg_adst_4, &inv_txfm_1d_row_cfg_adst_8,
+      &inv_txfm_1d_row_cfg_adst_16, &inv_txfm_1d_row_cfg_adst_32 },
 #endif  // CONFIG_EXT_TX
 };

 TXFM_2D_FLIP_CFG av1_get_inv_txfm_cfg(int tx_type, int tx_size) {
  TXFM_2D_FLIP_CFG cfg;
  set_flip_cfg(tx_type, &cfg);
-  cfg.cfg = inv_txfm_cfg_ls[tx_type][tx_size];
+  int tx_type_col = vtx_tab[tx_type];
+  int tx_type_row = htx_tab[tx_type];
+  // TODO(sarahparker) this is currently only implemented for
+  // square transforms
+  cfg.col_cfg = inv_txfm_col_cfg_ls[tx_type_col][tx_size];
+  cfg.row_cfg = inv_txfm_row_cfg_ls[tx_type_row][tx_size];
  return cfg;
 }

 TXFM_2D_FLIP_CFG av1_get_inv_txfm_64x64_cfg(int tx_type) {
-  TXFM_2D_FLIP_CFG cfg = { 0, 0, NULL };
+  TXFM_2D_FLIP_CFG cfg = { 0, 0, NULL, NULL };
  switch (tx_type) {
    case DCT_DCT:
-      cfg.cfg = &inv_txfm_2d_cfg_dct_dct_64;
+      cfg.col_cfg = &inv_txfm_1d_col_cfg_dct_64;
+      cfg.row_cfg = &inv_txfm_1d_row_cfg_dct_64;
      set_flip_cfg(tx_type, &cfg);
      break;
    default: assert(0);
@ -110,14 +123,15 @@ TXFM_2D_FLIP_CFG av1_get_inv_txfm_64x64_cfg(int tx_type) {
 static INLINE void inv_txfm2d_add_c(const int32_t *input, int16_t *output,
                                    int stride, TXFM_2D_FLIP_CFG *cfg,
                                    int32_t *txfm_buf) {
-  const int txfm_size = cfg->cfg->txfm_size;
-  const int8_t *shift = cfg->cfg->shift;
-  const int8_t *stage_range_col = cfg->cfg->stage_range_col;
-  const int8_t *stage_range_row = cfg->cfg->stage_range_row;
-  const int8_t *cos_bit_col = cfg->cfg->cos_bit_col;
-  const int8_t *cos_bit_row = cfg->cfg->cos_bit_row;
-  const TxfmFunc txfm_func_col = inv_txfm_type_to_func(cfg->cfg->txfm_type_col);
-  const TxfmFunc txfm_func_row = inv_txfm_type_to_func(cfg->cfg->txfm_type_row);
+  // TODO(sarahparker) must correct for rectangular transforms in follow up
+  const int txfm_size = cfg->row_cfg->txfm_size;
+  const int8_t *shift = cfg->row_cfg->shift;
+  const int8_t *stage_range_col = cfg->col_cfg->stage_range;
+  const int8_t *stage_range_row = cfg->row_cfg->stage_range;
+  const int8_t *cos_bit_col = cfg->col_cfg->cos_bit;
+  const int8_t *cos_bit_row = cfg->row_cfg->cos_bit;
+  const TxfmFunc txfm_func_col = inv_txfm_type_to_func(cfg->col_cfg->txfm_type);
+  const TxfmFunc txfm_func_row = inv_txfm_type_to_func(cfg->row_cfg->txfm_type);

  // txfm_buf's length is  txfm_size * txfm_size + 2 * txfm_size
  // it is used for intermediate data buffering
@ -165,7 +179,11 @@ static INLINE void inv_txfm2d_add_facade(const int32_t *input, uint16_t *output,
  // int16_t*
  TXFM_2D_FLIP_CFG cfg = av1_get_inv_txfm_cfg(tx_type, tx_size);
  inv_txfm2d_add_c(input, (int16_t *)output, stride, &cfg, txfm_buf);
-  clamp_block((int16_t *)output, cfg.cfg->txfm_size, stride, 0, (1 << bd) - 1);
+  // TODO(sarahparker) just using cfg.row_cfg->txfm_size for now because
+  // we are assuming this is only used for square transforms. This will
+  // be adjusted in a follow-up.
+  clamp_block((int16_t *)output, cfg.row_cfg->txfm_size, stride, 0,
+              (1 << bd) - 1);
 }

 void av1_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output,
--- a/av1/common/av1_inv_txfm2d_cfg.h
+++ b/av1/common/av1_inv_txfm2d_cfg.h
@ -1,445 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AV1_INV_TXFM2D_CFG_H_
-#define AV1_INV_TXFM2D_CFG_H_
-#include "av1/common/av1_inv_txfm1d.h"
-//  ---------------- config inv_dct_dct_4 ----------------
-static const int8_t inv_shift_dct_dct_4[2] = { 0, -4 };
-static const int8_t inv_stage_range_col_dct_dct_4[4] = { 18, 18, 17, 17 };
-static const int8_t inv_stage_range_row_dct_dct_4[4] = { 18, 18, 18, 18 };
-static const int8_t inv_cos_bit_col_dct_dct_4[4] = { 13, 13, 13, 13 };
-static const int8_t inv_cos_bit_row_dct_dct_4[4] = { 13, 13, 13, 13 };
-
-static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_4 = {
-  4,  // .txfm_size
-  4,  // .stage_num_col
-  4,  // .stage_num_row
-  // 0,  // .log_scale
-  inv_shift_dct_dct_4,            // .shift
-  inv_stage_range_col_dct_dct_4,  // .stage_range_col
-  inv_stage_range_row_dct_dct_4,  // .stage_range_row
-  inv_cos_bit_col_dct_dct_4,      // .cos_bit_col
-  inv_cos_bit_row_dct_dct_4,      // .cos_bit_row
-  TXFM_TYPE_DCT4,                 // .txfm_type_col
-  TXFM_TYPE_DCT4
-};  // .txfm_type_row
-
-//  ---------------- config inv_dct_dct_8 ----------------
-static const int8_t inv_shift_dct_dct_8[2] = { 0, -5 };
-static const int8_t inv_stage_range_col_dct_dct_8[6] = {
-  19, 19, 19, 19, 18, 18
-};
-static const int8_t inv_stage_range_row_dct_dct_8[6] = {
-  19, 19, 19, 19, 19, 19
-};
-static const int8_t inv_cos_bit_col_dct_dct_8[6] = { 13, 13, 13, 13, 13, 13 };
-static const int8_t inv_cos_bit_row_dct_dct_8[6] = { 13, 13, 13, 13, 13, 13 };
-
-static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_8 = {
-  8,  // .txfm_size
-  6,  // .stage_num_col
-  6,  // .stage_num_row
-  // 0,  // .log_scale
-  inv_shift_dct_dct_8,            // .shift
-  inv_stage_range_col_dct_dct_8,  // .stage_range_col
-  inv_stage_range_row_dct_dct_8,  // .stage_range_row
-  inv_cos_bit_col_dct_dct_8,      // .cos_bit_col
-  inv_cos_bit_row_dct_dct_8,      // .cos_bit_row
-  TXFM_TYPE_DCT8,                 // .txfm_type_col
-  TXFM_TYPE_DCT8
-};  // .txfm_type_row
-
-//  ---------------- config inv_dct_dct_16 ----------------
-static const int8_t inv_shift_dct_dct_16[2] = { -1, -5 };
-static const int8_t inv_stage_range_col_dct_dct_16[8] = { 19, 19, 19, 19,
-                                                          19, 19, 18, 18 };
-static const int8_t inv_stage_range_row_dct_dct_16[8] = { 20, 20, 20, 20,
-                                                          20, 20, 20, 20 };
-static const int8_t inv_cos_bit_col_dct_dct_16[8] = { 13, 13, 13, 13,
-                                                      13, 13, 13, 13 };
-static const int8_t inv_cos_bit_row_dct_dct_16[8] = { 12, 12, 12, 12,
-                                                      12, 12, 12, 12 };
-
-static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_16 = {
-  16,  // .txfm_size
-  8,   // .stage_num_col
-  8,   // .stage_num_row
-  // 0,  // .log_scale
-  inv_shift_dct_dct_16,            // .shift
-  inv_stage_range_col_dct_dct_16,  // .stage_range_col
-  inv_stage_range_row_dct_dct_16,  // .stage_range_row
-  inv_cos_bit_col_dct_dct_16,      // .cos_bit_col
-  inv_cos_bit_row_dct_dct_16,      // .cos_bit_row
-  TXFM_TYPE_DCT16,                 // .txfm_type_col
-  TXFM_TYPE_DCT16
-};  // .txfm_type_row
-
-//  ---------------- config inv_dct_dct_32 ----------------
-static const int8_t inv_shift_dct_dct_32[2] = { -1, -5 };
-static const int8_t inv_stage_range_col_dct_dct_32[10] = { 19, 19, 19, 19, 19,
-                                                           19, 19, 19, 18, 18 };
-static const int8_t inv_stage_range_row_dct_dct_32[10] = { 20, 20, 20, 20, 20,
-                                                           20, 20, 20, 20, 20 };
-static const int8_t inv_cos_bit_col_dct_dct_32[10] = { 13, 13, 13, 13, 13,
-                                                       13, 13, 13, 13, 13 };
-static const int8_t inv_cos_bit_row_dct_dct_32[10] = { 12, 12, 12, 12, 12,
-                                                       12, 12, 12, 12, 12 };
-
-static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_32 = {
-  32,  // .txfm_size
-  10,  // .stage_num_col
-  10,  // .stage_num_row
-  // 1,  // .log_scale
-  inv_shift_dct_dct_32,            // .shift
-  inv_stage_range_col_dct_dct_32,  // .stage_range_col
-  inv_stage_range_row_dct_dct_32,  // .stage_range_row
-  inv_cos_bit_col_dct_dct_32,      // .cos_bit_col
-  inv_cos_bit_row_dct_dct_32,      // .cos_bit_row
-  TXFM_TYPE_DCT32,                 // .txfm_type_col
-  TXFM_TYPE_DCT32
-};  // .txfm_type_row
-
-//  ---------------- config inv_dct_dct_64 ----------------
-static const int8_t inv_shift_dct_dct_64[2] = { -1, -7 };
-static const int8_t inv_stage_range_col_dct_dct_64[12] = {
-  19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18
-};
-static const int8_t inv_stage_range_row_dct_dct_64[12] = {
-  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
-};
-static const int8_t inv_cos_bit_col_dct_dct_64[12] = { 13, 13, 13, 13, 13, 13,
-                                                       13, 13, 13, 13, 13, 13 };
-static const int8_t inv_cos_bit_row_dct_dct_64[12] = { 12, 12, 12, 12, 12, 12,
-                                                       12, 12, 12, 12, 12, 12 };
-
-static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_64 = {
-  64,                              // .txfm_size
-  12,                              // .stage_num_col
-  12,                              // .stage_num_row
-  inv_shift_dct_dct_64,            // .shift
-  inv_stage_range_col_dct_dct_64,  // .stage_range_col
-  inv_stage_range_row_dct_dct_64,  // .stage_range_row
-  inv_cos_bit_col_dct_dct_64,      // .cos_bit_col
-  inv_cos_bit_row_dct_dct_64,      // .cos_bit_row
-  TXFM_TYPE_DCT64,                 // .txfm_type_col
-  TXFM_TYPE_DCT64
-};  // .txfm_type_row
-
-//  ---------------- config inv_dct_adst_4 ----------------
-static const int8_t inv_shift_dct_adst_4[2] = { 0, -4 };
-static const int8_t inv_stage_range_col_dct_adst_4[4] = { 18, 18, 17, 17 };
-static const int8_t inv_stage_range_row_dct_adst_4[6] = {
-  18, 18, 18, 18, 18, 18
-};
-static const int8_t inv_cos_bit_col_dct_adst_4[4] = { 13, 13, 13, 13 };
-static const int8_t inv_cos_bit_row_dct_adst_4[6] = { 13, 13, 13, 13, 13, 13 };
-
-static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_adst_4 = {
-  4,  // .txfm_size
-  4,  // .stage_num_col
-  6,  // .stage_num_row
-  // 0,  // .log_scale
-  inv_shift_dct_adst_4,            // .shift
-  inv_stage_range_col_dct_adst_4,  // .stage_range_col
-  inv_stage_range_row_dct_adst_4,  // .stage_range_row
-  inv_cos_bit_col_dct_adst_4,      // .cos_bit_col
-  inv_cos_bit_row_dct_adst_4,      // .cos_bit_row
-  TXFM_TYPE_DCT4,                  // .txfm_type_col
-  TXFM_TYPE_ADST4
-};  // .txfm_type_row
-
-//  ---------------- config inv_dct_adst_8 ----------------
-static const int8_t inv_shift_dct_adst_8[2] = { 0, -5 };
-static const int8_t inv_stage_range_col_dct_adst_8[6] = {
-  19, 19, 19, 19, 18, 18
-};
-static const int8_t inv_stage_range_row_dct_adst_8[8] = { 19, 19, 19, 19,
-                                                          19, 19, 19, 19 };
-static const int8_t inv_cos_bit_col_dct_adst_8[6] = { 13, 13, 13, 13, 13, 13 };
-static const int8_t inv_cos_bit_row_dct_adst_8[8] = { 13, 13, 13, 13,
-                                                      13, 13, 13, 13 };
-
-static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_adst_8 = {
-  8,  // .txfm_size
-  6,  // .stage_num_col
-  8,  // .stage_num_row
-  // 0,  // .log_scale
-  inv_shift_dct_adst_8,            // .shift
-  inv_stage_range_col_dct_adst_8,  // .stage_range_col
-  inv_stage_range_row_dct_adst_8,  // .stage_range_row
-  inv_cos_bit_col_dct_adst_8,      // .cos_bit_col
-  inv_cos_bit_row_dct_adst_8,      // .cos_bit_row
-  TXFM_TYPE_DCT8,                  // .txfm_type_col
-  TXFM_TYPE_ADST8
-};  // .txfm_type_row
-
-//  ---------------- config inv_dct_adst_16 ----------------
-static const int8_t inv_shift_dct_adst_16[2] = { -1, -5 };
-static const int8_t inv_stage_range_col_dct_adst_16[8] = { 19, 19, 19, 19,
-                                                           19, 19, 18, 18 };
-static const int8_t inv_stage_range_row_dct_adst_16[10] = {
-  20, 20, 20, 20, 20, 20, 20, 20, 20, 20
-};
-static const int8_t inv_cos_bit_col_dct_adst_16[8] = { 13, 13, 13, 13,
-                                                       13, 13, 13, 13 };
-static const int8_t inv_cos_bit_row_dct_adst_16[10] = { 12, 12, 12, 12, 12,
-                                                        12, 12, 12, 12, 12 };
-
-static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_adst_16 = {
-  16,  // .txfm_size
-  8,   // .stage_num_col
-  10,  // .stage_num_row
-  // 0,  // .log_scale
-  inv_shift_dct_adst_16,            // .shift
-  inv_stage_range_col_dct_adst_16,  // .stage_range_col
-  inv_stage_range_row_dct_adst_16,  // .stage_range_row
-  inv_cos_bit_col_dct_adst_16,      // .cos_bit_col
-  inv_cos_bit_row_dct_adst_16,      // .cos_bit_row
-  TXFM_TYPE_DCT16,                  // .txfm_type_col
-  TXFM_TYPE_ADST16
-};  // .txfm_type_row
-
-//  ---------------- config inv_dct_adst_32 ----------------
-static const int8_t inv_shift_dct_adst_32[2] = { -1, -5 };
-static const int8_t inv_stage_range_col_dct_adst_32[10] = {
-  19, 19, 19, 19, 19, 19, 19, 19, 18, 18
-};
-static const int8_t inv_stage_range_row_dct_adst_32[12] = {
-  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
-};
-static const int8_t inv_cos_bit_col_dct_adst_32[10] = { 13, 13, 13, 13, 13,
-                                                        13, 13, 13, 13, 13 };
-static const int8_t inv_cos_bit_row_dct_adst_32[12] = {
-  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12
-};
-
-static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_adst_32 = {
-  32,  // .txfm_size
-  10,  // .stage_num_col
-  12,  // .stage_num_row
-  // 1,  // .log_scale
-  inv_shift_dct_adst_32,            // .shift
-  inv_stage_range_col_dct_adst_32,  // .stage_range_col
-  inv_stage_range_row_dct_adst_32,  // .stage_range_row
-  inv_cos_bit_col_dct_adst_32,      // .cos_bit_col
-  inv_cos_bit_row_dct_adst_32,      // .cos_bit_row
-  TXFM_TYPE_DCT32,                  // .txfm_type_col
-  TXFM_TYPE_ADST32
-};  // .txfm_type_row
-
-//  ---------------- config inv_adst_adst_4 ----------------
-static const int8_t inv_shift_adst_adst_4[2] = { 0, -4 };
-static const int8_t inv_stage_range_col_adst_adst_4[6] = { 18, 18, 18,
-                                                           18, 17, 17 };
-static const int8_t inv_stage_range_row_adst_adst_4[6] = { 18, 18, 18,
-                                                           18, 18, 18 };
-static const int8_t inv_cos_bit_col_adst_adst_4[6] = { 13, 13, 13, 13, 13, 13 };
-static const int8_t inv_cos_bit_row_adst_adst_4[6] = { 13, 13, 13, 13, 13, 13 };
-
-static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_adst_4 = {
-  4,  // .txfm_size
-  6,  // .stage_num_col
-  6,  // .stage_num_row
-  // 0,  // .log_scale
-  inv_shift_adst_adst_4,            // .shift
-  inv_stage_range_col_adst_adst_4,  // .stage_range_col
-  inv_stage_range_row_adst_adst_4,  // .stage_range_row
-  inv_cos_bit_col_adst_adst_4,      // .cos_bit_col
-  inv_cos_bit_row_adst_adst_4,      // .cos_bit_row
-  TXFM_TYPE_ADST4,                  // .txfm_type_col
-  TXFM_TYPE_ADST4
-};  // .txfm_type_row
-
-//  ---------------- config inv_adst_adst_8 ----------------
-static const int8_t inv_shift_adst_adst_8[2] = { 0, -5 };
-static const int8_t inv_stage_range_col_adst_adst_8[8] = { 19, 19, 19, 19,
-                                                           19, 19, 18, 18 };
-static const int8_t inv_stage_range_row_adst_adst_8[8] = { 19, 19, 19, 19,
-                                                           19, 19, 19, 19 };
-static const int8_t inv_cos_bit_col_adst_adst_8[8] = { 13, 13, 13, 13,
-                                                       13, 13, 13, 13 };
-static const int8_t inv_cos_bit_row_adst_adst_8[8] = { 13, 13, 13, 13,
-                                                       13, 13, 13, 13 };
-
-static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_adst_8 = {
-  8,  // .txfm_size
-  8,  // .stage_num_col
-  8,  // .stage_num_row
-  // 0,  // .log_scale
-  inv_shift_adst_adst_8,            // .shift
-  inv_stage_range_col_adst_adst_8,  // .stage_range_col
-  inv_stage_range_row_adst_adst_8,  // .stage_range_row
-  inv_cos_bit_col_adst_adst_8,      // .cos_bit_col
-  inv_cos_bit_row_adst_adst_8,      // .cos_bit_row
-  TXFM_TYPE_ADST8,                  // .txfm_type_col
-  TXFM_TYPE_ADST8
-};  // .txfm_type_row
-
-//  ---------------- config inv_adst_adst_16 ----------------
-static const int8_t inv_shift_adst_adst_16[2] = { -1, -5 };
-static const int8_t inv_stage_range_col_adst_adst_16[10] = {
-  19, 19, 19, 19, 19, 19, 19, 19, 18, 18
-};
-static const int8_t inv_stage_range_row_adst_adst_16[10] = {
-  20, 20, 20, 20, 20, 20, 20, 20, 20, 20
-};
-static const int8_t inv_cos_bit_col_adst_adst_16[10] = { 13, 13, 13, 13, 13,
-                                                         13, 13, 13, 13, 13 };
-static const int8_t inv_cos_bit_row_adst_adst_16[10] = { 12, 12, 12, 12, 12,
-                                                         12, 12, 12, 12, 12 };
-
-static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_adst_16 = {
-  16,  // .txfm_size
-  10,  // .stage_num_col
-  10,  // .stage_num_row
-  // 0,  // .log_scale
-  inv_shift_adst_adst_16,            // .shift
-  inv_stage_range_col_adst_adst_16,  // .stage_range_col
-  inv_stage_range_row_adst_adst_16,  // .stage_range_row
-  inv_cos_bit_col_adst_adst_16,      // .cos_bit_col
-  inv_cos_bit_row_adst_adst_16,      // .cos_bit_row
-  TXFM_TYPE_ADST16,                  // .txfm_type_col
-  TXFM_TYPE_ADST16
-};  // .txfm_type_row
-
-//  ---------------- config inv_adst_adst_32 ----------------
-static const int8_t inv_shift_adst_adst_32[2] = { -1, -5 };
-static const int8_t inv_stage_range_col_adst_adst_32[12] = {
-  19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18
-};
-static const int8_t inv_stage_range_row_adst_adst_32[12] = {
-  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
-};
-static const int8_t inv_cos_bit_col_adst_adst_32[12] = {
-  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13
-};
-static const int8_t inv_cos_bit_row_adst_adst_32[12] = {
-  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12
-};
-
-static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_adst_32 = {
-  32,  // .txfm_size
-  12,  // .stage_num_col
-  12,  // .stage_num_row
-  // 1,  // .log_scale
-  inv_shift_adst_adst_32,            // .shift
-  inv_stage_range_col_adst_adst_32,  // .stage_range_col
-  inv_stage_range_row_adst_adst_32,  // .stage_range_row
-  inv_cos_bit_col_adst_adst_32,      // .cos_bit_col
-  inv_cos_bit_row_adst_adst_32,      // .cos_bit_row
-  TXFM_TYPE_ADST32,                  // .txfm_type_col
-  TXFM_TYPE_ADST32
-};  // .txfm_type_row
-
-//  ---------------- config inv_adst_dct_4 ----------------
-static const int8_t inv_shift_adst_dct_4[2] = { 0, -4 };
-static const int8_t inv_stage_range_col_adst_dct_4[6] = {
-  18, 18, 18, 18, 17, 17
-};
-static const int8_t inv_stage_range_row_adst_dct_4[4] = { 18, 18, 18, 18 };
-static const int8_t inv_cos_bit_col_adst_dct_4[6] = { 13, 13, 13, 13, 13, 13 };
-static const int8_t inv_cos_bit_row_adst_dct_4[4] = { 13, 13, 13, 13 };
-
-static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_dct_4 = {
-  4,  // .txfm_size
-  6,  // .stage_num_col
-  4,  // .stage_num_row
-  // 0,  // .log_scale
-  inv_shift_adst_dct_4,            // .shift
-  inv_stage_range_col_adst_dct_4,  // .stage_range_col
-  inv_stage_range_row_adst_dct_4,  // .stage_range_row
-  inv_cos_bit_col_adst_dct_4,      // .cos_bit_col
-  inv_cos_bit_row_adst_dct_4,      // .cos_bit_row
-  TXFM_TYPE_ADST4,                 // .txfm_type_col
-  TXFM_TYPE_DCT4
-};  // .txfm_type_row
-
-//  ---------------- config inv_adst_dct_8 ----------------
-static const int8_t inv_shift_adst_dct_8[2] = { 0, -5 };
-static const int8_t inv_stage_range_col_adst_dct_8[8] = { 19, 19, 19, 19,
-                                                          19, 19, 18, 18 };
-static const int8_t inv_stage_range_row_adst_dct_8[6] = {
-  19, 19, 19, 19, 19, 19
-};
-static const int8_t inv_cos_bit_col_adst_dct_8[8] = { 13, 13, 13, 13,
-                                                      13, 13, 13, 13 };
-static const int8_t inv_cos_bit_row_adst_dct_8[6] = { 13, 13, 13, 13, 13, 13 };
-
-static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_dct_8 = {
-  8,  // .txfm_size
-  8,  // .stage_num_col
-  6,  // .stage_num_row
-  // 0,  // .log_scale
-  inv_shift_adst_dct_8,            // .shift
-  inv_stage_range_col_adst_dct_8,  // .stage_range_col
-  inv_stage_range_row_adst_dct_8,  // .stage_range_row
-  inv_cos_bit_col_adst_dct_8,      // .cos_bit_col
-  inv_cos_bit_row_adst_dct_8,      // .cos_bit_row
-  TXFM_TYPE_ADST8,                 // .txfm_type_col
-  TXFM_TYPE_DCT8
-};  // .txfm_type_row
-
-//  ---------------- config inv_adst_dct_16 ----------------
-static const int8_t inv_shift_adst_dct_16[2] = { -1, -5 };
-static const int8_t inv_stage_range_col_adst_dct_16[10] = {
-  19, 19, 19, 19, 19, 19, 19, 19, 18, 18
-};
-static const int8_t inv_stage_range_row_adst_dct_16[8] = { 20, 20, 20, 20,
-                                                           20, 20, 20, 20 };
-static const int8_t inv_cos_bit_col_adst_dct_16[10] = { 13, 13, 13, 13, 13,
-                                                        13, 13, 13, 13, 13 };
-static const int8_t inv_cos_bit_row_adst_dct_16[8] = { 12, 12, 12, 12,
-                                                       12, 12, 12, 12 };
-
-static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_dct_16 = {
-  16,  // .txfm_size
-  10,  // .stage_num_col
-  8,   // .stage_num_row
-  // 0,  // .log_scale
-  inv_shift_adst_dct_16,            // .shift
-  inv_stage_range_col_adst_dct_16,  // .stage_range_col
-  inv_stage_range_row_adst_dct_16,  // .stage_range_row
-  inv_cos_bit_col_adst_dct_16,      // .cos_bit_col
-  inv_cos_bit_row_adst_dct_16,      // .cos_bit_row
-  TXFM_TYPE_ADST16,                 // .txfm_type_col
-  TXFM_TYPE_DCT16
-};  // .txfm_type_row
-
-//  ---------------- config inv_adst_dct_32 ----------------
-static const int8_t inv_shift_adst_dct_32[2] = { -1, -5 };
-static const int8_t inv_stage_range_col_adst_dct_32[12] = {
-  19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18
-};
-static const int8_t inv_stage_range_row_adst_dct_32[10] = {
-  20, 20, 20, 20, 20, 20, 20, 20, 20, 20
-};
-static const int8_t inv_cos_bit_col_adst_dct_32[12] = {
-  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13
-};
-static const int8_t inv_cos_bit_row_adst_dct_32[10] = { 12, 12, 12, 12, 12,
-                                                        12, 12, 12, 12, 12 };
-
-static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_dct_32 = {
-  32,  // .txfm_size
-  12,  // .stage_num_col
-  10,  // .stage_num_row
-  // 1,  // .log_scale
-  inv_shift_adst_dct_32,            // .shift
-  inv_stage_range_col_adst_dct_32,  // .stage_range_col
-  inv_stage_range_row_adst_dct_32,  // .stage_range_row
-  inv_cos_bit_col_adst_dct_32,      // .cos_bit_col
-  inv_cos_bit_row_adst_dct_32,      // .cos_bit_row
-  TXFM_TYPE_ADST32,                 // .txfm_type_col
-  TXFM_TYPE_DCT32
-};  // .txfm_type_row
-
-#endif  // AV1_INV_TXFM2D_CFG_H_
--- a/av1/common/av1_txfm.h
+++ b/av1/common/av1_txfm.h
@ -145,24 +145,21 @@ typedef enum TXFM_TYPE {
  TXFM_TYPE_ADST32,
 } TXFM_TYPE;

-typedef struct TXFM_2D_CFG {
+typedef struct TXFM_1D_CFG {
  const int txfm_size;
-  const int stage_num_col;
-  const int stage_num_row;
+  const int stage_num;

  const int8_t *shift;
-  const int8_t *stage_range_col;
-  const int8_t *stage_range_row;
-  const int8_t *cos_bit_col;
-  const int8_t *cos_bit_row;
-  const TXFM_TYPE txfm_type_col;
-  const TXFM_TYPE txfm_type_row;
-} TXFM_2D_CFG;
+  const int8_t *stage_range;
+  const int8_t *cos_bit;
+  const TXFM_TYPE txfm_type;
+} TXFM_1D_CFG;

 typedef struct TXFM_2D_FLIP_CFG {
  int ud_flip;  // flip upside down
  int lr_flip;  // flip left to right
-  const TXFM_2D_CFG *cfg;
+  const TXFM_1D_CFG *col_cfg;
+  const TXFM_1D_CFG *row_cfg;
 } TXFM_2D_FLIP_CFG;

 static INLINE void set_flip_cfg(int tx_type, TXFM_2D_FLIP_CFG *cfg) {
@ -176,10 +173,12 @@ static INLINE void set_flip_cfg(int tx_type, TXFM_2D_FLIP_CFG *cfg) {
      break;
 #if CONFIG_EXT_TX
    case FLIPADST_DCT:
+    case FLIPADST_ADST:
      cfg->ud_flip = 1;
      cfg->lr_flip = 0;
      break;
    case DCT_FLIPADST:
+    case ADST_FLIPADST:
      cfg->ud_flip = 0;
      cfg->lr_flip = 1;
      break;
@ -187,14 +186,6 @@ static INLINE void set_flip_cfg(int tx_type, TXFM_2D_FLIP_CFG *cfg) {
      cfg->ud_flip = 1;
      cfg->lr_flip = 1;
      break;
-    case ADST_FLIPADST:
-      cfg->ud_flip = 0;
-      cfg->lr_flip = 1;
-      break;
-    case FLIPADST_ADST:
-      cfg->ud_flip = 1;
-      cfg->lr_flip = 0;
-      break;
 #endif  // CONFIG_EXT_TX
    default:
      cfg->ud_flip = 0;
--- a/av1/common/common_data.h
+++ b/av1/common/common_data.h
@ -491,6 +491,22 @@ static const TX_SIZE max_txsize_rect_lookup[BLOCK_SIZES] = {
 #define max_txsize_rect_lookup max_txsize_lookup
 #endif  // CONFIG_RECT_TX && (CONFIG_EXT_TX || CONFIG_VAR_TX)

+static const TX_TYPE_1D vtx_tab[TX_TYPES] = {
+  DCT_1D,      ADST_1D, DCT_1D,      ADST_1D,
+#if CONFIG_EXT_TX
+  FLIPADST_1D, DCT_1D,  FLIPADST_1D, ADST_1D, FLIPADST_1D, IDTX_1D,
+  DCT_1D,      IDTX_1D, ADST_1D,     IDTX_1D, FLIPADST_1D, IDTX_1D,
+#endif  // CONFIG_EXT_TX
+};
+
+static const TX_TYPE_1D htx_tab[TX_TYPES] = {
+  DCT_1D,  DCT_1D,      ADST_1D,     ADST_1D,
+#if CONFIG_EXT_TX
+  DCT_1D,  FLIPADST_1D, FLIPADST_1D, FLIPADST_1D, ADST_1D, IDTX_1D,
+  IDTX_1D, DCT_1D,      IDTX_1D,     ADST_1D,     IDTX_1D, FLIPADST_1D,
+#endif  // CONFIG_EXT_TX
+};
+
 #if CONFIG_RECT_TX && (CONFIG_EXT_TX || CONFIG_VAR_TX)
 // Same as "max_txsize_lookup[bsize] - TX_8X8", except for rectangular
 // block which may use a rectangular transform, in which  case it is
--- a/av1/common/idct.c
+++ b/av1/common/idct.c
@ -15,7 +15,7 @@
 #include "./av1_rtcd.h"
 #include "aom_dsp/inv_txfm.h"
 #include "aom_ports/mem.h"
-#include "av1/common/av1_inv_txfm2d_cfg.h"
+#include "av1/common/av1_inv_txfm1d_cfg.h"
 #include "av1/common/blockd.h"
 #include "av1/common/enums.h"
 #include "av1/common/idct.h"
@ -85,8 +85,7 @@ static void idct64_col_c(const tran_low_t *input, tran_low_t *output) {
  int32_t in[64], out[64];
  int i;
  for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
-  av1_idct64_new(in, out, inv_cos_bit_col_dct_dct_64,
-                 inv_stage_range_col_dct_dct_64);
+  av1_idct64_new(in, out, inv_cos_bit_col_dct_64, inv_stage_range_col_dct_64);
  for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
 }

@ -94,8 +93,7 @@ static void idct64_row_c(const tran_low_t *input, tran_low_t *output) {
  int32_t in[64], out[64];
  int i;
  for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
-  av1_idct64_new(in, out, inv_cos_bit_row_dct_dct_64,
-                 inv_stage_range_row_dct_dct_64);
+  av1_idct64_new(in, out, inv_cos_bit_row_dct_64, inv_stage_range_row_dct_64);
  for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
 }

@ -196,8 +194,7 @@ static void highbd_idct64_col_c(const tran_low_t *input, tran_low_t *output,
  int i;
  (void)bd;
  for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
-  av1_idct64_new(in, out, inv_cos_bit_col_dct_dct_64,
-                 inv_stage_range_col_dct_dct_64);
+  av1_idct64_new(in, out, inv_cos_bit_col_dct_64, inv_stage_range_col_dct_64);
  for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
 }

@ -207,8 +204,7 @@ static void highbd_idct64_row_c(const tran_low_t *input, tran_low_t *output,
  int i;
  (void)bd;
  for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
-  av1_idct64_new(in, out, inv_cos_bit_row_dct_dct_64,
-                 inv_stage_range_row_dct_dct_64);
+  av1_idct64_new(in, out, inv_cos_bit_row_dct_64, inv_stage_range_row_dct_64);
  for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
 }
 #endif  // CONFIG_TX64X64
--- a/av1/common/x86/av1_fwd_txfm2d_sse4.c
+++ b/av1/common/x86/av1_fwd_txfm2d_sse4.c
@ -37,16 +37,20 @@ static INLINE TxfmFuncSSE2 fwd_txfm_type_to_func(TXFM_TYPE txfm_type) {
 }

 static INLINE void fwd_txfm2d_sse4_1(const int16_t *input, int32_t *output,
-                                     const int stride, const TXFM_2D_CFG *cfg,
+                                     const int stride,
+                                     const TXFM_2D_FLIP_CFG *cfg,
                                     int32_t *txfm_buf) {
-  const int txfm_size = cfg->txfm_size;
-  const int8_t *shift = cfg->shift;
-  const int8_t *stage_range_col = cfg->stage_range_col;
-  const int8_t *stage_range_row = cfg->stage_range_row;
-  const int8_t *cos_bit_col = cfg->cos_bit_col;
-  const int8_t *cos_bit_row = cfg->cos_bit_row;
-  const TxfmFuncSSE2 txfm_func_col = fwd_txfm_type_to_func(cfg->txfm_type_col);
-  const TxfmFuncSSE2 txfm_func_row = fwd_txfm_type_to_func(cfg->txfm_type_row);
+  // TODO(sarahparker) must correct for rectangular transforms in follow up
+  const int txfm_size = cfg->row_cfg->txfm_size;
+  const int8_t *shift = cfg->row_cfg->shift;
+  const int8_t *stage_range_col = cfg->col_cfg->stage_range;
+  const int8_t *stage_range_row = cfg->row_cfg->stage_range;
+  const int8_t *cos_bit_col = cfg->col_cfg->cos_bit;
+  const int8_t *cos_bit_row = cfg->row_cfg->cos_bit;
+  const TxfmFuncSSE2 txfm_func_col =
+      fwd_txfm_type_to_func(cfg->col_cfg->txfm_type);
+  const TxfmFuncSSE2 txfm_func_row =
+      fwd_txfm_type_to_func(cfg->row_cfg->txfm_type);

  __m128i *buf_128 = (__m128i *)txfm_buf;
  __m128i *out_128 = (__m128i *)output;
@ -69,7 +73,7 @@ void av1_fwd_txfm2d_32x32_sse4_1(const int16_t *input, int32_t *output,
  DECLARE_ALIGNED(16, int32_t, txfm_buf[1024]);
  TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_32X32);
  (void)bd;
-  fwd_txfm2d_sse4_1(input, output, stride, cfg.cfg, txfm_buf);
+  fwd_txfm2d_sse4_1(input, output, stride, &cfg, txfm_buf);
 }

 void av1_fwd_txfm2d_64x64_sse4_1(const int16_t *input, int32_t *output,
@ -77,5 +81,5 @@ void av1_fwd_txfm2d_64x64_sse4_1(const int16_t *input, int32_t *output,
  DECLARE_ALIGNED(16, int32_t, txfm_buf[4096]);
  TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_64x64_cfg(tx_type);
  (void)bd;
-  fwd_txfm2d_sse4_1(input, output, stride, cfg.cfg, txfm_buf);
+  fwd_txfm2d_sse4_1(input, output, stride, &cfg, txfm_buf);
 }
--- a/av1/common/x86/highbd_inv_txfm_avx2.c
+++ b/av1/common/x86/highbd_inv_txfm_avx2.c
@ -13,7 +13,7 @@

 #include "./av1_rtcd.h"
 #include "./aom_config.h"
-#include "av1/common/av1_inv_txfm2d_cfg.h"
+#include "av1/common/av1_inv_txfm1d_cfg.h"

 // Note:
 //  Total 32x4 registers to represent 32x32 block coefficients.
@ -601,18 +601,20 @@ static void idct32_avx2(__m256i *in, __m256i *out, int bit) {
 void av1_inv_txfm2d_add_32x32_avx2(const int32_t *coeff, uint16_t *output,
                                   int stride, int tx_type, int bd) {
  __m256i in[128], out[128];
-  const TXFM_2D_CFG *cfg = NULL;
+  const TXFM_1D_CFG *row_cfg = NULL;
+  const TXFM_1D_CFG *col_cfg = NULL;

  switch (tx_type) {
    case DCT_DCT:
-      cfg = &inv_txfm_2d_cfg_dct_dct_32;
+      row_cfg = &inv_txfm_1d_row_cfg_dct_32;
+      col_cfg = &inv_txfm_1d_col_cfg_dct_32;
      load_buffer_32x32(coeff, in);
      transpose_32x32(in, out);
-      idct32_avx2(out, in, cfg->cos_bit_row[2]);
-      round_shift_32x32(in, -cfg->shift[0]);
+      idct32_avx2(out, in, row_cfg->cos_bit[2]);
+      round_shift_32x32(in, -row_cfg->shift[0]);
      transpose_32x32(in, out);
-      idct32_avx2(out, in, cfg->cos_bit_col[2]);
-      write_buffer_32x32(in, output, stride, 0, 0, -cfg->shift[1], bd);
+      idct32_avx2(out, in, col_cfg->cos_bit[2]);
+      write_buffer_32x32(in, output, stride, 0, 0, -row_cfg->shift[1], bd);
      break;
    default: assert(0);
  }
--- a/av1/common/x86/highbd_inv_txfm_sse4.c
+++ b/av1/common/x86/highbd_inv_txfm_sse4.c
@ -13,7 +13,7 @@

 #include "./av1_rtcd.h"
 #include "./aom_config.h"
-#include "av1/common/av1_inv_txfm2d_cfg.h"
+#include "av1/common/av1_inv_txfm1d_cfg.h"
 #include "av1/common/x86/highbd_txfm_utility_sse4.h"

 static INLINE void load_buffer_4x4(const int32_t *coeff, __m128i *in) {
@ -232,72 +232,82 @@ static void write_buffer_4x4(__m128i *in, uint16_t *output, int stride,
 void av1_inv_txfm2d_add_4x4_sse4_1(const int32_t *coeff, uint16_t *output,
                                   int stride, int tx_type, int bd) {
  __m128i in[4];
-  const TXFM_2D_CFG *cfg = NULL;
+  const TXFM_1D_CFG *row_cfg = NULL;
+  const TXFM_1D_CFG *col_cfg = NULL;

  switch (tx_type) {
    case DCT_DCT:
-      cfg = &inv_txfm_2d_cfg_dct_dct_4;
+      row_cfg = &inv_txfm_1d_row_cfg_dct_4;
+      col_cfg = &inv_txfm_1d_col_cfg_dct_4;
      load_buffer_4x4(coeff, in);
-      idct4x4_sse4_1(in, cfg->cos_bit_row[2]);
-      idct4x4_sse4_1(in, cfg->cos_bit_col[2]);
-      write_buffer_4x4(in, output, stride, 0, 0, -cfg->shift[1], bd);
+      idct4x4_sse4_1(in, row_cfg->cos_bit[2]);
+      idct4x4_sse4_1(in, col_cfg->cos_bit[2]);
+      write_buffer_4x4(in, output, stride, 0, 0, -row_cfg->shift[1], bd);
      break;
    case ADST_DCT:
-      cfg = &inv_txfm_2d_cfg_adst_dct_4;
+      row_cfg = &inv_txfm_1d_row_cfg_dct_4;
+      col_cfg = &inv_txfm_1d_col_cfg_adst_4;
      load_buffer_4x4(coeff, in);
-      idct4x4_sse4_1(in, cfg->cos_bit_row[2]);
-      iadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
-      write_buffer_4x4(in, output, stride, 0, 0, -cfg->shift[1], bd);
+      idct4x4_sse4_1(in, row_cfg->cos_bit[2]);
+      iadst4x4_sse4_1(in, col_cfg->cos_bit[2]);
+      write_buffer_4x4(in, output, stride, 0, 0, -row_cfg->shift[1], bd);
      break;
    case DCT_ADST:
-      cfg = &inv_txfm_2d_cfg_dct_adst_4;
+      row_cfg = &inv_txfm_1d_row_cfg_adst_4;
+      col_cfg = &inv_txfm_1d_col_cfg_dct_4;
      load_buffer_4x4(coeff, in);
-      iadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
-      idct4x4_sse4_1(in, cfg->cos_bit_col[2]);
-      write_buffer_4x4(in, output, stride, 0, 0, -cfg->shift[1], bd);
+      iadst4x4_sse4_1(in, row_cfg->cos_bit[2]);
+      idct4x4_sse4_1(in, col_cfg->cos_bit[2]);
+      write_buffer_4x4(in, output, stride, 0, 0, -row_cfg->shift[1], bd);
      break;
    case ADST_ADST:
-      cfg = &inv_txfm_2d_cfg_adst_adst_4;
+      row_cfg = &inv_txfm_1d_row_cfg_adst_4;
+      col_cfg = &inv_txfm_1d_col_cfg_adst_4;
      load_buffer_4x4(coeff, in);
-      iadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
-      iadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
-      write_buffer_4x4(in, output, stride, 0, 0, -cfg->shift[1], bd);
+      iadst4x4_sse4_1(in, row_cfg->cos_bit[2]);
+      iadst4x4_sse4_1(in, col_cfg->cos_bit[2]);
+      write_buffer_4x4(in, output, stride, 0, 0, -row_cfg->shift[1], bd);
      break;
 #if CONFIG_EXT_TX
    case FLIPADST_DCT:
-      cfg = &inv_txfm_2d_cfg_adst_dct_4;
+      row_cfg = &inv_txfm_1d_row_cfg_dct_4;
+      col_cfg = &inv_txfm_1d_col_cfg_adst_4;
      load_buffer_4x4(coeff, in);
-      idct4x4_sse4_1(in, cfg->cos_bit_row[2]);
-      iadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
-      write_buffer_4x4(in, output, stride, 0, 1, -cfg->shift[1], bd);
+      idct4x4_sse4_1(in, row_cfg->cos_bit[2]);
+      iadst4x4_sse4_1(in, col_cfg->cos_bit[2]);
+      write_buffer_4x4(in, output, stride, 0, 1, -row_cfg->shift[1], bd);
      break;
    case DCT_FLIPADST:
-      cfg = &inv_txfm_2d_cfg_dct_adst_4;
+      row_cfg = &inv_txfm_1d_row_cfg_adst_4;
+      col_cfg = &inv_txfm_1d_col_cfg_dct_4;
      load_buffer_4x4(coeff, in);
-      iadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
-      idct4x4_sse4_1(in, cfg->cos_bit_col[2]);
-      write_buffer_4x4(in, output, stride, 1, 0, -cfg->shift[1], bd);
+      iadst4x4_sse4_1(in, row_cfg->cos_bit[2]);
+      idct4x4_sse4_1(in, col_cfg->cos_bit[2]);
+      write_buffer_4x4(in, output, stride, 1, 0, -row_cfg->shift[1], bd);
      break;
    case FLIPADST_FLIPADST:
-      cfg = &inv_txfm_2d_cfg_adst_adst_4;
+      row_cfg = &inv_txfm_1d_row_cfg_adst_4;
+      col_cfg = &inv_txfm_1d_col_cfg_adst_4;
      load_buffer_4x4(coeff, in);
-      iadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
-      iadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
-      write_buffer_4x4(in, output, stride, 1, 1, -cfg->shift[1], bd);
+      iadst4x4_sse4_1(in, row_cfg->cos_bit[2]);
+      iadst4x4_sse4_1(in, col_cfg->cos_bit[2]);
+      write_buffer_4x4(in, output, stride, 1, 1, -row_cfg->shift[1], bd);
      break;
    case ADST_FLIPADST:
-      cfg = &inv_txfm_2d_cfg_adst_adst_4;
+      row_cfg = &inv_txfm_1d_row_cfg_adst_4;
+      col_cfg = &inv_txfm_1d_col_cfg_adst_4;
      load_buffer_4x4(coeff, in);
-      iadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
-      iadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
-      write_buffer_4x4(in, output, stride, 1, 0, -cfg->shift[1], bd);
+      iadst4x4_sse4_1(in, row_cfg->cos_bit[2]);
+      iadst4x4_sse4_1(in, col_cfg->cos_bit[2]);
+      write_buffer_4x4(in, output, stride, 1, 0, -row_cfg->shift[1], bd);
      break;
    case FLIPADST_ADST:
-      cfg = &inv_txfm_2d_cfg_adst_adst_4;
+      row_cfg = &inv_txfm_1d_row_cfg_adst_4;
+      col_cfg = &inv_txfm_1d_col_cfg_adst_4;
      load_buffer_4x4(coeff, in);
-      iadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
-      iadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
-      write_buffer_4x4(in, output, stride, 0, 1, -cfg->shift[1], bd);
+      iadst4x4_sse4_1(in, row_cfg->cos_bit[2]);
+      iadst4x4_sse4_1(in, col_cfg->cos_bit[2]);
+      write_buffer_4x4(in, output, stride, 0, 1, -row_cfg->shift[1], bd);
      break;
 #endif  // CONFIG_EXT_TX
    default: assert(0);
@ -698,90 +708,100 @@ static void write_buffer_8x8(__m128i *in, uint16_t *output, int stride,
 void av1_inv_txfm2d_add_8x8_sse4_1(const int32_t *coeff, uint16_t *output,
                                   int stride, int tx_type, int bd) {
  __m128i in[16], out[16];
-  const TXFM_2D_CFG *cfg = NULL;
+  const TXFM_1D_CFG *row_cfg = NULL;
+  const TXFM_1D_CFG *col_cfg = NULL;

  switch (tx_type) {
    case DCT_DCT:
-      cfg = &inv_txfm_2d_cfg_dct_dct_8;
+      row_cfg = &inv_txfm_1d_row_cfg_dct_8;
+      col_cfg = &inv_txfm_1d_col_cfg_dct_8;
      load_buffer_8x8(coeff, in);
      transpose_8x8(in, out);
-      idct8x8_sse4_1(out, in, cfg->cos_bit_row[2]);
+      idct8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
      transpose_8x8(in, out);
-      idct8x8_sse4_1(out, in, cfg->cos_bit_col[2]);
-      write_buffer_8x8(in, output, stride, 0, 0, -cfg->shift[1], bd);
+      idct8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
+      write_buffer_8x8(in, output, stride, 0, 0, -row_cfg->shift[1], bd);
      break;
    case DCT_ADST:
-      cfg = &inv_txfm_2d_cfg_dct_adst_8;
+      row_cfg = &inv_txfm_1d_row_cfg_adst_8;
+      col_cfg = &inv_txfm_1d_col_cfg_dct_8;
      load_buffer_8x8(coeff, in);
      transpose_8x8(in, out);
-      iadst8x8_sse4_1(out, in, cfg->cos_bit_row[2]);
+      iadst8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
      transpose_8x8(in, out);
-      idct8x8_sse4_1(out, in, cfg->cos_bit_col[2]);
-      write_buffer_8x8(in, output, stride, 0, 0, -cfg->shift[1], bd);
+      idct8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
+      write_buffer_8x8(in, output, stride, 0, 0, -row_cfg->shift[1], bd);
      break;
    case ADST_DCT:
-      cfg = &inv_txfm_2d_cfg_adst_dct_8;
+      row_cfg = &inv_txfm_1d_row_cfg_dct_8;
+      col_cfg = &inv_txfm_1d_col_cfg_adst_8;
      load_buffer_8x8(coeff, in);
      transpose_8x8(in, out);
-      idct8x8_sse4_1(out, in, cfg->cos_bit_row[2]);
+      idct8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
      transpose_8x8(in, out);
-      iadst8x8_sse4_1(out, in, cfg->cos_bit_col[2]);
-      write_buffer_8x8(in, output, stride, 0, 0, -cfg->shift[1], bd);
+      iadst8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
+      write_buffer_8x8(in, output, stride, 0, 0, -row_cfg->shift[1], bd);
      break;
    case ADST_ADST:
-      cfg = &inv_txfm_2d_cfg_adst_adst_8;
+      row_cfg = &inv_txfm_1d_row_cfg_adst_8;
+      col_cfg = &inv_txfm_1d_col_cfg_adst_8;
      load_buffer_8x8(coeff, in);
      transpose_8x8(in, out);
-      iadst8x8_sse4_1(out, in, cfg->cos_bit_row[2]);
+      iadst8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
      transpose_8x8(in, out);
-      iadst8x8_sse4_1(out, in, cfg->cos_bit_col[2]);
-      write_buffer_8x8(in, output, stride, 0, 0, -cfg->shift[1], bd);
+      iadst8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
+      write_buffer_8x8(in, output, stride, 0, 0, -row_cfg->shift[1], bd);
      break;
 #if CONFIG_EXT_TX
    case FLIPADST_DCT:
-      cfg = &inv_txfm_2d_cfg_adst_dct_8;
+      row_cfg = &inv_txfm_1d_row_cfg_dct_8;
+      col_cfg = &inv_txfm_1d_col_cfg_adst_8;
      load_buffer_8x8(coeff, in);
      transpose_8x8(in, out);
-      idct8x8_sse4_1(out, in, cfg->cos_bit_row[2]);
+      idct8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
      transpose_8x8(in, out);
-      iadst8x8_sse4_1(out, in, cfg->cos_bit_col[2]);
-      write_buffer_8x8(in, output, stride, 0, 1, -cfg->shift[1], bd);
+      iadst8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
+      write_buffer_8x8(in, output, stride, 0, 1, -row_cfg->shift[1], bd);
      break;
    case DCT_FLIPADST:
-      cfg = &inv_txfm_2d_cfg_dct_adst_8;
+      row_cfg = &inv_txfm_1d_row_cfg_adst_8;
+      col_cfg = &inv_txfm_1d_col_cfg_dct_8;
      load_buffer_8x8(coeff, in);
      transpose_8x8(in, out);
-      iadst8x8_sse4_1(out, in, cfg->cos_bit_row[2]);
+      iadst8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
      transpose_8x8(in, out);
-      idct8x8_sse4_1(out, in, cfg->cos_bit_col[2]);
-      write_buffer_8x8(in, output, stride, 1, 0, -cfg->shift[1], bd);
+      idct8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
+      write_buffer_8x8(in, output, stride, 1, 0, -row_cfg->shift[1], bd);
      break;
    case ADST_FLIPADST:
-      cfg = &inv_txfm_2d_cfg_adst_adst_8;
+      row_cfg = &inv_txfm_1d_row_cfg_adst_8;
+      col_cfg = &inv_txfm_1d_col_cfg_adst_8;
      load_buffer_8x8(coeff, in);
      transpose_8x8(in, out);
-      iadst8x8_sse4_1(out, in, cfg->cos_bit_row[2]);
+      iadst8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
      transpose_8x8(in, out);
-      iadst8x8_sse4_1(out, in, cfg->cos_bit_col[2]);
-      write_buffer_8x8(in, output, stride, 1, 0, -cfg->shift[1], bd);
+      iadst8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
+      write_buffer_8x8(in, output, stride, 1, 0, -row_cfg->shift[1], bd);
      break;
    case FLIPADST_FLIPADST:
-      cfg = &inv_txfm_2d_cfg_adst_adst_8;
+      row_cfg = &inv_txfm_1d_row_cfg_adst_8;
+      col_cfg = &inv_txfm_1d_col_cfg_adst_8;
      load_buffer_8x8(coeff, in);
      transpose_8x8(in, out);
-      iadst8x8_sse4_1(out, in, cfg->cos_bit_row[2]);
+      iadst8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
      transpose_8x8(in, out);
-      iadst8x8_sse4_1(out, in, cfg->cos_bit_col[2]);
-      write_buffer_8x8(in, output, stride, 1, 1, -cfg->shift[1], bd);
+      iadst8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
+      write_buffer_8x8(in, output, stride, 1, 1, -row_cfg->shift[1], bd);
      break;
    case FLIPADST_ADST:
-      cfg = &inv_txfm_2d_cfg_adst_adst_8;
+      row_cfg = &inv_txfm_1d_row_cfg_adst_8;
+      col_cfg = &inv_txfm_1d_col_cfg_adst_8;
      load_buffer_8x8(coeff, in);
      transpose_8x8(in, out);
-      iadst8x8_sse4_1(out, in, cfg->cos_bit_row[2]);
+      iadst8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
      transpose_8x8(in, out);
-      iadst8x8_sse4_1(out, in, cfg->cos_bit_col[2]);
-      write_buffer_8x8(in, output, stride, 0, 1, -cfg->shift[1], bd);
+      iadst8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
+      write_buffer_8x8(in, output, stride, 0, 1, -row_cfg->shift[1], bd);
      break;
 #endif  // CONFIG_EXT_TX
    default: assert(0);
@ -1298,99 +1318,109 @@ static void round_shift_16x16(__m128i *in, int shift) {
 void av1_inv_txfm2d_add_16x16_sse4_1(const int32_t *coeff, uint16_t *output,
                                     int stride, int tx_type, int bd) {
  __m128i in[64], out[64];
-  const TXFM_2D_CFG *cfg = NULL;
+  const TXFM_1D_CFG *row_cfg = NULL;
+  const TXFM_1D_CFG *col_cfg = NULL;

  switch (tx_type) {
    case DCT_DCT:
-      cfg = &inv_txfm_2d_cfg_dct_dct_16;
+      row_cfg = &inv_txfm_1d_row_cfg_dct_16;
+      col_cfg = &inv_txfm_1d_col_cfg_dct_16;
      load_buffer_16x16(coeff, in);
      transpose_16x16(in, out);
-      idct16x16_sse4_1(out, in, cfg->cos_bit_row[2]);
-      round_shift_16x16(in, -cfg->shift[0]);
+      idct16x16_sse4_1(out, in, row_cfg->cos_bit[2]);
+      round_shift_16x16(in, -row_cfg->shift[0]);
      transpose_16x16(in, out);
-      idct16x16_sse4_1(out, in, cfg->cos_bit_col[2]);
-      write_buffer_16x16(in, output, stride, 0, 0, -cfg->shift[1], bd);
+      idct16x16_sse4_1(out, in, col_cfg->cos_bit[2]);
+      write_buffer_16x16(in, output, stride, 0, 0, -row_cfg->shift[1], bd);
      break;
    case DCT_ADST:
-      cfg = &inv_txfm_2d_cfg_dct_adst_16;
+      row_cfg = &inv_txfm_1d_row_cfg_adst_16;
+      col_cfg = &inv_txfm_1d_col_cfg_dct_16;
      load_buffer_16x16(coeff, in);
      transpose_16x16(in, out);
-      iadst16x16_sse4_1(out, in, cfg->cos_bit_row[2]);
-      round_shift_16x16(in, -cfg->shift[0]);
+      iadst16x16_sse4_1(out, in, row_cfg->cos_bit[2]);
+      round_shift_16x16(in, -row_cfg->shift[0]);
      transpose_16x16(in, out);
-      idct16x16_sse4_1(out, in, cfg->cos_bit_col[2]);
-      write_buffer_16x16(in, output, stride, 0, 0, -cfg->shift[1], bd);
+      idct16x16_sse4_1(out, in, col_cfg->cos_bit[2]);
+      write_buffer_16x16(in, output, stride, 0, 0, -row_cfg->shift[1], bd);
      break;
    case ADST_DCT:
-      cfg = &inv_txfm_2d_cfg_adst_dct_16;
+      row_cfg = &inv_txfm_1d_row_cfg_dct_16;
+      col_cfg = &inv_txfm_1d_col_cfg_adst_16;
      load_buffer_16x16(coeff, in);
      transpose_16x16(in, out);
-      idct16x16_sse4_1(out, in, cfg->cos_bit_row[2]);
-      round_shift_16x16(in, -cfg->shift[0]);
+      idct16x16_sse4_1(out, in, row_cfg->cos_bit[2]);
+      round_shift_16x16(in, -row_cfg->shift[0]);
      transpose_16x16(in, out);
-      iadst16x16_sse4_1(out, in, cfg->cos_bit_col[2]);
-      write_buffer_16x16(in, output, stride, 0, 0, -cfg->shift[1], bd);
+      iadst16x16_sse4_1(out, in, col_cfg->cos_bit[2]);
+      write_buffer_16x16(in, output, stride, 0, 0, -row_cfg->shift[1], bd);
      break;
    case ADST_ADST:
-      cfg = &inv_txfm_2d_cfg_adst_adst_16;
+      row_cfg = &inv_txfm_1d_row_cfg_adst_16;
+      col_cfg = &inv_txfm_1d_col_cfg_adst_16;
      load_buffer_16x16(coeff, in);
      transpose_16x16(in, out);
-      iadst16x16_sse4_1(out, in, cfg->cos_bit_row[2]);
-      round_shift_16x16(in, -cfg->shift[0]);
+      iadst16x16_sse4_1(out, in, row_cfg->cos_bit[2]);
+      round_shift_16x16(in, -row_cfg->shift[0]);
      transpose_16x16(in, out);
-      iadst16x16_sse4_1(out, in, cfg->cos_bit_col[2]);
-      write_buffer_16x16(in, output, stride, 0, 0, -cfg->shift[1], bd);
+      iadst16x16_sse4_1(out, in, col_cfg->cos_bit[2]);
+      write_buffer_16x16(in, output, stride, 0, 0, -row_cfg->shift[1], bd);
      break;
 #if CONFIG_EXT_TX
    case FLIPADST_DCT:
-      cfg = &inv_txfm_2d_cfg_adst_dct_16;
+      row_cfg = &inv_txfm_1d_row_cfg_dct_16;
+      col_cfg = &inv_txfm_1d_col_cfg_adst_16;
      load_buffer_16x16(coeff, in);
      transpose_16x16(in, out);
-      idct16x16_sse4_1(out, in, cfg->cos_bit_row[2]);
-      round_shift_16x16(in, -cfg->shift[0]);
+      idct16x16_sse4_1(out, in, row_cfg->cos_bit[2]);
+      round_shift_16x16(in, -row_cfg->shift[0]);
      transpose_16x16(in, out);
-      iadst16x16_sse4_1(out, in, cfg->cos_bit_col[2]);
-      write_buffer_16x16(in, output, stride, 0, 1, -cfg->shift[1], bd);
+      iadst16x16_sse4_1(out, in, col_cfg->cos_bit[2]);
+      write_buffer_16x16(in, output, stride, 0, 1, -row_cfg->shift[1], bd);
      break;
    case DCT_FLIPADST:
-      cfg = &inv_txfm_2d_cfg_dct_adst_16;
+      row_cfg = &inv_txfm_1d_row_cfg_adst_16;
+      col_cfg = &inv_txfm_1d_col_cfg_dct_16;
      load_buffer_16x16(coeff, in);
      transpose_16x16(in, out);
-      iadst16x16_sse4_1(out, in, cfg->cos_bit_row[2]);
-      round_shift_16x16(in, -cfg->shift[0]);
+      iadst16x16_sse4_1(out, in, row_cfg->cos_bit[2]);
+      round_shift_16x16(in, -row_cfg->shift[0]);
      transpose_16x16(in, out);
-      idct16x16_sse4_1(out, in, cfg->cos_bit_col[2]);
-      write_buffer_16x16(in, output, stride, 1, 0, -cfg->shift[1], bd);
+      idct16x16_sse4_1(out, in, col_cfg->cos_bit[2]);
+      write_buffer_16x16(in, output, stride, 1, 0, -row_cfg->shift[1], bd);
      break;
    case ADST_FLIPADST:
-      cfg = &inv_txfm_2d_cfg_adst_adst_16;
+      row_cfg = &inv_txfm_1d_row_cfg_adst_16;
+      col_cfg = &inv_txfm_1d_col_cfg_adst_16;
      load_buffer_16x16(coeff, in);
      transpose_16x16(in, out);
-      iadst16x16_sse4_1(out, in, cfg->cos_bit_row[2]);
-      round_shift_16x16(in, -cfg->shift[0]);
+      iadst16x16_sse4_1(out, in, row_cfg->cos_bit[2]);
+      round_shift_16x16(in, -row_cfg->shift[0]);
      transpose_16x16(in, out);
-      iadst16x16_sse4_1(out, in, cfg->cos_bit_col[2]);
-      write_buffer_16x16(in, output, stride, 1, 0, -cfg->shift[1], bd);
+      iadst16x16_sse4_1(out, in, col_cfg->cos_bit[2]);
+      write_buffer_16x16(in, output, stride, 1, 0, -row_cfg->shift[1], bd);
      break;
    case FLIPADST_FLIPADST:
-      cfg = &inv_txfm_2d_cfg_adst_adst_16;
+      row_cfg = &inv_txfm_1d_row_cfg_adst_16;
+      col_cfg = &inv_txfm_1d_col_cfg_adst_16;
      load_buffer_16x16(coeff, in);
      transpose_16x16(in, out);
-      iadst16x16_sse4_1(out, in, cfg->cos_bit_row[2]);
-      round_shift_16x16(in, -cfg->shift[0]);
+      iadst16x16_sse4_1(out, in, row_cfg->cos_bit[2]);
+      round_shift_16x16(in, -row_cfg->shift[0]);
      transpose_16x16(in, out);
-      iadst16x16_sse4_1(out, in, cfg->cos_bit_col[2]);
-      write_buffer_16x16(in, output, stride, 1, 1, -cfg->shift[1], bd);
+      iadst16x16_sse4_1(out, in, col_cfg->cos_bit[2]);
+      write_buffer_16x16(in, output, stride, 1, 1, -row_cfg->shift[1], bd);
      break;
    case FLIPADST_ADST:
-      cfg = &inv_txfm_2d_cfg_adst_adst_16;
+      row_cfg = &inv_txfm_1d_row_cfg_adst_16;
+      col_cfg = &inv_txfm_1d_col_cfg_adst_16;
      load_buffer_16x16(coeff, in);
      transpose_16x16(in, out);
-      iadst16x16_sse4_1(out, in, cfg->cos_bit_row[2]);
-      round_shift_16x16(in, -cfg->shift[0]);
+      iadst16x16_sse4_1(out, in, row_cfg->cos_bit[2]);
+      round_shift_16x16(in, -row_cfg->shift[0]);
      transpose_16x16(in, out);
-      iadst16x16_sse4_1(out, in, cfg->cos_bit_col[2]);
-      write_buffer_16x16(in, output, stride, 0, 1, -cfg->shift[1], bd);
+      iadst16x16_sse4_1(out, in, col_cfg->cos_bit[2]);
+      write_buffer_16x16(in, output, stride, 0, 1, -row_cfg->shift[1], bd);
      break;
 #endif
    default: assert(0);
--- a/av1/encoder/dct.c
+++ b/av1/encoder/dct.c
@ -19,7 +19,7 @@
 #include "aom_ports/mem.h"
 #include "av1/common/blockd.h"
 #include "av1/common/av1_fwd_txfm1d.h"
-#include "av1/common/av1_fwd_txfm2d_cfg.h"
+#include "av1/common/av1_fwd_txfm1d_cfg.h"
 #include "av1/common/idct.h"

 static INLINE void range_check(const tran_low_t *input, const int size,
@ -2133,8 +2133,7 @@ static void fdct64_col(const tran_low_t *input, tran_low_t *output) {
  int32_t in[64], out[64];
  int i;
  for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
-  av1_fdct64_new(in, out, fwd_cos_bit_col_dct_dct_64,
-                 fwd_stage_range_col_dct_dct_64);
+  av1_fdct64_new(in, out, fwd_cos_bit_col_dct_64, fwd_stage_range_col_dct_64);
  for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
 }

@ -2142,8 +2141,7 @@ static void fdct64_row(const tran_low_t *input, tran_low_t *output) {
  int32_t in[64], out[64];
  int i;
  for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
-  av1_fdct64_new(in, out, fwd_cos_bit_row_dct_dct_64,
-                 fwd_stage_range_row_dct_dct_64);
+  av1_fdct64_new(in, out, fwd_cos_bit_row_dct_64, fwd_stage_range_row_dct_64);
  for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
 }

--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@ -418,22 +418,6 @@ static INLINE int write_uniform_cost(int n, int v) {
 #define FAST_EXT_TX_CORR_MARGIN 0.5
 #define FAST_EXT_TX_EDST_MARGIN 0.3

-static const TX_TYPE_1D vtx_tab[TX_TYPES] = {
-  DCT_1D,      ADST_1D, DCT_1D,      ADST_1D,
-#if CONFIG_EXT_TX
-  FLIPADST_1D, DCT_1D,  FLIPADST_1D, ADST_1D, FLIPADST_1D, IDTX_1D,
-  DCT_1D,      IDTX_1D, ADST_1D,     IDTX_1D, FLIPADST_1D, IDTX_1D,
-#endif  // CONFIG_EXT_TX
-};
-
-static const TX_TYPE_1D htx_tab[TX_TYPES] = {
-  DCT_1D,  DCT_1D,      ADST_1D,     ADST_1D,
-#if CONFIG_EXT_TX
-  DCT_1D,  FLIPADST_1D, FLIPADST_1D, FLIPADST_1D, ADST_1D, IDTX_1D,
-  IDTX_1D, DCT_1D,      IDTX_1D,     ADST_1D,     IDTX_1D, FLIPADST_1D,
-#endif  // CONFIG_EXT_TX
-};
-
 #if CONFIG_DAALA_DIST
 static int od_compute_var_4x4(od_coeff *x, int stride) {
  int sum;
--- a/av1/encoder/x86/highbd_fwd_txfm_sse4.c
+++ b/av1/encoder/x86/highbd_fwd_txfm_sse4.c
@ -13,7 +13,7 @@

 #include "./av1_rtcd.h"
 #include "./aom_config.h"
-#include "av1/common/av1_fwd_txfm2d_cfg.h"
+#include "av1/common/av1_fwd_txfm1d_cfg.h"
 #include "av1/common/av1_txfm.h"
 #include "av1/common/x86/highbd_txfm_utility_sse4.h"
 #include "aom_dsp/txfm_common.h"
@ -209,71 +209,81 @@ static void fadst4x4_sse4_1(__m128i *in, int bit) {
 void av1_fwd_txfm2d_4x4_sse4_1(const int16_t *input, int32_t *coeff,
                               int input_stride, int tx_type, int bd) {
  __m128i in[4];
-  const TXFM_2D_CFG *cfg = NULL;
+  const TXFM_1D_CFG *row_cfg = NULL;
+  const TXFM_1D_CFG *col_cfg = NULL;

  switch (tx_type) {
    case DCT_DCT:
-      cfg = &fwd_txfm_2d_cfg_dct_dct_4;
-      load_buffer_4x4(input, in, input_stride, 0, 0, cfg->shift[0]);
-      fdct4x4_sse4_1(in, cfg->cos_bit_col[2]);
-      fdct4x4_sse4_1(in, cfg->cos_bit_row[2]);
+      row_cfg = &fwd_txfm_1d_row_cfg_dct_4;
+      col_cfg = &fwd_txfm_1d_col_cfg_dct_4;
+      load_buffer_4x4(input, in, input_stride, 0, 0, row_cfg->shift[0]);
+      fdct4x4_sse4_1(in, col_cfg->cos_bit[2]);
+      fdct4x4_sse4_1(in, row_cfg->cos_bit[2]);
      write_buffer_4x4(in, coeff);
      break;
    case ADST_DCT:
-      cfg = &fwd_txfm_2d_cfg_adst_dct_4;
-      load_buffer_4x4(input, in, input_stride, 0, 0, cfg->shift[0]);
-      fadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
-      fdct4x4_sse4_1(in, cfg->cos_bit_row[2]);
+      row_cfg = &fwd_txfm_1d_row_cfg_dct_4;
+      col_cfg = &fwd_txfm_1d_col_cfg_adst_4;
+      load_buffer_4x4(input, in, input_stride, 0, 0, row_cfg->shift[0]);
+      fadst4x4_sse4_1(in, col_cfg->cos_bit[2]);
+      fdct4x4_sse4_1(in, row_cfg->cos_bit[2]);
      write_buffer_4x4(in, coeff);
      break;
    case DCT_ADST:
-      cfg = &fwd_txfm_2d_cfg_dct_adst_4;
-      load_buffer_4x4(input, in, input_stride, 0, 0, cfg->shift[0]);
-      fdct4x4_sse4_1(in, cfg->cos_bit_col[2]);
-      fadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
+      row_cfg = &fwd_txfm_1d_row_cfg_adst_4;
+      col_cfg = &fwd_txfm_1d_col_cfg_dct_4;
+      load_buffer_4x4(input, in, input_stride, 0, 0, row_cfg->shift[0]);
+      fdct4x4_sse4_1(in, col_cfg->cos_bit[2]);
+      fadst4x4_sse4_1(in, row_cfg->cos_bit[2]);
      write_buffer_4x4(in, coeff);
      break;
    case ADST_ADST:
-      cfg = &fwd_txfm_2d_cfg_adst_adst_4;
-      load_buffer_4x4(input, in, input_stride, 0, 0, cfg->shift[0]);
-      fadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
-      fadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
+      row_cfg = &fwd_txfm_1d_row_cfg_adst_4;
+      col_cfg = &fwd_txfm_1d_col_cfg_adst_4;
+      load_buffer_4x4(input, in, input_stride, 0, 0, row_cfg->shift[0]);
+      fadst4x4_sse4_1(in, col_cfg->cos_bit[2]);
+      fadst4x4_sse4_1(in, row_cfg->cos_bit[2]);
      write_buffer_4x4(in, coeff);
      break;
 #if CONFIG_EXT_TX
    case FLIPADST_DCT:
-      cfg = &fwd_txfm_2d_cfg_adst_dct_4;
-      load_buffer_4x4(input, in, input_stride, 1, 0, cfg->shift[0]);
-      fadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
-      fdct4x4_sse4_1(in, cfg->cos_bit_row[2]);
+      row_cfg = &fwd_txfm_1d_row_cfg_dct_4;
+      col_cfg = &fwd_txfm_1d_col_cfg_adst_4;
+      load_buffer_4x4(input, in, input_stride, 1, 0, row_cfg->shift[0]);
+      fadst4x4_sse4_1(in, col_cfg->cos_bit[2]);
+      fdct4x4_sse4_1(in, row_cfg->cos_bit[2]);
      write_buffer_4x4(in, coeff);
      break;
    case DCT_FLIPADST:
-      cfg = &fwd_txfm_2d_cfg_dct_adst_4;
-      load_buffer_4x4(input, in, input_stride, 0, 1, cfg->shift[0]);
-      fdct4x4_sse4_1(in, cfg->cos_bit_col[2]);
-      fadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
+      row_cfg = &fwd_txfm_1d_row_cfg_adst_4;
+      col_cfg = &fwd_txfm_1d_col_cfg_dct_4;
+      load_buffer_4x4(input, in, input_stride, 0, 1, row_cfg->shift[0]);
+      fdct4x4_sse4_1(in, col_cfg->cos_bit[2]);
+      fadst4x4_sse4_1(in, row_cfg->cos_bit[2]);
      write_buffer_4x4(in, coeff);
      break;
    case FLIPADST_FLIPADST:
-      cfg = &fwd_txfm_2d_cfg_adst_adst_4;
-      load_buffer_4x4(input, in, input_stride, 1, 1, cfg->shift[0]);
-      fadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
-      fadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
+      row_cfg = &fwd_txfm_1d_row_cfg_adst_4;
+      col_cfg = &fwd_txfm_1d_col_cfg_adst_4;
+      load_buffer_4x4(input, in, input_stride, 1, 1, row_cfg->shift[0]);
+      fadst4x4_sse4_1(in, col_cfg->cos_bit[2]);
+      fadst4x4_sse4_1(in, row_cfg->cos_bit[2]);
      write_buffer_4x4(in, coeff);
      break;
    case ADST_FLIPADST:
-      cfg = &fwd_txfm_2d_cfg_adst_adst_4;
-      load_buffer_4x4(input, in, input_stride, 0, 1, cfg->shift[0]);
-      fadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
-      fadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
+      row_cfg = &fwd_txfm_1d_row_cfg_adst_4;
+      col_cfg = &fwd_txfm_1d_col_cfg_adst_4;
+      load_buffer_4x4(input, in, input_stride, 0, 1, row_cfg->shift[0]);
+      fadst4x4_sse4_1(in, col_cfg->cos_bit[2]);
+      fadst4x4_sse4_1(in, row_cfg->cos_bit[2]);
      write_buffer_4x4(in, coeff);
      break;
    case FLIPADST_ADST:
-      cfg = &fwd_txfm_2d_cfg_adst_adst_4;
-      load_buffer_4x4(input, in, input_stride, 1, 0, cfg->shift[0]);
-      fadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
-      fadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
+      row_cfg = &fwd_txfm_1d_row_cfg_adst_4;
+      col_cfg = &fwd_txfm_1d_col_cfg_adst_4;
+      load_buffer_4x4(input, in, input_stride, 1, 0, row_cfg->shift[0]);
+      fadst4x4_sse4_1(in, col_cfg->cos_bit[2]);
+      fadst4x4_sse4_1(in, row_cfg->cos_bit[2]);
      write_buffer_4x4(in, coeff);
      break;
 #endif
@ -930,97 +940,107 @@ static void fadst8x8_sse4_1(__m128i *in, __m128i *out, int bit) {
 void av1_fwd_txfm2d_8x8_sse4_1(const int16_t *input, int32_t *coeff, int stride,
                               int tx_type, int bd) {
  __m128i in[16], out[16];
-  const TXFM_2D_CFG *cfg = NULL;
+  const TXFM_1D_CFG *row_cfg = NULL;
+  const TXFM_1D_CFG *col_cfg = NULL;

  switch (tx_type) {
    case DCT_DCT:
-      cfg = &fwd_txfm_2d_cfg_dct_dct_8;
-      load_buffer_8x8(input, in, stride, 0, 0, cfg->shift[0]);
-      fdct8x8_sse4_1(in, out, cfg->cos_bit_col[2]);
-      col_txfm_8x8_rounding(out, -cfg->shift[1]);
+      row_cfg = &fwd_txfm_1d_row_cfg_dct_8;
+      col_cfg = &fwd_txfm_1d_col_cfg_dct_8;
+      load_buffer_8x8(input, in, stride, 0, 0, row_cfg->shift[0]);
+      fdct8x8_sse4_1(in, out, col_cfg->cos_bit[2]);
+      col_txfm_8x8_rounding(out, -row_cfg->shift[1]);
      transpose_8x8(out, in);
-      fdct8x8_sse4_1(in, out, cfg->cos_bit_row[2]);
+      fdct8x8_sse4_1(in, out, row_cfg->cos_bit[2]);
      transpose_8x8(out, in);
      write_buffer_8x8(in, coeff);
      break;
    case ADST_DCT:
-      cfg = &fwd_txfm_2d_cfg_adst_dct_8;
-      load_buffer_8x8(input, in, stride, 0, 0, cfg->shift[0]);
-      fadst8x8_sse4_1(in, out, cfg->cos_bit_col[2]);
-      col_txfm_8x8_rounding(out, -cfg->shift[1]);
+      row_cfg = &fwd_txfm_1d_row_cfg_dct_8;
+      col_cfg = &fwd_txfm_1d_col_cfg_adst_8;
+      load_buffer_8x8(input, in, stride, 0, 0, row_cfg->shift[0]);
+      fadst8x8_sse4_1(in, out, col_cfg->cos_bit[2]);
+      col_txfm_8x8_rounding(out, -row_cfg->shift[1]);
      transpose_8x8(out, in);
-      fdct8x8_sse4_1(in, out, cfg->cos_bit_row[2]);
+      fdct8x8_sse4_1(in, out, row_cfg->cos_bit[2]);
      transpose_8x8(out, in);
      write_buffer_8x8(in, coeff);
      break;
    case DCT_ADST:
-      cfg = &fwd_txfm_2d_cfg_dct_adst_8;
-      load_buffer_8x8(input, in, stride, 0, 0, cfg->shift[0]);
-      fdct8x8_sse4_1(in, out, cfg->cos_bit_col[2]);
-      col_txfm_8x8_rounding(out, -cfg->shift[1]);
+      row_cfg = &fwd_txfm_1d_row_cfg_adst_8;
+      col_cfg = &fwd_txfm_1d_col_cfg_dct_8;
+      load_buffer_8x8(input, in, stride, 0, 0, row_cfg->shift[0]);
+      fdct8x8_sse4_1(in, out, col_cfg->cos_bit[2]);
+      col_txfm_8x8_rounding(out, -row_cfg->shift[1]);
      transpose_8x8(out, in);
-      fadst8x8_sse4_1(in, out, cfg->cos_bit_row[2]);
+      fadst8x8_sse4_1(in, out, row_cfg->cos_bit[2]);
      transpose_8x8(out, in);
      write_buffer_8x8(in, coeff);
      break;
    case ADST_ADST:
-      cfg = &fwd_txfm_2d_cfg_adst_adst_8;
-      load_buffer_8x8(input, in, stride, 0, 0, cfg->shift[0]);
-      fadst8x8_sse4_1(in, out, cfg->cos_bit_col[2]);
-      col_txfm_8x8_rounding(out, -cfg->shift[1]);
+      row_cfg = &fwd_txfm_1d_row_cfg_adst_8;
+      col_cfg = &fwd_txfm_1d_col_cfg_adst_8;
+      load_buffer_8x8(input, in, stride, 0, 0, row_cfg->shift[0]);
+      fadst8x8_sse4_1(in, out, col_cfg->cos_bit[2]);
+      col_txfm_8x8_rounding(out, -row_cfg->shift[1]);
      transpose_8x8(out, in);
-      fadst8x8_sse4_1(in, out, cfg->cos_bit_row[2]);
+      fadst8x8_sse4_1(in, out, row_cfg->cos_bit[2]);
      transpose_8x8(out, in);
      write_buffer_8x8(in, coeff);
      break;
 #if CONFIG_EXT_TX
    case FLIPADST_DCT:
-      cfg = &fwd_txfm_2d_cfg_adst_dct_8;
-      load_buffer_8x8(input, in, stride, 1, 0, cfg->shift[0]);
-      fadst8x8_sse4_1(in, out, cfg->cos_bit_col[2]);
-      col_txfm_8x8_rounding(out, -cfg->shift[1]);
+      row_cfg = &fwd_txfm_1d_row_cfg_dct_8;
+      col_cfg = &fwd_txfm_1d_col_cfg_adst_8;
+      load_buffer_8x8(input, in, stride, 1, 0, row_cfg->shift[0]);
+      fadst8x8_sse4_1(in, out, col_cfg->cos_bit[2]);
+      col_txfm_8x8_rounding(out, -row_cfg->shift[1]);
      transpose_8x8(out, in);
-      fdct8x8_sse4_1(in, out, cfg->cos_bit_row[2]);
+      fdct8x8_sse4_1(in, out, row_cfg->cos_bit[2]);
      transpose_8x8(out, in);
      write_buffer_8x8(in, coeff);
      break;
    case DCT_FLIPADST:
-      cfg = &fwd_txfm_2d_cfg_dct_adst_8;
-      load_buffer_8x8(input, in, stride, 0, 1, cfg->shift[0]);
-      fdct8x8_sse4_1(in, out, cfg->cos_bit_col[2]);
-      col_txfm_8x8_rounding(out, -cfg->shift[1]);
+      row_cfg = &fwd_txfm_1d_row_cfg_adst_8;
+      col_cfg = &fwd_txfm_1d_col_cfg_dct_8;
+      load_buffer_8x8(input, in, stride, 0, 1, row_cfg->shift[0]);
+      fdct8x8_sse4_1(in, out, col_cfg->cos_bit[2]);
+      col_txfm_8x8_rounding(out, -row_cfg->shift[1]);
      transpose_8x8(out, in);
-      fadst8x8_sse4_1(in, out, cfg->cos_bit_row[2]);
+      fadst8x8_sse4_1(in, out, row_cfg->cos_bit[2]);
      transpose_8x8(out, in);
      write_buffer_8x8(in, coeff);
      break;
    case FLIPADST_FLIPADST:
-      cfg = &fwd_txfm_2d_cfg_adst_adst_8;
-      load_buffer_8x8(input, in, stride, 1, 1, cfg->shift[0]);
-      fadst8x8_sse4_1(in, out, cfg->cos_bit_col[2]);
-      col_txfm_8x8_rounding(out, -cfg->shift[1]);
+      row_cfg = &fwd_txfm_1d_row_cfg_adst_8;
+      col_cfg = &fwd_txfm_1d_col_cfg_adst_8;
+      load_buffer_8x8(input, in, stride, 1, 1, row_cfg->shift[0]);
+      fadst8x8_sse4_1(in, out, col_cfg->cos_bit[2]);
+      col_txfm_8x8_rounding(out, -row_cfg->shift[1]);
      transpose_8x8(out, in);
-      fadst8x8_sse4_1(in, out, cfg->cos_bit_row[2]);
+      fadst8x8_sse4_1(in, out, row_cfg->cos_bit[2]);
      transpose_8x8(out, in);
      write_buffer_8x8(in, coeff);
      break;
    case ADST_FLIPADST:
-      cfg = &fwd_txfm_2d_cfg_adst_adst_8;
-      load_buffer_8x8(input, in, stride, 0, 1, cfg->shift[0]);
-      fadst8x8_sse4_1(in, out, cfg->cos_bit_col[2]);
-      col_txfm_8x8_rounding(out, -cfg->shift[1]);
+      row_cfg = &fwd_txfm_1d_row_cfg_adst_8;
+      col_cfg = &fwd_txfm_1d_col_cfg_adst_8;
+      load_buffer_8x8(input, in, stride, 0, 1, row_cfg->shift[0]);
+      fadst8x8_sse4_1(in, out, col_cfg->cos_bit[2]);
+      col_txfm_8x8_rounding(out, -row_cfg->shift[1]);
      transpose_8x8(out, in);
-      fadst8x8_sse4_1(in, out, cfg->cos_bit_row[2]);
+      fadst8x8_sse4_1(in, out, row_cfg->cos_bit[2]);
      transpose_8x8(out, in);
      write_buffer_8x8(in, coeff);
      break;
    case FLIPADST_ADST:
-      cfg = &fwd_txfm_2d_cfg_adst_adst_8;
-      load_buffer_8x8(input, in, stride, 1, 0, cfg->shift[0]);
-      fadst8x8_sse4_1(in, out, cfg->cos_bit_col[2]);
-      col_txfm_8x8_rounding(out, -cfg->shift[1]);
+      row_cfg = &fwd_txfm_1d_row_cfg_adst_8;
+      col_cfg = &fwd_txfm_1d_col_cfg_adst_8;
+      load_buffer_8x8(input, in, stride, 1, 0, row_cfg->shift[0]);
+      fadst8x8_sse4_1(in, out, col_cfg->cos_bit[2]);
+      col_txfm_8x8_rounding(out, -row_cfg->shift[1]);
      transpose_8x8(out, in);
-      fadst8x8_sse4_1(in, out, cfg->cos_bit_row[2]);
+      fadst8x8_sse4_1(in, out, row_cfg->cos_bit[2]);
      transpose_8x8(out, in);
      write_buffer_8x8(in, coeff);
      break;
@ -1794,97 +1814,107 @@ static void write_buffer_16x16(const __m128i *in, tran_low_t *output) {
 void av1_fwd_txfm2d_16x16_sse4_1(const int16_t *input, int32_t *coeff,
                                 int stride, int tx_type, int bd) {
  __m128i in[64], out[64];
-  const TXFM_2D_CFG *cfg = NULL;
+  const TXFM_1D_CFG *row_cfg = NULL;
+  const TXFM_1D_CFG *col_cfg = NULL;

  switch (tx_type) {
    case DCT_DCT:
-      cfg = &fwd_txfm_2d_cfg_dct_dct_16;
-      load_buffer_16x16(input, in, stride, 0, 0, cfg->shift[0]);
-      fdct16x16_sse4_1(in, out, cfg->cos_bit_col[0]);
-      col_txfm_16x16_rounding(out, -cfg->shift[1]);
+      row_cfg = &fwd_txfm_1d_row_cfg_dct_16;
+      col_cfg = &fwd_txfm_1d_col_cfg_dct_16;
+      load_buffer_16x16(input, in, stride, 0, 0, row_cfg->shift[0]);
+      fdct16x16_sse4_1(in, out, col_cfg->cos_bit[0]);
+      col_txfm_16x16_rounding(out, -row_cfg->shift[1]);
      transpose_16x16(out, in);
-      fdct16x16_sse4_1(in, out, cfg->cos_bit_row[0]);
+      fdct16x16_sse4_1(in, out, row_cfg->cos_bit[0]);
      transpose_16x16(out, in);
      write_buffer_16x16(in, coeff);
      break;
    case ADST_DCT:
-      cfg = &fwd_txfm_2d_cfg_adst_dct_16;
-      load_buffer_16x16(input, in, stride, 0, 0, cfg->shift[0]);
-      fadst16x16_sse4_1(in, out, cfg->cos_bit_col[0]);
-      col_txfm_16x16_rounding(out, -cfg->shift[1]);
+      row_cfg = &fwd_txfm_1d_row_cfg_dct_16;
+      col_cfg = &fwd_txfm_1d_col_cfg_adst_16;
+      load_buffer_16x16(input, in, stride, 0, 0, row_cfg->shift[0]);
+      fadst16x16_sse4_1(in, out, col_cfg->cos_bit[0]);
+      col_txfm_16x16_rounding(out, -row_cfg->shift[1]);
      transpose_16x16(out, in);
-      fdct16x16_sse4_1(in, out, cfg->cos_bit_row[0]);
+      fdct16x16_sse4_1(in, out, row_cfg->cos_bit[0]);
      transpose_16x16(out, in);
      write_buffer_16x16(in, coeff);
      break;
    case DCT_ADST:
-      cfg = &fwd_txfm_2d_cfg_dct_adst_16;
-      load_buffer_16x16(input, in, stride, 0, 0, cfg->shift[0]);
-      fdct16x16_sse4_1(in, out, cfg->cos_bit_col[0]);
-      col_txfm_16x16_rounding(out, -cfg->shift[1]);
+      row_cfg = &fwd_txfm_1d_row_cfg_adst_16;
+      col_cfg = &fwd_txfm_1d_col_cfg_dct_16;
+      load_buffer_16x16(input, in, stride, 0, 0, row_cfg->shift[0]);
+      fdct16x16_sse4_1(in, out, col_cfg->cos_bit[0]);
+      col_txfm_16x16_rounding(out, -row_cfg->shift[1]);
      transpose_16x16(out, in);
-      fadst16x16_sse4_1(in, out, cfg->cos_bit_row[0]);
+      fadst16x16_sse4_1(in, out, row_cfg->cos_bit[0]);
      transpose_16x16(out, in);
      write_buffer_16x16(in, coeff);
      break;
    case ADST_ADST:
-      cfg = &fwd_txfm_2d_cfg_adst_adst_16;
-      load_buffer_16x16(input, in, stride, 0, 0, cfg->shift[0]);
-      fadst16x16_sse4_1(in, out, cfg->cos_bit_col[0]);
-      col_txfm_16x16_rounding(out, -cfg->shift[1]);
+      row_cfg = &fwd_txfm_1d_row_cfg_adst_16;
+      col_cfg = &fwd_txfm_1d_col_cfg_adst_16;
+      load_buffer_16x16(input, in, stride, 0, 0, row_cfg->shift[0]);
+      fadst16x16_sse4_1(in, out, col_cfg->cos_bit[0]);
+      col_txfm_16x16_rounding(out, -row_cfg->shift[1]);
      transpose_16x16(out, in);
-      fadst16x16_sse4_1(in, out, cfg->cos_bit_row[0]);
+      fadst16x16_sse4_1(in, out, row_cfg->cos_bit[0]);
      transpose_16x16(out, in);
      write_buffer_16x16(in, coeff);
      break;
 #if CONFIG_EXT_TX
    case FLIPADST_DCT:
-      cfg = &fwd_txfm_2d_cfg_adst_dct_16;
-      load_buffer_16x16(input, in, stride, 1, 0, cfg->shift[0]);
-      fadst16x16_sse4_1(in, out, cfg->cos_bit_col[0]);
-      col_txfm_16x16_rounding(out, -cfg->shift[1]);
+      row_cfg = &fwd_txfm_1d_row_cfg_dct_16;
+      col_cfg = &fwd_txfm_1d_col_cfg_adst_16;
+      load_buffer_16x16(input, in, stride, 1, 0, row_cfg->shift[0]);
+      fadst16x16_sse4_1(in, out, col_cfg->cos_bit[0]);
+      col_txfm_16x16_rounding(out, -row_cfg->shift[1]);
      transpose_16x16(out, in);
-      fdct16x16_sse4_1(in, out, cfg->cos_bit_row[0]);
+      fdct16x16_sse4_1(in, out, row_cfg->cos_bit[0]);
      transpose_16x16(out, in);
      write_buffer_16x16(in, coeff);
      break;
    case DCT_FLIPADST:
-      cfg = &fwd_txfm_2d_cfg_dct_adst_16;
-      load_buffer_16x16(input, in, stride, 0, 1, cfg->shift[0]);
-      fdct16x16_sse4_1(in, out, cfg->cos_bit_col[0]);
-      col_txfm_16x16_rounding(out, -cfg->shift[1]);
+      row_cfg = &fwd_txfm_1d_row_cfg_adst_16;
+      col_cfg = &fwd_txfm_1d_col_cfg_dct_16;
+      load_buffer_16x16(input, in, stride, 0, 1, row_cfg->shift[0]);
+      fdct16x16_sse4_1(in, out, col_cfg->cos_bit[0]);
+      col_txfm_16x16_rounding(out, -row_cfg->shift[1]);
      transpose_16x16(out, in);
-      fadst16x16_sse4_1(in, out, cfg->cos_bit_row[0]);
+      fadst16x16_sse4_1(in, out, row_cfg->cos_bit[0]);
      transpose_16x16(out, in);
      write_buffer_16x16(in, coeff);
      break;
    case FLIPADST_FLIPADST:
-      cfg = &fwd_txfm_2d_cfg_adst_adst_16;
-      load_buffer_16x16(input, in, stride, 1, 1, cfg->shift[0]);
-      fadst16x16_sse4_1(in, out, cfg->cos_bit_col[0]);
-      col_txfm_16x16_rounding(out, -cfg->shift[1]);
+      row_cfg = &fwd_txfm_1d_row_cfg_adst_16;
+      col_cfg = &fwd_txfm_1d_col_cfg_adst_16;
+      load_buffer_16x16(input, in, stride, 1, 1, row_cfg->shift[0]);
+      fadst16x16_sse4_1(in, out, col_cfg->cos_bit[0]);
+      col_txfm_16x16_rounding(out, -row_cfg->shift[1]);
      transpose_16x16(out, in);
-      fadst16x16_sse4_1(in, out, cfg->cos_bit_row[0]);
+      fadst16x16_sse4_1(in, out, row_cfg->cos_bit[0]);
      transpose_16x16(out, in);
      write_buffer_16x16(in, coeff);
      break;
    case ADST_FLIPADST:
-      cfg = &fwd_txfm_2d_cfg_adst_adst_16;
-      load_buffer_16x16(input, in, stride, 0, 1, cfg->shift[0]);
-      fadst16x16_sse4_1(in, out, cfg->cos_bit_col[0]);
-      col_txfm_16x16_rounding(out, -cfg->shift[1]);
+      row_cfg = &fwd_txfm_1d_row_cfg_adst_16;
+      col_cfg = &fwd_txfm_1d_col_cfg_adst_16;
+      load_buffer_16x16(input, in, stride, 0, 1, row_cfg->shift[0]);
+      fadst16x16_sse4_1(in, out, col_cfg->cos_bit[0]);
+      col_txfm_16x16_rounding(out, -row_cfg->shift[1]);
      transpose_16x16(out, in);
-      fadst16x16_sse4_1(in, out, cfg->cos_bit_row[0]);
+      fadst16x16_sse4_1(in, out, row_cfg->cos_bit[0]);
      transpose_16x16(out, in);
      write_buffer_16x16(in, coeff);
      break;
    case FLIPADST_ADST:
-      cfg = &fwd_txfm_2d_cfg_adst_adst_16;
-      load_buffer_16x16(input, in, stride, 1, 0, cfg->shift[0]);
-      fadst16x16_sse4_1(in, out, cfg->cos_bit_col[0]);
-      col_txfm_16x16_rounding(out, -cfg->shift[1]);
+      row_cfg = &fwd_txfm_1d_row_cfg_adst_16;
+      col_cfg = &fwd_txfm_1d_col_cfg_adst_16;
+      load_buffer_16x16(input, in, stride, 1, 0, row_cfg->shift[0]);
+      fadst16x16_sse4_1(in, out, col_cfg->cos_bit[0]);
+      col_txfm_16x16_rounding(out, -row_cfg->shift[1]);
      transpose_16x16(out, in);
-      fadst16x16_sse4_1(in, out, cfg->cos_bit_row[0]);
+      fadst16x16_sse4_1(in, out, row_cfg->cos_bit[0]);
      transpose_16x16(out, in);
      write_buffer_16x16(in, coeff);
      break;
--- a/test/av1_fwd_txfm2d_test.cc
+++ b/test/av1_fwd_txfm2d_test.cc
@ -41,9 +41,11 @@ class AV1FwdTxfm2d : public ::testing::TestWithParam<AV1FwdTxfm2dParam> {
    count_ = 500;
    TXFM_2D_FLIP_CFG fwd_txfm_flip_cfg =
        av1_get_fwd_txfm_cfg(tx_type_, tx_size_);
-    const TXFM_2D_CFG *fwd_txfm_cfg = fwd_txfm_flip_cfg.cfg;
-    int amplify_bit = fwd_txfm_cfg->shift[0] + fwd_txfm_cfg->shift[1] +
-                      fwd_txfm_cfg->shift[2];
+    // TODO(sarahparker): Update this test when these functions are extended
+    // to support rectangular transforms; shifts are read from row_cfg only.
+    int amplify_bit = fwd_txfm_flip_cfg.row_cfg->shift[0] +
+                      fwd_txfm_flip_cfg.row_cfg->shift[1] +
+                      fwd_txfm_flip_cfg.row_cfg->shift[2];
    ud_flip_ = fwd_txfm_flip_cfg.ud_flip;
    lr_flip_ = fwd_txfm_flip_cfg.lr_flip;
    amplify_factor_ =
--- a/test/av1_inv_txfm2d_test.cc
+++ b/test/av1_inv_txfm2d_test.cc
@ -17,7 +17,7 @@
 #include "test/acm_random.h"
 #include "test/util.h"
 #include "test/av1_txfm_test.h"
-#include "av1/common/av1_inv_txfm2d_cfg.h"
+#include "av1/common/av1_inv_txfm1d_cfg.h"

 using libaom_test::ACMRandom;
 using libaom_test::input_base;