enable bitstream lossless support

1. Added a bit in the frame header to indicate whether a frame is encoded in lossless mode, so the decoder does not make the decision based on Q0. 2. Minor changes to make sure that lossy coding works the same as when the lossless experiment is not enabled. 3. Renamed function pointers for transforms to be consistent, using the prefixes fwd_txm and inv_txm for forward and inverse transforms respectively. To encode in lossless mode, use "--lossless=1 --min-q=0 --max-q=0" with vpxenc. Change-Id: Ifae53b26d2ffbe378d707e29d96817b8a5e6c068
2013-02-11 21:14:46 -08:00 · 2013-02-11 21:14:46 -08:00 · 17db5d00be
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@ -390,22 +390,22 @@ typedef struct macroblockd {
  int lossless;
 #endif
  /* Inverse transform function pointers. */
-  void (*inv_xform4x4_1_x8)(int16_t *input, int16_t *output, int pitch);
-  void (*inv_xform4x4_x8)(int16_t *input, int16_t *output, int pitch);
-  void (*inv_walsh4x4_1)(int16_t *in, int16_t *out);
-  void (*inv_walsh4x4_lossless)(int16_t *in, int16_t *out);
-  void (*idct_add)(int16_t *input, const int16_t *dq,
+  void (*inv_txm4x4_1)(int16_t *input, int16_t *output, int pitch);
+  void (*inv_txm4x4)(int16_t *input, int16_t *output, int pitch);
+  void (*inv_2ndtxm4x4_1)(int16_t *in, int16_t *out);
+  void (*inv_2ndtxm4x4)(int16_t *in, int16_t *out);
+  void (*itxm_add)(int16_t *input, const int16_t *dq,
    uint8_t *pred, uint8_t *output, int pitch, int stride);
-  void (*dc_idct_add)(int16_t *input, const int16_t *dq,
+  void (*dc_itxm_add)(int16_t *input, const int16_t *dq,
    uint8_t *pred, uint8_t *output, int pitch, int stride, int dc);
-  void (*dc_only_idct_add)(int input_dc, uint8_t *pred_ptr,
+  void (*dc_only_itxm_add)(int input_dc, uint8_t *pred_ptr,
    uint8_t *dst_ptr, int pitch, int stride);
-  void (*dc_idct_add_y_block)(int16_t *q, const int16_t *dq,
+  void (*dc_itxm_add_y_block)(int16_t *q, const int16_t *dq,
    uint8_t *pre, uint8_t *dst, int stride, uint16_t *eobs,
    const int16_t *dc);
-  void (*idct_add_y_block)(int16_t *q, const int16_t *dq,
+  void (*itxm_add_y_block)(int16_t *q, const int16_t *dq,
    uint8_t *pre, uint8_t *dst, int stride, uint16_t *eobs);
-  void (*idct_add_uv_block)(int16_t *q, const int16_t *dq,
+  void (*itxm_add_uv_block)(int16_t *q, const int16_t *dq,
    uint8_t *pre, uint8_t *dst_u, uint8_t *dst_v, int stride,
    uint16_t *eobs);

@ -520,7 +520,7 @@ static TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, const BLOCKD *b) {
    return tx_type;
 #if CONFIG_LOSSLESS
  if (xd->lossless)
-    return tx_type;
+    return DCT_DCT;
 #endif
  // TODO(rbultje, debargha): Explore ADST usage for superblocks
  if (xd->mode_info_context->mbmi.sb_type)
--- a/vp9/common/vp9_idctllm.c
+++ b/vp9/common/vp9_idctllm.c
@ -476,12 +476,13 @@ void vp9_short_inv_walsh4x4_1_x8_c(int16_t *in, int16_t *out, int pitch) {
  }
 }

-void vp9_dc_only_inv_walsh_add_c(short input_dc, uint8_t *pred_ptr,
+void vp9_dc_only_inv_walsh_add_c(int input_dc, uint8_t *pred_ptr,
                                 uint8_t *dst_ptr,
                                 int pitch, int stride) {
  int r, c;
-  short tmp[16];
-  vp9_short_inv_walsh4x4_1_x8_c(&input_dc, tmp, 4 << 1);
+  int16_t dc = input_dc;
+  int16_t tmp[16];
+  vp9_short_inv_walsh4x4_1_x8_c(&dc, tmp, 4 << 1);

  for (r = 0; r < 4; r++) {
    for (c = 0; c < 4; c++) {
--- a/vp9/common/vp9_invtrans.c
+++ b/vp9/common/vp9_invtrans.c
@ -32,9 +32,9 @@ static void recon_dcblock_8x8(MACROBLOCKD *xd) {
 void vp9_inverse_transform_b_4x4(MACROBLOCKD *xd, int block, int pitch) {
  BLOCKD *b = &xd->block[block];
  if (b->eob <= 1)
-    xd->inv_xform4x4_1_x8(b->dqcoeff, b->diff, pitch);
+    xd->inv_txm4x4_1(b->dqcoeff, b->diff, pitch);
  else
-    xd->inv_xform4x4_x8(b->dqcoeff, b->diff, pitch);
+    xd->inv_txm4x4(b->dqcoeff, b->diff, pitch);
 }

 void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd) {
@ -44,7 +44,7 @@ void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd) {

  if (has_2nd_order) {
    /* do 2nd order transform on the dc block */
-    xd->inv_walsh4x4_lossless(blockd[24].dqcoeff, blockd[24].diff);
+    xd->inv_2ndtxm4x4(blockd[24].dqcoeff, blockd[24].diff);
    recon_dcblock(xd);
  }

--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@ -123,43 +123,30 @@ static void mb_init_dequantizer(VP9D_COMP *pbi, MACROBLOCKD *xd) {
    xd->block[i].dequant = pc->Y1dequant[QIndex];
  }

+  xd->inv_txm4x4_1        = vp9_short_idct4x4llm_1;
+  xd->inv_txm4x4          = vp9_short_idct4x4llm;
+  xd->inv_2ndtxm4x4_1     = vp9_short_inv_walsh4x4_1;
+  xd->inv_2ndtxm4x4       = vp9_short_inv_walsh4x4;
+  xd->itxm_add            = vp9_dequant_idct_add;
+  xd->dc_only_itxm_add    = vp9_dc_only_idct_add_c;
+  xd->dc_itxm_add         = vp9_dequant_dc_idct_add;
+  xd->dc_itxm_add_y_block = vp9_dequant_dc_idct_add_y_block;
+  xd->itxm_add_y_block    = vp9_dequant_idct_add_y_block;
+  xd->itxm_add_uv_block   = vp9_dequant_idct_add_uv_block;
 #if CONFIG_LOSSLESS
-  pbi->mb.lossless = 0;
-  if (!QIndex) {
-    pbi->mb.inv_xform4x4_1_x8     = vp9_short_inv_walsh4x4_1_x8;
-    pbi->mb.inv_xform4x4_x8       = vp9_short_inv_walsh4x4_x8;
-    pbi->mb.inv_walsh4x4_1        = vp9_short_inv_walsh4x4_1_lossless;
-    pbi->mb.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4_lossless;
-    pbi->mb.idct_add            = vp9_dequant_idct_add_lossless_c;
-    pbi->mb.dc_only_idct_add    = vp9_dc_only_inv_walsh_add_c;
-    pbi->mb.dc_idct_add         = vp9_dequant_dc_idct_add_lossless_c;
-    pbi->mb.dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block_lossless_c;
-    pbi->mb.idct_add_y_block    = vp9_dequant_idct_add_y_block_lossless_c;
-    pbi->mb.idct_add_uv_block   = vp9_dequant_idct_add_uv_block_lossless_c;
-    pbi->mb.lossless = 1;
-  } else {
-    pbi->mb.inv_xform4x4_1_x8     = vp9_short_idct4x4llm_1;
-    pbi->mb.inv_xform4x4_x8       = vp9_short_idct4x4llm;
-    pbi->mb.inv_walsh4x4_1        = vp9_short_inv_walsh4x4_1;
-    pbi->mb.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4;
-    pbi->mb.idct_add            = vp9_dequant_idct_add;
-    pbi->mb.dc_only_idct_add    = vp9_dc_only_idct_add_c;
-    pbi->mb.dc_idct_add         = vp9_dequant_dc_idct_add;
-    pbi->mb.dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block;
-    pbi->mb.idct_add_y_block    = vp9_dequant_idct_add_y_block;
-    pbi->mb.idct_add_uv_block   = vp9_dequant_idct_add_uv_block;
+  if (xd->lossless) {
+    assert(QIndex == 0);
+    xd->inv_txm4x4_1        = vp9_short_inv_walsh4x4_1_x8;
+    xd->inv_txm4x4          = vp9_short_inv_walsh4x4_x8;
+    xd->inv_2ndtxm4x4_1     = vp9_short_inv_walsh4x4_1_lossless;
+    xd->inv_2ndtxm4x4       = vp9_short_inv_walsh4x4_lossless;
+    xd->itxm_add            = vp9_dequant_idct_add_lossless_c;
+    xd->dc_only_itxm_add    = vp9_dc_only_inv_walsh_add_c;
+    xd->dc_itxm_add         = vp9_dequant_dc_idct_add_lossless_c;
+    xd->dc_itxm_add_y_block = vp9_dequant_dc_idct_add_y_block_lossless_c;
+    xd->itxm_add_y_block    = vp9_dequant_idct_add_y_block_lossless_c;
+    xd->itxm_add_uv_block   = vp9_dequant_idct_add_uv_block_lossless_c;
  }
-#else
-  pbi->mb.inv_xform4x4_1_x8     = vp9_short_idct4x4llm_1;
-  pbi->mb.inv_xform4x4_x8       = vp9_short_idct4x4llm;
-  pbi->mb.inv_walsh4x4_1        = vp9_short_inv_walsh4x4_1;
-  pbi->mb.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4;
-  pbi->mb.idct_add            = vp9_dequant_idct_add;
-  pbi->mb.dc_only_idct_add    = vp9_dc_only_idct_add_c;
-  pbi->mb.dc_idct_add         = vp9_dequant_dc_idct_add;
-  pbi->mb.dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block;
-  pbi->mb.idct_add_y_block    = vp9_dequant_idct_add_y_block;
-  pbi->mb.idct_add_uv_block   = vp9_dequant_idct_add_uv_block;
 #endif

  for (i = 16; i < 24; i++) {
@ -349,15 +336,15 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd,
      int i8x8mode = b->bmi.as_mode.first;
      b = &xd->block[16 + i];
      vp9_intra_uv4x4_predict(xd, &xd->block[16 + i], i8x8mode, b->predictor);
-      xd->idct_add(b->qcoeff, b->dequant, b->predictor,
+      xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
                    *(b->base_dst) + b->dst, 8, b->dst_stride);
      b = &xd->block[20 + i];
      vp9_intra_uv4x4_predict(xd, &xd->block[20 + i], i8x8mode, b->predictor);
-      xd->idct_add(b->qcoeff, b->dequant, b->predictor,
+      xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
                    *(b->base_dst) + b->dst, 8, b->dst_stride);
    }
  } else if (xd->mode_info_context->mbmi.mode == SPLITMV) {
-    xd->idct_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant,
+    xd->itxm_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant,
         xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer,
         xd->dst.uv_stride, xd->eobs + 16);
  } else {
@ -404,17 +391,17 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
                                    *(b->base_dst) + b->dst, 16,
                                    b->dst_stride, b->eob);
        } else {
-          xd->idct_add(b->qcoeff, b->dequant, b->predictor,
+          xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
                        *(b->base_dst) + b->dst, 16, b->dst_stride);
        }
      }
      b = &xd->block[16 + i];
      vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor);
-      xd->idct_add(b->qcoeff, b->dequant, b->predictor,
+      xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
                    *(b->base_dst) + b->dst, 8, b->dst_stride);
      b = &xd->block[20 + i];
      vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor);
-      xd->idct_add(b->qcoeff, b->dequant, b->predictor,
+      xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
                    *(b->base_dst) + b->dst, 8, b->dst_stride);
    }
  } else if (mode == B_PRED) {
@ -438,7 +425,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
                                  *(b->base_dst) + b->dst, 16, b->dst_stride,
                                  b->eob);
      } else {
-        xd->idct_add(b->qcoeff, b->dequant, b->predictor,
+        xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
                      *(b->base_dst) + b->dst, 16, b->dst_stride);
      }
    }
@ -448,7 +435,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
    xd->above_context->y2 = 0;
    xd->left_context->y2 = 0;
    vp9_build_intra_predictors_mbuv(xd);
-    xd->idct_add_uv_block(xd->qcoeff + 16 * 16,
+    xd->itxm_add_uv_block(xd->qcoeff + 16 * 16,
                           xd->block[16].dequant,
                           xd->predictor + 16 * 16,
                           xd->dst.u_buffer,
@ -457,13 +444,13 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
                           xd->eobs + 16);
  } else if (mode == SPLITMV) {
    assert(get_2nd_order_usage(xd) == 0);
-    xd->idct_add_y_block(xd->qcoeff,
+    xd->itxm_add_y_block(xd->qcoeff,
                          xd->block[0].dequant,
                          xd->predictor,
                          xd->dst.y_buffer,
                          xd->dst.y_stride,
                          xd->eobs);
-    xd->idct_add_uv_block(xd->qcoeff + 16 * 16,
+    xd->itxm_add_uv_block(xd->qcoeff + 16 * 16,
                           xd->block[16].dequant,
                           xd->predictor + 16 * 16,
                           xd->dst.u_buffer,
@ -500,7 +487,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
                                    *(b->base_dst) + b->dst, 16,
                                    b->dst_stride, b->eob);
        } else {
-          xd->idct_add(b->qcoeff, b->dequant, b->predictor,
+          xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
                        *(b->base_dst) + b->dst, 16, b->dst_stride);
        }
      }
@ -509,7 +496,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
      assert(get_2nd_order_usage(xd) == 1);
      vp9_dequantize_b(b);
      if (xd->eobs[24] > 1) {
-        xd->inv_walsh4x4_lossless(&b->dqcoeff[0], b->diff);
+        xd->inv_2ndtxm4x4(&b->dqcoeff[0], b->diff);
        ((int *)b->qcoeff)[0] = 0;
        ((int *)b->qcoeff)[1] = 0;
        ((int *)b->qcoeff)[2] = 0;
@ -519,11 +506,11 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
        ((int *)b->qcoeff)[6] = 0;
        ((int *)b->qcoeff)[7] = 0;
      } else {
-        xd->inv_walsh4x4_1(&b->dqcoeff[0], b->diff);
+        xd->inv_2ndtxm4x4_1(&b->dqcoeff[0], b->diff);
        ((int *)b->qcoeff)[0] = 0;
      }
      vp9_dequantize_b(b);
-      xd->dc_idct_add_y_block(xd->qcoeff,
+      xd->dc_itxm_add_y_block(xd->qcoeff,
                               xd->block[0].dequant,
                               xd->predictor,
                               xd->dst.y_buffer,
@ -531,7 +518,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
                               xd->eobs,
                               xd->block[24].diff);
    }
-    xd->idct_add_uv_block(xd->qcoeff + 16 * 16,
+    xd->itxm_add_uv_block(xd->qcoeff + 16 * 16,
                           xd->block[16].dequant,
                           xd->predictor + 16 * 16,
                           xd->dst.u_buffer,
@ -649,7 +636,7 @@ static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd,
            + x_idx * 16 + (i & 3) * 4,
            xd->dst.y_stride, xd->dst.y_stride, b->eob);
      } else {
-        xd->idct_add(
+        xd->itxm_add(
            b->qcoeff, b->dequant,
            xd->dst.y_buffer + (y_idx * 16 + (i / 4) * 4) * xd->dst.y_stride
            + x_idx * 16 + (i & 3) * 4,
@ -661,7 +648,7 @@ static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd,
  } else {
    vp9_dequantize_b(b);
    if (xd->eobs[24] > 1) {
-      xd->inv_walsh4x4_lossless(&b->dqcoeff[0], b->diff);
+      xd->inv_2ndtxm4x4(&b->dqcoeff[0], b->diff);
      ((int *)b->qcoeff)[0] = 0;
      ((int *)b->qcoeff)[1] = 0;
      ((int *)b->qcoeff)[2] = 0;
@ -671,7 +658,7 @@ static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd,
      ((int *)b->qcoeff)[6] = 0;
      ((int *)b->qcoeff)[7] = 0;
    } else {
-      xd->inv_walsh4x4_1(&b->dqcoeff[0], b->diff);
+      xd->inv_2ndtxm4x4_1(&b->dqcoeff[0], b->diff);
      ((int *)b->qcoeff)[0] = 0;
    }
    vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(
@ -1533,17 +1520,24 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) {

  pc->sb64_coded = vp9_read_literal(&header_bc, 8);
  pc->sb32_coded = vp9_read_literal(&header_bc, 8);
-
-  /* Read the loop filter level and type */
-  pc->txfm_mode = vp9_read_literal(&header_bc, 2);
-  if (pc->txfm_mode == 3)
-    pc->txfm_mode += vp9_read_bit(&header_bc);
-  if (pc->txfm_mode == TX_MODE_SELECT) {
-    pc->prob_tx[0] = vp9_read_literal(&header_bc, 8);
-    pc->prob_tx[1] = vp9_read_literal(&header_bc, 8);
-    pc->prob_tx[2] = vp9_read_literal(&header_bc, 8);
+#if CONFIG_LOSSLESS
+  xd->lossless = vp9_read_bit(&header_bc);
+  if (xd->lossless) {
+    pc->txfm_mode = ONLY_4X4;
+  }
+  else
+#endif
+  {
+    /* Read the loop filter level and type */
+    pc->txfm_mode = vp9_read_literal(&header_bc, 2);
+    if (pc->txfm_mode == 3)
+      pc->txfm_mode += vp9_read_bit(&header_bc);
+    if (pc->txfm_mode == TX_MODE_SELECT) {
+      pc->prob_tx[0] = vp9_read_literal(&header_bc, 8);
+      pc->prob_tx[1] = vp9_read_literal(&header_bc, 8);
+      pc->prob_tx[2] = vp9_read_literal(&header_bc, 8);
+    }
  }
-
  pc->filter_type = (LOOPFILTERTYPE) vp9_read_bit(&header_bc);
  pc->filter_level = vp9_read_literal(&header_bc, 6);
  pc->sharpness_level = vp9_read_literal(&header_bc, 3);
--- a/vp9/decoder/vp9_idct_blk.c
+++ b/vp9/decoder/vp9_idct_blk.c
@ -51,9 +51,9 @@ void vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(int16_t *q,
  for (i = 0; i < 4; i++) {
    for (j = 0; j < 4; j++) {
      if (*eobs++ > 1)
-        xd->dc_idct_add(q, dq, dst, dst, stride, stride, dc[0]);
+        xd->dc_itxm_add(q, dq, dst, dst, stride, stride, dc[0]);
      else
-        xd->dc_only_idct_add(dc[0], dst, dst, stride, stride);
+        xd->dc_only_itxm_add(dc[0], dst, dst, stride, stride);

      q   += 16;
      dst += 4;
@ -143,9 +143,9 @@ void vp9_dequant_idct_add_uv_block_4x4_inplace_c(int16_t *q, const int16_t *dq,
  for (i = 0; i < 2; i++) {
    for (j = 0; j < 2; j++) {
      if (*eobs++ > 1) {
-        xd->idct_add(q, dq, dstu, dstu, stride, stride);
+        xd->itxm_add(q, dq, dstu, dstu, stride, stride);
      } else {
-        xd->dc_only_idct_add(q[0]*dq[0], dstu, dstu, stride, stride);
+        xd->dc_only_itxm_add(q[0]*dq[0], dstu, dstu, stride, stride);
        ((int *)q)[0] = 0;
      }

@ -159,9 +159,9 @@ void vp9_dequant_idct_add_uv_block_4x4_inplace_c(int16_t *q, const int16_t *dq,
  for (i = 0; i < 2; i++) {
    for (j = 0; j < 2; j++) {
      if (*eobs++ > 1) {
-        xd->idct_add(q, dq, dstv, dstv, stride, stride);
+        xd->itxm_add(q, dq, dstv, dstv, stride, stride);
      } else {
-        xd->dc_only_idct_add(q[0]*dq[0], dstv, dstv, stride, stride);
+        xd->dc_only_itxm_add(q[0]*dq[0], dstv, dstv, stride, stride);
        ((int *)q)[0] = 0;
      }

--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@ -1667,7 +1667,13 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
  vp9_write_literal(&header_bc, pc->sb64_coded, 8);
  pc->sb32_coded = get_binary_prob(cpi->sb32_count[0], cpi->sb32_count[1]);
  vp9_write_literal(&header_bc, pc->sb32_coded, 8);
-
+#if CONFIG_LOSSLESS
+  vp9_write_bit(&header_bc, cpi->oxcf.lossless);
+  if (cpi->oxcf.lossless) {
+    pc->txfm_mode = ONLY_4X4;
+  }
+  else
+#endif
  {
    if (pc->txfm_mode == TX_MODE_SELECT) {
      pc->prob_tx[0] = get_prob(cpi->txfm_count_32x32p[TX_4X4] +
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@ -169,14 +169,14 @@ typedef struct macroblock {
  PICK_MODE_CONTEXT sb32_context[4];
  PICK_MODE_CONTEXT sb64_context;

-  void (*vp9_short_fdct4x4)(int16_t *input, int16_t *output, int pitch);
-  void (*vp9_short_fdct8x4)(int16_t *input, int16_t *output, int pitch);
-  void (*short_walsh4x4)(int16_t *input, int16_t *output, int pitch);
+  void (*fwd_txm4x4)(int16_t *input, int16_t *output, int pitch);
+  void (*fwd_txm8x4)(int16_t *input, int16_t *output, int pitch);
+  void (*fwd_2ndtxm4x4)(int16_t *input, int16_t *output, int pitch);
+  void (*fwd_txm8x8)(int16_t *input, int16_t *output, int pitch);
+  void (*fwd_txm16x16)(int16_t *input, int16_t *output, int pitch);
+  void (*fwd_2ndtxm2x2)(int16_t *input, int16_t *output, int pitch);
  void (*quantize_b_4x4)(BLOCK *b, BLOCKD *d);
  void (*quantize_b_4x4_pair)(BLOCK *b1, BLOCK *b2, BLOCKD *d0, BLOCKD *d1);
-  void (*vp9_short_fdct8x8)(int16_t *input, int16_t *output, int pitch);
-  void (*vp9_short_fdct16x16)(int16_t *input, int16_t *output, int pitch);
-  void (*short_fhaar2x2)(int16_t *input, int16_t *output, int pitch);
  void (*quantize_b_16x16)(BLOCK *b, BLOCKD *d);
  void (*quantize_b_8x8)(BLOCK *b, BLOCKD *d);
  void (*quantize_b_2x2)(BLOCK *b, BLOCKD *d);
--- a/vp9/encoder/vp9_encodeintra.c
+++ b/vp9/encoder/vp9_encodeintra.c
@ -58,7 +58,7 @@ void vp9_encode_intra4x4block(MACROBLOCK *x, int ib) {
    vp9_ht_quantize_b_4x4(be, b, tx_type);
    vp9_ihtllm(b->dqcoeff, b->diff, 32, tx_type, 4, b->eob);
  } else {
-    x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
+    x->fwd_txm4x4(be->src_diff, be->coeff, 32);
    x->quantize_b_4x4(be, b) ;
    vp9_inverse_transform_b_4x4(&x->e_mbd, ib, 32);
  }
@ -161,7 +161,7 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) {
                   tx_type, 8, xd->block[idx].eob);
 #endif
    } else {
-      x->vp9_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);
+      x->fwd_txm8x8(be->src_diff, (x->block + idx)->coeff, 32);
      x->quantize_b_8x8(x->block + idx, xd->block + idx);
      vp9_short_idct8x8(xd->block[idx].dqcoeff, xd->block[ib].diff, 32);
    }
@ -175,13 +175,13 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) {
        vp9_ht_quantize_b_4x4(be, b, tx_type);
        vp9_ihtllm(b->dqcoeff, b->diff, 32, tx_type, 4, b->eob);
      } else if (!(i & 1) && get_tx_type_4x4(xd, b + 1) == DCT_DCT) {
-        x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32);
+        x->fwd_txm8x4(be->src_diff, be->coeff, 32);
        x->quantize_b_4x4_pair(be, be + 1, b, b + 1);
        vp9_inverse_transform_b_4x4(xd, ib + iblock[i], 32);
        vp9_inverse_transform_b_4x4(xd, ib + iblock[i] + 1, 32);
        i++;
      } else {
-        x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
+        x->fwd_txm4x4(be->src_diff, be->coeff, 32);
        x->quantize_b_4x4(be, b);
        vp9_inverse_transform_b_4x4(xd, ib + iblock[i], 32);
      }
@ -214,7 +214,7 @@ static void encode_intra_uv4x4(MACROBLOCK *x, int ib,

  vp9_subtract_b(be, b, 8);

-  x->vp9_short_fdct4x4(be->src_diff, be->coeff, 16);
+  x->fwd_txm4x4(be->src_diff, be->coeff, 16);
  x->quantize_b_4x4(be, b);
  vp9_inverse_transform_b_4x4(&x->e_mbd, ib, 16);

--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@ -188,11 +188,11 @@ void vp9_transform_mby_4x4(MACROBLOCK *x) {
      assert(has_2nd_order == 0);
      vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 4);
    } else if (!(i & 1) && get_tx_type_4x4(xd, &xd->block[i + 1]) == DCT_DCT) {
-      x->vp9_short_fdct8x4(&x->block[i].src_diff[0],
+      x->fwd_txm8x4(&x->block[i].src_diff[0],
                           &x->block[i].coeff[0], 32);
      i++;
    } else {
-      x->vp9_short_fdct4x4(&x->block[i].src_diff[0],
+      x->fwd_txm4x4(&x->block[i].src_diff[0],
                           &x->block[i].coeff[0], 32);
    }
  }
@ -202,7 +202,7 @@ void vp9_transform_mby_4x4(MACROBLOCK *x) {
    build_dcblock_4x4(x);

    // do 2nd order transform on the dc block
-    x->short_walsh4x4(&x->block[24].src_diff[0],
+    x->fwd_2ndtxm4x4(&x->block[24].src_diff[0],
                      &x->block[24].coeff[0], 8);
  } else {
    vpx_memset(x->block[24].coeff, 0, 16 * sizeof(x->block[24].coeff[0]));
@ -213,7 +213,7 @@ void vp9_transform_mbuv_4x4(MACROBLOCK *x) {
  int i;

  for (i = 16; i < 24; i += 2) {
-    x->vp9_short_fdct8x4(&x->block[i].src_diff[0],
+    x->fwd_txm8x4(&x->block[i].src_diff[0],
                         &x->block[i].coeff[0], 16);
  }
 }
@ -253,7 +253,7 @@ void vp9_transform_mby_8x8(MACROBLOCK *x) {
      assert(has_2nd_order == 0);
      vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 8);
    } else {
-      x->vp9_short_fdct8x8(&x->block[i].src_diff[0],
+      x->fwd_txm8x8(&x->block[i].src_diff[0],
                           &x->block[i].coeff[0], 32);
    }
  }
@ -264,7 +264,7 @@ void vp9_transform_mby_8x8(MACROBLOCK *x) {
      assert(has_2nd_order == 0);
      vp9_fht_c(b->src_diff, 32, (b + 2)->coeff, tx_type, 8);
    } else {
-      x->vp9_short_fdct8x8(&x->block[i].src_diff[0],
+      x->fwd_txm8x8(&x->block[i].src_diff[0],
                           &x->block[i + 2].coeff[0], 32);
    }
  }
@ -274,7 +274,7 @@ void vp9_transform_mby_8x8(MACROBLOCK *x) {
    build_dcblock_8x8(x);

    // do 2nd order transform on the dc block
-    x->short_fhaar2x2(&x->block[24].src_diff[0],
+    x->fwd_2ndtxm2x2(&x->block[24].src_diff[0],
                      &x->block[24].coeff[0], 8);
  } else {
    vpx_memset(x->block[24].coeff, 0, 16 * sizeof(x->block[24].coeff[0]));
@ -285,7 +285,7 @@ void vp9_transform_mbuv_8x8(MACROBLOCK *x) {
  int i;

  for (i = 16; i < 24; i += 4) {
-    x->vp9_short_fdct8x8(&x->block[i].src_diff[0],
+    x->fwd_txm8x8(&x->block[i].src_diff[0],
                         &x->block[i].coeff[0], 16);
  }
 }
@ -303,7 +303,7 @@ void vp9_transform_mby_16x16(MACROBLOCK *x) {
  if (tx_type != DCT_DCT) {
    vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 16);
  } else {
-    x->vp9_short_fdct16x16(&x->block[0].src_diff[0],
+    x->fwd_txm16x16(&x->block[0].src_diff[0],
                           &x->block[0].coeff[0], 32);
  }
 }
@ -321,9 +321,9 @@ void vp9_transform_sby_32x32(MACROBLOCK *x) {
 void vp9_transform_sbuv_16x16(MACROBLOCK *x) {
  SUPERBLOCK * const x_sb = &x->sb_coeff_data;
  vp9_clear_system_state();
-  x->vp9_short_fdct16x16(x_sb->src_diff + 1024,
+  x->fwd_txm16x16(x_sb->src_diff + 1024,
                         x_sb->coeff + 1024, 32);
-  x->vp9_short_fdct16x16(x_sb->src_diff + 1280,
+  x->fwd_txm16x16(x_sb->src_diff + 1280,
                         x_sb->coeff + 1280, 32);
 }

--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@ -752,10 +752,10 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
  sf->quarter_pixel_search = 1;
  sf->half_pixel_search = 1;
  sf->iterative_sub_pixel = 1;
-#if CONFIG_LOSSLESS
-  sf->optimize_coefficients = 0;
-#else
  sf->optimize_coefficients = 1;
+#if CONFIG_LOSSLESS
+  if (cpi->oxcf.lossless)
+    sf->optimize_coefficients = 0;
 #endif
  sf->no_skip_block4x4_search = 1;
  sf->first_step = 0;
@ -840,20 +840,18 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
    }
  }

-  cpi->mb.vp9_short_fdct16x16 = vp9_short_fdct16x16;
-  cpi->mb.vp9_short_fdct8x8 = vp9_short_fdct8x8;
-  cpi->mb.vp9_short_fdct8x4 = vp9_short_fdct8x4;
-  cpi->mb.vp9_short_fdct4x4 = vp9_short_fdct4x4;
-  cpi->mb.short_walsh4x4 = vp9_short_walsh4x4;
-  cpi->mb.short_fhaar2x2 = vp9_short_fhaar2x2;
+  cpi->mb.fwd_txm16x16  = vp9_short_fdct16x16;
+  cpi->mb.fwd_txm8x8    = vp9_short_fdct8x8;
+  cpi->mb.fwd_txm8x4    = vp9_short_fdct8x4;
+  cpi->mb.fwd_txm4x4    = vp9_short_fdct4x4;
+  cpi->mb.fwd_2ndtxm4x4 = vp9_short_walsh4x4;
+  cpi->mb.fwd_2ndtxm2x2 = vp9_short_fhaar2x2;

 #if CONFIG_LOSSLESS
  if (cpi->oxcf.lossless) {
-    cpi->mb.vp9_short_fdct8x4 = vp9_short_walsh8x4_x8;
-    cpi->mb.vp9_short_fdct4x4 = vp9_short_walsh4x4_x8;
-    cpi->mb.short_walsh4x4 = vp9_short_walsh4x4;
-    cpi->mb.short_fhaar2x2 = vp9_short_fhaar2x2;
-    cpi->mb.short_walsh4x4 = vp9_short_walsh4x4_lossless;
+    cpi->mb.fwd_txm8x4    = vp9_short_walsh8x4_x8;
+    cpi->mb.fwd_txm4x4    = vp9_short_walsh4x4_x8;
+    cpi->mb.fwd_2ndtxm4x4 = vp9_short_walsh4x4_lossless;
  }
 #endif

@ -1206,18 +1204,18 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
  cpi->oxcf.best_allowed_q = q_trans[oxcf->best_allowed_q];
  cpi->oxcf.cq_level = q_trans[cpi->oxcf.cq_level];

-  cpi->mb.e_mbd.inv_xform4x4_1_x8     = vp9_short_idct4x4llm_1;
-  cpi->mb.e_mbd.inv_xform4x4_x8       = vp9_short_idct4x4llm;
-  cpi->mb.e_mbd.inv_walsh4x4_1        = vp9_short_inv_walsh4x4_1;
-  cpi->mb.e_mbd.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4;
+  cpi->mb.e_mbd.inv_txm4x4_1    = vp9_short_idct4x4llm_1;
+  cpi->mb.e_mbd.inv_txm4x4      = vp9_short_idct4x4llm;
+  cpi->mb.e_mbd.inv_2ndtxm4x4_1 = vp9_short_inv_walsh4x4_1;
+  cpi->mb.e_mbd.inv_2ndtxm4x4   = vp9_short_inv_walsh4x4;

 #if CONFIG_LOSSLESS
  cpi->oxcf.lossless = oxcf->lossless;
  if (cpi->oxcf.lossless) {
-    cpi->mb.e_mbd.inv_xform4x4_1_x8     = vp9_short_inv_walsh4x4_1_x8;
-    cpi->mb.e_mbd.inv_xform4x4_x8       = vp9_short_inv_walsh4x4_x8;
-    cpi->mb.e_mbd.inv_walsh4x4_1        = vp9_short_inv_walsh4x4_1_lossless;
-    cpi->mb.e_mbd.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4_lossless;
+    cpi->mb.e_mbd.inv_txm4x4_1    = vp9_short_inv_walsh4x4_1_x8;
+    cpi->mb.e_mbd.inv_txm4x4      = vp9_short_inv_walsh4x4_x8;
+    cpi->mb.e_mbd.inv_2ndtxm4x4_1 = vp9_short_inv_walsh4x4_1_lossless;
+    cpi->mb.e_mbd.inv_2ndtxm4x4   = vp9_short_inv_walsh4x4_lossless;
  }
 #endif

@ -2619,10 +2617,10 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
  // For 2 Pass Only used where GF/ARF prediction quality
  // is above a threshold
  cpi->zbin_mode_boost = 0;
-#if CONFIG_LOSSLESS
-  cpi->zbin_mode_boost_enabled = FALSE;
-#else
  cpi->zbin_mode_boost_enabled = TRUE;
+#if CONFIG_LOSSLESS
+  if (cpi->oxcf.lossless)
+    cpi->zbin_mode_boost_enabled = FALSE;
 #endif
  if (cpi->gfu_boost <= 400) {
    cpi->zbin_mode_boost_enabled = FALSE;
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@ -460,18 +460,14 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
  static const int zbin_boost[16] = { 0,  0,  0,  8,  8,  8, 10, 12,
                                     14, 16, 20, 24, 28, 32, 36, 40 };

-
-  int qrounding_factor = 48;
-
  for (Q = 0; Q < QINDEX_RANGE; Q++) {
    int qzbin_factor = (vp9_dc_quant(Q, 0) < 148) ? 84 : 80;

+    int qrounding_factor = 48;
 #if CONFIG_LOSSLESS
-    if (cpi->oxcf.lossless) {
-      if (Q == 0) {
-        qzbin_factor = 64;
-        qrounding_factor = 64;
-      }
+    if (cpi->oxcf.lossless && Q == 0) {
+      qzbin_factor = 64;
+      qrounding_factor = 64;
    }
 #endif

--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@ -1140,7 +1140,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be,
      vp9_fht(be->src_diff, 32, be->coeff, tx_type, 4);
      vp9_ht_quantize_b_4x4(be, b, tx_type);
    } else {
-      x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
+      x->fwd_txm4x4(be->src_diff, be->coeff, 32);
      x->quantize_b_4x4(be, b);
    }

@ -1172,7 +1172,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be,
  if (best_tx_type != DCT_DCT)
    vp9_ihtllm(best_dqcoeff, b->diff, 32, best_tx_type, 4, b->eob);
  else
-    xd->inv_xform4x4_x8(best_dqcoeff, b->diff, 32);
+    xd->inv_txm4x4(best_dqcoeff, b->diff, 32);

  vp9_recon_b(best_predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);

@ -1436,7 +1436,7 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
      if (tx_type != DCT_DCT)
        vp9_fht(be->src_diff, 32, (x->block + idx)->coeff, tx_type, 8);
      else
-        x->vp9_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);
+        x->fwd_txm8x8(be->src_diff, (x->block + idx)->coeff, 32);
      x->quantize_b_8x8(x->block + idx, xd->block + idx);

      // compute quantization mse of 8x8 block
@ -1470,11 +1470,11 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
          vp9_fht_c(be->src_diff, 32, be->coeff, tx_type, 4);
          vp9_ht_quantize_b_4x4(be, b, tx_type);
        } else if (!(i & 1) && get_tx_type_4x4(xd, b + 1) == DCT_DCT) {
-          x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32);
+          x->fwd_txm8x4(be->src_diff, be->coeff, 32);
          x->quantize_b_4x4_pair(be, be + 1, b, b + 1);
          do_two = 1;
        } else {
-          x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
+          x->fwd_txm4x4(be->src_diff, be->coeff, 32);
          x->quantize_b_4x4(be, b);
        }
        distortion += vp9_block_error_c(be->coeff, b->dqcoeff, 16 << do_two);
@ -2244,7 +2244,7 @@ static int64_t encode_inter_mb_segment(MACROBLOCK *x,
      if (xd->mode_info_context->mbmi.second_ref_frame > 0)
        vp9_build_2nd_inter_predictors_b(bd, 16, &xd->subpix);
      vp9_subtract_b(be, bd, 16);
-      x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
+      x->fwd_txm4x4(be->src_diff, be->coeff, 32);
      x->quantize_b_4x4(be, bd);
      thisdistortion = vp9_block_error(be->coeff, bd->dqcoeff, 16);
      *distortion += thisdistortion;
@ -2296,7 +2296,7 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x,

      if (xd->mode_info_context->mbmi.txfm_size == TX_4X4) {
        if (otherrd) {
-          x->vp9_short_fdct8x8(be->src_diff, be2->coeff, 32);
+          x->fwd_txm8x8(be->src_diff, be2->coeff, 32);
          x->quantize_b_8x8(be2, bd2);
          thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64);
          otherdist += thisdistortion;
@ -2308,7 +2308,7 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x,
        for (j = 0; j < 4; j += 2) {
          bd = &xd->block[ib + iblock[j]];
          be = &x->block[ib + iblock[j]];
-          x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32);
+          x->fwd_txm8x4(be->src_diff, be->coeff, 32);
          x->quantize_b_4x4_pair(be, be + 1, bd, bd + 1);
          thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32);
          *distortion += thisdistortion;
@ -2326,7 +2326,7 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x,
          for (j = 0; j < 4; j += 2) {
            BLOCKD *bd = &xd->block[ib + iblock[j]];
            BLOCK *be = &x->block[ib + iblock[j]];
-            x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32);
+            x->fwd_txm8x4(be->src_diff, be->coeff, 32);
            x->quantize_b_4x4_pair(be, be + 1, bd, bd + 1);
            thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32);
            otherdist += thisdistortion;
@ -2340,7 +2340,7 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x,
                           TX_4X4);
          }
        }
-        x->vp9_short_fdct8x8(be->src_diff, be2->coeff, 32);
+        x->fwd_txm8x8(be->src_diff, be2->coeff, 32);
        x->quantize_b_8x8(be2, bd2);
        thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64);
        *distortion += thisdistortion;