WebM Experimental Codec Branch Snapshot

This is a code snapshot of experimental work currently ongoing for a next-generation codec. The codebase has been cut down considerably from the libvpx baseline. For example, we are currently only supporting VBR 2-pass rate control and have removed most of the code relating to coding speed, threading, error resilience, partitions and various other features. This is in part to make the codebase easier to work on and experiment with, but also because we want to have an open discussion about how the bitstream will be structured and partitioned and not have that conversation constrained by past work. Our basic working pattern has been to initially encapsulate experiments using configure options linked to #if CONFIG_XXX statements in the code. Once experiments have matured and we are reasonably happy that they give benefit and can be merged without breaking other experiments, we remove the conditional compile statements and merge them in. Current changes include: * Temporal coding experiment for segments (though still only 4 max, it will likely be increased). * Segment feature experiment - to allow various bits of information to be coded at the segment level. Features tested so far include mode and reference frame information, limiting end of block offset and transform size, alongside Q and loop filter parameters, but this set is very fluid. * Support for 8x8 transform - 8x8 DCT with 2nd order 2x2 Haar is used in MBs using 16x16 prediction modes within inter frames. * Compound prediction (combination of signals from existing predictors to create a new predictor). * 8-tap interpolation filters and 1/8th pel motion vectors. * Loop filter modifications. * Various entropy modifications and changes to how entropy contexts and updates are handled. * Extended quantizer range matched to transform precision improvements.
There are also ongoing further experiments that we hope to merge in the near future, for example, coding of motion and other aspects of the prediction signal to better support larger image formats, use of larger block sizes (e.g. 32x32 and up) and lossless non-transform based coding options (especially for key frames). It is our hope that we will be able to make regular updates and we will warmly welcome community contributions. Please be warned that, at this stage, the codebase is slower than the VP8 stable branch as most new code has not been optimized, and even the 'C' has been deliberately written to be simple and obvious, not fast. The following graphs show the initial test results; the numbers in the tables give the compression improvement as a percentage. The build has the following optional experiments configured: --enable-experimental --enable-enhanced_interp --enable-uvintra --enable-high_precision_mv --enable-sixteenth_subpel_uv CIF Size clips: http://getwebm.org/tmp/cif/ HD size clips: http://getwebm.org/tmp/hd/ (stable_20120309 represents encoding results of WebM master branch build as of commit 7a15907) They were encoded using the following encode parameters: --good --cpu-used=0 -t 0 --lag-in-frames=25 --min-q=0 --max-q=63 --end-usage=0 --auto-alt-ref=1 -p 2 --pass=2 --kf-max-dist=9999 --kf-min-dist=0 --drop-frame=0 --static-thresh=0 --bias-pct=50 --minsection-pct=0 --maxsection-pct=800 --sharpness=0 --arnr-maxframes=7 --arnr-strength=3 (for HD, 6 for CIF) --arnr-type=3 Change-Id: I5c62ed09cfff5815a2bb34e7820d6a810c23183c
2012-03-09 17:32:50 -08:00 · 2012-03-09 17:32:50 -08:00 · 6035da5448
--- a/22
+++ b/22
@ -34,10 +34,7 @@ Advanced options:
  ${toggle_internal_stats}        output of encoder internal stats for debug, if supported (encoders)
  ${toggle_mem_tracker}           track memory usage
  ${toggle_postproc}              postprocessing
-  ${toggle_multithread}           multithreaded encoding and decoding.
  ${toggle_spatial_resampling}    spatial sampling (scaling) support
-  ${toggle_realtime_only}         enable this option while building for real-time encoding
-  ${toggle_error_concealment}     enable this option to get a decoder which is able to conceal losses
  ${toggle_runtime_cpu_detect}    runtime cpu detection
  ${toggle_shared}                shared library support
  ${toggle_static}                static library support
@ -159,7 +156,6 @@ enable optimizations
 enable fast_unaligned #allow unaligned accesses, if supported by hw
 enable md5
 enable spatial_resampling
-enable multithread
 enable os_support

 [ -d ${source_path}/../include ] && enable alt_tree_layout
@ -217,7 +213,16 @@ HAVE_LIST="
    unistd_h
 "
 EXPERIMENT_LIST="
-    extend_qrange
+    t8x8
+    csm
+    qimode
+    uvintra
+    compred
+    enhanced_interp
+    featureupdates
+    high_precision_mv
+    sixteenth_subpel_uv
+    comp_intra_pred
 "
 CONFIG_LIST="
    external_build
@ -246,7 +251,6 @@ CONFIG_LIST="
    dc_recon
    runtime_cpu_detect
    postproc
-    multithread
    internal_stats
    ${CODECS}
    ${CODEC_FAMILIES}
@ -254,8 +258,6 @@ CONFIG_LIST="
    decoders
    static_msvcrt
    spatial_resampling
-    realtime_only
-    error_concealment
    shared
    static
    small
@ -292,15 +294,12 @@ CMDLINE_SELECT="
    dequant_tokens
    dc_recon
    postproc
-    multithread
    internal_stats
    ${CODECS}
    ${CODEC_FAMILIES}
    static_msvcrt
    mem_tracker
    spatial_resampling
-    realtime_only
-    error_concealment
    shared
    static
    small
@ -393,7 +392,6 @@ process_targets() {
    enabled debug_libs && DIST_DIR="${DIST_DIR}-debug"
    enabled codec_srcs && DIST_DIR="${DIST_DIR}-src"
    ! enabled postproc && DIST_DIR="${DIST_DIR}-nopost"
-    ! enabled multithread && DIST_DIR="${DIST_DIR}-nomt"
    ! enabled install_docs && DIST_DIR="${DIST_DIR}-nodocs"
    DIST_DIR="${DIST_DIR}-${tgt_isa}-${tgt_os}"
    case "${tgt_os}" in
--- a/examples.mk
+++ b/examples.mk
@ -16,7 +16,7 @@ UTILS-$(CONFIG_DECODERS)    += vpxdec.c
 vpxdec.SRCS                 += md5_utils.c md5_utils.h
 vpxdec.SRCS                 += vpx_ports/vpx_timer.h
 vpxdec.SRCS                 += vpx/vpx_integer.h
-vpxdec.SRCS                 += args.c args.h
+vpxdec.SRCS                 += args.c args.h vpx_ports/config.h
 vpxdec.SRCS                 += tools_common.c tools_common.h
 vpxdec.SRCS                 += nestegg/halloc/halloc.h
 vpxdec.SRCS                 += nestegg/halloc/src/align.h
@ -30,7 +30,7 @@ vpxdec.DESCRIPTION           = Full featured decoder
 UTILS-$(CONFIG_ENCODERS)    += vpxenc.c
 vpxenc.SRCS                 += args.c args.h y4minput.c y4minput.h
 vpxenc.SRCS                 += tools_common.c tools_common.h
-vpxenc.SRCS                 += vpx_ports/mem_ops.h
+vpxenc.SRCS                 += vpx_ports/config.h vpx_ports/mem_ops.h
 vpxenc.SRCS                 += vpx_ports/mem_ops_aligned.h
 vpxenc.SRCS                 += libmkv/EbmlIDs.h
 vpxenc.SRCS                 += libmkv/EbmlWriter.c
@ -77,11 +77,6 @@ GEN_EXAMPLES-$(CONFIG_ENCODERS) += decode_with_drops.c
 endif
 decode_with_drops.GUID           = CE5C53C4-8DDA-438A-86ED-0DDD3CDB8D26
 decode_with_drops.DESCRIPTION    = Drops frames while decoding
-ifeq ($(CONFIG_DECODERS),yes)
-GEN_EXAMPLES-$(CONFIG_ERROR_CONCEALMENT) += decode_with_partial_drops.c
-endif
-decode_with_partial_drops.GUID           = 61C2D026-5754-46AC-916F-1343ECC5537E
-decode_with_partial_drops.DESCRIPTION    = Drops parts of frames while decoding
 GEN_EXAMPLES-$(CONFIG_ENCODERS) += error_resilient.c
 error_resilient.GUID             = DF5837B9-4145-4F92-A031-44E4F832E00C
 error_resilient.DESCRIPTION      = Error Resiliency Feature
--- a/examples/decode_with_partial_drops.txt
+++ b/examples/decode_with_partial_drops.txt
@ -1,238 +0,0 @@
-@TEMPLATE decoder_tmpl.c
-Decode With Partial Drops Example
-=========================
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INTRODUCTION
-This is an example utility which drops a series of frames (or parts of frames),
-as specified on the command line. This is useful for observing the error
-recovery features of the codec.
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INTRODUCTION
-
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRA_INCLUDES
-#include <time.h>
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRA_INCLUDES
-
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HELPERS
-struct parsed_header
-{
-    char key_frame;
-    int version;
-    char show_frame;
-    int first_part_size;
-};
-
-int next_packet(struct parsed_header* hdr, int pos, int length, int mtu)
-{
-    int size = 0;
-    int remaining = length - pos;
-    /* Uncompressed part is 3 bytes for P frames and 10 bytes for I frames */
-    int uncomp_part_size = (hdr->key_frame ? 10 : 3);
-    /* number of bytes yet to send from header and the first partition */
-    int remainFirst = uncomp_part_size + hdr->first_part_size - pos;
-    if (remainFirst > 0)
-    {
-        if (remainFirst <= mtu)
-        {
-            size = remainFirst;
-        }
-        else
-        {
-            size = mtu;
-        }
-
-        return size;
-    }
-
-    /* second partition; just slot it up according to MTU */
-    if (remaining <= mtu)
-    {
-        size = remaining;
-        return size;
-    }
-    return mtu;
-}
-
-void throw_packets(unsigned char* frame, int* size, int loss_rate,
-                   int* thrown, int* kept)
-{
-    unsigned char loss_frame[256*1024];
-    int pkg_size = 1;
-    int pos = 0;
-    int loss_pos = 0;
-    struct parsed_header hdr;
-    unsigned int tmp;
-    int mtu = 1500;
-
-    if (*size < 3)
-    {
-        return;
-    }
-    putc('|', stdout);
-    /* parse uncompressed 3 bytes */
-    tmp = (frame[2] << 16) | (frame[1] << 8) | frame[0];
-    hdr.key_frame = !(tmp & 0x1); /* inverse logic */
-    hdr.version = (tmp >> 1) & 0x7;
-    hdr.show_frame = (tmp >> 4) & 0x1;
-    hdr.first_part_size = (tmp >> 5) & 0x7FFFF;
-
-    /* don't drop key frames */
-    if (hdr.key_frame)
-    {
-        int i;
-        *kept = *size/mtu + ((*size % mtu > 0) ? 1 : 0); /* approximate */
-        for (i=0; i < *kept; i++)
-            putc('.', stdout);
-        return;
-    }
-
-    while ((pkg_size = next_packet(&hdr, pos, *size, mtu)) > 0)
-    {
-        int loss_event = ((rand() + 1.0)/(RAND_MAX + 1.0) < loss_rate/100.0);
-        if (*thrown == 0 && !loss_event)
-        {
-            memcpy(loss_frame + loss_pos, frame + pos, pkg_size);
-            loss_pos += pkg_size;
-            (*kept)++;
-            putc('.', stdout);
-        }
-        else
-        {
-            (*thrown)++;
-            putc('X', stdout);
-        }
-        pos += pkg_size;
-    }
-    memcpy(frame, loss_frame, loss_pos);
-    memset(frame + loss_pos, 0, *size - loss_pos);
-    *size = loss_pos;
-}
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HELPERS
-
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_INIT
-/* Initialize codec */
-flags = VPX_CODEC_USE_ERROR_CONCEALMENT;
-res = vpx_codec_dec_init(&codec, interface, &dec_cfg, flags);
-if(res)
-    die_codec(&codec, "Failed to initialize decoder");
-
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_INIT
-
-Usage
-----
-This example adds a single argument to the `simple_decoder` example,
-which specifies the range or pattern of frames to drop. The parameter is
-parsed as follows:
-
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ USAGE
-if(argc < 4 || argc > 6)
-    die("Usage: %s <infile> <outfile> [-t <num threads>] <N-M|N/M|L,S>\n",
-        argv[0]);
-{
-    char *nptr;
-    int arg_num = 3;
-    if (argc == 6 && strncmp(argv[arg_num++], "-t", 2) == 0)
-        dec_cfg.threads = strtol(argv[arg_num++], NULL, 0);
-    n = strtol(argv[arg_num], &nptr, 0);
-    mode = (*nptr == '\0' || *nptr == ',') ? 2 : (*nptr == '-') ? 1 : 0;
-
-    m = strtol(nptr+1, NULL, 0);
-    if((!n && !m) || (*nptr != '-' && *nptr != '/' &&
-        *nptr != '\0' && *nptr != ','))
-        die("Couldn't parse pattern %s\n", argv[3]);
-}
-seed = (m > 0) ? m : (unsigned int)time(NULL);
-srand(seed);thrown_frame = 0;
-printf("Seed: %u\n", seed);
-printf("Threads: %d\n", dec_cfg.threads);
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ USAGE
-
-
-Dropping A Range Of Frames
--------------------------
-To drop a range of frames, specify the starting frame and the ending
-frame to drop, separated by a dash. The following command will drop
-frames 5 through 10 (base 1).
-
-  $ ./decode_with_partial_drops in.ivf out.i420 5-10
-
-
-Dropping A Pattern Of Frames
----------------------------
-To drop a pattern of frames, specify the number of frames to drop and
-the number of frames after which to repeat the pattern, separated by
-a forward-slash. The following command will drop 3 of 7 frames.
-Specifically, it will decode 4 frames, then drop 3 frames, and then
-repeat.
-
-  $ ./decode_with_partial_drops in.ivf out.i420 3/7
-
-Dropping Random Parts Of Frames
-------------------------------
-A third argument tuple is available to split the frame into 1500 bytes pieces
-and randomly drop pieces rather than frames. The frame will be split at
-partition boundaries where possible. The following example will seed the RNG
-with the seed 123 and drop approximately 5% of the pieces. Pieces which
-are depending on an already dropped piece will also be dropped.
-
-  $ ./decode_with_partial_drops in.ivf out.i420 5,123
-
-
-Extra Variables
---------------
-This example maintains the pattern passed on the command line in the
-`n`, `m`, and `is_range` variables:
-
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRA_VARS
-int              n, m, mode;
-unsigned int     seed;
-int              thrown=0, kept=0;
-int              thrown_frame=0, kept_frame=0;
-vpx_codec_dec_cfg_t  dec_cfg = {0};
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRA_VARS
-
-
-Making The Drop Decision
------------------------
-The example decides whether to drop the frame based on the current
-frame number, immediately before decoding the frame.
-
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PRE_DECODE
-/* Decide whether to throw parts of the frame or the whole frame
-   depending on the drop mode */
-thrown_frame = 0;
-kept_frame = 0;
-switch (mode)
-{
-case 0:
-    if (m - (frame_cnt-1)%m <= n)
-    {
-        frame_sz = 0;
-    }
-    break;
-case 1:
-    if (frame_cnt >= n && frame_cnt <= m)
-    {
-        frame_sz = 0;
-    }
-    break;
-case 2:
-    throw_packets(frame, &frame_sz, n, &thrown_frame, &kept_frame);
-    break;
-default: break;
-}
-if (mode < 2)
-{
-    if (frame_sz == 0)
-    {
-        putc('X', stdout);
-        thrown_frame++;
-    }
-    else
-    {
-        putc('.', stdout);
-        kept_frame++;
-    }
-}
-thrown += thrown_frame;
-kept += kept_frame;
-fflush(stdout);
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PRE_DECODE
--- a/vp8/common/alloccommon.c
+++ b/vp8/common/alloccommon.c
@ -9,7 +9,7 @@
 */


-#include "vpx_config.h"
+#include "vpx_ports/config.h"
 #include "blockd.h"
 #include "vpx_mem/vpx_mem.h"
 #include "onyxc_int.h"
@ -20,17 +20,37 @@

 extern  void vp8_init_scan_order_mask();

-static void update_mode_info_border(MODE_INFO *mi, int rows, int cols)
+static void update_mode_info_border( VP8_COMMON *cpi, MODE_INFO *mi_base ) // Zeroes the top border row and the first element of each following row of the MODE_INFO array at mi_base
 {
+    int stride = cpi->mode_info_stride;
    int i;
-    vpx_memset(mi - cols - 2, 0, sizeof(MODE_INFO) * (cols + 1));

-    for (i = 0; i < rows; i++)
+    // Zero the top border row: mode_info_stride entries starting at mi_base
+    vpx_memset(mi_base, 0, sizeof(MODE_INFO) * cpi->mode_info_stride);
+
+    // Zero the left border element (first entry) of rows 1..mb_rows
+    for (i = 1; i < cpi->mb_rows+1; i++)
    {
-        /* TODO(holmer): Bug? This updates the last element of each row
-         * rather than the border element!
-         */
-        vpx_memset(&mi[i*cols-1], 0, sizeof(MODE_INFO));
+        vpx_memset(&mi_base[i*stride], 0, sizeof(MODE_INFO));
+    }
+}
+static void update_mode_info_in_image( VP8_COMMON *cpi, MODE_INFO *mi ) // Sets mbmi.mb_in_image = 1 on mb_rows x mb_cols entries, walking forward from mi
+{
+    int stride = cpi->mode_info_stride;  // NOTE(review): not referenced below
+    int rows = cpi->mb_rows;             // NOTE(review): not referenced below; the loop reads cpi->mb_rows
+    int cols = cpi->mb_cols;             // NOTE(review): not referenced below; the loop reads cpi->mb_cols
+    int i, j;
+
+    // For each in-image mode_info element, set the in-image flag to 1
+    for (i = 0; i < cpi->mb_rows; i++)
+    {
+        for (j = 0; j < cpi->mb_cols; j++)
+        {
+            mi->mbmi.mb_in_image = 1;
+            mi++;   // Next element in the row
+        }
+
+        mi++;       // Step over border element at start of next row (assumes exactly one border entry per row, i.e. stride == mb_cols + 1 -- TODO confirm)
    }
 }

@ -116,7 +136,7 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
    oci->mi = oci->mip + oci->mode_info_stride + 1;

    /* allocate memory for last frame MODE_INFO array */
-#if CONFIG_ERROR_CONCEALMENT
+
    oci->prev_mip = vpx_calloc((oci->mb_cols + 1) * (oci->mb_rows + 1), sizeof(MODE_INFO));

    if (!oci->prev_mip)
@ -126,10 +146,6 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
    }

    oci->prev_mi = oci->prev_mip + oci->mode_info_stride + 1;
-#else
-    oci->prev_mip = NULL;
-    oci->prev_mi = NULL;
-#endif

    oci->above_context = vpx_calloc(sizeof(ENTROPY_CONTEXT_PLANES) * oci->mb_cols, 1);

@ -139,10 +155,8 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
        return 1;
    }

-    update_mode_info_border(oci->mi, oci->mb_rows, oci->mb_cols);
-#if CONFIG_ERROR_CONCEALMENT
-    update_mode_info_border(oci->prev_mi, oci->mb_rows, oci->mb_cols);
-#endif
+    update_mode_info_border(oci, oci->mip);
+    update_mode_info_in_image(oci, oci->mi);

    return 0;
 }
@ -153,10 +167,10 @@ void vp8_setup_version(VP8_COMMON *cm)
        if (!CONFIG_EXPERIMENTAL)
            vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
                               "Bitstream was created by an experimental "
-                               "encoder");        
+                               "encoder");
        cm->experimental = 1;
    }
-    
+
    switch (cm->version & 0x3)
    {
    case 0:
@ -172,17 +186,19 @@ void vp8_setup_version(VP8_COMMON *cm)
        cm->full_pixel = 0;
        break;
    case 2:
+    case 3:
        cm->no_lpf = 1;
        cm->filter_type = NORMAL_LOOPFILTER;
        cm->use_bilinear_mc_filter = 1;
        cm->full_pixel = 0;
        break;
-    case 3:
-        cm->no_lpf = 1;
-        cm->filter_type = SIMPLE_LOOPFILTER;
-        cm->use_bilinear_mc_filter = 1;
-        cm->full_pixel = 1;
-        break;
+    // Full pel only code deprecated in experimental code base
+    //case 3:
+    //    cm->no_lpf = 1;
+    //    cm->filter_type = SIMPLE_LOOPFILTER;
+    //    cm->use_bilinear_mc_filter = 1;
+    //    cm->full_pixel = 1;
+    //    break;
    }
 }
 void vp8_create_common(VP8_COMMON *oci)
@ -190,14 +206,16 @@ void vp8_create_common(VP8_COMMON *oci)
    vp8_machine_specific_config(oci);

    vp8_init_mbmode_probs(oci);
+
    vp8_default_bmode_probs(oci->fc.bmode_prob);

+    oci->txfm_mode = ONLY_4X4;
    oci->mb_no_coeff_skip = 1;
+    oci->comp_pred_mode = HYBRID_PREDICTION;
    oci->no_lpf = 0;
    oci->filter_type = NORMAL_LOOPFILTER;
    oci->use_bilinear_mc_filter = 0;
    oci->full_pixel = 0;
-    oci->multi_token_partition = ONE_PARTITION;
    oci->clr_type = REG_YUV;
    oci->clamp_type = RECON_CLAMP_REQUIRED;

@ -207,6 +225,9 @@ void vp8_create_common(VP8_COMMON *oci)
    /* Default disable buffer to buffer copying */
    oci->copy_buffer_to_gf = 0;
    oci->copy_buffer_to_arf = 0;
+#if CONFIG_QIMODE
+    oci->kf_ymode_probs_update = 0;
+#endif
 }

 void vp8_remove_common(VP8_COMMON *oci)
--- a/vp8/common/arm/arm_systemdependent.c
+++ b/vp8/common/arm/arm_systemdependent.c
@ -9,7 +9,7 @@
 */


-#include "vpx_config.h"
+#include "vpx_ports/config.h"
 #include "vpx_ports/arm.h"
 #include "vp8/common/g_common.h"
 #include "vp8/common/pragmas.h"
--- a/vp8/common/arm/filter_arm.c
+++ b/vp8/common/arm/filter_arm.c
@ -9,7 +9,7 @@
 */


-#include "vpx_config.h"
+#include "vpx_ports/config.h"
 #include <math.h>
 #include "vp8/common/filter.h"
 #include "vp8/common/subpixel.h"
--- a/vp8/common/arm/neon/recon_neon.c
+++ b/vp8/common/arm/neon/recon_neon.c
@ -9,7 +9,7 @@
 */


-#include "vpx_config.h"
+#include "vpx_ports/config.h"
 #include "vp8/common/recon.h"
 #include "vp8/common/blockd.h"

--- a/vp8/common/arm/reconintra_arm.c
+++ b/vp8/common/arm/reconintra_arm.c
@ -9,7 +9,7 @@
 */


-#include "vpx_config.h"
+#include "vpx_ports/config.h"
 #include "vp8/common/blockd.h"
 #include "vp8/common/reconintra.h"
 #include "vpx_mem/vpx_mem.h"
--- a/vp8/common/blockd.c
+++ b/vp8/common/blockd.c
@ -12,6 +12,7 @@
 #include "blockd.h"
 #include "vpx_mem/vpx_mem.h"

+
 const unsigned char vp8_block2left[25] =
 {
    0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8
@ -20,3 +21,13 @@ const unsigned char vp8_block2above[25] =
 {
    0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8
 };
+
+const unsigned char vp8_block2left_8x8[25] =
+{
+    0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6, 8
+};
+const unsigned char vp8_block2above_8x8[25] =
+{
+    0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6, 8
+};
+
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@ -14,21 +14,26 @@

 void vpx_log(const char *format, ...);

-#include "vpx_config.h"
+#include "vpx_ports/config.h"
 #include "vpx_scale/yv12config.h"
 #include "mv.h"
 #include "treecoder.h"
 #include "subpixel.h"
 #include "vpx_ports/mem.h"
+#include "common.h"

 #define TRUE    1
 #define FALSE   0

+//#define MODE_STATS
+
 /*#define DCPRED 1*/
 #define DCPREDSIMTHRESH 0
 #define DCPREDCNTTHRESH 3

 #define MB_FEATURE_TREE_PROBS   3
+#define PREDICTION_PROBS 3
+
 #define MAX_MB_SEGMENTS         4

 #define MAX_REF_LF_DELTAS       4
@ -60,11 +65,13 @@ typedef struct

 extern const unsigned char vp8_block2left[25];
 extern const unsigned char vp8_block2above[25];
+extern const unsigned char vp8_block2left_8x8[25];
+extern const unsigned char vp8_block2above_8x8[25];
+

 #define VP8_COMBINEENTROPYCONTEXTS( Dest, A, B) \
    Dest = ((A)!=0) + ((B)!=0);

-
 typedef enum
 {
    KEY_FRAME = 0,
@ -77,6 +84,7 @@ typedef enum
    V_PRED,             /* vertical prediction */
    H_PRED,             /* horizontal prediction */
    TM_PRED,            /* Truemotion prediction */
+    I8X8_PRED,           /* 8x8 based prediction, each 8x8 has its own prediction mode */
    B_PRED,             /* block based prediction, each block has its own prediction mode */

    NEARESTMV,
@ -88,21 +96,32 @@ typedef enum
    MB_MODE_COUNT
 } MB_PREDICTION_MODE;

-/* Macroblock level features */
+// Segment level features: identifiers for the per-segment settings listed below.
 typedef enum
 {
-    MB_LVL_ALT_Q = 0,               /* Use alternate Quantizer .... */
-    MB_LVL_ALT_LF = 1,              /* Use alternate loop filter value... */
-    MB_LVL_MAX = 2                  /* Number of MB level features supported */
+    SEG_LVL_ALT_Q = 0,               // Use alternate Quantizer ....
+    SEG_LVL_ALT_LF = 1,              // Use alternate loop filter value...
+    SEG_LVL_REF_FRAME = 2,           // Optional Segment reference frame
+    SEG_LVL_MODE = 3,                // Optional Segment mode
+    SEG_LVL_EOB = 4,                 // EOB end stop marker.
+    SEG_LVL_TRANSFORM = 5,           // Block transform size.
+    SEG_LVL_MAX = 6                  // Number of segment level features supported

-} MB_LVL_FEATURES;
+} SEG_LVL_FEATURES;

-/* Segment Feature Masks */
-#define SEGMENT_ALTQ    0x01
-#define SEGMENT_ALT_LF  0x02
+// Transform sizes.
+typedef enum
+{
+    TX_4X4 = 0,                      // 4x4 dct transform
+    TX_8X8 = 1,                      // 8x8 dct transform
+
+    TX_SIZE_MAX = 2                  // Number of different transforms available
+
+} TX_SIZE;

 #define VP8_YMODES  (B_PRED + 1)
 #define VP8_UV_MODES (TM_PRED + 1)
+#define VP8_I8X8_MODES (TM_PRED + 1)

 #define VP8_MVREFS (1 + SPLITMV - NEARESTMV)

@ -139,7 +158,12 @@ typedef enum

 union b_mode_info
 {
-    B_PREDICTION_MODE as_mode;
+    struct {
+        B_PREDICTION_MODE first;
+#if CONFIG_COMP_INTRA_PRED
+        B_PREDICTION_MODE second;
+#endif
+    } as_mode;
    int_mv mv;
 };

@ -155,13 +179,26 @@ typedef enum
 typedef struct
 {
    MB_PREDICTION_MODE mode, uv_mode;
-    MV_REFERENCE_FRAME ref_frame;
-    int_mv mv;
-
+#if CONFIG_COMP_INTRA_PRED
+    MB_PREDICTION_MODE second_mode, second_uv_mode;
+#endif
+    MV_REFERENCE_FRAME ref_frame, second_ref_frame;
+    TX_SIZE txfm_size;
+    int_mv mv, second_mv;
    unsigned char partitioning;
    unsigned char mb_skip_coeff;                                /* does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens */
    unsigned char need_to_clamp_mvs;
    unsigned char segment_id;                  /* Which set of segmentation parameters should be used for this MB */
+
+    // Flags used for prediction status of various bitstream signals
+    unsigned char seg_id_predicted;
+    unsigned char ref_predicted;
+
+    // Indicates if the mb is part of the image (1) vs border (0)
+    // This can be useful in determining whether the MB provides
+    // a valid predictor
+    unsigned char mb_in_image;
+
 } MB_MODE_INFO;

 typedef struct
@ -205,8 +242,12 @@ typedef struct MacroBlockD
    int fullpixel_mask;

    YV12_BUFFER_CONFIG pre; /* Filtered copy of previous frame reconstruction */
+    struct {
+        uint8_t *y_buffer, *u_buffer, *v_buffer;
+    } second_pre;
    YV12_BUFFER_CONFIG dst;

+    MODE_INFO *prev_mode_info_context;
    MODE_INFO *mode_info_context;
    int mode_info_stride;

@ -229,13 +270,24 @@ typedef struct MacroBlockD
    unsigned char update_mb_segmentation_data;

    /* 0 (do not update) 1 (update) the macroblock segmentation feature data. */
-    unsigned char mb_segement_abs_delta;
+    unsigned char mb_segment_abs_delta;

    /* Per frame flags that define which MB level features (such as quantizer or loop filter level) */
    /* are enabled and when enabled the proabilities used to decode the per MB flags in MB_MODE_INFO */
-    vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS];         /* Probability Tree used to code Segment number */

-    signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS];            /* Segment parameters */
+    // Probability Tree used to code Segment number
+    vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS];
+
+
+    // Segment features
+    signed char segment_feature_data[MAX_MB_SEGMENTS][SEG_LVL_MAX];
+    unsigned int segment_feature_mask[MAX_MB_SEGMENTS];
+
+#if CONFIG_FEATUREUPDATES
+    // keep around the last set so we can figure out what updates...
+    unsigned int old_segment_feature_mask[MAX_MB_SEGMENTS];
+    signed char old_segment_feature_data[MAX_MB_SEGMENTS][SEG_LVL_MAX];
+#endif

    /* mode_based Loop filter adjustment */
    unsigned char mode_ref_lf_delta_enabled;
@ -244,8 +296,8 @@ typedef struct MacroBlockD
    /* Delta values have the range +/- MAX_LOOP_FILTER */
    signed char last_ref_lf_deltas[MAX_REF_LF_DELTAS];                /* 0 = Intra, Last, GF, ARF */
    signed char ref_lf_deltas[MAX_REF_LF_DELTAS];                     /* 0 = Intra, Last, GF, ARF */
-    signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS];                      /* 0 = BPRED, ZERO_MV, MV, SPLIT */
-    signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];                           /* 0 = BPRED, ZERO_MV, MV, SPLIT */
+    signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS];              /* 0 = BPRED, ZERO_MV, MV, SPLIT */
+    signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];                   /* 0 = BPRED, ZERO_MV, MV, SPLIT */

    /* Distance of MB away from frame edges */
    int mb_to_left_edge;
@ -253,15 +305,17 @@ typedef struct MacroBlockD
    int mb_to_top_edge;
    int mb_to_bottom_edge;

-    int ref_frame_cost[MAX_REF_FRAMES];
-
-
    unsigned int frames_since_golden;
    unsigned int frames_till_alt_ref_frame;
    vp8_subpix_fn_t  subpixel_predict;
    vp8_subpix_fn_t  subpixel_predict8x4;
    vp8_subpix_fn_t  subpixel_predict8x8;
    vp8_subpix_fn_t  subpixel_predict16x16;
+    vp8_subpix_fn_t  subpixel_predict_avg8x8;
+    vp8_subpix_fn_t  subpixel_predict_avg16x16;
+#if CONFIG_HIGH_PRECISION_MV
+    int allow_high_precision_mv;
+#endif /* CONFIG_HIGH_PRECISION_MV */

    void *current_bc;

@ -284,4 +338,20 @@ typedef struct MacroBlockD
 extern void vp8_build_block_doffsets(MACROBLOCKD *x);
 extern void vp8_setup_block_dptrs(MACROBLOCKD *x);

+static void update_blockd_bmi(MACROBLOCKD *xd) // Copies bmi[0..15] of the current mode_info_context into xd->block[] for SPLITMV, I8X8_PRED and B_PRED macroblocks; otherwise does nothing
+{
+    int i;
+    int is_4x4; // non-zero when the MB mode is one of the three modes tested below
+    is_4x4 = (xd->mode_info_context->mbmi.mode == SPLITMV) ||
+             (xd->mode_info_context->mbmi.mode == I8X8_PRED) ||
+             (xd->mode_info_context->mbmi.mode == B_PRED);
+
+    if (is_4x4)
+    {
+        for (i = 0; i < 16; i++) // 16 entries; presumably the sixteen 4x4 luma blocks of the MB -- TODO confirm
+        {
+            xd->block[i].bmi = xd->mode_info_context->bmi[i];
+        }
+    }
+}
 #endif  /* __INC_BLOCKD_H */
--- a/vp8/common/coefupdateprobs.h
+++ b/vp8/common/coefupdateprobs.h
@ -183,3 +183,178 @@ const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTE
        },
    },
 };
+/* 8x8-transform counterpart of vp8_coef_update_probs; it has the same
+ * dimensions: [block type][coefficient band][previous-token context]
+ * [entropy tree node].
+ */
+const vp8_prob vp8_coef_update_probs_8x8 [BLOCK_TYPES]
+                                         [COEF_BANDS]
+                                         [PREV_COEF_CONTEXTS]
+                                         [ENTROPY_NODES] =
+{
+    {
+      {
+        {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+        {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+        {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+      },
+      {
+        {255, 229, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+        {219, 234, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+        {239, 204, 229, 255, 255, 255, 255, 255, 255, 255, 255, },
+      },
+      {
+        {255, 209, 229, 255, 255, 255, 255, 255, 255, 255, 255, },
+        {239, 219, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+        {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+      },
+      {
+        {255, 204, 229, 255, 255, 255, 255, 255, 255, 255, 255, },
+        {229, 209, 234, 255, 255, 255, 255, 255, 255, 255, 255, },
+        {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+      },
+      {
+        {255, 193, 209, 255, 255, 255, 255, 255, 255, 255, 255, },
+        {229, 198, 239, 255, 255, 255, 255, 255, 255, 255, 255, },
+        {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+      },
+      {
+        {255, 204, 204, 255, 255, 255, 255, 255, 255, 255, 255, },
+        {219, 198, 229, 255, 255, 255, 255, 255, 255, 255, 255, },
+        {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+      },
+      {
+        {255, 198, 204, 255, 255, 255, 255, 255, 255, 255, 255, },
+        {209, 193, 234, 249, 255, 255, 255, 255, 255, 255, 255, },
+        {255, 255, 249, 255, 255, 255, 255, 255, 255, 255, 255, },
+      },
+      {
+        {255, 214, 214, 255, 255, 255, 255, 255, 255, 255, 255, },
+        {173, 193, 234, 255, 255, 255, 255, 255, 255, 255, 255, },
+        {249, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+      },
+    },
+    {
+      {
+        {255, 255, 234, 255, 255, 255, 255, 255, 255, 255, 255, },
+        {224, 224, 219, 255, 255, 255, 255, 255, 255, 255, 255, },
+        {229, 239, 234, 255, 255, 255, 255, 255, 255, 255, 255, },
+      },
+      {
+        {255, 234, 224, 255, 255, 255, 255, 255, 255, 255, 255, },
+        {224, 234, 234, 255, 255, 255, 255, 255, 255, 255, 255, },
+        {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+      },
+      {
+        {255, 255, 229, 255, 255, 255, 255, 255, 255, 255, 255, },
+        {229, 255, 234, 255, 255, 255, 255, 255, 255, 255, 255, },
+        {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+      },
+      {
+        {255, 255, 229, 255, 255, 255, 255, 255, 255, 255, 255, },
+        {224, 255, 239, 255, 255, 255, 255, 255, 255, 255, 255, },
+        {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+      },
+      {
+        {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+        {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+        {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+      },
+      {
+        {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+        {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+        {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+      },
+      {
+        {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+        {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+        {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+      },
+      {
+        {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+        {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+        {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+      },
+    },
+    {
+      {
+        {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+        {224, 219, 234, 255, 255, 255, 255, 255, 255, 255, 255, },
+        {234, 183, 214, 255, 255, 255, 255, 255, 255, 255, 255, },
+      },
+      {
+        {255, 193, 229, 255, 249, 255, 255, 255, 255, 255, 255, },
+        {229, 214, 234, 249, 255, 255, 255, 255, 255, 255, 255, },
+        {255, 249, 255, 255, 249, 255, 255, 255, 255, 255, 255, },
+      },
+      {
+        {255, 198, 229, 255, 255, 255, 255, 255, 255, 255, 255, },
+        {229, 219, 249, 255, 255, 255, 255, 255, 255, 255, 255, },
+        {255, 255, 255, 249, 255, 255, 255, 255, 255, 255, 255, },
+      },
+      {
+        {255, 193, 224, 255, 255, 255, 255, 255, 255, 255, 255, },
+        {229, 204, 234, 249, 249, 255, 255, 255, 255, 255, 255, },
+        {255, 249, 249, 255, 244, 249, 255, 255, 255, 255, 255, },
+      },
+      {
+        {255, 178, 224, 255, 249, 255, 255, 255, 255, 255, 255, },
+        {234, 224, 234, 249, 255, 255, 255, 255, 255, 255, 255, },
+        {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+      },
+      {
+        {255, 183, 229, 255, 249, 255, 255, 255, 255, 255, 255, },
+        {234, 219, 234, 255, 255, 255, 255, 255, 255, 255, 255, },
+        {255, 249, 249, 255, 249, 255, 255, 255, 255, 255, 255, },
+      },
+      {
+        {255, 193, 224, 249, 255, 244, 255, 255, 255, 255, 255, },
+        {219, 224, 229, 255, 255, 249, 255, 255, 255, 255, 255, },
+        {255, 255, 255, 249, 249, 255, 255, 255, 255, 255, 255, },
+      },
+      {
+        {255, 193, 229, 255, 255, 255, 255, 255, 255, 255, 255, },
+        {224, 224, 239, 255, 255, 255, 255, 255, 255, 255, 255, },
+        {249, 244, 249, 255, 255, 255, 255, 255, 255, 255, 255, },
+      },
+    },
+    {
+      {
+        {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+        {249, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+        {255, 239, 234, 244, 239, 244, 249, 255, 255, 255, 255, },
+      },
+      {
+        {255, 249, 239, 239, 244, 255, 255, 255, 255, 255, 255, },
+        {255, 249, 244, 255, 249, 255, 255, 255, 255, 255, 255, },
+        {255, 255, 239, 255, 255, 249, 255, 255, 255, 255, 255, },
+      },
+      {
+        {255, 244, 239, 239, 244, 255, 255, 255, 255, 255, 255, },
+        {255, 234, 239, 234, 249, 255, 255, 255, 255, 255, 255, },
+        {255, 255, 229, 239, 234, 249, 244, 255, 255, 255, 255, },
+      },
+      {
+        {255, 239, 229, 239, 234, 234, 255, 255, 255, 255, 255, },
+        {255, 239, 234, 229, 244, 239, 255, 234, 255, 255, 255, },
+        {255, 229, 209, 229, 239, 234, 244, 229, 255, 249, 255, },
+      },
+      {
+        {255, 239, 234, 229, 244, 249, 255, 249, 255, 255, 255, },
+        {255, 234, 229, 244, 234, 249, 255, 249, 255, 255, 255, },
+        {255, 229, 239, 229, 249, 255, 255, 244, 255, 255, 255, },
+      },
+      {
+        {255, 239, 234, 239, 234, 239, 255, 249, 255, 255, 255, },
+        {255, 229, 234, 239, 239, 239, 255, 244, 255, 255, 255, },
+        {255, 229, 234, 239, 239, 244, 255, 255, 255, 255, 255, },
+      },
+      {
+        {255, 219, 224, 229, 229, 234, 239, 224, 255, 255, 255, },
+        {255, 229, 229, 224, 234, 229, 239, 239, 255, 255, 255, },
+        {255, 229, 224, 239, 234, 239, 224, 224, 255, 249, 255, },
+      },
+      {
+        {255, 234, 229, 244, 229, 229, 255, 214, 255, 255, 255, },
+        {255, 239, 234, 239, 214, 239, 255, 209, 255, 255, 255, },
+        {249, 239, 219, 209, 219, 224, 239, 204, 255, 255, 255, },
+      },
+    },
+
+};
--- a/vp8/common/common.h
+++ b/vp8/common/common.h
@ -13,7 +13,7 @@
 #define common_h 1

 #include <assert.h>
-
+#include "vpx_config.h"
 /* Interface header for common constant data structures and lookup tables */

 #include "vpx_mem/vpx_mem.h"
@ -38,5 +38,4 @@

 #define vp8_zero_array( Dest, N)  vpx_memset( Dest, 0, N * sizeof( *Dest));

-
 #endif  /* common_h */
--- a/vp8/common/debugmodes.c
+++ b/vp8/common/debugmodes.c
@ -97,7 +97,12 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f
                bindex = (b_row & 3) * 4 + (b_col & 3);

                if (mi[mb_index].mbmi.mode == B_PRED)
-                    fprintf(mvs, "%2d ", mi[mb_index].bmi[bindex].as_mode);
+                {
+                    fprintf(mvs, "%2d ", mi[mb_index].bmi[bindex].as_mode.first);
+#if CONFIG_COMP_INTRA_PRED
+                    fprintf(mvs, "%2d ", mi[mb_index].bmi[bindex].as_mode.second);
+#endif
+                }
                else
                    fprintf(mvs, "xx ");

--- a/vp8/common/defaultcoefcounts.h
+++ b/vp8/common/defaultcoefcounts.h
@ -0,0 +1,187 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/* Generated file, included by entropy.c */
+
+/* Token counts from which vp8_default_coef_probs() derives the default
+ * 8x8 coefficient probabilities (pc->fc.coef_probs_8x8).
+ * Indexed [block type][coefficient band][previous-token context][token].
+ */
+static const unsigned int vp8_default_coef_counts_8x8[BLOCK_TYPES]
+                                              [COEF_BANDS]
+                                              [PREV_COEF_CONTEXTS]
+                                              [MAX_ENTROPY_TOKENS] =
+{
+
+    { /* block Type 0 */
+      { /* Coeff Band 0 */
+        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
+      },
+      { /* Coeff Band 1 */
+        { 21041, 13314, 3420, 592, 117, 0, 0, 0, 0, 0, 0, 11783},
+        { 48236, 6918, 586, 153, 0, 0, 0, 0, 0, 0, 0, 23137},
+        { 676112, 106685, 24701, 6003, 1426, 429, 165, 0, 0, 0, 0, 28910}
+      },
+      { /* Coeff Band 2 */
+        { 660107, 75227, 8451, 1345, 259, 0, 0, 0, 0, 0, 0, 0},
+        { 79164, 36835, 6865, 1185, 246, 47, 0, 0, 0, 0, 0, 2575},
+        { 19469, 14330, 3070, 579, 94, 6, 0, 0, 0, 0, 0, 44}
+      },
+      { /* Coeff Band 3 */
+        { 1978004, 235343, 28485, 3242, 271, 0, 0, 0, 0, 0, 0, 0},
+        { 228684, 106736, 21431, 2842, 272, 46, 0, 0, 0, 0, 0, 9266},
+        { 32470, 27496, 6852, 1386, 45, 93, 0, 0, 0, 0, 0, 0}
+      },
+      { /* Coeff Band 4 */
+        { 1911212, 224613, 49653, 13748, 2541, 568, 48, 0, 0, 0, 0, 0},
+        { 196670, 103472, 44473, 11490, 2432, 977, 72, 0, 0, 0, 0, 9447},
+        { 37876, 40417, 19142, 6069, 1799, 727, 51, 0, 0, 0, 0, 0}
+      },
+      { /* Coeff Band 5 */
+        { 3813399, 437714, 64387, 11312, 695, 219, 0, 0, 0, 0, 0, 0},
+        { 438288, 215917, 61905, 10194, 674, 107, 0, 0, 0, 0, 0, 17808},
+        { 99139, 93643, 30054, 5758, 802, 171, 0, 0, 0, 0, 0, 0}
+      },
+      { /* Coeff Band 6 */
+        { 12259383, 1625505, 234927, 46306, 8417, 1456, 151, 0, 0, 0, 0, 0},
+        { 1518161, 734287, 204240, 44228, 9462, 2240, 65, 0, 0, 0, 0, 107630},
+        { 292470, 258894, 94925, 25864, 6662, 2055, 170, 0, 0, 0, 0, 0}
+      },
+      { /* Coeff Band 7 */
+        { 9791308, 2118949, 169439, 16735, 1122, 0, 0, 0, 0, 0, 0, 0},
+        { 1500281, 752410, 123259, 13065, 1168, 47, 0, 0, 0, 0, 0, 707182},
+        { 193067, 142638, 31018, 4719, 516, 138, 0, 0, 0, 0, 0, 12439}
+      }
+    },
+    { /* block Type 1 */
+      { /* Coeff Band 0 */
+        { 16925, 10553, 852, 16, 63, 87, 47, 0, 0, 0, 0, 31232},
+        { 39777, 26839, 6822, 1908, 678, 456, 227, 168, 35, 0, 0, 46825},
+        { 17300, 16666, 4168, 1209, 492, 154, 118, 207, 0, 0, 0, 19608}
+      },
+      { /* Coeff Band 1 */
+        { 35882, 31722, 4625, 1270, 266, 237, 0, 0, 0, 0, 0, 0},
+        { 15426, 13894, 4482, 1305, 281, 43, 0, 0, 0, 0, 0, 18627},
+        { 3900, 6552, 3472, 1723, 746, 366, 115, 35, 0, 0, 0, 798}
+      },
+      { /* Coeff Band 2 */
+        { 21998, 29132, 3353, 679, 46, 0, 0, 0, 0, 0, 0, 0},
+        { 9098, 15767, 3794, 792, 268, 47, 0, 0, 0, 0, 0, 22402},
+        { 4007, 8472, 2844, 687, 217, 0, 0, 0, 0, 0, 0, 2739}
+      },
+      { /* Coeff Band 3 */
+        { 0, 31414, 2911, 682, 96, 0, 0, 0, 0, 0, 0, 0},
+        { 0, 16515, 4425, 938, 124, 0, 0, 0, 0, 0, 0, 31369},
+        { 0, 4833, 2787, 1213, 150, 0, 0, 0, 0, 0, 0, 3744}
+      },
+      { /* Coeff Band 4 */
+        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
+      },
+      { /* Coeff Band 5 */
+        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
+      },
+      { /* Coeff Band 6 */
+        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 52762},
+        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13326}
+      },
+      { /* Coeff Band 7 */
+        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
+      }
+    },
+    { /* block Type 2 */
+      { /* Coeff Band 0 */
+        { 4444, 1614, 120, 48, 0, 48, 0, 0, 0, 0, 0, 278},
+        { 192436, 103730, 24494, 9845, 4122, 1193, 102, 0, 0, 0, 0, 2577},
+        { 3473446, 2308716, 815510, 370374, 167797, 92152, 12073, 86, 0, 0, 0, 6801}
+      },
+      { /* Coeff Band 1 */
+        { 2150616, 1136388, 250011, 86888, 31434, 13746, 1243, 0, 0, 0, 0, 0},
+        { 1179945, 799802, 266012, 106787, 40809, 16486, 1546, 0, 0, 0, 0, 2673},
+        { 465128, 504130, 286989, 146259, 62380, 30192, 2866, 20, 0, 0, 0, 0}
+      },
+      { /* Coeff Band 2 */
+        { 2157762, 1177519, 282665, 108499, 43389, 23224, 2597, 34, 0, 0, 0, 0},
+        { 1135685, 813705, 278079, 123255, 53935, 29492, 3152, 39, 0, 0, 0, 2978},
+        { 391894, 428037, 264216, 144306, 69326, 40281, 5541, 29, 0, 0, 0, 38}
+      },
+      { /* Coeff Band 3 */
+        { 6669109, 3468471, 782161, 288484, 115500, 51083, 4943, 41, 0, 0, 0, 0},
+        { 3454493, 2361636, 809524, 337663, 141343, 65036, 6361, 0, 0, 0, 0, 8730},
+        { 1231825, 1359522, 824686, 420784, 185517, 98731, 10973, 72, 0, 0, 0, 20}
+      },
+      { /* Coeff Band 4 */
+        { 7606203, 3452846, 659856, 191703, 49335, 14336, 450, 0, 0, 0, 0, 0},
+        { 3806506, 2379332, 691697, 224938, 61966, 18324, 766, 0, 0, 0, 0, 8193},
+        { 1270110, 1283728, 628775, 243378, 72617, 24897, 1087, 0, 0, 0, 0, 0}
+      },
+      { /* Coeff Band 5 */
+        { 15314169, 7436809, 1579928, 515790, 167453, 58305, 3502, 19, 0, 0, 0, 0},
+        { 7021286, 4667922, 1545706, 574463, 191793, 68748, 4048, 1, 0, 0, 0, 17222},
+        { 2011989, 2145878, 1185336, 534879, 195719, 79103, 5343, 4, 0, 0, 0, 37}
+      },
+      { /* Coeff Band 6 */
+        { 63458382, 25384462, 4208045, 1091050, 299011, 95242, 5238, 33, 0, 0, 0, 0},
+        { 25638401, 14694085, 3945978, 1195420, 344813, 117355, 6703, 0, 0, 0, 0, 216811},
+        { 5988177, 5824044, 2754413, 1077350, 370739, 139710, 9693, 38, 0, 0, 0, 1835}
+      },
+      { /* Coeff Band 7 */
+        { 74998348, 29342158, 2955001, 452912, 69631, 9516, 37, 0, 0, 0, 0, 0},
+        { 24762356, 13281085, 2409883, 436787, 68948, 10658, 36, 0, 0, 0, 0, 6614989},
+        { 3882867, 3224489, 1052289, 252890, 46967, 8548, 154, 0, 0, 0, 0, 194354}
+      }
+    },
+    { /* block Type 3 */
+      { /* Coeff Band 0 */
+        { 10583, 12059, 3155, 1041, 248, 175, 24, 2, 0, 0, 0, 5717},
+        { 42461, 41782, 13553, 4966, 1352, 855, 89, 0, 0, 0, 0, 15000},
+        { 4691125, 5045589, 2673566, 1089317, 378161, 160268, 18252, 813, 69, 13, 0, 49}
+      },
+      { /* Coeff Band 1 */
+        { 1535203, 1685686, 924565, 390329, 141709, 60523, 5983, 171, 0, 0, 0, 0},
+        { 1594021, 1793276, 1016078, 441332, 164159, 70843, 8098, 311, 0, 0, 0, 11312},
+        { 1225223, 1430184, 888492, 460713, 203286, 115149, 22061, 804, 7, 0, 0, 0}
+      },
+      { /* Coeff Band 2 */
+        { 1522386, 1590366, 799910, 303691, 96625, 37608, 3637, 180, 33, 11, 0, 0},
+        { 1682184, 1793869, 913649, 353520, 113674, 46309, 4736, 221, 18, 3, 0, 963},
+        { 1574580, 1740474, 954392, 417994, 151400, 67091, 8000, 536, 73, 10, 0, 63}
+      },
+      { /* Coeff Band 3 */
+        { 4963672, 5197790, 2585383, 982161, 313333, 118498, 16014, 536, 62, 0, 0, 0},
+        { 5223913, 5569803, 2845858, 1107384, 364949, 147841, 18296, 658, 11, 11, 0, 1866},
+        { 4042207, 4548894, 2608767, 1154993, 446290, 221295, 41054, 2438, 124, 20, 0, 0}
+      },
+      { /* Coeff Band 4 */
+        { 3857216, 4431325, 2670447, 1330169, 553301, 286825, 46763, 1917, 0, 0, 0, 0},
+        { 4226215, 4963701, 3046198, 1523923, 644670, 355519, 58792, 2525, 0, 0, 0, 1298},
+        { 3831873, 4580350, 3018580, 1660048, 797298, 502983, 123906, 7172, 16, 0, 0, 0}
+      },
+      { /* Coeff Band 5 */
+        { 8524543, 9285149, 4979435, 2039330, 683458, 266032, 22628, 270, 0, 0, 0, 0},
+        { 9432163, 10428088, 5715661, 2385738, 838389, 326264, 29981, 361, 0, 0, 0, 884},
+        { 9039066, 10368964, 6136765, 2862030, 1098269, 511668, 63105, 945, 14, 0, 0, 0}
+      },
+      { /* Coeff Band 6 */
+        { 33222872, 34748297, 17701695, 7214933, 2602336, 1191859, 187873, 12667, 390, 3, 0, 0},
+        { 34765051, 37140719, 19525578, 8268934, 3085012, 1473864, 246743, 15258, 736, 3, 0, 8403},
+        { 28591289, 32252393, 19037068, 9213729, 4020653, 2372354, 586420, 67428, 3920, 92, 7, 3}
+      },
+      { /* Coeff Band 7 */
+        { 68604786, 60777665, 19712887, 5656955, 1520443, 507166, 51829, 2466, 10, 0, 0, 0},
+        { 55447403, 51682540, 19008774, 5928582, 1706884, 595531, 65998, 3661, 101, 0, 0, 8468343},
+        { 28321970, 29149398, 13565882, 5258675, 1868588, 898041, 192023, 21497, 672, 17, 0, 1884921}
+      }
+    }
+  };
--- a/vp8/common/entropy.c
+++ b/vp8/common/entropy.c
@ -60,6 +60,22 @@ DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]) =
    9, 12, 13, 10,
    7, 11, 14, 15,
 };
+/* Coefficient band (0..7, i.e. one of COEF_BANDS) for each of the 64
+ * coefficient positions of an 8x8 block.
+ * NOTE(review): presumably indexed by position in scan order, like
+ * vp8_coef_bands for 4x4 blocks -- confirm against the callers.
+ */
+DECLARE_ALIGNED(64, cuchar, vp8_coef_bands_8x8[64]) = { 0, 1, 2, 3, 5, 4, 4, 5,
+                                                        5, 3, 6, 3, 5, 4, 6, 6,
+                                                        6, 5, 5, 6, 6, 6, 6, 6,
+                                                        6, 6, 6, 6, 6, 6, 6, 6,
+                                                        6, 6, 6, 6, 7, 7, 7, 7,
+                                                        7, 7, 7, 7, 7, 7, 7, 7,
+                                                        7, 7, 7, 7, 7, 7, 7, 7,
+                                                        7, 7, 7, 7, 7, 7, 7, 7
+};
+/* Zig-zag scan order for 8x8 blocks: entry i is the raster index
+ * (row * 8 + column) of the i-th coefficient in scan order.  The table is
+ * a permutation of 0..63.
+ */
+DECLARE_ALIGNED(64, const int, vp8_default_zig_zag1d_8x8[64]) =
+{
+    0,  1,  8, 16,  9,  2,  3, 10, 17, 24, 32, 25, 18, 11,  4,  5,
+    12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13,  6,  7, 14, 21, 28,
+    35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
+    58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63,
+};

 DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]) =
 {
@ -70,8 +86,7 @@ DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]) =
 };

 DECLARE_ALIGNED(16, short, vp8_default_zig_zag_mask[16]);
-
-const int vp8_mb_feature_data_bits[MB_LVL_MAX] = {7, 6};
+/* Filled in by vp8_init_scan_order_mask() with 1 << (scan position) for
+ * each of the 64 coefficient positions.
+ * NOTE(review): a short cannot represent bit positions above 15; the
+ * original "int64_t" remark on this line suggests a 64-bit element type
+ * was intended -- confirm before relying on entries for positions >= 16.
+ */
+DECLARE_ALIGNED(64, short, vp8_default_zig_zag_mask_8x8[64]);

 /* Array indices are identical to previously-existing CONTEXT_NODE indices */

@ -113,7 +128,10 @@ void vp8_init_scan_order_mask()
    {
        vp8_default_zig_zag_mask[vp8_default_zig_zag1d[i]] = 1 << i;
    }
-
+    /* Build the scan-position mask for the 8x8 scan order, one entry per
+     * coefficient position.
+     * NOTE(review): i runs up to 63, but with a 32-bit int the shift
+     * 1 << i is undefined for i >= 31, and the destination elements are
+     * short, so only the lowest bit positions can actually be stored --
+     * confirm the intended element type (64-bit?) and widen the shift.
+     */
+    for (i = 0; i < 64; i++)
+    {
+        vp8_default_zig_zag_mask_8x8[vp8_default_zig_zag1d_8x8[i]] = 1 << i;
+    }
 }

 static void init_bit_tree(vp8_tree_index *p, int n)
@ -156,11 +174,37 @@ vp8_extra_bit_struct vp8_extra_bits[12] =
 };

 #include "default_coef_probs.h"
+#include "defaultcoefcounts.h"

 void vp8_default_coef_probs(VP8_COMMON *pc)
 {
+    int h;
    vpx_memcpy(pc->fc.coef_probs, default_coef_probs,
                   sizeof(default_coef_probs));
+    h = 0;
+    do
+    {
+        int i = 0;
+
+        do
+        {
+            int k = 0;
+
+            do
+            {
+                unsigned int branch_ct [ENTROPY_NODES] [2];
+                vp8_tree_probs_from_distribution(
+                    MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree,
+                    pc->fc.coef_probs_8x8 [h][i][k], branch_ct, vp8_default_coef_counts_8x8 [h][i][k],
+                    256, 1);
+
+            }
+            while (++k < PREV_COEF_CONTEXTS);
+        }
+        while (++i < COEF_BANDS);
+    }
+    while (++h < BLOCK_TYPES);
+
 }

 void vp8_coef_tree_initialize()
--- a/vp8/common/entropy.h
+++ b/vp8/common/entropy.h
@ -14,7 +14,7 @@

 #include "treecoder.h"
 #include "blockd.h"
-
+#include "common.h"
 /* Coefficient token alphabet */

 #define ZERO_TOKEN              0       /* 0         Extra Bits 0+0 */
@ -52,7 +52,6 @@ extern vp8_extra_bit_struct vp8_extra_bits[12];    /* indexed by token value */
 #define MAX_PROB                255
 #define DCT_MAX_VALUE           8192

-
 /* Coefficients are predicted via a 3-dimensional probability table. */

 /* Outside dimension.  0 = Y no DC, 1 = Y2, 2 = UV, 3 = Y with DC */
@ -64,6 +63,7 @@ extern vp8_extra_bit_struct vp8_extra_bits[12];    /* indexed by token value */

 #define COEF_BANDS 8
 extern DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]);
+extern DECLARE_ALIGNED(64, const unsigned char, vp8_coef_bands_8x8[64]);

 /* Inside dimension is 3-valued measure of nearby complexity, that is,
   the extent to which nearby coefficients are nonzero.  For the first
@ -87,15 +87,14 @@ extern DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]);
 extern DECLARE_ALIGNED(16, const unsigned char, vp8_prev_token_class[MAX_ENTROPY_TOKENS]);

 extern const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
-
+extern const vp8_prob vp8_coef_update_probs_8x8 [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];

 struct VP8Common;
 void vp8_default_coef_probs(struct VP8Common *);
-
 extern DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]);
 extern DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]);
 extern short vp8_default_zig_zag_mask[16];
-extern const int vp8_mb_feature_data_bits[MB_LVL_MAX];
-
+extern DECLARE_ALIGNED(64, const int, vp8_default_zig_zag1d_8x8[64]);
+extern short vp8_default_zig_zag_mask_8x8[64];//int64_t
 void vp8_coef_tree_initialize(void);
 #endif
--- a/vp8/common/entropymode.c
+++ b/vp8/common/entropymode.c
@ -9,15 +9,59 @@
 */


+#include "modecont.h"
 #include "entropymode.h"
 #include "entropy.h"
 #include "vpx_mem/vpx_mem.h"

-static const unsigned int kf_y_mode_cts[VP8_YMODES] = { 1607, 915, 812, 811, 5455};
-static const unsigned int y_mode_cts  [VP8_YMODES] = { 8080, 1908, 1582, 1007, 5874};

+/* Key-frame luma (Y) prediction mode counts; vp8_init_mbmode_probs() turns
+ * them into kf_ymode_prob with vp8_tree_probs_from_distribution().
+ * With CONFIG_QIMODE there are 8 count sets, one per kf_ymode_prob[i].
+ * NOTE(review): what the 0..7 index selects is not visible here --
+ * presumably a quantizer-index bucket; confirm.  The QIMODE table is also
+ * the only count table that is not static -- confirm that is intentional.
+ */
+#if CONFIG_QIMODE
+const unsigned int kf_y_mode_cts[8][VP8_YMODES] =
+{
+    {17,  6,  5,  2, 22, 203},
+    {27, 13, 13,  6, 27, 170},
+    {35, 17, 18,  9, 26, 152},
+    {45, 22, 24, 12, 27, 126},
+    {58, 26, 29, 13, 26, 104},
+    {73, 33, 36, 17, 20,  78},
+    {88, 38, 39, 19, 16,  57},
+    {99, 42, 43, 21, 12,  39},
+};
+#else
+static const unsigned int kf_y_mode_cts[VP8_YMODES] = {
+    49, 22, 23, 11, 23, 128};
+#endif
+
+/* Luma (Y) prediction mode counts from which vp8_init_mbmode_probs()
+ * derives x->fc.ymode_prob. */
+static const unsigned int y_mode_cts  [VP8_YMODES] = {
+    106,  25, 21, 13, 16, 74};
+
+/* Chroma (UV) prediction mode counts from which vp8_init_mbmode_probs()
+ * derives x->fc.uv_mode_prob.  With CONFIG_UVINTRA there is one row of
+ * counts per Y mode (x->fc.uv_mode_prob[i] is built from row i).
+ */
+#if CONFIG_UVINTRA
+static const unsigned int uv_mode_cts [VP8_YMODES] [VP8_UV_MODES] ={
+    { 210, 20, 20,  6},
+    { 180, 60, 10,  6},
+    { 150, 20, 80,  6},
+    { 170, 35, 35, 16},
+    { 142, 51, 45, 18}, /* never used */
+    { 160, 40, 46, 10},
+};
+#else
 static const unsigned int uv_mode_cts  [VP8_UV_MODES] = { 59483, 13605, 16492, 4230};
+#endif
+
+/* Counts for the four i8x8 prediction modes; vp8_init_mbmode_probs()
+ * derives x->i8x8_mode_prob from them via vp8_i8x8_mode_tree. */
+static const unsigned int i8x8_mode_cts  [VP8_UV_MODES] = {93, 69, 81, 13};
+
+/* Key-frame chroma (UV) prediction mode counts from which
+ * vp8_init_mbmode_probs() derives x->kf_uv_mode_prob.  With CONFIG_UVINTRA
+ * there is one row of counts per Y mode (kf_uv_mode_prob[i] is built from
+ * row i).
+ */
+#if CONFIG_UVINTRA
+static const unsigned int kf_uv_mode_cts [VP8_YMODES] [VP8_UV_MODES] ={
+    { 180, 34, 34,  8},
+    { 132, 74, 40, 10},
+    { 132, 40, 74, 10},
+    { 152, 46, 40, 18},
+    { 142, 51, 45, 18}, /* never used */
+    { 142, 51, 45, 18},
+};
+#else
 static const unsigned int kf_uv_mode_cts[VP8_UV_MODES] = { 5319, 1904, 1703, 674};
+#endif

 static const unsigned int bmode_cts[VP8_BINTRAMODES] =
 {
@ -117,23 +161,30 @@ const vp8_tree_index vp8_bmode_tree[18] =     /* INTRAMODECONTEXTNODE value */

 /* Again, these trees use the same probability indices as their
   explicitly-programmed predecessors. */
-
-const vp8_tree_index vp8_ymode_tree[8] =
+const vp8_tree_index vp8_ymode_tree[10] =
 {
    -DC_PRED, 2,
    4, 6,
    -V_PRED, -H_PRED,
-    -TM_PRED, -B_PRED
+    -TM_PRED, 8,
+    -B_PRED, -I8X8_PRED
 };

-const vp8_tree_index vp8_kf_ymode_tree[8] =
+const vp8_tree_index vp8_kf_ymode_tree[10] =
 {
    -B_PRED, 2,
    4, 6,
    -DC_PRED, -V_PRED,
-    -H_PRED, -TM_PRED
+    -H_PRED, 8,
+    -TM_PRED, -I8X8_PRED
 };

+/* Token tree for the i8x8 prediction mode.  Entries come in pairs (one
+ * pair per node): a positive entry is the index of the next pair, a
+ * negated mode is a leaf.  Leaves, from shortest to longest code:
+ * DC_PRED, V_PRED, then H_PRED / TM_PRED.
+ */
+const vp8_tree_index vp8_i8x8_mode_tree[6] =
+{
+    -DC_PRED, 2,
+    -V_PRED, 4,
+    -H_PRED, -TM_PRED
+};
 const vp8_tree_index vp8_uv_mode_tree[6] =
 {
    -DC_PRED, 2,
@ -168,11 +219,33 @@ struct vp8_token_struct vp8_bmode_encodings   [VP8_BINTRAMODES];
 struct vp8_token_struct vp8_ymode_encodings   [VP8_YMODES];
 struct vp8_token_struct vp8_kf_ymode_encodings [VP8_YMODES];
 struct vp8_token_struct vp8_uv_mode_encodings  [VP8_UV_MODES];
+struct vp8_token_struct vp8_i8x8_mode_encodings  [VP8_UV_MODES];
 struct vp8_token_struct vp8_mbsplit_encodings [VP8_NUMMBSPLITS];

 struct vp8_token_struct vp8_mv_ref_encoding_array    [VP8_MVREFS];
 struct vp8_token_struct vp8_sub_mv_ref_encoding_array [VP8_SUBMVREFS];

+#if CONFIG_HIGH_PRECISION_MV
+/* 16-symbol counterpart of vp8_small_mvtree (which has 8 symbols): a
+ * complete four-level binary tree whose leaves are the values 0..15.
+ * Entries come in pairs; a positive entry is the index of the next pair
+ * and a negated value is a leaf.
+ */
+const vp8_tree_index vp8_small_mvtree_hp [30] =
+{
+     2,  16,
+     4,  10,
+     6,   8,
+    -0,  -1,
+    -2,  -3,
+    12,  14,
+    -4,  -5,
+    -6,  -7,
+    18,  24,
+    20,  22,
+    -8,  -9,
+   -10, -11,
+    26,  28,
+   -12, -13,
+   -14, -15
+};
+/* Filled from vp8_small_mvtree_hp by vp8_entropy_mode_init(). */
+struct vp8_token_struct vp8_small_mvencodings_hp [16];
+#endif  /* CONFIG_HIGH_PRECISION_MV */

 const vp8_tree_index vp8_small_mvtree [14] =
 {
@ -184,9 +257,10 @@ const vp8_tree_index vp8_small_mvtree [14] =
    -4, -5,
    -6, -7
 };
-
 struct vp8_token_struct vp8_small_mvencodings [8];

+
+
 void vp8_init_mbmode_probs(VP8_COMMON *x)
 {
    unsigned int bct [VP8_YMODES] [2];      /* num Ymodes > num UV modes */
@ -196,22 +270,56 @@ void vp8_init_mbmode_probs(VP8_COMMON *x)
        x->fc.ymode_prob, bct, y_mode_cts,
        256, 1
    );
+#if CONFIG_QIMODE
+    {
+        int i;
+        for (i=0;i<8;i++)
+        vp8_tree_probs_from_distribution(
+            VP8_YMODES, vp8_kf_ymode_encodings, vp8_kf_ymode_tree,
+            x->kf_ymode_prob[i], bct, kf_y_mode_cts[i],
+            256, 1
+            );
+    }
+#else
    vp8_tree_probs_from_distribution(
        VP8_YMODES, vp8_kf_ymode_encodings, vp8_kf_ymode_tree,
        x->kf_ymode_prob, bct, kf_y_mode_cts,
        256, 1
    );
+#endif
+#if CONFIG_UVINTRA
+    {
+        int i;
+        for (i=0;i<VP8_YMODES;i++)
+        {
+            vp8_tree_probs_from_distribution(
+                VP8_UV_MODES, vp8_uv_mode_encodings, vp8_uv_mode_tree,
+                x->kf_uv_mode_prob[i], bct, kf_uv_mode_cts[i],
+                256, 1);
+            vp8_tree_probs_from_distribution(
+                VP8_UV_MODES, vp8_uv_mode_encodings, vp8_uv_mode_tree,
+                x->fc.uv_mode_prob[i], bct, uv_mode_cts[i],
+                256, 1);
+        }
+    }
+#else
    vp8_tree_probs_from_distribution(
        VP8_UV_MODES, vp8_uv_mode_encodings, vp8_uv_mode_tree,
        x->fc.uv_mode_prob, bct, uv_mode_cts,
-        256, 1
-    );
+        256, 1);
+
    vp8_tree_probs_from_distribution(
        VP8_UV_MODES, vp8_uv_mode_encodings, vp8_uv_mode_tree,
        x->kf_uv_mode_prob, bct, kf_uv_mode_cts,
+        256, 1);
+#endif
+    vp8_tree_probs_from_distribution(
+        VP8_UV_MODES, vp8_i8x8_mode_encodings, vp8_i8x8_mode_tree,
+        x->i8x8_mode_prob, bct, i8x8_mode_cts,
        256, 1
-    );
+        );
    vpx_memcpy(x->fc.sub_mv_ref_prob, sub_mv_ref_prob, sizeof(sub_mv_ref_prob));
+
 }


@ -262,6 +370,7 @@ void vp8_entropy_mode_init()
    vp8_tokens_from_tree(vp8_ymode_encodings,   vp8_ymode_tree);
    vp8_tokens_from_tree(vp8_kf_ymode_encodings, vp8_kf_ymode_tree);
    vp8_tokens_from_tree(vp8_uv_mode_encodings,  vp8_uv_mode_tree);
+    vp8_tokens_from_tree(vp8_i8x8_mode_encodings,  vp8_i8x8_mode_tree);
    vp8_tokens_from_tree(vp8_mbsplit_encodings, vp8_mbsplit_tree);

    vp8_tokens_from_tree_offset(vp8_mv_ref_encoding_array,
@ -270,4 +379,138 @@ void vp8_entropy_mode_init()
                                vp8_sub_mv_ref_tree, LEFT4X4);

    vp8_tokens_from_tree(vp8_small_mvencodings, vp8_small_mvtree);
+#if CONFIG_HIGH_PRECISION_MV
+    vp8_tokens_from_tree(vp8_small_mvencodings_hp, vp8_small_mvtree_hp);
+#endif
+}
+
+/* Reset the MV reference mode statistics: clear both count tables and
+ * reload both mode-context tables from default_vp8_mode_contexts.  The
+ * "_a" tables are the ones selected elsewhere in this file when
+ * pc->refresh_alt_ref_frame is set.
+ */
+void vp8_init_mode_contexts(VP8_COMMON *pc)
+{
+    /* Regular tables. */
+    vpx_memset(pc->mv_ref_ct, 0, sizeof(pc->mv_ref_ct));
+    vpx_memcpy(pc->mode_context, default_vp8_mode_contexts,
+               sizeof(pc->mode_context));
+
+    /* Alt-ref ("_a") tables. */
+    vpx_memset(pc->mv_ref_ct_a, 0, sizeof(pc->mv_ref_ct_a));
+    vpx_memcpy(pc->mode_context_a, default_vp8_mode_contexts,
+               sizeof(pc->mode_context_a));
+}
+
+/* Record one inter-mode decision in the MV reference count table.
+ *
+ * The mode is classified by a chain of four binary decisions (ZEROMV?
+ * NEARESTMV? NEARMV? NEWMV?).  For each decision k that is reached, the
+ * counter [ct[k]][k][0] is bumped when the mode matches at that step and
+ * [ct[k]][k][1] when it does not; the chain stops at the first match.
+ *
+ * pc  common state; the "_a" table is used when refresh_alt_ref_frame is set
+ * m   the mode that was chosen
+ * ct  context index to use at each of the four decisions
+ */
+void vp8_accum_mv_refs(VP8_COMMON *pc,
+                       MB_PREDICTION_MODE m,
+                       const int ct[4])
+{
+    int (*counts)[4][2] = pc->refresh_alt_ref_frame ? pc->mv_ref_ct_a
+                                                      : pc->mv_ref_ct;
+
+    if (m == ZEROMV)
+    {
+        ++counts [ct[0]] [0] [0];
+        return;
+    }
+    ++counts [ct[0]] [0] [1];
+
+    if (m == NEARESTMV)
+    {
+        ++counts [ct[1]] [1] [0];
+        return;
+    }
+    ++counts [ct[1]] [1] [1];
+
+    if (m == NEARMV)
+    {
+        ++counts [ct[2]] [2] [0];
+        return;
+    }
+    ++counts [ct[2]] [2] [1];
+
+    /* Last decision: NEWMV versus anything else. */
+    ++counts [ct[3]] [3] [(m == NEWMV) ? 0 : 1];
+}
+
+/* Re-derive the mode-context probabilities from the accumulated MV
+ * reference counts.  The "_a" count/context pair is used when
+ * pc->refresh_alt_ref_frame is set, the regular pair otherwise.
+ * Entries with fewer than 4 observations keep their previous value.
+ */
+void vp8_update_mode_context(VP8_COMMON *pc)
+{
+    int ctx, node;
+    int (*counts)[4][2];
+    int (*probs)[4];
+
+    if (pc->refresh_alt_ref_frame)
+    {
+        counts = pc->mv_ref_ct_a;
+        probs = pc->mode_context_a;
+    }
+    else
+    {
+        counts = pc->mv_ref_ct;
+        probs = pc->mode_context;
+    }
+
+    for (ctx = 0; ctx < 6; ctx++)
+    {
+        for (node = 0; node < 4; node++)
+        {
+            const int hits = counts[ctx][node][0];
+            const int total = hits + counts[ctx][node][1];
+            int prob;
+
+            /* Keep rare occurrences from skewing the probabilities. */
+            if (total < 4)
+                continue;
+
+            /* Scale to 0..256, then force the result into 1..255. */
+            prob = 256 * hits / total;
+            if (prob == 0)
+                prob = 1;
+            else if (prob > 255)
+                prob = 255;
+
+            probs[ctx][node] = prob;
+        }
+    }
+}
+#include "vp8/common/modecont.h"
+/* Debug helper: dump pc->mode_context and then pc->mode_context_a to
+ * stdout as two 6x4 tables, each preceded by a separator line.
+ * NOTE(review): printf is used here but no direct <stdio.h> include is
+ * visible in this file -- confirm one of the included headers provides it.
+ */
+void print_mode_contexts(VP8_COMMON *pc)
+{
+    int (*tables[2])[4];
+    int t, row, col;
+
+    tables[0] = pc->mode_context;
+    tables[1] = pc->mode_context_a;
+
+    for (t = 0; t < 2; t++)
+    {
+        printf("====================\n");
+        for (row = 0; row < 6; row++)
+        {
+            for (col = 0; col < 4; col++)
+            {
+                printf( "%4d ", tables[t][row][col]);
+            }
+            printf("\n");
+        }
+    }
+}
+/* Debug helper: dump pc->mv_ref_ct to stdout, one line per context row,
+ * each entry printed as "(count0:count1)".
+ */
+void print_mv_ref_cts(VP8_COMMON *pc)
+{
+    int ctx;
+
+    for (ctx = 0; ctx < 6; ctx++)
+    {
+        int node = 0;
+
+        while (node < 4)
+        {
+            const int *pair = pc->mv_ref_ct[ctx][node];
+
+            printf("(%4d:%4d) ", pair[0], pair[1]);
+            node++;
+        }
+        printf("\n");
+    }
 }
--- a/vp8/common/entropymode.h
+++ b/vp8/common/entropymode.h
@ -38,7 +38,7 @@ extern const vp8_tree_index vp8_bmode_tree[];
 extern const vp8_tree_index  vp8_ymode_tree[];
 extern const vp8_tree_index  vp8_kf_ymode_tree[];
 extern const vp8_tree_index  vp8_uv_mode_tree[];
-
+extern const vp8_tree_index  vp8_i8x8_mode_tree[];
 extern const vp8_tree_index  vp8_mbsplit_tree[];
 extern const vp8_tree_index  vp8_mv_ref_tree[];
 extern const vp8_tree_index  vp8_sub_mv_ref_tree[];
@ -46,6 +46,7 @@ extern const vp8_tree_index  vp8_sub_mv_ref_tree[];
 extern struct vp8_token_struct vp8_bmode_encodings   [VP8_BINTRAMODES];
 extern struct vp8_token_struct vp8_ymode_encodings   [VP8_YMODES];
 extern struct vp8_token_struct vp8_kf_ymode_encodings [VP8_YMODES];
+extern struct vp8_token_struct vp8_i8x8_mode_encodings  [VP8_UV_MODES];
 extern struct vp8_token_struct vp8_uv_mode_encodings  [VP8_UV_MODES];
 extern struct vp8_token_struct vp8_mbsplit_encodings  [VP8_NUMMBSPLITS];

@ -55,12 +56,20 @@ extern struct vp8_token_struct vp8_mv_ref_encoding_array    [VP8_MVREFS];
 extern struct vp8_token_struct vp8_sub_mv_ref_encoding_array [VP8_SUBMVREFS];

 extern const vp8_tree_index vp8_small_mvtree[];
-
 extern struct vp8_token_struct vp8_small_mvencodings [8];
+#if CONFIG_HIGH_PRECISION_MV
+extern const vp8_tree_index vp8_small_mvtree_hp[];
+extern struct vp8_token_struct vp8_small_mvencodings_hp [16];
+#endif

 void vp8_entropy_mode_init(void);

 void vp8_init_mbmode_probs(VP8_COMMON *x);
+extern void vp8_init_mode_contexts(VP8_COMMON *pc);
+extern void vp8_update_mode_context(VP8_COMMON *pc);
+extern void vp8_accum_mv_refs(VP8_COMMON *pc,
+                               MB_PREDICTION_MODE m,
+                               const int ct[4]);

 void   vp8_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES-1]);
 void vp8_kf_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1]);
--- a/vp8/common/entropymv.c
+++ b/vp8/common/entropymv.c
@ -11,6 +11,41 @@

 #include "entropymv.h"

+#if CONFIG_HIGH_PRECISION_MV
+const MV_CONTEXT_HP vp8_mv_update_probs_hp[2] =
+{
+    {{
+        237,                                        /* is short */
+        246,                                        /* sign */
+        253, 253, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, /* short tree */
+        254, 254, 254, 254, 254, 250, 250, 252, 254, 254, 254 /* long bits */
+    }},
+    {{
+        231,                                        /* is short */
+        243,                                        /* sign */
+        245, 253, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, /* short tree */
+        254, 254, 254, 254, 254, 251, 251, 254, 254, 254, 254 /* long bits */
+    }}
+};
+const MV_CONTEXT_HP vp8_default_mv_context_hp[2] =
+{
+    {{
+        /* row */
+        162,                                        /* is short */
+        128,                                        /* sign */
+        220, 204, 180, 192, 192, 119, 192, 192, 180, 140, 192, 192, 224, 224, 224, /* short tree */
+        128, 129, 132,  75, 145, 178, 206, 239, 254, 254, 254 /* long bits */
+    }},
+    {{
+        /* same for column */
+        164,                                        /* is short */
+        128,                                        /* sign */
+        220, 204, 180, 192, 192, 119, 192, 192, 180, 140, 192, 192, 224, 224, 224, /* short tree */
+        128, 130, 130,  74, 148, 180, 203, 236, 254, 254, 254 /* long bits */
+    }}
+};
+#endif  /* CONFIG_HIGH_PRECISION_MV */
+
 const MV_CONTEXT vp8_mv_update_probs[2] =
 {
    {{
@ -35,15 +70,11 @@ const MV_CONTEXT vp8_default_mv_context[2] =
        225, 146, 172, 147, 214,  39, 156,          /* short tree */
        128, 129, 132,  75, 145, 178, 206, 239, 254, 254 /* long bits */
    }},
-
-
-
    {{
        /* same for column */
        164,                                        /* is short */
        128,
        204, 170, 119, 235, 140, 230, 228,
        128, 130, 130,  74, 148, 180, 203, 236, 254, 254 /* long bits */
-
    }}
 };
--- a/vp8/common/entropymv.h
+++ b/vp8/common/entropymv.h
@ -13,16 +13,18 @@
 #define __INC_ENTROPYMV_H

 #include "treecoder.h"
+#include "vpx_config.h"

 enum
 {
    mv_max  = 1023,              /* max absolute value of a MV component */
    MVvals = (2 * mv_max) + 1,   /* # possible values "" */
-    mvfp_max  = 255,              /* max absolute value of a full pixel MV component */
-    MVfpvals = (2 * mvfp_max) +1, /* # possible full pixel MV values */
-
    mvlong_width = 10,       /* Large MVs have 9 bit magnitudes */
    mvnum_short = 8,         /* magnitudes 0 through 7 */
+    mvnum_short_bits = 3,         /* number of bits for short mvs */
+
+    mvfp_max  = 255,              /* max absolute value of a full pixel MV component */
+    MVfpvals = (2 * mvfp_max) + 1, /* # possible full pixel MV values */

    /* probability offsets for coding each MV component */

@ -41,4 +43,34 @@ typedef struct mv_context

 extern const MV_CONTEXT vp8_mv_update_probs[2], vp8_default_mv_context[2];

+#if CONFIG_HIGH_PRECISION_MV
+enum
+{
+    mv_max_hp  = 2047,              /* max absolute value of a MV component */
+    MVvals_hp = (2 * mv_max_hp) + 1,   /* # possible values "" */
+    mvlong_width_hp = 11,       /* Large MVs have 11 bit magnitudes */
+    mvnum_short_hp = 16,         /* magnitudes 0 through 15 */
+    mvnum_short_bits_hp = 4,         /* number of bits for short mvs */
+
+    mvfp_max_hp  = 255,              /* max absolute value of a full pixel MV component */
+    MVfpvals_hp = (2 * mvfp_max_hp) + 1, /* # possible full pixel MV values */
+
+    /* probability offsets for coding each MV component */
+
+    mvpis_short_hp = 0,         /* short (<= 15) vs long (>= 16) */
+    MVPsign_hp,                /* sign for non-zero */
+    MVPshort_hp,               /* 16 short values = 15-position tree */
+
+    MVPbits_hp = MVPshort_hp + mvnum_short_hp - 1, /* mvlong_width_hp long value bits */
+    MVPcount_hp = MVPbits_hp + mvlong_width_hp    /* (with independent probabilities) */
+};
+
+typedef struct mv_context_hp
+{
+    vp8_prob prob[MVPcount_hp];  /* often come in row, col pairs */
+} MV_CONTEXT_HP;
+
+extern const MV_CONTEXT_HP vp8_mv_update_probs_hp[2], vp8_default_mv_context_hp[2];
+#endif /* CONFIG_HIGH_PRECISION_MV */
+
 #endif
--- a/vp8/common/filter.c
+++ b/vp8/common/filter.c
@ -13,8 +13,26 @@
 #include "filter.h"
 #include "vpx_ports/mem.h"

-DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) =
+DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[SUBPEL_SHIFTS][2]) =
 {
+#if SUBPEL_SHIFTS==16
+    { 128,   0 },
+    { 120,   8 },
+    { 112,  16 },
+    { 104,  24 },
+    {  96,  32 },
+    {  88,  40 },
+    {  80,  48 },
+    {  72,  56 },
+    {  64,  64 },
+    {  56,  72 },
+    {  48,  80 },
+    {  40,  88 },
+    {  32,  96 },
+    {  24, 104 },
+    {  16, 112 },
+    {   8, 120 }
+#else
    { 128,   0 },
    { 112,  16 },
    {  96,  32 },
@ -23,8 +41,197 @@ DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) =
    {  48,  80 },
    {  32,  96 },
    {  16, 112 }
+#endif  /* SUBPEL_SHIFTS==16 */
 };

+#if CONFIG_ENHANCED_INTERP
+#define FILTER_ALPHA 60
+DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[SUBPEL_SHIFTS][2*INTERP_EXTEND]) =
+{
+    /* Generated using MATLAB:
+     * alpha = 0.6;
+     * b=intfilt(8,4,alpha);
+     * bi=round(128*b);
+     * ba=flipud(reshape([bi 0], 8, 8));
+     * disp(num2str(ba, '%d,'))
+     */
+#if SUBPEL_SHIFTS==16
+#if FILTER_ALPHA == 70
+    /* alpha = 0.70 */
+    { 0,   0,   0, 128,   0,   0,   0,  0},
+    { 0,   2,  -6, 126,   8,  -3,   1,  0},
+    {-1,   4, -11, 123,  18,  -7,   3, -1},
+    {-1,   5, -15, 119,  27, -10,   4, -1},
+    {-2,   6, -18, 113,  38, -13,   5, -1},
+    {-2,   7, -20, 106,  49, -16,   6, -2},
+    {-2,   8, -22,  98,  59, -18,   7, -2},
+    {-2,   8, -22,  89,  69, -20,   8, -2},
+    {-2,   8, -21,  79,  79, -21,   8, -2},
+    {-2,   8, -20,  69,  89, -22,   8, -2},
+    {-2,   7, -18,  59,  98, -22,   8, -2},
+    {-2,   6, -16,  49, 106, -20,   7, -2},
+    {-1,   5, -13,  38, 113, -18,   6, -2},
+    {-1,   4, -10,  27, 119, -15,   5, -1},
+    {-1,   3,  -7,  18, 123, -11,   4, -1},
+    { 0,   1,  -3,   8, 126,  -6,   2,  0}
+#elif FILTER_ALPHA == 65
+    /* alpha = 0.65 */
+    { 0,   0,   0, 128,   0,   0,   0,  0},
+    { 0,   2,  -6, 126,   8,  -3,   1,  0},
+    {-1,   3, -10, 123,  18,  -6,   2, -1},
+    {-1,   5, -14, 118,  27, -10,   4, -1},
+    {-1,   5, -17, 112,  38, -13,   5, -1},
+    {-2,   6, -19, 106,  48, -15,   5, -1},
+    {-2,   7, -21,  98,  59, -17,   6, -2},
+    {-2,   7, -21,  89,  69, -19,   7, -2},
+    {-2,   7, -20,  79,  79, -20,   7, -2},
+    {-2,   7, -19,  69,  89, -21,   7, -2},
+    {-2,   6, -17,  59,  98, -21,   7, -2},
+    {-1,   5, -15,  48, 106, -19,   6, -2},
+    {-1,   5, -13,  38, 112, -17,   5, -1},
+    {-1,   4, -10,  27, 118, -14,   5, -1},
+    {-1,   2,  -6,  18, 123, -10,   3, -1},
+    { 0,   1,  -3,   8, 126,  -6,   2,  0}
+#elif FILTER_ALPHA == 60
+    /* alpha = 0.60 */
+    { 0,   0,   0, 128,   0,   0,   0,  0},
+    { 0,   2,  -6, 126,   8,  -3,   1,  0},
+    {-1,   3, -10, 123,  18,  -6,   2, -1},
+    {-1,   4, -14, 118,  28,  -9,   3, -1},
+    {-1,   5, -17, 112,  38, -12,   4, -1},
+    {-1,   6, -19, 105,  48, -15,   5, -1},
+    {-1,   6, -20,  97,  58, -17,   6, -1},
+    {-1,   6, -20,  88,  69, -19,   6, -1},
+    {-1,   6, -20,  79,  79, -20,   6, -1},
+    {-1,   6, -19,  69,  88, -20,   6, -1},
+    {-1,   6, -17,  58,  97, -20,   6, -1},
+    {-1,   5, -15,  48, 105, -19,   6, -1},
+    {-1,   4, -12,  38, 112, -17,   5, -1},
+    {-1,   3,  -9,  28, 118, -14,   4, -1},
+    {-1,   2,  -6,  18, 123, -10,   3, -1},
+    { 0,   1,  -3,   8, 126,  -6,   2,  0}
+#elif FILTER_ALPHA == 55
+    /* alpha = 0.55 */
+    { 0,   0,   0, 128,   0,   0,   0,  0},
+    { 0,   1,  -5, 126,   8,  -3,   1,  0},
+    {-1,   2, -10, 123,  18,  -6,   2,  0},
+    {-1,   4, -13, 118,  27,  -9,   3, -1},
+    {-1,   5, -16, 112,  37, -12,   4, -1},
+    {-1,   5, -18, 105,  48, -14,   4, -1},
+    {-1,   5, -19,  97,  58, -16,   5, -1},
+    {-1,   6, -19,  88,  68, -18,   5, -1},
+    {-1,   6, -19,  78,  78, -19,   6, -1},
+    {-1,   5, -18,  68,  88, -19,   6, -1},
+    {-1,   5, -16,  58,  97, -19,   5, -1},
+    {-1,   4, -14,  48, 105, -18,   5, -1},
+    {-1,   4, -12,  37, 112, -16,   5, -1},
+    {-1,   3,  -9,  27, 118, -13,   4, -1},
+    { 0,   2,  -6,  18, 123, -10,   2, -1},
+    { 0,   1,  -3,   8, 126,  -5,   1,  0}
+#elif FILTER_ALPHA == 50
+    /* alpha = 0.50 */
+    { 0,   0,   0, 128,   0,   0,   0,  0},
+    { 0,   1,  -5, 126,   8,  -3,   1,  0},
+    { 0,   2, -10, 122,  18,  -6,   2,  0},
+    {-1,   3, -13, 118,  27,  -9,   3,  0},
+    {-1,   4, -16, 112,  37, -11,   3,  0},
+    {-1,   5, -17, 104,  48, -14,   4, -1},
+    {-1,   5, -18,  96,  58, -16,   5, -1},
+    {-1,   5, -19,  88,  68, -17,   5, -1},
+    {-1,   5, -18,  78,  78, -18,   5, -1},
+    {-1,   5, -17,  68,  88, -19,   5, -1},
+    {-1,   5, -16,  58,  96, -18,   5, -1},
+    {-1,   4, -14,  48, 104, -17,   5, -1},
+    { 0,   3, -11,  37, 112, -16,   4, -1},
+    { 0,   3,  -9,  27, 118, -13,   3, -1},
+    { 0,   2,  -6,  18, 122, -10,   2,  0},
+    { 0,   1,  -3,   8, 126,  -5,   1,  0}
+#elif FILTER_ALPHA == 0
+    /* Lagrangian interpolation filter */
+    { 0,   0,   0, 128,   0,   0,   0,   0},
+    { 0,   1,  -5, 126,   8,  -3,   1,   0},
+    {-1,   3, -10, 122,  18,  -6,   2,   0},
+    {-1,   4, -13, 118,  27,  -9,   3,  -1},
+    {-1,   4, -16, 112,  37, -11,   4,  -1},
+    {-1,   5, -18, 105,  48, -14,   4,  -1},
+    {-1,   5, -19,  97,  58, -16,   5,  -1},
+    {-1,   6, -19,  88,  68, -18,   5,  -1},
+    {-1,   6, -19,  78,  78, -19,   6,  -1},
+    {-1,   5, -18,  68,  88, -19,   6,  -1},
+    {-1,   5, -16,  58,  97, -19,   5,  -1},
+    {-1,   4, -14,  48, 105, -18,   5,  -1},
+    {-1,   4, -11,  37, 112, -16,   4,  -1},
+    {-1,   3,  -9,  27, 118, -13,   4,  -1},
+    { 0,   2,  -6,  18, 122, -10,   3,  -1},
+    { 0,   1,  -3,   8, 126,  -5,   1,   0}
+#endif  /* FILTER_ALPHA */
+#else   /* SUBPEL_SHIFTS==16 */
+#if FILTER_ALPHA == 70
+    /* alpha = 0.70 */
+    { 0,   0,   0, 128,   0,   0,   0,  0},
+    {-1,   4, -11, 123,  18,  -7,   3, -1},
+    {-2,   6, -18, 113,  38, -13,   5, -1},
+    {-2,   8, -22,  98,  59, -18,   7, -2},
+    {-2,   8, -21,  79,  79, -21,   8, -2},
+    {-2,   7, -18,  59,  98, -22,   8, -2},
+    {-1,   5, -13,  38, 113, -18,   6, -2},
+    {-1,   3,  -7,  18, 123, -11,   4, -1}
+#elif FILTER_ALPHA == 65
+    /* alpha = 0.65 */
+    { 0,   0,   0, 128,   0,   0,   0, 0},
+    {-1,   3, -10, 123,  18,  -6,   2, -1},
+    {-1,   5, -17, 112,  38, -13,   5, -1},
+    {-2,   7, -21,  98,  59, -17,   6, -2},
+    {-2,   7, -20,  79,  79, -20,   7, -2},
+    {-2,   6, -17,  59,  98, -21,   7, -2},
+    {-1,   5, -13,  38, 112, -17,   5, -1},
+    {-1,   2,  -6,  18, 123, -10,   3, -1}
+#elif FILTER_ALPHA == 60
+    /* alpha = 0.60 */
+    { 0,   0,   0, 128,   0,   0,   0, 0},
+    {-1,   3, -10, 123,  18,  -6,   2, -1},
+    {-1,   5, -17, 112,  38, -12,   4, -1},
+    {-1,   6, -20,  97,  58, -17,   6, -1},
+    {-1,   6, -20,  79,  79, -20,   6, -1},
+    {-1,   6, -17,  58,  97, -20,   6, -1},
+    {-1,   4, -12,  38, 112, -17,   5, -1},
+    {-1,   2,  -6,  18, 123, -10,   3, -1}
+#elif FILTER_ALPHA == 55
+    /* alpha = 0.55 */
+    { 0,   0,   0, 128,   0,   0,   0,  0},
+    {-1,   2, -10, 123,  18,  -6,   2,  0},
+    {-1,   5, -16, 112,  37, -12,   4, -1},
+    {-1,   5, -19,  97,  58, -16,   5, -1},
+    {-1,   6, -19,  78,  78, -19,   6, -1},
+    {-1,   5, -16,  58,  97, -19,   5, -1},
+    {-1,   4, -12,  37, 112, -16,   5, -1},
+    { 0,   2,  -6,  18, 123, -10,   2, -1}
+#elif FILTER_ALPHA == 50
+    /* alpha = 0.50 */
+    { 0,   0,   0, 128,   0,   0,   0,  0},
+    { 0,   2, -10, 122,  18,  -6,   2,  0},
+    {-1,   4, -16, 112,  37, -11,   3,  0},
+    {-1,   5, -18,  96,  58, -16,   5, -1},
+    {-1,   5, -18,  78,  78, -18,   5, -1},
+    {-1,   5, -16,  58,  96, -18,   5, -1},
+    { 0,   3, -11,  37, 112, -16,   4, -1},
+    { 0,   2,  -6,  18, 122, -10,   2,  0}
+#elif FILTER_ALPHA == 0
+    /* Lagrangian interpolation filter */
+    { 0,   0,   0, 128,   0,   0,   0,   0},
+    {-1,   3, -10, 122,  18,  -6,   2,   0},
+    {-1,   4, -16, 112,  37, -11,   4,  -1},
+    {-1,   5, -19,  97,  58, -16,   5,  -1},
+    {-1,   6, -19,  78,  78, -19,   6,  -1},
+    {-1,   5, -16,  58,  97, -19,   5,  -1},
+    {-1,   4, -11,  37, 112, -16,   4,  -1},
+    { 0,   2,  -6,  18, 122, -10,   3,  -1},
+#endif  /* FILTER_ALPHA */
+#endif  /* SUBPEL_SHIFTS==16 */
+};
+
+#else  // CONFIG_ENHANCED_INTERP
+
 DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) =
 {

@ -38,6 +245,8 @@ DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) =
    { 0, -1,   12,  123,  -6,  0 },
 };

+#endif  // CONFIG_ENHANCED_INTERP
+
 static void filter_block2d_first_pass
 (
    unsigned char *src_ptr,
@ -56,13 +265,37 @@ static void filter_block2d_first_pass
    {
        for (j = 0; j < output_width; j++)
        {
+#if INTERP_EXTEND == 3
            Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
                   ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
-                   ((int)src_ptr[0]                 * vp8_filter[2]) +
-                   ((int)src_ptr[pixel_step]         * vp8_filter[3]) +
-                   ((int)src_ptr[2*pixel_step]       * vp8_filter[4]) +
-                   ((int)src_ptr[3*pixel_step]       * vp8_filter[5]) +
+                   ((int)src_ptr[0]                    * vp8_filter[2]) +
+                   ((int)src_ptr[pixel_step]           * vp8_filter[3]) +
+                   ((int)src_ptr[2*pixel_step]         * vp8_filter[4]) +
+                   ((int)src_ptr[3*pixel_step]         * vp8_filter[5]) +
                   (VP8_FILTER_WEIGHT >> 1);      /* Rounding */
+#elif INTERP_EXTEND == 4
+            Temp = ((int)src_ptr[-3 * (int)pixel_step] * vp8_filter[0]) +
+                   ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[1]) +
+                   ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[2]) +
+                   ((int)src_ptr[0]                    * vp8_filter[3]) +
+                   ((int)src_ptr[pixel_step]           * vp8_filter[4]) +
+                   ((int)src_ptr[2 * pixel_step]       * vp8_filter[5]) +
+                   ((int)src_ptr[3 * pixel_step]       * vp8_filter[6]) +
+                   ((int)src_ptr[4 * pixel_step]       * vp8_filter[7]) +
+                   (VP8_FILTER_WEIGHT >> 1);      /* Rounding */
+#elif INTERP_EXTEND == 5
+            Temp = ((int)src_ptr[-4 * (int)pixel_step] * vp8_filter[0]) +
+                   ((int)src_ptr[-3 * (int)pixel_step] * vp8_filter[1]) +
+                   ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[2]) +
+                   ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[3]) +
+                   ((int)src_ptr[0]                    * vp8_filter[4]) +
+                   ((int)src_ptr[pixel_step]           * vp8_filter[5]) +
+                   ((int)src_ptr[2 * pixel_step]       * vp8_filter[6]) +
+                   ((int)src_ptr[3 * pixel_step]       * vp8_filter[7]) +
+                   ((int)src_ptr[4 * pixel_step]       * vp8_filter[8]) +
+                   ((int)src_ptr[5 * pixel_step]       * vp8_filter[9]) +
+                   (VP8_FILTER_WEIGHT >> 1);      /* Rounding */
+#endif

            /* Normalize back to 0-255 */
            Temp = Temp >> VP8_FILTER_SHIFT;
@ -102,13 +335,37 @@ static void filter_block2d_second_pass
        for (j = 0; j < output_width; j++)
        {
            /* Apply filter */
+#if INTERP_EXTEND == 3
            Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
                   ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
-                   ((int)src_ptr[0]                 * vp8_filter[2]) +
-                   ((int)src_ptr[pixel_step]         * vp8_filter[3]) +
-                   ((int)src_ptr[2*pixel_step]       * vp8_filter[4]) +
-                   ((int)src_ptr[3*pixel_step]       * vp8_filter[5]) +
+                   ((int)src_ptr[0]                    * vp8_filter[2]) +
+                   ((int)src_ptr[pixel_step]           * vp8_filter[3]) +
+                   ((int)src_ptr[2*pixel_step]         * vp8_filter[4]) +
+                   ((int)src_ptr[3*pixel_step]         * vp8_filter[5]) +
                   (VP8_FILTER_WEIGHT >> 1);   /* Rounding */
+#elif INTERP_EXTEND == 4
+            Temp = ((int)src_ptr[-3 * (int)pixel_step] * vp8_filter[0]) +
+                   ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[1]) +
+                   ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[2]) +
+                   ((int)src_ptr[0]                    * vp8_filter[3]) +
+                   ((int)src_ptr[pixel_step]           * vp8_filter[4]) +
+                   ((int)src_ptr[2 * pixel_step]       * vp8_filter[5]) +
+                   ((int)src_ptr[3 * pixel_step]       * vp8_filter[6]) +
+                   ((int)src_ptr[4 * pixel_step]       * vp8_filter[7]) +
+                   (VP8_FILTER_WEIGHT >> 1);      /* Rounding */
+#elif INTERP_EXTEND == 5
+            Temp = ((int)src_ptr[-4 * (int)pixel_step] * vp8_filter[0]) +
+                   ((int)src_ptr[-3 * (int)pixel_step] * vp8_filter[1]) +
+                   ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[2]) +
+                   ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[3]) +
+                   ((int)src_ptr[0]                    * vp8_filter[4]) +
+                   ((int)src_ptr[pixel_step]           * vp8_filter[5]) +
+                   ((int)src_ptr[2 * pixel_step]       * vp8_filter[6]) +
+                   ((int)src_ptr[3 * pixel_step]       * vp8_filter[7]) +
+                   ((int)src_ptr[4 * pixel_step]       * vp8_filter[8]) +
+                   ((int)src_ptr[5 * pixel_step]       * vp8_filter[9]) +
+                   (VP8_FILTER_WEIGHT >> 1);      /* Rounding */
+#endif

            /* Normalize back to 0-255 */
            Temp = Temp >> VP8_FILTER_SHIFT;
@ -128,6 +385,83 @@ static void filter_block2d_second_pass
    }
 }

+/*
+ * Same as filter_block2d_second_pass(), except that the filtered value is
+ * not stored directly: it is averaged with the pixel already present in
+ * the output, ((filter_result + dest + 1) >> 1), and that average is
+ * stored. The filter has 2*INTERP_EXTEND taps (6, 8 or 10), so "sixtap"
+ * only describes the INTERP_EXTEND == 3 build. src_ptr holds the int
+ * results of the first pass; pixel_step is the distance between taps.
+ */
+static void filter_block2d_second_pass_avg
+(
+    int *src_ptr,
+    unsigned char *output_ptr,
+    int output_pitch,
+    unsigned int src_pixels_per_line,
+    unsigned int pixel_step,
+    unsigned int output_height,
+    unsigned int output_width,
+    const short *vp8_filter
+)
+{
+    unsigned int i, j;
+    int  Temp;
+
+    for (i = 0; i < output_height; i++)
+    {
+        for (j = 0; j < output_width; j++)
+        {
+            /* Apply the 2*INTERP_EXTEND-tap filter around src_ptr[0] */
+#if INTERP_EXTEND == 3
+            Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
+                   ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
+                   ((int)src_ptr[0]                    * vp8_filter[2]) +
+                   ((int)src_ptr[pixel_step]           * vp8_filter[3]) +
+                   ((int)src_ptr[2*pixel_step]         * vp8_filter[4]) +
+                   ((int)src_ptr[3*pixel_step]         * vp8_filter[5]) +
+                   (VP8_FILTER_WEIGHT >> 1);   /* Rounding */
+#elif INTERP_EXTEND == 4
+            Temp = ((int)src_ptr[-3 * (int)pixel_step] * vp8_filter[0]) +
+                   ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[1]) +
+                   ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[2]) +
+                   ((int)src_ptr[0]                    * vp8_filter[3]) +
+                   ((int)src_ptr[pixel_step]           * vp8_filter[4]) +
+                   ((int)src_ptr[2 * pixel_step]       * vp8_filter[5]) +
+                   ((int)src_ptr[3 * pixel_step]       * vp8_filter[6]) +
+                   ((int)src_ptr[4 * pixel_step]       * vp8_filter[7]) +
+                   (VP8_FILTER_WEIGHT >> 1);      /* Rounding */
+#elif INTERP_EXTEND == 5
+            Temp = ((int)src_ptr[-4 * (int)pixel_step] * vp8_filter[0]) +
+                   ((int)src_ptr[-3 * (int)pixel_step] * vp8_filter[1]) +
+                   ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[2]) +
+                   ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[3]) +
+                   ((int)src_ptr[0]                    * vp8_filter[4]) +
+                   ((int)src_ptr[pixel_step]           * vp8_filter[5]) +
+                   ((int)src_ptr[2 * pixel_step]       * vp8_filter[6]) +
+                   ((int)src_ptr[3 * pixel_step]       * vp8_filter[7]) +
+                   ((int)src_ptr[4 * pixel_step]       * vp8_filter[8]) +
+                   ((int)src_ptr[5 * pixel_step]       * vp8_filter[9]) +
+                   (VP8_FILTER_WEIGHT >> 1);      /* Rounding */
+#endif
+
+            /* Normalize back to 0-255 */
+            Temp = Temp >> VP8_FILTER_SHIFT;
+
+            if (Temp < 0)
+                Temp = 0;
+            else if (Temp > 255)
+                Temp = 255;
+
+            output_ptr[j] = (unsigned char) ((output_ptr[j] + Temp + 1) >> 1);   /* rounded mean with the existing pixel */
+            src_ptr++;
+        }
+
+        /* Start next row */
+        src_ptr    += src_pixels_per_line - output_width;
+        output_ptr += output_pitch;
+    }
+}

 static void filter_block2d
 (
@ -139,13 +473,14 @@ static void filter_block2d
    const short  *VFilter
 )
 {
-    int FData[9*4]; /* Temp data buffer used in filtering */
+    int FData[(3+INTERP_EXTEND*2)*4]; /* Temp data buffer used in filtering */

    /* First filter 1-D horizontally... */
-    filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter);
+    filter_block2d_first_pass(src_ptr - ((INTERP_EXTEND-1) * src_pixels_per_line), FData, src_pixels_per_line, 1,
+                              3+INTERP_EXTEND*2, 4, HFilter);

    /* then filter verticaly... */
-    filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter);
+    filter_block2d_second_pass(FData + 4*(INTERP_EXTEND-1), output_ptr, output_pitch, 4, 4, 4, 4, VFilter);
 }


@ -179,20 +514,48 @@ void vp8_sixtap_predict8x8_c
 {
    const short  *HFilter;
    const short  *VFilter;
-    int FData[13*16];   /* Temp data buffer used in filtering */
+    // int FData[(7+INTERP_EXTEND*2)*16];   /* Temp data buffer used in filtering */
+    int FData[(7+INTERP_EXTEND*2)*8];   /* Temp data buffer used in filtering */

    HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */
    VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */

    /* First filter 1-D horizontally... */
-    filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter);
+    filter_block2d_first_pass(src_ptr - ((INTERP_EXTEND-1) * src_pixels_per_line), FData, src_pixels_per_line, 1,
+                              7+INTERP_EXTEND*2, 8, HFilter);


    /* then filter verticaly... */
-    filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
+    filter_block2d_second_pass(FData + 8*(INTERP_EXTEND-1), dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);

 }

+void vp8_sixtap_predict_avg8x8_c
+(
+    unsigned char  *src_ptr,
+    int  src_pixels_per_line,
+    int  xoffset,
+    int  yoffset,
+    unsigned char *dst_ptr,
+    int  dst_pitch
+)
+{
+    const short  *HFilter;
+    const short  *VFilter;
+    /* 8 output rows need INTERP_EXTEND-1 rows above and INTERP_EXTEND below, 8 ints per row */
+    int FData[(7+INTERP_EXTEND*2)*8];   /* Temp data buffer used in filtering */
+
+    HFilter = vp8_sub_pel_filters[xoffset];   /* horizontal taps, 2*INTERP_EXTEND of them */
+    VFilter = vp8_sub_pel_filters[yoffset];   /* vertical taps, 2*INTERP_EXTEND of them */
+
+    /* First filter 1-D horizontally... */
+    filter_block2d_first_pass(src_ptr - ((INTERP_EXTEND-1) * src_pixels_per_line), FData, src_pixels_per_line, 1,
+                              7+INTERP_EXTEND*2, 8, HFilter);
+
+    /* then filter vertically, averaging the result into what dst_ptr already holds */
+    filter_block2d_second_pass_avg(FData + 8*(INTERP_EXTEND-1), dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
+}
+
 void vp8_sixtap_predict8x4_c
 (
    unsigned char  *src_ptr,
@ -205,17 +568,19 @@ void vp8_sixtap_predict8x4_c
 {
    const short  *HFilter;
    const short  *VFilter;
-    int FData[13*16];   /* Temp data buffer used in filtering */
+    // int FData[(7+INTERP_EXTEND*2)*16];   /* Temp data buffer used in filtering */
+    int FData[(3+INTERP_EXTEND*2)*8];   /* Temp data buffer used in filtering */

    HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */
    VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */

    /* First filter 1-D horizontally... */
-    filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter);
+    filter_block2d_first_pass(src_ptr - ((INTERP_EXTEND-1) * src_pixels_per_line), FData, src_pixels_per_line, 1,
+                              3+INTERP_EXTEND*2, 8, HFilter);


    /* then filter verticaly... */
-    filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter);
+    filter_block2d_second_pass(FData + 8*(INTERP_EXTEND-1), dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter);

 }

@ -231,20 +596,48 @@ void vp8_sixtap_predict16x16_c
 {
    const short  *HFilter;
    const short  *VFilter;
-    int FData[21*24];   /* Temp data buffer used in filtering */
+    // int FData[(15+INTERP_EXTEND*2)*24];   /* Temp data buffer used in filtering */
+    int FData[(15+INTERP_EXTEND*2)*16];  /* Temp data buffer used in filtering */


    HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */
    VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */

    /* First filter 1-D horizontally... */
-    filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter);
+    filter_block2d_first_pass(src_ptr - ((INTERP_EXTEND-1) * src_pixels_per_line), FData, src_pixels_per_line, 1,
+                              15+INTERP_EXTEND*2, 16, HFilter);

    /* then filter verticaly... */
-    filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter);
+    filter_block2d_second_pass(FData + 16*(INTERP_EXTEND-1), dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter);

 }

+void vp8_sixtap_predict_avg16x16_c
+(
+    unsigned char  *src_ptr,
+    int  src_pixels_per_line,
+    int  xoffset,
+    int  yoffset,
+    unsigned char *dst_ptr,
+    int  dst_pitch
+)
+{
+    const short  *HFilter;
+    const short  *VFilter;
+    /* 16 output rows need INTERP_EXTEND-1 rows above and INTERP_EXTEND below, 16 ints per row */
+    int FData[(15+INTERP_EXTEND*2)*16];  /* Temp data buffer used in filtering */
+
+    HFilter = vp8_sub_pel_filters[xoffset];   /* horizontal taps, 2*INTERP_EXTEND of them */
+    VFilter = vp8_sub_pel_filters[yoffset];   /* vertical taps, 2*INTERP_EXTEND of them */
+
+    /* First filter 1-D horizontally... */
+    filter_block2d_first_pass(src_ptr - ((INTERP_EXTEND-1) * src_pixels_per_line), FData,
+                              src_pixels_per_line, 1, 15+INTERP_EXTEND*2, 16, HFilter);
+
+    /* then filter vertically, averaging the result into what dst_ptr already holds */
+    filter_block2d_second_pass_avg(FData + 16*(INTERP_EXTEND-1), dst_ptr, dst_pitch,
+                                   16, 16, 16, 16, VFilter);
+}

 /****************************************************************************
 *
@ -349,6 +742,44 @@ static void filter_block2d_bil_second_pass
    }
 }

+/*
+ * Averaging variant of filter_block2d_bil_second_pass(): instead of storing
+ * the bilinear result, it stores the rounded mean of that result and the
+ * pixel already in the output, ((filter_result + dest + 1) >> 1).
+ *
+ * src_ptr is read as rows of `width` values (taps at src_ptr[0] and
+ * src_ptr[width]), so height + 1 source rows are consumed in total;
+ * dst_ptr advances by dst_pitch per row; vp8_filter holds the two taps.
+ */
+static void filter_block2d_bil_second_pass_avg
+(
+    unsigned short *src_ptr,
+    unsigned char  *dst_ptr,
+    int             dst_pitch,
+    unsigned int    height,
+    unsigned int    width,
+    const short    *vp8_filter
+)
+{
+    unsigned int  i, j;
+    int  Temp;
+
+    for (i = 0; i < height; i++)
+    {
+        for (j = 0; j < width; j++)
+        {
+            /* Apply filter: current row and the row below, plus rounding */
+            Temp = ((int)src_ptr[0]     * vp8_filter[0]) +
+                   ((int)src_ptr[width] * vp8_filter[1]) +
+                   (VP8_FILTER_WEIGHT / 2);
+            dst_ptr[j] = (unsigned char)(((Temp >> VP8_FILTER_SHIFT) + dst_ptr[j] + 1) >> 1);
+            src_ptr++;
+        }
+
+        /* Next row... (src_ptr is already there: source rows are packed) */
+        dst_ptr += dst_pitch;
+    }
+}

 /****************************************************************************
 *
@ -395,6 +826,26 @@ static void filter_block2d_bil
    filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width, VFilter);
 }

+static void filter_block2d_bil_avg
+(
+    unsigned char *src_ptr,
+    unsigned char *dst_ptr,
+    unsigned int   src_pitch,
+    unsigned int   dst_pitch,
+    const short   *HFilter,
+    const short   *VFilter,
+    int            Width,
+    int            Height
+)
+{
+    unsigned short FData[17*16];    /* Temp data buffer used in filtering: (Height+1) x Width values, enough for 16x16 */
+
+    /* First filter 1-D horizontally... (Height + 1 rows, the extra one feeds the vertical taps) */
+    filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter);
+
+    /* then 1-D vertically, averaging into the existing dst_ptr contents */
+    filter_block2d_bil_second_pass_avg(FData, dst_ptr, dst_pitch, Height, Width, VFilter);
+}

 void vp8_bilinear_predict4x4_c
 (
@ -454,6 +905,26 @@ void vp8_bilinear_predict8x8_c

 }

+void vp8_bilinear_predict_avg8x8_c
+(
+    unsigned char  *src_ptr,
+    int  src_pixels_per_line,
+    int  xoffset,
+    int  yoffset,
+    unsigned char *dst_ptr,
+    int  dst_pitch
+)
+{
+    const short *HFilter;
+    const short *VFilter;
+    /* xoffset / yoffset index vp8_bilinear_filters (SUBPEL_SHIFTS entries of 2 taps) */
+    HFilter = vp8_bilinear_filters[xoffset];
+    VFilter = vp8_bilinear_filters[yoffset];
+    /* 8x8 bilinear prediction, averaged into what dst_ptr already holds */
+    filter_block2d_bil_avg(src_ptr, dst_ptr, src_pixels_per_line,
+                           dst_pitch, HFilter, VFilter, 8, 8);
+}
+
 void vp8_bilinear_predict8x4_c
 (
    unsigned char  *src_ptr,
@ -492,3 +963,23 @@ void vp8_bilinear_predict16x16_c

    filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
 }
+
+void vp8_bilinear_predict_avg16x16_c  /* 16x16 front end for filter_block2d_bil_avg */
+(
+    unsigned char  *src_ptr,          /* source pixels */
+    int  src_pixels_per_line,         /* passed on as the source pitch */
+    int  xoffset,                     /* row index into vp8_bilinear_filters for the horizontal taps */
+    int  yoffset,                     /* row index into vp8_bilinear_filters for the vertical taps */
+    unsigned char *dst_ptr,           /* destination block */
+    int  dst_pitch                    /* destination stride */
+)
+{
+    const short *HFilter;
+    const short *VFilter;
+
+    HFilter = vp8_bilinear_filters[xoffset];  /* NOTE(review): offsets are not range-checked here */
+    VFilter = vp8_bilinear_filters[yoffset];
+
+    filter_block2d_bil_avg(src_ptr, dst_ptr, src_pixels_per_line,
+                           dst_pitch, HFilter, VFilter, 16, 16);
+}
--- a/vp8/common/filter.h
+++ b/vp8/common/filter.h
@ -8,15 +8,23 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

-
 #ifndef FILTER_H
 #define FILTER_H

+#include "vpx_config.h"
+#include "vpx_scale/yv12config.h"
+
 #define BLOCK_HEIGHT_WIDTH 4
 #define VP8_FILTER_WEIGHT 128
 #define VP8_FILTER_SHIFT  7

-extern const short vp8_bilinear_filters[8][2];
-extern const short vp8_sub_pel_filters[8][6];
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+#define SUBPEL_SHIFTS 16
+#else
+#define SUBPEL_SHIFTS 8
+#endif
+
+extern const short vp8_bilinear_filters[SUBPEL_SHIFTS][2];
+extern const short vp8_sub_pel_filters[SUBPEL_SHIFTS][INTERP_EXTEND*2];

 #endif //FILTER_H
--- a/vp8/common/findnearmv.c
+++ b/vp8/common/findnearmv.c
@ -21,10 +21,12 @@ const unsigned char vp8_mbsplit_offset[4][16] = {
 /* Predict motion vectors using those from already-decoded nearby blocks.
   Note that we only consider one 4x4 subblock from each candidate 16x16
   macroblock.   */
+
 void vp8_find_near_mvs
 (
    MACROBLOCKD *xd,
    const MODE_INFO *here,
+    const MODE_INFO *lf_here,
    int_mv *nearest,
    int_mv *nearby,
    int_mv *best_mv,
@ -36,6 +38,7 @@ void vp8_find_near_mvs
    const MODE_INFO *above = here - xd->mode_info_stride;
    const MODE_INFO *left = here - 1;
    const MODE_INFO *aboveleft = above - 1;
+    const MODE_INFO *third = NULL;
    int_mv            near_mvs[4];
    int_mv           *mv = near_mvs;
    int             *cntx = cnt;
@ -51,10 +54,10 @@ void vp8_find_near_mvs
        if (above->mbmi.mv.as_int)
        {
            (++mv)->as_int = above->mbmi.mv.as_int;
-            mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe, mv, ref_frame_sign_bias);
+            mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame],
+                refframe, mv, ref_frame_sign_bias);
            ++cntx;
        }
-
        *cntx += 2;
    }

@ -64,38 +67,44 @@ void vp8_find_near_mvs
        if (left->mbmi.mv.as_int)
        {
            int_mv this_mv;
-
            this_mv.as_int = left->mbmi.mv.as_int;
-            mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe, &this_mv, ref_frame_sign_bias);
+            mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame],
+                refframe, &this_mv, ref_frame_sign_bias);

            if (this_mv.as_int != mv->as_int)
            {
                (++mv)->as_int = this_mv.as_int;
                ++cntx;
            }
-
            *cntx += 2;
        }
        else
            cnt[CNT_INTRA] += 2;
    }
-
-    /* Process above left */
-    if (aboveleft->mbmi.ref_frame != INTRA_FRAME)
+    /* Process above left or the one from the last frame */
+    if ( aboveleft->mbmi.ref_frame != INTRA_FRAME||
+         (lf_here->mbmi.ref_frame==LAST_FRAME && refframe == LAST_FRAME))
    {
        if (aboveleft->mbmi.mv.as_int)
+        {
+            third = aboveleft;
+        }
+        else if(lf_here->mbmi.mv.as_int)
+        {
+            third = lf_here;
+        }
+        if(third)
        {
            int_mv this_mv;
-
-            this_mv.as_int = aboveleft->mbmi.mv.as_int;
-            mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe, &this_mv, ref_frame_sign_bias);
+            this_mv.as_int = third->mbmi.mv.as_int;
+            mv_bias(ref_frame_sign_bias[third->mbmi.ref_frame],
+                refframe, &this_mv, ref_frame_sign_bias);

            if (this_mv.as_int != mv->as_int)
            {
                (++mv)->as_int = this_mv.as_int;
                ++cntx;
            }
-
            *cntx += 1;
        }
        else
@ -105,14 +114,16 @@ void vp8_find_near_mvs
    /* If we have three distinct MV's ... */
    if (cnt[CNT_SPLITMV])
    {
-        /* See if above-left MV can be merged with NEAREST */
+        /* See if the third MV can be merged with NEAREST */
        if (mv->as_int == near_mvs[CNT_NEAREST].as_int)
            cnt[CNT_NEAREST] += 1;
    }

    cnt[CNT_SPLITMV] = ((above->mbmi.mode == SPLITMV)
                        + (left->mbmi.mode == SPLITMV)) * 2
-                       + (aboveleft->mbmi.mode == SPLITMV);
+                        + (
+                        lf_here->mbmi.mode == SPLITMV ||
+                       aboveleft->mbmi.mode == SPLITMV);

    /* Swap near and nearest if necessary */
    if (cnt[CNT_NEAR] > cnt[CNT_NEAREST])
@ -135,21 +146,40 @@ void vp8_find_near_mvs
    nearest->as_int = near_mvs[CNT_NEAREST].as_int;
    nearby->as_int = near_mvs[CNT_NEAR].as_int;

+    /* Make sure that the 1/8th bits of the Mvs are zero if high_precision
+     * is not being used, by truncating the last bit towards 0
+     */
+#if CONFIG_HIGH_PRECISION_MV
+    if (!xd->allow_high_precision_mv)
+    {
+        if (best_mv->as_mv.row & 1)
+            best_mv->as_mv.row += (best_mv->as_mv.row > 0 ? -1 : 1);
+        if (best_mv->as_mv.col & 1)
+            best_mv->as_mv.col += (best_mv->as_mv.col > 0 ? -1 : 1);
+        if (nearest->as_mv.row & 1)
+            nearest->as_mv.row += (nearest->as_mv.row > 0 ? -1 : 1);
+        if (nearest->as_mv.col & 1)
+            nearest->as_mv.col += (nearest->as_mv.col > 0 ? -1 : 1);
+        if (nearby->as_mv.row & 1)
+            nearby->as_mv.row += (nearby->as_mv.row > 0 ? -1 : 1);
+        if (nearby->as_mv.col & 1)
+            nearby->as_mv.col += (nearby->as_mv.col > 0 ? -1 : 1);
+    }
+#endif
+
    //TODO: move clamp outside findnearmv
    vp8_clamp_mv2(nearest, xd);
    vp8_clamp_mv2(nearby, xd);
    vp8_clamp_mv2(best_mv, xd);
 }

-vp8_prob *vp8_mv_ref_probs(
+vp8_prob *vp8_mv_ref_probs(VP8_COMMON *pc,
    vp8_prob p[VP8_MVREFS-1], const int near_mv_ref_ct[4]
 )
 {
-    p[0] = vp8_mode_contexts [near_mv_ref_ct[0]] [0];
-    p[1] = vp8_mode_contexts [near_mv_ref_ct[1]] [1];
-    p[2] = vp8_mode_contexts [near_mv_ref_ct[2]] [2];
-    p[3] = vp8_mode_contexts [near_mv_ref_ct[3]] [3];
-    /*p[3] = vp8_mode_contexts [near_mv_ref_ct[1] + near_mv_ref_ct[2] + near_mv_ref_ct[3]] [3];*/
+    p[0] = pc->vp8_mode_contexts [near_mv_ref_ct[0]] [0];
+    p[1] = pc->vp8_mode_contexts [near_mv_ref_ct[1]] [1];
+    p[2] = pc->vp8_mode_contexts [near_mv_ref_ct[2]] [2];
+    p[3] = pc->vp8_mode_contexts [near_mv_ref_ct[3]] [3];
    return p;
 }
-
--- a/vp8/common/findnearmv.h
+++ b/vp8/common/findnearmv.h
@ -16,6 +16,7 @@
 #include "blockd.h"
 #include "modecont.h"
 #include "treecoder.h"
+#include "onyxc_int.h"


 static void mv_bias(int refmb_ref_frame_sign_bias, int refframe, int_mv *mvp, const int *ref_frame_sign_bias)
@ -75,13 +76,14 @@ void vp8_find_near_mvs
 (
    MACROBLOCKD *xd,
    const MODE_INFO *here,
+    const MODE_INFO *lfhere,
    int_mv *nearest, int_mv *nearby, int_mv *best,
    int near_mv_ref_cts[4],
    int refframe,
    int *ref_frame_sign_bias
 );

-vp8_prob *vp8_mv_ref_probs(
+vp8_prob *vp8_mv_ref_probs(VP8_COMMON *pc,
    vp8_prob p[VP8_MVREFS-1], const int near_mv_ref_ct[4]
 );

@ -125,8 +127,6 @@ static B_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b)
        --cur_mb;
        switch (cur_mb->mbmi.mode)
        {
-            case B_PRED:
-              return (cur_mb->bmi + b + 3)->as_mode;
            case DC_PRED:
                return B_DC_PRED;
            case V_PRED:
@ -135,15 +135,18 @@ static B_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b)
                return B_HE_PRED;
            case TM_PRED:
                return B_TM_PRED;
+            case I8X8_PRED:
+            case B_PRED:
+              return (cur_mb->bmi + b + 3)->as_mode.first;
            default:
                return B_DC_PRED;
        }
    }
-
-    return (cur_mb->bmi + b - 1)->as_mode;
+    return (cur_mb->bmi + b - 1)->as_mode.first;
 }

-static B_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mb, int b, int mi_stride)
+static B_PREDICTION_MODE above_block_mode(const MODE_INFO
+                                          *cur_mb, int b, int mi_stride)
 {
    if (!(b >> 2))
    {
@ -152,8 +155,6 @@ static B_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mb, int b, int mi

        switch (cur_mb->mbmi.mode)
        {
-            case B_PRED:
-              return (cur_mb->bmi + b + 12)->as_mode;
            case DC_PRED:
                return B_DC_PRED;
            case V_PRED:
@ -162,12 +163,15 @@ static B_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mb, int b, int mi
                return B_HE_PRED;
            case TM_PRED:
                return B_TM_PRED;
+            case I8X8_PRED:
+            case B_PRED:
+              return (cur_mb->bmi + b + 12)->as_mode.first;
            default:
                return B_DC_PRED;
        }
    }

-    return (cur_mb->bmi + b - 4)->as_mode;
+    return (cur_mb->bmi + b - 4)->as_mode.first;
 }

 #endif
--- a/vp8/common/generic/systemdependent.c
+++ b/vp8/common/generic/systemdependent.c
@ -9,7 +9,7 @@
 */


-#include "vpx_config.h"
+#include "vpx_ports/config.h"
 #include "vp8/common/g_common.h"
 #include "vp8/common/subpixel.h"
 #include "vp8/common/loopfilter.h"
@ -17,54 +17,9 @@
 #include "vp8/common/idct.h"
 #include "vp8/common/onyxc_int.h"

-#if CONFIG_MULTITHREAD
-#if HAVE_UNISTD_H
-#include <unistd.h>
-#elif defined(_WIN32)
-#include <windows.h>
-typedef void (WINAPI *PGNSI)(LPSYSTEM_INFO);
-#endif
-#endif
-
 extern void vp8_arch_x86_common_init(VP8_COMMON *ctx);
 extern void vp8_arch_arm_common_init(VP8_COMMON *ctx);

-#if CONFIG_MULTITHREAD
-static int get_cpu_count()
-{
-    int core_count = 16;
-
-#if HAVE_UNISTD_H
-#if defined(_SC_NPROCESSORS_ONLN)
-    core_count = sysconf(_SC_NPROCESSORS_ONLN);
-#elif defined(_SC_NPROC_ONLN)
-    core_count = sysconf(_SC_NPROC_ONLN);
-#endif
-#elif defined(_WIN32)
-    {
-        PGNSI pGNSI;
-        SYSTEM_INFO sysinfo;
-
-        /* Call GetNativeSystemInfo if supported or
-         * GetSystemInfo otherwise. */
-
-        pGNSI = (PGNSI) GetProcAddress(
-                GetModuleHandle(TEXT("kernel32.dll")), "GetNativeSystemInfo");
-        if (pGNSI != NULL)
-            pGNSI(&sysinfo);
-        else
-            GetSystemInfo(&sysinfo);
-
-        core_count = sysinfo.dwNumberOfProcessors;
-    }
-#else
-    /* other platforms */
-#endif
-
-    return core_count > 0 ? core_count : 1;
-}
-#endif
-
 void vp8_machine_specific_config(VP8_COMMON *ctx)
 {
 #if CONFIG_RUNTIME_CPU_DETECT
@ -75,34 +30,67 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
    rtcd->idct.idct1_scalar_add = vp8_dc_only_idct_add_c;
    rtcd->idct.iwalsh1      = vp8_short_inv_walsh4x4_1_c;
    rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_c;
-
+    rtcd->idct.idct8        = vp8_short_idct8x8_c;
+    rtcd->idct.idct1_scalar_add_8x8 = vp8_dc_only_idct_add_8x8_c;
+    rtcd->idct.ihaar2       = vp8_short_ihaar2x2_c;
    rtcd->recon.copy16x16   = vp8_copy_mem16x16_c;
    rtcd->recon.copy8x8     = vp8_copy_mem8x8_c;
+    rtcd->recon.avg16x16    = vp8_avg_mem16x16_c;
+    rtcd->recon.avg8x8      = vp8_avg_mem8x8_c;
    rtcd->recon.copy8x4     = vp8_copy_mem8x4_c;
    rtcd->recon.recon       = vp8_recon_b_c;
+    rtcd->recon.recon_uv    = vp8_recon_uv_b_c;
    rtcd->recon.recon2      = vp8_recon2b_c;
    rtcd->recon.recon4      = vp8_recon4b_c;
    rtcd->recon.recon_mb    = vp8_recon_mb_c;
    rtcd->recon.recon_mby   = vp8_recon_mby_c;
    rtcd->recon.build_intra_predictors_mby =
        vp8_build_intra_predictors_mby;
+#if CONFIG_COMP_INTRA_PRED
+    rtcd->recon.build_comp_intra_predictors_mby =
+        vp8_build_comp_intra_predictors_mby;
+#endif
    rtcd->recon.build_intra_predictors_mby_s =
        vp8_build_intra_predictors_mby_s;
    rtcd->recon.build_intra_predictors_mbuv =
        vp8_build_intra_predictors_mbuv;
    rtcd->recon.build_intra_predictors_mbuv_s =
        vp8_build_intra_predictors_mbuv_s;
+#if CONFIG_COMP_INTRA_PRED
+    rtcd->recon.build_comp_intra_predictors_mbuv =
+        vp8_build_comp_intra_predictors_mbuv;
+#endif
    rtcd->recon.intra4x4_predict =
        vp8_intra4x4_predict;
+#if CONFIG_COMP_INTRA_PRED
+    rtcd->recon.comp_intra4x4_predict =
+        vp8_comp_intra4x4_predict;
+#endif
+    rtcd->recon.intra8x8_predict =
+        vp8_intra8x8_predict;
+#if CONFIG_COMP_INTRA_PRED
+    rtcd->recon.comp_intra8x8_predict =
+        vp8_comp_intra8x8_predict;
+#endif
+    rtcd->recon.intra_uv4x4_predict =
+        vp8_intra_uv4x4_predict;
+#if CONFIG_COMP_INTRA_PRED
+    rtcd->recon.comp_intra_uv4x4_predict =
+        vp8_comp_intra_uv4x4_predict;
+#endif

-    rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_c;
-    rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_c;
-    rtcd->subpix.sixtap8x4     = vp8_sixtap_predict8x4_c;
-    rtcd->subpix.sixtap4x4     = vp8_sixtap_predict_c;
-    rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_c;
-    rtcd->subpix.bilinear8x8   = vp8_bilinear_predict8x8_c;
-    rtcd->subpix.bilinear8x4   = vp8_bilinear_predict8x4_c;
-    rtcd->subpix.bilinear4x4   = vp8_bilinear_predict4x4_c;
+    rtcd->subpix.sixtap16x16       = vp8_sixtap_predict16x16_c;
+    rtcd->subpix.sixtap8x8         = vp8_sixtap_predict8x8_c;
+    rtcd->subpix.sixtap_avg16x16   = vp8_sixtap_predict_avg16x16_c;
+    rtcd->subpix.sixtap_avg8x8     = vp8_sixtap_predict_avg8x8_c;
+    rtcd->subpix.sixtap8x4         = vp8_sixtap_predict8x4_c;
+    rtcd->subpix.sixtap4x4         = vp8_sixtap_predict_c;
+    rtcd->subpix.bilinear16x16     = vp8_bilinear_predict16x16_c;
+    rtcd->subpix.bilinear8x8       = vp8_bilinear_predict8x8_c;
+    rtcd->subpix.bilinear_avg16x16 = vp8_bilinear_predict_avg16x16_c;
+    rtcd->subpix.bilinear_avg8x8   = vp8_bilinear_predict_avg8x8_c;
+    rtcd->subpix.bilinear8x4       = vp8_bilinear_predict8x4_c;
+    rtcd->subpix.bilinear4x4       = vp8_bilinear_predict4x4_c;

    rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_c;
    rtcd->loopfilter.normal_b_v  = vp8_loop_filter_bv_c;
@ -133,17 +121,10 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
 #if ARCH_ARM
    vp8_arch_arm_common_init(ctx);
 #endif
-#if CONFIG_EXTEND_QRANGE
    rtcd->idct.idct1        = vp8_short_idct4x4llm_1_c;
    rtcd->idct.idct16       = vp8_short_idct4x4llm_c;
    rtcd->idct.idct1_scalar_add = vp8_dc_only_idct_add_c;
    rtcd->idct.iwalsh1      = vp8_short_inv_walsh4x4_1_c;
    rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_c;

-#endif
-
-
-#if CONFIG_MULTITHREAD
-    ctx->processor_core_count = get_cpu_count();
-#endif /* CONFIG_MULTITHREAD */
 }
--- a/vp8/common/idct.h
+++ b/vp8/common/idct.h
@ -31,6 +31,34 @@
 #include "arm/idct_arm.h"
 #endif

+
+#ifndef vp8_idct_idct8
+#define vp8_idct_idct8 vp8_short_idct8x8_c
+#endif
+extern prototype_idct(vp8_idct_idct8);
+
+#ifndef vp8_idct_idct8_1
+#define vp8_idct_idct8_1 vp8_short_idct8x8_1_c
+#endif
+extern prototype_idct(vp8_idct_idct8_1);
+
+#ifndef vp8_idct_ihaar2
+#define vp8_idct_ihaar2 vp8_short_ihaar2x2_c
+#endif
+extern prototype_idct(vp8_idct_ihaar2);
+
+#ifndef vp8_idct_ihaar2_1
+#define vp8_idct_ihaar2_1 vp8_short_ihaar2x2_1_c
+#endif
+extern prototype_idct(vp8_idct_ihaar2_1);
+
+#ifndef vp8_idct_idct1_scalar_add_8x8
+#define vp8_idct_idct1_scalar_add_8x8 vp8_dc_only_idct_add_8x8_c
+#endif
+extern prototype_idct_scalar_add(vp8_idct_idct1_scalar_add_8x8);
+
+
+
 #ifndef vp8_idct_idct1
 #define vp8_idct_idct1 vp8_short_idct4x4llm_1_c
 #endif
@ -69,6 +97,12 @@ typedef struct

    vp8_second_order_fn_t iwalsh1;
    vp8_second_order_fn_t iwalsh16;
+
+    vp8_idct_fn_t            idct8;
+    vp8_idct_fn_t            idct8_1;
+    vp8_idct_scalar_add_fn_t idct1_scalar_add_8x8;
+    vp8_idct_fn_t ihaar2;
+    vp8_idct_fn_t ihaar2_1;
 } vp8_idct_rtcd_vtable_t;

 #if CONFIG_RUNTIME_CPU_DETECT
--- a/vp8/common/idctllm.c
+++ b/vp8/common/idctllm.c
@ -22,11 +22,15 @@
 * so
 *         x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1).
 **************************************************************************/
-#include "vpx_config.h"
+#include "vpx_ports/config.h"
+
+
+#include <math.h>

 static const int cospi8sqrt2minus1 = 20091;
 static const int sinpi8sqrt2      = 35468;
 static const int rounding = 0;
+
 void vp8_short_idct4x4llm_c(short *input, short *output, int pitch)
 {
    int i;
@ -76,20 +80,11 @@ void vp8_short_idct4x4llm_c(short *input, short *output, int pitch)
        temp2 = (ip[3] * sinpi8sqrt2 + rounding) >> 16;
        d1 = temp1 + temp2;

-
-#if !CONFIG_EXTEND_QRANGE
-        op[0] = (a1 + d1 + 4) >> 3;
-        op[3] = (a1 - d1 + 4) >> 3;
-
-        op[1] = (b1 + c1 + 4) >> 3;
-        op[2] = (b1 - c1 + 4) >> 3;
-#else
        op[0] = (a1 + d1 + 16) >> 5;
        op[3] = (a1 - d1 + 16) >> 5;

        op[1] = (b1 + c1 + 16) >> 5;
        op[2] = (b1 - c1 + 16) >> 5;
-#endif

        ip += shortpitch;
        op += shortpitch;
@ -102,11 +97,7 @@ void vp8_short_idct4x4llm_1_c(short *input, short *output, int pitch)
    int a1;
    short *op = output;
    int shortpitch = pitch >> 1;
-#if !CONFIG_EXTEND_QRANGE
-    a1 = ((input[0] + 4) >> 3);
-#else
    a1 = ((input[0] + 16) >> 5);
-#endif
    for (i = 0; i < 4; i++)
    {
        op[0] = a1;
@ -119,11 +110,7 @@ void vp8_short_idct4x4llm_1_c(short *input, short *output, int pitch)

 void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr, unsigned char *dst_ptr, int pitch, int stride)
 {
-#if !CONFIG_EXTEND_QRANGE
-    int a1 = ((input_dc + 4) >> 3);
-#else
    int a1 = ((input_dc + 16) >> 5);
-#endif
    int r, c;

    for (r = 0; r < 4; r++)
@ -185,17 +172,11 @@ void vp8_short_inv_walsh4x4_c(short *input, short *output)
        c2 = a1 - b1;
        d2 = d1 - c1;

-#if !CONFIG_EXTEND_QRANGE
-        op[0] = (a2 + 3) >> 3;
-        op[1] = (b2 + 3) >> 3;
-        op[2] = (c2 + 3) >> 3;
-        op[3] = (d2 + 3) >> 3;
-#else
        op[0] = (a2 + 1) >> 2;
        op[1] = (b2 + 1) >> 2;
        op[2] = (c2 + 1) >> 2;
        op[3] = (d2 + 1) >> 2;
-#endif
+
        ip += 4;
        op += 4;
    }
@ -207,11 +188,7 @@ void vp8_short_inv_walsh4x4_1_c(short *input, short *output)
    int a1;
    short *op = output;

-#if !CONFIG_EXTEND_QRANGE
-    a1 = (input[0] + 3 )>> 3;
-#else
    a1 = (input[0] + 1 )>> 2;
-#endif

    for (i = 0; i < 4; i++)
    {
@ -222,3 +199,212 @@ void vp8_short_inv_walsh4x4_1_c(short *input, short *output)
        op += 4;
    }
 }
+
+
+void vp8_dc_only_idct_add_8x8_c(short input_dc,          /* DC coefficient */
+                                unsigned char *pred_ptr, /* predictor pixels, row stride = pitch */
+                                unsigned char *dst_ptr,  /* output pixels, row stride = stride */
+                                int pitch, int stride)
+{
+    int a1 = ((input_dc + 16) >> 5);   /* rounded DC term added to every pixel */
+    int r, c, b;
+    unsigned char *orig_pred = pred_ptr;
+    unsigned char *orig_dst = dst_ptr;
+    for (b = 0; b < 4; b++)            /* the 8x8 area is handled as four 4x4 quadrants */
+    {
+        for (r = 0; r < 4; r++)
+        {
+          for (c = 0; c < 4; c++)
+          {
+              int a = a1 + pred_ptr[c] ;
+              /* clamp the sum to the 0..255 pixel range */
+              if (a < 0)
+                 a = 0;
+
+              if (a > 255)
+                 a = 255;
+
+              dst_ptr[c] = (unsigned char) a ;
+         }
+
+         dst_ptr += stride;
+         pred_ptr += pitch;
+       }
+        dst_ptr = orig_dst + (b+1)%2*4 + (b+1)/2*4*stride;   /* next quadrant: column (b+1)%2, row (b+1)/2 */
+        pred_ptr = orig_pred + (b+1)%2*4 + (b+1)/2*4*pitch;  /* same quadrant step in the predictor */
+    }
+}
+
+#define W1 2841                 /* 2048*sqrt(2)*cos(1*pi/16) */
+#define W2 2676                 /* 2048*sqrt(2)*cos(2*pi/16) */
+#define W3 2408                 /* 2048*sqrt(2)*cos(3*pi/16) */
+#define W5 1609                 /* 2048*sqrt(2)*cos(5*pi/16) */
+#define W6 1108                 /* 2048*sqrt(2)*cos(6*pi/16) */
+#define W7 565                  /* 2048*sqrt(2)*cos(7*pi/16) */
+
+/* row (horizontal) IDCT
+ *
+ * dst[k] = sum over l = 0..7 of
+ *            c[l] * src[l] * cos( (pi / 8) * ( k + 1/2 ) * l )
+ *
+ * where: c[0]    = 128 c[1..7] = 128*sqrt(2) */
+
+static void idctrow (int *blk)   /* transforms the 8 values blk[0..7] in place */
+{
+    int x0, x1, x2, x3, x4, x5, x6, x7, x8;
+    /* shortcut: if blk[1..7] are all zero, every output equals blk[0] << 3 */
+    if (!((x1 = blk[4] << 11) | (x2 = blk[6]) | (x3 = blk[2]) |
+        (x4 = blk[1]) | (x5 = blk[7]) | (x6 = blk[5]) | (x7 = blk[3])))
+    {
+        blk[0] = blk[1] = blk[2] = blk[3] = blk[4]
+        = blk[5] = blk[6] = blk[7] = blk[0] << 3; /* NOTE(review): left shift of a possibly negative int */
+        return;
+    }
+
+    x0 = (blk[0] << 11) + 128;    /* for proper rounding in the fourth stage */
+    /* first stage: odd inputs blk[1], blk[7], blk[5], blk[3] */
+    x8 = W7 * (x4 + x5);
+    x4 = x8 + (W1 - W7) * x4;
+    x5 = x8 - (W1 + W7) * x5;
+    x8 = W3 * (x6 + x7);
+    x6 = x8 - (W3 - W5) * x6;
+    x7 = x8 - (W3 + W5) * x7;
+
+    /* second stage: even inputs blk[0], blk[4], blk[2], blk[6], plus odd-part butterflies */
+    x8 = x0 + x1;
+    x0 -= x1;
+    x1 = W6 * (x3 + x2);
+    x2 = x1 - (W2 + W6) * x2;
+    x3 = x1 + (W2 - W6) * x3;
+    x1 = x4 + x6;
+    x4 -= x6;
+    x6 = x5 + x7;
+    x5 -= x7;
+
+    /* third stage */
+    x7 = x8 + x3;
+    x8 -= x3;
+    x3 = x0 + x2;
+    x0 -= x2;
+    x2 = (181 * (x4 + x5) + 128) >> 8;   /* 181/256 is approximately 1/sqrt(2) */
+    x4 = (181 * (x4 - x5) + 128) >> 8;
+
+    /* fourth stage: write the 8 outputs, dropping 8 fractional bits */
+    blk[0] = (x7 + x1) >> 8;
+    blk[1] = (x3 + x2) >> 8;
+    blk[2] = (x0 + x4) >> 8;
+    blk[3] = (x8 + x6) >> 8;
+    blk[4] = (x8 - x6) >> 8;
+    blk[5] = (x0 - x4) >> 8;
+    blk[6] = (x3 - x2) >> 8;
+    blk[7] = (x7 - x1) >> 8;
+}
+
+/* column (vertical) IDCT
+ *
+ * dst[8*k] = sum over l = 0..7 of
+ *            c[l] * src[8*l] * cos( (pi / 8) * ( k + 1/2 ) * l )
+ *
+ * where: c[0]    = 1/1024 c[1..7] = (1/1024)*sqrt(2) */
+static void idctcol (int *blk)   /* transforms blk[0], blk[8], ..., blk[56] in place */
+{
+    int x0, x1, x2, x3, x4, x5, x6, x7, x8;
+
+    /* shortcut: if rows 1..7 of this column are all zero, every output is (blk[0] + 32) >> 6 */
+    if (!((x1 = (blk[8 * 4] << 8)) | (x2 = blk[8 * 6]) | (x3 = blk[8 * 2]) |
+        (x4 = blk[8 * 1]) | (x5 = blk[8 * 7]) | (x6 = blk[8 * 5]) |
+        (x7 = blk[8 * 3])))
+    {
+        blk[8 * 0] = blk[8 * 1] = blk[8 * 2] = blk[8 * 3]
+                   = blk[8 * 4] = blk[8 * 5] = blk[8 * 6]
+                   = blk[8 * 7] = ((blk[8 * 0] + 32) >>6);
+        return;
+    }
+
+    x0 = (blk[8 * 0] << 8) + 16384;   /* 16384 == 1 << 14, the same amount the fourth stage shifts out */
+
+    /* first stage: odd rows 1, 7, 5, 3; products are rounded (+4) and scaled down by 8 */
+    x8 = W7 * (x4 + x5) + 4;
+    x4 = (x8 + (W1 - W7) * x4) >> 3;
+    x5 = (x8 - (W1 + W7) * x5) >> 3;
+    x8 = W3 * (x6 + x7) + 4;
+    x6 = (x8 - (W3 - W5) * x6) >> 3;
+    x7 = (x8 - (W3 + W5) * x7) >> 3;
+
+    /* second stage: even rows 0, 4, 2, 6, plus odd-part butterflies */
+    x8 = x0 + x1;
+    x0 -= x1;
+    x1 = W6 * (x3 + x2) + 4;
+    x2 = (x1 - (W2 + W6) * x2) >> 3;
+    x3 = (x1 + (W2 - W6) * x3) >> 3;
+    x1 = x4 + x6;
+    x4 -= x6;
+    x6 = x5 + x7;
+    x5 -= x7;
+
+    /* third stage */
+    x7 = x8 + x3;
+    x8 -= x3;
+    x3 = x0 + x2;
+    x0 -= x2;
+    x2 = (181 * (x4 + x5) + 128) >> 8;   /* 181/256 is approximately 1/sqrt(2) */
+    x4 = (181 * (x4 - x5) + 128) >> 8;
+
+    /* fourth stage: write the 8 outputs, dropping 14 fractional bits */
+    blk[8 * 0] = (x7 + x1 ) >> 14;
+    blk[8 * 1] = (x3 + x2 ) >> 14;
+    blk[8 * 2] = (x0 + x4 ) >> 14;
+    blk[8 * 3] = (x8 + x6 ) >> 14;
+    blk[8 * 4] = (x8 - x6 ) >> 14;
+    blk[8 * 5] = (x0 - x4 ) >> 14;
+    blk[8 * 6] = (x3 - x2 ) >> 14;
+    blk[8 * 7] = (x7 - x1 ) >> 14;
+}
+
+#define TX_DIM 8
+void vp8_short_idct8x8_c(short *coefs, short *block, int pitch) /* 8x8 inverse DCT: coefs (64 shorts) -> block */
+{
+    int X[TX_DIM*TX_DIM];             /* int working copy of the 8x8 coefficients */
+    int i,j;
+    int shortpitch = pitch >> 1;      /* output row stride in shorts; presumably pitch is in bytes */
+
+    for (i = 0; i < TX_DIM; i++)
+    {
+        for (j = 0; j < TX_DIM; j++)
+        {
+            X[i * TX_DIM + j] = (int)(coefs[i * TX_DIM + j]+1  /* (c + 1 + (c < 0)) >> 2: */
+                + (coefs[i * TX_DIM + j]<0))>>2;               /* input is scaled down by 4 */
+        }
+    }
+    for (i = 0; i < 8; i++)           /* 1-D transform of each of the 8 rows */
+        idctrow (X + 8 * i);
+
+    for (i = 0; i < 8; i++)           /* then 1-D transform of each of the 8 columns */
+        idctcol (X + i);
+
+    for (i = 0; i < TX_DIM; i++)
+    {
+        for (j = 0; j < TX_DIM; j++)
+        {
+            block[i*shortpitch+j]  = X[i * TX_DIM + j]>>1;   /* final halving on output */
+        }
+    }
+}
+
+
+void vp8_short_ihaar2x2_c(short *input, short *output, int pitch) /* NOTE(review): pitch is not used */
+{
+   int i, x;   /* NOTE(review): x is never used */
+   short *ip = input; // only input[0], [1], [4] and [8] are read
+   short *op = output;
+   for (i = 0; i < 16; i++)   /* clear all 16 outputs; only 0, 1, 4 and 8 are set below */
+   {
+       op[i] = 0;
+   }
+   /* 2x2 butterfly over the four inputs; NOTE(review): only op[0] carries the +1 rounding term */
+   op[0] = (ip[0] + ip[1] + ip[4] + ip[8] + 1)>>1;
+   op[1] = (ip[0] - ip[1] + ip[4] - ip[8])>>1;
+   op[4] = (ip[0] + ip[1] - ip[4] - ip[8])>>1;
+   op[8] = (ip[0] - ip[1] - ip[4] + ip[8])>>1;
+}
+
--- a/vp8/common/implicit_segmentation.c
+++ b/vp8/common/implicit_segmentation.c
@ -0,0 +1,277 @@
+/*
+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp8/common/onyxc_int.h"
+
+#define MAX_REGIONS 24000
+#define NULL 0
+
+#define min_mbs_in_region 3
+
+// this linked list structure holds equivalences for connected
+// component labeling
+struct list_el {
+   int label;
+   int seg_value;
+   int count;
+   struct list_el * next;
+};
+typedef struct list_el item;
+
+// connected colorsegments
+typedef struct
+{
+  int min_x;
+  int min_y;
+  int max_x;
+  int max_y;
+  long long sum_x;
+  long long sum_y;
+  int pixels;
+  int seg_value;
+  int label;
+} segment_info;
+
+
+typedef enum
+{
+  SEGMENT_MODE,
+  SEGMENT_MV,
+  SEGMENT_REFFRAME,
+  SEGMENT_SKIPPED
+} SEGMENT_TYPE;
+
+
+// Merges the equivalence lists reached through labels[u] and labels[v]
+// (both kept sorted by label) into one list, then repoints labels[] for
+// every member at the merged head, whose count becomes the sum of both.
+void merge ( item *labels, int u, int v )
+{
+  item *a = labels[u].next;
+  item *b = labels[v].next;
+  item c;            // dummy node: c.next ends up as the head of the merged list
+  item *it = &c;
+  int count;
+
+  // check if they are already merged
+  if(u==v || a==b)
+    return;
+
+  count = a->count + b->count;   // NOTE(review): a and b are dereferenced without a NULL check
+
+  // merge 2 sorted linked lists; on equal labels the node from b is taken first
+  while ( a != NULL && b != NULL )
+  {
+    if ( a->label < b->label)
+    {
+      it->next = a;
+      a = a->next;
+    }
+    else
+    {
+      it->next = b;
+      b = b->next;
+    }
+
+    it = it->next;
+  }
+  // append whatever remains of the list that was not exhausted
+  if ( a == NULL )
+    it->next = b;
+  else
+    it->next = a;
+
+  it = c.next;
+
+  // make sure every equivalence in the linked list points to this new linked list
+  while( it != NULL)
+  {
+    labels[it->label].next = c.next;
+    it=it->next;
+  }
+  c.next->count = count;   // the total is stored on the head node only
+
+}
+
+void segment_via_mode_info( VP8_COMMON *oci, int how) // label connected MB regions by `how` (a SEGMENT_TYPE), then print them
+{
+    MODE_INFO *mi = oci->mi;
+    int i,j;
+    int mb_index = 0;
+
+    int label=1;                  // next provisional label; label 0 is never handed out
+    int pitch = oci->mb_cols;     // row stride of labeling[]
+
+    // holds linked list equivalences
+    // the max should probably be allocated at a higher level in oci
+    item equivalences[MAX_REGIONS];      // NOTE(review): no check that label/eq_ptr stay below MAX_REGIONS
+    int eq_ptr = 0;
+    item labels[MAX_REGIONS];            // labels[k].next -> head of the equivalence list containing k
+    segment_info segments[MAX_REGIONS];  // per-region statistics, indexed by final label
+    int label_count = 1;
+    int labeling[400*300];               // NOTE(review): no check that mb_rows*mb_cols fits in 400*300
+    int *lp = labeling;
+
+    label_count = 1;
+    memset(labels,0,sizeof(labels));
+    memset(segments,0,sizeof(segments));
+
+    /* Go through each macroblock first pass labelling */
+    for (i = 0; i < oci->mb_rows; i++,lp+=pitch)
+    {
+        for (j = 0; j < oci->mb_cols; j++)
+        {
+          // int above seg_value, left seg_value, this seg_value...
+          int a=-1,l=-1,n=-1;
+
+          // above label, left label (-1 when there is no such neighbour)
+          int al=-1,ll=-1;
+          if(i)
+          {
+            al=lp[j-pitch];
+            a = labels[al].next->seg_value;
+          }
+          if(j)
+          {
+            ll=lp[j-1];
+            l = labels[ll].next->seg_value;
+          }
+
+          // what setting are we going to do the implicit segmentation on
+          switch (how)
+          {
+            case SEGMENT_MODE:
+              n= mi[mb_index].mbmi.mode;
+              break;
+            case SEGMENT_MV:
+              n = mi[mb_index].mbmi.mv.as_int;
+              if(mi[mb_index].mbmi.ref_frame == INTRA_FRAME)
+                 n=-9999999;
+              break;
+            case SEGMENT_REFFRAME:
+              n = mi[mb_index].mbmi.ref_frame;
+              break;
+            case SEGMENT_SKIPPED:
+              n = mi[mb_index].mbmi.mb_skip_coeff;
+              break;
+          }
+
+          // above and left both exist and have the same seg_value (i/j guard the -1 sentinels)
+          if(i&&j&&n==a&&n==l)
+          {
+            // pick the lowest label
+            lp[j] = (al<ll?al:ll);
+            labels[lp[j]].next->count++;
+
+            // merge the above and left equivalencies
+            merge( labels, al, ll );
+          }
+          // this matches above seg_value (only when a row above exists)
+          else if(i&&n==a)
+          {
+              // give it the same label as above
+              lp[j]=al;
+              labels[al].next->count++;
+          }
+          // this matches left seg_value (only when a column to the left exists)
+          else if(j&&n==l)
+          {
+              // give it the same label as left
+              lp[j]=ll;
+              labels[ll].next->count++;
+          }
+          else
+          {
+              // new label doesn't match either
+              item *e = &labels[label];
+              item *nl = &equivalences[eq_ptr++];
+              lp[j]=label;
+              nl->label = label;
+              nl->next = 0;
+              nl->seg_value = n;
+              nl->count = 1;
+              e->next = nl;
+              label++;
+          }
+          mb_index++;
+      }
+      mb_index++;   // one extra mi[] entry is skipped per row; presumably the mode-info stride is mb_cols + 1
+    }
+    lp = labeling;
+
+    // give new labels to regions with more than min_mbs_in_region macroblocks
+    for(i=1;i<label;i++)
+      if(labels[i].next->count >min_mbs_in_region  &&  labels[labels[i].next->label].label == 0 )
+      {
+        segment_info *cs= &segments[label_count];
+        cs->label = label_count;
+        labels[labels[i].next->label].label = label_count++;
+        labels[labels[i].next->label].seg_value  = labels[i].next->seg_value;
+        cs->seg_value = labels[labels[i].next->label].seg_value;
+        cs->min_x = oci->mb_cols;
+        cs->min_y = oci->mb_rows;
+        cs->max_x = 0;
+        cs->max_y = 0;
+        cs->sum_x = 0;
+        cs->sum_y = 0;
+        cs->pixels= 0;
+
+      }
+    lp = labeling;
+
+    // this is just to gather stats...
+    for(i=0;i<oci->mb_rows;i++,lp+=pitch)
+    {
+      for(j=0;j<oci->mb_cols;j++)
+      {
+         segment_info *cs;
+         int oldlab = labels[lp[j]].next->label;   // head label of this MB's equivalence list
+         int lab = labels[oldlab].label;           // final label; stays 0 for regions not relabelled above
+         lp[j] = lab;
+
+         cs= &segments[lab];
+
+         cs->min_x = (j<cs->min_x?j:cs->min_x);
+         cs->max_x = (j>cs->max_x?j:cs->max_x);
+         cs->min_y = (i<cs->min_y?i:cs->min_y);
+         cs->max_y = (i>cs->max_y?i:cs->max_y);
+         cs->sum_x += j;
+         cs->sum_y += i;
+         cs->pixels ++;
+
+         lp[j] = lab;
+         mb_index++;
+      }
+      mb_index++;
+    }
+
+    {
+      lp = labeling;
+      printf("labelling \n");
+      mb_index = 0;
+      for(i=0;i<oci->mb_rows;i++,lp+=pitch)
+      {
+        for(j=0;j<oci->mb_cols;j++)
+        {
+          printf("%4d",lp[j]);
+        }
+        printf("            ");
+        for(j=0;j<oci->mb_cols;j++,mb_index++)
+        {
+          //printf("%3d",mi[mb_index].mbmi.mode );
+          printf("%4d:%4d",mi[mb_index].mbmi.mv.as_mv.row,mi[mb_index].mbmi.mv.as_mv.col );
+        }
+        printf("\n");
+        ++mb_index;
+      }
+      printf("\n");
+    }
+}
+
--- a/vp8/common/invtrans.c
+++ b/vp8/common/invtrans.c
@ -24,13 +24,23 @@ static void recon_dcblock(MACROBLOCKD *x)
    }

 }
+static void recon_dcblock_8x8(MACROBLOCKD *x)   /* copies 2nd-order results into the DC slots of blocks 0, 4, 8, 12 */
+{
+    BLOCKD *b = &x->block[24]; // block 24's diff[0], [1], [4], [8] feed blocks 0, 4, 8, 12
+    x->block[0].dqcoeff[0] = b->diff[0];
+    x->block[4].dqcoeff[0] = b->diff[1];
+    x->block[8].dqcoeff[0] = b->diff[4];
+    x->block[12].dqcoeff[0] = b->diff[8];
+
+}
+

 void vp8_inverse_transform_b(const vp8_idct_rtcd_vtable_t *rtcd, BLOCKD *b, int pitch)
 {
-    if (b->eob > 1)
-        IDCT_INVOKE(rtcd, idct16)(b->dqcoeff, b->diff, pitch);
-    else
+    if (b->eob <= 1)
        IDCT_INVOKE(rtcd, idct1)(b->dqcoeff, b->diff, pitch);
+    else
+        IDCT_INVOKE(rtcd, idct16)(b->dqcoeff, b->diff, pitch);
 }


@ -66,6 +76,7 @@ void vp8_inverse_transform_mb(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x
    int i;

    if (x->mode_info_context->mbmi.mode != B_PRED &&
+        x->mode_info_context->mbmi.mode != I8X8_PRED &&
        x->mode_info_context->mbmi.mode != SPLITMV)
    {
        /* do 2nd order transform on the dc block */
@ -86,3 +97,77 @@ void vp8_inverse_transform_mb(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x
    }

 }
+
+
+void vp8_inverse_transform_b_8x8(const vp8_idct_rtcd_vtable_t *rtcd, short *input_dqcoeff, short *output_coeff, int pitch)// 8x8 variant: takes raw coefficient pointers instead of a BLOCKD
+{
+    // Always invokes the idct8 entry of rtcd, passing input_dqcoeff, output_coeff and pitch straight through.
+    // Disabled eob shortcut, kept for reference: if (b->eob > 1)
+        IDCT_INVOKE(rtcd, idct8)(input_dqcoeff, output_coeff, pitch);
+    // else
+        // IDCT_INVOKE(rtcd, idct8_1)(b->dqcoeff, b->diff, pitch);   (this signature has no BLOCKD 'b', so there is no eob to test)
+
+}
+
+
+void vp8_inverse_transform_mby_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
+{
+    int i;
+    /* Sequence: ihaar2 on block 24, recon_dcblock_8x8(), then four 8x8 inverse transforms with pitch 32. */
+    // do 2nd order transform on the dc block
+    IDCT_INVOKE(rtcd, ihaar2)(x->block[24].dqcoeff, x->block[24].diff, 8);
+    /* recon_dcblock_8x8() copies values from block 24's diff[] into dqcoeff[0] of blocks 0, 4, 8 and 12 */
+    recon_dcblock_8x8(x); //need to change for 8x8
+    for (i = 0; i < 9; i += 8) /* i = 0, 8: input and output both taken from block i */
+    {
+        vp8_inverse_transform_b_8x8(rtcd, &x->block[i].dqcoeff[0], &x->block[i].diff[0], 32);
+    }
+    for (i = 2; i < 11; i += 8) /* i = 2, 10: input read from block i+2 (4, 12), output written at block i */
+    {
+        vp8_inverse_transform_b_8x8(rtcd, &x->block[i+2].dqcoeff[0], &x->block[i].diff[0], 32);
+    }
+
+}
+void vp8_inverse_transform_mbuv_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
+{
+    int i;
+    /* Two 8x8 inverse transforms, at blocks 16 and 20, each with pitch 16 (presumably one per chroma plane -- confirm). */
+    for (i = 16; i < 24; i += 4)
+    {
+        vp8_inverse_transform_b_8x8(rtcd, &x->block[i].dqcoeff[0], &x->block[i].diff[0], 16);
+    }
+
+}
+
+
+void vp8_inverse_transform_mb_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
+{
+    int i;
+    /* Whole-macroblock 8x8 path: optional ihaar2 + DC redistribution, then six 8x8 inverse transforms. */
+    if (x->mode_info_context->mbmi.mode != B_PRED &&
+        x->mode_info_context->mbmi.mode != SPLITMV)
+    {
+        // do 2nd order transform on the dc block
+        /* NOTE(review): vp8_inverse_transform_mb also excludes I8X8_PRED from this step; this test does not -- confirm intended. */
+        IDCT_INVOKE(rtcd, ihaar2)(&x->block[24].dqcoeff[0], x->block[24].diff, 8);//dqcoeff[0]
+        recon_dcblock_8x8(x); //need to change for 8x8
+
+    }
+    /* pitch-32 calls: i = 0, 8 use block i for input and output; i = 2, 10 read from block i+2 and write at block i */
+    for (i = 0; i < 9; i += 8)
+    {
+        vp8_inverse_transform_b_8x8(rtcd, &x->block[i].dqcoeff[0], &x->block[i].diff[0], 32);
+    }
+    for (i = 2; i < 11; i += 8)
+    {
+        vp8_inverse_transform_b_8x8(rtcd, &x->block[i+2].dqcoeff[0], &x->block[i].diff[0], 32);
+    }
+
+    /* pitch-16 calls at blocks 16 and 20 */
+    for (i = 16; i < 24; i += 4)
+    {
+        vp8_inverse_transform_b_8x8(rtcd, &x->block[i].dqcoeff[0], &x->block[i].diff[0], 16);
+    }
+
+}
+
--- a/vp8/common/invtrans.h
+++ b/vp8/common/invtrans.h
@ -12,7 +12,7 @@
 #ifndef __INC_INVTRANS_H
 #define __INC_INVTRANS_H

-#include "vpx_config.h"
+#include "vpx_ports/config.h"
 #include "idct.h"
 #include "blockd.h"
 extern void vp8_inverse_transform_b(const vp8_idct_rtcd_vtable_t *rtcd, BLOCKD *b, int pitch);
@ -20,4 +20,9 @@ extern void vp8_inverse_transform_mb(const vp8_idct_rtcd_vtable_t *rtcd, MACROBL
 extern void vp8_inverse_transform_mby(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x);
 extern void vp8_inverse_transform_mbuv(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x);

+extern void vp8_inverse_transform_b_8x8(const vp8_idct_rtcd_vtable_t *rtcd, short *input_dqcoeff, short *output_coeff, int pitch);
+extern void vp8_inverse_transform_mb_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x);
+extern void vp8_inverse_transform_mby_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x);
+extern void vp8_inverse_transform_mbuv_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x);
+
 #endif
--- a/vp8/common/loopfilter.c
+++ b/vp8/common/loopfilter.c
@ -14,10 +14,14 @@
 #include "onyxc_int.h"
 #include "vpx_mem/vpx_mem.h"

+#include "vp8/common/seg_common.h"
+
 typedef unsigned char uc;

 prototype_loopfilter(vp8_loop_filter_horizontal_edge_c);
 prototype_loopfilter(vp8_loop_filter_vertical_edge_c);
+
+
 prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_c);
 prototype_loopfilter(vp8_mbloop_filter_vertical_edge_c);

@ -68,6 +72,14 @@ void vp8_loop_filter_bh_c(unsigned char *y_ptr, unsigned char *u_ptr,
        vp8_loop_filter_horizontal_edge_c(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
 }

+void vp8_loop_filter_bh8x8_c(unsigned char *y_ptr, unsigned char *u_ptr,
+                          unsigned char *v_ptr, int y_stride, int uv_stride,
+                          loop_filter_info *lfi)
+{   /* One call only: the horizontal edge 8 rows below y_ptr. u_ptr, v_ptr and uv_stride are not used. */
+    vp8_mbloop_filter_horizontal_edge_c(
+        y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); /* passes lfi->blim (not lfi->mblim), count 2 */
+}
+
 void vp8_loop_filter_bhs_c(unsigned char *y_ptr, int y_stride,
                           const unsigned char *blimit)
 {
@ -92,6 +104,14 @@ void vp8_loop_filter_bv_c(unsigned char *y_ptr, unsigned char *u_ptr,
        vp8_loop_filter_vertical_edge_c(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
 }

+void vp8_loop_filter_bv8x8_c(unsigned char *y_ptr, unsigned char *u_ptr,
+                          unsigned char *v_ptr, int y_stride, int uv_stride,
+                          loop_filter_info *lfi)
+{   /* One call only: the vertical edge 8 columns right of y_ptr. u_ptr, v_ptr and uv_stride are not used. */
+    vp8_mbloop_filter_vertical_edge_c(
+        y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); /* passes lfi->blim (not lfi->mblim); count 2 makes the callee loop count * 8 = 16 times */
+}
+
 void vp8_loop_filter_bvs_c(unsigned char *y_ptr, int y_stride,
                           const unsigned char *blimit)
 {
@ -133,7 +153,7 @@ static void lf_init_lut(loop_filter_info_n *lfi)
    lfi->mode_lf_lut[H_PRED] = 1;
    lfi->mode_lf_lut[TM_PRED] = 1;
    lfi->mode_lf_lut[B_PRED]  = 0;
-
+    lfi->mode_lf_lut[I8X8_PRED]=0;
    lfi->mode_lf_lut[ZEROMV]  = 1;
    lfi->mode_lf_lut[NEARESTMV] = 2;
    lfi->mode_lf_lut[NEARMV] = 2;
@ -194,7 +214,7 @@ void vp8_loop_filter_init(VP8_COMMON *cm)
 }

 void vp8_loop_filter_frame_init(VP8_COMMON *cm,
-                                MACROBLOCKD *mbd,
+                                MACROBLOCKD *xd,
                                int default_filt_lvl)
 {
    int seg,  /* segment number */
@ -215,22 +235,23 @@ void vp8_loop_filter_frame_init(VP8_COMMON *cm,
        int lvl_seg = default_filt_lvl;
        int lvl_ref, lvl_mode;

-        /* Note the baseline filter values for each segment */
-        if (mbd->segmentation_enabled)
+
+        // Set the baseline filter values for each segment
+        if ( segfeature_active( xd, seg, SEG_LVL_ALT_LF ) )
        {
            /* Abs value */
-            if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
+            if (xd->mb_segment_abs_delta == SEGMENT_ABSDATA)
            {
-                lvl_seg = mbd->segment_feature_data[MB_LVL_ALT_LF][seg];
+                lvl_seg = get_segdata( xd, seg, SEG_LVL_ALT_LF );
            }
            else  /* Delta Value */
            {
-                lvl_seg += mbd->segment_feature_data[MB_LVL_ALT_LF][seg];
+                lvl_seg += get_segdata( xd, seg, SEG_LVL_ALT_LF );;
                lvl_seg = (lvl_seg > 0) ? ((lvl_seg > 63) ? 63: lvl_seg) : 0;
            }
        }

-        if (!mbd->mode_ref_lf_delta_enabled)
+        if (!xd->mode_ref_lf_delta_enabled)
        {
            /* we could get rid of this if we assume that deltas are set to
             * zero when not in use; encoder always uses deltas
@ -245,12 +266,12 @@ void vp8_loop_filter_frame_init(VP8_COMMON *cm,
        ref = INTRA_FRAME;

        /* Apply delta for reference frame */
-        lvl_ref += mbd->ref_lf_deltas[ref];
+        lvl_ref += xd->ref_lf_deltas[ref];

        /* Apply delta for Intra modes */
        mode = 0; /* B_PRED */
        /* Only the split mode BPRED has a further special case */
-        lvl_mode = lvl_ref +  mbd->mode_lf_deltas[mode];
+        lvl_mode = lvl_ref +  xd->mode_lf_deltas[mode];
        lvl_mode = (lvl_mode > 0) ? (lvl_mode > 63 ? 63 : lvl_mode) : 0; /* clamp */

        lfi->lvl[seg][ref][mode] = lvl_mode;
@ -265,12 +286,12 @@ void vp8_loop_filter_frame_init(VP8_COMMON *cm,
            int lvl_ref = lvl_seg;

            /* Apply delta for reference frame */
-            lvl_ref += mbd->ref_lf_deltas[ref];
+            lvl_ref += xd->ref_lf_deltas[ref];

            /* Apply delta for Inter modes */
            for (mode = 1; mode < 4; mode++)
            {
-                lvl_mode = lvl_ref + mbd->mode_lf_deltas[mode];
+                lvl_mode = lvl_ref + xd->mode_lf_deltas[mode];
                lvl_mode = (lvl_mode > 0) ? (lvl_mode > 63 ? 63 : lvl_mode) : 0; /* clamp */

                lfi->lvl[seg][ref][mode] = lvl_mode;
@ -282,7 +303,7 @@ void vp8_loop_filter_frame_init(VP8_COMMON *cm,
 void vp8_loop_filter_frame
 (
    VP8_COMMON *cm,
-    MACROBLOCKD *mbd
+    MACROBLOCKD *xd
 )
 {
    YV12_BUFFER_CONFIG *post = cm->frame_to_show;
@ -302,7 +323,7 @@ void vp8_loop_filter_frame
    const MODE_INFO *mode_info_context = cm->mi;

    /* Initialize the loop filter for this frame. */
-    vp8_loop_filter_frame_init(cm, mbd, cm->filter_level);
+    vp8_loop_filter_frame_init(cm, xd, cm->filter_level);

    /* Set up the buffer pointers */
    y_ptr = post->y_buffer;
@ -315,13 +336,14 @@ void vp8_loop_filter_frame
        for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
        {
            int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
+                            mode_info_context->mbmi.mode != I8X8_PRED &&
                            mode_info_context->mbmi.mode != SPLITMV &&
                            mode_info_context->mbmi.mb_skip_coeff);

            const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
            const int seg = mode_info_context->mbmi.segment_id;
            const int ref_frame = mode_info_context->mbmi.ref_frame;
-
+            int tx_type = mode_info_context->mbmi.txfm_size;
            filter_level = lfi_n->lvl[seg][ref_frame][mode_index];

            if (filter_level)
@ -335,21 +357,34 @@ void vp8_loop_filter_frame
                    lfi.hev_thr = lfi_n->hev_thr[hev_index];

                    if (mb_col > 0)
-                        LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_v)
+                        vp8_loop_filter_mbv_c
                        (y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi);

                    if (!skip_lf)
-                        LF_INVOKE(&cm->rtcd.loopfilter, normal_b_v)
-                        (y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi);
+                    {
+                        if(tx_type == TX_8X8)
+                            vp8_loop_filter_bv8x8_c
+                            (y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi);
+                        else
+                            LF_INVOKE(&cm->rtcd.loopfilter, normal_b_v)
+                            (y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi);
+
+                    }

                    /* don't apply across umv border */
                    if (mb_row > 0)
-                        LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_h)
+                        vp8_loop_filter_mbh_c
                        (y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi);

                    if (!skip_lf)
-                        LF_INVOKE(&cm->rtcd.loopfilter, normal_b_h)
-                        (y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi);
+                    {
+                        if(tx_type == TX_8X8)
+                            vp8_loop_filter_bh8x8_c
+                            (y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi);
+                        else
+                            LF_INVOKE(&cm->rtcd.loopfilter, normal_b_h)
+                            (y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi);
+                    }
                }
                else
                {
@ -390,7 +425,7 @@ void vp8_loop_filter_frame
 void vp8_loop_filter_frame_yonly
 (
    VP8_COMMON *cm,
-    MACROBLOCKD *mbd,
+    MACROBLOCKD *xd,
    int default_filt_lvl
 )
 {
@ -415,7 +450,7 @@ void vp8_loop_filter_frame_yonly
 #endif

    /* Initialize the loop filter for this frame. */
-    vp8_loop_filter_frame_init( cm, mbd, default_filt_lvl);
+    vp8_loop_filter_frame_init( cm, xd, default_filt_lvl);

    /* Set up the buffer pointers */
    y_ptr = post->y_buffer;
@ -426,12 +461,14 @@ void vp8_loop_filter_frame_yonly
        for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
        {
            int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
+                            mode_info_context->mbmi.mode != I8X8_PRED &&
                            mode_info_context->mbmi.mode != SPLITMV &&
                            mode_info_context->mbmi.mb_skip_coeff);

            const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
            const int seg = mode_info_context->mbmi.segment_id;
            const int ref_frame = mode_info_context->mbmi.ref_frame;
+            int tx_type = mode_info_context->mbmi.txfm_size;

            filter_level = lfi_n->lvl[seg][ref_frame][mode_index];

@ -446,21 +483,33 @@ void vp8_loop_filter_frame_yonly
                    lfi.hev_thr = lfi_n->hev_thr[hev_index];

                    if (mb_col > 0)
-                        LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_v)
+                        vp8_loop_filter_mbv_c
                        (y_ptr, 0, 0, post->y_stride, 0, &lfi);

                    if (!skip_lf)
-                        LF_INVOKE(&cm->rtcd.loopfilter, normal_b_v)
-                        (y_ptr, 0, 0, post->y_stride, 0, &lfi);
+                    {
+                        if(tx_type == TX_8X8)
+                            vp8_loop_filter_bv8x8_c
+                            (y_ptr, 0, 0, post->y_stride, 0, &lfi);
+                        else
+                            LF_INVOKE(&cm->rtcd.loopfilter, normal_b_v)
+                            (y_ptr, 0, 0, post->y_stride, 0, &lfi);
+                    }

                    /* don't apply across umv border */
                    if (mb_row > 0)
-                        LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_h)
+                        vp8_loop_filter_mbh_c
                        (y_ptr, 0, 0, post->y_stride, 0, &lfi);

                    if (!skip_lf)
-                        LF_INVOKE(&cm->rtcd.loopfilter, normal_b_h)
-                        (y_ptr, 0, 0, post->y_stride, 0, &lfi);
+                    {
+                        if(tx_type == TX_8X8)
+                            vp8_loop_filter_bh8x8_c
+                            (y_ptr, 0, 0, post->y_stride, 0, &lfi);
+                        else
+                            LF_INVOKE(&cm->rtcd.loopfilter, normal_b_h)
+                            (y_ptr, 0, 0, post->y_stride, 0, &lfi);
+                    }
                }
                else
                {
@ -493,11 +542,134 @@ void vp8_loop_filter_frame_yonly
    }

 }
+#if CONFIG_FEATUREUPDATES
+// TODO: Multiple copies of loop filtering code should be pruned and
+// cut down.   This just adds yet another so that I can do an if
+// on segment.
+void vp8_loop_filter_frame_segment(VP8_COMMON *cm, MACROBLOCKD *xd,
+                                   int default_filt_lvl, int segment)
+{
+    YV12_BUFFER_CONFIG *post = cm->frame_to_show;
+    /* Walks every macroblock of cm->frame_to_show; only y_buffer is addressed and the u/v filter arguments are passed as 0. */
+    unsigned char *y_ptr;
+    int mb_row;
+    int mb_col;
+
+    loop_filter_info_n *lfi_n = &cm->lf_info;
+    loop_filter_info lfi;
+
+    int filter_level;
+    FRAME_TYPE frame_type = cm->frame_type;
+
+    /* Point at base of Mb MODE_INFO list */
+    const MODE_INFO *mode_info_context = cm->mi;
+
+#if 0
+    if(default_filt_lvl == 0) /* no filter applied */
+    return;
+#endif
+
+    /* Initialize the loop filter for this frame. */
+    vp8_loop_filter_frame_init(cm, xd, default_filt_lvl);
+
+    /* Set up the buffer pointers */
+    y_ptr = post->y_buffer;
+
+    /* vp8_filter each macro block */
+    for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
+    {
+        for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
+        {
+            int skip_lf = (mode_info_context->mbmi.mode != B_PRED
+                           && mode_info_context->mbmi.mode != I8X8_PRED
+                           && mode_info_context->mbmi.mode != SPLITMV
+                           && mode_info_context->mbmi.mb_skip_coeff);
+            /* skip_lf only suppresses the inner b_v / b_h calls below; the macroblock-edge calls do not test it. */
+            const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi
+                    .mode];
+            const int seg = mode_info_context->mbmi.segment_id;
+            const int ref_frame = mode_info_context->mbmi.ref_frame;
+
+            filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
+
+            // check if this mb has filtering applied
+            //    and then whether it is the right segment or
+            //    if not whether the passed in segment is 0 and this
+            //    segment has no alt lf
+
+            // TODO: Make this work for when segment 0 has the alt lv enabled
+            if (filter_level
+                && (seg == segment
+                    || (!segfeature_active(xd, seg, SEG_LVL_ALT_LF)
+                        && segment == 0)))
+            {
+                if (cm->filter_type == NORMAL_LOOPFILTER)
+                {
+                    const int hev_index =
+                            lfi_n->hev_thr_lut[frame_type][filter_level];
+                    lfi.mblim = lfi_n->mblim[filter_level];
+                    lfi.blim = lfi_n->blim[filter_level];
+                    lfi.lim = lfi_n->lim[filter_level];
+                    lfi.hev_thr = lfi_n->hev_thr[hev_index];
+                    /* left macroblock edge: skipped for the first column */
+                    if (mb_col > 0)
+                        vp8_loop_filter_mbv_c(y_ptr, 0, 0, post->y_stride, 0,
+                                              &lfi);
+                    /* NOTE(review): no TX_8X8 branch here, unlike vp8_loop_filter_frame_yonly -- confirm this is intended. */
+                    if (!skip_lf)
+                        LF_INVOKE(&cm->rtcd.loopfilter, normal_b_v)(
+                                y_ptr, 0, 0, post->y_stride, 0, &lfi);
+
+                    /* don't apply across umv border */
+                    if (mb_row > 0)
+                        vp8_loop_filter_mbh_c(y_ptr, 0, 0, post->y_stride, 0,
+                                              &lfi);
+
+                    if (!skip_lf)
+                        LF_INVOKE(&cm->rtcd.loopfilter, normal_b_h)(
+                                y_ptr, 0, 0, post->y_stride, 0, &lfi);
+                }
+                else
+                {
+                    if (mb_col > 0)
+                        LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_v)(
+                                y_ptr, post->y_stride,
+                                lfi_n->mblim[filter_level]);
+
+                    if (!skip_lf)
+                        LF_INVOKE(&cm->rtcd.loopfilter, simple_b_v)(
+                                y_ptr, post->y_stride,
+                                lfi_n->blim[filter_level]);
+
+                    /* don't apply across umv border */
+                    if (mb_row > 0)
+                        LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_h)(
+                                y_ptr, post->y_stride,
+                                lfi_n->mblim[filter_level]);
+
+                    if (!skip_lf)
+                        LF_INVOKE(&cm->rtcd.loopfilter, simple_b_h)(
+                                y_ptr, post->y_stride,
+                                lfi_n->blim[filter_level]);
+                }
+            }
+            /* advance to the next macroblock whether or not this one was filtered */
+            y_ptr += 16;
+            mode_info_context++; /* step to next MB */
+
+        }
+        /* rewind by y_width and step down 16 rows to reach the start of the next macroblock row */
+        y_ptr += post->y_stride * 16 - post->y_width;
+        mode_info_context++; /* Skip border mb */
+    }
+
+}
+#endif

 void vp8_loop_filter_partial_frame
 (
    VP8_COMMON *cm,
-    MACROBLOCKD *mbd,
+    MACROBLOCKD *xd,
    int default_filt_lvl
 )
 {
@ -514,7 +686,7 @@ void vp8_loop_filter_partial_frame
    loop_filter_info lfi;

    int filter_level;
-    int alt_flt_enabled = mbd->segmentation_enabled;
+    int alt_flt_enabled = xd->segmentation_enabled;
    FRAME_TYPE frame_type = cm->frame_type;

    const MODE_INFO *mode_info_context;
@ -539,15 +711,15 @@ void vp8_loop_filter_partial_frame
    {
        for (i = 0; i < MAX_MB_SEGMENTS; i++)
        {    /* Abs value */
-            if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
+            if (xd->mb_segment_abs_delta == SEGMENT_ABSDATA)
            {
-                lvl_seg[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
+                lvl_seg[i] = get_segdata( xd, i, SEG_LVL_ALT_LF );
            }
            /* Delta Value */
            else
            {
-                lvl_seg[i] = default_filt_lvl
-                        + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
+                lvl_seg[i] = default_filt_lvl +
+                             get_segdata( xd, i, SEG_LVL_ALT_LF );
                lvl_seg[i] = (lvl_seg[i] > 0) ?
                        ((lvl_seg[i] > 63) ? 63: lvl_seg[i]) : 0;
            }
@ -563,6 +735,7 @@ void vp8_loop_filter_partial_frame
        for (mb_col = 0; mb_col < mb_cols; mb_col++)
        {
            int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
+                           mode_info_context->mbmi.mode != I8X8_PRED &&
                           mode_info_context->mbmi.mode != SPLITMV &&
                           mode_info_context->mbmi.mb_skip_coeff);

--- a/vp8/common/loopfilter.h
+++ b/vp8/common/loopfilter.h
@ -14,6 +14,7 @@

 #include "vpx_ports/mem.h"
 #include "vpx_config.h"
+#include "blockd.h"

 #define MAX_LOOP_FILTER 63

@ -40,7 +41,7 @@ typedef struct
    DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, hev_thr[4][SIMD_WIDTH]);
    unsigned char lvl[4][4][4];
    unsigned char hev_thr_lut[2][MAX_LOOP_FILTER + 1];
-    unsigned char mode_lf_lut[10];
+    unsigned char mode_lf_lut[MB_MODE_COUNT];
 } loop_filter_info_n;

 typedef struct
--- a/vp8/common/loopfilter_filters.c
+++ b/vp8/common/loopfilter_filters.c
@ -10,6 +10,7 @@


 #include <stdlib.h>
+#include "vpx_config.h"
 #include "loopfilter.h"
 #include "onyxc_int.h"

@ -148,7 +149,8 @@ void vp8_loop_filter_vertical_edge_c
    do
    {
        mask = vp8_filter_mask(limit[0], blimit[0],
-                               s[-4], s[-3], s[-2], s[-1], s[0], s[1], s[2], s[3]);
+                               s[-4], s[-3], s[-2], s[-1],
+                               s[0], s[1], s[2], s[3]);

        hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);

@ -158,62 +160,95 @@ void vp8_loop_filter_vertical_edge_c
    }
    while (++i < count * 8);
 }
-
-static __inline void vp8_mbfilter(signed char mask, uc hev,
-                           uc *op2, uc *op1, uc *op0, uc *oq0, uc *oq1, uc *oq2)
+static __inline signed char vp8_flatmask(uc thresh,
+                                         uc p4, uc p3, uc p2, uc p1, uc p0,
+                                         uc q0, uc q1, uc q2, uc q3, uc q4)
 {
-    signed char s, u;
-    signed char vp8_filter, Filter1, Filter2;
-    signed char ps2 = (signed char) * op2 ^ 0x80;
-    signed char ps1 = (signed char) * op1 ^ 0x80;
-    signed char ps0 = (signed char) * op0 ^ 0x80;
-    signed char qs0 = (signed char) * oq0 ^ 0x80;
-    signed char qs1 = (signed char) * oq1 ^ 0x80;
-    signed char qs2 = (signed char) * oq2 ^ 0x80;
-
-    /* add outer taps if we have high edge variance */
-    vp8_filter = vp8_signed_char_clamp(ps1 - qs1);
-    vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (qs0 - ps0));
-    vp8_filter &= mask;
-
-    Filter2 = vp8_filter;
-    Filter2 &= hev;
-
-    /* save bottom 3 bits so that we round one side +4 and the other +3 */
-    Filter1 = vp8_signed_char_clamp(Filter2 + 4);
-    Filter2 = vp8_signed_char_clamp(Filter2 + 3);
-    Filter1 >>= 3;
-    Filter2 >>= 3;
-    qs0 = vp8_signed_char_clamp(qs0 - Filter1);
-    ps0 = vp8_signed_char_clamp(ps0 + Filter2);
-
-
-    /* only apply wider filter if not high edge variance */
-    vp8_filter &= ~hev;
-    Filter2 = vp8_filter;
-
-    /* roughly 3/7th difference across boundary */
-    u = vp8_signed_char_clamp((63 + Filter2 * 27) >> 7);
-    s = vp8_signed_char_clamp(qs0 - u);
-    *oq0 = s ^ 0x80;
-    s = vp8_signed_char_clamp(ps0 + u);
-    *op0 = s ^ 0x80;
-
-    /* roughly 2/7th difference across boundary */
-    u = vp8_signed_char_clamp((63 + Filter2 * 18) >> 7);
-    s = vp8_signed_char_clamp(qs1 - u);
-    *oq1 = s ^ 0x80;
-    s = vp8_signed_char_clamp(ps1 + u);
-    *op1 = s ^ 0x80;
-
-    /* roughly 1/7th difference across boundary */
-    u = vp8_signed_char_clamp((63 + Filter2 * 9) >> 7);
-    s = vp8_signed_char_clamp(qs2 - u);
-    *oq2 = s ^ 0x80;
-    s = vp8_signed_char_clamp(ps2 + u);
-    *op2 = s ^ 0x80;
+    signed char flat = 0; /* accumulates "not flat": becomes all ones as soon as any test below fires */
+    flat |= (abs(p1 - p0) > 1) * -1; /* NOTE: thresh is never read; every test compares against the constant 1 */
+    flat |= (abs(q1 - q0) > 1) * -1;
+    flat |= (abs(p0 - p2) > 1) * -1;
+    flat |= (abs(q0 - q2) > 1) * -1;
+    flat |= (abs(p3 - p0) > 1) * -1;
+    flat |= (abs(q3 - q0) > 1) * -1;
+    flat |= (abs(p4 - p0) > 1) * -1;
+    flat |= (abs(q4 - q0) > 1) * -1;
+    flat = ~flat; /* invert: all ones when p1..p4 are within 1 of p0 and q1..q4 within 1 of q0, else 0 */
+    return flat;
 }

+static __inline void vp8_mbfilter(signed char mask, uc hev, uc flat,
+                                  uc *op4, uc *op3, uc *op2, uc *op1, uc *op0,
+                                  uc *oq0, uc *oq1, uc *oq2, uc *oq3, uc *oq4)
+{   /* Two paths: an averaging filter that rewrites op2..oq2 when flat and mask are both nonzero, otherwise a filter that rewrites op1..oq1. */
+    /* use a 7 tap filter [1, 1, 1, 2, 1, 1, 1] for flat line */
+    if(flat && mask)
+    {
+        unsigned char p0, q0;
+        unsigned char p1, q1;
+        unsigned char p2, q2;
+        unsigned char p3, q3;
+        unsigned char p4, q4;
+        /* read all ten pixels before any store so every output is computed from unfiltered inputs */
+        p4 = *op4;
+        p3 = *op3;
+        p2 = *op2;
+        p1 = *op1;
+        p0 = *op0;
+        q0 = *oq0;
+        q1 = *oq1;
+        q2 = *oq2;
+        q3 = *oq3;
+        q4 = *oq4;
+        /* eight terms plus 4, shifted right by 3; p4 (for op2) and q4 (for oq2) appear twice; op3/op4/oq3/oq4 are read only */
+        *op2 = ( p4 + p4 + p3 + p2 + p2 + p1 + p0 + q0 + 4)>>3;
+        *op1 = ( p4 + p3 + p2 + p1 + p1 + p0 + q0 + q1 + 4)>>3;
+        *op0 = ( p3 + p2 + p1 + p0 + p0 + q0 + q1 + q2 + 4)>>3;
+        *oq0 = ( p2 + p1 + p0 + q0 + q0 + q1 + q2 + q3 + 4)>>3;
+        *oq1 = ( p1 + p0 + q0 + q1 + q1 + q2 + q3 + q4 + 4)>>3;
+        *oq2 = ( p0 + q0 + q1 + q2 + q2 + q3 + q4 + q4 + 4)>>3;
+    }
+    else
+    {
+        signed char ps0, qs0;
+        signed char ps1, qs1;
+        signed char vp8_filter, Filter1, Filter2;
+        signed char u;
+        /* this path reads and writes only op1, op0, oq0 and oq1; values are XORed with 0x80 on the way in and out */
+        ps1 = (signed char) * op1 ^ 0x80;
+        ps0 = (signed char) * op0 ^ 0x80;
+        qs0 = (signed char) * oq0 ^ 0x80;
+        qs1 = (signed char) * oq1 ^ 0x80;
+
+        /* add outer taps if we have high edge variance */
+        vp8_filter = vp8_signed_char_clamp(ps1 - qs1);
+        vp8_filter &= hev;
+
+        /* inner taps */
+        vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (qs0 - ps0));
+        vp8_filter &= mask;
+        /* the two sides round differently: +4 for the value subtracted from qs0, +3 for the value added to ps0 */
+        Filter1 = vp8_signed_char_clamp(vp8_filter + 4);
+        Filter2 = vp8_signed_char_clamp(vp8_filter + 3);
+        Filter1 >>= 3;
+        Filter2 >>= 3;
+        u = vp8_signed_char_clamp(qs0 - Filter1);
+        *oq0 = u ^ 0x80;
+        u = vp8_signed_char_clamp(ps0 + Filter2);
+        *op0 = u ^ 0x80;
+        vp8_filter = Filter1; /* the outer-tap adjustment starts from the already shifted Filter1 */
+
+        /* outer tap adjustments */
+        vp8_filter += 1;
+        vp8_filter >>= 1;
+        vp8_filter &= ~hev; /* zeroed where hev is set, so op1/oq1 are then written back unchanged */
+
+        u = vp8_signed_char_clamp(qs1 - vp8_filter);
+        *oq1 = u ^ 0x80;
+        u = vp8_signed_char_clamp(ps1 + vp8_filter);
+        *op1 = u ^ 0x80;
+    }
+}
 void vp8_mbloop_filter_horizontal_edge_c
 (
    unsigned char *s,
@ -226,6 +261,7 @@ void vp8_mbloop_filter_horizontal_edge_c
 {
    signed char hev = 0; /* high edge variance */
    signed char mask = 0;
+    signed char flat = 0;
    int i = 0;

    /* loop filter designed to work using chars so that we can make maximum use
@ -236,11 +272,16 @@ void vp8_mbloop_filter_horizontal_edge_c

        mask = vp8_filter_mask(limit[0], blimit[0],
                               s[-4*p], s[-3*p], s[-2*p], s[-1*p],
-                               s[0*p], s[1*p], s[2*p], s[3*p]);
+                               s[ 0*p], s[ 1*p], s[ 2*p], s[ 3*p]);

        hev = vp8_hevmask(thresh[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);

-        vp8_mbfilter(mask, hev, s - 3 * p, s - 2 * p, s - 1 * p, s, s + 1 * p, s + 2 * p);
+        flat = vp8_flatmask(thresh[0],
+                            s[-5*p], s[-4*p], s[-3*p], s[-2*p], s[-1*p],
+                            s[ 0*p], s[ 1*p], s[ 2*p], s[ 3*p], s[ 4*p]);
+        vp8_mbfilter(mask, hev, flat,
+                     s - 5*p, s - 4*p, s- 3*p, s - 2*p, s - 1*p,
+                     s,       s + 1*p, s+ 2*p, s + 3*p, s + 4*p );

        ++s;
    }
@ -261,18 +302,23 @@ void vp8_mbloop_filter_vertical_edge_c
 {
    signed char hev = 0; /* high edge variance */
    signed char mask = 0;
+    signed char flat = 0;
    int i = 0;

    do
    {

        mask = vp8_filter_mask(limit[0], blimit[0],
-                               s[-4], s[-3], s[-2], s[-1], s[0], s[1], s[2], s[3]);
+                               s[-4], s[-3], s[-2], s[-1],
+                               s[0], s[1], s[2], s[3]);

        hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);
-
-        vp8_mbfilter(mask, hev, s - 3, s - 2, s - 1, s, s + 1, s + 2);
-
+        flat = vp8_flatmask(thresh[0],
+                            s[-5],s[-4],s[-3],s[-2],s[-1],
+                            s[ 0],s[ 1],s[ 2],s[ 3],s[ 4]);
+        vp8_mbfilter(mask, hev, flat,
+                            s - 5, s - 4, s - 3, s - 2, s - 1,
+                            s,     s + 1, s + 2, s + 3, s + 4);
        s += p;
    }
    while (++i < count * 8);
@ -280,7 +326,9 @@ void vp8_mbloop_filter_vertical_edge_c
 }

 /* should we apply any filter at all ( 11111111 yes, 00000000 no) */
-static __inline signed char vp8_simple_filter_mask(uc blimit, uc p1, uc p0, uc q0, uc q1)
+static __inline signed char vp8_simple_filter_mask(uc blimit,
+                                                   uc p1, uc p0,
+                                                   uc q0, uc q1)
 {
 /* Why does this cause problems for win32?
 * error C2143: syntax error : missing ';' before 'type'
@ -290,7 +338,9 @@ static __inline signed char vp8_simple_filter_mask(uc blimit, uc p1, uc p0, uc q
    return mask;
 }

-static __inline void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *oq0, uc *oq1)
+static __inline void vp8_simple_filter(signed char mask,
+                                       uc *op1, uc *op0,
+                                       uc *oq0, uc *oq1)
 {
    signed char vp8_filter, Filter1, Filter2;
    signed char p1 = (signed char) * op1 ^ 0x80;
@ -327,8 +377,12 @@ void vp8_loop_filter_simple_horizontal_edge_c

    do
    {
-        mask = vp8_simple_filter_mask(blimit[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);
-        vp8_simple_filter(mask, s - 2 * p, s - 1 * p, s, s + 1 * p);
+        mask = vp8_simple_filter_mask(blimit[0],
+                                      s[-2*p], s[-1*p],
+                                      s[0*p], s[1*p]);
+        vp8_simple_filter(mask,
+                          s - 2 * p, s - 1 * p,
+                          s, s + 1 * p);
        ++s;
    }
    while (++i < 16);
--- a/vp8/common/maskingmv.c
+++ b/vp8/common/maskingmv.c
@ -0,0 +1,855 @@
+/*
+ ============================================================================
+ Name        : maskingmv.c
+ Author      : jimbankoski
+ Version     :
+ Copyright   : Your copyright notice
+ Description : Experimental color-segmentation masks with masked/unmasked motion search
+ ============================================================================
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+extern unsigned int vp8_sad16x16_sse3(
+    unsigned char *src_ptr,
+    int  src_stride,
+    unsigned char *ref_ptr,
+    int  ref_stride,
+    int  max_err);
+
+extern void vp8_sad16x16x3_sse3(
+    unsigned char *src_ptr,
+    int  src_stride,
+    unsigned char *ref_ptr,
+    int  ref_stride,
+    int  *results);
+
+extern int vp8_growmaskmb_sse3(
+    unsigned char *om,
+    unsigned char *nm);
+
+extern void vp8_makemask_sse3(
+    unsigned char *y,
+    unsigned char *u,
+    unsigned char *v,
+    unsigned char *ym,
+    int yp,
+    int uvp,
+    int ys,
+    int us,
+    int vs,
+    int yt,
+    int ut,
+    int vt);
+
+unsigned int vp8_sad16x16_unmasked_wmt(
+    unsigned char *src_ptr,
+    int  src_stride,
+    unsigned char *ref_ptr,
+    int  ref_stride,
+    unsigned char *mask);
+
+unsigned int vp8_sad16x16_masked_wmt(
+    unsigned char *src_ptr,
+    int  src_stride,
+    unsigned char *ref_ptr,
+    int  ref_stride,
+    unsigned char *mask);
+
+unsigned int vp8_masked_predictor_wmt(
+    unsigned char *masked,
+    unsigned char *unmasked,
+    int  src_stride,
+    unsigned char *dst_ptr,
+    int  dst_stride,
+    unsigned char *mask);
+unsigned int vp8_masked_predictor_uv_wmt(
+    unsigned char *masked,
+    unsigned char *unmasked,
+    int  src_stride,
+    unsigned char *dst_ptr,
+    int  dst_stride,
+    unsigned char *mask);
+unsigned int vp8_uv_from_y_mask(
+    unsigned char *ymask,
+    unsigned char *uvmask);
+int yp=16;
+unsigned char sxy[]=
+{
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90
+};
+
+unsigned char sts[]=
+{
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+};
+unsigned char str[]=
+{
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
+};
+
+unsigned char y[]=
+{
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,
+40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,
+40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,
+40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,
+60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,40,
+60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,40,
+60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,40,
+40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,
+40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,
+40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,
+40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40
+};
+int uvp=8;
+unsigned char u[]=
+{
+90,80,70,70,90,90,90,17,
+90,80,70,70,90,90,90,17,
+84,70,70,90,90,90,17,17,
+84,70,70,90,90,90,17,17,
+80,70,70,90,90,90,17,17,
+90,80,70,70,90,90,90,17,
+90,80,70,70,90,90,90,17,
+90,80,70,70,90,90,90,17
+};
+
+unsigned char v[]=
+{
+80,80,80,80,80,80,80,80,
+80,80,80,80,80,80,80,80,
+80,80,80,80,80,80,80,80,
+80,80,80,80,80,80,80,80,
+80,80,80,80,80,80,80,80,
+80,80,80,80,80,80,80,80,
+80,80,80,80,80,80,80,80,
+80,80,80,80,80,80,80,80
+};
+
+unsigned char ym[256];
+unsigned char uvm[64];
+typedef struct // One color key: a (y,u,v) center value plus a tolerance per channel.
+{
+    unsigned char y;   // luma center value
+    unsigned char yt;  // luma tolerance; pixel_mask() requires abs(sample - y) < yt
+    unsigned char u;   // first chroma center value
+    unsigned char ut;  // tolerance for u, applied the same way
+    unsigned char v;   // second chroma center value
+    unsigned char vt;  // tolerance for v, applied the same way
+    unsigned char use; // NOTE(review): never read in the code visible here; presumably an enable flag - confirm
+} COLOR_SEG_ELEMENT;
+
+/*
+COLOR_SEG_ELEMENT segmentation[]=
+{
+    { 60,4,80,17,80,10, 1},
+    { 40,4,15,10,80,10, 1},
+};
+*/
+
+COLOR_SEG_ELEMENT segmentation[]=
+{
+    { 79,44,92,44, 237,60, 1},
+};
+
+unsigned char pixel_mask(unsigned char y,unsigned char u,unsigned char v, // Classify one (y,u,v) sample against a list of color keys.
+                COLOR_SEG_ELEMENT sgm[], // color keys to test against
+                int c)                   // number of entries of sgm to test
+{
+    COLOR_SEG_ELEMENT *s=sgm;
+    unsigned char m =0; // stays 0 unless at least one key matches
+    int i;
+    for(i=0;i<c;i++,s++)
+        m |= ( abs(y-s->y)< s->yt && // a key matches only when all three channels
+               abs(u-s->u)< s->ut && // are strictly inside their tolerances
+               abs(v-s->v)< s->vt ? 255 : 0 );
+
+    return m; // 255 if any key matched, otherwise 0
+}
+int neighbors[256][8];
+int makeneighbors(void) // Fill neighbors[i][0..7] with the pixels adjacent to pixel i of a 16x16 block (i = row*16 + col). Always returns 0.
+{
+    int i,j;
+    for(i=0;i<256;i++)
+    {
+        int r=(i>>4),c=(i&15); // row and column of pixel i
+        int ni=0;              // number of neighbor slots filled so far
+        for(j=0;j<8;j++)
+            neighbors[i][j]=i; // unused slots default to the pixel itself (edge and corner pixels have fewer than 8 neighbors)
+        for(j=0;j<256;j++)
+        {
+            int nr=(j>>4),nc=(j&15);
+            if(j!=i&&abs(nr-r)<2&&abs(nc-c)<2) // skip the pixel itself: counting it produced 9 entries for interior pixels and overran the 8-slot row
+              neighbors[i][ni++]=j;
+        }
+    }
+    return 0;
+}
+void grow_ymask(unsigned char *ym) // Grow a 256-entry mask in place: each entry becomes the OR of itself and the entries listed in neighbors[i][].
+{
+    unsigned char nym[256]; // result is built here so every read below sees the un-grown mask
+    int i,j;
+
+    for(i=0;i<256;i++)
+    {
+        nym[i]=ym[i];
+        for(j=0;j<8;j++)
+        {
+            nym[i]|=ym[neighbors[i][j]]; // relies on neighbors[][] having been filled (makeneighbors() writes it)
+        }
+    }
+    for(i=0;i<256;i++)
+        ym[i]=nym[i]; // copy the grown mask back over the input
+}
+void make_mb_mask(unsigned char *y, unsigned char *u, unsigned char *v, // Build a 16x16 luma mask and an 8x8 chroma mask from color keys.
+                  unsigned char *ym, unsigned char *uvm, // out: 256-byte luma mask (row stride 16) and 64-byte chroma mask (row stride 8)
+                  int yp, int uvp,                       // row strides of the y plane and of the u/v planes
+                  COLOR_SEG_ELEMENT sgm[],               // color keys handed to pixel_mask()
+                  int count)                             // number of entries of sgm to test
+{
+    int r,c;
+    unsigned char *oym = ym; // keep the mask start; ym itself is advanced by the loop
+
+    memset(ym,20,256); // all 256 entries are overwritten by the loop below before anything reads them
+    for(r=0;r<8;r++,uvm+=8,u+=uvp,v+=uvp,y+=(yp+yp),ym+=32) // each iteration covers one chroma row and two luma rows
+        for(c=0;c<8;c++)
+        {
+            int y1=y[c<<1];
+            int u1=u[c];
+            int v1=v[c];
+            int m = pixel_mask(y1,u1,v1,sgm,count);
+            uvm[c] = m; // chroma mask uses the result for the top-left luma sample of the 2x2 group
+            ym[c<<1] = uvm[c];// = pixel_mask(y[c<<1],u[c],v[c],sgm,count);
+            ym[(c<<1)+1] = pixel_mask(y[1+(c<<1)],u[c],v[c],sgm,count); // the other three luma samples reuse the same chroma sample
+            ym[(c<<1)+16] = pixel_mask(y[yp+(c<<1)],u[c],v[c],sgm,count);
+            ym[(c<<1)+17] = pixel_mask(y[1+yp+(c<<1)],u[c],v[c],sgm,count);
+        }
+    grow_ymask(oym); // only the luma mask is grown; uvm is left as computed above
+}
+
+int masked_sad(unsigned char *src, int p, unsigned char *dst, int dp, // Sum of absolute differences over a 16x16 block, counting only pixels whose mask entry is nonzero.
+               unsigned char *ym ) // 16x16 mask with row stride 16; p and dp are the row strides of src and dst
+{
+    int i,j;
+    unsigned sad = 0;
+    for(i=0;i<16;i++,src+=p,dst+=dp,ym+=16) // advance all three pointers one row per iteration
+        for(j=0;j<16;j++)
+            if(ym[j]) // masked pixels only
+                sad+= abs(src[j]-dst[j]);
+
+    return sad;
+}
+
+int compare_masks(unsigned char *sym, unsigned char *ym) // Count the positions (0..256) at which two 16x16 masks hold different values.
+{
+    int i,j;
+    unsigned sad = 0; // number of differing entries, despite the name
+    for(i=0;i<16;i++,sym += 16,ym+=16) // both masks use a row stride of 16
+        for(j=0;j<16;j++)
+            sad+= (sym[j]!=ym[j]?1:0);
+
+    return sad;
+}
+int unmasked_sad(unsigned char *src, int p, unsigned char *dst, int dp, // Sum of absolute differences over a 16x16 block, counting only pixels whose mask entry is zero.
+               unsigned char *ym) // 16x16 mask with row stride 16; p and dp are the row strides of src and dst
+{
+    int i,j;
+    unsigned sad = 0;
+    for(i=0;i<16;i++,src+=p,dst+=dp,ym+=16) // advance all three pointers one row per iteration
+        for(j=0;j<16;j++)
+            if(!ym[j]) // unmasked pixels only
+                sad+= abs(src[j]-dst[j]);
+
+    return sad;
+}
+int masked_motion_search( unsigned char *y, unsigned char *u, unsigned char *v, // Exhaustive search over offsets -32..31 for a masked and an unmasked motion vector, tried in two orders. Always returns 0.
+                          int yp, int uvp,                 // row strides of the source y and u/v planes
+                          unsigned char *dy, unsigned char *du, unsigned char *dv, // reference planes; read at offsets -32..31 rows and columns around these pointers
+                          int dyp, int duvp,               // row strides of the reference y and u/v planes
+                          COLOR_SEG_ELEMENT sgm[],         // color keys handed to make_mb_mask()
+                          int count,                       // number of entries of sgm
+                          int *mi,                         // out: row offset chosen for the masked pixels
+                          int *mj,                         // out: column offset chosen for the masked pixels
+                          int *ui,                         // out: row offset chosen for the unmasked pixels
+                          int *uj,                         // out: column offset chosen for the unmasked pixels
+                          int *wm)                         // out: 1 if the mask-first pass gave the lower total, else 0
+{
+    int i,j;
+
+    unsigned char ym[256];   // mask built from the source block
+    unsigned char uvm[64];
+    unsigned char dym[256];  // mask built from a candidate reference block
+    unsigned char duvm[64];
+    unsigned int e = 0 ;     // cost of the current candidate
+    int beste=256;
+    int bmi=-32,bmj=-32;     // best masked offset so far
+    int bui=-32,buj=-32;     // best unmasked offset so far
+    int beste1=256;          // total cost of pass 1
+    int bmi1=-32,bmj1=-32;   // pass 1 results, saved while pass 2 runs
+    int bui1=-32,buj1=-32;
+    int obeste;
+
+    // pass 1: find the best unmasked mv first, then the best masked mv
+    beste = 0xffffffff;
+
+    // find best unmasked mv
+    for(i=-32;i<32;i++)
+    {
+        unsigned char *dyz = i*dyp + dy;
+        unsigned char *duz = i/2*duvp + du; // chroma planes are addressed at half the luma offset
+        unsigned char *dvz = i/2*duvp + dv;
+        for(j=-32;j<32;j++)
+        {
+            // 0,0  masked destination
+            make_mb_mask(dyz+j,duz+j/2, dvz+j/2, dym, duvm, dyp, duvp,sgm,count);
+
+            e = unmasked_sad(y, yp, dyz+j, dyp, dym );
+
+            if(e<beste) // e is unsigned int, so this comparison is carried out in unsigned arithmetic
+            {
+                bui=i;
+                buj=j;
+                beste=e;
+            }
+        }
+    }
+    //bui=0;buj=0;
+    // best mv masked destination
+    make_mb_mask(dy+bui*dyp+buj,du+bui/2*duvp+buj/2, dv+bui/2*duvp+buj/2,
+                 dym, duvm, dyp, duvp,sgm,count);
+
+    obeste = beste; // unmasked cost of pass 1
+    beste = 0xffffffff;
+
+    // find best masked
+    for(i=-32;i<32;i++)
+    {
+        unsigned char *dyz = i*dyp + dy;
+        for(j=-32;j<32;j++)
+        {
+            e = masked_sad(y, yp, dyz+j, dyp, dym ); // mask stays fixed at the best unmasked position
+
+            if(e<beste)
+            {
+                bmi=i;
+                bmj=j;
+                beste=e;
+            }
+        }
+    }
+    beste1=beste+obeste; // pass 1 total = masked cost + unmasked cost
+    bmi1=bmi;bmj1=bmj;
+    bui1=bui;buj1=buj;
+
+    beste = 0xffffffff;
+    // source mask
+    make_mb_mask(y,u, v, ym, uvm, yp, uvp,sgm,count);
+
+    // find best mask
+    for(i=-32;i<32;i++)
+    {
+        unsigned char *dyz = i*dyp + dy;
+        unsigned char *duz = i/2*duvp + du;
+        unsigned char *dvz = i/2*duvp + dv;
+        for(j=-32;j<32;j++)
+        {
+            // 0,0  masked destination
+            make_mb_mask(dyz+j,duz+j/2, dvz+j/2, dym, duvm, dyp, duvp,sgm,count);
+
+            e = compare_masks(ym, dym); // pass 2 picks the reference position whose mask differs least from the source mask
+
+            if(e<beste)
+            {
+                bmi=i;
+                bmj=j;
+                beste=e;
+            }
+        }
+    }
+
+
+    // best mv masked destination
+    make_mb_mask(dy+bmi*dyp+bmj,du+bmi/2*duvp+bmj/2, dv+bmi/2*duvp+bmj/2,
+                 dym, duvm, dyp, duvp,sgm,count);
+
+    obeste = masked_sad(y, yp, dy+bmi*dyp+bmj, dyp, dym ); // masked cost of pass 2
+
+    beste = 0xffffffff;
+
+    // find best unmasked mv
+    for(i=-32;i<32;i++)
+    {
+        unsigned char *dyz = i*dyp + dy;
+        for(j=-32;j<32;j++)
+        {
+            e = unmasked_sad(y, yp, dyz+j, dyp, dym );
+
+            if(e<beste)
+            {
+                bui=i;
+                buj=j;
+                beste=e;
+            }
+        }
+    }
+    beste += obeste; // pass 2 total = unmasked cost + masked cost
+
+
+    if(beste<beste1) // report whichever pass produced the lower total
+    {
+        *mi = bmi;
+        *mj = bmj;
+        *ui = bui;
+        *uj = buj;
+        *wm = 1;
+    }
+    else
+    {
+        *mi = bmi1;
+        *mj = bmj1;
+        *ui = bui1;
+        *uj = buj1;
+        *wm = 0;
+
+    }
+    return 0;
+}
+
+int predict(unsigned char *src, int p, unsigned char *dst, int dp, // Compose a 16x16 block: take src where the mask is nonzero and dst elsewhere. Always returns 0.
+            unsigned char *ym, unsigned char *prd ) // ym: 16x16 mask, prd: 16x16 output; both are walked with row stride 16
+{
+    int i,j;
+    for(i=0;i<16;i++,src+=p,dst+=dp,ym+=16, prd+=16) // p and dp are the row strides of src and dst
+        for(j=0;j<16;j++)
+            prd[j]=(ym[j] ? src[j]:dst[j]);
+    return 0;
+}
+
+int fast_masked_motion_search( unsigned char *y, unsigned char *u, unsigned char *v, // Same two-pass search as masked_motion_search(), using the extern vp8_*_sse3 / vp8_*_wmt routines. Returns the total cost of the winning pass.
+                          int yp, int uvp,                 // row strides of the source y and u/v planes
+                          unsigned char *dy, unsigned char *du, unsigned char *dv, // reference planes; read at offsets -32..31 rows and columns around these pointers
+                          int dyp, int duvp,               // row strides of the reference y and u/v planes
+                          COLOR_SEG_ELEMENT sgm[],         // only sgm[0] is used by this function
+                          int count,                       // not used by this function
+                          int *mi,                         // out: row offset chosen for the masked pixels
+                          int *mj,                         // out: column offset chosen for the masked pixels
+                          int *ui,                         // out: row offset chosen for the unmasked pixels
+                          int *uj,                         // out: column offset chosen for the unmasked pixels
+                          int *wm)                         // out: 1 if the mask-first pass gave the lower total, else 0
+{
+    int i,j;
+
+    unsigned char ym[256];   // source mask as produced by vp8_makemask_sse3
+    unsigned char ym2[256];  // source mask after vp8_growmaskmb_sse3
+    unsigned char uvm[64];
+    unsigned char dym2[256]; // reference mask after vp8_growmaskmb_sse3
+    unsigned char dym[256];  // reference mask as produced by vp8_makemask_sse3
+    unsigned char duvm[64];
+    unsigned int e = 0 ;     // cost of the current candidate
+    int beste=256;
+    int bmi=-32,bmj=-32;     // best masked offset so far
+    int bui=-32,buj=-32;     // best unmasked offset so far
+    int beste1=256;          // total cost of pass 1
+    int bmi1=-32,bmj1=-32;   // pass 1 results, saved while pass 2 runs
+    int bui1=-32,buj1=-32;
+    int obeste;
+
+    // pass 1: find the best unmasked mv first, then the best masked mv
+    beste = 0xffffffff;
+
+#if 0
+    for(i=0;i<16;i++)
+    {
+        unsigned char *dy = i*yp + y;
+        for(j=0;j<16;j++)
+            printf("%2x",dy[j]);
+        printf("\n");
+    }
+    printf("\n");
+
+    for(i=-32;i<48;i++)
+    {
+        unsigned char *dyz = i*dyp + dy;
+        for(j=-32;j<48;j++)
+            printf("%2x",dyz[j]);
+        printf("\n");
+    }
+#endif
+
+    // find best unmasked mv
+    for(i=-32;i<32;i++)
+    {
+        unsigned char *dyz = i*dyp + dy;
+        unsigned char *duz = i/2*duvp + du; // chroma planes are addressed at half the luma offset
+        unsigned char *dvz = i/2*duvp + dv;
+        for(j=-32;j<32;j++)
+        {
+            // 0,0  masked destination
+            vp8_makemask_sse3(dyz+j,duz+j/2, dvz+j/2, dym, dyp, duvp,
+                              sgm[0].y,sgm[0].u,sgm[0].v,
+                              sgm[0].yt,sgm[0].ut,sgm[0].vt);
+
+            vp8_growmaskmb_sse3(dym,dym2); // NOTE(review): presumably the SIMD counterpart of grow_ymask() - confirm against its definition
+
+            e = vp8_sad16x16_unmasked_wmt(y, yp, dyz+j, dyp, dym2 );
+
+            if(e<beste) // e is unsigned int, so this comparison is carried out in unsigned arithmetic
+            {
+                bui=i;
+                buj=j;
+                beste=e;
+            }
+        }
+    }
+    //bui=0;buj=0;
+    // best mv masked destination
+
+    vp8_makemask_sse3(dy+bui*dyp+buj,du+bui/2*duvp+buj/2, dv+bui/2*duvp+buj/2,
+                      dym, dyp, duvp,
+                      sgm[0].y,sgm[0].u,sgm[0].v,
+                      sgm[0].yt,sgm[0].ut,sgm[0].vt);
+
+    vp8_growmaskmb_sse3(dym,dym2);
+
+    obeste = beste; // unmasked cost of pass 1
+    beste = 0xffffffff;
+
+    // find best masked
+    for(i=-32;i<32;i++)
+    {
+        unsigned char *dyz = i*dyp + dy;
+        for(j=-32;j<32;j++)
+        {
+            e = vp8_sad16x16_masked_wmt(y, yp, dyz+j, dyp, dym2 ); // mask stays fixed at the best unmasked position
+            if(e<beste)
+            {
+                bmi=i;
+                bmj=j;
+                beste=e;
+            }
+        }
+    }
+    beste1=beste+obeste; // pass 1 total = masked cost + unmasked cost
+    bmi1=bmi;bmj1=bmj;
+    bui1=bui;buj1=buj;
+    beste = 0xffffffff; // restart the running minimum for pass 2, as masked_motion_search() does; otherwise mask mismatches are compared against the leftover SAD
+    // source mask
+    vp8_makemask_sse3(y,u, v,
+                        ym, yp, uvp,
+                        sgm[0].y,sgm[0].u,sgm[0].v,
+                        sgm[0].yt,sgm[0].ut,sgm[0].vt);
+
+    vp8_growmaskmb_sse3(ym,ym2);
+
+    // find best mask
+    for(i=-32;i<32;i++)
+    {
+        unsigned char *dyz = i*dyp + dy;
+        unsigned char *duz = i/2*duvp + du;
+        unsigned char *dvz = i/2*duvp + dv;
+        for(j=-32;j<32;j++)
+        {
+            // 0,0  masked destination
+            vp8_makemask_sse3(dyz+j,duz+j/2, dvz+j/2, dym, dyp, duvp,
+                              sgm[0].y,sgm[0].u,sgm[0].v,
+                              sgm[0].yt,sgm[0].ut,sgm[0].vt);
+
+            vp8_growmaskmb_sse3(dym,dym2);
+
+            e = compare_masks(ym2, dym2); // pass 2 picks the reference position whose grown mask differs least from the source's
+
+            if(e<beste)
+            {
+                bmi=i;
+                bmj=j;
+                beste=e;
+            }
+        }
+    }
+
+    vp8_makemask_sse3(dy+bmi*dyp+bmj,du+bmi/2*duvp+bmj/2, dv+bmi/2*duvp+bmj/2,
+                      dym, dyp, duvp,
+                      sgm[0].y,sgm[0].u,sgm[0].v,
+                      sgm[0].yt,sgm[0].ut,sgm[0].vt);
+
+    vp8_growmaskmb_sse3(dym,dym2);
+
+    obeste = vp8_sad16x16_masked_wmt(y, yp, dy+bmi*dyp+bmj, dyp, dym2 ); // masked cost of pass 2
+
+    beste = 0xffffffff;
+
+    // find best unmasked mv
+    for(i=-32;i<32;i++)
+    {
+        unsigned char *dyz = i*dyp + dy;
+        for(j=-32;j<32;j++)
+        {
+            e = vp8_sad16x16_unmasked_wmt(y, yp, dyz+j, dyp, dym2 );
+
+            if(e<beste)
+            {
+                bui=i;
+                buj=j;
+                beste=e;
+            }
+        }
+    }
+    beste += obeste; // pass 2 total = unmasked cost + masked cost
+
+    if(beste<beste1) // report whichever pass produced the lower total
+    {
+        *mi = bmi;
+        *mj = bmj;
+        *ui = bui;
+        *uj = buj;
+        *wm = 1;
+    }
+    else
+    {
+        *mi = bmi1;
+        *mj = bmj1;
+        *ui = bui1;
+        *uj = buj1;
+        *wm = 0;
+        beste=beste1;
+
+    }
+    return beste;
+}
+
+int predict_all(unsigned char *ym, unsigned char *um, unsigned char *vm, // Build the y, u and v predictors for one macroblock from a masked and an unmasked motion vector. Always returns 0.
+                int ymp, int uvmp,      // row strides of the reference y and u/v planes
+                unsigned char *yp, unsigned char *up, unsigned char *vp, // destination planes for the predictor
+                int ypp, int uvpp,      // row strides of the destination y and u/v planes
+                COLOR_SEG_ELEMENT sgm[], // only sgm[0] is used by this function
+                int count,              // not used by this function
+                int mi,                 // masked motion vector, row offset
+                int mj,                 // masked motion vector, column offset
+                int ui,                 // unmasked motion vector, row offset
+                int uj,                 // unmasked motion vector, column offset
+                int wm)                 // nonzero: derive the mask at the masked-mv position; zero: at the unmasked-mv position
+{
+    int i,j;
+    unsigned char dym[256];
+    unsigned char dym2[256];
+    unsigned char duvm[64];
+    unsigned char *yu=ym,*uu=um, *vu=vm; // copies of the reference pointers, moved to the unmasked-mv position below
+
+    unsigned char *dym3=dym2; // alias of dym2, the output of vp8_growmaskmb_sse3 below
+
+    ym+=mi*ymp+mj;       // reference pointers moved to the masked-mv position
+    um+=mi/2*uvmp+mj/2;  // chroma planes use half the luma offset
+    vm+=mi/2*uvmp+mj/2;
+
+    yu+=ui*ymp+uj;
+    uu+=ui/2*uvmp+uj/2;
+    vu+=ui/2*uvmp+uj/2;
+
+    // best mv masked destination
+    if(wm)
+        vp8_makemask_sse3(ym,um, vm, dym, ymp, uvmp,
+                              sgm[0].y,sgm[0].u,sgm[0].v,
+                              sgm[0].yt,sgm[0].ut,sgm[0].vt);
+    else
+        vp8_makemask_sse3(yu,uu, vu, dym, ymp, uvmp,
+                              sgm[0].y,sgm[0].u,sgm[0].v,
+                              sgm[0].yt,sgm[0].ut,sgm[0].vt);
+
+    vp8_growmaskmb_sse3(dym,dym2);
+    vp8_masked_predictor_wmt(ym,yu,ymp,yp,ypp,dym3); // NOTE(review): presumably selects per pixel between the two sources by mask, like predict() - confirm against its definition
+    vp8_uv_from_y_mask(dym3,duvm);                   // NOTE(review): presumably derives the 8x8 chroma mask from the luma mask - confirm
+    vp8_masked_predictor_uv_wmt(um,uu,uvmp,up,uvpp,duvm);
+    vp8_masked_predictor_uv_wmt(vm,vu,uvmp,vp,uvpp,duvm);
+
+    return 0;
+}
+
+unsigned char f0p[1280*720*3/2];
+unsigned char f1p[1280*720*3/2];
+unsigned char prd[1280*720*3/2];
+unsigned char msk[1280*720*3/2];
+
+
+int mainz(int argc, char *argv[]) { // Test driver. argv: [1] input file, [2] output file, [3] width, [4] height. Returns 0 on success, 1 on bad arguments.
+
+    FILE *f=(argc>4)?fopen(argv[1],"rb"):NULL; // left NULL when too few arguments were given
+    FILE *g=(argc>4)?fopen(argv[2],"wb"):NULL;
+    int w=(argc>4)?atoi(argv[3]):0,h=(argc>4)?atoi(argv[4]):0;
+    int y_stride=w,uv_stride=w/2;
+    int r,c;
+    unsigned char *f0=f0p,*f1=f1p,*t; // f0: previous frame (reference), f1: current frame; swapped after every frame
+    unsigned char ym[256],uvm[64];
+    unsigned char ym2[256],uvm2[64];
+    unsigned char ym3[256],uvm3[64];
+    int a,b;
+
+    COLOR_SEG_ELEMENT last={ 20,20,20,20, 230,20, 1},best=last; // best starts equal to last so predict_all() never receives an uninitialized key
+#if 0
+    makeneighbors();
+    COLOR_SEG_ELEMENT segmentation[]=
+    {
+        { 60,4,80,17,80,10, 1},
+        { 40,4,15,10,80,10, 1},
+    };
+    make_mb_mask(y, u, v,ym2,uvm2,16,8,segmentation,1);
+
+    vp8_makemask_sse3(y,u,v,ym, (int) 16,(int) 8,
+                      (int) segmentation[0].y,(int) segmentation[0].u,(int) segmentation[0].v,
+                      segmentation[0].yt,segmentation[0].ut,segmentation[0].vt);
+
+    vp8_growmaskmb_sse3(ym,ym3);
+
+    a = vp8_sad16x16_masked_wmt(str,16,sts,16,ym3);
+    b = vp8_sad16x16_unmasked_wmt(str,16,sts,16,ym3);
+
+    vp8_masked_predictor_wmt(str,sts,16,ym,16,ym3);
+
+    vp8_uv_from_y_mask(ym3,uvm3);
+
+    return 4;
+#endif
+    makeneighbors();
+    if(!f||!g||w<=0||h<=0||w>1280*720/h) // reject missing arguments/files and frames that would not fit the static 1280*720*3/2 buffers
+    { if(f) fclose(f); if(g) fclose(g); return 1; }
+    memset(prd,128,w*h*3/2);
+
+    fread(f0,w*h*3/2,1,f); // first frame: y plane, then u at offset w*h, then v at offset w*h*5/4
+
+    while(!feof(f))
+    {
+        unsigned char *ys=f1,*yd=f0,*yp=prd;
+        unsigned char *us=f1+w*h,*ud=f0+w*h,*up=prd+w*h;
+        unsigned char *vs=f1+w*h*5/4,*vd=f0+w*h*5/4,*vp=prd+w*h*5/4;
+        if(fread(f1,w*h*3/2,1,f)!=1) break; // feof() only turns true after a failed read, so stop here instead of processing a stale frame
+
+        ys+=32*y_stride;yd+=32*y_stride;yp+=32*y_stride; // skip a 32-pixel border so the +/-32 search stays inside the frame
+        us+=16*uv_stride;ud+=16*uv_stride;up+=16*uv_stride;
+        vs+=16*uv_stride;vd+=16*uv_stride;vp+=16*uv_stride;
+        for(r=32;r<h-32;r+=16,
+            ys+=16*w,yd+=16*w,yp+=16*w,
+            us+=8*uv_stride,ud+=8*uv_stride,up+=8*uv_stride,
+            vs+=8*uv_stride,vd+=8*uv_stride,vp+=8*uv_stride)
+        {
+            for(c=32;c<w-32;c+=16)
+            {
+                int mi,mj,ui,uj,wm;
+                int bmi=0,bmj=0,bui=0,buj=0,bwm=0; // zero motion unless a search result below replaces it
+                unsigned char ym[256];
+
+                if(vp8_sad16x16_sse3( ys+c,y_stride, yd+c,y_stride,0xffff) == 0)
+                    bmi=bmj=bui=buj=bwm=0; // identical block: keep zero motion and the previous color key
+                else
+                {
+                    COLOR_SEG_ELEMENT cs[5];
+                    int j;
+                    unsigned int beste=0xfffffff;
+                    unsigned int bestj=0;
+
+                    // try color from last mb segmentation
+                    cs[0] = last;
+
+                    // try color segs from 4 pixels in mb recon as segmentation
+                    cs[1].y = yd[c + y_stride + 1];cs[1].u = ud[c/2 + uv_stride];
+                    cs[1].v = vd[c/2 + uv_stride];
+                    cs[1].yt = cs[1].ut = cs[1].vt = 20;
+                    cs[2].y = yd[c + w + 14];
+                    cs[2].u = ud[c/2 + uv_stride+7];
+                    cs[2].v = vd[c/2 + uv_stride+7];
+                    cs[2].yt = cs[2].ut = cs[2].vt = 20;
+                    cs[3].y = yd[c + w*14 + 1];
+                    cs[3].u = ud[c/2 + uv_stride*7];
+                    cs[3].v = vd[c/2 + uv_stride*7];
+                    cs[3].yt = cs[3].ut = cs[3].vt = 20;
+                    cs[4].y = yd[c + w*14 + 14];
+                    cs[4].u = ud[c/2 + uv_stride*7+7];
+                    cs[4].v = vd[c/2 + uv_stride*7+7];
+                    cs[4].yt = cs[4].ut = cs[4].vt = 20;
+
+                    for(j=0;j<5;j++) // run the search once per candidate key and keep the cheapest result
+                    {
+                        int e;
+
+                        e = fast_masked_motion_search(
+                           ys+c, us+c/2, vs+c/2, y_stride, uv_stride,
+                           yd+c, ud+c/2, vd+c/2, y_stride, uv_stride,
+                           &cs[j], 1, &mi,&mj,&ui,&uj,&wm);
+
+                        if(e<beste)
+                        {
+                            bmi=mi;bmj=mj;bui=ui;buj=uj;bwm=wm;
+                            bestj=j;
+                            beste=e;
+                        }
+                    }
+                    best = cs[bestj];
+                    //best = segmentation[0];
+                    last = best; // the winning key becomes candidate 0 for the next macroblock
+                }
+                predict_all(yd+c, ud+c/2, vd+c/2, w, uv_stride,
+                            yp+c, up+c/2, vp+c/2, w, uv_stride,
+                            &best, 1, bmi,bmj,bui,buj,bwm);
+
+            }
+        }
+        fwrite(prd,w*h*3/2,1,g);
+        t=f0; // swap buffers: the frame just read becomes the reference for the next one
+        f0=f1;
+        f1=t;
+
+    }
+    fclose(f);
+    fclose(g);
+    return 0;
+}
--- a/vp8/common/modecont.c
+++ b/vp8/common/modecont.c
@ -10,31 +10,33 @@


 #include "entropy.h"
-
-const int vp8_mode_contexts[6][4] =
+const int default_vp8_mode_contexts[6][4] =
 {
-    {
-        /* 0 */
-        7,     1,     1,   143,
-    },
-    {
-        /* 1 */
-        14,    18,    14,   107,
-    },
-    {
-        /* 2 */
-        135,    64,    57,    68,
-    },
-    {
-        /* 3 */
-        60,    56,   128,    65,
-    },
-    {
-        /* 4 */
-        159,   134,   128,    34,
-    },
-    {
-        /* 5 */
-        234,   188,   128,    28,
-    },
+    {   /* 0 */
+         7,     1,     1,   183},
+    {   /* 1 */
+        14,    18,    14,   147},
+    {/* 2 */
+       135,    64,    57,    68},
+    {   /* 3 */
+         60,    56,   128,   65},
+    {/* 4 */
+        159,   134,   128,   34},
+    {   /* 5 */
+        234,   188,   128,   28},
+};
+const int default_vp8_mode_contexts_a[6][4] =
+{
+    {   /* 0 */
+         4,     1,    1,   143},
+    {   /* 1 */
+         7,     9,    7,   107},
+    {/* 2 */
+        95,    34,   57,    68},
+    {   /* 3 */
+        95,    56,   128,   65},
+    {/* 4 */
+        159,   67,   128,   34},
+    {   /* 5 */
+        234,   94,   128,   28},
 };
--- a/vp8/common/modecont.h
+++ b/vp8/common/modecont.h
@ -12,6 +12,6 @@
 #ifndef __INC_MODECONT_H
 #define __INC_MODECONT_H

-extern const int vp8_mode_contexts[6][4];
-
+extern const int default_vp8_mode_contexts[6][4];
+extern const int default_vp8_mode_contexts_a[6][4];
 #endif
--- a/vp8/common/onyx.h
+++ b/vp8/common/onyx.h
@ -53,7 +53,6 @@ extern "C"

    typedef enum
    {
-        MODE_REALTIME       = 0x0,
        MODE_GOODQUALITY    = 0x1,
        MODE_BESTQUALITY    = 0x2,
        MODE_FIRSTPASS      = 0x3,
@ -155,15 +154,6 @@ extern "C"
        int best_allowed_q;
        int cq_level;

-        // allow internal resizing ( currently disabled in the build !!!!!)
-        int allow_spatial_resampling;
-        int resample_down_water_mark;
-        int resample_up_water_mark;
-
-        // allow internal frame rate alterations
-        int allow_df;
-        int drop_frames_water_mark;
-
        // two pass datarate control
        int two_pass_vbrbias;        // two pass datarate control tweaks
        int two_pass_vbrmin_section;
@ -175,21 +165,9 @@ extern "C"
        // these parameters aren't to be used in final build don't use!!!
        int play_alternate;
        int alt_freq;
-        int alt_q;
-        int key_q;
-        int gold_q;

-
-        int multi_threaded;   // how many threads to run the encoder on
-        int token_partitions; // how many token partitions to create for multi core decoding
        int encode_breakout;  // early breakout encode threshold : for video conf recommend 800

-        unsigned int error_resilient_mode; // Bitfield defining the error
-                                   // resiliency features to enable. Can provide
-                                   // decodable frames after losses in previous
-                                   // frames and decodable partitions after
-                                   // losses in the same frame.
-
        int arnr_max_frames;
        int arnr_strength ;
        int arnr_type     ;
--- a/vp8/common/onyxc_int.h
+++ b/vp8/common/onyxc_int.h
@ -32,32 +32,36 @@
 void vp8_initialize_common(void);

 #define MINQ 0
-#define MAXQ 127
+
+#define MAXQ 255
+#define QINDEX_BITS 8
+
 #define QINDEX_RANGE (MAXQ + 1)

 #define NUM_YV12_BUFFERS 4

-#define MAX_PARTITIONS 9
+#define COMP_PRED_CONTEXTS   2

 typedef struct frame_contexts
 {
    vp8_prob bmode_prob [VP8_BINTRAMODES-1];
    vp8_prob ymode_prob [VP8_YMODES-1];   /* interframe intra mode probs */
+#if CONFIG_UVINTRA
+    vp8_prob uv_mode_prob [VP8_YMODES][VP8_UV_MODES-1];
+#else
    vp8_prob uv_mode_prob [VP8_UV_MODES-1];
+#endif
    vp8_prob sub_mv_ref_prob [VP8_SUBMVREFS-1];
    vp8_prob coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
+    vp8_prob coef_probs_8x8 [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
    MV_CONTEXT mvc[2];
    MV_CONTEXT pre_mvc[2];  /* not to caculate the mvcost for the frame if mvc doesn't change. */
+#if CONFIG_HIGH_PRECISION_MV
+    MV_CONTEXT_HP mvc_hp[2];
+    MV_CONTEXT_HP pre_mvc_hp[2];  /* not to caculate the mvcost for the frame if mvc doesn't change. */
+#endif
 } FRAME_CONTEXT;

-typedef enum
-{
-    ONE_PARTITION  = 0,
-    TWO_PARTITION  = 1,
-    FOUR_PARTITION = 2,
-    EIGHT_PARTITION = 3
-} TOKEN_PARTITION;
-
 typedef enum
 {
    RECON_CLAMP_REQUIRED        = 0,
@ -70,6 +74,21 @@ typedef enum
    BILINEAR = 1
 } INTERPOLATIONFILTERTYPE;

+typedef enum
+{
+    SINGLE_PREDICTION_ONLY = 0,
+    COMP_PREDICTION_ONLY   = 1,
+    HYBRID_PREDICTION      = 2,
+    NB_PREDICTION_TYPES    = 3,
+} COMPPREDMODE_TYPE;
+
+/* TODO: allows larger transform */
+typedef enum
+{
+    ONLY_4X4            = 0,
+    ALLOW_8X8           = 1
+} TXFM_MODE;
+
 typedef struct VP8_COMMON_RTCD
 {
 #if CONFIG_RUNTIME_CPU_DETECT
@ -127,6 +146,8 @@ typedef struct VP8Common
    /* profile settings */
    int experimental;
    int mb_no_coeff_skip;
+    TXFM_MODE txfm_mode;
+    COMPPREDMODE_TYPE comp_pred_mode;
    int no_lpf;
    int use_bilinear_mc_filter;
    int full_pixel;
@ -152,6 +173,9 @@ typedef struct VP8Common
    MODE_INFO *prev_mi;  /* 'mi' from last frame (points into prev_mip) */


+    // Persistent mb segment id map used in prediction.
+    unsigned char * last_frame_seg_map;
+
    INTERPOLATIONFILTERTYPE mcomp_filter_type;
    LOOPFILTERTYPE filter_type;

@ -176,24 +200,53 @@ typedef struct VP8Common
    ENTROPY_CONTEXT_PLANES *above_context;   /* row of context for each plane */
    ENTROPY_CONTEXT_PLANES left_context;  /* (up to) 4 contexts "" */

-
    /* keyframe block modes are predicted by their above, left neighbors */

    vp8_prob kf_bmode_prob [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1];
+#if CONFIG_QIMODE
+    vp8_prob kf_ymode_prob[8][VP8_YMODES-1];  /* keyframe "" */
+    int kf_ymode_probs_index;
+    int kf_ymode_probs_update;
+#else
    vp8_prob kf_ymode_prob [VP8_YMODES-1];  /* keyframe "" */
+#endif
+#if CONFIG_UVINTRA
+    vp8_prob kf_uv_mode_prob[VP8_YMODES] [VP8_UV_MODES-1];
+#else
    vp8_prob kf_uv_mode_prob [VP8_UV_MODES-1];
+#endif

+    vp8_prob i8x8_mode_prob [VP8_UV_MODES-1];

+    vp8_prob prob_intra_coded;
+    vp8_prob prob_last_coded;
+    vp8_prob prob_gf_coded;
+
+    // Context probabilities when using predictive coding of segment id
+    vp8_prob segment_pred_probs[PREDICTION_PROBS];
+    unsigned char temporal_update;
+
+    // Context probabilities for reference frame prediction
+    unsigned char ref_scores[MAX_REF_FRAMES];
+    vp8_prob ref_pred_probs[PREDICTION_PROBS];
+    vp8_prob mod_refprobs[MAX_REF_FRAMES][PREDICTION_PROBS];
+
+    vp8_prob prob_comppred[COMP_PRED_CONTEXTS];
+
+    FRAME_CONTEXT lfc_a; /* last alt ref entropy */
    FRAME_CONTEXT lfc; /* last frame entropy */
    FRAME_CONTEXT fc;  /* this frame entropy */

-    unsigned int current_video_frame;
+    int mv_ref_ct[6][4][2];
+    int mode_context[6][4];
+    int mv_ref_ct_a[6][4][2];
+    int mode_context_a[6][4];
+    int vp8_mode_contexts[6][4];

+    unsigned int current_video_frame;
    int near_boffset[3];
    int version;

-    TOKEN_PARTITION multi_token_partition;
-
 #ifdef PACKET_TESTING
    VP8_HEADER oh;
 #endif
@ -203,9 +256,7 @@ typedef struct VP8Common
 #if CONFIG_RUNTIME_CPU_DETECT
    VP8_COMMON_RTCD rtcd;
 #endif
-#if CONFIG_MULTITHREAD
-    int processor_core_count;
-#endif
+
 #if CONFIG_POSTPROC
    struct postproc_state  postproc_state;
 #endif
--- a/vp8/common/onyxd.h
+++ b/vp8/common/onyxd.h
@ -32,7 +32,6 @@ extern "C"
        int     Version;
        int     postprocess;
        int     max_threads;
-        int     error_concealment;
        int     input_partition;
    } VP8D_CONFIG;
    typedef enum
--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@ -9,7 +9,7 @@
 */


-#include "vpx_config.h"
+#include "vpx_ports/config.h"
 #include "vpx_scale/yv12config.h"
 #include "postproc.h"
 #include "vpx_scale/yv12extend.h"
@ -1024,9 +1024,9 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
                            if ((ppflags->display_b_modes_flag & (1<<mi->mbmi.mode))
                                || (ppflags->display_mb_modes_flag & B_PRED))
                            {
-                                Y = B_PREDICTION_MODE_colors[bmi->as_mode][0];
-                                U = B_PREDICTION_MODE_colors[bmi->as_mode][1];
-                                V = B_PREDICTION_MODE_colors[bmi->as_mode][2];
+                                Y = B_PREDICTION_MODE_colors[bmi->as_mode.first][0];
+                                U = B_PREDICTION_MODE_colors[bmi->as_mode.first][1];
+                                V = B_PREDICTION_MODE_colors[bmi->as_mode.first][2];

                                POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_b)
                                    (yl+bx, ul+(bx>>1), vl+(bx>>1), Y, U, V, 0xc000, y_stride);
--- a/vp8/common/pred_common.c
+++ b/vp8/common/pred_common.c
@ -0,0 +1,339 @@
+/*
+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp8/common/pred_common.h"
+
+// TBD prediction functions for various bitstream signals
+
+// Returns a context number for the given MB prediction signal.
+//
+// PRED_SEG_ID and PRED_REF sum the matching "predicted" flag of the left
+// and above neighbours. PRED_COMP looks only at the current MB: 0 when
+// it references LAST_FRAME, otherwise 1. Any other id yields 0.
+unsigned char get_pred_context( VP8_COMMON *const cm,
+                                MACROBLOCKD *const xd,
+                                PRED_ID pred_id )
+{
+    MODE_INFO *const cur = xd->mode_info_context;
+    int context = 0;
+
+    // Note:
+    // The mode info data structure has a one element border above and to
+    // the left of the entries corresponding to real macroblocks.
+    // The prediction flags in these dummy entries are initialised to 0.
+    if ( pred_id == PRED_SEG_ID )
+    {
+        MODE_INFO *const left_mi = cur - 1;
+        MODE_INFO *const above_mi = cur - cm->mode_info_stride;
+
+        context = left_mi->mbmi.seg_id_predicted +
+                  above_mi->mbmi.seg_id_predicted;
+    }
+    else if ( pred_id == PRED_REF )
+    {
+        MODE_INFO *const left_mi = cur - 1;
+        MODE_INFO *const above_mi = cur - cm->mode_info_stride;
+
+        context = left_mi->mbmi.ref_predicted +
+                  above_mi->mbmi.ref_predicted;
+    }
+    else if ( pred_id == PRED_COMP )
+    {
+        // Alternatives that have been tried here: a context built from
+        // the neighbours' use of a second reference frame, and one built
+        // from this MB's reference frame combined with a ZEROMV test.
+        // The current rule only separates LAST_FRAME from the rest.
+        context = ( cur->mbmi.ref_frame == LAST_FRAME ) ? 0 : 1;
+    }
+    // else: unknown signal. TODO *** add error trap code.
+    // The context stays at 0.
+
+    return context;
+}
+
+// Looks up the probability used to code the given prediction signal.
+// The table entry is selected by the context from get_pred_context();
+// an unrecognised signal id falls back to 128.
+vp8_prob get_pred_prob( VP8_COMMON *const cm,
+                        MACROBLOCKD *const xd,
+                        PRED_ID pred_id )
+{
+    // Get the appropriate prediction context
+    const int context = get_pred_context( cm, xd, pred_id );
+
+    switch (pred_id)
+    {
+    case PRED_SEG_ID:
+        return cm->segment_pred_probs[context];
+
+    case PRED_REF:
+        return cm->ref_pred_probs[context];
+
+    case PRED_COMP:
+        // In keeping with convention elsewhere the probability returned
+        // is the probability of a "0" outcome which in this case means
+        // the probability of comp pred off.
+        return cm->prob_comppred[context];
+
+    default:
+        // TODO *** add error trap code.
+        break;
+    }
+
+    return 128;
+}
+
+// Reports whether the prediction of the given signal was correct for the
+// current macroblock. Only PRED_SEG_ID and PRED_REF carry a stored flag;
+// every other id reports 0.
+unsigned char get_pred_flag( MACROBLOCKD *const xd,
+                             PRED_ID pred_id )
+{
+    if ( pred_id == PRED_SEG_ID )
+        return xd->mode_info_context->mbmi.seg_id_predicted;
+
+    if ( pred_id == PRED_REF )
+        return xd->mode_info_context->mbmi.ref_predicted;
+
+    // TODO *** add error trap code.
+    return 0;
+}
+
+// Records whether the prediction of the given signal was correct for the
+// current macroblock. Only PRED_SEG_ID and PRED_REF have a flag to
+// store; any other id is silently ignored.
+void set_pred_flag( MACROBLOCKD *const xd,
+                    PRED_ID pred_id,
+                    unsigned char pred_flag)
+{
+    if ( pred_id == PRED_SEG_ID )
+    {
+        xd->mode_info_context->mbmi.seg_id_predicted = pred_flag;
+    }
+    else if ( pred_id == PRED_REF )
+    {
+        xd->mode_info_context->mbmi.ref_predicted = pred_flag;
+    }
+    // else: TODO *** add error trap code.
+}
+
+
+// The following contain the guts of the prediction code used to
+// predict various bitstream signals.
+
+// Macroblock segment id prediction function.
+// MbIndex selects the entry of cm->last_frame_seg_map to return.
+unsigned char get_pred_mb_segid( VP8_COMMON *const cm, int MbIndex )
+{
+    // The prediction is currently just the segment id that was stored
+    // for this macroblock in the previous frame.
+    const unsigned char predicted_id = cm->last_frame_seg_map[MbIndex];
+
+    return predicted_id;
+}
+
+// Predicts the reference frame of the current macroblock.
+//
+// Each candidate starts from its frame level score in cm->ref_scores
+// (forced to 0 when the segment's reference frame feature disallows it).
+// The left and above neighbours, when inside the image and using an
+// allowed frame, each add 16 to the frame they use, plus a further 4 if
+// the above-left neighbour is in the image and uses that frame as well.
+// The highest score wins; ties keep the lowest index and LAST_FRAME is
+// returned when every score is 0.
+MV_REFERENCE_FRAME get_pred_ref( VP8_COMMON *const cm,
+                                 MACROBLOCKD *const xd )
+{
+    MODE_INFO *const cur = xd->mode_info_context;
+    MODE_INFO *const left_mi = cur - 1;
+    MODE_INFO *const above_mi = cur - cm->mode_info_stride;
+    MODE_INFO *const corner_mi = cur - 1 - cm->mode_info_stride;
+
+    const int segment_id = cur->mbmi.segment_id;
+
+    unsigned char frame_allowed[MAX_REF_FRAMES] = {1,1,1,1};
+    unsigned char ref_score[MAX_REF_FRAMES];
+    unsigned char top_score = 0;
+    MV_REFERENCE_FRAME winner = LAST_FRAME;
+
+    MV_REFERENCE_FRAME left;
+    MV_REFERENCE_FRAME above;
+    MV_REFERENCE_FRAME above_left;
+    unsigned char left_in_image;
+    unsigned char above_in_image;
+    unsigned char above_left_in_image;
+    int i;
+
+    // When the segment restricts the reference frame, never predict a
+    // frame that the segment does not allow: its score is zeroed.
+    if ( segfeature_active( xd, segment_id, SEG_LVL_REF_FRAME ) )
+    {
+        for ( i = 0; i < MAX_REF_FRAMES; i++ )
+        {
+            frame_allowed[i] = check_segref( xd, segment_id, i );
+            ref_score[i] = cm->ref_scores[i] * frame_allowed[i];
+        }
+    }
+    else
+    {
+        vpx_memcpy( ref_score, cm->ref_scores, sizeof(ref_score) );
+    }
+
+    // Reference frames used by neighbours
+    left = left_mi->mbmi.ref_frame;
+    above = above_mi->mbmi.ref_frame;
+    above_left = corner_mi->mbmi.ref_frame;
+
+    // Are neighbours in image
+    left_in_image = left_mi->mbmi.mb_in_image;
+    above_in_image = above_mi->mbmi.mb_in_image;
+    above_left_in_image = corner_mi->mbmi.mb_in_image;
+
+    // Vote from the left neighbour
+    if ( frame_allowed[left] && left_in_image )
+    {
+        ref_score[left] += 16;
+        if ( above_left_in_image && (left == above_left) )
+            ref_score[left] += 4;
+    }
+
+    // Vote from the above neighbour
+    if ( frame_allowed[above] && above_in_image )
+    {
+        ref_score[above] += 16;
+        if ( above_left_in_image && (above == above_left) )
+            ref_score[above] += 4;
+    }
+
+    // Pick the candidate with the highest score
+    for ( i = 0; i < MAX_REF_FRAMES; i++ )
+    {
+        if ( ref_score[i] > top_score )
+        {
+            top_score = ref_score[i];
+            winner = i;
+        }
+    }
+
+    return winner;
+}
+
+// Computes a set of modified reference frame probabilities to use when
+// the prediction of the reference frame value fails.
+//
+// count holds four occurrence counts and probs receives three branch
+// probabilities. Branch k is count[k] scaled to 255 over the total of
+// count[k..3], raised to 1 if it would be 0, or 128 when that total
+// is 0.
+void calc_ref_probs( int * count, vp8_prob * probs )
+{
+    int remaining = count[0] + count[1] + count[2] + count[3];
+    int branch;
+
+    for ( branch = 0; branch < 3; branch++ )
+    {
+        if ( remaining )
+        {
+            probs[branch] =
+                (vp8_prob)((count[branch] * 255) / remaining);
+            probs[branch] += !probs[branch];
+        }
+        else
+            probs[branch] = 128;
+
+        // Later branches are conditional on this one not being taken
+        remaining -= count[branch];
+    }
+}
+
+// Computes a set of modified conditional probabilities for the reference
+// frame. Values will be set to 0 for reference frame options that are
+// not possible, either because they were predicted and the prediction
+// has failed or because they are not allowed for a given segment.
+// Also refreshes cm->ref_scores, the frame level scores that feed the
+// reference frame prediction.
+void compute_mod_refprobs( VP8_COMMON *const cm )
+{
+    // Count slots in the order calc_ref_probs expects them
+    const MV_REFERENCE_FRAME slot_ref[4] =
+        { INTRA_FRAME, LAST_FRAME, GOLDEN_FRAME, ALTREF_FRAME };
+    // Branch that becomes implicit once the slot's frame is ruled out
+    const int implicit_branch[4] = { 0, 1, 2, 2 };
+
+    int base_cnt[4];
+    int norm_cnt[MAX_REF_FRAMES];
+    int inter_count;
+    int gfarf_count;
+    int slot;
+    int j;
+
+    // Turn the frame level probabilities into counts out of 255
+    base_cnt[0] = cm->prob_intra_coded;
+    inter_count = (255 - base_cnt[0]);
+    base_cnt[1] = (inter_count * cm->prob_last_coded)/255;
+    gfarf_count = inter_count - base_cnt[1];
+    base_cnt[2] = (gfarf_count * cm->prob_gf_coded)/255;
+    base_cnt[3] = gfarf_count - base_cnt[2];
+
+    // Work out modified reference frame probabilities to use where
+    // prediction of the reference frame fails: the failed candidate's
+    // count is removed before the probabilities are derived.
+    for ( slot = 0; slot < 4; slot++ )
+    {
+        for ( j = 0; j < 4; j++ )
+            norm_cnt[j] = (j == slot) ? 0 : base_cnt[j];
+
+        calc_ref_probs( norm_cnt, cm->mod_refprobs[slot_ref[slot]] );
+
+        // This branch implicit
+        cm->mod_refprobs[slot_ref[slot]][implicit_branch[slot]] = 0;
+    }
+
+    // Score the reference frames based on overall frequency.
+    // These scores contribute to the prediction choices.
+    // Max score 17 min 1
+    for ( slot = 0; slot < 4; slot++ )
+        cm->ref_scores[slot_ref[slot]] = 1 + (base_cnt[slot] * 16 / 255);
+}
--- a/vp8/common/pred_common.h
+++ b/vp8/common/pred_common.h
@ -0,0 +1,51 @@
+/*
+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef __INC_PRED_COMMON_H__
+#define __INC_PRED_COMMON_H__ 1
+
+// Dependencies live inside the include guard so that including this
+// header more than once does not re-process them.
+#include "type_aliases.h"
+#include "onyxc_int.h"
+#include "vp8/common/blockd.h"
+
+
+// Predicted items
+typedef enum
+{
+    PRED_SEG_ID = 0,               // Segment identifier
+    PRED_REF = 1,                  // Reference frame
+    PRED_COMP = 2                  // Compound prediction on / off
+
+} PRED_ID;
+
+
+// Context number used to select the coding probability for pred_id.
+// Unrecognised ids yield 0.
+extern unsigned char get_pred_context( VP8_COMMON *const cm,
+                                       MACROBLOCKD *const xd,
+                                       PRED_ID pred_id );
+
+// Probability for coding pred_id in the current context.
+// Unrecognised ids yield 128.
+extern vp8_prob get_pred_prob( VP8_COMMON *const cm,
+                               MACROBLOCKD *const xd,
+                               PRED_ID pred_id );
+
+// Reads the "prediction was correct" flag of the current macroblock.
+// Only PRED_SEG_ID and PRED_REF have a flag; other ids read as 0.
+extern unsigned char get_pred_flag( MACROBLOCKD *const xd,
+                                    PRED_ID pred_id );
+
+// Writes the "prediction was correct" flag of the current macroblock.
+// Only PRED_SEG_ID and PRED_REF have a flag; other ids are ignored.
+extern void set_pred_flag( MACROBLOCKD *const xd,
+                           PRED_ID pred_id,
+                           unsigned char pred_flag);
+
+
+// Predicted segment id: the entry of the previous frame's segment map
+// at MbIndex.
+extern unsigned char get_pred_mb_segid( VP8_COMMON *const cm, int MbIndex );
+
+// Predicted reference frame for the current macroblock, chosen by
+// scoring the candidates from cm->ref_scores and the neighbouring MBs.
+extern MV_REFERENCE_FRAME get_pred_ref( VP8_COMMON *const cm,
+                                        MACROBLOCKD *const xd );
+
+// Rebuilds cm->mod_refprobs and cm->ref_scores from the frame level
+// intra / last / golden probabilities held in cm.
+extern void compute_mod_refprobs( VP8_COMMON *const cm );
+
+#endif /* __INC_PRED_COMMON_H__ */
--- a/vp8/common/predict_rotated.c
+++ b/vp8/common/predict_rotated.c
@ -0,0 +1,85 @@
+/*
+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#if CONFIG_ROTATION
+typedef struct                 /* one weighted source sample */
+{
+    int y;                     /* row offset, scaled by the source stride */
+    int x;                     /* column offset added to the row offset */
+    unsigned long t;           /* weight multiplied into the sample */
+} tap;
+/* One output pixel: four taps summed from 32768, then shifted right 16. */
+typedef struct
+{
+    tap pt[4];
+} point_taps;
+/* Taps for every pixel of a block, visited in raster order. */
+typedef struct
+{
+    point_taps pt[256];
+} mb_taps;
+/* Indexed by rotation_index; predict_rotated_8x8 reads pt[0..63]. */
+mb_taps mt_8x8[] =
+{
+    #include "rotate2.h"
+};
+/* Indexed by rotation_index; predict_rotated_16x16 reads pt[0..255]. */
+mb_taps mt[] =
+{
+    #include "rotate.h"
+};
+
+/* Builds a 16x16 prediction in dst (row pitch dp) from src (row pitch sp)
+ * using the tap set mt[rotation_index]. Each output pixel is the sum of
+ * four weighted source samples, started at 32768 and shifted right by 16.
+ */
+void predict_rotated_16x16(int rotation_index, unsigned char *src, int sp,
+                           unsigned char *dst, int dp)
+{
+    const point_taps *pixel = mt[rotation_index].pt;
+    int row, col, k;
+
+    for (row = 0; row < 16; row++)
+    {
+        for (col = 0; col < 16; col++)
+        {
+            unsigned int sum = 32768;
+
+            for (k = 0; k < 4; k++)
+            {
+                const tap *tp = &pixel->pt[k];
+                sum += src[tp->y * sp + tp->x] * tp->t;
+            }
+
+            dst[col] = sum >> 16;
+
+            /* taps are stored in raster order, one entry per pixel */
+            pixel++;
+        }
+
+        dst += dp;
+    }
+}
+/* Builds an 8x8 prediction in dst (row pitch dp) from src (row pitch sp)
+ * using the tap set mt_8x8[rotation_index]. Each output pixel is the sum
+ * of four weighted source samples, started at 32768 and shifted right
+ * by 16.
+ */
+void predict_rotated_8x8(int rotation_index, unsigned char *src, int sp,
+                         unsigned char *dst, int dp)
+{
+    const point_taps *pixel = mt_8x8[rotation_index].pt;
+    int row, col, k;
+
+    for (row = 0; row < 8; row++)
+    {
+        for (col = 0; col < 8; col++)
+        {
+            unsigned int sum = 32768;
+
+            for (k = 0; k < 4; k++)
+            {
+                const tap *tp = &pixel->pt[k];
+                sum += src[tp->y * sp + tp->x] * tp->t;
+            }
+
+            dst[col] = sum >> 16;
+
+            /* taps are stored in raster order, one entry per pixel */
+            pixel++;
+        }
+
+        dst += dp;
+    }
+}
+#endif
+
+
+
+
--- a/vp8/common/quant_common.c
+++ b/vp8/common/quant_common.c
@ -11,57 +11,34 @@

 #include "quant_common.h"

+static int dc_qlookup[QINDEX_RANGE];
+static int ac_qlookup[QINDEX_RANGE];

-#if !CONFIG_EXTEND_QRANGE
-static const int dc_qlookup[QINDEX_RANGE] =
-{
-    4,    5,    6,    7,    8,    9,   10,   10,   11,   12,   13,   14,   15,   16,   17,   17,
-    18,   19,   20,   20,   21,   21,   22,   22,   23,   23,   24,   25,   25,   26,   27,   28,
-    29,   30,   31,   32,   33,   34,   35,   36,   37,   37,   38,   39,   40,   41,   42,   43,
-    44,   45,   46,   46,   47,   48,   49,   50,   51,   52,   53,   54,   55,   56,   57,   58,
-    59,   60,   61,   62,   63,   64,   65,   66,   67,   68,   69,   70,   71,   72,   73,   74,
-    75,   76,   76,   77,   78,   79,   80,   81,   82,   83,   84,   85,   86,   87,   88,   89,
-    91,   93,   95,   96,   98,  100,  101,  102,  104,  106,  108,  110,  112,  114,  116,  118,
-    122,  124,  126,  128,  130,  132,  134,  136,  138,  140,  143,  145,  148,  151,  154,  157,
-};
+#define ACDC_MIN 4

-static const int ac_qlookup[QINDEX_RANGE] =
+void vp8_init_quant_tables()
 {
-    4,    5,    6,    7,    8,    9,   10,   11,   12,   13,   14,   15,   16,   17,   18,   19,
-    20,   21,   22,   23,   24,   25,   26,   27,   28,   29,   30,   31,   32,   33,   34,   35,
-    36,   37,   38,   39,   40,   41,   42,   43,   44,   45,   46,   47,   48,   49,   50,   51,
-    52,   53,   54,   55,   56,   57,   58,   60,   62,   64,   66,   68,   70,   72,   74,   76,
-    78,   80,   82,   84,   86,   88,   90,   92,   94,   96,   98,  100,  102,  104,  106,  108,
-    110,  112,  114,  116,  119,  122,  125,  128,  131,  134,  137,  140,  143,  146,  149,  152,
-    155,  158,  161,  164,  167,  170,  173,  177,  181,  185,  189,  193,  197,  201,  205,  209,
-    213,  217,  221,  225,  229,  234,  239,  245,  249,  254,  259,  264,  269,  274,  279,  284,
-};
-#else
+    int i;
+    int current_val = 4;
+    int last_val = 4;
+    int ac_val;

-static const int dc_qlookup[QINDEX_RANGE] =
-{
-    4,    5,    6,    7,    8,    9,    10,   11,   12,   13,   14,   15,   16,   17,   18,   19,
-    20,   21,   22,   23,   24,   25,   26,   27,   28,   30,   32,   34,   36,   38,   40,   42,
-    44,   46,   49,   52,   55,   58,   61,   64,   67,   70,   73,   76,   79,   82,   85,   88,
-    92,    96,  100,  104,  108,  112,  116,  120,  124,  128,  132,  136,  140,  144,  148,  152,
-    156,  160,  164,  168,  172,  176,  180,  184,  188,  192,  196,  200,  205,  210,  215,  220,
-    225,  230,  235,  240,  245,  250,  255,  260,  265,  270,  275,  280,  285,  290,  295,  300,
-    310,  320,  330,  340,  350,  360,  370,  380,  390,  400,  410,  420,  430,  440,  450,  460,
-    472,  484,  496,  508,  520,  532,  544,  556,  572,  588,  608,  628,  648,  668,  692,  720,
-};
+    for ( i = 0; i < QINDEX_RANGE; i++ )
+    {
+        ac_qlookup[i] = current_val;
+        current_val = (int)((double)current_val * 1.02);
+        if ( current_val == last_val )
+            current_val++;
+        last_val = current_val;

-static const int ac_qlookup[QINDEX_RANGE] =
-{
-    4,    5,    6,    7,    8,    9,    10,   11,   12,   13,   14,   15,   16,   17,   18,   19,
-    20,   22,   24,   26,   28,   30,   32,   34,   36,   38,   40,   42,   44,   46,   48,   51,
-    54,   57,   60,   63,   66,   69,   72,   76,   80,   84,   88,   92,   96,   100,  105,  110,
-    115,  120,  125,  130,  135,  140,  146,  152,  158,  164,  170,  176,  182,  188,  194,  200,
-    206,  212,  218,  224,  232,  240,  248,  256,  264,  272,  280,  288,  296,  304,  312,  320,
-    330,  340,  350,  360,  370,  380,  392,  404,  416,  428,  440,  454,  468,  482,  496,  510,
-    524,  540,  556,  572,  588,  604,  622,  640,  658,  676,  696,  716,  736,  756,  776,  796,
-    820,  844,  868,  892,  916,  944,  972,  1000, 1032, 1064, 1096, 1128, 1168, 1208, 1252, 1300
-};
-#endif
+        ac_val = ac_qlookup[i];
+        dc_qlookup[i] = (0.000000305 * ac_val * ac_val * ac_val) +
+                        (-0.00065 * ac_val * ac_val) +
+                        (0.9 * ac_val) + 0.5;
+        if ( dc_qlookup[i] < ACDC_MIN )
+            dc_qlookup[i] = ACDC_MIN;
+    }
+}

 int vp8_dc_quant(int QIndex, int Delta)
 {
@ -69,8 +46,8 @@ int vp8_dc_quant(int QIndex, int Delta)

    QIndex = QIndex + Delta;

-    if (QIndex > 127)
-        QIndex = 127;
+    if (QIndex > MAXQ)
+        QIndex = MAXQ;
    else if (QIndex < 0)
        QIndex = 0;

@ -84,16 +61,13 @@ int vp8_dc2quant(int QIndex, int Delta)

    QIndex = QIndex + Delta;

-    if (QIndex > 127)
-        QIndex = 127;
+    if (QIndex > MAXQ)
+        QIndex = MAXQ;
    else if (QIndex < 0)
        QIndex = 0;

-#if !CONFIG_EXTEND_QRANGE
-    retval = dc_qlookup[ QIndex ] * 2;
-#else
    retval = dc_qlookup[ QIndex ];
-#endif
+
    return retval;

 }
@ -103,8 +77,8 @@ int vp8_dc_uv_quant(int QIndex, int Delta)

    QIndex = QIndex + Delta;

-    if (QIndex > 117)
-        QIndex = 117;
+    if (QIndex > MAXQ)
+        QIndex = MAXQ;
    else if (QIndex < 0)
        QIndex = 0;

@ -117,8 +91,8 @@ int vp8_ac_yquant(int QIndex)
 {
    int retval;

-    if (QIndex > 127)
-        QIndex = 127;
+    if (QIndex > MAXQ)
+        QIndex = MAXQ;
    else if (QIndex < 0)
        QIndex = 0;

@ -132,17 +106,15 @@ int vp8_ac2quant(int QIndex, int Delta)

    QIndex = QIndex + Delta;

-    if (QIndex > 127)
-        QIndex = 127;
+    if (QIndex > MAXQ)
+        QIndex = MAXQ;
    else if (QIndex < 0)
        QIndex = 0;
-#if !CONFIG_EXTEND_QRANGE
-    retval = (ac_qlookup[ QIndex ] * 155) / 100;
-    if (retval < 8)
-        retval = 8;
-#else
-    retval = ac_qlookup[ QIndex ];
-#endif
+
+    retval = (ac_qlookup[ QIndex ] * 775) / 1000;
+    if (retval < 4)
+        retval = 4;
+
    return retval;
 }
 int vp8_ac_uv_quant(int QIndex, int Delta)
@ -151,8 +123,8 @@ int vp8_ac_uv_quant(int QIndex, int Delta)

    QIndex = QIndex + Delta;

-    if (QIndex > 127)
-        QIndex = 127;
+    if (QIndex > MAXQ)
+        QIndex = MAXQ;
    else if (QIndex < 0)
        QIndex = 0;

--- a/vp8/common/quant_common.h
+++ b/vp8/common/quant_common.h
@ -13,6 +13,7 @@
 #include "blockd.h"
 #include "onyxc_int.h"

+extern void vp8_init_quant_tables();
 extern int vp8_ac_yquant(int QIndex);
 extern int vp8_dc_quant(int QIndex, int Delta);
 extern int vp8_dc2quant(int QIndex, int Delta);
--- a/vp8/common/recon.c
+++ b/vp8/common/recon.c
@ -9,7 +9,7 @@
 */


-#include "vpx_config.h"
+#include "vpx_ports/config.h"
 #include "recon.h"
 #include "blockd.h"

@ -44,6 +44,36 @@ void vp8_recon_b_c
    }
 }

+/* Reconstructs one 4x4 block: dst = clamp(pred + diff, 0, 255).
+ * Rows of pred_ptr and diff_ptr are 8 entries apart; rows of dst_ptr
+ * are stride bytes apart.
+ */
+void vp8_recon_uv_b_c
+(
+    unsigned char *pred_ptr,
+    short *diff_ptr,
+    unsigned char *dst_ptr,
+    int stride
+)
+{
+    int row;
+
+    for (row = 0; row < 4; row++)
+    {
+        int col;
+
+        for (col = 0; col < 4; col++)
+        {
+            const int v = diff_ptr[col] + pred_ptr[col];
+
+            /* saturate to the 8 bit pixel range */
+            dst_ptr[col] = (unsigned char)(v < 0 ? 0 : (v > 255 ? 255 : v));
+        }
+
+        pred_ptr += 8;
+        diff_ptr += 8;
+        dst_ptr += stride;
+    }
+}
 void vp8_recon4b_c
 (
    unsigned char *pred_ptr,
--- a/vp8/common/recon.h
+++ b/vp8/common/recon.h
@ -29,6 +29,11 @@
 #define prototype_intra4x4_predict(sym) \
    void sym(BLOCKD *x, int b_mode, unsigned char *predictor)

+#if CONFIG_COMP_INTRA_PRED
+#define prototype_comp_intra4x4_predict(sym) \
+    void sym(BLOCKD *x, int b_mode, int mode2, unsigned char *predictor)
+#endif
+
 struct vp8_recon_rtcd_vtable;

 #if ARCH_X86 || ARCH_X86_64
@ -49,6 +54,16 @@ extern prototype_copy_block(vp8_recon_copy16x16);
 #endif
 extern prototype_copy_block(vp8_recon_copy8x8);

+#ifndef vp8_recon_avg16x16
+#define vp8_recon_avg16x16 vp8_avg_mem16x16_c
+#endif
+extern prototype_copy_block(vp8_recon_avg16x16);
+
+#ifndef vp8_recon_avg8x8
+#define vp8_recon_avg8x8 vp8_avg_mem8x8_c
+#endif
+extern prototype_copy_block(vp8_recon_avg8x8);
+
 #ifndef vp8_recon_copy8x4
 #define vp8_recon_copy8x4 vp8_copy_mem8x4_c
 #endif
@ -59,6 +74,12 @@ extern prototype_copy_block(vp8_recon_copy8x4);
 #endif
 extern prototype_recon_block(vp8_recon_recon);

+/* recon_uv hook: falls back to vp8_recon_uv_b_c unless already defined. */
+#ifndef vp8_recon_recon_uv
+#define vp8_recon_recon_uv vp8_recon_uv_b_c
+#endif
+extern prototype_recon_block(vp8_recon_recon_uv);
+
 #ifndef vp8_recon_recon2
 #define vp8_recon_recon2 vp8_recon2b_c
 #endif
@ -85,6 +106,20 @@ extern prototype_recon_macroblock(vp8_recon_recon_mby);
 extern prototype_build_intra_predictors\
    (vp8_recon_build_intra_predictors_mby);

+#if CONFIG_COMP_INTRA_PRED
+#ifndef vp8_recon_build_comp_intra_predictors_mby
+#define vp8_recon_build_comp_intra_predictors_mby vp8_build_comp_intra_predictors_mby
+#endif
+extern prototype_build_intra_predictors\
+    (vp8_recon_build_comp_intra_predictors_mby);
+#endif
+
+#ifndef vp8_recon_build_intra8x8_predictors_mby
+#define vp8_recon_build_intra8x8_predictors_mby vp8_build_intra8x8_predictors_mby
+#endif
+extern prototype_build_intra_predictors\
+    (vp8_recon_build_intra8x8_predictors_mby);
+
 #ifndef vp8_recon_build_intra_predictors_mby_s
 #define vp8_recon_build_intra_predictors_mby_s vp8_build_intra_predictors_mby_s
 #endif
@ -97,39 +132,111 @@ extern prototype_build_intra_predictors\
 extern prototype_build_intra_predictors\
    (vp8_recon_build_intra_predictors_mbuv);

+#ifndef vp8_recon_build_intra8x8_predictors_mbuv
+#define vp8_recon_build_intra8x8_predictors_mbuv vp8_build_intra8x8_predictors_mbuv
+#endif
+extern prototype_build_intra_predictors\
+    (vp8_recon_build_intra8x8_predictors_mbuv);
+
 #ifndef vp8_recon_build_intra_predictors_mbuv_s
 #define vp8_recon_build_intra_predictors_mbuv_s vp8_build_intra_predictors_mbuv_s
 #endif
 extern prototype_build_intra_predictors\
    (vp8_recon_build_intra_predictors_mbuv_s);

+#if CONFIG_COMP_INTRA_PRED
+#ifndef vp8_recon_build_comp_intra_predictors_mbuv
+#define vp8_recon_build_comp_intra_predictors_mbuv vp8_build_comp_intra_predictors_mbuv
+#endif
+extern prototype_build_intra_predictors\
+    (vp8_recon_build_comp_intra_predictors_mbuv);
+#endif
+
 #ifndef vp8_recon_intra4x4_predict
 #define vp8_recon_intra4x4_predict vp8_intra4x4_predict
 #endif
 extern prototype_intra4x4_predict\
    (vp8_recon_intra4x4_predict);

+#if CONFIG_COMP_INTRA_PRED
+#ifndef vp8_recon_comp_intra4x4_predict
+#define vp8_recon_comp_intra4x4_predict vp8_comp_intra4x4_predict
+#endif
+extern prototype_comp_intra4x4_predict\
+    (vp8_recon_comp_intra4x4_predict);
+#endif
+
+#ifndef vp8_recon_intra8x8_predict
+#define vp8_recon_intra8x8_predict vp8_intra8x8_predict
+#endif
+extern prototype_intra4x4_predict\
+    (vp8_recon_intra8x8_predict);
+
+#if CONFIG_COMP_INTRA_PRED
+#ifndef vp8_recon_comp_intra8x8_predict
+#define vp8_recon_comp_intra8x8_predict vp8_comp_intra8x8_predict
+#endif
+extern prototype_comp_intra4x4_predict\
+    (vp8_recon_comp_intra8x8_predict);
+#endif
+
+#ifndef vp8_recon_intra_uv4x4_predict
+#define vp8_recon_intra_uv4x4_predict vp8_intra_uv4x4_predict
+#endif
+extern prototype_intra4x4_predict\
+    (vp8_recon_intra_uv4x4_predict);
+
+#if CONFIG_COMP_INTRA_PRED
+#ifndef vp8_recon_comp_intra_uv4x4_predict
+#define vp8_recon_comp_intra_uv4x4_predict vp8_comp_intra_uv4x4_predict
+#endif
+extern prototype_comp_intra4x4_predict\
+    (vp8_recon_comp_intra_uv4x4_predict);
+#endif

 typedef prototype_copy_block((*vp8_copy_block_fn_t));
 typedef prototype_recon_block((*vp8_recon_fn_t));
 typedef prototype_recon_macroblock((*vp8_recon_mb_fn_t));
 typedef prototype_build_intra_predictors((*vp8_build_intra_pred_fn_t));
 typedef prototype_intra4x4_predict((*vp8_intra4x4_pred_fn_t));
+#if CONFIG_COMP_INTRA_PRED
+typedef prototype_comp_intra4x4_predict((*vp8_comp_intra4x4_pred_fn_t));
+#endif
 typedef struct vp8_recon_rtcd_vtable
 {
    vp8_copy_block_fn_t  copy16x16;
    vp8_copy_block_fn_t  copy8x8;
+    vp8_copy_block_fn_t  avg16x16;
+    vp8_copy_block_fn_t  avg8x8;
    vp8_copy_block_fn_t  copy8x4;
    vp8_recon_fn_t       recon;
+    vp8_recon_fn_t       recon_uv;
    vp8_recon_fn_t       recon2;
    vp8_recon_fn_t       recon4;
    vp8_recon_mb_fn_t    recon_mb;
    vp8_recon_mb_fn_t    recon_mby;
    vp8_build_intra_pred_fn_t  build_intra_predictors_mby_s;
    vp8_build_intra_pred_fn_t  build_intra_predictors_mby;
+#if CONFIG_COMP_INTRA_PRED
+    vp8_build_intra_pred_fn_t  build_comp_intra_predictors_mby;
+#endif
    vp8_build_intra_pred_fn_t  build_intra_predictors_mbuv_s;
    vp8_build_intra_pred_fn_t  build_intra_predictors_mbuv;
+#if CONFIG_COMP_INTRA_PRED
+    vp8_build_intra_pred_fn_t  build_comp_intra_predictors_mbuv;
+#endif
    vp8_intra4x4_pred_fn_t intra4x4_predict;
+#if CONFIG_COMP_INTRA_PRED
+    vp8_comp_intra4x4_pred_fn_t comp_intra4x4_predict;
+#endif
+    vp8_intra4x4_pred_fn_t intra8x8_predict;
+#if CONFIG_COMP_INTRA_PRED
+    vp8_comp_intra4x4_pred_fn_t comp_intra8x8_predict;
+#endif
+    vp8_intra4x4_pred_fn_t intra_uv4x4_predict;
+#if CONFIG_COMP_INTRA_PRED
+    vp8_comp_intra4x4_pred_fn_t comp_intra_uv4x4_predict;
+#endif
 } vp8_recon_rtcd_vtable_t;

 #if CONFIG_RUNTIME_CPU_DETECT
--- a/vp8/common/reconinter.c
+++ b/vp8/common/reconinter.c
@ -9,7 +9,7 @@
 */


-#include "vpx_config.h"
+#include "vpx_ports/config.h"
 #include "vpx/vpx_integer.h"
 #include "recon.h"
 #include "subpixel.h"
@ -62,6 +62,28 @@ void vp8_copy_mem16x16_c(

 }

+/* Averages a 16x16 block of src into dst in place:
+ * dst = (dst + src + 1) >> 1 for every pixel.
+ */
+void vp8_avg_mem16x16_c(
+    unsigned char *src,
+    int src_stride,
+    unsigned char *dst,
+    int dst_stride)
+{
+    int row, col;
+
+    for (row = 0; row < 16; row++, src += src_stride, dst += dst_stride)
+    {
+        for (col = 0; col < 16; col++)
+            dst[col] = (unsigned char)((dst[col] + src[col] + 1) >> 1);
+    }
+}
+
 void vp8_copy_mem8x8_c(
    unsigned char *src,
    int src_stride,
@ -92,6 +114,28 @@ void vp8_copy_mem8x8_c(

 }

+/* Averages an 8x8 block of src into dst in place:
+ * dst = (dst + src + 1) >> 1 for every pixel.
+ */
+void vp8_avg_mem8x8_c(
+    unsigned char *src,
+    int src_stride,
+    unsigned char *dst,
+    int dst_stride)
+{
+    int row, col;
+
+    for (row = 0; row < 8; row++, src += src_stride, dst += dst_stride)
+    {
+        for (col = 0; col < 8; col++)
+            dst[col] = (unsigned char)((dst[col] + src[col] + 1) >> 1);
+    }
+}
+
 void vp8_copy_mem8x4_c(
    unsigned char *src,
    int src_stride,
@ -136,7 +180,11 @@ void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, vp8_subpix_fn_t sppf)
    if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
    {
        ptr = ptr_base + d->pre + (d->bmi.mv.as_mv.row >> 3) * d->pre_stride + (d->bmi.mv.as_mv.col >> 3);
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+        sppf(ptr, d->pre_stride, (d->bmi.mv.as_mv.col & 7)<<1, (d->bmi.mv.as_mv.row & 7)<<1, pred_ptr, pitch);
+#else
        sppf(ptr, d->pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, pred_ptr, pitch);
+#endif
    }
    else
    {
@ -170,7 +218,11 @@ static void build_inter_predictors4b(MACROBLOCKD *x, BLOCKD *d, int pitch)

    if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
    {
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+        x->subpixel_predict8x8(ptr, d->pre_stride, (d->bmi.mv.as_mv.col & 7)<<1, (d->bmi.mv.as_mv.row & 7)<<1, pred_ptr, pitch);
+#else
        x->subpixel_predict8x8(ptr, d->pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, pred_ptr, pitch);
+#endif
    }
    else
    {
@ -189,7 +241,11 @@ static void build_inter_predictors2b(MACROBLOCKD *x, BLOCKD *d, int pitch)

    if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
    {
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+        x->subpixel_predict8x4(ptr, d->pre_stride, (d->bmi.mv.as_mv.col & 7)<<1, (d->bmi.mv.as_mv.row & 7)<<1, pred_ptr, pitch);
+#else
        x->subpixel_predict8x4(ptr, d->pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, pred_ptr, pitch);
+#endif
    }
    else
    {
@ -205,8 +261,10 @@ void vp8_build_inter16x16_predictors_mbuv(MACROBLOCKD *x)
    unsigned char *upred_ptr = &x->predictor[256];
    unsigned char *vpred_ptr = &x->predictor[320];

-    int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
-    int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
+    int omv_row = x->mode_info_context->mbmi.mv.as_mv.row;
+    int omv_col = x->mode_info_context->mbmi.mv.as_mv.col;
+    int mv_row  = omv_row;
+    int mv_col  = omv_col;
    int offset;
    int pre_stride = x->block[16].pre_stride;

@ -231,11 +289,19 @@ void vp8_build_inter16x16_predictors_mbuv(MACROBLOCKD *x)
    uptr = x->pre.u_buffer + offset;
    vptr = x->pre.v_buffer + offset;

+#if CONFIG_SIXTEENTH_SUBPEL_UV
+    if ((omv_row | omv_col) & 15)
+    {
+        x->subpixel_predict8x8(uptr, pre_stride, omv_col & 15, omv_row & 15, upred_ptr, 8);
+        x->subpixel_predict8x8(vptr, pre_stride, omv_col & 15, omv_row & 15, vpred_ptr, 8);
+    }
+#else   /* CONFIG_SIXTEENTH_SUBPEL_UV */
    if ((mv_row | mv_col) & 7)
    {
        x->subpixel_predict8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, upred_ptr, 8);
        x->subpixel_predict8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, vpred_ptr, 8);
    }
+#endif  /* CONFIG_SIXTEENTH_SUBPEL_UV */
    else
    {
        RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, pre_stride, upred_ptr, 8);
@ -317,7 +383,11 @@ void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x)

    if ((mv_row | mv_col) & 7)
    {
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+        x->subpixel_predict16x16(ptr, pre_stride, (mv_col & 7)<<1, (mv_row & 7)<<1, pred_ptr, 16);
+#else
        x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, pred_ptr, 16);
+#endif
    }
    else
    {
@ -336,31 +406,33 @@ static void clamp_mv_to_umv_border(MV *mv, const MACROBLOCKD *xd)
     * filtering. The bottom and right edges use 16 pixels plus 2 pixels
     * left of the central pixel when filtering.
     */
-    if (mv->col < (xd->mb_to_left_edge - (19 << 3)))
+    if (mv->col < (xd->mb_to_left_edge - ((16+INTERP_EXTEND) << 3)))
        mv->col = xd->mb_to_left_edge - (16 << 3);
-    else if (mv->col > xd->mb_to_right_edge + (18 << 3))
+    else if (mv->col > xd->mb_to_right_edge + ((15+INTERP_EXTEND) << 3))
        mv->col = xd->mb_to_right_edge + (16 << 3);

-    if (mv->row < (xd->mb_to_top_edge - (19 << 3)))
+    if (mv->row < (xd->mb_to_top_edge - ((16+INTERP_EXTEND) << 3)))
        mv->row = xd->mb_to_top_edge - (16 << 3);
-    else if (mv->row > xd->mb_to_bottom_edge + (18 << 3))
+    else if (mv->row > xd->mb_to_bottom_edge + ((15+INTERP_EXTEND) << 3))
        mv->row = xd->mb_to_bottom_edge + (16 << 3);
 }

 /* A version of the above function for chroma block MVs.*/
 static void clamp_uvmv_to_umv_border(MV *mv, const MACROBLOCKD *xd)
 {
-    mv->col = (2*mv->col < (xd->mb_to_left_edge - (19 << 3))) ?
+    mv->col = (2*mv->col < (xd->mb_to_left_edge - ((16+INTERP_EXTEND) << 3))) ?
        (xd->mb_to_left_edge - (16 << 3)) >> 1 : mv->col;
-    mv->col = (2*mv->col > xd->mb_to_right_edge + (18 << 3)) ?
+    mv->col = (2*mv->col > xd->mb_to_right_edge + ((15+INTERP_EXTEND) << 3)) ?
        (xd->mb_to_right_edge + (16 << 3)) >> 1 : mv->col;

-    mv->row = (2*mv->row < (xd->mb_to_top_edge - (19 << 3))) ?
+    mv->row = (2*mv->row < (xd->mb_to_top_edge - ((16+INTERP_EXTEND) << 3))) ?
        (xd->mb_to_top_edge - (16 << 3)) >> 1 : mv->row;
-    mv->row = (2*mv->row > xd->mb_to_bottom_edge + (18 << 3)) ?
+    mv->row = (2*mv->row > xd->mb_to_bottom_edge + ((15+INTERP_EXTEND) << 3)) ?
        (xd->mb_to_bottom_edge + (16 << 3)) >> 1 : mv->row;
 }

+
+
 void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x,
                                        unsigned char *dst_y,
                                        unsigned char *dst_u,
@ -372,6 +444,7 @@ void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x,
    unsigned char *ptr;
    unsigned char *uptr, *vptr;

+    int_mv _o16x16mv;
    int_mv _16x16mv;

    unsigned char *ptr_base = x->pre.y_buffer;
@ -388,13 +461,18 @@ void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x,

    if ( _16x16mv.as_int & 0x00070007)
    {
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+        x->subpixel_predict16x16(ptr, pre_stride, (_16x16mv.as_mv.col & 7)<<1, (_16x16mv.as_mv.row & 7)<<1, dst_y, dst_ystride);
+#else
        x->subpixel_predict16x16(ptr, pre_stride, _16x16mv.as_mv.col & 7,  _16x16mv.as_mv.row & 7, dst_y, dst_ystride);
+#endif
    }
    else
    {
        RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, dst_y, dst_ystride);
    }

+    _o16x16mv = _16x16mv;
    /* calc uv motion vectors */
    if ( _16x16mv.as_mv.row < 0)
      _16x16mv.as_mv.row -= 1;
@ -417,16 +495,106 @@ void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x,
    uptr = x->pre.u_buffer + offset;
    vptr = x->pre.v_buffer + offset;

+#if CONFIG_SIXTEENTH_SUBPEL_UV
+    if ( _o16x16mv.as_int & 0x000f000f)
+    {
+        x->subpixel_predict8x8(uptr, pre_stride, _o16x16mv.as_mv.col & 15,  _o16x16mv.as_mv.row & 15, dst_u, dst_uvstride);
+        x->subpixel_predict8x8(vptr, pre_stride, _o16x16mv.as_mv.col & 15,  _o16x16mv.as_mv.row & 15, dst_v, dst_uvstride);
+    }
+#else  /* CONFIG_SIXTEENTH_SUBPEL_UV */
    if ( _16x16mv.as_int & 0x00070007)
    {
        x->subpixel_predict8x8(uptr, pre_stride, _16x16mv.as_mv.col & 7,  _16x16mv.as_mv.row & 7, dst_u, dst_uvstride);
        x->subpixel_predict8x8(vptr, pre_stride, _16x16mv.as_mv.col & 7,  _16x16mv.as_mv.row & 7, dst_v, dst_uvstride);
    }
+#endif  /* CONFIG_SIXTEENTH_SUBPEL_UV */
    else
    {
        RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, pre_stride, dst_u, dst_uvstride);
        RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, pre_stride, dst_v, dst_uvstride);
    }
+
+}
+
+/*
+ * This function should be called after an initial call to
+ * vp8_build_inter16x16_predictors_mb() or _mby()/_mbuv().
+ * It will run a second sixtap filter on a (different) ref
+ * frame and average the result with the output of the
+ * first sixtap filter. The second reference frame is stored
+ * in x->second_pre (the reference frame index is in
+ * x->mode_info_context->mbmi.second_ref_frame). The second
+ * motion vector is x->mode_info_context->mbmi.second_mv.
+ *
+ * This allows blending prediction from two reference frames
+ * which sometimes leads to better prediction than from a
+ * single reference frame.
+ *
+ * dst_y/dst_u/dst_v are handed to the *_avg* predictors as the
+ * destination, so they are expected to already hold the first
+ * prediction; dst_ystride/dst_uvstride are their row strides.
+ */
+void vp8_build_2nd_inter16x16_predictors_mb(MACROBLOCKD *x,
+                                            unsigned char *dst_y,
+                                            unsigned char *dst_u,
+                                            unsigned char *dst_v,
+                                            int dst_ystride,
+                                            int dst_uvstride)
+{
+    int offset;
+    unsigned char *ptr;
+    unsigned char *uptr, *vptr;
+
+    int mv_row = x->mode_info_context->mbmi.second_mv.as_mv.row;
+    int mv_col = x->mode_info_context->mbmi.second_mv.as_mv.col;
+    int omv_row, omv_col;
+
+    unsigned char *ptr_base = x->second_pre.y_buffer;
+    int pre_stride = x->block[0].pre_stride;
+
+    /* whole-pixel part of the MV (mv >> 3) selects the luma source position;
+     * the low 3 bits are the fractional position handled below */
+    ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);
+
+    /* any fractional bits set: filter-and-average; otherwise plain average */
+    if ((mv_row | mv_col) & 7)
+    {
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+        /* fraction is doubled before being passed on; presumably the
+         * predictors take 1/16-pel offsets in this configuration - confirm */
+        x->subpixel_predict_avg16x16(ptr, pre_stride, (mv_col & 7)<<1, (mv_row & 7)<<1, dst_y, dst_ystride);
+#else
+        x->subpixel_predict_avg16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, dst_y, dst_ystride);
+#endif
+    }
+    else
+    {
+        RECON_INVOKE(&x->rtcd->recon, avg16x16)(ptr, pre_stride, dst_y, dst_ystride);
+    }
+
+    /* calc uv motion vectors */
+    /* keep the full-precision luma MV; the halved copy adds 1 to positive
+     * components before the shift */
+    omv_row = mv_row;
+    omv_col = mv_col;
+    mv_row = (mv_row + (mv_row > 0)) >> 1;
+    mv_col = (mv_col + (mv_col > 0)) >> 1;
+
+    mv_row &= x->fullpixel_mask;
+    mv_col &= x->fullpixel_mask;
+
+    /* chroma rows are addressed with half the luma stride */
+    pre_stride >>= 1;
+    offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
+    uptr = x->second_pre.u_buffer + offset;
+    vptr = x->second_pre.v_buffer + offset;
+
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+    /* NOTE(review): the integer offset above comes from the rounded,
+     * fullpixel-masked half MV, while the fraction here is the low 4 bits of
+     * the unmasked luma MV. For a positive component whose low bits are 15
+     * (e.g. 15 -> half MV 8 -> offset 1, fraction 15) the two disagree by
+     * one pixel - confirm this is intended. */
+    if ((omv_row | omv_col) & 15)
+    {
+        x->subpixel_predict_avg8x8(uptr, pre_stride, omv_col & 15, omv_row & 15, dst_u, dst_uvstride);
+        x->subpixel_predict_avg8x8(vptr, pre_stride, omv_col & 15, omv_row & 15, dst_v, dst_uvstride);
+    }
+#else  /* CONFIG_SIXTEENTH_SUBPEL_UV */
+    if ((mv_row | mv_col) & 7)
+    {
+        x->subpixel_predict_avg8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, dst_u, dst_uvstride);
+        x->subpixel_predict_avg8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, dst_v, dst_uvstride);
+    }
+#endif  /* CONFIG_SIXTEENTH_SUBPEL_UV */
+    else
+    {
+        RECON_INVOKE(&x->rtcd->recon, avg8x8)(uptr, pre_stride, dst_u, dst_uvstride);
+        RECON_INVOKE(&x->rtcd->recon, avg8x8)(vptr, pre_stride, dst_v, dst_uvstride);
+    }
 }

 static void build_inter4x4_predictors_mb(MACROBLOCKD *x)
@ -439,6 +607,7 @@ static void build_inter4x4_predictors_mb(MACROBLOCKD *x)
        x->block[ 2].bmi = x->mode_info_context->bmi[ 2];
        x->block[ 8].bmi = x->mode_info_context->bmi[ 8];
        x->block[10].bmi = x->mode_info_context->bmi[10];
+
        if (x->mode_info_context->mbmi.need_to_clamp_mvs)
        {
            clamp_mv_to_umv_border(&x->block[ 0].bmi.mv.as_mv, x);
@ -447,6 +616,7 @@ static void build_inter4x4_predictors_mb(MACROBLOCKD *x)
            clamp_mv_to_umv_border(&x->block[10].bmi.mv.as_mv, x);
        }

+
        build_inter_predictors4b(x, &x->block[ 0], 16);
        build_inter_predictors4b(x, &x->block[ 2], 16);
        build_inter_predictors4b(x, &x->block[ 8], 16);
@ -461,6 +631,7 @@ static void build_inter4x4_predictors_mb(MACROBLOCKD *x)

            x->block[i+0].bmi = x->mode_info_context->bmi[i+0];
            x->block[i+1].bmi = x->mode_info_context->bmi[i+1];
+
            if (x->mode_info_context->mbmi.need_to_clamp_mvs)
            {
                clamp_mv_to_umv_border(&x->block[i+0].bmi.mv.as_mv, x);
@ -484,8 +655,6 @@ static void build_inter4x4_predictors_mb(MACROBLOCKD *x)
        BLOCKD *d0 = &x->block[i];
        BLOCKD *d1 = &x->block[i+1];

-        /* Note: uv mvs already clamped in build_4x4uvmvs() */
-
        if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
            build_inter_predictors2b(x, d0, 8);
        else
@ -531,6 +700,9 @@ void build_4x4uvmvs(MACROBLOCKD *x)

            x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & x->fullpixel_mask;

+            /* NOTE(review): this clamp is repeated verbatim by the statement
+             * immediately after it; one of the two copies looks redundant
+             * (possible merge artefact) - confirm and drop one. */
+            if (x->mode_info_context->mbmi.need_to_clamp_mvs)
+                clamp_uvmv_to_umv_border(&x->block[uoffset].bmi.mv.as_mv, x);
+
            if (x->mode_info_context->mbmi.need_to_clamp_mvs)
                clamp_uvmv_to_umv_border(&x->block[uoffset].bmi.mv.as_mv, x);

@ -548,6 +720,16 @@ void vp8_build_inter_predictors_mb(MACROBLOCKD *x)
    {
        vp8_build_inter16x16_predictors_mb(x, x->predictor, &x->predictor[256],
                                           &x->predictor[320], 16, 8);
+
+        if (x->mode_info_context->mbmi.second_ref_frame)
+        {
+            /* 256 = offset of U plane in Y+U+V buffer;
+             * 320 = offset of V plane in Y+U+V buffer.
+             * (256=16x16, 320=16x16+8x8). */
+            vp8_build_2nd_inter16x16_predictors_mb(x, x->predictor,
+                                                   &x->predictor[256],
+                                                   &x->predictor[320], 16, 8);
+        }
    }
    else
    {
@ -555,4 +737,3 @@ void vp8_build_inter_predictors_mb(MACROBLOCKD *x)
        build_inter4x4_predictors_mb(x);
    }
 }
-
--- a/vp8/common/reconinter.h
+++ b/vp8/common/reconinter.h
@ -19,6 +19,12 @@ extern void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x,
                                               unsigned char *dst_v,
                                               int dst_ystride,
                                               int dst_uvstride);
+extern void vp8_build_2nd_inter16x16_predictors_mb(MACROBLOCKD *x,
+                                                   unsigned char *dst_y,
+                                                   unsigned char *dst_u,
+                                                   unsigned char *dst_v,
+                                                   int dst_ystride,
+                                                   int dst_uvstride);


 extern void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x);
--- a/vp8/common/reconintra.c
+++ b/vp8/common/reconintra.c
@ -9,7 +9,7 @@
 */


-#include "vpx_config.h"
+#include "vpx_ports/config.h"
 #include "recon.h"
 #include "reconintra.h"
 #include "vpx_mem/vpx_mem.h"
@ -28,13 +28,12 @@ void vp8_recon_intra_mbuv(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
    }
 }

-void vp8_build_intra_predictors_mby(MACROBLOCKD *x)
+void vp8_build_intra_predictors_mby_internal(MACROBLOCKD *x, unsigned char *ypred_ptr, int y_stride, int mode)
 {

    unsigned char *yabove_row = x->dst.y_buffer - x->dst.y_stride;
    unsigned char yleft_col[16];
    unsigned char ytop_left = yabove_row[-1];
-    unsigned char *ypred_ptr = x->predictor;
    int r, c, i;

    for (i = 0; i < 16; i++)
@ -43,7 +42,7 @@ void vp8_build_intra_predictors_mby(MACROBLOCKD *x)
    }

    /* for Y */
-    switch (x->mode_info_context->mbmi.mode)
+    switch (mode)
    {
    case DC_PRED:
    {
@ -70,11 +69,7 @@ void vp8_build_intra_predictors_mby(MACROBLOCKD *x)
                {
                    average += yleft_col[i];
                }
-
            }
-
-
-
            shift = 3 + x->up_available + x->left_available;
            expected_dc = (average + (1 << (shift - 1))) >> shift;
        }
@ -83,128 +78,6 @@ void vp8_build_intra_predictors_mby(MACROBLOCKD *x)
            expected_dc = 128;
        }

-        vpx_memset(ypred_ptr, expected_dc, 256);
-    }
-    break;
-    case V_PRED:
-    {
-
-        for (r = 0; r < 16; r++)
-        {
-
-            ((int *)ypred_ptr)[0] = ((int *)yabove_row)[0];
-            ((int *)ypred_ptr)[1] = ((int *)yabove_row)[1];
-            ((int *)ypred_ptr)[2] = ((int *)yabove_row)[2];
-            ((int *)ypred_ptr)[3] = ((int *)yabove_row)[3];
-            ypred_ptr += 16;
-        }
-    }
-    break;
-    case H_PRED:
-    {
-
-        for (r = 0; r < 16; r++)
-        {
-
-            vpx_memset(ypred_ptr, yleft_col[r], 16);
-            ypred_ptr += 16;
-        }
-
-    }
-    break;
-    case TM_PRED:
-    {
-
-        for (r = 0; r < 16; r++)
-        {
-            for (c = 0; c < 16; c++)
-            {
-                int pred =  yleft_col[r] + yabove_row[ c] - ytop_left;
-
-                if (pred < 0)
-                    pred = 0;
-
-                if (pred > 255)
-                    pred = 255;
-
-                ypred_ptr[c] = pred;
-            }
-
-            ypred_ptr += 16;
-        }
-
-    }
-    break;
-    case B_PRED:
-    case NEARESTMV:
-    case NEARMV:
-    case ZEROMV:
-    case NEWMV:
-    case SPLITMV:
-    case MB_MODE_COUNT:
-        break;
-    }
-}
-
-void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x)
-{
-
-    unsigned char *yabove_row = x->dst.y_buffer - x->dst.y_stride;
-    unsigned char yleft_col[16];
-    unsigned char ytop_left = yabove_row[-1];
-    unsigned char *ypred_ptr = x->predictor;
-    int r, c, i;
-
-    int y_stride = x->dst.y_stride;
-    ypred_ptr = x->dst.y_buffer; /*x->predictor;*/
-
-    for (i = 0; i < 16; i++)
-    {
-        yleft_col[i] = x->dst.y_buffer [i* x->dst.y_stride -1];
-    }
-
-    /* for Y */
-    switch (x->mode_info_context->mbmi.mode)
-    {
-    case DC_PRED:
-    {
-        int expected_dc;
-        int i;
-        int shift;
-        int average = 0;
-
-
-        if (x->up_available || x->left_available)
-        {
-            if (x->up_available)
-            {
-                for (i = 0; i < 16; i++)
-                {
-                    average += yabove_row[i];
-                }
-            }
-
-            if (x->left_available)
-            {
-
-                for (i = 0; i < 16; i++)
-                {
-                    average += yleft_col[i];
-                }
-
-            }
-
-
-
-            shift = 3 + x->up_available + x->left_available;
-            expected_dc = (average + (1 << (shift - 1))) >> shift;
-        }
-        else
-        {
-            expected_dc = 128;
-        }
-
-        /*vpx_memset(ypred_ptr, expected_dc, 256);*/
        for (r = 0; r < 16; r++)
        {
            vpx_memset(ypred_ptr, expected_dc, 16);
@ -222,7 +95,7 @@ void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x)
            ((int *)ypred_ptr)[1] = ((int *)yabove_row)[1];
            ((int *)ypred_ptr)[2] = ((int *)yabove_row)[2];
            ((int *)ypred_ptr)[3] = ((int *)yabove_row)[3];
-            ypred_ptr += y_stride; /*16;*/
+            ypred_ptr += y_stride;
        }
    }
    break;
@ -233,7 +106,7 @@ void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x)
        {

            vpx_memset(ypred_ptr, yleft_col[r], 16);
-            ypred_ptr += y_stride;  /*16;*/
+            ypred_ptr += y_stride;
        }

    }
@ -256,11 +129,14 @@ void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x)
                ypred_ptr[c] = pred;
            }

-            ypred_ptr += y_stride;  /*16;*/
+            ypred_ptr += y_stride;
        }

    }
    break;
+#if CONFIG_I8X8
+    case I8X8_PRED: /* grouped with the modes below; no 16x16 predictor is written for it here */
+#endif
    case B_PRED:
    case NEARESTMV:
    case NEARMV:
@ -272,145 +148,41 @@ void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x)
    }
 }

-void vp8_build_intra_predictors_mbuv(MACROBLOCKD *x)
+void vp8_build_intra_predictors_mby(MACROBLOCKD *x)
 {
-    unsigned char *uabove_row = x->dst.u_buffer - x->dst.uv_stride;
-    unsigned char uleft_col[16];
-    unsigned char utop_left = uabove_row[-1];
-    unsigned char *vabove_row = x->dst.v_buffer - x->dst.uv_stride;
-    unsigned char vleft_col[20];
-    unsigned char vtop_left = vabove_row[-1];
-    unsigned char *upred_ptr = &x->predictor[256];
-    unsigned char *vpred_ptr = &x->predictor[320];
-    int i, j;
-
-    for (i = 0; i < 8; i++)
-    {
-        uleft_col[i] = x->dst.u_buffer [i* x->dst.uv_stride -1];
-        vleft_col[i] = x->dst.v_buffer [i* x->dst.uv_stride -1];
-    }
-
-    switch (x->mode_info_context->mbmi.uv_mode)
-    {
-    case DC_PRED:
-    {
-        int expected_udc;
-        int expected_vdc;
-        int i;
-        int shift;
-        int Uaverage = 0;
-        int Vaverage = 0;
-
-        if (x->up_available)
-        {
-            for (i = 0; i < 8; i++)
-            {
-                Uaverage += uabove_row[i];
-                Vaverage += vabove_row[i];
-            }
-        }
-
-        if (x->left_available)
-        {
-            for (i = 0; i < 8; i++)
-            {
-                Uaverage += uleft_col[i];
-                Vaverage += vleft_col[i];
-            }
-        }
-
-        if (!x->up_available && !x->left_available)
-        {
-            expected_udc = 128;
-            expected_vdc = 128;
-        }
-        else
-        {
-            shift = 2 + x->up_available + x->left_available;
-            expected_udc = (Uaverage + (1 << (shift - 1))) >> shift;
-            expected_vdc = (Vaverage + (1 << (shift - 1))) >> shift;
-        }
-
-
-        vpx_memset(upred_ptr, expected_udc, 64);
-        vpx_memset(vpred_ptr, expected_vdc, 64);
-
-
-    }
-    break;
-    case V_PRED:
-    {
-        int i;
-
-        for (i = 0; i < 8; i++)
-        {
-            vpx_memcpy(upred_ptr, uabove_row, 8);
-            vpx_memcpy(vpred_ptr, vabove_row, 8);
-            upred_ptr += 8;
-            vpred_ptr += 8;
-        }
-
-    }
-    break;
-    case H_PRED:
-    {
-        int i;
-
-        for (i = 0; i < 8; i++)
-        {
-            vpx_memset(upred_ptr, uleft_col[i], 8);
-            vpx_memset(vpred_ptr, vleft_col[i], 8);
-            upred_ptr += 8;
-            vpred_ptr += 8;
-        }
-    }
-
-    break;
-    case TM_PRED:
-    {
-        int i;
-
-        for (i = 0; i < 8; i++)
-        {
-            for (j = 0; j < 8; j++)
-            {
-                int predu = uleft_col[i] + uabove_row[j] - utop_left;
-                int predv = vleft_col[i] + vabove_row[j] - vtop_left;
-
-                if (predu < 0)
-                    predu = 0;
-
-                if (predu > 255)
-                    predu = 255;
-
-                if (predv < 0)
-                    predv = 0;
-
-                if (predv > 255)
-                    predv = 255;
-
-                upred_ptr[j] = predu;
-                vpred_ptr[j] = predv;
-            }
-
-            upred_ptr += 8;
-            vpred_ptr += 8;
-        }
-
-    }
-    break;
-    case B_PRED:
-    case NEARESTMV:
-    case NEARMV:
-    case ZEROMV:
-    case NEWMV:
-    case SPLITMV:
-    case MB_MODE_COUNT:
-        break;
-    }
+    vp8_build_intra_predictors_mby_internal(x, x->predictor, 16,
+                                            x->mode_info_context->mbmi.mode);
 }

-void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x)
+/* Builds the luma intra predictor for x->mode_info_context->mbmi.mode
+ * straight into the destination frame buffer (x->dst.y_buffer, using
+ * x->dst.y_stride as the row stride) instead of into x->predictor.
+ */
+void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x)
+{
+    vp8_build_intra_predictors_mby_internal(x, x->dst.y_buffer, x->dst.y_stride,
+                                            x->mode_info_context->mbmi.mode);
+}
+
+#if CONFIG_COMP_INTRA_PRED
+/* Compound luma intra prediction: builds one 16x16 predictor (stride 16) for
+ * mbmi.mode and one for mbmi.second_mode into two stack buffers, then stores
+ * their rounded average, (a + b + 1) >> 1, in x->predictor[0..255].
+ *
+ * NOTE(review): the stack buffers are not initialised here, so this relies on
+ * the internal builder writing all 256 bytes for both modes - confirm callers
+ * only pass modes that it fills.
+ */
+void vp8_build_comp_intra_predictors_mby(MACROBLOCKD *x)
+{
+    unsigned char predictor[2][256];
+    int i;
+
+    vp8_build_intra_predictors_mby_internal(x, predictor[0], 16,
+                                            x->mode_info_context->mbmi.mode);
+    vp8_build_intra_predictors_mby_internal(x, predictor[1], 16,
+                                            x->mode_info_context->mbmi.second_mode);
+
+    for (i = 0; i < 256; i++)
+    {
+        x->predictor[i] = (predictor[0][i] + predictor[1][i] + 1) >> 1;
+    }
+}
+#endif
+
+void vp8_build_intra_predictors_mbuv_internal(MACROBLOCKD *x,
+                                              unsigned char *upred_ptr,
+                                              unsigned char *vpred_ptr,
+                                              int uv_stride,
+                                              int mode)
 {
    unsigned char *uabove_row = x->dst.u_buffer - x->dst.uv_stride;
    unsigned char uleft_col[16];
@ -418,9 +190,6 @@ void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x)
    unsigned char *vabove_row = x->dst.v_buffer - x->dst.uv_stride;
    unsigned char vleft_col[20];
    unsigned char vtop_left = vabove_row[-1];
-    unsigned char *upred_ptr = x->dst.u_buffer; /*&x->predictor[256];*/
-    unsigned char *vpred_ptr = x->dst.v_buffer; /*&x->predictor[320];*/
-    int uv_stride = x->dst.uv_stride;

    int i, j;

@ -430,7 +199,7 @@ void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x)
        vleft_col[i] = x->dst.v_buffer [i* x->dst.uv_stride -1];
    }

-    switch (x->mode_info_context->mbmi.uv_mode)
+    switch (mode)
    {
    case DC_PRED:
    {
@ -554,3 +323,261 @@ void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x)
        break;
    }
 }
+
+/* Builds the chroma intra predictors for mbmi.uv_mode into x->predictor:
+ * U at offset 256 and V at offset 320, both written with a row stride of 8.
+ */
+void vp8_build_intra_predictors_mbuv(MACROBLOCKD *x)
+{
+    vp8_build_intra_predictors_mbuv_internal(x,
+                                             &x->predictor[256],
+                                             &x->predictor[320],
+                                             8,
+                                             x->mode_info_context->mbmi.uv_mode);
+}
+
+/* Builds the chroma intra predictors for mbmi.uv_mode straight into the
+ * destination frame buffers (x->dst.u_buffer / x->dst.v_buffer, using
+ * x->dst.uv_stride as the row stride) instead of into x->predictor.
+ */
+void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x)
+{
+    vp8_build_intra_predictors_mbuv_internal(x,
+                                             x->dst.u_buffer,
+                                             x->dst.v_buffer,
+                                             x->dst.uv_stride,
+                                             x->mode_info_context->mbmi.uv_mode);
+}
+
+#if CONFIG_COMP_INTRA_PRED
+/* Compound chroma intra prediction: builds 8x8 U and V predictors (stride 8)
+ * for mbmi.uv_mode and for mbmi.second_uv_mode, then stores the rounded
+ * average of each pair in x->predictor (U at offset 256, V at 256 + 64).
+ *
+ * Scratch layout is predictor[plane][which][64]: plane 0 = U, 1 = V;
+ * which 0 = uv_mode result, 1 = second_uv_mode result.
+ */
+void vp8_build_comp_intra_predictors_mbuv(MACROBLOCKD *x)
+{
+    unsigned char predictor[2][2][64];
+    int i;
+
+    vp8_build_intra_predictors_mbuv_internal(x, predictor[0][0], predictor[1][0], 8,
+                                             x->mode_info_context->mbmi.uv_mode);
+    vp8_build_intra_predictors_mbuv_internal(x, predictor[0][1], predictor[1][1], 8,
+                                             x->mode_info_context->mbmi.second_uv_mode);
+    for (i = 0; i < 64; i++)
+    {
+        x->predictor[256 + i] = (predictor[0][0][i] + predictor[0][1][i] + 1) >> 1;
+        x->predictor[256 + 64 + i] = (predictor[1][0][i] + predictor[1][1][i] + 1) >> 1;
+    }
+}
+#endif
+
+/* Builds an 8x8 intra predictor for the block described by x.
+ *
+ * The row above, the column to the left and the top-left pixel are read from
+ * the destination buffer at *(x->base_dst) + x->dst using x->dst_stride.
+ * The 8x8 result is written to predictor with a fixed row stride of 16.
+ *
+ * mode selects DC_PRED, V_PRED, H_PRED or TM_PRED. The switch has no default,
+ * so any other value leaves predictor untouched. Unlike the 16x16 builder in
+ * this file, DC_PRED here always averages all 8 above and 8 left pixels and
+ * does not consult up_available/left_available.
+ */
+void vp8_intra8x8_predict(BLOCKD *x,
+                          int mode,
+                          unsigned char *predictor)
+{
+
+    unsigned char *yabove_row = *(x->base_dst) + x->dst - x->dst_stride;
+    unsigned char yleft_col[8];
+    unsigned char ytop_left = yabove_row[-1];
+    int r, c, i;
+
+    /* gather the pixel immediately left of each of the 8 rows */
+    for (i = 0; i < 8; i++)
+    {
+        yleft_col[i] = (*(x->base_dst))[x->dst - 1 + i * x->dst_stride];
+    }
+    switch (mode)
+    {
+    case DC_PRED:
+        {
+            int expected_dc = 0;
+
+            for (i = 0; i < 8; i++)
+            {
+                expected_dc += yabove_row[i];
+                expected_dc += yleft_col[i];
+            }
+            /* rounded mean of the 16 neighbouring samples */
+            expected_dc = (expected_dc + 8) >> 4;
+
+            for (r = 0; r < 8; r++)
+            {
+                for (c = 0; c < 8; c++)
+                {
+                    predictor[c] = expected_dc;
+                }
+                predictor += 16;
+            }
+        }
+        break;
+    case V_PRED:
+        {
+            /* every row repeats the row above the block */
+            for (r = 0; r < 8; r++)
+            {
+                for (c = 0; c < 8; c++)
+                {
+
+                    predictor[c] = yabove_row[c];
+                }
+                predictor += 16;
+            }
+
+        }
+        break;
+    case H_PRED:
+        {
+
+            /* every row is filled with its left neighbour */
+            for (r = 0; r < 8; r++)
+            {
+                for (c = 0; c < 8; c++)
+                {
+                    predictor[c] = yleft_col[r];
+                }
+                predictor += 16;
+            }
+        }
+        break;
+    case TM_PRED:
+        {
+            /* prediction similar to true_motion prediction */
+            for (r = 0; r < 8; r++)
+            {
+                for (c = 0; c < 8; c++)
+                {
+                    /* above + left - top_left, clamped to 0..255 */
+                    int pred = yabove_row[c] - ytop_left + yleft_col[r];
+                    if (pred < 0)
+                        pred = 0;
+
+                    if (pred > 255)
+                        pred = 255;
+                    predictor[c] = pred;
+                }
+
+                predictor += 16;
+            }
+        }
+        break;
+    }
+}
+
+#if CONFIG_COMP_INTRA_PRED
+/* Compound 8x8 intra prediction: runs vp8_intra8x8_predict() once for mode
+ * and once for second_mode into two scratch buffers (8 rows, stride 16), then
+ * writes the rounded average of the two into out_predictor. Only the first 8
+ * bytes of each 16-byte row of out_predictor are written.
+ *
+ * NOTE(review): the scratch buffers are not initialised, and
+ * vp8_intra8x8_predict() writes nothing for modes outside its switch -
+ * confirm callers only pass modes it handles.
+ */
+void vp8_comp_intra8x8_predict(BLOCKD *x,
+                               int mode, int second_mode,
+                               unsigned char *out_predictor)
+{
+    
+    unsigned char predictor[2][8*16];
+    int i, j;
+
+    vp8_intra8x8_predict(x, mode, predictor[0]);
+    vp8_intra8x8_predict(x, second_mode, predictor[1]);
+
+    /* i walks row starts (stride 16); j walks the 8 pixels of that row */
+    for (i = 0; i < 8*16; i += 16)
+    {
+        for (j = i; j < i + 8; j++)
+        {
+            out_predictor[j] = (predictor[0][j] + predictor[1][j] + 1) >> 1;
+        }
+    }
+}
+#endif
+
+/* Builds a 4x4 intra predictor for the block described by x.
+ *
+ * The row above, the column to the left and the top-left pixel are read from
+ * the destination buffer at *(x->base_dst) + x->dst using x->dst_stride.
+ * The 4x4 result is written to predictor with a fixed row stride of 8.
+ *
+ * mode selects DC_PRED, V_PRED, H_PRED or TM_PRED. The switch has no default,
+ * so any other value leaves predictor untouched. DC_PRED always averages all
+ * 4 above and 4 left pixels, with no availability check.
+ */
+void vp8_intra_uv4x4_predict(BLOCKD *x,
+                             int mode,
+                             unsigned char *predictor)
+{
+
+    unsigned char *above_row = *(x->base_dst) + x->dst - x->dst_stride;
+    unsigned char left_col[4];
+    unsigned char top_left = above_row[-1];
+    int r, c, i;
+
+    /* gather the pixel immediately left of each of the 4 rows */
+    for (i = 0; i < 4; i++)
+    {
+        left_col[i] = (*(x->base_dst))[x->dst - 1 + i * x->dst_stride];
+    }
+    switch (mode)
+    {
+    case DC_PRED:
+        {
+            int expected_dc = 0;
+
+            for (i = 0; i < 4; i++)
+            {
+                expected_dc += above_row[i];
+                expected_dc += left_col[i];
+            }
+            /* rounded mean of the 8 neighbouring samples */
+            expected_dc = (expected_dc + 4) >> 3;
+
+            for (r = 0; r < 4; r++)
+            {
+                for (c = 0; c < 4; c++)
+                {
+                    predictor[c] = expected_dc;
+                }
+                predictor += 8;
+            }
+        }
+        break;
+    case V_PRED:
+        {
+            /* every row repeats the row above the block */
+            for (r = 0; r < 4; r++)
+            {
+                for (c = 0; c < 4; c++)
+                {
+
+                    predictor[c] = above_row[c];
+                }
+                predictor += 8;
+            }
+
+        }
+        break;
+    case H_PRED:
+        {
+
+            /* every row is filled with its left neighbour */
+            for (r = 0; r < 4; r++)
+            {
+                for (c = 0; c < 4; c++)
+                {
+                    predictor[c] = left_col[r];
+                }
+                predictor += 8;
+            }
+        }
+        break;
+    case TM_PRED:
+        {
+            /* prediction similar to true_motion prediction */
+            for (r = 0; r < 4; r++)
+            {
+                for (c = 0; c < 4; c++)
+                {
+                    /* above + left - top_left, clamped to 0..255 */
+                    int pred = above_row[c] - top_left + left_col[r];
+                    if (pred < 0)
+                        pred = 0;
+
+                    if (pred > 255)
+                        pred = 255;
+                    predictor[c] = pred;
+                }
+
+                predictor += 8;
+            }
+        }
+        break;
+    }
+}
+
+#if CONFIG_COMP_INTRA_PRED
+/* Compound 4x4 chroma intra prediction: runs vp8_intra_uv4x4_predict() once
+ * for mode and once for mode2 into two scratch buffers (4 rows, stride 8),
+ * then writes the rounded average of the two into out_predictor. Only the
+ * first 4 bytes of each 8-byte row of out_predictor are written.
+ *
+ * NOTE(review): the scratch buffers are not initialised, and
+ * vp8_intra_uv4x4_predict() writes nothing for modes outside its switch -
+ * confirm callers only pass modes it handles.
+ */
+void vp8_comp_intra_uv4x4_predict(BLOCKD *x,
+                                  int mode, int mode2,
+                                  unsigned char *out_predictor)
+{
+    unsigned char predictor[2][8*4];
+    int i, j;
+
+    vp8_intra_uv4x4_predict(x, mode, predictor[0]);
+    vp8_intra_uv4x4_predict(x, mode2, predictor[1]);
+
+    /* i walks row starts (stride 8); j walks the 4 pixels of that row */
+    for (i = 0; i < 4*8; i += 8)
+    {
+        for (j = i; j < i + 4; j++)
+        {
+            out_predictor[j] = (predictor[0][j] + predictor[1][j] + 1) >> 1;
+        }
+    }
+}
+#endif
+
+/* TODO: try different ways of using the Y-UV mode correlation.
+ The current code assumes that a uv 4x4 block uses the same mode
+ as the corresponding Y 8x8 area.
+ */
--- a/vp8/common/reconintra4x4.c
+++ b/vp8/common/reconintra4x4.c
@ -9,7 +9,7 @@
 */


-#include "vpx_config.h"
+#include "vpx_ports/config.h"
 #include "recon.h"
 #include "vpx_mem/vpx_mem.h"
 #include "reconintra.h"
@ -295,6 +295,28 @@ void vp8_intra4x4_predict(BLOCKD *x,

    }
 }
+
+#if CONFIG_COMP_INTRA_PRED
+/* Compound 4x4 intra prediction: runs vp8_intra4x4_predict() once for b_mode
+ * and once for b_mode2 into two scratch buffers (4 rows, stride 16), then
+ * writes the rounded average of the two into out_predictor. Only the first 4
+ * bytes of each 16-byte row of out_predictor are written.
+ */
+void vp8_comp_intra4x4_predict(BLOCKD *x,
+                               int b_mode, int b_mode2,
+                               unsigned char *out_predictor)
+{
+    unsigned char predictor[2][4*16];
+    int i, j;
+
+    vp8_intra4x4_predict(x, b_mode, predictor[0]);
+    vp8_intra4x4_predict(x, b_mode2, predictor[1]);
+
+    /* i walks row starts (stride 16); j walks the 4 pixels of that row */
+    for (i = 0; i < 16*4; i += 16)
+    {
+        for (j = i; j < i + 4; j++)
+        {
+            out_predictor[j] = (predictor[0][j] + predictor[1][j] + 1) >> 1;
+        }
+    }
+}
+#endif
+
 /* copy 4 bytes from the above right down so that the 4x4 prediction modes using pixels above and
 * to the right prediction have filled in pixels to use.
 */
--- a/vp8/common/rotate.h
+++ b/vp8/common/rotate.h
--- a/vp8/common/rotate2.h
+++ b/vp8/common/rotate2.h
--- a/vp8/common/seg_common.c
+++ b/vp8/common/seg_common.c
@ -0,0 +1,167 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp8/common/seg_common.h"
+
+const int segfeaturedata_signed[SEG_LVL_MAX] = {1, 1, 0, 0, 0, 0}; // per-feature flag returned by is_segfeature_signed()
+const int vp8_seg_feature_data_bits[SEG_LVL_MAX] = // per-feature value returned by seg_feature_data_bits()
+              {QINDEX_BITS, 6, 4, 4, 6, 2};
+
+// These functions provide access to new segment level features.
+// Eventually these functions may be "optimized out" but for the moment,
+// the coding mechanism is still subject to change so these provide a
+// convenient single point of change.
+
+int segfeature_active( MACROBLOCKD *xd,
+                       int segment_id,
+                       SEG_LVL_FEATURES feature_id )
+{
+    // Active only when segmentation is enabled and the feature's mask bit is set.
+    const int feature_bit = 0x01 << feature_id;
+    return xd->segmentation_enabled &&
+           (xd->segment_feature_mask[segment_id] & feature_bit);
+}
+
+void clearall_segfeatures( MACROBLOCKD *xd )
+{   // Zero every segment's feature mask and feature data.
+    vpx_memset(xd->segment_feature_mask, 0, sizeof xd->segment_feature_mask);
+    vpx_memset(xd->segment_feature_data, 0, sizeof xd->segment_feature_data);
+}
+
+void enable_segfeature( MACROBLOCKD *xd,
+                        int segment_id,
+                        SEG_LVL_FEATURES feature_id )
+{   // Set feature_id's bit in this segment's feature mask.
+    xd->segment_feature_mask[segment_id] |= 1 << feature_id;
+}
+
+void disable_segfeature( MACROBLOCKD *xd,
+                         int segment_id,
+                         SEG_LVL_FEATURES feature_id )
+{   // Clear feature_id's bit in this segment's feature mask.
+    xd->segment_feature_mask[segment_id] &= ~(0x01 << feature_id);
+}
+
+int seg_feature_data_bits( SEG_LVL_FEATURES feature_id )
+{   // Table lookup: the vp8_seg_feature_data_bits[] entry for feature_id.
+    return ( vp8_seg_feature_data_bits[feature_id] );
+}
+
+int is_segfeature_signed( SEG_LVL_FEATURES feature_id )
+{   // Table lookup: the segfeaturedata_signed[] entry for feature_id.
+    return segfeaturedata_signed[feature_id];
+}
+
+void clear_segdata( MACROBLOCKD *xd,
+                    int segment_id,
+                    SEG_LVL_FEATURES feature_id)
+{   // Reset the value stored for (segment_id, feature_id) to zero.
+    set_segdata(xd, segment_id, feature_id, 0);
+}
+
+void set_segdata( MACROBLOCKD *xd,
+                  int segment_id,
+                  SEG_LVL_FEATURES feature_id,
+                  int seg_data )
+{   // Store seg_data as the value for (segment_id, feature_id).
+    (xd->segment_feature_data[segment_id])[feature_id] = seg_data;
+}
+
+int get_segdata( MACROBLOCKD *xd,
+                 int segment_id,
+                 SEG_LVL_FEATURES feature_id )
+{   // Read back the value stored for (segment_id, feature_id).
+    return ( xd->segment_feature_data[segment_id][feature_id] );
+}
+#if CONFIG_FEATUREUPDATES
+int old_segfeature_active( MACROBLOCKD *xd,
+                           int segment_id,
+                           SEG_LVL_FEATURES feature_id )
+{
+    // Same test as segfeature_active(), but against the saved (old) mask.
+    const int feature_bit = 0x01 << feature_id;
+    return xd->segmentation_enabled &&
+           (xd->old_segment_feature_mask[segment_id] & feature_bit);
+}
+
+int get_old_segdata( MACROBLOCKD *xd,
+                     int segment_id,
+                     SEG_LVL_FEATURES feature_id )
+{   // Read the saved (old) value for (segment_id, feature_id).
+    return ( xd->old_segment_feature_data[segment_id][feature_id] );
+}
+
+int segfeature_changed( MACROBLOCKD *xd,
+                        int segment_id,
+                        SEG_LVL_FEATURES feature_id )
+{
+    // Nonzero when, with segmentation enabled, either the feature's mask
+    // bit or its stored data differs from the previously saved copy.
+    const int bit = 1 << feature_id;
+    if ( !xd->segmentation_enabled )
+        return 0;
+    if ( (xd->old_segment_feature_mask[segment_id] & bit) !=
+         (xd->segment_feature_mask[segment_id] & bit) )
+        return 1;
+    return xd->old_segment_feature_data[segment_id][feature_id] !=
+           xd->segment_feature_data[segment_id][feature_id];
+}
+
+void save_segment_info ( MACROBLOCKD *xd )
+{
+    // Snapshot the current per-segment feature masks and feature data
+    // into the old_segment_feature_* copies.
+    int seg, feature;
+
+    for (seg = 0; seg < MAX_MB_SEGMENTS; seg++)
+    {
+        for (feature = 0; feature < SEG_LVL_MAX; feature++)
+            xd->old_segment_feature_data[seg][feature] =
+                xd->segment_feature_data[seg][feature];
+
+        xd->old_segment_feature_mask[seg] = xd->segment_feature_mask[seg];
+    }
+}
+#endif
+void clear_segref( MACROBLOCKD *xd, int segment_id )
+{   // Drop every reference-frame bit stored for this segment.
+    (xd->segment_feature_data[segment_id])[SEG_LVL_REF_FRAME] = 0;
+}
+
+void set_segref( MACROBLOCKD *xd,
+                 int segment_id,
+                 MV_REFERENCE_FRAME ref_frame )
+{   // Add ref_frame's bit to the segment's SEG_LVL_REF_FRAME data.
+    const int ref_bit = 1 << ref_frame;
+    xd->segment_feature_data[segment_id][SEG_LVL_REF_FRAME] |= ref_bit;
+}
+
+int check_segref( MACROBLOCKD *xd,
+                  int segment_id,
+                  MV_REFERENCE_FRAME ref_frame )
+{   // 1 if ref_frame's bit is set in the segment's SEG_LVL_REF_FRAME data, else 0.
+    const int ref_bit = 1 << ref_frame;
+    return ( xd->segment_feature_data[segment_id][SEG_LVL_REF_FRAME] & ref_bit ) != 0;
+}
+
+int check_segref_inter(MACROBLOCKD *xd, int segment_id)
+{   // 1 if any bit other than INTRA_FRAME's is set in the ref-frame data, else 0.
+    return ( xd->segment_feature_data[segment_id][SEG_LVL_REF_FRAME] &
+             ~(1 << INTRA_FRAME) ) != 0;
+}
+
+int get_seg_tx_type(MACROBLOCKD *xd, int segment_id)
+{
+    // The segment's SEG_LVL_TRANSFORM data when that feature is active,
+    // otherwise the TX_4X4 default.
+    return segfeature_active(xd, segment_id, SEG_LVL_TRANSFORM)
+               ? get_segdata(xd, segment_id, SEG_LVL_TRANSFORM) : TX_4X4;
+}
+// TBD? Functions to read and write segment data with range / validity checking
--- a/vp8/common/seg_common.h
+++ b/vp8/common/seg_common.h
@ -0,0 +1,85 @@
+/*
+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "type_aliases.h"
+#include "onyxc_int.h"
+#include "vp8/common/blockd.h"
+
+#ifndef __INC_SEG_COMMON_H__
+#define __INC_SEG_COMMON_H__ 1
+
+int segfeature_active( MACROBLOCKD *xd,
+                       int segment_id,
+                       SEG_LVL_FEATURES feature_id );
+
+void clearall_segfeatures( MACROBLOCKD *xd );
+
+void enable_segfeature( MACROBLOCKD *xd,
+                        int segment_id,
+                        SEG_LVL_FEATURES feature_id );
+
+void disable_segfeature( MACROBLOCKD *xd,
+                         int segment_id,
+                         SEG_LVL_FEATURES feature_id );
+
+int seg_feature_data_bits( SEG_LVL_FEATURES feature_id );
+
+int is_segfeature_signed( SEG_LVL_FEATURES feature_id );
+
+void clear_segdata( MACROBLOCKD *xd,
+                    int segment_id,
+                    SEG_LVL_FEATURES feature_id);
+
+void set_segdata( MACROBLOCKD *xd,
+                  int segment_id,
+                  SEG_LVL_FEATURES feature_id,
+                  int seg_data );
+
+int get_segdata( MACROBLOCKD *xd,
+                 int segment_id,
+                 SEG_LVL_FEATURES feature_id );
+
+#if CONFIG_FEATUREUPDATES
+
+int old_segfeature_active( MACROBLOCKD *xd,
+                           int segment_id,
+                           SEG_LVL_FEATURES feature_id );
+
+int get_old_segdata( MACROBLOCKD *xd,
+                     int segment_id,
+                     SEG_LVL_FEATURES feature_id );
+
+void save_segment_info ( MACROBLOCKD *xd );
+
+int segfeature_changed( MACROBLOCKD *xd,
+                        int segment_id,
+                        SEG_LVL_FEATURES feature_id );
+
+
+
+#endif
+
+
+void clear_segref( MACROBLOCKD *xd, int segment_id );
+
+void set_segref( MACROBLOCKD *xd,
+                 int segment_id,
+                 MV_REFERENCE_FRAME ref_frame );
+
+int check_segref( MACROBLOCKD *xd,
+                  int segment_id,
+                  MV_REFERENCE_FRAME ref_frame );
+
+int check_segref_inter(MACROBLOCKD *xd, int segment_id);
+
+int get_seg_tx_type(MACROBLOCKD *xd, int segment_id);
+
+#endif /* __INC_SEG_COMMON_H__ */
+
--- a/vp8/common/subpixel.h
+++ b/vp8/common/subpixel.h
@ -34,6 +34,15 @@ extern prototype_subpixel_predict(vp8_subpix_sixtap16x16);
 #endif
 extern prototype_subpixel_predict(vp8_subpix_sixtap8x8);

+#ifndef vp8_subpix_sixtap_avg16x16
+#define vp8_subpix_sixtap_avg16x16 vp8_sixtap_predict_avg16x16_c
+#endif
+extern prototype_subpixel_predict(vp8_subpix_sixtap_avg16x16);
+
+#ifndef vp8_subpix_sixtap_avg8x8
+#define vp8_subpix_sixtap_avg8x8 vp8_sixtap_predict_avg8x8_c
+#endif
+extern prototype_subpixel_predict(vp8_subpix_sixtap_avg8x8);
 #ifndef vp8_subpix_sixtap8x4
 #define vp8_subpix_sixtap8x4 vp8_sixtap_predict8x4_c
 #endif
@ -54,6 +63,16 @@ extern prototype_subpixel_predict(vp8_subpix_bilinear16x16);
 #endif
 extern prototype_subpixel_predict(vp8_subpix_bilinear8x8);

+#ifndef vp8_subpix_bilinear_avg16x16
+#define vp8_subpix_bilinear_avg16x16 vp8_bilinear_predict_avg16x16_c
+#endif
+extern prototype_subpixel_predict(vp8_subpix_bilinear_avg16x16);
+
+#ifndef vp8_subpix_bilinear_avg8x8
+#define vp8_subpix_bilinear_avg8x8 vp8_bilinear_predict_avg8x8_c
+#endif
+extern prototype_subpixel_predict(vp8_subpix_bilinear_avg8x8);
+
 #ifndef vp8_subpix_bilinear8x4
 #define vp8_subpix_bilinear8x4 vp8_bilinear_predict8x4_c
 #endif
@ -69,10 +88,14 @@ typedef struct
 {
    vp8_subpix_fn_t  sixtap16x16;
    vp8_subpix_fn_t  sixtap8x8;
+    vp8_subpix_fn_t  sixtap_avg16x16;
+    vp8_subpix_fn_t  sixtap_avg8x8;
    vp8_subpix_fn_t  sixtap8x4;
    vp8_subpix_fn_t  sixtap4x4;
    vp8_subpix_fn_t  bilinear16x16;
    vp8_subpix_fn_t  bilinear8x8;
+    vp8_subpix_fn_t  bilinear_avg16x16;
+    vp8_subpix_fn_t  bilinear_avg8x8;
    vp8_subpix_fn_t  bilinear8x4;
    vp8_subpix_fn_t  bilinear4x4;
 } vp8_subpix_rtcd_vtable_t;
--- a/vp8/common/systemdependent.h
+++ b/vp8/common/systemdependent.h
@ -9,7 +9,7 @@
 */


-#include "vpx_config.h"
+#include "vpx_ports/config.h"
 #if ARCH_X86 || ARCH_X86_64
 void vpx_reset_mmx_state(void);
 #define vp8_clear_system_state() vpx_reset_mmx_state()
--- a/vp8/common/tapify.py
+++ b/vp8/common/tapify.py
@ -0,0 +1,106 @@
+"""
+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+"""
+#!/usr/bin/env python
+import sys,string,os,re,math,numpy
+scale = 2**16
+def dist(p1,p2):
+  x1,y1 = p1
+  x2,y2 = p2
+  if x1==x2 and y1==y2 :
+    return 1.0 
+  return 1/ math.sqrt((x1-x2)*(x1-x2)+(y1-y2)*(y1-y2))
+
+def gettaps(p):
+  def l(b):
+    return int(math.floor(b))
+  def h(b):
+    return int(math.ceil(b))
+  def t(b,p,s):
+    return int((scale*dist(b,p)+s/2)/s)
+  r,c = p
+  ul=[l(r),l(c)]
+  ur=[l(r),h(c)]
+  ll=[h(r),l(c)]
+  lr=[h(r),h(c)]
+  sum = dist(ul,p)+dist(ur,p)+dist(ll,p)+dist(lr,p)
+  t4 = scale - t(ul,p,sum) - t(ur,p,sum) - t(ll,p,sum);
+  return [[ul,t(ul,p,sum)],[ur,t(ur,p,sum)],
+          [ll,t(ll,p,sum)],[lr,t4]]
+
+def print_mb_taps(angle,blocksize):
+  theta = angle / 57.2957795;
+  affine = [[math.cos(theta),-math.sin(theta)],
+            [math.sin(theta),math.cos(theta)]]
+  radius = (float(blocksize)-1)/2
+  print " // angle of",angle,"degrees"
+  for y in range(blocksize) :
+    for x in range(blocksize) :
+      r,c = numpy.dot(affine,[y-radius, x-radius])
+      tps = gettaps([r+radius,c+radius])
+      for t in tps :
+        p,t = t
+        tr,tc = p
+        print " %2d, %2d, %5d, " % (tr,tc,t,),
+      print " // %2d,%2d " % (y,x)
+
+i=float(sys.argv[1])
+while  i <= float(sys.argv[2]) :
+  print_mb_taps(i,float(sys.argv[4]))
+  i=i+float(sys.argv[3])
+"""
+
+taps = []
+pt=dict()
+ptr=dict()
+for y in range(16) :
+  for x in range(16) :
+    r,c = numpy.dot(affine,[y-7.5, x-7.5])
+    tps = gettaps([r+7.5,c+7.5])
+    j=0
+    for tp in tps : 
+      p,i = tp
+      r,c = p
+      pt[y,x,j]= [p,i]
+      try: 
+        ptr[r,j,c].append([y,x])
+      except:
+        ptr[r,j,c]=[[y,x]]
+      j = j+1 
+
+for key in sorted(pt.keys()) :
+  print key,pt[key]
+
+lr = -99
+lj = -99 
+lc = 0
+
+shuf=""
+mask=""
+for r,j,c in sorted(ptr.keys()) :
+  for y,x in ptr[r,j,c] :
+    if lr != r or lj != j :
+      print "shuf_"+str(lr)+"_"+str(lj)+"_"+shuf.ljust(16,"0"), lc
+      shuf=""
+      lc = 0
+    for i in range(lc,c-1) :
+      shuf = shuf +"0"
+    shuf = shuf + hex(x)[2]
+    lc =c
+    break
+  lr = r
+  lj = j
+#  print r,j,c,ptr[r,j,c]    
+#  print 
+
+for r,j,c in sorted(ptr.keys()) :
+  for y,x in ptr[r,j,c] :
+    print r,j,c,y,x 
+    break
+"""
--- a/vp8/common/threading.h
+++ b/vp8/common/threading.h
@ -1,94 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef _PTHREAD_EMULATION
-#define _PTHREAD_EMULATION
-
-#if CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD
-
-/* Thread management macros */
-#ifdef _WIN32
-/* Win32 */
-#define _WIN32_WINNT 0x500 /* WINBASE.H - Enable signal_object_and_wait */
-#include <process.h>
-#include <windows.h>
-#define THREAD_FUNCTION DWORD WINAPI
-#define THREAD_FUNCTION_RETURN DWORD
-#define THREAD_SPECIFIC_INDEX DWORD
-#define pthread_t HANDLE
-#define pthread_attr_t DWORD
-#define pthread_create(thhandle,attr,thfunc,tharg) (int)((*thhandle=(HANDLE)_beginthreadex(NULL,0,(unsigned int (__stdcall *)(void *))thfunc,tharg,0,NULL))==NULL)
-#define pthread_join(thread, result) ((WaitForSingleObject((thread),INFINITE)!=WAIT_OBJECT_0) || !CloseHandle(thread))
-#define pthread_detach(thread) if(thread!=NULL)CloseHandle(thread)
-#define thread_sleep(nms) Sleep(nms)
-#define pthread_cancel(thread) terminate_thread(thread,0)
-#define ts_key_create(ts_key, destructor) {ts_key = TlsAlloc();};
-#define pthread_getspecific(ts_key) TlsGetValue(ts_key)
-#define pthread_setspecific(ts_key, value) TlsSetValue(ts_key, (void *)value)
-#define pthread_self() GetCurrentThreadId()
-#else
-#ifdef __APPLE__
-#include <mach/mach_init.h>
-#include <mach/semaphore.h>
-#include <mach/task.h>
-#include <time.h>
-#include <unistd.h>
-
-#else
-#include <semaphore.h>
-#endif
-
-#include <pthread.h>
-/* pthreads */
-/* Nearly everything is already defined */
-#define THREAD_FUNCTION void *
-#define THREAD_FUNCTION_RETURN void *
-#define THREAD_SPECIFIC_INDEX pthread_key_t
-#define ts_key_create(ts_key, destructor) pthread_key_create (&(ts_key), destructor);
-#endif
-
-/* Syncrhronization macros: Win32 and Pthreads */
-#ifdef _WIN32
-#define sem_t HANDLE
-#define pause(voidpara) __asm PAUSE
-#define sem_init(sem, sem_attr1, sem_init_value) (int)((*sem = CreateSemaphore(NULL,0,32768,NULL))==NULL)
-#define sem_wait(sem) (int)(WAIT_OBJECT_0 != WaitForSingleObject(*sem,INFINITE))
-#define sem_post(sem) ReleaseSemaphore(*sem,1,NULL)
-#define sem_destroy(sem) if(*sem)((int)(CloseHandle(*sem))==TRUE)
-#define thread_sleep(nms) Sleep(nms)
-
-#else
-
-#ifdef __APPLE__
-#define sem_t semaphore_t
-#define sem_init(X,Y,Z) semaphore_create(mach_task_self(), X, SYNC_POLICY_FIFO, Z)
-#define sem_wait(sem) (semaphore_wait(*sem) )
-#define sem_post(sem) semaphore_signal(*sem)
-#define sem_destroy(sem) semaphore_destroy(mach_task_self(),*sem)
-#define thread_sleep(nms) /* { struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);} */
-#else
-#include <unistd.h>
-#include <sched.h>
-#define thread_sleep(nms) sched_yield();/* {struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);} */
-#endif
-/* Not Windows. Assume pthreads */
-
-#endif
-
-#if ARCH_X86 || ARCH_X86_64
-#include "vpx_ports/x86.h"
-#else
-#define x86_pause_hint()
-#endif
-
-#endif /* CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD */
-
-#endif
--- a/vp8/common/x86/mask_sse3.asm
+++ b/vp8/common/x86/mask_sse3.asm
@ -0,0 +1,484 @@
+;
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+;void int vp8_makemask_sse3(
+;    unsigned char *y, unsigned char *u, unsigned char *v,
+;    unsigned char *ym, unsigned char *uvm,
+;    int yp, int uvp,
+;    int ys, int us, int vs,
+;    int yt, int ut, int vt)
+; Writes a 16x16 byte mask (16 bytes per row) to ym. A byte is 0xFF when the
+; y sample is within the y tolerance of the centre y value and the u and v
+; samples covering that pixel are within their tolerances; otherwise 0.
+; Each u/v sample is applied to a 2x2 group of y pixels.
+; NOTE(review): the body reads arg(4)/arg(5) as the y/uv strides and
+; arg(6)..arg(11) as centre values and tolerances, i.e. a 12-argument layout
+; with no uvm pointer - confirm which of prototype and code is correct.
+; Uses r10 for the y stride so callee-saved rbx is left untouched.
+global sym(vp8_makemask_sse3)
+sym(vp8_makemask_sse3):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 14
+    push        rsi
+    push        rdi
+    ; end prolog
+
+        mov             rsi,        arg(0) ;y
+        mov             rdi,        arg(1) ;u
+        mov             rcx,        arg(2) ;v
+        mov             rax,        arg(3) ;ym
+        movsxd          r10,        dword arg(4) ;yp
+        movsxd          rdx,        dword arg(5) ;uvp
+
+        pxor            xmm0,xmm0
+
+        ;make 16 copies of the center y value
+        movd            xmm1, arg(6)
+        pshufb          xmm1, xmm0
+
+        ; make 16 copies of the center u value
+        movd            xmm2, arg(7)
+        pshufb          xmm2, xmm0
+
+        ; make 16 copies of the center v value
+        movd            xmm3, arg(8)
+        pshufb          xmm3, xmm0
+        unpcklpd        xmm2, xmm3
+
+        ;make 16 copies of the y tolerance
+        movd            xmm3, arg(9)
+        pshufb          xmm3, xmm0
+
+        ;make 16 copies of the u tolerance
+        movd            xmm4, arg(10)
+        pshufb          xmm4, xmm0
+
+        ;make 16 copies of the v tolerance
+        movd            xmm5, arg(11)
+        pshufb          xmm5, xmm0
+        unpckhpd        xmm4, xmm5
+
+        mov             r8,8                ; 8 iterations, two y rows each
+
+NextPairOfRows:
+
+        ;grab the y source values
+        movdqu          xmm0, [rsi]
+
+        ;compute abs difference between source and y target
+        movdqa          xmm6, xmm1
+        movdqa          xmm7, xmm0
+        psubusb         xmm0, xmm1
+        psubusb         xmm6, xmm7
+        por             xmm0, xmm6
+
+        ;0xFF where y tolerance > abs difference (pcmpgtb is a signed byte compare)
+        movdqa          xmm6, xmm3
+        pcmpgtb         xmm6, xmm0
+
+        ;grab the next row of y source values
+        add             rsi, r10
+        movdqu          xmm0, [rsi]
+
+        ;compute abs difference between source and y target
+        movdqa          xmm11, xmm1
+        movdqa          xmm7, xmm0
+        psubusb         xmm0, xmm1
+        psubusb         xmm11, xmm7
+        por             xmm0, xmm11
+
+        ;0xFF where y tolerance > abs difference (pcmpgtb is a signed byte compare)
+        movdqa          xmm11, xmm3
+        pcmpgtb         xmm11, xmm0
+
+
+        ;grab the u and v source values
+        movdqu          xmm7, [rdi]
+        movdqu          xmm8, [rcx]
+        unpcklpd        xmm7, xmm8
+
+        ;compute abs difference between source and uv targets
+        movdqa          xmm9, xmm2
+        movdqa          xmm10, xmm7
+        psubusb         xmm7, xmm2
+        psubusb         xmm9, xmm10
+        por             xmm7, xmm9
+
+        ;check whether the number is < tolerance
+        movdqa          xmm0, xmm4
+        pcmpgtb         xmm0, xmm7
+
+        ;double  u and v masks
+        movdqa          xmm8, xmm0
+        punpckhbw       xmm0, xmm0
+        punpcklbw       xmm8, xmm8
+
+        ;mask row 0 and output
+        pand            xmm6, xmm8
+        pand            xmm6, xmm0
+        movdqa          [rax],xmm6
+
+        ;mask row 1 and output
+        pand            xmm11, xmm8
+        pand            xmm11, xmm0
+        movdqa          [rax+16],xmm11
+
+
+        ; to the next row or set of rows
+        add             rsi, r10
+        add             rdi, rdx
+        add             rcx, rdx
+        add             rax,32
+        dec r8
+        jnz NextPairOfRows
+
+
+    ; begin epilog
+    pop         rdi
+    pop         rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+;GROW_HORIZ (register for result, source register or mem local)
+; result = source | (source shifted left one byte) | (source shifted right
+; one byte); uses xmm14 and xmm15 as scratch
+%macro GROW_HORIZ 2
+    movdqa          %1, %2
+    movdqa          xmm14, %1
+    movdqa          xmm15, %1
+    pslldq          xmm14, 1
+    psrldq          xmm15, 1
+    por             %1,xmm14
+    por             %1,xmm15
+%endmacro
+;GROW_VERT (result, center row, above row, below row) - result = OR of the three rows
+%macro GROW_VERT 4
+    movdqa          %1,%2
+    por             %1,%3
+    por             %1,%4
+%endmacro
+
+;GROW_NEXTLINE (new line to grow, new source, line to write) - stores xmm0|xmm1|xmm2 via xmm3
+%macro GROW_NEXTLINE 3
+    GROW_HORIZ %1, %2
+    GROW_VERT xmm3, xmm0, xmm1, xmm2
+    movdqa %3,xmm3
+%endmacro
+
+
+;void int vp8_growmaskmb_sse3(unsigned char *om, unsigned char *nm)
+; Grows a 16x16 byte mask (16-byte rows, accessed with aligned movdqa) by one byte in
+; every direction: each nm row is the OR of the horizontally grown om rows above, at and below it.
+global sym(vp8_growmaskmb_sse3)
+sym(vp8_growmaskmb_sse3):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 2
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    mov             rsi,        arg(0) ;src
+    mov             rdi,        arg(1) ;rst
+
+    GROW_HORIZ xmm0, [rsi]
+    GROW_HORIZ xmm1, [rsi+16]
+    GROW_HORIZ xmm2, [rsi+32]
+
+    GROW_VERT xmm3, xmm0, xmm1, xmm2
+    por xmm0,xmm1                       ; first output row has no row above it
+    movdqa [rdi], xmm0
+    movdqa [rdi+16],xmm3
+
+    GROW_NEXTLINE xmm0,[rsi+48],[rdi+32]
+    GROW_NEXTLINE xmm1,[rsi+64],[rdi+48]
+    GROW_NEXTLINE xmm2,[rsi+80],[rdi+64]
+    GROW_NEXTLINE xmm0,[rsi+96],[rdi+80]
+    GROW_NEXTLINE xmm1,[rsi+112],[rdi+96]
+    GROW_NEXTLINE xmm2,[rsi+128],[rdi+112]
+    GROW_NEXTLINE xmm0,[rsi+144],[rdi+128]
+    GROW_NEXTLINE xmm1,[rsi+160],[rdi+144]
+    GROW_NEXTLINE xmm2,[rsi+176],[rdi+160]
+    GROW_NEXTLINE xmm0,[rsi+192],[rdi+176]
+    GROW_NEXTLINE xmm1,[rsi+208],[rdi+192]
+    GROW_NEXTLINE xmm2,[rsi+224],[rdi+208]
+    GROW_NEXTLINE xmm0,[rsi+240],[rdi+224]
+
+    por xmm0,xmm2                       ; last output row has no row below it
+    movdqa [rdi+240], xmm0
+
+    ; begin epilog
+    pop         rdi
+    pop         rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+
+
+;unsigned int vp8_sad16x16_masked_wmt(
+;    unsigned char *src_ptr, int src_stride,
+;    unsigned char *ref_ptr, int ref_stride,
+;    unsigned char *mask)
+; SAD over 16 rows of 16 bytes after ANDing src and ref with the mask row
+; (mask advances 16 bytes per row); mask is held in r10, not callee-saved rbx.
+global sym(vp8_sad16x16_masked_wmt)
+sym(vp8_sad16x16_masked_wmt):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 5
+    push        rsi
+    push        rdi
+    ; end prolog
+    mov             rsi,        arg(0) ;src_ptr
+    mov             rdi,        arg(2) ;ref_ptr
+
+    mov             r10,        arg(4) ;mask
+    movsxd          rax,        dword ptr arg(1) ;src_stride
+    movsxd          rdx,        dword ptr arg(3) ;ref_stride
+
+    mov             rcx,        16
+
+    pxor            xmm3,       xmm3
+
+NextSadRow:
+    movdqu          xmm0,       [rsi]
+    movdqu          xmm1,       [rdi]
+    movdqu          xmm2,       [r10]
+    pand            xmm0,       xmm2
+    pand            xmm1,       xmm2
+
+    psadbw          xmm0,       xmm1
+    paddw           xmm3,       xmm0
+
+    add             rsi, rax
+    add             rdi, rdx
+    add             r10,  16
+
+    dec rcx
+    jnz NextSadRow
+
+    movdqa          xmm4 ,     xmm3
+    psrldq          xmm4,       8
+    paddw           xmm3,      xmm4
+    movq            rax,       xmm3
+    ; begin epilog
+    pop rdi
+    pop rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+
+;unsigned int vp8_sad16x16_unmasked_wmt(
+;    unsigned char *src_ptr, int src_stride,
+;    unsigned char *ref_ptr, int ref_stride,
+;    unsigned char *mask)
+; SAD over 16 rows of 16 bytes after ORing src and ref with the mask row
+; (mask advances 16 bytes per row); mask is held in r10, not callee-saved rbx.
+global sym(vp8_sad16x16_unmasked_wmt)
+sym(vp8_sad16x16_unmasked_wmt):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 5
+    push        rsi
+    push        rdi
+    ; end prolog
+    mov             rsi,        arg(0) ;src_ptr
+    mov             rdi,        arg(2) ;ref_ptr
+
+    mov             r10,        arg(4) ;mask
+    movsxd          rax,        dword ptr arg(1) ;src_stride
+    movsxd          rdx,        dword ptr arg(3) ;ref_stride
+
+    mov             rcx,        16
+
+    pxor            xmm3,       xmm3
+
+next_vp8_sad16x16_unmasked_wmt:
+    movdqu          xmm0,       [rsi]
+    movdqu          xmm1,       [rdi]
+    movdqu          xmm2,       [r10]
+    por             xmm0,       xmm2
+    por             xmm1,       xmm2
+
+    psadbw          xmm0,       xmm1
+    paddw           xmm3,       xmm0
+
+    add             rsi, rax
+    add             rdi, rdx
+    add             r10,  16
+
+    dec rcx
+    jnz next_vp8_sad16x16_unmasked_wmt
+
+    movdqa          xmm4 ,     xmm3
+    psrldq          xmm4,       8
+    paddw           xmm3,      xmm4
+    movq            rax,        xmm3
+    ; begin epilog
+    pop rdi
+    pop rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+
+;unsigned int vp8_masked_predictor_wmt(
+;    unsigned char *masked, unsigned char *unmasked, int src_stride,
+;    unsigned char *dst_ptr, int dst_stride, unsigned char *mask)
+; For 16 rows of 16 bytes: dst = (masked & mask) | (unmasked & ~mask).
+; Both source pointers step by src_stride, dst by dst_stride, and the mask
+; by 16 bytes per row. The mask pointer lives in r10 so callee-saved rbx is
+; not clobbered. rax is not loaded with a return value.
+global sym(vp8_masked_predictor_wmt)
+sym(vp8_masked_predictor_wmt):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    push        rsi
+    push        rdi
+    ; end prolog
+    mov             rsi,        arg(0) ;masked
+    mov             rdi,        arg(1) ;unmasked
+
+    mov             r10,        arg(5) ;mask
+    movsxd          rax,        dword ptr arg(2) ;src_stride
+    mov             r11,        arg(3) ; destination
+    movsxd          rdx,        dword ptr arg(4) ;dst_stride
+
+    mov             rcx,        16
+
+    pxor            xmm3,       xmm3
+
+next_vp8_masked_predictor_wmt:
+    movdqu          xmm0,       [rsi]
+    movdqu          xmm1,       [rdi]
+    movdqu          xmm2,       [r10]
+
+    pand            xmm0,       xmm2
+    pandn           xmm2,       xmm1
+    por             xmm0,       xmm2
+    movdqu          [r11],      xmm0
+
+    add             r11, rdx
+    add             rsi, rax
+    add             rdi, rax            ; unmasked source steps by src_stride, as in the uv variant
+    add             r10,  16
+
+    dec rcx
+    jnz next_vp8_masked_predictor_wmt
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+;unsigned int vp8_masked_predictor_uv_wmt(
+;    unsigned char *masked, unsigned char *unmasked, int src_stride,
+;    unsigned char *dst_ptr, int dst_stride, unsigned char *mask)
+; For 8 rows of 8 bytes: dst = (masked & mask) | (unmasked & ~mask).
+; Both source pointers step by src_stride, dst by dst_stride, and the mask
+; by 8 bytes per row. The mask pointer lives in r10 so callee-saved rbx is
+; not clobbered. rax is not loaded with a return value.
+global sym(vp8_masked_predictor_uv_wmt)
+sym(vp8_masked_predictor_uv_wmt):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    push        rsi
+    push        rdi
+    ; end prolog
+    mov             rsi,        arg(0) ;masked
+    mov             rdi,        arg(1) ;unmasked
+
+    mov             r10,        arg(5) ;mask
+    movsxd          rax,        dword ptr arg(2) ;src_stride
+    mov             r11,        arg(3) ; destination
+    movsxd          rdx,        dword ptr arg(4) ;dst_stride
+
+    mov             rcx,        8
+
+    pxor            xmm3,       xmm3
+
+next_vp8_masked_predictor_uv_wmt:
+    movq            xmm0,       [rsi]
+    movq            xmm1,       [rdi]
+    movq            xmm2,       [r10]
+
+    pand            xmm0,       xmm2
+    pandn           xmm2,       xmm1
+    por             xmm0,       xmm2
+    movq            [r11],      xmm0
+
+    add             r11, rdx
+    add             rsi, rax
+    add             rdi, rax
+    add             r10,  8
+
+    dec rcx
+    jnz next_vp8_masked_predictor_uv_wmt
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+
+;unsigned int vp8_uv_from_y_mask(unsigned char *ymask, unsigned char *uvmask)
+; Subsamples a 16-byte-per-row mask 2:1 both ways: keeps the even bytes of
+; every second row (8 rows x 8 bytes out). rax is not set to a return value.
+global sym(vp8_uv_from_y_mask)
+sym(vp8_uv_from_y_mask):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    push        rsi
+    push        rdi
+    ; end prolog
+    mov             rsi,        arg(0) ;src_ptr
+    mov             rdi,        arg(1) ;dst_ptr
+
+
+    mov             rcx,        8
+
+    pxor            xmm3,       xmm3
+
+next_p8_uv_from_y_mask:
+    movdqu          xmm0,       [rsi]
+    pshufb          xmm0, [shuf1b] ;[GLOBAL(shuf1b)]
+    movq            [rdi],xmm0
+    add             rdi, 8
+    add             rsi,32              ; skip one 16-byte row of the source mask
+
+    dec rcx
+    jnz next_p8_uv_from_y_mask
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+SECTION_RODATA
+align 16
+shuf1b:                                 ; pshufb control: even-indexed source bytes go to the low 8 bytes
+    db 0, 2, 4, 6, 8, 10, 12, 14, 0, 0, 0, 0, 0, 0, 0, 0
+
--- a/vp8/common/x86/recon_wrapper_sse2.c
+++ b/vp8/common/x86/recon_wrapper_sse2.c
@ -8,7 +8,7 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

-#include "vpx_config.h"
+#include "vpx_ports/config.h"
 #include "vp8/common/recon.h"
 #include "recon_x86.h"
 #include "vpx_mem/vpx_mem.h"
--- a/vp8/common/x86/subpixel_ssse3.asm
+++ b/vp8/common/x86/subpixel_ssse3.asm
@ -1495,6 +1495,25 @@ k2_k4:
    times 8 db  36,  -11
    times 8 db  12,   -6
 align 16
+%if CONFIG_SIXTEENTH_SUBPEL_UV
+vp8_bilinear_filters_ssse3:
+    times 8 db 128, 0
+    times 8 db 120, 8
+    times 8 db 112, 16
+    times 8 db 104, 24
+    times 8 db 96,  32
+    times 8 db 88,  40
+    times 8 db 80,  48
+    times 8 db 72,  56
+    times 8 db 64,  64
+    times 8 db 56,  72
+    times 8 db 48,  80
+    times 8 db 40,  88
+    times 8 db 32,  96
+    times 8 db 24,  104
+    times 8 db 16,  112
+    times 8 db 8,   120
+%else
 vp8_bilinear_filters_ssse3:
    times 8 db 128, 0
    times 8 db 112, 16
@ -1504,4 +1523,5 @@ vp8_bilinear_filters_ssse3:
    times 8 db 48,  80
    times 8 db 32,  96
    times 8 db 16,  112
+%endif

--- a/vp8/common/x86/vp8_asm_stubs.c
+++ b/vp8/common/x86/vp8_asm_stubs.c
@ -9,12 +9,19 @@
 */


-#include "vpx_config.h"
+#include "vpx_ports/config.h"
 #include "vpx_ports/mem.h"
 #include "vp8/common/subpixel.h"

+#if CONFIG_SIXTEENTH_SUBPEL_UV
+extern const short vp8_six_tap_mmx[16][6*8];
+extern const short vp8_bilinear_filters_mmx[16][2*8];
+#else
 extern const short vp8_six_tap_mmx[8][6*8];
 extern const short vp8_bilinear_filters_mmx[8][2*8];
+#endif
+
+//#define ANNOUNCE_FUNCTION

 extern void vp8_filter_block1d_h6_mmx
 (
@ -128,6 +135,9 @@ void vp8_sixtap_predict4x4_mmx
    int dst_pitch
 )
 {
+#ifdef ANNOUNCE_FUNCTION
+    printf("vp8_sixtap_predict4x4_mmx\n");
+#endif
    DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 16*16);  /* Temp data bufffer used in filtering */
    const short *HFilter, *VFilter;
    HFilter = vp8_six_tap_mmx[xoffset];
@ -149,6 +159,9 @@ void vp8_sixtap_predict16x16_mmx
 )
 {

+#ifdef ANNOUNCE_FUNCTION
+    printf("vp8_sixtap_predict16x16_mmx\n");
+#endif
    DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 24*24);  /* Temp data bufffer used in filtering */

    const short *HFilter, *VFilter;
@ -181,6 +194,9 @@ void vp8_sixtap_predict8x8_mmx
 )
 {

+#ifdef ANNOUNCE_FUNCTION
+    printf("vp8_sixtap_predict8x8_mmx\n");
+#endif
    DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256);    /* Temp data bufffer used in filtering */

    const short *HFilter, *VFilter;
@ -206,7 +222,9 @@ void vp8_sixtap_predict8x4_mmx
    int dst_pitch
 )
 {
-
+#ifdef ANNOUNCE_FUNCTION
+    printf("vp8_sixtap_predict8x4_mmx\n");
+#endif
    DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256);    /* Temp data bufffer used in filtering */

    const short *HFilter, *VFilter;
@ -256,6 +274,9 @@ void vp8_sixtap_predict16x16_sse2
    DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 24*24);    /* Temp data bufffer used in filtering */

    const short *HFilter, *VFilter;
+#ifdef ANNOUNCE_FUNCTION
+    printf("vp8_sixtap_predict16x16_sse2\n");
+#endif

    if (xoffset)
    {
@ -295,6 +316,9 @@ void vp8_sixtap_predict8x8_sse2
 {
    DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256);  /* Temp data bufffer used in filtering */
    const short *HFilter, *VFilter;
+#ifdef ANNOUNCE_FUNCTION
+    printf("vp8_sixtap_predict8x8_sse2\n");
+#endif

    if (xoffset)
    {
@ -333,6 +357,9 @@ void vp8_sixtap_predict8x4_sse2
 {
    DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256);  /* Temp data bufffer used in filtering */
    const short *HFilter, *VFilter;
+#ifdef ANNOUNCE_FUNCTION
+    printf("vp8_sixtap_predict8x4_sse2\n");
+#endif

    if (xoffset)
    {
@ -434,6 +461,9 @@ void vp8_sixtap_predict16x16_ssse3
 )
 {
    DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 24*24);
+#ifdef ANNOUNCE_FUNCTION
+    printf("vp8_sixtap_predict16x16_ssse3\n");
+#endif

    if (xoffset)
    {
@ -466,6 +496,9 @@ void vp8_sixtap_predict8x8_ssse3
 )
 {
    DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 256);
+#ifdef ANNOUNCE_FUNCTION
+    printf("vp8_sixtap_predict8x8_ssse3\n");
+#endif

    if (xoffset)
    {
@ -498,6 +531,9 @@ void vp8_sixtap_predict8x4_ssse3
 )
 {
    DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 256);
+#ifdef ANNOUNCE_FUNCTION
+    printf("vp8_sixtap_predict8x4_ssse3\n");
+#endif

    if (xoffset)
    {
@ -530,6 +566,9 @@ void vp8_sixtap_predict4x4_ssse3
 )
 {
  DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 4*9);
+#ifdef ANNOUNCE_FUNCTION
+    printf("vp8_sixtap_predict4x4_ssse3\n");
+#endif

  if (xoffset)
  {
--- a/vp8/common/x86/x86_systemdependent.c
+++ b/vp8/common/x86/x86_systemdependent.c
@ -43,17 +43,17 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
        rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_mmx;
        rtcd->idct.iwalsh1     = vp8_short_inv_walsh4x4_1_mmx;

-
-
        rtcd->recon.recon       = vp8_recon_b_mmx;
        rtcd->recon.copy8x8     = vp8_copy_mem8x8_mmx;
        rtcd->recon.copy8x4     = vp8_copy_mem8x4_mmx;
        rtcd->recon.copy16x16   = vp8_copy_mem16x16_mmx;

+#if CONFIG_ENHANCED_INTERP == 0 && CONFIG_HIGH_PRECISION_MV == 0 && CONFIG_SIXTEENTH_SUBPEL_UV == 0
        rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_mmx;
        rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_mmx;
        rtcd->subpix.sixtap8x4     = vp8_sixtap_predict8x4_mmx;
        rtcd->subpix.sixtap4x4     = vp8_sixtap_predict4x4_mmx;
+#endif
        rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_mmx;
        rtcd->subpix.bilinear8x8   = vp8_bilinear_predict8x8_mmx;
        rtcd->subpix.bilinear8x4   = vp8_bilinear_predict8x4_mmx;
@ -91,9 +91,11 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)

        rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_sse2;

+#if CONFIG_ENHANCED_INTERP == 0 && CONFIG_HIGH_PRECISION_MV == 0 && CONFIG_SIXTEENTH_SUBPEL_UV == 0
        rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_sse2;
        rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_sse2;
        rtcd->subpix.sixtap8x4     = vp8_sixtap_predict8x4_sse2;
+#endif
        rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_sse2;
        rtcd->subpix.bilinear8x8   = vp8_bilinear_predict8x8_sse2;

@ -120,12 +122,14 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)

    if (flags & HAS_SSSE3)
    {
+#if CONFIG_ENHANCED_INTERP == 0 && CONFIG_HIGH_PRECISION_MV == 0
        rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_ssse3;
        rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_ssse3;
        rtcd->subpix.sixtap8x4     = vp8_sixtap_predict8x4_ssse3;
        rtcd->subpix.sixtap4x4     = vp8_sixtap_predict4x4_ssse3;
        rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_ssse3;
        rtcd->subpix.bilinear8x8   = vp8_bilinear_predict8x8_ssse3;
+#endif

        rtcd->recon.build_intra_predictors_mbuv =
            vp8_build_intra_predictors_mbuv_ssse3;
--- a/vp8/decoder/arm/arm_dsystemdependent.c
+++ b/vp8/decoder/arm/arm_dsystemdependent.c
@ -9,7 +9,7 @@
 */


-#include "vpx_config.h"
+#include "vpx_ports/config.h"
 #include "vpx_ports/arm.h"
 #include "vp8/common/blockd.h"
 #include "vp8/common/pragmas.h"
--- a/vp8/decoder/arm/armv6/idct_blk_v6.c
+++ b/vp8/decoder/arm/armv6/idct_blk_v6.c
@ -8,7 +8,7 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

-#include "vpx_config.h"
+#include "vpx_ports/config.h"
 #include "vp8/common/idct.h"
 #include "vp8/decoder/dequantize.h"

--- a/vp8/decoder/arm/dequantize_arm.c
+++ b/vp8/decoder/arm/dequantize_arm.c
@ -9,7 +9,7 @@
 */


-#include "vpx_config.h"
+#include "vpx_ports/config.h"
 #include "vp8/decoder/dequantize.h"
 #include "vp8/common/idct.h"
 #include "vpx_mem/vpx_mem.h"
--- a/vp8/decoder/arm/neon/idct_blk_neon.c
+++ b/vp8/decoder/arm/neon/idct_blk_neon.c
@ -8,7 +8,7 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

-#include "vpx_config.h"
+#include "vpx_ports/config.h"
 #include "vp8/common/idct.h"
 #include "vp8/decoder/dequantize.h"

--- a/vp8/decoder/dboolhuff.h
+++ b/vp8/decoder/dboolhuff.h
@ -13,7 +13,7 @@
 #define DBOOLHUFF_H
 #include <stddef.h>
 #include <limits.h>
-#include "vpx_config.h"
+#include "vpx_ports/config.h"
 #include "vpx_ports/mem.h"
 #include "vpx/vpx_integer.h"

--- a/vp8/decoder/decodemv.c
+++ b/vp8/decoder/decodemv.c
--- a/vp8/decoder/decoderthreading.h
+++ b/vp8/decoder/decoderthreading.h
@ -1,26 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-
-
-
-#ifndef _DECODER_THREADING_H
-#define _DECODER_THREADING_H
-
-#if CONFIG_MULTITHREAD
-extern void vp8mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd);
-extern void vp8_decoder_remove_threads(VP8D_COMP *pbi);
-extern void vp8_decoder_create_threads(VP8D_COMP *pbi);
-extern void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows);
-extern void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows);
-#endif
-
-#endif
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
--- a/vp8/decoder/dequantize.c
+++ b/vp8/decoder/dequantize.c
@ -9,17 +9,24 @@
 */


-#include "vpx_config.h"
+#include "vpx_ports/config.h"
 #include "dequantize.h"
 #include "vp8/common/idct.h"
 #include "vpx_mem/vpx_mem.h"
+#include "onyxd_int.h"

 extern void vp8_short_idct4x4llm_c(short *input, short *output, int pitch) ;
 extern void vp8_short_idct4x4llm_1_c(short *input, short *output, int pitch);
+extern void vp8_short_idct8x8_c(short *input, short *output, int pitch);
+extern void vp8_short_idct8x8_1_c(short *input, short *output, int pitch);

+#ifdef DEC_DEBUG
+extern int dec_debug;
+#endif

 void vp8_dequantize_b_c(BLOCKD *d)
 {
+
    int i;
    short *DQ  = d->dqcoeff;
    short *Q   = d->qcoeff;
@ -111,3 +118,212 @@ void vp8_dequant_dc_idct_add_c(short *input, short *dq, unsigned char *pred,
        pred += pitch;
    }
 }
+/* Dequantize one block: dqcoeff[i] = (short)(qcoeff[i] * dequant[i]) for i = 0..15. */
+void vp8_dequantize_b_2x2_c(BLOCKD *d)
+{
+    int i;
+    short *DQ  = d->dqcoeff;   /* destination: dequantized coefficients */
+    short *Q   = d->qcoeff;    /* source: quantized coefficients */
+    short *DQC = d->dequant;   /* per-position dequantization factors */
+    /* NOTE(review): all 16 entries are processed although the name says 2x2 -- confirm this is intended. */
+    for (i = 0; i < 16; i++)
+    {
+        DQ[i] = (short)((Q[i] * DQC[i]));
+    }
+#ifdef DEC_DEBUG
+    if (dec_debug) {   /* optional trace: one row of quantized values, then one row of dequantized values */
+      int j;
+      printf("Dequantize 2x2\n");
+      for (j=0;j<16;j++) printf("%d ", Q[j]); printf("\n");
+      for (j=0;j<16;j++) printf("%d ", DQ[j]); printf("\n");
+    }
+#endif
+}
+/* Dequantize 64 coefficients in place, call vp8_short_idct8x8_c, add the result to pred, clamp to 0..255, store in dest, then zero input. */
+void vp8_dequant_idct_add_8x8_c(short *input, short *dq, unsigned char *pred,
+                                unsigned char *dest, int pitch, int stride)// pitch: row step of pred; stride: row step of dest
+{
+    short output[64];
+    short *diff_ptr = output;
+    int r, c, b;
+    int i;
+    unsigned char *origdest = dest;
+    unsigned char *origpred = pred;
+
+#ifdef DEC_DEBUG
+    if (dec_debug) {
+      int j;
+      printf("Input 8x8\n");
+      for (j=0;j<64;j++) {
+        printf("%d ", input[j]);
+        if (j%8 == 7) printf("\n");
+      }
+    }
+#endif
+
+    input[0]= input[0] * dq[0];   /* DC coefficient uses dq[0] */
+
+    // every AC coefficient (1..63) is scaled by the single factor dq[1]
+    for (i = 1; i < 64; i++)
+    {
+      input[i]=input[i] * dq[1];
+    }
+#ifdef DEC_DEBUG
+    if (dec_debug) {
+      int j;
+      printf("Input DQ 8x8\n");
+      for (j=0;j<64;j++) {
+        printf("%d ", input[j]);
+        if (j%8 == 7) printf("\n");
+      }
+    }
+#endif
+
+    // per the original author, the idct halves ( >> 1) the pitch, hence 16 for the 8-short-wide output rows
+    vp8_short_idct8x8_c(input, output, 16);
+#ifdef DEC_DEBUG
+    if (dec_debug) {
+      int j;
+      printf("Output 8x8\n");
+      for (j=0;j<64;j++) {
+        printf("%d ", output[j]);
+        if (j%8 == 7) printf("\n");
+      }
+    }
+#endif
+
+    vpx_memset(input, 0, 128);// clears 128 bytes, i.e. all 64 coefficients when short is 2 bytes
+
+    for (b = 0; b < 4; b++)   /* the 8x8 result is written as four 4x4 quadrants */
+    {
+      for (r = 0; r < 4; r++)
+      {
+          for (c = 0; c < 4; c++)
+          {
+              int a = diff_ptr[c] + pred[c];   /* prediction plus residual */
+
+              if (a < 0)
+                  a = 0;
+
+              if (a > 255)
+                  a = 255;
+
+              dest[c] = (unsigned char) a;
+          }
+
+          dest += stride;
+          diff_ptr += 8;   /* rows of output are 8 shorts apart */
+          pred += pitch;
+      }
+      diff_ptr = output + (b+1) / 2 * 4 * 8 + (b+1) % 2 * 4;   /* move to quadrant b+1: row ((b+1)/2)*4, column ((b+1)%2)*4 */
+      dest = origdest + (b+1) / 2 * 4 * stride + (b+1) % 2 * 4;
+      pred = origpred + (b+1) / 2 * 4 * pitch + (b+1) % 2 * 4;
+   }
+#ifdef DEC_DEBUG
+    if (dec_debug) {   /* optional trace of the 8x8 area just written to dest */
+      int k,j;
+      printf("Final 8x8\n");
+      for (j=0;j<8;j++) {
+        for (k=0;k<8;k++) {
+          printf("%d ", origdest[k]);
+        }
+        printf("\n");
+        origdest+=stride;
+      }
+    }
+#endif
+}
+/* Same as vp8_dequant_idct_add_8x8_c except that input[0] is replaced by the caller-supplied Dc instead of being dequantized. */
+void vp8_dequant_dc_idct_add_8x8_c(short *input, short *dq, unsigned char *pred,
+                               unsigned char *dest, int pitch, int stride,
+                               int Dc)// Dc: already-reconstructed DC value, stored into input[0] unchanged
+{
+    short output[64];
+    short *diff_ptr = output;
+    int r, c, b;
+    int i;
+    unsigned char *origdest = dest;
+    unsigned char *origpred = pred;
+
+    input[0] = (short)Dc;//Dc is already reconstructed, so it is not dequantized here
+    // only the AC coefficients (1..63) are multiplied by dq[1] below
+#ifdef DEC_DEBUG
+    if (dec_debug) {
+      int j;
+      printf("Input 8x8\n");
+      for (j=0;j<64;j++) {
+        printf("%d ", input[j]);
+        if (j%8 == 7) printf("\n");
+      }
+    }
+#endif
+    for (i = 1; i < 64; i++)
+    {
+        input[i]=input[i] * dq[1];
+    }
+
+#ifdef DEC_DEBUG
+    if (dec_debug) {
+      int j;
+      printf("Input DQ 8x8\n");
+      for (j=0;j<64;j++) {
+        printf("%d ", input[j]);
+        if (j%8 == 7) printf("\n");
+      }
+    }
+#endif
+
+    // per the original author, the idct halves ( >> 1) the pitch, hence 16 for the 8-short-wide output rows
+    vp8_short_idct8x8_c(input, output,16);
+#ifdef DEC_DEBUG
+    if (dec_debug) {
+      int j;
+      printf("Output 8x8\n");
+      for (j=0;j<64;j++) {
+        printf("%d ", output[j]);
+        if (j%8 == 7) printf("\n");
+      }
+    }
+#endif
+    vpx_memset(input, 0, 128);   /* clears 128 bytes, i.e. all 64 coefficients when short is 2 bytes */
+
+    for (b = 0; b < 4; b++)   /* the 8x8 result is written as four 4x4 quadrants */
+    {
+      for (r = 0; r < 4; r++)
+      {
+          for (c = 0; c < 4; c++)
+          {
+              int a = diff_ptr[c] + pred[c];   /* prediction plus residual */
+
+              if (a < 0)
+                  a = 0;
+
+              if (a > 255)
+                  a = 255;
+
+              dest[c] = (unsigned char) a;
+          }
+
+          dest += stride;
+          diff_ptr += 8;   /* rows of output are 8 shorts apart */
+          pred += pitch;
+      }
+      diff_ptr = output + (b+1) / 2 * 4 * 8 + (b+1) % 2 * 4;   /* move to quadrant b+1: row ((b+1)/2)*4, column ((b+1)%2)*4 */
+      dest = origdest + (b+1) / 2 * 4 * stride + (b+1) % 2 * 4;
+      pred = origpred + (b+1) / 2 * 4 * pitch + (b+1) % 2 * 4;
+   }
+#ifdef DEC_DEBUG
+    if (dec_debug) {   /* optional trace of the 8x8 area just written to dest */
+      int k,j;
+      printf("Final 8x8\n");
+      for (j=0;j<8;j++) {
+        for (k=0;k<8;k++) {
+          printf("%d ", origdest[k]);
+        }
+        printf("\n");
+        origdest+=stride;
+      }
+    }
+#endif
+}
+
--- a/vp8/decoder/dequantize.h
+++ b/vp8/decoder/dequantize.h
@ -42,6 +42,22 @@
             unsigned char *pre, unsigned char *dst_u, \
             unsigned char *dst_v, int stride, char *eobs)

+#define prototype_dequant_dc_idct_add_y_block_8x8(sym) /* declares sym with the 8x8 Y-block signature that also takes a dc array and xd */ \
+    void sym(short *q, short *dq, \
+             unsigned char *pre, unsigned char *dst, \
+             int stride, char *eobs, short *dc, MACROBLOCKD *xd)
+
+#define prototype_dequant_idct_add_y_block_8x8(sym) /* declares sym with the 8x8 Y-block signature without the dc array */ \
+    void sym(short *q, short *dq, \
+             unsigned char *pre, unsigned char *dst, \
+             int stride, char *eobs, MACROBLOCKD *xd)
+
+#define prototype_dequant_idct_add_uv_block_8x8(sym) /* declares sym with the 8x8 U/V signature: separate dst_u and dst_v plus xd */ \
+    void sym(short *q, short *dq, \
+             unsigned char *pre, unsigned char *dst_u, \
+             unsigned char *dst_v, int stride, char *eobs, \
+             MACROBLOCKD *xd)
+
 #if ARCH_X86 || ARCH_X86_64
 #include "x86/dequantize_x86.h"
 #endif
@ -81,6 +97,38 @@ extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block);
 extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block);


+#ifndef vp8_dequant_block_2x2  /* each name below falls back to its _c implementation unless it was already #defined before this point */
+#define vp8_dequant_block_2x2 vp8_dequantize_b_2x2_c
+#endif
+extern prototype_dequant_block(vp8_dequant_block_2x2);
+
+#ifndef vp8_dequant_idct_add_8x8
+#define vp8_dequant_idct_add_8x8 vp8_dequant_idct_add_8x8_c
+#endif
+extern prototype_dequant_idct_add(vp8_dequant_idct_add_8x8);
+
+#ifndef vp8_dequant_dc_idct_add_8x8
+#define vp8_dequant_dc_idct_add_8x8 vp8_dequant_dc_idct_add_8x8_c
+#endif
+extern prototype_dequant_dc_idct_add(vp8_dequant_dc_idct_add_8x8);
+
+#ifndef vp8_dequant_dc_idct_add_y_block_8x8
+#define vp8_dequant_dc_idct_add_y_block_8x8 vp8_dequant_dc_idct_add_y_block_8x8_c
+#endif
+extern prototype_dequant_dc_idct_add_y_block_8x8(vp8_dequant_dc_idct_add_y_block_8x8);
+
+#ifndef vp8_dequant_idct_add_y_block_8x8
+#define vp8_dequant_idct_add_y_block_8x8 vp8_dequant_idct_add_y_block_8x8_c
+#endif
+extern prototype_dequant_idct_add_y_block_8x8(vp8_dequant_idct_add_y_block_8x8);
+
+#ifndef vp8_dequant_idct_add_uv_block_8x8
+#define vp8_dequant_idct_add_uv_block_8x8 vp8_dequant_idct_add_uv_block_8x8_c
+#endif
+extern prototype_dequant_idct_add_uv_block_8x8(vp8_dequant_idct_add_uv_block_8x8);
+
+
+
 typedef prototype_dequant_block((*vp8_dequant_block_fn_t));

 typedef prototype_dequant_idct_add((*vp8_dequant_idct_add_fn_t));
@ -93,6 +141,12 @@ typedef prototype_dequant_idct_add_y_block((*vp8_dequant_idct_add_y_block_fn_t))

 typedef prototype_dequant_idct_add_uv_block((*vp8_dequant_idct_add_uv_block_fn_t));

+typedef prototype_dequant_dc_idct_add_y_block_8x8((*vp8_dequant_dc_idct_add_y_block_fn_t_8x8));   /* pointer type for the dc + Y-block 8x8 prototype */
+
+typedef prototype_dequant_idct_add_y_block_8x8((*vp8_dequant_idct_add_y_block_fn_t_8x8));   /* pointer type for the Y-block 8x8 prototype */
+
+typedef prototype_dequant_idct_add_uv_block_8x8((*vp8_dequant_idct_add_uv_block_fn_t_8x8));   /* pointer type for the U/V-block 8x8 prototype */
+
 typedef struct
 {
    vp8_dequant_block_fn_t               block;
@ -101,6 +155,12 @@ typedef struct
    vp8_dequant_dc_idct_add_y_block_fn_t dc_idct_add_y_block;
    vp8_dequant_idct_add_y_block_fn_t    idct_add_y_block;
    vp8_dequant_idct_add_uv_block_fn_t   idct_add_uv_block;
+    vp8_dequant_block_fn_t               block_2x2;
+    vp8_dequant_idct_add_fn_t            idct_add_8x8;
+    vp8_dequant_dc_idct_add_fn_t         dc_idct_add_8x8;
+    vp8_dequant_dc_idct_add_y_block_fn_t_8x8 dc_idct_add_y_block_8x8;
+    vp8_dequant_idct_add_y_block_fn_t_8x8    idct_add_y_block_8x8;
+    vp8_dequant_idct_add_uv_block_fn_t_8x8   idct_add_uv_block_8x8;
 } vp8_dequant_rtcd_vtable_t;

 #if CONFIG_RUNTIME_CPU_DETECT
--- a/vp8/decoder/detokenize.c
+++ b/vp8/decoder/detokenize.c
@ -16,6 +16,8 @@
 #include "vpx_ports/mem.h"
 #include "detokenize.h"

+#include "vp8/common/seg_common.h"
+
 #define BOOL_DATA UINT8

 #define OCB_X PREV_COEF_CONTEXTS * ENTROPY_NODES
@ -26,6 +28,17 @@ DECLARE_ALIGNED(16, static const unsigned char, coef_bands_x[16]) =
    6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X,
    6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 7 * OCB_X
 };
+DECLARE_ALIGNED(64, static const unsigned char, coef_bands_x_8x8[64]) = {   /* band number * OCB_X for each of the 64 positions; added to Prob as coef_bands_x_8x8[c] */
+  0 * OCB_X, 1 * OCB_X, 2 * OCB_X, 3 * OCB_X, 5 * OCB_X, 4 * OCB_X, 4 * OCB_X, 5 * OCB_X,
+  5 * OCB_X, 3 * OCB_X, 6 * OCB_X, 3 * OCB_X, 5 * OCB_X, 4 * OCB_X, 6 * OCB_X, 6 * OCB_X,
+  6 * OCB_X, 5 * OCB_X, 5 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X,
+  6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X,
+  6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X,
+  7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X,
+  7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X,
+  7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X,
+};
+
 #define EOB_CONTEXT_NODE            0
 #define ZERO_CONTEXT_NODE           1
 #define ONE_CONTEXT_NODE            2
@ -44,7 +57,6 @@ DECLARE_ALIGNED(16, static const unsigned char, coef_bands_x[16]) =
 #define CAT4_MIN_VAL   19
 #define CAT5_MIN_VAL   35
 #define CAT6_MIN_VAL   67
-
 #define CAT1_PROB0    159
 #define CAT2_PROB0    145
 #define CAT2_PROB1    165
@ -64,18 +76,14 @@ DECLARE_ALIGNED(16, static const unsigned char, coef_bands_x[16]) =
 #define CAT5_PROB3 157
 #define CAT5_PROB4 180

-#if CONFIG_EXTEND_QRANGE
 static const unsigned char cat6_prob[14] =
 { 129, 130, 133, 140, 153, 177, 196, 230, 243, 249, 252, 254, 254, 0 };
-#else
-static const unsigned char cat6_prob[12] =
-{ 129, 130, 133, 140, 153, 177, 196, 230, 243, 254, 254, 0 };
-#endif

 void vp8_reset_mb_tokens_context(MACROBLOCKD *x)
 {
    /* Clear entropy contexts for Y2 blocks */
    if (x->mode_info_context->mbmi.mode != B_PRED &&
+        x->mode_info_context->mbmi.mode != I8X8_PRED &&
        x->mode_info_context->mbmi.mode != SPLITMV)
    {
        vpx_memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
@ -158,6 +166,47 @@ DECLARE_ALIGNED(16, extern const unsigned char, vp8_norm[256]);
        NORMALIZE \
    }

+#define DECODE_AND_LOOP_IF_ZERO_8x8_2(probability,branch) /* variant used for block 24: positions stop at 3 and the 4x4 band table coef_bands_x is used */ \
+    { \
+        split = 1 + ((( probability*(range-1) ) ) >> 8); \
+        bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
+        FILL \
+        if ( value < bigsplit ) /* lower partition taken: advance to the next position and re-test at branch */ \
+        { \
+            range = split; \
+            NORMALIZE \
+            Prob = coef_probs; \
+            if(c<3) {\
+            ++c; \
+            Prob += coef_bands_x[c]; \
+            goto branch; \
+            } goto BLOCK_FINISHED_8x8; /* c is already the last position: finish the block (guards against malformed input) */\
+        } \
+        value -= bigsplit; \
+        range = range - split; \
+        NORMALIZE \
+    }
+#define DECODE_AND_LOOP_IF_ZERO_8X8(probability,branch) /* variant used for the 64-coefficient blocks: positions stop at 63, band table coef_bands_x_8x8 */ \
+    { \
+        split = 1 + ((( probability*(range-1) ) ) >> 8); \
+        bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
+        FILL \
+        if ( value < bigsplit ) /* lower partition taken: advance to the next position and re-test at branch */ \
+        { \
+            range = split; \
+            NORMALIZE \
+            Prob = coef_probs; \
+            if(c<63) {\
+            ++c; \
+            Prob += coef_bands_x_8x8[c]; \
+            goto branch; \
+            } goto BLOCK_FINISHED_8x8; /* c is already the last position: finish the block (guards against malformed input) */\
+        } \
+        value -= bigsplit; \
+        range = range - split; \
+        NORMALIZE \
+    }
+
 #define DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val) \
    DECODE_AND_APPLYSIGN(val) \
    Prob = coef_probs + (ENTROPY_NODES*2); \
@ -169,6 +218,26 @@ DECLARE_ALIGNED(16, extern const unsigned char, vp8_norm[256]);
    goto BLOCK_FINISHED;


+#define DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8_2(val) /* block-24 variant: store v (presumably set by DECODE_AND_APPLYSIGN) at scan[c]; continue while c < 3, else store at scan[3] and finish */ \
+    DECODE_AND_APPLYSIGN(val) \
+    Prob = coef_probs + (ENTROPY_NODES*2); \
+    if(c < 3){\
+        qcoeff_ptr [ scan[c] ] = (INT16) v; \
+        ++c; \
+        goto DO_WHILE_8x8; }\
+    qcoeff_ptr [ scan[3] ] = (INT16) v; \
+    goto BLOCK_FINISHED_8x8;
+#define DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8(val) /* 64-coefficient variant: same flow with the last position at 63 */ \
+    DECODE_AND_APPLYSIGN(val) \
+    Prob = coef_probs + (ENTROPY_NODES*2); \
+    if(c < 63){\
+        qcoeff_ptr [ scan[c] ] = (INT16) v; \
+        ++c; \
+        goto DO_WHILE_8x8; }\
+    qcoeff_ptr [ scan[63] ] = (INT16) v; \
+    goto BLOCK_FINISHED_8x8;
+
+
 #define DECODE_EXTRABIT_AND_ADJUST_VAL(prob, bits_count)\
    split = 1 +  (((range-1) * prob) >> 8); \
    bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
@ -185,11 +254,12 @@ DECLARE_ALIGNED(16, extern const unsigned char, vp8_norm[256]);
    }\
    NORMALIZE

-int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
+/* 8x8 counterpart of vp8_decode_mb_tokens: reads tokens with the coef_probs_8x8 tables into x->qcoeff and returns the accumulated eob total. */
+int vp8_decode_mb_tokens_8x8(VP8D_COMP *dx, MACROBLOCKD *x)
 {
    ENTROPY_CONTEXT *A = (ENTROPY_CONTEXT *)x->above_context;
    ENTROPY_CONTEXT *L = (ENTROPY_CONTEXT *)x->left_context;
-    const FRAME_CONTEXT * const fc = &dx->common.fc;
+    const VP8_COMMON *const oc = & dx->common;

    BOOL_DECODER *bc = x->current_bc;

@ -203,6 +273,331 @@ int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)

    register int count;

+    const BOOL_DATA *bufptr;
+    const BOOL_DATA *bufend;
+    register unsigned int range;
+    VP8_BD_VALUE value;
+    const int *scan;// scan-order table used to place decoded coefficients
+    register unsigned int shift;
+    UINT32 split;
+    VP8_BD_VALUE bigsplit;
+    INT16 *qcoeff_ptr;
+
+    const vp8_prob *coef_probs;// base of the probability table for the current type
+    int type;
+    int stop;
+    INT16 val, bits_count;
+    INT16 c;
+    INT16 v;
+    const vp8_prob *Prob;// probabilities currently being read
+
+    int seg_eob;
+    int segment_id = x->mode_info_context->mbmi.segment_id;
+
+    type = 3;   /* defaults: start at block 0 with type 3 and the 8x8 scan */
+    i = 0;
+    stop = 16;
+
+    scan = vp8_default_zig_zag1d_8x8;
+    qcoeff_ptr = &x->qcoeff[0];
+
+    if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV)   /* these modes decode block 24 first, as type 1 with the 4x4 scan */
+    {
+        i = 24;
+        stop = 24;
+        type = 1;
+        qcoeff_ptr += 24*16;
+        eobtotal -= 4;
+        scan = vp8_default_zig_zag1d;
+    }
+
+    bufend  = bc->user_buffer_end;   /* work on local copies of the bool decoder state */
+    bufptr  = bc->user_buffer;
+    value   = bc->value;
+    count   = bc->count;
+    range   = bc->range;
+
+    coef_probs = oc->fc.coef_probs_8x8 [type] [ 0 ] [0];
+
+BLOCK_LOOP_8x8:
+    a = A + vp8_block2above_8x8[i];
+    l = L + vp8_block2left_8x8[i];
+
+    c = (INT16)(!type);   /* first position is 1 when type == 0, otherwise 0 */
+
+//    Dest = ((A)!=0) + ((B)!=0);
+    if(i==24)   /* block 24 holds at most 4 coefficients unless SEG_LVL_EOB overrides the limit */
+    {
+      VP8_COMBINEENTROPYCONTEXTS(v, *a, *l);
+      if ( segfeature_active( x, segment_id, SEG_LVL_EOB ) )
+      {
+          seg_eob = get_segdata( x, segment_id, SEG_LVL_EOB );
+      }
+      else
+          seg_eob = 4;
+    }
+    else   /* every other block holds up to 64 coefficients unless SEG_LVL_EOB overrides the limit */
+    {
+      VP8_COMBINEENTROPYCONTEXTS(v, *a, *l);
+      if ( segfeature_active( x, segment_id, SEG_LVL_EOB ) )
+      {
+          seg_eob = get_segdata( x, segment_id, SEG_LVL_EOB );
+      }
+      else
+          seg_eob = 64;
+    }
+
+    Prob = coef_probs;
+    Prob += v * ENTROPY_NODES;
+
+DO_WHILE_8x8:
+    if ( c == seg_eob )   /* eob limit reached: finish without reading an EOB token */
+        goto BLOCK_FINISHED_8x8;
+
+    if(i==24)   /* block 24 uses the 4x4 band table, the others the 8x8 one */
+      Prob += coef_bands_x[c];
+    else
+      Prob += coef_bands_x_8x8[c];
+    DECODE_AND_BRANCH_IF_ZERO(Prob[EOB_CONTEXT_NODE], BLOCK_FINISHED_8x8);
+
+CHECK_0_8x8_:
+    if (i==24)
+    {
+      DECODE_AND_LOOP_IF_ZERO_8x8_2(Prob[ZERO_CONTEXT_NODE], CHECK_0_8x8_);
+    }
+    else
+    {
+      DECODE_AND_LOOP_IF_ZERO_8X8(Prob[ZERO_CONTEXT_NODE], CHECK_0_8x8_);
+    }
+    DECODE_AND_BRANCH_IF_ZERO(Prob[ONE_CONTEXT_NODE], ONE_CONTEXT_NODE_0_8x8_);
+    DECODE_AND_BRANCH_IF_ZERO(Prob[LOW_VAL_CONTEXT_NODE],
+                                LOW_VAL_CONTEXT_NODE_0_8x8_);
+    DECODE_AND_BRANCH_IF_ZERO(Prob[HIGH_LOW_CONTEXT_NODE],
+                                HIGH_LOW_CONTEXT_NODE_0_8x8_);
+    DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREEFOUR_CONTEXT_NODE],
+                                CAT_THREEFOUR_CONTEXT_NODE_0_8x8_);
+    DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_FIVE_CONTEXT_NODE],
+                                CAT_FIVE_CONTEXT_NODE_0_8x8_);
+    val = CAT6_MIN_VAL;
+    bits_count = 12;   /* the loop below reads cat6_prob[12] down to cat6_prob[0] */
+    do
+    {
+        DECODE_EXTRABIT_AND_ADJUST_VAL(cat6_prob[bits_count], bits_count);
+        bits_count -- ;
+    }
+    while (bits_count >= 0);
+    if(i==24)
+    {
+        DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8_2(val);
+    }
+    else
+    {
+        DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8(val);
+    }
+
+CAT_FIVE_CONTEXT_NODE_0_8x8_:
+    val = CAT5_MIN_VAL;
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB4, 4);
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB3, 3);
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB2, 2);
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB1, 1);
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB0, 0);
+    if(i==24)
+    {
+        DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8_2(val);
+    }
+    else
+    {
+        DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8(val);
+    }
+
+CAT_THREEFOUR_CONTEXT_NODE_0_8x8_:
+    DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREE_CONTEXT_NODE],
+                            CAT_THREE_CONTEXT_NODE_0_8x8_);
+    val = CAT4_MIN_VAL;
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT4_PROB3, 3);
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT4_PROB2, 2);
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT4_PROB1, 1);
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT4_PROB0, 0);
+    if(i==24)
+    {
+        DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8_2(val);
+    }
+    else
+    {
+        DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8(val);
+    }
+
+CAT_THREE_CONTEXT_NODE_0_8x8_:
+    val = CAT3_MIN_VAL;
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT3_PROB2, 2);
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT3_PROB1, 1);
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT3_PROB0, 0);
+    if(i==24)
+    {
+        DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8_2(val);
+    }
+    else
+    {
+        DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8(val);
+    }
+
+HIGH_LOW_CONTEXT_NODE_0_8x8_:
+    DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_ONE_CONTEXT_NODE],
+                            CAT_ONE_CONTEXT_NODE_0_8x8_);
+    val = CAT2_MIN_VAL;
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT2_PROB1, 1);
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT2_PROB0, 0);
+    if(i==24)
+    {
+        DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8_2(val);
+    }
+    else
+    {
+        DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8(val);
+    }
+
+CAT_ONE_CONTEXT_NODE_0_8x8_:
+    val = CAT1_MIN_VAL;
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT1_PROB0, 0);
+    if(i==24)
+    {
+        DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8_2(val);
+    }
+    else
+    {
+        DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8(val);
+    }
+
+LOW_VAL_CONTEXT_NODE_0_8x8_:
+    DECODE_AND_BRANCH_IF_ZERO(Prob[TWO_CONTEXT_NODE],
+                                TWO_CONTEXT_NODE_0_8x8_);
+    DECODE_AND_BRANCH_IF_ZERO(Prob[THREE_CONTEXT_NODE],
+                                THREE_CONTEXT_NODE_0_8x8_);
+    if(i==24)
+    {
+        DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8_2(4);
+    }
+    else
+    {
+        DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8(4);
+    }
+
+
+THREE_CONTEXT_NODE_0_8x8_:
+    if(i==24)
+    {
+        DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8_2(3);
+    }
+    else
+    {
+        DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8(3);
+    }
+
+
+TWO_CONTEXT_NODE_0_8x8_:
+    if(i==24)
+    {
+        DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8_2(2);
+    }
+    else
+    {
+        DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8(2);
+    }
+
+
+ONE_CONTEXT_NODE_0_8x8_:
+    DECODE_AND_APPLYSIGN(1);
+    Prob = coef_probs + ENTROPY_NODES;   /* note: offset is ENTROPY_NODES here, ENTROPY_NODES*2 in the write macros */
+
+    if (i==24)
+    {
+      if (c < 3)//15
+      {
+        qcoeff_ptr [ scan[c] ] = (INT16) v;
+        ++c;
+        goto DO_WHILE_8x8;
+      }
+    }
+    else
+    {
+      if (c < 63)
+      {
+        qcoeff_ptr [ scan[c] ] = (INT16) v;
+        ++c;
+        goto DO_WHILE_8x8;
+      }
+    }
+
+   if(i==24)   /* last position reached: store the final coefficient and fall through to BLOCK_FINISHED_8x8 */
+       qcoeff_ptr [ scan[3] ] = (INT16) v;//15
+   else
+       qcoeff_ptr [ scan[63] ] = (INT16) v;
+
+
+BLOCK_FINISHED_8x8:
+    *a = *l = ((eobs[i] = c) != !type);   // any nonzero data?
+    if (i!=24)   /* for the 64-coefficient blocks the flag is copied into the adjacent context entry as well */
+    {
+        *(a + 1)    =  *a;
+        *(l + 1)    = *l;
+    }
+
+    eobtotal += c;
+    qcoeff_ptr += (i==24 ? 16 : 64);   /* block 24 occupies 16 coefficient slots, the others 64 */
+
+    i+=4;   /* block indices advance in steps of four */
+
+    if (i < stop)
+        goto BLOCK_LOOP_8x8;
+
+    if (i > 24)   /* block 24 just finished (i == 28): restart at block 0 with type 0 */
+    {
+        type = 0;
+        i = 0;
+        stop = 16;
+        coef_probs = oc->fc.coef_probs_8x8 [type] [ 0 ] [0];
+        qcoeff_ptr -= (24*16 + 16);   /* rewind to &x->qcoeff[0] */
+        scan = vp8_default_zig_zag1d_8x8;
+        goto BLOCK_LOOP_8x8;
+    }
+
+    if (i == 16)   /* blocks 0..12 done: continue with type 2 for blocks 16 and 20 (presumably chroma -- confirm) */
+    {
+        type = 2;
+        coef_probs = oc->fc.coef_probs_8x8 [type] [ 0 ] [0];
+        stop = 24;
+        goto BLOCK_LOOP_8x8;
+    }
+
+    FILL
+    bc->user_buffer = bufptr;   /* write the local decoder state back */
+    bc->value = value;
+    bc->count = count;
+    bc->range = range;
+
+    return eobtotal;
+
+}
+
+int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *xd)
+{
+    ENTROPY_CONTEXT *A = (ENTROPY_CONTEXT *)xd->above_context;
+    ENTROPY_CONTEXT *L = (ENTROPY_CONTEXT *)xd->left_context;
+    const FRAME_CONTEXT * const fc = &dx->common.fc;
+
+    BOOL_DECODER *bc = xd->current_bc;
+
+    char *eobs = xd->eobs;
+
+    ENTROPY_CONTEXT *a;
+    ENTROPY_CONTEXT *l;
+    int i;
+
+    int eobtotal = 0;
+
+    register int count;
+
    const BOOL_DATA *bufptr;
    const BOOL_DATA *bufend;
    register unsigned int range;
@ -221,15 +616,23 @@ int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
    INT16 v;
    const vp8_prob *Prob;

+    int seg_eob = 16;
+    int segment_id = xd->mode_info_context->mbmi.segment_id;
+
+    if ( segfeature_active( xd, segment_id, SEG_LVL_EOB ) )
+    {
+        seg_eob = get_segdata( xd, segment_id, SEG_LVL_EOB );
+    }
+
    type = 3;
    i = 0;
    stop = 16;

    scan = vp8_default_zig_zag1d;
-    qcoeff_ptr = &x->qcoeff[0];
-
-    if (x->mode_info_context->mbmi.mode != B_PRED &&
-        x->mode_info_context->mbmi.mode != SPLITMV)
+    qcoeff_ptr = &xd->qcoeff[0];
+    if (xd->mode_info_context->mbmi.mode != B_PRED &&
+        xd->mode_info_context->mbmi.mode != I8X8_PRED &&
+        xd->mode_info_context->mbmi.mode != SPLITMV)
    {
        i = 24;
        stop = 24;
@ -259,6 +662,9 @@ BLOCK_LOOP:
    Prob += v * ENTROPY_NODES;

 DO_WHILE:
+    if ( c == seg_eob )
+        goto BLOCK_FINISHED;
+
    Prob += coef_bands_x[c];
    DECODE_AND_BRANCH_IF_ZERO(Prob[EOB_CONTEXT_NODE], BLOCK_FINISHED);

@ -275,7 +681,7 @@ CHECK_0_:
                              CAT_FIVE_CONTEXT_NODE_0_);

    val = CAT6_MIN_VAL;
-    bits_count = CONFIG_EXTEND_QRANGE?12:10;
+    bits_count = 12;

    do
    {
@ -382,6 +788,7 @@ BLOCK_FINISHED:
    bc->value = value;
    bc->count = count;
    bc->range = range;
+
    return eobtotal;

 }
--- a/vp8/decoder/detokenize.h
+++ b/vp8/decoder/detokenize.h
@ -16,5 +16,6 @@

 void vp8_reset_mb_tokens_context(MACROBLOCKD *x);
 int vp8_decode_mb_tokens(VP8D_COMP *, MACROBLOCKD *);
+int vp8_decode_mb_tokens_8x8(VP8D_COMP *, MACROBLOCKD *);

 #endif /* DETOKENIZE_H */
--- a/vp8/decoder/error_concealment.c
+++ b/vp8/decoder/error_concealment.c
@ -1,629 +0,0 @@
-/*
- *  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "error_concealment.h"
-#include "onyxd_int.h"
-#include "decodemv.h"
-#include "vpx_mem/vpx_mem.h"
-#include "vp8/common/recon.h"
-#include "vp8/common/findnearmv.h"
-
-#include <assert.h>
-
-#define MIN(x,y) (((x)<(y))?(x):(y))
-#define MAX(x,y) (((x)>(y))?(x):(y))
-
-#define FLOOR(x,q) ((x) & -(1 << (q)))
-
-#define NUM_NEIGHBORS 20
-
-typedef struct ec_position
-{
-    int row;
-    int col;
-} EC_POS;
-
-/*
- * Regenerate the table in Matlab with:
- * x = meshgrid((1:4), (1:4));
- * y = meshgrid((1:4), (1:4))';
- * W = round((1./(sqrt(x.^2 + y.^2))*2^7));
- * W(1,1) = 0;
- */
-static const int weights_q7[5][5] = {
-       {  0,   128,    64,    43,    32 },
-       {128,    91,    57,    40,    31 },
-       { 64,    57,    45,    36,    29 },
-       { 43,    40,    36,    30,    26 },
-       { 32,    31,    29,    26,    23 }
-};
-
-int vp8_alloc_overlap_lists(VP8D_COMP *pbi)
-{
-    if (pbi->overlaps != NULL)
-    {
-        vpx_free(pbi->overlaps);
-        pbi->overlaps = NULL;
-    }
-    pbi->overlaps = vpx_calloc(pbi->common.mb_rows * pbi->common.mb_cols,
-                               sizeof(MB_OVERLAP));
-    if (pbi->overlaps == NULL)
-        return -1;
-    vpx_memset(pbi->overlaps, 0,
-               sizeof(MB_OVERLAP) * pbi->common.mb_rows * pbi->common.mb_cols);
-    return 0;
-}
-
-void vp8_de_alloc_overlap_lists(VP8D_COMP *pbi)
-{
-    vpx_free(pbi->overlaps);
-    pbi->overlaps = NULL;
-}
-
-/* Inserts a new overlap area value to the list of overlaps of a block */
-static void assign_overlap(OVERLAP_NODE* overlaps,
-                           union b_mode_info *bmi,
-                           int overlap)
-{
-    int i;
-    if (overlap <= 0)
-        return;
-    /* Find and assign to the next empty overlap node in the list of overlaps.
-     * Empty is defined as bmi == NULL */
-    for (i = 0; i < MAX_OVERLAPS; i++)
-    {
-        if (overlaps[i].bmi == NULL)
-        {
-            overlaps[i].bmi = bmi;
-            overlaps[i].overlap = overlap;
-            break;
-        }
-    }
-}
-
-/* Calculates the overlap area between two 4x4 squares, where the first
- * square has its upper-left corner at (b1_row, b1_col) and the second
- * square has its upper-left corner at (b2_row, b2_col). Doesn't
- * properly handle squares which do not overlap.
- */
-static int block_overlap(int b1_row, int b1_col, int b2_row, int b2_col)
-{
-    const int int_top = MAX(b1_row, b2_row); // top
-    const int int_left = MAX(b1_col, b2_col); // left
-    /* Since each block is 4x4 pixels, adding 4 (Q3) to the left/top edge
-     * gives us the right/bottom edge.
-     */
-    const int int_right = MIN(b1_col + (4<<3), b2_col + (4<<3)); // right
-    const int int_bottom = MIN(b1_row + (4<<3), b2_row + (4<<3)); // bottom
-    return (int_bottom - int_top) * (int_right - int_left);
-}
-
-/* Calculates the overlap area for all blocks in a macroblock at position
- * (mb_row, mb_col) in macroblocks, which are being overlapped by a given
- * overlapping block at position (new_row, new_col) (in pixels, Q3). The
- * first block being overlapped in the macroblock has position (first_blk_row,
- * first_blk_col) in blocks relative the upper-left corner of the image.
- */
-static void calculate_overlaps_mb(B_OVERLAP *b_overlaps, union b_mode_info *bmi,
-                                  int new_row, int new_col,
-                                  int mb_row, int mb_col,
-                                  int first_blk_row, int first_blk_col)
-{
-    /* Find the blocks within this MB (defined by mb_row, mb_col) which are
-     * overlapped by bmi and calculate and assign overlap for each of those
-     * blocks. */
-
-    /* Block coordinates relative the upper-left block */
-    const int rel_ol_blk_row = first_blk_row - mb_row * 4;
-    const int rel_ol_blk_col = first_blk_col - mb_col * 4;
-    /* If the block partly overlaps any previous MB, these coordinates
-     * can be < 0. We don't want to access blocks in previous MBs.
-     */
-    const int blk_idx = MAX(rel_ol_blk_row,0) * 4 + MAX(rel_ol_blk_col,0);
-    /* Upper left overlapping block */
-    B_OVERLAP *b_ol_ul = &(b_overlaps[blk_idx]);
-
-    /* Calculate and assign overlaps for all blocks in this MB
-     * which the motion compensated block overlaps
-     */
-    /* Avoid calculating overlaps for blocks in later MBs */
-    int end_row = MIN(4 + mb_row * 4 - first_blk_row, 2);
-    int end_col = MIN(4 + mb_col * 4 - first_blk_col, 2);
-    int row, col;
-
-    /* Check if new_row and new_col are evenly divisible by 4 (Q3),
-     * and if so we shouldn't check neighboring blocks
-     */
-    if (new_row >= 0 && (new_row & 0x1F) == 0)
-        end_row = 1;
-    if (new_col >= 0 && (new_col & 0x1F) == 0)
-        end_col = 1;
-
-    /* Check if the overlapping block partly overlaps a previous MB
-     * and if so, we're overlapping fewer blocks in this MB.
-     */
-    if (new_row < (mb_row*16)<<3)
-        end_row = 1;
-    if (new_col < (mb_col*16)<<3)
-        end_col = 1;
-
-    for (row = 0; row < end_row; ++row)
-    {
-        for (col = 0; col < end_col; ++col)
-        {
-            /* input in Q3, result in Q6 */
-            const int overlap = block_overlap(new_row, new_col,
-                                                  (((first_blk_row + row) *
-                                                      4) << 3),
-                                                  (((first_blk_col + col) *
-                                                      4) << 3));
-            assign_overlap(b_ol_ul[row * 4 + col].overlaps, bmi, overlap);
-        }
-    }
-}
-
-void vp8_calculate_overlaps(MB_OVERLAP *overlap_ul,
-                            int mb_rows, int mb_cols,
-                            union b_mode_info *bmi,
-                            int b_row, int b_col)
-{
-    MB_OVERLAP *mb_overlap;
-    int row, col, rel_row, rel_col;
-    int new_row, new_col;
-    int end_row, end_col;
-    int overlap_b_row, overlap_b_col;
-    int overlap_mb_row, overlap_mb_col;
-
-    /* mb subpixel position */
-    row = (4 * b_row) << 3; /* Q3 */
-    col = (4 * b_col) << 3; /* Q3 */
-
-    /* reverse compensate for motion */
-    new_row = row - bmi->mv.as_mv.row;
-    new_col = col - bmi->mv.as_mv.col;
-
-    if (new_row >= ((16*mb_rows) << 3) || new_col >= ((16*mb_cols) << 3))
-    {
-        /* the new block ended up outside the frame */
-        return;
-    }
-
-    if (new_row <= (-4 << 3) || new_col <= (-4 << 3))
-    {
-        /* outside the frame */
-        return;
-    }
-    /* overlapping block's position in blocks */
-    overlap_b_row = FLOOR(new_row / 4, 3) >> 3;
-    overlap_b_col = FLOOR(new_col / 4, 3) >> 3;
-
-    /* overlapping block's MB position in MBs
-     * operations are done in Q3
-     */
-    overlap_mb_row = FLOOR((overlap_b_row << 3) / 4, 3) >> 3;
-    overlap_mb_col = FLOOR((overlap_b_col << 3) / 4, 3) >> 3;
-
-    end_row = MIN(mb_rows - overlap_mb_row, 2);
-    end_col = MIN(mb_cols - overlap_mb_col, 2);
-
-    /* Don't calculate overlap for MBs we don't overlap */
-    /* Check if the new block row starts at the last block row of the MB */
-    if (abs(new_row - ((16*overlap_mb_row) << 3)) < ((3*4) << 3))
-        end_row = 1;
-    /* Check if the new block col starts at the last block col of the MB */
-    if (abs(new_col - ((16*overlap_mb_col) << 3)) < ((3*4) << 3))
-        end_col = 1;
-
-    /* find the MB(s) this block is overlapping */
-    for (rel_row = 0; rel_row < end_row; ++rel_row)
-    {
-        for (rel_col = 0; rel_col < end_col; ++rel_col)
-        {
-            if (overlap_mb_row + rel_row < 0 ||
-                overlap_mb_col + rel_col < 0)
-                continue;
-            mb_overlap = overlap_ul + (overlap_mb_row + rel_row) * mb_cols +
-                 overlap_mb_col + rel_col;
-
-            calculate_overlaps_mb(mb_overlap->overlaps, bmi,
-                                  new_row, new_col,
-                                  overlap_mb_row + rel_row,
-                                  overlap_mb_col + rel_col,
-                                  overlap_b_row + rel_row,
-                                  overlap_b_col + rel_col);
-        }
-    }
-}
-
-/* Estimates a motion vector given the overlapping blocks' motion vectors.
- * Filters out all overlapping blocks which do not refer to the correct
- * reference frame type.
- */
-static void estimate_mv(const OVERLAP_NODE *overlaps, union b_mode_info *bmi)
-{
-    int i;
-    int overlap_sum = 0;
-    int row_acc = 0;
-    int col_acc = 0;
-
-    bmi->mv.as_int = 0;
-    for (i=0; i < MAX_OVERLAPS; ++i)
-    {
-        if (overlaps[i].bmi == NULL)
-            break;
-        col_acc += overlaps[i].overlap * overlaps[i].bmi->mv.as_mv.col;
-        row_acc += overlaps[i].overlap * overlaps[i].bmi->mv.as_mv.row;
-        overlap_sum += overlaps[i].overlap;
-    }
-    if (overlap_sum > 0)
-    {
-        /* Q9 / Q6 = Q3 */
-        bmi->mv.as_mv.col = col_acc / overlap_sum;
-        bmi->mv.as_mv.row = row_acc / overlap_sum;
-    }
-    else
-    {
-        bmi->mv.as_mv.col = 0;
-        bmi->mv.as_mv.row = 0;
-    }
-}
-
-/* Estimates all motion vectors for a macroblock given the lists of
- * overlaps for each block. Decides whether or not the MVs must be clamped.
- */
-static void estimate_mb_mvs(const B_OVERLAP *block_overlaps,
-                            MODE_INFO *mi,
-                            int mb_to_left_edge,
-                            int mb_to_right_edge,
-                            int mb_to_top_edge,
-                            int mb_to_bottom_edge)
-{
-    int row, col;
-    int non_zero_count = 0;
-    MV * const filtered_mv = &(mi->mbmi.mv.as_mv);
-    union b_mode_info * const bmi = mi->bmi;
-    filtered_mv->col = 0;
-    filtered_mv->row = 0;
-    mi->mbmi.need_to_clamp_mvs = 0;
-    for (row = 0; row < 4; ++row)
-    {
-        int this_b_to_top_edge = mb_to_top_edge + ((row*4)<<3);
-        int this_b_to_bottom_edge = mb_to_bottom_edge - ((row*4)<<3);
-        for (col = 0; col < 4; ++col)
-        {
-            int i = row * 4 + col;
-            int this_b_to_left_edge = mb_to_left_edge + ((col*4)<<3);
-            int this_b_to_right_edge = mb_to_right_edge - ((col*4)<<3);
-            /* Estimate vectors for all blocks which are overlapped by this */
-            /* type. Interpolate/extrapolate the rest of the block's MVs */
-            estimate_mv(block_overlaps[i].overlaps, &(bmi[i]));
-            mi->mbmi.need_to_clamp_mvs |= vp8_check_mv_bounds(
-                                                         &bmi[i].mv,
-                                                         this_b_to_left_edge,
-                                                         this_b_to_right_edge,
-                                                         this_b_to_top_edge,
-                                                         this_b_to_bottom_edge);
-            if (bmi[i].mv.as_int != 0)
-            {
-                ++non_zero_count;
-                filtered_mv->col += bmi[i].mv.as_mv.col;
-                filtered_mv->row += bmi[i].mv.as_mv.row;
-            }
-        }
-    }
-    if (non_zero_count > 0)
-    {
-        filtered_mv->col /= non_zero_count;
-        filtered_mv->row /= non_zero_count;
-    }
-}
-
-static void calc_prev_mb_overlaps(MB_OVERLAP *overlaps, MODE_INFO *prev_mi,
-                                    int mb_row, int mb_col,
-                                    int mb_rows, int mb_cols)
-{
-    int sub_row;
-    int sub_col;
-    for (sub_row = 0; sub_row < 4; ++sub_row)
-    {
-        for (sub_col = 0; sub_col < 4; ++sub_col)
-        {
-            vp8_calculate_overlaps(
-                                overlaps, mb_rows, mb_cols,
-                                &(prev_mi->bmi[sub_row * 4 + sub_col]),
-                                4 * mb_row + sub_row,
-                                4 * mb_col + sub_col);
-        }
-    }
-}
-
-/* Estimate all missing motion vectors. This function does the same as the one
- * above, but has different input arguments. */
-static void estimate_missing_mvs(MB_OVERLAP *overlaps,
-                                 MODE_INFO *mi, MODE_INFO *prev_mi,
-                                 int mb_rows, int mb_cols,
-                                 unsigned int first_corrupt)
-{
-    int mb_row, mb_col;
-    vpx_memset(overlaps, 0, sizeof(MB_OVERLAP) * mb_rows * mb_cols);
-    /* First calculate the overlaps for all blocks */
-    for (mb_row = 0; mb_row < mb_rows; ++mb_row)
-    {
-        for (mb_col = 0; mb_col < mb_cols; ++mb_col)
-        {
-            /* We're only able to use blocks referring to the last frame
-             * when extrapolating new vectors.
-             */
-            if (prev_mi->mbmi.ref_frame == LAST_FRAME)
-            {
-                calc_prev_mb_overlaps(overlaps, prev_mi,
-                                      mb_row, mb_col,
-                                      mb_rows, mb_cols);
-            }
-            ++prev_mi;
-        }
-        ++prev_mi;
-    }
-
-    mb_row = first_corrupt / mb_cols;
-    mb_col = first_corrupt - mb_row * mb_cols;
-    mi += mb_row*(mb_cols + 1) + mb_col;
-    /* Go through all macroblocks in the current image with missing MVs
-     * and calculate new MVs using the overlaps.
-     */
-    for (; mb_row < mb_rows; ++mb_row)
-    {
-        int mb_to_top_edge = -((mb_row * 16)) << 3;
-        int mb_to_bottom_edge = ((mb_rows - 1 - mb_row) * 16) << 3;
-        for (; mb_col < mb_cols; ++mb_col)
-        {
-            int mb_to_left_edge = -((mb_col * 16) << 3);
-            int mb_to_right_edge = ((mb_cols - 1 - mb_col) * 16) << 3;
-            const B_OVERLAP *block_overlaps =
-                    overlaps[mb_row*mb_cols + mb_col].overlaps;
-            mi->mbmi.ref_frame = LAST_FRAME;
-            mi->mbmi.mode = SPLITMV;
-            mi->mbmi.uv_mode = DC_PRED;
-            mi->mbmi.partitioning = 3;
-            mi->mbmi.segment_id = 0;
-            estimate_mb_mvs(block_overlaps,
-                            mi,
-                            mb_to_left_edge,
-                            mb_to_right_edge,
-                            mb_to_top_edge,
-                            mb_to_bottom_edge);
-            ++mi;
-        }
-        mb_col = 0;
-        ++mi;
-    }
-}
-
-void vp8_estimate_missing_mvs(VP8D_COMP *pbi)
-{
-    VP8_COMMON * const pc = &pbi->common;
-    estimate_missing_mvs(pbi->overlaps,
-                         pc->mi, pc->prev_mi,
-                         pc->mb_rows, pc->mb_cols,
-                         pbi->mvs_corrupt_from_mb);
-}
-
-static void assign_neighbor(EC_BLOCK *neighbor, MODE_INFO *mi, int block_idx)
-{
-    assert(mi->mbmi.ref_frame < MAX_REF_FRAMES);
-    neighbor->ref_frame = mi->mbmi.ref_frame;
-    neighbor->mv = mi->bmi[block_idx].mv.as_mv;
-}
-
-/* Finds the neighboring blocks of a macroblocks. In the general case
- * 20 blocks are found. If a fewer number of blocks are found due to
- * image boundaries, those positions in the EC_BLOCK array are left "empty".
- * The neighbors are enumerated with the upper-left neighbor as the first
- * element, the second element refers to the neighbor to right of the previous
- * neighbor, and so on. The last element refers to the neighbor below the first
- * neighbor.
- */
-static void find_neighboring_blocks(MODE_INFO *mi,
-                                    EC_BLOCK *neighbors,
-                                    int mb_row, int mb_col,
-                                    int mb_rows, int mb_cols,
-                                    int mi_stride)
-{
-    int i = 0;
-    int j;
-    if (mb_row > 0)
-    {
-        /* upper left */
-        if (mb_col > 0)
-            assign_neighbor(&neighbors[i], mi - mi_stride - 1, 15);
-        ++i;
-        /* above */
-        for (j = 12; j < 16; ++j, ++i)
-            assign_neighbor(&neighbors[i], mi - mi_stride, j);
-    }
-    else
-        i += 5;
-    if (mb_col < mb_cols - 1)
-    {
-        /* upper right */
-        if (mb_row > 0)
-            assign_neighbor(&neighbors[i], mi - mi_stride + 1, 12);
-        ++i;
-        /* right */
-        for (j = 0; j <= 12; j += 4, ++i)
-            assign_neighbor(&neighbors[i], mi + 1, j);
-    }
-    else
-        i += 5;
-    if (mb_row < mb_rows - 1)
-    {
-        /* lower right */
-        if (mb_col < mb_cols - 1)
-            assign_neighbor(&neighbors[i], mi + mi_stride + 1, 0);
-        ++i;
-        /* below */
-        for (j = 0; j < 4; ++j, ++i)
-            assign_neighbor(&neighbors[i], mi + mi_stride, j);
-    }
-    else
-        i += 5;
-    if (mb_col > 0)
-    {
-        /* lower left */
-        if (mb_row < mb_rows - 1)
-            assign_neighbor(&neighbors[i], mi + mi_stride - 1, 4);
-        ++i;
-        /* left */
-        for (j = 3; j < 16; j += 4, ++i)
-        {
-            assign_neighbor(&neighbors[i], mi - 1, j);
-        }
-    }
-    else
-        i += 5;
-    assert(i == 20);
-}
-
-/* Calculates which reference frame type is dominating among the neighbors */
-static MV_REFERENCE_FRAME dominant_ref_frame(EC_BLOCK *neighbors)
-{
-    /* Default to referring to "skip" */
-    MV_REFERENCE_FRAME dom_ref_frame = LAST_FRAME;
-    int max_ref_frame_cnt = 0;
-    int ref_frame_cnt[MAX_REF_FRAMES] = {0};
-    int i;
-    /* Count neighboring reference frames */
-    for (i = 0; i < NUM_NEIGHBORS; ++i)
-    {
-        if (neighbors[i].ref_frame < MAX_REF_FRAMES &&
-            neighbors[i].ref_frame != INTRA_FRAME)
-            ++ref_frame_cnt[neighbors[i].ref_frame];
-    }
-    /* Find maximum */
-    for (i = 0; i < MAX_REF_FRAMES; ++i)
-    {
-        if (ref_frame_cnt[i] > max_ref_frame_cnt)
-        {
-            dom_ref_frame = i;
-            max_ref_frame_cnt = ref_frame_cnt[i];
-        }
-    }
-    return dom_ref_frame;
-}
-
-/* Interpolates all motion vectors for a macroblock from the neighboring blocks'
- * motion vectors.
- */
-static void interpolate_mvs(MACROBLOCKD *mb,
-                         EC_BLOCK *neighbors,
-                         MV_REFERENCE_FRAME dom_ref_frame)
-{
-    int row, col, i;
-    MODE_INFO * const mi = mb->mode_info_context;
-    /* Table with the position of the neighboring blocks relative the position
-     * of the upper left block of the current MB. Starting with the upper left
-     * neighbor and going to the right.
-     */
-    const EC_POS neigh_pos[NUM_NEIGHBORS] = {
-                                        {-1,-1}, {-1,0}, {-1,1}, {-1,2}, {-1,3},
-                                        {-1,4}, {0,4}, {1,4}, {2,4}, {3,4},
-                                        {4,4}, {4,3}, {4,2}, {4,1}, {4,0},
-                                        {4,-1}, {3,-1}, {2,-1}, {1,-1}, {0,-1}
-                                      };
-    mi->mbmi.need_to_clamp_mvs = 0;
-    for (row = 0; row < 4; ++row)
-    {
-        int mb_to_top_edge = mb->mb_to_top_edge + ((row*4)<<3);
-        int mb_to_bottom_edge = mb->mb_to_bottom_edge - ((row*4)<<3);
-        for (col = 0; col < 4; ++col)
-        {
-            int mb_to_left_edge = mb->mb_to_left_edge + ((col*4)<<3);
-            int mb_to_right_edge = mb->mb_to_right_edge - ((col*4)<<3);
-            int w_sum = 0;
-            int mv_row_sum = 0;
-            int mv_col_sum = 0;
-            int_mv * const mv = &(mi->bmi[row*4 + col].mv);
-            mv->as_int = 0;
-            for (i = 0; i < NUM_NEIGHBORS; ++i)
-            {
-                /* Calculate the weighted sum of neighboring MVs referring
-                 * to the dominant frame type.
-                 */
-                const int w = weights_q7[abs(row - neigh_pos[i].row)]
-                                        [abs(col - neigh_pos[i].col)];
-                if (neighbors[i].ref_frame != dom_ref_frame)
-                    continue;
-                w_sum += w;
-                /* Q7 * Q3 = Q10 */
-                mv_row_sum += w*neighbors[i].mv.row;
-                mv_col_sum += w*neighbors[i].mv.col;
-            }
-            if (w_sum > 0)
-            {
-                /* Avoid division by zero.
-                 * Normalize with the sum of the coefficients
-                 * Q3 = Q10 / Q7
-                 */
-                mv->as_mv.row = mv_row_sum / w_sum;
-                mv->as_mv.col = mv_col_sum / w_sum;
-                mi->mbmi.need_to_clamp_mvs |= vp8_check_mv_bounds(
-                                                            mv,
-                                                            mb_to_left_edge,
-                                                            mb_to_right_edge,
-                                                            mb_to_top_edge,
-                                                            mb_to_bottom_edge);
-            }
-        }
-    }
-}
-
-void vp8_interpolate_motion(MACROBLOCKD *mb,
-                        int mb_row, int mb_col,
-                        int mb_rows, int mb_cols,
-                        int mi_stride)
-{
-    /* Find relevant neighboring blocks */
-    EC_BLOCK neighbors[NUM_NEIGHBORS];
-    MV_REFERENCE_FRAME dom_ref_frame;
-    int i;
-    /* Initialize the array. MAX_REF_FRAMES is interpreted as "doesn't exist" */
-    for (i = 0; i < NUM_NEIGHBORS; ++i)
-    {
-        neighbors[i].ref_frame = MAX_REF_FRAMES;
-        neighbors[i].mv.row = neighbors[i].mv.col = 0;
-    }
-    find_neighboring_blocks(mb->mode_info_context,
-                                neighbors,
-                                mb_row, mb_col,
-                                mb_rows, mb_cols,
-                                mb->mode_info_stride);
-    /* Determine the dominant block type */
-    dom_ref_frame = dominant_ref_frame(neighbors);
-    /* Interpolate MVs for the missing blocks
-     * from the dominating MVs */
-    interpolate_mvs(mb, neighbors, dom_ref_frame);
-
-    mb->mode_info_context->mbmi.ref_frame = dom_ref_frame;
-    mb->mode_info_context->mbmi.mode = SPLITMV;
-    mb->mode_info_context->mbmi.uv_mode = DC_PRED;
-    mb->mode_info_context->mbmi.partitioning = 3;
-    mb->mode_info_context->mbmi.segment_id = 0;
-}
-
-void vp8_conceal_corrupt_mb(MACROBLOCKD *xd)
-{
-    /* This macroblock has corrupt residual, use the motion compensated
-       image (predictor) for concealment */
-    vp8_recon_copy16x16(xd->predictor, 16, xd->dst.y_buffer, xd->dst.y_stride);
-    vp8_recon_copy8x8(xd->predictor + 256, 8,
-                      xd->dst.u_buffer, xd->dst.uv_stride);
-    vp8_recon_copy8x8(xd->predictor + 320, 8,
-                      xd->dst.v_buffer, xd->dst.uv_stride);
-}
--- a/vp8/decoder/error_concealment.h
+++ b/vp8/decoder/error_concealment.h
@ -1,41 +0,0 @@
-/*
- *  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef ERROR_CONCEALMENT_H
-#define ERROR_CONCEALMENT_H
-
-#include "onyxd_int.h"
-#include "ec_types.h"
-
-/* Allocate memory for the overlap lists */
-int vp8_alloc_overlap_lists(VP8D_COMP *pbi);
-
-/* Deallocate the overlap lists */
-void vp8_de_alloc_overlap_lists(VP8D_COMP *pbi);
-
-/* Estimate all missing motion vectors. */
-void vp8_estimate_missing_mvs(VP8D_COMP *pbi);
-
-/* Functions for spatial MV interpolation */
-
-/* Interpolates all motion vectors for a macroblock mb at position
- * (mb_row, mb_col). */
-void vp8_interpolate_motion(MACROBLOCKD *mb,
-                            int mb_row, int mb_col,
-                            int mb_rows, int mb_cols,
-                            int mi_stride);
-
-/* Conceal a macroblock with corrupt residual.
- * Copies the prediction signal to the reconstructed image.
- */
-void vp8_conceal_corrupt_mb(MACROBLOCKD *xd);
-
-#endif
--- a/vp8/decoder/generic/dsystemdependent.c
+++ b/vp8/decoder/generic/dsystemdependent.c
@ -9,7 +9,7 @@
 */


-#include "vpx_config.h"
+#include "vpx_ports/config.h"
 #include "vp8/decoder/dequantize.h"
 #include "vp8/decoder/onyxd_int.h"

@ -21,6 +21,12 @@ void vp8_dmachine_specific_config(VP8D_COMP *pbi)
    /* Pure C: */
 #if CONFIG_RUNTIME_CPU_DETECT
    pbi->mb.rtcd                     = &pbi->common.rtcd;
+    pbi->dequant.block_2x2           = vp8_dequantize_b_2x2_c;
+    pbi->dequant.idct_add_8x8        = vp8_dequant_idct_add_8x8_c;
+    pbi->dequant.dc_idct_add_8x8     = vp8_dequant_dc_idct_add_8x8_c;
+    pbi->dequant.dc_idct_add_y_block_8x8 = vp8_dequant_dc_idct_add_y_block_8x8_c;
+    pbi->dequant.idct_add_y_block_8x8 = vp8_dequant_idct_add_y_block_8x8_c;
+    pbi->dequant.idct_add_uv_block_8x8 = vp8_dequant_idct_add_uv_block_8x8_c;
    pbi->dequant.block               = vp8_dequantize_b_c;
    pbi->dequant.idct_add            = vp8_dequant_idct_add_c;
    pbi->dequant.dc_idct_add         = vp8_dequant_dc_idct_add_c;
@ -38,11 +44,9 @@ void vp8_dmachine_specific_config(VP8D_COMP *pbi)
    vp8_arch_arm_decode_init(pbi);
 #endif

-#if CONFIG_EXTEND_QRANGE
    pbi->dequant.idct_add            = vp8_dequant_idct_add_c;
    pbi->dequant.dc_idct_add         = vp8_dequant_dc_idct_add_c;
    pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_c;
    pbi->dequant.idct_add_y_block    = vp8_dequant_idct_add_y_block_c;
    pbi->dequant.idct_add_uv_block   = vp8_dequant_idct_add_uv_block_c;
-#endif
 }
--- a/vp8/decoder/idct_blk.c
+++ b/vp8/decoder/idct_blk.c
@ -8,7 +8,7 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

-#include "vpx_config.h"
+#include "vpx_ports/config.h"
 #include "vp8/common/idct.h"
 #include "dequantize.h"

@ -122,3 +122,45 @@ void vp8_dequant_idct_add_uv_block_c
        dstv += 4*stride - 8;
    }
 }
+
+/* Calls vp8_dequant_dc_idct_add_8x8_c once for each of the four 8x8 quadrants,
+ * handing them dc[0], dc[1], dc[4] and dc[8] in turn; eobs and xd are unused. */
+void vp8_dequant_dc_idct_add_y_block_8x8_c
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dst, int stride, char *eobs, short *dc, MACROBLOCKD *xd)
+{
+     unsigned char *pre_lo = pre + 8*16, *dst_lo = dst + 8*stride;
+     vp8_dequant_dc_idct_add_8x8_c (q, dq, pre, dst, 16, stride, dc[0]);
+     vp8_dequant_dc_idct_add_8x8_c (q + 64, dq, pre + 8, dst + 8, 16, stride, dc[1]);
+     vp8_dequant_dc_idct_add_8x8_c (q + 128, dq, pre_lo, dst_lo, 16, stride, dc[4]);
+     vp8_dequant_dc_idct_add_8x8_c (q + 192, dq, pre_lo + 8, dst_lo + 8, 16, stride, dc[8]);
+}
+
+/* Runs vp8_dequant_idct_add_8x8_c over the four 8x8 quadrants in the order
+ * top-left, top-right, bottom-left, bottom-right.  q advances by 64 entries
+ * per quadrant; pre is addressed with 16 bytes per row and dst with stride
+ * bytes per row.  dq is passed unchanged; eobs and xd are not used.
+ */
+void vp8_dequant_idct_add_y_block_8x8_c
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dst, int stride, char *eobs, MACROBLOCKD *xd)
+{
+  unsigned char *pre_lo = pre + 8*16, *dst_lo = dst + 8*stride;
+  vp8_dequant_idct_add_8x8_c (q, dq, pre, dst, 16, stride);
+  vp8_dequant_idct_add_8x8_c (q + 64, dq, pre + 8, dst + 8, 16, stride);
+  vp8_dequant_idct_add_8x8_c (q + 128, dq, pre_lo, dst_lo, 16, stride);
+  vp8_dequant_idct_add_8x8_c (q + 192, dq, pre_lo + 8, dst_lo + 8, 16, stride);
+}
+
+/* Two vp8_dequant_idct_add_8x8_c calls: the first 64 entries of q and pre go
+ * to dstu, the following 64 entries of each go to dstv.  The value 8 and
+ * stride are forwarded unchanged to both calls (presumably the pre and dst
+ * pitches -- confirm against the callee); eobs and xd are not used. */
+void vp8_dequant_idct_add_uv_block_8x8_c
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dstu, unsigned char *dstv, int stride, char *eobs, MACROBLOCKD *xd)
+{
+  vp8_dequant_idct_add_8x8_c (q, dq, pre, dstu, 8, stride);
+  vp8_dequant_idct_add_8x8_c (q + 64, dq, pre + 64, dstv, 8, stride);
+}
+
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@ -21,8 +21,6 @@
 #include "vp8/common/loopfilter.h"
 #include "vp8/common/swapyv12buffer.h"
 #include "vp8/common/g_common.h"
-#include "vp8/common/threading.h"
-#include "decoderthreading.h"
 #include <stdio.h>
 #include <assert.h>

@ -31,9 +29,6 @@
 #include "vp8/common/systemdependent.h"
 #include "vpx_ports/vpx_timer.h"
 #include "detokenize.h"
-#if CONFIG_ERROR_CONCEALMENT
-#include "error_concealment.h"
-#endif
 #if ARCH_ARM
 #include "vpx_ports/arm.h"
 #endif
@ -43,6 +38,79 @@ extern void vp8cx_init_de_quantizer(VP8D_COMP *pbi);
 static int get_free_fb (VP8_COMMON *cm);
 static void ref_cnt_fb (int *buf, int *idx, int new_idx);

+#if CONFIG_DEBUG
+/* Writes one plane to f: `height` rows of `width` bytes each, stepping
+ * `stride` bytes between rows.  A plane with no rows writes nothing. */
+static void write_yuv_plane(FILE *f, const unsigned char *src,
+                            int width, int height, int stride)
+{
+    int row;
+
+    for (row = 0; row < height; row++)
+    {
+        fwrite(src, width, 1, f);
+        src += stride;
+    }
+}
+
+/* Debug helper: appends the frame held in s to the file `name` as raw planar
+ * data, Y plane first, then U, then V.
+ * name  path of the file, opened in binary append mode
+ * s     frame buffer whose y/u/v buffers, sizes and strides are read
+ *
+ * Returns without writing if the file cannot be opened; write errors are
+ * not reported.
+ */
+void vp8_recon_write_yuv_frame(char *name, YV12_BUFFER_CONFIG *s)
+{
+    FILE *yuv_file = fopen(name, "ab");
+
+    if (yuv_file == NULL)
+        return;
+
+    write_yuv_plane(yuv_file, s->y_buffer, s->y_width, s->y_height, s->y_stride);
+    write_yuv_plane(yuv_file, s->u_buffer, s->uv_width, s->uv_height, s->uv_stride);
+    write_yuv_plane(yuv_file, s->v_buffer, s->uv_width, s->uv_height, s->uv_stride);
+
+    fclose(yuv_file);
+}
+#endif
+//#define WRITE_RECON_BUFFER 1
+#if WRITE_RECON_BUFFER
+/* Writes `height` rows of `width` bytes from buf (rows `stride` bytes apart) to
+ * the file named from plane and this_frame; no-op if the file cannot be opened. */
+static void write_dx_plane(char plane, int this_frame, const unsigned char *buf,
+                           int width, int height, int stride)
+{
+    char filename[255];
+    FILE *plane_file;
+    int i;
+
+    sprintf(filename, "dx\\%c%04d.raw", plane, this_frame);
+    plane_file = fopen(filename, "wb");
+
+    if (plane_file == NULL)
+        return;
+
+    for (i = 0; i < height; i++)
+        fwrite(buf + i * stride, width, 1, plane_file);
+
+    fclose(plane_file);
+}
+
+/* Dumps frame to three raw files, one per plane (y, u, v), numbered with
+ * this_frame.  Only `width` bytes of each row are written, whatever the
+ * stride is.  A plane whose file cannot be opened is skipped. */
+void write_dx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame)
+{
+    write_dx_plane('y', this_frame, frame->y_buffer,
+                   frame->y_width, frame->y_height, frame->y_stride);
+    write_dx_plane('u', this_frame, frame->u_buffer,
+                   frame->uv_width, frame->uv_height, frame->uv_stride);
+    write_dx_plane('v', this_frame, frame->v_buffer,
+                   frame->uv_width, frame->uv_height, frame->uv_stride);
+}
+#endif

 void vp8dx_initialize()
 {
@ -51,12 +119,12 @@ void vp8dx_initialize()
    if (!init_done)
    {
        vp8_initialize_common();
+        vp8_init_quant_tables();
        vp8_scale_machine_specific_config();
        init_done = 1;
    }
 }

-
 VP8D_PTR vp8dx_create_decompressor(VP8D_CONFIG *oxcf)
 {
    VP8D_COMP *pbi = vpx_memalign(32, sizeof(VP8D_COMP));
@ -82,11 +150,6 @@ VP8D_PTR vp8dx_create_decompressor(VP8D_CONFIG *oxcf)
    pbi->common.current_video_frame = 0;
    pbi->ready_for_new_data = 1;

-#if CONFIG_MULTITHREAD
-    pbi->max_threads = oxcf->max_threads;
-    vp8_decoder_create_threads(pbi);
-#endif
-
    /* vp8cx_init_de_quantizer() is first called here. Add check in frame_init_dequantizer() to avoid
     *  unnecessary calling of vp8cx_init_de_quantizer() for every frame.
     */
@ -96,30 +159,11 @@ VP8D_PTR vp8dx_create_decompressor(VP8D_CONFIG *oxcf)

    pbi->common.error.setjmp = 0;

-#if CONFIG_ERROR_CONCEALMENT
-    pbi->ec_enabled = oxcf->error_concealment;
-#else
-    pbi->ec_enabled = 0;
-#endif
-    /* Error concealment is activated after a key frame has been
-     * decoded without errors when error concealment is enabled.
-     */
-    pbi->ec_active = 0;
-
    pbi->decoded_key_frame = 0;

-    pbi->input_partition = oxcf->input_partition;
-
-    /* Independent partitions is activated when a frame updates the
-     * token probability table to have equal probabilities over the
-     * PREV_COEF context.
-     */
-    pbi->independent_partitions = 0;
-
    return (VP8D_PTR) pbi;
 }

-
 void vp8dx_remove_decompressor(VP8D_PTR ptr)
 {
    VP8D_COMP *pbi = (VP8D_COMP *) ptr;
@ -127,14 +171,10 @@ void vp8dx_remove_decompressor(VP8D_PTR ptr)
    if (!pbi)
        return;

-#if CONFIG_MULTITHREAD
-    if (pbi->b_multithreaded_rd)
-        vp8mt_de_alloc_temp_buffers(pbi, pbi->common.mb_rows);
-    vp8_decoder_remove_threads(pbi);
-#endif
-#if CONFIG_ERROR_CONCEALMENT
-    vp8_de_alloc_overlap_lists(pbi);
-#endif
+    // Delete segmentation map
+    if (pbi->common.last_frame_seg_map != 0)
+        vpx_free(pbi->common.last_frame_seg_map);
+
    vp8_remove_common(&pbi->common);
    vpx_free(pbi->mbc);
    vpx_free(pbi);
@ -300,6 +340,22 @@ static int swap_frame_buffers (VP8_COMMON *cm)
    return err;
 }

+/*
+static void vp8_print_yuv_rec_mb(VP8_COMMON *cm, int mb_row, int mb_col)
+{
+  YV12_BUFFER_CONFIG *s = cm->frame_to_show;
+  unsigned char *src = s->y_buffer;
+  int i, j;
+
+  printf("After loop filter\n");
+  for (i=0;i<16;i++) {
+    for (j=0;j<16;j++)
+      printf("%3d ", src[(mb_row*16+i)*s->y_stride + mb_col*16+j]);
+    printf("\n");
+  }
+}
+*/
+
 int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsigned char *source, int64_t time_stamp)
 {
 #if HAVE_ARMV7
@ -319,115 +375,55 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign

    pbi->common.error.error_code = VPX_CODEC_OK;

-    if (pbi->input_partition && !(source == NULL && size == 0))
+    pbi->Source = source;
+    pbi->source_sz = size;
+
+    if (pbi->source_sz == 0)
    {
-        /* Store a pointer to this partition and return. We haven't
-         * received the complete frame yet, so we will wait with decoding.
-         */
-        assert(pbi->num_partitions < MAX_PARTITIONS);
-        pbi->partitions[pbi->num_partitions] = source;
-        pbi->partition_sizes[pbi->num_partitions] = size;
-        pbi->source_sz += size;
-        pbi->num_partitions++;
-        if (pbi->num_partitions > (1 << EIGHT_PARTITION) + 1)
-        {
-            pbi->common.error.error_code = VPX_CODEC_UNSUP_BITSTREAM;
-            pbi->common.error.setjmp = 0;
-            pbi->num_partitions = 0;
-            return -1;
-        }
-        return 0;
+       /* A zero-sized input is used to signal that we are missing frames.
+        * We do not know whether the missing frame(s) were supposed to update
+        * any of the reference buffers, so we act conservatively and
+        * mark only the last buffer as corrupted.
+        */
+        cm->yv12_fb[cm->lst_fb_idx].corrupted = 1;
    }
-    else
+
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (cm->rtcd.flags & HAS_NEON)
+#endif
    {
-        if (!pbi->input_partition)
-        {
-            pbi->Source = source;
-            pbi->source_sz = size;
-        }
-        else
-        {
-            assert(pbi->common.multi_token_partition <= EIGHT_PARTITION);
-            if (pbi->num_partitions == 0)
-            {
-                pbi->num_partitions = 1;
-                pbi->partitions[0] = NULL;
-                pbi->partition_sizes[0] = 0;
-            }
-            while (pbi->num_partitions < (1 << pbi->common.multi_token_partition) + 1)
-            {
-                // Reset all missing partitions
-                pbi->partitions[pbi->num_partitions] =
-                    pbi->partitions[pbi->num_partitions - 1] +
-                    pbi->partition_sizes[pbi->num_partitions - 1];
-                pbi->partition_sizes[pbi->num_partitions] = 0;
-                pbi->num_partitions++;
-            }
-        }
+        vp8_push_neon(dx_store_reg);
+    }
+#endif

-        if (pbi->source_sz == 0)
-        {
-           /* This is used to signal that we are missing frames.
-            * We do not know if the missing frame(s) was supposed to update
-            * any of the reference buffers, but we act conservative and
-            * mark only the last buffer as corrupted.
-            */
-            cm->yv12_fb[cm->lst_fb_idx].corrupted = 1;
-
-            /* If error concealment is disabled we won't signal missing frames to
-             * the decoder.
-             */
-            if (!pbi->ec_active)
-            {
-                /* Signal that we have no frame to show. */
-                cm->show_frame = 0;
-
-                pbi->num_partitions = 0;
-
-                /* Nothing more to do. */
-                return 0;
-            }
-        }
+    cm->new_fb_idx = get_free_fb (cm);

+    if (setjmp(pbi->common.error.jmp))
+    {
 #if HAVE_ARMV7
 #if CONFIG_RUNTIME_CPU_DETECT
        if (cm->rtcd.flags & HAS_NEON)
 #endif
        {
-            vp8_push_neon(dx_store_reg);
+            vp8_pop_neon(dx_store_reg);
        }
 #endif
+        pbi->common.error.setjmp = 0;

-        cm->new_fb_idx = get_free_fb (cm);
+       /* We do not know whether the missing frame(s) were supposed to update
+        * any of the reference buffers, so we act conservatively and
+        * mark only the last buffer as corrupted.
+        */
+        cm->yv12_fb[cm->lst_fb_idx].corrupted = 1;

-        if (setjmp(pbi->common.error.jmp))
-        {
-#if HAVE_ARMV7
-#if CONFIG_RUNTIME_CPU_DETECT
-            if (cm->rtcd.flags & HAS_NEON)
-#endif
-            {
-                vp8_pop_neon(dx_store_reg);
-            }
-#endif
-            pbi->common.error.setjmp = 0;
-
-            pbi->num_partitions = 0;
-
-           /* We do not know if the missing frame(s) was supposed to update
-            * any of the reference buffers, but we act conservative and
-            * mark only the last buffer as corrupted.
-            */
-            cm->yv12_fb[cm->lst_fb_idx].corrupted = 1;
-
-            if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0)
-              cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
-            return -1;
-        }
-
-        pbi->common.error.setjmp = 1;
+        if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0)
+          cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
+        return -1;
    }

+    pbi->common.error.setjmp = 1;
+
    retcode = vp8_decode_frame(pbi);

    if (retcode < 0)
@ -442,14 +438,11 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
 #endif
        pbi->common.error.error_code = VPX_CODEC_ERROR;
        pbi->common.error.setjmp = 0;
-        pbi->num_partitions = 0;
        if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0)
          cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
        return retcode;
    }

-#if CONFIG_MULTITHREAD
-    if (pbi->b_multithreaded_rd && cm->multi_token_partition != ONE_PARTITION)
    {
        if (swap_frame_buffers (cm))
        {
@ -463,27 +456,17 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
 #endif
            pbi->common.error.error_code = VPX_CODEC_ERROR;
            pbi->common.error.setjmp = 0;
-            pbi->num_partitions = 0;
            return -1;
        }
-    } else
+
+#if WRITE_RECON_BUFFER
+        if(cm->show_frame)
+            write_dx_frame_to_file(cm->frame_to_show,
+                cm->current_video_frame);
+        else
+            write_dx_frame_to_file(cm->frame_to_show,
+                cm->current_video_frame+1000);
 #endif
-    {
-        if (swap_frame_buffers (cm))
-        {
-#if HAVE_ARMV7
-#if CONFIG_RUNTIME_CPU_DETECT
-            if (cm->rtcd.flags & HAS_NEON)
-#endif
-            {
-                vp8_pop_neon(dx_store_reg);
-            }
-#endif
-            pbi->common.error.error_code = VPX_CODEC_ERROR;
-            pbi->common.error.setjmp = 0;
-            pbi->num_partitions = 0;
-            return -1;
-        }

        if(cm->filter_level)
        {
@ -493,30 +476,22 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
        vp8_yv12_extend_frame_borders_ptr(cm->frame_to_show);
    }

+#if CONFIG_DEBUG
+    vp8_recon_write_yuv_frame("recon.yuv", cm->frame_to_show);
+#endif

    vp8_clear_system_state();

-#if CONFIG_ERROR_CONCEALMENT
-    /* swap the mode infos to storage for future error concealment */
-    if (pbi->ec_enabled && pbi->common.prev_mi)
+    if(cm->show_frame)
    {
-        const MODE_INFO* tmp = pbi->common.prev_mi;
-        int row, col;
-        pbi->common.prev_mi = pbi->common.mi;
-        pbi->common.mi = tmp;
-
-        /* Propagate the segment_ids to the next frame */
-        for (row = 0; row < pbi->common.mb_rows; ++row)
-        {
-            for (col = 0; col < pbi->common.mb_cols; ++col)
-            {
-                const int i = row*pbi->common.mode_info_stride + col;
-                pbi->common.mi[i].mbmi.segment_id =
-                        pbi->common.prev_mi[i].mbmi.segment_id;
-            }
-        }
+        vpx_memcpy(cm->prev_mip, cm->mip,
+            (cm->mb_cols + 1) * (cm->mb_rows + 1)* sizeof(MODE_INFO));
+    }
+    else
+    {
+        vpx_memset(cm->prev_mip, 0,
+            (cm->mb_cols + 1) * (cm->mb_rows + 1)* sizeof(MODE_INFO));
    }
-#endif

    /*vp8_print_modes_and_motion_vectors( cm->mi, cm->mb_rows,cm->mb_cols, cm->current_video_frame);*/

@ -525,7 +500,6 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign

    pbi->ready_for_new_data = 0;
    pbi->last_time_stamp = time_stamp;
-    pbi->num_partitions = 0;
    pbi->source_sz = 0;

 #if 0
--- a/vp8/decoder/onyxd_int.h
+++ b/vp8/decoder/onyxd_int.h
@ -11,15 +11,13 @@

 #ifndef __INC_VP8D_INT_H
 #define __INC_VP8D_INT_H
-#include "vpx_config.h"
+#include "vpx_ports/config.h"
 #include "vp8/common/onyxd.h"
 #include "treereader.h"
 #include "vp8/common/onyxc_int.h"
-#include "vp8/common/threading.h"
 #include "dequantize.h"
-#if CONFIG_ERROR_CONCEALMENT
-#include "ec_types.h"
-#endif
+
+//#define DEC_DEBUG

 typedef struct
 {
@ -45,10 +43,12 @@ typedef struct
 typedef struct
 {
    int const *scan;
+    int const *scan_8x8;
    UINT8 const *ptr_block2leftabove;
    vp8_tree_index const *vp8_coef_tree_ptr;
    unsigned char *norm_ptr;
    UINT8 *ptr_coef_bands_x;
+    UINT8 *ptr_coef_bands_x_8x8;

    ENTROPY_CONTEXT_PLANES *A;
    ENTROPY_CONTEXT_PLANES *L;
@ -57,6 +57,7 @@ typedef struct
    BOOL_DECODER *current_bc;

    vp8_prob const *coef_probs[4];
+    vp8_prob const *coef_probs_8x8[4];

    UINT8 eob[25];

@ -75,38 +76,6 @@ typedef struct VP8Decompressor

    const unsigned char *Source;
    unsigned int   source_sz;
-    const unsigned char *partitions[MAX_PARTITIONS];
-    unsigned int   partition_sizes[MAX_PARTITIONS];
-    unsigned int   num_partitions;
-
-#if CONFIG_MULTITHREAD
-    /* variable for threading */
-
-    volatile int b_multithreaded_rd;
-    int max_threads;
-    int current_mb_col_main;
-    int decoding_thread_count;
-    int allocated_decoding_thread_count;
-
-    int mt_baseline_filter_level[MAX_MB_SEGMENTS];
-    int sync_range;
-    int *mt_current_mb_col;                  /* Each row remembers its already decoded column. */
-
-    unsigned char **mt_yabove_row;           /* mb_rows x width */
-    unsigned char **mt_uabove_row;
-    unsigned char **mt_vabove_row;
-    unsigned char **mt_yleft_col;            /* mb_rows x 16 */
-    unsigned char **mt_uleft_col;            /* mb_rows x 8 */
-    unsigned char **mt_vleft_col;            /* mb_rows x 8 */
-
-    MB_ROW_DEC           *mb_row_di;
-    DECODETHREAD_DATA    *de_thread_data;
-
-    pthread_t           *h_decoding_thread;
-    sem_t               *h_event_start_decoding;
-    sem_t                h_event_end_decoding;
-    /* end of threading data */
-#endif

    vp8_reader *mbc;
    int64_t last_time_stamp;
@ -120,23 +89,9 @@ typedef struct VP8Decompressor
    vp8_dequant_rtcd_vtable_t        dequant;
 #endif

-
-    vp8_prob prob_intra;
-    vp8_prob prob_last;
-    vp8_prob prob_gf;
    vp8_prob prob_skip_false;

-#if CONFIG_ERROR_CONCEALMENT
-    MB_OVERLAP *overlaps;
-    /* the mb num from which modes and mvs (first partition) are corrupt */
-    unsigned int mvs_corrupt_from_mb;
-#endif
-    int ec_enabled;
-    int ec_active;
-    int input_partition;
    int decoded_key_frame;
-    int independent_partitions;
-    int frame_corrupt_residual;

 } VP8D_COMP;

--- a/vp8/decoder/reconintra_mt.c
+++ b/vp8/decoder/reconintra_mt.c
@ -9,7 +9,7 @@
 */


-#include "vpx_config.h"
+#include "vpx_ports/config.h"
 #include "vp8/common/recon.h"
 #include "vp8/common/reconintra.h"
 #include "vpx_mem/vpx_mem.h"
--- a/vp8/decoder/reconintra_mt.h
+++ b/vp8/decoder/reconintra_mt.h
@ -12,15 +12,4 @@
 #ifndef __INC_RECONINTRA_MT_H
 #define __INC_RECONINTRA_MT_H

-/* reconintra functions used in multi-threaded decoder */
-#if CONFIG_MULTITHREAD
-extern void vp8mt_build_intra_predictors_mby(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col);
-extern void vp8mt_build_intra_predictors_mby_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col);
-extern void vp8mt_build_intra_predictors_mbuv(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col);
-extern void vp8mt_build_intra_predictors_mbuv_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col);
-
-extern void vp8mt_predict_intra4x4(VP8D_COMP *pbi, MACROBLOCKD *x, int b_mode, unsigned char *predictor, int mb_row, int mb_col, int num);
-extern void vp8mt_intra_prediction_down_copy(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col);
-#endif
-
 #endif
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
--- a/vp8/decoder/x86/idct_blk_mmx.c
+++ b/vp8/decoder/x86/idct_blk_mmx.c
@ -8,7 +8,7 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

-#include "vpx_config.h"
+#include "vpx_ports/config.h"
 #include "vp8/common/idct.h"
 #include "vp8/decoder/dequantize.h"

--- a/vp8/decoder/x86/idct_blk_sse2.c
+++ b/vp8/decoder/x86/idct_blk_sse2.c
@ -8,7 +8,7 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

-#include "vpx_config.h"
+#include "vpx_ports/config.h"
 #include "vp8/common/idct.h"
 #include "vp8/decoder/dequantize.h"

--- a/vp8/decoder/x86/x86_dsystemdependent.c
+++ b/vp8/decoder/x86/x86_dsystemdependent.c
@ -9,7 +9,7 @@
 */


-#include "vpx_config.h"
+#include "vpx_ports/config.h"
 #include "vpx_ports/x86.h"
 #include "vp8/decoder/onyxd_int.h"

--- a/vp8/encoder/arm/arm_csystemdependent.c
+++ b/vp8/encoder/arm/arm_csystemdependent.c
@ -9,7 +9,7 @@
 */


-#include "vpx_config.h"
+#include "vpx_ports/config.h"
 #include "vpx_ports/arm.h"
 #include "vp8/encoder/variance.h"
 #include "vp8/encoder/onyx_int.h"
@ -56,8 +56,6 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
        cpi->rtcd.variance.mse16x16              = vp8_mse16x16_armv6;
        /*cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_c;*/

-        /*cpi->rtcd.variance.get4x4sse_cs          = vp8_get4x4sse_cs_c;*/
-
        cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_armv6;
        cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_armv6;
        cpi->rtcd.fdct.fast4x4                   = vp8_short_fdct4x4_armv6;
@ -103,8 +101,6 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
        cpi->rtcd.variance.mse16x16              = vp8_mse16x16_neon;
        /*cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_c;*/

-        cpi->rtcd.variance.get4x4sse_cs          = vp8_get4x4sse_cs_neon;
-
        cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_neon;
        cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_neon;
        cpi->rtcd.fdct.fast4x4                   = vp8_short_fdct4x4_neon;
--- a/vp8/encoder/arm/variance_arm.c
+++ b/vp8/encoder/arm/variance_arm.c
@ -13,6 +13,12 @@
 #include "vp8/common/filter.h"
 #include "vp8/common/arm/bilinearfilter_arm.h"

+#if CONFIG_SIXTEENTH_SUBPEL_UV
+#define HALFNDX 8  /* xoffset/yoffset value that selects the half-pixel variance paths */
+#else
+#define HALFNDX 4  /* same half-pixel index when CONFIG_SIXTEENTH_SUBPEL_UV is off */
+#endif
+
 #if HAVE_ARMV6

 unsigned int vp8_sub_pixel_variance8x8_armv6
@ -59,17 +65,17 @@ unsigned int vp8_sub_pixel_variance16x16_armv6
    const short *HFilter, *VFilter;
    unsigned int var;

-    if (xoffset == 4 && yoffset == 0)
+    if (xoffset == HALFNDX && yoffset == 0)
    {
        var = vp8_variance_halfpixvar16x16_h_armv6(src_ptr, src_pixels_per_line,
                                                   dst_ptr, dst_pixels_per_line, sse);
    }
-    else if (xoffset == 0 && yoffset == 4)
+    else if (xoffset == 0 && yoffset == HALFNDX)
    {
        var = vp8_variance_halfpixvar16x16_v_armv6(src_ptr, src_pixels_per_line,
                                                   dst_ptr, dst_pixels_per_line, sse);
    }
-    else if (xoffset == 4 && yoffset == 4)
+    else if (xoffset == HALFNDX && yoffset == HALFNDX)
    {
        var = vp8_variance_halfpixvar16x16_hv_armv6(src_ptr, src_pixels_per_line,
                                                   dst_ptr, dst_pixels_per_line, sse);
@ -107,11 +113,11 @@ unsigned int vp8_sub_pixel_variance16x16_neon
    unsigned int *sse
 )
 {
-  if (xoffset == 4 && yoffset == 0)
+  if (xoffset == HALFNDX && yoffset == 0)
    return vp8_variance_halfpixvar16x16_h_neon(src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, sse);
-  else if (xoffset == 0 && yoffset == 4)
+  else if (xoffset == 0 && yoffset == HALFNDX)
    return vp8_variance_halfpixvar16x16_v_neon(src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, sse);
-  else if (xoffset == 4 && yoffset == 4)
+  else if (xoffset == HALFNDX && yoffset == HALFNDX)
    return vp8_variance_halfpixvar16x16_hv_neon(src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, sse);
  else
    return vp8_sub_pixel_variance16x16_neon_func(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);
--- a/vp8/encoder/arm/variance_arm.h
+++ b/vp8/encoder/arm/variance_arm.h
@ -83,7 +83,6 @@ extern prototype_variance(vp8_variance_halfpixvar16x16_hv_neon);

 //extern prototype_getmbss(vp8_get_mb_ss_c);
 extern prototype_variance(vp8_mse16x16_neon);
-extern prototype_get16x16prederror(vp8_get4x4sse_cs_neon);

 #if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_variance_sad4x4
@ -146,8 +145,6 @@ extern prototype_get16x16prederror(vp8_get4x4sse_cs_neon);
 #undef  vp8_variance_mse16x16
 #define vp8_variance_mse16x16 vp8_mse16x16_neon

-#undef  vp8_variance_get4x4sse_cs
-#define vp8_variance_get4x4sse_cs vp8_get4x4sse_cs_neon
 #endif

 #endif
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
--- a/vp8/encoder/bitstream.h
+++ b/vp8/encoder/bitstream.h
@ -17,23 +17,9 @@ void vp8cx_pack_tokens_armv5(vp8_writer *w, const TOKENEXTRA *p, int xcount,
                             vp8_token *,
                             vp8_extra_bit_struct *,
                             const vp8_tree_index *);
-void vp8cx_pack_tokens_into_partitions_armv5(VP8_COMP *, unsigned char *, int , int *,
-        vp8_token *,
-        vp8_extra_bit_struct *,
-        const vp8_tree_index *);
-void vp8cx_pack_mb_row_tokens_armv5(VP8_COMP *cpi, vp8_writer *w,
-                                    vp8_token *,
-                                    vp8_extra_bit_struct *,
-                                    const vp8_tree_index *);
 # define pack_tokens(a,b,c)                  \
    vp8cx_pack_tokens_armv5(a,b,c,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
-# define pack_tokens_into_partitions(a,b,c,d)  \
-    vp8cx_pack_tokens_into_partitions_armv5(a,b,c,d,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
-# define pack_mb_row_tokens(a,b)               \
-    vp8cx_pack_mb_row_tokens_armv5(a,b,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
 #else
 # define pack_tokens(a,b,c)                  pack_tokens_c(a,b,c)
-# define pack_tokens_into_partitions(a,b,c,d)  pack_tokens_into_partitions_c(a,b,c,d)
-# define pack_mb_row_tokens(a,b)               pack_mb_row_tokens_c(a,b)
 #endif
 #endif
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@ -46,8 +46,8 @@ typedef struct
    int src;
    int src_stride;

-//  MV  enc_mv;
-    int force_empty;
+    int eob_max_offset;
+    int eob_max_offset_8x8;

 } BLOCK;

@ -95,9 +95,16 @@ typedef struct
    int *mvcost[2];
    int mvsadcosts[2][MVfpvals+1];
    int *mvsadcost[2];
+#if CONFIG_HIGH_PRECISION_MV
+    int mvcosts_hp[2][MVvals_hp+1];
+    int *mvcost_hp[2];
+    int mvsadcosts_hp[2][MVfpvals_hp+1];
+    int *mvsadcost_hp[2];
+#endif
    int mbmode_cost[2][MB_MODE_COUNT];
    int intra_uv_mode_cost[2][MB_MODE_COUNT];
    unsigned int bmode_costs[10][10][10];
+    unsigned int i8x8_mode_costs[MB_MODE_COUNT];
    unsigned int inter_bmode_costs[B_MODE_COUNT];

    // These define limits to motion vector components to prevent them from extending outside the UMV borders
@ -116,8 +123,15 @@ typedef struct

    unsigned char *active_ptr;
    MV_CONTEXT *mvc;
+#if CONFIG_HIGH_PRECISION_MV
+    MV_CONTEXT_HP *mvc_hp;
+#endif
+
+    unsigned int token_costs[BLOCK_TYPES] [COEF_BANDS]
+                            [PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS];
+    unsigned int token_costs_8x8[BLOCK_TYPES] [COEF_BANDS]
+                            [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];

-    unsigned int token_costs[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
    int optimize;
    int q_index;

@ -126,6 +140,10 @@ typedef struct
    void (*short_walsh4x4)(short *input, short *output, int pitch);
    void (*quantize_b)(BLOCK *b, BLOCKD *d);
    void (*quantize_b_pair)(BLOCK *b1, BLOCK *b2, BLOCKD *d0, BLOCKD *d1);
+    void (*vp8_short_fdct8x8)(short *input, short *output, int pitch);
+    void (*short_fhaar2x2)(short *input, short *output, int pitch);
+    void (*quantize_b_8x8)(BLOCK *b, BLOCKD *d);
+    void (*quantize_b_2x2)(BLOCK *b, BLOCKD *d);

 } MACROBLOCK;

--- a/vp8/encoder/dct.c
+++ b/vp8/encoder/dct.c
@ -10,7 +10,123 @@


 #include <math.h>
-#include "vpx_config.h"
+#include "vpx_ports/config.h"
+
+/* Floating-point forward 8x8 DCT: rows are transformed first, then columns.
+ * block: 8 rows of 8 input samples; consecutive rows are pitch / 2 shorts apart.
+ * coefs: receives 64 coefficients, row-major with a stride of 8, each rounded
+ *        with floor(x + 0.5). Every input sample is scaled by 8 beforehand.
+ */
+void vp8_short_fdct8x8_c(short *block, short *coefs, int pitch)
+{
+  int j1, i, j, k;
+  float b[8];
+  float b1[8];
+  float d[8][8];
+  float f0 = (float) .7071068;
+  float f1 = (float) .4903926;
+  float f2 = (float) .4619398;
+  float f3 = (float) .4157348;
+  float f4 = (float) .3535534;
+  float f5 = (float) .2777851;
+  float f6 = (float) .1913417;
+  float f7 = (float) .0975452;
+  pitch = pitch / 2;  /* from here on, a row stride counted in shorts */
+  for (i = 0, k = 0; i < 8; i++, k += pitch)
+  {
+    for (j = 0; j < 8; j++)
+    {
+      /* Scale by 8 with a multiply: left-shifting a negative value is undefined. */
+      b[j] = (float)(block[k + j] * 8);
+    }
+    /* Horizontal transform of row i, written to d[i][] */
+    for (j = 0; j < 4; j++)
+    {
+      j1 = 7 - j;
+      b1[j] = b[j] + b[j1];
+      b1[j1] = b[j] - b[j1];
+    }
+    b[0] = b1[0] + b1[3];
+    b[1] = b1[1] + b1[2];
+    b[2] = b1[1] - b1[2];
+    b[3] = b1[0] - b1[3];
+    b[4] = b1[4];
+    b[5] = (b1[6] - b1[5]) * f0;
+    b[6] = (b1[6] + b1[5]) * f0;
+    b[7] = b1[7];
+    d[i][0] = (b[0] + b[1]) * f4;
+    d[i][4] = (b[0] - b[1]) * f4;
+    d[i][2] = b[2] * f6 + b[3] * f2;
+    d[i][6] = b[3] * f6 - b[2] * f2;
+    b1[4] = b[4] + b[5];
+    b1[7] = b[7] + b[6];
+    b1[5] = b[4] - b[5];
+    b1[6] = b[7] - b[6];
+    d[i][1] = b1[4] * f7 + b1[7] * f1;
+    d[i][5] = b1[5] * f3 + b1[6] * f5;
+    d[i][7] = b1[7] * f7 - b1[4] * f1;
+    d[i][3] = b1[6] * f3 - b1[5] * f5;
+  }
+  /* Vertical transform, applied in place to each column i of d */
+  for (i = 0; i < 8; i++)
+  {
+    for (j = 0; j < 4; j++)
+    {
+      j1 = 7 - j;
+      b1[j] = d[j][i] + d[j1][i];
+      b1[j1] = d[j][i] - d[j1][i];
+    }
+    b[0] = b1[0] + b1[3];
+    b[1] = b1[1] + b1[2];
+    b[2] = b1[1] - b1[2];
+    b[3] = b1[0] - b1[3];
+    b[4] = b1[4];
+    b[5] = (b1[6] - b1[5]) * f0;
+    b[6] = (b1[6] + b1[5]) * f0;
+    b[7] = b1[7];
+    d[0][i] = (b[0] + b[1]) * f4;
+    d[4][i] = (b[0] - b[1]) * f4;
+    d[2][i] = b[2] * f6 + b[3] * f2;
+    d[6][i] = b[3] * f6 - b[2] * f2;
+    b1[4] = b[4] + b[5];
+    b1[7] = b[7] + b[6];
+    b1[5] = b[4] - b[5];
+    b1[6] = b[7] - b[6];
+    d[1][i] = b1[4] * f7 + b1[7] * f1;
+    d[5][i] = b1[5] * f3 + b1[6] * f5;
+    d[7][i] = b1[7] * f7 - b1[4] * f1;
+    d[3][i] = b1[6] * f3 - b1[5] * f5;
+  }
+  for (i = 0; i < 8; i++)  /* round each coefficient and store it row-major */
+  {
+    for (j = 0; j < 8; j++)
+    {
+      coefs[i * 8 + j] = (short) floor(d[i][j] + 0.5);
+    }
+  }
+}
+
+
+/* Forward 2x2 Haar transform, [1 1; 1 -1], over input positions 0, 1, 4 and 8.
+ * input:  coefficients to transform; only indices 0, 1, 4 and 8 are read.
+ * output: 16 entries; all are cleared, then indices 0, 1, 4 and 8 are written.
+ * pitch:  not referenced by this implementation.
+ */
+void vp8_short_fhaar2x2_c(short *input, short *output, int pitch) //pitch = 8
+{
+    int pos = 0;
+
+    /* Zero every output slot before any input is read. */
+    while (pos < 16)
+        output[pos++] = 0;
+
+    /* Each sum is halved; only the first one adds 1 before the shift. */
+    output[0] = (input[0] + input[1] + input[4] + input[8] + 1) >> 1;
+    output[1] = (input[0] - input[1] + input[4] - input[8]) >> 1;
+    output[4] = (input[0] + input[1] - input[4] - input[8]) >> 1;
+    output[8] = (input[0] - input[1] - input[4] + input[8]) >> 1;
+}
+
 void vp8_short_fdct4x4_c(short *input, short *output, int pitch)
 {
    int i;
@ -20,17 +136,11 @@ void vp8_short_fdct4x4_c(short *input, short *output, int pitch)

    for (i = 0; i < 4; i++)
    {
-#if CONFIG_EXTEND_QRANGE
        a1 = ((ip[0] + ip[3])<<5);
        b1 = ((ip[1] + ip[2])<<5);
        c1 = ((ip[1] - ip[2])<<5);
        d1 = ((ip[0] - ip[3])<<5);
-#else
-        a1 = ((ip[0] + ip[3])<<3);
-        b1 = ((ip[1] + ip[2])<<3);
-        c1 = ((ip[1] - ip[2])<<3);
-        d1 = ((ip[0] - ip[3])<<3);
-#endif
+
        op[0] = a1 + b1;
        op[2] = a1 - b1;

@ -78,22 +188,12 @@ void vp8_short_walsh4x4_c(short *input, short *output, int pitch)

    for (i = 0; i < 4; i++)
    {
-#if !CONFIG_EXTEND_QRANGE
-        a1 = ((ip[0] + ip[2])<<2);
-        d1 = ((ip[1] + ip[3])<<2);
-        c1 = ((ip[1] - ip[3])<<2);
-        b1 = ((ip[0] - ip[2])<<2);
-
-        op[0] = a1 + d1 + (a1!=0);
-#else
        a1 = ((ip[0] + ip[2]));
        d1 = ((ip[1] + ip[3]));
        c1 = ((ip[1] - ip[3]));
        b1 = ((ip[0] - ip[2]));

-
        op[0] = a1 + d1;
-#endif
        op[1] = b1 + c1;
        op[2] = b1 - c1;
        op[3] = a1 - d1;
@ -121,17 +221,11 @@ void vp8_short_walsh4x4_c(short *input, short *output, int pitch)
        c2 += c2<0;
        d2 += d2<0;

-#if !CONFIG_EXTEND_QRANGE
-        op[0] = (a2+3) >> 3;
-        op[4] = (b2+3) >> 3;
-        op[8] = (c2+3) >> 3;
-        op[12]= (d2+3) >> 3;
-#else
        op[0] = (a2+1) >> 2;
        op[4] = (b2+1) >> 2;
        op[8] = (c2+1) >> 2;
        op[12]= (d2+1) >> 2;
-#endif
+
        ip++;
        op++;
    }
--- a/vp8/encoder/dct.h
+++ b/vp8/encoder/dct.h
@ -22,6 +22,19 @@
 #include "arm/dct_arm.h"
 #endif

+
+/* Fall back to the C implementations when no earlier definition of these names exists. */
+#ifndef vp8_fdct_short8x8
+#define vp8_fdct_short8x8  vp8_short_fdct8x8_c
+#endif
+extern prototype_fdct(vp8_fdct_short8x8);
+
+#ifndef vp8_fhaar_short2x2
+#define vp8_fhaar_short2x2  vp8_short_fhaar2x2_c
+#endif
+extern prototype_fdct(vp8_fhaar_short2x2);
+
+
 #ifndef vp8_fdct_short4x4
 #define vp8_fdct_short4x4  vp8_short_fdct4x4_c
 #endif
@ -49,6 +62,8 @@ extern prototype_fdct(vp8_fdct_walsh_short4x4);
 typedef prototype_fdct(*vp8_fdct_fn_t);
 typedef struct
 {
+    vp8_fdct_fn_t    short8x8;
+    vp8_fdct_fn_t    haar_short2x2;
    vp8_fdct_fn_t    short4x4;
    vp8_fdct_fn_t    short8x4;
    vp8_fdct_fn_t    fast4x4;
--- a/vp8/encoder/defaultcoefcounts.h
+++ b/vp8/encoder/defaultcoefcounts.h
@ -1,223 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-/* Generated file, included by entropy.c */
-
-static const unsigned int default_coef_counts[BLOCK_TYPES]
-                                             [COEF_BANDS]
-                                             [PREV_COEF_CONTEXTS]
-                                             [MAX_ENTROPY_TOKENS] =
-{
-
-    {
-        /* Block Type ( 0 ) */
-        {
-            /* Coeff Band ( 0 ) */
-            {   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
-            {   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
-            {   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
-        },
-        {
-            /* Coeff Band ( 1 ) */
-            {30190, 26544, 225,  24,   4,   0,   0,   0,   0,   0,   0, 4171593,},
-            {26846, 25157, 1241, 130,  26,   6,   1,   0,   0,   0,   0, 149987,},
-            {10484, 9538, 1006, 160,  36,  18,   0,   0,   0,   0,   0, 15104,},
-        },
-        {
-            /* Coeff Band ( 2 ) */
-            {25842, 40456, 1126,  83,  11,   2,   0,   0,   0,   0,   0,   0,},
-            {9338, 8010, 512,  73,   7,   3,   2,   0,   0,   0,   0, 43294,},
-            {1047, 751, 149,  31,  13,   6,   1,   0,   0,   0,   0, 879,},
-        },
-        {
-            /* Coeff Band ( 3 ) */
-            {26136, 9826, 252,  13,   0,   0,   0,   0,   0,   0,   0,   0,},
-            {8134, 5574, 191,  14,   2,   0,   0,   0,   0,   0,   0, 35302,},
-            { 605, 677, 116,   9,   1,   0,   0,   0,   0,   0,   0, 611,},
-        },
-        {
-            /* Coeff Band ( 4 ) */
-            {10263, 15463, 283,  17,   0,   0,   0,   0,   0,   0,   0,   0,},
-            {2773, 2191, 128,   9,   2,   2,   0,   0,   0,   0,   0, 10073,},
-            { 134, 125,  32,   4,   0,   2,   0,   0,   0,   0,   0,  50,},
-        },
-        {
-            /* Coeff Band ( 5 ) */
-            {10483, 2663,  23,   1,   0,   0,   0,   0,   0,   0,   0,   0,},
-            {2137, 1251,  27,   1,   1,   0,   0,   0,   0,   0,   0, 14362,},
-            { 116, 156,  14,   2,   1,   0,   0,   0,   0,   0,   0, 190,},
-        },
-        {
-            /* Coeff Band ( 6 ) */
-            {40977, 27614, 412,  28,   0,   0,   0,   0,   0,   0,   0,   0,},
-            {6113, 5213, 261,  22,   3,   0,   0,   0,   0,   0,   0, 26164,},
-            { 382, 312,  50,  14,   2,   0,   0,   0,   0,   0,   0, 345,},
-        },
-        {
-            /* Coeff Band ( 7 ) */
-            {   0,  26,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
-            {   0,  13,   0,   0,   0,   0,   0,   0,   0,   0,   0, 319,},
-            {   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   8,},
-        },
-    },
-    {
-        /* Block Type ( 1 ) */
-        {
-            /* Coeff Band ( 0 ) */
-            {3268, 19382, 1043, 250,  93,  82,  49,  26,  17,   8,  25, 82289,},
-            {8758, 32110, 5436, 1832, 827, 668, 420, 153,  24,   0,   3, 52914,},
-            {9337, 23725, 8487, 3954, 2107, 1836, 1069, 399,  59,   0,   0, 18620,},
-        },
-        {
-            /* Coeff Band ( 1 ) */
-            {12419, 8420, 452,  62,   9,   1,   0,   0,   0,   0,   0,   0,},
-            {11715, 8705, 693,  92,  15,   7,   2,   0,   0,   0,   0, 53988,},
-            {7603, 8585, 2306, 778, 270, 145,  39,   5,   0,   0,   0, 9136,},
-        },
-        {
-            /* Coeff Band ( 2 ) */
-            {15938, 14335, 1207, 184,  55,  13,   4,   1,   0,   0,   0,   0,},
-            {7415, 6829, 1138, 244,  71,  26,   7,   0,   0,   0,   0, 9980,},
-            {1580, 1824, 655, 241,  89,  46,  10,   2,   0,   0,   0, 429,},
-        },
-        {
-            /* Coeff Band ( 3 ) */
-            {19453, 5260, 201,  19,   0,   0,   0,   0,   0,   0,   0,   0,},
-            {9173, 3758, 213,  22,   1,   1,   0,   0,   0,   0,   0, 9820,},
-            {1689, 1277, 276,  51,  17,   4,   0,   0,   0,   0,   0, 679,},
-        },
-        {
-            /* Coeff Band ( 4 ) */
-            {12076, 10667, 620,  85,  19,   9,   5,   0,   0,   0,   0,   0,},
-            {4665, 3625, 423,  55,  19,   9,   0,   0,   0,   0,   0, 5127,},
-            { 415, 440, 143,  34,  20,   7,   2,   0,   0,   0,   0, 101,},
-        },
-        {
-            /* Coeff Band ( 5 ) */
-            {12183, 4846, 115,  11,   1,   0,   0,   0,   0,   0,   0,   0,},
-            {4226, 3149, 177,  21,   2,   0,   0,   0,   0,   0,   0, 7157,},
-            { 375, 621, 189,  51,  11,   4,   1,   0,   0,   0,   0, 198,},
-        },
-        {
-            /* Coeff Band ( 6 ) */
-            {61658, 37743, 1203,  94,  10,   3,   0,   0,   0,   0,   0,   0,},
-            {15514, 11563, 903, 111,  14,   5,   0,   0,   0,   0,   0, 25195,},
-            { 929, 1077, 291,  78,  14,   7,   1,   0,   0,   0,   0, 507,},
-        },
-        {
-            /* Coeff Band ( 7 ) */
-            {   0, 990,  15,   3,   0,   0,   0,   0,   0,   0,   0,   0,},
-            {   0, 412,  13,   0,   0,   0,   0,   0,   0,   0,   0, 1641,},
-            {   0,  18,   7,   1,   0,   0,   0,   0,   0,   0,   0,  30,},
-        },
-    },
-    {
-        /* Block Type ( 2 ) */
-        {
-            /* Coeff Band ( 0 ) */
-            { 953, 24519, 628, 120,  28,  12,   4,   0,   0,   0,   0, 2248798,},
-            {1525, 25654, 2647, 617, 239, 143,  42,   5,   0,   0,   0, 66837,},
-            {1180, 11011, 3001, 1237, 532, 448, 239,  54,   5,   0,   0, 7122,},
-        },
-        {
-            /* Coeff Band ( 1 ) */
-            {1356, 2220,  67,  10,   4,   1,   0,   0,   0,   0,   0,   0,},
-            {1450, 2544, 102,  18,   4,   3,   0,   0,   0,   0,   0, 57063,},
-            {1182, 2110, 470, 130,  41,  21,   0,   0,   0,   0,   0, 6047,},
-        },
-        {
-            /* Coeff Band ( 2 ) */
-            { 370, 3378, 200,  30,   5,   4,   1,   0,   0,   0,   0,   0,},
-            { 293, 1006, 131,  29,  11,   0,   0,   0,   0,   0,   0, 5404,},
-            { 114, 387,  98,  23,   4,   8,   1,   0,   0,   0,   0, 236,},
-        },
-        {
-            /* Coeff Band ( 3 ) */
-            { 579, 194,   4,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
-            { 395, 213,   5,   1,   0,   0,   0,   0,   0,   0,   0, 4157,},
-            { 119, 122,   4,   0,   0,   0,   0,   0,   0,   0,   0, 300,},
-        },
-        {
-            /* Coeff Band ( 4 ) */
-            {  38, 557,  19,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
-            {  21, 114,  12,   1,   0,   0,   0,   0,   0,   0,   0, 427,},
-            {   0,   5,   0,   0,   0,   0,   0,   0,   0,   0,   0,   7,},
-        },
-        {
-            /* Coeff Band ( 5 ) */
-            {  52,   7,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
-            {  18,   6,   0,   0,   0,   0,   0,   0,   0,   0,   0, 652,},
-            {   1,   1,   0,   0,   0,   0,   0,   0,   0,   0,   0,  30,},
-        },
-        {
-            /* Coeff Band ( 6 ) */
-            { 640, 569,  10,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
-            {  25,  77,   2,   0,   0,   0,   0,   0,   0,   0,   0, 517,},
-            {   4,   7,   0,   0,   0,   0,   0,   0,   0,   0,   0,   3,},
-        },
-        {
-            /* Coeff Band ( 7 ) */
-            {   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
-            {   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
-            {   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
-        },
-    },
-    {
-        /* Block Type ( 3 ) */
-        {
-            /* Coeff Band ( 0 ) */
-            {2506, 20161, 2707, 767, 261, 178, 107,  30,  14,   3,   0, 100694,},
-            {8806, 36478, 8817, 3268, 1280, 850, 401, 114,  42,   0,   0, 58572,},
-            {11003, 27214, 11798, 5716, 2482, 2072, 1048, 175,  32,   0,   0, 19284,},
-        },
-        {
-            /* Coeff Band ( 1 ) */
-            {9738, 11313, 959, 205,  70,  18,  11,   1,   0,   0,   0,   0,},
-            {12628, 15085, 1507, 273,  52,  19,   9,   0,   0,   0,   0, 54280,},
-            {10701, 15846, 5561, 1926, 813, 570, 249,  36,   0,   0,   0, 6460,},
-        },
-        {
-            /* Coeff Band ( 2 ) */
-            {6781, 22539, 2784, 634, 182, 123,  20,   4,   0,   0,   0,   0,},
-            {6263, 11544, 2649, 790, 259, 168,  27,   5,   0,   0,   0, 20539,},
-            {3109, 4075, 2031, 896, 457, 386, 158,  29,   0,   0,   0, 1138,},
-        },
-        {
-            /* Coeff Band ( 3 ) */
-            {11515, 4079, 465,  73,   5,  14,   2,   0,   0,   0,   0,   0,},
-            {9361, 5834, 650,  96,  24,   8,   4,   0,   0,   0,   0, 22181,},
-            {4343, 3974, 1360, 415, 132,  96,  14,   1,   0,   0,   0, 1267,},
-        },
-        {
-            /* Coeff Band ( 4 ) */
-            {4787, 9297, 823, 168,  44,  12,   4,   0,   0,   0,   0,   0,},
-            {3619, 4472, 719, 198,  60,  31,   3,   0,   0,   0,   0, 8401,},
-            {1157, 1175, 483, 182,  88,  31,   8,   0,   0,   0,   0, 268,},
-        },
-        {
-            /* Coeff Band ( 5 ) */
-            {8299, 1226,  32,   5,   1,   0,   0,   0,   0,   0,   0,   0,},
-            {3502, 1568,  57,   4,   1,   1,   0,   0,   0,   0,   0, 9811,},
-            {1055, 1070, 166,  29,   6,   1,   0,   0,   0,   0,   0, 527,},
-        },
-        {
-            /* Coeff Band ( 6 ) */
-            {27414, 27927, 1989, 347,  69,  26,   0,   0,   0,   0,   0,   0,},
-            {5876, 10074, 1574, 341,  91,  24,   4,   0,   0,   0,   0, 21954,},
-            {1571, 2171, 778, 324, 124,  65,  16,   0,   0,   0,   0, 979,},
-        },
-        {
-            /* Coeff Band ( 7 ) */
-            {   0,  29,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
-            {   0,  23,   0,   0,   0,   0,   0,   0,   0,   0,   0, 459,},
-            {   0,   1,   0,   0,   0,   0,   0,   0,   0,   0,   0,  13,},
-        },
-    },
-};
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@ -9,7 +9,7 @@
 */


-#include "vpx_config.h"
+#include "vpx_ports/config.h"
 #include "encodemb.h"
 #include "encodemv.h"
 #include "vp8/common/common.h"
@ -22,13 +22,17 @@
 #include "encodeintra.h"
 #include "vp8/common/reconinter.h"
 #include "rdopt.h"
-#include "pickinter.h"
 #include "vp8/common/findnearmv.h"
 #include "vp8/common/reconintra.h"
+#include "vp8/common/seg_common.h"
 #include <stdio.h>
+#include <math.h>
 #include <limits.h>
 #include "vp8/common/subpixel.h"
 #include "vpx_ports/vpx_timer.h"
+#include "vp8/common/pred_common.h"
+
+//#define DBG_PRNT_SEGMAP 1

 #if CONFIG_RUNTIME_CPU_DETECT
 #define RTCD(x)     &cpi->common.rtcd.x
@ -37,6 +41,12 @@
 #define RTCD(x)     NULL
 #define IF_RTCD(x)  NULL
 #endif
+
+#ifdef ENC_DEBUG
+int enc_debug=0;
+int mb_row_debug, mb_col_debug;
+#endif
+
 extern void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ;

 extern void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex);
@ -52,13 +62,25 @@ int vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t,
 int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t);
 static void adjust_act_zbin( VP8_COMP *cpi, MACROBLOCK *x );

+
+
 #ifdef MODE_STATS
-unsigned int inter_y_modes[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
-unsigned int inter_uv_modes[4] = {0, 0, 0, 0};
-unsigned int inter_b_modes[15]  = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
-unsigned int y_modes[5]   = {0, 0, 0, 0, 0};
-unsigned int uv_modes[4]  = {0, 0, 0, 0};
-unsigned int b_modes[14]  = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+unsigned int inter_y_modes[MB_MODE_COUNT] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+unsigned int inter_uv_modes[VP8_UV_MODES] = {0, 0, 0, 0};
+unsigned int inter_b_modes[B_MODE_COUNT] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+unsigned int y_modes[VP8_YMODES] = {0, 0, 0, 0, 0, 0};
+unsigned int i8x8_modes[VP8_I8X8_MODES]={0  };
+unsigned int uv_modes[VP8_UV_MODES] = {0, 0, 0, 0};
+unsigned int uv_modes_y[VP8_YMODES][VP8_UV_MODES]=
+{
+{0, 0, 0, 0},
+{0, 0, 0, 0},
+{0, 0, 0, 0},
+{0, 0, 0, 0},
+{0, 0, 0, 0},
+{0, 0, 0, 0}
+};
+unsigned int b_modes[B_MODE_COUNT] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
 #endif


@ -365,7 +387,6 @@ void encode_mb_row(VP8_COMP *cpi,
                   MACROBLOCK  *x,
                   MACROBLOCKD *xd,
                   TOKENEXTRA **tp,
-                   int *segment_counts,
                   int *totalrate)
 {
    int recon_yoffset, recon_uvoffset;
@ -376,16 +397,8 @@ void encode_mb_row(VP8_COMP *cpi,
    int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
    int map_index = (mb_row * cpi->common.mb_cols);

-#if CONFIG_MULTITHREAD
-    const int nsync = cpi->mt_sync_range;
-    const int rightmost_col = cm->mb_cols - 1;
-    volatile const int *last_row_current_mb_col;
-
-    if ((cpi->b_multi_threaded != 0) && (mb_row != 0))
-        last_row_current_mb_col = &cpi->mt_current_mb_col[mb_row - 1];
-    else
-        last_row_current_mb_col = &rightmost_col;
-#endif
+    // Reset the left context
+    vp8_zero(cm->left_context)

    // reset above block coeffs
    xd->above_context = cm->above_context;
@ -414,6 +427,11 @@ void encode_mb_row(VP8_COMP *cpi,
    // for each macroblock col in image
    for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
    {
+#ifdef ENC_DEBUG
+        enc_debug = (cpi->common.current_video_frame ==1 && mb_row==4 && mb_col==0);
+        mb_col_debug=mb_col;
+        mb_row_debug=mb_row;
+#endif
        // Distance of Mb to the left & right edges, specified in
        // 1/8th pel units as they are always compared to values
        // that are in 1/8th pel units
@ -437,29 +455,13 @@ void encode_mb_row(VP8_COMP *cpi,
        //Copy current mb to a buffer
        RECON_INVOKE(&xd->rtcd->recon, copy16x16)(x->src.y_buffer, x->src.y_stride, x->thismb, 16);

-#if CONFIG_MULTITHREAD
-        if ((cpi->b_multi_threaded != 0) && (mb_row != 0))
-        {
-            if ((mb_col & (nsync - 1)) == 0)
-            {
-                while (mb_col > (*last_row_current_mb_col - nsync)
-                        && (*last_row_current_mb_col) != (cm->mb_cols - 1))
-                {
-                    x86_pause_hint();
-                    thread_sleep(0);
-                }
-            }
-        }
-#endif
-
        if(cpi->oxcf.tuning == VP8_TUNE_SSIM)
            vp8_activity_masking(cpi, x);

        // Is segmentation enabled
-        // MB level adjutment to quantizer
        if (xd->segmentation_enabled)
        {
-            // Code to set segment id in xd->mbmi.segment_id for current MB (with range checking)
+            // Code to set segment id in xd->mbmi.segment_id
            if (cpi->segmentation_map[map_index+mb_col] <= 3)
                xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[map_index+mb_col];
            else
@ -468,66 +470,51 @@ void encode_mb_row(VP8_COMP *cpi,
            vp8cx_mb_init_quantizer(cpi, x);
        }
        else
-            xd->mode_info_context->mbmi.segment_id = 0;         // Set to Segment 0 by default
+            // Set to Segment 0 by default
+            xd->mode_info_context->mbmi.segment_id = 0;

        x->active_ptr = cpi->active_map + map_index + mb_col;

+        /* force 4x4 transform for mode selection */
+        xd->mode_info_context->mbmi.txfm_size = TX_4X4;
+
        if (cm->frame_type == KEY_FRAME)
        {
            *totalrate += vp8cx_encode_intra_macro_block(cpi, x, tp);
+            //Note the encoder may have changed the segment_id
+
 #ifdef MODE_STATS
-            y_modes[xd->mbmi.mode] ++;
+            y_modes[xd->mode_info_context->mbmi.mode] ++;
 #endif
        }
        else
        {
            *totalrate += vp8cx_encode_inter_macroblock(cpi, x, tp, recon_yoffset, recon_uvoffset);
+            //Note the encoder may have changed the segment_id

 #ifdef MODE_STATS
-            inter_y_modes[xd->mbmi.mode] ++;
+            inter_y_modes[xd->mode_info_context->mbmi.mode] ++;

-            if (xd->mbmi.mode == SPLITMV)
+            if (xd->mode_info_context->mbmi.mode == SPLITMV)
            {
                int b;

-                for (b = 0; b < xd->mbmi.partition_count; b++)
+                for (b = 0; b < x->partition_info->count; b++)
                {
-                    inter_b_modes[x->partition->bmi[b].mode] ++;
+                    inter_b_modes[x->partition_info->bmi[b].mode] ++;
                }
            }

 #endif

-            // Count of last ref frame 0,0 useage
+            // Count of last ref frame 0,0 usage
            if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
                cpi->inter_zz_count ++;
-
-            // Special case code for cyclic refresh
-            // If cyclic update enabled then copy xd->mbmi.segment_id; (which may have been updated based on mode
-            // during vp8cx_encode_inter_macroblock()) back into the global sgmentation map
-            if (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled)
-            {
-                cpi->segmentation_map[map_index+mb_col] = xd->mode_info_context->mbmi.segment_id;
-
-                // If the block has been refreshed mark it as clean (the magnitude of the -ve influences how long it will be before we consider another refresh):
-                // Else if it was coded (last frame 0,0) and has not already been refreshed then mark it as a candidate for cleanup next time (marked 0)
-                // else mark it as dirty (1).
-                if (xd->mode_info_context->mbmi.segment_id)
-                    cpi->cyclic_refresh_map[map_index+mb_col] = -1;
-                else if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
-                {
-                    if (cpi->cyclic_refresh_map[map_index+mb_col] == 1)
-                        cpi->cyclic_refresh_map[map_index+mb_col] = 0;
-                }
-                else
-                    cpi->cyclic_refresh_map[map_index+mb_col] = 1;
-
-            }
        }

        cpi->tplist[mb_row].stop = *tp;

-        // Increment pointer into gf useage flags structure.
+        // Increment pointer into gf usage flags structure.
        x->gf_active_ptr++;

        // Increment the activity mask pointers.
@ -541,20 +528,15 @@ void encode_mb_row(VP8_COMP *cpi,
        recon_yoffset += 16;
        recon_uvoffset += 8;

-        // Keep track of segment useage
-        segment_counts[xd->mode_info_context->mbmi.segment_id] ++;
-
        // skip to next mb
        xd->mode_info_context++;
+
+        xd->prev_mode_info_context++;
+        assert((xd->prev_mode_info_context - cpi->common.prev_mip)
+            ==(xd->mode_info_context - cpi->common.mip));
        x->partition_info++;

        xd->above_context++;
-#if CONFIG_MULTITHREAD
-        if (cpi->b_multi_threaded != 0)
-        {
-            cpi->mt_current_mb_col[mb_row] = mb_col;
-        }
-#endif
    }

    //extend the recon for intra prediction
@ -565,13 +547,17 @@ void encode_mb_row(VP8_COMP *cpi,
        xd->dst.v_buffer + 8);

    // this is to account for the border
+    xd->prev_mode_info_context++;
    xd->mode_info_context++;
    x->partition_info++;

-#if CONFIG_MULTITHREAD
-    if ((cpi->b_multi_threaded != 0) && (mb_row == cm->mb_rows - 1))
+// debug output
+#if DBG_PRNT_SEGMAP
    {
-        sem_post(&cpi->h_event_end_encoding); /* signal frame encoding end */
+        FILE *statsfile;
+        statsfile = fopen("segmap2.stt", "a");
+        fprintf(statsfile, "\n" );
+        fclose(statsfile);
    }
 #endif
 }
@ -596,6 +582,7 @@ void init_encode_frame_mb_context(VP8_COMP *cpi)

    xd->mode_info_context = cm->mi;
    xd->mode_info_stride = cm->mode_info_stride;
+    xd->prev_mode_info_context = cm->prev_mi;

    xd->frame_type = cm->frame_type;

@ -630,42 +617,19 @@ void init_encode_frame_mb_context(VP8_COMP *cpi)
    vp8_zero(cpi->uv_mode_count)

    x->mvc = cm->fc.mvc;
+#if CONFIG_HIGH_PRECISION_MV
+    x->mvc_hp = cm->fc.mvc_hp;
+#endif

    vpx_memset(cm->above_context, 0,
               sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols);

-    xd->ref_frame_cost[INTRA_FRAME]   = vp8_cost_zero(cpi->prob_intra_coded);
-
-    // Special case treatment when GF and ARF are not sensible options for reference
-    if (cpi->ref_frame_flags == VP8_LAST_FLAG)
-    {
-        xd->ref_frame_cost[LAST_FRAME]    = vp8_cost_one(cpi->prob_intra_coded)
-                                        + vp8_cost_zero(255);
-        xd->ref_frame_cost[GOLDEN_FRAME]  = vp8_cost_one(cpi->prob_intra_coded)
-                                        + vp8_cost_one(255)
-                                        + vp8_cost_zero(128);
-        xd->ref_frame_cost[ALTREF_FRAME]  = vp8_cost_one(cpi->prob_intra_coded)
-                                        + vp8_cost_one(255)
-                                        + vp8_cost_one(128);
-    }
-    else
-    {
-        xd->ref_frame_cost[LAST_FRAME]    = vp8_cost_one(cpi->prob_intra_coded)
-                                        + vp8_cost_zero(cpi->prob_last_coded);
-        xd->ref_frame_cost[GOLDEN_FRAME]  = vp8_cost_one(cpi->prob_intra_coded)
-                                        + vp8_cost_one(cpi->prob_last_coded)
-                                        + vp8_cost_zero(cpi->prob_gf_coded);
-        xd->ref_frame_cost[ALTREF_FRAME]  = vp8_cost_one(cpi->prob_intra_coded)
-                                        + vp8_cost_one(cpi->prob_last_coded)
-                                        + vp8_cost_one(cpi->prob_gf_coded);
-    }
-
    xd->fullpixel_mask = 0xffffffff;
    if(cm->full_pixel)
        xd->fullpixel_mask = 0xfffffff8;
 }

-void vp8_encode_frame(VP8_COMP *cpi)
+static void encode_frame_internal(VP8_COMP *cpi)
 {
    int mb_row;
    MACROBLOCK *const x = & cpi->mb;
@ -673,19 +637,24 @@ void vp8_encode_frame(VP8_COMP *cpi)
    MACROBLOCKD *const xd = & x->e_mbd;

    TOKENEXTRA *tp = cpi->tok;
-    int segment_counts[MAX_MB_SEGMENTS];
    int totalrate;

-    vpx_memset(segment_counts, 0, sizeof(segment_counts));
-    totalrate = 0;
+    // Compute a modified set of reference frame probabilities to use when
+    // prediction fails. These are based on the current general estimates for
+    // this frame which may be updated with each iteration of the recode loop.
+    compute_mod_refprobs( cm );

-    if (cpi->compressor_speed == 2)
+// debug output
+#if DBG_PRNT_SEGMAP
    {
-        if (cpi->oxcf.cpu_used < 0)
-            cpi->Speed = -(cpi->oxcf.cpu_used);
-        else
-            vp8_auto_select_speed(cpi);
+        FILE *statsfile;
+        statsfile = fopen("segmap2.stt", "a");
+        fprintf(statsfile, "\n" );
+        fclose(statsfile);
    }
+#endif
+
+    totalrate = 0;

    // Functions setup for all frame types so we can use MC in AltRef
    if (cm->mcomp_filter_type == SIXTAP)
@ -698,6 +667,10 @@ void vp8_encode_frame(VP8_COMP *cpi)
                                        &cpi->common.rtcd.subpix, sixtap8x8);
        xd->subpixel_predict16x16   = SUBPIX_INVOKE(
                                        &cpi->common.rtcd.subpix, sixtap16x16);
+        xd->subpixel_predict_avg8x8 = SUBPIX_INVOKE(
+                                        &cpi->common.rtcd.subpix, sixtap_avg8x8);
+        xd->subpixel_predict_avg16x16 = SUBPIX_INVOKE(
+                                        &cpi->common.rtcd.subpix, sixtap_avg16x16);
    }
    else
    {
@ -709,13 +682,15 @@ void vp8_encode_frame(VP8_COMP *cpi)
                                        &cpi->common.rtcd.subpix, bilinear8x8);
        xd->subpixel_predict16x16   = SUBPIX_INVOKE(
                                      &cpi->common.rtcd.subpix, bilinear16x16);
+        xd->subpixel_predict_avg8x8 = SUBPIX_INVOKE(
+                                      &cpi->common.rtcd.subpix, bilinear_avg8x8);
+        xd->subpixel_predict_avg16x16 = SUBPIX_INVOKE(
+                                      &cpi->common.rtcd.subpix, bilinear_avg16x16);
    }

-    // Reset frame count of inter 0,0 motion vector useage.
+    // Reset frame count of inter 0,0 motion vector usage.
    cpi->inter_zz_count = 0;

-    vpx_memset(segment_counts, 0, sizeof(segment_counts));
-
    cpi->prediction_error = 0;
    cpi->intra_error = 0;
    cpi->skip_true_count = 0;
@ -729,7 +704,12 @@ void vp8_encode_frame(VP8_COMP *cpi)

    xd->mode_info_context = cm->mi;

+    xd->prev_mode_info_context = cm->prev_mi;
+
    vp8_zero(cpi->MVcount);
+#if CONFIG_HIGH_PRECISION_MV
+    vp8_zero(cpi->MVcount_hp);
+#endif
    vp8_zero(cpi->coef_counts);

    vp8cx_frame_init_quantizer(cpi);
@ -749,86 +729,21 @@ void vp8_encode_frame(VP8_COMP *cpi)
    // re-initencode frame context.
    init_encode_frame_mb_context(cpi);

+    cpi->rd_single_diff = cpi->rd_comp_diff = cpi->rd_hybrid_diff = 0;
+    vpx_memset(cpi->single_pred_count, 0, sizeof(cpi->single_pred_count));
+    vpx_memset(cpi->comp_pred_count, 0, sizeof(cpi->comp_pred_count));
+
    {
        struct vpx_usec_timer  emr_timer;
        vpx_usec_timer_start(&emr_timer);

-#if CONFIG_MULTITHREAD
-        if (cpi->b_multi_threaded)
        {
-            int i;
-
-            vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, 1,  cpi->encoding_thread_count);
-
-            for (i = 0; i < cm->mb_rows; i++)
-                cpi->mt_current_mb_col[i] = -1;
-
-            for (i = 0; i < cpi->encoding_thread_count; i++)
-            {
-                sem_post(&cpi->h_event_start_encoding[i]);
-            }
-
-            for (mb_row = 0; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1))
-            {
-                vp8_zero(cm->left_context)
-
-                tp = cpi->tok + mb_row * (cm->mb_cols * 16 * 24);
-
-                encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);
-
-                // adjust to the next row of mbs
-                x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols;
-                x->src.u_buffer +=  8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
-                x->src.v_buffer +=  8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
-
-                xd->mode_info_context += xd->mode_info_stride * cpi->encoding_thread_count;
-                x->partition_info  += xd->mode_info_stride * cpi->encoding_thread_count;
-                x->gf_active_ptr   += cm->mb_cols * cpi->encoding_thread_count;
-
-            }
-
-            sem_wait(&cpi->h_event_end_encoding); /* wait for other threads to finish */
-
-            cpi->tok_count = 0;
-
-            for (mb_row = 0; mb_row < cm->mb_rows; mb_row ++)
-            {
-                cpi->tok_count += cpi->tplist[mb_row].stop - cpi->tplist[mb_row].start;
-            }
-
-            if (xd->segmentation_enabled)
-            {
-                int i, j;
-
-                if (xd->segmentation_enabled)
-                {
-
-                    for (i = 0; i < cpi->encoding_thread_count; i++)
-                    {
-                        for (j = 0; j < 4; j++)
-                            segment_counts[j] += cpi->mb_row_ei[i].segment_counts[j];
-                    }
-                }
-            }
-
-            for (i = 0; i < cpi->encoding_thread_count; i++)
-            {
-                totalrate += cpi->mb_row_ei[i].totalrate;
-            }
-
-        }
-        else
-#endif
-        {
-            // for each macroblock row in image
+            // for each macroblock row in the image
            for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
            {
+                encode_mb_row(cpi, cm, mb_row, x, xd, &tp, &totalrate);

-                vp8_zero(cm->left_context)
-
-                encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);
-
-                // adjust to the next row of mbs
+                // adjust to the next row of MBs
                x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols;
                x->src.u_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
                x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
@ -843,43 +758,6 @@ void vp8_encode_frame(VP8_COMP *cpi)

    }

-
-    // Work out the segment probabilites if segmentation is enabled
-    if (xd->segmentation_enabled)
-    {
-        int tot_count;
-        int i;
-
-        // Set to defaults
-        vpx_memset(xd->mb_segment_tree_probs, 255 , sizeof(xd->mb_segment_tree_probs));
-
-        tot_count = segment_counts[0] + segment_counts[1] + segment_counts[2] + segment_counts[3];
-
-        if (tot_count)
-        {
-            xd->mb_segment_tree_probs[0] = ((segment_counts[0] + segment_counts[1]) * 255) / tot_count;
-
-            tot_count = segment_counts[0] + segment_counts[1];
-
-            if (tot_count > 0)
-            {
-                xd->mb_segment_tree_probs[1] = (segment_counts[0] * 255) / tot_count;
-            }
-
-            tot_count = segment_counts[2] + segment_counts[3];
-
-            if (tot_count > 0)
-                xd->mb_segment_tree_probs[2] = (segment_counts[2] * 255) / tot_count;
-
-            // Zero probabilities not allowed
-            for (i = 0; i < MB_FEATURE_TREE_PROBS; i ++)
-            {
-                if (xd->mb_segment_tree_probs[i] == 0)
-                    xd->mb_segment_tree_probs[i] = 1;
-            }
-        }
-    }
-
    // 256 rate units to the bit
    cpi->projected_frame_size = totalrate >> 8;   // projected_frame_size in units of BYTES

@ -932,44 +810,95 @@ void vp8_encode_frame(VP8_COMP *cpi)
    }
 #endif

-    // Adjust the projected reference frame useage probability numbers to reflect
-    // what we have just seen. This may be usefull when we make multiple itterations
-    // of the recode loop rather than continuing to use values from the previous frame.
-    if ((cm->frame_type != KEY_FRAME) && !cm->refresh_alt_ref_frame && !cm->refresh_golden_frame)
-    {
-        const int *const rfct = cpi->count_mb_ref_frame_usage;
-        const int rf_intra = rfct[INTRA_FRAME];
-        const int rf_inter = rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME];
-
-        if ((rf_intra + rf_inter) > 0)
-        {
-            cpi->prob_intra_coded = (rf_intra * 255) / (rf_intra + rf_inter);
-
-            if (cpi->prob_intra_coded < 1)
-                cpi->prob_intra_coded = 1;
-
-            if ((cm->frames_since_golden > 0) || cpi->source_alt_ref_active)
-            {
-                cpi->prob_last_coded = rf_inter ? (rfct[LAST_FRAME] * 255) / rf_inter : 128;
-
-                if (cpi->prob_last_coded < 1)
-                    cpi->prob_last_coded = 1;
-
-                cpi->prob_gf_coded = (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME])
-                                     ? (rfct[GOLDEN_FRAME] * 255) / (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME]) : 128;
-
-                if (cpi->prob_gf_coded < 1)
-                    cpi->prob_gf_coded = 1;
-            }
-        }
-    }
-
 #if 0
    // Keep record of the total distortion this time around for future use
    cpi->last_frame_distortion = cpi->frame_distortion;
 #endif

 }
+/* Encode one frame; on the RD path also choose comp_pred_mode and update the per-frame-type scores. */
+void vp8_encode_frame(VP8_COMP *cpi)
+{
+    if (cpi->sf.RD)
+    {
+        int frame_type, pred_type;
+        int redo = 0;  // NOTE(review): never read in this function
+        int single_diff, comp_diff, hybrid_diff;
+
+        /*
+         * Do a single RD pass over the whole frame assuming compound, single
+         * or hybrid prediction, whichever has worked best for this type of
+         * frame in the past. The rd_*_diff totals from the pass, divided by
+         * common.MBs, are then averaged into the per-frame-type scores, so a
+         * mode that would have worked better than this one is remembered for
+         * later frames. NOTE(review): the original comment also promised a
+         * re-encode above a certain threshold; no re-encode happens in this
+         * function.
+         */
+        if (cpi->common.frame_type == KEY_FRAME)
+            frame_type = 0;  // key frame
+        else if (cpi->is_src_frame_alt_ref && cpi->common.refresh_golden_frame)
+            frame_type = 3;  // alt-ref source frame that also refreshes golden
+        else if (cpi->common.refresh_golden_frame || cpi->common.refresh_alt_ref_frame)
+            frame_type = 1;  // golden or alt-ref refresh
+        else
+            frame_type = 2;  // every other frame
+        // Score index 0 = single, 1 = compound, 2 = hybrid; a mode-only choice needs a strictly highest score.
+        if (cpi->rd_prediction_type_threshes[frame_type][1] >
+                cpi->rd_prediction_type_threshes[frame_type][0] &&
+            cpi->rd_prediction_type_threshes[frame_type][1] >
+                cpi->rd_prediction_type_threshes[frame_type][2])
+            pred_type = COMP_PREDICTION_ONLY;
+        else if (cpi->rd_prediction_type_threshes[frame_type][0] >
+                    cpi->rd_prediction_type_threshes[frame_type][1] &&
+                 cpi->rd_prediction_type_threshes[frame_type][0] >
+                    cpi->rd_prediction_type_threshes[frame_type][2])
+            pred_type = SINGLE_PREDICTION_ONLY;
+        else
+            pred_type = HYBRID_PREDICTION;  // hybrid highest, or a tie for the top score
+
+        cpi->common.comp_pred_mode = pred_type;
+        encode_frame_internal(cpi);
+        // Each score becomes (old score + this frame's total diff / MBs) >> 1.
+        single_diff = cpi->rd_single_diff / cpi->common.MBs;
+        cpi->rd_prediction_type_threshes[frame_type][0] += single_diff;
+        cpi->rd_prediction_type_threshes[frame_type][0] >>= 1;
+        comp_diff   = cpi->rd_comp_diff   / cpi->common.MBs;
+        cpi->rd_prediction_type_threshes[frame_type][1] += comp_diff;
+        cpi->rd_prediction_type_threshes[frame_type][1] >>= 1;
+        hybrid_diff = cpi->rd_hybrid_diff / cpi->common.MBs;
+        cpi->rd_prediction_type_threshes[frame_type][2] += hybrid_diff;
+        cpi->rd_prediction_type_threshes[frame_type][2] >>= 1;
+        // If the hybrid pass used only one kind of prediction, switch comp_pred_mode to that kind alone.
+        if (cpi->common.comp_pred_mode == HYBRID_PREDICTION)
+        {
+            int single_count_zero = 0;  // despite the name: running total of single_pred_count[]
+            int comp_count_zero = 0;    // despite the name: running total of comp_pred_count[]
+            int i;
+
+            for ( i = 0; i < COMP_PRED_CONTEXTS; i++ )
+            {
+                single_count_zero += cpi->single_pred_count[i];
+                comp_count_zero += cpi->comp_pred_count[i];
+            }
+
+            if (comp_count_zero == 0)
+            {
+                cpi->common.comp_pred_mode = SINGLE_PREDICTION_ONLY;
+            }
+            else if (single_count_zero == 0)
+            {
+                cpi->common.comp_pred_mode = COMP_PREDICTION_ONLY;
+            }
+        }
+    }
+    else  // non-RD path: encode without touching comp_pred_mode or the scores
+    {
+        encode_frame_internal(cpi);
+    }
+
+}
+
 void vp8_setup_block_ptrs(MACROBLOCK *x)
 {
    int r, c;
@ -1070,6 +999,7 @@ static void sum_intra_stats(VP8_COMP *cpi, MACROBLOCK *x)
    const int is_key = cpi->common.frame_type == KEY_FRAME;

    ++ (is_key ? uv_modes : inter_uv_modes)[uvm];
+    ++ uv_modes_y[m][uvm];

    if (m == B_PRED)
    {
@ -1079,11 +1009,18 @@ static void sum_intra_stats(VP8_COMP *cpi, MACROBLOCK *x)

        do
        {
-            ++ bct[xd->block[b].bmi.mode];
+            ++ bct[xd->block[b].bmi.as_mode.first];
        }
        while (++b < 16);
    }

+    if(m==I8X8_PRED)
+    {
+        i8x8_modes[xd->block[0].bmi.as_mode.first]++;
+        i8x8_modes[xd->block[2].bmi.as_mode.first]++;
+        i8x8_modes[xd->block[8].bmi.as_mode.first]++;
+        i8x8_modes[xd->block[10].bmi.as_mode.first]++;
+    }
 #endif

    ++cpi->ymode_count[m];
@ -1117,10 +1054,11 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
 {
    int rate;

-    if (cpi->sf.RD && cpi->compressor_speed != 2)
-        vp8_rd_pick_intra_mode(cpi, x, &rate);
-    else
-        vp8_pick_intra_mode(cpi, x, &rate);
+    // Non rd path deprecated in test code base
+    //if (cpi->sf.RD && cpi->compressor_speed != 2)
+    vp8_rd_pick_intra_mode(cpi, x, &rate);
+    //else
+    //   vp8_pick_intra_mode(cpi, x, &rate);

    if(cpi->oxcf.tuning == VP8_TUNE_SSIM)
    {
@ -1128,12 +1066,32 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
        vp8_update_zbin_extra(cpi, x);
    }

-    if (x->e_mbd.mode_info_context->mbmi.mode == B_PRED)
+    /* test code: set transform size based on mode selection */
+    if(cpi->common.txfm_mode == ALLOW_8X8
+        && x->e_mbd.mode_info_context->mbmi.mode != I8X8_PRED
+        && x->e_mbd.mode_info_context->mbmi.mode != B_PRED)
+    {
+        x->e_mbd.mode_info_context->mbmi.txfm_size = TX_8X8;
+        cpi->t8x8_count++;
+    }
+    else
+    {
+        x->e_mbd.mode_info_context->mbmi.txfm_size = TX_4X4;
+        cpi->t4x4_count ++;
+    }
+
+    if(x->e_mbd.mode_info_context->mbmi.mode == I8X8_PRED)
+    {
+        vp8_encode_intra8x8mby(IF_RTCD(&cpi->rtcd), x);
+        vp8_encode_intra8x8mbuv(IF_RTCD(&cpi->rtcd), x);
+    }
+    else if (x->e_mbd.mode_info_context->mbmi.mode == B_PRED)
        vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
    else
        vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);

-    vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
+    if(x->e_mbd.mode_info_context->mbmi.mode != I8X8_PRED)
+        vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
    sum_intra_stats(cpi, x);
    vp8_tokenize_mb(cpi, &x->e_mbd, t);

@ -1151,21 +1109,27 @@ int vp8cx_encode_inter_macroblock
    int recon_yoffset, int recon_uvoffset
 )
 {
+    VP8_COMMON *cm = &cpi->common;
    MACROBLOCKD *const xd = &x->e_mbd;
    int intra_error = 0;
    int rate;
    int distortion;
+    unsigned char *segment_id = &xd->mode_info_context->mbmi.segment_id;
+    int seg_ref_active;
+     unsigned char ref_pred_flag;

    x->skip = 0;

    if (xd->segmentation_enabled)
-        x->encode_breakout = cpi->segment_encode_breakout[xd->mode_info_context->mbmi.segment_id];
+        x->encode_breakout = cpi->segment_encode_breakout[*segment_id];
    else
        x->encode_breakout = cpi->oxcf.encode_breakout;

-    if (cpi->sf.RD)
+    //if (cpi->sf.RD)
+    // For now this codebase is limited to a single rd encode path
    {
        int zbin_mode_boost_enabled = cpi->zbin_mode_boost_enabled;
+        int single, compound, hybrid;

        /* Are we using the fast quantizer for the mode selection? */
        if(cpi->sf.use_fastquant_for_pick)
@ -1180,7 +1144,39 @@ int vp8cx_encode_inter_macroblock
            cpi->zbin_mode_boost_enabled = 0;
        }
        vp8_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate,
-                               &distortion, &intra_error);
+                               &distortion, &intra_error, &single, &compound, &hybrid);
+
+        cpi->rd_single_diff += single;
+        cpi->rd_comp_diff   += compound;
+        cpi->rd_hybrid_diff += hybrid;
+        if (x->e_mbd.mode_info_context->mbmi.ref_frame &&
+            x->e_mbd.mode_info_context->mbmi.mode != SPLITMV)
+        {
+            unsigned char pred_context;
+
+            pred_context = get_pred_context( cm, xd, PRED_COMP );
+
+            if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME)
+                cpi->single_pred_count[pred_context]++;
+            else
+                cpi->comp_pred_count[pred_context]++;
+        }
+
+
+        /* test code: set transform size based on mode selection */
+        if( cpi->common.txfm_mode == ALLOW_8X8
+            && x->e_mbd.mode_info_context->mbmi.mode != I8X8_PRED
+            && x->e_mbd.mode_info_context->mbmi.mode != B_PRED
+            && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV)
+        {
+            x->e_mbd.mode_info_context->mbmi.txfm_size = TX_8X8;
+            cpi->t8x8_count ++;
+        }
+        else
+        {
+            x->e_mbd.mode_info_context->mbmi.txfm_size = TX_4X4;
+            cpi->t4x4_count++;
+        }

        /* switch back to the regular quantizer for the encode */
        if (cpi->sf.improved_quant)
@ -1190,14 +1186,14 @@ int vp8cx_encode_inter_macroblock
            cpi->mb.quantize_b_pair = QUANTIZE_INVOKE(&cpi->rtcd.quantize,
                                                      quantb_pair);
        }
-
        /* restore cpi->zbin_mode_boost_enabled */
        cpi->zbin_mode_boost_enabled = zbin_mode_boost_enabled;

    }
-    else
-        vp8_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate,
-                            &distortion, &intra_error);
+    //else
+    // The non rd encode path has been deleted from this code base
+    // to simplify development
+    //    vp8_pick_inter_mode

    cpi->prediction_error += distortion;
    cpi->intra_error += intra_error;
@ -1208,30 +1204,6 @@ int vp8cx_encode_inter_macroblock
        adjust_act_zbin( cpi, x );
    }

-#if 0
-    // Experimental RD code
-    cpi->frame_distortion += distortion;
-    cpi->last_mb_distortion = distortion;
-#endif
-
-    // MB level adjutment to quantizer setup
-    if (xd->segmentation_enabled)
-    {
-        // If cyclic update enabled
-        if (cpi->cyclic_refresh_mode_enabled)
-        {
-            // Clear segment_id back to 0 if not coded (last frame 0,0)
-            if ((xd->mode_info_context->mbmi.segment_id == 1) &&
-                ((xd->mode_info_context->mbmi.ref_frame != LAST_FRAME) || (xd->mode_info_context->mbmi.mode != ZEROMV)))
-            {
-                xd->mode_info_context->mbmi.segment_id = 0;
-
-                /* segment_id changed, so update */
-                vp8cx_mb_init_quantizer(cpi, x);
-            }
-        }
-    }
-
    {
        // Experimental code. Special case for gf and arf zeromv modes.
        // Increase zbin size to supress noise
@ -1260,21 +1232,56 @@ int vp8cx_encode_inter_macroblock
            vp8_update_zbin_extra(cpi, x);
    }

-    cpi->count_mb_ref_frame_usage[xd->mode_info_context->mbmi.ref_frame] ++;
+    seg_ref_active = segfeature_active( xd, *segment_id, SEG_LVL_REF_FRAME );
+
+    // SET VARIOUS PREDICTION FLAGS
+
+    // Did the chosen reference frame match its predicted value.
+    ref_pred_flag = ( (xd->mode_info_context->mbmi.ref_frame ==
+                           get_pred_ref( cm, xd )) );
+    set_pred_flag( xd, PRED_REF, ref_pred_flag );
+
+    // If we have just a single reference frame coded for a segment then
+    // exclude from the reference frame counts used to work out
+    // probabilities. NOTE: At the moment we don't support custom trees
+    // for the reference frame coding for each segment but this is a
+    // possible future action.
+    if ( !seg_ref_active ||
+         ( ( check_segref( xd, *segment_id, INTRA_FRAME ) +
+             check_segref( xd, *segment_id, LAST_FRAME ) +
+             check_segref( xd, *segment_id, GOLDEN_FRAME ) +
+             check_segref( xd, *segment_id, ALTREF_FRAME ) ) > 1 ) )
+    {
+// TODO this may not be a good idea as it makes sample size small and means
+// the predictor functions cannot use data about most likely value only most
+// likely unpredicted value.
+//#if CONFIG_COMPRED
+//        // Only update count for incorrectly predicted cases
+//        if ( !ref_pred_flag )
+//#endif
+        {
+            cpi->count_mb_ref_frame_usage
+                [xd->mode_info_context->mbmi.ref_frame]++;
+        }
+    }

    if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
    {
-        vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
-
        if (xd->mode_info_context->mbmi.mode == B_PRED)
        {
+            vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
            vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
        }
+        else if(xd->mode_info_context->mbmi.mode == I8X8_PRED)
+        {
+            vp8_encode_intra8x8mby(IF_RTCD(&cpi->rtcd), x);
+            vp8_encode_intra8x8mbuv(IF_RTCD(&cpi->rtcd), x);
+        }
        else
        {
+            vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
            vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
        }
-
        sum_intra_stats(cpi, x);
    }
    else
@ -1292,6 +1299,24 @@ int vp8cx_encode_inter_macroblock
        xd->pre.u_buffer = cpi->common.yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
        xd->pre.v_buffer = cpi->common.yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;

+        if (xd->mode_info_context->mbmi.second_ref_frame) {
+            int second_ref_fb_idx;
+
+            if (xd->mode_info_context->mbmi.second_ref_frame == LAST_FRAME)
+                second_ref_fb_idx = cpi->common.lst_fb_idx;
+            else if (xd->mode_info_context->mbmi.second_ref_frame == GOLDEN_FRAME)
+                second_ref_fb_idx = cpi->common.gld_fb_idx;
+            else
+                second_ref_fb_idx = cpi->common.alt_fb_idx;
+
+            xd->second_pre.y_buffer = cpi->common.yv12_fb[second_ref_fb_idx].y_buffer +
+                                            recon_yoffset;
+            xd->second_pre.u_buffer = cpi->common.yv12_fb[second_ref_fb_idx].u_buffer +
+                                            recon_uvoffset;
+            xd->second_pre.v_buffer = cpi->common.yv12_fb[second_ref_fb_idx].v_buffer +
+                                            recon_uvoffset;
+        }
+
        if (!x->skip)
        {
            vp8_encode_inter16x16(IF_RTCD(&cpi->rtcd), x);
@ -1302,14 +1327,40 @@ int vp8cx_encode_inter_macroblock

        }
        else
+        {
            vp8_build_inter16x16_predictors_mb(xd, xd->dst.y_buffer,
                                           xd->dst.u_buffer, xd->dst.v_buffer,
                                           xd->dst.y_stride, xd->dst.uv_stride);
-
+        }
    }

    if (!x->skip)
+    {
+#ifdef ENC_DEBUG
+        if (enc_debug)
+        {
+          int i;
+            printf("Segment=%d [%d, %d]: %d %d:\n", x->e_mbd.mode_info_context->mbmi.segment_id, mb_col_debug, mb_row_debug, xd->mb_to_left_edge, xd->mb_to_top_edge);
+            for (i =0; i<400; i++) {
+              printf("%3d ", xd->qcoeff[i]);
+              if (i%16 == 15) printf("\n");
+            }
+            printf("\n");
+            printf("eobs = ");
+            for (i=0;i<25;i++)
+              printf("%d:%d ", i, xd->block[i].eob);
+            printf("\n");
+            fflush(stdout);
+        }
+#endif
        vp8_tokenize_mb(cpi, xd, t);
+#ifdef ENC_DEBUG
+        if (enc_debug) {
+          printf("Tokenized\n");
+          fflush(stdout);
+        }
+#endif
+    }
    else
    {
        if (cpi->common.mb_no_coeff_skip)
@ -1325,6 +1376,5 @@ int vp8cx_encode_inter_macroblock
            cpi->skip_false_count ++;
        }
    }
-
    return rate;
 }
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@ -9,7 +9,7 @@
 */


-#include "vpx_config.h"
+#include "vpx_ports/config.h"
 #include "vp8/common/idct.h"
 #include "quantize.h"
 #include "vp8/common/reconintra.h"
@ -22,22 +22,29 @@
 #include "encodeintra.h"


+#ifdef ENC_DEBUG
+extern int enc_debug;
+#endif
+
 #if CONFIG_RUNTIME_CPU_DETECT
 #define IF_RTCD(x) (x)
 #else
 #define IF_RTCD(x) NULL
 #endif

-int vp8_encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_dc_pred)
+int vp8_encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_16x16_pred)
 {

    int i;
    int intra_pred_var = 0;
    (void) cpi;

-    if (use_dc_pred)
+    if (use_16x16_pred)
    {
        x->e_mbd.mode_info_context->mbmi.mode = DC_PRED;
+#if CONFIG_COMP_INTRA_PRED
+        x->e_mbd.mode_info_context->mbmi.second_mode = (MB_PREDICTION_MODE) (DC_PRED - 1);
+#endif
        x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
        x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;

@ -47,7 +54,7 @@ int vp8_encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_dc_pred)
    {
        for (i = 0; i < 16; i++)
        {
-            x->e_mbd.block[i].bmi.as_mode = B_DC_PRED;
+            x->e_mbd.block[i].bmi.as_mode.first = B_DC_PRED;
            vp8_encode_intra4x4block(IF_RTCD(&cpi->rtcd), x, i);
        }
    }
@ -63,8 +70,20 @@ void vp8_encode_intra4x4block(const VP8_ENCODER_RTCD *rtcd,
    BLOCKD *b = &x->e_mbd.block[ib];
    BLOCK *be = &x->block[ib];

+#if CONFIG_COMP_INTRA_PRED
+    if (b->bmi.as_mode.second == (B_PREDICTION_MODE) (B_DC_PRED - 1))
+    {
+#endif
    RECON_INVOKE(&rtcd->common->recon, intra4x4_predict)
-                (b, b->bmi.as_mode, b->predictor);
+                (b, b->bmi.as_mode.first, b->predictor);
+#if CONFIG_COMP_INTRA_PRED
+    }
+    else
+    {
+        RECON_INVOKE(&rtcd->common->recon, comp_intra4x4_predict)
+            (b, b->bmi.as_mode.first, b->bmi.as_mode.second, b->predictor);
+    }
+#endif

    ENCODEMB_INVOKE(&rtcd->encodemb, subb)(be, b, 16);

@ -93,18 +112,72 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 {
    BLOCK *b = &x->block[0];

+    int tx_type = x->e_mbd.mode_info_context->mbmi.txfm_size;
+
+#if CONFIG_COMP_INTRA_PRED
+    if (x->e_mbd.mode_info_context->mbmi.second_mode == (MB_PREDICTION_MODE) (DC_PRED - 1))
+#endif
    RECON_INVOKE(&rtcd->common->recon, build_intra_predictors_mby)(&x->e_mbd);
+#if CONFIG_COMP_INTRA_PRED
+    else
+        RECON_INVOKE(&rtcd->common->recon, build_comp_intra_predictors_mby)(&x->e_mbd);
+#endif

    ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, *(b->base_src), x->e_mbd.predictor, b->src_stride);

-    vp8_transform_intra_mby(x);
+    if( tx_type == TX_8X8 )
+        vp8_transform_intra_mby_8x8(x);
+    else
+        vp8_transform_intra_mby(x);

-    vp8_quantize_mby(x);
+    if(tx_type == TX_8X8)
+      vp8_quantize_mby_8x8(x);
+    else
+      vp8_quantize_mby(x);

    if (x->optimize)
+    {
+      if( tx_type == TX_8X8 )
+        vp8_optimize_mby_8x8(x, rtcd);
+      else
        vp8_optimize_mby(x, rtcd);
+    }

-    vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
+    if(tx_type == TX_8X8)
+      vp8_inverse_transform_mby_8x8(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
+    else
+      vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
+
+#ifdef ENC_DEBUG
+    if (enc_debug) {
+      int i;
+      printf("Intra qcoeff:\n");
+      printf("%d %d:\n", x->e_mbd.mb_to_left_edge, x->e_mbd.mb_to_top_edge);
+      for (i =0; i<400; i++) {
+        printf("%3d ", x->e_mbd.qcoeff[i]);
+        if (i%16 == 15) printf("\n");
+      }
+      printf("Intra dqcoeff:\n");
+      for (i =0; i<400; i++) {
+        printf("%3d ", x->e_mbd.dqcoeff[i]);
+        if (i%16 == 15) printf("\n");
+      }
+      printf("Intra diff:\n");
+      for (i =0; i<400; i++) {
+        printf("%3d ", x->e_mbd.diff[i]);
+        if (i%16 == 15) printf("\n");
+      }
+      printf("Intra predictor:\n");
+      for (i =0; i<400; i++) {
+        printf("%3d ", x->e_mbd.predictor[i]);
+        if (i%16 == 15) printf("\n");
+      }
+      printf("eobs:\n");
+      for (i=0;i<25;i++)
+        printf("%d ", x->e_mbd.block[i].eob);
+      printf("\n");
+    }
+#endif

    RECON_INVOKE(&rtcd->common->recon, recon_mby)
        (IF_RTCD(&rtcd->common->recon), &x->e_mbd);
@ -113,18 +186,181 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)

 void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 {
+    int tx_type = x->e_mbd.mode_info_context->mbmi.txfm_size;
+#if CONFIG_COMP_INTRA_PRED
+    if (x->e_mbd.mode_info_context->mbmi.second_uv_mode == (MB_PREDICTION_MODE) (DC_PRED - 1))
+    {
+#endif
    RECON_INVOKE(&rtcd->common->recon, build_intra_predictors_mbuv)(&x->e_mbd);
+#if CONFIG_COMP_INTRA_PRED
+    }
+    else
+    {
+        RECON_INVOKE(&rtcd->common->recon, build_comp_intra_predictors_mbuv)(&x->e_mbd);
+    }
+#endif

    ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride);
+    if(tx_type == TX_8X8)
+        vp8_transform_mbuv_8x8(x);
+    else
+        vp8_transform_mbuv(x);

-    vp8_transform_mbuv(x);
-
-    vp8_quantize_mbuv(x);
+    if(tx_type == TX_8X8)
+        vp8_quantize_mbuv_8x8(x);
+    else
+        vp8_quantize_mbuv(x);

+#ifdef ENC_DEBUG
+    if (enc_debug) {
+      int i;
+      printf("vp8_encode_intra16x16mbuv\n");
+      printf("%d %d:\n", x->e_mbd.mb_to_left_edge, x->e_mbd.mb_to_top_edge);
+      printf("qcoeff:\n");
+      for (i =0; i<400; i++) {
+        printf("%3d ", x->e_mbd.qcoeff[i]);
+        if (i%16 == 15) printf("\n");
+      }
+      printf("dqcoeff:\n");
+      for (i =0; i<400; i++) {
+        printf("%3d ", x->e_mbd.dqcoeff[i]);
+        if (i%16 == 15) printf("\n");
+      }
+      printf("diff:\n");
+      for (i =0; i<400; i++) {
+        printf("%3d ", x->e_mbd.diff[i]);
+        if (i%16 == 15) printf("\n");
+      }
+      printf("predictor:\n");
+      for (i =0; i<400; i++) {
+        printf("%3d ", x->e_mbd.predictor[i]);
+        if (i%16 == 15) printf("\n");
+      }
+      printf("eobs:\n");
+      for (i=0;i<25;i++)
+        printf("%d ", x->e_mbd.block[i].eob);
+      printf("\n");
+    }
+#endif
    if (x->optimize)
+    {
+      if(tx_type == TX_8X8)
+        vp8_optimize_mbuv_8x8(x, rtcd);
+      else
        vp8_optimize_mbuv(x, rtcd);
+    }

-    vp8_inverse_transform_mbuv(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
+    if(tx_type == TX_8X8)
+        vp8_inverse_transform_mbuv_8x8(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
+    else
+        vp8_inverse_transform_mbuv(IF_RTCD(&rtcd->common->idct), &x->e_mbd);

    vp8_recon_intra_mbuv(IF_RTCD(&rtcd->common->recon), &x->e_mbd);
 }
+/* Predict the 8x8 intra block whose first 4x4 block is 'ib', then code its four 4x4 sub-blocks one by one. */
+void vp8_encode_intra8x8(const VP8_ENCODER_RTCD *rtcd,
+                              MACROBLOCK *x, int ib)
+{
+    BLOCKD *b = &x->e_mbd.block[ib];
+    BLOCK *be = &x->block[ib];
+    const int iblock[4]={0,1,4,5};  // sub-block offsets from ib (a 2x2 group if rows are 4 blocks wide -- confirm)
+    int i;
+    // With CONFIG_COMP_INTRA_PRED: second == DC_PRED - 1 picks the single-mode predictor, anything else the compound one.
+#if CONFIG_COMP_INTRA_PRED
+    if (b->bmi.as_mode.second == (MB_PREDICTION_MODE) (DC_PRED - 1))
+    {
+#endif
+    RECON_INVOKE(&rtcd->common->recon, intra8x8_predict)
+                (b, b->bmi.as_mode.first, b->predictor);
+#if CONFIG_COMP_INTRA_PRED
+    }
+    else
+    {
+        RECON_INVOKE(&rtcd->common->recon, comp_intra8x8_predict)
+            (b, b->bmi.as_mode.first, b->bmi.as_mode.second, b->predictor);
+    }
+#endif
+    // Per sub-block: subtract (subb), forward transform, quantize, inverse transform, reconstruct.
+    for(i=0;i<4;i++)
+    {
+        b = &x->e_mbd.block[ib + iblock[i]];
+        be = &x->block[ib + iblock[i]];
+        ENCODEMB_INVOKE(&rtcd->encodemb, subb)(be, b, 16);
+        x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
+        x->quantize_b(be, b);
+        vp8_inverse_transform_b(IF_RTCD(&rtcd->common->idct), b, 32);
+        RECON_INVOKE(&rtcd->common->recon, recon)(b->predictor,
+            b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+    }
+
+}
+/* vp8_encode_intra8x8mby(): run vp8_encode_intra8x8() on each of the four block indices in vp8_i8x8_block. */
+extern const int vp8_i8x8_block[4];  // defined elsewhere; presumably the first 4x4 index of each 8x8 block -- confirm
+void vp8_encode_intra8x8mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
+{
+    int i, ib;
+
+    for(i=0;i<4;i++)
+    {
+        ib = vp8_i8x8_block[i];  // starting block index handed to the per-8x8 encoder
+        vp8_encode_intra8x8(rtcd, x, ib);
+    }
+
+}
+/* Predict block 'ib' with intra mode 'mode' (plus 'second' for compound), then transform, quantize and reconstruct it. */
+void vp8_encode_intra_uv4x4(const VP8_ENCODER_RTCD *rtcd,
+                              MACROBLOCK *x, int ib,
+                              int mode, int second)
+{
+    BLOCKD *b = &x->e_mbd.block[ib];
+    BLOCK *be = &x->block[ib];
+    // NOTE(review): sibling code tests (DC_PRED - 1) for "no second mode"; -1 matches only if DC_PRED == 0 -- confirm.
+#if CONFIG_COMP_INTRA_PRED
+    if (second == -1)  // -1: no second mode, use the single-mode predictor
+    {
+#endif
+    RECON_INVOKE(&rtcd->common->recon, intra_uv4x4_predict)
+                (b, mode, b->predictor);
+#if CONFIG_COMP_INTRA_PRED
+    }
+    else
+    {
+        RECON_INVOKE(&rtcd->common->recon, comp_intra_uv4x4_predict)
+            (b, mode, second, b->predictor);
+    }
+#endif
+
+    ENCODEMB_INVOKE(&rtcd->encodemb, subb)(be, b, 8);  // residual against b->predictor
+
+    x->vp8_short_fdct4x4(be->src_diff, be->coeff, 16);  // forward transform of the residual
+
+    x->quantize_b(be, b);
+
+    vp8_inverse_transform_b(IF_RTCD(&rtcd->common->idct), b, 16);
+
+    RECON_INVOKE(&rtcd->common->recon, recon_uv)(b->predictor,
+        b->diff, *(b->base_dst) + b->dst, b->dst_stride);  // predictor + diff written to the destination
+}
+
+
+/* Encode the eight 4x4 U/V blocks (indices 16..23), each using the mode stored on a vp8_i8x8_block entry. */
+void vp8_encode_intra8x8mbuv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
+{
+    int i, ib, mode, second;
+    BLOCKD *b;
+    for(i=0;i<4;i++)
+    {
+        ib = vp8_i8x8_block[i];
+        b = &x->e_mbd.block[ib];  // block whose mode(s) are reused for U/V block i
+        mode = b->bmi.as_mode.first;
+#if CONFIG_COMP_INTRA_PRED
+        second = b->bmi.as_mode.second;
+#else
+        second = -1;
+#endif
+        /* U block i (block indices 16..19) */
+        vp8_encode_intra_uv4x4(rtcd, x, i+16, mode, second);
+        /* V block i (block indices 20..23) */
+        vp8_encode_intra_uv4x4(rtcd, x, i+20, mode, second);
+    }
+}
--- a/vp8/encoder/encodeintra.h
+++ b/vp8/encoder/encodeintra.h
@ -13,10 +13,15 @@
 #define _ENCODEINTRA_H_
 #include "onyx_int.h"

-int vp8_encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_dc_pred);
+int vp8_encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_16x16_pred);
 void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *, MACROBLOCK *x);
 void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *, MACROBLOCK *x);
 void vp8_encode_intra4x4mby(const VP8_ENCODER_RTCD *, MACROBLOCK *mb);
 void vp8_encode_intra4x4block(const VP8_ENCODER_RTCD *rtcd,
                              MACROBLOCK *x, int ib);
+void vp8_encode_intra8x8mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x);
+void vp8_encode_intra8x8mbuv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x);
+void vp8_encode_intra8x8(const VP8_ENCODER_RTCD *rtcd,
+                              MACROBLOCK *x, int ib);
+
 #endif
--- a/Показать больше
+++ b/Показать больше