Merge branch 'master' into experimental

Conflicts:
    vp9/common/vp9_findnearmv.c
    vp9/common/vp9_rtcd_defs.sh
    vp9/decoder/vp9_decodframe.c
    vp9/decoder/x86/vp9_dequantize_sse2.c
    vp9/encoder/vp9_rdopt.c
    vp9/vp9_common.mk

Resolve file name changes in favor of master. Resolve rdopt changes in favor of experimental, preserving the newer experiments.

Change-Id: If51ed8f457470281c7b20a5c1a2f4ce2cf76c20f
2013-04-26 11:40:43 -07:00 · 2013-04-26 11:40:43 -07:00 · 32a5c52856
--- a/build/make/Android.mk
+++ b/build/make/Android.mk
@ -48,7 +48,7 @@
 # Running ndk-build will build libvpx and include it in your project.
 #

-CONFIG_DIR := $(LOCAL_PATH)
+CONFIG_DIR := $(LOCAL_PATH)/
 LIBVPX_PATH := $(LOCAL_PATH)/libvpx
 ASM_CNV_PATH_LOCAL := $(TARGET_ARCH_ABI)/ads2gas
 ASM_CNV_PATH := $(LOCAL_PATH)/$(ASM_CNV_PATH_LOCAL)
@ -56,9 +56,9 @@ ASM_CNV_PATH := $(LOCAL_PATH)/$(ASM_CNV_PATH_LOCAL)
 # Makefiles created by the libvpx configure process
 # This will need to be fixed to handle x86.
 ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
-  include $(CONFIG_DIR)/libs-armv7-android-gcc.mk
+  include $(CONFIG_DIR)libs-armv7-android-gcc.mk
 else
-  include $(CONFIG_DIR)/libs-armv5te-android-gcc.mk
+  include $(CONFIG_DIR)libs-armv5te-android-gcc.mk
 endif

 # Rule that is normally in Makefile created by libvpx
@ -106,26 +106,25 @@ $$(eval $$(call ev-build-file))

 $(1) : $$(_OBJ) $(2)
 	@mkdir -p $$(dir $$@)
-	@grep $(OFFSET_PATTERN) $$< | tr -d '\#' | $(CONFIG_DIR)/$(ASM_CONVERSION) > $$@
+	@grep $(OFFSET_PATTERN) $$< | tr -d '\#' | $(CONFIG_DIR)$(ASM_CONVERSION) > $$@
 endef

 # Use ads2gas script to convert from RVCT format to GAS format.  This
 #  puts the processed file under $(ASM_CNV_PATH).  A local clean rule
 #  handles removing these.
-ASM_CNV_OFFSETS_DEPEND = $(ASM_CNV_PATH)/vp8_asm_com_offsets.asm
-ifeq ($(CONFIG_VP8_DECODER), yes)
-  ASM_CNV_OFFSETS_DEPEND += $(ASM_CNV_PATH)/vp8_asm_dec_offsets.asm
-endif
 ifeq ($(CONFIG_VP8_ENCODER), yes)
  ASM_CNV_OFFSETS_DEPEND += $(ASM_CNV_PATH)/vp8_asm_enc_offsets.asm
 endif
+ifeq ($(HAVE_NEON), yes)
+  ASM_CNV_OFFSETS_DEPEND += $(ASM_CNV_PATH)/vpx_scale_asm_offsets.asm
+endif

 .PRECIOUS: %.asm.s
 $(ASM_CNV_PATH)/libvpx/%.asm.s: $(LIBVPX_PATH)/%.asm $(ASM_CNV_OFFSETS_DEPEND)
 	@mkdir -p $(dir $@)
-	@$(CONFIG_DIR)/$(ASM_CONVERSION) <$< > $@
+	@$(CONFIG_DIR)$(ASM_CONVERSION) <$< > $@

-# For building vpx_rtcd.h, which has a rule in libs.mk
+# For building *_rtcd.h, which have rules in libs.mk
 TGT_ISA:=$(word 1, $(subst -, ,$(TOOLCHAIN)))
 target := libs

@ -177,7 +176,14 @@ ifeq ($(CONFIG_RUNTIME_CPU_DETECT),yes)
  LOCAL_STATIC_LIBRARIES := cpufeatures
 endif

-$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vpx_rtcd.h
+# Add a dependency to force generation of the RTCD files.
+ifeq ($(CONFIG_VP8), yes)
+$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vp8_rtcd.h
+endif
+ifeq ($(CONFIG_VP9), yes)
+$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vp9_rtcd.h
+endif
+$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vpx_scale_rtcd.h

 .PHONY: clean
 clean:
@ -189,14 +195,10 @@ clean:

 include $(BUILD_SHARED_LIBRARY)

-$(eval $(call asm_offsets_template,\
-    $(ASM_CNV_PATH)/vp8_asm_com_offsets.asm, \
-    $(LIBVPX_PATH)/vp8/common/vp8_asm_com_offsets.c))
-
-ifeq ($(CONFIG_VP8_DECODER), yes)
+ifeq ($(HAVE_NEON), yes)
  $(eval $(call asm_offsets_template,\
-    $(ASM_CNV_PATH)/vp8_asm_dec_offsets.asm, \
-    $(LIBVPX_PATH)/vp8/decoder/vp8_asm_dec_offsets.c))
+    $(ASM_CNV_PATH)/vpx_scale_asm_offsets.asm, \
+    $(LIBVPX_PATH)/vpx_scale/vpx_scale_asm_offsets.c))
 endif

 ifeq ($(CONFIG_VP8_ENCODER), yes)
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@ -918,7 +918,7 @@ process_common_toolchain() {
            add_ldflags -arch_only ${tgt_isa}

            if [ -z "${alt_libc}" ]; then
-                alt_libc=${SDK_PATH}/SDKs/iPhoneOS5.1.sdk
+                alt_libc=${SDK_PATH}/SDKs/iPhoneOS6.0.sdk
            fi

            add_cflags  "-isysroot ${alt_libc}"
@ -1088,10 +1088,12 @@ EOF
            win32)
                add_asflags -f win32
                enabled debug && add_asflags -g cv8
+                EXE_SFX=.exe
            ;;
            win64)
                add_asflags -f x64
                enabled debug && add_asflags -g cv8
+                EXE_SFX=.exe
            ;;
            linux*|solaris*|android*)
                add_asflags -f elf${bits}
--- a/libs.mk
+++ b/libs.mk
@ -51,7 +51,7 @@ $$(BUILD_PFX)$(1).h: $$(SRC_PATH_BARE)/$(2)
 	@echo "    [CREATE] $$@"
 	$$(qexec)$$(SRC_PATH_BARE)/build/make/rtcd.sh --arch=$$(TGT_ISA) \
          --sym=$(1) \
-          --config=$$(target)$$(if $$(FAT_ARCHS),,-$$(TOOLCHAIN)).mk \
+          --config=$$(CONFIG_DIR)$$(target)$$(if $$(FAT_ARCHS),,-$$(TOOLCHAIN)).mk \
          $$(RTCD_OPTIONS) $$^ > $$@
 CLEAN-OBJS += $$(BUILD_PFX)$(1).h
 RTCD += $$(BUILD_PFX)$(1).h
@ -436,7 +436,7 @@ test_libvpx.vcproj: $(LIBVPX_TEST_SRCS)
 PROJECTS-$(CONFIG_MSVS) += test_libvpx.vcproj

 test:: testdata
-	@set -e; for t in $(addprefix Win32/Release/,$(notdir $(LIBVPX_TEST_BINS:.cc=.exe))); do $$t; done
+	@set -e; for t in $(addprefix $(TGT_OS:win64=x64)/Release/,$(notdir $(LIBVPX_TEST_BINS:.cc=.exe))); do $$t; done
 endif
 else

--- a/test/acm_random.h
+++ b/test/acm_random.h
@ -11,7 +11,7 @@
 #ifndef LIBVPX_TEST_ACM_RANDOM_H_
 #define LIBVPX_TEST_ACM_RANDOM_H_

-#include <stdlib.h>
+#include "third_party/googletest/src/include/gtest/gtest.h"

 #include "vpx/vpx_integer.h"

@ -19,20 +19,19 @@ namespace libvpx_test {

 class ACMRandom {
 public:
-  ACMRandom() {
-    Reset(DeterministicSeed());
-  }
+  ACMRandom() : random_(DeterministicSeed()) {}

-  explicit ACMRandom(int seed) {
-    Reset(seed);
-  }
+  explicit ACMRandom(int seed) : random_(seed) {}

  void Reset(int seed) {
-    srand(seed);
+    random_.Reseed(seed);
  }

  uint8_t Rand8(void) {
-    return (rand() >> 8) & 0xff;
+    const uint32_t value =
+        random_.Generate(testing::internal::Random::kMaxRange);
+    // There's a bit more entropy in the upper bits of this implementation.
+    return (value >> 24) & 0xff;
  }

  uint8_t Rand8Extremes(void) {
@ -43,7 +42,7 @@ class ACMRandom {
  }

  int PseudoUniform(int range) {
-    return (rand() >> 8) % range;
+    return random_.Generate(range);
  }

  int operator()(int n) {
@ -53,6 +52,9 @@ class ACMRandom {
  static int DeterministicSeed(void) {
    return 0xbaba;
  }
+
+ private:
+  testing::internal::Random random_;
 };

 }  // namespace libvpx_test
--- a/test/fdct8x8_test.cc
+++ b/test/fdct8x8_test.cc
@ -51,11 +51,15 @@ TEST(VP9Fdct8x8Test, SignBiasCheck) {
  }

  for (int j = 0; j < 64; ++j) {
-    const bool bias_acceptable = (abs(count_sign_block[j][0] -
-                                      count_sign_block[j][1]) < 1000);
-    EXPECT_TRUE(bias_acceptable)
-        << "Error: 8x8 FDCT has a sign bias > 1%"
-        << " for input range [-255, 255] at index " << j;
+    const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
+    const int max_diff = 1125;
+    EXPECT_LT(diff, max_diff)
+        << "Error: 8x8 FDCT has a sign bias > "
+        << 1. * max_diff / count_test_block * 100 << "%"
+        << " for input range [-255, 255] at index " << j
+        << " count0: " << count_sign_block[j][0]
+        << " count1: " << count_sign_block[j][1]
+        << " diff: " << diff;
  }

  memset(count_sign_block, 0, sizeof(count_sign_block));
@ -76,11 +80,15 @@ TEST(VP9Fdct8x8Test, SignBiasCheck) {
  }

  for (int j = 0; j < 64; ++j) {
-    const bool bias_acceptable = (abs(count_sign_block[j][0] -
-                                      count_sign_block[j][1]) < 10000);
-    EXPECT_TRUE(bias_acceptable)
-        << "Error: 8x8 FDCT has a sign bias > 10%"
-        << " for input range [-15, 15] at index " << j;
+    // Per-coefficient sign-bias check for the 8x8 FDCT: the two sign counts
+    // for coefficient j must differ by less than max_diff. On failure the
+    // message reports the threshold as a percentage of count_test_block
+    // together with both counts and their difference.
+    const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
+    const int max_diff = 10000;
+    EXPECT_LT(diff, max_diff)
+        << "Error: 8x8 FDCT has a sign bias > "
+        << 1. * max_diff / count_test_block * 100 << "%"
+        << " for input range [-15, 15] at index " << j
+        << " count0: " << count_sign_block[j][0]
+        << " count1: " << count_sign_block[j][1]
+        << " diff: " << diff;
  }
 };

--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@ -439,29 +439,28 @@ static void fillrd(struct postproc_state *state, int q, int a)
    char char_dist[300];

    double sigma;
-    int ai = a, qi = q, i;
+    int i;

    vp8_clear_system_state();


-    sigma = ai + .5 + .6 * (63 - qi) / 63.0;
+    sigma = a + .5 + .6 * (63 - q) / 63.0;

    /* set up a lookup table of 256 entries that matches
     * a gaussian distribution with sigma determined by q.
     */
    {
-        double i;
        int next, j;

        next = 0;

        for (i = -32; i < 32; i++)
        {
-            int a = (int)(.5 + 256 * vp8_gaussian(sigma, 0, i));
+            const int v = (int)(.5 + 256 * vp8_gaussian(sigma, 0, i));

-            if (a)
+            if (v)
            {
-                for (j = 0; j < a; j++)
+                for (j = 0; j < v; j++)
                {
                    char_dist[next+j] = (char) i;
                }
@ -544,12 +543,12 @@ void vp8_plane_add_noise_c(unsigned char *Start, char *noise,
 * filled with the same color block.
 */
 void vp8_blend_mb_inner_c (unsigned char *y, unsigned char *u, unsigned char *v,
-                        int y1, int u1, int v1, int alpha, int stride)
+                        int y_1, int u_1, int v_1, int alpha, int stride)
 {
    int i, j;
-    int y1_const = y1*((1<<16)-alpha);
-    int u1_const = u1*((1<<16)-alpha);
-    int v1_const = v1*((1<<16)-alpha);
+    int y1_const = y_1*((1<<16)-alpha);
+    int u1_const = u_1*((1<<16)-alpha);
+    int v1_const = v_1*((1<<16)-alpha);

    y += 2*stride + 2;
    for (i = 0; i < 12; i++)
@ -582,12 +581,12 @@ void vp8_blend_mb_inner_c (unsigned char *y, unsigned char *u, unsigned char *v,
 * unblended to allow for other visualizations to be layered.
 */
 void vp8_blend_mb_outer_c (unsigned char *y, unsigned char *u, unsigned char *v,
-                        int y1, int u1, int v1, int alpha, int stride)
+                        int y_1, int u_1, int v_1, int alpha, int stride)
 {
    int i, j;
-    int y1_const = y1*((1<<16)-alpha);
-    int u1_const = u1*((1<<16)-alpha);
-    int v1_const = v1*((1<<16)-alpha);
+    int y1_const = y_1*((1<<16)-alpha);
+    int u1_const = u_1*((1<<16)-alpha);
+    int v1_const = v_1*((1<<16)-alpha);

    for (i = 0; i < 2; i++)
    {
@ -646,12 +645,12 @@ void vp8_blend_mb_outer_c (unsigned char *y, unsigned char *u, unsigned char *v,
 }

 void vp8_blend_b_c (unsigned char *y, unsigned char *u, unsigned char *v,
-                        int y1, int u1, int v1, int alpha, int stride)
+                        int y_1, int u_1, int v_1, int alpha, int stride)
 {
    int i, j;
-    int y1_const = y1*((1<<16)-alpha);
-    int u1_const = u1*((1<<16)-alpha);
-    int v1_const = v1*((1<<16)-alpha);
+    int y1_const = y_1*((1<<16)-alpha);
+    int u1_const = u_1*((1<<16)-alpha);
+    int v1_const = v_1*((1<<16)-alpha);

    for (i = 0; i < 4; i++)
    {
@ -676,46 +675,46 @@ void vp8_blend_b_c (unsigned char *y, unsigned char *u, unsigned char *v,
    }
 }

-static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int height)
+static void constrain_line (int x_0, int *x_1, int y_0, int *y_1, int width, int height)
 {
    int dx;
    int dy;

-    if (*x1 > width)
+    if (*x_1 > width)
    {
-        dx = *x1 - x0;
-        dy = *y1 - y0;
+        dx = *x_1 - x_0;
+        dy = *y_1 - y_0;

-        *x1 = width;
+        *x_1 = width;
        if (dx)
-            *y1 = ((width-x0)*dy)/dx + y0;
+            *y_1 = ((width-x_0)*dy)/dx + y_0;
    }
-    if (*x1 < 0)
+    if (*x_1 < 0)
    {
-        dx = *x1 - x0;
-        dy = *y1 - y0;
+        dx = *x_1 - x_0;
+        dy = *y_1 - y_0;

-        *x1 = 0;
+        *x_1 = 0;
        if (dx)
-            *y1 = ((0-x0)*dy)/dx + y0;
+            *y_1 = ((0-x_0)*dy)/dx + y_0;
    }
-    if (*y1 > height)
+    if (*y_1 > height)
    {
-        dx = *x1 - x0;
-        dy = *y1 - y0;
+        dx = *x_1 - x_0;
+        dy = *y_1 - y_0;

-        *y1 = height;
+        *y_1 = height;
        if (dy)
-            *x1 = ((height-y0)*dx)/dy + x0;
+            *x_1 = ((height-y_0)*dx)/dy + x_0;
    }
-    if (*y1 < 0)
+    if (*y_1 < 0)
    {
-        dx = *x1 - x0;
-        dy = *y1 - y0;
+        dx = *x_1 - x_0;
+        dy = *y_1 - y_0;

-        *y1 = 0;
+        *y_1 = 0;
        if (dy)
-            *x1 = ((0-y0)*dx)/dy + x0;
+            *x_1 = ((0-y_0)*dx)/dy + x_0;
    }
 }

--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@ -1365,11 +1365,11 @@ int vp8_decode_frame(VP8D_COMP *pbi)
 #if CONFIG_MULTITHREAD
    if (pbi->b_multithreaded_rd && pc->multi_token_partition != ONE_PARTITION)
    {
-        unsigned int i;
+        unsigned int thread;
        vp8mt_decode_mb_rows(pbi, xd);
        vp8_yv12_extend_frame_borders(yv12_fb_new);
-        for (i = 0; i < pbi->decoding_thread_count; ++i)
-            corrupt_tokens |= pbi->mb_row_di[i].mbd.corrupted;
+        for (thread = 0; thread < pbi->decoding_thread_count; ++thread)
+            corrupt_tokens |= pbi->mb_row_di[thread].mbd.corrupted;
    }
    else
 #endif
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@ -343,7 +343,6 @@ static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row)

    for (mb_row = start_mb_row; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
    {
-       int i;
       int recon_yoffset, recon_uvoffset;
       int mb_col;
       int filter_level;
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@ -90,17 +90,17 @@ static void update_mode(

    if (new_b + (n << 8) < old_b)
    {
-        int i = 0;
+        int j = 0;

        vp8_write_bit(w, 1);

        do
        {
-            const vp8_prob p = Pnew[i];
+            const vp8_prob p = Pnew[j];

-            vp8_write_literal(w, Pcur[i] = p ? p : 1, 8);
+            vp8_write_literal(w, Pcur[j] = p ? p : 1, 8);
        }
-        while (++i < n);
+        while (++j < n);
    }
    else
        vp8_write_bit(w, 0);
@ -245,15 +245,15 @@ void vp8_pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount)

            if (L)
            {
-                const unsigned char *pp = b->prob;
-                int v = e >> 1;
-                int n = L;              /* number of bits in v, assumed nonzero */
-                int i = 0;
+                const unsigned char *proba = b->prob;
+                const int v2 = e >> 1;
+                int n2 = L;              /* number of bits in v2, assumed nonzero */
+                i = 0;

                do
                {
-                    const int bb = (v >> --n) & 1;
-                    split = 1 + (((range - 1) * pp[i>>1]) >> 8);
+                    const int bb = (v2 >> --n2) & 1;
+                    split = 1 + (((range - 1) * proba[i>>1]) >> 8);
                    i = b->tree[i+bb];

                    if (bb)
@ -301,7 +301,7 @@ void vp8_pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount)

                    lowvalue <<= shift;
                }
-                while (n);
+                while (n2);
            }


--- a/vp8/encoder/denoising.c
+++ b/vp8/encoder/denoising.c
@ -206,8 +206,6 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
        MB_MODE_INFO saved_mbmi;
        MACROBLOCKD *filter_xd = &x->e_mbd;
        MB_MODE_INFO *mbmi = &filter_xd->mode_info_context->mbmi;
-        int mv_col;
-        int mv_row;
        int sse_diff = zero_mv_sse - best_sse;

        saved_mbmi = *mbmi;
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@ -10,6 +10,7 @@


 #include "vpx_config.h"
+#include "vp8_rtcd.h"
 #include "encodemb.h"
 #include "encodemv.h"
 #include "vp8/common/common.h"
@ -852,11 +853,10 @@ void vp8_encode_frame(VP8_COMP *cpi)

            if (xd->segmentation_enabled)
            {
-                int i, j;
+                int j;

                if (xd->segmentation_enabled)
                {
-
                    for (i = 0; i < cpi->encoding_thread_count; i++)
                    {
                        for (j = 0; j < 4; j++)
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c
@ -233,7 +233,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,

 #if ARCH_X86 || ARCH_X86_64
    MACROBLOCKD *xd = &x->e_mbd;
-    unsigned char *y0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
+    unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    unsigned char *y;
    int buf_r1, buf_r2, buf_c1;

@ -244,7 +244,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
    y_stride = 32;

    /* Copy to intermediate buffer before searching. */
-    vfp->copymem(y0 - buf_c1 - pre_stride*buf_r1, pre_stride, xd->y_buf, y_stride, 16+buf_r1+buf_r2);
+    vfp->copymem(y_0 - buf_c1 - pre_stride*buf_r1, pre_stride, xd->y_buf, y_stride, 16+buf_r1+buf_r2);
    y = xd->y_buf + y_stride*buf_r1 +buf_c1;
 #else
    unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
@ -375,12 +375,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,

 #if ARCH_X86 || ARCH_X86_64
    MACROBLOCKD *xd = &x->e_mbd;
-    unsigned char *y0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
+    unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    unsigned char *y;

    y_stride = 32;
    /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
-     vfp->copymem(y0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
+     vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
     y = xd->y_buf + y_stride + 1;
 #else
     unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
@ -686,12 +686,12 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,

 #if ARCH_X86 || ARCH_X86_64
    MACROBLOCKD *xd = &x->e_mbd;
-    unsigned char *y0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
+    unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    unsigned char *y;

    y_stride = 32;
    /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
-    vfp->copymem(y0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
+    vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
    y = xd->y_buf + y_stride + 1;
 #else
    unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@ -825,7 +825,7 @@ void vp8_set_speed_features(VP8_COMP *cpi)
        {
            unsigned int sum = 0;
            unsigned int total_mbs = cm->MBs;
-            int i, thresh;
+            int thresh;
            unsigned int total_skip;

            int min = 2000;
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c
@ -594,6 +594,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
    unsigned int zero_mv_sse = INT_MAX, best_sse = INT_MAX;
 #endif

+    int sf_improved_mv_pred = cpi->sf.improved_mv_pred;
    int_mv mvp;

    int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7};
@ -882,7 +883,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
               last frame motion info is not stored, then we can not
               use improved_mv_pred. */
            if (cpi->oxcf.mr_encoder_id && !parent_ref_valid)
-                cpi->sf.improved_mv_pred = 0;
+                sf_improved_mv_pred = 0;

            if (parent_ref_valid && parent_ref_frame)
            {
@ -899,7 +900,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
            }else
 #endif
            {
-                if(cpi->sf.improved_mv_pred)
+                if(sf_improved_mv_pred)
                {
                    if(!saddone)
                    {
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@ -184,17 +184,17 @@ void vp8_strict_quantize_b_c(BLOCK *b, BLOCKD *d)
    for (i = 0; i < 16; i++)
    {
        int dq;
-        int round;
+        int rounding;

        /*TODO: These arrays should be stored in zig-zag order.*/
        rc = vp8_default_zig_zag1d[i];
        z = coeff_ptr[rc];
        dq = dequant_ptr[rc];
-        round = dq >> 1;
+        rounding = dq >> 1;
        /* Sign of z. */
        sz = -(z < 0);
        x = (z + sz) ^ sz;
-        x += round;
+        x += rounding;
        if (x >= dq)
        {
            /* Quantize x. */
--- a/vp8/encoder/ratectrl.c
+++ b/vp8/encoder/ratectrl.c
@ -614,7 +614,6 @@ static void calc_gf_params(VP8_COMP *cpi)
 static void calc_pframe_target_size(VP8_COMP *cpi)
 {
    int min_frame_target;
-    int Adjustment;
    int old_per_frame_bandwidth = cpi->per_frame_bandwidth;

    if ( cpi->current_layer > 0)
@ -658,6 +657,7 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
        /* 1 pass */
        else
        {
+            int Adjustment;
            /* Make rate adjustment to recover bits spent in key frame
             * Test to see if the key frame inter data rate correction
             * should still be in force
@ -688,7 +688,7 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
             */
            if ((cpi->gf_overspend_bits > 0) && (cpi->this_frame_target > min_frame_target))
            {
-                int Adjustment = (cpi->non_gf_bitrate_adjustment <= cpi->gf_overspend_bits) ? cpi->non_gf_bitrate_adjustment : cpi->gf_overspend_bits;
+                Adjustment = (cpi->non_gf_bitrate_adjustment <= cpi->gf_overspend_bits) ? cpi->non_gf_bitrate_adjustment : cpi->gf_overspend_bits;

                if (Adjustment > (cpi->this_frame_target - min_frame_target))
                    Adjustment = (cpi->this_frame_target - min_frame_target);
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@ -884,8 +884,8 @@ static void rd_pick_intra_mbuv_mode(MACROBLOCK *x, int *rate,

    for (mode = DC_PRED; mode <= TM_PRED; mode++)
    {
-        int rate;
-        int distortion;
+        int this_rate;
+        int this_distortion;
        int this_rd;

        xd->mode_info_context->mbmi.uv_mode = mode;
@ -907,17 +907,17 @@ static void rd_pick_intra_mbuv_mode(MACROBLOCK *x, int *rate,
        vp8_quantize_mbuv(x);

        rate_to = rd_cost_mbuv(x);
-        rate = rate_to + x->intra_uv_mode_cost[xd->frame_type][xd->mode_info_context->mbmi.uv_mode];
+        this_rate = rate_to + x->intra_uv_mode_cost[xd->frame_type][xd->mode_info_context->mbmi.uv_mode];

-        distortion = vp8_mbuverror(x) / 4;
+        this_distortion = vp8_mbuverror(x) / 4;

-        this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
+        this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

        if (this_rd < best_rd)
        {
            best_rd = this_rd;
-            d = distortion;
-            r = rate;
+            d = this_distortion;
+            r = this_rate;
            *rate_tokenonly = rate_to;
            mode_selected = mode;
        }
@ -1294,12 +1294,11 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,

                if (bestsme < INT_MAX)
                {
-                    int distortion;
+                    int disto;
                    unsigned int sse;
                    cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4],
                        bsi->ref_mv, x->errorperbit, v_fn_ptr, x->mvcost,
-                        &distortion, &sse);
-
+                        &disto, &sse);
                }
            } /* NEW4X4 */

--- a/vp8/encoder/x86/quantize_sse2_intrinsics.c
+++ b/vp8/encoder/x86/quantize_sse2_intrinsics.c
--- a/vp8/vp8cx.mk
+++ b/vp8/vp8cx.mk
@ -89,12 +89,12 @@ VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/subtract_mmx.asm
 VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp8_enc_stubs_mmx.c
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm
-VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2_intrinsics.c
+VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.c

 # TODO(johann) make this generic
 ifeq ($(HAVE_SSE2),yes)
-vp8/encoder/x86/quantize_sse2_intrinsics.c.o: CFLAGS += -msse2
-vp8/encoder/x86/quantize_sse2_intrinsics.c.d: CFLAGS += -msse2
+vp8/encoder/x86/quantize_sse2.c.o: CFLAGS += -msse2
+vp8/encoder/x86/quantize_sse2.c.d: CFLAGS += -msse2
 endif

 ifeq ($(CONFIG_TEMPORAL_DENOISING),yes)
--- a/vp9/common/x86/vp9_idct_intrin_sse2.c
+++ b/vp9/common/x86/vp9_idct_intrin_sse2.c
@ -15,7 +15,6 @@
 #include "vp9/common/vp9_common.h"
 #include "vp9/common/vp9_idct.h"

-#if HAVE_SSE2
 // In order to improve performance, clip absolute diff values to [0, 255],
 // which allows to keep the additions/subtractions in 8 bits.
 void vp9_dc_only_idct_add_sse2(int input_dc, uint8_t *pred_ptr,
@ -1972,4 +1971,3 @@ void vp9_short_idct32x32_sse2(int16_t *input, int16_t *output, int pitch) {
    }
  }
 }
-#endif
--- a/vp9/common/x86/vp9_idct_x86.h
+++ b/vp9/common/x86/vp9_idct_x86.h
@ -1,51 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef VP9_COMMON_X86_VP9_IDCT_X86_H_
-#define VP9_COMMON_X86_VP9_IDCT_X86_H_
-
-/* Note:
- *
- * This platform is commonly built for runtime CPU detection. If you modify
- * any of the function mappings present in this file, be sure to also update
- * them in the function pointer initialization code
- */
-
-#if HAVE_MMX
-extern prototype_second_order(vp9_short_inv_walsh4x4_mmx);
-extern prototype_second_order(vp9_short_inv_walsh4x4_1_mmx);
-
-#if !CONFIG_RUNTIME_CPU_DETECT
-#undef vp9_idct_iwalsh16
-#define vp9_idct_iwalsh16 vp9_short_inv_walsh4x4_mmx
-
-#undef vp9_idct_iwalsh1
-#define vp9_idct_iwalsh1 vp9_short_inv_walsh4x4_1_mmx
-
-#endif
-#endif
-
-#if HAVE_SSE2
-
-extern prototype_second_order(vp9_short_inv_walsh4x4_sse2);
-
-#if !CONFIG_RUNTIME_CPU_DETECT
-
-#undef vp9_idct_iwalsh16
-#define vp9_idct_iwalsh16 vp9_short_inv_walsh4x4_sse2
-
-#endif
-
-#endif
-
-
-
-#endif
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@ -1340,7 +1340,7 @@ static void decode_tiles(VP9D_COMP *pbi,
  if (pbi->oxcf.inv_tile_order) {
    const int n_cols = pc->tile_columns;
    const uint8_t *data_ptr2[4][1 << 6];
-    vp9_reader UNINITIALIZED_IS_SAFE(bc_bak);
+    vp9_reader bc_bak = {0};

    // pre-initialize the offsets, we're going to read in inverse order
    data_ptr2[0][0] = data_ptr;
--- a/vp9/decoder/x86/vp9_dequantize_sse2.c
+++ b/vp9/decoder/x86/vp9_dequantize_sse2.c
@ -15,8 +15,6 @@
 #include "vp9/common/vp9_common.h"
 #include "vp9/common/vp9_idct.h"

-#if HAVE_SSE2
-
 void vp9_add_residual_4x4_sse2(const int16_t *diff, uint8_t *dest, int stride) {
  const int width = 4;
  const __m128i zero = _mm_setzero_si128();
@ -445,4 +443,3 @@ void vp9_add_constant_residual_32x32_sse2(const int16_t diff, uint8_t *dest,
    dest += 4 * stride;
  } while (--i);
 }
-#endif
--- a/vp9/decoder/x86/vp9_idct_mmx.h
+++ b/vp9/decoder/x86/vp9_idct_mmx.h
@ -1,22 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef VP9_DECODER_X86_VP9_IDCT_MMX_H_
-#define VP9_DECODER_X86_VP9_IDCT_MMX_H_
-
-
-void vp9_dequant_dc_idct_add_mmx(short *input, const short *dq,
-                                 unsigned char *pred, unsigned char *dest,
-                                 int pitch, int stride, int Dc);
-
-void vp9_dequant_idct_add_mmx(short *input, const short *dq, unsigned char *pred,
-                              unsigned char *dest, int pitch, int stride);
-
-#endif /* VP9_DECODER_X86_VP9_IDCT_MMX_H_ */
--- a/vp9/encoder/vp9_dct.c
+++ b/vp9/encoder/vp9_dct.c
@ -1286,4 +1286,3 @@ void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch) {
      out[j + i * 32] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
  }
 }
-
--- a/vp9/encoder/vp9_sad_c.c
+++ b/vp9/encoder/vp9_sad_c.c
@ -577,4 +577,3 @@ void vp9_sad4x4x4d_c(const uint8_t *src_ptr,
  sad_array[3] = vp9_sad4x4(src_ptr, src_stride,
                            ref_ptr[3], ref_stride, 0x7fffffff);
 }
-
--- a/vp9/encoder/x86/vp9_dct_sse2_intrinsics.c
+++ b/vp9/encoder/x86/vp9_dct_sse2_intrinsics.c
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@ -83,7 +83,6 @@ VP9_COMMON_SRCS-$(CONFIG_POSTPROC_VISUALIZER) += common/vp9_textblit.c
 VP9_COMMON_SRCS-yes += common/vp9_treecoder.c
 VP9_COMMON_SRCS-$(CONFIG_IMPLICIT_SEGMENTATION) += common/vp9_implicit_segmentation.c

-VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_idct_x86.h
 VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_loopfilter_x86.h
 VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_postproc_x86.h
 VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_asm_stubs.c
@ -113,13 +112,13 @@ VP9_COMMON_SRCS-yes += common/vp9_maskingmv.c
 VP9_COMMON_SRCS-$(HAVE_SSE3) += common/x86/vp9_mask_sse3.asm
 endif

-VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_idct_x86.c
+VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c
 VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_sadmxn_sse2.c
 ifeq ($(HAVE_SSE2),yes)
-vp9/common/x86/vp9_idct_x86.c.o: CFLAGS += -msse2
+vp9/common/x86/vp9_idct_intrin_sse2.c.o: CFLAGS += -msse2
 vp9/common/x86/vp9_loopfilter_intrin_sse2.c.o: CFLAGS += -msse2
 vp9/common/x86/vp9_sadmxn_sse2.c.o: CFLAGS += -msse2
-vp9/common/x86/vp9_idct_x86.c.d: CFLAGS += -msse2
+vp9/common/x86/vp9_idct_intrin_sse2.c.d: CFLAGS += -msse2
 vp9/common/x86/vp9_loopfilter_intrin_sse2.c.d: CFLAGS += -msse2
 vp9/common/x86/vp9_sadmxn_sse2.c.d: CFLAGS += -msse2
 endif
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@ -408,9 +408,9 @@ static void parse_superframe_index(const uint8_t *data,
  *count = 0;

  if ((marker & 0xe0) == 0xc0) {
-    const int frames = (marker & 0x7) + 1;
-    const int mag = ((marker >> 3) & 3) + 1;
-    const int index_sz = 2 + mag  * frames;
+    const uint32_t frames = (marker & 0x7) + 1;
+    const uint32_t mag = ((marker >> 3) & 0x3) + 1;
+    const size_t index_sz = 2 + mag * frames;

    if (data_sz >= index_sz && data[data_sz - index_sz] == marker) {
      // found a valid superframe index
@ -418,7 +418,7 @@ static void parse_superframe_index(const uint8_t *data,
      const uint8_t *x = data + data_sz - index_sz + 1;

      for (i = 0; i < frames; i++) {
-        int this_sz = 0;
+        uint32_t this_sz = 0;

        for (j = 0; j < mag; j++)
          this_sz |= (*x++) << (j * 8);
@ -447,9 +447,9 @@ static vpx_codec_err_t vp9_decode(vpx_codec_alg_priv_t  *ctx,
    // Skip over the superframe index, if present
    if (data_sz && (*data_start & 0xe0) == 0xc0) {
      const uint8_t marker = *data_start;
-      const int frames = (marker & 0x7) + 1;
-      const int mag = ((marker >> 3) & 3) + 1;
-      const int index_sz = 2 + mag  * frames;
+      const uint32_t frames = (marker & 0x7) + 1;
+      const uint32_t mag = ((marker >> 3) & 0x3) + 1;
+      const uint32_t index_sz = 2 + mag * frames;

      if (data_sz >= index_sz && data_start[index_sz - 1] == marker) {
        data_start += index_sz;
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@ -108,10 +108,10 @@ VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/vp9_quantize_mmx.asm
 VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/vp9_encodeopt.asm
 VP9_CX_SRCS-$(ARCH_X86_64) += encoder/x86/vp9_ssim_opt.asm

-VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2_intrinsics.c
+VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.c
 ifeq ($(HAVE_SSE2),yes)
-vp9/encoder/x86/vp9_dct_sse2_intrinsics.c.d: CFLAGS += -msse2
-vp9/encoder/x86/vp9_dct_sse2_intrinsics.c.o: CFLAGS += -msse2
+vp9/encoder/x86/vp9_dct_sse2.c.d: CFLAGS += -msse2
+vp9/encoder/x86/vp9_dct_sse2.c.o: CFLAGS += -msse2
 endif


--- a/vp9/vp9dx.mk
+++ b/vp9/vp9dx.mk
@ -37,10 +37,10 @@ VP9_DX_SRCS-yes := $(filter-out $(VP9_DX_SRCS_REMOVE-yes),$(VP9_DX_SRCS-yes))

 VP9_DX_SRCS-$(HAVE_SSE2) += decoder/x86/vp9_idct_blk_sse2.c

-VP9_DX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += decoder/x86/vp9_dequantize_x86.c
+VP9_DX_SRCS-$(HAVE_SSE2) += decoder/x86/vp9_dequantize_sse2.c
 ifeq ($(HAVE_SSE2),yes)
-vp9/decoder/x86/vp9_dequantize_x86.c.o: CFLAGS += -msse2
-vp9/decoder/x86/vp9_dequantize_x86.c.d: CFLAGS += -msse2
+vp9/decoder/x86/vp9_dequantize_sse2.c.o: CFLAGS += -msse2
+vp9/decoder/x86/vp9_dequantize_sse2.c.d: CFLAGS += -msse2
 endif

 $(eval $(call asm_offsets_template,\
--- a/vpx_ports/arm_cpudetect.c
+++ b/vpx_ports/arm_cpudetect.c
@ -53,8 +53,6 @@ int arm_cpu_caps(void) {
  return flags & mask;
 }

-#elif defined(_MSC_VER) /* end !CONFIG_RUNTIME_CPU_DETECT */
-
 #elif defined(_MSC_VER) /* end !CONFIG_RUNTIME_CPU_DETECT */
 /*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
 #define WIN32_LEAN_AND_MEAN
--- a/vpxdec.c
+++ b/vpxdec.c
@ -49,8 +49,8 @@

 static const char *exec_name;

-#define VP8_FOURCC (0x00385056)
-#define VP9_FOURCC (0x00395056)
+#define VP8_FOURCC (0x30385056)
+#define VP9_FOURCC (0x30395056)
 static const struct {
  char const *name;
  const vpx_codec_iface_t *(*iface)(void);