Bug 608066 - Update libvpx to v0.9.5. r=chris,khuey a=b-f

Timothy B. Terriberry 2010-11-08 09:47:17 +02:00
Parent 0a19a9180e
Commit 767618fe25
240 changed files with 19211 additions and 3546 deletions

View file

@@ -165,7 +165,11 @@ MOZ_TREMOR = @MOZ_TREMOR@
MOZ_WEBM = @MOZ_WEBM@
VPX_AS = @VPX_AS@
VPX_ASFLAGS = @VPX_ASFLAGS@
VPX_DASH_C_FLAG = @VPX_DASH_C_FLAG@
VPX_AS_CONVERSION = @VPX_AS_CONVERSION@
VPX_ASM_SUFFIX = @VPX_ASM_SUFFIX@
VPX_X86_ASM = @VPX_X86_ASM@
VPX_ARM_ASM = @VPX_ARM_ASM@
NS_PRINTING = @NS_PRINTING@
MOZ_CRASHREPORTER = @MOZ_CRASHREPORTER@
MOZ_HELP_VIEWER = @MOZ_HELP_VIEWER@

View file

@@ -4958,7 +4958,11 @@ MOZ_MEDIA=
MOZ_WEBM=1
VPX_AS=
VPX_ASFLAGS=
VPX_AS_DASH_C_FLAG=
VPX_AS_CONVERSION=
VPX_ASM_SUFFIX=
VPX_X86_ASM=
VPX_ARM_ASM=
MOZ_PANGO=1
MOZ_PERMISSIONS=1
MOZ_PLACES=1
@@ -6045,8 +6049,10 @@ if test -n "$MOZ_WEBM" -a -z "$MOZ_NATIVE_LIBVPX"; then
dnl Detect if we can use an assembler to compile optimized assembly for libvpx.
dnl We currently require yasm on all platforms and require yasm 1.1.0 on Win32.
dnl We currently require yasm on all x86 platforms and require yasm 1.1.0 on Win32.
dnl We currently require gcc on all arm platforms.
VPX_AS=$YASM
VPX_ASM_SUFFIX=asm
dnl See if we have assembly on this platform.
case "$OS_ARCH:$CPU_ARCH" in
@@ -6093,6 +6099,17 @@ if test -n "$MOZ_WEBM" -a -z "$MOZ_NATIVE_LIBVPX"; then
fi
fi
;;
*:arm*)
if test -n "$GNU_AS" ; then
VPX_AS=$AS
dnl These flags are a lie; they're just used to enable the requisite
dnl opcodes; actual arch detection is done at runtime.
VPX_ASFLAGS="-march=armv7-a -mfpu=neon"
VPX_DASH_C_FLAG="-c"
VPX_AS_CONVERSION="$PERL ${srcdir}/media/libvpx/build/make/ads2gas.pl"
VPX_ASM_SUFFIX="$ASM_SUFFIX"
VPX_ARM_ASM=1
fi
esac
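As the comment in the ARM case above notes, -march=armv7-a -mfpu=neon only let the assembler accept the NEON opcodes; whether those code paths actually execute is decided at runtime. A minimal C sketch of that gating, modeled on the arm_cpu_caps()/HAS_NEON flag checks that vp8_arch_arm_common_init() performs later in this commit (the helper name below is illustrative, not actual libvpx code):

#include "vpx_ports/arm.h"   /* arm_cpu_caps(), HAS_MEDIA, HAS_NEON */
#include "onyxc_int.h"       /* VP8_COMMON_RTCD */

/* Hypothetical helper: pick one function pointer from the CPU capabilities
 * probed at run time, so a binary whose assembly was built with -mfpu=neon
 * still runs safely on parts without NEON. */
static void pick_copy_mem16x16(VP8_COMMON_RTCD *rtcd)
{
    int flags = arm_cpu_caps();              /* probed once at init */

    if (flags & HAS_NEON)
        rtcd->recon.copy16x16 = vp8_copy_mem16x16_neon;
    else if (flags & HAS_MEDIA)              /* ARMv6 media extensions */
        rtcd->recon.copy16x16 = vp8_copy_mem16x16_v6;
    else
        rtcd->recon.copy16x16 = vp8_copy_mem16x16_c;  /* plain C fallback */
}
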
if test -n "$COMPILE_ENVIRONMENT" -a -n "$VPX_X86_ASM" -a -z "$VPX_AS"; then
@@ -6101,6 +6118,8 @@ if test -n "$MOZ_WEBM" -a -z "$MOZ_NATIVE_LIBVPX"; then
if test -n "$VPX_X86_ASM"; then
AC_DEFINE(VPX_X86_ASM)
elif test -n "$VPX_ARM_ASM"; then
AC_DEFINE(VPX_ARM_ASM)
else
AC_MSG_WARN([No assembler or assembly support for libvpx. Using unoptimized C routines.])
fi
@@ -9082,7 +9101,11 @@ AC_SUBST(MOZ_OGG)
AC_SUBST(MOZ_ALSA_LIBS)
AC_SUBST(VPX_AS)
AC_SUBST(VPX_ASFLAGS)
AC_SUBST(VPX_DASH_C_FLAG)
AC_SUBST(VPX_AS_CONVERSION)
AC_SUBST(VPX_ASM_SUFFIX)
AC_SUBST(VPX_X86_ASM)
AC_SUBST(VPX_ARM_ASM)
if test "$USING_HCC"; then
CC='${topsrcdir}/build/hcc'

View file

@@ -53,8 +53,10 @@ LOCAL_INCLUDES += \
-I$(topsrcdir)/media/libvpx \
-I$(topsrcdir)/media/libvpx/vp8/ \
-I$(topsrcdir)/media/libvpx/vp8/common/ \
-I$(topsrcdir)/media/libvpx/vp8/common/arm \
-I$(topsrcdir)/media/libvpx/vp8/common/x86 \
-I$(topsrcdir)/media/libvpx/vp8/decoder \
-I$(topsrcdir)/media/libvpx/vp8/decoder/arm \
-I$(topsrcdir)/media/libvpx/vp8/decoder/x86 \
-I$(topsrcdir)/media/libvpx/vpx_codec \
-I$(topsrcdir)/media/libvpx/vpx_mem/ \
@@ -64,25 +66,35 @@ LOCAL_INCLUDES += \
$(NULL)
VPATH += \
$(srcdir)/build/make \
$(srcdir)/vpx \
$(srcdir)/vpx/src \
$(srcdir)/vpx_mem \
$(srcdir)/vpx_mem/include \
$(srcdir)/vpx_ports \
$(srcdir)/vpx_scale \
$(srcdir)/vpx_scale/arm \
$(srcdir)/vpx_scale/generic \
$(srcdir)/vp8 \
$(srcdir)/vp8/common \
$(srcdir)/vp8/common/arm \
$(srcdir)/vp8/common/arm/armv6 \
$(srcdir)/vp8/common/arm/neon \
$(srcdir)/vp8/common/generic \
$(srcdir)/vp8/common/x86 \
$(srcdir)/vp8/decoder \
$(srcdir)/vp8/decoder/arm \
$(srcdir)/vp8/decoder/arm/armv6 \
$(srcdir)/vp8/decoder/arm/neon \
$(srcdir)/vp8/decoder/generic \
$(srcdir)/vp8/decoder/x86 \
$(NULL)
ASM_SUFFIX=asm
#Setup the libvpx assembler config.
AS=$(VPX_AS)
ASFLAGS=$(VPX_ASFLAGS) -I$(topsrcdir)/media/libvpx/ -I$(topsrcdir)/media/libvpx/vpx_ports/
ASFLAGS=$(VPX_ASFLAGS) -I. -I$(topsrcdir)/media/libvpx/ -I$(topsrcdir)/media/libvpx/vpx_ports/
AS_DASH_C_FLAG=$(VPX_DASH_C_FLAG)
ASM_SUFFIX=$(VPX_ASM_SUFFIX)
EXPORTS_NAMESPACES = vpx
@@ -104,6 +116,7 @@ EXPORTS_vpx = \
mem.h \
vpx_integer.h \
vpx_timer.h \
arm.h \
x86.h \
scale_mode.h \
vpxscale.h \
@@ -145,9 +158,9 @@ CSRCS += \
dboolhuff.c \
decodemv.c \
decodframe.c \
demode.c \
dequantize.c \
detokenize.c \
reconintra_mt.c \
idct_blk.c \
onyxd_if.c \
threading.c \
@@ -168,6 +181,7 @@ CSRCS += \
ifdef VPX_X86_ASM
# Building on an x86 platform with a supported assembler, include
# the optimized assembly in the build.
CSRCS += \
idct_blk_mmx.c \
idct_blk_sse2.c \
@@ -196,7 +210,116 @@ ASFILES += \
$(NULL)
endif
ifdef VPX_ARM_ASM
# Building on an ARM platform with a supported assembler, include
# the optimized assembly in the build.
# The Android NDK doesn't pre-define anything to indicate the OS it's on, so
# do it for them.
ifeq ($(OS_TARGET),Android)
DEFINES += -D__linux__
endif
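The -D__linux__ added above matters presumably because parts of libvpx (such as its runtime CPU detection) key off OS macros that the Android NDK does not define. A purely hypothetical illustration of that kind of guard, not the contents of the real arm_cpudetect.c:

#include <stdio.h>
#include <string.h>

/* Hypothetical sketch: on Linux-like systems (which Android becomes once
 * __linux__ is defined), NEON support can be probed from /proc/cpuinfo. */
static int cpu_has_neon(void)
{
#if defined(__linux__)
    FILE *f = fopen("/proc/cpuinfo", "r");
    char line[512];
    int neon = 0;

    if (!f)
        return 0;
    while (fgets(line, sizeof(line), f)) {
        if (strncmp(line, "Features", 8) == 0 && strstr(line, " neon")) {
            neon = 1;
            break;
        }
    }
    fclose(f);
    return neon;
#else
    return 0;   /* other OSes would need a different probe */
#endif
}
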
CSRCS += \
arm_cpudetect.c \
arm_systemdependent.c \
bilinearfilter_arm.c \
filter_arm.c \
loopfilter_arm.c \
reconintra_arm.c \
arm_dsystemdependent.c \
dequantize_arm.c \
idct_blk_v6.c \
idct_blk_neon.c \
recon_neon.c \
$(NULL)
VPX_ASFILES = \
detokenize.asm \
bilinearfilter_v6.asm \
copymem8x4_v6.asm \
copymem8x8_v6.asm \
copymem16x16_v6.asm \
dc_only_idct_add_v6.asm \
iwalsh_v6.asm \
filter_v6.asm \
idct_v6.asm \
loopfilter_v6.asm \
recon_v6.asm \
simpleloopfilter_v6.asm \
sixtappredict8x4_v6.asm \
bilinearpredict4x4_neon.asm \
bilinearpredict8x4_neon.asm \
bilinearpredict8x8_neon.asm \
bilinearpredict16x16_neon.asm \
copymem8x4_neon.asm \
copymem8x8_neon.asm \
copymem16x16_neon.asm \
dc_only_idct_add_neon.asm \
iwalsh_neon.asm \
loopfilter_neon.asm \
loopfiltersimplehorizontaledge_neon.asm \
loopfiltersimpleverticaledge_neon.asm \
mbloopfilter_neon.asm \
recon2b_neon.asm \
recon4b_neon.asm \
reconb_neon.asm \
shortidct4x4llm_1_neon.asm \
shortidct4x4llm_neon.asm \
sixtappredict4x4_neon.asm \
sixtappredict8x4_neon.asm \
sixtappredict8x8_neon.asm \
sixtappredict16x16_neon.asm \
recon16x16mb_neon.asm \
buildintrapredictorsmby_neon.asm \
save_neon_reg.asm \
dequant_dc_idct_v6.asm \
dequant_idct_v6.asm \
dequantize_v6.asm \
idct_dequant_dc_full_2x_neon.asm \
idct_dequant_dc_0_2x_neon.asm \
dequant_idct_neon.asm \
idct_dequant_full_2x_neon.asm \
idct_dequant_0_2x_neon.asm \
dequantizeb_neon.asm \
$(NULL)
# The ARM asm needs to extract the offsets of various C struct members.
# We need a program that runs on the host to pull them out of a .o file.
HOST_CSRCS = obj_int_extract.c
HOST_PROGRAM = host_obj_int_extract$(HOST_BIN_SUFFIX)
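The offsets mentioned above come from compiling a small C file whose globals hold offsetof() values; host_obj_int_extract then reads the resulting object file and prints each symbol as an EQU line that the .asm sources include. A hypothetical sketch of the idea (the struct and names below are illustrative, not the real vpx_asm_offsets.c):

#include <stddef.h>     /* offsetof */

/* Each global below ends up in the object file's data; the extractor reads
 * the object and emits e.g. "example_blockd_qcoeff  EQU  4". */
typedef struct {
    int stride;
    short *qcoeff;
    unsigned char eob;
} example_blockd;

#define DEFINE(sym, val) int sym = (val)

DEFINE(example_blockd_stride, offsetof(example_blockd, stride));
DEFINE(example_blockd_qcoeff, offsetof(example_blockd, qcoeff));
DEFINE(example_blockd_eob,    offsetof(example_blockd, eob));

Running something like "./host_obj_int_extract rvds vpx_asm_offsets.o" (as the rules below do via $(HOST_PROGRAM)) prints one EQU line per symbol, so the hand-written assembly always agrees with the C struct layout even when that layout changes.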
ifdef VPX_AS_CONVERSION
# The ARM asm is written in ARM RVCT syntax, but we actually build it with
# gas using GNU syntax. Add some rules to perform the conversion.
VPX_CONVERTED_ASFILES = $(addsuffix .$(ASM_SUFFIX), $(VPX_ASFILES))
ASFILES += $(VPX_CONVERTED_ASFILES)
GARBAGE += $(VPX_CONVERTED_ASFILES)
%.asm.$(ASM_SUFFIX): %.asm
$(VPX_AS_CONVERSION) < $< > $@
vpx_asm_offsets.asm: vpx_asm_offsets.$(OBJ_SUFFIX) $(HOST_PROGRAM)
./$(HOST_PROGRAM) rvds $< | $(VPX_AS_CONVERSION) > $@
detokenize.asm.$(OBJ_SUFFIX): vpx_asm_offsets.asm
else
ASFILES += $(VPX_ASFILES)
vpx_asm_offsets.asm: vpx_asm_offsets.$(OBJ_SUFFIX) $(HOST_PROGRAM)
./$(HOST_PROGRAM) rvds $< > $@
detokenize.$(OBJ_SUFFIX): vpx_asm_offsets.asm
endif
GARBAGE += vpx_asm_offsets.$(OBJ_SUFFIX) vpx_asm_offsets.asm
endif
include $(topsrcdir)/config/rules.mk
# Workaround a bug of Sun Studio (CR 6963410)

View file

@@ -1,2 +1,2 @@
Using libvpx pulled from git://review.webmproject.org/libvpx.git
Commit ID: 0dd78af3e9b089eacc9af280adfb5549fc7ecdcd
Using the v0.9.5 release pulled from
http://webm.googlecode.com/files/libvpx-v0.9.5.zip

View file

@@ -0,0 +1,150 @@
#!/usr/bin/perl
##
## Copyright (c) 2010 The WebM project authors. All Rights Reserved.
##
## Use of this source code is governed by a BSD-style license
## that can be found in the LICENSE file in the root of the source
## tree. An additional intellectual property rights grant can be found
## in the file PATENTS. All contributing project authors may
## be found in the AUTHORS file in the root of the source tree.
##
# ads2gas.pl
# Author: Eric Fung (efung (at) acm.org)
#
# Convert ARM Developer Suite 1.0.1 syntax assembly source to GNU as format
#
# Usage: cat inputfile | perl ads2gas.pl > outputfile
#
print "@ This file was created from a .asm file\n";
print "@ using the ads2gas.pl script.\n";
print "\t.equ DO1STROUNDING, 0\n";
while (<STDIN>)
{
# Comment character
s/;/@/g;
# Hexadecimal constants prefaced by 0x
s/#&/#0x/g;
# Convert :OR: to |
s/:OR:/ | /g;
# Convert :AND: to &
s/:AND:/ & /g;
# Convert :NOT: to ~
s/:NOT:/ ~ /g;
# Convert :SHL: to <<
s/:SHL:/ << /g;
# Convert :SHR: to >>
s/:SHR:/ >> /g;
# Convert ELSE to .else
s/ELSE/.else/g;
# Convert ENDIF to .endif
s/ENDIF/.endif/g;
# Convert ELSEIF to .elseif
s/ELSEIF/.elseif/g;
# Convert LTORG to .ltorg
s/LTORG/.ltorg/g;
# Convert IF :DEF: to .if
# gcc doesn't have the ability to do a conditional
# if defined variable that is set by IF :DEF: on
# armasm, so convert it to a normal .if and then
# make sure to define a value elsewhere
if (s/\bIF :DEF:\b/.if /g)
{
s/=/==/g;
}
# Convert IF to .if
if (s/\bIF\b/.if/g)
{
s/=+/==/g;
}
# Convert INCLUDE to .INCLUDE "file"
s/INCLUDE(\s*)(.*)$/.include $1\"$2\"/;
# Code directive (ARM vs Thumb)
s/CODE([0-9][0-9])/.code $1/;
# No AREA required
s/^\s*AREA.*$/.text/;
# DCD to .word
# This one is for incoming symbols
s/DCD\s+\|(\w*)\|/.long $1/;
# DCW to .short
s/DCW\s+\|(\w*)\|/.short $1/;
s/DCW(.*)/.short $1/;
# Constants defined in scope
s/DCD(.*)/.long $1/;
s/DCB(.*)/.byte $1/;
# RN to .req
if (s/RN\s+([Rr]\d+|lr)/.req $1/)
{
print;
next;
}
# Make function visible to linker, and make additional symbol with
# prepended underscore
s/EXPORT\s+\|([\$\w]*)\|/.global $1 \n\t.type $1, function/;
s/IMPORT\s+\|([\$\w]*)\|/.global $1/;
# No vertical bars required; make additional symbol with prepended
# underscore
s/^\|(\$?\w+)\|/_$1\n\t$1:/g;
# Labels need trailing colon
# s/^(\w+)/$1:/ if !/EQU/;
# put the colon at the end of the line in the macro
s/^([a-zA-Z_0-9\$]+)/$1:/ if !/EQU/;
# Strip ALIGN
s/\sALIGN/@ ALIGN/g;
# Strip ARM
s/\sARM/@ ARM/g;
# Strip REQUIRE8
#s/\sREQUIRE8/@ REQUIRE8/g;
s/\sREQUIRE8/@ /g; #EQU cause problem
# Strip PRESERVE8
s/\sPRESERVE8/@ PRESERVE8/g;
# Strip PROC and ENDPROC
s/\sPROC/@/g;
s/\sENDP/@/g;
# EQU directive
s/(.*)EQU(.*)/.equ $1, $2/;
# Begin macro definition
if (/MACRO/) {
$_ = <STDIN>;
s/^/.macro/;
s/\$//g; # remove formal param reference
s/;/@/g; # change comment characters
}
# For macros, use \ to reference formal params
s/\$/\\/g; # End macro definition
s/MEND/.endm/; # No need to tell it where to stop assembling
next if /^\s*END\s*$/;
print;
}

View file

@@ -0,0 +1,756 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <stdio.h>
#include <stdlib.h>
#include "vpx_config.h"
#if defined(_MSC_VER)
#include <io.h>
#include <share.h>
#include "vpx/vpx_integer.h"
#else
#include <stdint.h>
#include <unistd.h>
#endif
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdarg.h>
typedef enum
{
OUTPUT_FMT_PLAIN,
OUTPUT_FMT_RVDS,
OUTPUT_FMT_GAS,
} output_fmt_t;
int log_msg(const char *fmt, ...)
{
int res;
va_list ap;
va_start(ap, fmt);
res = vfprintf(stderr, fmt, ap);
va_end(ap);
return res;
}
#if defined(__GNUC__) && __GNUC__
#if defined(__MACH__)
#include <mach-o/loader.h>
#include <mach-o/nlist.h>
int parse_macho(uint8_t *base_buf, size_t sz)
{
int i, j;
struct mach_header header;
uint8_t *buf = base_buf;
int base_data_section = 0;
memcpy(&header, buf, sizeof(struct mach_header));
buf += sizeof(struct mach_header);
if (header.magic != MH_MAGIC)
{
log_msg("Bad magic number for object file. 0x%x expected, 0x%x found.\n",
header.magic, MH_MAGIC);
goto bail;
}
if (header.cputype != CPU_TYPE_ARM)
{
log_msg("Bad cputype for object file. Currently only tested for CPU_TYPE_ARM.\n");
goto bail;
}
if (header.filetype != MH_OBJECT)
{
log_msg("Bad filetype for object file. Currently only tested for MH_OBJECT.\n");
goto bail;
}
for (i = 0; i < header.ncmds; i++)
{
struct load_command lc;
struct symtab_command sc;
struct segment_command seg_c;
memcpy(&lc, buf, sizeof(struct load_command));
if (lc.cmd == LC_SEGMENT)
{
uint8_t *seg_buf = buf;
struct section s;
memcpy(&seg_c, buf, sizeof(struct segment_command));
seg_buf += sizeof(struct segment_command);
for (j = 0; j < seg_c.nsects; j++)
{
memcpy(&s, seg_buf + (j * sizeof(struct section)), sizeof(struct section));
// Need to get this offset which is the start of the symbol table
// before matching the strings up with symbols.
base_data_section = s.offset;
}
}
else if (lc.cmd == LC_SYMTAB)
{
uint8_t *sym_buf = base_buf;
uint8_t *str_buf = base_buf;
if (base_data_section != 0)
{
memcpy(&sc, buf, sizeof(struct symtab_command));
if (sc.cmdsize != sizeof(struct symtab_command))
log_msg("Can't find symbol table!\n");
sym_buf += sc.symoff;
str_buf += sc.stroff;
for (j = 0; j < sc.nsyms; j++)
{
struct nlist nl;
int val;
memcpy(&nl, sym_buf + (j * sizeof(struct nlist)), sizeof(struct nlist));
val = *((int *)(base_buf + base_data_section + nl.n_value));
// Location of string is calculated each time from the
// start of the string buffer. On darwin the symbols
// are prefixed by "_". On other platforms it is not
// so it needs to be removed. That is the reason for
// the +1.
printf("%-40s EQU %5d\n", str_buf + nl.n_un.n_strx + 1, val);
}
}
}
buf += lc.cmdsize;
}
return 0;
bail:
return 1;
}
int main(int argc, char **argv)
{
int fd;
char *f;
struct stat stat_buf;
uint8_t *file_buf;
int res;
if (argc < 2 || argc > 3)
{
fprintf(stderr, "Usage: %s [output format] <obj file>\n\n", argv[0]);
fprintf(stderr, " <obj file>\tMachO format object file to parse\n");
fprintf(stderr, "Output Formats:\n");
fprintf(stderr, " gas - compatible with GNU assembler\n");
fprintf(stderr, " rvds - compatible with armasm\n");
goto bail;
}
f = argv[2];
if (!((!strcmp(argv[1], "rvds")) || (!strcmp(argv[1], "gas"))))
f = argv[1];
fd = open(f, O_RDONLY);
if (fd < 0)
{
perror("Unable to open file");
goto bail;
}
if (fstat(fd, &stat_buf))
{
perror("stat");
goto bail;
}
file_buf = malloc(stat_buf.st_size);
if (!file_buf)
{
perror("malloc");
goto bail;
}
if (read(fd, file_buf, stat_buf.st_size) != stat_buf.st_size)
{
perror("read");
goto bail;
}
if (close(fd))
{
perror("close");
goto bail;
}
res = parse_macho(file_buf, stat_buf.st_size);
free(file_buf);
if (!res)
return EXIT_SUCCESS;
bail:
return EXIT_FAILURE;
}
#else
#include "elf.h"
#define COPY_STRUCT(dst, buf, ofst, sz) do {\
if(ofst + sizeof((*(dst))) > sz) goto bail;\
memcpy(dst, buf+ofst, sizeof((*(dst))));\
} while(0)
#define ENDIAN_ASSIGN(val, memb) do {\
if(!elf->le_data) {log_msg("Big Endian data not supported yet!\n");goto bail;}\
(val) = (memb);\
} while(0)
#define ENDIAN_ASSIGN_IN_PLACE(memb) do {\
ENDIAN_ASSIGN(memb, memb);\
} while(0)
typedef struct
{
uint8_t *buf; /* Buffer containing ELF data */
size_t sz; /* Buffer size */
int le_data; /* Data is little-endian */
Elf32_Ehdr hdr;
} elf_obj_t;
int parse_elf32_header(elf_obj_t *elf)
{
int res;
/* Verify ELF32 header */
COPY_STRUCT(&elf->hdr, elf->buf, 0, elf->sz);
res = elf->hdr.e_ident[EI_MAG0] == ELFMAG0;
res &= elf->hdr.e_ident[EI_MAG1] == ELFMAG1;
res &= elf->hdr.e_ident[EI_MAG2] == ELFMAG2;
res &= elf->hdr.e_ident[EI_MAG3] == ELFMAG3;
res &= elf->hdr.e_ident[EI_CLASS] == ELFCLASS32;
res &= elf->hdr.e_ident[EI_DATA] == ELFDATA2LSB
|| elf->hdr.e_ident[EI_DATA] == ELFDATA2MSB;
if (!res) goto bail;
elf->le_data = elf->hdr.e_ident[EI_DATA] == ELFDATA2LSB;
ENDIAN_ASSIGN_IN_PLACE(elf->hdr.e_type);
ENDIAN_ASSIGN_IN_PLACE(elf->hdr.e_machine);
ENDIAN_ASSIGN_IN_PLACE(elf->hdr.e_version);
ENDIAN_ASSIGN_IN_PLACE(elf->hdr.e_entry);
ENDIAN_ASSIGN_IN_PLACE(elf->hdr.e_phoff);
ENDIAN_ASSIGN_IN_PLACE(elf->hdr.e_shoff);
ENDIAN_ASSIGN_IN_PLACE(elf->hdr.e_flags);
ENDIAN_ASSIGN_IN_PLACE(elf->hdr.e_ehsize);
ENDIAN_ASSIGN_IN_PLACE(elf->hdr.e_phentsize);
ENDIAN_ASSIGN_IN_PLACE(elf->hdr.e_phnum);
ENDIAN_ASSIGN_IN_PLACE(elf->hdr.e_shentsize);
ENDIAN_ASSIGN_IN_PLACE(elf->hdr.e_shnum);
ENDIAN_ASSIGN_IN_PLACE(elf->hdr.e_shstrndx);
return 0;
bail:
return 1;
}
int parse_elf32_section(elf_obj_t *elf, int idx, Elf32_Shdr *hdr)
{
if (idx >= elf->hdr.e_shnum)
goto bail;
COPY_STRUCT(hdr, elf->buf, elf->hdr.e_shoff + idx * elf->hdr.e_shentsize,
elf->sz);
ENDIAN_ASSIGN_IN_PLACE(hdr->sh_name);
ENDIAN_ASSIGN_IN_PLACE(hdr->sh_type);
ENDIAN_ASSIGN_IN_PLACE(hdr->sh_flags);
ENDIAN_ASSIGN_IN_PLACE(hdr->sh_addr);
ENDIAN_ASSIGN_IN_PLACE(hdr->sh_offset);
ENDIAN_ASSIGN_IN_PLACE(hdr->sh_size);
ENDIAN_ASSIGN_IN_PLACE(hdr->sh_link);
ENDIAN_ASSIGN_IN_PLACE(hdr->sh_info);
ENDIAN_ASSIGN_IN_PLACE(hdr->sh_addralign);
ENDIAN_ASSIGN_IN_PLACE(hdr->sh_entsize);
return 0;
bail:
return 1;
}
char *parse_elf32_string_table(elf_obj_t *elf, int s_idx, int idx)
{
Elf32_Shdr shdr;
if (parse_elf32_section(elf, s_idx, &shdr))
{
log_msg("Failed to parse ELF string table: section %d, index %d\n",
s_idx, idx);
return "";
}
return (char *)(elf->buf + shdr.sh_offset + idx);
}
int parse_elf32_symbol(elf_obj_t *elf, unsigned int ofst, Elf32_Sym *sym)
{
COPY_STRUCT(sym, elf->buf, ofst, elf->sz);
ENDIAN_ASSIGN_IN_PLACE(sym->st_name);
ENDIAN_ASSIGN_IN_PLACE(sym->st_value);
ENDIAN_ASSIGN_IN_PLACE(sym->st_size);
ENDIAN_ASSIGN_IN_PLACE(sym->st_info);
ENDIAN_ASSIGN_IN_PLACE(sym->st_other);
ENDIAN_ASSIGN_IN_PLACE(sym->st_shndx);
return 0;
bail:
return 1;
}
int parse_elf32(uint8_t *buf, size_t sz, output_fmt_t mode)
{
elf_obj_t elf;
Elf32_Shdr shdr;
unsigned int ofst;
int i;
Elf32_Off strtab_off; /* save String Table offset for later use */
memset(&elf, 0, sizeof(elf));
elf.buf = buf;
elf.sz = sz;
/* Parse Header */
if (parse_elf32_header(&elf))
{
log_msg("Parse error: File does not appear to be valid ELF32\n");
return 1;
}
for (i = 0; i < elf.hdr.e_shnum; i++)
{
parse_elf32_section(&elf, i, &shdr);
if (shdr.sh_type == SHT_STRTAB)
{
char strtsb_name[128];
strcpy(strtsb_name, (char *)(elf.buf + shdr.sh_offset + shdr.sh_name));
if (!(strcmp(strtsb_name, ".shstrtab")))
{
log_msg("found section: %s\n", strtsb_name);
strtab_off = shdr.sh_offset;
break;
}
}
}
/* Parse all Symbol Tables */
for (i = 0; i < elf.hdr.e_shnum; i++)
{
parse_elf32_section(&elf, i, &shdr);
if (shdr.sh_type == SHT_SYMTAB)
{
for (ofst = shdr.sh_offset;
ofst < shdr.sh_offset + shdr.sh_size;
ofst += shdr.sh_entsize)
{
Elf32_Sym sym;
parse_elf32_symbol(&elf, ofst, &sym);
/* For all OBJECTS (data objects), extract the value from the
* proper data segment.
*/
if (ELF32_ST_TYPE(sym.st_info) == STT_OBJECT && sym.st_name)
log_msg("found data object %s\n",
parse_elf32_string_table(&elf,
shdr.sh_link,
sym.st_name));
if (ELF32_ST_TYPE(sym.st_info) == STT_OBJECT
&& sym.st_size == 4)
{
Elf32_Shdr dhdr;
int32_t val;
char section_name[128];
parse_elf32_section(&elf, sym.st_shndx, &dhdr);
/* For explanation - refer to _MSC_VER version of code */
strcpy(section_name, (char *)(elf.buf + strtab_off + dhdr.sh_name));
log_msg("Section_name: %s, Section_type: %d\n", section_name, dhdr.sh_type);
if (!(strcmp(section_name, ".bss")))
{
val = 0;
}
else
{
memcpy(&val,
elf.buf + dhdr.sh_offset + sym.st_value,
sizeof(val));
}
if (!elf.le_data)
{
log_msg("Big Endian data not supported yet!\n");
goto bail;
}
switch (mode)
{
case OUTPUT_FMT_RVDS:
printf("%-40s EQU %5d\n",
parse_elf32_string_table(&elf,
shdr.sh_link,
sym.st_name),
val);
break;
case OUTPUT_FMT_GAS:
printf(".equ %-40s, %5d\n",
parse_elf32_string_table(&elf,
shdr.sh_link,
sym.st_name),
val);
break;
default:
printf("%s = %d\n",
parse_elf32_string_table(&elf,
shdr.sh_link,
sym.st_name),
val);
}
}
}
}
}
if (mode == OUTPUT_FMT_RVDS)
printf(" END\n");
return 0;
bail:
log_msg("Parse error: File does not appear to be valid ELF32\n");
return 1;
}
int main(int argc, char **argv)
{
int fd;
output_fmt_t mode;
char *f;
struct stat stat_buf;
uint8_t *file_buf;
int res;
if (argc < 2 || argc > 3)
{
fprintf(stderr, "Usage: %s [output format] <obj file>\n\n", argv[0]);
fprintf(stderr, " <obj file>\tELF format object file to parse\n");
fprintf(stderr, "Output Formats:\n");
fprintf(stderr, " gas - compatible with GNU assembler\n");
fprintf(stderr, " rvds - compatible with armasm\n");
goto bail;
}
f = argv[2];
if (!strcmp(argv[1], "rvds"))
mode = OUTPUT_FMT_RVDS;
else if (!strcmp(argv[1], "gas"))
mode = OUTPUT_FMT_GAS;
else
f = argv[1];
fd = open(f, O_RDONLY);
if (fd < 0)
{
perror("Unable to open file");
goto bail;
}
if (fstat(fd, &stat_buf))
{
perror("stat");
goto bail;
}
file_buf = malloc(stat_buf.st_size);
if (!file_buf)
{
perror("malloc");
goto bail;
}
if (read(fd, file_buf, stat_buf.st_size) != stat_buf.st_size)
{
perror("read");
goto bail;
}
if (close(fd))
{
perror("close");
goto bail;
}
res = parse_elf32(file_buf, stat_buf.st_size, mode);
//res = parse_coff(file_buf, stat_buf.st_size);
free(file_buf);
if (!res)
return EXIT_SUCCESS;
bail:
return EXIT_FAILURE;
}
#endif
#endif
#if defined(_MSC_VER)
/* See "Microsoft Portable Executable and Common Object File Format Specification"
for reference.
*/
#define get_le32(x) ((*(x)) | (*(x+1)) << 8 |(*(x+2)) << 16 | (*(x+3)) << 24 )
#define get_le16(x) ((*(x)) | (*(x+1)) << 8)
int parse_coff(unsigned __int8 *buf, size_t sz)
{
unsigned int nsections, symtab_ptr, symtab_sz, strtab_ptr;
unsigned int sectionrawdata_ptr;
unsigned int i;
unsigned __int8 *ptr;
unsigned __int32 symoffset;
FILE *fp;
char **sectionlist; //this array holds all section names in their correct order.
//it is used to check if the symbol is in .bss or .data section.
nsections = get_le16(buf + 2);
symtab_ptr = get_le32(buf + 8);
symtab_sz = get_le32(buf + 12);
strtab_ptr = symtab_ptr + symtab_sz * 18;
if (nsections > 96)
goto bail;
sectionlist = malloc(nsections * sizeof * sectionlist);
//log_msg("COFF: Found %u symbols in %u sections.\n", symtab_sz, nsections);
/*
The size of optional header is always zero for an obj file. So, the section header
follows the file header immediately.
*/
ptr = buf + 20; //section header
for (i = 0; i < nsections; i++)
{
char sectionname[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
strncpy(sectionname, ptr, 8);
//log_msg("COFF: Parsing section %s\n",sectionname);
sectionlist[i] = malloc(strlen(sectionname) + 1);
strcpy(sectionlist[i], sectionname);
if (!strcmp(sectionname, ".data")) sectionrawdata_ptr = get_le32(ptr + 20);
ptr += 40;
}
//log_msg("COFF: Symbol table at offset %u\n", symtab_ptr);
//log_msg("COFF: raw data pointer ofset for section .data is %u\n", sectionrawdata_ptr);
fp = fopen("vpx_asm_offsets.asm", "w");
if (fp == NULL)
{
perror("open file");
goto bail;
}
/* The compiler puts the data with non-zero offset in .data section, but puts the data with
zero offset in .bss section. So, if the data is in the .bss section, set offset=0.
Note from Wiki: In an object module compiled from C, the bss section contains
the local variables (but not functions) that were declared with the static keyword,
except for those with non-zero initial values. (In C, static variables are initialized
to zero by default.) It also contains the non-local (both extern and static) variables
that are also initialized to zero (either explicitly or by default).
*/
//move to symbol table
/* COFF symbol table:
offset field
0 Name(*)
8 Value
12 SectionNumber
14 Type
16 StorageClass
17 NumberOfAuxSymbols
*/
ptr = buf + symtab_ptr;
for (i = 0; i < symtab_sz; i++)
{
__int16 section = get_le16(ptr + 12); //section number
if (section > 0 && ptr[16] == 2)
{
//if(section > 0 && ptr[16] == 3 && get_le32(ptr+8)) {
if (get_le32(ptr))
{
char name[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
strncpy(name, ptr, 8);
//log_msg("COFF: Parsing symbol %s\n",name);
fprintf(fp, "%-40s EQU ", name);
}
else
{
//log_msg("COFF: Parsing symbol %s\n",
// buf + strtab_ptr + get_le32(ptr+4));
fprintf(fp, "%-40s EQU ", buf + strtab_ptr + get_le32(ptr + 4));
}
if (!(strcmp(sectionlist[section-1], ".bss")))
{
symoffset = 0;
}
else
{
symoffset = get_le32(buf + sectionrawdata_ptr + get_le32(ptr + 8));
}
//log_msg(" Section: %d\n",section);
//log_msg(" Class: %d\n",ptr[16]);
//log_msg(" Address: %u\n",get_le32(ptr+8));
//log_msg(" Offset: %u\n", symoffset);
fprintf(fp, "%5d\n", symoffset);
}
ptr += 18;
}
fprintf(fp, " END\n");
fclose(fp);
for (i = 0; i < nsections; i++)
{
free(sectionlist[i]);
}
free(sectionlist);
return 0;
bail:
for (i = 0; i < nsections; i++)
{
free(sectionlist[i]);
}
free(sectionlist);
return 1;
}
int main(int argc, char **argv)
{
int fd;
output_fmt_t mode;
const char *f;
struct _stat stat_buf;
unsigned __int8 *file_buf;
int res;
if (argc < 2 || argc > 3)
{
fprintf(stderr, "Usage: %s [output format] <obj file>\n\n", argv[0]);
fprintf(stderr, " <obj file>\tELF format object file to parse\n");
fprintf(stderr, "Output Formats:\n");
fprintf(stderr, " gas - compatible with GNU assembler\n");
fprintf(stderr, " rvds - compatible with armasm\n");
goto bail;
}
f = argv[2];
if (!strcmp(argv[1], "rvds"))
mode = OUTPUT_FMT_RVDS;
else if (!strcmp(argv[1], "gas"))
mode = OUTPUT_FMT_GAS;
else
f = argv[1];
if (_sopen_s(&fd, f, _O_BINARY, _SH_DENYNO, _S_IREAD | _S_IWRITE))
{
perror("Unable to open file");
goto bail;
}
if (_fstat(fd, &stat_buf))
{
perror("stat");
goto bail;
}
file_buf = malloc(stat_buf.st_size);
if (!file_buf)
{
perror("malloc");
goto bail;
}
if (_read(fd, file_buf, stat_buf.st_size) != stat_buf.st_size)
{
perror("read");
goto bail;
}
if (_close(fd))
{
perror("close");
goto bail;
}
res = parse_coff(file_buf, stat_buf.st_size);
free(file_buf);
if (!res)
return EXIT_SUCCESS;
bail:
return EXIT_FAILURE;
}
#endif

View file

@@ -1,113 +0,0 @@
diff --git a/media/libvpx/vp8/decoder/decodframe.c b/media/libvpx/vp8/decoder/decodframe.c
--- a/media/libvpx/vp8/decoder/decodframe.c
+++ b/media/libvpx/vp8/decoder/decodframe.c
@@ -462,17 +462,17 @@ static void setup_token_decoder(VP8D_COM
{
partition_size = read_partition_size(partition_size_ptr);
}
else
{
partition_size = user_data_end - partition;
}
- if (partition + partition_size > user_data_end)
+ if (user_data_end - partition < partition_size)
vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
"Truncated packet or corrupt partition "
"%d length", i + 1);
if (vp8dx_start_decode(bool_decoder, IF_RTCD(&pbi->dboolhuff),
partition, partition_size))
vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate bool decoder %d", i + 1);
@@ -564,30 +564,33 @@ static void init_frame(VP8D_COMP *pbi)
int vp8_decode_frame(VP8D_COMP *pbi)
{
vp8_reader *const bc = & pbi->bc;
VP8_COMMON *const pc = & pbi->common;
MACROBLOCKD *const xd = & pbi->mb;
const unsigned char *data = (const unsigned char *)pbi->Source;
const unsigned char *const data_end = data + pbi->source_sz;
- int first_partition_length_in_bytes;
+ unsigned int first_partition_length_in_bytes;
int mb_row;
int i, j, k, l;
const int *const mb_feature_data_bits = vp8_mb_feature_data_bits;
+ if (data_end - data < 3)
+ vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
+ "Truncated packet");
pc->frame_type = (FRAME_TYPE)(data[0] & 1);
pc->version = (data[0] >> 1) & 7;
pc->show_frame = (data[0] >> 4) & 1;
first_partition_length_in_bytes =
(data[0] | (data[1] << 8) | (data[2] << 16)) >> 5;
data += 3;
- if (data + first_partition_length_in_bytes > data_end)
+ if (data_end - data < first_partition_length_in_bytes)
vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
"Truncated packet or corrupt partition 0 length");
vp8_setup_version(pc);
if (pc->frame_type == KEY_FRAME)
{
const int Width = pc->Width;
const int Height = pc->Height;
diff --git a/media/libvpx/vp8/decoder/onyxd_if.c b/media/libvpx/vp8/decoder/onyxd_if.c
--- a/media/libvpx/vp8/decoder/onyxd_if.c
+++ b/media/libvpx/vp8/decoder/onyxd_if.c
@@ -318,45 +318,49 @@ int vp8dx_receive_compressed_data(VP8D_P
if (ptr == 0)
{
return -1;
}
pbi->common.error.error_code = VPX_CODEC_OK;
+ cm->new_fb_idx = get_free_fb (cm);
+
if (setjmp(pbi->common.error.jmp))
{
pbi->common.error.setjmp = 0;
+ if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0)
+ cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
return -1;
}
pbi->common.error.setjmp = 1;
#if HAVE_ARMV7
vp8_push_neon(dx_store_reg);
#endif
vpx_usec_timer_start(&timer);
//cm->current_video_frame++;
pbi->Source = source;
pbi->source_sz = size;
- cm->new_fb_idx = get_free_fb (cm);
-
retcode = vp8_decode_frame(pbi);
if (retcode < 0)
{
#if HAVE_ARMV7
vp8_pop_neon(dx_store_reg);
#endif
pbi->common.error.error_code = VPX_CODEC_ERROR;
pbi->common.error.setjmp = 0;
+ if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0)
+ cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
return retcode;
}
if (pbi->b_multithreaded_lf && pbi->common.filter_level != 0)
vp8_stop_lfthread(pbi);
if (swap_frame_buffers (cm))
{

View file

@@ -1,168 +0,0 @@
diff --git a/media/libvpx/vp8/common/blockd.h b/media/libvpx/vp8/common/blockd.h
--- a/media/libvpx/vp8/common/blockd.h
+++ b/media/libvpx/vp8/common/blockd.h
@@ -90,17 +90,17 @@ typedef enum
MB_MODE_COUNT
} MB_PREDICTION_MODE;
// Macroblock level features
typedef enum
{
MB_LVL_ALT_Q = 0, // Use alternate Quantizer ....
MB_LVL_ALT_LF = 1, // Use alternate loop filter value...
- MB_LVL_MAX = 2, // Number of MB level features supported
+ MB_LVL_MAX = 2 // Number of MB level features supported
} MB_LVL_FEATURES;
// Segment Feature Masks
#define SEGMENT_ALTQ 0x01
#define SEGMENT_ALT_LF 0x02
#define VP8_YMODES (B_PRED + 1)
diff --git a/media/libvpx/vp8/common/ppflags.h b/media/libvpx/vp8/common/ppflags.h
--- a/media/libvpx/vp8/common/ppflags.h
+++ b/media/libvpx/vp8/common/ppflags.h
@@ -15,12 +15,12 @@ enum
{
VP8D_NOFILTERING = 0,
VP8D_DEBLOCK = 1,
VP8D_DEMACROBLOCK = 2,
VP8D_ADDNOISE = 4,
VP8D_DEBUG_LEVEL1 = 8,
VP8D_DEBUG_LEVEL2 = 16,
VP8D_DEBUG_LEVEL3 = 32,
- VP8D_DEBUG_LEVEL4 = 64,
+ VP8D_DEBUG_LEVEL4 = 64
};
#endif
diff --git a/media/libvpx/vpx/vp8.h b/media/libvpx/vpx/vp8.h
--- a/media/libvpx/vpx/vp8.h
+++ b/media/libvpx/vpx/vp8.h
@@ -48,17 +48,17 @@ enum vp8_dec_control_id
*
* The set of macros define VP8 decoder post processing flags
*/
enum vp8_postproc_level
{
VP8_NOFILTERING = 0,
VP8_DEBLOCK = 1,
VP8_DEMACROBLOCK = 2,
- VP8_ADDNOISE = 4,
+ VP8_ADDNOISE = 4
};
/*!\brief post process flags
*
* This defines a structure that describes the post processing settings. For
* the best objective measure (using the PSNR metric) set post_proc_flag
* to VP8_DEBLOCK and deblocking_level to 1.
*/
diff --git a/media/libvpx/vpx/vpx_codec.h b/media/libvpx/vpx/vpx_codec.h
--- a/media/libvpx/vpx/vpx_codec.h
+++ b/media/libvpx/vpx/vpx_codec.h
@@ -57,17 +57,17 @@ extern "C" {
#define DEPRECATED
#define DECLSPEC_DEPRECATED /**< \copydoc #DEPRECATED */
#endif
#endif
/*!\brief Decorator indicating a function is potentially unused */
#ifdef UNUSED
#elif __GNUC__
-#define UNUSED __attribute__ ((unused));
+#define UNUSED __attribute__ ((unused))
#else
#define UNUSED
#endif
/*!\brief Current ABI version number
*
* \internal
* If this file is altered in any way that changes the ABI, this value
@@ -123,17 +123,17 @@ extern "C" {
/*!\brief An application-supplied parameter is not valid.
*
*/
VPX_CODEC_INVALID_PARAM,
/*!\brief An iterator reached the end of list.
*
*/
- VPX_CODEC_LIST_END,
+ VPX_CODEC_LIST_END
}
vpx_codec_err_t;
/*! \brief Codec capabilities bitfield
*
* Each codec advertises the capabilities it supports as part of its
diff --git a/media/libvpx/vpx/vpx_decoder_compat.h b/media/libvpx/vpx/vpx_decoder_compat.h
--- a/media/libvpx/vpx/vpx_decoder_compat.h
+++ b/media/libvpx/vpx/vpx_decoder_compat.h
@@ -73,17 +73,17 @@ extern "C" {
/*!\brief An application-supplied parameter is not valid.
*
*/
VPX_DEC_INVALID_PARAM = VPX_CODEC_INVALID_PARAM,
/*!\brief An iterator reached the end of list.
*
*/
- VPX_DEC_LIST_END = VPX_CODEC_LIST_END,
+ VPX_DEC_LIST_END = VPX_CODEC_LIST_END
}
vpx_dec_err_t;
/*! \brief Decoder capabilities bitfield
*
* Each decoder advertises the capabilities it supports as part of its
* ::vpx_dec_iface_t interface structure. Capabilities are extra interfaces
diff --git a/media/libvpx/vpx/vpx_encoder.h b/media/libvpx/vpx/vpx_encoder.h
--- a/media/libvpx/vpx/vpx_encoder.h
+++ b/media/libvpx/vpx/vpx_encoder.h
@@ -166,17 +166,17 @@ extern "C" {
} vpx_rational_t; /**< alias for struct vpx_rational */
/*!\brief Multi-pass Encoding Pass */
enum vpx_enc_pass
{
VPX_RC_ONE_PASS, /**< Single pass mode */
VPX_RC_FIRST_PASS, /**< First pass of multi-pass mode */
- VPX_RC_LAST_PASS, /**< Final pass of multi-pass mode */
+ VPX_RC_LAST_PASS /**< Final pass of multi-pass mode */
};
/*!\brief Rate control mode */
enum vpx_rc_mode
{
VPX_VBR, /**< Variable Bit Rate (VBR) mode */
VPX_CBR /**< Constant Bit Rate (CBR) mode */
diff --git a/media/libvpx/vpx/vpx_image.h b/media/libvpx/vpx/vpx_image.h
--- a/media/libvpx/vpx/vpx_image.h
+++ b/media/libvpx/vpx/vpx_image.h
@@ -50,17 +50,17 @@ extern "C" {
VPX_IMG_FMT_RGB32_LE, /**< 32 bit packed BGR0 */
VPX_IMG_FMT_ARGB, /**< 32 bit packed ARGB, alpha=255 */
VPX_IMG_FMT_ARGB_LE, /**< 32 bit packed BGRA, alpha=255 */
VPX_IMG_FMT_RGB565_LE, /**< 16 bit per pixel, gggbbbbb rrrrrggg */
VPX_IMG_FMT_RGB555_LE, /**< 16 bit per pixel, gggbbbbb 0rrrrrgg */
VPX_IMG_FMT_YV12 = VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_UV_FLIP | 1, /**< planar YVU */
VPX_IMG_FMT_I420 = VPX_IMG_FMT_PLANAR | 2,
VPX_IMG_FMT_VPXYV12 = VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_UV_FLIP | 3, /** < planar 4:2:0 format with vpx color space */
- VPX_IMG_FMT_VPXI420 = VPX_IMG_FMT_PLANAR | 4, /** < planar 4:2:0 format with vpx color space */
+ VPX_IMG_FMT_VPXI420 = VPX_IMG_FMT_PLANAR | 4 /** < planar 4:2:0 format with vpx color space */
}
vpx_img_fmt_t; /**< alias for enum vpx_img_fmt */
#if !defined(VPX_CODEC_DISABLE_COMPAT) || !VPX_CODEC_DISABLE_COMPAT
#define IMG_FMT_PLANAR VPX_IMG_FMT_PLANAR /**< \deprecated Use #VPX_IMG_FMT_PLANAR */
#define IMG_FMT_UV_FLIP VPX_IMG_FMT_UV_FLIP /**< \deprecated Use #VPX_IMG_FMT_UV_FLIP */
#define IMG_FMT_HAS_ALPHA VPX_IMG_FMT_HAS_ALPHA /**< \deprecated Use #VPX_IMG_FMT_HAS_ALPHA */

View file

@@ -1,10 +1,7 @@
diff --git a/media/libvpx/vp8/common/loopfilter_filters.c b/media/libvpx/vp8/common/loopfilter_filters.c
--- a/media/libvpx/vp8/common/loopfilter_filters.c
+++ b/media/libvpx/vp8/common/loopfilter_filters.c
@@ -8,16 +8,19 @@
* be found in the AUTHORS file in the root of the source tree.
*/
@@ -11,10 +11,14 @@
#include <stdlib.h>
#include "loopfilter.h"
@@ -13,9 +10,7 @@ diff --git a/media/libvpx/vp8/common/loopfilter_filters.c b/media/libvpx/vp8/com
+#ifdef __SUNPRO_C
+#define __inline inline
+#endif
#define NEW_LOOPFILTER_MASK
+
typedef unsigned char uc;
static __inline signed char vp8_signed_char_clamp(int t)
@@ -109,14 +104,14 @@ diff --git a/media/libvpx/vpx_ports/mem.h b/media/libvpx/vpx_ports/mem.h
diff --git a/media/libvpx/vpx_ports/x86.h b/media/libvpx/vpx_ports/x86.h
--- a/media/libvpx/vpx_ports/x86.h
+++ b/media/libvpx/vpx_ports/x86.h
@@ -26,16 +26,36 @@
@@ -45,16 +45,36 @@
#define cpuid(func,ax,bx,cx,dx)\
__asm__ __volatile__ (\
"pushl %%ebx \n\t" \
"cpuid \n\t" \
"movl %%ebx, %1 \n\t" \
"popl %%ebx \n\t" \
: "=a" (ax), "=r" (bx), "=c" (cx), "=d" (dx) \
: "a" (func));
"mov %%ebx, %%edi \n\t" \
"cpuid \n\t" \
"xchg %%edi, %%ebx \n\t" \
: "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \
: "a" (func));
#endif
+#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
+#if ARCH_X86_64

View file

@@ -1,22 +0,0 @@
diff --git a/media/libvpx/vp8/common/x86/subpixel_sse2.asm b/media/libvpx/vp8/common/x86/subpixel_sse2.asm
--- a/media/libvpx/vp8/common/x86/subpixel_sse2.asm
+++ b/media/libvpx/vp8/common/x86/subpixel_sse2.asm
@@ -1003,17 +1003,17 @@ next_row8x8:
paddw xmm3, xmm7
movdqa xmm7, xmm4
paddw xmm3, [rd GLOBAL] ; xmm3 += round value
psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128
packuswb xmm3, xmm0
- movq [rdi], xmm3 ; store the results in the destination
+ movq QWORD PTR [rdi], xmm3 ; store the results in the destination
add rsp, 16 ; next line
add rdi, rdx
cmp rdi, rcx
jne next_row8x8
;add rsp, 144

View file

@@ -79,19 +79,30 @@ commonFiles=(
vp8/common/swapyv12buffer.c
vp8/common/textblit.c
vp8/common/treecoder.c
vp8/common/arm/arm_systemdependent.c
vp8/common/arm/bilinearfilter_arm.c
vp8/common/arm/filter_arm.c
vp8/common/arm/loopfilter_arm.c
vp8/common/arm/reconintra_arm.c
vp8/common/arm/vpx_asm_offsets.c
vp8/common/arm/neon/recon_neon.c
vp8/common/x86/loopfilter_x86.c
vp8/common/x86/vp8_asm_stubs.c
vp8/common/x86/x86_systemdependent.c
vp8/decoder/dboolhuff.c
vp8/decoder/decodemv.c
vp8/decoder/decodframe.c
vp8/decoder/demode.c
vp8/decoder/dequantize.c
vp8/decoder/detokenize.c
vp8/decoder/reconintra_mt.c
vp8/decoder/generic/dsystemdependent.c
vp8/decoder/idct_blk.c
vp8/decoder/onyxd_if.c
vp8/decoder/threading.c
vp8/decoder/arm/arm_dsystemdependent.c
vp8/decoder/arm/dequantize_arm.c
vp8/decoder/arm/armv6/idct_blk_v6.c
vp8/decoder/arm/neon/idct_blk_neon.c
vp8/decoder/x86/idct_blk_mmx.c
vp8/decoder/x86/idct_blk_sse2.c
vp8/decoder/x86/x86_dsystemdependent.c
@@ -138,7 +149,6 @@ commonFiles=(
vp8/common/reconinter.h
vp8/common/reconintra4x4.h
vp8/common/reconintra.h
vp8/common/segmentation_common.h
vp8/common/setupintrarecon.h
vp8/common/subpixel.h
vp8/common/swapyv12buffer.h
@@ -147,6 +157,10 @@ commonFiles=(
vp8/common/treecoder.h
vp8/common/type_aliases.h
vp8/common/vpxerrors.h
vp8/common/arm/idct_arm.h
vp8/common/arm/loopfilter_arm.h
vp8/common/arm/recon_arm.h
vp8/common/arm/subpixel_arm.h
vp8/common/x86/idct_x86.h
vp8/common/x86/loopfilter_x86.h
vp8/common/x86/postproc_x86.h
@@ -155,11 +169,14 @@ commonFiles=(
vp8/decoder/dboolhuff.h
vp8/decoder/decodemv.h
vp8/decoder/decoderthreading.h
vp8/decoder/demode.h
vp8/decoder/dequantize.h
vp8/decoder/detokenize.h
vp8/decoder/onyxd_int.h
vp8/decoder/reconintra_mt.h
vp8/decoder/treereader.h
vp8/decoder/arm/dboolhuff_arm.h
vp8/decoder/arm/dequantize_arm.h
vp8/decoder/arm/detokenize_arm.h
vp8/decoder/x86/dequantize_x86.h
vpx/internal/vpx_codec_internal.h
vpx/vp8cx.h
@@ -176,14 +193,63 @@ commonFiles=(
vpx/vpx_integer.h
vpx_mem/include/vpx_mem_intrnl.h
vpx_mem/vpx_mem.h
vpx_ports/arm_cpudetect.c
vpx_ports/config.h
vpx_ports/mem.h
vpx_ports/vpx_timer.h
vpx_ports/arm.h
vpx_ports/x86.h
vpx_scale/scale_mode.h
vpx_scale/vpxscale.h
vpx_scale/yv12config.h
vpx_scale/yv12extend.h
vp8/common/arm/armv6/bilinearfilter_v6.asm
vp8/common/arm/armv6/copymem8x4_v6.asm
vp8/common/arm/armv6/copymem8x8_v6.asm
vp8/common/arm/armv6/copymem16x16_v6.asm
vp8/common/arm/armv6/dc_only_idct_add_v6.asm
vp8/common/arm/armv6/iwalsh_v6.asm
vp8/common/arm/armv6/filter_v6.asm
vp8/common/arm/armv6/idct_v6.asm
vp8/common/arm/armv6/loopfilter_v6.asm
vp8/common/arm/armv6/recon_v6.asm
vp8/common/arm/armv6/simpleloopfilter_v6.asm
vp8/common/arm/armv6/sixtappredict8x4_v6.asm
vp8/common/arm/neon/bilinearpredict4x4_neon.asm
vp8/common/arm/neon/bilinearpredict8x4_neon.asm
vp8/common/arm/neon/bilinearpredict8x8_neon.asm
vp8/common/arm/neon/bilinearpredict16x16_neon.asm
vp8/common/arm/neon/copymem8x4_neon.asm
vp8/common/arm/neon/copymem8x8_neon.asm
vp8/common/arm/neon/copymem16x16_neon.asm
vp8/common/arm/neon/dc_only_idct_add_neon.asm
vp8/common/arm/neon/iwalsh_neon.asm
vp8/common/arm/neon/loopfilter_neon.asm
vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm
vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm
vp8/common/arm/neon/mbloopfilter_neon.asm
vp8/common/arm/neon/recon2b_neon.asm
vp8/common/arm/neon/recon4b_neon.asm
vp8/common/arm/neon/reconb_neon.asm
vp8/common/arm/neon/shortidct4x4llm_1_neon.asm
vp8/common/arm/neon/shortidct4x4llm_neon.asm
vp8/common/arm/neon/sixtappredict4x4_neon.asm
vp8/common/arm/neon/sixtappredict8x4_neon.asm
vp8/common/arm/neon/sixtappredict8x8_neon.asm
vp8/common/arm/neon/sixtappredict16x16_neon.asm
vp8/common/arm/neon/recon16x16mb_neon.asm
vp8/common/arm/neon/buildintrapredictorsmby_neon.asm
vp8/common/arm/neon/save_neon_reg.asm
vp8/decoder/arm/detokenize.asm
vp8/decoder/arm/armv6/dequant_dc_idct_v6.asm
vp8/decoder/arm/armv6/dequant_idct_v6.asm
vp8/decoder/arm/armv6/dequantize_v6.asm
vp8/decoder/arm/neon/idct_dequant_dc_full_2x_neon.asm
vp8/decoder/arm/neon/idct_dequant_dc_0_2x_neon.asm
vp8/decoder/arm/neon/dequant_idct_neon.asm
vp8/decoder/arm/neon/idct_dequant_full_2x_neon.asm
vp8/decoder/arm/neon/idct_dequant_0_2x_neon.asm
vp8/decoder/arm/neon/dequantizeb_neon.asm
vp8/common/x86/idctllm_mmx.asm
vp8/common/x86/idctllm_sse2.asm
vp8/common/x86/iwalsh_mmx.asm
@@ -200,6 +266,8 @@ commonFiles=(
vp8/decoder/x86/dequantize_mmx.asm
vpx_ports/emms.asm
vpx_ports/x86_abi_support.asm
build/make/ads2gas.pl
build/make/obj_int_extract.c
LICENSE
PATENTS
)
@@ -218,9 +286,9 @@ cp $1/objdir/x86-linux-gcc/vpx_config.asm vpx_config_x86-linux-gcc.asm
cp $1/objdir/x86-linux-gcc/vpx_config.h vpx_config_x86-linux-gcc.h
# Config files for x86_64-linux-gcc and Solaris x86_64
cp $1/objdir/x86_64-linux-gcc/vpx_config.c vpx_config_x86-linux-gcc.c
cp $1/objdir/x86_64-linux-gcc/vpx_config.asm vpx_config_x86-linux-gcc.asm
cp $1/objdir/x86_64-linux-gcc/vpx_config.h vpx_config_x86-linux-gcc.h
cp $1/objdir/x86_64-linux-gcc/vpx_config.c vpx_config_x86_64-linux-gcc.c
cp $1/objdir/x86_64-linux-gcc/vpx_config.asm vpx_config_x86_64-linux-gcc.asm
cp $1/objdir/x86_64-linux-gcc/vpx_config.h vpx_config_x86_64-linux-gcc.h
# Copy config files for mac...
cp $1/objdir/x86-darwin9-gcc/vpx_config.c vpx_config_x86-darwin9-gcc.c
@@ -232,6 +300,10 @@ cp $1/objdir/x86_64-darwin9-gcc/vpx_config.c vpx_config_x86_64-darwin9-gcc.c
cp $1/objdir/x86_64-darwin9-gcc/vpx_config.asm vpx_config_x86_64-darwin9-gcc.asm
cp $1/objdir/x86_64-darwin9-gcc/vpx_config.h vpx_config_x86_64-darwin9-gcc.h
# Config files for arm-linux-gcc
cp $1/objdir/armv7-linux-gcc/vpx_config.c vpx_config_arm-linux-gcc.c
cp $1/objdir/armv7-linux-gcc/vpx_config.h vpx_config_arm-linux-gcc.h
# Config files for generic-gnu
cp $1/objdir/generic-gnu/vpx_config.c vpx_config_generic-gnu.c
cp $1/objdir/generic-gnu/vpx_config.h vpx_config_generic-gnu.h
@@ -243,11 +315,5 @@ do
cp -v $1/$f $f
done
# Patch to reduce compiler warnings, so we can compile with -Werror in mozilla.
patch -p3 < reduce-warnings-1.patch
patch -p3 < subpixel-qword.patch
# Patch to compile with Sun Studio on Solaris
patch -p3 < solaris.patch
# Patch to fix frame buffer reference counting and parition length overflow
# checks.
patch -p3 < frame_buf_ref.patch

View file

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@@ -56,7 +56,7 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
vp8_de_alloc_frame_buffers(oci);
// our internal buffers are always multiples of 16
/* our internal buffers are always multiples of 16 */
if ((width & 0xf) != 0)
width += 16 - (width & 0xf);
@@ -153,7 +153,7 @@ void vp8_setup_version(VP8_COMMON *cm)
cm->full_pixel = 1;
break;
default:
//4,5,6,7 are reserved for future use
/*4,5,6,7 are reserved for future use*/
cm->no_lpf = 0;
cm->simpler_lpf = 0;
cm->use_bilinear_mc_filter = 0;
@@ -177,10 +177,10 @@ void vp8_create_common(VP8_COMMON *oci)
oci->clr_type = REG_YUV;
oci->clamp_type = RECON_CLAMP_REQUIRED;
// Initialise reference frame sign bias structure to defaults
/* Initialise reference frame sign bias structure to defaults */
vpx_memset(oci->ref_frame_sign_bias, 0, sizeof(oci->ref_frame_sign_bias));
// Default disable buffer to buffer copying
/* Default disable buffer to buffer copying */
oci->copy_buffer_to_gf = 0;
oci->copy_buffer_to_arf = 0;
}

View file

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source

View file

@@ -0,0 +1,136 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_ports/config.h"
#include "vpx_ports/arm.h"
#include "g_common.h"
#include "pragmas.h"
#include "subpixel.h"
#include "loopfilter.h"
#include "recon.h"
#include "idct.h"
#include "onyxc_int.h"
extern void (*vp8_build_intra_predictors_mby_ptr)(MACROBLOCKD *x);
extern void vp8_build_intra_predictors_mby(MACROBLOCKD *x);
extern void vp8_build_intra_predictors_mby_neon(MACROBLOCKD *x);
extern void (*vp8_build_intra_predictors_mby_s_ptr)(MACROBLOCKD *x);
extern void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x);
extern void vp8_build_intra_predictors_mby_s_neon(MACROBLOCKD *x);
void vp8_arch_arm_common_init(VP8_COMMON *ctx)
{
#if CONFIG_RUNTIME_CPU_DETECT
VP8_COMMON_RTCD *rtcd = &ctx->rtcd;
int flags = arm_cpu_caps();
int has_edsp = flags & HAS_EDSP;
int has_media = flags & HAS_MEDIA;
int has_neon = flags & HAS_NEON;
rtcd->flags = flags;
/* Override default functions with fastest ones for this CPU. */
#if HAVE_ARMV6
if (has_media)
{
rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_armv6;
rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_armv6;
rtcd->subpix.sixtap8x4 = vp8_sixtap_predict8x4_armv6;
rtcd->subpix.sixtap4x4 = vp8_sixtap_predict_armv6;
rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_armv6;
rtcd->subpix.bilinear8x8 = vp8_bilinear_predict8x8_armv6;
rtcd->subpix.bilinear8x4 = vp8_bilinear_predict8x4_armv6;
rtcd->subpix.bilinear4x4 = vp8_bilinear_predict4x4_armv6;
rtcd->idct.idct1 = vp8_short_idct4x4llm_1_v6;
rtcd->idct.idct16 = vp8_short_idct4x4llm_v6_dual;
rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_v6;
rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_v6;
rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_armv6;
rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_armv6;
rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_armv6;
rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_armv6;
rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_armv6;
rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_armv6;
rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_armv6;
rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_armv6;
rtcd->recon.copy16x16 = vp8_copy_mem16x16_v6;
rtcd->recon.copy8x8 = vp8_copy_mem8x8_v6;
rtcd->recon.copy8x4 = vp8_copy_mem8x4_v6;
rtcd->recon.recon = vp8_recon_b_armv6;
rtcd->recon.recon2 = vp8_recon2b_armv6;
rtcd->recon.recon4 = vp8_recon4b_armv6;
}
#endif
#if HAVE_ARMV7
if (has_neon)
{
rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_neon;
rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_neon;
rtcd->subpix.sixtap8x4 = vp8_sixtap_predict8x4_neon;
rtcd->subpix.sixtap4x4 = vp8_sixtap_predict_neon;
rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_neon;
rtcd->subpix.bilinear8x8 = vp8_bilinear_predict8x8_neon;
rtcd->subpix.bilinear8x4 = vp8_bilinear_predict8x4_neon;
rtcd->subpix.bilinear4x4 = vp8_bilinear_predict4x4_neon;
rtcd->idct.idct1 = vp8_short_idct4x4llm_1_neon;
rtcd->idct.idct16 = vp8_short_idct4x4llm_neon;
rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_neon;
rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_neon;
rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_neon;
rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_neon;
rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_neon;
rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_neon;
rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_neon;
rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_neon;
rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_neon;
rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_neon;
rtcd->recon.copy16x16 = vp8_copy_mem16x16_neon;
rtcd->recon.copy8x8 = vp8_copy_mem8x8_neon;
rtcd->recon.copy8x4 = vp8_copy_mem8x4_neon;
rtcd->recon.recon = vp8_recon_b_neon;
rtcd->recon.recon2 = vp8_recon2b_neon;
rtcd->recon.recon4 = vp8_recon4b_neon;
rtcd->recon.recon_mb = vp8_recon_mb_neon;
}
#endif
#endif
#if HAVE_ARMV6
#if CONFIG_RUNTIME_CPU_DETECT
if (has_media)
#endif
{
vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby;
vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s;
}
#endif
#if HAVE_ARMV7
#if CONFIG_RUNTIME_CPU_DETECT
if (has_neon)
#endif
{
vp8_build_intra_predictors_mby_ptr =
vp8_build_intra_predictors_mby_neon;
vp8_build_intra_predictors_mby_s_ptr =
vp8_build_intra_predictors_mby_s_neon;
}
#endif
}

View file

@@ -0,0 +1,238 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_filter_block2d_bil_first_pass_armv6|
EXPORT |vp8_filter_block2d_bil_second_pass_armv6|
AREA |.text|, CODE, READONLY ; name this block of code
;-------------------------------------
; r0 unsigned char *src_ptr,
; r1 unsigned short *output_ptr,
; r2 unsigned int src_pixels_per_line,
; r3 unsigned int output_height,
; stack unsigned int output_width,
; stack const short *vp8_filter
;-------------------------------------
; The output is transposed and stored in the output array to make it easy for second pass filtering.
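The transposed store is what lets the second pass reuse the same horizontal walk as a vertical filter. A rough C sketch of the first pass (conceptual only; the argument handling and the extra edge row in the armv6 code differ):

/* Conceptual sketch of a 2-tap bilinear horizontal pass that writes its
 * output transposed; rounding matches the asm (+64, then arithmetic shift
 * right by 7, with the two filter taps summing to 128). */
static void bil_first_pass_c(const unsigned char *src, unsigned int src_stride,
                             unsigned short *dst,          /* transposed */
                             unsigned int height, unsigned int width,
                             const short *filter)
{
    unsigned int r, c;

    for (r = 0; r < height; r++) {
        for (c = 0; c < width; c++)
            dst[c * height + r] =
                (unsigned short)((src[c] * filter[0] +
                                  src[c + 1] * filter[1] + 64) >> 7);
        src += src_stride;
    }
}
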
|vp8_filter_block2d_bil_first_pass_armv6| PROC
stmdb sp!, {r4 - r11, lr}
ldr r11, [sp, #40] ; vp8_filter address
ldr r4, [sp, #36] ; output width
mov r12, r3 ; outer-loop counter
sub r2, r2, r4 ; src increment for height loop
;;IF ARCHITECTURE=6
pld [r0]
;;ENDIF
ldr r5, [r11] ; load up filter coefficients
mov r3, r3, lsl #1 ; output_height*2
add r3, r3, #2 ; plus 2 to make output buffer 4-bit aligned since height is actually (height+1)
mov r11, r1 ; save output_ptr for each row
cmp r5, #128 ; if filter coef = 128, then skip the filter
beq bil_null_1st_filter
|bil_height_loop_1st_v6|
ldrb r6, [r0] ; load source data
ldrb r7, [r0, #1]
ldrb r8, [r0, #2]
mov lr, r4, lsr #2 ; 4-in-parallel loop counter
|bil_width_loop_1st_v6|
ldrb r9, [r0, #3]
ldrb r10, [r0, #4]
pkhbt r6, r6, r7, lsl #16 ; src[1] | src[0]
pkhbt r7, r7, r8, lsl #16 ; src[2] | src[1]
smuad r6, r6, r5 ; apply the filter
pkhbt r8, r8, r9, lsl #16 ; src[3] | src[2]
smuad r7, r7, r5
pkhbt r9, r9, r10, lsl #16 ; src[4] | src[3]
smuad r8, r8, r5
smuad r9, r9, r5
add r0, r0, #4
subs lr, lr, #1
add r6, r6, #0x40 ; round_shift_and_clamp
add r7, r7, #0x40
usat r6, #16, r6, asr #7
usat r7, #16, r7, asr #7
strh r6, [r1], r3 ; result is transposed and stored
add r8, r8, #0x40 ; round_shift_and_clamp
strh r7, [r1], r3
add r9, r9, #0x40
usat r8, #16, r8, asr #7
usat r9, #16, r9, asr #7
strh r8, [r1], r3 ; result is transposed and stored
ldrneb r6, [r0] ; load source data
strh r9, [r1], r3
ldrneb r7, [r0, #1]
ldrneb r8, [r0, #2]
bne bil_width_loop_1st_v6
add r0, r0, r2 ; move to next input row
subs r12, r12, #1
;;IF ARCHITECTURE=6
pld [r0]
;;ENDIF
add r11, r11, #2 ; move over to next column
mov r1, r11
bne bil_height_loop_1st_v6
ldmia sp!, {r4 - r11, pc}
|bil_null_1st_filter|
|bil_height_loop_null_1st|
mov lr, r4, lsr #2 ; loop counter
|bil_width_loop_null_1st|
ldrb r6, [r0] ; load data
ldrb r7, [r0, #1]
ldrb r8, [r0, #2]
ldrb r9, [r0, #3]
strh r6, [r1], r3 ; store it to immediate buffer
add r0, r0, #4
strh r7, [r1], r3
subs lr, lr, #1
strh r8, [r1], r3
strh r9, [r1], r3
bne bil_width_loop_null_1st
subs r12, r12, #1
add r0, r0, r2 ; move to next input line
add r11, r11, #2 ; move over to next column
mov r1, r11
bne bil_height_loop_null_1st
ldmia sp!, {r4 - r11, pc}
ENDP ; |vp8_filter_block2d_bil_first_pass_armv6|
;---------------------------------
; r0 unsigned short *src_ptr,
; r1 unsigned char *output_ptr,
; r2 int output_pitch,
; r3 unsigned int output_height,
; stack unsigned int output_width,
; stack const short *vp8_filter
;---------------------------------
|vp8_filter_block2d_bil_second_pass_armv6| PROC
stmdb sp!, {r4 - r11, lr}
ldr r11, [sp, #40] ; vp8_filter address
ldr r4, [sp, #36] ; output width
ldr r5, [r11] ; load up filter coefficients
mov r12, r4 ; outer-loop counter = width, since we work on transposed data matrix
mov r11, r1
cmp r5, #128 ; if filter coef = 128, then skip the filter
beq bil_null_2nd_filter
|bil_height_loop_2nd|
ldr r6, [r0] ; load the data
ldr r8, [r0, #4]
ldrh r10, [r0, #8]
mov lr, r3, lsr #2 ; loop counter
|bil_width_loop_2nd|
pkhtb r7, r6, r8 ; src[1] | src[2]
pkhtb r9, r8, r10 ; src[3] | src[4]
smuad r6, r6, r5 ; apply filter
smuad r8, r8, r5 ; apply filter
subs lr, lr, #1
smuadx r7, r7, r5 ; apply filter
smuadx r9, r9, r5 ; apply filter
add r0, r0, #8
add r6, r6, #0x40 ; round_shift_and_clamp
add r7, r7, #0x40
usat r6, #8, r6, asr #7
usat r7, #8, r7, asr #7
strb r6, [r1], r2 ; the result is transposed back and stored
add r8, r8, #0x40 ; round_shift_and_clamp
strb r7, [r1], r2
add r9, r9, #0x40
usat r8, #8, r8, asr #7
usat r9, #8, r9, asr #7
strb r8, [r1], r2 ; the result is transposed back and stored
ldrne r6, [r0] ; load data
strb r9, [r1], r2
ldrne r8, [r0, #4]
ldrneh r10, [r0, #8]
bne bil_width_loop_2nd
subs r12, r12, #1
add r0, r0, #4 ; update src for next row
add r11, r11, #1
mov r1, r11
bne bil_height_loop_2nd
ldmia sp!, {r4 - r11, pc}
|bil_null_2nd_filter|
|bil_height_loop_null_2nd|
mov lr, r3, lsr #2
|bil_width_loop_null_2nd|
ldr r6, [r0], #4 ; load data
subs lr, lr, #1
ldr r8, [r0], #4
strb r6, [r1], r2 ; store data
mov r7, r6, lsr #16
strb r7, [r1], r2
mov r9, r8, lsr #16
strb r8, [r1], r2
strb r9, [r1], r2
bne bil_width_loop_null_2nd
subs r12, r12, #1
add r0, r0, #4
add r11, r11, #1
mov r1, r11
bne bil_height_loop_null_2nd
ldmia sp!, {r4 - r11, pc}
ENDP ; |vp8_filter_block2d_second_pass_armv6|
END

View file

@@ -0,0 +1,182 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_copy_mem16x16_v6|
; ARM
; REQUIRE8
; PRESERVE8
AREA Block, CODE, READONLY ; name this block of code
;void copy_mem16x16_v6( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride)
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
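For reference, the alignment-dispatched assembly below implements the same operation as this simple C loop (a sketch of the generic C fallback, not a verbatim copy of libvpx's vp8_copy_mem16x16_c):

#include <string.h>

/* Copy a 16x16 block of pixels; the asm variants differ only in how they
 * batch loads/stores once the source alignment is known. */
static void copy_mem16x16_sketch(const unsigned char *src, int src_stride,
                                 unsigned char *dst, int dst_stride)
{
    int r;

    for (r = 0; r < 16; r++) {
        memcpy(dst, src, 16);
        src += src_stride;
        dst += dst_stride;
    }
}
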
|vp8_copy_mem16x16_v6| PROC
stmdb sp!, {r4 - r7}
;push {r4-r7}
;preload
pld [r0]
pld [r0, r1]
pld [r0, r1, lsl #1]
ands r4, r0, #15
beq copy_mem16x16_fast
ands r4, r0, #7
beq copy_mem16x16_8
ands r4, r0, #3
beq copy_mem16x16_4
;copy one byte each time
ldrb r4, [r0]
ldrb r5, [r0, #1]
ldrb r6, [r0, #2]
ldrb r7, [r0, #3]
mov r12, #16
copy_mem16x16_1_loop
strb r4, [r2]
strb r5, [r2, #1]
strb r6, [r2, #2]
strb r7, [r2, #3]
ldrb r4, [r0, #4]
ldrb r5, [r0, #5]
ldrb r6, [r0, #6]
ldrb r7, [r0, #7]
subs r12, r12, #1
strb r4, [r2, #4]
strb r5, [r2, #5]
strb r6, [r2, #6]
strb r7, [r2, #7]
ldrb r4, [r0, #8]
ldrb r5, [r0, #9]
ldrb r6, [r0, #10]
ldrb r7, [r0, #11]
strb r4, [r2, #8]
strb r5, [r2, #9]
strb r6, [r2, #10]
strb r7, [r2, #11]
ldrb r4, [r0, #12]
ldrb r5, [r0, #13]
ldrb r6, [r0, #14]
ldrb r7, [r0, #15]
add r0, r0, r1
strb r4, [r2, #12]
strb r5, [r2, #13]
strb r6, [r2, #14]
strb r7, [r2, #15]
add r2, r2, r3
ldrneb r4, [r0]
ldrneb r5, [r0, #1]
ldrneb r6, [r0, #2]
ldrneb r7, [r0, #3]
bne copy_mem16x16_1_loop
ldmia sp!, {r4 - r7}
;pop {r4-r7}
mov pc, lr
;copy 4 bytes each time
copy_mem16x16_4
ldr r4, [r0]
ldr r5, [r0, #4]
ldr r6, [r0, #8]
ldr r7, [r0, #12]
mov r12, #16
copy_mem16x16_4_loop
subs r12, r12, #1
add r0, r0, r1
str r4, [r2]
str r5, [r2, #4]
str r6, [r2, #8]
str r7, [r2, #12]
add r2, r2, r3
ldrne r4, [r0]
ldrne r5, [r0, #4]
ldrne r6, [r0, #8]
ldrne r7, [r0, #12]
bne copy_mem16x16_4_loop
ldmia sp!, {r4 - r7}
;pop {r4-r7}
mov pc, lr
;copy 8 bytes each time
copy_mem16x16_8
sub r1, r1, #16
sub r3, r3, #16
mov r12, #16
copy_mem16x16_8_loop
ldmia r0!, {r4-r5}
;ldm r0, {r4-r5}
ldmia r0!, {r6-r7}
add r0, r0, r1
stmia r2!, {r4-r5}
subs r12, r12, #1
;stm r2, {r4-r5}
stmia r2!, {r6-r7}
add r2, r2, r3
bne copy_mem16x16_8_loop
ldmia sp!, {r4 - r7}
;pop {r4-r7}
mov pc, lr
;copy 16 bytes each time
copy_mem16x16_fast
;sub r1, r1, #16
;sub r3, r3, #16
mov r12, #16
copy_mem16x16_fast_loop
ldmia r0, {r4-r7}
;ldm r0, {r4-r7}
add r0, r0, r1
subs r12, r12, #1
stmia r2, {r4-r7}
;stm r2, {r4-r7}
add r2, r2, r3
bne copy_mem16x16_fast_loop
ldmia sp!, {r4 - r7}
;pop {r4-r7}
mov pc, lr
ENDP ; |vp8_copy_mem16x16_v6|
END

View file

@@ -0,0 +1,128 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_copy_mem8x4_v6|
; ARM
; REQUIRE8
; PRESERVE8
AREA Block, CODE, READONLY ; name this block of code
;void vp8_copy_mem8x4_v6( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride)
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|vp8_copy_mem8x4_v6| PROC
;push {r4-r5}
stmdb sp!, {r4-r5}
;preload
pld [r0]
pld [r0, r1]
pld [r0, r1, lsl #1]
ands r4, r0, #7
beq copy_mem8x4_fast
ands r4, r0, #3
beq copy_mem8x4_4
;copy 1 byte each time
ldrb r4, [r0]
ldrb r5, [r0, #1]
mov r12, #4
copy_mem8x4_1_loop
strb r4, [r2]
strb r5, [r2, #1]
ldrb r4, [r0, #2]
ldrb r5, [r0, #3]
subs r12, r12, #1
strb r4, [r2, #2]
strb r5, [r2, #3]
ldrb r4, [r0, #4]
ldrb r5, [r0, #5]
strb r4, [r2, #4]
strb r5, [r2, #5]
ldrb r4, [r0, #6]
ldrb r5, [r0, #7]
add r0, r0, r1
strb r4, [r2, #6]
strb r5, [r2, #7]
add r2, r2, r3
ldrneb r4, [r0]
ldrneb r5, [r0, #1]
bne copy_mem8x4_1_loop
ldmia sp!, {r4 - r5}
;pop {r4-r5}
mov pc, lr
;copy 4 bytes each time
copy_mem8x4_4
ldr r4, [r0]
ldr r5, [r0, #4]
mov r12, #4
copy_mem8x4_4_loop
subs r12, r12, #1
add r0, r0, r1
str r4, [r2]
str r5, [r2, #4]
add r2, r2, r3
ldrne r4, [r0]
ldrne r5, [r0, #4]
bne copy_mem8x4_4_loop
ldmia sp!, {r4-r5}
;pop {r4-r5}
mov pc, lr
;copy 8 bytes each time
copy_mem8x4_fast
;sub r1, r1, #8
;sub r3, r3, #8
mov r12, #4
copy_mem8x4_fast_loop
ldmia r0, {r4-r5}
;ldm r0, {r4-r5}
add r0, r0, r1
subs r12, r12, #1
stmia r2, {r4-r5}
;stm r2, {r4-r5}
add r2, r2, r3
bne copy_mem8x4_fast_loop
ldmia sp!, {r4-r5}
;pop {r4-r5}
mov pc, lr
ENDP ; |vp8_copy_mem8x4_v6|
END

@@ -0,0 +1,128 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_copy_mem8x8_v6|
; ARM
; REQUIRE8
; PRESERVE8
AREA Block, CODE, READONLY ; name this block of code
;void copy_mem8x8_v6( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride)
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|vp8_copy_mem8x8_v6| PROC
;push {r4-r5}
stmdb sp!, {r4-r5}
;preload
pld [r0]
pld [r0, r1]
pld [r0, r1, lsl #1]
ands r4, r0, #7
beq copy_mem8x8_fast
ands r4, r0, #3
beq copy_mem8x8_4
;copy 1 byte each time
ldrb r4, [r0]
ldrb r5, [r0, #1]
mov r12, #8
copy_mem8x8_1_loop
strb r4, [r2]
strb r5, [r2, #1]
ldrb r4, [r0, #2]
ldrb r5, [r0, #3]
subs r12, r12, #1
strb r4, [r2, #2]
strb r5, [r2, #3]
ldrb r4, [r0, #4]
ldrb r5, [r0, #5]
strb r4, [r2, #4]
strb r5, [r2, #5]
ldrb r4, [r0, #6]
ldrb r5, [r0, #7]
add r0, r0, r1
strb r4, [r2, #6]
strb r5, [r2, #7]
add r2, r2, r3
ldrneb r4, [r0]
ldrneb r5, [r0, #1]
bne copy_mem8x8_1_loop
ldmia sp!, {r4 - r5}
;pop {r4-r5}
mov pc, lr
;copy 4 bytes each time
copy_mem8x8_4
ldr r4, [r0]
ldr r5, [r0, #4]
mov r12, #8
copy_mem8x8_4_loop
subs r12, r12, #1
add r0, r0, r1
str r4, [r2]
str r5, [r2, #4]
add r2, r2, r3
ldrne r4, [r0]
ldrne r5, [r0, #4]
bne copy_mem8x8_4_loop
ldmia sp!, {r4 - r5}
;pop {r4-r5}
mov pc, lr
;copy 8 bytes each time
copy_mem8x8_fast
;sub r1, r1, #8
;sub r3, r3, #8
mov r12, #8
copy_mem8x8_fast_loop
ldmia r0, {r4-r5}
;ldm r0, {r4-r5}
add r0, r0, r1
subs r12, r12, #1
stmia r2, {r4-r5}
;stm r2, {r4-r5}
add r2, r2, r3
bne copy_mem8x8_fast_loop
ldmia sp!, {r4-r5}
;pop {r4-r5}
mov pc, lr
ENDP ; |vp8_copy_mem8x8_v6|
END

@@ -0,0 +1,67 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license and patent
; grant that can be found in the LICENSE file in the root of the source
; tree. All contributing project authors may be found in the AUTHORS
; file in the root of the source tree.
;
EXPORT |vp8_dc_only_idct_add_v6|
AREA |.text|, CODE, READONLY
;void vp8_dc_only_idct_add_v6(short input_dc, unsigned char *pred_ptr,
; unsigned char *dst_ptr, int pitch, int stride)
; r0 input_dc
; r1 pred_ptr
; r2 dest_ptr
; r3 pitch
; sp stride
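; What this computes, as an illustrative C sketch (the assembly below works on a
; whole 4-byte row per uxtab16/usat16 pair):
;
;   int a1 = (input_dc + 4) >> 3;
;   for (r = 0; r < 4; r++)
;   {
;       for (c = 0; c < 4; c++)
;       {
;           int t = pred_ptr[c] + a1;
;           dst_ptr[c] = t < 0 ? 0 : (t > 255 ? 255 : t);
;       }
;       pred_ptr += pitch;
;       dst_ptr  += stride;
;   }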
|vp8_dc_only_idct_add_v6| PROC
stmdb sp!, {r4 - r7, lr}
add r0, r0, #4 ; input_dc += 4
ldr r12, c0x0000FFFF
ldr r4, [r1], r3
ldr r6, [r1], r3
and r0, r12, r0, asr #3 ; a1 = ((input_dc + 4) >> 3) & 0xFFFF
ldr lr, [sp, #20]
orr r0, r0, r0, lsl #16 ; a1 | a1
uxtab16 r5, r0, r4 ; a1+2 | a1+0
uxtab16 r4, r0, r4, ror #8 ; a1+3 | a1+1
uxtab16 r7, r0, r6
uxtab16 r6, r0, r6, ror #8
usat16 r5, #8, r5
usat16 r4, #8, r4
usat16 r7, #8, r7
usat16 r6, #8, r6
orr r5, r5, r4, lsl #8
orr r7, r7, r6, lsl #8
ldr r4, [r1], r3
ldr r6, [r1]
str r5, [r2], lr
str r7, [r2], lr
uxtab16 r5, r0, r4
uxtab16 r4, r0, r4, ror #8
uxtab16 r7, r0, r6
uxtab16 r6, r0, r6, ror #8
usat16 r5, #8, r5
usat16 r4, #8, r4
usat16 r7, #8, r7
usat16 r6, #8, r6
orr r5, r5, r4, lsl #8
orr r7, r7, r6, lsl #8
str r5, [r2], lr
str r7, [r2]
ldmia sp!, {r4 - r7, pc}
ENDP ; |vp8_dc_only_idct_add_v6|
; Constant Pool
c0x0000FFFF DCD 0x0000FFFF
END

@@ -0,0 +1,443 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_filter_block2d_first_pass_armv6|
EXPORT |vp8_filter_block2d_second_pass_armv6|
EXPORT |vp8_filter4_block2d_second_pass_armv6|
EXPORT |vp8_filter_block2d_first_pass_only_armv6|
EXPORT |vp8_filter_block2d_second_pass_only_armv6|
AREA |.text|, CODE, READONLY ; name this block of code
;-------------------------------------
; r0 unsigned char *src_ptr
; r1 short *output_ptr
; r2 unsigned int src_pixels_per_line
; r3 unsigned int output_width
; stack unsigned int output_height
; stack const short *vp8_filter
;-------------------------------------
; Apply the 6-tap FIR filter to the input and put the result in the output array,
; with 2-byte (short) output values and 1-byte input values.
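; Per output pixel the arithmetic is, as an illustrative C sketch (the assembly
; stores the results transposed so that the second pass can read them linearly):
;
;   for (i = 0; i < output_height; i++)
;   {
;       for (j = 0; j < output_width; j++)
;       {
;           int t = src_ptr[-2] * vp8_filter[0] + src_ptr[-1] * vp8_filter[1]
;                 + src_ptr[ 0] * vp8_filter[2] + src_ptr[ 1] * vp8_filter[3]
;                 + src_ptr[ 2] * vp8_filter[4] + src_ptr[ 3] * vp8_filter[5]
;                 + 64;                            /* rounding: VP8_FILTER_WEIGHT / 2 */
;           t >>= 7;                               /* VP8_FILTER_SHIFT */
;           output_ptr[j] = t < 0 ? 0 : (t > 255 ? 255 : t);
;           src_ptr++;
;       }
;       src_ptr    += src_pixels_per_line - output_width;
;       output_ptr += output_width;
;   }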
|vp8_filter_block2d_first_pass_armv6| PROC
stmdb sp!, {r4 - r11, lr}
ldr r11, [sp, #40] ; vp8_filter address
ldr r7, [sp, #36] ; output height
sub r2, r2, r3 ; inside loop increments input array,
; so the height loop only needs to add
; r2 - width to the input pointer
mov r3, r3, lsl #1 ; multiply width by 2 because using shorts
add r12, r3, #16 ; square off the output
sub sp, sp, #4
;;IF ARCHITECTURE=6
;pld [r0, #-2]
;;pld [r0, #30]
;;ENDIF
ldr r4, [r11] ; load up packed filter coefficients
ldr r5, [r11, #4]
ldr r6, [r11, #8]
str r1, [sp] ; push destination to stack
mov r7, r7, lsl #16 ; height is top part of counter
; six tap filter
|height_loop_1st_6|
ldrb r8, [r0, #-2] ; load source data
ldrb r9, [r0, #-1]
ldrb r10, [r0], #2
orr r7, r7, r3, lsr #2 ; construct loop counter
|width_loop_1st_6|
ldrb r11, [r0, #-1]
pkhbt lr, r8, r9, lsl #16 ; r9 | r8
pkhbt r8, r9, r10, lsl #16 ; r10 | r9
ldrb r9, [r0]
smuad lr, lr, r4 ; apply the filter
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
smuad r8, r8, r4
pkhbt r11, r11, r9, lsl #16 ; r9 | r11
smlad lr, r10, r5, lr
ldrb r10, [r0, #1]
smlad r8, r11, r5, r8
ldrb r11, [r0, #2]
sub r7, r7, #1
pkhbt r9, r9, r10, lsl #16 ; r10 | r9
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
smlad lr, r9, r6, lr
smlad r11, r10, r6, r8
ands r10, r7, #0xff ; test loop counter
add lr, lr, #0x40 ; round_shift_and_clamp
ldrneb r8, [r0, #-2] ; load data for next loop
usat lr, #8, lr, asr #7
add r11, r11, #0x40
ldrneb r9, [r0, #-1]
usat r11, #8, r11, asr #7
strh lr, [r1], r12 ; result is transposed and stored, which
; will make second pass filtering easier.
ldrneb r10, [r0], #2
strh r11, [r1], r12
bne width_loop_1st_6
;;add r9, r2, #30 ; attempt to load 2 adjacent cache lines
;;IF ARCHITECTURE=6
;pld [r0, r2]
;;pld [r0, r9]
;;ENDIF
ldr r1, [sp] ; load and update dst address
subs r7, r7, #0x10000
add r0, r0, r2 ; move to next input line
add r1, r1, #2 ; move over to next column
str r1, [sp]
bne height_loop_1st_6
add sp, sp, #4
ldmia sp!, {r4 - r11, pc}
ENDP
;---------------------------------
; r0 short *src_ptr,
; r1 unsigned char *output_ptr,
; r2 unsigned int output_pitch,
; r3 unsigned int cnt,
; stack const short *vp8_filter
;---------------------------------
|vp8_filter_block2d_second_pass_armv6| PROC
stmdb sp!, {r4 - r11, lr}
ldr r11, [sp, #36] ; vp8_filter address
sub sp, sp, #4
mov r7, r3, lsl #16 ; height is top part of counter
str r1, [sp] ; push destination to stack
ldr r4, [r11] ; load up packed filter coefficients
ldr r5, [r11, #4]
ldr r6, [r11, #8]
pkhbt r12, r5, r4 ; pack the filter differently
pkhbt r11, r6, r5
sub r0, r0, #4 ; offset input buffer
|height_loop_2nd|
ldr r8, [r0] ; load the data
ldr r9, [r0, #4]
orr r7, r7, r3, lsr #1 ; loop counter
|width_loop_2nd|
smuad lr, r4, r8 ; apply filter
sub r7, r7, #1
smulbt r8, r4, r8
ldr r10, [r0, #8]
smlad lr, r5, r9, lr
smladx r8, r12, r9, r8
ldrh r9, [r0, #12]
smlad lr, r6, r10, lr
smladx r8, r11, r10, r8
add r0, r0, #4
smlatb r10, r6, r9, r8
add lr, lr, #0x40 ; round_shift_and_clamp
ands r8, r7, #0xff
usat lr, #8, lr, asr #7
add r10, r10, #0x40
strb lr, [r1], r2 ; the result is transposed back and stored
usat r10, #8, r10, asr #7
ldrne r8, [r0] ; load data for next loop
ldrne r9, [r0, #4]
strb r10, [r1], r2
bne width_loop_2nd
ldr r1, [sp] ; update dst for next loop
subs r7, r7, #0x10000
add r0, r0, #16 ; update src for next loop
add r1, r1, #1
str r1, [sp]
bne height_loop_2nd
add sp, sp, #4
ldmia sp!, {r4 - r11, pc}
ENDP
;---------------------------------
; r0 short *src_ptr,
; r1 unsigned char *output_ptr,
; r2 unsigned int output_pitch,
; r3 unsigned int cnt,
; stack const short *vp8_filter
;---------------------------------
|vp8_filter4_block2d_second_pass_armv6| PROC
stmdb sp!, {r4 - r11, lr}
ldr r11, [sp, #36] ; vp8_filter address
mov r7, r3, lsl #16 ; height is top part of counter
ldr r4, [r11] ; load up packed filter coefficients
add lr, r1, r3 ; save final destination pointer
ldr r5, [r11, #4]
ldr r6, [r11, #8]
pkhbt r12, r5, r4 ; pack the filter differently
pkhbt r11, r6, r5
mov r4, #0x40 ; rounding factor (for smlad{x})
|height_loop_2nd_4|
ldrd r8, [r0, #-4] ; load the data
orr r7, r7, r3, lsr #1 ; loop counter
|width_loop_2nd_4|
ldr r10, [r0, #4]!
smladx r6, r9, r12, r4 ; apply filter
pkhbt r8, r9, r8
smlad r5, r8, r12, r4
pkhbt r8, r10, r9
smladx r6, r10, r11, r6
sub r7, r7, #1
smlad r5, r8, r11, r5
mov r8, r9 ; shift the data for the next loop
mov r9, r10
usat r6, #8, r6, asr #7 ; shift and clamp
usat r5, #8, r5, asr #7
strb r5, [r1], r2 ; the result is transposed back and stored
tst r7, #0xff
strb r6, [r1], r2
bne width_loop_2nd_4
subs r7, r7, #0x10000
add r0, r0, #16 ; update src for next loop
sub r1, lr, r7, lsr #16 ; update dst for next loop
bne height_loop_2nd_4
ldmia sp!, {r4 - r11, pc}
ENDP
;------------------------------------
; r0 unsigned char *src_ptr
; r1 unsigned char *output_ptr,
; r2 unsigned int src_pixels_per_line
; r3 unsigned int cnt,
; stack unsigned int output_pitch,
; stack const short *vp8_filter
;------------------------------------
|vp8_filter_block2d_first_pass_only_armv6| PROC
stmdb sp!, {r4 - r11, lr}
ldr r4, [sp, #36] ; output pitch
ldr r11, [sp, #40] ; HFilter address
sub sp, sp, #8
mov r7, r3
sub r2, r2, r3 ; inside loop increments input array,
; so the height loop only needs to add
; r2 - width to the input pointer
sub r4, r4, r3
str r4, [sp] ; save modified output pitch
str r2, [sp, #4]
mov r2, #0x40
ldr r4, [r11] ; load up packed filter coefficients
ldr r5, [r11, #4]
ldr r6, [r11, #8]
; six tap filter
|height_loop_1st_only_6|
ldrb r8, [r0, #-2] ; load data
ldrb r9, [r0, #-1]
ldrb r10, [r0], #2
mov r12, r3, lsr #1 ; loop counter
|width_loop_1st_only_6|
ldrb r11, [r0, #-1]
pkhbt lr, r8, r9, lsl #16 ; r9 | r8
pkhbt r8, r9, r10, lsl #16 ; r10 | r9
ldrb r9, [r0]
;; smuad lr, lr, r4
smlad lr, lr, r4, r2
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
;; smuad r8, r8, r4
smlad r8, r8, r4, r2
pkhbt r11, r11, r9, lsl #16 ; r9 | r11
smlad lr, r10, r5, lr
ldrb r10, [r0, #1]
smlad r8, r11, r5, r8
ldrb r11, [r0, #2]
subs r12, r12, #1
pkhbt r9, r9, r10, lsl #16 ; r10 | r9
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
smlad lr, r9, r6, lr
smlad r10, r10, r6, r8
;; add lr, lr, #0x40 ; round_shift_and_clamp
ldrneb r8, [r0, #-2] ; load data for next loop
usat lr, #8, lr, asr #7
;; add r10, r10, #0x40
strb lr, [r1], #1 ; store the result
usat r10, #8, r10, asr #7
ldrneb r9, [r0, #-1]
strb r10, [r1], #1
ldrneb r10, [r0], #2
bne width_loop_1st_only_6
;;add r9, r2, #30 ; attempt to load 2 adjacent cache lines
;;IF ARCHITECTURE=6
;pld [r0, r2]
;;pld [r0, r9]
;;ENDIF
ldr lr, [sp] ; load back output pitch
ldr r12, [sp, #4] ; load back output pitch
subs r7, r7, #1
add r0, r0, r12 ; update src for next loop
add r1, r1, lr ; update dst for next loop
bne height_loop_1st_only_6
add sp, sp, #8
ldmia sp!, {r4 - r11, pc}
ENDP ; |vp8_filter_block2d_first_pass_only_armv6|
;------------------------------------
; r0 unsigned char *src_ptr,
; r1 unsigned char *output_ptr,
; r2 unsigned int src_pixels_per_line
; r3 unsigned int cnt,
; stack unsigned int output_pitch,
; stack const short *vp8_filter
;------------------------------------
|vp8_filter_block2d_second_pass_only_armv6| PROC
stmdb sp!, {r4 - r11, lr}
ldr r11, [sp, #40] ; VFilter address
ldr r12, [sp, #36] ; output pitch
mov r7, r3, lsl #16 ; height is top part of counter
sub r0, r0, r2, lsl #1 ; need 6 elements for filtering, 2 before, 3 after
sub sp, sp, #8
ldr r4, [r11] ; load up packed filter coefficients
ldr r5, [r11, #4]
ldr r6, [r11, #8]
str r0, [sp] ; save r0 to stack
str r1, [sp, #4] ; save dst to stack
; six tap filter
|width_loop_2nd_only_6|
ldrb r8, [r0], r2 ; load data
orr r7, r7, r3 ; loop counter
ldrb r9, [r0], r2
ldrb r10, [r0], r2
|height_loop_2nd_only_6|
; filter the first column in this inner loop, then move to the next column.
ldrb r11, [r0], r2
pkhbt lr, r8, r9, lsl #16 ; r9 | r8
pkhbt r8, r9, r10, lsl #16 ; r10 | r9
ldrb r9, [r0], r2
smuad lr, lr, r4
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
smuad r8, r8, r4
pkhbt r11, r11, r9, lsl #16 ; r9 | r11
smlad lr, r10, r5, lr
ldrb r10, [r0], r2
smlad r8, r11, r5, r8
ldrb r11, [r0]
sub r7, r7, #2
sub r0, r0, r2, lsl #2
pkhbt r9, r9, r10, lsl #16 ; r10 | r9
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
smlad lr, r9, r6, lr
smlad r10, r10, r6, r8
ands r9, r7, #0xff
add lr, lr, #0x40 ; round_shift_and_clamp
ldrneb r8, [r0], r2 ; load data for next loop
usat lr, #8, lr, asr #7
add r10, r10, #0x40
strb lr, [r1], r12 ; store the result for the column
usat r10, #8, r10, asr #7
ldrneb r9, [r0], r2
strb r10, [r1], r12
ldrneb r10, [r0], r2
bne height_loop_2nd_only_6
ldr r0, [sp]
ldr r1, [sp, #4]
subs r7, r7, #0x10000
add r0, r0, #1 ; move to filter next column
str r0, [sp]
add r1, r1, #1
str r1, [sp, #4]
bne width_loop_2nd_only_6
add sp, sp, #8
ldmia sp!, {r4 - r11, pc}
ENDP ; |vp8_filter_block2d_second_pass_only_armv6|
END

@@ -0,0 +1,345 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
; r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 r11 r12 r14
EXPORT |vp8_short_idct4x4llm_1_v6|
EXPORT |vp8_short_idct4x4llm_v6|
EXPORT |vp8_short_idct4x4llm_v6_scott|
EXPORT |vp8_short_idct4x4llm_v6_dual|
AREA |.text|, CODE, READONLY
;********************************************************************************
;* void short_idct4x4llm_1_v6(INT16 * input, INT16 * output, INT32 pitch)
;* r0 INT16 * input
;* r1 INT16 * output
;* r2 INT32 pitch
;* bench: 3/5
;********************************************************************************
|vp8_short_idct4x4llm_1_v6| PROC ; cycles in out pit
;
ldrsh r0, [r0] ; load input[0] 1, r0 un 2
add r0, r0, #4 ; 1 +4
stmdb sp!, {r4, r5, lr} ; make room for wide writes 1 backup
mov r0, r0, asr #3 ; (input[0] + 4) >> 3 1, r0 req`d ^1 >> 3
pkhbt r4, r0, r0, lsl #16 ; pack r0 into r4 1, r0 req`d ^1 pack
mov r5, r4 ; expand expand
strd r4, [r1], r2 ; *output = r0, post inc 1
strd r4, [r1], r2 ; 1
strd r4, [r1], r2 ; 1
strd r4, [r1] ; 1
;
ldmia sp!, {r4, r5, pc} ; replace vars, return restore
ENDP ; |vp8_short_idct4x4llm_1_v6|
;********************************************************************************
;********************************************************************************
;********************************************************************************
;********************************************************************************
;* void short_idct4x4llm_v6(INT16 * input, INT16 * output, INT32 pitch)
;* r0 INT16 * input
;* r1 INT16 * output
;* r2 INT32 pitch
;* bench:
;********************************************************************************
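; Column-pass arithmetic as an illustrative C sketch (cospi8sqrt2minus1 = 0x4E7B
; and sinpi8sqrt2 = 0x8A8C are the Q16 constants built by the mov/orr pairs below;
; the row pass repeats the same butterfly and rounds with (x + 4) >> 3):
;
;   a1 = ip[0] + ip[8];
;   b1 = ip[0] - ip[8];
;   temp1 = (ip[4] * sinpi8sqrt2) >> 16;
;   temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16);
;   c1 = temp1 - temp2;
;   temp1 = ip[4] + ((ip[4] * cospi8sqrt2minus1) >> 16);
;   temp2 = (ip[12] * sinpi8sqrt2) >> 16;
;   d1 = temp1 + temp2;
;   op[0 * sp] = a1 + d1;          /* sp: output pitch in 16-bit units */
;   op[1 * sp] = b1 + c1;
;   op[2 * sp] = b1 - c1;
;   op[3 * sp] = a1 - d1;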
|vp8_short_idct4x4llm_v6| PROC ; cycles in out pit
;
stmdb sp!, {r4-r11, lr} ; backup registers 1 backup
;
mov r4, #0x00004E00 ; 1 cst
orr r4, r4, #0x0000007B ; cospi8sqrt2minus1
mov r5, #0x00008A00 ; 1 cst
orr r5, r5, #0x0000008C ; sinpi8sqrt2
;
mov r6, #4 ; i=4 1 i
loop1 ;
ldrsh r12, [r0, #8] ; input[4] 1, r12 unavail 2 [4]
ldrsh r3, [r0, #24] ; input[12] 1, r3 unavail 2 [12]
ldrsh r8, [r0, #16] ; input[8] 1, r8 unavail 2 [8]
ldrsh r7, [r0], #0x2 ; input[0] 1, r7 unavail 2 ++ [0]
smulwb r10, r5, r12 ; ([4] * sinpi8sqrt2) >> 16 1, r10 un 2, r12/r5 ^1 t1
smulwb r11, r4, r3 ; ([12] * cospi8sqrt2minus1) >> 16 1, r11 un 2, r3/r4 ^1 t2
add r9, r7, r8 ; a1 = [0] + [8] 1 a1
sub r7, r7, r8 ; b1 = [0] - [8] 1 b1
add r11, r3, r11 ; temp2 1
rsb r11, r11, r10 ; c1 = temp1 - temp2 1 c1
smulwb r3, r5, r3 ; ([12] * sinpi8sqrt2) >> 16 1, r3 un 2, r3/r5 ^ 1 t2
smulwb r10, r4, r12 ; ([4] * cospi8sqrt2minus1) >> 16 1, r10 un 2, r12/r4 ^1 t1
add r8, r7, r11 ; b1 + c1 1 b+c
strh r8, [r1, r2] ; out[pitch] = b1+c1 1
sub r7, r7, r11 ; b1 - c1 1 b-c
add r10, r12, r10 ; temp1 1
add r3, r10, r3 ; d1 = temp1 + temp2 1 d1
add r10, r9, r3 ; a1 + d1 1 a+d
sub r3, r9, r3 ; a1 - d1 1 a-d
add r8, r2, r2 ; pitch * 2 1 p*2
strh r7, [r1, r8] ; out[pitch*2] = b1-c1 1
add r7, r2, r2, lsl #1 ; pitch * 3 1 p*3
strh r3, [r1, r7] ; out[pitch*3] = a1-d1 1
subs r6, r6, #1 ; i-- 1 --
strh r10, [r1], #0x2 ; out[0] = a1+d1 1 ++
bne loop1 ; if i>0, continue
;
sub r1, r1, #8 ; set up out for next loop 1 -4
; for this iteration, input=prev output
mov r6, #4 ; i=4 1 i
; b returnfull
loop2 ;
ldrsh r11, [r1, #2] ; input[1] 1, r11 un 2 [1]
ldrsh r8, [r1, #6] ; input[3] 1, r8 un 2 [3]
ldrsh r3, [r1, #4] ; input[2] 1, r3 un 2 [2]
ldrsh r0, [r1] ; input[0] 1, r0 un 2 [0]
smulwb r9, r5, r11 ; ([1] * sinpi8sqrt2) >> 16 1, r9 un 2, r5/r11 ^1 t1
smulwb r10, r4, r8 ; ([3] * cospi8sqrt2minus1) >> 16 1, r10 un 2, r4/r8 ^1 t2
add r7, r0, r3 ; a1 = [0] + [2] 1 a1
sub r0, r0, r3 ; b1 = [0] - [2] 1 b1
add r10, r8, r10 ; temp2 1
rsb r9, r10, r9 ; c1 = temp1 - temp2 1 c1
smulwb r8, r5, r8 ; ([3] * sinpi8sqrt2) >> 16 1, r8 un 2, r5/r8 ^1 t2
smulwb r10, r4, r11 ; ([1] * cospi8sqrt2minus1) >> 16 1, r10 un 2, r4/r11 ^1 t1
add r3, r0, r9 ; b1+c1 1 b+c
add r3, r3, #4 ; b1+c1+4 1 +4
add r10, r11, r10 ; temp1 1
mov r3, r3, asr #3 ; b1+c1+4 >> 3 1, r3 ^1 >>3
strh r3, [r1, #2] ; out[1] = b1+c1 1
add r10, r10, r8 ; d1 = temp1 + temp2 1 d1
add r3, r7, r10 ; a1+d1 1 a+d
add r3, r3, #4 ; a1+d1+4 1 +4
sub r7, r7, r10 ; a1-d1 1 a-d
add r7, r7, #4 ; a1-d1+4 1 +4
mov r3, r3, asr #3 ; a1+d1+4 >> 3 1, r3 ^1 >>3
mov r7, r7, asr #3 ; a1-d1+4 >> 3 1, r7 ^1 >>3
strh r7, [r1, #6] ; out[3] = a1-d1 1
sub r0, r0, r9 ; b1-c1 1 b-c
add r0, r0, #4 ; b1-c1+4 1 +4
subs r6, r6, #1 ; i-- 1 --
mov r0, r0, asr #3 ; b1-c1+4 >> 3 1, r0 ^1 >>3
strh r0, [r1, #4] ; out[2] = b1-c1 1
strh r3, [r1], r2 ; out[0] = a1+d1 1
; add r1, r1, r2 ; out += pitch 1 ++
bne loop2 ; if i>0, continue
returnfull ;
ldmia sp!, {r4 - r11, pc} ; replace vars, return restore
ENDP
;********************************************************************************
;********************************************************************************
;********************************************************************************
;********************************************************************************
;* void short_idct4x4llm_v6_scott(INT16 * input, INT16 * output, INT32 pitch)
;* r0 INT16 * input
;* r1 INT16 * output
;* r2 INT32 pitch
;* bench:
;********************************************************************************
|vp8_short_idct4x4llm_v6_scott| PROC ; cycles in out pit
; mov r0, #0 ;
; ldr r0, [r0] ;
stmdb sp!, {r4 - r11, lr} ; backup registers 1 backup
;
mov r3, #0x00004E00 ; cos
orr r3, r3, #0x0000007B ; cospi8sqrt2minus1
mov r4, #0x00008A00 ; sin
orr r4, r4, #0x0000008C ; sinpi8sqrt2
;
mov r5, #0x2 ; i i
;
short_idct4x4llm_v6_scott_loop1 ;
ldr r10, [r0, #(4*2)] ; i5 | i4 5,4
ldr r11, [r0, #(12*2)] ; i13 | i12 13,12
;
smulwb r6, r4, r10 ; ((ip[4] * sinpi8sqrt2) >> 16) lt1
smulwb r7, r3, r11 ; ((ip[12] * cospi8sqrt2minus1) >> 16) lt2
;
smulwb r12, r3, r10 ; ((ip[4] * cospi8sqrt2misu1) >> 16) l2t2
smulwb r14, r4, r11 ; ((ip[12] * sinpi8sqrt2) >> 16) l2t1
;
add r6, r6, r7 ; partial c1 lt1-lt2
add r12, r12, r14 ; partial d1 l2t2+l2t1
;
smulwt r14, r4, r10 ; ((ip[5] * sinpi8sqrt2) >> 16) ht1
smulwt r7, r3, r11 ; ((ip[13] * cospi8sqrt2minus1) >> 16) ht2
;
smulwt r8, r3, r10 ; ((ip[5] * cospi8sqrt2minus1) >> 16) h2t1
smulwt r9, r4, r11 ; ((ip[13] * sinpi8sqrt2) >> 16) h2t2
;
add r7, r14, r7 ; partial c1_2 ht1+ht2
sub r8, r8, r9 ; partial d1_2 h2t1-h2t2
;
pkhbt r6, r6, r7, lsl #16 ; partial c1_2 | partial c1_1 pack
pkhbt r12, r12, r8, lsl #16 ; partial d1_2 | partial d1_1 pack
;
usub16 r6, r6, r10 ; c1_2 | c1_1 c
uadd16 r12, r12, r11 ; d1_2 | d1_1 d
;
ldr r10, [r0, #0] ; i1 | i0 1,0
ldr r11, [r0, #(8*2)] ; i9 | i10 9,10
;
;;;;;; add r0, r0, #0x4 ; +4
;;;;;; add r1, r1, #0x4 ; +4
;
uadd16 r8, r10, r11 ; i1 + i9 | i0 + i8 aka a1 a
usub16 r9, r10, r11 ; i1 - i9 | i0 - i8 aka b1 b
;
uadd16 r7, r8, r12 ; a1 + d1 pair a+d
usub16 r14, r8, r12 ; a1 - d1 pair a-d
;
str r7, [r1] ; op[0] = a1 + d1
str r14, [r1, r2] ; op[pitch*3] = a1 - d1
;
add r0, r0, #0x4 ; op[pitch] = b1 + c1 ++
add r1, r1, #0x4 ; op[pitch*2] = b1 - c1 ++
;
subs r5, r5, #0x1 ; --
bne short_idct4x4llm_v6_scott_loop1 ;
;
sub r1, r1, #16 ; reset output ptr
mov r5, #0x4 ;
mov r0, r1 ; input = output
;
short_idct4x4llm_v6_scott_loop2 ;
;
subs r5, r5, #0x1 ;
bne short_idct4x4llm_v6_scott_loop2 ;
;
ldmia sp!, {r4 - r11, pc} ;
ENDP ;
;
;********************************************************************************
;********************************************************************************
;********************************************************************************
;********************************************************************************
;* void short_idct4x4llm_v6_dual(INT16 * input, INT16 * output, INT32 pitch)
;* r0 INT16 * input
;* r1 INT16 * output
;* r2 INT32 pitch
;* bench:
;********************************************************************************
|vp8_short_idct4x4llm_v6_dual| PROC ; cycles in out pit
;
stmdb sp!, {r4-r11, lr} ; backup registers 1 backup
mov r3, #0x00004E00 ; cos
orr r3, r3, #0x0000007B ; cospi8sqrt2minus1
mov r4, #0x00008A00 ; sin
orr r4, r4, #0x0000008C ; sinpi8sqrt2
mov r5, #0x2 ; i=2 i
loop1_dual
ldr r6, [r0, #(4*2)] ; i5 | i4 5|4
ldr r12, [r0, #(12*2)] ; i13 | i12 13|12
ldr r14, [r0, #(8*2)] ; i9 | i8 9|8
smulwt r9, r3, r6 ; (ip[5] * cospi8sqrt2minus1) >> 16 5c
smulwb r7, r3, r6 ; (ip[4] * cospi8sqrt2minus1) >> 16 4c
smulwt r10, r4, r6 ; (ip[5] * sinpi8sqrt2) >> 16 5s
smulwb r8, r4, r6 ; (ip[4] * sinpi8sqrt2) >> 16 4s
pkhbt r7, r7, r9, lsl #16 ; 5c | 4c
smulwt r11, r3, r12 ; (ip[13] * cospi8sqrt2minus1) >> 16 13c
pkhbt r8, r8, r10, lsl #16 ; 5s | 4s
uadd16 r6, r6, r7 ; 5c+5 | 4c+4
smulwt r7, r4, r12 ; (ip[13] * sinpi8sqrt2) >> 16 13s
smulwb r9, r3, r12 ; (ip[12] * cospi8sqrt2minus1) >> 16 12c
smulwb r10, r4, r12 ; (ip[12] * sinpi8sqrt2) >> 16 12s
subs r5, r5, #0x1 ; i-- --
pkhbt r9, r9, r11, lsl #16 ; 13c | 12c
ldr r11, [r0], #0x4 ; i1 | i0 ++ 1|0
pkhbt r10, r10, r7, lsl #16 ; 13s | 12s
uadd16 r7, r12, r9 ; 13c+13 | 12c+12
usub16 r7, r8, r7 ; c c
uadd16 r6, r6, r10 ; d d
uadd16 r10, r11, r14 ; a a
usub16 r8, r11, r14 ; b b
uadd16 r9, r10, r6 ; a+d a+d
usub16 r10, r10, r6 ; a-d a-d
uadd16 r6, r8, r7 ; b+c b+c
usub16 r7, r8, r7 ; b-c b-c
str r6, [r1, r2] ; o5 | o4
add r6, r2, r2 ; pitch * 2 p2
str r7, [r1, r6] ; o9 | o8
add r6, r6, r2 ; pitch * 3 p3
str r10, [r1, r6] ; o13 | o12
str r9, [r1], #0x4 ; o1 | o0 ++
bne loop1_dual ;
mov r5, #0x2 ; i=2 i
sub r0, r1, #8 ; reset input/output i/o
loop2_dual
ldr r6, [r0, r2] ; i5 | i4 5|4
ldr r1, [r0] ; i1 | i0 1|0
ldr r12, [r0, #0x4] ; i3 | i2 3|2
add r14, r2, #0x4 ; pitch + 2 p+2
ldr r14, [r0, r14] ; i7 | i6 7|6
smulwt r9, r3, r6 ; (ip[5] * cospi8sqrt2minus1) >> 16 5c
smulwt r7, r3, r1 ; (ip[1] * cospi8sqrt2minus1) >> 16 1c
smulwt r10, r4, r6 ; (ip[5] * sinpi8sqrt2) >> 16 5s
smulwt r8, r4, r1 ; (ip[1] * sinpi8sqrt2) >> 16 1s
pkhbt r11, r6, r1, lsl #16 ; i0 | i4 0|4
pkhbt r7, r9, r7, lsl #16 ; 1c | 5c
pkhbt r8, r10, r8, lsl #16 ; 1s | 5s = temp1 (c) tc1
pkhtb r1, r1, r6, asr #16 ; i1 | i5 1|5
uadd16 r1, r7, r1 ; 1c+1 | 5c+5 = temp2 (d) td2
pkhbt r9, r14, r12, lsl #16 ; i2 | i6 2|6
uadd16 r10, r11, r9 ; a a
usub16 r9, r11, r9 ; b b
pkhtb r6, r12, r14, asr #16 ; i3 | i7 3|7
subs r5, r5, #0x1 ; i-- --
smulwt r7, r3, r6 ; (ip[3] * cospi8sqrt2minus1) >> 16 3c
smulwt r11, r4, r6 ; (ip[3] * sinpi8sqrt2) >> 16 3s
smulwb r12, r3, r6 ; (ip[7] * cospi8sqrt2minus1) >> 16 7c
smulwb r14, r4, r6 ; (ip[7] * sinpi8sqrt2) >> 16 7s
pkhbt r7, r12, r7, lsl #16 ; 3c | 7c
pkhbt r11, r14, r11, lsl #16 ; 3s | 7s = temp1 (d) td1
uadd16 r6, r7, r6 ; 3c+3 | 7c+7 = temp2 (c) tc2
usub16 r12, r8, r6 ; c (o1 | o5) c
uadd16 r6, r11, r1 ; d (o3 | o7) d
uadd16 r7, r10, r6 ; a+d a+d
mov r8, #0x4 ; set up 4's 4
orr r8, r8, #0x40000 ; 4|4
usub16 r6, r10, r6 ; a-d a-d
uadd16 r6, r6, r8 ; a-d+4 3|7
uadd16 r7, r7, r8 ; a+d+4 0|4
uadd16 r10, r9, r12 ; b+c b+c
usub16 r1, r9, r12 ; b-c b-c
uadd16 r10, r10, r8 ; b+c+4 1|5
uadd16 r1, r1, r8 ; b-c+4 2|6
mov r8, r10, asr #19 ; o1 >> 3
strh r8, [r0, #2] ; o1
mov r8, r1, asr #19 ; o2 >> 3
strh r8, [r0, #4] ; o2
mov r8, r6, asr #19 ; o3 >> 3
strh r8, [r0, #6] ; o3
mov r8, r7, asr #19 ; o0 >> 3
strh r8, [r0], r2 ; o0 +p
sxth r10, r10 ;
mov r8, r10, asr #3 ; o5 >> 3
strh r8, [r0, #2] ; o5
sxth r1, r1 ;
mov r8, r1, asr #3 ; o6 >> 3
strh r8, [r0, #4] ; o6
sxth r6, r6 ;
mov r8, r6, asr #3 ; o7 >> 3
strh r8, [r0, #6] ; o7
sxth r7, r7 ;
mov r8, r7, asr #3 ; o4 >> 3
strh r8, [r0], r2 ; o4 +p
;;;;; subs r5, r5, #0x1 ; i-- --
bne loop2_dual ;
;
ldmia sp!, {r4 - r11, pc} ; replace vars, return restore
ENDP
END

@@ -0,0 +1,152 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_short_inv_walsh4x4_v6|
EXPORT |vp8_short_inv_walsh4x4_1_v6|
ARM
REQUIRE8
PRESERVE8
AREA |.text|, CODE, READONLY ; name this block of code
;short vp8_short_inv_walsh4x4_v6(short *input, short *output)
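; Illustrative C sketch of the inverse Walsh-Hadamard transform implemented below
; (columns first, then rows, with a final (x + 3) >> 3 rounding; the assembly keeps
; the column-pass results in registers instead of writing them out):
;
;   for (i = 0; i < 4; i++)              /* columns */
;   {
;       a1 = ip[0] + ip[12];  b1 = ip[4] + ip[8];
;       c1 = ip[4] - ip[8];   d1 = ip[0] - ip[12];
;       op[0] = a1 + b1;  op[4]  = c1 + d1;
;       op[8] = a1 - b1;  op[12] = d1 - c1;
;       ip++;  op++;
;   }
;   ip = output;  op = output;           /* row pass operates in place */
;   for (i = 0; i < 4; i++)              /* rows */
;   {
;       a1 = ip[0] + ip[3];  b1 = ip[1] + ip[2];
;       c1 = ip[1] - ip[2];  d1 = ip[0] - ip[3];
;       op[0] = (a1 + b1 + 3) >> 3;  op[1] = (c1 + d1 + 3) >> 3;
;       op[2] = (a1 - b1 + 3) >> 3;  op[3] = (d1 - c1 + 3) >> 3;
;       ip += 4;  op += 4;
;   }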
|vp8_short_inv_walsh4x4_v6| PROC
stmdb sp!, {r4 - r11, lr}
ldr r2, [r0], #4 ; [1 | 0]
ldr r3, [r0], #4 ; [3 | 2]
ldr r4, [r0], #4 ; [5 | 4]
ldr r5, [r0], #4 ; [7 | 6]
ldr r6, [r0], #4 ; [9 | 8]
ldr r7, [r0], #4 ; [11 | 10]
ldr r8, [r0], #4 ; [13 | 12]
ldr r9, [r0] ; [15 | 14]
qadd16 r10, r2, r8 ; a1 [1+13 | 0+12]
qadd16 r11, r4, r6 ; b1 [5+9 | 4+8]
qsub16 r12, r4, r6 ; c1 [5-9 | 4-8]
qsub16 lr, r2, r8 ; d1 [1-13 | 0-12]
qadd16 r2, r10, r11 ; a1 + b1 [1 | 0]
qadd16 r4, r12, lr ; c1 + d1 [5 | 4]
qsub16 r6, r10, r11 ; a1 - b1 [9 | 8]
qsub16 r8, lr, r12 ; d1 - c1 [13 | 12]
qadd16 r10, r3, r9 ; a1 [3+15 | 2+14]
qadd16 r11, r5, r7 ; b1 [7+11 | 6+10]
qsub16 r12, r5, r7 ; c1 [7-11 | 6-10]
qsub16 lr, r3, r9 ; d1 [3-15 | 2-14]
qadd16 r3, r10, r11 ; a1 + b1 [3 | 2]
qadd16 r5, r12, lr ; c1 + d1 [7 | 6]
qsub16 r7, r10, r11 ; a1 - b1 [11 | 10]
qsub16 r9, lr, r12 ; d1 - c1 [15 | 14]
; first transform complete
qsubaddx r10, r2, r3 ; [c1|a1] [1-2 | 0+3]
qaddsubx r11, r2, r3 ; [b1|d1] [1+2 | 0-3]
qsubaddx r12, r4, r5 ; [c1|a1] [5-6 | 4+7]
qaddsubx lr, r4, r5 ; [b1|d1] [5+6 | 4-7]
qaddsubx r2, r10, r11 ; [b2|c2] [c1+d1 | a1-b1]
qaddsubx r3, r11, r10 ; [a2|d2] [b1+a1 | d1-c1]
ldr r10, c0x00030003
qaddsubx r4, r12, lr ; [b2|c2] [c1+d1 | a1-b1]
qaddsubx r5, lr, r12 ; [a2|d2] [b1+a1 | d1-c1]
qadd16 r2, r2, r10 ; [b2+3|c2+3]
qadd16 r3, r3, r10 ; [a2+3|d2+3]
qadd16 r4, r4, r10 ; [b2+3|c2+3]
qadd16 r5, r5, r10 ; [a2+3|d2+3]
asr r12, r2, #3 ; [1 | x]
pkhtb r12, r12, r3, asr #19; [1 | 0]
lsl lr, r3, #16 ; [~3 | x]
lsl r2, r2, #16 ; [~2 | x]
asr lr, lr, #3 ; [3 | x]
pkhtb lr, lr, r2, asr #19 ; [3 | 2]
asr r2, r4, #3 ; [5 | x]
pkhtb r2, r2, r5, asr #19 ; [5 | 4]
lsl r3, r5, #16 ; [~7 | x]
lsl r4, r4, #16 ; [~6 | x]
asr r3, r3, #3 ; [7 | x]
pkhtb r3, r3, r4, asr #19 ; [7 | 6]
str r12, [r1], #4
str lr, [r1], #4
str r2, [r1], #4
str r3, [r1], #4
qsubaddx r2, r6, r7 ; [c1|a1] [9-10 | 8+11]
qaddsubx r3, r6, r7 ; [b1|d1] [9+10 | 8-11]
qsubaddx r4, r8, r9 ; [c1|a1] [13-14 | 12+15]
qaddsubx r5, r8, r9 ; [b1|d1] [13+14 | 12-15]
qaddsubx r6, r2, r3 ; [b2|c2] [c1+d1 | a1-b1]
qaddsubx r7, r3, r2 ; [a2|d2] [b1+a1 | d1-c1]
qaddsubx r8, r4, r5 ; [b2|c2] [c1+d1 | a1-b1]
qaddsubx r9, r5, r4 ; [a2|d2] [b1+a1 | d1-c1]
qadd16 r6, r6, r10 ; [b2+3|c2+3]
qadd16 r7, r7, r10 ; [a2+3|d2+3]
qadd16 r8, r8, r10 ; [b2+3|c2+3]
qadd16 r9, r9, r10 ; [a2+3|d2+3]
asr r2, r6, #3 ; [9 | x]
pkhtb r2, r2, r7, asr #19 ; [9 | 8]
lsl r3, r7, #16 ; [~11| x]
lsl r4, r6, #16 ; [~10| x]
asr r3, r3, #3 ; [11 | x]
pkhtb r3, r3, r4, asr #19 ; [11 | 10]
asr r4, r8, #3 ; [13 | x]
pkhtb r4, r4, r9, asr #19 ; [13 | 12]
lsl r5, r9, #16 ; [~15| x]
lsl r6, r8, #16 ; [~14| x]
asr r5, r5, #3 ; [15 | x]
pkhtb r5, r5, r6, asr #19 ; [15 | 14]
str r2, [r1], #4
str r3, [r1], #4
str r4, [r1], #4
str r5, [r1]
ldmia sp!, {r4 - r11, pc}
ENDP ; |vp8_short_inv_walsh4x4_v6|
;short vp8_short_inv_walsh4x4_1_v6(short *input, short *output)
|vp8_short_inv_walsh4x4_1_v6| PROC
ldrsh r2, [r0] ; [0]
add r2, r2, #3 ; [0] + 3
asr r2, r2, #3 ; a1 ([0]+3) >> 3
lsl r2, r2, #16 ; [a1 | x]
orr r2, r2, r2, lsr #16 ; [a1 | a1]
str r2, [r1], #4
str r2, [r1], #4
str r2, [r1], #4
str r2, [r1], #4
str r2, [r1], #4
str r2, [r1], #4
str r2, [r1], #4
str r2, [r1]
bx lr
ENDP ; |vp8_short_inv_walsh4x4_1_v6|
; Constant Pool
c0x00030003 DCD 0x00030003
END

(The diff for one file is not shown because of its large size.)
@@ -0,0 +1,281 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_recon_b_armv6|
EXPORT |vp8_recon2b_armv6|
EXPORT |vp8_recon4b_armv6|
AREA |.text|, CODE, READONLY ; name this block of code
prd RN r0
dif RN r1
dst RN r2
stride RN r3
;void recon_b(unsigned char *pred_ptr, short *diff_ptr, unsigned char *dst_ptr, int stride)
; R0 char* pred_ptr
; R1 short * dif_ptr
; R2 char * dst_ptr
; R3 int stride
; Description:
; Loop through the block adding the Pred and Diff together. Clamp and then
; store back into the Dst.
; Restrictions :
; all buffers are expected to be 4 byte aligned coming in and
; going out.
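; Per element this is, as an illustrative C sketch (the assembly below handles four
; pixels per 32-bit word with uxtab16/usat16):
;
;   for (r = 0; r < 4; r++)
;   {
;       for (c = 0; c < 4; c++)
;       {
;           int t = pred_ptr[c] + diff_ptr[c];
;           dst_ptr[c] = t < 0 ? 0 : (t > 255 ? 255 : t);
;       }
;       pred_ptr += 16;       /* prediction buffer rows are 16 bytes apart */
;       diff_ptr += 16;       /* diff buffer rows are 16 shorts apart */
;       dst_ptr  += stride;
;   }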
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
;
;
;
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|vp8_recon_b_armv6| PROC
stmdb sp!, {r4 - r9, lr}
;0, 1, 2, 3
ldr r4, [prd], #16 ; 3 | 2 | 1 | 0
ldr r6, [dif, #0] ; 1 | 0
ldr r7, [dif, #4] ; 3 | 2
pkhbt r8, r6, r7, lsl #16 ; 2 | 0
pkhtb r9, r7, r6, asr #16 ; 3 | 1
uxtab16 r8, r8, r4 ; 2 | 0 + 3 | 2 | 2 | 0
uxtab16 r9, r9, r4, ror #8 ; 3 | 1 + 0 | 3 | 2 | 1
usat16 r8, #8, r8
usat16 r9, #8, r9
add dif, dif, #32
orr r8, r8, r9, lsl #8
str r8, [dst], stride
;0, 1, 2, 3
ldr r4, [prd], #16 ; 3 | 2 | 1 | 0
;; ldr r6, [dif, #8] ; 1 | 0
;; ldr r7, [dif, #12] ; 3 | 2
ldr r6, [dif, #0] ; 1 | 0
ldr r7, [dif, #4] ; 3 | 2
pkhbt r8, r6, r7, lsl #16 ; 2 | 0
pkhtb r9, r7, r6, asr #16 ; 3 | 1
uxtab16 r8, r8, r4 ; 2 | 0 + 3 | 2 | 2 | 0
uxtab16 r9, r9, r4, ror #8 ; 3 | 1 + 0 | 3 | 2 | 1
usat16 r8, #8, r8
usat16 r9, #8, r9
add dif, dif, #32
orr r8, r8, r9, lsl #8
str r8, [dst], stride
;0, 1, 2, 3
ldr r4, [prd], #16 ; 3 | 2 | 1 | 0
;; ldr r6, [dif, #16] ; 1 | 0
;; ldr r7, [dif, #20] ; 3 | 2
ldr r6, [dif, #0] ; 1 | 0
ldr r7, [dif, #4] ; 3 | 2
pkhbt r8, r6, r7, lsl #16 ; 2 | 0
pkhtb r9, r7, r6, asr #16 ; 3 | 1
uxtab16 r8, r8, r4 ; 2 | 0 + 3 | 2 | 2 | 0
uxtab16 r9, r9, r4, ror #8 ; 3 | 1 + 0 | 3 | 2 | 1
usat16 r8, #8, r8
usat16 r9, #8, r9
add dif, dif, #32
orr r8, r8, r9, lsl #8
str r8, [dst], stride
;0, 1, 2, 3
ldr r4, [prd], #16 ; 3 | 2 | 1 | 0
;; ldr r6, [dif, #24] ; 1 | 0
;; ldr r7, [dif, #28] ; 3 | 2
ldr r6, [dif, #0] ; 1 | 0
ldr r7, [dif, #4] ; 3 | 2
pkhbt r8, r6, r7, lsl #16 ; 2 | 0
pkhtb r9, r7, r6, asr #16 ; 3 | 1
uxtab16 r8, r8, r4 ; 2 | 0 + 3 | 2 | 2 | 0
uxtab16 r9, r9, r4, ror #8 ; 3 | 1 + 0 | 3 | 2 | 1
usat16 r8, #8, r8
usat16 r9, #8, r9
orr r8, r8, r9, lsl #8
str r8, [dst], stride
ldmia sp!, {r4 - r9, pc}
ENDP ; |recon_b|
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
;
;
;
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
; R0 char *pred_ptr
; R1 short *dif_ptr
; R2 char *dst_ptr
; R3 int stride
|vp8_recon4b_armv6| PROC
stmdb sp!, {r4 - r9, lr}
mov lr, #4
recon4b_loop
;0, 1, 2, 3
ldr r4, [prd], #4 ; 3 | 2 | 1 | 0
ldr r6, [dif, #0] ; 1 | 0
ldr r7, [dif, #4] ; 3 | 2
pkhbt r8, r6, r7, lsl #16 ; 2 | 0
pkhtb r9, r7, r6, asr #16 ; 3 | 1
uxtab16 r8, r8, r4 ; 2 | 0 + 3 | 2 | 2 | 0
uxtab16 r9, r9, r4, ror #8 ; 3 | 1 + 0 | 3 | 2 | 1
usat16 r8, #8, r8
usat16 r9, #8, r9
orr r8, r8, r9, lsl #8
str r8, [dst]
;4, 5, 6, 7
ldr r4, [prd], #4
;; ldr r6, [dif, #32]
;; ldr r7, [dif, #36]
ldr r6, [dif, #8]
ldr r7, [dif, #12]
pkhbt r8, r6, r7, lsl #16
pkhtb r9, r7, r6, asr #16
uxtab16 r8, r8, r4
uxtab16 r9, r9, r4, ror #8
usat16 r8, #8, r8
usat16 r9, #8, r9
orr r8, r8, r9, lsl #8
str r8, [dst, #4]
;8, 9, 10, 11
ldr r4, [prd], #4
;; ldr r6, [dif, #64]
;; ldr r7, [dif, #68]
ldr r6, [dif, #16]
ldr r7, [dif, #20]
pkhbt r8, r6, r7, lsl #16
pkhtb r9, r7, r6, asr #16
uxtab16 r8, r8, r4
uxtab16 r9, r9, r4, ror #8
usat16 r8, #8, r8
usat16 r9, #8, r9
orr r8, r8, r9, lsl #8
str r8, [dst, #8]
;12, 13, 14, 15
ldr r4, [prd], #4
;; ldr r6, [dif, #96]
;; ldr r7, [dif, #100]
ldr r6, [dif, #24]
ldr r7, [dif, #28]
pkhbt r8, r6, r7, lsl #16
pkhtb r9, r7, r6, asr #16
uxtab16 r8, r8, r4
uxtab16 r9, r9, r4, ror #8
usat16 r8, #8, r8
usat16 r9, #8, r9
orr r8, r8, r9, lsl #8
str r8, [dst, #12]
add dst, dst, stride
;; add dif, dif, #8
add dif, dif, #32
subs lr, lr, #1
bne recon4b_loop
ldmia sp!, {r4 - r9, pc}
ENDP ; |Recon4B|
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
;
;
;
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
; R0 char *pred_ptr
; R1 short *dif_ptr
; R2 char *dst_ptr
; R3 int stride
|vp8_recon2b_armv6| PROC
stmdb sp!, {r4 - r9, lr}
mov lr, #4
recon2b_loop
;0, 1, 2, 3
ldr r4, [prd], #4
ldr r6, [dif, #0]
ldr r7, [dif, #4]
pkhbt r8, r6, r7, lsl #16
pkhtb r9, r7, r6, asr #16
uxtab16 r8, r8, r4
uxtab16 r9, r9, r4, ror #8
usat16 r8, #8, r8
usat16 r9, #8, r9
orr r8, r8, r9, lsl #8
str r8, [dst]
;4, 5, 6, 7
ldr r4, [prd], #4
;; ldr r6, [dif, #32]
;; ldr r7, [dif, #36]
ldr r6, [dif, #8]
ldr r7, [dif, #12]
pkhbt r8, r6, r7, lsl #16
pkhtb r9, r7, r6, asr #16
uxtab16 r8, r8, r4
uxtab16 r9, r9, r4, ror #8
usat16 r8, #8, r8
usat16 r9, #8, r9
orr r8, r8, r9, lsl #8
str r8, [dst, #4]
add dst, dst, stride
;; add dif, dif, #8
add dif, dif, #16
subs lr, lr, #1
bne recon2b_loop
ldmia sp!, {r4 - r9, pc}
ENDP ; |Recon2B|
END

@@ -0,0 +1,287 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_loop_filter_simple_horizontal_edge_armv6|
EXPORT |vp8_loop_filter_simple_vertical_edge_armv6|
AREA |.text|, CODE, READONLY ; name this block of code
MACRO
TRANSPOSE_MATRIX $a0, $a1, $a2, $a3, $b0, $b1, $b2, $b3
; input: $a0, $a1, $a2, $a3; output: $b0, $b1, $b2, $b3
; a0: 03 02 01 00
; a1: 13 12 11 10
; a2: 23 22 21 20
; a3: 33 32 31 30
; b3 b2 b1 b0
uxtb16 $b1, $a1 ; xx 12 xx 10
uxtb16 $b0, $a0 ; xx 02 xx 00
uxtb16 $b3, $a3 ; xx 32 xx 30
uxtb16 $b2, $a2 ; xx 22 xx 20
orr $b1, $b0, $b1, lsl #8 ; 12 02 10 00
orr $b3, $b2, $b3, lsl #8 ; 32 22 30 20
uxtb16 $a1, $a1, ror #8 ; xx 13 xx 11
uxtb16 $a3, $a3, ror #8 ; xx 33 xx 31
uxtb16 $a0, $a0, ror #8 ; xx 03 xx 01
uxtb16 $a2, $a2, ror #8 ; xx 23 xx 21
orr $a0, $a0, $a1, lsl #8 ; 13 03 11 01
orr $a2, $a2, $a3, lsl #8 ; 33 23 31 21
pkhtb $b2, $b3, $b1, asr #16 ; 32 22 12 02 -- p1
pkhbt $b0, $b1, $b3, lsl #16 ; 30 20 10 00 -- p3
pkhtb $b3, $a2, $a0, asr #16 ; 33 23 13 03 -- p0
pkhbt $b1, $a0, $a2, lsl #16 ; 31 21 11 01 -- p2
MEND
src RN r0
pstep RN r1
;r0 unsigned char *src_ptr,
;r1 int src_pixel_step,
;r2 const char *flimit,
;r3 const char *limit,
;stack const char *thresh,
;stack int count
; All 16 elements in flimit are equal. So, in the code, only one load is needed
; for flimit. The same applies to limit. thresh is not used in the simple loop filter.
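; Per pixel the simple filter below computes, as an illustrative C sketch (the
; assembly works on four pixels at a time with the uqsub8/qadd8 SIMD instructions,
; and p1/p0/q0/q1 are first made signed by XORing with 0x80):
;
;   mask = (2 * abs(p0 - q0) + abs(p1 - q1) / 2 <= 2 * flimit + limit) ? 0xff : 0;
;   f    = clamp(p1 - q1 + 3 * (q0 - p0)) & mask;  /* clamp() saturates to [-128, 127] */
;   Filter1 = clamp(f + 4) >> 3;   q0 = clamp(q0 - Filter1);
;   Filter2 = clamp(f + 3) >> 3;   p0 = clamp(p0 + Filter2);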
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|vp8_loop_filter_simple_horizontal_edge_armv6| PROC
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
stmdb sp!, {r4 - r11, lr}
ldr r12, [r3] ; limit
ldr r3, [src, -pstep, lsl #1] ; p1
ldr r4, [src, -pstep] ; p0
ldr r5, [src] ; q0
ldr r6, [src, pstep] ; q1
ldr r7, [r2] ; flimit
ldr r2, c0x80808080
ldr r9, [sp, #40] ; count for 8-in-parallel
uadd8 r7, r7, r7 ; flimit * 2
mov r9, r9, lsl #1 ; double the count. we're doing 4 at a time
uadd8 r12, r7, r12 ; flimit * 2 + limit
mov lr, #0 ; need 0 in a couple places
|simple_hnext8|
; vp8_simple_filter_mask()
uqsub8 r7, r3, r6 ; p1 - q1
uqsub8 r8, r6, r3 ; q1 - p1
uqsub8 r10, r4, r5 ; p0 - q0
uqsub8 r11, r5, r4 ; q0 - p0
orr r8, r8, r7 ; abs(p1 - q1)
orr r10, r10, r11 ; abs(p0 - q0)
uqadd8 r10, r10, r10 ; abs(p0 - q0) * 2
uhadd8 r8, r8, lr ; abs(p1 - q1) >> 1
uqadd8 r10, r10, r8 ; abs(p0 - q0)*2 + abs(p1 - q1)/2
mvn r8, #0
usub8 r10, r12, r10 ; compare to flimit. usub8 sets GE flags
sel r10, r8, lr ; filter mask: F or 0
cmp r10, #0
beq simple_hskip_filter ; skip filtering if all masks are 0x00
;vp8_simple_filter()
eor r3, r3, r2 ; p1 offset to convert to a signed value
eor r6, r6, r2 ; q1 offset to convert to a signed value
eor r4, r4, r2 ; p0 offset to convert to a signed value
eor r5, r5, r2 ; q0 offset to convert to a signed value
qsub8 r3, r3, r6 ; vp8_filter = p1 - q1
qsub8 r6, r5, r4 ; q0 - p0
qadd8 r3, r3, r6 ; += q0 - p0
ldr r7, c0x04040404
qadd8 r3, r3, r6 ; += q0 - p0
ldr r8, c0x03030303
qadd8 r3, r3, r6 ; vp8_filter = p1-q1 + 3*(q0-p0))
;STALL
and r3, r3, r10 ; vp8_filter &= mask
qadd8 r7 , r3 , r7 ; Filter1 = vp8_filter + 4
qadd8 r8 , r3 , r8 ; Filter2 = vp8_filter + 3
shadd8 r7 , r7 , lr
shadd8 r8 , r8 , lr
shadd8 r7 , r7 , lr
shadd8 r8 , r8 , lr
shadd8 r7 , r7 , lr ; Filter1 >>= 3
shadd8 r8 , r8 , lr ; Filter2 >>= 3
qsub8 r5 ,r5, r7 ; u = q0 - Filter1
qadd8 r4, r4, r8 ; u = p0 + Filter2
eor r5, r5, r2 ; *oq0 = u^0x80
str r5, [src] ; store oq0 result
eor r4, r4, r2 ; *op0 = u^0x80
str r4, [src, -pstep] ; store op0 result
|simple_hskip_filter|
subs r9, r9, #1
addne src, src, #4 ; next row
ldrne r3, [src, -pstep, lsl #1] ; p1
ldrne r4, [src, -pstep] ; p0
ldrne r5, [src] ; q0
ldrne r6, [src, pstep] ; q1
bne simple_hnext8
ldmia sp!, {r4 - r11, pc}
ENDP ; |vp8_loop_filter_simple_horizontal_edge_armv6|
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|vp8_loop_filter_simple_vertical_edge_armv6| PROC
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
stmdb sp!, {r4 - r11, lr}
ldr r12, [r2] ; r12: flimit
ldr r2, c0x80808080
ldr r7, [r3] ; limit
; load source data to r7, r8, r9, r10
ldrh r3, [src, #-2]
ldrh r4, [src], pstep
uadd8 r12, r12, r12 ; flimit * 2
ldrh r5, [src, #-2]
ldrh r6, [src], pstep
uadd8 r12, r12, r7 ; flimit * 2 + limit
pkhbt r7, r3, r4, lsl #16
ldrh r3, [src, #-2]
ldrh r4, [src], pstep
ldr r11, [sp, #40] ; count (r11) for 8-in-parallel
pkhbt r8, r5, r6, lsl #16
ldrh r5, [src, #-2]
ldrh r6, [src], pstep
mov r11, r11, lsl #1 ; 4-in-parallel
|simple_vnext8|
; vp8_simple_filter_mask() function
pkhbt r9, r3, r4, lsl #16
pkhbt r10, r5, r6, lsl #16
;transpose r7, r8, r9, r10 to r3, r4, r5, r6
TRANSPOSE_MATRIX r7, r8, r9, r10, r3, r4, r5, r6
uqsub8 r7, r3, r6 ; p1 - q1
uqsub8 r8, r6, r3 ; q1 - p1
uqsub8 r9, r4, r5 ; p0 - q0
uqsub8 r10, r5, r4 ; q0 - p0
orr r7, r7, r8 ; abs(p1 - q1)
orr r9, r9, r10 ; abs(p0 - q0)
mov r8, #0
uqadd8 r9, r9, r9 ; abs(p0 - q0) * 2
uhadd8 r7, r7, r8 ; abs(p1 - q1) / 2
uqadd8 r7, r7, r9 ; abs(p0 - q0)*2 + abs(p1 - q1)/2
mvn r10, #0 ; r10 == -1
usub8 r7, r12, r7 ; compare to flimit
sel lr, r10, r8 ; filter mask
cmp lr, #0
beq simple_vskip_filter ; skip filtering
;vp8_simple_filter() function
eor r3, r3, r2 ; p1 offset to convert to a signed value
eor r6, r6, r2 ; q1 offset to convert to a signed value
eor r4, r4, r2 ; p0 offset to convert to a signed value
eor r5, r5, r2 ; q0 offset to convert to a signed value
qsub8 r3, r3, r6 ; vp8_filter = p1 - q1
qsub8 r6, r5, r4 ; q0 - p0
qadd8 r3, r3, r6 ; vp8_filter += q0 - p0
ldr r9, c0x03030303 ; r9 = 3
qadd8 r3, r3, r6 ; vp8_filter += q0 - p0
ldr r7, c0x04040404
qadd8 r3, r3, r6 ; vp8_filter = p1-q1 + 3*(q0-p0))
;STALL
and r3, r3, lr ; vp8_filter &= mask
qadd8 r9 , r3 , r9 ; Filter2 = vp8_filter + 3
qadd8 r3 , r3 , r7 ; Filter1 = vp8_filter + 4
shadd8 r9 , r9 , r8
shadd8 r3 , r3 , r8
shadd8 r9 , r9 , r8
shadd8 r3 , r3 , r8
shadd8 r9 , r9 , r8 ; Filter2 >>= 3
shadd8 r3 , r3 , r8 ; Filter1 >>= 3
;calculate output
sub src, src, pstep, lsl #2
qadd8 r4, r4, r9 ; u = p0 + Filter2
qsub8 r5, r5, r3 ; u = q0 - Filter1
eor r4, r4, r2 ; *op0 = u^0x80
eor r5, r5, r2 ; *oq0 = u^0x80
strb r4, [src, #-1] ; store the result
mov r4, r4, lsr #8
strb r5, [src], pstep
mov r5, r5, lsr #8
strb r4, [src, #-1]
mov r4, r4, lsr #8
strb r5, [src], pstep
mov r5, r5, lsr #8
strb r4, [src, #-1]
mov r4, r4, lsr #8
strb r5, [src], pstep
mov r5, r5, lsr #8
strb r4, [src, #-1]
strb r5, [src], pstep
|simple_vskip_filter|
subs r11, r11, #1
; load source data to r7, r8, r9, r10
ldrneh r3, [src, #-2]
ldrneh r4, [src], pstep
ldrneh r5, [src, #-2]
ldrneh r6, [src], pstep
pkhbt r7, r3, r4, lsl #16
ldrneh r3, [src, #-2]
ldrneh r4, [src], pstep
pkhbt r8, r5, r6, lsl #16
ldrneh r5, [src, #-2]
ldrneh r6, [src], pstep
bne simple_vnext8
ldmia sp!, {r4 - r11, pc}
ENDP ; |vp8_loop_filter_simple_vertical_edge_armv6|
; Constant Pool
c0x80808080 DCD 0x80808080
c0x03030303 DCD 0x03030303
c0x04040404 DCD 0x04040404
END

@@ -0,0 +1,271 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_sixtap_predict8x4_armv6|
AREA |.text|, CODE, READONLY ; name this block of code
;-------------------------------------
; r0 unsigned char *src_ptr,
; r1 int src_pixels_per_line,
; r2 int xoffset,
; r3 int yoffset,
; stack unsigned char *dst_ptr,
; stack int dst_pitch
;-------------------------------------
;note: In the first pass, the result is stored transposed (8 lines x 9 columns) on the stack; the temporary stack size is 184.
;The line width is 20 bytes, i.e. 9 shorts plus 2 bytes of padding for 4-byte alignment. In the second pass, the data is
;loaded back from the stack and the result is again stored transposed.
|vp8_sixtap_predict8x4_armv6| PROC
stmdb sp!, {r4 - r11, lr}
str r3, [sp, #-184]! ;reserve space on stack for temporary storage, store yoffset
cmp r2, #0 ;skip first_pass filter if xoffset=0
add lr, sp, #4 ;point to temporary buffer
beq skip_firstpass_filter
;first-pass filter
ldr r12, _filter8_coeff_
sub r0, r0, r1, lsl #1
add r2, r12, r2, lsl #4 ;calculate filter location
add r0, r0, #3 ;adjust src only for loading convenience
ldr r3, [r2] ; load up packed filter coefficients
ldr r4, [r2, #4]
ldr r5, [r2, #8]
mov r2, #0x90000 ; height=9 is top part of counter
sub r1, r1, #8
|first_pass_hloop_v6|
ldrb r6, [r0, #-5] ; load source data
ldrb r7, [r0, #-4]
ldrb r8, [r0, #-3]
ldrb r9, [r0, #-2]
ldrb r10, [r0, #-1]
orr r2, r2, #0x4 ; construct loop counter. width=8=4x2
pkhbt r6, r6, r7, lsl #16 ; r7 | r6
pkhbt r7, r7, r8, lsl #16 ; r8 | r7
pkhbt r8, r8, r9, lsl #16 ; r9 | r8
pkhbt r9, r9, r10, lsl #16 ; r10 | r9
|first_pass_wloop_v6|
smuad r11, r6, r3 ; vp8_filter[0], vp8_filter[1]
smuad r12, r7, r3
ldrb r6, [r0], #1
smlad r11, r8, r4, r11 ; vp8_filter[2], vp8_filter[3]
ldrb r7, [r0], #1
smlad r12, r9, r4, r12
pkhbt r10, r10, r6, lsl #16 ; r10 | r9
pkhbt r6, r6, r7, lsl #16 ; r11 | r10
smlad r11, r10, r5, r11 ; vp8_filter[4], vp8_filter[5]
smlad r12, r6, r5, r12
sub r2, r2, #1
add r11, r11, #0x40 ; round_shift_and_clamp
tst r2, #0xff ; test loop counter
usat r11, #8, r11, asr #7
add r12, r12, #0x40
strh r11, [lr], #20 ; result is transposed and stored, which makes the second pass easier
usat r12, #8, r12, asr #7
strh r12, [lr], #20
movne r11, r6
movne r12, r7
movne r6, r8
movne r7, r9
movne r8, r10
movne r9, r11
movne r10, r12
bne first_pass_wloop_v6
;;add r9, ppl, #30 ; attempt to load 2 adjacent cache lines
;;IF ARCHITECTURE=6
;pld [src, ppl]
;;pld [src, r9]
;;ENDIF
subs r2, r2, #0x10000
sub lr, lr, #158
add r0, r0, r1 ; move to next input line
bne first_pass_hloop_v6
;second pass filter
secondpass_filter
ldr r3, [sp], #4 ; load back yoffset
ldr r0, [sp, #216] ; load dst address from stack 180+36
ldr r1, [sp, #220] ; load dst stride from stack 180+40
cmp r3, #0
beq skip_secondpass_filter
ldr r12, _filter8_coeff_
add lr, r12, r3, lsl #4 ;calculate filter location
mov r2, #0x00080000
ldr r3, [lr] ; load up packed filter coefficients
ldr r4, [lr, #4]
ldr r5, [lr, #8]
pkhbt r12, r4, r3 ; pack the filter differently
pkhbt r11, r5, r4
second_pass_hloop_v6
ldr r6, [sp] ; load the data
ldr r7, [sp, #4]
orr r2, r2, #2 ; loop counter
second_pass_wloop_v6
smuad lr, r3, r6 ; apply filter
smulbt r10, r3, r6
ldr r8, [sp, #8]
smlad lr, r4, r7, lr
smladx r10, r12, r7, r10
ldrh r9, [sp, #12]
smlad lr, r5, r8, lr
smladx r10, r11, r8, r10
add sp, sp, #4
smlatb r10, r5, r9, r10
sub r2, r2, #1
add lr, lr, #0x40 ; round_shift_and_clamp
tst r2, #0xff
usat lr, #8, lr, asr #7
add r10, r10, #0x40
strb lr, [r0], r1 ; the result is transposed back and stored
usat r10, #8, r10, asr #7
strb r10, [r0],r1
movne r6, r7
movne r7, r8
bne second_pass_wloop_v6
subs r2, r2, #0x10000
add sp, sp, #12 ; update src for next loop (20-8)
sub r0, r0, r1, lsl #2
add r0, r0, #1
bne second_pass_hloop_v6
add sp, sp, #20
ldmia sp!, {r4 - r11, pc}
;--------------------
skip_firstpass_filter
sub r0, r0, r1, lsl #1
sub r1, r1, #8
mov r2, #9
skip_firstpass_hloop
ldrb r4, [r0], #1 ; load data
subs r2, r2, #1
ldrb r5, [r0], #1
strh r4, [lr], #20 ; store it to immediate buffer
ldrb r6, [r0], #1 ; load data
strh r5, [lr], #20
ldrb r7, [r0], #1
strh r6, [lr], #20
ldrb r8, [r0], #1
strh r7, [lr], #20
ldrb r9, [r0], #1
strh r8, [lr], #20
ldrb r10, [r0], #1
strh r9, [lr], #20
ldrb r11, [r0], #1
strh r10, [lr], #20
add r0, r0, r1 ; move to next input line
strh r11, [lr], #20
sub lr, lr, #158 ; move over to next column
bne skip_firstpass_hloop
b secondpass_filter
;--------------------
skip_secondpass_filter
mov r2, #8
add sp, sp, #4 ;start from src[0] instead of src[-2]
skip_secondpass_hloop
ldr r6, [sp], #4
subs r2, r2, #1
ldr r8, [sp], #4
mov r7, r6, lsr #16 ; unpack
strb r6, [r0], r1
mov r9, r8, lsr #16
strb r7, [r0], r1
add sp, sp, #12 ; 20-8
strb r8, [r0], r1
strb r9, [r0], r1
sub r0, r0, r1, lsl #2
add r0, r0, #1
bne skip_secondpass_hloop
add sp, sp, #16 ; 180 - (160 +4)
ldmia sp!, {r4 - r11, pc}
ENDP
;-----------------
AREA subpelfilters8_dat, DATA, READWRITE ;read/write by default
;Data section with the name subpelfilters8_dat is specified. Each DCD reserves one word of storage for the packed data.
;The label filter8_coeff can be used to access the data.
;Data addresses: filter8_coeff, filter8_coeff+4, filter8_coeff+8 ...
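;Note on the packed layout below (as read from the values): each row holds one
;6-tap filter with its taps c0..c5 packed into halfword pairs (c1|c0), (c3|c2),
;(c5|c4) and padded to 16 bytes; the plain coefficients are kept in the comments
;underneath for reference.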
_filter8_coeff_
DCD filter8_coeff
filter8_coeff
DCD 0x00000000, 0x00000080, 0x00000000, 0x00000000
DCD 0xfffa0000, 0x000c007b, 0x0000ffff, 0x00000000
DCD 0xfff50002, 0x0024006c, 0x0001fff8, 0x00000000
DCD 0xfff70000, 0x0032005d, 0x0000fffa, 0x00000000
DCD 0xfff00003, 0x004d004d, 0x0003fff0, 0x00000000
DCD 0xfffa0000, 0x005d0032, 0x0000fff7, 0x00000000
DCD 0xfff80001, 0x006c0024, 0x0002fff5, 0x00000000
DCD 0xffff0000, 0x007b000c, 0x0000fffa, 0x00000000
;DCD 0, 0, 128, 0, 0, 0
;DCD 0, -6, 123, 12, -1, 0
;DCD 2, -11, 108, 36, -8, 1
;DCD 0, -9, 93, 50, -6, 0
;DCD 3, -16, 77, 77, -16, 3
;DCD 0, -6, 50, 93, -9, 0
;DCD 1, -8, 36, 108, -11, 2
;DCD 0, -1, 12, 123, -6, 0
END

@@ -0,0 +1,212 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <math.h>
#include "subpixel.h"
#define BLOCK_HEIGHT_WIDTH 4
#define VP8_FILTER_WEIGHT 128
#define VP8_FILTER_SHIFT 7
static const short bilinear_filters[8][2] =
{
{ 128, 0 },
{ 112, 16 },
{ 96, 32 },
{ 80, 48 },
{ 64, 64 },
{ 48, 80 },
{ 32, 96 },
{ 16, 112 }
};
extern void vp8_filter_block2d_bil_first_pass_armv6
(
unsigned char *src_ptr,
unsigned short *output_ptr,
unsigned int src_pixels_per_line,
unsigned int output_height,
unsigned int output_width,
const short *vp8_filter
);
extern void vp8_filter_block2d_bil_second_pass_armv6
(
unsigned short *src_ptr,
unsigned char *output_ptr,
int output_pitch,
unsigned int output_height,
unsigned int output_width,
const short *vp8_filter
);
#if 0
void vp8_filter_block2d_bil_first_pass_6
(
unsigned char *src_ptr,
unsigned short *output_ptr,
unsigned int src_pixels_per_line,
unsigned int output_height,
unsigned int output_width,
const short *vp8_filter
)
{
unsigned int i, j;
for ( i=0; i<output_height; i++ )
{
for ( j=0; j<output_width; j++ )
{
/* Apply bilinear filter */
output_ptr[j] = ( ( (int)src_ptr[0] * vp8_filter[0]) +
((int)src_ptr[1] * vp8_filter[1]) +
(VP8_FILTER_WEIGHT/2) ) >> VP8_FILTER_SHIFT;
src_ptr++;
}
/* Next row... */
src_ptr += src_pixels_per_line - output_width;
output_ptr += output_width;
}
}
void vp8_filter_block2d_bil_second_pass_6
(
unsigned short *src_ptr,
unsigned char *output_ptr,
int output_pitch,
unsigned int output_height,
unsigned int output_width,
const short *vp8_filter
)
{
unsigned int i,j;
int Temp;
for ( i=0; i<output_height; i++ )
{
for ( j=0; j<output_width; j++ )
{
/* Apply filter */
Temp = ((int)src_ptr[0] * vp8_filter[0]) +
((int)src_ptr[output_width] * vp8_filter[1]) +
(VP8_FILTER_WEIGHT/2);
output_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
src_ptr++;
}
/* Next row... */
/*src_ptr += src_pixels_per_line - output_width;*/
output_ptr += output_pitch;
}
}
#endif
void vp8_filter_block2d_bil_armv6
(
unsigned char *src_ptr,
unsigned char *output_ptr,
unsigned int src_pixels_per_line,
unsigned int dst_pitch,
const short *HFilter,
const short *VFilter,
int Width,
int Height
)
{
unsigned short FData[36*16]; /* Temp data buffer used in filtering */
/* First filter 1-D horizontally... */
/* pixel_step = 1; */
vp8_filter_block2d_bil_first_pass_armv6(src_ptr, FData, src_pixels_per_line, Height + 1, Width, HFilter);
/* then 1-D vertically... */
vp8_filter_block2d_bil_second_pass_armv6(FData, output_ptr, dst_pitch, Height, Width, VFilter);
}
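/* The first pass filters Height + 1 rows because the second (vertical) pass is a
 * two-tap filter that reads one extra row of intermediate data below the block.
 * The bilinear kernel itself is just a weighted average; e.g. for xoffset == 4 the
 * filter is { 64, 64 }, so each output pixel is
 *     out = (64 * src[0] + 64 * src[1] + 64) >> 7
 * i.e. the rounded average of two neighbouring samples. */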
void vp8_bilinear_predict4x4_armv6
(
unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
unsigned char *dst_ptr,
int dst_pitch
)
{
const short *HFilter;
const short *VFilter;
HFilter = bilinear_filters[xoffset];
VFilter = bilinear_filters[yoffset];
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);
}
void vp8_bilinear_predict8x8_armv6
(
unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
unsigned char *dst_ptr,
int dst_pitch
)
{
const short *HFilter;
const short *VFilter;
HFilter = bilinear_filters[xoffset];
VFilter = bilinear_filters[yoffset];
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);
}
void vp8_bilinear_predict8x4_armv6
(
unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
unsigned char *dst_ptr,
int dst_pitch
)
{
const short *HFilter;
const short *VFilter;
HFilter = bilinear_filters[xoffset];
VFilter = bilinear_filters[yoffset];
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);
}
void vp8_bilinear_predict16x16_armv6
(
unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
unsigned char *dst_ptr,
int dst_pitch
)
{
const short *HFilter;
const short *VFilter;
HFilter = bilinear_filters[xoffset];
VFilter = bilinear_filters[yoffset];
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
}

@@ -0,0 +1,256 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_ports/config.h"
#include <math.h>
#include "subpixel.h"
#include "vpx_ports/mem.h"
#define BLOCK_HEIGHT_WIDTH 4
#define VP8_FILTER_WEIGHT 128
#define VP8_FILTER_SHIFT 7
DECLARE_ALIGNED(16, static const short, sub_pel_filters[8][6]) =
{
{ 0, 0, 128, 0, 0, 0 }, /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */
{ 0, -6, 123, 12, -1, 0 },
{ 2, -11, 108, 36, -8, 1 }, /* New 1/4 pel 6 tap filter */
{ 0, -9, 93, 50, -6, 0 },
{ 3, -16, 77, 77, -16, 3 }, /* New 1/2 pel 6 tap filter */
{ 0, -6, 50, 93, -9, 0 },
{ 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */
{ 0, -1, 12, 123, -6, 0 },
};
extern void vp8_filter_block2d_first_pass_armv6
(
unsigned char *src_ptr,
short *output_ptr,
unsigned int src_pixels_per_line,
unsigned int output_width,
unsigned int output_height,
const short *vp8_filter
);
extern void vp8_filter_block2d_second_pass_armv6
(
short *src_ptr,
unsigned char *output_ptr,
unsigned int output_pitch,
unsigned int cnt,
const short *vp8_filter
);
extern void vp8_filter4_block2d_second_pass_armv6
(
short *src_ptr,
unsigned char *output_ptr,
unsigned int output_pitch,
unsigned int cnt,
const short *vp8_filter
);
extern void vp8_filter_block2d_first_pass_only_armv6
(
unsigned char *src_ptr,
unsigned char *output_ptr,
unsigned int src_pixels_per_line,
unsigned int cnt,
unsigned int output_pitch,
const short *vp8_filter
);
extern void vp8_filter_block2d_second_pass_only_armv6
(
unsigned char *src_ptr,
unsigned char *output_ptr,
unsigned int src_pixels_per_line,
unsigned int cnt,
unsigned int output_pitch,
const short *vp8_filter
);
#if HAVE_ARMV6
void vp8_sixtap_predict_armv6
(
unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
unsigned char *dst_ptr,
int dst_pitch
)
{
const short *HFilter;
const short *VFilter;
DECLARE_ALIGNED_ARRAY(4, short, FData, 12*4); /* Temp data buffer used in filtering */
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
/* Vfilter is null. First pass only */
if (xoffset && !yoffset)
{
/*vp8_filter_block2d_first_pass_armv6 ( src_ptr, FData+2, src_pixels_per_line, 4, 4, HFilter );
vp8_filter_block2d_second_pass_armv6 ( FData+2, dst_ptr, dst_pitch, 4, VFilter );*/
vp8_filter_block2d_first_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 4, dst_pitch, HFilter);
}
/* Hfilter is null. Second pass only */
else if (!xoffset && yoffset)
{
vp8_filter_block2d_second_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 4, dst_pitch, VFilter);
}
else
{
/* Vfilter is a 4 tap filter */
if (yoffset & 0x1)
{
vp8_filter_block2d_first_pass_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 4, 7, HFilter);
vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 4, VFilter);
}
/* Vfilter is 6 tap filter */
else
{
vp8_filter_block2d_first_pass_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 4, 9, HFilter);
vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 4, VFilter);
}
}
}
#if 0
void vp8_sixtap_predict8x4_armv6
(
unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
unsigned char *dst_ptr,
int dst_pitch
)
{
const short *HFilter;
const short *VFilter;
DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); /* Temp data buffer used in filtering */
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
/*if (xoffset && !yoffset)
{
vp8_filter_block2d_first_pass_only_armv6 ( src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, HFilter );
}*/
/* Hfilter is null. Second pass only */
/*else if (!xoffset && yoffset)
{
vp8_filter_block2d_second_pass_only_armv6 ( src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, VFilter );
}
else
{
if (yoffset & 0x1)
vp8_filter_block2d_first_pass_armv6 ( src_ptr-src_pixels_per_line, FData+1, src_pixels_per_line, 8, 7, HFilter );
else*/
vp8_filter_block2d_first_pass_armv6 ( src_ptr-(2*src_pixels_per_line), FData, src_pixels_per_line, 8, 9, HFilter );
vp8_filter_block2d_second_pass_armv6 ( FData+2, dst_ptr, dst_pitch, 4, 8, VFilter );
/*}*/
}
#endif
void vp8_sixtap_predict8x8_armv6
(
unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
unsigned char *dst_ptr,
int dst_pitch
)
{
const short *HFilter;
const short *VFilter;
DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); /* Temp data buffer used in filtering */
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
if (xoffset && !yoffset)
{
vp8_filter_block2d_first_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, HFilter);
}
/* Hfilter is null. Second pass only */
else if (!xoffset && yoffset)
{
vp8_filter_block2d_second_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, VFilter);
}
else
{
if (yoffset & 0x1)
{
vp8_filter_block2d_first_pass_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 8, 11, HFilter);
vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 8, VFilter);
}
else
{
vp8_filter_block2d_first_pass_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 8, 13, HFilter);
vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 8, VFilter);
}
}
}
void vp8_sixtap_predict16x16_armv6
(
unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
unsigned char *dst_ptr,
int dst_pitch
)
{
const short *HFilter;
const short *VFilter;
DECLARE_ALIGNED_ARRAY(4, short, FData, 24*16); /* Temp data buffer used in filtering */
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
if (xoffset && !yoffset)
{
vp8_filter_block2d_first_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 16, dst_pitch, HFilter);
}
/* Hfilter is null. Second pass only */
else if (!xoffset && yoffset)
{
vp8_filter_block2d_second_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 16, dst_pitch, VFilter);
}
else
{
if (yoffset & 0x1)
{
vp8_filter_block2d_first_pass_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 16, 19, HFilter);
vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 16, VFilter);
}
else
{
vp8_filter_block2d_first_pass_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 16, 21, HFilter);
vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 16, VFilter);
}
}
}
#endif
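Note: the (yoffset & 0x1) branches above exploit a property of the sub_pel_filters table: its odd-indexed rows (the 1/8-, 3/8-, 5/8- and 7/8-pel positions) have zero outer taps, so the vertical filter is effectively 4-tap. Those paths can therefore call vp8_filter4_block2d_second_pass_armv6 and only need output_height + 3 intermediate rows from the first pass (7/11/19) instead of the output_height + 5 rows (9/13/21) required by the full 6-tap second pass. A hypothetical self-check of that property (not part of the source):

#include <assert.h>

/* Illustrative helper: the odd-offset rows of an 8x6 sub-pel filter table are
 * effectively 4-tap because their first and last taps are zero. */
static void check_four_tap_rows(const short filters[8][6])
{
    int i;
    for (i = 1; i < 8; i += 2)
        assert(filters[i][0] == 0 && filters[i][5] == 0);
}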

View file

@ -0,0 +1,65 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef IDCT_ARM_H
#define IDCT_ARM_H
#if HAVE_ARMV6
extern prototype_idct(vp8_short_idct4x4llm_1_v6);
extern prototype_idct(vp8_short_idct4x4llm_v6_dual);
extern prototype_idct_scalar_add(vp8_dc_only_idct_add_v6);
extern prototype_second_order(vp8_short_inv_walsh4x4_1_v6);
extern prototype_second_order(vp8_short_inv_walsh4x4_v6);
#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_idct_idct1
#define vp8_idct_idct1 vp8_short_idct4x4llm_1_v6
#undef vp8_idct_idct16
#define vp8_idct_idct16 vp8_short_idct4x4llm_v6_dual
#undef vp8_idct_idct1_scalar_add
#define vp8_idct_idct1_scalar_add vp8_dc_only_idct_add_v6
#undef vp8_idct_iwalsh1
#define vp8_idct_iwalsh1 vp8_short_inv_walsh4x4_1_v6
#undef vp8_idct_iwalsh16
#define vp8_idct_iwalsh16 vp8_short_inv_walsh4x4_v6
#endif
#endif
#if HAVE_ARMV7
extern prototype_idct(vp8_short_idct4x4llm_1_neon);
extern prototype_idct(vp8_short_idct4x4llm_neon);
extern prototype_idct_scalar_add(vp8_dc_only_idct_add_neon);
extern prototype_second_order(vp8_short_inv_walsh4x4_1_neon);
extern prototype_second_order(vp8_short_inv_walsh4x4_neon);
#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_idct_idct1
#define vp8_idct_idct1 vp8_short_idct4x4llm_1_neon
#undef vp8_idct_idct16
#define vp8_idct_idct16 vp8_short_idct4x4llm_neon
#undef vp8_idct_idct1_scalar_add
#define vp8_idct_idct1_scalar_add vp8_dc_only_idct_add_neon
#undef vp8_idct_iwalsh1
#define vp8_idct_iwalsh1 vp8_short_inv_walsh4x4_1_neon
#undef vp8_idct_iwalsh16
#define vp8_idct_iwalsh16 vp8_short_inv_walsh4x4_neon
#endif
#endif
#endif

View file

@ -0,0 +1,237 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_ports/config.h"
#include <math.h>
#include "loopfilter.h"
#include "onyxc_int.h"
extern prototype_loopfilter(vp8_loop_filter_horizontal_edge_armv6);
extern prototype_loopfilter(vp8_loop_filter_vertical_edge_armv6);
extern prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_armv6);
extern prototype_loopfilter(vp8_mbloop_filter_vertical_edge_armv6);
extern prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_armv6);
extern prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_armv6);
extern prototype_loopfilter(vp8_loop_filter_horizontal_edge_y_neon);
extern prototype_loopfilter(vp8_loop_filter_vertical_edge_y_neon);
extern prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_y_neon);
extern prototype_loopfilter(vp8_mbloop_filter_vertical_edge_y_neon);
extern prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_neon);
extern prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_neon);
extern loop_filter_uvfunction vp8_loop_filter_horizontal_edge_uv_neon;
extern loop_filter_uvfunction vp8_loop_filter_vertical_edge_uv_neon;
extern loop_filter_uvfunction vp8_mbloop_filter_horizontal_edge_uv_neon;
extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_neon;
#if HAVE_ARMV6
/*ARMV6 loopfilter functions*/
/* Horizontal MB filtering */
void vp8_loop_filter_mbh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
(void) simpler_lpf;
vp8_mbloop_filter_horizontal_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
if (u_ptr)
vp8_mbloop_filter_horizontal_edge_armv6(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
if (v_ptr)
vp8_mbloop_filter_horizontal_edge_armv6(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
}
void vp8_loop_filter_mbhs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
(void) u_ptr;
(void) v_ptr;
(void) uv_stride;
(void) simpler_lpf;
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
}
/* Vertical MB Filtering */
void vp8_loop_filter_mbv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
(void) simpler_lpf;
vp8_mbloop_filter_vertical_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
if (u_ptr)
vp8_mbloop_filter_vertical_edge_armv6(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
if (v_ptr)
vp8_mbloop_filter_vertical_edge_armv6(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
}
void vp8_loop_filter_mbvs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
(void) u_ptr;
(void) v_ptr;
(void) uv_stride;
(void) simpler_lpf;
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
}
/* Horizontal B Filtering */
void vp8_loop_filter_bh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
(void) simpler_lpf;
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
if (u_ptr)
vp8_loop_filter_horizontal_edge_armv6(u_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
if (v_ptr)
vp8_loop_filter_horizontal_edge_armv6(v_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
}
void vp8_loop_filter_bhs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
(void) u_ptr;
(void) v_ptr;
(void) uv_stride;
(void) simpler_lpf;
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
}
/* Vertical B Filtering */
void vp8_loop_filter_bv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
(void) simpler_lpf;
vp8_loop_filter_vertical_edge_armv6(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_vertical_edge_armv6(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_vertical_edge_armv6(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
if (u_ptr)
vp8_loop_filter_vertical_edge_armv6(u_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
if (v_ptr)
vp8_loop_filter_vertical_edge_armv6(v_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
}
void vp8_loop_filter_bvs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
(void) u_ptr;
(void) v_ptr;
(void) uv_stride;
(void) simpler_lpf;
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
}
#endif
#if HAVE_ARMV7
/* NEON loopfilter functions */
/* Horizontal MB filtering */
void vp8_loop_filter_mbh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
(void) simpler_lpf;
vp8_mbloop_filter_horizontal_edge_y_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
if (u_ptr)
vp8_mbloop_filter_horizontal_edge_uv_neon(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, v_ptr);
}
void vp8_loop_filter_mbhs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
(void) u_ptr;
(void) v_ptr;
(void) uv_stride;
(void) simpler_lpf;
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
}
/* Vertical MB Filtering */
void vp8_loop_filter_mbv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
(void) simpler_lpf;
vp8_mbloop_filter_vertical_edge_y_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
if (u_ptr)
vp8_mbloop_filter_vertical_edge_uv_neon(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, v_ptr);
}
void vp8_loop_filter_mbvs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
(void) u_ptr;
(void) v_ptr;
(void) uv_stride;
(void) simpler_lpf;
vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
}
/* Horizontal B Filtering */
void vp8_loop_filter_bh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
(void) simpler_lpf;
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
if (u_ptr)
vp8_loop_filter_horizontal_edge_uv_neon(u_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, v_ptr + 4 * uv_stride);
}
void vp8_loop_filter_bhs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
(void) u_ptr;
(void) v_ptr;
(void) uv_stride;
(void) simpler_lpf;
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
}
/* Vertical B Filtering */
void vp8_loop_filter_bv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
(void) simpler_lpf;
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
if (u_ptr)
vp8_loop_filter_vertical_edge_uv_neon(u_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, v_ptr + 4);
}
void vp8_loop_filter_bvs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
(void) u_ptr;
(void) v_ptr;
(void) uv_stride;
(void) simpler_lpf;
vp8_loop_filter_simple_vertical_edge_neon(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_vertical_edge_neon(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_vertical_edge_neon(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
}
#endif

View file

@ -0,0 +1,89 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef LOOPFILTER_ARM_H
#define LOOPFILTER_ARM_H
#if HAVE_ARMV6
extern prototype_loopfilter_block(vp8_loop_filter_mbv_armv6);
extern prototype_loopfilter_block(vp8_loop_filter_bv_armv6);
extern prototype_loopfilter_block(vp8_loop_filter_mbh_armv6);
extern prototype_loopfilter_block(vp8_loop_filter_bh_armv6);
extern prototype_loopfilter_block(vp8_loop_filter_mbvs_armv6);
extern prototype_loopfilter_block(vp8_loop_filter_bvs_armv6);
extern prototype_loopfilter_block(vp8_loop_filter_mbhs_armv6);
extern prototype_loopfilter_block(vp8_loop_filter_bhs_armv6);
#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_lf_normal_mb_v
#define vp8_lf_normal_mb_v vp8_loop_filter_mbv_armv6
#undef vp8_lf_normal_b_v
#define vp8_lf_normal_b_v vp8_loop_filter_bv_armv6
#undef vp8_lf_normal_mb_h
#define vp8_lf_normal_mb_h vp8_loop_filter_mbh_armv6
#undef vp8_lf_normal_b_h
#define vp8_lf_normal_b_h vp8_loop_filter_bh_armv6
#undef vp8_lf_simple_mb_v
#define vp8_lf_simple_mb_v vp8_loop_filter_mbvs_armv6
#undef vp8_lf_simple_b_v
#define vp8_lf_simple_b_v vp8_loop_filter_bvs_armv6
#undef vp8_lf_simple_mb_h
#define vp8_lf_simple_mb_h vp8_loop_filter_mbhs_armv6
#undef vp8_lf_simple_b_h
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_armv6
#endif
#endif
#if HAVE_ARMV7
extern prototype_loopfilter_block(vp8_loop_filter_mbv_neon);
extern prototype_loopfilter_block(vp8_loop_filter_bv_neon);
extern prototype_loopfilter_block(vp8_loop_filter_mbh_neon);
extern prototype_loopfilter_block(vp8_loop_filter_bh_neon);
extern prototype_loopfilter_block(vp8_loop_filter_mbvs_neon);
extern prototype_loopfilter_block(vp8_loop_filter_bvs_neon);
extern prototype_loopfilter_block(vp8_loop_filter_mbhs_neon);
extern prototype_loopfilter_block(vp8_loop_filter_bhs_neon);
#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_lf_normal_mb_v
#define vp8_lf_normal_mb_v vp8_loop_filter_mbv_neon
#undef vp8_lf_normal_b_v
#define vp8_lf_normal_b_v vp8_loop_filter_bv_neon
#undef vp8_lf_normal_mb_h
#define vp8_lf_normal_mb_h vp8_loop_filter_mbh_neon
#undef vp8_lf_normal_b_h
#define vp8_lf_normal_b_h vp8_loop_filter_bh_neon
#undef vp8_lf_simple_mb_v
#define vp8_lf_simple_mb_v vp8_loop_filter_mbvs_neon
#undef vp8_lf_simple_b_v
#define vp8_lf_simple_b_v vp8_loop_filter_bvs_neon
#undef vp8_lf_simple_mb_h
#define vp8_lf_simple_mb_h vp8_loop_filter_mbhs_neon
#undef vp8_lf_simple_b_h
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_neon
#endif
#endif
#endif

View file

@ -0,0 +1,362 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_bilinear_predict16x16_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
; r0 unsigned char *src_ptr,
; r1 int src_pixels_per_line,
; r2 int xoffset,
; r3 int yoffset,
; r4 unsigned char *dst_ptr,
; stack(r5) int dst_pitch
|vp8_bilinear_predict16x16_neon| PROC
push {r4-r5, lr}
ldr r12, _bifilter16_coeff_
ldr r4, [sp, #12] ;load parameters from stack
ldr r5, [sp, #16] ;load parameters from stack
cmp r2, #0 ;skip first_pass filter if xoffset=0
beq secondpass_bfilter16x16_only
add r2, r12, r2, lsl #3 ;calculate filter location
cmp r3, #0 ;skip second_pass filter if yoffset=0
vld1.s32 {d31}, [r2] ;load first_pass filter
beq firstpass_bfilter16x16_only
sub sp, sp, #272 ;reserve space on stack for temporary storage
vld1.u8 {d2, d3, d4}, [r0], r1 ;load src data
mov lr, sp
vld1.u8 {d5, d6, d7}, [r0], r1
mov r2, #3 ;loop counter
vld1.u8 {d8, d9, d10}, [r0], r1
vdup.8 d0, d31[0] ;first_pass filter (d0 d1)
vld1.u8 {d11, d12, d13}, [r0], r1
vdup.8 d1, d31[4]
;First Pass: output_height lines x output_width columns (17x16)
filt_blk2d_fp16x16_loop_neon
pld [r0]
pld [r0, r1]
pld [r0, r1, lsl #1]
vmull.u8 q7, d2, d0 ;(src_ptr[0] * vp8_filter[0])
vmull.u8 q8, d3, d0
vmull.u8 q9, d5, d0
vmull.u8 q10, d6, d0
vmull.u8 q11, d8, d0
vmull.u8 q12, d9, d0
vmull.u8 q13, d11, d0
vmull.u8 q14, d12, d0
vext.8 d2, d2, d3, #1 ;construct src_ptr[1]
vext.8 d5, d5, d6, #1
vext.8 d8, d8, d9, #1
vext.8 d11, d11, d12, #1
vmlal.u8 q7, d2, d1 ;(src_ptr[1] * vp8_filter[1])
vmlal.u8 q9, d5, d1
vmlal.u8 q11, d8, d1
vmlal.u8 q13, d11, d1
vext.8 d3, d3, d4, #1
vext.8 d6, d6, d7, #1
vext.8 d9, d9, d10, #1
vext.8 d12, d12, d13, #1
vmlal.u8 q8, d3, d1 ;(src_ptr[1] * vp8_filter[1])
vmlal.u8 q10, d6, d1
vmlal.u8 q12, d9, d1
vmlal.u8 q14, d12, d1
subs r2, r2, #1
vqrshrn.u16 d14, q7, #7 ;shift/round/saturate to u8
vqrshrn.u16 d15, q8, #7
vqrshrn.u16 d16, q9, #7
vqrshrn.u16 d17, q10, #7
vqrshrn.u16 d18, q11, #7
vqrshrn.u16 d19, q12, #7
vqrshrn.u16 d20, q13, #7
vld1.u8 {d2, d3, d4}, [r0], r1 ;load src data
vqrshrn.u16 d21, q14, #7
vld1.u8 {d5, d6, d7}, [r0], r1
vst1.u8 {d14, d15, d16, d17}, [lr]! ;store result
vld1.u8 {d8, d9, d10}, [r0], r1
vst1.u8 {d18, d19, d20, d21}, [lr]!
vld1.u8 {d11, d12, d13}, [r0], r1
bne filt_blk2d_fp16x16_loop_neon
;First-pass filtering for the remaining 5 lines
vld1.u8 {d14, d15, d16}, [r0], r1
vmull.u8 q9, d2, d0 ;(src_ptr[0] * vp8_filter[0])
vmull.u8 q10, d3, d0
vmull.u8 q11, d5, d0
vmull.u8 q12, d6, d0
vmull.u8 q13, d8, d0
vmull.u8 q14, d9, d0
vext.8 d2, d2, d3, #1 ;construct src_ptr[1]
vext.8 d5, d5, d6, #1
vext.8 d8, d8, d9, #1
vmlal.u8 q9, d2, d1 ;(src_ptr[1] * vp8_filter[1])
vmlal.u8 q11, d5, d1
vmlal.u8 q13, d8, d1
vext.8 d3, d3, d4, #1
vext.8 d6, d6, d7, #1
vext.8 d9, d9, d10, #1
vmlal.u8 q10, d3, d1 ;(src_ptr[1] * vp8_filter[1])
vmlal.u8 q12, d6, d1
vmlal.u8 q14, d9, d1
vmull.u8 q1, d11, d0
vmull.u8 q2, d12, d0
vmull.u8 q3, d14, d0
vmull.u8 q4, d15, d0
vext.8 d11, d11, d12, #1 ;construct src_ptr[1]
vext.8 d14, d14, d15, #1
vmlal.u8 q1, d11, d1 ;(src_ptr[1] * vp8_filter[1])
vmlal.u8 q3, d14, d1
vext.8 d12, d12, d13, #1
vext.8 d15, d15, d16, #1
vmlal.u8 q2, d12, d1 ;(src_ptr[1] * vp8_filter[1])
vmlal.u8 q4, d15, d1
vqrshrn.u16 d10, q9, #7 ;shift/round/saturate to u8
vqrshrn.u16 d11, q10, #7
vqrshrn.u16 d12, q11, #7
vqrshrn.u16 d13, q12, #7
vqrshrn.u16 d14, q13, #7
vqrshrn.u16 d15, q14, #7
vqrshrn.u16 d16, q1, #7
vqrshrn.u16 d17, q2, #7
vqrshrn.u16 d18, q3, #7
vqrshrn.u16 d19, q4, #7
vst1.u8 {d10, d11, d12, d13}, [lr]! ;store result
vst1.u8 {d14, d15, d16, d17}, [lr]!
vst1.u8 {d18, d19}, [lr]!
;Second pass: 16x16
;secondpass_filter
add r3, r12, r3, lsl #3
sub lr, lr, #272
vld1.u32 {d31}, [r3] ;load second_pass filter
vld1.u8 {d22, d23}, [lr]! ;load src data
vdup.8 d0, d31[0] ;second_pass filter parameters (d0 d1)
vdup.8 d1, d31[4]
mov r12, #4 ;loop counter
filt_blk2d_sp16x16_loop_neon
vld1.u8 {d24, d25}, [lr]!
vmull.u8 q1, d22, d0 ;(src_ptr[0] * vp8_filter[0])
vld1.u8 {d26, d27}, [lr]!
vmull.u8 q2, d23, d0
vld1.u8 {d28, d29}, [lr]!
vmull.u8 q3, d24, d0
vld1.u8 {d30, d31}, [lr]!
vmull.u8 q4, d25, d0
vmull.u8 q5, d26, d0
vmull.u8 q6, d27, d0
vmull.u8 q7, d28, d0
vmull.u8 q8, d29, d0
vmlal.u8 q1, d24, d1 ;(src_ptr[pixel_step] * vp8_filter[1])
vmlal.u8 q2, d25, d1
vmlal.u8 q3, d26, d1
vmlal.u8 q4, d27, d1
vmlal.u8 q5, d28, d1
vmlal.u8 q6, d29, d1
vmlal.u8 q7, d30, d1
vmlal.u8 q8, d31, d1
subs r12, r12, #1
vqrshrn.u16 d2, q1, #7 ;shift/round/saturate to u8
vqrshrn.u16 d3, q2, #7
vqrshrn.u16 d4, q3, #7
vqrshrn.u16 d5, q4, #7
vqrshrn.u16 d6, q5, #7
vqrshrn.u16 d7, q6, #7
vqrshrn.u16 d8, q7, #7
vqrshrn.u16 d9, q8, #7
vst1.u8 {d2, d3}, [r4], r5 ;store result
vst1.u8 {d4, d5}, [r4], r5
vst1.u8 {d6, d7}, [r4], r5
vmov q11, q15
vst1.u8 {d8, d9}, [r4], r5
bne filt_blk2d_sp16x16_loop_neon
add sp, sp, #272
pop {r4-r5,pc}
;--------------------
firstpass_bfilter16x16_only
mov r2, #4 ;loop counter
vdup.8 d0, d31[0] ;first_pass filter (d0 d1)
vdup.8 d1, d31[4]
;First Pass: output_height lines x output_width columns (16x16)
filt_blk2d_fpo16x16_loop_neon
vld1.u8 {d2, d3, d4}, [r0], r1 ;load src data
vld1.u8 {d5, d6, d7}, [r0], r1
vld1.u8 {d8, d9, d10}, [r0], r1
vld1.u8 {d11, d12, d13}, [r0], r1
pld [r0]
pld [r0, r1]
pld [r0, r1, lsl #1]
vmull.u8 q7, d2, d0 ;(src_ptr[0] * vp8_filter[0])
vmull.u8 q8, d3, d0
vmull.u8 q9, d5, d0
vmull.u8 q10, d6, d0
vmull.u8 q11, d8, d0
vmull.u8 q12, d9, d0
vmull.u8 q13, d11, d0
vmull.u8 q14, d12, d0
vext.8 d2, d2, d3, #1 ;construct src_ptr[1]
vext.8 d5, d5, d6, #1
vext.8 d8, d8, d9, #1
vext.8 d11, d11, d12, #1
vmlal.u8 q7, d2, d1 ;(src_ptr[1] * vp8_filter[1])
vmlal.u8 q9, d5, d1
vmlal.u8 q11, d8, d1
vmlal.u8 q13, d11, d1
vext.8 d3, d3, d4, #1
vext.8 d6, d6, d7, #1
vext.8 d9, d9, d10, #1
vext.8 d12, d12, d13, #1
vmlal.u8 q8, d3, d1 ;(src_ptr[1] * vp8_filter[1])
vmlal.u8 q10, d6, d1
vmlal.u8 q12, d9, d1
vmlal.u8 q14, d12, d1
subs r2, r2, #1
vqrshrn.u16 d14, q7, #7 ;shift/round/saturate to u8
vqrshrn.u16 d15, q8, #7
vqrshrn.u16 d16, q9, #7
vqrshrn.u16 d17, q10, #7
vqrshrn.u16 d18, q11, #7
vqrshrn.u16 d19, q12, #7
vqrshrn.u16 d20, q13, #7
vst1.u8 {d14, d15}, [r4], r5 ;store result
vqrshrn.u16 d21, q14, #7
vst1.u8 {d16, d17}, [r4], r5
vst1.u8 {d18, d19}, [r4], r5
vst1.u8 {d20, d21}, [r4], r5
bne filt_blk2d_fpo16x16_loop_neon
pop {r4-r5,pc}
;---------------------
secondpass_bfilter16x16_only
;Second pass: 16x16
;secondpass_filter
add r3, r12, r3, lsl #3
mov r12, #4 ;loop counter
vld1.u32 {d31}, [r3] ;load second_pass filter
vld1.u8 {d22, d23}, [r0], r1 ;load src data
vdup.8 d0, d31[0] ;second_pass filter parameters (d0 d1)
vdup.8 d1, d31[4]
filt_blk2d_spo16x16_loop_neon
vld1.u8 {d24, d25}, [r0], r1
vmull.u8 q1, d22, d0 ;(src_ptr[0] * vp8_filter[0])
vld1.u8 {d26, d27}, [r0], r1
vmull.u8 q2, d23, d0
vld1.u8 {d28, d29}, [r0], r1
vmull.u8 q3, d24, d0
vld1.u8 {d30, d31}, [r0], r1
vmull.u8 q4, d25, d0
vmull.u8 q5, d26, d0
vmull.u8 q6, d27, d0
vmull.u8 q7, d28, d0
vmull.u8 q8, d29, d0
vmlal.u8 q1, d24, d1 ;(src_ptr[pixel_step] * vp8_filter[1])
vmlal.u8 q2, d25, d1
vmlal.u8 q3, d26, d1
vmlal.u8 q4, d27, d1
vmlal.u8 q5, d28, d1
vmlal.u8 q6, d29, d1
vmlal.u8 q7, d30, d1
vmlal.u8 q8, d31, d1
vqrshrn.u16 d2, q1, #7 ;shift/round/saturate to u8
vqrshrn.u16 d3, q2, #7
vqrshrn.u16 d4, q3, #7
vqrshrn.u16 d5, q4, #7
vqrshrn.u16 d6, q5, #7
vqrshrn.u16 d7, q6, #7
vqrshrn.u16 d8, q7, #7
vqrshrn.u16 d9, q8, #7
vst1.u8 {d2, d3}, [r4], r5 ;store result
subs r12, r12, #1
vst1.u8 {d4, d5}, [r4], r5
vmov q11, q15
vst1.u8 {d6, d7}, [r4], r5
vst1.u8 {d8, d9}, [r4], r5
bne filt_blk2d_spo16x16_loop_neon
pop {r4-r5,pc}
ENDP
;-----------------
AREA bifilters16_dat, DATA, READWRITE ;read/write by default
;Data section bifilters16_dat holds the bilinear coefficient table. DCD reserves one
;32-bit word per value (16 words in total). The values are accessed through the
;bifilter16_coeff label: bifilter16_coeff, bifilter16_coeff+4, bifilter16_coeff+8, ...
_bifilter16_coeff_
DCD bifilter16_coeff
bifilter16_coeff
DCD 128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112
END
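Note: bifilter16_coeff (and the identical tables in the other bilinear prediction files) stores the weight pair (128 - 16*offset, 16*offset) for each of the eight 1/8-pel positions, i.e. the two bilinear taps scaled by VP8_FILTER_WEIGHT = 128. A throwaway generator that reproduces the DCD line above (illustrative only, not part of the source):

#include <stdio.h>

int main(void)
{
    int i;
    /* Prints: 128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112 */
    for (i = 0; i < 8; i++)
        printf("%d, %d%s", 128 - 16 * i, 16 * i, i == 7 ? "\n" : ", ");
    return 0;
}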

View file

@ -0,0 +1,135 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_bilinear_predict4x4_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
; r0 unsigned char *src_ptr,
; r1 int src_pixels_per_line,
; r2 int xoffset,
; r3 int yoffset,
; r4 unsigned char *dst_ptr,
; stack(lr) int dst_pitch
|vp8_bilinear_predict4x4_neon| PROC
push {r4, lr}
ldr r12, _bifilter4_coeff_
ldr r4, [sp, #8] ;load parameters from stack
ldr lr, [sp, #12] ;load parameters from stack
cmp r2, #0 ;skip first_pass filter if xoffset=0
beq skip_firstpass_filter
;First pass: output_height lines x output_width columns (5x4)
vld1.u8 {d2}, [r0], r1 ;load src data
add r2, r12, r2, lsl #3 ;calculate Hfilter location (2coeffsx4bytes=8bytes)
vld1.u8 {d3}, [r0], r1
vld1.u32 {d31}, [r2] ;first_pass filter
vld1.u8 {d4}, [r0], r1
vdup.8 d0, d31[0] ;first_pass filter (d0-d1)
vld1.u8 {d5}, [r0], r1
vdup.8 d1, d31[4]
vld1.u8 {d6}, [r0], r1
vshr.u64 q4, q1, #8 ;construct src_ptr[1]
vshr.u64 q5, q2, #8
vshr.u64 d12, d6, #8
vzip.32 d2, d3 ;put 2-line data in 1 register (src_ptr[0])
vzip.32 d4, d5
vzip.32 d8, d9 ;put 2-line data in 1 register (src_ptr[1])
vzip.32 d10, d11
vmull.u8 q7, d2, d0 ;(src_ptr[0] * vp8_filter[0])
vmull.u8 q8, d4, d0
vmull.u8 q9, d6, d0
vmlal.u8 q7, d8, d1 ;(src_ptr[1] * vp8_filter[1])
vmlal.u8 q8, d10, d1
vmlal.u8 q9, d12, d1
vqrshrn.u16 d28, q7, #7 ;shift/round/saturate to u8
vqrshrn.u16 d29, q8, #7
vqrshrn.u16 d30, q9, #7
;Second pass: 4x4
secondpass_filter
cmp r3, #0 ;skip second_pass filter if yoffset=0
beq skip_secondpass_filter
add r3, r12, r3, lsl #3 ;calculate Vfilter location
vld1.u32 {d31}, [r3] ;load second_pass filter
vdup.8 d0, d31[0] ;second_pass filter parameters (d0-d5)
vdup.8 d1, d31[4]
vmull.u8 q1, d28, d0
vmull.u8 q2, d29, d0
vext.8 d26, d28, d29, #4 ;construct src_ptr[pixel_step]
vext.8 d27, d29, d30, #4
vmlal.u8 q1, d26, d1
vmlal.u8 q2, d27, d1
add r0, r4, lr
add r1, r0, lr
add r2, r1, lr
vqrshrn.u16 d2, q1, #7 ;shift/round/saturate to u8
vqrshrn.u16 d3, q2, #7
vst1.32 {d2[0]}, [r4] ;store result
vst1.32 {d2[1]}, [r0]
vst1.32 {d3[0]}, [r1]
vst1.32 {d3[1]}, [r2]
pop {r4, pc}
;--------------------
skip_firstpass_filter
vld1.32 {d28[0]}, [r0], r1 ;load src data
vld1.32 {d28[1]}, [r0], r1
vld1.32 {d29[0]}, [r0], r1
vld1.32 {d29[1]}, [r0], r1
vld1.32 {d30[0]}, [r0], r1
b secondpass_filter
;---------------------
skip_secondpass_filter
vst1.32 {d28[0]}, [r4], lr ;store result
vst1.32 {d28[1]}, [r4], lr
vst1.32 {d29[0]}, [r4], lr
vst1.32 {d29[1]}, [r4], lr
pop {r4, pc}
ENDP
;-----------------
AREA bilinearfilters4_dat, DATA, READWRITE ;read/write by default
;Data section bilinearfilters4_dat holds the bilinear coefficient table. DCD reserves one
;32-bit word per value (16 words in total). The values are accessed through the
;bifilter4_coeff label: bifilter4_coeff, bifilter4_coeff+4, bifilter4_coeff+8, ...
_bifilter4_coeff_
DCD bifilter4_coeff
bifilter4_coeff
DCD 128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112
END

View file

@ -0,0 +1,140 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_bilinear_predict8x4_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
; r0 unsigned char *src_ptr,
; r1 int src_pixels_per_line,
; r2 int xoffset,
; r3 int yoffset,
; r4 unsigned char *dst_ptr,
; stack(lr) int dst_pitch
|vp8_bilinear_predict8x4_neon| PROC
push {r4, lr}
ldr r12, _bifilter8x4_coeff_
ldr r4, [sp, #8] ;load parameters from stack
ldr lr, [sp, #12] ;load parameters from stack
cmp r2, #0 ;skip first_pass filter if xoffset=0
beq skip_firstpass_filter
;First pass: output_height lines x output_width columns (5x8)
add r2, r12, r2, lsl #3 ;calculate filter location
vld1.u8 {q1}, [r0], r1 ;load src data
vld1.u32 {d31}, [r2] ;load first_pass filter
vld1.u8 {q2}, [r0], r1
vdup.8 d0, d31[0] ;first_pass filter (d0 d1)
vld1.u8 {q3}, [r0], r1
vdup.8 d1, d31[4]
vld1.u8 {q4}, [r0], r1
vmull.u8 q6, d2, d0 ;(src_ptr[0] * vp8_filter[0])
vld1.u8 {q5}, [r0], r1
vmull.u8 q7, d4, d0
vmull.u8 q8, d6, d0
vmull.u8 q9, d8, d0
vmull.u8 q10, d10, d0
vext.8 d3, d2, d3, #1 ;construct src_ptr[1]
vext.8 d5, d4, d5, #1
vext.8 d7, d6, d7, #1
vext.8 d9, d8, d9, #1
vext.8 d11, d10, d11, #1
vmlal.u8 q6, d3, d1 ;(src_ptr[1] * vp8_filter[1])
vmlal.u8 q7, d5, d1
vmlal.u8 q8, d7, d1
vmlal.u8 q9, d9, d1
vmlal.u8 q10, d11, d1
vqrshrn.u16 d22, q6, #7 ;shift/round/saturate to u8
vqrshrn.u16 d23, q7, #7
vqrshrn.u16 d24, q8, #7
vqrshrn.u16 d25, q9, #7
vqrshrn.u16 d26, q10, #7
;Second pass: 4x8
secondpass_filter
cmp r3, #0 ;skip second_pass filter if yoffset=0
beq skip_secondpass_filter
add r3, r12, r3, lsl #3
add r0, r4, lr
vld1.u32 {d31}, [r3] ;load second_pass filter
add r1, r0, lr
vdup.8 d0, d31[0] ;second_pass filter parameters (d0 d1)
vdup.8 d1, d31[4]
vmull.u8 q1, d22, d0 ;(src_ptr[0] * vp8_filter[0])
vmull.u8 q2, d23, d0
vmull.u8 q3, d24, d0
vmull.u8 q4, d25, d0
vmlal.u8 q1, d23, d1 ;(src_ptr[pixel_step] * vp8_filter[1])
vmlal.u8 q2, d24, d1
vmlal.u8 q3, d25, d1
vmlal.u8 q4, d26, d1
add r2, r1, lr
vqrshrn.u16 d2, q1, #7 ;shift/round/saturate to u8
vqrshrn.u16 d3, q2, #7
vqrshrn.u16 d4, q3, #7
vqrshrn.u16 d5, q4, #7
vst1.u8 {d2}, [r4] ;store result
vst1.u8 {d3}, [r0]
vst1.u8 {d4}, [r1]
vst1.u8 {d5}, [r2]
pop {r4, pc}
;--------------------
skip_firstpass_filter
vld1.u8 {d22}, [r0], r1 ;load src data
vld1.u8 {d23}, [r0], r1
vld1.u8 {d24}, [r0], r1
vld1.u8 {d25}, [r0], r1
vld1.u8 {d26}, [r0], r1
b secondpass_filter
;---------------------
skip_secondpass_filter
vst1.u8 {d22}, [r4], lr ;store result
vst1.u8 {d23}, [r4], lr
vst1.u8 {d24}, [r4], lr
vst1.u8 {d25}, [r4], lr
pop {r4, pc}
ENDP
;-----------------
AREA bifilters8x4_dat, DATA, READWRITE ;read/write by default
;Data section bifilters8x4_dat holds the bilinear coefficient table. DCD reserves one
;32-bit word per value (16 words in total). The values are accessed through the
;bifilter8x4_coeff label: bifilter8x4_coeff, bifilter8x4_coeff+4, bifilter8x4_coeff+8, ...
_bifilter8x4_coeff_
DCD bifilter8x4_coeff
bifilter8x4_coeff
DCD 128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112
END

View file

@ -0,0 +1,188 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_bilinear_predict8x8_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
; r0 unsigned char *src_ptr,
; r1 int src_pixels_per_line,
; r2 int xoffset,
; r3 int yoffset,
; r4 unsigned char *dst_ptr,
; stack(lr) int dst_pitch
|vp8_bilinear_predict8x8_neon| PROC
push {r4, lr}
ldr r12, _bifilter8_coeff_
ldr r4, [sp, #8] ;load parameters from stack
ldr lr, [sp, #12] ;load parameters from stack
cmp r2, #0 ;skip first_pass filter if xoffset=0
beq skip_firstpass_filter
;First pass: output_height lines x output_width columns (9x8)
add r2, r12, r2, lsl #3 ;calculate filter location
vld1.u8 {q1}, [r0], r1 ;load src data
vld1.u32 {d31}, [r2] ;load first_pass filter
vld1.u8 {q2}, [r0], r1
vdup.8 d0, d31[0] ;first_pass filter (d0 d1)
vld1.u8 {q3}, [r0], r1
vdup.8 d1, d31[4]
vld1.u8 {q4}, [r0], r1
vmull.u8 q6, d2, d0 ;(src_ptr[0] * vp8_filter[0])
vmull.u8 q7, d4, d0
vmull.u8 q8, d6, d0
vmull.u8 q9, d8, d0
vext.8 d3, d2, d3, #1 ;construct src_ptr[1]
vext.8 d5, d4, d5, #1
vext.8 d7, d6, d7, #1
vext.8 d9, d8, d9, #1
vmlal.u8 q6, d3, d1 ;(src_ptr[1] * vp8_filter[1])
vmlal.u8 q7, d5, d1
vmlal.u8 q8, d7, d1
vmlal.u8 q9, d9, d1
vld1.u8 {q1}, [r0], r1 ;load src data
vqrshrn.u16 d22, q6, #7 ;shift/round/saturate to u8
vld1.u8 {q2}, [r0], r1
vqrshrn.u16 d23, q7, #7
vld1.u8 {q3}, [r0], r1
vqrshrn.u16 d24, q8, #7
vld1.u8 {q4}, [r0], r1
vqrshrn.u16 d25, q9, #7
;first_pass filtering on the remaining 5 lines of data
vld1.u8 {q5}, [r0], r1
vmull.u8 q6, d2, d0 ;(src_ptr[0] * vp8_filter[0])
vmull.u8 q7, d4, d0
vmull.u8 q8, d6, d0
vmull.u8 q9, d8, d0
vmull.u8 q10, d10, d0
vext.8 d3, d2, d3, #1 ;construct src_ptr[1]
vext.8 d5, d4, d5, #1
vext.8 d7, d6, d7, #1
vext.8 d9, d8, d9, #1
vext.8 d11, d10, d11, #1
vmlal.u8 q6, d3, d1 ;(src_ptr[1] * vp8_filter[1])
vmlal.u8 q7, d5, d1
vmlal.u8 q8, d7, d1
vmlal.u8 q9, d9, d1
vmlal.u8 q10, d11, d1
vqrshrn.u16 d26, q6, #7 ;shift/round/saturate to u8
vqrshrn.u16 d27, q7, #7
vqrshrn.u16 d28, q8, #7
vqrshrn.u16 d29, q9, #7
vqrshrn.u16 d30, q10, #7
;Second pass: 8x8
secondpass_filter
cmp r3, #0 ;skip second_pass filter if yoffset=0
beq skip_secondpass_filter
add r3, r12, r3, lsl #3
add r0, r4, lr
vld1.u32 {d31}, [r3] ;load second_pass filter
add r1, r0, lr
vdup.8 d0, d31[0] ;second_pass filter parameters (d0 d1)
vdup.8 d1, d31[4]
vmull.u8 q1, d22, d0 ;(src_ptr[0] * vp8_filter[0])
vmull.u8 q2, d23, d0
vmull.u8 q3, d24, d0
vmull.u8 q4, d25, d0
vmull.u8 q5, d26, d0
vmull.u8 q6, d27, d0
vmull.u8 q7, d28, d0
vmull.u8 q8, d29, d0
vmlal.u8 q1, d23, d1 ;(src_ptr[pixel_step] * vp8_filter[1])
vmlal.u8 q2, d24, d1
vmlal.u8 q3, d25, d1
vmlal.u8 q4, d26, d1
vmlal.u8 q5, d27, d1
vmlal.u8 q6, d28, d1
vmlal.u8 q7, d29, d1
vmlal.u8 q8, d30, d1
vqrshrn.u16 d2, q1, #7 ;shift/round/saturate to u8
vqrshrn.u16 d3, q2, #7
vqrshrn.u16 d4, q3, #7
vqrshrn.u16 d5, q4, #7
vqrshrn.u16 d6, q5, #7
vqrshrn.u16 d7, q6, #7
vqrshrn.u16 d8, q7, #7
vqrshrn.u16 d9, q8, #7
vst1.u8 {d2}, [r4] ;store result
vst1.u8 {d3}, [r0]
vst1.u8 {d4}, [r1], lr
vst1.u8 {d5}, [r1], lr
vst1.u8 {d6}, [r1], lr
vst1.u8 {d7}, [r1], lr
vst1.u8 {d8}, [r1], lr
vst1.u8 {d9}, [r1], lr
pop {r4, pc}
;--------------------
skip_firstpass_filter
vld1.u8 {d22}, [r0], r1 ;load src data
vld1.u8 {d23}, [r0], r1
vld1.u8 {d24}, [r0], r1
vld1.u8 {d25}, [r0], r1
vld1.u8 {d26}, [r0], r1
vld1.u8 {d27}, [r0], r1
vld1.u8 {d28}, [r0], r1
vld1.u8 {d29}, [r0], r1
vld1.u8 {d30}, [r0], r1
b secondpass_filter
;---------------------
skip_secondpass_filter
vst1.u8 {d22}, [r4], lr ;store result
vst1.u8 {d23}, [r4], lr
vst1.u8 {d24}, [r4], lr
vst1.u8 {d25}, [r4], lr
vst1.u8 {d26}, [r4], lr
vst1.u8 {d27}, [r4], lr
vst1.u8 {d28}, [r4], lr
vst1.u8 {d29}, [r4], lr
pop {r4, pc}
ENDP
;-----------------
AREA bifilters8_dat, DATA, READWRITE ;read/write by default
;Data section bifilters8_dat holds the bilinear coefficient table. DCD reserves one
;32-bit word per value (16 words in total). The values are accessed through the
;bifilter8_coeff label: bifilter8_coeff, bifilter8_coeff+4, bifilter8_coeff+8, ...
_bifilter8_coeff_
DCD bifilter8_coeff
bifilter8_coeff
DCD 128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112
END

View file

@ -0,0 +1,584 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_build_intra_predictors_mby_neon_func|
EXPORT |vp8_build_intra_predictors_mby_s_neon_func|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
; r0 unsigned char *y_buffer
; r1 unsigned char *ypred_ptr
; r2 int y_stride
; r3 int mode
; stack int Up
; stack int Left
|vp8_build_intra_predictors_mby_neon_func| PROC
push {r4-r8, lr}
cmp r3, #0
beq case_dc_pred
cmp r3, #1
beq case_v_pred
cmp r3, #2
beq case_h_pred
cmp r3, #3
beq case_tm_pred
case_dc_pred
ldr r4, [sp, #24] ; Up
ldr r5, [sp, #28] ; Left
; Default the DC average to 128
mov r12, #128
vdup.u8 q0, r12
; Zero out running sum
mov r12, #0
; compute shift and jump
adds r7, r4, r5
beq skip_dc_pred_up_left
; Load above row, if it exists
cmp r4, #0
beq skip_dc_pred_up
sub r6, r0, r2
vld1.8 {q1}, [r6]
vpaddl.u8 q2, q1
vpaddl.u16 q3, q2
vpaddl.u32 q4, q3
vmov.32 r4, d8[0]
vmov.32 r6, d9[0]
add r12, r4, r6
; Move back to integer registers
skip_dc_pred_up
cmp r5, #0
beq skip_dc_pred_left
sub r0, r0, #1
; Load left column, if it exists
ldrb r3, [r0], r2
ldrb r4, [r0], r2
ldrb r5, [r0], r2
ldrb r6, [r0], r2
add r12, r12, r3
add r12, r12, r4
add r12, r12, r5
add r12, r12, r6
ldrb r3, [r0], r2
ldrb r4, [r0], r2
ldrb r5, [r0], r2
ldrb r6, [r0], r2
add r12, r12, r3
add r12, r12, r4
add r12, r12, r5
add r12, r12, r6
ldrb r3, [r0], r2
ldrb r4, [r0], r2
ldrb r5, [r0], r2
ldrb r6, [r0], r2
add r12, r12, r3
add r12, r12, r4
add r12, r12, r5
add r12, r12, r6
ldrb r3, [r0], r2
ldrb r4, [r0], r2
ldrb r5, [r0], r2
ldrb r6, [r0]
add r12, r12, r3
add r12, r12, r4
add r12, r12, r5
add r12, r12, r6
skip_dc_pred_left
add r7, r7, #3 ; Shift
sub r4, r7, #1
mov r5, #1
add r12, r12, r5, lsl r4
mov r5, r12, lsr r7 ; expected_dc
vdup.u8 q0, r5
skip_dc_pred_up_left
vst1.u8 {q0}, [r1]!
vst1.u8 {q0}, [r1]!
vst1.u8 {q0}, [r1]!
vst1.u8 {q0}, [r1]!
vst1.u8 {q0}, [r1]!
vst1.u8 {q0}, [r1]!
vst1.u8 {q0}, [r1]!
vst1.u8 {q0}, [r1]!
vst1.u8 {q0}, [r1]!
vst1.u8 {q0}, [r1]!
vst1.u8 {q0}, [r1]!
vst1.u8 {q0}, [r1]!
vst1.u8 {q0}, [r1]!
vst1.u8 {q0}, [r1]!
vst1.u8 {q0}, [r1]!
vst1.u8 {q0}, [r1]!
pop {r4-r8,pc}
case_v_pred
; Copy down above row
sub r6, r0, r2
vld1.8 {q0}, [r6]
vst1.u8 {q0}, [r1]!
vst1.u8 {q0}, [r1]!
vst1.u8 {q0}, [r1]!
vst1.u8 {q0}, [r1]!
vst1.u8 {q0}, [r1]!
vst1.u8 {q0}, [r1]!
vst1.u8 {q0}, [r1]!
vst1.u8 {q0}, [r1]!
vst1.u8 {q0}, [r1]!
vst1.u8 {q0}, [r1]!
vst1.u8 {q0}, [r1]!
vst1.u8 {q0}, [r1]!
vst1.u8 {q0}, [r1]!
vst1.u8 {q0}, [r1]!
vst1.u8 {q0}, [r1]!
vst1.u8 {q0}, [r1]!
pop {r4-r8,pc}
case_h_pred
; Load 4x yleft_col
sub r0, r0, #1
ldrb r3, [r0], r2
ldrb r4, [r0], r2
ldrb r5, [r0], r2
ldrb r6, [r0], r2
vdup.u8 q0, r3
vdup.u8 q1, r4
vdup.u8 q2, r5
vdup.u8 q3, r6
vst1.u8 {q0}, [r1]!
vst1.u8 {q1}, [r1]!
vst1.u8 {q2}, [r1]!
vst1.u8 {q3}, [r1]!
ldrb r3, [r0], r2
ldrb r4, [r0], r2
ldrb r5, [r0], r2
ldrb r6, [r0], r2
vdup.u8 q0, r3
vdup.u8 q1, r4
vdup.u8 q2, r5
vdup.u8 q3, r6
vst1.u8 {q0}, [r1]!
vst1.u8 {q1}, [r1]!
vst1.u8 {q2}, [r1]!
vst1.u8 {q3}, [r1]!
ldrb r3, [r0], r2
ldrb r4, [r0], r2
ldrb r5, [r0], r2
ldrb r6, [r0], r2
vdup.u8 q0, r3
vdup.u8 q1, r4
vdup.u8 q2, r5
vdup.u8 q3, r6
vst1.u8 {q0}, [r1]!
vst1.u8 {q1}, [r1]!
vst1.u8 {q2}, [r1]!
vst1.u8 {q3}, [r1]!
ldrb r3, [r0], r2
ldrb r4, [r0], r2
ldrb r5, [r0], r2
ldrb r6, [r0], r2
vdup.u8 q0, r3
vdup.u8 q1, r4
vdup.u8 q2, r5
vdup.u8 q3, r6
vst1.u8 {q0}, [r1]!
vst1.u8 {q1}, [r1]!
vst1.u8 {q2}, [r1]!
vst1.u8 {q3}, [r1]!
pop {r4-r8,pc}
case_tm_pred
; Load yabove_row
sub r3, r0, r2
vld1.8 {q8}, [r3]
; Load ytop_left
sub r3, r3, #1
ldrb r7, [r3]
vdup.u16 q7, r7
; Compute yabove_row - ytop_left
mov r3, #1
vdup.u8 q0, r3
vmull.u8 q4, d16, d0
vmull.u8 q5, d17, d0
vsub.s16 q4, q4, q7
vsub.s16 q5, q5, q7
; Load 4x yleft_col
sub r0, r0, #1
mov r12, #4
case_tm_pred_loop
ldrb r3, [r0], r2
ldrb r4, [r0], r2
ldrb r5, [r0], r2
ldrb r6, [r0], r2
vdup.u16 q0, r3
vdup.u16 q1, r4
vdup.u16 q2, r5
vdup.u16 q3, r6
vqadd.s16 q8, q0, q4
vqadd.s16 q9, q0, q5
vqadd.s16 q10, q1, q4
vqadd.s16 q11, q1, q5
vqadd.s16 q12, q2, q4
vqadd.s16 q13, q2, q5
vqadd.s16 q14, q3, q4
vqadd.s16 q15, q3, q5
vqshrun.s16 d0, q8, #0
vqshrun.s16 d1, q9, #0
vqshrun.s16 d2, q10, #0
vqshrun.s16 d3, q11, #0
vqshrun.s16 d4, q12, #0
vqshrun.s16 d5, q13, #0
vqshrun.s16 d6, q14, #0
vqshrun.s16 d7, q15, #0
vst1.u8 {q0}, [r1]!
vst1.u8 {q1}, [r1]!
vst1.u8 {q2}, [r1]!
vst1.u8 {q3}, [r1]!
subs r12, r12, #1
bne case_tm_pred_loop
pop {r4-r8,pc}
ENDP
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; r0 unsigned char *y_buffer
; r1 unsigned char *ypred_ptr
; r2 int y_stride
; r3 int mode
; stack int Up
; stack int Left
|vp8_build_intra_predictors_mby_s_neon_func| PROC
push {r4-r8, lr}
mov r1, r0 ; unsigned char *ypred_ptr = x->dst.y_buffer; //x->Predictor;
cmp r3, #0
beq case_dc_pred_s
cmp r3, #1
beq case_v_pred_s
cmp r3, #2
beq case_h_pred_s
cmp r3, #3
beq case_tm_pred_s
case_dc_pred_s
ldr r4, [sp, #24] ; Up
ldr r5, [sp, #28] ; Left
; Default the DC average to 128
mov r12, #128
vdup.u8 q0, r12
; Zero out running sum
mov r12, #0
; compute shift and jump
adds r7, r4, r5
beq skip_dc_pred_up_left_s
; Load above row, if it exists
cmp r4, #0
beq skip_dc_pred_up_s
sub r6, r0, r2
vld1.8 {q1}, [r6]
vpaddl.u8 q2, q1
vpaddl.u16 q3, q2
vpaddl.u32 q4, q3
vmov.32 r4, d8[0]
vmov.32 r6, d9[0]
add r12, r4, r6
; Move back to integer registers
skip_dc_pred_up_s
cmp r5, #0
beq skip_dc_pred_left_s
sub r0, r0, #1
; Load left column, if it exists
ldrb r3, [r0], r2
ldrb r4, [r0], r2
ldrb r5, [r0], r2
ldrb r6, [r0], r2
add r12, r12, r3
add r12, r12, r4
add r12, r12, r5
add r12, r12, r6
ldrb r3, [r0], r2
ldrb r4, [r0], r2
ldrb r5, [r0], r2
ldrb r6, [r0], r2
add r12, r12, r3
add r12, r12, r4
add r12, r12, r5
add r12, r12, r6
ldrb r3, [r0], r2
ldrb r4, [r0], r2
ldrb r5, [r0], r2
ldrb r6, [r0], r2
add r12, r12, r3
add r12, r12, r4
add r12, r12, r5
add r12, r12, r6
ldrb r3, [r0], r2
ldrb r4, [r0], r2
ldrb r5, [r0], r2
ldrb r6, [r0]
add r12, r12, r3
add r12, r12, r4
add r12, r12, r5
add r12, r12, r6
skip_dc_pred_left_s
add r7, r7, #3 ; Shift
sub r4, r7, #1
mov r5, #1
add r12, r12, r5, lsl r4
mov r5, r12, lsr r7 ; expected_dc
vdup.u8 q0, r5
skip_dc_pred_up_left_s
vst1.u8 {q0}, [r1], r2
vst1.u8 {q0}, [r1], r2
vst1.u8 {q0}, [r1], r2
vst1.u8 {q0}, [r1], r2
vst1.u8 {q0}, [r1], r2
vst1.u8 {q0}, [r1], r2
vst1.u8 {q0}, [r1], r2
vst1.u8 {q0}, [r1], r2
vst1.u8 {q0}, [r1], r2
vst1.u8 {q0}, [r1], r2
vst1.u8 {q0}, [r1], r2
vst1.u8 {q0}, [r1], r2
vst1.u8 {q0}, [r1], r2
vst1.u8 {q0}, [r1], r2
vst1.u8 {q0}, [r1], r2
vst1.u8 {q0}, [r1], r2
pop {r4-r8,pc}
case_v_pred_s
; Copy down above row
sub r6, r0, r2
vld1.8 {q0}, [r6]
vst1.u8 {q0}, [r1], r2
vst1.u8 {q0}, [r1], r2
vst1.u8 {q0}, [r1], r2
vst1.u8 {q0}, [r1], r2
vst1.u8 {q0}, [r1], r2
vst1.u8 {q0}, [r1], r2
vst1.u8 {q0}, [r1], r2
vst1.u8 {q0}, [r1], r2
vst1.u8 {q0}, [r1], r2
vst1.u8 {q0}, [r1], r2
vst1.u8 {q0}, [r1], r2
vst1.u8 {q0}, [r1], r2
vst1.u8 {q0}, [r1], r2
vst1.u8 {q0}, [r1], r2
vst1.u8 {q0}, [r1], r2
vst1.u8 {q0}, [r1], r2
pop {r4-r8,pc}
case_h_pred_s
; Load 4x yleft_col
sub r0, r0, #1
ldrb r3, [r0], r2
ldrb r4, [r0], r2
ldrb r5, [r0], r2
ldrb r6, [r0], r2
vdup.u8 q0, r3
vdup.u8 q1, r4
vdup.u8 q2, r5
vdup.u8 q3, r6
vst1.u8 {q0}, [r1], r2
vst1.u8 {q1}, [r1], r2
vst1.u8 {q2}, [r1], r2
vst1.u8 {q3}, [r1], r2
ldrb r3, [r0], r2
ldrb r4, [r0], r2
ldrb r5, [r0], r2
ldrb r6, [r0], r2
vdup.u8 q0, r3
vdup.u8 q1, r4
vdup.u8 q2, r5
vdup.u8 q3, r6
vst1.u8 {q0}, [r1], r2
vst1.u8 {q1}, [r1], r2
vst1.u8 {q2}, [r1], r2
vst1.u8 {q3}, [r1], r2
ldrb r3, [r0], r2
ldrb r4, [r0], r2
ldrb r5, [r0], r2
ldrb r6, [r0], r2
vdup.u8 q0, r3
vdup.u8 q1, r4
vdup.u8 q2, r5
vdup.u8 q3, r6
vst1.u8 {q0}, [r1], r2
vst1.u8 {q1}, [r1], r2
vst1.u8 {q2}, [r1], r2
vst1.u8 {q3}, [r1], r2
ldrb r3, [r0], r2
ldrb r4, [r0], r2
ldrb r5, [r0], r2
ldrb r6, [r0], r2
vdup.u8 q0, r3
vdup.u8 q1, r4
vdup.u8 q2, r5
vdup.u8 q3, r6
vst1.u8 {q0}, [r1], r2
vst1.u8 {q1}, [r1], r2
vst1.u8 {q2}, [r1], r2
vst1.u8 {q3}, [r1], r2
pop {r4-r8,pc}
case_tm_pred_s
; Load yabove_row
sub r3, r0, r2
vld1.8 {q8}, [r3]
; Load ytop_left
sub r3, r3, #1
ldrb r7, [r3]
vdup.u16 q7, r7
; Compute yabove_row - ytop_left
mov r3, #1
vdup.u8 q0, r3
vmull.u8 q4, d16, d0
vmull.u8 q5, d17, d0
vsub.s16 q4, q4, q7
vsub.s16 q5, q5, q7
; Load 4x yleft_col
sub r0, r0, #1
mov r12, #4
case_tm_pred_loop_s
ldrb r3, [r0], r2
ldrb r4, [r0], r2
ldrb r5, [r0], r2
ldrb r6, [r0], r2
vdup.u16 q0, r3
vdup.u16 q1, r4
vdup.u16 q2, r5
vdup.u16 q3, r6
vqadd.s16 q8, q0, q4
vqadd.s16 q9, q0, q5
vqadd.s16 q10, q1, q4
vqadd.s16 q11, q1, q5
vqadd.s16 q12, q2, q4
vqadd.s16 q13, q2, q5
vqadd.s16 q14, q3, q4
vqadd.s16 q15, q3, q5
vqshrun.s16 d0, q8, #0
vqshrun.s16 d1, q9, #0
vqshrun.s16 d2, q10, #0
vqshrun.s16 d3, q11, #0
vqshrun.s16 d4, q12, #0
vqshrun.s16 d5, q13, #0
vqshrun.s16 d6, q14, #0
vqshrun.s16 d7, q15, #0
vst1.u8 {q0}, [r1], r2
vst1.u8 {q1}, [r1], r2
vst1.u8 {q2}, [r1], r2
vst1.u8 {q3}, [r1], r2
subs r12, r12, #1
bne case_tm_pred_loop_s
pop {r4-r8,pc}
ENDP
END
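Note: in scalar terms the DC and TM cases above compute the following. This is a sketch, assuming Up and Left are 0/1 availability flags (which is what the shift computation Up + Left + 3 implies) and, as in the first (non-_s) variant, a 16x16 prediction buffer with a pitch of 16; the names are illustrative:

static unsigned char clamp_u8(int v)
{
    return (unsigned char)(v < 0 ? 0 : v > 255 ? 255 : v);
}

/* y points at the top-left luma sample of the macroblock being predicted. */
static void mb_dc_tm_pred_sketch(const unsigned char *y, int stride,
                                 unsigned char *pred, int up, int left, int tm_mode)
{
    int r, c;

    if (tm_mode) {                       /* TM: left + above - top_left, saturated */
        const unsigned char *above = y - stride;
        int top_left = above[-1];
        for (r = 0; r < 16; r++)
            for (c = 0; c < 16; c++)
                pred[r * 16 + c] = clamp_u8(y[r * stride - 1] + above[c] - top_left);
        return;
    }

    /* DC: rounded average of the available above row and/or left column, else 128. */
    {
        int expected_dc = 128;
        if (up || left) {
            int shift = 3 + up + left;
            int sum = 0;
            if (up)
                for (c = 0; c < 16; c++) sum += y[c - stride];
            if (left)
                for (r = 0; r < 16; r++) sum += y[r * stride - 1];
            expected_dc = (sum + (1 << (shift - 1))) >> shift;
        }
        for (r = 0; r < 16 * 16; r++)
            pred[r] = (unsigned char)expected_dc;
    }
}

The V and H cases are plain copies of the above row and of each left-column pixel respectively, which is all the long runs of vst1.u8 above do.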

View file

@ -0,0 +1,59 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_copy_mem16x16_neon|
; ARM
; REQUIRE8
; PRESERVE8
AREA Block, CODE, READONLY ; name this block of code
;void copy_mem16x16_neon( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride)
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|vp8_copy_mem16x16_neon| PROC
vld1.u8 {q0}, [r0], r1
vld1.u8 {q1}, [r0], r1
vld1.u8 {q2}, [r0], r1
vst1.u8 {q0}, [r2], r3
vld1.u8 {q3}, [r0], r1
vst1.u8 {q1}, [r2], r3
vld1.u8 {q4}, [r0], r1
vst1.u8 {q2}, [r2], r3
vld1.u8 {q5}, [r0], r1
vst1.u8 {q3}, [r2], r3
vld1.u8 {q6}, [r0], r1
vst1.u8 {q4}, [r2], r3
vld1.u8 {q7}, [r0], r1
vst1.u8 {q5}, [r2], r3
vld1.u8 {q8}, [r0], r1
vst1.u8 {q6}, [r2], r3
vld1.u8 {q9}, [r0], r1
vst1.u8 {q7}, [r2], r3
vld1.u8 {q10}, [r0], r1
vst1.u8 {q8}, [r2], r3
vld1.u8 {q11}, [r0], r1
vst1.u8 {q9}, [r2], r3
vld1.u8 {q12}, [r0], r1
vst1.u8 {q10}, [r2], r3
vld1.u8 {q13}, [r0], r1
vst1.u8 {q11}, [r2], r3
vld1.u8 {q14}, [r0], r1
vst1.u8 {q12}, [r2], r3
vld1.u8 {q15}, [r0], r1
vst1.u8 {q13}, [r2], r3
vst1.u8 {q14}, [r2], r3
vst1.u8 {q15}, [r2], r3
mov pc, lr
ENDP ; |vp8_copy_mem16x16_neon|
END

View file

@ -0,0 +1,34 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_copy_mem8x4_neon|
; ARM
; REQUIRE8
; PRESERVE8
AREA Block, CODE, READONLY ; name this block of code
;void copy_mem8x4_neon( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride)
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|vp8_copy_mem8x4_neon| PROC
vld1.u8 {d0}, [r0], r1
vld1.u8 {d1}, [r0], r1
vst1.u8 {d0}, [r2], r3
vld1.u8 {d2}, [r0], r1
vst1.u8 {d1}, [r2], r3
vld1.u8 {d3}, [r0], r1
vst1.u8 {d2}, [r2], r3
vst1.u8 {d3}, [r2], r3
mov pc, lr
ENDP ; |vp8_copy_mem8x4_neon|
END

View file

@ -0,0 +1,43 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_copy_mem8x8_neon|
; ARM
; REQUIRE8
; PRESERVE8
AREA Block, CODE, READONLY ; name this block of code
;void copy_mem8x8_neon( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride)
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|vp8_copy_mem8x8_neon| PROC
vld1.u8 {d0}, [r0], r1
vld1.u8 {d1}, [r0], r1
vst1.u8 {d0}, [r2], r3
vld1.u8 {d2}, [r0], r1
vst1.u8 {d1}, [r2], r3
vld1.u8 {d3}, [r0], r1
vst1.u8 {d2}, [r2], r3
vld1.u8 {d4}, [r0], r1
vst1.u8 {d3}, [r2], r3
vld1.u8 {d5}, [r0], r1
vst1.u8 {d4}, [r2], r3
vld1.u8 {d6}, [r0], r1
vst1.u8 {d5}, [r2], r3
vld1.u8 {d7}, [r0], r1
vst1.u8 {d6}, [r2], r3
vst1.u8 {d7}, [r2], r3
mov pc, lr
ENDP ; |vp8_copy_mem8x8_neon|
END
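Note: the three copy_mem*_neon routines above are plain block copies; the scalar equivalent is just a row-wise memcpy (a sketch, generic over block size):

#include <string.h>

static void copy_mem_sketch(const unsigned char *src, int src_stride,
                            unsigned char *dst, int dst_stride,
                            int width, int height)
{
    int r;
    for (r = 0; r < height; r++)
        memcpy(dst + r * dst_stride, src + r * src_stride, (size_t)width);
}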

View file

@ -0,0 +1,49 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license and patent
; grant that can be found in the LICENSE file in the root of the source
; tree. All contributing project authors may be found in the AUTHORS
; file in the root of the source tree.
;
EXPORT |vp8_dc_only_idct_add_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
;void vp8_dc_only_idct_add_neon(short input_dc, unsigned char *pred_ptr,
; unsigned char *dst_ptr, int pitch, int stride)
; r0 input_dc
; r1 pred_ptr
; r2 dst_ptr
; r3 pitch
; sp stride
|vp8_dc_only_idct_add_neon| PROC
add r0, r0, #4
asr r0, r0, #3
ldr r12, [sp]
vdup.16 q0, r0
vld1.32 {d2[0]}, [r1], r3
vld1.32 {d2[1]}, [r1], r3
vld1.32 {d4[0]}, [r1], r3
vld1.32 {d4[1]}, [r1]
vaddw.u8 q1, q0, d2
vaddw.u8 q2, q0, d4
vqmovun.s16 d2, q1
vqmovun.s16 d4, q2
vst1.32 {d2[0]}, [r2], r12
vst1.32 {d2[1]}, [r2], r12
vst1.32 {d4[0]}, [r2], r12
vst1.32 {d4[1]}, [r2]
bx lr
ENDP
END
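Note: a scalar sketch of what vp8_dc_only_idct_add_neon computes — the lone DC coefficient is inverse-transformed to (input_dc + 4) >> 3 and added, with clamping, to every sample of the 4x4 predictor block (the function name is illustrative):

static void dc_only_idct_add_sketch(short input_dc, const unsigned char *pred_ptr,
                                    unsigned char *dst_ptr, int pitch, int stride)
{
    int a1 = (input_dc + 4) >> 3;   /* same rounding as "add r0, r0, #4; asr r0, r0, #3" */
    int r, c;

    for (r = 0; r < 4; r++) {
        for (c = 0; c < 4; c++) {
            int v = pred_ptr[c] + a1;
            dst_ptr[c] = (unsigned char)(v < 0 ? 0 : v > 255 ? 255 : v);
        }
        pred_ptr += pitch;
        dst_ptr += stride;
    }
}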

View file

@ -0,0 +1,96 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_short_inv_walsh4x4_neon|
EXPORT |vp8_short_inv_walsh4x4_1_neon|
ARM
REQUIRE8
PRESERVE8
AREA |.text|, CODE, READONLY ; name this block of code
;short vp8_short_inv_walsh4x4_neon(short *input, short *output)
|vp8_short_inv_walsh4x4_neon| PROC
; read in all four lines of values: d0->d3
vldm.64 r0, {q0, q1}
; first for loop
vadd.s16 d4, d0, d3 ;a = [0] + [12]
vadd.s16 d5, d1, d2 ;b = [4] + [8]
vsub.s16 d6, d1, d2 ;c = [4] - [8]
vsub.s16 d7, d0, d3 ;d = [0] - [12]
vadd.s16 d0, d4, d5 ;a + b
vadd.s16 d1, d6, d7 ;c + d
vsub.s16 d2, d4, d5 ;a - b
vsub.s16 d3, d7, d6 ;d - c
vtrn.32 d0, d2 ;d0: 0 1 8 9
;d2: 2 3 10 11
vtrn.32 d1, d3 ;d1: 4 5 12 13
;d3: 6 7 14 15
vtrn.16 d0, d1 ;d0: 0 4 8 12
;d1: 1 5 9 13
vtrn.16 d2, d3 ;d2: 2 6 10 14
;d3: 3 7 11 15
; second for loop
vadd.s16 d4, d0, d3 ;a = [0] + [3]
vadd.s16 d5, d1, d2 ;b = [1] + [2]
vsub.s16 d6, d1, d2 ;c = [1] - [2]
vsub.s16 d7, d0, d3 ;d = [0] - [3]
vadd.s16 d0, d4, d5 ;e = a + b
vadd.s16 d1, d6, d7 ;f = c + d
vsub.s16 d2, d4, d5 ;g = a - b
vsub.s16 d3, d7, d6 ;h = d - c
vmov.i16 q2, #3
vadd.i16 q0, q0, q2 ;e/f += 3
vadd.i16 q1, q1, q2 ;g/h += 3
vshr.s16 q0, q0, #3 ;e/f >> 3
vshr.s16 q1, q1, #3 ;g/h >> 3
vtrn.32 d0, d2
vtrn.32 d1, d3
vtrn.16 d0, d1
vtrn.16 d2, d3
vstmia.16 r1!, {q0}
vstmia.16 r1!, {q1}
bx lr
ENDP ; |vp8_short_inv_walsh4x4_neon|
;short vp8_short_inv_walsh4x4_1_neon(short *input, short *output)
|vp8_short_inv_walsh4x4_1_neon| PROC
; load a full line into a neon register
vld1.16 {q0}, [r0]
; extract first element and replicate
vdup.16 q1, d0[0]
; add 3 to all values
vmov.i16 q2, #3
vadd.i16 q3, q1, q2
; right shift
vshr.s16 q3, q3, #3
; write it back
vstmia.16 r1!, {q3}
vstmia.16 r1!, {q3}
bx lr
ENDP ; |vp8_short_inv_walsh4x4_1_neon|
END
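Note: a C transcription of the arithmetic annotated in vp8_short_inv_walsh4x4_neon above (a sketch to document the data flow, not the tree's generic C version verbatim):

/* Columns first (a = [0]+[12], b = [4]+[8], ...), then rows with +3 >> 3 rounding. */
static void inv_walsh4x4_sketch(const short *input, short *output)
{
    int i, a, b, c, d;
    short tmp[16];

    for (i = 0; i < 4; i++) {
        a = input[i] + input[i + 12];
        b = input[i + 4] + input[i + 8];
        c = input[i + 4] - input[i + 8];
        d = input[i] - input[i + 12];
        tmp[i]      = (short)(a + b);
        tmp[i + 4]  = (short)(c + d);
        tmp[i + 8]  = (short)(a - b);
        tmp[i + 12] = (short)(d - c);
    }
    for (i = 0; i < 4; i++) {
        a = tmp[4 * i] + tmp[4 * i + 3];
        b = tmp[4 * i + 1] + tmp[4 * i + 2];
        c = tmp[4 * i + 1] - tmp[4 * i + 2];
        d = tmp[4 * i] - tmp[4 * i + 3];
        output[4 * i]     = (short)((a + b + 3) >> 3);
        output[4 * i + 1] = (short)((c + d + 3) >> 3);
        output[4 * i + 2] = (short)((a - b + 3) >> 3);
        output[4 * i + 3] = (short)((d - c + 3) >> 3);
    }
}

vp8_short_inv_walsh4x4_1_neon is the degenerate case where only the DC term is present: every output value is (input[0] + 3) >> 3.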

View file

@ -0,0 +1,409 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_loop_filter_horizontal_edge_y_neon|
EXPORT |vp8_loop_filter_horizontal_edge_uv_neon|
EXPORT |vp8_loop_filter_vertical_edge_y_neon|
EXPORT |vp8_loop_filter_vertical_edge_uv_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
; flimit, limit, and thresh should be positive numbers.
; All 16 elements in these variables are equal.
; void vp8_loop_filter_horizontal_edge_y_neon(unsigned char *src, int pitch,
; const signed char *flimit,
; const signed char *limit,
; const signed char *thresh,
; int count)
; r0 unsigned char *src
; r1 int pitch
; r2 const signed char *flimit
; r3 const signed char *limit
; sp const signed char *thresh,
; sp+4 int count (unused)
|vp8_loop_filter_horizontal_edge_y_neon| PROC
stmdb sp!, {lr}
vld1.s8 {d0[], d1[]}, [r2] ; flimit
vld1.s8 {d2[], d3[]}, [r3] ; limit
sub r2, r0, r1, lsl #2 ; move src pointer down by 4 lines
ldr r12, [sp, #4] ; load thresh pointer
vld1.u8 {q3}, [r2], r1 ; p3
vld1.u8 {q4}, [r2], r1 ; p2
vld1.u8 {q5}, [r2], r1 ; p1
vld1.u8 {q6}, [r2], r1 ; p0
vld1.u8 {q7}, [r2], r1 ; q0
vld1.u8 {q8}, [r2], r1 ; q1
vld1.u8 {q9}, [r2], r1 ; q2
vld1.u8 {q10}, [r2] ; q3
vld1.s8 {d4[], d5[]}, [r12] ; thresh
sub r0, r0, r1, lsl #1
bl vp8_loop_filter_neon
vst1.u8 {q5}, [r0], r1 ; store op1
vst1.u8 {q6}, [r0], r1 ; store op0
vst1.u8 {q7}, [r0], r1 ; store oq0
vst1.u8 {q8}, [r0], r1 ; store oq1
ldmia sp!, {pc}
ENDP ; |vp8_loop_filter_horizontal_edge_y_neon|
; void vp8_loop_filter_horizontal_edge_uv_neon(unsigned char *u, int pitch
; const signed char *flimit,
; const signed char *limit,
; const signed char *thresh,
; unsigned char *v)
; r0 unsigned char *u,
; r1 int pitch,
; r2 const signed char *flimit,
; r3 const signed char *limit,
; sp const signed char *thresh,
; sp+4 unsigned char *v
|vp8_loop_filter_horizontal_edge_uv_neon| PROC
stmdb sp!, {lr}
vld1.s8 {d0[], d1[]}, [r2] ; flimit
vld1.s8 {d2[], d3[]}, [r3] ; limit
ldr r2, [sp, #8] ; load v ptr
sub r3, r0, r1, lsl #2 ; move u pointer down by 4 lines
vld1.u8 {d6}, [r3], r1 ; p3
vld1.u8 {d8}, [r3], r1 ; p2
vld1.u8 {d10}, [r3], r1 ; p1
vld1.u8 {d12}, [r3], r1 ; p0
vld1.u8 {d14}, [r3], r1 ; q0
vld1.u8 {d16}, [r3], r1 ; q1
vld1.u8 {d18}, [r3], r1 ; q2
vld1.u8 {d20}, [r3] ; q3
ldr r3, [sp, #4] ; load thresh pointer
sub r12, r2, r1, lsl #2 ; move v pointer down by 4 lines
vld1.u8 {d7}, [r12], r1 ; p3
vld1.u8 {d9}, [r12], r1 ; p2
vld1.u8 {d11}, [r12], r1 ; p1
vld1.u8 {d13}, [r12], r1 ; p0
vld1.u8 {d15}, [r12], r1 ; q0
vld1.u8 {d17}, [r12], r1 ; q1
vld1.u8 {d19}, [r12], r1 ; q2
vld1.u8 {d21}, [r12] ; q3
vld1.s8 {d4[], d5[]}, [r3] ; thresh
bl vp8_loop_filter_neon
sub r0, r0, r1, lsl #1
sub r2, r2, r1, lsl #1
vst1.u8 {d10}, [r0], r1 ; store u op1
vst1.u8 {d11}, [r2], r1 ; store v op1
vst1.u8 {d12}, [r0], r1 ; store u op0
vst1.u8 {d13}, [r2], r1 ; store v op0
vst1.u8 {d14}, [r0], r1 ; store u oq0
vst1.u8 {d15}, [r2], r1 ; store v oq0
vst1.u8 {d16}, [r0] ; store u oq1
vst1.u8 {d17}, [r2] ; store v oq1
ldmia sp!, {pc}
ENDP ; |vp8_loop_filter_horizontal_edge_uv_neon|
; void vp8_loop_filter_vertical_edge_y_neon(unsigned char *src, int pitch,
; const signed char *flimit,
; const signed char *limit,
; const signed char *thresh,
; int count)
; r0 unsigned char *src,
; r1 int pitch,
; r2 const signed char *flimit,
; r3 const signed char *limit,
; sp const signed char *thresh,
; sp+4 int count (unused)
|vp8_loop_filter_vertical_edge_y_neon| PROC
stmdb sp!, {lr}
vld1.s8 {d0[], d1[]}, [r2] ; flimit
vld1.s8 {d2[], d3[]}, [r3] ; limit
sub r2, r0, #4 ; src ptr down by 4 columns
sub r0, r0, #2 ; dst ptr
ldr r12, [sp, #4] ; load thresh pointer
vld1.u8 {d6}, [r2], r1 ; load first 8-line src data
vld1.u8 {d8}, [r2], r1
vld1.u8 {d10}, [r2], r1
vld1.u8 {d12}, [r2], r1
vld1.u8 {d14}, [r2], r1
vld1.u8 {d16}, [r2], r1
vld1.u8 {d18}, [r2], r1
vld1.u8 {d20}, [r2], r1
vld1.s8 {d4[], d5[]}, [r12] ; thresh
vld1.u8 {d7}, [r2], r1 ; load second 8-line src data
vld1.u8 {d9}, [r2], r1
vld1.u8 {d11}, [r2], r1
vld1.u8 {d13}, [r2], r1
vld1.u8 {d15}, [r2], r1
vld1.u8 {d17}, [r2], r1
vld1.u8 {d19}, [r2], r1
vld1.u8 {d21}, [r2]
;transpose to 8x16 matrix
vtrn.32 q3, q7
vtrn.32 q4, q8
vtrn.32 q5, q9
vtrn.32 q6, q10
vtrn.16 q3, q5
vtrn.16 q4, q6
vtrn.16 q7, q9
vtrn.16 q8, q10
vtrn.8 q3, q4
vtrn.8 q5, q6
vtrn.8 q7, q8
vtrn.8 q9, q10
bl vp8_loop_filter_neon
vswp d12, d11
vswp d16, d13
vswp d14, d12
vswp d16, d15
;store op1, op0, oq0, oq1
vst4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
vst4.8 {d10[1], d11[1], d12[1], d13[1]}, [r0], r1
vst4.8 {d10[2], d11[2], d12[2], d13[2]}, [r0], r1
vst4.8 {d10[3], d11[3], d12[3], d13[3]}, [r0], r1
vst4.8 {d10[4], d11[4], d12[4], d13[4]}, [r0], r1
vst4.8 {d10[5], d11[5], d12[5], d13[5]}, [r0], r1
vst4.8 {d10[6], d11[6], d12[6], d13[6]}, [r0], r1
vst4.8 {d10[7], d11[7], d12[7], d13[7]}, [r0], r1
vst4.8 {d14[0], d15[0], d16[0], d17[0]}, [r0], r1
vst4.8 {d14[1], d15[1], d16[1], d17[1]}, [r0], r1
vst4.8 {d14[2], d15[2], d16[2], d17[2]}, [r0], r1
vst4.8 {d14[3], d15[3], d16[3], d17[3]}, [r0], r1
vst4.8 {d14[4], d15[4], d16[4], d17[4]}, [r0], r1
vst4.8 {d14[5], d15[5], d16[5], d17[5]}, [r0], r1
vst4.8 {d14[6], d15[6], d16[6], d17[6]}, [r0], r1
vst4.8 {d14[7], d15[7], d16[7], d17[7]}, [r0]
ldmia sp!, {pc}
ENDP ; |vp8_loop_filter_vertical_edge_y_neon|
; void vp8_loop_filter_vertical_edge_uv_neon(unsigned char *u, int pitch
; const signed char *flimit,
; const signed char *limit,
; const signed char *thresh,
; unsigned char *v)
; r0 unsigned char *u,
; r1 int pitch,
; r2 const signed char *flimit,
; r3 const signed char *limit,
; sp const signed char *thresh,
; sp+4 unsigned char *v
|vp8_loop_filter_vertical_edge_uv_neon| PROC
stmdb sp!, {lr}
sub r12, r0, #4 ; move u pointer down by 4 columns
vld1.s8 {d0[], d1[]}, [r2] ; flimit
vld1.s8 {d2[], d3[]}, [r3] ; limit
ldr r2, [sp, #8] ; load v ptr
vld1.u8 {d6}, [r12], r1 ;load u data
vld1.u8 {d8}, [r12], r1
vld1.u8 {d10}, [r12], r1
vld1.u8 {d12}, [r12], r1
vld1.u8 {d14}, [r12], r1
vld1.u8 {d16}, [r12], r1
vld1.u8 {d18}, [r12], r1
vld1.u8 {d20}, [r12]
sub r3, r2, #4 ; move v pointer down by 4 columns
vld1.u8 {d7}, [r3], r1 ;load v data
vld1.u8 {d9}, [r3], r1
vld1.u8 {d11}, [r3], r1
vld1.u8 {d13}, [r3], r1
vld1.u8 {d15}, [r3], r1
vld1.u8 {d17}, [r3], r1
vld1.u8 {d19}, [r3], r1
vld1.u8 {d21}, [r3]
ldr r12, [sp, #4] ; load thresh pointer
;transpose to 8x16 matrix
vtrn.32 q3, q7
vtrn.32 q4, q8
vtrn.32 q5, q9
vtrn.32 q6, q10
vtrn.16 q3, q5
vtrn.16 q4, q6
vtrn.16 q7, q9
vtrn.16 q8, q10
vtrn.8 q3, q4
vtrn.8 q5, q6
vtrn.8 q7, q8
vtrn.8 q9, q10
vld1.s8 {d4[], d5[]}, [r12] ; thresh
bl vp8_loop_filter_neon
sub r0, r0, #2
sub r2, r2, #2
vswp d12, d11
vswp d16, d13
vswp d14, d12
vswp d16, d15
;store op1, op0, oq0, oq1
vst4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
vst4.8 {d14[0], d15[0], d16[0], d17[0]}, [r2], r1
vst4.8 {d10[1], d11[1], d12[1], d13[1]}, [r0], r1
vst4.8 {d14[1], d15[1], d16[1], d17[1]}, [r2], r1
vst4.8 {d10[2], d11[2], d12[2], d13[2]}, [r0], r1
vst4.8 {d14[2], d15[2], d16[2], d17[2]}, [r2], r1
vst4.8 {d10[3], d11[3], d12[3], d13[3]}, [r0], r1
vst4.8 {d14[3], d15[3], d16[3], d17[3]}, [r2], r1
vst4.8 {d10[4], d11[4], d12[4], d13[4]}, [r0], r1
vst4.8 {d14[4], d15[4], d16[4], d17[4]}, [r2], r1
vst4.8 {d10[5], d11[5], d12[5], d13[5]}, [r0], r1
vst4.8 {d14[5], d15[5], d16[5], d17[5]}, [r2], r1
vst4.8 {d10[6], d11[6], d12[6], d13[6]}, [r0], r1
vst4.8 {d14[6], d15[6], d16[6], d17[6]}, [r2], r1
vst4.8 {d10[7], d11[7], d12[7], d13[7]}, [r0]
vst4.8 {d14[7], d15[7], d16[7], d17[7]}, [r2]
ldmia sp!, {pc}
ENDP ; |vp8_loop_filter_vertical_edge_uv_neon|
; void vp8_loop_filter_neon();
; This is a helper function for the loopfilters. The individual functions do the
; necessary load, transpose (if necessary) and store.
; r0-r3 PRESERVE
; q0 flimit
; q1 limit
; q2 thresh
; q3 p3
; q4 p2
; q5 p1
; q6 p0
; q7 q0
; q8 q1
; q9 q2
; q10 q3
|vp8_loop_filter_neon| PROC
ldr r12, _lf_coeff_
; vp8_filter_mask
vabd.u8 q11, q3, q4 ; abs(p3 - p2)
vabd.u8 q12, q4, q5 ; abs(p2 - p1)
vabd.u8 q13, q5, q6 ; abs(p1 - p0)
vabd.u8 q14, q8, q7 ; abs(q1 - q0)
vabd.u8 q3, q9, q8 ; abs(q2 - q1)
vabd.u8 q4, q10, q9 ; abs(q3 - q2)
vabd.u8 q9, q6, q7 ; abs(p0 - q0)
vmax.u8 q11, q11, q12
vmax.u8 q12, q13, q14
vmax.u8 q3, q3, q4
vmax.u8 q15, q11, q12
; vp8_hevmask
vcgt.u8 q13, q13, q2 ; (abs(p1 - p0) > thresh)*-1
vcgt.u8 q14, q14, q2 ; (abs(q1 - q0) > thresh)*-1
vmax.u8 q15, q15, q3
vadd.u8 q0, q0, q0 ; flimit * 2
vadd.u8 q0, q0, q1 ; flimit * 2 + limit
vcge.u8 q15, q1, q15
vabd.u8 q2, q5, q8 ; a = abs(p1 - q1)
vqadd.u8 q9, q9, q9 ; b = abs(p0 - q0) * 2
vshr.u8 q2, q2, #1 ; a = a / 2
vqadd.u8 q9, q9, q2 ; a = b + a
vcge.u8 q9, q0, q9 ; (a > flimit * 2 + limit) * -1
vld1.u8 {q0}, [r12]!
; vp8_filter() function
; convert to signed
veor q7, q7, q0 ; qs0
veor q6, q6, q0 ; ps0
veor q5, q5, q0 ; ps1
veor q8, q8, q0 ; qs1
vld1.u8 {q10}, [r12]!
vsubl.s8 q2, d14, d12 ; ( qs0 - ps0)
vsubl.s8 q11, d15, d13
vmovl.u8 q4, d20
vqsub.s8 q1, q5, q8 ; vp8_filter = clamp(ps1-qs1)
vorr q14, q13, q14 ; vp8_hevmask
vmul.i16 q2, q2, q4 ; 3 * ( qs0 - ps0)
vmul.i16 q11, q11, q4
vand q1, q1, q14 ; vp8_filter &= hev
vand q15, q15, q9 ; vp8_filter_mask
vaddw.s8 q2, q2, d2
vaddw.s8 q11, q11, d3
vld1.u8 {q9}, [r12]!
; vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
vqmovn.s16 d2, q2
vqmovn.s16 d3, q11
vand q1, q1, q15 ; vp8_filter &= mask
vqadd.s8 q2, q1, q10 ; Filter2 = clamp(vp8_filter+3)
vqadd.s8 q1, q1, q9 ; Filter1 = clamp(vp8_filter+4)
vshr.s8 q2, q2, #3 ; Filter2 >>= 3
vshr.s8 q1, q1, #3 ; Filter1 >>= 3
vqadd.s8 q11, q6, q2 ; u = clamp(ps0 + Filter2)
vqsub.s8 q10, q7, q1 ; u = clamp(qs0 - Filter1)
; outer tap adjustments: ++vp8_filter >> 1
vrshr.s8 q1, q1, #1
vbic q1, q1, q14 ; vp8_filter &= ~hev
vqadd.s8 q13, q5, q1 ; u = clamp(ps1 + vp8_filter)
vqsub.s8 q12, q8, q1 ; u = clamp(qs1 - vp8_filter)
veor q5, q13, q0 ; *op1 = u^0x80
veor q6, q11, q0 ; *op0 = u^0x80
veor q7, q10, q0 ; *oq0 = u^0x80
veor q8, q12, q0 ; *oq1 = u^0x80
bx lr
ENDP ; |vp8_loop_filter_neon|
AREA loopfilter_dat, DATA, READONLY
_lf_coeff_
DCD lf_coeff
lf_coeff
DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080
DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303
DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404
DCD 0x01010101, 0x01010101, 0x01010101, 0x01010101
END
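The helper above filters 16 pixel positions per call; for readability, here is a per-pixel scalar sketch of the same arithmetic, reconstructed from the comments in the code (the helper and parameter names are illustrative, not the upstream C reference).

#include <stdlib.h>

typedef signed char s8;          /* one 8-bit lane */

static s8 sclamp(int v)          /* signed saturation, as vqadd.s8/vqsub.s8 */
{
    return (s8)(v > 127 ? 127 : (v < -128 ? -128 : v));
}

/* One pixel position across the edge: p3..p0 on one side, q0..q3 on the
 * other. flimit, limit and thresh are the per-lane filter parameters. */
static void loop_filter_px(unsigned char *p3, unsigned char *p2,
                           unsigned char *p1, unsigned char *p0,
                           unsigned char *q0, unsigned char *q1,
                           unsigned char *q2, unsigned char *q3,
                           int flimit, int limit, int thresh)
{
    int mask, hev, f, f1, f2;
    s8 ps1, ps0, qs0, qs1;

    /* vp8_filter_mask: every neighbour difference within limit, and the
     * edge difference within flimit * 2 + limit */
    mask = abs(*p3 - *p2) <= limit && abs(*p2 - *p1) <= limit &&
           abs(*p1 - *p0) <= limit && abs(*q1 - *q0) <= limit &&
           abs(*q2 - *q1) <= limit && abs(*q3 - *q2) <= limit &&
           abs(*p0 - *q0) * 2 + abs(*p1 - *q1) / 2 <= flimit * 2 + limit;

    /* vp8_hevmask: high edge variance */
    hev = abs(*p1 - *p0) > thresh || abs(*q1 - *q0) > thresh;

    /* convert to signed by flipping the sign bit (the veor with 0x80) */
    ps1 = (s8)(*p1 ^ 0x80);  ps0 = (s8)(*p0 ^ 0x80);
    qs0 = (s8)(*q0 ^ 0x80);  qs1 = (s8)(*q1 ^ 0x80);

    f = hev ? sclamp(ps1 - qs1) : 0;            /* vp8_filter &= hev  */
    f = mask ? sclamp(f + 3 * (qs0 - ps0)) : 0; /* vp8_filter &= mask */

    f1 = sclamp(f + 4) >> 3;                    /* Filter1 */
    f2 = sclamp(f + 3) >> 3;                    /* Filter2 */

    *q0 = (unsigned char)(sclamp(qs0 - f1) ^ 0x80);   /* oq0 */
    *p0 = (unsigned char)(sclamp(ps0 + f2) ^ 0x80);   /* op0 */

    f = (f1 + 1) >> 1;                          /* outer tap, vrshr.s8 #1 */
    if (hev) f = 0;                             /* vbic: vp8_filter &= ~hev */
    *q1 = (unsigned char)(sclamp(qs1 - f) ^ 0x80);    /* oq1 */
    *p1 = (unsigned char)(sclamp(ps1 + f) ^ 0x80);    /* op1 */
}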


@ -0,0 +1,118 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_loop_filter_simple_horizontal_edge_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
;Note: flimit, limit, and thresh should be positive numbers. All 16 elements in flimit
;are equal. So, in the code, only one load is needed
;for flimit. Same way applies to limit and thresh.
; r0 unsigned char *s,
; r1 int p, //pitch
; r2 const signed char *flimit,
; r3 const signed char *limit,
; stack(r4) const signed char *thresh,
; //stack(r5) int count --unused
|vp8_loop_filter_simple_horizontal_edge_neon| PROC
sub r0, r0, r1, lsl #1 ; move src pointer down by 2 lines
ldr r12, _lfhy_coeff_
vld1.u8 {q5}, [r0], r1 ; p1
vld1.s8 {d2[], d3[]}, [r2] ; flimit
vld1.s8 {d26[], d27[]}, [r3] ; limit -> q13
vld1.u8 {q6}, [r0], r1 ; p0
vld1.u8 {q0}, [r12]! ; 0x80
vld1.u8 {q7}, [r0], r1 ; q0
vld1.u8 {q10}, [r12]! ; 0x03
vld1.u8 {q8}, [r0] ; q1
;vp8_filter_mask() function
vabd.u8 q15, q6, q7 ; abs(p0 - q0)
vabd.u8 q14, q5, q8 ; abs(p1 - q1)
vqadd.u8 q15, q15, q15 ; abs(p0 - q0) * 2
vshr.u8 q14, q14, #1 ; abs(p1 - q1) / 2
vqadd.u8 q15, q15, q14 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
;vp8_filter() function
veor q7, q7, q0 ; qs0: q0 offset to convert to a signed value
veor q6, q6, q0 ; ps0: p0 offset to convert to a signed value
veor q5, q5, q0 ; ps1: p1 offset to convert to a signed value
veor q8, q8, q0 ; qs1: q1 offset to convert to a signed value
vadd.u8 q1, q1, q1 ; flimit * 2
vadd.u8 q1, q1, q13 ; flimit * 2 + limit
vcge.u8 q15, q1, q15 ; (abs(p0 - q0)*2 + abs(p1-q1)/2 > flimit*2 + limit)*-1
;;;;;;;;;;
;vqsub.s8 q2, q7, q6 ; ( qs0 - ps0)
vsubl.s8 q2, d14, d12 ; ( qs0 - ps0)
vsubl.s8 q3, d15, d13
vqsub.s8 q4, q5, q8 ; q4: vp8_filter = vp8_signed_char_clamp(ps1-qs1)
;vmul.i8 q2, q2, q10 ; 3 * ( qs0 - ps0)
vadd.s16 q11, q2, q2 ; 3 * ( qs0 - ps0)
vadd.s16 q12, q3, q3
vld1.u8 {q9}, [r12]! ; 0x04
vadd.s16 q2, q2, q11
vadd.s16 q3, q3, q12
vaddw.s8 q2, q2, d8 ; vp8_filter + 3 * ( qs0 - ps0)
vaddw.s8 q3, q3, d9
;vqadd.s8 q4, q4, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
vqmovn.s16 d8, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
vqmovn.s16 d9, q3
;;;;;;;;;;;;;
vand q4, q4, q15 ; vp8_filter &= mask
vqadd.s8 q2, q4, q10 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
vqadd.s8 q4, q4, q9 ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
vshr.s8 q2, q2, #3 ; Filter2 >>= 3
vshr.s8 q4, q4, #3 ; Filter1 >>= 3
sub r0, r0, r1, lsl #1
;calculate output
vqadd.s8 q11, q6, q2 ; u = vp8_signed_char_clamp(ps0 + Filter2)
vqsub.s8 q10, q7, q4 ; u = vp8_signed_char_clamp(qs0 - Filter1)
add r3, r0, r1
veor q6, q11, q0 ; *op0 = u^0x80
veor q7, q10, q0 ; *oq0 = u^0x80
vst1.u8 {q6}, [r0] ; store op0
vst1.u8 {q7}, [r3] ; store oq0
bx lr
ENDP ; |vp8_loop_filter_simple_horizontal_edge_neon|
;-----------------
AREA hloopfiltery_dat, DATA, READWRITE ;read/write by default
;A data section named hloopfiltery_dat is declared. Each DCD below reserves one word of memory.
;The label lfhy_coeff can be used to access the data.
;Data addresses: lfhy_coeff, lfhy_coeff+4, lfhy_coeff+8, ...
_lfhy_coeff_
DCD lfhy_coeff
lfhy_coeff
DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080
DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303
DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404
END
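For reference, a per-pixel scalar sketch of this "simple" filter: it reads only p1..q1, has no hev logic, and rewrites only p0/q0. The same arithmetic is used by the vertical variant in the next file. Names are illustrative.

#include <stdlib.h>

static signed char sc8(int v)    /* signed saturation, as vqadd.s8/vqsub.s8 */
{
    return (signed char)(v > 127 ? 127 : (v < -128 ? -128 : v));
}

static void simple_filter_px(unsigned char *p1, unsigned char *p0,
                             unsigned char *q0, unsigned char *q1,
                             int flimit, int limit)
{
    signed char ps1 = (signed char)(*p1 ^ 0x80);
    signed char ps0 = (signed char)(*p0 ^ 0x80);
    signed char qs0 = (signed char)(*q0 ^ 0x80);
    signed char qs1 = (signed char)(*q1 ^ 0x80);
    int mask, f, f1, f2;

    mask = abs(*p0 - *q0) * 2 + abs(*p1 - *q1) / 2 <= flimit * 2 + limit;

    f = sc8(ps1 - qs1);                  /* clamp(ps1 - qs1)           */
    f = sc8(f + 3 * (qs0 - ps0));        /* clamp(f + 3 * (qs0 - ps0)) */
    if (!mask) f = 0;                    /* vand q4, q4, q15           */

    f1 = sc8(f + 4) >> 3;                /* Filter1 */
    f2 = sc8(f + 3) >> 3;                /* Filter2 */

    *q0 = (unsigned char)(sc8(qs0 - f1) ^ 0x80);   /* oq0 */
    *p0 = (unsigned char)(sc8(ps0 + f2) ^ 0x80);   /* op0 */
}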


@ -0,0 +1,159 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_loop_filter_simple_vertical_edge_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
;Note: flimit, limit, and thresh should be positive numbers. All 16 elements in flimit
;are equal. So, in the code, only one load is needed
;for flimit. Same way applies to limit and thresh.
; r0 unsigned char *s,
; r1 int p, //pitch
; r2 const signed char *flimit,
; r3 const signed char *limit,
; stack(r4) const signed char *thresh,
; //stack(r5) int count --unused
|vp8_loop_filter_simple_vertical_edge_neon| PROC
sub r0, r0, #2 ; move src pointer down by 2 columns
vld4.8 {d6[0], d7[0], d8[0], d9[0]}, [r0], r1
vld1.s8 {d2[], d3[]}, [r2] ; flimit
vld1.s8 {d26[], d27[]}, [r3] ; limit -> q13
vld4.8 {d6[1], d7[1], d8[1], d9[1]}, [r0], r1
ldr r12, _vlfy_coeff_
vld4.8 {d6[2], d7[2], d8[2], d9[2]}, [r0], r1
vld4.8 {d6[3], d7[3], d8[3], d9[3]}, [r0], r1
vld4.8 {d6[4], d7[4], d8[4], d9[4]}, [r0], r1
vld4.8 {d6[5], d7[5], d8[5], d9[5]}, [r0], r1
vld4.8 {d6[6], d7[6], d8[6], d9[6]}, [r0], r1
vld4.8 {d6[7], d7[7], d8[7], d9[7]}, [r0], r1
vld4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
vld1.u8 {q0}, [r12]! ; 0x80
vld4.8 {d10[1], d11[1], d12[1], d13[1]}, [r0], r1
vld1.u8 {q11}, [r12]! ; 0x03
vld4.8 {d10[2], d11[2], d12[2], d13[2]}, [r0], r1
vld1.u8 {q12}, [r12]! ; 0x04
vld4.8 {d10[3], d11[3], d12[3], d13[3]}, [r0], r1
vld4.8 {d10[4], d11[4], d12[4], d13[4]}, [r0], r1
vld4.8 {d10[5], d11[5], d12[5], d13[5]}, [r0], r1
vld4.8 {d10[6], d11[6], d12[6], d13[6]}, [r0], r1
vld4.8 {d10[7], d11[7], d12[7], d13[7]}, [r0], r1
vswp d7, d10
vswp d12, d9
;vswp q4, q5 ; p1:q3, p0:q5, q0:q4, q1:q6
;vp8_filter_mask() function
;vp8_hevmask() function
sub r0, r0, r1, lsl #4
vabd.u8 q15, q5, q4 ; abs(p0 - q0)
vabd.u8 q14, q3, q6 ; abs(p1 - q1)
vqadd.u8 q15, q15, q15 ; abs(p0 - q0) * 2
vshr.u8 q14, q14, #1 ; abs(p1 - q1) / 2
vqadd.u8 q15, q15, q14 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
veor q4, q4, q0 ; qs0: q0 offset to convert to a signed value
veor q5, q5, q0 ; ps0: p0 offset to convert to a signed value
veor q3, q3, q0 ; ps1: p1 offset to convert to a signed value
veor q6, q6, q0 ; qs1: q1 offset to convert to a signed value
vadd.u8 q1, q1, q1 ; flimit * 2
vadd.u8 q1, q1, q13 ; flimit * 2 + limit
vcge.u8 q15, q1, q15 ; (abs(p0 - q0)*2 + abs(p1-q1)/2 > flimit*2 + limit)*-1
;vp8_filter() function
;;;;;;;;;;
;vqsub.s8 q2, q5, q4 ; ( qs0 - ps0)
vsubl.s8 q2, d8, d10 ; ( qs0 - ps0)
vsubl.s8 q13, d9, d11
vqsub.s8 q1, q3, q6 ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
;vmul.i8 q2, q2, q11 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
vadd.s16 q10, q2, q2 ; 3 * ( qs0 - ps0)
vadd.s16 q14, q13, q13
vadd.s16 q2, q2, q10
vadd.s16 q13, q13, q14
;vqadd.s8 q1, q1, q2
vaddw.s8 q2, q2, d2 ; vp8_filter + 3 * ( qs0 - ps0)
vaddw.s8 q13, q13, d3
vqmovn.s16 d2, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
vqmovn.s16 d3, q13
add r0, r0, #1
add r2, r0, r1
;;;;;;;;;;;
vand q1, q1, q15 ; vp8_filter &= mask
vqadd.s8 q2, q1, q11 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
vqadd.s8 q1, q1, q12 ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
vshr.s8 q2, q2, #3 ; Filter2 >>= 3
vshr.s8 q1, q1, #3 ; Filter1 >>= 3
;calculate output
vqsub.s8 q10, q4, q1 ; u = vp8_signed_char_clamp(qs0 - Filter1)
vqadd.s8 q11, q5, q2 ; u = vp8_signed_char_clamp(ps0 + Filter2)
veor q7, q10, q0 ; *oq0 = u^0x80
veor q6, q11, q0 ; *op0 = u^0x80
add r3, r2, r1
vswp d13, d14
add r12, r3, r1
;store op1, op0, oq0, oq1
vst2.8 {d12[0], d13[0]}, [r0]
vst2.8 {d12[1], d13[1]}, [r2]
vst2.8 {d12[2], d13[2]}, [r3]
vst2.8 {d12[3], d13[3]}, [r12], r1
add r0, r12, r1
vst2.8 {d12[4], d13[4]}, [r12]
vst2.8 {d12[5], d13[5]}, [r0], r1
add r2, r0, r1
vst2.8 {d12[6], d13[6]}, [r0]
vst2.8 {d12[7], d13[7]}, [r2], r1
add r3, r2, r1
vst2.8 {d14[0], d15[0]}, [r2]
vst2.8 {d14[1], d15[1]}, [r3], r1
add r12, r3, r1
vst2.8 {d14[2], d15[2]}, [r3]
vst2.8 {d14[3], d15[3]}, [r12], r1
add r0, r12, r1
vst2.8 {d14[4], d15[4]}, [r12]
vst2.8 {d14[5], d15[5]}, [r0], r1
add r2, r0, r1
vst2.8 {d14[6], d15[6]}, [r0]
vst2.8 {d14[7], d15[7]}, [r2]
bx lr
ENDP ; |vp8_loop_filter_simple_vertical_edge_neon|
;-----------------
AREA vloopfiltery_dat, DATA, READWRITE ;read/write by default
;A data section named vloopfiltery_dat is declared. Each DCD below reserves one word of memory.
;The label vlfy_coeff can be used to access the data.
;Data addresses: vlfy_coeff, vlfy_coeff+4, vlfy_coeff+8, ...
_vlfy_coeff_
DCD vlfy_coeff
vlfy_coeff
DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080
DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303
DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404
END


@ -0,0 +1,519 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_mbloop_filter_horizontal_edge_y_neon|
EXPORT |vp8_mbloop_filter_horizontal_edge_uv_neon|
EXPORT |vp8_mbloop_filter_vertical_edge_y_neon|
EXPORT |vp8_mbloop_filter_vertical_edge_uv_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
; flimit, limit, and thresh should be positive numbers.
; All 16 elements in these variables are equal.
; void vp8_mbloop_filter_horizontal_edge_y_neon(unsigned char *src, int pitch,
; const signed char *flimit,
; const signed char *limit,
; const signed char *thresh,
; int count)
; r0 unsigned char *src,
; r1 int pitch,
; r2 const signed char *flimit,
; r3 const signed char *limit,
; sp const signed char *thresh,
; sp+4 int count (unused)
|vp8_mbloop_filter_horizontal_edge_y_neon| PROC
stmdb sp!, {lr}
sub r0, r0, r1, lsl #2 ; move src pointer down by 4 lines
ldr r12, [sp, #4] ; load thresh pointer
vld1.u8 {q3}, [r0], r1 ; p3
vld1.s8 {d2[], d3[]}, [r3] ; limit
vld1.u8 {q4}, [r0], r1 ; p2
vld1.s8 {d4[], d5[]}, [r12] ; thresh
vld1.u8 {q5}, [r0], r1 ; p1
vld1.u8 {q6}, [r0], r1 ; p0
vld1.u8 {q7}, [r0], r1 ; q0
vld1.u8 {q8}, [r0], r1 ; q1
vld1.u8 {q9}, [r0], r1 ; q2
vld1.u8 {q10}, [r0], r1 ; q3
bl vp8_mbloop_filter_neon
sub r0, r0, r1, lsl #3
add r0, r0, r1
add r2, r0, r1
add r3, r2, r1
vst1.u8 {q4}, [r0] ; store op2
vst1.u8 {q5}, [r2] ; store op1
vst1.u8 {q6}, [r3], r1 ; store op0
add r12, r3, r1
vst1.u8 {q7}, [r3] ; store oq0
vst1.u8 {q8}, [r12], r1 ; store oq1
vst1.u8 {q9}, [r12] ; store oq2
ldmia sp!, {pc}
ENDP ; |vp8_mbloop_filter_horizontal_edge_y_neon|
; void vp8_mbloop_filter_horizontal_edge_uv_neon(unsigned char *u, int pitch,
; const signed char *flimit,
; const signed char *limit,
; const signed char *thresh,
; unsigned char *v)
; r0 unsigned char *u,
; r1 int pitch,
; r2 const signed char *flimit,
; r3 const signed char *limit,
; sp const signed char *thresh,
; sp+4 unsigned char *v
|vp8_mbloop_filter_horizontal_edge_uv_neon| PROC
stmdb sp!, {lr}
sub r0, r0, r1, lsl #2 ; move u pointer down by 4 lines
vld1.s8 {d2[], d3[]}, [r3] ; limit
ldr r3, [sp, #8] ; load v ptr
ldr r12, [sp, #4] ; load thresh pointer
sub r3, r3, r1, lsl #2 ; move v pointer down by 4 lines
vld1.u8 {d6}, [r0], r1 ; p3
vld1.u8 {d7}, [r3], r1 ; p3
vld1.u8 {d8}, [r0], r1 ; p2
vld1.u8 {d9}, [r3], r1 ; p2
vld1.u8 {d10}, [r0], r1 ; p1
vld1.u8 {d11}, [r3], r1 ; p1
vld1.u8 {d12}, [r0], r1 ; p0
vld1.u8 {d13}, [r3], r1 ; p0
vld1.u8 {d14}, [r0], r1 ; q0
vld1.u8 {d15}, [r3], r1 ; q0
vld1.u8 {d16}, [r0], r1 ; q1
vld1.u8 {d17}, [r3], r1 ; q1
vld1.u8 {d18}, [r0], r1 ; q2
vld1.u8 {d19}, [r3], r1 ; q2
vld1.u8 {d20}, [r0], r1 ; q3
vld1.u8 {d21}, [r3], r1 ; q3
vld1.s8 {d4[], d5[]}, [r12] ; thresh
bl vp8_mbloop_filter_neon
sub r0, r0, r1, lsl #3
sub r3, r3, r1, lsl #3
add r0, r0, r1
add r3, r3, r1
vst1.u8 {d8}, [r0], r1 ; store u op2
vst1.u8 {d9}, [r3], r1 ; store v op2
vst1.u8 {d10}, [r0], r1 ; store u op1
vst1.u8 {d11}, [r3], r1 ; store v op1
vst1.u8 {d12}, [r0], r1 ; store u op0
vst1.u8 {d13}, [r3], r1 ; store v op0
vst1.u8 {d14}, [r0], r1 ; store u oq0
vst1.u8 {d15}, [r3], r1 ; store v oq0
vst1.u8 {d16}, [r0], r1 ; store u oq1
vst1.u8 {d17}, [r3], r1 ; store v oq1
vst1.u8 {d18}, [r0], r1 ; store u oq2
vst1.u8 {d19}, [r3], r1 ; store v oq2
ldmia sp!, {pc}
ENDP ; |vp8_mbloop_filter_horizontal_edge_uv_neon|
; void vp8_mbloop_filter_vertical_edge_y_neon(unsigned char *src, int pitch,
; const signed char *flimit,
; const signed char *limit,
; const signed char *thresh,
; int count)
; r0 unsigned char *src,
; r1 int pitch,
; r2 const signed char *flimit,
; r3 const signed char *limit,
; sp const signed char *thresh,
; sp+4 int count (unused)
|vp8_mbloop_filter_vertical_edge_y_neon| PROC
stmdb sp!, {lr}
sub r0, r0, #4 ; move src pointer down by 4 columns
vld1.u8 {d6}, [r0], r1 ; load first 8-line src data
ldr r12, [sp, #4] ; load thresh pointer
vld1.u8 {d8}, [r0], r1
sub sp, sp, #32
vld1.u8 {d10}, [r0], r1
vld1.u8 {d12}, [r0], r1
vld1.u8 {d14}, [r0], r1
vld1.u8 {d16}, [r0], r1
vld1.u8 {d18}, [r0], r1
vld1.u8 {d20}, [r0], r1
vld1.u8 {d7}, [r0], r1 ; load second 8-line src data
vld1.u8 {d9}, [r0], r1
vld1.u8 {d11}, [r0], r1
vld1.u8 {d13}, [r0], r1
vld1.u8 {d15}, [r0], r1
vld1.u8 {d17}, [r0], r1
vld1.u8 {d19}, [r0], r1
vld1.u8 {d21}, [r0], r1
;transpose to 8x16 matrix
vtrn.32 q3, q7
vtrn.32 q4, q8
vtrn.32 q5, q9
vtrn.32 q6, q10
vtrn.16 q3, q5
vtrn.16 q4, q6
vtrn.16 q7, q9
vtrn.16 q8, q10
vtrn.8 q3, q4
vtrn.8 q5, q6
vtrn.8 q7, q8
vtrn.8 q9, q10
vld1.s8 {d4[], d5[]}, [r12] ; thresh
vld1.s8 {d2[], d3[]}, [r3] ; limit
mov r12, sp
vst1.u8 {q3}, [r12]!
vst1.u8 {q10}, [r12]!
bl vp8_mbloop_filter_neon
sub r0, r0, r1, lsl #4
add r2, r0, r1
add r3, r2, r1
vld1.u8 {q3}, [sp]!
vld1.u8 {q10}, [sp]!
;transpose to 16x8 matrix
vtrn.32 q3, q7
vtrn.32 q4, q8
vtrn.32 q5, q9
vtrn.32 q6, q10
add r12, r3, r1
vtrn.16 q3, q5
vtrn.16 q4, q6
vtrn.16 q7, q9
vtrn.16 q8, q10
vtrn.8 q3, q4
vtrn.8 q5, q6
vtrn.8 q7, q8
vtrn.8 q9, q10
;store op2, op1, op0, oq0, oq1, oq2
vst1.8 {d6}, [r0]
vst1.8 {d8}, [r2]
vst1.8 {d10}, [r3]
vst1.8 {d12}, [r12], r1
add r0, r12, r1
vst1.8 {d14}, [r12]
vst1.8 {d16}, [r0], r1
add r2, r0, r1
vst1.8 {d18}, [r0]
vst1.8 {d20}, [r2], r1
add r3, r2, r1
vst1.8 {d7}, [r2]
vst1.8 {d9}, [r3], r1
add r12, r3, r1
vst1.8 {d11}, [r3]
vst1.8 {d13}, [r12], r1
add r0, r12, r1
vst1.8 {d15}, [r12]
vst1.8 {d17}, [r0], r1
add r2, r0, r1
vst1.8 {d19}, [r0]
vst1.8 {d21}, [r2]
ldmia sp!, {pc}
ENDP ; |vp8_mbloop_filter_vertical_edge_y_neon|
; void vp8_mbloop_filter_vertical_edge_uv_neon(unsigned char *u, int pitch,
; const signed char *flimit,
; const signed char *limit,
; const signed char *thresh,
; unsigned char *v)
; r0 unsigned char *u,
; r1 int pitch,
; r2 const signed char *flimit,
; r3 const signed char *limit,
; sp const signed char *thresh,
; sp+4 unsigned char *v
|vp8_mbloop_filter_vertical_edge_uv_neon| PROC
stmdb sp!, {lr}
sub r0, r0, #4 ; move src pointer down by 4 columns
vld1.s8 {d2[], d3[]}, [r3] ; limit
ldr r3, [sp, #8] ; load v ptr
ldr r12, [sp, #4] ; load thresh pointer
sub r3, r3, #4 ; move v pointer down by 4 columns
vld1.u8 {d6}, [r0], r1 ;load u data
vld1.u8 {d7}, [r3], r1 ;load v data
vld1.u8 {d8}, [r0], r1
vld1.u8 {d9}, [r3], r1
vld1.u8 {d10}, [r0], r1
vld1.u8 {d11}, [r3], r1
vld1.u8 {d12}, [r0], r1
vld1.u8 {d13}, [r3], r1
vld1.u8 {d14}, [r0], r1
vld1.u8 {d15}, [r3], r1
vld1.u8 {d16}, [r0], r1
vld1.u8 {d17}, [r3], r1
vld1.u8 {d18}, [r0], r1
vld1.u8 {d19}, [r3], r1
vld1.u8 {d20}, [r0], r1
vld1.u8 {d21}, [r3], r1
;transpose to 8x16 matrix
vtrn.32 q3, q7
vtrn.32 q4, q8
vtrn.32 q5, q9
vtrn.32 q6, q10
vtrn.16 q3, q5
vtrn.16 q4, q6
vtrn.16 q7, q9
vtrn.16 q8, q10
vtrn.8 q3, q4
vtrn.8 q5, q6
vtrn.8 q7, q8
vtrn.8 q9, q10
sub sp, sp, #32
vld1.s8 {d4[], d5[]}, [r12] ; thresh
mov r12, sp
vst1.u8 {q3}, [r12]!
vst1.u8 {q10}, [r12]!
bl vp8_mbloop_filter_neon
sub r0, r0, r1, lsl #3
sub r3, r3, r1, lsl #3
vld1.u8 {q3}, [sp]!
vld1.u8 {q10}, [sp]!
;transpose to 16x8 matrix
vtrn.32 q3, q7
vtrn.32 q4, q8
vtrn.32 q5, q9
vtrn.32 q6, q10
vtrn.16 q3, q5
vtrn.16 q4, q6
vtrn.16 q7, q9
vtrn.16 q8, q10
vtrn.8 q3, q4
vtrn.8 q5, q6
vtrn.8 q7, q8
vtrn.8 q9, q10
;store op2, op1, op0, oq0, oq1, oq2
vst1.8 {d6}, [r0], r1
vst1.8 {d7}, [r3], r1
vst1.8 {d8}, [r0], r1
vst1.8 {d9}, [r3], r1
vst1.8 {d10}, [r0], r1
vst1.8 {d11}, [r3], r1
vst1.8 {d12}, [r0], r1
vst1.8 {d13}, [r3], r1
vst1.8 {d14}, [r0], r1
vst1.8 {d15}, [r3], r1
vst1.8 {d16}, [r0], r1
vst1.8 {d17}, [r3], r1
vst1.8 {d18}, [r0], r1
vst1.8 {d19}, [r3], r1
vst1.8 {d20}, [r0], r1
vst1.8 {d21}, [r3], r1
ldmia sp!, {pc}
ENDP ; |vp8_mbloop_filter_vertical_edge_uv_neon|
; void vp8_mbloop_filter_neon()
; This is a helper function for the macroblock loopfilters. The individual
; functions do the necessary load, transpose (if necessary), preserve (if
; necessary) and store.
; TODO:
; The vertical filter writes p3/q3 back out because two 4 element writes are
; much simpler than ordering and writing two 3-element sets (or three 2-element
; sets, or whatever other combinations are possible).
; If we can preserve q3 and q10, the vertical filter will be able to avoid
; storing those values on the stack and reading them back after the filter.
; r0,r1 PRESERVE
; r2 flimit
; r3 PRESERVE
; q1 limit
; q2 thresh
; q3 p3
; q4 p2
; q5 p1
; q6 p0
; q7 q0
; q8 q1
; q9 q2
; q10 q3
|vp8_mbloop_filter_neon| PROC
ldr r12, _mblf_coeff_
; vp8_filter_mask
vabd.u8 q11, q3, q4 ; abs(p3 - p2)
vabd.u8 q12, q4, q5 ; abs(p2 - p1)
vabd.u8 q13, q5, q6 ; abs(p1 - p0)
vabd.u8 q14, q8, q7 ; abs(q1 - q0)
vabd.u8 q3, q9, q8 ; abs(q2 - q1)
vabd.u8 q0, q10, q9 ; abs(q3 - q2)
vmax.u8 q11, q11, q12
vmax.u8 q12, q13, q14
vmax.u8 q3, q3, q0
vmax.u8 q15, q11, q12
vabd.u8 q12, q6, q7 ; abs(p0 - q0)
; vp8_hevmask
vcgt.u8 q13, q13, q2 ; (abs(p1 - p0) > thresh) * -1
vcgt.u8 q14, q14, q2 ; (abs(q1 - q0) > thresh) * -1
vmax.u8 q15, q15, q3
vld1.s8 {d4[], d5[]}, [r2] ; flimit
vld1.u8 {q0}, [r12]!
vadd.u8 q2, q2, q2 ; flimit * 2
vadd.u8 q2, q2, q1 ; flimit * 2 + limit
vcge.u8 q15, q1, q15
vabd.u8 q1, q5, q8 ; a = abs(p1 - q1)
vqadd.u8 q12, q12, q12 ; b = abs(p0 - q0) * 2
vshr.u8 q1, q1, #1 ; a = a / 2
vqadd.u8 q12, q12, q1 ; a = b + a
vcge.u8 q12, q2, q12 ; (a > flimit * 2 + limit) * -1
; vp8_filter
; convert to signed
veor q7, q7, q0 ; qs0
veor q6, q6, q0 ; ps0
veor q5, q5, q0 ; ps1
veor q8, q8, q0 ; qs1
veor q4, q4, q0 ; ps2
veor q9, q9, q0 ; qs2
vorr q14, q13, q14 ; vp8_hevmask
vsubl.s8 q2, d14, d12 ; qs0 - ps0
vsubl.s8 q13, d15, d13
vqsub.s8 q1, q5, q8 ; vp8_filter = clamp(ps1-qs1)
vadd.s16 q10, q2, q2 ; 3 * (qs0 - ps0)
vadd.s16 q11, q13, q13
vand q15, q15, q12 ; vp8_filter_mask
vadd.s16 q2, q2, q10
vadd.s16 q13, q13, q11
vld1.u8 {q12}, [r12]! ; #3
vaddw.s8 q2, q2, d2 ; vp8_filter + 3 * ( qs0 - ps0)
vaddw.s8 q13, q13, d3
vld1.u8 {q11}, [r12]! ; #4
; vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
vqmovn.s16 d2, q2
vqmovn.s16 d3, q13
vand q1, q1, q15 ; vp8_filter &= mask
vld1.u8 {q15}, [r12]! ; #63
;
vand q13, q1, q14 ; Filter2 &= hev
vld1.u8 {d7}, [r12]! ; #9
vqadd.s8 q2, q13, q11 ; Filter1 = clamp(Filter2+4)
vqadd.s8 q13, q13, q12 ; Filter2 = clamp(Filter2+3)
vld1.u8 {d6}, [r12]! ; #18
vshr.s8 q2, q2, #3 ; Filter1 >>= 3
vshr.s8 q13, q13, #3 ; Filter2 >>= 3
vmov q10, q15
vmov q12, q15
vqsub.s8 q7, q7, q2 ; qs0 = clamp(qs0 - Filter1)
vld1.u8 {d5}, [r12]! ; #27
vqadd.s8 q6, q6, q13 ; ps0 = clamp(ps0 + Filter2)
vbic q1, q1, q14 ; vp8_filter &= ~hev
; roughly 1/7th difference across boundary
; roughly 2/7th difference across boundary
; roughly 3/7th difference across boundary
vmov q11, q15
vmov q13, q15
vmov q14, q15
vmlal.s8 q10, d2, d7 ; Filter2 * 9
vmlal.s8 q11, d3, d7
vmlal.s8 q12, d2, d6 ; Filter2 * 18
vmlal.s8 q13, d3, d6
vmlal.s8 q14, d2, d5 ; Filter2 * 27
vmlal.s8 q15, d3, d5
vqshrn.s16 d20, q10, #7 ; u = clamp((63 + Filter2 * 9)>>7)
vqshrn.s16 d21, q11, #7
vqshrn.s16 d24, q12, #7 ; u = clamp((63 + Filter2 * 18)>>7)
vqshrn.s16 d25, q13, #7
vqshrn.s16 d28, q14, #7 ; u = clamp((63 + Filter2 * 27)>>7)
vqshrn.s16 d29, q15, #7
vqsub.s8 q11, q9, q10 ; s = clamp(qs2 - u)
vqadd.s8 q10, q4, q10 ; s = clamp(ps2 + u)
vqsub.s8 q13, q8, q12 ; s = clamp(qs1 - u)
vqadd.s8 q12, q5, q12 ; s = clamp(ps1 + u)
vqsub.s8 q15, q7, q14 ; s = clamp(qs0 - u)
vqadd.s8 q14, q6, q14 ; s = clamp(ps0 + u)
veor q9, q11, q0 ; *oq2 = s^0x80
veor q4, q10, q0 ; *op2 = s^0x80
veor q8, q13, q0 ; *oq1 = s^0x80
veor q5, q12, q0 ; *op1 = s^0x80
veor q7, q15, q0 ; *oq0 = s^0x80
veor q6, q14, q0 ; *op0 = s^0x80
bx lr
ENDP ; |vp8_mbloop_filter_neon|
AREA mbloopfilter_dat, DATA, READONLY
_mblf_coeff_
DCD mblf_coeff
mblf_coeff
DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080
DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303
DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404
DCD 0x003f003f, 0x003f003f, 0x003f003f, 0x003f003f
DCD 0x09090909, 0x09090909, 0x12121212, 0x12121212
DCD 0x1b1b1b1b, 0x1b1b1b1b
END
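A per-pixel scalar sketch of the wide-filter tail above, i.e. the "roughly 1/7th, 2/7th, 3/7th difference across boundary" taps. Here `filter` is the clamped vp8_filter value after masking and after removing the hev component, the six pixel values are already in the 0x80-offset signed domain, and the hev-gated Filter1/Filter2 step has already adjusted qs0/ps0, exactly as in the code above. Names are illustrative.

static signed char msc8(int v)   /* signed saturation, as vqadd.s8/vqshrn.s16 */
{
    return (signed char)(v > 127 ? 127 : (v < -128 ? -128 : v));
}

static void mb_filter_taps(signed char *ps2, signed char *ps1, signed char *ps0,
                           signed char *qs0, signed char *qs1, signed char *qs2,
                           int filter)
{
    int u;

    u = msc8((63 + filter * 27) >> 7);   /* ~3/7 of the difference */
    *qs0 = msc8(*qs0 - u);
    *ps0 = msc8(*ps0 + u);

    u = msc8((63 + filter * 18) >> 7);   /* ~2/7 */
    *qs1 = msc8(*qs1 - u);
    *ps1 = msc8(*ps1 + u);

    u = msc8((63 + filter * 9) >> 7);    /* ~1/7 */
    *qs2 = msc8(*qs2 - u);
    *ps2 = msc8(*ps2 + u);
}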


@ -0,0 +1,131 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_recon16x16mb_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
; r0 unsigned char *pred_ptr,
; r1 short *diff_ptr,
; r2 unsigned char *dst_ptr,
; r3 int ystride,
; stack unsigned char *udst_ptr,
; stack unsigned char *vdst_ptr
|vp8_recon16x16mb_neon| PROC
mov r12, #4 ;loop counter for Y loop
recon16x16mb_loop_y
vld1.u8 {q12, q13}, [r0]! ;load data from pred_ptr
vld1.16 {q8, q9}, [r1]! ;load data from diff_ptr
vld1.u8 {q14, q15}, [r0]!
vld1.16 {q10, q11}, [r1]!
vmovl.u8 q0, d24 ;modify Pred data from 8 bits to 16 bits
vmovl.u8 q1, d25
vmovl.u8 q2, d26
vmovl.u8 q3, d27
vmovl.u8 q4, d28
vmovl.u8 q5, d29
vmovl.u8 q6, d30
vld1.16 {q12, q13}, [r1]!
vmovl.u8 q7, d31
vld1.16 {q14, q15}, [r1]!
pld [r0]
pld [r1]
pld [r1, #64]
vadd.s16 q0, q0, q8 ;add Diff data and Pred data together
vadd.s16 q1, q1, q9
vadd.s16 q2, q2, q10
vadd.s16 q3, q3, q11
vadd.s16 q4, q4, q12
vadd.s16 q5, q5, q13
vadd.s16 q6, q6, q14
vadd.s16 q7, q7, q15
vqmovun.s16 d0, q0 ;CLAMP() saturation
vqmovun.s16 d1, q1
vqmovun.s16 d2, q2
vqmovun.s16 d3, q3
vqmovun.s16 d4, q4
vqmovun.s16 d5, q5
vst1.u8 {q0}, [r2], r3 ;store result
vqmovun.s16 d6, q6
vst1.u8 {q1}, [r2], r3
vqmovun.s16 d7, q7
vst1.u8 {q2}, [r2], r3
subs r12, r12, #1
moveq r12, #2 ;loop counter for UV loop
vst1.u8 {q3}, [r2], r3
bne recon16x16mb_loop_y
mov r3, r3, lsr #1 ;uv_stride = ystride>>1
ldr r2, [sp] ;load upred_ptr
recon16x16mb_loop_uv
vld1.u8 {q12, q13}, [r0]! ;load data from pred_ptr
vld1.16 {q8, q9}, [r1]! ;load data from diff_ptr
vld1.u8 {q14, q15}, [r0]!
vld1.16 {q10, q11}, [r1]!
vmovl.u8 q0, d24 ;modify Pred data from 8 bits to 16 bits
vmovl.u8 q1, d25
vmovl.u8 q2, d26
vmovl.u8 q3, d27
vmovl.u8 q4, d28
vmovl.u8 q5, d29
vmovl.u8 q6, d30
vld1.16 {q12, q13}, [r1]!
vmovl.u8 q7, d31
vld1.16 {q14, q15}, [r1]!
vadd.s16 q0, q0, q8 ;add Diff data and Pred data together
vadd.s16 q1, q1, q9
vadd.s16 q2, q2, q10
vadd.s16 q3, q3, q11
vadd.s16 q4, q4, q12
vadd.s16 q5, q5, q13
vadd.s16 q6, q6, q14
vqmovun.s16 d0, q0 ;CLAMP() saturation
vadd.s16 q7, q7, q15
vqmovun.s16 d1, q1
vqmovun.s16 d2, q2
vqmovun.s16 d3, q3
vst1.u8 {d0}, [r2], r3 ;store result
vqmovun.s16 d4, q4
vst1.u8 {d1}, [r2], r3
vqmovun.s16 d5, q5
vst1.u8 {d2}, [r2], r3
vqmovun.s16 d6, q6
vst1.u8 {d3}, [r2], r3
vqmovun.s16 d7, q7
vst1.u8 {d4}, [r2], r3
subs r12, r12, #1
vst1.u8 {d5}, [r2], r3
vst1.u8 {d6}, [r2], r3
vst1.u8 {d7}, [r2], r3
ldrne r2, [sp, #4] ;load vpred_ptr
bne recon16x16mb_loop_uv
bx lr
ENDP
END
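All of the recon routines in these files compute the same thing: add the signed residual to the 8-bit prediction and saturate back to [0, 255]. The 16x16 routine above does this for the Y plane and then the two 8x8 chroma planes; the 2b/4b/b variants that follow do the same over smaller blocks. A scalar sketch, assuming contiguous pred/diff buffers as in the 16x16 case (names are illustrative):

static void recon_block(const unsigned char *pred, const short *diff,
                        unsigned char *dst, int width, int height, int stride)
{
    int r, c, v;

    for (r = 0; r < height; r++)
    {
        for (c = 0; c < width; c++)
        {
            v = pred[c] + diff[c];                     /* vadd.s16     */
            dst[c] = (unsigned char)(v < 0 ? 0 : v > 255 ? 255 : v);
                                                       /* vqmovun.s16  */
        }
        pred += width;
        diff += width;
        dst += stride;
    }
}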


@ -0,0 +1,54 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_recon2b_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
; r0 unsigned char *pred_ptr,
; r1 short *diff_ptr,
; r2 unsigned char *dst_ptr,
; r3 int stride
|vp8_recon2b_neon| PROC
vld1.u8 {q8, q9}, [r0] ;load data from pred_ptr
vld1.16 {q4, q5}, [r1]! ;load data from diff_ptr
vmovl.u8 q0, d16 ;modify Pred data from 8 bits to 16 bits
vld1.16 {q6, q7}, [r1]!
vmovl.u8 q1, d17
vmovl.u8 q2, d18
vmovl.u8 q3, d19
vadd.s16 q0, q0, q4 ;add Diff data and Pred data together
vadd.s16 q1, q1, q5
vadd.s16 q2, q2, q6
vadd.s16 q3, q3, q7
vqmovun.s16 d0, q0 ;CLAMP() saturation
vqmovun.s16 d1, q1
vqmovun.s16 d2, q2
vqmovun.s16 d3, q3
add r0, r2, r3
vst1.u8 {d0}, [r2] ;store result
vst1.u8 {d1}, [r0], r3
add r2, r0, r3
vst1.u8 {d2}, [r0]
vst1.u8 {d3}, [r2], r3
bx lr
ENDP
END


@ -0,0 +1,69 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_recon4b_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
; r0 unsigned char *pred_ptr,
; r1 short *diff_ptr,
; r2 unsigned char *dst_ptr,
; r3 int stride
|vp8_recon4b_neon| PROC
vld1.u8 {q12, q13}, [r0]! ;load data from pred_ptr
vld1.16 {q8, q9}, [r1]! ;load data from diff_ptr
vld1.u8 {q14, q15}, [r0]
vld1.16 {q10, q11}, [r1]!
vmovl.u8 q0, d24 ;modify Pred data from 8 bits to 16 bits
vmovl.u8 q1, d25
vmovl.u8 q2, d26
vmovl.u8 q3, d27
vmovl.u8 q4, d28
vmovl.u8 q5, d29
vmovl.u8 q6, d30
vld1.16 {q12, q13}, [r1]!
vmovl.u8 q7, d31
vld1.16 {q14, q15}, [r1]
vadd.s16 q0, q0, q8 ;add Diff data and Pred data together
vadd.s16 q1, q1, q9
vadd.s16 q2, q2, q10
vadd.s16 q3, q3, q11
vadd.s16 q4, q4, q12
vadd.s16 q5, q5, q13
vadd.s16 q6, q6, q14
vadd.s16 q7, q7, q15
vqmovun.s16 d0, q0 ;CLAMP() saturation
vqmovun.s16 d1, q1
vqmovun.s16 d2, q2
vqmovun.s16 d3, q3
vqmovun.s16 d4, q4
vqmovun.s16 d5, q5
vqmovun.s16 d6, q6
vqmovun.s16 d7, q7
add r0, r2, r3
vst1.u8 {q0}, [r2] ;store result
vst1.u8 {q1}, [r0], r3
add r2, r0, r3
vst1.u8 {q2}, [r0]
vst1.u8 {q3}, [r2], r3
bx lr
ENDP
END


@ -0,0 +1,29 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_ports/config.h"
#include "recon.h"
#include "blockd.h"
extern void vp8_recon16x16mb_neon(unsigned char *pred_ptr, short *diff_ptr, unsigned char *dst_ptr, int ystride, unsigned char *udst_ptr, unsigned char *vdst_ptr);
void vp8_recon_mb_neon(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
{
unsigned char *pred_ptr = &x->predictor[0];
short *diff_ptr = &x->diff[0];
unsigned char *dst_ptr = x->dst.y_buffer;
unsigned char *udst_ptr = x->dst.u_buffer;
unsigned char *vdst_ptr = x->dst.v_buffer;
int ystride = x->dst.y_stride;
/*int uv_stride = x->dst.uv_stride;*/
vp8_recon16x16mb_neon(pred_ptr, diff_ptr, dst_ptr, ystride, udst_ptr, vdst_ptr);
}


@ -0,0 +1,61 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_recon_b_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
; r0 unsigned char *pred_ptr,
; r1 short *diff_ptr,
; r2 unsigned char *dst_ptr,
; r3 int stride
|vp8_recon_b_neon| PROC
mov r12, #16
vld1.u8 {d28}, [r0], r12 ;load 4 data/line from pred_ptr
vld1.16 {q10, q11}, [r1]! ;load data from diff_ptr
vld1.u8 {d29}, [r0], r12
vld1.16 {q11, q12}, [r1]!
vld1.u8 {d30}, [r0], r12
vld1.16 {q12, q13}, [r1]!
vld1.u8 {d31}, [r0], r12
vld1.16 {q13}, [r1]
vmovl.u8 q0, d28 ;modify Pred data from 8 bits to 16 bits
vmovl.u8 q1, d29 ;Pred data in d0, d2, d4, d6
vmovl.u8 q2, d30
vmovl.u8 q3, d31
vadd.s16 d0, d0, d20 ;add Diff data and Pred data together
vadd.s16 d2, d2, d22
vadd.s16 d4, d4, d24
vadd.s16 d6, d6, d26
vqmovun.s16 d0, q0 ;CLAMP() saturation
vqmovun.s16 d1, q1
vqmovun.s16 d2, q2
vqmovun.s16 d3, q3
add r1, r2, r3
vst1.32 {d0[0]}, [r2] ;store result
vst1.32 {d1[0]}, [r1], r3
add r2, r1, r3
vst1.32 {d2[0]}, [r1]
vst1.32 {d3[0]}, [r2], r3
bx lr
ENDP
END


@ -0,0 +1,36 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_push_neon|
EXPORT |vp8_pop_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
|vp8_push_neon| PROC
vst1.i64 {d8, d9, d10, d11}, [r0]!
vst1.i64 {d12, d13, d14, d15}, [r0]!
bx lr
ENDP
|vp8_pop_neon| PROC
vld1.i64 {d8, d9, d10, d11}, [r0]!
vld1.i64 {d12, d13, d14, d15}, [r0]!
bx lr
ENDP
END


@ -0,0 +1,67 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_short_idct4x4llm_1_neon|
EXPORT |vp8_dc_only_idct_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;void vp8_short_idct4x4llm_1_c(short *input, short *output, int pitch);
; r0 short *input;
; r1 short *output;
; r2 int pitch;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|vp8_short_idct4x4llm_1_neon| PROC
vld1.16 {d0[]}, [r0] ;load input[0]
add r3, r1, r2
add r12, r3, r2
vrshr.s16 d0, d0, #3
add r0, r12, r2
vst1.16 {d0}, [r1]
vst1.16 {d0}, [r3]
vst1.16 {d0}, [r12]
vst1.16 {d0}, [r0]
bx lr
ENDP
;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;void vp8_dc_only_idct_c(short input_dc, short *output, int pitch);
; r0 short input_dc;
; r1 short *output;
; r2 int pitch;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|vp8_dc_only_idct_neon| PROC
vdup.16 d0, r0
add r3, r1, r2
add r12, r3, r2
vrshr.s16 d0, d0, #3
add r0, r12, r2
vst1.16 {d0}, [r1]
vst1.16 {d0}, [r3]
vst1.16 {d0}, [r12]
vst1.16 {d0}, [r0]
bx lr
ENDP
END
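Both routines above reduce to the same scalar operation: round the single DC value, shift it down by 3, and replicate it over the 4x4 block. A sketch, assuming pitch is in bytes as in the assembly (so output rows are pitch/2 shorts apart); names are illustrative.

#include <stdint.h>

static void dc_only_idct_sketch(int16_t dc, int16_t *output, int pitch)
{
    int r, c;
    int16_t v = (int16_t)((dc + 4) >> 3);   /* vrshr.s16 #3 */

    for (r = 0; r < 4; r++)
        for (c = 0; c < 4; c++)
            output[r * (pitch / 2) + c] = v;
}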


@ -0,0 +1,127 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_short_idct4x4llm_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
;*************************************************************
;void vp8_short_idct4x4llm_c(short *input, short *output, int pitch)
;r0 short * input
;r1 short * output
;r2 int pitch
;*************************************************************
;static const int cospi8sqrt2minus1=20091;
;static const int sinpi8sqrt2 =35468;
;static const int rounding = 0;
;Optimization note: the data produced by dequantization are signed 13-bit values in the
;range [-4096, 4095]. This allows the "vqdmulh" (NEON) instruction to be used, since the
;intermediate result cannot go out of range (13+16+1 = 30 bits < 32 bits). The instruction
;returns the high half of the multiplication, which is exactly what the IDCT needs.
;(A short scalar sketch of this identity follows the END of this file.)
|vp8_short_idct4x4llm_neon| PROC
ldr r12, _idct_coeff_
vld1.16 {q1, q2}, [r0]
vld1.16 {d0}, [r12]
vswp d3, d4 ;q2(vp[4] vp[12])
vqdmulh.s16 q3, q2, d0[2]
vqdmulh.s16 q4, q2, d0[0]
vqadd.s16 d12, d2, d3 ;a1
vqsub.s16 d13, d2, d3 ;b1
vshr.s16 q3, q3, #1
vshr.s16 q4, q4, #1
vqadd.s16 q3, q3, q2 ;modify since sinpi8sqrt2 > 65536/2 (negative number)
vqadd.s16 q4, q4, q2
;d6 - c1:temp1
;d7 - d1:temp2
;d8 - d1:temp1
;d9 - c1:temp2
vqsub.s16 d10, d6, d9 ;c1
vqadd.s16 d11, d7, d8 ;d1
vqadd.s16 d2, d12, d11
vqadd.s16 d3, d13, d10
vqsub.s16 d4, d13, d10
vqsub.s16 d5, d12, d11
vtrn.32 d2, d4
vtrn.32 d3, d5
vtrn.16 d2, d3
vtrn.16 d4, d5
vswp d3, d4
vqdmulh.s16 q3, q2, d0[2]
vqdmulh.s16 q4, q2, d0[0]
vqadd.s16 d12, d2, d3 ;a1
vqsub.s16 d13, d2, d3 ;b1
vshr.s16 q3, q3, #1
vshr.s16 q4, q4, #1
vqadd.s16 q3, q3, q2 ;modify since sinpi8sqrt2 > 65536/2 (negative number)
vqadd.s16 q4, q4, q2
vqsub.s16 d10, d6, d9 ;c1
vqadd.s16 d11, d7, d8 ;d1
vqadd.s16 d2, d12, d11
vqadd.s16 d3, d13, d10
vqsub.s16 d4, d13, d10
vqsub.s16 d5, d12, d11
vrshr.s16 d2, d2, #3
vrshr.s16 d3, d3, #3
vrshr.s16 d4, d4, #3
vrshr.s16 d5, d5, #3
add r3, r1, r2
add r12, r3, r2
add r0, r12, r2
vtrn.32 d2, d4
vtrn.32 d3, d5
vtrn.16 d2, d3
vtrn.16 d4, d5
vst1.16 {d2}, [r1]
vst1.16 {d3}, [r3]
vst1.16 {d4}, [r12]
vst1.16 {d5}, [r0]
bx lr
ENDP
;-----------------
AREA idct4x4_dat, DATA, READWRITE ;read/write by default
;A data section named idct4x4_dat is declared. Each DCD below reserves one word of memory.
;The label idct_coeff can be used to access the data.
;Data addresses: idct_coeff, idct_coeff+4, ...
_idct_coeff_
DCD idct_coeff
idct_coeff
DCD 0x4e7b4e7b, 0x8a8c8a8c
;20091, 20091, 35468, 35468
END
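A small C sketch of the fixed-point identity behind the optimization note above. vqdmulh.s16 returns the high half of 2*a*b with saturation; because the dequantized inputs fit in 13 bits, the doubled product never overflows or saturates, so the vqdmulh/vshr #1 pair is an exact (a*b) >> 16. The constant 35468 does not fit in a signed 16-bit lane, which is why it is stored as 35468 - 65536 and one extra copy of x is added back afterwards (the vqadd that the "negative number" comments refer to). Helper names are illustrative.

#include <stdint.h>

static int16_t mul_high16(int16_t x, int16_t k)   /* vqdmulh.s16 + vshr.s16 #1 */
{
    return (int16_t)(((int32_t)x * k) >> 16);
}

static int16_t mul_sinpi8sqrt2(int16_t x)         /* (x * 35468) >> 16 */
{
    return (int16_t)(mul_high16(x, 35468 - 65536) + x);
}

static int16_t mul_cospi8sqrt2(int16_t x)         /* x + ((x * 20091) >> 16) */
{
    return (int16_t)(x + mul_high16(x, 20091));
}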


@ -0,0 +1,495 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_sixtap_predict16x16_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
; r0 unsigned char *src_ptr,
; r1 int src_pixels_per_line,
; r2 int xoffset,
; r3 int yoffset,
; r4 unsigned char *dst_ptr,
; stack(r5) int dst_pitch
;Note: to take advantage of the 8-bit multiplication instructions in NEON, first apply abs() to
; the filter coeffs to make them u8, then use vmlsl for the negative coeffs. After multiplication
; the result can be negative, so it is treated as s16. However, the result can also be a large
; positive number (> 2^15-1), which would be misread as a negative value. To avoid that error,
; the filter coeffs are applied in the order 0, 1, 4, 5, 2, which keeps the running sum inside
; the s16 range. Finally, the term for the 3rd filter coeff is added with saturation. The same
; applies to the other filter functions. (A scalar sketch of this ordering follows the END of
; this file.)
|vp8_sixtap_predict16x16_neon| PROC
push {r4-r5, lr}
ldr r12, _filter16_coeff_
ldr r4, [sp, #12] ;load parameters from stack
ldr r5, [sp, #16] ;load parameters from stack
cmp r2, #0 ;skip first_pass filter if xoffset=0
beq secondpass_filter16x16_only
add r2, r12, r2, lsl #5 ;calculate filter location
cmp r3, #0 ;skip second_pass filter if yoffset=0
vld1.s32 {q14, q15}, [r2] ;load first_pass filter
beq firstpass_filter16x16_only
sub sp, sp, #336 ;reserve space on stack for temporary storage
mov lr, sp
vabs.s32 q12, q14
vabs.s32 q13, q15
mov r2, #7 ;loop counter
sub r0, r0, #2 ;move srcptr back to (line-2) and (column-2)
sub r0, r0, r1, lsl #1
vdup.8 d0, d24[0] ;first_pass filter (d0-d5)
vdup.8 d1, d24[4]
vdup.8 d2, d25[0]
vdup.8 d3, d25[4]
vdup.8 d4, d26[0]
vdup.8 d5, d26[4]
;First Pass: output_height lines x output_width columns (21x16)
filt_blk2d_fp16x16_loop_neon
vld1.u8 {d6, d7, d8}, [r0], r1 ;load src data
vld1.u8 {d9, d10, d11}, [r0], r1
vld1.u8 {d12, d13, d14}, [r0], r1
pld [r0]
pld [r0, r1]
pld [r0, r1, lsl #1]
vmull.u8 q8, d6, d0 ;(src_ptr[-2] * vp8_filter[0])
vmull.u8 q9, d7, d0
vmull.u8 q10, d9, d0
vmull.u8 q11, d10, d0
vmull.u8 q12, d12, d0
vmull.u8 q13, d13, d0
vext.8 d28, d6, d7, #1 ;construct src_ptr[-1]
vext.8 d29, d9, d10, #1
vext.8 d30, d12, d13, #1
vmlsl.u8 q8, d28, d1 ;-(src_ptr[-1] * vp8_filter[1])
vmlsl.u8 q10, d29, d1
vmlsl.u8 q12, d30, d1
vext.8 d28, d7, d8, #1
vext.8 d29, d10, d11, #1
vext.8 d30, d13, d14, #1
vmlsl.u8 q9, d28, d1 ;-(src_ptr[-1] * vp8_filter[1])
vmlsl.u8 q11, d29, d1
vmlsl.u8 q13, d30, d1
vext.8 d28, d6, d7, #4 ;construct src_ptr[2]
vext.8 d29, d9, d10, #4
vext.8 d30, d12, d13, #4
vmlsl.u8 q8, d28, d4 ;-(src_ptr[2] * vp8_filter[4])
vmlsl.u8 q10, d29, d4
vmlsl.u8 q12, d30, d4
vext.8 d28, d7, d8, #4
vext.8 d29, d10, d11, #4
vext.8 d30, d13, d14, #4
vmlsl.u8 q9, d28, d4 ;-(src_ptr[2] * vp8_filter[4])
vmlsl.u8 q11, d29, d4
vmlsl.u8 q13, d30, d4
vext.8 d28, d6, d7, #5 ;construct src_ptr[3]
vext.8 d29, d9, d10, #5
vext.8 d30, d12, d13, #5
vmlal.u8 q8, d28, d5 ;(src_ptr[3] * vp8_filter[5])
vmlal.u8 q10, d29, d5
vmlal.u8 q12, d30, d5
vext.8 d28, d7, d8, #5
vext.8 d29, d10, d11, #5
vext.8 d30, d13, d14, #5
vmlal.u8 q9, d28, d5 ;(src_ptr[3] * vp8_filter[5])
vmlal.u8 q11, d29, d5
vmlal.u8 q13, d30, d5
vext.8 d28, d6, d7, #2 ;construct src_ptr[0]
vext.8 d29, d9, d10, #2
vext.8 d30, d12, d13, #2
vmlal.u8 q8, d28, d2 ;(src_ptr[0] * vp8_filter[2])
vmlal.u8 q10, d29, d2
vmlal.u8 q12, d30, d2
vext.8 d28, d7, d8, #2
vext.8 d29, d10, d11, #2
vext.8 d30, d13, d14, #2
vmlal.u8 q9, d28, d2 ;(src_ptr[0] * vp8_filter[2])
vmlal.u8 q11, d29, d2
vmlal.u8 q13, d30, d2
vext.8 d28, d6, d7, #3 ;construct src_ptr[1]
vext.8 d29, d9, d10, #3
vext.8 d30, d12, d13, #3
vext.8 d15, d7, d8, #3
vext.8 d31, d10, d11, #3
vext.8 d6, d13, d14, #3
vmull.u8 q4, d28, d3 ;(src_ptr[1] * vp8_filter[3])
vmull.u8 q5, d29, d3
vmull.u8 q6, d30, d3
vqadd.s16 q8, q4 ;sum of all (src_data*filter_parameters)
vqadd.s16 q10, q5
vqadd.s16 q12, q6
vmull.u8 q6, d15, d3 ;(src_ptr[1] * vp8_filter[3])
vmull.u8 q7, d31, d3
vmull.u8 q3, d6, d3
subs r2, r2, #1
vqadd.s16 q9, q6
vqadd.s16 q11, q7
vqadd.s16 q13, q3
vqrshrun.s16 d6, q8, #7 ;shift/round/saturate to u8
vqrshrun.s16 d7, q9, #7
vqrshrun.s16 d8, q10, #7
vqrshrun.s16 d9, q11, #7
vqrshrun.s16 d10, q12, #7
vqrshrun.s16 d11, q13, #7
vst1.u8 {d6, d7, d8}, [lr]! ;store result
vst1.u8 {d9, d10, d11}, [lr]!
bne filt_blk2d_fp16x16_loop_neon
;Second pass: 16x16
;secondpass_filter - do first 8-columns and then second 8-columns
add r3, r12, r3, lsl #5
sub lr, lr, #336
vld1.s32 {q5, q6}, [r3] ;load second_pass filter
mov r3, #2 ;loop counter
vabs.s32 q7, q5
vabs.s32 q8, q6
mov r2, #16
vdup.8 d0, d14[0] ;second_pass filter parameters (d0-d5)
vdup.8 d1, d14[4]
vdup.8 d2, d15[0]
vdup.8 d3, d15[4]
vdup.8 d4, d16[0]
vdup.8 d5, d16[4]
filt_blk2d_sp16x16_outloop_neon
vld1.u8 {d18}, [lr], r2 ;load src data
vld1.u8 {d19}, [lr], r2
vld1.u8 {d20}, [lr], r2
vld1.u8 {d21}, [lr], r2
mov r12, #4 ;loop counter
vld1.u8 {d22}, [lr], r2
secondpass_inner_loop_neon
vld1.u8 {d23}, [lr], r2 ;load src data
vld1.u8 {d24}, [lr], r2
vld1.u8 {d25}, [lr], r2
vld1.u8 {d26}, [lr], r2
vmull.u8 q3, d18, d0 ;(src_ptr[-2] * vp8_filter[0])
vmull.u8 q4, d19, d0
vmull.u8 q5, d20, d0
vmull.u8 q6, d21, d0
vmlsl.u8 q3, d19, d1 ;-(src_ptr[-1] * vp8_filter[1])
vmlsl.u8 q4, d20, d1
vmlsl.u8 q5, d21, d1
vmlsl.u8 q6, d22, d1
vmlsl.u8 q3, d22, d4 ;-(src_ptr[2] * vp8_filter[4])
vmlsl.u8 q4, d23, d4
vmlsl.u8 q5, d24, d4
vmlsl.u8 q6, d25, d4
vmlal.u8 q3, d20, d2 ;(src_ptr[0] * vp8_filter[2])
vmlal.u8 q4, d21, d2
vmlal.u8 q5, d22, d2
vmlal.u8 q6, d23, d2
vmlal.u8 q3, d23, d5 ;(src_ptr[3] * vp8_filter[5])
vmlal.u8 q4, d24, d5
vmlal.u8 q5, d25, d5
vmlal.u8 q6, d26, d5
vmull.u8 q7, d21, d3 ;(src_ptr[1] * vp8_filter[3])
vmull.u8 q8, d22, d3
vmull.u8 q9, d23, d3
vmull.u8 q10, d24, d3
subs r12, r12, #1
vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters)
vqadd.s16 q8, q4
vqadd.s16 q9, q5
vqadd.s16 q10, q6
vqrshrun.s16 d6, q7, #7 ;shift/round/saturate to u8
vqrshrun.s16 d7, q8, #7
vqrshrun.s16 d8, q9, #7
vqrshrun.s16 d9, q10, #7
vst1.u8 {d6}, [r4], r5 ;store result
vmov q9, q11
vst1.u8 {d7}, [r4], r5
vmov q10, q12
vst1.u8 {d8}, [r4], r5
vmov d22, d26
vst1.u8 {d9}, [r4], r5
bne secondpass_inner_loop_neon
subs r3, r3, #1
sub lr, lr, #336
add lr, lr, #8
sub r4, r4, r5, lsl #4
add r4, r4, #8
bne filt_blk2d_sp16x16_outloop_neon
add sp, sp, #336
pop {r4-r5,pc}
;--------------------
firstpass_filter16x16_only
vabs.s32 q12, q14
vabs.s32 q13, q15
mov r2, #8 ;loop counter
sub r0, r0, #2 ;move srcptr back to (column-2)
vdup.8 d0, d24[0] ;first_pass filter (d0-d5)
vdup.8 d1, d24[4]
vdup.8 d2, d25[0]
vdup.8 d3, d25[4]
vdup.8 d4, d26[0]
vdup.8 d5, d26[4]
;First Pass: output_height lines x output_width columns (16x16)
filt_blk2d_fpo16x16_loop_neon
vld1.u8 {d6, d7, d8}, [r0], r1 ;load src data
vld1.u8 {d9, d10, d11}, [r0], r1
pld [r0]
pld [r0, r1]
vmull.u8 q6, d6, d0 ;(src_ptr[-2] * vp8_filter[0])
vmull.u8 q7, d7, d0
vmull.u8 q8, d9, d0
vmull.u8 q9, d10, d0
vext.8 d20, d6, d7, #1 ;construct src_ptr[-1]
vext.8 d21, d9, d10, #1
vext.8 d22, d7, d8, #1
vext.8 d23, d10, d11, #1
vext.8 d24, d6, d7, #4 ;construct src_ptr[2]
vext.8 d25, d9, d10, #4
vext.8 d26, d7, d8, #4
vext.8 d27, d10, d11, #4
vext.8 d28, d6, d7, #5 ;construct src_ptr[3]
vext.8 d29, d9, d10, #5
vmlsl.u8 q6, d20, d1 ;-(src_ptr[-1] * vp8_filter[1])
vmlsl.u8 q8, d21, d1
vmlsl.u8 q7, d22, d1 ;-(src_ptr[-1] * vp8_filter[1])
vmlsl.u8 q9, d23, d1
vmlsl.u8 q6, d24, d4 ;-(src_ptr[2] * vp8_filter[4])
vmlsl.u8 q8, d25, d4
vmlsl.u8 q7, d26, d4 ;-(src_ptr[2] * vp8_filter[4])
vmlsl.u8 q9, d27, d4
vmlal.u8 q6, d28, d5 ;(src_ptr[3] * vp8_filter[5])
vmlal.u8 q8, d29, d5
vext.8 d20, d7, d8, #5
vext.8 d21, d10, d11, #5
vext.8 d22, d6, d7, #2 ;construct src_ptr[0]
vext.8 d23, d9, d10, #2
vext.8 d24, d7, d8, #2
vext.8 d25, d10, d11, #2
vext.8 d26, d6, d7, #3 ;construct src_ptr[1]
vext.8 d27, d9, d10, #3
vext.8 d28, d7, d8, #3
vext.8 d29, d10, d11, #3
vmlal.u8 q7, d20, d5 ;(src_ptr[3] * vp8_filter[5])
vmlal.u8 q9, d21, d5
vmlal.u8 q6, d22, d2 ;(src_ptr[0] * vp8_filter[2])
vmlal.u8 q8, d23, d2
vmlal.u8 q7, d24, d2 ;(src_ptr[0] * vp8_filter[2])
vmlal.u8 q9, d25, d2
vmull.u8 q10, d26, d3 ;(src_ptr[1] * vp8_filter[3])
vmull.u8 q11, d27, d3
vmull.u8 q12, d28, d3 ;(src_ptr[1] * vp8_filter[3])
vmull.u8 q15, d29, d3
vqadd.s16 q6, q10 ;sum of all (src_data*filter_parameters)
vqadd.s16 q8, q11
vqadd.s16 q7, q12
vqadd.s16 q9, q15
subs r2, r2, #1
vqrshrun.s16 d6, q6, #7 ;shift/round/saturate to u8
vqrshrun.s16 d7, q7, #7
vqrshrun.s16 d8, q8, #7
vqrshrun.s16 d9, q9, #7
vst1.u8 {q3}, [r4], r5 ;store result
vst1.u8 {q4}, [r4], r5
bne filt_blk2d_fpo16x16_loop_neon
pop {r4-r5,pc}
;--------------------
secondpass_filter16x16_only
;Second pass: 16x16
add r3, r12, r3, lsl #5
sub r0, r0, r1, lsl #1
vld1.s32 {q5, q6}, [r3] ;load second_pass filter
mov r3, #2 ;loop counter
vabs.s32 q7, q5
vabs.s32 q8, q6
vdup.8 d0, d14[0] ;second_pass filter parameters (d0-d5)
vdup.8 d1, d14[4]
vdup.8 d2, d15[0]
vdup.8 d3, d15[4]
vdup.8 d4, d16[0]
vdup.8 d5, d16[4]
filt_blk2d_spo16x16_outloop_neon
vld1.u8 {d18}, [r0], r1 ;load src data
vld1.u8 {d19}, [r0], r1
vld1.u8 {d20}, [r0], r1
vld1.u8 {d21}, [r0], r1
mov r12, #4 ;loop counter
vld1.u8 {d22}, [r0], r1
secondpass_only_inner_loop_neon
vld1.u8 {d23}, [r0], r1 ;load src data
vld1.u8 {d24}, [r0], r1
vld1.u8 {d25}, [r0], r1
vld1.u8 {d26}, [r0], r1
vmull.u8 q3, d18, d0 ;(src_ptr[-2] * vp8_filter[0])
vmull.u8 q4, d19, d0
vmull.u8 q5, d20, d0
vmull.u8 q6, d21, d0
vmlsl.u8 q3, d19, d1 ;-(src_ptr[-1] * vp8_filter[1])
vmlsl.u8 q4, d20, d1
vmlsl.u8 q5, d21, d1
vmlsl.u8 q6, d22, d1
vmlsl.u8 q3, d22, d4 ;-(src_ptr[2] * vp8_filter[4])
vmlsl.u8 q4, d23, d4
vmlsl.u8 q5, d24, d4
vmlsl.u8 q6, d25, d4
vmlal.u8 q3, d20, d2 ;(src_ptr[0] * vp8_filter[2])
vmlal.u8 q4, d21, d2
vmlal.u8 q5, d22, d2
vmlal.u8 q6, d23, d2
vmlal.u8 q3, d23, d5 ;(src_ptr[3] * vp8_filter[5])
vmlal.u8 q4, d24, d5
vmlal.u8 q5, d25, d5
vmlal.u8 q6, d26, d5
vmull.u8 q7, d21, d3 ;(src_ptr[1] * vp8_filter[3])
vmull.u8 q8, d22, d3
vmull.u8 q9, d23, d3
vmull.u8 q10, d24, d3
subs r12, r12, #1
vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters)
vqadd.s16 q8, q4
vqadd.s16 q9, q5
vqadd.s16 q10, q6
vqrshrun.s16 d6, q7, #7 ;shift/round/saturate to u8
vqrshrun.s16 d7, q8, #7
vqrshrun.s16 d8, q9, #7
vqrshrun.s16 d9, q10, #7
vst1.u8 {d6}, [r4], r5 ;store result
vmov q9, q11
vst1.u8 {d7}, [r4], r5
vmov q10, q12
vst1.u8 {d8}, [r4], r5
vmov d22, d26
vst1.u8 {d9}, [r4], r5
bne secondpass_only_inner_loop_neon
subs r3, r3, #1
sub r0, r0, r1, lsl #4
sub r0, r0, r1, lsl #2
sub r0, r0, r1
add r0, r0, #8
sub r4, r4, r5, lsl #4
add r4, r4, #8
bne filt_blk2d_spo16x16_outloop_neon
pop {r4-r5,pc}
ENDP
;-----------------
AREA subpelfilters16_dat, DATA, READWRITE ;read/write by default
;A data section named subpelfilters16_dat is declared. Each DCD below reserves one word of memory.
;The label filter16_coeff can be used to access the data.
;Data addresses: filter16_coeff, filter16_coeff+4, filter16_coeff+8, ...
_filter16_coeff_
DCD filter16_coeff
filter16_coeff
DCD 0, 0, 128, 0, 0, 0, 0, 0
DCD 0, -6, 123, 12, -1, 0, 0, 0
DCD 2, -11, 108, 36, -8, 1, 0, 0
DCD 0, -9, 93, 50, -6, 0, 0, 0
DCD 3, -16, 77, 77, -16, 3, 0, 0
DCD 0, -6, 50, 93, -9, 0, 0, 0
DCD 1, -8, 36, 108, -11, 2, 0, 0
DCD 0, -1, 12, 123, -6, 0, 0, 0
END
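A per-pixel scalar sketch of the accumulation order described in the note at the top of this file (the same scheme is used by the other six-tap variants): taps 0, 1, 4, 5 and 2 are folded into one non-saturating 16-bit running sum, and the large centre tap 3 is added last with saturation before the rounding shift. Here `filter` is one of the eight coefficient rows above, `src` points at the pixel being filtered, and the names are illustrative.

#include <stdint.h>

static int16_t sat16(int32_t v)                    /* vqadd.s16 saturation */
{
    return (int16_t)(v > 32767 ? 32767 : (v < -32768 ? -32768 : v));
}

static uint8_t sixtap_px(const uint8_t *src, const int16_t *filter)
{
    int16_t sum, centre;
    int32_t v;

    /* taps 0, 1, 4, 5, 2: plain 16-bit accumulation; this order keeps the
     * partial sum inside the s16 range (the assembly uses |coeff| with
     * vmlal.u8/vmlsl.u8 to the same effect) */
    sum = (int16_t)(src[-2] * filter[0]);
    sum = (int16_t)(sum + src[-1] * filter[1]);
    sum = (int16_t)(sum + src[ 2] * filter[4]);
    sum = (int16_t)(sum + src[ 3] * filter[5]);
    sum = (int16_t)(sum + src[ 0] * filter[2]);

    /* large centre tap added last with saturation (vqadd.s16), then
     * round, shift by 7 and saturate to u8 (vqrshrun.s16 #7) */
    centre = (int16_t)(src[1] * filter[3]);
    v = ((int32_t)sat16((int32_t)sum + centre) + 64) >> 7;

    return (uint8_t)(v < 0 ? 0 : v > 255 ? 255 : v);
}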


@ -0,0 +1,426 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_sixtap_predict_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
; r0 unsigned char *src_ptr,
; r1 int src_pixels_per_line,
; r2 int xoffset,
; r3 int yoffset,
; stack(r4) unsigned char *dst_ptr,
; stack(lr) int dst_pitch
|vp8_sixtap_predict_neon| PROC
push {r4, lr}
ldr r12, _filter4_coeff_
ldr r4, [sp, #8] ;load parameters from stack
ldr lr, [sp, #12] ;load parameters from stack
cmp r2, #0 ;skip first_pass filter if xoffset=0
beq secondpass_filter4x4_only
add r2, r12, r2, lsl #5 ;calculate filter location
cmp r3, #0 ;skip second_pass filter if yoffset=0
vld1.s32 {q14, q15}, [r2] ;load first_pass filter
beq firstpass_filter4x4_only
vabs.s32 q12, q14 ;get abs(filer_parameters)
vabs.s32 q13, q15
sub r0, r0, #2 ;go back 2 columns of src data
sub r0, r0, r1, lsl #1 ;go back 2 lines of src data
;First pass: output_height lines x output_width columns (9x4)
vld1.u8 {q3}, [r0], r1 ;load first 4-line src data
vdup.8 d0, d24[0] ;first_pass filter (d0-d5)
vld1.u8 {q4}, [r0], r1
vdup.8 d1, d24[4]
vld1.u8 {q5}, [r0], r1
vdup.8 d2, d25[0]
vld1.u8 {q6}, [r0], r1
vdup.8 d3, d25[4]
vdup.8 d4, d26[0]
vdup.8 d5, d26[4]
pld [r0]
pld [r0, r1]
pld [r0, r1, lsl #1]
vext.8 d18, d6, d7, #5 ;construct src_ptr[3]
vext.8 d19, d8, d9, #5
vext.8 d20, d10, d11, #5
vext.8 d21, d12, d13, #5
vswp d7, d8 ;discard 2nd half data after src_ptr[3] is done
vswp d11, d12
vzip.32 d18, d19 ;put 2-line data in 1 register (src_ptr[3])
vzip.32 d20, d21
vmull.u8 q7, d18, d5 ;(src_ptr[3] * vp8_filter[5])
vmull.u8 q8, d20, d5
vmov q4, q3 ;keep original src data in q4 q6
vmov q6, q5
vzip.32 d6, d7 ;construct src_ptr[-2], and put 2-line data together
vzip.32 d10, d11
vshr.u64 q9, q4, #8 ;construct src_ptr[-1]
vshr.u64 q10, q6, #8
vmlal.u8 q7, d6, d0 ;+(src_ptr[-2] * vp8_filter[0])
vmlal.u8 q8, d10, d0
vzip.32 d18, d19 ;put 2-line data in 1 register (src_ptr[-1])
vzip.32 d20, d21
vshr.u64 q3, q4, #32 ;construct src_ptr[2]
vshr.u64 q5, q6, #32
vmlsl.u8 q7, d18, d1 ;-(src_ptr[-1] * vp8_filter[1])
vmlsl.u8 q8, d20, d1
vzip.32 d6, d7 ;put 2-line data in 1 register (src_ptr[2])
vzip.32 d10, d11
vshr.u64 q9, q4, #16 ;construct src_ptr[0]
vshr.u64 q10, q6, #16
vmlsl.u8 q7, d6, d4 ;-(src_ptr[2] * vp8_filter[4])
vmlsl.u8 q8, d10, d4
vzip.32 d18, d19 ;put 2-line data in 1 register (src_ptr[0])
vzip.32 d20, d21
vshr.u64 q3, q4, #24 ;construct src_ptr[1]
vshr.u64 q5, q6, #24
vmlal.u8 q7, d18, d2 ;(src_ptr[0] * vp8_filter[2])
vmlal.u8 q8, d20, d2
vzip.32 d6, d7 ;put 2-line data in 1 register (src_ptr[1])
vzip.32 d10, d11
vmull.u8 q9, d6, d3 ;(src_ptr[1] * vp8_filter[3])
vmull.u8 q10, d10, d3
vld1.u8 {q3}, [r0], r1 ;load the remaining 5 lines of src data
vld1.u8 {q4}, [r0], r1
vqadd.s16 q7, q9 ;sum of all (src_data*filter_parameters)
vqadd.s16 q8, q10
vld1.u8 {q5}, [r0], r1
vld1.u8 {q6}, [r0], r1
vqrshrun.s16 d27, q7, #7 ;shift/round/saturate to u8
vqrshrun.s16 d28, q8, #7
;First pass on the remaining 5 lines of data
vld1.u8 {q11}, [r0], r1
vext.8 d18, d6, d7, #5 ;construct src_ptr[3]
vext.8 d19, d8, d9, #5
vext.8 d20, d10, d11, #5
vext.8 d21, d12, d13, #5
vswp d7, d8 ;discard 2nd half data after src_ptr[3] is done
vswp d11, d12
vzip.32 d18, d19 ;put 2-line data in 1 register (src_ptr[3])
vzip.32 d20, d21
vext.8 d31, d22, d23, #5 ;construct src_ptr[3]
vmull.u8 q7, d18, d5 ;(src_ptr[3] * vp8_filter[5])
vmull.u8 q8, d20, d5
vmull.u8 q12, d31, d5 ;(src_ptr[3] * vp8_filter[5])
vmov q4, q3 ;keep original src data in q4 q6
vmov q6, q5
vzip.32 d6, d7 ;construct src_ptr[-2], and put 2-line data together
vzip.32 d10, d11
vshr.u64 q9, q4, #8 ;construct src_ptr[-1]
vshr.u64 q10, q6, #8
vmlal.u8 q7, d6, d0 ;+(src_ptr[-2] * vp8_filter[0])
vmlal.u8 q8, d10, d0
vmlal.u8 q12, d22, d0 ;(src_ptr[-2] * vp8_filter[0])
vzip.32 d18, d19 ;put 2-line data in 1 register (src_ptr[-1])
vzip.32 d20, d21
vshr.u64 q3, q4, #32 ;construct src_ptr[2]
vshr.u64 q5, q6, #32
vext.8 d31, d22, d23, #1 ;construct src_ptr[-1]
vmlsl.u8 q7, d18, d1 ;-(src_ptr[-1] * vp8_filter[1])
vmlsl.u8 q8, d20, d1
vmlsl.u8 q12, d31, d1 ;-(src_ptr[-1] * vp8_filter[1])
vzip.32 d6, d7 ;put 2-line data in 1 register (src_ptr[2])
vzip.32 d10, d11
vshr.u64 q9, q4, #16 ;construct src_ptr[0]
vshr.u64 q10, q6, #16
vext.8 d31, d22, d23, #4 ;construct src_ptr[2]
vmlsl.u8 q7, d6, d4 ;-(src_ptr[2] * vp8_filter[4])
vmlsl.u8 q8, d10, d4
vmlsl.u8 q12, d31, d4 ;-(src_ptr[2] * vp8_filter[4])
vzip.32 d18, d19 ;put 2-line data in 1 register (src_ptr[0])
vzip.32 d20, d21
vshr.u64 q3, q4, #24 ;construct src_ptr[1]
vshr.u64 q5, q6, #24
vext.8 d31, d22, d23, #2 ;construct src_ptr[0]
vmlal.u8 q7, d18, d2 ;(src_ptr[0] * vp8_filter[2])
vmlal.u8 q8, d20, d2
vmlal.u8 q12, d31, d2 ;(src_ptr[0] * vp8_filter[2])
vzip.32 d6, d7 ;put 2-line data in 1 register (src_ptr[1])
vzip.32 d10, d11
vext.8 d31, d22, d23, #3 ;construct src_ptr[1]
vmull.u8 q9, d6, d3 ;(src_ptr[1] * vp8_filter[3])
vmull.u8 q10, d10, d3
vmull.u8 q11, d31, d3 ;(src_ptr[1] * vp8_filter[3])
add r3, r12, r3, lsl #5
vqadd.s16 q7, q9 ;sum of all (src_data*filter_parameters)
vqadd.s16 q8, q10
vqadd.s16 q12, q11
vext.8 d23, d27, d28, #4
vld1.s32 {q5, q6}, [r3] ;load second_pass filter
vqrshrun.s16 d29, q7, #7 ;shift/round/saturate to u8
vqrshrun.s16 d30, q8, #7
vqrshrun.s16 d31, q12, #7
;Second pass: 4x4
vabs.s32 q7, q5
vabs.s32 q8, q6
vext.8 d24, d28, d29, #4
vext.8 d25, d29, d30, #4
vext.8 d26, d30, d31, #4
vdup.8 d0, d14[0] ;second_pass filter parameters (d0-d5)
vdup.8 d1, d14[4]
vdup.8 d2, d15[0]
vdup.8 d3, d15[4]
vdup.8 d4, d16[0]
vdup.8 d5, d16[4]
vmull.u8 q3, d27, d0 ;(src_ptr[-2] * vp8_filter[0])
vmull.u8 q4, d28, d0
vmull.u8 q5, d25, d5 ;(src_ptr[3] * vp8_filter[5])
vmull.u8 q6, d26, d5
vmlsl.u8 q3, d29, d4 ;-(src_ptr[2] * vp8_filter[4])
vmlsl.u8 q4, d30, d4
vmlsl.u8 q5, d23, d1 ;-(src_ptr[-1] * vp8_filter[1])
vmlsl.u8 q6, d24, d1
vmlal.u8 q3, d28, d2 ;(src_ptr[0] * vp8_filter[2])
vmlal.u8 q4, d29, d2
vmlal.u8 q5, d24, d3 ;(src_ptr[1] * vp8_filter[3])
vmlal.u8 q6, d25, d3
add r0, r4, lr
add r1, r0, lr
add r2, r1, lr
vqadd.s16 q5, q3 ;sum of all (src_data*filter_parameters)
vqadd.s16 q6, q4
vqrshrun.s16 d3, q5, #7 ;shift/round/saturate to u8
vqrshrun.s16 d4, q6, #7
vst1.32 {d3[0]}, [r4] ;store result
vst1.32 {d3[1]}, [r0]
vst1.32 {d4[0]}, [r1]
vst1.32 {d4[1]}, [r2]
pop {r4, pc}
;---------------------
firstpass_filter4x4_only
vabs.s32 q12, q14 ;get abs(filter_parameters)
vabs.s32 q13, q15
sub r0, r0, #2 ;go back 2 columns of src data
;First pass: output_height lines x output_width columns (4x4)
vld1.u8 {q3}, [r0], r1 ;load first 4-line src data
vdup.8 d0, d24[0] ;first_pass filter (d0-d5)
vld1.u8 {q4}, [r0], r1
vdup.8 d1, d24[4]
vld1.u8 {q5}, [r0], r1
vdup.8 d2, d25[0]
vld1.u8 {q6}, [r0], r1
vdup.8 d3, d25[4]
vdup.8 d4, d26[0]
vdup.8 d5, d26[4]
vext.8 d18, d6, d7, #5 ;construct src_ptr[3]
vext.8 d19, d8, d9, #5
vext.8 d20, d10, d11, #5
vext.8 d21, d12, d13, #5
vswp d7, d8 ;discard 2nd half data after src_ptr[3] is done
vswp d11, d12
vzip.32 d18, d19 ;put 2-line data in 1 register (src_ptr[3])
vzip.32 d20, d21
vmull.u8 q7, d18, d5 ;(src_ptr[3] * vp8_filter[5])
vmull.u8 q8, d20, d5
vmov q4, q3 ;keep original src data in q4 q6
vmov q6, q5
vzip.32 d6, d7 ;construct src_ptr[-2], and put 2-line data together
vzip.32 d10, d11
vshr.u64 q9, q4, #8 ;construct src_ptr[-1]
vshr.u64 q10, q6, #8
vmlal.u8 q7, d6, d0 ;+(src_ptr[-2] * vp8_filter[0])
vmlal.u8 q8, d10, d0
vzip.32 d18, d19 ;put 2-line data in 1 register (src_ptr[-1])
vzip.32 d20, d21
vshr.u64 q3, q4, #32 ;construct src_ptr[2]
vshr.u64 q5, q6, #32
vmlsl.u8 q7, d18, d1 ;-(src_ptr[-1] * vp8_filter[1])
vmlsl.u8 q8, d20, d1
vzip.32 d6, d7 ;put 2-line data in 1 register (src_ptr[2])
vzip.32 d10, d11
vshr.u64 q9, q4, #16 ;construct src_ptr[0]
vshr.u64 q10, q6, #16
vmlsl.u8 q7, d6, d4 ;-(src_ptr[2] * vp8_filter[4])
vmlsl.u8 q8, d10, d4
vzip.32 d18, d19 ;put 2-line data in 1 register (src_ptr[0])
vzip.32 d20, d21
vshr.u64 q3, q4, #24 ;construct src_ptr[1]
vshr.u64 q5, q6, #24
vmlal.u8 q7, d18, d2 ;(src_ptr[0] * vp8_filter[2])
vmlal.u8 q8, d20, d2
vzip.32 d6, d7 ;put 2-line data in 1 register (src_ptr[1])
vzip.32 d10, d11
vmull.u8 q9, d6, d3 ;(src_ptr[1] * vp8_filter[3])
vmull.u8 q10, d10, d3
add r0, r4, lr
add r1, r0, lr
add r2, r1, lr
vqadd.s16 q7, q9 ;sum of all (src_data*filter_parameters)
vqadd.s16 q8, q10
vqrshrun.s16 d27, q7, #7 ;shift/round/saturate to u8
vqrshrun.s16 d28, q8, #7
vst1.32 {d27[0]}, [r4] ;store result
vst1.32 {d27[1]}, [r0]
vst1.32 {d28[0]}, [r1]
vst1.32 {d28[1]}, [r2]
pop {r4, pc}
;---------------------
secondpass_filter4x4_only
sub r0, r0, r1, lsl #1
add r3, r12, r3, lsl #5
vld1.32 {d27[0]}, [r0], r1 ;load src data
vld1.s32 {q5, q6}, [r3] ;load second_pass filter
vld1.32 {d27[1]}, [r0], r1
vabs.s32 q7, q5
vld1.32 {d28[0]}, [r0], r1
vabs.s32 q8, q6
vld1.32 {d28[1]}, [r0], r1
vdup.8 d0, d14[0] ;second_pass filter parameters (d0-d5)
vld1.32 {d29[0]}, [r0], r1
vdup.8 d1, d14[4]
vld1.32 {d29[1]}, [r0], r1
vdup.8 d2, d15[0]
vld1.32 {d30[0]}, [r0], r1
vdup.8 d3, d15[4]
vld1.32 {d30[1]}, [r0], r1
vdup.8 d4, d16[0]
vld1.32 {d31[0]}, [r0], r1
vdup.8 d5, d16[4]
vext.8 d23, d27, d28, #4
vext.8 d24, d28, d29, #4
vext.8 d25, d29, d30, #4
vext.8 d26, d30, d31, #4
vmull.u8 q3, d27, d0 ;(src_ptr[-2] * vp8_filter[0])
vmull.u8 q4, d28, d0
vmull.u8 q5, d25, d5 ;(src_ptr[3] * vp8_filter[5])
vmull.u8 q6, d26, d5
vmlsl.u8 q3, d29, d4 ;-(src_ptr[2] * vp8_filter[4])
vmlsl.u8 q4, d30, d4
vmlsl.u8 q5, d23, d1 ;-(src_ptr[-1] * vp8_filter[1])
vmlsl.u8 q6, d24, d1
vmlal.u8 q3, d28, d2 ;(src_ptr[0] * vp8_filter[2])
vmlal.u8 q4, d29, d2
vmlal.u8 q5, d24, d3 ;(src_ptr[1] * vp8_filter[3])
vmlal.u8 q6, d25, d3
add r0, r4, lr
add r1, r0, lr
add r2, r1, lr
vqadd.s16 q5, q3 ;sum of all (src_data*filter_parameters)
vqadd.s16 q6, q4
vqrshrun.s16 d3, q5, #7 ;shift/round/saturate to u8
vqrshrun.s16 d4, q6, #7
vst1.32 {d3[0]}, [r4] ;store result
vst1.32 {d3[1]}, [r0]
vst1.32 {d4[0]}, [r1]
vst1.32 {d4[1]}, [r2]
pop {r4, pc}
ENDP
;-----------------
AREA subpelfilters4_dat, DATA, READWRITE ;read/write by default
;Data section subpelfilters4_dat holds the six-tap sub-pixel filters. DCD reserves one word per entry:
;8 filters x 8 words = 64 words (48 real taps plus zero padding). The label filter4_coeff can be used
;to access the data: filter4_coeff, filter4_coeff+4, filter4_coeff+8, ...
_filter4_coeff_
DCD filter4_coeff
filter4_coeff
DCD 0, 0, 128, 0, 0, 0, 0, 0
DCD 0, -6, 123, 12, -1, 0, 0, 0
DCD 2, -11, 108, 36, -8, 1, 0, 0
DCD 0, -9, 93, 50, -6, 0, 0, 0
DCD 3, -16, 77, 77, -16, 3, 0, 0
DCD 0, -6, 50, 93, -9, 0, 0, 0
DCD 1, -8, 36, 108, -11, 2, 0, 0
DCD 0, -1, 12, 123, -6, 0, 0, 0
END
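The filter4_coeff table above is stored as 8 rows of 8 words (32 bytes per row), one row per eighth-pel offset, even though only the first 6 words of a row are real taps; that padding is what allows the simple indexing add r2, r12, r2, lsl #5 (offset times 32 bytes). A hedged C view of the same layout, with illustrative names:

/* Same values as the DCD rows above: 8 sub-pel positions x 8 words,
 * with the last two words of each row as zero padding so a row is
 * exactly 32 bytes ("lsl #5" in the assembly). */
static const int filter4_coeff_sketch[8][8] = {
    { 0,   0, 128,   0,   0,  0, 0, 0 },
    { 0,  -6, 123,  12,  -1,  0, 0, 0 },
    { 2, -11, 108,  36,  -8,  1, 0, 0 },
    { 0,  -9,  93,  50,  -6,  0, 0, 0 },
    { 3, -16,  77,  77, -16,  3, 0, 0 },
    { 0,  -6,  50,  93,  -9,  0, 0, 0 },
    { 1,  -8,  36, 108, -11,  2, 0, 0 },
    { 0,  -1,  12, 123,  -6,  0, 0, 0 },
};

/* offset is the 1/8-pel sub-pixel position (0..7), i.e. xoffset or yoffset. */
static const int *select_filter(int offset)
{
    return filter4_coeff_sketch[offset];   /* == base + offset * 32 bytes */
}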


@@ -0,0 +1,477 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_sixtap_predict8x4_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
; r0 unsigned char *src_ptr,
; r1 int src_pixels_per_line,
; r2 int xoffset,
; r3 int yoffset,
; stack(r4) unsigned char *dst_ptr,
; stack(r5) int dst_pitch
|vp8_sixtap_predict8x4_neon| PROC
push {r4-r5, lr}
ldr r12, _filter8_coeff_
ldr r4, [sp, #12] ;load parameters from stack
ldr r5, [sp, #16] ;load parameters from stack
cmp r2, #0 ;skip first_pass filter if xoffset=0
beq secondpass_filter8x4_only
add r2, r12, r2, lsl #5 ;calculate filter location
cmp r3, #0 ;skip second_pass filter if yoffset=0
vld1.s32 {q14, q15}, [r2] ;load first_pass filter
beq firstpass_filter8x4_only
sub sp, sp, #32 ;reserve space on stack for temporary storage
vabs.s32 q12, q14
vabs.s32 q13, q15
sub r0, r0, #2 ;move srcptr back to (line-2) and (column-2)
mov lr, sp
sub r0, r0, r1, lsl #1
vdup.8 d0, d24[0] ;first_pass filter (d0-d5)
vdup.8 d1, d24[4]
vdup.8 d2, d25[0]
;First pass: output_height lines x output_width columns (9x8)
vld1.u8 {q3}, [r0], r1 ;load src data
vdup.8 d3, d25[4]
vld1.u8 {q4}, [r0], r1
vdup.8 d4, d26[0]
vld1.u8 {q5}, [r0], r1
vdup.8 d5, d26[4]
vld1.u8 {q6}, [r0], r1
pld [r0]
pld [r0, r1]
pld [r0, r1, lsl #1]
vmull.u8 q7, d6, d0 ;(src_ptr[-2] * vp8_filter[0])
vmull.u8 q8, d8, d0
vmull.u8 q9, d10, d0
vmull.u8 q10, d12, d0
vext.8 d28, d6, d7, #1 ;construct src_ptr[-1]
vext.8 d29, d8, d9, #1
vext.8 d30, d10, d11, #1
vext.8 d31, d12, d13, #1
vmlsl.u8 q7, d28, d1 ;-(src_ptr[-1] * vp8_filter[1])
vmlsl.u8 q8, d29, d1
vmlsl.u8 q9, d30, d1
vmlsl.u8 q10, d31, d1
vext.8 d28, d6, d7, #4 ;construct src_ptr[2]
vext.8 d29, d8, d9, #4
vext.8 d30, d10, d11, #4
vext.8 d31, d12, d13, #4
vmlsl.u8 q7, d28, d4 ;-(src_ptr[2] * vp8_filter[4])
vmlsl.u8 q8, d29, d4
vmlsl.u8 q9, d30, d4
vmlsl.u8 q10, d31, d4
vext.8 d28, d6, d7, #2 ;construct src_ptr[0]
vext.8 d29, d8, d9, #2
vext.8 d30, d10, d11, #2
vext.8 d31, d12, d13, #2
vmlal.u8 q7, d28, d2 ;(src_ptr[0] * vp8_filter[2])
vmlal.u8 q8, d29, d2
vmlal.u8 q9, d30, d2
vmlal.u8 q10, d31, d2
vext.8 d28, d6, d7, #5 ;construct src_ptr[3]
vext.8 d29, d8, d9, #5
vext.8 d30, d10, d11, #5
vext.8 d31, d12, d13, #5
vmlal.u8 q7, d28, d5 ;(src_ptr[3] * vp8_filter[5])
vmlal.u8 q8, d29, d5
vmlal.u8 q9, d30, d5
vmlal.u8 q10, d31, d5
vext.8 d28, d6, d7, #3 ;construct src_ptr[1]
vext.8 d29, d8, d9, #3
vext.8 d30, d10, d11, #3
vext.8 d31, d12, d13, #3
vmull.u8 q3, d28, d3 ;(src_ptr[1] * vp8_filter[3])
vmull.u8 q4, d29, d3
vmull.u8 q5, d30, d3
vmull.u8 q6, d31, d3
vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters)
vqadd.s16 q8, q4
vqadd.s16 q9, q5
vqadd.s16 q10, q6
vld1.u8 {q3}, [r0], r1 ;load src data
vqrshrun.s16 d22, q7, #7 ;shift/round/saturate to u8
vqrshrun.s16 d23, q8, #7
vqrshrun.s16 d24, q9, #7
vqrshrun.s16 d25, q10, #7
vld1.u8 {q4}, [r0], r1
vst1.u8 {d22}, [lr]! ;store result
vld1.u8 {q5}, [r0], r1
vst1.u8 {d23}, [lr]!
vld1.u8 {q6}, [r0], r1
vst1.u8 {d24}, [lr]!
vld1.u8 {q7}, [r0], r1
vst1.u8 {d25}, [lr]!
;first_pass filtering on the remaining 5 lines of data
vmull.u8 q8, d6, d0 ;(src_ptr[-2] * vp8_filter[0])
vmull.u8 q9, d8, d0
vmull.u8 q10, d10, d0
vmull.u8 q11, d12, d0
vmull.u8 q12, d14, d0
vext.8 d27, d6, d7, #1 ;construct src_ptr[-1]
vext.8 d28, d8, d9, #1
vext.8 d29, d10, d11, #1
vext.8 d30, d12, d13, #1
vext.8 d31, d14, d15, #1
vmlsl.u8 q8, d27, d1 ;-(src_ptr[-1] * vp8_filter[1])
vmlsl.u8 q9, d28, d1
vmlsl.u8 q10, d29, d1
vmlsl.u8 q11, d30, d1
vmlsl.u8 q12, d31, d1
vext.8 d27, d6, d7, #4 ;construct src_ptr[2]
vext.8 d28, d8, d9, #4
vext.8 d29, d10, d11, #4
vext.8 d30, d12, d13, #4
vext.8 d31, d14, d15, #4
vmlsl.u8 q8, d27, d4 ;-(src_ptr[2] * vp8_filter[4])
vmlsl.u8 q9, d28, d4
vmlsl.u8 q10, d29, d4
vmlsl.u8 q11, d30, d4
vmlsl.u8 q12, d31, d4
vext.8 d27, d6, d7, #2 ;construct src_ptr[0]
vext.8 d28, d8, d9, #2
vext.8 d29, d10, d11, #2
vext.8 d30, d12, d13, #2
vext.8 d31, d14, d15, #2
vmlal.u8 q8, d27, d2 ;(src_ptr[0] * vp8_filter[2])
vmlal.u8 q9, d28, d2
vmlal.u8 q10, d29, d2
vmlal.u8 q11, d30, d2
vmlal.u8 q12, d31, d2
vext.8 d27, d6, d7, #5 ;construct src_ptr[3]
vext.8 d28, d8, d9, #5
vext.8 d29, d10, d11, #5
vext.8 d30, d12, d13, #5
vext.8 d31, d14, d15, #5
vmlal.u8 q8, d27, d5 ;(src_ptr[3] * vp8_filter[5])
vmlal.u8 q9, d28, d5
vmlal.u8 q10, d29, d5
vmlal.u8 q11, d30, d5
vmlal.u8 q12, d31, d5
vext.8 d27, d6, d7, #3 ;construct src_ptr[1]
vext.8 d28, d8, d9, #3
vext.8 d29, d10, d11, #3
vext.8 d30, d12, d13, #3
vext.8 d31, d14, d15, #3
vmull.u8 q3, d27, d3 ;(src_ptr[1] * vp8_filter[3])
vmull.u8 q4, d28, d3
vmull.u8 q5, d29, d3
vmull.u8 q6, d30, d3
vmull.u8 q7, d31, d3
vqadd.s16 q8, q3 ;sum of all (src_data*filter_parameters)
vqadd.s16 q9, q4
vqadd.s16 q10, q5
vqadd.s16 q11, q6
vqadd.s16 q12, q7
vqrshrun.s16 d26, q8, #7 ;shift/round/saturate to u8
vqrshrun.s16 d27, q9, #7
vqrshrun.s16 d28, q10, #7
vqrshrun.s16 d29, q11, #7
vqrshrun.s16 d30, q12, #7
;Second pass: 8x4
;secondpass_filter
add r3, r12, r3, lsl #5
sub lr, lr, #32
vld1.s32 {q5, q6}, [r3] ;load second_pass filter
vld1.u8 {q11}, [lr]!
vabs.s32 q7, q5
vabs.s32 q8, q6
vld1.u8 {q12}, [lr]!
vdup.8 d0, d14[0] ;second_pass filter parameters (d0-d5)
vdup.8 d1, d14[4]
vdup.8 d2, d15[0]
vdup.8 d3, d15[4]
vdup.8 d4, d16[0]
vdup.8 d5, d16[4]
vmull.u8 q3, d22, d0 ;(src_ptr[-2] * vp8_filter[0])
vmull.u8 q4, d23, d0
vmull.u8 q5, d24, d0
vmull.u8 q6, d25, d0
vmlsl.u8 q3, d23, d1 ;-(src_ptr[-1] * vp8_filter[1])
vmlsl.u8 q4, d24, d1
vmlsl.u8 q5, d25, d1
vmlsl.u8 q6, d26, d1
vmlsl.u8 q3, d26, d4 ;-(src_ptr[2] * vp8_filter[4])
vmlsl.u8 q4, d27, d4
vmlsl.u8 q5, d28, d4
vmlsl.u8 q6, d29, d4
vmlal.u8 q3, d24, d2 ;(src_ptr[0] * vp8_filter[2])
vmlal.u8 q4, d25, d2
vmlal.u8 q5, d26, d2
vmlal.u8 q6, d27, d2
vmlal.u8 q3, d27, d5 ;(src_ptr[3] * vp8_filter[5])
vmlal.u8 q4, d28, d5
vmlal.u8 q5, d29, d5
vmlal.u8 q6, d30, d5
vmull.u8 q7, d25, d3 ;(src_ptr[1] * vp8_filter[3])
vmull.u8 q8, d26, d3
vmull.u8 q9, d27, d3
vmull.u8 q10, d28, d3
vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters)
vqadd.s16 q8, q4
vqadd.s16 q9, q5
vqadd.s16 q10, q6
vqrshrun.s16 d6, q7, #7 ;shift/round/saturate to u8
vqrshrun.s16 d7, q8, #7
vqrshrun.s16 d8, q9, #7
vqrshrun.s16 d9, q10, #7
vst1.u8 {d6}, [r4], r5 ;store result
vst1.u8 {d7}, [r4], r5
vst1.u8 {d8}, [r4], r5
vst1.u8 {d9}, [r4], r5
add sp, sp, #32
pop {r4-r5,pc}
;--------------------
firstpass_filter8x4_only
vabs.s32 q12, q14
vabs.s32 q13, q15
sub r0, r0, #2 ;move srcptr back to (line-2) and (column-2)
vld1.u8 {q3}, [r0], r1 ;load src data
vdup.8 d0, d24[0] ;first_pass filter (d0-d5)
vld1.u8 {q4}, [r0], r1
vdup.8 d1, d24[4]
vld1.u8 {q5}, [r0], r1
vdup.8 d2, d25[0]
vld1.u8 {q6}, [r0], r1
vdup.8 d3, d25[4]
vdup.8 d4, d26[0]
vdup.8 d5, d26[4]
;First pass: output_height lines x output_width columns (4x8)
pld [r0]
pld [r0, r1]
pld [r0, r1, lsl #1]
vmull.u8 q7, d6, d0 ;(src_ptr[-2] * vp8_filter[0])
vmull.u8 q8, d8, d0
vmull.u8 q9, d10, d0
vmull.u8 q10, d12, d0
vext.8 d28, d6, d7, #1 ;construct src_ptr[-1]
vext.8 d29, d8, d9, #1
vext.8 d30, d10, d11, #1
vext.8 d31, d12, d13, #1
vmlsl.u8 q7, d28, d1 ;-(src_ptr[-1] * vp8_filter[1])
vmlsl.u8 q8, d29, d1
vmlsl.u8 q9, d30, d1
vmlsl.u8 q10, d31, d1
vext.8 d28, d6, d7, #4 ;construct src_ptr[2]
vext.8 d29, d8, d9, #4
vext.8 d30, d10, d11, #4
vext.8 d31, d12, d13, #4
vmlsl.u8 q7, d28, d4 ;-(src_ptr[2] * vp8_filter[4])
vmlsl.u8 q8, d29, d4
vmlsl.u8 q9, d30, d4
vmlsl.u8 q10, d31, d4
vext.8 d28, d6, d7, #2 ;construct src_ptr[0]
vext.8 d29, d8, d9, #2
vext.8 d30, d10, d11, #2
vext.8 d31, d12, d13, #2
vmlal.u8 q7, d28, d2 ;(src_ptr[0] * vp8_filter[2])
vmlal.u8 q8, d29, d2
vmlal.u8 q9, d30, d2
vmlal.u8 q10, d31, d2
vext.8 d28, d6, d7, #5 ;construct src_ptr[3]
vext.8 d29, d8, d9, #5
vext.8 d30, d10, d11, #5
vext.8 d31, d12, d13, #5
vmlal.u8 q7, d28, d5 ;(src_ptr[3] * vp8_filter[5])
vmlal.u8 q8, d29, d5
vmlal.u8 q9, d30, d5
vmlal.u8 q10, d31, d5
vext.8 d28, d6, d7, #3 ;construct src_ptr[1]
vext.8 d29, d8, d9, #3
vext.8 d30, d10, d11, #3
vext.8 d31, d12, d13, #3
vmull.u8 q3, d28, d3 ;(src_ptr[1] * vp8_filter[3])
vmull.u8 q4, d29, d3
vmull.u8 q5, d30, d3
vmull.u8 q6, d31, d3
vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters)
vqadd.s16 q8, q4
vqadd.s16 q9, q5
vqadd.s16 q10, q6
vqrshrun.s16 d22, q7, #7 ;shift/round/saturate to u8
vqrshrun.s16 d23, q8, #7
vqrshrun.s16 d24, q9, #7
vqrshrun.s16 d25, q10, #7
vst1.u8 {d22}, [r4], r5 ;store result
vst1.u8 {d23}, [r4], r5
vst1.u8 {d24}, [r4], r5
vst1.u8 {d25}, [r4], r5
pop {r4-r5,pc}
;---------------------
secondpass_filter8x4_only
;Second pass: 8x4
add r3, r12, r3, lsl #5
sub r0, r0, r1, lsl #1
vld1.s32 {q5, q6}, [r3] ;load second_pass filter
vabs.s32 q7, q5
vabs.s32 q8, q6
vld1.u8 {d22}, [r0], r1
vld1.u8 {d23}, [r0], r1
vld1.u8 {d24}, [r0], r1
vdup.8 d0, d14[0] ;second_pass filter parameters (d0-d5)
vld1.u8 {d25}, [r0], r1
vdup.8 d1, d14[4]
vld1.u8 {d26}, [r0], r1
vdup.8 d2, d15[0]
vld1.u8 {d27}, [r0], r1
vdup.8 d3, d15[4]
vld1.u8 {d28}, [r0], r1
vdup.8 d4, d16[0]
vld1.u8 {d29}, [r0], r1
vdup.8 d5, d16[4]
vld1.u8 {d30}, [r0], r1
vmull.u8 q3, d22, d0 ;(src_ptr[-2] * vp8_filter[0])
vmull.u8 q4, d23, d0
vmull.u8 q5, d24, d0
vmull.u8 q6, d25, d0
vmlsl.u8 q3, d23, d1 ;-(src_ptr[-1] * vp8_filter[1])
vmlsl.u8 q4, d24, d1
vmlsl.u8 q5, d25, d1
vmlsl.u8 q6, d26, d1
vmlsl.u8 q3, d26, d4 ;-(src_ptr[2] * vp8_filter[4])
vmlsl.u8 q4, d27, d4
vmlsl.u8 q5, d28, d4
vmlsl.u8 q6, d29, d4
vmlal.u8 q3, d24, d2 ;(src_ptr[0] * vp8_filter[2])
vmlal.u8 q4, d25, d2
vmlal.u8 q5, d26, d2
vmlal.u8 q6, d27, d2
vmlal.u8 q3, d27, d5 ;(src_ptr[3] * vp8_filter[5])
vmlal.u8 q4, d28, d5
vmlal.u8 q5, d29, d5
vmlal.u8 q6, d30, d5
vmull.u8 q7, d25, d3 ;(src_ptr[1] * vp8_filter[3])
vmull.u8 q8, d26, d3
vmull.u8 q9, d27, d3
vmull.u8 q10, d28, d3
vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters)
vqadd.s16 q8, q4
vqadd.s16 q9, q5
vqadd.s16 q10, q6
vqrshrun.s16 d6, q7, #7 ;shift/round/saturate to u8
vqrshrun.s16 d7, q8, #7
vqrshrun.s16 d8, q9, #7
vqrshrun.s16 d9, q10, #7
vst1.u8 {d6}, [r4], r5 ;store result
vst1.u8 {d7}, [r4], r5
vst1.u8 {d8}, [r4], r5
vst1.u8 {d9}, [r4], r5
pop {r4-r5,pc}
ENDP
;-----------------
AREA subpelfilters8_dat, DATA, READWRITE ;read/write by default
;Data section subpelfilters8_dat holds the six-tap sub-pixel filters. DCD reserves one word per entry:
;8 filters x 8 words = 64 words (48 real taps plus zero padding). The label filter8_coeff can be used
;to access the data: filter8_coeff, filter8_coeff+4, filter8_coeff+8, ...
_filter8_coeff_
DCD filter8_coeff
filter8_coeff
DCD 0, 0, 128, 0, 0, 0, 0, 0
DCD 0, -6, 123, 12, -1, 0, 0, 0
DCD 2, -11, 108, 36, -8, 1, 0, 0
DCD 0, -9, 93, 50, -6, 0, 0, 0
DCD 3, -16, 77, 77, -16, 3, 0, 0
DCD 0, -6, 50, 93, -9, 0, 0, 0
DCD 1, -8, 36, 108, -11, 2, 0, 0
DCD 0, -1, 12, 123, -6, 0, 0, 0
END
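On its main path, |vp8_sixtap_predict8x4_neon| above is a separable two-pass filter: a horizontal pass over height+5 source rows whose results go partly to the 32-byte stack buffer and partly stay in registers, then a vertical pass into the destination. A scalar sketch of that structure, reusing the coefficient values from the table above; the function and helper names are illustrative, not the libvpx C reference code.

#include <stdint.h>

/* Coefficient rows as in the DCD table above (6 real taps per row). */
static const int six_tap_sketch[8][6] = {
    { 0,   0, 128,   0,   0, 0 }, { 0,  -6, 123,  12,  -1, 0 },
    { 2, -11, 108,  36,  -8, 1 }, { 0,  -9,  93,  50,  -6, 0 },
    { 3, -16,  77,  77, -16, 3 }, { 0,  -6,  50,  93,  -9, 0 },
    { 1,  -8,  36, 108, -11, 2 }, { 0,  -1,  12, 123,  -6, 0 },
};

static uint8_t round_sat_u8(int v)      /* matches vqrshrun.s16 ..., #7 */
{
    v = (v + 64) >> 7;
    return (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v));
}

/* Two-pass 8x4 prediction: horizontal filter over 4+5 rows into tmp[],
 * then vertical filter into dst.  The caller must guarantee that the
 * 2 samples left/above and 3 right/below the block are readable. */
static void sixtap_8x4_sketch(const uint8_t *src, int src_stride,
                              int xoffset, int yoffset,
                              uint8_t *dst, int dst_pitch)
{
    const int *hf = six_tap_sketch[xoffset];
    const int *vf = six_tap_sketch[yoffset];
    uint8_t tmp[(4 + 5) * 8];           /* intermediate rows */
    int r, c, k;

    /* First pass: horizontal, starting at src - 2 - 2*src_stride. */
    for (r = 0; r < 4 + 5; r++)
        for (c = 0; c < 8; c++) {
            const uint8_t *s = src + (r - 2) * src_stride + (c - 2);
            int sum = 0;
            for (k = 0; k < 6; k++)
                sum += hf[k] * s[k];
            tmp[r * 8 + c] = round_sat_u8(sum);
        }

    /* Second pass: vertical, over six consecutive intermediate rows. */
    for (r = 0; r < 4; r++)
        for (c = 0; c < 8; c++) {
            int sum = 0;
            for (k = 0; k < 6; k++)
                sum += vf[k] * tmp[(r + k) * 8 + c];
            dst[r * dst_pitch + c] = round_sat_u8(sum);
        }
}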


@@ -0,0 +1,528 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_sixtap_predict8x8_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
; r0 unsigned char *src_ptr,
; r1 int src_pixels_per_line,
; r2 int xoffset,
; r3 int yoffset,
; stack(r4) unsigned char *dst_ptr,
; stack(r5) int dst_pitch
|vp8_sixtap_predict8x8_neon| PROC
push {r4-r5, lr}
ldr r12, _filter8_coeff_
ldr r4, [sp, #12] ;load parameters from stack
ldr r5, [sp, #16] ;load parameters from stack
cmp r2, #0 ;skip first_pass filter if xoffset=0
beq secondpass_filter8x8_only
add r2, r12, r2, lsl #5 ;calculate filter location
cmp r3, #0 ;skip second_pass filter if yoffset=0
vld1.s32 {q14, q15}, [r2] ;load first_pass filter
beq firstpass_filter8x8_only
sub sp, sp, #64 ;reserve space on stack for temporary storage
mov lr, sp
vabs.s32 q12, q14
vabs.s32 q13, q15
mov r2, #2 ;loop counter
sub r0, r0, #2 ;move srcptr back to (line-2) and (column-2)
sub r0, r0, r1, lsl #1
vdup.8 d0, d24[0] ;first_pass filter (d0-d5)
vdup.8 d1, d24[4]
vdup.8 d2, d25[0]
;First pass: output_height lines x output_width columns (13x8)
vld1.u8 {q3}, [r0], r1 ;load src data
vdup.8 d3, d25[4]
vld1.u8 {q4}, [r0], r1
vdup.8 d4, d26[0]
vld1.u8 {q5}, [r0], r1
vdup.8 d5, d26[4]
vld1.u8 {q6}, [r0], r1
filt_blk2d_fp8x8_loop_neon
pld [r0]
pld [r0, r1]
pld [r0, r1, lsl #1]
vmull.u8 q7, d6, d0 ;(src_ptr[-2] * vp8_filter[0])
vmull.u8 q8, d8, d0
vmull.u8 q9, d10, d0
vmull.u8 q10, d12, d0
vext.8 d28, d6, d7, #1 ;construct src_ptr[-1]
vext.8 d29, d8, d9, #1
vext.8 d30, d10, d11, #1
vext.8 d31, d12, d13, #1
vmlsl.u8 q7, d28, d1 ;-(src_ptr[-1] * vp8_filter[1])
vmlsl.u8 q8, d29, d1
vmlsl.u8 q9, d30, d1
vmlsl.u8 q10, d31, d1
vext.8 d28, d6, d7, #4 ;construct src_ptr[2]
vext.8 d29, d8, d9, #4
vext.8 d30, d10, d11, #4
vext.8 d31, d12, d13, #4
vmlsl.u8 q7, d28, d4 ;-(src_ptr[2] * vp8_filter[4])
vmlsl.u8 q8, d29, d4
vmlsl.u8 q9, d30, d4
vmlsl.u8 q10, d31, d4
vext.8 d28, d6, d7, #2 ;construct src_ptr[0]
vext.8 d29, d8, d9, #2
vext.8 d30, d10, d11, #2
vext.8 d31, d12, d13, #2
vmlal.u8 q7, d28, d2 ;(src_ptr[0] * vp8_filter[2])
vmlal.u8 q8, d29, d2
vmlal.u8 q9, d30, d2
vmlal.u8 q10, d31, d2
vext.8 d28, d6, d7, #5 ;construct src_ptr[3]
vext.8 d29, d8, d9, #5
vext.8 d30, d10, d11, #5
vext.8 d31, d12, d13, #5
vmlal.u8 q7, d28, d5 ;(src_ptr[3] * vp8_filter[5])
vmlal.u8 q8, d29, d5
vmlal.u8 q9, d30, d5
vmlal.u8 q10, d31, d5
vext.8 d28, d6, d7, #3 ;construct src_ptr[1]
vext.8 d29, d8, d9, #3
vext.8 d30, d10, d11, #3
vext.8 d31, d12, d13, #3
vmull.u8 q3, d28, d3 ;(src_ptr[1] * vp8_filter[3])
vmull.u8 q4, d29, d3
vmull.u8 q5, d30, d3
vmull.u8 q6, d31, d3
subs r2, r2, #1
vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters)
vqadd.s16 q8, q4
vqadd.s16 q9, q5
vqadd.s16 q10, q6
vld1.u8 {q3}, [r0], r1 ;load src data
vqrshrun.s16 d22, q7, #7 ;shift/round/saturate to u8
vqrshrun.s16 d23, q8, #7
vqrshrun.s16 d24, q9, #7
vqrshrun.s16 d25, q10, #7
vst1.u8 {d22}, [lr]! ;store result
vld1.u8 {q4}, [r0], r1
vst1.u8 {d23}, [lr]!
vld1.u8 {q5}, [r0], r1
vst1.u8 {d24}, [lr]!
vld1.u8 {q6}, [r0], r1
vst1.u8 {d25}, [lr]!
bne filt_blk2d_fp8x8_loop_neon
;first_pass filtering on the remaining 5 lines of data
;vld1.u8 {q3}, [r0], r1 ;load src data
;vld1.u8 {q4}, [r0], r1
;vld1.u8 {q5}, [r0], r1
;vld1.u8 {q6}, [r0], r1
vld1.u8 {q7}, [r0], r1
vmull.u8 q8, d6, d0 ;(src_ptr[-2] * vp8_filter[0])
vmull.u8 q9, d8, d0
vmull.u8 q10, d10, d0
vmull.u8 q11, d12, d0
vmull.u8 q12, d14, d0
vext.8 d27, d6, d7, #1 ;construct src_ptr[-1]
vext.8 d28, d8, d9, #1
vext.8 d29, d10, d11, #1
vext.8 d30, d12, d13, #1
vext.8 d31, d14, d15, #1
vmlsl.u8 q8, d27, d1 ;-(src_ptr[-1] * vp8_filter[1])
vmlsl.u8 q9, d28, d1
vmlsl.u8 q10, d29, d1
vmlsl.u8 q11, d30, d1
vmlsl.u8 q12, d31, d1
vext.8 d27, d6, d7, #4 ;construct src_ptr[2]
vext.8 d28, d8, d9, #4
vext.8 d29, d10, d11, #4
vext.8 d30, d12, d13, #4
vext.8 d31, d14, d15, #4
vmlsl.u8 q8, d27, d4 ;-(src_ptr[2] * vp8_filter[4])
vmlsl.u8 q9, d28, d4
vmlsl.u8 q10, d29, d4
vmlsl.u8 q11, d30, d4
vmlsl.u8 q12, d31, d4
vext.8 d27, d6, d7, #2 ;construct src_ptr[0]
vext.8 d28, d8, d9, #2
vext.8 d29, d10, d11, #2
vext.8 d30, d12, d13, #2
vext.8 d31, d14, d15, #2
vmlal.u8 q8, d27, d2 ;(src_ptr[0] * vp8_filter[2])
vmlal.u8 q9, d28, d2
vmlal.u8 q10, d29, d2
vmlal.u8 q11, d30, d2
vmlal.u8 q12, d31, d2
vext.8 d27, d6, d7, #5 ;construct src_ptr[3]
vext.8 d28, d8, d9, #5
vext.8 d29, d10, d11, #5
vext.8 d30, d12, d13, #5
vext.8 d31, d14, d15, #5
vmlal.u8 q8, d27, d5 ;(src_ptr[3] * vp8_filter[5])
vmlal.u8 q9, d28, d5
vmlal.u8 q10, d29, d5
vmlal.u8 q11, d30, d5
vmlal.u8 q12, d31, d5
vext.8 d27, d6, d7, #3 ;construct src_ptr[1]
vext.8 d28, d8, d9, #3
vext.8 d29, d10, d11, #3
vext.8 d30, d12, d13, #3
vext.8 d31, d14, d15, #3
vmull.u8 q3, d27, d3 ;(src_ptr[1] * vp8_filter[3])
vmull.u8 q4, d28, d3
vmull.u8 q5, d29, d3
vmull.u8 q6, d30, d3
vmull.u8 q7, d31, d3
vqadd.s16 q8, q3 ;sum of all (src_data*filter_parameters)
vqadd.s16 q9, q4
vqadd.s16 q10, q5
vqadd.s16 q11, q6
vqadd.s16 q12, q7
add r3, r12, r3, lsl #5
vqrshrun.s16 d26, q8, #7 ;shift/round/saturate to u8
sub lr, lr, #64
vqrshrun.s16 d27, q9, #7
vld1.u8 {q9}, [lr]! ;load intermediate data from stack
vqrshrun.s16 d28, q10, #7
vld1.u8 {q10}, [lr]!
vld1.s32 {q5, q6}, [r3] ;load second_pass filter
vqrshrun.s16 d29, q11, #7
vld1.u8 {q11}, [lr]!
vabs.s32 q7, q5
vabs.s32 q8, q6
vqrshrun.s16 d30, q12, #7
vld1.u8 {q12}, [lr]!
;Second pass: 8x8
mov r3, #2 ;loop counter
vdup.8 d0, d14[0] ;second_pass filter parameters (d0-d5)
vdup.8 d1, d14[4]
vdup.8 d2, d15[0]
vdup.8 d3, d15[4]
vdup.8 d4, d16[0]
vdup.8 d5, d16[4]
filt_blk2d_sp8x8_loop_neon
vmull.u8 q3, d18, d0 ;(src_ptr[-2] * vp8_filter[0])
vmull.u8 q4, d19, d0
vmull.u8 q5, d20, d0
vmull.u8 q6, d21, d0
vmlsl.u8 q3, d19, d1 ;-(src_ptr[-1] * vp8_filter[1])
vmlsl.u8 q4, d20, d1
vmlsl.u8 q5, d21, d1
vmlsl.u8 q6, d22, d1
vmlsl.u8 q3, d22, d4 ;-(src_ptr[2] * vp8_filter[4])
vmlsl.u8 q4, d23, d4
vmlsl.u8 q5, d24, d4
vmlsl.u8 q6, d25, d4
vmlal.u8 q3, d20, d2 ;(src_ptr[0] * vp8_filter[2])
vmlal.u8 q4, d21, d2
vmlal.u8 q5, d22, d2
vmlal.u8 q6, d23, d2
vmlal.u8 q3, d23, d5 ;(src_ptr[3] * vp8_filter[5])
vmlal.u8 q4, d24, d5
vmlal.u8 q5, d25, d5
vmlal.u8 q6, d26, d5
vmull.u8 q7, d21, d3 ;(src_ptr[1] * vp8_filter[3])
vmull.u8 q8, d22, d3
vmull.u8 q9, d23, d3
vmull.u8 q10, d24, d3
subs r3, r3, #1
vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters)
vqadd.s16 q8, q4
vqadd.s16 q9, q5
vqadd.s16 q10, q6
vqrshrun.s16 d6, q7, #7 ;shift/round/saturate to u8
vqrshrun.s16 d7, q8, #7
vqrshrun.s16 d8, q9, #7
vqrshrun.s16 d9, q10, #7
vmov q9, q11
vst1.u8 {d6}, [r4], r5 ;store result
vmov q10, q12
vst1.u8 {d7}, [r4], r5
vmov q11, q13
vst1.u8 {d8}, [r4], r5
vmov q12, q14
vst1.u8 {d9}, [r4], r5
vmov d26, d30
bne filt_blk2d_sp8x8_loop_neon
add sp, sp, #64
pop {r4-r5,pc}
;---------------------
firstpass_filter8x8_only
;add r2, r12, r2, lsl #5 ;calculate filter location
;vld1.s32 {q14, q15}, [r2] ;load first_pass filter
vabs.s32 q12, q14
vabs.s32 q13, q15
mov r2, #2 ;loop counter
sub r0, r0, #2 ;move srcptr back to (line-2) and (column-2)
vdup.8 d0, d24[0] ;first_pass filter (d0-d5)
vdup.8 d1, d24[4]
vdup.8 d2, d25[0]
vdup.8 d3, d25[4]
vdup.8 d4, d26[0]
vdup.8 d5, d26[4]
;First pass: output_height lines x output_width columns (8x8)
filt_blk2d_fpo8x8_loop_neon
vld1.u8 {q3}, [r0], r1 ;load src data
vld1.u8 {q4}, [r0], r1
vld1.u8 {q5}, [r0], r1
vld1.u8 {q6}, [r0], r1
pld [r0]
pld [r0, r1]
pld [r0, r1, lsl #1]
vmull.u8 q7, d6, d0 ;(src_ptr[-2] * vp8_filter[0])
vmull.u8 q8, d8, d0
vmull.u8 q9, d10, d0
vmull.u8 q10, d12, d0
vext.8 d28, d6, d7, #1 ;construct src_ptr[-1]
vext.8 d29, d8, d9, #1
vext.8 d30, d10, d11, #1
vext.8 d31, d12, d13, #1
vmlsl.u8 q7, d28, d1 ;-(src_ptr[-1] * vp8_filter[1])
vmlsl.u8 q8, d29, d1
vmlsl.u8 q9, d30, d1
vmlsl.u8 q10, d31, d1
vext.8 d28, d6, d7, #4 ;construct src_ptr[2]
vext.8 d29, d8, d9, #4
vext.8 d30, d10, d11, #4
vext.8 d31, d12, d13, #4
vmlsl.u8 q7, d28, d4 ;-(src_ptr[2] * vp8_filter[4])
vmlsl.u8 q8, d29, d4
vmlsl.u8 q9, d30, d4
vmlsl.u8 q10, d31, d4
vext.8 d28, d6, d7, #2 ;construct src_ptr[0]
vext.8 d29, d8, d9, #2
vext.8 d30, d10, d11, #2
vext.8 d31, d12, d13, #2
vmlal.u8 q7, d28, d2 ;(src_ptr[0] * vp8_filter[2])
vmlal.u8 q8, d29, d2
vmlal.u8 q9, d30, d2
vmlal.u8 q10, d31, d2
vext.8 d28, d6, d7, #5 ;construct src_ptr[3]
vext.8 d29, d8, d9, #5
vext.8 d30, d10, d11, #5
vext.8 d31, d12, d13, #5
vmlal.u8 q7, d28, d5 ;(src_ptr[3] * vp8_filter[5])
vmlal.u8 q8, d29, d5
vmlal.u8 q9, d30, d5
vmlal.u8 q10, d31, d5
vext.8 d28, d6, d7, #3 ;construct src_ptr[1]
vext.8 d29, d8, d9, #3
vext.8 d30, d10, d11, #3
vext.8 d31, d12, d13, #3
vmull.u8 q3, d28, d3 ;(src_ptr[1] * vp8_filter[3])
vmull.u8 q4, d29, d3
vmull.u8 q5, d30, d3
vmull.u8 q6, d31, d3
;
vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters)
vqadd.s16 q8, q4
vqadd.s16 q9, q5
vqadd.s16 q10, q6
subs r2, r2, #1
vqrshrun.s16 d22, q7, #7 ;shift/round/saturate to u8
vqrshrun.s16 d23, q8, #7
vqrshrun.s16 d24, q9, #7
vqrshrun.s16 d25, q10, #7
vst1.u8 {d22}, [r4], r5 ;store result
vst1.u8 {d23}, [r4], r5
vst1.u8 {d24}, [r4], r5
vst1.u8 {d25}, [r4], r5
bne filt_blk2d_fpo8x8_loop_neon
pop {r4-r5,pc}
;---------------------
secondpass_filter8x8_only
sub r0, r0, r1, lsl #1
add r3, r12, r3, lsl #5
vld1.u8 {d18}, [r0], r1 ;load src data
vld1.s32 {q5, q6}, [r3] ;load second_pass filter
vld1.u8 {d19}, [r0], r1
vabs.s32 q7, q5
vld1.u8 {d20}, [r0], r1
vabs.s32 q8, q6
vld1.u8 {d21}, [r0], r1
mov r3, #2 ;loop counter
vld1.u8 {d22}, [r0], r1
vdup.8 d0, d14[0] ;second_pass filter parameters (d0-d5)
vld1.u8 {d23}, [r0], r1
vdup.8 d1, d14[4]
vld1.u8 {d24}, [r0], r1
vdup.8 d2, d15[0]
vld1.u8 {d25}, [r0], r1
vdup.8 d3, d15[4]
vld1.u8 {d26}, [r0], r1
vdup.8 d4, d16[0]
vld1.u8 {d27}, [r0], r1
vdup.8 d5, d16[4]
vld1.u8 {d28}, [r0], r1
vld1.u8 {d29}, [r0], r1
vld1.u8 {d30}, [r0], r1
;Second pass: 8x8
filt_blk2d_spo8x8_loop_neon
vmull.u8 q3, d18, d0 ;(src_ptr[-2] * vp8_filter[0])
vmull.u8 q4, d19, d0
vmull.u8 q5, d20, d0
vmull.u8 q6, d21, d0
vmlsl.u8 q3, d19, d1 ;-(src_ptr[-1] * vp8_filter[1])
vmlsl.u8 q4, d20, d1
vmlsl.u8 q5, d21, d1
vmlsl.u8 q6, d22, d1
vmlsl.u8 q3, d22, d4 ;-(src_ptr[2] * vp8_filter[4])
vmlsl.u8 q4, d23, d4
vmlsl.u8 q5, d24, d4
vmlsl.u8 q6, d25, d4
vmlal.u8 q3, d20, d2 ;(src_ptr[0] * vp8_filter[2])
vmlal.u8 q4, d21, d2
vmlal.u8 q5, d22, d2
vmlal.u8 q6, d23, d2
vmlal.u8 q3, d23, d5 ;(src_ptr[3] * vp8_filter[5])
vmlal.u8 q4, d24, d5
vmlal.u8 q5, d25, d5
vmlal.u8 q6, d26, d5
vmull.u8 q7, d21, d3 ;(src_ptr[1] * vp8_filter[3])
vmull.u8 q8, d22, d3
vmull.u8 q9, d23, d3
vmull.u8 q10, d24, d3
subs r3, r3, #1
vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters)
vqadd.s16 q8, q4
vqadd.s16 q9, q5
vqadd.s16 q10, q6
vqrshrun.s16 d6, q7, #7 ;shift/round/saturate to u8
vqrshrun.s16 d7, q8, #7
vqrshrun.s16 d8, q9, #7
vqrshrun.s16 d9, q10, #7
vmov q9, q11
vst1.u8 {d6}, [r4], r5 ;store result
vmov q10, q12
vst1.u8 {d7}, [r4], r5
vmov q11, q13
vst1.u8 {d8}, [r4], r5
vmov q12, q14
vst1.u8 {d9}, [r4], r5
vmov d26, d30
bne filt_blk2d_spo8x8_loop_neon
pop {r4-r5,pc}
ENDP
;-----------------
AREA subpelfilters8_dat, DATA, READWRITE ;read/write by default
;Data section subpelfilters8_dat holds the six-tap sub-pixel filters. DCD reserves one word per entry:
;8 filters x 8 words = 64 words (48 real taps plus zero padding). The label filter8_coeff can be used
;to access the data: filter8_coeff, filter8_coeff+4, filter8_coeff+8, ...
_filter8_coeff_
DCD filter8_coeff
filter8_coeff
DCD 0, 0, 128, 0, 0, 0, 0, 0
DCD 0, -6, 123, 12, -1, 0, 0, 0
DCD 2, -11, 108, 36, -8, 1, 0, 0
DCD 0, -9, 93, 50, -6, 0, 0, 0
DCD 3, -16, 77, 77, -16, 3, 0, 0
DCD 0, -6, 50, 93, -9, 0, 0, 0
DCD 1, -8, 36, 108, -11, 2, 0, 0
DCD 0, -1, 12, 123, -6, 0, 0, 0
END
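All three sixtap_predict*_neon routines above share the same entry dispatch: a zero xoffset skips the horizontal pass, a zero yoffset skips the vertical pass, and otherwise both passes run through an intermediate buffer. A hedged scalar sketch of that branch structure follows (names are illustrative); the filtering loops themselves are sketched after the 8x4 file earlier.

/* Branch structure shared by the sixtap_predict*_neon routines above,
 * in scalar form; returns which filter passes would run. */
enum { PASS_VERTICAL_ONLY, PASS_HORIZONTAL_ONLY, PASS_BOTH };

int sixtap_pass_select_sketch(int xoffset, int yoffset)
{
    if (xoffset == 0)                 /* beq secondpass_filter8x8_only */
        return PASS_VERTICAL_ONLY;    /* vertical 6-tap straight to dst */
    if (yoffset == 0)                 /* beq firstpass_filter8x8_only */
        return PASS_HORIZONTAL_ONLY;  /* horizontal 6-tap straight to dst */
    return PASS_BOTH;                 /* horizontal pass into the stack
                                         buffer (sub sp, sp, #64 here),
                                         then vertical pass to dst */
}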


@@ -0,0 +1,80 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef RECON_ARM_H
#define RECON_ARM_H
#if HAVE_ARMV6
extern prototype_recon_block(vp8_recon_b_armv6);
extern prototype_recon_block(vp8_recon2b_armv6);
extern prototype_recon_block(vp8_recon4b_armv6);
extern prototype_copy_block(vp8_copy_mem8x8_v6);
extern prototype_copy_block(vp8_copy_mem8x4_v6);
extern prototype_copy_block(vp8_copy_mem16x16_v6);
#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_recon_recon
#define vp8_recon_recon vp8_recon_b_armv6
#undef vp8_recon_recon2
#define vp8_recon_recon2 vp8_recon2b_armv6
#undef vp8_recon_recon4
#define vp8_recon_recon4 vp8_recon4b_armv6
#undef vp8_recon_copy8x8
#define vp8_recon_copy8x8 vp8_copy_mem8x8_v6
#undef vp8_recon_copy8x4
#define vp8_recon_copy8x4 vp8_copy_mem8x4_v6
#undef vp8_recon_copy16x16
#define vp8_recon_copy16x16 vp8_copy_mem16x16_v6
#endif
#endif
#if HAVE_ARMV7
extern prototype_recon_block(vp8_recon_b_neon);
extern prototype_recon_block(vp8_recon2b_neon);
extern prototype_recon_block(vp8_recon4b_neon);
extern prototype_copy_block(vp8_copy_mem8x8_neon);
extern prototype_copy_block(vp8_copy_mem8x4_neon);
extern prototype_copy_block(vp8_copy_mem16x16_neon);
extern prototype_recon_macroblock(vp8_recon_mb_neon);
#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_recon_recon
#define vp8_recon_recon vp8_recon_b_neon
#undef vp8_recon_recon2
#define vp8_recon_recon2 vp8_recon2b_neon
#undef vp8_recon_recon4
#define vp8_recon_recon4 vp8_recon4b_neon
#undef vp8_recon_copy8x8
#define vp8_recon_copy8x8 vp8_copy_mem8x8_neon
#undef vp8_recon_copy8x4
#define vp8_recon_copy8x4 vp8_copy_mem8x4_neon
#undef vp8_recon_copy16x16
#define vp8_recon_copy16x16 vp8_copy_mem16x16_neon
#undef vp8_recon_recon_mb
#define vp8_recon_recon_mb vp8_recon_mb_neon
#endif
#endif
#endif
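The header above (guarded by RECON_ARM_H) binds the generic vp8_recon_* hooks directly to the ARMv6 or NEON symbols only when CONFIG_RUNTIME_CPU_DETECT is disabled; with runtime detection enabled, the same functions are instead installed into a function-pointer table after the CPU has been probed. A hedged sketch of the two binding styles follows; the copy-block signature mirrors the prototype_copy_block() convention and is an assumption here, and the dispatch table and init function are illustrative, not the exact libvpx RTCD machinery.

/* Both implementations share the copy-block signature declared via
 * prototype_copy_block() in the header above (signature assumed). */
extern void vp8_copy_mem16x16_neon(unsigned char *src, int src_stride,
                                   unsigned char *dst, int dst_stride);
extern void vp8_copy_mem16x16_c(unsigned char *src, int src_stride,
                                unsigned char *dst, int dst_stride);

#if !CONFIG_RUNTIME_CPU_DETECT
/* Compile-time binding: every call site resolves straight to the NEON
 * symbol, exactly like the #undef/#define pairs above. */
#define recon_copy16x16 vp8_copy_mem16x16_neon
#else
/* Runtime binding: CPU detection fills a small dispatch table instead. */
typedef void (*copy_block_fn)(unsigned char *, int, unsigned char *, int);
struct recon_dispatch_sketch { copy_block_fn copy16x16; };

static void init_recon_dispatch(struct recon_dispatch_sketch *d, int has_neon)
{
    d->copy16x16 = has_neon ? vp8_copy_mem16x16_neon : vp8_copy_mem16x16_c;
}
#endif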


@@ -0,0 +1,62 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_ports/config.h"
#include "blockd.h"
#include "reconintra.h"
#include "vpx_mem/vpx_mem.h"
#include "recon.h"
#if HAVE_ARMV7
extern void vp8_build_intra_predictors_mby_neon_func(
unsigned char *y_buffer,
unsigned char *ypred_ptr,
int y_stride,
int mode,
int Up,
int Left);
void vp8_build_intra_predictors_mby_neon(MACROBLOCKD *x)
{
unsigned char *y_buffer = x->dst.y_buffer;
unsigned char *ypred_ptr = x->predictor;
int y_stride = x->dst.y_stride;
int mode = x->mode_info_context->mbmi.mode;
int Up = x->up_available;
int Left = x->left_available;
vp8_build_intra_predictors_mby_neon_func(y_buffer, ypred_ptr, y_stride, mode, Up, Left);
}
#endif
#if HAVE_ARMV7
extern void vp8_build_intra_predictors_mby_s_neon_func(
unsigned char *y_buffer,
unsigned char *ypred_ptr,
int y_stride,
int mode,
int Up,
int Left);
void vp8_build_intra_predictors_mby_s_neon(MACROBLOCKD *x)
{
unsigned char *y_buffer = x->dst.y_buffer;
unsigned char *ypred_ptr = x->predictor;
int y_stride = x->dst.y_stride;
int mode = x->mode_info_context->mbmi.mode;
int Up = x->up_available;
int Left = x->left_available;
vp8_build_intra_predictors_mby_s_neon_func(y_buffer, ypred_ptr, y_stride, mode, Up, Left);
}
#endif


@@ -0,0 +1,89 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef SUBPIXEL_ARM_H
#define SUBPIXEL_ARM_H
#if HAVE_ARMV6
extern prototype_subpixel_predict(vp8_sixtap_predict16x16_armv6);
extern prototype_subpixel_predict(vp8_sixtap_predict8x8_armv6);
extern prototype_subpixel_predict(vp8_sixtap_predict8x4_armv6);
extern prototype_subpixel_predict(vp8_sixtap_predict_armv6);
extern prototype_subpixel_predict(vp8_bilinear_predict16x16_armv6);
extern prototype_subpixel_predict(vp8_bilinear_predict8x8_armv6);
extern prototype_subpixel_predict(vp8_bilinear_predict8x4_armv6);
extern prototype_subpixel_predict(vp8_bilinear_predict4x4_armv6);
#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_subpix_sixtap16x16
#define vp8_subpix_sixtap16x16 vp8_sixtap_predict16x16_armv6
#undef vp8_subpix_sixtap8x8
#define vp8_subpix_sixtap8x8 vp8_sixtap_predict8x8_armv6
#undef vp8_subpix_sixtap8x4
#define vp8_subpix_sixtap8x4 vp8_sixtap_predict8x4_armv6
#undef vp8_subpix_sixtap4x4
#define vp8_subpix_sixtap4x4 vp8_sixtap_predict_armv6
#undef vp8_subpix_bilinear16x16
#define vp8_subpix_bilinear16x16 vp8_bilinear_predict16x16_armv6
#undef vp8_subpix_bilinear8x8
#define vp8_subpix_bilinear8x8 vp8_bilinear_predict8x8_armv6
#undef vp8_subpix_bilinear8x4
#define vp8_subpix_bilinear8x4 vp8_bilinear_predict8x4_armv6
#undef vp8_subpix_bilinear4x4
#define vp8_subpix_bilinear4x4 vp8_bilinear_predict4x4_armv6
#endif
#endif
#if HAVE_ARMV7
extern prototype_subpixel_predict(vp8_sixtap_predict16x16_neon);
extern prototype_subpixel_predict(vp8_sixtap_predict8x8_neon);
extern prototype_subpixel_predict(vp8_sixtap_predict8x4_neon);
extern prototype_subpixel_predict(vp8_sixtap_predict_neon);
extern prototype_subpixel_predict(vp8_bilinear_predict16x16_neon);
extern prototype_subpixel_predict(vp8_bilinear_predict8x8_neon);
extern prototype_subpixel_predict(vp8_bilinear_predict8x4_neon);
extern prototype_subpixel_predict(vp8_bilinear_predict4x4_neon);
#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_subpix_sixtap16x16
#define vp8_subpix_sixtap16x16 vp8_sixtap_predict16x16_neon
#undef vp8_subpix_sixtap8x8
#define vp8_subpix_sixtap8x8 vp8_sixtap_predict8x8_neon
#undef vp8_subpix_sixtap8x4
#define vp8_subpix_sixtap8x4 vp8_sixtap_predict8x4_neon
#undef vp8_subpix_sixtap4x4
#define vp8_subpix_sixtap4x4 vp8_sixtap_predict_neon
#undef vp8_subpix_bilinear16x16
#define vp8_subpix_bilinear16x16 vp8_bilinear_predict16x16_neon
#undef vp8_subpix_bilinear8x8
#define vp8_subpix_bilinear8x8 vp8_bilinear_predict8x8_neon
#undef vp8_subpix_bilinear8x4
#define vp8_subpix_bilinear8x4 vp8_bilinear_predict8x4_neon
#undef vp8_subpix_bilinear4x4
#define vp8_subpix_bilinear4x4 vp8_bilinear_predict4x4_neon
#endif
#endif
#endif


@@ -0,0 +1,87 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_ports/config.h"
#include <stddef.h>
#if CONFIG_VP8_ENCODER
#include "vpx_scale/yv12config.h"
#endif
#if CONFIG_VP8_DECODER
#include "onyxd_int.h"
#endif
#define DEFINE(sym, val) int sym = val;
/*
#define BLANK() asm volatile("\n->" : : )
*/
/*
* int main(void)
* {
*/
#if CONFIG_VP8_DECODER || CONFIG_VP8_ENCODER
DEFINE(yv12_buffer_config_y_width, offsetof(YV12_BUFFER_CONFIG, y_width));
DEFINE(yv12_buffer_config_y_height, offsetof(YV12_BUFFER_CONFIG, y_height));
DEFINE(yv12_buffer_config_y_stride, offsetof(YV12_BUFFER_CONFIG, y_stride));
DEFINE(yv12_buffer_config_uv_width, offsetof(YV12_BUFFER_CONFIG, uv_width));
DEFINE(yv12_buffer_config_uv_height, offsetof(YV12_BUFFER_CONFIG, uv_height));
DEFINE(yv12_buffer_config_uv_stride, offsetof(YV12_BUFFER_CONFIG, uv_stride));
DEFINE(yv12_buffer_config_y_buffer, offsetof(YV12_BUFFER_CONFIG, y_buffer));
DEFINE(yv12_buffer_config_u_buffer, offsetof(YV12_BUFFER_CONFIG, u_buffer));
DEFINE(yv12_buffer_config_v_buffer, offsetof(YV12_BUFFER_CONFIG, v_buffer));
DEFINE(yv12_buffer_config_border, offsetof(YV12_BUFFER_CONFIG, border));
#endif
#if CONFIG_VP8_DECODER
DEFINE(mb_diff, offsetof(MACROBLOCKD, diff));
DEFINE(mb_predictor, offsetof(MACROBLOCKD, predictor));
DEFINE(mb_dst_y_stride, offsetof(MACROBLOCKD, dst.y_stride));
DEFINE(mb_dst_y_buffer, offsetof(MACROBLOCKD, dst.y_buffer));
DEFINE(mb_dst_u_buffer, offsetof(MACROBLOCKD, dst.u_buffer));
DEFINE(mb_dst_v_buffer, offsetof(MACROBLOCKD, dst.v_buffer));
DEFINE(mb_up_available, offsetof(MACROBLOCKD, up_available));
DEFINE(mb_left_available, offsetof(MACROBLOCKD, left_available));
DEFINE(detok_scan, offsetof(DETOK, scan));
DEFINE(detok_ptr_block2leftabove, offsetof(DETOK, ptr_block2leftabove));
DEFINE(detok_coef_tree_ptr, offsetof(DETOK, vp8_coef_tree_ptr));
DEFINE(detok_teb_base_ptr, offsetof(DETOK, teb_base_ptr));
DEFINE(detok_norm_ptr, offsetof(DETOK, norm_ptr));
DEFINE(detok_ptr_coef_bands_x, offsetof(DETOK, ptr_coef_bands_x));
DEFINE(detok_A, offsetof(DETOK, A));
DEFINE(detok_L, offsetof(DETOK, L));
DEFINE(detok_qcoeff_start_ptr, offsetof(DETOK, qcoeff_start_ptr));
DEFINE(detok_current_bc, offsetof(DETOK, current_bc));
DEFINE(detok_coef_probs, offsetof(DETOK, coef_probs));
DEFINE(detok_eob, offsetof(DETOK, eob));
DEFINE(bool_decoder_user_buffer_end, offsetof(BOOL_DECODER, user_buffer_end));
DEFINE(bool_decoder_user_buffer, offsetof(BOOL_DECODER, user_buffer));
DEFINE(bool_decoder_value, offsetof(BOOL_DECODER, value));
DEFINE(bool_decoder_count, offsetof(BOOL_DECODER, count));
DEFINE(bool_decoder_range, offsetof(BOOL_DECODER, range));
DEFINE(tokenextrabits_min_val, offsetof(TOKENEXTRABITS, min_val));
DEFINE(tokenextrabits_length, offsetof(TOKENEXTRABITS, Length));
#endif
//add asserts for any offset that is not supported by assembly code
//add asserts for any size that is not supported by assembly code
/*
* return 0;
* }
*/
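The file above is an asm-offsets source: DEFINE(sym, val) expands to a global int holding an offsetof() result, the compiler bakes each value into the object file, and the build extracts those integers and exposes them to the hand-written ARM assembly, so the .asm files never hard-code structure layouts. The closing note about asserts can be serviced with a compile-time check; the macro and struct below are an illustrative sketch under that assumption, not libvpx code.

#include <stddef.h>

/* Compile-time assert in the spirit of the "add asserts" note above:
 * the build breaks if an offset drifts outside what the assembly can
 * encode (for example, an immediate that must fit in 8 bits). */
#define ct_sassert(name, cond) typedef char assert_##name[(cond) ? 1 : -1]

typedef struct {                 /* stand-in for YV12_BUFFER_CONFIG */
    int y_width;
    int y_height;
    int y_stride;
} yv12_sketch;

ct_sassert(y_stride_offset_fits_imm8, offsetof(yv12_sketch, y_stride) < 256);

/* The extracted constant is then used from assembly roughly as
 *     ldr r2, [r0, #yv12_buffer_config_y_stride]
 * with the value coming from a generated include, not a hand-typed literal. */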


@@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source


@@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@@ -24,7 +24,7 @@ void vpx_log(const char *format, ...);
#define TRUE 1
#define FALSE 0
//#define DCPRED 1
/*#define DCPRED 1*/
#define DCPREDSIMTHRESH 0
#define DCPREDCNTTHRESH 3
@@ -39,7 +39,7 @@ void vpx_log(const char *format, ...);
#define MAX_REF_LF_DELTAS 4
#define MAX_MODE_LF_DELTAS 4
// Segment Feature Masks
/* Segment Feature Masks */
#define SEGMENT_DELTADATA 0
#define SEGMENT_ABSDATA 1
@@ -75,11 +75,11 @@ typedef enum
typedef enum
{
DC_PRED, // average of above and left pixels
V_PRED, // vertical prediction
H_PRED, // horizontal prediction
TM_PRED, // Truemotion prediction
B_PRED, // block based prediction, each block has its own prediction mode
DC_PRED, /* average of above and left pixels */
V_PRED, /* vertical prediction */
H_PRED, /* horizontal prediction */
TM_PRED, /* Truemotion prediction */
B_PRED, /* block based prediction, each block has its own prediction mode */
NEARESTMV,
NEARMV,
@@ -90,16 +90,16 @@ typedef enum
MB_MODE_COUNT
} MB_PREDICTION_MODE;
// Macroblock level features
/* Macroblock level features */
typedef enum
{
MB_LVL_ALT_Q = 0, // Use alternate Quantizer ....
MB_LVL_ALT_LF = 1, // Use alternate loop filter value...
MB_LVL_MAX = 2 // Number of MB level features supported
MB_LVL_ALT_Q = 0, /* Use alternate Quantizer .... */
MB_LVL_ALT_LF = 1, /* Use alternate loop filter value... */
MB_LVL_MAX = 2 /* Number of MB level features supported */
} MB_LVL_FEATURES;
// Segment Feature Masks
/* Segment Feature Masks */
#define SEGMENT_ALTQ 0x01
#define SEGMENT_ALT_LF 0x02
@@ -110,11 +110,11 @@ typedef enum
typedef enum
{
B_DC_PRED, // average of above and left pixels
B_DC_PRED, /* average of above and left pixels */
B_TM_PRED,
B_VE_PRED, // vertical prediction
B_HE_PRED, // horizontal prediction
B_VE_PRED, /* vertical prediction */
B_HE_PRED, /* horizontal prediction */
B_LD_PRED,
B_RD_PRED,
@@ -168,14 +168,15 @@ typedef struct
int as_int;
MV as_mv;
} mv;
int partitioning;
int partition_count;
int mb_skip_coeff; //does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens
int dc_diff;
unsigned char segment_id; // Which set of segmentation parameters should be used for this MB
int force_no_skip;
int need_to_clamp_mvs;
B_MODE_INFO partition_bmi[16];
unsigned char partitioning;
unsigned char mb_skip_coeff; /* does this mb have coefficients at all, 1=no coefficients, 0=need decode tokens */
unsigned char dc_diff;
unsigned char need_to_clamp_mvs;
unsigned char segment_id; /* Which set of segmentation parameters should be used for this MB */
unsigned char force_no_skip; /* encoder only */
} MB_MODE_INFO;
@@ -194,9 +195,9 @@ typedef struct
short *diff;
short *reference;
short(*dequant)[4];
short *dequant;
// 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries
/* 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries */
unsigned char **base_pre;
int pre;
int pre_stride;
@@ -213,22 +214,20 @@ typedef struct
typedef struct
{
DECLARE_ALIGNED(16, short, diff[400]); // from idct diff
DECLARE_ALIGNED(16, short, diff[400]); /* from idct diff */
DECLARE_ALIGNED(16, unsigned char, predictor[384]);
//not used DECLARE_ALIGNED(16, short, reference[384]);
/* not used DECLARE_ALIGNED(16, short, reference[384]); */
DECLARE_ALIGNED(16, short, qcoeff[400]);
DECLARE_ALIGNED(16, short, dqcoeff[400]);
DECLARE_ALIGNED(16, char, eobs[25]);
// 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries.
/* 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries. */
BLOCKD block[25];
YV12_BUFFER_CONFIG pre; // Filtered copy of previous frame reconstruction
YV12_BUFFER_CONFIG pre; /* Filtered copy of previous frame reconstruction */
YV12_BUFFER_CONFIG dst;
MODE_INFO *mode_info_context;
MODE_INFO *mode_info;
int mode_info_stride;
FRAME_TYPE frame_type;
@@ -236,39 +235,39 @@ typedef struct
int up_available;
int left_available;
// Y,U,V,Y2
/* Y,U,V,Y2 */
ENTROPY_CONTEXT_PLANES *above_context;
ENTROPY_CONTEXT_PLANES *left_context;
// 0 indicates segmentation at MB level is not enabled. Otherwise the individual bits indicate which features are active.
/* 0 indicates segmentation at MB level is not enabled. Otherwise the individual bits indicate which features are active. */
unsigned char segmentation_enabled;
// 0 (do not update) 1 (update) the macroblock segmentation map.
/* 0 (do not update) 1 (update) the macroblock segmentation map. */
unsigned char update_mb_segmentation_map;
// 0 (do not update) 1 (update) the macroblock segmentation feature data.
/* 0 (do not update) 1 (update) the macroblock segmentation feature data. */
unsigned char update_mb_segmentation_data;
// 0 (do not update) 1 (update) the macroblock segmentation feature data.
/* 0 (do not update) 1 (update) the macroblock segmentation feature data. */
unsigned char mb_segement_abs_delta;
// Per frame flags that define which MB level features (such as quantizer or loop filter level)
// are enabled and when enabled the proabilities used to decode the per MB flags in MB_MODE_INFO
vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS]; // Probability Tree used to code Segment number
/* Per frame flags that define which MB level features (such as quantizer or loop filter level) */
/* are enabled and when enabled the probabilities used to decode the per MB flags in MB_MODE_INFO */
vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS]; /* Probability Tree used to code Segment number */
signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; // Segment parameters
signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; /* Segment parameters */
// mode_based Loop filter adjustment
/* mode_based Loop filter adjustment */
unsigned char mode_ref_lf_delta_enabled;
unsigned char mode_ref_lf_delta_update;
// Delta values have the range +/- MAX_LOOP_FILTER
//char ref_lf_deltas[MAX_REF_LF_DELTAS]; // 0 = Intra, Last, GF, ARF
//char mode_lf_deltas[MAX_MODE_LF_DELTAS]; // 0 = BPRED, ZERO_MV, MV, SPLIT
signed char ref_lf_deltas[MAX_REF_LF_DELTAS]; // 0 = Intra, Last, GF, ARF
signed char mode_lf_deltas[MAX_MODE_LF_DELTAS]; // 0 = BPRED, ZERO_MV, MV, SPLIT
/* Delta values have the range +/- MAX_LOOP_FILTER */
signed char last_ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */
signed char ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */
signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* 0 = BPRED, ZERO_MV, MV, SPLIT */
signed char mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* 0 = BPRED, ZERO_MV, MV, SPLIT */
// Distance of MB away from frame edges
/* Distance of MB away from frame edges */
int mb_to_left_edge;
int mb_to_right_edge;
int mb_to_top_edge;


@@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source


@@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source


@@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source


@@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@@ -21,7 +21,7 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f
int mb_index = 0;
FILE *mvs = fopen("mvs.stt", "a");
// print out the macroblock Y modes
/* print out the macroblock Y modes */
mb_index = 0;
fprintf(mvs, "Mb Modes for Frame %d\n", frame);
@@ -60,7 +60,7 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f
fprintf(mvs, "\n");
// print out the macroblock UV modes
/* print out the macroblock UV modes */
mb_index = 0;
fprintf(mvs, "UV Modes for Frame %d\n", frame);
@@ -80,7 +80,7 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f
fprintf(mvs, "\n");
// print out the block modes
/* print out the block modes */
mb_index = 0;
fprintf(mvs, "Mbs for Frame %d\n", frame);
{
@@ -108,7 +108,7 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f
}
fprintf(mvs, "\n");
// print out the macroblock mvs
/* print out the macroblock mvs */
mb_index = 0;
fprintf(mvs, "MVs for Frame %d\n", frame);
@@ -128,7 +128,7 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f
fprintf(mvs, "\n");
// print out the block modes
/* print out the block modes */
mb_index = 0;
fprintf(mvs, "MVs for Frame %d\n", frame);
{


@@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@@ -15,204 +15,204 @@ static const unsigned int default_coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_C
{
{
// Block Type ( 0 )
/* Block Type ( 0 ) */
{
// Coeff Band ( 0 )
/* Coeff Band ( 0 ) */
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
},
{
// Coeff Band ( 1 )
/* Coeff Band ( 1 ) */
{30190, 26544, 225, 24, 4, 0, 0, 0, 0, 0, 0, 4171593,},
{26846, 25157, 1241, 130, 26, 6, 1, 0, 0, 0, 0, 149987,},
{10484, 9538, 1006, 160, 36, 18, 0, 0, 0, 0, 0, 15104,},
},
{
// Coeff Band ( 2 )
/* Coeff Band ( 2 ) */
{25842, 40456, 1126, 83, 11, 2, 0, 0, 0, 0, 0, 0,},
{9338, 8010, 512, 73, 7, 3, 2, 0, 0, 0, 0, 43294,},
{1047, 751, 149, 31, 13, 6, 1, 0, 0, 0, 0, 879,},
},
{
// Coeff Band ( 3 )
/* Coeff Band ( 3 ) */
{26136, 9826, 252, 13, 0, 0, 0, 0, 0, 0, 0, 0,},
{8134, 5574, 191, 14, 2, 0, 0, 0, 0, 0, 0, 35302,},
{ 605, 677, 116, 9, 1, 0, 0, 0, 0, 0, 0, 611,},
},
{
// Coeff Band ( 4 )
/* Coeff Band ( 4 ) */
{10263, 15463, 283, 17, 0, 0, 0, 0, 0, 0, 0, 0,},
{2773, 2191, 128, 9, 2, 2, 0, 0, 0, 0, 0, 10073,},
{ 134, 125, 32, 4, 0, 2, 0, 0, 0, 0, 0, 50,},
},
{
// Coeff Band ( 5 )
/* Coeff Band ( 5 ) */
{10483, 2663, 23, 1, 0, 0, 0, 0, 0, 0, 0, 0,},
{2137, 1251, 27, 1, 1, 0, 0, 0, 0, 0, 0, 14362,},
{ 116, 156, 14, 2, 1, 0, 0, 0, 0, 0, 0, 190,},
},
{
// Coeff Band ( 6 )
/* Coeff Band ( 6 ) */
{40977, 27614, 412, 28, 0, 0, 0, 0, 0, 0, 0, 0,},
{6113, 5213, 261, 22, 3, 0, 0, 0, 0, 0, 0, 26164,},
{ 382, 312, 50, 14, 2, 0, 0, 0, 0, 0, 0, 345,},
},
{
// Coeff Band ( 7 )
/* Coeff Band ( 7 ) */
{ 0, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 0, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 319,},
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8,},
},
},
{
// Block Type ( 1 )
/* Block Type ( 1 ) */
{
// Coeff Band ( 0 )
/* Coeff Band ( 0 ) */
{3268, 19382, 1043, 250, 93, 82, 49, 26, 17, 8, 25, 82289,},
{8758, 32110, 5436, 1832, 827, 668, 420, 153, 24, 0, 3, 52914,},
{9337, 23725, 8487, 3954, 2107, 1836, 1069, 399, 59, 0, 0, 18620,},
},
{
// Coeff Band ( 1 )
/* Coeff Band ( 1 ) */
{12419, 8420, 452, 62, 9, 1, 0, 0, 0, 0, 0, 0,},
{11715, 8705, 693, 92, 15, 7, 2, 0, 0, 0, 0, 53988,},
{7603, 8585, 2306, 778, 270, 145, 39, 5, 0, 0, 0, 9136,},
},
{
// Coeff Band ( 2 )
/* Coeff Band ( 2 ) */
{15938, 14335, 1207, 184, 55, 13, 4, 1, 0, 0, 0, 0,},
{7415, 6829, 1138, 244, 71, 26, 7, 0, 0, 0, 0, 9980,},
{1580, 1824, 655, 241, 89, 46, 10, 2, 0, 0, 0, 429,},
},
{
// Coeff Band ( 3 )
/* Coeff Band ( 3 ) */
{19453, 5260, 201, 19, 0, 0, 0, 0, 0, 0, 0, 0,},
{9173, 3758, 213, 22, 1, 1, 0, 0, 0, 0, 0, 9820,},
{1689, 1277, 276, 51, 17, 4, 0, 0, 0, 0, 0, 679,},
},
{
// Coeff Band ( 4 )
/* Coeff Band ( 4 ) */
{12076, 10667, 620, 85, 19, 9, 5, 0, 0, 0, 0, 0,},
{4665, 3625, 423, 55, 19, 9, 0, 0, 0, 0, 0, 5127,},
{ 415, 440, 143, 34, 20, 7, 2, 0, 0, 0, 0, 101,},
},
{
// Coeff Band ( 5 )
/* Coeff Band ( 5 ) */
{12183, 4846, 115, 11, 1, 0, 0, 0, 0, 0, 0, 0,},
{4226, 3149, 177, 21, 2, 0, 0, 0, 0, 0, 0, 7157,},
{ 375, 621, 189, 51, 11, 4, 1, 0, 0, 0, 0, 198,},
},
{
// Coeff Band ( 6 )
/* Coeff Band ( 6 ) */
{61658, 37743, 1203, 94, 10, 3, 0, 0, 0, 0, 0, 0,},
{15514, 11563, 903, 111, 14, 5, 0, 0, 0, 0, 0, 25195,},
{ 929, 1077, 291, 78, 14, 7, 1, 0, 0, 0, 0, 507,},
},
{
// Coeff Band ( 7 )
/* Coeff Band ( 7 ) */
{ 0, 990, 15, 3, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 0, 412, 13, 0, 0, 0, 0, 0, 0, 0, 0, 1641,},
{ 0, 18, 7, 1, 0, 0, 0, 0, 0, 0, 0, 30,},
},
},
{
// Block Type ( 2 )
/* Block Type ( 2 ) */
{
// Coeff Band ( 0 )
/* Coeff Band ( 0 ) */
{ 953, 24519, 628, 120, 28, 12, 4, 0, 0, 0, 0, 2248798,},
{1525, 25654, 2647, 617, 239, 143, 42, 5, 0, 0, 0, 66837,},
{1180, 11011, 3001, 1237, 532, 448, 239, 54, 5, 0, 0, 7122,},
},
{
// Coeff Band ( 1 )
/* Coeff Band ( 1 ) */
{1356, 2220, 67, 10, 4, 1, 0, 0, 0, 0, 0, 0,},
{1450, 2544, 102, 18, 4, 3, 0, 0, 0, 0, 0, 57063,},
{1182, 2110, 470, 130, 41, 21, 0, 0, 0, 0, 0, 6047,},
},
{
// Coeff Band ( 2 )
/* Coeff Band ( 2 ) */
{ 370, 3378, 200, 30, 5, 4, 1, 0, 0, 0, 0, 0,},
{ 293, 1006, 131, 29, 11, 0, 0, 0, 0, 0, 0, 5404,},
{ 114, 387, 98, 23, 4, 8, 1, 0, 0, 0, 0, 236,},
},
{
// Coeff Band ( 3 )
/* Coeff Band ( 3 ) */
{ 579, 194, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 395, 213, 5, 1, 0, 0, 0, 0, 0, 0, 0, 4157,},
{ 119, 122, 4, 0, 0, 0, 0, 0, 0, 0, 0, 300,},
},
{
// Coeff Band ( 4 )
/* Coeff Band ( 4 ) */
{ 38, 557, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 21, 114, 12, 1, 0, 0, 0, 0, 0, 0, 0, 427,},
{ 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7,},
},
{
// Coeff Band ( 5 )
/* Coeff Band ( 5 ) */
{ 52, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 18, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 652,},
{ 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30,},
},
{
// Coeff Band ( 6 )
/* Coeff Band ( 6 ) */
{ 640, 569, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 25, 77, 2, 0, 0, 0, 0, 0, 0, 0, 0, 517,},
{ 4, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3,},
},
{
// Coeff Band ( 7 )
/* Coeff Band ( 7 ) */
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
},
},
{
// Block Type ( 3 )
/* Block Type ( 3 ) */
{
// Coeff Band ( 0 )
/* Coeff Band ( 0 ) */
{2506, 20161, 2707, 767, 261, 178, 107, 30, 14, 3, 0, 100694,},
{8806, 36478, 8817, 3268, 1280, 850, 401, 114, 42, 0, 0, 58572,},
{11003, 27214, 11798, 5716, 2482, 2072, 1048, 175, 32, 0, 0, 19284,},
},
{
// Coeff Band ( 1 )
/* Coeff Band ( 1 ) */
{9738, 11313, 959, 205, 70, 18, 11, 1, 0, 0, 0, 0,},
{12628, 15085, 1507, 273, 52, 19, 9, 0, 0, 0, 0, 54280,},
{10701, 15846, 5561, 1926, 813, 570, 249, 36, 0, 0, 0, 6460,},
},
{
// Coeff Band ( 2 )
/* Coeff Band ( 2 ) */
{6781, 22539, 2784, 634, 182, 123, 20, 4, 0, 0, 0, 0,},
{6263, 11544, 2649, 790, 259, 168, 27, 5, 0, 0, 0, 20539,},
{3109, 4075, 2031, 896, 457, 386, 158, 29, 0, 0, 0, 1138,},
},
{
// Coeff Band ( 3 )
/* Coeff Band ( 3 ) */
{11515, 4079, 465, 73, 5, 14, 2, 0, 0, 0, 0, 0,},
{9361, 5834, 650, 96, 24, 8, 4, 0, 0, 0, 0, 22181,},
{4343, 3974, 1360, 415, 132, 96, 14, 1, 0, 0, 0, 1267,},
},
{
// Coeff Band ( 4 )
/* Coeff Band ( 4 ) */
{4787, 9297, 823, 168, 44, 12, 4, 0, 0, 0, 0, 0,},
{3619, 4472, 719, 198, 60, 31, 3, 0, 0, 0, 0, 8401,},
{1157, 1175, 483, 182, 88, 31, 8, 0, 0, 0, 0, 268,},
},
{
// Coeff Band ( 5 )
/* Coeff Band ( 5 ) */
{8299, 1226, 32, 5, 1, 0, 0, 0, 0, 0, 0, 0,},
{3502, 1568, 57, 4, 1, 1, 0, 0, 0, 0, 0, 9811,},
{1055, 1070, 166, 29, 6, 1, 0, 0, 0, 0, 0, 527,},
},
{
// Coeff Band ( 6 )
/* Coeff Band ( 6 ) */
{27414, 27927, 1989, 347, 69, 26, 0, 0, 0, 0, 0, 0,},
{5876, 10074, 1574, 341, 91, 24, 4, 0, 0, 0, 0, 21954,},
{1571, 2171, 778, 324, 124, 65, 16, 0, 0, 0, 0, 979,},
},
{
// Coeff Band ( 7 )
/* Coeff Band ( 7 ) */
{ 0, 29, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 0, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 459,},
{ 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13,},

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -17,18 +17,18 @@
/* Coefficient token alphabet */
#define ZERO_TOKEN 0 //0 Extra Bits 0+0
#define ONE_TOKEN 1 //1 Extra Bits 0+1
#define TWO_TOKEN 2 //2 Extra Bits 0+1
#define THREE_TOKEN 3 //3 Extra Bits 0+1
#define FOUR_TOKEN 4 //4 Extra Bits 0+1
#define DCT_VAL_CATEGORY1 5 //5-6 Extra Bits 1+1
#define DCT_VAL_CATEGORY2 6 //7-10 Extra Bits 2+1
#define DCT_VAL_CATEGORY3 7 //11-26 Extra Bits 4+1
#define DCT_VAL_CATEGORY4 8 //11-26 Extra Bits 5+1
#define DCT_VAL_CATEGORY5 9 //27-58 Extra Bits 5+1
#define DCT_VAL_CATEGORY6 10 //59+ Extra Bits 11+1
#define DCT_EOB_TOKEN 11 //EOB Extra Bits 0+0
#define ZERO_TOKEN 0 /* 0 Extra Bits 0+0 */
#define ONE_TOKEN 1 /* 1 Extra Bits 0+1 */
#define TWO_TOKEN 2 /* 2 Extra Bits 0+1 */
#define THREE_TOKEN 3 /* 3 Extra Bits 0+1 */
#define FOUR_TOKEN 4 /* 4 Extra Bits 0+1 */
#define DCT_VAL_CATEGORY1 5 /* 5-6 Extra Bits 1+1 */
#define DCT_VAL_CATEGORY2 6 /* 7-10 Extra Bits 2+1 */
#define DCT_VAL_CATEGORY3 7 /* 11-26 Extra Bits 4+1 */
#define DCT_VAL_CATEGORY4 8 /* 11-26 Extra Bits 5+1 */
#define DCT_VAL_CATEGORY5 9 /* 27-58 Extra Bits 5+1 */
#define DCT_VAL_CATEGORY6 10 /* 59+ Extra Bits 11+1 */
#define DCT_EOB_TOKEN 11 /* EOB Extra Bits 0+0 */
#define vp8_coef_tokens 12
#define MAX_ENTROPY_TOKENS vp8_coef_tokens
@ -83,7 +83,7 @@ extern DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]);
coefficient band (and since zigzag positions 0, 1, and 2 are in
distinct bands). */
/*# define DC_TOKEN_CONTEXTS 3 // 00, 0!0, !0!0 */
/*# define DC_TOKEN_CONTEXTS 3*/ /* 00, 0!0, !0!0 */
# define PREV_COEF_CONTEXTS 3
extern DECLARE_ALIGNED(16, const unsigned char, vp8_prev_token_class[vp8_coef_tokens]);

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -29,21 +29,21 @@ const MV_CONTEXT vp8_mv_update_probs[2] =
const MV_CONTEXT vp8_default_mv_context[2] =
{
{{
// row
162, // is short
128, // sign
225, 146, 172, 147, 214, 39, 156, // short tree
128, 129, 132, 75, 145, 178, 206, 239, 254, 254 // long bits
/* row */
162, /* is short */
128, /* sign */
225, 146, 172, 147, 214, 39, 156, /* short tree */
128, 129, 132, 75, 145, 178, 206, 239, 254, 254 /* long bits */
}},
{{
// same for column
164, // is short
/* same for column */
164, /* is short */
128,
204, 170, 119, 235, 140, 230, 228,
128, 130, 130, 74, 148, 180, 203, 236, 254, 254 // long bits
128, 130, 130, 74, 148, 180, 203, 236, 254, 254 /* long bits */
}}
};

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -15,14 +15,14 @@
static void extend_plane_borders
(
unsigned char *s, // source
int sp, // pitch
int h, // height
int w, // width
int et, // extend top border
int el, // extend left border
int eb, // extend bottom border
int er // extend right border
unsigned char *s, /* source */
int sp, /* pitch */
int h, /* height */
int w, /* width */
int et, /* extend top border */
int el, /* extend left border */
int eb, /* extend bottom border */
int er /* extend right border */
)
{
@ -31,7 +31,7 @@ static void extend_plane_borders
unsigned char *dest_ptr1, *dest_ptr2;
int linesize;
// copy the left and right most columns out
/* copy the left and right most columns out */
src_ptr1 = s;
src_ptr2 = s + w - 1;
dest_ptr1 = s - el;
@ -39,8 +39,9 @@ static void extend_plane_borders
for (i = 0; i < h - 0 + 1; i++)
{
// Some linkers will complain if we call vpx_memset with el set to a
// constant 0.
/* Some linkers will complain if we call vpx_memset with el set to a
* constant 0.
*/
if (el)
vpx_memset(dest_ptr1, src_ptr1[0], el);
vpx_memset(dest_ptr2, src_ptr2[0], er);
@ -50,7 +51,7 @@ static void extend_plane_borders
dest_ptr2 += sp;
}
// Now copy the top and bottom source lines into each line of the respective borders
/* Now copy the top and bottom source lines into each line of the respective borders */
src_ptr1 = s - el;
src_ptr2 = s + sp * (h - 1) - el;
dest_ptr1 = s + sp * (-et) - el;
@ -76,12 +77,12 @@ void vp8_extend_to_multiple_of16(YV12_BUFFER_CONFIG *ybf, int width, int height)
int er = 0xf & (16 - (width & 0xf));
int eb = 0xf & (16 - (height & 0xf));
// check for non multiples of 16
/* check for non multiples of 16 */
if (er != 0 || eb != 0)
{
extend_plane_borders(ybf->y_buffer, ybf->y_stride, height, width, 0, 0, eb, er);
//adjust for uv
/* adjust for uv */
height = (height + 1) >> 1;
width = (width + 1) >> 1;
er = 0x7 & (8 - (width & 0x7));
@ -95,7 +96,7 @@ void vp8_extend_to_multiple_of16(YV12_BUFFER_CONFIG *ybf, int width, int height)
}
}
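
The two loops above do nothing more exotic than replicate edge pixels: each row's first and last samples are smeared into the left/right border, then the widened top and bottom rows are copied upwards and downwards. A self-contained sketch of that idea (not the libvpx routine itself; the el/er/et/eb parameter names are kept from the function above, and the buffer sizes are arbitrary):

    #include <stdio.h>
    #include <string.h>

    /* Minimal sketch of plane border extension: replicate the edge pixels of a
     * w x h plane (pitch 'sp') into borders of width el/er and height et/eb.
     * Mirrors the structure of extend_plane_borders() above, nothing more. */
    static void extend_borders(unsigned char *s, int sp, int h, int w,
                               int et, int el, int eb, int er)
    {
        int i;

        /* left / right columns: smear the first and last pixel of each row */
        for (i = 0; i < h; i++)
        {
            if (el) memset(s + i * sp - el, s[i * sp],         el);
            if (er) memset(s + i * sp + w,  s[i * sp + w - 1], er);
        }

        /* top / bottom rows: copy the (already widened) first and last rows */
        for (i = 1; i <= et; i++)
            memcpy(s - i * sp - el, s - el, w + el + er);
        for (i = 1; i <= eb; i++)
            memcpy(s + (h - 1 + i) * sp - el, s + (h - 1) * sp - el, w + el + er);
    }

    int main(void)
    {
        /* an 8x4 plane embedded in a 16x8 buffer: 4-pixel side borders, 2-row
         * top/bottom borders */
        unsigned char buf[16 * 8];
        unsigned char *plane = buf + 2 * 16 + 4;   /* top-left of the real pixels */
        int x, y;

        memset(buf, 0, sizeof(buf));
        for (y = 0; y < 4; y++)
            for (x = 0; x < 8; x++)
                plane[y * 16 + x] = (unsigned char)(10 * y + x + 1);

        extend_borders(plane, 16, 4, 8, 2, 4, 2, 4);

        for (y = 0; y < 8; y++, putchar('\n'))
            for (x = 0; x < 16; x++)
                printf("%3d ", buf[y * 16 + x]);
        return 0;
    }
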
// note the extension is only for the last row, for intra prediction purpose
/* note the extension is only for the last row, for intra prediction purpose */
void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr, unsigned char *UPtr, unsigned char *VPtr)
{
int i;

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -32,13 +32,13 @@ static const int bilinear_filters[8][2] =
static const short sub_pel_filters[8][6] =
{
{ 0, 0, 128, 0, 0, 0 }, // note that 1/8 pel positions are just as per alpha -0.5 bicubic
{ 0, 0, 128, 0, 0, 0 }, /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */
{ 0, -6, 123, 12, -1, 0 },
{ 2, -11, 108, 36, -8, 1 }, // New 1/4 pel 6 tap filter
{ 2, -11, 108, 36, -8, 1 }, /* New 1/4 pel 6 tap filter */
{ 0, -9, 93, 50, -6, 0 },
{ 3, -16, 77, 77, -16, 3 }, // New 1/2 pel 6 tap filter
{ 3, -16, 77, 77, -16, 3 }, /* New 1/2 pel 6 tap filter */
{ 0, -6, 50, 93, -9, 0 },
{ 1, -8, 36, 108, -11, 2 }, // New 1/4 pel 6 tap filter
{ 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */
{ 0, -1, 12, 123, -6, 0 },
@ -69,9 +69,9 @@ void vp8_filter_block2d_first_pass
((int)src_ptr[pixel_step] * vp8_filter[3]) +
((int)src_ptr[2*pixel_step] * vp8_filter[4]) +
((int)src_ptr[3*pixel_step] * vp8_filter[5]) +
(VP8_FILTER_WEIGHT >> 1); // Rounding
(VP8_FILTER_WEIGHT >> 1); /* Rounding */
// Normalize back to 0-255
/* Normalize back to 0-255 */
Temp = Temp >> VP8_FILTER_SHIFT;
if (Temp < 0)
@ -83,7 +83,7 @@ void vp8_filter_block2d_first_pass
src_ptr++;
}
// Next row...
/* Next row... */
src_ptr += src_pixels_per_line - output_width;
output_ptr += output_width;
}
@ -108,16 +108,16 @@ void vp8_filter_block2d_second_pass
{
for (j = 0; j < output_width; j++)
{
// Apply filter
/* Apply filter */
Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
((int)src_ptr[0] * vp8_filter[2]) +
((int)src_ptr[pixel_step] * vp8_filter[3]) +
((int)src_ptr[2*pixel_step] * vp8_filter[4]) +
((int)src_ptr[3*pixel_step] * vp8_filter[5]) +
(VP8_FILTER_WEIGHT >> 1); // Rounding
(VP8_FILTER_WEIGHT >> 1); /* Rounding */
// Normalize back to 0-255
/* Normalize back to 0-255 */
Temp = Temp >> VP8_FILTER_SHIFT;
if (Temp < 0)
@ -129,7 +129,7 @@ void vp8_filter_block2d_second_pass
src_ptr++;
}
// Start next row
/* Start next row */
src_ptr += src_pixels_per_line - output_width;
output_ptr += output_pitch;
}
@ -146,12 +146,12 @@ void vp8_filter_block2d
const short *VFilter
)
{
int FData[9*4]; // Temp data bufffer used in filtering
int FData[9*4]; /* Temp data bufffer used in filtering */
// First filter 1-D horizontally...
/* First filter 1-D horizontally... */
vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter);
// then filter verticaly...
/* then filter verticaly... */
vp8_filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter);
}
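
The calls above implement a separable 6-tap filter: the first pass filters horizontally into an intermediate buffer five rows taller than the output block (9, 13 or 21 rows), and the second pass filters that buffer vertically starting two rows in (FData + 8/16/32). The per-pixel arithmetic is the same in both passes; a standalone sketch of it is below, assuming the usual libvpx scaling in which each kernel sums to 128 (i.e. VP8_FILTER_WEIGHT 128 and VP8_FILTER_SHIFT 7). The half-pel taps are copied from the sub_pel_filters table above; the sample row is arbitrary.

    #include <stdio.h>

    /* One 6-tap filter application with libvpx-style rounding and clamping.
     * The weight/shift values are assumptions stated in the note above. */
    #define FILTER_WEIGHT 128
    #define FILTER_SHIFT    7

    static unsigned char sixtap_pixel(const unsigned char *src, int step,
                                      const short *taps)
    {
        int sum = src[-2 * step] * taps[0] +
                  src[-1 * step] * taps[1] +
                  src[ 0 * step] * taps[2] +
                  src[ 1 * step] * taps[3] +
                  src[ 2 * step] * taps[4] +
                  src[ 3 * step] * taps[5] +
                  (FILTER_WEIGHT >> 1);          /* rounding */

        sum >>= FILTER_SHIFT;                    /* normalize back to 0..255 */
        if (sum < 0)   sum = 0;
        if (sum > 255) sum = 255;
        return (unsigned char)sum;
    }

    int main(void)
    {
        static const short half_pel[6] = { 3, -16, 77, 77, -16, 3 };
        unsigned char row[12] = { 10, 20, 40, 80, 120, 160, 200, 220, 230, 235, 238, 240 };
        int x;

        /* filter the interior samples horizontally (step = 1) */
        for (x = 2; x < 9; x++)
            printf("%d ", sixtap_pixel(row + x, 1, half_pel));
        putchar('\n');
        return 0;
    }
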
@ -195,8 +195,8 @@ void vp8_sixtap_predict_c
const short *HFilter;
const short *VFilter;
HFilter = sub_pel_filters[xoffset]; // 6 tap
VFilter = sub_pel_filters[yoffset]; // 6 tap
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
vp8_filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter);
}
@ -212,16 +212,16 @@ void vp8_sixtap_predict8x8_c
{
const short *HFilter;
const short *VFilter;
int FData[13*16]; // Temp data bufffer used in filtering
int FData[13*16]; /* Temp data bufffer used in filtering */
HFilter = sub_pel_filters[xoffset]; // 6 tap
VFilter = sub_pel_filters[yoffset]; // 6 tap
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
// First filter 1-D horizontally...
/* First filter 1-D horizontally... */
vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter);
// then filter verticaly...
/* then filter verticaly... */
vp8_filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
}
@ -238,16 +238,16 @@ void vp8_sixtap_predict8x4_c
{
const short *HFilter;
const short *VFilter;
int FData[13*16]; // Temp data bufffer used in filtering
int FData[13*16]; /* Temp data bufffer used in filtering */
HFilter = sub_pel_filters[xoffset]; // 6 tap
VFilter = sub_pel_filters[yoffset]; // 6 tap
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
// First filter 1-D horizontally...
/* First filter 1-D horizontally... */
vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter);
// then filter verticaly...
/* then filter verticaly... */
vp8_filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter);
}
@ -264,16 +264,16 @@ void vp8_sixtap_predict16x16_c
{
const short *HFilter;
const short *VFilter;
int FData[21*24]; // Temp data bufffer used in filtering
int FData[21*24]; /* Temp data bufffer used in filtering */
HFilter = sub_pel_filters[xoffset]; // 6 tap
VFilter = sub_pel_filters[yoffset]; // 6 tap
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
// First filter 1-D horizontally...
/* First filter 1-D horizontally... */
vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter);
// then filter verticaly...
/* then filter verticaly... */
vp8_filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter);
}
@ -324,14 +324,14 @@ void vp8_filter_block2d_bil_first_pass
{
for (j = 0; j < output_width; j++)
{
// Apply bilinear filter
/* Apply bilinear filter */
output_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) +
((int)src_ptr[pixel_step] * vp8_filter[1]) +
(VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT;
src_ptr++;
}
// Next row...
/* Next row... */
src_ptr += src_pixels_per_line - output_width;
output_ptr += output_width;
}
@ -384,7 +384,7 @@ void vp8_filter_block2d_bil_second_pass
{
for (j = 0; j < output_width; j++)
{
// Apply filter
/* Apply filter */
Temp = ((int)src_ptr[0] * vp8_filter[0]) +
((int)src_ptr[pixel_step] * vp8_filter[1]) +
(VP8_FILTER_WEIGHT / 2);
@ -392,7 +392,7 @@ void vp8_filter_block2d_bil_second_pass
src_ptr++;
}
// Next row...
/* Next row... */
src_ptr += src_pixels_per_line - output_width;
output_ptr += output_pitch;
}
@ -432,12 +432,12 @@ void vp8_filter_block2d_bil
)
{
unsigned short FData[17*16]; // Temp data bufffer used in filtering
unsigned short FData[17*16]; /* Temp data bufffer used in filtering */
// First filter 1-D horizontally...
/* First filter 1-D horizontally... */
vp8_filter_block2d_bil_first_pass(src_ptr, FData, src_pixels_per_line, 1, Height + 1, Width, HFilter);
// then 1-D vertically...
/* then 1-D vertically... */
vp8_filter_block2d_bil_second_pass(FData, output_ptr, dst_pitch, Width, Width, Height, Width, VFilter);
}
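
The bilinear path repeats the same two-pass structure with two-tap kernels. Each bilinear_filters entry pairs two weights that sum to 128 (the table itself sits outside this hunk), so after the rounding shift the result already fits in 0..255 and no clamp is needed, which is why the first pass above has none. A tiny sketch, with the (112, 16) pair used purely as an illustrative entry:

    #include <stdio.h>

    /* Two-tap (bilinear) variant of the same rounding scheme.  The weights are
     * illustrative; the only property relied on is that they sum to 128. */
    static unsigned char bilinear_pixel(unsigned char a, unsigned char b,
                                        const int *taps)
    {
        return (unsigned char)((a * taps[0] + b * taps[1] + 64) >> 7);
    }

    int main(void)
    {
        static const int taps[2] = { 112, 16 };
        printf("%d\n", bilinear_pixel(100, 200, taps));   /* prints 113 */
        return 0;
    }
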

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -168,7 +168,7 @@ void vp8_find_near_mvs
vp8_clamp_mv(nearest, xd);
vp8_clamp_mv(nearby, xd);
vp8_clamp_mv(best_mv, xd); //TODO: move this up before the copy
vp8_clamp_mv(best_mv, xd); /*TODO: move this up before the copy*/
}
vp8_prob *vp8_mv_ref_probs(
@ -179,7 +179,7 @@ vp8_prob *vp8_mv_ref_probs(
p[1] = vp8_mode_contexts [near_mv_ref_ct[1]] [1];
p[2] = vp8_mode_contexts [near_mv_ref_ct[2]] [2];
p[3] = vp8_mode_contexts [near_mv_ref_ct[3]] [3];
//p[3] = vp8_mode_contexts [near_mv_ref_ct[1] + near_mv_ref_ct[2] + near_mv_ref_ct[3]] [3];
/*p[3] = vp8_mode_contexts [near_mv_ref_ct[1] + near_mv_ref_ct[2] + near_mv_ref_ct[3]] [3];*/
return p;
}

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -18,6 +18,7 @@
#include "onyxc_int.h"
extern void vp8_arch_x86_common_init(VP8_COMMON *ctx);
extern void vp8_arch_arm_common_init(VP8_COMMON *ctx);
void (*vp8_build_intra_predictors_mby_ptr)(MACROBLOCKD *x);
extern void vp8_build_intra_predictors_mby(MACROBLOCKD *x);
@ -39,9 +40,11 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
rtcd->recon.copy16x16 = vp8_copy_mem16x16_c;
rtcd->recon.copy8x8 = vp8_copy_mem8x8_c;
rtcd->recon.copy8x4 = vp8_copy_mem8x4_c;
rtcd->recon.recon = vp8_recon_b_c;
rtcd->recon.recon = vp8_recon_b_c;
rtcd->recon.recon2 = vp8_recon2b_c;
rtcd->recon.recon4 = vp8_recon4b_c;
rtcd->recon.recon4 = vp8_recon4b_c;
rtcd->recon.recon_mb = vp8_recon_mb_c;
rtcd->recon.recon_mby = vp8_recon_mby_c;
rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_c;
rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_c;
@ -66,10 +69,11 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
rtcd->postproc.across = vp8_mbpost_proc_across_ip_c;
rtcd->postproc.downacross = vp8_post_proc_down_and_across_c;
rtcd->postproc.addnoise = vp8_plane_add_noise_c;
rtcd->postproc.blend_mb = vp8_blend_mb_c;
#endif
#endif
// Pure C:
/* Pure C: */
vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby;
vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s;
@ -77,4 +81,8 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
vp8_arch_x86_common_init(ctx);
#endif
#if ARCH_ARM
vp8_arch_arm_common_init(ctx);
#endif
}
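
The function above is the generic half of the run-time dispatch pattern: every entry point in the rtcd vtable first gets a portable _c default, then the #if ARCH_X86 / ARCH_ARM blocks let a platform-specific init overwrite individual pointers with optimized versions. A stripped-down, compilable sketch of that pattern (all names here are invented for illustration):

    #include <stdio.h>

    /* Toy version of the rtcd vtable: fill every slot with a C fallback,
     * then let an arch-specific step override what it can. */
    typedef void (*copy_fn)(const unsigned char *src, unsigned char *dst, int n);

    typedef struct {
        copy_fn copy16x16;
        copy_fn copy8x8;
    } toy_rtcd_vtable;

    static void copy_c(const unsigned char *src, unsigned char *dst, int n)
    { int i; for (i = 0; i < n; i++) dst[i] = src[i]; }

    #if defined(__SSE2__)
    static void copy_sse2(const unsigned char *src, unsigned char *dst, int n)
    { int i; for (i = 0; i < n; i++) dst[i] = src[i]; /* stand-in for a SIMD body */ }
    #endif

    static void toy_machine_specific_config(toy_rtcd_vtable *rtcd)
    {
        rtcd->copy16x16 = copy_c;      /* portable defaults first */
        rtcd->copy8x8   = copy_c;
    #if defined(__SSE2__)
        rtcd->copy16x16 = copy_sse2;   /* arch init overrides what it supports */
    #endif
    }

    int main(void)
    {
        toy_rtcd_vtable rtcd;
        unsigned char a[16] = { 1 }, b[16];
        toy_machine_specific_config(&rtcd);
        rtcd.copy16x16(a, b, 16);
        printf("%d\n", b[0]);          /* prints 1 */
        return 0;
    }
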

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -38,7 +38,7 @@ void vp8_inverse_transform_mby(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *
{
int i;
// do 2nd order transform on the dc block
/* do 2nd order transform on the dc block */
IDCT_INVOKE(rtcd, iwalsh16)(x->block[24].dqcoeff, x->block[24].diff);
recon_dcblock(x);
@ -68,7 +68,7 @@ void vp8_inverse_transform_mb(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x
if (x->mode_info_context->mbmi.mode != B_PRED &&
x->mode_info_context->mbmi.mode != SPLITMV)
{
// do 2nd order transform on the dc block
/* do 2nd order transform on the dc block */
IDCT_INVOKE(rtcd, iwalsh16)(&x->block[24].dqcoeff[0], x->block[24].diff);
recon_dcblock(x);

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -23,7 +23,7 @@ prototype_loopfilter(vp8_mbloop_filter_vertical_edge_c);
prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_c);
prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_c);
// Horizontal MB filtering
/* Horizontal MB filtering */
void vp8_loop_filter_mbh_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
@ -47,7 +47,7 @@ void vp8_loop_filter_mbhs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned
vp8_loop_filter_simple_horizontal_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
}
// Vertical MB Filtering
/* Vertical MB Filtering */
void vp8_loop_filter_mbv_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
@ -71,7 +71,7 @@ void vp8_loop_filter_mbvs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned
vp8_loop_filter_simple_vertical_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
}
// Horizontal B Filtering
/* Horizontal B Filtering */
void vp8_loop_filter_bh_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
@ -99,7 +99,7 @@ void vp8_loop_filter_bhs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
}
// Vertical B Filtering
/* Vertical B Filtering */
void vp8_loop_filter_bv_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
@ -140,7 +140,7 @@ void vp8_init_loop_filter(VP8_COMMON *cm)
const int yhedge_boost = 2;
const int uvhedge_boost = 2;
// For each possible value for the loop filter fill out a "loop_filter_info" entry.
/* For each possible value for the loop filter fill out a "loop_filter_info" entry. */
for (i = 0; i <= MAX_LOOP_FILTER; i++)
{
int filt_lvl = i;
@ -166,7 +166,7 @@ void vp8_init_loop_filter(VP8_COMMON *cm)
HEVThresh = 0;
}
// Set loop filter paramaeters that control sharpness.
/* Set loop filter paramaeters that control sharpness. */
block_inside_limit = filt_lvl >> (sharpness_lvl > 0);
block_inside_limit = block_inside_limit >> (sharpness_lvl > 4);
@ -195,7 +195,7 @@ void vp8_init_loop_filter(VP8_COMMON *cm)
}
// Set up the function pointers depending on the type of loop filtering selected
/* Set up the function pointers depending on the type of loop filtering selected */
if (lft == NORMAL_LOOPFILTER)
{
cm->lf_mbv = LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_v);
@ -212,14 +212,15 @@ void vp8_init_loop_filter(VP8_COMMON *cm)
}
}
// Put vp8_init_loop_filter() in vp8dx_create_decompressor(). Only call vp8_frame_init_loop_filter() while decoding
// each frame. Check last_frame_type to skip the function most of times.
/* Put vp8_init_loop_filter() in vp8dx_create_decompressor(). Only call vp8_frame_init_loop_filter() while decoding
* each frame. Check last_frame_type to skip the function most of times.
*/
void vp8_frame_init_loop_filter(loop_filter_info *lfi, int frame_type)
{
int HEVThresh;
int i, j;
// For each possible value for the loop filter fill out a "loop_filter_info" entry.
/* For each possible value for the loop filter fill out a "loop_filter_info" entry. */
for (i = 0; i <= MAX_LOOP_FILTER; i++)
{
int filt_lvl = i;
@ -247,15 +248,15 @@ void vp8_frame_init_loop_filter(loop_filter_info *lfi, int frame_type)
for (j = 0; j < 16; j++)
{
//lfi[i].lim[j] = block_inside_limit;
//lfi[i].mbflim[j] = filt_lvl+yhedge_boost;
/*lfi[i].lim[j] = block_inside_limit;
lfi[i].mbflim[j] = filt_lvl+yhedge_boost;*/
lfi[i].mbthr[j] = HEVThresh;
//lfi[i].flim[j] = filt_lvl;
/*lfi[i].flim[j] = filt_lvl;*/
lfi[i].thr[j] = HEVThresh;
//lfi[i].uvlim[j] = block_inside_limit;
//lfi[i].uvmbflim[j] = filt_lvl+uvhedge_boost;
/*lfi[i].uvlim[j] = block_inside_limit;
lfi[i].uvmbflim[j] = filt_lvl+uvhedge_boost;*/
lfi[i].uvmbthr[j] = HEVThresh;
//lfi[i].uvflim[j] = filt_lvl;
/*lfi[i].uvflim[j] = filt_lvl;*/
lfi[i].uvthr[j] = HEVThresh;
}
}
@ -268,32 +269,32 @@ void vp8_adjust_mb_lf_value(MACROBLOCKD *mbd, int *filter_level)
if (mbd->mode_ref_lf_delta_enabled)
{
// Aplly delta for reference frame
/* Apply delta for reference frame */
*filter_level += mbd->ref_lf_deltas[mbmi->ref_frame];
// Apply delta for mode
/* Apply delta for mode */
if (mbmi->ref_frame == INTRA_FRAME)
{
// Only the split mode BPRED has a further special case
/* Only the split mode BPRED has a further special case */
if (mbmi->mode == B_PRED)
*filter_level += mbd->mode_lf_deltas[0];
}
else
{
// Zero motion mode
/* Zero motion mode */
if (mbmi->mode == ZEROMV)
*filter_level += mbd->mode_lf_deltas[1];
// Split MB motion mode
/* Split MB motion mode */
else if (mbmi->mode == SPLITMV)
*filter_level += mbd->mode_lf_deltas[3];
// All other inter motion modes (Nearest, Near, New)
/* All other inter motion modes (Nearest, Near, New) */
else
*filter_level += mbd->mode_lf_deltas[2];
}
// Range check
/* Range check */
if (*filter_level > MAX_LOOP_FILTER)
*filter_level = MAX_LOOP_FILTER;
else if (*filter_level < 0)
@ -311,7 +312,7 @@ void vp8_loop_filter_frame
{
YV12_BUFFER_CONFIG *post = cm->frame_to_show;
loop_filter_info *lfi = cm->lf_info;
int frame_type = cm->frame_type;
FRAME_TYPE frame_type = cm->frame_type;
int mb_row;
int mb_col;
@ -324,21 +325,21 @@ void vp8_loop_filter_frame
int i;
unsigned char *y_ptr, *u_ptr, *v_ptr;
mbd->mode_info_context = cm->mi; // Point at base of Mb MODE_INFO list
mbd->mode_info_context = cm->mi; /* Point at base of Mb MODE_INFO list */
// Note the baseline filter values for each segment
/* Note the baseline filter values for each segment */
if (alt_flt_enabled)
{
for (i = 0; i < MAX_MB_SEGMENTS; i++)
{
// Abs value
/* Abs value */
if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
// Delta Value
/* Delta Value */
else
{
baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; // Clamp to valid range
baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; /* Clamp to valid range */
}
}
}
@ -348,18 +349,18 @@ void vp8_loop_filter_frame
baseline_filter_level[i] = default_filt_lvl;
}
// Initialize the loop filter for this frame.
/* Initialize the loop filter for this frame. */
if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
vp8_init_loop_filter(cm);
else if (frame_type != cm->last_frame_type)
vp8_frame_init_loop_filter(lfi, frame_type);
// Set up the buffer pointers
/* Set up the buffer pointers */
y_ptr = post->y_buffer;
u_ptr = post->u_buffer;
v_ptr = post->v_buffer;
// vp8_filter each macro block
/* vp8_filter each macro block */
for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
{
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
@ -368,9 +369,10 @@ void vp8_loop_filter_frame
filter_level = baseline_filter_level[Segment];
// Distance of Mb to the various image edges.
// These specified to 8th pel as they are always compared to values that are in 1/8th pel units
// Apply any context driven MB level adjustment
/* Distance of Mb to the various image edges.
* These specified to 8th pel as they are always compared to values that are in 1/8th pel units
* Apply any context driven MB level adjustment
*/
vp8_adjust_mb_lf_value(mbd, &filter_level);
if (filter_level)
@ -381,7 +383,7 @@ void vp8_loop_filter_frame
if (mbd->mode_info_context->mbmi.dc_diff > 0)
cm->lf_bv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
// don't apply across umv border
/* don't apply across umv border */
if (mb_row > 0)
cm->lf_mbh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
@ -393,14 +395,14 @@ void vp8_loop_filter_frame
u_ptr += 8;
v_ptr += 8;
mbd->mode_info_context++; // step to next MB
mbd->mode_info_context++; /* step to next MB */
}
y_ptr += post->y_stride * 16 - post->y_width;
u_ptr += post->uv_stride * 8 - post->uv_width;
v_ptr += post->uv_stride * 8 - post->uv_width;
mbd->mode_info_context++; // Skip border mb
mbd->mode_info_context++; /* Skip border mb */
}
}
@ -424,26 +426,26 @@ void vp8_loop_filter_frame_yonly
int baseline_filter_level[MAX_MB_SEGMENTS];
int filter_level;
int alt_flt_enabled = mbd->segmentation_enabled;
int frame_type = cm->frame_type;
FRAME_TYPE frame_type = cm->frame_type;
(void) sharpness_lvl;
//MODE_INFO * this_mb_mode_info = cm->mi; // Point at base of Mb MODE_INFO list
mbd->mode_info_context = cm->mi; // Point at base of Mb MODE_INFO list
/*MODE_INFO * this_mb_mode_info = cm->mi;*/ /* Point at base of Mb MODE_INFO list */
mbd->mode_info_context = cm->mi; /* Point at base of Mb MODE_INFO list */
// Note the baseline filter values for each segment
/* Note the baseline filter values for each segment */
if (alt_flt_enabled)
{
for (i = 0; i < MAX_MB_SEGMENTS; i++)
{
// Abs value
/* Abs value */
if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
// Delta Value
/* Delta Value */
else
{
baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; // Clamp to valid range
baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; /* Clamp to valid range */
}
}
}
@ -453,16 +455,16 @@ void vp8_loop_filter_frame_yonly
baseline_filter_level[i] = default_filt_lvl;
}
// Initialize the loop filter for this frame.
/* Initialize the loop filter for this frame. */
if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
vp8_init_loop_filter(cm);
else if (frame_type != cm->last_frame_type)
vp8_frame_init_loop_filter(lfi, frame_type);
// Set up the buffer pointers
/* Set up the buffer pointers */
y_ptr = post->y_buffer;
// vp8_filter each macro block
/* vp8_filter each macro block */
for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
{
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
@ -470,7 +472,7 @@ void vp8_loop_filter_frame_yonly
int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0;
filter_level = baseline_filter_level[Segment];
// Apply any context driven MB level adjustment
/* Apply any context driven MB level adjustment */
vp8_adjust_mb_lf_value(mbd, &filter_level);
if (filter_level)
@ -481,7 +483,7 @@ void vp8_loop_filter_frame_yonly
if (mbd->mode_info_context->mbmi.dc_diff > 0)
cm->lf_bv(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0);
// don't apply across umv border
/* don't apply across umv border */
if (mb_row > 0)
cm->lf_mbh(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0);
@ -490,12 +492,12 @@ void vp8_loop_filter_frame_yonly
}
y_ptr += 16;
mbd->mode_info_context ++; // step to next MB
mbd->mode_info_context ++; /* step to next MB */
}
y_ptr += post->y_stride * 16 - post->y_width;
mbd->mode_info_context ++; // Skip border mb
mbd->mode_info_context ++; /* Skip border mb */
}
}
@ -516,7 +518,7 @@ void vp8_loop_filter_partial_frame
unsigned char *y_ptr;
int mb_row;
int mb_col;
//int mb_rows = post->y_height >> 4;
/*int mb_rows = post->y_height >> 4;*/
int mb_cols = post->y_width >> 4;
int linestocopy;
@ -525,12 +527,12 @@ void vp8_loop_filter_partial_frame
int baseline_filter_level[MAX_MB_SEGMENTS];
int filter_level;
int alt_flt_enabled = mbd->segmentation_enabled;
int frame_type = cm->frame_type;
FRAME_TYPE frame_type = cm->frame_type;
(void) sharpness_lvl;
//MODE_INFO * this_mb_mode_info = cm->mi + (post->y_height>>5) * (mb_cols + 1); // Point at base of Mb MODE_INFO list
mbd->mode_info_context = cm->mi + (post->y_height >> 5) * (mb_cols + 1); // Point at base of Mb MODE_INFO list
/*MODE_INFO * this_mb_mode_info = cm->mi + (post->y_height>>5) * (mb_cols + 1);*/ /* Point at base of Mb MODE_INFO list */
mbd->mode_info_context = cm->mi + (post->y_height >> 5) * (mb_cols + 1); /* Point at base of Mb MODE_INFO list */
linestocopy = (post->y_height >> (4 + Fraction));
@ -539,19 +541,19 @@ void vp8_loop_filter_partial_frame
linestocopy <<= 4;
// Note the baseline filter values for each segment
/* Note the baseline filter values for each segment */
if (alt_flt_enabled)
{
for (i = 0; i < MAX_MB_SEGMENTS; i++)
{
// Abs value
/* Abs value */
if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
// Delta Value
/* Delta Value */
else
{
baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; // Clamp to valid range
baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; /* Clamp to valid range */
}
}
}
@ -561,16 +563,16 @@ void vp8_loop_filter_partial_frame
baseline_filter_level[i] = default_filt_lvl;
}
// Initialize the loop filter for this frame.
/* Initialize the loop filter for this frame. */
if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
vp8_init_loop_filter(cm);
else if (frame_type != cm->last_frame_type)
vp8_frame_init_loop_filter(lfi, frame_type);
// Set up the buffer pointers
/* Set up the buffer pointers */
y_ptr = post->y_buffer + (post->y_height >> 5) * 16 * post->y_stride;
// vp8_filter each macro block
/* vp8_filter each macro block */
for (mb_row = 0; mb_row<(linestocopy >> 4); mb_row++)
{
for (mb_col = 0; mb_col < mb_cols; mb_col++)
@ -593,10 +595,10 @@ void vp8_loop_filter_partial_frame
}
y_ptr += 16;
mbd->mode_info_context += 1; // step to next MB
mbd->mode_info_context += 1; /* step to next MB */
}
y_ptr += post->y_stride * 16 - post->y_width;
mbd->mode_info_context += 1; // Skip border mb
mbd->mode_info_context += 1; /* Skip border mb */
}
}
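
Throughout this file the per-segment baseline level is derived the same way: taken directly from the segment data when mb_segement_abs_delta is SEGMENT_ABSDATA, otherwise added to default_filt_lvl and clamped to the legal range. Pulled out as a standalone helper, the thrice-repeated ternary reads more simply as the sketch below (MAX_LOOP_FILTER is assumed to be 63 here, and the SEGMENT_ABSDATA value is illustrative):

    #include <stdio.h>

    #define MAX_LOOP_FILTER 63   /* assumed for this sketch */
    #define SEGMENT_ABSDATA  1   /* illustrative value, not the real enum */

    static int baseline_filter_level(int abs_delta, int default_filt_lvl,
                                     int segment_alt_lf)
    {
        int level;

        if (abs_delta == SEGMENT_ABSDATA)
        {
            /* absolute per-segment value, used as-is (as in the code above) */
            level = segment_alt_lf;
        }
        else
        {
            /* delta on top of the frame default, clamped to the legal range */
            level = default_filt_lvl + segment_alt_lf;
            if (level < 0)               level = 0;
            if (level > MAX_LOOP_FILTER) level = MAX_LOOP_FILTER;
        }
        return level;
    }

    int main(void)
    {
        printf("%d\n", baseline_filter_level(0, 40, -10));  /* 30 */
        printf("%d\n", baseline_filter_level(0, 40,  50));  /* clamped to 63 */
        return 0;
    }
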

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -22,10 +22,10 @@ typedef enum
SIMPLE_LOOPFILTER = 1
} LOOPFILTERTYPE;
// FRK
// Need to align this structure so when it is declared and
// passed it can be loaded into vector registers.
// FRK
/* FRK
* Need to align this structure so when it is declared and
* passed it can be loaded into vector registers.
*/
typedef struct
{
DECLARE_ALIGNED(16, signed char, lim[16]);
@ -119,8 +119,8 @@ typedef struct
typedef void loop_filter_uvfunction
(
unsigned char *u, // source pointer
int p, // pitch
unsigned char *u, /* source pointer */
int p, /* pitch */
const signed char *flimit,
const signed char *limit,
const signed char *thresh,

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -17,8 +17,6 @@
#define __inline inline
#endif
#define NEW_LOOPFILTER_MASK
typedef unsigned char uc;
static __inline signed char vp8_signed_char_clamp(int t)
@ -29,7 +27,7 @@ static __inline signed char vp8_signed_char_clamp(int t)
}
// should we apply any filter at all ( 11111111 yes, 00000000 no)
/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
static __inline signed char vp8_filter_mask(signed char limit, signed char flimit,
uc p3, uc p2, uc p1, uc p0, uc q0, uc q1, uc q2, uc q3)
{
@ -40,16 +38,12 @@ static __inline signed char vp8_filter_mask(signed char limit, signed char flimi
mask |= (abs(q1 - q0) > limit) * -1;
mask |= (abs(q2 - q1) > limit) * -1;
mask |= (abs(q3 - q2) > limit) * -1;
#ifndef NEW_LOOPFILTER_MASK
mask |= (abs(p0 - q0) > flimit) * -1;
#else
mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > flimit * 2 + limit) * -1;
#endif
mask = ~mask;
return mask;
}
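
The mask construction above is deliberately branch-free: each comparison yields 0 or 1, multiplying by -1 turns that into 0x00 or 0xFF in a signed char, the results are OR-ed together, and the final inversion leaves an all-ones mask only when every threshold test passed. A small, compilable demonstration of the idiom (simplified to a single limit; the real code uses separate limit/flimit thresholds):

    #include <stdio.h>
    #include <stdlib.h>

    /* Demonstrates the (condition) * -1 trick used by vp8_filter_mask(): the
     * result is 0x00 or 0xFF, which can later be AND-ed with the filter output
     * to enable or disable it without a branch. */
    int main(void)
    {
        int limit = 10;
        int p1 = 100, p0 = 105, q0 = 120, q1 = 122;
        signed char mask = 0;

        mask |= (abs(p1 - p0) > limit) * -1;   /*  5 > 10 ? no  -> 0x00 */
        mask |= (abs(q1 - q0) > limit) * -1;   /*  2 > 10 ? no  -> 0x00 */
        mask |= (abs(p0 - q0) > limit) * -1;   /* 15 > 10 ? yes -> 0xff */
        mask = ~mask;                          /* all-ones only if every test passed */

        printf("mask = 0x%02x\n", (unsigned char)mask);   /* prints 0x00 */
        return 0;
    }
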
// is there high variance internal edge ( 11111111 yes, 00000000 no)
/* is there high variance internal edge ( 11111111 yes, 00000000 no) */
static __inline signed char vp8_hevmask(signed char thresh, uc p1, uc p0, uc q0, uc q1)
{
signed char hev = 0;
@ -71,17 +65,18 @@ static __inline void vp8_filter(signed char mask, signed char hev, uc *op1, uc *
qs0 = (signed char) * oq0 ^ 0x80;
qs1 = (signed char) * oq1 ^ 0x80;
// add outer taps if we have high edge variance
/* add outer taps if we have high edge variance */
vp8_filter = vp8_signed_char_clamp(ps1 - qs1);
vp8_filter &= hev;
// inner taps
/* inner taps */
vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (qs0 - ps0));
vp8_filter &= mask;
// save bottom 3 bits so that we round one side +4 and the other +3
// if it equals 4 we'll set to adjust by -1 to account for the fact
// we'd round 3 the other way
/* save bottom 3 bits so that we round one side +4 and the other +3
* if it equals 4 we'll set to adjust by -1 to account for the fact
* we'd round 3 the other way
*/
Filter1 = vp8_signed_char_clamp(vp8_filter + 4);
Filter2 = vp8_signed_char_clamp(vp8_filter + 3);
Filter1 >>= 3;
@ -92,7 +87,7 @@ static __inline void vp8_filter(signed char mask, signed char hev, uc *op1, uc *
*op0 = u ^ 0x80;
vp8_filter = Filter1;
// outer tap adjustments
/* outer tap adjustments */
vp8_filter += 1;
vp8_filter >>= 1;
vp8_filter &= ~hev;
@ -106,19 +101,20 @@ static __inline void vp8_filter(signed char mask, signed char hev, uc *op1, uc *
void vp8_loop_filter_horizontal_edge_c
(
unsigned char *s,
int p, //pitch
int p, /* pitch */
const signed char *flimit,
const signed char *limit,
const signed char *thresh,
int count
)
{
int hev = 0; // high edge variance
int hev = 0; /* high edge variance */
signed char mask = 0;
int i = 0;
// loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions.
/* loop filter designed to work using chars so that we can make maximum use
* of 8 bit simd instructions.
*/
do
{
mask = vp8_filter_mask(limit[i], flimit[i],
@ -144,12 +140,13 @@ void vp8_loop_filter_vertical_edge_c
int count
)
{
int hev = 0; // high edge variance
int hev = 0; /* high edge variance */
signed char mask = 0;
int i = 0;
// loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions.
/* loop filter designed to work using chars so that we can make maximum use
* of 8 bit simd instructions.
*/
do
{
mask = vp8_filter_mask(limit[i], flimit[i],
@ -176,7 +173,7 @@ static __inline void vp8_mbfilter(signed char mask, signed char hev,
signed char qs1 = (signed char) * oq1 ^ 0x80;
signed char qs2 = (signed char) * oq2 ^ 0x80;
// add outer taps if we have high edge variance
/* add outer taps if we have high edge variance */
vp8_filter = vp8_signed_char_clamp(ps1 - qs1);
vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (qs0 - ps0));
vp8_filter &= mask;
@ -184,7 +181,7 @@ static __inline void vp8_mbfilter(signed char mask, signed char hev,
Filter2 = vp8_filter;
Filter2 &= hev;
// save bottom 3 bits so that we round one side +4 and the other +3
/* save bottom 3 bits so that we round one side +4 and the other +3 */
Filter1 = vp8_signed_char_clamp(Filter2 + 4);
Filter2 = vp8_signed_char_clamp(Filter2 + 3);
Filter1 >>= 3;
@ -193,25 +190,25 @@ static __inline void vp8_mbfilter(signed char mask, signed char hev,
ps0 = vp8_signed_char_clamp(ps0 + Filter2);
// only apply wider filter if not high edge variance
/* only apply wider filter if not high edge variance */
vp8_filter &= ~hev;
Filter2 = vp8_filter;
// roughly 3/7th difference across boundary
/* roughly 3/7th difference across boundary */
u = vp8_signed_char_clamp((63 + Filter2 * 27) >> 7);
s = vp8_signed_char_clamp(qs0 - u);
*oq0 = s ^ 0x80;
s = vp8_signed_char_clamp(ps0 + u);
*op0 = s ^ 0x80;
// roughly 2/7th difference across boundary
/* roughly 2/7th difference across boundary */
u = vp8_signed_char_clamp((63 + Filter2 * 18) >> 7);
s = vp8_signed_char_clamp(qs1 - u);
*oq1 = s ^ 0x80;
s = vp8_signed_char_clamp(ps1 + u);
*op1 = s ^ 0x80;
// roughly 1/7th difference across boundary
/* roughly 1/7th difference across boundary */
u = vp8_signed_char_clamp((63 + Filter2 * 9) >> 7);
s = vp8_signed_char_clamp(qs2 - u);
*oq2 = s ^ 0x80;
@ -229,12 +226,13 @@ void vp8_mbloop_filter_horizontal_edge_c
int count
)
{
signed char hev = 0; // high edge variance
signed char hev = 0; /* high edge variance */
signed char mask = 0;
int i = 0;
// loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions.
/* loop filter designed to work using chars so that we can make maximum use
* of 8 bit simd instructions.
*/
do
{
@ -263,7 +261,7 @@ void vp8_mbloop_filter_vertical_edge_c
int count
)
{
signed char hev = 0; // high edge variance
signed char hev = 0; /* high edge variance */
signed char mask = 0;
int i = 0;
@ -283,17 +281,14 @@ void vp8_mbloop_filter_vertical_edge_c
}
// should we apply any filter at all ( 11111111 yes, 00000000 no)
/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
static __inline signed char vp8_simple_filter_mask(signed char limit, signed char flimit, uc p1, uc p0, uc q0, uc q1)
{
// Why does this cause problems for win32?
// error C2143: syntax error : missing ';' before 'type'
// (void) limit;
#ifndef NEW_LOOPFILTER_MASK
signed char mask = (abs(p0 - q0) <= flimit) * -1;
#else
/* Why does this cause problems for win32?
* error C2143: syntax error : missing ';' before 'type'
* (void) limit;
*/
signed char mask = (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= flimit * 2 + limit) * -1;
#endif
return mask;
}
@ -310,7 +305,7 @@ static __inline void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *o
vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (q0 - p0));
vp8_filter &= mask;
// save bottom 3 bits so that we round one side +4 and the other +3
/* save bottom 3 bits so that we round one side +4 and the other +3 */
Filter1 = vp8_signed_char_clamp(vp8_filter + 4);
Filter1 >>= 3;
u = vp8_signed_char_clamp(q0 - Filter1);
@ -338,7 +333,7 @@ void vp8_loop_filter_simple_horizontal_edge_c
do
{
//mask = vp8_simple_filter_mask( limit[i], flimit[i],s[-1*p],s[0*p]);
/*mask = vp8_simple_filter_mask( limit[i], flimit[i],s[-1*p],s[0*p]);*/
mask = vp8_simple_filter_mask(limit[i], flimit[i], s[-2*p], s[-1*p], s[0*p], s[1*p]);
vp8_simple_filter(mask, s - 2 * p, s - 1 * p, s, s + 1 * p);
++s;
@ -362,7 +357,7 @@ void vp8_loop_filter_simple_vertical_edge_c
do
{
//mask = vp8_simple_filter_mask( limit[i], flimit[i],s[-1],s[0]);
/*mask = vp8_simple_filter_mask( limit[i], flimit[i],s[-1],s[0]);*/
mask = vp8_simple_filter_mask(limit[i], flimit[i], s[-2], s[-1], s[0], s[1]);
vp8_simple_filter(mask, s - 2, s - 1, s, s + 1);
s += p;

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -14,7 +14,7 @@
typedef enum
{
PRED = 0,
DEST = 1,
DEST = 1
} BLOCKSET;
void vp8_setup_block
@ -62,13 +62,13 @@ void vp8_setup_macroblock(MACROBLOCKD *x, BLOCKSET bs)
v = &x->pre.v_buffer;
}
for (block = 0; block < 16; block++) // y blocks
for (block = 0; block < 16; block++) /* y blocks */
{
vp8_setup_block(&x->block[block], x->dst.y_stride, y, x->dst.y_stride,
(block >> 2) * 4 * x->dst.y_stride + (block & 3) * 4, bs);
}
for (block = 16; block < 20; block++) // U and V blocks
for (block = 16; block < 20; block++) /* U and V blocks */
{
vp8_setup_block(&x->block[block], x->dst.uv_stride, u, x->dst.uv_stride,
((block - 16) >> 1) * 4 * x->dst.uv_stride + (block & 1) * 4, bs);
@ -123,7 +123,7 @@ void vp8_setup_block_dptrs(MACROBLOCKD *x)
void vp8_build_block_doffsets(MACROBLOCKD *x)
{
// handle the destination pitch features
/* handle the destination pitch features */
vp8_setup_macroblock(x, DEST);
vp8_setup_macroblock(x, PRED);
}
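
The offset expressions above lay the sixteen 4x4 luma blocks out in raster order inside the macroblock ((block >> 2) selects the row of blocks, (block & 3) the column) and the chroma blocks 2x2 within an 8x8 plane. Printing the luma offsets for an assumed stride makes the layout obvious:

    #include <stdio.h>

    /* Reproduces the y-block offset formula from vp8_setup_macroblock() for an
     * assumed destination stride of 32 bytes: each block starts 4 pixels to the
     * right of its neighbour and 4 rows below the block above it. */
    int main(void)
    {
        const int y_stride = 32;   /* illustrative stride, not a libvpx constant */
        int block;

        for (block = 0; block < 16; block++)
        {
            int offset = (block >> 2) * 4 * y_stride + (block & 3) * 4;
            printf("%4d%c", offset, (block & 3) == 3 ? '\n' : ' ');
        }
        return 0;
    }
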

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -14,27 +14,27 @@
const int vp8_mode_contexts[6][4] =
{
{
// 0
/* 0 */
7, 1, 1, 143,
},
{
// 1
/* 1 */
14, 18, 14, 107,
},
{
// 2
/* 2 */
135, 64, 57, 68,
},
{
// 3
/* 3 */
60, 56, 128, 65,
},
{
// 4
/* 4 */
159, 134, 128, 34,
},
{
// 5
/* 5 */
234, 188, 128, 28,
},
};

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -14,133 +14,133 @@
const unsigned int vp8_kf_default_bmode_counts [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES] =
{
{
//Above Mode : 0
{ 43438, 2195, 470, 316, 615, 171, 217, 412, 124, 160, }, // left_mode 0
{ 5722, 2751, 296, 291, 81, 68, 80, 101, 100, 170, }, // left_mode 1
{ 1629, 201, 307, 25, 47, 16, 34, 72, 19, 28, }, // left_mode 2
{ 332, 266, 36, 500, 20, 65, 23, 14, 154, 106, }, // left_mode 3
{ 450, 97, 10, 24, 117, 10, 2, 12, 8, 71, }, // left_mode 4
{ 384, 49, 29, 44, 12, 162, 51, 5, 87, 42, }, // left_mode 5
{ 495, 53, 157, 27, 14, 57, 180, 17, 17, 34, }, // left_mode 6
{ 695, 64, 62, 9, 27, 5, 3, 147, 10, 26, }, // left_mode 7
{ 230, 54, 20, 124, 16, 125, 29, 12, 283, 37, }, // left_mode 8
{ 260, 87, 21, 120, 32, 16, 33, 16, 33, 203, }, // left_mode 9
/*Above Mode : 0*/
{ 43438, 2195, 470, 316, 615, 171, 217, 412, 124, 160, }, /* left_mode 0 */
{ 5722, 2751, 296, 291, 81, 68, 80, 101, 100, 170, }, /* left_mode 1 */
{ 1629, 201, 307, 25, 47, 16, 34, 72, 19, 28, }, /* left_mode 2 */
{ 332, 266, 36, 500, 20, 65, 23, 14, 154, 106, }, /* left_mode 3 */
{ 450, 97, 10, 24, 117, 10, 2, 12, 8, 71, }, /* left_mode 4 */
{ 384, 49, 29, 44, 12, 162, 51, 5, 87, 42, }, /* left_mode 5 */
{ 495, 53, 157, 27, 14, 57, 180, 17, 17, 34, }, /* left_mode 6 */
{ 695, 64, 62, 9, 27, 5, 3, 147, 10, 26, }, /* left_mode 7 */
{ 230, 54, 20, 124, 16, 125, 29, 12, 283, 37, }, /* left_mode 8 */
{ 260, 87, 21, 120, 32, 16, 33, 16, 33, 203, }, /* left_mode 9 */
},
{
//Above Mode : 1
{ 3934, 2573, 355, 137, 128, 87, 133, 117, 37, 27, }, // left_mode 0
{ 1036, 1929, 278, 135, 27, 37, 48, 55, 41, 91, }, // left_mode 1
{ 223, 256, 253, 15, 13, 9, 28, 64, 3, 3, }, // left_mode 2
{ 120, 129, 17, 316, 15, 11, 9, 4, 53, 74, }, // left_mode 3
{ 129, 58, 6, 11, 38, 2, 0, 5, 2, 67, }, // left_mode 4
{ 53, 22, 11, 16, 8, 26, 14, 3, 19, 12, }, // left_mode 5
{ 59, 26, 61, 11, 4, 9, 35, 13, 8, 8, }, // left_mode 6
{ 101, 52, 40, 8, 5, 2, 8, 59, 2, 20, }, // left_mode 7
{ 48, 34, 10, 52, 8, 15, 6, 6, 63, 20, }, // left_mode 8
{ 96, 48, 22, 63, 11, 14, 5, 8, 9, 96, }, // left_mode 9
/*Above Mode : 1*/
{ 3934, 2573, 355, 137, 128, 87, 133, 117, 37, 27, }, /* left_mode 0 */
{ 1036, 1929, 278, 135, 27, 37, 48, 55, 41, 91, }, /* left_mode 1 */
{ 223, 256, 253, 15, 13, 9, 28, 64, 3, 3, }, /* left_mode 2 */
{ 120, 129, 17, 316, 15, 11, 9, 4, 53, 74, }, /* left_mode 3 */
{ 129, 58, 6, 11, 38, 2, 0, 5, 2, 67, }, /* left_mode 4 */
{ 53, 22, 11, 16, 8, 26, 14, 3, 19, 12, }, /* left_mode 5 */
{ 59, 26, 61, 11, 4, 9, 35, 13, 8, 8, }, /* left_mode 6 */
{ 101, 52, 40, 8, 5, 2, 8, 59, 2, 20, }, /* left_mode 7 */
{ 48, 34, 10, 52, 8, 15, 6, 6, 63, 20, }, /* left_mode 8 */
{ 96, 48, 22, 63, 11, 14, 5, 8, 9, 96, }, /* left_mode 9 */
},
{
//Above Mode : 2
{ 709, 461, 506, 36, 27, 33, 151, 98, 24, 6, }, // left_mode 0
{ 201, 375, 442, 27, 13, 8, 46, 58, 6, 19, }, // left_mode 1
{ 122, 140, 417, 4, 13, 3, 33, 59, 4, 2, }, // left_mode 2
{ 36, 17, 22, 16, 6, 8, 12, 17, 9, 21, }, // left_mode 3
{ 51, 15, 7, 1, 14, 0, 4, 5, 3, 22, }, // left_mode 4
{ 18, 11, 30, 9, 7, 20, 11, 5, 2, 6, }, // left_mode 5
{ 38, 21, 103, 9, 4, 12, 79, 13, 2, 5, }, // left_mode 6
{ 64, 17, 66, 2, 12, 4, 2, 65, 4, 5, }, // left_mode 7
{ 14, 7, 7, 16, 3, 11, 4, 13, 15, 16, }, // left_mode 8
{ 36, 8, 32, 9, 9, 4, 14, 7, 6, 24, }, // left_mode 9
/*Above Mode : 2*/
{ 709, 461, 506, 36, 27, 33, 151, 98, 24, 6, }, /* left_mode 0 */
{ 201, 375, 442, 27, 13, 8, 46, 58, 6, 19, }, /* left_mode 1 */
{ 122, 140, 417, 4, 13, 3, 33, 59, 4, 2, }, /* left_mode 2 */
{ 36, 17, 22, 16, 6, 8, 12, 17, 9, 21, }, /* left_mode 3 */
{ 51, 15, 7, 1, 14, 0, 4, 5, 3, 22, }, /* left_mode 4 */
{ 18, 11, 30, 9, 7, 20, 11, 5, 2, 6, }, /* left_mode 5 */
{ 38, 21, 103, 9, 4, 12, 79, 13, 2, 5, }, /* left_mode 6 */
{ 64, 17, 66, 2, 12, 4, 2, 65, 4, 5, }, /* left_mode 7 */
{ 14, 7, 7, 16, 3, 11, 4, 13, 15, 16, }, /* left_mode 8 */
{ 36, 8, 32, 9, 9, 4, 14, 7, 6, 24, }, /* left_mode 9 */
},
{
//Above Mode : 3
{ 1340, 173, 36, 119, 30, 10, 13, 10, 20, 26, }, // left_mode 0
{ 156, 293, 26, 108, 5, 16, 2, 4, 23, 30, }, // left_mode 1
{ 60, 34, 13, 7, 3, 3, 0, 8, 4, 5, }, // left_mode 2
{ 72, 64, 1, 235, 3, 9, 2, 7, 28, 38, }, // left_mode 3
{ 29, 14, 1, 3, 5, 0, 2, 2, 5, 13, }, // left_mode 4
{ 22, 7, 4, 11, 2, 5, 1, 2, 6, 4, }, // left_mode 5
{ 18, 14, 5, 6, 4, 3, 14, 0, 9, 2, }, // left_mode 6
{ 41, 10, 7, 1, 2, 0, 0, 10, 2, 1, }, // left_mode 7
{ 23, 19, 2, 33, 1, 5, 2, 0, 51, 8, }, // left_mode 8
{ 33, 26, 7, 53, 3, 9, 3, 3, 9, 19, }, // left_mode 9
/*Above Mode : 3*/
{ 1340, 173, 36, 119, 30, 10, 13, 10, 20, 26, }, /* left_mode 0 */
{ 156, 293, 26, 108, 5, 16, 2, 4, 23, 30, }, /* left_mode 1 */
{ 60, 34, 13, 7, 3, 3, 0, 8, 4, 5, }, /* left_mode 2 */
{ 72, 64, 1, 235, 3, 9, 2, 7, 28, 38, }, /* left_mode 3 */
{ 29, 14, 1, 3, 5, 0, 2, 2, 5, 13, }, /* left_mode 4 */
{ 22, 7, 4, 11, 2, 5, 1, 2, 6, 4, }, /* left_mode 5 */
{ 18, 14, 5, 6, 4, 3, 14, 0, 9, 2, }, /* left_mode 6 */
{ 41, 10, 7, 1, 2, 0, 0, 10, 2, 1, }, /* left_mode 7 */
{ 23, 19, 2, 33, 1, 5, 2, 0, 51, 8, }, /* left_mode 8 */
{ 33, 26, 7, 53, 3, 9, 3, 3, 9, 19, }, /* left_mode 9 */
},
{
//Above Mode : 4
{ 410, 165, 43, 31, 66, 15, 30, 54, 8, 17, }, // left_mode 0
{ 115, 64, 27, 18, 30, 7, 11, 15, 4, 19, }, // left_mode 1
{ 31, 23, 25, 1, 7, 2, 2, 10, 0, 5, }, // left_mode 2
{ 17, 4, 1, 6, 8, 2, 7, 5, 5, 21, }, // left_mode 3
{ 120, 12, 1, 2, 83, 3, 0, 4, 1, 40, }, // left_mode 4
{ 4, 3, 1, 2, 1, 2, 5, 0, 3, 6, }, // left_mode 5
{ 10, 2, 13, 6, 6, 6, 8, 2, 4, 5, }, // left_mode 6
{ 58, 10, 5, 1, 28, 1, 1, 33, 1, 9, }, // left_mode 7
{ 8, 2, 1, 4, 2, 5, 1, 1, 2, 10, }, // left_mode 8
{ 76, 7, 5, 7, 18, 2, 2, 0, 5, 45, }, // left_mode 9
/*Above Mode : 4*/
{ 410, 165, 43, 31, 66, 15, 30, 54, 8, 17, }, /* left_mode 0 */
{ 115, 64, 27, 18, 30, 7, 11, 15, 4, 19, }, /* left_mode 1 */
{ 31, 23, 25, 1, 7, 2, 2, 10, 0, 5, }, /* left_mode 2 */
{ 17, 4, 1, 6, 8, 2, 7, 5, 5, 21, }, /* left_mode 3 */
{ 120, 12, 1, 2, 83, 3, 0, 4, 1, 40, }, /* left_mode 4 */
{ 4, 3, 1, 2, 1, 2, 5, 0, 3, 6, }, /* left_mode 5 */
{ 10, 2, 13, 6, 6, 6, 8, 2, 4, 5, }, /* left_mode 6 */
{ 58, 10, 5, 1, 28, 1, 1, 33, 1, 9, }, /* left_mode 7 */
{ 8, 2, 1, 4, 2, 5, 1, 1, 2, 10, }, /* left_mode 8 */
{ 76, 7, 5, 7, 18, 2, 2, 0, 5, 45, }, /* left_mode 9 */
},
{
//Above Mode : 5
{ 444, 46, 47, 20, 14, 110, 60, 14, 60, 7, }, // left_mode 0
{ 59, 57, 25, 18, 3, 17, 21, 6, 14, 6, }, // left_mode 1
{ 24, 17, 20, 6, 4, 13, 7, 2, 3, 2, }, // left_mode 2
{ 13, 11, 5, 14, 4, 9, 2, 4, 15, 7, }, // left_mode 3
{ 8, 5, 2, 1, 4, 0, 1, 1, 2, 12, }, // left_mode 4
{ 19, 5, 5, 7, 4, 40, 6, 3, 10, 4, }, // left_mode 5
{ 16, 5, 9, 1, 1, 16, 26, 2, 10, 4, }, // left_mode 6
{ 11, 4, 8, 1, 1, 4, 4, 5, 4, 1, }, // left_mode 7
{ 15, 1, 3, 7, 3, 21, 7, 1, 34, 5, }, // left_mode 8
{ 18, 5, 1, 3, 4, 3, 7, 1, 2, 9, }, // left_mode 9
/*Above Mode : 5*/
{ 444, 46, 47, 20, 14, 110, 60, 14, 60, 7, }, /* left_mode 0 */
{ 59, 57, 25, 18, 3, 17, 21, 6, 14, 6, }, /* left_mode 1 */
{ 24, 17, 20, 6, 4, 13, 7, 2, 3, 2, }, /* left_mode 2 */
{ 13, 11, 5, 14, 4, 9, 2, 4, 15, 7, }, /* left_mode 3 */
{ 8, 5, 2, 1, 4, 0, 1, 1, 2, 12, }, /* left_mode 4 */
{ 19, 5, 5, 7, 4, 40, 6, 3, 10, 4, }, /* left_mode 5 */
{ 16, 5, 9, 1, 1, 16, 26, 2, 10, 4, }, /* left_mode 6 */
{ 11, 4, 8, 1, 1, 4, 4, 5, 4, 1, }, /* left_mode 7 */
{ 15, 1, 3, 7, 3, 21, 7, 1, 34, 5, }, /* left_mode 8 */
{ 18, 5, 1, 3, 4, 3, 7, 1, 2, 9, }, /* left_mode 9 */
},
{
//Above Mode : 6
{ 476, 149, 94, 13, 14, 77, 291, 27, 23, 3, }, // left_mode 0
{ 79, 83, 42, 14, 2, 12, 63, 2, 4, 14, }, // left_mode 1
{ 43, 36, 55, 1, 3, 8, 42, 11, 5, 1, }, // left_mode 2
{ 9, 9, 6, 16, 1, 5, 6, 3, 11, 10, }, // left_mode 3
{ 10, 3, 1, 3, 10, 1, 0, 1, 1, 4, }, // left_mode 4
{ 14, 6, 15, 5, 1, 20, 25, 2, 5, 0, }, // left_mode 5
{ 28, 7, 51, 1, 0, 8, 127, 6, 2, 5, }, // left_mode 6
{ 13, 3, 3, 2, 3, 1, 2, 8, 1, 2, }, // left_mode 7
{ 10, 3, 3, 3, 3, 8, 2, 2, 9, 3, }, // left_mode 8
{ 13, 7, 11, 4, 0, 4, 6, 2, 5, 8, }, // left_mode 9
/*Above Mode : 6*/
{ 476, 149, 94, 13, 14, 77, 291, 27, 23, 3, }, /* left_mode 0 */
{ 79, 83, 42, 14, 2, 12, 63, 2, 4, 14, }, /* left_mode 1 */
{ 43, 36, 55, 1, 3, 8, 42, 11, 5, 1, }, /* left_mode 2 */
{ 9, 9, 6, 16, 1, 5, 6, 3, 11, 10, }, /* left_mode 3 */
{ 10, 3, 1, 3, 10, 1, 0, 1, 1, 4, }, /* left_mode 4 */
{ 14, 6, 15, 5, 1, 20, 25, 2, 5, 0, }, /* left_mode 5 */
{ 28, 7, 51, 1, 0, 8, 127, 6, 2, 5, }, /* left_mode 6 */
{ 13, 3, 3, 2, 3, 1, 2, 8, 1, 2, }, /* left_mode 7 */
{ 10, 3, 3, 3, 3, 8, 2, 2, 9, 3, }, /* left_mode 8 */
{ 13, 7, 11, 4, 0, 4, 6, 2, 5, 8, }, /* left_mode 9 */
},
{
//Above Mode : 7
{ 376, 135, 119, 6, 32, 8, 31, 224, 9, 3, }, // left_mode 0
{ 93, 60, 54, 6, 13, 7, 8, 92, 2, 12, }, // left_mode 1
{ 74, 36, 84, 0, 3, 2, 9, 67, 2, 1, }, // left_mode 2
{ 19, 4, 4, 8, 8, 2, 4, 7, 6, 16, }, // left_mode 3
{ 51, 7, 4, 1, 77, 3, 0, 14, 1, 15, }, // left_mode 4
{ 7, 7, 5, 7, 4, 7, 4, 5, 0, 3, }, // left_mode 5
{ 18, 2, 19, 2, 2, 4, 12, 11, 1, 2, }, // left_mode 6
{ 129, 6, 27, 1, 21, 3, 0, 189, 0, 6, }, // left_mode 7
{ 9, 1, 2, 8, 3, 7, 0, 5, 3, 3, }, // left_mode 8
{ 20, 4, 5, 10, 4, 2, 7, 17, 3, 16, }, // left_mode 9
/*Above Mode : 7*/
{ 376, 135, 119, 6, 32, 8, 31, 224, 9, 3, }, /* left_mode 0 */
{ 93, 60, 54, 6, 13, 7, 8, 92, 2, 12, }, /* left_mode 1 */
{ 74, 36, 84, 0, 3, 2, 9, 67, 2, 1, }, /* left_mode 2 */
{ 19, 4, 4, 8, 8, 2, 4, 7, 6, 16, }, /* left_mode 3 */
{ 51, 7, 4, 1, 77, 3, 0, 14, 1, 15, }, /* left_mode 4 */
{ 7, 7, 5, 7, 4, 7, 4, 5, 0, 3, }, /* left_mode 5 */
{ 18, 2, 19, 2, 2, 4, 12, 11, 1, 2, }, /* left_mode 6 */
{ 129, 6, 27, 1, 21, 3, 0, 189, 0, 6, }, /* left_mode 7 */
{ 9, 1, 2, 8, 3, 7, 0, 5, 3, 3, }, /* left_mode 8 */
{ 20, 4, 5, 10, 4, 2, 7, 17, 3, 16, }, /* left_mode 9 */
},
{
//Above Mode : 8
{ 617, 68, 34, 79, 11, 27, 25, 14, 75, 13, }, // left_mode 0
{ 51, 82, 21, 26, 6, 12, 13, 1, 26, 16, }, // left_mode 1
{ 29, 9, 12, 11, 3, 7, 1, 10, 2, 2, }, // left_mode 2
{ 17, 19, 11, 74, 4, 3, 2, 0, 58, 13, }, // left_mode 3
{ 10, 1, 1, 3, 4, 1, 0, 2, 1, 8, }, // left_mode 4
{ 14, 4, 5, 5, 1, 13, 2, 0, 27, 8, }, // left_mode 5
{ 10, 3, 5, 4, 1, 7, 6, 4, 5, 1, }, // left_mode 6
{ 10, 2, 6, 2, 1, 1, 1, 4, 2, 1, }, // left_mode 7
{ 14, 8, 5, 23, 2, 12, 6, 2, 117, 5, }, // left_mode 8
{ 9, 6, 2, 19, 1, 6, 3, 2, 9, 9, }, // left_mode 9
/*Above Mode : 8*/
{ 617, 68, 34, 79, 11, 27, 25, 14, 75, 13, }, /* left_mode 0 */
{ 51, 82, 21, 26, 6, 12, 13, 1, 26, 16, }, /* left_mode 1 */
{ 29, 9, 12, 11, 3, 7, 1, 10, 2, 2, }, /* left_mode 2 */
{ 17, 19, 11, 74, 4, 3, 2, 0, 58, 13, }, /* left_mode 3 */
{ 10, 1, 1, 3, 4, 1, 0, 2, 1, 8, }, /* left_mode 4 */
{ 14, 4, 5, 5, 1, 13, 2, 0, 27, 8, }, /* left_mode 5 */
{ 10, 3, 5, 4, 1, 7, 6, 4, 5, 1, }, /* left_mode 6 */
{ 10, 2, 6, 2, 1, 1, 1, 4, 2, 1, }, /* left_mode 7 */
{ 14, 8, 5, 23, 2, 12, 6, 2, 117, 5, }, /* left_mode 8 */
{ 9, 6, 2, 19, 1, 6, 3, 2, 9, 9, }, /* left_mode 9 */
},
{
//Above Mode : 9
{ 680, 73, 22, 38, 42, 5, 11, 9, 6, 28, }, // left_mode 0
{ 113, 112, 21, 22, 10, 2, 8, 4, 6, 42, }, // left_mode 1
{ 44, 20, 24, 6, 5, 4, 3, 3, 1, 2, }, // left_mode 2
{ 40, 23, 7, 71, 5, 2, 4, 1, 7, 22, }, // left_mode 3
{ 85, 9, 4, 4, 17, 2, 0, 3, 2, 23, }, // left_mode 4
{ 13, 4, 2, 6, 1, 7, 0, 1, 7, 6, }, // left_mode 5
{ 26, 6, 8, 3, 2, 3, 8, 1, 5, 4, }, // left_mode 6
{ 54, 8, 9, 6, 7, 0, 1, 11, 1, 3, }, // left_mode 7
{ 9, 10, 4, 13, 2, 5, 4, 2, 14, 8, }, // left_mode 8
{ 92, 9, 5, 19, 15, 3, 3, 1, 6, 58, }, // left_mode 9
/*Above Mode : 9*/
{ 680, 73, 22, 38, 42, 5, 11, 9, 6, 28, }, /* left_mode 0 */
{ 113, 112, 21, 22, 10, 2, 8, 4, 6, 42, }, /* left_mode 1 */
{ 44, 20, 24, 6, 5, 4, 3, 3, 1, 2, }, /* left_mode 2 */
{ 40, 23, 7, 71, 5, 2, 4, 1, 7, 22, }, /* left_mode 3 */
{ 85, 9, 4, 4, 17, 2, 0, 3, 2, 23, }, /* left_mode 4 */
{ 13, 4, 2, 6, 1, 7, 0, 1, 7, 6, }, /* left_mode 5 */
{ 26, 6, 8, 3, 2, 3, 8, 1, 5, 4, }, /* left_mode 6 */
{ 54, 8, 9, 6, 7, 0, 1, 11, 1, 3, }, /* left_mode 7 */
{ 9, 10, 4, 13, 2, 5, 4, 2, 14, 8, }, /* left_mode 8 */
{ 92, 9, 5, 19, 15, 3, 3, 1, 6, 58, }, /* left_mode 9 */
},
};

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -21,9 +21,9 @@
#include "recon.h"
#include "postproc.h"
//#ifdef PACKET_TESTING
/*#ifdef PACKET_TESTING*/
#include "header.h"
//#endif
/*#endif*/
/* Create/destroy static data structures. */
@ -43,7 +43,7 @@ typedef struct frame_contexts
vp8_prob sub_mv_ref_prob [VP8_SUBMVREFS-1];
vp8_prob coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens-1];
MV_CONTEXT mvc[2];
MV_CONTEXT pre_mvc[2]; //not to caculate the mvcost for the frame if mvc doesn't change.
MV_CONTEXT pre_mvc[2]; /* not to caculate the mvcost for the frame if mvc doesn't change. */
} FRAME_CONTEXT;
typedef enum
@ -74,6 +74,7 @@ typedef struct VP8_COMMON_RTCD
vp8_subpix_rtcd_vtable_t subpix;
vp8_loopfilter_rtcd_vtable_t loopfilter;
vp8_postproc_rtcd_vtable_t postproc;
int flags;
#else
int unused;
#endif
@ -83,9 +84,9 @@ typedef struct VP8Common
{
struct vpx_internal_error_info error;
DECLARE_ALIGNED(16, short, Y1dequant[QINDEX_RANGE][4][4]);
DECLARE_ALIGNED(16, short, Y2dequant[QINDEX_RANGE][4][4]);
DECLARE_ALIGNED(16, short, UVdequant[QINDEX_RANGE][4][4]);
DECLARE_ALIGNED(16, short, Y1dequant[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, short, Y2dequant[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, short, UVdequant[QINDEX_RANGE][16]);
int Width;
int Height;
@ -104,7 +105,7 @@ typedef struct VP8Common
YV12_BUFFER_CONFIG post_proc_buffer;
YV12_BUFFER_CONFIG temp_scale_frame;
FRAME_TYPE last_frame_type; //Add to check if vp8_frame_init_loop_filter() can be skipped.
FRAME_TYPE last_frame_type; /* Add to check if vp8_frame_init_loop_filter() can be skipped. */
FRAME_TYPE frame_type;
int show_frame;
@ -115,7 +116,7 @@ typedef struct VP8Common
int mb_cols;
int mode_info_stride;
// prfile settings
/* profile settings */
int mb_no_coeff_skip;
int no_lpf;
int simpler_lpf;
@ -123,7 +124,7 @@ typedef struct VP8Common
int full_pixel;
int base_qindex;
int last_kf_gf_q; // Q used on the last GF or KF
int last_kf_gf_q; /* Q used on the last GF or KF */
int y1dc_delta_q;
int y2dc_delta_q;
@ -153,31 +154,31 @@ typedef struct VP8Common
int last_sharpness_level;
int sharpness_level;
int refresh_last_frame; // Two state 0 = NO, 1 = YES
int refresh_golden_frame; // Two state 0 = NO, 1 = YES
int refresh_alt_ref_frame; // Two state 0 = NO, 1 = YES
int refresh_last_frame; /* Two state 0 = NO, 1 = YES */
int refresh_golden_frame; /* Two state 0 = NO, 1 = YES */
int refresh_alt_ref_frame; /* Two state 0 = NO, 1 = YES */
int copy_buffer_to_gf; // 0 none, 1 Last to GF, 2 ARF to GF
int copy_buffer_to_arf; // 0 none, 1 Last to ARF, 2 GF to ARF
int copy_buffer_to_gf; /* 0 none, 1 Last to GF, 2 ARF to GF */
int copy_buffer_to_arf; /* 0 none, 1 Last to ARF, 2 GF to ARF */
int refresh_entropy_probs; // Two state 0 = NO, 1 = YES
int refresh_entropy_probs; /* Two state 0 = NO, 1 = YES */
int ref_frame_sign_bias[MAX_REF_FRAMES]; // Two state 0, 1
int ref_frame_sign_bias[MAX_REF_FRAMES]; /* Two state 0, 1 */
// Y,U,V,Y2
ENTROPY_CONTEXT_PLANES *above_context; // row of context for each plane
ENTROPY_CONTEXT_PLANES left_context; // (up to) 4 contexts ""
/* Y,U,V,Y2 */
ENTROPY_CONTEXT_PLANES *above_context; /* row of context for each plane */
ENTROPY_CONTEXT_PLANES left_context; /* (up to) 4 contexts "" */
// keyframe block modes are predicted by their above, left neighbors
/* keyframe block modes are predicted by their above, left neighbors */
vp8_prob kf_bmode_prob [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1];
vp8_prob kf_ymode_prob [VP8_YMODES-1]; /* keyframe "" */
vp8_prob kf_uv_mode_prob [VP8_UV_MODES-1];
FRAME_CONTEXT lfc; // last frame entropy
FRAME_CONTEXT fc; // this frame entropy
FRAME_CONTEXT lfc; /* last frame entropy */
FRAME_CONTEXT fc; /* this frame entropy */
unsigned int current_video_frame;
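One note on the dequant tables above (an inference from the declarations, not stated in the commit): Y1dequant, Y2dequant and UVdequant are flattened from [QINDEX_RANGE][4][4] to [QINDEX_RANGE][16].
/* Inference only, not part of the commit: the flattened layout keeps the same
 * memory image and 16-byte alignment, so an element formerly addressed as
 * Y1dequant[q][r][c] now corresponds to Y1dequant[q][r * 4 + c]. */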

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -19,7 +19,35 @@
#include <math.h>
#include <stdlib.h>
#include <stdio.h>
// global constants
#define RGB_TO_YUV(t) \
( (0.257*(float)(t>>16)) + (0.504*(float)(t>>8&0xff)) + (0.098*(float)(t&0xff)) + 16), \
(-(0.148*(float)(t>>16)) - (0.291*(float)(t>>8&0xff)) + (0.439*(float)(t&0xff)) + 128), \
( (0.439*(float)(t>>16)) - (0.368*(float)(t>>8&0xff)) - (0.071*(float)(t&0xff)) + 128)
/* global constants */
static const unsigned char MB_PREDICTION_MODE_colors[MB_MODE_COUNT][3] =
{
{ RGB_TO_YUV(0x98FB98) }, /* PaleGreen */
{ RGB_TO_YUV(0x00FF00) }, /* Green */
{ RGB_TO_YUV(0xADFF2F) }, /* GreenYellow */
{ RGB_TO_YUV(0x228B22) }, /* ForestGreen */
{ RGB_TO_YUV(0x006400) }, /* DarkGreen */
{ RGB_TO_YUV(0x98F5FF) }, /* Cadet Blue */
{ RGB_TO_YUV(0x6CA6CD) }, /* Sky Blue */
{ RGB_TO_YUV(0x00008B) }, /* Dark blue */
{ RGB_TO_YUV(0x551A8B) }, /* Purple */
{ RGB_TO_YUV(0xFF0000) } /* Red */
};
static const unsigned char MV_REFERENCE_FRAME_colors[MB_MODE_COUNT][3] =
{
{ RGB_TO_YUV(0x00ff00) }, /* Blue */
{ RGB_TO_YUV(0x0000ff) }, /* Green */
{ RGB_TO_YUV(0xffff00) }, /* Yellow */
{ RGB_TO_YUV(0xff0000) }, /* Red */
};
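The RGB_TO_YUV macro above is the standard BT.601 studio-swing RGB-to-YCbCr conversion, applied at compile time to the hex color constants in the two overlay tables. As a standalone sanity check (a sketch, not part of the libvpx sources), the same arithmetic in plain C:

#include <stdio.h>

/* Same coefficients as the RGB_TO_YUV macro above; illustrative only. */
static void rgb_to_yuv(unsigned int t, float yuv[3])
{
    float r = (float)(t >> 16), g = (float)(t >> 8 & 0xff), b = (float)(t & 0xff);
    yuv[0] =  0.257f * r + 0.504f * g + 0.098f * b + 16;   /* Y      */
    yuv[1] = -0.148f * r - 0.291f * g + 0.439f * b + 128;  /* U (Cb) */
    yuv[2] =  0.439f * r - 0.368f * g - 0.071f * b + 128;  /* V (Cr) */
}

int main(void)
{
    float yuv[3];
    rgb_to_yuv(0x98FB98, yuv); /* PaleGreen, the first MB_PREDICTION_MODE_colors entry */
    printf("Y=%.1f U=%.1f V=%.1f\n", yuv[0], yuv[1], yuv[2]); /* ~196.5 99.2 91.6 */
    return 0;
}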
static const short kernel5[] =
{
@ -76,7 +104,7 @@ const short vp8_rv[] =
extern void vp8_blit_text(const char *msg, unsigned char *address, const int pitch);
extern void vp8_blit_line(int x0, int x1, int y0, int y1, unsigned char *image, const int pitch);
/***********************************************************************************************************
*/
void vp8_post_proc_down_and_across_c
@ -101,7 +129,7 @@ void vp8_post_proc_down_and_across_c
for (row = 0; row < rows; row++)
{
// post_proc_down for one row
/* post_proc_down for one row */
p_src = src_ptr;
p_dst = dst_ptr;
@ -124,7 +152,7 @@ void vp8_post_proc_down_and_across_c
p_dst[col] = v;
}
// now post_proc_across
/* now post_proc_across */
p_src = dst_ptr;
p_dst = dst_ptr;
@ -153,12 +181,12 @@ void vp8_post_proc_down_and_across_c
p_dst[col-2] = d[(col-2)&7];
}
//handle the last two pixels
/* handle the last two pixels */
p_dst[col-2] = d[(col-2)&7];
p_dst[col-1] = d[(col-1)&7];
//next row
/* next row */
src_ptr += pitch;
dst_ptr += pitch;
}
@ -351,9 +379,9 @@ static void fillrd(struct postproc_state *state, int q, int a)
sigma = ai + .5 + .6 * (63 - qi) / 63.0;
// set up a lookup table of 256 entries that matches
// a gaussian distribution with sigma determined by q.
//
/* set up a lookup table of 256 entries that matches
* a gaussian distribution with sigma determined by q.
*/
{
double i;
int next, j;
@ -444,6 +472,89 @@ void vp8_plane_add_noise_c(unsigned char *Start, char *noise,
}
}
/* Blend the macro block with a solid colored square. Leave the
* edges unblended to give distinction to macro blocks in areas
* filled with the same color block.
*/
void vp8_blend_mb_c (unsigned char *y, unsigned char *u, unsigned char *v,
int y1, int u1, int v1, int alpha, int stride)
{
int i, j;
int y1_const = y1*((1<<16)-alpha);
int u1_const = u1*((1<<16)-alpha);
int v1_const = v1*((1<<16)-alpha);
y += stride + 2;
for (i = 0; i < 14; i++)
{
for (j = 0; j < 14; j++)
{
y[j] = (y[j]*alpha + y1_const)>>16;
}
y += stride;
}
stride >>= 1;
u += stride + 1;
v += stride + 1;
for (i = 0; i < 6; i++)
{
for (j = 0; j < 6; j++)
{
u[j] = (u[j]*alpha + u1_const)>>16;
v[j] = (v[j]*alpha + v1_const)>>16;
}
u += stride;
v += stride;
}
}
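The blend above is 16-bit fixed point, and the loops deliberately skip the outer edge of the macroblock so adjacent blocks drawn in the same color stay distinguishable. A minimal single-pixel sketch of the arithmetic (not from the commit):

/* Illustrative only: the per-pixel operation performed by vp8_blend_mb_c. */
static unsigned char blend_pixel(unsigned char src, unsigned char color, int alpha)
{
    return (unsigned char)((src * alpha + color * ((1 << 16) - alpha)) >> 16);
}
/* With alpha = 0xc000, as passed by the debug overlays below, the source pixel
 * keeps 3/4 of its weight:
 * blend_pixel(200, 64, 0xc000) == (200*49152 + 64*16384) >> 16 == 166. */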
static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int height)
{
int dx;
int dy;
if (*x1 > width)
{
dx = *x1 - x0;
dy = *y1 - y0;
*x1 = width;
if (dy)
*y1 = ((width-x0)*dy)/dx + y0;
}
if (*x1 < 0)
{
dx = *x1 - x0;
dy = *y1 - y0;
*x1 = 0;
if (dy)
*y1 = ((0-x0)*dy)/dx + y0;
}
if (*y1 > height)
{
dx = *x1 - x0;
dy = *y1 - y0;
*y1 = height;
if (dx)
*x1 = ((height-y0)*dx)/dy + x0;
}
if (*y1 < 0)
{
dx = *x1 - x0;
dy = *y1 - y0;
*y1 = 0;
if (dx)
*x1 = ((0-y0)*dx)/dy + x0;
}
}
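constrain_line() clips the free endpoint of a motion-vector line against each frame edge by similar triangles. A worked example with illustrative numbers only: clipping (x0, y0) = (600, 100) to (x1, y1) = (660, 130) against width = 640 gives dx = 60, dy = 30, so x1 is pinned to 640 and y1 becomes (640 - 600) * 30 / 60 + 100 = 120, preserving the slope of the original segment.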
#if CONFIG_RUNTIME_CPU_DETECT
#define RTCD_VTABLE(oci) (&(oci)->rtcd.postproc)
#else
@ -465,7 +576,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
{
*dest = *oci->frame_to_show;
// handle problem with extending borders
/* handle problem with extending borders */
dest->y_width = oci->Width;
dest->y_height = oci->Height;
dest->uv_height = dest->y_height / 2;
@ -521,7 +632,8 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
oci->mb_cols, oci->mb_rows);
vp8_blit_text(message, oci->post_proc_buffer.y_buffer, oci->post_proc_buffer.y_stride);
}
else if (flags & VP8D_DEBUG_LEVEL2)
if (flags & VP8D_DEBUG_LEVEL2)
{
int i, j;
unsigned char *y_ptr;
@ -533,7 +645,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
y_ptr = post->y_buffer + 4 * post->y_stride + 4;
// vp8_filter each macro block
/* vp8_filter each macro block */
for (i = 0; i < mb_rows; i++)
{
for (j = 0; j < mb_cols; j++)
@ -547,12 +659,13 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
y_ptr += 16;
}
mb_index ++; //border
mb_index ++; /* border */
y_ptr += post->y_stride * 16 - post->y_width;
}
}
else if (flags & VP8D_DEBUG_LEVEL3)
if (flags & VP8D_DEBUG_LEVEL3)
{
int i, j;
unsigned char *y_ptr;
@ -564,7 +677,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
y_ptr = post->y_buffer + 4 * post->y_stride + 4;
// vp8_filter each macro block
/* vp8_filter each macro block */
for (i = 0; i < mb_rows; i++)
{
for (j = 0; j < mb_cols; j++)
@ -581,12 +694,13 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
y_ptr += 16;
}
mb_index ++; //border
mb_index ++; /* border */
y_ptr += post->y_stride * 16 - post->y_width;
}
}
else if (flags & VP8D_DEBUG_LEVEL4)
if (flags & VP8D_DEBUG_LEVEL4)
{
sprintf(message, "Bitrate: %10.2f frame_rate: %10.2f ", oci->bitrate, oci->framerate);
vp8_blit_text(message, oci->post_proc_buffer.y_buffer, oci->post_proc_buffer.y_stride);
@ -601,7 +715,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
y_ptr = post->y_buffer + 4 * post->y_stride + 4;
// vp8_filter each macro block
/* vp8_filter each macro block */
for (i = 0; i < mb_rows; i++)
{
for (j = 0; j < mb_cols; j++)
@ -614,7 +728,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
y_ptr += 16;
}
mb_index ++; //border
mb_index ++; /* border */
y_ptr += post->y_stride * 16 - post->y_width;
}
@ -623,11 +737,122 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
}
/* Draw motion vectors */
if (flags & VP8D_DEBUG_LEVEL5)
{
YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
int width = post->y_width;
int height = post->y_height;
int mb_cols = width >> 4;
unsigned char *y_buffer = oci->post_proc_buffer.y_buffer;
int y_stride = oci->post_proc_buffer.y_stride;
MODE_INFO *mi = oci->mi;
int x0, y0;
for (y0 = 8; y0 < (height + 8); y0 += 16)
{
for (x0 = 8; x0 < (width + 8); x0 += 16)
{
int x1, y1;
if (mi->mbmi.mode >= NEARESTMV)
{
MV *mv = &mi->mbmi.mv.as_mv;
x1 = x0 + (mv->col >> 3);
y1 = y0 + (mv->row >> 3);
if (x1 != x0 && y1 != y0)
{
constrain_line (x0, &x1, y0-1, &y1, width, height);
vp8_blit_line (x0, x1, y0-1, y1, y_buffer, y_stride);
constrain_line (x0, &x1, y0+1, &y1, width, height);
vp8_blit_line (x0, x1, y0+1, y1, y_buffer, y_stride);
}
else
vp8_blit_line (x0, x1, y0, y1, y_buffer, y_stride);
}
mi++;
}
mi++;
}
}
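The >> 3 in the motion-vector overlay above converts the stored MV components, which are in eighth-pel units in this code, to whole pixels before drawing; the pair of lines offset by one row simply thickens the stroke. Illustrative arithmetic only: a vector of (row = 16, col = 24) drawn from block centre (x0, y0) ends at (x0 + 24/8, y0 + 16/8) = (x0 + 3, y0 + 2), with constrain_line() keeping the endpoint inside the frame.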
/* Color in block modes */
if (flags & VP8D_DEBUG_LEVEL6)
{
int i, j;
YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
int width = post->y_width;
int height = post->y_height;
unsigned char *y_ptr = oci->post_proc_buffer.y_buffer;
unsigned char *u_ptr = oci->post_proc_buffer.u_buffer;
unsigned char *v_ptr = oci->post_proc_buffer.v_buffer;
int y_stride = oci->post_proc_buffer.y_stride;
MODE_INFO *mi = oci->mi;
for (i = 0; i < height; i += 16)
{
for (j = 0; j < width; j += 16)
{
int Y = 0, U = 0, V = 0;
Y = MB_PREDICTION_MODE_colors[mi->mbmi.mode][0];
U = MB_PREDICTION_MODE_colors[mi->mbmi.mode][1];
V = MB_PREDICTION_MODE_colors[mi->mbmi.mode][2];
POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb)
(&y_ptr[j], &u_ptr[j>>1], &v_ptr[j>>1], Y, U, V, 0xc000, y_stride);
mi++;
}
y_ptr += y_stride*16;
u_ptr += y_stride*4;
v_ptr += y_stride*4;
mi++;
}
}
/* Color in frame reference blocks */
if (flags & VP8D_DEBUG_LEVEL7)
{
int i, j;
YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
int width = post->y_width;
int height = post->y_height;
unsigned char *y_ptr = oci->post_proc_buffer.y_buffer;
unsigned char *u_ptr = oci->post_proc_buffer.u_buffer;
unsigned char *v_ptr = oci->post_proc_buffer.v_buffer;
int y_stride = oci->post_proc_buffer.y_stride;
MODE_INFO *mi = oci->mi;
for (i = 0; i < height; i += 16)
{
for (j = 0; j < width; j +=16)
{
int Y = 0, U = 0, V = 0;
Y = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][0];
U = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][1];
V = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][2];
POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb)
(&y_ptr[j], &u_ptr[j>>1], &v_ptr[j>>1], Y, U, V, 0xc000, y_stride);
mi++;
}
y_ptr += y_stride*16;
u_ptr += y_stride*4;
v_ptr += y_stride*4;
mi++;
}
}
*dest = oci->post_proc_buffer;
// handle problem with extending borders
/* handle problem with extending borders */
dest->y_width = oci->Width;
dest->y_height = oci->Height;
dest->uv_height = dest->y_height / 2;

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -24,6 +24,10 @@
char whiteclamp[16], char bothclamp[16],\
unsigned int w, unsigned int h, int pitch)
#define prototype_postproc_blend_mb(sym)\
void sym (unsigned char *y, unsigned char *u, unsigned char *v,\
int y1, int u1, int v1, int alpha, int stride)
#if ARCH_X86 || ARCH_X86_64
#include "x86/postproc_x86.h"
#endif
@ -48,16 +52,22 @@ extern prototype_postproc(vp8_postproc_downacross);
#endif
extern prototype_postproc_addnoise(vp8_postproc_addnoise);
#ifndef vp8_postproc_blend_mb
#define vp8_postproc_blend_mb vp8_blend_mb_c
#endif
extern prototype_postproc_blend_mb(vp8_postproc_blend_mb);
typedef prototype_postproc((*vp8_postproc_fn_t));
typedef prototype_postproc_inplace((*vp8_postproc_inplace_fn_t));
typedef prototype_postproc_addnoise((*vp8_postproc_addnoise_fn_t));
typedef prototype_postproc_blend_mb((*vp8_postproc_blend_mb_fn_t));
typedef struct
{
vp8_postproc_inplace_fn_t down;
vp8_postproc_inplace_fn_t across;
vp8_postproc_fn_t downacross;
vp8_postproc_addnoise_fn_t addnoise;
vp8_postproc_blend_mb_fn_t blend_mb;
} vp8_postproc_rtcd_vtable_t;
#if CONFIG_RUNTIME_CPU_DETECT
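The #if CONFIG_RUNTIME_CPU_DETECT guard above selects between the vtable and direct calls to the C symbols. As a sketch of the dispatch, assuming POSTPROC_INVOKE follows the same shape as the other *_INVOKE macros in this tree (its definition is not shown in this hunk):

#if CONFIG_RUNTIME_CPU_DETECT
#define POSTPROC_INVOKE(ctx, fn) (ctx)->fn
#else
#define POSTPROC_INVOKE(ctx, fn) vp8_postproc_##fn
#endif
/* so POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb)(...) resolves to either
 * oci->rtcd.postproc.blend_mb or, via the vp8_postproc_blend_mb default
 * defined above, vp8_blend_mb_c, depending on the build. */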

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -14,13 +14,16 @@
enum
{
VP8D_NOFILTERING = 0,
VP8D_DEBLOCK = 1,
VP8D_DEMACROBLOCK = 2,
VP8D_ADDNOISE = 4,
VP8D_DEBUG_LEVEL1 = 8,
VP8D_DEBUG_LEVEL2 = 16,
VP8D_DEBUG_LEVEL3 = 32,
VP8D_DEBUG_LEVEL4 = 64
VP8D_DEBLOCK = 1<<0,
VP8D_DEMACROBLOCK = 1<<1,
VP8D_ADDNOISE = 1<<2,
VP8D_DEBUG_LEVEL1 = 1<<3,
VP8D_DEBUG_LEVEL2 = 1<<4,
VP8D_DEBUG_LEVEL3 = 1<<5,
VP8D_DEBUG_LEVEL4 = 1<<6,
VP8D_DEBUG_LEVEL5 = 1<<7,
VP8D_DEBUG_LEVEL6 = 1<<8,
VP8D_DEBUG_LEVEL7 = 1<<9
};
#endif
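With these definitions the debug levels become independent bit flags rather than mutually exclusive values, which is why the else-if chain in vp8_post_proc_frame was changed to plain ifs. A hypothetical caller-side sketch:

/* Illustrative only: with bit flags, several overlays can be requested at once. */
static void request_overlays(void)
{
    int ppflags = VP8D_DEBLOCK | VP8D_DEBUG_LEVEL5 | VP8D_DEBUG_LEVEL6;

    if (ppflags & VP8D_DEBUG_LEVEL5)
        ;   /* draw motion vectors */
    if (ppflags & VP8D_DEBUG_LEVEL6)
        ;   /* color in block modes */
}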

Some files were not shown because too many files changed in this diff.