Bug 1295886 - P14. Add arm neon and vfp optimized methods to ffmpeg. r=mshal

MozReview-Commit-ID: KMUZoDWoisi --HG-- extra : rebase_source : a08eae842680b9d475bfc0afd2d569533dba676b
2017-11-02 01:48:00 +01:00 · 2017-11-02 01:48:00 +01:00 · dae7070b4a
--- a/media/ffvpx/config.h
+++ b/media/ffvpx/config.h
@@ -7,8 +7,12 @@
 #ifndef MOZ_FFVPX_CONFIG_H
 #define MOZ_FFVPX_CONFIG_H
 #if defined(MOZ_FFVPX_FLACONLY)
-#include "config_flac.h"
+#if defined(MOZ_WIDGET_ANDROID)
+#include "config_android32.h"
 #else
+#include "config_flac.h"
+#endif
+#else // MOZ_FFVPX_FLACONLY
 #if defined(XP_WIN)
 // Avoid conflicts with mozilla-config.h
 #if !defined(_MSC_VER)
@@ -38,6 +42,6 @@
 #include "config_unix32.h"
 #endif
 #endif
-#endif // MOZ_FFVPX_FLACONLY
+#endif // else MOZ_FFVPX_FLACONLY
 #include "config_common.h"
 #endif // MOZ_FFVPX_CONFIG_H
--- a/media/ffvpx/config_android32.h
+++ b/media/ffvpx/config_android32.h
--- a/media/ffvpx/ffvpxcommon.mozbuild
+++ b/media/ffvpx/ffvpxcommon.mozbuild
@@ -9,9 +9,8 @@ ASFLAGS += CONFIG['FFVPX_ASFLAGS']
 ASFLAGS += ['-I%s/media/ffvpx' % TOPSRCDIR]

 if CONFIG['FFVPX_ASFLAGS']:
-    USE_YASM = True
-
    if CONFIG['OS_ARCH'] == 'WINNT':
+        USE_YASM = True
       # Fix inline symbols and math defines for windows.
        DEFINES['_USE_MATH_DEFINES'] = True
        DEFINES['inline'] = "__inline"
@@ -22,24 +21,27 @@ if CONFIG['FFVPX_ASFLAGS']:
        else:
            ASFLAGS += ['-Pconfig_win64.asm']
    elif CONFIG['OS_ARCH'] == 'Darwin':
+        USE_YASM = True
        # 32/64-bit macosx assemblers need to prefix symbols with an underscore.
        ASFLAGS += [
            '-Pconfig_darwin64.asm',
            '-DPREFIX'
        ]
-    else:
+    elif CONFIG['CPU_ARCH'] != 'arm':
+        USE_YASM = True
        # Default to unix, similar to how ASFLAGS setup works in configure.in
        ASFLAGS += ['-Pconfig_unix64.asm']
-    # default disabled components
-    ASFLAGS += ['-Pdefaults_disabled.asm']

-    if int(CONFIG['YASM_MAJOR_VERSION']) == 1 and int(CONFIG['YASM_MINOR_VERSION']) < 2:
-        DEFINES['YASM_MISSING_AVX2'] = True
-        ASFLAGS += [
-            '-DHAVE_AVX2=0',
-            '-DHAVE_AVX2_INTERNAL=0',
-            '-DHAVE_AVX2_EXTERNAL=0',
-        ]
+    if USE_YASM:
+        # default disabled components
+        ASFLAGS += ['-Pdefaults_disabled.asm']
+        if int(CONFIG['YASM_MAJOR_VERSION']) == 1 and int(CONFIG['YASM_MINOR_VERSION']) < 2:
+            DEFINES['YASM_MISSING_AVX2'] = True
+            ASFLAGS += [
+                '-DHAVE_AVX2=0',
+                '-DHAVE_AVX2_INTERNAL=0',
+                '-DHAVE_AVX2_EXTERNAL=0',
+            ]


 LOCAL_INCLUDES += ['/media/ffvpx']
--- a/media/ffvpx/libavcodec/arm/flacdsp_arm.S
+++ b/media/ffvpx/libavcodec/arm/flacdsp_arm.S
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2012 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/arm/asm.S"
+
+function flac_lpc_16_1_arm                      /* order-1 path, branched to from ff_flac_lpc_16_arm with its arguments intact: r0=samples, r1=coeffs, r3=qlevel, [sp]=len */
+        ldr             r12, [sp]               /* r12 = len (first stack word, read before the push moves sp) */
+        push            {r4, lr}
+        ldr             r1,  [r1]               /* r1 = coeffs[0], the only coefficient used here */
+        subs            r12, r12, #2            /* r12 = len - 2; flags drive the two exits below */
+        ldr             lr,  [r0], #4           /* lr = samples[0] (history); r0 now points at samples[1] */
+        beq             2f                      /* len == 2: exactly one sample to reconstruct */
+        it              lt
+        poplt           {r4, pc}                /* len < 2: nothing to do, return */
+1:                                              /* main loop: two samples per pass */
+        mul             r4,  lr,  r1            /* r4 = previous sample * coeff */
+        ldm             r0,  {r2, lr}           /* load the next two stored values, r0 not advanced yet */
+        add_sh          r2,  r2,  r4,  asr r3   /* first value += r4 >> qlevel (arithmetic shift) */
+        mul             r4,  r2,  r1            /* the just-updated sample feeds the next prediction */
+        subs            r12, r12, #2
+        add_sh          lr,  lr,  r4,  asr r3   /* second value += r4 >> qlevel; stays in lr as history */
+        stm             r0!, {r2, lr}           /* store both results and advance r0 by two words */
+        bgt             1b                      /* more than one sample still pending */
+        it              lt
+        poplt           {r4, pc}                /* none pending: return */
+2:                                              /* tail: a single sample is left */
+        mul             r4,  lr,  r1
+        ldr             r2,  [r0]
+        add_sh          r2,  r2,  r4,  asr r3
+        str             r2,  [r0]
+        pop             {r4, pc}
+endfunc
+
+function flac_lpc_16_2_arm                      /* order-2 path, branched to from ff_flac_lpc_16_arm: r0=samples, r1=coeffs, r2=order, r3=qlevel, [sp]=len */
+        ldr             r12, [sp]               /* r12 = len */
+        subs            r12, r12, r2            /* r12 = len - order = samples to reconstruct */
+        it              le
+        bxle            lr                      /* nothing to reconstruct: return before touching the stack */
+/* r6/r7 hold the two most recent samples, r8/r9 the two coefficients */
+        push            {r4-r9, lr}
+        ldm             r0!, {r6, r7}           /* r6 = samples[0], r7 = samples[1]; r0 -> samples[2] */
+        ldm             r1,  {r8, r9}           /* r8 = coeffs[0], r9 = coeffs[1] */
+        subs            r12, r12, #1
+        beq             2f                      /* exactly one sample to reconstruct */
+1:                                              /* main loop: two samples per pass */
+        mul             r4,  r6,  r8            /* r4 = older * coeffs[0] */
+        mul             r5,  r7,  r8            /* r5 = newer * coeffs[0], start of the second prediction */
+        mla             r4,  r7,  r9,  r4       /* r4 += newer * coeffs[1] */
+        ldm             r0,  {r6, r7}           /* load the next two stored values, r0 not advanced yet */
+        add_sh          r6,  r6,  r4,  asr r3   /* first result = value + (r4 >> qlevel) */
+        mla             r5,  r6,  r9,  r5       /* second prediction uses the first result */
+        add_sh          r7,  r7,  r5,  asr r3   /* second result = value + (r5 >> qlevel) */
+        stm             r0!, {r6, r7}           /* store both and advance; r6/r7 are the new history */
+        subs            r12, r12, #2
+        bgt             1b
+        it              lt
+        poplt           {r4-r9, pc}             /* none pending: return */
+2:                                              /* tail: a single sample is left */
+        mul             r4,  r6,  r8
+        mla             r4,  r7,  r9,  r4
+        ldr             r5,  [r0]
+        add_sh          r5,  r5,  r4,  asr r3
+        str             r5,  [r0]
+        pop             {r4-r9, pc}
+endfunc
+
+function ff_flac_lpc_16_arm, export=1           /* exported entry; per the C prototype: r0=samples, r1=coeffs, r2=order, r3=qlevel, [sp]=len */
+        cmp             r2,  #2
+        blt             flac_lpc_16_1_arm       /* order < 2: specialised order-1 routine (plain branch, it returns to our caller) */
+        beq             flac_lpc_16_2_arm       /* order == 2: specialised order-2 routine */
+/* general case, order > 2 */
+        ldr             r12, [sp]               /* r12 = len */
+        subs            r12, r12, r2            /* r12 = len - order = samples to reconstruct */
+        it              le
+        bxle            lr                      /* nothing to reconstruct */
+/* r4/r5 accumulate the predictions for two adjacent output samples */
+        push            {r4-r9, lr}
+/* r12 - 1: zero means a single sample, handled by the scalar tail at 3 */
+        subs            r12, r12, #1
+        beq             3f
+1:                                              /* outer loop: two output samples per pass */
+        sub             lr,  r2,  #2            /* lr = order - 2, inner-loop counter */
+        mov             r4,  #0
+        mov             r5,  #0
+/* prime the pipeline with the first sample and first coefficient of the window */
+        ldr             r7,  [r0], #4
+        ldr             r9,  [r1], #4
+2:                                              /* inner loop: two coefficients per pass, applied to both accumulators */
+        mla             r4,  r7,  r9,  r4
+        ldm             r0!, {r6, r7}
+        mla             r5,  r6,  r9,  r5
+        ldm             r1!, {r8, r9}
+        mla             r4,  r6,  r8,  r4
+        subs            lr,  lr,  #2
+        mla             r5,  r7,  r8,  r5
+        bgt             2b
+        blt             6f                      /* counter went negative: only the pending coefficient in r9 remains */
+/* counter hit zero: consume one more sample/coefficient pair before the final step */
+        mla             r4,  r7,  r9,  r4
+        ldr             r7,  [r0], #4
+        mla             r5,  r7,  r9,  r5
+        ldr             r9,  [r1], #4
+6:                                              /* last coefficient, then apply both predictions */
+        mla             r4,  r7,  r9,  r4
+        ldm             r0,  {r6, r7}           /* the two stored values to be reconstructed, r0 not advanced yet */
+        add_sh          r6,  r6,  r4,  asr r3   /* first result = value + (r4 >> qlevel) */
+        mla             r5,  r6,  r9,  r5       /* second prediction needs the first result */
+        add_sh          r7,  r7,  r5,  asr r3   /* second result = value + (r5 >> qlevel) */
+        stm             r0!, {r6, r7}
+        sub             r0,  r0,  r2,  lsl #2   /* step samples pointer back by order words (net +2 samples per outer pass) */
+        sub             r1,  r1,  r2,  lsl #2   /* step coeffs pointer back by order words */
+/* two samples done */
+        subs            r12, r12, #2
+        bgt             1b
+        it              lt
+        poplt           {r4-r9, pc}             /* none pending: return */
+3:                                              /* tail: one sample left, plain dot product over the window */
+        mov             r4,  #0
+4:
+        ldr             r5,  [r1], #4
+        ldr             r6,  [r0], #4
+        mla             r4,  r5,  r6,  r4
+        subs            r2,  r2,  #1            /* r2 (order) doubles as the loop counter here */
+        bgt             4b
+        ldr             r5,  [r0]
+        add_sh          r5,  r5,  r4,  asr r3
+        str             r5,  [r0]
+        pop             {r4-r9, pc}
+endfunc
--- a/media/ffvpx/libavcodec/arm/flacdsp_init_arm.c
+++ b/media/ffvpx/libavcodec/arm/flacdsp_init_arm.c
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2012 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/flacdsp.h"
+#include "config.h"
+
+void ff_flac_lpc_16_arm(int32_t *samples, const int coeffs[32], int order, /* implemented in flacdsp_arm.S */
+                        int qlevel, int len); /* the assembly updates samples[] in place */
+
+av_cold void ff_flacdsp_init_arm(FLACDSPContext *c, enum AVSampleFormat fmt, int channels,
+                                 int bps) /* fmt, channels and bps are not used by this implementation */
+{
+    if (CONFIG_FLAC_DECODER) /* presumably a build-time 0/1 constant from config.h - confirm */
+        c->lpc16 = ff_flac_lpc_16_arm; /* install the ARM assembly routine; no CPU-flag check is made here */
+}
--- a/media/ffvpx/libavcodec/arm/mathops.h
+++ b/media/ffvpx/libavcodec/arm/mathops.h
@@ -0,0 +1,108 @@
+/*
+ * simple math operations
+ * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_ARM_MATHOPS_H
+#define AVCODEC_ARM_MATHOPS_H
+
+#include <stdint.h>
+#include "config.h"
+#include "libavutil/common.h"
+
+#if HAVE_INLINE_ASM
+
+#if HAVE_ARMV6_INLINE
+#define MULH MULH /* self-define; presumably lets generic code detect this override with #ifndef - confirm */
+static inline av_const int MULH(int a, int b) /* returns the most significant 32 bits of the signed 64-bit product a * b */
+{
+    int r;
+    __asm__ ("smmul %0, %1, %2" : "=r"(r) : "r"(a), "r"(b)); /* SMMUL: signed most-significant-word multiply */
+    return r;
+}
+
+#define FASTDIV FASTDIV /* self-define; presumably lets generic code detect this override - confirm */
+static av_always_inline av_const int FASTDIV(int a, int b) /* ARMv6 variant: division via the ff_inverse[] table */
+{
+    int r;
+    __asm__ ("cmp     %2, #2               \n\t" /* compare b with 2 */
+             "ldr     %0, [%3, %2, lsl #2] \n\t" /* r = word at ff_inverse + 4*b (loaded unconditionally) */
+             "ite     le                   \n\t"
+             "lsrle   %0, %1, #1           \n\t" /* b <= 2: r = a >> 1 (logical shift) */
+             "smmulgt %0, %0, %1           \n\t" /* b > 2:  r = top 32 bits of the signed product r * a */
+             : "=&r"(r) : "r"(a), "r"(b), "r"(ff_inverse) : "cc"); /* early-clobber r: it is written before a is last read */
+    return r;
+}
+
+#else /* HAVE_ARMV6_INLINE */
+
+#define FASTDIV FASTDIV /* self-define; presumably lets generic code detect this override - confirm */
+static av_always_inline av_const int FASTDIV(int a, int b) /* pre-ARMv6 variant */
+{
+    int r, t; /* t receives the low word of the product and is discarded */
+    __asm__ ("umull %1, %0, %2, %3" /* unsigned 32x32->64 multiply: low word to t, high word to r */
+             : "=&r"(r), "=&r"(t) : "r"(a), "r"(ff_inverse[b])); /* table lookup is done in C */
+    return r;
+}
+#endif
+
+#define MLS64(d, a, b) MAC64(d, -(a), b)
+
+#if HAVE_ARMV5TE_INLINE
+
+/* signed 16x16 -> 32 multiply add accumulate */
+#   define MAC16(rt, ra, rb) /* rt += low halfword of ra * low halfword of rb (SMLABB) */ \
+    __asm__ ("smlabb %0, %1, %2, %0" : "+r"(rt) : "r"(ra), "r"(rb)); /* NOTE(review): expansion already ends in ';' */
+
+/* signed 16x16 -> 32 multiply */
+#   define MUL16 MUL16 /* self-define; presumably lets generic code detect this override - confirm */
+static inline av_const int MUL16(int ra, int rb) /* signed product of the low halfwords of ra and rb */
+{
+    int rt;
+    __asm__ ("smulbb %0, %1, %2" : "=r"(rt) : "r"(ra), "r"(rb)); /* SMULBB: bottom x bottom 16-bit multiply */
+    return rt;
+}
+
+#endif
+
+#define mid_pred mid_pred /* self-define; presumably lets generic code detect this override - confirm */
+static inline av_const int mid_pred(int a, int b, int c) /* returns the median of a, b and c */
+{
+    int m;
+    __asm__ (
+        "mov   %0, %2  \n\t" /* m = b */
+        "cmp   %1, %2  \n\t"
+        "itt   gt      \n\t"
+        "movgt %0, %1  \n\t" /* if a > b: m = a ...          -> m = max(a, b) */
+        "movgt %1, %2  \n\t" /*           ... and a = b      -> a = min(a, b) */
+        "cmp   %1, %3  \n\t"
+        "it    le      \n\t"
+        "movle %1, %3  \n\t" /* if a <= c: a = c             -> a = max(min, c) */
+        "cmp   %0, %1  \n\t"
+        "it    gt      \n\t"
+        "movgt %0, %1  \n\t" /* if m > a: m = a              -> m = min(max, a) */
+        : "=&r"(m), "+r"(a) /* a is read-write: the local copy is used as scratch */
+        : "r"(b), "r"(c)
+        : "cc");
+    return m;
+}
+
+#endif /* HAVE_INLINE_ASM */
+
+#endif /* AVCODEC_ARM_MATHOPS_H */
--- a/media/ffvpx/libavcodec/arm/moz.build
+++ b/media/ffvpx/libavcodec/arm/moz.build
@@ -0,0 +1,14 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+SOURCES += [
+    'flacdsp_arm.S',
+    'flacdsp_init_arm.c',
+]
+
+FINAL_LIBRARY = 'mozavcodec'
+
+include('/media/ffvpx/ffvpxcommon.mozbuild')
--- a/media/ffvpx/libavcodec/dummy_funcs.c
+++ b/media/ffvpx/libavcodec/dummy_funcs.c
@@ -879,7 +879,9 @@ void ff_vp8dsp_init_mips(VP8DSPContext *c) {}
 void ff_vp9dsp_init_mips(VP9DSPContext *dsp, int bpp) {}
 void ff_vp9dsp_init_aarch64(VP9DSPContext *dsp, int bpp) {}
 void ff_vp9dsp_init_arm(VP9DSPContext *dsp, int bpp) {}
+#if !defined(__arm__)
 void ff_flacdsp_init_arm(FLACDSPContext *c, enum AVSampleFormat fmt, int channels, int bps) {}
+#endif
 #if !defined(HAVE_64BIT_BUILD)
 void ff_flac_decorrelate_indep8_16_sse2(uint8_t **out, int32_t **in, int channels, int len, int shift) {}
 void ff_flac_decorrelate_indep8_32_avx(uint8_t **out, int32_t **in, int channels, int len, int shift) {}
--- a/media/ffvpx/libavcodec/moz.build
+++ b/media/ffvpx/libavcodec/moz.build
@@ -7,7 +7,10 @@
 # Due to duplicate file names, we compile libavutil/x86 in its own
 # moz.build file.
 if CONFIG['FFVPX_ASFLAGS']:
-    DIRS += ['x86']
+    if CONFIG['CPU_ARCH'] == 'x86' or CONFIG['CPU_ARCH'] == 'x86_64':
+        DIRS += ['x86']
+    elif CONFIG['CPU_ARCH'] == 'arm':
+        DIRS += ['arm']

 SharedLibrary('mozavcodec')
 SOURCES += [
--- a/media/ffvpx/libavutil/arm/asm.S
+++ b/media/ffvpx/libavutil/arm/asm.S
@@ -0,0 +1,349 @@
+/*
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#ifdef __ELF__ /* line-prefix tokens: each expands to nothing (line is assembled) or to '@', which this file uses to start a comment */
+#   define ELF
+#else
+#   define ELF @
+#endif
+/* A prefixes ARM-mode-only lines, T prefixes Thumb-mode-only lines */
+#if CONFIG_THUMB
+#   define A @
+#   define T
+#else
+#   define A
+#   define T @
+#endif
+/* FUNC guards the .func/.endfunc directives */
+#if HAVE_AS_FUNC
+#   define FUNC
+#else
+#   define FUNC @
+#endif
+/* FPU guards the .fpu directive */
+#if HAVE_AS_FPU_DIRECTIVE
+#   define FPU
+#else
+#   define FPU @
+#endif
+
+#if   HAVE_NEON
+        .arch           armv7-a
+#elif HAVE_ARMV6T2
+        .arch           armv6t2
+#elif HAVE_ARMV6
+        .arch           armv6
+#elif HAVE_ARMV5TE
+        .arch           armv5te
+#endif
+#if   HAVE_AS_OBJECT_ARCH
+ELF     .object_arch    armv4
+#endif
+
+#if   HAVE_NEON
+FPU     .fpu            neon
+ELF     .eabi_attribute 10, 0           @ suppress Tag_FP_arch
+ELF     .eabi_attribute 12, 0           @ suppress Tag_Advanced_SIMD_arch
+#elif HAVE_VFP
+FPU     .fpu            vfp
+ELF     .eabi_attribute 10, 0           @ suppress Tag_FP_arch
+#endif
+
+        .syntax unified
+T       .thumb
+ELF     .eabi_attribute 25, 1           @ Tag_ABI_align_preserved
+ELF     .section .note.GNU-stack,"",%progbits @ Mark stack as non-executable
+
+.macro  function name, export=0, align=2        /* opens a function; the matching endfunc macro is defined on the fly below */
+        .set            .Lpic_idx, 0            /* number of PIC literal words queued by def_pic in this function */
+        .set            .Lpic_gp, 0             /* set to 1 by movrelx once the gp register alias is defined */
+    .macro endfunc
+      .if .Lpic_idx
+        .align          2
+        .altmacro
+        put_pic         %(.Lpic_idx - 1)        /* emit every queued literal word after the function body */
+        .noaltmacro
+      .endif
+      .if .Lpic_gp
+        .unreq          gp                      /* drop the gp alias so the next function starts clean */
+      .endif
+ELF     .size   \name, . - \name
+FUNC    .endfunc
+        .purgem endfunc                         /* endfunc removes itself, so each function gets a fresh one */
+    .endm
+        .text
+        .align          \align
+    .if \export
+        .global EXTERN_ASM\name                 /* exported symbols get the EXTERN_ASM prefix (defined outside this file) */
+ELF     .type   EXTERN_ASM\name, %function
+FUNC    .func   EXTERN_ASM\name
+EXTERN_ASM\name:
+    .else
+ELF     .type   \name, %function                /* non-exported: plain local label, no .global */
+FUNC    .func   \name
+\name:
+    .endif
+.endm
+
+.macro  const   name, align=2, relocate=0       /* opens a constant data object; endconst closes it */
+    .macro endconst
+ELF     .size   \name, . - \name
+        .purgem endconst                        /* endconst removes itself after use */
+    .endm
+.if HAVE_SECTION_DATA_REL_RO && \relocate       /* data flagged relocate=1 goes to .data.rel.ro when that section is available */
+        .section        .data.rel.ro
+.else
+        .section        .rodata
+.endif
+        .align          \align
+\name:
+.endm
+
+#if !HAVE_ARMV6T2_EXTERNAL /* replacement for the movw instruction when ARMv6T2 is not enabled for external asm */
+.macro  movw    rd, val
+        mov     \rd, \val &  255                /* low 8 bits of val */
+        orr     \rd, \val & ~255                /* OR in the remaining bits of val */
+.endm
+#endif
+
+.macro  mov32   rd, val                         /* load the 32-bit constant val into rd */
+#if HAVE_ARMV6T2_EXTERNAL
+        movw            \rd, #(\val) & 0xffff   /* low 16 bits */
+    .if (\val) >> 16
+        movt            \rd, #(\val) >> 16      /* high 16 bits, emitted only when they are non-zero */
+    .endif
+#else
+        ldr             \rd, =\val              /* literal-pool load */
+#endif
+.endm
+
+.macro  put_pic         num                     /* invoke the generated macro put_pic_<num> */
+        put_pic_\num
+.endm
+
+.macro  do_def_pic      num, val, label         /* define put_pic_<num>, which emits "label: .word val" */
+    .macro put_pic_\num
+      .if \num
+        .altmacro
+        put_pic         %(\num - 1)             /* first emit every lower-numbered entry */
+        .noaltmacro
+      .endif
+\label: .word           \val
+        .purgem         put_pic_\num            /* one-shot: the generated macro deletes itself */
+    .endm
+.endm
+
+.macro  def_pic         val, label              /* queue one literal word for emission by endfunc */
+        .altmacro
+        do_def_pic      %.Lpic_idx, \val, \label /* %expr passes the current counter value as a number */
+        .noaltmacro
+        .set            .Lpic_idx, .Lpic_idx + 1
+.endm
+
+.macro  ldpic           rd,  val, indir=0       /* PC-relative address of val into rd; indir=1 loads the word stored there instead */
+        ldr             \rd, .Lpicoff\@         /* offset word queued by def_pic below; the assembler's \@ counter makes the label unique */
+.Lpic\@:
+    .if \indir
+A       ldr             \rd, [pc, \rd]
+T       add             \rd, pc
+T       ldr             \rd, [\rd]
+    .else
+        add             \rd, pc
+    .endif
+        def_pic         \val - (.Lpic\@ + (8 >> CONFIG_THUMB)), .Lpicoff\@ /* pc reads 8 bytes ahead in ARM state, 4 in Thumb */
+.endm
+
+.macro  movrel rd, val                          /* address of symbol val into rd */
+#if CONFIG_PIC
+        ldpic           \rd, \val               /* position-independent: PC-relative */
+#elif HAVE_ARMV6T2_EXTERNAL && !defined(__APPLE__)
+        movw            \rd, #:lower16:\val     /* absolute address built from two 16-bit halves */
+        movt            \rd, #:upper16:\val
+#else
+        ldr             \rd, =\val              /* literal-pool load */
+#endif
+.endm
+
+.macro  movrelx         rd,  val, gp            /* address of val into rd; under PIC goes through the GOT (ELF) or a non-lazy pointer (Apple) */
+    .ifc \rd,\gp
+        .error      "movrelx needs two distinct registers"
+    .endif
+    .ifc \rd\()_\gp,r12_                        /* rd is r12 and no gp given: r12 is also the default gp chosen below */
+        .warning    "movrelx rd=\rd without explicit set gp"
+    .endif
+    .ifc \rd\()_\gp,ip_                         /* same check for the ip spelling */
+        .warning    "movrelx rd=\rd without explicit set gp"
+    .endif
+#if CONFIG_PIC && defined(__ELF__)
+    .ifnb \gp                                   /* caller named a gp register: (re)bind the alias and load the GOT address */
+      .if .Lpic_gp
+        .unreq          gp
+      .endif
+        gp      .req    \gp
+        ldpic           gp,  _GLOBAL_OFFSET_TABLE_
+    .elseif !.Lpic_gp                           /* first use in this function without a gp argument: default to r12 */
+        gp      .req    r12
+        ldpic           gp,  _GLOBAL_OFFSET_TABLE_
+    .endif
+        .set            .Lpic_gp, 1
+        ldr             \rd, .Lpicoff\@         /* GOT offset of val, queued by def_pic below */
+        ldr             \rd, [gp, \rd]          /* load the GOT entry */
+        def_pic         \val(GOT), .Lpicoff\@
+#elif CONFIG_PIC && defined(__APPLE__)
+        ldpic           \rd, .Lpic\@, indir=1   /* load through the pointer slot emitted just below */
+        .non_lazy_symbol_pointer
+.Lpic\@:
+        .indirect_symbol \val
+        .word           0
+        .text
+#else
+        movrel          \rd, \val               /* no PIC indirection needed */
+#endif
+.endm
+
+.macro  add_sh          rd,  rn,  rm,  sh:vararg /* rd = rn + (rm shifted by sh) */
+A       add             \rd, \rn, \rm, \sh      /* ARM state: single instruction */
+T       mov             \rm, \rm, \sh           /* Thumb state: two steps; note that rm is overwritten with the shifted value */
+T       add             \rd, \rn, \rm
+.endm
+
+.macro  ldr_pre         rt,  rn,  rm:vararg     /* rn += rm, then rt = [rn] */
+A       ldr             \rt, [\rn, \rm]!
+T       add             \rn, \rn, \rm
+T       ldr             \rt, [\rn]
+.endm
+
+.macro  ldr_dpre        rt,  rn,  rm:vararg     /* rn -= rm, then rt = [rn] */
+A       ldr             \rt, [\rn, -\rm]!
+T       sub             \rn, \rn, \rm
+T       ldr             \rt, [\rn]
+.endm
+
+.macro  ldr_nreg        rt,  rn,  rm:vararg     /* rt = [rn - rm], rn unchanged */
+A       ldr             \rt, [\rn, -\rm]
+T       sub             \rt, \rn, \rm           /* Thumb: rt doubles as the address scratch register */
+T       ldr             \rt, [\rt]
+.endm
+
+.macro  ldr_post        rt,  rn,  rm:vararg     /* rt = [rn], then rn += rm */
+A       ldr             \rt, [\rn], \rm
+T       ldr             \rt, [\rn]
+T       add             \rn, \rn, \rm
+.endm
+
+.macro  ldrc_pre        cc,  rt,  rn,  rm:vararg /* conditional (cc) form of ldr_pre */
+A       ldr\cc          \rt, [\rn, \rm]!
+T       itt             \cc
+T       add\cc          \rn, \rn, \rm
+T       ldr\cc          \rt, [\rn]
+.endm
+
+.macro  ldrd_reg        rt,  rt2, rn,  rm       /* rt:rt2 = doubleword at [rn + rm], rn unchanged */
+A       ldrd            \rt, \rt2, [\rn, \rm]
+T       add             \rt, \rn, \rm           /* Thumb: rt doubles as the address scratch register */
+T       ldrd            \rt, \rt2, [\rt]
+.endm
+
+.macro  ldrd_post       rt,  rt2, rn,  rm       /* rt:rt2 = doubleword at [rn], then rn += rm */
+A       ldrd            \rt, \rt2, [\rn], \rm
+T       ldrd            \rt, \rt2, [\rn]
+T       add             \rn, \rn, \rm
+.endm
+
+.macro  ldrh_pre        rt,  rn,  rm            /* rn += rm, then rt = halfword at [rn] */
+A       ldrh            \rt, [\rn, \rm]!
+T       add             \rn, \rn, \rm
+T       ldrh            \rt, [\rn]
+.endm
+
+.macro  ldrh_dpre       rt,  rn,  rm            /* rn -= rm, then rt = halfword at [rn] */
+A       ldrh            \rt, [\rn, -\rm]!
+T       sub             \rn, \rn, \rm
+T       ldrh            \rt, [\rn]
+.endm
+
+.macro  ldrh_post       rt,  rn,  rm            /* rt = halfword at [rn], then rn += rm */
+A       ldrh            \rt, [\rn], \rm
+T       ldrh            \rt, [\rn]
+T       add             \rn, \rn, \rm
+.endm
+
+.macro  ldrb_post       rt,  rn,  rm            /* rt = byte at [rn], then rn += rm */
+A       ldrb            \rt, [\rn], \rm
+T       ldrb            \rt, [\rn]
+T       add             \rn, \rn, \rm
+.endm
+
+.macro  str_post       rt,  rn,  rm:vararg      /* [rn] = rt, then rn += rm */
+A       str             \rt, [\rn], \rm
+T       str             \rt, [\rn]
+T       add             \rn, \rn, \rm
+.endm
+
+.macro  strb_post       rt,  rn,  rm:vararg     /* byte at [rn] = rt, then rn += rm */
+A       strb            \rt, [\rn], \rm
+T       strb            \rt, [\rn]
+T       add             \rn, \rn, \rm
+.endm
+
+.macro  strd_post       rt,  rt2, rn,  rm       /* doubleword at [rn] = rt:rt2, then rn += rm */
+A       strd            \rt, \rt2, [\rn], \rm
+T       strd            \rt, \rt2, [\rn]
+T       add             \rn, \rn, \rm
+.endm
+
+.macro  strh_pre        rt,  rn,  rm            /* rn += rm, then halfword at [rn] = rt */
+A       strh            \rt, [\rn, \rm]!
+T       add             \rn, \rn, \rm
+T       strh            \rt, [\rn]
+.endm
+
+.macro  strh_dpre       rt,  rn,  rm            /* rn -= rm, then halfword at [rn] = rt */
+A       strh            \rt, [\rn, -\rm]!
+T       sub             \rn, \rn, \rm
+T       strh            \rt, [\rn]
+.endm
+
+.macro  strh_post       rt,  rn,  rm            /* halfword at [rn] = rt, then rn += rm */
+A       strh            \rt, [\rn], \rm
+T       strh            \rt, [\rn]
+T       add             \rn, \rn, \rm
+.endm
+
+.macro  strh_dpost       rt,  rn,  rm           /* halfword at [rn] = rt, then rn -= rm */
+A       strh            \rt, [\rn], -\rm
+T       strh            \rt, [\rn]
+T       sub             \rn, \rn, \rm
+.endm
+
+#if HAVE_VFP_ARGS /* VFP / NOVFP are line prefixes like A and T: exactly one of them expands to nothing, the other to '@' */
+ELF     .eabi_attribute 28, 1                   /* presumably Tag_ABI_VFP_args (hard-float calling convention) - confirm against the ARM EABI addenda */
+#   define VFP
+#   define NOVFP @
+#else
+#   define VFP   @
+#   define NOVFP
+#endif
+/* X(s) pastes the EXTERN_ASM prefix onto s; the two-level GLUE/JOIN lets EXTERN_ASM expand before ## is applied */
+#define GLUE(a, b) a ## b
+#define JOIN(a, b) GLUE(a, b)
+#define X(s) JOIN(EXTERN_ASM, s)
--- a/media/ffvpx/libavutil/arm/bswap.h
+++ b/media/ffvpx/libavutil/arm/bswap.h
@@ -0,0 +1,67 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_ARM_BSWAP_H
+#define AVUTIL_ARM_BSWAP_H
+
+#include <stdint.h>
+#include "config.h"
+#include "libavutil/attributes.h"
+
+#ifdef __ARMCC_VERSION
+
+#if HAVE_ARMV6
+#define av_bswap32 av_bswap32 /* self-define; presumably lets the generic bswap header skip its fallback - confirm */
+static av_always_inline av_const uint32_t av_bswap32(uint32_t x) /* armcc build (__ARMCC_VERSION defined) with ARMv6 */
+{
+    return __rev(x); /* compiler intrinsic; presumably maps to the REV byte-reverse instruction - confirm */
+}
+#endif /* HAVE_ARMV6 */
+
+#elif HAVE_INLINE_ASM
+
+#if HAVE_ARMV6_INLINE
+#define av_bswap16 av_bswap16 /* self-define; presumably lets the generic bswap header skip its fallback - confirm */
+static av_always_inline av_const unsigned av_bswap16(unsigned x) /* takes and returns a full unsigned, not uint16_t */
+{
+    __asm__("rev16 %0, %0" : "+r"(x)); /* REV16 swaps the two bytes inside each 16-bit half of the register */
+    return x;
+}
+#endif
+
+#if AV_GCC_VERSION_AT_MOST(4,4)
+#define av_bswap32 av_bswap32 /* only provided for GCC <= 4.4 (see the enclosing AV_GCC_VERSION_AT_MOST check) */
+static av_always_inline av_const uint32_t av_bswap32(uint32_t x) /* returns x with its four bytes reversed */
+{
+#if HAVE_ARMV6_INLINE
+    __asm__("rev %0, %0" : "+r"(x)); /* single-instruction byte reverse */
+#else
+    uint32_t t; /* scratch register for the four-instruction sequence below */
+    __asm__ ("eor %1, %0, %0, ror #16 \n\t" /* t = x ^ ror(x, 16) */
+             "bic %1, %1, #0xFF0000   \n\t" /* clear bits 16..23 of t */
+             "mov %0, %0, ror #8      \n\t" /* x = ror(x, 8) */
+             "eor %0, %0, %1, lsr #8  \n\t" /* x ^= t >> 8 */
+             : "+r"(x), "=&r"(t));
+#endif /* HAVE_ARMV6_INLINE */
+    return x;
+}
+#endif /* AV_GCC_VERSION_AT_MOST(4,4) */
+
+#endif /* __ARMCC_VERSION */
+
+#endif /* AVUTIL_ARM_BSWAP_H */
--- a/media/ffvpx/libavutil/arm/cpu.c
+++ b/media/ffvpx/libavutil/arm/cpu.c
@@ -0,0 +1,170 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/cpu.h"
+#include "libavutil/cpu_internal.h"
+#include "config.h"
+
+#define CORE_FLAG(f) /* AV_CPU_FLAG_<f> if the build enabled <f> for external or inline asm, else 0 */ \
+    (AV_CPU_FLAG_ ## f * (HAVE_ ## f ## _EXTERNAL || HAVE_ ## f ## _INLINE))
+/* Union of the build-time flags; ff_get_cpu_flags_arm() starts from this value before probing the running CPU. */
+#define CORE_CPU_FLAGS                          \
+    (CORE_FLAG(ARMV5TE) |                       \
+     CORE_FLAG(ARMV6)   |                       \
+     CORE_FLAG(ARMV6T2) |                       \
+     CORE_FLAG(VFP)     |                       \
+     CORE_FLAG(VFPV3)   |                       \
+     CORE_FLAG(NEON))
+
+#if defined __linux__ || defined __ANDROID__
+
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include "libavutil/avstring.h"
+
+#define AT_HWCAP        16
+
+/* Relevant HWCAP values from kernel headers */
+#define HWCAP_VFP       (1 << 6)
+#define HWCAP_EDSP      (1 << 7)
+#define HWCAP_THUMBEE   (1 << 11)
+#define HWCAP_NEON      (1 << 12)
+#define HWCAP_VFPv3     (1 << 13)
+#define HWCAP_TLS       (1 << 15)
+
+static int get_hwcap(uint32_t *hwcap)
+{
+    /* One record of /proc/self/auxv, read as two 32-bit words. */
+    struct { uint32_t a_type; uint32_t a_val; } entry;
+    FILE *auxv_file = fopen("/proc/self/auxv", "r");
+    int status = -1;
+
+    if (auxv_file == NULL)
+        return status;
+    /* Walk the records until AT_HWCAP is found or the file runs out. */
+    while (status < 0 && fread(&entry, sizeof(entry), 1, auxv_file) == 1) {
+        if (entry.a_type != AT_HWCAP)
+            continue;
+        *hwcap = entry.a_val;
+        status = 0;
+    }
+
+    fclose(auxv_file);
+    return status; /* 0 when *hwcap was written, -1 otherwise */
+}
+
+static int get_cpuinfo(uint32_t *hwcap)
+{
+    /* Space-delimited feature tokens and the HWCAP bits each one sets. */
+    static const struct { const char *token; uint32_t bits; } features[] = {
+        { " edsp ",    HWCAP_EDSP },
+        { " tls ",     HWCAP_TLS },
+        { " thumbee ", HWCAP_THUMBEE },
+        { " vfp ",     HWCAP_VFP },
+        { " vfpv3 ",   HWCAP_VFPv3 },
+        { " neon ",    HWCAP_NEON },
+        { " asimd ",   HWCAP_NEON },
+        { " fp ",      HWCAP_VFP | HWCAP_VFPv3 }, /* 64 bit ARMv8 kernels */
+    };
+    FILE *cpuinfo = fopen("/proc/cpuinfo", "r");
+    char line[200];
+    size_t i;
+
+    if (cpuinfo == NULL)
+        return -1;
+    *hwcap = 0;
+    while (fgets(line, sizeof(line), cpuinfo)) {
+        if (!av_strstart(line, "Features", NULL))
+            continue;
+        for (i = 0; i < sizeof(features) / sizeof(features[0]); i++)
+            if (strstr(line, features[i].token))
+                *hwcap |= features[i].bits;
+        break; /* only the first "Features" line is examined */
+    }
+    fclose(cpuinfo);
+    return 0;
+}
+
+int ff_get_cpu_flags_arm(void)
+{
+    int flags = CORE_CPU_FLAGS;
+    uint32_t hwcap;
+    int has_vfpv3_or_neon;
+
+    /* Try /proc/self/auxv first, then /proc/cpuinfo; if both fail
+       only the build-time flags are reported. */
+    if (get_hwcap(&hwcap) < 0 && get_cpuinfo(&hwcap) < 0)
+        return flags;
+
+#define check_cap(cap, flag) do {               \
+        if (hwcap & HWCAP_ ## cap)              \
+            flags |= AV_CPU_FLAG_ ## flag;      \
+    } while (0)
+
+    /* No flags explicitly indicate v6 or v6T2 so check others which
+       imply support. */
+    check_cap(EDSP,    ARMV5TE);
+    check_cap(TLS,     ARMV6);
+    check_cap(THUMBEE, ARMV6T2);
+    check_cap(VFP,     VFP);
+    check_cap(VFPv3,   VFPV3);
+    check_cap(NEON,    NEON);
+    has_vfpv3_or_neon = (flags & (AV_CPU_FLAG_VFPV3 | AV_CPU_FLAG_NEON)) != 0;
+
+    /* The v6 checks above are not reliable so let higher flags
+       trickle down. */
+    if (has_vfpv3_or_neon) {
+        flags |= AV_CPU_FLAG_ARMV6T2;
+    } else if (flags & (AV_CPU_FLAG_ARMV6T2 | AV_CPU_FLAG_ARMV6)) {
+        /* 'setend' is deprecated on ARMv8 and serializing on some ARMv7
+         * cores, so functions relying on it are only enabled on ARMv6. */
+        flags |= AV_CPU_FLAG_SETEND;
+    }
+    if (flags & AV_CPU_FLAG_ARMV6T2)
+        flags |= AV_CPU_FLAG_ARMV6;
+    /* set the virtual VFPv2 vector mode flag */
+    if ((flags & AV_CPU_FLAG_VFP) && !has_vfpv3_or_neon)
+        flags |= AV_CPU_FLAG_VFP_VM;
+    return flags;
+}
+
+#else
+
+int ff_get_cpu_flags_arm(void)
+{
+    /* No runtime probing in this branch: report the build-time feature set. */
+    int flags = AV_CPU_FLAG_ARMV5TE * HAVE_ARMV5TE | AV_CPU_FLAG_ARMV6 * HAVE_ARMV6;
+    flags |= AV_CPU_FLAG_ARMV6T2 * HAVE_ARMV6T2 | AV_CPU_FLAG_VFP  * HAVE_VFP;
+    flags |= AV_CPU_FLAG_VFPV3   * HAVE_VFPV3   | AV_CPU_FLAG_NEON * HAVE_NEON;
+    /* SETEND is reported only when neither NEON nor VFPv3 is enabled. */
+    flags |= AV_CPU_FLAG_SETEND  * !(HAVE_NEON | HAVE_VFPV3);
+    return flags;
+}
+
+#endif
+
+size_t ff_get_cpu_max_align_arm(void)
+{
+    /*
+     * 16 when av_get_cpu_flags() reports the NEON flag,
+     * 8 otherwise.
+     */
+    const int has_neon = (av_get_cpu_flags() & AV_CPU_FLAG_NEON) != 0;
+    return has_neon ? 16 : 8;
+}
--- a/media/ffvpx/libavutil/arm/cpu.h
+++ b/media/ffvpx/libavutil/arm/cpu.h
@@ -0,0 +1,39 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_ARM_CPU_H
+#define AVUTIL_ARM_CPU_H
+
+#include "config.h"
+#include "libavutil/cpu.h"
+#include "libavutil/cpu_internal.h"
+
+#define have_armv5te(flags) CPUEXT(flags, ARMV5TE) /* one predicate per ARM feature; CPUEXT is defined outside this header (presumably libavutil/cpu_internal.h) */
+#define have_armv6(flags)   CPUEXT(flags, ARMV6)
+#define have_armv6t2(flags) CPUEXT(flags, ARMV6T2)
+#define have_vfp(flags)     CPUEXT(flags, VFP)
+#define have_vfpv3(flags)   CPUEXT(flags, VFPV3)
+#define have_neon(flags)    CPUEXT(flags, NEON)
+#define have_setend(flags)  CPUEXT(flags, SETEND)
+
+/* some functions use the VFPv2 vector mode which is deprecated in ARMv7-A
+ * and might trap on such CPU depending on the OS configuration */
+#define have_vfp_vm(flags) /* true only when the build has VFP and the AV_CPU_FLAG_VFP_VM bit is set in flags */ \
+    (HAVE_VFP && ((flags) & AV_CPU_FLAG_VFP_VM))
+
+#endif /* AVUTIL_ARM_CPU_H */
--- a/media/ffvpx/libavutil/arm/float_dsp_arm.h
+++ b/media/ffvpx/libavutil/arm/float_dsp_arm.h
@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_ARM_FLOAT_DSP_ARM_H
+#define AVUTIL_ARM_FLOAT_DSP_ARM_H
+
+#include "libavutil/float_dsp.h"
+
+/* Install the VFP routines into fdsp; cpu_flags decides whether the routines
+ * that rely on VFP vector mode are included. */
+void ff_float_dsp_init_vfp(AVFloatDSPContext *fdsp, int cpu_flags);
+/* Install the NEON routines into fdsp. Performs no CPU check of its own. */
+void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp);
+
+#endif /* AVUTIL_ARM_FLOAT_DSP_ARM_H */
--- a/media/ffvpx/libavutil/arm/float_dsp_init_arm.c
+++ b/media/ffvpx/libavutil/arm/float_dsp_init_arm.c
@ -0,0 +1,32 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/float_dsp.h"
+#include "cpu.h"
+#include "float_dsp_arm.h"
+
+/**
+ * Select ARM-specific float DSP routines for fdsp from the runtime CPU flags.
+ */
+av_cold void ff_float_dsp_init_arm(AVFloatDSPContext *fdsp)
+{
+    const int flags = av_get_cpu_flags();
+
+    /* VFP is applied first, so any entry that NEON also provides ends up
+     * pointing at the NEON version. */
+    if (have_vfp(flags)) {
+        ff_float_dsp_init_vfp(fdsp, flags);
+    }
+    if (have_neon(flags)) {
+        ff_float_dsp_init_neon(fdsp);
+    }
+}
--- a/media/ffvpx/libavutil/arm/float_dsp_init_neon.c
+++ b/media/ffvpx/libavutil/arm/float_dsp_init_neon.c
@ -0,0 +1,59 @@
+/*
+ * ARM NEON optimised Float DSP functions
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/float_dsp.h"
+#include "float_dsp_arm.h"
+
+/* Prototypes of the NEON routines installed by ff_float_dsp_init_neon().
+ * The bodies are written in assembly (float_dsp_neon.S). */
+void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int len);
+
+void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul,
+                                int len);
+
+void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul,
+                                int len);
+
+void ff_vector_fmul_window_neon(float *dst, const float *src0,
+                                const float *src1, const float *win, int len);
+
+void ff_vector_fmul_add_neon(float *dst, const float *src0, const float *src1,
+                             const float *src2, int len);
+
+void ff_vector_fmul_reverse_neon(float *dst, const float *src0,
+                                 const float *src1, int len);
+
+void ff_butterflies_float_neon(float *v1, float *v2, int len);
+
+float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len);
+
+/**
+ * Point every float DSP entry that has a NEON implementation at it.
+ * No CPU check happens here; the caller decides whether NEON may be used.
+ */
+av_cold void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp)
+{
+    /* element-wise products */
+    fdsp->vector_fmul         = ff_vector_fmul_neon;
+    fdsp->vector_fmul_add     = ff_vector_fmul_add_neon;
+    fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_neon;
+    fdsp->vector_fmul_window  = ff_vector_fmul_window_neon;
+
+    /* products with a scalar multiplier */
+    fdsp->vector_fmul_scalar  = ff_vector_fmul_scalar_neon;
+    fdsp->vector_fmac_scalar  = ff_vector_fmac_scalar_neon;
+
+    /* reduction and butterfly */
+    fdsp->scalarproduct_float = ff_scalarproduct_float_neon;
+    fdsp->butterflies_float   = ff_butterflies_float_neon;
+}
--- a/media/ffvpx/libavutil/arm/float_dsp_init_vfp.c
+++ b/media/ffvpx/libavutil/arm/float_dsp_init_vfp.c
@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2008 Siarhei Siamashka <ssvb@users.sourceforge.net>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/float_dsp.h"
+#include "cpu.h"
+#include "float_dsp_arm.h"
+
+/* Prototypes of the VFP routines installed by ff_float_dsp_init_vfp().
+ * The bodies are written in assembly (float_dsp_vfp.S). */
+void ff_vector_fmul_vfp(float *dst, const float *src0, const float *src1,
+                        int len);
+
+void ff_vector_fmul_window_vfp(float *dst, const float *src0,
+                               const float *src1, const float *win, int len);
+
+void ff_vector_fmul_reverse_vfp(float *dst, const float *src0,
+                                const float *src1, int len);
+
+void ff_butterflies_float_vfp(float *av_restrict v1, float *av_restrict v2, int len);
+
+/**
+ * Install the VFP float DSP routines into fdsp.
+ *
+ * @param fdsp       context whose function pointers are overwritten
+ * @param cpu_flags  runtime CPU flags, tested with have_vfp_vm()
+ */
+av_cold void ff_float_dsp_init_vfp(AVFloatDSPContext *fdsp, int cpu_flags)
+{
+    /* The reverse multiply never reprograms FPSCR, so it is installed
+     * regardless of vector-mode support. */
+    fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_vfp;
+
+    if (!have_vfp_vm(cpu_flags))
+        return;
+
+    /* These three switch the FPU to short-vector mode and therefore need
+     * the VFP_VM capability. */
+    fdsp->vector_fmul        = ff_vector_fmul_vfp;
+    fdsp->vector_fmul_window = ff_vector_fmul_window_vfp;
+    fdsp->butterflies_float  = ff_butterflies_float_vfp;
+}
--- a/media/ffvpx/libavutil/arm/float_dsp_neon.S
+++ b/media/ffvpx/libavutil/arm/float_dsp_neon.S
@ -0,0 +1,271 @@
+/*
+ * ARM NEON optimised Float DSP functions
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "asm.S"
+
+/*
+ * void ff_vector_fmul_neon(float *dst, const float *src0,
+ *                          const float *src1, int len)
+ * r0 = dst, r1 = src0, r2 = src1, r3 = len
+ *
+ * dst[i] = src0[i] * src1[i]
+ *
+ * Every load and store carries a :128 alignment qualifier. The first 8
+ * products are computed before len is examined, so len is presumably always
+ * a multiple of 8 and at least 8 -- confirm against the callers.
+ */
+function ff_vector_fmul_neon, export=1
+        subs            r3,  r3,  #8
+        vld1.32         {d0-d3},  [r1,:128]!
+        vld1.32         {d4-d7},  [r2,:128]!
+        vmul.f32        q8,  q0,  q2
+        vmul.f32        q9,  q1,  q3
+        /* flags still come from the subs above: len == 8, just store */
+        beq             3f
+        /* ip = remaining count rounded down to a multiple of 16 */
+        bics            ip,  r3,  #15
+        beq             2f
+        /* Main loop: 16 floats per pass. Products of the previous step are
+         * stored while the next ones are being computed. */
+1:      subs            ip,  ip,  #16
+        vld1.32         {d0-d1},  [r1,:128]!
+        vld1.32         {d4-d5},  [r2,:128]!
+        vmul.f32        q10, q0,  q2
+        vld1.32         {d2-d3},  [r1,:128]!
+        vld1.32         {d6-d7},  [r2,:128]!
+        vmul.f32        q11, q1,  q3
+        vst1.32         {d16-d19},[r0,:128]!
+        vld1.32         {d0-d1},  [r1,:128]!
+        vld1.32         {d4-d5},  [r2,:128]!
+        vmul.f32        q8,  q0,  q2
+        vld1.32         {d2-d3},  [r1,:128]!
+        vld1.32         {d6-d7},  [r2,:128]!
+        vmul.f32        q9,  q1,  q3
+        vst1.32         {d20-d23},[r0,:128]!
+        bne             1b
+        ands            r3,  r3,  #15
+        beq             3f
+        /* One more group of 8: store the pending products in q8/q9 while
+         * computing the final ones. */
+2:      vld1.32         {d0-d1},  [r1,:128]!
+        vld1.32         {d4-d5},  [r2,:128]!
+        vst1.32         {d16-d17},[r0,:128]!
+        vmul.f32        q8,  q0,  q2
+        vld1.32         {d2-d3},  [r1,:128]!
+        vld1.32         {d6-d7},  [r2,:128]!
+        vst1.32         {d18-d19},[r0,:128]!
+        vmul.f32        q9,  q1,  q3
+        /* Store the last 8 products and return. */
+3:      vst1.32         {d16-d19},[r0,:128]!
+        bx              lr
+endfunc
+
+/*
+ * void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul,
+ *                                 int len)
+ *
+ * dst[i] += src[i] * mul
+ *
+ * Lines prefixed with VFP / NOVFP are alternatives selected by macros from
+ * asm.S (not visible here; presumably hard-float vs. soft-float ABI). In the
+ * VFP variant mul arrives in d0[0] and len in r2; in the NOVFP variant mul
+ * arrives in r2 and len in r3. The register not used for len becomes acc,
+ * the read pointer into dst, while r0 stays the write pointer.
+ * All accesses carry a :128 alignment qualifier.
+ */
+function ff_vector_fmac_scalar_neon, export=1
+VFP     len .req r2
+VFP     acc .req r3
+NOVFP   len .req r3
+NOVFP   acc .req r2
+VFP     vdup.32         q15, d0[0]
+NOVFP   vdup.32         q15, r2
+        /* r12 = len rounded down to a multiple of 16 */
+        bics            r12, len, #15
+        /* mov leaves the flags from bics intact for the beq below */
+        mov             acc, r0
+        beq             3f
+        vld1.32         {q0},     [r1,:128]!
+        vld1.32         {q8},     [acc,:128]!
+        vld1.32         {q1},     [r1,:128]!
+        vld1.32         {q9},     [acc,:128]!
+        /* Main loop: 16 floats per pass, loads for the next pass overlap
+         * the stores of the current one. */
+1:      vmla.f32        q8,  q0,  q15
+        vld1.32         {q2},     [r1,:128]!
+        vld1.32         {q10},    [acc,:128]!
+        vmla.f32        q9,  q1,  q15
+        vld1.32         {q3},     [r1,:128]!
+        vld1.32         {q11},    [acc,:128]!
+        vmla.f32        q10, q2,  q15
+        vst1.32         {q8},     [r0,:128]!
+        vmla.f32        q11, q3,  q15
+        vst1.32         {q9},     [r0,:128]!
+        subs            r12, r12, #16
+        beq             2f
+        vld1.32         {q0},     [r1,:128]!
+        vld1.32         {q8},     [acc,:128]!
+        vst1.32         {q10},    [r0,:128]!
+        vld1.32         {q1},     [r1,:128]!
+        vld1.32         {q9},     [acc,:128]!
+        vst1.32         {q11},    [r0,:128]!
+        b               1b
+        /* Flush the last two results of the main loop. */
+2:      vst1.32         {q10},    [r0,:128]!
+        vst1.32         {q11},    [r0,:128]!
+        ands            len, len, #15
+        it              eq
+        bxeq            lr
+        /* Tail: 4 floats per pass until len is used up. */
+3:      vld1.32         {q0},     [r1,:128]!
+        vld1.32         {q8},     [acc,:128]!
+        vmla.f32        q8,  q0,  q15
+        vst1.32         {q8},     [r0,:128]!
+        subs            len, len, #4
+        bgt             3b
+        bx              lr
+        /* Release both aliases so neither name leaks past this function. */
+        .unreq          len
+        .unreq          acc
+endfunc
+
+/*
+ * void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul,
+ *                                 int len)
+ * r0 = dst, r1 = src
+ *
+ * dst[i] = src[i] * mul
+ *
+ * Lines prefixed with VFP / NOVFP are alternatives selected by macros from
+ * asm.S (not visible here; presumably hard-float vs. soft-float ABI). In the
+ * VFP variant mul arrives in d0[0] and len in r2; in the NOVFP variant mul
+ * arrives in r2 and len in r3. All accesses carry a :128 alignment
+ * qualifier.
+ */
+function ff_vector_fmul_scalar_neon, export=1
+VFP     len .req r2
+NOVFP   len .req r3
+VFP     vdup.32         q8,  d0[0]
+NOVFP   vdup.32         q8,  r2
+        /* r12 = len rounded down to a multiple of 16 */
+        bics            r12, len, #15
+        beq             3f
+        vld1.32         {q0},[r1,:128]!
+        vld1.32         {q1},[r1,:128]!
+        /* Main loop: 16 floats per pass. */
+1:      vmul.f32        q0,  q0,  q8
+        vld1.32         {q2},[r1,:128]!
+        vmul.f32        q1,  q1,  q8
+        vld1.32         {q3},[r1,:128]!
+        vmul.f32        q2,  q2,  q8
+        vst1.32         {q0},[r0,:128]!
+        vmul.f32        q3,  q3,  q8
+        vst1.32         {q1},[r0,:128]!
+        subs            r12, r12, #16
+        beq             2f
+        vld1.32         {q0},[r1,:128]!
+        vst1.32         {q2},[r0,:128]!
+        vld1.32         {q1},[r1,:128]!
+        vst1.32         {q3},[r0,:128]!
+        b               1b
+        /* Flush the last two results of the main loop. */
+2:      vst1.32         {q2},[r0,:128]!
+        vst1.32         {q3},[r0,:128]!
+        ands            len, len, #15
+        it              eq
+        bxeq            lr
+        /* Tail: 4 floats per pass until len is used up. */
+3:      vld1.32         {q0},[r1,:128]!
+        vmul.f32        q0,  q0,  q8
+        vst1.32         {q0},[r0,:128]!
+        subs            len, len, #4
+        bgt             3b
+        bx              lr
+        .unreq          len
+endfunc
+
+/*
+ * void ff_vector_fmul_window_neon(float *dst, const float *src0,
+ *                                 const float *src1, const float *win,
+ *                                 int len)
+ * r0 = dst, r1 = src0, r2 = src1, r3 = win, len on the stack
+ *
+ * Works from both ends at once: r1, r3 and r0 walk forward while r2, r4 and
+ * ip walk backward with a -16 byte step. Each pass consumes 4 floats of src0
+ * and src1 and writes 4 floats at the front and 4 at the back of dst.
+ * The loop exits only when the counter hits exactly zero, so len is
+ * presumably a non-zero multiple of 4 -- confirm against the callers.
+ */
+function ff_vector_fmul_window_neon, export=1
+        push            {r4,r5,lr}
+        /* three registers were pushed, so the fifth argument sits at sp+12 */
+        ldr             lr,  [sp, #12]
+        /* r2 = src1 + 4*len - 16: last four floats of src1 */
+        sub             r2,  r2,  #8
+        sub             r5,  lr,  #2
+        add             r2,  r2,  r5, lsl #2
+        /* r4 = win + 8*len - 16, ip = dst + 8*len - 16 */
+        add             r4,  r3,  r5, lsl #3
+        add             ip,  r0,  r5, lsl #3
+        /* post-index step for the backward-moving pointers */
+        mov             r5,  #-16
+        vld1.32         {d0,d1},  [r1,:128]!
+        vld1.32         {d2,d3},  [r2,:128], r5
+        vld1.32         {d4,d5},  [r3,:128]!
+        vld1.32         {d6,d7},  [r4,:128], r5
+1:      subs            lr,  lr,  #4
+        vmul.f32        d22, d0,  d4
+        vrev64.32       q3,  q3
+        vmul.f32        d23, d1,  d5
+        vrev64.32       q1,  q1
+        vmul.f32        d20, d0,  d7
+        vmul.f32        d21, d1,  d6
+        /* flags from the subs above: last group, finish without preloading */
+        beq             2f
+        vmla.f32        d22, d3,  d7
+        vld1.32         {d0,d1},  [r1,:128]!
+        vmla.f32        d23, d2,  d6
+        vld1.32         {d18,d19},[r2,:128], r5
+        vmls.f32        d20, d3,  d4
+        vld1.32         {d24,d25},[r3,:128]!
+        vmls.f32        d21, d2,  d5
+        vld1.32         {d6,d7},  [r4,:128], r5
+        vmov            q1,  q9
+        vrev64.32       q11, q11
+        vmov            q2,  q12
+        vswp            d22, d23
+        /* q10 goes to the front of dst, the reversed q11 to the back */
+        vst1.32         {d20,d21},[r0,:128]!
+        vst1.32         {d22,d23},[ip,:128], r5
+        b               1b
+2:      vmla.f32        d22, d3,  d7
+        vmla.f32        d23, d2,  d6
+        vmls.f32        d20, d3,  d4
+        vmls.f32        d21, d2,  d5
+        vrev64.32       q11, q11
+        vswp            d22, d23
+        vst1.32         {d20,d21},[r0,:128]!
+        vst1.32         {d22,d23},[ip,:128], r5
+        pop             {r4,r5,pc}
+endfunc
+
+/*
+ * void ff_vector_fmul_add_neon(float *dst, const float *src0,
+ *                              const float *src1, const float *src2,
+ *                              int len)
+ * r0 = dst, r1 = src0, r2 = src1, r3 = src2, len on the stack
+ *
+ * dst[i] = src0[i] * src1[i] + src2[i], 8 floats per pass.
+ * The first 8 elements are loaded before len is examined and the loop exits
+ * only on an exact zero, so len is presumably a non-zero multiple of 8 --
+ * confirm against the callers.
+ */
+function ff_vector_fmul_add_neon, export=1
+        /* nothing has been pushed, so the fifth argument is at sp+0 */
+        ldr             r12, [sp]
+        vld1.32         {q0-q1},  [r1,:128]!
+        vld1.32         {q8-q9},  [r2,:128]!
+        vld1.32         {q2-q3},  [r3,:128]!
+        vmul.f32        q10, q0,  q8
+        vmul.f32        q11, q1,  q9
+1:      vadd.f32        q12, q2,  q10
+        vadd.f32        q13, q3,  q11
+        pld             [r1, #16]
+        pld             [r2, #16]
+        pld             [r3, #16]
+        subs            r12, r12, #8
+        beq             2f
+        /* compute the next products before storing the current sums */
+        vld1.32         {q0},     [r1,:128]!
+        vld1.32         {q8},     [r2,:128]!
+        vmul.f32        q10, q0,  q8
+        vld1.32         {q1},     [r1,:128]!
+        vld1.32         {q9},     [r2,:128]!
+        vmul.f32        q11, q1,  q9
+        vld1.32         {q2-q3},  [r3,:128]!
+        vst1.32         {q12-q13},[r0,:128]!
+        b               1b
+2:      vst1.32         {q12-q13},[r0,:128]!
+        bx              lr
+endfunc
+
+/*
+ * void ff_vector_fmul_reverse_neon(float *dst, const float *src0,
+ *                                  const float *src1, int len)
+ * r0 = dst, r1 = src0, r2 = src1, r3 = len
+ *
+ * dst[i] = src0[i] * src1[len - 1 - i], 8 floats per pass.
+ * src0 is read forward, src1 backward from its end. The loop exits only on
+ * an exact zero, so len is presumably a non-zero multiple of 8 -- confirm
+ * against the callers.
+ */
+function ff_vector_fmul_reverse_neon, export=1
+        /* r2 = src1 + 4*len - 32: last eight floats of src1 */
+        add             r2,  r2,  r3,  lsl #2
+        sub             r2,  r2,  #32
+        /* post-index step that moves r2 back by eight floats */
+        mov             r12, #-32
+        vld1.32         {q0-q1},  [r1,:128]!
+        vld1.32         {q2-q3},  [r2,:128], r12
+1:      pld             [r1, #32]
+        /* vrev64 swaps the two floats inside each d register; pairing the
+         * d registers in opposite order below completes the reversal */
+        vrev64.32       q3,  q3
+        vmul.f32        d16, d0,  d7
+        vmul.f32        d17, d1,  d6
+        pld             [r2, #-32]
+        vrev64.32       q2,  q2
+        vmul.f32        d18, d2,  d5
+        vmul.f32        d19, d3,  d4
+        subs            r3,  r3,  #8
+        beq             2f
+        vld1.32         {q0-q1},  [r1,:128]!
+        vld1.32         {q2-q3},  [r2,:128], r12
+        vst1.32         {q8-q9},  [r0,:128]!
+        b               1b
+2:      vst1.32         {q8-q9},  [r0,:128]!
+        bx              lr
+endfunc
+
+/*
+ * void ff_butterflies_float_neon(float *v1, float *v2, int len)
+ * r0 = v1, r1 = v2, r2 = len
+ *
+ * In place, 4 floats per pass: v1[i] receives v1[i] + v2[i] and v2[i]
+ * receives v1[i] - v2[i] (both computed from the old values).
+ * The loop body runs once before len is tested.
+ */
+function ff_butterflies_float_neon, export=1
+1:      vld1.32         {q0},[r0,:128]
+        vld1.32         {q1},[r1,:128]
+        vsub.f32        q2,  q0,  q1
+        vadd.f32        q1,  q0,  q1
+        /* difference back to v2, sum back to v1 */
+        vst1.32         {q2},[r1,:128]!
+        vst1.32         {q1},[r0,:128]!
+        subs            r2,  r2,  #4
+        bgt             1b
+        bx              lr
+endfunc
+
+/*
+ * float ff_scalarproduct_float_neon(const float *v1, const float *v2,
+ *                                   int len)
+ * r0 = v1, r1 = v2, r2 = len
+ *
+ * Accumulates v1[i] * v2[i] in four parallel lanes (q2), 4 floats per pass,
+ * then adds the lanes together. The loop body runs once before len is
+ * tested. The result ends up in d0[0]; the NOVFP-prefixed line (selected by
+ * a macro from asm.S, not visible here) additionally copies it to r0.
+ */
+function ff_scalarproduct_float_neon, export=1
+        vmov.f32        q2,  #0.0
+1:      vld1.32         {q0},[r0,:128]!
+        vld1.32         {q1},[r1,:128]!
+        vmla.f32        q2,  q0,  q1
+        subs            r2,  r2,  #4
+        bgt             1b
+        /* horizontal sum of the four lanes */
+        vadd.f32        d0,  d4,  d5
+        vpadd.f32       d0,  d0,  d0
+NOVFP   vmov.32         r0,  d0[0]
+        bx              lr
+endfunc
--- a/media/ffvpx/libavutil/arm/float_dsp_vfp.S
+++ b/media/ffvpx/libavutil/arm/float_dsp_vfp.S
@ -0,0 +1,457 @@
+/*
+ * Copyright (c) 2008 Siarhei Siamashka <ssvb@users.sourceforge.net>
+ *
+ * This file is part of FFmpeg
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "asm.S"
+
+/**
+ * dst[i] = src0[i] * src1[i], using VFP short-vector mode.
+ * Assume that len is a positive number and is a multiple of 8.
+ *
+ * r0 = dst, r1 = src0, r2 = src1, r3 = len
+ */
+@ void ff_vector_fmul_vfp(float *dst, const float *src0, const float *src1, int len)
+function ff_vector_fmul_vfp, export=1
+        vpush           {d8-d15}
+        fmrx            r12, fpscr
+        orr             r12, r12, #(3 << 16) /* set vector size to 4 */
+        fmxr            fpscr, r12
+
+        vldmia          r1!, {s0-s3}
+        vldmia          r2!, {s8-s11}
+        vldmia          r1!, {s4-s7}
+        vldmia          r2!, {s12-s15}
+        vmul.f32        s8,  s0,  s8
+        @ Each pass handles up to 16 floats as two groups of 8. The ge/gt
+        @ conditions skip the second group and the preload of the next pass
+        @ once the input is used up.
+1:
+        subs            r3,  r3,  #16
+        vmul.f32        s12, s4,  s12
+        itttt           ge
+        vldmiage        r1!, {s16-s19}
+        vldmiage        r2!, {s24-s27}
+        vldmiage        r1!, {s20-s23}
+        vldmiage        r2!, {s28-s31}
+        it              ge
+        vmulge.f32      s24, s16, s24
+        vstmia          r0!, {s8-s11}
+        vstmia          r0!, {s12-s15}
+        it              ge
+        vmulge.f32      s28, s20, s28
+        itttt           gt
+        vldmiagt        r1!, {s0-s3}
+        vldmiagt        r2!, {s8-s11}
+        vldmiagt        r1!, {s4-s7}
+        vldmiagt        r2!, {s12-s15}
+        ittt            ge
+        vmulge.f32      s8,  s0,  s8
+        vstmiage        r0!, {s24-s27}
+        vstmiage        r0!, {s28-s31}
+        bgt             1b
+
+        @ r12 still holds the FPSCR value written at entry
+        bic             r12, r12, #(7 << 16) /* set vector size back to 1 */
+        fmxr            fpscr, r12
+        vpop            {d8-d15}
+        bx              lr
+endfunc
+
+/**
+ * ARM VFP implementation of 'vector_fmul_window_c' function
+ * Assume that len is a positive non-zero number
+ *
+ * Arguments arrive in a1-a4 (dst, src0, src1, win); len is the fifth
+ * argument and is read from the stack. DST0, SRC0 and WIN0 advance forward
+ * while DST1, SRC1 and WIN1 walk backward from the ends of their buffers.
+ * The len & 1, len & 2 and len & 4 remainders are handled first in scalar
+ * mode, then the rest in groups of 8 using short vectors of length 4.
+ * FPSCR is saved on entry and restored before returning.
+ */
+@ void ff_vector_fmul_window_vfp(float *dst, const float *src0,
+@                                const float *src1, const float *win, int len)
+function ff_vector_fmul_window_vfp, export=1
+DST0    .req    a1
+SRC0    .req    a2
+SRC1    .req    a3
+WIN0    .req    a4
+LEN     .req    v1
+DST1    .req    v2
+WIN1    .req    v3
+OLDFPSCR .req   ip
+
+        push    {v1-v3,lr}
+        @ four registers were pushed, so the fifth argument sits at sp+16
+        ldr     LEN, [sp, #4*4+0]
+        vpush   {s16-s31}
+        fmrx    OLDFPSCR, FPSCR
+        @ DST1 = dst + 2*len floats, SRC1 = src1 + len floats,
+        @ WIN1 = win + 2*len floats: one past the end of each buffer
+        add     DST1, DST0, LEN, lsl #3
+        add     SRC1, SRC1, LEN, lsl #2
+        add     WIN1, WIN0, LEN, lsl #3
+
+        tst     LEN, #7
+        beq     4f                          @ common case: len is a multiple of 8
+
+        ldr     lr, =0x03000000             @ RunFast mode, scalar mode
+        fmxr    FPSCR, lr
+
+        @ remainder: one element
+        tst     LEN, #1
+        beq     1f
+        vldmdb  WIN1!, {s0}
+        vldmia  SRC0!, {s8}
+        vldmia  WIN0!, {s16}
+        vmul.f  s24, s0, s8
+        vldmdb  SRC1!, {s20}
+        vmul.f  s8, s16, s8
+        vmls.f  s24, s16, s20
+        vmla.f  s8, s0, s20
+        vstmia  DST0!, {s24}
+        vstmdb  DST1!, {s8}
+1:
+        @ remainder: two elements
+        tst     LEN, #2
+        beq     2f
+        vldmdb  WIN1!, {s0}
+        vldmdb  WIN1!, {s1}
+        vldmia  SRC0!, {s8-s9}
+        vldmia  WIN0!, {s16-s17}
+        vmul.f  s24, s0, s8
+        vmul.f  s25, s1, s9
+        vldmdb  SRC1!, {s20}
+        vldmdb  SRC1!, {s21}
+        vmul.f  s8, s16, s8
+        vmul.f  s9, s17, s9
+        vmls.f  s24, s16, s20
+        vmls.f  s25, s17, s21
+        vmla.f  s8, s0, s20
+        vmla.f  s9, s1, s21
+        vstmia  DST0!, {s24-s25}
+        vstmdb  DST1!, {s8}
+        vstmdb  DST1!, {s9}
+2:
+        @ remainder: four elements
+        tst     LEN, #4
+        beq     3f
+        vldmdb  WIN1!, {s0}
+        vldmdb  WIN1!, {s1}
+        vldmdb  WIN1!, {s2}
+        vldmdb  WIN1!, {s3}
+        vldmia  SRC0!, {s8-s11}
+        vldmia  WIN0!, {s16-s19}
+        vmul.f  s24, s0, s8
+        vmul.f  s25, s1, s9
+        vmul.f  s26, s2, s10
+        vmul.f  s27, s3, s11
+        vldmdb  SRC1!, {s20}
+        vldmdb  SRC1!, {s21}
+        vldmdb  SRC1!, {s22}
+        vldmdb  SRC1!, {s23}
+        vmul.f  s8, s16, s8
+        vmul.f  s9, s17, s9
+        vmul.f  s10, s18, s10
+        vmul.f  s11, s19, s11
+        vmls.f  s24, s16, s20
+        vmls.f  s25, s17, s21
+        vmls.f  s26, s18, s22
+        vmls.f  s27, s19, s23
+        vmla.f  s8, s0, s20
+        vmla.f  s9, s1, s21
+        vmla.f  s10, s2, s22
+        vmla.f  s11, s3, s23
+        vstmia  DST0!, {s24-s27}
+        vstmdb  DST1!, {s8}
+        vstmdb  DST1!, {s9}
+        vstmdb  DST1!, {s10}
+        vstmdb  DST1!, {s11}
+3:
+        @ drop the remainder bits; done if no full group of 8 is left
+        bics    LEN, LEN, #7
+        beq     7f
+4:
+        ldr     lr, =0x03030000             @ RunFast mode, short vectors of length 4, stride 1
+        fmxr    FPSCR, lr
+
+        @ From here on the indentation level marks which group of four
+        @ elements an instruction belongs to.
+        vldmdb  WIN1!, {s0}
+        vldmdb  WIN1!, {s1}
+        vldmdb  WIN1!, {s2}
+        vldmdb  WIN1!, {s3}
+        vldmia  SRC0!, {s8-s11}
+        vldmia  WIN0!, {s16-s19}
+        vmul.f  s24, s0, s8                     @ vector * vector
+        vldmdb  SRC1!, {s20}
+        vldmdb  SRC1!, {s21}
+        vldmdb  SRC1!, {s22}
+        vldmdb  SRC1!, {s23}
+        vmul.f  s8, s16, s8                     @ vector * vector
+        vmls.f  s24, s16, s20                   @ vector * vector
+            vldmdb  WIN1!, {s4}
+            vldmdb  WIN1!, {s5}
+            vldmdb  WIN1!, {s6}
+            vldmdb  WIN1!, {s7}
+            vldmia  SRC0!, {s12-s13}
+        vmla.f  s8, s0, s20                     @ vector * vector
+            vldmia  SRC0!, {s14-s15}
+        subs    LEN, LEN, #8
+        beq     6f
+5:          vldmia  WIN0!, {s20-s23}
+            vmul.f  s28, s4, s12                @ vector * vector
+        vstmia  DST0!, {s24-s25}
+            vldmdb  SRC1!, {s16}
+            vldmdb  SRC1!, {s17}
+            vldmdb  SRC1!, {s18}
+            vldmdb  SRC1!, {s19}
+            vmul.f  s12, s20, s12               @ vector * vector
+        vstmia  DST0!, {s26-s27}
+        vstmdb  DST1!, {s8}
+        vstmdb  DST1!, {s9}
+        vstmdb  DST1!, {s10}
+        vstmdb  DST1!, {s11}
+            vmls.f  s28, s20, s16               @ vector * vector
+                vldmdb  WIN1!, {s0}
+                vldmdb  WIN1!, {s1}
+                vldmdb  WIN1!, {s2}
+                vldmdb  WIN1!, {s3}
+                vldmia  SRC0!, {s8-s9}
+            vmla.f  s12, s4, s16                @ vector * vector
+                vldmia  SRC0!, {s10-s11}
+        subs    LEN, LEN, #8
+                vldmia  WIN0!, {s16-s19}
+                vmul.f  s24, s0, s8             @ vector * vector
+            vstmia  DST0!, {s28-s29}
+                vldmdb  SRC1!, {s20}
+                vldmdb  SRC1!, {s21}
+                vldmdb  SRC1!, {s22}
+                vldmdb  SRC1!, {s23}
+                vmul.f  s8, s16, s8             @ vector * vector
+            vstmia  DST0!, {s30-s31}
+            vstmdb  DST1!, {s12}
+            vstmdb  DST1!, {s13}
+            vstmdb  DST1!, {s14}
+            vstmdb  DST1!, {s15}
+                vmls.f  s24, s16, s20           @ vector * vector
+                    vldmdb  WIN1!, {s4}
+                    vldmdb  WIN1!, {s5}
+                    vldmdb  WIN1!, {s6}
+                    vldmdb  WIN1!, {s7}
+                    vldmia  SRC0!, {s12-s13}
+                vmla.f  s8, s0, s20             @ vector * vector
+                    vldmia  SRC0!, {s14-s15}
+        bne     5b
+6:                  vldmia  WIN0!, {s20-s23}
+                    vmul.f  s28, s4, s12        @ vector * vector
+                vstmia  DST0!, {s24-s25}
+                    vldmdb  SRC1!, {s16}
+                    vldmdb  SRC1!, {s17}
+                    vldmdb  SRC1!, {s18}
+                    vldmdb  SRC1!, {s19}
+                    vmul.f  s12, s20, s12       @ vector * vector
+                vstmia  DST0!, {s26-s27}
+                vstmdb  DST1!, {s8}
+                vstmdb  DST1!, {s9}
+                vstmdb  DST1!, {s10}
+                vstmdb  DST1!, {s11}
+                    vmls.f  s28, s20, s16       @ vector * vector
+                    vmla.f  s12, s4, s16        @ vector * vector
+                    vstmia  DST0!, {s28-s31}
+                    vstmdb  DST1!, {s12}
+                    vstmdb  DST1!, {s13}
+                    vstmdb  DST1!, {s14}
+                    vstmdb  DST1!, {s15}
+7:
+        @ put back the FPSCR value saved on entry
+        fmxr    FPSCR, OLDFPSCR
+        vpop    {s16-s31}
+        pop     {v1-v3,pc}
+
+        .unreq  DST0
+        .unreq  SRC0
+        .unreq  SRC1
+        .unreq  WIN0
+        .unreq  LEN
+        .unreq  OLDFPSCR
+        .unreq  DST1
+        .unreq  WIN1
+endfunc
+
+/**
+ * ARM VFP optimized implementation of 'vector_fmul_reverse_c' function.
+ * Assume that len is a positive number and is a multiple of 8.
+ *
+ * r0 = dst, r1 = src0, r2 = src1, r3 = len
+ * dst[i] = src0[i] * src1[len - 1 - i]. src0 is read forward and src1
+ * backward from its end. Unlike the other routines in this file, FPSCR is
+ * never touched here; every multiply is written out per element.
+ */
+@ void ff_vector_fmul_reverse_vfp(float *dst, const float *src0,
+@                                 const float *src1, int len)
+function ff_vector_fmul_reverse_vfp, export=1
+        vpush           {d8-d15}
+        @ r2 = one past the last float of src1
+        add             r2,  r2,  r3, lsl #2
+        vldmdb          r2!, {s0-s3}
+        vldmia          r1!, {s8-s11}
+        vldmdb          r2!, {s4-s7}
+        vldmia          r1!, {s12-s15}
+        vmul.f32        s8,  s3,  s8
+        vmul.f32        s9,  s2,  s9
+        vmul.f32        s10, s1,  s10
+        vmul.f32        s11, s0,  s11
+        @ Each pass handles up to 16 floats as two groups of 8. The ge/gt
+        @ conditions skip the second group and the preload of the next pass
+        @ once the input is used up.
+1:
+        subs            r3,  r3,  #16
+        it              ge
+        vldmdbge        r2!, {s16-s19}
+        vmul.f32        s12, s7,  s12
+        it              ge
+        vldmiage        r1!, {s24-s27}
+        vmul.f32        s13, s6,  s13
+        it              ge
+        vldmdbge        r2!, {s20-s23}
+        vmul.f32        s14, s5,  s14
+        it              ge
+        vldmiage        r1!, {s28-s31}
+        vmul.f32        s15, s4,  s15
+        it              ge
+        vmulge.f32      s24, s19, s24
+        it              gt
+        vldmdbgt        r2!, {s0-s3}
+        it              ge
+        vmulge.f32      s25, s18, s25
+        vstmia          r0!, {s8-s13}
+        it              ge
+        vmulge.f32      s26, s17, s26
+        it              gt
+        vldmiagt        r1!, {s8-s11}
+        itt             ge
+        vmulge.f32      s27, s16, s27
+        vmulge.f32      s28, s23, s28
+        it              gt
+        vldmdbgt        r2!, {s4-s7}
+        it              ge
+        vmulge.f32      s29, s22, s29
+        vstmia          r0!, {s14-s15}
+        ittt            ge
+        vmulge.f32      s30, s21, s30
+        vmulge.f32      s31, s20, s31
+        vmulge.f32      s8,  s3,  s8
+        it              gt
+        vldmiagt        r1!, {s12-s15}
+        itttt           ge
+        vmulge.f32      s9,  s2,  s9
+        vmulge.f32      s10, s1,  s10
+        vstmiage        r0!, {s24-s27}
+        vmulge.f32      s11, s0,  s11
+        it              ge
+        vstmiage        r0!, {s28-s31}
+        bgt             1b
+
+        vpop            {d8-d15}
+        bx              lr
+endfunc
+
+/**
+ * ARM VFP implementation of 'butterflies_float_c' function
+ * Assume that len is a positive non-zero number
+ *
+ * Arguments arrive in a1-a3 (v1, v2, len); a4 keeps the FPSCR value saved on
+ * entry. In place: v1[i] receives v1[i] + v2[i] and v2[i] receives
+ * v1[i] - v2[i]. The len & 1, len & 2 and len & 4 remainders are handled
+ * first in scalar mode, then the rest in groups of 8 using short vectors of
+ * length 4. Results are written back with negative offsets because the base
+ * pointers have already been advanced by the loads.
+ */
+@ void ff_butterflies_float_vfp(float *restrict v1, float *restrict v2, int len)
+function ff_butterflies_float_vfp, export=1
+BASE1   .req    a1
+BASE2   .req    a2
+LEN     .req    a3
+OLDFPSCR .req   a4
+
+        vpush   {s16-s31}
+        fmrx    OLDFPSCR, FPSCR
+
+        tst     LEN, #7
+        beq     4f                          @ common case: len is a multiple of 8
+
+        ldr     ip, =0x03000000             @ RunFast mode, scalar mode
+        fmxr    FPSCR, ip
+
+        @ remainder: one element
+        tst     LEN, #1
+        beq     1f
+        vldmia  BASE1!, {s0}
+        vldmia  BASE2!, {s8}
+        vadd.f  s16, s0, s8
+        vsub.f  s24, s0, s8
+        vstr    s16, [BASE1, #0-4*1]
+        vstr    s24, [BASE2, #0-4*1]
+1:
+        @ remainder: two elements
+        tst     LEN, #2
+        beq     2f
+        vldmia  BASE1!, {s0-s1}
+        vldmia  BASE2!, {s8-s9}
+        vadd.f  s16, s0, s8
+        vadd.f  s17, s1, s9
+        vsub.f  s24, s0, s8
+        vsub.f  s25, s1, s9
+        vstr    d8, [BASE1, #0-8*1]    @ s16,s17
+        vstr    d12, [BASE2, #0-8*1]   @ s24,s25
+2:
+        @ remainder: four elements
+        tst     LEN, #4
+        beq     3f
+        vldmia  BASE1!, {s0-s1}
+        vldmia  BASE2!, {s8-s9}
+        vldmia  BASE1!, {s2-s3}
+        vldmia  BASE2!, {s10-s11}
+        vadd.f  s16, s0, s8
+        vadd.f  s17, s1, s9
+        vsub.f  s24, s0, s8
+        vsub.f  s25, s1, s9
+        vadd.f  s18, s2, s10
+        vadd.f  s19, s3, s11
+        vsub.f  s26, s2, s10
+        vsub.f  s27, s3, s11
+        vstr    d8, [BASE1, #0-16*1]    @ s16,s17
+        vstr    d12, [BASE2, #0-16*1]   @ s24,s25
+        vstr    d9, [BASE1, #8-16*1]    @ s18,s19
+        vstr    d13, [BASE2, #8-16*1]   @ s26,s27
+3:
+        @ drop the remainder bits; done if no full group of 8 is left
+        bics    LEN, LEN, #7
+        beq     7f
+4:
+        ldr     ip, =0x03030000             @ RunFast mode, short vectors of length 4, stride 1
+        fmxr    FPSCR, ip
+
+        @ From here on the indentation level marks which group of four
+        @ elements an instruction belongs to.
+        vldmia  BASE1!, {s0-s1}
+        vldmia  BASE2!, {s8-s9}
+        vldmia  BASE1!, {s2-s3}
+        vldmia  BASE2!, {s10-s11}
+        vadd.f  s16, s0, s8
+            vldmia  BASE1!, {s4-s5}
+            vldmia  BASE2!, {s12-s13}
+            vldmia  BASE1!, {s6-s7}
+            vldmia  BASE2!, {s14-s15}
+        vsub.f  s24, s0, s8
+            vadd.f  s20, s4, s12
+        subs    LEN, LEN, #8
+        beq     6f
+5:              vldmia  BASE1!, {s0-s3}
+                vldmia  BASE2!, {s8-s11}
+            vsub.f  s28, s4, s12
+        vstr    d8, [BASE1, #0-16*3]    @ s16,s17
+        vstr    d9, [BASE1, #8-16*3]    @ s18,s19
+        vstr    d12, [BASE2, #0-16*3]   @ s24,s25
+        vstr    d13, [BASE2, #8-16*3]   @ s26,s27
+                vadd.f  s16, s0, s8
+                    vldmia  BASE1!, {s4-s7}
+                    vldmia  BASE2!, {s12-s15}
+                vsub.f  s24, s0, s8
+            vstr    d10, [BASE1, #0-16*3]   @ s20,s21
+            vstr    d11, [BASE1, #8-16*3]   @ s22,s23
+            vstr    d14, [BASE2, #0-16*3]   @ s28,s29
+            vstr    d15, [BASE2, #8-16*3]   @ s30,s31
+                    vadd.f  s20, s4, s12
+        subs    LEN, LEN, #8
+        bne     5b
+6:                   vsub.f  s28, s4, s12
+                vstr    d8, [BASE1, #0-16*2]    @ s16,s17
+                vstr    d9, [BASE1, #8-16*2]    @ s18,s19
+                vstr    d12, [BASE2, #0-16*2]   @ s24,s25
+                vstr    d13, [BASE2, #8-16*2]   @ s26,s27
+                    vstr    d10, [BASE1, #0-16*1]   @ s20,s21
+                    vstr    d11, [BASE1, #8-16*1]   @ s22,s23
+                    vstr    d14, [BASE2, #0-16*1]   @ s28,s29
+                    vstr    d15, [BASE2, #8-16*1]   @ s30,s31
+7:
+        @ put back the FPSCR value saved on entry
+        fmxr    FPSCR, OLDFPSCR
+        vpop    {s16-s31}
+        bx      lr
+
+        .unreq  BASE1
+        .unreq  BASE2
+        .unreq  LEN
+        .unreq  OLDFPSCR
+endfunc
--- a/media/ffvpx/libavutil/arm/intmath.h
+++ b/media/ffvpx/libavutil/arm/intmath.h
@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_ARM_INTMATH_H
+#define AVUTIL_ARM_INTMATH_H
+
+#include <stdint.h>
+
+#include "config.h"
+#include "libavutil/attributes.h"
+
+#if HAVE_INLINE_ASM
+
+#if HAVE_ARMV6_INLINE
+
+/**
+ * ARM override of av_clip_uint8(): clamps a to the range [0, 255].
+ *
+ * Implemented with a single USAT (unsigned saturate) instruction using an
+ * 8-bit width. The #define maps the generic name onto this version.
+ *
+ * @param a value to clamp
+ * @return the clamped value
+ */
+#define av_clip_uint8 av_clip_uint8_arm
+static av_always_inline av_const int av_clip_uint8_arm(int a)
+{
+    int x;
+    __asm__ ("usat %0, #8,  %1" : "=r"(x) : "r"(a));
+    return x;
+}
+
+/**
+ * ARM override of av_clip_int8(): clamps a to the range [-128, 127].
+ *
+ * Implemented with a single SSAT (signed saturate) instruction using an
+ * 8-bit width.
+ *
+ * @param a value to clamp
+ * @return the clamped value
+ */
+#define av_clip_int8 av_clip_int8_arm
+static av_always_inline av_const int av_clip_int8_arm(int a)
+{
+    int x;
+    __asm__ ("ssat %0, #8,  %1" : "=r"(x) : "r"(a));
+    return x;
+}
+
+/**
+ * ARM override of av_clip_uint16(): clamps a to the range [0, 65535].
+ *
+ * Implemented with a single USAT (unsigned saturate) instruction using a
+ * 16-bit width.
+ *
+ * @param a value to clamp
+ * @return the clamped value
+ */
+#define av_clip_uint16 av_clip_uint16_arm
+static av_always_inline av_const int av_clip_uint16_arm(int a)
+{
+    int x;
+    __asm__ ("usat %0, #16, %1" : "=r"(x) : "r"(a));
+    return x;
+}
+
+/**
+ * ARM override of av_clip_int16(): clamps a to the range [-32768, 32767].
+ *
+ * Implemented with a single SSAT (signed saturate) instruction using a
+ * 16-bit width.
+ *
+ * @param a value to clamp
+ * @return the clamped value
+ */
+#define av_clip_int16 av_clip_int16_arm
+static av_always_inline av_const int av_clip_int16_arm(int a)
+{
+    int x;
+    __asm__ ("ssat %0, #16, %1" : "=r"(x) : "r"(a));
+    return x;
+}
+
+/**
+ * ARM override of av_clip_intp2(): clamps a to the signed (p + 1)-bit range
+ * [-(1 << p), (1 << p) - 1] with a single SSAT instruction.
+ *
+ * @param a value to clamp
+ * @param p exponent of the bound; it is passed through an "i" (immediate)
+ *          constraint, so it must be a compile-time constant once inlined
+ * @return the clamped value
+ */
+#define av_clip_intp2 av_clip_intp2_arm
+static av_always_inline av_const int av_clip_intp2_arm(int a, int p)
+{
+    /* SSAT produces a signed result, so keep it in an int like the other
+     * signed clip helpers rather than converting from unsigned on return. */
+    int x;
+    __asm__ ("ssat %0, %2, %1" : "=r"(x) : "r"(a), "i"(p+1));
+    return x;
+}
+
+/**
+ * ARM override of av_clip_uintp2(): clamps a to the unsigned p-bit range
+ * [0, (1 << p) - 1] with a single USAT instruction.
+ *
+ * @param a value to clamp
+ * @param p number of result bits; it is passed through an "i" (immediate)
+ *          constraint, so it must be a compile-time constant once inlined
+ * @return the clamped value
+ */
+#define av_clip_uintp2 av_clip_uintp2_arm
+static av_always_inline av_const unsigned av_clip_uintp2_arm(int a, int p)
+{
+    unsigned x;
+    __asm__ ("usat %0, %2, %1" : "=r"(x) : "r"(a), "i"(p));
+    return x;
+}
+
+/**
+ * ARM override of av_sat_add32(): signed 32-bit addition that saturates
+ * instead of wrapping, implemented with a single QADD instruction.
+ *
+ * @param a first addend
+ * @param b second addend
+ * @return a + b clamped to the signed 32-bit range
+ */
+#define av_sat_add32 av_sat_add32_arm
+static av_always_inline int av_sat_add32_arm(int a, int b)
+{
+    int r;
+    __asm__ ("qadd %0, %1, %2" : "=r"(r) : "r"(a), "r"(b));
+    return r;
+}
+
+/**
+ * ARM override of av_sat_dadd32(): saturating "double and add", implemented
+ * with a single QDADD instruction.
+ *
+ * QDADD doubles its last operand with saturation and then adds the other
+ * operand with saturation, so the result is sat(a + sat(2 * b)).
+ *
+ * @param a value added as-is
+ * @param b value that is doubled before the addition
+ * @return the saturated signed 32-bit result
+ */
+#define av_sat_dadd32 av_sat_dadd32_arm
+static av_always_inline int av_sat_dadd32_arm(int a, int b)
+{
+    int r;
+    __asm__ ("qdadd %0, %1, %2" : "=r"(r) : "r"(a), "r"(b));
+    return r;
+}
+
+#endif /* HAVE_ARMV6_INLINE */
+
+#if HAVE_ASM_MOD_Q
+
+/**
+ * ARM override of av_clipl_int32(): clamps a 64-bit value to the int32_t
+ * range.
+ *
+ * %Q2 and %R2 name the low and high 32-bit halves of a. The sum of the high
+ * half and bit 31 of the low half is zero exactly when a already fits in 32
+ * bits; in that case the low half is returned unchanged. Otherwise the
+ * result is 0x7fffffff XORed with the sign mask of a, i.e. INT32_MAX for
+ * positive overflow and INT32_MIN for negative overflow.
+ *
+ * @param a 64-bit value to clamp
+ * @return a clamped to [INT32_MIN, INT32_MAX]
+ */
+#define av_clipl_int32 av_clipl_int32_arm
+static av_always_inline av_const int32_t av_clipl_int32_arm(int64_t a)
+{
+    /* y is a scratch register; it is early-clobbered ("=&r") because it is
+     * written before the inputs have been fully consumed. */
+    int x, y;
+    __asm__ ("adds   %1, %R2, %Q2, lsr #31  \n\t" /* y = hi + (lo >> 31); Z set when a fits */
+             "itet   ne                     \n\t" /* IT block for the three conditional insns */
+             "mvnne  %1, #1<<31             \n\t" /* overflow: y = 0x7fffffff */
+             "moveq  %0, %Q2                \n\t" /* fits: x = low half */
+             "eorne  %0, %1,  %R2, asr #31  \n\t" /* overflow: x = y ^ sign mask of a */
+             : "=r"(x), "=&r"(y) : "r"(a) : "cc");
+    return x;
+}
+
+#endif /* HAVE_ASM_MOD_Q */
+
+#endif /* HAVE_INLINE_ASM */
+
+#endif /* AVUTIL_ARM_INTMATH_H */
--- a/media/ffvpx/libavutil/arm/intreadwrite.h
+++ b/media/ffvpx/libavutil/arm/intreadwrite.h
@ -0,0 +1,91 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_ARM_INTREADWRITE_H
+#define AVUTIL_ARM_INTREADWRITE_H
+
+#include <stdint.h>
+#include "config.h"
+#include "libavutil/attributes.h"
+
+#if HAVE_FAST_UNALIGNED && HAVE_INLINE_ASM && AV_GCC_VERSION_AT_MOST(4,6)
+
+/**
+ * Read a 16-bit value in native byte order from p with a single LDRH.
+ *
+ * The memory operands differ by compiler version: GCC <= 4.5 is given one
+ * 16-bit "m" operand, while newer compilers are given both bytes as separate
+ * operands so each byte is known to be read. Outside Thumb mode the first
+ * byte uses the ARM-specific "Uq" constraint.
+ * NOTE(review): "Uq" presumably restricts the address to forms LDRH can
+ * encode -- confirm against the GCC ARM machine-constraint documentation.
+ *
+ * @param p address to read from
+ * @return the 16-bit value, zero-extended to unsigned
+ */
+#define AV_RN16 AV_RN16
+static av_always_inline unsigned AV_RN16(const void *p)
+{
+    const uint8_t *q = p;
+    unsigned v;
+#if AV_GCC_VERSION_AT_MOST(4,5)
+    __asm__ ("ldrh %0, %1" : "=r"(v) : "m"(*(const uint16_t *)q));
+#elif defined __thumb__
+    __asm__ ("ldrh %0, %1" : "=r"(v) : "m"(q[0]), "m"(q[1]));
+#else
+    __asm__ ("ldrh %0, %1" : "=r"(v) : "Uq"(q[0]), "m"(q[1]));
+#endif
+    return v;
+}
+
+/**
+ * Write the 16-bit value v in native byte order to p with a single STRH.
+ *
+ * @param p destination address
+ * @param v value to store
+ */
+#define AV_WN16 AV_WN16
+static av_always_inline void AV_WN16(void *p, uint16_t v)
+{
+    __asm__ ("strh %1, %0" : "=m"(*(uint16_t *)p) : "r"(v));
+}
+
+/**
+ * Read a 32-bit value in native byte order from p with a single LDR.
+ *
+ * The pointer is viewed through a packed one-member struct so the memory
+ * operand handed to the compiler carries no alignment requirement.
+ *
+ * @param p address to read from
+ * @return the 32-bit value
+ */
+#define AV_RN32 AV_RN32
+static av_always_inline uint32_t AV_RN32(const void *p)
+{
+    const struct __attribute__((packed)) { uint32_t v; } *q = p;
+    uint32_t v;
+    __asm__ ("ldr  %0, %1" : "=r"(v) : "m"(*q));
+    return v;
+}
+
+/**
+ * Write the 32-bit value v in native byte order to p with a single STR.
+ *
+ * @param p destination address
+ * @param v value to store
+ */
+#define AV_WN32 AV_WN32
+static av_always_inline void AV_WN32(void *p, uint32_t v)
+{
+    __asm__ ("str  %1, %0" : "=m"(*(uint32_t *)p) : "r"(v));
+}
+
+#if HAVE_ASM_MOD_Q
+
+/**
+ * Read a 64-bit value from p using two 32-bit LDR instructions.
+ *
+ * The word at p is loaded into the low half of the result (%Q0) and the
+ * word at p + 4 into the high half (%R0). The output is early-clobbered
+ * ("=&r") because its first half is written before the second memory
+ * operand has been read.
+ *
+ * @param p address to read from
+ * @return the 64-bit value
+ */
+#define AV_RN64 AV_RN64
+static av_always_inline uint64_t AV_RN64(const void *p)
+{
+    /* Packed wrapper: the two words are addressed without any alignment
+     * requirement on p. */
+    const struct __attribute__((packed)) { uint32_t v; } *q = p;
+    uint64_t v;
+    __asm__ ("ldr   %Q0, %1  \n\t"
+             "ldr   %R0, %2  \n\t"
+             : "=&r"(v)
+             : "m"(q[0]), "m"(q[1]));
+    return v;
+}
+
+/**
+ * Write the 64-bit value v to p using two 32-bit STR instructions.
+ *
+ * The low half of v (%Q2) is stored at p and the high half (%R2) at p + 4.
+ *
+ * @param p destination address
+ * @param v value to store
+ */
+#define AV_WN64 AV_WN64
+static av_always_inline void AV_WN64(void *p, uint64_t v)
+{
+    __asm__ ("str  %Q2, %0  \n\t"
+             "str  %R2, %1  \n\t"
+             : "=m"(*(uint32_t*)p), "=m"(*((uint32_t*)p+1))
+             : "r"(v));
+}
+
+#endif /* HAVE_ASM_MOD_Q */
+
+#endif /* HAVE_FAST_UNALIGNED && HAVE_INLINE_ASM && AV_GCC_VERSION_AT_MOST(4,6) */
+
+#endif /* AVUTIL_ARM_INTREADWRITE_H */
--- a/media/ffvpx/libavutil/arm/moz.build
+++ b/media/ffvpx/libavutil/arm/moz.build
@ -0,0 +1,18 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# ARM-specific libavutil sources: C files plus the NEON and VFP assembly
+# (.S) implementations of the float DSP routines.
+SOURCES += [
+    'cpu.c',
+    'float_dsp_init_arm.c',
+    'float_dsp_init_neon.c',
+    'float_dsp_init_vfp.c',
+    'float_dsp_neon.S',
+    'float_dsp_vfp.S',
+]
+
+# These objects are linked into the mozavutil library.
+FINAL_LIBRARY = 'mozavutil'
+
+# Shared ffvpx build settings (assembler flags, include paths, ...).
+include('/media/ffvpx/ffvpxcommon.mozbuild')
--- a/media/ffvpx/libavutil/arm/timer.h
+++ b/media/ffvpx/libavutil/arm/timer.h
@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_ARM_TIMER_H
+#define AVUTIL_ARM_TIMER_H
+
+#include <stdint.h>
+#include "config.h"
+
+#if HAVE_INLINE_ASM && defined(__ARM_ARCH_7A__)
+
+/* Route AV_READ_TIME to the ARMv7-A implementation below. */
+#define AV_READ_TIME read_time
+
+/**
+ * Read a 32-bit counter from coprocessor 15 (register c9, c13, 0), which the
+ * ARMv7-A architecture defines as the PMU cycle counter (PMCCNTR).
+ *
+ * The asm is volatile so every call performs a fresh read.
+ * NOTE(review): reading this register from user space normally requires the
+ * kernel to have enabled user-mode PMU access -- confirm for the targets
+ * that use AV_READ_TIME.
+ *
+ * @return the counter value, zero-extended from 32 to 64 bits
+ */
+static inline uint64_t read_time(void)
+{
+    unsigned cc;
+    __asm__ volatile ("mrc p15, 0, %0, c9, c13, 0" : "=r"(cc));
+    return cc;
+}
+
+#endif /* HAVE_INLINE_ASM && __ARM_ARCH_7A__ */
+
+#endif /* AVUTIL_ARM_TIMER_H */
--- a/media/ffvpx/libavutil/dummy_funcs.c
+++ b/media/ffvpx/libavutil/dummy_funcs.c
@ -8,19 +8,24 @@

 // cpu_internal.c
 int ff_get_cpu_flags_aarch64(void) { return 0; }
+// The ARM stub is omitted when compiling for ARM; the real symbol is expected
+// to come from the sources listed in libavutil/arm/moz.build.
+// NOTE(review): that directory is only built when FFVPX_ASFLAGS is set, while
+// this guard only tests __arm__ -- confirm that an ARM build without
+// FFVPX_ASFLAGS still links.
+#if !defined(__arm__)
 int ff_get_cpu_flags_arm(void) { return 0; }
+#endif
 int ff_get_cpu_flags_ppc(void) { return 0; }

 // float_dsp.c
 #include "float_dsp.h"
 void ff_float_dsp_init_aarch64(AVFloatDSPContext *fdsp) {}
-void ff_float_dsp_init_arm(AVFloatDSPContext *fdsp) {}
 void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int strict) {}
 void ff_float_dsp_init_mips(AVFloatDSPContext *fdsp) {}
-
+// Same ARM guard as for ff_get_cpu_flags_arm: no stub when targeting ARM.
+#if !defined(__arm__)
+void ff_float_dsp_init_arm(AVFloatDSPContext *fdsp) {}
+#endif
 int av_hwframe_get_buffer(struct AVBufferRef* hwframe_ref, struct AVFrame* frame, int flags) { return 0; }

 // cpu.c
 size_t ff_get_cpu_max_align_aarch64() { return 0; }
-size_t ff_get_cpu_max_align_arm() { return 0; }
 size_t ff_get_cpu_max_align_ppc() { return 0; }
+// Same ARM guard as for ff_get_cpu_flags_arm: no stub when targeting ARM.
+#if !defined(__arm__)
+size_t ff_get_cpu_max_align_arm() { return 0; }
+#endif
--- a/media/ffvpx/libavutil/moz.build
+++ b/media/ffvpx/libavutil/moz.build
@ -7,7 +7,10 @@
 # Due to duplicate file names, we compile libavutil/x86 in its own
 # moz.build file.
 if CONFIG['FFVPX_ASFLAGS']:
-    DIRS += ['x86']
+    # Descend only into the subdirectory that matches the target CPU; other
+    # architectures get no architecture-specific directory.
+    if CONFIG['CPU_ARCH'] == 'x86' or CONFIG['CPU_ARCH'] == 'x86_64':
+        DIRS += ['x86']
+    elif CONFIG['CPU_ARCH'] == 'arm':
+        DIRS += ['arm']

 SharedLibrary('mozavutil')
 SOURCES += [
--- a/old-configure.in
+++ b/old-configure.in
@ -3216,22 +3216,18 @@ dnl ========================================================
 MOZ_FFVPX=
 MOZ_FFVPX_FLACONLY=
 case "$CPU_ARCH" in
-  x86)
-      MOZ_FFVPX=1
-  ;;
-  x86_64)
+  x86|x86_64)
      MOZ_FFVPX=1
+      dnl Reuse the assembler flags in LIBAV_FFT_ASFLAGS to build FFmpeg's ffvpx assembly code.
+      FFVPX_ASFLAGS=$LIBAV_FFT_ASFLAGS
  ;;
  arm*)
      MOZ_FFVPX=1
      MOZ_FFVPX_FLACONLY=1
+      dnl On ARM, take the ffvpx assembler flags from VPX_ASFLAGS.
+      FFVPX_ASFLAGS=$VPX_ASFLAGS
  ;;
 esac

-dnl Use same conditional as MOZ_LIBAV_FFT to enable FFmpeg's ffvpx assembly decoder.
-if test -n "$MOZ_LIBAV_FFT"; then
-  FFVPX_ASFLAGS=$LIBAV_FFT_ASFLAGS
-fi
 if test -n "$MOZ_FFVPX"; then
  AC_DEFINE(MOZ_FFVPX)
 fi