зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1295886 - P14. Add arm neon and vfp optimized methods to ffmpeg. r=mshal
MozReview-Commit-ID: KMUZoDWoisi --HG-- extra : rebase_source : a08eae842680b9d475bfc0afd2d569533dba676b
This commit is contained in:
Родитель
7905dd9c5e
Коммит
dae7070b4a
|
@ -7,8 +7,12 @@
|
|||
#ifndef MOZ_FFVPX_CONFIG_H
|
||||
#define MOZ_FFVPX_CONFIG_H
|
||||
#if defined(MOZ_FFVPX_FLACONLY)
|
||||
#include "config_flac.h"
|
||||
#if defined(MOZ_WIDGET_ANDROID)
|
||||
#include "config_android32.h"
|
||||
#else
|
||||
#include "config_flac.h"
|
||||
#endif
|
||||
#else // MOZ_FFVPX_FLACONLY
|
||||
#if defined(XP_WIN)
|
||||
// Avoid conflicts with mozilla-config.h
|
||||
#if !defined(_MSC_VER)
|
||||
|
@ -38,6 +42,6 @@
|
|||
#include "config_unix32.h"
|
||||
#endif
|
||||
#endif
|
||||
#endif // MOZ_FFVPX_FLACONLY
|
||||
#endif // else MOZ_FFVPX_FLACONLY
|
||||
#include "config_common.h"
|
||||
#endif // MOZ_FFVPX_CONFIG_H
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -9,9 +9,8 @@ ASFLAGS += CONFIG['FFVPX_ASFLAGS']
|
|||
ASFLAGS += ['-I%s/media/ffvpx' % TOPSRCDIR]
|
||||
|
||||
if CONFIG['FFVPX_ASFLAGS']:
|
||||
USE_YASM = True
|
||||
|
||||
if CONFIG['OS_ARCH'] == 'WINNT':
|
||||
USE_YASM = True
|
||||
# Fix inline symbols and math defines for windows.
|
||||
DEFINES['_USE_MATH_DEFINES'] = True
|
||||
DEFINES['inline'] = "__inline"
|
||||
|
@ -22,24 +21,27 @@ if CONFIG['FFVPX_ASFLAGS']:
|
|||
else:
|
||||
ASFLAGS += ['-Pconfig_win64.asm']
|
||||
elif CONFIG['OS_ARCH'] == 'Darwin':
|
||||
USE_YASM = True
|
||||
# 32/64-bit macosx assemblers need to prefix symbols with an underscore.
|
||||
ASFLAGS += [
|
||||
'-Pconfig_darwin64.asm',
|
||||
'-DPREFIX'
|
||||
]
|
||||
else:
|
||||
elif CONFIG['CPU_ARCH'] != 'arm':
|
||||
USE_YASM = True
|
||||
# Default to unix, similar to how ASFLAGS setup works in configure.in
|
||||
ASFLAGS += ['-Pconfig_unix64.asm']
|
||||
# default disabled components
|
||||
ASFLAGS += ['-Pdefaults_disabled.asm']
|
||||
|
||||
if int(CONFIG['YASM_MAJOR_VERSION']) == 1 and int(CONFIG['YASM_MINOR_VERSION']) < 2:
|
||||
DEFINES['YASM_MISSING_AVX2'] = True
|
||||
ASFLAGS += [
|
||||
'-DHAVE_AVX2=0',
|
||||
'-DHAVE_AVX2_INTERNAL=0',
|
||||
'-DHAVE_AVX2_EXTERNAL=0',
|
||||
]
|
||||
if USE_YASM:
|
||||
# default disabled components
|
||||
ASFLAGS += ['-Pdefaults_disabled.asm']
|
||||
if int(CONFIG['YASM_MAJOR_VERSION']) == 1 and int(CONFIG['YASM_MINOR_VERSION']) < 2:
|
||||
DEFINES['YASM_MISSING_AVX2'] = True
|
||||
ASFLAGS += [
|
||||
'-DHAVE_AVX2=0',
|
||||
'-DHAVE_AVX2_INTERNAL=0',
|
||||
'-DHAVE_AVX2_EXTERNAL=0',
|
||||
]
|
||||
|
||||
|
||||
LOCAL_INCLUDES += ['/media/ffvpx']
|
||||
|
|
|
@ -0,0 +1,146 @@
|
|||
/*
|
||||
* Copyright (c) 2012 Mans Rullgard <mans@mansr.com>
|
||||
*
|
||||
* This file is part of FFmpeg
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "libavutil/arm/asm.S"
|
||||
|
||||
/*
 * In-place LPC reconstruction with a single coefficient:
 *     samples[i] += (samples[i - 1] * coeffs[0]) >> qlevel
 * Reached from ff_flac_lpc_16_arm when order < 2.
 *
 * NOTE(review): register roles assume AAPCS argument passing with the
 * prototype (samples, coeffs, order, qlevel, len): r0 = samples,
 * r1 = coeffs, r3 = qlevel, [sp] = len -- confirm. r2 (order) is not read;
 * it is reused as a scratch register.
 *
 * The main loop updates two samples per iteration; label 2 handles the
 * single sample that remains when the count is odd.
 */
function flac_lpc_16_1_arm
|
||||
/* r12 = len, read from the stack before the push moves sp */
ldr r12, [sp]
|
||||
push {r4, lr}
|
||||
/* r1 = coeffs[0] */
ldr r1, [r1]
|
||||
/* r12 = len - 2; the flags set here drive the beq/poplt below */
subs r12, r12, #2
|
||||
/* lr = samples[0], r0 now points at samples[1] (ldr leaves flags intact) */
ldr lr, [r0], #4
|
||||
/* len == 2: exactly one sample to update */
beq 2f
|
||||
/* len < 2: nothing to do */
it lt
|
||||
poplt {r4, pc}
|
||||
1:
|
||||
mul r4, lr, r1
|
||||
/* load the next two samples without advancing r0 */
ldm r0, {r2, lr}
|
||||
add_sh r2, r2, r4, asr r3
|
||||
/* the freshly updated sample feeds the prediction of the next one */
mul r4, r2, r1
|
||||
subs r12, r12, #2
|
||||
add_sh lr, lr, r4, asr r3
|
||||
/* write both samples back and advance r0 by two words */
stm r0!, {r2, lr}
|
||||
bgt 1b
|
||||
/* counter went negative: no leftover sample */
it lt
|
||||
poplt {r4, pc}
|
||||
2:
|
||||
/* tail: update the one remaining sample */
mul r4, lr, r1
|
||||
ldr r2, [r0]
|
||||
add_sh r2, r2, r4, asr r3
|
||||
str r2, [r0]
|
||||
pop {r4, pc}
|
||||
endfunc
|
||||
|
||||
/*
 * In-place LPC reconstruction with two coefficients:
 *     samples[i] += (samples[i-2] * coeffs[0] + samples[i-1] * coeffs[1]) >> qlevel
 * Reached from ff_flac_lpc_16_arm when order == 2.
 *
 * NOTE(review): register roles assume AAPCS argument passing with the
 * prototype (samples, coeffs, order, qlevel, len): r0 = samples,
 * r1 = coeffs, r2 = order, r3 = qlevel, [sp] = len -- confirm.
 *
 * Returns immediately when len - order <= 0. The main loop updates two
 * samples per iteration; label 2 handles a single leftover sample.
 */
function flac_lpc_16_2_arm
|
||||
ldr r12, [sp]
|
||||
/* r12 = len - order = number of samples to update */
subs r12, r12, r2
|
||||
it le
|
||||
bxle lr
|
||||
|
||||
push {r4-r9, lr}
|
||||
/* r6, r7 = the two history samples; r0 advances past them */
ldm r0!, {r6, r7}
|
||||
/* r8, r9 = coeffs[0], coeffs[1] */
ldm r1, {r8, r9}
|
||||
subs r12, r12, #1
|
||||
/* exactly one sample to update */
beq 2f
|
||||
1:
|
||||
mul r4, r6, r8
|
||||
/* r5 starts the prediction for the second sample of the pair */
mul r5, r7, r8
|
||||
mla r4, r7, r9, r4
|
||||
ldm r0, {r6, r7}
|
||||
add_sh r6, r6, r4, asr r3
|
||||
/* the first updated sample contributes to the second prediction */
mla r5, r6, r9, r5
|
||||
add_sh r7, r7, r5, asr r3
|
||||
/* r6, r7 become the history for the next iteration */
stm r0!, {r6, r7}
|
||||
subs r12, r12, #2
|
||||
bgt 1b
|
||||
/* counter went negative: no leftover sample */
it lt
|
||||
poplt {r4-r9, pc}
|
||||
2:
|
||||
/* tail: update the one remaining sample */
mul r4, r6, r8
|
||||
mla r4, r7, r9, r4
|
||||
ldr r5, [r0]
|
||||
add_sh r5, r5, r4, asr r3
|
||||
str r5, [r0]
|
||||
pop {r4-r9, pc}
|
||||
endfunc
|
||||
|
||||
/*
 * Exported entry point for in-place LPC reconstruction.
 *
 * Dispatches to flac_lpc_16_1_arm when order < 2 and to flac_lpc_16_2_arm
 * when order == 2; otherwise runs the generic loop below, which computes
 *     samples[i] += (sum over k of samples[i - order + k] * coeffs[k]) >> qlevel
 * for two consecutive samples per outer iteration.
 *
 * NOTE(review): register roles assume AAPCS argument passing with the
 * prototype (samples, coeffs, order, qlevel, len): r0 = samples,
 * r1 = coeffs, r2 = order, r3 = qlevel, [sp] = len -- confirm.
 *
 * Returns immediately when len - order <= 0.
 */
function ff_flac_lpc_16_arm, export=1
|
||||
cmp r2, #2
|
||||
blt flac_lpc_16_1_arm
|
||||
beq flac_lpc_16_2_arm
|
||||
|
||||
ldr r12, [sp]
|
||||
/* r12 = len - order = number of samples to update */
subs r12, r12, r2
|
||||
it le
|
||||
bxle lr
|
||||
|
||||
push {r4-r9, lr}
|
||||
|
||||
subs r12, r12, #1
|
||||
/* exactly one sample to update: use the scalar tail at label 3 */
beq 3f
|
||||
1:
|
||||
/* lr counts the taps handled by the inner loop, two per pass */
sub lr, r2, #2
|
||||
/* r4 / r5 accumulate the predictions for two consecutive samples */
mov r4, #0
|
||||
mov r5, #0
|
||||
|
||||
ldr r7, [r0], #4
|
||||
ldr r9, [r1], #4
|
||||
2:
|
||||
mla r4, r7, r9, r4
|
||||
ldm r0!, {r6, r7}
|
||||
mla r5, r6, r9, r5
|
||||
ldm r1!, {r8, r9}
|
||||
mla r4, r6, r8, r4
|
||||
subs lr, lr, #2
|
||||
mla r5, r7, r8, r5
|
||||
bgt 2b
|
||||
/* lr < 0: skip the extra tap step; lr == 0 falls through to it */
blt 6f
|
||||
|
||||
mla r4, r7, r9, r4
|
||||
ldr r7, [r0], #4
|
||||
mla r5, r7, r9, r5
|
||||
ldr r9, [r1], #4
|
||||
6:
|
||||
/* final tap for the first sample of the pair */
mla r4, r7, r9, r4
|
||||
ldm r0, {r6, r7}
|
||||
add_sh r6, r6, r4, asr r3
|
||||
/* the first updated sample is the last history term of the second */
mla r5, r6, r9, r5
|
||||
add_sh r7, r7, r5, asr r3
|
||||
stm r0!, {r6, r7}
|
||||
/* rewind by order words: r0 ends two samples past the previous window
   start, r1 is back at coeffs[0] */
sub r0, r0, r2, lsl #2
|
||||
sub r1, r1, r2, lsl #2
|
||||
|
||||
subs r12, r12, #2
|
||||
bgt 1b
|
||||
/* counter went negative: no leftover sample */
it lt
|
||||
poplt {r4-r9, pc}
|
||||
3:
|
||||
/* tail: one remaining sample, plain dot product over all order taps
   (r2 is consumed as the tap counter) */
mov r4, #0
|
||||
4:
|
||||
ldr r5, [r1], #4
|
||||
ldr r6, [r0], #4
|
||||
mla r4, r5, r6, r4
|
||||
subs r2, r2, #1
|
||||
bgt 4b
|
||||
ldr r5, [r0]
|
||||
add_sh r5, r5, r4, asr r3
|
||||
str r5, [r0]
|
||||
pop {r4-r9, pc}
|
||||
endfunc
|
|
@ -0,0 +1,32 @@
|
|||
/*
|
||||
* Copyright (c) 2012 Mans Rullgard <mans@mansr.com>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "libavcodec/flacdsp.h"
|
||||
#include "config.h"
|
||||
|
||||
void ff_flac_lpc_16_arm(int32_t *samples, const int coeffs[32], int order,
|
||||
int qlevel, int len);
|
||||
|
||||
/**
 * Install the ARM assembly LPC routine into a FLAC DSP context.
 *
 * Sets c->lpc16 to ff_flac_lpc_16_arm when CONFIG_FLAC_DECODER is non-zero;
 * otherwise the context is left untouched.
 *
 * @param c        DSP context whose lpc16 pointer may be overwritten
 * @param fmt      not used by this implementation
 * @param channels not used by this implementation
 * @param bps      not used by this implementation
 */
av_cold void ff_flacdsp_init_arm(FLACDSPContext *c, enum AVSampleFormat fmt, int channels,
|
||||
int bps)
|
||||
{
|
||||
if (CONFIG_FLAC_DECODER)
|
||||
c->lpc16 = ff_flac_lpc_16_arm;
|
||||
}
|
|
@ -0,0 +1,108 @@
|
|||
/*
|
||||
* simple math operations
|
||||
* Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVCODEC_ARM_MATHOPS_H
|
||||
#define AVCODEC_ARM_MATHOPS_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "config.h"
|
||||
#include "libavutil/common.h"
|
||||
|
||||
#if HAVE_INLINE_ASM
|
||||
|
||||
#if HAVE_ARMV6_INLINE
|
||||
#define MULH MULH
|
||||
/**
 * Multiply two signed 32-bit values and return the most significant
 * 32 bits of the 64-bit product, using a single SMMUL instruction.
 *
 * @param a first factor
 * @param b second factor
 * @return  upper word of a * b
 */
static inline av_const int MULH(int a, int b)
|
||||
{
|
||||
int r;
|
||||
__asm__ ("smmul %0, %1, %2" : "=r"(r) : "r"(a), "r"(b));
|
||||
return r;
|
||||
}
|
||||
|
||||
#define FASTDIV FASTDIV
|
||||
/**
 * Division by table lookup (ARMv6 inline-asm variant).
 *
 * For b > 2 the result is the upper 32 bits of the signed product
 * ff_inverse[b] * a (SMMUL). For b <= 2 the result is a logically shifted
 * right by one bit, regardless of the table entry.
 *
 * The table entry ff_inverse[b] is loaded unconditionally, so b must be a
 * valid index into ff_inverse.
 * NOTE(review): the b <= 2 path yields a >> 1 even for b < 2, so callers
 * presumably never pass b < 2 -- confirm.
 *
 * The output uses an early-clobber constraint because it is written by the
 * load before the input a is read; the compare clobbers the condition flags.
 *
 * @param a dividend
 * @param b divisor, used as an index into ff_inverse
 * @return  approximate quotient as described above
 */
static av_always_inline av_const int FASTDIV(int a, int b)
|
||||
{
|
||||
int r;
|
||||
__asm__ ("cmp %2, #2 \n\t"
|
||||
"ldr %0, [%3, %2, lsl #2] \n\t"
|
||||
"ite le \n\t"
|
||||
"lsrle %0, %1, #1 \n\t"
|
||||
"smmulgt %0, %0, %1 \n\t"
|
||||
: "=&r"(r) : "r"(a), "r"(b), "r"(ff_inverse) : "cc");
|
||||
return r;
|
||||
}
|
||||
|
||||
#else /* HAVE_ARMV6_INLINE */
|
||||
|
||||
#define FASTDIV FASTDIV
|
||||
/**
 * Division by table lookup (variant used when ARMv6 inline asm is not
 * available).
 *
 * UMULL forms the unsigned 64-bit product a * ff_inverse[b]; the upper
 * word is returned in r and the lower word lands in the scratch variable t,
 * which is discarded.
 *
 * @param a dividend
 * @param b divisor, used as an index into ff_inverse
 * @return  upper 32 bits of a * ff_inverse[b]
 */
static av_always_inline av_const int FASTDIV(int a, int b)
|
||||
{
|
||||
int r, t;
|
||||
__asm__ ("umull %1, %0, %2, %3"
|
||||
: "=&r"(r), "=&r"(t) : "r"(a), "r"(ff_inverse[b]));
|
||||
return r;
|
||||
}
|
||||
#endif
|
||||
|
||||
#define MLS64(d, a, b) MAC64(d, -(a), b)
|
||||
|
||||
#if HAVE_ARMV5TE_INLINE
|
||||
|
||||
/* signed 16x16 -> 32 multiply add accumulate */
|
||||
# define MAC16(rt, ra, rb) \
|
||||
__asm__ ("smlabb %0, %1, %2, %0" : "+r"(rt) : "r"(ra), "r"(rb));
|
||||
|
||||
/* signed 16x16 -> 32 multiply */
|
||||
# define MUL16 MUL16
|
||||
/**
 * Signed 16x16 -> 32 multiply using SMULBB, which multiplies the bottom
 * halfwords of its two source registers. Only the low 16 bits of each
 * argument take part in the product.
 *
 * @param ra first factor (low halfword used)
 * @param rb second factor (low halfword used)
 * @return   32-bit product
 */
static inline av_const int MUL16(int ra, int rb)
|
||||
{
|
||||
int rt;
|
||||
__asm__ ("smulbb %0, %1, %2" : "=r"(rt) : "r"(ra), "r"(rb));
|
||||
return rt;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#define mid_pred mid_pred
|
||||
/**
 * Return the median of three integers using conditional moves.
 *
 * Steps performed by the asm sequence:
 *   1. m = max(a, b) and a = min(a, b)  (first cmp plus the two movgt)
 *   2. a = max(a, c)                    (second cmp plus movle)
 *   3. m = min(m, a)                    (third cmp plus movgt)
 *
 * The local copy of a is modified as scratch (read-write operand), m is
 * early-clobber because it is written before b is last read, and the
 * compares clobber the condition flags.
 *
 * @param a first value
 * @param b second value
 * @param c third value
 * @return  the middle value of a, b and c
 */
static inline av_const int mid_pred(int a, int b, int c)
|
||||
{
|
||||
int m;
|
||||
__asm__ (
|
||||
"mov %0, %2 \n\t"
|
||||
"cmp %1, %2 \n\t"
|
||||
"itt gt \n\t"
|
||||
"movgt %0, %1 \n\t"
|
||||
"movgt %1, %2 \n\t"
|
||||
"cmp %1, %3 \n\t"
|
||||
"it le \n\t"
|
||||
"movle %1, %3 \n\t"
|
||||
"cmp %0, %1 \n\t"
|
||||
"it gt \n\t"
|
||||
"movgt %0, %1 \n\t"
|
||||
: "=&r"(m), "+r"(a)
|
||||
: "r"(b), "r"(c)
|
||||
: "cc");
|
||||
return m;
|
||||
}
|
||||
|
||||
#endif /* HAVE_INLINE_ASM */
|
||||
|
||||
#endif /* AVCODEC_ARM_MATHOPS_H */
|
|
@ -0,0 +1,14 @@
|
|||
# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
|
||||
# vim: set filetype=python:
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
SOURCES += [
|
||||
'flacdsp_arm.S',
|
||||
'flacdsp_init_arm.c',
|
||||
]
|
||||
|
||||
FINAL_LIBRARY = 'mozavcodec'
|
||||
|
||||
include('/media/ffvpx/ffvpxcommon.mozbuild')
|
|
@ -879,7 +879,9 @@ void ff_vp8dsp_init_mips(VP8DSPContext *c) {}
|
|||
void ff_vp9dsp_init_mips(VP9DSPContext *dsp, int bpp) {}
|
||||
void ff_vp9dsp_init_aarch64(VP9DSPContext *dsp, int bpp) {}
|
||||
void ff_vp9dsp_init_arm(VP9DSPContext *dsp, int bpp) {}
|
||||
#if !defined(__arm__)
|
||||
void ff_flacdsp_init_arm(FLACDSPContext *c, enum AVSampleFormat fmt, int channels, int bps) {}
|
||||
#endif
|
||||
#if !defined(HAVE_64BIT_BUILD)
|
||||
void ff_flac_decorrelate_indep8_16_sse2(uint8_t **out, int32_t **in, int channels, int len, int shift) {}
|
||||
void ff_flac_decorrelate_indep8_32_avx(uint8_t **out, int32_t **in, int channels, int len, int shift) {}
|
||||
|
|
|
@ -7,7 +7,10 @@
|
|||
# Due to duplicate file names, we compile libavutil/x86 in its own
|
||||
# moz.build file.
|
||||
if CONFIG['FFVPX_ASFLAGS']:
|
||||
DIRS += ['x86']
|
||||
if CONFIG['CPU_ARCH'] == 'x86' or CONFIG['CPU_ARCH'] == 'x86_64':
|
||||
DIRS += ['x86']
|
||||
elif CONFIG['CPU_ARCH'] == 'arm':
|
||||
DIRS += ['arm']
|
||||
|
||||
SharedLibrary('mozavcodec')
|
||||
SOURCES += [
|
||||
|
|
|
@ -0,0 +1,349 @@
|
|||
/*
|
||||
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#ifdef __ELF__
|
||||
# define ELF
|
||||
#else
|
||||
# define ELF @
|
||||
#endif
|
||||
|
||||
#if CONFIG_THUMB
|
||||
# define A @
|
||||
# define T
|
||||
#else
|
||||
# define A
|
||||
# define T @
|
||||
#endif
|
||||
|
||||
#if HAVE_AS_FUNC
|
||||
# define FUNC
|
||||
#else
|
||||
# define FUNC @
|
||||
#endif
|
||||
|
||||
#if HAVE_AS_FPU_DIRECTIVE
|
||||
# define FPU
|
||||
#else
|
||||
# define FPU @
|
||||
#endif
|
||||
|
||||
#if HAVE_NEON
|
||||
.arch armv7-a
|
||||
#elif HAVE_ARMV6T2
|
||||
.arch armv6t2
|
||||
#elif HAVE_ARMV6
|
||||
.arch armv6
|
||||
#elif HAVE_ARMV5TE
|
||||
.arch armv5te
|
||||
#endif
|
||||
#if HAVE_AS_OBJECT_ARCH
|
||||
ELF .object_arch armv4
|
||||
#endif
|
||||
|
||||
#if HAVE_NEON
|
||||
FPU .fpu neon
|
||||
ELF .eabi_attribute 10, 0 @ suppress Tag_FP_arch
|
||||
ELF .eabi_attribute 12, 0 @ suppress Tag_Advanced_SIMD_arch
|
||||
#elif HAVE_VFP
|
||||
FPU .fpu vfp
|
||||
ELF .eabi_attribute 10, 0 @ suppress Tag_FP_arch
|
||||
#endif
|
||||
|
||||
.syntax unified
|
||||
T .thumb
|
||||
ELF .eabi_attribute 25, 1 @ Tag_ABI_align_preserved
|
||||
ELF .section .note.GNU-stack,"",%progbits @ Mark stack as non-executable
|
||||
|
||||
.macro function name, export=0, align=2
|
||||
.set .Lpic_idx, 0
|
||||
.set .Lpic_gp, 0
|
||||
.macro endfunc
|
||||
.if .Lpic_idx
|
||||
.align 2
|
||||
.altmacro
|
||||
put_pic %(.Lpic_idx - 1)
|
||||
.noaltmacro
|
||||
.endif
|
||||
.if .Lpic_gp
|
||||
.unreq gp
|
||||
.endif
|
||||
ELF .size \name, . - \name
|
||||
FUNC .endfunc
|
||||
.purgem endfunc
|
||||
.endm
|
||||
.text
|
||||
.align \align
|
||||
.if \export
|
||||
.global EXTERN_ASM\name
|
||||
ELF .type EXTERN_ASM\name, %function
|
||||
FUNC .func EXTERN_ASM\name
|
||||
EXTERN_ASM\name:
|
||||
.else
|
||||
ELF .type \name, %function
|
||||
FUNC .func \name
|
||||
\name:
|
||||
.endif
|
||||
.endm
|
||||
|
||||
.macro const name, align=2, relocate=0
|
||||
.macro endconst
|
||||
ELF .size \name, . - \name
|
||||
.purgem endconst
|
||||
.endm
|
||||
.if HAVE_SECTION_DATA_REL_RO && \relocate
|
||||
.section .data.rel.ro
|
||||
.else
|
||||
.section .rodata
|
||||
.endif
|
||||
.align \align
|
||||
\name:
|
||||
.endm
|
||||
|
||||
#if !HAVE_ARMV6T2_EXTERNAL
|
||||
.macro movw rd, val
|
||||
mov \rd, \val & 255
|
||||
orr \rd, \val & ~255
|
||||
.endm
|
||||
#endif
|
||||
|
||||
.macro mov32 rd, val
|
||||
#if HAVE_ARMV6T2_EXTERNAL
|
||||
movw \rd, #(\val) & 0xffff
|
||||
.if (\val) >> 16
|
||||
movt \rd, #(\val) >> 16
|
||||
.endif
|
||||
#else
|
||||
ldr \rd, =\val
|
||||
#endif
|
||||
.endm
|
||||
|
||||
.macro put_pic num
|
||||
put_pic_\num
|
||||
.endm
|
||||
|
||||
.macro do_def_pic num, val, label
|
||||
.macro put_pic_\num
|
||||
.if \num
|
||||
.altmacro
|
||||
put_pic %(\num - 1)
|
||||
.noaltmacro
|
||||
.endif
|
||||
\label: .word \val
|
||||
.purgem put_pic_\num
|
||||
.endm
|
||||
.endm
|
||||
|
||||
.macro def_pic val, label
|
||||
.altmacro
|
||||
do_def_pic %.Lpic_idx, \val, \label
|
||||
.noaltmacro
|
||||
.set .Lpic_idx, .Lpic_idx + 1
|
||||
.endm
|
||||
|
||||
.macro ldpic rd, val, indir=0
|
||||
ldr \rd, .Lpicoff\@
|
||||
.Lpic\@:
|
||||
.if \indir
|
||||
A ldr \rd, [pc, \rd]
|
||||
T add \rd, pc
|
||||
T ldr \rd, [\rd]
|
||||
.else
|
||||
add \rd, pc
|
||||
.endif
|
||||
def_pic \val - (.Lpic\@ + (8 >> CONFIG_THUMB)), .Lpicoff\@
|
||||
.endm
|
||||
|
||||
.macro movrel rd, val
|
||||
#if CONFIG_PIC
|
||||
ldpic \rd, \val
|
||||
#elif HAVE_ARMV6T2_EXTERNAL && !defined(__APPLE__)
|
||||
movw \rd, #:lower16:\val
|
||||
movt \rd, #:upper16:\val
|
||||
#else
|
||||
ldr \rd, =\val
|
||||
#endif
|
||||
.endm
|
||||
|
||||
.macro movrelx rd, val, gp
|
||||
.ifc \rd,\gp
|
||||
.error "movrelx needs two distinct registers"
|
||||
.endif
|
||||
.ifc \rd\()_\gp,r12_
|
||||
.warning "movrelx rd=\rd without explicit set gp"
|
||||
.endif
|
||||
.ifc \rd\()_\gp,ip_
|
||||
.warning "movrelx rd=\rd without explicit set gp"
|
||||
.endif
|
||||
#if CONFIG_PIC && defined(__ELF__)
|
||||
.ifnb \gp
|
||||
.if .Lpic_gp
|
||||
.unreq gp
|
||||
.endif
|
||||
gp .req \gp
|
||||
ldpic gp, _GLOBAL_OFFSET_TABLE_
|
||||
.elseif !.Lpic_gp
|
||||
gp .req r12
|
||||
ldpic gp, _GLOBAL_OFFSET_TABLE_
|
||||
.endif
|
||||
.set .Lpic_gp, 1
|
||||
ldr \rd, .Lpicoff\@
|
||||
ldr \rd, [gp, \rd]
|
||||
def_pic \val(GOT), .Lpicoff\@
|
||||
#elif CONFIG_PIC && defined(__APPLE__)
|
||||
ldpic \rd, .Lpic\@, indir=1
|
||||
.non_lazy_symbol_pointer
|
||||
.Lpic\@:
|
||||
.indirect_symbol \val
|
||||
.word 0
|
||||
.text
|
||||
#else
|
||||
movrel \rd, \val
|
||||
#endif
|
||||
.endm
|
||||
|
||||
.macro add_sh rd, rn, rm, sh:vararg
|
||||
A add \rd, \rn, \rm, \sh
|
||||
T mov \rm, \rm, \sh
|
||||
T add \rd, \rn, \rm
|
||||
.endm
|
||||
|
||||
.macro ldr_pre rt, rn, rm:vararg
|
||||
A ldr \rt, [\rn, \rm]!
|
||||
T add \rn, \rn, \rm
|
||||
T ldr \rt, [\rn]
|
||||
.endm
|
||||
|
||||
.macro ldr_dpre rt, rn, rm:vararg
|
||||
A ldr \rt, [\rn, -\rm]!
|
||||
T sub \rn, \rn, \rm
|
||||
T ldr \rt, [\rn]
|
||||
.endm
|
||||
|
||||
.macro ldr_nreg rt, rn, rm:vararg
|
||||
A ldr \rt, [\rn, -\rm]
|
||||
T sub \rt, \rn, \rm
|
||||
T ldr \rt, [\rt]
|
||||
.endm
|
||||
|
||||
.macro ldr_post rt, rn, rm:vararg
|
||||
A ldr \rt, [\rn], \rm
|
||||
T ldr \rt, [\rn]
|
||||
T add \rn, \rn, \rm
|
||||
.endm
|
||||
|
||||
.macro ldrc_pre cc, rt, rn, rm:vararg
|
||||
A ldr\cc \rt, [\rn, \rm]!
|
||||
T itt \cc
|
||||
T add\cc \rn, \rn, \rm
|
||||
T ldr\cc \rt, [\rn]
|
||||
.endm
|
||||
|
||||
.macro ldrd_reg rt, rt2, rn, rm
|
||||
A ldrd \rt, \rt2, [\rn, \rm]
|
||||
T add \rt, \rn, \rm
|
||||
T ldrd \rt, \rt2, [\rt]
|
||||
.endm
|
||||
|
||||
.macro ldrd_post rt, rt2, rn, rm
|
||||
A ldrd \rt, \rt2, [\rn], \rm
|
||||
T ldrd \rt, \rt2, [\rn]
|
||||
T add \rn, \rn, \rm
|
||||
.endm
|
||||
|
||||
.macro ldrh_pre rt, rn, rm
|
||||
A ldrh \rt, [\rn, \rm]!
|
||||
T add \rn, \rn, \rm
|
||||
T ldrh \rt, [\rn]
|
||||
.endm
|
||||
|
||||
.macro ldrh_dpre rt, rn, rm
|
||||
A ldrh \rt, [\rn, -\rm]!
|
||||
T sub \rn, \rn, \rm
|
||||
T ldrh \rt, [\rn]
|
||||
.endm
|
||||
|
||||
.macro ldrh_post rt, rn, rm
|
||||
A ldrh \rt, [\rn], \rm
|
||||
T ldrh \rt, [\rn]
|
||||
T add \rn, \rn, \rm
|
||||
.endm
|
||||
|
||||
.macro ldrb_post rt, rn, rm
|
||||
A ldrb \rt, [\rn], \rm
|
||||
T ldrb \rt, [\rn]
|
||||
T add \rn, \rn, \rm
|
||||
.endm
|
||||
|
||||
.macro str_post rt, rn, rm:vararg
|
||||
A str \rt, [\rn], \rm
|
||||
T str \rt, [\rn]
|
||||
T add \rn, \rn, \rm
|
||||
.endm
|
||||
|
||||
.macro strb_post rt, rn, rm:vararg
|
||||
A strb \rt, [\rn], \rm
|
||||
T strb \rt, [\rn]
|
||||
T add \rn, \rn, \rm
|
||||
.endm
|
||||
|
||||
.macro strd_post rt, rt2, rn, rm
|
||||
A strd \rt, \rt2, [\rn], \rm
|
||||
T strd \rt, \rt2, [\rn]
|
||||
T add \rn, \rn, \rm
|
||||
.endm
|
||||
|
||||
.macro strh_pre rt, rn, rm
|
||||
A strh \rt, [\rn, \rm]!
|
||||
T add \rn, \rn, \rm
|
||||
T strh \rt, [\rn]
|
||||
.endm
|
||||
|
||||
.macro strh_dpre rt, rn, rm
|
||||
A strh \rt, [\rn, -\rm]!
|
||||
T sub \rn, \rn, \rm
|
||||
T strh \rt, [\rn]
|
||||
.endm
|
||||
|
||||
.macro strh_post rt, rn, rm
|
||||
A strh \rt, [\rn], \rm
|
||||
T strh \rt, [\rn]
|
||||
T add \rn, \rn, \rm
|
||||
.endm
|
||||
|
||||
.macro strh_dpost rt, rn, rm
|
||||
A strh \rt, [\rn], -\rm
|
||||
T strh \rt, [\rn]
|
||||
T sub \rn, \rn, \rm
|
||||
.endm
|
||||
|
||||
#if HAVE_VFP_ARGS
|
||||
ELF .eabi_attribute 28, 1
|
||||
# define VFP
|
||||
# define NOVFP @
|
||||
#else
|
||||
# define VFP @
|
||||
# define NOVFP
|
||||
#endif
|
||||
|
||||
#define GLUE(a, b) a ## b
|
||||
#define JOIN(a, b) GLUE(a, b)
|
||||
#define X(s) JOIN(EXTERN_ASM, s)
|
|
@ -0,0 +1,67 @@
|
|||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVUTIL_ARM_BSWAP_H
|
||||
#define AVUTIL_ARM_BSWAP_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "config.h"
|
||||
#include "libavutil/attributes.h"
|
||||
|
||||
#ifdef __ARMCC_VERSION
|
||||
|
||||
#if HAVE_ARMV6
|
||||
#define av_bswap32 av_bswap32
|
||||
/**
 * 32-bit byte swap for the ARM compiler (__ARMCC_VERSION) on ARMv6 and up.
 *
 * Delegates to the compiler's __rev intrinsic.
 * NOTE(review): __rev is presumably armcc's byte-reverse intrinsic mapping
 * to the REV instruction -- confirm against the compiler documentation.
 *
 * @param x value to swap
 * @return  result of __rev(x)
 */
static av_always_inline av_const uint32_t av_bswap32(uint32_t x)
|
||||
{
|
||||
return __rev(x);
|
||||
}
|
||||
#endif /* HAVE_ARMV6 */
|
||||
|
||||
#elif HAVE_INLINE_ASM
|
||||
|
||||
#if HAVE_ARMV6_INLINE
|
||||
#define av_bswap16 av_bswap16
|
||||
/**
 * 16-bit byte swap implemented with the ARMv6 REV16 instruction.
 *
 * REV16 swaps the two bytes inside each halfword of the register, so the
 * upper halfword of x is byte-swapped as well, not cleared.
 *
 * @param x value whose halfword bytes are swapped in place
 * @return  x after REV16
 */
static av_always_inline av_const unsigned av_bswap16(unsigned x)
|
||||
{
|
||||
__asm__("rev16 %0, %0" : "+r"(x));
|
||||
return x;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if AV_GCC_VERSION_AT_MOST(4,4)
|
||||
#define av_bswap32 av_bswap32
|
||||
/**
 * 32-bit byte swap via inline asm, compiled only for GCC 4.4 and older
 * (see the enclosing AV_GCC_VERSION_AT_MOST(4,4) guard).
 *
 * With ARMv6 inline asm available a single REV instruction is used.
 * Otherwise a four-instruction sequence is used:
 *     t = x ^ ror(x, 16);  t &= ~0xFF0000;  x = ror(x, 8);  x ^= t >> 8;
 * which reverses the byte order of x using the scratch register t.
 *
 * @param x value to swap
 * @return  x with its four bytes in reverse order
 */
static av_always_inline av_const uint32_t av_bswap32(uint32_t x)
|
||||
{
|
||||
#if HAVE_ARMV6_INLINE
|
||||
__asm__("rev %0, %0" : "+r"(x));
|
||||
#else
|
||||
uint32_t t;
|
||||
__asm__ ("eor %1, %0, %0, ror #16 \n\t"
|
||||
"bic %1, %1, #0xFF0000 \n\t"
|
||||
"mov %0, %0, ror #8 \n\t"
|
||||
"eor %0, %0, %1, lsr #8 \n\t"
|
||||
: "+r"(x), "=&r"(t));
|
||||
#endif /* HAVE_ARMV6_INLINE */
|
||||
return x;
|
||||
}
|
||||
#endif /* AV_GCC_VERSION_AT_MOST(4,4) */
|
||||
|
||||
#endif /* __ARMCC_VERSION */
|
||||
|
||||
#endif /* AVUTIL_ARM_BSWAP_H */
|
|
@ -0,0 +1,170 @@
|
|||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/cpu_internal.h"
|
||||
#include "config.h"
|
||||
|
||||
#define CORE_FLAG(f) \
|
||||
(AV_CPU_FLAG_ ## f * (HAVE_ ## f ## _EXTERNAL || HAVE_ ## f ## _INLINE))
|
||||
|
||||
#define CORE_CPU_FLAGS \
|
||||
(CORE_FLAG(ARMV5TE) | \
|
||||
CORE_FLAG(ARMV6) | \
|
||||
CORE_FLAG(ARMV6T2) | \
|
||||
CORE_FLAG(VFP) | \
|
||||
CORE_FLAG(VFPV3) | \
|
||||
CORE_FLAG(NEON))
|
||||
|
||||
#if defined __linux__ || defined __ANDROID__
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "libavutil/avstring.h"
|
||||
|
||||
#define AT_HWCAP 16
|
||||
|
||||
/* Relevant HWCAP values from kernel headers */
|
||||
#define HWCAP_VFP (1 << 6)
|
||||
#define HWCAP_EDSP (1 << 7)
|
||||
#define HWCAP_THUMBEE (1 << 11)
|
||||
#define HWCAP_NEON (1 << 12)
|
||||
#define HWCAP_VFPv3 (1 << 13)
|
||||
#define HWCAP_TLS (1 << 15)
|
||||
|
||||
/**
 * Read the AT_HWCAP entry from /proc/self/auxv.
 *
 * The file is consumed as a sequence of records made of two uint32_t
 * fields (type, value); scanning stops at the first record whose type is
 * AT_HWCAP or when a full record can no longer be read.
 *
 * @param hwcap receives the AT_HWCAP value; left unmodified on failure
 * @return 0 if an AT_HWCAP record was found, -1 if the file could not be
 *         opened or contained no such record
 */
static int get_hwcap(uint32_t *hwcap)
|
||||
{
|
||||
struct { uint32_t a_type; uint32_t a_val; } auxv;
|
||||
FILE *f = fopen("/proc/self/auxv", "r");
|
||||
/* pessimistic default: stays -1 unless AT_HWCAP is found */
int err = -1;
|
||||
|
||||
if (!f)
|
||||
return -1;
|
||||
|
||||
while (fread(&auxv, sizeof(auxv), 1, f) > 0) {
|
||||
if (auxv.a_type == AT_HWCAP) {
|
||||
*hwcap = auxv.a_val;
|
||||
err = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
fclose(f);
|
||||
return err;
|
||||
}
|
||||
|
||||
/**
 * Build an HWCAP-style bit mask by parsing /proc/cpuinfo.
 *
 * Lines are read in chunks of at most sizeof(buf) - 1 characters. Only the
 * first chunk that starts with "Features" is examined; each feature name is
 * matched as a substring surrounded by single spaces, and scanning stops
 * after that chunk.
 *
 * @param hwcap set to 0 once the file is open, then OR-ed with the HWCAP_*
 *              bits of every recognised feature; untouched if the file
 *              cannot be opened
 * @return -1 if /proc/cpuinfo could not be opened, 0 otherwise (including
 *         when no "Features" line was found, in which case *hwcap is 0)
 */
static int get_cpuinfo(uint32_t *hwcap)
|
||||
{
|
||||
FILE *f = fopen("/proc/cpuinfo", "r");
|
||||
char buf[200];
|
||||
|
||||
if (!f)
|
||||
return -1;
|
||||
|
||||
*hwcap = 0;
|
||||
while (fgets(buf, sizeof(buf), f)) {
|
||||
if (av_strstart(buf, "Features", NULL)) {
|
||||
if (strstr(buf, " edsp "))
|
||||
*hwcap |= HWCAP_EDSP;
|
||||
if (strstr(buf, " tls "))
|
||||
*hwcap |= HWCAP_TLS;
|
||||
if (strstr(buf, " thumbee "))
|
||||
*hwcap |= HWCAP_THUMBEE;
|
||||
if (strstr(buf, " vfp "))
|
||||
*hwcap |= HWCAP_VFP;
|
||||
if (strstr(buf, " vfpv3 "))
|
||||
*hwcap |= HWCAP_VFPv3;
|
||||
/* "asimd" is treated the same as "neon" */
if (strstr(buf, " neon ") || strstr(buf, " asimd "))
|
||||
*hwcap |= HWCAP_NEON;
|
||||
if (strstr(buf, " fp ")) // Listed on 64 bit ARMv8 kernels
|
||||
*hwcap |= HWCAP_VFP | HWCAP_VFPv3;
|
||||
break;
|
||||
}
|
||||
}
|
||||
fclose(f);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * Compute the AV_CPU_FLAG_* mask for the running ARM CPU (Linux/Android
 * build of this function).
 *
 * Starts from CORE_CPU_FLAGS, the features enabled at build time, and adds
 * the capabilities reported by get_hwcap(), falling back to get_cpuinfo()
 * when that fails. If both sources fail, the build-time flags are returned
 * unchanged. Derived flags (ARMV6T2, ARMV6, SETEND, VFP_VM) are then
 * inferred from the collected ones.
 *
 * @return bit mask of AV_CPU_FLAG_* values
 */
int ff_get_cpu_flags_arm(void)
|
||||
{
|
||||
int flags = CORE_CPU_FLAGS;
|
||||
uint32_t hwcap;
|
||||
|
||||
if (get_hwcap(&hwcap) < 0)
|
||||
if (get_cpuinfo(&hwcap) < 0)
|
||||
return flags;
|
||||
|
||||
#define check_cap(cap, flag) do { \
|
||||
if (hwcap & HWCAP_ ## cap) \
|
||||
flags |= AV_CPU_FLAG_ ## flag; \
|
||||
} while (0)
|
||||
|
||||
/* No flags explicitly indicate v6 or v6T2 so check others which
|
||||
imply support. */
|
||||
check_cap(EDSP, ARMV5TE);
|
||||
check_cap(TLS, ARMV6);
|
||||
check_cap(THUMBEE, ARMV6T2);
|
||||
check_cap(VFP, VFP);
|
||||
check_cap(VFPv3, VFPV3);
|
||||
check_cap(NEON, NEON);
|
||||
|
||||
/* The v6 checks above are not reliable so let higher flags
|
||||
trickle down. */
|
||||
if (flags & (AV_CPU_FLAG_VFPV3 | AV_CPU_FLAG_NEON))
|
||||
flags |= AV_CPU_FLAG_ARMV6T2;
|
||||
else if (flags & (AV_CPU_FLAG_ARMV6T2 | AV_CPU_FLAG_ARMV6))
|
||||
/* Some functions use the 'setend' instruction which is deprecated on ARMv8
|
||||
* and serializing on some ARMv7 cores. This ensures such functions
|
||||
* are only enabled on ARMv6. */
|
||||
flags |= AV_CPU_FLAG_SETEND;
|
||||
|
||||
/* ARMv6T2 implies ARMv6 */
if (flags & AV_CPU_FLAG_ARMV6T2)
|
||||
flags |= AV_CPU_FLAG_ARMV6;
|
||||
|
||||
/* set the virtual VFPv2 vector mode flag */
|
||||
if ((flags & AV_CPU_FLAG_VFP) && !(flags & (AV_CPU_FLAG_VFPV3 | AV_CPU_FLAG_NEON)))
|
||||
flags |= AV_CPU_FLAG_VFP_VM;
|
||||
|
||||
return flags;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/**
 * Compute the AV_CPU_FLAG_* mask without any runtime detection (build used
 * when neither __linux__ nor __ANDROID__ is defined).
 *
 * Each flag is multiplied by its HAVE_* configuration macro, so only
 * features enabled at build time are reported. SETEND is reported only when
 * both HAVE_NEON and HAVE_VFPV3 are zero.
 *
 * @return bit mask of AV_CPU_FLAG_* values
 */
int ff_get_cpu_flags_arm(void)
|
||||
{
|
||||
return AV_CPU_FLAG_ARMV5TE * HAVE_ARMV5TE |
|
||||
AV_CPU_FLAG_ARMV6 * HAVE_ARMV6 |
|
||||
AV_CPU_FLAG_ARMV6T2 * HAVE_ARMV6T2 |
|
||||
AV_CPU_FLAG_VFP * HAVE_VFP |
|
||||
AV_CPU_FLAG_VFPV3 * HAVE_VFPV3 |
|
||||
AV_CPU_FLAG_NEON * HAVE_NEON |
|
||||
AV_CPU_FLAG_SETEND * !(HAVE_NEON | HAVE_VFPV3);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/**
 * Report the alignment value chosen for the current CPU.
 *
 * @return 16 when av_get_cpu_flags() includes AV_CPU_FLAG_NEON, 8 otherwise
 */
size_t ff_get_cpu_max_align_arm(void)
|
||||
{
|
||||
int flags = av_get_cpu_flags();
|
||||
|
||||
if (flags & AV_CPU_FLAG_NEON)
|
||||
return 16;
|
||||
|
||||
return 8;
|
||||
}
|
|
@ -0,0 +1,39 @@
|
|||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVUTIL_ARM_CPU_H
|
||||
#define AVUTIL_ARM_CPU_H
|
||||
|
||||
#include "config.h"
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/cpu_internal.h"
|
||||
|
||||
#define have_armv5te(flags) CPUEXT(flags, ARMV5TE)
|
||||
#define have_armv6(flags) CPUEXT(flags, ARMV6)
|
||||
#define have_armv6t2(flags) CPUEXT(flags, ARMV6T2)
|
||||
#define have_vfp(flags) CPUEXT(flags, VFP)
|
||||
#define have_vfpv3(flags) CPUEXT(flags, VFPV3)
|
||||
#define have_neon(flags) CPUEXT(flags, NEON)
|
||||
#define have_setend(flags) CPUEXT(flags, SETEND)
|
||||
|
||||
/* some functions use the VFPv2 vector mode which is deprecated in ARMv7-A
|
||||
* and might trap on such CPU depending on the OS configuration */
|
||||
#define have_vfp_vm(flags) \
|
||||
(HAVE_VFP && ((flags) & AV_CPU_FLAG_VFP_VM))
|
||||
|
||||
#endif /* AVUTIL_ARM_CPU_H */
|
|
@ -0,0 +1,29 @@
|
|||
/*
|
||||
* Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVUTIL_ARM_FLOAT_DSP_ARM_H
|
||||
#define AVUTIL_ARM_FLOAT_DSP_ARM_H
|
||||
|
||||
#include "libavutil/float_dsp.h"
|
||||
|
||||
void ff_float_dsp_init_vfp(AVFloatDSPContext *fdsp, int cpu_flags);
|
||||
void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp);
|
||||
|
||||
#endif /* AVUTIL_ARM_FLOAT_DSP_ARM_H */
|
|
@ -0,0 +1,32 @@
|
|||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "libavutil/attributes.h"
|
||||
#include "libavutil/float_dsp.h"
|
||||
#include "cpu.h"
|
||||
#include "float_dsp_arm.h"
|
||||
|
||||
/**
 * Install the ARM float DSP routines that the running CPU supports.
 *
 * The VFP initialiser runs first and the NEON initialiser second, so any
 * function pointer that both set ends up pointing at the NEON version.
 *
 * @param fdsp context whose function pointers are updated in place
 */
av_cold void ff_float_dsp_init_arm(AVFloatDSPContext *fdsp)
{
    const int flags = av_get_cpu_flags();

    if (have_vfp(flags)) {
        ff_float_dsp_init_vfp(fdsp, flags);
    }

    if (have_neon(flags)) {
        ff_float_dsp_init_neon(fdsp);
    }
}
|
|
@ -0,0 +1,59 @@
|
|||
/*
|
||||
* ARM NEON optimised Float DSP functions
|
||||
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "libavutil/attributes.h"
|
||||
#include "libavutil/float_dsp.h"
|
||||
#include "float_dsp_arm.h"
|
||||
|
||||
void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int len);
|
||||
|
||||
void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul,
|
||||
int len);
|
||||
|
||||
void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul,
|
||||
int len);
|
||||
|
||||
void ff_vector_fmul_window_neon(float *dst, const float *src0,
|
||||
const float *src1, const float *win, int len);
|
||||
|
||||
void ff_vector_fmul_add_neon(float *dst, const float *src0, const float *src1,
|
||||
const float *src2, int len);
|
||||
|
||||
void ff_vector_fmul_reverse_neon(float *dst, const float *src0,
|
||||
const float *src1, int len);
|
||||
|
||||
void ff_butterflies_float_neon(float *v1, float *v2, int len);
|
||||
|
||||
float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len);
|
||||
|
||||
av_cold void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp)
|
||||
{
|
||||
fdsp->vector_fmul = ff_vector_fmul_neon;
|
||||
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_neon;
|
||||
fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_neon;
|
||||
fdsp->vector_fmul_window = ff_vector_fmul_window_neon;
|
||||
fdsp->vector_fmul_add = ff_vector_fmul_add_neon;
|
||||
fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_neon;
|
||||
fdsp->butterflies_float = ff_butterflies_float_neon;
|
||||
fdsp->scalarproduct_float = ff_scalarproduct_float_neon;
|
||||
}
|
|
@ -0,0 +1,46 @@
|
|||
/*
|
||||
* Copyright (c) 2008 Siarhei Siamashka <ssvb@users.sourceforge.net>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "libavutil/attributes.h"
|
||||
#include "libavutil/float_dsp.h"
|
||||
#include "cpu.h"
|
||||
#include "float_dsp_arm.h"
|
||||
|
||||
void ff_vector_fmul_vfp(float *dst, const float *src0, const float *src1,
|
||||
int len);
|
||||
|
||||
void ff_vector_fmul_window_vfp(float *dst, const float *src0,
|
||||
const float *src1, const float *win, int len);
|
||||
|
||||
void ff_vector_fmul_reverse_vfp(float *dst, const float *src0,
|
||||
const float *src1, int len);
|
||||
|
||||
void ff_butterflies_float_vfp(float *av_restrict v1, float *av_restrict v2, int len);
|
||||
|
||||
av_cold void ff_float_dsp_init_vfp(AVFloatDSPContext *fdsp, int cpu_flags)
|
||||
{
|
||||
if (have_vfp_vm(cpu_flags)) {
|
||||
fdsp->vector_fmul = ff_vector_fmul_vfp;
|
||||
fdsp->vector_fmul_window = ff_vector_fmul_window_vfp;
|
||||
}
|
||||
fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_vfp;
|
||||
if (have_vfp_vm(cpu_flags))
|
||||
fdsp->butterflies_float = ff_butterflies_float_vfp;
|
||||
}
|
|
@ -0,0 +1,271 @@
|
|||
/*
|
||||
* ARM NEON optimised Float DSP functions
|
||||
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
#include "asm.S"
|
||||
|
||||
/*
 * C prototype: void ff_vector_fmul_neon(float *dst, const float *src0,
 *                                       const float *src1, int len)
 * Multiplies the two inputs element by element and stores the products.
 * Register roles in this routine: r0 = store pointer, r1 and r2 = load
 * pointers, r3 = element count, ip = count for the 16-element loop.
 * The first 8 elements are loaded before any count test and every path
 * advances by 8 or 16 elements, so len is presumably a non-zero multiple
 * of 8 and the buffers 16-byte aligned (:128 hints) - TODO confirm.
 */
function ff_vector_fmul_neon, export=1
|
||||
subs r3, r3, #8
|
||||
vld1.32 {d0-d3}, [r1,:128]!
|
||||
vld1.32 {d4-d7}, [r2,:128]!
|
||||
vmul.f32 q8, q0, q2
|
||||
vmul.f32 q9, q1, q3
|
||||
/* flags still come from the subs above: only 8 elements, just store q8/q9 */
beq 3f
|
||||
/* ip = remaining count rounded down to a multiple of 16 */
bics ip, r3, #15
|
||||
beq 2f
|
||||
/* main loop: stores the 8 pending products plus 8 new ones, keeps 8 pending */
1: subs ip, ip, #16
|
||||
vld1.32 {d0-d1}, [r1,:128]!
|
||||
vld1.32 {d4-d5}, [r2,:128]!
|
||||
vmul.f32 q10, q0, q2
|
||||
vld1.32 {d2-d3}, [r1,:128]!
|
||||
vld1.32 {d6-d7}, [r2,:128]!
|
||||
vmul.f32 q11, q1, q3
|
||||
vst1.32 {d16-d19},[r0,:128]!
|
||||
vld1.32 {d0-d1}, [r1,:128]!
|
||||
vld1.32 {d4-d5}, [r2,:128]!
|
||||
vmul.f32 q8, q0, q2
|
||||
vld1.32 {d2-d3}, [r1,:128]!
|
||||
vld1.32 {d6-d7}, [r2,:128]!
|
||||
vmul.f32 q9, q1, q3
|
||||
vst1.32 {d20-d23},[r0,:128]!
|
||||
/* loop condition is the subs at label 1 */
bne 1b
|
||||
ands r3, r3, #15
|
||||
beq 3f
|
||||
/* tail: one more group of 8, interleaving the pending stores with new products */
2: vld1.32 {d0-d1}, [r1,:128]!
|
||||
vld1.32 {d4-d5}, [r2,:128]!
|
||||
vst1.32 {d16-d17},[r0,:128]!
|
||||
vmul.f32 q8, q0, q2
|
||||
vld1.32 {d2-d3}, [r1,:128]!
|
||||
vld1.32 {d6-d7}, [r2,:128]!
|
||||
vst1.32 {d18-d19},[r0,:128]!
|
||||
vmul.f32 q9, q1, q3
|
||||
/* flush the last 8 pending products */
3: vst1.32 {d16-d19},[r0,:128]!
|
||||
bx lr
|
||||
endfunc
|
||||
|
||||
/*
 * C prototype: void ff_vector_fmac_scalar_neon(float *dst, const float *src,
 *                                              float mul, int len)
 * Multiply-accumulate with a scalar: each stored value is the value loaded
 * through acc plus the value loaded through r1 times the scalar in q15.
 * acc starts as a copy of r0, so the accumulator input is the destination
 * buffer itself.
 *
 * VFP/NOVFP are prefixes defined outside this file (asm.S); presumably they
 * pick the line matching the float argument-passing convention, which is
 * why len, acc and the scalar source differ between the variants.
 *
 * The main loop handles 16 elements per pass and the trailing loop 4 per
 * pass. All accesses carry :128 alignment hints.
 */
function ff_vector_fmac_scalar_neon, export=1
VFP     len .req r2
VFP     acc .req r3
NOVFP   len .req r3
NOVFP   acc .req r2
        /* broadcast the scalar multiplier to all four lanes of q15 */
VFP     vdup.32         q15, d0[0]
NOVFP   vdup.32         q15, r2
        /* r12 = len rounded down to a multiple of 16 */
        bics            r12, len, #15
        mov             acc, r0
        beq             3f
        vld1.32         {q0},     [r1,:128]!
        vld1.32         {q8},     [acc,:128]!
        vld1.32         {q1},     [r1,:128]!
        vld1.32         {q9},     [acc,:128]!
        /* main loop: 16 elements per pass, loads overlapped with the vmla ops */
1:      vmla.f32        q8,  q0,  q15
        vld1.32         {q2},     [r1,:128]!
        vld1.32         {q10},    [acc,:128]!
        vmla.f32        q9,  q1,  q15
        vld1.32         {q3},     [r1,:128]!
        vld1.32         {q11},    [acc,:128]!
        vmla.f32        q10, q2,  q15
        vst1.32         {q8},     [r0,:128]!
        vmla.f32        q11, q3,  q15
        vst1.32         {q9},     [r0,:128]!
        subs            r12, r12, #16
        beq             2f
        vld1.32         {q0},     [r1,:128]!
        vld1.32         {q8},     [acc,:128]!
        vst1.32         {q10},    [r0,:128]!
        vld1.32         {q1},     [r1,:128]!
        vld1.32         {q9},     [acc,:128]!
        vst1.32         {q11},    [r0,:128]!
        b               1b
        /* flush the second half of the last 16-element group */
2:      vst1.32         {q10},    [r0,:128]!
        vst1.32         {q11},    [r0,:128]!
        ands            len, len, #15
        it              eq
        bxeq            lr
        /* remainder loop: 4 elements per pass until len drops to zero or below */
3:      vld1.32         {q0},     [r1,:128]!
        vld1.32         {q8},     [acc,:128]!
        vmla.f32        q8,  q0,  q15
        vst1.32         {q8},     [r0,:128]!
        subs            len, len, #4
        bgt             3b
        bx              lr
        /* release both aliases so they do not leak into later functions */
        .unreq          len
        .unreq          acc
endfunc
|
||||
|
||||
/*
 * C prototype: void ff_vector_fmul_scalar_neon(float *dst, const float *src,
 *                                              float mul, int len)
 * Multiplies every element loaded through r1 by the scalar held in q8 and
 * stores the result through r0. VFP/NOVFP are prefixes defined outside this
 * file (asm.S); presumably they select the line matching the float
 * argument-passing convention - TODO confirm.
 * The main loop handles 16 elements per pass, the trailing loop 4 per pass.
 */
function ff_vector_fmul_scalar_neon, export=1
|
||||
VFP len .req r2
|
||||
NOVFP len .req r3
|
||||
/* broadcast the scalar to all four lanes of q8 */
VFP vdup.32 q8, d0[0]
|
||||
NOVFP vdup.32 q8, r2
|
||||
/* r12 = len rounded down to a multiple of 16 */
bics r12, len, #15
|
||||
beq 3f
|
||||
vld1.32 {q0},[r1,:128]!
|
||||
vld1.32 {q1},[r1,:128]!
|
||||
/* main loop: 16 elements per pass */
1: vmul.f32 q0, q0, q8
|
||||
vld1.32 {q2},[r1,:128]!
|
||||
vmul.f32 q1, q1, q8
|
||||
vld1.32 {q3},[r1,:128]!
|
||||
vmul.f32 q2, q2, q8
|
||||
vst1.32 {q0},[r0,:128]!
|
||||
vmul.f32 q3, q3, q8
|
||||
vst1.32 {q1},[r0,:128]!
|
||||
subs r12, r12, #16
|
||||
beq 2f
|
||||
vld1.32 {q0},[r1,:128]!
|
||||
vst1.32 {q2},[r0,:128]!
|
||||
vld1.32 {q1},[r1,:128]!
|
||||
vst1.32 {q3},[r0,:128]!
|
||||
b 1b
|
||||
/* flush the second half of the last 16-element group */
2: vst1.32 {q2},[r0,:128]!
|
||||
vst1.32 {q3},[r0,:128]!
|
||||
ands len, len, #15
|
||||
it eq
|
||||
bxeq lr
|
||||
/* remainder loop: 4 elements per pass until len drops to zero or below */
3: vld1.32 {q0},[r1,:128]!
|
||||
vmul.f32 q0, q0, q8
|
||||
vst1.32 {q0},[r0,:128]!
|
||||
subs len, len, #4
|
||||
bgt 3b
|
||||
bx lr
|
||||
.unreq len
|
||||
endfunc
|
||||
|
||||
/*
 * C prototype: void ff_vector_fmul_window_neon(float *dst, const float *src0,
 *                  const float *src1, const float *win, int len)
 * Windowed multiply that works from both ends at once: r1, r3 and r0 walk
 * forwards while r2, r4 and ip walk backwards in 16-byte steps (r5 = -16).
 * Each pass consumes 4 floats through every pointer and writes 4 floats at
 * the front (r0) and 4 at the back (ip). lr counts down from len in steps
 * of 4 and the loop ends only on exactly zero, so len is presumably a
 * non-zero multiple of 4 - TODO confirm.
 */
function ff_vector_fmul_window_neon, export=1
|
||||
push {r4,r5,lr}
|
||||
/* fifth argument (len) sits just above the three registers pushed above */
ldr lr, [sp, #12]
|
||||
/* the next five instructions compute:
 *   r2 = src1 + 4*len - 16, r4 = win + 8*len - 16, ip = dst + 8*len - 16 */
sub r2, r2, #8
|
||||
sub r5, lr, #2
|
||||
add r2, r2, r5, lsl #2
|
||||
add r4, r3, r5, lsl #3
|
||||
add ip, r0, r5, lsl #3
|
||||
/* post-index step for the three backward-walking pointers */
mov r5, #-16
|
||||
vld1.32 {d0,d1}, [r1,:128]!
|
||||
vld1.32 {d2,d3}, [r2,:128], r5
|
||||
vld1.32 {d4,d5}, [r3,:128]!
|
||||
vld1.32 {d6,d7}, [r4,:128], r5
|
||||
1: subs lr, lr, #4
|
||||
vmul.f32 d22, d0, d4
|
||||
/* vrev64 swaps the two floats inside each d register of the backward data */
vrev64.32 q3, q3
|
||||
vmul.f32 d23, d1, d5
|
||||
vrev64.32 q1, q1
|
||||
vmul.f32 d20, d0, d7
|
||||
vmul.f32 d21, d1, d6
|
||||
/* flags from the subs at label 1: last group goes to the epilogue at 2 */
beq 2f
|
||||
vmla.f32 d22, d3, d7
|
||||
vld1.32 {d0,d1}, [r1,:128]!
|
||||
vmla.f32 d23, d2, d6
|
||||
vld1.32 {d18,d19},[r2,:128], r5
|
||||
vmls.f32 d20, d3, d4
|
||||
vld1.32 {d24,d25},[r3,:128]!
|
||||
vmls.f32 d21, d2, d5
|
||||
vld1.32 {d6,d7}, [r4,:128], r5
|
||||
/* move the data loaded for the next pass into the working registers */
vmov q1, q9
|
||||
vrev64.32 q11, q11
|
||||
vmov q2, q12
|
||||
vswp d22, d23
|
||||
vst1.32 {d20,d21},[r0,:128]!
|
||||
vst1.32 {d22,d23},[ip,:128], r5
|
||||
b 1b
|
||||
/* epilogue: same arithmetic as the loop body, without further loads */
2: vmla.f32 d22, d3, d7
|
||||
vmla.f32 d23, d2, d6
|
||||
vmls.f32 d20, d3, d4
|
||||
vmls.f32 d21, d2, d5
|
||||
vrev64.32 q11, q11
|
||||
vswp d22, d23
|
||||
vst1.32 {d20,d21},[r0,:128]!
|
||||
vst1.32 {d22,d23},[ip,:128], r5
|
||||
pop {r4,r5,pc}
|
||||
endfunc
|
||||
|
||||
/*
 * C prototype: void ff_vector_fmul_add_neon(float *dst, const float *src0,
 *                  const float *src1, const float *src2, int len)
 * Stores (value via r1) * (value via r2) + (value via r3) through r0,
 * 8 elements per pass. r12 holds the count, read from the stack. The loop
 * ends only when the count reaches exactly zero, so len is presumably a
 * non-zero multiple of 8 - TODO confirm.
 */
function ff_vector_fmul_add_neon, export=1
|
||||
/* fifth argument (len) is read from the stack */
ldr r12, [sp]
|
||||
vld1.32 {q0-q1}, [r1,:128]!
|
||||
vld1.32 {q8-q9}, [r2,:128]!
|
||||
vld1.32 {q2-q3}, [r3,:128]!
|
||||
vmul.f32 q10, q0, q8
|
||||
vmul.f32 q11, q1, q9
|
||||
/* q12/q13 = products + addend; stored on the next pass or at label 2 */
1: vadd.f32 q12, q2, q10
|
||||
vadd.f32 q13, q3, q11
|
||||
pld [r1, #16]
|
||||
pld [r2, #16]
|
||||
pld [r3, #16]
|
||||
subs r12, r12, #8
|
||||
beq 2f
|
||||
vld1.32 {q0}, [r1,:128]!
|
||||
vld1.32 {q8}, [r2,:128]!
|
||||
vmul.f32 q10, q0, q8
|
||||
vld1.32 {q1}, [r1,:128]!
|
||||
vld1.32 {q9}, [r2,:128]!
|
||||
vmul.f32 q11, q1, q9
|
||||
vld1.32 {q2-q3}, [r3,:128]!
|
||||
vst1.32 {q12-q13},[r0,:128]!
|
||||
b 1b
|
||||
/* store the final group of 8 results */
2: vst1.32 {q12-q13},[r0,:128]!
|
||||
bx lr
|
||||
endfunc
|
||||
|
||||
/*
 * C prototype: void ff_vector_fmul_reverse_neon(float *dst, const float *src0,
 *                  const float *src1, int len)
 * Multiplies the data read forwards through r1 by the data read backwards
 * through r2 and stores the products forwards through r0, 8 elements per
 * pass. The loop ends only when r3 reaches exactly zero, so len is
 * presumably a non-zero multiple of 8 - TODO confirm.
 */
function ff_vector_fmul_reverse_neon, export=1
|
||||
/* r2 = src1 + 4*len - 32: start of the last 8 floats; r12 = backward step */
add r2, r2, r3, lsl #2
|
||||
sub r2, r2, #32
|
||||
mov r12, #-32
|
||||
vld1.32 {q0-q1}, [r1,:128]!
|
||||
vld1.32 {q2-q3}, [r2,:128], r12
|
||||
1: pld [r1, #32]
|
||||
/* vrev64 swaps floats within each d register; the d-register operands
 * below are then taken in descending order (d7, d6, d5, d4) */
vrev64.32 q3, q3
|
||||
vmul.f32 d16, d0, d7
|
||||
vmul.f32 d17, d1, d6
|
||||
pld [r2, #-32]
|
||||
vrev64.32 q2, q2
|
||||
vmul.f32 d18, d2, d5
|
||||
vmul.f32 d19, d3, d4
|
||||
subs r3, r3, #8
|
||||
beq 2f
|
||||
vld1.32 {q0-q1}, [r1,:128]!
|
||||
vld1.32 {q2-q3}, [r2,:128], r12
|
||||
vst1.32 {q8-q9}, [r0,:128]!
|
||||
b 1b
|
||||
/* store the final group of 8 products */
2: vst1.32 {q8-q9}, [r0,:128]!
|
||||
bx lr
|
||||
endfunc
|
||||
|
||||
/*
 * C prototype: void ff_butterflies_float_neon(float *v1, float *v2, int len)
 * In-place butterfly, 4 elements per pass: the sum of the two inputs is
 * written back through r0 and the difference (r0 data minus r1 data)
 * through r1. The loop runs until r2 drops to zero or below.
 */
function ff_butterflies_float_neon, export=1
|
||||
/* loads do not advance the pointers; the stores below do */
1: vld1.32 {q0},[r0,:128]
|
||||
vld1.32 {q1},[r1,:128]
|
||||
vsub.f32 q2, q0, q1
|
||||
vadd.f32 q1, q0, q1
|
||||
vst1.32 {q2},[r1,:128]!
|
||||
vst1.32 {q1},[r0,:128]!
|
||||
subs r2, r2, #4
|
||||
bgt 1b
|
||||
bx lr
|
||||
endfunc
|
||||
|
||||
/*
 * C prototype: float ff_scalarproduct_float_neon(const float *v1,
 *                  const float *v2, int len)
 * Accumulates the products of the two inputs in the four lanes of q2,
 * 4 elements per pass, then adds the lanes together into d0. The loop runs
 * until r2 drops to zero or below.
 */
function ff_scalarproduct_float_neon, export=1
|
||||
/* clear the four partial sums */
vmov.f32 q2, #0.0
|
||||
1: vld1.32 {q0},[r0,:128]!
|
||||
vld1.32 {q1},[r1,:128]!
|
||||
vmla.f32 q2, q0, q1
|
||||
subs r2, r2, #4
|
||||
bgt 1b
|
||||
/* horizontal reduction: 4 lanes -> 2 lanes -> 1 value in d0[0] */
vadd.f32 d0, d4, d5
|
||||
vpadd.f32 d0, d0, d0
|
||||
/* NOVFP variant only: copy the result into r0 as well */
NOVFP vmov.32 r0, d0[0]
|
||||
bx lr
|
||||
endfunc
|
|
@ -0,0 +1,457 @@
|
|||
/*
|
||||
* Copyright (c) 2008 Siarhei Siamashka <ssvb@users.sourceforge.net>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
#include "asm.S"
|
||||
|
||||
/**
|
||||
* Assume that len is a positive number and is a multiple of 8
|
||||
*/
|
||||
@ void ff_vector_fmul_vfp(float *dst, const float *src0, const float *src1, int len)
|
||||
/*
 * Element-wise multiply using the VFP register file. r0 = store pointer,
 * r1 and r2 = load pointers, r3 = element count, r12 = working copy of
 * FPSCR. Each vmul below names only the first register of a group of
 * four; with the vector length configured below it presumably operates on
 * the whole group as a short vector - TODO confirm against the VFP manual.
 * The loop is unrolled to 16 elements per pass and uses conditional
 * execution (ge/gt on the subs result) to skip the second half or the
 * preloads on the final pass.
 */
function ff_vector_fmul_vfp, export=1
|
||||
/* d8-d15 are saved here and restored before returning */
vpush {d8-d15}
|
||||
fmrx r12, fpscr
|
||||
orr r12, r12, #(3 << 16) /* set vector size to 4 */
|
||||
fmxr fpscr, r12
|
||||
|
|
||||
vldmia r1!, {s0-s3}
|
||||
vldmia r2!, {s8-s11}
|
||||
vldmia r1!, {s4-s7}
|
||||
vldmia r2!, {s12-s15}
|
||||
vmul.f32 s8, s0, s8
|
||||
1:
|
||||
subs r3, r3, #16
|
||||
vmul.f32 s12, s4, s12
|
||||
/* ge: at least 16 elements were left, so load the second group of 8 */
itttt ge
|
||||
vldmiage r1!, {s16-s19}
|
||||
vldmiage r2!, {s24-s27}
|
||||
vldmiage r1!, {s20-s23}
|
||||
vldmiage r2!, {s28-s31}
|
||||
it ge
|
||||
vmulge.f32 s24, s16, s24
|
||||
vstmia r0!, {s8-s11}
|
||||
vstmia r0!, {s12-s15}
|
||||
it ge
|
||||
vmulge.f32 s28, s20, s28
|
||||
/* gt: more data follows this pass, so preload the next first group of 8 */
itttt gt
|
||||
vldmiagt r1!, {s0-s3}
|
||||
vldmiagt r2!, {s8-s11}
|
||||
vldmiagt r1!, {s4-s7}
|
||||
vldmiagt r2!, {s12-s15}
|
||||
ittt ge
|
||||
vmulge.f32 s8, s0, s8
|
||||
vstmiage r0!, {s24-s27}
|
||||
vstmiage r0!, {s28-s31}
|
||||
bgt 1b
|
||||
|
|
||||
bic r12, r12, #(7 << 16) /* set vector size back to 1 */
|
||||
fmxr fpscr, r12
|
||||
vpop {d8-d15}
|
||||
bx lr
|
||||
endfunc
|
||||
|
||||
/**
|
||||
* ARM VFP implementation of 'vector_fmul_window_c' function
|
||||
* Assume that len is a positive number
|
||||
*/
|
||||
@ void ff_vector_fmul_window_vfp(float *dst, const float *src0,
|
||||
@ const float *src1, const float *win, int len)
|
||||
/*
 * Layout of this routine:
 *   - DST0, SRC0 and WIN0 walk forwards; DST1, SRC1 and WIN1 are set up
 *     past the end of their buffers and walk backwards (vldmdb/vstmdb).
 *   - If len is not a multiple of 8, FPSCR is first set to the value
 *     commented as scalar mode and 1, 2 and/or 4 elements are processed
 *     according to bits 0, 1 and 2 of len (labels up to 3).
 *   - The remaining multiple of 8 is handled from label 4 on, after FPSCR
 *     is set to the value commented as short vectors of length 4; lines
 *     marked "vector * vector" name only the first register of each group.
 *   - Label 5 is the steady-state loop (16 elements per pass), label 6 the
 *     final 8 elements, label 7 the common exit that restores FPSCR.
 */
function ff_vector_fmul_window_vfp, export=1
|
||||
DST0 .req a1
|
||||
SRC0 .req a2
|
||||
SRC1 .req a3
|
||||
WIN0 .req a4
|
||||
LEN .req v1
|
||||
DST1 .req v2
|
||||
WIN1 .req v3
|
||||
OLDFPSCR .req ip
|
||||
|
|
||||
push {v1-v3,lr}
|
||||
/* fifth argument (len) sits just above the four registers pushed above */
ldr LEN, [sp, #4*4+0]
|
||||
vpush {s16-s31}
|
||||
fmrx OLDFPSCR, FPSCR
|
||||
/* DST1 = DST0 + 8*len, SRC1 += 4*len, WIN1 = WIN0 + 8*len (byte offsets) */
add DST1, DST0, LEN, lsl #3
|
||||
add SRC1, SRC1, LEN, lsl #2
|
||||
add WIN1, WIN0, LEN, lsl #3
|
||||
|
|
||||
tst LEN, #7
|
||||
beq 4f @ common case: len is a multiple of 8
|
||||
|
|
||||
ldr lr, =0x03000000 @ RunFast mode, scalar mode
|
||||
fmxr FPSCR, lr
|
||||
|
|
||||
/* bit 0 of len set: process a single element from each end */
tst LEN, #1
|
||||
beq 1f
|
||||
vldmdb WIN1!, {s0}
|
||||
vldmia SRC0!, {s8}
|
||||
vldmia WIN0!, {s16}
|
||||
vmul.f s24, s0, s8
|
||||
vldmdb SRC1!, {s20}
|
||||
vmul.f s8, s16, s8
|
||||
vmls.f s24, s16, s20
|
||||
vmla.f s8, s0, s20
|
||||
vstmia DST0!, {s24}
|
||||
vstmdb DST1!, {s8}
|
||||
1:
|
||||
/* bit 1 of len set: process two elements from each end */
tst LEN, #2
|
||||
beq 2f
|
||||
vldmdb WIN1!, {s0}
|
||||
vldmdb WIN1!, {s1}
|
||||
vldmia SRC0!, {s8-s9}
|
||||
vldmia WIN0!, {s16-s17}
|
||||
vmul.f s24, s0, s8
|
||||
vmul.f s25, s1, s9
|
||||
vldmdb SRC1!, {s20}
|
||||
vldmdb SRC1!, {s21}
|
||||
vmul.f s8, s16, s8
|
||||
vmul.f s9, s17, s9
|
||||
vmls.f s24, s16, s20
|
||||
vmls.f s25, s17, s21
|
||||
vmla.f s8, s0, s20
|
||||
vmla.f s9, s1, s21
|
||||
vstmia DST0!, {s24-s25}
|
||||
vstmdb DST1!, {s8}
|
||||
vstmdb DST1!, {s9}
|
||||
2:
|
||||
/* bit 2 of len set: process four elements from each end */
tst LEN, #4
|
||||
beq 3f
|
||||
vldmdb WIN1!, {s0}
|
||||
vldmdb WIN1!, {s1}
|
||||
vldmdb WIN1!, {s2}
|
||||
vldmdb WIN1!, {s3}
|
||||
vldmia SRC0!, {s8-s11}
|
||||
vldmia WIN0!, {s16-s19}
|
||||
vmul.f s24, s0, s8
|
||||
vmul.f s25, s1, s9
|
||||
vmul.f s26, s2, s10
|
||||
vmul.f s27, s3, s11
|
||||
vldmdb SRC1!, {s20}
|
||||
vldmdb SRC1!, {s21}
|
||||
vldmdb SRC1!, {s22}
|
||||
vldmdb SRC1!, {s23}
|
||||
vmul.f s8, s16, s8
|
||||
vmul.f s9, s17, s9
|
||||
vmul.f s10, s18, s10
|
||||
vmul.f s11, s19, s11
|
||||
vmls.f s24, s16, s20
|
||||
vmls.f s25, s17, s21
|
||||
vmls.f s26, s18, s22
|
||||
vmls.f s27, s19, s23
|
||||
vmla.f s8, s0, s20
|
||||
vmla.f s9, s1, s21
|
||||
vmla.f s10, s2, s22
|
||||
vmla.f s11, s3, s23
|
||||
vstmia DST0!, {s24-s27}
|
||||
vstmdb DST1!, {s8}
|
||||
vstmdb DST1!, {s9}
|
||||
vstmdb DST1!, {s10}
|
||||
vstmdb DST1!, {s11}
|
||||
3:
|
||||
/* drop the low three bits; nothing left means go straight to the exit */
bics LEN, LEN, #7
|
||||
beq 7f
|
||||
4:
|
||||
ldr lr, =0x03030000 @ RunFast mode, short vectors of length 4, stride 1
|
||||
fmxr FPSCR, lr
|
||||
|
|
||||
vldmdb WIN1!, {s0}
|
||||
vldmdb WIN1!, {s1}
|
||||
vldmdb WIN1!, {s2}
|
||||
vldmdb WIN1!, {s3}
|
||||
vldmia SRC0!, {s8-s11}
|
||||
vldmia WIN0!, {s16-s19}
|
||||
vmul.f s24, s0, s8 @ vector * vector
|
||||
vldmdb SRC1!, {s20}
|
||||
vldmdb SRC1!, {s21}
|
||||
vldmdb SRC1!, {s22}
|
||||
vldmdb SRC1!, {s23}
|
||||
vmul.f s8, s16, s8 @ vector * vector
|
||||
vmls.f s24, s16, s20 @ vector * vector
|
||||
vldmdb WIN1!, {s4}
|
||||
vldmdb WIN1!, {s5}
|
||||
vldmdb WIN1!, {s6}
|
||||
vldmdb WIN1!, {s7}
|
||||
vldmia SRC0!, {s12-s13}
|
||||
vmla.f s8, s0, s20 @ vector * vector
|
||||
vldmia SRC0!, {s14-s15}
|
||||
subs LEN, LEN, #8
|
||||
beq 6f
|
||||
/* steady state: finish the previous group while loading the next one */
5: vldmia WIN0!, {s20-s23}
|
||||
vmul.f s28, s4, s12 @ vector * vector
|
||||
vstmia DST0!, {s24-s25}
|
||||
vldmdb SRC1!, {s16}
|
||||
vldmdb SRC1!, {s17}
|
||||
vldmdb SRC1!, {s18}
|
||||
vldmdb SRC1!, {s19}
|
||||
vmul.f s12, s20, s12 @ vector * vector
|
||||
vstmia DST0!, {s26-s27}
|
||||
vstmdb DST1!, {s8}
|
||||
vstmdb DST1!, {s9}
|
||||
vstmdb DST1!, {s10}
|
||||
vstmdb DST1!, {s11}
|
||||
vmls.f s28, s20, s16 @ vector * vector
|
||||
vldmdb WIN1!, {s0}
|
||||
vldmdb WIN1!, {s1}
|
||||
vldmdb WIN1!, {s2}
|
||||
vldmdb WIN1!, {s3}
|
||||
vldmia SRC0!, {s8-s9}
|
||||
vmla.f s12, s4, s16 @ vector * vector
|
||||
vldmia SRC0!, {s10-s11}
|
||||
subs LEN, LEN, #8
|
||||
vldmia WIN0!, {s16-s19}
|
||||
vmul.f s24, s0, s8 @ vector * vector
|
||||
vstmia DST0!, {s28-s29}
|
||||
vldmdb SRC1!, {s20}
|
||||
vldmdb SRC1!, {s21}
|
||||
vldmdb SRC1!, {s22}
|
||||
vldmdb SRC1!, {s23}
|
||||
vmul.f s8, s16, s8 @ vector * vector
|
||||
vstmia DST0!, {s30-s31}
|
||||
vstmdb DST1!, {s12}
|
||||
vstmdb DST1!, {s13}
|
||||
vstmdb DST1!, {s14}
|
||||
vstmdb DST1!, {s15}
|
||||
vmls.f s24, s16, s20 @ vector * vector
|
||||
vldmdb WIN1!, {s4}
|
||||
vldmdb WIN1!, {s5}
|
||||
vldmdb WIN1!, {s6}
|
||||
vldmdb WIN1!, {s7}
|
||||
vldmia SRC0!, {s12-s13}
|
||||
vmla.f s8, s0, s20 @ vector * vector
|
||||
vldmia SRC0!, {s14-s15}
|
||||
/* flags come from the subs in the middle of this loop body */
bne 5b
|
||||
/* final 8 elements: same pattern as the first half of loop 5, no preloads */
6: vldmia WIN0!, {s20-s23}
|
||||
vmul.f s28, s4, s12 @ vector * vector
|
||||
vstmia DST0!, {s24-s25}
|
||||
vldmdb SRC1!, {s16}
|
||||
vldmdb SRC1!, {s17}
|
||||
vldmdb SRC1!, {s18}
|
||||
vldmdb SRC1!, {s19}
|
||||
vmul.f s12, s20, s12 @ vector * vector
|
||||
vstmia DST0!, {s26-s27}
|
||||
vstmdb DST1!, {s8}
|
||||
vstmdb DST1!, {s9}
|
||||
vstmdb DST1!, {s10}
|
||||
vstmdb DST1!, {s11}
|
||||
vmls.f s28, s20, s16 @ vector * vector
|
||||
vmla.f s12, s4, s16 @ vector * vector
|
||||
vstmia DST0!, {s28-s31}
|
||||
vstmdb DST1!, {s12}
|
||||
vstmdb DST1!, {s13}
|
||||
vstmdb DST1!, {s14}
|
||||
vstmdb DST1!, {s15}
|
||||
7:
|
||||
/* common exit: put back the FPSCR value read on entry */
fmxr FPSCR, OLDFPSCR
|
||||
vpop {s16-s31}
|
||||
pop {v1-v3,pc}
|
||||
|
|
||||
.unreq DST0
|
||||
.unreq SRC0
|
||||
.unreq SRC1
|
||||
.unreq WIN0
|
||||
.unreq LEN
|
||||
.unreq OLDFPSCR
|
||||
.unreq DST1
|
||||
.unreq WIN1
|
||||
endfunc
|
||||
|
||||
/**
|
||||
* ARM VFP optimized implementation of 'vector_fmul_reverse_c' function.
|
||||
* Assume that len is a positive number and is a multiple of 8
|
||||
*/
|
||||
@ void ff_vector_fmul_reverse_vfp(float *dst, const float *src0,
|
||||
@ const float *src1, int len)
|
||||
/*
 * r0 = store pointer, r1 = forward load pointer, r2 = backward load
 * pointer (moved to src1 + 4*len and read with vldmdb), r3 = element count.
 * FPSCR is not touched in this routine; every multiply names its own
 * registers and pairs a forward element with the mirrored backward one
 * (s3 with s8, s2 with s9, ...). The loop is unrolled to 16 elements per
 * pass; ge/gt conditions on the subs result skip the second half or the
 * preloads on the final pass.
 */
function ff_vector_fmul_reverse_vfp, export=1
|
||||
/* d8-d15 are saved here and restored before returning */
vpush {d8-d15}
|
||||
add r2, r2, r3, lsl #2
|
||||
vldmdb r2!, {s0-s3}
|
||||
vldmia r1!, {s8-s11}
|
||||
vldmdb r2!, {s4-s7}
|
||||
vldmia r1!, {s12-s15}
|
||||
vmul.f32 s8, s3, s8
|
||||
vmul.f32 s9, s2, s9
|
||||
vmul.f32 s10, s1, s10
|
||||
vmul.f32 s11, s0, s11
|
||||
1:
|
||||
subs r3, r3, #16
|
||||
/* ge: at least 16 elements were left, so the second group of 8 is loaded */
it ge
|
||||
vldmdbge r2!, {s16-s19}
|
||||
vmul.f32 s12, s7, s12
|
||||
it ge
|
||||
vldmiage r1!, {s24-s27}
|
||||
vmul.f32 s13, s6, s13
|
||||
it ge
|
||||
vldmdbge r2!, {s20-s23}
|
||||
vmul.f32 s14, s5, s14
|
||||
it ge
|
||||
vldmiage r1!, {s28-s31}
|
||||
vmul.f32 s15, s4, s15
|
||||
it ge
|
||||
vmulge.f32 s24, s19, s24
|
||||
/* gt: more data follows this pass, so the next first group is preloaded */
it gt
|
||||
vldmdbgt r2!, {s0-s3}
|
||||
it ge
|
||||
vmulge.f32 s25, s18, s25
|
||||
vstmia r0!, {s8-s13}
|
||||
it ge
|
||||
vmulge.f32 s26, s17, s26
|
||||
it gt
|
||||
vldmiagt r1!, {s8-s11}
|
||||
itt ge
|
||||
vmulge.f32 s27, s16, s27
|
||||
vmulge.f32 s28, s23, s28
|
||||
it gt
|
||||
vldmdbgt r2!, {s4-s7}
|
||||
it ge
|
||||
vmulge.f32 s29, s22, s29
|
||||
vstmia r0!, {s14-s15}
|
||||
ittt ge
|
||||
vmulge.f32 s30, s21, s30
|
||||
vmulge.f32 s31, s20, s31
|
||||
vmulge.f32 s8, s3, s8
|
||||
it gt
|
||||
vldmiagt r1!, {s12-s15}
|
||||
itttt ge
|
||||
vmulge.f32 s9, s2, s9
|
||||
vmulge.f32 s10, s1, s10
|
||||
vstmiage r0!, {s24-s27}
|
||||
vmulge.f32 s11, s0, s11
|
||||
it ge
|
||||
vstmiage r0!, {s28-s31}
|
||||
bgt 1b
|
||||
|
|
||||
vpop {d8-d15}
|
||||
bx lr
|
||||
endfunc
|
||||
|
||||
/**
|
||||
* ARM VFP implementation of 'butterflies_float_c' function
|
||||
* Assume that len is a positive number
|
||||
*/
|
||||
@ void ff_butterflies_float_vfp(float *restrict v1, float *restrict v2, int len)
|
||||
/*
 * In-place butterfly: sums are written back to the BASE1 buffer and
 * differences (BASE1 data minus BASE2 data) to the BASE2 buffer. Results
 * are stored with vstr at negative offsets because the load pointers have
 * already advanced past the data.
 * Layout of this routine:
 *   - If len is not a multiple of 8, FPSCR is first set to the value
 *     commented as scalar mode and 1, 2 and/or 4 elements are processed
 *     according to bits 0, 1 and 2 of len (labels up to 3).
 *   - The remaining multiple of 8 is handled from label 4 on, after FPSCR
 *     is set to the value commented as short vectors of length 4; there the
 *     add/sub lines name only the first register of each group.
 *   - Label 5 is the steady-state loop (8 elements per pass), label 6 the
 *     final stores, label 7 the common exit that restores FPSCR.
 */
function ff_butterflies_float_vfp, export=1
|
||||
BASE1 .req a1
|
||||
BASE2 .req a2
|
||||
LEN .req a3
|
||||
OLDFPSCR .req a4
|
||||
|
|
||||
vpush {s16-s31}
|
||||
fmrx OLDFPSCR, FPSCR
|
||||
|
|
||||
tst LEN, #7
|
||||
beq 4f @ common case: len is a multiple of 8
|
||||
|
|
||||
ldr ip, =0x03000000 @ RunFast mode, scalar mode
|
||||
fmxr FPSCR, ip
|
||||
|
|
||||
/* bit 0 of len set: process a single element */
tst LEN, #1
|
||||
beq 1f
|
||||
vldmia BASE1!, {s0}
|
||||
vldmia BASE2!, {s8}
|
||||
vadd.f s16, s0, s8
|
||||
vsub.f s24, s0, s8
|
||||
vstr s16, [BASE1, #0-4*1]
|
||||
vstr s24, [BASE2, #0-4*1]
|
||||
1:
|
||||
/* bit 1 of len set: process two elements */
tst LEN, #2
|
||||
beq 2f
|
||||
vldmia BASE1!, {s0-s1}
|
||||
vldmia BASE2!, {s8-s9}
|
||||
vadd.f s16, s0, s8
|
||||
vadd.f s17, s1, s9
|
||||
vsub.f s24, s0, s8
|
||||
vsub.f s25, s1, s9
|
||||
vstr d8, [BASE1, #0-8*1] @ s16,s17
|
||||
vstr d12, [BASE2, #0-8*1] @ s24,s25
|
||||
2:
|
||||
/* bit 2 of len set: process four elements */
tst LEN, #4
|
||||
beq 3f
|
||||
vldmia BASE1!, {s0-s1}
|
||||
vldmia BASE2!, {s8-s9}
|
||||
vldmia BASE1!, {s2-s3}
|
||||
vldmia BASE2!, {s10-s11}
|
||||
vadd.f s16, s0, s8
|
||||
vadd.f s17, s1, s9
|
||||
vsub.f s24, s0, s8
|
||||
vsub.f s25, s1, s9
|
||||
vadd.f s18, s2, s10
|
||||
vadd.f s19, s3, s11
|
||||
vsub.f s26, s2, s10
|
||||
vsub.f s27, s3, s11
|
||||
vstr d8, [BASE1, #0-16*1] @ s16,s17
|
||||
vstr d12, [BASE2, #0-16*1] @ s24,s25
|
||||
vstr d9, [BASE1, #8-16*1] @ s18,s19
|
||||
vstr d13, [BASE2, #8-16*1] @ s26,s27
|
||||
3:
|
||||
/* drop the low three bits; nothing left means go straight to the exit */
bics LEN, LEN, #7
|
||||
beq 7f
|
||||
4:
|
||||
ldr ip, =0x03030000 @ RunFast mode, short vectors of length 4, stride 1
|
||||
fmxr FPSCR, ip
|
||||
|
|
||||
vldmia BASE1!, {s0-s1}
|
||||
vldmia BASE2!, {s8-s9}
|
||||
vldmia BASE1!, {s2-s3}
|
||||
vldmia BASE2!, {s10-s11}
|
||||
vadd.f s16, s0, s8
|
||||
vldmia BASE1!, {s4-s5}
|
||||
vldmia BASE2!, {s12-s13}
|
||||
vldmia BASE1!, {s6-s7}
|
||||
vldmia BASE2!, {s14-s15}
|
||||
vsub.f s24, s0, s8
|
||||
vadd.f s20, s4, s12
|
||||
subs LEN, LEN, #8
|
||||
beq 6f
|
||||
/* steady state: store the previous 8 results while loading the next 8 inputs */
5: vldmia BASE1!, {s0-s3}
|
||||
vldmia BASE2!, {s8-s11}
|
||||
vsub.f s28, s4, s12
|
||||
vstr d8, [BASE1, #0-16*3] @ s16,s17
|
||||
vstr d9, [BASE1, #8-16*3] @ s18,s19
|
||||
vstr d12, [BASE2, #0-16*3] @ s24,s25
|
||||
vstr d13, [BASE2, #8-16*3] @ s26,s27
|
||||
vadd.f s16, s0, s8
|
||||
vldmia BASE1!, {s4-s7}
|
||||
vldmia BASE2!, {s12-s15}
|
||||
vsub.f s24, s0, s8
|
||||
vstr d10, [BASE1, #0-16*3] @ s20,s21
|
||||
vstr d11, [BASE1, #8-16*3] @ s22,s23
|
||||
vstr d14, [BASE2, #0-16*3] @ s28,s29
|
||||
vstr d15, [BASE2, #8-16*3] @ s30,s31
|
||||
vadd.f s20, s4, s12
|
||||
subs LEN, LEN, #8
|
||||
bne 5b
|
||||
/* final group: last subtraction, then store all 8 sums and 8 differences */
6: vsub.f s28, s4, s12
|
||||
vstr d8, [BASE1, #0-16*2] @ s16,s17
|
||||
vstr d9, [BASE1, #8-16*2] @ s18,s19
|
||||
vstr d12, [BASE2, #0-16*2] @ s24,s25
|
||||
vstr d13, [BASE2, #8-16*2] @ s26,s27
|
||||
vstr d10, [BASE1, #0-16*1] @ s20,s21
|
||||
vstr d11, [BASE1, #8-16*1] @ s22,s23
|
||||
vstr d14, [BASE2, #0-16*1] @ s28,s29
|
||||
vstr d15, [BASE2, #8-16*1] @ s30,s31
|
||||
7:
|
||||
/* common exit: put back the FPSCR value read on entry */
fmxr FPSCR, OLDFPSCR
|
||||
vpop {s16-s31}
|
||||
bx lr
|
||||
|
|
||||
.unreq BASE1
|
||||
.unreq BASE2
|
||||
.unreq LEN
|
||||
.unreq OLDFPSCR
|
||||
endfunc
|
|
@ -0,0 +1,118 @@
|
|||
/*
|
||||
* Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVUTIL_ARM_INTMATH_H
|
||||
#define AVUTIL_ARM_INTMATH_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "config.h"
|
||||
#include "libavutil/attributes.h"
|
||||
|
||||
#if HAVE_INLINE_ASM
|
||||
|
||||
#if HAVE_ARMV6_INLINE
|
||||
|
||||
#define av_clip_uint8 av_clip_uint8_arm
|
||||
static av_always_inline av_const int av_clip_uint8_arm(int a)
|
||||
{
|
||||
int x;
|
||||
__asm__ ("usat %0, #8, %1" : "=r"(x) : "r"(a));
|
||||
return x;
|
||||
}
|
||||
|
||||
#define av_clip_int8 av_clip_int8_arm
|
||||
static av_always_inline av_const int av_clip_int8_arm(int a)
|
||||
{
|
||||
int x;
|
||||
__asm__ ("ssat %0, #8, %1" : "=r"(x) : "r"(a));
|
||||
return x;
|
||||
}
|
||||
|
||||
#define av_clip_uint16 av_clip_uint16_arm
|
||||
static av_always_inline av_const int av_clip_uint16_arm(int a)
|
||||
{
|
||||
int x;
|
||||
__asm__ ("usat %0, #16, %1" : "=r"(x) : "r"(a));
|
||||
return x;
|
||||
}
|
||||
|
||||
#define av_clip_int16 av_clip_int16_arm
|
||||
static av_always_inline av_const int av_clip_int16_arm(int a)
|
||||
{
|
||||
int x;
|
||||
__asm__ ("ssat %0, #16, %1" : "=r"(x) : "r"(a));
|
||||
return x;
|
||||
}
|
||||
|
||||
#define av_clip_intp2 av_clip_intp2_arm
|
||||
static av_always_inline av_const int av_clip_intp2_arm(int a, int p)
|
||||
{
|
||||
unsigned x;
|
||||
__asm__ ("ssat %0, %2, %1" : "=r"(x) : "r"(a), "i"(p+1));
|
||||
return x;
|
||||
}
|
||||
|
||||
#define av_clip_uintp2 av_clip_uintp2_arm
|
||||
static av_always_inline av_const unsigned av_clip_uintp2_arm(int a, int p)
|
||||
{
|
||||
unsigned x;
|
||||
__asm__ ("usat %0, %2, %1" : "=r"(x) : "r"(a), "i"(p));
|
||||
return x;
|
||||
}
|
||||
|
||||
#define av_sat_add32 av_sat_add32_arm
|
||||
static av_always_inline int av_sat_add32_arm(int a, int b)
|
||||
{
|
||||
int r;
|
||||
__asm__ ("qadd %0, %1, %2" : "=r"(r) : "r"(a), "r"(b));
|
||||
return r;
|
||||
}
|
||||
|
||||
#define av_sat_dadd32 av_sat_dadd32_arm
|
||||
static av_always_inline int av_sat_dadd32_arm(int a, int b)
|
||||
{
|
||||
int r;
|
||||
__asm__ ("qdadd %0, %1, %2" : "=r"(r) : "r"(a), "r"(b));
|
||||
return r;
|
||||
}
|
||||
|
||||
#endif /* HAVE_ARMV6_INLINE */
|
||||
|
||||
#if HAVE_ASM_MOD_Q
|
||||
|
||||
#define av_clipl_int32 av_clipl_int32_arm
|
||||
static av_always_inline av_const int32_t av_clipl_int32_arm(int64_t a)
|
||||
{
|
||||
int x, y;
|
||||
__asm__ ("adds %1, %R2, %Q2, lsr #31 \n\t"
|
||||
"itet ne \n\t"
|
||||
"mvnne %1, #1<<31 \n\t"
|
||||
"moveq %0, %Q2 \n\t"
|
||||
"eorne %0, %1, %R2, asr #31 \n\t"
|
||||
: "=r"(x), "=&r"(y) : "r"(a) : "cc");
|
||||
return x;
|
||||
}
|
||||
|
||||
#endif /* HAVE_ASM_MOD_Q */
|
||||
|
||||
#endif /* HAVE_INLINE_ASM */
|
||||
|
||||
#endif /* AVUTIL_ARM_INTMATH_H */
|
|
@ -0,0 +1,91 @@
|
|||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVUTIL_ARM_INTREADWRITE_H
|
||||
#define AVUTIL_ARM_INTREADWRITE_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "config.h"
|
||||
#include "libavutil/attributes.h"
|
||||
|
||||
#if HAVE_FAST_UNALIGNED && HAVE_INLINE_ASM && AV_GCC_VERSION_AT_MOST(4,6)
|
||||
|
||||
#define AV_RN16 AV_RN16
|
||||
/*
 * Load 16 bits from p with a single ARM "ldrh" instruction and return the
 * result as an unsigned.
 *
 * The three preprocessor branches issue the same instruction; they differ
 * only in how the memory operand is described to the compiler.
 * NOTE(review): the enclosing #if requires HAVE_FAST_UNALIGNED, so p is
 * presumably allowed to be unaligned -- confirm against the callers.
 */
static av_always_inline unsigned AV_RN16(const void *p)
|
||||
{
|
||||
const uint8_t *q = p;
|
||||
unsigned v;
|
||||
/* GCC <= 4.5: one "m" operand typed as a whole uint16_t. */
#if AV_GCC_VERSION_AT_MOST(4,5)
|
||||
__asm__ ("ldrh %0, %1" : "=r"(v) : "m"(*(const uint16_t *)q));
|
||||
/*
 * Thumb build: the two bytes are passed as separate "m" operands. Only %1
 * (q[0]) appears in the instruction text; the q[1] operand is unused by the
 * template -- presumably it only tells the compiler that byte is read too.
 */
#elif defined __thumb__
|
||||
__asm__ ("ldrh %0, %1" : "=r"(v) : "m"(q[0]), "m"(q[1]));
|
||||
/* Otherwise: same as above, but q[0] uses the "Uq" constraint instead of "m". */
#else
|
||||
__asm__ ("ldrh %0, %1" : "=r"(v) : "Uq"(q[0]), "m"(q[1]));
|
||||
#endif
|
||||
return v;
|
||||
}
|
||||
|
||||
#define AV_WN16 AV_WN16
|
||||
/*
 * Store the 16-bit value v to p with a single ARM "strh" instruction.
 *
 * The destination is passed as an "=m" output operand typed as uint16_t, so
 * the compiler knows that *p is written by the asm statement.
 */
static av_always_inline void AV_WN16(void *p, uint16_t v)
|
||||
{
|
||||
__asm__ ("strh %1, %0" : "=m"(*(uint16_t *)p) : "r"(v));
|
||||
}
|
||||
|
||||
#define AV_RN32 AV_RN32
|
||||
/*
 * Load 32 bits from p with a single ARM "ldr" instruction and return them.
 */
static av_always_inline uint32_t AV_RN32(const void *p)
|
||||
{
|
||||
/*
 * View p through a packed one-member struct for the "m" operand.
 * NOTE(review): presumably this stops the compiler from assuming 4-byte
 * alignment of the source -- confirm.
 */
const struct __attribute__((packed)) { uint32_t v; } *q = p;
|
||||
uint32_t v;
|
||||
__asm__ ("ldr %0, %1" : "=r"(v) : "m"(*q));
|
||||
return v;
|
||||
}
|
||||
|
||||
#define AV_WN32 AV_WN32
|
||||
/*
 * Store the 32-bit value v to p with a single ARM "str" instruction.
 *
 * The destination is passed as an "=m" output operand typed as a plain
 * uint32_t (no packed wrapper, unlike the operand used by AV_RN32).
 */
static av_always_inline void AV_WN32(void *p, uint32_t v)
|
||||
{
|
||||
__asm__ ("str %1, %0" : "=m"(*(uint32_t *)p) : "r"(v));
|
||||
}
|
||||
|
||||
#if HAVE_ASM_MOD_Q
|
||||
|
||||
#define AV_RN64 AV_RN64
|
||||
/*
 * Load 64 bits from p as two 32-bit "ldr" instructions and return them as
 * one uint64_t.
 *
 * Only compiled when HAVE_ASM_MOD_Q is set, because the template relies on
 * the %Q / %R operand modifiers to name the two registers of the 64-bit
 * output (presumably the low and high halves respectively -- confirm against
 * the GCC ARM documentation).
 */
static av_always_inline uint64_t AV_RN64(const void *p)
|
||||
{
|
||||
/* Packed 32-bit view of the source; q[0] and q[1] are the two words read. */
const struct __attribute__((packed)) { uint32_t v; } *q = p;
|
||||
uint64_t v;
|
||||
/* First word (q[0]) goes into the %Q half of v. */
__asm__ ("ldr %Q0, %1 \n\t"
|
||||
/* Second word (q[1]) goes into the %R half of v. */
"ldr %R0, %2 \n\t"
|
||||
/*
 * Early-clobber ("&"): the first ldr writes part of v before the second
 * memory operand has been used, so v must not share registers with the
 * inputs' address registers.
 */
: "=&r"(v)
|
||||
: "m"(q[0]), "m"(q[1]));
|
||||
return v;
|
||||
}
|
||||
|
||||
#define AV_WN64 AV_WN64
|
||||
/*
 * Store the 64-bit value v to p as two 32-bit "str" instructions.
 *
 * Only compiled when HAVE_ASM_MOD_Q is set, because the template relies on
 * the %Q / %R operand modifiers to name the two registers holding v
 * (presumably the low and high halves respectively -- confirm against the
 * GCC ARM documentation).
 */
static av_always_inline void AV_WN64(void *p, uint64_t v)
|
||||
{
|
||||
/* The %Q half of v is written to the first 32-bit word at p. */
__asm__ ("str %Q2, %0 \n\t"
|
||||
/* The %R half of v is written to the following 32-bit word. */
"str %R2, %1 \n\t"
|
||||
/* Both destination words are declared as memory outputs. */
: "=m"(*(uint32_t*)p), "=m"(*((uint32_t*)p+1))
|
||||
: "r"(v));
|
||||
}
|
||||
|
||||
#endif /* HAVE_ASM_MOD_Q */
|
||||
|
||||
#endif /* HAVE_FAST_UNALIGNED && HAVE_INLINE_ASM && AV_GCC_VERSION_AT_MOST(4,6) */
|
||||
|
||||
#endif /* AVUTIL_ARM_INTREADWRITE_H */
|
|
@ -0,0 +1,18 @@
|
|||
# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
|
||||
# vim: set filetype=python:
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
# C sources plus the .S assembly files built alongside them; going by the
# file names these are the ARM, NEON and VFP variants of the float DSP code.
SOURCES += [
|
||||
'cpu.c',
|
||||
'float_dsp_init_arm.c',
|
||||
'float_dsp_init_neon.c',
|
||||
'float_dsp_init_vfp.c',
|
||||
'float_dsp_neon.S',
|
||||
'float_dsp_vfp.S',
|
||||
]
|
||||
|
||||
FINAL_LIBRARY = 'mozavutil'
|
||||
|
||||
include('/media/ffvpx/ffvpxcommon.mozbuild')
|
|
@ -0,0 +1,40 @@
|
|||
/*
|
||||
* Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVUTIL_ARM_TIMER_H
|
||||
#define AVUTIL_ARM_TIMER_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "config.h"
|
||||
|
||||
#if HAVE_INLINE_ASM && defined(__ARM_ARCH_7A__)
|
||||
|
||||
#define AV_READ_TIME read_time
|
||||
|
||||
/*
 * Timer source installed as AV_READ_TIME when inline asm is available and
 * the target defines __ARM_ARCH_7A__.
 *
 * Reads one 32-bit CP15 coprocessor register and returns it; the unsigned
 * value is converted to uint64_t on return, so only the width of unsigned
 * ever carries information.
 * NOTE(review): "c9, c13, 0" is presumably the ARMv7-A PMU cycle counter
 * (PMCCNTR), and reading it presumably requires the counter to be enabled and
 * accessible at the current privilege level -- confirm against the ARM ARM.
 */
static inline uint64_t read_time(void)
|
||||
{
|
||||
unsigned cc;
|
||||
/* volatile: the read must happen on every call and not be merged or dropped. */
__asm__ volatile ("mrc p15, 0, %0, c9, c13, 0" : "=r"(cc));
|
||||
return cc;
|
||||
}
|
||||
|
||||
#endif /* HAVE_INLINE_ASM && __ARM_ARCH_7A__ */
|
||||
|
||||
#endif /* AVUTIL_ARM_TIMER_H */
|
|
@ -8,19 +8,24 @@
|
|||
|
||||
// cpu_internal.c
|
||||
int ff_get_cpu_flags_aarch64(void) { return 0; }
|
||||
#if !defined(__arm__)
|
||||
int ff_get_cpu_flags_arm(void) { return 0; }
|
||||
#endif
|
||||
int ff_get_cpu_flags_ppc(void) { return 0; }
|
||||
|
||||
// float_dsp.c
|
||||
#include "float_dsp.h"
|
||||
void ff_float_dsp_init_aarch64(AVFloatDSPContext *fdsp) {}
|
||||
void ff_float_dsp_init_arm(AVFloatDSPContext *fdsp) {}
|
||||
void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int strict) {}
|
||||
void ff_float_dsp_init_mips(AVFloatDSPContext *fdsp) {}
|
||||
|
||||
#if !defined(__arm__)
|
||||
void ff_float_dsp_init_arm(AVFloatDSPContext *fdsp) {}
|
||||
#endif
|
||||
int av_hwframe_get_buffer(struct AVBufferRef* hwframe_ref, struct AVFrame* frame, int flags) { return 0; }
|
||||
|
||||
// cpu.c
|
||||
size_t ff_get_cpu_max_align_aarch64() { return 0; }
|
||||
size_t ff_get_cpu_max_align_arm() { return 0; }
|
||||
size_t ff_get_cpu_max_align_ppc() { return 0; }
|
||||
#if !defined(__arm__)
|
||||
size_t ff_get_cpu_max_align_arm() { return 0; }
|
||||
#endif
|
||||
|
|
|
@ -7,7 +7,10 @@
|
|||
# Due to duplicate file names, we compile libavutil/x86 in its own
|
||||
# moz.build file.
|
||||
if CONFIG['FFVPX_ASFLAGS']:
|
||||
DIRS += ['x86']
|
||||
if CONFIG['CPU_ARCH'] == 'x86' or CONFIG['CPU_ARCH'] == 'x86_64':
|
||||
DIRS += ['x86']
|
||||
elif CONFIG['CPU_ARCH'] == 'arm':
|
||||
DIRS += ['arm']
|
||||
|
||||
SharedLibrary('mozavutil')
|
||||
SOURCES += [
|
||||
|
|
|
@ -3216,22 +3216,18 @@ dnl ========================================================
|
|||
MOZ_FFVPX=
|
||||
MOZ_FFVPX_FLACONLY=
|
||||
case "$CPU_ARCH" in
|
||||
x86)
|
||||
MOZ_FFVPX=1
|
||||
;;
|
||||
x86_64)
|
||||
x86|x86_64)
|
||||
MOZ_FFVPX=1
|
||||
dnl Use same conditional as MOZ_LIBAV_FFT to enable FFmpeg's ffvpx assembly decoder.
|
||||
FFVPX_ASFLAGS=$LIBAV_FFT_ASFLAGS
|
||||
;;
|
||||
arm*)
|
||||
MOZ_FFVPX=1
|
||||
MOZ_FFVPX_FLACONLY=1
|
||||
FFVPX_ASFLAGS=$VPX_ASFLAGS
|
||||
;;
|
||||
esac
|
||||
|
||||
dnl Use same conditional as MOZ_LIBAV_FFT to enable FFmpeg's ffvpx assembly decoder.
|
||||
if test -n "$MOZ_LIBAV_FFT"; then
|
||||
FFVPX_ASFLAGS=$LIBAV_FFT_ASFLAGS
|
||||
fi
|
||||
if test -n "$MOZ_FFVPX"; then
|
||||
AC_DEFINE(MOZ_FFVPX)
|
||||
fi
|
||||
|
|
Загрузка…
Ссылка в новой задаче