Backed out changeset abc86341cd75 (bug 926838) for m3 test failures on a CLOSED TREE

This commit is contained in:
Carsten "Tomcat" Book 2015-05-05 15:52:11 +02:00
Родитель 7d3e843311
Коммит cd411ff6db
60 изменённых файлов: 0 добавлений и 21176 удалений

Просмотреть файл

@ -1,39 +0,0 @@
Use of this source code is governed by a BSD-style license that can be
found in the LICENSE file in the root of the source tree. All
contributing project authors may be found in the AUTHORS file in the
root of the source tree.
The files were originally licensed by ARM Limited.
The following files:
* dl/api/omxtypes.h
* dl/sp/api/omxSP.h
are licensed by Khronos:
Copyright (c) 2005-2008,2015 The Khronos Group Inc.
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and/or associated documentation files (the
"Materials"), to deal in the Materials without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Materials, and to
permit persons to whom the Materials are furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Materials.
MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
https://www.khronos.org/registry/
THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.

Просмотреть файл

@ -1,3 +0,0 @@
ajm@google.com
kma@google.com
rtoy@google.com

Просмотреть файл

@ -1,19 +0,0 @@
Name: OpenMAX DL
Short Name: OpenMax DL
URL: https://silver.arm.com/download/Software/Graphics/OX000-BU-00010-r1p0-00bet0/OX000-BU-00010-r1p0-00bet0.tgz
Version: 1.0.2
License: BSD
License File: LICENSE
Security Critical: yes
Description:
Implementation of OpenMAX DL spec from ARM. This is used to support
WebAudio for Chromium on Android.
Local Modifications:
Only the FFT routines from the OpenMAX DL package are included. The
code was modified to work with gcc and a new implementation for a
floating-point FFT was added.
The original ARM license is unclear, but Google has obtained
permission to relicense this code under a BSD license.

Просмотреть файл

@ -1,409 +0,0 @@
@// -*- Mode: asm; -*-
@//
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@// Use of this source code is governed by a BSD-style license
@// that can be found in the LICENSE file in the root of the source
@// tree. An additional intellectual property rights grant can be found
@// in the file PATENTS. All contributing project authors may
@// be found in the AUTHORS file in the root of the source tree.
@//
@// This file was originally licensed as follows. It has been
@// relicensed with permission from the copyright holders.
@//
@//
@// File Name: armCOMM_s.h
@// OpenMAX DL: v1.0.2
@// Last Modified Revision: 13871
@// Last Modified Date: Fri, 09 May 2008
@//
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
@//
@//
@//
@// ARM optimized OpenMAX common header file
@//
@ Assembly-time bookkeeping shared by the macros below.
@ Stack offset of the scratch workspace.
.set _Workspace, 0
@ Number of scratch bytes claimed on the stack.
.set _SBytes, 0
@ Saved D register list, stored as the number of the last register.
.set _DRegList, 0
@ Saved R register list, stored as the number of the last register.
.set _RRegList, 0
@// Work out a list of R saved registers, and how much stack space is needed.
@// gas doesn't support setting a variable to a string, so we set _RRegList to
@// the register number.
.macro _M_GETRREGLIST rreg
    @ Translate the requested save limit into _RRegList, the number of the
    @ highest r register that the prologue and epilogue will stack.
    @ Registers are saved in pairs, so an odd limit selects the next even
    @ register number.
    .ifeqs "\rreg", ""
    @ Nothing needs to be saved. Reset the list explicitly so that a limit
    @ selected for an earlier function is not silently reused.
    .set _RRegList, 0
    .exitm
    .endif
    @ If rreg is lr or r4, save lr and r4
    .ifeqs "\rreg", "lr"
    .set _RRegList, 4
    .exitm
    .endif
    .ifeqs "\rreg", "r4"
    .set _RRegList, 4
    .exitm
    .endif
    @ If rreg = r5 or r6, save up to register r6
    .ifeqs "\rreg", "r5"
    .set _RRegList, 6
    .exitm
    .endif
    .ifeqs "\rreg", "r6"
    .set _RRegList, 6
    .exitm
    .endif
    @ If rreg = r7 or r8, save up to register r8
    .ifeqs "\rreg", "r7"
    .set _RRegList, 8
    .exitm
    .endif
    .ifeqs "\rreg", "r8"
    .set _RRegList, 8
    .exitm
    .endif
    @ If rreg = r9 or r10, save up to register r10
    .ifeqs "\rreg", "r9"
    .set _RRegList, 10
    .exitm
    .endif
    .ifeqs "\rreg", "r10"
    .set _RRegList, 10
    .exitm
    .endif
    @ If rreg = r11 or r12, save up to register r12
    .ifeqs "\rreg", "r11"
    .set _RRegList, 12
    .exitm
    .endif
    .ifeqs "\rreg", "r12"
    .set _RRegList, 12
    .exitm
    .endif
    @ Anything else is not a supported limit; _RRegList is left unchanged.
    .warning "Unrecognized saved r register limit: \rreg"
.endm
@ Work out list of D saved registers, like for R registers.
.macro _M_GETDREGLIST dreg
    @ Translate the requested save limit into _DRegList, the number of the
    @ highest d register to be stacked (0 means no d registers are saved).
    .ifeqs "\dreg", ""
    .set _DRegList, 0
    .exitm
    .endif
    .ifeqs "\dreg", "d8"
    .set _DRegList, 8
    .exitm
    .endif
    .ifeqs "\dreg", "d9"
    .set _DRegList, 9
    .exitm
    .endif
    .ifeqs "\dreg", "d10"
    .set _DRegList, 10
    .exitm
    .endif
    .ifeqs "\dreg", "d11"
    .set _DRegList, 11
    .exitm
    .endif
    .ifeqs "\dreg", "d12"
    .set _DRegList, 12
    .exitm
    .endif
    .ifeqs "\dreg", "d13"
    .set _DRegList, 13
    .exitm
    .endif
    .ifeqs "\dreg", "d14"
    .set _DRegList, 14
    .exitm
    .endif
    .ifeqs "\dreg", "d15"
    .set _DRegList, 15
    .exitm
    .endif
    @ Report the offending argument of this macro; _DRegList is left
    @ unchanged in this case.
    .warning "Unrecognized saved d register limit: \dreg"
.endm
@//////////////////////////////////////////////////////////
@// Function header and footer macros
@//////////////////////////////////////////////////////////
@ Function Header Macro
@ Generates the function prologue
@ Note that functions should all be "stack-moves-once"
@ The FNSTART and FNEND macros should be the only places
@ where the stack moves.
@
@ name = function name
@ rreg = "" don't stack any registers
@ "lr" stack registers "r4,lr" (r4 is always stacked together with lr)
@ "rN" stack registers "r4-rN,lr" (an odd N is rounded up to the next even N)
@ dreg = "" don't stack any D registers
@ "dN" stack registers "d8-dN"
@
@ Note: the ARM Architecture Procedure Call Standard (AAPCS)
@ states that r4-r11, sp, d8-d15 must be preserved by
@ a compliant function.
.macro M_START name, rreg, dreg
    @ Prologue generator: opens the function, stacks the requested
    @ registers and claims the scratch area recorded in _SBytes.
    .set _Workspace, 0

    @ Export the symbol and place the function in a section of its own.
    .global \name
    .func \name
    .section .text.\name,"ax",%progbits
    .align 2
\name :
    .fnstart

    @ Stack the R registers first, then the D registers.
    _M_GETRREGLIST \rreg
    _M_PUSH_RREG
    _M_GETDREGLIST \dreg
    _M_PUSH_DREG

    @ Round the scratch size up to a multiple of 8 bytes, then claim it
    @ with a single stack adjustment (skipped when nothing was allocated).
    .set _SBytes, ((_SBytes + 7) & ~7)
    .ifne _SBytes
    sub sp, sp, #_SBytes
    .endif
.endm
@ Function Footer Macro
@ Generates the function epilogue
.macro M_END
    @ Epilogue generator: releases the scratch area, restores the saved
    @ registers, returns and closes the function.
    .ifne _SBytes
    @ Undo the single stack adjustment made by the prologue.
    add sp, sp, #_SBytes
    .endif

    @ Pop the saved D and R registers and return to the caller.
    _M_RET
    .fnend
    .endfunc

    @ Start the next function with an empty scratch area.
    .set _SBytes, 0
.endm
@// Based on the value of _DRegList, push the specified set of registers
@// to the stack. Is there a better way?
.macro _M_PUSH_DREG
    @ Stack d8 up to the register numbered by _DRegList. Any value outside
    @ 8..15 (including 0) emits nothing.
    .if _DRegList == 8
    vpush {d8}
    .elseif _DRegList == 9
    vpush {d8-d9}
    .elseif _DRegList == 10
    vpush {d8-d10}
    .elseif _DRegList == 11
    vpush {d8-d11}
    .elseif _DRegList == 12
    vpush {d8-d12}
    .elseif _DRegList == 13
    vpush {d8-d13}
    .elseif _DRegList == 14
    vpush {d8-d14}
    .elseif _DRegList == 15
    vpush {d8-d15}
    .endif
.endm
@// Based on the value of _RRegList, push the specified set of registers
@// to the stack. Is there a better way?
.macro _M_PUSH_RREG
    @ Stack r4 up to the register numbered by _RRegList, together with lr.
    @ Any value other than 4, 6, 8, 10 or 12 emits nothing.
    .if _RRegList == 4
    stmfd sp!, {r4, lr}
    .elseif _RRegList == 6
    stmfd sp!, {r4-r6, lr}
    .elseif _RRegList == 8
    stmfd sp!, {r4-r8, lr}
    .elseif _RRegList == 10
    stmfd sp!, {r4-r10, lr}
    .elseif _RRegList == 12
    stmfd sp!, {r4-r12, lr}
    .endif
.endm
@// The opposite of _M_PUSH_DREG
.macro _M_POP_DREG cc
    @ Unstack d8 up to the register numbered by _DRegList; the inverse of
    @ _M_PUSH_DREG. Any value outside 8..15 (including 0) emits nothing.
    @
    @ cc = optional condition code appended to the vpop mnemonic. The
    @      return macro forwards its own condition argument here, so the
    @      parameter has to exist for a conditional return to assemble.
    @      With cc omitted the output is a plain vpop, as before.
    .if _DRegList == 8
    vpop\cc {d8}
    .elseif _DRegList == 9
    vpop\cc {d8-d9}
    .elseif _DRegList == 10
    vpop\cc {d8-d10}
    .elseif _DRegList == 11
    vpop\cc {d8-d11}
    .elseif _DRegList == 12
    vpop\cc {d8-d12}
    .elseif _DRegList == 13
    vpop\cc {d8-d13}
    .elseif _DRegList == 14
    vpop\cc {d8-d14}
    .elseif _DRegList == 15
    vpop\cc {d8-d15}
    .endif
.endm
@// The opposite of _M_PUSH_RREG
.macro _M_POP_RREG cc
    @ Return to the caller, unstacking whatever _M_PUSH_RREG saved.
    @ With no saved registers a plain branch through lr is emitted;
    @ otherwise the saved lr value is loaded straight into pc.
    @ cc = optional condition code applied to the emitted instruction.
    .if _RRegList == 0
    bx\cc lr
    .elseif _RRegList == 4
    ldm\cc\()fd sp!, {r4, pc}
    .elseif _RRegList == 6
    ldm\cc\()fd sp!, {r4-r6, pc}
    .elseif _RRegList == 8
    ldm\cc\()fd sp!, {r4-r8, pc}
    .elseif _RRegList == 10
    ldm\cc\()fd sp!, {r4-r10, pc}
    .elseif _RRegList == 12
    ldm\cc\()fd sp!, {r4-r12, pc}
    .endif
.endm
@ Produce function return instructions
.macro _M_RET cc
@ Restores registers in the reverse of the prologue order: D registers
@ first, then the R registers. The R-register helper also performs the
@ actual return (bx lr, or a load of the saved lr value into pc).
@ cc = optional condition code, forwarded to both helpers.
@ NOTE(review): M_END invokes this macro without a condition; before
@ using a non-empty cc, confirm that _M_POP_DREG accepts an argument.
_M_POP_DREG \cc
_M_POP_RREG \cc
.endm
@// Allocate 4-byte aligned area of name
@// |name| and size |size| bytes.
.macro M_ALLOC4 name, size
    @ Reserve a 4-byte aligned slot in the scratch area.
    @ name = slot name; its offset is published as the symbol name_F
    @ size = slot size in bytes
    @ Round the running total up to a multiple of 4 before placing the slot.
    .set _SBytes, ((_SBytes + 3) & ~3)
    .set \name\()_F, _SBytes
    @ Grow the scratch area by the size of the new slot.
    .set _SBytes, _SBytes + \size
.endm
@ Load word from stack
.macro M_LDR r, a0, a1, a2, a3
@ Thin wrapper: forwards to _M_DATA with the ldr mnemonic and a required
@ alignment of 4 bytes.
@ r  = destination register
@ a0 = name of a stack slot (its offset symbol carries the _F suffix)
@ a1 = optional suffix appended to the mnemonic (condition code)
@ a2, a3 = forwarded unchanged
_M_DATA "ldr", 4, \r, \a0, \a1, \a2, \a3
.endm
@ Store word to stack
.macro M_STR r, a0, a1, a2, a3
@ Thin wrapper: forwards to _M_DATA with the str mnemonic and a required
@ alignment of 4 bytes.
@ r  = source register
@ a0 = name of a stack slot (its offset symbol carries the _F suffix)
@ a1 = optional suffix appended to the mnemonic (condition code)
@ a2, a3 = forwarded unchanged
_M_DATA "str", 4, \r, \a0, \a1, \a2, \a3
.endm
@ Macro to perform a data access operation
@ Such as LDR or STR
@ The addressing mode is modified such that
@ 1. If no address is given then the name is taken
@ as a stack offset
@ 2. If the addressing mode is not available for the
@ state being assembled for (eg Thumb) then a suitable
@ addressing mode is substituted.
@
@ On Entry:
@ $i = Instruction to perform (eg "LDRB")
@ $a = Required byte alignment
@ $r = Register(s) to transfer (eg "r1")
@ $a0,$a1,$a2. Addressing mode and condition. One of:
@ label {,cc}
@ [base] {,,,cc}
@ [base, offset]{!} {,,cc}
@ [base, offset, shift]{!} {,cc}
@ [base], offset {,,cc}
@ [base], offset, shift {,cc}
@
@ WARNING: Most of the above are not supported, except the first case.
.macro _M_DATA i, a, r, a0, a1, a2, a3
@ Only the stack-slot form is implemented. The slot named by a0 is
@ addressed relative to sp at _Workspace plus the offset symbol of the
@ slot (the slot name with the _F suffix). a1 is pasted directly onto the
@ mnemonic, which is how an optional condition code is applied.
@ The a, a2 and a3 arguments are accepted but not used here.
@ Side effect: the symbol _Offset is redefined on every expansion.
.set _Offset, _Workspace + \a0\()_F
\i\a1 \r, [sp, #_Offset]
.endm

Просмотреть файл

@ -1,289 +0,0 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*
* This file was originally licensed as follows. It has been
* relicensed with permission from the copyright holders.
*/
/*
*
* File Name: armOMX_ReleaseVersion.h
* OpenMAX DL: v1.0.2
* Last Modified Revision: 15322
* Last Modified Date: Wed, 15 Oct 2008
*
* (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
*
*
*
* This file allows a version of the OMX DL libraries to be built where some or
* all of the function names can be given a user specified suffix.
*
* You might want to use it where:
*
* - you want to rename a function "out of the way" so that you could replace
* a function with a different version (the original version would still be
* in the library just with a different name - so you could debug the new
* version by comparing it to the output of the old)
*
* - you want to rename all the functions to versions with a suffix so that
* you can include two versions of the library and choose between functions
* at runtime.
*
* e.g. omxIPBM_Copy_U8_C1R could be renamed omxIPBM_Copy_U8_C1R_CortexA8
*
*/
#ifndef _armOMX_H_
#define _armOMX_H_
/* Master switch for the renaming scheme. It is 0 here, so everything up to
 * the matching #endif is skipped and no function is renamed. */
#define ARMOMX_ENABLE_RENAMING 0
#if ARMOMX_ENABLE_RENAMING
/* We need to define these two macros in order to expand and concatenate the names.
 * OMXCATBAR goes through OMXCAT2BAR so that its arguments are macro-expanded
 * before the ## pasting takes place. */
#define OMXCAT2BAR(A, B) omx ## A ## B
#define OMXCATBAR(A, B) OMXCAT2BAR(A, B)
/* Define the suffix to add to all functions - the default is no suffix */
#define BARE_SUFFIX
/* Define what happens to the bare suffix-less functions, down to the sub-domain accuracy.
 * Each sub-domain has its own suffix macro; all of them default to BARE_SUFFIX. */
#define OMXACAAC_SUFFIX BARE_SUFFIX
#define OMXACMP3_SUFFIX BARE_SUFFIX
#define OMXICJP_SUFFIX BARE_SUFFIX
#define OMXIPBM_SUFFIX BARE_SUFFIX
#define OMXIPCS_SUFFIX BARE_SUFFIX
#define OMXIPPP_SUFFIX BARE_SUFFIX
#define OMXSP_SUFFIX BARE_SUFFIX
#define OMXVCCOMM_SUFFIX BARE_SUFFIX
#define OMXVCM4P10_SUFFIX BARE_SUFFIX
#define OMXVCM4P2_SUFFIX BARE_SUFFIX
/* Define what each bare, un-suffixed OpenMAX API function name is to be renamed to */
#define omxACAAC_DecodeChanPairElt OMXCATBAR(ACAAC_DecodeChanPairElt, OMXACAAC_SUFFIX)
#define omxACAAC_DecodeDatStrElt OMXCATBAR(ACAAC_DecodeDatStrElt, OMXACAAC_SUFFIX)
#define omxACAAC_DecodeFillElt OMXCATBAR(ACAAC_DecodeFillElt, OMXACAAC_SUFFIX)
#define omxACAAC_DecodeIsStereo_S32 OMXCATBAR(ACAAC_DecodeIsStereo_S32, OMXACAAC_SUFFIX)
#define omxACAAC_DecodeMsPNS_S32_I OMXCATBAR(ACAAC_DecodeMsPNS_S32_I, OMXACAAC_SUFFIX)
#define omxACAAC_DecodeMsStereo_S32_I OMXCATBAR(ACAAC_DecodeMsStereo_S32_I, OMXACAAC_SUFFIX)
#define omxACAAC_DecodePrgCfgElt OMXCATBAR(ACAAC_DecodePrgCfgElt, OMXACAAC_SUFFIX)
#define omxACAAC_DecodeTNS_S32_I OMXCATBAR(ACAAC_DecodeTNS_S32_I, OMXACAAC_SUFFIX)
#define omxACAAC_DeinterleaveSpectrum_S32 OMXCATBAR(ACAAC_DeinterleaveSpectrum_S32, OMXACAAC_SUFFIX)
#define omxACAAC_EncodeTNS_S32_I OMXCATBAR(ACAAC_EncodeTNS_S32_I, OMXACAAC_SUFFIX)
#define omxACAAC_LongTermPredict_S32 OMXCATBAR(ACAAC_LongTermPredict_S32, OMXACAAC_SUFFIX)
#define omxACAAC_LongTermReconstruct_S32_I OMXCATBAR(ACAAC_LongTermReconstruct_S32_I, OMXACAAC_SUFFIX)
#define omxACAAC_MDCTFwd_S32 OMXCATBAR(ACAAC_MDCTFwd_S32, OMXACAAC_SUFFIX)
#define omxACAAC_MDCTInv_S32_S16 OMXCATBAR(ACAAC_MDCTInv_S32_S16, OMXACAAC_SUFFIX)
#define omxACAAC_NoiselessDecode OMXCATBAR(ACAAC_NoiselessDecode, OMXACAAC_SUFFIX)
#define omxACAAC_QuantInv_S32_I OMXCATBAR(ACAAC_QuantInv_S32_I, OMXACAAC_SUFFIX)
#define omxACAAC_UnpackADIFHeader OMXCATBAR(ACAAC_UnpackADIFHeader, OMXACAAC_SUFFIX)
#define omxACAAC_UnpackADTSFrameHeader OMXCATBAR(ACAAC_UnpackADTSFrameHeader, OMXACAAC_SUFFIX)
#define omxACMP3_HuffmanDecode_S32 OMXCATBAR(ACMP3_HuffmanDecode_S32, OMXACMP3_SUFFIX)
#define omxACMP3_HuffmanDecodeSfb_S32 OMXCATBAR(ACMP3_HuffmanDecodeSfb_S32, OMXACMP3_SUFFIX)
#define omxACMP3_HuffmanDecodeSfbMbp_S32 OMXCATBAR(ACMP3_HuffmanDecodeSfbMbp_S32, OMXACMP3_SUFFIX)
#define omxACMP3_MDCTInv_S32 OMXCATBAR(ACMP3_MDCTInv_S32, OMXACMP3_SUFFIX)
#define omxACMP3_ReQuantize_S32_I OMXCATBAR(ACMP3_ReQuantize_S32_I, OMXACMP3_SUFFIX)
#define omxACMP3_ReQuantizeSfb_S32_I OMXCATBAR(ACMP3_ReQuantizeSfb_S32_I, OMXACMP3_SUFFIX)
#define omxACMP3_SynthPQMF_S32_S16 OMXCATBAR(ACMP3_SynthPQMF_S32_S16, OMXACMP3_SUFFIX)
#define omxACMP3_UnpackFrameHeader OMXCATBAR(ACMP3_UnpackFrameHeader, OMXACMP3_SUFFIX)
#define omxACMP3_UnpackScaleFactors_S8 OMXCATBAR(ACMP3_UnpackScaleFactors_S8, OMXACMP3_SUFFIX)
#define omxACMP3_UnpackSideInfo OMXCATBAR(ACMP3_UnpackSideInfo, OMXACMP3_SUFFIX)
#define omxICJP_CopyExpand_U8_C3 OMXCATBAR(ICJP_CopyExpand_U8_C3, OMXICJP_SUFFIX)
#define omxICJP_DCTFwd_S16 OMXCATBAR(ICJP_DCTFwd_S16, OMXICJP_SUFFIX)
#define omxICJP_DCTFwd_S16_I OMXCATBAR(ICJP_DCTFwd_S16_I, OMXICJP_SUFFIX)
#define omxICJP_DCTInv_S16 OMXCATBAR(ICJP_DCTInv_S16, OMXICJP_SUFFIX)
#define omxICJP_DCTInv_S16_I OMXCATBAR(ICJP_DCTInv_S16_I, OMXICJP_SUFFIX)
#define omxICJP_DCTQuantFwd_Multiple_S16 OMXCATBAR(ICJP_DCTQuantFwd_Multiple_S16, OMXICJP_SUFFIX)
#define omxICJP_DCTQuantFwd_S16 OMXCATBAR(ICJP_DCTQuantFwd_S16, OMXICJP_SUFFIX)
#define omxICJP_DCTQuantFwd_S16_I OMXCATBAR(ICJP_DCTQuantFwd_S16_I, OMXICJP_SUFFIX)
#define omxICJP_DCTQuantFwdTableInit OMXCATBAR(ICJP_DCTQuantFwdTableInit, OMXICJP_SUFFIX)
#define omxICJP_DCTQuantInv_Multiple_S16 OMXCATBAR(ICJP_DCTQuantInv_Multiple_S16, OMXICJP_SUFFIX)
#define omxICJP_DCTQuantInv_S16 OMXCATBAR(ICJP_DCTQuantInv_S16, OMXICJP_SUFFIX)
#define omxICJP_DCTQuantInv_S16_I OMXCATBAR(ICJP_DCTQuantInv_S16_I, OMXICJP_SUFFIX)
#define omxICJP_DCTQuantInvTableInit OMXCATBAR(ICJP_DCTQuantInvTableInit, OMXICJP_SUFFIX)
#define omxICJP_DecodeHuffman8x8_Direct_S16_C1 OMXCATBAR(ICJP_DecodeHuffman8x8_Direct_S16_C1, OMXICJP_SUFFIX)
#define omxICJP_DecodeHuffmanSpecGetBufSize_U8 OMXCATBAR(ICJP_DecodeHuffmanSpecGetBufSize_U8, OMXICJP_SUFFIX)
#define omxICJP_DecodeHuffmanSpecInit_U8 OMXCATBAR(ICJP_DecodeHuffmanSpecInit_U8, OMXICJP_SUFFIX)
#define omxICJP_EncodeHuffman8x8_Direct_S16_U1_C1 OMXCATBAR(ICJP_EncodeHuffman8x8_Direct_S16_U1_C1, OMXICJP_SUFFIX)
#define omxICJP_EncodeHuffmanSpecGetBufSize_U8 OMXCATBAR(ICJP_EncodeHuffmanSpecGetBufSize_U8, OMXICJP_SUFFIX)
#define omxICJP_EncodeHuffmanSpecInit_U8 OMXCATBAR(ICJP_EncodeHuffmanSpecInit_U8, OMXICJP_SUFFIX)
#define omxIPBM_AddC_U8_C1R_Sfs OMXCATBAR(IPBM_AddC_U8_C1R_Sfs, OMXIPBM_SUFFIX)
#define omxIPBM_Copy_U8_C1R OMXCATBAR(IPBM_Copy_U8_C1R, OMXIPBM_SUFFIX)
#define omxIPBM_Copy_U8_C3R OMXCATBAR(IPBM_Copy_U8_C3R, OMXIPBM_SUFFIX)
#define omxIPBM_Mirror_U8_C1R OMXCATBAR(IPBM_Mirror_U8_C1R, OMXIPBM_SUFFIX)
#define omxIPBM_MulC_U8_C1R_Sfs OMXCATBAR(IPBM_MulC_U8_C1R_Sfs, OMXIPBM_SUFFIX)
#define omxIPCS_ColorTwistQ14_U8_C3R OMXCATBAR(IPCS_ColorTwistQ14_U8_C3R, OMXIPCS_SUFFIX)
#define omxIPCS_BGR565ToYCbCr420LS_MCU_U16_S16_C3P3R OMXCATBAR(IPCS_BGR565ToYCbCr420LS_MCU_U16_S16_C3P3R, OMXIPCS_SUFFIX)
#define omxIPCS_BGR565ToYCbCr422LS_MCU_U16_S16_C3P3R OMXCATBAR(IPCS_BGR565ToYCbCr422LS_MCU_U16_S16_C3P3R, OMXIPCS_SUFFIX)
#define omxIPCS_BGR565ToYCbCr444LS_MCU_U16_S16_C3P3R OMXCATBAR(IPCS_BGR565ToYCbCr444LS_MCU_U16_S16_C3P3R, OMXIPCS_SUFFIX)
#define omxIPCS_BGR888ToYCbCr420LS_MCU_U8_S16_C3P3R OMXCATBAR(IPCS_BGR888ToYCbCr420LS_MCU_U8_S16_C3P3R, OMXIPCS_SUFFIX)
#define omxIPCS_BGR888ToYCbCr422LS_MCU_U8_S16_C3P3R OMXCATBAR(IPCS_BGR888ToYCbCr422LS_MCU_U8_S16_C3P3R, OMXIPCS_SUFFIX)
#define omxIPCS_BGR888ToYCbCr444LS_MCU_U8_S16_C3P3R OMXCATBAR(IPCS_BGR888ToYCbCr444LS_MCU_U8_S16_C3P3R, OMXIPCS_SUFFIX)
#define omxIPCS_YCbCr420RszCscRotBGR_U8_P3C3R OMXCATBAR(IPCS_YCbCr420RszCscRotBGR_U8_P3C3R, OMXIPCS_SUFFIX)
#define omxIPCS_YCbCr420RszRot_U8_P3R OMXCATBAR(IPCS_YCbCr420RszRot_U8_P3R, OMXIPCS_SUFFIX)
#define omxIPCS_YCbCr420ToBGR565_U8_U16_P3C3R OMXCATBAR(IPCS_YCbCr420ToBGR565_U8_U16_P3C3R, OMXIPCS_SUFFIX)
#define omxIPCS_YCbCr420ToBGR565LS_MCU_S16_U16_P3C3R OMXCATBAR(IPCS_YCbCr420ToBGR565LS_MCU_S16_U16_P3C3R, OMXIPCS_SUFFIX)
#define omxIPCS_YCbCr420ToBGR888LS_MCU_S16_U8_P3C3R OMXCATBAR(IPCS_YCbCr420ToBGR888LS_MCU_S16_U8_P3C3R, OMXIPCS_SUFFIX)
#define omxIPCS_YCbCr422RszCscRotBGR_U8_P3C3R OMXCATBAR(IPCS_YCbCr422RszCscRotBGR_U8_P3C3R, OMXIPCS_SUFFIX)
#define omxIPCS_CbYCrY422RszCscRotBGR_U8_U16_C2R OMXCATBAR(IPCS_CbYCrY422RszCscRotBGR_U8_U16_C2R, OMXIPCS_SUFFIX)
#define omxIPCS_YCbCr422RszRot_U8_P3R OMXCATBAR(IPCS_YCbCr422RszRot_U8_P3R, OMXIPCS_SUFFIX)
#define omxIPCS_YCbYCr422ToBGR565_U8_U16_C2C3R OMXCATBAR(IPCS_YCbYCr422ToBGR565_U8_U16_C2C3R, OMXIPCS_SUFFIX)
#define omxIPCS_YCbCr422ToBGR565LS_MCU_S16_U16_P3C3R OMXCATBAR(IPCS_YCbCr422ToBGR565LS_MCU_S16_U16_P3C3R, OMXIPCS_SUFFIX)
#define omxIPCS_YCbYCr422ToBGR888_U8_C2C3R OMXCATBAR(IPCS_YCbYCr422ToBGR888_U8_C2C3R, OMXIPCS_SUFFIX)
#define omxIPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R OMXCATBAR(IPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R, OMXIPCS_SUFFIX)
#define omxIPCS_CbYCrY422ToYCbCr420Rotate_U8_C2P3R OMXCATBAR(IPCS_CbYCrY422ToYCbCr420Rotate_U8_C2P3R, OMXIPCS_SUFFIX)
#define omxIPCS_YCbCr422ToYCbCr420Rotate_U8_P3R OMXCATBAR(IPCS_YCbCr422ToYCbCr420Rotate_U8_P3R, OMXIPCS_SUFFIX)
#define omxIPCS_YCbCr444ToBGR565_U8_U16_C3R OMXCATBAR(IPCS_YCbCr444ToBGR565_U8_U16_C3R, OMXIPCS_SUFFIX)
#define omxIPCS_YCbCr444ToBGR565_U8_U16_P3C3R OMXCATBAR(IPCS_YCbCr444ToBGR565_U8_U16_P3C3R, OMXIPCS_SUFFIX)
#define omxIPCS_YCbCr444ToBGR565LS_MCU_S16_U16_P3C3R OMXCATBAR(IPCS_YCbCr444ToBGR565LS_MCU_S16_U16_P3C3R, OMXIPCS_SUFFIX)
#define omxIPCS_YCbCr444ToBGR888_U8_C3R OMXCATBAR(IPCS_YCbCr444ToBGR888_U8_C3R, OMXIPCS_SUFFIX)
#define omxIPPP_Deblock_HorEdge_U8_I OMXCATBAR(IPPP_Deblock_HorEdge_U8_I, OMXIPPP_SUFFIX)
#define omxIPPP_Deblock_VerEdge_U8_I OMXCATBAR(IPPP_Deblock_VerEdge_U8_I, OMXIPPP_SUFFIX)
#define omxIPPP_FilterFIR_U8_C1R OMXCATBAR(IPPP_FilterFIR_U8_C1R, OMXIPPP_SUFFIX)
#define omxIPPP_FilterMedian_U8_C1R OMXCATBAR(IPPP_FilterMedian_U8_C1R, OMXIPPP_SUFFIX)
#define omxIPPP_GetCentralMoment_S64 OMXCATBAR(IPPP_GetCentralMoment_S64, OMXIPPP_SUFFIX)
#define omxIPPP_GetSpatialMoment_S64 OMXCATBAR(IPPP_GetSpatialMoment_S64, OMXIPPP_SUFFIX)
#define omxIPPP_MomentGetStateSize OMXCATBAR(IPPP_MomentGetStateSize, OMXIPPP_SUFFIX)
#define omxIPPP_MomentInit OMXCATBAR(IPPP_MomentInit, OMXIPPP_SUFFIX)
#define omxIPPP_Moments_U8_C1R OMXCATBAR(IPPP_Moments_U8_C1R, OMXIPPP_SUFFIX)
#define omxIPPP_Moments_U8_C3R OMXCATBAR(IPPP_Moments_U8_C3R, OMXIPPP_SUFFIX)
#define omxSP_BlockExp_S16 OMXCATBAR(SP_BlockExp_S16, OMXSP_SUFFIX)
#define omxSP_BlockExp_S32 OMXCATBAR(SP_BlockExp_S32, OMXSP_SUFFIX)
#define omxSP_Copy_S16 OMXCATBAR(SP_Copy_S16, OMXSP_SUFFIX)
#define omxSP_DotProd_S16 OMXCATBAR(SP_DotProd_S16, OMXSP_SUFFIX)
#define omxSP_DotProd_S16_Sfs OMXCATBAR(SP_DotProd_S16_Sfs, OMXSP_SUFFIX)
#define omxSP_FFTFwd_CToC_SC16_Sfs OMXCATBAR(SP_FFTFwd_CToC_SC16_Sfs, OMXSP_SUFFIX)
#define omxSP_FFTFwd_CToC_SC32_Sfs OMXCATBAR(SP_FFTFwd_CToC_SC32_Sfs, OMXSP_SUFFIX)
#define omxSP_FFTFwd_RToCCS_S16S32_Sfs OMXCATBAR(SP_FFTFwd_RToCCS_S16S32_Sfs, OMXSP_SUFFIX)
#define omxSP_FFTFwd_RToCCS_S32_Sfs OMXCATBAR(SP_FFTFwd_RToCCS_S32_Sfs, OMXSP_SUFFIX)
#define omxSP_FFTGetBufSize_C_SC16 OMXCATBAR(SP_FFTGetBufSize_C_SC16, OMXSP_SUFFIX)
#define omxSP_FFTGetBufSize_C_SC32 OMXCATBAR(SP_FFTGetBufSize_C_SC32, OMXSP_SUFFIX)
#define omxSP_FFTGetBufSize_R_S16S32 OMXCATBAR(SP_FFTGetBufSize_R_S16S32, OMXSP_SUFFIX)
#define omxSP_FFTGetBufSize_R_S32 OMXCATBAR(SP_FFTGetBufSize_R_S32, OMXSP_SUFFIX)
#define omxSP_FFTInit_C_SC16 OMXCATBAR(SP_FFTInit_C_SC16, OMXSP_SUFFIX)
#define omxSP_FFTInit_C_SC32 OMXCATBAR(SP_FFTInit_C_SC32, OMXSP_SUFFIX)
#define omxSP_FFTInit_R_S16S32 OMXCATBAR(SP_FFTInit_R_S16S32, OMXSP_SUFFIX)
#define omxSP_FFTInit_R_S32 OMXCATBAR(SP_FFTInit_R_S32, OMXSP_SUFFIX)
#define omxSP_FFTInv_CCSToR_S32_Sfs OMXCATBAR(SP_FFTInv_CCSToR_S32_Sfs, OMXSP_SUFFIX)
#define omxSP_FFTInv_CCSToR_S32S16_Sfs OMXCATBAR(SP_FFTInv_CCSToR_S32S16_Sfs, OMXSP_SUFFIX)
#define omxSP_FFTInv_CToC_SC16_Sfs OMXCATBAR(SP_FFTInv_CToC_SC16_Sfs, OMXSP_SUFFIX)
#define omxSP_FFTInv_CToC_SC32_Sfs OMXCATBAR(SP_FFTInv_CToC_SC32_Sfs, OMXSP_SUFFIX)
#define omxSP_FilterMedian_S32 OMXCATBAR(SP_FilterMedian_S32, OMXSP_SUFFIX)
#define omxSP_FilterMedian_S32_I OMXCATBAR(SP_FilterMedian_S32_I, OMXSP_SUFFIX)
#define omxSP_FIR_Direct_S16 OMXCATBAR(SP_FIR_Direct_S16, OMXSP_SUFFIX)
#define omxSP_FIR_Direct_S16_I OMXCATBAR(SP_FIR_Direct_S16_I, OMXSP_SUFFIX)
#define omxSP_FIR_Direct_S16_ISfs OMXCATBAR(SP_FIR_Direct_S16_ISfs, OMXSP_SUFFIX)
#define omxSP_FIR_Direct_S16_Sfs OMXCATBAR(SP_FIR_Direct_S16_Sfs, OMXSP_SUFFIX)
#define omxSP_FIROne_Direct_S16 OMXCATBAR(SP_FIROne_Direct_S16, OMXSP_SUFFIX)
#define omxSP_FIROne_Direct_S16_I OMXCATBAR(SP_FIROne_Direct_S16_I, OMXSP_SUFFIX)
#define omxSP_FIROne_Direct_S16_ISfs OMXCATBAR(SP_FIROne_Direct_S16_ISfs, OMXSP_SUFFIX)
#define omxSP_FIROne_Direct_S16_Sfs OMXCATBAR(SP_FIROne_Direct_S16_Sfs, OMXSP_SUFFIX)
#define omxSP_IIR_BiQuadDirect_S16 OMXCATBAR(SP_IIR_BiQuadDirect_S16, OMXSP_SUFFIX)
#define omxSP_IIR_BiQuadDirect_S16_I OMXCATBAR(SP_IIR_BiQuadDirect_S16_I, OMXSP_SUFFIX)
#define omxSP_IIR_Direct_S16 OMXCATBAR(SP_IIR_Direct_S16, OMXSP_SUFFIX)
#define omxSP_IIR_Direct_S16_I OMXCATBAR(SP_IIR_Direct_S16_I, OMXSP_SUFFIX)
#define omxSP_IIROne_BiQuadDirect_S16 OMXCATBAR(SP_IIROne_BiQuadDirect_S16, OMXSP_SUFFIX)
#define omxSP_IIROne_BiQuadDirect_S16_I OMXCATBAR(SP_IIROne_BiQuadDirect_S16_I, OMXSP_SUFFIX)
#define omxSP_IIROne_Direct_S16 OMXCATBAR(SP_IIROne_Direct_S16, OMXSP_SUFFIX)
#define omxSP_IIROne_Direct_S16_I OMXCATBAR(SP_IIROne_Direct_S16_I, OMXSP_SUFFIX)
#define omxVCCOMM_Average_16x OMXCATBAR(VCCOMM_Average_16x, OMXVCCOMM_SUFFIX)
#define omxVCCOMM_Average_8x OMXCATBAR(VCCOMM_Average_8x, OMXVCCOMM_SUFFIX)
#define omxVCCOMM_ComputeTextureErrorBlock OMXCATBAR(VCCOMM_ComputeTextureErrorBlock, OMXVCCOMM_SUFFIX)
#define omxVCCOMM_ComputeTextureErrorBlock_SAD OMXCATBAR(VCCOMM_ComputeTextureErrorBlock_SAD, OMXVCCOMM_SUFFIX)
#define omxVCCOMM_Copy16x16 OMXCATBAR(VCCOMM_Copy16x16, OMXVCCOMM_SUFFIX)
#define omxVCCOMM_Copy8x8 OMXCATBAR(VCCOMM_Copy8x8, OMXVCCOMM_SUFFIX)
#define omxVCCOMM_ExpandFrame_I OMXCATBAR(VCCOMM_ExpandFrame_I, OMXVCCOMM_SUFFIX)
#define omxVCCOMM_LimitMVToRect OMXCATBAR(VCCOMM_LimitMVToRect, OMXVCCOMM_SUFFIX)
#define omxVCCOMM_SAD_16x OMXCATBAR(VCCOMM_SAD_16x, OMXVCCOMM_SUFFIX)
#define omxVCCOMM_SAD_8x OMXCATBAR(VCCOMM_SAD_8x, OMXVCCOMM_SUFFIX)
#define omxVCM4P10_Average_4x OMXCATBAR(VCM4P10_Average_4x, OMXVCM4P10_SUFFIX)
#define omxVCM4P10_BlockMatch_Half OMXCATBAR(VCM4P10_BlockMatch_Half, OMXVCM4P10_SUFFIX)
#define omxVCM4P10_BlockMatch_Integer OMXCATBAR(VCM4P10_BlockMatch_Integer, OMXVCM4P10_SUFFIX)
#define omxVCM4P10_BlockMatch_Quarter OMXCATBAR(VCM4P10_BlockMatch_Quarter, OMXVCM4P10_SUFFIX)
#define omxVCM4P10_DeblockChroma_I OMXCATBAR(VCM4P10_DeblockChroma_I, OMXVCM4P10_SUFFIX)
#define omxVCM4P10_DeblockLuma_I OMXCATBAR(VCM4P10_DeblockLuma_I, OMXVCM4P10_SUFFIX)
#define omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC OMXCATBAR(VCM4P10_DecodeChromaDcCoeffsToPairCAVLC, OMXVCM4P10_SUFFIX)
#define omxVCM4P10_DecodeCoeffsToPairCAVLC OMXCATBAR(VCM4P10_DecodeCoeffsToPairCAVLC, OMXVCM4P10_SUFFIX)
#define omxVCM4P10_DequantTransformResidualFromPairAndAdd OMXCATBAR(VCM4P10_DequantTransformResidualFromPairAndAdd, OMXVCM4P10_SUFFIX)
#define omxVCM4P10_FilterDeblockingChroma_HorEdge_I OMXCATBAR(VCM4P10_FilterDeblockingChroma_HorEdge_I, OMXVCM4P10_SUFFIX)
#define omxVCM4P10_FilterDeblockingChroma_VerEdge_I OMXCATBAR(VCM4P10_FilterDeblockingChroma_VerEdge_I, OMXVCM4P10_SUFFIX)
#define omxVCM4P10_FilterDeblockingLuma_HorEdge_I OMXCATBAR(VCM4P10_FilterDeblockingLuma_HorEdge_I, OMXVCM4P10_SUFFIX)
#define omxVCM4P10_FilterDeblockingLuma_VerEdge_I OMXCATBAR(VCM4P10_FilterDeblockingLuma_VerEdge_I, OMXVCM4P10_SUFFIX)
#define omxVCM4P10_GetVLCInfo OMXCATBAR(VCM4P10_GetVLCInfo, OMXVCM4P10_SUFFIX)
#define omxVCM4P10_InterpolateChroma OMXCATBAR(VCM4P10_InterpolateChroma, OMXVCM4P10_SUFFIX)
#define omxVCM4P10_InterpolateHalfHor_Luma OMXCATBAR(VCM4P10_InterpolateHalfHor_Luma, OMXVCM4P10_SUFFIX)
#define omxVCM4P10_InterpolateHalfVer_Luma OMXCATBAR(VCM4P10_InterpolateHalfVer_Luma, OMXVCM4P10_SUFFIX)
#define omxVCM4P10_InterpolateLuma OMXCATBAR(VCM4P10_InterpolateLuma, OMXVCM4P10_SUFFIX)
#define omxVCM4P10_InvTransformDequant_ChromaDC OMXCATBAR(VCM4P10_InvTransformDequant_ChromaDC, OMXVCM4P10_SUFFIX)
#define omxVCM4P10_InvTransformDequant_LumaDC OMXCATBAR(VCM4P10_InvTransformDequant_LumaDC, OMXVCM4P10_SUFFIX)
#define omxVCM4P10_InvTransformResidualAndAdd OMXCATBAR(VCM4P10_InvTransformResidualAndAdd, OMXVCM4P10_SUFFIX)
#define omxVCM4P10_MEGetBufSize OMXCATBAR(VCM4P10_MEGetBufSize, OMXVCM4P10_SUFFIX)
#define omxVCM4P10_MEInit OMXCATBAR(VCM4P10_MEInit, OMXVCM4P10_SUFFIX)
#define omxVCM4P10_MotionEstimationMB OMXCATBAR(VCM4P10_MotionEstimationMB, OMXVCM4P10_SUFFIX)
#define omxVCM4P10_PredictIntra_16x16 OMXCATBAR(VCM4P10_PredictIntra_16x16, OMXVCM4P10_SUFFIX)
#define omxVCM4P10_PredictIntra_4x4 OMXCATBAR(VCM4P10_PredictIntra_4x4, OMXVCM4P10_SUFFIX)
#define omxVCM4P10_PredictIntraChroma_8x8 OMXCATBAR(VCM4P10_PredictIntraChroma_8x8, OMXVCM4P10_SUFFIX)
#define omxVCM4P10_SAD_4x OMXCATBAR(VCM4P10_SAD_4x, OMXVCM4P10_SUFFIX)
#define omxVCM4P10_SADQuar_16x OMXCATBAR(VCM4P10_SADQuar_16x, OMXVCM4P10_SUFFIX)
#define omxVCM4P10_SADQuar_4x OMXCATBAR(VCM4P10_SADQuar_4x, OMXVCM4P10_SUFFIX)
#define omxVCM4P10_SADQuar_8x OMXCATBAR(VCM4P10_SADQuar_8x, OMXVCM4P10_SUFFIX)
#define omxVCM4P10_SATD_4x4 OMXCATBAR(VCM4P10_SATD_4x4, OMXVCM4P10_SUFFIX)
#define omxVCM4P10_SubAndTransformQDQResidual OMXCATBAR(VCM4P10_SubAndTransformQDQResidual, OMXVCM4P10_SUFFIX)
#define omxVCM4P10_TransformDequantChromaDCFromPair OMXCATBAR(VCM4P10_TransformDequantChromaDCFromPair, OMXVCM4P10_SUFFIX)
#define omxVCM4P10_TransformDequantLumaDCFromPair OMXCATBAR(VCM4P10_TransformDequantLumaDCFromPair, OMXVCM4P10_SUFFIX)
#define omxVCM4P10_TransformQuant_ChromaDC OMXCATBAR(VCM4P10_TransformQuant_ChromaDC, OMXVCM4P10_SUFFIX)
#define omxVCM4P10_TransformQuant_LumaDC OMXCATBAR(VCM4P10_TransformQuant_LumaDC, OMXVCM4P10_SUFFIX)
#define omxVCM4P2_BlockMatch_Half_16x16 OMXCATBAR(VCM4P2_BlockMatch_Half_16x16, OMXVCM4P2_SUFFIX)
#define omxVCM4P2_BlockMatch_Half_8x8 OMXCATBAR(VCM4P2_BlockMatch_Half_8x8, OMXVCM4P2_SUFFIX)
#define omxVCM4P2_BlockMatch_Integer_16x16 OMXCATBAR(VCM4P2_BlockMatch_Integer_16x16, OMXVCM4P2_SUFFIX)
#define omxVCM4P2_BlockMatch_Integer_8x8 OMXCATBAR(VCM4P2_BlockMatch_Integer_8x8, OMXVCM4P2_SUFFIX)
#define omxVCM4P2_DCT8x8blk OMXCATBAR(VCM4P2_DCT8x8blk, OMXVCM4P2_SUFFIX)
#define omxVCM4P2_DecodeBlockCoef_Inter OMXCATBAR(VCM4P2_DecodeBlockCoef_Inter, OMXVCM4P2_SUFFIX)
#define omxVCM4P2_DecodeBlockCoef_Intra OMXCATBAR(VCM4P2_DecodeBlockCoef_Intra, OMXVCM4P2_SUFFIX)
#define omxVCM4P2_DecodePadMV_PVOP OMXCATBAR(VCM4P2_DecodePadMV_PVOP, OMXVCM4P2_SUFFIX)
#define omxVCM4P2_DecodeVLCZigzag_Inter OMXCATBAR(VCM4P2_DecodeVLCZigzag_Inter, OMXVCM4P2_SUFFIX)
#define omxVCM4P2_DecodeVLCZigzag_IntraACVLC OMXCATBAR(VCM4P2_DecodeVLCZigzag_IntraACVLC, OMXVCM4P2_SUFFIX)
#define omxVCM4P2_DecodeVLCZigzag_IntraDCVLC OMXCATBAR(VCM4P2_DecodeVLCZigzag_IntraDCVLC, OMXVCM4P2_SUFFIX)
#define omxVCM4P2_EncodeMV OMXCATBAR(VCM4P2_EncodeMV, OMXVCM4P2_SUFFIX)
#define omxVCM4P2_EncodeVLCZigzag_Inter OMXCATBAR(VCM4P2_EncodeVLCZigzag_Inter, OMXVCM4P2_SUFFIX)
#define omxVCM4P2_EncodeVLCZigzag_IntraACVLC OMXCATBAR(VCM4P2_EncodeVLCZigzag_IntraACVLC, OMXVCM4P2_SUFFIX)
#define omxVCM4P2_EncodeVLCZigzag_IntraDCVLC OMXCATBAR(VCM4P2_EncodeVLCZigzag_IntraDCVLC, OMXVCM4P2_SUFFIX)
#define omxVCM4P2_FindMVpred OMXCATBAR(VCM4P2_FindMVpred, OMXVCM4P2_SUFFIX)
#define omxVCM4P2_IDCT8x8blk OMXCATBAR(VCM4P2_IDCT8x8blk, OMXVCM4P2_SUFFIX)
#define omxVCM4P2_MCReconBlock OMXCATBAR(VCM4P2_MCReconBlock, OMXVCM4P2_SUFFIX)
#define omxVCM4P2_MEGetBufSize OMXCATBAR(VCM4P2_MEGetBufSize, OMXVCM4P2_SUFFIX)
#define omxVCM4P2_MEInit OMXCATBAR(VCM4P2_MEInit, OMXVCM4P2_SUFFIX)
#define omxVCM4P2_MotionEstimationMB OMXCATBAR(VCM4P2_MotionEstimationMB, OMXVCM4P2_SUFFIX)
#define omxVCM4P2_PredictReconCoefIntra OMXCATBAR(VCM4P2_PredictReconCoefIntra, OMXVCM4P2_SUFFIX)
#define omxVCM4P2_QuantInter_I OMXCATBAR(VCM4P2_QuantInter_I, OMXVCM4P2_SUFFIX)
#define omxVCM4P2_QuantIntra_I OMXCATBAR(VCM4P2_QuantIntra_I, OMXVCM4P2_SUFFIX)
#define omxVCM4P2_QuantInvInter_I OMXCATBAR(VCM4P2_QuantInvInter_I, OMXVCM4P2_SUFFIX)
#define omxVCM4P2_QuantInvIntra_I OMXCATBAR(VCM4P2_QuantInvIntra_I, OMXVCM4P2_SUFFIX)
#define omxVCM4P2_TransRecBlockCoef_inter OMXCATBAR(VCM4P2_TransRecBlockCoef_inter, OMXVCM4P2_SUFFIX)
#define omxVCM4P2_TransRecBlockCoef_intra OMXCATBAR(VCM4P2_TransRecBlockCoef_intra, OMXVCM4P2_SUFFIX)
#endif /* ARMOMX_ENABLE_RENAMING */
#endif /* _armOMX_H_ */

Просмотреть файл

@ -1,254 +0,0 @@
/**
* File: omxtypes.h
* Brief: Defines basic Data types used in OpenMAX v1.0.2 header files.
*
* Copyright (c) 2005-2008,2015 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
*
*/
#ifndef _OMXTYPES_H_
#define _OMXTYPES_H_
#include <limits.h>
#define OMX_IN
#define OMX_OUT
#define OMX_INOUT
/**
 * OMXResult - return value or error value returned from a function.
 * Identical to OMX_INT.
 *
 * Every enumerator has an explicit value, so the listing order carries
 * no meaning.
 */
typedef enum {
    /* ---- Mandatory return codes (use cases are described per function) ---- */
    OMX_Sts_NoErr                   =    0,  /* Success: the function completed */
    OMX_Sts_Err                     =   -2,  /* Unknown or unspecified error */
    OMX_Sts_InvalidBitstreamValErr  = -182,  /* Invalid value found while processing a bitstream */
    OMX_Sts_MemAllocErr             =   -9,  /* Not enough memory allocated for the operation */
    OMX_StsACAAC_GainCtrErr         = -159,  /* AAC: unsupported gain control data detected */
    OMX_StsACAAC_PrgNumErr          = -167,  /* AAC: invalid number of elements for one program */
    OMX_StsACAAC_CoefValErr         = -163,  /* AAC: invalid quantized coefficient value */
    OMX_StsACAAC_MaxSfbErr          = -162,  /* AAC: invalid maxSfb value in relation to numSwb */
    OMX_StsACAAC_PlsDataErr         = -160,  /* AAC: pulse escape sequence data error */

    /* ---- Optional return codes (use cases are described per function) ---- */
    OMX_Sts_BadArgErr               =   -5,  /* Bad arguments */
    OMX_StsACAAC_TnsNumFiltErr      = -157,  /* AAC: invalid number of TNS filters */
    OMX_StsACAAC_TnsLenErr          = -156,  /* AAC: invalid TNS region length */
    OMX_StsACAAC_TnsOrderErr        = -155,  /* AAC: invalid order of TNS filter */
    OMX_StsACAAC_TnsCoefResErr      = -154,  /* AAC: invalid bit-resolution for TNS filter coefficients */
    OMX_StsACAAC_TnsCoefErr         = -153,  /* AAC: invalid TNS filter coefficients */
    OMX_StsACAAC_TnsDirectErr       = -152,  /* AAC: invalid TNS filter direction */
    OMX_StsICJP_JPEGMarkerErr       = -183,  /* JPEG marker inside an entropy-coded block;
                                                Huffman decoding terminated early */
    OMX_StsICJP_JPEGMarker          = -181,  /* JPEG marker encountered;
                                                Huffman decoding terminated early */
    OMX_StsIPPP_ContextMatchErr     =  -17,  /* Context parameter does not match the operation */
    OMX_StsSP_EvenMedianMaskSizeErr = -180,  /* Even median-filter mask size was replaced by an odd one */

    /* Placeholder intended to force the enum to the size of OMX_INT */
    OMX_Sts_MaximumEnumeration      = INT_MAX
} OMXResult;
/*
 * Fixed-width integer aliases.
 *
 * Each chain below is resolved by the preprocessor from the <limits.h>
 * range macros: the first standard type whose maximum matches the
 * required width is used, and the build stops with #error when no
 * candidate matches.
 */
/* OMX_U8: unsigned 8-bit integer */
#if UCHAR_MAX == 0xff
typedef unsigned char OMX_U8;
#elif USHRT_MAX == 0xff
typedef unsigned short int OMX_U8;
#else
#error OMX_U8 undefined
#endif
/* OMX_S8: signed 8-bit integer */
#if SCHAR_MAX == 0x7f
typedef signed char OMX_S8;
#elif SHRT_MAX == 0x7f
typedef signed short int OMX_S8;
#else
#error OMX_S8 undefined
#endif
/* OMX_U16: unsigned 16-bit integer */
#if USHRT_MAX == 0xffff
typedef unsigned short int OMX_U16;
#elif UINT_MAX == 0xffff
typedef unsigned int OMX_U16;
#else
#error OMX_U16 undefined
#endif
/* OMX_S16: signed 16-bit integer */
#if SHRT_MAX == 0x7fff
typedef signed short int OMX_S16;
#elif INT_MAX == 0x7fff
typedef signed int OMX_S16;
#else
#error OMX_S16 undefined
#endif
/* OMX_U32: unsigned 32-bit integer */
#if UINT_MAX == 0xffffffff
typedef unsigned int OMX_U32;
/*
 * The unsigned fallback has to be selected with ULONG_MAX. A 32-bit long
 * has LONG_MAX == 0x7fffffff, so a test against LONG_MAX could never
 * succeed and targets with a 16-bit int would always hit the #error.
 */
#elif ULONG_MAX == 0xffffffff
typedef unsigned long int OMX_U32;
#else
#error OMX_U32 undefined
#endif
/* OMX_S32: signed 32-bit integer */
#if INT_MAX == 0x7fffffff
typedef signed int OMX_S32;
#elif LONG_MAX == 0x7fffffff
typedef long signed int OMX_S32;
#else
#error OMX_S32 undefined
#endif
/*
 * OMX_U64 & OMX_S64: 64-bit integer types and their range limits.
 *
 * The signed minimum is written as (-MAX - 1). A hexadecimal constant
 * 0x8000000000000000 does not fit in a signed 64-bit type, so with an LL
 * suffix it takes an unsigned type and would compare as a large positive
 * value instead of a negative one. The unsigned limits carry unsigned
 * suffixes so that their type matches OMX_U64.
 */
#if defined( _WIN32 ) || defined ( _WIN64 )
typedef __int64 OMX_S64; /** Signed 64-bit integer */
typedef unsigned __int64 OMX_U64; /** Unsigned 64-bit integer */
#define OMX_MIN_S64 (-0x7FFFFFFFFFFFFFFFi64 - 1)
#define OMX_MIN_U64 (0x0000000000000000ui64)
#define OMX_MAX_S64 (0x7FFFFFFFFFFFFFFFi64)
#define OMX_MAX_U64 (0xFFFFFFFFFFFFFFFFui64)
#else
typedef long long OMX_S64; /** Signed 64-bit integer */
typedef unsigned long long OMX_U64; /** Unsigned 64-bit integer */
#define OMX_MIN_S64 (-0x7FFFFFFFFFFFFFFFLL - 1)
#define OMX_MIN_U64 (0x0000000000000000ULL)
#define OMX_MAX_S64 (0x7FFFFFFFFFFFFFFFLL)
#define OMX_MAX_U64 (0xFFFFFFFFFFFFFFFFULL)
#endif
/*
 * Signed integer complex number types. Each one stores the real
 * component first and the imaginary component second.
 */

/** OMX_SC8: signed 8-bit complex number */
typedef struct {
    OMX_S8 Re;      /* real part */
    OMX_S8 Im;      /* imaginary part */
} OMX_SC8;

/** OMX_SC16: signed 16-bit complex number */
typedef struct {
    OMX_S16 Re;     /* real part */
    OMX_S16 Im;     /* imaginary part */
} OMX_SC16;

/** OMX_SC32: signed 32-bit complex number */
typedef struct {
    OMX_S32 Re;     /* real part */
    OMX_S32 Im;     /* imaginary part */
} OMX_SC32;

/** OMX_SC64: signed 64-bit complex number */
typedef struct {
    OMX_S64 Re;     /* real part */
    OMX_S64 Im;     /* imaginary part */
} OMX_SC64;
/* Floating point types */
typedef float  OMX_F32;   /** Single precision floating point, IEEE 754 */
typedef double OMX_F64;   /** Double precision floating point, IEEE 754 */

/** OMX_INT: signed integer corresponding to the machine word length;
 *  its maximum signed value is INT_MAX. */
typedef int OMX_INT;

/* Range limits of the 8, 16 and 32-bit types, listed per type as min/max */
#define OMX_MIN_S8  (-128)
#define OMX_MAX_S8  (127)
#define OMX_MIN_U8  0
#define OMX_MAX_U8  (255)

#define OMX_MIN_S16 (-32768)
#define OMX_MAX_S16 (32767)
#define OMX_MIN_U16 0
#define OMX_MAX_U16 (0xFFFF)

#define OMX_MIN_S32 (-2147483647-1)
#define OMX_MAX_S32 (2147483647)
#define OMX_MIN_U32 0
#define OMX_MAX_U32 (0xFFFFFFFF)

/* Alias for void */
typedef void OMXVoid;

/* Provide NULL only when no earlier header has defined it */
#ifndef NULL
#define NULL ((void*)0)
#endif
/**
 * OMXRect: geometric position and size of a rectangle.
 * (x, y) is the top left corner; width extends in the x-direction and
 * height in the y-direction.
 */
typedef struct
{
    OMX_INT x;       /* x-coordinate of the top left corner */
    OMX_INT y;       /* y-coordinate of the top left corner */
    OMX_INT width;   /* extent in the x-direction */
    OMX_INT height;  /* extent in the y-direction */
} OMXRect;

/** OMXPoint: geometric position of a point. */
typedef struct
{
    OMX_INT x;       /* x-coordinate */
    OMX_INT y;       /* y-coordinate */
} OMXPoint;

/** OMXSize: dimensions of a rectangle, or of a region of interest in an image. */
typedef struct
{
    OMX_INT width;   /* extent in the x-direction */
    OMX_INT height;  /* extent in the y-direction */
} OMXSize;
#endif /* _OMXTYPES_H_ */

Просмотреть файл

@ -1,76 +0,0 @@
@//
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@// Use of this source code is governed by a BSD-style license
@// that can be found in the LICENSE file in the root of the source
@// tree. An additional intellectual property rights grant can be found
@// in the file PATENTS. All contributing project authors may
@// be found in the AUTHORS file in the root of the source tree.
@//
@// This file was originally licensed as follows. It has been
@// relicensed with permission from the copyright holders.
@//
@//
@// File Name: omxtypes_s.h
@// OpenMAX DL: v1.0.2
@// Last Modified Revision: 9622
@// Last Modified Date: Wed, 06 Feb 2008
@//
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
@//
@//
@// Assembly-side copies of the OpenMAX status codes and integer limits.
@//
@// ---- Mandatory return codes (use cases are described per function) ----
.equ OMX_Sts_NoErr,                     0       @// No error, the function completed successfully
.equ OMX_Sts_Err,                       -2      @// Unknown or unspecified error
.equ OMX_Sts_InvalidBitstreamValErr,    -182    @// Invalid value detected during bitstream processing
.equ OMX_Sts_MemAllocErr,               -9      @// Not enough memory allocated for the operation
.equ OMX_StsACAAC_GainCtrErr,           -159    @// AAC: unsupported gain control data detected
.equ OMX_StsACAAC_PrgNumErr,            -167    @// AAC: invalid number of elements for one program
.equ OMX_StsACAAC_CoefValErr,           -163    @// AAC: invalid quantized coefficient value
.equ OMX_StsACAAC_MaxSfbErr,            -162    @// AAC: invalid maxSfb value in relation to numSwb
.equ OMX_StsACAAC_PlsDataErr,           -160    @// AAC: pulse escape sequence data error
@// ---- Optional return codes (use cases are described per function) ----
.equ OMX_Sts_BadArgErr,                 -5      @// Bad arguments
.equ OMX_StsACAAC_TnsNumFiltErr,        -157    @// AAC: invalid number of TNS filters
.equ OMX_StsACAAC_TnsLenErr,            -156    @// AAC: invalid TNS region length
.equ OMX_StsACAAC_TnsOrderErr,          -155    @// AAC: invalid order of TNS filter
.equ OMX_StsACAAC_TnsCoefResErr,        -154    @// AAC: invalid bit-resolution for TNS filter coefficients
.equ OMX_StsACAAC_TnsCoefErr,           -153    @// AAC: invalid TNS filter coefficients
.equ OMX_StsACAAC_TnsDirectErr,         -152    @// AAC: invalid TNS filter direction
@// JPEG marker encountered within an entropy-coded block;
@// Huffman decoding operation terminated early.
.equ OMX_StsICJP_JPEGMarkerErr,         -183
@// JPEG marker encountered; Huffman decoding operation terminated early.
.equ OMX_StsICJP_JPEGMarker,            -181
.equ OMX_StsIPPP_ContextMatchErr,       -17     @// Context parameter does not match the operation
.equ OMX_StsSP_EvenMedianMaskSizeErr,   -180    @// Even median filter mask size was replaced by an odd one
.equ OMX_Sts_MaximumEnumeration,        0x7FFFFFFF
@// ---- Integer range limits ----
.equ OMX_MIN_S8,                        (-128)
.equ OMX_MIN_U8,                        0
.equ OMX_MIN_S16,                       (-32768)
.equ OMX_MIN_U16,                       0
.equ OMX_MIN_S32,                       (-2147483647-1)
.equ OMX_MIN_U32,                       0
.equ OMX_MAX_S8,                        (127)
.equ OMX_MAX_U8,                        (255)
.equ OMX_MAX_S16,                       (32767)
.equ OMX_MAX_U16,                       (0xFFFF)
.equ OMX_MAX_S32,                       (2147483647)
.equ OMX_MAX_U32,                       (0xFFFFFFFF)
@// ---- Flags used by the PredictIntra functions ----
.equ OMX_VC_UPPER,                      0x1
.equ OMX_VC_LEFT,                       0x2
.equ OMX_VC_UPPER_RIGHT,                0x40
@// ---- Null pointer value ----
.equ NULL,                              0

Просмотреть файл

@ -1,92 +0,0 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*
* This file was originally licensed as follows. It has been
* relicensed with permission from the copyright holders.
*/
/**
*
* File Name: armSP.h
* OpenMAX DL: v1.0.2
* Last Modified Revision: 7014
* Last Modified Date: Wed, 01 Aug 2007
*
* (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
*
*
*
* File: armSP.h
* Brief: Declares API's/Basic Data types used across the OpenMAX Signal Processing domain
*
*/
#ifndef _armSP_H_
#define _armSP_H_
#include "dl/api/omxtypes.h"
#ifdef __cplusplus
extern "C" {
#endif
/** FFT specific declarations */

/* Twiddle tables, defined in another translation unit */
extern OMX_S32 armSP_FFT_S32TwiddleTable[1026];
extern OMX_F32 armSP_FFT_F32TwiddleTable[];

/*
 * FFT specification structures.
 *
 * All variants share one layout: the size N followed by three pointers.
 * Assembly sources read these members through fixed byte offsets
 * (N = 0, pBitRev = 4, pTwiddle = 8, pBuf = 12), so the member order
 * must stay as it is.
 */

/* Complex FFT on OMX_SC32 data */
typedef struct ARMsFFTSpec_SC32_Tag {
    OMX_U32   N;
    OMX_U16  *pBitRev;
    OMX_SC32 *pTwiddle;
    OMX_SC32 *pBuf;
} ARMsFFTSpec_SC32;

/* Complex FFT on OMX_SC16 data */
typedef struct ARMsFFTSpec_SC16_Tag {
    OMX_U32   N;
    OMX_U16  *pBitRev;
    OMX_SC16 *pTwiddle;
    OMX_SC16 *pBuf;
} ARMsFFTSpec_SC16;

/* Real FFT variant: OMX_SC32 twiddles with an OMX_S32 buffer */
typedef struct ARMsFFTSpec_R_SC32_Tag {
    OMX_U32   N;
    OMX_U16  *pBitRev;
    OMX_SC32 *pTwiddle;
    OMX_S32  *pBuf;
} ARMsFFTSpec_R_SC32;

/* Real FFT variant: OMX_FC32 twiddles with an OMX_F32 buffer.
 * NOTE(review): OMX_FC32 must be declared by a header included before this
 * point - confirm which header provides it. */
typedef struct ARMsFFTSpec_R_FC32_Tag {
    OMX_U32   N;
    OMX_U16  *pBitRev;
    OMX_FC32 *pTwiddle;
    OMX_F32  *pBuf;
} ARMsFFTSpec_R_FC32;

/* Complex FFT on OMX_FC32 data */
typedef struct ARMsFFTSpec_FC32_Tag {
    OMX_U32   N;
    OMX_U16  *pBitRev;
    OMX_FC32 *pTwiddle;
    OMX_FC32 *pBuf;
} ARMsFFTSpec_FC32;
#ifdef __cplusplus
}
#endif
#endif
/*End of File*/

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -1,294 +0,0 @@
@//
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@// Use of this source code is governed by a BSD-style license
@// that can be found in the LICENSE file in the root of the source
@// tree. An additional intellectual property rights grant can be found
@// in the file PATENTS. All contributing project authors may
@// be found in the AUTHORS file in the root of the source tree.
@//
@// This is a modification of
@// armSP_FFTInv_CCSToR_S32_preTwiddleRadix2_unsafe_s.s to support float
@// instead of SC32.
@//
@//
@// Description:
@// Compute the "preTwiddleRadix2" stage prior to the call to the complexFFT
@// It does a Z(k) = Feven(k) + jW^(-k) FOdd(k); k=0,1,2,...N/2-1 computation
@//
@//
@// Include standard headers
#include "dl/api/armCOMM_s.h"
#include "dl/api/omxtypes_s.h"
@// Import symbols required from other files
@// (For example tables)
@// Set debugging level
@//DEBUG_ON SETL {TRUE}
@// Guarding implementation by the processor name
@// Guarding implementation by the processor name
@// Register aliases for the F32 preTwiddleRadix2 stage.
@// Several names map to the same register; they are used in different
@// phases of the code. Keep comments on their own lines in this table.
@//
@// ---- Input registers ----
#define pSrc            r0
#define pDst            r1
#define pFFTSpec        r2
#define scale           r3
@// ---- Output registers ----
#define result          r0
@// ---- Local scratch registers ----
#define argTwiddle      r1
#define argDst          r2
#define argScale        r4
#define tmpOrder        r4
#define pTwiddle        r4
#define pOut            r5
#define subFFTSize      r7
#define subFFTNum       r6
#define N               r6
#define order           r14
#define diff            r9
@// Total num of radix stages required to complete the FFT
#define count           r8
#define x0r             r4
#define x0i             r5
#define diffMinusOne    r2
#define round           r3
#define pOut1           r2
#define size            r7
#define step            r8
#define step1           r9
#define twStep          r10
#define pTwiddleTmp     r11
#define argTwiddle1     r12
#define zero            r14
@// ---- Neon registers (typed as F32) ----
#define dX0             D0.F32
#define dShift          D1.F32
#define dX1             D1.F32
#define dY0             D2.F32
#define dY1             D3.F32
#define dX0r            D0.F32
#define dX0i            D1.F32
#define dX1r            D2.F32
#define dX1i            D3.F32
#define dW0r            D4.F32
#define dW0i            D5.F32
#define dW1r            D6.F32
#define dW1i            D7.F32
#define dT0             D8.F32
#define dT1             D9.F32
#define dT2             D10.F32
#define dT3             D11.F32
@// The qT names map to D registers in this float variant
#define qT0             D12.F32
#define qT1             D14.F32
#define qT2             D16.F32
#define qT3             D18.F32
#define dY0r            D4.F32
#define dY0i            D5.F32
#define dY1r            D6.F32
#define dY1i            D7.F32
#define dY2             D4.F32
#define dY3             D5.F32
#define dW0             D6.F32
#define dW1             D7.F32
#define dW0Tmp          D10.F32
#define dW1Neg          D11.F32
#define half            D13.F32
@// ---- Byte offsets of the FFTSpec structure members ----
.set ARMsFFTSpec_N,             0
.set ARMsFFTSpec_pBitRev,       4
.set ARMsFFTSpec_pTwiddle,      8
.set ARMsFFTSpec_pBuf,          12
@// FFTSTAGE: body of the F32 inverse "preTwiddleRadix2" stage.
@//
@// Macro arguments:
@//   scaled, inverse - accepted but never expanded in this float body.
@//   name            - suffix appended to the local labels so that each
@//                     expansion gets its own label names.
@//
@// Register use:
@//   pSrc (r0)     - source pointer, advanced while reading.
@//   pFFTSpec (r2) - FFT spec structure; N, pTwiddle and pBuf are read
@//                   from it, after which r2 is reused as pOut1.
@//   Output is written starting at pBuf + N*4 bytes (pOut1).
@//   r4-r12, d0-d11 and d13 are overwritten and not saved by this macro.
.MACRO FFTSTAGE scaled, inverse, name
@// Read the size N from the structure
LDR N, [pFFTSpec, #ARMsFFTSpec_N]
@// Read other structure parameters
LDR pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
LDR pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
@// half holds the constant 0.5 used to scale every butterfly result
VMOV half, 0.5
MOV size,N,ASR #1 @// preserve the contents of N
MOV step,N,LSL #2 @// step = N/2 * 8 bytes
@// Z(k) = 1/2 {[F(k) + F'(N/2-k)] +j*W^(-k) [F(k) - F'(N/2-k)]}
@// Note: W^(k) is stored as negated value and also need to
@// conjugate the values from the table
@// Z(0) : no need of twiddle multiply
@// Z(0) = 1/2 { [F(0) + F'(N/2)] +j [F(0) - F'(N/2)] }
VLD1 dX0,[pSrc],step
ADD pOut1,pOut,step @// pOut1 = pOut+ N/2*8 bytes
VLD1 dX1,[pSrc]!
@// twStep = 3N/8 * 8 bytes pointing to W^1
SUB twStep,step,size,LSL #1
MOV step1,size,LSL #2 @// step1 = N/4 * 8 = N/2*4 bytes
SUB step1,step1,#8 @// (N/4-1)*8 bytes
VADD dY0,dX0,dX1 @// [b+d | a+c]
VSUB dY1,dX0,dX1 @// [b-d | a-c]
VMUL dY0, dY0, half[0]
VMUL dY1, dY1, half[0]
@// dY0= [a-c | a+c] ;dY1= [b-d | b+d]
VZIP dY0,dY1
VSUB dX0,dY0,dY1
@// size = N/2 - 2; the flags set here are consumed by BLT/BEQ below
@// (none of the instructions in between update the flags)
SUBS size,size,#2
VADD dX1,dY0,dY1
@// Rewind pSrc by step; it now points 8 bytes past its starting address
SUB pSrc,pSrc,step
VST1 dX0[0],[pOut1]!
ADD pTwiddleTmp,pTwiddle,#8 @// W^2
VST1 dX1[1],[pOut1]!
ADD argTwiddle1,pTwiddle,twStep @// W^1
@// size < 0: nothing left to do; size == 0: only the element handled at
@// the lastElement label remains
BLT decrementScale\name
BEQ lastElement\name
@// Z(k) = 1/2[F(k) + F'(N/2-k)] +j*W^(-k) [F(k) - F'(N/2-k)]
@// Note: W^k is stored as negative values in the table and also
@// need to conjugate the values from the table.
@//
@// Process 4 elements at a time. E.g: Z(1),Z(2) and Z(N/2-2),Z(N/2-1)
@// since both of them require F(1),F(2) and F(N/2-2),F(N/2-1)
SUB step,step,#24
evenOddButterflyLoop\name :
@// Each pass reads two twiddle pairs from both ends of the table and two
@// input pairs from both ends of the source; step and step1 shrink every
@// pass so that the two ends move towards each other.
VLD1 dW0r,[argTwiddle1],step1
VLD1 dW1r,[argTwiddle1]!
VLD2 {dX0r,dX0i},[pSrc],step
SUB argTwiddle1,argTwiddle1,step1
VLD2 {dX1r,dX1i},[pSrc]!
SUB step1,step1,#8 @// (N/4-2)*8 bytes
VLD1 dW0i,[pTwiddleTmp],step1
VLD1 dW1i,[pTwiddleTmp]!
SUB pSrc,pSrc,step
SUB pTwiddleTmp,pTwiddleTmp,step1
@// Swap the two lanes of the upper-end inputs so they pair up with the
@// lower-end inputs
VREV64 dX1r,dX1r
VREV64 dX1i,dX1i
@// Loop counter: four elements consumed per pass; flags are used by the
@// BGT at the bottom of the loop
SUBS size,size,#4
VSUB dT2,dX0r,dX1r @// a-c
VADD dT3,dX0i,dX1i @// b+d
VADD dT0,dX0r,dX1r @// a+c
VSUB dT1,dX0i,dX1i @// b-d
SUB step1,step1,#8
@// Apply the 1/2 factor to all four sums and differences
VMUL dT2, dT2, half[0]
VMUL dT3, dT3, half[0]
VMUL dT0, dT0, half[0]
VMUL dT1, dT1, half[0]
@// Interleave the loaded twiddle registers before the multiplies
VZIP dW1r,dW1i
VZIP dW0r,dW0i
@// Twiddle products: VMUL starts each product, VMLA/VMLS accumulate the
@// second term
VMUL dX1r,dW1r,dT2
VMUL dX1i,dW1r,dT3
VMUL dX0r,dW0r,dT2
VMUL dX0i,dW0r,dT3
VMLS dX1r,dW1i,dT3
VMLA dX1i,dW1i,dT2
VMLA dX0r,dW0i,dT3
VMLS dX0i,dW0i,dT2
VADD dY1r,dT0,dX1i @// F(N/2 -1)
VSUB dY1i,dX1r,dT1
@// Restore lane order for the upper-end outputs before storing them
VREV64 dY1r,dY1r
VREV64 dY1i,dY1i
VADD dY0r,dT0,dX0i @// F(1)
VSUB dY0i,dT1,dX0r
VST2 {dY0r,dY0i},[pOut1],step
VST2 {dY1r,dY1i},[pOut1]!
SUB pOut1,pOut1,step
SUB step,step,#32 @// (N/2-4)*8 bytes
BGT evenOddButterflyLoop\name
@// set both the ptrs to the last element
SUB pSrc,pSrc,#8
SUB pOut1,pOut1,#8
@// Last element can be expanded as follows
@// 1/2[Z(k) + Z'(k)] - j w^-k [Z(k) - Z'(k)] (since W^k is stored as
@// -ve)
@// 1/2[(a+jb) + (a-jb)] - j w^-k [(a+jb) - (a-jb)]
@// 1/2[2a+j0] - j (c-jd) [0+j2b]
@// (a+bc, -bd)
@// Since (c,d) = (0,1) for the last element, result is just (a,-b)
lastElement\name :
@// Store lane 0 unchanged, then negate and store lane 1: (a, -b)
VLD1 dX0r,[pSrc]
VST1 dX0r[0],[pOut1]!
VNEG dX0r,dX0r
VST1 dX0r[1],[pOut1]
@// Exit label; this float variant has no scale value to adjust
decrementScale\name :
.endm
@// Entry point: one expansion of FFTSTAGE wrapped between M_START and M_END.
@// M_START and M_END are not defined in this file; presumably they come
@// from armCOMM_s.h and emit the function prologue and epilogue
@// (TODO confirm, including the meaning of the r4 argument).
M_START armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_unsafe,r4
@// The first two arguments are not expanded by this float FFTSTAGE body;
@// Inv is the suffix for the local labels.
FFTSTAGE "FALSE","TRUE",Inv
M_END
@// End of the assembly input
.end

Просмотреть файл

@ -1,321 +0,0 @@
@//
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@// Use of this source code is governed by a BSD-style license
@// that can be found in the LICENSE file in the root of the source
@// tree. An additional intellectual property rights grant can be found
@// in the file PATENTS. All contributing project authors may
@// be found in the AUTHORS file in the root of the source tree.
@//
@// This file was originally licensed as follows. It has been
@// relicensed with permission from the copyright holders.
@//
@//
@// File Name: armSP_FFTInv_CCSToR_S32_preTwiddleRadix2_unsafe_s.s
@// OpenMAX DL: v1.0.2
@// Last Modified Revision: 7485
@// Last Modified Date: Fri, 21 Sep 2007
@//
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
@//
@//
@//
@// Description:
@// Compute the "preTwiddleRadix2" stage prior to the call to the complexFFT
@// It does a Z(k) = Feven(k) + jW^(-k) FOdd(k); k=0,1,2,...N/2-1 computation
@// It implements both "scaled"(by 1/2) and "unscaled" versions of the above formula
@//
@// Include standard headers
#include "dl/api/armCOMM_s.h"
#include "dl/api/omxtypes_s.h"
@// Import symbols required from other files
@// (For example tables)
@// Set debugging level
@//DEBUG_ON SETL {TRUE}
@// Guarding implementation by the processor name
@// Guarding implementation by the processor name
@// Register aliases for the S32 preTwiddleRadix2 stage.
@// Several names map to the same register; they are used in different
@// phases of the code.
@//
@// Keep comments on their own lines in this table. Anything that follows
@// the value on a define line becomes part of the replacement text, so a
@// trailing assembler comment marker would be pasted into every use of
@// the alias and hide the rest of that instruction.
@//
@// ---- Input registers ----
#define pSrc            r0
#define pDst            r1
#define pFFTSpec        r2
#define scale           r3
@// ---- Output registers ----
#define result          r0
@// ---- Local scratch registers ----
#define argTwiddle      r1
#define argDst          r2
#define argScale        r4
#define tmpOrder        r4
#define pTwiddle        r4
#define pOut            r5
#define subFFTSize      r7
#define subFFTNum       r6
#define N               r6
#define order           r14
#define diff            r9
@// Total num of radix stages required to complete the FFT
#define count           r8
#define x0r             r4
#define x0i             r5
#define diffMinusOne    r2
#define round           r3
#define pOut1           r2
#define size            r7
#define step            r8
#define step1           r9
#define twStep          r10
#define pTwiddleTmp     r11
#define argTwiddle1     r12
#define zero            r14
@// ---- Neon registers: D aliases typed S32, Q aliases typed S64 ----
#define dX0             D0.S32
#define dShift          D1.S32
#define dX1             D1.S32
#define dY0             D2.S32
#define dY1             D3.S32
#define dX0r            D0.S32
#define dX0i            D1.S32
#define dX1r            D2.S32
#define dX1i            D3.S32
#define dW0r            D4.S32
#define dW0i            D5.S32
#define dW1r            D6.S32
#define dW1i            D7.S32
#define dT0             D8.S32
#define dT1             D9.S32
#define dT2             D10.S32
#define dT3             D11.S32
#define qT0             Q6.S64
#define qT1             Q7.S64
#define qT2             Q8.S64
#define qT3             Q9.S64
#define dY0r            D4.S32
#define dY0i            D5.S32
#define dY1r            D6.S32
#define dY1i            D7.S32
#define dY2             D4.S32
#define dY3             D5.S32
#define dW0             D6.S32
#define dW1             D7.S32
#define dW0Tmp          D10.S32
#define dW1Neg          D11.S32
@// ---- Byte offsets of the FFTSpec structure members ----
.set ARMsFFTSpec_N,             0
.set ARMsFFTSpec_pBitRev,       4
.set ARMsFFTSpec_pTwiddle,      8
.set ARMsFFTSpec_pBuf,          12
@// FFTSTAGE: body of the S32 inverse "preTwiddleRadix2" stage.
@//
@// Macro arguments:
@//   scaled  - "TRUE" selects the halving forms (VHADD/VHSUB/VSHR) for the
@//             final combine and decrements the scale register at the
@//             end; any other value selects plain VADD/VSUB.
@//   inverse - accepted but never expanded in this body.
@//   name    - suffix appended to the local labels so that each
@//             expansion gets its own label names.
@//
@// Register use:
@//   pSrc (r0)     - source pointer, advanced while reading.
@//   pFFTSpec (r2) - FFT spec structure; N, pTwiddle and pBuf are read
@//                   from it, after which r2 is reused as pOut1.
@//   scale (r3)    - decremented by one in the scaled variant only.
@//   Output is written starting at pBuf + N*4 bytes (pOut1).
@//   r4-r12, d0-d11 and q6-q9 are overwritten and not saved by this macro.
.MACRO FFTSTAGE scaled, inverse, name
@// Read the size N from the structure
LDR N, [pFFTSpec, #ARMsFFTSpec_N]
@// Read other structure parameters
LDR pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
LDR pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
MOV size,N,ASR #1 @// preserve the contents of N
MOV step,N,LSL #2 @// step = N/2 * 8 bytes
@// Z(k) = 1/2 {[F(k) + F'(N/2-k)] +j*W^(-k) [F(k) - F'(N/2-k)]}
@// Note: W^(k) is stored as negated value and also need to conjugate the values from the table
@// Z(0) : no need of twiddle multiply
@// Z(0) = 1/2 { [F(0) + F'(N/2)] +j [F(0) - F'(N/2)] }
VLD1 dX0,[pSrc],step
ADD pOut1,pOut,step @// pOut1 = pOut+ N/2*8 bytes
VLD1 dX1,[pSrc]!
SUB twStep,step,size,LSL #1 @// twStep = 3N/8 * 8 bytes pointing to W^1
MOV step1,size,LSL #2 @// step1 = N/4 * 8 = N/2*4 bytes
SUB step1,step1,#8 @// (N/4-1)*8 bytes
@// The halving add and subtract supply the 1/2 factor of the formula
VHADD dY0,dX0,dX1 @// [b+d | a+c]
VHSUB dY1,dX0,dX1 @// [b-d | a-c]
VZIP dY0,dY1 @// dY0= [a-c | a+c] ;dY1= [b-d | b+d]
@// In both variants SUBS leaves size = N/2 - 2 and sets the flags that
@// BLT/BEQ consume below (no instruction in between updates the flags)
.ifeqs "\scaled", "TRUE"
VHSUB dX0,dY0,dY1
SUBS size,size,#2
VHADD dX1,dY0,dY1
.else
VSUB dX0,dY0,dY1
SUBS size,size,#2
VADD dX1,dY0,dY1
.endif
@// Rewind pSrc by step; it now points 8 bytes past its starting address
SUB pSrc,pSrc,step
VST1 dX0[0],[pOut1]!
ADD pTwiddleTmp,pTwiddle,#8 @// W^2
VST1 dX1[1],[pOut1]!
ADD argTwiddle1,pTwiddle,twStep @// W^1
@// size < 0: nothing left to do; size == 0: only the element handled at
@// the lastElement label remains
BLT decrementScale\name
BEQ lastElement\name
@// Z(k) = 1/2[F(k) + F'(N/2-k)] +j*W^(-k) [F(k) - F'(N/2-k)]
@// Note: W^k is stored as negative values in the table and also need to conjugate the values from the table
@// Process 4 elements at a time. E.g: Z(1),Z(2) and Z(N/2-2),Z(N/2-1) since both of them
@// require F(1),F(2) and F(N/2-2),F(N/2-1)
SUB step,step,#24
evenOddButterflyLoop\name :
@// Each pass reads two twiddle pairs from both ends of the table and two
@// input pairs from both ends of the source; step and step1 shrink every
@// pass so that the two ends move towards each other.
VLD1 dW0r,[argTwiddle1],step1
VLD1 dW1r,[argTwiddle1]!
VLD2 {dX0r,dX0i},[pSrc],step
SUB argTwiddle1,argTwiddle1,step1
VLD2 {dX1r,dX1i},[pSrc]!
SUB step1,step1,#8 @// (N/4-2)*8 bytes
VLD1 dW0i,[pTwiddleTmp],step1
VLD1 dW1i,[pTwiddleTmp]!
SUB pSrc,pSrc,step
SUB pTwiddleTmp,pTwiddleTmp,step1
@// Swap the two lanes of the upper-end inputs so they pair up with the
@// lower-end inputs
VREV64 dX1r,dX1r
VREV64 dX1i,dX1i
@// Loop counter: four elements consumed per pass; flags are used by the
@// BGT at the bottom of the loop
SUBS size,size,#4
VHSUB dT2,dX0r,dX1r @// a-c
VHADD dT3,dX0i,dX1i @// b+d
SUB step1,step1,#8
VHADD dT0,dX0r,dX1r @// a+c
VHSUB dT1,dX0i,dX1i @// b-d
@// Interleave the loaded twiddle registers before the multiplies
VZIP dW1r,dW1i
VZIP dW0r,dW0i
@// Widening 32x32 -> 64 bit twiddle products, accumulated in q registers
VMULL qT0,dW1r,dT2
VMLSL qT0,dW1i,dT3
VMULL qT1,dW1r,dT3
VMLAL qT1,dW1i,dT2
VMULL qT2,dW0r,dT2
VMLAL qT2,dW0i,dT3
VMULL qT3,dW0r,dT3
VMLSL qT3,dW0i,dT2
@// Round and narrow the 64-bit products back to 32 bits (shift by 31)
VRSHRN dX1r,qT0,#31
VRSHRN dX1i,qT1,#31
.ifeqs "\scaled", "TRUE"
VHADD dY1r,dT0,dX1i @// F(N/2 -1)
VHSUB dY1i,dX1r,dT1
.else
VADD dY1r,dT0,dX1i @// F(N/2 -1)
VSUB dY1i,dX1r,dT1
.endif
@// Restore lane order for the upper-end outputs before storing them
VREV64 dY1r,dY1r
VREV64 dY1i,dY1i
VRSHRN dX0r,qT2,#31
VRSHRN dX0i,qT3,#31
.ifeqs "\scaled", "TRUE"
VHADD dY0r,dT0,dX0i @// F(1)
VHSUB dY0i,dT1,dX0r
.else
VADD dY0r,dT0,dX0i @// F(1)
VSUB dY0i,dT1,dX0r
.endif
VST2 {dY0r,dY0i},[pOut1],step
VST2 {dY1r,dY1i},[pOut1]!
SUB pOut1,pOut1,step
SUB step,step,#32 @// (N/2-4)*8 bytes
BGT evenOddButterflyLoop\name
SUB pSrc,pSrc,#8 @// set both the ptrs to the last element
SUB pOut1,pOut1,#8
@// Last element can be expanded as follows
@// 1/2[Z(k) + Z'(k)] - j w^-k [Z(k) - Z'(k)] (since W^k is stored as -ve)
@// 1/2[(a+jb) + (a-jb)] - j w^-k [(a+jb) - (a-jb)]
@// 1/2[2a+j0] - j (c-jd) [0+j2b]
@// (a+bc, -bd)
@// Since (c,d) = (0,1) for the last element, result is just (a,-b)
lastElement\name :
VLD1 dX0r,[pSrc]
@// The scaled variant halves the element before it is stored
.ifeqs "\scaled", "TRUE"
VSHR dX0r,dX0r,#1
.endif
@// Store lane 0 unchanged, then negate and store lane 1: (a, -b)
VST1 dX0r[0],[pOut1]!
VNEG dX0r,dX0r
VST1 dX0r[1],[pOut1]
decrementScale\name :
@// The scaled variant records its extra halving in the scale register
.ifeqs "\scaled", "TRUE"
SUB scale,scale,#1
.endif
.endm
@// Entry points: each wraps one expansion of FFTSTAGE between M_START and
@// M_END. Those two macros are not defined in this file; presumably they
@// come from armCOMM_s.h and emit the function prologue and epilogue
@// (TODO confirm, including the meaning of the r4 argument).
@//
@// Unscaled variant: scaled = "FALSE", label suffix Inv
M_START armSP_FFTInv_CCSToR_S32_preTwiddleRadix2_unsafe,r4
FFTSTAGE "FALSE","TRUE",Inv
M_END
@// Scaled variant: scaled = "TRUE", label suffix InvSfs
M_START armSP_FFTInv_CCSToR_S32_Sfs_preTwiddleRadix2_unsafe,r4
FFTSTAGE "TRUE","TRUE",InvSfs
M_END
@// End of the assembly input
.end

Просмотреть файл

@ -1,134 +0,0 @@
@//
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@// Use of this source code is governed by a BSD-style license
@// that can be found in the LICENSE file in the root of the source
@// tree. An additional intellectual property rights grant can be found
@// in the file PATENTS. All contributing project authors may
@// be found in the AUTHORS file in the root of the source tree.
@//
@// This is a modification of armSP_FFT_CToC_SC32_Radix2_fs_unsafe_s.S
@// to support float instead of SC32.
@//
@//
@// Description:
@// Compute the first stage of a Radix 2 DIT in-order out-of-place FFT
@// stage for a N point complex signal.
@//
@//
@// Include standard headers
#include "dl/api/armCOMM_s.h"
#include "dl/api/omxtypes_s.h"
@// Import symbols required from other files
@// (For example tables)
@// Set debugging level
@//DEBUG_ON SETL {TRUE}
@// Guarding implementation by the processor name
@// Guarding implementation by the processor name
@// Register aliases for the F32 radix-2 first stage.
@// Names that share a register are used at different points of the code.
@//
@// ---- Input registers ----
#define pSrc            r0
#define pTwiddle        r1
#define pDst            r2
#define pPingPongBuf    r5
#define subFFTNum       r6
#define subFFTSize      r7
@// ---- Output registers: none declared ----
@// ---- Local scratch registers ----
#define pointStep       r3
#define outPointStep    r3
#define grpSize         r4
#define setCount        r4
#define step            r8
#define dstStep         r8
@// ---- Neon registers (typed as F32) ----
#define dX0             D0.F32
#define dX1             D1.F32
#define dY0             D2.F32
#define dY1             D3.F32
@// FFTSTAGE: body of the F32 radix-2 first stage (out of place).
@//
@// Macro arguments:
@//   scaled, inverse - accepted but never expanded in this body, so the
@//                     forward and inverse expansions differ only in
@//                     their label names.
@//   name            - suffix appended to the loop label.
@//
@// Register use:
@//   in  : pSrc (r0), pDst (r2), pPingPongBuf (r5), subFFTNum (r6)
@//   out : subFFTSize (r7) = 2, subFFTNum (r6) halved,
@//         pSrc = start of the data written by this stage,
@//         pDst = pPingPongBuf
@//   r3, r4, r8 and d0-d3 are overwritten.
.MACRO FFTSTAGE scaled, inverse, name
@// Define stack arguments
@// update subFFTSize and subFFTNum into RN6 and RN7 for the next stage
MOV subFFTSize,#2
LSR grpSize,subFFTNum,#1
MOV subFFTNum,grpSize
@// pT0+1 increments pT0 by 8 bytes
@// pointStep is grpSize * 8 bytes (see the MOV below)
@// Note: outPointStep = pointStep for firststage
@// Note: setCount shares its register with grpSize, so the loop below
@// runs grpSize times
MOV pointStep,grpSize,LSL #3
RSB step,pointStep,#8
@// Loop on the sets for grp zero
grpZeroSetLoop\name :
@// One butterfly per pass: the two inputs are pointStep bytes apart and
@// the two outputs are written pointStep bytes apart
VLD1 dX0,[pSrc],pointStep
VLD1 dX1,[pSrc],step @// step = -pointStep + 8
SUBS setCount,setCount,#1
VADD dY0,dX0,dX1
VSUB dY1,dX0,dX1
VST1 dY0,[pDst],outPointStep
@// dstStep = step = -pointStep + 8
VST1 dY1,[pDst],dstStep
@// Flags still come from the SUBS above; the vector instructions do not
@// change them
BGT grpZeroSetLoop\name
@// reset pSrc to pDst for the next stage
@// pDst advanced by a net 8 bytes per pass, pointStep bytes in total, so
@// this yields the address pDst had on entry
SUB pSrc,pDst,pointStep @// pDst -= 2*grpSize
MOV pDst,pPingPongBuf
.endm
@// Entry points: each wraps one expansion of FFTSTAGE between M_START and
@// M_END. Those two macros are not defined in this file; presumably they
@// come from armCOMM_s.h and emit the function prologue and epilogue
@// (TODO confirm, including the meaning of the r4 argument).
@//
@// Forward first stage, label suffix fwd
M_START armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace_unsafe,r4
FFTSTAGE "FALSE","FALSE",fwd
M_END
@// Inverse first stage, label suffix inv; the FFTSTAGE body in this file
@// does not expand its inverse argument, so both expansions contain the
@// same instructions
M_START armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe,r4
FFTSTAGE "FALSE","TRUE",inv
M_END
@// End of the assembly input
.end

Просмотреть файл

@ -1,153 +0,0 @@
@//
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@// Use of this source code is governed by a BSD-style license
@// that can be found in the LICENSE file in the root of the source
@// tree. An additional intellectual property rights grant can be found
@// in the file PATENTS. All contributing project authors may
@// be found in the AUTHORS file in the root of the source tree.
@//
@// This is a modification of armSP_FFT_CToC_SC32_Radix2_ls_unsafe_s.S
@// to support float instead of SC32.
@//
@//
@// Description:
@// Compute the last stage of a Radix 2 DIT in-order out-of-place FFT
@// stage for a N point complex signal.
@//
@//
@// Include standard headers
#include "dl/api/armCOMM_s.h"
#include "dl/api/omxtypes_s.h"
@// Import symbols required from other files
@// (For example tables)
@// Set debugging level
@//DEBUG_ON SETL {TRUE}
@// Guarding implementation by the processor name
@// Register aliases for the F32 radix-2 last stage.
@// Names that share a register are used at different points of the code.
@//
@// ---- Input registers ----
#define pSrc            r0
#define pTwiddle        r1
#define pDst            r2
#define subFFTNum       r6
#define subFFTSize      r7
@// ---- Output registers: none declared ----
@// ---- Local scratch registers ----
#define outPointStep    r3
#define grpCount        r4
#define pTmp            r4
#define dstStep         r5
@// ---- Neon registers (typed as f32) ----
#define dWr             d0.f32
#define dWi             d1.f32
#define dXr0            d2.f32
#define dXi0            d3.f32
#define dXr1            d4.f32
#define dXi1            d5.f32
#define dYr0            d6.f32
#define dYi0            d7.f32
#define dYr1            d8.f32
#define dYi1            d9.f32
@// The qT names map to D registers in this float variant
#define qT0             d10.f32
#define qT1             d12.f32
@// FFTSTAGE: body of the F32 radix-2 last stage (out of place).
@//
@// Macro arguments:
@//   scaled  - accepted but never expanded in this body.
@//   inverse - "TRUE" multiplies the second input by the conjugate of the
@//             loaded twiddle; any other value multiplies by the twiddle
@//             as loaded.
@//   name    - suffix appended to the loop label.
@//
@// Register use:
@//   in  : pSrc (r0), pTwiddle (r1), pDst (r2), subFFTSize (r7)
@//   out : subFFTNum (r6) = 1, subFFTSize (r7) doubled, pSrc and pDst
@//         swapped and rewound, pTwiddle rewound
@//   r3, r4, r5, d0-d10 and d12 are overwritten.
.MACRO FFTSTAGE scaled, inverse, name
MOV outPointStep,subFFTSize,LSL #3
@// Update grpCount and grpSize rightaway
MOV subFFTNum,#1 @//after the last stage
LSL grpCount,subFFTSize,#1
@// update subFFTSize for the next stage
MOV subFFTSize,grpCount
RSB dstStep,outPointStep,#16
@// Loop on 2 grps at a time for the last stage
radix2lsGrpLoop\name :
@ dWr = [pTwiddle[0].Re, pTwiddle[1].Re]
@ dWi = [pTwiddle[0].Im, pTwiddle[1].Im]
@// The :64 and :128 qualifiers on the two loads are alignment hints for
@// the addresses held in pTwiddle and pSrc
VLD2 {dWr,dWi},[pTwiddle :64]!
@ dXr0 = [pSrc[0].Re, pSrc[2].Re]
@ dXi0 = [pSrc[0].Im, pSrc[2].Im]
@ dXr1 = [pSrc[1].Re, pSrc[3].Re]
@ dXi1 = [pSrc[1].Im, pSrc[3].Im]
VLD4 {dXr0,dXi0,dXr1,dXi1},[pSrc :128]!
@// Flags set here are consumed by the BGT at the bottom of the loop
SUBS grpCount,grpCount,#4 @// grpCount is multiplied by 2
@// qT0/qT1 = real/imaginary part of the twiddle product
.ifeqs "\inverse", "TRUE"
VMUL qT0,dWr,dXr1
VMLA qT0,dWi,dXi1 @// real part
VMUL qT1,dWr,dXi1
VMLS qT1,dWi,dXr1 @// imag part
.else
VMUL qT0,dWr,dXr1
VMLS qT0,dWi,dXi1 @// real part
VMUL qT1,dWr,dXi1
VMLA qT1,dWi,dXr1 @// imag part
.endif
@// Butterfly: first output = X0 - product, second output = X0 + product
VSUB dYr0,dXr0,qT0
VSUB dYi0,dXi0,qT1
VADD dYr1,dXr0,qT0
VADD dYi1,dXi0,qT1
VST2 {dYr0,dYi0},[pDst],outPointStep
VST2 {dYr1,dYi1},[pDst],dstStep @// dstStep = step = -outPointStep + 16
BGT radix2lsGrpLoop\name
@// Reset and Swap pSrc and pDst for the next stage
MOV pTmp,pDst
SUB pDst,pSrc,outPointStep,LSL #1 @// pDst -= 4*size; pSrc -= 8*size bytes
SUB pSrc,pTmp,outPointStep
@// Reset pTwiddle for the next stage
SUB pTwiddle,pTwiddle,outPointStep @// pTwiddle -= 4*size bytes
.endm
@// Entry points: each wraps one expansion of FFTSTAGE between M_START and
@// M_END. Those two macros are not defined in this file; presumably they
@// come from armCOMM_s.h and emit the function prologue and epilogue
@// (TODO confirm, including the meaning of the r4 argument).
@//
@// Forward last stage, label suffix fwd.
@// NOTE(review): this M_START passes an explicit empty third argument
@// while the inverse one below omits it - confirm that both forms are
@// equivalent for M_START.
M_START armSP_FFTFwd_CToC_FC32_Radix2_ls_OutOfPlace_unsafe,r4,""
FFTSTAGE "FALSE","FALSE",fwd
M_END
@// Inverse last stage (conjugated twiddle multiply), label suffix inv
M_START armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace_unsafe,r4
FFTSTAGE "FALSE","TRUE",inv
M_END
@// End of the assembly input
.end

Просмотреть файл

@ -1,191 +0,0 @@
@//
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@// Use of this source code is governed by a BSD-style license
@// that can be found in the LICENSE file in the root of the source
@// tree. An additional intellectual property rights grant can be found
@// in the file PATENTS. All contributing project authors may
@// be found in the AUTHORS file in the root of the source tree.
@//
@// This is a modification of armSP_FFT_CToC_SC32_Radix2_unsafe_s.s
@// to support float instead of SC32.
@//
@// Description:
@// Compute a Radix 2 DIT in-order out-of-place FFT stage for an N point
@// complex signal. This handles the general stage, not the first or last
@// stage.
@//
@//
@// Include standard headers
#include "dl/api/armCOMM_s.h"
#include "dl/api/omxtypes_s.h"
@// Import symbols required from other files
@// (For example tables)
@// Set debugging level
@//DEBUG_ON SETL {TRUE}
@// Guarding implementation by the processor name
@// Guarding implementation by the processor name
@// Register aliases for the general F32 radix-2 stage.
@// Names that share a register are used at different points of the code.
@//
@// ---- Input registers ----
#define pSrc            r0
#define pTwiddle        r1
#define pDst            r2
#define subFFTNum       r6
#define subFFTSize      r7
@// ---- Output registers: none declared ----
@// ---- Local scratch registers ----
#define outPointStep    r3
#define pointStep       r4
#define grpCount        r5
#define setCount        r8
@// r9 is shared by pTable and pTmp
#define pTable          r9
#define pTmp            r9
#define step            r10
#define dstStep         r11
@// ---- Neon registers (typed as F32) ----
#define dW              D0.F32
#define dX0             D2.F32
#define dX1             D3.F32
#define dX2             D4.F32
#define dX3             D5.F32
#define dY0             D6.F32
#define dY1             D7.F32
#define dY2             D8.F32
#define dY3             D9.F32
@// The qT names map to D registers in this float variant
#define qT0             D10.F32
#define qT1             D11.F32
@// FFTSTAGE: body of the general (neither first nor last) F32 radix-2
@// stage, out of place.
@//
@// Macro arguments:
@//   scaled  - accepted but never expanded in this body.
@//   inverse - "TRUE" multiplies the second input by the conjugate of the
@//             loaded twiddle; any other value multiplies by the twiddle
@//             as loaded.
@//   name    - suffix appended to the two loop labels.
@//
@// Register use:
@//   in  : pSrc (r0), pTwiddle (r1), pDst (r2), subFFTNum (r6),
@//         subFFTSize (r7)
@//   out : subFFTNum halved, subFFTSize doubled, pSrc and pDst swapped
@//         and rewound, pTwiddle rewound
@//   r3, r4, r5, r8-r11 and d0, d2-d11 are overwritten.
.MACRO FFTSTAGE scaled, inverse, name
@// Define stack arguments
@// Update grpCount and grpSize rightaway inorder to reuse pGrpCount
@// and pGrpSize regs
LSR subFFTNum,subFFTNum,#1 @//grpSize
LSL grpCount,subFFTSize,#1
@// pT0+1 increments pT0 by 8 bytes
@// pT0+pointStep = increment of 8*pointStep bytes = 4*grpSize bytes
MOV pointStep,subFFTNum,LSL #2
@// update subFFTSize for the next stage
MOV subFFTSize,grpCount
@// pOut0+1 increments pOut0 by 8 bytes
@// pOut0+outPointStep == increment of 8*outPointStep bytes =
@// 4*size bytes
@// SMULBB multiplies the low 16 bits of each operand as signed values
SMULBB outPointStep,grpCount,pointStep
LSL pointStep,pointStep,#1
RSB step,pointStep,#16
RSB dstStep,outPointStep,#16
@// Loop on the groups
radix2GrpLoop\name :
@// One twiddle is loaded per group and reused for every set in it
MOV setCount,pointStep,LSR #3
VLD1 dW,[pTwiddle],pointStep @//[wi | wr]
@// Loop on the sets
radix2SetLoop\name :
@// point0: dX0-real part dX1-img part
VLD2 {dX0,dX1},[pSrc],pointStep
@// point1: dX2-real part dX3-img part
VLD2 {dX2,dX3},[pSrc],step
@// Two sets are handled per pass; flags are consumed by the BGT on
@// radix2SetLoop below
SUBS setCount,setCount,#2
@// qT0/qT1 = real/imaginary part of the twiddle product
.ifeqs "\inverse", "TRUE"
VMUL qT0,dX2,dW[0]
VMLA qT0,dX3,dW[1] @// real part
VMUL qT1,dX3,dW[0]
VMLS qT1,dX2,dW[1] @// imag part
.else
VMUL qT0,dX2,dW[0]
VMLS qT0,dX3,dW[1] @// real part
VMUL qT1,dX3,dW[0]
VMLA qT1,dX2,dW[1] @// imag part
.endif
@// Butterfly: first output = point0 - product, second = point0 + product
VSUB dY0,dX0,qT0
VSUB dY1,dX1,qT1
VADD dY2,dX0,qT0
VADD dY3,dX1,qT1
VST2 {dY0,dY1},[pDst],outPointStep
@// dstStep = -outPointStep + 16
VST2 {dY2,dY3},[pDst],dstStep
BGT radix2SetLoop\name
@// Two groups are counted per pass; the ADD below leaves the flags from
@// this SUBS untouched for the BGT
SUBS grpCount,grpCount,#2
ADD pSrc,pSrc,pointStep
BGT radix2GrpLoop\name
@// Reset and Swap pSrc and pDst for the next stage
MOV pTmp,pDst
@// pDst -= 4*size; pSrc -= 8*size bytes
SUB pDst,pSrc,outPointStep,LSL #1
SUB pSrc,pTmp,outPointStep
@// Reset pTwiddle for the next stage
@// pTwiddle -= 4*size bytes
SUB pTwiddle,pTwiddle,outPointStep
.endm
@// Forward generic radix-2 stage on complex float data.
@// The body is the FFTSTAGE expansion with inverse = "FALSE"; the label
@// suffix is FWD. M_START / M_END are macros defined outside this chunk.
M_START armSP_FFTFwd_CToC_FC32_Radix2_OutOfPlace_unsafe,r4
FFTSTAGE "FALSE","FALSE",FWD
M_END
@// Inverse generic radix-2 stage on complex float data.
@// The body is the FFTSTAGE expansion with inverse = "TRUE"; the label
@// suffix is INV. M_START / M_END are macros defined outside this chunk.
M_START armSP_FFTInv_CToC_FC32_Radix2_OutOfPlace_unsafe,r4
FFTSTAGE "FALSE","TRUE",INV
M_END
.end

Просмотреть файл

@ -1,251 +0,0 @@
@//
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@// Use of this source code is governed by a BSD-style license
@// that can be found in the LICENSE file in the root of the source
@// tree. An additional intellectual property rights grant can be found
@// in the file PATENTS. All contributing project authors may
@// be found in the AUTHORS file in the root of the source tree.
@//
@//
@// This is a modification of armSP_FFT_CToC_SC32_Radix4_fs_unsafe_s.s
@// to support float instead of SC32.
@//
@//
@// Description:
@// Compute a first stage Radix 4 FFT stage for a N point complex signal
@//
@//
@// Include standard headers
#include "dl/api/armCOMM_s.h"
#include "dl/api/omxtypes_s.h"
@// Import symbols required from other files
@// (For example tables)
@// Set debugging level
@//DEBUG_ON SETL {TRUE}
@// Guarding implementation by the processor name
@// Guarding implementation by the processor name
@// Register aliases used by the first-stage radix-4 FFTSTAGE macro.
@// They are plain preprocessor substitutions: two aliases that expand to
@// the same register name the same storage. Do not append a trailing
@// comment to a define line, it would become part of the expansion.
@//Input Registers
#define pSrc r0
#define pDst r2
#define pTwiddle r1
#define pPingPongBuf r5
#define subFFTNum r6
#define subFFTSize r7
@//Output Registers
@//Local Scratch Registers
#define grpSize r3
@// Reuse grpSize as setCount
#define setCount r3
@// pointStep and outPointStep share r4 (they are equal in the first stage)
#define pointStep r4
#define outPointStep r4
#define setStep r8
#define step1 r9
#define step3 r10
@// Neon Registers
@// Input points: real parts in the even D register, imaginary in the odd
#define dXr0 D0.F32
#define dXi0 D1.F32
#define dXr1 D2.F32
#define dXi1 D3.F32
#define dXr2 D4.F32
#define dXi2 D5.F32
#define dXr3 D6.F32
#define dXi3 D7.F32
@// Intermediate results of the first butterfly level
#define dYr0 D8.F32
#define dYi0 D9.F32
#define dYr1 D10.F32
#define dYi1 D11.F32
#define dYr2 D12.F32
#define dYi2 D13.F32
#define dYr3 D14.F32
#define dYi3 D15.F32
@// Quad views of the pairs above (Qn covers D2n and D2n+1)
#define qX0 Q0.F32
#define qX1 Q1.F32
#define qX2 Q2.F32
#define qX3 Q3.F32
#define qY0 Q4.F32
#define qY1 Q5.F32
#define qY2 Q6.F32
#define qY3 Q7.F32
@// Outputs of the second butterfly level
#define dZr0 D16.F32
#define dZi0 D17.F32
#define dZr1 D18.F32
#define dZi1 D19.F32
#define dZr2 D20.F32
#define dZi2 D21.F32
#define dZr3 D22.F32
#define dZi3 D23.F32
@// Quad views of the dZ pairs
#define qZ0 Q8.F32
#define qZ1 Q9.F32
#define qZ2 Q10.F32
#define qZ3 Q11.F32
@// FFTSTAGE scaled, inverse, name
@//
@// Emits the first radix-4 stage of the complex float FFT. No twiddle
@// multiplication is performed in this stage.
@//   scaled  : not referenced by this macro body
@//   inverse : "TRUE" selects the inverse output arrangement, any other
@//             value the forward one
@//   name    : suffix appended to the labels so every expansion is unique
@//
@// Reads pSrc, pDst, pPingPongBuf and subFFTNum; on exit pSrc points into
@// the buffer just written, pDst = pPingPongBuf, subFFTSize = 4 and
@// subFFTNum is a quarter of its incoming value.
@// Clobbers r3, r4, r8-r10, D0-D23 and the condition flags.
.MACRO FFTSTAGE scaled, inverse, name
@// Define stack arguments
@// pointStep = 2*subFFTNum bytes, computed from the incoming subFFTNum
@// Note: outPointStep = pointStep for firststage
MOV pointStep,subFFTNum,LSL #1
@// Update pSubFFTSize and pSubFFTNum regs
VLD2 {dXr0,dXi0},[pSrc :128],pointStep @// data[0]
@// subFFTSize is 4 once this first stage has run
MOV subFFTSize,#4
@// Note: setCount = subFFTNum/4 (reuse the grpSize reg for setCount)
LSR grpSize,subFFTNum,#2
VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1]
MOV subFFTNum,grpSize
@// Calculate the step of input data for the next set
@//MOV setStep,pointStep,LSL #1
@// 16*grpSize equals 2*pointStep
MOV setStep,grpSize,LSL #4
VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2]
@// setStep = 3*pointStep
ADD setStep,setStep,pointStep
@// setStep = - 3*pointStep+16
RSB setStep,setStep,#16
@// data[3] & update pSrc for the next set
VLD2 {dXr3,dXi3},[pSrc :128],setStep
@// step1 = 2*pointStep
MOV step1,pointStep,LSL #1
@// prime qY0 for the first loop iteration
VADD qY0,qX0,qX2
@// step3 = -pointStep
RSB step3,pointStep,#0
@// grp = 0 a special case since all the twiddle factors are 1
@// Loop on the sets : 2 sets at a time
radix4fsGrpZeroSetLoop\name :
@// Decrement setcount; these flags drive both the BEQ that skips the
@// final data[3] load and the BGT that closes the loop
SUBS setCount,setCount,#2
@// finish first stage of 4 point FFT
VSUB qY2,qX0,qX2
@// the loads below fetch the next set in the order data[0], data[2],
@// data[1], data[3]
VLD2 {dXr0,dXi0},[pSrc :128],step1 @// data[0]
VADD qY1,qX1,qX3
VLD2 {dXr2,dXi2},[pSrc :128],step3 @// data[2]
VSUB qY3,qX1,qX3
@// finish second stage of 4 point FFT
.ifeqs "\inverse", "TRUE"
VLD2 {dXr1,dXi1},[pSrc :128],step1 @// data[1]
VADD qZ0,qY0,qY1
@// data[3] & update pSrc for the next set, but not if it's the
@// last iteration so that we don't read past the end of the
@// input array.
BEQ radix4SkipLastUpdateInv\name
VLD2 {dXr3,dXi3},[pSrc :128],setStep
radix4SkipLastUpdateInv\name:
VSUB dZr3,dYr2,dYi3
VST2 {dZr0,dZi0},[pDst :128],outPointStep
VADD dZi3,dYi2,dYr3
VSUB qZ1,qY0,qY1
VST2 {dZr3,dZi3},[pDst :128],outPointStep
VADD dZr2,dYr2,dYi3
VST2 {dZr1,dZi1},[pDst :128],outPointStep
VSUB dZi2,dYi2,dYr3
VADD qY0,qX0,qX2 @// u0 for next iteration
@// the last store moves pDst to the next set (16 - 3*pointStep)
VST2 {dZr2,dZi2},[pDst :128],setStep
.else
VLD2 {dXr1,dXi1},[pSrc :128],step1 @// data[1]
VADD qZ0,qY0,qY1
@// data[3] & update pSrc for the next set, but not if it's the
@// last iteration so that we don't read past the end of the
@// input array.
BEQ radix4SkipLastUpdateFwd\name
VLD2 {dXr3,dXi3},[pSrc :128],setStep
radix4SkipLastUpdateFwd\name:
VADD dZr2,dYr2,dYi3
VST2 {dZr0,dZi0},[pDst :128],outPointStep
VSUB dZi2,dYi2,dYr3
VSUB qZ1,qY0,qY1
VST2 {dZr2,dZi2},[pDst :128],outPointStep
VSUB dZr3,dYr2,dYi3
VST2 {dZr1,dZi1},[pDst :128],outPointStep
VADD dZi3,dYi2,dYr3
VADD qY0,qX0,qX2 @// u0 for next iteration
@// the last store moves pDst to the next set (16 - 3*pointStep)
VST2 {dZr3,dZi3},[pDst :128],setStep
.endif
BGT radix4fsGrpZeroSetLoop\name
@// reset pSrc to pDst for the next stage
SUB pSrc,pDst,pointStep @// pSrc = pDst - pointStep
MOV pDst,pPingPongBuf
.endm
@// Forward first-stage radix-4 on complex float data.
@// The body is the FFTSTAGE expansion with inverse = "FALSE"; the label
@// suffix is fwd. M_START / M_END are provided by the included headers.
M_START armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace_unsafe,r4
FFTSTAGE "FALSE","FALSE",fwd
M_END
@// Inverse first-stage radix-4 on complex float data.
@// The body is the FFTSTAGE expansion with inverse = "TRUE"; the label
@// suffix is inv. M_START / M_END are provided by the included headers.
M_START armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe,r4
FFTSTAGE "FALSE","TRUE",inv
M_END
.end

Просмотреть файл

@ -1,339 +0,0 @@
@//
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@// Use of this source code is governed by a BSD-style license
@// that can be found in the LICENSE file in the root of the source
@// tree. An additional intellectual property rights grant can be found
@// in the file PATENTS. All contributing project authors may
@// be found in the AUTHORS file in the root of the source tree.
@//
@// This is a modification of armSP_FFT_CToC_SC32_Radix4_ls_unsafe_s.s
@// to support float instead of SC32.
@//
@//
@// Description:
@// Compute a Radix 4 FFT stage for a N point complex signal
@//
@//
@// Include standard headers
#include "dl/api/armCOMM_s.h"
#include "dl/api/omxtypes_s.h"
@// Import symbols required from other files
@// (For example tables)
@// Set debugging level
@//DEBUG_ON SETL {TRUE}
@// Guarding implementation by the processor name
@// Import symbols required from other files
@// (For example tables)
@//IMPORT armAAC_constTable
@// Register aliases used by the last-stage radix-4 FFTSTAGE macro.
@// They are plain preprocessor substitutions: two aliases that expand to
@// the same register name the same storage. Do not append a trailing
@// comment to a define line, it would become part of the expansion.
@//Input Registers
#define pSrc r0
#define pDst r2
#define pTwiddle r1
#define subFFTNum r6
#define subFFTSize r7
@//Output Registers
@//Local Scratch Registers
#define outPointStep r3
@// grpCount and pTmp (below) both live in r4
#define grpCount r4
#define dstStep r5
#define grpTwStep r8
#define stepTwiddle r9
#define twStep r10
#define pTmp r4
#define step16 r11
#define step24 r12
@// Neon Registers
@// dButterfly* name D0-D7 as loaded by VLD4; dXr*/dXi* below name the same
@// registers after the VUZP shuffles
#define dButterfly1Real02 D0.F32
#define dButterfly1Imag02 D1.F32
#define dButterfly1Real13 D2.F32
#define dButterfly1Imag13 D3.F32
#define dButterfly2Real02 D4.F32
#define dButterfly2Imag02 D5.F32
#define dButterfly2Real13 D6.F32
#define dButterfly2Imag13 D7.F32
#define dXr0 D0.F32
#define dXi0 D1.F32
#define dXr1 D2.F32
#define dXi1 D3.F32
#define dXr2 D4.F32
#define dXi2 D5.F32
#define dXr3 D6.F32
#define dXi3 D7.F32
#define dYr0 D16.F32
#define dYi0 D17.F32
#define dYr1 D18.F32
#define dYi1 D19.F32
#define dYr2 D20.F32
#define dYi2 D21.F32
#define dYr3 D22.F32
#define dYi3 D23.F32
@// Twiddle factors for the three non-trivial butterfly inputs
#define dW1r D8.F32
#define dW1i D9.F32
#define dW2r D10.F32
#define dW2i D11.F32
#define dW3r D12.F32
#define dW3i D13.F32
@// qT0-qT5 expand to D registers despite the q prefix; the FFTSTAGE macro
@// in this file does not reference them
#define qT0 d14.f32
#define qT1 d16.F32
#define qT2 d18.F32
#define qT3 d20.f32
#define qT4 d22.f32
#define qT5 d24.f32
#define dZr0 D14.F32
#define dZi0 D15.F32
#define dZr1 D26.F32
#define dZi1 D27.F32
#define dZr2 D28.F32
#define dZi2 D29.F32
#define dZr3 D30.F32
#define dZi3 D31.F32
@// Quad views (Qn covers D2n and D2n+1): qX0 = dXr0:dXi0,
@// qY0-qY3 = the dY pairs, qZ0-qZ3 = the dZ pairs
#define qX0 Q0.F32
#define qY0 Q8.F32
#define qY1 Q9.F32
#define qY2 Q10.F32
#define qY3 Q11.F32
#define qZ0 Q7.F32
#define qZ1 Q13.F32
#define qZ2 Q14.F32
#define qZ3 Q15.F32
@// FFTSTAGE scaled, inverse, name
@//
@// Emits the last radix-4 stage of the complex float FFT. Two groups are
@// processed per loop iteration.
@//   scaled  : not referenced by this macro body
@//   inverse : "TRUE" multiplies by the conjugated twiddles and uses the
@//             inverse output arrangement, any other value the forward one
@//   name    : suffix appended to the labels so every expansion is unique
@//
@// Reads pSrc, pDst, pTwiddle and subFFTSize; on exit subFFTNum = 1,
@// subFFTSize is four times its incoming value and pSrc/pDst are swapped
@// and rewound for the caller.
@// Clobbers r3-r5, r8-r12, D0-D23, D26-D31 and the condition flags.
.MACRO FFTSTAGE scaled, inverse , name
@// Define stack arguments
@// pOut0+1 increments pOut0 by 8 bytes
@// outPointStep = 8*subFFTSize bytes (incoming subFFTSize)
MOV outPointStep,subFFTSize,LSL #3
@// Update grpCount and grpSize rightaway
@// The five twiddle loads below are interleaved with scalar setup; the
@// pTwiddle write-backs they perform are +16, +8 and then grpTwStep (-8)
VLD2 {dW1r,dW1i},[pTwiddle :128] @// [wi|wr]
MOV step16,#16
LSL grpCount,subFFTSize,#2
VLD1 dW2r,[pTwiddle :64] @// [wi|wr]
MOV subFFTNum,#1 @//after the last stage
VLD1 dW3r,[pTwiddle :64],step16 @// [wi|wr]
MOV stepTwiddle,#0
VLD1 dW2i,[pTwiddle :64]! @// [wi|wr]
SUB grpTwStep,stepTwiddle,#8 @// grpTwStep = -8 to start with
@// update subFFTSize for the next stage
MOV subFFTSize,grpCount
VLD1 dW3i,[pTwiddle :64],grpTwStep @// [wi|wr]
MOV dstStep,outPointStep,LSL #1
@// AC.r AC.i BD.r BD.i
VLD4 {dButterfly1Real02,dButterfly1Imag02,dButterfly1Real13,dButterfly1Imag13},[pSrc :256]!
ADD dstStep,dstStep,outPointStep @// dstStep = 3*outPointStep
RSB dstStep,dstStep,#16 @// dstStep = - 3*outPointStep+16
MOV step24,#24
@// AC.r AC.i BD.r BD.i
VLD4 {dButterfly2Real02,dButterfly2Imag02,dButterfly2Real13,dButterfly2Imag13},[pSrc :256]!
@// Process two groups at a time
radix4lsGrpLoop\name :
VZIP dW2r,dW2i
ADD stepTwiddle,stepTwiddle,#16
VZIP dW3r,dW3i
ADD grpTwStep,stepTwiddle,#4
VUZP dButterfly1Real13, dButterfly2Real13 @// B.r D.r
SUB twStep,stepTwiddle,#16 @// -16+stepTwiddle
VUZP dButterfly1Imag13, dButterfly2Imag13 @// B.i D.i
MOV grpTwStep,grpTwStep,LSL #1
VUZP dButterfly1Real02, dButterfly2Real02 @// A.r C.r
RSB grpTwStep,grpTwStep,#0 @// -8-2*stepTwiddle
VUZP dButterfly1Imag02, dButterfly2Imag02 @// A.i C.i
@// grpCount is multiplied by 4
@// The flags set here are used by the addeq/beq that skip the final
@// loads and by the BGT that closes the loop; nothing in between
@// modifies them
SUBS grpCount,grpCount,#8
.ifeqs "\inverse", "TRUE"
VMUL dZr1,dW1r,dXr1
VMLA dZr1,dW1i,dXi1 @// real part
VMUL dZi1,dW1r,dXi1
VMLS dZi1,dW1i,dXr1 @// imag part
.else
VMUL dZr1,dW1r,dXr1
VMLS dZr1,dW1i,dXi1 @// real part
VMUL dZi1,dW1r,dXi1
VMLA dZi1,dW1i,dXr1 @// imag part
.endif
@// twiddles for the next pair of groups are loaded while the current
@// products are still being formed
VLD2 {dW1r,dW1i},[pTwiddle :128],stepTwiddle @// [wi|wr]
.ifeqs "\inverse", "TRUE"
VMUL dZr2,dW2r,dXr2
VMLA dZr2,dW2i,dXi2 @// real part
VMUL dZi2,dW2r,dXi2
VLD1 dW2r,[pTwiddle :64],step16 @// [wi|wr]
VMLS dZi2,dW2i,dXr2 @// imag part
.else
VMUL dZr2,dW2r,dXr2
VMLS dZr2,dW2i,dXi2 @// real part
VMUL dZi2,dW2r,dXi2
VLD1 dW2r,[pTwiddle :64],step16 @// [wi|wr]
VMLA dZi2,dW2i,dXr2 @// imag part
.endif
VLD1 dW2i,[pTwiddle :64],twStep @// [wi|wr]
@// move qX0 so as to load for the next iteration
VMOV qZ0,qX0
.ifeqs "\inverse", "TRUE"
VMUL dZr3,dW3r,dXr3
VMLA dZr3,dW3i,dXi3 @// real part
VMUL dZi3,dW3r,dXi3
VLD1 dW3r,[pTwiddle :64],step24
VMLS dZi3,dW3i,dXr3 @// imag part
.else
VMUL dZr3,dW3r,dXr3
VMLS dZr3,dW3i,dXi3 @// real part
VMUL dZi3,dW3r,dXi3
VLD1 dW3r,[pTwiddle :64],step24
VMLA dZi3,dW3i,dXr3 @// imag part
.endif
VLD1 dW3i,[pTwiddle :64],grpTwStep @// [wi|wr]
@// Don't do the load on the last iteration so we don't read past the end
@// of pSrc.
@// pSrc is still advanced by the 64 bytes the two skipped loads would
@// have consumed; the epilogue subtracts them again
addeq pSrc, pSrc, #64
beq radix4lsSkipRead\name
@// AC.r AC.i BD.r BD.i
VLD4 {dButterfly1Real02,dButterfly1Imag02,dButterfly1Real13,dButterfly1Imag13},[pSrc :256]!
@// AC.r AC.i BD.r BD.i
VLD4 {dButterfly2Real02,dButterfly2Imag02,dButterfly2Real13,dButterfly2Imag13},[pSrc :256]!
radix4lsSkipRead\name:
@// finish first stage of 4 point FFT
VADD qY0,qZ0,qZ2
VSUB qY2,qZ0,qZ2
VADD qY1,qZ1,qZ3
VSUB qY3,qZ1,qZ3
@// finish second stage of 4 point FFT
.ifeqs "\inverse", "TRUE"
VSUB qZ0,qY2,qY1
VADD dZr3,dYr0,dYi3
VST2 {dZr0,dZi0},[pDst :128],outPointStep
VSUB dZi3,dYi0,dYr3
VADD qZ2,qY2,qY1
VST2 {dZr3,dZi3},[pDst :128],outPointStep
VSUB dZr1,dYr0,dYi3
VST2 {dZr2,dZi2},[pDst :128],outPointStep
VADD dZi1,dYi0,dYr3
@// dstStep = -3*outPointStep + 16
VST2 {dZr1,dZi1},[pDst :128],dstStep
.else
VSUB qZ0,qY2,qY1
VSUB dZr1,dYr0,dYi3
VST2 {dZr0,dZi0},[pDst :128],outPointStep
VADD dZi1,dYi0,dYr3
VADD qZ2,qY2,qY1
VST2 {dZr1,dZi1},[pDst :128],outPointStep
VADD dZr3,dYr0,dYi3
VST2 {dZr2,dZi2},[pDst :128],outPointStep
VSUB dZi3,dYi0,dYr3
@// dstStep = -3*outPointStep + 16
VST2 {dZr3,dZi3},[pDst :128],dstStep
.endif
BGT radix4lsGrpLoop\name
@// Reset and Swap pSrc and pDst for the next stage
MOV pTmp,pDst
@// Extra increment done in final iteration of the loop
SUB pSrc,pSrc,#64
@// new pDst = pSrc - 4*outPointStep; new pSrc = old pDst - outPointStep
SUB pDst,pSrc,outPointStep,LSL #2
SUB pSrc,pTmp,outPointStep
@// rewind pTwiddle by 2*subFFTSize bytes (subFFTSize already updated)
SUB pTwiddle,pTwiddle,subFFTSize,LSL #1
@// Extra increment done in final iteration of the loop
SUB pTwiddle,pTwiddle,#16
.endm
@// Forward last-stage radix-4 on complex float data.
@// The body is the FFTSTAGE expansion with inverse = "FALSE"; the label
@// suffix is fwd. M_START / M_END are provided by the included headers.
M_START armSP_FFTFwd_CToC_FC32_Radix4_ls_OutOfPlace_unsafe,r4
FFTSTAGE "FALSE","FALSE",fwd
M_END
@// Inverse last-stage radix-4 on complex float data.
@// The body is the FFTSTAGE expansion with inverse = "TRUE"; the label
@// suffix is inv. M_START / M_END are provided by the included headers.
M_START armSP_FFTInv_CToC_FC32_Radix4_ls_OutOfPlace_unsafe,r4
FFTSTAGE "FALSE","TRUE",inv
M_END
.end

Просмотреть файл

@ -1,331 +0,0 @@
@//
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@// Use of this source code is governed by a BSD-style license
@// that can be found in the LICENSE file in the root of the source
@// tree. An additional intellectual property rights grant can be found
@// in the file PATENTS. All contributing project authors may
@// be found in the AUTHORS file in the root of the source tree.
@//
@//
@// This is a modification of armSP_FFT_CToC_SC32_Radix4_unsafe_s.s
@// to support float instead of SC32.
@//
@//
@// Description:
@// Compute a Radix 4 FFT stage for a N point complex signal
@//
@//
@// Include standard headers
#include "dl/api/armCOMM_s.h"
#include "dl/api/omxtypes_s.h"
@// Import symbols required from other files
@// (For example tables)
@// Set debugging level
@//DEBUG_ON SETL {TRUE}
@// Guarding implementation by the processor name
@// Guarding implementation by the processor name
@// Import symbols required from other files
@// (For example tables)
@// Register aliases used by the generic radix-4 FFTSTAGE macro.
@// They are plain preprocessor substitutions: two aliases that expand to
@// the same register name the same storage. Do not append a trailing
@// comment to a define line, it would become part of the expansion.
@//Input Registers
#define pSrc r0
#define pDst r2
#define pTwiddle r1
#define subFFTNum r6
#define subFFTSize r7
@//Output Registers
@//Local Scratch Registers
@// grpCount and t1 (below) both live in r3
#define grpCount r3
#define pointStep r4
#define outPointStep r5
#define stepTwiddle r12
@// setCount uses r14, i.e. the link register
#define setCount r14
#define srcStep r8
#define setStep r9
#define dstStep r10
#define twStep r11
#define t1 r3
@// Neon Registers
@// One twiddle per D register: lane 0 and lane 1 are used as the two
@// scalar multiplicands in the macro
#define dW1 D0.F32
#define dW2 D1.F32
#define dW3 D2.F32
#define dXr0 D4.F32
#define dXi0 D5.F32
#define dXr1 D6.F32
#define dXi1 D7.F32
#define dXr2 D8.F32
#define dXi2 D9.F32
#define dXr3 D10.F32
#define dXi3 D11.F32
#define dYr0 D12.F32
#define dYi0 D13.F32
#define dYr1 D14.F32
#define dYi1 D15.F32
#define dYr2 D16.F32
#define dYi2 D17.F32
#define dYr3 D18.F32
#define dYi3 D19.F32
@// qT0-qT3 expand to D registers despite the q prefix; the FFTSTAGE macro
@// in this file does not reference them
#define qT0 d16.f32
#define qT1 d18.f32
#define qT2 d12.f32
#define qT3 d14.f32
#define dZr0 D20.F32
#define dZi0 D21.F32
#define dZr1 D22.F32
#define dZi1 D23.F32
#define dZr2 D24.F32
#define dZi2 D25.F32
#define dZr3 D26.F32
#define dZi3 D27.F32
@// Quad views (Qn covers D2n and D2n+1): qY0-qY3 = the dY pairs,
@// qX0 = dXr0:dXi0, qZ0-qZ3 = the dZ pairs
#define qY0 Q6.F32
#define qY1 Q7.F32
#define qY2 Q8.F32
#define qY3 Q9.F32
#define qX0 Q2.F32
#define qZ0 Q10.F32
#define qZ1 Q11.F32
#define qZ2 Q12.F32
#define qZ3 Q13.F32
@// FFTSTAGE scaled, inverse, name
@//
@// Emits one generic radix-4 stage of the complex float FFT, reading from
@// pSrc and writing to pDst.
@//   scaled  : not referenced by this macro body
@//   inverse : "TRUE" multiplies by the conjugated twiddles and uses the
@//             inverse output arrangement, any other value the forward one
@//   name    : suffix appended to the labels so every expansion is unique
@//
@// Reads and updates pSrc, pDst, subFFTNum and subFFTSize. pTwiddle is
@// moved during each group but is back at its entry value on exit.
@// Clobbers r3-r5, r8-r12, r14, D0-D2, D4-D27 and the condition flags.
.MACRO FFTSTAGE scaled, inverse , name
@// Define stack arguments
@// Update grpCount and grpSize right away in order to reuse the registers
@// that held the incoming values
LSL grpCount,subFFTSize,#2 @// grpCount = 4 * subFFTSize
LSR subFFTNum,subFFTNum,#2 @// grpSize = subFFTNum / 4
MOV subFFTSize,grpCount
@// The first group uses the same twiddle for W1, W2 and W3: all three
@// loads read [pTwiddle] without write-back
VLD1 dW1,[pTwiddle] @//[wi | wr]
@// pointStep = 2*grpSize bytes for now; it is scaled by 4 after the multiply
MOV pointStep,subFFTNum,LSL #1
MOV stepTwiddle,#0
VLD1 dW2,[pTwiddle] @//[wi | wr]
@// outPointStep = grpCount * (2*grpSize) bytes, which the original
@// comment describes as 2*size bytes.
@// A full 32-bit MUL is used here: SMULBB only multiplies the signed low
@// halfwords, so it produced a wrong stride as soon as grpCount or
@// pointStep reached 0x8000. For smaller operands the result is identical.
MUL outPointStep,grpCount,pointStep
LSL pointStep,pointStep,#2 @// pointStep = 8*grpSize bytes
VLD1 dW3,[pTwiddle] @//[wi | wr]
MOV srcStep,pointStep,LSL #1 @// srcStep = 2*pointStep
ADD setStep,srcStep,pointStep @// setStep = 3*pointStep
RSB setStep,setStep,#0 @// setStep = - 3*pointStep
SUB srcStep,srcStep,#16 @// srcStep = 2*pointStep-16
MOV dstStep,outPointStep,LSL #1
ADD dstStep,dstStep,outPointStep @// dstStep = 3*outPointStep
@// dstStep = - 3*outPointStep+16
RSB dstStep,dstStep,#16
radix4GrpLoop\name :
VLD2 {dXr0,dXi0},[pSrc],pointStep @// data[0]
@// stepTwiddle grows by pointStep for every group
ADD stepTwiddle,stepTwiddle,pointStep
VLD2 {dXr1,dXi1},[pSrc],pointStep @// data[1]
@// set pTwiddle to the first point
ADD pTwiddle,pTwiddle,stepTwiddle
VLD2 {dXr2,dXi2},[pSrc],pointStep @// data[2]
MOV twStep,stepTwiddle,LSL #2
@// data[3] & update pSrc for the next set
VLD2 {dXr3,dXi3},[pSrc],setStep
SUB twStep,stepTwiddle,twStep @// twStep = -3*stepTwiddle
MOV setCount,pointStep,LSR #3 @// setCount = grpSize
@// set pSrc to data[0] of the next set
ADD pSrc,pSrc,#16
@// increment to data[1] of the next set
ADD pSrc,pSrc,pointStep
@// Loop on the sets
radix4SetLoop\name :
.ifeqs "\inverse", "TRUE"
VMUL dZr1,dXr1,dW1[0]
VMUL dZi1,dXi1,dW1[0]
VMUL dZr2,dXr2,dW2[0]
VMUL dZi2,dXi2,dW2[0]
VMUL dZr3,dXr3,dW3[0]
VMUL dZi3,dXi3,dW3[0]
VMLA dZr1,dXi1,dW1[1] @// real part
VMLS dZi1,dXr1,dW1[1] @// imag part
@// data[1] for next iteration
VLD2 {dXr1,dXi1},[pSrc],pointStep
VMLA dZr2,dXi2,dW2[1] @// real part
VMLS dZi2,dXr2,dW2[1] @// imag part
@// data[2] for next iteration
VLD2 {dXr2,dXi2},[pSrc],pointStep
VMLA dZr3,dXi3,dW3[1] @// real part
VMLS dZi3,dXr3,dW3[1] @// imag part
.else
VMUL dZr1,dXr1,dW1[0]
VMUL dZi1,dXi1,dW1[0]
VMUL dZr2,dXr2,dW2[0]
VMUL dZi2,dXi2,dW2[0]
VMUL dZr3,dXr3,dW3[0]
VMUL dZi3,dXi3,dW3[0]
VMLS dZr1,dXi1,dW1[1] @// real part
VMLA dZi1,dXr1,dW1[1] @// imag part
@// data[1] for next iteration
VLD2 {dXr1,dXi1},[pSrc],pointStep
VMLS dZr2,dXi2,dW2[1] @// real part
VMLA dZi2,dXr2,dW2[1] @// imag part
@// data[2] for next iteration
VLD2 {dXr2,dXi2},[pSrc],pointStep
VMLS dZr3,dXi3,dW3[1] @// real part
VMLA dZi3,dXr3,dW3[1] @// imag part
.endif
@// data[3] & update pSrc to data[0]
@// But don't read on the very last iteration because that reads past
@// the end of pSrc. The last iteration is grpCount = 4, setCount = 2.
cmp grpCount, #4
cmpeq setCount, #2 @// Test setCount if grpCount = 4
@// These are executed only if both grpCount = 4 and setCount = 2
@// pSrc still receives the write-back the skipped load would have applied
addeq pSrc, pSrc, setStep
beq radix4SkipRead\name
VLD2 {dXr3,dXi3},[pSrc],setStep
radix4SkipRead\name:
@// the flags set here are consumed by the BGT that closes the set loop;
@// nothing in between modifies them
SUBS setCount,setCount,#2
@// finish first stage of 4 point FFT
VADD qY0,qX0,qZ2
VSUB qY2,qX0,qZ2
@// data[0] for next iteration
VLD2 {dXr0,dXi0},[pSrc :128]!
VADD qY1,qZ1,qZ3
VSUB qY3,qZ1,qZ3
@// finish second stage of 4 point FFT
VSUB qZ0,qY2,qY1
.ifeqs "\inverse", "TRUE"
VADD dZr3,dYr0,dYi3
VST2 {dZr0,dZi0},[pDst :128],outPointStep
VSUB dZi3,dYi0,dYr3
VADD qZ2,qY2,qY1
VST2 {dZr3,dZi3},[pDst :128],outPointStep
VSUB dZr1,dYr0,dYi3
VST2 {dZr2,dZi2},[pDst :128],outPointStep
VADD dZi1,dYi0,dYr3
@// dstStep = -3*outPointStep + 16 moves pDst to the next set
VST2 {dZr1,dZi1},[pDst :128],dstStep
.else
VSUB dZr1,dYr0,dYi3
VST2 {dZr0,dZi0},[pDst :128],outPointStep
VADD dZi1,dYi0,dYr3
VADD qZ2,qY2,qY1
VST2 {dZr1,dZi1},[pDst :128],outPointStep
VADD dZr3,dYr0,dYi3
VST2 {dZr2,dZi2},[pDst :128],outPointStep
VSUB dZi3,dYi0,dYr3
@// dstStep = -3*outPointStep + 16 moves pDst to the next set
VST2 {dZr3,dZi3},[pDst :128],dstStep
.endif
@// increment to data[1] of the next set
ADD pSrc,pSrc,pointStep
BGT radix4SetLoop\name
@// Twiddles for the next group. The three write-backs (+stepTwiddle,
@// +stepTwiddle, -3*stepTwiddle) undo the ADD at the top of the group
@// loop, so pTwiddle is back at its base afterwards.
VLD1 dW1,[pTwiddle :64],stepTwiddle @//[wi | wr]
@// subtract 4 since grpCount multiplied by 4
SUBS grpCount,grpCount,#4
VLD1 dW2,[pTwiddle :64],stepTwiddle @//[wi | wr]
@// increment pSrc for the next grp
ADD pSrc,pSrc,srcStep
VLD1 dW3,[pTwiddle :64],twStep @//[wi | wr]
BGT radix4GrpLoop\name
@// Reset and Swap pSrc and pDst for the next stage
@// t1 shares its register with grpCount, which is no longer needed here
MOV t1,pDst
@// new pDst = pSrc - 4*outPointStep; new pSrc = old pDst - outPointStep
SUB pDst,pSrc,outPointStep,LSL #2
SUB pSrc,t1,outPointStep
.endm
@// Forward generic radix-4 stage on complex float data.
@// The body is the FFTSTAGE expansion with inverse = "FALSE"; the label
@// suffix is FWD. M_START / M_END are provided by the included headers.
M_START armSP_FFTFwd_CToC_FC32_Radix4_OutOfPlace_unsafe,r4
FFTSTAGE "FALSE","FALSE",FWD
M_END
@// Inverse generic radix-4 stage on complex float data.
@// The body is the FFTSTAGE expansion with inverse = "TRUE"; the label
@// suffix is INV. M_START / M_END are provided by the included headers.
M_START armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace_unsafe,r4
FFTSTAGE "FALSE","TRUE",INV
M_END
.end

Просмотреть файл

@ -1,426 +0,0 @@
@//
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@// Use of this source code is governed by a BSD-style license
@// that can be found in the LICENSE file in the root of the source
@// tree. An additional intellectual property rights grant can be found
@// in the file PATENTS. All contributing project authors may
@// be found in the AUTHORS file in the root of the source tree.
@//
@// This is a modification of armSP_FFT_CToC_SC32_Radix8_fs_unsafe_s.s
@// to support float instead of SC32.
@//
@//
@// Description:
@// Compute a first stage Radix 8 FFT stage for a N point complex signal
@//
@//
@// Include standard headers
#include "dl/api/armCOMM_s.h"
#include "dl/api/omxtypes_s.h"
@// Import symbols required from other files
@// (For example tables)
@// Set debugging level
@//DEBUG_ON SETL {TRUE}
@// Guarding implementation by the processor name
@// Guarding implementation by the processor name
@// Register aliases used by the first-stage radix-8 FFTSTAGE macro.
@// They are plain preprocessor substitutions: two aliases that expand to
@// the same register name the same storage. Do not append a trailing
@// comment to a define line, it would become part of the expansion.
@//Input Registers
#define pSrc r0
#define pDst r2
#define pTwiddle r1
#define subFFTNum r6
#define subFFTSize r7
@// dest buffer for the next stage (not pSrc for first stage)
#define pPingPongBuf r5
@//Output Registers
@//Local Scratch Registers
#define grpSize r3
@// Reuse grpSize as setCount
#define setCount r3
@// pointStep and outPointStep share r4 (they are equal in the first stage)
#define pointStep r4
#define outPointStep r4
#define setStep r8
#define step1 r9
#define step2 r10
#define t0 r11
@// Neon Registers
@// Input points x0-x7: real parts in the even D register, imaginary in
@// the odd one
#define dXr0 D0.F32
#define dXi0 D1.F32
#define dXr1 D2.F32
#define dXi1 D3.F32
#define dXr2 D4.F32
#define dXi2 D5.F32
#define dXr3 D6.F32
#define dXi3 D7.F32
#define dXr4 D8.F32
#define dXi4 D9.F32
#define dXr5 D10.F32
#define dXi5 D11.F32
#define dXr6 D12.F32
#define dXi6 D13.F32
#define dXr7 D14.F32
#define dXi7 D15.F32
#define qX0 Q0.F32
#define qX1 Q1.F32
#define qX2 Q2.F32
#define qX3 Q3.F32
#define qX4 Q4.F32
#define qX5 Q5.F32
#define qX6 Q6.F32
#define qX7 Q7.F32
@// The U, V and Y alias sets below all map onto D16-D31 (Q8-Q15) and
@// therefore overlap each other: even-indexed U values sit in D16-D23 and
@// odd-indexed ones in D24-D31, the V set is the other way round, and the
@// Y set matches the U layout
#define dUr0 D16.F32
#define dUi0 D17.F32
#define dUr2 D18.F32
#define dUi2 D19.F32
#define dUr4 D20.F32
#define dUi4 D21.F32
#define dUr6 D22.F32
#define dUi6 D23.F32
#define dUr1 D24.F32
#define dUi1 D25.F32
#define dUr3 D26.F32
#define dUi3 D27.F32
#define dUr5 D28.F32
#define dUi5 D29.F32
@// reuse dXr7 and dXi7
@// NOTE(review): D30/D31 are not the dXr7/dXi7 registers (D14/D15); the
@// aliases that actually overlap dXr7/dXi7 are dT0/dT1 at the end of
@// this list
#define dUr7 D30.F32
#define dUi7 D31.F32
#define qU0 Q8.F32
#define qU1 Q12.F32
#define qU2 Q9.F32
#define qU3 Q13.F32
#define qU4 Q10.F32
#define qU5 Q14.F32
#define qU6 Q11.F32
#define qU7 Q15.F32
#define dVr0 D24.F32
#define dVi0 D25.F32
#define dVr2 D26.F32
#define dVi2 D27.F32
#define dVr4 D28.F32
#define dVi4 D29.F32
#define dVr6 D30.F32
#define dVi6 D31.F32
#define dVr1 D16.F32
#define dVi1 D17.F32
#define dVr3 D18.F32
#define dVi3 D19.F32
#define dVr5 D20.F32
#define dVi5 D21.F32
#define dVr7 D22.F32
#define dVi7 D23.F32
#define qV0 Q12.F32
#define qV1 Q8.F32
#define qV2 Q13.F32
#define qV3 Q9.F32
#define qV4 Q14.F32
#define qV5 Q10.F32
#define qV6 Q15.F32
#define qV7 Q11.F32
#define dYr0 D16.F32
#define dYi0 D17.F32
#define dYr2 D18.F32
#define dYi2 D19.F32
#define dYr4 D20.F32
#define dYi4 D21.F32
#define dYr6 D22.F32
#define dYi6 D23.F32
#define dYr1 D24.F32
#define dYi1 D25.F32
#define dYr3 D26.F32
#define dYi3 D27.F32
#define dYr5 D28.F32
#define dYi5 D29.F32
#define dYr7 D30.F32
#define dYi7 D31.F32
#define qY0 Q8.F32
#define qY1 Q12.F32
#define qY2 Q9.F32
#define qY3 Q13.F32
#define qY4 Q10.F32
#define qY5 Q14.F32
#define qY6 Q11.F32
#define qY7 Q15.F32
@// Temporaries; they occupy the same registers as dXr7 / dXi7
#define dT0 D14.F32
#define dT1 D15.F32
@// Define constants
@// Single-precision 1/sqrt(2). FFTSTAGE takes its address with
@// LDR t0,=ONEBYSQRT2 and loads the value into lane 0 of dT0.
@ sqrt(1/2)
ONEBYSQRT2: .float 0.7071067811865476e0
@// FFTSTAGE scaled, inverse, name
@//
@// Emits the first radix-8 stage of the complex float FFT. The only
@// constant multiplied in is ONEBYSQRT2; pTwiddle is not read.
@//   scaled  : not referenced by this macro body
@//   inverse : "TRUE" selects the inverse arrangement, any other value
@//             the forward one
@//   name    : suffix appended to the labels so every expansion is unique
@//
@// Reads pSrc, pDst, pPingPongBuf and subFFTNum; on exit pSrc points into
@// the buffer just written, pDst = pPingPongBuf, subFFTSize = 8 and
@// subFFTNum is an eighth of its incoming value.
@// Clobbers r3, r4, r8-r11, D0-D31 and the condition flags.
.MACRO FFTSTAGE scaled, inverse, name
@// Define stack arguments
@// Update pSubFFTSize and pSubFFTNum regs
@// subFFTSize is 8 once this first stage has run
MOV subFFTSize,#8
LDR t0,=ONEBYSQRT2 @// t0 = address of the 1/sqrt(2) constant
@// Note: setCount = subFFTNum/8 (reuse the grpSize reg for setCount)
LSR grpSize,subFFTNum,#3
MOV subFFTNum,grpSize
@// pointStep = 8*grpSize bytes
@// Note: outPointStep = pointStep for firststage
MOV pointStep,grpSize,LSL #3
@// Calculate the step of input data for the next set
@//MOV step1,pointStep,LSL #1 @// step1 = 2*pointStep
VLD2 {dXr0,dXi0},[pSrc :128],pointStep @// data[0]
@// 16*grpSize equals 2*pointStep
MOV step1,grpSize,LSL #4
MOV step2,pointStep,LSL #3
VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1]
SUB step2,step2,pointStep @// step2 = 7*pointStep
@// setStep = - 7*pointStep+16
RSB setStep,step2,#16
VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2]
VLD2 {dXr3,dXi3},[pSrc :128],pointStep @// data[3]
VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4]
VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5]
VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6]
@// data[7] & update pSrc for the next set
@// setStep = -7*pointStep + 16
VLD2 {dXr7,dXi7},[pSrc :128],setStep
@// grp = 0 a special case since all the twiddle factors are 1
@// Loop on the sets
radix8fsGrpZeroSetLoop\name :
@// Decrement setcount; these flags drive both the BEQ that skips the
@// final data[7] load and the BGT that closes the loop
SUBS setCount,setCount,#2
@// finish first stage of 8 point FFT
VADD qU0,qX0,qX4
VADD qU2,qX1,qX5
VADD qU4,qX2,qX6
VADD qU6,qX3,qX7
@// finish second stage of 8 point FFT
VADD qV0,qU0,qU4
VSUB qV2,qU0,qU4
VADD qV4,qU2,qU6
VSUB qV6,qU2,qU6
@// finish third stage of 8 point FFT
VADD qY0,qV0,qV4
VSUB qY4,qV0,qV4
@// the even outputs are stored 2*pointStep apart (step1)
VST2 {dYr0,dYi0},[pDst :128],step1 @// store y0
.ifeqs "\inverse", "TRUE"
VSUB dYr2,dVr2,dVi6
VADD dYi2,dVi2,dVr6
VADD dYr6,dVr2,dVi6
VST2 {dYr2,dYi2},[pDst :128],step1 @// store y2
VSUB dYi6,dVi2,dVr6
VSUB qU1,qX0,qX4
VST2 {dYr4,dYi4},[pDst :128],step1 @// store y4
VSUB qU3,qX1,qX5
VSUB qU5,qX2,qX6
VST2 {dYr6,dYi6},[pDst :128],step1 @// store y6
.ELSE
@// forward case: the dYr6/dYi6 registers go to the y2 slot and
@// dYr2/dYi2 to the y6 slot
VADD dYr6,dVr2,dVi6
VSUB dYi6,dVi2,dVr6
VSUB dYr2,dVr2,dVi6
VST2 {dYr6,dYi6},[pDst :128],step1 @// store y2
VADD dYi2,dVi2,dVr6
VSUB qU1,qX0,qX4
VST2 {dYr4,dYi4},[pDst :128],step1 @// store y4
VSUB qU3,qX1,qX5
VSUB qU5,qX2,qX6
VST2 {dYr2,dYi2},[pDst :128],step1 @// store y6
.ENDIF
@// finish first stage of 8 point FFT
VSUB qU7,qX3,qX7
@// dT0 shares its register with dXr7, so the constant is reloaded on
@// every iteration once x7 has been consumed
VLD1 dT0[0], [t0]
@// finish second stage of 8 point FFT
VSUB dVr1,dUr1,dUi5
@// data[0] for next iteration
VLD2 {dXr0,dXi0},[pSrc :128],pointStep
VADD dVi1,dUi1,dUr5
VADD dVr3,dUr1,dUi5
VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1]
VSUB dVi3,dUi1,dUr5
VSUB dVr5,dUr3,dUi7
VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2]
VADD dVi5,dUi3,dUr7
VADD dVr7,dUr3,dUi7
VLD2 {dXr3,dXi3},[pSrc :128],pointStep @// data[3]
VSUB dVi7,dUi3,dUr7
@// finish third stage of 8 point FFT
.ifeqs "\inverse", "TRUE"
@// calculate a*v5
VMUL dT1,dVr5,dT0[0] @// dT1 = dVr5 * (1/sqrt2)
VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4]
VMUL dVi5,dVi5,dT0[0]
VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5]
VSUB dVr5,dT1,dVi5 @// a * V5
VADD dVi5,dT1,dVi5
VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6]
@// calculate b*v7
VMUL dT1,dVr7,dT0[0]
VMUL dVi7,dVi7,dT0[0]
VADD qY1,qV1,qV5
VSUB qY5,qV1,qV5
VADD dVr7,dT1,dVi7 @// b * V7
VSUB dVi7,dVi7,dT1
SUB pDst, pDst, step2 @// set pDst to y1
@// On the last iteration, this will read past the end of pSrc,
@// so skip this read.
BEQ radix8SkipLastUpdateInv\name
VLD2 {dXr7,dXi7},[pSrc :128],setStep @// data[7]
radix8SkipLastUpdateInv\name:
VSUB dYr3,dVr3,dVr7
VSUB dYi3,dVi3,dVi7
VST2 {dYr1,dYi1},[pDst :128],step1 @// store y1
VADD dYr7,dVr3,dVr7
VADD dYi7,dVi3,dVi7
VST2 {dYr3,dYi3},[pDst :128],step1 @// store y3
VST2 {dYr5,dYi5},[pDst :128],step1 @// store y5
VST2 {dYr7,dYi7},[pDst :128] @// store y7
ADD pDst, pDst, #16
.ELSE
@// calculate b*v7
VMUL dT1,dVr7,dT0[0]
VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4]
VMUL dVi7,dVi7,dT0[0]
VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5]
VADD dVr7,dT1,dVi7 @// b * V7
VSUB dVi7,dVi7,dT1
VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6]
@// calculate a*v5
VMUL dT1,dVr5,dT0[0] @// dT1 = dVr5 * (1/sqrt2)
VMUL dVi5,dVi5,dT0[0]
VADD dYr7,dVr3,dVr7
VADD dYi7,dVi3,dVi7
SUB pDst, pDst, step2 @// set pDst to y1
VSUB dVr5,dT1,dVi5 @// a * V5
VADD dVi5,dT1,dVi5
@// On the last iteration, this will read past the end of pSrc,
@// so skip this read.
BEQ radix8SkipLastUpdateFwd\name
VLD2 {dXr7,dXi7},[pSrc :128],setStep @// data[7]
radix8SkipLastUpdateFwd\name:
@// forward case: the odd outputs are written from the register set in
@// the order dY7, dY5, dY3, dY1
VSUB qY5,qV1,qV5
VSUB dYr3,dVr3,dVr7
VST2 {dYr7,dYi7},[pDst :128],step1 @// store y1
VSUB dYi3,dVi3,dVi7
VADD qY1,qV1,qV5
VST2 {dYr5,dYi5},[pDst :128],step1 @// store y3
VST2 {dYr3,dYi3},[pDst :128],step1 @// store y5
VST2 {dYr1,dYi1},[pDst :128]! @// store y7
.ENDIF
@// update pDst for the next set
SUB pDst, pDst, step2
BGT radix8fsGrpZeroSetLoop\name
@// reset pSrc to pDst for the next stage
SUB pSrc,pDst,pointStep @// pSrc = pDst - pointStep
MOV pDst,pPingPongBuf
.endm
@// Allocate stack memory required by the function
@// Forward first-stage radix-8 on complex float data.
@// The body is the FFTSTAGE expansion with inverse = "FALSE"; the label
@// suffix is FWD. M_START / M_END are provided by the included headers.
M_START armSP_FFTFwd_CToC_FC32_Radix8_fs_OutOfPlace_unsafe,r4
FFTSTAGE "FALSE","FALSE",FWD
M_END
@// Inverse first-stage radix-8 on complex float data.
@// The body is the FFTSTAGE expansion with inverse = "TRUE"; the label
@// suffix is INV. M_START / M_END are provided by the included headers.
M_START armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace_unsafe,r4
FFTSTAGE "FALSE","TRUE",INV
M_END
.end

Просмотреть файл

@ -1,170 +0,0 @@
@//
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@// Use of this source code is governed by a BSD-style license
@// that can be found in the LICENSE file in the root of the source
@// tree. An additional intellectual property rights grant can be found
@// in the file PATENTS. All contributing project authors may
@// be found in the AUTHORS file in the root of the source tree.
@//
@// This file was originally licensed as follows. It has been
@// relicensed with permission from the copyright holders.
@//
@//
@// File Name: armSP_FFT_CToC_SC16_Radix2_fs_unsafe_s.s
@// OpenMAX DL: v1.0.2
@// Last Modified Revision: 6693
@// Last Modified Date: Tue, 10 Jul 2007
@//
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
@//
@//
@//
@// Description:
@// Compute a Radix 2 FFT stage for a N point complex signal
@//
@//
@// Include standard headers
#include "dl/api/armCOMM_s.h"
#include "dl/api/omxtypes_s.h"
@// Import symbols required from other files
@// (For example tables)
@// Set debugging level
@//DEBUG_ON SETL {TRUE}
@// Guarding implementation by the processor name
@// Guarding implementation by the processor name
@// Register aliases used by the first-stage radix-2 SC16 FFTSTAGE macro.
@// They are plain preprocessor substitutions: two aliases that expand to
@// the same register name the same storage. Do not append a trailing
@// comment to a define line, it would become part of the expansion.
@//Input Registers
#define pSrc r0
#define pDst r2
#define pTwiddle r1
#define pPingPongBuf r5
#define subFFTNum r6
#define subFFTSize r7
@//Output Registers
@//Local Scratch Registers
@// Shared registers: pointStep/outPointStep = r3, grpSize/setCount = r4,
@// step/dstStep = r8
#define pointStep r3
#define outPointStep r3
#define grpSize r4
#define setCount r4
#define step r8
#define dstStep r8
@// Neon Registers
@// D0-D3 viewed as 16-bit lanes for the arithmetic ...
#define dX0 D0.S16
#define dX1 D1.S16
#define dY0 D2.S16
#define dY1 D3.S16
@// ... and as 32-bit lanes for the single-lane loads and stores
#define dX0S32 D0.S32
#define dX1S32 D1.S32
#define dY0S32 D2.S32
#define dY1S32 D3.S32
@// FFTSTAGE scaled, inverse, name
@//
@// Emits the first radix-2 stage for 16-bit complex data, one set per
@// loop iteration and without any twiddle multiplication.
@//   scaled  : "TRUE" uses the halving add/subtract (VHADD/VHSUB), any
@//             other value the plain VADD/VSUB
@//   inverse : not referenced by this macro body
@//   name    : suffix appended to the loop label so every expansion is unique
@//
@// On exit subFFTSize = 2, subFFTNum is half its incoming value,
@// pSrc = pDst - pointStep and pDst = pPingPongBuf.
.MACRO FFTSTAGE scaled, inverse, name
@// grpSize = subFFTNum / 2; the same register then serves as setCount
MOV grpSize,subFFTNum,LSR #1
@// pointStep = 4*grpSize bytes (outPointStep is the same register)
LSL pointStep,grpSize,#2
@// step, which is also dstStep, = 4 - pointStep
RSB step,pointStep,#4
@// values handed on to the next stage
MOV subFFTNum,grpSize
MOV subFFTSize,#2
@// Group zero only: every twiddle factor is 1
grpZeroSetLoop\name:
@// count down first; the flags survive until the BGT because the NEON
@// instructions below leave them untouched
SUBS setCount,setCount,#1
@// one 32-bit lane per input point
VLD1 {dX0S32[0]},[pSrc],pointStep
VLD1 {dX1S32[0]},[pSrc],step
.ifnes "\scaled", "TRUE"
@// unscaled butterfly
VADD dY0,dX0,dX1
VSUB dY1,dX0,dX1
.else
@// scaled butterfly: results are halved
VHADD dY0,dX0,dX1
VHSUB dY1,dX0,dX1
.endif
@// the second store rewinds pDst to the next set (4 - pointStep)
VST1 {dY0S32[0]},[pDst],outPointStep
VST1 {dY1S32[0]},[pDst],dstStep
BGT grpZeroSetLoop\name
@// the data just written becomes the source of the next stage
SUB pSrc,pDst,pointStep
MOV pDst,pPingPongBuf
.endm
@// Forward, unscaled first-stage radix-2 on SC16 data.
@// The body is the FFTSTAGE expansion with scaled = "FALSE"; the label
@// suffix is FWD. M_START / M_END are provided by the included headers.
M_START armSP_FFTFwd_CToC_SC16_Radix2_fs_OutOfPlace_unsafe,r4
FFTSTAGE "FALSE","FALSE",FWD
M_END
@// Inverse, unscaled first-stage radix-2 on SC16 data.
@// The body is the FFTSTAGE expansion with scaled = "FALSE"; the label
@// suffix is INV. The macro body does not test the inverse argument, so
@// the emitted instructions match the forward variant.
M_START armSP_FFTInv_CToC_SC16_Radix2_fs_OutOfPlace_unsafe,r4
FFTSTAGE "FALSE","TRUE",INV
M_END
@// Forward, scaled first-stage radix-2 on SC16 data.
@// The body is the FFTSTAGE expansion with scaled = "TRUE" (halving
@// add/subtract); the label suffix is FWDSFS.
M_START armSP_FFTFwd_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe,r4
FFTSTAGE "TRUE","FALSE",FWDSFS
M_END
@// Inverse, scaled first-stage radix-2 on SC16 data.
@// The body is the FFTSTAGE expansion with scaled = "TRUE" (halving
@// add/subtract); the label suffix is INVSFS. The macro body does not
@// test the inverse argument.
M_START armSP_FFTInv_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe,r4
FFTSTAGE "TRUE","TRUE",INVSFS
M_END
.END

Просмотреть файл

@ -1,210 +0,0 @@
@//
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@// Use of this source code is governed by a BSD-style license
@// that can be found in the LICENSE file in the root of the source
@// tree. An additional intellectual property rights grant can be found
@// in the file PATENTS. All contributing project authors may
@// be found in the AUTHORS file in the root of the source tree.
@//
@// This file was originally licensed as follows. It has been
@// relicensed with permission from the copyright holders.
@//
@//
@// File Name: armSP_FFT_CToC_SC16_Radix2_ls_unsafe_s.s
@// OpenMAX DL: v1.0.2
@// Last Modified Revision: 6741
@// Last Modified Date: Wed, 18 Jul 2007
@//
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
@//
@//
@//
@// Description:
@// Compute a Radix 2 FFT stage for a N point complex signal
@//
@//
@// Include standard headers
#include "dl/api/armCOMM_s.h"
#include "dl/api/omxtypes_s.h"
@// Import symbols required from other files
@// (For example tables)
@// Set debugging level
@//DEBUG_ON SETL {TRUE}
@// Guarding implementation by the processor name
@// Guarding implementation by the processor name
@//Input Registers
@// Register aliases are plain preprocessor defines.  Keep comments on their
@// own lines: text after the register name would become part of the alias.
@// pSrc/pDst/pTwiddle are the load, store and twiddle pointers used by
@// FFTSTAGE; subFFTSize is read and doubled, subFFTNum is set to 1.
#define pSrc r0
#define pDst r2
#define pTwiddle r1
#define subFFTNum r6
#define subFFTSize r7
@//Output Registers
@//Local Scratch Registers
@// NOTE: grpCount and pTmp both map to r4.  grpCount is the loop counter;
@// pTmp is only written after the loop has finished, so they do not clash.
#define outPointStep r3
#define grpCount r4
#define dstStep r5
#define pTmp r4
#define step r8
@// Neon Registers
@// dWr/dWi   : twiddle real/imag parts, one group per lane (lanes 0 and 1).
@// dXr*/dXi* : de-interleaved input points 0 and 1.
@// dYr*/dYi* : butterfly outputs.
@// qT0/qT1   : 32-bit products; Q5/Q6 alias D10-D13, clear of D0-D9.
#define dWr D0.S16
#define dWi D1.S16
#define dXr0 D2.S16
#define dXi0 D3.S16
#define dXr1 D4.S16
#define dXi1 D5.S16
#define dYr0 D6.S16
#define dYi0 D7.S16
#define dYr1 D8.S16
#define dYi1 D9.S16
#define qT0 Q5.S32
#define qT1 Q6.S32
.MACRO FFTSTAGE scaled, inverse, name
@// Body of the last radix-2 stage on 16-bit complex data.
@//   scaled  - "TRUE" selects halving add/sub (VHADD/VHSUB) for the
@//             butterfly; any other value selects VADD/VSUB.
@//   inverse - "TRUE" multiplies point 1 by the conjugated twiddle; any
@//             other value multiplies by the twiddle as loaded.
@//   name    - tag appended to the loop label so each expansion is unique.
@// Reads subFFTSize; on exit subFFTNum = 1, subFFTSize is doubled, pSrc and
@// pDst are rewound and swapped, pTwiddle is rewound.
@// Writes r3, r4, r5, r8, D0-D13 and the condition flags.
@// outPointStep = 4*subFFTSize bytes
MOV outPointStep,subFFTSize,LSL #2
@// Update grpCount and grpSize right away
MOV subFFTNum,#1 @//after the last stage
LSL grpCount,subFFTSize,#1
@// update subFFTSize for the next stage
MOV subFFTSize,grpCount
SUB step,outPointStep,#4 @// step = -4+outPointStep
RSB dstStep,step,#0 @// dstStep = -4-outPointStep+8 = -step
@//RSB dstStep,outPointStep,#16
@// Loop on 2 grps at a time for the last stage
grpLoop\name:
@// One 16-bit (wr, wi) pair per group: lane 0 for grp 0, lane 1 for grp 1.
VLD2 {dWr[0],dWi[0]},[pTwiddle]! @// grp 0
VLD2 {dWr[1],dWi[1]},[pTwiddle]! @// grp 1
@//VLD2 {dWr,dWi},[pTwiddle],#16
@// Four 16-bit values per group (xr0, xi0, xr1, xi1) into the same lane.
VLD4 {dXr0[0],dXi0[0],dXr1[0],dXi1[0]},[pSrc]! @// grp 0
VLD4 {dXr0[1],dXi0[1],dXr1[1],dXi1[1]},[pSrc]! @// grp 1
@//VLD4 {dXr0,dXi0,dXr1,dXi1},[pSrc],#32
@// Flags set here are consumed by the BGT at the bottom of the loop; no
@// instruction in between updates them.
SUBS grpCount,grpCount,#4 @// grpCount is multiplied by 2
@// Complex multiply of point 1 by the twiddle, widened to 32 bits.
.ifeqs "\inverse", "TRUE"
VMULL qT0,dXr1,dWr
VMLAL qT0,dXi1,dWi @// real part
VMULL qT1,dXi1,dWr
VMLSL qT1,dXr1,dWi @// imag part
.ELSE
VMULL qT0,dXr1,dWr
VMLSL qT0,dXi1,dWi @// real part
VMULL qT1,dXi1,dWr
VMLAL qT1,dXr1,dWi @// imag part
.ENDIF
@// Round and narrow the products back to 16 bits (shift right by 15).
VRSHRN dXr1,qT0,#15
VRSHRN dXi1,qT1,#15
@// Butterfly: Y0 = X0 - W*X1, Y1 = X0 + W*X1
.ifeqs "\scaled", "TRUE"
VHSUB dYr0,dXr0,dXr1
VHSUB dYi0,dXi0,dXi1
VHADD dYr1,dXr0,dXr1
VHADD dYi1,dXi0,dXi1
.ELSE
VSUB dYr0,dXr0,dXr1
VSUB dYi0,dXi0,dXi1
VADD dYr1,dXr0,dXr1
VADD dYi1,dXi0,dXi1
.ENDIF
@// Y0 of both groups goes to pDst, Y1 of both groups outPointStep bytes
@// further on; the net advance of pDst per iteration is 8 bytes.
VST2 {dYr0[0],dYi0[0]},[pDst]!
VST2 {dYr0[1],dYi0[1]},[pDst],step @// step = -4+outPointStep
VST2 {dYr1[0],dYi1[0]},[pDst]!
VST2 {dYr1[1],dYi1[1]},[pDst],dstStep @// dstStep = -4-outPointStep+8 = -step
@//VST2 {dYr0,dYi0},[pDst],outPointStep
@//VST2 {dYr1,dYi1},[pDst],dstStep @// dstStep = step = -outPointStep + 16
BGT grpLoop\name
@// Reset and Swap pSrc and pDst for the next stage
@// (pTmp shares r4 with grpCount, which is no longer needed here)
MOV pTmp,pDst
SUB pDst,pSrc,outPointStep,LSL #1 @// pDst -= 2*size; pSrc -= 4*size bytes
SUB pSrc,pTmp,outPointStep
@// Reset pTwiddle for the next stage
SUB pTwiddle,pTwiddle,outPointStep @// pTwiddle -= 2*size bytes
.endm
@// Entry points for the last-stage radix-2 kernel: one FFTSTAGE expansion
@// per (scaled, inverse) combination, bracketed by M_START/M_END (helper
@// macros not defined in this file; presumably procedure prologue/epilogue -
@// confirm in dl/api/armCOMM_s.h).
@// FFTSTAGE arguments: scaled, inverse, label tag.
M_START armSP_FFTFwd_CToC_SC16_Radix2_ls_OutOfPlace_unsafe,r4
FFTSTAGE "FALSE","FALSE",FWD
M_END
M_START armSP_FFTInv_CToC_SC16_Radix2_ls_OutOfPlace_unsafe,r4
FFTSTAGE "FALSE","TRUE",INV
M_END
M_START armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe,r4
FFTSTAGE "TRUE","FALSE",FWDSFS
M_END
M_START armSP_FFTInv_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe,r4
FFTSTAGE "TRUE","TRUE",INVSFS
M_END
.END

Просмотреть файл

@ -1,216 +0,0 @@
@//
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@// Use of this source code is governed by a BSD-style license
@// that can be found in the LICENSE file in the root of the source
@// tree. An additional intellectual property rights grant can be found
@// in the file PATENTS. All contributing project authors may
@// be found in the AUTHORS file in the root of the source tree.
@//
@// This file was originally licensed as follows. It has been
@// relicensed with permission from the copyright holders.
@//
@//
@// File Name: armSP_FFT_CToC_SC16_Radix2_ps_unsafe_s.s
@// OpenMAX DL: v1.0.2
@// Last Modified Revision: 6740
@// Last Modified Date: Wed, 18 Jul 2007
@//
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
@//
@//
@//
@// Description:
@// Compute a Radix 2 FFT stage for a N point complex signal
@//
@//
@// Include standard headers
#include "dl/api/armCOMM_s.h"
#include "dl/api/omxtypes_s.h"
@// Import symbols required from other files
@// (For example tables)
@// Set debugging level
@//DEBUG_ON SETL {TRUE}
@// Guarding implementation by the processor name
@//Input Registers
@// Register aliases are plain preprocessor defines.  Keep comments on their
@// own lines: text after the register name would become part of the alias.
@// pSrc/pDst/pTwiddle are the load, store and twiddle pointers used by
@// FFTSTAGE; subFFTNum is read and halved, subFFTSize is read and doubled.
#define pSrc r0
#define pDst r2
#define pTwiddle r1
#define subFFTNum r6
#define subFFTSize r7
@//Output Registers
@//Local Scratch Registers
@// NOTE: grpCount and pTmp both map to r4; pTmp is only written after the
@// group loop has finished.
#define outPointStep r3
#define grpCount r4
#define dstStep r5
#define twStep r8
#define pTmp r4
@// Neon Registers
@// dW1S32/dW1 and dW2S32/dW2 are 32-bit and 16-bit views of D0 and D1.
@// dX0-dX3 hold the four loaded input vectors, dY0-dY3 the outputs.
@// qT0/qT1 are the 32-bit products; Q5/Q6 alias D10-D13, clear of D0-D9.
#define dW1S32 D0.S32
#define dW2S32 D1.S32
#define dW1 D0.S16
#define dW2 D1.S16
#define dX0 D2.S16
#define dX1 D3.S16
#define dX2 D4.S16
#define dX3 D5.S16
#define dY0 D6.S16
#define dY1 D7.S16
#define dY2 D8.S16
#define dY3 D9.S16
#define qT0 Q5.S32
#define qT1 Q6.S32
.MACRO FFTSTAGE scaled, inverse, name
@// Body of a radix-2 stage that handles two groups of two sets per loop
@// iteration ("ps" in the file name).
@//   scaled  - "TRUE" selects halving add/sub (VHADD/VHSUB) for the
@//             butterfly; any other value selects VADD/VSUB.
@//   inverse - "TRUE" multiplies point 1 by the conjugated twiddle; any
@//             other value multiplies by the twiddle as loaded.
@//   name    - tag appended to the loop label so each expansion is unique.
@// Reads subFFTSize and subFFTNum; on exit subFFTSize is doubled, subFFTNum
@// is halved, pSrc and pDst are rewound and swapped, and outPointStep is
@// subtracted from pTwiddle.
@// Writes r3, r4, r5, r8, D0-D13 and the condition flags.
@// Define stack arguments
@// Update grpCount and grpSize right away in order to reuse pGrpCount and pGrpSize regs
LSL grpCount,subFFTSize,#1
@// update subFFTSize for the next stage
MOV subFFTSize,grpCount
@// pOut0+1 increments pOut0 by 8 bytes
@// pOut0+outPointStep == increment of 4*outPointStep bytes = 2*size bytes
@// SMULBB multiplies the signed low halfwords of its two operands.
SMULBB outPointStep,grpCount,subFFTNum
MOV twStep,subFFTNum,LSL #1
LSR subFFTNum,subFFTNum,#1 @//grpSize
RSB dstStep,outPointStep,#8
@// Note: pointStep is 8 in this case, so no extra register is needed for it
@// Loop on the groups: 2 groups at a time
grpLoop\name:
@// Each load replicates one 32-bit twiddle word into both lanes of the
@// destination and then advances pTwiddle by twStep.
VLD1 dW1S32[],[pTwiddle],twStep @//[wi | wr]
VLD1 dW2S32[],[pTwiddle],twStep
@// Process the sets for each grp: 2 sets at a time (no set looping required)
VLD1 dX0,[pSrc]! @// point0: of set0,set1 of grp0
VLD1 dX1,[pSrc]! @// point1: of set0,set1 of grp0
VLD1 dX2,[pSrc]! @// point0: of set0,set1 of grp1
VLD1 dX3,[pSrc]! @// point1: of set0,set1 of grp1
@// Flags set here are consumed by the BGT at the bottom of the loop.
SUBS grpCount,grpCount,#4 @// decrement the loop counter
@// De-interleave: dW1 = wr (grp0, grp0, grp1, grp1), dW2 = wi likewise;
@// dX1 = real parts of point 1, dX3 = imaginary parts of point 1.
VUZP dW1,dW2
VUZP dX1,dX3
.ifeqs "\inverse", "TRUE"
VMULL qT0,dX1,dW1
VMLAL qT0,dX3,dW2 @// real part
VMULL qT1,dX3,dW1
VMLSL qT1,dX1,dW2 @// imag part
.ELSE
VMULL qT0,dX1,dW1
VMLSL qT0,dX3,dW2 @// real part
VMULL qT1,dX3,dW1
VMLAL qT1,dX1,dW2 @// imag part
.ENDIF
@// Round and narrow the products back to 16 bits (shift right by 15).
VRSHRN dX1,qT0,#15
VRSHRN dX3,qT1,#15
@// Re-interleave so dX1/dX3 again hold (re, im) pairs per group.
VZIP dX1,dX3
@// Butterflies: Y0 = X0 - W*X1, Y1 = X0 + W*X1 (grp0); Y2, Y3 for grp1.
.ifeqs "\scaled", "TRUE"
VHSUB dY0,dX0,dX1
VHADD dY1,dX0,dX1
VHSUB dY2,dX2,dX3
VHADD dY3,dX2,dX3
.ELSE
VSUB dY0,dX0,dX1
VADD dY1,dX0,dX1
VSUB dY2,dX2,dX3
VADD dY3,dX2,dX3
.ENDIF
@// Each pair of stores advances pDst by a net 8 bytes.
VST1 dY0,[pDst],outPointStep @// point0: of set0,set1 of grp0
VST1 dY1,[pDst],dstStep @// dstStep = -outPointStep + 8
VST1 dY2,[pDst],outPointStep @// point0: of set0,set1 of grp1
VST1 dY3,[pDst],dstStep @// point1: of set0,set1 of grp1
BGT grpLoop\name
@// Reset and Swap pSrc and pDst for the next stage
@// (pTmp shares r4 with grpCount, which is no longer needed here)
MOV pTmp,pDst
SUB pDst,pSrc,outPointStep,LSL #1 @// pDst -= 2*size; pSrc -= 4*size bytes
SUB pSrc,pTmp,outPointStep
@// Reset pTwiddle for the next stage
SUB pTwiddle,pTwiddle,outPointStep @// pTwiddle -= 2*size bytes
.endm
@// Entry points for the "ps" radix-2 kernel: one FFTSTAGE expansion per
@// (scaled, inverse) combination, bracketed by M_START/M_END (helper macros
@// not defined in this file; presumably procedure prologue/epilogue -
@// confirm in dl/api/armCOMM_s.h).
@// FFTSTAGE arguments: scaled, inverse, label tag.
M_START armSP_FFTFwd_CToC_SC16_Radix2_ps_OutOfPlace_unsafe,r4
FFTSTAGE "FALSE","FALSE",FWD
M_END
M_START armSP_FFTInv_CToC_SC16_Radix2_ps_OutOfPlace_unsafe,r4
FFTSTAGE "FALSE","TRUE",INV
M_END
M_START armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe,r4
FFTSTAGE "TRUE","FALSE",FWDSFS
M_END
M_START armSP_FFTInv_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe,r4
FFTSTAGE "TRUE","TRUE",INVSFS
M_END
.END

Просмотреть файл

@ -1,219 +0,0 @@
@//
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@// Use of this source code is governed by a BSD-style license
@// that can be found in the LICENSE file in the root of the source
@// tree. An additional intellectual property rights grant can be found
@// in the file PATENTS. All contributing project authors may
@// be found in the AUTHORS file in the root of the source tree.
@//
@// This file was originally licensed as follows. It has been
@// relicensed with permission from the copyright holders.
@//
@//
@// File Name: armSP_FFT_CToC_SC16_Radix2_unsafe_s.s
@// OpenMAX DL: v1.0.2
@// Last Modified Revision: 5892
@// Last Modified Date: Thu, 07 Jun 2007
@//
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
@//
@//
@//
@// Description:
@// Compute a Radix 2 FFT stage for a N point complex signal
@//
@//
@// Include standard headers
#include "dl/api/armCOMM_s.h"
#include "dl/api/omxtypes_s.h"
@// Import symbols required from other files
@// (For example tables)
@// Set debugging level
@//DEBUG_ON SETL {TRUE}
@// Guarding implementation by the processor name
@// Guarding implementation by the processor name
@//Input Registers
@// Register aliases are plain preprocessor defines.  Keep comments on their
@// own lines: text after the register name would become part of the alias.
@// pSrc/pDst/pTwiddle are the load, store and twiddle pointers used by
@// FFTSTAGE; subFFTNum is read and halved, subFFTSize is read and doubled.
#define pSrc r0
#define pDst r2
#define pTwiddle r1
#define subFFTNum r6
#define subFFTSize r7
@//Output Registers
@//Local Scratch Registers
@// Every scratch alias below has its own core register in this file.
#define outPointStep r3
#define pointStep r4
#define grpCount r5
#define setCount r8
#define step r10
#define dstStep r11
#define pTmp r9
@// Neon Registers
@// dW      : twiddle vector; lanes 0 and 1 are used as wr and wi.
@// dX0-dX3 : de-interleaved inputs (dX0/dX1 = point 0, dX2/dX3 = point 1).
@// dY0-dY3 : butterfly outputs.
@// NOTE: qT0 (Q3) aliases D6/D7 = dY0/dY1 and qT1 (Q4) aliases D8/D9 =
@// dY2/dY3.  FFTSTAGE narrows the products into dX2/dX3 before it writes
@// any dY register, so the overlap is safe only in that order.
#define dW D0.S16
#define dX0 D2.S16
#define dX1 D3.S16
#define dX2 D4.S16
#define dX3 D5.S16
#define dY0 D6.S16
#define dY1 D7.S16
#define dY2 D8.S16
#define dY3 D9.S16
#define qT0 Q3.S32
#define qT1 Q4.S32
.MACRO FFTSTAGE scaled, inverse, name
@// Body of a general radix-2 stage: outer loop over groups (one twiddle
@// load per group), inner loop over sets, four complex points per vector.
@//   scaled  - "TRUE" selects halving add/sub (VHADD/VHSUB) for the
@//             butterfly; any other value selects VADD/VSUB.
@//   inverse - "TRUE" multiplies point 1 by the conjugated twiddle; any
@//             other value multiplies by the twiddle as loaded.
@//   name    - tag appended to both loop labels so each expansion is unique.
@// Reads subFFTSize and subFFTNum; on exit subFFTSize is doubled, subFFTNum
@// is halved, pSrc and pDst are rewound and swapped, and outPointStep is
@// subtracted from pTwiddle.
@// Writes r3, r4, r5, r8, r9, r10, r11, D0, D2-D9 and the condition flags.
@// Define stack arguments
@// Update grpCount and grpSize right away in order to reuse pGrpCount and pGrpSize regs
LSR subFFTNum,subFFTNum,#1 @//grpSize
LSL grpCount,subFFTSize,#1
@// pT0+1 increments pT0 by 8 bytes
@// pT0+pointStep = increment of 4*pointStep bytes = 2*grpSize bytes
MOV pointStep,subFFTNum,LSL #1
@// update subFFTSize for the next stage
MOV subFFTSize,grpCount
@// pOut0+1 increments pOut0 by 8 bytes
@// pOut0+outPointStep == increment of 4*outPointStep bytes = 2*size bytes
@// SMULBB multiplies the signed low halfwords of its two operands.
SMULBB outPointStep,grpCount,pointStep
@// From here on pointStep = 4 * (halved) subFFTNum.
LSL pointStep,pointStep,#1
RSB step,pointStep,#16
RSB dstStep,outPointStep,#16
@// Loop on the groups
grpLoop\name:
@// Loads 8 bytes of twiddle data; only lanes 0 and 1 are used below.
VLD1 dW,[pTwiddle],pointStep @//[wi | wr]
MOV setCount,pointStep,LSR #2
@// Loop on the sets: 4 at a time
setLoop\name:
@// The two loads advance pSrc by pointStep + step = 16 bytes in total.
VLD2 {dX0,dX1},[pSrc],pointStep @// point0: dX0-real part dX1-img part
VLD2 {dX2,dX3},[pSrc],step @// point1: dX2-real part dX3-img part
@// Flags set here are consumed by "BGT setLoop" below.
SUBS setCount,setCount,#4
.ifeqs "\inverse", "TRUE"
VMULL qT0,dX2,dW[0]
VMLAL qT0,dX3,dW[1] @// real part
VMULL qT1,dX3,dW[0]
VMLSL qT1,dX2,dW[1] @// imag part
.ELSE
VMULL qT0,dX2,dW[0]
VMLSL qT0,dX3,dW[1] @// real part
VMULL qT1,dX3,dW[0]
VMLAL qT1,dX2,dW[1] @// imag part
.ENDIF
@// Round and narrow to 16 bits.  This must happen before any dY register
@// is written because qT0/qT1 occupy the same registers as dY0-dY3.
VRSHRN dX2,qT0,#15
VRSHRN dX3,qT1,#15
@// Butterfly: (dY0,dY1) = X0 - W*X1, (dY2,dY3) = X0 + W*X1
.ifeqs "\scaled", "TRUE"
VHSUB dY0,dX0,dX2
VHSUB dY1,dX1,dX3
VHADD dY2,dX0,dX2
VHADD dY3,dX1,dX3
.ELSE
VSUB dY0,dX0,dX2
VSUB dY1,dX1,dX3
VADD dY2,dX0,dX2
VADD dY3,dX1,dX3
.ENDIF
@// The two stores advance pDst by a net 16 bytes.
VST2 {dY0,dY1},[pDst],outPointStep
VST2 {dY2,dY3},[pDst],dstStep @// dstStep = -outPointStep + 16
BGT setLoop\name
@// The ADD does not update the flags, so BGT tests the SUBS result.
SUBS grpCount,grpCount,#2
ADD pSrc,pSrc,pointStep
BGT grpLoop\name
@// Reset and Swap pSrc and pDst for the next stage
MOV pTmp,pDst
SUB pDst,pSrc,outPointStep,LSL #1 @// pDst -= 2*size; pSrc -= 4*size bytes
SUB pSrc,pTmp,outPointStep
@// Reset pTwiddle for the next stage
SUB pTwiddle,pTwiddle,outPointStep @// pTwiddle -= 2*size bytes
.endm
@// Entry points for the general radix-2 kernel: one FFTSTAGE expansion per
@// (scaled, inverse) combination, bracketed by M_START/M_END (helper macros
@// not defined in this file; presumably procedure prologue/epilogue -
@// confirm in dl/api/armCOMM_s.h).
@// FFTSTAGE arguments: scaled, inverse, label tag.
@// NOTE(review): the macro body writes core registers up to r11 while
@// M_START is given r4 - confirm who is expected to preserve r5-r11.
M_START armSP_FFTFwd_CToC_SC16_Radix2_OutOfPlace_unsafe,r4
FFTSTAGE "FALSE","FALSE",FWD
M_END
M_START armSP_FFTInv_CToC_SC16_Radix2_OutOfPlace_unsafe,r4
FFTSTAGE "FALSE","TRUE",INV
M_END
M_START armSP_FFTFwd_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe,r4
FFTSTAGE "TRUE","FALSE",FWDSFS
M_END
M_START armSP_FFTInv_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe,r4
FFTSTAGE "TRUE","TRUE",INVSFS
M_END
.END

Просмотреть файл

@ -1,314 +0,0 @@
@//
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@// Use of this source code is governed by a BSD-style license
@// that can be found in the LICENSE file in the root of the source
@// tree. An additional intellectual property rights grant can be found
@// in the file PATENTS. All contributing project authors may
@// be found in the AUTHORS file in the root of the source tree.
@//
@// This file was originally licensed as follows. It has been
@// relicensed with permission from the copyright holders.
@//
@//
@// File Name: armSP_FFT_CToC_SC16_Radix4_fs_unsafe_s.s
@// OpenMAX DL: v1.0.2
@// Last Modified Revision: 7761
@// Last Modified Date: Wed, 26 Sep 2007
@//
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
@//
@//
@//
@// Description:
@// Compute a first stage Radix 4 FFT stage for a N point complex signal
@//
@//
@// Include standard headers
#include "dl/api/armCOMM_s.h"
#include "dl/api/omxtypes_s.h"
@// Import symbols required from other files
@// (For example tables)
@// Set debugging level
@//DEBUG_ON SETL {TRUE}
@// Guarding implementation by the processor name
@// Guarding implementation by the processor name
@//Input Registers
@// Register aliases are plain preprocessor defines.  Keep comments on their
@// own lines: text after the register name would become part of the alias.
@// pSrc/pDst are the load and store pointers used by FFTSTAGE; pPingPongBuf
@// is copied into pDst at the end of the stage.  pTwiddle is not referenced
@// by the macro body in this file.
#define pSrc r0
#define pDst r2
#define pTwiddle r1
#define pPingPongBuf r5
#define subFFTNum r6
#define subFFTSize r7
@//Output Registers
@//Local Scratch Registers
#define grpSize r3
@// Reuse grpSize as setCount
#define setCount r3
@// pointStep and outPointStep are the same register (r4).
#define pointStep r4
#define outPointStep r4
#define setStep r8
#define step1 r9
#define step3 r10
@// Neon Registers
@// dX* : de-interleaved inputs data[0]..data[3] (real in even D, imag in odd D).
@// dY* : results of the first butterfly level.
@// dZ* : results of the second butterfly level (stored to pDst).
#define dXr0 D0.S16
#define dXi0 D1.S16
#define dXr1 D2.S16
#define dXi1 D3.S16
#define dXr2 D4.S16
#define dXi2 D5.S16
#define dXr3 D6.S16
#define dXi3 D7.S16
#define dYr0 D8.S16
#define dYi0 D9.S16
#define dYr1 D10.S16
#define dYi1 D11.S16
#define dYr2 D12.S16
#define dYi2 D13.S16
#define dYr3 D14.S16
#define dYi3 D15.S16
#define dZr0 D16.S16
#define dZi0 D17.S16
#define dZr1 D18.S16
#define dZi1 D19.S16
#define dZr2 D20.S16
#define dZi2 D21.S16
#define dZr3 D22.S16
#define dZi3 D23.S16
@// Quad views of the register pairs above: qXn = (dXrn, dXin),
@// qYn = (dYrn, dYin), qZ0 = (dZr0, dZi0), qZ1 = (dZr1, dZi1).
#define qY0 Q4.S16
#define qY2 Q6.S16
#define qX0 Q0.S16
#define qX2 Q2.S16
#define qY1 Q5.S16
#define qY3 Q7.S16
#define qX1 Q1.S16
#define qX3 Q3.S16
#define qZ0 Q8.S16
#define qZ1 Q9.S16
.MACRO FFTSTAGE scaled, inverse, name
@// Body of the first radix-4 stage (all twiddles are 1, so there are no
@// multiplies).  The loop is software pipelined: the loads inside the loop
@// fetch the inputs for the NEXT iteration, so the four loads before the
@// loop prime the pipeline and the last iteration still issues four loads.
@//   scaled  - "TRUE" selects halving add/sub (VHADD/VHSUB); any other
@//             value selects VADD/VSUB.
@//   inverse - "TRUE" swaps the positions at which the two cross terms
@//             (Y2 +/- j*Y3) are stored.
@//   name    - tag appended to the loop label so each expansion is unique.
@// Reads subFFTNum and pPingPongBuf; on exit subFFTNum is divided by 4,
@// subFFTSize = 4, pSrc points at the start of the data just written and
@// pDst = pPingPongBuf.
@// Writes r3, r4, r8, r9, r10, D0-D23 and the condition flags.
@// Define stack arguments
MOV pointStep,subFFTNum
@// Update pSubFFTSize and pSubFFTNum regs
VLD2 {dXr0,dXi0},[pSrc :128],pointStep @// data[0]
@// Note: setCount = subFFTNum/4 (reuse the grpSize reg for setCount)
LSR grpSize,subFFTNum,#2
MOV subFFTNum,grpSize
@// pT0+1 increments pT0 by 4 bytes
@// pT0+pointStep = increment of 4*pointStep bytes = grpSize bytes
@// Note: outPointStep = pointStep for firststage
VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1]
@// Calculate the step of input data for the next set
@//MOV setStep,pointStep,LSL #1
MOV setStep,grpSize,LSL #3
VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2]
@// step1 = 8*grpSize, used to hop from data[0] to data[2] and from
@// data[1] to data[3] inside the loop.
MOV step1,setStep
ADD setStep,setStep,pointStep @// setStep = 3*pointStep
RSB setStep,setStep,#16 @// setStep = - 3*pointStep+16
VLD2 {dXr3,dXi3},[pSrc :128],setStep @// data[3]
MOV subFFTSize,#4 @// subFFTSize is set to 4 here (an earlier comment said 1)
.ifeqs "\scaled", "TRUE"
VHADD qY0,qX0,qX2 @// u0
.ELSE
VADD qY0,qX0,qX2 @// u0
.ENDIF
@// step3 = -pointStep, used to step back from data[2] to data[1].
RSB step3,pointStep,#0
@// grp = 0 a special case since all the twiddle factors are 1
@// Loop on the sets: 4 sets at a time
grpZeroSetLoop\name:
@// In both arms below each arithmetic instruction reads its X operands
@// before the following VLD2 overwrites them with next-iteration data.
@// The SUBS flags are consumed by the BGT after the final .ENDIF.
.ifeqs "\scaled", "TRUE"
@// finish first stage of 4 point FFT
VHSUB qY2,qX0,qX2 @// u1
SUBS setCount,setCount,#4 @// decrement the set loop counter
VLD2 {dXr0,dXi0},[pSrc :128],step1 @// data[0]
VHADD qY1,qX1,qX3 @// u2
VLD2 {dXr2,dXi2},[pSrc :128],step3
VHSUB qY3,qX1,qX3 @// u3
@// finish second stage of 4 point FFT
VLD2 {dXr1,dXi1},[pSrc :128],step1 @// data[1]
VHADD qZ0,qY0,qY1 @// y0
VLD2 {dXr3,dXi3},[pSrc :128],setStep
.ifeqs "\inverse", "TRUE"
@// Store order for the inverse transform: Z0, Z3, Z1, Z2
VHSUB dZr3,dYr2,dYi3 @// y3
VHADD dZi3,dYi2,dYr3
VST2 {dZr0,dZi0},[pDst :128],outPointStep
VHSUB qZ1,qY0,qY1 @// y2
VST2 {dZr3,dZi3},[pDst :128],outPointStep
VHADD dZr2,dYr2,dYi3 @// y1
VST2 {dZr1,dZi1},[pDst :128],outPointStep
VHSUB dZi2,dYi2,dYr3
VHADD qY0,qX0,qX2 @// u0 (next loop)
VST2 {dZr2,dZi2},[pDst :128],setStep
.ELSE
@// Store order for the forward transform: Z0, Z2, Z1, Z3
VHADD dZr2,dYr2,dYi3 @// y1
VHSUB dZi2,dYi2,dYr3
VST2 {dZr0,dZi0},[pDst :128],outPointStep
VHSUB qZ1,qY0,qY1 @// y2
VST2 {dZr2,dZi2},[pDst :128],outPointStep
VHSUB dZr3,dYr2,dYi3 @// y3
VHADD dZi3,dYi2,dYr3
VST2 {dZr1,dZi1},[pDst :128],outPointStep
VHADD qY0,qX0,qX2 @// u0 (next loop)
VST2 {dZr3,dZi3},[pDst :128],setStep
.ENDIF
.ELSE
@// finish first stage of 4 point FFT
VSUB qY2,qX0,qX2 @// u1
SUBS setCount,setCount,#4 @// decrement the set loop counter
VLD2 {dXr0,dXi0},[pSrc :128],step1 @// data[0]
VADD qY1,qX1,qX3 @// u2
VLD2 {dXr2,dXi2},[pSrc :128],step3
VSUB qY3,qX1,qX3 @// u3
@// finish second stage of 4 point FFT
VLD2 {dXr1,dXi1},[pSrc :128],step1 @// data[1]
VADD qZ0,qY0,qY1 @// y0
VLD2 {dXr3,dXi3},[pSrc :128],setStep
.ifeqs "\inverse", "TRUE"
@// Store order for the inverse transform: Z0, Z3, Z1, Z2
VSUB dZr3,dYr2,dYi3 @// y3
VADD dZi3,dYi2,dYr3
VST2 {dZr0,dZi0},[pDst :128],outPointStep
VSUB qZ1,qY0,qY1 @// y2
VST2 {dZr3,dZi3},[pDst :128],outPointStep
VADD dZr2,dYr2,dYi3 @// y1
VST2 {dZr1,dZi1},[pDst :128],outPointStep
VSUB dZi2,dYi2,dYr3
VADD qY0,qX0,qX2 @// u0 (next loop)
VST2 {dZr2,dZi2},[pDst :128],setStep
.ELSE
@// Store order for the forward transform: Z0, Z2, Z1, Z3
VADD dZr2,dYr2,dYi3 @// y1
VSUB dZi2,dYi2,dYr3
VST2 {dZr0,dZi0},[pDst :128],outPointStep
VSUB qZ1,qY0,qY1 @// y2
VST2 {dZr2,dZi2},[pDst :128],outPointStep
VSUB dZr3,dYr2,dYi3 @// y3
VADD dZi3,dYi2,dYr3
VST2 {dZr1,dZi1},[pDst :128],outPointStep
VADD qY0,qX0,qX2 @// u0 (next loop)
VST2 {dZr3,dZi3},[pDst :128],setStep
.ENDIF
.ENDIF
BGT grpZeroSetLoop\name
@// reset pSrc to pDst for the next stage
@// (each iteration advanced pDst by a net 16 bytes)
SUB pSrc,pDst,pointStep @// pSrc = pDst - pointStep, i.e. the start of the output
MOV pDst,pPingPongBuf
.endm
@// Entry points for the first-stage radix-4 kernel: one FFTSTAGE expansion
@// per (scaled, inverse) combination, bracketed by M_START/M_END (helper
@// macros not defined in this file; presumably procedure prologue/epilogue -
@// confirm in dl/api/armCOMM_s.h).
@// FFTSTAGE arguments: scaled, inverse, label tag.
@// NOTE(review): the macro body writes r8-r10 and D8-D15 while M_START is
@// given r4 - confirm who is expected to preserve them.
M_START armSP_FFTFwd_CToC_SC16_Radix4_fs_OutOfPlace_unsafe,r4
FFTSTAGE "FALSE","FALSE",FWD
M_END
M_START armSP_FFTInv_CToC_SC16_Radix4_fs_OutOfPlace_unsafe,r4
FFTSTAGE "FALSE","TRUE",INV
M_END
M_START armSP_FFTFwd_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe,r4
FFTSTAGE "TRUE","FALSE",FWDSFS
M_END
M_START armSP_FFTInv_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe,r4
FFTSTAGE "TRUE","TRUE",INVSFS
M_END
.END

Просмотреть файл

@ -1,410 +0,0 @@
@//
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@// Use of this source code is governed by a BSD-style license
@// that can be found in the LICENSE file in the root of the source
@// tree. An additional intellectual property rights grant can be found
@// in the file PATENTS. All contributing project authors may
@// be found in the AUTHORS file in the root of the source tree.
@//
@// This file was originally licensed as follows. It has been
@// relicensed with permission from the copyright holders.
@//
@//
@// File Name: armSP_FFT_CToC_SC16_Radix4_ls_unsafe_s.s
@// OpenMAX DL: v1.0.2
@// Last Modified Revision: 7765
@// Last Modified Date: Thu, 27 Sep 2007
@//
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
@//
@//
@//
@// Description:
@// Compute a Radix 4 FFT stage for a N point complex signal
@//
@//
@// Include standard headers
#include "dl/api/armCOMM_s.h"
#include "dl/api/omxtypes_s.h"
@// Import symbols required from other files
@// (For example tables)
@// Set debugging level
@//DEBUG_ON SETL {TRUE}
@// Guarding implementation by the processor name
@// Guarding implementation by the processor name
@// Import symbols required from other files
@// (For example tables)
@//IMPORT armAAC_constTable
@//Input Registers
@// Register aliases are plain preprocessor defines.  Keep comments on their
@// own lines: text after the register name would become part of the alias.
@// pSrc/pDst are the load and store pointers used by FFTSTAGE; pTwiddle is
@// only copied into pw1/pw2/pw3 and is not modified by the macro.
#define pSrc r0
#define pDst r2
#define pTwiddle r1
#define subFFTNum r6
#define subFFTSize r7
@//Output Registers
@//Local Scratch Registers
@// NOTE: grpCount and pTmp both map to r4; pTmp is only written after the
@// group loop has finished.  pw1/pw2/pw3 are independent twiddle cursors.
#define outPointStep r3
#define grpCount r4
#define dstStep r5
#define pw1 r8
#define pw2 r9
#define pw3 r10
#define pTmp r4
@// Neon Registers
@// dButterfly* and dX* name the SAME registers D0-D7: the dButterfly names
@// describe the layout straight after the loads, the dX names the layout
@// after the VUZP rearrangement in FFTSTAGE.
#define dButterfly1Real02 D0.S16
#define dButterfly1Imag02 D1.S16
#define dButterfly1Real13 D2.S16
#define dButterfly1Imag13 D3.S16
#define dButterfly2Real02 D4.S16
#define dButterfly2Imag02 D5.S16
#define dButterfly2Real13 D6.S16
#define dButterfly2Imag13 D7.S16
#define dXr0 D0.S16
#define dXi0 D1.S16
#define dXr1 D2.S16
#define dXi1 D3.S16
#define dXr2 D4.S16
#define dXi2 D5.S16
#define dXr3 D6.S16
#define dXi3 D7.S16
@// Twiddles: 32-bit and 16-bit views of D8-D13.
#define dW1rS32 D8.S32
#define dW1iS32 D9.S32
#define dW2rS32 D10.S32
#define dW2iS32 D11.S32
#define dW3rS32 D12.S32
#define dW3iS32 D13.S32
#define dW1r D8.S16
#define dW1i D9.S16
#define dW2r D10.S16
#define dW2i D11.S16
#define dW3r D12.S16
#define dW3i D13.S16
@// Throw-away destinations for the strided twiddle loads.  They overlap
@// dW3r/dW3i (D12/D13) and dYr3/dYi3 (D14/D15), so the order of the loads
@// in FFTSTAGE matters.
#define dTmp0 D12.S16
#define dTmp1 D13.S16
#define dTmp1S32 D13.S32
#define dTmp2S32 D14.S32
#define dTmp3S32 D15.S32
#define dYr0 D18.S16
#define dYi0 D19.S16
#define dYr1 D16.S16
#define dYi1 D17.S16
#define dYr2 D20.S16
#define dYi2 D21.S16
#define dYr3 D14.S16
#define dYi3 D15.S16
@// Quad views: qYn = (dYrn, dYin), qXn = (dXrn, dXin).
#define qY0 Q9.S16
#define qY1 Q8.S16
#define qY2 Q10.S16
#define qY3 Q7.S16
#define qX0 Q0.S16
#define qX1 Q1.S16
#define qX2 Q2.S16
#define qX3 Q3.S16
@// 32-bit product accumulators.  They share registers with the qY views
@// (qT0 = qY0, qT1 = qY2, qT2 = qY3, qT3 = qY1); FFTSTAGE narrows every
@// product into a dZ register before it writes the qY values.
#define qT0 Q9.S32
#define qT1 Q10.S32
#define qT2 Q7.S32
#define qT3 Q8.S32
#define dZr0 D22.S16
#define dZi0 D23.S16
#define dZr1 D24.S16
#define dZi1 D25.S16
#define dZr2 D26.S16
#define dZi2 D27.S16
#define dZr3 D28.S16
#define dZi3 D29.S16
@// Quad views: qZn = (dZrn, dZin).
#define qZ0 Q11.S16
#define qZ1 Q12.S16
#define qZ2 Q13.S16
#define qZ3 Q14.S16
.MACRO FFTSTAGE scaled, inverse , name
@// Body of the last radix-4 stage, four groups per loop iteration.  The
@// loop is software pipelined: twiddles and input data for the NEXT
@// iteration are loaded inside the loop, so the loads before the loop prime
@// the pipeline and the last iteration loads data that is never used.
@//   scaled  - "TRUE" selects halving add/sub (VHADD/VHSUB); any other
@//             value selects VADD/VSUB.
@//   inverse - "TRUE" multiplies by the conjugated twiddles and swaps the
@//             positions at which Z1 and Z3 are stored.
@//   name    - tag appended to the loop label so each expansion is unique.
@// Reads subFFTSize; on exit subFFTNum = 1, subFFTSize is multiplied by 4,
@// pSrc and pDst are rewound and swapped.  pTwiddle itself is not modified.
@// Writes r3, r4, r5, r8, r9, r10, D0-D29 and the condition flags.
@// Define stack arguments
MOV pw2,pTwiddle
@// 4-way de-interleave of 16-bit values: dW2r/dW2i receive the real/imag
@// parts of every second twiddle; dTmp0/dTmp1 are discarded.
VLD4 {dW2r,dW2i,dTmp0,dTmp1},[pw2 :256]!
MOV pw3,pTwiddle
MOV pw1,pTwiddle
@// pOut0+1 increments pOut0 by 8 bytes
@// pOut0+outPointStep == increment of 4*outPointStep bytes
MOV outPointStep,subFFTSize,LSL #2
@// 3-way de-interleave of 32-bit words: the first listed register receives
@// every third twiddle word; the other two are scratch.
VLD3 {dW3rS32,dTmp1S32,dTmp2S32},[pw3 :64]!
MOV subFFTNum,#1 @//after the last stage
LSL grpCount,subFFTSize,#2
@// Update grpCount and grpSize right away
VLD3 {dW3iS32,dTmp2S32,dTmp3S32},[pw3 :64]!
@// update subFFTSize for the next stage
MOV subFFTSize,grpCount
MOV dstStep,outPointStep,LSL #1
VLD2 {dW1r,dW1i}, [pw1 :128]!
ADD dstStep,dstStep,outPointStep @// dstStep = 3*outPointStep
RSB dstStep,dstStep,#16 @// dstStep = - 3*outPointStep+16
VLD4 {dButterfly1Real02,dButterfly1Imag02,dButterfly1Real13,dButterfly1Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
VLD4 {dButterfly2Real02,dButterfly2Imag02,dButterfly2Real13,dButterfly2Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
@// Process 4 groups at a time
grpLoop\name:
@// Rearrange the third twiddle
VUZP dW3r,dW3i
@// Flags set here are consumed by the BGT at the bottom of the loop.
SUBS grpCount,grpCount,#16 @// grpCount is multiplied by 4
@// After these four VUZPs D0-D7 are addressed through the dXr*/dXi* names.
VUZP dButterfly1Real13, dButterfly2Real13 @// B.r D.r
VUZP dButterfly1Imag13, dButterfly2Imag13 @// B.i D.i
VUZP dButterfly1Real02, dButterfly2Real02 @// A.r C.r
VUZP dButterfly1Imag02, dButterfly2Imag02 @// A.i C.i
@// X1 * W1 (conjugated W1 for the inverse transform)
.ifeqs "\inverse", "TRUE"
VMULL qT0,dXr1,dW1r
VMLAL qT0,dXi1,dW1i @// real part
VMULL qT1,dXi1,dW1r
VMLSL qT1,dXr1,dW1i @// imag part
.ELSE
VMULL qT0,dXr1,dW1r
VMLSL qT0,dXi1,dW1i @// real part
VMULL qT1,dXi1,dW1r
VMLAL qT1,dXr1,dW1i @// imag part
.ENDIF
@// Load the first twiddle for 4 groups : w^1
@// w^1 twiddle (i+0,i+1,i+2,i+3) for group 0,1,2,3
VLD2 {dW1r,dW1i}, [pw1 :128]!
@// X2 * W2
.ifeqs "\inverse", "TRUE"
VMULL qT2,dXr2,dW2r
VMLAL qT2,dXi2,dW2i @// real part
VMULL qT3,dXi2,dW2r
VMLSL qT3,dXr2,dW2i @// imag part
.ELSE
VMULL qT2,dXr2,dW2r
VMLSL qT2,dXi2,dW2i @// real part
VMULL qT3,dXi2,dW2r
VMLAL qT3,dXr2,dW2i @// imag part
.ENDIF
@// Narrow X1*W1 now: qT0/qT1 are reused for X3*W3 below.
VRSHRN dZr1,qT0,#15
VRSHRN dZi1,qT1,#15
@// X3 * W3
.ifeqs "\inverse", "TRUE"
VMULL qT0,dXr3,dW3r
VMLAL qT0,dXi3,dW3i @// real part
VMULL qT1,dXi3,dW3r
VMLSL qT1,dXr3,dW3i @// imag part
.ELSE
VMULL qT0,dXr3,dW3r
VMLSL qT0,dXi3,dW3i @// real part
VMULL qT1,dXi3,dW3r
VMLAL qT1,dXr3,dW3i @// imag part
.ENDIF
@// Load the second twiddle for 4 groups : w^2
@// w^2 twiddle (2i+0,2i+2,2i+4,2i+6) for group 0,1,2,3
@// (this load overwrites dTmp0/dTmp1 = dW3r/dW3i, which were last read by
@// the X3*W3 multiply above)
VLD4 {dW2r,dW2i,dTmp0,dTmp1},[pw2 :256]!
VRSHRN dZr2,qT2,#15
VRSHRN dZi2,qT3,#15
@// Load the third twiddle for 4 groups : w^3
@// w^3 twiddle (3i+0,3i+3,3i+6,3i+9) for group 0,1,2,3
VLD3 {dW3rS32,dTmp1S32,dTmp2S32},[pw3 :64]!
VRSHRN dZr3,qT0,#15
VRSHRN dZi3,qT1,#15
VLD3 {dW3iS32,dTmp2S32,dTmp3S32},[pw3 :64]!
@// From here qZ1..qZ3 hold the twiddled inputs and qX0 the untwiddled one.
.ifeqs "\scaled", "TRUE"
@// finish first stage of 4 point FFT
VHADD qY0,qX0,qZ2
VHSUB qY2,qX0,qZ2
VHADD qY1,qZ1,qZ3
@// qX0 was last read just above, so D0-D3 can be reloaded for the next
@// iteration here.
VLD4 {dButterfly1Real02,dButterfly1Imag02,dButterfly1Real13,dButterfly1Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
VHSUB qY3,qZ1,qZ3
@// finish second stage of 4 point FFT
VHSUB qZ0,qY2,qY1
VHADD qZ2,qY2,qY1
VLD4 {dButterfly2Real02,dButterfly2Imag02,dButterfly2Real13,dButterfly2Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
.ifeqs "\inverse", "TRUE"
@// Store order for the inverse transform: Z0, Z3, Z2, Z1
VHADD dZr3,dYr0,dYi3 @// y3 = u0-ju3
VST2 {dZr0,dZi0},[pDst :128],outPointStep
VHSUB dZi3,dYi0,dYr3
VHSUB dZr1,dYr0,dYi3 @// y1 = u0+ju3
VHADD dZi1,dYi0,dYr3
VST2 {dZr3,dZi3},[pDst :128],outPointStep
VST2 {dZr2,dZi2},[pDst :128],outPointStep
VST2 {dZr1,dZi1},[pDst :128],dstStep @// dstStep = -3*outPointStep + 16
.ELSE
@// Store order for the forward transform: Z0, Z1, Z2, Z3
VHSUB dZr1,dYr0,dYi3 @// y1 = u0+ju3
VHADD dZi1,dYi0,dYr3
VHADD dZr3,dYr0,dYi3 @// y3 = u0-ju3
VST2 {dZr0,dZi0},[pDst :128],outPointStep
VHSUB dZi3,dYi0,dYr3
VST2 {dZr1,dZi1},[pDst :128],outPointStep
VST2 {dZr2,dZi2},[pDst :128],outPointStep
VST2 {dZr3,dZi3},[pDst :128],dstStep @// dstStep = -3*outPointStep + 16
.ENDIF
.ELSE
@// finish first stage of 4 point FFT
VADD qY0,qX0,qZ2
VSUB qY2,qX0,qZ2
VADD qY1,qZ1,qZ3
VLD4 {dButterfly1Real02,dButterfly1Imag02,dButterfly1Real13,dButterfly1Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
VSUB qY3,qZ1,qZ3
@// finish second stage of 4 point FFT
VSUB qZ0,qY2,qY1
VADD qZ2,qY2,qY1
VLD4 {dButterfly2Real02,dButterfly2Imag02,dButterfly2Real13,dButterfly2Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
.ifeqs "\inverse", "TRUE"
@// Store order for the inverse transform: Z0, Z3, Z2, Z1
VADD dZr3,dYr0,dYi3 @// y3 = u0-ju3
VST2 {dZr0,dZi0},[pDst :128],outPointStep
VSUB dZi3,dYi0,dYr3
VSUB dZr1,dYr0,dYi3 @// y1 = u0+ju3
VADD dZi1,dYi0,dYr3
VST2 {dZr3,dZi3},[pDst :128],outPointStep
VST2 {dZr2,dZi2},[pDst :128],outPointStep
VST2 {dZr1,dZi1},[pDst :128],dstStep @// dstStep = -3*outPointStep + 16
.ELSE
@// Store order for the forward transform: Z0, Z1, Z2, Z3
VSUB dZr1,dYr0,dYi3 @// y1 = u0+ju3
VADD dZi1,dYi0,dYr3
VADD dZr3,dYr0,dYi3 @// y3 = u0-ju3
VST2 {dZr0,dZi0},[pDst :128],outPointStep
VSUB dZi3,dYi0,dYr3
VST2 {dZr1,dZi1},[pDst :128],outPointStep
VST2 {dZr2,dZi2},[pDst :128],outPointStep
VST2 {dZr3,dZi3},[pDst :128],dstStep @// dstStep = -3*outPointStep + 16
.ENDIF
.ENDIF
BGT grpLoop\name
@// Reset and Swap pSrc and pDst for the next stage
@// (pTmp shares r4 with grpCount, which is no longer needed here)
MOV pTmp,pDst
@// Undo the two 32-byte loads issued for an iteration that never runs.
SUB pSrc,pSrc,#64 @// Extra increment currently done in the loop
SUB pDst,pSrc,outPointStep,LSL #2 @// pDst -= size; pSrc -= 4*size bytes
SUB pSrc,pTmp,outPointStep
.endm
@// Entry points for the last-stage radix-4 kernel: one FFTSTAGE expansion
@// per (scaled, inverse) combination, bracketed by M_START/M_END (helper
@// macros not defined in this file; presumably procedure prologue/epilogue -
@// confirm in dl/api/armCOMM_s.h).
@// FFTSTAGE arguments: scaled, inverse, label tag.
@// NOTE(review): the macro body writes r5, r8-r10 and D8-D15 while M_START
@// is given r4 - confirm who is expected to preserve them.
M_START armSP_FFTFwd_CToC_SC16_Radix4_ls_OutOfPlace_unsafe,r4
FFTSTAGE "FALSE","FALSE",FWD
M_END
M_START armSP_FFTInv_CToC_SC16_Radix4_ls_OutOfPlace_unsafe,r4
FFTSTAGE "FALSE","TRUE",INV
M_END
M_START armSP_FFTFwd_CToC_SC16_Sfs_Radix4_ls_OutOfPlace_unsafe,r4
FFTSTAGE "TRUE","FALSE",FWDSFS
M_END
M_START armSP_FFTInv_CToC_SC16_Sfs_Radix4_ls_OutOfPlace_unsafe,r4
FFTSTAGE "TRUE","TRUE",INVSFS
M_END
.END

Просмотреть файл

@ -1,400 +0,0 @@
@//
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@// Use of this source code is governed by a BSD-style license
@// that can be found in the LICENSE file in the root of the source
@// tree. An additional intellectual property rights grant can be found
@// in the file PATENTS. All contributing project authors may
@// be found in the AUTHORS file in the root of the source tree.
@//
@// This file was originally licensed as follows. It has been
@// relicensed with permission from the copyright holders.
@//
@//
@// File Name: armSP_FFT_CToC_SC16_Radix4_unsafe_s.s
@// OpenMAX DL: v1.0.2
@// Last Modified Revision: 7761
@// Last Modified Date: Wed, 26 Sep 2007
@//
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
@//
@//
@//
@// Description:
@// Compute a Radix 4 FFT stage for a N point complex signal
@//
@//
@// Include standard headers
#include "dl/api/armCOMM_s.h"
#include "dl/api/omxtypes_s.h"
@// Import symbols required from other files
@// (For example tables)
@// Set debugging level
@//DEBUG_ON SETL {TRUE}
@// Guarding implementation by the processor name
@// Guarding implementation by the processor name
@// Import symbols required from other files
@// (For example tables)
@//Input Registers
@// Register aliases are plain preprocessor defines.  Keep comments on their
@// own lines: text after the register name would become part of the alias.
@// pSrc/pDst/pTwiddle are the load, store and twiddle pointers used by
@// FFTSTAGE; subFFTNum is read and divided by 4, subFFTSize is read and
@// multiplied by 4.
#define pSrc r0
#define pDst r2
#define pTwiddle r1
#define subFFTNum r6
#define subFFTSize r7
@//Output Registers
@//Local Scratch Registers
@// NOTE: grpCount and t1 both map to r3; t1 is only written after the group
@// loop has finished.  setCount is r14, the link register.
#define grpCount r3
#define pointStep r4
#define outPointStep r5
#define stepTwiddle r12
#define setCount r14
#define srcStep r8
#define setStep r9
#define dstStep r10
#define twStep r11
#define t1 r3
@// Neon Registers
@// dW1-dW3 : twiddle vectors; lanes 0 and 1 are used as wr and wi.
@// dX*     : de-interleaved inputs data[0]..data[3].
#define dW1 D0.S16
#define dW2 D1.S16
#define dW3 D2.S16
#define dXr0 D4.S16
#define dXi0 D5.S16
#define dXr1 D6.S16
#define dXi1 D7.S16
#define dXr2 D8.S16
#define dXi2 D9.S16
#define dXr3 D10.S16
#define dXi3 D11.S16
#define dYr0 D12.S16
#define dYi0 D13.S16
#define dYr1 D14.S16
#define dYi1 D15.S16
#define dYr2 D16.S16
#define dYi2 D17.S16
#define dYr3 D18.S16
#define dYi3 D19.S16
@// 32-bit product accumulators.  They share registers with the qY views
@// (qT0 = qY2, qT1 = qY3, qT2 = qY0, qT3 = qY1); FFTSTAGE narrows every
@// product into a dZ register before it writes the qY values.
#define qT0 Q8.S32
#define qT1 Q9.S32
#define qT2 Q6.S32
#define qT3 Q7.S32
#define dZr0 D20.S16
#define dZi0 D21.S16
#define dZr1 D22.S16
#define dZi1 D23.S16
#define dZr2 D24.S16
#define dZi2 D25.S16
#define dZr3 D26.S16
#define dZi3 D27.S16
@// Quad views: qYn = (dYrn, dYin), qX0 = (dXr0, dXi0), qZn = (dZrn, dZin).
#define qY0 Q6.S16
#define qY1 Q7.S16
#define qY2 Q8.S16
#define qY3 Q9.S16
#define qX0 Q2.S16
#define qZ0 Q10.S16
#define qZ1 Q11.S16
#define qZ2 Q12.S16
#define qZ3 Q13.S16
.MACRO FFTSTAGE scaled, inverse , name
@// Body of a general radix-4 stage: outer loop over groups (three twiddles
@// per group), inner loop over sets, four complex points per vector.  The
@// set loop is software pipelined: it loads the inputs for the NEXT set
@// while finishing the current one, and the twiddles for the next group are
@// loaded after the set loop.
@//   scaled  - "TRUE" selects halving add/sub (VHADD/VHSUB); any other
@//             value selects VADD/VSUB.
@//   inverse - "TRUE" multiplies by the conjugated twiddles and changes
@//             which cross term is stored second and which fourth.
@//   name    - tag appended to both loop labels so each expansion is unique.
@// Reads subFFTSize and subFFTNum; on exit subFFTSize is multiplied by 4,
@// subFFTNum is divided by 4, pSrc and pDst are rewound and swapped.
@// Writes r3, r4, r5, r8-r12, r14, D0-D2, D4-D27 and the condition flags.
@// Define stack arguments
@// Update grpCount and grpSize right away in order to reuse pGrpCount and pGrpSize regs
LSL grpCount,subFFTSize,#2
LSR subFFTNum,subFFTNum,#2
MOV subFFTSize,grpCount
@// pOut0+1 increments pOut0 by 4 bytes
@// pOut0+outPointStep == increment of 4*outPointStep bytes = size bytes
MOV stepTwiddle,#0
@// SMULBB multiplies the signed low halfwords of its two operands.
SMULBB outPointStep,grpCount,subFFTNum
@// pT0+1 increments pT0 by 4 bytes
@// pT0+pointStep = increment of 4*pointStep bytes = grpSize bytes
LSL pointStep,subFFTNum,#2 @// 2*grpSize
@// All three twiddle registers are primed from the same address (no
@// write-back), i.e. with the first entry of the twiddle table.
VLD1 dW1,[pTwiddle :64] @//[wi | wr]
MOV srcStep,pointStep,LSL #1 @// srcStep = 2*pointStep
VLD1 dW2,[pTwiddle :64] @//[wi | wr]
ADD setStep,srcStep,pointStep @// setStep = 3*pointStep
SUB srcStep,srcStep,#16 @// srcStep = 2*pointStep-16
VLD1 dW3,[pTwiddle :64]
@//RSB setStep,setStep,#16 @// setStep = - 3*pointStep+16
RSB setStep,setStep,#0 @// setStep = - 3*pointStep
MOV dstStep,outPointStep,LSL #1
ADD dstStep,dstStep,outPointStep @// dstStep = 3*outPointStep
RSB dstStep,dstStep,#16 @// dstStep = - 3*outPointStep+16
grpLoop\name:
VLD2 {dXr0,dXi0},[pSrc :128],pointStep @// data[0]
ADD stepTwiddle,stepTwiddle,pointStep
VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1]
@// pTwiddle is advanced by stepTwiddle here; the three twiddle loads after
@// the set loop move it by +stepTwiddle, +stepTwiddle and twStep
@// (= -3*stepTwiddle), i.e. back by stepTwiddle in total.
ADD pTwiddle,pTwiddle,stepTwiddle @// set pTwiddle to the first point
VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2]
MOV twStep,stepTwiddle,LSL #2
VLD2 {dXr3,dXi3},[pSrc :128],setStep @// data[3] & reset pSrc
SUB twStep,stepTwiddle,twStep @// twStep = -3*stepTwiddle
MOV setCount,pointStep,LSR #2
ADD pSrc,pSrc,#16 @// set pSrc to data[0] of the next set
ADD pSrc,pSrc,pointStep @// increment to data[1] of the next set
@// Loop on the sets : 4 at a time
setLoop\name:
@// Flags set here are consumed by "BGT setLoop" below; nothing in between
@// updates them.
SUBS setCount,setCount,#4 @// decrement the loop counter
@// X1 * W1 (conjugated W1 for the inverse transform)
.ifeqs "\inverse", "TRUE"
VMULL qT0,dXr1,dW1[0]
VMLAL qT0,dXi1,dW1[1] @// real part
VMULL qT1,dXi1,dW1[0]
VMLSL qT1,dXr1,dW1[1] @// imag part
.ELSE
VMULL qT0,dXr1,dW1[0]
VMLSL qT0,dXi1,dW1[1] @// real part
VMULL qT1,dXi1,dW1[0]
VMLAL qT1,dXr1,dW1[1] @// imag part
.ENDIF
VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1]
@// X2 * W2
.ifeqs "\inverse", "TRUE"
VMULL qT2,dXr2,dW2[0]
VMLAL qT2,dXi2,dW2[1] @// real part
VMULL qT3,dXi2,dW2[0]
VMLSL qT3,dXr2,dW2[1] @// imag part
.ELSE
VMULL qT2,dXr2,dW2[0]
VMLSL qT2,dXi2,dW2[1] @// real part
VMULL qT3,dXi2,dW2[0]
VMLAL qT3,dXr2,dW2[1] @// imag part
.ENDIF
@// Narrow X1*W1 now: qT0/qT1 are reused for X3*W3 below.
VRSHRN dZr1,qT0,#15
VRSHRN dZi1,qT1,#15
VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2]
@// X3 * W3
.ifeqs "\inverse", "TRUE"
VMULL qT0,dXr3,dW3[0]
VMLAL qT0,dXi3,dW3[1] @// real part
VMULL qT1,dXi3,dW3[0]
VMLSL qT1,dXr3,dW3[1] @// imag part
.ELSE
VMULL qT0,dXr3,dW3[0]
VMLSL qT0,dXi3,dW3[1] @// real part
VMULL qT1,dXi3,dW3[0]
VMLAL qT1,dXr3,dW3[1] @// imag part
.ENDIF
@// All products must be narrowed before the qY registers are written,
@// because qT0-qT3 occupy the same registers as qY0-qY3.
VRSHRN dZr2,qT2,#15
VRSHRN dZi2,qT3,#15
VRSHRN dZr3,qT0,#15
VRSHRN dZi3,qT1,#15
VLD2 {dXr3,dXi3},[pSrc :128],setStep @// data[3] & update pSrc for the next set
.ifeqs "\scaled", "TRUE"
@// finish first stage of 4 point FFT
VHADD qY0,qX0,qZ2
VHSUB qY2,qX0,qZ2
VLD2 {dXr0,dXi0},[pSrc :128]! @// data[0]
VHADD qY1,qZ1,qZ3
VHSUB qY3,qZ1,qZ3
@// finish second stage of 4 point FFT
.ifeqs "\inverse", "TRUE"
@// Store order for the inverse transform: Z0, Z2, Z1, Z3
VHSUB qZ0,qY2,qY1
VHADD dZr2,dYr0,dYi3
VST2 {dZr0,dZi0},[pDst :128],outPointStep
VHSUB dZi2,dYi0,dYr3
VHADD qZ1,qY2,qY1
VST2 {dZr2,dZi2},[pDst :128],outPointStep
VHSUB dZr3,dYr0,dYi3
VST2 {dZr1,dZi1},[pDst :128],outPointStep
VHADD dZi3,dYi0,dYr3
VST2 {dZr3,dZi3},[pDst :128],dstStep
.ELSE
@// Store order for the forward transform: Z0, Z3, Z1, Z2
VHSUB qZ0,qY2,qY1
VHSUB dZr3,dYr0,dYi3
VST2 {dZr0,dZi0},[pDst :128],outPointStep
VHADD dZi3,dYi0,dYr3
VHADD qZ1,qY2,qY1
VST2 {dZr3,dZi3},[pDst :128],outPointStep
VHADD dZr2,dYr0,dYi3
VHSUB dZi2,dYi0,dYr3
VST2 {dZr1,dZi1},[pDst :128],outPointStep
VST2 {dZr2,dZi2},[pDst :128],dstStep
.ENDIF
.ELSE
@// finish first stage of 4 point FFT
VADD qY0,qX0,qZ2
VSUB qY2,qX0,qZ2
@// NOTE(review): unlike the scaled path above, this load carries no :128
@// alignment qualifier - confirm whether that difference is intended.
VLD2 {dXr0,dXi0},[pSrc]! @// data[0]
VADD qY1,qZ1,qZ3
VSUB qY3,qZ1,qZ3
@// finish second stage of 4 point FFT
.ifeqs "\inverse", "TRUE"
@// Store order for the inverse transform: Z0, Z2, Z1, Z3
VSUB qZ0,qY2,qY1
VADD dZr2,dYr0,dYi3
VST2 {dZr0,dZi0},[pDst :128],outPointStep
VSUB dZi2,dYi0,dYr3
VADD qZ1,qY2,qY1
VST2 {dZr2,dZi2},[pDst :128],outPointStep
VSUB dZr3,dYr0,dYi3
VST2 {dZr1,dZi1},[pDst :128],outPointStep
VADD dZi3,dYi0,dYr3
VST2 {dZr3,dZi3},[pDst :128],dstStep
.ELSE
@// Store order for the forward transform: Z0, Z3, Z1, Z2
VSUB qZ0,qY2,qY1
VSUB dZr3,dYr0,dYi3
VST2 {dZr0,dZi0},[pDst :128],outPointStep
VADD dZi3,dYi0,dYr3
VADD qZ1,qY2,qY1
VST2 {dZr3,dZi3},[pDst :128],outPointStep
VADD dZr2,dYr0,dYi3
VSUB dZi2,dYi0,dYr3
VST2 {dZr1,dZi1},[pDst :128],outPointStep
VST2 {dZr2,dZi2},[pDst :128],dstStep
.ENDIF
.ENDIF
ADD pSrc,pSrc,pointStep @// increment to data[1] of the next set
BGT setLoop\name
@// Twiddles for the next group.  The VLD1 and ADD instructions do not
@// update the flags, so "BGT grpLoop" tests the SUBS result.
VLD1 dW1,[pTwiddle :64],stepTwiddle @//[wi | wr]
SUBS grpCount,grpCount,#4 @// subtract 4 since grpCount multiplied by 4
VLD1 dW2,[pTwiddle :64],stepTwiddle @//[wi | wr]
ADD pSrc,pSrc,srcStep @// increment pSrc for the next grp
VLD1 dW3,[pTwiddle :64],twStep @//[wi | wr]
BGT grpLoop\name
@// Reset and Swap pSrc and pDst for the next stage
@// (t1 shares r3 with grpCount, which is no longer needed here)
MOV t1,pDst
SUB pDst,pSrc,outPointStep,LSL #2 @// pDst -= size; pSrc -= 4*size bytes
SUB pSrc,t1,outPointStep
.endm
@// Forward, unscaled variant (per the function name): expands FFTSTAGE with
@// the arguments "FALSE","FALSE",FWD.
@// NOTE(review): the macro header lies outside this excerpt - presumably the
@// arguments are (scaled, inverse, label suffix); confirm.
M_START armSP_FFTFwd_CToC_SC16_Radix4_OutOfPlace_unsafe,r4
FFTSTAGE "FALSE","FALSE",FWD
M_END
@// Inverse, unscaled variant (per the function name): expands FFTSTAGE with
@// the arguments "FALSE","TRUE",INV.
@// NOTE(review): the macro header lies outside this excerpt - presumably the
@// arguments are (scaled, inverse, label suffix); confirm.
M_START armSP_FFTInv_CToC_SC16_Radix4_OutOfPlace_unsafe,r4
FFTSTAGE "FALSE","TRUE",INV
M_END
@// Forward, scaled (Sfs) variant (per the function name): expands FFTSTAGE with
@// the arguments "TRUE","FALSE",FWDSFS.
@// NOTE(review): the macro header lies outside this excerpt - presumably the
@// arguments are (scaled, inverse, label suffix); confirm.
M_START armSP_FFTFwd_CToC_SC16_Sfs_Radix4_OutOfPlace_unsafe,r4
FFTSTAGE "TRUE","FALSE",FWDSFS
M_END
@// Inverse, scaled (Sfs) variant (per the function name): expands FFTSTAGE with
@// the arguments "TRUE","TRUE",INVSFS.
@// NOTE(review): the macro header lies outside this excerpt - presumably the
@// arguments are (scaled, inverse, label suffix); confirm.
M_START armSP_FFTInv_CToC_SC16_Sfs_Radix4_OutOfPlace_unsafe,r4
FFTSTAGE "TRUE","TRUE",INVSFS
M_END
.END

Просмотреть файл

@ -1,619 +0,0 @@
@//
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@// Use of this source code is governed by a BSD-style license
@// that can be found in the LICENSE file in the root of the source
@// tree. An additional intellectual property rights grant can be found
@// in the file PATENTS. All contributing project authors may
@// be found in the AUTHORS file in the root of the source tree.
@//
@// This file was originally licensed as follows. It has been
@// relicensed with permission from the copyright holders.
@//
@//
@// File Name: armSP_FFT_CToC_SC16_Radix8_fs_unsafe_s.s
@// OpenMAX DL: v1.0.2
@// Last Modified Revision: 7766
@// Last Modified Date: Thu, 27 Sep 2007
@//
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
@//
@//
@//
@// Description:
@// Compute a first stage Radix 8 FFT stage for a N point complex signal
@//
@//
@// Include standard headers
#include "dl/api/armCOMM_s.h"
#include "dl/api/omxtypes_s.h"
@// Import symbols required from other files
@// (For example tables)
@// Set debugging level
@//DEBUG_ON SETL {TRUE}
@// Guarding implementation by the processor name
@// Guarding implementation by the processor name
@//Input Registers
#define pSrc r0
#define pDst r2
@// pTwiddle is declared for symmetry with the other stages; FFTSTAGE in this
@// file never references it.
#define pTwiddle r1
#define subFFTNum r6
#define subFFTSize r7
@// dest buffer for the next stage (not pSrc for first stage)
#define pPingPongBuf r5
@//Output Registers
@//Local Scratch Registers
#define grpSize r3
@// Reuse grpSize as setCount
#define setCount r3
@// pointStep and outPointStep are two names for the same register (r4)
#define pointStep r4
#define outPointStep r4
#define setStep r8
#define step1 r9
#define step2 r10
#define t0 r11
@// Neon Registers
@// Input points X0..X7, real/imag halves as 16-bit lanes.
@// X0 lives in Q7 (D14/D15), X1..X6 in Q1..Q6 and X7 in Q0 (D0/D1).
#define dXr0 D14.S16
#define dXi0 D15.S16
#define dXr1 D2.S16
#define dXi1 D3.S16
#define dXr2 D4.S16
#define dXi2 D5.S16
#define dXr3 D6.S16
#define dXi3 D7.S16
#define dXr4 D8.S16
#define dXi4 D9.S16
#define dXr5 D10.S16
#define dXi5 D11.S16
#define dXr6 D12.S16
#define dXi6 D13.S16
#define dXr7 D0.S16
#define dXi7 D1.S16
@// Q-register views of the same input points
#define qX0 Q7.S16
#define qX1 Q1.S16
#define qX2 Q2.S16
#define qX3 Q3.S16
#define qX4 Q4.S16
#define qX5 Q5.S16
#define qX6 Q6.S16
#define qX7 Q0.S16
@// First butterfly stage results U0..U7 occupy D16-D31:
@// even indices in Q8-Q11 (D16-D23), odd indices in Q12-Q15 (D24-D31).
#define dUr0 D16.S16
#define dUi0 D17.S16
#define dUr2 D18.S16
#define dUi2 D19.S16
#define dUr4 D20.S16
#define dUi4 D21.S16
#define dUr6 D22.S16
#define dUi6 D23.S16
#define dUr1 D24.S16
#define dUi1 D25.S16
#define dUr3 D26.S16
#define dUi3 D27.S16
#define dUr5 D28.S16
#define dUi5 D29.S16
@// NOTE(review): the original comment here read "reuse dXr7 and dXi7", but
@// dXr7/dXi7 are D0/D1 whereas dUr7/dUi7 are D30/D31 - the comment looks stale.
#define dUr7 D30.S16
#define dUi7 D31.S16
#define qU0 Q8.S16
#define qU1 Q12.S16
#define qU2 Q9.S16
#define qU3 Q13.S16
#define qU4 Q10.S16
#define qU5 Q14.S16
#define qU6 Q11.S16
#define qU7 Q15.S16
@// Second butterfly stage results V0..V7 reuse the same D16-D31 bank with the
@// halves swapped: even indices in Q12-Q15 (D24-D31), odd indices in Q8-Q11.
#define dVr0 D24.S16
#define dVi0 D25.S16
#define dVr2 D26.S16
#define dVi2 D27.S16
#define dVr4 D28.S16
#define dVi4 D29.S16
#define dVr6 D30.S16
#define dVi6 D31.S16
#define dVr1 D16.S16
#define dVi1 D17.S16
#define dVr3 D18.S16
#define dVi3 D19.S16
#define dVr5 D20.S16
#define dVi5 D21.S16
@// dVr7/dVi7 share D22/D23 with dUr6/dUi6.
@// NOTE(review): the original comments said "reuse dUi7" / "reuse dUr7", which
@// does not match the register numbers (dUr7/dUi7 are D30/D31).
#define dVr7 D22.S16
#define dVi7 D23.S16
#define qV0 Q12.S16
#define qV1 Q8.S16
#define qV2 Q13.S16
#define qV3 Q9.S16
#define qV4 Q14.S16
#define qV5 Q10.S16
#define qV6 Q15.S16
#define qV7 Q11.S16
@// Final results Y0..Y7 use exactly the same register assignment as U0..U7
@// (even indices in Q8-Q11, odd indices in Q12-Q15).
#define dYr0 D16.S16
#define dYi0 D17.S16
#define dYr2 D18.S16
#define dYi2 D19.S16
#define dYr4 D20.S16
#define dYi4 D21.S16
#define dYr6 D22.S16
#define dYi6 D23.S16
#define dYr1 D24.S16
#define dYi1 D25.S16
#define dYr3 D26.S16
#define dYi3 D27.S16
#define dYr5 D28.S16
#define dYi5 D29.S16
@// dYr7/dYi7 are D30/D31, i.e. the same registers as dUr7/dUi7 and dVr6/dVi6.
@// NOTE(review): the original comment said "reuse dYr4 and dYi4", but those
@// are D20/D21 - the comment looks stale.
#define dYr7 D30.S16
#define dYi7 D31.S16
#define qY0 Q8.S16
#define qY1 Q12.S16
#define qY2 Q9.S16
#define qY3 Q13.S16
#define qY4 Q10.S16
#define qY5 Q14.S16
#define qY6 Q11.S16
#define qY7 Q15.S16
@// Scratch for the 1/sqrt(2) multiplies. D0/D1 are the same registers as
@// dXr7/dXi7, so dT0/dT1 are only valid between the last use of X7 and the
@// next load of data[7].
#define dT0 D0.S16
#define dT1 D1.S16
@// Define constants
.set ONEBYSQRT2, 0x00005A82 @// Q15 format
@// FFTSTAGE scaled, inverse, name
@//   Emits the first-stage radix-8 butterfly pass (all twiddles are 1).
@//   scaled  : "TRUE" selects the halving VHADD/VHSUB butterflies, any other
@//             value the plain VADD/VSUB ones.
@//   inverse : "TRUE" selects the inverse sign pattern, any other value the
@//             forward one.
@//   name    : suffix appended to grpZeroSetLoop so every expansion gets a
@//             unique label.
@//   On exit : subFFTSize = 8, subFFTNum = (subFFTNum on entry) >> 3,
@//             pSrc = pDst - pointStep, pDst = pPingPongBuf.
@//   The macro writes r3, r4, r8-r11 and D0-D31 and saves none of them itself.
.MACRO FFTSTAGE scaled, inverse , name
@// Define stack arguments
@// Update pSubFFTSize and pSubFFTNum regs
MOV subFFTSize,#8 @// subFFTSize = 8 once this radix-8 first stage is done
LDR t0,=ONEBYSQRT2 @// t0=(1/sqrt(2)) as Q15 format
@// Note: setCount = subFFTNum/8 (reuse the grpSize reg for setCount)
LSR grpSize,subFFTNum,#3
MOV subFFTNum,grpSize
@// pointStep = 4*grpSize; it is the post-increment applied to pSrc between
@// data[k] and data[k+1]
@// Note: outPointStep = pointStep for firststage
MOV pointStep,grpSize,LSL #2
@// Calculate the step of input data for the next set
@//MOV step1,pointStep,LSL #1 @// step1 = 2*pointStep
VLD2 {dXr0,dXi0},[pSrc :128],pointStep @// data[0]
MOV step1,grpSize,LSL #3 @// step1 = 8*grpSize = 2*pointStep
MOV step2,pointStep,LSL #3
VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1]
SUB step2,step2,pointStep @// step2 = 7*pointStep
RSB setStep,step2,#16 @// setStep = - 7*pointStep+16
VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2]
VLD2 {dXr3,dXi3},[pSrc :128],pointStep @// data[3]
VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4]
VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5]
VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6]
VLD2 {dXr7,dXi7},[pSrc :128],setStep @// data[7] & update pSrc for the next set
@// setStep = -7*pointStep + 16
@// grp = 0 a special case since all the twiddle factors are 1
@// Loop on the sets : 4 sets at a time
@// The loop is software pipelined: the eight inputs of the first pass were
@// loaded above, and each pass loads the inputs of the following pass while
@// it finishes the current butterflies.
@// NOTE(review): those loads are unconditional, so the final pass also loads
@// a set that is never used - confirm the source buffer tolerates the over-read.
grpZeroSetLoop\name:
@// Decrement setcount
SUBS setCount,setCount,#4 @// decrement the set loop counter; flags are consumed by the BGT at the bottom
.ifeqs "\scaled", "TRUE"
@// Scaled build: every butterfly uses the halving VHADD/VHSUB forms
@// finish first stage of 8 point FFT
VHADD qU0,qX0,qX4
VHADD qU2,qX1,qX5
VHADD qU4,qX2,qX6
VHADD qU6,qX3,qX7
@// finish second stage of 8 point FFT
VHADD qV0,qU0,qU4
VHSUB qV2,qU0,qU4
VHADD qV4,qU2,qU6
VHSUB qV6,qU2,qU6
@// finish third stage of 8 point FFT
VHADD qY0,qV0,qV4
VHSUB qY4,qV0,qV4
VST2 {dYr0,dYi0},[pDst :128],step1 @// store y0
.ifeqs "\inverse", "TRUE"
VHSUB dYr2,dVr2,dVi6
VHADD dYi2,dVi2,dVr6
VHADD dYr6,dVr2,dVi6
VST2 {dYr2,dYi2},[pDst :128],step1 @// store y2
VHSUB dYi6,dVi2,dVr6
VHSUB qU1,qX0,qX4
VST2 {dYr4,dYi4},[pDst :128],step1 @// store y4
VHSUB qU3,qX1,qX5
VHSUB qU5,qX2,qX6
VST2 {dYr6,dYi6},[pDst :128],step1 @// store y6
.ELSE
@// Forward case: the sign pattern is mirrored, so the dY6 registers go to the
@// y2 output slot and the dY2 registers to the y6 slot. The trailing comments
@// name the output slot, not the register.
VHADD dYr6,dVr2,dVi6
VHSUB dYi6,dVi2,dVr6
VHSUB dYr2,dVr2,dVi6
VST2 {dYr6,dYi6},[pDst :128],step1 @// y2 slot (dY6 registers)
VHADD dYi2,dVi2,dVr6
VHSUB qU1,qX0,qX4
VST2 {dYr4,dYi4},[pDst :128],step1 @// store y4
VHSUB qU3,qX1,qX5
VHSUB qU5,qX2,qX6
VST2 {dYr2,dYi2},[pDst :128],step1 @// y6 slot (dY2 registers)
.ENDIF
@// finish first stage of 8 point FFT
VHSUB qU7,qX3,qX7 @// last read of X7; D0/D1 are free for dT0/dT1 from here on
VMOV dT0[0],t0 @// dT0[0] = ONEBYSQRT2; redone every pass because the data[7] load overwrites D0
@// finish second stage of 8 point FFT
VHSUB dVr1,dUr1,dUi5
VLD2 {dXr0,dXi0},[pSrc :128],pointStep @// data[0] for next iteration
VHADD dVi1,dUi1,dUr5
VHADD dVr3,dUr1,dUi5
VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1]
VHSUB dVi3,dUi1,dUr5
VHSUB dVr5,dUr3,dUi7
VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2]
VHADD dVi5,dUi3,dUr7
VHADD dVr7,dUr3,dUi7
VLD2 {dXr3,dXi3},[pSrc :128],pointStep @// data[3]
VHSUB dVi7,dUi3,dUr7
@// finish third stage of 8 point FFT
.ifeqs "\inverse", "TRUE"
@// calculate a*v5
VQRDMULH dT1,dVr5,dT0[0] @// dT1 (D1) = dVr5 * (1/sqrt(2)), used as scratch
VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4]
VQRDMULH dVi5,dVi5,dT0[0]
VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5]
VSUB dVr5,dT1,dVi5 @// a * V5
VADD dVi5,dT1,dVi5
VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6]
@// calculate b*v7
VQRDMULH dT1,dVr7,dT0[0]
VQRDMULH dVi7,dVi7,dT0[0]
VHADD qY1,qV1,qV5
VHSUB qY5,qV1,qV5
VADD dVr7,dT1,dVi7 @// b * V7
VSUB dVi7,dVi7,dT1
SUB pDst, pDst, step2 @// set pDst to y1
VLD2 {dXr7,dXi7},[pSrc :128],setStep @// data[7]; overwrites D0/D1, so dT0/dT1 are dead from here
VHSUB dYr3,dVr3,dVr7
VHSUB dYi3,dVi3,dVi7
VST2 {dYr1,dYi1},[pDst :128],step1 @// store y1
VHADD dYr7,dVr3,dVr7
VHADD dYi7,dVi3,dVi7
VST2 {dYr3,dYi3},[pDst :128],step1 @// store y3
VST2 {dYr5,dYi5},[pDst :128],step1 @// store y5
#if 0
VST2 {dYr7,dYi7},[pDst :128],#16 @// store y7
#else
VST2 {dYr7,dYi7},[pDst :128]! @// store y7; writeback form is the one assembled (the #if 0 arm is disabled)
#endif
.ELSE
@// Forward case: output slots y1,y3,y5,y7 receive the dY7,dY5,dY3,dY1
@// registers respectively; trailing comments name the output slot.
@// calculate b*v7
VQRDMULH dT1,dVr7,dT0[0]
VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4]
VQRDMULH dVi7,dVi7,dT0[0]
VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5]
VADD dVr7,dT1,dVi7 @// b * V7
VSUB dVi7,dVi7,dT1
VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6]
@// calculate a*v5
VQRDMULH dT1,dVr5,dT0[0] @// dT1 (D1) = dVr5 * (1/sqrt(2)), used as scratch
VQRDMULH dVi5,dVi5,dT0[0]
VHADD dYr7,dVr3,dVr7
VHADD dYi7,dVi3,dVi7
SUB pDst, pDst, step2 @// set pDst to y1
VSUB dVr5,dT1,dVi5 @// a * V5
VADD dVi5,dT1,dVi5
VLD2 {dXr7,dXi7},[pSrc :128],setStep @// data[7]; overwrites D0/D1, so dT0/dT1 are dead from here
VHSUB qY5,qV1,qV5
VHSUB dYr3,dVr3,dVr7
VST2 {dYr7,dYi7},[pDst :128],step1 @// y1 slot (dY7 registers)
VHSUB dYi3,dVi3,dVi7
VHADD qY1,qV1,qV5
VST2 {dYr5,dYi5},[pDst :128],step1 @// y3 slot (dY5 registers)
VST2 {dYr3,dYi3},[pDst :128],step1 @// y5 slot (dY3 registers)
#if 0
VST2 {dYr1,dYi1},[pDst :128],#16 @// store y7
#else
VST2 {dYr1,dYi1},[pDst :128]! @// y7 slot (dY1 registers); writeback form is the one assembled
#endif
.ENDIF
.ELSE
@// Unscaled build: same dataflow as above with plain VADD/VSUB
@// finish first stage of 8 point FFT
VADD qU0,qX0,qX4
VADD qU2,qX1,qX5
VADD qU4,qX2,qX6
VADD qU6,qX3,qX7
@// finish second stage of 8 point FFT
VADD qV0,qU0,qU4
VSUB qV2,qU0,qU4
VADD qV4,qU2,qU6
VSUB qV6,qU2,qU6
@// finish third stage of 8 point FFT
VADD qY0,qV0,qV4
VSUB qY4,qV0,qV4
VST2 {dYr0,dYi0},[pDst :128],step1 @// store y0
.ifeqs "\inverse", "TRUE"
VSUB dYr2,dVr2,dVi6
VADD dYi2,dVi2,dVr6
VADD dYr6,dVr2,dVi6
VST2 {dYr2,dYi2},[pDst :128],step1 @// store y2
VSUB dYi6,dVi2,dVr6
VSUB qU1,qX0,qX4
VST2 {dYr4,dYi4},[pDst :128],step1 @// store y4
VSUB qU3,qX1,qX5
VSUB qU5,qX2,qX6
VST2 {dYr6,dYi6},[pDst :128],step1 @// store y6
.ELSE
@// Forward case: dY6 registers go to the y2 slot, dY2 registers to the y6 slot
VADD dYr6,dVr2,dVi6
VSUB dYi6,dVi2,dVr6
VSUB dYr2,dVr2,dVi6
VST2 {dYr6,dYi6},[pDst :128],step1 @// y2 slot (dY6 registers)
VADD dYi2,dVi2,dVr6
VSUB qU1,qX0,qX4
VST2 {dYr4,dYi4},[pDst :128],step1 @// store y4
VSUB qU3,qX1,qX5
VSUB qU5,qX2,qX6
VST2 {dYr2,dYi2},[pDst :128],step1 @// y6 slot (dY2 registers)
.ENDIF
@// finish first stage of 8 point FFT
VSUB qU7,qX3,qX7 @// last read of X7; D0/D1 are free for dT0/dT1 from here on
VMOV dT0[0],t0 @// dT0[0] = ONEBYSQRT2; redone every pass because the data[7] load overwrites D0
@// finish second stage of 8 point FFT
VSUB dVr1,dUr1,dUi5
VLD2 {dXr0,dXi0},[pSrc :128],pointStep @// data[0] for next iteration
VADD dVi1,dUi1,dUr5
VADD dVr3,dUr1,dUi5
VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1]
VSUB dVi3,dUi1,dUr5
VSUB dVr5,dUr3,dUi7
VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2]
VADD dVi5,dUi3,dUr7
VADD dVr7,dUr3,dUi7
VLD2 {dXr3,dXi3},[pSrc :128],pointStep @// data[3]
VSUB dVi7,dUi3,dUr7
@// finish third stage of 8 point FFT
.ifeqs "\inverse", "TRUE"
@// calculate a*v5
VQRDMULH dT1,dVr5,dT0[0] @// dT1 (D1) = dVr5 * (1/sqrt(2)), used as scratch
VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4]
VQRDMULH dVi5,dVi5,dT0[0]
VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5]
VSUB dVr5,dT1,dVi5 @// a * V5
VADD dVi5,dT1,dVi5
VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6]
@// calculate b*v7
VQRDMULH dT1,dVr7,dT0[0]
VQRDMULH dVi7,dVi7,dT0[0]
VADD qY1,qV1,qV5
VSUB qY5,qV1,qV5
VADD dVr7,dT1,dVi7 @// b * V7
VSUB dVi7,dVi7,dT1
SUB pDst, pDst, step2 @// set pDst to y1
VLD2 {dXr7,dXi7},[pSrc :128],setStep @// data[7]; overwrites D0/D1, so dT0/dT1 are dead from here
VSUB dYr3,dVr3,dVr7
VSUB dYi3,dVi3,dVi7
VST2 {dYr1,dYi1},[pDst :128],step1 @// store y1
VADD dYr7,dVr3,dVr7
VADD dYi7,dVi3,dVi7
VST2 {dYr3,dYi3},[pDst :128],step1 @// store y3
VST2 {dYr5,dYi5},[pDst :128],step1 @// store y5
#if 0
VST2 {dYr7,dYi7},[pDst :128],#16 @// store y7
#else
VST2 {dYr7,dYi7},[pDst :128]! @// store y7; writeback form is the one assembled (the #if 0 arm is disabled)
#endif
.ELSE
@// Forward case: output slots y1,y3,y5,y7 receive the dY7,dY5,dY3,dY1
@// registers respectively; trailing comments name the output slot.
@// calculate b*v7
VQRDMULH dT1,dVr7,dT0[0]
VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4]
VQRDMULH dVi7,dVi7,dT0[0]
VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5]
VADD dVr7,dT1,dVi7 @// b * V7
VSUB dVi7,dVi7,dT1
VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6]
@// calculate a*v5
VQRDMULH dT1,dVr5,dT0[0] @// dT1 (D1) = dVr5 * (1/sqrt(2)), used as scratch
VQRDMULH dVi5,dVi5,dT0[0]
VADD dYr7,dVr3,dVr7
VADD dYi7,dVi3,dVi7
SUB pDst, pDst, step2 @// set pDst to y1
VSUB dVr5,dT1,dVi5 @// a * V5
VADD dVi5,dT1,dVi5
VLD2 {dXr7,dXi7},[pSrc :128],setStep @// data[7]; overwrites D0/D1, so dT0/dT1 are dead from here
VSUB qY5,qV1,qV5
VSUB dYr3,dVr3,dVr7
VST2 {dYr7,dYi7},[pDst :128],step1 @// y1 slot (dY7 registers)
VSUB dYi3,dVi3,dVi7
VADD qY1,qV1,qV5
VST2 {dYr5,dYi5},[pDst :128],step1 @// y3 slot (dY5 registers)
VST2 {dYr3,dYi3},[pDst :128],step1 @// y5 slot (dY3 registers)
#if 0
VST2 {dYr1,dYi1},[pDst :128],#16 @// store y7
#else
VST2 {dYr1,dYi1},[pDst :128]! @// y7 slot (dY1 registers); writeback form is the one assembled
#endif
.ENDIF
.ENDIF
@// pDst is at (y7 slot + 16) here; stepping back 7*pointStep leaves it 16
@// bytes past the y0 slot of the set just written.
SUB pDst, pDst, step2 @// update pDst for the next set
BGT grpZeroSetLoop\name @// uses the flags of the SUBS at the loop top; nothing in between sets flags
@// reset pSrc to pDst for the next stage
SUB pSrc,pDst,pointStep @// pSrc = pDst - pointStep
MOV pDst,pPingPongBuf
.endm
@// Allocate stack memory required by the function
@// Forward, unscaled radix-8 first stage: FFTSTAGE with scaled="FALSE",
@// inverse="FALSE"; the loop label gets the suffix FWD.
M_START armSP_FFTFwd_CToC_SC16_Radix8_fs_OutOfPlace_unsafe,r4
FFTSTAGE "FALSE","FALSE",FWD
M_END
@// Inverse, unscaled radix-8 first stage: FFTSTAGE with scaled="FALSE",
@// inverse="TRUE"; the loop label gets the suffix INV.
M_START armSP_FFTInv_CToC_SC16_Radix8_fs_OutOfPlace_unsafe,r4
FFTSTAGE "FALSE","TRUE",INV
M_END
@// Forward, scaled radix-8 first stage: FFTSTAGE with scaled="TRUE",
@// inverse="FALSE"; the loop label gets the suffix FWDSFS.
M_START armSP_FFTFwd_CToC_SC16_Sfs_Radix8_fs_OutOfPlace_unsafe,r4
FFTSTAGE "TRUE","FALSE",FWDSFS
M_END
@// Inverse, scaled radix-8 first stage: FFTSTAGE with scaled="TRUE",
@// inverse="TRUE"; the loop label gets the suffix INVSFS.
M_START armSP_FFTInv_CToC_SC16_Sfs_Radix8_fs_OutOfPlace_unsafe,r4
FFTSTAGE "TRUE","TRUE",INVSFS
M_END
.END

Просмотреть файл

@ -1,163 +0,0 @@
@//
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@// Use of this source code is governed by a BSD-style license
@// that can be found in the LICENSE file in the root of the source
@// tree. An additional intellectual property rights grant can be found
@// in the file PATENTS. All contributing project authors may
@// be found in the AUTHORS file in the root of the source tree.
@//
@// This file was originally licensed as follows. It has been
@// relicensed with permission from the copyright holders.
@//
@//
@// File Name: armSP_FFT_CToC_SC32_Radix2_fs_unsafe_s.s
@// OpenMAX DL: v1.0.2
@// Last Modified Revision: 5995
@// Last Modified Date: Fri, 08 Jun 2007
@//
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
@//
@//
@//
@// Description:
@// Compute the first stage of a Radix 2 DIT in-order out-of-place FFT
@// stage for a N point complex signal.
@//
@// Include standard headers
#include "dl/api/armCOMM_s.h"
#include "dl/api/omxtypes_s.h"
@// Import symbols required from other files
@// (For example tables)
@// Set debugging level
@//DEBUG_ON SETL {TRUE}
@// Guarding implementation by the processor name
@// Guarding implementation by the processor name
@//Input Registers
#define pSrc r0
#define pDst r2
@// pTwiddle is declared but never referenced by FFTSTAGE in this file
#define pTwiddle r1
#define pPingPongBuf r5
#define subFFTNum r6
#define subFFTSize r7
@//Output Registers
@//Local Scratch Registers
@// Each pair below is two names for one register:
@// pointStep/outPointStep = r3, grpSize/setCount = r4, step/dstStep = r8
#define pointStep r3
#define outPointStep r3
#define grpSize r4
#define setCount r4
#define step r8
#define dstStep r8
@// Neon Registers
@// dX0/dX1: the two butterfly inputs; dY0/dY1: their sum and difference
#define dX0 D0.S32
#define dX1 D1.S32
#define dY0 D2.S32
#define dY1 D3.S32
@// FFTSTAGE scaled, inverse, name
@//   Emits the first-stage radix-2 butterfly pass: every iteration loads two
@//   D registers that are pointStep bytes apart and stores their sum and
@//   their difference.
@//   scaled  : "TRUE" selects halving VHADD/VHSUB, any other value VADD/VSUB.
@//   inverse : accepted for a uniform call signature; this macro body never
@//             tests it, so forward and inverse expansions are identical.
@//   name    : suffix appended to grpZeroSetLoop to keep the label unique.
@//   On exit : subFFTSize = 2, subFFTNum = (subFFTNum on entry) >> 1,
@//             pSrc = pDst - pointStep, pDst = pPingPongBuf.
.MACRO FFTSTAGE scaled, inverse, name
@// Define stack arguments
@// update subFFTSize (r7) and subFFTNum (r6) for the next stage
MOV subFFTSize,#2
LSR grpSize,subFFTNum,#1
MOV subFFTNum,grpSize
@// pointStep = 8*grpSize, used as the post-increment from the first to the
@// second butterfly input
@// Note: outPointStep = pointStep for firststage
@// Note: setCount is the same register as grpSize (r4), so the loop below
@// runs grpSize times, one butterfly per pass
MOV pointStep,grpSize,LSL #3
RSB step,pointStep,#8
@// Loop on the sets for grp zero
grpZeroSetLoop\name :
VLD1 dX0,[pSrc],pointStep
VLD1 dX1,[pSrc],step @// step = -pointStep + 8
SUBS setCount,setCount,#1 @// decrement the loop counter; flags are consumed by the BGT below
.ifeqs "\scaled", "TRUE"
VHADD dY0,dX0,dX1
VHSUB dY1,dX0,dX1
.ELSE
VADD dY0,dX0,dX1
VSUB dY1,dX0,dX1
.ENDIF
VST1 dY0,[pDst],outPointStep
VST1 dY1,[pDst],dstStep @// dstStep = step = -pointStep + 8
BGT grpZeroSetLoop\name
@// reset pSrc to pDst for the next stage
SUB pSrc,pDst,pointStep @// pSrc = pDst - pointStep
MOV pDst,pPingPongBuf
.endm
@// Forward, unscaled radix-2 first stage: FFTSTAGE with scaled="FALSE",
@// inverse="FALSE"; the loop label gets the suffix fwd.
M_START armSP_FFTFwd_CToC_SC32_Radix2_fs_OutOfPlace_unsafe,r4
FFTSTAGE "FALSE","FALSE",fwd
M_END
@// Inverse, unscaled radix-2 first stage: FFTSTAGE with scaled="FALSE",
@// inverse="TRUE"; the loop label gets the suffix inv.
M_START armSP_FFTInv_CToC_SC32_Radix2_fs_OutOfPlace_unsafe,r4
FFTSTAGE "FALSE","TRUE",inv
M_END
@// Forward, scaled radix-2 first stage: FFTSTAGE with scaled="TRUE",
@// inverse="FALSE"; the loop label gets the suffix fwdsfs.
M_START armSP_FFTFwd_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe,r4
FFTSTAGE "TRUE","FALSE",fwdsfs
M_END
@// Inverse, scaled radix-2 first stage: FFTSTAGE with scaled="TRUE",
@// inverse="TRUE"; the loop label gets the suffix invsfs.
M_START armSP_FFTInv_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe,r4
FFTSTAGE "TRUE","TRUE",invsfs
M_END
.end

Просмотреть файл

@ -1,184 +0,0 @@
@//
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@// Use of this source code is governed by a BSD-style license
@// that can be found in the LICENSE file in the root of the source
@// tree. An additional intellectual property rights grant can be found
@// in the file PATENTS. All contributing project authors may
@// be found in the AUTHORS file in the root of the source tree.
@//
@// This file was originally licensed as follows. It has been
@// relicensed with permission from the copyright holders.
@//
@//
@// File Name: armSP_FFT_CToC_SC32_Radix2_ls_unsafe_s.s
@// OpenMAX DL: v1.0.2
@// Last Modified Revision: 7493
@// Last Modified Date: Mon, 24 Sep 2007
@//
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
@//
@//
@//
@// Description:
@// Compute the last stage of a Radix 2 DIT in-order out-of-place FFT
@// stage for a N point complex signal.
@//
@// Include standard headers
#include "dl/api/armCOMM_s.h"
#include "dl/api/omxtypes_s.h"
@// Import symbols required from other files
@// (For example tables)
@// Set debugging level
@//DEBUG_ON SETL {TRUE}
@// Guarding implementation by the processor name
@//Input Registers
#define pSrc r0
#define pDst r2
#define pTwiddle r1
#define subFFTNum r6
#define subFFTSize r7
@//Output Registers
@//Local Scratch Registers
#define outPointStep r3
#define grpCount r4
#define dstStep r5
@// pTmp shares r4 with grpCount; FFTSTAGE only uses it after the group loop
@// has finished with grpCount
#define pTmp r4
@// Neon Registers
@// dWr/dWi: twiddle real/imag parts; dXr0..dXi1: the two butterfly inputs;
@// dYr0..dYi1: the two butterfly outputs
#define dWr D0.S32
#define dWi d1.s32
#define dXr0 d2.s32
#define dXi0 d3.s32
#define dXr1 d4.s32
#define dXi1 d5.s32
#define dYr0 d6.s32
#define dYi0 d7.s32
#define dYr1 d8.s32
#define dYi1 d9.s32
@// 64-bit accumulators for the twiddle products (Q5 = D10/D11, Q6 = D12/D13)
#define qT0 q5.s64
#define qT1 q6.s64
@// FFTSTAGE scaled, inverse, name
@//   Emits the last-stage radix-2 butterfly pass. Each loop pass loads two
@//   twiddles and two input pairs, multiplies the second input of each pair by
@//   the twiddle, and stores (X0 - W*X1) followed by (X0 + W*X1).
@//   scaled  : "TRUE" selects halving VHADD/VHSUB, any other value VADD/VSUB.
@//   inverse : "TRUE" multiplies by the conjugated twiddle, any other value by
@//             the twiddle as loaded.
@//   name    : suffix appended to grpLoop to keep the label unique.
@//   On exit : subFFTNum = 1, subFFTSize = 2 * (subFFTSize on entry); pSrc,
@//             pDst and pTwiddle are rewound as shown at the end of the macro.
.macro FFTSTAGE scaled, inverse, name
MOV outPointStep,subFFTSize,LSL #3 @// outPointStep = 8 * subFFTSize (entry value)
@// Update grpCount and grpSize rightaway
MOV subFFTNum,#1 @//after the last stage
LSL grpCount,subFFTSize,#1
@// update subFFTSize for the next stage
MOV subFFTSize,grpCount
RSB dstStep,outPointStep,#16 @// dstStep = 16 - outPointStep
@// Loop on 2 grps at a time for the last stage
grpLoop\name :
VLD2 {dWr,dWi},[pTwiddle :64]! @// de-interleave two twiddles into real/imag registers
VLD4 {dXr0,dXi0,dXr1,dXi1},[pSrc :128]! @// de-interleave two (X0, X1) input pairs
SUBS grpCount,grpCount,#4 @// grpCount is multiplied by 2; flags are consumed by the bgt below
.ifeqs "\inverse", "TRUE"
@// X1 * conj(W): re = Wr*Xr + Wi*Xi, im = Wr*Xi - Wi*Xr
VMULL qT0,dWr,dXr1
VMLAL qT0,dWi,dXi1 @// real part
VMULL qT1,dWr,dXi1
VMLSL qT1,dWi,dXr1 @// imag part
.else
@// X1 * W: re = Wr*Xr - Wi*Xi, im = Wr*Xi + Wi*Xr
VMULL qT0,dWr,dXr1
VMLSL qT0,dWi,dXi1 @// real part
VMULL qT1,dWr,dXi1
VMLAL qT1,dWi,dXr1 @// imag part
.endif
@// rounding narrow of the 64-bit products back to 32 bits (shift right by 31);
@// the result replaces X1 in place
VRSHRN dXr1,qT0,#31
VRSHRN dXi1,qT1,#31
.ifeqs "\scaled", "TRUE"
VHSUB dYr0,dXr0,dXr1
VHSUB dYi0,dXi0,dXi1
VHADD dYr1,dXr0,dXr1
VHADD dYi1,dXi0,dXi1
.else
VSUB dYr0,dXr0,dXr1
VSUB dYi0,dXi0,dXi1
VADD dYr1,dXr0,dXr1
VADD dYi1,dXi0,dXi1
.endif
VST2 {dYr0,dYi0},[pDst],outPointStep @// difference first, then advance by outPointStep
VST2 {dYr1,dYi1},[pDst],dstStep @// dstStep = step = -outPointStep + 16
bgt grpLoop\name
@// Reset and Swap pSrc and pDst for the next stage
MOV pTmp,pDst @// pTmp reuses r4; grpCount is no longer needed
SUB pDst,pSrc,outPointStep,LSL #1 @// pDst = pSrc - 2*outPointStep
SUB pSrc,pTmp,outPointStep @// pSrc = (old pDst) - outPointStep
@// Reset pTwiddle for the next stage
SUB pTwiddle,pTwiddle,outPointStep @// pTwiddle -= 4*size bytes
.endm
@// Forward, unscaled radix-2 last stage: FFTSTAGE with scaled="FALSE",
@// inverse="FALSE"; the loop label gets the suffix fwd.
@// NOTE(review): this is the only M_START in the file that passes an extra
@// empty third argument (""); the sibling entry points omit it - confirm the
@// two forms are equivalent in armCOMM_s.h.
M_START armSP_FFTFwd_CToC_SC32_Radix2_ls_OutOfPlace_unsafe,r4,""
FFTSTAGE "FALSE","FALSE",fwd
M_END
@// Inverse, unscaled radix-2 last stage: FFTSTAGE with scaled="FALSE",
@// inverse="TRUE"; the loop label gets the suffix inv.
M_START armSP_FFTInv_CToC_SC32_Radix2_ls_OutOfPlace_unsafe,r4
FFTSTAGE "FALSE","TRUE",inv
M_END
@// Forward, scaled radix-2 last stage: FFTSTAGE with scaled="TRUE",
@// inverse="FALSE"; the loop label gets the suffix fwdsfs.
M_START armSP_FFTFwd_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe,r4
FFTSTAGE "TRUE","FALSE",fwdsfs
M_END
@// Inverse, scaled radix-2 last stage: FFTSTAGE with scaled="TRUE",
@// inverse="TRUE"; the loop label gets the suffix invsfs.
M_START armSP_FFTInv_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe,r4
FFTSTAGE "TRUE","TRUE",invsfs
M_END
.end

Просмотреть файл

@ -1,216 +0,0 @@
@//
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@// Use of this source code is governed by a BSD-style license
@// that can be found in the LICENSE file in the root of the source
@// tree. An additional intellectual property rights grant can be found
@// in the file PATENTS. All contributing project authors may
@// be found in the AUTHORS file in the root of the source tree.
@//
@// This file was originally licensed as follows. It has been
@// relicensed with permission from the copyright holders.
@//
@//
@// File Name: armSP_FFT_CToC_SC32_Radix2_unsafe_s.s
@// OpenMAX DL: v1.0.2
@// Last Modified Revision: 5638
@// Last Modified Date: Wed, 06 Jun 2007
@//
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
@//
@//
@//
@// Description:
@// Compute a Radix 2 DIT in-order out-of-place FFT stage for a N point complex signal.
@// This handles the general stage, not the first or last stage.
@//
@// Include standard headers
#include "dl/api/armCOMM_s.h"
#include "dl/api/omxtypes_s.h"
@// Import symbols required from other files
@// (For example tables)
@// Set debugging level
@//DEBUG_ON SETL {TRUE}
@// Guarding implementation by the processor name
@// Guarding implementation by the processor name
@//Input Registers
#define pSrc r0
#define pDst r2
#define pTwiddle r1
#define subFFTNum r6
#define subFFTSize r7
@//Output Registers
@//Local Scratch Registers
#define outPointStep r3
#define pointStep r4
#define grpCount r5
#define setCount r8
@//const RN 9
#define step r10
#define dstStep r11
@// pTable and pTmp both name r9; FFTSTAGE in this file only uses pTmp
#define pTable r9
#define pTmp r9
@// Neon Registers
@// dW: one twiddle (lane 0 and lane 1 are used as separate scalars);
@// dX0/dX1 and dX2/dX3: real/imag parts of the two butterfly inputs
#define dW D0.S32
#define dX0 D2.S32
#define dX1 D3.S32
#define dX2 D4.S32
#define dX3 D5.S32
#define dY0 D6.S32
#define dY1 D7.S32
#define dY2 D8.S32
#define dY3 D9.S32
@// qT0 (Q3 = D6/D7) overlaps dY0/dY1 and qT1 (Q4 = D8/D9) overlaps dY2/dY3:
@// the products must be narrowed out of qT0/qT1 before any dY register is written
#define qT0 Q3.S64
#define qT1 Q4.S64
@// FFTSTAGE scaled, inverse, name
@//   Emits one general (neither first nor last) radix-2 butterfly stage.
@//   The outer loop walks the groups and loads one twiddle per group; the
@//   inner loop walks the sets of that group, two at a time.
@//   scaled  : "TRUE" selects halving VHADD/VHSUB, any other value VADD/VSUB.
@//   inverse : "TRUE" multiplies by the conjugated twiddle, any other value by
@//             the twiddle as loaded.
@//   name    : suffix appended to grpLoop/setLoop to keep the labels unique.
@//   On exit : subFFTNum is halved, subFFTSize is doubled; pSrc, pDst and
@//             pTwiddle are rewound as shown at the end of the macro.
.MACRO FFTSTAGE scaled, inverse, name
@// Define stack arguments
@// Update grpCount and grpSize rightaway inorder to reuse pGrpCount and pGrpSize regs
LSR subFFTNum,subFFTNum,#1 @//grpSize
LSL grpCount,subFFTSize,#1
@// pointStep is first set to 4*grpSize (operand for the multiply below) and
@// doubled afterwards to 8*grpSize, the value used as load post-increment
MOV pointStep,subFFTNum,LSL #2
@// update subFFTSize for the next stage
MOV subFFTSize,grpCount
@// outPointStep = grpCount * pointStep (pointStep still 4*grpSize here)
@// NOTE(review): SMULBB multiplies only the signed low 16 bits of each
@// operand, so both grpCount and pointStep must fit in 16 bits - confirm the
@// callers' maximum FFT size guarantees that.
SMULBB outPointStep,grpCount,pointStep
LSL pointStep,pointStep,#1
RSB step,pointStep,#16 @// step = 16 - pointStep
RSB dstStep,outPointStep,#16 @// dstStep = 16 - outPointStep
@// Loop on the groups
grpLoop\name :
MOV setCount,pointStep,LSR #3 @// setCount = pointStep/8
VLD1 dW,[pTwiddle],pointStep @//[wi | wr]
@// Loop on the sets
setLoop\name :
VLD2 {dX0,dX1},[pSrc],pointStep @// point0: dX0-real part dX1-img part
VLD2 {dX2,dX3},[pSrc],step @// point1: dX2-real part dX3-img part
SUBS setCount,setCount,#2 @// two sets per pass; flags are consumed by BGT setLoop below
.ifeqs "\inverse", "TRUE"
@// point1 * conj(w): re = wr*xr + wi*xi, im = wr*xi - wi*xr
VMULL qT0,dX2,dW[0]
VMLAL qT0,dX3,dW[1] @// real part
VMULL qT1,dX3,dW[0]
VMLSL qT1,dX2,dW[1] @// imag part
.else
@// point1 * w: re = wr*xr - wi*xi, im = wr*xi + wi*xr
VMULL qT0,dX2,dW[0]
VMLSL qT0,dX3,dW[1] @// real part
VMULL qT1,dX3,dW[0]
VMLAL qT1,dX2,dW[1] @// imag part
.endif
@// rounding narrow of the 64-bit products back to 32 bits (shift right by 31);
@// must happen before dY0..dY3 are written because qT0/qT1 overlap them
VRSHRN dX2,qT0,#31
VRSHRN dX3,qT1,#31
.ifeqs "\scaled", "TRUE"
VHSUB dY0,dX0,dX2
VHSUB dY1,dX1,dX3
VHADD dY2,dX0,dX2
VHADD dY3,dX1,dX3
.else
VSUB dY0,dX0,dX2
VSUB dY1,dX1,dX3
VADD dY2,dX0,dX2
VADD dY3,dX1,dX3
.endif
VST2 {dY0,dY1},[pDst],outPointStep @// difference first, then advance by outPointStep
VST2 {dY2,dY3},[pDst],dstStep @// dstStep = -outPointStep + 16
BGT setLoop\name
SUBS grpCount,grpCount,#2 @// grpCount was set to 2*subFFTSize, so it drops by 2 per group
ADD pSrc,pSrc,pointStep @// skip the point1 half that the inner loop already consumed
BGT grpLoop\name
@// Reset and Swap pSrc and pDst for the next stage
MOV pTmp,pDst
SUB pDst,pSrc,outPointStep,LSL #1 @// pDst = pSrc - 2*outPointStep
SUB pSrc,pTmp,outPointStep @// pSrc = (old pDst) - outPointStep
@// Reset pTwiddle for the next stage
SUB pTwiddle,pTwiddle,outPointStep @// pTwiddle -= 4*size bytes
.endm
@// Forward, unscaled general radix-2 stage: FFTSTAGE with scaled="FALSE",
@// inverse="FALSE"; the loop labels get the suffix FWD.
M_START armSP_FFTFwd_CToC_SC32_Radix2_OutOfPlace_unsafe,r4
FFTSTAGE "FALSE","FALSE",FWD
M_END
@// Inverse, unscaled general radix-2 stage: FFTSTAGE with scaled="FALSE",
@// inverse="TRUE"; the loop labels get the suffix INV.
M_START armSP_FFTInv_CToC_SC32_Radix2_OutOfPlace_unsafe,r4
FFTSTAGE "FALSE","TRUE",INV
M_END
@// Forward, scaled general radix-2 stage: FFTSTAGE with scaled="TRUE",
@// inverse="FALSE"; the loop labels get the suffix FWDSFS.
M_START armSP_FFTFwd_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe,r4
FFTSTAGE "TRUE","FALSE",FWDSFS
M_END
@// Inverse, scaled general radix-2 stage: FFTSTAGE with scaled="TRUE",
@// inverse="TRUE"; the loop labels get the suffix INVSFS.
M_START armSP_FFTInv_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe,r4
FFTSTAGE "TRUE","TRUE",INVSFS
M_END
.end

Просмотреть файл

@ -1,320 +0,0 @@
@//
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@// Use of this source code is governed by a BSD-style license
@// that can be found in the LICENSE file in the root of the source
@// tree. An additional intellectual property rights grant can be found
@// in the file PATENTS. All contributing project authors may
@// be found in the AUTHORS file in the root of the source tree.
@//
@// This file was originally licensed as follows. It has been
@// relicensed with permission from the copyright holders.
@//
@//
@// File Name: armSP_FFT_CToC_SC32_Radix4_fs_unsafe_s.s
@// OpenMAX DL: v1.0.2
@// Last Modified Revision: 7767
@// Last Modified Date: Thu, 27 Sep 2007
@//
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
@//
@//
@//
@// Description:
@// Compute a first stage Radix 4 FFT stage for a N point complex signal
@//
@// Include standard headers
#include "dl/api/armCOMM_s.h"
#include "dl/api/omxtypes_s.h"
@// Import symbols required from other files
@// (For example tables)
@// Set debugging level
@//DEBUG_ON SETL {TRUE}
@// Guarding implementation by the processor name
@// Guarding implementation by the processor name
@//Input Registers
#define pSrc r0
#define pDst r2
@// pTwiddle is declared but never referenced by FFTSTAGE in this file
#define pTwiddle r1
#define pPingPongBuf r5
#define subFFTNum r6
#define subFFTSize r7
@//Output Registers
@//Local Scratch Registers
#define grpSize r3
@// Reuse grpSize as setCount
#define setCount r3
@// pointStep and outPointStep are two names for the same register (r4)
#define pointStep r4
#define outPointStep r4
#define setStep r8
#define step1 r9
#define step3 r10
@// Neon Registers
@// Input points X0..X3 occupy Q0-Q3 (real part in the low D register,
@// imaginary part in the high one)
#define dXr0 D0.S32
#define dXi0 D1.S32
#define dXr1 D2.S32
#define dXi1 D3.S32
#define dXr2 D4.S32
#define dXi2 D5.S32
#define dXr3 D6.S32
#define dXi3 D7.S32
@// First butterfly stage results Y0..Y3 occupy Q4-Q7
#define dYr0 D8.S32
#define dYi0 D9.S32
#define dYr1 D10.S32
#define dYi1 D11.S32
#define dYr2 D12.S32
#define dYi2 D13.S32
#define dYr3 D14.S32
#define dYi3 D15.S32
#define qX0 Q0.S32
#define qX1 Q1.S32
#define qX2 Q2.S32
#define qX3 Q3.S32
#define qY0 Q4.S32
#define qY1 Q5.S32
#define qY2 Q6.S32
#define qY3 Q7.S32
@// Second butterfly stage results Z0..Z3 occupy Q8-Q11
#define dZr0 D16.S32
#define dZi0 D17.S32
#define dZr1 D18.S32
#define dZi1 D19.S32
#define dZr2 D20.S32
#define dZi2 D21.S32
#define dZr3 D22.S32
#define dZi3 D23.S32
#define qZ0 Q8.S32
#define qZ1 Q9.S32
#define qZ2 Q10.S32
#define qZ3 Q11.S32
@// FFTSTAGE scaled, inverse, name
@//   Emits the first-stage radix-4 butterfly pass (all twiddles are 1).
@//   scaled  : "TRUE" selects halving VHADD/VHSUB, any other value VADD/VSUB.
@//   inverse : "TRUE" selects the inverse sign pattern, any other value the
@//             forward one.
@//   name    : suffix appended to grpZeroSetLoop to keep the label unique.
@//   On exit : subFFTSize = 4, subFFTNum = (subFFTNum on entry) >> 2,
@//             pSrc = pDst - pointStep, pDst = pPingPongBuf.
@//   The macro writes r3, r4, r8-r10 and D0-D23 and saves none of them itself.
.MACRO FFTSTAGE scaled, inverse, name
@// Define stack arguments
@// pointStep = 2 * subFFTNum (entry value); it is the post-increment applied
@// to pSrc between data[k] and data[k+1]
@// Note: outPointStep = pointStep for firststage
MOV pointStep,subFFTNum,LSL #1
@// Update pSubFFTSize and pSubFFTNum regs
VLD2 {dXr0,dXi0},[pSrc :128],pointStep @// data[0]
MOV subFFTSize,#4 @// subFFTSize = 4 once this radix-4 first stage is done
@// Note: setCount = subFFTNum/4 (reuse the grpSize reg for setCount)
LSR grpSize,subFFTNum,#2
VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1]
MOV subFFTNum,grpSize
@// Calculate the step of input data for the next set
@//MOV setStep,pointStep,LSL #1
MOV setStep,grpSize,LSL #4 @// setStep = 16*grpSize
VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2]
ADD setStep,setStep,pointStep @// setStep = 3*pointStep
RSB setStep,setStep,#16 @// setStep = - 3*pointStep+16
VLD2 {dXr3,dXi3},[pSrc :128],setStep @// data[3] & update pSrc for the next set
MOV step1,pointStep,LSL #1 @// step1 = 2*pointStep
@// Pipeline prologue: Y0 of the first set is formed here; inside the loop it
@// is recomputed at the end of each pass for the following pass
.ifeqs "\scaled", "TRUE"
VHADD qY0,qX0,qX2
.else
VADD qY0,qX0,qX2
.endif
RSB step3,pointStep,#0 @// step3 = -pointStep
@// grp = 0 a special case since all the twiddle factors are 1
@// Loop on the sets : 2 sets at a time
@// Inside the loop the next set is loaded in the order data[0], data[2],
@// data[1], data[3], using step1 (+2*pointStep) and step3 (-pointStep).
@// NOTE(review): those loads are unconditional, so the final pass also loads
@// a set that is never used - confirm the source buffer tolerates the over-read.
grpZeroSetLoop\name :
@// Decrement setcount
SUBS setCount,setCount,#2 @// decrement the set loop counter; flags are consumed by the BGT at the bottom
.ifeqs "\scaled", "TRUE"
@// finish first stage of 4 point FFT
VHSUB qY2,qX0,qX2
VLD2 {dXr0,dXi0},[pSrc :128],step1 @// data[0]
VHADD qY1,qX1,qX3
VLD2 {dXr2,dXi2},[pSrc :128],step3 @// data[2]
VHSUB qY3,qX1,qX3
@// finish second stage of 4 point FFT
.ifeqs "\inverse", "TRUE"
@// Inverse: outputs are stored in the order Z0, Z3, Z1, Z2
VLD2 {dXr1,dXi1},[pSrc :128],step1 @// data[1]
VHADD qZ0,qY0,qY1
VLD2 {dXr3,dXi3},[pSrc :128],setStep @// data[3] & update pSrc for the next set
VHSUB dZr3,dYr2,dYi3
VST2 {dZr0,dZi0},[pDst :128],outPointStep
VHADD dZi3,dYi2,dYr3
VHSUB qZ1,qY0,qY1
VST2 {dZr3,dZi3},[pDst :128],outPointStep
VHADD dZr2,dYr2,dYi3
VST2 {dZr1,dZi1},[pDst :128],outPointStep
VHSUB dZi2,dYi2,dYr3
VHADD qY0,qX0,qX2 @// u0 for next iteration
VST2 {dZr2,dZi2},[pDst :128],setStep @// last store also moves pDst to the next set (+16 overall)
.else
@// Forward: outputs are stored in the order Z0, Z2, Z1, Z3
VLD2 {dXr1,dXi1},[pSrc :128],step1 @// data[1]
VHADD qZ0,qY0,qY1
VLD2 {dXr3,dXi3},[pSrc :128],setStep @// data[3] & update pSrc for the next set
VHADD dZr2,dYr2,dYi3
VST2 {dZr0,dZi0},[pDst :128],outPointStep
VHSUB dZi2,dYi2,dYr3
VHSUB qZ1,qY0,qY1
VST2 {dZr2,dZi2},[pDst :128],outPointStep
VHSUB dZr3,dYr2,dYi3
VST2 {dZr1,dZi1},[pDst :128],outPointStep
VHADD dZi3,dYi2,dYr3
VHADD qY0,qX0,qX2 @// u0 for next iteration
VST2 {dZr3,dZi3},[pDst :128],setStep @// last store also moves pDst to the next set (+16 overall)
.endif
.else
@// Unscaled build: same dataflow with plain VADD/VSUB
@// finish first stage of 4 point FFT
VSUB qY2,qX0,qX2
VLD2 {dXr0,dXi0},[pSrc :128],step1 @// data[0]
VADD qY1,qX1,qX3
VLD2 {dXr2,dXi2},[pSrc :128],step3 @// data[2]
VSUB qY3,qX1,qX3
@// finish second stage of 4 point FFT
.ifeqs "\inverse", "TRUE"
@// Inverse: outputs are stored in the order Z0, Z3, Z1, Z2
VLD2 {dXr1,dXi1},[pSrc :128],step1 @// data[1]
VADD qZ0,qY0,qY1
VLD2 {dXr3,dXi3},[pSrc :128],setStep @// data[3] & update pSrc for the next set
VSUB dZr3,dYr2,dYi3
VST2 {dZr0,dZi0},[pDst :128],outPointStep
VADD dZi3,dYi2,dYr3
VSUB qZ1,qY0,qY1
VST2 {dZr3,dZi3},[pDst :128],outPointStep
VADD dZr2,dYr2,dYi3
VST2 {dZr1,dZi1},[pDst :128],outPointStep
VSUB dZi2,dYi2,dYr3
VADD qY0,qX0,qX2 @// u0 for next iteration
VST2 {dZr2,dZi2},[pDst :128],setStep @// last store also moves pDst to the next set (+16 overall)
.else
@// Forward: outputs are stored in the order Z0, Z2, Z1, Z3
VLD2 {dXr1,dXi1},[pSrc :128],step1 @// data[1]
VADD qZ0,qY0,qY1
VLD2 {dXr3,dXi3},[pSrc :128],setStep @// data[3] & update pSrc for the next set
VADD dZr2,dYr2,dYi3
VST2 {dZr0,dZi0},[pDst :128],outPointStep
VSUB dZi2,dYi2,dYr3
VSUB qZ1,qY0,qY1
VST2 {dZr2,dZi2},[pDst :128],outPointStep
VSUB dZr3,dYr2,dYi3
VST2 {dZr1,dZi1},[pDst :128],outPointStep
VADD dZi3,dYi2,dYr3
VADD qY0,qX0,qX2 @// u0 for next iteration
VST2 {dZr3,dZi3},[pDst :128],setStep @// last store also moves pDst to the next set (+16 overall)
.endif
.endif
BGT grpZeroSetLoop\name @// uses the flags of the SUBS at the loop top; nothing in between sets flags
@// reset pSrc to pDst for the next stage
SUB pSrc,pDst,pointStep @// pSrc = pDst - pointStep
MOV pDst,pPingPongBuf
.endm
@// Forward, unscaled radix-4 first stage: FFTSTAGE with scaled="FALSE",
@// inverse="FALSE"; the loop label gets the suffix fwd.
M_START armSP_FFTFwd_CToC_SC32_Radix4_fs_OutOfPlace_unsafe,r4
FFTSTAGE "FALSE","FALSE",fwd
M_END
@// Inverse, unscaled radix-4 first stage: FFTSTAGE with scaled="FALSE",
@// inverse="TRUE"; the loop label gets the suffix inv.
M_START armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe,r4
FFTSTAGE "FALSE","TRUE",inv
M_END
@// Forward, scaled radix-4 first stage: FFTSTAGE with scaled="TRUE",
@// inverse="FALSE"; the loop label gets the suffix fwdsfs.
M_START armSP_FFTFwd_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe,r4
FFTSTAGE "TRUE","FALSE",fwdsfs
M_END
@// Inverse, scaled radix-4 first stage: FFTSTAGE with scaled="TRUE",
@// inverse="TRUE"; the loop label gets the suffix invsfs.
M_START armSP_FFTInv_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe,r4
FFTSTAGE "TRUE","TRUE",invsfs
M_END
.end

Просмотреть файл

@ -1,404 +0,0 @@
@//
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@// Use of this source code is governed by a BSD-style license
@// that can be found in the LICENSE file in the root of the source
@// tree. An additional intellectual property rights grant can be found
@// in the file PATENTS. All contributing project authors may
@// be found in the AUTHORS file in the root of the source tree.
@//
@// This file was originally licensed as follows. It has been
@// relicensed with permission from the copyright holders.
@//
@//
@// File Name: armSP_FFT_CToC_SC32_Radix4_ls_unsafe_s.s
@// OpenMAX DL: v1.0.2
@// Last Modified Revision: 7767
@// Last Modified Date: Thu, 27 Sep 2007
@//
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
@//
@//
@//
@// Description:
@// Compute a Radix 4 FFT stage for a N point complex signal
@//
@// Include standard headers
#include "dl/api/armCOMM_s.h"
#include "dl/api/omxtypes_s.h"
@// Import symbols required from other files
@// (For example tables)
@// Set debugging level
@//DEBUG_ON SETL {TRUE}
@// Guarding implementation by the processor name
@// Import symbols required from other files
@// (For example tables)
@//IMPORT armAAC_constTable
@//Input Registers
@// Core register aliases used by the FFTSTAGE macro below.
@// In this file subFFTNum is only written by the macro (it is set to 1), never read.
@// Keep comments off the define lines themselves: anything after the register name
@// becomes part of the alias text.
#define pSrc r0
#define pDst r2
#define pTwiddle r1
#define subFFTNum r6
#define subFFTSize r7
@//Output Registers
@//Local Scratch Registers
@// pTmp shares r4 with grpCount. The macro only writes pTmp after the group loop
@// has finished, when grpCount is no longer needed.
#define outPointStep r3
#define grpCount r4
#define dstStep r5
#define grpTwStep r8
#define stepTwiddle r9
#define twStep r10
#define pTmp r4
#define step16 r11
#define step24 r12
@// Neon Registers
@// dButterfly* and dX* are two sets of names for the same registers D0-D7.
@// The macro uses the dButterfly names for the VLD4 loads and the VUZP shuffle,
@// and the dX names for the twiddle multiplies that follow.
#define dButterfly1Real02 D0.S32
#define dButterfly1Imag02 D1.S32
#define dButterfly1Real13 D2.S32
#define dButterfly1Imag13 D3.S32
#define dButterfly2Real02 D4.S32
#define dButterfly2Imag02 D5.S32
#define dButterfly2Real13 D6.S32
#define dButterfly2Imag13 D7.S32
#define dXr0 D0.S32
#define dXi0 D1.S32
#define dXr1 D2.S32
#define dXi1 D3.S32
#define dXr2 D4.S32
#define dXi2 D5.S32
#define dXr3 D6.S32
#define dXi3 D7.S32
@// Butterfly intermediates (first 4-point stage results), D16-D23 = Q8-Q11
#define dYr0 D16.S32
#define dYi0 D17.S32
#define dYr1 D18.S32
#define dYi1 D19.S32
#define dYr2 D20.S32
#define dYi2 D21.S32
#define dYr3 D22.S32
#define dYi3 D23.S32
@// Twiddle factors, D8-D13
#define dW1r D8.S32
#define dW1i D9.S32
#define dW2r D10.S32
#define dW2i D11.S32
#define dW3r D12.S32
#define dW3i D13.S32
@// 64-bit product accumulators for the twiddle multiplies.
@// These overlap other names: qT0 is Q7 (same as qZ0, D14/D15) and qT1-qT4 are
@// Q8-Q11 (same as qY0-qY3, D16-D23). The macro narrows every product with VRSHRN
@// before the overlapping name is written.
#define qT0 Q7.S64
#define qT1 Q8.S64
#define qT2 Q9.S64
#define qT3 Q10.S64
#define qT4 Q11.S64
#define qT5 Q12.S64
@// Twiddled inputs and final outputs. dZr0/dZi0 are D14/D15, the rest D26-D31.
#define dZr0 D14.S32
#define dZi0 D15.S32
#define dZr1 D26.S32
#define dZi1 D27.S32
#define dZr2 D28.S32
#define dZi2 D29.S32
#define dZr3 D30.S32
#define dZi3 D31.S32
@// Quad-register views of the D-register pairs above (Qn covers D2n and D2n+1)
#define qX0 Q0.S32
#define qY0 Q8.S32
#define qY1 Q9.S32
#define qY2 Q10.S32
#define qY3 Q11.S32
#define qZ0 Q7.S32
#define qZ1 Q13.S32
#define qZ2 Q14.S32
#define qZ3 Q15.S32
@// FFTSTAGE scaled, inverse, name
@//   Radix-4 stage with one butterfly per group and a different twiddle set per
@//   group; two groups are processed per loop iteration.
@//   scaled  : TRUE selects the halving VHADD/VHSUB butterflies, otherwise VADD/VSUB
@//   inverse : TRUE multiplies by the conjugated twiddle and swaps the +j/-j outputs
@//   name    : suffix pasted onto the grpLoop label so each expansion has its own label
@//   Uses the register aliases defined above. On exit subFFTNum is 1, subFFTSize has
@//   been multiplied by 4, pSrc and pDst have been swapped and rewound, and pTwiddle
@//   has been rewound.
@//   The loop is software pipelined: twiddles and input data for the next iteration
@//   are loaded while the current iteration is still being computed.
.MACRO FFTSTAGE scaled, inverse , name
@// Define stack arguments
@// pOut0+1 increments pOut0 by 8 bytes
@// pOut0+outPointStep == increment of 8*outPointStep bytes
MOV outPointStep,subFFTSize,LSL #3
@// Update grpCount and grpSize right away
@// The first three twiddle loads read from the same address; only the third one
@// advances pTwiddle (by 16 bytes).
VLD2 {dW1r,dW1i},[pTwiddle :128] @// [wi|wr]
MOV step16,#16
LSL grpCount,subFFTSize,#2
VLD1 dW2r,[pTwiddle :64] @// [wi|wr]
MOV subFFTNum,#1 @//after the last stage
VLD1 dW3r,[pTwiddle :64],step16 @// [wi|wr]
MOV stepTwiddle,#0
VLD1 dW2i,[pTwiddle :64]! @// [wi|wr]
SUB grpTwStep,stepTwiddle,#8 @// grpTwStep = -8 to start with
@// update subFFTSize for the next stage
MOV subFFTSize,grpCount
VLD1 dW3i,[pTwiddle :64],grpTwStep @// [wi|wr]
MOV dstStep,outPointStep,LSL #1
VLD4 {dButterfly1Real02,dButterfly1Imag02,dButterfly1Real13,dButterfly1Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
ADD dstStep,dstStep,outPointStep @// dstStep = 3*outPointStep
RSB dstStep,dstStep,#16 @// dstStep = - 3*outPointStep+16
MOV step24,#24
VLD4 {dButterfly2Real02,dButterfly2Imag02,dButterfly2Real13,dButterfly2Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
@// Process two groups at a time
grpLoop\name :
@// VZIP regroups the two [wr,wi] pairs loaded with VLD1 into one vector of real
@// parts and one vector of imaginary parts (one lane per group).
VZIP dW2r,dW2i
ADD stepTwiddle,stepTwiddle,#16 @// increment for the next iteration
VZIP dW3r,dW3i
ADD grpTwStep,stepTwiddle,#4
@// VUZP regroups the two VLD4 results so that each dX register holds the same
@// point (A, B, C or D) of both groups.
VUZP dButterfly1Real13, dButterfly2Real13 @// B.r D.r
SUB twStep,stepTwiddle,#16 @// -16+stepTwiddle
VUZP dButterfly1Imag13, dButterfly2Imag13 @// B.i D.i
MOV grpTwStep,grpTwStep,LSL #1
VUZP dButterfly1Real02, dButterfly2Real02 @// A.r C.r
RSB grpTwStep,grpTwStep,#0 @// -8-2*stepTwiddle
VUZP dButterfly1Imag02, dButterfly2Imag02 @// A.i C.i
@// The flags set here are consumed by the BGT at the bottom of the loop.
@// No instruction in between may update the flags.
SUBS grpCount,grpCount,#8 @// grpCount is multiplied by 4
@// X1 * W1 (inverse: X1 * conj(W1)), accumulated in 64 bits
.ifeqs "\inverse", "TRUE"
VMULL qT0,dW1r,dXr1
VMLAL qT0,dW1i,dXi1 @// real part
VMULL qT1,dW1r,dXi1
VMLSL qT1,dW1i,dXr1 @// imag part
.else
VMULL qT0,dW1r,dXr1
VMLSL qT0,dW1i,dXi1 @// real part
VMULL qT1,dW1r,dXi1
VMLAL qT1,dW1i,dXr1 @// imag part
.endif
@// W1 for the next iteration
VLD2 {dW1r,dW1i},[pTwiddle :128],stepTwiddle @// [wi|wr]
@// X2 * W2 (inverse: X2 * conj(W2)); the W2 reload is interleaved
.ifeqs "\inverse", "TRUE"
VMULL qT2,dW2r,dXr2
VMLAL qT2,dW2i,dXi2 @// real part
VMULL qT3,dW2r,dXi2
VLD1 dW2r,[pTwiddle :64],step16 @// [wi|wr]
VMLSL qT3,dW2i,dXr2 @// imag part
.else
VMULL qT2,dW2r,dXr2
VMLSL qT2,dW2i,dXi2 @// real part
VMULL qT3,dW2r,dXi2
VLD1 dW2r,[pTwiddle :64],step16 @// [wi|wr]
VMLAL qT3,dW2i,dXr2 @// imag part
.endif
@// Narrow the 64-bit products back to 32 bits with rounding (shift right by 31)
VRSHRN dZr1,qT0,#31
VLD1 dW2i,[pTwiddle :64],twStep @// [wi|wr]
VRSHRN dZi1,qT1,#31
@// qZ0 shares Q7 with qT0, which has been narrowed into dZr1 just above
VMOV qZ0,qX0 @// move qX0 so as to load for the next iteration
VLD4 {dButterfly1Real02,dButterfly1Imag02,dButterfly1Real13,dButterfly1Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
@// X3 * W3 (inverse: X3 * conj(W3)); the W3 reload is interleaved
.ifeqs "\inverse", "TRUE"
VMULL qT4,dW3r,dXr3
VMLAL qT4,dW3i,dXi3 @// real part
VMULL qT5,dW3r,dXi3
VLD1 dW3r,[pTwiddle :64],step24
VMLSL qT5,dW3i,dXr3 @// imag part
.else
VMULL qT4,dW3r,dXr3
VMLSL qT4,dW3i,dXi3 @// real part
VMULL qT5,dW3r,dXi3
VLD1 dW3r,[pTwiddle :64],step24
VMLAL qT5,dW3i,dXr3 @// imag part
.endif
VRSHRN dZr2,qT2,#31
VLD1 dW3i,[pTwiddle :64],grpTwStep @// [wi|wr]
VRSHRN dZi2,qT3,#31
VRSHRN dZr3,qT4,#31
VRSHRN dZi3,qT5,#31
@// Second half of the input data for the next iteration
VLD4 {dButterfly2Real02,dButterfly2Imag02,dButterfly2Real13,dButterfly2Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
.ifeqs "\scaled", "TRUE"
@// finish first stage of 4 point FFT
VHADD qY0,qZ0,qZ2
VHSUB qY2,qZ0,qZ2
VHADD qY1,qZ1,qZ3
VHSUB qY3,qZ1,qZ3
@// finish second stage of 4 point FFT
@// The first three stores advance pDst by outPointStep each; the fourth store
@// uses dstStep to step back to the first output row, 16 bytes further on.
.ifeqs "\inverse", "TRUE"
VHSUB qZ0,qY2,qY1
VHADD dZr3,dYr0,dYi3
VST2 {dZr0,dZi0},[pDst :128],outPointStep
VHSUB dZi3,dYi0,dYr3
VHADD qZ2,qY2,qY1
VST2 {dZr3,dZi3},[pDst :128],outPointStep
VHSUB dZr1,dYr0,dYi3
VST2 {dZr2,dZi2},[pDst :128],outPointStep
VHADD dZi1,dYi0,dYr3
VST2 {dZr1,dZi1},[pDst :128],dstStep @// dstStep = -3*outPointStep + 16
.else
VHSUB qZ0,qY2,qY1
VHSUB dZr1,dYr0,dYi3
VST2 {dZr0,dZi0},[pDst :128],outPointStep
VHADD dZi1,dYi0,dYr3
VHADD qZ2,qY2,qY1
VST2 {dZr1,dZi1},[pDst :128],outPointStep
VHADD dZr3,dYr0,dYi3
VST2 {dZr2,dZi2},[pDst :128],outPointStep
VHSUB dZi3,dYi0,dYr3
VST2 {dZr3,dZi3},[pDst :128],dstStep @// dstStep = -3*outPointStep + 16
.endif
.else
@// finish first stage of 4 point FFT
VADD qY0,qZ0,qZ2
VSUB qY2,qZ0,qZ2
VADD qY1,qZ1,qZ3
VSUB qY3,qZ1,qZ3
@// finish second stage of 4 point FFT
@// Same store pattern as the scaled path above
.ifeqs "\inverse", "TRUE"
VSUB qZ0,qY2,qY1
VADD dZr3,dYr0,dYi3
VST2 {dZr0,dZi0},[pDst :128],outPointStep
VSUB dZi3,dYi0,dYr3
VADD qZ2,qY2,qY1
VST2 {dZr3,dZi3},[pDst :128],outPointStep
VSUB dZr1,dYr0,dYi3
VST2 {dZr2,dZi2},[pDst :128],outPointStep
VADD dZi1,dYi0,dYr3
VST2 {dZr1,dZi1},[pDst :128],dstStep @// dstStep = -3*outPointStep + 16
.else
VSUB qZ0,qY2,qY1
VSUB dZr1,dYr0,dYi3
VST2 {dZr0,dZi0},[pDst :128],outPointStep
VADD dZi1,dYi0,dYr3
VADD qZ2,qY2,qY1
VST2 {dZr1,dZi1},[pDst :128],outPointStep
VADD dZr3,dYr0,dYi3
VST2 {dZr2,dZi2},[pDst :128],outPointStep
VSUB dZi3,dYi0,dYr3
VST2 {dZr3,dZi3},[pDst :128],dstStep @// dstStep = -3*outPointStep + 16
.endif
.endif
@// Loop while grpCount is still positive (flags from the SUBS near the loop top)
BGT grpLoop\name
@// Reset and Swap pSrc and pDst for the next stage
@// pTmp shares r4 with grpCount, which is no longer needed here
MOV pTmp,pDst
SUB pSrc,pSrc,#64 @// Extra increment done in final iteration of the loop
SUB pDst,pSrc,outPointStep,LSL #2 @// pDst -= 4*size; pSrc -= 8*size bytes
SUB pSrc,pTmp,outPointStep
SUB pTwiddle,pTwiddle,subFFTSize,LSL #1
SUB pTwiddle,pTwiddle,#16 @// Extra increment done in final iteration of the loop
.endm
@// Entry points for the last-stage radix-4 kernels.
@// Each function body is one expansion of FFTSTAGE (scaled, inverse, name) placed
@// between M_START and M_END. The name argument is pasted onto the grpLoop label
@// inside the macro, so it must be different for every expansion in this file.
@// NOTE(review): M_START and M_END are not defined in this file; presumably they come
@// from armCOMM_s.h and emit the function label, prologue and epilogue, with r4 naming
@// the register range to preserve -- confirm against that header.
@// Forward transform, unscaled
M_START armSP_FFTFwd_CToC_SC32_Radix4_ls_OutOfPlace_unsafe,r4
FFTSTAGE "FALSE","FALSE",fwd
M_END
@// Inverse transform, unscaled
M_START armSP_FFTInv_CToC_SC32_Radix4_ls_OutOfPlace_unsafe,r4
FFTSTAGE "FALSE","TRUE",inv
M_END
@// Forward transform, scaled (halving butterflies)
M_START armSP_FFTFwd_CToC_SC32_Sfs_Radix4_ls_OutOfPlace_unsafe,r4
FFTSTAGE "TRUE","FALSE",fwdsfs
M_END
@// Inverse transform, scaled (halving butterflies)
M_START armSP_FFTInv_CToC_SC32_Sfs_Radix4_ls_OutOfPlace_unsafe,r4
FFTSTAGE "TRUE","TRUE",invsfs
M_END
.end

Просмотреть файл

@ -1,395 +0,0 @@
@//
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@// Use of this source code is governed by a BSD-style license
@// that can be found in the LICENSE file in the root of the source
@// tree. An additional intellectual property rights grant can be found
@// in the file PATENTS. All contributing project authors may
@// be found in the AUTHORS file in the root of the source tree.
@//
@// This file was originally licensed as follows. It has been
@// relicensed with permission from the copyright holders.
@//
@//
@// File Name: armSP_FFT_CToC_SC32_Radix4_unsafe_s.s
@// OpenMAX DL: v1.0.2
@// Last Modified Revision: 7767
@// Last Modified Date: Thu, 27 Sep 2007
@//
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
@//
@//
@//
@// Description:
@// Compute a Radix 4 FFT stage for a N point complex signal
@//
@// Include standard headers
#include "dl/api/armCOMM_s.h"
#include "dl/api/omxtypes_s.h"
@// Import symbols required from other files
@// (For example tables)
@// Set debugging level
@//DEBUG_ON SETL {TRUE}
@// Guarding implementation by the processor name
@// Guarding implementation by the processor name
@// Import symbols required from other files
@// (For example tables)
@//Input Registers
@// Core register aliases used by the FFTSTAGE macro below.
@// Keep comments off the define lines themselves: anything after the register name
@// becomes part of the alias text.
#define pSrc r0
#define pDst r2
#define pTwiddle r1
#define subFFTNum r6
#define subFFTSize r7
@//Output Registers
@//Local Scratch Registers
@// t1 shares r3 with grpCount. The macro only writes t1 after the group loop has
@// finished, when grpCount is no longer needed.
@// setCount is r14, the link register.
@// NOTE(review): this relies on the return address being saved elsewhere, presumably
@// by the M_START prologue -- confirm.
#define grpCount r3
#define pointStep r4
#define outPointStep r5
#define stepTwiddle r12
#define setCount r14
#define srcStep r8
#define setStep r9
#define dstStep r10
#define twStep r11
#define t1 r3
@// Neon Registers
@// One complex twiddle per D register; the macro uses lane 0 and lane 1 as the
@// scalar operands of the multiplies.
#define dW1 D0.S32
#define dW2 D1.S32
#define dW3 D2.S32
@// Input points, D4-D11
#define dXr0 D4.S32
#define dXi0 D5.S32
#define dXr1 D6.S32
#define dXi1 D7.S32
#define dXr2 D8.S32
#define dXi2 D9.S32
#define dXr3 D10.S32
#define dXi3 D11.S32
@// Butterfly intermediates, D12-D19 = Q6-Q9
#define dYr0 D12.S32
#define dYi0 D13.S32
#define dYr1 D14.S32
#define dYi1 D15.S32
#define dYr2 D16.S32
#define dYi2 D17.S32
#define dYr3 D18.S32
#define dYi3 D19.S32
@// 64-bit product accumulators. They overlap the qY names below:
@// qT0 = Q8 = qY2, qT1 = Q9 = qY3, qT2 = Q6 = qY0, qT3 = Q7 = qY1.
@// The macro narrows every product with VRSHRN before the qY names are written.
#define qT0 Q8.S64
#define qT1 Q9.S64
#define qT2 Q6.S64
#define qT3 Q7.S64
@// Twiddled inputs and final outputs, D20-D27 = Q10-Q13
#define dZr0 D20.S32
#define dZi0 D21.S32
#define dZr1 D22.S32
#define dZi1 D23.S32
#define dZr2 D24.S32
#define dZi2 D25.S32
#define dZr3 D26.S32
#define dZi3 D27.S32
@// Quad-register views of the D-register pairs above (Qn covers D2n and D2n+1)
#define qY0 Q6.S32
#define qY1 Q7.S32
#define qY2 Q8.S32
#define qY3 Q9.S32
#define qX0 Q2.S32
#define qZ0 Q10.S32
#define qZ1 Q11.S32
#define qZ2 Q12.S32
#define qZ3 Q13.S32
@// FFTSTAGE scaled, inverse, name
@//   General radix-4 stage: an outer loop over groups (one twiddle set per group)
@//   and an inner loop over sets, two sets per inner iteration.
@//   scaled  : TRUE selects the halving VHADD/VHSUB butterflies, otherwise VADD/VSUB
@//   inverse : TRUE multiplies by the conjugated twiddle and swaps the +j/-j outputs
@//   name    : suffix pasted onto the grpLoop and setLoop labels so each expansion
@//             has its own labels
@//   Uses the register aliases defined above. On exit subFFTNum has been divided by
@//   4, subFFTSize has been multiplied by 4, and pSrc and pDst have been swapped and
@//   rewound.
.MACRO FFTSTAGE scaled, inverse , name
@// Define stack arguments
@// Update grpCount and grpSize right away in order to reuse pGrpCount and pGrpSize regs
LSL grpCount,subFFTSize,#2
LSR subFFTNum,subFFTNum,#2
MOV subFFTSize,grpCount
@// The three initial twiddle loads all read the same address (no writeback)
VLD1 dW1,[pTwiddle] @//[wi | wr]
@// pT0+1 increments pT0 by 8 bytes
@// pT0+pointStep = increment of 8*pointStep bytes = 2*grpSize bytes
MOV pointStep,subFFTNum,LSL #1
@// pOut0+1 increments pOut0 by 8 bytes
@// pOut0+outPointStep == increment of 8*outPointStep bytes = 2*size bytes
MOV stepTwiddle,#0
VLD1 dW2,[pTwiddle] @//[wi | wr]
@// SMULBB multiplies only the low 16 bits of each operand (signed).
@// NOTE(review): assumes grpCount and pointStep both fit in 16 bits -- confirm
@// against the largest supported FFT size.
SMULBB outPointStep,grpCount,pointStep
LSL pointStep,pointStep,#2 @// 2*grpSize
VLD1 dW3,[pTwiddle] @//[wi | wr]
MOV srcStep,pointStep,LSL #1 @// srcStep = 2*pointStep
ADD setStep,srcStep,pointStep @// setStep = 3*pointStep
@//RSB setStep,setStep,#16 @// setStep = - 3*pointStep+16
RSB setStep,setStep,#0 @// setStep = - 3*pointStep
SUB srcStep,srcStep,#16 @// srcStep = 2*pointStep-16
MOV dstStep,outPointStep,LSL #1
ADD dstStep,dstStep,outPointStep @// dstStep = 3*outPointStep
RSB dstStep,dstStep,#16 @// dstStep = - 3*outPointStep+16
grpLoop\name :
@// Preload the four input points of the first two sets of this group
VLD2 {dXr0,dXi0},[pSrc],pointStep @// data[0]
ADD stepTwiddle,stepTwiddle,pointStep
VLD2 {dXr1,dXi1},[pSrc],pointStep @// data[1]
ADD pTwiddle,pTwiddle,stepTwiddle @// set pTwiddle to the first point
VLD2 {dXr2,dXi2},[pSrc],pointStep @// data[2]
MOV twStep,stepTwiddle,LSL #2
VLD2 {dXr3,dXi3},[pSrc],setStep @// data[3] & update pSrc for the next set
SUB twStep,stepTwiddle,twStep @// twStep = -3*stepTwiddle
MOV setCount,pointStep,LSR #3
ADD pSrc,pSrc,#16 @// set pSrc to data[0] of the next set
ADD pSrc,pSrc,pointStep @// increment to data[1] of the next set
@// Loop on the sets
setLoop\name :
@// The flags set here are consumed by the BGT at the bottom of the set loop.
@// No instruction in between may update the flags.
SUBS setCount,setCount,#2 @// decrement the loop counter
@// X1 * W1 (inverse: X1 * conj(W1)), accumulated in 64 bits
.ifeqs "\inverse", "TRUE"
VMULL qT0,dXr1,dW1[0]
VMLAL qT0,dXi1,dW1[1] @// real part
VMULL qT1,dXi1,dW1[0]
VMLSL qT1,dXr1,dW1[1] @// imag part
.else
VMULL qT0,dXr1,dW1[0]
VMLSL qT0,dXi1,dW1[1] @// real part
VMULL qT1,dXi1,dW1[0]
VMLAL qT1,dXr1,dW1[1] @// imag part
.endif
VLD2 {dXr1,dXi1},[pSrc],pointStep @// data[1] for next iteration
@// X2 * W2 (inverse: X2 * conj(W2))
.ifeqs "\inverse", "TRUE"
VMULL qT2,dXr2,dW2[0]
VMLAL qT2,dXi2,dW2[1] @// real part
VMULL qT3,dXi2,dW2[0]
VMLSL qT3,dXr2,dW2[1] @// imag part
.else
VMULL qT2,dXr2,dW2[0]
VMLSL qT2,dXi2,dW2[1] @// real part
VMULL qT3,dXi2,dW2[0]
VMLAL qT3,dXr2,dW2[1] @// imag part
.endif
@// Narrow the X1 products to 32 bits with rounding; qT0 and qT1 are reused below
VRSHRN dZr1,qT0,#31
VRSHRN dZi1,qT1,#31
VLD2 {dXr2,dXi2},[pSrc],pointStep @// data[2] for next iteration
@// X3 * W3 (inverse: X3 * conj(W3)), reusing qT0 and qT1
.ifeqs "\inverse", "TRUE"
VMULL qT0,dXr3,dW3[0]
VMLAL qT0,dXi3,dW3[1] @// real part
VMULL qT1,dXi3,dW3[0]
VMLSL qT1,dXr3,dW3[1] @// imag part
.else
VMULL qT0,dXr3,dW3[0]
VMLSL qT0,dXi3,dW3[1] @// real part
VMULL qT1,dXi3,dW3[0]
VMLAL qT1,dXr3,dW3[1] @// imag part
.endif
VRSHRN dZr2,qT2,#31
VRSHRN dZi2,qT3,#31
VRSHRN dZr3,qT0,#31
VRSHRN dZi3,qT1,#31
VLD2 {dXr3,dXi3},[pSrc],setStep @// data[3] & update pSrc to data[0]
.ifeqs "\scaled", "TRUE"
@// finish first stage of 4 point FFT
VHADD qY0,qX0,qZ2
VHSUB qY2,qX0,qZ2
@// NOTE(review): this load carries no alignment hint, while the matching load in
@// the unscaled path below uses :128 -- confirm whether the difference is intended.
VLD2 {dXr0,dXi0},[pSrc]! @// data[0] for next iteration
VHADD qY1,qZ1,qZ3
VHSUB qY3,qZ1,qZ3
@// finish second stage of 4 point FFT
@// The first three stores advance pDst by outPointStep each; the fourth store
@// uses dstStep (= -3*outPointStep + 16) to step back to the first output row.
VHSUB qZ0,qY2,qY1
.ifeqs "\inverse", "TRUE"
VHADD dZr3,dYr0,dYi3
VST2 {dZr0,dZi0},[pDst :128],outPointStep
VHSUB dZi3,dYi0,dYr3
VHADD qZ2,qY2,qY1
VST2 {dZr3,dZi3},[pDst :128],outPointStep
VHSUB dZr1,dYr0,dYi3
VST2 {dZr2,dZi2},[pDst :128],outPointStep
VHADD dZi1,dYi0,dYr3
VST2 {dZr1,dZi1},[pDst :128],dstStep
.else
VHSUB dZr1,dYr0,dYi3
VST2 {dZr0,dZi0},[pDst :128],outPointStep
VHADD dZi1,dYi0,dYr3
VHADD qZ2,qY2,qY1
VST2 {dZr1,dZi1},[pDst :128],outPointStep
VHADD dZr3,dYr0,dYi3
VST2 {dZr2,dZi2},[pDst :128],outPointStep
VHSUB dZi3,dYi0,dYr3
VST2 {dZr3,dZi3},[pDst :128],dstStep
.endif
.else
@// finish first stage of 4 point FFT
VADD qY0,qX0,qZ2
VSUB qY2,qX0,qZ2
VLD2 {dXr0,dXi0},[pSrc :128]! @// data[0] for next iteration
VADD qY1,qZ1,qZ3
VSUB qY3,qZ1,qZ3
@// finish second stage of 4 point FFT
@// Same store pattern as the scaled path above
VSUB qZ0,qY2,qY1
.ifeqs "\inverse", "TRUE"
VADD dZr3,dYr0,dYi3
VST2 {dZr0,dZi0},[pDst :128],outPointStep
VSUB dZi3,dYi0,dYr3
VADD qZ2,qY2,qY1
VST2 {dZr3,dZi3},[pDst :128],outPointStep
VSUB dZr1,dYr0,dYi3
VST2 {dZr2,dZi2},[pDst :128],outPointStep
VADD dZi1,dYi0,dYr3
VST2 {dZr1,dZi1},[pDst :128],dstStep
.else
VSUB dZr1,dYr0,dYi3
VST2 {dZr0,dZi0},[pDst :128],outPointStep
VADD dZi1,dYi0,dYr3
VADD qZ2,qY2,qY1
VST2 {dZr1,dZi1},[pDst :128],outPointStep
VADD dZr3,dYr0,dYi3
VST2 {dZr2,dZi2},[pDst :128],outPointStep
VSUB dZi3,dYi0,dYr3
VST2 {dZr3,dZi3},[pDst :128],dstStep
.endif
.endif
ADD pSrc,pSrc,pointStep @// increment to data[1] of the next set
@// Loop while setCount is still positive (flags from the SUBS at the loop top)
BGT setLoop\name
@// Twiddles for the next group; the SUBS below feeds the BGT that closes the group loop
VLD1 dW1,[pTwiddle :64],stepTwiddle @//[wi | wr]
SUBS grpCount,grpCount,#4 @// subtract 4 since grpCount multiplied by 4
VLD1 dW2,[pTwiddle :64],stepTwiddle @//[wi | wr]
ADD pSrc,pSrc,srcStep @// increment pSrc for the next grp
VLD1 dW3,[pTwiddle :64],twStep @//[wi | wr]
BGT grpLoop\name
@// Reset and Swap pSrc and pDst for the next stage
@// t1 shares r3 with grpCount, which is no longer needed here
MOV t1,pDst
SUB pDst,pSrc,outPointStep,LSL #2 @// pDst -= 2*size; pSrc -= 8*size bytes
SUB pSrc,t1,outPointStep
.endm
@// Entry points for the general radix-4 stage kernels.
@// Each function body is one expansion of FFTSTAGE (scaled, inverse, name) placed
@// between M_START and M_END. The name argument is pasted onto the grpLoop and
@// setLoop labels inside the macro, so it must be different for every expansion
@// in this file.
@// NOTE(review): M_START and M_END are not defined in this file; presumably they come
@// from armCOMM_s.h and emit the function label, prologue and epilogue, with r4 naming
@// the register range to preserve -- confirm against that header.
@// Forward transform, unscaled
M_START armSP_FFTFwd_CToC_SC32_Radix4_OutOfPlace_unsafe,r4
FFTSTAGE "FALSE","FALSE",FWD
M_END
@// Inverse transform, unscaled
M_START armSP_FFTInv_CToC_SC32_Radix4_OutOfPlace_unsafe,r4
FFTSTAGE "FALSE","TRUE",INV
M_END
@// Forward transform, scaled (halving butterflies)
M_START armSP_FFTFwd_CToC_SC32_Sfs_Radix4_OutOfPlace_unsafe,r4
FFTSTAGE "TRUE","FALSE",FWDSFS
M_END
@// Inverse transform, scaled (halving butterflies)
M_START armSP_FFTInv_CToC_SC32_Sfs_Radix4_OutOfPlace_unsafe,r4
FFTSTAGE "TRUE","TRUE",INVSFS
M_END
.end

Просмотреть файл

@ -1,595 +0,0 @@
@//
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@// Use of this source code is governed by a BSD-style license
@// that can be found in the LICENSE file in the root of the source
@// tree. An additional intellectual property rights grant can be found
@// in the file PATENTS. All contributing project authors may
@// be found in the AUTHORS file in the root of the source tree.
@//
@// This file was originally licensed as follows. It has been
@// relicensed with permission from the copyright holders.
@//
@//
@// File Name: armSP_FFT_CToC_SC32_Radix8_fs_unsafe_s.s
@// OpenMAX DL: v1.0.2
@// Last Modified Revision: 7770
@// Last Modified Date: Thu, 27 Sep 2007
@//
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
@//
@//
@//
@// Description:
@// Compute a first stage Radix 8 FFT stage for a N point complex signal
@//
@// Include standard headers
#include "dl/api/armCOMM_s.h"
#include "dl/api/omxtypes_s.h"
@// Import symbols required from other files
@// (For example tables)
@// Set debugging level
@//DEBUG_ON SETL {TRUE}
@// Guarding implementation by the processor name
@// Guarding implementation by the processor name
@//Input Registers
@// Core register aliases used by the FFTSTAGE macro below.
@// Keep comments off the define lines themselves: anything after the register name
@// becomes part of the alias text.
#define pSrc r0
#define pDst r2
#define pTwiddle r1
#define subFFTNum r6
#define subFFTSize r7
@// dest buffer for the next stage (not pSrc for first stage)
#define pPingPongBuf r5
@//Output Registers
@//Local Scratch Registers
#define grpSize r3
@// Reuse grpSize as setCount
#define setCount r3
@// pointStep and outPointStep are the same register (r4) in this first stage
#define pointStep r4
#define outPointStep r4
#define setStep r8
#define step1 r9
#define step2 r10
#define t0 r11
@// Neon Registers
@// Eight complex input points, D0-D15 = Q0-Q7
#define dXr0 D0.S32
#define dXi0 D1.S32
#define dXr1 D2.S32
#define dXi1 D3.S32
#define dXr2 D4.S32
#define dXi2 D5.S32
#define dXr3 D6.S32
#define dXi3 D7.S32
#define dXr4 D8.S32
#define dXi4 D9.S32
#define dXr5 D10.S32
#define dXi5 D11.S32
#define dXr6 D12.S32
#define dXi6 D13.S32
#define dXr7 D14.S32
#define dXi7 D15.S32
#define qX0 Q0.S32
#define qX1 Q1.S32
#define qX2 Q2.S32
#define qX3 Q3.S32
#define qX4 Q4.S32
#define qX5 Q5.S32
#define qX6 Q6.S32
#define qX7 Q7.S32
@// First butterfly stage results. Even indices use D16-D23, odd indices D24-D31.
#define dUr0 D16.S32
#define dUi0 D17.S32
#define dUr2 D18.S32
#define dUi2 D19.S32
#define dUr4 D20.S32
#define dUi4 D21.S32
#define dUr6 D22.S32
#define dUi6 D23.S32
#define dUr1 D24.S32
#define dUi1 D25.S32
#define dUr3 D26.S32
#define dUi3 D27.S32
#define dUr5 D28.S32
#define dUi5 D29.S32
@// NOTE(review): the original comment here read: reuse dXr7 and dXi7. As defined,
@// dUr7/dUi7 are D30/D31; it is dT0/dT1 further down that share D14/D15 with dXr7/dXi7.
#define dUr7 D30.S32
#define dUi7 D31.S32
#define qU0 Q8.S32
#define qU1 Q12.S32
#define qU2 Q9.S32
#define qU3 Q13.S32
#define qU4 Q10.S32
#define qU5 Q14.S32
#define qU6 Q11.S32
#define qU7 Q15.S32
@// Second butterfly stage results. The halves are swapped relative to U:
@// even indices use D24-D31 and odd indices use D16-D23, so a V value never
@// overwrites the U values it is computed from.
#define dVr0 D24.S32
#define dVi0 D25.S32
#define dVr2 D26.S32
#define dVi2 D27.S32
#define dVr4 D28.S32
#define dVi4 D29.S32
#define dVr6 D30.S32
#define dVi6 D31.S32
#define dVr1 D16.S32
#define dVi1 D17.S32
#define dVr3 D18.S32
#define dVi3 D19.S32
#define dVr5 D20.S32
#define dVi5 D21.S32
#define dVr7 D22.S32
#define dVi7 D23.S32
#define qV0 Q12.S32
#define qV1 Q8.S32
#define qV2 Q13.S32
#define qV3 Q9.S32
#define qV4 Q14.S32
#define qV5 Q10.S32
#define qV6 Q15.S32
#define qV7 Q11.S32
@// Final results. Same register assignment as U: even indices D16-D23, odd D24-D31.
#define dYr0 D16.S32
#define dYi0 D17.S32
#define dYr2 D18.S32
#define dYi2 D19.S32
#define dYr4 D20.S32
#define dYi4 D21.S32
#define dYr6 D22.S32
#define dYi6 D23.S32
#define dYr1 D24.S32
#define dYi1 D25.S32
#define dYr3 D26.S32
#define dYi3 D27.S32
#define dYr5 D28.S32
#define dYi5 D29.S32
#define dYr7 D30.S32
#define dYi7 D31.S32
#define qY0 Q8.S32
#define qY1 Q12.S32
#define qY2 Q9.S32
#define qY3 Q13.S32
#define qY4 Q10.S32
#define qY5 Q14.S32
#define qY6 Q11.S32
#define qY7 Q15.S32
@// Temporaries. dT0/dT1 are D14/D15, the same registers as dXr7/dXi7 (qX7).
@// The macro writes them only after qX7 has been consumed and before data[7]
@// is reloaded for the next iteration.
#define dT0 D14.S32
#define dT1 D15.S32
@// Define constants
@// 1/sqrt(2) as a Q31 value, used as the scalar multiplier for the V5 and V7 rotations
.set ONEBYSQRT2, 0x5A82799A @// Q31 format
@// FFTSTAGE scaled, inverse, name
@//   First-stage radix-8 butterfly. No twiddle table is read; the only non-trivial
@//   factor is 1/sqrt(2), applied with VQRDMULH. Two sets are processed per loop
@//   iteration.
@//   scaled  : TRUE selects the halving VHADD/VHSUB butterflies, otherwise VADD/VSUB
@//   inverse : TRUE selects the inverse-transform rotations and output order
@//   name    : suffix pasted onto the grpZeroSetLoop label so each expansion has
@//             its own label
@//   Uses the register aliases defined above. On exit subFFTSize is 8, subFFTNum
@//   has been divided by 8, pSrc points at the start of the data just written and
@//   pDst is set to pPingPongBuf.
@//   The loop is software pipelined: the eight inputs of the next iteration are
@//   loaded while the current iteration is still being computed and stored.
.MACRO FFTSTAGE scaled, inverse, name
@// Define stack arguments
@// Update pSubFFTSize and pSubFFTNum regs
@// The next line sets subFFTSize to 8, its value after this stage. The original
@// comment (subFFTSize = 1 for the first stage) presumably describes the value on entry.
MOV subFFTSize,#8 @// subFFTSize = 8 after this radix-8 first stage
LDR t0,=ONEBYSQRT2 @// t0=(1/sqrt(2)) as Q31 value
@// Note: setCount = subFFTNum/8 (reuse the grpSize reg for setCount)
LSR grpSize,subFFTNum,#3
MOV subFFTNum,grpSize
@// pT0+1 increments pT0 by 8 bytes
@// pT0+pointStep = increment of 8*pointStep bytes = grpSize bytes
@// Note: outPointStep = pointStep for firststage
MOV pointStep,grpSize,LSL #3
@// Calculate the step of input data for the next set
@//MOV step1,pointStep,LSL #1 @// step1 = 2*pointStep
VLD2 {dXr0,dXi0},[pSrc :128],pointStep @// data[0]
MOV step1,grpSize,LSL #4 @// step1 = 16*grpSize = 2*pointStep
MOV step2,pointStep,LSL #3
VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1]
SUB step2,step2,pointStep @// step2 = 7*pointStep
RSB setStep,step2,#16 @// setStep = - 7*pointStep+16
VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2]
VLD2 {dXr3,dXi3},[pSrc :128],pointStep @// data[3]
VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4]
VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5]
VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6]
VLD2 {dXr7,dXi7},[pSrc :128],setStep @// data[7] & update pSrc for the next set
@// setStep = -7*pointStep + 16
@// grp = 0 a special case since all the twiddle factors are 1
@// Loop on the sets
grpZeroSetLoop\name :
@// Decrement setcount
@// The flags set here are consumed by the BGT at the bottom of the loop.
@// No instruction in between may update the flags.
SUBS setCount,setCount,#2 @// decrement the set loop counter
.ifeqs "\scaled", "TRUE"
@// finish first stage of 8 point FFT
VHADD qU0,qX0,qX4
VHADD qU2,qX1,qX5
VHADD qU4,qX2,qX6
VHADD qU6,qX3,qX7
@// finish second stage of 8 point FFT
VHADD qV0,qU0,qU4
VHSUB qV2,qU0,qU4
VHADD qV4,qU2,qU6
VHSUB qV6,qU2,qU6
@// finish third stage of 8 point FFT
VHADD qY0,qV0,qV4
VHSUB qY4,qV0,qV4
VST2 {dYr0,dYi0},[pDst :128],step1 @// store y0
.ifeqs "\inverse", "TRUE"
VHSUB dYr2,dVr2,dVi6
VHADD dYi2,dVi2,dVr6
VHADD dYr6,dVr2,dVi6
VST2 {dYr2,dYi2},[pDst :128],step1 @// store y2
VHSUB dYi6,dVi2,dVr6
VHSUB qU1,qX0,qX4
VST2 {dYr4,dYi4},[pDst :128],step1 @// store y4
VHSUB qU3,qX1,qX5
VHSUB qU5,qX2,qX6
VST2 {dYr6,dYi6},[pDst :128],step1 @// store y6
.ELSE
@// Forward transform: the Y2 and Y6 register pairs swap roles relative to the
@// inverse branch, so the pair named Y6 is stored in the y2 position and the
@// pair named Y2 in the y6 position.
VHADD dYr6,dVr2,dVi6
VHSUB dYi6,dVi2,dVr6
VHSUB dYr2,dVr2,dVi6
VST2 {dYr6,dYi6},[pDst :128],step1 @// store y2
VHADD dYi2,dVi2,dVr6
VHSUB qU1,qX0,qX4
VST2 {dYr4,dYi4},[pDst :128],step1 @// store y4
VHSUB qU3,qX1,qX5
VHSUB qU5,qX2,qX6
VST2 {dYr2,dYi2},[pDst :128],step1 @// store y6
.ENDIF
@// finish first stage of 8 point FFT
VHSUB qU7,qX3,qX7
@// qX7 (D14/D15) is no longer needed after the line above, so D14 can now hold the constant
VMOV dT0[0],t0
@// finish second stage of 8 point FFT
VHSUB dVr1,dUr1,dUi5
VLD2 {dXr0,dXi0},[pSrc :128],pointStep @// data[0] for next iteration
VHADD dVi1,dUi1,dUr5
VHADD dVr3,dUr1,dUi5
VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1]
VHSUB dVi3,dUi1,dUr5
VHSUB dVr5,dUr3,dUi7
VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2]
VHADD dVi5,dUi3,dUr7
VHADD dVr7,dUr3,dUi7
VLD2 {dXr3,dXi3},[pSrc :128],pointStep @// data[3]
VHSUB dVi7,dUi3,dUr7
@// finish third stage of 8 point FFT
.ifeqs "\inverse", "TRUE"
@// calculate a*v5
VQRDMULH dT1,dVr5,dT0[0] @// dT1 is D15 (shared with dXi7); the original comment named dVi0
VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4]
VQRDMULH dVi5,dVi5,dT0[0]
VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5]
VSUB dVr5,dT1,dVi5 @// a * V5
VADD dVi5,dT1,dVi5
VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6]
@// calculate b*v7
VQRDMULH dT1,dVr7,dT0[0]
VQRDMULH dVi7,dVi7,dT0[0]
VHADD qY1,qV1,qV5
VHSUB qY5,qV1,qV5
VADD dVr7,dT1,dVi7 @// b * V7
VSUB dVi7,dVi7,dT1
@// pDst has advanced by 4*step1 = 8*pointStep; stepping back 7*pointStep lands on y1
SUB pDst, pDst, step2 @// set pDst to y1
@// dT0/dT1 are no longer needed, so D14/D15 can be reloaded with data[7]
VLD2 {dXr7,dXi7},[pSrc :128],setStep @// data[7]
VHSUB dYr3,dVr3,dVr7
VHSUB dYi3,dVi3,dVi7
VST2 {dYr1,dYi1},[pDst :128],step1 @// store y1
VHADD dYr7,dVr3,dVr7
VHADD dYi7,dVi3,dVi7
VST2 {dYr3,dYi3},[pDst :128],step1 @// store y3
VST2 {dYr5,dYi5},[pDst :128],step1 @// store y5
VST2 {dYr7,dYi7},[pDst :128]! @// store y7
.ELSE
@// Forward transform: the odd outputs are stored in reverse register order
@// (Y7, Y5, Y3, Y1) into the y1, y3, y5, y7 positions.
@// calculate b*v7
VQRDMULH dT1,dVr7,dT0[0]
VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4]
VQRDMULH dVi7,dVi7,dT0[0]
VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5]
VADD dVr7,dT1,dVi7 @// b * V7
VSUB dVi7,dVi7,dT1
VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6]
@// calculate a*v5
VQRDMULH dT1,dVr5,dT0[0] @// dT1 is D15 (shared with dXi7); the original comment named dVi0
VQRDMULH dVi5,dVi5,dT0[0]
VHADD dYr7,dVr3,dVr7
VHADD dYi7,dVi3,dVi7
SUB pDst, pDst, step2 @// set pDst to y1
VSUB dVr5,dT1,dVi5 @// a * V5
VADD dVi5,dT1,dVi5
VLD2 {dXr7,dXi7},[pSrc :128],setStep @// data[7]
VHSUB qY5,qV1,qV5
VHSUB dYr3,dVr3,dVr7
VST2 {dYr7,dYi7},[pDst :128],step1 @// store y1
VHSUB dYi3,dVi3,dVi7
VHADD qY1,qV1,qV5
VST2 {dYr5,dYi5},[pDst :128],step1 @// store y3
VST2 {dYr3,dYi3},[pDst :128],step1 @// store y5
VST2 {dYr1,dYi1},[pDst :128]! @// store y7
.ENDIF
.ELSE
@// Unscaled path: same schedule as above with VADD/VSUB in place of VHADD/VHSUB
@// finish first stage of 8 point FFT
VADD qU0,qX0,qX4
VADD qU2,qX1,qX5
VADD qU4,qX2,qX6
VADD qU6,qX3,qX7
@// finish second stage of 8 point FFT
VADD qV0,qU0,qU4
VSUB qV2,qU0,qU4
VADD qV4,qU2,qU6
VSUB qV6,qU2,qU6
@// finish third stage of 8 point FFT
VADD qY0,qV0,qV4
VSUB qY4,qV0,qV4
VST2 {dYr0,dYi0},[pDst :128],step1 @// store y0
.ifeqs "\inverse", "TRUE"
VSUB dYr2,dVr2,dVi6
VADD dYi2,dVi2,dVr6
VADD dYr6,dVr2,dVi6
VST2 {dYr2,dYi2},[pDst :128],step1 @// store y2
VSUB dYi6,dVi2,dVr6
VSUB qU1,qX0,qX4
VST2 {dYr4,dYi4},[pDst :128],step1 @// store y4
VSUB qU3,qX1,qX5
VSUB qU5,qX2,qX6
VST2 {dYr6,dYi6},[pDst :128],step1 @// store y6
.ELSE
@// Forward transform: the pair named Y6 is stored in the y2 position and the
@// pair named Y2 in the y6 position.
VADD dYr6,dVr2,dVi6
VSUB dYi6,dVi2,dVr6
VSUB dYr2,dVr2,dVi6
VST2 {dYr6,dYi6},[pDst :128],step1 @// store y2
VADD dYi2,dVi2,dVr6
VSUB qU1,qX0,qX4
VST2 {dYr4,dYi4},[pDst :128],step1 @// store y4
VSUB qU3,qX1,qX5
VSUB qU5,qX2,qX6
VST2 {dYr2,dYi2},[pDst :128],step1 @// store y6
.ENDIF
@// finish first stage of 8 point FFT
VSUB qU7,qX3,qX7
@// qX7 (D14/D15) is no longer needed after the line above, so D14 can now hold the constant
VMOV dT0[0],t0
@// finish second stage of 8 point FFT
VSUB dVr1,dUr1,dUi5
VLD2 {dXr0,dXi0},[pSrc :128],pointStep @// data[0] for next iteration
VADD dVi1,dUi1,dUr5
VADD dVr3,dUr1,dUi5
VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1]
VSUB dVi3,dUi1,dUr5
VSUB dVr5,dUr3,dUi7
VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2]
VADD dVi5,dUi3,dUr7
VADD dVr7,dUr3,dUi7
VLD2 {dXr3,dXi3},[pSrc :128],pointStep @// data[3]
VSUB dVi7,dUi3,dUr7
@// finish third stage of 8 point FFT
.ifeqs "\inverse", "TRUE"
@// calculate a*v5
VQRDMULH dT1,dVr5,dT0[0] @// dT1 is D15 (shared with dXi7); the original comment named dVi0
VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4]
VQRDMULH dVi5,dVi5,dT0[0]
VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5]
VSUB dVr5,dT1,dVi5 @// a * V5
VADD dVi5,dT1,dVi5
VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6]
@// calculate b*v7
VQRDMULH dT1,dVr7,dT0[0]
VQRDMULH dVi7,dVi7,dT0[0]
VADD qY1,qV1,qV5
VSUB qY5,qV1,qV5
VADD dVr7,dT1,dVi7 @// b * V7
VSUB dVi7,dVi7,dT1
@// pDst has advanced by 4*step1 = 8*pointStep; stepping back 7*pointStep lands on y1
SUB pDst, pDst, step2 @// set pDst to y1
@// dT0/dT1 are no longer needed, so D14/D15 can be reloaded with data[7]
VLD2 {dXr7,dXi7},[pSrc :128],setStep @// data[7]
VSUB dYr3,dVr3,dVr7
VSUB dYi3,dVi3,dVi7
VST2 {dYr1,dYi1},[pDst :128],step1 @// store y1
VADD dYr7,dVr3,dVr7
VADD dYi7,dVi3,dVi7
VST2 {dYr3,dYi3},[pDst :128],step1 @// store y3
VST2 {dYr5,dYi5},[pDst :128],step1 @// store y5
VST2 {dYr7,dYi7},[pDst :128]! @// store y7
.ELSE
@// Forward transform: the odd outputs are stored in reverse register order
@// (Y7, Y5, Y3, Y1) into the y1, y3, y5, y7 positions.
@// calculate b*v7
VQRDMULH dT1,dVr7,dT0[0]
VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4]
VQRDMULH dVi7,dVi7,dT0[0]
VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5]
VADD dVr7,dT1,dVi7 @// b * V7
VSUB dVi7,dVi7,dT1
VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6]
@// calculate a*v5
VQRDMULH dT1,dVr5,dT0[0] @// dT1 is D15 (shared with dXi7); the original comment named dVi0
VQRDMULH dVi5,dVi5,dT0[0]
VADD dYr7,dVr3,dVr7
VADD dYi7,dVi3,dVi7
SUB pDst, pDst, step2 @// set pDst to y1
VSUB dVr5,dT1,dVi5 @// a * V5
VADD dVi5,dT1,dVi5
VLD2 {dXr7,dXi7},[pSrc :128],setStep @// data[7]
VSUB qY5,qV1,qV5
VSUB dYr3,dVr3,dVr7
VST2 {dYr7,dYi7},[pDst :128],step1 @// store y1
VSUB dYi3,dVi3,dVi7
VADD qY1,qV1,qV5
VST2 {dYr5,dYi5},[pDst :128],step1 @// store y3
VST2 {dYr3,dYi3},[pDst :128],step1 @// store y5
VST2 {dYr1,dYi1},[pDst :128]! @// store y7
.ENDIF
.ENDIF
@// After the y7 store pDst sits 7*pointStep + 16 bytes past the y0 position of
@// this iteration; stepping back 7*pointStep leaves it 16 bytes further on.
SUB pDst, pDst, step2 @// update pDst for the next set
@// Loop while setCount is still positive (flags from the SUBS at the loop top)
BGT grpZeroSetLoop\name
@// reset pSrc to pDst for the next stage
SUB pSrc,pDst,pointStep @// pDst -= 2*grpSize
MOV pDst,pPingPongBuf
.endm
@// Allocate stack memory required by the function
@// Entry points for the first-stage radix-8 kernels.
@// Each function body is one expansion of FFTSTAGE (scaled, inverse, name) placed
@// between M_START and M_END. The name argument is pasted onto the grpZeroSetLoop
@// label inside the macro, so it must be different for every expansion in this file.
@// NOTE(review): M_START and M_END are not defined in this file; presumably they come
@// from armCOMM_s.h and emit the function label, prologue and epilogue, with r4 naming
@// the register range to preserve -- confirm against that header.
@// Forward transform, unscaled
M_START armSP_FFTFwd_CToC_SC32_Radix8_fs_OutOfPlace_unsafe,r4
FFTSTAGE "FALSE","FALSE",FWD
M_END
@// Inverse transform, unscaled
M_START armSP_FFTInv_CToC_SC32_Radix8_fs_OutOfPlace_unsafe,r4
FFTSTAGE "FALSE","TRUE",INV
M_END
@// Forward transform, scaled (halving butterflies)
M_START armSP_FFTFwd_CToC_SC32_Sfs_Radix8_fs_OutOfPlace_unsafe,r4
FFTSTAGE "TRUE","FALSE",FWDSFS
M_END
@// Inverse transform, scaled (halving butterflies)
M_START armSP_FFTInv_CToC_SC32_Sfs_Radix8_fs_OutOfPlace_unsafe,r4
FFTSTAGE "TRUE","TRUE",INVSFS
M_END
.end

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -1,556 +0,0 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*
* This file was originally licensed as follows. It has been
* relicensed with permission from the copyright holders.
*/
/**
*
* File Name: armSP_FFT_S32TwiddleTable.c
* OpenMAX DL: v1.0.2
* Last Modified Revision: 6781
* Last Modified Date: Wed, 25 Jul 2007
*
* (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
*
*
*
* Description:
* Twiddle table for Forward FFT in Q31 format.
* It contains complex pairs [-cos (W * i), -sin (W * i)] where W = -2*PI/N
* and 0<= i<= N/8. N is the max size of the FFT. Here N = 2^12.
* Values for N/8 < i < N are generated in the FFTInit function using the
* symmetries of cos and sine.
*
* NOTE: The values are stored negated. This is to represent '1' which cannot be otherwise
* represented as Q31 in 32 bits.
**/
#include "dl/api/omxtypes.h"
const OMX_S32 armSP_FFT_S32TwiddleTable[1026] ={
0x80000000, 0x0,
0x800009df, 0x3243f5,
0x8000277a, 0x6487e3,
0x800058d4, 0x96cbc1,
0x80009dea, 0xc90f88,
0x8000f6bd, 0xfb5330,
0x8001634e, 0x12d96b1,
0x8001e39b, 0x15fda03,
0x800277a6, 0x1921d20,
0x80031f6d, 0x1c45ffe,
0x8003daf1, 0x1f6a297,
0x8004aa32, 0x228e4e2,
0x80058d2f, 0x25b26d7,
0x800683e8, 0x28d6870,
0x80078e5e, 0x2bfa9a4,
0x8008ac90, 0x2f1ea6c,
0x8009de7e, 0x3242abf,
0x800b2427, 0x3566a96,
0x800c7d8c, 0x388a9ea,
0x800deaad, 0x3bae8b2,
0x800f6b88, 0x3ed26e6,
0x8011001f, 0x41f6480,
0x8012a86f, 0x451a177,
0x8014647b, 0x483ddc3,
0x80163440, 0x4b6195d,
0x801817bf, 0x4e8543e,
0x801a0ef8, 0x51a8e5c,
0x801c19ea, 0x54cc7b1,
0x801e3895, 0x57f0035,
0x80206af8, 0x5b137df,
0x8022b114, 0x5e36ea9,
0x80250ae7, 0x615a48b,
0x80277872, 0x647d97c,
0x8029f9b4, 0x67a0d76,
0x802c8ead, 0x6ac406f,
0x802f375d, 0x6de7262,
0x8031f3c2, 0x710a345,
0x8034c3dd, 0x742d311,
0x8037a7ac, 0x77501be,
0x803a9f31, 0x7a72f45,
0x803daa6a, 0x7d95b9e,
0x8040c956, 0x80b86c2,
0x8043fbf6, 0x83db0a7,
0x80474248, 0x86fd947,
0x804a9c4d, 0x8a2009a,
0x804e0a04, 0x8d42699,
0x80518b6b, 0x9064b3a,
0x80552084, 0x9386e78,
0x8058c94c, 0x96a9049,
0x805c85c4, 0x99cb0a7,
0x806055eb, 0x9cecf89,
0x806439c0, 0xa00ece8,
0x80683143, 0xa3308bd,
0x806c3c74, 0xa6522fe,
0x80705b50, 0xa973ba5,
0x80748dd9, 0xac952aa,
0x8078d40d, 0xafb6805,
0x807d2dec, 0xb2d7baf,
0x80819b74, 0xb5f8d9f,
0x80861ca6, 0xb919dcf,
0x808ab180, 0xbc3ac35,
0x808f5a02, 0xbf5b8cb,
0x8094162c, 0xc27c389,
0x8098e5fb, 0xc59cc68,
0x809dc971, 0xc8bd35e,
0x80a2c08b, 0xcbdd865,
0x80a7cb49, 0xcefdb76,
0x80ace9ab, 0xd21dc87,
0x80b21baf, 0xd53db92,
0x80b76156, 0xd85d88f,
0x80bcba9d, 0xdb7d376,
0x80c22784, 0xde9cc40,
0x80c7a80a, 0xe1bc2e4,
0x80cd3c2f, 0xe4db75b,
0x80d2e3f2, 0xe7fa99e,
0x80d89f51, 0xeb199a4,
0x80de6e4c, 0xee38766,
0x80e450e2, 0xf1572dc,
0x80ea4712, 0xf475bff,
0x80f050db, 0xf7942c7,
0x80f66e3c, 0xfab272b,
0x80fc9f35, 0xfdd0926,
0x8102e3c4, 0x100ee8ad,
0x81093be8, 0x1040c5bb,
0x810fa7a0, 0x1072a048,
0x811626ec, 0x10a4784b,
0x811cb9ca, 0x10d64dbd,
0x8123603a, 0x11082096,
0x812a1a3a, 0x1139f0cf,
0x8130e7c9, 0x116bbe60,
0x8137c8e6, 0x119d8941,
0x813ebd90, 0x11cf516a,
0x8145c5c7, 0x120116d5,
0x814ce188, 0x1232d979,
0x815410d4, 0x1264994e,
0x815b53a8, 0x1296564d,
0x8162aa04, 0x12c8106f,
0x816a13e6, 0x12f9c7aa,
0x8171914e, 0x132b7bf9,
0x8179223a, 0x135d2d53,
0x8180c6a9, 0x138edbb1,
0x81887e9a, 0x13c0870a,
0x81904a0c, 0x13f22f58,
0x819828fd, 0x1423d492,
0x81a01b6d, 0x145576b1,
0x81a82159, 0x148715ae,
0x81b03ac2, 0x14b8b17f,
0x81b867a5, 0x14ea4a1f,
0x81c0a801, 0x151bdf86,
0x81c8fbd6, 0x154d71aa,
0x81d16321, 0x157f0086,
0x81d9dde1, 0x15b08c12,
0x81e26c16, 0x15e21445,
0x81eb0dbe, 0x16139918,
0x81f3c2d7, 0x16451a83,
0x81fc8b60, 0x1676987f,
0x82056758, 0x16a81305,
0x820e56be, 0x16d98a0c,
0x82175990, 0x170afd8d,
0x82206fcc, 0x173c6d80,
0x82299971, 0x176dd9de,
0x8232d67f, 0x179f429f,
0x823c26f3, 0x17d0a7bc,
0x82458acc, 0x1802092c,
0x824f0208, 0x183366e9,
0x82588ca7, 0x1864c0ea,
0x82622aa6, 0x18961728,
0x826bdc04, 0x18c7699b,
0x8275a0c0, 0x18f8b83c,
0x827f78d8, 0x192a0304,
0x8289644b, 0x195b49ea,
0x82936317, 0x198c8ce7,
0x829d753a, 0x19bdcbf3,
0x82a79ab3, 0x19ef0707,
0x82b1d381, 0x1a203e1b,
0x82bc1fa2, 0x1a517128,
0x82c67f14, 0x1a82a026,
0x82d0f1d5, 0x1ab3cb0d,
0x82db77e5, 0x1ae4f1d6,
0x82e61141, 0x1b161479,
0x82f0bde8, 0x1b4732ef,
0x82fb7dd8, 0x1b784d30,
0x83065110, 0x1ba96335,
0x8311378d, 0x1bda74f6,
0x831c314e, 0x1c0b826a,
0x83273e52, 0x1c3c8b8c,
0x83325e97, 0x1c6d9053,
0x833d921b, 0x1c9e90b8,
0x8348d8dc, 0x1ccf8cb3,
0x835432d8, 0x1d00843d,
0x835fa00f, 0x1d31774d,
0x836b207d, 0x1d6265dd,
0x8376b422, 0x1d934fe5,
0x83825afb, 0x1dc4355e,
0x838e1507, 0x1df5163f,
0x8399e244, 0x1e25f282,
0x83a5c2b0, 0x1e56ca1e,
0x83b1b649, 0x1e879d0d,
0x83bdbd0e, 0x1eb86b46,
0x83c9d6fc, 0x1ee934c3,
0x83d60412, 0x1f19f97b,
0x83e2444d, 0x1f4ab968,
0x83ee97ad, 0x1f7b7481,
0x83fafe2e, 0x1fac2abf,
0x840777d0, 0x1fdcdc1b,
0x84140490, 0x200d888d,
0x8420a46c, 0x203e300d,
0x842d5762, 0x206ed295,
0x843a1d70, 0x209f701c,
0x8446f695, 0x20d0089c,
0x8453e2cf, 0x21009c0c,
0x8460e21a, 0x21312a65,
0x846df477, 0x2161b3a0,
0x847b19e1, 0x219237b5,
0x84885258, 0x21c2b69c,
0x84959dd9, 0x21f3304f,
0x84a2fc62, 0x2223a4c5,
0x84b06df2, 0x225413f8,
0x84bdf286, 0x22847de0,
0x84cb8a1b, 0x22b4e274,
0x84d934b1, 0x22e541af,
0x84e6f244, 0x23159b88,
0x84f4c2d4, 0x2345eff8,
0x8502a65c, 0x23763ef7,
0x85109cdd, 0x23a6887f,
0x851ea652, 0x23d6cc87,
0x852cc2bb, 0x24070b08,
0x853af214, 0x243743fa,
0x8549345c, 0x24677758,
0x85578991, 0x2497a517,
0x8565f1b0, 0x24c7cd33,
0x85746cb8, 0x24f7efa2,
0x8582faa5, 0x25280c5e,
0x85919b76, 0x2558235f,
0x85a04f28, 0x2588349d,
0x85af15b9, 0x25b84012,
0x85bdef28, 0x25e845b6,
0x85ccdb70, 0x26184581,
0x85dbda91, 0x26483f6c,
0x85eaec88, 0x26783370,
0x85fa1153, 0x26a82186,
0x860948ef, 0x26d809a5,
0x86189359, 0x2707ebc7,
0x8627f091, 0x2737c7e3,
0x86376092, 0x27679df4,
0x8646e35c, 0x27976df1,
0x865678eb, 0x27c737d3,
0x8666213c, 0x27f6fb92,
0x8675dc4f, 0x2826b928,
0x8685aa20, 0x2856708d,
0x86958aac, 0x288621b9,
0x86a57df2, 0x28b5cca5,
0x86b583ee, 0x28e5714b,
0x86c59c9f, 0x29150fa1,
0x86d5c802, 0x2944a7a2,
0x86e60614, 0x29743946,
0x86f656d3, 0x29a3c485,
0x8706ba3d, 0x29d34958,
0x8717304e, 0x2a02c7b8,
0x8727b905, 0x2a323f9e,
0x8738545e, 0x2a61b101,
0x87490258, 0x2a911bdc,
0x8759c2ef, 0x2ac08026,
0x876a9621, 0x2aefddd8,
0x877b7bec, 0x2b1f34eb,
0x878c744d, 0x2b4e8558,
0x879d7f41, 0x2b7dcf17,
0x87ae9cc5, 0x2bad1221,
0x87bfccd7, 0x2bdc4e6f,
0x87d10f75, 0x2c0b83fa,
0x87e2649b, 0x2c3ab2b9,
0x87f3cc48, 0x2c69daa6,
0x88054677, 0x2c98fbba,
0x8816d327, 0x2cc815ee,
0x88287256, 0x2cf72939,
0x883a23ff, 0x2d263596,
0x884be821, 0x2d553afc,
0x885dbeb8, 0x2d843964,
0x886fa7c2, 0x2db330c7,
0x8881a33d, 0x2de2211e,
0x8893b125, 0x2e110a62,
0x88a5d177, 0x2e3fec8b,
0x88b80432, 0x2e6ec792,
0x88ca4951, 0x2e9d9b70,
0x88dca0d3, 0x2ecc681e,
0x88ef0ab4, 0x2efb2d95,
0x890186f2, 0x2f29ebcc,
0x89141589, 0x2f58a2be,
0x8926b677, 0x2f875262,
0x893969b9, 0x2fb5fab2,
0x894c2f4c, 0x2fe49ba7,
0x895f072e, 0x30133539,
0x8971f15a, 0x3041c761,
0x8984edcf, 0x30705217,
0x8997fc8a, 0x309ed556,
0x89ab1d87, 0x30cd5115,
0x89be50c3, 0x30fbc54d,
0x89d1963c, 0x312a31f8,
0x89e4edef, 0x3158970e,
0x89f857d8, 0x3186f487,
0x8a0bd3f5, 0x31b54a5e,
0x8a1f6243, 0x31e39889,
0x8a3302be, 0x3211df04,
0x8a46b564, 0x32401dc6,
0x8a5a7a31, 0x326e54c7,
0x8a6e5123, 0x329c8402,
0x8a823a36, 0x32caab6f,
0x8a963567, 0x32f8cb07,
0x8aaa42b4, 0x3326e2c3,
0x8abe6219, 0x3354f29b,
0x8ad29394, 0x3382fa88,
0x8ae6d720, 0x33b0fa84,
0x8afb2cbb, 0x33def287,
0x8b0f9462, 0x340ce28b,
0x8b240e11, 0x343aca87,
0x8b3899c6, 0x3468aa76,
0x8b4d377c, 0x34968250,
0x8b61e733, 0x34c4520d,
0x8b76a8e4, 0x34f219a8,
0x8b8b7c8f, 0x351fd918,
0x8ba0622f, 0x354d9057,
0x8bb559c1, 0x357b3f5d,
0x8bca6343, 0x35a8e625,
0x8bdf7eb0, 0x35d684a6,
0x8bf4ac05, 0x36041ad9,
0x8c09eb40, 0x3631a8b8,
0x8c1f3c5d, 0x365f2e3b,
0x8c349f58, 0x368cab5c,
0x8c4a142f, 0x36ba2014,
0x8c5f9ade, 0x36e78c5b,
0x8c753362, 0x3714f02a,
0x8c8addb7, 0x37424b7b,
0x8ca099da, 0x376f9e46,
0x8cb667c8, 0x379ce885,
0x8ccc477d, 0x37ca2a30,
0x8ce238f6, 0x37f76341,
0x8cf83c30, 0x382493b0,
0x8d0e5127, 0x3851bb77,
0x8d2477d8, 0x387eda8e,
0x8d3ab03f, 0x38abf0ef,
0x8d50fa59, 0x38d8fe93,
0x8d675623, 0x39060373,
0x8d7dc399, 0x3932ff87,
0x8d9442b8, 0x395ff2c9,
0x8daad37b, 0x398cdd32,
0x8dc175e0, 0x39b9bebc,
0x8dd829e4, 0x39e6975e,
0x8deeef82, 0x3a136712,
0x8e05c6b7, 0x3a402dd2,
0x8e1caf80, 0x3a6ceb96,
0x8e33a9da, 0x3a99a057,
0x8e4ab5bf, 0x3ac64c0f,
0x8e61d32e, 0x3af2eeb7,
0x8e790222, 0x3b1f8848,
0x8e904298, 0x3b4c18ba,
0x8ea7948c, 0x3b78a007,
0x8ebef7fb, 0x3ba51e29,
0x8ed66ce1, 0x3bd19318,
0x8eedf33b, 0x3bfdfecd,
0x8f058b04, 0x3c2a6142,
0x8f1d343a, 0x3c56ba70,
0x8f34eed8, 0x3c830a50,
0x8f4cbadb, 0x3caf50da,
0x8f649840, 0x3cdb8e09,
0x8f7c8701, 0x3d07c1d6,
0x8f94871d, 0x3d33ec39,
0x8fac988f, 0x3d600d2c,
0x8fc4bb53, 0x3d8c24a8,
0x8fdcef66, 0x3db832a6,
0x8ff534c4, 0x3de4371f,
0x900d8b69, 0x3e10320d,
0x9025f352, 0x3e3c2369,
0x903e6c7b, 0x3e680b2c,
0x9056f6df, 0x3e93e950,
0x906f927c, 0x3ebfbdcd,
0x90883f4d, 0x3eeb889c,
0x90a0fd4e, 0x3f1749b8,
0x90b9cc7d, 0x3f430119,
0x90d2acd4, 0x3f6eaeb8,
0x90eb9e50, 0x3f9a5290,
0x9104a0ee, 0x3fc5ec98,
0x911db4a9, 0x3ff17cca,
0x9136d97d, 0x401d0321,
0x91500f67, 0x40487f94,
0x91695663, 0x4073f21d,
0x9182ae6d, 0x409f5ab6,
0x919c1781, 0x40cab958,
0x91b5919a, 0x40f60dfb,
0x91cf1cb6, 0x4121589b,
0x91e8b8d0, 0x414c992f,
0x920265e4, 0x4177cfb1,
0x921c23ef, 0x41a2fc1a,
0x9235f2ec, 0x41ce1e65,
0x924fd2d7, 0x41f93689,
0x9269c3ac, 0x42244481,
0x9283c568, 0x424f4845,
0x929dd806, 0x427a41d0,
0x92b7fb82, 0x42a5311b,
0x92d22fd9, 0x42d0161e,
0x92ec7505, 0x42faf0d4,
0x9306cb04, 0x4325c135,
0x932131d1, 0x4350873c,
0x933ba968, 0x437b42e1,
0x935631c5, 0x43a5f41e,
0x9370cae4, 0x43d09aed,
0x938b74c1, 0x43fb3746,
0x93a62f57, 0x4425c923,
0x93c0faa3, 0x4450507e,
0x93dbd6a0, 0x447acd50,
0x93f6c34a, 0x44a53f93,
0x9411c09e, 0x44cfa740,
0x942cce96, 0x44fa0450,
0x9447ed2f, 0x452456bd,
0x94631c65, 0x454e9e80,
0x947e5c33, 0x4578db93,
0x9499ac95, 0x45a30df0,
0x94b50d87, 0x45cd358f,
0x94d07f05, 0x45f7526b,
0x94ec010b, 0x4621647d,
0x95079394, 0x464b6bbe,
0x9523369c, 0x46756828,
0x953eea1e, 0x469f59b4,
0x955aae17, 0x46c9405c,
0x95768283, 0x46f31c1a,
0x9592675c, 0x471cece7,
0x95ae5c9f, 0x4746b2bc,
0x95ca6247, 0x47706d93,
0x95e67850, 0x479a1d67,
0x96029eb6, 0x47c3c22f,
0x961ed574, 0x47ed5be6,
0x963b1c86, 0x4816ea86,
0x965773e7, 0x48406e08,
0x9673db94, 0x4869e665,
0x96905388, 0x48935397,
0x96acdbbe, 0x48bcb599,
0x96c97432, 0x48e60c62,
0x96e61ce0, 0x490f57ee,
0x9702d5c3, 0x49389836,
0x971f9ed7, 0x4961cd33,
0x973c7817, 0x498af6df,
0x9759617f, 0x49b41533,
0x97765b0a, 0x49dd282a,
0x979364b5, 0x4a062fbd,
0x97b07e7a, 0x4a2f2be6,
0x97cda855, 0x4a581c9e,
0x97eae242, 0x4a8101de,
0x98082c3b, 0x4aa9dba2,
0x9825863d, 0x4ad2a9e2,
0x9842f043, 0x4afb6c98,
0x98606a49, 0x4b2423be,
0x987df449, 0x4b4ccf4d,
0x989b8e40, 0x4b756f40,
0x98b93828, 0x4b9e0390,
0x98d6f1fe, 0x4bc68c36,
0x98f4bbbc, 0x4bef092d,
0x9912955f, 0x4c177a6e,
0x99307ee0, 0x4c3fdff4,
0x994e783d, 0x4c6839b7,
0x996c816f, 0x4c9087b1,
0x998a9a74, 0x4cb8c9dd,
0x99a8c345, 0x4ce10034,
0x99c6fbde, 0x4d092ab0,
0x99e5443b, 0x4d31494b,
0x9a039c57, 0x4d595bfe,
0x9a22042d, 0x4d8162c4,
0x9a407bb9, 0x4da95d96,
0x9a5f02f5, 0x4dd14c6e,
0x9a7d99de, 0x4df92f46,
0x9a9c406e, 0x4e210617,
0x9abaf6a1, 0x4e48d0dd,
0x9ad9bc71, 0x4e708f8f,
0x9af891db, 0x4e984229,
0x9b1776da, 0x4ebfe8a5,
0x9b366b68, 0x4ee782fb,
0x9b556f81, 0x4f0f1126,
0x9b748320, 0x4f369320,
0x9b93a641, 0x4f5e08e3,
0x9bb2d8de, 0x4f857269,
0x9bd21af3, 0x4faccfab,
0x9bf16c7a, 0x4fd420a4,
0x9c10cd70, 0x4ffb654d,
0x9c303dcf, 0x50229da1,
0x9c4fbd93, 0x5049c999,
0x9c6f4cb6, 0x5070e92f,
0x9c8eeb34, 0x5097fc5e,
0x9cae9907, 0x50bf031f,
0x9cce562c, 0x50e5fd6d,
0x9cee229c, 0x510ceb40,
0x9d0dfe54, 0x5133cc94,
0x9d2de94d, 0x515aa162,
0x9d4de385, 0x518169a5,
0x9d6decf4, 0x51a82555,
0x9d8e0597, 0x51ced46e,
0x9dae2d68, 0x51f576ea,
0x9dce6463, 0x521c0cc2,
0x9deeaa82, 0x524295f0,
0x9e0effc1, 0x5269126e,
0x9e2f641b, 0x528f8238,
0x9e4fd78a, 0x52b5e546,
0x9e705a09, 0x52dc3b92,
0x9e90eb94, 0x53028518,
0x9eb18c26, 0x5328c1d0,
0x9ed23bb9, 0x534ef1b5,
0x9ef2fa49, 0x537514c2,
0x9f13c7d0, 0x539b2af0,
0x9f34a449, 0x53c13439,
0x9f558fb0, 0x53e73097,
0x9f7689ff, 0x540d2005,
0x9f979331, 0x5433027d,
0x9fb8ab41, 0x5458d7f9,
0x9fd9d22a, 0x547ea073,
0x9ffb07e7, 0x54a45be6,
0xa01c4c73, 0x54ca0a4b,
0xa03d9fc8, 0x54efab9c,
0xa05f01e1, 0x55153fd4,
0xa08072ba, 0x553ac6ee,
0xa0a1f24d, 0x556040e2,
0xa0c38095, 0x5585adad,
0xa0e51d8c, 0x55ab0d46,
0xa106c92f, 0x55d05faa,
0xa1288376, 0x55f5a4d2,
0xa14a4c5e, 0x561adcb9,
0xa16c23e1, 0x56400758,
0xa18e09fa, 0x566524aa,
0xa1affea3, 0x568a34a9,
0xa1d201d7, 0x56af3750,
0xa1f41392, 0x56d42c99,
0xa21633cd, 0x56f9147e,
0xa2386284, 0x571deefa,
0xa25a9fb1, 0x5742bc06,
0xa27ceb4f, 0x57677b9d,
0xa29f4559, 0x578c2dba,
0xa2c1adc9, 0x57b0d256,
0xa2e4249b, 0x57d5696d,
0xa306a9c8, 0x57f9f2f8,
0xa3293d4b, 0x581e6ef1,
0xa34bdf20, 0x5842dd54,
0xa36e8f41, 0x58673e1b,
0xa3914da8, 0x588b9140,
0xa3b41a50, 0x58afd6bd,
0xa3d6f534, 0x58d40e8c,
0xa3f9de4e, 0x58f838a9,
0xa41cd599, 0x591c550e,
0xa43fdb10, 0x594063b5,
0xa462eeac, 0x59646498,
0xa486106a, 0x598857b2,
0xa4a94043, 0x59ac3cfd,
0xa4cc7e32, 0x59d01475,
0xa4efca31, 0x59f3de12,
0xa513243b, 0x5a1799d1,
0xa5368c4b, 0x5a3b47ab,
0xa55a025b, 0x5a5ee79a,
0xa57d8666, 0x5a82799a
};
/*End of File*/

Просмотреть файл

@ -1,192 +0,0 @@
@//
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@// Use of this source code is governed by a BSD-style license
@// that can be found in the LICENSE file in the root of the source
@// tree. An additional intellectual property rights grant can be found
@// in the file PATENTS. All contributing project authors may
@// be found in the AUTHORS file in the root of the source tree.
@//
@// This is a modification of omxSP_FFTFwd_CToC_SC32_Sfs_s.s
@// to support float instead of SC32.
@//
@//
@// Description:
@// Compute a forward FFT for a complex signal
@//
@//
@// Include standard headers
#include "dl/api/armCOMM_s.h"
#include "dl/api/omxtypes_s.h"
@// Import symbols required from other files
@// (For example tables)
.extern armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_FC32_Radix8_fs_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_FC32_Radix4_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_FC32_Radix2_OutOfPlace_unsafe
@// Set debugging level
@//DEBUG_ON SETL {TRUE}
@// Guarding implementation by the processor name
@// Guarding implementation by the processor name
@// Import symbols required from other files
@// (For example tables)
.extern armSP_FFTFwd_CToC_FC32_Radix2_ls_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_FC32_Sfs_Radix2_ls_OutOfPlace_unsafe
@// Register aliases. Several names deliberately share one physical register
@// and are only meaningful while the other users of that register are dead.
@// Keep comments on their own lines: text after a #define becomes part of the
@// macro body.
@//Input Registers
#define pSrc r0
#define pDst r1
#define pFFTSpec r2
@// Output registers
@// result reuses r0 (pSrc) and is only written at function exit
#define result r0
@//Local Scratch Registers
@// argTwiddle reuses r1 (pDst) and argDst reuses r2 (pFFTSpec); both are set
@// up just before the first stage routine is called
#define argTwiddle r1
#define argDst r2
@// r4 carries four names: argScale, tmpOrder, pTwiddle and x0r
#define argScale r4
#define tmpOrder r4
#define pTwiddle r4
#define pOut r5
#define subFFTSize r7
@// subFFTNum and N are the same register (r6)
#define subFFTNum r6
#define N r6
@// order is r14, the link register: any BL overwrites it
#define order r14
#define diff r9
@// Total number of radix stages required to complete the FFT
#define count r8
#define x0r r4
#define x0i r5
#define diffMinusOne r2
@// Neon registers
@// D0 viewed as 32-bit float lanes
#define dX0 D0.F32
@// omxSP_FFTFwd_CToC_FC32_Sfs: forward complex FFT driver.
@// Entry: r0 = pSrc, r1 = pDst, r2 = pFFTSpec. Exit: r0 = OMX_Sts_NoErr.
@// Loads N, the twiddle pointer and the scratch buffer from the FFTSpec,
@// computes order = log2(N) and chains the armSP_FFTFwd_CToC_FC32_* stage
@// routines that match that order. No stack locals are allocated here.
@// NOTE(review): order is aliased to r14 (LR), which every BL overwrites, so
@// order is only valid up to the first stage call. Every path below reads it
@// before calling out.
@// NOTE(review): r11,d15 are presumably the last core and NEON registers that
@// M_START saves -- confirm against armCOMM_s.h.
M_START omxSP_FFTFwd_CToC_FC32_Sfs,r11,d15
@ Structure offsets for the FFTSpec
.set ARMsFFTSpec_N, 0
.set ARMsFFTSpec_pBitRev, 4
.set ARMsFFTSpec_pTwiddle, 8
.set ARMsFFTSpec_pBuf, 12
@// Define stack arguments
@// Read the size from structure and take log
LDR N, [pFFTSpec, #ARMsFFTSpec_N]
@// Read other structure parameters
LDR pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
LDR pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
@// order = 31 - clz(N), which is log2(N) when N is a power of two
CLZ order,N @// N = 2^order
RSB order,order,#31
MOV subFFTSize,#1
@// The move below stays disabled: subFFTNum and N are both r6
@//MOV subFFTNum,N
CMP order,#3
BGT orderGreaterthan3 @// order > 3
CMP order,#1
BGE orderGreaterthan0 @// order > 0
@// order = 0 (N = 1): copy the single element through D0 to pDst.
@// None of the next three instructions alters the flags, so the LT result of
@// the CMP above still holds and the BLT below is always taken.
VLD1 dX0,[pSrc]
VST1 dX0,[pDst]
MOV pSrc,pDst
BLT FFTEnd
orderGreaterthan0:
@// set the buffers appropriately for various orders (order is 1..3 here)
@// order 1 or 3: the first stage is given pDst as its destination.
@// order 2: the first stage is given the scratch buffer and pOut is
@// repointed at pDst (presumably so that the last stage lands in pDst).
CMP order,#2
MOVNE argDst,pDst
MOVEQ argDst,pOut
@// Pass the first stage destination in RN5
MOVEQ pOut,pDst
@// argTwiddle is r1, so this overwrites pDst; pDst was consumed just above
MOV argTwiddle,pTwiddle
CMP order,#1
BGT orderGreaterthan1
@// order = 1: a single radix-2 first stage
BL armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
B FFTEnd
orderGreaterthan1:
CMP order,#2
BGT orderGreaterthan2
@// order = 2: radix-2 first stage followed by radix-2 last stage
BL armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
BL armSP_FFTFwd_CToC_FC32_Radix2_ls_OutOfPlace_unsafe
B FFTEnd
orderGreaterthan2: @// order =3
@// order = 3: radix-2 first, middle and last stages
BL armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
BL armSP_FFTFwd_CToC_FC32_Radix2_OutOfPlace_unsafe
BL armSP_FFTFwd_CToC_FC32_Radix2_ls_OutOfPlace_unsafe
B FFTEnd
orderGreaterthan3:
@// Set input args to fft stages
@// The stage count is order/2 rounded down (one radix-8 stage absorbs the
@// odd bit), so bit 1 of order gives its parity and selects the first
@// destination buffer.
TST order, #2
MOVNE argDst,pDst
MOVEQ argDst,pOut
@// Pass the first stage destination in RN5
MOVEQ pOut,pDst
MOV argTwiddle,pTwiddle
@//check for even or odd order
@// NOTE: The following combination of BL's would work fine even though
@// the first BL would corrupt the flags. This is because the end of
@// the "grpZeroSetLoop" loop inside
@// armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag
@// to EQ
@// Even order starts with a radix-4 first stage, odd order with radix-8.
TST order,#0x00000001
BLEQ armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace_unsafe
BLNE armSP_FFTFwd_CToC_FC32_Radix8_fs_OutOfPlace_unsafe
@// NOTE(review): subFFTNum (r6) is never written in this function after the
@// initial load of N; the stage routines are presumably expected to update
@// it -- confirm against the stage implementations.
CMP subFFTNum,#4
BLT FFTEnd
unscaledRadix4Loop:
@// Entered with the flags of a CMP subFFTNum,#4: EQ selects the last stage,
@// otherwise run one more generic radix-4 stage and compare again.
BEQ lastStageUnscaledRadix4
BL armSP_FFTFwd_CToC_FC32_Radix4_OutOfPlace_unsafe
CMP subFFTNum,#4
B unscaledRadix4Loop
lastStageUnscaledRadix4:
BL armSP_FFTFwd_CToC_FC32_Radix4_ls_OutOfPlace_unsafe
@// Branch to the label that immediately follows
B FFTEnd
FFTEnd:
@// Set return value
MOV result, #OMX_Sts_NoErr
@// Write function tail
M_END
.end

Просмотреть файл

@ -1,356 +0,0 @@
@//
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@// Use of this source code is governed by a BSD-style license
@// that can be found in the LICENSE file in the root of the source
@// tree. An additional intellectual property rights grant can be found
@// in the file PATENTS. All contributing project authors may
@// be found in the AUTHORS file in the root of the source tree.
@//
@// This file was originally licensed as follows. It has been
@// relicensed with permission from the copyright holders.
@//
@//
@// File Name: omxSP_FFTFwd_CToC_SC16_Sfs_s.s
@// OpenMAX DL: v1.0.2
@// Last Modified Revision: 6729
@// Last Modified Date: Tue, 17 Jul 2007
@//
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
@//
@//
@//
@// Description:
@// Compute a forward FFT for a complex signal
@//
@//
@// Include standard headers
#include "dl/api/armCOMM_s.h"
#include "dl/api/omxtypes_s.h"
@// Import symbols required from other files
@// (For example tables)
.extern armSP_FFTFwd_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_SC16_Radix2_fs_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_SC16_Radix4_fs_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_SC16_Radix4_ls_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_SC16_Radix8_fs_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_SC16_Radix4_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_SC16_Sfs_Radix4_ls_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_SC16_Sfs_Radix8_fs_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_SC16_Sfs_Radix4_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_SC16_Radix2_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_SC16_Radix2_ls_OutOfPlace_unsafe
@// Set debugging level
@//DEBUG_ON SETL {TRUE}
@// Guarding implementation by the processor name
@// Guarding implementation by the processor name
.extern armSP_FFTFwd_CToC_SC16_Radix2_ps_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe
@// Register aliases. Several names deliberately share one physical register
@// and are only meaningful while the other users of that register are dead.
@// Keep comments on their own lines: text after a #define becomes part of the
@// macro body.
@//Input Registers
#define pSrc r0
#define pDst r1
#define pFFTSpec r2
#define scale r3
@// Output registers
@// result reuses r0 (pSrc) and is only written at function exit
#define result r0
@//Local Scratch Registers
@// argTwiddle reuses r1 (pDst) and argDst reuses r2 (pFFTSpec); both are set
@// up just before the first stage routine is called
#define argTwiddle r1
#define argDst r2
@// r4 carries four names: argScale, pTwiddle, tmpOrder and x0r. pTwiddle is
@// copied to argTwiddle before r4 is reloaded as argScale.
#define argScale r4
#define pTwiddle r4
#define tmpOrder r4
#define pOut r5
#define subFFTSize r7
@// subFFTNum and N are the same register (r6)
#define subFFTNum r6
#define N r6
@// order is r14, the link register: any BL overwrites it
#define order r14
#define diff r9
@// Total number of radix stages required to complete the FFT
#define count r8
#define x0r r4
#define x0i r5
#define diffMinusOne r2
@// round shares r3 with scale
#define round r3
@// Neon registers
@// dX0 and dX0S32 are two views of D0: 16-bit lanes and 32-bit lanes
#define dX0 D0.S16
#define dShift D1.S16
#define dX0S32 D0.S32
@// omxSP_FFTFwd_CToC_SC16_Sfs: forward complex FFT driver with scaling.
@// Entry: r0 = pSrc, r1 = pDst, r2 = pFFTSpec, r3 = scale.
@// Exit:  r0 = OMX_Sts_NoErr.
@// Loads N, the twiddle pointer and the scratch buffer from the FFTSpec,
@// computes order = log2(N) and chains scaled (_Sfs_) and unscaled stage
@// routines. Whatever part of scale exceeds order is applied by the shift
@// loop at FFTEnd; paths that consume all of scale in the stages exit via End.
@// NOTE(review): order is aliased to r14 (LR), which every BL overwrites, so
@// order is only valid up to the first stage call. Every path below reads it
@// before calling out.
@// NOTE(review): the conditional BL pairs below only work if a stage routine
@// returns with flags that do not satisfy the condition of the BL that
@// follows it; the note in specialScaleCase states that the first-stage
@// routines leave Z set -- confirm for the other stage routines.
@// Allocate stack memory required by the function
@// One 4-byte stack slot: holds scale (order = 0) or a scale/order difference
M_ALLOC4 diffOnStack, 4
@// Write function header
@// NOTE(review): r11,d15 are presumably the last core and NEON registers that
@// M_START saves -- confirm against armCOMM_s.h.
M_START omxSP_FFTFwd_CToC_SC16_Sfs,r11,d15
@ Structure offsets for the FFTSpec
.set ARMsFFTSpec_N, 0
.set ARMsFFTSpec_pBitRev, 4
.set ARMsFFTSpec_pTwiddle, 8
.set ARMsFFTSpec_pBuf, 12
@// Define stack arguments
@// Read the size from structure and take log
LDR N, [pFFTSpec, #ARMsFFTSpec_N]
@// Read other structure parameters
LDR pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
LDR pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
@// order = 31 - clz(N), which is log2(N) when N is a power of two
CLZ order,N @// N = 2^order
RSB order,order,#31
MOV subFFTSize,#1
@// The move below stays disabled: subFFTNum and N are both r6
@//MOV subFFTNum,N
CMP order,#3
BGT orderGreaterthan3 @// order > 3
CMP order,#1
BGE orderGreaterthan0 @// order > 0
@// order = 0 (N = 1): store scale as the pending shift, copy the single
@// 32-bit element to pDst and let FFTEnd shift it (subFFTSize is 1).
M_STR scale, diffOnStack,LT @// order = 0
LDRLT x0r,[pSrc]
STRLT x0r,[pDst]
MOVLT pSrc,pDst
BLT FFTEnd
orderGreaterthan0:
@// set the buffers appropriately for various orders (order is 1..3 here)
@// order 1 or 3: the first stage is given pDst as its destination.
@// order 2: the first stage is given the scratch buffer and pOut is
@// repointed at pDst (presumably so that the last stage lands in pDst).
CMP order,#2
MOVNE argDst,pDst
MOVEQ argDst,pOut
MOVEQ pOut,pDst @// Pass the first stage destination in RN5
@// argTwiddle is r1, so this overwrites pDst; pDst was consumed just above
MOV argTwiddle,pTwiddle
@// diff = scale - order is the shift left over for FFTEnd (applied only when
@// positive). The MOVGT relies on M_STR leaving the SUBS flags intact.
SUBS diff,scale,order
M_STR diff,diffOnStack
MOVGT scale,order
@// Now scale <= order
CMP order,#1
BGT orderGreaterthan1
@// order = 1: scale - 1 is zero (scaled stage) or negative (unscaled stage)
SUBS scale,scale,#1
BLEQ armSP_FFTFwd_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe @// order = 1
BLLT armSP_FFTFwd_CToC_SC16_Radix2_fs_OutOfPlace_unsafe @// order = 1
B FFTEnd
orderGreaterthan1:
CMP order,#2
@// r4 changes role here from pTwiddle to argScale; MOV leaves the CMP flags
@// intact for the BGT below
MOV argScale,scale
BGT orderGreaterthan2
@// order = 2: argScale is decremented before each stage; a stage runs scaled
@// while the decremented value is still non-negative, unscaled otherwise
SUBS argScale,argScale,#1
BLGE armSP_FFTFwd_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe @// order =2
BLLT armSP_FFTFwd_CToC_SC16_Radix2_fs_OutOfPlace_unsafe
SUBS argScale,argScale,#1
BLEQ armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe
BLLT armSP_FFTFwd_CToC_SC16_Radix2_ls_OutOfPlace_unsafe
B FFTEnd
orderGreaterthan2: @// order =3
@// order = 3: first (fs), ps and last (ls) radix-2 stages, each picked as
@// scaled or unscaled by the same countdown on argScale
SUBS argScale,argScale,#1
BLGE armSP_FFTFwd_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe
BLLT armSP_FFTFwd_CToC_SC16_Radix2_fs_OutOfPlace_unsafe
SUBS argScale,argScale,#1
BLGE armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe
BLLT armSP_FFTFwd_CToC_SC16_Radix2_ps_OutOfPlace_unsafe
SUBS argScale,argScale,#1
BLEQ armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe
BLLT armSP_FFTFwd_CToC_SC16_Radix2_ls_OutOfPlace_unsafe
B FFTEnd
orderGreaterthan3:
@// check scale = 0 or scale = order
@// diff = scale - order; scale is clamped to order when it is larger
SUBS diff, scale, order @// scale > order
MOVGT scale,order
BGE specialScaleCase @// scale = 0 or scale = order
CMP scale,#0
BEQ specialScaleCase
B generalScaleCase
specialScaleCase: @// scale = 0 or scale = order and order > 3
@// The stage count is order/2 rounded down (one radix-8 stage absorbs the
@// odd bit), so bit 1 of order gives its parity and selects the first
@// destination buffer.
TST order, #2 @// Set input args to fft stages
MOVNE argDst,pDst
MOVEQ argDst,pOut
MOVEQ pOut,pDst @// Pass the first stage destination in RN5
MOV argTwiddle,pTwiddle
@// diff < 0 here means scale = 0 (all stages unscaled, FFTEnd shifts nothing);
@// diff >= 0 means scale >= order (all stages scaled, FFTEnd applies diff).
@// The BGE relies on M_STR leaving the CMP flags intact.
CMP diff,#0
M_STR diff, diffOnStack
BGE scaleEqualsOrder
@//check for even or odd order
@// NOTE: The following combination of BL's would work fine eventhough the first
@// BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
@// armSP_FFTFwd_CToC_SC16_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
TST order,#0x00000001
BLEQ armSP_FFTFwd_CToC_SC16_Radix4_fs_OutOfPlace_unsafe
BLNE armSP_FFTFwd_CToC_SC16_Radix8_fs_OutOfPlace_unsafe
@// NOTE(review): subFFTNum (r6) is never written in this function after the
@// initial load of N; the stage routines are presumably expected to update
@// it -- confirm against the stage implementations.
CMP subFFTNum,#4
BLT FFTEnd
unscaledRadix4Loop:
@// Entered with the flags of a CMP subFFTNum,#4: EQ selects the last stage
BEQ lastStageUnscaledRadix4
BL armSP_FFTFwd_CToC_SC16_Radix4_OutOfPlace_unsafe
CMP subFFTNum,#4
B unscaledRadix4Loop
lastStageUnscaledRadix4:
BL armSP_FFTFwd_CToC_SC16_Radix4_ls_OutOfPlace_unsafe
B FFTEnd
scaleEqualsOrder:
@//check for even or odd order
@// NOTE: The following combination of BL's would work fine eventhough the first
@// BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
@// armSP_FFTFwd_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
TST order,#0x00000001
BLEQ armSP_FFTFwd_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe
BLNE armSP_FFTFwd_CToC_SC16_Sfs_Radix8_fs_OutOfPlace_unsafe
CMP subFFTNum,#4
BLT FFTEnd
scaledRadix4Loop:
@// Same loop shape as unscaledRadix4Loop, using the scaled radix-4 stages
BEQ lastStageScaledRadix4
BL armSP_FFTFwd_CToC_SC16_Sfs_Radix4_OutOfPlace_unsafe
CMP subFFTNum,#4
B scaledRadix4Loop
lastStageScaledRadix4:
BL armSP_FFTFwd_CToC_SC16_Sfs_Radix4_ls_OutOfPlace_unsafe
B FFTEnd
generalScaleCase: @// 0 < scale < order and order > 3
@// Determine the correct destination buffer
@// diff = order - scale = number of radix-2 levels left once the scaled
@// radix-2 stages are done. Even diff: the rest runs as diff/2 radix-4
@// stages. Odd diff: the rest stays radix-2, giving order stages in total.
SUB diff,order,scale
TST diff,#0x01
ADDEQ count,scale,diff,LSR #1 @// count = scale + (order - scale)/2
MOVNE count,order
TST count,#0x01 @// Is count even or odd ?
MOVNE argDst,pDst @// Set input args to fft stages
MOVEQ argDst,pOut
MOVEQ pOut,pDst @// Pass the first stage destination in RN5
MOV argTwiddle,pTwiddle
@// diff is saved because it is reloaded at outScale after the stage calls.
@// The BEQ relies on M_STR leaving the CMP flags intact.
CMP diff,#1
M_STR diff, diffOnStack
BEQ scaleps @// scaling including a radix2_ps stage
MOV argScale,scale @// Put scale in RN4 so as to save and restore
BL armSP_FFTFwd_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe @// scaled first stage
SUBS argScale,argScale,#1
scaledRadix2Loop:
@// Runs scale - 1 further scaled radix-2 stages after the first one. The
@// BLGT is skipped on entry when scale is 1.
BLGT armSP_FFTFwd_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe
SUBS argScale,argScale,#1 @// save and restore scale (RN4) in the scaled stages
BGT scaledRadix2Loop
B outScale
scaleps:
@// diff = 1: the scaled stages are the first stage, scale - 2 generic stages
@// and the ps stage; only the last (ls) stage runs unscaled
SUB argScale,scale,#1 @// order>3 and diff=1 => scale >= 3
BL armSP_FFTFwd_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe @// scaled first stage
SUBS argScale,argScale,#1
scaledRadix2psLoop:
BEQ scaledRadix2psStage
BLGT armSP_FFTFwd_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe
SUBS argScale,argScale,#1 @// save and restore scale (RN4) in the scaled stages
BGE scaledRadix2psLoop
scaledRadix2psStage:
BL armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe
B generalLastStageUnscaledRadix2
outScale:
@// Scaled stages are done; reload diff to pick radix-4 or radix-2 for the rest
M_LDR diff, diffOnStack
@//check for even or odd order
TST diff,#0x00000001
BEQ generalUnscaledRadix4Loop
B unscaledRadix2Loop
generalUnscaledRadix4Loop:
@// Generic radix-4 stages until subFFTNum reaches 4, then the last stage
CMP subFFTNum,#4
BEQ generalLastStageUnscaledRadix4
BL armSP_FFTFwd_CToC_SC16_Radix4_OutOfPlace_unsafe
B generalUnscaledRadix4Loop
generalLastStageUnscaledRadix4:
BL armSP_FFTFwd_CToC_SC16_Radix4_ls_OutOfPlace_unsafe
@// All of scale was consumed by the stages: skip the FFTEnd shift loop
B End
unscaledRadix2Loop:
@// Generic radix-2 stages until subFFTNum reaches 4, then the ps and ls stages
CMP subFFTNum,#4
BEQ generalLastTwoStagesUnscaledRadix2
BL armSP_FFTFwd_CToC_SC16_Radix2_OutOfPlace_unsafe
B unscaledRadix2Loop
generalLastTwoStagesUnscaledRadix2:
BL armSP_FFTFwd_CToC_SC16_Radix2_ps_OutOfPlace_unsafe
generalLastStageUnscaledRadix2:
BL armSP_FFTFwd_CToC_SC16_Radix2_ls_OutOfPlace_unsafe
@// All of scale was consumed by the stages: skip the FFTEnd shift loop
B End
FFTEnd: @// Does only the scaling
@// Apply the shift stored on the stack; nothing to do when it is <= 0
M_LDR diff, diffOnStack
CMP diff,#0
BLE End
RSB diff,diff,#0 @// to use VRSHL for right shift by a variable
@// Every 16-bit lane of dShift gets -diff; VRSHL with a negative count is a
@// rounding right shift
VDUP dShift,diff
scaleFFTData: @// N = subFFTSize ; dataptr = pDst ; scale = diff
@// One 32-bit lane (two 16-bit values) per iteration, shifted in place with
@// post-increment of pSrc; runs until subFFTSize counts down to zero.
@// NOTE(review): subFFTSize is only set to 1 in this function; the stage
@// routines presumably leave the element count in it -- confirm.
VLD1 {dX0S32[0]},[pSrc] @// pSrc contains pDst pointer
SUBS subFFTSize,subFFTSize,#1
VRSHL dX0,dShift
VST1 {dX0S32[0]},[pSrc]!
BGT scaleFFTData
End:
@// Set return value
MOV result, #OMX_Sts_NoErr
@// Write function tail
M_END
.END

Просмотреть файл

@ -1,335 +0,0 @@
@//
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@// Use of this source code is governed by a BSD-style license
@// that can be found in the LICENSE file in the root of the source
@// tree. An additional intellectual property rights grant can be found
@// in the file PATENTS. All contributing project authors may
@// be found in the AUTHORS file in the root of the source tree.
@//
@// This file was originally licensed as follows. It has been
@// relicensed with permission from the copyright holders.
@//
@//
@// File Name: omxSP_FFTFwd_CToC_SC32_Sfs_s.s
@// OpenMAX DL: v1.0.2
@// Last Modified Revision: 6684
@// Last Modified Date: Mon, 09 Jul 2007
@//
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
@//
@//
@//
@// Description:
@// Compute a forward FFT for a complex signal
@//
@// Include standard headers
#include "dl/api/armCOMM_s.h"
#include "dl/api/omxtypes_s.h"
@// Import symbols required from other files
@// (For example tables)
.extern armSP_FFTFwd_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_SC32_Radix2_fs_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_SC32_Radix4_fs_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_SC32_Radix8_fs_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_SC32_Radix4_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_SC32_Sfs_Radix8_fs_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_SC32_Sfs_Radix4_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_SC32_Radix2_OutOfPlace_unsafe
@// Set debugging level
@//DEBUG_ON SETL {TRUE}
@// Guarding implementation by the processor name
@// Guarding implementation by the processor name
@// Import symbols required from other files
@// (For example tables)
.extern armSP_FFTFwd_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_SC32_Radix2_ls_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_SC32_Sfs_Radix4_ls_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe
@// Register aliases. Several names deliberately share one physical register
@// and are only meaningful while the other users of that register are dead.
@// Keep comments on their own lines: text after a #define becomes part of the
@// macro body.
@//Input Registers
#define pSrc r0
#define pDst r1
#define pFFTSpec r2
#define scale r3
@// Output registers
@// result reuses r0 (pSrc)
#define result r0
@//Local Scratch Registers
@// argTwiddle reuses r1 (pDst) and argDst reuses r2 (pFFTSpec)
#define argTwiddle r1
#define argDst r2
@// r4 carries four names: argScale, tmpOrder, pTwiddle and x0r
#define argScale r4
#define tmpOrder r4
#define pTwiddle r4
#define pOut r5
#define subFFTSize r7
@// subFFTNum and N are the same register (r6)
#define subFFTNum r6
#define N r6
@// order is r14, the link register: any BL overwrites it
#define order r14
#define diff r9
@// Total number of radix stages required to complete the FFT
#define count r8
#define x0r r4
#define x0i r5
#define diffMinusOne r2
@// round shares r3 with scale
#define round r3
@// Neon registers
@// D0 and D1 viewed as signed 32-bit lanes
#define dX0 D0.S32
#define dShift D1.S32
@// Allocate stack memory required by the function
M_ALLOC4 diffOnStack, 4
@// Write function header
M_START omxSP_FFTFwd_CToC_SC32_Sfs,r11,d15
@ Structure offsets for the FFTSpec
.set ARMsFFTSpec_N, 0
.set ARMsFFTSpec_pBitRev, 4
.set ARMsFFTSpec_pTwiddle, 8
.set ARMsFFTSpec_pBuf, 12
@// Define stack arguments
@// Read the size from structure and take log
LDR N, [pFFTSpec, #ARMsFFTSpec_N]
@// Read other structure parameters
LDR pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
LDR pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
CLZ order,N @// N = 2^order
RSB order,order,#31
MOV subFFTSize,#1
@//MOV subFFTNum,N
CMP order,#3
BGT orderGreaterthan3 @// order > 3
CMP order,#1
BGE orderGreaterthan0 @// order > 0
M_STR scale, diffOnStack,LT @// order = 0
VLD1 dX0,[pSrc]
VST1 dX0,[pDst]
MOV pSrc,pDst
BLT FFTEnd
orderGreaterthan0:
@// set the buffers appropriately for various orders
CMP order,#2
MOVNE argDst,pDst
MOVEQ argDst,pOut
MOVEQ pOut,pDst @// Pass the first stage destination in RN5
MOV argTwiddle,pTwiddle
SUBS diff,scale,order
M_STR diff,diffOnStack
MOVGT scale,order
@// Now scale <= order
CMP order,#1
BGT orderGreaterthan1
SUBS scale,scale,#1
BLEQ armSP_FFTFwd_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe @// order = 1
BLLT armSP_FFTFwd_CToC_SC32_Radix2_fs_OutOfPlace_unsafe @// order = 1
B FFTEnd
orderGreaterthan1:
CMP order,#2
MOV argScale,scale
BGT orderGreaterthan2
SUBS argScale,argScale,#1
BLGE armSP_FFTFwd_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe @// order =2
BLLT armSP_FFTFwd_CToC_SC32_Radix2_fs_OutOfPlace_unsafe
SUBS argScale,argScale,#1
BLEQ armSP_FFTFwd_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe
BLLT armSP_FFTFwd_CToC_SC32_Radix2_ls_OutOfPlace_unsafe
B FFTEnd
orderGreaterthan2: @// order =3
SUBS argScale,argScale,#1
BLGE armSP_FFTFwd_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe // "fs" means first stage
BLLT armSP_FFTFwd_CToC_SC32_Radix2_fs_OutOfPlace_unsafe
SUBS argScale,argScale,#1
BLGE armSP_FFTFwd_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe
BLLT armSP_FFTFwd_CToC_SC32_Radix2_OutOfPlace_unsafe
SUBS argScale,argScale,#1
BLEQ armSP_FFTFwd_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe // "ls" means last stage
BLLT armSP_FFTFwd_CToC_SC32_Radix2_ls_OutOfPlace_unsafe
B FFTEnd
orderGreaterthan3:
@// check scale = 0 or scale = order
SUBS diff, scale, order @// scale > order
MOVGT scale,order
BGE specialScaleCase @// scale = 0 or scale = order
CMP scale,#0
BEQ specialScaleCase
B generalScaleCase
specialScaleCase: @// scale = 0 or scale = order and order >= 2
TST order, #2 @// Set input args to fft stages
MOVNE argDst,pDst
MOVEQ argDst,pOut
MOVEQ pOut,pDst @// Pass the first stage destination in RN5
MOV argTwiddle,pTwiddle
CMP diff,#0
M_STR diff, diffOnStack
BGE scaleEqualsOrder
@//check for even or odd order
@// NOTE: The following combination of BL's would work fine eventhough the first
@// BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
@// armSP_FFTFwd_CToC_SC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
TST order,#0x00000001
BLEQ armSP_FFTFwd_CToC_SC32_Radix4_fs_OutOfPlace_unsafe
BLNE armSP_FFTFwd_CToC_SC32_Radix8_fs_OutOfPlace_unsafe
CMP subFFTNum,#4
BLT FFTEnd
unscaledRadix4Loop:
BEQ lastStageUnscaledRadix4
BL armSP_FFTFwd_CToC_SC32_Radix4_OutOfPlace_unsafe
CMP subFFTNum,#4
B unscaledRadix4Loop
lastStageUnscaledRadix4:
BL armSP_FFTFwd_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
B FFTEnd
scaleEqualsOrder:
@//check for even or odd order
@// NOTE: The following combination of BL's would work fine eventhough the first
@// BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
@// armSP_FFTFwd_CToC_SC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
TST order,#0x00000001
BLEQ armSP_FFTFwd_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe
BLNE armSP_FFTFwd_CToC_SC32_Sfs_Radix8_fs_OutOfPlace_unsafe
CMP subFFTNum,#4
BLT FFTEnd
scaledRadix4Loop:
BEQ lastStageScaledRadix4
BL armSP_FFTFwd_CToC_SC32_Sfs_Radix4_OutOfPlace_unsafe
CMP subFFTNum,#4
B scaledRadix4Loop
lastStageScaledRadix4:
BL armSP_FFTFwd_CToC_SC32_Sfs_Radix4_ls_OutOfPlace_unsafe
B FFTEnd
generalScaleCase: @// 0 < scale < order and order >= 2
@// Determine the correct destination buffer
SUB diff,order,scale
TST diff,#0x01
ADDEQ count,scale,diff,LSR #1 @// count = scale + (order - scale)/2
MOVNE count,order
TST count,#0x01 @// Is count even or odd ?
MOVNE argDst,pDst @// Set input args to fft stages
MOVEQ argDst,pOut
MOVEQ pOut,pDst @// Pass the first stage destination in RN5
MOV argTwiddle,pTwiddle
M_STR diff, diffOnStack
MOV argScale,scale @// Put scale in RN4 so as to save and restore
BL armSP_FFTFwd_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe @// scaled first stage
SUBS argScale,argScale,#1
scaledRadix2Loop:
BLGT armSP_FFTFwd_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe
SUBS argScale,argScale,#1 @// save and restore scale (RN4) in the scaled stages
BGT scaledRadix2Loop
M_LDR diff, diffOnStack
@//check for even or odd order
TST diff,#0x00000001
BEQ generalUnscaledRadix4Loop
B unscaledRadix2Loop
generalUnscaledRadix4Loop:
CMP subFFTNum,#4
BEQ generalLastStageUnscaledRadix4
BL armSP_FFTFwd_CToC_SC32_Radix4_OutOfPlace_unsafe
B generalUnscaledRadix4Loop
generalLastStageUnscaledRadix4:
BL armSP_FFTFwd_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
B End
unscaledRadix2Loop:
CMP subFFTNum,#2
BEQ generalLastStageUnscaledRadix2
BL armSP_FFTFwd_CToC_SC32_Radix2_OutOfPlace_unsafe
B unscaledRadix2Loop
generalLastStageUnscaledRadix2:
BL armSP_FFTFwd_CToC_SC32_Radix2_ls_OutOfPlace_unsafe
B End
FFTEnd: @// Does only the scaling
M_LDR diff, diffOnStack
CMP diff,#0
BLE End
RSB diff,diff,#0 @// to use VRSHL for right shift by a variable
VDUP dShift,diff
scaleFFTData: @// N = subFFTSize ; dataptr = pDst ; scale = diff
VLD1 {dX0},[pSrc] @// pSrc contains pDst pointer
SUBS subFFTSize,subFFTSize,#1
VRSHL dX0,dShift
VST1 {dX0},[pSrc]!
BGT scaleFFTData
End:
@// Set return value
MOV result, #OMX_Sts_NoErr
@// Write function tail
M_END
.end

Просмотреть файл

@ -1,406 +0,0 @@
@//
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@// Use of this source code is governed by a BSD-style license
@// that can be found in the LICENSE file in the root of the source
@// tree. An additional intellectual property rights grant can be found
@// in the file PATENTS. All contributing project authors may
@// be found in the AUTHORS file in the root of the source tree.
@//
@// This is a modification of omxSP_FFTFwd_RToCCS_S32_Sfs_s.s
@// to support float instead of SC32.
@//
@//
@// Description:
@// Compute FFT for a real signal
@//
@//
@// Include standard headers
#include "dl/api/armCOMM_s.h"
#include "dl/api/omxtypes_s.h"
@// Import symbols required from other files
@// (For example tables)
.extern armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_FC32_Radix8_fs_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_FC32_Radix4_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_FC32_Radix8_fs_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_FC32_Radix2_OutOfPlace_unsafe
@// Set debugging level
@//DEBUG_ON SETL {TRUE}
@// Guarding implementation by the processor name
@// Guarding implementation by the processor name
@// Import symbols required from other files
@// (For example tables)
.extern armSP_FFTFwd_CToC_FC32_Radix4_ls_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_FC32_Radix2_ls_OutOfPlace_unsafe
@// Register aliases used by omxSP_FFTFwd_RToCCS_F32_Sfs.
@// Several names map to the same physical register (for example r4 is
@// argScale, tmpOrder, pTwiddle, x0r and step1), so aliases that share a
@// register must never be live at the same time.
@// Keep comments on their own lines here: text placed after a #define
@// value becomes part of the macro expansion.
@//Input Registers
#define pSrc r0
#define pDst r1
#define pFFTSpec r2
#define scale r3
@// Output registers
#define result r0
@//Local Scratch Registers
@// argTwiddle and argDst reuse r1 and r2, the registers that hold pDst and
@// pFFTSpec on entry.
#define argTwiddle r1
#define argDst r2
#define argScale r4
#define tmpOrder r4
#define pTwiddle r4
#define pOut r5
#define subFFTSize r7
#define subFFTNum r6
#define N r6
@// order lives in r14, the ARM link register, which every BL overwrites.
#define order r14
#define diff r9
@// Total num of radix stages required to complete the FFT
#define count r8
#define x0r r4
#define x0i r5
#define diffMinusOne r2
#define subFFTSizeTmp r6
@// step reuses r3, the register that carries the scale argument on entry.
#define step r3
#define step1 r4
#define twStep r8
#define zero r9
#define pTwiddleTmp r5
#define t0 r10
@// Neon registers
@// Aliases overlap here as well: dX0 and half are both d0, dZero and dX0r
@// are both d2, dShift, dX0i and dX1 are all d3, and the dW* names share
@// d14-d17 with the dY* names.
#define dX0 d0.f32
#define dzero d1.f32
#define dZero d2.f32
#define dShift d3.f32
#define dX0r d2.f32
#define dX0i d3.f32
#define dX1r d4.f32
#define dX1i d5.f32
#define dT0 d6.f32
#define dT1 d7.f32
#define dT2 d8.f32
#define dT3 d9.f32
@// Despite the q prefix, qT0-qT3 are 64-bit D registers in this float
@// variant.
#define qT0 d10.f32
#define qT1 d12.f32
#define dW0r d14.f32
#define dW0i d15.f32
#define dW1r d16.f32
#define dW1i d17.f32
#define dY0r d14.f32
#define dY0i d15.f32
#define dY1r d16.f32
#define dY1i d17.f32
#define dY0rS64 d14.s64
#define dY0iS64 d15.s64
#define qT2 d18.f32
#define qT3 d20.f32
@// lastThreeelements
#define dX1 d3.f32
#define dW0 d4.f32
#define dW1 d5.f32
#define dY0 d10.f32
#define dY1 d11.f32
#define dY2 d12.f32
#define dY3 d13.f32
#define half d0.f32
@// Constant 0.5; its address is taken with LDR t0, =HALF and the value is
@// loaded into half[0] just before evenOddButterflyLoop.
HALF: .float 0.5
@// Allocate stack memory required by the function
@// Write function header
@//
@// omxSP_FFTFwd_RToCCS_F32_Sfs: forward FFT of a real float signal.
@// In:  r0 = pSrc, r1 = pDst, r2 = pFFTSpec. The scale alias (r3) is
@//      declared as an input but is never read in this function; r3 is
@//      reused as step in the final fixup.
@// Out: r0 = OMX_Sts_NoErr on every path.
@// Flow: read N, pTwiddle and pBuf from the spec; handle N = 1 directly;
@//       otherwise run an N/2 point complex FFT through the external radix
@//       stage routines and finish with finalComplexToRealFixup.
@// NOTE(review): the stage routines are defined in other files. This code
@// relies on them leaving pSrc, argDst, argTwiddle, subFFTNum and
@// subFFTSize in the state the next step expects - confirm against those
@// files. M_START and M_END come from armCOMM_s.h; the r11,d15 arguments
@// presumably select the callee-saved registers to preserve - TODO confirm.
M_START omxSP_FFTFwd_RToCCS_F32_Sfs,r11,d15
@ Structure offsets for the FFTSpec
@ (ARMsFFTSpec_pBitRev is defined but not read in this function)
.set ARMsFFTSpec_N, 0
.set ARMsFFTSpec_pBitRev, 4
.set ARMsFFTSpec_pTwiddle, 8
.set ARMsFFTSpec_pBuf, 12
@// Define stack arguments
@// Read the size from structure and take log
LDR N, [pFFTSpec, #ARMsFFTSpec_N]
@// Read other structure parameters
LDR pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
LDR pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
@// N=1 Treat separately
@// The single input value is stored followed by two zero words.
CMP N,#1
BGT sizeGreaterThanOne
VLD1 dX0[0],[pSrc]
MOV zero,#0
VMOV dzero[0],zero
VMOV dZero[0],zero
VST3 {dX0[0],dzero[0],dZero[0]},[pDst]
B End
sizeGreaterThanOne:
@// Do a N/2 point complex FFT including the scaling
@// NOTE(review): no scaling instruction appears in this float variant; the
@// wording above looks inherited from the fixed-point file this was
@// derived from.
MOV N,N,ASR #1 @// N/2 point complex FFT
CLZ order,N @// N = 2^order
RSB order,order,#31 @// order = 31 - clz(N)
MOV subFFTSize,#1
@//MOV subFFTNum,N
@// Dispatch on order: more than 3 uses the radix 4 and radix 8 path,
@// 1 to 3 use radix 2 stages, 0 only copies one complex value.
CMP order,#3
BGT orderGreaterthan3 @// order > 3
CMP order,#1
BGE orderGreaterthan0 @// order > 0
@// order = 0: copy one complex value (8 bytes) to pOut and let the fixup
@// read it from there.
VLD1 dX0,[pSrc]
VST1 dX0,[pOut]
MOV pSrc,pOut
MOV argDst,pDst
@// Flags are still those of CMP order,#1 above, so LT always holds here.
BLT FFTEnd
orderGreaterthan0:
@// set the buffers appropriately for various orders
@// order = 2 writes to pDst; orders 1 and 3 write to pOut and pass pDst in
@// pOut (r5).
CMP order,#2
MOVEQ argDst,pDst
MOVNE argDst,pOut
@// Pass the first stage destination in RN5
MOVNE pOut,pDst
MOV argTwiddle,pTwiddle
CMP order,#1
BGT orderGreaterthan1
@// order = 1
BL armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
B FFTEnd
orderGreaterthan1:
CMP order,#2
BGT orderGreaterthan2
@// order =2
BL armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
BL armSP_FFTFwd_CToC_FC32_Radix2_ls_OutOfPlace_unsafe
B FFTEnd
orderGreaterthan2:@// order =3
BL armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
BL armSP_FFTFwd_CToC_FC32_Radix2_OutOfPlace_unsafe
BL armSP_FFTFwd_CToC_FC32_Radix2_ls_OutOfPlace_unsafe
B FFTEnd
orderGreaterthan3:
specialScaleCase:
@// Set input args to fft stages
@// Bit 1 of order selects which buffer the first stage writes to.
TST order, #2
MOVEQ argDst,pDst
MOVNE argDst,pOut
@// Pass the first stage destination in RN5
MOVNE pOut,pDst
MOV argTwiddle,pTwiddle
@//check for even or odd order
@// NOTE: The following combination of BL's would work fine even though
@// the first BL would corrupt the flags. This is because the end of
@// the "grpZeroSetLoop" loop inside
@// armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag
@// to EQ
@// Even order starts with a radix 4 first stage, odd order with radix 8.
TST order,#0x00000001
BLEQ armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace_unsafe
BLNE armSP_FFTFwd_CToC_FC32_Radix8_fs_OutOfPlace_unsafe
@// order (r14) has been overwritten by the BL above; progress is tracked
@// through subFFTNum from here on.
CMP subFFTNum,#4
BLT FFTEnd
unscaledRadix4Loop:
@// Entered with flags from CMP subFFTNum,#4: EQ selects the last stage.
BEQ lastStageUnscaledRadix4
BL armSP_FFTFwd_CToC_FC32_Radix4_OutOfPlace_unsafe
CMP subFFTNum,#4
B unscaledRadix4Loop
lastStageUnscaledRadix4:
BL armSP_FFTFwd_CToC_FC32_Radix4_ls_OutOfPlace_unsafe
B FFTEnd
FFTEnd:
finalComplexToRealFixup:
@// F(0) = 1/2[Z(0) + Z'(0)] - j [Z(0) - Z'(0)]
@// 1/2[(a+jb) + (a-jb)] - j [(a+jb) - (a-jb)]
@// 1/2[2a+j0] - j [0+j2b]
@// (a+b, 0)
@// F(N/2) = 1/2[Z(0) + Z'(0)] + j [Z(0) - Z'(0)]
@// 1/2[(a+jb) + (a-jb)] + j [(a+jb) - (a-jb)]
@// 1/2[2a+j0] + j [0+j2b]
@// (a-b, 0)
@// F(0) and F(N/2)
VLD2 {dX0r[0],dX0i[0]},[pSrc]!
MOV zero,#0
VMOV dX0r[1],zero
MOV step,subFFTSize,LSL #3 @// step = N/2 * 8 bytes
VMOV dX0i[1],zero
@// twStep = 3N/8 * 8 bytes pointing to W^1
SUB twStep,step,subFFTSize,LSL #1
VADD dY0r,dX0r,dX0i @// F(0) = ((Z0.r+Z0.i) , 0)
MOV step1,subFFTSize,LSL #2 @// step1 = N/2 * 4 bytes
VSUB dY0i,dX0r,dX0i @// F(N/2) = ((Z0.r-Z0.i) , 0)
@// This SUBS sets the flags consumed by BLT End and BEQ lastElement below;
@// no instruction in between updates the flags.
SUBS subFFTSize,subFFTSize,#2
VST1 dY0r,[argDst],step
ADD pTwiddleTmp,argTwiddle,#8 @// W^2
VST1 dY0i,[argDst]!
ADD argTwiddle,argTwiddle,twStep @// W^1
VDUP dzero,zero
SUB argDst,argDst,step
BLT End
BEQ lastElement
SUB step,step,#24
SUB step1,step1,#8 @// (N/4-1)*8 bytes
@// F(k) = 1/2[Z(k) + Z'(N/2-k)] -j*W^(k) [Z(k) - Z'(N/2-k)]
@// Note: W^k is stored as negative values in the table
@// Process 4 elements at a time. E.g: F(1),F(2) and F(N/2-2),F(N/2-1)
@// since both of them require Z(1),Z(2) and Z(N/2-2),Z(N/2-1)
@// Load the constant 0.5 used by the VMUL ...,half[0] instructions below.
LDR t0, =HALF
VLD1 half[0], [t0]
evenOddButterflyLoop:
@// Each pass loads twiddles and data from the front and the back of their
@// ranges, then shrinks step and step1 so the next pass moves inwards.
VLD1 dW0r,[argTwiddle],step1
VLD1 dW1r,[argTwiddle]!
VLD2 {dX0r,dX0i},[pSrc],step
SUB argTwiddle,argTwiddle,step1
VLD2 {dX1r,dX1i},[pSrc]!
SUB step1,step1,#8 @// (N/4-2)*8 bytes
VLD1 dW0i,[pTwiddleTmp],step1
VLD1 dW1i,[pTwiddleTmp]!
SUB pSrc,pSrc,step
SUB pTwiddleTmp,pTwiddleTmp,step1
VREV64 dX1r,dX1r
VREV64 dX1i,dX1i
@// Loop counter update; its flags are consumed by BGT at the loop end.
SUBS subFFTSize,subFFTSize,#4
VSUB dT2,dX0r,dX1r @// a-c
SUB step1,step1,#8
VADD dT0,dX0r,dX1r @// a+c
VSUB dT1,dX0i,dX1i @// b-d
VADD dT3,dX0i,dX1i @// b+d
VMUL dT0,dT0,half[0]
VMUL dT1,dT1,half[0]
VZIP dW1r,dW1i
VZIP dW0r,dW0i
VMUL qT0,dW1r,dT2
VMUL qT1,dW1r,dT3
VMUL qT2,dW0r,dT2
VMUL qT3,dW0r,dT3
VMLA qT0,dW1i,dT3
VMLS qT1,dW1i,dT2
VMLS qT2,dW0i,dT3
VMLA qT3,dW0i,dT2
VMUL dX1r,qT0,half[0]
VMUL dX1i,qT1,half[0]
VSUB dY1r,dT0,dX1i @// F(N/2 -1)
VADD dY1i,dT1,dX1r
VNEG dY1i,dY1i
VREV64 dY1r,dY1r
VREV64 dY1i,dY1i
VMUL dX0r,qT2,half[0]
VMUL dX0i,qT3,half[0]
VSUB dY0r,dT0,dX0i @// F(1)
VADD dY0i,dT1,dX0r
VST2 {dY0r,dY0i},[argDst],step
VST2 {dY1r,dY1i},[argDst]!
SUB argDst,argDst,step
SUB step,step,#32 @// (N/2-4)*8 bytes
BGT evenOddButterflyLoop
@// set both the ptrs to the last element
SUB pSrc,pSrc,#8
SUB argDst,argDst,#8
@// Last element can be expanded as follows
@// 1/2[Z(k) + Z'(k)] + j w^k [Z(k) - Z'(k)]
@// 1/2[(a+jb) + (a-jb)] + j w^k [(a+jb) - (a-jb)]
@// 1/2[2a+j0] + j (c+jd) [0+j2b]
@// (a-bc, -bd)
@// Since (c,d) = (0,1) for the last element, result is just (a,-b)
lastElement:
@// Store lane 0 unchanged, negate, then store lane 1: the result is (a,-b).
VLD1 dX0r,[pSrc]
VST1 dX0r[0],[argDst]!
VNEG dX0r,dX0r
VST1 dX0r[1],[argDst]!
End:
@// Set return value
MOV result, #OMX_Sts_NoErr
@// Write function tail
M_END
.end

Просмотреть файл

@ -1,158 +0,0 @@
@//
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@// Use of this source code is governed by a BSD-style license
@// that can be found in the LICENSE file in the root of the source
@// tree. An additional intellectual property rights grant can be found
@// in the file PATENTS. All contributing project authors may
@// be found in the AUTHORS file in the root of the source tree.
@//
@// This file was originally licensed as follows. It has been
@// relicensed with permission from the copyright holders.
@//
@//
@// File Name: omxSP_FFTFwd_RToCCS_S16S32_Sfs_s.s
@// OpenMAX DL: v1.0.2
@// Last Modified Revision: 7403
@// Last Modified Date: Mon, 17 Sep 2007
@//
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
@//
@//
@//
@// Description:
@// Compute FFT for a real signal
@//
@// Include standard headers
#include "dl/api/armCOMM_s.h"
#include "dl/api/omxtypes_s.h"
@// Import symbols required from other files
@// (For example tables)
.extern omxSP_FFTFwd_RToCCS_S32_Sfs
@// Set debugging level
@//DEBUG_ON SETL {TRUE}
@// Guarding implementation by the processor name
@// Guarding implementation by the processor name
@// Import symbols required from other files
@// (For example tables)
@// Register aliases used by omxSP_FFTFwd_RToCCS_S16S32_Sfs.
@// Keep comments on their own lines here: text placed after a #define
@// value becomes part of the macro expansion.
@//Input Registers
#define pSrc r0
#define pDst r1
#define pFFTSpec r2
#define scale r3
@// Output registers
#define result r0
@// Local scratch registers
#define pTmpDst r4
#define pTmpSrc r5
#define N r6
#define order r7
#define pOut r8
@// Neon registers
@// qY0 and qX0 are both Q1; dY0S32 and dY1S32 name D2 and D3.
@// dX0 and dX0S32 are two typed views of D0.
#define dX0 D0.S16
#define qY0 Q1.S32
#define dY0S32 D2.S32
#define qX0 Q1.S32
#define dY1S32 D3.S32
#define dX0S32 D0.S32
@// Allocate stack memory required by the function
@// Write function header
@//
@// omxSP_FFTFwd_RToCCS_S16S32_Sfs: forward FFT of a real S16 signal with
@// S32 output.
@// In:  r0 = pSrc (S16 samples), r1 = pDst, r2 = pFFTSpec, r3 = scale.
@// Out: r0 = OMX_Sts_NoErr; whatever the S32 routine returned in r0 is
@//      overwritten.
@// Flow: widen the S16 input to S32 with VMOVL into either pDst or the
@//       spec buffer, point pSrc at the widened data, then call
@//       omxSP_FFTFwd_RToCCS_S32_Sfs. r1, r2 and r3 are not written here,
@//       so the callee sees the original pDst, pFFTSpec and scale.
@// NOTE(review): M_START and M_END come from armCOMM_s.h; the r11,d15
@// arguments presumably select the callee-saved registers to preserve -
@// TODO confirm.
M_START omxSP_FFTFwd_RToCCS_S16S32_Sfs,r11,d15
@ Structure offsets for the FFTSpec
@ (only ARMsFFTSpec_N and ARMsFFTSpec_pBuf are read in this function)
.set ARMsFFTSpec_N, 0
.set ARMsFFTSpec_pBitRev, 4
.set ARMsFFTSpec_pTwiddle, 8
.set ARMsFFTSpec_pBuf, 12
@// Define stack arguments
@// Read the size from structure and take log
LDR N, [pFFTSpec, #ARMsFFTSpec_N]
@// Read other structure parameters
@//LDR pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
LDR pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
@// N=1 Treat separately
@// One S16 sample is widened and stored to pDst, which also becomes the
@// source of the S32 call.
CMP N,#1
BGT sizeGreaterThanOne
VLD1 dX0[0],[pSrc]
VMOVL qY0,dX0
VST1 dY0S32[0],[pDst]
MOV pSrc,pDst
B realS32FFT
sizeGreaterThanOne:
MOV N,N,ASR #1 @// N now counts complex points (pairs of samples)
CLZ order,N @// N = 2^order
RSB order,order,#31 @// order = 31 - clz(N)
@// The parity of order selects the buffer that receives the widened input:
@// even order uses pOut, odd order uses pDst.
TST order,#1
MOVEQ pTmpDst,pOut
MOVNE pTmpDst,pDst
MOV pTmpSrc,pTmpDst @// remember the start; pTmpDst advances below
CMP N,#1
BGT copyS16ToS32
@// Exactly two samples: load them as one 32-bit lane, widen, store two S32.
VLD1 dX0S32[0],[pSrc]
VMOVL qX0,dX0
VST1 dY0S32,[pTmpDst]
B setpSrc
copyS16ToS32:
@// Each pass widens four S16 samples (two complex points) to four S32.
VLD1 dX0,[pSrc]!
SUBS N,N,#2
VMOVL qX0,dX0
VST1 {dY0S32,dY1S32},[pTmpDst]!
BGT copyS16ToS32
setpSrc:
MOV pSrc,pTmpSrc
realS32FFT:
BL omxSP_FFTFwd_RToCCS_S32_Sfs
End:
@// Set return value
MOV result, #OMX_Sts_NoErr
@// Write function tail
M_END
.end

Просмотреть файл

@ -1,549 +0,0 @@
@//
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@// Use of this source code is governed by a BSD-style license
@// that can be found in the LICENSE file in the root of the source
@// tree. An additional intellectual property rights grant can be found
@// in the file PATENTS. All contributing project authors may
@// be found in the AUTHORS file in the root of the source tree.
@//
@// This file was originally licensed as follows. It has been
@// relicensed with permission from the copyright holders.
@//
@//
@// File Name: omxSP_FFTFwd_RToCCS_S32_Sfs_s.s
@// OpenMAX DL: v1.0.2
@// Last Modified Revision: 7810
@// Last Modified Date: Thu, 04 Oct 2007
@//
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
@//
@//
@//
@// Description:
@// Compute FFT for a real signal
@//
@// Include standard headers
#include "dl/api/armCOMM_s.h"
#include "dl/api/omxtypes_s.h"
@// Import symbols required from other files
@// (For example tables)
.extern armSP_FFTFwd_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_SC32_Radix2_fs_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_SC32_Radix4_fs_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_SC32_Radix8_fs_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_SC32_Radix4_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_SC32_Sfs_Radix8_fs_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_SC32_Sfs_Radix4_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_SC32_Radix2_OutOfPlace_unsafe
@// Set debugging level
@//DEBUG_ON SETL {TRUE}
@// Guarding implementation by the processor name
@// Guarding implementation by the processor name
@// Import symbols required from other files
@// (For example tables)
.extern armSP_FFTFwd_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_SC32_Radix2_ls_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_SC32_Sfs_Radix4_ls_OutOfPlace_unsafe
.extern armSP_FFTFwd_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe
@// Register aliases used by omxSP_FFTFwd_RToCCS_S32_Sfs.
@// Several names map to the same physical register (for example r4 is
@// argScale, tmpOrder, pTwiddle, x0r and step1), so aliases that share a
@// register must never be live at the same time.
@// Keep comments on their own lines here: text placed after a #define
@// value becomes part of the macro expansion.
@//Input Registers
#define pSrc r0
#define pDst r1
#define pFFTSpec r2
#define scale r3
@// Output registers
#define result r0
@//Local Scratch Registers
@// argTwiddle and argDst reuse r1 and r2, the registers that hold pDst and
@// pFFTSpec on entry.
#define argTwiddle r1
#define argDst r2
#define argScale r4
#define tmpOrder r4
#define pTwiddle r4
#define pOut r5
#define subFFTSize r7
#define subFFTNum r6
#define N r6
@// order lives in r14, the ARM link register, which every BL overwrites.
#define order r14
#define diff r9
@// Total num of radix stages required to complete the FFT
#define count r8
#define x0r r4
#define x0i r5
#define diffMinusOne r2
#define subFFTSizeTmp r6
@// step reuses r3 (scale) and zero reuses r9 (diff); both are only used in
@// the final fixup.
#define step r3
#define step1 r4
#define twStep r8
#define zero r9
#define pTwiddleTmp r5
#define t0 r10
@// Neon registers
@// Aliases overlap here as well: dZero and dX0r are both d2, dShift, dX0i
@// and dX1 are all d3, and the dW* names share d14-d17 with the dY* names.
#define dX0 d0.s32
#define dzero d1.s32
#define dZero d2.s32
#define dShift d3.s32
#define dX0r d2.s32
#define dX0i d3.s32
#define dX1r d4.s32
#define dX1i d5.s32
#define dT0 d6.s32
#define dT1 d7.s32
#define dT2 d8.s32
#define dT3 d9.s32
@// 64-bit accumulators for the widening multiplies in the butterfly loop
#define qT0 q5.s64
#define qT1 q6.s64
#define dW0r d14.s32
#define dW0i d15.s32
#define dW1r d16.s32
#define dW1i d17.s32
#define dY0r d14.s32
#define dY0i d15.s32
#define dY1r d16.s32
#define dY1i d17.s32
#define dY0rS64 d14.s64
#define dY0iS64 d15.s64
#define qT2 q9.s64
#define qT3 q10.s64
@// lastThreeelements
#define dX1 d3.s32
#define dW0 d4.s32
#define dW1 d5.s32
#define dY0 d10.s32
#define dY1 d11.s32
#define dY2 d12.s32
#define dY3 d13.s32
@// Allocate stack memory required by the function
@// diffOnStack is a 4-byte stack slot accessed through M_STR and M_LDR
M_ALLOC4 diffOnStack, 4
@// Write function header
@//
@// omxSP_FFTFwd_RToCCS_S32_Sfs: forward FFT of a real S32 signal with
@// scaling.
@// In:  r0 = pSrc, r1 = pDst, r2 = pFFTSpec, r3 = scale.
@// Out: r0 = OMX_Sts_NoErr on every path.
@// Flow: read N, pTwiddle and pBuf from the spec; handle N = 1 directly;
@//       otherwise run an N/2 point complex FFT through the external radix
@//       stage routines (scaled Sfs variants or unscaled variants, chosen
@//       from scale and order), shift right by any scale left over in
@//       FFTEnd, and finish with finalComplexToRealFixup.
@// NOTE(review): the stage routines are defined in other files. This code
@// relies on them leaving pSrc, argDst, argTwiddle, subFFTNum and
@// subFFTSize in the state the next step expects, and on the scaled stages
@// preserving argScale (r4) - confirm against those files. M_START, M_END,
@// M_STR and M_LDR come from armCOMM_s.h; several places below assume that
@// M_STR leaves the condition flags unchanged - TODO confirm.
M_START omxSP_FFTFwd_RToCCS_S32_Sfs,r11,d15
@ Structure offsets for the FFTSpec
@ (ARMsFFTSpec_pBitRev is defined but not read in this function)
.set ARMsFFTSpec_N, 0
.set ARMsFFTSpec_pBitRev, 4
.set ARMsFFTSpec_pTwiddle, 8
.set ARMsFFTSpec_pBuf, 12
@// Define stack arguments
@// Read the size from structure and take log
LDR N, [pFFTSpec, #ARMsFFTSpec_N]
@// Read other structure parameters
LDR pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
LDR pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
@// N=1 Treat separately
@// The single input value is shifted right by scale and stored followed by
@// two zero words.
CMP N,#1
BGT sizeGreaterThanOne
VLD1 dX0[0],[pSrc]
RSB scale,scale,#0 @// to use VRSHL for right shift by a variable
MOV zero,#0
VMOV dShift[0],scale
VMOV dzero[0],zero
VRSHL dX0,dShift
VMOV dZero[0],zero
VST3 {dX0[0],dzero[0],dZero[0]},[pDst]
B End
sizeGreaterThanOne:
@// Do a N/2 point complex FFT including the scaling
MOV N,N,ASR #1 @// N/2 point complex FFT
CLZ order,N @// N = 2^order
RSB order,order,#31 @// order = 31 - clz(N)
MOV subFFTSize,#1
@//MOV subFFTNum,N
@// Dispatch on order: more than 3 uses the radix 4 and radix 8 paths,
@// 1 to 3 use radix 2 stages, 0 only copies one complex value.
CMP order,#3
BGT orderGreaterthan3 @// order > 3
CMP order,#1
BGE orderGreaterthan0 @// order > 0
@// order = 0: the whole scale is left for FFTEnd, so store it as diff.
M_STR scale, diffOnStack,LT @// order = 0
@// Copy one complex value (8 bytes) to pOut and continue from there.
VLD1 dX0,[pSrc]
VST1 dX0,[pOut]
MOV pSrc,pOut
MOV argDst,pDst
@// Flags are expected to still be those of CMP order,#1 above (LT).
BLT FFTEnd
orderGreaterthan0:
@// set the buffers appropriately for various orders
@// order = 2 writes to pDst; orders 1 and 3 write to pOut and pass pDst in
@// pOut (r5).
CMP order,#2
MOVEQ argDst,pDst
MOVNE argDst,pOut
MOVNE pOut,pDst @// Pass the first stage destination in RN5
MOV argTwiddle,pTwiddle
@// diff = scale - order is the scaling that the stages cannot absorb; it
@// is applied later in FFTEnd when positive.
SUBS diff,scale,order
M_STR diff,diffOnStack
MOVGT scale,order
@// Now scale <= order
CMP order,#1
BGT orderGreaterthan1
@// order = 1: one stage, scaled when scale was 1 and unscaled when it was 0.
SUBS scale,scale,#1
BLEQ armSP_FFTFwd_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe @// order = 1
BLLT armSP_FFTFwd_CToC_SC32_Radix2_fs_OutOfPlace_unsafe @// order = 1
B FFTEnd
orderGreaterthan1:
CMP order,#2
MOV argScale,scale
BGT orderGreaterthan2
@// order = 2: each SUBS consumes one unit of scale; a stage runs scaled
@// while scale remains and unscaled once it is used up.
SUBS argScale,argScale,#1
BLGE armSP_FFTFwd_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe @// order =2
BLLT armSP_FFTFwd_CToC_SC32_Radix2_fs_OutOfPlace_unsafe
SUBS argScale,argScale,#1
BLEQ armSP_FFTFwd_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe
BLLT armSP_FFTFwd_CToC_SC32_Radix2_ls_OutOfPlace_unsafe
B FFTEnd
orderGreaterthan2:@// order =3
@// Same scheme as order = 2 with a middle stage between first and last.
SUBS argScale,argScale,#1
BLGE armSP_FFTFwd_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe
BLLT armSP_FFTFwd_CToC_SC32_Radix2_fs_OutOfPlace_unsafe
SUBS argScale,argScale,#1
BLGE armSP_FFTFwd_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe
BLLT armSP_FFTFwd_CToC_SC32_Radix2_OutOfPlace_unsafe
SUBS argScale,argScale,#1
BLEQ armSP_FFTFwd_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe
BLLT armSP_FFTFwd_CToC_SC32_Radix2_ls_OutOfPlace_unsafe
B FFTEnd
orderGreaterthan3:
@// check scale = 0 or scale = order
SUBS diff, scale, order @// scale > order
MOVGT scale,order
BGE specialScaleCase @// scale = 0 or scale = order
CMP scale,#0
BEQ specialScaleCase
B generalScaleCase
specialScaleCase:@// scale = 0 or scale = order and order >= 2
@// Bit 1 of order selects which buffer the first stage writes to.
TST order, #2 @// Set input args to fft stages
MOVEQ argDst,pDst
MOVNE argDst,pOut
MOVNE pOut,pDst @// Pass the first stage destination in RN5
MOV argTwiddle,pTwiddle
@// diff >= 0 means scale >= order: every stage runs scaled. Otherwise scale
@// is 0 here and every stage runs unscaled.
CMP diff,#0
M_STR diff, diffOnStack
BGE scaleEqualsOrder
@//check for even or odd order
@// NOTE: The following combination of BL's would work fine even though the first
@// BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
@// armSP_FFTFwd_CToC_SC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
TST order,#0x00000001
BLEQ armSP_FFTFwd_CToC_SC32_Radix4_fs_OutOfPlace_unsafe
BLNE armSP_FFTFwd_CToC_SC32_Radix8_fs_OutOfPlace_unsafe
@// order (r14) has been overwritten by the BL above; progress is tracked
@// through subFFTNum from here on.
CMP subFFTNum,#4
BLT FFTEnd
unscaledRadix4Loop:
@// Entered with flags from CMP subFFTNum,#4: EQ selects the last stage.
BEQ lastStageUnscaledRadix4
BL armSP_FFTFwd_CToC_SC32_Radix4_OutOfPlace_unsafe
CMP subFFTNum,#4
B unscaledRadix4Loop
lastStageUnscaledRadix4:
BL armSP_FFTFwd_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
B FFTEnd
scaleEqualsOrder:
@//check for even or odd order
@// NOTE: The following combination of BL's would work fine even though the first
@// BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
@// armSP_FFTFwd_CToC_SC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
@// NOTE(review): the routines called here are the Sfs variants; the note
@// above names the unscaled routine - confirm the same Z flag behaviour
@// holds for armSP_FFTFwd_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe.
TST order,#0x00000001
BLEQ armSP_FFTFwd_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe
BLNE armSP_FFTFwd_CToC_SC32_Sfs_Radix8_fs_OutOfPlace_unsafe
CMP subFFTNum,#4
BLT FFTEnd
scaledRadix4Loop:
@// Entered with flags from CMP subFFTNum,#4: EQ selects the last stage.
BEQ lastStageScaledRadix4
BL armSP_FFTFwd_CToC_SC32_Sfs_Radix4_OutOfPlace_unsafe
CMP subFFTNum,#4
B scaledRadix4Loop
lastStageScaledRadix4:
BL armSP_FFTFwd_CToC_SC32_Sfs_Radix4_ls_OutOfPlace_unsafe
B FFTEnd
generalScaleCase:@// 0 < scale < order and order >= 2
@// Determine the correct destination buffer
@// diff = order - scale is the number of radix 2 stages worth of work left
@// unscaled. When diff is even those run as radix 4 stages, so count =
@// scale + diff/2; when diff is odd they run as radix 2, so count = order.
SUB diff,order,scale
TST diff,#0x01
ADDEQ count, scale,diff,lsr #1 @// count = scale + (order - scale)/2
MOVNE count, order
TST count, #0x01 @// Is count even or odd ?
MOVEQ argDst,pDst @// Set input args to fft stages
MOVNE argDst,pOut
MOVNE pOut,pDst @// Pass the first stage destination in RN5
MOV argTwiddle,pTwiddle
M_STR diff, diffOnStack
MOV argScale,scale @// Put scale in RN4 so as to save and restore
BL armSP_FFTFwd_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe @// scaled first stage
SUBS argScale,argScale,#1
scaledRadix2Loop:
@// Run one scaled radix 2 stage per remaining unit of scale.
BLGT armSP_FFTFwd_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe
SUBS argScale,argScale,#1 @// save and restore scale (RN4) in the scaled stages
BGT scaledRadix2Loop
M_LDR diff, diffOnStack
@//check for even or odd order
TST diff,#0x00000001
BEQ generalUnscaledRadix4Loop
B unscaledRadix2Loop
generalUnscaledRadix4Loop:
CMP subFFTNum,#4
BEQ generalLastStageUnscaledRadix4
BL armSP_FFTFwd_CToC_SC32_Radix4_OutOfPlace_unsafe
B generalUnscaledRadix4Loop
generalLastStageUnscaledRadix4:
BL armSP_FFTFwd_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
@// All of scale was applied by the stages, so skip the FFTEnd shift.
B finalComplexToRealFixup
unscaledRadix2Loop:
CMP subFFTNum,#2
BEQ generalLastStageUnscaledRadix2
BL armSP_FFTFwd_CToC_SC32_Radix2_OutOfPlace_unsafe
B unscaledRadix2Loop
generalLastStageUnscaledRadix2:
BL armSP_FFTFwd_CToC_SC32_Radix2_ls_OutOfPlace_unsafe
@// All of scale was applied by the stages, so skip the FFTEnd shift.
B finalComplexToRealFixup
FFTEnd:@// Does only the scaling
@// diff > 0 means scale exceeded what the stages applied; shift every
@// complex value right by diff before the fixup.
M_LDR diff, diffOnStack
CMP diff,#0
BLE finalComplexToRealFixup
RSB diff,diff,#0 @// to use VRSHL for right shift by a variable
VDUP dShift,diff
@// save subFFTSize and use tmpsubfftsize in the following loop
MOV subFFTSizeTmp,subFFTSize @// subFFTSizeTmp same reg as subFFTNum
scaleFFTData:@// N = subFFTSize ; dataptr = pDst ; scale = diff
@// Each pass shifts one complex value (8 bytes) in place.
VLD1 {dX0},[pSrc] @// pSrc contains pDst pointer
SUBS subFFTSizeTmp,subFFTSizeTmp,#1
VRSHL dX0,dShift
VST1 {dX0},[pSrc]!
BGT scaleFFTData
SUB pSrc,pSrc,subFFTSize,LSL #3 @// reset pSrc for final fixup
@// change the logic so that output after scaling is in pOut and not in pDst
@// finally store from pOut to pDst
@// change branch "End" to branch "finalComplexToRealFixup" in the above
@// chk the code below for multiplication by j factor
finalComplexToRealFixup:
@// F(0) = 1/2[Z(0) + Z'(0)] - j [Z(0) - Z'(0)]
@// 1/2[(a+jb) + (a-jb)] - j [(a+jb) - (a-jb)]
@// 1/2[2a+j0] - j [0+j2b]
@// (a+b, 0)
@// F(N/2) = 1/2[Z(0) + Z'(0)] + j [Z(0) - Z'(0)]
@// 1/2[(a+jb) + (a-jb)] + j [(a+jb) - (a-jb)]
@// 1/2[2a+j0] + j [0+j2b]
@// (a-b, 0)
@// F(0) and F(N/2)
VLD2 {dX0r[0],dX0i[0]},[pSrc]!
MOV zero,#0
VMOV dX0r[1],zero
MOV step,subFFTSize,LSL #3 @// step = N/2 * 8 bytes
VMOV dX0i[1],zero
SUB twStep,step,subFFTSize,LSL #1 @// twStep = 3N/8 * 8 bytes pointing to W^1
VADD dY0r,dX0r,dX0i @// F(0) = ((Z0.r+Z0.i) , 0)
MOV step1,subFFTSize,LSL #2 @// step1 = N/2 * 4 bytes
VSUB dY0i,dX0r,dX0i @// F(N/2) = ((Z0.r-Z0.i) , 0)
@// This SUBS sets the flags consumed by BLT End and BEQ lastElement below;
@// no instruction in between updates the flags.
SUBS subFFTSize,subFFTSize,#2
VST1 dY0r,[argDst],step
ADD pTwiddleTmp,argTwiddle,#8 @// W^2
VST1 dY0i,[argDst]!
ADD argTwiddle,argTwiddle,twStep @// W^1
VDUP dzero,zero
SUB argDst,argDst,step
BLT End
BEQ lastElement
SUB step,step,#24
SUB step1,step1,#8 @// (N/4-1)*8 bytes
@// F(k) = 1/2[Z(k) + Z'(N/2-k)] -j*W^(k) [Z(k) - Z'(N/2-k)]
@// Note: W^k is stored as negative values in the table
@// Process 4 elements at a time. E.g: F(1),F(2) and F(N/2-2),F(N/2-1) since both of them
@// require Z(1),Z(2) and Z(N/2-2),Z(N/2-1)
evenOddButterflyLoop:
@// Each pass loads twiddles and data from the front and the back of their
@// ranges, then shrinks step and step1 so the next pass moves inwards.
VLD1 dW0r,[argTwiddle],step1
VLD1 dW1r,[argTwiddle]!
VLD2 {dX0r,dX0i},[pSrc],step
SUB argTwiddle,argTwiddle,step1
VLD2 {dX1r,dX1i},[pSrc]!
SUB step1,step1,#8 @// (N/4-2)*8 bytes
VLD1 dW0i,[pTwiddleTmp],step1
VLD1 dW1i,[pTwiddleTmp]!
SUB pSrc,pSrc,step
SUB pTwiddleTmp,pTwiddleTmp,step1
VREV64 dX1r,dX1r
VREV64 dX1i,dX1i
@// Loop counter update; its flags are consumed by BGT at the loop end.
SUBS subFFTSize,subFFTSize,#4
VSUB dT2,dX0r,dX1r @// a-c
SUB step1,step1,#8
VADD dT3,dX0i,dX1i @// b+d
VADD dT0,dX0r,dX1r @// a+c
VSUB dT1,dX0i,dX1i @// b-d
@// Halving add with zero: dT0 = (a+c)/2 and dT1 = (b-d)/2
VHADD dT0,dT0,dzero
VHADD dT1,dT1,dzero
VZIP dW1r,dW1i
vzip dW0r,dW0i
@// Widening multiply-accumulate into 64-bit lanes, then narrow with a
@// rounding right shift by 32 (VRSHRN).
VMULL qT0,dW1r,dT2
VMLAL qT0,dW1i,dT3
VMULL qT1,dW1r,dT3
VMLSL qT1,dW1i,dT2
VMULL qT2,dW0r,dT2
VMLSL qT2,dW0i,dT3
VMULL qT3,dW0r,dT3
VMLAL qT3,dW0i,dT2
VRSHRN dX1r,qT0,#32
VRSHRN dX1i,qT1,#32
VSUB dY1r,dT0,dX1i @// F(N/2 -1)
VADD dY1i,dT1,dX1r
VNEG dY1i,dY1i
VREV64 dY1r,dY1r
VREV64 dY1i,dY1i
VRSHRN dX0r,qT2,#32
VRSHRN dX0i,qT3,#32
VSUB dY0r,dT0,dX0i @// F(1)
VADD dY0i,dT1,dX0r
VST2 {dY0r,dY0i},[argDst],step
VST2 {dY1r,dY1i},[argDst]!
SUB argDst,argDst,step
SUB step,step,#32 @// (N/2-4)*8 bytes
BGT evenOddButterflyLoop
SUB pSrc,pSrc,#8 @// set both the ptrs to the last element
SUB argDst,argDst,#8
@// Last element can be expanded as follows
@// 1/2[Z(k) + Z'(k)] + j w^k [Z(k) - Z'(k)]
@// 1/2[(a+jb) + (a-jb)] + j w^k [(a+jb) - (a-jb)]
@// 1/2[2a+j0] + j (c+jd) [0+j2b]
@// (a-bc, -bd)
@// Since (c,d) = (0,1) for the last element, result is just (a,-b)
lastElement:
@// Store lane 0 unchanged, negate, then store lane 1: the result is (a,-b).
VLD1 dX0r,[pSrc]
VST1 dX0r[0],[argDst]!
VNEG dX0r,dX0r
VST1 dX0r[1],[argDst]!
End:
@// Set return value
MOV result, #OMX_Sts_NoErr
@// Write function tail
M_END
.end

Просмотреть файл

@ -1,52 +0,0 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "dl/api/armOMX.h"
#include "dl/api/omxtypes.h"
#include "dl/sp/api/armSP.h"
#include "dl/sp/api/omxSP.h"
/**
 * Function: omxSP_FFTGetBufSize_C_FC32
 *
 * Description:
 * Reports how many bytes the specification structure needs for the
 * length 2^order complex float FFT and IFFT functions. Use it together with
 * <FFTFwd_CToC_FC32_Sfs> and <FFTInv_CToC_FC32_Sfs>.
 *
 * Input Arguments:
 *
 *   order - base-2 logarithm of the desired block length; valid in the range
 *           [1,12] ([1,15] if BIG_FFT_TABLE is defined.)
 *
 * Output Arguments:
 *
 *   pSize - pointer to the number of bytes required for the specification
 *           structure
 *
 * Return Value:
 *
 *   OMX_Sts_NoErr     - no error
 *   OMX_Sts_BadArgErr - pSize is null, order < 1, or
 *                       order > TWIDDLE_TABLE_ORDER
 *
 */
OMXResult omxSP_FFTGetBufSize_C_FC32(OMX_INT order, OMX_INT *pSize) {
  /*
   * With valid arguments the answer is the one for C_SC32: the elements
   * are the same size and ARMsFFTSpec_SC32 is the same size as
   * ARMsFFTSpec_FC32.
   */
  if (pSize && (order >= 1) && (order <= TWIDDLE_TABLE_ORDER)) {
    return omxSP_FFTGetBufSize_C_SC32(order, pSize);
  }

  return OMX_Sts_BadArgErr;
}

Просмотреть файл

@ -1,96 +0,0 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*
* This file was originally licensed as follows. It has been
* relicensed with permission from the copyright holders.
*/
/**
*
* File Name: omxSP_FFTGetBufSize_C_SC16.c
* OpenMAX DL: v1.0.2
* Last Modified Revision: 9468
* Last Modified Date: Thu, 03 Jan 2008
*
* (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
*
*
* Description:
* Compute the size of the specification structure required
*/
#include "dl/api/armOMX.h"
#include "dl/api/omxtypes.h"
#include "dl/sp/api/armSP.h"
#include "dl/sp/api/omxSP.h"
/**
 * Function: omxSP_FFTGetBufSize_C_SC16 (2.2.4.1.6)
 *
 * Description:
 * These functions compute the size of the specification structure
 * required for the length 2^order complex FFT and IFFT functions. The function
 * <FFTGetBufSize_C_SC16> is used in conjunction with the 16-bit functions
 * <FFTFwd_CToC_SC16_Sfs> and <FFTInv_CToC_SC16_Sfs>.
 *
 * Input Arguments:
 *
 *   order - base-2 logarithm of the desired block length; valid in the range
 *           [0,TWIDDLE_TABLE_ORDER]
 *
 * Output Arguments:
 *
 *   pSize - pointer to the number of bytes required for the specification
 *           structure; must not be null
 *
 * Return Value:
 *
 *   OMX_Sts_NoErr     - no error
 *   OMX_Sts_BadArgErr - pSize is null or order is outside the valid range;
 *                       *pSize is not written in that case
 *
 */
OMXResult omxSP_FFTGetBufSize_C_SC16(
    OMX_INT order,
    OMX_INT *pSize)
{
    OMX_INT N, twiddleSize;

    /*
     * Validate before touching anything: *pSize is written below, and
     * 1 << order is undefined for a negative or oversized shift count.
     */
    if (!pSize || (order < 0) || (order > TWIDDLE_TABLE_ORDER))
        return OMX_Sts_BadArgErr;

    /* Check for order zero: only the bare structure is needed */
    if (order == 0)
    {
        *pSize = sizeof(ARMsFFTSpec_SC16);
        return OMX_Sts_NoErr;
    }

    N = 1 << order;

    /* The max size of the twiddle table needed is 3N/4 for a radix-4 stage */
    twiddleSize = 3 * N / 4;

    /* 2 pointers to store bitreversed array and twiddle factor array */
    *pSize = sizeof(ARMsFFTSpec_SC16)
             /* Twiddle factors */
             + sizeof(OMX_SC16) * twiddleSize
             /* Ping Pong buffer */
             + sizeof(OMX_SC16) * N
             + 62; /* Extra bytes to get 32 byte alignment of ptwiddle and pBuf */

    return OMX_Sts_NoErr;
}
/*****************************************************************************
* END OF FILE
*****************************************************************************/

Просмотреть файл

@ -1,94 +0,0 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*
* This file was originally licensed as follows. It has been
* relicensed with permission from the copyright holders.
*/
/**
*
* File Name: omxSP_FFTGetBufSize_C_SC32.c
* OpenMAX DL: v1.0.2
* Last Modified Revision: 9468
* Last Modified Date: Thu, 03 Jan 2008
*
* (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
*
*
* Description:
* Compute the size of the specification structure required
*/
#include "dl/api/armOMX.h"
#include "dl/api/omxtypes.h"
#include "dl/sp/api/armSP.h"
#include "dl/sp/api/omxSP.h"
/**
 * Function: omxSP_FFTGetBufSize_C_SC32 (2.2.4.1.6)
 *
 * Description:
 * These functions compute the size of the specification structure
 * required for the length 2^order complex FFT and IFFT functions. The function
 * <FFTGetBufSize_C_SC32> is used in conjunction with the 32-bit functions
 * <FFTFwd_CToC_SC32_Sfs> and <FFTInv_CToC_SC32_Sfs>.
 *
 * Input Arguments:
 *
 *   order - base-2 logarithm of the desired block length; valid in the range
 *           [0,TWIDDLE_TABLE_ORDER]
 *
 * Output Arguments:
 *
 *   pSize - pointer to the number of bytes required for the specification
 *           structure; must not be null
 *
 * Return Value:
 *
 *   OMX_Sts_NoErr     - no error
 *   OMX_Sts_BadArgErr - pSize is null or order is outside the valid range;
 *                       *pSize is not written in that case
 *
 */
OMXResult omxSP_FFTGetBufSize_C_SC32(
    OMX_INT order,
    OMX_INT *pSize)
{
    OMX_INT N, twiddleSize;

    /*
     * Validate before touching anything: *pSize is written below, and
     * 1 << order is undefined for a negative or oversized shift count.
     * The upper bound matches the one the FC32 wrapper applies before it
     * delegates to this function.
     */
    if (!pSize || (order < 0) || (order > TWIDDLE_TABLE_ORDER))
        return OMX_Sts_BadArgErr;

    /* Check for order zero: only the bare structure is needed */
    if (order == 0)
    {
        *pSize = sizeof(ARMsFFTSpec_SC32);
        return OMX_Sts_NoErr;
    }

    N = 1 << order;

    /* The max size of the twiddle table needed is 3N/4 for a radix-4 stage */
    twiddleSize = 3 * N / 4;

    *pSize = sizeof(ARMsFFTSpec_SC32)
             /* N Twiddle factors */
             + sizeof(OMX_SC32) * twiddleSize
             /* Ping Pong buffer */
             + sizeof(OMX_SC32) * N
             + 62; /* Extra bytes to get 32 byte alignment of ptwiddle and pBuf */

    return OMX_Sts_NoErr;
}
/*****************************************************************************
* END OF FILE
*****************************************************************************/

Просмотреть файл

@ -1,49 +0,0 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*
*/
#include "dl/api/armOMX.h"
#include "dl/api/omxtypes.h"
#include "dl/sp/api/armSP.h"
#include "dl/sp/api/omxSP.h"
/**
 * Function: omxSP_FFTGetBufSize_R_F32
 *
 * Description:
 * Reports the number of bytes needed for the specification structure of
 * the length 2^order real float FFT and IFFT.
 *
 * Remarks:
 * Used in conjunction with <FFTFwd_RToCCS_F32_Sfs> and
 * <FFTInv_CCSToR_F32_Sfs>.
 *
 * Parameters:
 * [in]  order   base-2 logarithm of the length; must lie in
 *               [1, TWIDDLE_TABLE_ORDER] (documented as [1,12], or
 *               [1,15] if BIG_FFT_TABLE is defined).
 * [out] pSize   receives the number of bytes required for the
 *               specification structure; must not be NULL.
 *
 * Return Value:
 * OMX_Sts_BadArgErr when pSize is NULL or order is out of range;
 * otherwise whatever omxSP_FFTGetBufSize_R_S32 returns.
 */
OMXResult omxSP_FFTGetBufSize_R_F32(OMX_INT order, OMX_INT *pSize) {
  if (pSize && (order >= 1) && (order <= TWIDDLE_TABLE_ORDER)) {
    /*
     * The S32 variant already yields the right answer: the element
     * sizes match and ARMsFFTSpec_R_SC32 is the same size as
     * ARMsFFTSpec_R_FC32, so simply forward the request.
     */
    return omxSP_FFTGetBufSize_R_S32(order, pSize);
  }

  return OMX_Sts_BadArgErr;
}

Просмотреть файл

@ -1,91 +0,0 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*
* This file was originally licensed as follows. It has been
* relicensed with permission from the copyright holders.
*/
/**
*
* File Name: omxSP_FFTGetBufSize_R_S16S32.c
* OpenMAX DL: v1.0.2
* Last Modified Revision: 7777
* Last Modified Date: Thu, 27 Sep 2007
*
* (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
*
*
* Description:
* Computes the size of the specification structure required.
*/
#include "dl/api/armOMX.h"
#include "dl/api/omxtypes.h"
#include "dl/sp/api/armSP.h"
#include "dl/sp/api/omxSP.h"
/**
 * Function: omxSP_FFTGetBufSize_R_S16S32
 *
 * Description:
 * Reports the number of bytes needed for the specification structure of
 * the length 2^order real FFT and IFFT.
 *
 * Remarks:
 * Used in conjunction with the 16-bit functions
 * <FFTFwd_RToCCS_S16_S32_Sfs> and <FFTInv_CCSToR_S32_S16_Sfs>.
 *
 * Parameters:
 * [in]  order   base-2 logarithm of the length; documented as valid in
 *               the range [0,12]. It is not range-checked here.
 * [out] pSize   receives the number of bytes required for the
 *               specification structure. Written unconditionally, so it
 *               must not be NULL.
 *
 * Return Value:
 * Always OMX_Sts_NoErr.
 */
OMXResult omxSP_FFTGetBufSize_R_S16S32(
    OMX_INT order,
    OMX_INT *pSize
    )
{
    OMX_INT realLen;        /* N = 2^order real samples */
    OMX_INT twiddleCount;   /* complex twiddle entries  */

    if (order != 0)
    {
        realLen = (1 << (order - 1)) << 1;

        /* 3/4 * (N/2) for the half-length complex FFT plus N/4 more. */
        twiddleCount = 5 * realLen / 8;

        *pSize = sizeof(ARMsFFTSpec_R_SC32)
                 /* twiddle factors */
                 + sizeof(OMX_SC32) * twiddleCount
                 /* ping-pong buffer for the N/2 point complex FFT; the
                  * extra N entries serve FFTInv_CCSToR_S32S16_Sfs as a
                  * temporary buffer */
                 + sizeof(OMX_S32) * (realLen << 1)
                 /* slack for 32-byte alignment of the twiddle table and
                  * the buffer */
                 + 62;
    }
    else
    {
        /* Length 1: the structure plus a two-element scratch area. */
        *pSize = sizeof(ARMsFFTSpec_R_SC32)
                 + sizeof(OMX_S32) * (2);
    }

    return OMX_Sts_NoErr;
}
/*****************************************************************************
* END OF FILE
*****************************************************************************/

Просмотреть файл

@ -1,91 +0,0 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*
* This file was originally licensed as follows. It has been
* relicensed with permission from the copyright holders.
*/
/**
*
* File Name: omxSP_FFTGetBufSize_R_S32.c
* OpenMAX DL: v1.0.2
* Last Modified Revision: 7777
* Last Modified Date: Thu, 27 Sep 2007
*
* (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
*
*
* Description:
* Computes the size of the specification structure required.
*/
#include "dl/api/armOMX.h"
#include "dl/api/omxtypes.h"
#include "dl/sp/api/armSP.h"
#include "dl/sp/api/omxSP.h"
/**
 * Function: omxSP_FFTGetBufSize_R_S32
 *
 * Description:
 * Reports the number of bytes needed for the specification structure of
 * the length 2^order real FFT and IFFT.
 *
 * Remarks:
 * Used in conjunction with the 32-bit functions
 * <FFTFwd_RToCCS_S32_Sfs> and <FFTInv_CCSToR_S32_Sfs>.
 *
 * Parameters:
 * [in]  order   base-2 logarithm of the length; documented as valid in
 *               the range [0,12]. It is not range-checked here.
 * [out] pSize   receives the number of bytes required for the
 *               specification structure. Written unconditionally, so it
 *               must not be NULL.
 *
 * Return Value:
 * Always OMX_Sts_NoErr.
 */
OMXResult omxSP_FFTGetBufSize_R_S32(
    OMX_INT order,
    OMX_INT *pSize
    )
{
    OMX_INT sampleCount;    /* N = 2^order real samples */
    OMX_INT twiddleLen;     /* complex twiddle entries  */

    /* Length-1 transform: structure plus two scratch elements. */
    if (order == 0)
    {
        *pSize = sizeof(ARMsFFTSpec_R_SC32) + sizeof(OMX_S32) * (2);
        return OMX_Sts_NoErr;
    }

    sampleCount = (1 << (order - 1)) << 1;
    twiddleLen  = 5 * sampleCount / 8;      /* 3/4(N/2) + N/4 */

    *pSize = sizeof(ARMsFFTSpec_R_SC32)
             /* twiddle factors */
             + sizeof(OMX_SC32) * twiddleLen
             /* ping-pong buffer for the N/2 point complex FFT; the extra
              * N entries serve FFTInv_CCSToR_S32_Sfs as a temporary
              * buffer */
             + sizeof(OMX_S32) * (sampleCount << 1)
             /* slack for 32-byte alignment of the twiddle table and the
              * buffer */
             + 62;

    return OMX_Sts_NoErr;
}
/*****************************************************************************
* END OF FILE
*****************************************************************************/

Просмотреть файл

@ -1,162 +0,0 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*
* This is a modification of omxSP_FFTInit_C_SC32.c to support
* complex float instead of SC32.
*/
#include "dl/api/armOMX.h"
#include "dl/api/omxtypes.h"
#include "dl/sp/api/armSP.h"
#include "dl/sp/api/omxSP.h"
/**
 * Function: omxSP_FFTInit_C_FC32
 *
 * Description:
 * Fills in the specification structure used by the complex float FFT
 * and IFFT functions.
 *
 * Remarks:
 * The structure serves <FFTFwd_CToC_FC32_Sfs> and
 * <FFTInv_CToC_FC32_Sfs>. The caller allocates *pFFTSpec beforehand; the
 * number of bytes needed is given by <FFTGetBufSize_C_FC32>. The
 * twiddle table and the work buffer are carved out of that same
 * allocation, directly after the structure, each rounded up to a
 * 32-byte boundary.
 *
 * Parameters:
 * [out] pFFTSpec   specification structure to initialize; must not be
 *                  NULL.
 * [in]  order      base-2 logarithm of the desired block length; must
 *                  lie in [1, TWIDDLE_TABLE_ORDER] (documented as
 *                  [1,12], or [1,15] if BIG_FFT_TABLE is defined).
 *
 * Return Value:
 * OMX_Sts_BadArgErr for a NULL pFFTSpec or an out-of-range order,
 * OMX_Sts_NoErr otherwise.
 */
OMXResult omxSP_FFTInit_C_FC32(OMXFFTSpec_C_FC32* pFFTSpec, OMX_INT order) {
  ARMsFFTSpec_FC32* spec = (ARMsFFTSpec_FC32*) pFFTSpec;
  OMX_S8* cursor;          /* byte cursor walking through the allocation */
  OMX_FC32* twiddle;
  OMX_FC32* workBuf;
  OMX_U32 misalign;        /* low five address bits                       */
  OMX_INT length;          /* N = 2^order                                 */
  OMX_INT eighth;          /* N / 8                                       */
  OMX_INT quarter;         /* N / 4                                       */
  OMX_INT half;            /* N / 2                                       */
  OMX_INT threeQuarter;    /* 3N / 4                                      */
  OMX_INT stride;          /* step through the master twiddle table       */
  OMX_INT k;
  OMX_INT idx;
  OMX_F32 re;
  OMX_F32 im;
  OMX_F32 reNeg;

  if (!pFFTSpec || (order < 1) || (order > TWIDDLE_TABLE_ORDER))
    return OMX_Sts_BadArgErr;

  length = (1 << (order - 1)) << 1;
  eighth = length >> 3;
  quarter = 2 * eighth;
  half = 4 * eighth;
  threeQuarter = 6 * eighth;

  /* Twiddle table: first 32-byte boundary at or after the structure. */
  cursor = (OMX_S8*) pFFTSpec + sizeof(ARMsFFTSpec_FC32);
  misalign = ((OMX_U32) cursor) & 31;
  if (misalign != 0)
    cursor += 32 - misalign;
  twiddle = (OMX_FC32*) cursor;

  /* Work buffer: first 32-byte boundary after 3N/4 twiddle entries. */
  cursor += sizeof(OMX_FC32) * (3 * length / 4);
  misalign = ((OMX_U32) cursor) & 31;
  if (misalign != 0)
    cursor += 32 - misalign;
  workBuf = (OMX_FC32*) cursor;

  /*
   * Twiddle factors.
   *
   * armSP_FFT_F32TwiddleTable holds (MaxSize/8 + 1) entries; everything
   * beyond the first eighth is derived from the symmetries of sin and
   * cos. A radix-4 stage needs at most 3N/4 entries.
   *
   *   W = (-2 * PI) / N,  N = 1 << order
   *
   * A shorter transform reads every 2^(TWIDDLE_TABLE_ORDER - order)-th
   * table entry.
   */
  stride = 1 << (TWIDDLE_TABLE_ORDER - order);

  re = armSP_FFT_F32TwiddleTable[0];
  im = armSP_FFT_F32TwiddleTable[1];
  reNeg = 1;   /* written where the general pattern below writes -re */

  switch (order) {
    case 1:
      twiddle[0].Re = re;
      twiddle[0].Im = im;
      break;

    case 2:
      twiddle[0].Re = re;
      twiddle[0].Im = im;
      twiddle[1].Re = -im;
      twiddle[1].Im = reNeg;
      twiddle[2].Re = reNeg;
      twiddle[2].Im = im;
      break;

    default:
      /* order >= 3 (the range was validated above). k = 0 entries: */
      twiddle[0].Re = re;
      twiddle[0].Im = im;
      twiddle[quarter].Re = -im;
      twiddle[quarter].Im = reNeg;
      twiddle[half].Re = reNeg;
      twiddle[half].Im = im;

      /*
       * Each table entry is mirrored into six positions. When
       * k == eighth, neighbouring targets coincide, so the order of the
       * stores below decides which value survives and must not change.
       */
      for (k = 1; k <= eighth; ++k) {
        idx = 2 * (k * stride);
        re = armSP_FFT_F32TwiddleTable[idx];
        im = armSP_FFT_F32TwiddleTable[idx + 1];

        twiddle[k].Re = re;
        twiddle[k].Im = im;
        twiddle[quarter - k].Re = -im;
        twiddle[quarter - k].Im = -re;
        twiddle[quarter + k].Re = im;
        twiddle[quarter + k].Im = -re;
        twiddle[half - k].Re = -re;
        twiddle[half - k].Im = im;
        twiddle[half + k].Re = -re;
        twiddle[half + k].Im = -im;
        twiddle[threeQuarter - k].Re = im;
        twiddle[threeQuarter - k].Im = re;
      }
      break;
  }

  /* Publish the layout. Optimized implementations don't use bit reversal. */
  spec->N = length;
  spec->pTwiddle = twiddle;
  spec->pBitRev = NULL;
  spec->pBuf = workBuf;

  return OMX_Sts_NoErr;
}
/*****************************************************************************
* END OF FILE
*****************************************************************************/

Просмотреть файл

@ -1,201 +0,0 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*
* This file was originally licensed as follows. It has been
* relicensed with permission from the copyright holders.
*/
/**
*
* File Name: omxSP_FFTInit_C_SC16.c
* OpenMAX DL: v1.0.2
* Last Modified Revision: 15322
* Last Modified Date: Wed, 15 Oct 2008
*
* (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
*
*
* Description:
* Initializes the specification structures required
*/
#include "dl/api/armOMX.h"
#include "dl/api/omxtypes.h"
#include "dl/sp/api/armSP.h"
#include "dl/sp/api/omxSP.h"
/**
 * Function: omxSP_FFTInit_C_SC16
 *
 * Description:
 * Fills in the specification structure used by the 16-bit complex FFT
 * and IFFT functions.
 *
 * Remarks:
 * The structure serves <FFTFwd_CToC_SC16_Sfs> and
 * <FFTInv_CToC_SC16_Sfs>. The caller allocates *pFFTSpec beforehand; the
 * number of bytes needed is given by <FFTGetBufSize_C_SC16>. The
 * twiddle table and the work buffer are carved out of that same
 * allocation, directly after the structure, each rounded up to a
 * 32-byte boundary.
 *
 * Parameters:
 * [out] pFFTSpec   specification structure to initialize. It is
 *                  dereferenced without a NULL check.
 * [in]  order      base-2 logarithm of the desired block length;
 *                  documented as valid in the range [0,12]. It is not
 *                  range-checked here.
 *
 * Return Value:
 * Always OMX_Sts_NoErr.
 */
OMXResult omxSP_FFTInit_C_SC16(
    OMXFFTSpec_C_SC16* pFFTSpec,
    OMX_INT order
    )
{
    ARMsFFTSpec_SC16 *spec = (ARMsFFTSpec_SC16 *) pFFTSpec;
    OMX_S8   *cursor;           /* byte cursor walking the allocation   */
    OMX_SC16 *twiddle;
    OMX_SC16 *workBuf;
    OMX_U32   misalign;         /* low five address bits                */
    OMX_INT   length;           /* N = 2^order                          */
    OMX_INT   eighth;           /* N / 8                                */
    OMX_INT   quarter;          /* N / 4                                */
    OMX_INT   half;             /* N / 2                                */
    OMX_INT   threeQuarter;     /* 3N / 4                               */
    OMX_INT   stride;           /* step through the master table        */
    OMX_INT   k;
    OMX_INT   idx;
    OMX_S32   wideRe;           /* 32-bit table values ...              */
    OMX_S32   wideIm;
    OMX_S16   re;               /* ... and their 16-bit roundings       */
    OMX_S16   im;
    OMX_S16   reNeg;

    /* Order zero: only N is recorded; nothing else is set up. */
    if (order == 0)
    {
        spec->N = 1;
        return OMX_Sts_NoErr;
    }

    length = (1 << (order - 1)) << 1;
    eighth = length >> 3;
    quarter = 2 * eighth;
    half = 4 * eighth;
    threeQuarter = 6 * eighth;

    /* Twiddle table: first 32-byte boundary at or after the structure. */
    cursor = (OMX_S8 *) pFFTSpec + sizeof(ARMsFFTSpec_SC16);
    misalign = ((OMX_U32) cursor) & 31;
    if (misalign != 0)
    {
        cursor += 32 - misalign;
    }
    twiddle = (OMX_SC16 *) cursor;

    /* Work buffer: first 32-byte boundary after 3N/4 twiddle entries. */
    cursor += sizeof(OMX_SC16) * (3 * length / 4);
    misalign = ((OMX_U32) cursor) & 31;
    if (misalign != 0)
    {
        cursor += 32 - misalign;
    }
    workBuf = (OMX_SC16 *) cursor;

    /*
     * Twiddle factors.
     *
     * Values come from the 32-bit table armSP_FFT_S32TwiddleTable, which
     * holds (MaxSize/8 + 1) entries, and are rounded to 16 bits by adding
     * 0x8000 and shifting right by 16. Everything beyond the first eighth
     * is derived from the symmetries of sin and cos. A radix-4 stage
     * needs at most 3N/4 entries.
     *
     *   W = (-2 * PI) / N,  N = 1 << order
     *
     * A shorter transform reads every 2^(12 - order)-th table entry.
     */
    stride = 1 << (12 - order);

    wideRe = armSP_FFT_S32TwiddleTable[0];
    wideIm = armSP_FFT_S32TwiddleTable[1];
    re = (wideRe + 0x8000) >> 16;
    im = (wideIm + 0x8000) >> 16;
    reNeg = 0x7FFF;   /* written where the general pattern writes -re */

    if (order == 1)
    {
        twiddle[0].Re = re;
        twiddle[0].Im = im;
    }
    else if (order == 2)
    {
        twiddle[0].Re = re;
        twiddle[0].Im = im;
        twiddle[1].Re = -im;
        twiddle[1].Im = reNeg;
        twiddle[2].Re = reNeg;
        twiddle[2].Im = im;
    }
    else if (order >= 3)
    {
        /* k = 0 entries */
        twiddle[0].Re = re;
        twiddle[0].Im = im;
        twiddle[quarter].Re = -im;
        twiddle[quarter].Im = reNeg;
        twiddle[half].Re = reNeg;
        twiddle[half].Im = im;

        /*
         * Each table entry is mirrored into six positions. When
         * k == eighth, neighbouring targets coincide, so the order of the
         * stores below decides which value survives and must not change.
         */
        for (k = 1; k <= eighth; ++k)
        {
            idx = 2 * (k * stride);
            wideRe = armSP_FFT_S32TwiddleTable[idx];
            wideIm = armSP_FFT_S32TwiddleTable[idx + 1];
            re = (wideRe + 0x8000) >> 16;
            im = (wideIm + 0x8000) >> 16;

            twiddle[k].Re = re;
            twiddle[k].Im = im;
            twiddle[quarter - k].Re = -im;
            twiddle[quarter - k].Im = -re;
            twiddle[quarter + k].Re = im;
            twiddle[quarter + k].Im = -re;
            twiddle[half - k].Re = -re;
            twiddle[half - k].Im = im;
            twiddle[half + k].Re = -re;
            twiddle[half + k].Im = -im;
            twiddle[threeQuarter - k].Re = im;
            twiddle[threeQuarter - k].Im = re;
        }
    }

    /* Publish the layout; no bit-reversal table is provided. */
    spec->N = length;
    spec->pTwiddle = twiddle;
    spec->pBitRev = NULL;
    spec->pBuf = workBuf;

    return OMX_Sts_NoErr;
}
/*****************************************************************************
* END OF FILE
*****************************************************************************/

Просмотреть файл

@ -1,196 +0,0 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*
* This file was originally licensed as follows. It has been
* relicensed with permission from the copyright holders.
*/
/**
*
* File Name: omxSP_FFTInit_C_SC32.c
* OpenMAX DL: v1.0.2
* Last Modified Revision: 7769
* Last Modified Date: Thu, 27 Sep 2007
*
* (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
*
*
* Description:
* Initializes the specification structures required
*/
#include "dl/api/armOMX.h"
#include "dl/api/omxtypes.h"
#include "dl/sp/api/armSP.h"
#include "dl/sp/api/omxSP.h"
/**
 * Function: omxSP_FFTInit_C_SC32
 *
 * Description:
 * Fills in the specification structure used by the 32-bit complex FFT
 * and IFFT functions.
 *
 * Remarks:
 * The structure serves <FFTFwd_CToC_SC32_Sfs> and
 * <FFTInv_CToC_SC32_Sfs>. The caller allocates *pFFTSpec beforehand; the
 * number of bytes needed is given by <FFTGetBufSize_C_SC32>. The
 * twiddle table and the work buffer are carved out of that same
 * allocation, directly after the structure, each rounded up to a
 * 32-byte boundary.
 *
 * Parameters:
 * [out] pFFTSpec   specification structure to initialize. It is
 *                  dereferenced without a NULL check.
 * [in]  order      base-2 logarithm of the desired block length;
 *                  documented as valid in the range [0,12]. It is not
 *                  range-checked here.
 *
 * Return Value:
 * Always OMX_Sts_NoErr.
 */
OMXResult omxSP_FFTInit_C_SC32(
    OMXFFTSpec_C_SC32* pFFTSpec,
    OMX_INT order
    )
{
    ARMsFFTSpec_SC32 *spec = (ARMsFFTSpec_SC32 *) pFFTSpec;
    OMX_S8   *cursor;           /* byte cursor walking the allocation   */
    OMX_SC32 *twiddle;
    OMX_SC32 *workBuf;
    OMX_U32   misalign;         /* low five address bits                */
    OMX_INT   length;           /* N = 2^order                          */
    OMX_INT   eighth;           /* N / 8                                */
    OMX_INT   quarter;          /* N / 4                                */
    OMX_INT   half;             /* N / 2                                */
    OMX_INT   threeQuarter;     /* 3N / 4                               */
    OMX_INT   stride;           /* step through the master table        */
    OMX_INT   k;
    OMX_INT   idx;
    OMX_S32   re;
    OMX_S32   im;
    OMX_S32   reNeg;

    /* Order zero: only N is recorded; nothing else is set up. */
    if (order == 0)
    {
        spec->N = 1;
        return OMX_Sts_NoErr;
    }

    length = (1 << (order - 1)) << 1;
    eighth = length >> 3;
    quarter = 2 * eighth;
    half = 4 * eighth;
    threeQuarter = 6 * eighth;

    /* Twiddle table: first 32-byte boundary at or after the structure. */
    cursor = (OMX_S8 *) pFFTSpec + sizeof(ARMsFFTSpec_SC32);
    misalign = ((OMX_U32) cursor) & 31;
    if (misalign != 0)
    {
        cursor += 32 - misalign;
    }
    twiddle = (OMX_SC32 *) cursor;

    /* Work buffer: first 32-byte boundary after 3N/4 twiddle entries. */
    cursor += sizeof(OMX_SC32) * (3 * length / 4);
    misalign = ((OMX_U32) cursor) & 31;
    if (misalign != 0)
    {
        cursor += 32 - misalign;
    }
    workBuf = (OMX_SC32 *) cursor;

    /*
     * Twiddle factors.
     *
     * armSP_FFT_S32TwiddleTable holds (MaxSize/8 + 1) entries; everything
     * beyond the first eighth is derived from the symmetries of sin and
     * cos. A radix-4 stage needs at most 3N/4 entries.
     *
     *   W = (-2 * PI) / N,  N = 1 << order
     *
     * A shorter transform reads every 2^(12 - order)-th table entry.
     */
    stride = 1 << (12 - order);

    re = armSP_FFT_S32TwiddleTable[0];
    im = armSP_FFT_S32TwiddleTable[1];
    reNeg = 0x7FFFFFFF;   /* written where the general pattern writes -re */

    if (order == 1)
    {
        twiddle[0].Re = re;
        twiddle[0].Im = im;
    }
    else if (order == 2)
    {
        twiddle[0].Re = re;
        twiddle[0].Im = im;
        twiddle[1].Re = -im;
        twiddle[1].Im = reNeg;
        twiddle[2].Re = reNeg;
        twiddle[2].Im = im;
    }
    else if (order >= 3)
    {
        /* k = 0 entries */
        twiddle[0].Re = re;
        twiddle[0].Im = im;
        twiddle[quarter].Re = -im;
        twiddle[quarter].Im = reNeg;
        twiddle[half].Re = reNeg;
        twiddle[half].Im = im;

        /*
         * Each table entry is mirrored into six positions. When
         * k == eighth, neighbouring targets coincide, so the order of the
         * stores below decides which value survives and must not change.
         */
        for (k = 1; k <= eighth; ++k)
        {
            idx = 2 * (k * stride);
            re = armSP_FFT_S32TwiddleTable[idx];
            im = armSP_FFT_S32TwiddleTable[idx + 1];

            twiddle[k].Re = re;
            twiddle[k].Im = im;
            twiddle[quarter - k].Re = -im;
            twiddle[quarter - k].Im = -re;
            twiddle[quarter + k].Re = im;
            twiddle[quarter + k].Im = -re;
            twiddle[half - k].Re = -re;
            twiddle[half - k].Im = im;
            twiddle[half + k].Re = -re;
            twiddle[half + k].Im = -im;
            twiddle[threeQuarter - k].Re = im;
            twiddle[threeQuarter - k].Im = re;
        }
    }

    /* Publish the layout. Optimized implementations don't use bit reversal. */
    spec->N = length;
    spec->pTwiddle = twiddle;
    spec->pBitRev = NULL;
    spec->pBuf = workBuf;

    return OMX_Sts_NoErr;
}
/*****************************************************************************
* END OF FILE
*****************************************************************************/

Просмотреть файл

@ -1,210 +0,0 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*
* This is a modification of omxSP_FFTInit_R_S32.c to support float
* instead of S32.
*/
#include "dl/api/armOMX.h"
#include "dl/api/omxtypes.h"
#include "dl/sp/api/armSP.h"
#include "dl/sp/api/omxSP.h"
/**
 * Function: omxSP_FFTInit_R_F32
 *
 * Description:
 * Initialize the real float FFT specification information struct.
 *
 * Remarks:
 * This function initializes the specification structure used by
 * <FFTFwd_RToCCS_F32_Sfs> and <FFTInv_CCSToR_F32_Sfs>. Memory for
 * *pFFTSpec must be allocated prior to calling this function; the
 * number of bytes required can be determined using
 * <FFTGetBufSize_R_F32>. The twiddle table and the work buffer live in
 * that same allocation, directly after the structure, each rounded up
 * to a 32-byte boundary.
 *
 * The twiddle table has two parts:
 *   - entries [0, 3N/8): factors for the N/2-point complex FFT that is
 *     used to compute the N-point real FFT;
 *   - entries [3N/8, 5N/8): factors for the final fix-up that converts
 *     the complex result to the real FFT.
 *
 * Parameters:
 * [out] pFFTSpec   specification structure to initialize; must not be
 *                  NULL.
 * [in]  order      base-2 logarithm of the desired block length; must
 *                  lie in [1, TWIDDLE_TABLE_ORDER] (documented as
 *                  [1,12], or [1,15] if BIG_FFT_TABLE is defined).
 *
 * Return Value:
 * OMX_Sts_BadArgErr for a NULL pFFTSpec or an out-of-range order,
 * OMX_Sts_NoErr otherwise.
 */
OMXResult omxSP_FFTInit_R_F32(OMXFFTSpec_R_F32* pFFTSpec, OMX_INT order) {
  OMX_INT i;
  OMX_INT j;
  OMX_FC32* pTwiddle;
  OMX_FC32* pTwiddle1;
  OMX_FC32* pTwiddle2;
  OMX_FC32* pTwiddle3;
  OMX_FC32* pTwiddle4;
  OMX_F32* pBuf;
  OMX_U32 pTmp;
  OMX_INT Nby2;
  OMX_INT N;
  OMX_INT M;
  OMX_INT step;
  OMX_F32 x;
  OMX_F32 y;
  OMX_F32 xNeg;
  ARMsFFTSpec_R_FC32* pFFTStruct;

  /* Validate args */
  if (!pFFTSpec || (order < 1) || (order > TWIDDLE_TABLE_ORDER))
    return OMX_Sts_BadArgErr;

  pFFTStruct = (ARMsFFTSpec_R_FC32*) pFFTSpec;

  Nby2 = 1 << (order - 1);
  N = Nby2 << 1;

  /*
   * The twiddle table starts right after the structure this function
   * populates, so the offset is the size of that structure
   * (ARMsFFTSpec_R_FC32), rounded up to a 32-byte boundary.
   */
  pTwiddle = (OMX_FC32*) (sizeof(ARMsFFTSpec_R_FC32) + (OMX_S8*) pFFTSpec);
  pTmp = ((OMX_U32) pTwiddle) & 31;
  if (pTmp)
    pTwiddle = (OMX_FC32*) ((OMX_S8*) pTwiddle + (32 - pTmp));

  /* The work buffer follows the 5N/8 twiddle entries, again 32-byte aligned. */
  pBuf = (OMX_F32*) (sizeof(OMX_FC32) * (5 * N / 8) + (OMX_S8*) pTwiddle);
  pTmp = ((OMX_U32) pBuf) & 31;
  if (pTmp)
    pBuf = (OMX_F32*) ((OMX_S8*) pBuf + (32 - pTmp));

  /*
   * Part 1 - twiddle factors for the N/2-point complex FFT:
   *
   *   exp^(-j*2*PI*k/ (N/2) ) ; k=0,1,2,...,3/4(N/2)
   *
   * armSP_FFT_F32TwiddleTable holds (MaxSize/8 + 1) entries; the rest
   * are derived from the symmetries of sin and cos. A radix-4 stage
   * needs at most 3/4(N/2) entries.
   *
   * Because the complex FFT has order (order - 1), every size-dependent
   * quantity in this part is derived from (order - 1).
   */
  M = Nby2 >> 3;
  /* step into the twiddle table for the half-length transform */
  step = 1 << (TWIDDLE_TABLE_ORDER - (order - 1));

  x = armSP_FFT_F32TwiddleTable[0];
  y = armSP_FFT_F32TwiddleTable[1];
  xNeg = 1;   /* written where the general pattern below writes -x */

  if ((order - 1) >= 3) {
    /* i = 0 case */
    pTwiddle[0].Re = x;
    pTwiddle[0].Im = y;
    pTwiddle[2 * M].Re = -y;
    pTwiddle[2 * M].Im = xNeg;
    pTwiddle[4 * M].Re = xNeg;
    pTwiddle[4 * M].Im = y;

    /*
     * Each table entry is mirrored into six positions. When i == M,
     * neighbouring targets coincide, so the order of the stores below
     * decides which value survives and must not change.
     */
    for (i = 1; i <= M; i++) {
      j = i * step;
      x = armSP_FFT_F32TwiddleTable[2 * j];
      y = armSP_FFT_F32TwiddleTable[2 * j + 1];

      pTwiddle[i].Re = x;
      pTwiddle[i].Im = y;
      pTwiddle[2 * M - i].Re = -y;
      pTwiddle[2 * M - i].Im = -x;
      pTwiddle[2 * M + i].Re = y;
      pTwiddle[2 * M + i].Im = -x;
      pTwiddle[4 * M - i].Re = -x;
      pTwiddle[4 * M - i].Im = y;
      pTwiddle[4 * M + i].Re = -x;
      pTwiddle[4 * M + i].Im = -y;
      pTwiddle[6 * M - i].Re = y;
      pTwiddle[6 * M - i].Im = x;
    }
  } else if ((order - 1) == 2) {
    pTwiddle[0].Re = x;
    pTwiddle[0].Im = y;
    pTwiddle[1].Re = -y;
    pTwiddle[1].Im = xNeg;
    pTwiddle[2].Re = xNeg;
    pTwiddle[2].Im = y;
  } else if ((order - 1) == 1) {
    pTwiddle[0].Re = x;
    pTwiddle[0].Im = y;
  }
  /* (order - 1) == 0: part 1 stores nothing. */

  /*
   * Part 2 - the last N/4 values: exp^(-j*2*PI*k/N) ; k=1,3,5,...,N/2-1
   * These are used for the final twiddle fix-up for converting complex
   * to real FFT. They occupy entries [3N/8, 5N/8) of the table.
   */
  M = N >> 3;
  step = 1 << (TWIDDLE_TABLE_ORDER - order);

  pTwiddle1 = pTwiddle + 3 * N / 8;

  /* Start again from the first table entry; part 1 may have changed x and y. */
  x = armSP_FFT_F32TwiddleTable[0];
  y = armSP_FFT_F32TwiddleTable[1];
  xNeg = 1;

  if (order >= 3) {
    /*
     * Four cursors fill the region: pTwiddle1 and pTwiddle3 walk upwards
     * from its start and its midpoint, pTwiddle2 and pTwiddle4 walk
     * downwards from just below its midpoint and from its last entry.
     * They are only formed here, where they are actually used.
     */
    pTwiddle4 = pTwiddle1 + (N / 4 - 1);
    pTwiddle3 = pTwiddle1 + N / 8;
    pTwiddle2 = pTwiddle1 + (N / 8 - 1);

    /*
     * Only odd i are visited. For order == 3 the up- and down-walking
     * cursors start on the same entries, so the store order matters.
     */
    for (i = 1; i <= M; i += 2) {
      j = i * step;
      x = armSP_FFT_F32TwiddleTable[2 * j];
      y = armSP_FFT_F32TwiddleTable[2 * j + 1];

      pTwiddle1[0].Re = x;
      pTwiddle1[0].Im = y;
      pTwiddle1 += 1;
      pTwiddle2[0].Re = -y;
      pTwiddle2[0].Im = -x;
      pTwiddle2 -= 1;
      pTwiddle3[0].Re = y;
      pTwiddle3[0].Im = -x;
      pTwiddle3 += 1;
      pTwiddle4[0].Re = -x;
      pTwiddle4[0].Im = y;
      pTwiddle4 -= 1;
    }
  } else if (order == 2) {
    /* A single fix-up entry. */
    pTwiddle1[0].Re = -y;
    pTwiddle1[0].Im = xNeg;
  }
  /* order == 1: no fix-up entries. */

  /* Update the structure; optimized implementations don't use bitreversal. */
  pFFTStruct->N = N;
  pFFTStruct->pTwiddle = pTwiddle;
  pFFTStruct->pBitRev = NULL;
  pFFTStruct->pBuf = pBuf;

  return OMX_Sts_NoErr;
}

Просмотреть файл

@ -1,263 +0,0 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*
* This file was originally licensed as follows. It has been
* relicensed with permission from the copyright holders.
*/
/**
*
* File Name: omxSP_FFTInit_R_S16S32.c
* OpenMAX DL: v1.0.2
* Last Modified Revision: 7777
* Last Modified Date: Thu, 27 Sep 2007
*
* (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
*
*
* Description:
* Initialize the real forward-FFT specification information struct.
*/
#include "dl/api/armOMX.h"
#include "dl/api/omxtypes.h"
#include "dl/sp/api/armSP.h"
#include "dl/sp/api/omxSP.h"
/**
* Function: omxSP_FFTInit_R_S16S32
*
* Description:
* Initialize the real FFT specification information struct.
*
* Remarks:
* This function is used to initialize the specification structures
* for functions <FFTFwd_RToCCS_S16_S32_Sfs> and
* <FFTInv_CCSToR_S32_S16_Sfs>. Memory for *pFFTSpec must be
* allocated prior to calling this function. The number of bytes
* required for *pFFTSpec can be determined using
* <FFTGetBufSize_R_S16S32>. The twiddle table and the work buffer are
* placed in that same allocation, after the structure, each rounded up
* to a 32-byte boundary (except for order 0, see below).
*
* The twiddle table has two parts: entries [0, 3N/8) for the N/2-point
* complex FFT, and entries [3N/8, 5N/8) for the final complex-to-real
* fix-up.
*
* Parameters:
* [out] pFFTSpec pointer to the specification structure to initialize.
* It is dereferenced without a NULL check.
* [in] order base-2 logarithm of the desired block length;
* documented as valid in the range [0,12]. It is not
* range-checked here.
*
* Return Value:
* Always OMX_Sts_NoErr.
*
*/
OMXResult omxSP_FFTInit_R_S16S32(
OMXFFTSpec_R_S16S32* pFFTSpec,
OMX_INT order
)
{
OMX_INT i,j;
/* pTwiddle1..pTwiddle4 are walking cursors used only for the fix-up part */
OMX_SC32 *pTwiddle,*pTwiddle1,*pTwiddle2,*pTwiddle3,*pTwiddle4;
OMX_S32 *pBuf;
OMX_U16 *pBitRev;
OMX_U32 pTmp;
OMX_INT Nby2,N,M,diff, step;
OMX_S32 x,y,xNeg;
ARMsFFTSpec_R_SC32 *pFFTStruct = 0;
pFFTStruct = (ARMsFFTSpec_R_SC32 *) pFFTSpec;
/*
* Order zero: no twiddle table is built. N is set to 1, pTwiddle to
* NULL, and pBuf points immediately after the structure (no alignment
* is applied). pBitRev is left untouched on this path.
*/
if (order == 0)
{
pFFTStruct->N = 1;
pFFTStruct->pTwiddle = NULL;
pFFTStruct->pBuf = (OMX_S32 *)
(sizeof(ARMsFFTSpec_R_SC32) + (OMX_S8*) pFFTSpec);
return OMX_Sts_NoErr;
}
/* Do the initializations */
Nby2 = 1 << (order - 1);
N = Nby2 << 1;
pBitRev = NULL ; /* optimized implementations don't use bitreversal */
/* The twiddle table starts right after the structure ... */
pTwiddle = (OMX_SC32 *)
(sizeof(ARMsFFTSpec_R_SC32) + (OMX_S8*) pFFTSpec);
/* ... rounded up to the next 32 byte boundary */
pTmp = ((OMX_U32)pTwiddle)&31; /* (OMX_U32)pTwiddle % 32 */
if(pTmp != 0)
pTwiddle = (OMX_SC32*) ((OMX_S8*)pTwiddle + (32-pTmp));
/* The work buffer follows the 5N/8 twiddle entries ... */
pBuf = (OMX_S32*)
(sizeof(OMX_SC32) * (5*N/8) + (OMX_S8*) pTwiddle);
/* ... again rounded up to the next 32 byte boundary */
pTmp = ((OMX_U32)pBuf)&31; /* (OMX_U32)pBuf % 32 */
if(pTmp != 0)
pBuf = (OMX_S32*) ((OMX_S8*)pBuf + (32-pTmp));
/*
* Part 1 - Filling Twiddle factors : exp^(-j*2*PI*k/ (N/2) ) ; k=0,1,2,...,3/4(N/2)
* N/2 point complex FFT is used to compute N point real FFT
* The original twiddle table "armSP_FFT_S32TwiddleTable" is of size (MaxSize/8 + 1)
* Rest of the values i.e., upto MaxSize are calculated using the symmetries of sin and cos
* The max size of the twiddle table needed is 3/4(N/2) for a radix-4 stage
*
* W = (-2 * PI) / N
* N = 1 << order
* W = -PI >> (order - 1)
*
* Because the complex FFT has order (order-1), M, diff and the branch
* conditions in this part are all derived from (order-1).
*/
M = Nby2>>3;
diff = 12 - (order-1);
step = 1<<diff; /* step into the twiddle table for the current order */
x = armSP_FFT_S32TwiddleTable[0];
y = armSP_FFT_S32TwiddleTable[1];
xNeg = 0x7FFFFFFF; /* written where the general pattern below writes -x */
if((order-1) >=3)
{
/* i = 0 case */
pTwiddle[0].Re = x;
pTwiddle[0].Im = y;
pTwiddle[2*M].Re = -y;
pTwiddle[2*M].Im = xNeg;
pTwiddle[4*M].Re = xNeg;
pTwiddle[4*M].Im = y;
/*
* Each table entry is mirrored into six positions. When i == M,
* neighbouring targets coincide (e.g. i and 2*M-i), so the order of
* the stores decides which value survives.
*/
for (i=1; i<=M; i++)
{
j = i*step;
x = armSP_FFT_S32TwiddleTable[2*j];
y = armSP_FFT_S32TwiddleTable[2*j+1];
pTwiddle[i].Re = x;
pTwiddle[i].Im = y;
pTwiddle[2*M-i].Re = -y;
pTwiddle[2*M-i].Im = -x;
pTwiddle[2*M+i].Re = y;
pTwiddle[2*M+i].Im = -x;
pTwiddle[4*M-i].Re = -x;
pTwiddle[4*M-i].Im = y;
pTwiddle[4*M+i].Re = -x;
pTwiddle[4*M+i].Im = -y;
pTwiddle[6*M-i].Re = y;
pTwiddle[6*M-i].Im = x;
}
}
else
{
/* Small transforms: the few entries are written out explicitly. */
if ((order-1) == 2)
{
pTwiddle[0].Re = x;
pTwiddle[0].Im = y;
pTwiddle[1].Re = -y;
pTwiddle[1].Im = xNeg;
pTwiddle[2].Re = xNeg;
pTwiddle[2].Im = y;
}
if ((order-1) == 1)
{
pTwiddle[0].Re = x;
pTwiddle[0].Im = y;
}
/* (order-1) == 0: part 1 stores nothing. */
}
/*
* Part 2 - Now fill the last N/4 values : exp^(-j*2*PI*k/N) ; k=1,3,5,...,N/2-1
* These are used for the final twiddle fix-up for converting complex to real FFT
* They occupy entries [3N/8, 5N/8) of the table. From here on M, diff
* and step are derived from the full order again.
*/
M = N>>3;
diff = 12 - order;
step = 1<<diff;
/*
* Four cursors into the fix-up region: pTwiddle1 starts at its first
* entry and pTwiddle3 at its midpoint (both walk upwards); pTwiddle2
* starts just below the midpoint and pTwiddle4 at the last entry
* (both walk downwards).
*/
pTwiddle1 = pTwiddle + 3*N/8;
pTwiddle4 = pTwiddle1 + (N/4-1);
pTwiddle3 = pTwiddle1 + N/8;
pTwiddle2 = pTwiddle1 + (N/8-1);
/* Restart from the first table entry; part 1 may have changed x and y. */
x = armSP_FFT_S32TwiddleTable[0];
y = armSP_FFT_S32TwiddleTable[1];
xNeg = 0x7FFFFFFF;
if((order) >=3)
{
/*
* Only odd i are visited. For order == 3 the upward and downward
* cursors start on the same entries, so the store order matters.
*/
for (i=1; i<=M; i+=2 )
{
j = i*step;
x = armSP_FFT_S32TwiddleTable[2*j];
y = armSP_FFT_S32TwiddleTable[2*j+1];
pTwiddle1[0].Re = x;
pTwiddle1[0].Im = y;
pTwiddle1 += 1;
pTwiddle2[0].Re = -y;
pTwiddle2[0].Im = -x;
pTwiddle2 -= 1;
pTwiddle3[0].Re = y;
pTwiddle3[0].Im = -x;
pTwiddle3 += 1;
pTwiddle4[0].Re = -x;
pTwiddle4[0].Im = y;
pTwiddle4 -= 1;
}
}
else
{
/* order == 2 needs a single fix-up entry; order == 1 needs none. */
if (order == 2)
{
pTwiddle1[0].Re = -y;
pTwiddle1[0].Im = xNeg;
}
}
/* Update the structure */
pFFTStruct->N = N;
pFFTStruct->pTwiddle = pTwiddle;
pFFTStruct->pBitRev = pBitRev;
pFFTStruct->pBuf = pBuf;
return OMX_Sts_NoErr;
}
/*****************************************************************************
* END OF FILE
*****************************************************************************/

Просмотреть файл

@ -1,261 +0,0 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*
* This file was originally licensed as follows. It has been
* relicensed with permission from the copyright holders.
*/
/**
*
* File Name: omxSP_FFTInit_R_S32.c
* OpenMAX DL: v1.0.2
* Last Modified Revision: 7777
* Last Modified Date: Thu, 27 Sep 2007
*
* (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
*
*
* Description:
* Initialize the real forward-FFT specification information struct.
*/
#include "dl/api/armOMX.h"
#include "dl/api/omxtypes.h"
#include "dl/sp/api/armSP.h"
#include "dl/sp/api/omxSP.h"
/**
 * Function: omxSP_FFTInit_R_S32
 *
 * Description:
 * Initialize the specification structure used by the real-signal S32 FFT
 * routines (for example omxSP_FFTInv_CCSToR_S32_Sfs).
 *
 * Remarks:
 * Memory for *pFFTSpec must be allocated by the caller before calling this
 * function; the original documentation names <FFTGetBufSize_R_S32> as the
 * way to obtain the required number of bytes. Inside that allocation the
 * structure header is followed by the twiddle table (5*N/8 complex values,
 * start rounded up to a 32-byte boundary) and then by the work buffer
 * (start also rounded up to a 32-byte boundary).
 *
 * Parameters:
 * [out] pFFTSpec  pointer to the specification structure to initialize.
 * [in]  order     base-2 logarithm of the desired block length N;
 *                 valid in the range [0,12].
 *
 * Return Value:
 * OMX_Sts_NoErr      the structure was initialized.
 * OMX_Sts_BadArgErr  pFFTSpec is NULL or order is outside [0,12].
 *
 */
OMXResult omxSP_FFTInit_R_S32(
    OMXFFTSpec_R_S32* pFFTSpec,
    OMX_INT order
    )
{
    /* Largest supported order; the step computations below index the
     * shared twiddle table relative to this order. */
    const OMX_INT maxOrder = 12;

    OMX_INT i,j;
    OMX_SC32 *pTwiddle,*pTwiddle1,*pTwiddle2,*pTwiddle3,*pTwiddle4;
    OMX_S32 *pBuf;
    OMX_U16 *pBitRev;
    OMX_U32 pTmp;
    OMX_INT Nby2,N,M,diff, step;
    OMX_S32 x,y,xNeg;
    ARMsFFTSpec_R_SC32 *pFFTStruct = 0;

    /*
     * Reject arguments that would otherwise cause a NULL dereference,
     * a negative shift count, or reads past the end of the twiddle table.
     */
    if (pFFTSpec == NULL || order < 0 || order > maxOrder)
    {
        return OMX_Sts_BadArgErr;
    }

    pFFTStruct = (ARMsFFTSpec_R_SC32 *) pFFTSpec;

    /* if order zero no twiddle table is needed */
    if (order == 0)
    {
        pFFTStruct->N = 1;
        pFFTStruct->pTwiddle = NULL;
        /* Keep every field defined, as on the general path below. */
        pFFTStruct->pBitRev = NULL;
        pFFTStruct->pBuf = (OMX_S32 *)
            (sizeof(ARMsFFTSpec_R_SC32) + (OMX_S8*) pFFTSpec);
        return OMX_Sts_NoErr;
    }

    /* Do the initializations */
    Nby2 = 1 << (order - 1);
    N = Nby2 << 1;
    pBitRev = NULL ; /* optimized implementations don't use bitreversal */

    /* The twiddle table starts right after the structure header ... */
    pTwiddle = (OMX_SC32 *)
        (sizeof(ARMsFFTSpec_R_SC32) + (OMX_S8*) pFFTSpec);
    /* ... rounded up to a 32 byte boundary */
    pTmp = ((OMX_U32)pTwiddle)&31; /* (OMX_U32)pTwiddle % 32 */
    if(pTmp != 0)
        pTwiddle = (OMX_SC32*) ((OMX_S8*)pTwiddle + (32-pTmp));

    /* The work buffer follows the 5*N/8 twiddle entries ... */
    pBuf = (OMX_S32*)
        (sizeof(OMX_SC32) * (5*N/8) + (OMX_S8*) pTwiddle);
    /* ... rounded up to a 32 byte boundary */
    pTmp = ((OMX_U32)pBuf)&31; /* (OMX_U32)pBuf % 32 */
    if(pTmp != 0)
        pBuf = (OMX_S32*) ((OMX_S8*)pBuf + (32-pTmp));

    /*
     * Filling Twiddle factors : exp^(-j*2*PI*k/ (N/2) ) ; k=0,1,2,...,3/4(N/2)
     * N/2 point complex FFT is used to compute N point real FFT
     * The original twiddle table "armSP_FFT_S32TwiddleTable" is of size (MaxSize/8 + 1)
     * Rest of the values i.e., upto MaxSize are calculated using the symmetries of sin and cos
     * The max size of the twiddle table needed is 3/4(N/2) for a radix-4 stage
     *
     * W = (-2 * PI) / N
     * N = 1 << order
     * W = -PI >> (order - 1)
     */
    M = Nby2>>3;
    diff = maxOrder - (order-1);
    step = 1<<diff; /* step into the twiddle table for the current order */
    x = armSP_FFT_S32TwiddleTable[0];
    y = armSP_FFT_S32TwiddleTable[1];
    xNeg = 0x7FFFFFFF;

    if((order-1) >=3)
    {
        /* i = 0 case */
        pTwiddle[0].Re = x;
        pTwiddle[0].Im = y;
        pTwiddle[2*M].Re = -y;
        pTwiddle[2*M].Im = xNeg;
        pTwiddle[4*M].Re = xNeg;
        pTwiddle[4*M].Im = y;

        /* Each table entry is mirrored into six positions of the output. */
        for (i=1; i<=M; i++)
        {
            j = i*step;
            x = armSP_FFT_S32TwiddleTable[2*j];
            y = armSP_FFT_S32TwiddleTable[2*j+1];

            pTwiddle[i].Re = x;
            pTwiddle[i].Im = y;
            pTwiddle[2*M-i].Re = -y;
            pTwiddle[2*M-i].Im = -x;
            pTwiddle[2*M+i].Re = y;
            pTwiddle[2*M+i].Im = -x;
            pTwiddle[4*M-i].Re = -x;
            pTwiddle[4*M-i].Im = y;
            pTwiddle[4*M+i].Re = -x;
            pTwiddle[4*M+i].Im = -y;
            pTwiddle[6*M-i].Re = y;
            pTwiddle[6*M-i].Im = x;
        }
    }
    else
    {
        /* Small sizes: write the few required entries directly. */
        if ((order-1) == 2)
        {
            pTwiddle[0].Re = x;
            pTwiddle[0].Im = y;
            pTwiddle[1].Re = -y;
            pTwiddle[1].Im = xNeg;
            pTwiddle[2].Re = xNeg;
            pTwiddle[2].Im = y;
        }
        if ((order-1) == 1)
        {
            pTwiddle[0].Re = x;
            pTwiddle[0].Im = y;
        }
    }

    /*
     * Now fill the last N/4 values : exp^(-j*2*PI*k/N) ; k=1,3,5,...,N/2-1
     * These are used for the final twiddle fix-up for converting complex to real FFT
     */
    M = N>>3;
    diff = maxOrder - order;
    step = 1<<diff;

    /* Four cursors into the last N/4 entries: two walk forwards
     * (pTwiddle1, pTwiddle3) and two walk backwards (pTwiddle2, pTwiddle4). */
    pTwiddle1 = pTwiddle + 3*N/8;
    pTwiddle4 = pTwiddle1 + (N/4-1);
    pTwiddle3 = pTwiddle1 + N/8;
    pTwiddle2 = pTwiddle1 + (N/8-1);

    x = armSP_FFT_S32TwiddleTable[0];
    y = armSP_FFT_S32TwiddleTable[1];
    xNeg = 0x7FFFFFFF;

    if((order) >=3)
    {
        /* Only odd k are needed, hence the stride of 2. */
        for (i=1; i<=M; i+=2 )
        {
            j = i*step;
            x = armSP_FFT_S32TwiddleTable[2*j];
            y = armSP_FFT_S32TwiddleTable[2*j+1];

            pTwiddle1[0].Re = x;
            pTwiddle1[0].Im = y;
            pTwiddle1 += 1;
            pTwiddle2[0].Re = -y;
            pTwiddle2[0].Im = -x;
            pTwiddle2 -= 1;
            pTwiddle3[0].Re = y;
            pTwiddle3[0].Im = -x;
            pTwiddle3 += 1;
            pTwiddle4[0].Re = -x;
            pTwiddle4[0].Im = y;
            pTwiddle4 -= 1;
        }
    }
    else
    {
        if (order == 2)
        {
            pTwiddle1[0].Re = -y;
            pTwiddle1[0].Im = xNeg;
        }
    }

    /* Update the structure */
    pFFTStruct->N = N;
    pFFTStruct->pTwiddle = pTwiddle;
    pFFTStruct->pBitRev = pBitRev;
    pFFTStruct->pBuf = pBuf;

    return OMX_Sts_NoErr;
}
/*****************************************************************************
* END OF FILE
*****************************************************************************/

Просмотреть файл

@ -1,283 +0,0 @@
@//
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@// Use of this source code is governed by a BSD-style license
@// that can be found in the LICENSE file in the root of the source
@// tree. An additional intellectual property rights grant can be found
@// in the file PATENTS. All contributing project authors may
@// be found in the AUTHORS file in the root of the source tree.
@//
@// This is a modification of omxSP_FFTInv_CCSToR_S32_Sfs_s.s
@// to support float instead of SC32.
@//
@//
@// Description:
@// Compute the inverse FFT of a spectrum stored in CCS (complex
@// conjugate-symmetric) format, producing a real-valued float signal
@//
@//
@// Include standard headers
#include "dl/api/armCOMM_s.h"
#include "dl/api/omxtypes_s.h"
@// Import symbols required from other files
@// (For example tables)
.extern armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_FC32_Radix2_OutOfPlace_unsafe
.extern armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_unsafe
@// Set debugging level
@//DEBUG_ON SETL {TRUE}
@// Guarding implementation by the processor name
@// Guarding implementation by the processor name
@// Import symbols required from other files
@// (For example tables)
.extern armSP_FFTInv_CToC_FC32_Radix4_ls_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace_unsafe
@//Input Registers
@// NOTE: many of the names below alias the same physical register
@// (for example pDst/argTwiddle = r1, pFFTSpec/argDst = r2,
@// pTwiddle/tmpOrder/argScale = r4, N/subFFTNum = r6), so the order of
@// the MOVs in the function body matters.
#define pSrc r0
#define pDst r1
#define pFFTSpec r2
@// scale is defined here but is not referenced by the function below
#define scale r3
@// Output registers
#define result r0
@//Local Scratch Registers
#define argTwiddle r1
#define argDst r2
#define argScale r4
#define tmpOrder r4
#define pTwiddle r4
#define pOut r5
#define subFFTSize r7
#define subFFTNum r6
#define N r6
@// order lives in r14 (LR), which every BL overwrites
#define order r14
#define diff r9
@// Total num of radix stages required to complete the FFT
#define count r8
#define x0r r4
#define x0i r5
#define diffMinusOne r2
#define round r3
#define pOut1 r2
#define size r7
#define step r8
#define step1 r9
#define twStep r10
#define pTwiddleTmp r11
#define argTwiddle1 r12
#define zero r14
@// Neon registers
#define dX0 D0.F32
#define dShift D1.F32
#define dX1 D1.F32
#define dY0 D2.F32
#define dY1 D3.F32
#define dX0r D0.F32
#define dX0i D1.F32
#define dX1r D2.F32
#define dX1i D3.F32
#define dW0r D4.F32
#define dW0i D5.F32
#define dW1r D6.F32
#define dW1i D7.F32
#define dT0 D8.F32
#define dT1 D9.F32
#define dT2 D10.F32
#define dT3 D11.F32
#define qT0 d12.F32
#define qT1 d14.F32
#define qT2 d16.F32
#define qT3 d18.F32
#define dY0r D4.F32
#define dY0i D5.F32
#define dY1r D6.F32
#define dY1i D7.F32
#define dzero D20.F32
#define dY2 D4.F32
#define dY3 D5.F32
#define dW0 D6.F32
#define dW1 D7.F32
#define dW0Tmp D10.F32
#define dW1Neg D11.F32
@// sN and fN are S0/S1, i.e. the two halves of D0 (dX0)
#define sN S0.S32
#define fN S1.F32
@// one must be the same as dScale[0]!
#define dScale D2.F32
#define one S4.F32
@//
@// omxSP_FFTInv_CCSToR_F32_Sfs
@//
@// In:  r0 = pSrc, r1 = pDst, r2 = pFFTSpec
@// Out: r0 = OMX_Sts_NoErr (always)
@//
@// Outline of what the code below does:
@//   1. Read N, pTwiddle and pBuf from the spec structure.
@//   2. N <= 1: copy a single 32-bit element from pSrc to pDst and return.
@//   3. Otherwise call the preTwiddle Radix2 helper, halve N, and run an
@//      N/2-point complex inverse FFT built from the radix-2/4/8 stage
@//      helpers, selected by order = log2(N/2).
@//   4. Multiply the output by 1 / (N/2), the value saved in
@//      complexFFTSize.
@//
@// The stage helpers are external "_unsafe" routines; their register
@// contract is not visible in this file. Presumably they update pSrc,
@// subFFTNum and subFFTSize between stages -- confirm against the helpers.
@//
@// Allocate stack memory required by the function
M_ALLOC4 complexFFTSize, 4
@// Write function header
@// NOTE(review): M_START/M_END come from armCOMM_s.h; presumably they
@// save/restore registers up to r11 and d15 -- confirm against the macro.
M_START omxSP_FFTInv_CCSToR_F32_Sfs,r11,d15
@ Structure offsets for the FFTSpec
.set ARMsFFTSpec_N, 0
.set ARMsFFTSpec_pBitRev, 4
.set ARMsFFTSpec_pTwiddle, 8
.set ARMsFFTSpec_pBuf, 12
@// Define stack arguments
@// Read the size from structure and take log
LDR N, [pFFTSpec, #ARMsFFTSpec_N]
@// Read other structure parameters
LDR pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
LDR pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
@// N=1 Treat separately: the single value is copied unchanged
CMP N,#1
BGT sizeGreaterThanOne
VLD1 dX0[0],[pSrc]
VST1 dX0[0],[pDst]
B End
sizeGreaterThanOne:
@// Call the preTwiddle Radix2 stage before doing the complex IFFT
BL armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_unsafe
complexIFFT:
ASR N,N,#1 @// N/2 point complex IFFT
M_STR N, complexFFTSize @ Save N for scaling later
ADD pSrc,pOut,N,LSL #3 @// set pSrc as pOut1
@// order = 31 - CLZ(N) = log2(N) for a power-of-two N
CLZ order,N @// N = 2^order
RSB order,order,#31
MOV subFFTSize,#1
@//MOV subFFTNum,N
CMP order,#3
BGT orderGreaterthan3 @// order > 3
CMP order,#1
BGE orderGreaterthan0 @// order > 0
@// order = 0: copy one D register (8 bytes) from pSrc to pDst.
@// The flags from "CMP order,#1" are still LT here because VLD1, VST1
@// and MOV do not modify them, so the BLT below is taken.
VLD1 dX0,[pSrc]
VST1 dX0,[pDst]
MOV pSrc,pDst
BLT FFTEnd
orderGreaterthan0:
@// set the buffers appropriately for various orders
@// order == 2: first stage writes to pOut and pOut is set to pDst;
@// otherwise the first stage writes directly to pDst.
CMP order,#2
MOVNE argDst,pDst
MOVEQ argDst,pOut
@// Pass the first stage destination in RN5
MOVEQ pOut,pDst
@// argTwiddle is r1, so this overwrites pDst (already copied above)
MOV argTwiddle,pTwiddle
BGE orderGreaterthan1
BLLT armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe @// order = 1
B FFTEnd
orderGreaterthan1:
@// order (r14) is destroyed by the BL below, so keep a copy in r4
MOV tmpOrder,order @// tmpOrder = RN 4
BL armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
@// order == 3 needs one extra middle radix-2 stage; order == 2 does not.
@// NOTE(review): the BL after BLGT is unconditional, so it does not
@// depend on the flags the middle stage leaves behind.
CMP tmpOrder,#2
BLGT armSP_FFTInv_CToC_FC32_Radix2_OutOfPlace_unsafe
BL armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace_unsafe
B FFTEnd
orderGreaterthan3:
specialScaleCase:
@// Set input args to fft stages
@// Bit 1 of order selects which buffer receives the first stage output
TST order, #2
MOVNE argDst,pDst
MOVEQ argDst,pOut
@// Pass the first stage destination in RN5
MOVEQ pOut,pDst
MOV argTwiddle,pTwiddle
@//check for even or odd order
@// NOTE: The following combination of BL's would work fine even though
@// the first BL would corrupt the flags. This is because the end of
@// the "grpZeroSetLoop" loop inside
@// armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag
@// to EQ
@// Even order starts with a radix-4 first stage, odd order with radix-8
TST order,#0x00000001
BLEQ armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe
BLNE armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace_unsafe
@// Run radix-4 stages until subFFTNum reaches 4, then the last stage.
@// The BEQ at the loop head consumes the flags of the preceding CMP.
CMP subFFTNum,#4
BLT FFTEnd
unscaledRadix4Loop:
BEQ lastStageUnscaledRadix4
BL armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace_unsafe
CMP subFFTNum,#4
B unscaledRadix4Loop
lastStageUnscaledRadix4:
BL armSP_FFTInv_CToC_FC32_Radix4_ls_OutOfPlace_unsafe
B FFTEnd
FFTEnd: @// Does only the scaling
@ Scale inverse FFT result by 1/N
@// N reloaded here is the halved size stored in complexFFTSize
M_LDR N, complexFFTSize
VMOV sN,N
VCVT fN, sN @ fn = fftSize, as a float
VMOV one, 1.0
VDIV one, one, fN @ one = dScale[0] = 1 / fftSize
@// N = subFFTSize ; dataptr = pDst
@// Each iteration scales one D register (two floats) in place and
@// advances pSrc; the loop runs while the decremented subFFTSize is > 0.
scaleFFTData:
VLD1 {dX0},[pSrc] @// pSrc contains pDst pointer
SUBS subFFTSize,subFFTSize,#1
VMUL dX0, dX0, dScale[0]
VST1 {dX0},[pSrc]!
BGT scaleFFTData
End:
@// Set return value
MOV result, #OMX_Sts_NoErr
@// Write function tail
M_END
.end

Просмотреть файл

@ -1,146 +0,0 @@
@//
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@// Use of this source code is governed by a BSD-style license
@// that can be found in the LICENSE file in the root of the source
@// tree. An additional intellectual property rights grant can be found
@// in the file PATENTS. All contributing project authors may
@// be found in the AUTHORS file in the root of the source tree.
@//
@// This file was originally licensed as follows. It has been
@// relicensed with permission from the copyright holders.
@//
@//
@// File Name: omxSP_FFTInv_CCSToR_S32S16_Sfs_s.s
@// OpenMAX DL: v1.0.2
@// Last Modified Revision: 7098
@// Last Modified Date: Thu, 16 Aug 2007
@//
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
@//
@//
@//
@// Description:
@// Compute the inverse FFT of a spectrum stored in CCS (complex
@// conjugate-symmetric) format, producing a real-valued 16-bit signal
@//
@// Include standard headers
#include "dl/api/armCOMM_s.h"
#include "dl/api/omxtypes_s.h"
@// Import symbols required from other files
@// (For example tables)
.extern omxSP_FFTInv_CCSToR_S32_Sfs
@// Set debugging level
@//DEBUG_ON SETL {TRUE}
@// Guarding implementation by the processor name
@// Guarding implementation by the processor name
@// Import symbols required from other files
@// (For example tables)
@//Input Registers
#define pSrc r0
#define pDst r1
#define pFFTSpec r2
#define scale r3
@// Output registers
#define result r0
@// Values that must survive the call to omxSP_FFTInv_CCSToR_S32_Sfs
#define N r6
#define pOut r5
#define pTmpDst r4
@// Neon registers
@// qX0 (Q0) is the register pair D0:D1, i.e. dX0 and dX01 together;
@// dY0 and dY0S32 are two typed views of D2.
#define dX0 D0.S32
#define dX01 D1.S32
#define qX0 Q0.S32
#define dY0 D2.S16
#define dY0S32 D2.S32
@//
@// omxSP_FFTInv_CCSToR_S32S16_Sfs
@//
@// In:  r0 = pSrc, r1 = pDst, r2 = pFFTSpec, r3 = scale
@// Out: r0 = OMX_Sts_NoErr (always; the status returned by the inner
@//      call is overwritten at End)
@//
@// Wrapper around omxSP_FFTInv_CCSToR_S32_Sfs: the 32-bit transform is
@// run with its destination redirected to pBuf + 4*N bytes, and the
@// 32-bit results are then narrowed with saturation (VQMOVN) to 16 bits
@// and written to the pDst supplied by the caller.
@//
@// NOTE(review): r4-r6 are read after the BL, so the callee must
@// preserve them; presumably its M_START ...,r11 does -- confirm.
@//
@// Allocate stack memory required by the function
@// Write function header
M_START omxSP_FFTInv_CCSToR_S32S16_Sfs,r11,d15
.set ARMsFFTSpec_N, 0
.set ARMsFFTSpec_pBitRev, 4
.set ARMsFFTSpec_pTwiddle, 8
.set ARMsFFTSpec_pBuf, 12
@// Define stack arguments
@// Read the size from structure and take log
LDR N, [pFFTSpec, #ARMsFFTSpec_N]
@// Read other structure parameters
@//LDR pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
LDR pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
@// Remember the final 16-bit destination, then point pDst at the
@// temporary 32-bit area at pOut + 4*N bytes
MOV pTmpDst,pDst
ADD pDst,pOut,N, LSL #2
@// r0 (pSrc), r2 (pFFTSpec) and r3 (scale) are passed through unchanged
BL omxSP_FFTInv_CCSToR_S32_Sfs
@// r1 is not assumed to survive the call, so recompute the same address
ADD pDst,pOut,N, LSL #2
@// Dispatch on N: > 2 uses the loop, == 2 and < 2 are special cased
CMP N,#2
BGT copyLoop
BEQ copyS32ToS16
@// N < 2: narrow and store a single 16-bit value
VLD1 dX0[0],[pDst]
VQMOVN dY0,qX0
VST1 dY0[0],[pTmpDst]
B End
copyS32ToS16:
@// N == 2: narrow two values and store them as one 32-bit lane
VLD1 dX0,[pDst]
VQMOVN dY0,qX0
VST1 dY0S32[0],[pTmpDst]
B End
copyLoop:
@// Four 32-bit values in, four saturated 16-bit values out per
@// iteration; both pointers post-increment
VLD1 {dX0,dX01},[pDst]!
SUBS N,N,#4
VQMOVN dY0,qX0
VST1 dY0,[pTmpDst]!
BGT copyLoop
End:
@// Set return value
MOV result, #OMX_Sts_NoErr
@// Write function tail
M_END
.end

Просмотреть файл

@ -1,390 +0,0 @@
@//
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@// Use of this source code is governed by a BSD-style license
@// that can be found in the LICENSE file in the root of the source
@// tree. An additional intellectual property rights grant can be found
@// in the file PATENTS. All contributing project authors may
@// be found in the AUTHORS file in the root of the source tree.
@//
@// This file was originally licensed as follows. It has been
@// relicensed with permission from the copyright holders.
@//
@//
@// File Name: omxSP_FFTInv_CCSToR_S32_Sfs_s.s
@// OpenMAX DL: v1.0.2
@// Last Modified Revision: 7469
@// Last Modified Date: Thu, 20 Sep 2007
@//
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
@//
@//
@//
@// Description:
@// Compute the inverse FFT of a spectrum stored in CCS (complex
@// conjugate-symmetric) format, producing a real-valued 32-bit signal
@//
@// Include standard headers
#include "dl/api/armCOMM_s.h"
#include "dl/api/omxtypes_s.h"
@// Import symbols required from other files
@// (For example tables)
.extern armSP_FFTInv_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_SC32_Radix2_fs_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_SC32_Radix8_fs_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_SC32_Radix4_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_SC32_Sfs_Radix8_fs_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_SC32_Sfs_Radix4_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_SC32_Radix2_OutOfPlace_unsafe
.extern armSP_FFTInv_CCSToR_S32_Sfs_preTwiddleRadix2_unsafe
.extern armSP_FFTInv_CCSToR_S32_preTwiddleRadix2_unsafe
@// Set debugging level
@//DEBUG_ON SETL {TRUE}
@// Guarding implementation by the processor name
@// Guarding implementation by the processor name
@// Import symbols required from other files
@// (For example tables)
.extern armSP_FFTInv_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_SC32_Radix2_ls_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_SC32_Sfs_Radix4_ls_OutOfPlace_unsafe
@//Input Registers
@// NOTE: many of the names below alias the same physical register
@// (for example pDst/argTwiddle = r1, pFFTSpec/argDst = r2,
@// pTwiddle/tmpOrder/argScale = r4, N/subFFTNum = r6), so the order of
@// the MOVs in the function body matters.
#define pSrc r0
#define pDst r1
#define pFFTSpec r2
#define scale r3
@// Output registers
#define result r0
@//Local Scratch Registers
#define argTwiddle r1
#define argDst r2
#define argScale r4
#define tmpOrder r4
#define pTwiddle r4
#define pOut r5
#define subFFTSize r7
#define subFFTNum r6
#define N r6
@// order lives in r14 (LR), which every BL overwrites
#define order r14
#define diff r9
@// Total num of radix stages required to complete the FFT
#define count r8
#define x0r r4
#define x0i r5
#define diffMinusOne r2
#define round r3
#define pOut1 r2
#define size r7
#define step r8
#define step1 r9
#define twStep r10
#define pTwiddleTmp r11
#define argTwiddle1 r12
#define zero r14
@// Neon registers
#define dX0 D0.S32
#define dShift D1.S32
#define dX1 D1.S32
#define dY0 D2.S32
#define dY1 D3.S32
#define dX0r D0.S32
#define dX0i D1.S32
#define dX1r D2.S32
#define dX1i D3.S32
#define dW0r D4.S32
#define dW0i D5.S32
#define dW1r D6.S32
#define dW1i D7.S32
#define dT0 D8.S32
#define dT1 D9.S32
#define dT2 D10.S32
#define dT3 D11.S32
#define qT0 Q6.S64
#define qT1 Q7.S64
#define qT2 Q8.S64
#define qT3 Q9.S64
#define dY0r D4.S32
#define dY0i D5.S32
#define dY1r D6.S32
#define dY1i D7.S32
#define dzero D20.S32
#define dY2 D4.S32
#define dY3 D5.S32
#define dW0 D6.S32
#define dW1 D7.S32
#define dW0Tmp D10.S32
#define dW1Neg D11.S32
@//
@// omxSP_FFTInv_CCSToR_S32_Sfs
@//
@// In:  r0 = pSrc, r1 = pDst, r2 = pFFTSpec, r3 = scale
@// Out: r0 = OMX_Sts_NoErr (always)
@//
@// Outline of what the code below does:
@//   1. Read N, pTwiddle and pBuf from the spec structure.
@//   2. N <= 1: apply a rounding right shift by scale to the single
@//      input element, store it and return.
@//   3. Otherwise call one of the two preTwiddle Radix2 helpers, halve
@//      N, add order = log2(N/2) to scale, and run an N/2-point complex
@//      inverse FFT from the radix stage helpers.
@//   4. At FFTEnd, apply any remaining right shift (diffOnStack) to the
@//      output in place.
@//
@// The stage helpers are external "_unsafe" routines; their register
@// contract is not visible in this file. Presumably they update pSrc,
@// subFFTNum and subFFTSize between stages -- confirm against the helpers.
@//
@// Allocate stack memory required by the function
M_ALLOC4 diffOnStack, 4
@// Write function header
@// NOTE(review): M_START/M_END come from armCOMM_s.h; presumably they
@// save/restore registers up to r11 and d15 -- confirm against the macro.
M_START omxSP_FFTInv_CCSToR_S32_Sfs,r11,d15
@ Structure offsets for the FFTSpec
.set ARMsFFTSpec_N, 0
.set ARMsFFTSpec_pBitRev, 4
.set ARMsFFTSpec_pTwiddle, 8
.set ARMsFFTSpec_pBuf, 12
@// Define stack arguments
@// Read the size from structure and take log
LDR N, [pFFTSpec, #ARMsFFTSpec_N]
@// Read other structure parameters
LDR pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
LDR pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
@// N=1 Treat separately
CMP N,#1
BGT sizeGreaterThanOne
VLD1 dX0[0],[pSrc]
RSB scale,scale,#0 @// to use VRSHL for right shift by a variable
VMOV dShift[0],scale
VRSHL dX0,dShift
VST1 dX0[0],[pDst]
B End
sizeGreaterThanOne:
@// Call the preTwiddle Radix2 stage before doing the complex IFFT
@// The following conditional BL combination would work since
@// evenOddButterflyLoop in the first call would set Z flag to zero
@// scale == 0 selects the unscaled preTwiddle, scale > 0 the scaled one
CMP scale,#0
BLEQ armSP_FFTInv_CCSToR_S32_preTwiddleRadix2_unsafe
BLGT armSP_FFTInv_CCSToR_S32_Sfs_preTwiddleRadix2_unsafe
complexIFFT:
ASR N,N,#1 @// N/2 point complex IFFT
ADD pSrc,pOut,N,LSL #3 @// set pSrc as pOut1
@// order = 31 - CLZ(N) = log2(N) for a power-of-two N
CLZ order,N @// N = 2^order
RSB order,order,#31
MOV subFFTSize,#1
@//MOV subFFTNum,N
ADD scale,scale,order @// FFTInverse has a final scaling factor by N
CMP order,#3
BGT orderGreaterthan3 @// order > 3
CMP order,#1
BGE orderGreaterthan0 @// order > 0
@// order = 0: save scale as the final shift and copy one D register.
@// The flags from "CMP order,#1" stay LT through the NEON/MOV
@// instructions, so the BLT below is taken.
M_STR scale, diffOnStack,LT @// order = 0
VLD1 dX0,[pSrc]
VST1 dX0,[pDst]
MOV pSrc,pDst
BLT FFTEnd
orderGreaterthan0:
@// set the buffers appropriately for various orders
CMP order,#2
MOVNE argDst,pDst
MOVEQ argDst,pOut
MOVEQ pOut,pDst @// Pass the first stage destination in RN5
@// argTwiddle is r1, so this overwrites pDst (already copied above)
MOV argTwiddle,pTwiddle
@// Store the scale factor and scale at the end
@// diff = scale - order, i.e. the scale value passed in by the caller
SUB diff,scale,order
M_STR diff, diffOnStack
BGE orderGreaterthan1
BLLT armSP_FFTInv_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe @// order = 1
B FFTEnd
orderGreaterthan1:
@// order (r14) is destroyed by the BL below, so keep a copy in r4
MOV tmpOrder,order @// tmpOrder = RN 4
BL armSP_FFTInv_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe
@// order == 3 needs one extra middle radix-2 stage; order == 2 does not
CMP tmpOrder,#2
BLGT armSP_FFTInv_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe
BL armSP_FFTInv_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe
B FFTEnd
orderGreaterthan3:
@// check scale = 0 or scale = order
@// NOTE(review): order was already added to scale above, so diff here
@// equals the scale value passed in by the caller; BGE is therefore
@// taken for every non-negative caller scale. The "scale > order"
@// wording appears to predate that ADD -- confirm.
SUBS diff, scale, order @// scale > order
MOVGT scale,order
BGE specialScaleCase @// scale = 0 or scale = order
CMP scale,#0
BEQ specialScaleCase
B generalScaleCase
specialScaleCase: @// scale = 0 or scale = order and order >= 2
TST order, #2 @// Set input args to fft stages
MOVNE argDst,pDst
MOVEQ argDst,pOut
MOVEQ pOut,pDst @// Pass the first stage destination in RN5
MOV argTwiddle,pTwiddle
@// diff >= 0 selects the scaled (Sfs) stages, diff < 0 the unscaled ones
CMP diff,#0
M_STR diff, diffOnStack
BGE scaleEqualsOrder
@//check for even or odd order
@// NOTE: The following combination of BL's would work fine even though the first
@// BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
@// armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
TST order,#0x00000001
BLEQ armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe
BLNE armSP_FFTInv_CToC_SC32_Radix8_fs_OutOfPlace_unsafe
@// Run radix-4 stages until subFFTNum reaches 4, then the last stage.
@// The BEQ at the loop head consumes the flags of the preceding CMP.
CMP subFFTNum,#4
BLT FFTEnd
unscaledRadix4Loop:
BEQ lastStageUnscaledRadix4
BL armSP_FFTInv_CToC_SC32_Radix4_OutOfPlace_unsafe
CMP subFFTNum,#4
B unscaledRadix4Loop
lastStageUnscaledRadix4:
BL armSP_FFTInv_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
B FFTEnd
scaleEqualsOrder:
@//check for even or odd order
@// NOTE: The following combination of BL's would work fine even though the first
@// BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
@// armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
@// NOTE(review): the routine actually called here is the Sfs variant;
@// presumably it leaves the Z flag in the same state -- confirm.
TST order,#0x00000001
BLEQ armSP_FFTInv_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe
BLNE armSP_FFTInv_CToC_SC32_Sfs_Radix8_fs_OutOfPlace_unsafe
CMP subFFTNum,#4
BLT FFTEnd
scaledRadix4Loop:
BEQ lastStageScaledRadix4
BL armSP_FFTInv_CToC_SC32_Sfs_Radix4_OutOfPlace_unsafe
CMP subFFTNum,#4
B scaledRadix4Loop
lastStageScaledRadix4:
BL armSP_FFTInv_CToC_SC32_Sfs_Radix4_ls_OutOfPlace_unsafe
B FFTEnd
generalScaleCase: @// 0 < scale < order and order >= 2
@// Determine the correct destination buffer
@// count = number of stages that will run: scale radix-2 stages plus
@// either (order - scale)/2 radix-4 stages or (order - scale) radix-2
@// stages, depending on the parity of diff
SUB diff,order,scale
TST diff,#0x01
ADDEQ count,scale,diff,LSR #1 @// count = scale + (order - scale)/2
MOVNE count,order
TST count,#0x01 @// Is count even or odd ?
MOVNE argDst,pDst @// Set input args to fft stages
MOVEQ argDst,pOut
MOVEQ pOut,pDst @// Pass the first stage destination in RN5
MOV argTwiddle,pTwiddle
M_STR diff, diffOnStack
MOV argScale,scale @// Put scale in RN4 so as to save and restore
BL armSP_FFTInv_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe @// scaled first stage
@// Run the remaining scaled radix-2 stages, counting argScale down.
@// NOTE(review): the BGT after the second SUBS relies on argScale (r4)
@// surviving the helper call -- confirm against the helper.
SUBS argScale,argScale,#1
scaledRadix2Loop:
BLGT armSP_FFTInv_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe
SUBS argScale,argScale,#1 @// save and restore scale (RN4) in the scaled stages
BGT scaledRadix2Loop
M_LDR diff, diffOnStack
@//check for even or odd order
@// Even diff: finish with radix-4 stages; odd diff: with radix-2 stages
TST diff,#0x00000001
BEQ generalUnscaledRadix4Loop
B unscaledRadix2Loop
generalUnscaledRadix4Loop:
CMP subFFTNum,#4
BEQ generalLastStageUnscaledRadix4
BL armSP_FFTInv_CToC_SC32_Radix4_OutOfPlace_unsafe
B generalUnscaledRadix4Loop
generalLastStageUnscaledRadix4:
BL armSP_FFTInv_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
@// The general case branches to End, skipping the FFTEnd shift
B End
unscaledRadix2Loop:
CMP subFFTNum,#2
BEQ generalLastStageUnscaledRadix2
BL armSP_FFTInv_CToC_SC32_Radix2_OutOfPlace_unsafe
B unscaledRadix2Loop
generalLastStageUnscaledRadix2:
BL armSP_FFTInv_CToC_SC32_Radix2_ls_OutOfPlace_unsafe
B End
FFTEnd: @// Does only the scaling
@// Nothing to do unless a positive shift amount was saved
M_LDR diff, diffOnStack
CMP diff,#0
BLE End
RSB diff,diff,#0 @// to use VRSHL for right shift by a variable
VDUP dShift,diff
@// Each iteration shifts one D register (two 32-bit values) in place
@// and advances pSrc; the loop runs while the decremented subFFTSize
@// is > 0.
scaleFFTData: @// N = subFFTSize ; dataptr = pDst ; scale = diff
VLD1 {dX0},[pSrc] @// pSrc contains pDst pointer
SUBS subFFTSize,subFFTSize,#1
VRSHL dX0,dShift
VST1 {dX0},[pSrc]!
BGT scaleFFTData
End:
@// Set return value
MOV result, #OMX_Sts_NoErr
@// Write function tail
M_END
.end

Просмотреть файл

@ -1,214 +0,0 @@
@//
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@// Use of this source code is governed by a BSD-style license
@// that can be found in the LICENSE file in the root of the source
@// tree. An additional intellectual property rights grant can be found
@// in the file PATENTS. All contributing project authors may
@// be found in the AUTHORS file in the root of the source tree.
@//
@// This is a modification of armSP_FFT_CToC_SC32_Radix2_fs_unsafe_s.s
@// to support float instead of SC32.
@//
@//
@// Description:
@// Compute an inverse FFT for a complex signal
@//
@//
@// Include standard headers
#include "dl/api/armCOMM_s.h"
#include "dl/api/omxtypes_s.h"
@// Import symbols required from other files
@// (For example tables)
.extern armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_FC32_Radix2_OutOfPlace_unsafe
@// Set debugging level
@//DEBUG_ON SETL {TRUE}
@// Guarding implementation by the processor name
@// Guarding implementation by the processor name
@// Import symbols required from other files
@// (For example tables)
.extern armSP_FFTInv_CToC_FC32_Radix4_ls_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace_unsafe
@//Input Registers
@// NOTE: many of the names below alias the same physical register
@// (for example pDst/argTwiddle = r1, pFFTSpec/argDst = r2,
@// pTwiddle/tmpOrder/argScale = r4, N/subFFTNum = r6), so the order of
@// the MOVs in the function body matters.
#define pSrc r0
#define pDst r1
#define pFFTSpec r2
@// Output registers
#define result r0
@//Local Scratch Registers
#define argTwiddle r1
#define argDst r2
#define argScale r4
#define tmpOrder r4
#define pTwiddle r4
#define pOut r5
#define subFFTSize r7
#define subFFTNum r6
#define N r6
@// order lives in r14 (LR), which every BL overwrites
#define order r14
#define diff r9
@// Total num of radix stages required to complete the FFT
#define count r8
#define x0r r4
#define x0i r5
#define diffMinusOne r2
@// Neon registers
@// sN and fN are S0/S1, i.e. the two halves of D0 (dX0, low half of qX0)
#define dX0 D0.F32
#define qX0 Q0.F32
#define sN S0.S32
#define fN S1.F32
@// one must be the same as dScale[0]!
#define dScale D4.F32
#define one S8.F32
@//
@// omxSP_FFTInv_CToC_FC32_Sfs
@//
@// In:  r0 = pSrc, r1 = pDst, r2 = pFFTSpec
@// Out: r0 = OMX_Sts_NoErr (always)
@//
@// Outline of what the code below does:
@//   1. Read N, pTwiddle and pBuf from the spec structure and save N in
@//      the fftSize stack slot.
@//   2. Run an N-point complex inverse FFT built from the radix-2/4/8
@//      stage helpers, selected by order = log2(N).
@//   3. Multiply the output in place by 1/N.
@//
@// The stage helpers are external "_unsafe" routines; their register
@// contract is not visible in this file. Presumably they update pSrc,
@// subFFTNum and subFFTSize between stages -- confirm against the helpers.
@//
@// Allocate stack memory required by the function
M_ALLOC4 fftSize, 4
@// Write function header
@// NOTE(review): M_START/M_END come from armCOMM_s.h; presumably they
@// save/restore registers up to r11 and d15 -- confirm against the macro.
M_START omxSP_FFTInv_CToC_FC32_Sfs,r11,d15
@ Structure offsets for the FFTSpec
.set ARMsFFTSpec_N, 0
.set ARMsFFTSpec_pBitRev, 4
.set ARMsFFTSpec_pTwiddle, 8
.set ARMsFFTSpec_pBuf, 12
@// Define stack arguments
@// Read the size from structure and take log
LDR N, [pFFTSpec, #ARMsFFTSpec_N]
M_STR N, fftSize
@// Read other structure parameters
LDR pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
LDR pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
@// order = 31 - CLZ(N) = log2(N) for a power-of-two N
CLZ order,N @// N = 2^order
RSB order,order,#31
MOV subFFTSize,#1
@//MOV subFFTNum,N
CMP order,#3
BGT orderGreaterthan3 @// order > 3
CMP order,#1
BGE orderGreaterthan0 @// order > 0
@// order = 0: copy one complex value (8 bytes) from pSrc to pDst.
@// The flags from "CMP order,#1" stay LT through the NEON/MOV
@// instructions, so the BLT below is taken.
VLD1 dX0,[pSrc]
VST1 dX0,[pDst]
MOV pSrc,pDst
BLT FFTEnd
orderGreaterthan0:
@// set the buffers appropriately for various orders
CMP order,#2
MOVNE argDst,pDst
MOVEQ argDst,pOut
@// Pass the first stage destination in RN5
MOVEQ pOut,pDst
@// argTwiddle is r1, so this overwrites pDst (already copied above)
MOV argTwiddle,pTwiddle
BGE orderGreaterthan1
@// order = 1
BLLT armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
B FFTEnd
orderGreaterthan1:
@// order (r14) is destroyed by the BL below, so keep a copy in r4
MOV tmpOrder,order @// tmpOrder = RN 4
BL armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
@// order == 3 needs one extra middle radix-2 stage; order == 2 does not
CMP tmpOrder,#2
BLGT armSP_FFTInv_CToC_FC32_Radix2_OutOfPlace_unsafe
BL armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace_unsafe
B FFTEnd
orderGreaterthan3:
@// Set input args to fft stages
@// Bit 1 of order selects which buffer receives the first stage output
TST order, #2
MOVNE argDst,pDst
MOVEQ argDst,pOut
@// Pass the first stage destination in RN5
MOVEQ pOut,pDst
MOV argTwiddle,pTwiddle
@//check for even or odd order
@// NOTE: The following combination of BL's would work fine even though
@// the first BL would corrupt the flags. This is because the end of
@// the "grpZeroSetLoop" loop inside
@// armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag
@// to EQ
@// Even order starts with a radix-4 first stage, odd order with radix-8
TST order,#0x00000001
BLEQ armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe
BLNE armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace_unsafe
@// Run radix-4 stages until subFFTNum reaches 4, then the last stage.
@// The BEQ at the loop head consumes the flags of the preceding CMP.
CMP subFFTNum,#4
BLT FFTEnd
unscaledRadix4Loop:
BEQ lastStageUnscaledRadix4
BL armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace_unsafe
CMP subFFTNum,#4
B unscaledRadix4Loop
lastStageUnscaledRadix4:
BL armSP_FFTInv_CToC_FC32_Radix4_ls_OutOfPlace_unsafe
B FFTEnd
FFTEnd: @// Does only the scaling
@// Build 1/N in S8, which is lane 0 of dScale (D4)
M_LDR N, fftSize
VMOV sN,N
VCVT fN, sN @ fn = fftSize, as a float
VMOV one, 1.0
VDIV one, one, fN @ one = dScale[0] = 1 / fftSize
@ Scale data, doing 2 complex values at a time (because N is
@ always even).
@// N = subFFTSize ; dataptr = pDst ; scale = diff
@// NOTE(review): each iteration loads and stores a full Q register
@// (16 bytes) with a :128 alignment hint. The order = 0 path above also
@// ends up here, so with N = 1 this still transfers two complex values
@// -- confirm that N = 1 is never used or that the buffer allows it.
scaleFFTData:
VLD1 {qX0},[pSrc :128] @// pSrc contains pDst pointer
SUBS subFFTSize,subFFTSize,#2
VMUL qX0, qX0, dScale[0]
VST1 {qX0},[pSrc :128]!
BGT scaleFFTData
End:
@// Set return value
MOV result, #OMX_Sts_NoErr
@// Write function tail
M_END
.end

Просмотреть файл

@ -1,342 +0,0 @@
@//
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@// Use of this source code is governed by a BSD-style license
@// that can be found in the LICENSE file in the root of the source
@// tree. An additional intellectual property rights grant can be found
@// in the file PATENTS. All contributing project authors may
@// be found in the AUTHORS file in the root of the source tree.
@//
@// This file was originally licensed as follows. It has been
@// relicensed with permission from the copyright holders.
@//
@//
@// File Name: omxSP_FFTInv_CToC_SC16_Sfs_s.s
@// OpenMAX DL: v1.0.2
@// Last Modified Revision: 6729
@// Last Modified Date: Tue, 17 Jul 2007
@//
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
@//
@//
@//
@// Description:
@// Compute an inverse FFT for a complex signal
@//
@//
@// Include standard headers
#include "dl/api/armCOMM_s.h"
#include "dl/api/omxtypes_s.h"
@// Import symbols required from other files
@// (For example tables)
.extern armSP_FFTInv_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_SC16_Radix2_fs_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_SC16_Radix4_fs_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_SC16_Radix4_ls_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_SC16_Radix8_fs_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_SC16_Radix4_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_SC16_Sfs_Radix4_ls_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_SC16_Sfs_Radix8_fs_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_SC16_Sfs_Radix4_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_SC16_Radix2_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_SC16_Radix2_ls_OutOfPlace_unsafe
@// Set debugging level
@//DEBUG_ON SETL {TRUE}
@// Guarding implementation by the processor name
@// Guarding implementation by the processor name
.extern armSP_FFTInv_CToC_SC16_Radix2_ps_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe
@//Input Registers
#define pSrc r0
#define pDst r1
#define pFFTSpec r2
#define scale r3
@// Output registers
#define result r0
@//Local Scratch Registers
#define argTwiddle r1
#define argDst r2
#define argScale r4
#define pTwiddle r4
#define tmpOrder r4
#define pOut r5
#define subFFTSize r7
#define subFFTNum r6
#define N r6
#define order r14
#define diff r9
@// Total num of radix stages required to comple the FFT
#define count r8
#define x0r r4
#define x0i r5
#define diffMinusOne r2
#define round r3
@// Neon registers
#define dX0 D0.S16
#define dShift D1.S16
#define dX0S32 D0.S32
@// Allocate stack memory required by the function
M_ALLOC4 diffOnStack, 4
@// Write function header
M_START omxSP_FFTInv_CToC_SC16_Sfs,r11,d15
@ Structure offsets for the FFTSpec
.set ARMsFFTSpec_N, 0
.set ARMsFFTSpec_pBitRev, 4
.set ARMsFFTSpec_pTwiddle, 8
.set ARMsFFTSpec_pBuf, 12
@// Define stack arguments
@// Read the size from structure and take log
LDR N, [pFFTSpec, #ARMsFFTSpec_N]
@// Read other structure parameters
LDR pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
LDR pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
CLZ order,N @// N = 2^order
RSB order,order,#31
MOV subFFTSize,#1
@//MOV subFFTNum,N
ADD scale,scale,order @// FFTInverse has a final scaling factor by N
CMP order,#3
BGT orderGreaterthan3 @// order > 3
CMP order,#1
BGE orderGreaterthan0 @// order > 0
M_STR scale, diffOnStack,LT @// order = 0
LDRLT x0r,[pSrc]
STRLT x0r,[pDst]
MOVLT pSrc,pDst
BLT FFTEnd
orderGreaterthan0:
@// set the buffers appropriately for various orders
CMP order,#2
MOVNE argDst,pDst
MOVEQ argDst,pOut
MOVEQ pOut,pDst @// Pass the first stage destination in RN5
MOV argTwiddle,pTwiddle
@// Store the scale factor and scale at the end
SUB diff,scale,order
M_STR diff, diffOnStack
BGE orderGreaterthan1
BLLT armSP_FFTInv_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe @// order = 1
B FFTEnd
orderGreaterthan1:
MOV tmpOrder,order @// tmpOrder = RN 4
BL armSP_FFTInv_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe
CMP tmpOrder,#2
BLGT armSP_FFTInv_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe
BL armSP_FFTInv_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe
B FFTEnd
orderGreaterthan3:
@// check scale = 0 or scale = order
SUBS diff, scale, order @// scale > order
MOVGT scale,order
BGE specialScaleCase @// scale = 0 or scale = order
CMP scale,#0
BEQ specialScaleCase
B generalScaleCase
specialScaleCase: @// scale = 0 or scale = order and order > 3
TST order, #2 @// Set input args to fft stages
MOVNE argDst,pDst
MOVEQ argDst,pOut
MOVEQ pOut,pDst @// Pass the first stage destination in RN5
MOV argTwiddle,pTwiddle
CMP diff,#0
M_STR diff, diffOnStack
BGE scaleEqualsOrder
@//check for even or odd order
@// NOTE: The following combination of BL's would work fine eventhough the first
@// BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
@// armSP_FFTInv_CToC_SC16_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
TST order,#0x00000001
BLEQ armSP_FFTInv_CToC_SC16_Radix4_fs_OutOfPlace_unsafe
BLNE armSP_FFTInv_CToC_SC16_Radix8_fs_OutOfPlace_unsafe
CMP subFFTNum,#4
BLT FFTEnd
unscaledRadix4Loop:
BEQ lastStageUnscaledRadix4
BL armSP_FFTInv_CToC_SC16_Radix4_OutOfPlace_unsafe
CMP subFFTNum,#4
B unscaledRadix4Loop
lastStageUnscaledRadix4:
BL armSP_FFTInv_CToC_SC16_Radix4_ls_OutOfPlace_unsafe
B FFTEnd
scaleEqualsOrder:
@//check for even or odd order
@// NOTE: The following combination of BL's would work fine eventhough the first
@// BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
@// armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
TST order,#0x00000001
BLEQ armSP_FFTInv_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe
BLNE armSP_FFTInv_CToC_SC16_Sfs_Radix8_fs_OutOfPlace_unsafe
CMP subFFTNum,#4
BLT FFTEnd
scaledRadix4Loop:
BEQ lastStageScaledRadix4
BL armSP_FFTInv_CToC_SC16_Sfs_Radix4_OutOfPlace_unsafe
CMP subFFTNum,#4
B scaledRadix4Loop
lastStageScaledRadix4:
BL armSP_FFTInv_CToC_SC16_Sfs_Radix4_ls_OutOfPlace_unsafe
B FFTEnd
generalScaleCase: @// 0 < scale < order and order > 3
@// Determine the correct destination buffer
SUB diff,order,scale
TST diff,#0x01
ADDEQ count,scale,diff,LSR #1 @// count = scale + (order - scale)/2
MOVNE count,order
TST count,#0x01 @// Is count even or odd ?
MOVNE argDst,pDst @// Set input args to fft stages
MOVEQ argDst,pOut
MOVEQ pOut,pDst @// Pass the first stage destination in RN5
MOV argTwiddle,pTwiddle
CMP diff,#1
M_STR diff, diffOnStack
BEQ scaleps @// scaling including a radix2_ps stage
MOV argScale,scale @// Put scale in RN4 so as to save and restore
BL armSP_FFTInv_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe @// scaled first stage
SUBS argScale,argScale,#1
scaledRadix2Loop:
BLGT armSP_FFTInv_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe
SUBS argScale,argScale,#1 @// save and restore scale (RN4) in the scaled stages
BGT scaledRadix2Loop
B outScale
scaleps:
SUB argScale,scale,#1 @// order>3 and diff=1 => scale >= 3
BL armSP_FFTInv_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe @// scaled first stage
SUBS argScale,argScale,#1
scaledRadix2psLoop:
BEQ scaledRadix2psStage
BLGT armSP_FFTInv_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe
SUBS argScale,argScale,#1 @// save and restore scale (RN4) in the scaled stages
BGE scaledRadix2psLoop
scaledRadix2psStage:
BL armSP_FFTInv_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe
B generalLastStageUnscaledRadix2
outScale:
M_LDR diff, diffOnStack
@//check for even or odd order
TST diff,#0x00000001
BEQ generalUnscaledRadix4Loop
B unscaledRadix2Loop
generalUnscaledRadix4Loop:
CMP subFFTNum,#4
BEQ generalLastStageUnscaledRadix4
BL armSP_FFTInv_CToC_SC16_Radix4_OutOfPlace_unsafe
B generalUnscaledRadix4Loop
generalLastStageUnscaledRadix4:
BL armSP_FFTInv_CToC_SC16_Radix4_ls_OutOfPlace_unsafe
B End
unscaledRadix2Loop:
CMP subFFTNum,#4
BEQ generalLastTwoStagesUnscaledRadix2
BL armSP_FFTInv_CToC_SC16_Radix2_OutOfPlace_unsafe
B unscaledRadix2Loop
generalLastTwoStagesUnscaledRadix2:
BL armSP_FFTInv_CToC_SC16_Radix2_ps_OutOfPlace_unsafe
generalLastStageUnscaledRadix2:
BL armSP_FFTInv_CToC_SC16_Radix2_ls_OutOfPlace_unsafe
B End
FFTEnd: @// Does only the scaling
M_LDR diff, diffOnStack
CMP diff,#0
BLE End
RSB diff,diff,#0 @// to use VRSHL for right shift by a variable
VDUP dShift,diff
scaleFFTData: @// N = subFFTSize ; dataptr = pDst ; scale = diff
VLD1 {dX0S32[0]},[pSrc] @// pSrc contains pDst pointer
SUBS subFFTSize,subFFTSize,#1
VRSHL dX0,dShift
VST1 {dX0S32[0]},[pSrc]!
BGT scaleFFTData
End:
@// Set return value
MOV result, #OMX_Sts_NoErr
@// Write function tail
M_END
.END

Просмотреть файл

@ -1,314 +0,0 @@
@//
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@// Use of this source code is governed by a BSD-style license
@// that can be found in the LICENSE file in the root of the source
@// tree. An additional intellectual property rights grant can be found
@// in the file PATENTS. All contributing project authors may
@// be found in the AUTHORS file in the root of the source tree.
@//
@// This file was originally licensed as follows. It has been
@// relicensed with permission from the copyright holders.
@//
@//
@// File Name: omxSP_FFTInv_CToC_SC32_Sfs_s.s
@// OpenMAX DL: v1.0.2
@// Last Modified Revision: 6675
@// Last Modified Date: Fri, 06 Jul 2007
@//
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
@//
@//
@//
@// Description:
@// Compute an inverse FFT for a complex signal
@//
@// Include standard headers
#include "dl/api/armCOMM_s.h"
#include "dl/api/omxtypes_s.h"
@// Import symbols required from other files
@// (For example tables)
.extern armSP_FFTInv_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_SC32_Radix2_fs_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_SC32_Radix8_fs_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_SC32_Radix4_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_SC32_Sfs_Radix8_fs_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_SC32_Sfs_Radix4_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_SC32_Radix2_OutOfPlace_unsafe
@// Set debugging level
@//DEBUG_ON SETL {TRUE}
@// Guarding implementation by the processor name
@// Guarding implementation by the processor name
@// Import symbols required from other files
@// (For example tables)
.extern armSP_FFTInv_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_SC32_Radix2_ls_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_SC32_Sfs_Radix4_ls_OutOfPlace_unsafe
@// Register aliases for omxSP_FFTInv_CToC_SC32_Sfs.
@// Several names below share one physical register, so only one name per
@// register can hold a live value at any point:
@//   r0: pSrc / result
@//   r1: pDst / argTwiddle
@//   r2: pFFTSpec / argDst / diffMinusOne
@//   r3: scale / round
@//   r4: argScale / tmpOrder / pTwiddle / x0r
@//   r5: pOut / x0i
@//   r6: subFFTNum / N
@//Input Registers
#define pSrc r0
#define pDst r1
#define pFFTSpec r2
#define scale r3
@// Output registers
#define result r0
@//Local Scratch Registers
#define argTwiddle r1
#define argDst r2
#define argScale r4
#define tmpOrder r4
#define pTwiddle r4
#define pOut r5
#define subFFTSize r7
#define subFFTNum r6
#define N r6
@// order lives in r14, the link register, so its value is overwritten by every BL
#define order r14
#define diff r9
@// Total number of radix stages required to complete the FFT
#define count r8
#define x0r r4
#define x0i r5
#define diffMinusOne r2
#define round r3
@// Neon registers (64-bit D registers viewed as signed 32-bit lanes)
#define dX0 D0.S32
#define dShift D1.S32
@// Allocate stack memory required by the function
@// NOTE(review): M_ALLOC4 is a macro from armCOMM_s.h (not visible here); presumably it
@// reserves a 4-byte stack slot named diffOnStack, used below via M_STR/M_LDR - confirm.
M_ALLOC4 diffOnStack, 4
@// Write function header
@//
@// omxSP_FFTInv_CToC_SC32_Sfs
@// Driver for the inverse complex FFT: derives the FFT order from the spec,
@// then sequences the radix-2/4/8 stage routines according to the order and
@// the requested scale factor, applying any leftover scaling at FFTEnd.
@//
@// In:   r0 = pSrc      source buffer
@//       r1 = pDst      destination buffer
@//       r2 = pFFTSpec  FFT spec structure (N, pBitRev, pTwiddle, pBuf)
@//       r3 = scale     scale factor
@// Out:  r0 = result, always OMX_Sts_NoErr
@//
@// NOTE(review): M_START/M_END are macros from armCOMM_s.h; the r11,d15 arguments
@// presumably name the highest core/NEON registers to save and restore - confirm.
M_START omxSP_FFTInv_CToC_SC32_Sfs,r11,d15
@ Structure offsets for the FFTSpec
.set ARMsFFTSpec_N, 0
.set ARMsFFTSpec_pBitRev, 4
.set ARMsFFTSpec_pTwiddle, 8
.set ARMsFFTSpec_pBuf, 12
@// Define stack arguments
@// Read the size from structure and take log
LDR N, [pFFTSpec, #ARMsFFTSpec_N]
@// Read other structure parameters
LDR pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
LDR pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
@// order = 31 - CLZ(N), i.e. the index of the highest set bit of N
CLZ order,N @// N = 2^order
RSB order,order,#31
MOV subFFTSize,#1
@//MOV subFFTNum,N
@// From here on scale holds (caller scale + order)
ADD scale,scale,order @// FFTInverse has a final scaling factor by N
@// Dispatch on order: > 3, 1..3, or 0
CMP order,#3
BGT orderGreaterthan3 @// order > 3
CMP order,#1
BGE orderGreaterthan0 @// order > 0
@// order = 0: copy one D register from pSrc to pDst and let FFTEnd apply the scale.
@// The vector and MOV instructions below do not alter the flags, so the LT
@// result of the CMP above still drives the final BLT.
M_STR scale, diffOnStack,LT @// order = 0
VLD1 dX0,[pSrc]
VST1 dX0,[pDst]
MOV pSrc,pDst
BLT FFTEnd
orderGreaterthan0:
@// set the buffers appropriately for various orders
@// Reached for order 1..3. For order = 2 the first stage writes to pOut and
@// pOut takes the value of pDst; otherwise the first stage writes to pDst.
@// Writing argDst (r2) overwrites pFFTSpec and writing argTwiddle (r1) overwrites pDst.
CMP order,#2
MOVNE argDst,pDst
MOVEQ argDst,pOut
MOVEQ pOut,pDst @// Pass the first stage destination in RN5
MOV argTwiddle,pTwiddle
@// Store the scale factor and scale at the end
@// diff = scale - order is the scale value the caller passed in (scale was
@// incremented by order above); FFTEnd applies it as a right shift when positive.
SUB diff,scale,order
M_STR diff, diffOnStack
@// Flags are still those of CMP order,#2 (SUB has no S suffix)
BGE orderGreaterthan1
BLLT armSP_FFTInv_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe @// order = 1
B FFTEnd
orderGreaterthan1:
@// order is 2 or 3. order lives in r14 (LR), which the BL below overwrites,
@// so keep a copy in r4 (pTwiddle has already been copied to argTwiddle).
@// NOTE(review): assumes the stage routine preserves r4 - confirm in the callee.
MOV tmpOrder,order @// tmpOrder = RN 4
BL armSP_FFTInv_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe
@// The middle radix-2 stage is only run for order = 3
CMP tmpOrder,#2
BLGT armSP_FFTInv_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe
BL armSP_FFTInv_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe
B FFTEnd
orderGreaterthan3:
@// check scale = 0 or scale = order
@// diff = scale - order; when scale exceeds order it is clamped to order and
@// the remainder (diff) is left for the final shift at FFTEnd.
SUBS diff, scale, order @// scale > order
MOVGT scale,order
BGE specialScaleCase @// scale = 0 or scale = order
CMP scale,#0
BEQ specialScaleCase
B generalScaleCase
specialScaleCase: @// scale = 0 or scale = order and order > 3
@// Bit 1 of order selects which buffer the first stage writes to
TST order, #2 @// Set input args to fft stages
MOVNE argDst,pDst
MOVEQ argDst,pOut
MOVEQ pOut,pDst @// Pass the first stage destination in RN5
MOV argTwiddle,pTwiddle
@// diff >= 0 selects the scaled (Sfs) stage routines; diff < 0 selects the
@// unscaled ones. M_STR is expected not to disturb the flags set by CMP.
CMP diff,#0
M_STR diff, diffOnStack
BGE scaleEqualsOrder
@//check for even or odd order
@// NOTE: The BLEQ/BLNE pair below works even though the first BL corrupts the
@// flags: per the original authors, the end of the "grpZeroSetLoop" loop inside
@// armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe leaves Z set (EQ), so the
@// BLNE is skipped after the radix-4 first stage returns.
@// NOTE(review): that callee behavior is not visible in this file - confirm.
TST order,#0x00000001
BLEQ armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe
BLNE armSP_FFTInv_CToC_SC32_Radix8_fs_OutOfPlace_unsafe
@// NOTE(review): subFFTNum (r6) is presumably updated by each stage routine - confirm.
CMP subFFTNum,#4
BLT FFTEnd
unscaledRadix4Loop:
@// Run radix-4 stages until subFFTNum = 4, then run the last-stage routine.
@// The BEQ tests the CMP executed just before entering/re-entering the loop.
BEQ lastStageUnscaledRadix4
BL armSP_FFTInv_CToC_SC32_Radix4_OutOfPlace_unsafe
CMP subFFTNum,#4
B unscaledRadix4Loop
lastStageUnscaledRadix4:
BL armSP_FFTInv_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
B FFTEnd
scaleEqualsOrder:
@//check for even or odd order
@// NOTE: The BLEQ/BLNE pair below works even though the first BL corrupts the
@// flags; the original authors state that the end of the "grpZeroSetLoop" loop
@// inside the radix-4 first-stage routine leaves Z set (EQ), so the BLNE is
@// skipped after that routine returns.
@// NOTE(review): the routine called here is the Sfs variant; its flag behavior
@// is not visible in this file - confirm.
TST order,#0x00000001
BLEQ armSP_FFTInv_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe
BLNE armSP_FFTInv_CToC_SC32_Sfs_Radix8_fs_OutOfPlace_unsafe
CMP subFFTNum,#4
BLT FFTEnd
scaledRadix4Loop:
@// Same loop shape as unscaledRadix4Loop, using the scaled (Sfs) routines
BEQ lastStageScaledRadix4
BL armSP_FFTInv_CToC_SC32_Sfs_Radix4_OutOfPlace_unsafe
CMP subFFTNum,#4
B scaledRadix4Loop
lastStageScaledRadix4:
BL armSP_FFTInv_CToC_SC32_Sfs_Radix4_ls_OutOfPlace_unsafe
B FFTEnd
generalScaleCase: @// 0 < scale < order and order > 3
@// Determine the correct destination buffer
@// diff = order - scale. count is scale + diff/2 when diff is even and order
@// when diff is odd; the parity of count picks the first-stage destination.
SUB diff,order,scale
TST diff,#0x01
ADDEQ count,scale,diff,LSR #1 @// count = scale + (order - scale)/2
MOVNE count,order
TST count,#0x01 @// Is count even or odd ?
MOVNE argDst,pDst @// Set input args to fft stages
MOVEQ argDst,pOut
MOVEQ pOut,pDst @// Pass the first stage destination in RN5
MOV argTwiddle,pTwiddle
@// diff is kept on the stack and reloaded after the scaled stages
M_STR diff, diffOnStack
MOV argScale,scale @// Put scale in RN4 so as to save and restore
BL armSP_FFTInv_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe @// scaled first stage
@// Count down argScale: together with the first stage above, scale scaled
@// radix-2 stages are run in total.
SUBS argScale,argScale,#1
scaledRadix2Loop:
@// On entry the flags come from the SUBS immediately before (initial or loop bottom)
BLGT armSP_FFTInv_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe
SUBS argScale,argScale,#1 @// save and restore scale (RN4) in the scaled stages
BGT scaledRadix2Loop
@// Remaining stages are unscaled: radix-4 when diff is even, radix-2 when odd
M_LDR diff, diffOnStack
@//check for even or odd order
TST diff,#0x00000001
BEQ generalUnscaledRadix4Loop
B unscaledRadix2Loop
generalUnscaledRadix4Loop:
@// Run unscaled radix-4 stages until subFFTNum = 4, then the last stage
CMP subFFTNum,#4
BEQ generalLastStageUnscaledRadix4
BL armSP_FFTInv_CToC_SC32_Radix4_OutOfPlace_unsafe
B generalUnscaledRadix4Loop
generalLastStageUnscaledRadix4:
BL armSP_FFTInv_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
@// This path branches straight to End and skips the FFTEnd scaling loop
B End
unscaledRadix2Loop:
@// Run unscaled radix-2 stages until subFFTNum = 2, then the last stage
CMP subFFTNum,#2
BEQ generalLastStageUnscaledRadix2
BL armSP_FFTInv_CToC_SC32_Radix2_OutOfPlace_unsafe
B unscaledRadix2Loop
generalLastStageUnscaledRadix2:
BL armSP_FFTInv_CToC_SC32_Radix2_ls_OutOfPlace_unsafe
@// This path branches straight to End and skips the FFTEnd scaling loop
B End
FFTEnd: @// Does only the scaling
@// Apply the leftover scale saved in diffOnStack; nothing to do when diff <= 0
M_LDR diff, diffOnStack
CMP diff,#0
BLE End
@// A negative per-lane shift count makes VRSHL perform a rounding right shift
RSB diff,diff,#0 @// to use VRSHL for right shift by a variable
VDUP dShift,diff
scaleFFTData: @// N = subFFTSize ; dataptr = pDst ; scale = diff
@// Each pass shifts one D register (two 32-bit lanes) in place and advances
@// pSrc; the loop runs subFFTSize times. SUBS sets the flags for the BGT and
@// the NEON instructions in between leave them unchanged.
@// NOTE(review): on the non-zero-order paths pSrc is presumably left pointing
@// at the output data by the stage routines - confirm.
VLD1 {dX0},[pSrc] @// pSrc contains pDst pointer
SUBS subFFTSize,subFFTSize,#1
VRSHL dX0,dShift
VST1 {dX0},[pSrc]!
BGT scaleFFTData
End:
@// Set return value
MOV result, #OMX_Sts_NoErr
@// Write function tail
M_END
.end

Просмотреть файл

@ -98,7 +98,6 @@
<li><a href="about:license#jpnic">Japan Network Information Center License</a></li>
<li><a href="about:license#jemalloc">jemalloc License</a></li>
<li><a href="about:license#jquery">jQuery License</a></li>
<li><a href="about:license#khronos">Khronos group License</a></li>
<li><a href="about:license#kiss_fft">Kiss FFT License</a></li>
<li><a href="about:license#libcubeb">libcubeb License</a></li>
<li><a href="about:license#libevent">libevent License</a></li>
@ -1951,7 +1950,6 @@ WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
<span class="path">dom/plugins/</span>,
<span class="path">tools/profiler/sps/</span>,
<span class="path">gfx/ots/</span>,
<span class="path">media/openmax_dl/</span>,
<span class="path">gfx/ycbcr</span> and
<span class="path">dom/media/webspeech/recognition/</span>.
</p>
@ -2884,43 +2882,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
<hr>
<h1><a id="khronos"></a>Khronos group License</h1>
<p>This license applies to the following files:</p>
<ul>
<li class="path">openmax_dl/dl/api/omxtypes.h</li>
<li class="path">openmax_dl/dl/sp/api/omxSP.h</li>
</ul>
<pre>
Copyright 2005-2008 The Khronos Group Inc. All Rights Reserved.
These materials are protected by copyright laws and contain material
proprietary to the Khronos Group, Inc. You may use these materials
for implementing Khronos specifications, without altering or removing
any trademark, copyright or other notice from the specification.
Khronos Group makes no, and expressly disclaims any, representations
or warranties, express or implied, regarding these materials, including,
without limitation, any implied warranties of merchantability or fitness
for a particular purpose or non-infringement of any intellectual property.
Khronos Group makes no, and expressly disclaims any, warranties, express
or implied, regarding the correctness, accuracy, completeness, timeliness,
and reliability of these materials.
Under no circumstances will the Khronos Group, or any of its Promoters,
Contributors or Members or their respective partners, officers, directors,
employees, agents or representatives be liable for any damages, whether
direct, indirect, special or consequential damages for lost revenues,
lost profits, or otherwise, arising from or in connection with these
materials.
Khronos and OpenMAX are trademarks of the Khronos Group Inc.
</pre>
<hr>
<h1><a id="kiss_fft"></a>Kiss FFT License</h1>
<p>This license applies to files in the directory