зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1854912 - Remove unnecessary patches on top of libspeex_resampler, modify existing patches so they apply. r=karlt
05_remove-empty-asm-clobber.patch is unnecessary with the current toolchain. 07_integer-halving.patch is now upstream. Differential Revision: https://phabricator.services.mozilla.com/D189119
This commit is contained in:
Родитель
fd66716406
Коммит
52ab4729f9
|
@ -1,22 +1,37 @@
|
|||
diff --git a/src/resample.c b/src/resample.c
|
||||
--- a/src/resample.c
|
||||
+++ b/src/resample.c
|
||||
@@ -94,13 +94,7 @@ static void speex_free (void *ptr) {free(ptr);}
|
||||
#define UINT32_MAX 4294967296U
|
||||
@@ -91,23 +91,17 @@ static void speex_free(void *ptr) {free(
|
||||
#ifndef NULL
|
||||
#define NULL 0
|
||||
#endif
|
||||
|
||||
-#ifdef _USE_SSE
|
||||
#ifndef UINT32_MAX
|
||||
#define UINT32_MAX 4294967295U
|
||||
#endif
|
||||
|
||||
-#ifdef USE_SSE
|
||||
-#include "resample_sse.h"
|
||||
-#endif
|
||||
-
|
||||
-#ifdef _USE_NEON
|
||||
-#ifdef USE_NEON
|
||||
-#include "resample_neon.h"
|
||||
-#endif
|
||||
+#include "simd_detect.h"
|
||||
|
||||
/* Numer of elements to allocate on the stack */
|
||||
/* Number of elements to allocate on the stack */
|
||||
#ifdef VAR_ARRAYS
|
||||
@@ -346,7 +340,9 @@ static int resampler_basic_direct_single(SpeexResamplerState *st, spx_uint32_t c
|
||||
#define FIXED_STACK_ALLOC 8192
|
||||
#else
|
||||
#define FIXED_STACK_ALLOC 1024
|
||||
#endif
|
||||
|
||||
@@ -341,17 +335,19 @@ static int resampler_basic_direct_single
|
||||
const spx_uint32_t den_rate = st->den_rate;
|
||||
spx_word32_t sum;
|
||||
|
||||
while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len))
|
||||
{
|
||||
const spx_word16_t *sinct = & sinc_table[samp_frac_num*N];
|
||||
const spx_word16_t *iptr = & in[last_sample];
|
||||
|
||||
|
@ -27,7 +42,17 @@ diff --git a/src/resample.c b/src/resample.c
|
|||
int j;
|
||||
sum = 0;
|
||||
for(j=0;j<N;j++) sum += MULT16_16(sinct[j], iptr[j]);
|
||||
@@ -364,8 +360,10 @@ static int resampler_basic_direct_single(SpeexResamplerState *st, spx_uint32_t c
|
||||
|
||||
/* This code is slower on most DSPs which have only 2 accumulators.
|
||||
Plus this this forces truncation to 32 bits and you lose the HW guard bits.
|
||||
I think we can trust the compiler and let it vectorize and/or unroll itself.
|
||||
spx_word32_t accum[4] = {0,0,0,0};
|
||||
@@ -359,18 +355,20 @@ static int resampler_basic_direct_single
|
||||
accum[0] += MULT16_16(sinct[j], iptr[j]);
|
||||
accum[1] += MULT16_16(sinct[j+1], iptr[j+1]);
|
||||
accum[2] += MULT16_16(sinct[j+2], iptr[j+2]);
|
||||
accum[3] += MULT16_16(sinct[j+3], iptr[j+3]);
|
||||
}
|
||||
sum = accum[0] + accum[1] + accum[2] + accum[3];
|
||||
*/
|
||||
sum = SATURATE32PSHR(sum, 15, 32767);
|
||||
|
@ -39,7 +64,17 @@ diff --git a/src/resample.c b/src/resample.c
|
|||
#endif
|
||||
|
||||
out[out_stride * out_sample++] = sum;
|
||||
@@ -404,7 +402,9 @@ static int resampler_basic_direct_double(SpeexResamplerState *st, spx_uint32_t c
|
||||
last_sample += int_advance;
|
||||
samp_frac_num += frac_advance;
|
||||
if (samp_frac_num >= den_rate)
|
||||
{
|
||||
samp_frac_num -= den_rate;
|
||||
@@ -399,29 +397,33 @@ static int resampler_basic_direct_double
|
||||
const spx_uint32_t den_rate = st->den_rate;
|
||||
double sum;
|
||||
|
||||
while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len))
|
||||
{
|
||||
const spx_word16_t *sinct = & sinc_table[samp_frac_num*N];
|
||||
const spx_word16_t *iptr = & in[last_sample];
|
||||
|
||||
|
@ -50,7 +85,10 @@ diff --git a/src/resample.c b/src/resample.c
|
|||
int j;
|
||||
double accum[4] = {0,0,0,0};
|
||||
|
||||
@@ -415,8 +415,10 @@ static int resampler_basic_direct_double(SpeexResamplerState *st, spx_uint32_t c
|
||||
for(j=0;j<N;j+=4) {
|
||||
accum[0] += sinct[j]*iptr[j];
|
||||
accum[1] += sinct[j+1]*iptr[j+1];
|
||||
accum[2] += sinct[j+2]*iptr[j+2];
|
||||
accum[3] += sinct[j+3]*iptr[j+3];
|
||||
}
|
||||
sum = accum[0] + accum[1] + accum[2] + accum[3];
|
||||
|
@ -62,7 +100,17 @@ diff --git a/src/resample.c b/src/resample.c
|
|||
#endif
|
||||
|
||||
out[out_stride * out_sample++] = PSHR32(sum, 15);
|
||||
@@ -460,7 +462,9 @@ static int resampler_basic_interpolate_single(SpeexResamplerState *st, spx_uint3
|
||||
last_sample += int_advance;
|
||||
samp_frac_num += frac_advance;
|
||||
if (samp_frac_num >= den_rate)
|
||||
{
|
||||
samp_frac_num -= den_rate;
|
||||
@@ -455,34 +457,38 @@ static int resampler_basic_interpolate_s
|
||||
#ifdef FIXED_POINT
|
||||
const spx_word16_t frac = PDIV32(SHL32((samp_frac_num*st->oversample) % st->den_rate,15),st->den_rate);
|
||||
#else
|
||||
const spx_word16_t frac = ((float)((samp_frac_num*st->oversample) % st->den_rate))/st->den_rate;
|
||||
#endif
|
||||
spx_word16_t interp[4];
|
||||
|
||||
|
||||
|
@ -73,9 +121,16 @@ diff --git a/src/resample.c b/src/resample.c
|
|||
int j;
|
||||
spx_word32_t accum[4] = {0,0,0,0};
|
||||
|
||||
@@ -475,9 +479,11 @@ static int resampler_basic_interpolate_single(SpeexResamplerState *st, spx_uint3
|
||||
for(j=0;j<N;j++) {
|
||||
const spx_word16_t curr_in=iptr[j];
|
||||
accum[0] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-2]);
|
||||
accum[1] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-1]);
|
||||
accum[2] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset]);
|
||||
accum[3] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset+1]);
|
||||
}
|
||||
|
||||
cubic_coef(frac, interp);
|
||||
sum = MULT16_32_Q15(interp[0],SHR32(accum[0], 1)) + MULT16_32_Q15(interp[1],SHR32(accum[1], 1)) + MULT16_32_Q15(interp[2],SHR32(accum[2], 1)) + MULT16_32_Q15(interp[3],SHR32(accum[3], 1));
|
||||
sum = MULT16_32_Q15(interp[0],accum[0]) + MULT16_32_Q15(interp[1],accum[1]) + MULT16_32_Q15(interp[2],accum[2]) + MULT16_32_Q15(interp[3],accum[3]);
|
||||
sum = SATURATE32PSHR(sum, 15, 32767);
|
||||
-#else
|
||||
+#ifdef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE
|
||||
|
@ -86,7 +141,17 @@ diff --git a/src/resample.c b/src/resample.c
|
|||
#endif
|
||||
|
||||
out[out_stride * out_sample++] = sum;
|
||||
@@ -523,7 +529,9 @@ static int resampler_basic_interpolate_double(SpeexResamplerState *st, spx_uint3
|
||||
last_sample += int_advance;
|
||||
samp_frac_num += frac_advance;
|
||||
if (samp_frac_num >= den_rate)
|
||||
{
|
||||
samp_frac_num -= den_rate;
|
||||
@@ -518,33 +524,37 @@ static int resampler_basic_interpolate_d
|
||||
#ifdef FIXED_POINT
|
||||
const spx_word16_t frac = PDIV32(SHL32((samp_frac_num*st->oversample) % st->den_rate,15),st->den_rate);
|
||||
#else
|
||||
const spx_word16_t frac = ((float)((samp_frac_num*st->oversample) % st->den_rate))/st->den_rate;
|
||||
#endif
|
||||
spx_word16_t interp[4];
|
||||
|
||||
|
||||
|
@ -97,7 +162,13 @@ diff --git a/src/resample.c b/src/resample.c
|
|||
int j;
|
||||
double accum[4] = {0,0,0,0};
|
||||
|
||||
@@ -537,9 +545,11 @@ static int resampler_basic_interpolate_double(SpeexResamplerState *st, spx_uint3
|
||||
for(j=0;j<N;j++) {
|
||||
const double curr_in=iptr[j];
|
||||
accum[0] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-2]);
|
||||
accum[1] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-1]);
|
||||
accum[2] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset]);
|
||||
accum[3] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset+1]);
|
||||
}
|
||||
|
||||
cubic_coef(frac, interp);
|
||||
sum = MULT16_32_Q15(interp[0],accum[0]) + MULT16_32_Q15(interp[1],accum[1]) + MULT16_32_Q15(interp[2],accum[2]) + MULT16_32_Q15(interp[3],accum[3]);
|
||||
|
@ -110,11 +181,15 @@ diff --git a/src/resample.c b/src/resample.c
|
|||
#endif
|
||||
|
||||
out[out_stride * out_sample++] = PSHR32(sum,15);
|
||||
last_sample += int_advance;
|
||||
samp_frac_num += frac_advance;
|
||||
if (samp_frac_num >= den_rate)
|
||||
{
|
||||
samp_frac_num -= den_rate;
|
||||
diff --git a/src/resample_neon.c b/src/resample_neon.c
|
||||
--- a/src/resample_neon.c
|
||||
+++ b/src/resample_neon.c
|
||||
@@ -31,16 +31,18 @@
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
@@ -32,16 +32,17 @@
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
|
@ -122,27 +197,46 @@ diff --git a/src/resample_neon.c b/src/resample_neon.c
|
|||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
+#include "simd_detect.h"
|
||||
+
|
||||
#include <arm_neon.h>
|
||||
|
||||
#ifdef FIXED_POINT
|
||||
#ifdef __thumb2__
|
||||
#if defined(__aarch64__)
|
||||
static inline int32_t saturate_32bit_to_16bit(int32_t a) {
|
||||
int32_t ret;
|
||||
asm ("ssat %[ret], #16, %[a]"
|
||||
: [ret] "=&r" (ret)
|
||||
@@ -60,17 +62,17 @@ static inline int32_t saturate_32bit_to_
|
||||
return ret;
|
||||
asm ("fmov s0, %w[a]\n"
|
||||
"sqxtn h0, s0\n"
|
||||
"sxtl v0.4s, v0.4h\n"
|
||||
@@ -73,17 +74,17 @@
|
||||
}
|
||||
#endif
|
||||
#undef WORD2INT
|
||||
#define WORD2INT(x) (saturate_32bit_to_16bit(x))
|
||||
|
||||
#define OVERRIDE_INNER_PRODUCT_SINGLE
|
||||
/* Only works when len % 4 == 0 */
|
||||
/* Only works when len % 4 == 0 and len >= 4 */
|
||||
#if defined(__aarch64__)
|
||||
-static inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len)
|
||||
+int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len)
|
||||
+inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len)
|
||||
{
|
||||
int32_t ret;
|
||||
uint32_t remainder = len % 16;
|
||||
len = len - remainder;
|
||||
|
||||
asm volatile (" cmp %w[len], #0\n"
|
||||
" b.ne 1f\n"
|
||||
" ld1 {v16.4h}, [%[b]], #8\n"
|
||||
@@ -128,17 +129,17 @@
|
||||
: [ret] "=r" (ret), [a] "+r" (a), [b] "+r" (b),
|
||||
[len] "+r" (len), [remainder] "+r" (remainder)
|
||||
:
|
||||
: "cc", "v0",
|
||||
"v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23");
|
||||
return ret;
|
||||
}
|
||||
#else
|
||||
-static inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len)
|
||||
+inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len)
|
||||
{
|
||||
int32_t ret;
|
||||
uint32_t remainder = len % 16;
|
||||
|
@ -151,17 +245,36 @@ diff --git a/src/resample_neon.c b/src/resample_neon.c
|
|||
asm volatile (" cmp %[len], #0\n"
|
||||
" bne 1f\n"
|
||||
" vld1.16 {d16}, [%[b]]!\n"
|
||||
@@ -134,17 +136,17 @@ static inline int32_t saturate_float_to_
|
||||
: "q0");
|
||||
return ret;
|
||||
}
|
||||
@@ -218,17 +219,17 @@
|
||||
#endif
|
||||
|
||||
#undef WORD2INT
|
||||
#define WORD2INT(x) (saturate_float_to_16bit(x))
|
||||
|
||||
#define OVERRIDE_INNER_PRODUCT_SINGLE
|
||||
/* Only works when len % 4 == 0 */
|
||||
/* Only works when len % 4 == 0 and len >= 4 */
|
||||
#if defined(__aarch64__)
|
||||
-static inline float inner_product_single(const float *a, const float *b, unsigned int len)
|
||||
+float inner_product_single(const float *a, const float *b, unsigned int len)
|
||||
+inline float inner_product_single(const float *a, const float *b, unsigned int len)
|
||||
{
|
||||
float ret;
|
||||
uint32_t remainder = len % 16;
|
||||
len = len - remainder;
|
||||
|
||||
asm volatile (" cmp %w[len], #0\n"
|
||||
" b.ne 1f\n"
|
||||
" ld1 {v16.4s}, [%[b]], #16\n"
|
||||
@@ -273,17 +274,17 @@
|
||||
: [ret] "=w" (ret), [a] "+r" (a), [b] "+r" (b),
|
||||
[len] "+r" (len), [remainder] "+r" (remainder)
|
||||
:
|
||||
: "cc", "v1", "v2", "v3", "v4",
|
||||
"v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23");
|
||||
return ret;
|
||||
}
|
||||
#else
|
||||
-static inline float inner_product_single(const float *a, const float *b, unsigned int len)
|
||||
+inline float inner_product_single(const float *a, const float *b, unsigned int len)
|
||||
{
|
||||
float ret;
|
||||
uint32_t remainder = len % 16;
|
||||
|
@ -220,7 +333,7 @@ diff --git a/src/resample_sse.c b/src/resample_sse.c
|
|||
return ret;
|
||||
}
|
||||
|
||||
#ifdef _USE_SSE2
|
||||
#ifdef USE_SSE2
|
||||
#include <emmintrin.h>
|
||||
#define OVERRIDE_INNER_PRODUCT_DOUBLE
|
||||
|
||||
|
|
|
@ -14,13 +14,13 @@ diff --git a/src/resample.c b/src/resample.c
|
|||
+
|
||||
#ifdef OUTSIDE_SPEEX
|
||||
#include <stdlib.h>
|
||||
static void *speex_alloc (int size) {return calloc(size,1);}
|
||||
static void *speex_realloc (void *ptr, int size) {return realloc(ptr, size);}
|
||||
static void speex_free (void *ptr) {free(ptr);}
|
||||
#include "speex_resampler.h"
|
||||
#include "arch.h"
|
||||
#else /* OUTSIDE_SPEEX */
|
||||
@@ -643,25 +645,26 @@ static int update_filter(SpeexResamplerS
|
||||
static void *speex_alloc(int size) {return calloc(size,1);}
|
||||
static void *speex_realloc(void *ptr, int size) {return realloc(ptr, size);}
|
||||
static void speex_free(void *ptr) {free(ptr);}
|
||||
#ifndef EXPORT
|
||||
#define EXPORT
|
||||
#endif
|
||||
@@ -633,25 +645,26 @@ static int update_filter(SpeexResamplerS
|
||||
st->oversample >>= 1;
|
||||
if (st->oversample < 1)
|
||||
st->oversample = 1;
|
||||
|
@ -29,23 +29,24 @@ diff --git a/src/resample.c b/src/resample.c
|
|||
st->cutoff = quality_map[st->quality].upsample_bandwidth;
|
||||
}
|
||||
|
||||
- /* Choose the resampling type that requires the least amount of memory */
|
||||
-#ifdef RESAMPLE_FULL_SINC_TABLE
|
||||
- use_direct = 1;
|
||||
- if (INT_MAX/sizeof(spx_word16_t)/st->den_rate < st->filt_len)
|
||||
- goto fail;
|
||||
-#else
|
||||
- /* Choose the resampling type that requires the least amount of memory */
|
||||
- use_direct = st->filt_len*st->den_rate <= st->filt_len*st->oversample+8
|
||||
- && INT_MAX/sizeof(spx_word16_t)/st->den_rate >= st->filt_len;
|
||||
+ use_direct =
|
||||
+#ifdef RESAMPLE_HUGEMEM
|
||||
+ /* Choose the direct resampler, even with higher initialization costs,
|
||||
+ when resampling any multiple of 100 to 44100. */
|
||||
+ st->den_rate <= 441
|
||||
#else
|
||||
- use_direct = st->filt_len*st->den_rate <= st->filt_len*st->oversample+8
|
||||
+#else
|
||||
+ /* Choose the resampling type that requires the least amount of memory */
|
||||
+ st->filt_len*st->den_rate <= st->filt_len*st->oversample+8
|
||||
+#endif
|
||||
&& INT_MAX/sizeof(spx_word16_t)/st->den_rate >= st->filt_len;
|
||||
-#endif
|
||||
#endif
|
||||
+ && INT_MAX/sizeof(spx_word16_t)/st->den_rate >= st->filt_len;
|
||||
if (use_direct)
|
||||
{
|
||||
min_sinc_table_length = st->filt_len*st->den_rate;
|
||||
|
@ -54,4 +55,3 @@ diff --git a/src/resample.c b/src/resample.c
|
|||
goto fail;
|
||||
|
||||
min_sinc_table_length = st->filt_len*st->oversample+8;
|
||||
|
||||
|
|
|
@ -1,33 +0,0 @@
|
|||
https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html#Extended-Asm says
|
||||
|
||||
asm [volatile] ( AssemblerTemplate : [OutputOperands] [ : [InputOperands] [ : [Clobbers] ] ] )
|
||||
|
||||
which implies that Clobbers is optional even after the third colon, but
|
||||
the gcc used for b2g_try_emulator_dep builds says
|
||||
|
||||
resample_neon.c: In function 'saturate_32bit_to_16bit':
|
||||
resample_neon.c:50: error: expected string literal before ')' token
|
||||
|
||||
diff --git a/src/resample_neon.c b/src/resample_neon.c
|
||||
--- a/src/resample_neon.c
|
||||
+++ b/src/resample_neon.c
|
||||
@@ -41,18 +41,17 @@
|
||||
#include <arm_neon.h>
|
||||
|
||||
#ifdef FIXED_POINT
|
||||
#ifdef __thumb2__
|
||||
static inline int32_t saturate_32bit_to_16bit(int32_t a) {
|
||||
int32_t ret;
|
||||
asm ("ssat %[ret], #16, %[a]"
|
||||
: [ret] "=&r" (ret)
|
||||
- : [a] "r" (a)
|
||||
- : );
|
||||
+ : [a] "r" (a));
|
||||
return ret;
|
||||
}
|
||||
#else
|
||||
static inline int32_t saturate_32bit_to_16bit(int32_t a) {
|
||||
int32_t ret;
|
||||
asm ("vmov.s32 d0[0], %[a]\n"
|
||||
"vqmovn.s32 d0, q0\n"
|
||||
"vmov.s16 %[ret], d0[0]\n"
|
|
@ -1,7 +1,9 @@
|
|||
This is a fix for https://bugzilla.mozilla.org/show_bug.cgi?id=1274083
|
||||
|
||||
diff --git a/src/resample.c b/src/resample.c
|
||||
--- a/src/resample.c
|
||||
+++ b/src/resample.c
|
||||
@@ -1141,18 +1141,19 @@ EXPORT int speex_resampler_set_rate_frac
|
||||
@@ -1129,18 +1129,19 @@ EXPORT int speex_resampler_set_rate_frac
|
||||
|
||||
st->num_rate /= fact;
|
||||
st->den_rate /= fact;
|
||||
|
@ -10,10 +12,10 @@ diff --git a/src/resample.c b/src/resample.c
|
|||
{
|
||||
for (i=0;i<st->nb_channels;i++)
|
||||
{
|
||||
- if (_muldiv(&st->samp_frac_num[i],st->samp_frac_num[i],st->den_rate,old_den) != RESAMPLER_ERR_SUCCESS)
|
||||
- if (multiply_frac(&st->samp_frac_num[i],st->samp_frac_num[i],st->den_rate,old_den) != RESAMPLER_ERR_SUCCESS)
|
||||
- return RESAMPLER_ERR_OVERFLOW;
|
||||
+ if (_muldiv(&st->samp_frac_num[i],st->samp_frac_num[i],st->den_rate,old_den) != RESAMPLER_ERR_SUCCESS) {
|
||||
+ st->samp_frac_num[i] = st->den_rate-1;
|
||||
+ if (multiply_frac(&st->samp_frac_num[i],st->samp_frac_num[i],st->den_rate,old_den) != RESAMPLER_ERR_SUCCESS) {
|
||||
+ st->samp_frac_num[i] = st->den_rate-1;
|
||||
+ }
|
||||
/* Safety net */
|
||||
if (st->samp_frac_num[i] >= st->den_rate)
|
|
@ -1,95 +0,0 @@
|
|||
diff --git a/src/arch.h b/src/arch.h
|
||||
--- a/src/arch.h
|
||||
+++ b/src/arch.h
|
||||
@@ -172,26 +172,23 @@ typedef float spx_word32_t;
|
||||
#define SHL(a,shift) (a)
|
||||
#define SATURATE(x,a) (x)
|
||||
|
||||
#define ADD16(a,b) ((a)+(b))
|
||||
#define SUB16(a,b) ((a)-(b))
|
||||
#define ADD32(a,b) ((a)+(b))
|
||||
#define SUB32(a,b) ((a)-(b))
|
||||
#define MULT16_16_16(a,b) ((a)*(b))
|
||||
+#define MULT16_32_32(a,b) ((a)*(b))
|
||||
#define MULT16_16(a,b) ((spx_word32_t)(a)*(spx_word32_t)(b))
|
||||
#define MAC16_16(c,a,b) ((c)+(spx_word32_t)(a)*(spx_word32_t)(b))
|
||||
|
||||
-#define MULT16_32_Q11(a,b) ((a)*(b))
|
||||
-#define MULT16_32_Q13(a,b) ((a)*(b))
|
||||
-#define MULT16_32_Q14(a,b) ((a)*(b))
|
||||
#define MULT16_32_Q15(a,b) ((a)*(b))
|
||||
#define MULT16_32_P15(a,b) ((a)*(b))
|
||||
|
||||
-#define MAC16_32_Q11(c,a,b) ((c)+(a)*(b))
|
||||
#define MAC16_32_Q15(c,a,b) ((c)+(a)*(b))
|
||||
|
||||
#define MAC16_16_Q11(c,a,b) ((c)+(a)*(b))
|
||||
#define MAC16_16_Q13(c,a,b) ((c)+(a)*(b))
|
||||
#define MAC16_16_P13(c,a,b) ((c)+(a)*(b))
|
||||
#define MULT16_16_Q11_32(a,b) ((a)*(b))
|
||||
#define MULT16_16_Q13(a,b) ((a)*(b))
|
||||
#define MULT16_16_Q14(a,b) ((a)*(b))
|
||||
diff --git a/src/fixed_generic.h b/src/fixed_generic.h
|
||||
--- a/src/fixed_generic.h
|
||||
+++ b/src/fixed_generic.h
|
||||
@@ -64,32 +64,27 @@
|
||||
|
||||
#define ADD16(a,b) ((spx_word16_t)((spx_word16_t)(a)+(spx_word16_t)(b)))
|
||||
#define SUB16(a,b) ((spx_word16_t)(a)-(spx_word16_t)(b))
|
||||
#define ADD32(a,b) ((spx_word32_t)(a)+(spx_word32_t)(b))
|
||||
#define SUB32(a,b) ((spx_word32_t)(a)-(spx_word32_t)(b))
|
||||
|
||||
|
||||
/* result fits in 16 bits */
|
||||
-#define MULT16_16_16(a,b) ((((spx_word16_t)(a))*((spx_word16_t)(b))))
|
||||
+#define MULT16_16_16(a,b) (((spx_word16_t)(a))*((spx_word16_t)(b)))
|
||||
+/* result fits in 32 bits */
|
||||
+#define MULT16_32_32(a,b) (((spx_word16_t)(a))*((spx_word32_t)(b)))
|
||||
|
||||
/* (spx_word32_t)(spx_word16_t) gives TI compiler a hint that it's 16x16->32 multiply */
|
||||
#define MULT16_16(a,b) (((spx_word32_t)(spx_word16_t)(a))*((spx_word32_t)(spx_word16_t)(b)))
|
||||
|
||||
#define MAC16_16(c,a,b) (ADD32((c),MULT16_16((a),(b))))
|
||||
-#define MULT16_32_Q12(a,b) ADD32(MULT16_16((a),SHR((b),12)), SHR(MULT16_16((a),((b)&0x00000fff)),12))
|
||||
-#define MULT16_32_Q13(a,b) ADD32(MULT16_16((a),SHR((b),13)), SHR(MULT16_16((a),((b)&0x00001fff)),13))
|
||||
-#define MULT16_32_Q14(a,b) ADD32(MULT16_16((a),SHR((b),14)), SHR(MULT16_16((a),((b)&0x00003fff)),14))
|
||||
-
|
||||
-#define MULT16_32_Q11(a,b) ADD32(MULT16_16((a),SHR((b),11)), SHR(MULT16_16((a),((b)&0x000007ff)),11))
|
||||
-#define MAC16_32_Q11(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),11)), SHR(MULT16_16((a),((b)&0x000007ff)),11)))
|
||||
-
|
||||
-#define MULT16_32_P15(a,b) ADD32(MULT16_16((a),SHR((b),15)), PSHR(MULT16_16((a),((b)&0x00007fff)),15))
|
||||
-#define MULT16_32_Q15(a,b) ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))
|
||||
-#define MAC16_32_Q15(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15)))
|
||||
+#define MULT16_32_P15(a,b) ADD32(MULT16_32_32(a,SHR((b),15)), PSHR(MULT16_16((a),((b)&0x00007fff)),15))
|
||||
+#define MULT16_32_Q15(a,b) ADD32(MULT16_32_32(a,SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))
|
||||
+#define MAC16_32_Q15(c,a,b) ADD32(c,MULT16_32_Q15(a,b))
|
||||
|
||||
|
||||
#define MAC16_16_Q11(c,a,b) (ADD32((c),SHR(MULT16_16((a),(b)),11)))
|
||||
#define MAC16_16_Q13(c,a,b) (ADD32((c),SHR(MULT16_16((a),(b)),13)))
|
||||
#define MAC16_16_P13(c,a,b) (ADD32((c),SHR(ADD32(4096,MULT16_16((a),(b))),13)))
|
||||
|
||||
#define MULT16_16_Q11_32(a,b) (SHR(MULT16_16((a),(b)),11))
|
||||
#define MULT16_16_Q13(a,b) (SHR(MULT16_16((a),(b)),13))
|
||||
diff --git a/src/resample.c b/src/resample.c
|
||||
--- a/src/resample.c
|
||||
+++ b/src/resample.c
|
||||
@@ -474,17 +474,17 @@ static int resampler_basic_interpolate_s
|
||||
const spx_word16_t curr_in=iptr[j];
|
||||
accum[0] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-2]);
|
||||
accum[1] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-1]);
|
||||
accum[2] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset]);
|
||||
accum[3] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset+1]);
|
||||
}
|
||||
|
||||
cubic_coef(frac, interp);
|
||||
- sum = MULT16_32_Q15(interp[0],SHR32(accum[0], 1)) + MULT16_32_Q15(interp[1],SHR32(accum[1], 1)) + MULT16_32_Q15(interp[2],SHR32(accum[2], 1)) + MULT16_32_Q15(interp[3],SHR32(accum[3], 1));
|
||||
+ sum = MULT16_32_Q15(interp[0],accum[0]) + MULT16_32_Q15(interp[1],accum[1]) + MULT16_32_Q15(interp[2],accum[2]) + MULT16_32_Q15(interp[3],accum[3]);
|
||||
sum = SATURATE32PSHR(sum, 15, 32767);
|
||||
#ifdef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE
|
||||
} else {
|
||||
cubic_coef(frac, interp);
|
||||
sum = interpolate_product_single(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp);
|
||||
}
|
||||
#endif
|
||||
|
|
@ -69,6 +69,4 @@ vendoring:
|
|||
- 02_simd-detect-runtime.patch
|
||||
- 03_set-skip-frac.patch
|
||||
- 04_hugemem.patch
|
||||
- 05_remove-empty-asm-clobber.patch
|
||||
- 06_set-rate-overflow-no-return.patch
|
||||
- 07_integer-halving.patch
|
||||
- 05_set-rate-overflow-no-return.patch
|
||||
|
|
Загрузка…
Ссылка в новой задаче