speex: Update to current version from libspeexdsp upstream.

This also adds the SSE and NEON files, but they're not integrated with
the build system, so you must define _USE_SSE or _USE_NEON when building
to take advantage of them for now.
This commit is contained in:
Matthew Gregan 2016-09-22 17:28:16 +12:00
Родитель d880e44e19
Коммит 6634f0eb3f
7 изменённых файлов: 678 добавлений и 252 удалений

Просмотреть файл

@ -7,18 +7,18 @@
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of the Xiph.org Foundation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ -35,14 +35,6 @@
#ifndef ARCH_H
#define ARCH_H
#ifndef SPEEX_VERSION
#define SPEEX_MAJOR_VERSION 1 /**< Major Speex version. */
#define SPEEX_MINOR_VERSION 1 /**< Minor Speex version. */
#define SPEEX_MICRO_VERSION 15 /**< Micro Speex version. */
#define SPEEX_EXTRA_VERSION "" /**< Extra Speex version. */
#define SPEEX_VERSION "speex-1.2beta3" /**< Speex version string. */
#endif
/* A couple test to catch stupid option combinations */
#ifdef FIXED_POINT
@ -75,7 +67,7 @@
#endif
#ifndef OUTSIDE_SPEEX
#include "../include/speex/speex_types.h"
#include "speex/speexdsp_types.h"
#endif
#define ABS(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute integer value. */
@ -109,6 +101,8 @@ typedef spx_word32_t spx_sig_t;
#define SIG_SHIFT 14
#define GAIN_SHIFT 6
#define WORD2INT(x) ((x) < -32767 ? -32768 : ((x) > 32766 ? 32767 : (x)))
#define VERY_SMALL 0
#define VERY_LARGE32 ((spx_word32_t)2147483647)
#define VERY_LARGE16 ((spx_word16_t)32767)
@ -171,6 +165,7 @@ typedef float spx_word32_t;
#define VSHR32(a,shift) (a)
#define SATURATE16(x,a) (x)
#define SATURATE32(x,a) (x)
#define SATURATE32PSHR(x,shift,a) (x)
#define PSHR(a,shift) (a)
#define SHR(a,shift) (a)
@ -210,18 +205,19 @@ typedef float spx_word32_t;
#define DIV32(a,b) (((spx_word32_t)(a))/(spx_word32_t)(b))
#define PDIV32(a,b) (((spx_word32_t)(a))/(spx_word32_t)(b))
#define WORD2INT(x) ((x) < -32767.5f ? -32768 : \
((x) > 32766.5f ? 32767 : (spx_int16_t)floor(.5 + (x))))
#endif
#if defined (CONFIG_TI_C54X) || defined (CONFIG_TI_C55X)
/* 2 on TI C5x DSP */
#define BYTES_PER_CHAR 2
#define BYTES_PER_CHAR 2
#define BITS_PER_CHAR 16
#define LOG2_BITS_PER_CHAR 4
#else
#else
#define BYTES_PER_CHAR 1
#define BITS_PER_CHAR 8

Просмотреть файл

@ -7,18 +7,18 @@
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of the Xiph.org Foundation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ -52,6 +52,10 @@
#define SATURATE16(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x)))
#define SATURATE32(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x)))
#define SATURATE32PSHR(x,shift,a) (((x)>=(SHL32(a,shift))) ? (a) : \
(x)<=-(SHL32(a,shift)) ? -(a) : \
(PSHR32(x, shift)))
#define SHR(a,shift) ((a) >> (shift))
#define SHL(a,shift) ((spx_word32_t)(a) << (shift))
#define PSHR(a,shift) (SHR((a)+((EXTEND32(1)<<((shift))>>1)),shift))

Просмотреть файл

@ -1,6 +1,6 @@
/* Copyright (C) 2007-2008 Jean-Marc Valin
Copyright (C) 2008 Thorvald Natvig
File: resample.c
Arbitrary resampling code
@ -38,22 +38,22 @@
- Low memory requirement
- Good *perceptual* quality (and not best SNR)
Warning: This resampler is relatively new. Although I think I got rid of
Warning: This resampler is relatively new. Although I think I got rid of
all the major bugs and I don't expect the API to change anymore, there
may be something I've missed. So use with caution.
This algorithm is based on this original resampling algorithm:
Smith, Julius O. Digital Audio Resampling Home Page
Center for Computer Research in Music and Acoustics (CCRMA),
Center for Computer Research in Music and Acoustics (CCRMA),
Stanford University, 2007.
Web published at http://www-ccrma.stanford.edu/~jos/resample/.
Web published at http://ccrma.stanford.edu/~jos/resample/.
There is one main difference, though. This resampler uses cubic
There is one main difference, though. This resampler uses cubic
interpolation instead of linear interpolation in the above paper. This
makes the table much smaller and makes it possible to compute that table
on a per-stream basis. In turn, being able to tweak the table for each
stream makes it possible to both reduce complexity on simple ratios
(e.g. 2/3), and get rid of the rounding operations in the inner loop.
on a per-stream basis. In turn, being able to tweak the table for each
stream makes it possible to both reduce complexity on simple ratios
(e.g. 2/3), and get rid of the rounding operations in the inner loop.
The latter both reduces CPU time and makes the algorithm more SIMD-friendly.
*/
@ -69,25 +69,20 @@ static void speex_free (void *ptr) {free(ptr);}
#include "speex_resampler.h"
#include "arch.h"
#else /* OUTSIDE_SPEEX */
#include "../include/speex/speex_resampler.h"
#include "speex/speex_resampler.h"
#include "arch.h"
#include "os_support.h"
#endif /* OUTSIDE_SPEEX */
#include "stack_alloc.h"
#include <math.h>
#include <limits.h>
#ifndef M_PI
#define M_PI 3.14159263
#define M_PI 3.14159265358979323846
#endif
#ifdef FIXED_POINT
#define WORD2INT(x) ((x) < -32767 ? -32768 : ((x) > 32766 ? 32767 : (x)))
#else
#define WORD2INT(x) ((x) < -32767.5f ? -32768 : ((x) > 32766.5f ? 32767 : floor(.5+(x))))
#endif
#define IMAX(a,b) ((a) > (b) ? (a) : (b))
#define IMIN(a,b) ((a) < (b) ? (a) : (b))
@ -95,10 +90,18 @@ static void speex_free (void *ptr) {free(ptr);}
#define NULL 0
#endif
#ifndef UINT32_MAX
#define UINT32_MAX 4294967296U
#endif
#ifdef _USE_SSE
#include "resample_sse.h"
#endif
#ifdef _USE_NEON
#include "resample_neon.h"
#endif
/* Numer of elements to allocate on the stack */
#ifdef VAR_ARRAYS
#define FIXED_STACK_ALLOC 8192
@ -113,7 +116,7 @@ struct SpeexResamplerState_ {
spx_uint32_t out_rate;
spx_uint32_t num_rate;
spx_uint32_t den_rate;
int quality;
spx_uint32_t nb_channels;
spx_uint32_t filt_len;
@ -125,22 +128,22 @@ struct SpeexResamplerState_ {
spx_uint32_t oversample;
int initialised;
int started;
/* These are per-channel */
spx_int32_t *last_sample;
spx_uint32_t *samp_frac_num;
spx_uint32_t *magic_samples;
spx_word16_t *mem;
spx_word16_t *sinc_table;
spx_uint32_t sinc_table_length;
resampler_basic_func resampler_ptr;
int in_stride;
int out_stride;
} ;
static double kaiser12_table[68] = {
static const double kaiser12_table[68] = {
0.99859849, 1.00000000, 0.99859849, 0.99440475, 0.98745105, 0.97779076,
0.96549770, 0.95066529, 0.93340547, 0.91384741, 0.89213598, 0.86843014,
0.84290116, 0.81573067, 0.78710866, 0.75723148, 0.72629970, 0.69451601,
@ -154,7 +157,7 @@ static double kaiser12_table[68] = {
0.00105297, 0.00069463, 0.00043489, 0.00025272, 0.00013031, 0.0000527734,
0.00001000, 0.00000000};
/*
static double kaiser12_table[36] = {
static const double kaiser12_table[36] = {
0.99440475, 1.00000000, 0.99440475, 0.97779076, 0.95066529, 0.91384741,
0.86843014, 0.81573067, 0.75723148, 0.69451601, 0.62920216, 0.56287762,
0.49704014, 0.43304576, 0.37206735, 0.31506490, 0.26276832, 0.21567274,
@ -162,7 +165,7 @@ static double kaiser12_table[36] = {
0.03111947, 0.02127838, 0.01402878, 0.00886058, 0.00531256, 0.00298291,
0.00153438, 0.00069463, 0.00025272, 0.0000527734, 0.00000500, 0.00000000};
*/
static double kaiser10_table[36] = {
static const double kaiser10_table[36] = {
0.99537781, 1.00000000, 0.99537781, 0.98162644, 0.95908712, 0.92831446,
0.89005583, 0.84522401, 0.79486424, 0.74011713, 0.68217934, 0.62226347,
0.56155915, 0.50119680, 0.44221549, 0.38553619, 0.33194107, 0.28205962,
@ -170,15 +173,15 @@ static double kaiser10_table[36] = {
0.05731132, 0.04193980, 0.02979584, 0.02044510, 0.01345224, 0.00839739,
0.00488951, 0.00257636, 0.00115101, 0.00035515, 0.00000000, 0.00000000};
static double kaiser8_table[36] = {
static const double kaiser8_table[36] = {
0.99635258, 1.00000000, 0.99635258, 0.98548012, 0.96759014, 0.94302200,
0.91223751, 0.87580811, 0.83439927, 0.78875245, 0.73966538, 0.68797126,
0.63451750, 0.58014482, 0.52566725, 0.47185369, 0.41941150, 0.36897272,
0.32108304, 0.27619388, 0.23465776, 0.19672670, 0.16255380, 0.13219758,
0.10562887, 0.08273982, 0.06335451, 0.04724088, 0.03412321, 0.02369490,
0.01563093, 0.00959968, 0.00527363, 0.00233883, 0.00050000, 0.00000000};
static double kaiser6_table[36] = {
static const double kaiser6_table[36] = {
0.99733006, 1.00000000, 0.99733006, 0.98935595, 0.97618418, 0.95799003,
0.93501423, 0.90755855, 0.87598009, 0.84068475, 0.80211977, 0.76076565,
0.71712752, 0.67172623, 0.62508937, 0.57774224, 0.53019925, 0.48295561,
@ -187,19 +190,19 @@ static double kaiser6_table[36] = {
0.05031820, 0.03607231, 0.02432151, 0.01487334, 0.00752000, 0.00000000};
struct FuncDef {
double *table;
const double *table;
int oversample;
};
static struct FuncDef _KAISER12 = {kaiser12_table, 64};
static const struct FuncDef _KAISER12 = {kaiser12_table, 64};
#define KAISER12 (&_KAISER12)
/*static struct FuncDef _KAISER12 = {kaiser12_table, 32};
#define KAISER12 (&_KAISER12)*/
static struct FuncDef _KAISER10 = {kaiser10_table, 32};
static const struct FuncDef _KAISER10 = {kaiser10_table, 32};
#define KAISER10 (&_KAISER10)
static struct FuncDef _KAISER8 = {kaiser8_table, 32};
static const struct FuncDef _KAISER8 = {kaiser8_table, 32};
#define KAISER8 (&_KAISER8)
static struct FuncDef _KAISER6 = {kaiser6_table, 32};
static const struct FuncDef _KAISER6 = {kaiser6_table, 32};
#define KAISER6 (&_KAISER6)
struct QualityMapping {
@ -207,12 +210,12 @@ struct QualityMapping {
int oversample;
float downsample_bandwidth;
float upsample_bandwidth;
struct FuncDef *window_func;
const struct FuncDef *window_func;
};
/* This table maps conversion quality to internal parameters. There are two
reasons that explain why the up-sampling bandwidth is larger than the
reasons that explain why the up-sampling bandwidth is larger than the
down-sampling bandwidth:
1) When up-sampling, we can assume that the spectrum is already attenuated
close to the Nyquist rate (from an A/D or a previous resampling filter)
@ -234,11 +237,11 @@ static const struct QualityMapping quality_map[11] = {
{256, 32, 0.975f, 0.975f, KAISER12}, /* Q10 */ /* 96.6% cutoff (~100 dB stop) 10 */
};
/*8,24,40,56,80,104,128,160,200,256,320*/
static double compute_func(float x, struct FuncDef *func)
static double compute_func(float x, const struct FuncDef *func)
{
float y, frac;
double interp[4];
int ind;
int ind;
y = x*func->oversample;
ind = (int)floor(y);
frac = (y-ind);
@ -249,7 +252,7 @@ static double compute_func(float x, struct FuncDef *func)
interp[0] = -0.3333333333*frac + 0.5*(frac*frac) - 0.1666666667*(frac*frac*frac);
/* Just to make sure we don't have rounding problems */
interp[1] = 1.f-interp[3]-interp[2]-interp[0];
/*sum = frac*accum[1] + (1-frac)*accum[2];*/
return interp[0]*func->table[ind] + interp[1]*func->table[ind+1] + interp[2]*func->table[ind+2] + interp[3]*func->table[ind+3];
}
@ -269,7 +272,7 @@ int main(int argc, char **argv)
#ifdef FIXED_POINT
/* The slow way of computing a sinc for the table. Should improve that some day */
static spx_word16_t sinc(float cutoff, float x, int N, struct FuncDef *window_func)
static spx_word16_t sinc(float cutoff, float x, int N, const struct FuncDef *window_func)
{
/*fprintf (stderr, "%f ", x);*/
float xx = x * cutoff;
@ -282,7 +285,7 @@ static spx_word16_t sinc(float cutoff, float x, int N, struct FuncDef *window_fu
}
#else
/* The slow way of computing a sinc for the table. Should improve that some day */
static spx_word16_t sinc(float cutoff, float x, int N, struct FuncDef *window_func)
static spx_word16_t sinc(float cutoff, float x, int N, const struct FuncDef *window_func)
{
/*fprintf (stderr, "%f ", x);*/
float xx = x * cutoff;
@ -337,34 +340,35 @@ static int resampler_basic_direct_single(SpeexResamplerState *st, spx_uint32_t c
const int frac_advance = st->frac_advance;
const spx_uint32_t den_rate = st->den_rate;
spx_word32_t sum;
int j;
while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len))
{
const spx_word16_t *sinc = & sinc_table[samp_frac_num*N];
const spx_word16_t *sinct = & sinc_table[samp_frac_num*N];
const spx_word16_t *iptr = & in[last_sample];
#ifndef OVERRIDE_INNER_PRODUCT_SINGLE
int j;
sum = 0;
for(j=0;j<N;j++) sum += MULT16_16(sinc[j], iptr[j]);
for(j=0;j<N;j++) sum += MULT16_16(sinct[j], iptr[j]);
/* This code is slower on most DSPs which have only 2 accumulators.
Plus this this forces truncation to 32 bits and you lose the HW guard bits.
I think we can trust the compiler and let it vectorize and/or unroll itself.
spx_word32_t accum[4] = {0,0,0,0};
for(j=0;j<N;j+=4) {
accum[0] += MULT16_16(sinc[j], iptr[j]);
accum[1] += MULT16_16(sinc[j+1], iptr[j+1]);
accum[2] += MULT16_16(sinc[j+2], iptr[j+2]);
accum[3] += MULT16_16(sinc[j+3], iptr[j+3]);
accum[0] += MULT16_16(sinct[j], iptr[j]);
accum[1] += MULT16_16(sinct[j+1], iptr[j+1]);
accum[2] += MULT16_16(sinct[j+2], iptr[j+2]);
accum[3] += MULT16_16(sinct[j+3], iptr[j+3]);
}
sum = accum[0] + accum[1] + accum[2] + accum[3];
*/
sum = SATURATE32PSHR(sum, 15, 32767);
#else
sum = inner_product_single(sinc, iptr, N);
sum = inner_product_single(sinct, iptr, N);
#endif
out[out_stride * out_sample++] = SATURATE32(PSHR32(sum, 15), 32767);
out[out_stride * out_sample++] = sum;
last_sample += int_advance;
samp_frac_num += frac_advance;
if (samp_frac_num >= den_rate)
@ -394,25 +398,25 @@ static int resampler_basic_direct_double(SpeexResamplerState *st, spx_uint32_t c
const int frac_advance = st->frac_advance;
const spx_uint32_t den_rate = st->den_rate;
double sum;
int j;
while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len))
{
const spx_word16_t *sinc = & sinc_table[samp_frac_num*N];
const spx_word16_t *sinct = & sinc_table[samp_frac_num*N];
const spx_word16_t *iptr = & in[last_sample];
#ifndef OVERRIDE_INNER_PRODUCT_DOUBLE
int j;
double accum[4] = {0,0,0,0};
for(j=0;j<N;j+=4) {
accum[0] += sinc[j]*iptr[j];
accum[1] += sinc[j+1]*iptr[j+1];
accum[2] += sinc[j+2]*iptr[j+2];
accum[3] += sinc[j+3]*iptr[j+3];
accum[0] += sinct[j]*iptr[j];
accum[1] += sinct[j+1]*iptr[j+1];
accum[2] += sinct[j+2]*iptr[j+2];
accum[3] += sinct[j+3]*iptr[j+3];
}
sum = accum[0] + accum[1] + accum[2] + accum[3];
#else
sum = inner_product_double(sinc, iptr, N);
sum = inner_product_double(sinct, iptr, N);
#endif
out[out_stride * out_sample++] = PSHR32(sum, 15);
@ -441,7 +445,6 @@ static int resampler_basic_interpolate_single(SpeexResamplerState *st, spx_uint3
const int int_advance = st->int_advance;
const int frac_advance = st->frac_advance;
const spx_uint32_t den_rate = st->den_rate;
int j;
spx_word32_t sum;
while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len))
@ -458,6 +461,7 @@ static int resampler_basic_interpolate_single(SpeexResamplerState *st, spx_uint3
#ifndef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE
int j;
spx_word32_t accum[4] = {0,0,0,0};
for(j=0;j<N;j++) {
@ -470,12 +474,13 @@ static int resampler_basic_interpolate_single(SpeexResamplerState *st, spx_uint3
cubic_coef(frac, interp);
sum = MULT16_32_Q15(interp[0],SHR32(accum[0], 1)) + MULT16_32_Q15(interp[1],SHR32(accum[1], 1)) + MULT16_32_Q15(interp[2],SHR32(accum[2], 1)) + MULT16_32_Q15(interp[3],SHR32(accum[3], 1));
sum = SATURATE32PSHR(sum, 15, 32767);
#else
cubic_coef(frac, interp);
sum = interpolate_product_single(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp);
#endif
out[out_stride * out_sample++] = SATURATE32(PSHR32(sum, 14), 32767);
out[out_stride * out_sample++] = sum;
last_sample += int_advance;
samp_frac_num += frac_advance;
if (samp_frac_num >= den_rate)
@ -503,7 +508,6 @@ static int resampler_basic_interpolate_double(SpeexResamplerState *st, spx_uint3
const int int_advance = st->int_advance;
const int frac_advance = st->frac_advance;
const spx_uint32_t den_rate = st->den_rate;
int j;
spx_word32_t sum;
while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len))
@ -520,6 +524,7 @@ static int resampler_basic_interpolate_double(SpeexResamplerState *st, spx_uint3
#ifndef OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE
int j;
double accum[4] = {0,0,0,0};
for(j=0;j<N;j++) {
@ -536,7 +541,7 @@ static int resampler_basic_interpolate_double(SpeexResamplerState *st, spx_uint3
cubic_coef(frac, interp);
sum = interpolate_product_double(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp);
#endif
out[out_stride * out_sample++] = PSHR32(sum,15);
last_sample += int_advance;
samp_frac_num += frac_advance;
@ -553,22 +558,71 @@ static int resampler_basic_interpolate_double(SpeexResamplerState *st, spx_uint3
}
#endif
static void update_filter(SpeexResamplerState *st)
/* This resampler is used to produce zero output in situations where memory
for the filter could not be allocated. The expected numbers of input and
output samples are still processed so that callers failing to check error
codes are not surprised, possibly getting into infinite loops. */
static int resampler_basic_zero(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_word16_t *in, spx_uint32_t *in_len, spx_word16_t *out, spx_uint32_t *out_len)
{
spx_uint32_t old_length;
old_length = st->filt_len;
int out_sample = 0;
int last_sample = st->last_sample[channel_index];
spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index];
const int out_stride = st->out_stride;
const int int_advance = st->int_advance;
const int frac_advance = st->frac_advance;
const spx_uint32_t den_rate = st->den_rate;
while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len))
{
out[out_stride * out_sample++] = 0;
last_sample += int_advance;
samp_frac_num += frac_advance;
if (samp_frac_num >= den_rate)
{
samp_frac_num -= den_rate;
last_sample++;
}
}
st->last_sample[channel_index] = last_sample;
st->samp_frac_num[channel_index] = samp_frac_num;
return out_sample;
}
static int _muldiv(spx_uint32_t *result, spx_uint32_t value, spx_uint32_t mul, spx_uint32_t div)
{
speex_assert(result);
spx_uint32_t major = value / div;
spx_uint32_t remainder = value % div;
/* TODO: Could use 64 bits operation to check for overflow. But only guaranteed in C99+ */
if (remainder > UINT32_MAX / mul || major > UINT32_MAX / mul
|| major * mul > UINT32_MAX - remainder * mul / div)
return RESAMPLER_ERR_OVERFLOW;
*result = remainder * mul / div + major * mul;
return RESAMPLER_ERR_SUCCESS;
}
static int update_filter(SpeexResamplerState *st)
{
spx_uint32_t old_length = st->filt_len;
spx_uint32_t old_alloc_size = st->mem_alloc_size;
int use_direct;
spx_uint32_t min_sinc_table_length;
spx_uint32_t min_alloc_size;
st->int_advance = st->num_rate/st->den_rate;
st->frac_advance = st->num_rate%st->den_rate;
st->oversample = quality_map[st->quality].oversample;
st->filt_len = quality_map[st->quality].base_length;
if (st->num_rate > st->den_rate)
{
/* down-sampling */
st->cutoff = quality_map[st->quality].downsample_bandwidth * st->den_rate / st->num_rate;
/* FIXME: divide the numerator and denominator by a certain amount if they're too large */
st->filt_len = st->filt_len*st->num_rate / st->den_rate;
/* Round down to make sure we have a multiple of 4 */
st->filt_len &= (~0x3);
if (_muldiv(&st->filt_len,st->filt_len,st->num_rate,st->den_rate) != RESAMPLER_ERR_SUCCESS)
goto fail;
/* Round up to make sure we have a multiple of 8 for SSE */
st->filt_len = ((st->filt_len-1)&(~0x7))+8;
if (2*st->den_rate < st->num_rate)
st->oversample >>= 1;
if (4*st->den_rate < st->num_rate)
@ -583,18 +637,37 @@ static void update_filter(SpeexResamplerState *st)
/* up-sampling */
st->cutoff = quality_map[st->quality].upsample_bandwidth;
}
/* Choose the resampling type that requires the least amount of memory */
if (st->den_rate <= st->oversample)
#ifdef RESAMPLE_FULL_SINC_TABLE
use_direct = 1;
if (INT_MAX/sizeof(spx_word16_t)/st->den_rate < st->filt_len)
goto fail;
#else
use_direct = st->filt_len*st->den_rate <= st->filt_len*st->oversample+8
&& INT_MAX/sizeof(spx_word16_t)/st->den_rate >= st->filt_len;
#endif
if (use_direct)
{
min_sinc_table_length = st->filt_len*st->den_rate;
} else {
if ((INT_MAX/sizeof(spx_word16_t)-8)/st->oversample < st->filt_len)
goto fail;
min_sinc_table_length = st->filt_len*st->oversample+8;
}
if (st->sinc_table_length < min_sinc_table_length)
{
spx_word16_t *sinc_table = (spx_word16_t *)speex_realloc(st->sinc_table,min_sinc_table_length*sizeof(spx_word16_t));
if (!sinc_table)
goto fail;
st->sinc_table = sinc_table;
st->sinc_table_length = min_sinc_table_length;
}
if (use_direct)
{
spx_uint32_t i;
if (!st->sinc_table)
st->sinc_table = (spx_word16_t *)speex_alloc(st->filt_len*st->den_rate*sizeof(spx_word16_t));
else if (st->sinc_table_length < st->filt_len*st->den_rate)
{
st->sinc_table = (spx_word16_t *)speex_realloc(st->sinc_table,st->filt_len*st->den_rate*sizeof(spx_word16_t));
st->sinc_table_length = st->filt_len*st->den_rate;
}
for (i=0;i<st->den_rate;i++)
{
spx_int32_t j;
@ -614,13 +687,6 @@ static void update_filter(SpeexResamplerState *st)
/*fprintf (stderr, "resampler uses direct sinc table and normalised cutoff %f\n", cutoff);*/
} else {
spx_int32_t i;
if (!st->sinc_table)
st->sinc_table = (spx_word16_t *)speex_alloc((st->filt_len*st->oversample+8)*sizeof(spx_word16_t));
else if (st->sinc_table_length < st->filt_len*st->oversample+8)
{
st->sinc_table = (spx_word16_t *)speex_realloc(st->sinc_table,(st->filt_len*st->oversample+8)*sizeof(spx_word16_t));
st->sinc_table_length = st->filt_len*st->oversample+8;
}
for (i=-4;i<(spx_int32_t)(st->oversample*st->filt_len+4);i++)
st->sinc_table[i+4] = sinc(st->cutoff,(i/(float)st->oversample - st->filt_len/2), st->filt_len, quality_map[st->quality].window_func);
#ifdef FIXED_POINT
@ -633,51 +699,47 @@ static void update_filter(SpeexResamplerState *st)
#endif
/*fprintf (stderr, "resampler uses interpolated sinc table and normalised cutoff %f\n", cutoff);*/
}
st->int_advance = st->num_rate/st->den_rate;
st->frac_advance = st->num_rate%st->den_rate;
/* Here's the place where we update the filter memory to take into account
the change in filter length. It's probably the messiest part of the code
due to handling of lots of corner cases. */
if (!st->mem)
/* Adding buffer_size to filt_len won't overflow here because filt_len
could be multiplied by sizeof(spx_word16_t) above. */
min_alloc_size = st->filt_len-1 + st->buffer_size;
if (min_alloc_size > st->mem_alloc_size)
{
spx_word16_t *mem;
if (INT_MAX/sizeof(spx_word16_t)/st->nb_channels < min_alloc_size)
goto fail;
else if (!(mem = (spx_word16_t*)speex_realloc(st->mem, st->nb_channels*min_alloc_size * sizeof(*mem))))
goto fail;
st->mem = mem;
st->mem_alloc_size = min_alloc_size;
}
if (!st->started)
{
spx_uint32_t i;
st->mem_alloc_size = st->filt_len-1 + st->buffer_size;
st->mem = (spx_word16_t*)speex_alloc(st->nb_channels*st->mem_alloc_size * sizeof(spx_word16_t));
for (i=0;i<st->nb_channels*st->mem_alloc_size;i++)
st->mem[i] = 0;
/*speex_warning("init filter");*/
} else if (!st->started)
{
spx_uint32_t i;
st->mem_alloc_size = st->filt_len-1 + st->buffer_size;
st->mem = (spx_word16_t*)speex_realloc(st->mem, st->nb_channels*st->mem_alloc_size * sizeof(spx_word16_t));
for (i=0;i<st->nb_channels*st->mem_alloc_size;i++)
st->mem[i] = 0;
/*speex_warning("reinit filter");*/
} else if (st->filt_len > old_length)
{
spx_int32_t i;
spx_uint32_t i;
/* Increase the filter length */
/*speex_warning("increase filter size");*/
int old_alloc_size = st->mem_alloc_size;
if ((st->filt_len-1 + st->buffer_size) > st->mem_alloc_size)
for (i=st->nb_channels;i--;)
{
st->mem_alloc_size = st->filt_len-1 + st->buffer_size;
st->mem = (spx_word16_t*)speex_realloc(st->mem, st->nb_channels*st->mem_alloc_size * sizeof(spx_word16_t));
}
for (i=st->nb_channels-1;i>=0;i--)
{
spx_int32_t j;
spx_uint32_t j;
spx_uint32_t olen = old_length;
/*if (st->magic_samples[i])*/
{
/* Try and remove the magic samples as if nothing had happened */
/* FIXME: This is wrong but for now we need it to avoid going over the array bounds */
olen = old_length + 2*st->magic_samples[i];
for (j=old_length-2+st->magic_samples[i];j>=0;j--)
for (j=old_length-1+st->magic_samples[i];j--;)
st->mem[i*st->mem_alloc_size+j+st->magic_samples[i]] = st->mem[i*old_alloc_size+j];
for (j=0;j<st->magic_samples[i];j++)
st->mem[i*st->mem_alloc_size+j] = 0;
@ -718,7 +780,15 @@ static void update_filter(SpeexResamplerState *st)
st->magic_samples[i] += old_magic;
}
}
return RESAMPLER_ERR_SUCCESS;
fail:
st->resampler_ptr = resampler_basic_zero;
/* st->mem may still contain consumed input samples for the filter.
Restore filt_len so that filt_len - 1 still points to the position after
the last of these samples. */
st->filt_len = old_length;
return RESAMPLER_ERR_ALLOC_FAILED;
}
EXPORT SpeexResamplerState *speex_resampler_init(spx_uint32_t nb_channels, spx_uint32_t in_rate, spx_uint32_t out_rate, int quality, int *err)
@ -730,6 +800,8 @@ EXPORT SpeexResamplerState *speex_resampler_init_frac(spx_uint32_t nb_channels,
{
spx_uint32_t i;
SpeexResamplerState *st;
int filter_err;
if (quality > 10 || quality < 0)
{
if (err)
@ -737,6 +809,12 @@ EXPORT SpeexResamplerState *speex_resampler_init_frac(spx_uint32_t nb_channels,
return NULL;
}
st = (SpeexResamplerState *)speex_alloc(sizeof(SpeexResamplerState));
if (!st)
{
if (err)
*err = RESAMPLER_ERR_ALLOC_FAILED;
return NULL;
}
st->initialised = 0;
st->started = 0;
st->in_rate = 0;
@ -749,40 +827,43 @@ EXPORT SpeexResamplerState *speex_resampler_init_frac(spx_uint32_t nb_channels,
st->filt_len = 0;
st->mem = 0;
st->resampler_ptr = 0;
st->cutoff = 1.f;
st->nb_channels = nb_channels;
st->in_stride = 1;
st->out_stride = 1;
#ifdef FIXED_POINT
st->buffer_size = 160;
#else
st->buffer_size = 160;
#endif
/* Per channel data */
st->last_sample = (spx_int32_t*)speex_alloc(nb_channels*sizeof(int));
st->magic_samples = (spx_uint32_t*)speex_alloc(nb_channels*sizeof(int));
st->samp_frac_num = (spx_uint32_t*)speex_alloc(nb_channels*sizeof(int));
for (i=0;i<nb_channels;i++)
{
st->last_sample[i] = 0;
st->magic_samples[i] = 0;
st->samp_frac_num[i] = 0;
}
if (!(st->last_sample = (spx_int32_t*)speex_alloc(nb_channels*sizeof(spx_int32_t))))
goto fail;
if (!(st->magic_samples = (spx_uint32_t*)speex_alloc(nb_channels*sizeof(spx_uint32_t))))
goto fail;
if (!(st->samp_frac_num = (spx_uint32_t*)speex_alloc(nb_channels*sizeof(spx_uint32_t))))
goto fail;
speex_resampler_set_quality(st, quality);
speex_resampler_set_rate_frac(st, ratio_num, ratio_den, in_rate, out_rate);
update_filter(st);
st->initialised = 1;
filter_err = update_filter(st);
if (filter_err == RESAMPLER_ERR_SUCCESS)
{
st->initialised = 1;
} else {
speex_resampler_destroy(st);
st = NULL;
}
if (err)
*err = RESAMPLER_ERR_SUCCESS;
*err = filter_err;
return st;
fail:
if (err)
*err = RESAMPLER_ERR_ALLOC_FAILED;
speex_resampler_destroy(st);
return NULL;
}
EXPORT void speex_resampler_destroy(SpeexResamplerState *st)
@ -802,17 +883,17 @@ static int speex_resampler_process_native(SpeexResamplerState *st, spx_uint32_t
int out_sample = 0;
spx_word16_t *mem = st->mem + channel_index * st->mem_alloc_size;
spx_uint32_t ilen;
st->started = 1;
/* Call the right resampler through the function ptr */
out_sample = st->resampler_ptr(st, channel_index, mem, in_len, out, out_len);
if (st->last_sample[channel_index] < (spx_int32_t)*in_len)
*in_len = st->last_sample[channel_index];
*out_len = out_sample;
st->last_sample[channel_index] -= *in_len;
ilen = *in_len;
for(j=0;j<N-1;++j)
@ -825,11 +906,11 @@ static int speex_resampler_magic(SpeexResamplerState *st, spx_uint32_t channel_i
spx_uint32_t tmp_in_len = st->magic_samples[channel_index];
spx_word16_t *mem = st->mem + channel_index * st->mem_alloc_size;
const int N = st->filt_len;
speex_resampler_process_native(st, channel_index, &tmp_in_len, *out, &out_len);
st->magic_samples[channel_index] -= tmp_in_len;
/* If we couldn't process all "magic" input samples, save the rest for next time */
if (st->magic_samples[channel_index])
{
@ -855,13 +936,13 @@ EXPORT int speex_resampler_process_float(SpeexResamplerState *st, spx_uint32_t c
const spx_uint32_t xlen = st->mem_alloc_size - filt_offs;
const int istride = st->in_stride;
if (st->magic_samples[channel_index])
if (st->magic_samples[channel_index])
olen -= speex_resampler_magic(st, channel_index, &out, olen);
if (! st->magic_samples[channel_index]) {
while (ilen && olen) {
spx_uint32_t ichunk = (ilen > xlen) ? xlen : ilen;
spx_uint32_t ochunk = olen;
if (in) {
for(j=0;j<ichunk;++j)
x[j+filt_offs]=in[j*istride];
@ -879,7 +960,7 @@ EXPORT int speex_resampler_process_float(SpeexResamplerState *st, spx_uint32_t c
}
*in_len -= ilen;
*out_len -= olen;
return RESAMPLER_ERR_SUCCESS;
return st->resampler_ptr == resampler_basic_zero ? RESAMPLER_ERR_ALLOC_FAILED : RESAMPLER_ERR_SUCCESS;
}
#ifdef FIXED_POINT
@ -905,7 +986,7 @@ EXPORT int speex_resampler_process_int(SpeexResamplerState *st, spx_uint32_t cha
#endif
st->out_stride = 1;
while (ilen && olen) {
spx_word16_t *y = ystack;
spx_uint32_t ichunk = (ilen > xlen) ? xlen : ilen;
@ -942,7 +1023,7 @@ EXPORT int speex_resampler_process_int(SpeexResamplerState *st, spx_uint32_t cha
#else
out[j*ostride_save] = WORD2INT(ystack[j]);
#endif
ilen -= ichunk;
olen -= ochunk;
out += (ochunk+omagic) * ostride_save;
@ -953,7 +1034,7 @@ EXPORT int speex_resampler_process_int(SpeexResamplerState *st, spx_uint32_t cha
*in_len -= ilen;
*out_len -= olen;
return RESAMPLER_ERR_SUCCESS;
return st->resampler_ptr == resampler_basic_zero ? RESAMPLER_ERR_ALLOC_FAILED : RESAMPLER_ERR_SUCCESS;
}
EXPORT int speex_resampler_process_interleaved_float(SpeexResamplerState *st, const float *in, spx_uint32_t *in_len, float *out, spx_uint32_t *out_len)
@ -976,20 +1057,22 @@ EXPORT int speex_resampler_process_interleaved_float(SpeexResamplerState *st, co
}
st->in_stride = istride_save;
st->out_stride = ostride_save;
return RESAMPLER_ERR_SUCCESS;
return st->resampler_ptr == resampler_basic_zero ? RESAMPLER_ERR_ALLOC_FAILED : RESAMPLER_ERR_SUCCESS;
}
EXPORT int speex_resampler_process_interleaved_int(SpeexResamplerState *st, const spx_int16_t *in, spx_uint32_t *in_len, spx_int16_t *out, spx_uint32_t *out_len)
{
spx_uint32_t i;
int istride_save, ostride_save;
spx_uint32_t bak_len = *out_len;
spx_uint32_t bak_out_len = *out_len;
spx_uint32_t bak_in_len = *in_len;
istride_save = st->in_stride;
ostride_save = st->out_stride;
st->in_stride = st->out_stride = st->nb_channels;
for (i=0;i<st->nb_channels;i++)
{
*out_len = bak_len;
*out_len = bak_out_len;
*in_len = bak_in_len;
if (in != NULL)
speex_resampler_process_int(st, i, in+i, in_len, out+i, out_len);
else
@ -997,7 +1080,7 @@ EXPORT int speex_resampler_process_interleaved_int(SpeexResamplerState *st, cons
}
st->in_stride = istride_save;
st->out_stride = ostride_save;
return RESAMPLER_ERR_SUCCESS;
return st->resampler_ptr == resampler_basic_zero ? RESAMPLER_ERR_ALLOC_FAILED : RESAMPLER_ERR_SUCCESS;
}
EXPORT int speex_resampler_set_rate(SpeexResamplerState *st, spx_uint32_t in_rate, spx_uint32_t out_rate)
@ -1011,6 +1094,18 @@ EXPORT void speex_resampler_get_rate(SpeexResamplerState *st, spx_uint32_t *in_r
*out_rate = st->out_rate;
}
static inline spx_uint32_t _gcd(spx_uint32_t a, spx_uint32_t b)
{
while (b != 0)
{
spx_uint32_t temp = a;
a = b;
b = temp % b;
}
return a;
}
EXPORT int speex_resampler_set_rate_frac(SpeexResamplerState *st, spx_uint32_t ratio_num, spx_uint32_t ratio_den, spx_uint32_t in_rate, spx_uint32_t out_rate)
{
spx_uint32_t fact;
@ -1018,35 +1113,32 @@ EXPORT int speex_resampler_set_rate_frac(SpeexResamplerState *st, spx_uint32_t r
spx_uint32_t i;
if (st->in_rate == in_rate && st->out_rate == out_rate && st->num_rate == ratio_num && st->den_rate == ratio_den)
return RESAMPLER_ERR_SUCCESS;
old_den = st->den_rate;
st->in_rate = in_rate;
st->out_rate = out_rate;
st->num_rate = ratio_num;
st->den_rate = ratio_den;
/* FIXME: This is terribly inefficient, but who cares (at least for now)? */
for (fact=2;fact<=IMIN(st->num_rate, st->den_rate);fact++)
{
while ((st->num_rate % fact == 0) && (st->den_rate % fact == 0))
{
st->num_rate /= fact;
st->den_rate /= fact;
}
}
fact = _gcd (st->num_rate, st->den_rate);
st->num_rate /= fact;
st->den_rate /= fact;
if (old_den > 0)
{
for (i=0;i<st->nb_channels;i++)
{
st->samp_frac_num[i]=st->samp_frac_num[i]*st->den_rate/old_den;
if (_muldiv(&st->samp_frac_num[i],st->samp_frac_num[i],st->den_rate,old_den) != RESAMPLER_ERR_SUCCESS)
return RESAMPLER_ERR_OVERFLOW;
/* Safety net */
if (st->samp_frac_num[i] >= st->den_rate)
st->samp_frac_num[i] = st->den_rate-1;
}
}
if (st->initialised)
update_filter(st);
return update_filter(st);
return RESAMPLER_ERR_SUCCESS;
}
@ -1064,7 +1156,7 @@ EXPORT int speex_resampler_set_quality(SpeexResamplerState *st, int quality)
return RESAMPLER_ERR_SUCCESS;
st->quality = quality;
if (st->initialised)
update_filter(st);
return update_filter(st);
return RESAMPLER_ERR_SUCCESS;
}
@ -1114,6 +1206,12 @@ EXPORT int speex_resampler_skip_zeros(SpeexResamplerState *st)
EXPORT int speex_resampler_reset_mem(SpeexResamplerState *st)
{
spx_uint32_t i;
for (i=0;i<st->nb_channels;i++)
{
st->last_sample[i] = 0;
st->magic_samples[i] = 0;
st->samp_frac_num[i] = 0;
}
for (i=0;i<st->nb_channels*(st->filt_len-1);i++)
st->mem[i] = 0;
return RESAMPLER_ERR_SUCCESS;

201
src/speex/resample_neon.h Normal file
Просмотреть файл

@ -0,0 +1,201 @@
/* Copyright (C) 2007-2008 Jean-Marc Valin
* Copyright (C) 2008 Thorvald Natvig
* Copyright (C) 2011 Texas Instruments
* author Jyri Sarha
*/
/**
@file resample_neon.h
@brief Resampler functions (NEON version)
*/
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of the Xiph.org Foundation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <arm_neon.h>
#ifdef FIXED_POINT
#ifdef __thumb2__
static inline int32_t saturate_32bit_to_16bit(int32_t a) {
int32_t ret;
asm ("ssat %[ret], #16, %[a]"
: [ret] "=&r" (ret)
: [a] "r" (a)
: );
return ret;
}
#else
static inline int32_t saturate_32bit_to_16bit(int32_t a) {
int32_t ret;
asm ("vmov.s32 d0[0], %[a]\n"
"vqmovn.s32 d0, q0\n"
"vmov.s16 %[ret], d0[0]\n"
: [ret] "=&r" (ret)
: [a] "r" (a)
: "q0");
return ret;
}
#endif
#undef WORD2INT
#define WORD2INT(x) (saturate_32bit_to_16bit(x))
#define OVERRIDE_INNER_PRODUCT_SINGLE
/* Only works when len % 4 == 0 */
static inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len)
{
int32_t ret;
uint32_t remainder = len % 16;
len = len - remainder;
asm volatile (" cmp %[len], #0\n"
" bne 1f\n"
" vld1.16 {d16}, [%[b]]!\n"
" vld1.16 {d20}, [%[a]]!\n"
" subs %[remainder], %[remainder], #4\n"
" vmull.s16 q0, d16, d20\n"
" beq 5f\n"
" b 4f\n"
"1:"
" vld1.16 {d16, d17, d18, d19}, [%[b]]!\n"
" vld1.16 {d20, d21, d22, d23}, [%[a]]!\n"
" subs %[len], %[len], #16\n"
" vmull.s16 q0, d16, d20\n"
" vmlal.s16 q0, d17, d21\n"
" vmlal.s16 q0, d18, d22\n"
" vmlal.s16 q0, d19, d23\n"
" beq 3f\n"
"2:"
" vld1.16 {d16, d17, d18, d19}, [%[b]]!\n"
" vld1.16 {d20, d21, d22, d23}, [%[a]]!\n"
" subs %[len], %[len], #16\n"
" vmlal.s16 q0, d16, d20\n"
" vmlal.s16 q0, d17, d21\n"
" vmlal.s16 q0, d18, d22\n"
" vmlal.s16 q0, d19, d23\n"
" bne 2b\n"
"3:"
" cmp %[remainder], #0\n"
" beq 5f\n"
"4:"
" vld1.16 {d16}, [%[b]]!\n"
" vld1.16 {d20}, [%[a]]!\n"
" subs %[remainder], %[remainder], #4\n"
" vmlal.s16 q0, d16, d20\n"
" bne 4b\n"
"5:"
" vaddl.s32 q0, d0, d1\n"
" vadd.s64 d0, d0, d1\n"
" vqmovn.s64 d0, q0\n"
" vqrshrn.s32 d0, q0, #15\n"
" vmov.s16 %[ret], d0[0]\n"
: [ret] "=&r" (ret), [a] "+r" (a), [b] "+r" (b),
[len] "+r" (len), [remainder] "+r" (remainder)
:
: "cc", "q0",
"d16", "d17", "d18", "d19",
"d20", "d21", "d22", "d23");
return ret;
}
#elif defined(FLOATING_POINT)
static inline int32_t saturate_float_to_16bit(float a) {
int32_t ret;
asm ("vmov.f32 d0[0], %[a]\n"
"vcvt.s32.f32 d0, d0, #15\n"
"vqrshrn.s32 d0, q0, #15\n"
"vmov.s16 %[ret], d0[0]\n"
: [ret] "=&r" (ret)
: [a] "r" (a)
: "q0");
return ret;
}
#undef WORD2INT
#define WORD2INT(x) (saturate_float_to_16bit(x))
#define OVERRIDE_INNER_PRODUCT_SINGLE
/* Only works when len % 4 == 0 */
static inline float inner_product_single(const float *a, const float *b, unsigned int len)
{
float ret;
uint32_t remainder = len % 16;
len = len - remainder;
asm volatile (" cmp %[len], #0\n"
" bne 1f\n"
" vld1.32 {q4}, [%[b]]!\n"
" vld1.32 {q8}, [%[a]]!\n"
" subs %[remainder], %[remainder], #4\n"
" vmul.f32 q0, q4, q8\n"
" bne 4f\n"
" b 5f\n"
"1:"
" vld1.32 {q4, q5}, [%[b]]!\n"
" vld1.32 {q8, q9}, [%[a]]!\n"
" vld1.32 {q6, q7}, [%[b]]!\n"
" vld1.32 {q10, q11}, [%[a]]!\n"
" subs %[len], %[len], #16\n"
" vmul.f32 q0, q4, q8\n"
" vmul.f32 q1, q5, q9\n"
" vmul.f32 q2, q6, q10\n"
" vmul.f32 q3, q7, q11\n"
" beq 3f\n"
"2:"
" vld1.32 {q4, q5}, [%[b]]!\n"
" vld1.32 {q8, q9}, [%[a]]!\n"
" vld1.32 {q6, q7}, [%[b]]!\n"
" vld1.32 {q10, q11}, [%[a]]!\n"
" subs %[len], %[len], #16\n"
" vmla.f32 q0, q4, q8\n"
" vmla.f32 q1, q5, q9\n"
" vmla.f32 q2, q6, q10\n"
" vmla.f32 q3, q7, q11\n"
" bne 2b\n"
"3:"
" vadd.f32 q4, q0, q1\n"
" vadd.f32 q5, q2, q3\n"
" cmp %[remainder], #0\n"
" vadd.f32 q0, q4, q5\n"
" beq 5f\n"
"4:"
" vld1.32 {q6}, [%[b]]!\n"
" vld1.32 {q10}, [%[a]]!\n"
" subs %[remainder], %[remainder], #4\n"
" vmla.f32 q0, q6, q10\n"
" bne 4b\n"
"5:"
" vadd.f32 d0, d0, d1\n"
" vpadd.f32 d0, d0, d0\n"
" vmov.f32 %[ret], d0[0]\n"
: [ret] "=&r" (ret), [a] "+r" (a), [b] "+r" (b),
[len] "+l" (len), [remainder] "+l" (remainder)
:
: "cc", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8",
"q9", "q10", "q11");
return ret;
}
#endif

128
src/speex/resample_sse.h Normal file
Просмотреть файл

@ -0,0 +1,128 @@
/* Copyright (C) 2007-2008 Jean-Marc Valin
* Copyright (C) 2008 Thorvald Natvig
*/
/**
@file resample_sse.h
@brief Resampler functions (SSE version)
*/
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of the Xiph.org Foundation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <xmmintrin.h>
#define OVERRIDE_INNER_PRODUCT_SINGLE
static inline float inner_product_single(const float *a, const float *b, unsigned int len)
{
int i;
float ret;
__m128 sum = _mm_setzero_ps();
for (i=0;i<len;i+=8)
{
sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i), _mm_loadu_ps(b+i)));
sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i+4), _mm_loadu_ps(b+i+4)));
}
sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
_mm_store_ss(&ret, sum);
return ret;
}
#define OVERRIDE_INTERPOLATE_PRODUCT_SINGLE
static inline float interpolate_product_single(const float *a, const float *b, unsigned int len, const spx_uint32_t oversample, float *frac) {
int i;
float ret;
__m128 sum = _mm_setzero_ps();
__m128 f = _mm_loadu_ps(frac);
for(i=0;i<len;i+=2)
{
sum = _mm_add_ps(sum, _mm_mul_ps(_mm_load1_ps(a+i), _mm_loadu_ps(b+i*oversample)));
sum = _mm_add_ps(sum, _mm_mul_ps(_mm_load1_ps(a+i+1), _mm_loadu_ps(b+(i+1)*oversample)));
}
sum = _mm_mul_ps(f, sum);
sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
_mm_store_ss(&ret, sum);
return ret;
}
#ifdef _USE_SSE2
#include <emmintrin.h>
#define OVERRIDE_INNER_PRODUCT_DOUBLE
static inline double inner_product_double(const float *a, const float *b, unsigned int len)
{
int i;
double ret;
__m128d sum = _mm_setzero_pd();
__m128 t;
for (i=0;i<len;i+=8)
{
t = _mm_mul_ps(_mm_loadu_ps(a+i), _mm_loadu_ps(b+i));
sum = _mm_add_pd(sum, _mm_cvtps_pd(t));
sum = _mm_add_pd(sum, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
t = _mm_mul_ps(_mm_loadu_ps(a+i+4), _mm_loadu_ps(b+i+4));
sum = _mm_add_pd(sum, _mm_cvtps_pd(t));
sum = _mm_add_pd(sum, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
}
sum = _mm_add_sd(sum, _mm_unpackhi_pd(sum, sum));
_mm_store_sd(&ret, sum);
return ret;
}
#define OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE
static inline double interpolate_product_double(const float *a, const float *b, unsigned int len, const spx_uint32_t oversample, float *frac) {
int i;
double ret;
__m128d sum;
__m128d sum1 = _mm_setzero_pd();
__m128d sum2 = _mm_setzero_pd();
__m128 f = _mm_loadu_ps(frac);
__m128d f1 = _mm_cvtps_pd(f);
__m128d f2 = _mm_cvtps_pd(_mm_movehl_ps(f,f));
__m128 t;
for(i=0;i<len;i+=2)
{
t = _mm_mul_ps(_mm_load1_ps(a+i), _mm_loadu_ps(b+i*oversample));
sum1 = _mm_add_pd(sum1, _mm_cvtps_pd(t));
sum2 = _mm_add_pd(sum2, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
t = _mm_mul_ps(_mm_load1_ps(a+i+1), _mm_loadu_ps(b+(i+1)*oversample));
sum1 = _mm_add_pd(sum1, _mm_cvtps_pd(t));
sum2 = _mm_add_pd(sum2, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
}
sum1 = _mm_mul_pd(f1, sum1);
sum2 = _mm_mul_pd(f2, sum2);
sum = _mm_add_pd(sum1, sum2);
sum = _mm_add_sd(sum, _mm_unpackhi_pd(sum, sum));
_mm_store_sd(&ret, sum);
return ret;
}
#endif

Просмотреть файл

@ -1,8 +1,8 @@
/* Copyright (C) 2007 Jean-Marc Valin
File: speex_resampler.h
Resampling code
The design goals of this code are:
- Very fast algorithm
- Low memory requirement
@ -43,7 +43,7 @@
/********* WARNING: MENTAL SANITY ENDS HERE *************/
/* If the resampler is defined outside of Speex, we change the symbol names so that
/* If the resampler is defined outside of Speex, we change the symbol names so that
there won't be any clash if linking with Speex later on. */
/* #define RANDOM_PREFIX your software name here */
@ -53,7 +53,7 @@
#define CAT_PREFIX2(a,b) a ## b
#define CAT_PREFIX(a,b) CAT_PREFIX2(a, b)
#define speex_resampler_init CAT_PREFIX(RANDOM_PREFIX,_resampler_init)
#define speex_resampler_init_frac CAT_PREFIX(RANDOM_PREFIX,_resampler_init_frac)
#define speex_resampler_destroy CAT_PREFIX(RANDOM_PREFIX,_resampler_destroy)
@ -81,14 +81,12 @@
#define spx_int32_t int
#define spx_uint16_t unsigned short
#define spx_uint32_t unsigned int
#define speex_assert(cond)
#else /* OUTSIDE_SPEEX */
#ifdef _BUILD_SPEEX
# include "speex_types.h"
#else
# include <speex/speex_types.h>
#endif
#include "speexdsp_types.h"
#endif /* OUTSIDE_SPEEX */
@ -108,7 +106,8 @@ enum {
RESAMPLER_ERR_BAD_STATE = 2,
RESAMPLER_ERR_INVALID_ARG = 3,
RESAMPLER_ERR_PTR_OVERLAP = 4,
RESAMPLER_ERR_OVERFLOW = 5,
RESAMPLER_ERR_MAX_ERROR
};
@ -124,14 +123,14 @@ typedef struct SpeexResamplerState_ SpeexResamplerState;
* @return Newly created resampler state
* @retval NULL Error: not enough memory
*/
SpeexResamplerState *speex_resampler_init(spx_uint32_t nb_channels,
spx_uint32_t in_rate,
spx_uint32_t out_rate,
SpeexResamplerState *speex_resampler_init(spx_uint32_t nb_channels,
spx_uint32_t in_rate,
spx_uint32_t out_rate,
int quality,
int *err);
/** Create a new resampler with fractional input/output rates. The sampling
* rate ratio is an arbitrary rational number with both the numerator and
/** Create a new resampler with fractional input/output rates. The sampling
* rate ratio is an arbitrary rational number with both the numerator and
* denominator being 32-bit integers.
* @param nb_channels Number of channels to be processed
* @param ratio_num Numerator of the sampling rate ratio
@ -143,11 +142,11 @@ SpeexResamplerState *speex_resampler_init(spx_uint32_t nb_channels,
* @return Newly created resampler state
* @retval NULL Error: not enough memory
*/
SpeexResamplerState *speex_resampler_init_frac(spx_uint32_t nb_channels,
spx_uint32_t ratio_num,
spx_uint32_t ratio_den,
spx_uint32_t in_rate,
spx_uint32_t out_rate,
SpeexResamplerState *speex_resampler_init_frac(spx_uint32_t nb_channels,
spx_uint32_t ratio_num,
spx_uint32_t ratio_den,
spx_uint32_t in_rate,
spx_uint32_t out_rate,
int quality,
int *err);
@ -158,24 +157,24 @@ void speex_resampler_destroy(SpeexResamplerState *st);
/** Resample a float array. The input and output buffers must *not* overlap.
* @param st Resampler state
* @param channel_index Index of the channel to process for the multi-channel
* @param channel_index Index of the channel to process for the multi-channel
* base (0 otherwise)
* @param in Input buffer
* @param in_len Number of input samples in the input buffer. Returns the
* @param in_len Number of input samples in the input buffer. Returns the
* number of samples processed
* @param out Output buffer
* @param out_len Size of the output buffer. Returns the number of samples written
*/
int speex_resampler_process_float(SpeexResamplerState *st,
spx_uint32_t channel_index,
const float *in,
spx_uint32_t *in_len,
float *out,
int speex_resampler_process_float(SpeexResamplerState *st,
spx_uint32_t channel_index,
const float *in,
spx_uint32_t *in_len,
float *out,
spx_uint32_t *out_len);
/** Resample an int array. The input and output buffers must *not* overlap.
* @param st Resampler state
* @param channel_index Index of the channel to process for the multi-channel
* @param channel_index Index of the channel to process for the multi-channel
* base (0 otherwise)
* @param in Input buffer
* @param in_len Number of input samples in the input buffer. Returns the number
@ -183,11 +182,11 @@ int speex_resampler_process_float(SpeexResamplerState *st,
* @param out Output buffer
* @param out_len Size of the output buffer. Returns the number of samples written
*/
int speex_resampler_process_int(SpeexResamplerState *st,
spx_uint32_t channel_index,
const spx_int16_t *in,
spx_uint32_t *in_len,
spx_int16_t *out,
int speex_resampler_process_int(SpeexResamplerState *st,
spx_uint32_t channel_index,
const spx_int16_t *in,
spx_uint32_t *in_len,
spx_int16_t *out,
spx_uint32_t *out_len);
/** Resample an interleaved float array. The input and output buffers must *not* overlap.
@ -199,10 +198,10 @@ int speex_resampler_process_int(SpeexResamplerState *st,
* @param out_len Size of the output buffer. Returns the number of samples written.
* This is all per-channel.
*/
int speex_resampler_process_interleaved_float(SpeexResamplerState *st,
const float *in,
spx_uint32_t *in_len,
float *out,
int speex_resampler_process_interleaved_float(SpeexResamplerState *st,
const float *in,
spx_uint32_t *in_len,
float *out,
spx_uint32_t *out_len);
/** Resample an interleaved int array. The input and output buffers must *not* overlap.
@ -214,10 +213,10 @@ int speex_resampler_process_interleaved_float(SpeexResamplerState *st,
* @param out_len Size of the output buffer. Returns the number of samples written.
* This is all per-channel.
*/
int speex_resampler_process_interleaved_int(SpeexResamplerState *st,
const spx_int16_t *in,
spx_uint32_t *in_len,
spx_int16_t *out,
int speex_resampler_process_interleaved_int(SpeexResamplerState *st,
const spx_int16_t *in,
spx_uint32_t *in_len,
spx_int16_t *out,
spx_uint32_t *out_len);
/** Set (change) the input/output sampling rates (integer value).
@ -225,8 +224,8 @@ int speex_resampler_process_interleaved_int(SpeexResamplerState *st,
* @param in_rate Input sampling rate (integer number of Hz).
* @param out_rate Output sampling rate (integer number of Hz).
*/
int speex_resampler_set_rate(SpeexResamplerState *st,
spx_uint32_t in_rate,
int speex_resampler_set_rate(SpeexResamplerState *st,
spx_uint32_t in_rate,
spx_uint32_t out_rate);
/** Get the current input/output sampling rates (integer value).
@ -234,11 +233,11 @@ int speex_resampler_set_rate(SpeexResamplerState *st,
* @param in_rate Input sampling rate (integer number of Hz) copied.
* @param out_rate Output sampling rate (integer number of Hz) copied.
*/
void speex_resampler_get_rate(SpeexResamplerState *st,
spx_uint32_t *in_rate,
void speex_resampler_get_rate(SpeexResamplerState *st,
spx_uint32_t *in_rate,
spx_uint32_t *out_rate);
/** Set (change) the input/output sampling rates and resampling ratio
/** Set (change) the input/output sampling rates and resampling ratio
* (fractional values in Hz supported).
* @param st Resampler state
* @param ratio_num Numerator of the sampling rate ratio
@ -246,10 +245,10 @@ void speex_resampler_get_rate(SpeexResamplerState *st,
* @param in_rate Input sampling rate rounded to the nearest integer (in Hz).
* @param out_rate Output sampling rate rounded to the nearest integer (in Hz).
*/
int speex_resampler_set_rate_frac(SpeexResamplerState *st,
spx_uint32_t ratio_num,
spx_uint32_t ratio_den,
spx_uint32_t in_rate,
int speex_resampler_set_rate_frac(SpeexResamplerState *st,
spx_uint32_t ratio_num,
spx_uint32_t ratio_den,
spx_uint32_t in_rate,
spx_uint32_t out_rate);
/** Get the current resampling ratio. This will be reduced to the least
@ -258,52 +257,52 @@ int speex_resampler_set_rate_frac(SpeexResamplerState *st,
* @param ratio_num Numerator of the sampling rate ratio copied
* @param ratio_den Denominator of the sampling rate ratio copied
*/
void speex_resampler_get_ratio(SpeexResamplerState *st,
spx_uint32_t *ratio_num,
void speex_resampler_get_ratio(SpeexResamplerState *st,
spx_uint32_t *ratio_num,
spx_uint32_t *ratio_den);
/** Set (change) the conversion quality.
* @param st Resampler state
* @param quality Resampling quality between 0 and 10, where 0 has poor
* @param quality Resampling quality between 0 and 10, where 0 has poor
* quality and 10 has very high quality.
*/
int speex_resampler_set_quality(SpeexResamplerState *st,
int speex_resampler_set_quality(SpeexResamplerState *st,
int quality);
/** Get the conversion quality.
* @param st Resampler state
* @param quality Resampling quality between 0 and 10, where 0 has poor
* @param quality Resampling quality between 0 and 10, where 0 has poor
* quality and 10 has very high quality.
*/
void speex_resampler_get_quality(SpeexResamplerState *st,
void speex_resampler_get_quality(SpeexResamplerState *st,
int *quality);
/** Set (change) the input stride.
* @param st Resampler state
* @param stride Input stride
*/
void speex_resampler_set_input_stride(SpeexResamplerState *st,
void speex_resampler_set_input_stride(SpeexResamplerState *st,
spx_uint32_t stride);
/** Get the input stride.
* @param st Resampler state
* @param stride Input stride copied
*/
void speex_resampler_get_input_stride(SpeexResamplerState *st,
void speex_resampler_get_input_stride(SpeexResamplerState *st,
spx_uint32_t *stride);
/** Set (change) the output stride.
* @param st Resampler state
* @param stride Output stride
*/
void speex_resampler_set_output_stride(SpeexResamplerState *st,
void speex_resampler_set_output_stride(SpeexResamplerState *st,
spx_uint32_t stride);
/** Get the output stride.
* @param st Resampler state copied
* @param stride Output stride
*/
void speex_resampler_get_output_stride(SpeexResamplerState *st,
void speex_resampler_get_output_stride(SpeexResamplerState *st,
spx_uint32_t *stride);
/** Get the latency introduced by the resampler measured in input samples.
@ -316,8 +315,8 @@ int speex_resampler_get_input_latency(SpeexResamplerState *st);
*/
int speex_resampler_get_output_latency(SpeexResamplerState *st);
/** Make sure that the first samples to go out of the resamplers don't have
* leading zeros. This is only useful before starting to use a newly created
/** Make sure that the first samples to go out of the resamplers don't have
* leading zeros. This is only useful before starting to use a newly created
* resampler. It is recommended to use that when resampling an audio file, as
* it will generate a file with the same length. For real-time processing,
* it is probably easier not to use this call (so that the output duration

Просмотреть файл

@ -7,18 +7,18 @@
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of the Xiph.org Foundation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ -101,7 +101,7 @@
#endif
#if defined(VAR_ARRAYS)
#define VARDECL(var)
#define VARDECL(var)
#define ALLOC(var, size, type) type var[size]
#elif defined(USE_ALLOCA)
#define VARDECL(var) var