Bug 944538 - Update libopus to 1.1rc2. r=cpearce

Bumped to include the MULT16_32_P16 bugfix.
2013-11-28 14:20:00 -08:00 · 2013-11-28 14:20:00 -08:00 · 5427c4c87b
--- a/media/libopus/README_MOZILLA
+++ b/media/libopus/README_MOZILLA
@ -8,4 +8,4 @@ files after the copy step.

 The upstream repository is https://git.xiph.org/opus.git

-The git tag/revision used was v1.1-beta-23-gf2446c2.
+The git tag/revision used was v1.1-rc2-1-g35a44c6.
--- a/media/libopus/celt/_kiss_fft_guts.h
+++ b/media/libopus/celt/_kiss_fft_guts.h
@ -94,11 +94,11 @@
    do {(res).r = ADD32((res).r,(a).r);  (res).i = SUB32((res).i,(a).i); \
    }while(0)

-#if defined(ARMv4_ASM)
+#if defined(OPUS_ARM_INLINE_ASM)
 #include "arm/kiss_fft_armv4.h"
 #endif

-#if defined(ARMv5E_ASM)
+#if defined(OPUS_ARM_INLINE_EDSP)
 #include "arm/kiss_fft_armv5e.h"
 #endif

--- a/media/libopus/celt/arch.h
+++ b/media/libopus/celt/arch.h
@ -35,6 +35,7 @@
 #define ARCH_H

 #include "opus_types.h"
+#include "opus_defines.h"

 # if !defined(__GNUC_PREREQ)
 #  if defined(__GNUC__)&&defined(__GNUC_MINOR__)
@ -54,7 +55,7 @@
 #ifdef __GNUC__
 __attribute__((noreturn))
 #endif
-static inline void _celt_fatal(const char *str, const char *file, int line)
+static OPUS_INLINE void _celt_fatal(const char *str, const char *file, int line)
 {
   fprintf (stderr, "Fatal (internal) error in %s, line %d: %s\n", file, line, str);
   abort();
@ -113,9 +114,9 @@ typedef opus_val32 celt_ener;

 #include "fixed_generic.h"

-#ifdef ARMv5E_ASM
+#ifdef OPUS_ARM_INLINE_EDSP
 #include "arm/fixed_armv5e.h"
-#elif defined (ARMv4_ASM)
+#elif defined (OPUS_ARM_INLINE_ASM)
 #include "arm/fixed_armv4.h"
 #elif defined (BFIN_ASM)
 #include "fixed_bfin.h"
@ -185,6 +186,7 @@ typedef float celt_ener;
 #define MAC16_32_Q15(c,a,b)     ((c)+(a)*(b))

 #define MULT16_16_Q11_32(a,b)     ((a)*(b))
+#define MULT16_16_Q11(a,b)     ((a)*(b))
 #define MULT16_16_Q13(a,b)     ((a)*(b))
 #define MULT16_16_Q14(a,b)     ((a)*(b))
 #define MULT16_16_Q15(a,b)     ((a)*(b))
--- a/media/libopus/celt/arm/arm_celt_map.c
+++ b/media/libopus/celt/arm/arm_celt_map.c
@ -0,0 +1,49 @@
+/* Copyright (c) 2010 Xiph.Org Foundation
+ * Copyright (c) 2013 Parrot */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "pitch.h"
+
+#if defined(OPUS_HAVE_RTCD)
+
+# if defined(FIXED_POINT)
+opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
+    const opus_val16 *, opus_val32 *, int , int) = {
+  celt_pitch_xcorr_c,               /* ARMv4 */
+  MAY_HAVE_EDSP(celt_pitch_xcorr),  /* EDSP */
+  MAY_HAVE_MEDIA(celt_pitch_xcorr), /* Media */
+  MAY_HAVE_NEON(celt_pitch_xcorr)   /* NEON */
+};
+# else
+#  error "Floating-point implementation is not supported by ARM asm yet." \
+ "Reconfigure with --disable-rtcd or send patches."
+# endif
+
+#endif
--- a/media/libopus/celt/arm/armcpu.c
+++ b/media/libopus/celt/arm/armcpu.c
@ -49,13 +49,13 @@
 # define WIN32_EXTRA_LEAN
 # include <windows.h>

-static inline opus_uint32 opus_cpu_capabilities(void){
+static OPUS_INLINE opus_uint32 opus_cpu_capabilities(void){
  opus_uint32 flags;
  flags=0;
-  /* MSVC has no inline __asm support for ARM, but it does let you __emit
+  /* MSVC has no OPUS_INLINE __asm support for ARM, but it does let you __emit
   * instructions via their assembled hex code.
   * All of these instructions should be essentially nops. */
-# if defined(ARMv5E_ASM)
+# if defined(OPUS_ARM_MAY_HAVE_EDSP)
  __try{
    /*PLD [r13]*/
    __emit(0xF5DDF000);
@ -64,7 +64,7 @@ static inline opus_uint32 opus_cpu_capabilities(void){
  __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
    /*Ignore exception.*/
  }
-#  if defined(ARMv6E_ASM)
+#  if defined(OPUS_ARM_MAY_HAVE_MEDIA)
  __try{
    /*SHADD8 r3,r3,r3*/
    __emit(0xE6333F93);
@ -73,7 +73,7 @@ static inline opus_uint32 opus_cpu_capabilities(void){
  __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
    /*Ignore exception.*/
  }
-#   if defined(ARM_HAVE_NEON)
+#   if defined(OPUS_ARM_MAY_HAVE_NEON)
  __try{
    /*VORR q0,q0,q0*/
    __emit(0xF2200150);
@ -107,19 +107,26 @@ opus_uint32 opus_cpu_capabilities(void)

    while(fgets(buf, 512, cpuinfo) != NULL)
    {
+# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_NEON)
      /* Search for edsp and neon flag */
      if(memcmp(buf, "Features", 8) == 0)
      {
        char *p;
+#  if defined(OPUS_ARM_MAY_HAVE_EDSP)
        p = strstr(buf, " edsp");
        if(p != NULL && (p[5] == ' ' || p[5] == '\n'))
          flags |= OPUS_CPU_ARM_EDSP;
+#  endif

+#  if defined(OPUS_ARM_MAY_HAVE_NEON)
        p = strstr(buf, " neon");
        if(p != NULL && (p[5] == ' ' || p[5] == '\n'))
          flags |= OPUS_CPU_ARM_NEON;
+#  endif
      }
+# endif

+# if defined(OPUS_ARM_MAY_HAVE_MEDIA)
      /* Search for media capabilities (>= ARMv6) */
      if(memcmp(buf, "CPU architecture:", 17) == 0)
      {
@ -129,6 +136,7 @@ opus_uint32 opus_cpu_capabilities(void)
        if(version >= 6)
          flags |= OPUS_CPU_ARM_MEDIA;
      }
+# endif
    }

    fclose(cpuinfo);
--- a/media/libopus/celt/arm/armcpu.h
+++ b/media/libopus/celt/arm/armcpu.h
@ -25,11 +25,47 @@
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

-/* Original code from libtheora modified to suit to Opus */
+#if !defined(ARMCPU_H)
+# define ARMCPU_H

-#ifndef ARMCPU_H
-#define ARMCPU_H
+# if defined(OPUS_ARM_MAY_HAVE_EDSP)
+#  define MAY_HAVE_EDSP(name) name ## _edsp
+# else
+#  define MAY_HAVE_EDSP(name) name ## _c
+# endif

+# if defined(OPUS_ARM_MAY_HAVE_MEDIA)
+#  define MAY_HAVE_MEDIA(name) name ## _media
+# else
+#  define MAY_HAVE_MEDIA(name) MAY_HAVE_EDSP(name)
+# endif
+
+# if defined(OPUS_ARM_MAY_HAVE_NEON)
+#  define MAY_HAVE_NEON(name) name ## _neon
+# else
+#  define MAY_HAVE_NEON(name) MAY_HAVE_MEDIA(name)
+# endif
+
+# if defined(OPUS_ARM_PRESUME_EDSP)
+#  define PRESUME_EDSP(name) name ## _edsp
+# else
+#  define PRESUME_EDSP(name) name ## _c
+# endif
+
+# if defined(OPUS_ARM_PRESUME_MEDIA)
+#  define PRESUME_MEDIA(name) name ## _media
+# else
+#  define PRESUME_MEDIA(name) PRESUME_EDSP(name)
+# endif
+
+# if defined(OPUS_ARM_PRESUME_NEON)
+#  define PRESUME_NEON(name) name ## _neon
+# else
+#  define PRESUME_NEON(name) PRESUME_MEDIA(name)
+# endif
+
+# if defined(OPUS_HAVE_RTCD)
 int opus_select_arch(void);
+# endif

 #endif
--- a/media/libopus/celt/arm/armopts.s.in
+++ b/media/libopus/celt/arm/armopts.s.in
@ -0,0 +1,37 @@
+/* Copyright (C) 2013 Mozilla Corporation */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+; Set the following to 1 if we have EDSP instructions
+;  (LDRD/STRD, etc., ARMv5E and later).
+OPUS_ARM_MAY_HAVE_EDSP  * @OPUS_ARM_MAY_HAVE_EDSP@
+
+; Set the following to 1 if we have ARMv6 media instructions.
+OPUS_ARM_MAY_HAVE_MEDIA * @OPUS_ARM_MAY_HAVE_MEDIA@
+
+; Set the following to 1 if we have NEON (some ARMv7)
+OPUS_ARM_MAY_HAVE_NEON  * @OPUS_ARM_MAY_HAVE_NEON@
+
+END
--- a/media/libopus/celt/arm/celt_pitch_xcorr_arm.s
+++ b/media/libopus/celt/arm/celt_pitch_xcorr_arm.s
@ -0,0 +1,545 @@
+; Copyright (c) 2007-2008 CSIRO
+; Copyright (c) 2007-2009 Xiph.Org Foundation
+; Copyright (c) 2013      Parrot
+; Written by Aurélien Zanelli
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+;
+; - Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+;
+; - Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in the
+; documentation and/or other materials provided with the distribution.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+  AREA  |.text|, CODE, READONLY
+
+  GET    celt/arm/armopts.s
+
+IF OPUS_ARM_MAY_HAVE_EDSP
+  EXPORT celt_pitch_xcorr_edsp
+ENDIF
+
+IF OPUS_ARM_MAY_HAVE_NEON
+  EXPORT celt_pitch_xcorr_neon
+ENDIF
+
+IF OPUS_ARM_MAY_HAVE_NEON
+
+; Compute sum[k]=sum(x[j]*y[j+k],j=0...len-1), k=0...3
+xcorr_kernel_neon PROC
+  ; input:
+  ;   r3     = int         len
+  ;   r4     = opus_val16 *x
+  ;   r5     = opus_val16 *y
+  ;   q0     = opus_val32  sum[4]
+  ; output:
+  ;   q0     = opus_val32  sum[4]
+  ; preserved: r0-r3, r6-r11, d2, q4-q7, q9-q15
+  ; internal usage:
+  ;   r12 = int j
+  ;   d3  = y_3|y_2|y_1|y_0
+  ;   q2  = y_B|y_A|y_9|y_8|y_7|y_6|y_5|y_4
+  ;   q3  = x_7|x_6|x_5|x_4|x_3|x_2|x_1|x_0
+  ;   q8  = scratch
+  ;
+  ; Load y[0...3]
+  ; This requires len>0 to always be valid (which we assert in the C code).
+  VLD1.16      {d5}, [r5]!
+  SUBS         r12, r3, #8
+  BLE xcorr_kernel_neon_process4
+; Process 8 samples at a time.
+; This loop loads one y value more than we actually need. Therefore we have to
+; stop as soon as there are 8 or fewer samples left (instead of 7), to avoid
+; reading past the end of the array.
+xcorr_kernel_neon_process8
+  ; This loop has 19 total instructions (10 cycles to issue, minimum), with
+  ; - 2 cycles of ARM insrtuctions,
+  ; - 10 cycles of load/store/byte permute instructions, and
+  ; - 9 cycles of data processing instructions.
+  ; On a Cortex A8, we dual-issue the maximum amount (9 cycles) between the
+  ; latter two categories, meaning the whole loop should run in 10 cycles per
+  ; iteration, barring cache misses.
+  ;
+  ; Load x[0...7]
+  VLD1.16      {d6, d7}, [r4]!
+  ; Unlike VMOV, VAND is a data processsing instruction (and doesn't get
+  ; assembled to VMOV, like VORR would), so it dual-issues with the prior VLD1.
+  VAND         d3, d5, d5
+  SUBS         r12, r12, #8
+  ; Load y[4...11]
+  VLD1.16      {d4, d5}, [r5]!
+  VMLAL.S16    q0, d3, d6[0]
+  VEXT.16      d16, d3, d4, #1
+  VMLAL.S16    q0, d4, d7[0]
+  VEXT.16      d17, d4, d5, #1
+  VMLAL.S16    q0, d16, d6[1]
+  VEXT.16      d16, d3, d4, #2
+  VMLAL.S16    q0, d17, d7[1]
+  VEXT.16      d17, d4, d5, #2
+  VMLAL.S16    q0, d16, d6[2]
+  VEXT.16      d16, d3, d4, #3
+  VMLAL.S16    q0, d17, d7[2]
+  VEXT.16      d17, d4, d5, #3
+  VMLAL.S16    q0, d16, d6[3]
+  VMLAL.S16    q0, d17, d7[3]
+  BGT xcorr_kernel_neon_process8
+; Process 4 samples here if we have > 4 left (still reading one extra y value).
+xcorr_kernel_neon_process4
+  ADDS         r12, r12, #4
+  BLE xcorr_kernel_neon_process2
+  ; Load x[0...3]
+  VLD1.16      d6, [r4]!
+  ; Use VAND since it's a data processing instruction again.
+  VAND         d4, d5, d5
+  SUB          r12, r12, #4
+  ; Load y[4...7]
+  VLD1.16      d5, [r5]!
+  VMLAL.S16    q0, d4, d6[0]
+  VEXT.16      d16, d4, d5, #1
+  VMLAL.S16    q0, d16, d6[1]
+  VEXT.16      d16, d4, d5, #2
+  VMLAL.S16    q0, d16, d6[2]
+  VEXT.16      d16, d4, d5, #3
+  VMLAL.S16    q0, d16, d6[3]
+; Process 2 samples here if we have > 2 left (still reading one extra y value).
+xcorr_kernel_neon_process2
+  ADDS         r12, r12, #2
+  BLE xcorr_kernel_neon_process1
+  ; Load x[0...1]
+  VLD2.16      {d6[],d7[]}, [r4]!
+  ; Use VAND since it's a data processing instruction again.
+  VAND         d4, d5, d5
+  SUB          r12, r12, #2
+  ; Load y[4...5]
+  VLD1.32      {d5[]}, [r5]!
+  VMLAL.S16    q0, d4, d6
+  VEXT.16      d16, d4, d5, #1
+  ; Replace bottom copy of {y5,y4} in d5 with {y3,y2} from d4, using VSRI
+  ; instead of VEXT, since it's a data-processing instruction.
+  VSRI.64      d5, d4, #32
+  VMLAL.S16    q0, d16, d7
+; Process 1 sample using the extra y value we loaded above.
+xcorr_kernel_neon_process1
+  ; Load next *x
+  VLD1.16      {d6[]}, [r4]!
+  ADDS         r12, r12, #1
+  ; y[0...3] are left in d5 from prior iteration(s) (if any)
+  VMLAL.S16    q0, d5, d6
+  MOVLE        pc, lr
+; Now process 1 last sample, not reading ahead.
+  ; Load last *y
+  VLD1.16      {d4[]}, [r5]!
+  VSRI.64      d4, d5, #16
+  ; Load last *x
+  VLD1.16      {d6[]}, [r4]!
+  VMLAL.S16    q0, d4, d6
+  MOV          pc, lr
+  ENDP
+
+; opus_val32 celt_pitch_xcorr_neon(opus_val16 *_x, opus_val16 *_y,
+;  opus_val32 *xcorr, int len, int max_pitch)
+celt_pitch_xcorr_neon PROC
+  ; input:
+  ;   r0  = opus_val16 *_x
+  ;   r1  = opus_val16 *_y
+  ;   r2  = opus_val32 *xcorr
+  ;   r3  = int         len
+  ; output:
+  ;   r0  = int         maxcorr
+  ; internal usage:
+  ;   r4  = opus_val16 *x (for xcorr_kernel_neon())
+  ;   r5  = opus_val16 *y (for xcorr_kernel_neon())
+  ;   r6  = int         max_pitch
+  ;   r12 = int         j
+  ;   q15 = int         maxcorr[4] (q15 is not used by xcorr_kernel_neon())
+  STMFD        sp!, {r4-r6, lr}
+  LDR          r6, [sp, #16]
+  VMOV.S32     q15, #1
+  ; if (max_pitch < 4) goto celt_pitch_xcorr_neon_process4_done
+  SUBS         r6, r6, #4
+  BLT celt_pitch_xcorr_neon_process4_done
+celt_pitch_xcorr_neon_process4
+  ; xcorr_kernel_neon parameters:
+  ; r3 = len, r4 = _x, r5 = _y, q0 = {0, 0, 0, 0}
+  MOV          r4, r0
+  MOV          r5, r1
+  VEOR         q0, q0, q0
+  ; xcorr_kernel_neon only modifies r4, r5, r12, and q0...q3.
+  ; So we don't save/restore any other registers.
+  BL xcorr_kernel_neon
+  SUBS         r6, r6, #4
+  VST1.32      {q0}, [r2]!
+  ; _y += 4
+  ADD          r1, r1, #8
+  VMAX.S32     q15, q15, q0
+  ; if (max_pitch < 4) goto celt_pitch_xcorr_neon_process4_done
+  BGE celt_pitch_xcorr_neon_process4
+; We have less than 4 sums left to compute.
+celt_pitch_xcorr_neon_process4_done
+  ADDS         r6, r6, #4
+  ; Reduce maxcorr to a single value
+  VMAX.S32     d30, d30, d31
+  VPMAX.S32    d30, d30, d30
+  ; if (max_pitch <= 0) goto celt_pitch_xcorr_neon_done
+  BLE celt_pitch_xcorr_neon_done
+; Now compute each remaining sum one at a time.
+celt_pitch_xcorr_neon_process_remaining
+  MOV          r4, r0
+  MOV          r5, r1
+  VMOV.I32     q0, #0
+  SUBS         r12, r3, #8
+  BLT celt_pitch_xcorr_neon_process_remaining4
+; Sum terms 8 at a time.
+celt_pitch_xcorr_neon_process_remaining_loop8
+  ; Load x[0...7]
+  VLD1.16      {q1}, [r4]!
+  ; Load y[0...7]
+  VLD1.16      {q2}, [r5]!
+  SUBS         r12, r12, #8
+  VMLAL.S16    q0, d4, d2
+  VMLAL.S16    q0, d5, d3
+  BGE celt_pitch_xcorr_neon_process_remaining_loop8
+; Sum terms 4 at a time.
+celt_pitch_xcorr_neon_process_remaining4
+  ADDS         r12, r12, #4
+  BLT celt_pitch_xcorr_neon_process_remaining4_done
+  ; Load x[0...3]
+  VLD1.16      {d2}, [r4]!
+  ; Load y[0...3]
+  VLD1.16      {d3}, [r5]!
+  SUB          r12, r12, #4
+  VMLAL.S16    q0, d3, d2
+celt_pitch_xcorr_neon_process_remaining4_done
+  ; Reduce the sum to a single value.
+  VADD.S32     d0, d0, d1
+  VPADDL.S32   d0, d0
+  ADDS         r12, r12, #4
+  BLE celt_pitch_xcorr_neon_process_remaining_loop_done
+; Sum terms 1 at a time.
+celt_pitch_xcorr_neon_process_remaining_loop1
+  VLD1.16      {d2[]}, [r4]!
+  VLD1.16      {d3[]}, [r5]!
+  SUBS         r12, r12, #1
+  VMLAL.S16    q0, d2, d3
+  BGT celt_pitch_xcorr_neon_process_remaining_loop1
+celt_pitch_xcorr_neon_process_remaining_loop_done
+  VST1.32      {d0[0]}, [r2]!
+  VMAX.S32     d30, d30, d0
+  SUBS         r6, r6, #1
+  ; _y++
+  ADD          r1, r1, #2
+  ; if (--max_pitch > 0) goto celt_pitch_xcorr_neon_process_remaining
+  BGT celt_pitch_xcorr_neon_process_remaining
+celt_pitch_xcorr_neon_done
+  VMOV.32      r0, d30[0]
+  LDMFD        sp!, {r4-r6, pc}
+  ENDP
+
+ENDIF
+
+IF OPUS_ARM_MAY_HAVE_EDSP
+
+; This will get used on ARMv7 devices without NEON, so it has been optimized
+; to take advantage of dual-issuing where possible.
+xcorr_kernel_edsp PROC
+  ; input:
+  ;   r3      = int         len
+  ;   r4      = opus_val16 *_x (must be 32-bit aligned)
+  ;   r5      = opus_val16 *_y (must be 32-bit aligned)
+  ;   r6...r9 = opus_val32  sum[4]
+  ; output:
+  ;   r6...r9 = opus_val32  sum[4]
+  ; preserved: r0-r5
+  ; internal usage
+  ;   r2      = int         j
+  ;   r12,r14 = opus_val16  x[4]
+  ;   r10,r11 = opus_val16  y[4]
+  STMFD        sp!, {r2,r4,r5,lr}
+  LDR          r10, [r5], #4      ; Load y[0...1]
+  SUBS         r2, r3, #4         ; j = len-4
+  LDR          r11, [r5], #4      ; Load y[2...3]
+  BLE xcorr_kernel_edsp_process4_done
+  LDR          r12, [r4], #4      ; Load x[0...1]
+  ; Stall
+xcorr_kernel_edsp_process4
+  ; The multiplies must issue from pipeline 0, and can't dual-issue with each
+  ; other. Every other instruction here dual-issues with a multiply, and is
+  ; thus "free". There should be no stalls in the body of the loop.
+  SMLABB       r6, r12, r10, r6   ; sum[0] = MAC16_16(sum[0],x_0,y_0)
+  LDR          r14, [r4], #4      ; Load x[2...3]
+  SMLABT       r7, r12, r10, r7   ; sum[1] = MAC16_16(sum[1],x_0,y_1)
+  SUBS         r2, r2, #4         ; j-=4
+  SMLABB       r8, r12, r11, r8   ; sum[2] = MAC16_16(sum[2],x_0,y_2)
+  SMLABT       r9, r12, r11, r9   ; sum[3] = MAC16_16(sum[3],x_0,y_3)
+  SMLATT       r6, r12, r10, r6   ; sum[0] = MAC16_16(sum[0],x_1,y_1)
+  LDR          r10, [r5], #4      ; Load y[4...5]
+  SMLATB       r7, r12, r11, r7   ; sum[1] = MAC16_16(sum[1],x_1,y_2)
+  SMLATT       r8, r12, r11, r8   ; sum[2] = MAC16_16(sum[2],x_1,y_3)
+  SMLATB       r9, r12, r10, r9   ; sum[3] = MAC16_16(sum[3],x_1,y_4)
+  LDRGT        r12, [r4], #4      ; Load x[0...1]
+  SMLABB       r6, r14, r11, r6   ; sum[0] = MAC16_16(sum[0],x_2,y_2)
+  SMLABT       r7, r14, r11, r7   ; sum[1] = MAC16_16(sum[1],x_2,y_3)
+  SMLABB       r8, r14, r10, r8   ; sum[2] = MAC16_16(sum[2],x_2,y_4)
+  SMLABT       r9, r14, r10, r9   ; sum[3] = MAC16_16(sum[3],x_2,y_5)
+  SMLATT       r6, r14, r11, r6   ; sum[0] = MAC16_16(sum[0],x_3,y_3)
+  LDR          r11, [r5], #4      ; Load y[6...7]
+  SMLATB       r7, r14, r10, r7   ; sum[1] = MAC16_16(sum[1],x_3,y_4)
+  SMLATT       r8, r14, r10, r8   ; sum[2] = MAC16_16(sum[2],x_3,y_5)
+  SMLATB       r9, r14, r11, r9   ; sum[3] = MAC16_16(sum[3],x_3,y_6)
+  BGT xcorr_kernel_edsp_process4
+xcorr_kernel_edsp_process4_done
+  ADDS         r2, r2, #4
+  BLE xcorr_kernel_edsp_done
+  LDRH         r12, [r4], #2      ; r12 = *x++
+  SUBS         r2, r2, #1         ; j--
+  ; Stall
+  SMLABB       r6, r12, r10, r6   ; sum[0] = MAC16_16(sum[0],x,y_0)
+  LDRGTH       r14, [r4], #2      ; r14 = *x++
+  SMLABT       r7, r12, r10, r7   ; sum[1] = MAC16_16(sum[1],x,y_1)
+  SMLABB       r8, r12, r11, r8   ; sum[2] = MAC16_16(sum[2],x,y_2)
+  SMLABT       r9, r12, r11, r9   ; sum[3] = MAC16_16(sum[3],x,y_3)
+  BLE xcorr_kernel_edsp_done
+  SMLABT       r6, r14, r10, r6   ; sum[0] = MAC16_16(sum[0],x,y_1)
+  SUBS         r2, r2, #1         ; j--
+  SMLABB       r7, r14, r11, r7   ; sum[1] = MAC16_16(sum[1],x,y_2)
+  LDRH         r10, [r5], #2      ; r10 = y_4 = *y++
+  SMLABT       r8, r14, r11, r8   ; sum[2] = MAC16_16(sum[2],x,y_3)
+  LDRGTH       r12, [r4], #2      ; r12 = *x++
+  SMLABB       r9, r14, r10, r9   ; sum[3] = MAC16_16(sum[3],x,y_4)
+  BLE xcorr_kernel_edsp_done
+  SMLABB       r6, r12, r11, r6   ; sum[0] = MAC16_16(sum[0],tmp,y_2)
+  CMP          r2, #1             ; j--
+  SMLABT       r7, r12, r11, r7   ; sum[1] = MAC16_16(sum[1],tmp,y_3)
+  LDRH         r2, [r5], #2       ; r2 = y_5 = *y++
+  SMLABB       r8, r12, r10, r8   ; sum[2] = MAC16_16(sum[2],tmp,y_4)
+  LDRGTH       r14, [r4]          ; r14 = *x
+  SMLABB       r9, r12, r2, r9    ; sum[3] = MAC16_16(sum[3],tmp,y_5)
+  BLE xcorr_kernel_edsp_done
+  SMLABT       r6, r14, r11, r6   ; sum[0] = MAC16_16(sum[0],tmp,y_3)
+  LDRH         r11, [r5]          ; r11 = y_6 = *y
+  SMLABB       r7, r14, r10, r7   ; sum[1] = MAC16_16(sum[1],tmp,y_4)
+  SMLABB       r8, r14, r2, r8    ; sum[2] = MAC16_16(sum[2],tmp,y_5)
+  SMLABB       r9, r14, r11, r9   ; sum[3] = MAC16_16(sum[3],tmp,y_6)
+xcorr_kernel_edsp_done
+  LDMFD        sp!, {r2,r4,r5,pc}
+  ENDP
+
+celt_pitch_xcorr_edsp PROC
+  ; input:
+  ;   r0  = opus_val16 *_x (must be 32-bit aligned)
+  ;   r1  = opus_val16 *_y (only needs to be 16-bit aligned)
+  ;   r2  = opus_val32 *xcorr
+  ;   r3  = int         len
+  ; output:
+  ;   r0  = maxcorr
+  ; internal usage
+  ;   r4  = opus_val16 *x
+  ;   r5  = opus_val16 *y
+  ;   r6  = opus_val32  sum0
+  ;   r7  = opus_val32  sum1
+  ;   r8  = opus_val32  sum2
+  ;   r9  = opus_val32  sum3
+  ;   r1  = int         max_pitch
+  ;   r12 = int         j
+  STMFD        sp!, {r4-r11, lr}
+  MOV          r5, r1
+  LDR          r1, [sp, #36]
+  MOV          r4, r0
+  TST          r5, #3
+  ; maxcorr = 1
+  MOV          r0, #1
+  BEQ          celt_pitch_xcorr_edsp_process1u_done
+; Compute one sum at the start to make y 32-bit aligned.
+  SUBS         r12, r3, #4
+  ; r14 = sum = 0
+  MOV          r14, #0
+  LDRH         r8, [r5], #2
+  BLE celt_pitch_xcorr_edsp_process1u_loop4_done
+  LDR          r6, [r4], #4
+  MOV          r8, r8, LSL #16
+celt_pitch_xcorr_edsp_process1u_loop4
+  LDR          r9, [r5], #4
+  SMLABT       r14, r6, r8, r14     ; sum = MAC16_16(sum, x_0, y_0)
+  LDR          r7, [r4], #4
+  SMLATB       r14, r6, r9, r14     ; sum = MAC16_16(sum, x_1, y_1)
+  LDR          r8, [r5], #4
+  SMLABT       r14, r7, r9, r14     ; sum = MAC16_16(sum, x_2, y_2)
+  SUBS         r12, r12, #4         ; j-=4
+  SMLATB       r14, r7, r8, r14     ; sum = MAC16_16(sum, x_3, y_3)
+  LDRGT        r6, [r4], #4
+  BGT celt_pitch_xcorr_edsp_process1u_loop4
+  MOV          r8, r8, LSR #16
+celt_pitch_xcorr_edsp_process1u_loop4_done
+  ADDS         r12, r12, #4
+celt_pitch_xcorr_edsp_process1u_loop1
+  LDRGEH       r6, [r4], #2
+  ; Stall
+  SMLABBGE     r14, r6, r8, r14    ; sum = MAC16_16(sum, *x, *y)
+  SUBGES       r12, r12, #1
+  LDRGTH       r8, [r5], #2
+  BGT celt_pitch_xcorr_edsp_process1u_loop1
+  ; Restore _x
+  SUB          r4, r4, r3, LSL #1
+  ; Restore and advance _y
+  SUB          r5, r5, r3, LSL #1
+  ; maxcorr = max(maxcorr, sum)
+  CMP          r0, r14
+  ADD          r5, r5, #2
+  MOVLT        r0, r14
+  SUBS         r1, r1, #1
+  ; xcorr[i] = sum
+  STR          r14, [r2], #4
+  BLE celt_pitch_xcorr_edsp_done
+celt_pitch_xcorr_edsp_process1u_done
+  ; if (max_pitch < 4) goto celt_pitch_xcorr_edsp_process2
+  SUBS         r1, r1, #4
+  BLT celt_pitch_xcorr_edsp_process2
+celt_pitch_xcorr_edsp_process4
+  ; xcorr_kernel_edsp parameters:
+  ; r3 = len, r4 = _x, r5 = _y, r6...r9 = sum[4] = {0, 0, 0, 0}
+  MOV          r6, #0
+  MOV          r7, #0
+  MOV          r8, #0
+  MOV          r9, #0
+  BL xcorr_kernel_edsp  ; xcorr_kernel_edsp(_x, _y+i, xcorr+i, len)
+  ; maxcorr = max(maxcorr, sum0, sum1, sum2, sum3)
+  CMP          r0, r6
+  ; _y+=4
+  ADD          r5, r5, #8
+  MOVLT        r0, r6
+  CMP          r0, r7
+  MOVLT        r0, r7
+  CMP          r0, r8
+  MOVLT        r0, r8
+  CMP          r0, r9
+  MOVLT        r0, r9
+  STMIA        r2!, {r6-r9}
+  SUBS         r1, r1, #4
+  BGE celt_pitch_xcorr_edsp_process4
+celt_pitch_xcorr_edsp_process2
+  ADDS         r1, r1, #2
+  BLT celt_pitch_xcorr_edsp_process1a
+  SUBS         r12, r3, #4
+  ; {r10, r11} = {sum0, sum1} = {0, 0}
+  MOV          r10, #0
+  MOV          r11, #0
+  LDR          r8, [r5], #4
+  BLE celt_pitch_xcorr_edsp_process2_loop_done
+  LDR          r6, [r4], #4
+  LDR          r9, [r5], #4
+celt_pitch_xcorr_edsp_process2_loop4
+  SMLABB       r10, r6, r8, r10     ; sum0 = MAC16_16(sum0, x_0, y_0)
+  LDR          r7, [r4], #4
+  SMLABT       r11, r6, r8, r11     ; sum1 = MAC16_16(sum1, x_0, y_1)
+  SUBS         r12, r12, #4         ; j-=4
+  SMLATT       r10, r6, r8, r10     ; sum0 = MAC16_16(sum0, x_1, y_1)
+  LDR          r8, [r5], #4
+  SMLATB       r11, r6, r9, r11     ; sum1 = MAC16_16(sum1, x_1, y_2)
+  LDRGT        r6, [r4], #4
+  SMLABB       r10, r7, r9, r10     ; sum0 = MAC16_16(sum0, x_2, y_2)
+  SMLABT       r11, r7, r9, r11     ; sum1 = MAC16_16(sum1, x_2, y_3)
+  SMLATT       r10, r7, r9, r10     ; sum0 = MAC16_16(sum0, x_3, y_3)
+  LDRGT        r9, [r5], #4
+  SMLATB       r11, r7, r8, r11     ; sum1 = MAC16_16(sum1, x_3, y_4)
+  BGT celt_pitch_xcorr_edsp_process2_loop4
+celt_pitch_xcorr_edsp_process2_loop_done
+  ADDS         r12, r12, #2
+  BLE  celt_pitch_xcorr_edsp_process2_1
+  LDR          r6, [r4], #4
+  ; Stall
+  SMLABB       r10, r6, r8, r10     ; sum0 = MAC16_16(sum0, x_0, y_0)
+  LDR          r9, [r5], #4
+  SMLABT       r11, r6, r8, r11     ; sum1 = MAC16_16(sum1, x_0, y_1)
+  SUB          r12, r12, #2
+  SMLATT       r10, r6, r8, r10     ; sum0 = MAC16_16(sum0, x_1, y_1)
+  MOV          r8, r9
+  SMLATB       r11, r6, r9, r11     ; sum1 = MAC16_16(sum1, x_1, y_2)
+celt_pitch_xcorr_edsp_process2_1
+  LDRH         r6, [r4], #2
+  ADDS         r12, r12, #1
+  ; Stall
+  SMLABB       r10, r6, r8, r10     ; sum0 = MAC16_16(sum0, x_0, y_0)
+  LDRGTH       r7, [r4], #2
+  SMLABT       r11, r6, r8, r11     ; sum1 = MAC16_16(sum1, x_0, y_1)
+  BLE celt_pitch_xcorr_edsp_process2_done
+  LDRH         r9, [r5], #2
+  SMLABT       r10, r7, r8, r10     ; sum0 = MAC16_16(sum0, x_0, y_1)
+  SMLABB       r11, r7, r9, r11     ; sum1 = MAC16_16(sum1, x_0, y_2)
+celt_pitch_xcorr_edsp_process2_done
+  ; Restore _x
+  SUB          r4, r4, r3, LSL #1
+  ; Restore and advance _y
+  SUB          r5, r5, r3, LSL #1
+  ; maxcorr = max(maxcorr, sum0)
+  CMP          r0, r10
+  ADD          r5, r5, #2
+  MOVLT        r0, r10
+  SUB          r1, r1, #2
+  ; maxcorr = max(maxcorr, sum1)
+  CMP          r0, r11
+  ; xcorr[i] = sum
+  STR          r10, [r2], #4
+  MOVLT        r0, r11
+  STR          r11, [r2], #4
+celt_pitch_xcorr_edsp_process1a
+  ADDS         r1, r1, #1
+  BLT celt_pitch_xcorr_edsp_done
+  SUBS         r12, r3, #4
+  ; r14 = sum = 0
+  MOV          r14, #0
+  BLT celt_pitch_xcorr_edsp_process1a_loop_done
+  LDR          r6, [r4], #4
+  LDR          r8, [r5], #4
+  LDR          r7, [r4], #4
+  LDR          r9, [r5], #4
+celt_pitch_xcorr_edsp_process1a_loop4
+  SMLABB       r14, r6, r8, r14     ; sum = MAC16_16(sum, x_0, y_0)
+  SUBS         r12, r12, #4         ; j-=4
+  SMLATT       r14, r6, r8, r14     ; sum = MAC16_16(sum, x_1, y_1)
+  LDRGE        r6, [r4], #4
+  SMLABB       r14, r7, r9, r14     ; sum = MAC16_16(sum, x_2, y_2)
+  LDRGE        r8, [r5], #4
+  SMLATT       r14, r7, r9, r14     ; sum = MAC16_16(sum, x_3, y_3)
+  LDRGE        r7, [r4], #4
+  LDRGE        r9, [r5], #4
+  BGE celt_pitch_xcorr_edsp_process1a_loop4
+celt_pitch_xcorr_edsp_process1a_loop_done
+  ADDS         r12, r12, #2
+  LDRGE        r6, [r4], #4
+  LDRGE        r8, [r5], #4
+  ; Stall
+  SMLABBGE     r14, r6, r8, r14     ; sum = MAC16_16(sum, x_0, y_0)
+  SUBGE        r12, r12, #2
+  SMLATTGE     r14, r6, r8, r14     ; sum = MAC16_16(sum, x_1, y_1)
+  ADDS         r12, r12, #1
+  LDRGEH       r6, [r4], #2
+  LDRGEH       r8, [r5], #2
+  ; Stall
+  SMLABBGE     r14, r6, r8, r14     ; sum = MAC16_16(sum, *x, *y)
+  ; maxcorr = max(maxcorr, sum)
+  CMP          r0, r14
+  ; xcorr[i] = sum
+  STR          r14, [r2], #4
+  MOVLT        r0, r14
+celt_pitch_xcorr_edsp_done
+  LDMFD        sp!, {r4-r11, pc}
+  ENDP
+
+ENDIF
+
+END
--- a/media/libopus/celt/arm/fixed_armv4.h
+++ b/media/libopus/celt/arm/fixed_armv4.h
@ -29,7 +29,7 @@

 /** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */
 #undef MULT16_32_Q16
-static inline opus_val32 MULT16_32_Q16_armv4(opus_val16 a, opus_val32 b)
+static OPUS_INLINE opus_val32 MULT16_32_Q16_armv4(opus_val16 a, opus_val32 b)
 {
  unsigned rd_lo;
  int rd_hi;
@ -46,7 +46,7 @@ static inline opus_val32 MULT16_32_Q16_armv4(opus_val16 a, opus_val32 b)

 /** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */
 #undef MULT16_32_Q15
-static inline opus_val32 MULT16_32_Q15_armv4(opus_val16 a, opus_val32 b)
+static OPUS_INLINE opus_val32 MULT16_32_Q15_armv4(opus_val16 a, opus_val32 b)
 {
  unsigned rd_lo;
  int rd_hi;
--- a/media/libopus/celt/arm/fixed_armv5e.h
+++ b/media/libopus/celt/arm/fixed_armv5e.h
@ -34,7 +34,7 @@

 /** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */
 #undef MULT16_32_Q16
-static inline opus_val32 MULT16_32_Q16_armv5e(opus_val16 a, opus_val32 b)
+static OPUS_INLINE opus_val32 MULT16_32_Q16_armv5e(opus_val16 a, opus_val32 b)
 {
  int res;
  __asm__(
@ -50,7 +50,7 @@ static inline opus_val32 MULT16_32_Q16_armv5e(opus_val16 a, opus_val32 b)

 /** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */
 #undef MULT16_32_Q15
-static inline opus_val32 MULT16_32_Q15_armv5e(opus_val16 a, opus_val32 b)
+static OPUS_INLINE opus_val32 MULT16_32_Q15_armv5e(opus_val16 a, opus_val32 b)
 {
  int res;
  __asm__(
@ -68,7 +68,7 @@ static inline opus_val32 MULT16_32_Q15_armv5e(opus_val16 a, opus_val32 b)
    b must fit in 31 bits.
    Result fits in 32 bits. */
 #undef MAC16_32_Q15
-static inline opus_val32 MAC16_32_Q15_armv5e(opus_val32 c, opus_val16 a,
+static OPUS_INLINE opus_val32 MAC16_32_Q15_armv5e(opus_val32 c, opus_val16 a,
 opus_val32 b)
 {
  int res;
@ -84,7 +84,7 @@ static inline opus_val32 MAC16_32_Q15_armv5e(opus_val32 c, opus_val16 a,

 /** 16x16 multiply-add where the result fits in 32 bits */
 #undef MAC16_16
-static inline opus_val32 MAC16_16_armv5e(opus_val32 c, opus_val16 a,
+static OPUS_INLINE opus_val32 MAC16_16_armv5e(opus_val32 c, opus_val16 a,
 opus_val16 b)
 {
  int res;
@ -100,7 +100,7 @@ static inline opus_val32 MAC16_16_armv5e(opus_val32 c, opus_val16 a,

 /** 16x16 multiplication where the result fits in 32 bits */
 #undef MULT16_16
-static inline opus_val32 MULT16_16_armv5e(opus_val16 a, opus_val16 b)
+static OPUS_INLINE opus_val32 MULT16_16_armv5e(opus_val16 a, opus_val16 b)
 {
  int res;
  __asm__(
--- a/media/libopus/celt/arm/pitch_arm.h
+++ b/media/libopus/celt/arm/pitch_arm.h
@ -0,0 +1,57 @@
+/* Copyright (c) 2010 Xiph.Org Foundation
+ * Copyright (c) 2013 Parrot */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#if !defined(PITCH_ARM_H)
+# define PITCH_ARM_H
+
+# include "armcpu.h"
+
+# if defined(FIXED_POINT)
+
+#  if defined(OPUS_ARM_MAY_HAVE_NEON)
+opus_val32 celt_pitch_xcorr_neon(const opus_val16 *_x, const opus_val16 *_y,
+    opus_val32 *xcorr, int len, int max_pitch);
+#  endif
+
+#  if defined(OPUS_ARM_MAY_HAVE_MEDIA)
+#   define celt_pitch_xcorr_media MAY_HAVE_EDSP(celt_pitch_xcorr)
+#  endif
+
+#  if defined(OPUS_ARM_MAY_HAVE_EDSP)
+opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y,
+    opus_val32 *xcorr, int len, int max_pitch);
+#  endif
+
+#  if !defined(OPUS_HAVE_RTCD)
+#   define OVERRIDE_PITCH_XCORR (1)
+#   define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
+  ((void)(arch),PRESUME_NEON(celt_pitch_xcorr)(_x, _y, xcorr, len, max_pitch))
+#  endif
+
+# endif
+
+#endif
--- a/media/libopus/celt/bands.c
+++ b/media/libopus/celt/bands.c
@ -214,7 +214,9 @@ void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
         j=M*eBands[i];
         band_end = M*eBands[i+1];
         lg = ADD16(bandLogE[i+c*m->nbEBands], SHL16((opus_val16)eMeans[i],6));
-#ifdef FIXED_POINT
+#ifndef FIXED_POINT
+         g = celt_exp2(lg);
+#else
         /* Handle the integer part of the log energy */
         shift = 16-(lg>>DB_SHIFT);
         if (shift>31)
@ -225,9 +227,23 @@ void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
            /* Handle the fractional part. */
            g = celt_exp2_frac(lg&((1<<DB_SHIFT)-1));
         }
-#else
-         g = celt_exp2(lg);
+         /* Handle extreme gains with negative shift. */
+         if (shift<0)
+         {
+            /* For shift < -2 we'd be likely to overflow, so we're capping
+               the gain here. This shouldn't happen unless the bitstream is
+               already corrupted. */
+            if (shift < -2)
+            {
+               g = 32767;
+               shift = -2;
+            }
+            do {
+               *f++ = SHL32(MULT16_16(*x++, g), -shift);
+            } while (++j<band_end);
+         } else
 #endif
+         /* Be careful of the fixed-point "else" just above when changing this code */
         do {
            *f++ = SHR32(MULT16_16(*x++, g), shift);
         } while (++j<band_end);
@ -492,7 +508,7 @@ int spreading_decision(const CELTMode *m, celt_norm *X, int *average,
         *tapset_decision=0;
   }
   /*printf("%d %d %d\n", hf_sum, *hf_average, *tapset_decision);*/
-   celt_assert(nbBands>0); /*M*(eBands[end]-eBands[end-1]) <= 8 assures this*/
+   celt_assert(nbBands>0); /* end has to be non-zero */
   sum /= nbBands;
   /* Recursive averaging */
   sum = (sum+*average)>>1;
@ -869,7 +885,6 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
   int q;
   int curr_bits;
   int imid=0, iside=0;
-   int N_B=N;
   int B0=B;
   opus_val16 mid=0, side=0;
   unsigned cm=0;
@ -891,8 +906,6 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
   spread = ctx->spread;
   ec = ctx->ec;

-   N_B /= B;
-
   /* If we need 1.5 more bit than we can produce, split the band in two. */
   cache = m->cache.bits + m->cache.index[(LM+1)*m->nbEBands+i];
   if (LM != -1 && b > cache[cache[0]]+12 && N>2)
@ -1072,7 +1085,6 @@ static unsigned quant_band(struct band_ctx *ctx, celt_norm *X,
   longBlocks = B0==1;

   N_B /= B;
-   N_B0 = N_B;

   /* Special case for one sample */
   if (N==1)
--- a/media/libopus/celt/celt.h
+++ b/media/libopus/celt/celt.h
@ -52,11 +52,11 @@ extern "C" {

 typedef struct {
   int valid;
-   opus_val16 tonality;
-   opus_val16 tonality_slope;
-   opus_val16 noisiness;
-   opus_val16 activity;
-   opus_val16 music_prob;
+   float tonality;
+   float tonality_slope;
+   float noisiness;
+   float activity;
+   float music_prob;
   int        bandwidth;
 }AnalysisInfo;

@ -66,6 +66,10 @@ typedef struct {

 /* Encoder/decoder Requests */

+/* Expose this option again when variable framesize actually works */
+#define OPUS_FRAMESIZE_VARIABLE              5010 /**< Optimize the frame size dynamically */
+
+
 #define CELT_SET_PREDICTION_REQUEST    10002
 /** Controls the use of interframe prediction.
    0=Independent frames
@ -109,10 +113,7 @@ typedef struct {
 #define OPUS_SET_LFE_REQUEST    10024
 #define OPUS_SET_LFE(x) OPUS_SET_LFE_REQUEST, __opus_check_int(x)

-#define OPUS_SET_ENERGY_SAVE_REQUEST    10026
-#define OPUS_SET_ENERGY_SAVE(x) OPUS_SET_ENERGY_SAVE_REQUEST, __opus_check_val16_ptr(x)
-
-#define OPUS_SET_ENERGY_MASK_REQUEST    10028
+#define OPUS_SET_ENERGY_MASK_REQUEST    10026
 #define OPUS_SET_ENERGY_MASK(x) OPUS_SET_ENERGY_MASK_REQUEST, __opus_check_val16_ptr(x)

 /* Encoder stuff */
@ -121,7 +122,8 @@ int celt_encoder_get_size(int channels);

 int celt_encode_with_ec(OpusCustomEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes, ec_enc *enc);

-int celt_encoder_init(CELTEncoder *st, opus_int32 sampling_rate, int channels);
+int celt_encoder_init(CELTEncoder *st, opus_int32 sampling_rate, int channels,
+                      int arch);



@ -141,7 +143,7 @@ int celt_decode_with_ec(OpusCustomDecoder * OPUS_RESTRICT st, const unsigned cha
 #ifdef CUSTOM_MODES
 #define OPUS_CUSTOM_NOSTATIC
 #else
-#define OPUS_CUSTOM_NOSTATIC static inline
+#define OPUS_CUSTOM_NOSTATIC static OPUS_INLINE
 #endif

 static const unsigned char trim_icdf[11] = {126, 124, 119, 109, 87, 41, 19, 9, 4, 2, 0};
@ -166,7 +168,7 @@ static const unsigned char fromOpusTable[16] = {
      0x00, 0x08, 0x10, 0x18
 };

-static inline int toOpus(unsigned char c)
+static OPUS_INLINE int toOpus(unsigned char c)
 {
   int ret=0;
   if (c<0xA0)
@ -177,7 +179,7 @@ static inline int toOpus(unsigned char c)
      return ret|(c&0x7);
 }

-static inline int fromOpus(unsigned char c)
+static OPUS_INLINE int fromOpus(unsigned char c)
 {
   if (c<0x80)
      return -1;
@ -193,6 +195,9 @@ extern const signed char tf_select_table[4][8];

 int resampling_factor(opus_int32 rate);

+void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp,
+                        int N, int CC, int upsample, const opus_val16 *coef, celt_sig *mem, int clip);
+
 void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
      opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
      const opus_val16 *window, int overlap);
--- a/media/libopus/celt/celt_decoder.c
+++ b/media/libopus/celt/celt_decoder.c
@ -175,7 +175,7 @@ void opus_custom_decoder_destroy(CELTDecoder *st)
 }
 #endif /* CUSTOM_MODES */

-static inline opus_val16 SIG2WORD16(celt_sig x)
+static OPUS_INLINE opus_val16 SIG2WORD16(celt_sig x)
 {
 #ifdef FIXED_POINT
   x = PSHR32(x, SIG_SHIFT);
@ -447,10 +447,11 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R
      {
         VARDECL( opus_val16, lp_pitch_buf );
         ALLOC( lp_pitch_buf, DECODE_BUFFER_SIZE>>1, opus_val16 );
-         pitch_downsample(decode_mem, lp_pitch_buf, DECODE_BUFFER_SIZE, C);
+         pitch_downsample(decode_mem, lp_pitch_buf,
+               DECODE_BUFFER_SIZE, C, st->arch);
         pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf,
               DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX,
-               PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index);
+               PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index, st->arch);
         pitch_index = PLC_PITCH_LAG_MAX-pitch_index;
         st->last_pitch_index = pitch_index;
      } else {
@ -481,7 +482,8 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R
            opus_val32 ac[LPC_ORDER+1];
            /* Compute LPC coefficients for the last MAX_PERIOD samples before
               the first loss so we can work in the excitation-filter domain. */
-            _celt_autocorr(exc, ac, window, overlap, LPC_ORDER, MAX_PERIOD);
+            _celt_autocorr(exc, ac, window, overlap,
+                   LPC_ORDER, MAX_PERIOD, st->arch);
            /* Add a noise floor of -40 dB. */
 #ifdef FIXED_POINT
            ac[0] += SHR32(ac[0],13);
@ -665,7 +667,6 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
   VARDECL(int, fine_priority);
   VARDECL(int, tf_res);
   VARDECL(unsigned char, collapse_masks);
-   celt_sig *out_mem[2];
   celt_sig *decode_mem[2];
   celt_sig *out_syn[2];
   opus_val16 *lpc;
@ -706,7 +707,6 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat

   c=0; do {
      decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap);
-      out_mem[c] = decode_mem[c]+DECODE_BUFFER_SIZE-MAX_PERIOD;
   } while (++c<CC);
   lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+overlap)*CC);
   oldBandE = lpc+CC*LPC_ORDER;
@ -936,7 +936,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
   } while (++c<C);

   c=0; do {
-      out_syn[c] = out_mem[c]+MAX_PERIOD-N;
+      out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N;
   } while (++c<CC);

   if (CC==2&&C==1)
--- a/media/libopus/celt/celt_encoder.c
+++ b/media/libopus/celt/celt_encoder.c
@ -111,7 +111,6 @@ struct OpusCustomEncoder {
   opus_val32 overlap_max;
   opus_val16 stereo_saving;
   int intensity;
-   opus_val16 *energy_save;
   opus_val16 *energy_mask;
   opus_val16 spec_avg;

@ -162,17 +161,8 @@ CELTEncoder *opus_custom_encoder_create(const CELTMode *mode, int channels, int
 }
 #endif /* CUSTOM_MODES */

-int celt_encoder_init(CELTEncoder *st, opus_int32 sampling_rate, int channels)
-{
-   int ret;
-   ret = opus_custom_encoder_init(st, opus_custom_mode_create(48000, 960, NULL), channels);
-   if (ret != OPUS_OK)
-      return ret;
-   st->upsample = resampling_factor(sampling_rate);
-   return OPUS_OK;
-}
-
-OPUS_CUSTOM_NOSTATIC int opus_custom_encoder_init(CELTEncoder *st, const CELTMode *mode, int channels)
+static int opus_custom_encoder_init_arch(CELTEncoder *st, const CELTMode *mode,
+                                         int channels, int arch)
 {
   if (channels < 0 || channels > 2)
      return OPUS_BAD_ARG;
@ -191,7 +181,7 @@ OPUS_CUSTOM_NOSTATIC int opus_custom_encoder_init(CELTEncoder *st, const CELTMod
   st->end = st->mode->effEBands;
   st->signalling = 1;

-   st->arch = opus_select_arch();
+   st->arch = arch;

   st->constrained_vbr = 1;
   st->clip = 1;
@ -207,6 +197,25 @@ OPUS_CUSTOM_NOSTATIC int opus_custom_encoder_init(CELTEncoder *st, const CELTMod
   return OPUS_OK;
 }

+#ifdef CUSTOM_MODES
+int opus_custom_encoder_init(CELTEncoder *st, const CELTMode *mode, int channels)
+{
+   return opus_custom_encoder_init_arch(st, mode, channels, opus_select_arch());
+}
+#endif
+
+int celt_encoder_init(CELTEncoder *st, opus_int32 sampling_rate, int channels,
+                      int arch)
+{
+   int ret;
+   ret = opus_custom_encoder_init_arch(st,
+           opus_custom_mode_create(48000, 960, NULL), channels, arch);
+   if (ret != OPUS_OK)
+      return ret;
+   st->upsample = resampling_factor(sampling_rate);
+   return OPUS_OK;
+}
+
 #ifdef CUSTOM_MODES
 void opus_custom_encoder_destroy(CELTEncoder *st)
 {
@ -241,7 +250,6 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int
   ALLOC(tmp, len, opus_val16);

   len2=len/2;
-   tf_max = 0;
   for (c=0;c<C;c++)
   {
      opus_val32 mean;
@ -370,7 +378,7 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int

 /* Looks for sudden increases of energy to decide whether we need to patch
   the transient decision */
-int patch_transient_decision(opus_val16 *new, opus_val16 *old, int nbEBands,
+int patch_transient_decision(opus_val16 *newE, opus_val16 *oldE, int nbEBands,
      int end, int C)
 {
   int i, c;
@ -380,14 +388,14 @@ int patch_transient_decision(opus_val16 *new, opus_val16 *old, int nbEBands,
      avoid false detection caused by irrelevant bands */
   if (C==1)
   {
-      spread_old[0] = old[0];
+      spread_old[0] = oldE[0];
      for (i=1;i<end;i++)
-         spread_old[i] = MAX16(spread_old[i-1]-QCONST16(1.0f, DB_SHIFT), old[i]);
+         spread_old[i] = MAX16(spread_old[i-1]-QCONST16(1.0f, DB_SHIFT), oldE[i]);
   } else {
-      spread_old[0] = MAX16(old[0],old[nbEBands]);
+      spread_old[0] = MAX16(oldE[0],oldE[nbEBands]);
      for (i=1;i<end;i++)
         spread_old[i] = MAX16(spread_old[i-1]-QCONST16(1.0f, DB_SHIFT),
-                               MAX16(old[i],old[i+nbEBands]));
+                               MAX16(oldE[i],oldE[i+nbEBands]));
   }
   for (i=end-2;i>=0;i--)
      spread_old[i] = MAX16(spread_old[i], spread_old[i+1]-QCONST16(1.0f, DB_SHIFT));
@ -396,7 +404,7 @@ int patch_transient_decision(opus_val16 *new, opus_val16 *old, int nbEBands,
      for (i=2;i<end-1;i++)
      {
         opus_val16 x1, x2;
-         x1 = MAX16(0, new[i]);
+         x1 = MAX16(0, newE[i]);
         x2 = MAX16(0, spread_old[i]);
         mean_diff = ADD32(mean_diff, EXTEND32(MAX16(0, SUB16(x1, x2))));
      }
@ -452,7 +460,7 @@ static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS
 }


-static void preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp,
+void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp,
                        int N, int CC, int upsample, const opus_val16 *coef, celt_sig *mem, int clip)
 {
   int i;
@ -489,6 +497,8 @@ static void preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_R
      for (i=0;i<Nu;i++)
         inp[i*upsample] = MAX32(-65536.f, MIN32(65536.f,inp[i*upsample]));
   }
+#else
+   (void)clip; /* Avoids a warning about clip being unused. */
 #endif
   m = *mem;
 #ifdef CUSTOM_MODES
@ -744,7 +754,7 @@ static void tf_encode(int start, int end, int isTransient, int *tf_res, int LM,
 static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
      const opus_val16 *bandLogE, int end, int LM, int C, int N0,
      AnalysisInfo *analysis, opus_val16 *stereo_saving, opus_val16 tf_estimate,
-      int intensity)
+      int intensity, opus_val16 surround_trim)
 {
   int i;
   opus_val32 diff=0;
@ -818,11 +828,13 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
   if (diff < -QCONST16(10.f, DB_SHIFT))
      trim_index++;
   trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), SHR16(diff+QCONST16(1.f, DB_SHIFT),DB_SHIFT-8)/6 ));
+   trim -= SHR16(surround_trim, DB_SHIFT-8);
   trim -= 2*SHR16(tf_estimate, 14-8);
-#ifndef FIXED_POINT
+#ifndef DISABLE_FLOAT_API
   if (analysis->valid)
   {
-      trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), 2*(analysis->tonality_slope+.05f)));
+      trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8),
+            (opus_val16)(QCONST16(2.f, 8)*(analysis->tonality_slope+.05f))));
   }
 #endif

@ -877,7 +889,7 @@ static int stereo_analysis(const CELTMode *m, const celt_norm *X,
 static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 *bandLogE2,
      int nbEBands, int start, int end, int C, int *offsets, int lsb_depth, const opus_int16 *logN,
      int isTransient, int vbr, int constrained_vbr, const opus_int16 *eBands, int LM,
-      int effectiveBytes, opus_int32 *tot_boost_, int lfe)
+      int effectiveBytes, opus_int32 *tot_boost_, int lfe, opus_val16 *surround_dynalloc)
 {
   int i, c;
   opus_int32 tot_boost=0;
@ -940,6 +952,8 @@ static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16
            follower[i] = MAX16(0, bandLogE[i]-follower[i]);
         }
      }
+      for (i=start;i<end;i++)
+         follower[i] = MAX16(follower[i], surround_dynalloc[i]);
      /* For non-transient CBR/CVBR frames, halve the dynalloc contribution */
      if ((!vbr || constrained_vbr)&&!isTransient)
      {
@ -1021,11 +1035,12 @@ static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem,
      VARDECL(opus_val16, pitch_buf);
      ALLOC(pitch_buf, (COMBFILTER_MAXPERIOD+N)>>1, opus_val16);

-      pitch_downsample(pre, pitch_buf, COMBFILTER_MAXPERIOD+N, CC);
+      pitch_downsample(pre, pitch_buf, COMBFILTER_MAXPERIOD+N, CC, st->arch);
      /* Don't search for the fir last 1.5 octave of the range because
         there's too many false-positives due to short-term correlation */
      pitch_search(pitch_buf+(COMBFILTER_MAXPERIOD>>1), pitch_buf, N,
-            COMBFILTER_MAXPERIOD-3*COMBFILTER_MINPERIOD, &pitch_index);
+            COMBFILTER_MAXPERIOD-3*COMBFILTER_MINPERIOD, &pitch_index,
+            st->arch);
      pitch_index = COMBFILTER_MAXPERIOD-pitch_index;

      gain1 = remove_doubling(pitch_buf, COMBFILTER_MAXPERIOD, COMBFILTER_MINPERIOD,
@ -1140,7 +1155,7 @@ static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32
   target = base_target;

   /*printf("%f %f %f %f %d %d ", st->analysis.activity, st->analysis.tonality, tf_estimate, st->stereo_saving, tot_boost, coded_bands);*/
-#ifndef FIXED_POINT
+#ifndef DISABLE_FLOAT_API
   if (analysis->valid && analysis->activity<.4)
      target -= (opus_int32)((coded_bins<<BITRES)*(.4f-analysis->activity));
 #endif
@ -1154,6 +1169,7 @@ static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32
      coded_stereo_dof = (eBands[coded_stereo_bands]<<LM)-coded_stereo_bands;
      /* Maximum fraction of the bits we can save if the signal is mono. */
      max_frac = DIV32_16(MULT16_16(QCONST16(0.8f, 15), coded_stereo_dof), coded_bins);
+      stereo_saving = MIN16(stereo_saving, QCONST16(1.f, 8));
      /*printf("%d %d %d ", coded_stereo_dof, coded_bins, tot_boost);*/
      target -= (opus_int32)MIN32(MULT16_32_Q15(max_frac,target),
                      SHR32(MULT16_16(stereo_saving-QCONST16(0.1f,8),(coded_stereo_dof<<BITRES)),8));
@ -1165,7 +1181,7 @@ static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32
                    QCONST16(0.02f,14) : QCONST16(0.04f,14);
   target += (opus_int32)SHL32(MULT16_32_Q15(tf_estimate-tf_calibration, target),1);

-#ifndef FIXED_POINT
+#ifndef DISABLE_FLOAT_API
   /* Apply tonality boost */
   if (analysis->valid && !lfe)
   {
@ -1177,7 +1193,7 @@ static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32
      tonal_target = target + (opus_int32)((coded_bins<<BITRES)*1.2f*tonal);
      if (pitch_change)
         tonal_target +=  (opus_int32)((coded_bins<<BITRES)*.8f);
-      /*printf("%f %f ", st->analysis.tonality, tonal);*/
+      /*printf("%f %f ", analysis->tonality, tonal);*/
      target = tonal_target;
   }
 #endif
@ -1291,6 +1307,9 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
   int transient_got_disabled=0;
   opus_val16 surround_masking=0;
   opus_val16 temporal_vbr=0;
+   opus_val16 surround_trim = 0;
+   opus_int32 equiv_rate = 510000;
+   VARDECL(opus_val16, surround_dynalloc);
   ALLOC_STACK;

   mode = st->mode;
@ -1299,14 +1318,20 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
   eBands = mode->eBands;
   tf_estimate = 0;
   if (nbCompressedBytes<2 || pcm==NULL)
-     return OPUS_BAD_ARG;
+   {
+      RESTORE_STACK;
+      return OPUS_BAD_ARG;
+   }

   frame_size *= st->upsample;
   for (LM=0;LM<=mode->maxLM;LM++)
      if (mode->shortMdctSize<<LM==frame_size)
         break;
   if (LM>mode->maxLM)
+   {
+      RESTORE_STACK;
      return OPUS_BAD_ARG;
+   }
   M=1<<LM;
   N = M*mode->shortMdctSize;

@ -1337,7 +1362,10 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
      {
         int c0 = toOpus(compressed[0]);
         if (c0<0)
+         {
+            RESTORE_STACK;
            return OPUS_BAD_ARG;
+         }
         compressed[0] = c0;
      }
      compressed++;
@ -1371,6 +1399,8 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
               (tmp+4*mode->Fs)/(8*mode->Fs)-!!st->signalling));
      effectiveBytes = nbCompressedBytes;
   }
+   if (st->bitrate != OPUS_BITRATE_MAX)
+      equiv_rate = st->bitrate - (40*C+20)*((400>>LM) - 50);

   if (enc==NULL)
   {
@ -1444,7 +1474,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
      enc->nbits_total+=tell-ec_tell(enc);
   }
   c=0; do {
-      preemphasis(pcm+c, in+c*(N+st->overlap)+st->overlap, N, CC, st->upsample,
+      celt_preemphasis(pcm+c, in+c*(N+st->overlap)+st->overlap, N, CC, st->upsample,
                  mode->preemph, st->preemph_memE+c, st->clip);
   } while (++c<CC);

@ -1454,7 +1484,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
   {
      int enabled;
      int qg;
-      enabled = (st->lfe || nbAvailableBytes>12*C) && st->start==0 && !silence && !st->disable_pf
+      enabled = ((st->lfe&&nbAvailableBytes>3) || nbAvailableBytes>12*C) && st->start==0 && !silence && !st->disable_pf
            && st->complexity >= 5 && !(st->consec_transient && LM!=3 && st->variable_duration==OPUS_FRAMESIZE_VARIABLE);

      prefilter_tapset = st->tapset_decision;
@ -1526,43 +1556,89 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
      }
   }
   amp2Log2(mode, effEnd, st->end, bandE, bandLogE, C);
-   if (st->energy_save)
-   {
-      opus_val16 offset = shortBlocks?HALF16(SHL16(LM, DB_SHIFT)):0;
-#ifdef FIXED_POINT
-      /* Compensate for the 1/8 gain we apply in the fixed-point downshift to avoid overflows. */
-      offset -= QCONST16(3.0f, DB_SHIFT);
-#endif
-      for(i=0;i<C*nbEBands;i++)
-         st->energy_save[i]=bandLogE[i]-offset;
-      st->energy_save=NULL;
-   }
+
+   ALLOC(surround_dynalloc, C*nbEBands, opus_val16);
+   for(i=0;i<st->end;i++)
+      surround_dynalloc[i] = 0;
   /* This computes how much masking takes place between surround channels */
-   if (st->energy_mask&&!st->lfe)
+   if (st->start==0&&st->energy_mask&&!st->lfe)
   {
+      int mask_end;
+      int midband;
+      int count_dynalloc;
      opus_val32 mask_avg=0;
-      opus_val16 offset = shortBlocks?HALF16(SHL16(LM, DB_SHIFT)):0;
+      opus_val32 diff=0;
+      int count=0;
+      mask_end = IMAX(2,st->lastCodedBands);
      for (c=0;c<C;c++)
      {
-         opus_val16 followE, followMask;
-         followE = followMask = -QCONST16(14.f, DB_SHIFT);
-         for(i=0;i<st->end;i++)
+         for(i=0;i<mask_end;i++)
         {
-            /* We use a simple follower to approximate the masking spreading function. */
-            followE = MAX16(followE-QCONST16(1.f, DB_SHIFT), bandLogE[nbEBands*c+i]-offset);
-            followMask = MAX16(followMask-QCONST16(1.f, DB_SHIFT), st->energy_mask[nbEBands*c+i]);
-            mask_avg += followE-followMask;
+            opus_val16 mask;
+            mask = MAX16(MIN16(st->energy_mask[nbEBands*c+i],
+                   QCONST16(.25f, DB_SHIFT)), -QCONST16(2.0f, DB_SHIFT));
+            if (mask > 0)
+               mask = HALF16(mask);
+            mask_avg += MULT16_16(mask, eBands[i+1]-eBands[i]);
+            count += eBands[i+1]-eBands[i];
+            diff += MULT16_16(mask, 1+2*i-mask_end);
         }
      }
-      surround_masking = DIV32_16(mask_avg,C*st->end) + QCONST16(.7f, DB_SHIFT);
-      surround_masking = MIN16(MAX16(surround_masking, -QCONST16(2.f, DB_SHIFT)), QCONST16(.2f, DB_SHIFT));
-      surround_masking -= HALF16(HALF16(surround_masking));
+      mask_avg = DIV32_16(mask_avg,count);
+      mask_avg += QCONST16(.2f, DB_SHIFT);
+      diff = diff*6/(C*(mask_end-1)*(mask_end+1)*mask_end);
+      /* Again, being conservative */
+      diff = HALF32(diff);
+      diff = MAX32(MIN32(diff, QCONST32(.031f, DB_SHIFT)), -QCONST32(.031f, DB_SHIFT));
+      /* Find the band that's in the middle of the coded spectrum */
+      for (midband=0;eBands[midband+1] < eBands[mask_end]/2;midband++);
+      count_dynalloc=0;
+      for(i=0;i<mask_end;i++)
+      {
+         opus_val32 lin;
+         opus_val16 unmask;
+         lin = mask_avg + diff*(i-midband);
+         if (C==2)
+            unmask = MAX16(st->energy_mask[i], st->energy_mask[nbEBands+i]);
+         else
+            unmask = st->energy_mask[i];
+         unmask = MIN16(unmask, QCONST16(.0f, DB_SHIFT));
+         unmask -= lin;
+         if (unmask > QCONST16(.25f, DB_SHIFT))
+         {
+            surround_dynalloc[i] = unmask - QCONST16(.25f, DB_SHIFT);
+            count_dynalloc++;
+         }
+      }
+      if (count_dynalloc>=3)
+      {
+         /* If we need dynalloc in many bands, it's probably because our
+            initial masking rate was too low. */
+         mask_avg += QCONST16(.25f, DB_SHIFT);
+         if (mask_avg>0)
+         {
+            /* Something went really wrong in the original calculations,
+               disabling masking. */
+            mask_avg = 0;
+            diff = 0;
+            for(i=0;i<mask_end;i++)
+               surround_dynalloc[i] = 0;
+         } else {
+            for(i=0;i<mask_end;i++)
+               surround_dynalloc[i] = MAX16(0, surround_dynalloc[i]-QCONST16(.25f, DB_SHIFT));
+         }
+      }
+      mask_avg += QCONST16(.2f, DB_SHIFT);
+      /* Convert to 1/64th units used for the trim */
+      surround_trim = 64*diff;
+      /*printf("%d %d ", mask_avg, surround_trim);*/
+      surround_masking = mask_avg;
   }
   /* Temporal VBR (but not for LFE) */
   if (!st->lfe)
   {
      opus_val16 follow=-QCONST16(10.0f,DB_SHIFT);
-      float frame_avg=0;
+      opus_val32 frame_avg=0;
      opus_val16 offset = shortBlocks?HALF16(SHL16(LM, DB_SHIFT)):0;
      for(i=st->start;i<st->end;i++)
      {
@ -1660,7 +1736,8 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
         /* Disable new spreading+tapset estimator until we can show it works
            better than the old one. So far it seems like spreading_decision()
            works best. */
-         if (0&&st->analysis.valid)
+#if 0
+         if (st->analysis.valid)
         {
            static const opus_val16 spread_thresholds[3] = {-QCONST16(.6f, 15), -QCONST16(.2f, 15), -QCONST16(.07f, 15)};
            static const opus_val16 spread_histeresis[3] = {QCONST16(.15f, 15), QCONST16(.07f, 15), QCONST16(.02f, 15)};
@ -1668,7 +1745,9 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
            static const opus_val16 tapset_histeresis[2] = {QCONST16(.1f, 15), QCONST16(.05f, 15)};
            st->spread_decision = hysteresis_decision(-st->analysis.tonality, spread_thresholds, spread_histeresis, 3, st->spread_decision);
            st->tapset_decision = hysteresis_decision(st->analysis.tonality_slope, tapset_thresholds, tapset_histeresis, 2, st->tapset_decision);
-         } else {
+         } else
+#endif
+         {
            st->spread_decision = spreading_decision(mode, X,
                  &st->tonal_average, st->spread_decision, &st->hf_average,
                  &st->tapset_decision, pf_on&&!shortBlocks, effEnd, C, M);
@ -1683,7 +1762,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,

   maxDepth = dynalloc_analysis(bandLogE, bandLogE2, nbEBands, st->start, st->end, C, offsets,
         st->lsb_depth, mode->logN, isTransient, st->vbr, st->constrained_vbr,
-         eBands, LM, effectiveBytes, &tot_boost, st->lfe);
+         eBands, LM, effectiveBytes, &tot_boost, st->lfe, surround_dynalloc);
   /* For LFE, everything interesting is in the first band */
   if (st->lfe)
      offsets[0] = IMIN(8, effectiveBytes/3);
@ -1727,25 +1806,18 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,

   if (C==2)
   {
-      int effectiveRate;
-
      static const opus_val16 intensity_thresholds[21]=
      /* 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19  20  off*/
-        { 16,21,23,25,27,29,31,33,35,38,42,46,50,54,58,63,68,75,84,102,130};
+        {  1, 2, 3, 4, 5, 6, 7, 8,16,24,36,44,50,56,62,67,72,79,88,106,134};
      static const opus_val16 intensity_histeresis[21]=
-        {  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 4, 5, 6,  8, 12};
+        {  1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 4, 5, 6,  8, 8};

      /* Always use MS for 2.5 ms frames until we can do a better analysis */
      if (LM!=0)
         dual_stereo = stereo_analysis(mode, X, LM, N);

-      /* Account for coarse energy */
-      effectiveRate = (8*effectiveBytes - 80)>>LM;
-
-      /* effectiveRate in kb/s */
-      effectiveRate = 2*effectiveRate/5;
-
-      st->intensity = hysteresis_decision((opus_val16)effectiveRate, intensity_thresholds, intensity_histeresis, 21, st->intensity);
+      st->intensity = hysteresis_decision((opus_val16)equiv_rate/1000,
+            intensity_thresholds, intensity_histeresis, 21, st->intensity);
      st->intensity = IMIN(st->end,IMAX(st->start, st->intensity));
   }

@ -1756,7 +1828,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
         alloc_trim = 5;
      else
         alloc_trim = alloc_trim_analysis(mode, X, bandLogE,
-            st->end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate, st->intensity);
+            st->end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate, st->intensity, surround_trim);
      ec_enc_icdf(enc, alloc_trim, trim_icdf, 7);
      tell = ec_tell_frac(enc);
   }
@ -1779,7 +1851,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
     if (st->constrained_vbr)
        base_target += (st->vbr_offset>>lm_diff);

-     target = compute_vbr(mode, &st->analysis, base_target, LM, st->bitrate,
+     target = compute_vbr(mode, &st->analysis, base_target, LM, equiv_rate,
           st->lastCodedBands, C, st->intensity, st->constrained_vbr,
           st->stereo_saving, tot_boost, tf_estimate, pitch_change, maxDepth,
           st->variable_duration, st->lfe, st->energy_mask!=NULL, surround_masking,
@ -1859,17 +1931,17 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
   anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<<BITRES) ? (1<<BITRES) : 0;
   bits -= anti_collapse_rsv;
   signalBandwidth = st->end-1;
-#ifndef FIXED_POINT
+#ifndef DISABLE_FLOAT_API
   if (st->analysis.valid)
   {
      int min_bandwidth;
-      if (st->bitrate < (opus_int32)32000*C)
+      if (equiv_rate < (opus_int32)32000*C)
         min_bandwidth = 13;
-      else if (st->bitrate < (opus_int32)48000*C)
+      else if (equiv_rate < (opus_int32)48000*C)
         min_bandwidth = 16;
-      else if (st->bitrate < (opus_int32)60000*C)
+      else if (equiv_rate < (opus_int32)60000*C)
         min_bandwidth = 18;
-      else  if (st->bitrate < (opus_int32)80000*C)
+      else  if (equiv_rate < (opus_int32)80000*C)
         min_bandwidth = 19;
      else
         min_bandwidth = 20;
@ -2261,12 +2333,6 @@ int opus_custom_encoder_ctl(CELTEncoder * OPUS_RESTRICT st, int request, ...)
          st->lfe = value;
      }
      break;
-      case OPUS_SET_ENERGY_SAVE_REQUEST:
-      {
-          opus_val16 *value = va_arg(ap, opus_val16*);
-          st->energy_save=value;
-      }
-      break;
      case OPUS_SET_ENERGY_MASK_REQUEST:
      {
          opus_val16 *value = va_arg(ap, opus_val16*);
--- a/media/libopus/celt/celt_lpc.c
+++ b/media/libopus/celt/celt_lpc.c
@ -226,7 +226,8 @@ int _celt_autocorr(
                   const opus_val16       *window,
                   int          overlap,
                   int          lag,
-                   int          n
+                   int          n,
+                   int          arch
                  )
 {
   opus_val32 d;
@ -275,7 +276,7 @@ int _celt_autocorr(
         shift = 0;
   }
 #endif
-   celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1);
+   celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1, arch);
   for (k=0;k<=lag;k++)
   {
      for (i = k+fastN, d = 0; i < n; i++)
--- a/media/libopus/celt/celt_lpc.h
+++ b/media/libopus/celt/celt_lpc.h
@ -48,6 +48,7 @@ void celt_iir(const opus_val32 *x,
         int ord,
         opus_val16 *mem);

-int _celt_autocorr(const opus_val16 *x, opus_val32 *ac, const opus_val16 *window, int overlap, int lag, int n);
+int _celt_autocorr(const opus_val16 *x, opus_val32 *ac,
+         const opus_val16 *window, int overlap, int lag, int n, int arch);

 #endif /* PLC_H */
--- a/media/libopus/celt/cpu_support.h
+++ b/media/libopus/celt/cpu_support.h
@ -28,7 +28,10 @@
 #ifndef CPU_SUPPORT_H
 #define CPU_SUPPORT_H

-#if defined(OPUS_HAVE_RTCD) && defined(ARMv4_ASM)
+#include "opus_types.h"
+#include "opus_defines.h"
+
+#if defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_ASM)
 #include "arm/armcpu.h"

 /* We currently support 4 ARM variants:
@ -42,7 +45,7 @@
 #else
 #define OPUS_ARCHMASK 0

-static inline int opus_select_arch(void)
+static OPUS_INLINE int opus_select_arch(void)
 {
  return 0;
 }
--- a/media/libopus/celt/cwrs.c
+++ b/media/libopus/celt/cwrs.c
@ -410,7 +410,7 @@ static const opus_uint32 CELT_PVQ_U_DATA[1272]={
 };

 #if defined(CUSTOM_MODES)
-const opus_uint32 *const CELT_PVQ_U_ROW[15]={
+static const opus_uint32 *const CELT_PVQ_U_ROW[15]={
  CELT_PVQ_U_DATA+   0,CELT_PVQ_U_DATA+ 208,CELT_PVQ_U_DATA+ 415,
  CELT_PVQ_U_DATA+ 621,CELT_PVQ_U_DATA+ 826,CELT_PVQ_U_DATA+1030,
  CELT_PVQ_U_DATA+1233,CELT_PVQ_U_DATA+1336,CELT_PVQ_U_DATA+1389,
@ -418,7 +418,7 @@ const opus_uint32 *const CELT_PVQ_U_ROW[15]={
  CELT_PVQ_U_DATA+1464,CELT_PVQ_U_DATA+1470,CELT_PVQ_U_DATA+1473
 };
 #else
-const opus_uint32 *const CELT_PVQ_U_ROW[15]={
+static const opus_uint32 *const CELT_PVQ_U_ROW[15]={
  CELT_PVQ_U_DATA+   0,CELT_PVQ_U_DATA+ 176,CELT_PVQ_U_DATA+ 351,
  CELT_PVQ_U_DATA+ 525,CELT_PVQ_U_DATA+ 698,CELT_PVQ_U_DATA+ 870,
  CELT_PVQ_U_DATA+1041,CELT_PVQ_U_DATA+1131,CELT_PVQ_U_DATA+1178,
@ -534,7 +534,7 @@ void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){
 /*Computes the next row/column of any recurrence that obeys the relation
   u[i][j]=u[i-1][j]+u[i][j-1]+u[i-1][j-1].
  _ui0 is the base case for the new row/column.*/
-static inline void unext(opus_uint32 *_ui,unsigned _len,opus_uint32 _ui0){
+static OPUS_INLINE void unext(opus_uint32 *_ui,unsigned _len,opus_uint32 _ui0){
  opus_uint32 ui1;
  unsigned      j;
  /*This do-while will overrun the array if we don't have storage for at least
@ -550,7 +550,7 @@ static inline void unext(opus_uint32 *_ui,unsigned _len,opus_uint32 _ui0){
 /*Computes the previous row/column of any recurrence that obeys the relation
   u[i-1][j]=u[i][j]-u[i][j-1]-u[i-1][j-1].
  _ui0 is the base case for the new row/column.*/
-static inline void uprev(opus_uint32 *_ui,unsigned _n,opus_uint32 _ui0){
+static OPUS_INLINE void uprev(opus_uint32 *_ui,unsigned _n,opus_uint32 _ui0){
  opus_uint32 ui1;
  unsigned      j;
  /*This do-while will overrun the array if we don't have storage for at least
@ -617,7 +617,7 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){
   of size 1 with associated sign bits.
  _y: The vector of pulses, whose sum of absolute values is K.
  _k: Returns K.*/
-static inline opus_uint32 icwrs1(const int *_y,int *_k){
+static OPUS_INLINE opus_uint32 icwrs1(const int *_y,int *_k){
  *_k=abs(_y[0]);
  return _y[0]<0;
 }
@ -626,7 +626,7 @@ static inline opus_uint32 icwrs1(const int *_y,int *_k){
   of size _n with associated sign bits.
  _y:  The vector of pulses, whose sum of absolute values must be _k.
  _nc: Returns V(_n,_k).*/
-static inline opus_uint32 icwrs(int _n,int _k,opus_uint32 *_nc,const int *_y,
+static OPUS_INLINE opus_uint32 icwrs(int _n,int _k,opus_uint32 *_nc,const int *_y,
 opus_uint32 *_u){
  opus_uint32 i;
  int         j;
--- a/media/libopus/celt/ecintrin.h
+++ b/media/libopus/celt/ecintrin.h
@ -33,7 +33,7 @@
 #if !defined(_ecintrin_H)
 # define _ecintrin_H (1)

-/*Some specific platforms may have optimized intrinsic or inline assembly
+/*Some specific platforms may have optimized intrinsic or OPUS_INLINE assembly
   versions of these functions which can substantially improve performance.
  We define macros for them to allow easy incorporation of these non-ANSI
   features.*/
--- a/media/libopus/celt/entcode.h
+++ b/media/libopus/celt/entcode.h
@ -26,6 +26,7 @@
 */

 #include "opus_types.h"
+#include "opus_defines.h"

 #if !defined(_entcode_H)
 # define _entcode_H (1)
@ -83,15 +84,15 @@ struct ec_ctx{
   int            error;
 };

-static inline opus_uint32 ec_range_bytes(ec_ctx *_this){
+static OPUS_INLINE opus_uint32 ec_range_bytes(ec_ctx *_this){
  return _this->offs;
 }

-static inline unsigned char *ec_get_buffer(ec_ctx *_this){
+static OPUS_INLINE unsigned char *ec_get_buffer(ec_ctx *_this){
  return _this->buf;
 }

-static inline int ec_get_error(ec_ctx *_this){
+static OPUS_INLINE int ec_get_error(ec_ctx *_this){
  return _this->error;
 }

@ -101,7 +102,7 @@ static inline int ec_get_error(ec_ctx *_this){
  Return: The number of bits.
          This will always be slightly larger than the exact value (e.g., all
           rounding error is in the positive direction).*/
-static inline int ec_tell(ec_ctx *_this){
+static OPUS_INLINE int ec_tell(ec_ctx *_this){
  return _this->nbits_total-EC_ILOG(_this->rng);
 }

--- a/media/libopus/celt/fixed_debug.h
+++ b/media/libopus/celt/fixed_debug.h
@ -33,9 +33,9 @@
 #define FIXED_DEBUG_H

 #include <stdio.h>
+#include "opus_defines.h"

 #ifdef CELT_C
-#include "opus_defines.h"
 OPUS_EXPORT opus_int64 celt_mips=0;
 #else
 extern opus_int64 celt_mips;
@ -59,7 +59,7 @@ extern opus_int64 celt_mips;
 #define SHR(a,b) SHR32(a,b)
 #define PSHR(a,b) PSHR32(a,b)

-static inline short NEG16(int x)
+static OPUS_INLINE short NEG16(int x)
 {
   int res;
   if (!VERIFY_SHORT(x))
@ -80,7 +80,7 @@ static inline short NEG16(int x)
   celt_mips++;
   return res;
 }
-static inline int NEG32(opus_int64 x)
+static OPUS_INLINE int NEG32(opus_int64 x)
 {
   opus_int64 res;
   if (!VERIFY_INT(x))
@ -103,7 +103,7 @@ static inline int NEG32(opus_int64 x)
 }

 #define EXTRACT16(x) EXTRACT16_(x, __FILE__, __LINE__)
-static inline short EXTRACT16_(int x, char *file, int line)
+static OPUS_INLINE short EXTRACT16_(int x, char *file, int line)
 {
   int res;
   if (!VERIFY_SHORT(x))
@ -119,7 +119,7 @@ static inline short EXTRACT16_(int x, char *file, int line)
 }

 #define EXTEND32(x) EXTEND32_(x, __FILE__, __LINE__)
-static inline int EXTEND32_(int x, char *file, int line)
+static OPUS_INLINE int EXTEND32_(int x, char *file, int line)
 {
   int res;
   if (!VERIFY_SHORT(x))
@ -135,7 +135,7 @@ static inline int EXTEND32_(int x, char *file, int line)
 }

 #define SHR16(a, shift) SHR16_(a, shift, __FILE__, __LINE__)
-static inline short SHR16_(int a, int shift, char *file, int line)
+static OPUS_INLINE short SHR16_(int a, int shift, char *file, int line)
 {
   int res;
   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(shift))
@ -157,7 +157,7 @@ static inline short SHR16_(int a, int shift, char *file, int line)
   return res;
 }
 #define SHL16(a, shift) SHL16_(a, shift, __FILE__, __LINE__)
-static inline short SHL16_(int a, int shift, char *file, int line)
+static OPUS_INLINE short SHL16_(int a, int shift, char *file, int line)
 {
   int res;
   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(shift))
@ -179,7 +179,7 @@ static inline short SHL16_(int a, int shift, char *file, int line)
   return res;
 }

-static inline int SHR32(opus_int64 a, int shift)
+static OPUS_INLINE int SHR32(opus_int64 a, int shift)
 {
   opus_int64  res;
   if (!VERIFY_INT(a) || !VERIFY_SHORT(shift))
@ -201,7 +201,7 @@ static inline int SHR32(opus_int64 a, int shift)
   return res;
 }
 #define SHL32(a, shift) SHL32_(a, shift, __FILE__, __LINE__)
-static inline int SHL32_(opus_int64 a, int shift, char *file, int line)
+static OPUS_INLINE int SHL32_(opus_int64 a, int shift, char *file, int line)
 {
   opus_int64  res;
   if (!VERIFY_INT(a) || !VERIFY_SHORT(shift))
@ -234,7 +234,7 @@ static inline int SHL32_(opus_int64 a, int shift, char *file, int line)
 //#define SHL(a,shift) ((a) << (shift))

 #define ADD16(a, b) ADD16_(a, b, __FILE__, __LINE__)
-static inline short ADD16_(int a, int b, char *file, int line)
+static OPUS_INLINE short ADD16_(int a, int b, char *file, int line)
 {
   int res;
   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
@ -257,7 +257,7 @@ static inline short ADD16_(int a, int b, char *file, int line)
 }

 #define SUB16(a, b) SUB16_(a, b, __FILE__, __LINE__)
-static inline short SUB16_(int a, int b, char *file, int line)
+static OPUS_INLINE short SUB16_(int a, int b, char *file, int line)
 {
   int res;
   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
@ -280,7 +280,7 @@ static inline short SUB16_(int a, int b, char *file, int line)
 }

 #define ADD32(a, b) ADD32_(a, b, __FILE__, __LINE__)
-static inline int ADD32_(opus_int64 a, opus_int64 b, char *file, int line)
+static OPUS_INLINE int ADD32_(opus_int64 a, opus_int64 b, char *file, int line)
 {
   opus_int64 res;
   if (!VERIFY_INT(a) || !VERIFY_INT(b))
@ -303,7 +303,7 @@ static inline int ADD32_(opus_int64 a, opus_int64 b, char *file, int line)
 }

 #define SUB32(a, b) SUB32_(a, b, __FILE__, __LINE__)
-static inline int SUB32_(opus_int64 a, opus_int64 b, char *file, int line)
+static OPUS_INLINE int SUB32_(opus_int64 a, opus_int64 b, char *file, int line)
 {
   opus_int64 res;
   if (!VERIFY_INT(a) || !VERIFY_INT(b))
@ -327,7 +327,7 @@ static inline int SUB32_(opus_int64 a, opus_int64 b, char *file, int line)

 #undef UADD32
 #define UADD32(a, b) UADD32_(a, b, __FILE__, __LINE__)
-static inline unsigned int UADD32_(opus_uint64 a, opus_uint64 b, char *file, int line)
+static OPUS_INLINE unsigned int UADD32_(opus_uint64 a, opus_uint64 b, char *file, int line)
 {
   opus_uint64 res;
   if (!VERIFY_UINT(a) || !VERIFY_UINT(b))
@ -351,7 +351,7 @@ static inline unsigned int UADD32_(opus_uint64 a, opus_uint64 b, char *file, int

 #undef USUB32
 #define USUB32(a, b) USUB32_(a, b, __FILE__, __LINE__)
-static inline unsigned int USUB32_(opus_uint64 a, opus_uint64 b, char *file, int line)
+static OPUS_INLINE unsigned int USUB32_(opus_uint64 a, opus_uint64 b, char *file, int line)
 {
   opus_uint64 res;
   if (!VERIFY_UINT(a) || !VERIFY_UINT(b))
@ -381,7 +381,7 @@ static inline unsigned int USUB32_(opus_uint64 a, opus_uint64 b, char *file, int
 }

 /* result fits in 16 bits */
-static inline short MULT16_16_16(int a, int b)
+static OPUS_INLINE short MULT16_16_16(int a, int b)
 {
   int res;
   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
@ -404,7 +404,7 @@ static inline short MULT16_16_16(int a, int b)
 }

 #define MULT16_16(a, b) MULT16_16_(a, b, __FILE__, __LINE__)
-static inline int MULT16_16_(int a, int b, char *file, int line)
+static OPUS_INLINE int MULT16_16_(int a, int b, char *file, int line)
 {
   opus_int64 res;
   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
@ -429,7 +429,7 @@ static inline int MULT16_16_(int a, int b, char *file, int line)
 #define MAC16_16(c,a,b)     (celt_mips-=2,ADD32((c),MULT16_16((a),(b))))

 #define MULT16_32_QX(a, b, Q) MULT16_32_QX_(a, b, Q, __FILE__, __LINE__)
-static inline int MULT16_32_QX_(int a, opus_int64 b, int Q, char *file, int line)
+static OPUS_INLINE int MULT16_32_QX_(int a, opus_int64 b, int Q, char *file, int line)
 {
   opus_int64 res;
   if (!VERIFY_SHORT(a) || !VERIFY_INT(b))
@ -462,7 +462,7 @@ static inline int MULT16_32_QX_(int a, opus_int64 b, int Q, char *file, int line
 }

 #define MULT16_32_PX(a, b, Q) MULT16_32_PX_(a, b, Q, __FILE__, __LINE__)
-static inline int MULT16_32_PX_(int a, opus_int64 b, int Q, char *file, int line)
+static OPUS_INLINE int MULT16_32_PX_(int a, opus_int64 b, int Q, char *file, int line)
 {
   opus_int64 res;
   if (!VERIFY_SHORT(a) || !VERIFY_INT(b))
@ -497,7 +497,7 @@ static inline int MULT16_32_PX_(int a, opus_int64 b, int Q, char *file, int line
 #define MULT16_32_Q15(a,b) MULT16_32_QX(a,b,15)
 #define MAC16_32_Q15(c,a,b) (celt_mips-=2,ADD32((c),MULT16_32_Q15((a),(b))))

-static inline int SATURATE(int a, int b)
+static OPUS_INLINE int SATURATE(int a, int b)
 {
   if (a>b)
      a=b;
@ -507,7 +507,17 @@ static inline int SATURATE(int a, int b)
   return a;
 }

-static inline int MULT16_16_Q11_32(int a, int b)
+static OPUS_INLINE opus_int16 SATURATE16(opus_int32 a)
+{
+   celt_mips+=3;
+   if (a>32767)
+      return 32767;
+   else if (a<-32768)
+      return -32768;
+   else return a;
+}
+
+static OPUS_INLINE int MULT16_16_Q11_32(int a, int b)
 {
   opus_int64 res;
   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
@ -529,7 +539,7 @@ static inline int MULT16_16_Q11_32(int a, int b)
   celt_mips+=3;
   return res;
 }
-static inline short MULT16_16_Q13(int a, int b)
+static OPUS_INLINE short MULT16_16_Q13(int a, int b)
 {
   opus_int64 res;
   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
@ -551,7 +561,7 @@ static inline short MULT16_16_Q13(int a, int b)
   celt_mips+=3;
   return res;
 }
-static inline short MULT16_16_Q14(int a, int b)
+static OPUS_INLINE short MULT16_16_Q14(int a, int b)
 {
   opus_int64 res;
   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
@ -575,7 +585,7 @@ static inline short MULT16_16_Q14(int a, int b)
 }

 #define MULT16_16_Q15(a, b) MULT16_16_Q15_(a, b, __FILE__, __LINE__)
-static inline short MULT16_16_Q15_(int a, int b, char *file, int line)
+static OPUS_INLINE short MULT16_16_Q15_(int a, int b, char *file, int line)
 {
   opus_int64 res;
   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
@ -598,7 +608,7 @@ static inline short MULT16_16_Q15_(int a, int b, char *file, int line)
   return res;
 }

-static inline short MULT16_16_P13(int a, int b)
+static OPUS_INLINE short MULT16_16_P13(int a, int b)
 {
   opus_int64 res;
   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
@ -628,7 +638,7 @@ static inline short MULT16_16_P13(int a, int b)
   celt_mips+=4;
   return res;
 }
-static inline short MULT16_16_P14(int a, int b)
+static OPUS_INLINE short MULT16_16_P14(int a, int b)
 {
   opus_int64 res;
   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
@ -658,7 +668,7 @@ static inline short MULT16_16_P14(int a, int b)
   celt_mips+=4;
   return res;
 }
-static inline short MULT16_16_P15(int a, int b)
+static OPUS_INLINE short MULT16_16_P15(int a, int b)
 {
   opus_int64 res;
   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
@ -691,7 +701,7 @@ static inline short MULT16_16_P15(int a, int b)

 #define DIV32_16(a, b) DIV32_16_(a, b, __FILE__, __LINE__)

-static inline int DIV32_16_(opus_int64 a, opus_int64 b, char *file, int line)
+static OPUS_INLINE int DIV32_16_(opus_int64 a, opus_int64 b, char *file, int line)
 {
   opus_int64 res;
   if (b==0)
@ -726,7 +736,7 @@ static inline int DIV32_16_(opus_int64 a, opus_int64 b, char *file, int line)
 }

 #define DIV32(a, b) DIV32_(a, b, __FILE__, __LINE__)
-static inline int DIV32_(opus_int64 a, opus_int64 b, char *file, int line)
+static OPUS_INLINE int DIV32_(opus_int64 a, opus_int64 b, char *file, int line)
 {
   opus_int64 res;
   if (b==0)
--- a/media/libopus/celt/fixed_generic.h
+++ b/media/libopus/celt/fixed_generic.h
@ -40,7 +40,7 @@
 #define MULT16_32_Q16(a,b) ADD32(MULT16_16((a),SHR((b),16)), SHR(MULT16_16SU((a),((b)&0x0000ffff)),16))

 /** 16x32 multiplication, followed by a 16-bit shift right (round-to-nearest). Results fits in 32 bits */
-#define MULT16_32_P16(a,b) ADD32(MULT16_16((a),SHR((b),16)), PSHR(MULT16_16((a),((b)&0x0000ffff)),16))
+#define MULT16_32_P16(a,b) ADD32(MULT16_16((a),SHR((b),16)), PSHR(MULT16_16SU((a),((b)&0x0000ffff)),16))

 /** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */
 #define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15))
@ -116,6 +116,7 @@
 #define MAC16_32_Q15(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15)))

 #define MULT16_16_Q11_32(a,b) (SHR(MULT16_16((a),(b)),11))
+#define MULT16_16_Q11(a,b) (SHR(MULT16_16((a),(b)),11))
 #define MULT16_16_Q13(a,b) (SHR(MULT16_16((a),(b)),13))
 #define MULT16_16_Q14(a,b) (SHR(MULT16_16((a),(b)),14))
 #define MULT16_16_Q15(a,b) (SHR(MULT16_16((a),(b)),15))
--- a/media/libopus/celt/float_cast.h
+++ b/media/libopus/celt/float_cast.h
@ -101,7 +101,7 @@
        #include <math.h>

        /*      Win32 doesn't seem to have these functions.
-        **      Therefore implement inline versions of these functions here.
+        **      Therefore implement OPUS_INLINE versions of these functions here.
        */

        __inline long int
@ -128,7 +128,7 @@
 #endif

 #ifndef DISABLE_FLOAT_API
-static inline opus_int16 FLOAT2INT16(float x)
+static OPUS_INLINE opus_int16 FLOAT2INT16(float x)
 {
   x = x*CELT_SIG_SCALE;
   x = MAX32(x, -32768);
--- a/media/libopus/celt/mathops.c
+++ b/media/libopus/celt/mathops.c
@ -139,7 +139,7 @@ opus_val32 celt_sqrt(opus_val32 x)
 #define L3 8277
 #define L4 -626

-static inline opus_val16 _celt_cos_pi_2(opus_val16 x)
+static OPUS_INLINE opus_val16 _celt_cos_pi_2(opus_val16 x)
 {
   opus_val16 x2;

--- a/media/libopus/celt/mathops.h
+++ b/media/libopus/celt/mathops.h
@ -44,7 +44,7 @@
 unsigned isqrt32(opus_uint32 _val);

 #ifndef OVERRIDE_CELT_MAXABS16
-static inline opus_val32 celt_maxabs16(const opus_val16 *x, int len)
+static OPUS_INLINE opus_val32 celt_maxabs16(const opus_val16 *x, int len)
 {
   int i;
   opus_val16 maxval = 0;
@ -60,7 +60,7 @@ static inline opus_val32 celt_maxabs16(const opus_val16 *x, int len)

 #ifndef OVERRIDE_CELT_MAXABS32
 #ifdef FIXED_POINT
-static inline opus_val32 celt_maxabs32(const opus_val32 *x, int len)
+static OPUS_INLINE opus_val32 celt_maxabs32(const opus_val32 *x, int len)
 {
   int i;
   opus_val32 maxval = 0;
@ -95,7 +95,7 @@ static inline opus_val32 celt_maxabs32(const opus_val32 *x, int len)
         denorm, +/- inf and NaN are *not* handled */

 /** Base-2 log approximation (log2(x)). */
-static inline float celt_log2(float x)
+static OPUS_INLINE float celt_log2(float x)
 {
   int integer;
   float frac;
@ -113,7 +113,7 @@ static inline float celt_log2(float x)
 }

 /** Base-2 exponential approximation (2^x). */
-static inline float celt_exp2(float x)
+static OPUS_INLINE float celt_exp2(float x)
 {
   int integer;
   float frac;
@ -145,7 +145,7 @@ static inline float celt_exp2(float x)

 #ifndef OVERRIDE_CELT_ILOG2
 /** Integer log in base2. Undefined for zero and negative numbers */
-static inline opus_int16 celt_ilog2(opus_int32 x)
+static OPUS_INLINE opus_int16 celt_ilog2(opus_int32 x)
 {
   celt_assert2(x>0, "celt_ilog2() only defined for strictly positive numbers");
   return EC_ILOG(x)-1;
@ -154,7 +154,7 @@ static inline opus_int16 celt_ilog2(opus_int32 x)


 /** Integer log in base2. Defined for zero, but not for negative numbers */
-static inline opus_int16 celt_zlog2(opus_val32 x)
+static OPUS_INLINE opus_int16 celt_zlog2(opus_val32 x)
 {
   return x <= 0 ? 0 : celt_ilog2(x);
 }
@ -165,7 +165,8 @@ opus_val32 celt_sqrt(opus_val32 x);

 opus_val16 celt_cos_norm(opus_val32 x);

-static inline opus_val16 celt_log2(opus_val32 x)
+/** Base-2 logarithm approximation (log2(x)). (Q14 input, Q10 output) */
+static OPUS_INLINE opus_val16 celt_log2(opus_val32 x)
 {
   int i;
   opus_val16 n, frac;
@ -191,14 +192,14 @@ static inline opus_val16 celt_log2(opus_val32 x)
 #define D2 14819
 #define D3 10204

-static inline opus_val32 celt_exp2_frac(opus_val16 x)
+static OPUS_INLINE opus_val32 celt_exp2_frac(opus_val16 x)
 {
   opus_val16 frac;
   frac = SHL16(x, 4);
   return ADD16(D0, MULT16_16_Q15(frac, ADD16(D1, MULT16_16_Q15(frac, ADD16(D2 , MULT16_16_Q15(D3,frac))))));
 }
 /** Base-2 exponential approximation (2^x). (Q10 input, Q16 output) */
-static inline opus_val32 celt_exp2(opus_val16 x)
+static OPUS_INLINE opus_val32 celt_exp2(opus_val16 x)
 {
   int integer;
   opus_val16 frac;
@ -224,7 +225,7 @@ opus_val32 frac_div32(opus_val32 a, opus_val32 b);

 /* Atan approximation using a 4th order polynomial. Input is in Q15 format
   and normalized by pi/4. Output is in Q15 format */
-static inline opus_val16 celt_atan01(opus_val16 x)
+static OPUS_INLINE opus_val16 celt_atan01(opus_val16 x)
 {
   return MULT16_16_P15(x, ADD32(M1, MULT16_16_P15(x, ADD32(M2, MULT16_16_P15(x, ADD32(M3, MULT16_16_P15(M4, x)))))));
 }
@ -235,7 +236,7 @@ static inline opus_val16 celt_atan01(opus_val16 x)
 #undef M4

 /* atan2() approximation valid for positive input values */
-static inline opus_val16 celt_atan2p(opus_val16 y, opus_val16 x)
+static OPUS_INLINE opus_val16 celt_atan2p(opus_val16 y, opus_val16 x)
 {
   if (y < x)
   {
--- a/media/libopus/celt/os_support.h
+++ b/media/libopus/celt/os_support.h
@ -35,13 +35,16 @@
 #  include "custom_support.h"
 #endif

+#include "opus_types.h"
+#include "opus_defines.h"
+
 #include <string.h>
 #include <stdio.h>
 #include <stdlib.h>

 /** Opus wrapper for malloc(). To do your own dynamic allocation, all you need to do is replace this function and opus_free */
 #ifndef OVERRIDE_OPUS_ALLOC
-static inline void *opus_alloc (size_t size)
+static OPUS_INLINE void *opus_alloc (size_t size)
 {
   return malloc(size);
 }
@ -49,7 +52,7 @@ static inline void *opus_alloc (size_t size)

 /** Same as celt_alloc(), except that the area is only needed inside a CELT call (might cause problem with wideband though) */
 #ifndef OVERRIDE_OPUS_ALLOC_SCRATCH
-static inline void *opus_alloc_scratch (size_t size)
+static OPUS_INLINE void *opus_alloc_scratch (size_t size)
 {
   /* Scratch space doesn't need to be cleared */
   return opus_alloc(size);
@ -58,7 +61,7 @@ static inline void *opus_alloc_scratch (size_t size)

 /** Opus wrapper for free(). To do your own dynamic allocation, all you need to do is replace this function and opus_alloc */
 #ifndef OVERRIDE_OPUS_FREE
-static inline void opus_free (void *ptr)
+static OPUS_INLINE void opus_free (void *ptr)
 {
   free(ptr);
 }
--- a/media/libopus/celt/pitch.c
+++ b/media/libopus/celt/pitch.c
@ -145,7 +145,7 @@ static void celt_fir5(const opus_val16 *x,


 void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp,
-      int len, int C)
+      int len, int C, int arch)
 {
   int i;
   opus_val32 ac[5];
@ -180,7 +180,7 @@ void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x
   }

   _celt_autocorr(x_lp, ac, NULL, 0,
-                  4, len>>1);
+                  4, len>>1, arch);

   /* Noise floor -40 dB */
 #ifdef FIXED_POINT
@ -250,9 +250,14 @@ opus_val32
 #else
 void
 #endif
-celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch)
+celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch)
 {
   int i,j;
+   /*The EDSP version requires that max_pitch is at least 1, and that _x is
+      32-bit aligned.
+     Since it's hard to put asserts in assembly, put them here.*/
+   celt_assert(max_pitch>0);
+   celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0);
 #ifdef FIXED_POINT
   opus_val32 maxcorr=1;
 #endif
@ -289,7 +294,7 @@ celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr,

 #endif
 void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y,
-                  int len, int max_pitch, int *pitch)
+                  int len, int max_pitch, int *pitch, int arch)
 {
   int i, j;
   int lag;
@ -342,7 +347,7 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR
 #ifdef FIXED_POINT
   maxcorr =
 #endif
-   celt_pitch_xcorr(x_lp4, y_lp4, xcorr, len>>2, max_pitch>>2);
+   celt_pitch_xcorr(x_lp4, y_lp4, xcorr, len>>2, max_pitch>>2, arch);

   find_best_pitch(xcorr, y_lp4, len>>2, max_pitch>>2, best_pitch
 #ifdef FIXED_POINT
--- a/media/libopus/celt/pitch.h
+++ b/media/libopus/celt/pitch.h
@ -35,16 +35,21 @@
 #define PITCH_H

 #include "modes.h"
+#include "cpu_support.h"

 #if defined(__SSE__) && !defined(FIXED_POINT)
 #include "x86/pitch_sse.h"
 #endif

+#if defined(OPUS_ARM_ASM) && defined(FIXED_POINT)
+# include "arm/pitch_arm.h"
+#endif
+
 void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp,
-      int len, int C);
+      int len, int C, int arch);

 void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y,
-                  int len, int max_pitch, int *pitch);
+                  int len, int max_pitch, int *pitch, int arch);

 opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
      int N, int *T0, int prev_period, opus_val16 prev_gain);
@ -52,10 +57,11 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
 /* OPT: This is the kernel you really want to optimize. It gets used a lot
   by the prefilter and by the PLC. */
 #ifndef OVERRIDE_XCORR_KERNEL
-static inline void xcorr_kernel(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len)
+static OPUS_INLINE void xcorr_kernel(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len)
 {
   int j;
   opus_val16 y_0, y_1, y_2, y_3;
+   celt_assert(len>=3);
   y_3=0; /* gcc doesn't realize that y_3 can't be used uninitialized */
   y_0=*y++;
   y_1=*y++;
@ -119,7 +125,7 @@ static inline void xcorr_kernel(const opus_val16 * x, const opus_val16 * y, opus
 #endif /* OVERRIDE_XCORR_KERNEL */

 #ifndef OVERRIDE_DUAL_INNER_PROD
-static inline void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
+static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
      int N, opus_val32 *xy1, opus_val32 *xy2)
 {
   int i;
@ -140,6 +146,28 @@ opus_val32
 #else
 void
 #endif
-celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch);
+celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
+      opus_val32 *xcorr, int len, int max_pitch);
+
+#if !defined(OVERRIDE_PITCH_XCORR)
+/*Is run-time CPU detection enabled on this platform?*/
+# if defined(OPUS_HAVE_RTCD)
+extern
+#  if defined(FIXED_POINT)
+opus_val32
+#  else
+void
+#  endif
+(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
+      const opus_val16 *, opus_val32 *, int, int);
+
+#  define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
+  ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
+        xcorr, len, max_pitch))
+# else
+#  define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
+  ((void)(arch),celt_pitch_xcorr_c(_x, _y, xcorr, len, max_pitch))
+# endif
+#endif

 #endif
--- a/media/libopus/celt/quant_bands.c
+++ b/media/libopus/celt/quant_bands.c
@ -312,6 +312,7 @@ void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd,
      opus_int32 tell_intra;
      opus_uint32 nstart_bytes;
      opus_uint32 nintra_bytes;
+      opus_uint32 save_bytes;
      int badness2;
      VARDECL(unsigned char, intra_bits);

@ -322,7 +323,10 @@ void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd,
      nstart_bytes = ec_range_bytes(&enc_start_state);
      nintra_bytes = ec_range_bytes(&enc_intra_state);
      intra_buf = ec_get_buffer(&enc_intra_state) + nstart_bytes;
-      ALLOC(intra_bits, nintra_bytes-nstart_bytes, unsigned char);
+      save_bytes = nintra_bytes-nstart_bytes;
+      if (save_bytes == 0)
+         save_bytes = ALLOC_NONE;
+      ALLOC(intra_bits, save_bytes, unsigned char);
      /* Copy bits from intra bit-stream */
      OPUS_COPY(intra_bits, intra_buf, nintra_bytes - nstart_bytes);

--- a/media/libopus/celt/rate.c
+++ b/media/libopus/celt/rate.c
@ -245,7 +245,7 @@ void compute_pulse_cache(CELTMode *m, int LM)

 #define ALLOC_STEPS 6

-static inline int interp_bits2pulses(const CELTMode *m, int start, int end, int skip_start,
+static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end, int skip_start,
      const int *bits1, const int *bits2, const int *thresh, const int *cap, opus_int32 total, opus_int32 *_balance,
      int skip_rsv, int *intensity, int intensity_rsv, int *dual_stereo, int dual_stereo_rsv, int *bits,
      int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth)
--- a/media/libopus/celt/rate.h
+++ b/media/libopus/celt/rate.h
@ -45,12 +45,12 @@

 void compute_pulse_cache(CELTMode *m, int LM);

-static inline int get_pulses(int i)
+static OPUS_INLINE int get_pulses(int i)
 {
   return i<8 ? i : (8 + (i&7)) << ((i>>3)-1);
 }

-static inline int bits2pulses(const CELTMode *m, int band, int LM, int bits)
+static OPUS_INLINE int bits2pulses(const CELTMode *m, int band, int LM, int bits)
 {
   int i;
   int lo, hi;
@ -77,7 +77,7 @@ static inline int bits2pulses(const CELTMode *m, int band, int LM, int bits)
      return hi;
 }

-static inline int pulses2bits(const CELTMode *m, int band, int LM, int pulses)
+static OPUS_INLINE int pulses2bits(const CELTMode *m, int band, int LM, int pulses)
 {
   const unsigned char *cache;

--- a/media/libopus/celt/stack_alloc.h
+++ b/media/libopus/celt/stack_alloc.h
@ -32,6 +32,9 @@
 #ifndef STACK_ALLOC_H
 #define STACK_ALLOC_H

+#include "opus_types.h"
+#include "opus_defines.h"
+
 #if (!defined (VAR_ARRAYS) && !defined (USE_ALLOCA) && !defined (NONTHREADSAFE_PSEUDOSTACK))
 #error "Opus requires one of VAR_ARRAYS, USE_ALLOCA, or NONTHREADSAFE_PSEUDOSTACK be defined to select the temporary allocation mode."
 #endif
@ -92,6 +95,8 @@
 #define SAVE_STACK
 #define RESTORE_STACK
 #define ALLOC_STACK
+/* C99 does not allow VLAs of size zero */
+#define ALLOC_NONE 1

 #elif defined(USE_ALLOCA)

@ -106,6 +111,7 @@
 #define SAVE_STACK
 #define RESTORE_STACK
 #define ALLOC_STACK
+#define ALLOC_NONE 0

 #else

@ -143,6 +149,7 @@ extern char *global_stack_top;
 #define VARDECL(type, var) type *var
 #define ALLOC(var, size, type) var = PUSH(global_stack, size, type)
 #define SAVE_STACK char *_saved_stack = global_stack;
+#define ALLOC_NONE 0

 #endif /* VAR_ARRAYS */

@ -159,7 +166,7 @@ extern char *global_stack_top;

 #else

-static inline int _opus_false(void) {return 0;}
+static OPUS_INLINE int _opus_false(void) {return 0;}
 #define OPUS_CHECK_ARRAY(ptr, len) _opus_false()
 #define OPUS_CHECK_VALUE(value) _opus_false()
 #define OPUS_PRINT_INT(value) do{}while(0)
--- a/media/libopus/celt/x86/pitch_sse.h
+++ b/media/libopus/celt/x86/pitch_sse.h
@ -36,7 +36,7 @@
 #include "arch.h"

 #define OVERRIDE_XCORR_KERNEL
-static inline void xcorr_kernel(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4], int len)
+static OPUS_INLINE void xcorr_kernel(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4], int len)
 {
   int j;
   __m128 xsum1, xsum2;
@ -72,7 +72,7 @@ static inline void xcorr_kernel(const opus_val16 *x, const opus_val16 *y, opus_v
 }

 #define OVERRIDE_DUAL_INNER_PROD
-static inline void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
+static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
      int N, opus_val32 *xy1, opus_val32 *xy2)
 {
   int i;
@ -102,7 +102,7 @@ static inline void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, c
 }

 #define OVERRIDE_COMB_FILTER_CONST
-static inline void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
+static OPUS_INLINE void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
      opus_val16 g10, opus_val16 g11, opus_val16 g12)
 {
   int i;
--- a/media/libopus/celt_sources.mk
+++ b/media/libopus/celt_sources.mk
@ -18,4 +18,11 @@ celt/rate.c \
 celt/vq.c

 CELT_SOURCES_ARM = \
-celt/arm/armcpu.c
+celt/arm/armcpu.c \
+celt/arm/arm_celt_map.c
+
+CELT_SOURCES_ARM_ASM = \
+celt/arm/celt_pitch_xcorr_arm.s
+
+CELT_AM_SOURCES_ARM_ASM = \
+celt/arm/armopts.s.in
--- a/media/libopus/include/opus.h
+++ b/media/libopus/include/opus.h
@ -911,6 +911,64 @@ OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_repacketizer_get_nb_frames(OpusRepa
  */
 OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_repacketizer_out(OpusRepacketizer *rp, unsigned char *data, opus_int32 maxlen) OPUS_ARG_NONNULL(1);

+/** Pads a given Opus packet to a larger size (possibly changing the TOC sequence).
+  * @param[in,out] data <tt>const unsigned char*</tt>: The buffer containing the
+  *                                                   packet to pad.
+  * @param len <tt>opus_int32</tt>: The size of the packet.
+  *                                 This must be at least 1.
+  * @param new_len <tt>opus_int32</tt>: The desired size of the packet after padding.
+  *                                 This must be at least as large as len.
+  * @returns an error code
+  * @retval #OPUS_OK \a on success.
+  * @retval #OPUS_BAD_ARG \a len was less than 1 or new_len was less than len.
+  * @retval #OPUS_INVALID_PACKET \a data did not contain a valid Opus packet.
+  */
+OPUS_EXPORT int opus_packet_pad(unsigned char *data, opus_int32 len, opus_int32 new_len);
+
+/** Remove all padding from a given Opus packet and rewrite the TOC sequence to
+  * minimize space usage.
+  * @param[in,out] data <tt>const unsigned char*</tt>: The buffer containing the
+  *                                                   packet to strip.
+  * @param len <tt>opus_int32</tt>: The size of the packet.
+  *                                 This must be at least 1.
+  * @returns The new size of the output packet on success, or an error code
+  *          on failure.
+  * @retval #OPUS_BAD_ARG \a len was less than 1.
+  * @retval #OPUS_INVALID_PACKET \a data did not contain a valid Opus packet.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_packet_unpad(unsigned char *data, opus_int32 len);
+
+/** Pads a given Opus multi-stream packet to a larger size (possibly changing the TOC sequence).
+  * @param[in,out] data <tt>const unsigned char*</tt>: The buffer containing the
+  *                                                   packet to pad.
+  * @param len <tt>opus_int32</tt>: The size of the packet.
+  *                                 This must be at least 1.
+  * @param new_len <tt>opus_int32</tt>: The desired size of the packet after padding.
+  *                                 This must be at least 1.
+  * @param nb_streams <tt>opus_int32</tt>: The number of streams (not channels) in the packet.
+  *                                 This must be at least as large as len.
+  * @returns an error code
+  * @retval #OPUS_OK \a on success.
+  * @retval #OPUS_BAD_ARG \a len was less than 1.
+  * @retval #OPUS_INVALID_PACKET \a data did not contain a valid Opus packet.
+  */
+OPUS_EXPORT int opus_multistream_packet_pad(unsigned char *data, opus_int32 len, opus_int32 new_len, int nb_streams);
+
+/** Remove all padding from a given Opus multi-stream packet and rewrite the TOC sequence to
+  * minimize space usage.
+  * @param[in,out] data <tt>const unsigned char*</tt>: The buffer containing the
+  *                                                   packet to strip.
+  * @param len <tt>opus_int32</tt>: The size of the packet.
+  *                                 This must be at least 1.
+  * @param nb_streams <tt>opus_int32</tt>: The number of streams (not channels) in the packet.
+  *                                 This must be at least 1.
+  * @returns The new size of the output packet on success, or an error code
+  *          on failure.
+  * @retval #OPUS_BAD_ARG \a len was less than 1 or new_len was less than len.
+  * @retval #OPUS_INVALID_PACKET \a data did not contain a valid Opus packet.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_multistream_packet_unpad(unsigned char *data, opus_int32 len, int nb_streams);
+
 /**@}*/

 #ifdef __cplusplus
--- a/media/libopus/include/opus_custom.h
+++ b/media/libopus/include/opus_custom.h
@ -47,7 +47,7 @@ extern "C" {
 #else
 # define OPUS_CUSTOM_EXPORT
 # ifdef OPUS_BUILD
-#  define OPUS_CUSTOM_EXPORT_STATIC static inline
+#  define OPUS_CUSTOM_EXPORT_STATIC static OPUS_INLINE
 # else
 #  define OPUS_CUSTOM_EXPORT_STATIC
 # endif
@ -140,6 +140,7 @@ OPUS_CUSTOM_EXPORT_STATIC OPUS_WARN_UNUSED_RESULT int opus_custom_encoder_get_si
    int channels
 ) OPUS_ARG_NONNULL(1);

+# ifdef CUSTOM_MODES
 /** Initializes a previously allocated encoder state
  * The memory pointed to by st must be the size returned by opus_custom_encoder_get_size.
  * This is intended for applications which use their own allocator instead of malloc.
@ -152,12 +153,12 @@ OPUS_CUSTOM_EXPORT_STATIC OPUS_WARN_UNUSED_RESULT int opus_custom_encoder_get_si
  * @param [in] channels <tt>int</tt>: Number of channels
  * @return OPUS_OK Success or @ref opus_errorcodes
  */
-OPUS_CUSTOM_EXPORT_STATIC int opus_custom_encoder_init(
+OPUS_CUSTOM_EXPORT int opus_custom_encoder_init(
    OpusCustomEncoder *st,
    const OpusCustomMode *mode,
    int channels
 ) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2);
-
+# endif
 #endif


--- a/media/libopus/include/opus_defines.h
+++ b/media/libopus/include/opus_defines.h
@ -98,6 +98,18 @@ extern "C" {
 # define OPUS_RESTRICT restrict
 #endif

+#if (!defined(__STDC_VERSION__) || (__STDC_VERSION__ < 199901L) )
+# if OPUS_GNUC_PREREQ(2,7)
+#  define OPUS_INLINE __inline__
+# elif (defined(_MSC_VER))
+#  define OPUS_INLINE __inline
+# else
+#  define OPUS_INLINE
+# endif
+#else
+# define OPUS_INLINE inline
+#endif
+
 /**Warning attributes for opus functions
  * NONNULL is not used in OPUS_BUILD to avoid the compiler optimizing out
  * some paranoid null checks. */
@ -151,6 +163,8 @@ extern "C" {
 #define OPUS_GET_LAST_PACKET_DURATION_REQUEST 4039
 #define OPUS_SET_EXPERT_FRAME_DURATION_REQUEST 4040
 #define OPUS_GET_EXPERT_FRAME_DURATION_REQUEST 4041
+#define OPUS_SET_PREDICTION_DISABLED_REQUEST 4042
+#define OPUS_GET_PREDICTION_DISABLED_REQUEST 4043

 /* Don't use 4045, it's already taken by OPUS_GET_GAIN_REQUEST */

@ -194,7 +208,6 @@ extern "C" {
 #define OPUS_FRAMESIZE_20_MS                 5004 /**< Use 20 ms frames */
 #define OPUS_FRAMESIZE_40_MS                 5005 /**< Use 40 ms frames */
 #define OPUS_FRAMESIZE_60_MS                 5006 /**< Use 60 ms frames */
-#define OPUS_FRAMESIZE_VARIABLE              5010 /**< Optimize the frame size dynamically */

 /**@}*/

@ -575,6 +588,14 @@ extern "C" {
  * @hideinitializer */
 #define OPUS_GET_EXPERT_FRAME_DURATION(x) OPUS_GET_EXPERT_FRAME_DURATION_REQUEST, __opus_check_int_ptr(x)

+/** If set to 1, disables almost all use of prediction, making frames almost
+    completely independent. This reduces quality. (default : 0)
+  * @hideinitializer */
+#define OPUS_SET_PREDICTION_DISABLED(x) OPUS_SET_PREDICTION_DISABLED_REQUEST, __opus_check_int(x)
+/** Gets the encoder's configured prediction status.
+  * @hideinitializer */
+#define OPUS_GET_PREDICTION_DISABLED(x) OPUS_GET_PREDICTION_DISABLED_REQUEST, __opus_check_int_ptr(x)
+
 /**@}*/

 /** @defgroup opus_genericctls Generic CTLs
--- a/media/libopus/moz.build
+++ b/media/libopus/moz.build
@ -16,7 +16,7 @@ MSVC_ENABLE_PGO = True
 FINAL_LIBRARY = 'gkmedias'

 DEFINES['OPUS_BUILD'] = True
-DEFINES['OPUS_VERSION'] = '"v1.1-beta-23-gf2446c2-mozilla"'
+DEFINES['OPUS_VERSION'] = '"v1.1-rc2-1-g35a44c6-mozilla"'
 DEFINES['USE_ALLOCA'] = True

 if CONFIG['OS_ARCH'] in ('Linux', 'Darwin', 'DragonFly', 'FreeBSD',
--- a/media/libopus/silk/A2NLSF.c
+++ b/media/libopus/silk/A2NLSF.c
@ -44,7 +44,7 @@ POSSIBILITY OF SUCH DAMAGE.

 /* Helper function for A2NLSF(..)                    */
 /* Transforms polynomials from cos(n*f) to cos(f)^n  */
-static inline void silk_A2NLSF_trans_poly(
+static OPUS_INLINE void silk_A2NLSF_trans_poly(
    opus_int32          *p,                     /* I/O    Polynomial                                */
    const opus_int      dd                      /* I      Polynomial order (= filter order / 2 )    */
 )
@ -60,7 +60,7 @@ static inline void silk_A2NLSF_trans_poly(
 }
 /* Helper function for A2NLSF(..) */
 /* Polynomial evaluation          */
-static inline opus_int32 silk_A2NLSF_eval_poly( /* return the polynomial evaluation, in Q16     */
+static OPUS_INLINE opus_int32 silk_A2NLSF_eval_poly( /* return the polynomial evaluation, in Q16     */
    opus_int32          *p,                     /* I    Polynomial, Q16                         */
    const opus_int32    x,                      /* I    Evaluation point, Q12                   */
    const opus_int      dd                      /* I    Order                                   */
@ -77,7 +77,7 @@ static inline opus_int32 silk_A2NLSF_eval_poly( /* return the polynomial evaluat
    return y32;
 }

-static inline void silk_A2NLSF_init(
+static OPUS_INLINE void silk_A2NLSF_init(
     const opus_int32    *a_Q16,
     opus_int32          *P,
     opus_int32          *Q,
--- a/media/libopus/silk/API.h
+++ b/media/libopus/silk/API.h
@ -64,6 +64,7 @@ opus_int silk_Get_Encoder_Size(                         /* O    Returns error co
 /*************************/
 opus_int silk_InitEncoder(                              /* O    Returns error code                              */
    void                            *encState,          /* I/O  State                                           */
+    int                              arch,              /* I    Run-time architecture                           */
    silk_EncControlStruct           *encStatus          /* O    Encoder Status                                  */
 );

--- a/media/libopus/silk/CNG.c
+++ b/media/libopus/silk/CNG.c
@ -33,7 +33,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "stack_alloc.h"

 /* Generates excitation for CNG LPC synthesis */
-static inline void silk_CNG_exc(
+static OPUS_INLINE void silk_CNG_exc(
    opus_int32                       residual_Q10[],     /* O    CNG residual signal Q10                     */
    opus_int32                       exc_buf_Q14[],      /* I    Random samples buffer Q10                   */
    opus_int32                       Gain_Q16,           /* I    Gain to apply                               */
--- a/media/libopus/silk/Inlines.h
+++ b/media/libopus/silk/Inlines.h
@ -26,7 +26,7 @@ POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/

 /*! \file silk_Inlines.h
- *  \brief silk_Inlines.h defines inline signal processing functions.
+ *  \brief silk_Inlines.h defines OPUS_INLINE signal processing functions.
 */

 #ifndef SILK_FIX_INLINES_H
@ -38,7 +38,7 @@ extern "C"
 #endif

 /* count leading zeros of opus_int64 */
-static inline opus_int32 silk_CLZ64( opus_int64 in )
+static OPUS_INLINE opus_int32 silk_CLZ64( opus_int64 in )
 {
    opus_int32 in_upper;

@ -53,7 +53,7 @@ static inline opus_int32 silk_CLZ64( opus_int64 in )
 }

 /* get number of leading zeros and fractional part (the bits right after the leading one */
-static inline void silk_CLZ_FRAC(
+static OPUS_INLINE void silk_CLZ_FRAC(
    opus_int32 in,            /* I  input                               */
    opus_int32 *lz,           /* O  number of leading zeros             */
    opus_int32 *frac_Q7       /* O  the 7 bits right after the leading one */
@ -68,7 +68,7 @@ static inline void silk_CLZ_FRAC(
 /* Approximation of square root                                          */
 /* Accuracy: < +/- 10%  for output values > 15                           */
 /*           < +/- 2.5% for output values > 120                          */
-static inline opus_int32 silk_SQRT_APPROX( opus_int32 x )
+static OPUS_INLINE opus_int32 silk_SQRT_APPROX( opus_int32 x )
 {
    opus_int32 y, lz, frac_Q7;

@ -94,7 +94,7 @@ static inline opus_int32 silk_SQRT_APPROX( opus_int32 x )
 }

 /* Divide two int32 values and return result as int32 in a given Q-domain */
-static inline opus_int32 silk_DIV32_varQ(   /* O    returns a good approximation of "(a32 << Qres) / b32" */
+static OPUS_INLINE opus_int32 silk_DIV32_varQ(   /* O    returns a good approximation of "(a32 << Qres) / b32" */
    const opus_int32     a32,               /* I    numerator (Q0)                  */
    const opus_int32     b32,               /* I    denominator (Q0)                */
    const opus_int       Qres               /* I    Q-domain of result (>= 0)       */
@ -140,7 +140,7 @@ static inline opus_int32 silk_DIV32_varQ(   /* O    returns a good approximation
 }

 /* Invert int32 value and return result as int32 in a given Q-domain */
-static inline opus_int32 silk_INVERSE32_varQ(   /* O    returns a good approximation of "(1 << Qres) / b32" */
+static OPUS_INLINE opus_int32 silk_INVERSE32_varQ(   /* O    returns a good approximation of "(1 << Qres) / b32" */
    const opus_int32     b32,                   /* I    denominator (Q0)                */
    const opus_int       Qres                   /* I    Q-domain of result (> 0)        */
 )
--- a/media/libopus/silk/LP_variable_cutoff.c
+++ b/media/libopus/silk/LP_variable_cutoff.c
@ -38,7 +38,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "main.h"

 /* Helper function, interpolates the filter taps */
-static inline void silk_LP_interpolate_filter_taps(
+static OPUS_INLINE void silk_LP_interpolate_filter_taps(
    opus_int32           B_Q28[ TRANSITION_NB ],
    opus_int32           A_Q28[ TRANSITION_NA ],
    const opus_int       ind,
--- a/media/libopus/silk/MacroCount.h
+++ b/media/libopus/silk/MacroCount.h
@ -34,11 +34,11 @@ POSSIBILITY OF SUCH DAMAGE.

 extern opus_int64 ops_count;

-static inline opus_int64 silk_SaveCount(){
+static OPUS_INLINE opus_int64 silk_SaveCount(){
    return(ops_count);
 }

-static inline opus_int64 silk_SaveResetCount(){
+static OPUS_INLINE opus_int64 silk_SaveResetCount(){
    opus_int64 ret;

    ret = ops_count;
@ -46,12 +46,12 @@ static inline opus_int64 silk_SaveResetCount(){
    return(ret);
 }

-static inline silk_PrintCount(){
+static OPUS_INLINE silk_PrintCount(){
    printf("ops_count = %d \n ", (opus_int32)ops_count);
 }

 #undef silk_MUL
-static inline opus_int32 silk_MUL(opus_int32 a32, opus_int32 b32){
+static OPUS_INLINE opus_int32 silk_MUL(opus_int32 a32, opus_int32 b32){
    opus_int32 ret;
    ops_count += 4;
    ret = a32 * b32;
@ -59,14 +59,14 @@ static inline opus_int32 silk_MUL(opus_int32 a32, opus_int32 b32){
 }

 #undef silk_MUL_uint
-static inline opus_uint32 silk_MUL_uint(opus_uint32 a32, opus_uint32 b32){
+static OPUS_INLINE opus_uint32 silk_MUL_uint(opus_uint32 a32, opus_uint32 b32){
    opus_uint32 ret;
    ops_count += 4;
    ret = a32 * b32;
    return ret;
 }
 #undef silk_MLA
-static inline opus_int32 silk_MLA(opus_int32 a32, opus_int32 b32, opus_int32 c32){
+static OPUS_INLINE opus_int32 silk_MLA(opus_int32 a32, opus_int32 b32, opus_int32 c32){
    opus_int32 ret;
    ops_count += 4;
    ret = a32 + b32 * c32;
@ -74,7 +74,7 @@ static inline opus_int32 silk_MLA(opus_int32 a32, opus_int32 b32, opus_int32 c32
 }

 #undef silk_MLA_uint
-static inline opus_int32 silk_MLA_uint(opus_uint32 a32, opus_uint32 b32, opus_uint32 c32){
+static OPUS_INLINE opus_int32 silk_MLA_uint(opus_uint32 a32, opus_uint32 b32, opus_uint32 c32){
    opus_uint32 ret;
    ops_count += 4;
    ret = a32 + b32 * c32;
@ -82,14 +82,14 @@ static inline opus_int32 silk_MLA_uint(opus_uint32 a32, opus_uint32 b32, opus_ui
 }

 #undef silk_SMULWB
-static inline opus_int32 silk_SMULWB(opus_int32 a32, opus_int32 b32){
+static OPUS_INLINE opus_int32 silk_SMULWB(opus_int32 a32, opus_int32 b32){
    opus_int32 ret;
    ops_count += 5;
    ret = (a32 >> 16) * (opus_int32)((opus_int16)b32) + (((a32 & 0x0000FFFF) * (opus_int32)((opus_int16)b32)) >> 16);
    return ret;
 }
 #undef    silk_SMLAWB
-static inline opus_int32 silk_SMLAWB(opus_int32 a32, opus_int32 b32, opus_int32 c32){
+static OPUS_INLINE opus_int32 silk_SMLAWB(opus_int32 a32, opus_int32 b32, opus_int32 c32){
    opus_int32 ret;
    ops_count += 5;
    ret = ((a32) + ((((b32) >> 16) * (opus_int32)((opus_int16)(c32))) + ((((b32) & 0x0000FFFF) * (opus_int32)((opus_int16)(c32))) >> 16)));
@ -97,14 +97,14 @@ static inline opus_int32 silk_SMLAWB(opus_int32 a32, opus_int32 b32, opus_int32
 }

 #undef silk_SMULWT
-static inline opus_int32 silk_SMULWT(opus_int32 a32, opus_int32 b32){
+static OPUS_INLINE opus_int32 silk_SMULWT(opus_int32 a32, opus_int32 b32){
    opus_int32 ret;
    ops_count += 4;
    ret = (a32 >> 16) * (b32 >> 16) + (((a32 & 0x0000FFFF) * (b32 >> 16)) >> 16);
    return ret;
 }
 #undef silk_SMLAWT
-static inline opus_int32 silk_SMLAWT(opus_int32 a32, opus_int32 b32, opus_int32 c32){
+static OPUS_INLINE opus_int32 silk_SMLAWT(opus_int32 a32, opus_int32 b32, opus_int32 c32){
    opus_int32 ret;
    ops_count += 4;
    ret = a32 + ((b32 >> 16) * (c32 >> 16)) + (((b32 & 0x0000FFFF) * ((c32 >> 16)) >> 16));
@ -112,14 +112,14 @@ static inline opus_int32 silk_SMLAWT(opus_int32 a32, opus_int32 b32, opus_int32
 }

 #undef silk_SMULBB
-static inline opus_int32 silk_SMULBB(opus_int32 a32, opus_int32 b32){
+static OPUS_INLINE opus_int32 silk_SMULBB(opus_int32 a32, opus_int32 b32){
    opus_int32 ret;
    ops_count += 1;
    ret = (opus_int32)((opus_int16)a32) * (opus_int32)((opus_int16)b32);
    return ret;
 }
 #undef silk_SMLABB
-static inline opus_int32 silk_SMLABB(opus_int32 a32, opus_int32 b32, opus_int32 c32){
+static OPUS_INLINE opus_int32 silk_SMLABB(opus_int32 a32, opus_int32 b32, opus_int32 c32){
    opus_int32 ret;
    ops_count += 1;
    ret = a32 + (opus_int32)((opus_int16)b32) * (opus_int32)((opus_int16)c32);
@ -127,7 +127,7 @@ static inline opus_int32 silk_SMLABB(opus_int32 a32, opus_int32 b32, opus_int32
 }

 #undef silk_SMULBT
-static inline opus_int32 silk_SMULBT(opus_int32 a32, opus_int32 b32 ){
+static OPUS_INLINE opus_int32 silk_SMULBT(opus_int32 a32, opus_int32 b32 ){
    opus_int32 ret;
    ops_count += 4;
    ret = ((opus_int32)((opus_int16)a32)) * (b32 >> 16);
@ -135,7 +135,7 @@ static inline opus_int32 silk_SMULBT(opus_int32 a32, opus_int32 b32 ){
 }

 #undef silk_SMLABT
-static inline opus_int32 silk_SMLABT(opus_int32 a32, opus_int32 b32, opus_int32 c32){
+static OPUS_INLINE opus_int32 silk_SMLABT(opus_int32 a32, opus_int32 b32, opus_int32 c32){
    opus_int32 ret;
    ops_count += 1;
    ret = a32 + ((opus_int32)((opus_int16)b32)) * (c32 >> 16);
@ -143,7 +143,7 @@ static inline opus_int32 silk_SMLABT(opus_int32 a32, opus_int32 b32, opus_int32
 }

 #undef silk_SMULTT
-static inline opus_int32 silk_SMULTT(opus_int32 a32, opus_int32 b32){
+static OPUS_INLINE opus_int32 silk_SMULTT(opus_int32 a32, opus_int32 b32){
    opus_int32 ret;
    ops_count += 1;
    ret = (a32 >> 16) * (b32 >> 16);
@ -151,7 +151,7 @@ static inline opus_int32 silk_SMULTT(opus_int32 a32, opus_int32 b32){
 }

 #undef    silk_SMLATT
-static inline opus_int32 silk_SMLATT(opus_int32 a32, opus_int32 b32, opus_int32 c32){
+static OPUS_INLINE opus_int32 silk_SMLATT(opus_int32 a32, opus_int32 b32, opus_int32 c32){
    opus_int32 ret;
    ops_count += 1;
    ret = a32 + (b32 >> 16) * (c32 >> 16);
@ -179,7 +179,7 @@ static inline opus_int32 silk_SMLATT(opus_int32 a32, opus_int32 b32, opus_int32
 #define silk_SMLAWT_ovflw silk_SMLAWT

 #undef silk_SMULL
-static inline opus_int64 silk_SMULL(opus_int32 a32, opus_int32 b32){
+static OPUS_INLINE opus_int64 silk_SMULL(opus_int32 a32, opus_int32 b32){
    opus_int64 ret;
    ops_count += 8;
    ret = ((opus_int64)(a32) * /*(opus_int64)*/(b32));
@ -187,14 +187,14 @@ static inline opus_int64 silk_SMULL(opus_int32 a32, opus_int32 b32){
 }

 #undef    silk_SMLAL
-static inline opus_int64 silk_SMLAL(opus_int64 a64, opus_int32 b32, opus_int32 c32){
+static OPUS_INLINE opus_int64 silk_SMLAL(opus_int64 a64, opus_int32 b32, opus_int32 c32){
    opus_int64 ret;
    ops_count += 8;
    ret = a64 + ((opus_int64)(b32) * /*(opus_int64)*/(c32));
    return ret;
 }
 #undef    silk_SMLALBB
-static inline opus_int64 silk_SMLALBB(opus_int64 a64, opus_int16 b16, opus_int16 c16){
+static OPUS_INLINE opus_int64 silk_SMLALBB(opus_int64 a64, opus_int16 b16, opus_int16 c16){
    opus_int64 ret;
    ops_count += 4;
    ret = a64 + ((opus_int64)(b16) * /*(opus_int64)*/(c16));
@ -202,7 +202,7 @@ static inline opus_int64 silk_SMLALBB(opus_int64 a64, opus_int16 b16, opus_int16
 }

 #undef    SigProcFIX_CLZ16
-static inline opus_int32 SigProcFIX_CLZ16(opus_int16 in16)
+static OPUS_INLINE opus_int32 SigProcFIX_CLZ16(opus_int16 in16)
 {
    opus_int32 out32 = 0;
    ops_count += 10;
@ -240,7 +240,7 @@ static inline opus_int32 SigProcFIX_CLZ16(opus_int16 in16)
 }

 #undef SigProcFIX_CLZ32
-static inline opus_int32 SigProcFIX_CLZ32(opus_int32 in32)
+static OPUS_INLINE opus_int32 SigProcFIX_CLZ32(opus_int32 in32)
 {
    /* test highest 16 bits and convert to opus_int16 */
    ops_count += 2;
@ -252,19 +252,19 @@ static inline opus_int32 SigProcFIX_CLZ32(opus_int32 in32)
 }

 #undef silk_DIV32
-static inline opus_int32 silk_DIV32(opus_int32 a32, opus_int32 b32){
+static OPUS_INLINE opus_int32 silk_DIV32(opus_int32 a32, opus_int32 b32){
    ops_count += 64;
    return a32 / b32;
 }

 #undef silk_DIV32_16
-static inline opus_int32 silk_DIV32_16(opus_int32 a32, opus_int32 b32){
+static OPUS_INLINE opus_int32 silk_DIV32_16(opus_int32 a32, opus_int32 b32){
    ops_count += 32;
    return a32 / b32;
 }

 #undef silk_SAT8
-static inline opus_int8 silk_SAT8(opus_int64 a){
+static OPUS_INLINE opus_int8 silk_SAT8(opus_int64 a){
    opus_int8 tmp;
    ops_count += 1;
    tmp = (opus_int8)((a) > silk_int8_MAX ? silk_int8_MAX  : \
@ -273,7 +273,7 @@ static inline opus_int8 silk_SAT8(opus_int64 a){
 }

 #undef silk_SAT16
-static inline opus_int16 silk_SAT16(opus_int64 a){
+static OPUS_INLINE opus_int16 silk_SAT16(opus_int64 a){
    opus_int16 tmp;
    ops_count += 1;
    tmp = (opus_int16)((a) > silk_int16_MAX ? silk_int16_MAX  : \
@ -281,7 +281,7 @@ static inline opus_int16 silk_SAT16(opus_int64 a){
    return(tmp);
 }
 #undef silk_SAT32
-static inline opus_int32 silk_SAT32(opus_int64 a){
+static OPUS_INLINE opus_int32 silk_SAT32(opus_int64 a){
    opus_int32 tmp;
    ops_count += 1;
    tmp = (opus_int32)((a) > silk_int32_MAX ? silk_int32_MAX  : \
@ -289,7 +289,7 @@ static inline opus_int32 silk_SAT32(opus_int64 a){
    return(tmp);
 }
 #undef silk_POS_SAT32
-static inline opus_int32 silk_POS_SAT32(opus_int64 a){
+static OPUS_INLINE opus_int32 silk_POS_SAT32(opus_int64 a){
    opus_int32 tmp;
    ops_count += 1;
    tmp = (opus_int32)((a) > silk_int32_MAX ? silk_int32_MAX : (a));
@ -297,14 +297,14 @@ static inline opus_int32 silk_POS_SAT32(opus_int64 a){
 }

 #undef silk_ADD_POS_SAT8
-static inline opus_int8 silk_ADD_POS_SAT8(opus_int64 a, opus_int64 b){
+static OPUS_INLINE opus_int8 silk_ADD_POS_SAT8(opus_int64 a, opus_int64 b){
    opus_int8 tmp;
    ops_count += 1;
    tmp = (opus_int8)((((a)+(b)) & 0x80) ? silk_int8_MAX  : ((a)+(b)));
    return(tmp);
 }
 #undef silk_ADD_POS_SAT16
-static inline opus_int16 silk_ADD_POS_SAT16(opus_int64 a, opus_int64 b){
+static OPUS_INLINE opus_int16 silk_ADD_POS_SAT16(opus_int64 a, opus_int64 b){
    opus_int16 tmp;
    ops_count += 1;
    tmp = (opus_int16)((((a)+(b)) & 0x8000) ? silk_int16_MAX : ((a)+(b)));
@ -312,7 +312,7 @@ static inline opus_int16 silk_ADD_POS_SAT16(opus_int64 a, opus_int64 b){
 }

 #undef silk_ADD_POS_SAT32
-static inline opus_int32 silk_ADD_POS_SAT32(opus_int64 a, opus_int64 b){
+static OPUS_INLINE opus_int32 silk_ADD_POS_SAT32(opus_int64 a, opus_int64 b){
    opus_int32 tmp;
    ops_count += 1;
    tmp = (opus_int32)((((a)+(b)) & 0x80000000) ? silk_int32_MAX : ((a)+(b)));
@ -320,7 +320,7 @@ static inline opus_int32 silk_ADD_POS_SAT32(opus_int64 a, opus_int64 b){
 }

 #undef silk_ADD_POS_SAT64
-static inline opus_int64 silk_ADD_POS_SAT64(opus_int64 a, opus_int64 b){
+static OPUS_INLINE opus_int64 silk_ADD_POS_SAT64(opus_int64 a, opus_int64 b){
    opus_int64 tmp;
    ops_count += 1;
    tmp = ((((a)+(b)) & 0x8000000000000000LL) ? silk_int64_MAX : ((a)+(b)));
@ -328,40 +328,40 @@ static inline opus_int64 silk_ADD_POS_SAT64(opus_int64 a, opus_int64 b){
 }

 #undef    silk_LSHIFT8
-static inline opus_int8 silk_LSHIFT8(opus_int8 a, opus_int32 shift){
+static OPUS_INLINE opus_int8 silk_LSHIFT8(opus_int8 a, opus_int32 shift){
    opus_int8 ret;
    ops_count += 1;
    ret = a << shift;
    return ret;
 }
 #undef    silk_LSHIFT16
-static inline opus_int16 silk_LSHIFT16(opus_int16 a, opus_int32 shift){
+static OPUS_INLINE opus_int16 silk_LSHIFT16(opus_int16 a, opus_int32 shift){
    opus_int16 ret;
    ops_count += 1;
    ret = a << shift;
    return ret;
 }
 #undef    silk_LSHIFT32
-static inline opus_int32 silk_LSHIFT32(opus_int32 a, opus_int32 shift){
+static OPUS_INLINE opus_int32 silk_LSHIFT32(opus_int32 a, opus_int32 shift){
    opus_int32 ret;
    ops_count += 1;
    ret = a << shift;
    return ret;
 }
 #undef    silk_LSHIFT64
-static inline opus_int64 silk_LSHIFT64(opus_int64 a, opus_int shift){
+static OPUS_INLINE opus_int64 silk_LSHIFT64(opus_int64 a, opus_int shift){
    ops_count += 1;
    return a << shift;
 }

 #undef    silk_LSHIFT_ovflw
-static inline opus_int32 silk_LSHIFT_ovflw(opus_int32 a, opus_int32 shift){
+static OPUS_INLINE opus_int32 silk_LSHIFT_ovflw(opus_int32 a, opus_int32 shift){
    ops_count += 1;
    return a << shift;
 }

 #undef    silk_LSHIFT_uint
-static inline opus_uint32 silk_LSHIFT_uint(opus_uint32 a, opus_int32 shift){
+static OPUS_INLINE opus_uint32 silk_LSHIFT_uint(opus_uint32 a, opus_int32 shift){
    opus_uint32 ret;
    ops_count += 1;
    ret = a << shift;
@ -369,83 +369,83 @@ static inline opus_uint32 silk_LSHIFT_uint(opus_uint32 a, opus_int32 shift){
 }

 #undef    silk_RSHIFT8
-static inline opus_int8 silk_RSHIFT8(opus_int8 a, opus_int32 shift){
+static OPUS_INLINE opus_int8 silk_RSHIFT8(opus_int8 a, opus_int32 shift){
    ops_count += 1;
    return a >> shift;
 }
 #undef    silk_RSHIFT16
-static inline opus_int16 silk_RSHIFT16(opus_int16 a, opus_int32 shift){
+static OPUS_INLINE opus_int16 silk_RSHIFT16(opus_int16 a, opus_int32 shift){
    ops_count += 1;
    return a >> shift;
 }
 #undef    silk_RSHIFT32
-static inline opus_int32 silk_RSHIFT32(opus_int32 a, opus_int32 shift){
+static OPUS_INLINE opus_int32 silk_RSHIFT32(opus_int32 a, opus_int32 shift){
    ops_count += 1;
    return a >> shift;
 }
 #undef    silk_RSHIFT64
-static inline opus_int64 silk_RSHIFT64(opus_int64 a, opus_int64 shift){
+static OPUS_INLINE opus_int64 silk_RSHIFT64(opus_int64 a, opus_int64 shift){
    ops_count += 1;
    return a >> shift;
 }

 #undef    silk_RSHIFT_uint
-static inline opus_uint32 silk_RSHIFT_uint(opus_uint32 a, opus_int32 shift){
+static OPUS_INLINE opus_uint32 silk_RSHIFT_uint(opus_uint32 a, opus_int32 shift){
    ops_count += 1;
    return a >> shift;
 }

 #undef    silk_ADD_LSHIFT
-static inline opus_int32 silk_ADD_LSHIFT(opus_int32 a, opus_int32 b, opus_int32 shift){
+static OPUS_INLINE opus_int32 silk_ADD_LSHIFT(opus_int32 a, opus_int32 b, opus_int32 shift){
    opus_int32 ret;
    ops_count += 1;
    ret = a + (b << shift);
    return ret;                /* shift >= 0*/
 }
 #undef    silk_ADD_LSHIFT32
-static inline opus_int32 silk_ADD_LSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){
+static OPUS_INLINE opus_int32 silk_ADD_LSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){
    opus_int32 ret;
    ops_count += 1;
    ret = a + (b << shift);
    return ret;                /* shift >= 0*/
 }
 #undef    silk_ADD_LSHIFT_uint
-static inline opus_uint32 silk_ADD_LSHIFT_uint(opus_uint32 a, opus_uint32 b, opus_int32 shift){
+static OPUS_INLINE opus_uint32 silk_ADD_LSHIFT_uint(opus_uint32 a, opus_uint32 b, opus_int32 shift){
    opus_uint32 ret;
    ops_count += 1;
    ret = a + (b << shift);
    return ret;                /* shift >= 0*/
 }
 #undef    silk_ADD_RSHIFT
-static inline opus_int32 silk_ADD_RSHIFT(opus_int32 a, opus_int32 b, opus_int32 shift){
+static OPUS_INLINE opus_int32 silk_ADD_RSHIFT(opus_int32 a, opus_int32 b, opus_int32 shift){
    opus_int32 ret;
    ops_count += 1;
    ret = a + (b >> shift);
    return ret;                /* shift  > 0*/
 }
 #undef    silk_ADD_RSHIFT32
-static inline opus_int32 silk_ADD_RSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){
+static OPUS_INLINE opus_int32 silk_ADD_RSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){
    opus_int32 ret;
    ops_count += 1;
    ret = a + (b >> shift);
    return ret;                /* shift  > 0*/
 }
 #undef    silk_ADD_RSHIFT_uint
-static inline opus_uint32 silk_ADD_RSHIFT_uint(opus_uint32 a, opus_uint32 b, opus_int32 shift){
+static OPUS_INLINE opus_uint32 silk_ADD_RSHIFT_uint(opus_uint32 a, opus_uint32 b, opus_int32 shift){
    opus_uint32 ret;
    ops_count += 1;
    ret = a + (b >> shift);
    return ret;                /* shift  > 0*/
 }
 #undef    silk_SUB_LSHIFT32
-static inline opus_int32 silk_SUB_LSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){
+static OPUS_INLINE opus_int32 silk_SUB_LSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){
    opus_int32 ret;
    ops_count += 1;
    ret = a - (b << shift);
    return ret;                /* shift >= 0*/
 }
 #undef    silk_SUB_RSHIFT32
-static inline opus_int32 silk_SUB_RSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){
+static OPUS_INLINE opus_int32 silk_SUB_RSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){
    opus_int32 ret;
    ops_count += 1;
    ret = a - (b >> shift);
@ -453,7 +453,7 @@ static inline opus_int32 silk_SUB_RSHIFT32(opus_int32 a, opus_int32 b, opus_int3
 }

 #undef    silk_RSHIFT_ROUND
-static inline opus_int32 silk_RSHIFT_ROUND(opus_int32 a, opus_int32 shift){
+static OPUS_INLINE opus_int32 silk_RSHIFT_ROUND(opus_int32 a, opus_int32 shift){
    opus_int32 ret;
    ops_count += 3;
    ret = shift == 1 ? (a >> 1) + (a & 1) : ((a >> (shift - 1)) + 1) >> 1;
@ -461,7 +461,7 @@ static inline opus_int32 silk_RSHIFT_ROUND(opus_int32 a, opus_int32 shift){
 }

 #undef    silk_RSHIFT_ROUND64
-static inline opus_int64 silk_RSHIFT_ROUND64(opus_int64 a, opus_int32 shift){
+static OPUS_INLINE opus_int64 silk_RSHIFT_ROUND64(opus_int64 a, opus_int32 shift){
    opus_int64 ret;
    ops_count += 6;
    ret = shift == 1 ? (a >> 1) + (a & 1) : ((a >> (shift - 1)) + 1) >> 1;
@ -469,13 +469,13 @@ static inline opus_int64 silk_RSHIFT_ROUND64(opus_int64 a, opus_int32 shift){
 }

 #undef    silk_abs_int64
-static inline opus_int64 silk_abs_int64(opus_int64 a){
+static OPUS_INLINE opus_int64 silk_abs_int64(opus_int64 a){
    ops_count += 1;
    return (((a) >  0)  ? (a) : -(a));            /* Be careful, silk_abs returns wrong when input equals to silk_intXX_MIN*/
 }

 #undef    silk_abs_int32
-static inline opus_int32 silk_abs_int32(opus_int32 a){
+static OPUS_INLINE opus_int32 silk_abs_int32(opus_int32 a){
    ops_count += 1;
    return silk_abs(a);
 }
@ -498,7 +498,7 @@ static silk_sign(a){
 }

 #undef    silk_ADD16
-static inline opus_int16 silk_ADD16(opus_int16 a, opus_int16 b){
+static OPUS_INLINE opus_int16 silk_ADD16(opus_int16 a, opus_int16 b){
    opus_int16 ret;
    ops_count += 1;
    ret = a + b;
@ -506,7 +506,7 @@ static inline opus_int16 silk_ADD16(opus_int16 a, opus_int16 b){
 }

 #undef    silk_ADD32
-static inline opus_int32 silk_ADD32(opus_int32 a, opus_int32 b){
+static OPUS_INLINE opus_int32 silk_ADD32(opus_int32 a, opus_int32 b){
    opus_int32 ret;
    ops_count += 1;
    ret = a + b;
@ -514,7 +514,7 @@ static inline opus_int32 silk_ADD32(opus_int32 a, opus_int32 b){
 }

 #undef    silk_ADD64
-static inline opus_int64 silk_ADD64(opus_int64 a, opus_int64 b){
+static OPUS_INLINE opus_int64 silk_ADD64(opus_int64 a, opus_int64 b){
    opus_int64 ret;
    ops_count += 2;
    ret = a + b;
@ -522,7 +522,7 @@ static inline opus_int64 silk_ADD64(opus_int64 a, opus_int64 b){
 }

 #undef    silk_SUB16
-static inline opus_int16 silk_SUB16(opus_int16 a, opus_int16 b){
+static OPUS_INLINE opus_int16 silk_SUB16(opus_int16 a, opus_int16 b){
    opus_int16 ret;
    ops_count += 1;
    ret = a - b;
@ -530,7 +530,7 @@ static inline opus_int16 silk_SUB16(opus_int16 a, opus_int16 b){
 }

 #undef    silk_SUB32
-static inline opus_int32 silk_SUB32(opus_int32 a, opus_int32 b){
+static OPUS_INLINE opus_int32 silk_SUB32(opus_int32 a, opus_int32 b){
    opus_int32 ret;
    ops_count += 1;
    ret = a - b;
@ -538,7 +538,7 @@ static inline opus_int32 silk_SUB32(opus_int32 a, opus_int32 b){
 }

 #undef    silk_SUB64
-static inline opus_int64 silk_SUB64(opus_int64 a, opus_int64 b){
+static OPUS_INLINE opus_int64 silk_SUB64(opus_int64 a, opus_int64 b){
    opus_int64 ret;
    ops_count += 2;
    ret = a - b;
@ -546,7 +546,7 @@ static inline opus_int64 silk_SUB64(opus_int64 a, opus_int64 b){
 }

 #undef silk_ADD_SAT16
-static inline opus_int16 silk_ADD_SAT16( opus_int16 a16, opus_int16 b16 ) {
+static OPUS_INLINE opus_int16 silk_ADD_SAT16( opus_int16 a16, opus_int16 b16 ) {
    opus_int16 res;
    /* Nb will be counted in AKP_add32 and silk_SAT16*/
    res = (opus_int16)silk_SAT16( silk_ADD32( (opus_int32)(a16), (b16) ) );
@ -554,7 +554,7 @@ static inline opus_int16 silk_ADD_SAT16( opus_int16 a16, opus_int16 b16 ) {
 }

 #undef silk_ADD_SAT32
-static inline opus_int32 silk_ADD_SAT32(opus_int32 a32, opus_int32 b32){
+static OPUS_INLINE opus_int32 silk_ADD_SAT32(opus_int32 a32, opus_int32 b32){
    opus_int32 res;
    ops_count += 1;
    res =    ((((a32) + (b32)) & 0x80000000) == 0 ?                                    \
@ -564,7 +564,7 @@ static inline opus_int32 silk_ADD_SAT32(opus_int32 a32, opus_int32 b32){
 }

 #undef silk_ADD_SAT64
-static inline opus_int64 silk_ADD_SAT64( opus_int64 a64, opus_int64 b64 ) {
+static OPUS_INLINE opus_int64 silk_ADD_SAT64( opus_int64 a64, opus_int64 b64 ) {
    opus_int64 res;
    ops_count += 1;
    res =    ((((a64) + (b64)) & 0x8000000000000000LL) == 0 ?                                \
@ -574,7 +574,7 @@ static inline opus_int64 silk_ADD_SAT64( opus_int64 a64, opus_int64 b64 ) {
 }

 #undef silk_SUB_SAT16
-static inline opus_int16 silk_SUB_SAT16( opus_int16 a16, opus_int16 b16 ) {
+static OPUS_INLINE opus_int16 silk_SUB_SAT16( opus_int16 a16, opus_int16 b16 ) {
    opus_int16 res;
    silk_assert(0);
    /* Nb will be counted in sub-macros*/
@ -583,7 +583,7 @@ static inline opus_int16 silk_SUB_SAT16( opus_int16 a16, opus_int16 b16 ) {
 }

 #undef silk_SUB_SAT32
-static inline opus_int32 silk_SUB_SAT32( opus_int32 a32, opus_int32 b32 ) {
+static OPUS_INLINE opus_int32 silk_SUB_SAT32( opus_int32 a32, opus_int32 b32 ) {
    opus_int32 res;
    ops_count += 1;
    res =     ((((a32)-(b32)) & 0x80000000) == 0 ?                                            \
@ -593,7 +593,7 @@ static inline opus_int32 silk_SUB_SAT32( opus_int32 a32, opus_int32 b32 ) {
 }

 #undef silk_SUB_SAT64
-static inline opus_int64 silk_SUB_SAT64( opus_int64 a64, opus_int64 b64 ) {
+static OPUS_INLINE opus_int64 silk_SUB_SAT64( opus_int64 a64, opus_int64 b64 ) {
    opus_int64 res;
    ops_count += 1;
    res =    ((((a64)-(b64)) & 0x8000000000000000LL) == 0 ?                                                        \
@ -604,7 +604,7 @@ static inline opus_int64 silk_SUB_SAT64( opus_int64 a64, opus_int64 b64 ) {
 }

 #undef    silk_SMULWW
-static inline opus_int32 silk_SMULWW(opus_int32 a32, opus_int32 b32){
+static OPUS_INLINE opus_int32 silk_SMULWW(opus_int32 a32, opus_int32 b32){
    opus_int32 ret;
    /* Nb will be counted in sub-macros*/
    ret = silk_MLA(silk_SMULWB((a32), (b32)), (a32), silk_RSHIFT_ROUND((b32), 16));
@ -612,7 +612,7 @@ static inline opus_int32 silk_SMULWW(opus_int32 a32, opus_int32 b32){
 }

 #undef    silk_SMLAWW
-static inline opus_int32 silk_SMLAWW(opus_int32 a32, opus_int32 b32, opus_int32 c32){
+static OPUS_INLINE opus_int32 silk_SMLAWW(opus_int32 a32, opus_int32 b32, opus_int32 c32){
    opus_int32 ret;
    /* Nb will be counted in sub-macros*/
    ret = silk_MLA(silk_SMLAWB((a32), (b32), (c32)), (b32), silk_RSHIFT_ROUND((c32), 16));
@ -620,26 +620,26 @@ static inline opus_int32 silk_SMLAWW(opus_int32 a32, opus_int32 b32, opus_int32
 }

 #undef    silk_min_int
-static inline opus_int silk_min_int(opus_int a, opus_int b)
+static OPUS_INLINE opus_int silk_min_int(opus_int a, opus_int b)
 {
    ops_count += 1;
    return (((a) < (b)) ? (a) : (b));
 }

 #undef    silk_min_16
-static inline opus_int16 silk_min_16(opus_int16 a, opus_int16 b)
+static OPUS_INLINE opus_int16 silk_min_16(opus_int16 a, opus_int16 b)
 {
    ops_count += 1;
    return (((a) < (b)) ? (a) : (b));
 }
 #undef    silk_min_32
-static inline opus_int32 silk_min_32(opus_int32 a, opus_int32 b)
+static OPUS_INLINE opus_int32 silk_min_32(opus_int32 a, opus_int32 b)
 {
    ops_count += 1;
    return (((a) < (b)) ? (a) : (b));
 }
 #undef    silk_min_64
-static inline opus_int64 silk_min_64(opus_int64 a, opus_int64 b)
+static OPUS_INLINE opus_int64 silk_min_64(opus_int64 a, opus_int64 b)
 {
    ops_count += 1;
    return (((a) < (b)) ? (a) : (b));
@ -647,26 +647,26 @@ static inline opus_int64 silk_min_64(opus_int64 a, opus_int64 b)

 /* silk_min() versions with typecast in the function call */
 #undef    silk_max_int
-static inline opus_int silk_max_int(opus_int a, opus_int b)
+static OPUS_INLINE opus_int silk_max_int(opus_int a, opus_int b)
 {
    ops_count += 1;
    return (((a) > (b)) ? (a) : (b));
 }
 #undef    silk_max_16
-static inline opus_int16 silk_max_16(opus_int16 a, opus_int16 b)
+static OPUS_INLINE opus_int16 silk_max_16(opus_int16 a, opus_int16 b)
 {
    ops_count += 1;
    return (((a) > (b)) ? (a) : (b));
 }
 #undef    silk_max_32
-static inline opus_int32 silk_max_32(opus_int32 a, opus_int32 b)
+static OPUS_INLINE opus_int32 silk_max_32(opus_int32 a, opus_int32 b)
 {
    ops_count += 1;
    return (((a) > (b)) ? (a) : (b));
 }

 #undef    silk_max_64
-static inline opus_int64 silk_max_64(opus_int64 a, opus_int64 b)
+static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b)
 {
    ops_count += 1;
    return (((a) > (b)) ? (a) : (b));
@ -674,7 +674,7 @@ static inline opus_int64 silk_max_64(opus_int64 a, opus_int64 b)


 #undef silk_LIMIT_int
-static inline opus_int silk_LIMIT_int(opus_int a, opus_int limit1, opus_int limit2)
+static OPUS_INLINE opus_int silk_LIMIT_int(opus_int a, opus_int limit1, opus_int limit2)
 {
    opus_int ret;
    ops_count += 6;
@ -686,7 +686,7 @@ static inline opus_int silk_LIMIT_int(opus_int a, opus_int limit1, opus_int limi
 }

 #undef silk_LIMIT_16
-static inline opus_int16 silk_LIMIT_16(opus_int16 a, opus_int16 limit1, opus_int16 limit2)
+static OPUS_INLINE opus_int16 silk_LIMIT_16(opus_int16 a, opus_int16 limit1, opus_int16 limit2)
 {
    opus_int16 ret;
    ops_count += 6;
@ -699,7 +699,7 @@ return(ret);


 #undef silk_LIMIT_32
-static inline opus_int silk_LIMIT_32(opus_int32 a, opus_int32 limit1, opus_int32 limit2)
+static OPUS_INLINE opus_int silk_LIMIT_32(opus_int32 a, opus_int32 limit1, opus_int32 limit2)
 {
    opus_int32 ret;
    ops_count += 6;
--- a/media/libopus/silk/MacroDebug.h
+++ b/media/libopus/silk/MacroDebug.h
@ -36,7 +36,7 @@ POSSIBILITY OF SUCH DAMAGE.

 #undef silk_ADD16
 #define silk_ADD16(a,b) silk_ADD16_((a), (b), __FILE__, __LINE__)
-static inline opus_int16 silk_ADD16_(opus_int16 a, opus_int16 b, char *file, int line){
+static OPUS_INLINE opus_int16 silk_ADD16_(opus_int16 a, opus_int16 b, char *file, int line){
    opus_int16 ret;

    ret = a + b;
@ -52,7 +52,7 @@ static inline opus_int16 silk_ADD16_(opus_int16 a, opus_int16 b, char *file, int

 #undef silk_ADD32
 #define silk_ADD32(a,b) silk_ADD32_((a), (b), __FILE__, __LINE__)
-static inline opus_int32 silk_ADD32_(opus_int32 a, opus_int32 b, char *file, int line){
+static OPUS_INLINE opus_int32 silk_ADD32_(opus_int32 a, opus_int32 b, char *file, int line){
    opus_int32 ret;

    ret = a + b;
@ -68,7 +68,7 @@ static inline opus_int32 silk_ADD32_(opus_int32 a, opus_int32 b, char *file, int

 #undef silk_ADD64
 #define silk_ADD64(a,b) silk_ADD64_((a), (b), __FILE__, __LINE__)
-static inline opus_int64 silk_ADD64_(opus_int64 a, opus_int64 b, char *file, int line){
+static OPUS_INLINE opus_int64 silk_ADD64_(opus_int64 a, opus_int64 b, char *file, int line){
    opus_int64 ret;

    ret = a + b;
@ -84,7 +84,7 @@ static inline opus_int64 silk_ADD64_(opus_int64 a, opus_int64 b, char *file, int

 #undef silk_SUB16
 #define silk_SUB16(a,b) silk_SUB16_((a), (b), __FILE__, __LINE__)
-static inline opus_int16 silk_SUB16_(opus_int16 a, opus_int16 b, char *file, int line){
+static OPUS_INLINE opus_int16 silk_SUB16_(opus_int16 a, opus_int16 b, char *file, int line){
    opus_int16 ret;

    ret = a - b;
@ -100,7 +100,7 @@ static inline opus_int16 silk_SUB16_(opus_int16 a, opus_int16 b, char *file, int

 #undef silk_SUB32
 #define silk_SUB32(a,b) silk_SUB32_((a), (b), __FILE__, __LINE__)
-static inline opus_int32 silk_SUB32_(opus_int32 a, opus_int32 b, char *file, int line){
+static OPUS_INLINE opus_int32 silk_SUB32_(opus_int32 a, opus_int32 b, char *file, int line){
    opus_int32 ret;

    ret = a - b;
@ -116,7 +116,7 @@ static inline opus_int32 silk_SUB32_(opus_int32 a, opus_int32 b, char *file, int

 #undef silk_SUB64
 #define silk_SUB64(a,b) silk_SUB64_((a), (b), __FILE__, __LINE__)
-static inline opus_int64 silk_SUB64_(opus_int64 a, opus_int64 b, char *file, int line){
+static OPUS_INLINE opus_int64 silk_SUB64_(opus_int64 a, opus_int64 b, char *file, int line){
    opus_int64 ret;

    ret = a - b;
@ -132,7 +132,7 @@ static inline opus_int64 silk_SUB64_(opus_int64 a, opus_int64 b, char *file, int

 #undef silk_ADD_SAT16
 #define silk_ADD_SAT16(a,b) silk_ADD_SAT16_((a), (b), __FILE__, __LINE__)
-static inline opus_int16 silk_ADD_SAT16_( opus_int16 a16, opus_int16 b16, char *file, int line) {
+static OPUS_INLINE opus_int16 silk_ADD_SAT16_( opus_int16 a16, opus_int16 b16, char *file, int line) {
    opus_int16 res;
    res = (opus_int16)silk_SAT16( silk_ADD32( (opus_int32)(a16), (b16) ) );
    if ( res != silk_SAT16( (opus_int32)a16 + (opus_int32)b16 ) )
@ -147,7 +147,7 @@ static inline opus_int16 silk_ADD_SAT16_( opus_int16 a16, opus_int16 b16, char *

 #undef silk_ADD_SAT32
 #define silk_ADD_SAT32(a,b) silk_ADD_SAT32_((a), (b), __FILE__, __LINE__)
-static inline opus_int32 silk_ADD_SAT32_(opus_int32 a32, opus_int32 b32, char *file, int line){
+static OPUS_INLINE opus_int32 silk_ADD_SAT32_(opus_int32 a32, opus_int32 b32, char *file, int line){
    opus_int32 res;
    res =   ((((opus_uint32)(a32) + (opus_uint32)(b32)) & 0x80000000) == 0 ?       \
            ((((a32) & (b32)) & 0x80000000) != 0 ? silk_int32_MIN : (a32)+(b32)) : \
@ -164,7 +164,7 @@ static inline opus_int32 silk_ADD_SAT32_(opus_int32 a32, opus_int32 b32, char *f

 #undef silk_ADD_SAT64
 #define silk_ADD_SAT64(a,b) silk_ADD_SAT64_((a), (b), __FILE__, __LINE__)
-static inline opus_int64 silk_ADD_SAT64_( opus_int64 a64, opus_int64 b64, char *file, int line) {
+static OPUS_INLINE opus_int64 silk_ADD_SAT64_( opus_int64 a64, opus_int64 b64, char *file, int line) {
    opus_int64 res;
    int        fail = 0;
    res =   ((((a64) + (b64)) & 0x8000000000000000LL) == 0 ?                                 \
@ -193,7 +193,7 @@ static inline opus_int64 silk_ADD_SAT64_( opus_int64 a64, opus_int64 b64, char *

 #undef silk_SUB_SAT16
 #define silk_SUB_SAT16(a,b) silk_SUB_SAT16_((a), (b), __FILE__, __LINE__)
-static inline opus_int16 silk_SUB_SAT16_( opus_int16 a16, opus_int16 b16, char *file, int line ) {
+static OPUS_INLINE opus_int16 silk_SUB_SAT16_( opus_int16 a16, opus_int16 b16, char *file, int line ) {
    opus_int16 res;
    res = (opus_int16)silk_SAT16( silk_SUB32( (opus_int32)(a16), (b16) ) );
    if ( res != silk_SAT16( (opus_int32)a16 - (opus_int32)b16 ) )
@ -208,7 +208,7 @@ static inline opus_int16 silk_SUB_SAT16_( opus_int16 a16, opus_int16 b16, char *

 #undef silk_SUB_SAT32
 #define silk_SUB_SAT32(a,b) silk_SUB_SAT32_((a), (b), __FILE__, __LINE__)
-static inline opus_int32 silk_SUB_SAT32_( opus_int32 a32, opus_int32 b32, char *file, int line ) {
+static OPUS_INLINE opus_int32 silk_SUB_SAT32_( opus_int32 a32, opus_int32 b32, char *file, int line ) {
    opus_int32 res;
    res =   ((((opus_uint32)(a32)-(opus_uint32)(b32)) & 0x80000000) == 0 ?                \
            (( (a32) & ((b32)^0x80000000) & 0x80000000) ? silk_int32_MIN : (a32)-(b32)) : \
@ -225,7 +225,7 @@ static inline opus_int32 silk_SUB_SAT32_( opus_int32 a32, opus_int32 b32, char *

 #undef silk_SUB_SAT64
 #define silk_SUB_SAT64(a,b) silk_SUB_SAT64_((a), (b), __FILE__, __LINE__)
-static inline opus_int64 silk_SUB_SAT64_( opus_int64 a64, opus_int64 b64, char *file, int line ) {
+static OPUS_INLINE opus_int64 silk_SUB_SAT64_( opus_int64 a64, opus_int64 b64, char *file, int line ) {
    opus_int64 res;
    int        fail = 0;
    res =   ((((a64)-(b64)) & 0x8000000000000000LL) == 0 ?                                                    \
@ -254,7 +254,7 @@ static inline opus_int64 silk_SUB_SAT64_( opus_int64 a64, opus_int64 b64, char *

 #undef silk_MUL
 #define silk_MUL(a,b) silk_MUL_((a), (b), __FILE__, __LINE__)
-static inline opus_int32 silk_MUL_(opus_int32 a32, opus_int32 b32, char *file, int line){
+static OPUS_INLINE opus_int32 silk_MUL_(opus_int32 a32, opus_int32 b32, char *file, int line){
    opus_int32 ret;
    opus_int64 ret64;
    ret = a32 * b32;
@ -271,7 +271,7 @@ static inline opus_int32 silk_MUL_(opus_int32 a32, opus_int32 b32, char *file, i

 #undef silk_MUL_uint
 #define silk_MUL_uint(a,b) silk_MUL_uint_((a), (b), __FILE__, __LINE__)
-static inline opus_uint32 silk_MUL_uint_(opus_uint32 a32, opus_uint32 b32, char *file, int line){
+static OPUS_INLINE opus_uint32 silk_MUL_uint_(opus_uint32 a32, opus_uint32 b32, char *file, int line){
    opus_uint32 ret;
    ret = a32 * b32;
    if ( (opus_uint64)ret != (opus_uint64)a32 * (opus_uint64)b32 )
@ -286,7 +286,7 @@ static inline opus_uint32 silk_MUL_uint_(opus_uint32 a32, opus_uint32 b32, char

 #undef silk_MLA
 #define silk_MLA(a,b,c) silk_MLA_((a), (b), (c), __FILE__, __LINE__)
-static inline opus_int32 silk_MLA_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
+static OPUS_INLINE opus_int32 silk_MLA_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
    opus_int32 ret;
    ret = a32 + b32 * c32;
    if ( (opus_int64)ret != (opus_int64)a32 + (opus_int64)b32 * (opus_int64)c32 )
@ -301,7 +301,7 @@ static inline opus_int32 silk_MLA_(opus_int32 a32, opus_int32 b32, opus_int32 c3

 #undef silk_MLA_uint
 #define silk_MLA_uint(a,b,c) silk_MLA_uint_((a), (b), (c), __FILE__, __LINE__)
-static inline opus_int32 silk_MLA_uint_(opus_uint32 a32, opus_uint32 b32, opus_uint32 c32, char *file, int line){
+static OPUS_INLINE opus_int32 silk_MLA_uint_(opus_uint32 a32, opus_uint32 b32, opus_uint32 c32, char *file, int line){
    opus_uint32 ret;
    ret = a32 + b32 * c32;
    if ( (opus_int64)ret != (opus_int64)a32 + (opus_int64)b32 * (opus_int64)c32 )
@ -316,7 +316,7 @@ static inline opus_int32 silk_MLA_uint_(opus_uint32 a32, opus_uint32 b32, opus_u

 #undef silk_SMULWB
 #define silk_SMULWB(a,b) silk_SMULWB_((a), (b), __FILE__, __LINE__)
-static inline opus_int32 silk_SMULWB_(opus_int32 a32, opus_int32 b32, char *file, int line){
+static OPUS_INLINE opus_int32 silk_SMULWB_(opus_int32 a32, opus_int32 b32, char *file, int line){
    opus_int32 ret;
    ret = (a32 >> 16) * (opus_int32)((opus_int16)b32) + (((a32 & 0x0000FFFF) * (opus_int32)((opus_int16)b32)) >> 16);
    if ( (opus_int64)ret != ((opus_int64)a32 * (opus_int16)b32) >> 16 )
@ -331,7 +331,7 @@ static inline opus_int32 silk_SMULWB_(opus_int32 a32, opus_int32 b32, char *file

 #undef silk_SMLAWB
 #define silk_SMLAWB(a,b,c) silk_SMLAWB_((a), (b), (c), __FILE__, __LINE__)
-static inline opus_int32 silk_SMLAWB_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
+static OPUS_INLINE opus_int32 silk_SMLAWB_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
    opus_int32 ret;
    ret = silk_ADD32( a32, silk_SMULWB( b32, c32 ) );
    if ( silk_ADD32( a32, silk_SMULWB( b32, c32 ) ) != silk_ADD_SAT32( a32, silk_SMULWB( b32, c32 ) ) )
@ -346,7 +346,7 @@ static inline opus_int32 silk_SMLAWB_(opus_int32 a32, opus_int32 b32, opus_int32

 #undef silk_SMULWT
 #define silk_SMULWT(a,b) silk_SMULWT_((a), (b), __FILE__, __LINE__)
-static inline opus_int32 silk_SMULWT_(opus_int32 a32, opus_int32 b32, char *file, int line){
+static OPUS_INLINE opus_int32 silk_SMULWT_(opus_int32 a32, opus_int32 b32, char *file, int line){
    opus_int32 ret;
    ret = (a32 >> 16) * (b32 >> 16) + (((a32 & 0x0000FFFF) * (b32 >> 16)) >> 16);
    if ( (opus_int64)ret != ((opus_int64)a32 * (b32 >> 16)) >> 16 )
@ -361,7 +361,7 @@ static inline opus_int32 silk_SMULWT_(opus_int32 a32, opus_int32 b32, char *file

 #undef silk_SMLAWT
 #define silk_SMLAWT(a,b,c) silk_SMLAWT_((a), (b), (c), __FILE__, __LINE__)
-static inline opus_int32 silk_SMLAWT_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
+static OPUS_INLINE opus_int32 silk_SMLAWT_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
    opus_int32 ret;
    ret = a32 + ((b32 >> 16) * (c32 >> 16)) + (((b32 & 0x0000FFFF) * ((c32 >> 16)) >> 16));
    if ( (opus_int64)ret != (opus_int64)a32 + (((opus_int64)b32 * (c32 >> 16)) >> 16) )
@ -376,7 +376,7 @@ static inline opus_int32 silk_SMLAWT_(opus_int32 a32, opus_int32 b32, opus_int32

 #undef silk_SMULL
 #define silk_SMULL(a,b) silk_SMULL_((a), (b), __FILE__, __LINE__)
-static inline opus_int64 silk_SMULL_(opus_int64 a64, opus_int64 b64, char *file, int line){
+static OPUS_INLINE opus_int64 silk_SMULL_(opus_int64 a64, opus_int64 b64, char *file, int line){
    opus_int64 ret64;
    int        fail = 0;
    ret64 = a64 * b64;
@ -398,7 +398,7 @@ static inline opus_int64 silk_SMULL_(opus_int64 a64, opus_int64 b64, char *file,
 /* no checking needed for silk_SMULBB */
 #undef silk_SMLABB
 #define silk_SMLABB(a,b,c) silk_SMLABB_((a), (b), (c), __FILE__, __LINE__)
-static inline opus_int32 silk_SMLABB_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
+static OPUS_INLINE opus_int32 silk_SMLABB_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
    opus_int32 ret;
    ret = a32 + (opus_int32)((opus_int16)b32) * (opus_int32)((opus_int16)c32);
    if ( (opus_int64)ret != (opus_int64)a32 + (opus_int64)b32 * (opus_int16)c32 )
@ -414,7 +414,7 @@ static inline opus_int32 silk_SMLABB_(opus_int32 a32, opus_int32 b32, opus_int32
 /* no checking needed for silk_SMULBT */
 #undef silk_SMLABT
 #define silk_SMLABT(a,b,c) silk_SMLABT_((a), (b), (c), __FILE__, __LINE__)
-static inline opus_int32 silk_SMLABT_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
+static OPUS_INLINE opus_int32 silk_SMLABT_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
    opus_int32 ret;
    ret = a32 + ((opus_int32)((opus_int16)b32)) * (c32 >> 16);
    if ( (opus_int64)ret != (opus_int64)a32 + (opus_int64)b32 * (c32 >> 16) )
@ -430,7 +430,7 @@ static inline opus_int32 silk_SMLABT_(opus_int32 a32, opus_int32 b32, opus_int32
 /* no checking needed for silk_SMULTT */
 #undef silk_SMLATT
 #define silk_SMLATT(a,b,c) silk_SMLATT_((a), (b), (c), __FILE__, __LINE__)
-static inline opus_int32 silk_SMLATT_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
+static OPUS_INLINE opus_int32 silk_SMLATT_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
    opus_int32 ret;
    ret = a32 + (b32 >> 16) * (c32 >> 16);
    if ( (opus_int64)ret != (opus_int64)a32 + (b32 >> 16) * (c32 >> 16) )
@ -445,7 +445,7 @@ static inline opus_int32 silk_SMLATT_(opus_int32 a32, opus_int32 b32, opus_int32

 #undef silk_SMULWW
 #define silk_SMULWW(a,b) silk_SMULWW_((a), (b), __FILE__, __LINE__)
-static inline opus_int32 silk_SMULWW_(opus_int32 a32, opus_int32 b32, char *file, int line){
+static OPUS_INLINE opus_int32 silk_SMULWW_(opus_int32 a32, opus_int32 b32, char *file, int line){
    opus_int32 ret, tmp1, tmp2;
    opus_int64 ret64;
    int        fail = 0;
@ -476,7 +476,7 @@ static inline opus_int32 silk_SMULWW_(opus_int32 a32, opus_int32 b32, char *file

 #undef silk_SMLAWW
 #define silk_SMLAWW(a,b,c) silk_SMLAWW_((a), (b), (c), __FILE__, __LINE__)
-static inline opus_int32 silk_SMLAWW_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
+static OPUS_INLINE opus_int32 silk_SMLAWW_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
    opus_int32 ret, tmp;

    tmp = silk_SMULWW( b32, c32 );
@ -505,7 +505,7 @@ static inline opus_int32 silk_SMLAWW_(opus_int32 a32, opus_int32 b32, opus_int32

 #undef silk_DIV32
 #define silk_DIV32(a,b) silk_DIV32_((a), (b), __FILE__, __LINE__)
-static inline opus_int32 silk_DIV32_(opus_int32 a32, opus_int32 b32, char *file, int line){
+static OPUS_INLINE opus_int32 silk_DIV32_(opus_int32 a32, opus_int32 b32, char *file, int line){
    if ( b32 == 0 )
    {
        fprintf (stderr, "silk_DIV32(%d, %d) in %s: line %d\n", a32, b32, file, line);
@ -518,7 +518,7 @@ static inline opus_int32 silk_DIV32_(opus_int32 a32, opus_int32 b32, char *file,

 #undef silk_DIV32_16
 #define silk_DIV32_16(a,b) silk_DIV32_16_((a), (b), __FILE__, __LINE__)
-static inline opus_int32 silk_DIV32_16_(opus_int32 a32, opus_int32 b32, char *file, int line){
+static OPUS_INLINE opus_int32 silk_DIV32_16_(opus_int32 a32, opus_int32 b32, char *file, int line){
    int fail = 0;
    fail |= b32 == 0;
    fail |= b32 > silk_int16_MAX;
@ -544,7 +544,7 @@ static inline opus_int32 silk_DIV32_16_(opus_int32 a32, opus_int32 b32, char *fi

 #undef silk_LSHIFT8
 #define silk_LSHIFT8(a,b) silk_LSHIFT8_((a), (b), __FILE__, __LINE__)
-static inline opus_int8 silk_LSHIFT8_(opus_int8 a, opus_int32 shift, char *file, int line){
+static OPUS_INLINE opus_int8 silk_LSHIFT8_(opus_int8 a, opus_int32 shift, char *file, int line){
    opus_int8 ret;
    int       fail = 0;
    ret = a << shift;
@ -563,7 +563,7 @@ static inline opus_int8 silk_LSHIFT8_(opus_int8 a, opus_int32 shift, char *file,

 #undef silk_LSHIFT16
 #define silk_LSHIFT16(a,b) silk_LSHIFT16_((a), (b), __FILE__, __LINE__)
-static inline opus_int16 silk_LSHIFT16_(opus_int16 a, opus_int32 shift, char *file, int line){
+static OPUS_INLINE opus_int16 silk_LSHIFT16_(opus_int16 a, opus_int32 shift, char *file, int line){
    opus_int16 ret;
    int        fail = 0;
    ret = a << shift;
@ -582,7 +582,7 @@ static inline opus_int16 silk_LSHIFT16_(opus_int16 a, opus_int32 shift, char *fi

 #undef silk_LSHIFT32
 #define silk_LSHIFT32(a,b) silk_LSHIFT32_((a), (b), __FILE__, __LINE__)
-static inline opus_int32 silk_LSHIFT32_(opus_int32 a, opus_int32 shift, char *file, int line){
+static OPUS_INLINE opus_int32 silk_LSHIFT32_(opus_int32 a, opus_int32 shift, char *file, int line){
    opus_int32 ret;
    int        fail = 0;
    ret = a << shift;
@ -601,7 +601,7 @@ static inline opus_int32 silk_LSHIFT32_(opus_int32 a, opus_int32 shift, char *fi

 #undef silk_LSHIFT64
 #define silk_LSHIFT64(a,b) silk_LSHIFT64_((a), (b), __FILE__, __LINE__)
-static inline opus_int64 silk_LSHIFT64_(opus_int64 a, opus_int shift, char *file, int line){
+static OPUS_INLINE opus_int64 silk_LSHIFT64_(opus_int64 a, opus_int shift, char *file, int line){
    opus_int64 ret;
    int        fail = 0;
    ret = a << shift;
@ -620,7 +620,7 @@ static inline opus_int64 silk_LSHIFT64_(opus_int64 a, opus_int shift, char *file

 #undef silk_LSHIFT_ovflw
 #define silk_LSHIFT_ovflw(a,b) silk_LSHIFT_ovflw_((a), (b), __FILE__, __LINE__)
-static inline opus_int32 silk_LSHIFT_ovflw_(opus_int32 a, opus_int32 shift, char *file, int line){
+static OPUS_INLINE opus_int32 silk_LSHIFT_ovflw_(opus_int32 a, opus_int32 shift, char *file, int line){
    if ( (shift < 0) || (shift >= 32) ) /* no check for overflow */
    {
        fprintf (stderr, "silk_LSHIFT_ovflw(%d, %d) in %s: line %d\n", a, shift, file, line);
@ -633,7 +633,7 @@ static inline opus_int32 silk_LSHIFT_ovflw_(opus_int32 a, opus_int32 shift, char

 #undef silk_LSHIFT_uint
 #define silk_LSHIFT_uint(a,b) silk_LSHIFT_uint_((a), (b), __FILE__, __LINE__)
-static inline opus_uint32 silk_LSHIFT_uint_(opus_uint32 a, opus_int32 shift, char *file, int line){
+static OPUS_INLINE opus_uint32 silk_LSHIFT_uint_(opus_uint32 a, opus_int32 shift, char *file, int line){
    opus_uint32 ret;
    ret = a << shift;
    if ( (shift < 0) || ((opus_int64)ret != ((opus_int64)a) << shift))
@ -648,7 +648,7 @@ static inline opus_uint32 silk_LSHIFT_uint_(opus_uint32 a, opus_int32 shift, cha

 #undef silk_RSHIFT8
 #define silk_RSHITF8(a,b) silk_RSHIFT8_((a), (b), __FILE__, __LINE__)
-static inline opus_int8 silk_RSHIFT8_(opus_int8 a, opus_int32 shift, char *file, int line){
+static OPUS_INLINE opus_int8 silk_RSHIFT8_(opus_int8 a, opus_int32 shift, char *file, int line){
    if ( (shift < 0) || (shift>=8) )
    {
        fprintf (stderr, "silk_RSHITF8(%d, %d) in %s: line %d\n", a, shift, file, line);
@ -661,7 +661,7 @@ static inline opus_int8 silk_RSHIFT8_(opus_int8 a, opus_int32 shift, char *file,

 #undef silk_RSHIFT16
 #define silk_RSHITF16(a,b) silk_RSHIFT16_((a), (b), __FILE__, __LINE__)
-static inline opus_int16 silk_RSHIFT16_(opus_int16 a, opus_int32 shift, char *file, int line){
+static OPUS_INLINE opus_int16 silk_RSHIFT16_(opus_int16 a, opus_int32 shift, char *file, int line){
    if ( (shift < 0) || (shift>=16) )
    {
        fprintf (stderr, "silk_RSHITF16(%d, %d) in %s: line %d\n", a, shift, file, line);
@ -674,7 +674,7 @@ static inline opus_int16 silk_RSHIFT16_(opus_int16 a, opus_int32 shift, char *fi

 #undef silk_RSHIFT32
 #define silk_RSHIFT32(a,b) silk_RSHIFT32_((a), (b), __FILE__, __LINE__)
-static inline opus_int32 silk_RSHIFT32_(opus_int32 a, opus_int32 shift, char *file, int line){
+static OPUS_INLINE opus_int32 silk_RSHIFT32_(opus_int32 a, opus_int32 shift, char *file, int line){
    if ( (shift < 0) || (shift>=32) )
    {
        fprintf (stderr, "silk_RSHITF32(%d, %d) in %s: line %d\n", a, shift, file, line);
@ -687,7 +687,7 @@ static inline opus_int32 silk_RSHIFT32_(opus_int32 a, opus_int32 shift, char *fi

 #undef silk_RSHIFT64
 #define silk_RSHIFT64(a,b) silk_RSHIFT64_((a), (b), __FILE__, __LINE__)
-static inline opus_int64 silk_RSHIFT64_(opus_int64 a, opus_int64 shift, char *file, int line){
+static OPUS_INLINE opus_int64 silk_RSHIFT64_(opus_int64 a, opus_int64 shift, char *file, int line){
    if ( (shift < 0) || (shift>=64) )
    {
        fprintf (stderr, "silk_RSHITF64(%lld, %lld) in %s: line %d\n", (long long)a, (long long)shift, file, line);
@ -700,7 +700,7 @@ static inline opus_int64 silk_RSHIFT64_(opus_int64 a, opus_int64 shift, char *fi

 #undef silk_RSHIFT_uint
 #define silk_RSHIFT_uint(a,b) silk_RSHIFT_uint_((a), (b), __FILE__, __LINE__)
-static inline opus_uint32 silk_RSHIFT_uint_(opus_uint32 a, opus_int32 shift, char *file, int line){
+static OPUS_INLINE opus_uint32 silk_RSHIFT_uint_(opus_uint32 a, opus_int32 shift, char *file, int line){
    if ( (shift < 0) || (shift>32) )
    {
        fprintf (stderr, "silk_RSHIFT_uint(%u, %d) in %s: line %d\n", a, shift, file, line);
@ -713,7 +713,7 @@ static inline opus_uint32 silk_RSHIFT_uint_(opus_uint32 a, opus_int32 shift, cha

 #undef silk_ADD_LSHIFT
 #define silk_ADD_LSHIFT(a,b,c) silk_ADD_LSHIFT_((a), (b), (c), __FILE__, __LINE__)
-static inline int silk_ADD_LSHIFT_(int a, int b, int shift, char *file, int line){
+static OPUS_INLINE int silk_ADD_LSHIFT_(int a, int b, int shift, char *file, int line){
    opus_int16 ret;
    ret = a + (b << shift);
    if ( (shift < 0) || (shift>15) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) << shift)) )
@ -728,7 +728,7 @@ static inline int silk_ADD_LSHIFT_(int a, int b, int shift, char *file, int line

 #undef silk_ADD_LSHIFT32
 #define silk_ADD_LSHIFT32(a,b,c) silk_ADD_LSHIFT32_((a), (b), (c), __FILE__, __LINE__)
-static inline opus_int32 silk_ADD_LSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){
+static OPUS_INLINE opus_int32 silk_ADD_LSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){
    opus_int32 ret;
    ret = a + (b << shift);
    if ( (shift < 0) || (shift>31) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) << shift)) )
@ -743,7 +743,7 @@ static inline opus_int32 silk_ADD_LSHIFT32_(opus_int32 a, opus_int32 b, opus_int

 #undef silk_ADD_LSHIFT_uint
 #define silk_ADD_LSHIFT_uint(a,b,c) silk_ADD_LSHIFT_uint_((a), (b), (c), __FILE__, __LINE__)
-static inline opus_uint32 silk_ADD_LSHIFT_uint_(opus_uint32 a, opus_uint32 b, opus_int32 shift, char *file, int line){
+static OPUS_INLINE opus_uint32 silk_ADD_LSHIFT_uint_(opus_uint32 a, opus_uint32 b, opus_int32 shift, char *file, int line){
    opus_uint32 ret;
    ret = a + (b << shift);
    if ( (shift < 0) || (shift>32) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) << shift)) )
@ -758,7 +758,7 @@ static inline opus_uint32 silk_ADD_LSHIFT_uint_(opus_uint32 a, opus_uint32 b, op

 #undef silk_ADD_RSHIFT
 #define silk_ADD_RSHIFT(a,b,c) silk_ADD_RSHIFT_((a), (b), (c), __FILE__, __LINE__)
-static inline int silk_ADD_RSHIFT_(int a, int b, int shift, char *file, int line){
+static OPUS_INLINE int silk_ADD_RSHIFT_(int a, int b, int shift, char *file, int line){
    opus_int16 ret;
    ret = a + (b >> shift);
    if ( (shift < 0) || (shift>15) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) >> shift)) )
@ -773,7 +773,7 @@ static inline int silk_ADD_RSHIFT_(int a, int b, int shift, char *file, int line

 #undef silk_ADD_RSHIFT32
 #define silk_ADD_RSHIFT32(a,b,c) silk_ADD_RSHIFT32_((a), (b), (c), __FILE__, __LINE__)
-static inline opus_int32 silk_ADD_RSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){
+static OPUS_INLINE opus_int32 silk_ADD_RSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){
    opus_int32 ret;
    ret = a + (b >> shift);
    if ( (shift < 0) || (shift>31) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) >> shift)) )
@ -788,7 +788,7 @@ static inline opus_int32 silk_ADD_RSHIFT32_(opus_int32 a, opus_int32 b, opus_int

 #undef silk_ADD_RSHIFT_uint
 #define silk_ADD_RSHIFT_uint(a,b,c) silk_ADD_RSHIFT_uint_((a), (b), (c), __FILE__, __LINE__)
-static inline opus_uint32 silk_ADD_RSHIFT_uint_(opus_uint32 a, opus_uint32 b, opus_int32 shift, char *file, int line){
+static OPUS_INLINE opus_uint32 silk_ADD_RSHIFT_uint_(opus_uint32 a, opus_uint32 b, opus_int32 shift, char *file, int line){
    opus_uint32 ret;
    ret = a + (b >> shift);
    if ( (shift < 0) || (shift>32) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) >> shift)) )
@ -803,7 +803,7 @@ static inline opus_uint32 silk_ADD_RSHIFT_uint_(opus_uint32 a, opus_uint32 b, op

 #undef silk_SUB_LSHIFT32
 #define silk_SUB_LSHIFT32(a,b,c) silk_SUB_LSHIFT32_((a), (b), (c), __FILE__, __LINE__)
-static inline opus_int32 silk_SUB_LSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){
+static OPUS_INLINE opus_int32 silk_SUB_LSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){
    opus_int32 ret;
    ret = a - (b << shift);
    if ( (shift < 0) || (shift>31) || ((opus_int64)ret != (opus_int64)a - (((opus_int64)b) << shift)) )
@ -818,7 +818,7 @@ static inline opus_int32 silk_SUB_LSHIFT32_(opus_int32 a, opus_int32 b, opus_int

 #undef silk_SUB_RSHIFT32
 #define silk_SUB_RSHIFT32(a,b,c) silk_SUB_RSHIFT32_((a), (b), (c), __FILE__, __LINE__)
-static inline opus_int32 silk_SUB_RSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){
+static OPUS_INLINE opus_int32 silk_SUB_RSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){
    opus_int32 ret;
    ret = a - (b >> shift);
    if ( (shift < 0) || (shift>31) || ((opus_int64)ret != (opus_int64)a - (((opus_int64)b) >> shift)) )
@ -833,7 +833,7 @@ static inline opus_int32 silk_SUB_RSHIFT32_(opus_int32 a, opus_int32 b, opus_int

 #undef silk_RSHIFT_ROUND
 #define silk_RSHIFT_ROUND(a,b) silk_RSHIFT_ROUND_((a), (b), __FILE__, __LINE__)
-static inline opus_int32 silk_RSHIFT_ROUND_(opus_int32 a, opus_int32 shift, char *file, int line){
+static OPUS_INLINE opus_int32 silk_RSHIFT_ROUND_(opus_int32 a, opus_int32 shift, char *file, int line){
    opus_int32 ret;
    ret = shift == 1 ? (a >> 1) + (a & 1) : ((a >> (shift - 1)) + 1) >> 1;
    /* the marco definition can't handle a shift of zero */
@ -849,7 +849,7 @@ static inline opus_int32 silk_RSHIFT_ROUND_(opus_int32 a, opus_int32 shift, char

 #undef silk_RSHIFT_ROUND64
 #define silk_RSHIFT_ROUND64(a,b) silk_RSHIFT_ROUND64_((a), (b), __FILE__, __LINE__)
-static inline opus_int64 silk_RSHIFT_ROUND64_(opus_int64 a, opus_int32 shift, char *file, int line){
+static OPUS_INLINE opus_int64 silk_RSHIFT_ROUND64_(opus_int64 a, opus_int32 shift, char *file, int line){
    opus_int64 ret;
    /* the marco definition can't handle a shift of zero */
    if ( (shift <= 0) || (shift>=64) )
@ -865,14 +865,14 @@ static inline opus_int64 silk_RSHIFT_ROUND64_(opus_int64 a, opus_int32 shift, ch

 /* silk_abs is used on floats also, so doesn't work... */
 /*#undef silk_abs
-static inline opus_int32 silk_abs(opus_int32 a){
+static OPUS_INLINE opus_int32 silk_abs(opus_int32 a){
    silk_assert(a != 0x80000000);
    return (((a) >  0)  ? (a) : -(a));            // Be careful, silk_abs returns wrong when input equals to silk_intXX_MIN
 }*/

 #undef silk_abs_int64
 #define silk_abs_int64(a) silk_abs_int64_((a), __FILE__, __LINE__)
-static inline opus_int64 silk_abs_int64_(opus_int64 a, char *file, int line){
+static OPUS_INLINE opus_int64 silk_abs_int64_(opus_int64 a, char *file, int line){
    if ( a == silk_int64_MIN )
    {
        fprintf (stderr, "silk_abs_int64(%lld) in %s: line %d\n", (long long)a, file, line);
@ -885,7 +885,7 @@ static inline opus_int64 silk_abs_int64_(opus_int64 a, char *file, int line){

 #undef silk_abs_int32
 #define silk_abs_int32(a) silk_abs_int32_((a), __FILE__, __LINE__)
-static inline opus_int32 silk_abs_int32_(opus_int32 a, char *file, int line){
+static OPUS_INLINE opus_int32 silk_abs_int32_(opus_int32 a, char *file, int line){
    if ( a == silk_int32_MIN )
    {
        fprintf (stderr, "silk_abs_int32(%d) in %s: line %d\n", a, file, line);
@ -898,7 +898,7 @@ static inline opus_int32 silk_abs_int32_(opus_int32 a, char *file, int line){

 #undef silk_CHECK_FIT8
 #define silk_CHECK_FIT8(a) silk_CHECK_FIT8_((a), __FILE__, __LINE__)
-static inline opus_int8 silk_CHECK_FIT8_( opus_int64 a, char *file, int line ){
+static OPUS_INLINE opus_int8 silk_CHECK_FIT8_( opus_int64 a, char *file, int line ){
    opus_int8 ret;
    ret = (opus_int8)a;
    if ( (opus_int64)ret != a )
@ -913,7 +913,7 @@ static inline opus_int8 silk_CHECK_FIT8_( opus_int64 a, char *file, int line ){

 #undef silk_CHECK_FIT16
 #define silk_CHECK_FIT16(a) silk_CHECK_FIT16_((a), __FILE__, __LINE__)
-static inline opus_int16 silk_CHECK_FIT16_( opus_int64 a, char *file, int line ){
+static OPUS_INLINE opus_int16 silk_CHECK_FIT16_( opus_int64 a, char *file, int line ){
    opus_int16 ret;
    ret = (opus_int16)a;
    if ( (opus_int64)ret != a )
@ -928,7 +928,7 @@ static inline opus_int16 silk_CHECK_FIT16_( opus_int64 a, char *file, int line )

 #undef silk_CHECK_FIT32
 #define silk_CHECK_FIT32(a) silk_CHECK_FIT32_((a), __FILE__, __LINE__)
-static inline opus_int32 silk_CHECK_FIT32_( opus_int64 a, char *file, int line ){
+static OPUS_INLINE opus_int32 silk_CHECK_FIT32_( opus_int64 a, char *file, int line ){
    opus_int32 ret;
    ret = (opus_int32)a;
    if ( (opus_int64)ret != a )
--- a/media/libopus/silk/NLSF2A.c
+++ b/media/libopus/silk/NLSF2A.c
@ -41,7 +41,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #define QA      16

 /* helper function for NLSF2A(..) */
-static inline void silk_NLSF2A_find_poly(
+static OPUS_INLINE void silk_NLSF2A_find_poly(
    opus_int32          *out,      /* O    intermediate polynomial, QA [dd+1]        */
    const opus_int32    *cLSF,     /* I    vector of interleaved 2*cos(LSFs), QA [d] */
    opus_int            dd         /* I    polynomial order (= 1/2 * filter order)   */
--- a/media/libopus/silk/NLSF_decode.c
+++ b/media/libopus/silk/NLSF_decode.c
@ -32,7 +32,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "main.h"

 /* Predictive dequantizer for NLSF residuals */
-static inline void silk_NLSF_residual_dequant(               /* O    Returns RD value in Q30                     */
+static OPUS_INLINE void silk_NLSF_residual_dequant(               /* O    Returns RD value in Q30                     */
          opus_int16         x_Q10[],                        /* O    Output [ order ]                            */
    const opus_int8          indices[],                      /* I    Quantization indices [ order ]              */
    const opus_uint8         pred_coef_Q8[],                 /* I    Backward predictor coefs [ order ]          */
--- a/media/libopus/silk/NLSF_del_dec_quant.c
+++ b/media/libopus/silk/NLSF_del_dec_quant.c
@ -121,7 +121,7 @@ opus_int32 silk_NLSF_del_dec_quant(                             /* O    Returns
            RD_Q25[ j + nStates ] = silk_SMLABB( silk_MLA( RD_tmp_Q25, silk_SMULBB( diff_Q10, diff_Q10 ), w_Q5[ i ] ), mu_Q20, rate1_Q5 );
        }

-        if( nStates < NLSF_QUANT_DEL_DEC_STATES ) {
+        if( nStates <= ( NLSF_QUANT_DEL_DEC_STATES >> 1 ) ) {
            /* double number of states and copy */
            for( j = 0; j < nStates; j++ ) {
                ind[ j + nStates ][ i ] = ind[ j ][ i ] + 1;
--- a/media/libopus/silk/NSQ.c
+++ b/media/libopus/silk/NSQ.c
@ -32,7 +32,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "main.h"
 #include "stack_alloc.h"

-static inline void silk_nsq_scale_states(
+static OPUS_INLINE void silk_nsq_scale_states(
    const silk_encoder_state *psEncC,           /* I    Encoder State                   */
    silk_nsq_state      *NSQ,                   /* I/O  NSQ state                       */
    const opus_int32    x_Q3[],                 /* I    input in Q3                     */
@ -46,7 +46,7 @@ static inline void silk_nsq_scale_states(
    const opus_int      signal_type             /* I    Signal type                     */
 );

-static inline void silk_noise_shape_quantizer(
+static OPUS_INLINE void silk_noise_shape_quantizer(
    silk_nsq_state      *NSQ,                   /* I/O  NSQ state                       */
    opus_int            signalType,             /* I    Signal type                     */
    const opus_int32    x_sc_Q10[],             /* I                                    */
@ -172,7 +172,7 @@ void silk_NSQ(
 /***********************************/
 /* silk_noise_shape_quantizer  */
 /***********************************/
-static inline void silk_noise_shape_quantizer(
+static OPUS_INLINE void silk_noise_shape_quantizer(
    silk_nsq_state      *NSQ,                   /* I/O  NSQ state                       */
    opus_int            signalType,             /* I    Signal type                     */
    const opus_int32    x_sc_Q10[],             /* I                                    */
@ -370,7 +370,7 @@ static inline void silk_noise_shape_quantizer(
    silk_memcpy( NSQ->sLPC_Q14, &NSQ->sLPC_Q14[ length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) );
 }

-static inline void silk_nsq_scale_states(
+static OPUS_INLINE void silk_nsq_scale_states(
    const silk_encoder_state *psEncC,           /* I    Encoder State                   */
    silk_nsq_state      *NSQ,                   /* I/O  NSQ state                       */
    const opus_int32    x_Q3[],                 /* I    input in Q3                     */
--- a/media/libopus/silk/NSQ_del_dec.c
+++ b/media/libopus/silk/NSQ_del_dec.c
@ -57,7 +57,7 @@ typedef struct {

 typedef NSQ_sample_struct  NSQ_sample_pair[ 2 ];

-static inline void silk_nsq_del_dec_scale_states(
+static OPUS_INLINE void silk_nsq_del_dec_scale_states(
    const silk_encoder_state *psEncC,               /* I    Encoder State                       */
    silk_nsq_state      *NSQ,                       /* I/O  NSQ state                           */
    NSQ_del_dec_struct  psDelDec[],                 /* I/O  Delayed decision states             */
@ -77,7 +77,7 @@ static inline void silk_nsq_del_dec_scale_states(
 /******************************************/
 /* Noise shape quantizer for one subframe */
 /******************************************/
-static inline void silk_noise_shape_quantizer_del_dec(
+static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
    silk_nsq_state      *NSQ,                   /* I/O  NSQ state                           */
    NSQ_del_dec_struct  psDelDec[],             /* I/O  Delayed decision states             */
    opus_int            signalType,             /* I    Signal type                         */
@ -303,7 +303,7 @@ void silk_NSQ_del_dec(
 /******************************************/
 /* Noise shape quantizer for one subframe */
 /******************************************/
-static inline void silk_noise_shape_quantizer_del_dec(
+static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
    silk_nsq_state      *NSQ,                   /* I/O  NSQ state                           */
    NSQ_del_dec_struct  psDelDec[],             /* I/O  Delayed decision states             */
    opus_int            signalType,             /* I    Signal type                         */
@ -630,7 +630,7 @@ static inline void silk_noise_shape_quantizer_del_dec(
    RESTORE_STACK;
 }

-static inline void silk_nsq_del_dec_scale_states(
+static OPUS_INLINE void silk_nsq_del_dec_scale_states(
    const silk_encoder_state *psEncC,               /* I    Encoder State                       */
    silk_nsq_state      *NSQ,                       /* I/O  NSQ state                           */
    NSQ_del_dec_struct  psDelDec[],                 /* I/O  Delayed decision states             */
--- a/media/libopus/silk/PLC.c
+++ b/media/libopus/silk/PLC.c
@ -38,12 +38,12 @@ static const opus_int16 HARM_ATT_Q15[NB_ATT]              = { 32440, 31130 }; /*
 static const opus_int16 PLC_RAND_ATTENUATE_V_Q15[NB_ATT]  = { 31130, 26214 }; /* 0.95, 0.8 */
 static const opus_int16 PLC_RAND_ATTENUATE_UV_Q15[NB_ATT] = { 32440, 29491 }; /* 0.99, 0.9 */

-static inline void silk_PLC_update(
+static OPUS_INLINE void silk_PLC_update(
    silk_decoder_state                  *psDec,             /* I/O Decoder state        */
    silk_decoder_control                *psDecCtrl          /* I/O Decoder control      */
 );

-static inline void silk_PLC_conceal(
+static OPUS_INLINE void silk_PLC_conceal(
    silk_decoder_state                  *psDec,             /* I/O Decoder state        */
    silk_decoder_control                *psDecCtrl,         /* I/O Decoder control      */
    opus_int16                          frame[]             /* O LPC residual signal    */
@ -92,7 +92,7 @@ void silk_PLC(
 /**************************************************/
 /* Update state of PLC                            */
 /**************************************************/
-static inline void silk_PLC_update(
+static OPUS_INLINE void silk_PLC_update(
    silk_decoder_state                  *psDec,             /* I/O Decoder state        */
    silk_decoder_control                *psDecCtrl          /* I/O Decoder control      */
 )
@ -165,7 +165,7 @@ static inline void silk_PLC_update(
    psPLC->nb_subfr = psDec->nb_subfr;
 }

-static inline void silk_PLC_conceal(
+static OPUS_INLINE void silk_PLC_conceal(
    silk_decoder_state                  *psDec,             /* I/O Decoder state        */
    silk_decoder_control                *psDecCtrl,         /* I/O Decoder control      */
    opus_int16                          frame[]             /* O LPC residual signal    */
--- a/media/libopus/silk/SigProc_FIX.h
+++ b/media/libopus/silk/SigProc_FIX.h
@ -227,7 +227,8 @@ void silk_autocorr(
    opus_int                    *scale,             /* O    Scaling of the correlation vector                           */
    const opus_int16            *inputData,         /* I    Input data to correlate                                     */
    const opus_int              inputDataSize,      /* I    Length of input                                             */
-    const opus_int              correlationCount    /* I    Number of correlation taps to compute                       */
+    const opus_int              correlationCount,   /* I    Number of correlation taps to compute                       */
+    int                         arch                /* I    Run-time architecture                                       */
 );

 void silk_decode_pitch(
@ -249,7 +250,8 @@ opus_int silk_pitch_analysis_core(                  /* O    Voicing estimate: 0
    const opus_int              search_thres2_Q13,  /* I    Final threshold for lag candidates 0 - 1                    */
    const opus_int              Fs_kHz,             /* I    Sample frequency (kHz)                                      */
    const opus_int              complexity,         /* I    Complexity setting, 0-2, where 2 is highest                 */
-    const opus_int              nb_subfr            /* I    number of 5 ms subframes                                    */
+    const opus_int              nb_subfr,           /* I    number of 5 ms subframes                                    */
+    int                         arch                /* I    Run-time architecture                                       */
 );

 /* Compute Normalized Line Spectral Frequencies (NLSFs) from whitening filter coefficients      */
@ -309,7 +311,8 @@ void silk_burg_modified(
    const opus_int32            minInvGain_Q30,     /* I    Inverse of max prediction gain                              */
    const opus_int              subfr_length,       /* I    Input signal subframe length (incl. D preceding samples)    */
    const opus_int              nb_subfr,           /* I    Number of subframes stacked in x                            */
-    const opus_int              D                   /* I    Order                                                       */
+    const opus_int              D,                  /* I    Order                                                       */
+    int                         arch                /* I    Run-time architecture                                       */
 );

 /* Copy and multiply a vector by a constant */
@ -358,8 +361,8 @@ opus_int64 silk_inner_prod16_aligned_64(
 /* Rotate a32 right by 'rot' bits. Negative rot values result in rotating
   left. Output is 32bit int.
   Note: contemporary compilers recognize the C expression below and
-   compile it into a 'ror' instruction if available. No need for inline ASM! */
-static inline opus_int32 silk_ROR32( opus_int32 a32, opus_int rot )
+   compile it into a 'ror' instruction if available. No need for OPUS_INLINE ASM! */
+static OPUS_INLINE opus_int32 silk_ROR32( opus_int32 a32, opus_int rot )
 {
    opus_uint32 x = (opus_uint32) a32;
    opus_uint32 r = (opus_uint32) rot;
@ -508,37 +511,37 @@ static inline opus_int32 silk_ROR32( opus_int32 a32, opus_int rot )
 #define SILK_FIX_CONST( C, Q )              ((opus_int32)((C) * ((opus_int64)1 << (Q)) + 0.5))

 /* silk_min() versions with typecast in the function call */
-static inline opus_int silk_min_int(opus_int a, opus_int b)
+static OPUS_INLINE opus_int silk_min_int(opus_int a, opus_int b)
 {
    return (((a) < (b)) ? (a) : (b));
 }
-static inline opus_int16 silk_min_16(opus_int16 a, opus_int16 b)
+static OPUS_INLINE opus_int16 silk_min_16(opus_int16 a, opus_int16 b)
 {
    return (((a) < (b)) ? (a) : (b));
 }
-static inline opus_int32 silk_min_32(opus_int32 a, opus_int32 b)
+static OPUS_INLINE opus_int32 silk_min_32(opus_int32 a, opus_int32 b)
 {
    return (((a) < (b)) ? (a) : (b));
 }
-static inline opus_int64 silk_min_64(opus_int64 a, opus_int64 b)
+static OPUS_INLINE opus_int64 silk_min_64(opus_int64 a, opus_int64 b)
 {
    return (((a) < (b)) ? (a) : (b));
 }

 /* silk_min() versions with typecast in the function call */
-static inline opus_int silk_max_int(opus_int a, opus_int b)
+static OPUS_INLINE opus_int silk_max_int(opus_int a, opus_int b)
 {
    return (((a) > (b)) ? (a) : (b));
 }
-static inline opus_int16 silk_max_16(opus_int16 a, opus_int16 b)
+static OPUS_INLINE opus_int16 silk_max_16(opus_int16 a, opus_int16 b)
 {
    return (((a) > (b)) ? (a) : (b));
 }
-static inline opus_int32 silk_max_32(opus_int32 a, opus_int32 b)
+static OPUS_INLINE opus_int32 silk_max_32(opus_int32 a, opus_int32 b)
 {
    return (((a) > (b)) ? (a) : (b));
 }
-static inline opus_int64 silk_max_64(opus_int64 a, opus_int64 b)
+static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b)
 {
    return (((a) > (b)) ? (a) : (b));
 }
@ -576,11 +579,11 @@ static inline opus_int64 silk_max_64(opus_int64 a, opus_int64 b)
 #include "MacroCount.h"
 #include "MacroDebug.h"

-#ifdef ARMv4_ASM
+#ifdef OPUS_ARM_INLINE_ASM
 #include "arm/SigProc_FIX_armv4.h"
 #endif

-#ifdef ARMv5E_ASM
+#ifdef OPUS_ARM_INLINE_EDSP
 #include "arm/SigProc_FIX_armv5e.h"
 #endif

--- a/media/libopus/silk/VAD.c
+++ b/media/libopus/silk/VAD.c
@ -33,7 +33,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "stack_alloc.h"

 /* Silk VAD noise level estimation */
-static inline void silk_VAD_GetNoiseLevels(
+static OPUS_INLINE void silk_VAD_GetNoiseLevels(
    const opus_int32             pX[ VAD_N_BANDS ], /* I    subband energies                            */
    silk_VAD_state              *psSilk_VAD         /* I/O  Pointer to Silk VAD state                   */
 );
@ -296,7 +296,7 @@ opus_int silk_VAD_GetSA_Q8(                                     /* O    Return v
 /**************************/
 /* Noise level estimation */
 /**************************/
-static inline void silk_VAD_GetNoiseLevels(
+static OPUS_INLINE void silk_VAD_GetNoiseLevels(
    const opus_int32            pX[ VAD_N_BANDS ],  /* I    subband energies                            */
    silk_VAD_state              *psSilk_VAD         /* I/O  Pointer to Silk VAD state                   */
 )
--- a/media/libopus/silk/VQ_WMat_EC.c
+++ b/media/libopus/silk/VQ_WMat_EC.c
@ -35,15 +35,18 @@ POSSIBILITY OF SUCH DAMAGE.
 void silk_VQ_WMat_EC(
    opus_int8                   *ind,                           /* O    index of best codebook vector               */
    opus_int32                  *rate_dist_Q14,                 /* O    best weighted quant error + mu * rate       */
+    opus_int                    *gain_Q7,                       /* O    sum of absolute LTP coefficients            */
    const opus_int16            *in_Q14,                        /* I    input vector to be quantized                */
    const opus_int32            *W_Q18,                         /* I    weighting matrix                            */
    const opus_int8             *cb_Q7,                         /* I    codebook                                    */
+    const opus_uint8            *cb_gain_Q7,                    /* I    codebook effective gain                     */
    const opus_uint8            *cl_Q5,                         /* I    code length for each codebook vector        */
    const opus_int              mu_Q9,                          /* I    tradeoff betw. weighted error and rate      */
+    const opus_int32            max_gain_Q7,                    /* I    maximum sum of absolute LTP coefficients    */
    opus_int                    L                               /* I    number of vectors in codebook               */
 )
 {
-    opus_int   k;
+    opus_int   k, gain_tmp_Q7;
    const opus_int8 *cb_row_Q7;
    opus_int16 diff_Q14[ 5 ];
    opus_int32 sum1_Q14, sum2_Q16;
@ -52,6 +55,8 @@ void silk_VQ_WMat_EC(
    *rate_dist_Q14 = silk_int32_MAX;
    cb_row_Q7 = cb_Q7;
    for( k = 0; k < L; k++ ) {
+	    gain_tmp_Q7 = cb_gain_Q7[k];
+
        diff_Q14[ 0 ] = in_Q14[ 0 ] - silk_LSHIFT( cb_row_Q7[ 0 ], 7 );
        diff_Q14[ 1 ] = in_Q14[ 1 ] - silk_LSHIFT( cb_row_Q7[ 1 ], 7 );
        diff_Q14[ 2 ] = in_Q14[ 2 ] - silk_LSHIFT( cb_row_Q7[ 2 ], 7 );
@ -61,6 +66,9 @@ void silk_VQ_WMat_EC(
        /* Weighted rate */
        sum1_Q14 = silk_SMULBB( mu_Q9, cl_Q5[ k ] );

+		/* Penalty for too large gain */
+		sum1_Q14 = silk_ADD_LSHIFT32( sum1_Q14, silk_max( silk_SUB32( gain_tmp_Q7, max_gain_Q7 ), 0 ), 10 );
+
        silk_assert( sum1_Q14 >= 0 );

        /* first row of W_Q18 */
@ -103,6 +111,7 @@ void silk_VQ_WMat_EC(
        if( sum1_Q14 < *rate_dist_Q14 ) {
            *rate_dist_Q14 = sum1_Q14;
            *ind = (opus_int8)k;
+			*gain_Q7 = gain_tmp_Q7;
        }

        /* Go to next cbk vector */
--- a/media/libopus/silk/arm/SigProc_FIX_armv4.h
+++ b/media/libopus/silk/arm/SigProc_FIX_armv4.h
@ -30,7 +30,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #define SILK_SIGPROC_FIX_ARMv4_H

 #undef silk_MLA
-static inline opus_int32 silk_MLA_armv4(opus_int32 a, opus_int32 b,
+static OPUS_INLINE opus_int32 silk_MLA_armv4(opus_int32 a, opus_int32 b,
 opus_int32 c)
 {
  opus_int32 res;
--- a/media/libopus/silk/arm/SigProc_FIX_armv5e.h
+++ b/media/libopus/silk/arm/SigProc_FIX_armv5e.h
@ -30,7 +30,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #define SILK_SIGPROC_FIX_ARMv5E_H

 #undef silk_SMULTT
-static inline opus_int32 silk_SMULTT_armv5e(opus_int32 a, opus_int32 b)
+static OPUS_INLINE opus_int32 silk_SMULTT_armv5e(opus_int32 a, opus_int32 b)
 {
  opus_int32 res;
  __asm__(
@ -44,7 +44,7 @@ static inline opus_int32 silk_SMULTT_armv5e(opus_int32 a, opus_int32 b)
 #define silk_SMULTT(a, b) (silk_SMULTT_armv5e(a, b))

 #undef silk_SMLATT
-static inline opus_int32 silk_SMLATT_armv5e(opus_int32 a, opus_int32 b,
+static OPUS_INLINE opus_int32 silk_SMLATT_armv5e(opus_int32 a, opus_int32 b,
 opus_int32 c)
 {
  opus_int32 res;
--- a/media/libopus/silk/arm/macros_armv4.h
+++ b/media/libopus/silk/arm/macros_armv4.h
@ -30,7 +30,7 @@ POSSIBILITY OF SUCH DAMAGE.

 /* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output have to be 32bit int */
 #undef silk_SMULWB
-static inline opus_int32 silk_SMULWB_armv4(opus_int32 a, opus_int16 b)
+static OPUS_INLINE opus_int32 silk_SMULWB_armv4(opus_int32 a, opus_int16 b)
 {
  unsigned rd_lo;
  int rd_hi;
@ -50,7 +50,7 @@ static inline opus_int32 silk_SMULWB_armv4(opus_int32 a, opus_int16 b)

 /* (a32 * (b32 >> 16)) >> 16 */
 #undef silk_SMULWT
-static inline opus_int32 silk_SMULWT_armv4(opus_int32 a, opus_int32 b)
+static OPUS_INLINE opus_int32 silk_SMULWT_armv4(opus_int32 a, opus_int32 b)
 {
  unsigned rd_lo;
  int rd_hi;
@ -70,7 +70,7 @@ static inline opus_int32 silk_SMULWT_armv4(opus_int32 a, opus_int32 b)

 /* (a32 * b32) >> 16 */
 #undef silk_SMULWW
-static inline opus_int32 silk_SMULWW_armv4(opus_int32 a, opus_int32 b)
+static OPUS_INLINE opus_int32 silk_SMULWW_armv4(opus_int32 a, opus_int32 b)
 {
  unsigned rd_lo;
  int rd_hi;
@ -85,7 +85,7 @@ static inline opus_int32 silk_SMULWW_armv4(opus_int32 a, opus_int32 b)
 #define silk_SMULWW(a, b) (silk_SMULWW_armv4(a, b))

 #undef silk_SMLAWW
-static inline opus_int32 silk_SMLAWW_armv4(opus_int32 a, opus_int32 b,
+static OPUS_INLINE opus_int32 silk_SMLAWW_armv4(opus_int32 a, opus_int32 b,
 opus_int32 c)
 {
  unsigned rd_lo;
--- a/media/libopus/silk/arm/macros_armv5e.h
+++ b/media/libopus/silk/arm/macros_armv5e.h
@ -31,7 +31,7 @@ POSSIBILITY OF SUCH DAMAGE.

 /* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output have to be 32bit int */
 #undef silk_SMULWB
-static inline opus_int32 silk_SMULWB_armv5e(opus_int32 a, opus_int16 b)
+static OPUS_INLINE opus_int32 silk_SMULWB_armv5e(opus_int32 a, opus_int16 b)
 {
  int res;
  __asm__(
@ -46,7 +46,7 @@ static inline opus_int32 silk_SMULWB_armv5e(opus_int32 a, opus_int16 b)

 /* a32 + (b32 * (opus_int32)((opus_int16)(c32))) >> 16 output have to be 32bit int */
 #undef silk_SMLAWB
-static inline opus_int32 silk_SMLAWB_armv5e(opus_int32 a, opus_int32 b,
+static OPUS_INLINE opus_int32 silk_SMLAWB_armv5e(opus_int32 a, opus_int32 b,
 opus_int16 c)
 {
  int res;
@ -62,7 +62,7 @@ static inline opus_int32 silk_SMLAWB_armv5e(opus_int32 a, opus_int32 b,

 /* (a32 * (b32 >> 16)) >> 16 */
 #undef silk_SMULWT
-static inline opus_int32 silk_SMULWT_armv5e(opus_int32 a, opus_int32 b)
+static OPUS_INLINE opus_int32 silk_SMULWT_armv5e(opus_int32 a, opus_int32 b)
 {
  int res;
  __asm__(
@ -77,7 +77,7 @@ static inline opus_int32 silk_SMULWT_armv5e(opus_int32 a, opus_int32 b)

 /* a32 + (b32 * (c32 >> 16)) >> 16 */
 #undef silk_SMLAWT
-static inline opus_int32 silk_SMLAWT_armv5e(opus_int32 a, opus_int32 b,
+static OPUS_INLINE opus_int32 silk_SMLAWT_armv5e(opus_int32 a, opus_int32 b,
 opus_int32 c)
 {
  int res;
@ -93,7 +93,7 @@ static inline opus_int32 silk_SMLAWT_armv5e(opus_int32 a, opus_int32 b,

 /* (opus_int32)((opus_int16)(a3))) * (opus_int32)((opus_int16)(b32)) output have to be 32bit int */
 #undef silk_SMULBB
-static inline opus_int32 silk_SMULBB_armv5e(opus_int32 a, opus_int32 b)
+static OPUS_INLINE opus_int32 silk_SMULBB_armv5e(opus_int32 a, opus_int32 b)
 {
  int res;
  __asm__(
@ -108,7 +108,7 @@ static inline opus_int32 silk_SMULBB_armv5e(opus_int32 a, opus_int32 b)

 /* a32 + (opus_int32)((opus_int16)(b32)) * (opus_int32)((opus_int16)(c32)) output have to be 32bit int */
 #undef silk_SMLABB
-static inline opus_int32 silk_SMLABB_armv5e(opus_int32 a, opus_int32 b,
+static OPUS_INLINE opus_int32 silk_SMLABB_armv5e(opus_int32 a, opus_int32 b,
 opus_int32 c)
 {
  int res;
@ -124,7 +124,7 @@ static inline opus_int32 silk_SMLABB_armv5e(opus_int32 a, opus_int32 b,

 /* (opus_int32)((opus_int16)(a32)) * (b32 >> 16) */
 #undef silk_SMULBT
-static inline opus_int32 silk_SMULBT_armv5e(opus_int32 a, opus_int32 b)
+static OPUS_INLINE opus_int32 silk_SMULBT_armv5e(opus_int32 a, opus_int32 b)
 {
  int res;
  __asm__(
@ -139,7 +139,7 @@ static inline opus_int32 silk_SMULBT_armv5e(opus_int32 a, opus_int32 b)

 /* a32 + (opus_int32)((opus_int16)(b32)) * (c32 >> 16) */
 #undef silk_SMLABT
-static inline opus_int32 silk_SMLABT_armv5e(opus_int32 a, opus_int32 b,
+static OPUS_INLINE opus_int32 silk_SMLABT_armv5e(opus_int32 a, opus_int32 b,
 opus_int32 c)
 {
  int res;
@ -155,7 +155,7 @@ static inline opus_int32 silk_SMLABT_armv5e(opus_int32 a, opus_int32 b,

 /* add/subtract with output saturated */
 #undef silk_ADD_SAT32
-static inline opus_int32 silk_ADD_SAT32_armv5e(opus_int32 a, opus_int32 b)
+static OPUS_INLINE opus_int32 silk_ADD_SAT32_armv5e(opus_int32 a, opus_int32 b)
 {
  int res;
  __asm__(
@ -169,7 +169,7 @@ static inline opus_int32 silk_ADD_SAT32_armv5e(opus_int32 a, opus_int32 b)
 #define silk_ADD_SAT32(a, b) (silk_ADD_SAT32_armv5e(a, b))

 #undef silk_SUB_SAT32
-static inline opus_int32 silk_SUB_SAT32_armv5e(opus_int32 a, opus_int32 b)
+static OPUS_INLINE opus_int32 silk_SUB_SAT32_armv5e(opus_int32 a, opus_int32 b)
 {
  int res;
  __asm__(
@ -183,7 +183,7 @@ static inline opus_int32 silk_SUB_SAT32_armv5e(opus_int32 a, opus_int32 b)
 #define silk_SUB_SAT32(a, b) (silk_SUB_SAT32_armv5e(a, b))

 #undef silk_CLZ16
-static inline opus_int32 silk_CLZ16_armv5(opus_int16 in16)
+static OPUS_INLINE opus_int32 silk_CLZ16_armv5(opus_int16 in16)
 {
  int res;
  __asm__(
@ -197,7 +197,7 @@ static inline opus_int32 silk_CLZ16_armv5(opus_int16 in16)
 #define silk_CLZ16(in16) (silk_CLZ16_armv5(in16))

 #undef silk_CLZ32
-static inline opus_int32 silk_CLZ32_armv5(opus_int32 in32)
+static OPUS_INLINE opus_int32 silk_CLZ32_armv5(opus_int32 in32)
 {
  int res;
  __asm__(
--- a/media/libopus/silk/control.h
+++ b/media/libopus/silk/control.h
@ -92,6 +92,9 @@ typedef struct {
    /* I:   Opus encoder is allowing us to switch bandwidth                                 */
    opus_int opusCanSwitch;

+    /* I: Make frames as independent as possible (but still use LPC)                        */
+    opus_int reducedDependency;
+
    /* O:   Internal sampling rate used, in Hertz; 8000/12000/16000                         */
    opus_int32 internalSampleRate;

--- a/media/libopus/silk/control_codec.c
+++ b/media/libopus/silk/control_codec.c
@ -55,7 +55,7 @@ static opus_int silk_setup_complexity(
    opus_int                        Complexity          /* I                        */
 );

-static inline opus_int silk_setup_LBRR(
+static OPUS_INLINE opus_int silk_setup_LBRR(
    silk_encoder_state              *psEncC,            /* I/O                      */
    const opus_int32                TargetRate_bps      /* I                        */
 );
@ -392,7 +392,7 @@ static opus_int silk_setup_complexity(
    return ret;
 }

-static inline opus_int silk_setup_LBRR(
+static OPUS_INLINE opus_int silk_setup_LBRR(
    silk_encoder_state          *psEncC,            /* I/O                      */
    const opus_int32            TargetRate_bps      /* I                        */
 )
--- a/media/libopus/silk/dec_API.c
+++ b/media/libopus/silk/dec_API.c
@ -305,7 +305,7 @@ opus_int silk_Decode(                                   /* O    Returns error co

    /* Set up pointers to temp buffers */
    ALLOC( samplesOut2_tmp,
-           decControl->nChannelsAPI == 2 ? *nSamplesOut : 0, opus_int16 );
+           decControl->nChannelsAPI == 2 ? *nSamplesOut : ALLOC_NONE, opus_int16 );
    if( decControl->nChannelsAPI == 2 ) {
        resample_out_ptr = samplesOut2_tmp;
    } else {
--- a/media/libopus/silk/enc_API.c
+++ b/media/libopus/silk/enc_API.c
@ -69,6 +69,7 @@ opus_int silk_Get_Encoder_Size(                         /* O    Returns error co
 /*************************/
 opus_int silk_InitEncoder(                              /* O    Returns error code                              */
    void                            *encState,          /* I/O  State                                           */
+    int                              arch,              /* I    Run-time architecture                           */
    silk_EncControlStruct           *encStatus          /* O    Encoder Status                                  */
 )
 {
@ -80,7 +81,7 @@ opus_int silk_InitEncoder(                              /* O    Returns error co
    /* Reset encoder */
    silk_memset( psEnc, 0, sizeof( silk_encoder ) );
    for( n = 0; n < ENCODER_NUM_CHANNELS; n++ ) {
-        if( ret += silk_init_encoder( &psEnc->state_Fxx[ n ] ) ) {
+        if( ret += silk_init_encoder( &psEnc->state_Fxx[ n ], arch ) ) {
            silk_assert( 0 );
        }
    }
@ -156,6 +157,11 @@ opus_int silk_Encode(                                   /* O    Returns error co
    opus_int transition, curr_block, tot_blocks;
    SAVE_STACK;

+    if (encControl->reducedDependency)
+    {
+       psEnc->state_Fxx[0].sCmn.first_frame_after_reset = 1;
+       psEnc->state_Fxx[1].sCmn.first_frame_after_reset = 1;
+    }
    psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded = psEnc->state_Fxx[ 1 ].sCmn.nFramesEncoded = 0;

    /* Check values in encoder control structure */
@ -169,7 +175,7 @@ opus_int silk_Encode(                                   /* O    Returns error co

    if( encControl->nChannelsInternal > psEnc->nChannelsInternal ) {
        /* Mono -> Stereo transition: init state of second channel and stereo state */
-        ret += silk_init_encoder( &psEnc->state_Fxx[ 1 ] );
+        ret += silk_init_encoder( &psEnc->state_Fxx[ 1 ], psEnc->state_Fxx[ 0 ].sCmn.arch );
        silk_memset( psEnc->sStereo.pred_prev_Q13, 0, sizeof( psEnc->sStereo.pred_prev_Q13 ) );
        silk_memset( psEnc->sStereo.sSide, 0, sizeof( psEnc->sStereo.sSide ) );
        psEnc->sStereo.mid_side_amp_Q0[ 0 ] = 0;
@ -201,9 +207,8 @@ opus_int silk_Encode(                                   /* O    Returns error co
        }
        /* Reset Encoder */
        for( n = 0; n < encControl->nChannelsInternal; n++ ) {
-            if( (ret = silk_init_encoder( &psEnc->state_Fxx[ n ] ) ) != 0 ) {
-                silk_assert( 0 );
-            }
+            ret = silk_init_encoder( &psEnc->state_Fxx[ n ], psEnc->state_Fxx[ n ].sCmn.arch );
+            silk_assert( !ret );
        }
        tmp_payloadSize_ms = encControl->payloadSize_ms;
        encControl->payloadSize_ms = 10;
--- a/media/libopus/silk/encode_pulses.c
+++ b/media/libopus/silk/encode_pulses.c
@ -36,7 +36,7 @@ POSSIBILITY OF SUCH DAMAGE.
 /* Encode quantization indices of excitation */
 /*********************************************/

-static inline opus_int combine_and_check(    /* return ok                           */
+static OPUS_INLINE opus_int combine_and_check(    /* return ok                           */
    opus_int         *pulses_comb,           /* O                                   */
    const opus_int   *pulses_in,             /* I                                   */
    opus_int         max_pulses,             /* I    max value for sum of pulses    */
--- a/media/libopus/silk/fixed/autocorr_FIX.c
+++ b/media/libopus/silk/fixed/autocorr_FIX.c
@ -38,10 +38,11 @@ void silk_autocorr(
    opus_int                    *scale,             /* O    Scaling of the correlation vector                           */
    const opus_int16            *inputData,         /* I    Input data to correlate                                     */
    const opus_int              inputDataSize,      /* I    Length of input                                             */
-    const opus_int              correlationCount    /* I    Number of correlation taps to compute                       */
+    const opus_int              correlationCount,   /* I    Number of correlation taps to compute                       */
+    int                         arch                /* I    Run-time architecture                                       */
 )
 {
    opus_int   corrCount;
    corrCount = silk_min_int( inputDataSize, correlationCount );
-    *scale = _celt_autocorr(inputData, results, NULL, 0, corrCount-1, inputDataSize);
+    *scale = _celt_autocorr(inputData, results, NULL, 0, corrCount-1, inputDataSize, arch);
 }
--- a/media/libopus/silk/fixed/burg_modified_FIX.c
+++ b/media/libopus/silk/fixed/burg_modified_FIX.c
@ -50,7 +50,8 @@ void silk_burg_modified(
    const opus_int32            minInvGain_Q30,     /* I    Inverse of max prediction gain                              */
    const opus_int              subfr_length,       /* I    Input signal subframe length (incl. D preceding samples)    */
    const opus_int              nb_subfr,           /* I    Number of subframes stacked in x                            */
-    const opus_int              D                   /* I    Order                                                       */
+    const opus_int              D,                  /* I    Order                                                       */
+    int                         arch                /* I    Run-time architecture                                       */
 )
 {
    opus_int         k, n, s, lz, rshifts, rshifts_extra, reached_max_gain;
@ -98,7 +99,7 @@ void silk_burg_modified(
            int i;
            opus_int32 d;
            x_ptr = x + s * subfr_length;
-            celt_pitch_xcorr(x_ptr, x_ptr + 1, xcorr, subfr_length - D, D );
+            celt_pitch_xcorr(x_ptr, x_ptr + 1, xcorr, subfr_length - D, D, arch );
            for( n = 1; n < D + 1; n++ ) {
               for ( i = n + subfr_length - D, d = 0; i < subfr_length; i++ )
                  d = MAC16_16( d, x_ptr[ i ], x_ptr[ i - n ] );
--- a/media/libopus/silk/fixed/encode_frame_FIX.c
+++ b/media/libopus/silk/fixed/encode_frame_FIX.c
@ -34,7 +34,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "tuning_parameters.h"

 /* Low Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode with lower bitrate           */
-static inline void silk_LBRR_encode_FIX(
+static OPUS_INLINE void silk_LBRR_encode_FIX(
    silk_encoder_state_FIX          *psEnc,                                 /* I/O  Pointer to Silk FIX encoder state                                           */
    silk_encoder_control_FIX        *psEncCtrl,                             /* I/O  Pointer to Silk FIX encoder control struct                                  */
    const opus_int32                xfw_Q3[],                               /* I    Input signal                                                                */
@ -132,12 +132,12 @@ opus_int silk_encode_frame_FIX(
        /*****************************************/
        /* Find pitch lags, initial LPC analysis */
        /*****************************************/
-        silk_find_pitch_lags_FIX( psEnc, &sEncCtrl, res_pitch, x_frame );
+        silk_find_pitch_lags_FIX( psEnc, &sEncCtrl, res_pitch, x_frame, psEnc->sCmn.arch );

        /************************/
        /* Noise shape analysis */
        /************************/
-        silk_noise_shape_analysis_FIX( psEnc, &sEncCtrl, res_pitch_frame, x_frame );
+        silk_noise_shape_analysis_FIX( psEnc, &sEncCtrl, res_pitch_frame, x_frame, psEnc->sCmn.arch );

        /***************************************************/
        /* Find linear prediction coefficients (LPC + LTP) */
@ -302,10 +302,6 @@ opus_int silk_encode_frame_FIX(
    silk_memmove( psEnc->x_buf, &psEnc->x_buf[ psEnc->sCmn.frame_length ],
        ( psEnc->sCmn.ltp_mem_length + LA_SHAPE_MS * psEnc->sCmn.fs_kHz ) * sizeof( opus_int16 ) );

-    /* Parameters needed for next frame */
-    psEnc->sCmn.prevLag        = sEncCtrl.pitchL[ psEnc->sCmn.nb_subfr - 1 ];
-    psEnc->sCmn.prevSignalType = psEnc->sCmn.indices.signalType;
-
    /* Exit without entropy coding */
    if( psEnc->sCmn.prefillFlag ) {
        /* No payload */
@ -314,6 +310,10 @@ opus_int silk_encode_frame_FIX(
        return ret;
    }

+    /* Parameters needed for next frame */
+    psEnc->sCmn.prevLag        = sEncCtrl.pitchL[ psEnc->sCmn.nb_subfr - 1 ];
+    psEnc->sCmn.prevSignalType = psEnc->sCmn.indices.signalType;
+
    /****************************************/
    /* Finalize payload                     */
    /****************************************/
@ -326,7 +326,7 @@ opus_int silk_encode_frame_FIX(
 }

 /* Low-Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode excitation at lower bitrate  */
-static inline void silk_LBRR_encode_FIX(
+static OPUS_INLINE void silk_LBRR_encode_FIX(
    silk_encoder_state_FIX          *psEnc,                                 /* I/O  Pointer to Silk FIX encoder state                                           */
    silk_encoder_control_FIX        *psEncCtrl,                             /* I/O  Pointer to Silk FIX encoder control struct                                  */
    const opus_int32                xfw_Q3[],                               /* I    Input signal                                                                */
--- a/media/libopus/silk/fixed/find_LPC_FIX.c
+++ b/media/libopus/silk/fixed/find_LPC_FIX.c
@ -60,13 +60,13 @@ void silk_find_LPC_FIX(
    psEncC->indices.NLSFInterpCoef_Q2 = 4;

    /* Burg AR analysis for the full frame */
-    silk_burg_modified( &res_nrg, &res_nrg_Q, a_Q16, x, minInvGain_Q30, subfr_length, psEncC->nb_subfr, psEncC->predictLPCOrder );
+    silk_burg_modified( &res_nrg, &res_nrg_Q, a_Q16, x, minInvGain_Q30, subfr_length, psEncC->nb_subfr, psEncC->predictLPCOrder, psEncC->arch );

    if( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) {
        VARDECL( opus_int16, LPC_res );

        /* Optimal solution for last 10 ms */
-        silk_burg_modified( &res_tmp_nrg, &res_tmp_nrg_Q, a_tmp_Q16, x + 2 * subfr_length, minInvGain_Q30, subfr_length, 2, psEncC->predictLPCOrder );
+        silk_burg_modified( &res_tmp_nrg, &res_tmp_nrg_Q, a_tmp_Q16, x + 2 * subfr_length, minInvGain_Q30, subfr_length, 2, psEncC->predictLPCOrder, psEncC->arch );

        /* subtract residual energy here, as that's easier than adding it to the    */
        /* residual energy of the first 10 ms in each iteration of the search below */
--- a/media/libopus/silk/fixed/find_pitch_lags_FIX.c
+++ b/media/libopus/silk/fixed/find_pitch_lags_FIX.c
@ -38,7 +38,8 @@ void silk_find_pitch_lags_FIX(
    silk_encoder_state_FIX          *psEnc,                                 /* I/O  encoder state                                                               */
    silk_encoder_control_FIX        *psEncCtrl,                             /* I/O  encoder control                                                             */
    opus_int16                      res[],                                  /* O    residual                                                                    */
-    const opus_int16                x[]                                     /* I    Speech signal                                                               */
+    const opus_int16                x[],                                    /* I    Speech signal                                                               */
+    int                             arch                                    /* I    Run-time architecture                                                       */
 )
 {
    opus_int   buf_len, i, scale;
@ -86,7 +87,7 @@ void silk_find_pitch_lags_FIX(
    silk_apply_sine_window( Wsig_ptr, x_buf_ptr, 2, psEnc->sCmn.la_pitch );

    /* Calculate autocorrelation sequence */
-    silk_autocorr( auto_corr, &scale, Wsig, psEnc->sCmn.pitch_LPC_win_length, psEnc->sCmn.pitchEstimationLPCOrder + 1 );
+    silk_autocorr( auto_corr, &scale, Wsig, psEnc->sCmn.pitch_LPC_win_length, psEnc->sCmn.pitchEstimationLPCOrder + 1, arch );

    /* Add white noise, as fraction of energy */
    auto_corr[ 0 ] = silk_SMLAWB( auto_corr[ 0 ], auto_corr[ 0 ], SILK_FIX_CONST( FIND_PITCH_WHITE_NOISE_FRACTION, 16 ) ) + 1;
@ -127,7 +128,8 @@ void silk_find_pitch_lags_FIX(
        /*****************************************/
        if( silk_pitch_analysis_core( res, psEncCtrl->pitchL, &psEnc->sCmn.indices.lagIndex, &psEnc->sCmn.indices.contourIndex,
                &psEnc->LTPCorr_Q15, psEnc->sCmn.prevLag, psEnc->sCmn.pitchEstimationThreshold_Q16,
-                (opus_int)thrhld_Q13, psEnc->sCmn.fs_kHz, psEnc->sCmn.pitchEstimationComplexity, psEnc->sCmn.nb_subfr ) == 0 )
+                (opus_int)thrhld_Q13, psEnc->sCmn.fs_kHz, psEnc->sCmn.pitchEstimationComplexity, psEnc->sCmn.nb_subfr,
+                psEnc->sCmn.arch) == 0 )
        {
            psEnc->sCmn.indices.signalType = TYPE_VOICED;
        } else {
--- a/media/libopus/silk/fixed/find_pred_coefs_FIX.c
+++ b/media/libopus/silk/fixed/find_pred_coefs_FIX.c
@ -93,7 +93,7 @@ void silk_find_pred_coefs_FIX(

        /* Quantize LTP gain parameters */
        silk_quant_LTP_gains( psEncCtrl->LTPCoef_Q14, psEnc->sCmn.indices.LTPIndex, &psEnc->sCmn.indices.PERIndex,
-            WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr);
+            &psEnc->sCmn.sum_log_gain_Q7, WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr);

        /* Control LTP scaling */
        silk_LTP_scale_ctrl_FIX( psEnc, psEncCtrl, condCoding );
@ -118,6 +118,7 @@ void silk_find_pred_coefs_FIX(

        silk_memset( psEncCtrl->LTPCoef_Q14, 0, psEnc->sCmn.nb_subfr * LTP_ORDER * sizeof( opus_int16 ) );
        psEncCtrl->LTPredCodGain_Q7 = 0;
+		psEnc->sCmn.sum_log_gain_Q7 = 0;
    }

    /* Limit on total predictive coding gain */
--- a/media/libopus/silk/fixed/main_FIX.h
+++ b/media/libopus/silk/fixed/main_FIX.h
@ -73,7 +73,8 @@ opus_int silk_encode_frame_FIX(

 /* Initializes the Silk encoder state */
 opus_int silk_init_encoder(
-    silk_encoder_state_Fxx          *psEnc                                  /* I/O  Pointer to Silk FIX encoder state                                           */
+    silk_encoder_state_Fxx          *psEnc,                                 /* I/O  Pointer to Silk FIX encoder state                                           */
+    int                              arch                                   /* I    Run-time architecture                                                       */
 );

 /* Control the Silk encoder */
@ -104,7 +105,8 @@ void silk_noise_shape_analysis_FIX(
    silk_encoder_state_FIX          *psEnc,                                 /* I/O  Encoder state FIX                                                           */
    silk_encoder_control_FIX        *psEncCtrl,                             /* I/O  Encoder control FIX                                                         */
    const opus_int16                *pitch_res,                             /* I    LPC residual from pitch analysis                                            */
-    const opus_int16                *x                                      /* I    Input signal [ frame_length + la_shape ]                                    */
+    const opus_int16                *x,                                     /* I    Input signal [ frame_length + la_shape ]                                    */
+    int                              arch                                   /* I    Run-time architecture                                                       */
 );

 /* Autocorrelations for a warped frequency axis */
@ -132,7 +134,8 @@ void silk_find_pitch_lags_FIX(
    silk_encoder_state_FIX          *psEnc,                                 /* I/O  encoder state                                                               */
    silk_encoder_control_FIX        *psEncCtrl,                             /* I/O  encoder control                                                             */
    opus_int16                      res[],                                  /* O    residual                                                                    */
-    const opus_int16                x[]                                     /* I    Speech signal                                                               */
+    const opus_int16                x[],                                    /* I    Speech signal                                                               */
+    int                             arch                                    /* I    Run-time architecture                                                       */
 );

 /* Find LPC and LTP coefficients */
--- a/media/libopus/silk/fixed/noise_shape_analysis_FIX.c
+++ b/media/libopus/silk/fixed/noise_shape_analysis_FIX.c
@ -37,7 +37,7 @@ POSSIBILITY OF SUCH DAMAGE.
 /* non-warped frequency scale. (So that it can be implemented with a minimum-phase monic filter.) */
 /* Note: A monic filter is one with the first coefficient equal to 1.0. In Silk we omit the first */
 /* coefficient in an array of coefficients, for monic filters.                                    */
-static inline opus_int32 warped_gain( /* gain in Q16*/
+static OPUS_INLINE opus_int32 warped_gain( /* gain in Q16*/
    const opus_int32     *coefs_Q24,
    opus_int             lambda_Q16,
    opus_int             order
@ -56,7 +56,7 @@ static inline opus_int32 warped_gain( /* gain in Q16*/

 /* Convert warped filter coefficients to monic pseudo-warped coefficients and limit maximum     */
 /* amplitude of monic warped coefficients by using bandwidth expansion on the true coefficients */
-static inline void limit_warped_coefs(
+static OPUS_INLINE void limit_warped_coefs(
    opus_int32           *coefs_syn_Q24,
    opus_int32           *coefs_ana_Q24,
    opus_int             lambda_Q16,
@ -145,7 +145,8 @@ void silk_noise_shape_analysis_FIX(
    silk_encoder_state_FIX          *psEnc,                                 /* I/O  Encoder state FIX                                                           */
    silk_encoder_control_FIX        *psEncCtrl,                             /* I/O  Encoder control FIX                                                         */
    const opus_int16                *pitch_res,                             /* I    LPC residual from pitch analysis                                            */
-    const opus_int16                *x                                      /* I    Input signal [ frame_length + la_shape ]                                    */
+    const opus_int16                *x,                                     /* I    Input signal [ frame_length + la_shape ]                                    */
+    int                              arch                                   /* I    Run-time architecture                                                       */
 )
 {
    silk_shape_state_FIX *psShapeSt = &psEnc->sShape;
@ -281,7 +282,7 @@ void silk_noise_shape_analysis_FIX(
            silk_warped_autocorrelation_FIX( auto_corr, &scale, x_windowed, warping_Q16, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder );
        } else {
            /* Calculate regular auto correlation */
-            silk_autocorr( auto_corr, &scale, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1 );
+            silk_autocorr( auto_corr, &scale, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1, arch );
        }

        /* Add white noise, as a fraction of energy */
--- a/media/libopus/silk/fixed/pitch_analysis_core_FIX.c
+++ b/media/libopus/silk/fixed/pitch_analysis_core_FIX.c
@ -62,7 +62,8 @@ static void silk_P_Ana_calc_corr_st3(
    opus_int          start_lag,                       /* I lag offset to search around */
    opus_int          sf_length,                       /* I length of a 5 ms subframe   */
    opus_int          nb_subfr,                        /* I number of subframes         */
-    opus_int          complexity                       /* I Complexity setting          */
+    opus_int          complexity,                      /* I Complexity setting          */
+    int               arch                             /* I Run-time architecture       */
 );

 static void silk_P_Ana_calc_energy_st3(
@ -88,7 +89,8 @@ opus_int silk_pitch_analysis_core(                  /* O    Voicing estimate: 0
    const opus_int              search_thres2_Q13,  /* I    Final threshold for lag candidates 0 - 1                    */
    const opus_int              Fs_kHz,             /* I    Sample frequency (kHz)                                      */
    const opus_int              complexity,         /* I    Complexity setting, 0-2, where 2 is highest                 */
-    const opus_int              nb_subfr            /* I    number of 5 ms subframes                                    */
+    const opus_int              nb_subfr,           /* I    number of 5 ms subframes                                    */
+    int                         arch                /* I    Run-time architecture                                       */
 )
 {
    VARDECL( opus_int16, frame_8kHz );
@ -189,7 +191,7 @@ opus_int silk_pitch_analysis_core(                  /* O    Voicing estimate: 0
        silk_assert( basis_ptr >= frame_4kHz );
        silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );

-        celt_pitch_xcorr( target_ptr, target_ptr - MAX_LAG_4KHZ, xcorr32, SF_LENGTH_8KHZ, MAX_LAG_4KHZ - MIN_LAG_4KHZ + 1 );
+        celt_pitch_xcorr( target_ptr, target_ptr - MAX_LAG_4KHZ, xcorr32, SF_LENGTH_8KHZ, MAX_LAG_4KHZ - MIN_LAG_4KHZ + 1, arch );

        /* Calculate first vector products before loop */
        cross_corr = xcorr32[ MAX_LAG_4KHZ - MIN_LAG_4KHZ ];
@ -465,7 +467,7 @@ opus_int silk_pitch_analysis_core(                  /* O    Voicing estimate: 0
        /***************************************************************************/
        /* find scaling as max scaling for each subframe */
        silk_sum_sqr_shift( &energy, &shift, frame, frame_length );
-        ALLOC( scratch_mem, shift > 0 ? frame_length : 0, opus_int16 );
+        ALLOC( scratch_mem, shift > 0 ? frame_length : ALLOC_NONE, opus_int16 );
        if( shift > 0 ) {
            /* Move signal to scratch mem because the input signal should be unchanged */
            shift = silk_RSHIFT( shift, 1 );
@ -516,7 +518,7 @@ opus_int silk_pitch_analysis_core(                  /* O    Voicing estimate: 0
        /* Calculate the correlations and energies needed in stage 3 */
        ALLOC( energies_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals );
        ALLOC( cross_corr_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals );
-        silk_P_Ana_calc_corr_st3(  cross_corr_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity );
+        silk_P_Ana_calc_corr_st3(  cross_corr_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity, arch );
        silk_P_Ana_calc_energy_st3( energies_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity );

        lag_counter = 0;
@ -597,7 +599,8 @@ static void silk_P_Ana_calc_corr_st3(
    opus_int          start_lag,                       /* I lag offset to search around */
    opus_int          sf_length,                       /* I length of a 5 ms subframe   */
    opus_int          nb_subfr,                        /* I number of subframes         */
-    opus_int          complexity                       /* I Complexity setting          */
+    opus_int          complexity,                      /* I Complexity setting          */
+    int               arch                             /* I Run-time architecture       */
 )
 {
    const opus_int16 *target_ptr;
@ -634,7 +637,7 @@ static void silk_P_Ana_calc_corr_st3(
        lag_low  = matrix_ptr( Lag_range_ptr, k, 0, 2 );
        lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 );
        silk_assert(lag_high-lag_low+1 <= SCRATCH_SIZE);
-        celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr32, sf_length, lag_high - lag_low + 1 );
+        celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr32, sf_length, lag_high - lag_low + 1, arch );
        for( j = lag_low; j <= lag_high; j++ ) {
            silk_assert( lag_counter < SCRATCH_SIZE );
            scratch_mem[ lag_counter ] = xcorr32[ lag_high - j ];
--- a/media/libopus/silk/fixed/prefilter_FIX.c
+++ b/media/libopus/silk/fixed/prefilter_FIX.c
@ -34,7 +34,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "tuning_parameters.h"

 /* Prefilter for finding Quantizer input signal */
-static inline void silk_prefilt_FIX(
+static OPUS_INLINE void silk_prefilt_FIX(
    silk_prefilter_state_FIX    *P,                         /* I/O  state                               */
    opus_int32                  st_res_Q12[],               /* I    short term residual signal          */
    opus_int32                  xw_Q3[],                    /* O    prefiltered signal                  */
@ -156,7 +156,7 @@ void silk_prefilter_FIX(
 }

 /* Prefilter for finding Quantizer input signal */
-static inline void silk_prefilt_FIX(
+static OPUS_INLINE void silk_prefilt_FIX(
    silk_prefilter_state_FIX    *P,                         /* I/O  state                               */
    opus_int32                  st_res_Q12[],               /* I    short term residual signal          */
    opus_int32                  xw_Q3[],                    /* O    prefiltered signal                  */
--- a/media/libopus/silk/fixed/solve_LS_FIX.c
+++ b/media/libopus/silk/fixed/solve_LS_FIX.c
@ -43,7 +43,7 @@ typedef struct {
 } inv_D_t;

 /* Factorize square matrix A into LDL form */
-static inline void silk_LDL_factorize_FIX(
+static OPUS_INLINE void silk_LDL_factorize_FIX(
    opus_int32          *A,         /* I/O Pointer to Symetric Square Matrix                            */
    opus_int            M,          /* I   Size of Matrix                                               */
    opus_int32          *L_Q16,     /* I/O Pointer to Square Upper triangular Matrix                    */
@ -51,7 +51,7 @@ static inline void silk_LDL_factorize_FIX(
 );

 /* Solve Lx = b, when L is lower triangular and has ones on the diagonal */
-static inline void silk_LS_SolveFirst_FIX(
+static OPUS_INLINE void silk_LS_SolveFirst_FIX(
    const opus_int32    *L_Q16,     /* I    Pointer to Lower Triangular Matrix                          */
    opus_int            M,          /* I    Dim of Matrix equation                                      */
    const opus_int32    *b,         /* I    b Vector                                                    */
@ -59,14 +59,14 @@ static inline void silk_LS_SolveFirst_FIX(
 );

 /* Solve L^t*x = b, where L is lower triangular with ones on the diagonal */
-static inline void silk_LS_SolveLast_FIX(
+static OPUS_INLINE void silk_LS_SolveLast_FIX(
    const opus_int32    *L_Q16,     /* I    Pointer to Lower Triangular Matrix                          */
    const opus_int      M,          /* I    Dim of Matrix equation                                      */
    const opus_int32    *b,         /* I    b Vector                                                    */
    opus_int32          *x_Q16      /* O    x Vector                                                    */
 );

-static inline void silk_LS_divide_Q16_FIX(
+static OPUS_INLINE void silk_LS_divide_Q16_FIX(
    opus_int32          T[],        /* I/O  Numenator vector                                            */
    inv_D_t             *inv_D,     /* I    1 / D vector                                                */
    opus_int            M           /* I    dimension                                                   */
@ -113,7 +113,7 @@ void silk_solve_LDL_FIX(
    RESTORE_STACK;
 }

-static inline void silk_LDL_factorize_FIX(
+static OPUS_INLINE void silk_LDL_factorize_FIX(
    opus_int32          *A,         /* I/O Pointer to Symetric Square Matrix                            */
    opus_int            M,          /* I   Size of Matrix                                               */
    opus_int32          *L_Q16,     /* I/O Pointer to Square Upper triangular Matrix                    */
@ -185,7 +185,7 @@ static inline void silk_LDL_factorize_FIX(
    silk_assert( status == 0 );
 }

-static inline void silk_LS_divide_Q16_FIX(
+static OPUS_INLINE void silk_LS_divide_Q16_FIX(
    opus_int32          T[],        /* I/O  Numenator vector                                            */
    inv_D_t             *inv_D,     /* I    1 / D vector                                                */
    opus_int            M           /* I    dimension                                                   */
@ -205,7 +205,7 @@ static inline void silk_LS_divide_Q16_FIX(
 }

 /* Solve Lx = b, when L is lower triangular and has ones on the diagonal */
-static inline void silk_LS_SolveFirst_FIX(
+static OPUS_INLINE void silk_LS_SolveFirst_FIX(
    const opus_int32    *L_Q16,     /* I    Pointer to Lower Triangular Matrix                          */
    opus_int            M,          /* I    Dim of Matrix equation                                      */
    const opus_int32    *b,         /* I    b Vector                                                    */
@ -227,7 +227,7 @@ static inline void silk_LS_SolveFirst_FIX(
 }

 /* Solve L^t*x = b, where L is lower triangular with ones on the diagonal */
-static inline void silk_LS_SolveLast_FIX(
+static OPUS_INLINE void silk_LS_SolveLast_FIX(
    const opus_int32    *L_Q16,     /* I    Pointer to Lower Triangular Matrix                          */
    const opus_int      M,          /* I    Dim of Matrix equation                                      */
    const opus_int32    *b,         /* I    b Vector                                                    */
--- a/media/libopus/silk/float/LPC_analysis_filter_FLP.c
+++ b/media/libopus/silk/float/LPC_analysis_filter_FLP.c
@ -40,7 +40,7 @@ POSSIBILITY OF SUCH DAMAGE.
 /************************************************/

 /* 16th order LPC analysis filter, does not write first 16 samples */
-static inline void silk_LPC_analysis_filter16_FLP(
+static OPUS_INLINE void silk_LPC_analysis_filter16_FLP(
          silk_float                 r_LPC[],            /* O    LPC residual signal                     */
    const silk_float                 PredCoef[],         /* I    LPC coefficients                        */
    const silk_float                 s[],                /* I    Input signal                            */
@ -78,7 +78,7 @@ static inline void silk_LPC_analysis_filter16_FLP(
 }

 /* 12th order LPC analysis filter, does not write first 12 samples */
-static inline void silk_LPC_analysis_filter12_FLP(
+static OPUS_INLINE void silk_LPC_analysis_filter12_FLP(
          silk_float                 r_LPC[],            /* O    LPC residual signal                     */
    const silk_float                 PredCoef[],         /* I    LPC coefficients                        */
    const silk_float                 s[],                /* I    Input signal                            */
@ -112,7 +112,7 @@ static inline void silk_LPC_analysis_filter12_FLP(
 }

 /* 10th order LPC analysis filter, does not write first 10 samples */
-static inline void silk_LPC_analysis_filter10_FLP(
+static OPUS_INLINE void silk_LPC_analysis_filter10_FLP(
          silk_float                 r_LPC[],            /* O    LPC residual signal                     */
    const silk_float                 PredCoef[],         /* I    LPC coefficients                        */
    const silk_float                 s[],                /* I    Input signal                            */
@ -144,7 +144,7 @@ static inline void silk_LPC_analysis_filter10_FLP(
 }

 /* 8th order LPC analysis filter, does not write first 8 samples */
-static inline void silk_LPC_analysis_filter8_FLP(
+static OPUS_INLINE void silk_LPC_analysis_filter8_FLP(
          silk_float                 r_LPC[],            /* O    LPC residual signal                     */
    const silk_float                 PredCoef[],         /* I    LPC coefficients                        */
    const silk_float                 s[],                /* I    Input signal                            */
@ -174,7 +174,7 @@ static inline void silk_LPC_analysis_filter8_FLP(
 }

 /* 6th order LPC analysis filter, does not write first 6 samples */
-static inline void silk_LPC_analysis_filter6_FLP(
+static OPUS_INLINE void silk_LPC_analysis_filter6_FLP(
          silk_float                 r_LPC[],            /* O    LPC residual signal                     */
    const silk_float                 PredCoef[],         /* I    LPC coefficients                        */
    const silk_float                 s[],                /* I    Input signal                            */
--- a/media/libopus/silk/float/SigProc_FLP.h
+++ b/media/libopus/silk/float/SigProc_FLP.h
@ -94,7 +94,8 @@ opus_int silk_pitch_analysis_core_FLP(      /* O    Voicing estimate: 0 voiced,
    const silk_float    search_thres2,      /* I    Final threshold for lag candidates 0 - 1                    */
    const opus_int      Fs_kHz,             /* I    sample frequency (kHz)                                      */
    const opus_int      complexity,         /* I    Complexity setting, 0-2, where 2 is highest                 */
-    const opus_int      nb_subfr            /* I    Number of 5 ms subframes                                    */
+    const opus_int      nb_subfr,           /* I    Number of 5 ms subframes                                    */
+    int                 arch                /* I    Run-time architecture                                       */
 );

 void silk_insertion_sort_decreasing_FLP(
@ -153,19 +154,19 @@ double silk_energy_FLP(
 #define silk_abs_float( a )                     ((silk_float)fabs(a))

 /* sigmoid function */
-static inline silk_float silk_sigmoid( silk_float x )
+static OPUS_INLINE silk_float silk_sigmoid( silk_float x )
 {
    return (silk_float)(1.0 / (1.0 + exp(-x)));
 }

 /* floating-point to integer conversion (rounding) */
-static inline opus_int32 silk_float2int( silk_float x )
+static OPUS_INLINE opus_int32 silk_float2int( silk_float x )
 {
    return (opus_int32)float2int( x );
 }

 /* floating-point to integer conversion (rounding) */
-static inline void silk_float2short_array(
+static OPUS_INLINE void silk_float2short_array(
    opus_int16       *out,
    const silk_float *in,
    opus_int32       length
@ -178,7 +179,7 @@ static inline void silk_float2short_array(
 }

 /* integer to floating-point conversion */
-static inline void silk_short2float_array(
+static OPUS_INLINE void silk_short2float_array(
    silk_float       *out,
    const opus_int16 *in,
    opus_int32       length
@ -191,7 +192,7 @@ static inline void silk_short2float_array(
 }

 /* using log2() helps the fixed-point conversion */
-static inline silk_float silk_log2( double x )
+static OPUS_INLINE silk_float silk_log2( double x )
 {
    return ( silk_float )( 3.32192809488736 * log10( x ) );
 }
--- a/media/libopus/silk/float/encode_frame_FLP.c
+++ b/media/libopus/silk/float/encode_frame_FLP.c
@ -33,7 +33,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "tuning_parameters.h"

 /* Low Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode with lower bitrate */
-static inline void silk_LBRR_encode_FLP(
+static OPUS_INLINE void silk_LBRR_encode_FLP(
    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
    silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
    const silk_float                xfw[],                              /* I    Input signal                                */
@ -129,7 +129,7 @@ opus_int silk_encode_frame_FLP(
        /*****************************************/
        /* Find pitch lags, initial LPC analysis */
        /*****************************************/
-        silk_find_pitch_lags_FLP( psEnc, &sEncCtrl, res_pitch, x_frame );
+        silk_find_pitch_lags_FLP( psEnc, &sEncCtrl, res_pitch, x_frame, psEnc->sCmn.arch );

        /************************/
        /* Noise shape analysis */
@ -294,10 +294,6 @@ opus_int silk_encode_frame_FLP(
    silk_memmove( psEnc->x_buf, &psEnc->x_buf[ psEnc->sCmn.frame_length ],
        ( psEnc->sCmn.ltp_mem_length + LA_SHAPE_MS * psEnc->sCmn.fs_kHz ) * sizeof( silk_float ) );

-    /* Parameters needed for next frame */
-    psEnc->sCmn.prevLag        = sEncCtrl.pitchL[ psEnc->sCmn.nb_subfr - 1 ];
-    psEnc->sCmn.prevSignalType = psEnc->sCmn.indices.signalType;
-
    /* Exit without entropy coding */
    if( psEnc->sCmn.prefillFlag ) {
        /* No payload */
@ -305,6 +301,10 @@ opus_int silk_encode_frame_FLP(
        return ret;
    }

+    /* Parameters needed for next frame */
+    psEnc->sCmn.prevLag        = sEncCtrl.pitchL[ psEnc->sCmn.nb_subfr - 1 ];
+    psEnc->sCmn.prevSignalType = psEnc->sCmn.indices.signalType;
+
    /****************************************/
    /* Finalize payload                     */
    /****************************************/
@ -316,7 +316,7 @@ opus_int silk_encode_frame_FLP(
 }

 /* Low-Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode excitation at lower bitrate  */
-static inline void silk_LBRR_encode_FLP(
+static OPUS_INLINE void silk_LBRR_encode_FLP(
    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
    silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
    const silk_float                xfw[],                              /* I    Input signal                                */
--- a/media/libopus/silk/float/find_pitch_lags_FLP.c
+++ b/media/libopus/silk/float/find_pitch_lags_FLP.c
@ -37,7 +37,8 @@ void silk_find_pitch_lags_FLP(
    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
    silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
    silk_float                      res[],                              /* O    Residual                                    */
-    const silk_float                x[]                                 /* I    Speech signal                               */
+    const silk_float                x[],                                /* I    Speech signal                               */
+    int                             arch                                /* I    Run-time architecture                       */
 )
 {
    opus_int   buf_len;
@ -116,7 +117,7 @@ void silk_find_pitch_lags_FLP(
        /*****************************************/
        if( silk_pitch_analysis_core_FLP( res, psEncCtrl->pitchL, &psEnc->sCmn.indices.lagIndex,
            &psEnc->sCmn.indices.contourIndex, &psEnc->LTPCorr, psEnc->sCmn.prevLag, psEnc->sCmn.pitchEstimationThreshold_Q16 / 65536.0f,
-            thrhld, psEnc->sCmn.fs_kHz, psEnc->sCmn.pitchEstimationComplexity, psEnc->sCmn.nb_subfr ) == 0 )
+            thrhld, psEnc->sCmn.fs_kHz, psEnc->sCmn.pitchEstimationComplexity, psEnc->sCmn.nb_subfr, arch ) == 0 )
        {
            psEnc->sCmn.indices.signalType = TYPE_VOICED;
        } else {
--- a/media/libopus/silk/float/find_pred_coefs_FLP.c
+++ b/media/libopus/silk/float/find_pred_coefs_FLP.c
@ -67,7 +67,7 @@ void silk_find_pred_coefs_FLP(

        /* Quantize LTP gain parameters */
        silk_quant_LTP_gains_FLP( psEncCtrl->LTPCoef, psEnc->sCmn.indices.LTPIndex, &psEnc->sCmn.indices.PERIndex,
-            WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr );
+            &psEnc->sCmn.sum_log_gain_Q7, WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr );

        /* Control LTP scaling */
        silk_LTP_scale_ctrl_FLP( psEnc, psEncCtrl, condCoding );
@ -90,6 +90,7 @@ void silk_find_pred_coefs_FLP(
        }
        silk_memset( psEncCtrl->LTPCoef, 0, psEnc->sCmn.nb_subfr * LTP_ORDER * sizeof( silk_float ) );
        psEncCtrl->LTPredCodGain = 0.0f;
+		psEnc->sCmn.sum_log_gain_Q7 = 0;
    }

    /* Limit on total predictive coding gain */
--- a/media/libopus/silk/float/main_FLP.h
+++ b/media/libopus/silk/float/main_FLP.h
@ -71,7 +71,8 @@ opus_int silk_encode_frame_FLP(

 /* Initializes the Silk encoder state */
 opus_int silk_init_encoder(
-    silk_encoder_state_FLP          *psEnc                              /* I/O  Encoder state FLP                           */
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    int                              arch                               /* I    Run-tim architecture                        */
 );

 /* Control the Silk encoder */
@ -129,7 +130,8 @@ void silk_find_pitch_lags_FLP(
    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
    silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
    silk_float                      res[],                              /* O    Residual                                    */
-    const silk_float                x[]                                 /* I    Speech signal                               */
+    const silk_float                x[],                                /* I    Speech signal                               */
+    int                             arch                                /* I    Run-time architecture                       */
 );

 /* Find LPC and LTP coefficients */
@ -199,6 +201,7 @@ void silk_quant_LTP_gains_FLP(
    silk_float                      B[ MAX_NB_SUBFR * LTP_ORDER ],      /* I/O  (Un-)quantized LTP gains                    */
    opus_int8                       cbk_index[ MAX_NB_SUBFR ],          /* O    Codebook index                              */
    opus_int8                       *periodicity_index,                 /* O    Periodicity index                           */
+    opus_int32                      *sum_log_gain_Q7,                   /* I/O  Cumulative max prediction gain  */
    const silk_float                W[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* I    Error weights                        */
    const opus_int                  mu_Q10,                             /* I    Mu value (R/D tradeoff)                     */
    const opus_int                  lowComplexity,                      /* I    Flag for low complexity                     */
--- a/media/libopus/silk/float/noise_shape_analysis_FLP.c
+++ b/media/libopus/silk/float/noise_shape_analysis_FLP.c
@ -36,7 +36,7 @@ POSSIBILITY OF SUCH DAMAGE.
 /* non-warped frequency scale. (So that it can be implemented with a minimum-phase monic filter.) */
 /* Note: A monic filter is one with the first coefficient equal to 1.0. In Silk we omit the first */
 /* coefficient in an array of coefficients, for monic filters.                                    */
-static inline silk_float warped_gain(
+static OPUS_INLINE silk_float warped_gain(
    const silk_float     *coefs,
    silk_float           lambda,
    opus_int             order
@ -54,7 +54,7 @@ static inline silk_float warped_gain(

 /* Convert warped filter coefficients to monic pseudo-warped coefficients and limit maximum     */
 /* amplitude of monic warped coefficients by using bandwidth expansion on the true coefficients */
-static inline void warped_true2monic_coefs(
+static OPUS_INLINE void warped_true2monic_coefs(
    silk_float           *coefs_syn,
    silk_float           *coefs_ana,
    silk_float           lambda,
--- a/media/libopus/silk/float/pitch_analysis_core_FLP.c
+++ b/media/libopus/silk/float/pitch_analysis_core_FLP.c
@ -48,7 +48,8 @@ static void silk_P_Ana_calc_corr_st3(
    opus_int            start_lag,          /* I start lag                                                      */
    opus_int            sf_length,          /* I sub frame length                                               */
    opus_int            nb_subfr,           /* I number of subframes                                            */
-    opus_int            complexity          /* I Complexity setting                                             */
+    opus_int            complexity,         /* I Complexity setting                                             */
+    int                 arch                /* I Run-time architecture                                          */
 );

 static void silk_P_Ana_calc_energy_st3(
@ -74,7 +75,8 @@ opus_int silk_pitch_analysis_core_FLP(      /* O    Voicing estimate: 0 voiced,
    const silk_float    search_thres2,      /* I    Final threshold for lag candidates 0 - 1                    */
    const opus_int      Fs_kHz,             /* I    sample frequency (kHz)                                      */
    const opus_int      complexity,         /* I    Complexity setting, 0-2, where 2 is highest                 */
-    const opus_int      nb_subfr            /* I    Number of 5 ms subframes                                    */
+    const opus_int      nb_subfr,           /* I    Number of 5 ms subframes                                    */
+    int                 arch                /* I    Run-time architecture                                       */
 )
 {
    opus_int   i, k, d, j;
@ -176,7 +178,7 @@ opus_int silk_pitch_analysis_core_FLP(      /* O    Voicing estimate: 0 voiced,
        silk_assert( basis_ptr >= frame_4kHz );
        silk_assert( basis_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz );

-        celt_pitch_xcorr( target_ptr, target_ptr-max_lag_4kHz, xcorr, sf_length_8kHz, max_lag_4kHz - min_lag_4kHz + 1 );
+        celt_pitch_xcorr( target_ptr, target_ptr-max_lag_4kHz, xcorr, sf_length_8kHz, max_lag_4kHz - min_lag_4kHz + 1, arch );

        /* Calculate first vector products before loop */
        cross_corr = xcorr[ max_lag_4kHz - min_lag_4kHz ];
@ -409,7 +411,7 @@ opus_int silk_pitch_analysis_core_FLP(      /* O    Voicing estimate: 0 voiced,
        CCmax = -1000.0f;

        /* Calculate the correlations and energies needed in stage 3 */
-        silk_P_Ana_calc_corr_st3( cross_corr_st3, frame, start_lag, sf_length, nb_subfr, complexity );
+        silk_P_Ana_calc_corr_st3( cross_corr_st3, frame, start_lag, sf_length, nb_subfr, complexity, arch );
        silk_P_Ana_calc_energy_st3( energies_st3, frame, start_lag, sf_length, nb_subfr, complexity );

        lag_counter = 0;
@ -493,10 +495,11 @@ static void silk_P_Ana_calc_corr_st3(
    opus_int            start_lag,          /* I start lag                                                      */
    opus_int            sf_length,          /* I sub frame length                                               */
    opus_int            nb_subfr,           /* I number of subframes                                            */
-    opus_int            complexity          /* I Complexity setting                                             */
+    opus_int            complexity,         /* I Complexity setting                                             */
+    int                 arch                /* I Run-time architecture                                          */
 )
 {
-    const silk_float *target_ptr, *basis_ptr;
+    const silk_float *target_ptr;
    opus_int   i, j, k, lag_counter, lag_low, lag_high;
    opus_int   nb_cbk_search, delta, idx, cbk_size;
    silk_float scratch_mem[ SCRATCH_SIZE ];
@ -527,9 +530,8 @@ static void silk_P_Ana_calc_corr_st3(
        lag_low  = matrix_ptr( Lag_range_ptr, k, 0, 2 );
        lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 );
        silk_assert(lag_high-lag_low+1 <= SCRATCH_SIZE);
-        celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr, sf_length, lag_high - lag_low + 1 );
+        celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr, sf_length, lag_high - lag_low + 1, arch );
        for( j = lag_low; j <= lag_high; j++ ) {
-            basis_ptr = target_ptr - ( start_lag + j );
            silk_assert( lag_counter < SCRATCH_SIZE );
            scratch_mem[ lag_counter ] = xcorr[ lag_high - j ];
            lag_counter++;
--- a/media/libopus/silk/float/prefilter_FLP.c
+++ b/media/libopus/silk/float/prefilter_FLP.c
@ -35,7 +35,7 @@ POSSIBILITY OF SUCH DAMAGE.
 /*
 * Prefilter for finding Quantizer input signal
 */
-static inline void silk_prefilt_FLP(
+static OPUS_INLINE void silk_prefilt_FLP(
    silk_prefilter_state_FLP    *P,                 /* I/O state */
    silk_float                  st_res[],           /* I */
    silk_float                  xw[],               /* O */
@ -153,7 +153,7 @@ void silk_prefilter_FLP(
 /*
 * Prefilter for finding Quantizer input signal
 */
-static inline void silk_prefilt_FLP(
+static OPUS_INLINE void silk_prefilt_FLP(
    silk_prefilter_state_FLP    *P,                 /* I/O state */
    silk_float                  st_res[],           /* I */
    silk_float                  xw[],               /* O */
--- a/media/libopus/silk/float/solve_LS_FLP.c
+++ b/media/libopus/silk/float/solve_LS_FLP.c
@ -37,7 +37,7 @@ POSSIBILITY OF SUCH DAMAGE.
 * Matrix D (only the diagonal elements returned in a vector)such that
 * the symmetric matric A is given by A = L*D*L'.
 **********************************************************************/
-static inline void silk_LDL_FLP(
+static OPUS_INLINE void silk_LDL_FLP(
    silk_float          *A,         /* I/O  Pointer to Symetric Square Matrix                               */
    opus_int            M,          /* I    Size of Matrix                                                  */
    silk_float          *L,         /* I/O  Pointer to Square Upper triangular Matrix                       */
@ -48,7 +48,7 @@ static inline void silk_LDL_FLP(
 * Function to solve linear equation Ax = b, when A is a MxM lower
 * triangular matrix, with ones on the diagonal.
 **********************************************************************/
-static inline void silk_SolveWithLowerTriangularWdiagOnes_FLP(
+static OPUS_INLINE void silk_SolveWithLowerTriangularWdiagOnes_FLP(
    const silk_float    *L,         /* I    Pointer to Lower Triangular Matrix                              */
    opus_int            M,          /* I    Dim of Matrix equation                                          */
    const silk_float    *b,         /* I    b Vector                                                        */
@ -59,7 +59,7 @@ static inline void silk_SolveWithLowerTriangularWdiagOnes_FLP(
 * Function to solve linear equation (A^T)x = b, when A is a MxM lower
 * triangular, with ones on the diagonal. (ie then A^T is upper triangular)
 **********************************************************************/
-static inline void silk_SolveWithUpperTriangularFromLowerWdiagOnes_FLP(
+static OPUS_INLINE void silk_SolveWithUpperTriangularFromLowerWdiagOnes_FLP(
    const silk_float    *L,         /* I    Pointer to Lower Triangular Matrix                              */
    opus_int            M,          /* I    Dim of Matrix equation                                          */
    const silk_float    *b,         /* I    b Vector                                                        */
@ -109,7 +109,7 @@ void silk_solve_LDL_FLP(
    silk_SolveWithUpperTriangularFromLowerWdiagOnes_FLP( &L[ 0 ][ 0 ], M, T, x );
 }

-static inline void silk_SolveWithUpperTriangularFromLowerWdiagOnes_FLP(
+static OPUS_INLINE void silk_SolveWithUpperTriangularFromLowerWdiagOnes_FLP(
    const silk_float    *L,         /* I    Pointer to Lower Triangular Matrix                              */
    opus_int            M,          /* I    Dim of Matrix equation                                          */
    const silk_float    *b,         /* I    b Vector                                                        */
@ -131,7 +131,7 @@ static inline void silk_SolveWithUpperTriangularFromLowerWdiagOnes_FLP(
    }
 }

-static inline void silk_SolveWithLowerTriangularWdiagOnes_FLP(
+static OPUS_INLINE void silk_SolveWithLowerTriangularWdiagOnes_FLP(
    const silk_float    *L,         /* I    Pointer to Lower Triangular Matrix                              */
    opus_int            M,          /* I    Dim of Matrix equation                                          */
    const silk_float    *b,         /* I    b Vector                                                        */
@ -153,7 +153,7 @@ static inline void silk_SolveWithLowerTriangularWdiagOnes_FLP(
    }
 }

-static inline void silk_LDL_FLP(
+static OPUS_INLINE void silk_LDL_FLP(
    silk_float          *A,         /* I/O  Pointer to Symetric Square Matrix                               */
    opus_int            M,          /* I    Size of Matrix                                                  */
    silk_float          *L,         /* I/O  Pointer to Square Upper triangular Matrix                       */
--- a/media/libopus/silk/float/wrappers_FLP.c
+++ b/media/libopus/silk/float/wrappers_FLP.c
@ -175,6 +175,7 @@ void silk_quant_LTP_gains_FLP(
    silk_float                      B[ MAX_NB_SUBFR * LTP_ORDER ],      /* I/O  (Un-)quantized LTP gains                    */
    opus_int8                       cbk_index[ MAX_NB_SUBFR ],          /* O    Codebook index                              */
    opus_int8                       *periodicity_index,                 /* O    Periodicity index                           */
+    opus_int32                      *sum_log_gain_Q7,                   /* I/O  Cumulative max prediction gain  */
    const silk_float                W[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* I    Error weights                        */
    const opus_int                  mu_Q10,                             /* I    Mu value (R/D tradeoff)                     */
    const opus_int                  lowComplexity,                      /* I    Flag for low complexity                     */
@ -192,7 +193,7 @@ void silk_quant_LTP_gains_FLP(
        W_Q18[ i ] = (opus_int32)silk_float2int( W[ i ] * 262144.0f );
    }

-    silk_quant_LTP_gains( B_Q14, cbk_index, periodicity_index, W_Q18, mu_Q10, lowComplexity, nb_subfr );
+    silk_quant_LTP_gains( B_Q14, cbk_index, periodicity_index, sum_log_gain_Q7, W_Q18, mu_Q10, lowComplexity, nb_subfr );

    for( i = 0; i < nb_subfr * LTP_ORDER; i++ ) {
        B[ i ] = (silk_float)B_Q14[ i ] * ( 1.0f / 16384.0f );
--- a/media/libopus/silk/init_encoder.c
+++ b/media/libopus/silk/init_encoder.c
@ -34,12 +34,14 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "main_FLP.h"
 #endif
 #include "tuning_parameters.h"
+#include "cpu_support.h"

 /*********************************/
 /* Initialize Silk Encoder state */
 /*********************************/
 opus_int silk_init_encoder(
-    silk_encoder_state_Fxx          *psEnc                                  /* I/O  Pointer to Silk FIX encoder state                                           */
+    silk_encoder_state_Fxx          *psEnc,                                 /* I/O  Pointer to Silk FIX encoder state                                           */
+    int                              arch                                   /* I    Run-time architecture                                                       */
 )
 {
    opus_int ret = 0;
@ -47,6 +49,8 @@ opus_int silk_init_encoder(
    /* Clear the entire encoder state */
    silk_memset( psEnc, 0, sizeof( silk_encoder_state_Fxx ) );

+    psEnc->sCmn.arch = arch;
+
    psEnc->sCmn.variable_HP_smth1_Q15 = silk_LSHIFT( silk_lin2log( SILK_FIX_CONST( VARIABLE_HP_MIN_CUTOFF_HZ, 16 ) ) - ( 16 << 7 ), 8 );
    psEnc->sCmn.variable_HP_smth2_Q15 = psEnc->sCmn.variable_HP_smth1_Q15;

--- a/media/libopus/silk/log2lin.c
+++ b/media/libopus/silk/log2lin.c
@ -41,7 +41,9 @@ opus_int32 silk_log2lin(

    if( inLog_Q7 < 0 ) {
        return 0;
-    }
+    } else if ( inLog_Q7 >= 3967 ) {
+		return silk_int32_MAX;
+	}

    out = silk_LSHIFT( 1, silk_RSHIFT( inLog_Q7, 7 ) );
    frac_Q7 = inLog_Q7 & 0x7F;
--- a/media/libopus/silk/macros.h
+++ b/media/libopus/silk/macros.h
@ -32,7 +32,10 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "config.h"
 #endif

-/* This is an inline header file for general platform. */
+#include "opus_types.h"
+#include "opus_defines.h"
+
+/* This is an OPUS_INLINE header file for general platform. */

 /* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output have to be 32bit int */
 #define silk_SMULWB(a32, b32)            ((((a32) >> 16) * (opus_int32)((opus_int16)(b32))) + ((((a32) & 0x0000FFFF) * (opus_int32)((opus_int16)(b32))) >> 16))
@ -78,12 +81,12 @@ POSSIBILITY OF SUCH DAMAGE.

 #include "ecintrin.h"

-static inline opus_int32 silk_CLZ16(opus_int16 in16)
+static OPUS_INLINE opus_int32 silk_CLZ16(opus_int16 in16)
 {
    return 32 - EC_ILOG(in16<<16|0x8000);
 }

-static inline opus_int32 silk_CLZ32(opus_int32 in32)
+static OPUS_INLINE opus_int32 silk_CLZ32(opus_int32 in32)
 {
    return in32 ? 32 - EC_ILOG(in32) : 32;
 }
@ -100,11 +103,11 @@ static inline opus_int32 silk_CLZ32(opus_int32 in32)
    (*((Matrix_base_adr) + ((row)+(M)*(column))))
 #endif

-#ifdef ARMv4_ASM
+#ifdef OPUS_ARM_INLINE_ASM
 #include "arm/macros_armv4.h"
 #endif

-#ifdef ARMv5E_ASM
+#ifdef OPUS_ARM_INLINE_EDSP
 #include "arm/macros_armv5e.h"
 #endif

--- a/media/libopus/silk/main.h
+++ b/media/libopus/silk/main.h
@ -204,6 +204,7 @@ void silk_quant_LTP_gains(
    opus_int16                  B_Q14[ MAX_NB_SUBFR * LTP_ORDER ],          /* I/O  (un)quantized LTP gains         */
    opus_int8                   cbk_index[ MAX_NB_SUBFR ],                  /* O    Codebook Index                  */
    opus_int8                   *periodicity_index,                         /* O    Periodicity Index               */
+	opus_int32					*sum_gain_dB_Q7,							/* I/O  Cumulative max prediction gain  */
    const opus_int32            W_Q18[ MAX_NB_SUBFR*LTP_ORDER*LTP_ORDER ],  /* I    Error Weights in Q18            */
    opus_int                    mu_Q9,                                      /* I    Mu value (R/D tradeoff)         */
    opus_int                    lowComplexity,                              /* I    Flag for low complexity         */
@ -214,11 +215,14 @@ void silk_quant_LTP_gains(
 void silk_VQ_WMat_EC(
    opus_int8                   *ind,                           /* O    index of best codebook vector               */
    opus_int32                  *rate_dist_Q14,                 /* O    best weighted quant error + mu * rate       */
+    opus_int                    *gain_Q7,                       /* O    sum of absolute LTP coefficients            */
    const opus_int16            *in_Q14,                        /* I    input vector to be quantized                */
    const opus_int32            *W_Q18,                         /* I    weighting matrix                            */
    const opus_int8             *cb_Q7,                         /* I    codebook                                    */
+    const opus_uint8            *cb_gain_Q7,                    /* I    codebook effective gain                     */
    const opus_uint8            *cl_Q5,                         /* I    code length for each codebook vector        */
    const opus_int              mu_Q9,                          /* I    tradeoff betw. weighted error and rate      */
+    const opus_int32            max_gain_Q7,                    /* I    maximum sum of absolute LTP coefficients    */
    opus_int                    L                               /* I    number of vectors in codebook               */
 );

--- a/media/libopus/silk/quant_LTP_gains.c
+++ b/media/libopus/silk/quant_LTP_gains.c
@ -30,11 +30,13 @@ POSSIBILITY OF SUCH DAMAGE.
 #endif

 #include "main.h"
+#include "tuning_parameters.h"

 void silk_quant_LTP_gains(
    opus_int16                  B_Q14[ MAX_NB_SUBFR * LTP_ORDER ],          /* I/O  (un)quantized LTP gains         */
    opus_int8                   cbk_index[ MAX_NB_SUBFR ],                  /* O    Codebook Index                  */
    opus_int8                   *periodicity_index,                         /* O    Periodicity Index               */
+	opus_int32					*sum_log_gain_Q7,							/* I/O  Cumulative max prediction gain  */
    const opus_int32            W_Q18[ MAX_NB_SUBFR*LTP_ORDER*LTP_ORDER ],  /* I    Error Weights in Q18            */
    opus_int                    mu_Q9,                                      /* I    Mu value (R/D tradeoff)         */
    opus_int                    lowComplexity,                              /* I    Flag for low complexity         */
@ -45,18 +47,26 @@ void silk_quant_LTP_gains(
    opus_int8            temp_idx[ MAX_NB_SUBFR ];
    const opus_uint8     *cl_ptr_Q5;
    const opus_int8      *cbk_ptr_Q7;
+    const opus_uint8     *cbk_gain_ptr_Q7;
    const opus_int16     *b_Q14_ptr;
    const opus_int32     *W_Q18_ptr;
    opus_int32           rate_dist_Q14_subfr, rate_dist_Q14, min_rate_dist_Q14;
+	opus_int32           sum_log_gain_tmp_Q7, best_sum_log_gain_Q7, max_gain_Q7, gain_Q7;

    /***************************************************/
    /* iterate over different codebooks with different */
    /* rates/distortions, and choose best */
    /***************************************************/
    min_rate_dist_Q14 = silk_int32_MAX;
+    best_sum_log_gain_Q7 = 0;
    for( k = 0; k < 3; k++ ) {
+        /* Safety margin for pitch gain control, to take into account factors
+           such as state rescaling/rewhitening. */
+        opus_int32 gain_safety = SILK_FIX_CONST( 0.4, 7 );
+
        cl_ptr_Q5  = silk_LTP_gain_BITS_Q5_ptrs[ k ];
        cbk_ptr_Q7 = silk_LTP_vq_ptrs_Q7[        k ];
+        cbk_gain_ptr_Q7 = silk_LTP_vq_gain_ptrs_Q7[ k ];
        cbk_size   = silk_LTP_vq_sizes[          k ];

        /* Set up pointer to first subframe */
@ -64,19 +74,28 @@ void silk_quant_LTP_gains(
        b_Q14_ptr = B_Q14;

        rate_dist_Q14 = 0;
+		sum_log_gain_tmp_Q7 = *sum_log_gain_Q7;
        for( j = 0; j < nb_subfr; j++ ) {
+			max_gain_Q7 = silk_log2lin( ( SILK_FIX_CONST( MAX_SUM_LOG_GAIN_DB / 6.0, 7 ) - sum_log_gain_tmp_Q7 ) 
+										+ SILK_FIX_CONST( 7, 7 ) ) - gain_safety;
+
            silk_VQ_WMat_EC(
                &temp_idx[ j ],         /* O    index of best codebook vector                           */
                &rate_dist_Q14_subfr,   /* O    best weighted quantization error + mu * rate            */
+				&gain_Q7,               /* O    sum of absolute LTP coefficients                        */
                b_Q14_ptr,              /* I    input vector to be quantized                            */
                W_Q18_ptr,              /* I    weighting matrix                                        */
                cbk_ptr_Q7,             /* I    codebook                                                */
+                cbk_gain_ptr_Q7,        /* I    codebook effective gains                                */
                cl_ptr_Q5,              /* I    code length for each codebook vector                    */
                mu_Q9,                  /* I    tradeoff between weighted error and rate                */
+				max_gain_Q7,            /* I    maximum sum of absolute LTP coefficients                */
                cbk_size                /* I    number of vectors in codebook                           */
            );

            rate_dist_Q14 = silk_ADD_POS_SAT32( rate_dist_Q14, rate_dist_Q14_subfr );
+            sum_log_gain_tmp_Q7 = silk_max(0, sum_log_gain_tmp_Q7
+                                + silk_lin2log( gain_safety + gain_Q7 ) - SILK_FIX_CONST( 7, 7 ));

            b_Q14_ptr += LTP_ORDER;
            W_Q18_ptr += LTP_ORDER * LTP_ORDER;
@ -89,6 +108,7 @@ void silk_quant_LTP_gains(
            min_rate_dist_Q14 = rate_dist_Q14;
            *periodicity_index = (opus_int8)k;
            silk_memcpy( cbk_index, temp_idx, nb_subfr * sizeof( opus_int8 ) );
+			best_sum_log_gain_Q7 = sum_log_gain_tmp_Q7;
        }

        /* Break early in low-complexity mode if rate distortion is below threshold */
@ -103,5 +123,6 @@ void silk_quant_LTP_gains(
            B_Q14[ j * LTP_ORDER + k ] = silk_LSHIFT( cbk_ptr_Q7[ cbk_index[ j ] * LTP_ORDER + k ], 7 );
        }
    }
+	*sum_log_gain_Q7 = best_sum_log_gain_Q7;
 }

--- a/media/libopus/silk/resampler_private_IIR_FIR.c
+++ b/media/libopus/silk/resampler_private_IIR_FIR.c
@ -33,7 +33,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "resampler_private.h"
 #include "stack_alloc.h"

-static inline opus_int16 *silk_resampler_private_IIR_FIR_INTERPOL(
+static OPUS_INLINE opus_int16 *silk_resampler_private_IIR_FIR_INTERPOL(
    opus_int16  *out,
    opus_int16  *buf,
    opus_int32  max_index_Q16,
--- a/media/libopus/silk/resampler_private_down_FIR.c
+++ b/media/libopus/silk/resampler_private_down_FIR.c
@ -33,7 +33,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "resampler_private.h"
 #include "stack_alloc.h"

-static inline opus_int16 *silk_resampler_private_down_FIR_INTERPOL(
+static OPUS_INLINE opus_int16 *silk_resampler_private_down_FIR_INTERPOL(
    opus_int16          *out,
    opus_int32          *buf,
    const opus_int16    *FIR_Coefs,
--- a/media/libopus/silk/shell_coder.c
+++ b/media/libopus/silk/shell_coder.c
@ -33,7 +33,7 @@ POSSIBILITY OF SUCH DAMAGE.

 /* shell coder; pulse-subframe length is hardcoded */

-static inline void combine_pulses(
+static OPUS_INLINE void combine_pulses(
    opus_int         *out,   /* O    combined pulses vector [len] */
    const opus_int   *in,    /* I    input vector       [2 * len] */
    const opus_int   len     /* I    number of OUTPUT samples     */
@ -45,7 +45,7 @@ static inline void combine_pulses(
    }
 }

-static inline void encode_split(
+static OPUS_INLINE void encode_split(
    ec_enc                      *psRangeEnc,    /* I/O  compressor data structure                   */
    const opus_int              p_child1,       /* I    pulse amplitude of first child subframe     */
    const opus_int              p,              /* I    pulse amplitude of current subframe         */
@ -57,7 +57,7 @@ static inline void encode_split(
    }
 }

-static inline void decode_split(
+static OPUS_INLINE void decode_split(
    opus_int                    *p_child1,      /* O    pulse amplitude of first child subframe     */
    opus_int                    *p_child2,      /* O    pulse amplitude of second child subframe    */
    ec_dec                      *psRangeDec,    /* I/O  Compressor data structure                   */
--- a/media/libopus/silk/structs.h
+++ b/media/libopus/silk/structs.h
@ -171,6 +171,7 @@ typedef struct {
    opus_int32                   pitchEstimationThreshold_Q16;      /* Threshold for pitch estimator                                    */
    opus_int                     LTPQuantLowComplexity;             /* Flag for low complexity LTP quantization                         */
    opus_int                     mu_LTP_Q9;                         /* Rate-distortion tradeoff in LTP quantization                     */
+    opus_int32                   sum_log_gain_Q7;					/* Cumulative max prediction gain									*/
    opus_int                     NLSF_MSVQ_Survivors;               /* Number of survivors in NLSF MSVQ                                 */
    opus_int                     first_frame_after_reset;           /* Flag for deactivating NLSF interpolation, pitch prediction       */
    opus_int                     controlled_since_last_payload;     /* Flag for ensuring codec_control only runs once per packet        */
@ -191,6 +192,8 @@ typedef struct {
    SideInfoIndices              indices;
    opus_int8                    pulses[ MAX_FRAME_LENGTH ];

+    int                          arch;
+
    /* Input/output buffering */
    opus_int16                   inputBuf[ MAX_FRAME_LENGTH + 2 ];  /* Buffer containing input signal                                   */
    opus_int                     inputBufIx;
--- a/media/libopus/silk/tables.h
+++ b/media/libopus/silk/tables.h
@ -78,6 +78,8 @@ extern const opus_uint8  * const silk_LTP_gain_iCDF_ptrs[ NB_LTP_CBKS ];
 extern const opus_uint8  * const silk_LTP_gain_BITS_Q5_ptrs[ NB_LTP_CBKS ];                         /*   3 */
 extern const opus_int16  silk_LTP_gain_middle_avg_RD_Q14;
 extern const opus_int8   * const silk_LTP_vq_ptrs_Q7[ NB_LTP_CBKS ];                                /* 168 */
+extern const opus_uint8  * const silk_LTP_vq_gain_ptrs_Q7[NB_LTP_CBKS];
+
 extern const opus_int8   silk_LTP_vq_sizes[ NB_LTP_CBKS ];                                          /*   3 */

 extern const opus_uint8  silk_LTPscale_iCDF[ 3 ];                                                   /*   4 */
--- a/media/libopus/silk/tables_LTP.c
+++ b/media/libopus/silk/tables_LTP.c
@ -267,6 +267,30 @@ const opus_int8 * const silk_LTP_vq_ptrs_Q7[NB_LTP_CBKS] = {
    (opus_int8 *)&silk_LTP_gain_vq_2[0][0]
 };

+/* Maximum frequency-dependent response of the pitch taps above,
+   computed as max(abs(freqz(taps))) */
+static const opus_uint8 silk_LTP_gain_vq_0_gain[8] = {
+      46,      2,     90,     87,     93,     91,     82,     98
+};
+
+static const opus_uint8 silk_LTP_gain_vq_1_gain[16] = {
+     109,    120,    118,     12,    113,    115,    117,    119,
+      99,     59,     87,    111,     63,    111,    112,     80
+};
+
+static const opus_uint8 silk_LTP_gain_vq_2_gain[32] = {
+     126,    124,    125,    124,    129,    121,    126,     23,
+     132,    127,    127,    127,    126,    127,    122,    133,
+     130,    134,    101,    118,    119,    145,    126,     86,
+     124,    120,    123,    119,    170,    173,    107,    109
+};
+
+const opus_uint8 * const silk_LTP_vq_gain_ptrs_Q7[NB_LTP_CBKS] = {
+    &silk_LTP_gain_vq_0_gain[0],
+    &silk_LTP_gain_vq_1_gain[0],
+    &silk_LTP_gain_vq_2_gain[0]
+};
+
 const opus_int8 silk_LTP_vq_sizes[NB_LTP_CBKS] = {
    8, 16, 32
 };
--- a/media/libopus/silk/tuning_parameters.h
+++ b/media/libopus/silk/tuning_parameters.h
@ -50,7 +50,7 @@ extern "C"
 /* Linear prediction */
 /*********************/

-/* LPC analysis defines: regularization and bandwidth expansion */
+/* LPC analysis regularization */
 #define FIND_LPC_COND_FAC                               1e-5f

 /* LTP analysis defines */
@ -63,6 +63,9 @@ extern "C"
 #define MU_LTP_QUANT_MB                                 0.025f
 #define MU_LTP_QUANT_WB                                 0.02f

+/* Max cumulative LTP gain */
+#define MAX_SUM_LOG_GAIN_DB								250.0f
+
 /***********************/
 /* High pass filtering */
 /***********************/
--- a/media/libopus/silk/typedef.h
+++ b/media/libopus/silk/typedef.h
@ -29,6 +29,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #define SILK_TYPEDEF_H

 #include "opus_types.h"
+#include "opus_defines.h"

 #ifndef FIXED_POINT
 # include <float.h>
@ -63,7 +64,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #ifdef __GNUC__
 __attribute__((noreturn))
 #endif
-static inline void _silk_fatal(const char *str, const char *file, int line)
+static OPUS_INLINE void _silk_fatal(const char *str, const char *file, int line)
 {
   fprintf (stderr, "Fatal (internal) error in %s, line %d: %s\n", file, line, str);
   abort();
--- a/media/libopus/src/analysis.c
+++ b/media/libopus/src/analysis.c
@ -115,7 +115,7 @@ static const int extra_bands[NB_TOT_BANDS+1] = {
 #define cB 0.67848403f
 #define cC 0.08595542f
 #define cE ((float)M_PI/2)
-static inline float fast_atan2f(float y, float x) {
+static OPUS_INLINE float fast_atan2f(float y, float x) {
   float x2, y2;
   /* Should avoid underflow on the values we'll get */
   if (ABS16(x)+ABS16(y)<1e-9f)
@ -184,12 +184,12 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int
   for (;i<DETECT_SIZE;i++)
      psum += tonal->pspeech[i];
   psum = psum*tonal->music_confidence + (1-psum)*tonal->speech_confidence;
-   /*printf("%f %f\n", psum, info_out->music_prob);*/
+   /*printf("%f %f %f\n", psum, info_out->music_prob, info_out->tonality);*/

   info_out->music_prob = psum;
 }

-void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, const CELTMode *celt_mode, const void *x, int len, int offset, int C, int lsb_depth, downmix_func downmix)
+void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix)
 {
    int i, b;
    const kiss_fft_state *kfft;
@ -234,7 +234,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
    kfft = celt_mode->mdct.kfft[0];
    if (tonal->count==0)
       tonal->mem_fill = 240;
-    downmix(x, &tonal->inmem[tonal->mem_fill], IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, C);
+    downmix(x, &tonal->inmem[tonal->mem_fill], IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, c1, c2, C);
    if (tonal->mem_fill+len < ANALYSIS_BUF_SIZE)
    {
       tonal->mem_fill += len;
@ -253,14 +253,14 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
    for (i=0;i<N2;i++)
    {
       float w = analysis_window[i];
-       in[i].r = MULT16_16(w, tonal->inmem[i]);
-       in[i].i = MULT16_16(w, tonal->inmem[N2+i]);
-       in[N-i-1].r = MULT16_16(w, tonal->inmem[N-i-1]);
-       in[N-i-1].i = MULT16_16(w, tonal->inmem[N+N2-i-1]);
+       in[i].r = (kiss_fft_scalar)(w*tonal->inmem[i]);
+       in[i].i = (kiss_fft_scalar)(w*tonal->inmem[N2+i]);
+       in[N-i-1].r = (kiss_fft_scalar)(w*tonal->inmem[N-i-1]);
+       in[N-i-1].i = (kiss_fft_scalar)(w*tonal->inmem[N+N2-i-1]);
    }
    OPUS_MOVE(tonal->inmem, tonal->inmem+ANALYSIS_BUF_SIZE-240, 240);
    remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill);
-    downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, C);
+    downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, c1, c2, C);
    tonal->mem_fill = 240 + remaining;
    opus_fft(kfft, in, out);

@ -270,10 +270,10 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
       float angle, d_angle, d2_angle;
       float angle2, d_angle2, d2_angle2;
       float mod1, mod2, avg_mod;
-       X1r = out[i].r+out[N-i].r;
-       X1i = out[i].i-out[N-i].i;
-       X2r = out[i].i+out[N-i].i;
-       X2i = out[N-i].r-out[i].r;
+       X1r = (float)out[i].r+out[N-i].r;
+       X1i = (float)out[i].i-out[N-i].i;
+       X2r = (float)out[i].i+out[N-i].i;
+       X2i = (float)out[N-i].r-out[i].r;

       angle = (float)(.5f/M_PI)*fast_atan2f(X1i, X1r);
       d_angle = angle - A[i];
@ -317,7 +317,6 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
    }
    relativeE = 0;
    frame_loudness = 0;
-    bandwidth_mask = 0;
    for (b=0;b<NB_TBANDS;b++)
    {
       float E=0, tE=0, nE=0;
@ -325,8 +324,12 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
       float stationarity;
       for (i=tbands[b];i<tbands[b+1];i++)
       {
-          float binE = out[i].r*out[i].r + out[N-i].r*out[N-i].r
-                     + out[i].i*out[i].i + out[N-i].i*out[N-i].i;
+          float binE = out[i].r*(float)out[i].r + out[N-i].r*(float)out[N-i].r
+                     + out[i].i*(float)out[i].i + out[N-i].i*(float)out[N-i].i;
+#ifdef FIXED_POINT
+          /* FIXME: It's probably best to change the BFCC filter initial state instead */
+          binE *= 5.55e-17f;
+#endif
          E += binE;
          tE += binE*tonality[i];
          nE += binE*2.f*(.5f-noisiness[i]);
@ -334,7 +337,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
       tonal->E[tonal->E_count][b] = E;
       frame_noisiness += nE/(1e-15f+E);

-       frame_loudness += celt_sqrt(E+1e-10f);
+       frame_loudness += (float)sqrt(E+1e-10f);
       logE[b] = (float)log(E+1e-10f);
       tonal->lowE[b] = MIN32(logE[b], tonal->lowE[b]+.01f);
       tonal->highE[b] = MAX32(logE[b], tonal->highE[b]-.1f);
@ -343,21 +346,21 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
          tonal->highE[b]+=.5f;
          tonal->lowE[b]-=.5f;
       }
-       relativeE += (logE[b]-tonal->lowE[b])/(EPSILON+tonal->highE[b]-tonal->lowE[b]);
+       relativeE += (logE[b]-tonal->lowE[b])/(1e-15f+tonal->highE[b]-tonal->lowE[b]);

       L1=L2=0;
       for (i=0;i<NB_FRAMES;i++)
       {
-          L1 += celt_sqrt(tonal->E[i][b]);
+          L1 += (float)sqrt(tonal->E[i][b]);
          L2 += tonal->E[i][b];
       }

-       stationarity = MIN16(0.99f,L1/celt_sqrt(EPSILON+NB_FRAMES*L2));
+       stationarity = MIN16(0.99f,L1/(float)sqrt(1e-15+NB_FRAMES*L2));
       stationarity *= stationarity;
       stationarity *= stationarity;
       frame_stationarity += stationarity;
       /*band_tonality[b] = tE/(1e-15+E)*/;
-       band_tonality[b] = MAX16(tE/(EPSILON+E), stationarity*tonal->prev_band_tonality[b]);
+       band_tonality[b] = MAX16(tE/(1e-15f+E), stationarity*tonal->prev_band_tonality[b]);
 #if 0
       if (b>=NB_TONAL_SKIP_BANDS)
       {
@ -379,6 +382,9 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
    bandwidth = 0;
    maxE = 0;
    noise_floor = 5.7e-4f/(1<<(IMAX(0,lsb_depth-8)));
+#ifdef FIXED_POINT
+    noise_floor *= 1<<(15+SIG_SHIFT);
+#endif
    noise_floor *= noise_floor;
    for (b=0;b<NB_TOT_BANDS;b++)
    {
@ -389,8 +395,8 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
       band_end = extra_bands[b+1];
       for (i=band_start;i<band_end;i++)
       {
-          float binE = out[i].r*out[i].r + out[N-i].r*out[N-i].r
-                     + out[i].i*out[i].i + out[N-i].i*out[N-i].i;
+          float binE = out[i].r*(float)out[i].r + out[N-i].r*(float)out[N-i].r
+                     + out[i].i*(float)out[i].i + out[N-i].i*(float)out[N-i].i;
          E += binE;
       }
       maxE = MAX32(maxE, E);
@ -469,14 +475,14 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
       tonal->mem[i] = BFCC[i];
    }
    for (i=0;i<9;i++)
-       features[11+i] = celt_sqrt(tonal->std[i]);
+       features[11+i] = (float)sqrt(tonal->std[i]);
    features[20] = info->tonality;
    features[21] = info->activity;
    features[22] = frame_stationarity;
    features[23] = info->tonality_slope;
    features[24] = tonal->lowECount;

-#ifndef FIXED_POINT
+#ifndef DISABLE_FLOAT_API
    mlp_process(&net, features, frame_probs);
    frame_probs[0] = .5f*(frame_probs[0]+1);
    /* Curve fitting between the MLP probability and the actual probability */
@ -590,7 +596,6 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
          if (tonal->speech_confidence_count==0)
             tonal->speech_confidence = .1f;
       }
-       psum = MAX16(tonal->speech_confidence, MIN16(tonal->music_confidence, psum));
    }
    if (tonal->last_music != (tonal->music_prob>.5f))
       tonal->last_transition=0;
@ -611,44 +616,30 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
    RESTORE_STACK;
 }

-int run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *pcm,
-                        const void *analysis_pcm, int frame_size, int variable_duration, int C, opus_int32 Fs, int bitrate_bps,
-                        int delay_compensation, int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info)
+void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm,
+                 int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs,
+                 int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info)
 {
   int offset;
   int pcm_len;

-   /* Avoid overflow/wrap-around of the analysis buffer */
-   frame_size = IMIN((DETECT_SIZE-5)*Fs/100, frame_size);
-
-   pcm_len = frame_size - analysis->analysis_offset;
-   offset = 0;
-   do {
-      tonality_analysis(analysis, NULL, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, C, lsb_depth, downmix);
-      offset += 480;
-      pcm_len -= 480;
-   } while (pcm_len>0);
-   analysis->analysis_offset = frame_size;
-
-   if (variable_duration == OPUS_FRAMESIZE_VARIABLE && frame_size >= Fs/200)
+   if (analysis_pcm != NULL)
   {
-      int LM = 3;
-      LM = optimize_framesize((const opus_val16*)pcm, frame_size, C, Fs, bitrate_bps,
-            analysis->prev_tonality, analysis->subframe_mem, delay_compensation, downmix);
-      while ((Fs/400<<LM)>frame_size)
-         LM--;
-      frame_size = (Fs/400<<LM);
-   } else {
-      frame_size = frame_size_select(frame_size, variable_duration, Fs);
-   }
-   if (frame_size<0)
-      return -1;
-   analysis->analysis_offset -= frame_size;
+      /* Avoid overflow/wrap-around of the analysis buffer */
+      analysis_frame_size = IMIN((DETECT_SIZE-5)*Fs/100, analysis_frame_size);
+
+      pcm_len = analysis_frame_size - analysis->analysis_offset;
+      offset = analysis->analysis_offset;
+      do {
+         tonality_analysis(analysis, NULL, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix);
+         offset += 480;
+         pcm_len -= 480;
+      } while (pcm_len>0);
+      analysis->analysis_offset = analysis_frame_size;
+
+      analysis->analysis_offset -= frame_size;
+   }

-   /* Only perform analysis up to 20-ms frames. Longer ones will be split if
-      they're in CELT-only mode. */
   analysis_info->valid = 0;
   tonality_get_info(analysis, analysis_info, frame_size);
-
-   return frame_size;
 }
--- a/Показать больше
+++ b/Показать больше