[FFmpeg-devel] [PATCH] Optimization of AMR NB and WB decoders for MIPS

Nedeljko Babic nbabic at mips.com
Fri May 18 15:47:30 CEST 2012


AMR NB and WB decoders are optimized for the MIPS architecture.
The appropriate Makefiles are changed accordingly.

The configure script is changed to support these optimizations.
 Optimizations are enabled by default when compiling for the MIPS
  architecture.
 The appropriate cflags are set automatically.
 Support for several MIPS CPUs is added to the configure script.

New FFmpeg configure options are added for disabling the optimizations
(an example invocation is shown below).

The configure option --disable-mipsfpu disables MIPS floating-point
 optimizations.
The configure option --disable-mips32r2 disables MIPS32R2
 optimizations.
The configure option --disable-mipsdspr1 disables MIPS DSP ASE R1
 optimizations.
The configure option --disable-mipsdspr2 disables MIPS DSP ASE R2
 optimizations.
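
A hypothetical cross-compile invocation using the new options is sketched
below; the toolchain prefix and the --cpu value are placeholders, not part
of this patch:

 # Illustrative only: adjust --cross-prefix and --cpu for the actual toolchain/board.
 ./configure --enable-cross-compile --target-os=linux --arch=mips \
             --cross-prefix=mips-linux-gnu- --cpu=24kf \
             --disable-mipsfpu

For --cpu=24kf the configure script already disables mipsdspr1 and
mipsdspr2 on its own (see the case statement added to configure below),
so --disable-mipsfpu (and, if desired, --disable-mips32r2) is enough to
turn off the remaining MIPS optimizations.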

Change-Id: I566311805c05c6dae544c19f9d5194c157910014
Signed-off-by: Nedeljko Babic <nbabic at mips.com>
---
 configure                            |   47 ++++++
 libavcodec/acelp_filters.c           |    4 +
 libavcodec/acelp_vectors.c           |    2 +
 libavcodec/acelp_vectors.h           |    6 +
 libavcodec/amrwbdec.c                |    5 +
 libavcodec/celp_filters.c            |    2 +
 libavcodec/celp_math.c               |    3 +-
 libavcodec/celp_math.h               |    4 +
 libavcodec/lsp.c                     |    6 +
 libavcodec/lsp.h                     |    6 +
 libavcodec/mips/Makefile             |    6 +-
 libavcodec/mips/acelp_filters_mips.c |  211 +++++++++++++++++++++++++
 libavcodec/mips/acelp_vectors_mips.h |   93 +++++++++++
 libavcodec/mips/amrwb_lsp2lpc.h      |   97 ++++++++++++
 libavcodec/mips/amrwbdec_mips.h      |  186 ++++++++++++++++++++++
 libavcodec/mips/celp_filters_mips.c  |  289 ++++++++++++++++++++++++++++++++++
 libavcodec/mips/celp_math_mips.h     |   82 ++++++++++
 libavcodec/mips/lsp_mips.h           |  109 +++++++++++++
 libavutil/libm.h                     |    4 +
 libavutil/mips/libm_mips.h           |   74 +++++++++
 20 files changed, 1234 insertions(+), 2 deletions(-)
 create mode 100644 libavcodec/mips/acelp_filters_mips.c
 create mode 100644 libavcodec/mips/acelp_vectors_mips.h
 create mode 100644 libavcodec/mips/amrwb_lsp2lpc.h
 create mode 100644 libavcodec/mips/amrwbdec_mips.h
 create mode 100644 libavcodec/mips/celp_filters_mips.c
 create mode 100644 libavcodec/mips/celp_math_mips.h
 create mode 100644 libavcodec/mips/lsp_mips.h
 create mode 100644 libavutil/mips/libm_mips.h

diff --git a/configure b/configure
index e070c0b..b8fe719 100644
--- a/configure
+++ b/configure
@@ -268,6 +268,10 @@ Optimization options (experts only):
   --disable-neon           disable NEON optimizations
   --disable-vis            disable VIS optimizations
   --disable-yasm           disable use of yasm assembler
+  --disable-mips32r2       disable MIPS32R2 optimizations
+  --disable-mipsdspr1      disable MIPS DSP ASE R1 optimizations
+  --disable-mipsdspr2      disable MIPS DSP ASE R2 optimizations
+  --disable-mipsfpu        disable floating point MIPS optimizations
   --postproc-version=V     build libpostproc version V.
                            Where V can be '$ALT_PP_VER_MAJOR.$ALT_PP_VER_MINOR.$ALT_PP_VER_MICRO' or 'current'. [$postproc_version_default]
 
@@ -1141,6 +1145,10 @@ ARCH_EXT_LIST='
     ssse3
     vfpv3
     vis
+    mipsfpu
+    mips32r2
+    mipsdspr1
+    mipsdspr2
 '
 
 HAVE_LIST_PUB='
@@ -1359,6 +1367,10 @@ armvfp_deps="arm"
 neon_deps="arm"
 vfpv3_deps="armvfp"
 
+mipsfpu_deps="mips"
+mips32r2_deps="mips"
+mipsdspr1_deps="mips"
+mipsdspr2_deps="mips"
 mmi_deps="mips"
 
 altivec_deps="ppc"
@@ -2577,6 +2589,28 @@ elif enabled mips; then
 
     cpuflags="-march=$cpu"
 
+    case $cpu in
+        24kc)
+            disable mipsfpu
+            disable mipsdspr1
+            disable mipsdspr2
+        ;;
+        24kf*)
+            disable mipsdspr1
+            disable mipsdspr2
+        ;;
+        24kec|34kc|1004kc)
+            disable mipsfpu
+            disable mipsdspr2
+        ;;
+        24kef*|34kf*|1004kf*)
+            disable mipsdspr2
+        ;;
+        74kc)
+            disable mipsfpu
+        ;;
+    esac
+
 elif enabled avr32; then
 
     case $cpu in
@@ -2948,6 +2982,15 @@ elif enabled mips; then
 
     check_asm loongson '"dmult.g $1, $2, $3"'
     enabled mmi     && check_asm mmi     '"lq $2, 0($2)"'
+    enabled mips32r2  && add_cflags "-mips32r2" &&
+     check_asm mips32r2  '"rotr $t0, $t1, 1"'
+    enabled mipsdspr1 && add_cflags "-mdsp" &&
+     check_asm mipsdspr1 '"addu.qb $t0, $t1, $t2"'
+    enabled mipsdspr2 && add_cflags "-mdspr2" &&
+     check_asm mipsdspr2 '"absq_s.qb $t0, $t1"'
+    enabled mipsfpu   && add_cflags "-mhard-float" &&
+     check_asm mipsfpu   '"madd.d $f0, $f2, $f4, $f6"'
+
 
 elif enabled ppc; then
 
@@ -3547,6 +3590,10 @@ if enabled arm; then
 fi
 if enabled mips; then
     echo "MMI enabled               ${mmi-no}"
+    echo "MIPS FPU enabled          ${mipsfpu-no}"
+    echo "MIPS32R2 enabled          ${mips32r2-no}"
+    echo "MIPS DSP R1 enabled       ${mipsdspr1-no}"
+    echo "MIPS DSP R2 enabled       ${mipsdspr2-no}"
 fi
 if enabled ppc; then
     echo "AltiVec enabled           ${altivec-no}"
diff --git a/libavcodec/acelp_filters.c b/libavcodec/acelp_filters.c
index 1ce5eed..f623212 100644
--- a/libavcodec/acelp_filters.c
+++ b/libavcodec/acelp_filters.c
@@ -73,6 +73,7 @@ void ff_acelp_interpolate(int16_t* out, const int16_t* in,
     }
 }
 
+#if !HAVE_MIPSFPU || !HAVE_INLINE_ASM
 void ff_acelp_interpolatef(float *out, const float *in,
                            const float *filter_coeffs, int precision,
                            int frac_pos, int filter_length, int length)
@@ -92,6 +93,7 @@ void ff_acelp_interpolatef(float *out, const float *in,
         out[n] = v;
     }
 }
+#endif /* !HAVE_MIPSFPU || !HAVE_INLINE_ASM */
 
 
 void ff_acelp_high_pass_filter(int16_t* out, int hpf_f[2],
@@ -114,6 +116,7 @@ void ff_acelp_high_pass_filter(int16_t* out, int hpf_f[2],
     }
 }
 
+#if !HAVE_MIPSFPU || !HAVE_INLINE_ASM
 void ff_acelp_apply_order_2_transfer_function(float *out, const float *in,
                                               const float zero_coeffs[2],
                                               const float pole_coeffs[2],
@@ -130,6 +133,7 @@ void ff_acelp_apply_order_2_transfer_function(float *out, const float *in,
         mem[0] = tmp;
     }
 }
+#endif /* !HAVE_MIPSFPU || !HAVE_INLINE_ASM */
 
 void ff_tilt_compensation(float *mem, float tilt, float *samples, int size)
 {
diff --git a/libavcodec/acelp_vectors.c b/libavcodec/acelp_vectors.c
index 6a544a9..fbe06f7 100644
--- a/libavcodec/acelp_vectors.c
+++ b/libavcodec/acelp_vectors.c
@@ -187,6 +187,7 @@ void ff_acelp_weighted_vector_sum(
                  rounder) >> shift);
 }
 
+#if !HAVE_MIPSFPU || !HAVE_INLINE_ASM
 void ff_weighted_vector_sumf(float *out, const float *in_a, const float *in_b,
                              float weight_coeff_a, float weight_coeff_b, int length)
 {
@@ -196,6 +197,7 @@ void ff_weighted_vector_sumf(float *out, const float *in_a, const float *in_b,
         out[i] = weight_coeff_a * in_a[i]
                + weight_coeff_b * in_b[i];
 }
+#endif /* !HAVE_MIPSFPU || !HAVE_INLINE_ASM */
 
 void ff_adaptive_gain_control(float *out, const float *in, float speech_energ,
                               int size, float alpha, float *gain_mem)
diff --git a/libavcodec/acelp_vectors.h b/libavcodec/acelp_vectors.h
index f3bc781..fc4e94f 100644
--- a/libavcodec/acelp_vectors.h
+++ b/libavcodec/acelp_vectors.h
@@ -25,6 +25,10 @@
 
 #include <stdint.h>
 
+#if HAVE_MIPSFPU && HAVE_INLINE_ASM
+#include "libavcodec/mips/acelp_vectors_mips.h"
+#endif /* HAVE_MIPSFPU && HAVE_INLINE_ASM */
+
 /** Sparse representation for the algebraic codebook (fixed) vector */
 typedef struct {
     int      n;
@@ -207,9 +211,11 @@ void ff_acelp_weighted_vector_sum(int16_t* out,
  *
  * @note It is safe to pass the same buffer for out and in_a or in_b.
  */
+#if !HAVE_MIPSFPU || !HAVE_INLINE_ASM
 void ff_weighted_vector_sumf(float *out, const float *in_a, const float *in_b,
                              float weight_coeff_a, float weight_coeff_b,
                              int length);
+#endif /* !HAVE_MIPSFPU || !HAVE_INLINE_ASM */
 
 /**
  * Adaptive gain control (as used in AMR postfiltering)
diff --git a/libavcodec/amrwbdec.c b/libavcodec/amrwbdec.c
index 9b8b306..ecf1085 100644
--- a/libavcodec/amrwbdec.c
+++ b/libavcodec/amrwbdec.c
@@ -38,6 +38,9 @@
 #include "amr.h"
 
 #include "amrwbdata.h"
+#if HAVE_MIPSFPU && HAVE_INLINE_ASM
+#include "libavcodec/mips/amrwbdec_mips.h"
+#endif /* HAVE_MIPSFPU && HAVE_INLINE_ASM */
 
 typedef struct {
     AVFrame                              avframe; ///< AVFrame for decoded samples
@@ -1022,6 +1025,7 @@ static void hb_synthesis(AMRWBContext *ctx, int subframe, float *samples,
  *
  * @remark It is safe to pass the same array in in and out parameters
  */
+#if !HAVE_MIPSFPU || !HAVE_INLINE_ASM
 static void hb_fir_filter(float *out, const float fir_coef[HB_FIR_SIZE + 1],
                           float mem[HB_FIR_SIZE], const float *in)
 {
@@ -1039,6 +1043,7 @@ static void hb_fir_filter(float *out, const float fir_coef[HB_FIR_SIZE + 1],
 
     memcpy(mem, data + AMRWB_SFR_SIZE_16k, HB_FIR_SIZE * sizeof(float));
 }
+#endif /* !HAVE_MIPSFPU || !HAVE_INLINE_ASM */
 
 /**
  * Update context state before the next subframe.
diff --git a/libavcodec/celp_filters.c b/libavcodec/celp_filters.c
index 04ede49..295f7a1 100644
--- a/libavcodec/celp_filters.c
+++ b/libavcodec/celp_filters.c
@@ -80,6 +80,7 @@ int ff_celp_lp_synthesis_filter(int16_t *out, const int16_t *filter_coeffs,
     return 0;
 }
 
+#if !HAVE_MIPSFPU || !HAVE_INLINE_ASM
 void ff_celp_lp_synthesis_filterf(float *out, const float *filter_coeffs,
                                   const float* in, int buffer_length,
                                   int filter_length)
@@ -205,3 +206,4 @@ void ff_celp_lp_zero_synthesis_filterf(float *out, const float *filter_coeffs,
             out[n] += filter_coeffs[i-1] * in[n-i];
     }
 }
+#endif /* !HAVE_MIPSFPU || !HAVE_INLINE_ASM */
diff --git a/libavcodec/celp_math.c b/libavcodec/celp_math.c
index d85277f..afb149b 100644
--- a/libavcodec/celp_math.c
+++ b/libavcodec/celp_math.c
@@ -207,7 +207,7 @@ int64_t ff_dot_product(const int16_t *a, const int16_t *b, int length)
 
     return sum;
 }
-
+#if !HAVE_MIPSFPU || !HAVE_INLINE_ASM
 float ff_dot_productf(const float* a, const float* b, int length)
 {
     float sum = 0;
@@ -218,3 +218,4 @@ float ff_dot_productf(const float* a, const float* b, int length)
 
     return sum;
 }
+#endif /* !HAVE_MIPSFPU || !HAVE_INLINE_ASM */
diff --git a/libavcodec/celp_math.h b/libavcodec/celp_math.h
index ec62a9e..16ec29d 100644
--- a/libavcodec/celp_math.h
+++ b/libavcodec/celp_math.h
@@ -81,6 +81,10 @@ int64_t ff_dot_product(const int16_t *a, const int16_t *b, int length);
  *
  * @return dot product = sum of elementwise products
  */
+#if !HAVE_MIPSFPU || !HAVE_INLINE_ASM
 float ff_dot_productf(const float* a, const float* b, int length);
+#else /* !HAVE_MIPSFPU || !HAVE_INLINE_ASM */
+#include "libavcodec/mips/celp_math_mips.h"
+#endif /* !HAVE_MIPSFPU || !HAVE_INLINE_ASM */
 
 #endif /* AVCODEC_CELP_MATH_H */
diff --git a/libavcodec/lsp.c b/libavcodec/lsp.c
index 7fda12e..a090b9e 100644
--- a/libavcodec/lsp.c
+++ b/libavcodec/lsp.c
@@ -117,6 +117,7 @@ void ff_acelp_lsp2lpc(int16_t* lp, const int16_t* lsp, int lp_half_order)
     }
 }
 
+#if !HAVE_MIPSFPU || !HAVE_INLINE_ASM
 void ff_amrwb_lsp2lpc(const double *lsp, float *lp, int lp_order)
 {
     int lp_half_order = lp_order >> 1;
@@ -142,6 +143,9 @@ void ff_amrwb_lsp2lpc(const double *lsp, float *lp, int lp_order)
 
     lp[lp_order - 1] = lsp[lp_order - 1];
 }
+#else /* !HAVE_MIPSFPU || !HAVE_INLINE_ASM */
+#include "libavcodec/mips/amrwb_lsp2lpc.h"
+#endif /* !HAVE_MIPSFPU || !HAVE_INLINE_ASM */
 
 void ff_acelp_lp_decode(int16_t* lp_1st, int16_t* lp_2nd, const int16_t* lsp_2nd, const int16_t* lsp_prev, int lp_order)
 {
@@ -162,6 +166,7 @@ void ff_acelp_lp_decode(int16_t* lp_1st, int16_t* lp_2nd, const int16_t* lsp_2nd
     ff_acelp_lsp2lpc(lp_2nd, lsp_2nd, lp_order >> 1);
 }
 
+#if !HAVE_MIPSFPU || !HAVE_INLINE_ASM
 void ff_lsp2polyf(const double *lsp, double *f, int lp_half_order)
 {
     int i, j;
@@ -178,6 +183,7 @@ void ff_lsp2polyf(const double *lsp, double *f, int lp_half_order)
         f[1] += val;
     }
 }
+#endif /* !HAVE_MIPSFPU || !HAVE_INLINE_ASM */
 
 void ff_acelp_lspd2lpc(const double *lsp, float *lpc, int lp_half_order)
 {
diff --git a/libavcodec/lsp.h b/libavcodec/lsp.h
index 46a2d47..ec20074 100644
--- a/libavcodec/lsp.h
+++ b/libavcodec/lsp.h
@@ -25,6 +25,10 @@
 
 #include <stdint.h>
 
+#if HAVE_MIPSFPU && HAVE_INLINE_ASM
+#include "libavcodec/mips/lsp_mips.h"
+#endif /* HAVE_MIPSFPU && HAVE_INLINE_ASM */
+
 /**
   (I.F) means fixed-point value with F fractional and I integer bits
 */
@@ -125,6 +129,8 @@ void ff_sort_nearly_sorted_floats(float *vals, int len);
  *
  * TIA/EIA/IS-733 2.4.3.3.5-1/2
  */
+#if !HAVE_MIPSFPU || !HAVE_INLINE_ASM
 void ff_lsp2polyf(const double *lsp, double *f, int lp_half_order);
 
+#endif /* !HAVE_MIPSFPU || !HAVE_INLINE_ASM */
 #endif /* AVCODEC_LSP_H */
diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index 37899b1..c790577 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -1,3 +1,7 @@
 MMI-OBJS += mips/dsputil_mmi.o                                          \
             mips/idct_mmi.o                                             \
-            mips/mpegvideo_mmi.o                                        \
+            mips/mpegvideo_mmi.o
+OBJS-$(CONFIG_AMRNB_DECODER)           += mips/acelp_filters_mips.o     \
+                                          mips/celp_filters_mips.o
+OBJS-$(CONFIG_AMRWB_DECODER)           += mips/acelp_filters_mips.o     \
+                                          mips/celp_filters_mips.o
diff --git a/libavcodec/mips/acelp_filters_mips.c b/libavcodec/mips/acelp_filters_mips.c
new file mode 100644
index 0000000..d219898
--- /dev/null
+++ b/libavcodec/mips/acelp_filters_mips.c
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Nedeljko Babic (nbabic at mips.com)
+ *
+ * various filters for ACELP-based codecs optimized for MIPS
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/acelp_filters.c
+ */
+#include "config.h"
+
+#if HAVE_MIPSFPU && HAVE_INLINE_ASM
+
+#include "libavutil/attributes.h"
+#include "libavcodec/acelp_filters.h"
+
+av_always_inline void ff_acelp_interpolatef(float *out, const float *in,
+                           const float *filter_coeffs, int precision,
+                           int frac_pos, int filter_length, int length)
+{
+    int n, i;
+    int prec = precision * 4;
+    int fc_offset = precision - frac_pos;
+    float in_val_p, in_val_m, fc_val_p, fc_val_m;
+
+    for (n = 0; n < length; n++) {
+        /**
+        * four pointers are defined in order to minimize number of
+        * computations done in inner loop
+        */
+        const float *p_in_p = &in[n];
+        const float *p_in_m = &in[n-1];
+        const float *p_filter_coeffs_p = &filter_coeffs[frac_pos];
+        const float *p_filter_coeffs_m = filter_coeffs + fc_offset;
+        float v = 0;
+
+        for (i = 0; i < filter_length;i++) {
+            __asm__ __volatile__ (
+                "lwc1   %[in_val_p],           0(%[p_in_p])                    \n\t"
+                "lwc1   %[fc_val_p],           0(%[p_filter_coeffs_p])         \n\t"
+                "lwc1   %[in_val_m],           0(%[p_in_m])                    \n\t"
+                "lwc1   %[fc_val_m],           0(%[p_filter_coeffs_m])         \n\t"
+                "addiu  %[p_in_p],             %[p_in_p],              4       \n\t"
+                "madd.s %[v],%[v],             %[in_val_p],%[fc_val_p]         \n\t"
+                "addiu  %[p_in_m],             %[p_in_m],              -4      \n\t"
+                "addu   %[p_filter_coeffs_p],  %[p_filter_coeffs_p],   %[prec] \n\t"
+                "addu   %[p_filter_coeffs_m],  %[p_filter_coeffs_m],   %[prec] \n\t"
+                "madd.s %[v],%[v],%[in_val_m], %[fc_val_m]                     \n\t"
+
+                : [v] "=&f" (v),[p_in_p] "+r" (p_in_p), [p_in_m] "+r" (p_in_m),
+                  [p_filter_coeffs_p] "+r" (p_filter_coeffs_p),
+                  [in_val_p] "=&f" (in_val_p), [in_val_m] "=&f" (in_val_m),
+                  [fc_val_p] "=&f" (fc_val_p), [fc_val_m] "=&f" (fc_val_m),
+                  [p_filter_coeffs_m] "+r" (p_filter_coeffs_m)
+                : [prec] "r" (prec)
+            );
+        }
+        out[n] = v;
+    }
+}
+
+av_always_inline void ff_acelp_apply_order_2_transfer_function(float *out, const float *in,
+                                              const float zero_coeffs[2],
+                                              const float pole_coeffs[2],
+                                              float gain, float mem[2], int n)
+{
+    /**
+    * loop is unrolled eight times
+    */
+
+    __asm__ __volatile__ (
+        "lwc1   $f0,    0(%[mem])                                              \n\t"
+        "blez   %[n],   ff_acelp_apply_order_2_transfer_function_end%=         \n\t"
+        "lwc1   $f1,    4(%[mem])                                              \n\t"
+        "lwc1   $f2,    0(%[pole_coeffs])                                      \n\t"
+        "lwc1   $f3,    4(%[pole_coeffs])                                      \n\t"
+        "lwc1   $f4,    0(%[zero_coeffs])                                      \n\t"
+        "lwc1   $f5,    4(%[zero_coeffs])                                      \n\t"
+
+        "ff_acelp_apply_order_2_transfer_function_madd%=:                      \n\t"
+
+        "lwc1   $f6,    0(%[in])                                               \n\t"
+        "mul.s  $f9,    $f3,      $f1                                          \n\t"
+        "mul.s  $f7,    $f2,      $f0                                          \n\t"
+        "msub.s $f7,    $f7,      %[gain], $f6                                 \n\t"
+        "sub.s  $f7,    $f7,      $f9                                          \n\t"
+        "madd.s $f8,    $f7,      $f4,     $f0                                 \n\t"
+        "madd.s $f8,    $f8,      $f5,     $f1                                 \n\t"
+        "lwc1   $f11,   4(%[in])                                               \n\t"
+        "mul.s  $f12,   $f3,      $f0                                          \n\t"
+        "mul.s  $f13,   $f2,      $f7                                          \n\t"
+        "msub.s $f13,   $f13,     %[gain], $f11                                \n\t"
+        "sub.s  $f13,   $f13,     $f12                                         \n\t"
+        "madd.s $f14,   $f13,     $f4,     $f7                                 \n\t"
+        "madd.s $f14,   $f14,     $f5,     $f0                                 \n\t"
+        "swc1   $f8,    0(%[out])                                              \n\t"
+        "lwc1   $f6,    8(%[in])                                               \n\t"
+        "mul.s  $f9,    $f3,      $f7                                          \n\t"
+        "mul.s  $f15,   $f2,      $f13                                         \n\t"
+        "msub.s $f15,   $f15,     %[gain], $f6                                 \n\t"
+        "sub.s  $f15,   $f15,     $f9                                          \n\t"
+        "madd.s $f8,    $f15,     $f4,     $f13                                \n\t"
+        "madd.s $f8,    $f8,      $f5,     $f7                                 \n\t"
+        "swc1   $f14,   4(%[out])                                              \n\t"
+        "lwc1   $f11,   12(%[in])                                              \n\t"
+        "mul.s  $f12,   $f3,      $f13                                         \n\t"
+        "mul.s  $f16,   $f2,      $f15                                         \n\t"
+        "msub.s $f16,   $f16,     %[gain], $f11                                \n\t"
+        "sub.s  $f16,   $f16,     $f12                                         \n\t"
+        "madd.s $f14,   $f16,     $f4,     $f15                                \n\t"
+        "madd.s $f14,   $f14,     $f5,     $f13                                \n\t"
+        "swc1   $f8,    8(%[out])                                              \n\t"
+        "lwc1   $f6,    16(%[in])                                              \n\t"
+        "mul.s  $f9,    $f3,      $f15                                         \n\t"
+        "mul.s  $f7,    $f2,      $f16                                         \n\t"
+        "msub.s $f7,    $f7,      %[gain], $f6                                 \n\t"
+        "sub.s  $f7,    $f7,      $f9                                          \n\t"
+        "madd.s $f8,    $f7,      $f4,     $f16                                \n\t"
+        "madd.s $f8,    $f8,      $f5,     $f15                                \n\t"
+        "swc1   $f14,   12(%[out])                                             \n\t"
+        "lwc1   $f11,   20(%[in])                                              \n\t"
+        "mul.s  $f12,   $f3,      $f16                                         \n\t"
+        "mul.s  $f13,   $f2,      $f7                                          \n\t"
+        "msub.s $f13,   $f13,     %[gain], $f11                                \n\t"
+        "sub.s  $f13,   $f13,     $f12                                         \n\t"
+        "madd.s $f14,   $f13,     $f4,     $f7                                 \n\t"
+        "madd.s $f14,   $f14,     $f5,     $f16                                \n\t"
+        "swc1   $f8,    16(%[out])                                             \n\t"
+        "lwc1   $f6,    24(%[in])                                              \n\t"
+        "mul.s  $f9,    $f3,      $f7                                          \n\t"
+        "mul.s  $f15,   $f2,      $f13                                         \n\t"
+        "msub.s $f15,   $f15,     %[gain], $f6                                 \n\t"
+        "sub.s  $f15,   $f15,     $f9                                          \n\t"
+        "madd.s $f8,    $f15,     $f4,     $f13                                \n\t"
+        "madd.s $f8,    $f8,      $f5,     $f7                                 \n\t"
+        "swc1   $f14,   20(%[out])                                             \n\t"
+        "lwc1   $f11,   28(%[in])                                              \n\t"
+        "mul.s  $f12,   $f3,      $f13                                         \n\t"
+        "mul.s  $f16,   $f2,      $f15                                         \n\t"
+        "msub.s $f16,   $f16,     %[gain], $f11                                \n\t"
+        "sub.s  $f16,   $f16,     $f12                                         \n\t"
+        "madd.s $f14,   $f16,     $f4,     $f15                                \n\t"
+        "madd.s $f14,   $f14,     $f5,     $f13                                \n\t"
+        "swc1   $f8,    24(%[out])                                             \n\t"
+        "addiu  %[out], 32                                                     \n\t"
+        "addiu  %[in],  32                                                     \n\t"
+        "addiu  %[n],   -8                                                     \n\t"
+        "swc1   $f15,   4(%[mem])                                              \n\t"
+        "mov.s  $f1,    $f15                                                   \n\t"
+        "mov.s  $f0,    $f16                                                   \n\t"
+        "swc1   $f16,   0(%[mem])                                              \n\t"
+        "swc1   $f14,   -4(%[out])                                             \n\t"
+        "bnez   %[n],   ff_acelp_apply_order_2_transfer_function_madd%=        \n\t"
+
+        "ff_acelp_apply_order_2_transfer_function_end%=:                       \n\t"
+
+         : [out] "+r" (out),
+           [in] "+r" (in), [gain] "+f" (gain),
+           [n] "+r" (n), [mem] "+r" (mem)
+         : [zero_coeffs] "r" (zero_coeffs),
+           [pole_coeffs] "r" (pole_coeffs)
+         : "$f0", "$f1", "$f2", "$f3", "$f4", "$f5",
+           "$f6", "$f7",  "$f8", "$f9", "$f10", "$f11",
+           "$f12", "$f13", "$f14", "$f15", "$f16"
+    );
+}
+#endif /* HAVE_MIPSFPU && HAVE_INLINE_ASM */
diff --git a/libavcodec/mips/acelp_vectors_mips.h b/libavcodec/mips/acelp_vectors_mips.h
new file mode 100644
index 0000000..b47a695
--- /dev/null
+++ b/libavcodec/mips/acelp_vectors_mips.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Nedeljko Babic (nbabic at mips.com)
+ *
+ * adaptive and fixed codebook vector operations for ACELP-based codecs
+ * optimized for MIPS
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/acelp_vectors.c
+ */
+
+#ifndef AVCODEC_ACELP_VECTORS_MIPS_H
+#define AVCODEC_ACELP_VECTORS_MIPS_H
+static av_always_inline void ff_weighted_vector_sumf(
+                             float *out, const float *in_a, const float *in_b,
+                             float weight_coeff_a, float weight_coeff_b, int length)
+{
+    /* loop unrolled two times */
+    __asm__ __volatile__ (
+        "blez   %[length], ff_weighted_vector_sumf_end%=                \n\t"
+
+        "ff_weighted_vector_sumf_madd%=:                                \n\t"
+        "lwc1   $f0,       0(%[in_a])                                   \n\t"
+        "lwc1   $f3,       4(%[in_a])                                   \n\t"
+        "lwc1   $f1,       0(%[in_b])                                   \n\t"
+        "lwc1   $f4,       4(%[in_b])                                   \n\t"
+        "mul.s  $f2,       %[weight_coeff_a], $f0                       \n\t"
+        "mul.s  $f5,       %[weight_coeff_a], $f3                       \n\t"
+        "madd.s $f2,       $f2,               %[weight_coeff_b], $f1    \n\t"
+        "madd.s $f5,       $f5,               %[weight_coeff_b], $f4    \n\t"
+        "addiu  %[length], -2                                           \n\t"
+        "addiu  %[in_a],   8                                            \n\t"
+        "addiu  %[in_b],   8                                            \n\t"
+        "swc1   $f2,       0(%[out])                                    \n\t"
+        "swc1   $f5,       4(%[out])                                    \n\t"
+        "addiu  %[out],    8                                            \n\t"
+        "bnez   %[length], ff_weighted_vector_sumf_madd%=               \n\t"
+
+        "ff_weighted_vector_sumf_end%=:                                 \n\t"
+
+        : [out] "+r" (out),
+          [in_a] "+r" (in_a),   [in_b] "+r" (in_b),
+          [length] "+r" (length)
+        : [weight_coeff_a] "f" (weight_coeff_a),
+          [weight_coeff_b] "f" (weight_coeff_b)
+        : "$f0", "$f1", "$f2", "$f3", "$f4", "$f5"
+    );
+}
+#endif /* AVCODEC_ACELP_VECTORS_MIPS_H */
diff --git a/libavcodec/mips/amrwb_lsp2lpc.h b/libavcodec/mips/amrwb_lsp2lpc.h
new file mode 100644
index 0000000..e0303bc
--- /dev/null
+++ b/libavcodec/mips/amrwb_lsp2lpc.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Nedeljko Babic (nbabic at mips.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/lsp.c
+ */
+
+#ifndef AVCODEC_AMRWB_LSP2LPC_H
+#define AVCODEC_AMRWB_LSP2LPC_H
+av_always_inline void ff_amrwb_lsp2lpc(const double *lsp, float *lp, int lp_order)
+{
+    int lp_half_order = lp_order >> 1;
+    double buf[lp_half_order + 1];
+    double pa[lp_half_order + 1];
+    double *qa = buf + 1;
+    double lsp_lp_o = lsp[lp_order - 1];
+    int i,j;
+    double paf, qaf;
+
+    qa[-1] = 0.0;
+
+    ff_lsp2polyf(lsp    , pa, lp_half_order    );
+    ff_lsp2polyf(lsp + 1, qa, lp_half_order - 1);
+
+    for (i = 1, j = lp_order - 1; i < lp_half_order; i++, j--) {
+        paf =  pa[i];
+        qaf = (qa[i] - qa[i-2]) * (1 - lsp_lp_o);
+
+        __asm__ __volatile__ (
+            "madd.d %[paf], %[paf], %[paf], %[lsp_lp_o] \n\t"
+
+            : [paf]"+f"(paf)
+            : [lsp_lp_o]"f"(lsp_lp_o)
+        );
+        lp[i-1]  = (paf + qaf) * 0.5;
+        lp[j-1]  = (paf - qaf) * 0.5;
+    }
+
+    paf = pa[lp_half_order] * 0.5;
+
+    __asm__ __volatile__ (
+        "madd.d %[paf], %[paf], %[paf], %[lsp_lp_o]     \n\t"
+
+        : [paf]"+f"(paf)
+        : [lsp_lp_o]"f"(lsp_lp_o)
+    );
+
+    lp[lp_half_order - 1] = paf;
+
+    lp[lp_order - 1] = lsp_lp_o;
+}
+#endif /* AVCODEC_AMRWB_LSP2LPC_H */
\ No newline at end of file
diff --git a/libavcodec/mips/amrwbdec_mips.h b/libavcodec/mips/amrwbdec_mips.h
new file mode 100644
index 0000000..e715df1
--- /dev/null
+++ b/libavcodec/mips/amrwbdec_mips.h
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Nedeljko Babic (nbabic at mips.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/amrwbdec.c
+ */
+#ifndef AVCODEC_AMRWBDEC_MIPS_H
+#define AVCODEC_AMRWBDEC_MIPS_H
+
+static void hb_fir_filter(float *out, const float fir_coef[HB_FIR_SIZE + 1],
+                          float mem[HB_FIR_SIZE], const float *in)
+{
+    int i;
+    float data[AMRWB_SFR_SIZE_16k + HB_FIR_SIZE]; // past and current samples
+
+    memcpy(data, mem, HB_FIR_SIZE * sizeof(float));
+    memcpy(data + HB_FIR_SIZE, in, AMRWB_SFR_SIZE_16k * sizeof(float));
+
+    for (i = 0; i < AMRWB_SFR_SIZE_16k; i++) {
+        float output;
+        float * p_data = (data+i);
+
+        /**
+        * inner loop is entirely unrolled and instructions are scheduled
+        * to minimize pipeline stall
+        */
+        __asm__ __volatile__(
+            "mtc1       $zero,     %[output]                      \n\t"
+            "lwc1       $f0,       0(%[p_data])                   \n\t"
+            "lwc1       $f1,       0(%[fir_coef])                 \n\t"
+            "lwc1       $f2,       4(%[p_data])                   \n\t"
+            "madd.s     %[output], %[output],       $f0, $f1      \n\t"
+            "lwc1       $f3,       4(%[fir_coef])                 \n\t"
+            "lwc1       $f4,       8(%[p_data])                   \n\t"
+            "madd.s     %[output], %[output],       $f2, $f3      \n\t"
+            "lwc1       $f5,       8(%[fir_coef])                 \n\t"
+
+            "lwc1       $f0,       12(%[p_data])                  \n\t"
+            "lwc1       $f1,       12(%[fir_coef])                \n\t"
+            "madd.s     %[output], %[output],       $f4, $f5      \n\t"
+            "lwc1       $f2,       16(%[p_data])                  \n\t"
+            "madd.s     %[output], %[output],       $f0, $f1      \n\t"
+            "lwc1       $f3,       16(%[fir_coef])                \n\t"
+            "lwc1       $f4,       20(%[p_data])                  \n\t"
+            "lwc1       $f5,       20(%[fir_coef])                \n\t"
+            "madd.s     %[output], %[output],       $f2, $f3      \n\t"
+
+            "lwc1       $f0,       24(%[p_data])                  \n\t"
+            "lwc1       $f1,       24(%[fir_coef])                \n\t"
+            "lwc1       $f2,       28(%[p_data])                  \n\t"
+            "madd.s     %[output], %[output],       $f4, $f5      \n\t"
+            "lwc1       $f3,       28(%[fir_coef])                \n\t"
+            "madd.s     %[output], %[output],       $f0, $f1      \n\t"
+            "lwc1       $f4,       32(%[p_data])                  \n\t"
+            "madd.s     %[output], %[output],       $f2, $f3      \n\t"
+            "lwc1       $f5,       32(%[fir_coef])                \n\t"
+            "madd.s     %[output], %[output],       $f4, $f5      \n\t"
+
+            "lwc1       $f0,       36(%[p_data])                  \n\t"
+            "lwc1       $f1,       36(%[fir_coef])                \n\t"
+            "lwc1       $f2,       40(%[p_data])                  \n\t"
+            "lwc1       $f3,       40(%[fir_coef])                \n\t"
+            "madd.s     %[output], %[output],       $f0, $f1      \n\t"
+            "lwc1       $f4,       44(%[p_data])                  \n\t"
+            "lwc1       $f5,       44(%[fir_coef])                \n\t"
+            "madd.s     %[output], %[output],       $f2, $f3      \n\t"
+
+            "lwc1       $f0,       48(%[p_data])                  \n\t"
+            "lwc1       $f1,       48(%[fir_coef])                \n\t"
+            "lwc1       $f2,       52(%[p_data])                  \n\t"
+            "madd.s     %[output], %[output],       $f4, $f5      \n\t"
+            "lwc1       $f3,       52(%[fir_coef])                \n\t"
+            "lwc1       $f4,       56(%[p_data])                  \n\t"
+            "madd.s     %[output], %[output],       $f0, $f1      \n\t"
+            "lwc1       $f5,       56(%[fir_coef])                \n\t"
+            "madd.s     %[output], %[output],       $f2, $f3      \n\t"
+
+            "lwc1       $f0,       60(%[p_data])                  \n\t"
+            "lwc1       $f1,       60(%[fir_coef])                \n\t"
+            "lwc1       $f2,       64(%[p_data])                  \n\t"
+            "madd.s     %[output], %[output],       $f4, $f5      \n\t"
+            "lwc1       $f3,       64(%[fir_coef])                \n\t"
+            "madd.s     %[output], %[output],       $f0, $f1      \n\t"
+            "lwc1       $f4,       68(%[p_data])                  \n\t"
+            "madd.s     %[output], %[output],       $f2, $f3      \n\t"
+            "lwc1       $f5,       68(%[fir_coef])                \n\t"
+            "madd.s     %[output], %[output],       $f4, $f5      \n\t"
+
+            "lwc1       $f0,       72(%[p_data])                  \n\t"
+            "lwc1       $f1,       72(%[fir_coef])                \n\t"
+            "lwc1       $f2,       76(%[p_data])                  \n\t"
+            "lwc1       $f3,       76(%[fir_coef])                \n\t"
+            "madd.s     %[output], %[output],       $f0, $f1      \n\t"
+            "lwc1       $f4,       80(%[p_data])                  \n\t"
+            "lwc1       $f5,       80(%[fir_coef])                \n\t"
+            "madd.s     %[output], %[output],       $f2, $f3      \n\t"
+
+            "lwc1       $f0,       84(%[p_data])                  \n\t"
+            "lwc1       $f1,       84(%[fir_coef])                \n\t"
+            "lwc1       $f2,       88(%[p_data])                  \n\t"
+            "madd.s     %[output], %[output],       $f4, $f5      \n\t"
+            "lwc1       $f3,       88(%[fir_coef])                \n\t"
+            "lwc1       $f4,       92(%[p_data])                  \n\t"
+            "madd.s     %[output], %[output],       $f0, $f1      \n\t"
+            "lwc1       $f5,       92(%[fir_coef])                \n\t"
+            "madd.s     %[output], %[output],       $f2, $f3      \n\t"
+
+            "lwc1       $f0,       96(%[p_data])                  \n\t"
+            "lwc1       $f1,       96(%[fir_coef])                \n\t"
+            "lwc1       $f2,       100(%[p_data])                 \n\t"
+            "madd.s     %[output], %[output],       $f4, $f5      \n\t"
+            "lwc1       $f3,       100(%[fir_coef])               \n\t"
+            "lwc1       $f4,       104(%[p_data])                 \n\t"
+            "madd.s     %[output], %[output],       $f0, $f1      \n\t"
+            "lwc1       $f5,       104(%[fir_coef])               \n\t"
+            "madd.s     %[output], %[output],       $f2, $f3      \n\t"
+
+            "lwc1       $f0,       108(%[p_data])                 \n\t"
+            "lwc1       $f1,       108(%[fir_coef])               \n\t"
+            "madd.s     %[output], %[output],       $f4, $f5      \n\t"
+            "lwc1       $f2,       112(%[p_data])                 \n\t"
+            "lwc1       $f3,       112(%[fir_coef])               \n\t"
+            "madd.s     %[output], %[output],       $f0, $f1      \n\t"
+            "lwc1       $f4,       116(%[p_data])                 \n\t"
+            "lwc1       $f5,       116(%[fir_coef])               \n\t"
+            "lwc1       $f0,       120(%[p_data])                 \n\t"
+            "madd.s     %[output], %[output],       $f2, $f3      \n\t"
+            "lwc1       $f1,       120(%[fir_coef])               \n\t"
+            "madd.s     %[output], %[output],       $f4, $f5      \n\t"
+            "madd.s     %[output], %[output],       $f0, $f1      \n\t"
+
+            : [output]"+f"(output)
+            : [fir_coef]"r"(fir_coef), [p_data]"r"(p_data)
+            : "$f0", "$f1", "$f2", "$f3", "$f4", "$f5"
+        );
+        out[i] = output;
+    }
+    memcpy(mem, data + AMRWB_SFR_SIZE_16k, HB_FIR_SIZE * sizeof(float));
+}
+
+#endif /* AVCODEC_AMRWBDEC_MIPS_H  */
diff --git a/libavcodec/mips/celp_filters_mips.c b/libavcodec/mips/celp_filters_mips.c
new file mode 100644
index 0000000..a01bfde
--- /dev/null
+++ b/libavcodec/mips/celp_filters_mips.c
@@ -0,0 +1,289 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Nedeljko Babic (nbabic at mips.com)
+ *
+ * various filters for CELP-based codecs optimized for MIPS
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/celp_filters.c
+ */
+#include "config.h"
+
+#if HAVE_MIPSFPU && HAVE_INLINE_ASM
+
+#include "libavutil/attributes.h"
+#include "libavutil/common.h"
+#include "libavcodec/celp_filters.h"
+
+av_always_inline void ff_celp_lp_synthesis_filterf(float *out,
+                                  const float *filter_coeffs,
+                                  const float* in, int buffer_length,
+                                  int filter_length)
+{
+    int i,n;
+
+    float out0, out1, out2, out3;
+    float old_out0, old_out1, old_out2, old_out3;
+    float a,b,c;
+    const float *p_filter_coeffs;
+    float *p_out;
+
+    a = filter_coeffs[0];
+    b = filter_coeffs[1];
+    c = filter_coeffs[2];
+    b -= filter_coeffs[0] * filter_coeffs[0];
+    c -= filter_coeffs[1] * filter_coeffs[0];
+    c -= filter_coeffs[0] * b;
+
+    old_out0 = out[-4];
+    old_out1 = out[-3];
+    old_out2 = out[-2];
+    old_out3 = out[-1];
+    for (n = 0; n <= buffer_length - 4; n+=4) {
+        p_filter_coeffs = filter_coeffs;
+        p_out = out;
+
+        out0 = in[0];
+        out1 = in[1];
+        out2 = in[2];
+        out3 = in[3];
+
+        __asm__ __volatile__(
+            "lwc1       $f2,     8(%[filter_coeffs])                        \n\t"
+            "lwc1       $f1,     4(%[filter_coeffs])                        \n\t"
+            "lwc1       $f0,     0(%[filter_coeffs])                        \n\t"
+            "nmsub.s    %[out0], %[out0],             $f2, %[old_out1]      \n\t"
+            "nmsub.s    %[out1], %[out1],             $f2, %[old_out2]      \n\t"
+            "nmsub.s    %[out2], %[out2],             $f2, %[old_out3]      \n\t"
+            "lwc1       $f3,     12(%[filter_coeffs])                       \n\t"
+            "nmsub.s    %[out0], %[out0],             $f1, %[old_out2]      \n\t"
+            "nmsub.s    %[out1], %[out1],             $f1, %[old_out3]      \n\t"
+            "nmsub.s    %[out2], %[out2],             $f3, %[old_out2]      \n\t"
+            "nmsub.s    %[out0], %[out0],             $f0, %[old_out3]      \n\t"
+            "nmsub.s    %[out3], %[out3],             $f3, %[old_out3]      \n\t"
+            "nmsub.s    %[out1], %[out1],             $f3, %[old_out1]      \n\t"
+            "nmsub.s    %[out0], %[out0],             $f3, %[old_out0]      \n\t"
+
+            : [out0]"+f"(out0), [out1]"+f"(out1),
+              [out2]"+f"(out2), [out3]"+f"(out3)
+            : [old_out0]"f"(old_out0), [old_out1]"f"(old_out1),
+              [old_out2]"f"(old_out2), [old_out3]"f"(old_out3),
+              [filter_coeffs]"r"(filter_coeffs)
+            : "$f0", "$f1", "$f2", "$f3", "$f4"
+        );
+
+        old_out3 = out[-5];
+
+        for (i = 5; i <= filter_length; i += 2) {
+            __asm__ __volatile__(
+                "lwc1    $f5,         16(%[p_filter_coeffs])                \n\t"
+                "addiu   %[p_out],    -8                                    \n\t"
+                "addiu   %[p_filter_coeffs], 8                              \n\t"
+                "nmsub.s %[out1],     %[out1],      $f5, %[old_out0]        \n\t"
+                "nmsub.s %[out3],     %[out3],      $f5, %[old_out2]        \n\t"
+                "lwc1    $f4,         12(%[p_filter_coeffs])                \n\t"
+                "lwc1    %[old_out2], -16(%[p_out])                         \n\t"
+                "nmsub.s %[out0],     %[out0],      $f5, %[old_out3]        \n\t"
+                "nmsub.s %[out2],     %[out2],      $f5, %[old_out1]        \n\t"
+                "nmsub.s %[out1],     %[out1],      $f4, %[old_out3]        \n\t"
+                "nmsub.s %[out3],     %[out3],      $f4, %[old_out1]        \n\t"
+                "mov.s   %[old_out1], %[old_out3]                           \n\t"
+                "lwc1    %[old_out3], -20(%[p_out])                         \n\t"
+                "nmsub.s %[out0],     %[out0],      $f4, %[old_out2]        \n\t"
+                "nmsub.s %[out2],     %[out2],      $f4, %[old_out0]        \n\t"
+
+                : [out0]"+f"(out0), [out1]"+f"(out1),
+                  [out2]"+f"(out2), [out3]"+f"(out3), [old_out0]"+f"(old_out0),
+                  [old_out1]"+f"(old_out1), [old_out2]"+f"(old_out2),
+                  [old_out3]"+f"(old_out3),[p_filter_coeffs]"+r"(p_filter_coeffs),
+                  [p_out]"+r"(p_out)
+                :
+                : "$f4", "$f5"
+            );
+            FFSWAP(float, old_out0, old_out2);
+        }
+
+        __asm__ __volatile__(
+            "nmsub.s    %[out3], %[out3], %[a], %[out2]                     \n\t"
+            "nmsub.s    %[out2], %[out2], %[a], %[out1]                     \n\t"
+            "nmsub.s    %[out3], %[out3], %[b], %[out1]                     \n\t"
+            "nmsub.s    %[out1], %[out1], %[a], %[out0]                     \n\t"
+            "nmsub.s    %[out2], %[out2], %[b], %[out0]                     \n\t"
+            "nmsub.s    %[out3], %[out3], %[c], %[out0]                     \n\t"
+
+            : [out0]"+f"(out0), [out1]"+f"(out1),
+              [out2]"+f"(out2), [out3]"+f"(out3)
+            : [a]"f"(a), [b]"f"(b), [c]"f"(c)
+        );
+
+        out[0] = out0;
+        out[1] = out1;
+        out[2] = out2;
+        out[3] = out3;
+
+        old_out0 = out0;
+        old_out1 = out1;
+        old_out2 = out2;
+        old_out3 = out3;
+
+        out += 4;
+        in  += 4;
+    }
+
+    out -= n;
+    in -= n;
+    for (; n < buffer_length; n++) {
+        float out_val, out_val_i, fc_val;
+        p_filter_coeffs = filter_coeffs;
+        p_out = &out[n];
+        out_val = in[n];
+        for (i = 1; i <= filter_length; i++) {
+            __asm__ __volatile__(
+                "lwc1    %[fc_val],          0(%[p_filter_coeffs])                        \n\t"
+                "lwc1    %[out_val_i],       -4(%[p_out])                                 \n\t"
+                "addiu   %[p_filter_coeffs], 4                                            \n\t"
+                "addiu   %[p_out],           -4                                           \n\t"
+                "nmsub.s %[out_val],         %[out_val],          %[fc_val], %[out_val_i] \n\t"
+
+                : [fc_val]"=&f"(fc_val), [out_val]"+f"(out_val),
+                  [out_val_i]"=&f"(out_val_i), [p_out]"+r"(p_out),
+                  [p_filter_coeffs]"+r"(p_filter_coeffs)
+            );
+        }
+        out[n] = out_val;
+    }
+}
+
+av_always_inline void ff_celp_lp_zero_synthesis_filterf(float *out,
+                                       const float *filter_coeffs,
+                                       const float *in, int buffer_length,
+                                       int filter_length)
+{
+    int i,n;
+    float sum_out8, sum_out7, sum_out6, sum_out5, sum_out4, fc_val;
+    float sum_out3, sum_out2, sum_out1;
+    const float *p_filter_coeffs, *p_in;
+
+    for (n = 0; n < buffer_length; n+=8) {
+        p_in = &in[n];
+        p_filter_coeffs = filter_coeffs;
+        sum_out8 = in[n+7];
+        sum_out7 = in[n+6];
+        sum_out6 = in[n+5];
+        sum_out5 = in[n+4];
+        sum_out4 = in[n+3];
+        sum_out3 = in[n+2];
+        sum_out2 = in[n+1];
+        sum_out1 = in[n];
+        i = filter_length;
+
+        /* i is always greater than 0
+        * inner loop is unrolled eight times so there is less memory access
+        */
+        __asm__ __volatile__(
+            "filt_lp_inner%=:                                               \n\t"
+            "lwc1   %[fc_val],   0(%[p_filter_coeffs])                      \n\t"
+            "lwc1   $f7,         6*4(%[p_in])                               \n\t"
+            "lwc1   $f6,         5*4(%[p_in])                               \n\t"
+            "lwc1   $f5,         4*4(%[p_in])                               \n\t"
+            "lwc1   $f4,         3*4(%[p_in])                               \n\t"
+            "lwc1   $f3,         2*4(%[p_in])                               \n\t"
+            "lwc1   $f2,         4(%[p_in])                                 \n\t"
+            "lwc1   $f1,         0(%[p_in])                                 \n\t"
+            "lwc1   $f0,         -4(%[p_in])                                \n\t"
+            "addiu  %[i],        -2                                         \n\t"
+            "madd.s %[sum_out8], %[sum_out8],          %[fc_val], $f7       \n\t"
+            "madd.s %[sum_out7], %[sum_out7],          %[fc_val], $f6       \n\t"
+            "madd.s %[sum_out6], %[sum_out6],          %[fc_val], $f5       \n\t"
+            "madd.s %[sum_out5], %[sum_out5],          %[fc_val], $f4       \n\t"
+            "madd.s %[sum_out4], %[sum_out4],          %[fc_val], $f3       \n\t"
+            "madd.s %[sum_out3], %[sum_out3],          %[fc_val], $f2       \n\t"
+            "madd.s %[sum_out2], %[sum_out2],          %[fc_val], $f1       \n\t"
+            "madd.s %[sum_out1], %[sum_out1],          %[fc_val], $f0       \n\t"
+            "lwc1   %[fc_val],   4(%[p_filter_coeffs])                      \n\t"
+            "mov.s  $f7,         $f6                                        \n\t"
+            "mov.s  $f6,         $f5                                        \n\t"
+            "mov.s  $f5,         $f4                                        \n\t"
+            "mov.s  $f4,         $f3                                        \n\t"
+            "mov.s  $f3,         $f2                                        \n\t"
+            "mov.s  $f2,         $f1                                        \n\t"
+            "mov.s  $f1,         $f0                                        \n\t"
+            "lwc1   $f0,         -8(%[p_in])                                \n\t"
+            "addiu  %[p_filter_coeffs], 8                                   \n\t"
+            "addiu  %[p_in],     -8                                         \n\t"
+            "madd.s %[sum_out8], %[sum_out8],          %[fc_val], $f7       \n\t"
+            "madd.s %[sum_out7], %[sum_out7],          %[fc_val], $f6       \n\t"
+            "madd.s %[sum_out6], %[sum_out6],          %[fc_val], $f5       \n\t"
+            "madd.s %[sum_out5], %[sum_out5],          %[fc_val], $f4       \n\t"
+            "madd.s %[sum_out4], %[sum_out4],          %[fc_val], $f3       \n\t"
+            "madd.s %[sum_out3], %[sum_out3],          %[fc_val], $f2       \n\t"
+            "madd.s %[sum_out2], %[sum_out2],          %[fc_val], $f1       \n\t"
+            "madd.s %[sum_out1], %[sum_out1],          %[fc_val], $f0       \n\t"
+            "bgtz   %[i],        filt_lp_inner%=                            \n\t"
+
+            : [sum_out8]"+f"(sum_out8), [sum_out7]"+f"(sum_out7),
+              [sum_out6]"+f"(sum_out6), [sum_out5]"+f"(sum_out5),
+              [sum_out4]"+f"(sum_out4), [sum_out3]"+f"(sum_out3),
+              [sum_out2]"+f"(sum_out2), [sum_out1]"+f"(sum_out1),
+              [fc_val]"=&f"(fc_val), [p_filter_coeffs]"+r"(p_filter_coeffs),
+              [p_in]"+r"(p_in), [i]"+r"(i)
+            :
+            : "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7"
+        );
+
+        out[n+7] = sum_out8;
+        out[n+6] = sum_out7;
+        out[n+5] = sum_out6;
+        out[n+4] = sum_out5;
+        out[n+3] = sum_out4;
+        out[n+2] = sum_out3;
+        out[n+1] = sum_out2;
+        out[n] = sum_out1;
+    }
+}
+
+#endif /* HAVE_MIPSFPU && HAVE_INLINE_ASM */
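
Note: the eight-way unrolled loop above accumulates the FIR sum for eight output samples at a time and consumes two filter taps per pass (addiu %[i], -2), so it relies on an even filter_length and a buffer_length divisible by eight. A plain-C sketch of the sum it computes, mirroring the generic routine in libavcodec/celp_filters.c (the helper name is illustrative only, not part of the patch):

/* FIR part of the LP filter: each output sample adds the filter-weighted
 * history of previous *input* samples. */
static void lp_zero_synthesis_ref(float *out, const float *filter_coeffs,
                                  const float *in, int buffer_length,
                                  int filter_length)
{
    int i, n;

    for (n = 0; n < buffer_length; n++) {
        out[n] = in[n];
        for (i = 1; i <= filter_length; i++)
            out[n] += filter_coeffs[i - 1] * in[n - i];
    }
}
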
diff --git a/libavcodec/mips/celp_math_mips.h b/libavcodec/mips/celp_math_mips.h
new file mode 100644
index 0000000..c3c373f
--- /dev/null
+++ b/libavcodec/mips/celp_math_mips.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Nedeljko Babic (nbabic at mips.com)
+ *
+ * Math operations optimized for MIPS
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/celp_math.c
+ */
+
+#ifndef AVCODEC_CELP_MATH_MIPS_H
+#define AVCODEC_CELP_MATH_MIPS_H
+static av_always_inline float ff_dot_productf(const float* a, const float* b,
+                                              int length)
+{
+    float sum;
+
+    __asm__ __volatile__ (
+        "mtc1   $zero,      %[sum]                      \n\t"
+        "blez   %[length],  ff_dot_productf_end%=       \n\t"
+        "ff_dot_productf_madd%=:                        \n\t"
+        "lwc1   $f2,        0(%[a])                     \n\t"
+        "lwc1   $f1,        0(%[b])                     \n\t"
+        "addiu  %[length],  -1                          \n\t"
+        "addiu  %[a],       %[a],   4                   \n\t"
+        "addiu  %[b],       %[b],   4                   \n\t"
+        "madd.s %[sum],     %[sum], $f1, $f2            \n\t"
+        "bnez   %[length],  ff_dot_productf_madd%=      \n\t"
+        "ff_dot_productf_end%=:                         \n\t"
+
+        : [sum] "=&f" (sum), [a] "+r" (a), [b] "+r" (b),
+          [length] "+r" (length)
+        :
+        : "$f1", "$f2"
+    );
+    return sum;
+}
+#endif /* AVCODEC_CELP_MATH_MIPS_H */
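
Note: the loop above mirrors the scalar dot product in libavcodec/celp_math.c, issuing one madd.s per sample pair; the initial blez skips the loop for non-positive length, so 0 is returned in that case. A scalar sketch for comparison (the helper name is illustrative only):

/* Scalar equivalent of the madd.s loop in ff_dot_productf() above. */
static float dot_productf_ref(const float *a, const float *b, int length)
{
    float sum = 0.0f;
    int i;

    for (i = 0; i < length; i++)
        sum += a[i] * b[i];
    return sum;
}
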
diff --git a/libavcodec/mips/lsp_mips.h b/libavcodec/mips/lsp_mips.h
new file mode 100644
index 0000000..c91ccee
--- /dev/null
+++ b/libavcodec/mips/lsp_mips.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Nedeljko Babic (nbabic at mips.com)
+ *
+ * LSP routines for ACELP-based codecs optimized for MIPS
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/lsp.c
+ */
+#ifndef AVCODEC_LSP_MIPS_H
+#define AVCODEC_LSP_MIPS_H
+
+static av_always_inline void ff_lsp2polyf(const double *lsp, double *f, int lp_half_order)
+{
+    int i, j = 0;
+    double *p_fi = f;
+    double *p_f = 0;
+
+    f[0] = 1.0;
+    f[1] = -2 * lsp[0];
+    lsp -= 2;
+
+    for (i = 2; i <= lp_half_order; i++) {
+        double tmp, f_j_2, f_j_1, f_j;
+        double val = lsp[2*i];
+
+        __asm__ __volatile__(
+            "move   %[p_f],     %[p_fi]                         \n\t"
+            "add.d  %[val],     %[val],     %[val]              \n\t"
+            "addiu  %[p_fi],    8                               \n\t"
+            "ldc1   %[f_j_2],   0(%[p_f])                       \n\t"
+            "ldc1   %[f_j_1],   8(%[p_f])                       \n\t"
+            "neg.d  %[val],     %[val]                          \n\t"
+            "add.d  %[tmp],     %[f_j_2],   %[f_j_2]            \n\t"
+            "madd.d %[tmp],     %[tmp],     %[f_j_1], %[val]    \n\t"
+            "mov.d  %[f_j],     %[f_j_1]                        \n\t"
+            "addiu  %[j],       %[i], -2                        \n\t"
+            "mov.d  %[f_j_1],   %[f_j_2]                        \n\t"
+            "ldc1   %[f_j_2],   -8(%[p_f])                      \n\t"
+            "sdc1   %[tmp],     16(%[p_f])                      \n\t"
+            "beqz   %[j],       ff_lsp2polyf_lp_j_end%=         \n\t"
+            "ff_lsp2polyf_lp_j%=:                               \n\t"
+            "add.d  %[tmp],     %[f_j],     %[f_j_2]            \n\t"
+            "madd.d %[tmp],     %[tmp],     %[f_j_1], %[val]    \n\t"
+            "mov.d  %[f_j],     %[f_j_1]                        \n\t"
+            "addiu  %[j],       -1                              \n\t"
+            "mov.d  %[f_j_1],   %[f_j_2]                        \n\t"
+            "ldc1   %[f_j_2],   -16(%[p_f])                     \n\t"
+            "sdc1   %[tmp],     8(%[p_f])                       \n\t"
+            "addiu  %[p_f],     -8                              \n\t"
+            "bgtz   %[j],       ff_lsp2polyf_lp_j%=             \n\t"
+            "ff_lsp2polyf_lp_j_end%=:                           \n\t"
+
+            : [f_j_2]"=&f"(f_j_2), [f_j_1]"=&f"(f_j_1), [val]"+f"(val),
+              [tmp]"=&f"(tmp), [f_j]"=&f"(f_j), [p_f]"+r"(p_f),
+              [j]"+r"(j), [p_fi]"+r"(p_fi)
+            : [i]"r"(i)
+        );
+        f[1] += val;
+    }
+}
+
+#endif /* AVCODEC_LSP_MIPS_H */
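
Note: the asm above evaluates the LSP-to-polynomial recurrence from libavcodec/lsp.c; the value -2 * lsp[2*i] is formed with the add.d/neg.d pair before the inner loop, and f[1] is updated in C afterwards. A plain-C sketch of that recurrence (the helper name is illustrative only, not part of the patch):

/* Build the polynomial coefficients f[] from the line spectral pairs. */
static void lsp2polyf_ref(const double *lsp, double *f, int lp_half_order)
{
    int i, j;

    f[0] = 1.0;
    f[1] = -2 * lsp[0];
    lsp -= 2;
    for (i = 2; i <= lp_half_order; i++) {
        double val = -2 * lsp[2 * i];

        f[i] = val * f[i - 1] + 2 * f[i - 2];
        for (j = i - 1; j > 1; j--)
            f[j] += f[j - 1] * val + f[j - 2];
        f[1] += val;
    }
}
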
diff --git a/libavutil/libm.h b/libavutil/libm.h
index 62faea4..57eb0c0 100644
--- a/libavutil/libm.h
+++ b/libavutil/libm.h
@@ -28,6 +28,10 @@
 #include "config.h"
 #include "attributes.h"
 
+#if HAVE_MIPSFPU && HAVE_INLINE_ASM
+#include "libavutil/mips/libm_mips.h"
+#endif /* HAVE_MIPSFPU && HAVE_INLINE_ASM */
+
 #if !HAVE_CBRTF
 #undef cbrtf
 #define cbrtf(x) powf(x, 1.0/3.0)
diff --git a/libavutil/mips/libm_mips.h b/libavutil/mips/libm_mips.h
new file mode 100644
index 0000000..0d8e3c3
--- /dev/null
+++ b/libavutil/mips/libm_mips.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Nedeljko Babic (nbabic at mips.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * MIPS optimization for some libm functions
+ */
+
+#ifndef AVUTIL_LIBM_MIPS_H
+#define AVUTIL_LIBM_MIPS_H
+
+static av_always_inline av_const long int lrintf_mips(float x)
+{
+    register int ret_int;
+
+    __asm__ __volatile__ (
+        "cvt.w.s    %[x],       %[x]    \n\t"
+        "mfc1       %[ret_int], %[x]    \n\t"
+
+        :[x]"+f"(x), [ret_int]"=r"(ret_int)
+    );
+    return ret_int;
+}
+
+#undef lrintf
+#define lrintf(x)   lrintf_mips(x)
+
+#define HAVE_LRINTF 1
+#endif /* AVUTIL_LIBM_MIPS_H */
+
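
Note: cvt.w.s converts using the current FPU rounding mode (round-to-nearest by default), which is the behaviour lrintf() expects, and mfc1 moves the resulting integer bit pattern into a GPR. Below is a hypothetical standalone check, not part of the patch, that compares the conversion against libm's lrintf() on a MIPS FPU target with the default rounding mode (function and variable names are illustrative only):

#include <math.h>
#include <stdio.h>

/* Standalone copy of the single-instruction conversion for testing. */
static long int lrintf_mips_sketch(float x)
{
    int ret_int;

    __asm__ __volatile__ (
        "cvt.w.s    %[x],       %[x]    \n\t"
        "mfc1       %[ret_int], %[x]    \n\t"
        : [x] "+f" (x), [ret_int] "=r" (ret_int)
    );
    return ret_int;
}

int main(void)
{
    static const float v[] = { 0.4f, 0.5f, 1.5f, -2.5f, 1234.49f };
    int i;

    for (i = 0; i < 5; i++)
        printf("%9.2f -> mips %ld, libm %ld\n",
               v[i], lrintf_mips_sketch(v[i]), lrintf(v[i]));
    return 0;
}
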
-- 
1.7.3.4


