[FFmpeg-devel] [PATCH 3/4] mips: Implementation of AC3 fixed point decoder and optimization for MIPS.

Babic, Nedeljko nbabic at mips.com
Wed Aug 8 14:11:39 CEST 2012


Some small changes need to be made to this patch because of the changes
made to the AC3 floating point decoder.
Once those changes have been applied, the updated patch will be resubmitted.

-Nedeljko
________________________________________
From: Nedeljko Babic [nbabic at mips.com]
Sent: Friday, July 27, 2012 14:17
To: ffmpeg-devel at ffmpeg.org
Cc: Lukac, Zeljko; Babic, Nedeljko
Subject: [PATCH 3/4] mips: Implementation of AC3 fixed point decoder and optimization for MIPS.

AC3 fixed point decoder is implemented in C and appropriate functions
 are optimized for MIPS architecture. Some of DSP, format convert
 utils and FFT fixed point functions are optimized.

Signed-off-by: Nedeljko Babic <nbabic at mips.com>
---
 doc/mips.txt                            |    6 +
 libavcodec/allcodecs.c                  |    3 +
 libavcodec/dsputil.c                    |   24 +
 libavcodec/dsputil.h                    |    4 +
 libavcodec/fft.c                        |    1 +
 libavcodec/fft.h                        |   12 +
 libavcodec/fmtconvert.c                 |   79 ++
 libavcodec/fmtconvert.h                 |   57 +-
 libavcodec/kbdwin.c                     |   32 +
 libavcodec/kbdwin.h                     |    6 +-
 libavcodec/mips/Makefile                |    4 +
 libavcodec/mips/ac3dec_fixed.c          | 1660 +++++++++++++++++++++++++++++++
 libavcodec/mips/ac3dec_fixed.h          |  234 +++++
 libavcodec/mips/dsputil_mips_fixed.c    |  153 +++
 libavcodec/mips/fft_mips_fixed.c        |  900 +++++++++++++++++
 libavcodec/mips/fft_table_fixed.h       |  105 ++
 libavcodec/mips/fmtconvert_mips_fixed.c |  226 +++++
 libavutil/common.h                      |   12 +
 18 files changed, 3514 insertions(+), 4 deletions(-)
 create mode 100644 libavcodec/mips/ac3dec_fixed.c
 create mode 100644 libavcodec/mips/ac3dec_fixed.h
 create mode 100644 libavcodec/mips/dsputil_mips_fixed.c
 create mode 100644 libavcodec/mips/fft_mips_fixed.c
 create mode 100644 libavcodec/mips/fft_table_fixed.h
 create mode 100644 libavcodec/mips/fmtconvert_mips_fixed.c

diff --git a/doc/mips.txt b/doc/mips.txt
index 6fa6fb4..88ff481 100644
--- a/doc/mips.txt
+++ b/doc/mips.txt
@@ -47,6 +47,8 @@ Files that have MIPS copyright notice in them:
 * libavutil/mips/
       libm_mips.h
 * libavcodec/mips/
+      ac3dec_fixed.c
+      ac3dec_fixed.h
       acelp_filters_mips.c
       acelp_vectors_mips.c
       amrwbdec_mips.c
@@ -57,9 +59,13 @@ Files that have MIPS copyright notice in them:
       compute_antialias_float.h
       lsp_mips.h
       dsputil_mips.c
+      dsputil_mips_fixed.c
       fft_mips.c
+      fft_mips_fixed.c
       fft_table.h
+      fft_table_fixed.h
       fft_init_table.c
       fmtconvert_mips.c
+      fmtconvert_mips_fixed.c
       mpegaudiodsp_mips_fixed.c
       mpegaudiodsp_mips_float.c
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index bc37907..847da04 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -272,6 +272,9 @@ void avcodec_register_all(void)
     REGISTER_DECODER (AAC_LATM, aac_latm);
     REGISTER_ENCDEC  (AC3, ac3);
     REGISTER_ENCODER (AC3_FIXED, ac3_fixed);
+#if (ARCH_MIPS)
+    REGISTER_DECODER (AC3_FIXED, ac3_fixed);
+#endif /* ARCH_MIPS */
     REGISTER_ENCDEC  (ALAC, alac);
     REGISTER_DECODER (ALS, als);
     REGISTER_DECODER (AMRNB, amrnb);
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 9d5fac6..0490205 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -2515,6 +2515,26 @@ static void vector_fmul_window_c(float *dst, const float *src0,
     }
 }

+#if (ARCH_MIPS) /* the integer window routine is compiled only when ARCH_MIPS is set */
+static void vector_fmul_window_fixed_c(int *dst, const int16_t *src0,
+                                       const int16_t *src1, const int16_t *win, int len)
+{ /* writes dst[0..2*len-1]; reads src0[0..len-1], src1[0..len-1] and win[0..2*len-1] */
+    int i,j;
+    dst += len; /* re-base at the midpoint: negative i addresses the first half, */
+    win += len; /* non-negative j addresses the second half                     */
+    src0+= len; /* src0 is only ever indexed with the negative index i          */
+
+    for (i=-len, j=len-1; i<0; i++, j--) { /* i runs -len..-1 while j runs len-1..0 */
+        int s0 = src0[i];
+        int s1 = src1[j];
+        int wi = win[i];
+        int wj = win[j];
+        dst[i] = (s0*wj - s1*wi + 0x4000) >> 15; /* 0x4000 is half of 1<<15: round to nearest before the shift */
+        dst[j] = (s0*wi + s1*wj + 0x4000) >> 15; /* mirrored output sample, same rounding */
+    }
+}
+#endif /* ARCH_MIPS */
+
 static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
                                  int len)
 {
@@ -3045,6 +3065,9 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
     c->vector_fmul_reverse = vector_fmul_reverse_c;
     c->vector_fmul_add = vector_fmul_add_c;
     c->vector_fmul_window = vector_fmul_window_c;
+#if (ARCH_MIPS)
+    c->vector_fmul_window_fixed = vector_fmul_window_fixed_c;
+#endif
     c->vector_clipf = vector_clipf_c;
     c->scalarproduct_int16 = scalarproduct_int16_c;
     c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
@@ -3180,6 +3203,7 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
     if (ARCH_SH4)        ff_dsputil_init_sh4   (c, avctx);
     if (ARCH_BFIN)       ff_dsputil_init_bfin  (c, avctx);
     if (HAVE_MIPSFPU)    ff_dsputil_init_mips  (c, avctx);
+    if (HAVE_MIPSDSPR2)  ff_dsputil_init_mips_fixed(c);

     for (i = 0; i < 4; i++) {
         for (j = 0; j < 16; j++) {
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index 84d9979..a7c4af7 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -401,6 +401,9 @@ typedef struct DSPContext {
     void (*vector_fmul_add)(float *dst, const float *src0, const float *src1, const float *src2, int len);
     /* assume len is a multiple of 4, and arrays are 16-byte aligned */
     void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, int len);
+#if (ARCH_MIPS)
+    void (*vector_fmul_window_fixed)(int *dst, const int16_t *src0, const int16_t *src1, const int16_t *win, int len);
+#endif /* ARCH_MIPS */
     /* assume len is a multiple of 8, and arrays are 16-byte aligned */
     void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, float min, float max, int len /* align 16 */);
     /**
@@ -627,6 +630,7 @@ void ff_dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx);
 void ff_dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx);
 void ff_dsputil_init_vis(DSPContext* c, AVCodecContext *avctx);
 void ff_dsputil_init_mips(DSPContext* c, AVCodecContext *avctx);
+void ff_dsputil_init_mips_fixed(DSPContext* c);

 void ff_dsputil_init_dwt(DSPContext *c);
 void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx);
diff --git a/libavcodec/fft.c b/libavcodec/fft.c
index 8463bfb..707b9fc 100644
--- a/libavcodec/fft.c
+++ b/libavcodec/fft.c
@@ -168,6 +168,7 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
 #else
     if (CONFIG_MDCT)  s->mdct_calcw = ff_mdct_calcw_c;
     if (ARCH_ARM)     ff_fft_fixed_init_arm(s);
+    if (ARCH_MIPS)    ff_fft_fixed_init_mips(s);
 #endif

     for(j=4; j<=nbits; j++) {
diff --git a/libavcodec/fft.h b/libavcodec/fft.h
index 15e5a12..deabbab 100644
--- a/libavcodec/fft.h
+++ b/libavcodec/fft.h
@@ -80,6 +80,10 @@ struct FFTContext {
     void (*fft_calc)(struct FFTContext *s, FFTComplex *z);
     void (*imdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input);
     void (*imdct_half)(struct FFTContext *s, FFTSample *output, const FFTSample *input);
+#if (ARCH_MIPS)
+    void (*fft_fixed_calc)(struct FFTContext *s, FFTComplex *z);
+    void (*imdct_fixed_half)(struct FFTContext *s, FFTSample *output, const FFTSample *input);
+#endif /* ARCH_MIPS */
     void (*mdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input);
     void (*mdct_calcw)(struct FFTContext *s, FFTDouble *output, const FFTSample *input);
     int fft_permutation;
@@ -140,6 +144,9 @@ void ff_fft_init_arm(FFTContext *s);
 void ff_fft_init_mips(FFTContext *s);
 #else
 void ff_fft_fixed_init_arm(FFTContext *s);
+#if (ARCH_MIPS)
+void ff_fft_fixed_init_mips(FFTContext *s);
+#endif
 #endif

 void ff_fft_end(FFTContext *s);
@@ -147,6 +154,11 @@ void ff_fft_end(FFTContext *s);
 #define ff_mdct_init FFT_NAME(ff_mdct_init)
 #define ff_mdct_end  FFT_NAME(ff_mdct_end)

+#if (ARCH_MIPS)
+int ff_mdct_fixed_init_hardcoded_128(FFTContext *s, int nbits, int inverse, int scale);
+int ff_mdct_fixed_init_hardcoded(FFTContext *s, int nbits, int inverse, int scale);
+int ff_mdct_fixed_init(FFTContext *s, int nbits, int inverse, int scale);
+#endif /* ARCH_MIPS */
 int ff_mdct_init(FFTContext *s, int nbits, int inverse, double scale);
 void ff_mdct_end(FFTContext *s);

diff --git a/libavcodec/fmtconvert.c b/libavcodec/fmtconvert.c
index e47c205..b7b0345 100644
--- a/libavcodec/fmtconvert.c
+++ b/libavcodec/fmtconvert.c
@@ -28,7 +28,18 @@ static void int32_to_float_fmul_scalar_c(float *dst, const int *src, float mul,
     for(i=0; i<len; i++)
         dst[i] = src[i] * mul;
 }
+#if (ARCH_MIPS) /* opened here; the matching #endif follows fixed_to_int16_one */
+static void int32_to_fixed_fmul_scalar_c(int16_t *dst, const int *src, int mul, int len) { /* dst[i] = (src[i]*mul + 0x8000) >> 16, stored as int16_t, for i in [0, len) */
+    int i;
+    for(i=0; i<len; i++) /* the loop body is the single statement on the next line, despite its indentation */
+    dst[i] = (src[i] * mul + 0x8000) >> 16; /* 0x8000 is half of 1<<16 (rounding). NOTE(review): the product is computed in int -- confirm it cannot overflow for the mul values used */
+}

+static av_always_inline int fixed_to_int16_one(const int *src) /* convert one sample: pass *src through av_clip_int16_c_fixed() */
+{
+    return av_clip_int16_c_fixed(*src); /* helper is not defined in this view; presumably added to libavutil/common.h by this patch -- confirm */
+}
+#endif /* ARCH_MIPS */
 static av_always_inline int float_to_int16_one(const float *src){
     return av_clip_int16(lrintf(*src));
 }
@@ -56,6 +67,37 @@ static void float_to_int16_interleave_c(int16_t *dst, const float **src,
     }
 }

+#if (ARCH_MIPS) /* integer-input interleaver, compiled only when ARCH_MIPS is set */
+static void fixed_to_int16_interleave_c(int16_t *dst, const int **src,
+                                        long len, int channels)
+{ /* for every channel c and sample i: dst[channels*i + c] = fixed_to_int16_one(&src[c][i]) */
+    int i,j,c; /* note: i is int while len is long */
+    if(channels==2) { /* unrolled stereo case */
+        for(i=0; i<len; i++) {
+            dst[2*i] = fixed_to_int16_one(src[0]+i);
+            dst[2*i+1] = fixed_to_int16_one(src[1]+i);
+        }
+    }
+    else {
+        if(channels==6) { /* unrolled six-channel case */
+            for(i=0; i<len; i++) {
+                dst[6*i] = fixed_to_int16_one(src[0]+i);
+                dst[6*i+1] = fixed_to_int16_one(src[1]+i);
+                dst[6*i+2] = fixed_to_int16_one(src[2]+i);
+                dst[6*i+3] = fixed_to_int16_one(src[3]+i);
+                dst[6*i+4] = fixed_to_int16_one(src[4]+i);
+                dst[6*i+5] = fixed_to_int16_one(src[5]+i);
+            }
+        }
+        else { /* generic case: one pass per channel, j strides through dst by 'channels' */
+            for(c=0; c<channels; c++)
+                for(i=0, j=c; i<len; i++, j+=channels)
+                    dst[j] = fixed_to_int16_one(src[c]+i);
+        }
+    }
+}
+#endif /* ARCH_MIPS */
+
 void ff_float_interleave_c(float *dst, const float **src, unsigned int len,
                            int channels)
 {
@@ -75,9 +117,45 @@ void ff_float_interleave_c(float *dst, const float **src, unsigned int len,
     }
 }

+#if (ARCH_MIPS) /* int-to-int interleaver, compiled only when ARCH_MIPS is set */
+void ff_fixed_interleave_c(int *dst, const int **src, unsigned int len,
+                           int channels)
+{ /* for every channel c and sample i: dst[channels*i + c] = src[c][i], without any conversion */
+    int j, c;
+    unsigned int i;
+    if (channels == 6) { /* unrolled six-channel case */
+        for (i = 0; i < len; i++) {
+            dst[6*i]   = src[0][i];
+            dst[6*i+1] = src[1][i];
+            dst[6*i+2] = src[2][i];
+            dst[6*i+3] = src[3][i];
+            dst[6*i+4] = src[4][i];
+            dst[6*i+5] = src[5][i];
+        }
+    }
+    else if (channels == 2) { /* unrolled stereo case */
+        for (i = 0; i < len; i++) {
+            dst[2*i]   = src[0][i];
+            dst[2*i+1] = src[1][i];
+        }
+    } else if (channels == 1 && len < INT_MAX / sizeof(int)) { /* mono is a plain copy when the byte count stays below INT_MAX */
+        memcpy(dst, src[0], len * sizeof(int));
+    } else { /* generic case; also taken for mono when len fails the bound above */
+        for (c = 0; c < channels; c++)
+            for (i = 0, j = c; i < len; i++, j += channels)
+                dst[j] = src[c][i];
+    }
+}
+#endif /* ARCH_MIPS */
+
 av_cold void ff_fmt_convert_init(FmtConvertContext *c, AVCodecContext *avctx)
 {
     c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c;
+#if (ARCH_MIPS)
+    c->int32_to_fixed_fmul_scalar = int32_to_fixed_fmul_scalar_c;
+    c->fixed_to_int16_interleave  = fixed_to_int16_interleave_c;
+    c->fixed_interleave           = ff_fixed_interleave_c;
+#endif /* ARCH_MIPS */
     c->float_to_int16             = float_to_int16_c;
     c->float_to_int16_interleave  = float_to_int16_interleave_c;
     c->float_interleave           = ff_float_interleave_c;
@@ -86,6 +164,7 @@ av_cold void ff_fmt_convert_init(FmtConvertContext *c, AVCodecContext *avctx)
     if (HAVE_ALTIVEC) ff_fmt_convert_init_altivec(c, avctx);
     if (HAVE_MMX) ff_fmt_convert_init_x86(c, avctx);
     if (HAVE_MIPSFPU) ff_fmt_convert_init_mips(c);
+    if (HAVE_MIPSDSPR1) ff_fmt_convert_init_mips_fixed(c, avctx);
 }

 /* ffdshow custom code */
diff --git a/libavcodec/fmtconvert.h b/libavcodec/fmtconvert.h
index ab2caa2..49e7992 100644
--- a/libavcodec/fmtconvert.h
+++ b/libavcodec/fmtconvert.h
@@ -36,7 +36,55 @@ typedef struct FmtConvertContext {
      *            constraints: multiple of 8
      */
     void (*int32_to_float_fmul_scalar)(float *dst, const int *src, float mul, int len);
-
+#if (ARCH_MIPS)
+    /**
+     * Multiply an array of int32_t by an int32_t value and convert to int16_t.
+     * @param dst destination array of int16_t.
+     *            constraints: 16-byte aligned
+     * @param src source array of int32_t.
+     *            constraints: 16-byte aligned
+     * @param len number of elements in array.
+     *            constraints: multiple of 8
+     */
+    void (*int32_to_fixed_fmul_scalar)(int16_t *dst, const int *src, int mul, int len);
+    /**
+     * Convert an array of int32_t to an array of int16_t.
+     *
+     * @param dst destination array of int16_t.
+     *            constraints: 16-byte aligned
+     * @param src source array of int32_t.
+     *            constraints: 16-byte aligned
+     * @param len number of elements to convert.
+     *            constraints: multiple of 8
+     */
+    void (*fixed_to_int16)(int16_t *dst, const int *src, long len);
+    /**
+     * Convert multiple arrays of int32_t to an interleaved array of int16_t.
+     *
+     * @param dst destination array of interleaved int16_t.
+     *            constraints: 16-byte aligned
+     * @param src source array of int32_t arrays, one for each channel.
+     *            constraints: 16-byte aligned
+     * @param len number of elements to convert.
+     *            constraints: multiple of 8
+     * @param channels number of channels
+     */
+    void (*fixed_to_int16_interleave)(int16_t *dst, const int **src,
+                                      long len, int channels);
+    /**
+     * Convert multiple arrays of int32_t to an array of interleaved int32_t.
+     *
+     * @param dst destination array of interleaved int32_t.
+     *            constraints: 16-byte aligned
+     * @param src source array of int32_t arrays, one for each channel.
+     *            constraints: 16-byte aligned
+     * @param len number of elements to convert.
+     *            constraints: multiple of 8
+     * @param channels number of channels
+     */
+    void (*fixed_interleave)(int *dst, const int **src, unsigned int len,
+                             int channels);
+#endif /* ARCH_MIPS */
     /**
      * Convert an array of float to an array of int16_t.
      *
@@ -86,7 +134,12 @@ typedef struct FmtConvertContext {

 void ff_float_interleave_c(float *dst, const float **src, unsigned int len,
                            int channels);
-
+#if (ARCH_MIPS)
+void ff_fixed_interleave_c(int *dst, const int **src, unsigned int len,
+                           int channels);
+void fixed_interleave(int *dst, const int **src, unsigned int len, int channels);
+void ff_fmt_convert_init_mips_fixed(FmtConvertContext *c, AVCodecContext *avctx);
+#endif /* ARCH_MIPS */
 av_cold void ff_fmt_convert_init(FmtConvertContext *c, AVCodecContext *avctx);

 void ff_fmt_convert_init_arm(FmtConvertContext *c, AVCodecContext *avctx);
diff --git a/libavcodec/kbdwin.c b/libavcodec/kbdwin.c
index 2722312..4f76b20 100644
--- a/libavcodec/kbdwin.c
+++ b/libavcodec/kbdwin.c
@@ -46,3 +46,35 @@ av_cold void ff_kbd_window_init(float *window, float alpha, int n)
    for (i = 0; i < n; i++)
        window[i] = sqrt(local_window[i] / sum);
 }
+
+#if (ARCH_MIPS) /* int16_t variant of the window initializer, compiled only when ARCH_MIPS is set */
+av_cold void ff_kbd_fixed_window_init(int16_t *window, float alpha, int n)
+{ /* fills window[0..n-1] with values scaled to at most 32767 */
+    int i, j;
+    double sum = 0.0, bessel, tmp;
+    double local_window[FF_KBD_WINDOW_MAX]; /* running sums, one per window entry */
+    double alpha2 = (alpha * M_PI / n) * (alpha * M_PI / n);
+
+    assert(n <= FF_KBD_WINDOW_MAX); /* local_window holds at most FF_KBD_WINDOW_MAX entries */
+
+    for (i = 0; i < n; i++) {
+        tmp = i * (n - i) * alpha2;
+        bessel = 1.0;
+        for (j = BESSEL_I0_ITER; j > 0; j--) /* nested (Horner-style) evaluation with BESSEL_I0_ITER terms */
+            bessel = bessel * tmp / (j * j) + 1;
+        sum += bessel;
+        local_window[i] = sum; /* cumulative sum up to and including entry i */
+    }
+
+    sum++; /* the normalizer is the total plus one */
+    for (i = 0; i < n; i++)
+    {
+        int tmp; /* shadows the double 'tmp' declared at function scope */
+
+        tmp = (int)(32767*sqrt(local_window[i] / sum) + 0.5); /* scale to 32767; +0.5 rounds before the truncating cast */
+        if (tmp > 32767) /* clamp to the largest int16_t value */
+            tmp = 32767;
+        window[i] = (int16_t)tmp;
+    }
+}
+#endif
diff --git a/libavcodec/kbdwin.h b/libavcodec/kbdwin.h
index 4b93975..66621a2 100644
--- a/libavcodec/kbdwin.h
+++ b/libavcodec/kbdwin.h
@@ -18,7 +18,7 @@

 #ifndef AVCODEC_KBDWIN_H
 #define AVCODEC_KBDWIN_H
-
+#include "config.h"
 /**
  * Maximum window size for ff_kbd_window_init.
  */
@@ -31,5 +31,7 @@
  * @param   n       size of half window, max FF_KBD_WINDOW_MAX
  */
 void ff_kbd_window_init(float *window, float alpha, int n);
-
+#if (ARCH_MIPS)
+void ff_kbd_fixed_window_init(int16_t *window, float alpha, int n);
+#endif
 #endif /* AVCODEC_KBDWIN_H */
diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index ff46768..4830039 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -17,3 +17,7 @@ OBJS-$(CONFIG_FFT)                        += mips/fft_init_table.o
 MIPSFPU-OBJS-$(CONFIG_FFT)                += mips/fft_mips.o
 MIPSFPU-OBJS-$(HAVE_INLINE_ASM)           += mips/fmtconvert_mips.o
 MIPSFPU-OBJS-$(HAVE_INLINE_ASM)           += mips/dsputil_mips.o
+MIPSDSPR1-OBJS-$(HAVE_INLINE_ASM)         += mips/fmtconvert_mips_fixed.o
+MIPSDSPR2-OBJS-$(HAVE_INLINE_ASM)         += mips/dsputil_mips_fixed.o
+OBJS-$(CONFIG_FFT)                        += mips/fft_mips_fixed.o
+OBJS-$(CONFIG_AC3_FIXED_DECODER)          += mips/ac3dec_fixed.o
diff --git a/libavcodec/mips/ac3dec_fixed.c b/libavcodec/mips/ac3dec_fixed.c
new file mode 100644
index 0000000..c38b00c
--- /dev/null
+++ b/libavcodec/mips/ac3dec_fixed.c
@@ -0,0 +1,1660 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Stanislav Ocovaj (socovaj at mips.com)
+ *
+ * AC3 fixed-point decoder for MIPS platforms
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define CONFIG_FFT_FLOAT 0
+
+#include <stdio.h>
+#include <stddef.h>
+#include <math.h>
+#include <string.h>
+
+#include "libavutil/crc.h"
+#include "libavcodec/internal.h"
+#include "libavcodec/aac_ac3_parser.h"
+#include "libavcodec/ac3_parser.h"
+#include "ac3dec_fixed.h"
+#include "libavcodec/ac3dec_data.h"
+#include "libavcodec/ac3dsp.h"
+#include "libavcodec/kbdwin.h"
+
+#define INT2FIXED(x) (((x) << 15)  * ( x < -32767 ? -1 : 1)) /* x shifted left by 15, negated when x < -32767. NOTE(review): the second use of x is not parenthesized */
+#define MULT_FINT(x, y)  (((long long)(x) * (y) ) >> 16 ) /* 64-bit product shifted right by 16 */
+#define ADD_FINT(x, y) ( (x) + (y) ) /* plain addition */
+#define SUB_FINT(a, b) ( (a) - (b) ) /* plain subtraction */
+#define SUB_INT_WITH_FINT_AND_CONVERT_TO_FINT(x, y) ( INT2FIXED(x) - (y) ) /* x is converted with INT2FIXED before y is subtracted */
+#define DIV_INT_WITH_FINT_AND_CONVERT_TO_FINT(x, y) \
+                ( (((long long)(x) << 30)  * ( x < -32767 ? -1 : 1) ) / (y) ) /* 64-bit x shifted left by 30, then divided by y. NOTE(review): unparenthesized x here as well */
+#define MULT_INT_WITH_FINT_AND_CONVERT_TO_FINT(x, y) \
+                                    (((long long)INT2FIXED(x) * (y) ) >> 15 ) /* 64-bit product of INT2FIXED(x) and y, shifted right by 15 */
+
+/**
+ * table for ungrouping 3 values in 7 bits.
+ * used for exponents and bap=2 mantissas
+ */
+static uint8_t ungroup_3_in_7_bits_tab[128][3];
+
+
+/** tables for ungrouping mantissas */
+static int b1_mantissas[32][3];
+static int b2_mantissas[128][3];
+static int b3_mantissas[8];
+static int b4_mantissas[128][2];
+static int b5_mantissas[16];
+
+/**
+ * Quantization table: levels for symmetric. bits for asymmetric.
+ * reference: Table 7.18 Mapping of bap to Quantizer
+ */
+static const uint8_t quantization_tab[16] = {
+    0, 3, 5, 7, 11, 15,
+    5, 6, 7, 8, 9, 10, 11, 12, 14, 16
+};
+
+/** Adjustments in dB gain */
+static const int gain_levels_fixed[9] = {
+    LEVEL_FIXED_PLUS_3DB,
+    LEVEL_FIXED_PLUS_1POINT5DB,
+    LEVEL_FIXED_ONE,
+    LEVEL_FIXED_MINUS_1POINT5DB,
+    LEVEL_FIXED_MINUS_3DB,
+    LEVEL_FIXED_MINUS_4POINT5DB,
+    LEVEL_FIXED_MINUS_6DB,
+    LEVEL_FIXED_ZERO,
+    LEVEL_FIXED_MINUS_9DB
+};
+
+/**
+ * Table for center mix levels
+ * reference: Section 5.4.2.4 cmixlev
+ */
+static const uint8_t center_levels[4] = { 4, 5, 6, 5 };
+
+/**
+ * Table for surround mix levels
+ * reference: Section 5.4.2.5 surmixlev
+ */
+static const uint8_t surround_levels[4] = { 4, 6, 7, 6 };
+
+/**
+ * Table for default stereo downmixing coefficients
+ * reference: Section 7.8.2 Downmixing Into Two Channels
+ */
+static const uint8_t ac3_default_coeffs[8][5][2] = {
+    { { 2, 7 }, { 7, 2 },                               },
+    { { 4, 4 },                                         },
+    { { 2, 7 }, { 7, 2 },                               },
+    { { 2, 7 }, { 5, 5 }, { 7, 2 },                     },
+    { { 2, 7 }, { 7, 2 }, { 6, 6 },                     },
+    { { 2, 7 }, { 5, 5 }, { 7, 2 }, { 8, 8 },           },
+    { { 2, 7 }, { 7, 2 }, { 6, 7 }, { 7, 6 },           },
+    { { 2, 7 }, { 5, 5 }, { 7, 2 }, { 6, 7 }, { 7, 6 }, },
+};
+
+/**
+ * Symmetrical Dequantization: returns ((code - levels/2) << 24) / levels.
+ * reference: Section 7.3.3 Expansion of Mantissas for Symmetrical Quantization
+ *            Tables 7.19 to 7.23
+ */
+static inline int
+symmetric_dequant(int code, int levels)
+{
+    return ((code - (levels >> 1)) << 24) / levels; /* NOTE(review): the shifted value is negative whenever code < (levels >> 1); left-shifting a negative int is undefined in ISO C */
+}
+
+/**
+ * Initialize the file-static ungrouping and mantissa tables at runtime.
+ */
+static av_cold void ac3_tables_init(void)
+{
+    int i;
+
+    /* generate table for ungrouping 3 values in 7 bits (base-5 digits of i)
+       reference: Section 7.1.3 Exponent Decoding */
+    for(i=0; i<128; i++) {
+        ungroup_3_in_7_bits_tab[i][0] =  i / 25;
+        ungroup_3_in_7_bits_tab[i][1] = (i % 25) / 5;
+        ungroup_3_in_7_bits_tab[i][2] = (i % 25) % 5;
+    }
+
+    /* generate grouped mantissa tables
+       reference: Section 7.3.5 Ungrouping of Mantissas */
+    for(i=0; i<32; i++) {
+        /* bap=1 mantissas: 3 levels, codes taken from ff_ac3_ungroup_3_in_5_bits_tab */
+        b1_mantissas[i][0] = symmetric_dequant(ff_ac3_ungroup_3_in_5_bits_tab[i][0], 3);
+        b1_mantissas[i][1] = symmetric_dequant(ff_ac3_ungroup_3_in_5_bits_tab[i][1], 3);
+        b1_mantissas[i][2] = symmetric_dequant(ff_ac3_ungroup_3_in_5_bits_tab[i][2], 3);
+    }
+    for(i=0; i<128; i++) {
+        /* bap=2 mantissas: 5 levels, codes taken from the table built above */
+        b2_mantissas[i][0] = symmetric_dequant(ungroup_3_in_7_bits_tab[i][0], 5);
+        b2_mantissas[i][1] = symmetric_dequant(ungroup_3_in_7_bits_tab[i][1], 5);
+        b2_mantissas[i][2] = symmetric_dequant(ungroup_3_in_7_bits_tab[i][2], 5);
+
+        /* bap=4 mantissas: 11 levels, two codes per entry (i / 11 and i % 11) */
+        b4_mantissas[i][0] = symmetric_dequant(i / 11, 11);
+        b4_mantissas[i][1] = symmetric_dequant(i % 11, 11);
+    }
+    /* generate ungrouped mantissa tables
+       reference: Tables 7.21 and 7.23 */
+    for(i=0; i<7; i++) {
+        /* bap=3 mantissas: fills 7 of the 8 entries; the static array's last entry is left at zero */
+        b3_mantissas[i] = symmetric_dequant(i, 7);
+    }
+    for(i=0; i<15; i++) {
+        /* bap=5 mantissas: fills 15 of the 16 entries; the static array's last entry is left at zero */
+        b5_mantissas[i] = symmetric_dequant(i, 15);
+    }
+}
+
+/**
+ * AVCodec initialization: sets up tables, transforms, window and output format; always returns 0.
+ */
+av_cold int ac3_fixed_decode_init(AVCodecContext *avctx)
+{
+    AC3FixedDecodeContext *s = avctx->priv_data;
+    s->avctx = avctx;
+
+    ff_ac3_common_init();
+    ac3_tables_init();
+
+#if !CONFIG_HARDCODED_TABLES /* NOTE(review): the int results of the four init calls below are ignored */
+    ff_mdct_fixed_init(&s->imdct_256, 8, 1, 1);
+    ff_mdct_fixed_init(&s->imdct_512, 9, 1, 1);
+#else
+    ff_mdct_fixed_init_hardcoded(&s->imdct_256, 8, 1, 1);
+    ff_mdct_fixed_init_hardcoded_128(&s->imdct_512, 9, 1, 1); /* NOTE(review): the "_128" variant is paired with nbits=9 -- confirm the naming is intended */
+#endif
+
+    ff_kbd_fixed_window_init(s->window, 5.0, 256); /* 256-entry int16_t window, alpha = 5.0 */
+    dsputil_init(&s->dsp, avctx);
+
+    ff_fmt_convert_init(&s->fmt_conv, avctx);
+    av_lfg_init(&s->dith_state, 0); /* fixed seed 0 */
+
+
+    if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT) { /* NOTE(review): float output is kept as requested here -- confirm this integer decoder can really produce it */
+        /* replaces the float decoder's mul_bias = 1.0f */
+        s->mul_bias = 65536; /* 65536 == 1 << 16 */
+        avctx->sample_fmt = AV_SAMPLE_FMT_FLT;
+    } else {
+        /* replaces the float decoder's mul_bias = 32767.0f */
+        s->mul_bias = 2147418112; /* 2147418112 == 32767 << 16 */
+        avctx->sample_fmt = AV_SAMPLE_FMT_S16;
+    }
+
+    /* allow downmixing to stereo or mono: honour request_channels only when it is 1 or 2 and below channels */
+    if (avctx->channels > 0 && avctx->request_channels > 0 &&
+            avctx->request_channels < avctx->channels &&
+            avctx->request_channels <= 2) {
+        avctx->channels = avctx->request_channels;
+    }
+    s->downmixed = 1; /* set unconditionally, whether or not a downmix was requested above */
+
+    avcodec_get_frame_defaults(&s->frame);
+    avctx->coded_frame = &s->frame;
+
+    return 0;
+}
+
+/**
+ * Skip over the remaining 'bit stream info' fields of the AC-3 bitstream.
+ * The GetBitContext within AC3FixedDecodeContext (s->gbc) is advanced past
+ * them; no field value is stored. Always returns 0.
+ */
+static int ac3_parse_header_fixed(AC3FixedDecodeContext *s)
+{
+    GetBitContext *gbc = &s->gbc;
+    int i;
+
+    /* read the rest of the bsi. read twice for dual mono mode. */
+    i = !(s->channel_mode); /* i is 1 when channel_mode is 0, so the do/while below runs twice */
+    do {
+        skip_bits(gbc, 5); /* skip dialog normalization */
+        if (get_bits1(gbc))
+            skip_bits(gbc, 8); /* skip compression */
+        if (get_bits1(gbc))
+            skip_bits(gbc, 8); /* skip language code */
+        if (get_bits1(gbc))
+            skip_bits(gbc, 7); /* skip audio production information */
+    } while (i--);
+
+    skip_bits(gbc, 2); /* skip copyright bit and original bitstream bit */
+
+    /* skip the timecodes (or extra bitstream information for Alternate Syntax)
+       TODO: read & use the xbsi1 downmix levels */
+    if (get_bits1(gbc))
+        skip_bits(gbc, 14); /* skip timecode1 / xbsi1 */
+    if (get_bits1(gbc))
+        skip_bits(gbc, 14); /* skip timecode2 / xbsi2 */
+
+    /* skip additional bitstream info */
+    if (get_bits1(gbc)) {
+        i = get_bits(gbc, 6); /* 6-bit count; the loop below skips i + 1 bytes */
+        do {
+            skip_bits(gbc, 8);
+        } while(i--);
+    }
+    return 0;
+}
+
+/**
+ * Parse the frame header into the context; a bitstream_id above 10 (E-AC-3) is rejected with -1.
+ */
+static int parse_frame_header_fixed(AC3FixedDecodeContext *s)
+{
+    AC3HeaderInfo hdr;
+    int err;
+
+    err = avpriv_ac3_parse_header(&s->gbc, &hdr);
+    if(err) /* any nonzero parser result is returned to the caller unchanged */
+        return err;
+
+    /* get decoding parameters from header info */
+    s->bit_alloc_params.sr_code     = hdr.sr_code;
+    s->bitstream_mode               = hdr.bitstream_mode;
+    s->channel_mode                 = hdr.channel_mode;
+    s->channel_layout               = hdr.channel_layout;
+    s->lfe_on                       = hdr.lfe_on;
+    s->bit_alloc_params.sr_shift    = hdr.sr_shift;
+    s->sample_rate                  = hdr.sample_rate;
+    s->bit_rate                     = hdr.bit_rate;
+    s->channels                     = hdr.channels;
+    s->fbw_channels                 = s->channels - s->lfe_on; /* channel count without the LFE channel */
+    s->lfe_ch                       = s->fbw_channels + 1;     /* LFE index: one past the last of the other channels */
+    s->frame_size                   = hdr.frame_size;
+    s->center_mix_level             = hdr.center_mix_level;
+    s->surround_mix_level           = hdr.surround_mix_level;
+    s->num_blocks                   = hdr.num_blocks;
+    s->frame_type                   = hdr.frame_type;
+    s->substreamid                  = hdr.substreamid;
+
+    if(s->lfe_on) { /* fixed per-channel settings for the LFE channel */
+        s->start_freq[s->lfe_ch] = 0;
+        s->end_freq[s->lfe_ch] = 7;
+        s->num_exp_groups[s->lfe_ch] = 2;
+        s->channel_in_cpl[s->lfe_ch] = 0;
+    }
+
+    if (hdr.bitstream_id <= 10) { /* plain AC-3: set the syntax flags, then skip the rest of the BSI */
+        s->eac3                  = 0;
+        s->snr_offset_strategy   = 2;
+        s->block_switch_syntax   = 1;
+        s->dither_flag_syntax    = 1;
+        s->bit_allocation_syntax = 1;
+        s->fast_gain_syntax      = 0;
+        s->first_cpl_leak        = 0;
+        s->dba_syntax            = 1;
+        s->skip_syntax           = 1;
+        memset(s->channel_uses_aht, 0, sizeof(s->channel_uses_aht));
+        return ac3_parse_header_fixed(s);
+    }
+    else { /* bitstream_id > 10: this decoder has no E-AC-3 path */
+        av_log(s->avctx, AV_LOG_ERROR, "E-AC-3 support not compiled in\n");
+        return -1;
+    }
+}
+
+/**
+ * Set stereo downmixing coefficients (s->downmix_coeffs) based on frame header info.
+ * reference: Section 7.8.2 Downmixing Into Two Channels
+ */
+static void set_downmix_coeffs_fixed(AC3FixedDecodeContext *s)
+{
+    int i;
+
+    int cmix = gain_levels_fixed[center_levels[s->center_mix_level]];     /* centre mix gain */
+    int smix = gain_levels_fixed[surround_levels[s->surround_mix_level]]; /* surround mix gain */
+    int norm0, norm1;
+
+    for(i=0; i<s->fbw_channels; i++) { /* start from the default coefficients for this channel mode */
+        s->downmix_coeffs[i][0] = gain_levels_fixed[ac3_default_coeffs[s->channel_mode][i][0]];
+        s->downmix_coeffs[i][1] = gain_levels_fixed[ac3_default_coeffs[s->channel_mode][i][1]];
+    }
+    if(s->channel_mode > 1 && s->channel_mode & 1) { /* odd channel modes above 1: channel 1 gets cmix on both outputs */
+        s->downmix_coeffs[1][0] = s->downmix_coeffs[1][1] = cmix;
+    }
+    if(s->channel_mode == AC3_CHMODE_2F1R || s->channel_mode == AC3_CHMODE_3F1R) {
+        int nf = s->channel_mode - 2; /* index of the single rear channel */
+        s->downmix_coeffs[nf][0] = s->downmix_coeffs[nf][1] = (smix * 23170 + 0x4000) >> 15; /* 23170 / 32768 is about 1/sqrt(2); 0x4000 rounds before the shift */
+    }
+    if(s->channel_mode == AC3_CHMODE_2F2R || s->channel_mode == AC3_CHMODE_3F2R) {
+        int nf = s->channel_mode - 4; /* index of the first of the two rear channels */
+        s->downmix_coeffs[nf][0] = s->downmix_coeffs[nf+1][1] = smix;
+    }
+
+    /* renormalize: sum each output's coefficients, starting from 0 */
+    norm0 = norm1 = 0;
+    for(i=0; i<s->fbw_channels; i++) {
+        norm0 += s->downmix_coeffs[i][0];
+        norm1 += s->downmix_coeffs[i][1];
+    }
+    for(i=0; i<s->fbw_channels; i++) { /* rescale so each output's coefficients sum to about 1 << 12. NOTE(review): confirm norm0 and norm1 can never be 0 */
+        s->downmix_coeffs[i][0] = (s->downmix_coeffs[i][0] << 12) / norm0;
+        s->downmix_coeffs[i][1] = (s->downmix_coeffs[i][1] << 12) / norm1;
+    }
+
+    if(s->output_mode == AC3_CHMODE_MONO) { /* mono: fold both outputs into column 0, scaled by about 1/sqrt(2) */
+        for(i=0; i<s->fbw_channels; i++)
+            //s->downmix_coeffs[i][0] = (s->downmix_coeffs[i][0] + s->downmix_coeffs[i][1]) * LEVEL_FIXED_MINUS_3DB;
+            s->downmix_coeffs[i][0] = ((s->downmix_coeffs[i][0] + s->downmix_coeffs[i][1]) * 23170 + 0x4000) >> 15;
+    }
+}
+
+/**
+ * Unpack grouped differential exponents and expand them into absolute
+ * exponents.
+ * reference: Section 7.1.3 Exponent Decoding
+ *
+ * @param gbc          bit reader; 7 bits are read per group
+ * @param exp_strategy exponent strategy; every exponent is written
+ *                     exp_strategy times, plus once more for EXP_D45
+ * @param ngrps        number of 7-bit groups, three exponents each
+ * @param absexp       absolute exponent the first differential applies to
+ * @param dexps        receives the absolute exponents
+ * @return 0 on success, -1 as soon as an exponent leaves the range 0..24
+ */
+static int decode_exponents(GetBitContext *gbc, int exp_strategy, int ngrps,
+                            uint8_t absexp, int8_t *dexps)
+{
+    int diff[256];
+    int n_diff = 0;
+    int repeat = exp_strategy + (exp_strategy == EXP_D45);
+    int cur    = absexp;
+    int8_t *out = dexps;
+    int g, k, r;
+
+    /* only repeat counts of 1, 2 and 4 produce any output */
+    if (repeat != 1 && repeat != 2 && repeat != 4)
+        repeat = 0;
+
+    /* unpack groups */
+    for (g = 0; g < ngrps; g++) {
+        int code = get_bits(gbc, 7);
+
+        for (k = 0; k < 3; k++)
+            diff[n_diff++] = ungroup_3_in_7_bits_tab[code][k];
+    }
+
+    /* convert to absolute exps and expand groups */
+    for (k = 0; k < ngrps*3; k++) {
+        cur += diff[k] - 2;
+        if ((unsigned)cur > 24U)
+            return -1;
+        for (r = 0; r < repeat; r++)
+            *out++ = cur;
+    }
+    return 0;
+}
+
+/**
+ * Rebuild the coupling-range coefficients of every coupled channel by
+ * scaling the coupling channel coefficients with that channel's coupling
+ * coordinate of the band.
+ * reference: Section 7.4.3 Coupling Coordinate Format
+ */
+static void calc_transform_coeffs_cpl_fixed(AC3FixedDecodeContext *s)
+{
+    int band, ch, k;
+    int first = s->start_freq[CPL_CH];
+
+    for (band = 0; band < s->num_cpl_bands; band++) {
+        const int last = first + s->cpl_band_sizes[band]; /* one past the band */
+
+        for (ch = 1; ch <= s->fbw_channels; ch++) {
+            int coord;
+
+            if (!s->channel_in_cpl[ch])
+                continue;
+
+            coord = s->cpl_coords[ch][band] << 5;
+            for (k = first; k < last; k++)
+                s->fixed_coeffs[ch][k] = MULH(s->fixed_coeffs[CPL_CH][k] << 4, coord);
+
+            /* channel 2 is negated in bands whose phase flag is set */
+            if (ch == 2 && s->phase_flags[band]) {
+                for (k = first; k < last; k++)
+                    s->fixed_coeffs[2][k] = -s->fixed_coeffs[2][k];
+            }
+        }
+        first = last;
+    }
+}
+
+/**
+ * Grouped mantissas for 3-level 5-level and 11-level quantization
+ *
+ * Holds the mantissas that were unpacked from a shared code word but have
+ * not been consumed yet, so that the next coefficient with the same bap can
+ * take one without reading the bitstream again.
+ */
+typedef struct {
+    int b1_mant[2];     /* pending bap 1 mantissas, consumed from index b1 - 1 down to 0 */
+    int b2_mant[2];     /* pending bap 2 mantissas, consumed from index b2 - 1 down to 0 */
+    int b4_mant;        /* pending second mantissa of a bap 4 pair */
+    int b1;             /* number of entries of b1_mant still pending */
+    int b2;             /* number of entries of b2_mant still pending */
+    int b4;             /* nonzero while b4_mant is pending */
+} mant_groups;
+
+/**
+ * Decode the mantissas of one channel for the current block and store the
+ * resulting fixed-point transform coefficients.
+ *
+ * For every bin in [start_freq, end_freq) the mantissa selected by the bin's
+ * bap is obtained (from the bitstream, from a group still pending in m, or
+ * from the dither generator) and shifted right by the bin's exponent.
+ *
+ * @param s        decoder context
+ * @param ch_index channel to decode; CPL_CH is always dithered
+ * @param m        grouped-mantissa state carried over between calls
+ */
+static void ac3_decode_fixed_transform_coeffs_ch(
+    AC3FixedDecodeContext *s,
+    int ch_index,
+    mant_groups *m
+)
+{
+    int start_freq = s->start_freq[ch_index];
+    int end_freq = s->end_freq[ch_index];
+    uint8_t *baps = s->bap[ch_index];
+    int8_t *exps = s->dexps[ch_index];
+    int *coeffs = s->fixed_coeffs[ch_index];
+    int dither = (ch_index == CPL_CH) || s->dither_flag[ch_index];
+    GetBitContext *gbc = &s->gbc;
+    int freq;
+
+    for(freq = start_freq; freq < end_freq; freq++) {
+        int bap = baps[freq];
+        int mantissa;
+
+        switch (bap) {
+        case 0:
+            /* zero-bit mantissa: either silence or dither noise */
+            if (!dither) {
+                coeffs[freq] = 0;
+                continue;
+            }
+            mantissa = (av_lfg_get(&s->dith_state) & 0x7FFFFF) - 0x400000;
+            break;
+        case 1:
+            /* three mantissas share one 5-bit code */
+            if (m->b1) {
+                m->b1--;
+                mantissa = m->b1_mant[m->b1];
+            } else {
+                int bits = get_bits(gbc, 5);
+                mantissa      = b1_mantissas[bits][0];
+                m->b1_mant[1] = b1_mantissas[bits][1];
+                m->b1_mant[0] = b1_mantissas[bits][2];
+                m->b1         = 2;
+            }
+            break;
+        case 2:
+            /* three mantissas share one 7-bit code */
+            if (m->b2) {
+                m->b2--;
+                mantissa = m->b2_mant[m->b2];
+            } else {
+                int bits = get_bits(gbc, 7);
+                mantissa      = b2_mantissas[bits][0];
+                m->b2_mant[1] = b2_mantissas[bits][1];
+                m->b2_mant[0] = b2_mantissas[bits][2];
+                m->b2         = 2;
+            }
+            break;
+        case 3:
+            mantissa = b3_mantissas[get_bits(gbc, 3)];
+            break;
+        case 4:
+            /* two mantissas share one 7-bit code */
+            if (m->b4) {
+                m->b4 = 0;
+                mantissa = m->b4_mant;
+            } else {
+                int bits = get_bits(gbc, 7);
+                mantissa   = b4_mantissas[bits][0];
+                m->b4_mant = b4_mantissas[bits][1];
+                m->b4      = 1;
+            }
+            break;
+        case 5:
+            mantissa = b5_mantissas[get_bits(gbc, 4)];
+            break;
+        default: {
+            /* 6 to 15 */
+            int nbits = quantization_tab[bap];
+            /* Move the field to the top of the word and shift back down to
+               sign-extend it. The left shift is done on unsigned: shifting
+               a bit into the sign bit of a signed int is undefined. */
+            mantissa = (int)((unsigned)get_bits(gbc, nbits) << (32 - nbits)) >> 8;
+            break;
+        }
+        }
+
+        coeffs[freq] = mantissa >> exps[freq];
+    }
+}
+
+/**
+ * Produce the transform coefficients of one channel for block blk.
+ *
+ * Channels that do not use AHT are decoded from the bitstream. For channels
+ * that do, the coefficients come from s->pre_mantissa, shifted right by the
+ * bin's exponent.
+ */
+static void decode_fixed_transform_coeffs_ch(AC3FixedDecodeContext *s, int blk, int ch,
+                                    mant_groups *m)
+{
+    int k;
+
+    if (!s->channel_uses_aht[ch]) {
+        ac3_decode_fixed_transform_coeffs_ch(s, ch, m);
+        return;
+    }
+
+    /* if AHT is used, mantissas for all blocks are encoded in the first
+       block of the frame. */
+    for (k = s->start_freq[ch]; k < s->end_freq[ch]; k++)
+        s->fixed_coeffs[ch][k] = s->pre_mantissa[ch][k][blk] >> s->dexps[ch][k];
+}
+
+/*
+ * Decoding of the transform coefficients for a particular channel is done by
+ * decode_fixed_transform_coeffs_ch() above.
+ * reference: Section 7.3 Quantization and Decoding of Mantissas
+ */
+/**
+ * Clear the coupling-range coefficients that carry only dither noise
+ * (coupling channel bap of 0) in every coupled channel whose dither flag is
+ * off.
+ * reference: Section 7.3.4 Dither for Zero Bit Mantissas (bap=0)
+ */
+static void remove_dithering_fixed(AC3FixedDecodeContext *s)
+{
+    int ch, bin;
+
+    for (ch = 1; ch <= s->fbw_channels; ch++) {
+        if (s->dither_flag[ch] || !s->channel_in_cpl[ch])
+            continue;
+
+        for (bin = s->start_freq[CPL_CH]; bin < s->end_freq[CPL_CH]; bin++) {
+            if (s->bap[CPL_CH][bin] == 0)
+                s->fixed_coeffs[ch][bin] = 0;
+        }
+    }
+}
+
+/**
+ * Apply the dynamic range gain to a run of coefficients and narrow them to
+ * 16 bits.
+ *
+ * Every output is (src[k] * mul + round) >> shift, where
+ *   mul   = (dynrng & 0x1f) + 0x20,
+ *   shift = 12 - e, e being bits 5..7 of dynrng read as a signed 3-bit value,
+ *   round = 1 << (shift - 1).
+ *
+ * @param dst    output coefficients
+ * @param src    input coefficients
+ * @param dynrng dynamic range code word
+ * @param len    number of coefficients; they are processed in groups of 8,
+ *               so both buffers must hold len rounded up to a multiple of 8
+ */
+static void scale_coefs (
+    int16_t *dst,
+    const int *src,
+    int dynrng,
+    int len)
+{
+    int i, shift, round;
+    int16_t mul;
+
+    mul = (dynrng & 0x1f) + 0x20;
+    /* Sign-extend bits 5..7. The left shift is done on unsigned so that no
+       bit is shifted into the sign bit of a signed int (undefined). */
+    shift = 12 - ((int)((unsigned)dynrng << 24) >> 29);
+    round = 1 << (shift-1);
+    for (i=0; i<len; i+=8) {
+
+#if !(HAVE_INLINE_ASM && HAVE_MIPS32R2)
+        int k;
+
+        for (k = 0; k < 8; k++)
+            dst[i+k] = (src[i+k] * mul + round) >> shift;
+#else
+        int temp, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+
+        __asm__ volatile (
+            "lw     %[temp],    0(%[src_i])                 \n\t"
+            "lw     %[temp1],   4(%[src_i])                 \n\t"
+            "lw     %[temp2],   8(%[src_i])                 \n\t"
+            "mul    %[temp],    %[temp],        %[mul]      \n\t"
+            "lw     %[temp3],   12(%[src_i])                \n\t"
+            "mul    %[temp1],   %[temp1],       %[mul]      \n\t"
+            "lw     %[temp4],   16(%[src_i])                \n\t"
+            "addu   %[temp],    %[temp],        %[round]    \n\t"
+            "mul    %[temp3],   %[temp3],       %[mul]      \n\t"
+            "addu   %[temp1],   %[temp1],       %[round]    \n\t"
+            "srav   %[temp],    %[temp],        %[shift]    \n\t"
+            "mul    %[temp2],   %[temp2],       %[mul]      \n\t"
+            "srav   %[temp1],   %[temp1],       %[shift]    \n\t"
+            "lw     %[temp5],   20(%[src_i])                \n\t"
+            "addu   %[temp2],   %[temp2],       %[round]    \n\t"
+            "addu   %[temp3],   %[temp3],       %[round]    \n\t"
+            "mul    %[temp4],   %[temp4],       %[mul]      \n\t"
+            "srav   %[temp2],   %[temp2],       %[shift]    \n\t"
+            "srav   %[temp3],   %[temp3],       %[shift]    \n\t"
+            "mul    %[temp5],   %[temp5],       %[mul]      \n\t"
+            "lw     %[temp6],   24(%[src_i])                \n\t"
+            "lw     %[temp7],   28(%[src_i])                \n\t"
+            "addu   %[temp4],   %[temp4],       %[round]    \n\t"
+            "mul    %[temp6],   %[temp6],       %[mul]      \n\t"
+            "mul    %[temp7],   %[temp7],       %[mul]      \n\t"
+            "addu   %[temp5],   %[temp5],       %[round]    \n\t"
+            "srav   %[temp4],   %[temp4],       %[shift]    \n\t"
+            "srav   %[temp5],   %[temp5],       %[shift]    \n\t"
+            "addu   %[temp6],   %[temp6],       %[round]    \n\t"
+            "addu   %[temp7],   %[temp7],       %[round]    \n\t"
+            "srav   %[temp6],   %[temp6],       %[shift]    \n\t"
+            "srav   %[temp7],   %[temp7],       %[shift]    \n\t"
+
+            : [temp] "=&r" (temp), [temp1] "=&r" (temp1),
+              [temp2] "=&r" (temp2), [temp3] "=&r" (temp3),
+              [temp4] "=&r" (temp4), [temp5] "=&r" (temp5),
+              [temp6] "=&r" (temp6), [temp7] "=&r" (temp7)
+            : [src_i] "r" (src + i), [mul] "r" (mul),
+              [round] "r" (round), [shift] "r" (shift)
+            /* the asm loads from src behind the compiler's back, and mul
+               leaves HI/LO unpredictable */
+            : "memory", "hi", "lo"
+        );
+
+        dst[i  ]=temp;
+        dst[i+1]=temp1;
+        dst[i+2]=temp2;
+        dst[i+3]=temp3;
+
+        dst[i+4]=temp4;
+        dst[i+5]=temp5;
+        dst[i+6]=temp6;
+        dst[i+7]=temp7;
+#endif
+    }
+}
+
+/**
+ * Decode the transform coefficients of all channels for block blk,
+ * including the coupling channel, and zero every channel's bins from the end
+ * of its decoded range up to bin 255.
+ */
+static void decode_transform_coeffs_fixed(AC3FixedDecodeContext *s, int blk)
+{
+    mant_groups groups;
+    int cpl_done = 0;
+    int ch;
+
+    groups.b1 = 0;
+    groups.b2 = 0;
+    groups.b4 = 0;
+
+    for (ch = 1; ch <= s->channels; ch++) {
+        int bin;
+
+        /* transform coefficients for full-bandwidth channel */
+        decode_fixed_transform_coeffs_ch(s, blk, ch, &groups);
+
+        if (!s->channel_in_cpl[ch]) {
+            bin = s->end_freq[ch];
+        } else {
+            /* transform coefficients for coupling channel come right after
+               the coefficients for the first coupled channel */
+            if (!cpl_done) {
+                decode_fixed_transform_coeffs_ch(s, blk, CPL_CH, &groups);
+                calc_transform_coeffs_cpl_fixed(s);
+                cpl_done = 1;
+            }
+            bin = s->end_freq[CPL_CH];
+        }
+
+        /* clear the tail; the first store is unconditional */
+        s->fixed_coeffs[ch][bin] = 0;
+        for (bin++; bin < 256; bin++)
+            s->fixed_coeffs[ch][bin] = 0;
+    }
+
+    /* zero the dithered coefficients for appropriate channels */
+    remove_dithering_fixed(s);
+}
+
+/**
+ * Undo stereo rematrixing: in every band whose rematrixing flag is set,
+ * channel 1 becomes the sum and channel 2 the difference of the two coded
+ * channels.
+ * reference: Section 7.5.4 Rematrixing : Decoding Technique
+ */
+static void do_rematrixing_fixed(AC3FixedDecodeContext *s)
+{
+    int band, bin;
+    const int limit = FFMIN(s->end_freq[1], s->end_freq[2]);
+
+    for (band = 0; band < s->num_rematrixing_bands; band++) {
+        int stop;
+
+        if (!s->rematrixing_flags[band])
+            continue;
+
+        stop = FFMIN(limit, ff_ac3_rematrix_band_tab[band+1]);
+        for (bin = ff_ac3_rematrix_band_tab[band]; bin < stop; bin++) {
+            const int a = s->fixed_coeffs[1][bin];
+            const int b = s->fixed_coeffs[2][bin];
+
+            s->fixed_coeffs[1][bin] = a + b;
+            s->fixed_coeffs[2][bin] = a - b;
+        }
+    }
+}
+
+/**
+ * Inverse MDCT Transform.
+ * Turn the frequency domain coefficients of channels 1..channels into
+ * windowed time-domain samples in s->output[] and refresh s->delay[].
+ * reference: Section 7.9.4 Transformation Equations
+ */
+static inline void do_imdct_fixed(AC3FixedDecodeContext *s, int channels)
+{
+    int ch, k;
+
+    for (ch = 1; ch <= channels; ch++) {
+        if (!s->block_switch[ch]) {
+            /* no block switching: a single transform through imdct_512 */
+            s->imdct_512.imdct_fixed_half(&s->imdct_512, s->tmp_output,
+                                          s->transform_coeffs[ch]);
+            s->dsp.vector_fmul_window_fixed(s->output[ch-1], s->delay[ch-1],
+                                            s->tmp_output, s->window, 128);
+            memcpy(s->delay[ch-1], s->tmp_output+128, 128*sizeof(int16_t));
+        } else {
+            /* block switching: imdct_256 is run on the even coefficients,
+               then on the odd ones */
+            FFTSample *half = s->tmp_output+128;
+
+            for (k = 0; k < 128; k++)
+                half[k] = s->transform_coeffs[ch][2*k];
+            s->imdct_256.imdct_fixed_half(&s->imdct_256, s->tmp_output, half);
+            s->dsp.vector_fmul_window_fixed(s->output[ch-1], s->delay[ch-1],
+                                            s->tmp_output, s->window, 128);
+
+            for (k = 0; k < 128; k++)
+                half[k] = s->transform_coeffs[ch][2*k+1];
+            s->imdct_256.imdct_fixed_half(&s->imdct_256, s->delay[ch-1], half);
+        }
+    }
+}
+
+/**
+ * Downmix the output to mono or stereo, in place.
+ *
+ * @param samples input channels, 256 samples each; the result overwrites
+ *                channel 0 (and channel 1 for stereo)
+ * @param matrix  downmix coefficients, one pair per input channel; products
+ *                are rounded and shifted right by 12
+ * @param out_ch  number of output channels; values other than 1 and 2 leave
+ *                samples untouched
+ * @param in_ch   number of input channels
+ * @param len     number of samples to process per channel
+ */
+void ff_ac3_downmix_c_fixed(int (*samples)[256], int (*matrix)[2], int out_ch, int in_ch, int len)
+{
+    int n, ch;
+
+    switch (out_ch) {
+    case 2:
+        for (n = 0; n < len; n++) {
+            int left = 0, right = 0;
+
+            for (ch = 0; ch < in_ch; ch++) {
+                left  += samples[ch][n] * matrix[ch][0];
+                right += samples[ch][n] * matrix[ch][1];
+            }
+            samples[0][n] = (left  + 2048) >> 12;
+            samples[1][n] = (right + 2048) >> 12;
+        }
+        break;
+    case 1:
+        for (n = 0; n < len; n++) {
+            int mono = 0;
+
+            for (ch = 0; ch < in_ch; ch++)
+                mono += samples[ch][n] * matrix[ch][0];
+            samples[0][n] = (mono + 2048) >> 12;
+        }
+        break;
+    default:
+        break;
+    }
+}
+
+/**
+ * Fill in the bit allocation pointers (bap) for the bins in [start, end).
+ *
+ * @param mask       mask values, indexed by band
+ * @param psd        psd values, indexed by bin
+ * @param start      first bin
+ * @param end        one past the last bin
+ * @param snr_offset SNR offset; -960 forces all AC3_MAX_COEFS baps to zero
+ * @param floor      floor value subtracted before and added back after the
+ *                   mask is quantized
+ * @param bap_tab    lookup table indexed by the clipped address 0..63
+ * @param bap        output array, indexed by bin
+ */
+static void ac3_bit_alloc_calc_bap_c(int16_t *mask, int16_t *psd,
+                                     int start, int end,
+                                     int snr_offset, int floor,
+                                     const uint8_t *bap_tab, uint8_t *bap)
+{
+    int bin = start;
+    int band;
+    int more;
+
+    /* special case, if snr offset is -960, set all bap's to zero */
+    if (snr_offset == -960) {
+        memset(bap, 0, AC3_MAX_COEFS);
+        return;
+    }
+
+    band = ff_ac3_bin_to_band_tab[start];
+    do {
+        const int threshold = (FFMAX(mask[band] - snr_offset - floor, 0) & 0x1FE0) + floor;
+        const int stop      = FFMIN(ff_ac3_band_start_tab[band+1], end);
+
+        while (bin < stop) {
+            int address = av_clip((psd[bin] - threshold) >> 5, 0, 63);
+            bap[bin++] = bap_tab[address];
+        }
+
+        /* continue while the band just handled starts before end */
+        more = end > ff_ac3_band_start_tab[band];
+        band++;
+    } while (more);
+}
+
+/**
+ * Decode the band structure used for coupling, spectral extension, or
+ * enhanced coupling.
+ * The structure holds one flag per subband after the first: 1 merges the
+ * subband into the previous band, 0 makes it the start of a new band.
+ *
+ * @param[in] gbc bit reader context
+ * @param[in] blk block number
+ * @param[in] eac3 flag to indicate E-AC-3
+ * @param[in] ecpl flag to indicate enhanced coupling
+ * @param[in] start_subband subband number for start of range
+ * @param[in] end_subband subband number for end of range
+ * @param[in] default_band_struct default band structure table
+ * @param[out] num_bands number of bands (optionally NULL)
+ * @param[out] band_sizes array containing the number of bins in each band (optionally NULL)
+ *
+ * Nothing is written to the outputs when the structure is neither coded in
+ * the bitstream nor taken from the default table (E-AC-3, blk != 0).
+ */
+static void decode_band_structure(GetBitContext *gbc, int blk, int eac3,
+                                  int ecpl, int start_subband, int end_subband,
+                                  const uint8_t *default_band_struct,
+                                  int *num_bands, uint8_t *band_sizes)
+{
+    const int n_subbands = end_subband - start_subband;
+    const uint8_t *merge_flags;
+    uint8_t coded_flags[22];
+    uint8_t sizes[22];
+    int count = 0;
+    int sb;
+
+    /* decode band structure from bitstream or use default */
+    if (!eac3 || get_bits1(gbc)) {
+        for (sb = 0; sb < n_subbands - 1; sb++)
+            coded_flags[sb] = get_bits1(gbc);
+        merge_flags = coded_flags;
+    } else if (!blk) {
+        merge_flags = &default_band_struct[start_subband+1];
+    } else {
+        /* no change in band structure */
+        return;
+    }
+
+    /* derive the number of bands and their sizes from the flags.
+       note that the first 4 subbands in enhanced coupling span only 6 bins
+       instead of 12. */
+    if (num_bands || band_sizes) {
+        int cur = 0;
+
+        count    = n_subbands;
+        sizes[0] = ecpl ? 6 : 12;
+        for (sb = 1; sb < n_subbands; sb++) {
+            const int sb_size = (ecpl && sb < 4) ? 6 : 12;
+
+            if (merge_flags[sb-1]) {
+                count--;
+                sizes[cur] += sb_size;
+            } else {
+                sizes[++cur] = sb_size;
+            }
+        }
+    }
+
+    /* set optional output params */
+    if (num_bands)
+        *num_bands = count;
+    if (band_sizes)
+        memcpy(band_sizes, sizes, count);
+}
+
+int end_freq_inv_tab[8] = /* reciprocals scaled by 2^32: entry k equals
+                           * round(2^32 / d) for d = 85, 97, 109, 133, 157,
+                           * 181, 205, 229 (k = 0..7). The spx blending code
+                           * multiplies by an entry and shifts right by 32
+                           * in place of a division.
+                           * NOTE(review): decode_audio_block_fixed() indexes
+                           * this table with get_bits(gbc, 3) + 5, i.e. 5..12,
+                           * which runs past the 8 entries -- confirm the
+                           * intended index. */
+{
+  50529027, 44278013, 39403370, 32292987, 27356480, 23729101, 20951060, 18755316
+};
+
+/**
+ * Bitwise square root with 23 fractional bits.
+ *
+ * Builds the result from bit 22 down to bit 0, keeping a bit whenever
+ * (candidate * candidate) >> 23 does not exceed x.
+ *
+ * @param x input value
+ * @return the accumulated root, in the range 0..0x7fffff
+ */
+static int ac3_fixed_sqrt(int x)
+{
+    int root = 0;
+    int bit;
+
+    for (bit = 0x400000; bit > 0; bit >>= 1) {
+        const long long trial = root + bit;
+
+        if (x >= (int)((trial * trial) >> 23))
+            root += bit;
+    }
+    return root;
+}
+
+/**
+ * Decode a single audio block from the AC-3 bitstream.
+ */
+static int decode_audio_block_fixed(AC3FixedDecodeContext *s, int blk)
+{
+    int fbw_channels = s->fbw_channels;
+    int channel_mode = s->channel_mode;
+    int i, bnd, seg, ch;
+    int different_transforms;
+    int cpl_in_use;
+    GetBitContext *gbc = &s->gbc;
+    uint8_t bit_alloc_stages[AC3_MAX_CHANNELS];
+
+    memset(bit_alloc_stages, 0, AC3_MAX_CHANNELS);
+
+    /* block switch flags */
+    different_transforms = 0;
+    if (s->block_switch_syntax) {
+        for (ch = 1; ch <= fbw_channels; ch++) {
+            s->block_switch[ch] = get_bits1(gbc);
+            if(ch > 1 && s->block_switch[ch] != s->block_switch[1])
+                different_transforms = 1;
+        }
+    }
+
+    /* dithering flags */
+    if (s->dither_flag_syntax) {
+        for (ch = 1; ch <= fbw_channels; ch++) {
+            s->dither_flag[ch] = get_bits1(gbc);
+        }
+    }
+
+    /* dynamic range */
+    i = !(s->channel_mode);
+    do {
+        if(get_bits1(gbc)) {
+            s->dynamic_range[i] = get_bits(gbc, 8);
+        } else if(blk == 0) {
+            s->dynamic_range[i] = 0;
+        }
+    } while(i--);
+
+    /* spectral extension strategy */
+    if (s->eac3 && (!blk || get_bits1(gbc))) {
+        s->spx_in_use = get_bits1(gbc);
+        if (s->spx_in_use) {
+            int dst_start_freq, dst_end_freq, src_start_freq,
+                start_subband, end_subband;
+
+            /* determine which channels use spx */
+            if (s->channel_mode == AC3_CHMODE_MONO) {
+                s->channel_uses_spx[1] = 1;
+            } else {
+                for (ch = 1; ch <= fbw_channels; ch++)
+                    s->channel_uses_spx[ch] = get_bits1(gbc);
+            }
+
+            /* get the frequency bins of the spx copy region and the spx start
+               and end subbands */
+            dst_start_freq = get_bits(gbc, 2);
+            start_subband  = get_bits(gbc, 3) + 2;
+            if (start_subband > 7)
+                start_subband += start_subband - 7;
+            end_subband    = get_bits(gbc, 3) + 5;
+            s->spx_dst_end_freq = end_freq_inv_tab[end_subband];
+            end_subband += 5;
+            if (end_subband   > 7)
+                end_subband   += end_subband   - 7;
+            dst_start_freq = dst_start_freq * 12 + 25;
+            src_start_freq = start_subband  * 12 + 25;
+            dst_end_freq   = end_subband    * 12 + 25;
+
+            /* check validity of spx ranges */
+            if (start_subband >= end_subband) {
+                av_log(s->avctx, AV_LOG_ERROR, "invalid spectral extension "
+                       "range (%d >= %d)\n", start_subband, end_subband);
+                return -1;
+            }
+            if (dst_start_freq >= src_start_freq) {
+                av_log(s->avctx, AV_LOG_ERROR, "invalid spectral extension "
+                       "copy start bin (%d >= %d)\n", dst_start_freq, src_start_freq);
+                return -1;
+            }
+
+            s->spx_dst_start_freq = dst_start_freq;
+            s->spx_src_start_freq = src_start_freq;
+
+            decode_band_structure(gbc, blk, s->eac3, 0,
+                                  start_subband, end_subband,
+                                  ff_eac3_default_spx_band_struct,
+                                  &s->num_spx_bands,
+                                  s->spx_band_sizes);
+        } else {
+            for (ch = 1; ch <= fbw_channels; ch++) {
+                s->channel_uses_spx[ch] = 0;
+                s->first_spx_coords[ch] = 1;
+            }
+        }
+    }
+
+    /* spectral extension coordinates */
+    if (s->spx_in_use) {
+        for (ch = 1; ch <= fbw_channels; ch++) {
+            if (s->channel_uses_spx[ch]) {
+                if (s->first_spx_coords[ch] || get_bits1(gbc)) {
+
+                    int spx_blend;
+                    int bin, master_spx_coord;
+
+                    s->first_spx_coords[ch] = 0;
+
+                    spx_blend = MULT_INT_WITH_FINT_AND_CONVERT_TO_FINT(get_bits(gbc, 5) , 2048);
+                    master_spx_coord = get_bits(gbc, 2) * 3;
+
+                    bin = s->spx_src_start_freq;
+                    for (bnd = 0; bnd < s->num_spx_bands; bnd++) {
+                        long long accu;
+                        int bandsize;
+                        int spx_coord_exp, spx_coord_mant;
+                        int nratio, sblend, nblend;
+
+                        /* calculate blending factors */
+                        bandsize = s->spx_band_sizes[bnd];
+                        accu = (long long)((bin << 23) + (bandsize << 22)) * s->spx_dst_end_freq;
+                        nratio = (int)(accu >> 32);
+                        nratio -= spx_blend << 18;
+
+                        if (nratio < 0)
+                        {
+                            nblend = 0;
+                            sblend = 0x800000;
+                        }
+                        else if (nratio > 0x7fffff)
+                        {
+                            nblend = 0x800000;
+                            sblend = 0;
+                        }
+                        else
+                        {
+                            nblend = ac3_fixed_sqrt(nratio);
+                            accu = (long long)nblend * 1859775393;
+                            nblend = (int)((accu + (1<<29)) >> 30);
+                            sblend = ac3_fixed_sqrt(0x800000 - nratio);
+                        }
+
+                        bin += bandsize;
+
+                        /* decode spx coordinates */
+                        spx_coord_exp  = get_bits(gbc, 4);
+                        spx_coord_mant = get_bits(gbc, 2);
+                        if (spx_coord_exp == 15) spx_coord_mant <<= 1;
+                        else spx_coord_mant += 4;
+                        spx_coord_mant <<= (25 - spx_coord_exp - master_spx_coord);
+
+                        /* multiply noise and signal blending factors by spx coordinate */
+                        accu = (long long)nblend * spx_coord_mant;
+                        s->spx_noise_blend[ch][bnd]  = (int)((accu + (1<<22)) >> 23);
+                        accu = (long long)sblend * spx_coord_mant;
+                        s->spx_signal_blend[ch][bnd] = (int)((accu + (1<<22)) >> 23);
+                    }
+                }
+            } else {
+                s->first_spx_coords[ch] = 1;
+            }
+        }
+    }
+
+    /* coupling strategy */
+    if (s->eac3 ? s->cpl_strategy_exists[blk] : get_bits1(gbc)) {
+        memset(bit_alloc_stages, 3, AC3_MAX_CHANNELS);
+        if (!s->eac3)
+            s->cpl_in_use[blk] = get_bits1(gbc);
+        if (s->cpl_in_use[blk]) {
+            /* coupling in use */
+            int cpl_start_subband, cpl_end_subband;
+
+            if (channel_mode < AC3_CHMODE_STEREO) {
+                av_log(s->avctx, AV_LOG_ERROR, "coupling not allowed in mono or dual-mono\n");
+                return -1;
+            }
+
+            /* check for enhanced coupling */
+            if (s->eac3 && get_bits1(gbc)) {
+                /* TODO: parse enhanced coupling strategy info */
+                av_log_missing_feature(s->avctx, "Enhanced coupling", 1);
+                return -1;
+            }
+
+            /* determine which channels are coupled */
+            if (s->eac3 && s->channel_mode == AC3_CHMODE_STEREO) {
+                s->channel_in_cpl[1] = 1;
+                s->channel_in_cpl[2] = 1;
+            } else {
+                for (ch = 1; ch <= fbw_channels; ch++)
+                    s->channel_in_cpl[ch] = get_bits1(gbc);
+            }
+
+            /* phase flags in use */
+            if (channel_mode == AC3_CHMODE_STEREO)
+                s->phase_flags_in_use = get_bits1(gbc);
+
+            /* coupling frequency range */
+            cpl_start_subband = get_bits(gbc, 4);
+            cpl_end_subband = s->spx_in_use ? (s->spx_src_start_freq - 37) / 12 :
+                                              get_bits(gbc, 4) + 3;
+
+            if (cpl_start_subband >= cpl_end_subband) {
+                av_log(s->avctx, AV_LOG_ERROR, "invalid coupling range (%d >= %d)\n",
+                       cpl_start_subband, cpl_end_subband);
+                return -1;
+            }
+
+            s->start_freq[CPL_CH] = cpl_start_subband * 12 + 37;
+            s->end_freq[CPL_CH]   = cpl_end_subband   * 12 + 37;
+
+            decode_band_structure(gbc, blk, s->eac3, 0, cpl_start_subband,
+                                  cpl_end_subband,
+                                  ff_eac3_default_cpl_band_struct,
+                                  &s->num_cpl_bands, s->cpl_band_sizes);
+        } else {
+            /* coupling not in use */
+            for (ch = 1; ch <= fbw_channels; ch++) {
+                s->channel_in_cpl[ch] = 0;
+                s->first_cpl_coords[ch] = 1;
+            }
+            s->first_cpl_leak = s->eac3;
+            s->phase_flags_in_use = 0;
+        }
+    } else if (!s->eac3) {
+        if(!blk) {
+            av_log(s->avctx, AV_LOG_ERROR, "new coupling strategy must be present in block 0\n");
+            return -1;
+        } else {
+            s->cpl_in_use[blk] = s->cpl_in_use[blk-1];
+        }
+    }
+    cpl_in_use = s->cpl_in_use[blk];
+
+    /* coupling coordinates */
+    if (cpl_in_use) {
+        int cpl_coords_exist = 0;
+
+        for (ch = 1; ch <= fbw_channels; ch++) {
+            if (s->channel_in_cpl[ch]) {
+                if ((s->eac3 && s->first_cpl_coords[ch]) || get_bits1(gbc)) {
+                    int master_cpl_coord, cpl_coord_exp, cpl_coord_mant;
+                    s->first_cpl_coords[ch] = 0;
+                    cpl_coords_exist = 1;
+                    master_cpl_coord = 3 * get_bits(gbc, 2);
+                    for (bnd = 0; bnd < s->num_cpl_bands; bnd++) {
+                        cpl_coord_exp = get_bits(gbc, 4);
+                        cpl_coord_mant = get_bits(gbc, 4);
+                        if (cpl_coord_exp == 15)
+                            s->cpl_coords[ch][bnd] = cpl_coord_mant << 22;
+                        else
+                            s->cpl_coords[ch][bnd] = (cpl_coord_mant + 16) << 21;
+                        s->cpl_coords[ch][bnd] >>= (cpl_coord_exp + master_cpl_coord);
+                    }
+                } else if (!blk) {
+                    av_log(s->avctx, AV_LOG_ERROR, "new coupling coordinates must be present in block 0\n");
+                    return -1;
+                }
+            } else {
+                /* channel not in coupling */
+                s->first_cpl_coords[ch] = 1;
+            }
+        }
+        /* phase flags */
+        if (channel_mode == AC3_CHMODE_STEREO && cpl_coords_exist) {
+            for (bnd = 0; bnd < s->num_cpl_bands; bnd++) {
+                s->phase_flags[bnd] = s->phase_flags_in_use? get_bits1(gbc) : 0;
+            }
+        }
+    }
+
+    /* stereo rematrixing strategy and band structure */
+    if (channel_mode == AC3_CHMODE_STEREO) {
+        if ((s->eac3 && !blk) || get_bits1(gbc)) {
+            s->num_rematrixing_bands = 4;
+            if (cpl_in_use && s->start_freq[CPL_CH] <= 61) {
+                s->num_rematrixing_bands -= 1 + (s->start_freq[CPL_CH] == 37);
+            } else if (s->spx_in_use && s->spx_src_start_freq <= 61) {
+                s->num_rematrixing_bands--;
+            }
+            for(bnd=0; bnd<s->num_rematrixing_bands; bnd++)
+                s->rematrixing_flags[bnd] = get_bits1(gbc);
+        } else if (!blk) {
+            av_log(s->avctx, AV_LOG_WARNING, "Warning: new rematrixing strategy not present in block 0\n");
+            s->num_rematrixing_bands = 0;
+        }
+    }
+
+    /* exponent strategies for each channel */
+    for (ch = !cpl_in_use; ch <= s->channels; ch++) {
+        if (!s->eac3)
+            s->exp_strategy[blk][ch] = get_bits(gbc, 2 - (ch == s->lfe_ch));
+        if(s->exp_strategy[blk][ch] != EXP_REUSE)
+            bit_alloc_stages[ch] = 3;
+    }
+
+    /* channel bandwidth */
+    for (ch = 1; ch <= fbw_channels; ch++) {
+        s->start_freq[ch] = 0;
+        if (s->exp_strategy[blk][ch] != EXP_REUSE) {
+            int group_size;
+            int prev = s->end_freq[ch];
+            if (s->channel_in_cpl[ch])
+                s->end_freq[ch] = s->start_freq[CPL_CH];
+            else if (s->channel_uses_spx[ch])
+                s->end_freq[ch] = s->spx_src_start_freq;
+            else {
+                int bandwidth_code = get_bits(gbc, 6);
+                if (bandwidth_code > 60) {
+                    av_log(s->avctx, AV_LOG_ERROR, "bandwidth code = %d > 60\n", bandwidth_code);
+                    return -1;
+                }
+                s->end_freq[ch] = bandwidth_code * 3 + 73;
+            }
+            group_size = 3 << (s->exp_strategy[blk][ch] - 1);
+            s->num_exp_groups[ch] = (s->end_freq[ch]+group_size-4) / group_size;
+            if(blk > 0 && s->end_freq[ch] != prev)
+                memset(bit_alloc_stages, 3, AC3_MAX_CHANNELS);
+        }
+    }
+    if (cpl_in_use && s->exp_strategy[blk][CPL_CH] != EXP_REUSE) {
+        s->num_exp_groups[CPL_CH] = (s->end_freq[CPL_CH] - s->start_freq[CPL_CH]) /
+                                    (3 << (s->exp_strategy[blk][CPL_CH] - 1));
+    }
+
+    /* decode exponents for each channel */
+    for (ch = !cpl_in_use; ch <= s->channels; ch++) {
+        if (s->exp_strategy[blk][ch] != EXP_REUSE) {
+            s->dexps[ch][0] = get_bits(gbc, 4) << !ch;
+            if (decode_exponents(gbc, s->exp_strategy[blk][ch],
+                                 s->num_exp_groups[ch], s->dexps[ch][0],
+                                 &s->dexps[ch][s->start_freq[ch]+!!ch])) {
+                av_log(s->avctx, AV_LOG_ERROR, "exponent out-of-range\n");
+                return -1;
+            }
+            if(ch != CPL_CH && ch != s->lfe_ch)
+                skip_bits(gbc, 2); /* skip gainrng */
+        }
+    }
+
+    /* bit allocation information */
+    if (s->bit_allocation_syntax) {
+        if (get_bits1(gbc)) {
+            s->bit_alloc_params.slow_decay = ff_ac3_slow_decay_tab[get_bits(gbc, 2)] >> s->bit_alloc_params.sr_shift;
+            s->bit_alloc_params.fast_decay = ff_ac3_fast_decay_tab[get_bits(gbc, 2)] >> s->bit_alloc_params.sr_shift;
+            s->bit_alloc_params.slow_gain  = ff_ac3_slow_gain_tab[get_bits(gbc, 2)];
+            s->bit_alloc_params.db_per_bit = ff_ac3_db_per_bit_tab[get_bits(gbc, 2)];
+            s->bit_alloc_params.floor  = ff_ac3_floor_tab[get_bits(gbc, 3)];
+            for(ch=!cpl_in_use; ch<=s->channels; ch++)
+                bit_alloc_stages[ch] = FFMAX(bit_alloc_stages[ch], 2);
+        } else if (!blk) {
+            av_log(s->avctx, AV_LOG_ERROR, "new bit allocation info must be present in block 0\n");
+            return -1;
+        }
+    }
+
+    /* signal-to-noise ratio offsets and fast gains (signal-to-mask ratios) */
+    if(!s->eac3 || !blk){
+        if(s->snr_offset_strategy && get_bits1(gbc)) {
+            int snr = 0;
+            int csnr;
+            csnr = (get_bits(gbc, 6) - 15) << 4;
+            for (i = ch = !cpl_in_use; ch <= s->channels; ch++) {
+                /* snr offset */
+                if (ch == i || s->snr_offset_strategy == 2)
+                    snr = (csnr + get_bits(gbc, 4)) << 2;
+                /* run at least last bit allocation stage if snr offset changes */
+                if(blk && s->snr_offset[ch] != snr) {
+                    bit_alloc_stages[ch] = FFMAX(bit_alloc_stages[ch], 1);
+                }
+                s->snr_offset[ch] = snr;
+
+                /* fast gain (normal AC-3 only) */
+                if (!s->eac3) {
+                    int prev = s->fast_gain[ch];
+                    s->fast_gain[ch] = ff_ac3_fast_gain_tab[get_bits(gbc, 3)];
+                    /* run last 2 bit allocation stages if fast gain changes */
+                    if(blk && prev != s->fast_gain[ch])
+                        bit_alloc_stages[ch] = FFMAX(bit_alloc_stages[ch], 2);
+                }
+            }
+        } else if (!s->eac3 && !blk) {
+            av_log(s->avctx, AV_LOG_ERROR, "new snr offsets must be present in block 0\n");
+            return -1;
+        }
+    }
+
+    /* fast gain (E-AC-3 only) */
+    if (s->fast_gain_syntax && get_bits1(gbc)) {
+        for (ch = !cpl_in_use; ch <= s->channels; ch++) {
+            int prev = s->fast_gain[ch];
+            s->fast_gain[ch] = ff_ac3_fast_gain_tab[get_bits(gbc, 3)];
+            /* run last 2 bit allocation stages if fast gain changes */
+            if(blk && prev != s->fast_gain[ch])
+                bit_alloc_stages[ch] = FFMAX(bit_alloc_stages[ch], 2);
+        }
+    } else if (s->eac3 && !blk) {
+        for (ch = !cpl_in_use; ch <= s->channels; ch++)
+            s->fast_gain[ch] = ff_ac3_fast_gain_tab[4];
+    }
+
+    /* coupling leak information */
+    if (cpl_in_use) {
+        if (s->first_cpl_leak || get_bits1(gbc)) {
+            int fl = get_bits(gbc, 3);
+            int sl = get_bits(gbc, 3);
+            /* run last 2 bit allocation stages for coupling channel if
+               coupling leak changes */
+            if(blk && (fl != s->bit_alloc_params.cpl_fast_leak ||
+                       sl != s->bit_alloc_params.cpl_slow_leak)) {
+                bit_alloc_stages[CPL_CH] = FFMAX(bit_alloc_stages[CPL_CH], 2);
+            }
+            s->bit_alloc_params.cpl_fast_leak = fl;
+            s->bit_alloc_params.cpl_slow_leak = sl;
+        } else if (!s->eac3 && !blk) {
+            av_log(s->avctx, AV_LOG_ERROR, "new coupling leak info must be present in block 0\n");
+            return -1;
+        }
+        s->first_cpl_leak = 0;
+    }
+
+    /* delta bit allocation information */
+    if (s->dba_syntax && get_bits1(gbc)) {
+        /* delta bit allocation exists (strategy) */
+        for (ch = !cpl_in_use; ch <= fbw_channels; ch++) {
+            s->dba_mode[ch] = get_bits(gbc, 2);
+            if (s->dba_mode[ch] == DBA_RESERVED) {
+                av_log(s->avctx, AV_LOG_ERROR, "delta bit allocation strategy reserved\n");
+                return -1;
+            }
+            bit_alloc_stages[ch] = FFMAX(bit_alloc_stages[ch], 2);
+        }
+        /* channel delta offset, len and bit allocation */
+        for (ch = !cpl_in_use; ch <= fbw_channels; ch++) {
+            if (s->dba_mode[ch] == DBA_NEW) {
+                s->dba_nsegs[ch] = get_bits(gbc, 3) + 1;
+                for (seg = 0; seg < s->dba_nsegs[ch]; seg++) {
+                    s->dba_offsets[ch][seg] = get_bits(gbc, 5);
+                    s->dba_lengths[ch][seg] = get_bits(gbc, 4);
+                    s->dba_values[ch][seg] = get_bits(gbc, 3);
+                }
+                /* run last 2 bit allocation stages if new dba values */
+                bit_alloc_stages[ch] = FFMAX(bit_alloc_stages[ch], 2);
+            }
+        }
+    } else if(blk == 0) {
+        for(ch=0; ch<=s->channels; ch++) {
+            s->dba_mode[ch] = DBA_NONE;
+        }
+    }
+
+    /* Bit allocation */
+    for(ch=!cpl_in_use; ch<=s->channels; ch++) {
+        if(bit_alloc_stages[ch] > 2) {
+            /* Exponent mapping into PSD and PSD integration */
+            ff_ac3_bit_alloc_calc_psd(s->dexps[ch],
+                                      s->start_freq[ch], s->end_freq[ch],
+                                      s->psd[ch], s->band_psd[ch]);
+        }
+        if(bit_alloc_stages[ch] > 1) {
+            /* Compute excitation function, Compute masking curve, and
+               Apply delta bit allocation */
+            if (ff_ac3_bit_alloc_calc_mask(&s->bit_alloc_params, s->band_psd[ch],
+                                           s->start_freq[ch], s->end_freq[ch],
+                                           s->fast_gain[ch], (ch == s->lfe_ch),
+                                           s->dba_mode[ch], s->dba_nsegs[ch],
+                                           s->dba_offsets[ch], s->dba_lengths[ch],
+                                           s->dba_values[ch], s->mask[ch])) {
+                av_log(s->avctx, AV_LOG_ERROR, "error in bit allocation\n");
+                return -1;
+            }
+        }
+        if(bit_alloc_stages[ch] > 0) {
+            /* Compute bit allocation */
+            const uint8_t *bap_tab = s->channel_uses_aht[ch] ?
+                                     ff_eac3_hebap_tab : ff_ac3_bap_tab;
+            ac3_bit_alloc_calc_bap_c(s->mask[ch], s->psd[ch],
+                                      s->start_freq[ch], s->end_freq[ch],
+                                      s->snr_offset[ch],
+                                      s->bit_alloc_params.floor,
+                                      bap_tab, s->bap[ch]);
+        }
+    }
+
+    /* unused dummy data */
+    if (s->skip_syntax && get_bits1(gbc)) {
+        int skipl = get_bits(gbc, 9);
+        while(skipl--)
+            skip_bits(gbc, 8);
+    }
+
+    /* unpack the transform coefficients
+       this also uncouples channels if coupling is in use. */
+    decode_transform_coeffs_fixed(s, blk);
+
+    /* TODO: generate enhanced coupling coordinates and uncouple */
+
+    /* recover coefficients if rematrixing is in use */
+    if(s->channel_mode == AC3_CHMODE_STEREO)
+        do_rematrixing_fixed(s);
+
+    /* apply scaling to coefficients (headroom, dynrng) */
+    for(ch=1; ch<=s->channels; ch++) {
+        int dynrng;
+         if(s->channel_mode == AC3_CHMODE_DUALMONO) {
+            dynrng = s->dynamic_range[2-ch];
+        } else {
+            dynrng = s->dynamic_range[0];
+        }
+        scale_coefs(s->transform_coeffs[ch], s->fixed_coeffs[ch], dynrng, 256);
+    }
+
+        do_imdct_fixed(s, s->channels);
+
+    if (s->channels != s->out_channels && !((s->output_mode & AC3_OUTPUT_LFEON) &&
+        s->fbw_channels == s->out_channels))
+            ff_ac3_downmix_c_fixed(s->output, s->downmix_coeffs, s->out_channels, s->fbw_channels, 256);
+
+    return 0;
+}
+
+/**
+ * Decode a single AC-3 frame with the fixed-point decoder: the packet is copied into s->input_buffer, the header is parsed, then s->num_blocks blocks are decoded and interleaved (256 samples per channel each) into s->frame.
+ * @return FFMIN(buf_size, s->frame_size); -1 on a frame sync error; the negative get_buffer() result if the output buffer cannot be obtained. */
+int ac3_fixed_decode_frame(AVCodecContext * avctx, void *data,
+                            int *got_frame_ptr, AVPacket *avpkt)
+{
+    const uint8_t *buf = avpkt->data;
+    int buf_size = avpkt->size;
+    AC3FixedDecodeContext *s = avctx->priv_data;
+    int   *out_samples_flt;   /* write cursor used by the AV_SAMPLE_FMT_FLT branch (int samples) */
+    int16_t *out_samples_s16; /* write cursor used by the 16-bit branch */
+    int blk, ch, err, ret;
+    const uint8_t *channel_map;
+    const int *output[AC3_MAX_CHANNELS];
+
+    /* copy input buffer to decoder context to avoid reading past the end
+       of the buffer, which can be caused by a damaged input stream. */
+    if (buf_size >= 2 && AV_RB16(buf) == 0x770B) {
+        /* seems to be byte-swapped AC-3 */
+        int cnt = FFMIN(buf_size, AC3_FRAME_BUFFER_SIZE) >> 1;
+        s->dsp.bswap16_buf((uint16_t *)s->input_buffer, (const uint16_t *)buf, cnt);
+    } else
+        memcpy(s->input_buffer, buf, FFMIN(buf_size, AC3_FRAME_BUFFER_SIZE));
+    buf = s->input_buffer;
+    /* initialize the GetBitContext; at most AC3_FRAME_BUFFER_SIZE bytes were copied above, so the reader is sized to that and never to the raw packet size */
+    init_get_bits(&s->gbc, buf, FFMIN(buf_size, AC3_FRAME_BUFFER_SIZE) * 8);
+
+    /* parse the syncinfo */
+    err = parse_frame_header_fixed(s);
+
+    if (err) {
+        switch(err) {
+            case AAC_AC3_PARSE_ERROR_SYNC:
+                av_log(avctx, AV_LOG_ERROR, "frame sync error\n");
+                return -1;
+            case AAC_AC3_PARSE_ERROR_BSID:
+                av_log(avctx, AV_LOG_ERROR, "invalid bitstream id\n");
+                break;
+            case AAC_AC3_PARSE_ERROR_SAMPLE_RATE:
+                av_log(avctx, AV_LOG_ERROR, "invalid sample rate\n");
+                break;
+            case AAC_AC3_PARSE_ERROR_FRAME_SIZE:
+                av_log(avctx, AV_LOG_ERROR, "invalid frame size\n");
+                break;
+            case AAC_AC3_PARSE_ERROR_FRAME_TYPE:
+                /* skip frame if CRC is ok. otherwise use error concealment. */
+                break;
+            default:
+                av_log(avctx, AV_LOG_ERROR, "invalid header\n");
+                break;
+        }
+    } else {
+        /* check that reported frame size fits in input buffer */
+        if (s->frame_size > buf_size) {
+            av_log(avctx, AV_LOG_ERROR, "incomplete frame\n");
+            err = AAC_AC3_PARSE_ERROR_FRAME_SIZE;
+        }
+    }
+
+    /* if frame is ok, set audio parameters */
+    if (!err) {
+        avctx->sample_rate = s->sample_rate;
+        avctx->bit_rate = s->bit_rate;
+
+        /* channel config */
+        s->out_channels = s->channels;
+        s->output_mode = s->channel_mode;
+        if(s->lfe_on)
+            s->output_mode |= AC3_OUTPUT_LFEON;
+        if (avctx->request_channels > 0 && avctx->request_channels <= 2 &&
+                avctx->request_channels < s->channels) {
+            s->out_channels = avctx->request_channels;
+            s->output_mode  = avctx->request_channels == 1 ? AC3_CHMODE_MONO : AC3_CHMODE_STEREO;
+            s->channel_layout = avpriv_ac3_channel_layout_tab[s->output_mode];
+        }
+        avctx->channels = s->out_channels;
+        avctx->channel_layout = s->channel_layout;
+
+        s->loro_center_mix_level   = gain_levels_fixed[s->  center_mix_level];
+        s->loro_surround_mix_level = gain_levels_fixed[s->surround_mix_level];
+        s->ltrt_center_mix_level   = LEVEL_MINUS_3DB;
+        s->ltrt_surround_mix_level = LEVEL_MINUS_3DB;
+        /* set downmixing coefficients if needed */
+        if(s->channels != s->out_channels && !((s->output_mode & AC3_OUTPUT_LFEON) &&
+                s->fbw_channels == s->out_channels)) {
+            set_downmix_coeffs_fixed(s);
+        }
+    } else if (!s->out_channels) {
+        s->out_channels = avctx->channels;
+        if(s->out_channels < s->channels)
+            s->output_mode  = s->out_channels == 1 ? AC3_CHMODE_MONO : AC3_CHMODE_STEREO;
+    }
+    /* set audio service type based on bitstream mode for AC-3 */
+    avctx->audio_service_type = s->bitstream_mode;
+    if (s->bitstream_mode == 0x7 && s->channels > 1)
+        avctx->audio_service_type = AV_AUDIO_SERVICE_TYPE_KARAOKE;
+
+    /* get output buffer */
+    s->frame.nb_samples = s->num_blocks * 256;
+    if ((ret = avctx->get_buffer(avctx, &s->frame)) < 0) {
+        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        return ret;
+    }
+    out_samples_flt = (int   *)s->frame.data[0];
+    out_samples_s16 = (int16_t *)s->frame.data[0];
+
+    /* decode the audio blocks */
+    channel_map = ff_ac3_dec_channel_map[s->output_mode & ~AC3_OUTPUT_LFEON][s->lfe_on];
+    for (ch = 0; ch < s->out_channels; ch++)
+        output[ch] = s->output[channel_map[ch]];
+    for (blk = 0; blk < s->num_blocks; blk++) {
+        if (!err && decode_audio_block_fixed(s, blk)) { /* once err is set no further block is decoded; the interleave below still runs on whatever s->output holds */
+            av_log(avctx, AV_LOG_ERROR, "error decoding the audio block\n");
+            err = 1;
+        }
+
+        if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT) {
+            s->fmt_conv.fixed_interleave(out_samples_flt, output, 256,
+                                         s->out_channels);
+            out_samples_flt += 256 * s->out_channels;
+        } else {
+            s->fmt_conv.fixed_to_int16_interleave(out_samples_s16, output, 256,
+                                                  s->out_channels);
+            out_samples_s16 += 256 * s->out_channels;
+        }
+    }
+
+    *got_frame_ptr   = 1;
+    *(AVFrame *)data = s->frame;
+
+    return FFMIN(buf_size, s->frame_size);
+}
+
+/**
+ * Tear down the fixed-point AC-3 decoder by calling ff_mdct_end() on both IMDCT contexts; always returns 0.
+ */
+int ac3_fixed_decode_end(AVCodecContext *avctx)
+{
+    AC3FixedDecodeContext *ctx = avctx->priv_data;
+
+    ff_mdct_end(&ctx->imdct_512);
+    ff_mdct_end(&ctx->imdct_256);
+    return 0;
+}
+
+#define OFFSET(x) offsetof(AC3FixedDecodeContext, x) /* byte offset of field x inside the decoder context */
+#define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM) /* flags used by the drc_scale entry only; the other entries pass 0 */
+static const AVOption options[] = { /* option table referenced by ac3_decoder_class.option */
+    { "drc_scale", "percentage of dynamic range compression to apply", OFFSET(drc_scale), AV_OPT_TYPE_FLOAT, {1.0}, 0.0, 1.0, PAR }, /* NOTE(review): declared FLOAT, but AC3FixedDecodeContext.drc_scale is an int - confirm which type is intended */
+
+{"dmix_mode", "Preferred Stereo Downmix Mode", OFFSET(preferred_stereo_downmix), AV_OPT_TYPE_INT, {.dbl = -1 }, -1, 2, 0, "dmix_mode"},
+{"ltrt_cmixlev",   "Lt/Rt Center Mix Level",   OFFSET(ltrt_center_mix_level),    AV_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, 0}, /* NOTE(review): ac3_fixed_decode_frame assigns all four *_mix_level fields on every frame whose header parses cleanly, so values set through these options look like they get overwritten - confirm */
+{"ltrt_surmixlev", "Lt/Rt Surround Mix Level", OFFSET(ltrt_surround_mix_level),  AV_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, 0},
+{"loro_cmixlev",   "Lo/Ro Center Mix Level",   OFFSET(loro_center_mix_level),    AV_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, 0},
+{"loro_surmixlev", "Lo/Ro Surround Mix Level", OFFSET(loro_surround_mix_level),  AV_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, 0},
+
+    { NULL}, /* closing entry with a NULL name */
+};
+
+static const AVClass ac3_decoder_class = { /* AVClass installed as .priv_class of ff_ac3_fixed_decoder */
+    .version    = LIBAVUTIL_VERSION_INT,
+    .option     = options,              /* the AVOption table defined just above */
+    .item_name  = av_default_item_name,
+    .class_name = "AC3 fixed decoder",
+};
+
+AVCodec ff_ac3_fixed_decoder = { /* codec descriptor for the "ac3_fixed" decoder */
+    .name           = "ac3_fixed",
+    .long_name      = NULL_IF_CONFIG_SMALL("ATSC A/52A (AC-3)"),
+    .type           = AVMEDIA_TYPE_AUDIO,
+    .id             = CODEC_ID_AC3,
+    .capabilities   = CODEC_CAP_DR1,
+    .priv_data_size = sizeof (AC3FixedDecodeContext),
+    .priv_class     = &ac3_decoder_class,
+    .init           = ac3_fixed_decode_init,
+    .decode         = ac3_fixed_decode_frame,
+    .close          = ac3_fixed_decode_end,
+    .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLT,
+                                                      AV_SAMPLE_FMT_S16,
+                                                      AV_SAMPLE_FMT_NONE },
+};
diff --git a/libavcodec/mips/ac3dec_fixed.h b/libavcodec/mips/ac3dec_fixed.h
new file mode 100644
index 0000000..ee05f46
--- /dev/null
+++ b/libavcodec/mips/ac3dec_fixed.h
@@ -0,0 +1,234 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Stanislav Ocovaj (socovaj at mips.com)
+ *
+ * AC3 fixed-point decoder for MIPS platforms
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MIPS_AC3DEC_FIXED_H
+#define AVCODEC_MIPS_AC3DEC_FIXED_H
+
+#include "libavutil/lfg.h"
+#include "libavcodec/ac3.h"
+#include "libavcodec/ac3dsp.h"
+#include "libavcodec/get_bits.h"
+#include "libavcodec/dsputil.h"
+#include "libavcodec/fft.h"
+#include "libavcodec/fmtconvert.h"
+
+#define AC3_OUTPUT_LFEON  8   /* flag OR-ed into output_mode when the LFE channel is on */
+
+#define SPX_MAX_BANDS    17   /* dimension of the per-band spectral extension arrays */
+/* Mix levels in Q16 fixed point: linear gain * 65536, truncated (1.0 == 65536). Any table built from these names should be re-checked against the corrected values. */
+#define LEVEL_FIXED_PLUS_3DB          92681
+#define LEVEL_FIXED_PLUS_1POINT5DB    77935
+#define LEVEL_FIXED_MINUS_1POINT5DB   55108
+#define LEVEL_FIXED_MINUS_3DB         46340
+#define LEVEL_FIXED_MINUS_4POINT5DB   38967
+#define LEVEL_FIXED_MINUS_6DB         32768
+#define LEVEL_FIXED_MINUS_9DB         23170
+#define LEVEL_FIXED_ZERO              0
+#define LEVEL_FIXED_ONE               65536
+
+/** Large enough for maximum possible frame size when the specification limit is ignored */
+#define AC3_FRAME_BUFFER_SIZE 32768
+
+typedef struct { /* private state of the fixed-point AC-3 decoder (kept in avctx->priv_data) */
+    AVClass        *class;                  ///< class for AVOptions
+    AVCodecContext *avctx;                  ///< parent context
+    AVFrame frame;                          ///< AVFrame for decoded output
+    GetBitContext gbc;                      ///< bitstream reader
+
+///@name Bit stream information
+///@{
+    int frame_type;                         ///< frame type                             (strmtyp)
+    int substreamid;                        ///< substream identification
+    int frame_size;                         ///< current frame size, in bytes
+    int bit_rate;                           ///< stream bit rate, in bits-per-second
+    int sample_rate;                        ///< sample frequency, in Hz
+    int num_blocks;                         ///< number of audio blocks
+    int bitstream_mode;                     ///< bitstream mode                         (bsmod)
+    int channel_mode;                       ///< channel mode                           (acmod)
+    int channel_layout;                     ///< channel layout
+    int lfe_on;                             ///< lfe channel in use
+    int channel_map;                        ///< custom channel map
+    int center_mix_level;                   ///< Center mix level index
+    int surround_mix_level;                 ///< Surround mix level index
+    int eac3;                               ///< indicates if current frame is E-AC-3
+///@}
+
+    int preferred_stereo_downmix;           ///< target of the "dmix_mode" AVOption
+    float ltrt_center_mix_level;            ///< Lt/Rt center mix level   ("ltrt_cmixlev" AVOption)
+    float ltrt_surround_mix_level;          ///< Lt/Rt surround mix level ("ltrt_surmixlev" AVOption)
+    float loro_center_mix_level;            ///< Lo/Ro center mix level   ("loro_cmixlev" AVOption)
+    float loro_surround_mix_level;          ///< Lo/Ro surround mix level ("loro_surmixlev" AVOption)
+
+///@name Frame syntax parameters
+    int snr_offset_strategy;                ///< SNR offset strategy                    (snroffststr)
+    int block_switch_syntax;                 ///< block switch syntax enabled            (blkswe)
+    int dither_flag_syntax;                   ///< dither flag syntax enabled             (dithflage)
+    int bit_allocation_syntax;              ///< bit allocation model syntax enabled    (bamode)
+    int fast_gain_syntax;                   ///< fast gain codes enabled                (frmfgaincode)
+    int dba_syntax;                         ///< delta bit allocation syntax enabled    (dbaflde)
+    int skip_syntax;                        ///< skip field syntax enabled              (skipflde)
+ ///@}
+
+///@name Standard coupling
+    int cpl_in_use[AC3_MAX_BLOCKS];         ///< coupling in use                        (cplinu)
+    int cpl_strategy_exists[AC3_MAX_BLOCKS];///< coupling strategy exists               (cplstre)
+    int channel_in_cpl[AC3_MAX_CHANNELS];   ///< channel in coupling                    (chincpl)
+    int phase_flags_in_use;                 ///< phase flags in use                     (phsflginu)
+    int phase_flags[AC3_MAX_CPL_BANDS];     ///< phase flags                            (phsflg)
+    int num_cpl_bands;                      ///< number of coupling bands               (ncplbnd)
+    uint8_t cpl_band_sizes[AC3_MAX_CPL_BANDS]; ///< number of coeffs in each coupling band
+    int firstchincpl;                       ///< first channel in coupling
+    int first_cpl_coords[AC3_MAX_CHANNELS]; ///< first coupling coordinates states      (firstcplcos)
+    int cpl_coords[AC3_MAX_CHANNELS][AC3_MAX_CPL_BANDS]; ///< coupling coordinates      (cplco)
+///@}
+
+///@name Spectral extension
+///@{
+    int spx_in_use;                             ///< spectral extension in use              (spxinu)
+    uint8_t channel_uses_spx[AC3_MAX_CHANNELS]; ///< channel uses spectral extension        (chinspx)
+    int8_t spx_atten_code[AC3_MAX_CHANNELS];    ///< spx attenuation code                   (spxattencod)
+    int spx_src_start_freq;                     ///< spx start frequency bin
+    int spx_dst_end_freq;                       ///< spx end frequency bin
+    int spx_dst_start_freq;                     ///< spx starting frequency bin for copying (copystartmant)
+                                                ///< the copy region ends at the start of the spx region.
+    int num_spx_bands;                          ///< number of spx bands                    (nspxbnds)
+    uint8_t spx_band_sizes[SPX_MAX_BANDS];      ///< number of bins in each spx band
+    uint8_t first_spx_coords[AC3_MAX_CHANNELS]; ///< first spx coordinates states           (firstspxcos)
+    int spx_noise_blend[AC3_MAX_CHANNELS][SPX_MAX_BANDS]; ///< spx noise blending factor  (nblendfact)
+    int spx_signal_blend[AC3_MAX_CHANNELS][SPX_MAX_BANDS];///< spx signal blending factor (sblendfact)
+///@}
+
+///@name Adaptive hybrid transform
+    int channel_uses_aht[AC3_MAX_CHANNELS];                         ///< channel AHT in use (chahtinu)
+    int pre_mantissa[AC3_MAX_CHANNELS][AC3_MAX_COEFS][AC3_MAX_BLOCKS];  ///< pre-IDCT mantissas
+///@}
+
+///@name Channel
+    int fbw_channels;                           ///< number of full-bandwidth channels
+    int channels;                               ///< number of total channels
+    int lfe_ch;                                 ///< index of LFE channel
+    int downmix_coeffs[AC3_MAX_CHANNELS][2];  ///< stereo downmix coefficients
+    int downmixed;                              ///< indicates if coeffs are currently downmixed
+    int output_mode;                            ///< output channel configuration
+    int out_channels;                           ///< number of output channels
+///@}
+
+///@name Dynamic range
+    int dynamic_range[2];                 ///< dynamic range
+    int drc_scale;                        ///< percentage of dynamic range compression to be applied; NOTE(review): the "drc_scale" AVOption is declared AV_OPT_TYPE_FLOAT while this field is an int - confirm
+///@}
+
+///@name Bandwidth
+    int start_freq[AC3_MAX_CHANNELS];       ///< start frequency bin                    (strtmant)
+    int end_freq[AC3_MAX_CHANNELS];         ///< end frequency bin                      (endmant)
+///@}
+
+///@name Rematrixing
+    int num_rematrixing_bands;              ///< number of rematrixing bands            (nrematbnd)
+    int rematrixing_flags[4];               ///< rematrixing flags                      (rematflg)
+///@}
+
+///@name Exponents
+    int num_exp_groups[AC3_MAX_CHANNELS];           ///< Number of exponent groups      (nexpgrp)
+    int8_t dexps[AC3_MAX_CHANNELS][AC3_MAX_COEFS];  ///< decoded exponents
+    int exp_strategy[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS]; ///< exponent strategies        (expstr)
+///@}
+
+///@name Bit allocation
+    AC3BitAllocParameters bit_alloc_params;         ///< bit allocation parameters
+    int first_cpl_leak;                             ///< first coupling leak state      (firstcplleak)
+    int snr_offset[AC3_MAX_CHANNELS];               ///< signal-to-noise ratio offsets  (snroffst)
+    int fast_gain[AC3_MAX_CHANNELS];                ///< fast gain values/SMR's         (fgain)
+    uint8_t bap[AC3_MAX_CHANNELS][AC3_MAX_COEFS];   ///< bit allocation pointers
+    int16_t psd[AC3_MAX_CHANNELS][AC3_MAX_COEFS];   ///< scaled exponents
+    int16_t band_psd[AC3_MAX_CHANNELS][AC3_CRITICAL_BANDS]; ///< interpolated exponents
+    int16_t mask[AC3_MAX_CHANNELS][AC3_CRITICAL_BANDS];     ///< masking curve values
+    int dba_mode[AC3_MAX_CHANNELS];                 ///< delta bit allocation mode
+    int dba_nsegs[AC3_MAX_CHANNELS];                ///< number of delta segments
+    uint8_t dba_offsets[AC3_MAX_CHANNELS][8];       ///< delta segment offsets
+    uint8_t dba_lengths[AC3_MAX_CHANNELS][8];       ///< delta segment lengths
+    uint8_t dba_values[AC3_MAX_CHANNELS][8];        ///< delta values for each segment
+///@}
+
+///@name Zero-mantissa dithering
+    int dither_flag[AC3_MAX_CHANNELS];      ///< dither flags                           (dithflg)
+    AVLFG dith_state;                       ///< for dither generation
+///@}
+
+///@name IMDCT
+    int block_switch[AC3_MAX_CHANNELS];     ///< block switch flags                     (blksw)
+    FFTContext imdct_512;                   ///< for 512 sample IMDCT
+    FFTContext imdct_256;                   ///< for 256 sample IMDCT
+///@}
+
+///@name Optimization
+    DSPContext dsp;                         ///< for optimization
+   //AC3DSPContext ac3dsp;
+    FmtConvertContext fmt_conv;             ///< optimized conversion functions
+    int mul_bias;                         ///< scaling for fixed_to_int16 conversion
+///@}
+
+///@name Aligned arrays
+    DECLARE_ALIGNED(16, int, fixed_coeffs)[AC3_MAX_CHANNELS][AC3_MAX_COEFS];       ///< fixed-point transform coefficients
+    DECLARE_ALIGNED(32, FFTSample, transform_coeffs)[AC3_MAX_CHANNELS][AC3_MAX_COEFS];   ///< transform coefficients
+    DECLARE_ALIGNED(32, FFTSample, delay)[AC3_MAX_CHANNELS][AC3_BLOCK_SIZE];             ///< delay - added to the next block
+    DECLARE_ALIGNED(32, int16_t, window)[AC3_BLOCK_SIZE];                              ///< window coefficients
+    DECLARE_ALIGNED(32, FFTSample, tmp_output)[AC3_BLOCK_SIZE];                          ///< temporary storage for output before windowing
+    DECLARE_ALIGNED(32, int, output)[AC3_MAX_CHANNELS][AC3_BLOCK_SIZE];            ///< output after imdct transform and windowing
+    DECLARE_ALIGNED(32, uint8_t, input_buffer)[AC3_FRAME_BUFFER_SIZE + FF_INPUT_BUFFER_PADDING_SIZE]; ///< temp buffer to prevent overread
+///@}
+} AC3FixedDecodeContext;
+
+int ac3_fixed_decode_init(AVCodecContext *avctx);   /* .init callback of ff_ac3_fixed_decoder */
+int ac3_fixed_decode_end(AVCodecContext *avctx);    /* .close callback of ff_ac3_fixed_decoder */
+int ac3_fixed_decode_frame(AVCodecContext * avctx, void *data, int *got_frame_ptr, /* .decode callback; got_frame_ptr is named as in the definition */
+                            AVPacket *avpkt);
+void ff_ac3_downmix_c_fixed(int (*samples)[256], int (*matrix)[2], int out_ch, int in_ch, int len); /* downmix helper; the block decoder calls it with s->output and s->downmix_coeffs */
+
+#endif /* AVCODEC_MIPS_AC3DEC_FIXED_H */
diff --git a/libavcodec/mips/dsputil_mips_fixed.c b/libavcodec/mips/dsputil_mips_fixed.c
new file mode 100644
index 0000000..e1b8037
--- /dev/null
+++ b/libavcodec/mips/dsputil_mips_fixed.c
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Dragan Mrdjan (dmrdjan at mips.com)
+ *
+ * DSP utils optimized for MIPS fixed-point platforms
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/dsputil.c
+ */
+
+#include "config.h"
+#include "libavcodec/dsputil.h"
+
+/**
+ * Windowed overlap of two int16 vectors into 32-bit output, written with
+ * MIPS DSP ASE inline assembly (fixed-point counterpart of the routine in
+ * libavcodec/dsputil.c that this file names as its reference).
+ *
+ * dst, win and src0 are advanced by len before the loop, so relative to the
+ * pointers passed in, the routine touches dst[0 .. 2*len-1],
+ * win[0 .. 2*len-1], src0[0 .. len-1] and src1[0 .. len-1].
+ *
+ * Every loop iteration processes four (i, j) pairs, with i walking up from
+ * -len and j walking down from len-1, so len must be a positive multiple
+ * of 4; otherwise the unrolled body reads and writes past the arrays.
+ *
+ * Per pair (indices relative to the advanced pointers), as I read the DSP
+ * ASE instruction semantics -- TODO confirm against the ISA manual:
+ *     dst[i] = (src0[i] * win[j] - src1[j] * win[i] + 0x4000) >> 15
+ *     dst[j] = (src0[i] * win[i] + src1[j] * win[j] + 0x4000) >> 15
+ *
+ * NOTE(review): the scratch outputs below are declared int16_t although the
+ * registers bound to them hold packed pairs and 32-bit results inside the
+ * asm; they are never read from C afterwards. Confirm this is intentional.
+ * NOTE(review): unlike fft_mips_fixed.c, nothing visible in this file guards
+ * the asm with HAVE_MIPSDSPR2 && HAVE_INLINE_ASM -- confirm the build only
+ * compiles it for DSP-capable targets.
+ *
+ * @param dst  output, 2*len 32-bit words
+ * @param src0 first input, len samples, read front to back
+ * @param src1 second input, len samples, read back to front
+ * @param win  window, 2*len coefficients
+ * @param len  half the window length; positive multiple of 4
+ */
+static void vector_fmul_window_mips_fixed(int *dst, const int16_t *src0, const int16_t *src1, const int16_t *win, int len)
+{
+    int i,j;
+    int *dst_i, *dst_j;
+    const int16_t * src0_i, *src1_j;
+    const int16_t *win_i, *win_j;
+    /* asm scratch registers only; not meaningful after the asm statement */
+    int16_t s0, s01, s02, s03, s1, s11, s12, s13;
+    int16_t wi, wi1, wi2, wi3, wj, wj1, wj2, wj3;
+
+    /* re-base so that i in [-len, 0) and j in [0, len) index the same arrays */
+    dst += len;
+    win += len;
+    src0 += len;
+
+    for(i=-len, j=len-1; i<0; i+=4, j-=4) {
+        /* base addresses for this group of four pairs; the asm uses
+         * byte offsets 0/2/4/6 upwards from the _i pointers and
+         * 0/-2/-4/-6 downwards from the _j pointers (0/4/8/12 for dst) */
+        dst_i = dst + i;
+        dst_j = dst + j;
+        src0_i = src0 + i;
+        src1_j = src1 + j;
+        win_i = win + i;
+        win_j = win + j;
+
+        /* Loads, multiplies and stores of the four pairs are interleaved
+         * across accumulators $ac0..$ac3; each accumulator is zeroed with
+         * "mult $acN, $0, $0" before it is used. */
+        __asm__ volatile (
+            "lh             %[s0],      0(%[src0_i])                \n\t"
+            "lh             %[s1],      0(%[src1_j])                \n\t"
+            "lh             %[wi],      0(%[win_i])                 \n\t"
+            "lh             %[wj],      0(%[win_j])                 \n\t"
+            "append         %[s0],      %[s1],          16          \n\t"
+            "append         %[wj],      %[wi],          16          \n\t"
+            "mult           $ac0,       $0,             $0          \n\t"
+            "mulsaq_s.w.ph  $ac0,       %[s0],          %[wj]       \n\t"
+            "mult           $ac1,       $0,             $0          \n\t"
+            "dpaqx_s.w.ph   $ac1,       %[s0],          %[wj]       \n\t"
+            "lh             %[s01],     2(%[src0_i])                \n\t"
+            "lh             %[s11],     -2(%[src1_j])               \n\t"
+            "extr_r.w       %[s1],      $ac0,           16          \n\t"
+            "lh             %[wi1],     2(%[win_i])                 \n\t"
+            "lh             %[wj1],     -2(%[win_j])                \n\t"
+            "extr_r.w       %[wj],      $ac1,           16          \n\t"
+            "append         %[s01],     %[s11],         16          \n\t"
+            "append         %[wj1],     %[wi1],         16          \n\t"
+            "mult           $ac2,       $0,             $0          \n\t"
+            "mulsaq_s.w.ph  $ac2,       %[s01],         %[wj1]      \n\t"
+            "sw             %[s1],      0(%[dst_i])                 \n\t"
+            "sw             %[wj],       0(%[dst_j])                \n\t"
+            "mult           $ac3,       $0,             $0          \n\t"
+            "dpaqx_s.w.ph   $ac3,       %[s01],         %[wj1]      \n\t"
+            "extr_r.w       %[s11],     $ac2,           16          \n\t"
+            "extr_r.w       %[wj1],     $ac3,           16          \n\t"
+            "lh             %[s02],     4(%[src0_i])                \n\t"
+            "lh             %[s12],     -4(%[src1_j])               \n\t"
+            "lh             %[wi2],     4(%[win_i])                 \n\t"
+            "lh             %[wj2],     -4(%[win_j])                \n\t"
+            "append         %[s02],     %[s12],         16          \n\t"
+            "append         %[wj2],     %[wi2],         16          \n\t"
+            "mult           $ac0,       $0,             $0          \n\t"
+            "mulsaq_s.w.ph  $ac0,       %[s02],         %[wj2]      \n\t"
+            "sw             %[s11],     4(%[dst_i])                 \n\t"
+            "sw             %[wj1],     -4(%[dst_j])                \n\t"
+            "mult           $ac1,       $0,             $0          \n\t"
+            "dpaqx_s.w.ph   $ac1,       %[s02],         %[wj2]      \n\t"
+            "extr_r.w       %[s12],     $ac0,           16          \n\t"
+            "lh             %[s03],     6(%[src0_i])                \n\t"
+            "lh             %[s13],     -6(%[src1_j])               \n\t"
+            "lh             %[wi3],     6(%[win_i])                 \n\t"
+            "lh             %[wj3],     -6(%[win_j])                \n\t"
+            "append         %[s03],     %[s13],         16          \n\t"
+            "append         %[wj3],     %[wi3],         16          \n\t"
+            "mult           $ac2,       $0,             $0          \n\t"
+            "mulsaq_s.w.ph  $ac2,       %[s03],         %[wj3]      \n\t"
+            "sw             %[s12],     8(%[dst_i])                 \n\t"
+            "extr_r.w       %[wj2],     $ac1,           16          \n\t"
+            "mult           $ac3,       $0,             $0          \n\t"
+            "dpaqx_s.w.ph   $ac3,       %[s03],         %[wj3]      \n\t"
+            "extr_r.w       %[s13],     $ac2,           16          \n\t"
+            "extr_r.w       %[wj3],     $ac3,           16          \n\t"
+            "sw             %[wj2],     -8(%[dst_j])                \n\t"
+            "sw             %[s13],     12(%[dst_i])                \n\t"
+            "sw             %[wj3],     -12(%[dst_j])               \n\t"
+
+            /* all scratch outputs are early-clobber: they are written
+             * while the pointer inputs are still needed */
+            : [s0] "=&r" (s0), [s1] "=&r" (s1), [wi] "=&r" (wi),
+              [wj] "=&r" (wj), [s03] "=&r" (s03), [s01] "=&r" (s01),
+              [s11] "=&r" (s11), [wi1] "=&r" (wi1), [wj1] "=&r" (wj1),
+              [s13] "=&r" (s13), [s02] "=&r" (s02), [s12] "=&r" (s12),
+              [wi2] "=&r" (wi2), [wj2] "=&r" (wj2), [wi3] "=&r" (wi3),
+              [wj3] "=&r" (wj3)
+            : [src0_i] "r" (src0_i), [win_j] "r" (win_j ), [src1_j] "r" (src1_j),
+              [win_i] "r" (win_i), [dst_i] "r" (dst_i), [dst_j] "r" (dst_j)
+            /* "hi"/"lo" cover $ac0; dst is written through sw, hence "memory" */
+            : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo",
+              "$ac3hi", "$ac3lo"
+        );
+    }
+}
+
+/**
+ * Hook the MIPS fixed-point DSP routine from this file into a DSPContext.
+ *
+ * @param c context whose vector_fmul_window_fixed pointer is overwritten
+ */
+void ff_dsputil_init_mips_fixed(DSPContext *c)
+{
+    c->vector_fmul_window_fixed = vector_fmul_window_mips_fixed;
+}
diff --git a/libavcodec/mips/fft_mips_fixed.c b/libavcodec/mips/fft_mips_fixed.c
new file mode 100644
index 0000000..b4bff14
--- /dev/null
+++ b/libavcodec/mips/fft_mips_fixed.c
@@ -0,0 +1,900 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Authors:  Stanislav Ocovaj (socovaj at mips.com)
+ *           Dragan Mrdjan    (dmrdjan at mips.com)
+ *           Zoran Lukic      (zlukic at mips.com)
+ *           Bojan Zivkovic   (bojan at mips.com)
+ *
+ * Optimization of FFT and MDCT/IMDCT transforms for MIPS fixed-point
+ * architecture
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define CONFIG_FFT_FLOAT 0
+#include "libavcodec/fft.h"
+#include "libavcodec/mips/fft_table.h"
+
+#include "fft_table_fixed.h"
+
+/**
+ * Initialise an FFTContext for a fixed-point MDCT of 2^nbits samples and
+ * compute its twiddle tables at run time.
+ *
+ * The context is zeroed, the underlying FFT is set up for nbits - 2 bits,
+ * and n/4 cosine plus n/4 sine twiddles are generated. Each twiddle is
+ * -cos / -sin scaled by 65536, halved with (x + 1) >> 1 and clamped from
+ * above to 32767 before being stored as an FFTSample.
+ *
+ * NOTE(review): the table is allocated as n/2 ints; if FFTSample is a
+ * 16-bit type here that is twice what the n/2 entries need -- confirm.
+ *
+ * @param s       context to initialise (fully overwritten)
+ * @param nbits   log2 of the MDCT size
+ * @param inverse forwarded unchanged to ff_fft_init()
+ * @param scale   only its sign is used: a negative value adds n/4 to the
+ *                twiddle phase offset
+ * @return 0 on success, -1 on failure (after ff_mdct_end(s))
+ */
+av_cold int ff_mdct_fixed_init(FFTContext *s, int nbits, int inverse, int scale)
+{
+    int size, quarter, stride, idx;
+    double phase;
+
+    memset(s, 0, sizeof(*s));
+    size    = 1 << nbits;
+    quarter = size >> 2;
+    s->mdct_bits        = nbits;
+    s->mdct_size        = size;
+    s->mdct_permutation = FF_MDCT_PERM_NONE;
+
+    if (ff_fft_init(s, s->mdct_bits - 2, inverse) < 0)
+        goto fail;
+
+    s->tcos = av_malloc((size * sizeof(int)) / 2);
+    if (!s->tcos)
+        goto fail;
+
+    /* layout of the sine table relative to the cosine table */
+    if (s->mdct_permutation == FF_MDCT_PERM_NONE) {
+        s->tsin = s->tcos + quarter;
+        stride  = 1;
+    } else if (s->mdct_permutation == FF_MDCT_PERM_INTERLEAVE) {
+        s->tsin = s->tcos + 1;
+        stride  = 2;
+    } else {
+        goto fail;
+    }
+
+    phase = 0.125 + (scale < 0 ? quarter : 0);
+
+    for (idx = 0; idx < quarter; idx++) {
+        const double alpha = 2 * M_PI * (idx + phase) / size;
+        int c  = (int)(-cos(alpha) * 65536);
+        int sn = (int)(-sin(alpha) * 65536);
+
+        /* halve with rounding, then keep +1.0 representable */
+        c  = (c  + 1) >> 1;
+        sn = (sn + 1) >> 1;
+        s->tcos[idx * stride] = (FFTSample)(c > 32767 ? 32767 : c);
+        s->tsin[idx * stride] = sn > 32767 ? 32767 : sn;
+    }
+
+    return 0;
+
+fail:
+    ff_mdct_end(s);
+    return -1;
+}
+
+/**
+ * Initialise an FFTContext for a fixed-point MDCT of 2^nbits samples,
+ * copying the twiddles from the tcos_fixed_128 / tsin_fixed_128 tables
+ * instead of computing them.
+ *
+ * NOTE(review): n/4 entries are read from each table without checking
+ * nbits against the table length -- presumably callers only pass the size
+ * the tables were generated for; confirm.
+ *
+ * @param s       context to initialise (fully overwritten)
+ * @param nbits   log2 of the MDCT size
+ * @param inverse forwarded unchanged to ff_fft_init()
+ * @param scale   not used by this variant
+ * @return 0 on success, -1 on failure (after ff_mdct_end(s))
+ */
+av_cold int ff_mdct_fixed_init_hardcoded_128(FFTContext *s, int nbits, int inverse, int scale)
+{
+    int size, quarter, stride, idx;
+
+    memset(s, 0, sizeof(*s));
+    size    = 1 << nbits;
+    quarter = size >> 2;
+    s->mdct_bits        = nbits;
+    s->mdct_size        = size;
+    s->mdct_permutation = FF_MDCT_PERM_NONE;
+
+    if (ff_fft_init(s, s->mdct_bits - 2, inverse) < 0)
+        goto fail;
+
+    s->tcos = av_malloc((size * sizeof(int)) / 2);
+    if (!s->tcos)
+        goto fail;
+
+    if (s->mdct_permutation == FF_MDCT_PERM_NONE) {
+        s->tsin = s->tcos + quarter;
+        stride  = 1;
+    } else if (s->mdct_permutation == FF_MDCT_PERM_INTERLEAVE) {
+        s->tsin = s->tcos + 1;
+        stride  = 2;
+    } else {
+        goto fail;
+    }
+
+    for (idx = 0; idx < quarter; idx++) {
+        s->tcos[idx * stride] = tcos_fixed_128[idx];
+        s->tsin[idx * stride] = tsin_fixed_128[idx];
+    }
+    return 0;
+
+fail:
+    ff_mdct_end(s);
+    return -1;
+}
+
+#if HAVE_MIPSDSPR2 && HAVE_INLINE_ASM
+/**
+ * Half-length fixed-point inverse MDCT using MIPS DSP ASE inline assembly.
+ *
+ * Three stages, mirroring the portable version in the #else branch:
+ *  1. pre-rotation: each (in2, in1) input pair is complex-multiplied by
+ *     (tcos[k], tsin[k]) and stored at z[revtab[k]];
+ *  2. s->fft_fixed_calc() is run in place on z;
+ *  3. post-rotation: entries are rotated by (tsin, tcos) in mirrored pairs
+ *     around n/8, swapping the imaginary parts of each pair.
+ *
+ * The pre-rotation loop is unrolled four times and the post-rotation loop
+ * twice, so n/4 must be a multiple of 4 (n = 1 << s->mdct_bits).
+ *
+ * In every asm statement the two 16-bit operands of a complex number are
+ * packed into one register with "append"; as I read the DSP ASE semantics
+ * (TODO confirm against the ISA manual) mulsaq_s.w.ph then accumulates
+ * hi*hi - lo*lo, dpaqx_s.w.ph accumulates the cross products, and
+ * "extr_r.w ..., 16" extracts the rounded result.
+ *
+ * NOTE(review): the post-rotation asm statements list [z] as an input
+ * operand that their templates never reference; left in place to keep the
+ * asm unchanged.
+ *
+ * @param s      MDCT context (revtab, tcos, tsin, mdct_bits, fft_fixed_calc)
+ * @param output destination, reinterpreted as an array of FFTComplex
+ * @param input  source coefficients; n/2 samples are read
+ */
+static void ff_imdct_fixed_half_mips(FFTContext *s, FFTSample *output, const FFTSample *input)
+{
+    int k, n8, n4, n2, n, j, j2;
+    int ax0, ax1, ax2, ax3;
+    const uint16_t *revtab = s->revtab;
+    const FFTSample *tcos = s->tcos;
+    const FFTSample *tsin = s->tsin;
+    const FFTSample *in1, *in2;
+    FFTComplex *z = (FFTComplex *)output;
+
+    FFTSample t0, t1, t2, t3, t01, t11, t21, t31;
+
+    n = 1 << s->mdct_bits;
+    n2 = n >> 1;
+    n4 = n >> 2;
+    n8 = n >> 3;
+
+    /* pre rotation */
+    in1 = input;            /* walks forwards over the even samples  */
+    in2 = input + n2 - 1;   /* walks backwards over the odd samples  */
+
+    for(k=0; k<n4; k+=4) {
+        /* byte offsets of twiddles k and k+1 in the 16-bit tables */
+        int k1 = k * 2;
+        int k2 = k1 + 2;
+
+        __asm__ volatile (
+            "lh             %[ax0],     0(%[in2])                   \n\t"
+            "lh             %[ax1],     0(%[in1])                   \n\t"
+            "lhx            %[ax2],     %[k1](%[tcos])              \n\t"
+            "lhx            %[ax3],     %[k1](%[tsin])              \n\t"
+            "multu          $ac0,       $0,             $0          \n\t"
+            "multu          $ac1,       $0,             $0          \n\t"
+            "append         %[ax0],     %[ax1],         16          \n\t"
+            "append         %[ax2],     %[ax3],         16          \n\t"
+            "multu          $ac2,       $0,             $0          \n\t"
+            "mulsaq_s.w.ph  $ac0,       %[ax0],         %[ax2]      \n\t"
+            "dpaqx_s.w.ph   $ac1,       %[ax0],         %[ax2]      \n\t"
+            "lh             %[ax0],     -4(%[in2])                  \n\t"
+            "lh             %[ax1],     4(%[in1])                   \n\t"
+            "lhx            %[ax2],     %[k2](%[tcos])              \n\t"
+            "lhx            %[ax3],     %[k2](%[tsin])              \n\t"
+            "append         %[ax0],     %[ax1],         16          \n\t"
+            "append         %[ax2],     %[ax3],         16          \n\t"
+            "mulsaq_s.w.ph  $ac2,       %[ax0],         %[ax2]      \n\t"
+            "multu          $ac3,       $0,             $0          \n\t"
+            "dpaqx_s.w.ph   $ac3,       %[ax0],         %[ax2]      \n\t"
+            "extr_r.w       %[t0],      $ac0,           16          \n\t"
+            "extr_r.w       %[t2],      $ac1,           16          \n\t"
+            "extr_r.w       %[t1],      $ac2,           16          \n\t"
+            "extr_r.w       %[t3],      $ac3,           16          \n\t"
+
+            : [ax0] "=&r" (ax0), [ax2] "=&r" (ax2),[ax1]  "=&r"  (ax1), [ax3] "=&r" (ax3),
+              [t0] "=&r" (t0),  [t1] "=&r" (t1), [t2] "=&r" (t2), [t3] "=&r" (t3)
+            : [in1] "r" (in1), [in2] "r" (in2), [tcos] "r" (tcos),
+              [tsin] "r" (tsin), [k1] "r" (k1), [k2] "r" (k2)
+            : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo",
+              "$ac3hi", "$ac3lo"
+        );
+
+        j  = revtab[k];
+        j2 = revtab[k+1];
+
+        z[j].re = t0;
+        z[j].im = t2;
+        z[j2].re = t1;
+        z[j2].im = t3;
+
+        /* advance to twiddles k+2 and k+3 */
+        k1 += 4;
+        k2 += 4;
+
+        __asm__ volatile (
+            "lh             %[ax0],     -8(%[in2])                  \n\t"
+            "lh             %[ax1],     8(%[in1])                   \n\t"
+            "lhx            %[ax2],     %[k1](%[tcos])              \n\t"
+            "lhx            %[ax3],     %[k1](%[tsin])              \n\t"
+            "multu          $ac0,       $0,             $0          \n\t"
+            "multu          $ac1,       $0,             $0          \n\t"
+            "append         %[ax0],     %[ax1],         16          \n\t"
+            "append         %[ax2],     %[ax3],         16          \n\t"
+            "multu          $ac2,       $0,             $0          \n\t"
+            "mulsaq_s.w.ph  $ac0,       %[ax0],         %[ax2]      \n\t"
+            "dpaqx_s.w.ph   $ac1,       %[ax0],         %[ax2]      \n\t"
+            "lh             %[ax0],     -12(%[in2])                 \n\t"
+            "lh             %[ax1],     12(%[in1])                  \n\t"
+            "lhx            %[ax2],     %[k2](%[tcos])              \n\t"
+            "lhx            %[ax3],     %[k2](%[tsin])              \n\t"
+            "append         %[ax0],     %[ax1],         16          \n\t"
+            "append         %[ax2],     %[ax3],         16          \n\t"
+            "mulsaq_s.w.ph  $ac2,       %[ax0],         %[ax2]      \n\t"
+            "multu          $ac3,       $0,             $0          \n\t"
+            "dpaqx_s.w.ph   $ac3,       %[ax0],         %[ax2]      \n\t"
+            "extr_r.w       %[t0],      $ac0,           16          \n\t"
+            "extr_r.w       %[t2],      $ac1,           16          \n\t"
+            "extr_r.w       %[t1],      $ac2,           16          \n\t"
+            "extr_r.w       %[t3],      $ac3,           16          \n\t"
+
+            : [ax0] "=&r" (ax0), [ax2] "=&r" (ax2), [ax1] "=&r" (ax1), [ax3] "=&r" (ax3),
+              [t0] "=&r" (t0), [t2] "=&r" (t2), [t1] "=r" (t1), [t3] "=r" (t3)
+            : [in1] "r" (in1), [in2] "r" (in2), [tcos] "r" (tcos),
+              [tsin] "r"  (tsin),[k1] "r" (k1), [k2] "r" (k2)
+            : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo",
+              "$ac3hi", "$ac3lo"
+        );
+
+        j  = revtab[k+2];
+        j2 = revtab[k+3];
+
+        z[j ].re = t0;
+        z[j ].im = t2;
+        z[j2].re = t1;
+        z[j2].im = t3;
+        /* four complex inputs consumed from each end */
+        in1 += 8;
+        in2 -= 8;
+    }
+
+    s->fft_fixed_calc(s, z);
+
+    /* post rotation + reordering */
+
+    for(k=0; k<n8; k+=2 ) {
+        /* byte offsets into the 16-bit twiddle tables for the two entries
+         * below n8 (k1, k2) and the two entries from n8 upwards (k11, k21) */
+        int k1 = 2 * (n8 - k - 1), k2 = k1 - 2;
+        int k11 = 2 * (n8 + k), k21 = k11 + 2;
+        in1 = (const FFTSample*)(z + (n8 - k - 1));
+        in2 = (const FFTSample*)(z + (n8 + k));
+
+         /* rotate z[n8-k-1] (-> t0, t2) and z[n8-k-2] (-> t1, t3) */
+         __asm__ volatile (
+             "lh             %[ax0],     2(%[in1])                   \n\t"
+             "lh             %[ax1],     0(%[in1])                   \n\t"
+             "lhx            %[ax2],     %[k1](%[tsin])              \n\t"
+             "lhx            %[ax3],     %[k1](%[tcos])              \n\t"
+             "multu          $ac0,       $0,             $0          \n\t"
+             "multu          $ac1,       $0,             $0          \n\t"
+             "append         %[ax0],     %[ax1],         16          \n\t"
+             "append         %[ax2],     %[ax3],         16          \n\t"
+             "mulsaq_s.w.ph  $ac0,       %[ax0],         %[ax2]      \n\t"
+             "dpaqx_s.w.ph   $ac1,       %[ax0],         %[ax2]      \n\t"
+             "lh             %[ax0],     -2(%[in1])                  \n\t"
+             "lh             %[ax1],     -4(%[in1])                  \n\t"
+             "lhx            %[ax2],     %[k2](%[tsin])              \n\t"
+             "lhx            %[ax3],     %[k2](%[tcos])              \n\t"
+             "append         %[ax0],     %[ax1],         16          \n\t"
+             "append         %[ax2],     %[ax3],         16          \n\t"
+             "multu          $ac2,       $0,             $0          \n\t"
+             "mulsaq_s.w.ph  $ac2,       %[ax0],         %[ax2]      \n\t"
+             "multu          $ac3,       $0,             $0          \n\t"
+             "dpaqx_s.w.ph   $ac3,       %[ax0],         %[ax2]      \n\t"
+             "extr_r.w       %[t0],      $ac0,           16          \n\t"
+             "extr_r.w       %[t2],      $ac1,           16          \n\t"
+             "extr_r.w       %[t1],      $ac2,           16          \n\t"
+             "extr_r.w       %[t3],      $ac3,           16          \n\t"
+
+            : [ax0] "=&r" (ax0), [ax1] "=&r" (ax1), [ax2] "=&r" (ax2), [ax3] "=&r" (ax3),
+              [t0] "=r" (t0), [t2] "=r" (t2), [t1] "=r" (t1), [t3] "=r" (t3)
+            : [in1] "r" (in1), [k1] "r" (k1), [tsin] "r" (tsin), [tcos] "r" (tcos),
+              [z] "r" (z), [k2] "r" (k2)
+            : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo",
+              "$ac3hi", "$ac3lo"
+         );
+
+         /* rotate z[n8+k] (-> t01, t21) and z[n8+k+1] (-> t11, t31) */
+         __asm__ volatile (
+             "lh             %[ax0],     2(%[in2])                   \n\t"
+             "lh             %[ax1],     0(%[in2])                   \n\t"
+             "lhx            %[ax2],     %[k11](%[tsin])             \n\t"
+             "lhx            %[ax3],     %[k11](%[tcos])             \n\t"
+             "multu          $ac0,       $0,             $0          \n\t"
+             "multu          $ac1,       $0,             $0          \n\t"
+             "append         %[ax0],     %[ax1],         16          \n\t"
+             "append         %[ax2],     %[ax3],         16          \n\t"
+             "mulsaq_s.w.ph  $ac0,       %[ax0],         %[ax2]      \n\t"
+             "dpaqx_s.w.ph   $ac1,       %[ax0],         %[ax2]      \n\t"
+             "lh             %[ax0],     6(%[in2])                   \n\t"
+             "lh             %[ax1],     4(%[in2])                   \n\t"
+             "lhx            %[ax2],     %[k21](%[tsin])             \n\t"
+             "lhx            %[ax3],     %[k21](%[tcos])             \n\t"
+             "append         %[ax0],     %[ax1],        16           \n\t"
+             "append         %[ax2],     %[ax3],        16           \n\t"
+             "multu          $ac2,       $0,            $0           \n\t"
+             "mulsaq_s.w.ph  $ac2,       %[ax0],        %[ax2]       \n\t"
+             "multu          $ac3,       $0,            $0           \n\t"
+             "dpaqx_s.w.ph   $ac3,       %[ax0],        %[ax2]       \n\t"
+             "extr_r.w       %[t01],     $ac0,          16           \n\t"
+             "extr_r.w       %[t21],     $ac1,          16           \n\t"
+             "extr_r.w       %[t11],     $ac2,          16           \n\t"
+             "extr_r.w       %[t31],     $ac3,          16           \n\t"
+
+            : [ax0] "=&r" (ax0), [ax1] "=&r" (ax1), [ax2] "=&r" (ax2), [ax3] "=&r" (ax3),
+              [t01] "=r" (t01), [t21] "=r" (t21), [t11] "=r" (t11), [t31] "=r" (t31)
+            : [in2] "r" (in2), [k11] "r" (k11), [tsin] "r" (tsin),[tcos] "r" (tcos),
+              [z] "r" (z), [k21] "r" (k21)
+            : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo",
+              "$ac3hi", "$ac3lo"
+        );
+
+        /* real parts stay in place, imaginary parts are exchanged between
+         * the mirrored entries */
+        z[n8-k-1].re = t0;
+        z[n8+k  ].im = t2;
+        z[n8-k-1].im = t21;
+        z[n8+k  ].re = t01;
+
+        z[n8-k-2].re = t1;
+        z[n8+k+1].im = t3;
+        z[n8-k-2].im = t31;
+        z[n8+k+1].re = t11;
+    }
+}
+#else
+/*
+ * Complex multiply with rounding right shift:
+ *   (dre + i*dim) = ((are + i*aim) * (bre + i*bim) + 0x4000) >> 15
+ * dre is assigned before dim; the four input expressions are each
+ * evaluated twice, so they must be free of side effects.
+ */
+#define CMUL_SR(dre, dim, are, aim, bre, bim)                               \
+    do {                                                                    \
+        (dre) = (((are) * (bre) - (aim) * (bim) + 0x4000) >> 15);           \
+        (dim) = (((are) * (bim) + (aim) * (bre) + 0x4000) >> 15);           \
+    } while (0)
+
+/**
+ * Half-length fixed-point inverse MDCT, portable C version used when the
+ * DSP inline-assembly variant is not compiled in.
+ *
+ * Pre-rotates the input into bit-reversed order in z, runs
+ * s->fft_fixed_calc() in place, then post-rotates mirrored pairs of
+ * entries around n/8 while exchanging their imaginary parts.
+ *
+ * @param s      MDCT context (revtab, tcos, tsin, mdct_bits, fft_fixed_calc)
+ * @param output destination, reinterpreted as an array of FFTComplex
+ * @param input  source coefficients; n/2 samples are read
+ */
+static void ff_imdct_fixed_half_mips(FFTContext *s, FFTSample *output, const FFTSample *input)
+{
+    const int n  = 1 << s->mdct_bits;
+    const int n2 = n >> 1;
+    const int n4 = n >> 2;
+    const int n8 = n >> 3;
+    const uint16_t  *revtab = s->revtab;
+    const FFTSample *tcos   = s->tcos;
+    const FFTSample *tsin   = s->tsin;
+    const FFTSample *fwd    = input;            /* even samples, ascending */
+    const FFTSample *bwd    = input + n2 - 1;   /* odd samples, descending */
+    FFTComplex *z = (FFTComplex *)output;
+    int k;
+
+    /* pre rotation */
+    for (k = 0; k < n4; k++, fwd += 2, bwd -= 2) {
+        FFTComplex *dst = &z[revtab[k]];
+
+        CMUL_SR(dst->re, dst->im, *bwd, *fwd, tcos[k], tsin[k]);
+    }
+
+    s->fft_fixed_calc(s, z);
+
+    /* post rotation + reordering */
+    for (k = 0; k < n8; k++) {
+        const int below = n8 - k - 1;
+        const int above = n8 + k;
+        FFTSample r0, i0, r1, i1;
+
+        CMUL_SR(r0, i1, z[below].im, z[below].re, tsin[below], tcos[below]);
+        CMUL_SR(r1, i0, z[above].im, z[above].re, tsin[above], tcos[above]);
+
+        z[below].re = r0;
+        z[below].im = i0;
+        z[above].re = r1;
+        z[above].im = i1;
+    }
+}
+#endif /* HAVE_MIPSDSPR2 && HAVE_INLINE_ASM */
+
+/**
+ * Initialise an FFTContext for a fixed-point MDCT of 2^nbits samples,
+ * copying the twiddles from the tcos_fixed / tsin_fixed tables instead of
+ * computing them.
+ *
+ * NOTE(review): n/4 entries are read from each table without checking
+ * nbits against the table length -- presumably callers only pass the size
+ * the tables were generated for; confirm.
+ *
+ * @param s       context to initialise (fully overwritten)
+ * @param nbits   log2 of the MDCT size
+ * @param inverse forwarded unchanged to ff_fft_init()
+ * @param scale   not used by this variant
+ * @return 0 on success, -1 on failure (after ff_mdct_end(s))
+ */
+av_cold int ff_mdct_fixed_init_hardcoded(FFTContext *s, int nbits, int inverse, int scale)
+{
+    int len, len4, step, pos;
+
+    memset(s, 0, sizeof(*s));
+    len  = 1 << nbits;
+    len4 = len >> 2;
+    s->mdct_bits        = nbits;
+    s->mdct_size        = len;
+    s->mdct_permutation = FF_MDCT_PERM_NONE;
+
+    if (ff_fft_init(s, s->mdct_bits - 2, inverse) < 0)
+        goto fail;
+
+    s->tcos = av_malloc((len * sizeof(int)) / 2);
+    if (!s->tcos)
+        goto fail;
+
+    if (s->mdct_permutation == FF_MDCT_PERM_NONE) {
+        s->tsin = s->tcos + len4;
+        step    = 1;
+    } else if (s->mdct_permutation == FF_MDCT_PERM_INTERLEAVE) {
+        s->tsin = s->tcos + 1;
+        step    = 2;
+    } else {
+        goto fail;
+    }
+
+    for (pos = 0; pos < len4; pos++) {
+        s->tcos[pos * step] = tcos_fixed[pos];
+        s->tsin[pos * step] = tsin_fixed[pos];
+    }
+    return 0;
+
+fail:
+    ff_mdct_end(s);
+    return -1;
+}
+
+#if HAVE_MIPSDSPR2 && HAVE_INLINE_ASM
+static void ff_fft_fixed_calc_mips(FFTContext *s, FFTComplex *z)
+{
+
+    int nbits, i, n, num_transforms, offset, step;
+    int n4, n2, n34;
+    FFTSample tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
+    int step2;
+    int temp1, temp2, temp3, temp4;
+    int z0, z1, z2, z3;
+    int t12, t34, t56, t78, t0a, t1a, t2a, t3a;
+    int in1, in2, in3, in4;
+    FFTComplex *tmpz, *addr1, *addr2, *addr3;
+    int w_re, w_im;
+    FFTSample *w_re_ptr, *w_im_ptr;
+    int pom;
+    const int fft_size = (1 << s->nbits);
+
+    FFTComplex *tmpz_n2, *tmpz_n34, *tmpz_n4;
+    FFTComplex *tmpz_n2_i, *tmpz_n34_i, *tmpz_n4_i, *tmpz_i;
+
+    int z_re_n2, z_im_n2, z_re_n34, z_im_n34, z_re, z_im, z_re_n4, z_im_n4;
+
+    num_transforms = (0x2aab >> (16 - s->nbits)) | 1;
+    for (n=0; n<num_transforms; n++)
+    {
+        offset = fft_offsets_lut[n] << 2;
+        tmpz = z + offset;
+
+        /* fft4 */
+        __asm__ volatile (
+            "lw         %[z0],      0(%[tmpz])              \n\t"
+            "lw         %[z1],      4(%[tmpz])              \n\t"
+            "lw         %[z2],      8(%[tmpz])              \n\t"
+            "lw         %[z3],      12(%[tmpz])             \n\t"
+            "addq.ph    %[t12],     %[z0],      %[z1]       \n\t"
+            "subq.ph    %[t34],     %[z0],      %[z1]       \n\t"
+            "addq.ph    %[t56],     %[z2],      %[z3]       \n\t"
+            "subq.ph    %[t78],     %[z2],      %[z3]       \n\t"
+            "addq.ph    %[t0a],     %[t12],     %[t56]      \n\t"
+            "packrl.ph  %[t78],     %[t78],     %[t78]      \n\t"
+            "subq.ph    %[t2a],     %[t12],     %[t56]      \n\t"
+            "addq.ph    %[t1a],     %[t34],     %[t78]      \n\t"
+            "subq.ph    %[t3a],     %[t34],     %[t78]      \n\t"
+            "packrl.ph  %[t1a],     %[t1a],     %[t1a]      \n\t"
+            "packrl.ph  %[t3a],     %[t3a],     %[t3a]      \n\t"
+            "sw         %[t0a],     0(%[tmpz])              \n\t"
+            "packrl.ph  %[z1],      %[t1a],     %[t3a]      \n\t"
+            "packrl.ph  %[z3],      %[t3a],     %[t1a]      \n\t"
+            "sw         %[t2a],     8(%[tmpz])              \n\t"
+            "sw         %[z3],      4(%[tmpz])              \n\t"
+            "sw         %[z1],      12(%[tmpz])             \n\t"
+
+            : [z0] "=&r" (z0), [z1] "=&r" (z1), [t12] "=&r" (t12),
+              [z2] "=&r" (z2), [z3] "=&r" (z3), [t34] "=&r" (t34),
+              [t56] "=&r" (t56), [t78] "=&r" (t78), [t0a] "=&r" (t0a),
+              [t1a] "=&r" (t1a), [t2a] "=&r" (t2a), [t3a] "=&r" (t3a)
+            : [tmpz] "r" (tmpz)
+            : "memory"
+        );
+    }
+
+    if (fft_size < 8)
+        return;
+
+    pom = 23170;
+
+    num_transforms = (num_transforms >> 1) | 1;
+    for (n=0; n<num_transforms; n++)
+    {
+        offset = fft_offsets_lut[n] << 3;
+        tmpz = z + offset;
+
+        /* fft8 */
+        __asm__ volatile (
+            "lw         %[in1],     16(%[tmpz])             \t\n"
+            "lw         %[in2],     20(%[tmpz])             \t\n"
+            "lw         %[in3],     24(%[tmpz])             \t\n"
+            "lw         %[in4],     28(%[tmpz])             \t\n"
+            "addq.ph    %[temp1],   %[in1],     %[in2]      \t\n"
+            "subq.ph    %[temp3],   %[in1],     %[in2]      \t\n"
+            "seh        %[tmp1],    %[temp1]                \t\n"
+            "sra        %[temp1],   %[temp1],   16          \t\n"
+            "seh        %[tmp2],    %[temp1]                \t\n"
+            "addq.ph    %[temp2],   %[in3],     %[in4]      \t\n"
+            "subq.ph    %[temp4],   %[in3],     %[in4]      \t\n"
+            "seh        %[tmp3],    %[temp2]                \t\n"
+            "sra        %[temp2],   %[temp2],   16          \t\n"
+            "seh        %[tmp4],    %[temp2]                \t\n"
+            "add        %[tmp5],    %[tmp1],    %[tmp3]     \t\n"
+            "sub        %[tmp7],    %[tmp1],    %[tmp3]     \t\n"
+            "add        %[tmp6],    %[tmp2],    %[tmp4]     \t\n"
+            "sub        %[tmp8],    %[tmp2],    %[tmp4]     \t\n"
+            "seh        %[tmp1],    %[temp3]                \t\n"
+            "sra        %[temp3],   %[temp3],   16          \t\n"
+            "seh        %[tmp2],    %[temp3]                \t\n"
+            "seh        %[tmp3],    %[temp4]                \t\n"
+            "sra        %[temp4],   %[temp4],   16          \t\n"
+            "seh        %[tmp4],    %[temp4]                \t\n"
+            "lw         %[in1],     0(%[tmpz])              \t\n"
+            "move       %[temp1],   %[tmp6]                 \t\n"
+            "append     %[temp1],   %[tmp5],    16          \t\n"
+            "subq.ph    %[temp3],   %[in1],     %[temp1]    \t\n"
+            "addq.ph    %[temp4],   %[in1],     %[temp1]    \t\n"
+            "sw         %[temp3],   16(%[tmpz])             \t\n"
+            "sw         %[temp4],   0(%[tmpz])              \t\n"
+            "lw         %[in2],     8(%[tmpz])              \t\n"
+            "negu       %[temp1],   %[tmp7]                 \t\n"
+            "append     %[temp1],   %[tmp8],    16          \t\n"
+            "subq.ph    %[temp2],   %[in2],     %[temp1]    \t\n"
+            "addq.ph    %[temp3],   %[in2],     %[temp1]    \t\n"
+            "sw         %[temp2],   24(%[tmpz])             \t\n"
+            "sw         %[temp3],   8(%[tmpz])              \t\n"
+            "add        %[tmp5],    %[tmp1],    %[tmp2]     \t\n"
+            "mul        %[tmp5],    %[tmp5],    %[pom]      \t\n"
+            "sub        %[tmp6],    %[tmp2],    %[tmp1]     \t\n"
+            "mul        %[tmp6],    %[tmp6],    %[pom]      \t\n"
+            "sub        %[tmp7],    %[tmp3],    %[tmp4]     \t\n"
+            "mul        %[tmp7],    %[tmp7],    %[pom]      \t\n"
+            "add        %[tmp8],    %[tmp3],    %[tmp4]     \t\n"
+            "mul        %[tmp8],    %[tmp8],    %[pom]      \t\n"
+            "shra_r.w   %[tmp5],    %[tmp5],    15          \t\n"
+            "lw         %[in1],     4(%[tmpz])              \t\n"
+            "shra_r.w   %[tmp6],    %[tmp6],    15          \t\n"
+            "lw         %[in2],     12(%[tmpz])             \t\n"
+            "shra_r.w   %[tmp7],    %[tmp7],    15          \t\n"
+            "add        %[tmp1],    %[tmp5],    %[tmp7]     \t\n"
+            "shra_r.w   %[tmp8],    %[tmp8],    15          \t\n"
+            "add        %[tmp2],    %[tmp6],    %[tmp8]     \t\n"
+            "sub        %[tmp3],    %[tmp5],    %[tmp7]     \t\n"
+            "sub        %[tmp4],    %[tmp6],    %[tmp8]     \t\n"
+            "move       %[temp1],   %[tmp2]                 \t\n"
+            "append     %[temp1],   %[tmp1],    16          \t\n"
+            "subq.ph    %[temp2],   %[in1],     %[temp1]    \t\n"
+            "addq.ph    %[temp3],   %[in1],     %[temp1]    \t\n"
+            "sw         %[temp2],   20(%[tmpz])             \t\n"
+            "sw         %[temp3],   4(%[tmpz])              \t\n"
+            "negu       %[temp1],   %[tmp3]                 \t\n"
+            "append     %[temp1],   %[tmp4],    16          \t\n"
+            "subq.ph    %[temp2],   %[in2],     %[temp1]    \t\n"
+            "addq.ph    %[temp3],   %[in2],     %[temp1]    \t\n"
+            "sw         %[temp2],   28(%[tmpz])             \t\n"
+            "sw         %[temp3],   12(%[tmpz])             \t\n"
+
+            : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
+              [tmp4] "=&r" (tmp4), [tmp5] "=&r" (tmp5), [tmp6] "=&r" (tmp6),
+              [tmp7] "=&r" (tmp7), [tmp8] "=&r" (tmp8), [temp1] "=&r" (temp1),
+              [temp2] "=&r" (temp2), [temp3] "=&r" (temp3), [temp4] "=&r" (temp4),
+              [in1] "=&r" (in1), [in2] "=&r" (in2), [in3] "=&r" (in3),
+              [in4] "=&r" (in4)
+            : [tmpz] "r" (tmpz), [pom] "r" (pom)
+            : "memory"
+        );
+    }
+
+    step = 1 << (MAX_LOG2_NFFT - 4);
+    n4 = 4;
+
+    for (nbits=4; nbits<=s->nbits; nbits++)
+    {
+        n2  = 2*n4;
+        n34 = 3*n4;
+        num_transforms = (num_transforms >> 1) | 1;
+        for (n=0; n<num_transforms; n++)
+        {
+            offset = fft_offsets_lut[n] << nbits;
+            tmpz = z + offset;
+
+            __asm__ volatile (
+                "sll        %[z0],      %[n2],          2           \n\t"
+                "sll        %[z1],      %[n34],         2           \n\t"
+                "sll        %[z2],      %[n4],          2           \n\t"
+                "addu       %[addr1],   %[tmpz],        %[z0]       \n\t"
+                "addu       %[addr2],   %[tmpz],        %[z1]       \n\t"
+                "addu       %[addr3],   %[tmpz],        %[z2]       \n\t"
+                "lw         %[z0],      0(%[addr1])                 \n\t"
+                "lw         %[z1],      0(%[addr2])                 \n\t"
+                "lw         %[z2],      0(%[tmpz])                  \n\t"
+                "sll        %[step2],   %[step],        2           \n\t"
+                "lw         %[z3],      0(%[addr3])                 \n\t"
+                "addq.ph    %[t56],     %[z0],          %[z1]       \n\t"
+                "subq.ph    %[t12],     %[z0],          %[z1]       \n\t"
+                "addq.ph    %[t0a],     %[z2],          %[t56]      \n\t"
+                "packrl.ph  %[z3],      %[z3],          %[z3]       \n\t"
+                "subq.ph    %[t2a],     %[z2],          %[t56]      \n\t"
+                "addq.ph    %[t1a],     %[z3],          %[t12]      \n\t"
+                "subq.ph    %[t3a],     %[z3],          %[t12]      \n\t"
+                "sw         %[t0a],     0(%[tmpz])                  \n\t"
+                "sw         %[t2a],     0(%[addr1])                 \n\t"
+                "packrl.ph  %[z0],      %[t1a],         %[t3a]      \n\t"
+                "packrl.ph  %[z1],      %[t3a],         %[t1a]      \n\t"
+                "sw         %[z0],      0(%[addr2])                 \n\t"
+                "sw         %[z1],      0(%[addr3])                 \n\t"
+
+                : [z0] "=&r" (z0), [z1] "=&r" (z1), [t12] "=&r" (t12),
+                  [z2] "=&r" (z2), [z3] "=&r" (z3), [step2] "=&r" (step2),
+                  [t56] "=&r" (t56), [t0a] "=&r" (t0a), [t1a] "=&r" (t1a),
+                  [t2a] "=&r" (t2a), [t3a] "=&r" (t3a), [addr1] "=&r" (addr1),
+                  [addr2] "=&r" (addr2), [addr3] "=&r" (addr3)
+                : [n2] "r" (n2), [n34] "r" (n34), [n4] "r" (n4), [tmpz] "r" (tmpz),
+                  [step] "r" (step)
+                : "memory"
+            );
+
+            w_re_ptr = (FFTSample*)(ff_cos_65536_fixed + step);
+            w_im_ptr = (FFTSample*)(ff_cos_65536_fixed + MAX_FFT_SIZE/4 - step);
+
+            for (i=1; i<n4; i ++ )
+            {
+                w_re = w_re_ptr[0];
+                w_im = w_im_ptr[0];
+
+                tmpz_n2  = tmpz + n2;
+                tmpz_n4  = tmpz + n4;
+                tmpz_n34 = tmpz + n34;
+
+                tmpz_n2_i  = tmpz_n2  + i;
+                tmpz_n4_i  = tmpz_n4  + i;
+                tmpz_n34_i = tmpz_n34 + i;
+                tmpz_i     = tmpz     + i;
+
+                __asm__ volatile (
+                    "lh         %[z_re_n2],     0(%[tmpz_n2_i])                     \n\t"
+                    "lh         %[z_im_n2],     2(%[tmpz_n2_i])                     \n\t"
+                    "lh         %[z_re_n34],    0(%[tmpz_n34_i])                    \n\t"
+                    "lh         %[z_im_n34],    2(%[tmpz_n34_i])                    \n\t"
+                    "mult       $ac0,           %[w_re],            %[z_re_n2]      \n\t"
+                    "mult       $ac2,           %[w_re],            %[z_re_n34]     \n\t"
+                    "mult       $ac1,           %[w_re],            %[z_im_n2]      \n\t"
+                    "mult       $ac3,           %[w_re],            %[z_im_n34]     \n\t"
+                    "madd       $ac0,           %[w_im],            %[z_im_n2]      \n\t"
+                    "msub       $ac2,           %[w_im],            %[z_im_n34]     \n\t"
+                    "msub       $ac1,           %[w_im],            %[z_re_n2]      \n\t"
+                    "madd       $ac3,           %[w_im],            %[z_re_n34]     \n\t"
+                    "lh         %[z_re],        0(%[tmpz_i])                        \n\t"
+                    "extr_r.w   %[tmp1],        $ac0, 15                            \n\t"
+                    "extr_r.w   %[tmp3],        $ac2, 15                            \n\t"
+                    "extr_r.w   %[tmp2],        $ac1, 15                            \n\t"
+                    "extr_r.w   %[tmp4],        $ac3, 15                            \n\t"
+                    "lh         %[z_im],        2(%[tmpz_i])                        \n\t"
+                    "lh         %[z_re_n4],     0(%[tmpz_n4_i])                     \n\t"
+                    "lh         %[z_im_n4],     2(%[tmpz_n4_i])                     \n\t"
+                    "add        %[tmp5],        %[tmp1],            %[tmp3]         \n\t"
+                    "sub        %[tmp1],        %[tmp1],            %[tmp3]         \n\t"
+                    "add        %[tmp6],        %[tmp2],            %[tmp4]         \n\t"
+                    "sub        %[tmp2],        %[tmp2],            %[tmp4]         \n\t"
+                    "subq_s.ph  %[z_re_n2],     %[z_re],            %[tmp5]         \n\t"
+                    "addq_s.ph  %[z_re],        %[z_re],            %[tmp5]         \n\t"
+                    "subq_s.ph  %[z_im_n2],     %[z_im],            %[tmp6]         \n\t"
+                    "addq_s.ph  %[z_im],        %[z_im],            %[tmp6]         \n\t"
+                    "sh         %[z_re_n2],     0(%[tmpz_n2_i])                     \n\t"
+                    "sh         %[z_re],        0(%[tmpz_i])                        \n\t"
+                    "sh         %[z_im_n2],     2(%[tmpz_n2_i])                     \n\t"
+                    "sh         %[z_im],        2(%[tmpz_i])                        \n\t"
+                    "subq_s.ph  %[z_re_n34],    %[z_re_n4],         %[tmp2]         \n\t"
+                    "addq_s.ph  %[z_re_n4],     %[z_re_n4],         %[tmp2]         \n\t"
+                    "addq_s.ph  %[z_im_n34],    %[z_im_n4],         %[tmp1]         \n\t"
+                    "subq_s.ph  %[z_im_n4],     %[z_im_n4],         %[tmp1]         \n\t"
+                    "sh         %[z_re_n34],    0(%[tmpz_n34_i])                    \n\t"
+                    "sh         %[z_re_n4],     0(%[tmpz_n4_i])                     \n\t"
+                    "sh         %[z_im_n34],    2(%[tmpz_n34_i])                    \n\t"
+                    "sh         %[z_im_n4],     2(%[tmpz_n4_i])                     \n\t"
+
+                    : [z_re_n2] "=&r" (z_re_n2), [z_re] "=&r" (z_re), [z_im] "=&r" (z_im),
+                      [z_im_n2] "=&r" (z_im_n2), [z_re_n34] "=&r" (z_re_n34),
+                      [z_im_n4] "=&r" (z_im_n4), [z_re_n4] "=&r" (z_re_n4),
+                      [z_im_n34] "=&r" (z_im_n34), [tmp1] "=r" (tmp1),
+                      [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4),
+                      [tmp5] "=&r" (tmp5), [tmp6] "=&r" (tmp6)
+                    : [w_re] "r" (w_re), [w_im] "r" (w_im), [tmpz_n2_i] "r" (tmpz_n2_i),
+                      [tmpz_n34_i] "r" (tmpz_n34_i), [tmpz_n4_i] "r" (tmpz_n4_i),
+                      [tmpz_i] "r" (tmpz_i)
+                    : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo",
+                      "$ac3hi", "$ac3lo"
+              );
+              w_re_ptr += step;
+              w_im_ptr -= step;
+            }
+        }
+        step >>= 1;
+        n4   <<= 1;
+    }
+}
+#else
+static void ff_fft_fixed_calc_mips(FFTContext *s, FFTComplex *z) {
+    /* Plain-C fallback: in-place fixed-point FFT of z, done as a 4-point pass, an 8-point pass, then 16..(1 << s->nbits)-point passes. */
+    int nbits, i, n, num_transforms, offset, step;
+    int n4, n2, n34;
+    int tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
+
+    FFTComplex *tmpz;
+
+    int w_re, w_im;
+    FFTSample *w_re_ptr, *w_im_ptr;
+    int pom;
+    const int fft_size = (1 << s->nbits);
+
+    num_transforms = (0x2aab >> (16 - s->nbits)) | 1;
+    for (n=0; n<num_transforms; n++)
+    {
+        offset = fft_offsets_lut[n] << 2;
+        tmpz = z + offset;
+        /* 4-point pass on tmpz[0..3]: pairwise sums and differences first. */
+        tmp1 = tmpz[0].re + tmpz[1].re;
+        tmp5 = tmpz[2].re + tmpz[3].re;
+        tmp2 = tmpz[0].im + tmpz[1].im;
+        tmp6 = tmpz[2].im + tmpz[3].im;
+        tmp3 = tmpz[0].re - tmpz[1].re;
+        tmp8 = tmpz[2].im - tmpz[3].im;
+        tmp4 = tmpz[0].im - tmpz[1].im;
+        tmp7 = tmpz[2].re - tmpz[3].re;
+        /* Combine and store back; these stores are not passed through av_clip_int16(). */
+        tmpz[0].re = tmp1 + tmp5;
+        tmpz[2].re = tmp1 - tmp5;
+        tmpz[0].im = tmp2 + tmp6;
+        tmpz[2].im = tmp2 - tmp6;
+        tmpz[1].re = tmp3 + tmp8;
+        tmpz[3].re = tmp3 - tmp8;
+        tmpz[1].im = tmp4 - tmp7;
+        tmpz[3].im = tmp4 + tmp7;
+
+    }
+    if (fft_size < 8)
+    return; /* fewer than 8 points: only the 4-point pass applies */
+
+    num_transforms = (num_transforms >> 1) | 1;
+    for (n=0; n<num_transforms; n++)
+    {
+        offset = fft_offsets_lut[n] << 3;
+        tmpz = z + offset;
+        /* 8-point pass: sums and differences within tmpz[4..7], then merged into tmpz[0..3]. */
+        tmp1 = tmpz[4].re + tmpz[5].re;
+        tmp3 = tmpz[6].re + tmpz[7].re;
+        tmp2 = tmpz[4].im + tmpz[5].im;
+        tmp4 = tmpz[6].im + tmpz[7].im;
+
+        tmp5 = tmp1 + tmp3;
+        tmp7 = tmp1 - tmp3;
+        tmp6 = tmp2 + tmp4;
+        tmp8 = tmp2 - tmp4;
+
+        tmp1 = tmpz[4].re - tmpz[5].re;
+        tmp2 = tmpz[4].im - tmpz[5].im;
+        tmp3 = tmpz[6].re - tmpz[7].re;
+        tmp4 = tmpz[6].im - tmpz[7].im;
+        /* Even-indexed outputs (0, 2, 4, 6): additions and subtractions only. */
+        tmpz[4].re = tmpz[0].re - tmp5;
+        tmpz[0].re = tmpz[0].re + tmp5;
+        tmpz[4].im = tmpz[0].im - tmp6;
+        tmpz[0].im = tmpz[0].im + tmp6;
+        tmpz[6].re = tmpz[2].re - tmp8;
+        tmpz[2].re = tmpz[2].re + tmp8;
+        tmpz[6].im = tmpz[2].im + tmp7;
+        tmpz[2].im = tmpz[2].im - tmp7;
+
+        pom = 23170; /* ~= 2^15 / sqrt(2); the products below are rounded (+0x4000) and shifted right by 15 */
+
+        tmp5 = (pom * (tmp1 + tmp2) + 0x4000) >> 15;
+        tmp7 = (pom * (tmp3 - tmp4) + 0x4000) >> 15;
+        tmp6 = (pom * (tmp2 - tmp1) + 0x4000) >> 15;
+        tmp8 = (pom * (tmp3 + tmp4) + 0x4000) >> 15;
+
+        tmp1 = tmp5 + tmp7;
+        tmp3 = tmp5 - tmp7;
+        tmp2 = tmp6 + tmp8;
+        tmp4 = tmp6 - tmp8;
+        /* Odd-indexed outputs (1, 3, 5, 7): merge the scaled terms with tmpz[1] and tmpz[3]. */
+        tmpz[5].re = tmpz[1].re - tmp1;
+        tmpz[1].re = tmpz[1].re + tmp1;
+        tmpz[5].im = tmpz[1].im - tmp2;
+        tmpz[1].im = tmpz[1].im + tmp2;
+        tmpz[7].re = tmpz[3].re - tmp4;
+        tmpz[3].re = tmpz[3].re + tmp4;
+        tmpz[7].im = tmpz[3].im + tmp3;
+        tmpz[3].im = tmpz[3].im - tmp3;
+    }
+
+    step = 1 << (MAX_LOG2_NFFT - 4); /* stride into ff_cos_65536_fixed for the first (nbits == 4) pass */
+    n4 = 4;
+    for (nbits=4; nbits<=s->nbits; nbits++)
+    {
+        n2 = 2*n4;
+        n34 = 3*n4;
+        num_transforms = (num_transforms >> 1) | 1;
+        for (n=0; n<num_transforms; n++)
+        {
+            offset = fft_offsets_lut[n] << nbits;
+            tmpz = z + offset;
+            /* Element 0 of each quarter is handled separately: no multiplies and no clipping. */
+            tmp5 = tmpz[ n2].re + tmpz[n34].re;
+            tmp1 = tmpz[ n2].re - tmpz[n34].re;
+            tmp6 = tmpz[ n2].im + tmpz[n34].im;
+            tmp2 = tmpz[ n2].im - tmpz[n34].im;
+
+            tmpz[ n2].re = tmpz[ 0].re - tmp5;
+            tmpz[ 0].re  = tmpz[ 0].re + tmp5;
+            tmpz[ n2].im = tmpz[ 0].im - tmp6;
+            tmpz[ 0].im  = tmpz[ 0].im + tmp6;
+            tmpz[n34].re = tmpz[n4].re - tmp2;
+            tmpz[ n4].re = tmpz[n4].re + tmp2;
+            tmpz[n34].im = tmpz[n4].im + tmp1;
+            tmpz[ n4].im = tmpz[n4].im - tmp1;
+
+            w_re_ptr = (FFTSample*)(ff_cos_65536_fixed + step); /* advanced by step per i */
+            w_im_ptr = (FFTSample*)(ff_cos_65536_fixed + MAX_FFT_SIZE/4 - step); /* moved back by step per i */
+
+            for (i=1; i<n4; i++)
+            {
+                w_re = w_re_ptr[0];
+                w_im = w_im_ptr[0];
+                /* tmp1/tmp2 = z[n2+i] * (w_re - j*w_im), tmp3/tmp4 = z[n34+i] * (w_re + j*w_im); rounded, >> 15. */
+                tmp1 = (w_re * tmpz[ n2+i].re + w_im * tmpz[ n2+i].im + 0x4000) >> 15;
+                tmp2 = (w_re * tmpz[ n2+i].im - w_im * tmpz[ n2+i].re + 0x4000) >> 15;
+                tmp3 = (w_re * tmpz[n34+i].re - w_im * tmpz[n34+i].im + 0x4000) >> 15;
+                tmp4 = (w_re * tmpz[n34+i].im + w_im * tmpz[n34+i].re + 0x4000) >> 15;
+
+                tmp5 = tmp1 + tmp3;
+                tmp1 = tmp1 - tmp3;
+                tmp6 = tmp2 + tmp4;
+                tmp2 = tmp2 - tmp4;
+                /* Final butterflies; every result is saturated with av_clip_int16() before it is stored. */
+                tmpz[n2+i ].re = av_clip_int16(tmpz[i   ].re - tmp5);
+                tmpz[i    ].re = av_clip_int16(tmpz[i   ].re + tmp5);
+                tmpz[n2+i ].im = av_clip_int16(tmpz[i   ].im - tmp6);
+                tmpz[i    ].im = av_clip_int16(tmpz[i   ].im + tmp6);
+                tmpz[n34+i].re = av_clip_int16(tmpz[n4+i].re - tmp2);
+                tmpz[n4+i ].re = av_clip_int16(tmpz[n4+i].re + tmp2);
+                tmpz[n34+i].im = av_clip_int16(tmpz[n4+i].im + tmp1);
+                tmpz[n4+i ].im = av_clip_int16(tmpz[n4+i].im - tmp1);
+
+                w_re_ptr += step;
+                w_im_ptr -= step;
+            }
+        }
+        step >>= 1; /* the next pass is twice as long, so the table stride halves */
+        n4 <<= 1;
+    }
+}
+#endif /* HAVE_MIPSDSPR2 && HAVE_INLINE_ASM */
+/* Point the fixed-point transform hooks of s at the MIPS implementations from this file. */
+void ff_fft_fixed_init_mips(FFTContext *s) {
+    s->fft_fixed_calc = ff_fft_fixed_calc_mips;
+#if CONFIG_MDCT
+    s->imdct_fixed_half = ff_imdct_fixed_half_mips; /* only set when MDCT support is configured */
+#endif
+}
diff --git a/libavcodec/mips/fft_table_fixed.h b/libavcodec/mips/fft_table_fixed.h
new file mode 100644
index 0000000..637cf99
--- /dev/null
+++ b/libavcodec/mips/fft_table_fixed.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Authors:  Stanislav Ocovaj (socovaj at mips.com)
+ *
+ * Tables necessary for performing fixed-point MDCT/IMDCT transforms
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MIPS_FFT_FIXED_TABLE_H
+#define AVCODEC_MIPS_FFT_FIXED_TABLE_H
+
+/* TODO: Support MDCT/IMDCT other than 64 and 128 */
+/* 64-entry tsin table for the fixed-point MDCT/IMDCT; static so each including file gets a private copy instead of a duplicate external symbol. */
+static FFTSample tsin_fixed[64] = { -100, -904, -1708, -2510, -3311, -4110, -4907,
+        -5701, -6491, -7277, -8059, -8836, -9608, -10374, -11133, -11886,
+        -12632, -13370, -14100, -14822, -15535, -16238, -16932, -17615, -18288,
+        -18950, -19600, -20239, -20865, -21479, -22080, -22667, -23241, -23801,
+        -24346, -24877, -25393, -25894, -26379, -26848, -27301, -27737, -28157,
+        -28560, -28946, -29314, -29664, -29997, -30312, -30608, -30886, -31145,
+        -31386, -31607, -31810, -31993, -32157, -32302, -32427, -32533, -32619,
+        -32686, -32733, -32760 };
+/* 64-entry tcos table for the fixed-point MDCT/IMDCT; static so each including file gets a private copy instead of a duplicate external symbol. */
+static FFTSample tcos_fixed[64] = { -32767, -32755, -32723, -32671, -32600, -32509,
+        -32398, -32268, -32118, -31949, -31761, -31554, -31327, -31082, -30818,
+        -30535, -30235, -29915, -29578, -29223, -28851, -28461, -28054, -27630,
+        -27189, -26732, -26259, -25770, -25266, -24746, -24211, -23662, -23099,
+        -22521, -21931, -21326, -20709, -20080, -19439, -18785, -18121, -17445,
+        -16759, -16063, -15357, -14642, -13919, -13186, -12446, -11699, -10944,
+        -10183, -9415, -8642, -7864, -7081, -6294, -5503, -4708, -3911, -3111,
+        -2310, -1507, -703 };
+/* 128-entry tsin table for the fixed-point MDCT/IMDCT; static so each including file gets a private copy instead of a duplicate external symbol. */
+static FFTSample tsin_fixed_128[128] = { -50, -452, -854, -1256, -1658, -2059, -2460,
+        -2861, -3261, -3661, -4061, -4459, -4857, -5255, -5651, -6047, -6442,
+        -6835, -7228, -7620, -8010, -8400, -8788, -9174, -9560, -9944, -10326,
+        -10707, -11086, -11464, -11839, -12213, -12586, -12956, -13324, -13691,
+        -14055, -14417, -14777, -15135, -15491, -15844, -16195, -16543, -16889,
+        -17232, -17573, -17911, -18246, -18579, -18909, -19236, -19560, -19881,
+        -20199, -20514, -20826, -21135, -21441, -21743, -22042, -22338, -22631,
+        -22920, -23205, -23488, -23766, -24041, -24313, -24580, -24845, -25105,
+        -25361, -25614, -25863, -26108, -26349, -26586, -26819, -27048, -27273,
+        -27494, -27711, -27923, -28131, -28335, -28535, -28731, -28922, -29109,
+        -29291, -29469, -29643, -29812, -29977, -30137, -30292, -30443, -30590,
+        -30732, -30869, -31001, -31129, -31253, -31371, -31485, -31594, -31698,
+        -31798, -31892, -31982, -32067, -32148, -32223, -32294, -32359, -32420,
+        -32476, -32527, -32573, -32615, -32651, -32682, -32709, -32730, -32747,
+        -32759, -32766 };
+/* 128-entry tcos table for the fixed-point MDCT/IMDCT; static so each including file gets a private copy instead of a duplicate external symbol. */
+static FFTSample tcos_fixed_128[128] = { -32767, -32764, -32756, -32743, -32726, -32703,
+        -32675, -32642, -32605, -32562, -32515, -32463, -32405, -32343, -32276,
+        -32205, -32128, -32047, -31960, -31869, -31773, -31673, -31567, -31457,
+        -31342, -31222, -31098, -30969, -30835, -30697, -30554, -30406, -30254,
+        -30097, -29936, -29770, -29600, -29425, -29246, -29062, -28875, -28682,
+        -28486, -28285, -28080, -27870, -27657, -27439, -27217, -26991, -26761,
+        -26527, -26289, -26047, -25801, -25551, -25298, -25040, -24779, -24514,
+        -24245, -23973, -23697, -23417, -23134, -22848, -22558, -22265, -21968,
+        -21668, -21365, -21058, -20748, -20436, -20120, -19801, -19479, -19154,
+        -18826, -18496, -18163, -17827, -17488, -17146, -16802, -16456, -16107,
+        -15756, -15402, -15046, -14687, -14327, -13964, -13599, -13232, -12864,
+        -12493, -12120, -11746, -11369, -10991, -10612, -10230, -9848, -9463,
+        -9078, -8691, -8302, -7913, -7522, -7130, -6737, -6343, -5948, -5552,
+        -5155, -4758, -4360, -3961, -3561, -3161, -2761, -2360, -1959, -1557,
+        -1155, -753, -351 };
+
+#endif /* AVCODEC_MIPS_FFT_FIXED_TABLE_H */
diff --git a/libavcodec/mips/fmtconvert_mips_fixed.c b/libavcodec/mips/fmtconvert_mips_fixed.c
new file mode 100644
index 0000000..bc3ada0
--- /dev/null
+++ b/libavcodec/mips/fmtconvert_mips_fixed.c
@@ -0,0 +1,226 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Zoran Lukic (zlukic at mips.com)
+ *
+ * Format Conversion Utils optimized for MIPS fixed-point architecture
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/fmtconvert.c
+ */
+
+#include "libavcodec/fmtconvert.h"
+/* Stores the low 16 bits of (src[i] * mul + 0x8000) >> 16 into dst[i] for i in [0, len), eight samples per iteration. */
+static void int32_to_fixed_fmul_scalar_mips(int16_t *dst, const int *src,
+                                            int mul, int len)
+{
+    int i;
+    int temp1, temp3, temp5, temp7, temp9, temp11, temp13, temp15; /* 32-bit scratch: each holds a full lw/mul result */
+
+    for (i=0; i<len; i+=8) { /* whole groups of 8 only: a len that is not a multiple of 8 is effectively rounded up */
+        __asm__ volatile (
+            "lw     %[temp1],   0(%[src_i])         \n\t"
+            "lw     %[temp3],   4(%[src_i])         \n\t"
+            "lw     %[temp5],   8(%[src_i])         \n\t"
+            "lw     %[temp7],   12(%[src_i])        \n\t"
+            "lw     %[temp9],   16(%[src_i])        \n\t"
+            "lw     %[temp11],  20(%[src_i])        \n\t"
+            "lw     %[temp13],  24(%[src_i])        \n\t"
+            "lw     %[temp15],  28(%[src_i])        \n\t"
+            "mul    %[temp1],   %[temp1],   %[mul]  \n\t"
+            "mul    %[temp3],   %[temp3],   %[mul]  \n\t"
+            "mul    %[temp5],   %[temp5],   %[mul]  \n\t"
+            "mul    %[temp7],   %[temp7],   %[mul]  \n\t"
+            "mul    %[temp9],   %[temp9],   %[mul]  \n\t"
+            "mul    %[temp11],  %[temp11],  %[mul]  \n\t"
+            "mul    %[temp13],  %[temp13],  %[mul]  \n\t"
+            "mul    %[temp15],  %[temp15],  %[mul]  \n\t"
+            "addiu  %[temp1],   %[temp1],   0x8000  \n\t"
+            "addiu  %[temp3],   %[temp3],   0x8000  \n\t"
+            "addiu  %[temp5],   %[temp5],   0x8000  \n\t"
+            "addiu  %[temp7],   %[temp7],   0x8000  \n\t"
+            "addiu  %[temp9],   %[temp9],   0x8000  \n\t"
+            "addiu  %[temp11],  %[temp11],  0x8000  \n\t"
+            "addiu  %[temp13],  %[temp13],  0x8000  \n\t"
+            "addiu  %[temp15],  %[temp15],  0x8000  \n\t"
+            "sra    %[temp1],   %[temp1],   0x10    \n\t"
+            "sra    %[temp3],   %[temp3],   0x10    \n\t"
+            "sra    %[temp5],   %[temp5],   0x10    \n\t"
+            "sra    %[temp7],   %[temp7],   0x10    \n\t"
+            "sra    %[temp9],   %[temp9],   0x10    \n\t"
+            "sra    %[temp11],  %[temp11],  0x10    \n\t"
+            "sra    %[temp13],  %[temp13],  0x10    \n\t"
+            "sra    %[temp15],  %[temp15],  0x10    \n\t"
+            "sh     %[temp1],   0(%[dst_i])         \n\t"
+            "sh     %[temp3],   2(%[dst_i])         \n\t"
+            "sh     %[temp5],   4(%[dst_i])         \n\t"
+            "sh     %[temp7],   6(%[dst_i])         \n\t"
+            "sh     %[temp9],   8(%[dst_i])         \n\t"
+            "sh     %[temp11],  10(%[dst_i])        \n\t"
+            "sh     %[temp13],  12(%[dst_i])        \n\t"
+            "sh     %[temp15],  14(%[dst_i])        \n\t"
+            /* "=&r" (early clobber): the outputs are written while src_i, mul and dst_i are still needed, so they must not share a register with any input. */
+            : [temp1] "=&r" (temp1),   [temp11] "=&r" (temp11),
+              [temp13] "=&r" (temp13), [temp15] "=&r" (temp15),
+              [temp3] "=&r" (temp3),   [temp5] "=&r" (temp5),
+              [temp7] "=&r" (temp7),   [temp9] "=&r" (temp9)
+            : [dst_i] "r" (dst+i),  [src_i] "r" (src+i),
+              [mul] "r" (mul)
+            : "memory"
+        );
+    }
+}
+/* Returns *src when *src < 0xf000 and 0xefff otherwise; in both cases only the sign-extended low 16 bits are returned. */
+static inline int fixed_to_int16_one_mips(const int *src)
+{
+    int16_t ret;
+    int temp1, temp7, temp8;
+    __asm__ volatile (
+        "lw     %[temp1],   0(%[src_i1])            \n\t" /* temp1 = *src */
+        "li     %[temp8],   0xf000                  \n\t" /* comparison threshold */
+        "li     %[ret1],    0xefff                  \n\t" /* default result: the cap */
+        "slt    %[temp7],   %[temp1],   %[temp8]    \n\t" /* temp7 = (temp1 < 0xf000), signed compare */
+        "movn   %[ret1],    %[temp1],   %[temp7]    \n\t" /* below the threshold: keep the input value */
+        "seh    %[ret1],    %[ret1]                 \n\t" /* sign-extend the low halfword */
+        : [temp1] "=r" (temp1), [temp7] "=r" (temp7),
+          [temp8] "=r" (temp8), [ret1] "=r" (ret)
+        : [src_i1] "r" (src)
+        : "memory"
+    );
+    return (int16_t) ret;
+}
+/* Interleaves len samples from each of the channels src[] arrays into dst, converting each as fixed_to_int16_one_mips() does. */
+static void fixed_to_int16_interleave_mips(int16_t *dst, const int **src,
+                                    long len, int channels)
+{
+    int i, c;
+
+    if (channels == 2) {
+        /* Two channels: one asm block converts both samples of frame i. */
+        for (i = 0; i < len; i++) {
+            int in0, in1, below, limit;
+            __asm__ volatile (
+                "lw     %[temp],    0(%[src_i])             \n\t"
+                "lw     %[temp1],   0(%[src_i1])            \n\t"
+                "li     %[temp8],   0xf000                  \n\t"
+                "li     %[ret],     0xefff                  \n\t"
+                "li     %[ret1],    0xefff                  \n\t"
+                "slt    %[temp7],   %[temp],    %[temp8]    \n\t"
+                "movn   %[ret],     %[temp],    %[temp7]    \n\t"
+                "slt    %[temp7],   %[temp1],   %[temp8]    \n\t"
+                "movn   %[ret1],    %[temp1],   %[temp7]    \n\t"
+                "seh    %[ret],     %[ret]                  \n\t"
+                "seh    %[ret1],    %[ret1]                 \n\t"
+
+                : [temp] "=&r" (in0),     [temp1] "=&r" (in1),
+                  [temp7] "=&r" (below),  [temp8] "=&r" (limit),
+                  [ret] "=&r" (dst[2*i]), [ret1] "=&r" (dst[2*i+1])
+                : [src_i] "r" (src[0]+i), [src_i1] "r" (src[1]+i)
+                : "memory"
+            );
+        }
+    } else if (channels == 6) {
+        /* Six channels: same sequence as above, unrolled over all six samples of frame i. */
+        for (i = 0; i < len; i++) {
+            int in0, in1, in2, in3, in4, in5, below, limit;
+            __asm__ volatile (
+                "lw     %[temp],    0(%[src_i])             \n\t"
+                "lw     %[temp1],   0(%[src_i1])            \n\t"
+                "lw     %[temp2],   0(%[src_i2])            \n\t"
+                "lw     %[temp3],   0(%[src_i3])            \n\t"
+                "lw     %[temp4],   0(%[src_i4])            \n\t"
+                "lw     %[temp5],   0(%[src_i5])            \n\t"
+                "li     %[temp8],   0xf000                  \n\t"
+                "li     %[ret],     0xefff                  \n\t"
+                "li     %[ret1],    0xefff                  \n\t"
+                "li     %[ret2],    0xefff                  \n\t"
+                "li     %[ret3],    0xefff                  \n\t"
+                "li     %[ret4],    0xefff                  \n\t"
+                "li     %[ret5],    0xefff                  \n\t"
+                "slt    %[temp7],   %[temp],    %[temp8]    \n\t"
+                "movn   %[ret],     %[temp],    %[temp7]    \n\t"
+                "slt    %[temp7],   %[temp1],   %[temp8]    \n\t"
+                "movn   %[ret1],    %[temp1],   %[temp7]    \n\t"
+                "slt    %[temp7],   %[temp2],   %[temp8]    \n\t"
+                "movn   %[ret2],    %[temp2],   %[temp7]    \n\t"
+                "slt    %[temp7],   %[temp3],   %[temp8]    \n\t"
+                "movn   %[ret3],    %[temp3],   %[temp7]    \n\t"
+                "slt    %[temp7],   %[temp4],   %[temp8]    \n\t"
+                "movn   %[ret4],    %[temp4],   %[temp7]    \n\t"
+                "slt    %[temp7],   %[temp5],   %[temp8]    \n\t"
+                "movn   %[ret5],    %[temp5],   %[temp7]    \n\t"
+                "seh    %[ret],     %[ret]                  \n\t"
+                "seh    %[ret1],    %[ret1]                 \n\t"
+                "seh    %[ret2],    %[ret2]                 \n\t"
+                "seh    %[ret5],    %[ret5]                 \n\t"
+                "seh    %[ret3],    %[ret3]                 \n\t"
+                "seh    %[ret4],    %[ret4]                 \n\t"
+
+                : [temp] "=&r" (in0),        [temp1] "=&r" (in1),
+                  [temp2] "=&r" (in2),       [temp3] "=&r" (in3),
+                  [temp4] "=&r" (in4),       [temp5] "=&r" (in5),
+                  [temp7] "=&r" (below),     [temp8] "=&r" (limit),
+                  [ret] "=&r" (dst[6*i]),    [ret1] "=&r" (dst[6*i+1]),
+                  [ret2] "=&r" (dst[6*i+2]), [ret3] "=&r" (dst[6*i+3]),
+                  [ret4] "=&r" (dst[6*i+4]), [ret5] "=&r" (dst[6*i+5])
+                : [src_i] "r" (src[0]+i),    [src_i1] "r" (src[1]+i),
+                  [src_i2] "r" (src[2]+i),   [src_i3] "r" (src[3]+i),
+                  [src_i4] "r" (src[4]+i),   [src_i5] "r" (src[5]+i)
+                : "memory"
+            );
+        }
+    } else {
+        /* Any other channel count: convert one sample at a time, channel by channel. */
+        for (c = 0; c < channels; c++)
+            for (i = 0; i < len; i++)
+                dst[i * channels + c] = fixed_to_int16_one_mips(src[c] + i);
+    }
+}
+/* Installs this file's MIPS fixed-point conversion routines in c; avctx is not used here. */
+void ff_fmt_convert_init_mips_fixed(FmtConvertContext *c, AVCodecContext *avctx) {
+    c->fixed_to_int16_interleave  = fixed_to_int16_interleave_mips;
+    c->int32_to_fixed_fmul_scalar = int32_to_fixed_fmul_scalar_mips;
+}
diff --git a/libavutil/common.h b/libavutil/common.h
index a11a325..07433d7 100644
--- a/libavutil/common.h
+++ b/libavutil/common.h
@@ -163,6 +163,18 @@ static av_always_inline av_const int16_t av_clip_int16_c(int a)
     else                      return a;
 }

+#if (ARCH_MIPS)
+/**
+ * Cap a signed integer at 0xefff (61439); values at or below the cap pass through, there is no lower bound.
+ * @param a value to clip
+ * @return a or 0xefff, converted to int16_t (NOTE(review): 0xefff does not fit in int16_t -- confirm the intended cap)
+ */
+static av_always_inline av_const int16_t av_clip_int16_c_fixed(int a)
+{
+    return (a > 0xefff ? 0xefff : a);
+}
+#endif /* ARCH_MIPS */
+
 /**
  * Clip a signed 64-bit integer value into the -2147483648,2147483647 range.
  * @param a value to clip
--
1.7.3.4



More information about the ffmpeg-devel mailing list