[FFmpeg-devel] [PATCH 1/2] Optimization of AC3 floating point decoder for MIPS

Nedeljko Babic nbabic at mips.com
Thu Jun 21 12:04:16 CEST 2012


The MIPS FFT implementation works iteratively instead
 of "recursively" calling functions for smaller FFT sizes.
Some DSP and format-conversion utility functions are also optimized.

Signed-off-by: Nedeljko Babic <nbabic at mips.com>
---
 libavcodec/dsputil.c              |    1 +
 libavcodec/dsputil.h              |    1 +
 libavcodec/fft.c                  |    1 +
 libavcodec/fft.h                  |   11 +
 libavcodec/fmtconvert.c           |    1 +
 libavcodec/fmtconvert.h           |    1 +
 libavcodec/mips/Makefile          |    4 +
 libavcodec/mips/dsputil_mips.c    |  168 +++++++++
 libavcodec/mips/fft_mips.c        |  689 +++++++++++++++++++++++++++++++++++++
 libavcodec/mips/fft_table.h       |  482 ++++++++++++++++++++++++++
 libavcodec/mips/fmtconvert_mips.c |  336 ++++++++++++++++++
 11 files changed, 1695 insertions(+), 0 deletions(-)
 create mode 100644 libavcodec/mips/dsputil_mips.c
 create mode 100644 libavcodec/mips/fft_mips.c
 create mode 100644 libavcodec/mips/fft_table.h
 create mode 100644 libavcodec/mips/fmtconvert_mips.c

diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 442b900..b7d928f 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -3161,6 +3161,7 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
     if (HAVE_MMI)        ff_dsputil_init_mmi   (c, avctx);
     if (ARCH_SH4)        ff_dsputil_init_sh4   (c, avctx);
     if (ARCH_BFIN)       ff_dsputil_init_bfin  (c, avctx);
+    if (HAVE_MIPSFPU)    ff_dsputil_init_mips  (c, avctx);
 
     for (i = 0; i < 4; i++) {
         for (j = 0; j < 16; j++) {
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index e1aefe1..b41af59 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -633,6 +633,7 @@ void ff_dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx);
 void ff_dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx);
 void ff_dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx);
 void ff_dsputil_init_vis(DSPContext* c, AVCodecContext *avctx);
+void ff_dsputil_init_mips(DSPContext* c, AVCodecContext *avctx);
 
 void ff_dsputil_init_dwt(DSPContext *c);
 void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx);
diff --git a/libavcodec/fft.c b/libavcodec/fft.c
index 6b93a5c..e5bdcbd 100644
--- a/libavcodec/fft.c
+++ b/libavcodec/fft.c
@@ -162,6 +162,7 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
     if (HAVE_ALTIVEC) ff_fft_init_altivec(s);
     if (HAVE_MMX)     ff_fft_init_mmx(s);
     if (CONFIG_MDCT)  s->mdct_calcw = s->mdct_calc;
+    if (HAVE_MIPSFPU) ff_fft_init_mips(s);
 #else
     if (CONFIG_MDCT)  s->mdct_calcw = ff_mdct_calcw_c;
     if (ARCH_ARM)     ff_fft_fixed_init_arm(s);
diff --git a/libavcodec/fft.h b/libavcodec/fft.h
index 0e19e94..b0f0ff2 100644
--- a/libavcodec/fft.h
+++ b/libavcodec/fft.h
@@ -38,6 +38,16 @@
 
 typedef float FFTDouble;
 
+#if ARCH_MIPS
+enum _fftConsts{
+    MIN_LOG2_NFFT = 5, //!< log2 of the minimum allowed fft size
+    MAX_LOG2_NFFT = 12 //!< log2 of the maximum allowed fft size
+};
+
+#define MAX_FFT_SIZE (1 << MAX_LOG2_NFFT) //!< maximum fft size in points (4096)
+#define MIN_FFT_SIZE (1 << MIN_LOG2_NFFT) //!< minimum fft size in points (32)
+
+#endif /* ARCH_MIPS */
 #else
 
 #define FFT_NAME(x) x ## _fixed
@@ -137,6 +147,7 @@ int ff_fft_init(FFTContext *s, int nbits, int inverse);
 void ff_fft_init_altivec(FFTContext *s);
 void ff_fft_init_mmx(FFTContext *s);
 void ff_fft_init_arm(FFTContext *s);
+void ff_fft_init_mips(FFTContext *s);
 #else
 void ff_fft_fixed_init_arm(FFTContext *s);
 #endif
diff --git a/libavcodec/fmtconvert.c b/libavcodec/fmtconvert.c
index c03117c..e47c205 100644
--- a/libavcodec/fmtconvert.c
+++ b/libavcodec/fmtconvert.c
@@ -85,6 +85,7 @@ av_cold void ff_fmt_convert_init(FmtConvertContext *c, AVCodecContext *avctx)
     if (ARCH_ARM) ff_fmt_convert_init_arm(c, avctx);
     if (HAVE_ALTIVEC) ff_fmt_convert_init_altivec(c, avctx);
     if (HAVE_MMX) ff_fmt_convert_init_x86(c, avctx);
+    if (HAVE_MIPSFPU) ff_fmt_convert_init_mips(c);
 }
 
 /* ffdshow custom code */
diff --git a/libavcodec/fmtconvert.h b/libavcodec/fmtconvert.h
index a9fbb31..ab2caa2 100644
--- a/libavcodec/fmtconvert.h
+++ b/libavcodec/fmtconvert.h
@@ -92,6 +92,7 @@ av_cold void ff_fmt_convert_init(FmtConvertContext *c, AVCodecContext *avctx);
 void ff_fmt_convert_init_arm(FmtConvertContext *c, AVCodecContext *avctx);
 void ff_fmt_convert_init_altivec(FmtConvertContext *c, AVCodecContext *avctx);
 void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx);
+void ff_fmt_convert_init_mips(FmtConvertContext *c);
 
 /* ffdshow custom code */
 void float_interleave(float *dst, const float **src, long len, int channels);
diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index 24a95b5..4dfb2e3 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -11,3 +11,7 @@ MIPSFPU-OBJS-$(CONFIG_AMRWB_DECODER)      += mips/acelp_filters_mips.o     \
                                              mips/amrwbdec_mips.o          \
                                              mips/celp_math_mips.o         \
                                              mips/acelp_vectors_mips.o
+
+MIPSFPU-OBJS-$(CONFIG_FFT)                += mips/fft_mips.o
+MIPSFPU-OBJS                              += mips/dsputil_mips.o
+MIPSFPU-OBJS                              += mips/fmtconvert_mips.o # fmtconvert.c calls its init whenever HAVE_MIPSFPU is set
diff --git a/libavcodec/mips/dsputil_mips.c b/libavcodec/mips/dsputil_mips.c
new file mode 100644
index 0000000..600e256
--- /dev/null
+++ b/libavcodec/mips/dsputil_mips.c
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Zoran Lukic (zoranl at mips.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "config.h"
+#include "libavcodec/dsputil.h"
+
+#if HAVE_INLINE_ASM
+static void vector_fmul_window_mips(float *dst, const float *src0,
+        const float *src1, const float *win, int len) {
+    int i, j;
+    /* Windowed combine of src0/src1 into dst[0..2*len-1], 8 outputs from each end per pass.
+     * The pointers and float temporaries below are operands of the inline assembler.
+     */
+    float * dst_i, * dst_j, * dst_i2, * dst_j2;
+    float temp, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+
+    dst  += len; /* rebase: negative i addresses the first half, j >= 0 the second half */
+    win  += len; /* window is indexed the same way as dst */
+    src0 += len; /* src0 is only read through the negative index i */
+
+    for (i = -len, j = len - 1; i < 0; i += 8, j -= 8) { /* no tail loop: len is assumed to be a multiple of 8 */
+
+        dst_i = dst + i; /* ascending output pointer for elements i .. i+3 */
+        dst_j = dst + j; /* descending output pointer for elements j .. j-3 */
+
+        dst_i2 = dst + i + 4; /* same, for the second group of four (second asm statement) */
+        dst_j2 = dst + j - 4;
+
+        __asm__ __volatile__ (
+            "mul.s   %[temp],   %[s1],       %[wi]            \n\t"
+            "mul.s   %[temp1],  %[s1],       %[wj]            \n\t"
+            "mul.s   %[temp2],  %[s11],      %[wi1]           \n\t"
+            "mul.s   %[temp3],  %[s11],      %[wj1]           \n\t"
+
+            "msub.s  %[temp],   %[temp],     %[s0],  %[wj]    \n\t"
+            "madd.s  %[temp1],  %[temp1],    %[s0],  %[wi]    \n\t"
+            "msub.s  %[temp2],  %[temp2],    %[s01], %[wj1]   \n\t"
+            "madd.s  %[temp3],  %[temp3],    %[s01], %[wi1]   \n\t"
+
+            "swc1    %[temp],   0(%[dst_i])                   \n\t" /* dst[i] = s0*wj - s1*wi; */
+            "swc1    %[temp1],  0(%[dst_j])                   \n\t" /* dst[j] = s0*wi + s1*wj; */
+            "swc1    %[temp2],  4(%[dst_i])                   \n\t" /* dst[i+1] = s01*wj1 - s11*wi1; */
+            "swc1    %[temp3], -4(%[dst_j])                   \n\t" /* dst[j-1] = s01*wi1 + s11*wj1; */
+
+            "mul.s   %[temp4],  %[s12],      %[wi2]           \n\t"
+            "mul.s   %[temp5],  %[s12],      %[wj2]           \n\t"
+            "mul.s   %[temp6],  %[s13],      %[wi3]           \n\t"
+            "mul.s   %[temp7],  %[s13],      %[wj3]           \n\t"
+
+            "msub.s  %[temp4],  %[temp4],    %[s02], %[wj2]   \n\t"
+            "madd.s  %[temp5],  %[temp5],    %[s02], %[wi2]   \n\t"
+            "msub.s  %[temp6],  %[temp6],    %[s03], %[wj3]   \n\t"
+            "madd.s  %[temp7],  %[temp7],    %[s03], %[wi3]   \n\t"
+
+            "swc1    %[temp4],  8(%[dst_i])                   \n\t" /* dst[i+2] = s02*wj2 - s12*wi2; */
+            "swc1    %[temp5], -8(%[dst_j])                   \n\t" /* dst[j-2] = s02*wi2 + s12*wj2; */
+            "swc1    %[temp6],  12(%[dst_i])                  \n\t" /* dst[i+3] = s03*wj3 - s13*wi3; */
+            "swc1    %[temp7], -12(%[dst_j])                  \n\t" /* dst[j-3] = s03*wi3 + s13*wj3; */
+            : [temp]"=&f"(temp),  [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
+              [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
+              [temp6]"=&f"(temp6), [temp7]"=&f"(temp7)
+            : [dst_j]"r"(dst_j),     [dst_i]"r" (dst_i),
+              [s0] "f"(src0[i]),     [wj] "f"(win[j]),     [s1] "f"(src1[j]),
+              [wi] "f"(win[i]),      [s01]"f"(src0[i + 1]),[wj1]"f"(win[j - 1]),
+              [s11]"f"(src1[j - 1]), [wi1]"f"(win[i + 1]), [s02]"f"(src0[i + 2]),
+              [wj2]"f"(win[j - 2]),  [s12]"f"(src1[j - 2]),[wi2]"f"(win[i + 2]),
+              [s03]"f"(src0[i + 3]), [wj3]"f"(win[j - 3]), [s13]"f"(src1[j - 3]),
+              [wi3]"f"(win[i + 3])
+            : "memory"
+        );
+
+        __asm__ __volatile__ (
+            "mul.s  %[temp],   %[s1],       %[wi]            \n\t"
+            "mul.s  %[temp1],  %[s1],       %[wj]            \n\t"
+            "mul.s  %[temp2],  %[s11],      %[wi1]           \n\t"
+            "mul.s  %[temp3],  %[s11],      %[wj1]           \n\t"
+
+            "msub.s %[temp],   %[temp],     %[s0],  %[wj]    \n\t"
+            "madd.s %[temp1],  %[temp1],    %[s0],  %[wi]    \n\t"
+            "msub.s %[temp2],  %[temp2],    %[s01], %[wj1]   \n\t"
+            "madd.s %[temp3],  %[temp3],    %[s01], %[wi1]   \n\t"
+
+            "swc1   %[temp],   0(%[dst_i2])                  \n\t" /* dst[i+4] = s0*wj - s1*wi; */
+            "swc1   %[temp1],  0(%[dst_j2])                  \n\t" /* dst[j-4] = s0*wi + s1*wj; */
+            "swc1   %[temp2],  4(%[dst_i2])                  \n\t" /* dst[i+5] = s01*wj1 - s11*wi1; */
+            "swc1   %[temp3], -4(%[dst_j2])                  \n\t" /* dst[j-5] = s01*wi1 + s11*wj1; */
+
+            "mul.s  %[temp4],  %[s12],      %[wi2]           \n\t"
+            "mul.s  %[temp5],  %[s12],      %[wj2]           \n\t"
+            "mul.s  %[temp6],  %[s13],      %[wi3]           \n\t"
+            "mul.s  %[temp7],  %[s13],      %[wj3]           \n\t"
+
+            "msub.s %[temp4],  %[temp4],    %[s02], %[wj2]   \n\t"
+            "madd.s %[temp5],  %[temp5],    %[s02], %[wi2]   \n\t"
+            "msub.s %[temp6],  %[temp6],    %[s03], %[wj3]   \n\t"
+            "madd.s %[temp7],  %[temp7],    %[s03], %[wi3]   \n\t"
+
+            "swc1   %[temp4],  8(%[dst_i2])                  \n\t" /* dst[i+6] = s02*wj2 - s12*wi2; */
+            "swc1   %[temp5], -8(%[dst_j2])                  \n\t" /* dst[j-6] = s02*wi2 + s12*wj2; */
+            "swc1   %[temp6],  12(%[dst_i2])                 \n\t" /* dst[i+7] = s03*wj3 - s13*wi3; */
+            "swc1   %[temp7], -12(%[dst_j2])                 \n\t" /* dst[j-7] = s03*wi3 + s13*wj3; */
+            : [temp]"=&f"(temp),
+              [temp1]"=&f"(temp1), [temp2]"=&f"(temp2), [temp3]"=&f"(temp3),
+              [temp4]"=&f"(temp4), [temp5]"=&f"(temp5), [temp6]"=&f"(temp6),
+              [temp7]  "=&f" (temp7)
+            : [dst_j2]"r"(dst_j2),   [dst_i2]"r"(dst_i2),
+              [s0] "f"(src0[i + 4]), [wj] "f"(win[j - 4]), [s1] "f"(src1[j - 4]),
+              [wi] "f"(win[i + 4]),  [s01]"f"(src0[i + 5]),[wj1]"f"(win[j - 5]),
+              [s11]"f"(src1[j - 5]), [wi1]"f"(win[i + 5]), [s02]"f"(src0[i + 6]),
+              [wj2]"f"(win[j - 6]),  [s12]"f"(src1[j - 6]),[wi2]"f"(win[i + 6]),
+              [s03]"f"(src0[i + 7]), [wj3]"f"(win[j - 7]), [s13]"f"(src1[j - 7]),
+              [wi3]"f"(win[i + 7])
+            : "memory"
+        );
+    }
+}
+#endif /* HAVE_INLINE_ASM */
+
+av_cold void ff_dsputil_init_mips(DSPContext *c, AVCodecContext *avctx)
+{ /* Point the DSP context at the MIPS vector_fmul_window; avctx is not used here. */
+#if HAVE_INLINE_ASM
+    c->vector_fmul_window = vector_fmul_window_mips;
+#endif /* HAVE_INLINE_ASM */
+}
+
diff --git a/libavcodec/mips/fft_mips.c b/libavcodec/mips/fft_mips.c
new file mode 100644
index 0000000..286c67f
--- /dev/null
+++ b/libavcodec/mips/fft_mips.c
@@ -0,0 +1,689 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Stanislav Ocovaj (socovaj at mips.com)
+ * Author:  Zoran Lukic (zoranl at mips.com)
+ *
+ * Optimized MDCT/IMDCT and FFT transforms
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "config.h"
+#include "libavcodec/fft.h"
+#include "fft_table.h"
+
+/**
+ * FFT transform
+ */
+
+#if HAVE_INLINE_ASM
+static void ff_fft_calc_mips(FFTContext *s, FFTComplex *z) {
+
+    int nbits, i, n, num_transforms, offset, step;
+    int n4, n2, n34;
+    FFTSample tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
+    FFTComplex *tmpz;
+    float w_re, w_im;
+    float *w_re_ptr;
+    const int fft_size = (1 << s->nbits);
+    int s_n = s->nbits;
+    int tem1, tem2;
+    float pom,  pom1,  pom2,  pom3;
+    float temp, temp1, temp3, temp4;
+    FFTComplex * tmpz_n2, * tmpz_n34, * tmpz_n4;
+    FFTComplex * tmpz_n2_i, * tmpz_n34_i, * tmpz_n4_i, * tmpz_i;
+
+    /**
+    *num_transforms = (0x2aab >> (16 - s->nbits)) | 1;
+    */
+    __asm__ __volatile__ (
+        "li   %[tem1], 16                                      \n\t"
+        "sub  %[s_n],  %[tem1], %[s_n]                         \n\t"
+        "li   %[tem2], 10923                                   \n\t"
+        "srav %[tem2], %[tem2], %[s_n]                         \n\t"
+        "ori  %[num_t],%[tem2], 1                              \n\t"
+        : [num_t]"=r"(num_transforms), [s_n]"+r"(s_n),
+          [tem1]"=&r"(tem1), [tem2]"=&r"(tem2)
+    );
+
+
+    for (n=0; n<num_transforms; n++)
+    {
+        offset = fft_offsets_lut[n] << 2;
+        tmpz = z + offset;
+
+        tmp1 = tmpz[0].re + tmpz[1].re;
+        tmp5 = tmpz[2].re + tmpz[3].re;
+        tmp2 = tmpz[0].im + tmpz[1].im;
+        tmp6 = tmpz[2].im + tmpz[3].im;
+        tmp3 = tmpz[0].re - tmpz[1].re;
+        tmp8 = tmpz[2].im - tmpz[3].im;
+        tmp4 = tmpz[0].im - tmpz[1].im;
+        tmp7 = tmpz[2].re - tmpz[3].re;
+
+        tmpz[0].re = tmp1 + tmp5;
+        tmpz[2].re = tmp1 - tmp5;
+        tmpz[0].im = tmp2 + tmp6;
+        tmpz[2].im = tmp2 - tmp6;
+        tmpz[1].re = tmp3 + tmp8;
+        tmpz[3].re = tmp3 - tmp8;
+        tmpz[1].im = tmp4 - tmp7;
+        tmpz[3].im = tmp4 + tmp7;
+
+}
+
+    if (fft_size < 8)
+        return;
+
+    num_transforms = (num_transforms >> 1) | 1;
+    for (n=0; n<num_transforms; n++)
+    {
+        offset = fft_offsets_lut[n] << 3;
+        tmpz = z + offset;
+
+        __asm__ __volatile__ (
+            "lwc1  %[tmp1], 32(%[tmpz])                     \n\t"
+            "lwc1  %[pom],  40(%[tmpz])                     \n\t"
+            "lwc1  %[tmp3], 48(%[tmpz])                     \n\t"
+            "lwc1  %[pom1], 56(%[tmpz])                     \n\t"
+            "lwc1  %[tmp2], 36(%[tmpz])                     \n\t"
+            "lwc1  %[pom2], 44(%[tmpz])                     \n\t"
+            "lwc1  %[pom3], 60(%[tmpz])                     \n\t"
+            "lwc1  %[tmp4], 52(%[tmpz])                     \n\t"
+            "add.s %[tmp1], %[tmp1],    %[pom]              \n\t"  // tmp1 = tmpz[4].re + tmpz[5].re;
+            "add.s %[tmp3], %[tmp3],    %[pom1]             \n\t"  // tmp3 = tmpz[6].re + tmpz[7].re;
+            "add.s %[tmp2], %[tmp2],    %[pom2]             \n\t"  // tmp2 = tmpz[4].im + tmpz[5].im;
+            "lwc1  %[pom],  40(%[tmpz])                     \n\t"
+            "add.s %[tmp4], %[tmp4],    %[pom3]             \n\t"  // tmp4 = tmpz[6].im + tmpz[7].im;
+            "add.s %[tmp5], %[tmp1],    %[tmp3]             \n\t"  // tmp5 = tmp1 + tmp3;
+            "sub.s %[tmp7], %[tmp1],    %[tmp3]             \n\t"  // tmp7 = tmp1 - tmp3;
+            "lwc1  %[tmp1], 32(%[tmpz])                     \n\t"
+            "lwc1  %[pom1], 44(%[tmpz])                     \n\t"
+            "add.s %[tmp6], %[tmp2],    %[tmp4]             \n\t"  // tmp6 = tmp2 + tmp4;
+            "sub.s %[tmp8], %[tmp2],    %[tmp4]             \n\t"  // tmp8 = tmp2 - tmp4;
+            "lwc1  %[tmp2], 36(%[tmpz])                     \n\t"
+            "lwc1  %[pom2], 56(%[tmpz])                     \n\t"
+            "lwc1  %[pom3], 60(%[tmpz])                     \n\t"
+            "lwc1  %[tmp3], 48(%[tmpz])                     \n\t"
+            "lwc1  %[tmp4], 52(%[tmpz])                     \n\t"
+            "sub.s %[tmp1], %[tmp1],    %[pom]              \n\t"  // tmp1 = tmpz[4].re - tmpz[5].re;
+            "lwc1  %[pom],  0(%[tmpz])                      \n\t"
+            "sub.s %[tmp2], %[tmp2],    %[pom1]             \n\t"  // tmp2 = tmpz[4].im - tmpz[5].im;
+            "sub.s %[tmp3], %[tmp3],    %[pom2]             \n\t"  // tmp3 = tmpz[6].re - tmpz[7].re;
+            "lwc1  %[pom2], 4(%[tmpz])                      \n\t"
+            "sub.s %[pom1], %[pom],     %[tmp5]             \n\t"
+            "sub.s %[tmp4], %[tmp4],    %[pom3]             \n\t"  // tmp4 = tmpz[6].im - tmpz[7].im;
+            "add.s %[pom3], %[pom],     %[tmp5]             \n\t"
+            "sub.s %[pom],  %[pom2],    %[tmp6]             \n\t"
+            "add.s %[pom2], %[pom2],    %[tmp6]             \n\t"
+            "swc1  %[pom1], 32(%[tmpz])                     \n\t"  // tmpz[4].re = tmpz[0].re - tmp5;
+            "swc1  %[pom3], 0(%[tmpz])                      \n\t"  // tmpz[0].re = tmpz[0].re + tmp5;
+            "swc1  %[pom],  36(%[tmpz])                     \n\t"  // tmpz[4].im = tmpz[0].im - tmp6;
+            "swc1  %[pom2], 4(%[tmpz])                      \n\t"  // tmpz[0].im = tmpz[0].im + tmp6;
+            "lwc1  %[pom1], 16(%[tmpz])                     \n\t"
+            "lwc1  %[pom3], 20(%[tmpz])                     \n\t"
+            "li.s  %[pom],  0.7071067812                    \n\t"  // float pom = 0.7071067812f;
+            "add.s %[temp1],%[tmp1],    %[tmp2]             \n\t"
+            "sub.s %[temp], %[pom1],    %[tmp8]             \n\t"
+            "add.s %[pom2], %[pom3],    %[tmp7]             \n\t"
+            "sub.s %[temp3],%[tmp3],    %[tmp4]             \n\t"
+            "sub.s %[temp4],%[tmp2],    %[tmp1]             \n\t"
+            "swc1  %[temp], 48(%[tmpz])                     \n\t"  // tmpz[6].re = tmpz[2].re - tmp8;
+            "swc1  %[pom2], 52(%[tmpz])                     \n\t"  // tmpz[6].im = tmpz[2].im + tmp7;
+            "add.s %[pom1], %[pom1],    %[tmp8]             \n\t"
+            "sub.s %[pom3], %[pom3],    %[tmp7]             \n\t"
+            "add.s %[tmp3], %[tmp3],    %[tmp4]             \n\t"
+            "mul.s %[tmp5], %[pom],     %[temp1]            \n\t"  // tmp5 = pom * (tmp1 + tmp2);
+            "mul.s %[tmp7], %[pom],     %[temp3]            \n\t"  // tmp7 = pom * (tmp3 - tmp4);
+            "mul.s %[tmp6], %[pom],     %[temp4]            \n\t"  // tmp6 = pom * (tmp2 - tmp1);
+            "mul.s %[tmp8], %[pom],     %[tmp3]             \n\t"  // tmp8 = pom * (tmp3 + tmp4);
+            "swc1  %[pom1], 16(%[tmpz])                     \n\t"  // tmpz[2].re = tmpz[2].re + tmp8;
+            "swc1  %[pom3], 20(%[tmpz])                     \n\t"  // tmpz[2].im = tmpz[2].im - tmp7;
+            "add.s %[tmp1], %[tmp5],    %[tmp7]             \n\t"  // tmp1 = tmp5 + tmp7;
+            "sub.s %[tmp3], %[tmp5],    %[tmp7]             \n\t"  // tmp3 = tmp5 - tmp7;
+            "add.s %[tmp2], %[tmp6],    %[tmp8]             \n\t"  // tmp2 = tmp6 + tmp8;
+            "sub.s %[tmp4], %[tmp6],    %[tmp8]             \n\t"  // tmp4 = tmp6 - tmp8;
+            "lwc1  %[temp], 8(%[tmpz])                      \n\t"
+            "lwc1  %[temp1],12(%[tmpz])                     \n\t"
+            "lwc1  %[pom],  24(%[tmpz])                     \n\t"
+            "lwc1  %[pom2], 28(%[tmpz])                     \n\t"
+            "sub.s %[temp4],%[temp],    %[tmp1]             \n\t"
+            "sub.s %[temp3],%[temp1],   %[tmp2]             \n\t"
+            "add.s %[temp], %[temp],    %[tmp1]             \n\t"
+            "add.s %[temp1],%[temp1],   %[tmp2]             \n\t"
+            "sub.s %[pom1], %[pom],     %[tmp4]             \n\t"
+            "add.s %[pom3], %[pom2],    %[tmp3]             \n\t"
+            "add.s %[pom],  %[pom],     %[tmp4]             \n\t"
+            "sub.s %[pom2], %[pom2],    %[tmp3]             \n\t"
+            "swc1  %[temp4],40(%[tmpz])                     \n\t"  // tmpz[5].re = tmpz[1].re - tmp1;
+            "swc1  %[temp3],44(%[tmpz])                     \n\t"  // tmpz[5].im = tmpz[1].im - tmp2;
+            "swc1  %[temp], 8(%[tmpz])                      \n\t"  // tmpz[1].re = tmpz[1].re + tmp1;
+            "swc1  %[temp1],12(%[tmpz])                     \n\t"  // tmpz[1].im = tmpz[1].im + tmp2;
+            "swc1  %[pom1], 56(%[tmpz])                     \n\t"  // tmpz[7].re = tmpz[3].re - tmp4;
+            "swc1  %[pom3], 60(%[tmpz])                     \n\t"  // tmpz[7].im = tmpz[3].im + tmp3;
+            "swc1  %[pom],  24(%[tmpz])                     \n\t"  // tmpz[3].re = tmpz[3].re + tmp4;
+            "swc1  %[pom2], 28(%[tmpz])                     \n\t"  // tmpz[3].im = tmpz[3].im - tmp3;
+            : [tmpz]"+r"(tmpz), [tmp1]"=f"(tmp1), [pom]"=f"(pom),   [pom1]"=&f"(pom1), [pom2]"=&f"(pom2),
+              [tmp3]"=f"(tmp3), [tmp2]"=f"(tmp2), [tmp4]"=f"(tmp4), [tmp5]"=f"(tmp5),  [tmp7]"=f"(tmp7),
+              [tmp6]"=f"(tmp6), [tmp8]"=f"(tmp8), [pom3]"=&f"(pom3),[temp]"=&f"(temp), [temp1]"=&f"(temp1),
+              [temp3]"=&f"(temp3), [temp4]"=&f"(temp4)
+            :
+            : "memory"
+        );
+    }
+
+    step = 1 << (MAX_LOG2_NFFT - 4);
+    n4 = 4;
+    for (nbits=4; nbits<=s->nbits; nbits++)
+    {
+        /*
+        * num_transforms = (num_transforms >> 1) | 1;
+        */
+        __asm__ __volatile__ (
+            "sra %[num_t], %[num_t], 1               \n\t"
+            "ori %[num_t], %[num_t], 1               \n\t"
+
+            : [num_t] "+r" (num_transforms)
+        );
+        n2  = 2 * n4;
+        n34 = 3 * n4;
+
+        for (n=0; n<num_transforms; n++)
+        {
+            offset = fft_offsets_lut[n] << nbits;
+            tmpz = z + offset;
+
+            tmpz_n2  = tmpz +  n2;
+            tmpz_n4  = tmpz +  n4;
+            tmpz_n34 = tmpz +  n34;
+
+            __asm__ __volatile__ (
+                "lwc1  %[pom1], 0(%[tmpz_n2])            \n\t"
+                "lwc1  %[pom],  0(%[tmpz_n34])           \n\t"
+                "lwc1  %[pom2], 4(%[tmpz_n2])            \n\t"
+                "lwc1  %[pom3], 4(%[tmpz_n34])           \n\t"
+                "lwc1  %[temp1],0(%[tmpz])               \n\t"
+                "lwc1  %[temp3],4(%[tmpz])               \n\t"
+                "add.s %[tmp5], %[pom1],      %[pom]     \n\t"   //  tmp5 = tmpz[ n2].re + tmpz[n34].re;
+                "sub.s %[tmp1], %[pom1],      %[pom]     \n\t"   //  tmp1 = tmpz[ n2].re - tmpz[n34].re;
+                "add.s %[tmp6], %[pom2],      %[pom3]    \n\t"   //  tmp6 = tmpz[ n2].im + tmpz[n34].im;
+                "sub.s %[tmp2], %[pom2],      %[pom3]    \n\t"   //  tmp2 = tmpz[ n2].im - tmpz[n34].im;
+                "sub.s %[temp], %[temp1],     %[tmp5]    \n\t"
+                "add.s %[temp1],%[temp1],     %[tmp5]    \n\t"
+                "sub.s %[temp4],%[temp3],     %[tmp6]    \n\t"
+                "add.s %[temp3],%[temp3],     %[tmp6]    \n\t"
+                "swc1  %[temp], 0(%[tmpz_n2])            \n\t"   //  tmpz[ n2].re = tmpz[ 0].re - tmp5;
+                "swc1  %[temp1],0(%[tmpz])               \n\t"   //  tmpz[  0].re = tmpz[ 0].re + tmp5;
+                "lwc1  %[pom1], 0(%[tmpz_n4])            \n\t"
+                "swc1  %[temp4],4(%[tmpz_n2])            \n\t"   //  tmpz[ n2].im = tmpz[ 0].im - tmp6;
+                "lwc1  %[temp], 4(%[tmpz_n4])            \n\t"
+                "swc1  %[temp3],4(%[tmpz])               \n\t"   //  tmpz[  0].im = tmpz[ 0].im + tmp6;
+                "sub.s %[pom],  %[pom1],      %[tmp2]    \n\t"
+                "add.s %[pom1], %[pom1],      %[tmp2]    \n\t"
+                "add.s %[temp1],%[temp],      %[tmp1]    \n\t"
+                "sub.s %[temp], %[temp],      %[tmp1]    \n\t"
+                "swc1  %[pom],  0(%[tmpz_n34])           \n\t"   //  tmpz[n34].re = tmpz[n4].re - tmp2;
+                "swc1  %[pom1], 0(%[tmpz_n4])            \n\t"   //  tmpz[ n4].re = tmpz[n4].re + tmp2;
+                "swc1  %[temp1],4(%[tmpz_n34])           \n\t"   //  tmpz[n34].im = tmpz[n4].im + tmp1;
+                "swc1  %[temp], 4(%[tmpz_n4])            \n\t"   //  tmpz[ n4].im = tmpz[n4].im - tmp1;
+                : [tmpz]"+r"(tmpz), [tmpz_n2]"+r"(tmpz_n2), [tmpz_n34]"+r"(tmpz_n34), [tmp5]"=f"(tmp5),
+                  [tmp1]"=f"(tmp1), [pom]"=&f"(pom),        [pom1]"=&f"(pom1),        [pom2]"=&f"(pom2),
+                  [tmp2]"=f"(tmp2), [tmp6]"=f"(tmp6),       [tmpz_n4]"+r"(tmpz_n4),   [pom3]"=&f"(pom3),
+                  [temp]"=f"(temp), [temp1]"=f"(temp1),     [temp3]"=f"(temp3),       [temp4]"=f"(temp4)
+                :
+                : "memory"
+            );
+
+            w_re_ptr = w_tab + step;
+
+            for (i=1; i<n4; i++)
+            {
+                w_re = w_re_ptr[0];
+                w_im = w_re_ptr[MAX_FFT_SIZE/4];
+
+                tmpz_n2_i = tmpz_n2  + i;
+                tmpz_n4_i = tmpz_n4  + i;
+                tmpz_n34_i= tmpz_n34 + i;
+                tmpz_i    = tmpz     + i;
+
+                __asm__ __volatile__ (
+                    "lwc1     %[temp],  0(%[tmpz_n2_i])               \n\t"
+                    "lwc1     %[temp1], 4(%[tmpz_n2_i])               \n\t"
+                    "lwc1     %[pom],   0(%[tmpz_n34_i])              \n\t"
+                    "lwc1     %[pom1],  4(%[tmpz_n34_i])              \n\t"
+                    "mul.s    %[temp3], %[w_im],    %[temp]           \n\t"
+                    "mul.s    %[temp4], %[w_im],    %[temp1]          \n\t"
+                    "mul.s    %[pom2],  %[w_re],    %[pom]            \n\t"
+                    "mul.s    %[pom3],  %[w_im],    %[pom]            \n\t"
+                    "madd.s   %[tmp2],  %[temp3],   %[w_re], %[temp1] \n\t"  // tmp2 = w_re * tmpz[ n2+i].im + w_im * tmpz[ n2+i].re;
+                    "msub.s   %[tmp1],  %[temp4],   %[w_re], %[temp]  \n\t"  // tmp1 = w_re * tmpz[ n2+i].re - w_im * tmpz[ n2+i].im;
+                    "madd.s   %[tmp3],  %[pom2],    %[w_im], %[pom1]  \n\t"  // tmp3 = w_re * tmpz[n34+i].re + w_im * tmpz[n34+i].im;
+                    "msub.s   %[tmp4],  %[pom3],    %[w_re], %[pom1]  \n\t"  // tmp4 = w_re * tmpz[n34+i].im - w_im * tmpz[n34+i].re;
+                    "lwc1     %[temp],  0(%[tmpz_i])                  \n\t"
+                    "lwc1     %[pom],   4(%[tmpz_i])                  \n\t"
+                    "add.s    %[tmp5],  %[tmp1],    %[tmp3]           \n\t"  // tmp5 = tmp1 + tmp3;
+                    "sub.s    %[tmp1],  %[tmp1],    %[tmp3]           \n\t"  // tmp1 = tmp1 - tmp3;
+                    "add.s    %[tmp6],  %[tmp2],    %[tmp4]           \n\t"  // tmp6 = tmp2 + tmp4;
+                    "sub.s    %[tmp2],  %[tmp2],    %[tmp4]           \n\t"  // tmp2 = tmp2 - tmp4;
+                    "sub.s    %[temp1], %[temp],    %[tmp5]           \n\t"
+                    "add.s    %[temp],  %[temp],    %[tmp5]           \n\t"
+                    "sub.s    %[pom1],  %[pom],     %[tmp6]           \n\t"
+                    "add.s    %[pom],   %[pom],     %[tmp6]           \n\t"
+                    "lwc1     %[temp3], 0(%[tmpz_n4_i])               \n\t"
+                    "lwc1     %[pom2],  4(%[tmpz_n4_i])               \n\t"
+                    "swc1     %[temp1], 0(%[tmpz_n2_i])               \n\t"  // tmpz[ n2+i].re = tmpz[   i].re - tmp5;
+                    "swc1     %[temp],  0(%[tmpz_i])                  \n\t"  // tmpz[    i].re = tmpz[   i].re + tmp5;
+                    "swc1     %[pom1],  4(%[tmpz_n2_i])               \n\t"  // tmpz[ n2+i].im = tmpz[   i].im - tmp6;
+                    "swc1     %[pom] ,  4(%[tmpz_i])                  \n\t"  // tmpz[    i].im = tmpz[   i].im + tmp6;
+                    "sub.s    %[temp4], %[temp3],   %[tmp2]           \n\t"
+                    "add.s    %[pom3],  %[pom2],    %[tmp1]           \n\t"
+                    "add.s    %[temp3], %[temp3],   %[tmp2]           \n\t"
+                    "sub.s    %[pom2],  %[pom2],    %[tmp1]           \n\t"
+                    "swc1     %[temp4], 0(%[tmpz_n34_i])              \n\t"  // tmpz[n34+i].re = tmpz[n4+i].re - tmp2;
+                    "swc1     %[pom3],  4(%[tmpz_n34_i])              \n\t"  // tmpz[n34+i].im = tmpz[n4+i].im + tmp1;
+                    "swc1     %[temp3], 0(%[tmpz_n4_i])               \n\t"  // tmpz[ n4+i].re = tmpz[n4+i].re + tmp2;
+                    "swc1     %[pom2],  4(%[tmpz_n4_i])               \n\t"  // tmpz[ n4+i].im = tmpz[n4+i].im - tmp1;
+                    : [tmp1]"=f"(tmp1),    [tmp2]"=f" (tmp2),    [w_re]"+f"(w_re),   [w_im]"+f"(w_im), [temp]"=&f"(temp),
+                      [temp1]"=&f"(temp1), [temp3]"=&f"(temp3), [tmp3]"=f"(tmp3),    [pom]"=&f"(pom),  [pom1]"=&f"(pom1),
+                      [pom2]"=&f"(pom2),   [tmp4]"=f"(tmp4),    [temp4]"=&f"(temp4), [pom3]"=&f"(pom3),
+                      [tmp5]"=f"(tmp5),    [tmp6]"=f"(tmp6),    [tmpz_i]"+r"(tmpz_i),[tmpz_n2_i]"+r"(tmpz_n2_i),
+                      [tmpz_n34_i]"+r"(tmpz_n34_i), [tmpz_n4_i]"+r"(tmpz_n4_i)
+                    :
+                    : "memory"
+                );
+                w_re_ptr += step;
+            }
+        }
+        step >>= 1;
+        n4   <<= 1;
+    }
+}
+
+/**
+ * MDCT/IMDCT transforms.
+ */
+
+/**
+ * Half IMDCT, inline-asm variant: pre-rotation into z[], FFT via s->fft_calc, post-rotation.
+ * output is reinterpreted as the FFTComplex array z; input is read at indices 0 .. n/2-1.
+ */
+static void ff_imdct_half_mips(FFTContext *s, FFTSample *output, const FFTSample *input)
+{
+    int k, n8, n4, n2, n, j;
+    const uint16_t *revtab = s->revtab;
+    const FFTSample *tcos = s->tcos;
+    const FFTSample *tsin = s->tsin;
+    const FFTSample *in1, *in2, *in3, *in4;
+    FFTComplex *z = (FFTComplex *)output;
+    int j1;
+    const float *tcos1, *tsin1, *tcos2, *tsin2;
+    float temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8,
+        temp9, temp10, temp11, temp12, temp13, temp14, temp15, temp16;
+    FFTComplex *z1, *z2;
+
+    n = 1 << s->mdct_bits;
+    n2 = n >> 1;
+    n4 = n >> 2;
+    n8 = n >> 3;
+    /* pre rotation */
+    in1 = input;
+    in2 = input + n2 - 1;
+    in3 = input + 2;
+    in4 = input + n2 - 3;
+    tcos1 = tcos;
+    tsin1 = tsin;
+
+    /* n4 = 64 or 128 (sizes assumed by the author); two outputs per pass, so n4 must be even */
+    for(k = 0; k < n4; k += 2) {
+        j  = revtab[k    ];
+        j1 = revtab[k + 1];
+        __asm__ __volatile__ (
+            "lwc1           %[temp1],       0(%[in2])                           \t\n"
+            "lwc1           %[temp2],       0(%[tcos1])                         \t\n"
+            "lwc1           %[temp3],       0(%[tsin1])                         \t\n"
+            "lwc1           %[temp4],       0(%[in1])                           \t\n"
+            "lwc1           %[temp5],       0(%[in4])                           \t\n"
+            "mul.s          %[temp9],       %[temp1],   %[temp2]                \t\n"
+            "mul.s          %[temp10],      %[temp1],   %[temp3]                \t\n"
+            "lwc1           %[temp6],       4(%[tcos1])                         \t\n"
+            "lwc1           %[temp7],       4(%[tsin1])                         \t\n"
+            "nmsub.s        %[temp9],       %[temp9],   %[temp4],   %[temp3]    \t\n"  // temp9  = in2[0] * tcos1[0] - in1[0] * tsin1[0]
+            "madd.s         %[temp10],      %[temp10],  %[temp4],   %[temp2]    \t\n"  // temp10 = in2[0] * tsin1[0] + in1[0] * tcos1[0]
+            "mul.s          %[temp11],      %[temp5],   %[temp6]                \t\n"
+            "mul.s          %[temp12],      %[temp5],   %[temp7]                \t\n"
+            "lwc1           %[temp8],       0(%[in3])                           \t\n"
+            "addiu          %[tcos1],       %[tcos1],   8                       \t\n"  // NOTE(review): addiu is a 32-bit add; confirm behaviour with 64-bit pointers
+            "addiu          %[tsin1],       %[tsin1],   8                       \t\n"
+            "addiu          %[in1],         %[in1],     16                      \t\n"
+            "nmsub.s        %[temp11],      %[temp11],  %[temp8],   %[temp7]    \t\n"  // temp11 = in4[0] * tcos1[1] - in3[0] * tsin1[1] (values loaded before the pointer bumps)
+            "madd.s         %[temp12],      %[temp12],  %[temp8],   %[temp6]    \t\n"  // temp12 = in4[0] * tsin1[1] + in3[0] * tcos1[1]
+            "addiu          %[in2],         %[in2],     -16                     \t\n"
+            "addiu          %[in3],         %[in3],     16                      \t\n"
+            "addiu          %[in4],         %[in4],     -16                     \t\n"
+            : [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
+              [temp3]"=&f"(temp3), [temp4]"=&f"(temp4),
+              [temp5]"=&f"(temp5), [temp6]"=&f"(temp6),
+              [temp7]"=&f"(temp7), [temp8]"=&f"(temp8),
+              [temp9]"=&f"(temp9), [temp10]"=&f"(temp10),
+              [temp11]"=&f"(temp11), [temp12]"=&f"(temp12),
+              [tsin1]"+r"(tsin1), [tcos1]"+r"(tcos1),
+              [in1]"+r"(in1), [in2]"+r"(in2),
+              [in3]"+r"(in3), [in4]"+r"(in4)
+            : /* no input-only operands */
+            : "memory" /* the loads read memory that is not listed as an operand */
+        );
+
+        z[j ].re = temp9;
+        z[j ].im = temp10;
+        z[j1].re = temp11;
+        z[j1].im = temp12;
+    }
+
+    s->fft_calc(s, z);
+
+    /* post rotation + reordering */
+    /* n8 = 32 or 64 (sizes assumed by the author); two pairs per pass, so n8 must be even */
+    for(k = 0; k < n8; k += 2) {
+        tcos1 = &tcos[n8 - k - 2];
+        tsin1 = &tsin[n8 - k - 2];
+        tcos2 = &tcos[n8 + k];
+        tsin2 = &tsin[n8 + k];
+        z1 = &z[n8 - k - 2];
+        z2 = &z[n8 + k    ];
+        __asm__ __volatile__ (  /* byte offsets 0/4/8/12 assume FFTComplex is { float re, im; } - TODO confirm */
+            "lwc1       %[temp1],   12(%[z1])                           \t\n"
+            "lwc1       %[temp2],   4(%[tsin1])                         \t\n"
+            "lwc1       %[temp3],   4(%[tcos1])                         \t\n"
+            "lwc1       %[temp4],   8(%[z1])                            \t\n"
+            "lwc1       %[temp5],   4(%[z1])                            \t\n"
+            "mul.s      %[temp9],   %[temp1],   %[temp2]                \t\n"
+            "mul.s      %[temp10],  %[temp1],   %[temp3]                \t\n"
+            "lwc1       %[temp6],   0(%[tsin1])                         \t\n"
+            "lwc1       %[temp7],   0(%[tcos1])                         \t\n"
+            "nmsub.s    %[temp9],   %[temp9],   %[temp4],   %[temp3]    \t\n"  // temp9  = z1[1].im * tsin1[1] - z1[1].re * tcos1[1]
+            "madd.s     %[temp10],  %[temp10],  %[temp4],   %[temp2]    \t\n"  // temp10 = z1[1].im * tcos1[1] + z1[1].re * tsin1[1]
+            "mul.s      %[temp11],  %[temp5],   %[temp6]                \t\n"
+            "mul.s      %[temp12],  %[temp5],   %[temp7]                \t\n"
+            "lwc1       %[temp8],   0(%[z1])                            \t\n"
+            "lwc1       %[temp1],   4(%[z2])                            \t\n"
+            "lwc1       %[temp2],   0(%[tsin2])                         \t\n"
+            "lwc1       %[temp3],   0(%[tcos2])                         \t\n"
+            "nmsub.s    %[temp11],  %[temp11],  %[temp8],   %[temp7]    \t\n"  // temp11 = z1[0].im * tsin1[0] - z1[0].re * tcos1[0]
+            "madd.s     %[temp12],  %[temp12],  %[temp8],   %[temp6]    \t\n"  // temp12 = z1[0].im * tcos1[0] + z1[0].re * tsin1[0]
+            "mul.s      %[temp13],  %[temp1],   %[temp2]                \t\n"
+            "mul.s      %[temp14],  %[temp1],   %[temp3]                \t\n"
+            "lwc1       %[temp4],   0(%[z2])                            \t\n"
+            "lwc1       %[temp5],   12(%[z2])                           \t\n"
+            "lwc1       %[temp6],   4(%[tsin2])                         \t\n"
+            "lwc1       %[temp7],   4(%[tcos2])                         \t\n"
+            "nmsub.s    %[temp13],  %[temp13],  %[temp4],   %[temp3]    \t\n"  // temp13 = z2[0].im * tsin2[0] - z2[0].re * tcos2[0]
+            "madd.s     %[temp14],  %[temp14],  %[temp4],   %[temp2]    \t\n"  // temp14 = z2[0].im * tcos2[0] + z2[0].re * tsin2[0]
+            "mul.s      %[temp15],  %[temp5],   %[temp6]                \t\n"
+            "mul.s      %[temp16],  %[temp5],   %[temp7]                \t\n"
+            "lwc1       %[temp8],   8(%[z2])                            \t\n"
+            "nmsub.s    %[temp15],  %[temp15],  %[temp8],   %[temp7]    \t\n"  // temp15 = z2[1].im * tsin2[1] - z2[1].re * tcos2[1]
+            "madd.s     %[temp16],  %[temp16],  %[temp8],   %[temp6]    \t\n"  // temp16 = z2[1].im * tcos2[1] + z2[1].re * tsin2[1]
+            : [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
+              [temp3]"=&f"(temp3), [temp4]"=&f"(temp4),
+              [temp5]"=&f"(temp5), [temp6]"=&f"(temp6),
+              [temp7]"=&f"(temp7), [temp8]"=&f"(temp8),
+              [temp9]"=&f"(temp9), [temp10]"=&f"(temp10),
+              [temp11]"=&f"(temp11), [temp12]"=&f"(temp12),
+              [temp13]"=&f"(temp13), [temp14]"=&f"(temp14),
+              [temp15]"=&f"(temp15), [temp16]"=&f"(temp16)
+            : [z1]"r"(z1), [z2]"r"(z2),
+              [tsin1]"r"(tsin1), [tcos1]"r"(tcos1),
+              [tsin2]"r"(tsin2), [tcos2]"r"(tcos2)
+            : "memory" /* z, tsin and tcos are read through the pointers, not passed as memory operands */
+        );
+
+        z1[1].re = temp9;
+        z1[1].im = temp14;
+        z2[0].re = temp13;
+        z2[0].im = temp10;
+        z1[0].re = temp11;
+        z1[0].im = temp16;
+        z2[1].re = temp15;
+        z2[1].im = temp12;
+    }
+}
+#else
+/**
+ * Half IMDCT, plain C variant used when inline asm is not available.
+ * Pre-rotation into z[], FFT through s->fft_calc, then post-rotation with reordering;
+ * both rotation loops handle two complex values per pass.
+ * @param output reinterpreted as the FFTComplex array z that holds the result
+ * @param input  read at indices 0 .. n/2-1, where n = 1 << s->mdct_bits
+ */
+static void ff_imdct_half_mips(FFTContext *s, FFTSample *output, const FFTSample *input)
+{
+    const int n  = 1 << s->mdct_bits;
+    const int n2 = n >> 1;
+    const int n4 = n >> 2;
+    const int n8 = n >> 3;
+    const uint16_t  *revtab = s->revtab;
+    const FFTSample *tcos   = s->tcos;
+    const FFTSample *tsin   = s->tsin;
+    FFTComplex *z = (FFTComplex *)output;
+    int k;
+
+    /*
+     * pre rotation; the input is indexed from k instead of stepping four pointers by +-4,
+     * so no pointer below input[0] is formed once the last pass has finished
+     */
+    for (k = 0; k < n4; k += 2) {
+        const FFTSample *fwd = input + 2 * k;             /* fwd[0], fwd[2] are read */
+        const FFTSample *bwd = input + (n2 - 1 - 2 * k);  /* bwd[0], bwd[-2] are read */
+        const int dst0 = revtab[k];
+        const int dst1 = revtab[k + 1];
+
+        /* all eight products are formed before anything is stored to z */
+        const FFTSample b0c = bwd[ 0] * tcos[k];
+        const FFTSample f0s = fwd[ 0] * tsin[k];
+        const FFTSample b0s = bwd[ 0] * tsin[k];
+        const FFTSample f0c = fwd[ 0] * tcos[k];
+        const FFTSample b1c = bwd[-2] * tcos[k + 1];
+        const FFTSample f1s = fwd[ 2] * tsin[k + 1];
+        const FFTSample b1s = bwd[-2] * tsin[k + 1];
+        const FFTSample f1c = fwd[ 2] * tcos[k + 1];
+
+        z[dst0].re = b0c - f0s;
+        z[dst0].im = b0s + f0c;
+        z[dst1].re = b1c - f1s;
+        z[dst1].im = b1s + f1c;
+    }
+
+    s->fft_calc(s, z);
+
+    /* post rotation + reordering */
+    for (k = 0; k < n8; k += 2) {
+        FFTComplex      *lo   = z    + (n8 - k - 2);   /* lo[0], lo[1]: pair just below n8 */
+        FFTComplex      *hi   = z    + (n8 + k);       /* hi[0], hi[1]: pair from n8 + k   */
+        const FFTSample *c_lo = tcos + (n8 - k - 2);
+        const FFTSample *s_lo = tsin + (n8 - k - 2);
+        const FFTSample *c_hi = tcos + (n8 + k);
+        const FFTSample *s_hi = tsin + (n8 + k);
+
+        /* every product is taken from the old contents of z before z is overwritten */
+        const FFTSample l1_is = lo[1].im * s_lo[1];
+        const FFTSample l1_rc = lo[1].re * c_lo[1];
+        const FFTSample l1_ic = lo[1].im * c_lo[1];
+        const FFTSample l1_rs = lo[1].re * s_lo[1];
+        const FFTSample h0_is = hi[0].im * s_hi[0];
+        const FFTSample h0_rc = hi[0].re * c_hi[0];
+        const FFTSample h0_ic = hi[0].im * c_hi[0];
+        const FFTSample h0_rs = hi[0].re * s_hi[0];
+        const FFTSample l0_is = lo[0].im * s_lo[0];
+        const FFTSample l0_rc = lo[0].re * c_lo[0];
+        const FFTSample l0_ic = lo[0].im * c_lo[0];
+        const FFTSample l0_rs = lo[0].re * s_lo[0];
+        const FFTSample h1_is = hi[1].im * s_hi[1];
+        const FFTSample h1_rc = hi[1].re * c_hi[1];
+        const FFTSample h1_ic = hi[1].im * c_hi[1];
+        const FFTSample h1_rs = hi[1].re * s_hi[1];
+
+        /* each .re comes from its own element; each .im comes from the mirrored element */
+        lo[1].re = l1_is - l1_rc;
+        lo[1].im = h0_ic + h0_rs;
+        hi[0].re = h0_is - h0_rc;
+        hi[0].im = l1_ic + l1_rs;
+
+        lo[0].re = l0_is - l0_rc;
+        lo[0].im = h1_ic + h1_rs;
+        hi[1].re = h1_is - h1_rc;
+        hi[1].im = l0_ic + l0_rs;
+    }
+}
+#endif /* HAVE_INLINE_ASM */
+
+/**
+ * Compute the full inverse MDCT of size N = 2^mdct_bits.
+ * The half transform is written starting at output[N/4]; the first and last quarters
+ * of the buffer are then filled in from it by mirroring.
+ * @param output N samples
+ * @param input N/2 samples
+ */
+static void ff_imdct_calc_mips(FFTContext *s, FFTSample *output, const FFTSample *input)
+{
+    const int n  = 1 << s->mdct_bits;
+    const int n2 = n >> 1;
+    const int n4 = n >> 2;
+    int base, u;
+
+    ff_imdct_half_mips(s, output + n4, input);
+
+    /* four samples per pass on each side, stored in the same order as an unrolled body */
+    for (base = 0; base < n4; base += 4) {
+        /* first quarter: negated, reversed copy of the second quarter */
+        for (u = 0; u < 4; u++)
+            output[base + u] = -output[n2 - 1 - base - u];
+        /* last quarter: reversed copy of the third quarter */
+        for (u = 0; u < 4; u++)
+            output[n - 1 - base - u] = output[n2 + base + u];
+    }
+}
+
+/**
+ * Compute the forward MDCT of size N = 2^mdct_bits.
+ * The work is done in out[], viewed as N/4 FFTComplex values: pre-rotation, FFT through
+ * s->fft_calc, then an in-place post-rotation.
+ * @param input N samples
+ * @param out N/2 samples
+ */
+static void ff_mdct_calc_mips(FFTContext *s, FFTSample *out, const FFTSample *input)
+{
+    const int n  = 1 << s->mdct_bits;
+    const int n2 = n >> 1;
+    const int n4 = n >> 2;
+    const int n8 = n >> 3;
+    const int n3 = 3 * n4;
+    const uint16_t  *revtab = s->revtab;
+    const FFTSample *tcos   = s->tcos;
+    const FFTSample *tsin   = s->tsin;
+    FFTComplex *x = (FFTComplex *)out;
+    int i;
+
+    /* pre rotation: each pass fills x[revtab[i]] and x[revtab[n8 + i]] */
+    for (i = 0; i < n8; i++) {
+        const int d = 2 * i;
+        /* signed combinations of two input samples each */
+        const FFTSample re_a = -input[n3 + d] - input[n3 - 1 - d];
+        const FFTSample im_a = -input[n4 + d] + input[n4 - 1 - d];
+        const FFTSample re_b =  input[d]      - input[n2 - 1 - d];
+        const FFTSample im_b = -input[n2 + d] - input[n  - 1 - d];
+
+        const int dst_a = revtab[i];
+        const int dst_b = revtab[n8 + i];
+
+        /* products kept in separate temporaries, one multiplication each */
+        const FFTSample a_rc = re_a * tcos[i];
+        const FFTSample a_is = im_a * tsin[i];
+        const FFTSample a_rs = re_a * tsin[i];
+        const FFTSample a_ic = im_a * tcos[i];
+        const FFTSample b_rc = re_b * tcos[n8 + i];
+        const FFTSample b_is = im_b * tsin[n8 + i];
+        const FFTSample b_rs = re_b * tsin[n8 + i];
+        const FFTSample b_ic = im_b * tcos[n8 + i];
+
+        /* x.re = -(re * cos + im * sin), x.im = re * sin - im * cos */
+        x[dst_a].re = -(a_rc + a_is);
+        x[dst_a].im = a_rs - a_ic;
+        x[dst_b].re = -(b_rc + b_is);
+        x[dst_b].im = b_rs - b_ic;
+    }
+
+    s->fft_calc(s, x);
+
+    /* post rotation: elements n8-1-i and n8+i are processed together, walking outwards */
+    for (i = 0; i < n8; i++) {
+        const int below = n8 - 1 - i;
+        const int above = n8 + i;
+
+        /* all products use the values x held after the FFT, before any store below */
+        const FFTSample bl_rc = x[below].re * tcos[below];
+        const FFTSample bl_is = x[below].im * tsin[below];
+        const FFTSample ab_rs = x[above].re * tsin[above];
+        const FFTSample ab_ic = x[above].im * tcos[above];
+        const FFTSample ab_rc = x[above].re * tcos[above];
+        const FFTSample ab_is = x[above].im * tsin[above];
+        const FFTSample bl_rs = x[below].re * tsin[below];
+        const FFTSample bl_ic = x[below].im * tcos[below];
+
+        /* each .re comes from its own element, each .im from the paired element */
+        x[below].re = -(bl_is + bl_rc);
+        x[below].im = ab_ic - ab_rs;
+        x[above].re = -(ab_rc + ab_is);
+        x[above].im = bl_ic - bl_rs;
+    }
+}
+
+av_cold void ff_fft_init_mips(FFTContext *s)
+{   /* Hook the MIPS routines into the function pointers of the given FFT context. */
+#if CONFIG_MDCT
+    s->mdct_calc    = ff_mdct_calc_mips;
+    s->imdct_half   = ff_imdct_half_mips;   /* asm or C variant, chosen at build time */
+    s->imdct_calc   = ff_imdct_calc_mips;
+#endif
+#if HAVE_INLINE_ASM
+    s->fft_calc     = ff_fft_calc_mips;     /* left untouched when inline asm is unavailable */
+#endif
+}
diff --git a/libavcodec/mips/fft_table.h b/libavcodec/mips/fft_table.h
new file mode 100644
index 0000000..7b7af15
--- /dev/null
+++ b/libavcodec/mips/fft_table.h
@@ -0,0 +1,482 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Stanislav Ocovaj (socovaj at mips.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * FFT offset lookup table and the w_re/w_im coefficient table used by the MIPS FFT
+ */
+#ifndef AVCODEC_MIPS_FFT_TABLE_H
+#define AVCODEC_MIPS_FFT_TABLE_H
+
+#include "libavcodec/fft.h"
+
+static const short fft_offsets_lut[] = /* static const: read-only, and safe to include from more than one file */
+{   /* 683 entries from 0 to 1023; presumably offsets consumed by the iterative FFT - confirm at the use site */
+    0,   2,   3,   4,   6,   8,  10,  11,  12,  14,  15,  16,  18,  19,  20,  22,  24,  26,  27,  28,
+   30,  32,  34,  35,  36,  38,  40,  42,  43,  44,  46,  47,  48,  50,  51,  52,  54,  56,  58,  59,
+   60,  62,  63,  64,  66,  67,  68,  70,  72,  74,  75,  76,  78,  79,  80,  82,  83,  84,  86,  88,
+   90,  91,  92,  94,  96,  98,  99, 100, 102, 104, 106, 107, 108, 110, 111, 112, 114, 115, 116, 118,
+  120, 122, 123, 124, 126, 128, 130, 131, 132, 134, 136, 138, 139, 140, 142, 143, 144, 146, 147, 148,
+  150, 152, 154, 155, 156, 158, 160, 162, 163, 164, 166, 168, 170, 171, 172, 174, 175, 176, 178, 179,
+  180, 182, 184, 186, 187, 188, 190, 191, 192, 194, 195, 196, 198, 200, 202, 203, 204, 206, 207, 208,
+  210, 211, 212, 214, 216, 218, 219, 220, 222, 224, 226, 227, 228, 230, 232, 234, 235, 236, 238, 239,
+  240, 242, 243, 244, 246, 248, 250, 251, 252, 254, 255, 256, 258, 259, 260, 262, 264, 266, 267, 268,
+  270, 271, 272, 274, 275, 276, 278, 280, 282, 283, 284, 286, 288, 290, 291, 292, 294, 296, 298, 299,
+  300, 302, 303, 304, 306, 307, 308, 310, 312, 314, 315, 316, 318, 319, 320, 322, 323, 324, 326, 328,
+  330, 331, 332, 334, 335, 336, 338, 339, 340, 342, 344, 346, 347, 348, 350, 352, 354, 355, 356, 358,
+  360, 362, 363, 364, 366, 367, 368, 370, 371, 372, 374, 376, 378, 379, 380, 382, 384, 386, 387, 388,
+  390, 392, 394, 395, 396, 398, 399, 400, 402, 403, 404, 406, 408, 410, 411, 412, 414, 416, 418, 419,
+  420, 422, 424, 426, 427, 428, 430, 431, 432, 434, 435, 436, 438, 440, 442, 443, 444, 446, 447, 448,
+  450, 451, 452, 454, 456, 458, 459, 460, 462, 463, 464, 466, 467, 468, 470, 472, 474, 475, 476, 478,
+  480, 482, 483, 484, 486, 488, 490, 491, 492, 494, 495, 496, 498, 499, 500, 502, 504, 506, 507, 508,
+  510, 512, 514, 515, 516, 518, 520, 522, 523, 524, 526, 527, 528, 530, 531, 532, 534, 536, 538, 539,
+  540, 542, 544, 546, 547, 548, 550, 552, 554, 555, 556, 558, 559, 560, 562, 563, 564, 566, 568, 570,
+  571, 572, 574, 575, 576, 578, 579, 580, 582, 584, 586, 587, 588, 590, 591, 592, 594, 595, 596, 598,
+  600, 602, 603, 604, 606, 608, 610, 611, 612, 614, 616, 618, 619, 620, 622, 623, 624, 626, 627, 628,
+  630, 632, 634, 635, 636, 638, 640, 642, 643, 644, 646, 648, 650, 651, 652, 654, 655, 656, 658, 659,
+  660, 662, 664, 666, 667, 668, 670, 672, 674, 675, 676, 678, 680, 682, 683, 684, 686, 687, 688, 690,
+  691, 692, 694, 696, 698, 699, 700, 702, 703, 704, 706, 707, 708, 710, 712, 714, 715, 716, 718, 719,
+  720, 722, 723, 724, 726, 728, 730, 731, 732, 734, 736, 738, 739, 740, 742, 744, 746, 747, 748, 750,
+  751, 752, 754, 755, 756, 758, 760, 762, 763, 764, 766, 767, 768, 770, 771, 772, 774, 776, 778, 779,
+  780, 782, 783, 784, 786, 787, 788, 790, 792, 794, 795, 796, 798, 800, 802, 803, 804, 806, 808, 810,
+  811, 812, 814, 815, 816, 818, 819, 820, 822, 824, 826, 827, 828, 830, 831, 832, 834, 835, 836, 838,
+  840, 842, 843, 844, 846, 847, 848, 850, 851, 852, 854, 856, 858, 859, 860, 862, 864, 866, 867, 868,
+  870, 872, 874, 875, 876, 878, 879, 880, 882, 883, 884, 886, 888, 890, 891, 892, 894, 896, 898, 899,
+  900, 902, 904, 906, 907, 908, 910, 911, 912, 914, 915, 916, 918, 920, 922, 923, 924, 926, 928, 930,
+  931, 932, 934, 936, 938, 939, 940, 942, 943, 944, 946, 947, 948, 950, 952, 954, 955, 956, 958, 959,
+  960, 962, 963, 964, 966, 968, 970, 971, 972, 974, 975, 976, 978, 979, 980, 982, 984, 986, 987, 988,
+  990, 992, 994, 995, 996, 998,1000,1002,1003,1004,1006,1007,1008,1010,1011,1012,1014,1016,1018,1019,
+ 1020,1022,1023
+};
+
+float w_tab[3*MAX_FFT_SIZE/4] = {
+  1.0000000000f,  0.9999988235f,  0.9999952938f,  0.9999894111f,  0.9999811753f,  0.9999705864f,  0.9999576446f,  0.9999423497f,
+  0.9999247018f,  0.9999047011f,  0.9998823475f,  0.9998576410f,  0.9998305818f,  0.9998011699f,  0.9997694054f,  0.9997352883f,
+  0.9996988187f,  0.9996599967f,  0.9996188225f,  0.9995752960f,  0.9995294175f,  0.9994811870f,  0.9994306046f,  0.9993776704f,
+  0.9993223846f,  0.9992647473f,  0.9992047586f,  0.9991424187f,  0.9990777278f,  0.9990106859f,  0.9989412932f,  0.9988695499f,
+  0.9987954562f,  0.9987190122f,  0.9986402182f,  0.9985590742f,  0.9984755806f,  0.9983897374f,  0.9983015449f,  0.9982110034f,
+  0.9981181129f,  0.9980228738f,  0.9979252862f,  0.9978253504f,  0.9977230666f,  0.9976184351f,  0.9975114561f,  0.9974021299f,
+  0.9972904567f,  0.9971764367f,  0.9970600703f,  0.9969413578f,  0.9968202993f,  0.9966968952f,  0.9965711458f,  0.9964430514f,
+  0.9963126122f,  0.9961798286f,  0.9960447009f,  0.9959072294f,  0.9957674145f,  0.9956252564f,  0.9954807555f,  0.9953339121f,
+  0.9951847267f,  0.9950331994f,  0.9948793308f,  0.9947231211f,  0.9945645707f,  0.9944036801f,  0.9942404495f,  0.9940748793f,
+  0.9939069700f,  0.9937367219f,  0.9935641355f,  0.9933892111f,  0.9932119492f,  0.9930323502f,  0.9928504145f,  0.9926661424f,
+  0.9924795346f,  0.9922905913f,  0.9920993131f,  0.9919057004f,  0.9917097537f,  0.9915114733f,  0.9913108598f,  0.9911079137f,
+  0.9909026354f,  0.9906950254f,  0.9904850843f,  0.9902728124f,  0.9900582103f,  0.9898412785f,  0.9896220175f,  0.9894004278f,
+  0.9891765100f,  0.9889502645f,  0.9887216920f,  0.9884907929f,  0.9882575677f,  0.9880220171f,  0.9877841416f,  0.9875439418f,
+  0.9873014182f,  0.9870565713f,  0.9868094018f,  0.9865599103f,  0.9863080972f,  0.9860539633f,  0.9857975092f,  0.9855387353f,
+  0.9852776424f,  0.9850142310f,  0.9847485018f,  0.9844804554f,  0.9842100924f,  0.9839374134f,  0.9836624192f,  0.9833851103f,
+  0.9831054874f,  0.9828235512f,  0.9825393023f,  0.9822527414f,  0.9819638691f,  0.9816726862f,  0.9813791933f,  0.9810833912f,
+  0.9807852804f,  0.9804848618f,  0.9801821360f,  0.9798771037f,  0.9795697657f,  0.9792601226f,  0.9789481753f,  0.9786339244f,
+  0.9783173707f,  0.9779985149f,  0.9776773578f,  0.9773539001f,  0.9770281427f,  0.9767000861f,  0.9763697313f,  0.9760370790f,
+  0.9757021300f,  0.9753648851f,  0.9750253451f,  0.9746835107f,  0.9743393828f,  0.9739929622f,  0.9736442497f,  0.9732932461f,
+  0.9729399522f,  0.9725843689f,  0.9722264971f,  0.9718663375f,  0.9715038910f,  0.9711391584f,  0.9707721407f,  0.9704028387f,
+  0.9700312532f,  0.9696573851f,  0.9692812354f,  0.9689028048f,  0.9685220943f,  0.9681391047f,  0.9677538371f,  0.9673662922f,
+  0.9669764710f,  0.9665843745f,  0.9661900034f,  0.9657933589f,  0.9653944417f,  0.9649932529f,  0.9645897933f,  0.9641840640f,
+  0.9637760658f,  0.9633657998f,  0.9629532669f,  0.9625384680f,  0.9621214043f,  0.9617020765f,  0.9612804858f,  0.9608566331f,
+  0.9604305194f,  0.9600021457f,  0.9595715131f,  0.9591386225f,  0.9587034749f,  0.9582660714f,  0.9578264130f,  0.9573845008f,
+  0.9569403357f,  0.9564939189f,  0.9560452513f,  0.9555943341f,  0.9551411683f,  0.9546857549f,  0.9542280951f,  0.9537681899f,
+  0.9533060404f,  0.9528416476f,  0.9523750127f,  0.9519061368f,  0.9514350210f,  0.9509616663f,  0.9504860739f,  0.9500082450f,
+  0.9495281806f,  0.9490458819f,  0.9485613499f,  0.9480745859f,  0.9475855910f,  0.9470943664f,  0.9466009131f,  0.9461052324f,
+  0.9456073254f,  0.9451071933f,  0.9446048373f,  0.9441002585f,  0.9435934582f,  0.9430844375f,  0.9425731976f,  0.9420597398f,
+  0.9415440652f,  0.9410261751f,  0.9405060706f,  0.9399837530f,  0.9394592236f,  0.9389324835f,  0.9384035341f,  0.9378723764f,
+  0.9373390119f,  0.9368034417f,  0.9362656672f,  0.9357256895f,  0.9351835099f,  0.9346391298f,  0.9340925504f,  0.9335437730f,
+  0.9329927988f,  0.9324396293f,  0.9318842656f,  0.9313267091f,  0.9307669611f,  0.9302050229f,  0.9296408958f,  0.9290745813f,
+  0.9285060805f,  0.9279353948f,  0.9273625257f,  0.9267874743f,  0.9262102421f,  0.9256308305f,  0.9250492408f,  0.9244654743f,
+  0.9238795325f,  0.9232914167f,  0.9227011283f,  0.9221086687f,  0.9215140393f,  0.9209172415f,  0.9203182767f,  0.9197171463f,
+  0.9191138517f,  0.9185083943f,  0.9179007756f,  0.9172909970f,  0.9166790599f,  0.9160649658f,  0.9154487161f,  0.9148303122f,
+  0.9142097557f,  0.9135870479f,  0.9129621904f,  0.9123351846f,  0.9117060320f,  0.9110747341f,  0.9104412923f,  0.9098057081f,
+  0.9091679831f,  0.9085281187f,  0.9078861165f,  0.9072419779f,  0.9065957045f,  0.9059472978f,  0.9052967593f,  0.9046440906f,
+  0.9039892931f,  0.9033323685f,  0.9026733182f,  0.9020121439f,  0.9013488470f,  0.9006834292f,  0.9000158920f,  0.8993462370f,
+  0.8986744657f,  0.8980005797f,  0.8973245807f,  0.8966464702f,  0.8959662498f,  0.8952839210f,  0.8945994856f,  0.8939129451f,
+  0.8932243012f,  0.8925335554f,  0.8918407094f,  0.8911457648f,  0.8904487232f,  0.8897495864f,  0.8890483559f,  0.8883450333f,
+  0.8876396204f,  0.8869321188f,  0.8862225301f,  0.8855108561f,  0.8847970984f,  0.8840812587f,  0.8833633387f,  0.8826433400f,
+  0.8819212643f,  0.8811971135f,  0.8804708891f,  0.8797425928f,  0.8790122264f,  0.8782797917f,  0.8775452902f,  0.8768087238f,
+  0.8760700942f,  0.8753294031f,  0.8745866523f,  0.8738418435f,  0.8730949784f,  0.8723460589f,  0.8715950867f,  0.8708420635f,
+  0.8700869911f,  0.8693298713f,  0.8685707060f,  0.8678094968f,  0.8670462455f,  0.8662809540f,  0.8655136241f,  0.8647442575f,
+  0.8639728561f,  0.8631994217f,  0.8624239561f,  0.8616464611f,  0.8608669386f,  0.8600853904f,  0.8593018184f,  0.8585162243f,
+  0.8577286100f,  0.8569389774f,  0.8561473284f,  0.8553536647f,  0.8545579884f,  0.8537603011f,  0.8529606049f,  0.8521589016f,
+  0.8513551931f,  0.8505494813f,  0.8497417680f,  0.8489320552f,  0.8481203448f,  0.8473066387f,  0.8464909388f,  0.8456732470f,
+  0.8448535652f,  0.8440318955f,  0.8432082396f,  0.8423825996f,  0.8415549774f,  0.8407253750f,  0.8398937942f,  0.8390602371f,
+  0.8382247056f,  0.8373872016f,  0.8365477272f,  0.8357062844f,  0.8348628750f,  0.8340175011f,  0.8331701647f,  0.8323208678f,
+  0.8314696123f,  0.8306164003f,  0.8297612338f,  0.8289041148f,  0.8280450453f,  0.8271840273f,  0.8263210628f,  0.8254561540f,
+  0.8245893028f,  0.8237205112f,  0.8228497814f,  0.8219771153f,  0.8211025150f,  0.8202259826f,  0.8193475201f,  0.8184671296f,
+  0.8175848132f,  0.8167005729f,  0.8158144108f,  0.8149263291f,  0.8140363297f,  0.8131444148f,  0.8122505866f,  0.8113548470f,
+  0.8104571983f,  0.8095576424f,  0.8086561816f,  0.8077528179f,  0.8068475535f,  0.8059403906f,  0.8050313311f,  0.8041203774f,
+  0.8032075315f,  0.8022927955f,  0.8013761717f,  0.8004576622f,  0.7995372691f,  0.7986149946f,  0.7976908409f,  0.7967648102f,
+  0.7958369046f,  0.7949071263f,  0.7939754776f,  0.7930419605f,  0.7921065773f,  0.7911693302f,  0.7902302214f,  0.7892892532f,
+  0.7883464276f,  0.7874017470f,  0.7864552136f,  0.7855068296f,  0.7845565972f,  0.7836045186f,  0.7826505962f,  0.7816948321f,
+  0.7807372286f,  0.7797777879f,  0.7788165124f,  0.7778534042f,  0.7768884657f,  0.7759216990f,  0.7749531066f,  0.7739826906f,
+  0.7730104534f,  0.7720363972f,  0.7710605243f,  0.7700828370f,  0.7691033376f,  0.7681220285f,  0.7671389119f,  0.7661539902f,
+  0.7651672656f,  0.7641787405f,  0.7631884173f,  0.7621962981f,  0.7612023855f,  0.7602066817f,  0.7592091890f,  0.7582099098f,
+  0.7572088465f,  0.7562060014f,  0.7552013769f,  0.7541949753f,  0.7531867990f,  0.7521768504f,  0.7511651319f,  0.7501516458f,
+  0.7491363945f,  0.7481193805f,  0.7471006060f,  0.7460800735f,  0.7450577854f,  0.7440337442f,  0.7430079521f,  0.7419804117f,
+  0.7409511254f,  0.7399200955f,  0.7388873245f,  0.7378528148f,  0.7368165689f,  0.7357785892f,  0.7347388781f,  0.7336974381f,
+  0.7326542717f,  0.7316093812f,  0.7305627692f,  0.7295144381f,  0.7284643904f,  0.7274126286f,  0.7263591551f,  0.7253039724f,
+  0.7242470830f,  0.7231884893f,  0.7221281939f,  0.7210661993f,  0.7200025080f,  0.7189371224f,  0.7178700451f,  0.7168012785f,
+  0.7157308253f,  0.7146586879f,  0.7135848688f,  0.7125093706f,  0.7114321957f,  0.7103533469f,  0.7092728264f,  0.7081906370f,
+  0.7071067812f,  0.7060212614f,  0.7049340804f,  0.7038452405f,  0.7027547445f,  0.7016625947f,  0.7005687939f,  0.6994733446f,
+  0.6983762494f,  0.6972775108f,  0.6961771315f,  0.6950751140f,  0.6939714609f,  0.6928661748f,  0.6917592584f,  0.6906507141f,
+  0.6895405447f,  0.6884287528f,  0.6873153409f,  0.6862003117f,  0.6850836678f,  0.6839654118f,  0.6828455464f,  0.6817240742f,
+  0.6806009978f,  0.6794763199f,  0.6783500431f,  0.6772221701f,  0.6760927036f,  0.6749616461f,  0.6738290004f,  0.6726947691f,
+  0.6715589548f,  0.6704215604f,  0.6692825883f,  0.6681420414f,  0.6669999223f,  0.6658562337f,  0.6647109782f,  0.6635641586f,
+  0.6624157776f,  0.6612658378f,  0.6601143421f,  0.6589612930f,  0.6578066933f,  0.6566505457f,  0.6554928530f,  0.6543336178f,
+  0.6531728430f,  0.6520105311f,  0.6508466850f,  0.6496813074f,  0.6485144010f,  0.6473459686f,  0.6461760130f,  0.6450045368f,
+  0.6438315429f,  0.6426570340f,  0.6414810128f,  0.6403034822f,  0.6391244449f,  0.6379439036f,  0.6367618612f,  0.6355783205f,
+  0.6343932842f,  0.6332067551f,  0.6320187359f,  0.6308292296f,  0.6296382389f,  0.6284457666f,  0.6272518155f,  0.6260563884f,
+  0.6248594881f,  0.6236611175f,  0.6224612794f,  0.6212599765f,  0.6200572118f,  0.6188529880f,  0.6176473079f,  0.6164401745f,
+  0.6152315906f,  0.6140215589f,  0.6128100824f,  0.6115971639f,  0.6103828063f,  0.6091670123f,  0.6079497850f,  0.6067311270f,
+  0.6055110414f,  0.6042895309f,  0.6030665985f,  0.6018422471f,  0.6006164794f,  0.5993892984f,  0.5981607070f,  0.5969307081f,
+  0.5956993045f,  0.5944664992f,  0.5932322950f,  0.5919966950f,  0.5907597019f,  0.5895213186f,  0.5882815482f,  0.5870403935f,
+  0.5857978575f,  0.5845539430f,  0.5833086529f,  0.5820619903f,  0.5808139581f,  0.5795645591f,  0.5783137964f,  0.5770616729f,
+  0.5758081914f,  0.5745533550f,  0.5732971667f,  0.5720396293f,  0.5707807459f,  0.5695205193f,  0.5682589527f,  0.5669960488f,
+  0.5657318108f,  0.5644662415f,  0.5631993440f,  0.5619311212f,  0.5606615762f,  0.5593907119f,  0.5581185312f,  0.5568450373f,
+  0.5555702330f,  0.5542941215f,  0.5530167056f,  0.5517379884f,  0.5504579729f,  0.5491766622f,  0.5478940592f,  0.5466101669f,
+  0.5453249884f,  0.5440385267f,  0.5427507849f,  0.5414617659f,  0.5401714727f,  0.5388799085f,  0.5375870763f,  0.5362929791f,
+  0.5349976199f,  0.5337010018f,  0.5324031279f,  0.5311040012f,  0.5298036247f,  0.5285020015f,  0.5271991348f,  0.5258950275f,
+  0.5245896827f,  0.5232831035f,  0.5219752929f,  0.5206662541f,  0.5193559902f,  0.5180445041f,  0.5167317990f,  0.5154178780f,
+  0.5141027442f,  0.5127864006f,  0.5114688504f,  0.5101500967f,  0.5088301425f,  0.5075089911f,  0.5061866453f,  0.5048631085f,
+  0.5035383837f,  0.5022124740f,  0.5008853826f,  0.4995571125f,  0.4982276670f,  0.4968970490f,  0.4955652618f,  0.4942323085f,
+  0.4928981922f,  0.4915629161f,  0.4902264833f,  0.4888888969f,  0.4875501601f,  0.4862102761f,  0.4848692480f,  0.4835270789f,
+  0.4821837721f,  0.4808393306f,  0.4794937577f,  0.4781470564f,  0.4767992301f,  0.4754502817f,  0.4741002147f,  0.4727490320f,
+  0.4713967368f,  0.4700433325f,  0.4686888220f,  0.4673332087f,  0.4659764958f,  0.4646186863f,  0.4632597836f,  0.4618997907f,
+  0.4605387110f,  0.4591765475f,  0.4578133036f,  0.4564489824f,  0.4550835871f,  0.4537171210f,  0.4523495872f,  0.4509809890f,
+  0.4496113297f,  0.4482406123f,  0.4468688402f,  0.4454960165f,  0.4441221446f,  0.4427472276f,  0.4413712687f,  0.4399942713f,
+  0.4386162385f,  0.4372371737f,  0.4358570799f,  0.4344759606f,  0.4330938189f,  0.4317106580f,  0.4303264813f,  0.4289412921f,
+  0.4275550934f,  0.4261678887f,  0.4247796812f,  0.4233904741f,  0.4220002708f,  0.4206090744f,  0.4192168884f,  0.4178237158f,
+  0.4164295601f,  0.4150344245f,  0.4136383122f,  0.4122412267f,  0.4108431711f,  0.4094441487f,  0.4080441629f,  0.4066432169f,
+  0.4052413140f,  0.4038384576f,  0.4024346509f,  0.4010298972f,  0.3996241998f,  0.3982175622f,  0.3968099874f,  0.3954014789f,
+  0.3939920401f,  0.3925816741f,  0.3911703843f,  0.3897581741f,  0.3883450467f,  0.3869310055f,  0.3855160538f,  0.3841001950f,
+  0.3826834324f,  0.3812657692f,  0.3798472089f,  0.3784277548f,  0.3770074102f,  0.3755861785f,  0.3741640630f,  0.3727410670f,
+  0.3713171940f,  0.3698924471f,  0.3684668300f,  0.3670403457f,  0.3656129978f,  0.3641847896f,  0.3627557244f,  0.3613258056f,
+  0.3598950365f,  0.3584634206f,  0.3570309612f,  0.3555976617f,  0.3541635254f,  0.3527285558f,  0.3512927561f,  0.3498561298f,
+  0.3484186802f,  0.3469804108f,  0.3455413250f,  0.3441014260f,  0.3426607173f,  0.3412192023f,  0.3397768844f,  0.3383337670f,
+  0.3368898534f,  0.3354451471f,  0.3339996514f,  0.3325533699f,  0.3311063058f,  0.3296584625f,  0.3282098436f,  0.3267604523f,
+  0.3253102922f,  0.3238593665f,  0.3224076788f,  0.3209552324f,  0.3195020308f,  0.3180480774f,  0.3165933756f,  0.3151379288f,
+  0.3136817404f,  0.3122248139f,  0.3107671527f,  0.3093087603f,  0.3078496400f,  0.3063897954f,  0.3049292297f,  0.3034679466f,
+  0.3020059493f,  0.3005432414f,  0.2990798263f,  0.2976157074f,  0.2961508882f,  0.2946853722f,  0.2932191627f,  0.2917522632f,
+  0.2902846773f,  0.2888164082f,  0.2873474595f,  0.2858778347f,  0.2844075372f,  0.2829365705f,  0.2814649379f,  0.2799926431f,
+  0.2785196894f,  0.2770460803f,  0.2755718193f,  0.2740969099f,  0.2726213554f,  0.2711451595f,  0.2696683256f,  0.2681908571f,
+  0.2667127575f,  0.2652340303f,  0.2637546790f,  0.2622747070f,  0.2607941179f,  0.2593129151f,  0.2578311022f,  0.2563486825f,
+  0.2548656596f,  0.2533820370f,  0.2518978182f,  0.2504130066f,  0.2489276057f,  0.2474416192f,  0.2459550503f,  0.2444679027f,
+  0.2429801799f,  0.2414918853f,  0.2400030224f,  0.2385135948f,  0.2370236060f,  0.2355330594f,  0.2340419586f,  0.2325503070f,
+  0.2310581083f,  0.2295653658f,  0.2280720832f,  0.2265782638f,  0.2250839114f,  0.2235890292f,  0.2220936210f,  0.2205976901f,
+  0.2191012402f,  0.2176042746f,  0.2161067971f,  0.2146088110f,  0.2131103199f,  0.2116113274f,  0.2101118369f,  0.2086118520f,
+  0.2071113762f,  0.2056104131f,  0.2041089661f,  0.2026070388f,  0.2011046348f,  0.1996017576f,  0.1980984107f,  0.1965945977f,
+  0.1950903220f,  0.1935855873f,  0.1920803970f,  0.1905747548f,  0.1890686641f,  0.1875621286f,  0.1860551517f,  0.1845477369f,
+  0.1830398880f,  0.1815316083f,  0.1800229014f,  0.1785137709f,  0.1770042204f,  0.1754942534f,  0.1739838734f,  0.1724730840f,
+  0.1709618888f,  0.1694502912f,  0.1679382950f,  0.1664259035f,  0.1649131205f,  0.1633999494f,  0.1618863938f,  0.1603724572f,
+  0.1588581433f,  0.1573434556f,  0.1558283977f,  0.1543129730f,  0.1527971853f,  0.1512810380f,  0.1497645347f,  0.1482476790f,
+  0.1467304745f,  0.1452129247f,  0.1436950332f,  0.1421768035f,  0.1406582393f,  0.1391393442f,  0.1376201216f,  0.1361005752f,
+  0.1345807085f,  0.1330605252f,  0.1315400287f,  0.1300192227f,  0.1284981108f,  0.1269766965f,  0.1254549834f,  0.1239329751f,
+  0.1224106752f,  0.1208880872f,  0.1193652148f,  0.1178420615f,  0.1163186309f,  0.1147949266f,  0.1132709522f,  0.1117467112f,
+  0.1102222073f,  0.1086974440f,  0.1071724250f,  0.1056471537f,  0.1041216339f,  0.1025958690f,  0.1010698628f,  0.0995436187f,
+  0.0980171403f,  0.0964904314f,  0.0949634953f,  0.0934363358f,  0.0919089565f,  0.0903813609f,  0.0888535526f,  0.0873255352f,
+  0.0857973123f,  0.0842688876f,  0.0827402645f,  0.0812114468f,  0.0796824380f,  0.0781532416f,  0.0766238614f,  0.0750943008f,
+  0.0735645636f,  0.0720346532f,  0.0705045734f,  0.0689743276f,  0.0674439196f,  0.0659133528f,  0.0643826309f,  0.0628517576f,
+  0.0613207363f,  0.0597895707f,  0.0582582645f,  0.0567268212f,  0.0551952443f,  0.0536635377f,  0.0521317047f,  0.0505997490f,
+  0.0490676743f,  0.0475354842f,  0.0460031821f,  0.0444707719f,  0.0429382569f,  0.0414056410f,  0.0398729276f,  0.0383401204f,
+  0.0368072229f,  0.0352742389f,  0.0337411719f,  0.0322080254f,  0.0306748032f,  0.0291415088f,  0.0276081458f,  0.0260747178f,
+  0.0245412285f,  0.0230076815f,  0.0214740803f,  0.0199404286f,  0.0184067299f,  0.0168729879f,  0.0153392063f,  0.0138053885f,
+  0.0122715383f,  0.0107376592f,  0.0092037548f,  0.0076698287f,  0.0061358846f,  0.0046019261f,  0.0030679568f,  0.0015339802f,
+  0.0000000000f, -0.0015339802f, -0.0030679568f, -0.0046019261f, -0.0061358846f, -0.0076698287f, -0.0092037548f, -0.0107376592f,
+ -0.0122715383f, -0.0138053885f, -0.0153392063f, -0.0168729879f, -0.0184067299f, -0.0199404286f, -0.0214740803f, -0.0230076815f,
+ -0.0245412285f, -0.0260747178f, -0.0276081458f, -0.0291415088f, -0.0306748032f, -0.0322080254f, -0.0337411719f, -0.0352742389f,
+ -0.0368072229f, -0.0383401204f, -0.0398729276f, -0.0414056410f, -0.0429382569f, -0.0444707719f, -0.0460031821f, -0.0475354842f,
+ -0.0490676743f, -0.0505997490f, -0.0521317047f, -0.0536635377f, -0.0551952443f, -0.0567268212f, -0.0582582645f, -0.0597895707f,
+ -0.0613207363f, -0.0628517576f, -0.0643826309f, -0.0659133528f, -0.0674439196f, -0.0689743276f, -0.0705045734f, -0.0720346532f,
+ -0.0735645636f, -0.0750943008f, -0.0766238614f, -0.0781532416f, -0.0796824380f, -0.0812114468f, -0.0827402645f, -0.0842688876f,
+ -0.0857973123f, -0.0873255352f, -0.0888535526f, -0.0903813609f, -0.0919089565f, -0.0934363358f, -0.0949634953f, -0.0964904314f,
+ -0.0980171403f, -0.0995436187f, -0.1010698628f, -0.1025958690f, -0.1041216339f, -0.1056471537f, -0.1071724250f, -0.1086974440f,
+ -0.1102222073f, -0.1117467112f, -0.1132709522f, -0.1147949266f, -0.1163186309f, -0.1178420615f, -0.1193652148f, -0.1208880872f,
+ -0.1224106752f, -0.1239329751f, -0.1254549834f, -0.1269766965f, -0.1284981108f, -0.1300192227f, -0.1315400287f, -0.1330605252f,
+ -0.1345807085f, -0.1361005752f, -0.1376201216f, -0.1391393442f, -0.1406582393f, -0.1421768035f, -0.1436950332f, -0.1452129247f,
+ -0.1467304745f, -0.1482476790f, -0.1497645347f, -0.1512810380f, -0.1527971853f, -0.1543129730f, -0.1558283977f, -0.1573434556f,
+ -0.1588581433f, -0.1603724572f, -0.1618863938f, -0.1633999494f, -0.1649131205f, -0.1664259035f, -0.1679382950f, -0.1694502912f,
+ -0.1709618888f, -0.1724730840f, -0.1739838734f, -0.1754942534f, -0.1770042204f, -0.1785137709f, -0.1800229014f, -0.1815316083f,
+ -0.1830398880f, -0.1845477369f, -0.1860551517f, -0.1875621286f, -0.1890686641f, -0.1905747548f, -0.1920803970f, -0.1935855873f,
+ -0.1950903220f, -0.1965945977f, -0.1980984107f, -0.1996017576f, -0.2011046348f, -0.2026070388f, -0.2041089661f, -0.2056104131f,
+ -0.2071113762f, -0.2086118520f, -0.2101118369f, -0.2116113274f, -0.2131103199f, -0.2146088110f, -0.2161067971f, -0.2176042746f,
+ -0.2191012402f, -0.2205976901f, -0.2220936210f, -0.2235890292f, -0.2250839114f, -0.2265782638f, -0.2280720832f, -0.2295653658f,
+ -0.2310581083f, -0.2325503070f, -0.2340419586f, -0.2355330594f, -0.2370236060f, -0.2385135948f, -0.2400030224f, -0.2414918853f,
+ -0.2429801799f, -0.2444679027f, -0.2459550503f, -0.2474416192f, -0.2489276057f, -0.2504130066f, -0.2518978182f, -0.2533820370f,
+ -0.2548656596f, -0.2563486825f, -0.2578311022f, -0.2593129151f, -0.2607941179f, -0.2622747070f, -0.2637546790f, -0.2652340303f,
+ -0.2667127575f, -0.2681908571f, -0.2696683256f, -0.2711451595f, -0.2726213554f, -0.2740969099f, -0.2755718193f, -0.2770460803f,
+ -0.2785196894f, -0.2799926431f, -0.2814649379f, -0.2829365705f, -0.2844075372f, -0.2858778347f, -0.2873474595f, -0.2888164082f,
+ -0.2902846773f, -0.2917522632f, -0.2932191627f, -0.2946853722f, -0.2961508882f, -0.2976157074f, -0.2990798263f, -0.3005432414f,
+ -0.3020059493f, -0.3034679466f, -0.3049292297f, -0.3063897954f, -0.3078496400f, -0.3093087603f, -0.3107671527f, -0.3122248139f,
+ -0.3136817404f, -0.3151379288f, -0.3165933756f, -0.3180480774f, -0.3195020308f, -0.3209552324f, -0.3224076788f, -0.3238593665f,
+ -0.3253102922f, -0.3267604523f, -0.3282098436f, -0.3296584625f, -0.3311063058f, -0.3325533699f, -0.3339996514f, -0.3354451471f,
+ -0.3368898534f, -0.3383337670f, -0.3397768844f, -0.3412192023f, -0.3426607173f, -0.3441014260f, -0.3455413250f, -0.3469804108f,
+ -0.3484186802f, -0.3498561298f, -0.3512927561f, -0.3527285558f, -0.3541635254f, -0.3555976617f, -0.3570309612f, -0.3584634206f,
+ -0.3598950365f, -0.3613258056f, -0.3627557244f, -0.3641847896f, -0.3656129978f, -0.3670403457f, -0.3684668300f, -0.3698924471f,
+ -0.3713171940f, -0.3727410670f, -0.3741640630f, -0.3755861785f, -0.3770074102f, -0.3784277548f, -0.3798472089f, -0.3812657692f,
+ -0.3826834324f, -0.3841001950f, -0.3855160538f, -0.3869310055f, -0.3883450467f, -0.3897581741f, -0.3911703843f, -0.3925816741f,
+ -0.3939920401f, -0.3954014789f, -0.3968099874f, -0.3982175622f, -0.3996241998f, -0.4010298972f, -0.4024346509f, -0.4038384576f,
+ -0.4052413140f, -0.4066432169f, -0.4080441629f, -0.4094441487f, -0.4108431711f, -0.4122412267f, -0.4136383122f, -0.4150344245f,
+ -0.4164295601f, -0.4178237158f, -0.4192168884f, -0.4206090744f, -0.4220002708f, -0.4233904741f, -0.4247796812f, -0.4261678887f,
+ -0.4275550934f, -0.4289412921f, -0.4303264813f, -0.4317106580f, -0.4330938189f, -0.4344759606f, -0.4358570799f, -0.4372371737f,
+ -0.4386162385f, -0.4399942713f, -0.4413712687f, -0.4427472276f, -0.4441221446f, -0.4454960165f, -0.4468688402f, -0.4482406123f,
+ -0.4496113297f, -0.4509809890f, -0.4523495872f, -0.4537171210f, -0.4550835871f, -0.4564489824f, -0.4578133036f, -0.4591765475f,
+ -0.4605387110f, -0.4618997907f, -0.4632597836f, -0.4646186863f, -0.4659764958f, -0.4673332087f, -0.4686888220f, -0.4700433325f,
+ -0.4713967368f, -0.4727490320f, -0.4741002147f, -0.4754502817f, -0.4767992301f, -0.4781470564f, -0.4794937577f, -0.4808393306f,
+ -0.4821837721f, -0.4835270789f, -0.4848692480f, -0.4862102761f, -0.4875501601f, -0.4888888969f, -0.4902264833f, -0.4915629161f,
+ -0.4928981922f, -0.4942323085f, -0.4955652618f, -0.4968970490f, -0.4982276670f, -0.4995571125f, -0.5008853826f, -0.5022124740f,
+ -0.5035383837f, -0.5048631085f, -0.5061866453f, -0.5075089911f, -0.5088301425f, -0.5101500967f, -0.5114688504f, -0.5127864006f,
+ -0.5141027442f, -0.5154178780f, -0.5167317990f, -0.5180445041f, -0.5193559902f, -0.5206662541f, -0.5219752929f, -0.5232831035f,
+ -0.5245896827f, -0.5258950275f, -0.5271991348f, -0.5285020015f, -0.5298036247f, -0.5311040012f, -0.5324031279f, -0.5337010018f,
+ -0.5349976199f, -0.5362929791f, -0.5375870763f, -0.5388799085f, -0.5401714727f, -0.5414617659f, -0.5427507849f, -0.5440385267f,
+ -0.5453249884f, -0.5466101669f, -0.5478940592f, -0.5491766622f, -0.5504579729f, -0.5517379884f, -0.5530167056f, -0.5542941215f,
+ -0.5555702330f, -0.5568450373f, -0.5581185312f, -0.5593907119f, -0.5606615762f, -0.5619311212f, -0.5631993440f, -0.5644662415f,
+ -0.5657318108f, -0.5669960488f, -0.5682589527f, -0.5695205193f, -0.5707807459f, -0.5720396293f, -0.5732971667f, -0.5745533550f,
+ -0.5758081914f, -0.5770616729f, -0.5783137964f, -0.5795645591f, -0.5808139581f, -0.5820619903f, -0.5833086529f, -0.5845539430f,
+ -0.5857978575f, -0.5870403935f, -0.5882815482f, -0.5895213186f, -0.5907597019f, -0.5919966950f, -0.5932322950f, -0.5944664992f,
+ -0.5956993045f, -0.5969307081f, -0.5981607070f, -0.5993892984f, -0.6006164794f, -0.6018422471f, -0.6030665985f, -0.6042895309f,
+ -0.6055110414f, -0.6067311270f, -0.6079497850f, -0.6091670123f, -0.6103828063f, -0.6115971639f, -0.6128100824f, -0.6140215589f,
+ -0.6152315906f, -0.6164401745f, -0.6176473079f, -0.6188529880f, -0.6200572118f, -0.6212599765f, -0.6224612794f, -0.6236611175f,
+ -0.6248594881f, -0.6260563884f, -0.6272518155f, -0.6284457666f, -0.6296382389f, -0.6308292296f, -0.6320187359f, -0.6332067551f,
+ -0.6343932842f, -0.6355783205f, -0.6367618612f, -0.6379439036f, -0.6391244449f, -0.6403034822f, -0.6414810128f, -0.6426570340f,
+ -0.6438315429f, -0.6450045368f, -0.6461760130f, -0.6473459686f, -0.6485144010f, -0.6496813074f, -0.6508466850f, -0.6520105311f,
+ -0.6531728430f, -0.6543336178f, -0.6554928530f, -0.6566505457f, -0.6578066933f, -0.6589612930f, -0.6601143421f, -0.6612658378f,
+ -0.6624157776f, -0.6635641586f, -0.6647109782f, -0.6658562337f, -0.6669999223f, -0.6681420414f, -0.6692825883f, -0.6704215604f,
+ -0.6715589548f, -0.6726947691f, -0.6738290004f, -0.6749616461f, -0.6760927036f, -0.6772221701f, -0.6783500431f, -0.6794763199f,
+ -0.6806009978f, -0.6817240742f, -0.6828455464f, -0.6839654118f, -0.6850836678f, -0.6862003117f, -0.6873153409f, -0.6884287528f,
+ -0.6895405447f, -0.6906507141f, -0.6917592584f, -0.6928661748f, -0.6939714609f, -0.6950751140f, -0.6961771315f, -0.6972775108f,
+ -0.6983762494f, -0.6994733446f, -0.7005687939f, -0.7016625947f, -0.7027547445f, -0.7038452405f, -0.7049340804f, -0.7060212614f,
+ -0.7071067812f, -0.7081906370f, -0.7092728264f, -0.7103533469f, -0.7114321957f, -0.7125093706f, -0.7135848688f, -0.7146586879f,
+ -0.7157308253f, -0.7168012785f, -0.7178700451f, -0.7189371224f, -0.7200025080f, -0.7210661993f, -0.7221281939f, -0.7231884893f,
+ -0.7242470830f, -0.7253039724f, -0.7263591551f, -0.7274126286f, -0.7284643904f, -0.7295144381f, -0.7305627692f, -0.7316093812f,
+ -0.7326542717f, -0.7336974381f, -0.7347388781f, -0.7357785892f, -0.7368165689f, -0.7378528148f, -0.7388873245f, -0.7399200955f,
+ -0.7409511254f, -0.7419804117f, -0.7430079521f, -0.7440337442f, -0.7450577854f, -0.7460800735f, -0.7471006060f, -0.7481193805f,
+ -0.7491363945f, -0.7501516458f, -0.7511651319f, -0.7521768504f, -0.7531867990f, -0.7541949753f, -0.7552013769f, -0.7562060014f,
+ -0.7572088465f, -0.7582099098f, -0.7592091890f, -0.7602066817f, -0.7612023855f, -0.7621962981f, -0.7631884173f, -0.7641787405f,
+ -0.7651672656f, -0.7661539902f, -0.7671389119f, -0.7681220285f, -0.7691033376f, -0.7700828370f, -0.7710605243f, -0.7720363972f,
+ -0.7730104534f, -0.7739826906f, -0.7749531066f, -0.7759216990f, -0.7768884657f, -0.7778534042f, -0.7788165124f, -0.7797777879f,
+ -0.7807372286f, -0.7816948321f, -0.7826505962f, -0.7836045186f, -0.7845565972f, -0.7855068296f, -0.7864552136f, -0.7874017470f,
+ -0.7883464276f, -0.7892892532f, -0.7902302214f, -0.7911693302f, -0.7921065773f, -0.7930419605f, -0.7939754776f, -0.7949071263f,
+ -0.7958369046f, -0.7967648102f, -0.7976908409f, -0.7986149946f, -0.7995372691f, -0.8004576622f, -0.8013761717f, -0.8022927955f,
+ -0.8032075315f, -0.8041203774f, -0.8050313311f, -0.8059403906f, -0.8068475535f, -0.8077528179f, -0.8086561816f, -0.8095576424f,
+ -0.8104571983f, -0.8113548470f, -0.8122505866f, -0.8131444148f, -0.8140363297f, -0.8149263291f, -0.8158144108f, -0.8167005729f,
+ -0.8175848132f, -0.8184671296f, -0.8193475201f, -0.8202259826f, -0.8211025150f, -0.8219771153f, -0.8228497814f, -0.8237205112f,
+ -0.8245893028f, -0.8254561540f, -0.8263210628f, -0.8271840273f, -0.8280450453f, -0.8289041148f, -0.8297612338f, -0.8306164003f,
+ -0.8314696123f, -0.8323208678f, -0.8331701647f, -0.8340175011f, -0.8348628750f, -0.8357062844f, -0.8365477272f, -0.8373872016f,
+ -0.8382247056f, -0.8390602371f, -0.8398937942f, -0.8407253750f, -0.8415549774f, -0.8423825996f, -0.8432082396f, -0.8440318955f,
+ -0.8448535652f, -0.8456732470f, -0.8464909388f, -0.8473066387f, -0.8481203448f, -0.8489320552f, -0.8497417680f, -0.8505494813f,
+ -0.8513551931f, -0.8521589016f, -0.8529606049f, -0.8537603011f, -0.8545579884f, -0.8553536647f, -0.8561473284f, -0.8569389774f,
+ -0.8577286100f, -0.8585162243f, -0.8593018184f, -0.8600853904f, -0.8608669386f, -0.8616464611f, -0.8624239561f, -0.8631994217f,
+ -0.8639728561f, -0.8647442575f, -0.8655136241f, -0.8662809540f, -0.8670462455f, -0.8678094968f, -0.8685707060f, -0.8693298713f,
+ -0.8700869911f, -0.8708420635f, -0.8715950867f, -0.8723460589f, -0.8730949784f, -0.8738418435f, -0.8745866523f, -0.8753294031f,
+ -0.8760700942f, -0.8768087238f, -0.8775452902f, -0.8782797917f, -0.8790122264f, -0.8797425928f, -0.8804708891f, -0.8811971135f,
+ -0.8819212643f, -0.8826433400f, -0.8833633387f, -0.8840812587f, -0.8847970984f, -0.8855108561f, -0.8862225301f, -0.8869321188f,
+ -0.8876396204f, -0.8883450333f, -0.8890483559f, -0.8897495864f, -0.8904487232f, -0.8911457648f, -0.8918407094f, -0.8925335554f,
+ -0.8932243012f, -0.8939129451f, -0.8945994856f, -0.8952839210f, -0.8959662498f, -0.8966464702f, -0.8973245807f, -0.8980005797f,
+ -0.8986744657f, -0.8993462370f, -0.9000158920f, -0.9006834292f, -0.9013488470f, -0.9020121439f, -0.9026733182f, -0.9033323685f,
+ -0.9039892931f, -0.9046440906f, -0.9052967593f, -0.9059472978f, -0.9065957045f, -0.9072419779f, -0.9078861165f, -0.9085281187f,
+ -0.9091679831f, -0.9098057081f, -0.9104412923f, -0.9110747341f, -0.9117060320f, -0.9123351846f, -0.9129621904f, -0.9135870479f,
+ -0.9142097557f, -0.9148303122f, -0.9154487161f, -0.9160649658f, -0.9166790599f, -0.9172909970f, -0.9179007756f, -0.9185083943f,
+ -0.9191138517f, -0.9197171463f, -0.9203182767f, -0.9209172415f, -0.9215140393f, -0.9221086687f, -0.9227011283f, -0.9232914167f,
+ -0.9238795325f, -0.9244654743f, -0.9250492408f, -0.9256308305f, -0.9262102421f, -0.9267874743f, -0.9273625257f, -0.9279353948f,
+ -0.9285060805f, -0.9290745813f, -0.9296408958f, -0.9302050229f, -0.9307669611f, -0.9313267091f, -0.9318842656f, -0.9324396293f,
+ -0.9329927988f, -0.9335437730f, -0.9340925504f, -0.9346391298f, -0.9351835099f, -0.9357256895f, -0.9362656672f, -0.9368034417f,
+ -0.9373390119f, -0.9378723764f, -0.9384035341f, -0.9389324835f, -0.9394592236f, -0.9399837530f, -0.9405060706f, -0.9410261751f,
+ -0.9415440652f, -0.9420597398f, -0.9425731976f, -0.9430844375f, -0.9435934582f, -0.9441002585f, -0.9446048373f, -0.9451071933f,
+ -0.9456073254f, -0.9461052324f, -0.9466009131f, -0.9470943664f, -0.9475855910f, -0.9480745859f, -0.9485613499f, -0.9490458819f,
+ -0.9495281806f, -0.9500082450f, -0.9504860739f, -0.9509616663f, -0.9514350210f, -0.9519061368f, -0.9523750127f, -0.9528416476f,
+ -0.9533060404f, -0.9537681899f, -0.9542280951f, -0.9546857549f, -0.9551411683f, -0.9555943341f, -0.9560452513f, -0.9564939189f,
+ -0.9569403357f, -0.9573845008f, -0.9578264130f, -0.9582660714f, -0.9587034749f, -0.9591386225f, -0.9595715131f, -0.9600021457f,
+ -0.9604305194f, -0.9608566331f, -0.9612804858f, -0.9617020765f, -0.9621214043f, -0.9625384680f, -0.9629532669f, -0.9633657998f,
+ -0.9637760658f, -0.9641840640f, -0.9645897933f, -0.9649932529f, -0.9653944417f, -0.9657933589f, -0.9661900034f, -0.9665843745f,
+ -0.9669764710f, -0.9673662922f, -0.9677538371f, -0.9681391047f, -0.9685220943f, -0.9689028048f, -0.9692812354f, -0.9696573851f,
+ -0.9700312532f, -0.9704028387f, -0.9707721407f, -0.9711391584f, -0.9715038910f, -0.9718663375f, -0.9722264971f, -0.9725843689f,
+ -0.9729399522f, -0.9732932461f, -0.9736442497f, -0.9739929622f, -0.9743393828f, -0.9746835107f, -0.9750253451f, -0.9753648851f,
+ -0.9757021300f, -0.9760370790f, -0.9763697313f, -0.9767000861f, -0.9770281427f, -0.9773539001f, -0.9776773578f, -0.9779985149f,
+ -0.9783173707f, -0.9786339244f, -0.9789481753f, -0.9792601226f, -0.9795697657f, -0.9798771037f, -0.9801821360f, -0.9804848618f,
+ -0.9807852804f, -0.9810833912f, -0.9813791933f, -0.9816726862f, -0.9819638691f, -0.9822527414f, -0.9825393023f, -0.9828235512f,
+ -0.9831054874f, -0.9833851103f, -0.9836624192f, -0.9839374134f, -0.9842100924f, -0.9844804554f, -0.9847485018f, -0.9850142310f,
+ -0.9852776424f, -0.9855387353f, -0.9857975092f, -0.9860539633f, -0.9863080972f, -0.9865599103f, -0.9868094018f, -0.9870565713f,
+ -0.9873014182f, -0.9875439418f, -0.9877841416f, -0.9880220171f, -0.9882575677f, -0.9884907929f, -0.9887216920f, -0.9889502645f,
+ -0.9891765100f, -0.9894004278f, -0.9896220175f, -0.9898412785f, -0.9900582103f, -0.9902728124f, -0.9904850843f, -0.9906950254f,
+ -0.9909026354f, -0.9911079137f, -0.9913108598f, -0.9915114733f, -0.9917097537f, -0.9919057004f, -0.9920993131f, -0.9922905913f,
+ -0.9924795346f, -0.9926661424f, -0.9928504145f, -0.9930323502f, -0.9932119492f, -0.9933892111f, -0.9935641355f, -0.9937367219f,
+ -0.9939069700f, -0.9940748793f, -0.9942404495f, -0.9944036801f, -0.9945645707f, -0.9947231211f, -0.9948793308f, -0.9950331994f,
+ -0.9951847267f, -0.9953339121f, -0.9954807555f, -0.9956252564f, -0.9957674145f, -0.9959072294f, -0.9960447009f, -0.9961798286f,
+ -0.9963126122f, -0.9964430514f, -0.9965711458f, -0.9966968952f, -0.9968202993f, -0.9969413578f, -0.9970600703f, -0.9971764367f,
+ -0.9972904567f, -0.9974021299f, -0.9975114561f, -0.9976184351f, -0.9977230666f, -0.9978253504f, -0.9979252862f, -0.9980228738f,
+ -0.9981181129f, -0.9982110034f, -0.9983015449f, -0.9983897374f, -0.9984755806f, -0.9985590742f, -0.9986402182f, -0.9987190122f,
+ -0.9987954562f, -0.9988695499f, -0.9989412932f, -0.9990106859f, -0.9990777278f, -0.9991424187f, -0.9992047586f, -0.9992647473f,
+ -0.9993223846f, -0.9993776704f, -0.9994306046f, -0.9994811870f, -0.9995294175f, -0.9995752960f, -0.9996188225f, -0.9996599967f,
+ -0.9996988187f, -0.9997352883f, -0.9997694054f, -0.9998011699f, -0.9998305818f, -0.9998576410f, -0.9998823475f, -0.9999047011f,
+ -0.9999247018f, -0.9999423497f, -0.9999576446f, -0.9999705864f, -0.9999811753f, -0.9999894111f, -0.9999952938f, -0.9999988235f,
+ -1.0000000000f, -0.9999988235f, -0.9999952938f, -0.9999894111f, -0.9999811753f, -0.9999705864f, -0.9999576446f, -0.9999423497f,
+ -0.9999247018f, -0.9999047011f, -0.9998823475f, -0.9998576410f, -0.9998305818f, -0.9998011699f, -0.9997694054f, -0.9997352883f,
+ -0.9996988187f, -0.9996599967f, -0.9996188225f, -0.9995752960f, -0.9995294175f, -0.9994811870f, -0.9994306046f, -0.9993776704f,
+ -0.9993223846f, -0.9992647473f, -0.9992047586f, -0.9991424187f, -0.9990777278f, -0.9990106859f, -0.9989412932f, -0.9988695499f,
+ -0.9987954562f, -0.9987190122f, -0.9986402182f, -0.9985590742f, -0.9984755806f, -0.9983897374f, -0.9983015449f, -0.9982110034f,
+ -0.9981181129f, -0.9980228738f, -0.9979252862f, -0.9978253504f, -0.9977230666f, -0.9976184351f, -0.9975114561f, -0.9974021299f,
+ -0.9972904567f, -0.9971764367f, -0.9970600703f, -0.9969413578f, -0.9968202993f, -0.9966968952f, -0.9965711458f, -0.9964430514f,
+ -0.9963126122f, -0.9961798286f, -0.9960447009f, -0.9959072294f, -0.9957674145f, -0.9956252564f, -0.9954807555f, -0.9953339121f,
+ -0.9951847267f, -0.9950331994f, -0.9948793308f, -0.9947231211f, -0.9945645707f, -0.9944036801f, -0.9942404495f, -0.9940748793f,
+ -0.9939069700f, -0.9937367219f, -0.9935641355f, -0.9933892111f, -0.9932119492f, -0.9930323502f, -0.9928504145f, -0.9926661424f,
+ -0.9924795346f, -0.9922905913f, -0.9920993131f, -0.9919057004f, -0.9917097537f, -0.9915114733f, -0.9913108598f, -0.9911079137f,
+ -0.9909026354f, -0.9906950254f, -0.9904850843f, -0.9902728124f, -0.9900582103f, -0.9898412785f, -0.9896220175f, -0.9894004278f,
+ -0.9891765100f, -0.9889502645f, -0.9887216920f, -0.9884907929f, -0.9882575677f, -0.9880220171f, -0.9877841416f, -0.9875439418f,
+ -0.9873014182f, -0.9870565713f, -0.9868094018f, -0.9865599103f, -0.9863080972f, -0.9860539633f, -0.9857975092f, -0.9855387353f,
+ -0.9852776424f, -0.9850142310f, -0.9847485018f, -0.9844804554f, -0.9842100924f, -0.9839374134f, -0.9836624192f, -0.9833851103f,
+ -0.9831054874f, -0.9828235512f, -0.9825393023f, -0.9822527414f, -0.9819638691f, -0.9816726862f, -0.9813791933f, -0.9810833912f,
+ -0.9807852804f, -0.9804848618f, -0.9801821360f, -0.9798771037f, -0.9795697657f, -0.9792601226f, -0.9789481753f, -0.9786339244f,
+ -0.9783173707f, -0.9779985149f, -0.9776773578f, -0.9773539001f, -0.9770281427f, -0.9767000861f, -0.9763697313f, -0.9760370790f,
+ -0.9757021300f, -0.9753648851f, -0.9750253451f, -0.9746835107f, -0.9743393828f, -0.9739929622f, -0.9736442497f, -0.9732932461f,
+ -0.9729399522f, -0.9725843689f, -0.9722264971f, -0.9718663375f, -0.9715038910f, -0.9711391584f, -0.9707721407f, -0.9704028387f,
+ -0.9700312532f, -0.9696573851f, -0.9692812354f, -0.9689028048f, -0.9685220943f, -0.9681391047f, -0.9677538371f, -0.9673662922f,
+ -0.9669764710f, -0.9665843745f, -0.9661900034f, -0.9657933589f, -0.9653944417f, -0.9649932529f, -0.9645897933f, -0.9641840640f,
+ -0.9637760658f, -0.9633657998f, -0.9629532669f, -0.9625384680f, -0.9621214043f, -0.9617020765f, -0.9612804858f, -0.9608566331f,
+ -0.9604305194f, -0.9600021457f, -0.9595715131f, -0.9591386225f, -0.9587034749f, -0.9582660714f, -0.9578264130f, -0.9573845008f,
+ -0.9569403357f, -0.9564939189f, -0.9560452513f, -0.9555943341f, -0.9551411683f, -0.9546857549f, -0.9542280951f, -0.9537681899f,
+ -0.9533060404f, -0.9528416476f, -0.9523750127f, -0.9519061368f, -0.9514350210f, -0.9509616663f, -0.9504860739f, -0.9500082450f,
+ -0.9495281806f, -0.9490458819f, -0.9485613499f, -0.9480745859f, -0.9475855910f, -0.9470943664f, -0.9466009131f, -0.9461052324f,
+ -0.9456073254f, -0.9451071933f, -0.9446048373f, -0.9441002585f, -0.9435934582f, -0.9430844375f, -0.9425731976f, -0.9420597398f,
+ -0.9415440652f, -0.9410261751f, -0.9405060706f, -0.9399837530f, -0.9394592236f, -0.9389324835f, -0.9384035341f, -0.9378723764f,
+ -0.9373390119f, -0.9368034417f, -0.9362656672f, -0.9357256895f, -0.9351835099f, -0.9346391298f, -0.9340925504f, -0.9335437730f,
+ -0.9329927988f, -0.9324396293f, -0.9318842656f, -0.9313267091f, -0.9307669611f, -0.9302050229f, -0.9296408958f, -0.9290745813f,
+ -0.9285060805f, -0.9279353948f, -0.9273625257f, -0.9267874743f, -0.9262102421f, -0.9256308305f, -0.9250492408f, -0.9244654743f,
+ -0.9238795325f, -0.9232914167f, -0.9227011283f, -0.9221086687f, -0.9215140393f, -0.9209172415f, -0.9203182767f, -0.9197171463f,
+ -0.9191138517f, -0.9185083943f, -0.9179007756f, -0.9172909970f, -0.9166790599f, -0.9160649658f, -0.9154487161f, -0.9148303122f,
+ -0.9142097557f, -0.9135870479f, -0.9129621904f, -0.9123351846f, -0.9117060320f, -0.9110747341f, -0.9104412923f, -0.9098057081f,
+ -0.9091679831f, -0.9085281187f, -0.9078861165f, -0.9072419779f, -0.9065957045f, -0.9059472978f, -0.9052967593f, -0.9046440906f,
+ -0.9039892931f, -0.9033323685f, -0.9026733182f, -0.9020121439f, -0.9013488470f, -0.9006834292f, -0.9000158920f, -0.8993462370f,
+ -0.8986744657f, -0.8980005797f, -0.8973245807f, -0.8966464702f, -0.8959662498f, -0.8952839210f, -0.8945994856f, -0.8939129451f,
+ -0.8932243012f, -0.8925335554f, -0.8918407094f, -0.8911457648f, -0.8904487232f, -0.8897495864f, -0.8890483559f, -0.8883450333f,
+ -0.8876396204f, -0.8869321188f, -0.8862225301f, -0.8855108561f, -0.8847970984f, -0.8840812587f, -0.8833633387f, -0.8826433400f,
+ -0.8819212643f, -0.8811971135f, -0.8804708891f, -0.8797425928f, -0.8790122264f, -0.8782797917f, -0.8775452902f, -0.8768087238f,
+ -0.8760700942f, -0.8753294031f, -0.8745866523f, -0.8738418435f, -0.8730949784f, -0.8723460589f, -0.8715950867f, -0.8708420635f,
+ -0.8700869911f, -0.8693298713f, -0.8685707060f, -0.8678094968f, -0.8670462455f, -0.8662809540f, -0.8655136241f, -0.8647442575f,
+ -0.8639728561f, -0.8631994217f, -0.8624239561f, -0.8616464611f, -0.8608669386f, -0.8600853904f, -0.8593018184f, -0.8585162243f,
+ -0.8577286100f, -0.8569389774f, -0.8561473284f, -0.8553536647f, -0.8545579884f, -0.8537603011f, -0.8529606049f, -0.8521589016f,
+ -0.8513551931f, -0.8505494813f, -0.8497417680f, -0.8489320552f, -0.8481203448f, -0.8473066387f, -0.8464909388f, -0.8456732470f,
+ -0.8448535652f, -0.8440318955f, -0.8432082396f, -0.8423825996f, -0.8415549774f, -0.8407253750f, -0.8398937942f, -0.8390602371f,
+ -0.8382247056f, -0.8373872016f, -0.8365477272f, -0.8357062844f, -0.8348628750f, -0.8340175011f, -0.8331701647f, -0.8323208678f,
+ -0.8314696123f, -0.8306164003f, -0.8297612338f, -0.8289041148f, -0.8280450453f, -0.8271840273f, -0.8263210628f, -0.8254561540f,
+ -0.8245893028f, -0.8237205112f, -0.8228497814f, -0.8219771153f, -0.8211025150f, -0.8202259826f, -0.8193475201f, -0.8184671296f,
+ -0.8175848132f, -0.8167005729f, -0.8158144108f, -0.8149263291f, -0.8140363297f, -0.8131444148f, -0.8122505866f, -0.8113548470f,
+ -0.8104571983f, -0.8095576424f, -0.8086561816f, -0.8077528179f, -0.8068475535f, -0.8059403906f, -0.8050313311f, -0.8041203774f,
+ -0.8032075315f, -0.8022927955f, -0.8013761717f, -0.8004576622f, -0.7995372691f, -0.7986149946f, -0.7976908409f, -0.7967648102f,
+ -0.7958369046f, -0.7949071263f, -0.7939754776f, -0.7930419605f, -0.7921065773f, -0.7911693302f, -0.7902302214f, -0.7892892532f,
+ -0.7883464276f, -0.7874017470f, -0.7864552136f, -0.7855068296f, -0.7845565972f, -0.7836045186f, -0.7826505962f, -0.7816948321f,
+ -0.7807372286f, -0.7797777879f, -0.7788165124f, -0.7778534042f, -0.7768884657f, -0.7759216990f, -0.7749531066f, -0.7739826906f,
+ -0.7730104534f, -0.7720363972f, -0.7710605243f, -0.7700828370f, -0.7691033376f, -0.7681220285f, -0.7671389119f, -0.7661539902f,
+ -0.7651672656f, -0.7641787405f, -0.7631884173f, -0.7621962981f, -0.7612023855f, -0.7602066817f, -0.7592091890f, -0.7582099098f,
+ -0.7572088465f, -0.7562060014f, -0.7552013769f, -0.7541949753f, -0.7531867990f, -0.7521768504f, -0.7511651319f, -0.7501516458f,
+ -0.7491363945f, -0.7481193805f, -0.7471006060f, -0.7460800735f, -0.7450577854f, -0.7440337442f, -0.7430079521f, -0.7419804117f,
+ -0.7409511254f, -0.7399200955f, -0.7388873245f, -0.7378528148f, -0.7368165689f, -0.7357785892f, -0.7347388781f, -0.7336974381f,
+ -0.7326542717f, -0.7316093812f, -0.7305627692f, -0.7295144381f, -0.7284643904f, -0.7274126286f, -0.7263591551f, -0.7253039724f,
+ -0.7242470830f, -0.7231884893f, -0.7221281939f, -0.7210661993f, -0.7200025080f, -0.7189371224f, -0.7178700451f, -0.7168012785f,
+ -0.7157308253f, -0.7146586879f, -0.7135848688f, -0.7125093706f, -0.7114321957f, -0.7103533469f, -0.7092728264f, -0.7081906370f,
+ -0.7071067812f, -0.7060212614f, -0.7049340804f, -0.7038452405f, -0.7027547445f, -0.7016625947f, -0.7005687939f, -0.6994733446f,
+ -0.6983762494f, -0.6972775108f, -0.6961771315f, -0.6950751140f, -0.6939714609f, -0.6928661748f, -0.6917592584f, -0.6906507141f,
+ -0.6895405447f, -0.6884287528f, -0.6873153409f, -0.6862003117f, -0.6850836678f, -0.6839654118f, -0.6828455464f, -0.6817240742f,
+ -0.6806009978f, -0.6794763199f, -0.6783500431f, -0.6772221701f, -0.6760927036f, -0.6749616461f, -0.6738290004f, -0.6726947691f,
+ -0.6715589548f, -0.6704215604f, -0.6692825883f, -0.6681420414f, -0.6669999223f, -0.6658562337f, -0.6647109782f, -0.6635641586f,
+ -0.6624157776f, -0.6612658378f, -0.6601143421f, -0.6589612930f, -0.6578066933f, -0.6566505457f, -0.6554928530f, -0.6543336178f,
+ -0.6531728430f, -0.6520105311f, -0.6508466850f, -0.6496813074f, -0.6485144010f, -0.6473459686f, -0.6461760130f, -0.6450045368f,
+ -0.6438315429f, -0.6426570340f, -0.6414810128f, -0.6403034822f, -0.6391244449f, -0.6379439036f, -0.6367618612f, -0.6355783205f,
+ -0.6343932842f, -0.6332067551f, -0.6320187359f, -0.6308292296f, -0.6296382389f, -0.6284457666f, -0.6272518155f, -0.6260563884f,
+ -0.6248594881f, -0.6236611175f, -0.6224612794f, -0.6212599765f, -0.6200572118f, -0.6188529880f, -0.6176473079f, -0.6164401745f,
+ -0.6152315906f, -0.6140215589f, -0.6128100824f, -0.6115971639f, -0.6103828063f, -0.6091670123f, -0.6079497850f, -0.6067311270f,
+ -0.6055110414f, -0.6042895309f, -0.6030665985f, -0.6018422471f, -0.6006164794f, -0.5993892984f, -0.5981607070f, -0.5969307081f,
+ -0.5956993045f, -0.5944664992f, -0.5932322950f, -0.5919966950f, -0.5907597019f, -0.5895213186f, -0.5882815482f, -0.5870403935f,
+ -0.5857978575f, -0.5845539430f, -0.5833086529f, -0.5820619903f, -0.5808139581f, -0.5795645591f, -0.5783137964f, -0.5770616729f,
+ -0.5758081914f, -0.5745533550f, -0.5732971667f, -0.5720396293f, -0.5707807459f, -0.5695205193f, -0.5682589527f, -0.5669960488f,
+ -0.5657318108f, -0.5644662415f, -0.5631993440f, -0.5619311212f, -0.5606615762f, -0.5593907119f, -0.5581185312f, -0.5568450373f,
+ -0.5555702330f, -0.5542941215f, -0.5530167056f, -0.5517379884f, -0.5504579729f, -0.5491766622f, -0.5478940592f, -0.5466101669f,
+ -0.5453249884f, -0.5440385267f, -0.5427507849f, -0.5414617659f, -0.5401714727f, -0.5388799085f, -0.5375870763f, -0.5362929791f,
+ -0.5349976199f, -0.5337010018f, -0.5324031279f, -0.5311040012f, -0.5298036247f, -0.5285020015f, -0.5271991348f, -0.5258950275f,
+ -0.5245896827f, -0.5232831035f, -0.5219752929f, -0.5206662541f, -0.5193559902f, -0.5180445041f, -0.5167317990f, -0.5154178780f,
+ -0.5141027442f, -0.5127864006f, -0.5114688504f, -0.5101500967f, -0.5088301425f, -0.5075089911f, -0.5061866453f, -0.5048631085f,
+ -0.5035383837f, -0.5022124740f, -0.5008853826f, -0.4995571125f, -0.4982276670f, -0.4968970490f, -0.4955652618f, -0.4942323085f,
+ -0.4928981922f, -0.4915629161f, -0.4902264833f, -0.4888888969f, -0.4875501601f, -0.4862102761f, -0.4848692480f, -0.4835270789f,
+ -0.4821837721f, -0.4808393306f, -0.4794937577f, -0.4781470564f, -0.4767992301f, -0.4754502817f, -0.4741002147f, -0.4727490320f,
+ -0.4713967368f, -0.4700433325f, -0.4686888220f, -0.4673332087f, -0.4659764958f, -0.4646186863f, -0.4632597836f, -0.4618997907f,
+ -0.4605387110f, -0.4591765475f, -0.4578133036f, -0.4564489824f, -0.4550835871f, -0.4537171210f, -0.4523495872f, -0.4509809890f,
+ -0.4496113297f, -0.4482406123f, -0.4468688402f, -0.4454960165f, -0.4441221446f, -0.4427472276f, -0.4413712687f, -0.4399942713f,
+ -0.4386162385f, -0.4372371737f, -0.4358570799f, -0.4344759606f, -0.4330938189f, -0.4317106580f, -0.4303264813f, -0.4289412921f,
+ -0.4275550934f, -0.4261678887f, -0.4247796812f, -0.4233904741f, -0.4220002708f, -0.4206090744f, -0.4192168884f, -0.4178237158f,
+ -0.4164295601f, -0.4150344245f, -0.4136383122f, -0.4122412267f, -0.4108431711f, -0.4094441487f, -0.4080441629f, -0.4066432169f,
+ -0.4052413140f, -0.4038384576f, -0.4024346509f, -0.4010298972f, -0.3996241998f, -0.3982175622f, -0.3968099874f, -0.3954014789f,
+ -0.3939920401f, -0.3925816741f, -0.3911703843f, -0.3897581741f, -0.3883450467f, -0.3869310055f, -0.3855160538f, -0.3841001950f,
+ -0.3826834324f, -0.3812657692f, -0.3798472089f, -0.3784277548f, -0.3770074102f, -0.3755861785f, -0.3741640630f, -0.3727410670f,
+ -0.3713171940f, -0.3698924471f, -0.3684668300f, -0.3670403457f, -0.3656129978f, -0.3641847896f, -0.3627557244f, -0.3613258056f,
+ -0.3598950365f, -0.3584634206f, -0.3570309612f, -0.3555976617f, -0.3541635254f, -0.3527285558f, -0.3512927561f, -0.3498561298f,
+ -0.3484186802f, -0.3469804108f, -0.3455413250f, -0.3441014260f, -0.3426607173f, -0.3412192023f, -0.3397768844f, -0.3383337670f,
+ -0.3368898534f, -0.3354451471f, -0.3339996514f, -0.3325533699f, -0.3311063058f, -0.3296584625f, -0.3282098436f, -0.3267604523f,
+ -0.3253102922f, -0.3238593665f, -0.3224076788f, -0.3209552324f, -0.3195020308f, -0.3180480774f, -0.3165933756f, -0.3151379288f,
+ -0.3136817404f, -0.3122248139f, -0.3107671527f, -0.3093087603f, -0.3078496400f, -0.3063897954f, -0.3049292297f, -0.3034679466f,
+ -0.3020059493f, -0.3005432414f, -0.2990798263f, -0.2976157074f, -0.2961508882f, -0.2946853722f, -0.2932191627f, -0.2917522632f,
+ -0.2902846773f, -0.2888164082f, -0.2873474595f, -0.2858778347f, -0.2844075372f, -0.2829365705f, -0.2814649379f, -0.2799926431f,
+ -0.2785196894f, -0.2770460803f, -0.2755718193f, -0.2740969099f, -0.2726213554f, -0.2711451595f, -0.2696683256f, -0.2681908571f,
+ -0.2667127575f, -0.2652340303f, -0.2637546790f, -0.2622747070f, -0.2607941179f, -0.2593129151f, -0.2578311022f, -0.2563486825f,
+ -0.2548656596f, -0.2533820370f, -0.2518978182f, -0.2504130066f, -0.2489276057f, -0.2474416192f, -0.2459550503f, -0.2444679027f,
+ -0.2429801799f, -0.2414918853f, -0.2400030224f, -0.2385135948f, -0.2370236060f, -0.2355330594f, -0.2340419586f, -0.2325503070f,
+ -0.2310581083f, -0.2295653658f, -0.2280720832f, -0.2265782638f, -0.2250839114f, -0.2235890292f, -0.2220936210f, -0.2205976901f,
+ -0.2191012402f, -0.2176042746f, -0.2161067971f, -0.2146088110f, -0.2131103199f, -0.2116113274f, -0.2101118369f, -0.2086118520f,
+ -0.2071113762f, -0.2056104131f, -0.2041089661f, -0.2026070388f, -0.2011046348f, -0.1996017576f, -0.1980984107f, -0.1965945977f,
+ -0.1950903220f, -0.1935855873f, -0.1920803970f, -0.1905747548f, -0.1890686641f, -0.1875621286f, -0.1860551517f, -0.1845477369f,
+ -0.1830398880f, -0.1815316083f, -0.1800229014f, -0.1785137709f, -0.1770042204f, -0.1754942534f, -0.1739838734f, -0.1724730840f,
+ -0.1709618888f, -0.1694502912f, -0.1679382950f, -0.1664259035f, -0.1649131205f, -0.1633999494f, -0.1618863938f, -0.1603724572f,
+ -0.1588581433f, -0.1573434556f, -0.1558283977f, -0.1543129730f, -0.1527971853f, -0.1512810380f, -0.1497645347f, -0.1482476790f,
+ -0.1467304745f, -0.1452129247f, -0.1436950332f, -0.1421768035f, -0.1406582393f, -0.1391393442f, -0.1376201216f, -0.1361005752f,
+ -0.1345807085f, -0.1330605252f, -0.1315400287f, -0.1300192227f, -0.1284981108f, -0.1269766965f, -0.1254549834f, -0.1239329751f,
+ -0.1224106752f, -0.1208880872f, -0.1193652148f, -0.1178420615f, -0.1163186309f, -0.1147949266f, -0.1132709522f, -0.1117467112f,
+ -0.1102222073f, -0.1086974440f, -0.1071724250f, -0.1056471537f, -0.1041216339f, -0.1025958690f, -0.1010698628f, -0.0995436187f,
+ -0.0980171403f, -0.0964904314f, -0.0949634953f, -0.0934363358f, -0.0919089565f, -0.0903813609f, -0.0888535526f, -0.0873255352f,
+ -0.0857973123f, -0.0842688876f, -0.0827402645f, -0.0812114468f, -0.0796824380f, -0.0781532416f, -0.0766238614f, -0.0750943008f,
+ -0.0735645636f, -0.0720346532f, -0.0705045734f, -0.0689743276f, -0.0674439196f, -0.0659133528f, -0.0643826309f, -0.0628517576f,
+ -0.0613207363f, -0.0597895707f, -0.0582582645f, -0.0567268212f, -0.0551952443f, -0.0536635377f, -0.0521317047f, -0.0505997490f,
+ -0.0490676743f, -0.0475354842f, -0.0460031821f, -0.0444707719f, -0.0429382569f, -0.0414056410f, -0.0398729276f, -0.0383401204f,
+ -0.0368072229f, -0.0352742389f, -0.0337411719f, -0.0322080254f, -0.0306748032f, -0.0291415088f, -0.0276081458f, -0.0260747178f,
+ -0.0245412285f, -0.0230076815f, -0.0214740803f, -0.0199404286f, -0.0184067299f, -0.0168729879f, -0.0153392063f, -0.0138053885f,
+ -0.0122715383f, -0.0107376592f, -0.0092037548f, -0.0076698287f, -0.0061358846f, -0.0046019261f, -0.0030679568f, -0.0015339802f,
+};
+#endif /* AVCODEC_MIPS_FFT_TABLE_H */
diff --git a/libavcodec/mips/fmtconvert_mips.c b/libavcodec/mips/fmtconvert_mips.c
new file mode 100644
index 0000000..9e45ba1
--- /dev/null
+++ b/libavcodec/mips/fmtconvert_mips.c
@@ -0,0 +1,336 @@
+/*
+ * Format Conversion Utils for MIPS
+ *
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Zoran Lukic (zoranl at mips.com)
+ * Author:  Nedeljko Babic (nbabic at mips.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "config.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/fmtconvert.h"
+
+#if HAVE_INLINE_ASM
+static void float_to_int16_mips(int16_t *dst, const float *src, long len) {
+    const float *src_end = src + len;
+    int ret0, ret1, ret2, ret3, ret4, ret5, ret6, ret7;
+    float src0, src1, src2, src3, src4, src5, src6, src7;
+    /*
+     * Convert len floats at src to saturated int16 at dst, 8 samples per iteration.
+     * The loop exits only when src == src_end, so len must be a multiple of 8.
+     */
+    __asm__ __volatile__(
+        "beq        %[len],  $zero,   fti16_end%=   \n\t" /* nothing to do for len == 0 */
+        "fti16_lp%=:                                \n\t"
+        "lwc1       %[src0], 0(%[src])              \n\t" /* load src[0..3] */
+        "lwc1       %[src1], 4(%[src])              \n\t"
+        "lwc1       %[src2], 8(%[src])              \n\t"
+        "lwc1       %[src3], 12(%[src])             \n\t"
+        "cvt.w.s    %[src0], %[src0]                \n\t" /* float -> 32-bit int, in place */
+        "cvt.w.s    %[src1], %[src1]                \n\t"
+        "cvt.w.s    %[src2], %[src2]                \n\t"
+        "cvt.w.s    %[src3], %[src3]                \n\t"
+        "mfc1       %[ret0], %[src0]                \n\t" /* move the integers to GPRs */
+        "mfc1       %[ret1], %[src1]                \n\t"
+        "mfc1       %[ret2], %[src2]                \n\t"
+        "mfc1       %[ret3], %[src3]                \n\t"
+        "lwc1       %[src4], 16(%[src])             \n\t" /* load src[4..7] */
+        "lwc1       %[src5], 20(%[src])             \n\t"
+        "lwc1       %[src6], 24(%[src])             \n\t"
+        "lwc1       %[src7], 28(%[src])             \n\t"
+        "cvt.w.s    %[src4], %[src4]                \n\t"
+        "cvt.w.s    %[src5], %[src5]                \n\t"
+        "cvt.w.s    %[src6], %[src6]                \n\t"
+        "cvt.w.s    %[src7], %[src7]                \n\t"
+        "addiu      %[src],  32                     \n\t" /* src += 8 floats */
+        "shll_s.w   %[ret0], %[ret0], 16            \n\t" /* saturating << 16 (DSP ASE) clamps to int16 range */
+        "shll_s.w   %[ret1], %[ret1], 16            \n\t"
+        "shll_s.w   %[ret2], %[ret2], 16            \n\t"
+        "shll_s.w   %[ret3], %[ret3], 16            \n\t"
+        "srl        %[ret0], %[ret0], 16            \n\t" /* clamped value back into the low halfword */
+        "srl        %[ret1], %[ret1], 16            \n\t"
+        "srl        %[ret2], %[ret2], 16            \n\t"
+        "srl        %[ret3], %[ret3], 16            \n\t"
+        "sh         %[ret0], 0(%[dst])              \n\t" /* store dst[0..3] */
+        "sh         %[ret1], 2(%[dst])              \n\t"
+        "sh         %[ret2], 4(%[dst])              \n\t"
+        "sh         %[ret3], 6(%[dst])              \n\t"
+        "mfc1       %[ret4], %[src4]                \n\t" /* same clamp/store sequence for samples 4..7 */
+        "mfc1       %[ret5], %[src5]                \n\t"
+        "mfc1       %[ret6], %[src6]                \n\t"
+        "mfc1       %[ret7], %[src7]                \n\t"
+        "shll_s.w   %[ret4], %[ret4], 16            \n\t"
+        "shll_s.w   %[ret5], %[ret5], 16            \n\t"
+        "shll_s.w   %[ret6], %[ret6], 16            \n\t"
+        "shll_s.w   %[ret7], %[ret7], 16            \n\t"
+        "srl        %[ret4], %[ret4], 16            \n\t"
+        "srl        %[ret5], %[ret5], 16            \n\t"
+        "srl        %[ret6], %[ret6], 16            \n\t"
+        "srl        %[ret7], %[ret7], 16            \n\t"
+        "sh         %[ret4], 8(%[dst])              \n\t" /* store dst[4..7] */
+        "sh         %[ret5], 10(%[dst])             \n\t"
+        "sh         %[ret6], 12(%[dst])             \n\t"
+        "sh         %[ret7], 14(%[dst])             \n\t"
+        "addiu      %[dst],  16                     \n\t" /* dst += 8 int16 */
+        "bne        %[src],  %[src_end], fti16_lp%= \n\t" /* repeat until src reaches src_end */
+        "fti16_end%=:                               \n\t"
+        : [ret0]"=&r"(ret0), [ret1]"=&r"(ret1), [ret2]"=&r"(ret2), [ret3]"=&r"(ret3),
+          [ret4]"=&r"(ret4), [ret5]"=&r"(ret5), [ret6]"=&r"(ret6), [ret7]"=&r"(ret7),
+          [src0]"=&f"(src0), [src1]"=&f"(src1), [src2]"=&f"(src2), [src3]"=&f"(src3),
+          [src4]"=&f"(src4), [src5]"=&f"(src5), [src6]"=&f"(src6), [src7]"=&f"(src7),
+          [src]"+r"(src), [dst]"+r"(dst)
+        : [src_end]"r"(src_end), [len]"r"(len)
+        : "memory" /* the asm reads src[] and writes dst[] through pointers */
+    );
+}
+
+static void int32_to_float_fmul_scalar_mips(float *dst, const int *src,
+        float mul, int len) {
+    /*
+     * Computes dst[i] = src[i] * mul for len elements; temp* are the FP temporaries
+     */
+    float temp1, temp3, temp5, temp7, temp9, temp11, temp13, temp15;
+    /* rpom*: integer registers holding the raw src words until mtc1 moves them */
+    int rpom1, rpom2, rpom11, rpom21, rpom12, rpom22, rpom13, rpom23;
+    const int *src_end = src + len;
+    /* Unrolled 8 times and entered without a length check; the loop exits only
+     * when src == src_end, so len must be a positive multiple of 8.
+     */
+    __asm__ __volatile__ (
+        "i32tf_lp%=:                                    \n\t"
+        "lw       %[rpom11],     0(%[src])              \n\t" /* load src[0..3] */
+        "lw       %[rpom21],     4(%[src])              \n\t"
+        "lw       %[rpom1],      8(%[src])              \n\t"
+        "lw       %[rpom2],      12(%[src])             \n\t"
+        "mtc1     %[rpom11],     %[temp1]               \n\t" /* move the integer bits into FPRs */
+        "mtc1     %[rpom21],     %[temp3]               \n\t"
+        "mtc1     %[rpom1],      %[temp5]               \n\t"
+        "mtc1     %[rpom2],      %[temp7]               \n\t"
+
+        "lw       %[rpom13],     16(%[src])             \n\t" /* load src[4..7] */
+        "lw       %[rpom23],     20(%[src])             \n\t"
+        "lw       %[rpom12],     24(%[src])             \n\t"
+        "lw       %[rpom22],     28(%[src])             \n\t"
+        "mtc1     %[rpom13],     %[temp9]               \n\t"
+        "mtc1     %[rpom23],     %[temp11]              \n\t"
+        "mtc1     %[rpom12],     %[temp13]              \n\t"
+        "mtc1     %[rpom22],     %[temp15]              \n\t"
+
+        "addiu    %[src],        32                     \n\t" /* src += 8 ints */
+        "cvt.s.w  %[temp1],      %[temp1]               \n\t" /* int32 -> float, in place */
+        "cvt.s.w  %[temp3],      %[temp3]               \n\t"
+        "cvt.s.w  %[temp5],      %[temp5]               \n\t"
+        "cvt.s.w  %[temp7],      %[temp7]               \n\t"
+
+        "cvt.s.w  %[temp9],      %[temp9]               \n\t"
+        "cvt.s.w  %[temp11],     %[temp11]              \n\t"
+        "cvt.s.w  %[temp13],     %[temp13]              \n\t"
+        "cvt.s.w  %[temp15],     %[temp15]              \n\t"
+
+        "mul.s   %[temp1],       %[temp1],    %[mul]    \n\t" /* scale by mul */
+        "mul.s   %[temp3],       %[temp3],    %[mul]    \n\t"
+        "mul.s   %[temp5],       %[temp5],    %[mul]    \n\t"
+        "mul.s   %[temp7],       %[temp7],    %[mul]    \n\t"
+
+        "mul.s   %[temp9],       %[temp9],    %[mul]    \n\t"
+        "mul.s   %[temp11],      %[temp11],   %[mul]    \n\t"
+        "mul.s   %[temp13],      %[temp13],   %[mul]    \n\t"
+        "mul.s   %[temp15],      %[temp15],   %[mul]    \n\t"
+
+        "swc1    %[temp1],       0(%[dst])              \n\t" /*dst[i] = src[i] * mul;    */
+        "swc1    %[temp3],       4(%[dst])              \n\t" /*dst[i+1] = src[i+1] * mul;*/
+        "swc1    %[temp5],       8(%[dst])              \n\t" /*dst[i+2] = src[i+2] * mul;*/
+        "swc1    %[temp7],       12(%[dst])             \n\t" /*dst[i+3] = src[i+3] * mul;*/
+
+        "swc1    %[temp9],       16(%[dst])             \n\t" /*dst[i+4] = src[i+4] * mul;*/
+        "swc1    %[temp11],      20(%[dst])             \n\t" /*dst[i+5] = src[i+5] * mul;*/
+        "swc1    %[temp13],      24(%[dst])             \n\t" /*dst[i+6] = src[i+6] * mul;*/
+        "swc1    %[temp15],      28(%[dst])             \n\t" /*dst[i+7] = src[i+7] * mul;*/
+        "addiu   %[dst],        32                      \n\t" /* dst += 8 floats */
+        "bne     %[src],        %[src_end], i32tf_lp%=  \n\t" /* repeat until src reaches src_end */
+        : [temp1]"=&f"(temp1),   [temp11]"=&f"(temp11),
+          [temp13]"=&f"(temp13), [temp15]"=&f"(temp15),
+          [temp3]"=&f"(temp3),   [temp5]"=&f"(temp5),
+          [temp7]"=&f"(temp7),   [temp9]"=&f"(temp9),
+          [rpom1]"=&r"(rpom1),   [rpom2]"=&r"(rpom2),
+          [rpom11]"=&r"(rpom11), [rpom21]"=&r"(rpom21),
+          [rpom12]"=&r"(rpom12), [rpom22]"=&r"(rpom22),
+          [rpom13]"=&r"(rpom13), [rpom23]"=&r"(rpom23),
+          [dst]"+r"(dst),       [src]"+r"(src)
+        : [mul]"f"(mul),        [src_end]"r"(src_end)
+        : "memory" /* the asm reads src[] and writes dst[] through pointers */
+    );
+}
+
+static void float_to_int16_interleave_mips(int16_t *dst, const float **src, long len,
+        int channels)
+{
+    int c, ch2 = channels << 1; /* ch2: byte stride between samples of one channel in dst */
+    int ret0, ret1, ret2, ret3, ret4, ret5, ret6, ret7;
+    float src0, src1, src2, src3, src4, src5, src6, src7;
+    int16_t *dst_ptr0, *dst_ptr1, *dst_ptr2, *dst_ptr3;
+    int16_t *dst_ptr4, *dst_ptr5, *dst_ptr6, *dst_ptr7;
+    const float *src_ptr, *src_ptr2, *src_end;
+    /* Interleave: dst[i * channels + c] = saturated int16 of src[c][i], i in [0, len). */
+    if (channels == 2) {
+        src_ptr = &src[0][0];
+        src_ptr2 = &src[1][0];
+        src_end = src_ptr + len;
+        /* Stereo: one sample per channel per iteration, no entry check, so len must be > 0. */
+        __asm__ __volatile__ (
+            "fti16i2_lp%=:                                   \n\t"
+            "lwc1       %[src0],    0(%[src_ptr])            \n\t" /* left sample  */
+            "lwc1       %[src1],    0(%[src_ptr2])           \n\t" /* right sample */
+            "addiu      %[src_ptr], 4                        \n\t"
+            "cvt.w.s    %[src0],    %[src0]                  \n\t" /* convert in place: only declared operands, */
+            "cvt.w.s    %[src1],    %[src1]                  \n\t" /* no hard-coded FP register is clobbered    */
+            "mfc1       %[ret0],    %[src0]                  \n\t"
+            "mfc1       %[ret1],    %[src1]                  \n\t"
+            "shll_s.w   %[ret0],    %[ret0], 16              \n\t" /* saturating << 16 (DSP ASE) clamps to int16 range */
+            "shll_s.w   %[ret1],    %[ret1], 16              \n\t"
+            "addiu      %[src_ptr2], 4                       \n\t"
+            "srl        %[ret0],    %[ret0], 16              \n\t" /* clamped value back into the low halfword */
+            "srl        %[ret1],    %[ret1], 16              \n\t"
+            "sh         %[ret0],    0(%[dst])                \n\t"
+            "sh         %[ret1],    2(%[dst])                \n\t"
+            "addiu      %[dst],     4                        \n\t" /* next stereo frame */
+            "bne        %[src_ptr], %[src_end], fti16i2_lp%= \n\t"
+            : [ret0]"=&r"(ret0), [ret1]"=&r"(ret1),
+              [src0]"=&f"(src0), [src1]"=&f"(src1),
+              [src_ptr]"+r"(src_ptr), [src_ptr2]"+r"(src_ptr2),
+              [dst]"+r"(dst)
+            : [src_end]"r"(src_end)
+            : "memory"
+        );
+    } else {
+        for (c = 0; c < channels; c++)
+        {
+            src_ptr  = &src[c][0];
+            dst_ptr0 = &dst[c];
+            src_end = src_ptr + len;
+            /* One channel per pass, 8 samples per iteration with no entry check, so len
+             * must be a positive multiple of 8; outputs of a channel are ch2 bytes apart.
+             */
+            __asm__ __volatile__(
+                "fti16i_lp%=:                                     \n\t"
+                "lwc1       %[src0], 0(%[src_ptr])                \n\t"
+                "lwc1       %[src1], 4(%[src_ptr])                \n\t"
+                "lwc1       %[src2], 8(%[src_ptr])                \n\t"
+                "lwc1       %[src3], 12(%[src_ptr])               \n\t"
+                "cvt.w.s    %[src0], %[src0]                      \n\t" /* float -> 32-bit int, in place */
+                "cvt.w.s    %[src1], %[src1]                      \n\t"
+                "cvt.w.s    %[src2], %[src2]                      \n\t"
+                "cvt.w.s    %[src3], %[src3]                      \n\t"
+                "mfc1       %[ret0], %[src0]                      \n\t"
+                "mfc1       %[ret1], %[src1]                      \n\t"
+                "mfc1       %[ret2], %[src2]                      \n\t"
+                "mfc1       %[ret3], %[src3]                      \n\t"
+                "lwc1       %[src4], 16(%[src_ptr])               \n\t"
+                "lwc1       %[src5], 20(%[src_ptr])               \n\t"
+                "lwc1       %[src6], 24(%[src_ptr])               \n\t"
+                "lwc1       %[src7], 28(%[src_ptr])               \n\t"
+                "addu       %[dst_ptr1], %[dst_ptr0], %[ch2]      \n\t" /* output addresses for samples 1..7 */
+                "addu       %[dst_ptr2], %[dst_ptr1], %[ch2]      \n\t"
+                "addu       %[dst_ptr3], %[dst_ptr2], %[ch2]      \n\t"
+                "addu       %[dst_ptr4], %[dst_ptr3], %[ch2]      \n\t"
+                "addu       %[dst_ptr5], %[dst_ptr4], %[ch2]      \n\t"
+                "addu       %[dst_ptr6], %[dst_ptr5], %[ch2]      \n\t"
+                "addu       %[dst_ptr7], %[dst_ptr6], %[ch2]      \n\t"
+                "addiu      %[src_ptr],  32                       \n\t" /* src_ptr += 8 floats */
+                "cvt.w.s    %[src4], %[src4]                      \n\t"
+                "cvt.w.s    %[src5], %[src5]                      \n\t"
+                "cvt.w.s    %[src6], %[src6]                      \n\t"
+                "cvt.w.s    %[src7], %[src7]                      \n\t"
+                "shll_s.w   %[ret0], %[ret0], 16                  \n\t" /* saturate to int16 range */
+                "shll_s.w   %[ret1], %[ret1], 16                  \n\t"
+                "shll_s.w   %[ret2], %[ret2], 16                  \n\t"
+                "shll_s.w   %[ret3], %[ret3], 16                  \n\t"
+                "srl        %[ret0], %[ret0], 16                  \n\t"
+                "srl        %[ret1], %[ret1], 16                  \n\t"
+                "srl        %[ret2], %[ret2], 16                  \n\t"
+                "srl        %[ret3], %[ret3], 16                  \n\t"
+                "sh         %[ret0], 0(%[dst_ptr0])               \n\t"
+                "sh         %[ret1], 0(%[dst_ptr1])               \n\t"
+                "sh         %[ret2], 0(%[dst_ptr2])               \n\t"
+                "sh         %[ret3], 0(%[dst_ptr3])               \n\t"
+                "mfc1       %[ret4], %[src4]                      \n\t"
+                "mfc1       %[ret5], %[src5]                      \n\t"
+                "mfc1       %[ret6], %[src6]                      \n\t"
+                "mfc1       %[ret7], %[src7]                      \n\t"
+                "shll_s.w   %[ret4], %[ret4], 16                  \n\t"
+                "shll_s.w   %[ret5], %[ret5], 16                  \n\t"
+                "shll_s.w   %[ret6], %[ret6], 16                  \n\t"
+                "shll_s.w   %[ret7], %[ret7], 16                  \n\t"
+                "srl        %[ret4], %[ret4], 16                  \n\t"
+                "srl        %[ret5], %[ret5], 16                  \n\t"
+                "srl        %[ret6], %[ret6], 16                  \n\t"
+                "srl        %[ret7], %[ret7], 16                  \n\t"
+                "sh         %[ret4], 0(%[dst_ptr4])               \n\t"
+                "sh         %[ret5], 0(%[dst_ptr5])               \n\t"
+                "sh         %[ret6], 0(%[dst_ptr6])               \n\t"
+                "sh         %[ret7], 0(%[dst_ptr7])               \n\t"
+                "addu       %[dst_ptr0], %[dst_ptr7], %[ch2]      \n\t" /* first output of the next group of 8 */
+                "bne        %[src_ptr],  %[src_end],  fti16i_lp%= \n\t"
+                : [ret0]"=&r"(ret0), [ret1]"=&r"(ret1), [ret2]"=&r"(ret2), [ret3]"=&r"(ret3),
+                  [ret4]"=&r"(ret4), [ret5]"=&r"(ret5), [ret6]"=&r"(ret6), [ret7]"=&r"(ret7),
+                  [src0]"=&f"(src0), [src1]"=&f"(src1), [src2]"=&f"(src2), [src3]"=&f"(src3),
+                  [src4]"=&f"(src4), [src5]"=&f"(src5), [src6]"=&f"(src6), [src7]"=&f"(src7),
+                  [dst_ptr1]"=&r"(dst_ptr1), [dst_ptr2]"=&r"(dst_ptr2), [dst_ptr3]"=&r"(dst_ptr3),
+                  [dst_ptr4]"=&r"(dst_ptr4), [dst_ptr5]"=&r"(dst_ptr5), [dst_ptr6]"=&r"(dst_ptr6),
+                  [dst_ptr7]"=&r"(dst_ptr7), [dst_ptr0]"+r"(dst_ptr0), [src_ptr]"+r"(src_ptr)
+                : [ch2]"r"(ch2), [src_end]"r"(src_end)
+                : "memory"
+            );
+        }
+    }
+}
+#endif
+
+av_cold void ff_fmt_convert_init_mips(FmtConvertContext *c) { /* install the MIPS converters into c */
+#if HAVE_INLINE_ASM /* the three implementations below exist only as inline asm */
+    c->float_to_int16 = float_to_int16_mips;
+    c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_mips;
+    c->float_to_int16_interleave = float_to_int16_interleave_mips;
+#endif /* HAVE_INLINE_ASM */
+}
-- 
1.7.3.4



More information about the ffmpeg-devel mailing list