[FFmpeg-devel] [PATCH 07/12] mips/aacdec: remove uses of mips32r2 specific ext instructions

Nedeljko Babic Nedeljko.Babic at imgtec.com
Tue Mar 3 13:42:41 CET 2015


>Removing these removes the dependency of this code on mips32r2 which would
>allow it to be used on processors which have FPU instructions, but not r2
>instructions (like the mips64el debian port for instance).
>

I would be more comfortable if there were two instances of this code: one for
mips32r2 and one for mips32 so advantages of using mips32r2 instructions 
(however small here) are left intact.

On the other hand, since this doesn't change much number of instructions used
(adding at maximum around 100 instructions overall if I am not mistaking) I 
am ok with this.

>Signed-off-by: James Cowgill <james410 at cowgill.org.uk>
>---
> libavcodec/mips/aacdec_mips.h | 49 ++++++++++++++++++++++---------------------
> 1 file changed, 25 insertions(+), 24 deletions(-)
>
>diff --git a/libavcodec/mips/aacdec_mips.h b/libavcodec/mips/aacdec_mips.h
>index 9ba3079..c9efdbb 100644
>--- a/libavcodec/mips/aacdec_mips.h
>+++ b/libavcodec/mips/aacdec_mips.h
>@@ -68,10 +68,10 @@ static inline float *VMUL2_mips(float *dst, const float *v, unsigned idx,
>     float *ret;
>
>     __asm__ volatile(
>-        "andi    %[temp3],  %[idx],       15           \n\t"
>-        "ext     %[temp4],  %[idx],       4,      4    \n\t"
>+        "andi    %[temp3],  %[idx],       0x0F         \n\t"
>+        "andi    %[temp4],  %[idx],       0xF0         \n\t"
>         "sll     %[temp3],  %[temp3],     2            \n\t"
>-        "sll     %[temp4],  %[temp4],     2            \n\t"
>+        "srl     %[temp4],  %[temp4],     2            \n\t"
>         "lwc1    %[temp2],  0(%[scale])                \n\t"
>         "lwxc1   %[temp0],  %[temp3](%[v])             \n\t"
>         "lwxc1   %[temp1],  %[temp4](%[v])             \n\t"
>@@ -99,14 +99,13 @@ static inline float *VMUL4_mips(float *dst, const float *v, unsigned idx,
>     float *ret;
>
>     __asm__ volatile(
>-        "andi    %[temp0],  %[idx],       3           \n\t"
>-        "ext     %[temp1],  %[idx],       2,      2   \n\t"
>-        "ext     %[temp2],  %[idx],       4,      2   \n\t"
>-        "ext     %[temp3],  %[idx],       6,      2   \n\t"
>+        "andi    %[temp0],  %[idx],       0x03        \n\t"
>+        "andi    %[temp1],  %[idx],       0x0C        \n\t"
>+        "andi    %[temp2],  %[idx],       0x30        \n\t"
>+        "andi    %[temp3],  %[idx],       0xC0        \n\t"
>         "sll     %[temp0],  %[temp0],     2           \n\t"
>-        "sll     %[temp1],  %[temp1],     2           \n\t"
>-        "sll     %[temp2],  %[temp2],     2           \n\t"
>-        "sll     %[temp3],  %[temp3],     2           \n\t"
>+        "srl     %[temp2],  %[temp2],     2           \n\t"
>+        "srl     %[temp3],  %[temp3],     4           \n\t"
>         "lwc1    %[temp4],  0(%[scale])               \n\t"
>         "lwxc1   %[temp5],  %[temp0](%[v])            \n\t"
>         "lwxc1   %[temp6],  %[temp1](%[v])            \n\t"
>@@ -142,14 +141,14 @@ static inline float *VMUL2S_mips(float *dst, const float *v, unsigned idx,
>     float *ret;
>
>     __asm__ volatile(
>-        "andi    %[temp0],  %[idx],       15         \n\t"
>-        "ext     %[temp1],  %[idx],       4,     4   \n\t"
>+        "andi    %[temp0],  %[idx],       0x0F       \n\t"
>+        "andi    %[temp1],  %[idx],       0xF0       \n\t"
>         "lw      %[temp4],  0(%[scale])              \n\t"
>         "srl     %[temp2],  %[sign],      1          \n\t"
>         "sll     %[temp3],  %[sign],      31         \n\t"
>         "sll     %[temp2],  %[temp2],     31         \n\t"
>         "sll     %[temp0],  %[temp0],     2          \n\t"
>-        "sll     %[temp1],  %[temp1],     2          \n\t"
>+        "srl     %[temp1],  %[temp1],     2          \n\t"
>         "lwxc1   %[temp8],  %[temp0](%[v])           \n\t"
>         "lwxc1   %[temp9],  %[temp1](%[v])           \n\t"
>         "xor     %[temp5],  %[temp4],     %[temp2]   \n\t"
>@@ -185,22 +184,24 @@ static inline float *VMUL4S_mips(float *dst, const float *v, unsigned idx,
>
>     __asm__ volatile(
>         "lw      %[temp0],   0(%[scale])               \n\t"
>-        "and     %[temp1],   %[idx],       3           \n\t"
>-        "ext     %[temp2],   %[idx],       2,      2   \n\t"
>-        "ext     %[temp3],   %[idx],       4,      2   \n\t"
>-        "ext     %[temp4],   %[idx],       6,      2   \n\t"
>-        "sll     %[temp1],   %[temp1],     2           \n\t"
>-        "sll     %[temp2],   %[temp2],     2           \n\t"
>-        "sll     %[temp3],   %[temp3],     2           \n\t"
>-        "sll     %[temp4],   %[temp4],     2           \n\t"
>+        "andi    %[temp1],  %[idx],       0x03         \n\t"
>+        "andi    %[temp2],  %[idx],       0x0C         \n\t"
>+        "andi    %[temp3],  %[idx],       0x30         \n\t"
>+        "andi    %[temp4],  %[idx],       0xC0         \n\t"
>+        "sll     %[temp1],  %[temp1],     2            \n\t"
>+        "srl     %[temp3],  %[temp3],     2            \n\t"
>+        "srl     %[temp4],  %[temp4],     4            \n\t"
>         "lwxc1   %[temp10],  %[temp1](%[v])            \n\t"
>         "lwxc1   %[temp11],  %[temp2](%[v])            \n\t"
>         "lwxc1   %[temp12],  %[temp3](%[v])            \n\t"
>         "lwxc1   %[temp13],  %[temp4](%[v])            \n\t"
>         "and     %[temp1],   %[sign],      %[mask]     \n\t"
>-        "ext     %[temp2],   %[idx],       12,     1   \n\t"
>-        "ext     %[temp3],   %[idx],       13,     1   \n\t"
>-        "ext     %[temp4],   %[idx],       14,     1   \n\t"
>+        "srl     %[temp2],   %[idx],       12          \n\t"
>+        "srl     %[temp3],   %[idx],       13          \n\t"
>+        "srl     %[temp4],   %[idx],       14          \n\t"
>+        "andi    %[temp2],   %[temp2],     1           \n\t"
>+        "andi    %[temp3],   %[temp3],     1           \n\t"
>+        "andi    %[temp4],   %[temp4],     1           \n\t"
>         "sllv    %[sign],    %[sign],      %[temp2]    \n\t"
>         "xor     %[temp1],   %[temp0],     %[temp1]    \n\t"
>         "and     %[temp2],   %[sign],      %[mask]     \n\t"
>--
>2.1.4

-Nedeljko


More information about the ffmpeg-devel mailing list