[Ffmpeg-devel] [PATCH] (3) building with --disable-opts on i386 with mmx enabled

Marco Manfredini mldb
Fri Aug 11 23:38:48 CEST 2006

On Friday 11 August 2006 22:43, Michael Niedermayer wrote:

> yes the code looks valid now, though its still wont work on 2.95 ...

Can I bother you for a comment on this pattern to avoid register spills?
It should have no negative effect on optimisation, but at -O0 the input
parameters may all be addressed via offset(bp), leaving enough registers free
to hold the pointers for the output values.

/**
 * Transpose a 4x4 block of bytes with MMX.
 *
 * Each input word holds one row of four bytes; each output word receives one
 * column: byte k of input row j becomes byte j of output row k.
 *
 * The inputs are taken by value and handed to the asm as "m" operands, and the
 * outputs are "=m" operands behind pointers, so the asm statement itself never
 * demands a general-purpose register for a value operand.  All four inputs are
 * read before the first output is written.
 *
 * This function does not issue emms.
 * NOTE(review): presumably the caller resets the MMX state before any x87
 * floating-point code runs -- confirm against the call sites.
 *
 * @param o0 destination for output row 0
 * @param o1 destination for output row 1
 * @param o2 destination for output row 2
 * @param o3 destination for output row 3
 * @param i4 input row 0 (4 packed bytes)
 * @param i5 input row 1 (4 packed bytes)
 * @param i6 input row 2 (4 packed bytes)
 * @param i7 input row 3 (4 packed bytes)
 */
static inline void transpose4x4_block(
   uint32_t *o0, uint32_t *o1, uint32_t *o2, uint32_t *o3,
   uint32_t i4, uint32_t i5, uint32_t i6, uint32_t i7)
{
    /* __asm__ rather than asm so this also builds in strict ISO C modes. */
    __asm__ volatile(
        /* load the four input rows */
        "movd  %4, %%mm0                \n\t"
        "movd  %5, %%mm1                \n\t"
        "movd  %6, %%mm2                \n\t"
        "movd  %7, %%mm3                \n\t"
        /* interleave bytes of rows 0/1 and of rows 2/3 */
        "punpcklbw %%mm1, %%mm0         \n\t"
        "punpcklbw %%mm3, %%mm2         \n\t"
        "movq %%mm0, %%mm1              \n\t"
        /* interleave 16-bit pairs: mm0 = columns 0,1; mm1 = columns 2,3 */
        "punpcklwd %%mm2, %%mm0         \n\t"
        "punpckhwd %%mm2, %%mm1         \n\t"
        /* store low dword, then move the high dword down and store it */
        "movd  %%mm0, %0                \n\t"
        "punpckhdq %%mm0, %%mm0         \n\t"
        "movd  %%mm0, %1                \n\t"
        "movd  %%mm1, %2                \n\t"
        "punpckhdq %%mm1, %%mm1         \n\t"
        "movd  %%mm1, %3                \n\t"

        : "=m" (*o0),
          "=m" (*o1),
          "=m" (*o2),
          "=m" (*o3)
        :  "m" (i4),
           "m" (i5),
           "m" (i6),
           "m" (i7)
    );
}

/**
 * Transpose a 4x4 block of bytes from src into dst.
 *
 * Reads four rows of 4 bytes from src (rows src_stride bytes apart) and passes
 * them by value to transpose4x4_block(), which writes the four result rows to
 * dst (rows dst_stride bytes apart).
 *
 * NOTE(review): both buffers are accessed through uint32_t casts of uint8_t
 * pointers; this assumes such 32-bit accesses are acceptable for the callers'
 * buffers (alignment / aliasing) -- confirm.
 *
 * @param dst        destination block, top-left byte
 * @param src        source block, top-left byte
 * @param dst_stride distance in bytes between destination rows
 * @param src_stride distance in bytes between source rows
 */
static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride,
                                int src_stride){
    transpose4x4_block(
       ((uint32_t*)(dst + 0*dst_stride)),
       ((uint32_t*)(dst + 1*dst_stride)),
       ((uint32_t*)(dst + 2*dst_stride)),
       ((uint32_t*)(dst + 3*dst_stride)),
       (*(uint32_t*)(src + 0*src_stride)),
       (*(uint32_t*)(src + 1*src_stride)),
       (*(uint32_t*)(src + 2*src_stride)),
       (*(uint32_t*)(src + 3*src_stride)));
}

