[Ffmpeg-cvslog] CVS: ffmpeg/libavcodec/i386 dsputil_mmx.c, 1.110, 1.111 h264dsp_mmx.c, 1.14, 1.15
Loren Merritt CVS
lorenm
Fri Feb 10 07:55:28 CET 2006
- Previous message: [Ffmpeg-cvslog] CVS: ffmpeg/libavcodec dsputil.c, 1.133, 1.134 dsputil.h, 1.127, 1.128 h264.c, 1.186, 1.187 h264idct.c, 1.4, 1.5
- Next message: [Ffmpeg-cvslog] CVS: ffmpeg MAINTAINERS,1.2,1.3
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
Update of /cvsroot/ffmpeg/ffmpeg/libavcodec/i386
In directory mail:/var2/tmp/cvs-serv3337/i386
Modified Files:
dsputil_mmx.c h264dsp_mmx.c
Log Message:
h264: special case dc-only idct. ~1% faster overall
Index: dsputil_mmx.c
===================================================================
RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/i386/dsputil_mmx.c,v
retrieving revision 1.110
retrieving revision 1.111
diff -u -d -r1.110 -r1.111
--- dsputil_mmx.c 5 Feb 2006 13:35:17 -0000 1.110
+++ dsputil_mmx.c 10 Feb 2006 06:55:25 -0000 1.111
@@ -2754,6 +2754,8 @@
#endif //CONFIG_ENCODERS
c->h264_idct_add= ff_h264_idct_add_mmx2;
+ c->h264_idct_dc_add= ff_h264_idct_dc_add_mmx2;
+ c->h264_idct8_dc_add= ff_h264_idct8_dc_add_mmx2;
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2;
Index: h264dsp_mmx.c
===================================================================
RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/i386/h264dsp_mmx.c,v
retrieving revision 1.14
retrieving revision 1.15
diff -u -d -r1.14 -r1.15
--- h264dsp_mmx.c 9 Feb 2006 02:43:23 -0000 1.14
+++ h264dsp_mmx.c 10 Feb 2006 06:55:25 -0000 1.15
@@ -104,6 +104,87 @@
);
}
+void ff_h264_idct_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride)
+{
+ int dc = (block[0] + 32) >> 6;
+ asm volatile(
+ "movd %0, %%mm0 \n\t"
+ "pxor %%mm7, %%mm7 \n\t"
+ "pshufw $0, %%mm0, %%mm0 \n\t"
+ "pxor %%mm1, %%mm1 \n\t"
+ "psubw %%mm0, %%mm1 \n\t"
+ "pmaxsw %%mm7, %%mm0 \n\t"
+ "pmaxsw %%mm7, %%mm1 \n\t"
+ "packuswb %%mm0, %%mm0 \n\t"
+ "packuswb %%mm1, %%mm1 \n\t"
+ ::"r"(dc)
+ );
+ asm volatile(
+ "movd %0, %%mm2 \n\t"
+ "movd %1, %%mm3 \n\t"
+ "movd %2, %%mm4 \n\t"
+ "movd %3, %%mm5 \n\t"
+ "paddusb %%mm0, %%mm2 \n\t"
+ "paddusb %%mm0, %%mm3 \n\t"
+ "paddusb %%mm0, %%mm4 \n\t"
+ "paddusb %%mm0, %%mm5 \n\t"
+ "psubusb %%mm1, %%mm2 \n\t"
+ "psubusb %%mm1, %%mm3 \n\t"
+ "psubusb %%mm1, %%mm4 \n\t"
+ "psubusb %%mm1, %%mm5 \n\t"
+ "movd %%mm2, %0 \n\t"
+ "movd %%mm3, %1 \n\t"
+ "movd %%mm4, %2 \n\t"
+ "movd %%mm5, %3 \n\t"
+ :"+m"(*(uint32_t*)(dst+0*stride)),
+ "+m"(*(uint32_t*)(dst+1*stride)),
+ "+m"(*(uint32_t*)(dst+2*stride)),
+ "+m"(*(uint32_t*)(dst+3*stride))
+ );
+}
+
+void ff_h264_idct8_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride)
+{
+ int dc = (block[0] + 32) >> 6;
+ int y;
+ asm volatile(
+ "movd %0, %%mm0 \n\t"
+ "pxor %%mm7, %%mm7 \n\t"
+ "pshufw $0, %%mm0, %%mm0 \n\t"
+ "pxor %%mm1, %%mm1 \n\t"
+ "psubw %%mm0, %%mm1 \n\t"
+ "pmaxsw %%mm7, %%mm0 \n\t"
+ "pmaxsw %%mm7, %%mm1 \n\t"
+ "packuswb %%mm0, %%mm0 \n\t"
+ "packuswb %%mm1, %%mm1 \n\t"
+ ::"r"(dc)
+ );
+ for(y=2; y--; dst += 4*stride){
+ asm volatile(
+ "movq %0, %%mm2 \n\t"
+ "movq %1, %%mm3 \n\t"
+ "movq %2, %%mm4 \n\t"
+ "movq %3, %%mm5 \n\t"
+ "paddusb %%mm0, %%mm2 \n\t"
+ "paddusb %%mm0, %%mm3 \n\t"
+ "paddusb %%mm0, %%mm4 \n\t"
+ "paddusb %%mm0, %%mm5 \n\t"
+ "psubusb %%mm1, %%mm2 \n\t"
+ "psubusb %%mm1, %%mm3 \n\t"
+ "psubusb %%mm1, %%mm4 \n\t"
+ "psubusb %%mm1, %%mm5 \n\t"
+ "movq %%mm2, %0 \n\t"
+ "movq %%mm3, %1 \n\t"
+ "movq %%mm4, %2 \n\t"
+ "movq %%mm5, %3 \n\t"
+ :"+m"(*(uint64_t*)(dst+0*stride)),
+ "+m"(*(uint64_t*)(dst+1*stride)),
+ "+m"(*(uint64_t*)(dst+2*stride)),
+ "+m"(*(uint64_t*)(dst+3*stride))
+ );
+ }
+}
+
/***********************************/
/* deblocking */
- Previous message: [Ffmpeg-cvslog] CVS: ffmpeg/libavcodec dsputil.c, 1.133, 1.134 dsputil.h, 1.127, 1.128 h264.c, 1.186, 1.187 h264idct.c, 1.4, 1.5
- Next message: [Ffmpeg-cvslog] CVS: ffmpeg MAINTAINERS,1.2,1.3
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
More information about the ffmpeg-cvslog
mailing list