[FFmpeg-cvslog] x86/svq1enc: port ssd_int8_vs_int16 to yasm

James Almer git at videolan.org
Sat Jul 5 22:02:37 CEST 2014


ffmpeg | branch: master | James Almer <jamrial at gmail.com> | Sat Jul  5 16:40:29 2014 -0300| [dad31083aec7f0fb7bf39406dc6cbf11c4834b02] | committer: Michael Niedermayer

x86/svq1enc: port ssd_int8_vs_int16 to yasm

Also add an SSE2 version

Signed-off-by: James Almer <jamrial at gmail.com>
Signed-off-by: Michael Niedermayer <michaelni at gmx.at>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=dad31083aec7f0fb7bf39406dc6cbf11c4834b02
---

 libavcodec/svq1enc.c          |    2 +-
 libavcodec/svq1enc.h          |    2 +-
 libavcodec/x86/Makefile       |    3 +-
 libavcodec/x86/svq1enc.asm    |   61 +++++++++++++++++++++++++++++++++
 libavcodec/x86/svq1enc.c      |   75 -----------------------------------------
 libavcodec/x86/svq1enc_init.c |   42 +++++++++++++++++++++++
 6 files changed, 107 insertions(+), 78 deletions(-)

diff --git a/libavcodec/svq1enc.c b/libavcodec/svq1enc.c
index 26e7aea..6d1b397 100644
--- a/libavcodec/svq1enc.c
+++ b/libavcodec/svq1enc.c
@@ -77,7 +77,7 @@ static void svq1_write_header(SVQ1EncContext *s, int frame_type)
 #define THRESHOLD_MULTIPLIER 0.6
 
 static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
-                               int size)
+                               intptr_t size)
 {
     int score = 0, i;
 
diff --git a/libavcodec/svq1enc.h b/libavcodec/svq1enc.h
index 5179ca5..612b6e2 100644
--- a/libavcodec/svq1enc.h
+++ b/libavcodec/svq1enc.h
@@ -69,7 +69,7 @@ typedef struct SVQ1EncContext {
     uint8_t *scratchbuf;
 
     int (*ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2,
-                             int size);
+                             intptr_t size);
 } SVQ1EncContext;
 
 void ff_svq1enc_init_ppc(SVQ1EncContext *c);
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index f58a320..d376bb7 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -45,7 +45,7 @@ OBJS-$(CONFIG_PRORES_LGPL_DECODER)     += x86/proresdsp_init.o
 OBJS-$(CONFIG_RV30_DECODER)            += x86/rv34dsp_init.o
 OBJS-$(CONFIG_RV40_DECODER)            += x86/rv34dsp_init.o            \
                                           x86/rv40dsp_init.o
-OBJS-$(CONFIG_SVQ1_ENCODER)            += x86/svq1enc.o
+OBJS-$(CONFIG_SVQ1_ENCODER)            += x86/svq1enc_init.o
 OBJS-$(CONFIG_V210_DECODER)            += x86/v210-init.o
 OBJS-$(CONFIG_TTA_DECODER)             += x86/ttadsp_init.o
 OBJS-$(CONFIG_TRUEHD_DECODER)          += x86/mlpdsp.o
@@ -122,6 +122,7 @@ YASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp.o
 YASM-OBJS-$(CONFIG_RV30_DECODER)       += x86/rv34dsp.o
 YASM-OBJS-$(CONFIG_RV40_DECODER)       += x86/rv34dsp.o                 \
                                           x86/rv40dsp.o
+YASM-OBJS-$(CONFIG_SVQ1_ENCODER)       += x86/svq1enc.o
 YASM-OBJS-$(CONFIG_TTA_DECODER)        += x86/ttadsp.o
 YASM-OBJS-$(CONFIG_V210_DECODER)       += x86/v210.o
 YASM-OBJS-$(CONFIG_VC1_DECODER)        += x86/vc1dsp.o
diff --git a/libavcodec/x86/svq1enc.asm b/libavcodec/x86/svq1enc.asm
new file mode 100644
index 0000000..24ee70f
--- /dev/null
+++ b/libavcodec/x86/svq1enc.asm
@@ -0,0 +1,61 @@
+;******************************************************************************
+;* SIMD-optimized SVQ1 encoder functions
+;* Copyright (c) 2007 Loren Merritt
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_TEXT
+
+%macro SSD_INT8_VS_INT16 0 ; emits ssd_int8_vs_int16: sum of squared differences between int8 pix1[] and int16 pix2[]
+cglobal ssd_int8_vs_int16, 3, 3, 3, pix1, pix2, size ; NOTE(review): the MMX path also touches m3/m4 beyond the declared count - confirm this is fine per x86inc
+    pxor m0, m0                 ; m0 = dword accumulators, start at zero
+.loop
+    sub       sizeq, 8          ; step back 8 elements (buffers are walked from the end); these flags feed the jg below. Presumably size is a positive multiple of 8 - TODO confirm with callers
+    movq      m1, [pix1q + sizeq] ; load 8 int8 values from pix1
+    mova      m2, [pix2q + sizeq*2] ; load mmsize bytes of int16 values from pix2. NOTE(review): aligned load in the XMM build - confirm pix2 alignment
+%if mmsize == 8                 ; MMX: only 4 words per register, so the 8 elements are handled as two halves
+    movq      m3, [pix2q + sizeq*2 + mmsize] ; second group of 4 int16 values from pix2
+    punpckhbw m4, m1            ; high 4 bytes of m1 become the high byte of each word; low bytes are stale m4 contents
+    punpcklbw m1, m1            ; low 4 bytes of m1 duplicated into both bytes of each word
+    psraw     m4, 8             ; arithmetic shift sign-extends int8 -> int16 and drops the stale low bytes
+    psraw     m1, 8             ; same sign extension for the low half
+    psubw     m3, m4            ; differences for elements 4..7 (sign is irrelevant, they get squared)
+    psubw     m2, m1            ; differences for elements 0..3
+    pmaddwd   m3, m3            ; square each difference and add adjacent pairs into dwords
+    pmaddwd   m2, m2            ; likewise for the low half
+    paddd     m0, m3            ; accumulate high-half partial sums
+    paddd     m0, m2            ; accumulate low-half partial sums
+%else                           ; XMM: all 8 elements fit in one register
+    punpcklbw m1, m1            ; each of the 8 loaded bytes duplicated into both bytes of a word
+    psraw     m1, 8             ; arithmetic shift leaves the sign-extended int8 value in each word
+    psubw     m2, m1            ; pix2 - pix1 for the 8 elements
+    pmaddwd   m2, m2            ; square each difference and add adjacent pairs into dwords
+    paddd     m0, m2            ; accumulate partial sums
+%endif
+    jg .loop                    ; uses the flags from the sub above (the SIMD ops leave them untouched): repeat while size > 0
+    HADDD     m0, m1            ; x86util macro (definition not visible here): presumably sums the dword lanes of m0 with m1 as scratch
+    movd     eax, m0            ; return the low dword in eax
+    RET
+%endmacro
+
+INIT_MMX mmx      ; select the MMX register set (mmsize == 8 branch of the macro)
+SSD_INT8_VS_INT16 ; instantiates the routine declared in svq1enc_init.c as ff_ssd_int8_vs_int16_mmx
+INIT_XMM sse2     ; select the XMM register set (the %else branch of the macro)
+SSD_INT8_VS_INT16 ; instantiates the routine declared in svq1enc_init.c as ff_ssd_int8_vs_int16_sse2
diff --git a/libavcodec/x86/svq1enc.c b/libavcodec/x86/svq1enc.c
deleted file mode 100644
index 01f16b1..0000000
--- a/libavcodec/x86/svq1enc.c
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2007 Loren Merritt
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "config.h"
-#include "libavutil/attributes.h"
-#include "libavutil/cpu.h"
-#include "libavutil/x86/asm.h"
-#include "libavutil/x86/cpu.h"
-#include "libavcodec/svq1enc.h"
-
-#if HAVE_INLINE_ASM
-
-static int ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2,
-                                 int size)
-{
-    int sum;
-    x86_reg i = size;
-
-    __asm__ volatile (
-        "pxor %%mm4, %%mm4 \n"
-        "1: \n"
-        "sub $8, %0 \n"
-        "movq (%2, %0), %%mm2 \n"
-        "movq (%3, %0, 2), %%mm0 \n"
-        "movq 8(%3, %0, 2), %%mm1 \n"
-        "punpckhbw %%mm2, %%mm3 \n"
-        "punpcklbw %%mm2, %%mm2 \n"
-        "psraw $8, %%mm3 \n"
-        "psraw $8, %%mm2 \n"
-        "psubw %%mm3, %%mm1 \n"
-        "psubw %%mm2, %%mm0 \n"
-        "pmaddwd %%mm1, %%mm1 \n"
-        "pmaddwd %%mm0, %%mm0 \n"
-        "paddd %%mm1, %%mm4 \n"
-        "paddd %%mm0, %%mm4 \n"
-        "jg 1b \n"
-        "movq %%mm4, %%mm3 \n"
-        "psrlq $32, %%mm3 \n"
-        "paddd %%mm3, %%mm4 \n"
-        "movd %%mm4, %1 \n"
-        : "+r" (i), "=r" (sum)
-        : "r" (pix1), "r" (pix2));
-
-    return sum;
-}
-
-#endif /* HAVE_INLINE_ASM */
-
-av_cold void ff_svq1enc_init_x86(SVQ1EncContext *c)
-{
-#if HAVE_INLINE_ASM
-    int cpu_flags = av_get_cpu_flags();
-
-    if (INLINE_MMX(cpu_flags)) {
-        c->ssd_int8_vs_int16 = ssd_int8_vs_int16_mmx;
-    }
-#endif /* HAVE_INLINE_ASM */
-}
diff --git a/libavcodec/x86/svq1enc_init.c b/libavcodec/x86/svq1enc_init.c
new file mode 100644
index 0000000..40b4b0e
--- /dev/null
+++ b/libavcodec/x86/svq1enc_init.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2007 Loren Merritt
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/svq1enc.h"
+
+int ff_ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2,
+                             intptr_t size);  /* MMX instance of SSD_INT8_VS_INT16 from x86/svq1enc.asm */
+int ff_ssd_int8_vs_int16_sse2(const int8_t *pix1, const int16_t *pix2,
+                              intptr_t size); /* SSE2 instance of the same macro; size is intptr_t because the asm uses it as a full-width register */
+
+av_cold void ff_svq1enc_init_x86(SVQ1EncContext *c) /* installs the x86 asm versions of ssd_int8_vs_int16 into c */
+{
+    int cpu_flags = av_get_cpu_flags(); /* CPU feature flags, tested by the EXTERNAL_* macros below */
+
+    if (EXTERNAL_MMX(cpu_flags)) { /* baseline: use the MMX routine when available */
+        c->ssd_int8_vs_int16 = ff_ssd_int8_vs_int16_mmx;
+    }
+    if (EXTERNAL_SSE2(cpu_flags)) { /* checked last so SSE2 overrides MMX when both are available */
+        c->ssd_int8_vs_int16 = ff_ssd_int8_vs_int16_sse2;
+    }
+}



More information about the ffmpeg-cvslog mailing list