[FFmpeg-devel] [PATCH] x86/vf_noise: move asm code to a separate file

James Almer jamrial at gmail.com
Fri Oct 17 03:33:58 CEST 2014


Signed-off-by: James Almer <jamrial at gmail.com>
---
 libavfilter/vf_noise.c     | 164 +++------------------------------------------
 libavfilter/vf_noise.h     |  64 ++++++++++++++++++
 libavfilter/x86/Makefile   |   1 +
 libavfilter/x86/vf_noise.c | 144 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 218 insertions(+), 155 deletions(-)
 create mode 100644 libavfilter/vf_noise.h
 create mode 100644 libavfilter/x86/vf_noise.c

diff --git a/libavfilter/vf_noise.c b/libavfilter/vf_noise.c
index 6218ed0..4acad8a 100644
--- a/libavfilter/vf_noise.c
+++ b/libavfilter/vf_noise.c
@@ -29,43 +29,12 @@
 #include "libavutil/lfg.h"
 #include "libavutil/parseutils.h"
 #include "libavutil/pixdesc.h"
-#include "libavutil/x86/asm.h"
 #include "avfilter.h"
 #include "formats.h"
 #include "internal.h"
+#include "vf_noise.h"
 #include "video.h"
 
-#define MAX_NOISE 5120
-#define MAX_SHIFT 1024
-#define MAX_RES (MAX_NOISE-MAX_SHIFT)
-
-#define NOISE_UNIFORM  1
-#define NOISE_TEMPORAL 2
-#define NOISE_AVERAGED 8
-#define NOISE_PATTERN  16
-
-typedef struct {
-    int strength;
-    unsigned flags;
-    AVLFG lfg;
-    int seed;
-    int8_t *noise;
-    int8_t *prev_shift[MAX_RES][3];
-    int rand_shift[MAX_RES];
-    int rand_shift_init;
-} FilterParams;
-
-typedef struct {
-    const AVClass *class;
-    int nb_planes;
-    int bytewidth[4];
-    int height[4];
-    FilterParams all;
-    FilterParams param[4];
-    void (*line_noise)(uint8_t *dst, const uint8_t *src, const int8_t *noise, int len, int shift);
-    void (*line_noise_avg)(uint8_t *dst, const uint8_t *src, int len, const int8_t * const *shift);
-} NoiseContext;
-
 typedef struct ThreadData {
     AVFrame *in, *out;
 } ThreadData;
@@ -193,8 +162,8 @@ static int config_input(AVFilterLink *inlink)
     return 0;
 }
 
-static inline void line_noise_c(uint8_t *dst, const uint8_t *src, const int8_t *noise,
-                       int len, int shift)
+void ff_line_noise_c(uint8_t *dst, const uint8_t *src, const int8_t *noise,
+                     int len, int shift)
 {
     int i;
 
@@ -206,70 +175,8 @@ static inline void line_noise_c(uint8_t *dst, const uint8_t *src, const int8_t *
     }
 }
 
-#define ASMALIGN(ZEROBITS) ".p2align " #ZEROBITS "\n\t"
-
-static void line_noise_mmx(uint8_t *dst, const uint8_t *src,
-                           const int8_t *noise, int len, int shift)
-{
-#if HAVE_MMX_INLINE
-    x86_reg mmx_len= len&(~7);
-    noise+=shift;
-
-    __asm__ volatile(
-            "mov %3, %%"REG_a"               \n\t"
-            "pcmpeqb %%mm7, %%mm7            \n\t"
-            "psllw $15, %%mm7                \n\t"
-            "packsswb %%mm7, %%mm7           \n\t"
-            ASMALIGN(4)
-            "1:                              \n\t"
-            "movq (%0, %%"REG_a"), %%mm0     \n\t"
-            "movq (%1, %%"REG_a"), %%mm1     \n\t"
-            "pxor %%mm7, %%mm0               \n\t"
-            "paddsb %%mm1, %%mm0             \n\t"
-            "pxor %%mm7, %%mm0               \n\t"
-            "movq %%mm0, (%2, %%"REG_a")     \n\t"
-            "add $8, %%"REG_a"               \n\t"
-            " js 1b                          \n\t"
-            :: "r" (src+mmx_len), "r" (noise+mmx_len), "r" (dst+mmx_len), "g" (-mmx_len)
-            : "%"REG_a
-    );
-    if (mmx_len!=len)
-        line_noise_c(dst+mmx_len, src+mmx_len, noise+mmx_len, len-mmx_len, 0);
-#endif
-}
-
-static void line_noise_mmxext(uint8_t *dst, const uint8_t *src,
-                              const int8_t *noise, int len, int shift)
-{
-#if HAVE_MMXEXT_INLINE
-    x86_reg mmx_len= len&(~7);
-    noise+=shift;
-
-    __asm__ volatile(
-            "mov %3, %%"REG_a"                \n\t"
-            "pcmpeqb %%mm7, %%mm7             \n\t"
-            "psllw $15, %%mm7                 \n\t"
-            "packsswb %%mm7, %%mm7            \n\t"
-            ASMALIGN(4)
-            "1:                               \n\t"
-            "movq (%0, %%"REG_a"), %%mm0      \n\t"
-            "movq (%1, %%"REG_a"), %%mm1      \n\t"
-            "pxor %%mm7, %%mm0                \n\t"
-            "paddsb %%mm1, %%mm0              \n\t"
-            "pxor %%mm7, %%mm0                \n\t"
-            "movntq %%mm0, (%2, %%"REG_a")    \n\t"
-            "add $8, %%"REG_a"                \n\t"
-            " js 1b                           \n\t"
-            :: "r" (src+mmx_len), "r" (noise+mmx_len), "r" (dst+mmx_len), "g" (-mmx_len)
-            : "%"REG_a
-            );
-    if (mmx_len != len)
-        line_noise_c(dst+mmx_len, src+mmx_len, noise+mmx_len, len-mmx_len, 0);
-#endif
-}
-
-static inline void line_noise_avg_c(uint8_t *dst, const uint8_t *src,
-                           int len, const int8_t * const *shift)
+void ff_line_noise_avg_c(uint8_t *dst, const uint8_t *src,
+                         int len, const int8_t * const *shift)
 {
     int i;
     const int8_t *src2 = (const int8_t*)src;
@@ -280,50 +187,6 @@ static inline void line_noise_avg_c(uint8_t *dst, const uint8_t *src,
     }
 }
 
-static inline void line_noise_avg_mmx(uint8_t *dst, const uint8_t *src,
-                                      int len, const int8_t * const *shift)
-{
-#if HAVE_MMX_INLINE && HAVE_6REGS
-    x86_reg mmx_len= len&(~7);
-
-    __asm__ volatile(
-            "mov %5, %%"REG_a"              \n\t"
-            ASMALIGN(4)
-            "1:                             \n\t"
-            "movq (%1, %%"REG_a"), %%mm1    \n\t"
-            "movq (%0, %%"REG_a"), %%mm0    \n\t"
-            "paddb (%2, %%"REG_a"), %%mm1   \n\t"
-            "paddb (%3, %%"REG_a"), %%mm1   \n\t"
-            "movq %%mm0, %%mm2              \n\t"
-            "movq %%mm1, %%mm3              \n\t"
-            "punpcklbw %%mm0, %%mm0         \n\t"
-            "punpckhbw %%mm2, %%mm2         \n\t"
-            "punpcklbw %%mm1, %%mm1         \n\t"
-            "punpckhbw %%mm3, %%mm3         \n\t"
-            "pmulhw %%mm0, %%mm1            \n\t"
-            "pmulhw %%mm2, %%mm3            \n\t"
-            "paddw %%mm1, %%mm1             \n\t"
-            "paddw %%mm3, %%mm3             \n\t"
-            "paddw %%mm0, %%mm1             \n\t"
-            "paddw %%mm2, %%mm3             \n\t"
-            "psrlw $8, %%mm1                \n\t"
-            "psrlw $8, %%mm3                \n\t"
-            "packuswb %%mm3, %%mm1          \n\t"
-            "movq %%mm1, (%4, %%"REG_a")    \n\t"
-            "add $8, %%"REG_a"              \n\t"
-            " js 1b                         \n\t"
-            :: "r" (src+mmx_len), "r" (shift[0]+mmx_len), "r" (shift[1]+mmx_len), "r" (shift[2]+mmx_len),
-               "r" (dst+mmx_len), "g" (-mmx_len)
-            : "%"REG_a
-        );
-
-    if (mmx_len != len){
-        const int8_t *shift2[3]={shift[0]+mmx_len, shift[1]+mmx_len, shift[2]+mmx_len};
-        line_noise_avg_c(dst+mmx_len, src+mmx_len, len-mmx_len, shift2);
-    }
-#endif
-}
-
 static void noise(uint8_t *dst, const uint8_t *src,
                   int dst_linesize, int src_linesize,
                   int width, int start, int end, NoiseContext *n, int comp)
@@ -421,7 +284,6 @@ static av_cold int init(AVFilterContext *ctx)
 {
     NoiseContext *n = ctx->priv;
     int ret, i;
-    int cpu_flags = av_get_cpu_flags();
 
     for (i = 0; i < 4; i++) {
         if (n->all.seed >= 0)
@@ -439,19 +301,11 @@ static av_cold int init(AVFilterContext *ctx)
             return ret;
     }
 
-    n->line_noise     = line_noise_c;
-    n->line_noise_avg = line_noise_avg_c;
+    n->line_noise     = ff_line_noise_c;
+    n->line_noise_avg = ff_line_noise_avg_c;
 
-    if (HAVE_MMX_INLINE &&
-        cpu_flags & AV_CPU_FLAG_MMX) {
-        n->line_noise = line_noise_mmx;
-#if HAVE_6REGS
-        n->line_noise_avg = line_noise_avg_mmx;
-#endif
-    }
-    if (HAVE_MMXEXT_INLINE &&
-        cpu_flags & AV_CPU_FLAG_MMXEXT)
-        n->line_noise = line_noise_mmxext;
+    if (ARCH_X86)
+        ff_noise_init_x86(n);
 
     return 0;
 }
diff --git a/libavfilter/vf_noise.h b/libavfilter/vf_noise.h
new file mode 100644
index 0000000..2207ed9
--- /dev/null
+++ b/libavfilter/vf_noise.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2002 Michael Niedermayer <michaelni at gmx.at>
+ * Copyright (c) 2013 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_NOISE_H
+#define AVFILTER_NOISE_H
+
+#include "libavutil/lfg.h"
+#include "avfilter.h"
+
+#define MAX_NOISE 5120
+#define MAX_SHIFT 1024
+#define MAX_RES (MAX_NOISE-MAX_SHIFT)
+
+#define NOISE_UNIFORM  1
+#define NOISE_TEMPORAL 2
+#define NOISE_AVERAGED 8
+#define NOISE_PATTERN  16
+/* Noise parameters and generator state; NoiseContext holds one as `all` and four in `param[]`. */
+typedef struct {
+    int strength;
+    unsigned flags;                 /* presumably a mask of the NOISE_* bits defined above -- TODO confirm in vf_noise.c */
+    AVLFG lfg;                      /* generator state type from libavutil/lfg.h */
+    int seed;
+    int8_t *noise;                  /* presumably the signed buffer passed as `noise` to line_noise -- TODO confirm */
+    int8_t *prev_shift[MAX_RES][3]; /* three pointers per entry; presumably the `shift` rows given to line_noise_avg -- TODO confirm */
+    int rand_shift[MAX_RES];
+    int rand_shift_init;
+} FilterParams;
+/* Filter private context (init() reads it from ctx->priv); also carries the selected per-line workers. */
+typedef struct {
+    const AVClass *class;
+    int nb_planes;
+    int bytewidth[4];
+    int height[4];
+    FilterParams all;
+    FilterParams param[4];
+    /* init() sets these to the ff_line_noise*_c versions; ff_noise_init_x86() may replace them. */
+    void (*line_noise)(uint8_t *dst, const uint8_t *src, const int8_t *noise, int len, int shift);
+    void (*line_noise_avg)(uint8_t *dst, const uint8_t *src, int len, const int8_t * const *shift);
+} NoiseContext;
+/* C implementations; the x86 versions also call them to finish the len % 8 tail of a line. */
+void ff_line_noise_c(uint8_t *dst, const uint8_t *src, const int8_t *noise, int len, int shift);
+void ff_line_noise_avg_c(uint8_t *dst, const uint8_t *src, int len, const int8_t * const *shift);
+/* Called from init() when ARCH_X86 is set; may point n->line_noise / n->line_noise_avg at asm versions. */
+void ff_noise_init_x86(NoiseContext *n);
+
+#endif /* AVFILTER_NOISE_H */
diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile
index ddb3774..32145db 100644
--- a/libavfilter/x86/Makefile
+++ b/libavfilter/x86/Makefile
@@ -1,6 +1,7 @@
 OBJS-$(CONFIG_GRADFUN_FILTER)                += x86/vf_gradfun_init.o
 OBJS-$(CONFIG_HQDN3D_FILTER)                 += x86/vf_hqdn3d_init.o
 OBJS-$(CONFIG_IDET_FILTER)                   += x86/vf_idet_init.o
+OBJS-$(CONFIG_NOISE_FILTER)                  += x86/vf_noise.o
 OBJS-$(CONFIG_PULLUP_FILTER)                 += x86/vf_pullup_init.o
 OBJS-$(CONFIG_SPP_FILTER)                    += x86/vf_spp.o
 OBJS-$(CONFIG_VOLUME_FILTER)                 += x86/af_volume_init.o
diff --git a/libavfilter/x86/vf_noise.c b/libavfilter/x86/vf_noise.c
new file mode 100644
index 0000000..0a86cb0
--- /dev/null
+++ b/libavfilter/x86/vf_noise.c
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2002 Michael Niedermayer <michaelni at gmx.at>
+ * Copyright (c) 2013 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/x86/cpu.h"
+#include "libavutil/x86/asm.h"
+#include "libavfilter/vf_noise.h"
+
+#if HAVE_INLINE_ASM
+/* MMX: dst[i] = src[i] + noise[shift + i], clamped to 0..255, for len bytes; ff_line_noise_c() does the len % 8 tail. */
+static void line_noise_mmx(uint8_t *dst, const uint8_t *src, const int8_t *noise, int len, int shift)
+{
+    x86_reg mmx_len= len & (~7);     /* bytes covered by whole 8-byte groups */
+    noise += shift;
+    if (mmx_len) /* the loop tests its counter only after a pass; entered with 0 it would touch 8 bytes past len */
+        __asm__ volatile(
+            "mov %3, %%"REG_a"               \n\t" /* REG_a counts up from -mmx_len to 0 */
+            "pcmpeqb %%mm7, %%mm7            \n\t"
+            "psllw $15, %%mm7                \n\t"
+            "packsswb %%mm7, %%mm7           \n\t" /* mm7 = 0x80 in every byte */
+            ".p2align 4                      \n\t"
+            "1:                              \n\t"
+            "movq (%0, %%"REG_a"), %%mm0     \n\t"
+            "movq (%1, %%"REG_a"), %%mm1     \n\t"
+            "pxor %%mm7, %%mm0               \n\t" /* unsigned pixel -> signed (flip top bit) */
+            "paddsb %%mm1, %%mm0             \n\t" /* signed saturating add of the noise */
+            "pxor %%mm7, %%mm0               \n\t" /* back to unsigned */
+            "movq %%mm0, (%2, %%"REG_a")     \n\t"
+            "add $8, %%"REG_a"               \n\t"
+            " js 1b                          \n\t"
+            :: "r" (src+mmx_len), "r" (noise+mmx_len), "r" (dst+mmx_len), "g" (-mmx_len)
+            : "%"REG_a
+        );
+    if (mmx_len != len) /* noise was already advanced by shift, hence the 0 */
+        ff_line_noise_c(dst+mmx_len, src+mmx_len, noise+mmx_len, len-mmx_len, 0);
+}
+/* MMX averaged-noise line function (needs 6 GPRs); ff_line_noise_avg_c() finishes the len % 8 tail. */
+#if HAVE_6REGS
+static void line_noise_avg_mmx(uint8_t *dst, const uint8_t *src,
+                                      int len, const int8_t * const *shift)
+{
+    x86_reg mmx_len = len & (~7);    /* bytes covered by whole 8-byte groups */
+    if (mmx_len) /* the loop tests its counter only after a pass; entered with 0 it would touch 8 bytes past len */
+        __asm__ volatile(
+            "mov %5, %%"REG_a"              \n\t" /* REG_a counts up from -mmx_len to 0 */
+            ".p2align 4                     \n\t"
+            "1:                             \n\t"
+            "movq (%1, %%"REG_a"), %%mm1    \n\t"
+            "movq (%0, %%"REG_a"), %%mm0    \n\t"
+            "paddb (%2, %%"REG_a"), %%mm1   \n\t"
+            "paddb (%3, %%"REG_a"), %%mm1   \n\t" /* mm1 = shift[0] + shift[1] + shift[2], bytewise */
+            "movq %%mm0, %%mm2              \n\t"
+            "movq %%mm1, %%mm3              \n\t"
+            "punpcklbw %%mm0, %%mm0         \n\t" /* widen: each byte duplicated into a 16-bit word */
+            "punpckhbw %%mm2, %%mm2         \n\t"
+            "punpcklbw %%mm1, %%mm1         \n\t"
+            "punpckhbw %%mm3, %%mm3         \n\t"
+            "pmulhw %%mm0, %%mm1            \n\t" /* high half of signed source * noise-sum product */
+            "pmulhw %%mm2, %%mm3            \n\t"
+            "paddw %%mm1, %%mm1             \n\t"
+            "paddw %%mm3, %%mm3             \n\t"
+            "paddw %%mm0, %%mm1             \n\t" /* add the (widened) source back */
+            "paddw %%mm2, %%mm3             \n\t"
+            "psrlw $8, %%mm1                \n\t"
+            "psrlw $8, %%mm3                \n\t"
+            "packuswb %%mm3, %%mm1          \n\t" /* narrow to bytes with unsigned saturation */
+            "movq %%mm1, (%4, %%"REG_a")    \n\t"
+            "add $8, %%"REG_a"              \n\t"
+            " js 1b                         \n\t"
+            :: "r" (src+mmx_len), "r" (shift[0]+mmx_len), "r" (shift[1]+mmx_len), "r" (shift[2]+mmx_len),
+               "r" (dst+mmx_len), "g" (-mmx_len)
+            : "%"REG_a
+        );
+
+    if (mmx_len != len){
+        const int8_t *shift2[3] = { shift[0]+mmx_len, shift[1]+mmx_len, shift[2]+mmx_len };
+        ff_line_noise_avg_c(dst+mmx_len, src+mmx_len, len-mmx_len, shift2);
+    }
+}
+#endif /* HAVE_6REGS */
+/* Same arithmetic as line_noise_mmx(), but stores with movntq (MMXEXT non-temporal store). */
+static void line_noise_mmxext(uint8_t *dst, const uint8_t *src,
+                              const int8_t *noise, int len, int shift)
+{
+    x86_reg mmx_len = len & (~7);    /* bytes covered by whole 8-byte groups */
+    noise += shift;
+    if (mmx_len) /* the loop tests its counter only after a pass; entered with 0 it would touch 8 bytes past len */
+        __asm__ volatile(
+            "mov %3, %%"REG_a"                \n\t" /* REG_a counts up from -mmx_len to 0 */
+            "pcmpeqb %%mm7, %%mm7             \n\t"
+            "psllw $15, %%mm7                 \n\t"
+            "packsswb %%mm7, %%mm7            \n\t" /* mm7 = 0x80 in every byte */
+            ".p2align 4                       \n\t"
+            "1:                               \n\t"
+            "movq (%0, %%"REG_a"), %%mm0      \n\t"
+            "movq (%1, %%"REG_a"), %%mm1      \n\t"
+            "pxor %%mm7, %%mm0                \n\t" /* unsigned pixel -> signed (flip top bit) */
+            "paddsb %%mm1, %%mm0              \n\t" /* signed saturating add of the noise */
+            "pxor %%mm7, %%mm0                \n\t" /* back to unsigned */
+            "movntq %%mm0, (%2, %%"REG_a")    \n\t"
+            "add $8, %%"REG_a"                \n\t"
+            " js 1b                           \n\t"
+            :: "r" (src+mmx_len), "r" (noise+mmx_len), "r" (dst+mmx_len), "g" (-mmx_len)
+            : "%"REG_a
+        );
+    if (mmx_len != len) /* noise was already advanced by shift, hence the 0 */
+        ff_line_noise_c(dst+mmx_len, src+mmx_len, noise+mmx_len, len-mmx_len, 0);
+}
+#endif /* HAVE_INLINE_ASM */
+/* Point n's line functions at the inline-asm versions the runtime CPU flags allow; otherwise leave n untouched. */
+av_cold void ff_noise_init_x86(NoiseContext *n)
+{
+#if HAVE_INLINE_ASM
+    int cpu_flags = av_get_cpu_flags();
+
+    if (INLINE_MMX(cpu_flags)) {
+        n->line_noise     = line_noise_mmx;
+#if HAVE_6REGS
+        n->line_noise_avg = line_noise_avg_mmx; /* only compiled when HAVE_6REGS */
+#endif /* HAVE_6REGS */
+    }
+    if (INLINE_MMXEXT(cpu_flags)) {
+        n->line_noise     = line_noise_mmxext;  /* checked last, so it overrides the MMX version */
+    }
+#endif /* HAVE_INLINE_ASM */
+}
-- 
2.0.4



More information about the ffmpeg-devel mailing list