[FFmpeg-cvslog] x86/vf_blend: simplify using macros

James Almer <git at videolan.org>
Thu Dec 24 17:06:22 CET 2015


ffmpeg | branch: master | James Almer <jamrial at gmail.com> | Wed Dec 23 23:51:45 2015 -0300| [0988c68cf9cd39680b1e49cc2873ec38c8199905] | committer: James Almer

x86/vf_blend: simplify using macros

Reviewed-by: Paul B Mahol <onemda at gmail.com>
Signed-off-by: James Almer <jamrial at gmail.com>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=0988c68cf9cd39680b1e49cc2873ec38c8199905
---

 libavfilter/x86/vf_blend.asm    |  276 +++++----------------------------------
 libavfilter/x86/vf_blend_init.c |  102 +++------------
 2 files changed, 53 insertions(+), 325 deletions(-)

diff --git a/libavfilter/x86/vf_blend.asm b/libavfilter/x86/vf_blend.asm
index f0fb2ea..d079b79 100644
--- a/libavfilter/x86/vf_blend.asm
+++ b/libavfilter/x86/vf_blend.asm
@@ -33,113 +33,26 @@ pb_255: times 16 db 255
 
 SECTION .text
 
-INIT_XMM sse2
-cglobal blend_xor, 9, 11, 2, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
-    add      topq, widthq
-    add   bottomq, widthq
-    add      dstq, widthq
-    sub      endq, startq
-    neg    widthq
-.nextrow:
-    mov       r10q, widthq
-    %define      x  r10q
-
-    .loop:
-        movu            m0, [topq + x]
-        movu            m1, [bottomq + x]
-        pxor            m0, m1
-        mova    [dstq + x], m0
-        add           r10q, mmsize
-    jl .loop
-
-    add          topq, top_linesizeq
-    add       bottomq, bottom_linesizeq
-    add          dstq, dst_linesizeq
-    sub          endd, 1
-    jg .nextrow
-REP_RET
-
-cglobal blend_or, 9, 11, 2, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
-    add      topq, widthq
-    add   bottomq, widthq
-    add      dstq, widthq
-    sub      endq, startq
-    neg    widthq
-.nextrow:
-    mov       r10q, widthq
-    %define      x  r10q
-
-    .loop:
-        movu            m0, [topq + x]
-        movu            m1, [bottomq + x]
-        por             m0, m1
-        mova    [dstq + x], m0
-        add           r10q, mmsize
-    jl .loop
-
-    add          topq, top_linesizeq
-    add       bottomq, bottom_linesizeq
-    add          dstq, dst_linesizeq
-    sub          endd, 1
-    jg .nextrow
-REP_RET
-
-cglobal blend_and, 9, 11, 2, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
-    add      topq, widthq
-    add   bottomq, widthq
-    add      dstq, widthq
-    sub      endq, startq
-    neg    widthq
-.nextrow:
-    mov       r10q, widthq
-    %define      x  r10q
-
-    .loop:
-        movu            m0, [topq + x]
-        movu            m1, [bottomq + x]
-        pand            m0, m1
-        mova    [dstq + x], m0
-        add           r10q, mmsize
-    jl .loop
-
-    add          topq, top_linesizeq
-    add       bottomq, bottom_linesizeq
-    add          dstq, dst_linesizeq
-    sub          endd, 1
-    jg .nextrow
-REP_RET
-
-cglobal blend_addition, 9, 11, 2, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
+%macro BLEND_INIT 2
+cglobal blend_%1, 9, 11, %2, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
     add      topq, widthq
     add   bottomq, widthq
     add      dstq, widthq
     sub      endq, startq
     neg    widthq
-.nextrow:
-    mov       r10q, widthq
-    %define      x  r10q
-
-    .loop:
-        movu            m0, [topq + x]
-        movu            m1, [bottomq + x]
-        paddusb         m0, m1
-        mova    [dstq + x], m0
-        add           r10q, mmsize
-    jl .loop
+%endmacro
 
+%macro BLEND_END 0
     add          topq, top_linesizeq
     add       bottomq, bottom_linesizeq
     add          dstq, dst_linesizeq
     sub          endd, 1
     jg .nextrow
 REP_RET
+%endmacro
 
-cglobal blend_subtract, 9, 11, 2, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
-    add      topq, widthq
-    add   bottomq, widthq
-    add      dstq, widthq
-    sub      endq, startq
-    neg    widthq
+%macro BLEND_SIMPLE 2
+BLEND_INIT %1, 2
 .nextrow:
     mov       r10q, widthq
     %define      x  r10q
@@ -147,26 +60,25 @@ cglobal blend_subtract, 9, 11, 2, 0, top, top_linesize, bottom, bottom_linesize,
     .loop:
         movu            m0, [topq + x]
         movu            m1, [bottomq + x]
-        psubusb         m0, m1
+        p%2             m0, m1
         mova    [dstq + x], m0
         add           r10q, mmsize
     jl .loop
+BLEND_END
+%endmacro
 
-    add          topq, top_linesizeq
-    add       bottomq, bottom_linesizeq
-    add          dstq, dst_linesizeq
-    sub          endd, 1
-    jg .nextrow
-REP_RET
-
-cglobal blend_difference128, 9, 11, 4, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
-    add      topq, widthq
-    add   bottomq, widthq
-    add      dstq, widthq
-    sub      endq, startq
+INIT_XMM sse2
+BLEND_SIMPLE xor,      xor
+BLEND_SIMPLE or,       or
+BLEND_SIMPLE and,      and
+BLEND_SIMPLE addition, addusb
+BLEND_SIMPLE subtract, subusb
+BLEND_SIMPLE darken,   minub
+BLEND_SIMPLE lighten,  maxub
+
+BLEND_INIT difference128, 4
     pxor       m2, m2
     mova       m3, [pw_128]
-    neg    widthq
 .nextrow:
     mov       r10q, widthq
     %define      x  r10q
@@ -182,21 +94,10 @@ cglobal blend_difference128, 9, 11, 4, 0, top, top_linesize, bottom, bottom_line
         movh    [dstq + x], m0
         add           r10q, mmsize / 2
     jl .loop
+BLEND_END
 
-    add          topq, top_linesizeq
-    add       bottomq, bottom_linesizeq
-    add          dstq, dst_linesizeq
-    sub          endd, 1
-    jg .nextrow
-REP_RET
-
-cglobal blend_average, 9, 11, 3, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
-    add      topq, widthq
-    add   bottomq, widthq
-    add      dstq, widthq
-    sub      endq, startq
+BLEND_INIT average, 3
     pxor       m2, m2
-    neg    widthq
 .nextrow:
     mov       r10q, widthq
     %define      x  r10q
@@ -212,22 +113,11 @@ cglobal blend_average, 9, 11, 3, 0, top, top_linesize, bottom, bottom_linesize,
         movh    [dstq + x], m0
         add           r10q, mmsize / 2
     jl .loop
+BLEND_END
 
-    add          topq, top_linesizeq
-    add       bottomq, bottom_linesizeq
-    add          dstq, dst_linesizeq
-    sub          endd, 1
-    jg .nextrow
-REP_RET
-
-cglobal blend_addition128, 9, 11, 4, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
-    add      topq, widthq
-    add   bottomq, widthq
-    add      dstq, widthq
-    sub      endq, startq
+BLEND_INIT addition128, 4
     pxor       m2, m2
     mova       m3, [pw_128]
-    neg    widthq
 .nextrow:
     mov       r10q, widthq
     %define      x  r10q
@@ -243,48 +133,12 @@ cglobal blend_addition128, 9, 11, 4, 0, top, top_linesize, bottom, bottom_linesi
         movh    [dstq + x], m0
         add           r10q, mmsize / 2
     jl .loop
+BLEND_END
 
-    add          topq, top_linesizeq
-    add       bottomq, bottom_linesizeq
-    add          dstq, dst_linesizeq
-    sub          endd, 1
-    jg .nextrow
-REP_RET
-
-cglobal blend_darken, 9, 11, 2, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
-    add      topq, widthq
-    add   bottomq, widthq
-    add      dstq, widthq
-    sub      endq, startq
-    neg    widthq
-.nextrow:
-    mov       r10q, widthq
-    %define      x  r10q
-
-    .loop:
-        movu            m0, [topq + x]
-        movu            m1, [bottomq + x]
-        pminub          m0, m1
-        mova    [dstq + x], m0
-        add           r10q, mmsize
-    jl .loop
-
-    add          topq, top_linesizeq
-    add       bottomq, bottom_linesizeq
-    add          dstq, dst_linesizeq
-    sub          endd, 1
-    jg .nextrow
-REP_RET
-
-cglobal blend_hardmix, 9, 11, 5, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
-    add      topq, widthq
-    add   bottomq, widthq
-    add      dstq, widthq
-    sub      endq, startq
+BLEND_INIT hardmix, 5
     mova       m2, [pb_255]
     mova       m3, [pb_128]
     mova       m4, [pb_127]
-    neg    widthq
 .nextrow:
     mov       r10q, widthq
     %define      x  r10q
@@ -299,46 +153,10 @@ cglobal blend_hardmix, 9, 11, 5, 0, top, top_linesize, bottom, bottom_linesize,
         mova    [dstq + x], m1
         add           r10q, mmsize
     jl .loop
+BLEND_END
 
-    add          topq, top_linesizeq
-    add       bottomq, bottom_linesizeq
-    add          dstq, dst_linesizeq
-    sub          endd, 1
-    jg .nextrow
-REP_RET
-
-cglobal blend_lighten, 9, 11, 2, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
-    add      topq, widthq
-    add   bottomq, widthq
-    add      dstq, widthq
-    sub      endq, startq
-    neg    widthq
-.nextrow:
-    mov       r10q, widthq
-    %define      x  r10q
-
-    .loop:
-        movu            m0, [topq + x]
-        movu            m1, [bottomq + x]
-        pmaxub          m0, m1
-        mova    [dstq + x], m0
-        add           r10q, mmsize
-    jl .loop
-
-    add          topq, top_linesizeq
-    add       bottomq, bottom_linesizeq
-    add          dstq, dst_linesizeq
-    sub          endd, 1
-    jg .nextrow
-REP_RET
-
-cglobal blend_phoenix, 9, 11, 4, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
-    add      topq, widthq
-    add   bottomq, widthq
-    add      dstq, widthq
-    sub      endq, startq
+BLEND_INIT phoenix, 4
     mova       m3, [pb_255]
-    neg    widthq
 .nextrow:
     mov       r10q, widthq
     %define      x  r10q
@@ -355,22 +173,11 @@ cglobal blend_phoenix, 9, 11, 4, 0, top, top_linesize, bottom, bottom_linesize,
         mova    [dstq + x], m2
         add           r10q, mmsize
     jl .loop
-
-    add          topq, top_linesizeq
-    add       bottomq, bottom_linesizeq
-    add          dstq, dst_linesizeq
-    sub          endd, 1
-    jg .nextrow
-REP_RET
+BLEND_END
 
 INIT_XMM ssse3
-cglobal blend_difference, 9, 11, 3, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
-    add      topq, widthq
-    add   bottomq, widthq
-    add      dstq, widthq
-    sub      endq, startq
+BLEND_INIT difference, 3
     pxor       m2, m2
-    neg    widthq
 .nextrow:
     mov       r10q, widthq
     %define      x  r10q
@@ -386,22 +193,11 @@ cglobal blend_difference, 9, 11, 3, 0, top, top_linesize, bottom, bottom_linesiz
         movh    [dstq + x], m0
         add           r10q, mmsize / 2
     jl .loop
+BLEND_END
 
-    add          topq, top_linesizeq
-    add       bottomq, bottom_linesizeq
-    add          dstq, dst_linesizeq
-    sub          endd, 1
-    jg .nextrow
-REP_RET
-
-cglobal blend_negation, 9, 11, 5, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
-    add      topq, widthq
-    add   bottomq, widthq
-    add      dstq, widthq
-    sub      endq, startq
+BLEND_INIT negation, 5
     pxor       m2, m2
     mova       m4, [pw_255]
-    neg    widthq
 .nextrow:
     mov       r10q, widthq
     %define      x  r10q
@@ -421,12 +217,6 @@ cglobal blend_negation, 9, 11, 5, 0, top, top_linesize, bottom, bottom_linesize,
         movh    [dstq + x], m0
         add           r10q, mmsize / 2
     jl .loop
-
-    add          topq, top_linesizeq
-    add       bottomq, bottom_linesizeq
-    add          dstq, dst_linesizeq
-    sub          endd, 1
-    jg .nextrow
-REP_RET
+BLEND_END
 
 %endif
diff --git a/libavfilter/x86/vf_blend_init.c b/libavfilter/x86/vf_blend_init.c
index 454d030..82b8848 100644
--- a/libavfilter/x86/vf_blend_init.c
+++ b/libavfilter/x86/vf_blend_init.c
@@ -23,90 +23,28 @@
 #include "libavutil/x86/cpu.h"
 #include "libavfilter/blend.h"
 
-void ff_blend_addition_sse2(const uint8_t *top, ptrdiff_t top_linesize,
-                            const uint8_t *bottom, ptrdiff_t bottom_linesize,
-                            uint8_t *dst, ptrdiff_t dst_linesize,
-                            ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
-                            struct FilterParams *param, double *values);
-
-void ff_blend_addition128_sse2(const uint8_t *top, ptrdiff_t top_linesize,
-                               const uint8_t *bottom, ptrdiff_t bottom_linesize,
-                               uint8_t *dst, ptrdiff_t dst_linesize,
-                               ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
-                               struct FilterParams *param, double *values);
-
-void ff_blend_average_sse2(const uint8_t *top, ptrdiff_t top_linesize,
-                           const uint8_t *bottom, ptrdiff_t bottom_linesize,
-                           uint8_t *dst, ptrdiff_t dst_linesize,
-                           ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
-                           struct FilterParams *param, double *values);
-
-void ff_blend_and_sse2(const uint8_t *top, ptrdiff_t top_linesize,
-                       const uint8_t *bottom, ptrdiff_t bottom_linesize,
-                       uint8_t *dst, ptrdiff_t dst_linesize,
-                       ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
-                       struct FilterParams *param, double *values);
-
-void ff_blend_darken_sse2(const uint8_t *top, ptrdiff_t top_linesize,
-                          const uint8_t *bottom, ptrdiff_t bottom_linesize,
-                          uint8_t *dst, ptrdiff_t dst_linesize,
-                          ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
-                          struct FilterParams *param, double *values);
-
-void ff_blend_difference128_sse2(const uint8_t *top, ptrdiff_t top_linesize,
-                                 const uint8_t *bottom, ptrdiff_t bottom_linesize,
-                                 uint8_t *dst, ptrdiff_t dst_linesize,
-                                 ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
-                                 struct FilterParams *param, double *values);
-
-void ff_blend_hardmix_sse2(const uint8_t *top, ptrdiff_t top_linesize,
-                           const uint8_t *bottom, ptrdiff_t bottom_linesize,
-                           uint8_t *dst, ptrdiff_t dst_linesize,
-                           ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
-                           struct FilterParams *param, double *values);
-
-void ff_blend_lighten_sse2(const uint8_t *top, ptrdiff_t top_linesize,
-                           const uint8_t *bottom, ptrdiff_t bottom_linesize,
-                           uint8_t *dst, ptrdiff_t dst_linesize,
-                           ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
-                           struct FilterParams *param, double *values);
-
-void ff_blend_or_sse2(const uint8_t *top, ptrdiff_t top_linesize,
-                      const uint8_t *bottom, ptrdiff_t bottom_linesize,
-                      uint8_t *dst, ptrdiff_t dst_linesize,
-                      ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
-                      struct FilterParams *param, double *values);
-
-void ff_blend_phoenix_sse2(const uint8_t *top, ptrdiff_t top_linesize,
-                           const uint8_t *bottom, ptrdiff_t bottom_linesize,
-                           uint8_t *dst, ptrdiff_t dst_linesize,
-                           ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
-                           struct FilterParams *param, double *values);
-
-void ff_blend_subtract_sse2(const uint8_t *top, ptrdiff_t top_linesize,
-                            const uint8_t *bottom, ptrdiff_t bottom_linesize,
-                            uint8_t *dst, ptrdiff_t dst_linesize,
-                            ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
-                            struct FilterParams *param, double *values);
-
-void ff_blend_xor_sse2(const uint8_t *top, ptrdiff_t top_linesize,
-                       const uint8_t *bottom, ptrdiff_t bottom_linesize,
-                       uint8_t *dst, ptrdiff_t dst_linesize,
-                       ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
-                       struct FilterParams *param, double *values);
-
-void ff_blend_difference_ssse3(const uint8_t *top, ptrdiff_t top_linesize,
-                               const uint8_t *bottom, ptrdiff_t bottom_linesize,
-                               uint8_t *dst, ptrdiff_t dst_linesize,
-                               ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
-                               struct FilterParams *param, double *values);
-
-void ff_blend_negation_ssse3(const uint8_t *top, ptrdiff_t top_linesize,
-                             const uint8_t *bottom, ptrdiff_t bottom_linesize,
-                             uint8_t *dst, ptrdiff_t dst_linesize,
-                             ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
+#define BLEND_FUNC(name, opt) \
+void ff_blend_##name##_##opt(const uint8_t *top, ptrdiff_t top_linesize,       \
+                             const uint8_t *bottom, ptrdiff_t bottom_linesize, \
+                             uint8_t *dst, ptrdiff_t dst_linesize,             \
+                             ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,  \
                              struct FilterParams *param, double *values);
 
+BLEND_FUNC(addition, sse2)
+BLEND_FUNC(addition128, sse2)
+BLEND_FUNC(average, sse2)
+BLEND_FUNC(and, sse2)
+BLEND_FUNC(darken, sse2)
+BLEND_FUNC(difference128, sse2)
+BLEND_FUNC(hardmix, sse2)
+BLEND_FUNC(lighten, sse2)
+BLEND_FUNC(or, sse2)
+BLEND_FUNC(phoenix, sse2)
+BLEND_FUNC(subtract, sse2)
+BLEND_FUNC(xor, sse2)
+BLEND_FUNC(difference, ssse3)
+BLEND_FUNC(negation, ssse3)
+
 av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
 {
     int cpu_flags = av_get_cpu_flags();



More information about the ffmpeg-cvslog mailing list