[FFmpeg-cvslog] avfilter/x86/vf_blend.asm: add hardmix and phoenix sse2 SIMD

Paul B Mahol git at videolan.org
Wed Oct 7 23:03:05 CEST 2015


ffmpeg | branch: master | Paul B Mahol <onemda at gmail.com> | Wed Oct  7 10:12:26 2015 +0200| [0948ba320496d02ad185487c18b249610de1a184] | committer: Paul B Mahol

avfilter/x86/vf_blend.asm: add hardmix and phoenix sse2 SIMD

Signed-off-by: Paul B Mahol <onemda at gmail.com>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=0948ba320496d02ad185487c18b249610de1a184
---

 libavfilter/x86/vf_blend.asm    |   64 +++++++++++++++++++++++++++++++++++++++
 libavfilter/x86/vf_blend_init.c |   14 +++++++++
 2 files changed, 78 insertions(+)

diff --git a/libavfilter/x86/vf_blend.asm b/libavfilter/x86/vf_blend.asm
index 167e72b..54b5430 100644
--- a/libavfilter/x86/vf_blend.asm
+++ b/libavfilter/x86/vf_blend.asm
@@ -27,6 +27,8 @@ SECTION_RODATA
 
 pw_128: times 8 dw 128
 pw_255: times 8 dw 255
+pb_128: times 16 db 128
+pb_255: times 16 db 255
 
 SECTION .text
 
@@ -273,6 +275,37 @@ cglobal blend_darken, 9, 10, 2, 0, top, top_linesize, bottom, bottom_linesize, d
     jg .nextrow
 REP_RET
 
+cglobal blend_hardmix, 9, 10, 4, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
+    add      topq, widthq          ; move the three row pointers to the row end;
+    add   bottomq, widthq          ; the inner loop indexes them with x in [-width, 0)
+    add      dstq, widthq
+    sub      endq, startq          ; end = number of rows left to process
+    mova       m2, [pb_255]        ; all-ones bytes: v ^ 255 == 255 - v
+    mova       m3, [pb_128]        ; sign-bit flip, lets signed pcmpgtb order unsigned bytes
+    neg    widthq                  ; width = -width, the starting column index
+.nextrow:
+    %define      x  r9q            ; r9 is the 10th GPR reserved by cglobal (r0-r8 carry the args)
+    mov          x, widthq
+    ; per byte: dst = (top < 255 - bottom) ? 0 : 255
+    .loop:
+        movu            m0, [topq + x]
+        movu            m1, [bottomq + x]
+        pxor            m1, m2              ; m1 = 255 - bottom
+        pxor            m0, m3              ; bias both operands by 128 so the signed
+        pxor            m1, m3              ; compare below behaves as an unsigned one
+        pcmpgtb         m1, m0              ; m1 = (255 - bottom > top) ? 0xFF : 0x00
+        pxor            m1, m2              ; invert the mask: 0x00 where top < 255 - bottom
+        mova    [dstq + x], m1
+        add              x, mmsize          ; sets the flags tested by jl: loop while x < 0
+    jl .loop
+    ; step every pointer to the next row
+    add          topq, top_linesizeq
+    add       bottomq, bottom_linesizeq
+    add          dstq, dst_linesizeq
+    sub          endd, 1
+    jg .nextrow
+REP_RET
+
 cglobal blend_lighten, 9, 10, 2, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
     add      topq, widthq
     add   bottomq, widthq
@@ -298,6 +331,37 @@ cglobal blend_lighten, 9, 10, 2, 0, top, top_linesize, bottom, bottom_linesize,
     jg .nextrow
 REP_RET
 
+cglobal blend_phoenix, 9, 10, 4, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
+    add      topq, widthq          ; move the three row pointers to the row end;
+    add   bottomq, widthq          ; the inner loop indexes them with x in [-width, 0)
+    add      dstq, widthq
+    sub      endq, startq          ; end = number of rows left to process
+    mova       m3, [pb_255]        ; constant 255 in every byte lane
+    neg    widthq                  ; width = -width, the starting column index
+.nextrow:
+    %define      x  r9q            ; r9 is the 10th GPR reserved by cglobal (r0-r8 carry the args)
+    mov          x, widthq
+    ; per byte: dst = 255 - max(top, bottom) + min(top, bottom)
+    .loop:
+        movu            m0, [topq + x]
+        movu            m1, [bottomq + x]
+        mova            m2, m0              ; keep a copy of top for the max below
+        pminub          m0, m1              ; m0 = min(top, bottom)
+        pmaxub          m1, m2              ; m1 = max(top, bottom)
+        mova            m2, m3
+        psubusb         m2, m1              ; m2 = 255 - max
+        paddusb         m2, m0              ; m2 += min; stays <= 255 because min <= max
+        mova    [dstq + x], m2
+        add              x, mmsize          ; sets the flags tested by jl: loop while x < 0
+    jl .loop
+    ; step every pointer to the next row
+    add          topq, top_linesizeq
+    add       bottomq, bottom_linesizeq
+    add          dstq, dst_linesizeq
+    sub          endd, 1
+    jg .nextrow
+REP_RET
+
 INIT_XMM ssse3
 cglobal blend_difference, 9, 10, 3, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
     add      topq, widthq
diff --git a/libavfilter/x86/vf_blend_init.c b/libavfilter/x86/vf_blend_init.c
index 61e90f8..454d030 100644
--- a/libavfilter/x86/vf_blend_init.c
+++ b/libavfilter/x86/vf_blend_init.c
@@ -59,6 +59,12 @@ void ff_blend_difference128_sse2(const uint8_t *top, ptrdiff_t top_linesize,
                                  ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
                                  struct FilterParams *param, double *values);
 
+void ff_blend_hardmix_sse2(const uint8_t *top, ptrdiff_t top_linesize,
+                           const uint8_t *bottom, ptrdiff_t bottom_linesize,
+                           uint8_t *dst, ptrdiff_t dst_linesize,
+                           ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
+                           struct FilterParams *param, double *values);
+
 void ff_blend_lighten_sse2(const uint8_t *top, ptrdiff_t top_linesize,
                            const uint8_t *bottom, ptrdiff_t bottom_linesize,
                            uint8_t *dst, ptrdiff_t dst_linesize,
@@ -71,6 +77,12 @@ void ff_blend_or_sse2(const uint8_t *top, ptrdiff_t top_linesize,
                       ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
                       struct FilterParams *param, double *values);
 
+void ff_blend_phoenix_sse2(const uint8_t *top, ptrdiff_t top_linesize,
+                           const uint8_t *bottom, ptrdiff_t bottom_linesize,
+                           uint8_t *dst, ptrdiff_t dst_linesize,
+                           ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
+                           struct FilterParams *param, double *values);
+
 void ff_blend_subtract_sse2(const uint8_t *top, ptrdiff_t top_linesize,
                             const uint8_t *bottom, ptrdiff_t bottom_linesize,
                             uint8_t *dst, ptrdiff_t dst_linesize,
@@ -107,8 +119,10 @@ av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
         case BLEND_AVERAGE:  param->blend = ff_blend_average_sse2;  break;
         case BLEND_DARKEN:   param->blend = ff_blend_darken_sse2;   break;
         case BLEND_DIFFERENCE128: param->blend = ff_blend_difference128_sse2; break;
+        case BLEND_HARDMIX:  param->blend = ff_blend_hardmix_sse2;  break;
         case BLEND_LIGHTEN:  param->blend = ff_blend_lighten_sse2;  break;
         case BLEND_OR:       param->blend = ff_blend_or_sse2;       break;
+        case BLEND_PHOENIX:  param->blend = ff_blend_phoenix_sse2;  break;
         case BLEND_SUBTRACT: param->blend = ff_blend_subtract_sse2; break;
         case BLEND_XOR:      param->blend = ff_blend_xor_sse2;      break;
         }



More information about the ffmpeg-cvslog mailing list