[FFmpeg-cvslog] x86/vf_blend: add sse2 and ssse3 extremity functions

James Almer git at videolan.org
Tue Jun 27 19:51:35 EEST 2017


ffmpeg | branch: master | James Almer <jamrial at gmail.com> | Tue Jun 27 12:27:10 2017 -0300| [fa50d9360ba36ba2ee8f85f2c59e8d6af20e833a] | committer: James Almer

x86/vf_blend: add sse2 and ssse3 extremity functions

Reviewed-by: Paul B Mahol <onemda at gmail.com>
Signed-off-by: James Almer <jamrial at gmail.com>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=fa50d9360ba36ba2ee8f85f2c59e8d6af20e833a
---

 libavfilter/x86/vf_blend.asm    | 25 +++++++++++++++++++++++++
 libavfilter/x86/vf_blend_init.c |  4 ++++
 tests/checkasm/vf_blend.c       |  1 +
 3 files changed, 30 insertions(+)

diff --git a/libavfilter/x86/vf_blend.asm b/libavfilter/x86/vf_blend.asm
index 33b1ad1496..25f6f5affc 100644
--- a/libavfilter/x86/vf_blend.asm
+++ b/libavfilter/x86/vf_blend.asm
@@ -286,6 +286,31 @@ BLEND_INIT difference, 3
     jl .loop
 BLEND_END
 
+BLEND_INIT extremity, 8
+    pxor       m2, m2
+    mova       m4, [pw_255]
+.nextrow:
+    mov        xq, widthq
+
+    .loop:
+        movu            m0, [topq + xq]
+        movu            m1, [bottomq + xq]
+        punpckhbw       m5, m0, m2
+        punpcklbw       m0, m2
+        punpckhbw       m6, m1, m2
+        punpcklbw       m1, m2
+        psubw           m3, m4, m0
+        psubw           m7, m4, m5
+        psubw           m3, m1
+        psubw           m7, m6
+        ABS1            m3, m1
+        ABS1            m7, m6
+        packuswb        m3, m7
+        mova   [dstq + xq], m3
+        add             xq, mmsize
+    jl .loop
+BLEND_END
+
 BLEND_INIT negation, 5
     pxor       m2, m2
     mova       m4, [pw_255]
diff --git a/libavfilter/x86/vf_blend_init.c b/libavfilter/x86/vf_blend_init.c
index 96fe3d8baa..71f9b0a685 100644
--- a/libavfilter/x86/vf_blend_init.c
+++ b/libavfilter/x86/vf_blend_init.c
@@ -47,6 +47,8 @@ BLEND_FUNC(subtract, sse2)
 BLEND_FUNC(xor, sse2)
 BLEND_FUNC(difference, sse2)
 BLEND_FUNC(difference, ssse3)
+BLEND_FUNC(extremity, sse2)
+BLEND_FUNC(extremity, ssse3)
 BLEND_FUNC(negation, sse2)
 BLEND_FUNC(negation, ssse3)
 
@@ -72,12 +74,14 @@ av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
         case BLEND_SUBTRACT: param->blend = ff_blend_subtract_sse2; break;
         case BLEND_XOR:      param->blend = ff_blend_xor_sse2;      break;
         case BLEND_DIFFERENCE: param->blend = ff_blend_difference_sse2; break;
+        case BLEND_EXTREMITY:  param->blend = ff_blend_extremity_sse2; break;
         case BLEND_NEGATION:   param->blend = ff_blend_negation_sse2;   break;
         }
     }
     if (EXTERNAL_SSSE3(cpu_flags) && param->opacity == 1 && !is_16bit) {
         switch (param->mode) {
         case BLEND_DIFFERENCE: param->blend = ff_blend_difference_ssse3; break;
+        case BLEND_EXTREMITY:  param->blend = ff_blend_extremity_ssse3; break;
         case BLEND_NEGATION:   param->blend = ff_blend_negation_ssse3;   break;
         }
     }
diff --git a/tests/checkasm/vf_blend.c b/tests/checkasm/vf_blend.c
index aa568c0de0..4e018ac69e 100644
--- a/tests/checkasm/vf_blend.c
+++ b/tests/checkasm/vf_blend.c
@@ -117,6 +117,7 @@ void checkasm_check_blend(void)
     check_and_report(subtract, BLEND_SUBTRACT)
     check_and_report(xor, BLEND_XOR)
     check_and_report(difference, BLEND_DIFFERENCE)
+    check_and_report(extremity, BLEND_EXTREMITY)
     check_and_report(negation, BLEND_NEGATION)
 
     report("8bit");



More information about the ffmpeg-cvslog mailing list