[FFmpeg-cvslog] avcodec/x86/lossless_videodsp: port sub_hfyu_median_prediction_int16 to yasm

Michael Niedermayer git at videolan.org
Wed Jan 22 23:34:49 CET 2014


ffmpeg | branch: master | Michael Niedermayer <michaelni at gmx.at> | Wed Jan 22 23:26:32 2014 +0100| [ef00ef7553fc3b0ab842eccea068ede5eb41d6b0] | committer: Michael Niedermayer

avcodec/x86/lossless_videodsp: port sub_hfyu_median_prediction_int16 to yasm

Signed-off-by: Michael Niedermayer <michaelni at gmx.at>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ef00ef7553fc3b0ab842eccea068ede5eb41d6b0
---

 libavcodec/x86/lossless_videodsp.asm    |   39 ++++++++++++++++++++++++
 libavcodec/x86/lossless_videodsp_init.c |   49 ++-----------------------------
 2 files changed, 41 insertions(+), 47 deletions(-)

diff --git a/libavcodec/x86/lossless_videodsp.asm b/libavcodec/x86/lossless_videodsp.asm
index 531bf17..a1869b3 100644
--- a/libavcodec/x86/lossless_videodsp.asm
+++ b/libavcodec/x86/lossless_videodsp.asm
@@ -299,3 +299,42 @@ cglobal add_hfyu_median_prediction_int16, 7,7,0, dst, top, diff, mask, w, left,
     movzx   r2d, word [topq-2]
     mov [left_topq], r2d
     RET
+
+cglobal sub_hfyu_median_prediction_int16, 7,7,0, dst, src1, src2, mask, w, left, left_top
+    add      wd, wd                     ; byte count = 2*w; 32-bit op because w is an int, so stale upper bits of wq cannot leak in
+    movd    mm7, maskd
+    SPLATW  mm7, mm7                    ; mm7 = mask replicated into every 16-bit lane
+    movq    mm0, [src1q]
+    movq    mm2, [src2q]
+    psllq   mm0, 16                     ; free the lowest lane for the carried-in left_top sample
+    psllq   mm2, 16                     ; free the lowest lane for the carried-in left sample
+    movd    mm6, [left_topq]
+    por     mm0, mm6                    ; mm0 = LT: *left_top, src1[0..2] (assumes *left_top fits in 16 bits)
+    movd    mm6, [leftq]
+    por     mm2, mm6                    ; mm2 = L: *left, src2[0..2] (assumes *left fits in 16 bits)
+    xor     maskq, maskq                ; mask now lives in mm7; its register becomes the byte offset
+.loop:
+    movq    mm1, [src1q + maskq]        ; T
+    movq    mm3, [src2q + maskq]        ; X
+    movq    mm4, mm2                    ; L
+    psubw   mm2, mm0
+    paddw   mm2, mm1                    ; L + T - LT
+    pand    mm2, mm7
+    movq    mm5, mm4                    ; L
+    pmaxsw  mm4, mm1                    ; max(T, L)
+    pminsw  mm1, mm5                    ; min(T, L)
+    pminsw  mm4, mm2
+    pmaxsw  mm4, mm1                    ; pred = median(L, T, (L + T - LT) & mask)
+    psubw   mm3, mm4                    ; X - pred
+    pand    mm3, mm7
+    movq    [dstq + maskq], mm3
+    add     maskq, 8                    ; advance by four 16-bit samples
+    movq    mm0, [src1q + maskq - 2]    ; next LT (on the last pass this reads beyond element w-1)
+    movq    mm2, [src2q + maskq - 2]    ; next L  (same over-read; NOTE(review): confirm buffers are padded)
+    cmp     maskq, wq
+        jb .loop
+    movzx maskd, word [src1q + wq - 2]  ; src1[w-1] only, zero-extended; a 32-bit load would also pull in src1[w]
+    mov [left_topq], maskd
+    movzx maskd, word [src2q + wq - 2]  ; src2[w-1] only, zero-extended
+    mov [leftq], maskd
+    RET
diff --git a/libavcodec/x86/lossless_videodsp_init.c b/libavcodec/x86/lossless_videodsp_init.c
index eac3395..368f072 100644
--- a/libavcodec/x86/lossless_videodsp_init.c
+++ b/libavcodec/x86/lossless_videodsp_init.c
@@ -20,8 +20,6 @@
 
 #include "../lossless_videodsp.h"
 #include "libavutil/x86/cpu.h"
-#include "libavutil/x86/asm.h"
-#include "libavcodec/mathops.h"
 
 void ff_add_int16_mmx(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
 void ff_add_int16_sse2(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
@@ -30,51 +28,8 @@ void ff_diff_int16_sse2(uint16_t *dst, const uint16_t *src1, const uint16_t *src
 int ff_add_hfyu_left_prediction_int16_ssse3(uint16_t *dst, const uint16_t *src, unsigned mask, int w, int acc);
 int ff_add_hfyu_left_prediction_int16_sse4(uint16_t *dst, const uint16_t *src, unsigned mask, int w, int acc);
 void ff_add_hfyu_median_prediction_int16_mmxext(uint16_t *dst, const uint16_t *top, const uint16_t *diff, unsigned mask, int w, int *left, int *left_top);
+void ff_sub_hfyu_median_prediction_int16_mmxext(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w, int *left, int *left_top);
 
-static void sub_hfyu_median_prediction_int16_mmxext(uint16_t *dst, const uint16_t *src1,
-                                                    const uint16_t *src2, unsigned mask, int w,
-                                                    int *left, int *left_top)
-{
-    x86_reg i=0;
-    uint16_t l, lt;
-
-    __asm__ volatile(
-        "movd %5, %%mm7                 \n\t"
-        "pshufw $0, %%mm7, %%mm7        \n\t"
-        "movq  (%1, %0), %%mm0          \n\t" // LT
-        "psllq $16, %%mm0                \n\t"
-        "1:                             \n\t"
-        "movq  (%1, %0), %%mm1          \n\t" // T
-        "movq  -2(%2, %0), %%mm2        \n\t" // L
-        "movq  (%2, %0), %%mm3          \n\t" // X
-        "movq %%mm2, %%mm4              \n\t" // L
-        "psubw %%mm0, %%mm2             \n\t"
-        "paddw %%mm1, %%mm2             \n\t" // L + T - LT
-        "pand %%mm7, %%mm2              \n\t"
-        "movq %%mm4, %%mm5              \n\t" // L
-        "pmaxsw %%mm1, %%mm4            \n\t" // max(T, L)
-        "pminsw %%mm5, %%mm1            \n\t" // min(T, L)
-        "pminsw %%mm2, %%mm4            \n\t"
-        "pmaxsw %%mm1, %%mm4            \n\t"
-        "psubw %%mm4, %%mm3             \n\t" // dst - pred
-        "pand %%mm7, %%mm3              \n\t"
-        "movq %%mm3, (%3, %0)           \n\t"
-        "add $8, %0                     \n\t"
-        "movq -2(%1, %0), %%mm0         \n\t" // LT
-        "cmp %4, %0                     \n\t"
-        " jb 1b                         \n\t"
-        : "+r" (i)
-        : "r"(src1), "r"(src2), "r"(dst), "r"((x86_reg)2*w), "rm"(mask)
-    );
-
-    l= *left;
-    lt= *left_top;
-
-    dst[0]= src2[0] - mid_pred(l, src1[0], (l + src1[0] - lt)&mask);
-
-    *left_top= src1[w-1];
-    *left    = src2[w-1];
-}
 
 void ff_llviddsp_init_x86(LLVidDSPContext *c)
 {
@@ -87,7 +42,7 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c)
 
     if (EXTERNAL_MMXEXT(cpu_flags)) {
         c->add_hfyu_median_prediction_int16 = ff_add_hfyu_median_prediction_int16_mmxext;
-        c->sub_hfyu_median_prediction_int16 = sub_hfyu_median_prediction_int16_mmxext;
+        c->sub_hfyu_median_prediction_int16 = ff_sub_hfyu_median_prediction_int16_mmxext;
     }
 
     if (EXTERNAL_SSE2(cpu_flags)) {



More information about the ffmpeg-cvslog mailing list