[FFmpeg-cvslog] avcodec/x86/lossless_videodsp: Port sub_hfyu_median_prediction_mmxext to int16

Michael Niedermayer git at videolan.org
Wed Jan 22 23:34:49 CET 2014


ffmpeg | branch: master | Michael Niedermayer <michaelni at gmx.at> | Wed Jan 22 22:55:49 2014 +0100| [fad49aae28cf3e47791f03a04d7aad328a6d6fdf] | committer: Michael Niedermayer

avcodec/x86/lossless_videodsp: Port sub_hfyu_median_prediction_mmxext to int16

Signed-off-by: Michael Niedermayer <michaelni at gmx.at>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=fad49aae28cf3e47791f03a04d7aad328a6d6fdf
---

 libavcodec/x86/lossless_videodsp_init.c |   48 +++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/libavcodec/x86/lossless_videodsp_init.c b/libavcodec/x86/lossless_videodsp_init.c
index 4eca2a1..eac3395 100644
--- a/libavcodec/x86/lossless_videodsp_init.c
+++ b/libavcodec/x86/lossless_videodsp_init.c
@@ -20,6 +20,8 @@
 
 #include "../lossless_videodsp.h"
 #include "libavutil/x86/cpu.h"
+#include "libavutil/x86/asm.h"
+#include "libavcodec/mathops.h"
 
 void ff_add_int16_mmx(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
 void ff_add_int16_sse2(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
@@ -29,6 +31,51 @@ int ff_add_hfyu_left_prediction_int16_ssse3(uint16_t *dst, const uint16_t *src,
 int ff_add_hfyu_left_prediction_int16_sse4(uint16_t *dst, const uint16_t *src, unsigned mask, int w, int acc);
 void ff_add_hfyu_median_prediction_int16_mmxext(uint16_t *dst, const uint16_t *top, const uint16_t *diff, unsigned mask, int w, int *left, int *left_top);
 
+static void sub_hfyu_median_prediction_int16_mmxext(uint16_t *dst, const uint16_t *src1,
+                                                    const uint16_t *src2, unsigned mask, int w,
+                                                    int *left, int *left_top)
+{   /* dst[i] = (X - median(L, T, (L + T - LT) & mask)) & mask; T/LT are read from src1, X/L from src2 */
+    x86_reg i=0;                        // byte offset into the rows; grows by 8 (four samples) per pass
+    uint16_t l, lt;
+
+    __asm__ volatile(
+        "movd %5, %%mm7                 \n\t" // mm7 = mask
+        "pshufw $0, %%mm7, %%mm7        \n\t" // broadcast the low 16 bits of mask to all four words
+        "movq  (%1, %0), %%mm0          \n\t" // LT
+        "psllq $16, %%mm0                \n\t" // shift up one word: lane k = src1[k-1], lane 0 = 0
+        "1:                             \n\t"
+        "movq  (%1, %0), %%mm1          \n\t" // T
+        "movq  -2(%2, %0), %%mm2        \n\t" // L (the first pass loads src2[-1] into lane 0)
+        "movq  (%2, %0), %%mm3          \n\t" // X
+        "movq %%mm2, %%mm4              \n\t" // L
+        "psubw %%mm0, %%mm2             \n\t" // L - LT
+        "paddw %%mm1, %%mm2             \n\t" // L + T - LT
+        "pand %%mm7, %%mm2              \n\t" // gradient = (L + T - LT) & mask
+        "movq %%mm4, %%mm5              \n\t" // L
+        "pmaxsw %%mm1, %%mm4            \n\t" // max(T, L), signed 16-bit compare
+        "pminsw %%mm5, %%mm1            \n\t" // min(T, L), signed 16-bit compare
+        "pminsw %%mm2, %%mm4            \n\t" // min(max(T, L), gradient)
+        "pmaxsw %%mm1, %%mm4            \n\t" // pred = median(L, T, gradient)
+        "psubw %%mm4, %%mm3             \n\t" // X - pred
+        "pand %%mm7, %%mm3              \n\t" // (X - pred) & mask
+        "movq %%mm3, (%3, %0)           \n\t" // store four residuals to dst
+        "add $8, %0                     \n\t" // advance to the next four samples
+        "movq -2(%1, %0), %%mm0         \n\t" // LT
+        "cmp %4, %0                     \n\t"
+        " jb 1b                         \n\t" // repeat while i < 2*w; runs at least once, in whole groups of four
+        : "+r" (i)
+        : "r"(src1), "r"(src2), "r"(dst), "r"((x86_reg)2*w), "rm"(mask)
+        : "memory");                          // the asm reads src1/src2 and writes dst through plain pointers
+
+    l= *left;
+    lt= *left_top;
+
+    dst[0]= (src2[0] - mid_pred(l, src1[0], (l + src1[0] - lt)&mask))&mask; // redo sample 0 with the caller's left/left_top, masked like the SIMD stores
+
+    *left_top= src1[w-1];
+    *left    = src2[w-1];
+}
+
 void ff_llviddsp_init_x86(LLVidDSPContext *c)
 {
     int cpu_flags = av_get_cpu_flags();
@@ -40,6 +87,7 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c)
 
     if (EXTERNAL_MMXEXT(cpu_flags)) {
         c->add_hfyu_median_prediction_int16 = ff_add_hfyu_median_prediction_int16_mmxext;
+        c->sub_hfyu_median_prediction_int16 = sub_hfyu_median_prediction_int16_mmxext;
     }
 
     if (EXTERNAL_SSE2(cpu_flags)) {



More information about the ffmpeg-cvslog mailing list