[FFmpeg-cvslog] Merge commit '1dfc3cf89d0eb026af28be46294b85d79499ffb5'

James Almer git at videolan.org
Tue Jan 31 19:51:45 EET 2017


ffmpeg | branch: master | James Almer <jamrial at gmail.com> | Tue Jan 31 14:49:29 2017 -0300| [ca8a3978e57c7c8f6abab8547f47483e407469b7] | committer: James Almer

Merge commit '1dfc3cf89d0eb026af28be46294b85d79499ffb5'

* commit '1dfc3cf89d0eb026af28be46294b85d79499ffb5':
  x86: hpeldsp: Split off VP3-specific bits into a separate file

Merged-by: James Almer <jamrial at gmail.com>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ca8a3978e57c7c8f6abab8547f47483e407469b7
---

 libavcodec/x86/Makefile           |   2 +
 libavcodec/x86/hpeldsp.asm        |  89 ------------------------------
 libavcodec/x86/hpeldsp.h          |   4 ++
 libavcodec/x86/hpeldsp_init.c     |  25 ++-------
 libavcodec/x86/hpeldsp_vp3.asm    | 111 ++++++++++++++++++++++++++++++++++++++
 libavcodec/x86/hpeldsp_vp3_init.c |  56 +++++++++++++++++++
 6 files changed, 176 insertions(+), 111 deletions(-)

diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 2f0354a..2864952 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -67,6 +67,7 @@ OBJS-$(CONFIG_TTA_ENCODER)             += x86/ttaencdsp_init.o
 OBJS-$(CONFIG_V210_DECODER)            += x86/v210-init.o
 OBJS-$(CONFIG_V210_ENCODER)            += x86/v210enc_init.o
 OBJS-$(CONFIG_VORBIS_DECODER)          += x86/vorbisdsp_init.o
+OBJS-$(CONFIG_VP3_DECODER)             += x86/hpeldsp_vp3_init.o
 OBJS-$(CONFIG_VP6_DECODER)             += x86/vp6dsp_init.o
 OBJS-$(CONFIG_VP9_DECODER)             += x86/vp9dsp_init.o            \
                                           x86/vp9dsp_init_10bpp.o      \
@@ -169,6 +170,7 @@ YASM-OBJS-$(CONFIG_TTA_ENCODER)        += x86/ttaencdsp.o
 YASM-OBJS-$(CONFIG_V210_ENCODER)       += x86/v210enc.o
 YASM-OBJS-$(CONFIG_V210_DECODER)       += x86/v210.o
 YASM-OBJS-$(CONFIG_VORBIS_DECODER)     += x86/vorbisdsp.o
+YASM-OBJS-$(CONFIG_VP3_DECODER)        += x86/hpeldsp_vp3.o
 YASM-OBJS-$(CONFIG_VP6_DECODER)        += x86/vp6dsp.o
 YASM-OBJS-$(CONFIG_VP9_DECODER)        += x86/vp9intrapred.o            \
                                           x86/vp9intrapred_16bpp.o      \
diff --git a/libavcodec/x86/hpeldsp.asm b/libavcodec/x86/hpeldsp.asm
index 82fb893..ce5d7a4 100644
--- a/libavcodec/x86/hpeldsp.asm
+++ b/libavcodec/x86/hpeldsp.asm
@@ -175,53 +175,6 @@ INIT_MMX 3dnow
 PUT_NO_RND_PIXELS8_X2
 
 
-; void ff_put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
-%macro PUT_NO_RND_PIXELS8_X2_EXACT 0
-cglobal put_no_rnd_pixels8_x2_exact, 4,5
-    lea          r4, [r2*3]
-    pcmpeqb      m6, m6
-.loop:
-    mova         m0, [r1]
-    mova         m2, [r1+r2]
-    mova         m1, [r1+1]
-    mova         m3, [r1+r2+1]
-    pxor         m0, m6
-    pxor         m2, m6
-    pxor         m1, m6
-    pxor         m3, m6
-    PAVGB        m0, m1
-    PAVGB        m2, m3
-    pxor         m0, m6
-    pxor         m2, m6
-    mova       [r0], m0
-    mova    [r0+r2], m2
-    mova         m0, [r1+r2*2]
-    mova         m1, [r1+r2*2+1]
-    mova         m2, [r1+r4]
-    mova         m3, [r1+r4+1]
-    pxor         m0, m6
-    pxor         m1, m6
-    pxor         m2, m6
-    pxor         m3, m6
-    PAVGB        m0, m1
-    PAVGB        m2, m3
-    pxor         m0, m6
-    pxor         m2, m6
-    mova  [r0+r2*2], m0
-    mova    [r0+r4], m2
-    lea          r1, [r1+r2*4]
-    lea          r0, [r0+r2*4]
-    sub         r3d, 4
-    jg .loop
-    REP_RET
-%endmacro
-
-INIT_MMX mmxext
-PUT_NO_RND_PIXELS8_X2_EXACT
-INIT_MMX 3dnow
-PUT_NO_RND_PIXELS8_X2_EXACT
-
-
 ; void ff_put_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 %macro PUT_PIXELS8_Y2 0
 %if cpuflag(sse2)
@@ -300,48 +253,6 @@ INIT_MMX 3dnow
 PUT_NO_RND_PIXELS8_Y2
 
 
-; void ff_put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
-%macro PUT_NO_RND_PIXELS8_Y2_EXACT 0
-cglobal put_no_rnd_pixels8_y2_exact, 4,5
-    lea          r4, [r2*3]
-    mova         m0, [r1]
-    pcmpeqb      m6, m6
-    add          r1, r2
-    pxor         m0, m6
-.loop:
-    mova         m1, [r1]
-    mova         m2, [r1+r2]
-    pxor         m1, m6
-    pxor         m2, m6
-    PAVGB        m0, m1
-    PAVGB        m1, m2
-    pxor         m0, m6
-    pxor         m1, m6
-    mova       [r0], m0
-    mova    [r0+r2], m1
-    mova         m1, [r1+r2*2]
-    mova         m0, [r1+r4]
-    pxor         m1, m6
-    pxor         m0, m6
-    PAVGB        m2, m1
-    PAVGB        m1, m0
-    pxor         m2, m6
-    pxor         m1, m6
-    mova  [r0+r2*2], m2
-    mova    [r0+r4], m1
-    lea          r1, [r1+r2*4]
-    lea          r0, [r0+r2*4]
-    sub         r3d, 4
-    jg .loop
-    REP_RET
-%endmacro
-
-INIT_MMX mmxext
-PUT_NO_RND_PIXELS8_Y2_EXACT
-INIT_MMX 3dnow
-PUT_NO_RND_PIXELS8_Y2_EXACT
-
-
 ; void ff_avg_pixels8(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 %macro AVG_PIXELS8 0
 cglobal avg_pixels8, 4,5
diff --git a/libavcodec/x86/hpeldsp.h b/libavcodec/x86/hpeldsp.h
index 5fae990..bf97029 100644
--- a/libavcodec/x86/hpeldsp.h
+++ b/libavcodec/x86/hpeldsp.h
@@ -22,6 +22,8 @@
 #include <stddef.h>
 #include <stdint.h>
 
+#include "libavcodec/hpeldsp.h"
+
 void ff_avg_pixels8_x2_mmx(uint8_t *block, const uint8_t *pixels,
                            ptrdiff_t line_size, int h);
 
@@ -50,4 +52,6 @@ void ff_put_pixels16_xy2_sse2(uint8_t *block, const uint8_t *pixels,
 void ff_put_pixels16_xy2_ssse3(uint8_t *block, const uint8_t *pixels,
                                ptrdiff_t line_size, int h);
 
+void ff_hpeldsp_vp3_init_x86(HpelDSPContext *c, int cpu_flags, int flags);
+
 #endif /* AVCODEC_X86_HPELDSP_H */
diff --git a/libavcodec/x86/hpeldsp_init.c b/libavcodec/x86/hpeldsp_init.c
index f1ba4be..e8da184 100644
--- a/libavcodec/x86/hpeldsp_init.c
+++ b/libavcodec/x86/hpeldsp_init.c
@@ -51,12 +51,6 @@ void ff_put_no_rnd_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
                                      ptrdiff_t line_size, int h);
 void ff_put_no_rnd_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels,
                                     ptrdiff_t line_size, int h);
-void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block,
-                                           const uint8_t *pixels,
-                                           ptrdiff_t line_size, int h);
-void ff_put_no_rnd_pixels8_x2_exact_3dnow(uint8_t *block,
-                                          const uint8_t *pixels,
-                                          ptrdiff_t line_size, int h);
 void ff_put_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
                               ptrdiff_t line_size, int h);
 void ff_put_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels,
@@ -65,12 +59,6 @@ void ff_put_no_rnd_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
                                      ptrdiff_t line_size, int h);
 void ff_put_no_rnd_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels,
                                     ptrdiff_t line_size, int h);
-void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block,
-                                           const uint8_t *pixels,
-                                           ptrdiff_t line_size, int h);
-void ff_put_no_rnd_pixels8_y2_exact_3dnow(uint8_t *block,
-                                          const uint8_t *pixels,
-                                          ptrdiff_t line_size, int h);
 void ff_avg_pixels8_3dnow(uint8_t *block, const uint8_t *pixels,
                           ptrdiff_t line_size, int h);
 void ff_avg_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
@@ -242,11 +230,6 @@ static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags, int cpu_flags)
         c->avg_pixels_tab[0][3] = avg_approx_pixels16_xy2_mmxext;
         c->avg_pixels_tab[1][3] = ff_avg_approx_pixels8_xy2_mmxext;
     }
-
-    if (CONFIG_VP3_DECODER && flags & AV_CODEC_FLAG_BITEXACT) {
-        c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext;
-        c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext;
-    }
 #endif /* HAVE_MMXEXT_EXTERNAL */
 }
 
@@ -278,11 +261,6 @@ static void hpeldsp_init_3dnow(HpelDSPContext *c, int flags, int cpu_flags)
         c->avg_pixels_tab[0][3] = avg_approx_pixels16_xy2_3dnow;
         c->avg_pixels_tab[1][3] = ff_avg_approx_pixels8_xy2_3dnow;
     }
-
-    if (CONFIG_VP3_DECODER && flags & AV_CODEC_FLAG_BITEXACT) {
-        c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_3dnow;
-        c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_3dnow;
-    }
 #endif /* HAVE_AMD3DNOW_EXTERNAL */
 }
 
@@ -332,4 +310,7 @@ av_cold void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags)
 
     if (EXTERNAL_SSSE3(cpu_flags))
         hpeldsp_init_ssse3(c, flags, cpu_flags);
+
+    if (CONFIG_VP3_DECODER)
+        ff_hpeldsp_vp3_init_x86(c, cpu_flags, flags);
 }
diff --git a/libavcodec/x86/hpeldsp_vp3.asm b/libavcodec/x86/hpeldsp_vp3.asm
new file mode 100644
index 0000000..cba96d0
--- /dev/null
+++ b/libavcodec/x86/hpeldsp_vp3.asm
@@ -0,0 +1,111 @@
+;******************************************************************************
+;* SIMD-optimized halfpel functions for VP3
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION .text
+
+; void ff_put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
+%macro PUT_NO_RND_PIXELS8_X2_EXACT 0
+cglobal put_no_rnd_pixels8_x2_exact, 4,5
+    lea          r4, [r2*3]
+    pcmpeqb      m6, m6
+.loop:
+    mova         m0, [r1]
+    mova         m2, [r1+r2]
+    mova         m1, [r1+1]
+    mova         m3, [r1+r2+1]
+    pxor         m0, m6
+    pxor         m2, m6
+    pxor         m1, m6
+    pxor         m3, m6
+    PAVGB        m0, m1
+    PAVGB        m2, m3
+    pxor         m0, m6
+    pxor         m2, m6
+    mova       [r0], m0
+    mova    [r0+r2], m2
+    mova         m0, [r1+r2*2]
+    mova         m1, [r1+r2*2+1]
+    mova         m2, [r1+r4]
+    mova         m3, [r1+r4+1]
+    pxor         m0, m6
+    pxor         m1, m6
+    pxor         m2, m6
+    pxor         m3, m6
+    PAVGB        m0, m1
+    PAVGB        m2, m3
+    pxor         m0, m6
+    pxor         m2, m6
+    mova  [r0+r2*2], m0
+    mova    [r0+r4], m2
+    lea          r1, [r1+r2*4]
+    lea          r0, [r0+r2*4]
+    sub         r3d, 4
+    jg .loop
+    REP_RET
+%endmacro
+
+INIT_MMX mmxext
+PUT_NO_RND_PIXELS8_X2_EXACT
+INIT_MMX 3dnow
+PUT_NO_RND_PIXELS8_X2_EXACT
+
+
+; void ff_put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
+%macro PUT_NO_RND_PIXELS8_Y2_EXACT 0
+cglobal put_no_rnd_pixels8_y2_exact, 4,5
+    lea          r4, [r2*3]
+    mova         m0, [r1]
+    pcmpeqb      m6, m6
+    add          r1, r2
+    pxor         m0, m6
+.loop:
+    mova         m1, [r1]
+    mova         m2, [r1+r2]
+    pxor         m1, m6
+    pxor         m2, m6
+    PAVGB        m0, m1
+    PAVGB        m1, m2
+    pxor         m0, m6
+    pxor         m1, m6
+    mova       [r0], m0
+    mova    [r0+r2], m1
+    mova         m1, [r1+r2*2]
+    mova         m0, [r1+r4]
+    pxor         m1, m6
+    pxor         m0, m6
+    PAVGB        m2, m1
+    PAVGB        m1, m0
+    pxor         m2, m6
+    pxor         m1, m6
+    mova  [r0+r2*2], m2
+    mova    [r0+r4], m1
+    lea          r1, [r1+r2*4]
+    lea          r0, [r0+r2*4]
+    sub         r3d, 4
+    jg .loop
+    REP_RET
+%endmacro
+
+INIT_MMX mmxext
+PUT_NO_RND_PIXELS8_Y2_EXACT
+INIT_MMX 3dnow
+PUT_NO_RND_PIXELS8_Y2_EXACT
diff --git a/libavcodec/x86/hpeldsp_vp3_init.c b/libavcodec/x86/hpeldsp_vp3_init.c
new file mode 100644
index 0000000..5979f41
--- /dev/null
+++ b/libavcodec/x86/hpeldsp_vp3_init.c
@@ -0,0 +1,56 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+
+#include "libavcodec/avcodec.h"
+#include "libavcodec/hpeldsp.h"
+
+#include "hpeldsp.h"
+
+void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block,
+                                           const uint8_t *pixels,
+                                           ptrdiff_t line_size, int h);
+void ff_put_no_rnd_pixels8_x2_exact_3dnow(uint8_t *block,
+                                          const uint8_t *pixels,
+                                          ptrdiff_t line_size, int h);
+void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block,
+                                           const uint8_t *pixels,
+                                           ptrdiff_t line_size, int h);
+void ff_put_no_rnd_pixels8_y2_exact_3dnow(uint8_t *block,
+                                          const uint8_t *pixels,
+                                          ptrdiff_t line_size, int h);
+
+av_cold void ff_hpeldsp_vp3_init_x86(HpelDSPContext *c, int cpu_flags, int flags)
+{
+    if (EXTERNAL_AMD3DNOW(cpu_flags)) {
+        if (flags & AV_CODEC_FLAG_BITEXACT) {
+            c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_3dnow;
+            c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_3dnow;
+        }
+    }
+
+    if (EXTERNAL_MMXEXT(cpu_flags)) {
+        if (flags & AV_CODEC_FLAG_BITEXACT) {
+            c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext;
+            c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext;
+        }
+    }
+}


======================================================================

diff --cc libavcodec/x86/Makefile
index 2f0354a,3208699..2864952
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@@ -57,22 -46,14 +57,23 @@@ OBJS-$(CONFIG_MLP_DECODER)             
  OBJS-$(CONFIG_MPEG4_DECODER)           += x86/xvididct_init.o
  OBJS-$(CONFIG_PNG_DECODER)             += x86/pngdsp_init.o
  OBJS-$(CONFIG_PRORES_DECODER)          += x86/proresdsp_init.o
 +OBJS-$(CONFIG_PRORES_LGPL_DECODER)     += x86/proresdsp_init.o
  OBJS-$(CONFIG_RV40_DECODER)            += x86/rv40dsp_init.o
 -OBJS-$(CONFIG_SVQ1_ENCODER)            += x86/svq1enc.o
 -OBJS-$(CONFIG_TRUEHD_DECODER)          += x86/mlpdsp.o
 +OBJS-$(CONFIG_SVQ1_ENCODER)            += x86/svq1enc_init.o
 +OBJS-$(CONFIG_TAK_DECODER)             += x86/takdsp_init.o
 +OBJS-$(CONFIG_TRUEHD_DECODER)          += x86/mlpdsp_init.o
 +OBJS-$(CONFIG_TTA_DECODER)             += x86/ttadsp_init.o
 +OBJS-$(CONFIG_TTA_ENCODER)             += x86/ttaencdsp_init.o
 +OBJS-$(CONFIG_V210_DECODER)            += x86/v210-init.o
  OBJS-$(CONFIG_V210_ENCODER)            += x86/v210enc_init.o
  OBJS-$(CONFIG_VORBIS_DECODER)          += x86/vorbisdsp_init.o
+ OBJS-$(CONFIG_VP3_DECODER)             += x86/hpeldsp_vp3_init.o
  OBJS-$(CONFIG_VP6_DECODER)             += x86/vp6dsp_init.o
 -OBJS-$(CONFIG_VP9_DECODER)             += x86/vp9dsp_init.o
 +OBJS-$(CONFIG_VP9_DECODER)             += x86/vp9dsp_init.o            \
 +                                          x86/vp9dsp_init_10bpp.o      \
 +                                          x86/vp9dsp_init_12bpp.o      \
 +                                          x86/vp9dsp_init_16bpp.o
 +OBJS-$(CONFIG_WEBP_DECODER)            += x86/vp8dsp_init.o
  
  
  # GCC inline assembly optimizations
@@@ -133,49 -111,18 +134,50 @@@ YASM-OBJS-$(CONFIG_VP8DSP)             
                                            x86/vp8dsp_loopfilter.o
  
  # decoders/encoders
 -YASM-OBJS-$(CONFIG_AAC_DECODER)        += x86/sbrdsp.o
 -YASM-OBJS-$(CONFIG_APE_DECODER)        += x86/apedsp.o
 -YASM-OBJS-$(CONFIG_DCA_DECODER)        += x86/dcadsp.o
 +YASM-OBJS-$(CONFIG_AAC_DECODER)        += x86/aacpsdsp.o                \
 +                                          x86/sbrdsp.o
 +YASM-OBJS-$(CONFIG_AAC_ENCODER)        += x86/aacencdsp.o
 +YASM-OBJS-$(CONFIG_ADPCM_G722_DECODER) += x86/g722dsp.o
 +YASM-OBJS-$(CONFIG_ADPCM_G722_ENCODER) += x86/g722dsp.o
 +YASM-OBJS-$(CONFIG_ALAC_DECODER)       += x86/alacdsp.o
 +YASM-OBJS-$(CONFIG_APNG_DECODER)       += x86/pngdsp.o
 +YASM-OBJS-$(CONFIG_DCA_DECODER)        += x86/dcadsp.o x86/synth_filter.o
 +YASM-OBJS-$(CONFIG_DIRAC_DECODER)      += x86/diracdsp.o                \
 +                                          x86/dirac_dwt.o
  YASM-OBJS-$(CONFIG_DNXHD_ENCODER)      += x86/dnxhdenc.o
 -YASM-OBJS-$(CONFIG_HEVC_DECODER)       += x86/hevc_deblock.o            \
 -                                          x86/hevc_mc.o                 \
 -                                          x86/hevc_idct.o
 +YASM-OBJS-$(CONFIG_FLAC_DECODER)       += x86/flacdsp.o
 +ifdef CONFIG_GPL
 +YASM-OBJS-$(CONFIG_FLAC_ENCODER)       += x86/flac_dsp_gpl.o
 +endif
 +YASM-OBJS-$(CONFIG_HEVC_DECODER)       += x86/hevc_mc.o                 \
 +                                          x86/hevc_deblock.o            \
 +                                          x86/hevc_idct.o               \
 +                                          x86/hevc_res_add.o            \
 +                                          x86/hevc_sao.o                \
 +                                          x86/hevc_sao_10bit.o
 +YASM-OBJS-$(CONFIG_JPEG2000_DECODER)   += x86/jpeg2000dsp.o
 +YASM-OBJS-$(CONFIG_MLP_DECODER)        += x86/mlpdsp.o
 +YASM-OBJS-$(CONFIG_MPEG4_DECODER)      += x86/xvididct.o
  YASM-OBJS-$(CONFIG_PNG_DECODER)        += x86/pngdsp.o
  YASM-OBJS-$(CONFIG_PRORES_DECODER)     += x86/proresdsp.o
 +YASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp.o
  YASM-OBJS-$(CONFIG_RV40_DECODER)       += x86/rv40dsp.o
 +YASM-OBJS-$(CONFIG_SVQ1_ENCODER)       += x86/svq1enc.o
 +YASM-OBJS-$(CONFIG_TAK_DECODER)        += x86/takdsp.o
 +YASM-OBJS-$(CONFIG_TRUEHD_DECODER)     += x86/mlpdsp.o
 +YASM-OBJS-$(CONFIG_TTA_DECODER)        += x86/ttadsp.o
 +YASM-OBJS-$(CONFIG_TTA_ENCODER)        += x86/ttaencdsp.o
  YASM-OBJS-$(CONFIG_V210_ENCODER)       += x86/v210enc.o
 +YASM-OBJS-$(CONFIG_V210_DECODER)       += x86/v210.o
  YASM-OBJS-$(CONFIG_VORBIS_DECODER)     += x86/vorbisdsp.o
+ YASM-OBJS-$(CONFIG_VP3_DECODER)        += x86/hpeldsp_vp3.o
  YASM-OBJS-$(CONFIG_VP6_DECODER)        += x86/vp6dsp.o
 -YASM-OBJS-$(CONFIG_VP9_DECODER)        += x86/vp9dsp.o
 +YASM-OBJS-$(CONFIG_VP9_DECODER)        += x86/vp9intrapred.o            \
 +                                          x86/vp9intrapred_16bpp.o      \
 +                                          x86/vp9itxfm.o                \
 +                                          x86/vp9itxfm_16bpp.o          \
 +                                          x86/vp9lpf.o                  \
 +                                          x86/vp9lpf_16bpp.o            \
 +                                          x86/vp9mc.o                   \
 +                                          x86/vp9mc_16bpp.o
 +YASM-OBJS-$(CONFIG_WEBP_DECODER)       += x86/vp8dsp.o
diff --cc libavcodec/x86/hpeldsp.asm
index 82fb893,8e21114..ce5d7a4
--- a/libavcodec/x86/hpeldsp.asm
+++ b/libavcodec/x86/hpeldsp.asm
@@@ -175,66 -142,15 +175,19 @@@ INIT_MMX 3dno
  PUT_NO_RND_PIXELS8_X2
  
  
- ; void ff_put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
- %macro PUT_NO_RND_PIXELS8_X2_EXACT 0
- cglobal put_no_rnd_pixels8_x2_exact, 4,5
-     lea          r4, [r2*3]
-     pcmpeqb      m6, m6
- .loop:
-     mova         m0, [r1]
-     mova         m2, [r1+r2]
-     mova         m1, [r1+1]
-     mova         m3, [r1+r2+1]
-     pxor         m0, m6
-     pxor         m2, m6
-     pxor         m1, m6
-     pxor         m3, m6
-     PAVGB        m0, m1
-     PAVGB        m2, m3
-     pxor         m0, m6
-     pxor         m2, m6
-     mova       [r0], m0
-     mova    [r0+r2], m2
-     mova         m0, [r1+r2*2]
-     mova         m1, [r1+r2*2+1]
-     mova         m2, [r1+r4]
-     mova         m3, [r1+r4+1]
-     pxor         m0, m6
-     pxor         m1, m6
-     pxor         m2, m6
-     pxor         m3, m6
-     PAVGB        m0, m1
-     PAVGB        m2, m3
-     pxor         m0, m6
-     pxor         m2, m6
-     mova  [r0+r2*2], m0
-     mova    [r0+r4], m2
-     lea          r1, [r1+r2*4]
-     lea          r0, [r0+r2*4]
-     sub         r3d, 4
-     jg .loop
-     REP_RET
- %endmacro
- 
- INIT_MMX mmxext
- PUT_NO_RND_PIXELS8_X2_EXACT
- INIT_MMX 3dnow
- PUT_NO_RND_PIXELS8_X2_EXACT
- 
- 
  ; void ff_put_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
  %macro PUT_PIXELS8_Y2 0
 +%if cpuflag(sse2)
 +cglobal put_pixels16_y2, 4,5,3
 +%else
  cglobal put_pixels8_y2, 4,5
 +%endif
      lea          r4, [r2*2]
 -    mova         m0, [r1]
 +    movu         m0, [r1]
      sub          r0, r2
  .loop:
 -    mova         m1, [r1+r2]
 -    mova         m2, [r1+r4]
 +    movu         m1, [r1+r2]
 +    movu         m2, [r1+r4]
      add          r1, r4
      PAVGB        m0, m1
      PAVGB        m1, m2
diff --cc libavcodec/x86/hpeldsp.h
index 5fae990,d624ed9..bf97029
--- a/libavcodec/x86/hpeldsp.h
+++ b/libavcodec/x86/hpeldsp.h
@@@ -41,13 -34,9 +43,15 @@@ void ff_avg_pixels16_xy2_ssse3(uint8_t 
  
  void ff_put_pixels8_xy2_mmx(uint8_t *block, const uint8_t *pixels,
                              ptrdiff_t line_size, int h);
 +void ff_put_pixels8_xy2_ssse3(uint8_t *block, const uint8_t *pixels,
 +                              ptrdiff_t line_size, int h);
  void ff_put_pixels16_xy2_mmx(uint8_t *block, const uint8_t *pixels,
                               ptrdiff_t line_size, int h);
 +void ff_put_pixels16_xy2_sse2(uint8_t *block, const uint8_t *pixels,
 +                              ptrdiff_t line_size, int h);
 +void ff_put_pixels16_xy2_ssse3(uint8_t *block, const uint8_t *pixels,
 +                               ptrdiff_t line_size, int h);
  
+ void ff_hpeldsp_vp3_init_x86(HpelDSPContext *c, int cpu_flags, int flags);
+ 
  #endif /* AVCODEC_X86_HPELDSP_H */
diff --cc libavcodec/x86/hpeldsp_init.c
index f1ba4be,6731428..e8da184
--- a/libavcodec/x86/hpeldsp_init.c
+++ b/libavcodec/x86/hpeldsp_init.c
@@@ -239,14 -194,9 +227,9 @@@ static void hpeldsp_init_mmxext(HpelDSP
          c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_mmxext;
          c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_mmxext;
  
 -        c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmxext;
 -        c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_mmxext;
 +        c->avg_pixels_tab[0][3] = avg_approx_pixels16_xy2_mmxext;
 +        c->avg_pixels_tab[1][3] = ff_avg_approx_pixels8_xy2_mmxext;
      }
- 
-     if (CONFIG_VP3_DECODER && flags & AV_CODEC_FLAG_BITEXACT) {
-         c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext;
-         c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext;
-     }
  #endif /* HAVE_MMXEXT_EXTERNAL */
  }
  
@@@ -275,14 -223,9 +258,9 @@@ static void hpeldsp_init_3dnow(HpelDSPC
          c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_3dnow;
          c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_3dnow;
  
 -        c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow;
 -        c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_3dnow;
 +        c->avg_pixels_tab[0][3] = avg_approx_pixels16_xy2_3dnow;
 +        c->avg_pixels_tab[1][3] = ff_avg_approx_pixels8_xy2_3dnow;
      }
- 
-     if (CONFIG_VP3_DECODER && flags & AV_CODEC_FLAG_BITEXACT) {
-         c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_3dnow;
-         c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_3dnow;
-     }
  #endif /* HAVE_AMD3DNOW_EXTERNAL */
  }
  
@@@ -330,6 -257,6 +308,9 @@@ av_cold void ff_hpeldsp_init_x86(HpelDS
      if (EXTERNAL_SSE2(cpu_flags))
          hpeldsp_init_sse2(c, flags, cpu_flags);
  
 +    if (EXTERNAL_SSSE3(cpu_flags))
 +        hpeldsp_init_ssse3(c, flags, cpu_flags);
++
+     if (CONFIG_VP3_DECODER)
+         ff_hpeldsp_vp3_init_x86(c, cpu_flags, flags);
  }
diff --cc libavcodec/x86/hpeldsp_vp3.asm
index 0000000,513f14e..cba96d0
mode 000000,100644..100644
--- a/libavcodec/x86/hpeldsp_vp3.asm
+++ b/libavcodec/x86/hpeldsp_vp3.asm
@@@ -1,0 -1,111 +1,111 @@@
+ ;******************************************************************************
+ ;* SIMD-optimized halfpel functions for VP3
+ ;*
 -;* This file is part of Libav.
++;* This file is part of FFmpeg.
+ ;*
 -;* Libav is free software; you can redistribute it and/or
++;* FFmpeg is free software; you can redistribute it and/or
+ ;* modify it under the terms of the GNU Lesser General Public
+ ;* License as published by the Free Software Foundation; either
+ ;* version 2.1 of the License, or (at your option) any later version.
+ ;*
 -;* Libav is distributed in the hope that it will be useful,
++;* FFmpeg is distributed in the hope that it will be useful,
+ ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+ ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ ;* Lesser General Public License for more details.
+ ;*
+ ;* You should have received a copy of the GNU Lesser General Public
 -;* License along with Libav; if not, write to the Free Software
++;* License along with FFmpeg; if not, write to the Free Software
+ ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ ;******************************************************************************
+ 
+ %include "libavutil/x86/x86util.asm"
+ 
+ SECTION .text
+ 
+ ; void ff_put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
+ %macro PUT_NO_RND_PIXELS8_X2_EXACT 0
+ cglobal put_no_rnd_pixels8_x2_exact, 4,5
+     lea          r4, [r2*3]
+     pcmpeqb      m6, m6
+ .loop:
+     mova         m0, [r1]
+     mova         m2, [r1+r2]
+     mova         m1, [r1+1]
+     mova         m3, [r1+r2+1]
+     pxor         m0, m6
+     pxor         m2, m6
+     pxor         m1, m6
+     pxor         m3, m6
+     PAVGB        m0, m1
+     PAVGB        m2, m3
+     pxor         m0, m6
+     pxor         m2, m6
+     mova       [r0], m0
+     mova    [r0+r2], m2
+     mova         m0, [r1+r2*2]
+     mova         m1, [r1+r2*2+1]
+     mova         m2, [r1+r4]
+     mova         m3, [r1+r4+1]
+     pxor         m0, m6
+     pxor         m1, m6
+     pxor         m2, m6
+     pxor         m3, m6
+     PAVGB        m0, m1
+     PAVGB        m2, m3
+     pxor         m0, m6
+     pxor         m2, m6
+     mova  [r0+r2*2], m0
+     mova    [r0+r4], m2
+     lea          r1, [r1+r2*4]
+     lea          r0, [r0+r2*4]
+     sub         r3d, 4
+     jg .loop
+     REP_RET
+ %endmacro
+ 
+ INIT_MMX mmxext
+ PUT_NO_RND_PIXELS8_X2_EXACT
+ INIT_MMX 3dnow
+ PUT_NO_RND_PIXELS8_X2_EXACT
+ 
+ 
+ ; void ff_put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
+ %macro PUT_NO_RND_PIXELS8_Y2_EXACT 0
+ cglobal put_no_rnd_pixels8_y2_exact, 4,5
+     lea          r4, [r2*3]
+     mova         m0, [r1]
+     pcmpeqb      m6, m6
+     add          r1, r2
+     pxor         m0, m6
+ .loop:
+     mova         m1, [r1]
+     mova         m2, [r1+r2]
+     pxor         m1, m6
+     pxor         m2, m6
+     PAVGB        m0, m1
+     PAVGB        m1, m2
+     pxor         m0, m6
+     pxor         m1, m6
+     mova       [r0], m0
+     mova    [r0+r2], m1
+     mova         m1, [r1+r2*2]
+     mova         m0, [r1+r4]
+     pxor         m1, m6
+     pxor         m0, m6
+     PAVGB        m2, m1
+     PAVGB        m1, m0
+     pxor         m2, m6
+     pxor         m1, m6
+     mova  [r0+r2*2], m2
+     mova    [r0+r4], m1
+     lea          r1, [r1+r2*4]
+     lea          r0, [r0+r2*4]
+     sub         r3d, 4
+     jg .loop
+     REP_RET
+ %endmacro
+ 
+ INIT_MMX mmxext
+ PUT_NO_RND_PIXELS8_Y2_EXACT
+ INIT_MMX 3dnow
+ PUT_NO_RND_PIXELS8_Y2_EXACT
diff --cc libavcodec/x86/hpeldsp_vp3_init.c
index 0000000,2510c11..5979f41
mode 000000,100644..100644
--- a/libavcodec/x86/hpeldsp_vp3_init.c
+++ b/libavcodec/x86/hpeldsp_vp3_init.c
@@@ -1,0 -1,56 +1,56 @@@
+ /*
 - * This file is part of Libav.
++ * This file is part of FFmpeg.
+  *
 - * Libav is free software; you can redistribute it and/or
++ * FFmpeg is free software; you can redistribute it and/or
+  * modify it under the terms of the GNU Lesser General Public
+  * License as published by the Free Software Foundation; either
+  * version 2.1 of the License, or (at your option) any later version.
+  *
 - * Libav is distributed in the hope that it will be useful,
++ * FFmpeg is distributed in the hope that it will be useful,
+  * but WITHOUT ANY WARRANTY; without even the implied warranty of
+  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  * Lesser General Public License for more details.
+  *
+  * You should have received a copy of the GNU Lesser General Public
 - * License along with Libav; if not, write to the Free Software
++ * License along with FFmpeg; if not, write to the Free Software
+  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+  */
+ 
+ #include "libavutil/attributes.h"
+ #include "libavutil/cpu.h"
+ #include "libavutil/x86/cpu.h"
+ 
+ #include "libavcodec/avcodec.h"
+ #include "libavcodec/hpeldsp.h"
+ 
+ #include "hpeldsp.h"
+ 
+ void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block,
+                                            const uint8_t *pixels,
+                                            ptrdiff_t line_size, int h);
+ void ff_put_no_rnd_pixels8_x2_exact_3dnow(uint8_t *block,
+                                           const uint8_t *pixels,
+                                           ptrdiff_t line_size, int h);
+ void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block,
+                                            const uint8_t *pixels,
+                                            ptrdiff_t line_size, int h);
+ void ff_put_no_rnd_pixels8_y2_exact_3dnow(uint8_t *block,
+                                           const uint8_t *pixels,
+                                           ptrdiff_t line_size, int h);
+ 
+ av_cold void ff_hpeldsp_vp3_init_x86(HpelDSPContext *c, int cpu_flags, int flags)
+ {
+     if (EXTERNAL_AMD3DNOW(cpu_flags)) {
+         if (flags & AV_CODEC_FLAG_BITEXACT) {
+             c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_3dnow;
+             c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_3dnow;
+         }
+     }
+ 
+     if (EXTERNAL_MMXEXT(cpu_flags)) {
+         if (flags & AV_CODEC_FLAG_BITEXACT) {
+             c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext;
+             c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext;
+         }
+     }
+ }



More information about the ffmpeg-cvslog mailing list