[FFmpeg-cvslog] Merge commit '1dfc3cf89d0eb026af28be46294b85d79499ffb5'
James Almer
git at videolan.org
Tue Jan 31 19:51:45 EET 2017
ffmpeg | branch: master | James Almer <jamrial at gmail.com> | Tue Jan 31 14:49:29 2017 -0300| [ca8a3978e57c7c8f6abab8547f47483e407469b7] | committer: James Almer
Merge commit '1dfc3cf89d0eb026af28be46294b85d79499ffb5'
* commit '1dfc3cf89d0eb026af28be46294b85d79499ffb5':
x86: hpeldsp: Split off VP3-specific bits into a separate file
Merged-by: James Almer <jamrial at gmail.com>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ca8a3978e57c7c8f6abab8547f47483e407469b7
---
libavcodec/x86/Makefile | 2 +
libavcodec/x86/hpeldsp.asm | 89 ------------------------------
libavcodec/x86/hpeldsp.h | 4 ++
libavcodec/x86/hpeldsp_init.c | 25 ++-------
libavcodec/x86/hpeldsp_vp3.asm | 111 ++++++++++++++++++++++++++++++++++++++
libavcodec/x86/hpeldsp_vp3_init.c | 56 +++++++++++++++++++
6 files changed, 176 insertions(+), 111 deletions(-)
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 2f0354a..2864952 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -67,6 +67,7 @@ OBJS-$(CONFIG_TTA_ENCODER) += x86/ttaencdsp_init.o
OBJS-$(CONFIG_V210_DECODER) += x86/v210-init.o
OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc_init.o
OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp_init.o
+OBJS-$(CONFIG_VP3_DECODER) += x86/hpeldsp_vp3_init.o
OBJS-$(CONFIG_VP6_DECODER) += x86/vp6dsp_init.o
OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o \
x86/vp9dsp_init_10bpp.o \
@@ -169,6 +170,7 @@ YASM-OBJS-$(CONFIG_TTA_ENCODER) += x86/ttaencdsp.o
YASM-OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc.o
YASM-OBJS-$(CONFIG_V210_DECODER) += x86/v210.o
YASM-OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp.o
+YASM-OBJS-$(CONFIG_VP3_DECODER) += x86/hpeldsp_vp3.o
YASM-OBJS-$(CONFIG_VP6_DECODER) += x86/vp6dsp.o
YASM-OBJS-$(CONFIG_VP9_DECODER) += x86/vp9intrapred.o \
x86/vp9intrapred_16bpp.o \
diff --git a/libavcodec/x86/hpeldsp.asm b/libavcodec/x86/hpeldsp.asm
index 82fb893..ce5d7a4 100644
--- a/libavcodec/x86/hpeldsp.asm
+++ b/libavcodec/x86/hpeldsp.asm
@@ -175,53 +175,6 @@ INIT_MMX 3dnow
PUT_NO_RND_PIXELS8_X2
-; void ff_put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
-%macro PUT_NO_RND_PIXELS8_X2_EXACT 0
-cglobal put_no_rnd_pixels8_x2_exact, 4,5
- lea r4, [r2*3]
- pcmpeqb m6, m6
-.loop:
- mova m0, [r1]
- mova m2, [r1+r2]
- mova m1, [r1+1]
- mova m3, [r1+r2+1]
- pxor m0, m6
- pxor m2, m6
- pxor m1, m6
- pxor m3, m6
- PAVGB m0, m1
- PAVGB m2, m3
- pxor m0, m6
- pxor m2, m6
- mova [r0], m0
- mova [r0+r2], m2
- mova m0, [r1+r2*2]
- mova m1, [r1+r2*2+1]
- mova m2, [r1+r4]
- mova m3, [r1+r4+1]
- pxor m0, m6
- pxor m1, m6
- pxor m2, m6
- pxor m3, m6
- PAVGB m0, m1
- PAVGB m2, m3
- pxor m0, m6
- pxor m2, m6
- mova [r0+r2*2], m0
- mova [r0+r4], m2
- lea r1, [r1+r2*4]
- lea r0, [r0+r2*4]
- sub r3d, 4
- jg .loop
- REP_RET
-%endmacro
-
-INIT_MMX mmxext
-PUT_NO_RND_PIXELS8_X2_EXACT
-INIT_MMX 3dnow
-PUT_NO_RND_PIXELS8_X2_EXACT
-
-
; void ff_put_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
%macro PUT_PIXELS8_Y2 0
%if cpuflag(sse2)
@@ -300,48 +253,6 @@ INIT_MMX 3dnow
PUT_NO_RND_PIXELS8_Y2
-; void ff_put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
-%macro PUT_NO_RND_PIXELS8_Y2_EXACT 0
-cglobal put_no_rnd_pixels8_y2_exact, 4,5
- lea r4, [r2*3]
- mova m0, [r1]
- pcmpeqb m6, m6
- add r1, r2
- pxor m0, m6
-.loop:
- mova m1, [r1]
- mova m2, [r1+r2]
- pxor m1, m6
- pxor m2, m6
- PAVGB m0, m1
- PAVGB m1, m2
- pxor m0, m6
- pxor m1, m6
- mova [r0], m0
- mova [r0+r2], m1
- mova m1, [r1+r2*2]
- mova m0, [r1+r4]
- pxor m1, m6
- pxor m0, m6
- PAVGB m2, m1
- PAVGB m1, m0
- pxor m2, m6
- pxor m1, m6
- mova [r0+r2*2], m2
- mova [r0+r4], m1
- lea r1, [r1+r2*4]
- lea r0, [r0+r2*4]
- sub r3d, 4
- jg .loop
- REP_RET
-%endmacro
-
-INIT_MMX mmxext
-PUT_NO_RND_PIXELS8_Y2_EXACT
-INIT_MMX 3dnow
-PUT_NO_RND_PIXELS8_Y2_EXACT
-
-
; void ff_avg_pixels8(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
%macro AVG_PIXELS8 0
cglobal avg_pixels8, 4,5
diff --git a/libavcodec/x86/hpeldsp.h b/libavcodec/x86/hpeldsp.h
index 5fae990..bf97029 100644
--- a/libavcodec/x86/hpeldsp.h
+++ b/libavcodec/x86/hpeldsp.h
@@ -22,6 +22,8 @@
#include <stddef.h>
#include <stdint.h>
+#include "libavcodec/hpeldsp.h"
+
void ff_avg_pixels8_x2_mmx(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
@@ -50,4 +52,6 @@ void ff_put_pixels16_xy2_sse2(uint8_t *block, const uint8_t *pixels,
void ff_put_pixels16_xy2_ssse3(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
+void ff_hpeldsp_vp3_init_x86(HpelDSPContext *c, int cpu_flags, int flags);
+
#endif /* AVCODEC_X86_HPELDSP_H */
diff --git a/libavcodec/x86/hpeldsp_init.c b/libavcodec/x86/hpeldsp_init.c
index f1ba4be..e8da184 100644
--- a/libavcodec/x86/hpeldsp_init.c
+++ b/libavcodec/x86/hpeldsp_init.c
@@ -51,12 +51,6 @@ void ff_put_no_rnd_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_put_no_rnd_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
-void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block,
- const uint8_t *pixels,
- ptrdiff_t line_size, int h);
-void ff_put_no_rnd_pixels8_x2_exact_3dnow(uint8_t *block,
- const uint8_t *pixels,
- ptrdiff_t line_size, int h);
void ff_put_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_put_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels,
@@ -65,12 +59,6 @@ void ff_put_no_rnd_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_put_no_rnd_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
-void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block,
- const uint8_t *pixels,
- ptrdiff_t line_size, int h);
-void ff_put_no_rnd_pixels8_y2_exact_3dnow(uint8_t *block,
- const uint8_t *pixels,
- ptrdiff_t line_size, int h);
void ff_avg_pixels8_3dnow(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_avg_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
@@ -242,11 +230,6 @@ static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags, int cpu_flags)
c->avg_pixels_tab[0][3] = avg_approx_pixels16_xy2_mmxext;
c->avg_pixels_tab[1][3] = ff_avg_approx_pixels8_xy2_mmxext;
}
-
- if (CONFIG_VP3_DECODER && flags & AV_CODEC_FLAG_BITEXACT) {
- c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext;
- c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext;
- }
#endif /* HAVE_MMXEXT_EXTERNAL */
}
@@ -278,11 +261,6 @@ static void hpeldsp_init_3dnow(HpelDSPContext *c, int flags, int cpu_flags)
c->avg_pixels_tab[0][3] = avg_approx_pixels16_xy2_3dnow;
c->avg_pixels_tab[1][3] = ff_avg_approx_pixels8_xy2_3dnow;
}
-
- if (CONFIG_VP3_DECODER && flags & AV_CODEC_FLAG_BITEXACT) {
- c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_3dnow;
- c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_3dnow;
- }
#endif /* HAVE_AMD3DNOW_EXTERNAL */
}
@@ -332,4 +310,7 @@ av_cold void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags)
if (EXTERNAL_SSSE3(cpu_flags))
hpeldsp_init_ssse3(c, flags, cpu_flags);
+
+ if (CONFIG_VP3_DECODER)
+ ff_hpeldsp_vp3_init_x86(c, cpu_flags, flags);
}
diff --git a/libavcodec/x86/hpeldsp_vp3.asm b/libavcodec/x86/hpeldsp_vp3.asm
new file mode 100644
index 0000000..cba96d0
--- /dev/null
+++ b/libavcodec/x86/hpeldsp_vp3.asm
@@ -0,0 +1,111 @@
+;******************************************************************************
+;* SIMD-optimized halfpel functions for VP3
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION .text
+
+; void ff_put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
+%macro PUT_NO_RND_PIXELS8_X2_EXACT 0
+cglobal put_no_rnd_pixels8_x2_exact, 4,5
+ lea r4, [r2*3]
+ pcmpeqb m6, m6
+.loop:
+ mova m0, [r1]
+ mova m2, [r1+r2]
+ mova m1, [r1+1]
+ mova m3, [r1+r2+1]
+ pxor m0, m6
+ pxor m2, m6
+ pxor m1, m6
+ pxor m3, m6
+ PAVGB m0, m1
+ PAVGB m2, m3
+ pxor m0, m6
+ pxor m2, m6
+ mova [r0], m0
+ mova [r0+r2], m2
+ mova m0, [r1+r2*2]
+ mova m1, [r1+r2*2+1]
+ mova m2, [r1+r4]
+ mova m3, [r1+r4+1]
+ pxor m0, m6
+ pxor m1, m6
+ pxor m2, m6
+ pxor m3, m6
+ PAVGB m0, m1
+ PAVGB m2, m3
+ pxor m0, m6
+ pxor m2, m6
+ mova [r0+r2*2], m0
+ mova [r0+r4], m2
+ lea r1, [r1+r2*4]
+ lea r0, [r0+r2*4]
+ sub r3d, 4
+ jg .loop
+ REP_RET
+%endmacro
+
+INIT_MMX mmxext
+PUT_NO_RND_PIXELS8_X2_EXACT
+INIT_MMX 3dnow
+PUT_NO_RND_PIXELS8_X2_EXACT
+
+
+; void ff_put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
+%macro PUT_NO_RND_PIXELS8_Y2_EXACT 0
+cglobal put_no_rnd_pixels8_y2_exact, 4,5
+ lea r4, [r2*3]
+ mova m0, [r1]
+ pcmpeqb m6, m6
+ add r1, r2
+ pxor m0, m6
+.loop:
+ mova m1, [r1]
+ mova m2, [r1+r2]
+ pxor m1, m6
+ pxor m2, m6
+ PAVGB m0, m1
+ PAVGB m1, m2
+ pxor m0, m6
+ pxor m1, m6
+ mova [r0], m0
+ mova [r0+r2], m1
+ mova m1, [r1+r2*2]
+ mova m0, [r1+r4]
+ pxor m1, m6
+ pxor m0, m6
+ PAVGB m2, m1
+ PAVGB m1, m0
+ pxor m2, m6
+ pxor m1, m6
+ mova [r0+r2*2], m2
+ mova [r0+r4], m1
+ lea r1, [r1+r2*4]
+ lea r0, [r0+r2*4]
+ sub r3d, 4
+ jg .loop
+ REP_RET
+%endmacro
+
+INIT_MMX mmxext
+PUT_NO_RND_PIXELS8_Y2_EXACT
+INIT_MMX 3dnow
+PUT_NO_RND_PIXELS8_Y2_EXACT
diff --git a/libavcodec/x86/hpeldsp_vp3_init.c b/libavcodec/x86/hpeldsp_vp3_init.c
new file mode 100644
index 0000000..5979f41
--- /dev/null
+++ b/libavcodec/x86/hpeldsp_vp3_init.c
@@ -0,0 +1,56 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+
+#include "libavcodec/avcodec.h"
+#include "libavcodec/hpeldsp.h"
+
+#include "hpeldsp.h"
+
+void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block,
+ const uint8_t *pixels,
+ ptrdiff_t line_size, int h);
+void ff_put_no_rnd_pixels8_x2_exact_3dnow(uint8_t *block,
+ const uint8_t *pixels,
+ ptrdiff_t line_size, int h);
+void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block,
+ const uint8_t *pixels,
+ ptrdiff_t line_size, int h);
+void ff_put_no_rnd_pixels8_y2_exact_3dnow(uint8_t *block,
+ const uint8_t *pixels,
+ ptrdiff_t line_size, int h);
+
+av_cold void ff_hpeldsp_vp3_init_x86(HpelDSPContext *c, int cpu_flags, int flags)
+{
+ if (EXTERNAL_AMD3DNOW(cpu_flags)) {
+ if (flags & AV_CODEC_FLAG_BITEXACT) {
+ c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_3dnow;
+ c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_3dnow;
+ }
+ }
+
+ if (EXTERNAL_MMXEXT(cpu_flags)) {
+ if (flags & AV_CODEC_FLAG_BITEXACT) {
+ c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext;
+ c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext;
+ }
+ }
+}
======================================================================
diff --cc libavcodec/x86/Makefile
index 2f0354a,3208699..2864952
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@@ -57,22 -46,14 +57,23 @@@ OBJS-$(CONFIG_MLP_DECODER)
OBJS-$(CONFIG_MPEG4_DECODER) += x86/xvididct_init.o
OBJS-$(CONFIG_PNG_DECODER) += x86/pngdsp_init.o
OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp_init.o
+OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp_init.o
OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp_init.o
-OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc.o
-OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o
+OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc_init.o
+OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp_init.o
+OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp_init.o
+OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp_init.o
+OBJS-$(CONFIG_TTA_ENCODER) += x86/ttaencdsp_init.o
+OBJS-$(CONFIG_V210_DECODER) += x86/v210-init.o
OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc_init.o
OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp_init.o
+ OBJS-$(CONFIG_VP3_DECODER) += x86/hpeldsp_vp3_init.o
OBJS-$(CONFIG_VP6_DECODER) += x86/vp6dsp_init.o
-OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o
+OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o \
+ x86/vp9dsp_init_10bpp.o \
+ x86/vp9dsp_init_12bpp.o \
+ x86/vp9dsp_init_16bpp.o
+OBJS-$(CONFIG_WEBP_DECODER) += x86/vp8dsp_init.o
# GCC inline assembly optimizations
@@@ -133,49 -111,18 +134,50 @@@ YASM-OBJS-$(CONFIG_VP8DSP)
x86/vp8dsp_loopfilter.o
# decoders/encoders
-YASM-OBJS-$(CONFIG_AAC_DECODER) += x86/sbrdsp.o
-YASM-OBJS-$(CONFIG_APE_DECODER) += x86/apedsp.o
-YASM-OBJS-$(CONFIG_DCA_DECODER) += x86/dcadsp.o
+YASM-OBJS-$(CONFIG_AAC_DECODER) += x86/aacpsdsp.o \
+ x86/sbrdsp.o
+YASM-OBJS-$(CONFIG_AAC_ENCODER) += x86/aacencdsp.o
+YASM-OBJS-$(CONFIG_ADPCM_G722_DECODER) += x86/g722dsp.o
+YASM-OBJS-$(CONFIG_ADPCM_G722_ENCODER) += x86/g722dsp.o
+YASM-OBJS-$(CONFIG_ALAC_DECODER) += x86/alacdsp.o
+YASM-OBJS-$(CONFIG_APNG_DECODER) += x86/pngdsp.o
+YASM-OBJS-$(CONFIG_DCA_DECODER) += x86/dcadsp.o x86/synth_filter.o
+YASM-OBJS-$(CONFIG_DIRAC_DECODER) += x86/diracdsp.o \
+ x86/dirac_dwt.o
YASM-OBJS-$(CONFIG_DNXHD_ENCODER) += x86/dnxhdenc.o
-YASM-OBJS-$(CONFIG_HEVC_DECODER) += x86/hevc_deblock.o \
- x86/hevc_mc.o \
- x86/hevc_idct.o
+YASM-OBJS-$(CONFIG_FLAC_DECODER) += x86/flacdsp.o
+ifdef CONFIG_GPL
+YASM-OBJS-$(CONFIG_FLAC_ENCODER) += x86/flac_dsp_gpl.o
+endif
+YASM-OBJS-$(CONFIG_HEVC_DECODER) += x86/hevc_mc.o \
+ x86/hevc_deblock.o \
+ x86/hevc_idct.o \
+ x86/hevc_res_add.o \
+ x86/hevc_sao.o \
+ x86/hevc_sao_10bit.o
+YASM-OBJS-$(CONFIG_JPEG2000_DECODER) += x86/jpeg2000dsp.o
+YASM-OBJS-$(CONFIG_MLP_DECODER) += x86/mlpdsp.o
+YASM-OBJS-$(CONFIG_MPEG4_DECODER) += x86/xvididct.o
YASM-OBJS-$(CONFIG_PNG_DECODER) += x86/pngdsp.o
YASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o
+YASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp.o
YASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp.o
+YASM-OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc.o
+YASM-OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp.o
+YASM-OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o
+YASM-OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp.o
+YASM-OBJS-$(CONFIG_TTA_ENCODER) += x86/ttaencdsp.o
YASM-OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc.o
+YASM-OBJS-$(CONFIG_V210_DECODER) += x86/v210.o
YASM-OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp.o
+ YASM-OBJS-$(CONFIG_VP3_DECODER) += x86/hpeldsp_vp3.o
YASM-OBJS-$(CONFIG_VP6_DECODER) += x86/vp6dsp.o
-YASM-OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp.o
+YASM-OBJS-$(CONFIG_VP9_DECODER) += x86/vp9intrapred.o \
+ x86/vp9intrapred_16bpp.o \
+ x86/vp9itxfm.o \
+ x86/vp9itxfm_16bpp.o \
+ x86/vp9lpf.o \
+ x86/vp9lpf_16bpp.o \
+ x86/vp9mc.o \
+ x86/vp9mc_16bpp.o
+YASM-OBJS-$(CONFIG_WEBP_DECODER) += x86/vp8dsp.o
diff --cc libavcodec/x86/hpeldsp.asm
index 82fb893,8e21114..ce5d7a4
--- a/libavcodec/x86/hpeldsp.asm
+++ b/libavcodec/x86/hpeldsp.asm
@@@ -175,66 -142,15 +175,19 @@@ INIT_MMX 3dno
PUT_NO_RND_PIXELS8_X2
- ; void ff_put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
- %macro PUT_NO_RND_PIXELS8_X2_EXACT 0
- cglobal put_no_rnd_pixels8_x2_exact, 4,5
- lea r4, [r2*3]
- pcmpeqb m6, m6
- .loop:
- mova m0, [r1]
- mova m2, [r1+r2]
- mova m1, [r1+1]
- mova m3, [r1+r2+1]
- pxor m0, m6
- pxor m2, m6
- pxor m1, m6
- pxor m3, m6
- PAVGB m0, m1
- PAVGB m2, m3
- pxor m0, m6
- pxor m2, m6
- mova [r0], m0
- mova [r0+r2], m2
- mova m0, [r1+r2*2]
- mova m1, [r1+r2*2+1]
- mova m2, [r1+r4]
- mova m3, [r1+r4+1]
- pxor m0, m6
- pxor m1, m6
- pxor m2, m6
- pxor m3, m6
- PAVGB m0, m1
- PAVGB m2, m3
- pxor m0, m6
- pxor m2, m6
- mova [r0+r2*2], m0
- mova [r0+r4], m2
- lea r1, [r1+r2*4]
- lea r0, [r0+r2*4]
- sub r3d, 4
- jg .loop
- REP_RET
- %endmacro
-
- INIT_MMX mmxext
- PUT_NO_RND_PIXELS8_X2_EXACT
- INIT_MMX 3dnow
- PUT_NO_RND_PIXELS8_X2_EXACT
-
-
; void ff_put_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
%macro PUT_PIXELS8_Y2 0
+%if cpuflag(sse2)
+cglobal put_pixels16_y2, 4,5,3
+%else
cglobal put_pixels8_y2, 4,5
+%endif
lea r4, [r2*2]
- mova m0, [r1]
+ movu m0, [r1]
sub r0, r2
.loop:
- mova m1, [r1+r2]
- mova m2, [r1+r4]
+ movu m1, [r1+r2]
+ movu m2, [r1+r4]
add r1, r4
PAVGB m0, m1
PAVGB m1, m2
diff --cc libavcodec/x86/hpeldsp.h
index 5fae990,d624ed9..bf97029
--- a/libavcodec/x86/hpeldsp.h
+++ b/libavcodec/x86/hpeldsp.h
@@@ -41,13 -34,9 +43,15 @@@ void ff_avg_pixels16_xy2_ssse3(uint8_t
void ff_put_pixels8_xy2_mmx(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
+void ff_put_pixels8_xy2_ssse3(uint8_t *block, const uint8_t *pixels,
+ ptrdiff_t line_size, int h);
void ff_put_pixels16_xy2_mmx(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
+void ff_put_pixels16_xy2_sse2(uint8_t *block, const uint8_t *pixels,
+ ptrdiff_t line_size, int h);
+void ff_put_pixels16_xy2_ssse3(uint8_t *block, const uint8_t *pixels,
+ ptrdiff_t line_size, int h);
+ void ff_hpeldsp_vp3_init_x86(HpelDSPContext *c, int cpu_flags, int flags);
+
#endif /* AVCODEC_X86_HPELDSP_H */
diff --cc libavcodec/x86/hpeldsp_init.c
index f1ba4be,6731428..e8da184
--- a/libavcodec/x86/hpeldsp_init.c
+++ b/libavcodec/x86/hpeldsp_init.c
@@@ -239,14 -194,9 +227,9 @@@ static void hpeldsp_init_mmxext(HpelDSP
c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_mmxext;
c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_mmxext;
- c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmxext;
- c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_mmxext;
+ c->avg_pixels_tab[0][3] = avg_approx_pixels16_xy2_mmxext;
+ c->avg_pixels_tab[1][3] = ff_avg_approx_pixels8_xy2_mmxext;
}
-
- if (CONFIG_VP3_DECODER && flags & AV_CODEC_FLAG_BITEXACT) {
- c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext;
- c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext;
- }
#endif /* HAVE_MMXEXT_EXTERNAL */
}
@@@ -275,14 -223,9 +258,9 @@@ static void hpeldsp_init_3dnow(HpelDSPC
c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_3dnow;
c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_3dnow;
- c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow;
- c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_3dnow;
+ c->avg_pixels_tab[0][3] = avg_approx_pixels16_xy2_3dnow;
+ c->avg_pixels_tab[1][3] = ff_avg_approx_pixels8_xy2_3dnow;
}
-
- if (CONFIG_VP3_DECODER && flags & AV_CODEC_FLAG_BITEXACT) {
- c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_3dnow;
- c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_3dnow;
- }
#endif /* HAVE_AMD3DNOW_EXTERNAL */
}
@@@ -330,6 -257,6 +308,9 @@@ av_cold void ff_hpeldsp_init_x86(HpelDS
if (EXTERNAL_SSE2(cpu_flags))
hpeldsp_init_sse2(c, flags, cpu_flags);
+ if (EXTERNAL_SSSE3(cpu_flags))
+ hpeldsp_init_ssse3(c, flags, cpu_flags);
++
+ if (CONFIG_VP3_DECODER)
+ ff_hpeldsp_vp3_init_x86(c, cpu_flags, flags);
}
diff --cc libavcodec/x86/hpeldsp_vp3.asm
index 0000000,513f14e..cba96d0
mode 000000,100644..100644
--- a/libavcodec/x86/hpeldsp_vp3.asm
+++ b/libavcodec/x86/hpeldsp_vp3.asm
@@@ -1,0 -1,111 +1,111 @@@
+ ;******************************************************************************
+ ;* SIMD-optimized halfpel functions for VP3
+ ;*
-;* This file is part of Libav.
++;* This file is part of FFmpeg.
+ ;*
-;* Libav is free software; you can redistribute it and/or
++;* FFmpeg is free software; you can redistribute it and/or
+ ;* modify it under the terms of the GNU Lesser General Public
+ ;* License as published by the Free Software Foundation; either
+ ;* version 2.1 of the License, or (at your option) any later version.
+ ;*
-;* Libav is distributed in the hope that it will be useful,
++;* FFmpeg is distributed in the hope that it will be useful,
+ ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+ ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ ;* Lesser General Public License for more details.
+ ;*
+ ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
++;* License along with FFmpeg; if not, write to the Free Software
+ ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ ;******************************************************************************
+
+ %include "libavutil/x86/x86util.asm"
+
+ SECTION .text
+
+ ; void ff_put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
+ %macro PUT_NO_RND_PIXELS8_X2_EXACT 0
+ cglobal put_no_rnd_pixels8_x2_exact, 4,5
+ lea r4, [r2*3]
+ pcmpeqb m6, m6
+ .loop:
+ mova m0, [r1]
+ mova m2, [r1+r2]
+ mova m1, [r1+1]
+ mova m3, [r1+r2+1]
+ pxor m0, m6
+ pxor m2, m6
+ pxor m1, m6
+ pxor m3, m6
+ PAVGB m0, m1
+ PAVGB m2, m3
+ pxor m0, m6
+ pxor m2, m6
+ mova [r0], m0
+ mova [r0+r2], m2
+ mova m0, [r1+r2*2]
+ mova m1, [r1+r2*2+1]
+ mova m2, [r1+r4]
+ mova m3, [r1+r4+1]
+ pxor m0, m6
+ pxor m1, m6
+ pxor m2, m6
+ pxor m3, m6
+ PAVGB m0, m1
+ PAVGB m2, m3
+ pxor m0, m6
+ pxor m2, m6
+ mova [r0+r2*2], m0
+ mova [r0+r4], m2
+ lea r1, [r1+r2*4]
+ lea r0, [r0+r2*4]
+ sub r3d, 4
+ jg .loop
+ REP_RET
+ %endmacro
+
+ INIT_MMX mmxext
+ PUT_NO_RND_PIXELS8_X2_EXACT
+ INIT_MMX 3dnow
+ PUT_NO_RND_PIXELS8_X2_EXACT
+
+
+ ; void ff_put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
+ %macro PUT_NO_RND_PIXELS8_Y2_EXACT 0
+ cglobal put_no_rnd_pixels8_y2_exact, 4,5
+ lea r4, [r2*3]
+ mova m0, [r1]
+ pcmpeqb m6, m6
+ add r1, r2
+ pxor m0, m6
+ .loop:
+ mova m1, [r1]
+ mova m2, [r1+r2]
+ pxor m1, m6
+ pxor m2, m6
+ PAVGB m0, m1
+ PAVGB m1, m2
+ pxor m0, m6
+ pxor m1, m6
+ mova [r0], m0
+ mova [r0+r2], m1
+ mova m1, [r1+r2*2]
+ mova m0, [r1+r4]
+ pxor m1, m6
+ pxor m0, m6
+ PAVGB m2, m1
+ PAVGB m1, m0
+ pxor m2, m6
+ pxor m1, m6
+ mova [r0+r2*2], m2
+ mova [r0+r4], m1
+ lea r1, [r1+r2*4]
+ lea r0, [r0+r2*4]
+ sub r3d, 4
+ jg .loop
+ REP_RET
+ %endmacro
+
+ INIT_MMX mmxext
+ PUT_NO_RND_PIXELS8_Y2_EXACT
+ INIT_MMX 3dnow
+ PUT_NO_RND_PIXELS8_Y2_EXACT
diff --cc libavcodec/x86/hpeldsp_vp3_init.c
index 0000000,2510c11..5979f41
mode 000000,100644..100644
--- a/libavcodec/x86/hpeldsp_vp3_init.c
+++ b/libavcodec/x86/hpeldsp_vp3_init.c
@@@ -1,0 -1,56 +1,56 @@@
+ /*
- * This file is part of Libav.
++ * This file is part of FFmpeg.
+ *
- * Libav is free software; you can redistribute it and/or
++ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
- * Libav is distributed in the hope that it will be useful,
++ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
++ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+ #include "libavutil/attributes.h"
+ #include "libavutil/cpu.h"
+ #include "libavutil/x86/cpu.h"
+
+ #include "libavcodec/avcodec.h"
+ #include "libavcodec/hpeldsp.h"
+
+ #include "hpeldsp.h"
+
+ void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block,
+ const uint8_t *pixels,
+ ptrdiff_t line_size, int h);
+ void ff_put_no_rnd_pixels8_x2_exact_3dnow(uint8_t *block,
+ const uint8_t *pixels,
+ ptrdiff_t line_size, int h);
+ void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block,
+ const uint8_t *pixels,
+ ptrdiff_t line_size, int h);
+ void ff_put_no_rnd_pixels8_y2_exact_3dnow(uint8_t *block,
+ const uint8_t *pixels,
+ ptrdiff_t line_size, int h);
+
+ av_cold void ff_hpeldsp_vp3_init_x86(HpelDSPContext *c, int cpu_flags, int flags)
+ {
+ if (EXTERNAL_AMD3DNOW(cpu_flags)) {
+ if (flags & AV_CODEC_FLAG_BITEXACT) {
+ c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_3dnow;
+ c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_3dnow;
+ }
+ }
+
+ if (EXTERNAL_MMXEXT(cpu_flags)) {
+ if (flags & AV_CODEC_FLAG_BITEXACT) {
+ c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext;
+ c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext;
+ }
+ }
+ }
More information about the ffmpeg-cvslog
mailing list