[FFmpeg-cvslog] avcodec/jpeg2000dsp: add ff_rct_int_{sse2,avx2}
James Almer
git at videolan.org
Sat Jun 13 21:54:34 CEST 2015
ffmpeg | branch: master | James Almer <jamrial at gmail.com> | Sat Jun 13 13:13:10 2015 -0300| [9f815bc2c294a2582cd4c2bba71803104c3d0bc5] | committer: James Almer
avcodec/jpeg2000dsp: add ff_rct_int_{sse2,avx2}
Reviewed-by: Michael Niedermayer <michaelni at gmx.at>
Signed-off-by: James Almer <jamrial at gmail.com>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=9f815bc2c294a2582cd4c2bba71803104c3d0bc5
---
libavcodec/jpeg2000.c | 1 +
libavcodec/x86/jpeg2000dsp.asm | 36 ++++++++++++++++++++++++++++++++++++
libavcodec/x86/jpeg2000dsp_init.c | 10 ++++++++++
3 files changed, 47 insertions(+)
diff --git a/libavcodec/jpeg2000.c b/libavcodec/jpeg2000.c
index af24e99..ec00ebc 100644
--- a/libavcodec/jpeg2000.c
+++ b/libavcodec/jpeg2000.c
@@ -221,6 +221,7 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp,
if (!comp->f_data)
return AVERROR(ENOMEM);
} else {
+ csize += FF_INPUT_BUFFER_PADDING_SIZE / sizeof(*comp->i_data);
comp->f_data = NULL;
comp->i_data = av_mallocz_array(csize, sizeof(*comp->i_data));
if (!comp->i_data)
diff --git a/libavcodec/x86/jpeg2000dsp.asm b/libavcodec/x86/jpeg2000dsp.asm
index 0d79ab7..712a298 100644
--- a/libavcodec/x86/jpeg2000dsp.asm
+++ b/libavcodec/x86/jpeg2000dsp.asm
@@ -106,3 +106,39 @@ INIT_XMM sse
ICT_FLOAT 10
INIT_YMM avx
ICT_FLOAT 9
+
+;***************************************************************************
+; ff_rct_int_<opt>(int32_t *src0, int32_t *src1, int32_t *src2, int csize)
+;***************************************************************************
+%macro RCT_INT 0
+cglobal rct_int, 4, 4, 4, src0, src1, src2, csize
+ shl csized, 2
+ add src0q, csizeq
+ add src1q, csizeq
+ add src2q, csizeq
+ neg csizeq
+
+align 16
+.loop:
+ mova m1, [src1q+csizeq]
+ mova m2, [src2q+csizeq]
+ mova m0, [src0q+csizeq]
+ paddd m3, m1, m2
+ psrad m3, 2
+ psubd m0, m3
+ paddd m1, m0
+ paddd m2, m0
+ mova [src1q+csizeq], m0
+ mova [src2q+csizeq], m1
+ mova [src0q+csizeq], m2
+ add csizeq, mmsize
+ jl .loop
+ REP_RET
+%endmacro
+
+INIT_XMM sse2
+RCT_INT
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+RCT_INT
+%endif
diff --git a/libavcodec/x86/jpeg2000dsp_init.c b/libavcodec/x86/jpeg2000dsp_init.c
index 43b9ccd..0dbd2db 100644
--- a/libavcodec/x86/jpeg2000dsp_init.c
+++ b/libavcodec/x86/jpeg2000dsp_init.c
@@ -26,6 +26,8 @@
void ff_ict_float_sse(void *src0, void *src1, void *src2, int csize);
void ff_ict_float_avx(void *src0, void *src1, void *src2, int csize);
+void ff_rct_int_sse2 (void *src0, void *src1, void *src2, int csize);
+void ff_rct_int_avx2 (void *src0, void *src1, void *src2, int csize);
av_cold void ff_jpeg2000dsp_init_x86(Jpeg2000DSPContext *c)
{
@@ -34,7 +36,15 @@ av_cold void ff_jpeg2000dsp_init_x86(Jpeg2000DSPContext *c)
c->mct_decode[FF_DWT97] = ff_ict_float_sse;
}
+ if (EXTERNAL_SSE2(cpu_flags)) {
+ c->mct_decode[FF_DWT53] = ff_rct_int_sse2;
+ }
+
if (EXTERNAL_AVX_FAST(cpu_flags)) {
c->mct_decode[FF_DWT97] = ff_ict_float_avx;
}
+
+ if (EXTERNAL_AVX2(cpu_flags)) {
+ c->mct_decode[FF_DWT53] = ff_rct_int_avx2;
+ }
}
More information about the ffmpeg-cvslog mailing list