[FFmpeg-cvslog] avcodec/jpeg2000dsp: add ff_rct_int_{sse2,avx2}

James Almer git at videolan.org
Sat Jun 13 21:54:34 CEST 2015


ffmpeg | branch: master | James Almer <jamrial at gmail.com> | Sat Jun 13 13:13:10 2015 -0300| [9f815bc2c294a2582cd4c2bba71803104c3d0bc5] | committer: James Almer

avcodec/jpeg2000dsp: add ff_rct_int_{sse2,avx2}

Reviewed-by: Michael Niedermayer <michaelni at gmx.at>
Signed-off-by: James Almer <jamrial at gmail.com>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=9f815bc2c294a2582cd4c2bba71803104c3d0bc5
---

 libavcodec/jpeg2000.c             |    1 +
 libavcodec/x86/jpeg2000dsp.asm    |   36 ++++++++++++++++++++++++++++++++++++
 libavcodec/x86/jpeg2000dsp_init.c |   10 ++++++++++
 3 files changed, 47 insertions(+)

diff --git a/libavcodec/jpeg2000.c b/libavcodec/jpeg2000.c
index af24e99..ec00ebc 100644
--- a/libavcodec/jpeg2000.c
+++ b/libavcodec/jpeg2000.c
@@ -221,6 +221,7 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp,
         if (!comp->f_data)
             return AVERROR(ENOMEM);
     } else {
+        csize += FF_INPUT_BUFFER_PADDING_SIZE / sizeof(*comp->i_data);
         comp->f_data = NULL;
         comp->i_data = av_mallocz_array(csize, sizeof(*comp->i_data));
         if (!comp->i_data)
diff --git a/libavcodec/x86/jpeg2000dsp.asm b/libavcodec/x86/jpeg2000dsp.asm
index 0d79ab7..712a298 100644
--- a/libavcodec/x86/jpeg2000dsp.asm
+++ b/libavcodec/x86/jpeg2000dsp.asm
@@ -106,3 +106,39 @@ INIT_XMM sse
 ICT_FLOAT 10
 INIT_YMM avx
 ICT_FLOAT 9
+
+;***************************************************************************
+; ff_rct_int_<opt>(int32_t *src0, int32_t *src1, int32_t *src2, int csize)
+;***************************************************************************
+%macro RCT_INT 0 ; emits one rct_int function for the currently selected instruction set
+cglobal rct_int, 4, 4, 4, src0, src1, src2, csize ; x86inc: 4 args, 4 GPRs, 4 vector regs (m0-m3)
+    shl  csized, 2 ; csize *= 4: element count -> byte count (int32_t elements)
+    add   src0q, csizeq ; move each pointer to the end of its buffer ...
+    add   src1q, csizeq
+    add   src2q, csizeq
+    neg  csizeq ; ... and index with a negative byte offset that counts up to zero
+
+align 16
+.loop: ; NOTE(review): works in whole vectors, so a byte count that is not a multiple of mmsize reaches past the end; presumably covered by the i_data padding added in jpeg2000.c - confirm
+    mova   m1, [src1q+csizeq] ; m1 = src1[]
+    mova   m2, [src2q+csizeq] ; m2 = src2[]
+    mova   m0, [src0q+csizeq] ; m0 = src0[]
+    paddd  m3, m1, m2 ; m3 = src1 + src2 (per 32-bit lane)
+    psrad  m3, 2 ; m3 = (src1 + src2) >> 2, arithmetic shift
+    psubd  m0, m3 ; m0 = src0 - ((src1 + src2) >> 2)
+    paddd  m1, m0 ; m1 = src1 + m0
+    paddd  m2, m0 ; m2 = src2 + m0
+    mova   [src1q+csizeq], m0 ; src1[] = src0 - ((src1 + src2) >> 2)
+    mova   [src2q+csizeq], m1 ; src2[] = old src1 + new src1
+    mova   [src0q+csizeq], m2 ; src0[] = old src2 + new src1
+    add  csizeq, mmsize ; advance by one full vector (mmsize bytes)
+    jl .loop ; repeat while the offset is still negative; the body always runs at least once
+    REP_RET ; return to caller
+%endmacro
+
+INIT_XMM sse2 ; select XMM registers / SSE2 for the next expansion
+RCT_INT ; -> ff_rct_int_sse2
+%if HAVE_AVX2_EXTERNAL ; only build the AVX2 version when the build configuration enables it
+INIT_YMM avx2 ; select YMM registers / AVX2 for the next expansion
+RCT_INT ; -> ff_rct_int_avx2
+%endif
diff --git a/libavcodec/x86/jpeg2000dsp_init.c b/libavcodec/x86/jpeg2000dsp_init.c
index 43b9ccd..0dbd2db 100644
--- a/libavcodec/x86/jpeg2000dsp_init.c
+++ b/libavcodec/x86/jpeg2000dsp_init.c
@@ -26,6 +26,8 @@
 
 void ff_ict_float_sse(void *src0, void *src1, void *src2, int csize);
 void ff_ict_float_avx(void *src0, void *src1, void *src2, int csize);
+void ff_rct_int_sse2 (void *src0, void *src1, void *src2, int csize);
+void ff_rct_int_avx2 (void *src0, void *src1, void *src2, int csize);
 
 av_cold void ff_jpeg2000dsp_init_x86(Jpeg2000DSPContext *c)
 {
@@ -34,7 +36,15 @@ av_cold void ff_jpeg2000dsp_init_x86(Jpeg2000DSPContext *c)
         c->mct_decode[FF_DWT97] = ff_ict_float_sse;
     }
 
+    if (EXTERNAL_SSE2(cpu_flags)) {
+        c->mct_decode[FF_DWT53] = ff_rct_int_sse2;
+    }
+
     if (EXTERNAL_AVX_FAST(cpu_flags)) {
         c->mct_decode[FF_DWT97] = ff_ict_float_avx;
     }
+
+    if (EXTERNAL_AVX2(cpu_flags)) {
+        c->mct_decode[FF_DWT53] = ff_rct_int_avx2;
+    }
 }



More information about the ffmpeg-cvslog mailing list