[FFmpeg-devel] [PATCH] ARM: NEON optimised vector_fmul

Mans Rullgard mans
Mon Aug 25 05:06:43 CEST 2008


---
 libavcodec/armv4l/dsputil_neon.c   |    2 ++
 libavcodec/armv4l/dsputil_neon_s.S |   17 +++++++++++++++++
 2 files changed, 19 insertions(+), 0 deletions(-)

diff --git a/libavcodec/armv4l/dsputil_neon.c b/libavcodec/armv4l/dsputil_neon.c
index 6c44940..c6fc173 100644
--- a/libavcodec/armv4l/dsputil_neon.c
+++ b/libavcodec/armv4l/dsputil_neon.c
@@ -91,6 +91,7 @@ void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
 void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride);
 void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride);
 
+void ff_vector_fmul_neon(float *dst, const float *src, int len);
 void ff_vector_fmul_window_neon(float *dst, const float *src0,
                                 const float *src1, const float *win,
                                 float add_bias, int len);
@@ -168,6 +169,7 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
     c->h264_idct_add = ff_h264_idct_add_neon;
     c->h264_idct_dc_add = ff_h264_idct_dc_add_neon;
 
+    c->vector_fmul = ff_vector_fmul_neon;
     c->vector_fmul_window = ff_vector_fmul_window_neon;
 
     c->float_to_int16 = ff_float_to_int16_neon;
diff --git a/libavcodec/armv4l/dsputil_neon_s.S b/libavcodec/armv4l/dsputil_neon_s.S
index e4b809e..d1bdba1 100644
--- a/libavcodec/armv4l/dsputil_neon_s.S
+++ b/libavcodec/armv4l/dsputil_neon_s.S
@@ -324,6 +324,23 @@ extern ff_float_to_int16_interleave_neon
         pop           {r4,r5,pc}
         .endfunc
 
+extern ff_vector_fmul_neon                          /* In-place product, 8 floats per pass: data at r0 *= data at r1 (C prototype: dst, src, len; presumably r0=dst, r1=src, r2=len per the ARM calling convention) */
+        mov           r3, r0                        /* r3 = store pointer, starts at the same address as the r0 load pointer */
+        vld1.64       {d0-d3}, [r0,:128]!           /* preload first 8 floats from r0 into q0,q1; :128 is a 16-byte alignment hint; r0 post-incremented */
+        vld1.64       {d4-d7}, [r1,:128]!           /* preload first 8 floats from r1 into q2,q3; r1 post-incremented */
+        dmb                                         /* NOTE(review): data memory barrier; the reason it is required is not evident from this code - confirm */
+1:      subs          r2, r2, #8                    /* 8 elements consumed; Z flag set when the remaining count is exactly 0 */
+        vmul.f32      q8, q0, q2                    /* q8 = q0 * q2 (first 4 floats of the group) */
+        vmul.f32      q9, q1, q3                    /* q9 = q1 * q3 (last 4 floats of the group) */
+        beq           2f                            /* last group: skip further loads. Exit happens only on exactly 0, so len presumably must be a positive multiple of 8 - verify against callers */
+        vld1.64       {d0-d3},   [r0,:128]!         /* load the next group from r0 before storing the current products */
+        vld1.64       {d4-d7},   [r1,:128]!         /* load the next group from r1 */
+        vst1.64       {d16-d19}, [r3,:128]!         /* store q8,q9 through r3, which trails r0 by one 8-float group */
+        b             1b                            /* process the group just loaded */
+2:      vst1.64       {d16-d19}, [r3,:128]!         /* store the final group's products */
+        bx            lr                            /* return to caller */
+        .endfunc                                    /* closes the function opened by the extern line above */
+
 extern ff_vector_fmul_window_neon
         vld1.32       {d16[],d17[]}, [sp,:32]
         push          {r4,r5,lr}
-- 
1.6.0





More information about the ffmpeg-devel mailing list