[FFmpeg-cvslog] r16312 - in trunk/libavcodec/arm: dsputil_neon.c h264idct_neon.S

mru subversion
Fri Dec 26 00:13:43 CET 2008


Author: mru
Date: Fri Dec 26 00:13:43 2008
New Revision: 16312

Log:
ARM: add new h264 idct functions

Modified:
   trunk/libavcodec/arm/dsputil_neon.c
   trunk/libavcodec/arm/h264idct_neon.S

Modified: trunk/libavcodec/arm/dsputil_neon.c
==============================================================================
--- trunk/libavcodec/arm/dsputil_neon.c	Thu Dec 25 19:27:49 2008	(r16311)
+++ trunk/libavcodec/arm/dsputil_neon.c	Fri Dec 26 00:13:43 2008	(r16312)
@@ -94,6 +94,15 @@ void ff_h264_h_loop_filter_chroma_neon(u
 
 void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride);
 void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride);
+void ff_h264_idct_add16_neon(uint8_t *dst, const int *block_offset,
+                             DCTELEM *block, int stride,
+                             const uint8_t nnzc[6*8]);
+void ff_h264_idct_add16intra_neon(uint8_t *dst, const int *block_offset,
+                                  DCTELEM *block, int stride,
+                                  const uint8_t nnzc[6*8]);
+void ff_h264_idct_add8_neon(uint8_t **dest, const int *block_offset,
+                            DCTELEM *block, int stride,
+                            const uint8_t nnzc[6*8]);
 
 void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
 {
@@ -166,4 +175,7 @@ void ff_dsputil_init_neon(DSPContext *c,
 
     c->h264_idct_add = ff_h264_idct_add_neon;
     c->h264_idct_dc_add = ff_h264_idct_dc_add_neon;
+    c->h264_idct_add16      = ff_h264_idct_add16_neon;
+    c->h264_idct_add16intra = ff_h264_idct_add16intra_neon;
+    c->h264_idct_add8       = ff_h264_idct_add8_neon;
 }

Modified: trunk/libavcodec/arm/h264idct_neon.S
==============================================================================
--- trunk/libavcodec/arm/h264idct_neon.S	Thu Dec 25 19:27:49 2008	(r16311)
+++ trunk/libavcodec/arm/h264idct_neon.S	Fri Dec 26 00:13:43 2008	(r16312)
@@ -20,6 +20,7 @@
 
 #include "asm.S"
 
+        preserve8
         .fpu neon
 
         .text
@@ -94,3 +95,95 @@ function ff_h264_idct_dc_add_neon, expor
         vst1.32         {d1[1]},  [r0,:32], r2
         bx              lr
         .endfunc
+
+function ff_h264_idct_add16_neon, export=1      @ (dst, block_offset, block, stride, nnzc): visit 16 blocks, dispatching each to one of two per-block routines
+        push            {r4-r8,lr}              @ 6 words pushed, so the stacked nnzc argument sits at sp+24
+        mov             r4,  r0                 @ r4 = dst
+        mov             r5,  r1                 @ r5 = block_offset cursor
+        mov             r1,  r2                 @ r1 = block, passed on to the per-block routine
+        mov             r2,  r3                 @ r2 = stride, passed on to the per-block routine
+        ldr             r6,  [sp, #24]          @ r6 = nnzc
+        movw            r7,  #:lower16:scan8    @ r7 = scan8 cursor, low half of the address
+        movt            r7,  #:upper16:scan8    @ high half of the address
+        mov             ip,  #16                @ ip = blocks remaining
+1:      ldrb            r8,  [r7], #1           @ r8 = next scan8 entry
+        ldr             r0,  [r5], #4           @ r0 = next block_offset entry
+        ldrb            r8,  [r6, r8]           @ r8 = nnz = nnzc[scan8 entry]
+        subs            r8,  r8,  #1            @ flags now describe nnz - 1
+        blt             2f                      @ nnz == 0: nothing to do for this block
+        ldrsh           lr,  [r1]               @ lr = first halfword of the block, flags untouched
+        add             r0,  r0,  r4            @ r0 = dst + offset, flags untouched
+        movne           lr,  #0                 @ nnz != 1: zero lr so the cmp below yields eq
+        cmp             lr,  #0
+        adrne           lr,  ff_h264_idct_dc_add_neon   @ nnz == 1 and first halfword != 0
+        adreq           lr,  ff_h264_idct_add_neon      @ every other case with nnz != 0
+        blx             lr                      @ the loop relies on the callee leaving r1, r2 and ip unchanged
+2:      subs            ip,  ip,  #1
+        add             r1,  r1,  #32           @ advance block by 32 bytes, flags from subs are kept
+        bne             1b
+        pop             {r4-r8,pc}
+        .endfunc
+
+function ff_h264_idct_add16intra_neon, export=1 @ (dst, block_offset, block, stride, nnzc): visit 16 blocks, calling a per-block routine unless nnz and the first halfword are both zero
+        push            {r4-r8,lr}              @ 6 words pushed, so the stacked nnzc argument sits at sp+24
+        mov             r4,  r0                 @ r4 = dst
+        mov             r5,  r1                 @ r5 = block_offset cursor
+        mov             r1,  r2                 @ r1 = block, passed on to the per-block routine
+        mov             r2,  r3                 @ r2 = stride, passed on to the per-block routine
+        ldr             r6,  [sp, #24]          @ r6 = nnzc
+        movw            r7,  #:lower16:scan8    @ r7 = scan8 cursor, low half of the address
+        movt            r7,  #:upper16:scan8    @ high half of the address
+        mov             ip,  #16                @ ip = blocks remaining
+1:      ldrb            r8,  [r7], #1           @ r8 = next scan8 entry
+        ldr             r0,  [r5], #4           @ r0 = next block_offset entry
+        ldrb            r8,  [r6, r8]           @ r8 = nnz = nnzc[scan8 entry]
+        add             r0,  r0,  r4            @ r0 = dst + offset
+        cmp             r8,  #0                 @ Z set when nnz == 0
+        ldrsh           r8,  [r1]               @ r8 = first halfword of the block, flags untouched
+        adrne           lr,  ff_h264_idct_add_neon      @ nnz != 0
+        adreq           lr,  ff_h264_idct_dc_add_neon   @ nnz == 0, used only if the test below says ne
+        cmpeq           r8,  #0                 @ runs only when nnz == 0: test the first halfword
+        blxne           lr                      @ skipped only when nnz == 0 and the first halfword == 0
+        subs            ip,  ip,  #1
+        add             r1,  r1,  #32           @ advance block by 32 bytes, flags from subs are kept
+        bne             1b
+        pop             {r4-r8,pc}
+        .endfunc
+
+function ff_h264_idct_add8_neon, export=1       @ (dest, block_offset, block, stride, nnzc): visit blocks 16-23, four per dest[] pointer
+        push            {r4-r10,lr}             @ 8 words pushed, so the stacked nnzc argument sits at sp+32
+        ldm             r0,  {r4,r9}            @ r4 = dest[0], r9 = dest[1]
+        add             r5,  r1,  #16*4         @ r5 = block_offset cursor, starting at entry 16
+        add             r1,  r2,  #16*32        @ r1 = block advanced past 16 blocks of 32 bytes
+        mov             r2,  r3                 @ r2 = stride, passed on to the per-block routine
+        ldr             r6,  [sp, #32]          @ r6 = nnzc
+        movw            r7,  #:lower16:scan8+16 @ r7 = scan8 cursor, starting at entry 16
+        movt            r7,  #:upper16:scan8+16 @ high half of the address
+        mov             ip,  #7                 @ count 7..0 so that bit 2 of ip splits the run into 4 + 4
+1:      ldrb            r8,  [r7], #1           @ r8 = next scan8 entry
+        ldr             r0,  [r5], #4           @ r0 = next block_offset entry
+        ldrb            r8,  [r6, r8]           @ r8 = nnz = nnzc[scan8 entry]
+        tst             ip,  #4                 @ bit 2 set for ip 7..4, clear for ip 3..0
+        addeq           r0,  r0,  r9            @ ip 3..0: last four blocks are relative to dest[1]
+        addne           r0,  r0,  r4            @ ip 7..4: first four blocks are relative to dest[0]
+        cmp             r8,  #0                 @ Z set when nnz == 0
+        ldrsh           r8,  [r1]               @ r8 = first halfword of the block, flags untouched
+        adrne           lr,  ff_h264_idct_add_neon      @ nnz != 0
+        adreq           lr,  ff_h264_idct_dc_add_neon   @ nnz == 0, used only if the test below says ne
+        cmpeq           r8,  #0                 @ runs only when nnz == 0: test the first halfword
+        blxne           lr                      @ skipped only when nnz == 0 and the first halfword == 0
+        subs            ip,  ip,  #1
+        add             r1,  r1,  #32           @ advance block by 32 bytes, flags from subs are kept
+        bge             1b                      @ loop while ip >= 0, giving eight iterations
+        pop             {r4-r10,pc}
+        .endfunc
+
+        .section .rodata
+scan8:  .byte           4+1*8, 5+1*8, 4+2*8, 5+2*8     @ entries 0-15: walked by the two add16 loops
+        .byte           6+1*8, 7+1*8, 6+2*8, 7+2*8     @ each value is used as a byte index into nnzc
+        .byte           4+3*8, 5+3*8, 4+4*8, 5+4*8
+        .byte           6+3*8, 7+3*8, 6+4*8, 7+4*8
+        .byte           1+1*8, 2+1*8                   @ entries 16-23: walked by ff_h264_idct_add8_neon
+        .byte           1+2*8, 2+2*8
+        .byte           1+4*8, 2+4*8
+        .byte           1+5*8, 2+5*8




More information about the ffmpeg-cvslog mailing list