[FFmpeg-devel] [PATCH] ARMv6 VP8 IDCT

David Conrad lessen42
Tue Jun 29 08:45:45 CEST 2010


---
 libavcodec/arm/Makefile          |    4 +
 libavcodec/arm/vp8dsp_armv6.S    |  144 ++++++++++++++++++++++++++++++++++++++
 libavcodec/arm/vp8dsp_init_arm.c |   33 +++++++++
 libavcodec/vp8dsp.c              |    2 +
 libavcodec/vp8dsp.h              |    1 +
 5 files changed, 184 insertions(+), 0 deletions(-)
 create mode 100644 libavcodec/arm/vp8dsp_armv6.S
 create mode 100644 libavcodec/arm/vp8dsp_init_arm.c

diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
index 0f2466f..4214c10 100644
--- a/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile
@@ -2,6 +2,7 @@ OBJS-$(CONFIG_DCA_DECODER)             += arm/dcadsp_init_arm.o         \
 
 OBJS-$(CONFIG_VP5_DECODER)             += arm/vp56dsp_init_arm.o
 OBJS-$(CONFIG_VP6_DECODER)             += arm/vp56dsp_init_arm.o
+OBJS-$(CONFIG_VP8_DECODER)             += arm/vp8dsp_init_arm.o
 
 OBJS-$(CONFIG_H264DSP)                 += arm/h264dsp_init_arm.o        \
                                           arm/h264pred_init_arm.o       \
@@ -18,9 +19,12 @@ OBJS-$(HAVE_ARMV5TE)                   += arm/dsputil_init_armv5te.o    \
                                           arm/mpegvideo_armv5te_s.o     \
                                           arm/simple_idct_armv5te.o     \
 
+ARMV6-OBJS-$(CONFIG_VP6_DECODER)       += arm/vp8dsp_armv6.o            \
+
 OBJS-$(HAVE_ARMV6)                     += arm/dsputil_init_armv6.o      \
                                           arm/dsputil_armv6.o           \
                                           arm/simple_idct_armv6.o       \
+                                          $(ARMV6-OBJS-yes)
 
 OBJS-$(HAVE_ARMVFP)                    += arm/dsputil_vfp.o             \
                                           arm/dsputil_init_vfp.o        \
diff --git a/libavcodec/arm/vp8dsp_armv6.S b/libavcodec/arm/vp8dsp_armv6.S
new file mode 100644
index 0000000..23ba8ec
--- /dev/null
+++ b/libavcodec/arm/vp8dsp_armv6.S
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2010 David Conrad
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "asm.S"
+
+.macro idct_add16 pix, diff
+    uxtb16      ip, \pix
+    uxtb16      lr, \pix, ror #8
+    sadd16      ip, ip, \diff
+    sadd16      lr, lr, \diff
+    usat16      lr, #8, lr
+    usat16      ip, #8, ip
+    orr         ip, ip, lr, lsl #8
+    str         ip, [r0], r2
+.endm
+
+function ff_vp8_idct_dc_add_armv6, export=1
+    ldrsh       r1, [r1]
+    push        {r4-r5,lr}
+    add         r1, r1, #4
+    asr         r1, r1, #3
+
+    ldr         r3, [r0]
+    pkhbt       r1, r1, r1, lsl #16
+    ldr         r4, [r0, r2]
+    ldr         r5, [r0, r2, lsl #1]
+    idct_add16  r3, r1
+    idct_add16  r4, r1
+    ldr         r3, [r0, r2]
+    idct_add16  r5, r1
+    idct_add16  r3, r1
+    pop         {r4-r5,pc}
+endfunc
+
+// 2x 1D IDCT loaded from src, result is in r6,r7,r8,r9
+.macro vp8_idct_1d src, increment
+    ldr         r10, [\src, #1*8]
+    ldr         r9,  [\src, #3*8]
+    ldr         r4,  [\src, #2*8]
+    ldr         r3,  [\src], #4
+
+    smulwt      r5,  r12, r10
+    smulwb      r7,  r12, r10           // [1]*35468
+    smulwt      r6,  r11, r9
+    smulwb      r8,  r11, r9            // [3]*20091
+    pkhbt       r5,  r7,  r5,  lsl #16
+    ssub16      r7,  r3,  r4            // t1
+    pkhbt       r6,  r8,  r6,  lsl #16
+    ssub16      r5,  r5,  r6
+
+    smulwt      r6,  r11, r10
+    smulwb      r8,  r11, r10           // [1]*20091
+    smlawt      r6,  r12, r9,  r6
+    smlawb      r8,  r12, r9,  r8       //+[3]*35468
+    ssub16      r5,  r5,  r9            // t2
+    sadd16      r3,  r3,  r4            // t0
+    pkhbt       r6,  r8,  r6,  lsl #16
+    sadd16      r6,  r6,  r10           // t3
+
+    ssub16      r9,  r3,  r6
+    ssub16      r8,  r7,  r5
+    sadd16      r7,  r7,  r5
+    sadd16      r6,  r3,  r6
+.endm
+
+.macro add_diff s0, s1, row, offset
+    ldrb        r5,  [r0, #\offset+1]
+    ldrb        r4,  [r0, #\offset]
+    sxtah       r3,  r1,  \s1, ror #16*\row
+    sxtah       r10, r1,  \s0, ror #16*\row     // +4
+    add         r5,  r5,  r3,  asr #3
+    add         r4,  r4,  r10, asr #3
+    usat        r5,  #8,  r5
+    usat        r4,  #8,  r4
+    strb        r5,  [r0, #\offset+1]
+.if \offset
+    strb        r4,  [r0, #\offset]
+.else
+    strb        r4,  [r0], r2
+.endif
+.endm
+
+// 1D IDCT of 2 cols at once, followed by 2 2x2 transposes
+function vp8_idct_col
+    vp8_idct_1d r1
+    pkhbt       r4,  r6,  r7,  lsl #16
+    pkhbt       r5,  r8,  r9,  lsl #16
+    pkhtb       r6,  r7,  r6,  asr #16
+    pkhtb       r7,  r9,  r8,  asr #16
+    bx          lr
+endfunc
+
+// 1D IDCT of 2 rows at once, read from the stack, and added to pixels
+function vp8_idct_row_add
+    vp8_idct_1d sp
+    add_diff    r8, r9, 0, 2
+    add_diff    r6, r7, 0, 0
+    add_diff    r8, r9, 1, 2
+    add_diff    r6, r7, 1, 0
+    bx          lr
+endfunc
+
+function ff_vp8_idct_add_armv6, export=1
+    push        {r4-r11,lr}
+    sub         sp,  sp,  #16*2
+#if HAVE_ARMV6T2
+    movw        r11, 20091
+    movw        r12, 35468
+#else
+    ldr         r11, =20091
+    ldr         r12, =35468
+#endif
+
+    bl vp8_idct_col
+    strd        r4,  [sp, #0]
+    strd        r6,  [sp, #8]
+    bl vp8_idct_col
+    strd        r4,  [sp, #16]
+    strd        r6,  [sp, #24]
+
+    mov         r1,  #4     // rounding factor
+    bl vp8_idct_row_add
+    bl vp8_idct_row_add
+
+    add         sp,  sp,  #16*2 - 8     // 2x vp8_idct_1d advances sp by 8
+    pop         {r4-r11,pc}
+endfunc
diff --git a/libavcodec/arm/vp8dsp_init_arm.c b/libavcodec/arm/vp8dsp_init_arm.c
new file mode 100644
index 0000000..3b3b524
--- /dev/null
+++ b/libavcodec/arm/vp8dsp_init_arm.c
@@ -0,0 +1,33 @@
+/*
+ * VP8 ARM optimized dsp functions
+ * Copyright (c) 2010 David Conrad
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/vp8dsp.h"
+
+void ff_vp8_idct_dc_add_armv6(uint8_t *dst, DCTELEM block[16], int stride);
+void ff_vp8_idct_add_armv6(uint8_t *dst, DCTELEM block[16], int stride);
+
+av_cold void ff_vp8dsp_init_arm(VP8DSPContext *c)
+{
+    if (HAVE_ARMV6) {
+        c->vp8_idct_dc_add = ff_vp8_idct_dc_add_armv6;
+        c->vp8_idct_add    = ff_vp8_idct_add_armv6;
+    }
+}
diff --git a/libavcodec/vp8dsp.c b/libavcodec/vp8dsp.c
index 81825b0..fb4c748 100644
--- a/libavcodec/vp8dsp.c
+++ b/libavcodec/vp8dsp.c
@@ -466,6 +466,8 @@ av_cold void ff_vp8dsp_init(VP8DSPContext *dsp)
 
     if (HAVE_MMX)
         ff_vp8dsp_init_x86(dsp);
+    if (ARCH_ARM)
+        ff_vp8dsp_init_arm(dsp);
     if (HAVE_ALTIVEC)
         ff_vp8dsp_init_altivec(dsp);
 }
diff --git a/libavcodec/vp8dsp.h b/libavcodec/vp8dsp.h
index b660048..1ed05f6 100644
--- a/libavcodec/vp8dsp.h
+++ b/libavcodec/vp8dsp.h
@@ -67,6 +67,7 @@ void ff_put_vp8_pixels4_c(uint8_t *dst, uint8_t *src, int stride, int h, int x,
 
 void ff_vp8dsp_init(VP8DSPContext *c);
 void ff_vp8dsp_init_x86(VP8DSPContext *c);
+void ff_vp8dsp_init_arm(VP8DSPContext *c);
 void ff_vp8dsp_init_altivec(VP8DSPContext *c);
 
 #endif /* AVCODEC_VP8DSP_H */
-- 
1.7.1




More information about the ffmpeg-devel mailing list