[FFmpeg-cvslog] avcodec/ppc/fdctdsp: POWER LE support in ff_fdct_altivec()

Rong Yan git at videolan.org
Fri Apr 3 18:11:41 CEST 2015


ffmpeg | branch: master | Rong Yan <rongyan236 at gmail.com> | Fri Apr  3 12:36:18 2015 +0000| [af5fec9538d88540af5bb2dd0e4b91162ce6fa13] | committer: Michael Niedermayer

avcodec/ppc/fdctdsp: POWER LE support in ff_fdct_altivec()

Add macros VEC_FMERGEH() and VEC_FMERGEL()

Signed-off-by: Michael Niedermayer <michaelni at gmx.at>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=af5fec9538d88540af5bb2dd0e4b91162ce6fa13
---

 libavcodec/ppc/fdctdsp.c |   86 +++++++++++++++++++++++++---------------------
 1 file changed, 47 insertions(+), 39 deletions(-)

diff --git a/libavcodec/ppc/fdctdsp.c b/libavcodec/ppc/fdctdsp.c
index 40f4c6c..80f1366 100644
--- a/libavcodec/ppc/fdctdsp.c
+++ b/libavcodec/ppc/fdctdsp.c
@@ -78,6 +78,14 @@ static const vector float fdctconsts[3] = {
 #define LD_WA vec_splat(cnsts2, 2)
 #define LD_WB vec_splat(cnsts2, 3)
 
+#if HAVE_BIGENDIAN
+#define VEC_FMERGEH(a, b) vec_mergeh(a, b)
+#define VEC_FMERGEL(a, b) vec_mergel(a, b)
+#else
+#define VEC_FMERGEH(a, b) vec_mergel(b, a)
+#define VEC_FMERGEL(a, b) vec_mergeh(b, a)
+#endif
+
 #define FDCTROW(b0, b1, b2, b3, b4, b5, b6, b7) /* {{{ */           \
     x0 = vec_add(b0, b7);               /* x0 = b0 + b7; */         \
     x7 = vec_sub(b0, b7);               /* x7 = b0 - b7; */         \
@@ -385,45 +393,45 @@ void ff_fdct_altivec(int16_t *block)
     /* }}} */
 
     /* 8x8 matrix transpose (vector float[8][2]) {{{ */
-    x0 = vec_mergel(b00, b20);
-    x1 = vec_mergeh(b00, b20);
-    x2 = vec_mergel(b10, b30);
-    x3 = vec_mergeh(b10, b30);
-
-    b00 = vec_mergeh(x1, x3);
-    b10 = vec_mergel(x1, x3);
-    b20 = vec_mergeh(x0, x2);
-    b30 = vec_mergel(x0, x2);
-
-    x4 = vec_mergel(b41, b61);
-    x5 = vec_mergeh(b41, b61);
-    x6 = vec_mergel(b51, b71);
-    x7 = vec_mergeh(b51, b71);
-
-    b41 = vec_mergeh(x5, x7);
-    b51 = vec_mergel(x5, x7);
-    b61 = vec_mergeh(x4, x6);
-    b71 = vec_mergel(x4, x6);
-
-    x0 = vec_mergel(b01, b21);
-    x1 = vec_mergeh(b01, b21);
-    x2 = vec_mergel(b11, b31);
-    x3 = vec_mergeh(b11, b31);
-
-    x4 = vec_mergel(b40, b60);
-    x5 = vec_mergeh(b40, b60);
-    x6 = vec_mergel(b50, b70);
-    x7 = vec_mergeh(b50, b70);
-
-    b40 = vec_mergeh(x1, x3);
-    b50 = vec_mergel(x1, x3);
-    b60 = vec_mergeh(x0, x2);
-    b70 = vec_mergel(x0, x2);
-
-    b01 = vec_mergeh(x5, x7);
-    b11 = vec_mergel(x5, x7);
-    b21 = vec_mergeh(x4, x6);
-    b31 = vec_mergel(x4, x6);
+    x0 = VEC_FMERGEL(b00, b20);
+    x1 = VEC_FMERGEH(b00, b20);
+    x2 = VEC_FMERGEL(b10, b30);
+    x3 = VEC_FMERGEH(b10, b30);
+
+    b00 = VEC_FMERGEH(x1, x3);
+    b10 = VEC_FMERGEL(x1, x3);
+    b20 = VEC_FMERGEH(x0, x2);
+    b30 = VEC_FMERGEL(x0, x2);
+
+    x4 = VEC_FMERGEL(b41, b61);
+    x5 = VEC_FMERGEH(b41, b61);
+    x6 = VEC_FMERGEL(b51, b71);
+    x7 = VEC_FMERGEH(b51, b71);
+
+    b41 = VEC_FMERGEH(x5, x7);
+    b51 = VEC_FMERGEL(x5, x7);
+    b61 = VEC_FMERGEH(x4, x6);
+    b71 = VEC_FMERGEL(x4, x6);
+
+    x0 = VEC_FMERGEL(b01, b21);
+    x1 = VEC_FMERGEH(b01, b21);
+    x2 = VEC_FMERGEL(b11, b31);
+    x3 = VEC_FMERGEH(b11, b31);
+
+    x4 = VEC_FMERGEL(b40, b60);
+    x5 = VEC_FMERGEH(b40, b60);
+    x6 = VEC_FMERGEL(b50, b70);
+    x7 = VEC_FMERGEH(b50, b70);
+
+    b40 = VEC_FMERGEH(x1, x3);
+    b50 = VEC_FMERGEL(x1, x3);
+    b60 = VEC_FMERGEH(x0, x2);
+    b70 = VEC_FMERGEL(x0, x2);
+
+    b01 = VEC_FMERGEH(x5, x7);
+    b11 = VEC_FMERGEL(x5, x7);
+    b21 = VEC_FMERGEH(x4, x6);
+    b31 = VEC_FMERGEL(x4, x6);
     /* }}} */
 
     FDCTCOL(b00, b10, b20, b30, b40, b50, b60, b70);



More information about the ffmpeg-cvslog mailing list