[FFmpeg-cvslog] avcodec: optimize mathops for Loongson-3 v1
ZhouXiaoyong
git at videolan.org
Mon Apr 27 02:27:44 CEST 2015
ffmpeg | branch: master | ZhouXiaoyong <zhouxiaoyong at loongson.cn> | Thu Apr 16 14:42:44 2015 +0800| [0ace686ae8543750165d423adfe9249f3ce4c235] | committer: Michael Niedermayer
avcodec: optimize mathops for Loongson-3 v1
HAVE_LOONGSON is replaced by HAVE_LOONGSON3. Loongson-2E and 2F also support the
Loongson SIMD instructions, but their decoding performance is low. We plan to focus
on optimizing Loongson-3A1000, 3B1500 and 3A1500, and to modify the configure file
later to support the Loongson-2 series by adding HAVE_LOONGSON2.
Signed-off-by: Michael Niedermayer <michaelni at gmx.at>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=0ace686ae8543750165d423adfe9249f3ce4c235
---
libavcodec/mathops.h | 3 ++
libavcodec/mips/mathops.h | 78 ++++++++++++++++++++++++++++++++++++++++-----
2 files changed, 73 insertions(+), 8 deletions(-)
diff --git a/libavcodec/mathops.h b/libavcodec/mathops.h
index 87d110b..46283ca 100644
--- a/libavcodec/mathops.h
+++ b/libavcodec/mathops.h
@@ -211,6 +211,8 @@ if ((y) < (x)) {\
# define FASTDIV(a,b) ((uint32_t)((((uint64_t)a) * ff_inverse[b]) >> 32))
#endif /* FASTDIV */
+#ifndef ff_sqrt
+#define ff_sqrt ff_sqrt
static inline av_const unsigned int ff_sqrt(unsigned int a)
{
unsigned int b;
@@ -230,6 +232,7 @@ static inline av_const unsigned int ff_sqrt(unsigned int a)
return b - (a < b * b);
}
+#endif
static inline int8_t ff_u8_to_s8(uint8_t a)
{
diff --git a/libavcodec/mips/mathops.h b/libavcodec/mips/mathops.h
index 5673fc0..cdc7705 100644
--- a/libavcodec/mips/mathops.h
+++ b/libavcodec/mips/mathops.h
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2009 Mans Rullgard <mans at mansr.com>
+ * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong at loongson.cn>
*
* This file is part of FFmpeg.
*
@@ -27,14 +28,73 @@
#if HAVE_INLINE_ASM
-#if HAVE_LOONGSON
+#if HAVE_LOONGSON3
+
+#define MULH MULH
+static inline av_const int MULH(int a, int b)
+{
+ int c;
+ __asm__ ("dmult %1, %2 \n\t"
+ "mflo %0 \n\t"
+ "dsrl %0, %0, 32 \n\t"
+ : "=r"(c)
+ : "r"(a),"r"(b)
+ : "hi", "lo");
+ return c;
+}
+
+#define UMULH UMULH
+static inline av_const unsigned UMULH(unsigned a, unsigned b)
+{
+ unsigned c;
+ __asm__ ("dmultu %1, %2 \n\t"
+ "mflo %0 \n\t"
+ "dsrl %0, %0, 32 \n\t"
+ : "=r"(c)
+ : "r"(a),"r"(b)
+ : "hi", "lo");
+ return c;
+}
+
+#define mid_pred mid_pred
+static inline av_const int mid_pred(int a, int b, int c)
+{
+ int t = b;
+ __asm__ ("sgt $8, %1, %2 \n\t"
+ "movn %0, %1, $8 \n\t"
+ "movn %1, %2, $8 \n\t"
+ "sgt $8, %1, %3 \n\t"
+ "movz %1, %3, $8 \n\t"
+ "sgt $8, %0, %1 \n\t"
+ "movn %0, %1, $8 \n\t"
+ : "+&r"(t),"+&r"(a)
+ : "r"(b),"r"(c)
+ : "$8");
+ return t;
+}
+
+#define ff_sqrt ff_sqrt
+static inline av_const unsigned int ff_sqrt(unsigned int a)
+{
+ unsigned int b;
+
+ __asm__ ("ctc1 %1, $f0 \n\t"
+ "sqrt.s $f2, $f0 \n\t"
+ "cvt.w.s $f0, $f2 \n\t"
+ "cfc1 %0, $f0 \n\t"
+ : "=r"(b)
+ : "r"(a));
+ return b;
+}
static inline av_const int64_t MAC64(int64_t d, int a, int b)
{
int64_t m;
- __asm__ ("dmult.g %1, %2, %3 \n\t"
- "daddu %0, %0, %1 \n\t"
- : "+r"(d), "=&r"(m) : "r"(a), "r"(b));
+ __asm__ ("dmult %2, %3 \n\t"
+ "mflo %1 \n\t"
+ "daddu %0, %0, %1 \n\t"
+ : "+r"(d), "=&r"(m) : "r"(a), "r"(b)
+ : "hi", "lo");
return d;
}
#define MAC64(d, a, b) ((d) = MAC64(d, a, b))
@@ -42,14 +102,16 @@ static inline av_const int64_t MAC64(int64_t d, int a, int b)
static inline av_const int64_t MLS64(int64_t d, int a, int b)
{
int64_t m;
- __asm__ ("dmult.g %1, %2, %3 \n\t"
- "dsubu %0, %0, %1 \n\t"
- : "+r"(d), "=&r"(m) : "r"(a), "r"(b));
+ __asm__ ("dmult %2, %3 \n\t"
+ "mflo %1 \n\t"
+ "dsubu %0, %0, %1 \n\t"
+ : "+r"(d), "=&r"(m) : "r"(a), "r"(b)
+ : "hi", "lo");
return d;
}
#define MLS64(d, a, b) ((d) = MLS64(d, a, b))
-#endif
+#endif /* HAVE_LOONGSON3 */
#endif /* HAVE_INLINE_ASM */
More information about the ffmpeg-cvslog
mailing list