[FFmpeg-cvslog] adler32: rewrite using integer SIMD.

Michael Niedermayer git at videolan.org
Sat Feb 4 20:26:03 CET 2012


ffmpeg | branch: master | Michael Niedermayer <michaelni at gmx.at> | Sat Feb  4 07:52:31 2012 +0100| [26585d2a7f5f13bec3ed8623119bf625fdb728f4] | committer: Michael Niedermayer

adler32: rewrite using integer SIMD.

about twice as fast as before.
The non-CONFIG_SMALL case is also dropped, as it is not faster than the
CONFIG_SMALL case.

Signed-off-by: Michael Niedermayer <michaelni at gmx.at>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=26585d2a7f5f13bec3ed8623119bf625fdb728f4
---

 libavutil/adler32.c |   41 +++++++++++++++++++++++++++++++----------
 1 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/libavutil/adler32.c b/libavutil/adler32.c
index 571242e..c5f96db 100644
--- a/libavutil/adler32.c
+++ b/libavutil/adler32.c
@@ -24,6 +24,7 @@
 #include "config.h"
 #include "adler32.h"
 #include "common.h"
+#include "intreadwrite.h"
 
 #define BASE 65521L /* largest prime smaller than 65536 */
 
@@ -38,22 +39,42 @@ unsigned long av_adler32_update(unsigned long adler, const uint8_t * buf,
     unsigned long s2 = adler >> 16;
 
     while (len > 0) {
-        unsigned len2 = FFMIN((len-1) & ~15, 2048);
+#if HAVE_FAST_64BIT && HAVE_FAST_UNALIGNED && !CONFIG_SMALL
+        unsigned len2 = FFMIN((len-1) & ~7, 23*8);
         if (len2) {
+            uint64_t a1= 0;
+            uint64_t a2= 0;
+            uint64_t b1= 0;
+            uint64_t b2= 0;
             len -= len2;
-
-#if CONFIG_SMALL
-        while (len2 >= 4) {
-            DO4(buf);
-            len2 -= 4;
+            s2 += s1*len2;
+            while (len2 >= 8) {
+                uint64_t v = AV_RN64(buf);
+                a2 += a1;
+                b2 += b1;
+                a1 +=  v    &0x00FF00FF00FF00FF;
+                b1 += (v>>8)&0x00FF00FF00FF00FF;
+                len2 -= 8;
+                buf+=8;
+            }
+            s1 += ((a1+b1)*0x1000100010001)>>48;
+            s2 += ((((a2&0xFFFF0000FFFF)+(b2&0xFFFF0000FFFF)+((a2>>16)&0xFFFF0000FFFF)+((b2>>16)&0xFFFF0000FFFF))*0x800000008)>>32)
+#if HAVE_BIGENDIAN
+                 + 2*((b1*0x1000200030004)>>48)
+                 +   ((a1*0x1000100010001)>>48)
+                 + 2*((a1*0x0000100020003)>>48);
+#else
+                 + 2*((a1*0x4000300020001)>>48)
+                 +   ((b1*0x1000100010001)>>48)
+                 + 2*((b1*0x3000200010000)>>48);
+#endif
         }
 #else
-        while (len2 >= 16) {
-            DO16(buf);
-            len2 -= 16;
+        while (len > 4  && s2 < (1U << 31)) {
+            DO4(buf);
+            len -= 4;
         }
 #endif
-        }
         DO1(buf); len--;
         s1 %= BASE;
         s2 %= BASE;



More information about the ffmpeg-cvslog mailing list