[FFmpeg-cvslog] vf_blend: Use integers for divide mode

Timothy Gu git at videolan.org
Sun Feb 14 18:12:45 CET 2016


ffmpeg | branch: master | Timothy Gu <timothygu99 at gmail.com> | Sun Feb 14 02:15:18 2016 +0000| [a678d667816a86b7e5f8c4b558f8ed333bb98841] | committer: Timothy Gu

vf_blend: Use integers for divide mode

2.5x faster for 8-bit mode without autovectorization in GCC, 2x
slower with it on x86. However, since the platforms we enable GCC
autovectorization on most probably has support for SSE2
optimization (added in the subsequent commit), this commit should
in general do good.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=a678d667816a86b7e5f8c4b558f8ed333bb98841
---

 libavfilter/vf_blend.c |    4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavfilter/vf_blend.c b/libavfilter/vf_blend.c
index 4b4d435..61aa17e 100644
--- a/libavfilter/vf_blend.c
+++ b/libavfilter/vf_blend.c
@@ -247,7 +247,7 @@ DEFINE_BLEND8(hardlight,  (B < 128) ? MULTIPLY(2, B, A) : SCREEN(2, B, A))
 DEFINE_BLEND8(hardmix,    (A < (255 - B)) ? 0: 255)
 DEFINE_BLEND8(darken,     FFMIN(A, B))
 DEFINE_BLEND8(lighten,    FFMAX(A, B))
-DEFINE_BLEND8(divide,     av_clip_uint8(((float)A / ((float)B) * 255)))
+DEFINE_BLEND8(divide,     av_clip_uint8(B == 0 ? 255 : 255 * A / B))
 DEFINE_BLEND8(dodge,      DODGE(A, B))
 DEFINE_BLEND8(burn,       BURN(A, B))
 DEFINE_BLEND8(softlight,  (A > 127) ? B + (255 - B) * (A - 127.5) / 127.5 * (0.5 - fabs(B - 127.5) / 255): B - B * ((127.5 - A) / 127.5) * (0.5 - fabs(B - 127.5)/255))
@@ -287,7 +287,7 @@ DEFINE_BLEND16(hardlight,  (B < 32768) ? MULTIPLY(2, B, A) : SCREEN(2, B, A))
 DEFINE_BLEND16(hardmix,    (A < (65535 - B)) ? 0: 65535)
 DEFINE_BLEND16(darken,     FFMIN(A, B))
 DEFINE_BLEND16(lighten,    FFMAX(A, B))
-DEFINE_BLEND16(divide,     av_clip_uint16(((float)A / ((float)B) * 65535)))
+DEFINE_BLEND16(divide,     av_clip_uint16(B == 0 ? 65535 : 65535 * A / B))
 DEFINE_BLEND16(dodge,      DODGE(A, B))
 DEFINE_BLEND16(burn,       BURN(A, B))
 DEFINE_BLEND16(softlight,  (A > 32767) ? B + (65535 - B) * (A - 32767.5) / 32767.5 * (0.5 - fabs(B - 32767.5) / 65535): B - B * ((32767.5 - A) / 32767.5) * (0.5 - fabs(B - 32767.5)/65535))



More information about the ffmpeg-cvslog mailing list