[FFmpeg-cvslog] arm/aarch64: vp9lpf: Calculate !hev directly
Martin Storsjö
git at videolan.org
Thu Sep 28 02:52:57 EEST 2017
ffmpeg | branch: master | Martin Storsjö <martin at martin.st> | Thu Jan 12 16:52:33 2017 +0200| [e1f9de86f454861b69b199ad801adc2ec6c3b220] | committer: Martin Storsjö
arm/aarch64: vp9lpf: Calculate !hev directly
Previously we first calculated hev, and then negated it.
Since the negation could already be scheduled in the middle
of another calculation, the gain is small and does not show up in every case.
Before: Cortex A7 A8 A9 A53 A53/AArch64
vp9_loop_filter_v_4_8_neon: 147.0 129.0 115.8 89.0 88.7
vp9_loop_filter_v_8_8_neon: 242.0 198.5 174.7 140.0 136.7
vp9_loop_filter_v_16_8_neon: 500.0 419.5 382.7 293.0 275.7
vp9_loop_filter_v_16_16_neon: 971.2 825.5 731.5 579.0 453.0
After:
vp9_loop_filter_v_4_8_neon: 143.0 127.7 114.8 88.0 87.7
vp9_loop_filter_v_8_8_neon: 241.0 197.2 173.7 140.0 136.7
vp9_loop_filter_v_16_8_neon: 497.0 419.5 379.7 293.0 275.7
vp9_loop_filter_v_16_16_neon: 965.2 818.7 731.4 579.0 452.0
Signed-off-by: Martin Storsjö <martin at martin.st>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e1f9de86f454861b69b199ad801adc2ec6c3b220
---
libavcodec/aarch64/vp9lpf_neon.S | 5 ++---
libavcodec/arm/vp9lpf_neon.S | 5 ++---
2 files changed, 4 insertions(+), 6 deletions(-)
diff --git a/libavcodec/aarch64/vp9lpf_neon.S b/libavcodec/aarch64/vp9lpf_neon.S
index e9c7d9edc8..3b8e6ebc99 100644
--- a/libavcodec/aarch64/vp9lpf_neon.S
+++ b/libavcodec/aarch64/vp9lpf_neon.S
@@ -292,7 +292,7 @@
.if \mix != 0
sxtl v1.8h, v1.8b
.endif
- cmhi v5\sz, v5\sz, v3\sz // hev
+ cmhs v5\sz, v3\sz, v5\sz // !hev
.if \wd == 8
// If a 4/8 or 8/4 mix is used, clear the relevant half of v6
.if \mix != 0
@@ -306,11 +306,10 @@
.elseif \wd == 8
bic v4\sz, v4\sz, v6\sz // fm && !flat8in
.endif
- mvn v5\sz, v5\sz // !hev
+ and v5\sz, v5\sz, v4\sz // !hev && fm && !flat8in
.if \wd == 16
and v7\sz, v7\sz, v6\sz // flat8out && flat8in && fm
.endif
- and v5\sz, v5\sz, v4\sz // !hev && fm && !flat8in
mul_sz \tmp3\().8h, \tmp4\().8h, \tmp3\().8h, \tmp4\().8h, \tmp5\().8h, \tmp5\().8h, \sz // 3 * (q0 - p0)
bic \tmp1\sz, \tmp1\sz, v5\sz // if (!hev) av_clip_int8 = 0
diff --git a/libavcodec/arm/vp9lpf_neon.S b/libavcodec/arm/vp9lpf_neon.S
index fbf2901f75..c57c0e9c31 100644
--- a/libavcodec/arm/vp9lpf_neon.S
+++ b/libavcodec/arm/vp9lpf_neon.S
@@ -141,7 +141,7 @@
.if \wd == 8
vcle.u8 d6, d6, d0 @ flat8in
.endif
- vcgt.u8 d5, d5, d3 @ hev
+ vcle.u8 d5, d5, d3 @ !hev
.if \wd == 8
vand d6, d6, d4 @ flat8in && fm
.endif
@@ -151,11 +151,10 @@
.elseif \wd == 8
vbic d4, d4, d6 @ fm && !flat8in
.endif
- vmvn d5, d5 @ !hev
+ vand d5, d5, d4 @ !hev && fm && !flat8in
.if \wd == 16
vand d7, d7, d6 @ flat8out && flat8in && fm
.endif
- vand d5, d5, d4 @ !hev && fm && !flat8in
vmul.s16 \tmpq2, \tmpq2, \tmpq3 @ 3 * (q0 - p0)
vbic \tmp1, \tmp1, d5 @ if (!hev) av_clip_int8 = 0
More information about the ffmpeg-cvslog
mailing list