[FFmpeg-cvslog] avcodec/h264: enable sse2 chroma deblock/loop filter functions

James Darnley git at videolan.org
Mon Feb 27 14:09:36 EET 2017


ffmpeg | branch: h264_assembly | James Darnley <jdarnley at obe.tv> | Wed Feb 22 01:17:06 2017 +0100| [4deaf9e78a45e7d2ee9f8190edd16f3217c16d88] | committer: James Darnley

avcodec/h264: enable sse2 chroma deblock/loop filter functions

Between 1.00 and 1.16 times faster on Intel Yorkfield Core 2 Quad.
Between 1.11 and 1.39 times faster on Intel Kaby Lake Pentium.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=4deaf9e78a45e7d2ee9f8190edd16f3217c16d88
---

 libavcodec/x86/h264_deblock.asm |  1 +
 libavcodec/x86/h264dsp_init.c   | 10 ++++++++++
 2 files changed, 11 insertions(+)

diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
index 32aa3d3..6702ae9 100644
--- a/libavcodec/x86/h264_deblock.asm
+++ b/libavcodec/x86/h264_deblock.asm
@@ -1252,6 +1252,7 @@ RET
 
 %endmacro ; DEBLOCK_CHROMA_XMM
 
+DEBLOCK_CHROMA_XMM sse2
 DEBLOCK_CHROMA_XMM avx
 
 ;-----------------------------------------------------------------------------
diff --git a/libavcodec/x86/h264dsp_init.c b/libavcodec/x86/h264dsp_init.c
index 51082e8..0643b37 100644
--- a/libavcodec/x86/h264dsp_init.c
+++ b/libavcodec/x86/h264dsp_init.c
@@ -304,6 +304,16 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
 #if ARCH_X86_64
             c->h264_h_loop_filter_luma_mbaff = ff_deblock_h_luma_mbaff_8_sse2;
 #endif
+
+            c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_8_sse2;
+            c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_8_sse2;
+            if (chroma_format_idc <= 1) {
+                c->h264_h_loop_filter_chroma       = ff_deblock_h_chroma_8_sse2;
+                c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma_intra_8_sse2;
+            } else {
+                c->h264_h_loop_filter_chroma       = ff_deblock_h_chroma422_8_sse2;
+                c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma422_intra_8_sse2;
+            }
         }
         if (EXTERNAL_SSSE3(cpu_flags)) {
             c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_ssse3;



More information about the ffmpeg-cvslog mailing list