[FFmpeg-devel] [PATCH] x86/hevc_sao: fix loading of RIP address

James Almer jamrial at gmail.com
Fri Feb 6 17:54:24 CET 2015


pb_eo must be addressed RIP-relative on MSVC64, and a RIP-relative
address cannot be combined with an index register, so the table base is
first loaded into an intermediate register. This should fix the link
failures.

Suggested by Hendrik Leppkes and Christophe Gisquet.

Signed-off-by: James Almer <jamrial at gmail.com>
---
 libavcodec/x86/hevc_sao.asm | 42 +++++++++---------------------------------
 1 file changed, 9 insertions(+), 33 deletions(-)

diff --git a/libavcodec/x86/hevc_sao.asm b/libavcodec/x86/hevc_sao.asm
index 6058967..f4eca0c 100644
--- a/libavcodec/x86/hevc_sao.asm
+++ b/libavcodec/x86/hevc_sao.asm
@@ -293,24 +293,14 @@ HEVC_SAO_BAND_FILTER_16 12, 64, 2
 ;void ff_hevc_sao_edge_filter_<width>_8_<opt>(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val,
 ;                                             int eo, int width, int height);
 %macro HEVC_SAO_EDGE_FILTER_8 2-3
+%if ARCH_X86_64
+cglobal hevc_sao_edge_filter_%1_8, 4, 9, 8, dst, src, dststride, offset, eo, a_stride, b_stride, height, tmp
+%define tmp2q heightq
 %if WIN64
-cglobal hevc_sao_edge_filter_%1_8, 4, 8, 8, dst, src, dststride, offset, a_stride, b_stride, height, tmp
-%define  eoq heightq
     movsxd           eoq, dword r4m
-    movsx      a_strideq, byte [pb_eo+eoq*4+1]
-    movsx      b_strideq, byte [pb_eo+eoq*4+3]
-    imul       a_strideq, EDGE_SRCSTRIDE
-    imul       b_strideq, EDGE_SRCSTRIDE
-    movsx           tmpq, byte [pb_eo+eoq*4]
-    add        a_strideq, tmpq
-    movsx           tmpq, byte [pb_eo+eoq*4+2]
-    add        b_strideq, tmpq
-    mov          heightd, r6m
-
-%elif ARCH_X86_64
-cglobal hevc_sao_edge_filter_%1_8, 5, 9, 8, dst, src, dststride, offset, eo, a_stride, b_stride, height, tmp
-%define tmp2q heightq
+%else
     movsxd           eoq, eod
+%endif
     lea            tmp2q, [pb_eo]
     movsx      a_strideq, byte [tmp2q+eoq*4+1]
     movsx      b_strideq, byte [tmp2q+eoq*4+3]
@@ -439,26 +429,13 @@ INIT_YMM cpuname
 ;void ff_hevc_sao_edge_filter_<width>_<depth>_<opt>(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val,
 ;                                                   int eo, int width, int height);
 %macro HEVC_SAO_EDGE_FILTER_16 3
+cglobal hevc_sao_edge_filter_%2_%1, 4, 9, 16, dst, src, dststride, offset, eo, a_stride, b_stride, height, tmp
+%define tmp2q heightq
 %if WIN64
-cglobal hevc_sao_edge_filter_%2_%1, 4, 8, 16, dst, src, dststride, offset, a_stride, b_stride, height, tmp
-%define  eoq heightq
     movsxd           eoq, dword r4m
-    movsx      a_strideq, byte [pb_eo+eoq*4+1]
-    movsx      b_strideq, byte [pb_eo+eoq*4+3]
-    imul       a_strideq, EDGE_SRCSTRIDE>>1
-    imul       b_strideq, EDGE_SRCSTRIDE>>1
-    movsx           tmpq, byte [pb_eo+eoq*4]
-    add        a_strideq, tmpq
-    movsx           tmpq, byte [pb_eo+eoq*4+2]
-    add        b_strideq, tmpq
-    mov          heightd, r6m
-    add        a_strideq, a_strideq
-    add        b_strideq, b_strideq
-
-%else ; UNIX64
-cglobal hevc_sao_edge_filter_%2_%1, 5, 9, 16, dst, src, dststride, offset, eo, a_stride, b_stride, height, tmp
-%define tmp2q heightq
+%else
     movsxd           eoq, eod
+%endif
     lea            tmp2q, [pb_eo]
     movsx      a_strideq, byte [tmp2q+eoq*4+1]
     movsx      b_strideq, byte [tmp2q+eoq*4+3]
@@ -471,7 +448,6 @@ cglobal hevc_sao_edge_filter_%2_%1, 5, 9, 16, dst, src, dststride, offset, eo, a
     mov          heightd, r6m
     add        a_strideq, a_strideq
     add        b_strideq, b_strideq
-%endif ; ARCH
 
 %if cpuflag(avx2)
     SPLATW            m8, [offsetq+2]
-- 
2.2.2



More information about the ffmpeg-devel mailing list