[FFmpeg-cvslog] x86: hevc_mc: save 1 gpr in epel filter loading

Christophe Gisquet git at videolan.org
Mon Feb 16 22:19:41 CET 2015


ffmpeg | branch: master | Christophe Gisquet <christophe.gisquet at gmail.com> | Sat Feb  7 18:49:38 2015 +0000| [89cb4995fa0cc9375d9bb246512a82037ffeedc1] | committer: Michael Niedermayer

x86: hevc_mc: save 1 gpr in epel filter loading

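The 3*stride value stored in r3src is not needed until after the filter
coefficients have been loaded, so compute it later and reuse r3src as the
temporary register for the filter table address instead of a dedicated gpr
when possible.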

Signed-off-by: Michael Niedermayer <michaelni at gmx.at>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=89cb4995fa0cc9375d9bb246512a82037ffeedc1
---

 libavcodec/x86/hevc_mc.asm |   71 ++++++++++++++++++++++----------------------
 1 file changed, 35 insertions(+), 36 deletions(-)

diff --git a/libavcodec/x86/hevc_mc.asm b/libavcodec/x86/hevc_mc.asm
index 9a7367a..6ef8a60 100644
--- a/libavcodec/x86/hevc_mc.asm
+++ b/libavcodec/x86/hevc_mc.asm
@@ -136,20 +136,22 @@ QPEL_TABLE 10, 8, w, avx2
 %endmacro
 
 
-%macro EPEL_FILTER 2-4                            ; bit depth, filter index
+%macro EPEL_FILTER 5 ; bit depth, filter index, xmma, xmmb, gprtmp
 %if cpuflag(avx2)
 %assign %%offset 32
 %ifdef PIC
-    lea         rfilterq, [hevc_epel_filters_avx2_%1]
+    lea              %5q, [hevc_epel_filters_avx2_%1]
+    %define FILTER %5q
 %else
-    %define rfilterq hevc_epel_filters_avx2_%1
+    %define FILTER hevc_epel_filters_avx2_%1
 %endif
 %else
 %assign %%offset 16
 %ifdef PIC
-    lea         rfilterq, [hevc_epel_filters_sse4_%1]
+    lea              %5q, [hevc_epel_filters_sse4_%1]
+    %define FILTER %5q
 %else
-    %define rfilterq hevc_epel_filters_sse4_%1
+    %define FILTER hevc_epel_filters_sse4_%1
 %endif
 %endif ;cpuflag(avx2)
     sub              %2q, 1
@@ -158,13 +160,8 @@ QPEL_TABLE 10, 8, w, avx2
   %else
     shl              %2q, 5                      ; multiply by 32
 %endif
-%if %0 == 2
-    mova           m14, [rfilterq + %2q]        ; get 2 first values of filters
-    mova           m15, [rfilterq + %2q+%%offset]     ; get 2 last values of filters
-%else
-    mova           %3, [rfilterq + %2q]        ; get 2 first values of filters
-    mova           %4, [rfilterq + %2q+%%offset]     ; get 2 last values of filters
-%endif
+    mova           %3, [FILTER + %2q]        ; get 2 first values of filters
+    mova           %4, [FILTER + %2q+%%offset]     ; get 2 last values of filters
 %endmacro
 
 %macro EPEL_HV_FILTER 1
@@ -179,17 +176,17 @@ QPEL_TABLE 10, 8, w, avx2
 %endif
 
 %ifdef PIC
-    lea         rfilterq, [%%table]
+    lea           r3srcq, [%%table]
+    %define FILTER r3srcq
 %else
-    %define rfilterq %%table
+    %define FILTER %%table
 %endif
     sub              mxq, 1
     sub              myq, 1
     shl              mxq, %%shift                ; multiply by 32
     shl              myq, %%shift                ; multiply by 32
-    mova             m14, [rfilterq + mxq]        ; get 2 first values of filters
-    mova             m15, [rfilterq + mxq+%%offset]     ; get 2 last values of filters
-    lea           r3srcq, [srcstrideq*3]
+    mova             m14, [FILTER + mxq]        ; get 2 first values of filters
+    mova             m15, [FILTER + mxq+%%offset]     ; get 2 last values of filters
 
 %if cpuflag(avx2)
 %define %%table  hevc_epel_filters_avx2_10
@@ -197,12 +194,14 @@ QPEL_TABLE 10, 8, w, avx2
 %define %%table  hevc_epel_filters_sse4_10
 %endif
 %ifdef PIC
-    lea         rfilterq, [%%table]
+    lea           r3srcq, [%%table]
+    %define FILTER r3srcq
 %else
-    %define rfilterq %%table
+    %define FILTER %%table
 %endif
-    mova             m12, [rfilterq + myq]        ; get 2 first values of filters
-    mova             m13, [rfilterq + myq+%%offset]     ; get 2 last values of filters
+    mova             m12, [FILTER + myq]        ; get 2 first values of filters
+    mova             m13, [FILTER + myq+%%offset]     ; get 2 last values of filters
+    lea           r3srcq, [srcstrideq*3]
 %endmacro
 
 %macro QPEL_FILTER 2
@@ -733,7 +732,7 @@ cglobal hevc_put_hevc_bi_pel_pixels%1_%2, 6, 6, 6, dst, dststride, src, srcstrid
 %macro HEVC_PUT_HEVC_EPEL 2
 cglobal hevc_put_hevc_epel_h%1_%2, 5, 6, 11, dst, src, srcstride, height, mx, rfilter
 %assign %%stride ((%2 + 7)/8)
-    EPEL_FILTER       %2, mx, m4, m5
+    EPEL_FILTER       %2, mx, m4, m5, rfilter
 .loop
     EPEL_LOAD         %2, srcq-%%stride, %%stride, %1
     EPEL_COMPUTE      %2, %1, m4, m5, 1
@@ -744,7 +743,7 @@ cglobal hevc_put_hevc_epel_h%1_%2, 5, 6, 11, dst, src, srcstride, height, mx, rf
 cglobal hevc_put_hevc_uni_epel_h%1_%2, 6, 7, 11, dst, dststride, src, srcstride, height, mx, rfilter
 %assign %%stride ((%2 + 7)/8)
     movdqa            m6, [pw_%2]
-    EPEL_FILTER       %2, mx, m4, m5
+    EPEL_FILTER       %2, mx, m4, m5, rfilter
 .loop
     EPEL_LOAD         %2, srcq-%%stride, %%stride, %1
     EPEL_COMPUTE      %2, %1, m4, m5
@@ -758,7 +757,7 @@ cglobal hevc_put_hevc_uni_epel_h%1_%2, 6, 7, 11, dst, dststride, src, srcstride,
 
 cglobal hevc_put_hevc_bi_epel_h%1_%2, 7, 8, 11, dst, dststride, src, srcstride, src2, height, mx, rfilter
     movdqa            m6, [pw_bi_%2]
-    EPEL_FILTER       %2, mx, m4, m5
+    EPEL_FILTER       %2, mx, m4, m5, rfilter
 .loop
     EPEL_LOAD         %2, srcq-%%stride, %%stride, %1
     EPEL_COMPUTE      %2, %1, m4, m5, 1
@@ -778,11 +777,11 @@ cglobal hevc_put_hevc_bi_epel_h%1_%2, 7, 8, 11, dst, dststride, src, srcstride,
 ;                      int height, int mx, int my, int width)
 ; ******************************
 
-cglobal hevc_put_hevc_epel_v%1_%2, 4, 7, 11, dst, src, srcstride, height, r3src, my, rfilter
+cglobal hevc_put_hevc_epel_v%1_%2, 4, 6, 11, dst, src, srcstride, height, r3src, my
     movifnidn        myd, mym
-    lea           r3srcq, [srcstrideq*3]
     sub             srcq, srcstrideq
-    EPEL_FILTER       %2, my, m4, m5
+    EPEL_FILTER       %2, my, m4, m5, r3src
+    lea           r3srcq, [srcstrideq*3]
 .loop
     EPEL_LOAD         %2, srcq, srcstride, %1
     EPEL_COMPUTE      %2, %1, m4, m5, 1
@@ -790,12 +789,12 @@ cglobal hevc_put_hevc_epel_v%1_%2, 4, 7, 11, dst, src, srcstride, height, r3src,
     LOOP_END          dst, src, srcstride
     RET
 
-cglobal hevc_put_hevc_uni_epel_v%1_%2, 5, 8, 11, dst, dststride, src, srcstride, height, r3src, my, rfilter
+cglobal hevc_put_hevc_uni_epel_v%1_%2, 5, 7, 11, dst, dststride, src, srcstride, height, r3src, my
     movifnidn        myd, mym
-    lea           r3srcq, [srcstrideq*3]
     movdqa            m6, [pw_%2]
     sub             srcq, srcstrideq
-    EPEL_FILTER       %2, my, m4, m5
+    EPEL_FILTER       %2, my, m4, m5, r3src
+    lea           r3srcq, [srcstrideq*3]
 .loop
     EPEL_LOAD         %2, srcq, srcstride, %1
     EPEL_COMPUTE      %2, %1, m4, m5
@@ -808,12 +807,12 @@ cglobal hevc_put_hevc_uni_epel_v%1_%2, 5, 8, 11, dst, dststride, src, srcstride,
     RET
 
 
-cglobal hevc_put_hevc_bi_epel_v%1_%2, 6, 9, 11, dst, dststride, src, srcstride, src2, height, r3src, my, rfilter
+cglobal hevc_put_hevc_bi_epel_v%1_%2, 6, 8, 11, dst, dststride, src, srcstride, src2, height, r3src, my
     movifnidn        myd, mym
-    lea           r3srcq, [srcstrideq*3]
     movdqa            m6, [pw_bi_%2]
     sub             srcq, srcstrideq
-    EPEL_FILTER       %2, my, m4, m5
+    EPEL_FILTER       %2, my, m4, m5, r3src
+    lea           r3srcq, [srcstrideq*3]
 .loop
     EPEL_LOAD         %2, srcq, srcstride, %1
     EPEL_COMPUTE      %2, %1, m4, m5, 1
@@ -836,7 +835,7 @@ cglobal hevc_put_hevc_bi_epel_v%1_%2, 6, 9, 11, dst, dststride, src, srcstride,
 ; ******************************
 
 %macro HEVC_PUT_HEVC_EPEL_HV 2
-cglobal hevc_put_hevc_epel_hv%1_%2, 6, 8, 16 , dst, src, srcstride, height, mx, my, r3src, rfilter
+cglobal hevc_put_hevc_epel_hv%1_%2, 6, 7, 16 , dst, src, srcstride, height, mx, my, r3src
 %assign %%stride ((%2 + 7)/8)
     sub             srcq, srcstrideq
     EPEL_HV_FILTER    %2
@@ -902,7 +901,7 @@ cglobal hevc_put_hevc_epel_hv%1_%2, 6, 8, 16 , dst, src, srcstride, height, mx,
     LOOP_END         dst, src, srcstride
     RET
 
-cglobal hevc_put_hevc_uni_epel_hv%1_%2, 7, 9, 16 , dst, dststride, src, srcstride, height, mx, my, r3src, rfilter
+cglobal hevc_put_hevc_uni_epel_hv%1_%2, 7, 8, 16 , dst, dststride, src, srcstride, height, mx, my, r3src
 %assign %%stride ((%2 + 7)/8)
     sub             srcq, srcstrideq
     EPEL_HV_FILTER    %2
@@ -966,7 +965,7 @@ cglobal hevc_put_hevc_uni_epel_hv%1_%2, 7, 9, 16 , dst, dststride, src, srcstrid
     jnz               .loop                      ; height loop
     RET
 
-cglobal hevc_put_hevc_bi_epel_hv%1_%2, 8, 10, 16, dst, dststride, src, srcstride, src2, height, mx, my, r3src, rfilter
+cglobal hevc_put_hevc_bi_epel_hv%1_%2, 8, 9, 16, dst, dststride, src, srcstride, src2, height, mx, my, r3src
 %assign %%stride ((%2 + 7)/8)
     sub             srcq, srcstrideq
     EPEL_HV_FILTER    %2



More information about the ffmpeg-cvslog mailing list