[FFmpeg-devel] [PATCH 3/5] x86: hevc_mc: save 1 gpr in epel filter loading

Christophe Gisquet christophe.gisquet at gmail.com
Sat Feb 7 19:49:38 CET 2015


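The 3*stride value kept in r3src is not needed until after the filter
coefficients have been loaded, so it can be computed much later. Use r3src
as the temporary holding the filter table address instead of a dedicated
gpr when possible.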
---
 libavcodec/x86/hevc_mc.asm | 65 ++++++++++++++++++++++------------------------
 1 file changed, 31 insertions(+), 34 deletions(-)

diff --git a/libavcodec/x86/hevc_mc.asm b/libavcodec/x86/hevc_mc.asm
index aab69dd..74e08d4 100644
--- a/libavcodec/x86/hevc_mc.asm
+++ b/libavcodec/x86/hevc_mc.asm
@@ -136,20 +136,22 @@ QPEL_TABLE 10, 8, w, avx2
 %endmacro
 
 
-%macro EPEL_FILTER 2-4                            ; bit depth, filter index
+%macro EPEL_FILTER 5 ; bit depth, filter index, xmma, xmmb, gprtmp
 %if cpuflag(avx2)
 %assign %%offset 32
 %ifdef PIC
-    lea         rfilterq, [hevc_epel_filters_avx2_%1]
+    lea              %5q, [hevc_epel_filters_avx2_%1]
+    %define FILTER %5q
 %else
-    %define rfilterq hevc_epel_filters_avx2_%1
+    %define FILTER hevc_epel_filters_avx2_%1
 %endif
 %else
 %assign %%offset 16
 %ifdef PIC
-    lea         rfilterq, [hevc_epel_filters_sse4_%1]
+    lea              %5q, [hevc_epel_filters_sse4_%1]
+    %define FILTER %5q
 %else
-    %define rfilterq hevc_epel_filters_sse4_%1
+    %define FILTER hevc_epel_filters_sse4_%1
 %endif
 %endif ;cpuflag(avx2)
     sub              %2q, 1
@@ -158,13 +160,8 @@ QPEL_TABLE 10, 8, w, avx2
   %else
     shl              %2q, 5                      ; multiply by 32
 %endif
-%if %0 == 2
-    mova           m14, [rfilterq + %2q]        ; get 2 first values of filters
-    mova           m15, [rfilterq + %2q+%%offset]     ; get 2 last values of filters
-%else
-    mova           %3, [rfilterq + %2q]        ; get 2 first values of filters
-    mova           %4, [rfilterq + %2q+%%offset]     ; get 2 last values of filters
-%endif
+    mova           %3, [FILTER + %2q]        ; get 2 first values of filters
+    mova           %4, [FILTER + %2q+%%offset]     ; get 2 last values of filters
 %endmacro
 
 %macro EPEL_HV_FILTER 1
@@ -179,7 +176,8 @@ QPEL_TABLE 10, 8, w, avx2
 %endif
 
 %ifdef PIC
-    lea         rfilterq, [%%table]
+    lea           r3srcq, [%%table]             ; r3src is idle until the final lea: borrow it as table base
+    %define FILTER r3srcq
 %else
-    %define rfilterq %%table
+    %define FILTER %%table
 %endif
@@ -187,9 +185,8 @@ QPEL_TABLE 10, 8, w, avx2
     sub              myq, 1
     shl              mxq, %%shift                ; multiply by 32
     shl              myq, %%shift                ; multiply by 32
-    mova             m14, [rfilterq + mxq]        ; get 2 first values of filters
-    mova             m15, [rfilterq + mxq+%%offset]     ; get 2 last values of filters
-    lea           r3srcq, [srcstrideq*3]
+    mova             m14, [FILTER + mxq]        ; get 2 first values of filters
+    mova             m15, [FILTER + mxq+%%offset]     ; get 2 last values of filters
 
 %if cpuflag(avx2)
 %define %%table  hevc_epel_filters_avx2_10
@@ -197,12 +194,14 @@ QPEL_TABLE 10, 8, w, avx2
 %define %%table  hevc_epel_filters_sse4_10
 %endif
 %ifdef PIC
-    lea         rfilterq, [%%table]
+    lea           r3srcq, [%%table]             ; second table: reload the borrowed base register
+    %define FILTER r3srcq
 %else
-    %define rfilterq %%table
+    %define FILTER %%table
 %endif
-    mova             m12, [rfilterq + myq]        ; get 2 first values of filters
-    mova             m13, [rfilterq + myq+%%offset]     ; get 2 last values of filters
+    mova             m12, [FILTER + myq]        ; get 2 first values of filters
+    mova             m13, [FILTER + myq+%%offset]     ; get 2 last values of filters
+    lea           r3srcq, [srcstrideq*3]        ; all filters loaded: r3src now takes its real value, 3*srcstride
 %endmacro
 
 %macro QPEL_FILTER 2
@@ -739,7 +736,7 @@ cglobal hevc_put_hevc_bi_pel_pixels%1_%2, 6, 6, 6, dst, dststride, src, srcstrid
 %macro HEVC_PUT_HEVC_EPEL 2
 cglobal hevc_put_hevc_epel_h%1_%2, 5, 6, 11, dst, src, srcstride, height, mx, rfilter
 %assign %%stride ((%2 + 7)/8)
-    EPEL_FILTER       %2, mx, m4, m5
+    EPEL_FILTER       %2, mx, m4, m5, rfilter
 .loop
     EPEL_LOAD         %2, srcq-%%stride, %%stride, %1
     EPEL_COMPUTE      %2, %1, m4, m5, 1
@@ -750,7 +747,7 @@ cglobal hevc_put_hevc_epel_h%1_%2, 5, 6, 11, dst, src, srcstride, height, mx, rf
 cglobal hevc_put_hevc_uni_epel_h%1_%2, 6, 7, 11, dst, dststride, src, srcstride, height, mx, rfilter
 %assign %%stride ((%2 + 7)/8)
     movdqa            m6, [pw_%2]
-    EPEL_FILTER       %2, mx, m4, m5
+    EPEL_FILTER       %2, mx, m4, m5, rfilter
 .loop
     EPEL_LOAD         %2, srcq-%%stride, %%stride, %1
     EPEL_COMPUTE      %2, %1, m4, m5
@@ -764,7 +761,7 @@ cglobal hevc_put_hevc_uni_epel_h%1_%2, 6, 7, 11, dst, dststride, src, srcstride,
 
 cglobal hevc_put_hevc_bi_epel_h%1_%2, 7, 8, 11, dst, dststride, src, srcstride, src2, height, mx, rfilter
     movdqa            m6, [pw_bi_%2]
-    EPEL_FILTER       %2, mx, m4, m5
+    EPEL_FILTER       %2, mx, m4, m5, rfilter
 .loop
     EPEL_LOAD         %2, srcq-%%stride, %%stride, %1
     EPEL_COMPUTE      %2, %1, m4, m5, 1
@@ -784,11 +781,11 @@ cglobal hevc_put_hevc_bi_epel_h%1_%2, 7, 8, 11, dst, dststride, src, srcstride,
 ;                      int height, int mx, int my, int width)
 ; ******************************
 
-cglobal hevc_put_hevc_epel_v%1_%2, 4, 7, 11, dst, src, srcstride, height, r3src, my, rfilter
+cglobal hevc_put_hevc_epel_v%1_%2, 4, 6, 11, dst, src, srcstride, height, r3src, my
     movifnidn        myd, mym
-    lea           r3srcq, [srcstrideq*3]
     sub             srcq, srcstrideq
-    EPEL_FILTER       %2, my, m4, m5
+    EPEL_FILTER       %2, my, m4, m5, r3src
+    lea           r3srcq, [srcstrideq*3]
 .loop
     EPEL_LOAD         %2, srcq, srcstride, %1
     EPEL_COMPUTE      %2, %1, m4, m5, 1
@@ -796,12 +793,12 @@ cglobal hevc_put_hevc_epel_v%1_%2, 4, 7, 11, dst, src, srcstride, height, r3src,
     LOOP_END          dst, src, srcstride
     RET
 
-cglobal hevc_put_hevc_uni_epel_v%1_%2, 5, 8, 11, dst, dststride, src, srcstride, height, r3src, my, rfilter
+cglobal hevc_put_hevc_uni_epel_v%1_%2, 5, 7, 11, dst, dststride, src, srcstride, height, r3src, my
     movifnidn        myd, mym
-    lea           r3srcq, [srcstrideq*3]
     movdqa            m6, [pw_%2]
     sub             srcq, srcstrideq
-    EPEL_FILTER       %2, my, m4, m5
+    EPEL_FILTER       %2, my, m4, m5, r3src
+    lea           r3srcq, [srcstrideq*3]
 .loop
     EPEL_LOAD         %2, srcq, srcstride, %1
     EPEL_COMPUTE      %2, %1, m4, m5
@@ -814,12 +811,12 @@ cglobal hevc_put_hevc_uni_epel_v%1_%2, 5, 8, 11, dst, dststride, src, srcstride,
     RET
 
 
-cglobal hevc_put_hevc_bi_epel_v%1_%2, 6, 9, 11, dst, dststride, src, srcstride, src2, height, r3src, my, rfilter
+cglobal hevc_put_hevc_bi_epel_v%1_%2, 6, 8, 11, dst, dststride, src, srcstride, src2, height, r3src, my
     movifnidn        myd, mym
-    lea           r3srcq, [srcstrideq*3]
     movdqa            m6, [pw_bi_%2]
     sub             srcq, srcstrideq
-    EPEL_FILTER       %2, my, m4, m5
+    EPEL_FILTER       %2, my, m4, m5, r3src
+    lea           r3srcq, [srcstrideq*3]
 .loop
     EPEL_LOAD         %2, srcq, srcstride, %1
     EPEL_COMPUTE      %2, %1, m4, m5, 1
@@ -842,7 +839,7 @@ cglobal hevc_put_hevc_bi_epel_v%1_%2, 6, 9, 11, dst, dststride, src, srcstride,
 ; ******************************
 
 %macro HEVC_PUT_HEVC_EPEL_HV 2
-cglobal hevc_put_hevc_epel_hv%1_%2, 6, 8, 16 , dst, src, srcstride, height, mx, my, r3src, rfilter
+cglobal hevc_put_hevc_epel_hv%1_%2, 6, 7, 16 , dst, src, srcstride, height, mx, my, r3src
 %assign %%stride ((%2 + 7)/8)
     sub             srcq, srcstrideq
     EPEL_HV_FILTER    %2
@@ -909,7 +906,7 @@ cglobal hevc_put_hevc_epel_hv%1_%2, 6, 8, 16 , dst, src, srcstride, height, mx,
     LOOP_END         dst, src, srcstride
     RET
 
-cglobal hevc_put_hevc_uni_epel_hv%1_%2, 7, 9, 16 , dst, dststride, src, srcstride, height, mx, my, r3src, rfilter
+cglobal hevc_put_hevc_uni_epel_hv%1_%2, 7, 8, 16 , dst, dststride, src, srcstride, height, mx, my, r3src
 %assign %%stride ((%2 + 7)/8)
     sub             srcq, srcstrideq
     EPEL_HV_FILTER    %2
@@ -973,7 +970,7 @@ cglobal hevc_put_hevc_uni_epel_hv%1_%2, 7, 9, 16 , dst, dststride, src, srcstrid
     jnz               .loop                      ; height loop
     RET
 
-cglobal hevc_put_hevc_bi_epel_hv%1_%2, 8, 10, 16, dst, dststride, src, srcstride, src2, height, mx, my, r3src, rfilter
+cglobal hevc_put_hevc_bi_epel_hv%1_%2, 8, 9, 16, dst, dststride, src, srcstride, src2, height, mx, my, r3src
 %assign %%stride ((%2 + 7)/8)
     sub             srcq, srcstrideq
     EPEL_HV_FILTER    %2
-- 
1.9.2.msysgit.0



More information about the ffmpeg-devel mailing list