[FFmpeg-devel] [FFmpeg-Devel] [PATCH 2/5] postproc: Made QP, nonBQP, and pQPb arrays

Tucker DiNapoli t.dinapoli42 at gmail.com
Wed Apr 1 20:36:02 CEST 2015


Also hoisted the per-block QP initialization out of the inner filter loops
into a single setup loop that runs before them.

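Added temporary array fields to PPContext (QP_block, nonBQP_block,
pQPb_block, pQPb2_block). The existing code still reads the scalar QP,
nonBQP, pQPb and pQPb2 fields, so these are filled from the arrays for each
block, which keeps the current code working while the QP handling is being
converted.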
---
 libpostproc/postprocess_internal.h | 10 ++++-
 libpostproc/postprocess_template.c | 82 ++++++++++++++++++--------------------
 2 files changed, 47 insertions(+), 45 deletions(-)

diff --git a/libpostproc/postprocess_internal.h b/libpostproc/postprocess_internal.h
index 1ebd974..1f2758c 100644
--- a/libpostproc/postprocess_internal.h
+++ b/libpostproc/postprocess_internal.h
@@ -143,8 +143,11 @@ typedef struct PPContext{
     DECLARE_ALIGNED(8, uint64_t, pQPb);
     DECLARE_ALIGNED(8, uint64_t, pQPb2);
 
-    DECLARE_ALIGNED(8, uint64_t, mmxDcOffset)[64];
-    DECLARE_ALIGNED(8, uint64_t, mmxDcThreshold)[64];
+    DECLARE_ALIGNED(32, uint64_t, pQPb_block)[4];
+    DECLARE_ALIGNED(32, uint64_t, pQPb2_block)[4];
+
+    DECLARE_ALIGNED(32, uint64_t, mmxDcOffset)[64];
+    DECLARE_ALIGNED(32, uint64_t, mmxDcThreshold)[64];
 
     QP_STORE_T *stdQPTable;       ///< used to fix MPEG2 style qscale
     QP_STORE_T *nonBQPTable;
@@ -153,6 +156,9 @@ typedef struct PPContext{
     int QP;
     int nonBQP;
 
+    QP_STORE_T QP_block[4];
+    QP_STORE_T nonBQP_block[4];
+
     int frameNum;
 
     int cpuCaps;
diff --git a/libpostproc/postprocess_template.c b/libpostproc/postprocess_template.c
index 6377ea7..8220d36 100644
--- a/libpostproc/postprocess_template.c
+++ b/libpostproc/postprocess_template.c
@@ -3416,7 +3416,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
 #endif
         const int8_t *QPptr= &QPs[(y>>qpVShift)*QPStride];
         int8_t *nonBQPptr= &c.nonBQPTable[(y>>qpVShift)*FFABS(QPStride)];
-        int QP=0;
+        int QP=0, nonBQP=0;
         /* can we mess with a 8x16 block from srcBlock/dstBlock downwards and 1 line upwards
            if not than use a temporary buffer */
         if(y+15 >= height){
@@ -3449,6 +3449,29 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
             int endx = FFMIN(width, x+32);
             uint8_t *dstBlockStart = dstBlock;
             const uint8_t *srcBlockStart = srcBlock;
+            int qp_index = 0;
+            for(qp_index=0; qp_index < (endx-startx)/BLOCK_SIZE; qp_index++){
+                QP = QPptr[(x+qp_index*BLOCK_SIZE)>>qpHShift];
+                nonBQP = nonBQPptr[(x+qp_index*BLOCK_SIZE)>>qpHShift];
+            if(!isColor){
+                QP= (QP* QPCorrecture + 256*128)>>16;
+                nonBQP= (nonBQP* QPCorrecture + 256*128)>>16;
+                yHistogram[srcBlock[srcStride*12 + 4]]++;
+            }
+            c.QP_block[qp_index] = QP;
+            c.nonBQP_block[qp_index] = nonBQP;
+#if TEMPLATE_PP_MMX
+            __asm__ volatile(
+                "movd %1, %%mm7         \n\t"
+                "packuswb %%mm7, %%mm7  \n\t" // 0, 0, 0, QP, 0, 0, 0, QP
+                "packuswb %%mm7, %%mm7  \n\t" // 0,QP, 0, QP, 0,QP, 0, QP
+                "packuswb %%mm7, %%mm7  \n\t" // QP,..., QP
+                "movq %%mm7, %0         \n\t"
+                : "=m" (c.pQPb_block[qp_index])
+                : "r" (QP)
+            );
+#endif
+            }
           for(; x < endx; x+=BLOCK_SIZE){
             prefetchnta(srcBlock + (((x>>2)&6) + copyAhead)*srcStride + 32);
             prefetchnta(srcBlock + (((x>>2)&6) + copyAhead+1)*srcStride + 32);
@@ -3480,27 +3503,16 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
           dstBlock = dstBlockStart;
           srcBlock = srcBlockStart;
 
-          for(x = startx; x < endx; x+=BLOCK_SIZE){
+          for(x = startx, qp_index = 0; x < endx; x+=BLOCK_SIZE, qp_index++){
             const int stride= dstStride;
-            QP = QPptr[x>>qpHShift];
-            c.nonBQP = nonBQPptr[x>>qpHShift];
-            if(!isColor){
-                QP= (QP* QPCorrecture + 256*128)>>16;
-                c.nonBQP= (c.nonBQP* QPCorrecture + 256*128)>>16;
-                yHistogram[srcBlock[srcStride*12 + 4]]++;
-            }
-            c.QP= QP;
-#if TEMPLATE_PP_MMX
-            __asm__ volatile(
-                "movd %1, %%mm7         \n\t"
-                "packuswb %%mm7, %%mm7  \n\t" // 0, 0, 0, QP, 0, 0, 0, QP
-                "packuswb %%mm7, %%mm7  \n\t" // 0,QP, 0, QP, 0,QP, 0, QP
-                "packuswb %%mm7, %%mm7  \n\t" // QP,..., QP
-                "movq %%mm7, %0         \n\t"
-                : "=m" (c.pQPb)
-                : "r" (QP)
-            );
-#endif
+            //temporary while changing QP stuff to make things continue to work
+            //eventually QP,nonBQP,etc will be arrays and this will be unnecessary
+            c.QP = c.QP_block[qp_index];
+            c.nonBQP = c.nonBQP_block[qp_index];
+            c.pQPb = c.pQPb_block[qp_index];
+            c.pQPb2 = c.pQPb2_block[qp_index];
+
+
             /* only deblock if we have 2 blocks */
             if(y + 8 < height){
                 if(mode & V_X1_FILTER)
@@ -3524,30 +3536,14 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
           dstBlock = dstBlockStart;
           srcBlock = srcBlockStart;
 
-          for(x = startx; x < endx; x+=BLOCK_SIZE){
+          for(x = startx, qp_index=0; x < endx; x+=BLOCK_SIZE, qp_index++){
             const int stride= dstStride;
             av_unused uint8_t *tmpXchg;
-
-            if(isColor){
-                QP= QPptr[x>>qpHShift];
-                c.nonBQP= nonBQPptr[x>>qpHShift];
-            }else{
-                QP= QPptr[x>>4];
-                QP= (QP* QPCorrecture + 256*128)>>16;
-                c.nonBQP= nonBQPptr[x>>4];
-                c.nonBQP= (c.nonBQP* QPCorrecture + 256*128)>>16;
-            }
-            c.QP= QP;
+            c.QP = c.QP_block[qp_index];
+            c.nonBQP = c.nonBQP_block[qp_index];
+            c.pQPb = c.pQPb_block[qp_index];
+            c.pQPb2 = c.pQPb2_block[qp_index];
 #if TEMPLATE_PP_MMX
-            __asm__ volatile(
-                "movd %1, %%mm7         \n\t"
-                "packuswb %%mm7, %%mm7  \n\t" // 0, 0, 0, QP, 0, 0, 0, QP
-                "packuswb %%mm7, %%mm7  \n\t" // 0,QP, 0, QP, 0,QP, 0, QP
-                "packuswb %%mm7, %%mm7  \n\t" // QP,..., QP
-                "movq %%mm7, %0         \n\t"
-                : "=m" (c.pQPb)
-                : "r" (QP)
-            );
             RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride);
 #endif
             /* check if we have a previous block to deblock it with dstBlock */
@@ -3569,7 +3565,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
 
 #else
                 if(mode & H_X1_FILTER)
-                    horizX1Filter(dstBlock-4, stride, QP);
+                    horizX1Filter(dstBlock-4, stride, c.QP);
                 else if(mode & H_DEBLOCK){
 #if TEMPLATE_PP_ALTIVEC
                     DECLARE_ALIGNED(16, unsigned char, tempBlock)[272];
-- 
2.3.3



More information about the ffmpeg-devel mailing list