[FFmpeg-devel] [PATCH] avfilter/vf_convolution: add 16-column operation for filter_column() to prepare for x86 SIMD.

xujunzz at sjtu.edu.cn xujunzz at sjtu.edu.cn
Wed Nov 27 16:55:46 EET 2019


From: Xu Jun <xujunzz at sjtu.edu.cn>

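To prepare for an x86 SIMD implementation of filter_column(), add a C function that processes up to 16 columns per call.
filter_slice() now walks each slice in 16-column batches and handles the remaining columns in a final, shorter pass.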

Signed-off-by: Xu Jun <xujunzz at sjtu.edu.cn>
---
 libavfilter/vf_convolution.c          | 56 +++++++++++++++++++++++++++
 libavfilter/x86/vf_convolution_init.c | 23 +++++++++++
 2 files changed, 79 insertions(+)

diff --git a/libavfilter/vf_convolution.c b/libavfilter/vf_convolution.c
index d022f1a04a..5291415d48 100644
--- a/libavfilter/vf_convolution.c
+++ b/libavfilter/vf_convolution.c
@@ -520,6 +520,61 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
             continue;
         }
 
+        if (mode == MATRIX_COLUMN && s->filter[plane] != filter_column){
+            for (y = slice_start; y < slice_end - 16; y+=16) {
+                const int xoff = (y - slice_start) * bpc;
+                const int yoff = radius * stride;
+                for (x = 0; x < radius; x++) {
+                    const int xoff = (y - slice_start) * bpc;
+                    const int yoff = x * stride;
+
+                    s->setup[plane](radius, c, src, stride, x, width, y, height, bpc);
+                    s->filter[plane](dst + yoff + xoff, 1, rdiv,
+                                    bias, matrix, c, 16, radius,
+                                    dstride, stride);
+                }
+                s->setup[plane](radius, c, src, stride, radius, width, y, height, bpc);
+                s->filter[plane](dst + yoff + xoff, sizew - 2 * radius,
+                                rdiv, bias, matrix, c, 16, radius,
+                                dstride, stride);
+                for (x = sizew - radius; x < sizew; x++) {
+                    const int xoff = (y - slice_start) * bpc;
+                    const int yoff = x * stride;
+
+                    s->setup[plane](radius, c, src, stride, x, width, y, height, bpc);
+                    s->filter[plane](dst + yoff + xoff, 1, rdiv,
+                                    bias, matrix, c, 16, radius,
+                                    dstride, stride);
+                }
+            }
+            if (y < slice_end){
+                const int xoff = (y - slice_start) * bpc;
+                const int yoff = radius * stride;
+                for (x = 0; x < radius; x++) {
+                    const int xoff = (y - slice_start) * bpc;
+                    const int yoff = x * stride;
+
+                    s->setup[plane](radius, c, src, stride, x, width, y, height, bpc);
+                    s->filter[plane](dst + yoff + xoff, 1, rdiv,
+                                    bias, matrix, c, slice_end - y, radius,
+                                    dstride, stride);
+                }
+                s->setup[plane](radius, c, src, stride, radius, width, y, height, bpc);
+                s->filter[plane](dst + yoff + xoff, sizew - 2 * radius,
+                                rdiv, bias, matrix, c, slice_end - y, radius,
+                                dstride, stride);
+                for (x = sizew - radius; x < sizew; x++) {
+                    const int xoff = (y - slice_start) * bpc;
+                    const int yoff = x * stride;
+
+                    s->setup[plane](radius, c, src, stride, x, width, y, height, bpc);
+                    s->filter[plane](dst + yoff + xoff, 1, rdiv,
+                                    bias, matrix, c, slice_end - y, radius,
+                                    dstride, stride);
+                }
+            }
+        }
+        else {
         for (y = slice_start; y < slice_end; y++) {
             const int xoff = mode == MATRIX_COLUMN ? (y - slice_start) * bpc : radius * bpc;
             const int yoff = mode == MATRIX_COLUMN ? radius * stride : 0;
@@ -550,6 +605,7 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
                 dst += dstride;
         }
     }
+    }
 
     return 0;
 }
diff --git a/libavfilter/x86/vf_convolution_init.c b/libavfilter/x86/vf_convolution_init.c
index d1e8c90ceb..6b1c2f0e9f 100644
--- a/libavfilter/x86/vf_convolution_init.c
+++ b/libavfilter/x86/vf_convolution_init.c
@@ -34,6 +34,27 @@ void ff_filter_row_sse4(uint8_t *dst, int width,
                         const uint8_t *c[], int peak, int radius,
                         int dstride, int stride);
 
+/* 8-bit column convolution over `length` adjacent columns; plain-C model for the planned x86 SIMD. */
+static void filter_column16(uint8_t *dst, int height,
+                          float rdiv, float bias, const int *const matrix,
+                          const uint8_t *c[], int length, int radius,
+                          int dstride, int stride)
+{
+    const int taps = 2 * radius + 1; /* number of kernel coefficients read from matrix[] / c[] */
+    int row, col, k;
+
+    for (row = 0; row < height; row++, dst += dstride) {
+        const int base = row * stride; /* byte offset of this row inside every c[k] */
+        for (col = 0; col < length; col++) {
+            int acc = 0;
+
+            for (k = 0; k < taps; k++)
+                acc += c[k][base + col] * matrix[k];
+            /* scale and offset, add 0.5 before the truncating cast, then clamp to uint8 */
+            dst[col] = av_clip_uint8((int)(acc * rdiv + bias + 0.5f));
+        }
+    }
+}
 
 av_cold void ff_convolution_init_x86(ConvolutionContext *s)
 {
@@ -51,6 +72,8 @@ av_cold void ff_convolution_init_x86(ConvolutionContext *s)
                 if (EXTERNAL_SSE4(cpu_flags))
                     s->filter[i] = ff_filter_row_sse4;
         }
+        if (s->mode[i] == MATRIX_COLUMN)
+            s->filter[i] = filter_column16;
     }
 #endif
 }
-- 
2.17.1



More information about the ffmpeg-devel mailing list