[FFmpeg-cvslog] avfilter/vf_nnedi: simplify code even more

Tue Jan 19 21:38:26 EET 2021

ffmpeg | branch: master | Paul B Mahol <onemda at gmail.com> | Tue Jan 19 17:06:05 2021 +0100| [c737f6edcef74a64f4d0ebcefa970bd31266d512] | committer: Paul B Mahol

avfilter/vf_nnedi: simplify code even more

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=c737f6edcef74a64f4d0ebcefa970bd31266d512
---

 libavfilter/vf_nnedi.c | 151 +++++++++++++++++++++++++++----------------------
 1 file changed, 82 insertions(+), 69 deletions(-)

diff --git a/libavfilter/vf_nnedi.c b/libavfilter/vf_nnedi.c
index 5863ec478c..786059052b 100644
--- a/libavfilter/vf_nnedi.c
+++ b/libavfilter/vf_nnedi.c
@@ -37,8 +37,8 @@ static const uint8_t NNEDI_XDIM[] = { 8, 16, 32, 48, 8, 16, 32 };
 static const uint8_t NNEDI_YDIM[] = { 6, 6, 6, 6, 4, 4, 4 };
 static const uint16_t NNEDI_NNS[] = { 16, 32, 64, 128, 256 };
 
-typedef struct PrescreenerOldCoefficients {
-    DECLARE_ALIGNED(32, float, kernel_l0)[4][14 * 4];
+typedef struct PrescreenerCoefficients {
+    DECLARE_ALIGNED(32, float, kernel_l0)[4][16 * 4];
     DECLARE_ALIGNED(32, float, bias_l0)[4];
 
     DECLARE_ALIGNED(32, float, kernel_l1)[4][4];
@@ -46,15 +46,7 @@ typedef struct PrescreenerOldCoefficients {
 
     DECLARE_ALIGNED(32, float, kernel_l2)[4][8];
     DECLARE_ALIGNED(32, float, bias_l2)[4];
-} PrescreenerOldCoefficients;
-
-typedef struct PrescreenerNewCoefficients {
-    DECLARE_ALIGNED(32, float, kernel_l0)[4][16 * 4];
-    DECLARE_ALIGNED(32, float, bias_l0)[4];
-
-    DECLARE_ALIGNED(32, float, kernel_l1)[4][4];
-    DECLARE_ALIGNED(32, float, bias_l1)[4];
-} PrescreenerNewCoefficients;
+} PrescreenerCoefficients;
 
 typedef struct PredictorCoefficients {
     int xdim, ydim, nns, nsize;
@@ -89,8 +81,7 @@ typedef struct NNEDIContext {
     int planeheight[4];
     int field_n;
 
-    PrescreenerOldCoefficients prescreener_old;
-    PrescreenerNewCoefficients prescreener_new[3];
+    PrescreenerCoefficients prescreener[4];
     PredictorCoefficients coeffs[2][5][7];
 
     float half;
@@ -108,9 +99,9 @@ typedef struct NNEDIContext {
     int pscrn;
 
     int input_size;
-    uint8_t *prescreen_buf;
-    float *input_buf;
-    float *output_buf;
+    uint8_t **prescreen_buf;
+    float **input_buf;
+    float **output_buf;
 
     void (*read)(const uint8_t *src, float *dst,
                  int src_stride, int dst_stride,
@@ -120,7 +111,8 @@ typedef struct NNEDIContext {
                   int width, int height, int depth, float scale);
     void (*prescreen[2])(AVFilterContext *ctx,
                          const void *src, ptrdiff_t src_stride,
-                         uint8_t *prescreen, int N, void *data);
+                         uint8_t *prescreen, int N,
+                         const PrescreenerCoefficients *const coeffs);
 } NNEDIContext;
 
 #define OFFSET(x) offsetof(NNEDIContext, x)
@@ -222,7 +214,7 @@ static int query_formats(AVFilterContext *ctx)
     return ff_set_common_formats(ctx, fmts_list);
 }
 
-static float dot_dsp(NNEDIContext *s, const float *kernel, const float *input,
+static float dot_dsp(const NNEDIContext *const s, const float *kernel, const float *input,
                      int n, float scale, float bias)
 {
     float sum;
@@ -246,10 +238,9 @@ static void transform_elliott(float *input, int size)
 static void process_old(AVFilterContext *ctx,
                         const void *src, ptrdiff_t src_stride,
                         uint8_t *prescreen, int N,
-                        void *data)
+                        const PrescreenerCoefficients *const m_data)
 {
     NNEDIContext *s = ctx->priv;
-    const PrescreenerOldCoefficients *const m_data = data;
     const float *src_p = src;
 
     // Adjust source pointer to point to top-left of filter window.
@@ -283,10 +274,9 @@ static void process_old(AVFilterContext *ctx,
 static void process_new(AVFilterContext *ctx,
                         const void *src, ptrdiff_t src_stride,
                         uint8_t *prescreen, int N,
-                        void *data)
+                        const PrescreenerCoefficients *const m_data)
 {
     NNEDIContext *s = ctx->priv;
-    const PrescreenerNewCoefficients *const m_data = data;
     const float *src_p = src;
 
     // Adjust source pointer to point to top-left of filter window.
@@ -344,8 +334,8 @@ static void gather_input(const float *src, ptrdiff_t src_stride,
                          float *buf, float mstd[4],
                          const PredictorCoefficients *const model)
 {
-    float sum = 0;
-    float sum_sq = 0;
+    float sum = 0.f;
+    float sum_sq = 0.f;
     float tmp;
 
     for (int i = 0; i < model->ydim; i++) {
@@ -405,17 +395,16 @@ static void wae5(const float *softmax, const float *el,
 static void predictor(AVFilterContext *ctx,
                       const void *src, ptrdiff_t src_stride, void *dst,
                       const uint8_t *prescreen, int N,
-                      void *data, int use_q2)
+                      const PredictorCoefficients *const model, int use_q2)
 {
-    NNEDIContext *s = ctx->priv;
-    const PredictorCoefficients *const model = data;
+    const NNEDIContext *const s = ctx->priv;
     const float *src_p = src;
     float *dst_p = dst;
 
     // Adjust source pointer to point to top-left of filter window.
     const float *window = src_p - (model->ydim / 2) * src_stride - (model->xdim / 2 - 1);
-    int filter_size = model->nsize;
-    int nns = model->nns;
+    const int filter_size = model->nsize;
+    const int nns = model->nns;
 
     for (int i = 0; i < N; i++) {
         LOCAL_ALIGNED_32(float, input, [48 * 6]);
@@ -433,7 +422,7 @@ static void predictor(AVFilterContext *ctx,
             activation[nn] = dot_dsp(s, softmax_q1_filter(nn, model), input, filter_size, scale, model->softmax_bias_q1[nn]);
 
         for (int nn = 0; nn < nns; nn++)
-            activation[model->nns + nn] = dot_dsp(s, elliott_q1_filter(nn, model), input, filter_size, scale, model->elliott_bias_q1[nn]);
+            activation[nns + nn] = dot_dsp(s, elliott_q1_filter(nn, model), input, filter_size, scale, model->elliott_bias_q1[nn]);
 
         transform_softmax_exp(activation, nns);
         wae5(activation, activation + nns, nns, mstd);
@@ -449,7 +438,7 @@ static void predictor(AVFilterContext *ctx,
             wae5(activation, activation + nns, nns, mstd);
         }
 
-        dst_p[i] = mstd[3] / (use_q2 ? 2 : 1);
+        dst_p[i] = mstd[3] * (use_q2 ? 0.5f : 1.f);
     }
 }
 
@@ -551,7 +540,7 @@ static void interpolation(const void *src, ptrdiff_t src_stride,
 
 static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
 {
-    NNEDIContext *s = ctx->priv;
+    const NNEDIContext *const s = ctx->priv;
     AVFrame *out = s->dst;
     AVFrame *in = s->src;
     const float in_scale = s->in_scale;
@@ -572,10 +561,10 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
         uint8_t *dst = out->data[p] + slice_start * out->linesize[p];
         const int src_linesize = in->linesize[p];
         const int dst_linesize = out->linesize[p];
-        uint8_t *prescreen_buf = s->prescreen_buf + s->planewidth[0] * jobnr;
-        float *srcbuf = s->input_buf + s->input_size * jobnr;
+        uint8_t *prescreen_buf = s->prescreen_buf[jobnr];
+        float *srcbuf = s->input_buf[jobnr];
         const int srcbuf_stride = width + 64;
-        float *dstbuf = s->output_buf + s->input_size * jobnr;
+        float *dstbuf = s->output_buf[jobnr];
         const int dstbuf_stride = width;
         const int slice_height = (slice_end - slice_start) / 2;
         const int last_slice = slice_end == height;
@@ -646,15 +635,10 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
                 width, 1, in_scale);
 
         for (int y = 0; y < slice_end - slice_start; y += 2) {
-            if (s->pscrn > 1) {
-                s->prescreen[1](ctx, srcbuf + (y / 2) * srcbuf_stride + 32,
-                                srcbuf_stride, prescreen_buf, width,
-                                &s->prescreener_new[s->pscrn - 2]);
-            } else if (s->pscrn == 1) {
-                s->prescreen[0](ctx, srcbuf + (y / 2) * srcbuf_stride + 32,
-                                srcbuf_stride, prescreen_buf, width,
-                                &s->prescreener_old);
-            }
+            if (s->prescreen > 0)
+                s->prescreen[s->pscrn > 1](ctx, srcbuf + (y / 2) * srcbuf_stride + 32,
+                             srcbuf_stride, prescreen_buf, width,
+                             &s->prescreener[s->pscrn - 1]);
 
             predictor(ctx,
                       srcbuf + (y / 2) * srcbuf_stride + 32,
@@ -831,7 +815,7 @@ static int allocate_model(PredictorCoefficients *coeffs, int xdim, int ydim, int
     int bias_size = nns;
     float *data;
 
-    data = av_malloc_array(filter_size + bias_size, 4 * sizeof(float));
+    data = av_calloc(filter_size + bias_size, 4 * sizeof(float));
     if (!data)
         return AVERROR(ENOMEM);
 
@@ -859,17 +843,17 @@ static int read_weights(AVFilterContext *ctx, const float *bdata)
     NNEDIContext *s = ctx->priv;
     int ret;
 
-    copy_weights(&s->prescreener_old.kernel_l0[0][0], 4 * 48, &bdata);
-    copy_weights(s->prescreener_old.bias_l0, 4, &bdata);
+    copy_weights(&s->prescreener[0].kernel_l0[0][0], 4 * 48, &bdata);
+    copy_weights(s->prescreener[0].bias_l0, 4, &bdata);
 
-    copy_weights(&s->prescreener_old.kernel_l1[0][0], 4 * 4, &bdata);
-    copy_weights(s->prescreener_old.bias_l1, 4, &bdata);
+    copy_weights(&s->prescreener[0].kernel_l1[0][0], 4 * 4, &bdata);
+    copy_weights(s->prescreener[0].bias_l1, 4, &bdata);
 
-    copy_weights(&s->prescreener_old.kernel_l2[0][0], 4 * 8, &bdata);
-    copy_weights(s->prescreener_old.bias_l2, 4, &bdata);
+    copy_weights(&s->prescreener[0].kernel_l2[0][0], 4 * 8, &bdata);
+    copy_weights(s->prescreener[0].bias_l2, 4, &bdata);
 
     for (int i = 0; i < 3; i++) {
-        PrescreenerNewCoefficients *data = &s->prescreener_new[i];
+        PrescreenerCoefficients *data = &s->prescreener[i + 1];
         float kernel_l0_shuffled[4 * 64];
         float kernel_l1_shuffled[4 * 4];
 
@@ -890,14 +874,14 @@ static int read_weights(AVFilterContext *ctx, const float *bdata)
     for (int m = 0; m < 2; m++) {
         // Grouping by neuron count.
         for (int i = 0; i < 5; i++) {
-            int nns = NNEDI_NNS[i];
+            const int nns = NNEDI_NNS[i];
 
             // Grouping by window size.
             for (int j = 0; j < 7; j++) {
                 PredictorCoefficients *model = &s->coeffs[m][i][j];
-                int xdim = NNEDI_XDIM[j];
-                int ydim = NNEDI_YDIM[j];
-                int filter_size = xdim * ydim;
+                const int xdim = NNEDI_XDIM[j];
+                const int ydim = NNEDI_YDIM[j];
+                const int filter_size = xdim * ydim;
 
                 ret = allocate_model(model, xdim, ydim, nns);
                 if (ret < 0)
@@ -927,7 +911,7 @@ static int read_weights(AVFilterContext *ctx, const float *bdata)
 
 static float mean(const float *input, int size)
 {
-    float sum = 0.;
+    float sum = 0.f;
 
     for (int i = 0; i < size; i++)
         sum += input[i];
@@ -941,7 +925,7 @@ static void transform(float *input, int size, float mean, float half)
         input[i] = (input[i] - mean) / half;
 }
 
-static void subtract_mean_old(PrescreenerOldCoefficients *coeffs, float half)
+static void subtract_mean_old(PrescreenerCoefficients *coeffs, float half)
 {
     for (int n = 0; n < 4; n++) {
         float m = mean(coeffs->kernel_l0[n], 48);
@@ -950,7 +934,7 @@ static void subtract_mean_old(PrescreenerOldCoefficients *coeffs, float half)
     }
 }
 
-static void subtract_mean_new(PrescreenerNewCoefficients *coeffs, float half)
+static void subtract_mean_new(PrescreenerCoefficients *coeffs, float half)
 {
     for (int n = 0; n < 4; n++) {
         float m = mean(coeffs->kernel_l0[n], 64);
@@ -1121,10 +1105,10 @@ static int config_input(AVFilterLink *inlink)
         break;
     }
 
-    subtract_mean_old(&s->prescreener_old, s->half);
-    subtract_mean_new(&s->prescreener_new[0], s->half);
-    subtract_mean_new(&s->prescreener_new[1], s->half);
-    subtract_mean_new(&s->prescreener_new[2], s->half);
+    subtract_mean_old(&s->prescreener[0], s->half);
+    subtract_mean_new(&s->prescreener[1], s->half);
+    subtract_mean_new(&s->prescreener[2], s->half);
+    subtract_mean_new(&s->prescreener[3], s->half);
 
     s->prescreen[0] = process_old;
     s->prescreen[1] = process_new;
@@ -1136,19 +1120,37 @@ static int config_input(AVFilterLink *inlink)
         }
     }
 
-    s->prescreen_buf = av_calloc(s->nb_threads * s->planewidth[0], sizeof(*s->prescreen_buf));
-    if (!s->prescreen_buf)
-        return AVERROR(ENOMEM);
-
     s->input_size = (s->planewidth[0] + 64) * (s->planeheight[0] + 6);
-    s->input_buf = av_calloc(s->nb_threads * s->input_size, sizeof(*s->input_buf));
+    s->input_buf = av_calloc(s->nb_threads, sizeof(*s->input_buf));
     if (!s->input_buf)
         return AVERROR(ENOMEM);
 
-    s->output_buf = av_calloc(s->nb_threads * s->input_size, sizeof(*s->output_buf));
+    for (int i = 0; i < s->nb_threads; i++) {
+        s->input_buf[i] = av_calloc(s->input_size, sizeof(**s->input_buf));
+        if (!s->input_buf[i])
+            return AVERROR(ENOMEM);
+    }
+
+    s->output_buf = av_calloc(s->nb_threads, sizeof(*s->output_buf));
     if (!s->output_buf)
         return AVERROR(ENOMEM);
 
+    for (int i = 0; i < s->nb_threads; i++) {
+        s->output_buf[i] = av_calloc(s->input_size, sizeof(**s->output_buf));
+        if (!s->output_buf[i])
+            return AVERROR(ENOMEM);
+    }
+
+    s->prescreen_buf = av_calloc(s->nb_threads, sizeof(*s->prescreen_buf));
+    if (!s->prescreen_buf)
+        return AVERROR(ENOMEM);
+
+    for (int i = 0; i < s->nb_threads; i++) {
+        s->prescreen_buf[i] = av_calloc(s->planewidth[0], sizeof(**s->prescreen_buf));
+        if (!s->prescreen_buf[i])
+            return AVERROR(ENOMEM);
+    }
+
     return 0;
 }
 
@@ -1156,8 +1158,19 @@ static av_cold void uninit(AVFilterContext *ctx)
 {
     NNEDIContext *s = ctx->priv;
 
+    for (int i = 0; i < s->nb_threads && s->prescreen_buf; i++)
+        av_freep(&s->prescreen_buf[i]);
+
     av_freep(&s->prescreen_buf);
+
+    for (int i = 0; i < s->nb_threads && s->input_buf; i++)
+        av_freep(&s->input_buf[i]);
+
     av_freep(&s->input_buf);
+
+    for (int i = 0; i < s->nb_threads && s->output_buf; i++)
+        av_freep(&s->output_buf[i]);
+
     av_freep(&s->output_buf);
     av_freep(&s->fdsp);