[FFmpeg-devel] [PATCH v7 08/12] swscale/graph: add new high-level scaler dispatch mechanism

Michael Niedermayer michael at niedermayer.cc
Thu Nov 21 03:03:56 EET 2024


On Sat, Nov 16, 2024 at 12:25:03PM +0100, Niklas Haas wrote:
> From: Niklas Haas <git at haasn.dev>
> 
> This interface has been designed from the ground up to serve as a new
> framework for dispatching various scaling operations at a high level. This
> will eventually replace the old ad-hoc system of using cascaded contexts,
> as well as allowing us to plug in more dynamic scaling passes requiring
> intermediate steps, such as colorspace conversions, etc.
> 
> The starter implementation merely piggybacks off the existing sws_init() and
> sws_scale(), functions, though it does bring the immediate improvement of
> splitting up cascaded functions and pre/post conversion functions into
> separate filter passes, which allows them to e.g. be executed in parallel
> even when the main scaler is required to be single threaded. Additionally,
> a dedicated (multi-threaded) noop memcpy pass substantially improves
> throughput of that fast path.
> 
> Follow-up commits will eventually expand this to move all of the scaling
> decision logic into the graph init function, and also eliminate some of the
> current special cases.
> 
> Sponsored-by: Sovereign Tech Fund
> Signed-off-by: Niklas Haas <git at haasn.dev>
> ---
>  libswscale/Makefile |   1 +
>  libswscale/graph.c  | 602 ++++++++++++++++++++++++++++++++++++++++++++
>  libswscale/graph.h  | 122 +++++++++
>  3 files changed, 725 insertions(+)
>  create mode 100644 libswscale/graph.c
>  create mode 100644 libswscale/graph.h
> 
> diff --git a/libswscale/Makefile b/libswscale/Makefile
> index 757997b401..81f32f4dd7 100644
> --- a/libswscale/Makefile
> +++ b/libswscale/Makefile
> @@ -9,6 +9,7 @@ OBJS = alphablend.o                                     \
>         hscale.o                                         \
>         hscale_fast_bilinear.o                           \
>         gamma.o                                          \
> +       graph.o                                          \
>         half2float.o                                     \
>         input.o                                          \
>         options.o                                        \
> diff --git a/libswscale/graph.c b/libswscale/graph.c
> new file mode 100644
> index 0000000000..ec1015653e
> --- /dev/null
> +++ b/libswscale/graph.c
> @@ -0,0 +1,602 @@
> +/*
> + * Copyright (C) 2024 Niklas Haas
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "libavutil/avassert.h"
> +#include "libavutil/error.h"
> +#include "libavutil/macros.h"
> +#include "libavutil/mem.h"
> +#include "libavutil/opt.h"
> +#include "libavutil/pixdesc.h"
> +#include "libavutil/slicethread.h"
> +
> +#include "libswscale/swscale.h"
> +#include "libswscale/utils.h"
> +
> +#include "swscale_internal.h"
> +#include "graph.h"
> +

> +/* slice_align should be a power of two, or 0 to disable slice threading */

Use /** for doxygen,
unless it's intentional


> +static SwsPass *pass_add(SwsGraph *graph, void *priv, int w, int h,
> +                         sws_filter_run_t run, SwsImg in, SwsImg out,
> +                         int slice_align)
> +{
> +    SwsPass *pass = av_mallocz(sizeof(*pass));
> +    int ret;

malloc failure check missing


[...]
> +static int vshift(enum AVPixelFormat fmt, int plane)
> +{
> +    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);
> +    const int is_pal = usePal(fmt);

> +    const int is_chroma = (plane == 1 || plane == 2) && !is_pal;

gray + alpha? (plane 1 would be alpha, not chroma)


[...]
> +static void run_rgb0(const SwsImg *out, const SwsImg *in, int y, int h,
> +                     const SwsPass *pass)
> +{
> +    SwsInternal *c = pass->priv;
> +    const int x0 = c->src0Alpha - 1;
> +    const int w4 = 4 * pass->width;
> +    const int src_stride = in->linesize[0];
> +    const int dst_stride = out->linesize[0];
> +    const uint8_t *src = in->data[0] + y * src_stride;
> +    uint8_t *dst = out->data[0] + y * dst_stride;
> +
> +    for (int y = 0; y < h; y++) {

> +        memcpy(dst, src, w4 * sizeof(*dst));
> +        for (int x = x0; x < w4; x += 4)
> +            dst[x] = 0xFF;

A loop reading 32 or 64 bits at a time, setting FF and writing may be faster, but maybe
this is the wrong time to suggest that


[...]
> diff --git a/libswscale/graph.h b/libswscale/graph.h
> new file mode 100644
> index 0000000000..8b4544973e
> --- /dev/null
> +++ b/libswscale/graph.h
> @@ -0,0 +1,122 @@
> +/*
> + * Copyright (C) 2024 Niklas Haas
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef SWSCALE_GRAPH_H
> +#define SWSCALE_GRAPH_H
> +
> +#include "libavutil/slicethread.h"
> +#include "swscale.h"
> +#include "utils.h"
> +
> +/* Represents a view into a single field of frame data */
> +typedef struct SwsImg {
> +    enum AVPixelFormat fmt;
> +    uint8_t *data[4]; /* points to y=0 */
> +    int linesize[4];
> +} SwsImg;
> +
> +typedef struct SwsPass  SwsPass;
> +typedef struct SwsGraph SwsGraph;
> +
> +/**
> + * Output `h` lines of filtered data. `out` and `in` point to the
> + * start of the image buffer for this pass.
> + */
> +typedef void (*sws_filter_run_t)(const SwsImg *out, const SwsImg *in,
> +                                 int y, int h, const SwsPass *pass);
> +

> +struct SwsPass {

This should have some documentation



> +    const SwsGraph *graph;
> +    uint8_t *buf; /* temporary buffer for this pass, freed automatically */
> +
> +    sws_filter_run_t run;
> +    int width, height; /* new output size */
> +    int pixel_bytes;   /* bytes per pixel */
> +    int slice_h;       /* filter granularity */
> +    int num_slices;
> +
> +    /* Filter input/output. */
> +    SwsImg input;
> +    SwsImg output;
> +
> +    /**
> +     * Called once from the main thread before running the filter. Optional.
> +     * `out` and `in` always point to the main image input/output, regardless
> +     * of `input` and `output` fields.
> +     */
> +    void (*setup)(const SwsImg *out, const SwsImg *in, const SwsPass *pass);

this mixes doxygen-compatible and doxygen-incompatible comments


> +
> +    void (*uninit)(const SwsPass *pass); /* optional */
> +    void *priv;
> +};
> +
> +/* Filter graph, which represents a 'baked' pixel format conversion */
> +typedef struct SwsGraph {
> +    SwsContext *ctx;
> +    AVSliceThread *slicethread;
> +    int num_threads; /* resolved at init() time */
> +    int incomplete;  /* set during init() if formats had to be inferred */
> +    SwsContext *sws; /* wrapped legacy context */
> +
> +    /* Sorted sequence of filter passes to apply */
> +    SwsPass **passes;
> +    int num_passes;
> +
> +    /* Overall image parameters and flags */
> +    SwsContext opts;

3 SwsContexts: ctx, sws, opts. This is a little confusing; please document
this more verbosely



thx

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Those who are too smart to engage in politics are punished by being
governed by those who are dumber. -- Plato 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 195 bytes
Desc: not available
URL: <https://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20241121/6cea1d38/attachment.sig>


More information about the ffmpeg-devel mailing list