[FFmpeg-devel] [PATCH] lavfi: edgedetect filter

Stefano Sabatini stefasab at gmail.com
Wed Aug 8 12:55:15 CEST 2012


On date Tuesday 2012-08-07 23:57:15 +0200, Clément Bœsch encoded:
[...]
> From 66705d9200c50f8f8fa7cc3874c7d494c5bd2c31 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Cl=C3=A9ment=20B=C5=93sch?= <ubitux at gmail.com>
> Date: Thu, 26 Jul 2012 19:45:53 +0200
> Subject: [PATCH] lavfi: edgedetect filter
> 
> FIXME: bump lavfi minor
> ---
>  doc/filters.texi            |   5 +
>  libavfilter/Makefile        |   1 +
>  libavfilter/allfilters.c    |   1 +
>  libavfilter/vf_edgedetect.c | 288 ++++++++++++++++++++++++++++++++++++++++++++
>  tests/lavfi-regression.sh   |   1 +
>  tests/ref/lavfi/edgedetect  |   1 +
>  6 files changed, 297 insertions(+)
>  create mode 100644 libavfilter/vf_edgedetect.c
>  create mode 100644 tests/ref/lavfi/edgedetect
> 
> diff --git a/doc/filters.texi b/doc/filters.texi
> index e73fc09..851d9bb 100644
> --- a/doc/filters.texi
> +++ b/doc/filters.texi
> @@ -1929,6 +1929,11 @@ For more information about libfreetype, check:
>  For more information about fontconfig, check:
>  @url{http://freedesktop.org/software/fontconfig/fontconfig-user.html}.
>  
> + at section edgedetect
> +
> +Detect and draw edges. The filter uses the Canny Edge Detection algorithm, with
> +no parameter at the moment.
> +
>  @section fade
>  
>  Apply fade-in/out effect to input video.
> diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> index b6bd37f..b8d89b0 100644
> --- a/libavfilter/Makefile
> +++ b/libavfilter/Makefile
> @@ -89,6 +89,7 @@ OBJS-$(CONFIG_DELOGO_FILTER)                 += vf_delogo.o
>  OBJS-$(CONFIG_DESHAKE_FILTER)                += vf_deshake.o
>  OBJS-$(CONFIG_DRAWBOX_FILTER)                += vf_drawbox.o
>  OBJS-$(CONFIG_DRAWTEXT_FILTER)               += vf_drawtext.o
> +OBJS-$(CONFIG_EDGEDETECT_FILTER)             += vf_edgedetect.o
>  OBJS-$(CONFIG_FADE_FILTER)                   += vf_fade.o
>  OBJS-$(CONFIG_FIELDORDER_FILTER)             += vf_fieldorder.o
>  OBJS-$(CONFIG_FIFO_FILTER)                   += fifo.o
> diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
> index da1c8e6..91eec2a 100644
> --- a/libavfilter/allfilters.c
> +++ b/libavfilter/allfilters.c
> @@ -79,6 +79,7 @@ void avfilter_register_all(void)
>      REGISTER_FILTER (DESHAKE,     deshake,     vf);
>      REGISTER_FILTER (DRAWBOX,     drawbox,     vf);
>      REGISTER_FILTER (DRAWTEXT,    drawtext,    vf);
> +    REGISTER_FILTER (EDGEDETECT,  edgedetect,  vf);
>      REGISTER_FILTER (FADE,        fade,        vf);
>      REGISTER_FILTER (FIELDORDER,  fieldorder,  vf);
>      REGISTER_FILTER (FIFO,        fifo,        vf);
> diff --git a/libavfilter/vf_edgedetect.c b/libavfilter/vf_edgedetect.c
> new file mode 100644
> index 0000000..3299535
> --- /dev/null
> +++ b/libavfilter/vf_edgedetect.c
> @@ -0,0 +1,288 @@
> +/*
> + * Copyright (c) 2012 Clément Bœsch
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +/**
> + * @file
> + * Edge detection filter
> + *
> + * @url https://en.wikipedia.org/wiki/Canny_edge_detector
> + */
> +
> +#include "avfilter.h"
> +#include "formats.h"
> +#include "internal.h"
> +#include "video.h"
> +
> +typedef struct {
> +    uint8_t  *tmpbuf;
> +    uint16_t *gradients;
> +    char     *directions;
> +} EdgeDetectContext;
> +
> +static int query_formats(AVFilterContext *ctx)
> +{
> +    static const enum PixelFormat pix_fmts[] = {PIX_FMT_GRAY8, PIX_FMT_NONE};
> +    ff_set_common_formats(ctx, ff_make_format_list(pix_fmts));
> +    return 0;
> +}
> +
> +static int config_props(AVFilterLink *inlink)
> +{
> +    AVFilterContext *ctx = inlink->dst;
> +    EdgeDetectContext *edgedetect = ctx->priv;
> +
> +    edgedetect->tmpbuf     = av_malloc(inlink->w * inlink->h);
> +    edgedetect->gradients  = av_calloc(inlink->w * inlink->h, sizeof(*edgedetect->gradients));
> +    edgedetect->directions = av_malloc(inlink->w * inlink->h);
> +    if (!edgedetect->tmpbuf || !edgedetect->gradients || !edgedetect->directions)
> +        return AVERROR(ENOMEM);
> +    return 0;
> +}
> +

> +static void gaussian_blur(AVFilterContext *ctx, int w, int h,
> +                                uint8_t *dst, int dst_linesize,
> +                          const uint8_t *src, int src_linesize)
> +{
> +    int i, j;
> +
> +    memcpy(dst, src, w); dst += dst_linesize; src += src_linesize;
> +    memcpy(dst, src, w); dst += dst_linesize; src += src_linesize;
> +    for (j = 2; j < h - 2; j++) {
> +        dst[0] = src[0];
> +        dst[1] = src[1];
> +        for (i = 2; i < w - 2; i++) {
> +            dst[i] = ((src[-2*src_linesize + i-2] + src[2*src_linesize + i-2]) * 2
> +                    + (src[-2*src_linesize + i-1] + src[2*src_linesize + i-1]) * 4
> +                    + (src[-2*src_linesize + i  ] + src[2*src_linesize + i  ]) * 5
> +                    + (src[-2*src_linesize + i+1] + src[2*src_linesize + i+1]) * 4
> +                    + (src[-2*src_linesize + i+2] + src[2*src_linesize + i+2]) * 2
> +
> +                    + (src[  -src_linesize + i-2] + src[  src_linesize + i-2]) *  4
> +                    + (src[  -src_linesize + i-1] + src[  src_linesize + i-1]) *  9
> +                    + (src[  -src_linesize + i  ] + src[  src_linesize + i  ]) * 12
> +                    + (src[  -src_linesize + i+1] + src[  src_linesize + i+1]) *  9
> +                    + (src[  -src_linesize + i+2] + src[  src_linesize + i+2]) *  4
> +
> +                    + src[i-2] *  5
> +                    + src[i-1] * 12
> +                    + src[i  ] * 15
> +                    + src[i+1] * 12
> +                    + src[i+2] *  5) / 159;
> +        }

My feeling is that we should avoid hardcoding convolution operations,
and write generic code for them. Also we may want to make the size of
the Gaussian mask parametric, as well as the sigma parameter used to
compute the mask.

Convolution operations are useful per se, and could be used to
implement ad-hoc filters.

> +        dst[i    ] = src[i    ];
> +        dst[i + 1] = src[i + 1];
> +
> +        dst += dst_linesize;
> +        src += src_linesize;
> +    }
> +    memcpy(dst, src, w); dst += dst_linesize; src += src_linesize;
> +    memcpy(dst, src, w); dst += dst_linesize; src += src_linesize;
> +}
> +
> +enum {
> +    DIRECTION_45UP,
> +    DIRECTION_45DOWN,
> +    DIRECTION_HORIZONTAL,
> +    DIRECTION_VERTICAL,
> +};
> +
> +static int get_rounded_direction(int gx, int gy)
> +{
> +    /* reference angles:
> +     *   tan( pi/8) = sqrt(2)-1
> +     *   tan(3pi/8) = sqrt(2)+1
> +     * Gy/Gx is the tangent of the angle (theta), so Gy/Gx is compared against
> +     * <ref-angle>, or more simply Gy against <ref-angle>*Gx
> +     *
> +     * Gx and Gy bounds = [1020;1020], using 16-bit arith:
> +     *   round((sqrt(2)-1) * (1<<16)) =  27146
> +     *   round((sqrt(2)+1) * (1<<16)) = 158218
> +     */
> +    if (gx) {
> +        int tanpi8gx, tan3pi8gx;
> +
> +        if (gx < 0)
> +            gx = -gx, gy = -gy;
> +        gy <<= 16;
> +        tanpi8gx  =  27145 * gx;
> +        tan3pi8gx = 158218 * gx;
> +        if (gy > -tan3pi8gx && gy < -tanpi8gx)  return DIRECTION_45UP;
> +        if (gy > -tanpi8gx  && gy <  tanpi8gx)  return DIRECTION_HORIZONTAL;
> +        if (gy >  tanpi8gx  && gy <  tan3pi8gx) return DIRECTION_45DOWN;
> +    }
> +    return DIRECTION_VERTICAL;
> +}
> +

> +static void sobel(AVFilterContext *ctx, int w, int h,
> +                        uint16_t *dst, int dst_linesize,
> +                  const uint8_t  *src, int src_linesize)
> +{
> +    int i, j;
> +    EdgeDetectContext *edgedetect = ctx->priv;
> +
> +    for (j = 1; j < h - 1; j++) {
> +        dst += dst_linesize;
> +        src += src_linesize;
> +        for (i = 1; i < w - 1; i++) {
> +            const int gx =
> +                -1*src[-src_linesize + i-1] + 1*src[-src_linesize + i+1]
> +                -2*src[                i-1] + 2*src[                i+1]
> +                -1*src[ src_linesize + i-1] + 1*src[ src_linesize + i+1];
> +            const int gy =
> +                -1*src[-src_linesize + i-1] + 1*src[ src_linesize + i-1]
> +                -2*src[-src_linesize + i  ] + 2*src[ src_linesize + i  ]
> +                -1*src[-src_linesize + i+1] + 1*src[ src_linesize + i+1];
> +
> +            dst[i] = FFABS(gx) + FFABS(gy);
> +            edgedetect->directions[j*w + i] = get_rounded_direction(gx, gy);
> +        }
> +    }
> +}

Same consideration as above.

> +
> +static void non_maximum_suppression(AVFilterContext *ctx, int w, int h,
> +                                          uint8_t  *dst, int dst_linesize,
> +                                    const uint16_t *src, int src_linesize)
> +{
> +    int i, j;
> +    EdgeDetectContext *edgedetect = ctx->priv;
> +
> +#define COPY_MAXIMA(ay, ax, by, bx) do {                \
> +    if (src[i] > src[(ay)*src_linesize + i+(ax)] &&     \
> +        src[i] > src[(by)*src_linesize + i+(bx)])       \
> +        dst[i] = av_clip_uint8(src[i]);                 \
> +} while (0)
> +
> +    for (j = 1; j < h - 1; j++) {
> +        dst += dst_linesize;
> +        src += src_linesize;
> +        for (i = 1; i < w - 1; i++) {
> +            switch (edgedetect->directions[j*w + i]) {
> +            case DIRECTION_45UP:        COPY_MAXIMA( 1, -1, -1,  1); break;
> +            case DIRECTION_45DOWN:      COPY_MAXIMA(-1, -1,  1,  1); break;
> +            case DIRECTION_HORIZONTAL:  COPY_MAXIMA( 0, -1,  0,  1); break;
> +            case DIRECTION_VERTICAL:    COPY_MAXIMA(-1,  0,  1,  0); break;
> +            }
> +        }
> +    }
> +}
> +
> +static void double_threshold(AVFilterContext *ctx, int w, int h,
> +                                   uint8_t *dst, int dst_linesize,
> +                             const uint8_t *src, int src_linesize)
> +{
> +    int i, j;
> +
> +#define THRES_HIGH 80
> +#define THRES_LOW  20

These values should be made parametric (and expressed as float values
in the [0, 1] range).

> +
> +    for (j = 0; j < h; j++) {
> +        for (i = 0; i < w; i++) {
> +            if (src[i] > THRES_HIGH) {
> +                dst[i] = src[i];
> +                continue;
> +            }
> +
> +            if ((!i || i == w - 1 || !j || j == h - 1) &&
> +                src[i] > THRES_LOW &&
> +                (src[-src_linesize + i-1] > THRES_HIGH ||
> +                 src[-src_linesize + i  ] > THRES_HIGH ||
> +                 src[-src_linesize + i+1] > THRES_HIGH ||
> +                 src[                i-1] > THRES_HIGH ||
> +                 src[                i+1] > THRES_HIGH ||
> +                 src[ src_linesize + i-1] > THRES_HIGH ||
> +                 src[ src_linesize + i  ] > THRES_HIGH ||
> +                 src[ src_linesize + i+1] > THRES_HIGH))
> +                dst[i] = src[i];
> +            else
> +                dst[i] = 0;
> +        }
> +        dst += dst_linesize;
> +        src += src_linesize;
> +    }
> +}
> +
> +static int end_frame(AVFilterLink *inlink)
> +{
> +    AVFilterContext *ctx = inlink->dst;
> +    EdgeDetectContext *edgedetect = ctx->priv;
> +    AVFilterLink *outlink = inlink->dst->outputs[0];
> +    AVFilterBufferRef  *inpicref = inlink->cur_buf;
> +    AVFilterBufferRef *outpicref = outlink->out_buf;
> +    uint8_t  *tmpbuf    = edgedetect->tmpbuf;
> +    uint16_t *gradients = edgedetect->gradients;
> +
> +    /* gaussian filter to reduce noise  */
> +    gaussian_blur(ctx, inlink->w, inlink->h,
> +                  tmpbuf,            inlink->w,
> +                  inpicref->data[0], inpicref->linesize[0]);
> +
> +    /* compute the 16-bits gradients and directions for the next step */
> +    sobel(ctx, inlink->w, inlink->h,
> +          gradients, inlink->w,
> +          tmpbuf,    inlink->w);
> +
> +    /* non_maximum_suppression() will actually keep & clip what's necessary and
> +     * ignore the rest, so we need a clean output buffer */
> +    memset(tmpbuf, 0, inlink->w * inlink->h);
> +    non_maximum_suppression(ctx, inlink->w, inlink->h,
> +                            tmpbuf,    inlink->w,
> +                            gradients, inlink->w);
> +
> +    /* keep high values, or low values surrounded by high values */
> +    double_threshold(ctx, inlink->w, inlink->h,
> +                     outpicref->data[0], outpicref->linesize[0],
> +                     tmpbuf,             inlink->w);
> +
> +    ff_draw_slice(outlink, 0, outlink->h, 1);
> +    return ff_end_frame(outlink);
> +}
> +
> +static av_cold void uninit(AVFilterContext *ctx)
> +{
> +    EdgeDetectContext *edgedetect = ctx->priv;
> +    av_freep(&edgedetect->tmpbuf);
> +    av_freep(&edgedetect->gradients);
> +    av_freep(&edgedetect->directions);
> +}
> +
> +static int null_draw_slice(AVFilterLink *inlink, int y, int h, int slice_dir) { return 0; }
> +
> +AVFilter avfilter_vf_edgedetect = {
> +    .name          = "edgedetect",
> +    .description   = NULL_IF_CONFIG_SMALL("Detect and draw edge."),
> +    .priv_size     = sizeof(EdgeDetectContext),
> +    .uninit        = uninit,
> +    .query_formats = query_formats,
> +

> +    .inputs    = (const AVFilterPad[]) {{ .name             = "default",
> +                                          .type             = AVMEDIA_TYPE_VIDEO,
> +                                          .draw_slice       = null_draw_slice,
> +                                          .config_props     = config_props,
> +                                          .end_frame        = end_frame,
> +                                          .min_perms        = AV_PERM_READ
> +                                        },
> +                                        { .name = NULL }

Nit+: weird indent, I would prefer:
   .inputs    = (const AVFilterPad[]) {
      {
          .name             = "default",
          .type             = AVMEDIA_TYPE_VIDEO,
          .draw_slice       = null_draw_slice,
          .config_props     = config_props,
          .end_frame        = end_frame,
          .min_perms        = AV_PERM_READ
       },
       { .name = NULL }
   }

[...]

Another consideration: we could optionally support faster, less
accurate algorithms (e.g. Marr-Hildreth).
-- 
FFmpeg = Fundamentalist and Frightening Mega Purposeless Extroverse Goblin


More information about the ffmpeg-devel mailing list