[FFmpeg-devel] [PATCH] libavfilter image transformations

Bobby Bingham uhmmmm
Wed Apr 21 04:07:46 CEST 2010


On Tue, 20 Apr 2010 17:03:06 -0400
"Daniel G. Taylor" <dan at programmer-art.org> wrote:

> Index: libavfilter/transform.c
> ===================================================================
> --- libavfilter/transform.c	(revision 0)
> +++ libavfilter/transform.c	(revision 0)
> @@ -0,0 +1,200 @@
> +/*
> + * Copyright (C) 2010 Daniel G. Taylor <dan at programmer-art.org>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +/**
> + * @file libavfilter/transform.c
> + * transform input video
> + */
> +
> +#include "libavutil/common.h"
> +
> +#include "transform.h"
> +
> +#define INTERPOLATE_METHOD(name) static uint8_t name(float x, float y, const uint8_t *src, int width, int height, int stride, uint8_t def)
> +
> +#define PIXEL(img, x, y, w, h, stride, def) ((x) < 0 || (y) < 0) ? (def) : (((x) >= (w) || (y) >= (h)) ? (def) : img[(x) + (y) * (stride)])
> +
> +/**
> + * Nearest neighbor interpolation
> + */
> +INTERPOLATE_METHOD(interpolate_nearest)
> +{
> +    if (x < - 1 || x > width || y < -1 || y > height)
> +        return def;    
> +    else
> +        return PIXEL(src, (int)round(x), (int)round(y), width, height, stride, def);
> +}

In typical usage, is it actually faster to do an out-of-range check on
x and y twice like this (once here and again inside PIXEL)?  Or is
round() expensive enough to make the early return worthwhile?

> +
> +/**
> + * Bilinear interpolation
> + */
> +INTERPOLATE_METHOD(interpolate_bilinear)
> +{
> +    int x_c, x_f, y_c, y_f;
> +    int v1, v2, v3, v4;
> +    float s;
> +    
> +    if (x < -1 || x > width || y < -1 || y > height) {
> +        return def;
> +    } else {
> +        x_c = ceil(x);
> +        x_f = floor(x);
> +        if (x_c == x_f)
> +            x_c++;

Why do you use floor/ceil here and floorf/ceilf below?

I guess this is probably faster, but benchmarks are welcome:

x_f = floorf(x);
x_c = x_f + 1;

Note that if x_c == x_f, x is an integer and you don't need to average
as many pixel values.  That case may be rare enough, though, that
testing for it is more expensive than just doing the extra averaging.

> +
> +        y_c = ceil(y);
> +        y_f = floor(y);
> +        if (y_c == y_f)
> +            y_c++;

Ditto.

> +
> +        v1 = PIXEL(src, x_c, y_c, width, height, stride, def);
> +        v2 = PIXEL(src, x_c, y_f, width, height, stride, def);
> +        v3 = PIXEL(src, x_f, y_c, width, height, stride, def);
> +        v4 = PIXEL(src, x_f, y_f, width, height, stride, def);
> +        
> +        s = (v1*(x - x_f)*(y - y_f) + v2*((x - x_f)*(y_c - y)) + 
> +             v3*(x_c - x)*(y - y_f) + v4*((x_c - x)*(y_c - y)));
> +
> +        return (uint8_t) s;
> +    }
> +}
> +
> +/**
> + * Biquadratic interpolation
> + */
> +INTERPOLATE_METHOD(interpolate_biquadratic)
> +{
> +    int     x_c, x_f, y_c, y_f;
> +    uint8_t v1,  v2,  v3,  v4;
> +    float   f1,  f2,  f3,  f4;
> +    
> +    if (x < - 1 || x > width || y < -1 || y > height)
> +        return def;
> +    else {
> +        x_c = (int)ceilf(x);
> +        x_f = (int)floorf(x);
> +        y_c = (int)ceilf(y);
> +        y_f = (int)floorf(y);

The math for this function works out whether or not you specially handle
the case of integral x or y, but I'd be interested to see which is
faster, this or the x_c = x_f + 1 idea above.

> +        v1 = PIXEL(src, x_c, y_c, width, height, stride, def);
> +        v2 = PIXEL(src, x_c, y_f, width, height, stride, def);
> +        v3 = PIXEL(src, x_f, y_c, width, height, stride, def);
> +        v4 = PIXEL(src, x_f, y_f, width, height, stride, def);
> +        f1 = 1 - sqrt(fabs(x_c - x) * fabs(y_c - y));
> +        f2 = 1 - sqrt(fabs(x_c - x) * fabs(y_f - y));
> +        f3 = 1 - sqrt(fabs(x_f - x) * fabs(y_c - y));
> +        f4 = 1 - sqrt(fabs(x_f - x) * fabs(y_f - y));

You know x_f <= x <= x_c, and similarly for y.  You can avoid all the
fabs calls by ordering the subtractions (e.g. x_c - x and x - x_f).

> +        return (v1*f1 + v2*f2 + v3*f3+ v4*f4)/(f1 + f2 + f3 + f4);
> +    }
> +}
> +
> +void avfilter_get_matrix(double x_shift, double y_shift, double angle, double zoom, double *matrix) {
> +    matrix[0] = zoom * cos(angle);
> +    matrix[1] = -sin(angle);
> +    matrix[2] = x_shift;
> +    matrix[3] = -matrix[1];
> +    matrix[4] = matrix[0];
> +    matrix[5] = y_shift;
> +    matrix[6] = 0;
> +    matrix[7] = 0;
> +    matrix[8] = 1;
> +}
> +
> +void avfilter_add_matrix(const double *m1, const double *m2, double *result)
> +{
> +    result[0] = m1[0] + m2[0];
> +    result[1] = m1[1] + m2[1];
> +    result[2] = m1[2] + m2[2];
> +    result[3] = m1[3] + m2[3];
> +    result[4] = m1[4] + m2[4];
> +    result[5] = m1[5] + m2[5];
> +    result[6] = m1[6] + m2[6];
> +    result[7] = m1[7] + m2[7];
> +    result[8] = m1[8] + m2[8];
> +}
> +
> +void avfilter_sub_matrix(const double *m1, const double *m2, double *result)
> +{
> +    result[0] = m1[0] - m2[0];
> +    result[1] = m1[1] - m2[1];
> +    result[2] = m1[2] - m2[2];
> +    result[3] = m1[3] - m2[3];
> +    result[4] = m1[4] - m2[4];
> +    result[5] = m1[5] - m2[5];
> +    result[6] = m1[6] - m2[6];
> +    result[7] = m1[7] - m2[7];
> +    result[8] = m1[8] - m2[8];
> +}
> +
> +void avfilter_mul_matrix(const double *m1, double scalar, double *result)
> +{
> +    result[0] = m1[0] * scalar;
> +    result[1] = m1[1] * scalar;
> +    result[2] = m1[2] * scalar;
> +    result[3] = m1[3] * scalar;
> +    result[4] = m1[4] * scalar;
> +    result[5] = m1[5] * scalar;
> +    result[6] = m1[6] * scalar;
> +    result[7] = m1[7] * scalar;
> +    result[8] = m1[8] * scalar;
> +}
> +
> +void avfilter_transform(const uint8_t *src, uint8_t *dst, int src_stride, int dst_stride, int width, int height, const double *matrix, enum InterpolateMethod interpolate, enum FillMethod fill)
> +{
> +    int x, y;
> +    double x_s, y_s;
> +    uint8_t def;
> +    uint8_t (*func)(float, float, const uint8_t *, int, int, int, uint8_t) = NULL;
> +
> +    switch(interpolate) {
> +        case INTERPOLATE_NEAREST:
> +            func = interpolate_nearest;
> +            break;
> +        case INTERPOLATE_BILINEAR:
> +            func = interpolate_bilinear;
> +            break;
> +        case INTERPOLATE_BIQUADRATIC:
> +            func = interpolate_biquadratic;
> +            break;
> +    }
> +    
> +    for (y = 0; y < height; y++) {
> +        for(x = 0; x < width; x++) {
> +            x_s = x * matrix[0] + y * matrix[1] + matrix[2];
> +            y_s = x * matrix[3] + y * matrix[4] + matrix[5];
> +
> +            switch(fill) {
> +                case FILL_BLANK:
> +                    def = 0;
> +                    break;
> +                case FILL_ORIGINAL:
> +                    def = src[y * src_stride + x];
> +                    break;
> +                case FILL_EXTRUDE:
> +                    def = src[av_clip(y_s, 0, height - 1) * src_stride + av_clip(x_s, 0, width - 1)];
> +                    break;
> +                case FILL_MIRROR:
> +                    def = src[(int)((y_s < 0) ? -y_s : (y_s >= height) ? (height + height - y_s) : y_s) * src_stride + (int)((x_s < 0) ? -x_s : (x_s >= width) ? (width + width - x_s) : x_s)];

For these last two cases, if instead of setting def, you modified x_s
and y_s, you'd get the benefit of interpolation in the
mirrored/extruded part of the image as well.

> +            }
> +            
> +            dst[y * dst_stride + x] = func(x_s, y_s, src, width, height, src_stride, def);
> +        }
> +    }
> +}
> +
> Index: libavfilter/transform.h
> ===================================================================
> --- libavfilter/transform.h	(revision 0)
> +++ libavfilter/transform.h	(revision 0)
> @@ -0,0 +1,114 @@
> +/*
> + * Copyright (C) 2010 Daniel G. Taylor <dan at programmer-art.org>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +/**
> + * @file libavfilter/transform.h
> + * transform input video
> + *
> + * All matrices are defined as a single 9-item block of contiguous memory. For
> + * example, the identity matrix would be:
> + * 
> + *     double matrix[9] = {1, 0, 0,
> + *                         0, 1, 0,
> + *                         0, 0, 1};
> + */
> +
> +enum InterpolateMethod {
> +    INTERPOLATE_NEAREST,        //< Nearest-neighbor (fast)
> +    INTERPOLATE_BILINEAR,       //< Bilinear
> +    INTERPOLATE_BIQUADRATIC,    //< Biquadratic (best)
> +};
> +
> +// Shortcuts for the fastest and best interpolation methods
> +#define INTERPOLATE_FAST INTERPOLATE_NEAREST
> +#define INTERPOLATE_BEST INTERPOLATE_BIQUADRATIC
> +
> +enum FillMethod {
> +    FILL_BLANK,         //< Fill zeroes at blank locations
> +    FILL_ORIGINAL,      //< Original image at blank locations
> +    FILL_EXTRUDE,       //< Extruded edge value at blank locations
> +    FILL_MIRROR,        //< Mirrored edge at blank locations
> +};
> +
> +/**
> + * Get an affine transformation matrix from a given translation, rotation, and
> + * zoom factor. The matrix will look like:
> + *
> + * [ zoom * cos(angle),           -sin(angle),     x_shift,
> + *          sin(angle),     zoom * cos(angle),     y_shift,
> + *                   0,                     0,           1 ]
> + *
> + * Parameters:
> + *  x_shift: Horizontal translation
> + *  y_shift: Vertical translation
> + *    angle: Rotation in radians
> + *     zoom: Scale percent (1.0 = 100%)
> + *   matrix: 9-item affine transformation matrix
> + */
> +void avfilter_get_matrix(double x_shift, double y_shift, double angle, double zoom, double *matrix);
> +
> +/**
> + * Add two matrices together. result = m1 + m2.
> + *
> + * Parameters:
> + *      m1: 9-item transformation matrix
> + *      m2: 9-item transformation matrix
> + *  result: 9-item transformation matrix
> + */
> +void avfilter_add_matrix(const double *m1, const double *m2, double *result);
> +
> +/**
> + * Subtract one matrix from another. result = m1 - m2.
> + *
> + * Parameters:
> + *      m1: 9-item transformation matrix
> + *      m2: 9-item transformation matrix
> + *  result: 9-item transformation matrix
> + */
> +void avfilter_sub_matrix(const double *m1, const double *m2, double *result);
> +
> +/**
> + * Multiply a matrix by a scalar value. result = m1 * scalar.
> + *
> + * Parameters:
> + *      m1: 9-item transformation matrix
> + *  scalar: A number
> + *  result: 9-item transformation matrix
> + */
> +void avfilter_mul_matrix(const double *m1, double scalar, double *result);
> +
> +/**
> + * Do an affine transformation with the given interpolation method. This
> + * multiplies each vector [x,y,1] by the matrix and then interpolates to
> + * get the final value.
> + *
> + * Parameters:
> + *          src: Source image
> + *          dst: Destination image
> + *   src_stride: Source image line size in bytes
> + *   dst_stride: Destination image line size in bytes
> + *        width: Image width in pixels
> + *       height: Image height in pixels
> + *       matrix: 9-item affine transformation matrix
> + *  interpolate: Pixel interpolation method
> + *         fill: Edge fill method
> + */
> +void avfilter_transform(const uint8_t *src, uint8_t *dst, int src_stride, int dst_stride, int width, int height, const double *matrix, enum InterpolateMethod interpolate, enum FillMethod fill);
> +
> Index: libavfilter/Makefile
> ===================================================================
> --- libavfilter/Makefile	(revision 22924)
> +++ libavfilter/Makefile	(working copy)
> @@ -13,6 +13,7 @@
>         formats.o                                                        \
>         graphparser.o                                                    \
>         parseutils.o                                                     \
> +       transform.o                                                      \
>  
>  OBJS-$(CONFIG_ASPECT_FILTER)                 += vf_aspect.o
>  OBJS-$(CONFIG_CROP_FILTER)                   += vf_crop.o


-- 
Bobby Bingham
??????????????????????



More information about the ffmpeg-devel mailing list