[FFmpeg-devel] [PATCH 2/6] dvdsubenc: make it usable for transcoding.

Clément Bœsch ubitux at gmail.com
Thu Aug 9 00:23:45 CEST 2012


On Tue, Aug 07, 2012 at 09:13:30PM +0200, Nicolas George wrote:
> DVD subtitles packets can only encode a single rectangle:
> if there are several, copy them into a big transparent one.
> 
> DVD subtitles rely on an external 16-colors palette:
> use a reasonable default one, stored in the private context,
> and encode it into the extradata, as specified by Matroska.
> TODO: allow to change the palette with an option.
> 
> Each packet can use four colors out of the global palette.
> The old logic was to map transparent colors to the color 0
> and all other colors to 3, 2, 1, cyclically in descending
> frequency order, completely disregarding the original color.
> 
> Select the "best" four colors from the global palette, according
> to heuristics based on frequency, opacity and brightness, and
> arrange them in standard DVD order: background, foreground,
> outline, other.
> TODO: select the alpha value more finely; see if CHG_COLCON can
> allow more than 4 colors per packet.
> 
> Reference:
> http://dvd.sourceforge.net/dvdinfo/spu.html
> 
> With these changes, dvdsubenc can be used to transcode DVB subtitles
> and get a very decent result.
> 
> Signed-off-by: Nicolas George <nicolas.george at normalesup.org>
> ---
>  libavcodec/dvdsubenc.c |  333 ++++++++++++++++++++++++++++++++++++++----------
>  1 file changed, 263 insertions(+), 70 deletions(-)
> 
> 
> Changed to use bprint for the extradata.
> 
> 
> diff --git a/libavcodec/dvdsubenc.c b/libavcodec/dvdsubenc.c
> index 543fe61..9a15e06 100644
> --- a/libavcodec/dvdsubenc.c
> +++ b/libavcodec/dvdsubenc.c
> @@ -21,7 +21,12 @@
>  #include "avcodec.h"
>  #include "bytestream.h"
>  #include "libavutil/avassert.h"
> +#include "libavutil/bprint.h"
> +#include "libavutil/imgutils.h"
>  
> +typedef struct {
> +    uint32_t global_palette[16];
> +} DVDSubtitleContext;
>  
>  // ncnt is the nibble counter
>  #define PUTNIBBLE(val)\
> @@ -85,73 +90,226 @@ static void dvd_encode_rle(uint8_t **pq,
>      *pq = q;
>  }
>  
> -static int encode_dvd_subtitles(uint8_t *outbuf, int outbuf_size,
> +static int color_distance(uint32_t a, uint32_t b)
> +{
> +    int r = 0, d, i;
> +
> +    for (i = 0; i < 32; i += 8) {
> +        d = ((a >> i) & 0xFF) - ((b >> i) & 0xFF);

nit: & is known for its low priority, you can safely drop some parenthesis
here, and eventually remove some spaces to explicit the priority.

> +        r += d * d;
> +    }
> +    return r;
> +}
> +
> +static void count_colors(AVCodecContext *avctx, unsigned hits[33],
> +                         const AVSubtitleRect *r)
> +{
> +    DVDSubtitleContext *dvdc = avctx->priv_data;
> +    unsigned count[256] = { 0 };
> +    uint32_t *palette = (uint32_t *)r->pict.data[1];
> +    uint32_t color;
> +    int x, y, i, j, match, d, best_d, av_uninit(best_j);
> +    uint8_t *p = r->pict.data[0];
> +
> +    for (y = 0; y < r->h; y++) {
> +        for (x = 0; x < r->w; x++)
> +            count[*(p++)]++;
> +        p += r->pict.linesize[0] - r->w;
> +    }
> +    for (i = 0; i < 256; i++) {
> +        if (!count[i]) /* avoid useless search */
> +            continue;
> +        color = palette[i];

the palette always has 256 colors defined?

> +        /* 0: transparent, 1-16: semo-transparent, 17-33 opaque */
                                    ^^^^
semi

> +        match = color < 0x33000000 ? 0 : color < 0xCC000000 ? 1 : 17;
> +        if (match) {
> +            best_d = INT_MAX;
> +            for (j = 0; j < 16; j++) {
> +                d = color_distance(color & 0xFFFFFF, dvdc->global_palette[j]);
> +                if (d < best_d) {
> +                    best_d = d;
> +                    best_j = j;
> +                }
> +            }
> +            match += best_j;
> +        }
> +        hits[match] += count[i];

Maybe add another comment on top of the function (sorry I ask for comments
a lot) describing how hits is sorted; aka if the color gets close to the
original one and is "important", it will be at the beginning or something.
A bit like what you said in the description.

> +    }
> +}
> +
> +static void select_palette(AVCodecContext *avctx, int out_palette[4],
> +                           int out_alpha[4], unsigned hits[33])
> +{
> +    DVDSubtitleContext *dvdc = avctx->priv_data;
> +    int i, j, bright, mult;
> +    uint32_t color;
> +    int selected[4] = { 0 };
> +    uint32_t pseudopal[33] = { 0 };
> +    uint32_t refcolor[3] = { 0x00000000, 0xFFFFFFFF, 0xFF000000 };
> +
> +    /* Bonus for transparent: if the rectangle fits tightly the text, the
> +       background color can be quite rare, but it would be ugly without it */
> +    hits[0] *= 16;
> +    /* Bonus for bright colors */
> +    for (i = 0; i < 16; i++) {
> +        if (!(hits[1 + i] + hits[17 + i]))
> +            continue;

Maybe add "// ignore non-opaque colors" so those magics 1 & 17 make sense
here and in the following of the function.

> +        color = dvdc->global_palette[i];
> +        bright = 0;
> +        for (j = 0; j < 3; j++, color >>= 8)
> +            bright += (color & 0xFF) < 0x40 || (color & 0xFF) >= 0xC0;
> +        mult = 2 + FFMIN(bright, 2);
> +        hits[ 1 + i] *= mult;
> +        hits[17 + i] *= mult;
> +    }
> +
> +    /* Select four most frequent colors */
> +    for (i = 0; i < 4; i++) {
> +        for (j = 0; j < 33; j++)
> +            if (hits[j] > hits[selected[i]])
> +                selected[i] = j;
> +        hits[selected[i]] = 0;
> +    }
> +
> +    /* Order the colors like in most DVDs:
> +       0: background, 1: foreground, 2: outline */
> +    for (i = 0; i < 16; i++) {
> +        pseudopal[ 1 + i] = 0x80000000 | dvdc->global_palette[i];
> +        pseudopal[17 + i] = 0xFF000000 | dvdc->global_palette[i];
> +    }
> +    for (i = 0; i < 3; i++) {
> +        int best_d = color_distance(refcolor[i], pseudopal[selected[i]]);
> +        for (j = i + 1; j < 4; j++) {
> +            int d = color_distance(refcolor[i], pseudopal[selected[j]]);
> +            if (d < best_d) {
> +                FFSWAP(int, selected[i], selected[j]);
> +                best_d = d;
> +            }
> +        }
> +    }
> +
> +    /* Output */
> +    for (i = 0; i < 4; i++) {
> +        out_palette[i] = selected[i] ? (selected[i] - 1) & 0xF : 0;
> +        out_alpha  [i] = !selected[i] ? 0 : selected[i] < 17 ? 0x80 : 0xFF;
> +    }
> +}
> +
> +static void build_color_map(AVCodecContext *avctx, int cmap[],
> +                            uint32_t palette[],

const?

question: is there some particular reason for using [] instead of * here?

> +                            int out_palette[], int out_alpha[])
> +{
> +    DVDSubtitleContext *dvdc = avctx->priv_data;
> +    int i, j, d, best_d;
> +    uint32_t pseudopal[4];
> +
> +    for (i = 0; i < 4; i++)
> +        pseudopal[i] = (out_alpha[i] << 24) |
> +                       dvdc->global_palette[out_palette[i]];
> +    for (i = 0; i < 256; i++) {
> +        best_d = INT_MAX;
> +        for (j = 0; j < 4; j++) {
> +            d = color_distance(pseudopal[j], palette[i]);
> +            if (d < best_d) {
> +                cmap[i] = j;
> +                best_d = d;
> +            }
> +        }
> +    }
> +}
> +
> +static void copy_rectangle(AVSubtitleRect *dst, AVSubtitleRect *src, int cmap[])
> +{
> +    int x, y;
> +    uint8_t *p, *q;
> +
> +    p = src->pict.data[0];
> +    q = dst->pict.data[0] + (src->x - dst->x) +
> +                            (src->y - dst->y) * dst->pict.linesize[0];
> +    for (y = 0; y < src->h; y++) {
> +        for (x = 0; x < src->w; x++)
> +            *(q++) = cmap[*(p++)];

I'm still uncomfortable when I see the ( ) like this because they look
necessary while they aren't.

> +        p += src->pict.linesize[0] - src->w;
> +        q += dst->pict.linesize[0] - src->w;
> +    }
> +}
> +
> +static int encode_dvd_subtitles(AVCodecContext *avctx,
> +                                uint8_t *outbuf, int outbuf_size,
>                                  const AVSubtitle *h)
>  {
>      uint8_t *q, *qq;
> -    int object_id;
> -    int offset1[20], offset2[20];
> -    int i, imax, color, alpha, rects = h->num_rects;
> -    unsigned long hmax;
> -    unsigned long hist[256];
> -    int           cmap[256];
> +    int offset1, offset2;
> +    int i, rects = h->num_rects, ret;
> +    unsigned global_palette_hits[33] = { 0 };
> +    int cmap[256];
> +    int out_palette[4];
> +    int out_alpha[4];
> +    AVSubtitleRect vrect;
> +    uint8_t *vrect_data = NULL;
> +    int x2, y2;
>  
>      if (rects == 0 || h->rects == NULL)
> -        return -1;
> -    if (rects > 20)
> -        rects = 20;
> -
> -    // analyze bitmaps, compress to 4 colors
> -    for (i=0; i<256; ++i) {
> -        hist[i] = 0;
> -        cmap[i] = 0;
> -    }
> -    for (object_id = 0; object_id < rects; object_id++)
> -        for (i=0; i<h->rects[object_id]->w*h->rects[object_id]->h; ++i) {
> -            color = h->rects[object_id]->pict.data[0][i];
> -            // only count non-transparent pixels
> -            alpha = ((uint32_t*)h->rects[object_id]->pict.data[1])[color] >> 24;
> -            hist[color] += alpha;
> +        return AVERROR(EINVAL);


> +    vrect = *h->rects[0];
> +    if (rects > 1) {
> +        int xmin = h->rects[0]->x, xmax = xmin + h->rects[0]->w;
> +        int ymin = h->rects[0]->y, ymax = ymin + h->rects[0]->h;
> +        for (i = 1; i < rects; i++) {
> +            xmin = FFMIN(xmin, h->rects[i]->x);
> +            ymin = FFMIN(ymin, h->rects[i]->y);
> +            xmax = FFMAX(xmax, h->rects[i]->x + h->rects[i]->w);
> +            ymax = FFMAX(ymax, h->rects[i]->y + h->rects[i]->h);
>          }
> -    for (color=3;; --color) {
> -        hmax = 0;
> -        imax = 0;
> -        for (i=0; i<256; ++i)
> -            if (hist[i] > hmax) {
> -                imax = i;
> -                hmax = hist[i];
> -            }
> -        if (hmax == 0)
> -            break;
> -        if (color == 0)
> -            color = 3;
> -        av_log(NULL, AV_LOG_DEBUG, "dvd_subtitle hist[%d]=%ld -> col %d\n",
> -               imax, hist[imax], color);
> -        cmap[imax] = color;
> -        hist[imax] = 0;
> +        vrect.x = xmin;
> +        vrect.y = ymin;
> +        vrect.w = xmax - xmin;
> +        vrect.h = ymax - ymin;
> +        if ((ret = av_image_check_size(vrect.w, vrect.h, 0, avctx)) < 0)
> +            return ret;
> +

Please add a comment around this block such as "Define the virtual
rectangle containing all the other since DVD subs only support one rect"
or anything you think would be more correct or appropriate.

> +        /* Count pixels outside the virtual rectangle as transparent */
> +        global_palette_hits[0] = vrect.w * vrect.h;
> +        for (i = 0; i < rects; i++)
> +            global_palette_hits[0] -= h->rects[i]->w * h->rects[i]->h;
>      }
>  
> +    for (i = 0; i < rects; i++)
> +        count_colors(avctx, global_palette_hits, h->rects[i]);
> +    select_palette(avctx, out_palette, out_alpha, global_palette_hits);
> +
> +    if (rects > 1) {
> +        if (!(vrect_data = av_calloc(vrect.w, vrect.h)))
> +            return AVERROR(ENOMEM);
> +        vrect.pict.data    [0] = vrect_data;
> +        vrect.pict.linesize[0] = vrect.w;
> +        for (i = 0; i < rects; i++) {
> +            build_color_map(avctx, cmap, (uint32_t *)h->rects[i]->pict.data[1],
> +                            out_palette, out_alpha);
> +            copy_rectangle(&vrect, h->rects[i], cmap);
> +        }
> +        for (i = 0; i < 4; i++)
> +            cmap[i] = i;
> +    } else {
> +        build_color_map(avctx, cmap, (uint32_t *)h->rects[0]->pict.data[1],
> +                        out_palette, out_alpha);
> +    }

Might be nice to av_log debug the select palette.

[...]
> +static int dvdsub_init(AVCodecContext *avctx)
> +{
> +    DVDSubtitleContext *dvdc = avctx->priv_data;
> +    static const uint32_t default_palette[16] = {
> +        0x000000, 0x0000FF, 0x00FF00, 0xFF0000,
> +        0xFFFF00, 0xFF00FF, 0x00FFFF, 0xFFFFFF,
> +        0x808000, 0x8080FF, 0x800080, 0x80FF80,
> +        0x008080, 0xFF8080, 0x555555, 0xAAAAAA,
> +    };
> +    AVBPrint extradata;
> +    int i;
> +
> +    av_assert0(sizeof(dvdc->global_palette) == sizeof(default_palette));
> +    memcpy(dvdc->global_palette, default_palette, sizeof(dvdc->global_palette));
> +
> +    av_bprint_init(&extradata, 0, 1);
> +    av_bprintf(&extradata, "palette:");
> +    for (i = 0; i < 16; i++)
> +        av_bprintf(&extradata, " %06"PRIx32"%c",
> +                   dvdc->global_palette[i] & 0xFFFFFF, i < 15 ? ',' : '\n');

> +    avctx->extradata_size = strlen(extradata.str);
> +    avctx->extradata = av_malloc(avctx->extradata_size);
> +    if (!avctx->extradata)
> +        return AVERROR(ENOMEM);
> +    memcpy(avctx->extradata, extradata.str, avctx->extradata_size);

    av_bprint_finalize(&extradata, (char **)&avctx->extradata);
    if (!avctx->extradata)
        return AVERROR(ENOMEM);
    avctx->extradata_size = extradata.len + 1;

Sounds better to me (no explicit memcpy, and '\0' at the end of the
string).

[...]

No more comments from me,

Thanks

-- 
Clément B.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 490 bytes
Desc: not available
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20120809/dca39bfc/attachment.asc>


More information about the ffmpeg-devel mailing list