[FFmpeg-devel] [PATCH 2/2] Adding closed caption decoder

Nicolas George george at nsup.org
Fri Dec 5 15:26:03 CET 2014


Hi. I had time to look at the code in some more detail. Comments are
below.

> From 31f69ccfb45247a7cc203084a931b8523284aa13 Mon Sep 17 00:00:00 2001
> From: Anshul Maheshwari <anshul.ffmpeg at gmail.com>
> Date: Wed, 3 Dec 2014 23:37:22 +0530
> Subject: [PATCH 2/2] Adding Closed caption Decoder
> 
> ---
>  libavcodec/Makefile       |   1 +
>  libavcodec/allcodecs.c    |   1 +
>  libavcodec/ccaption_dec.c | 318 ++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 320 insertions(+)
>  create mode 100644 libavcodec/ccaption_dec.c
> 
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index fa0f53d..bbc516d 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -173,6 +173,7 @@ OBJS-$(CONFIG_BRENDER_PIX_DECODER)     += brenderpix.o
>  OBJS-$(CONFIG_C93_DECODER)             += c93.o
>  OBJS-$(CONFIG_CAVS_DECODER)            += cavs.o cavsdec.o cavsdsp.o \
>                                            cavsdata.o mpeg12data.o
> +OBJS-$(CONFIG_CCAPTION_DECODER)        += ccaption_dec.o
>  OBJS-$(CONFIG_CDGRAPHICS_DECODER)      += cdgraphics.o
>  OBJS-$(CONFIG_CDXL_DECODER)            += cdxl.o
>  OBJS-$(CONFIG_CINEPAK_DECODER)         += cinepak.o
> diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
> index 0d39d33..8c07388 100644
> --- a/libavcodec/allcodecs.c
> +++ b/libavcodec/allcodecs.c
> @@ -480,6 +480,7 @@ void avcodec_register_all(void)
>      /* subtitles */
>      REGISTER_ENCDEC (SSA,               ssa);
>      REGISTER_ENCDEC (ASS,               ass);
> +    REGISTER_DECODER(CCAPTION,          ccaption);
>      REGISTER_ENCDEC (DVBSUB,            dvbsub);
>      REGISTER_ENCDEC (DVDSUB,            dvdsub);
>      REGISTER_DECODER(JACOSUB,           jacosub);
> diff --git a/libavcodec/ccaption_dec.c b/libavcodec/ccaption_dec.c
> new file mode 100644
> index 0000000..0a7dfd8
> --- /dev/null
> +++ b/libavcodec/ccaption_dec.c
> @@ -0,0 +1,318 @@
> +/*
> + * Closed Caption Decoding
> + * Copyright (c) 2014 Anshul Maheshwari
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "avcodec.h"
> +#include "ass.h"
> +
> +#define SCREEN_ROWS 15
> +#define SCREEN_COLUMNS 32
> +
> +#define SET_FLAG(var, val) ( var |= ( 1 << (val) ) )
> +#define UNSET_FLAG(var, val) ( var &=  ~( 1 << (val)) )
> +#define CHECK_FLAG(var, val) ( (var) & (1 << (val) ) )
> +
> +enum cc_mode {
> +    CCMODE_POPON,
> +    CCMODE_PAINTON,
> +    CCMODE_ROLLUP_2,
> +    CCMODE_ROLLUP_3,
> +    CCMODE_ROLLUP_4,
> +    CCMODE_TEXT,
> +};
> +
> +struct Screen {

> +    uint8_t characters[SCREEN_ROWS][SCREEN_COLUMNS+1];

Maybe add a comment about the +1?

> +    /*

> +     * row used flag will be 0 when none in use other wise it will have its
> +     * corrosponding bit high.

Language nit. I suggest: "Bitmask of used rows; if a bit is not set, the
corresponding row is not used."

> +     * for setting row 1  use row | (1 >> 1)
> +     * for setting row 15 use row | (1 >> 15)

Are you sure that is ">>" and not "<<"? And is it a good idea to number
starting from 1?

> +     */
> +    int16_t  row_used;
> +};
> +
> +
> +typedef struct CCaptionSubContext {
> +    int parity_table[256];
> +    int row_cnt;
> +    struct Screen screen[2];
> +    int active_screen;
> +    int cursor_row;
> +    int cursor_column;
> +    AVBPrint buffer;

> +    /* erase display memory */
> +    int edm;

It is used only a handful of times: I suggest a more meaningful name instead
of a comment: "erase_disp_mem" for example.

> +    int rollup;
> +    enum  cc_mode mode;
> +    int64_t start_time;
> +    /* visible screen time */

> +    int64_t startv_time;

Is the v a typo?

> +    int64_t end_time;

> +    char prev_cmd[2];

The code uses various types for these values: char, unsigned char, uint8_t.
I suggest sticking with uint8_t if it works.

> +}CCaptionSubContext;
> +
> +static void build_parity_table(int *parity_table)
> +{

> +        unsigned int byte;

Inconsistent indentation.

> +        int parity_v;
> +        for (byte = 0; byte <= 127; byte++) {

> +                parity_v = av_popcount(byte & 0x7f) & 1;

The & 0x7f is redundant.

> +                parity_table[byte] = parity_v;
> +                parity_table[byte | 0x80] = !parity_v;
> +        }
> +}
> +
> +static av_cold int init_decoder(AVCodecContext *avctx)
> +{
> +
> +    CCaptionSubContext *ctx = avctx->priv_data;
> +
> +    build_parity_table(ctx->parity_table);

> +    ctx->row_cnt = 0;
> +    av_bprint_init(&ctx->buffer, 0, AV_BPRINT_SIZE_UNLIMITED);
> +    ctx->edm = 0;
> +    /* taking by default roll up to 2 */
> +    ctx->rollup = 2;
> +    /* set active screen as 0 */
> +    ctx->active_screen = 0;
> +    memset(ctx->prev_cmd,0,2);
> +    ff_ass_subtitle_header_default(avctx);

All the "= 0" and the memset are unnecessary.

> +    return 0;
> +}
> +
> +static av_cold int close_decoder(AVCodecContext *avctx)
> +{
> +    CCaptionSubContext *ctx = avctx->priv_data;
> +    av_bprint_finalize( &ctx->buffer, NULL);
> +    return 0;
> +}
> +
> +/**
> + * This function after validating parity bit, also remove it from data pair.
> + */
> +static int validate_cc_data_pair (unsigned char *cc_data_pair, int *parity_table)
> +{
> +    unsigned char cc_valid = (*cc_data_pair & 4) >>2;
> +    unsigned char cc_type = *cc_data_pair & 3;
> +
> +    if (!cc_valid)
> +        return -1;
> +
> +    // if EIA-608 data then verify parity.
> +    if (cc_type==0 || cc_type==1) {
> +        if (!parity_table[cc_data_pair[2]]) {
> +        // If the second byte doesn't pass parity, ignore pair

> +            return -1;

Meaningful error codes? AVERROR(EINVAL) or AVERROR_INVALIDDATA? Even when
it is not supposed to, a bare -1 has a habit of reaching the user. Same
below.

> +        }
> +        if (!parity_table[cc_data_pair[1]]) {
> +        // The first byte doesn't pass parity, we replace it with a solid blank
> +        // and process the pair.
> +            cc_data_pair[1]=0x7F;
> +        }
> +    }
> +
> +    //Skip non-data
> +    if( (cc_data_pair[0] == 0xFA || cc_data_pair[0] == 0xFC || cc_data_pair[0] == 0xFD )
> +         && (cc_data_pair[1] & 0x7F) == 0 && (cc_data_pair[2] & 0x7F) == 0)
> +        return -1;
> +
> +    //skip 708 data
> +    if(cc_type == 3 || cc_type == 2 )
> +        return -1;
> +
> +    /* remove parity bit */
> +    cc_data_pair[1] &= 0x7F;
> +    cc_data_pair[2] &= 0x7F;
> +
> +
> +    return 0;
> +
> +}
> +static void handle_pac( CCaptionSubContext *ctx, uint8_t hi, uint8_t lo )
> +{

> +    static const int row_map[] = {

Could be int8_t to save memory.

> +        11, -1, 1, 2, 3, 4, 12, 13, 14, 15, 5, 6, 7, 8, 9, 10
> +    };
> +    const int index = ( (hi<<1) & 0x0e) | ( (lo>>5) & 0x01 );
> +

It looks like the values come from external data with subtle arithmetic, so
I suggest:

    av_assert2((unsigned)index < FF_ARRAY_ELEMS(row_map));

> +    if( row_map[index] <= 0 )
> +        return;
> +
> +    ctx->cursor_row = row_map[index] - 1;
> +    ctx->cursor_column = 0;
> +
> +}
> +
> +/**
> + * @param pts it is required to set end time
> + */
> +static void handle_edm(CCaptionSubContext *ctx,int64_t pts)
> +{
> +    int i;
> +    struct Screen *screen = ctx->screen + ctx->active_screen;
> +
> +    ctx->start_time = ctx->startv_time;
> +    for( i = 0; screen->row_used && i < SCREEN_ROWS; i++)
> +    {
> +        if(CHECK_FLAG(screen->row_used,i)) {
> +            av_bprint_append_data(&ctx->buffer, screen->characters[i], strlen(screen->characters[i]));
> +            av_bprint_append_data(&ctx->buffer, "\\N",2);
> +            UNSET_FLAG(screen->row_used, i);
> +        }
> +    }
> +    ctx->startv_time = pts;
> +    ctx->edm = 1;
> +    ctx->end_time = pts;
> +}
> +static void handle_eoc(CCaptionSubContext *ctx, int64_t pts)
> +{
> +    ctx->active_screen = !ctx->active_screen;
> +    ctx->startv_time = pts;
> +}
> +static struct Screen *get_writing_screen(CCaptionSubContext *ctx)
> +{
> +    switch (ctx->mode) {
> +    case CCMODE_POPON:
> +        // use Inactive screen
> +        return ctx->screen + !ctx->active_screen;
> +    case CCMODE_PAINTON:
> +    case CCMODE_ROLLUP_2:
> +    case CCMODE_ROLLUP_3:
> +    case CCMODE_ROLLUP_4:
> +    case CCMODE_TEXT:
> +        // use active screen
> +        return ctx->screen + !ctx->active_screen;
> +    }
> +    /* It was never an option */
> +    return NULL;
> +}
> +static void handle_char(CCaptionSubContext *ctx, char hi, char lo, int64_t pts)
> +{
> +    struct Screen *screen = get_writing_screen(ctx);
> +    char *row = screen->characters[ctx->cursor_row] + ctx->cursor_column;

Here too: av_assert0((unsigned)ctx->cursor_row < SCREEN_ROWS), and the same
for the column.

> +
> +    SET_FLAG(screen->row_used,ctx->cursor_row);
> +
> +    *row++ = hi;
> +    ctx->cursor_column++;
> +    if(lo) {
> +        *row++ = lo;
> +        ctx->cursor_column++;
> +    }
> +    *row = 0;
> +    /* reset prev command since character can repeat */
> +    ctx->prev_cmd[0] = 0;
> +    ctx->prev_cmd[1] = 0;
> +}
> +static int process_cc608(CCaptionSubContext *ctx, int64_t pts, unsigned char hi, unsigned char lo)
> +{
> +

> +#define COR3(var, with1, with2, with3)  ( (var) == (with1) ||  (var) == (with2) || (var) == (with3) )

It looks like you always use COR3(hi, 0x14, 0x15, 0x1C) exactly: is it on
purpose, is it a typo, or can you maybe simplify it?

> +    if ( hi == ctx->prev_cmd[0] && lo == ctx->prev_cmd[1]) {
> +    /* ignore redundant command */
> +    } else if ( (hi == 0x10 && (lo >= 0x40 || lo <= 0x5f)) ||
> +              ( (hi >= 0x11 && hi <= 0x17) && (lo >= 0x40 && lo <= 0x7f) ) ) {
> +        handle_pac(ctx, hi, lo);
> +    } else if ( COR3(hi, 0x14, 0x15, 0x1C) && lo == 0x20 ) {
> +    /* resume caption loading */
> +        ctx->mode = CCMODE_POPON;
> +    } else if ( COR3(hi, 0x14, 0x15, 0x1C) && lo == 0x25 ) {
> +        ctx->rollup = 2;
> +    } else if ( COR3(hi, 0x14, 0x15, 0x1C) && lo == 0x26 ) {
> +        ctx->rollup = 3;
> +    } else if ( COR3(hi, 0x14, 0x15, 0x1C) && lo == 0x27 ) {
> +        ctx->rollup = 4;
> +    } else if ( COR3(hi, 0x14, 0x15, 0x1C) && lo == 0x29 ) {
> +    /* resume direct captioning */
> +        ctx->mode = CCMODE_PAINTON;
> +    } else if ( COR3(hi, 0x14, 0x15, 0x1C) && lo == 0x2C ) {
> +    /* erase display memory */
> +        handle_edm(ctx, pts);
> +    } else if ( COR3(hi, 0x14, 0x15, 0x1C) && lo == 0x2D ) {
> +    /* carriage return */
> +        ctx->row_cnt++;
> +        if(ctx->row_cnt == ctx->rollup) {
> +            ctx->row_cnt = 0;
> +            handle_edm(ctx, pts);
> +        }
> +    } else if ( COR3(hi, 0x14, 0x15, 0x1C) && lo == 0x2F ) {
> +    /* end of caption */
> +        handle_eoc(ctx, pts);
> +    } else if (hi>=0x20) {
> +    /* Standard characters (always in pairs) */
> +        handle_char(ctx, hi, lo, pts);
> +    } else {
> +    /* Ignoring all other non data code */
> +    }
> +
> +    /* set prev command */
> +     ctx->prev_cmd[0] = hi;
> +     ctx->prev_cmd[1] = lo;
> +
> +#undef COR3
> +    return 0;
> +
> +}
> +static int decode(AVCodecContext *avctx, void *data, int *got_sub, AVPacket *avpkt)
> +{
> +    CCaptionSubContext *ctx = avctx->priv_data;
> +    AVSubtitle *sub = data;
> +    unsigned char *bptr = avpkt->data;
> +    int len = avpkt->size;
> +    int ret = 0;
> +    int i;
> +
> +    for (i  = 0; i < len; i += 3) {
> +        unsigned char cc_type = *(bptr + i) & 3;
> +        if (validate_cc_data_pair( bptr + i, ctx->parity_table ) )
> +            continue;
> +        /* ignoring data field 1 */
> +        if(cc_type == 1)
> +            continue;
> +        else
> +            process_cc608(ctx, avpkt->pts, *(bptr + i + 1), *(bptr + i + 2));
> +    }
> +    if(ctx->edm && *ctx->buffer.str)
> +    {
> +        int start_time = av_rescale_q(ctx->start_time, avctx->time_base, (AVRational){ 1, 100 });
> +        int end_time = av_rescale_q(ctx->end_time, avctx->time_base, (AVRational){ 1, 100 });

> +        ret = ff_ass_add_rect(sub, ctx->buffer.str, start_time, end_time - start_time , 0);

You need to use av_bprint_is_complete() and return AVERROR(ENOMEM) if the
text was truncated.

> +        if (ret < 0)
> +            return ret;
> +        sub->pts = av_rescale_q(ctx->start_time, avctx->time_base, AV_TIME_BASE_Q);
> +        ctx->edm = 0;
> +        av_bprint_clear(&ctx->buffer);
> +    }
> +
> +    *got_sub = sub->num_rects > 0;
> +    return 0;
> +}
> +
> +AVCodec ff_ccaption_decoder = {
> +    .name           = "cc_dec",

> +    .long_name      = NULL_IF_CONFIG_SMALL("Closed Caption Decoder"),

I suggest having "EIA-608" and "CEA-708" appear in the name, for people
specifically looking for them: "Closed Caption (EIA-608 / CEA-708) Decoder"
maybe.

> +    .type           = AVMEDIA_TYPE_SUBTITLE,
> +    .id             = AV_CODEC_ID_EIA_608,
> +    .priv_data_size = sizeof(CCaptionSubContext),
> +    .init           = init_decoder,
> +    .close          = close_decoder,
> +    .decode         = decode,
> +};

Regards,

-- 
  Nicolas George


More information about the ffmpeg-devel mailing list