[FFmpeg-devel] [PATCH v2 2/9] lavc/ccaption_dec: implement real_time option

Wed Jan 13 09:48:36 CET 2016

On 13 January 2016 7:12:53 AM IST, Aman Gupta <ffmpeg at tmm1.net> wrote:
>From: Aman Gupta <aman at tmm1.net>
>
>This new mode is useful for realtime decoding of closed captions so they
>can be displayed along with mpeg2 frames.
>
>Closed caption streams contain two major types of captions:
>
>- POPON captions, which are buffered off-screen and displayed
>  only after EOC (end of caption, aka display buffer)
>
>- PAINTON/ROLLUP captions, which are written to the display as soon as
>  they arrive.
>
>In a typical real-time eia608 decoder, commands like EOC (end of
>caption; display buffer), EDM (erase display memory) and EBM (erase
>buffered memory) perform their expected functions as soon as the
>commands are processed. This is implemented in the real_time branches
>added in this commit.
>
>Before this commit, and in the !real_time branches after this commit,
>the decoder cleverly implements its own version of the decoder which is
>specifically geared towards buffered decoding. It does so by actively
>ignoring commands like EBM (erase buffered memory), and then re-using
>the non-display buffer to hold the previous caption while the new one is
>received. This is the opposite of the real-time decoder, which uses the
>non-display buffer to hold the new caption while the display buffer is
>still showing the current caption.
>
>In addition to ignoring EBM, the buffered decoder also has custom
>implementations for EDM and EOC. An EDM (erase display memory) command
>flushes the existing contents before clearing the screen, and EOC
>similarly always flushes the active buffer (the previous subtitle)
>before flipping buffers.
>---
> libavcodec/ccaption_dec.c | 80 ++++++++++++++++++++++++++++++++++++++++++-----
> 1 file changed, 72 insertions(+), 8 deletions(-)
>
>diff --git a/libavcodec/ccaption_dec.c b/libavcodec/ccaption_dec.c
>index a9dfc94..6bdd754 100644
>--- a/libavcodec/ccaption_dec.c
>+++ b/libavcodec/ccaption_dec.c
>@@ -116,6 +116,7 @@ struct Screen {
> 
> typedef struct CCaptionSubContext {
>     AVClass *class;
>+    int real_time;
>     struct Screen screen[2];
>     int active_screen;
>     uint8_t cursor_row;
>@@ -130,6 +131,8 @@ typedef struct CCaptionSubContext {
>     /* visible screen time */
>     int64_t startv_time;
>     int64_t end_time;
>+    int screen_touched;
>+    int64_t last_real_time;
>     char prev_cmd[2];
>     /* buffer to store pkt data */
>     AVBufferRef *pktbuf;
>@@ -428,15 +431,33 @@ static void handle_edm(CCaptionSubContext *ctx,
>int64_t pts)
> {
>     struct Screen *screen = ctx->screen + ctx->active_screen;
> 
>-    reap_screen(ctx, pts);
>+    // In buffered mode, keep writing to screen until it is wiped.
>+    // Before wiping the display, capture contents to emit subtitle.
>+    if (!ctx->real_time)
>+        reap_screen(ctx, pts);
>+
>     screen->row_used = 0;
>+
>+    // In realtime mode, emit an empty caption so the last one doesn't
>+    // stay on the screen.
>+    if (ctx->real_time)
>+        reap_screen(ctx, pts);
> }
> 
> static void handle_eoc(CCaptionSubContext *ctx, int64_t pts)
> {
>-    handle_edm(ctx,pts);
>+    // In buffered mode, we wait til the *next* EOC and
>+    // reap what was already on the screen since the last EOC.
>+    if (!ctx->real_time)
>+        handle_edm(ctx,pts);
>+
>     ctx->active_screen = !ctx->active_screen;
>     ctx->cursor_column = 0;
>+
>+    // In realtime mode, we display the buffered contents (after
>+    // flipping the buffer to active above) as soon as EOC arrives.
>+    if (ctx->real_time)
>+        reap_screen(ctx, pts);
> }
> 
>static void handle_delete_end_of_row(CCaptionSubContext *ctx, char hi,
>char lo)
>@@ -458,6 +479,9 @@ static void handle_char(CCaptionSubContext *ctx,
>char hi, char lo, int64_t pts)
>     }
>     write_char(ctx, screen, 0);
> 
>+    if (ctx->mode != CCMODE_POPON)
>+        ctx->screen_touched = 1;
>+
>     /* reset prev command since character can repeat */
>     ctx->prev_cmd[0] = 0;
>     ctx->prev_cmd[1] = 0;
>@@ -507,10 +531,20 @@ static void process_cc608(CCaptionSubContext
>*ctx, int64_t pts, uint8_t hi, uint
>         case 0x2d:
>             /* carriage return */
>             ff_dlog(ctx, "carriage return\n");
>-            reap_screen(ctx, pts);
>+            if (!ctx->real_time)
>+                reap_screen(ctx, pts);
>             roll_up(ctx);
>             ctx->cursor_column = 0;
>             break;
>+        case 0x2e:
>+            /* erase buffered (non displayed) memory */
>+            // Only in realtime mode. In buffered mode, we re-use the
>inactive screen
>+            // for our own buffering.
>+            if (ctx->real_time) {
>+                struct Screen *screen = ctx->screen +
>!ctx->active_screen;
>+                screen->row_used = 0;
>+            }
>+            break;
>         case 0x2f:
>             /* end of caption */
>             ff_dlog(ctx, "handle_eoc\n");
>@@ -562,24 +596,54 @@ static int decode(AVCodecContext *avctx, void
>*data, int *got_sub, AVPacket *avp
>             continue;
>         else
>process_cc608(ctx, avpkt->pts, *(bptr + i + 1) & 0x7f, *(bptr + i + 2)
>& 0x7f);
>-        if (ctx->buffer_changed && *ctx->buffer.str)
>+
>+        if (!ctx->buffer_changed)
>+            continue;
>+        ctx->buffer_changed = 0;
>+
>+        if (*ctx->buffer.str || ctx->real_time)
>         {
>-            int start_time = av_rescale_q(ctx->start_time,
>avctx->time_base, ass_tb);
>-            int end_time = av_rescale_q(ctx->end_time,
>avctx->time_base, ass_tb);
>+            int64_t sub_pts = ctx->real_time ? avpkt->pts :
>ctx->start_time;
>+            int start_time = av_rescale_q(sub_pts, avctx->time_base,
>ass_tb);
>+            int duration = 10 * ass_tb.den;
>+            if (!ctx->real_time) {
>+                int end_time = av_rescale_q(ctx->end_time,
>avctx->time_base, ass_tb);
>+                duration = end_time - start_time;
>+            }
>             ff_dlog(ctx, "cdp writing data (%s)\n",ctx->buffer.str);
>-            ret = ff_ass_add_rect_bprint(sub, &ctx->buffer,
>start_time, end_time - start_time);
>+            ret = ff_ass_add_rect_bprint(sub, &ctx->buffer,
>start_time, duration);
>             if (ret < 0)
>                 return ret;
>-            sub->pts = av_rescale_q(ctx->start_time, avctx->time_base,
>AV_TIME_BASE_Q);
>+            sub->pts = av_rescale_q(sub_pts, avctx->time_base,
>AV_TIME_BASE_Q);
>             ctx->buffer_changed = 0;
>+            ctx->last_real_time = avpkt->pts;
>+            ctx->screen_touched = 0;
>         }
>     }
> 
>+    if (ctx->real_time && ctx->screen_touched &&
>+        avpkt->pts > ctx->last_real_time + av_rescale_q(20, ass_tb,
>avctx->time_base)) {
>+        ctx->last_real_time = avpkt->pts;
>+        ctx->screen_touched = 0;
>+
>+        capture_screen(ctx);
>+        ctx->buffer_changed = 0;
>+
>+        int start_time = av_rescale_q(avpkt->pts, avctx->time_base,
>ass_tb);
>+        ret = ff_ass_add_rect_bprint(sub, &ctx->buffer, start_time,
>-1);
>+        if (ret < 0)
>+            return ret;
>+        sub->pts = av_rescale_q(avpkt->pts, avctx->time_base,
>AV_TIME_BASE_Q);
>+    }
>+
>     *got_sub = sub->num_rects > 0;
>     return ret;
> }
> 
>+#define OFFSET(x) offsetof(CCaptionSubContext, x)
>+#define SD AV_OPT_FLAG_SUBTITLE_PARAM | AV_OPT_FLAG_DECODING_PARAM
> static const AVOption options[] = {
>+    { "real_time", "emit subtitle events as they are decoded for
>real-time display", OFFSET(real_time), AV_OPT_TYPE_BOOL, { .i64 = 0 },
>0, 1, SD },
>     {NULL}
> };
> 

LGTM.

-Anshul