[FFmpeg-cvslog] lavc/ccaption_dec: implement real_time option

Thu Jan 14 22:42:15 CET 2016

ffmpeg | branch: master | Aman Gupta <aman at tmm1.net> | Fri Jan  8 19:01:22 2016 -0800| [5c041e2cccc1e09321f0be729364eb70411b6ce1] | committer: Clément Bœsch

lavc/ccaption_dec: implement real_time option

This new mode is useful for realtime decoding of closed captions so they
can be display along with mpeg2 frames.

Closed caption streams contain two major types of captions:

- POPON captions, which are buffered off-screen and displayed
  only after EOC (end of caption, aka display buffer)

- PAINTON/ROLLUP captions, which are written to the display as soon as
  they arrive.

In a typical real-time eia608 decoder, commands like EOC (end of
caption; display buffer), EDM (erase display memory) and EBM (erase
buffered memory) perform their expected functions as soon as the
commands are processed. This is implemented in the real_time branches
added in this commit.

Before this commit, and in the !real_time branches after this commit,
the decoder cleverly implements its own version of the decoder which is
specifically geared towards buffered decoding. It does so by actively
ignoring commands like EBM (erase buffered memory), and then re-using
the non-display buffer to hold the previous caption while the new one is
received. This is the opposite of the real-time decoder, which uses the
non-display buffer to hold the new caption while the display buffer is
still showing the current caption.

In addition to ignoring EBM, the buffered decoder also has custom
implementations for EDM and EOC. An EDM (erase display memory) command
flushes the existing contents before clearing the screen, and EOC
similarly always flushes the active buffer (the previous subtitle)
before flipping buffers.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=5c041e2cccc1e09321f0be729364eb70411b6ce1
---

 libavcodec/ccaption_dec.c |   82 ++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 74 insertions(+), 8 deletions(-)

diff --git a/libavcodec/ccaption_dec.c b/libavcodec/ccaption_dec.c
index 6cb826c..ca08d48 100644
--- a/libavcodec/ccaption_dec.c
+++ b/libavcodec/ccaption_dec.c
@@ -116,6 +116,7 @@ struct Screen {
 
 typedef struct CCaptionSubContext {
     AVClass *class;
+    int real_time;
     struct Screen screen[2];
     int active_screen;
     uint8_t cursor_row;
@@ -130,6 +131,8 @@ typedef struct CCaptionSubContext {
     /* visible screen time */
     int64_t startv_time;
     int64_t end_time;
+    int screen_touched;
+    int64_t last_real_time;
     char prev_cmd[2];
     /* buffer to store pkt data */
     AVBufferRef *pktbuf;
@@ -187,6 +190,8 @@ static void flush_decoder(AVCodecContext *avctx)
     ctx->cursor_font = 0;
     ctx->cursor_color = 0;
     ctx->active_screen = 0;
+    ctx->last_real_time = 0;
+    ctx->screen_touched = 0;
     ctx->buffer_changed = 0;
     av_bprint_clear(&ctx->buffer);
 }
@@ -426,15 +431,33 @@ static void handle_edm(CCaptionSubContext *ctx, int64_t pts)
 {
     struct Screen *screen = ctx->screen + ctx->active_screen;
 
-    reap_screen(ctx, pts);
+    // In buffered mode, keep writing to screen until it is wiped.
+    // Before wiping the display, capture contents to emit subtitle.
+    if (!ctx->real_time)
+        reap_screen(ctx, pts);
+
     screen->row_used = 0;
+
+    // In realtime mode, emit an empty caption so the last one doesn't
+    // stay on the screen.
+    if (ctx->real_time)
+        reap_screen(ctx, pts);
 }
 
 static void handle_eoc(CCaptionSubContext *ctx, int64_t pts)
 {
-    handle_edm(ctx,pts);
+    // In buffered mode, we wait til the *next* EOC and
+    // reap what was already on the screen since the last EOC.
+    if (!ctx->real_time)
+        handle_edm(ctx,pts);
+
     ctx->active_screen = !ctx->active_screen;
     ctx->cursor_column = 0;
+
+    // In realtime mode, we display the buffered contents (after
+    // flipping the buffer to active above) as soon as EOC arrives.
+    if (ctx->real_time)
+        reap_screen(ctx, pts);
 }
 
 static void handle_delete_end_of_row(CCaptionSubContext *ctx, char hi, char lo)
@@ -456,6 +479,9 @@ static void handle_char(CCaptionSubContext *ctx, char hi, char lo, int64_t pts)
     }
     write_char(ctx, screen, 0);
 
+    if (ctx->mode != CCMODE_POPON)
+        ctx->screen_touched = 1;
+
     /* reset prev command since character can repeat */
     ctx->prev_cmd[0] = 0;
     ctx->prev_cmd[1] = 0;
@@ -505,10 +531,20 @@ static void process_cc608(CCaptionSubContext *ctx, int64_t pts, uint8_t hi, uint
         case 0x2d:
             /* carriage return */
             ff_dlog(ctx, "carriage return\n");
-            reap_screen(ctx, pts);
+            if (!ctx->real_time)
+                reap_screen(ctx, pts);
             roll_up(ctx);
             ctx->cursor_column = 0;
             break;
+        case 0x2e:
+            /* erase buffered (non displayed) memory */
+            // Only in realtime mode. In buffered mode, we re-use the inactive screen
+            // for our own buffering.
+            if (ctx->real_time) {
+                struct Screen *screen = ctx->screen + !ctx->active_screen;
+                screen->row_used = 0;
+            }
+            break;
         case 0x2f:
             /* end of caption */
             ff_dlog(ctx, "handle_eoc\n");
@@ -560,24 +596,54 @@ static int decode(AVCodecContext *avctx, void *data, int *got_sub, AVPacket *avp
             continue;
         else
             process_cc608(ctx, avpkt->pts, *(bptr + i + 1) & 0x7f, *(bptr + i + 2) & 0x7f);
-        if (ctx->buffer_changed && *ctx->buffer.str)
+
+        if (!ctx->buffer_changed)
+            continue;
+        ctx->buffer_changed = 0;
+
+        if (*ctx->buffer.str || ctx->real_time)
         {
-            int start_time = av_rescale_q(ctx->start_time, avctx->time_base, ass_tb);
-            int end_time = av_rescale_q(ctx->end_time, avctx->time_base, ass_tb);
+            int64_t sub_pts = ctx->real_time ? avpkt->pts : ctx->start_time;
+            int start_time = av_rescale_q(sub_pts, avctx->time_base, ass_tb);
+            int duration = -1;
+            if (!ctx->real_time) {
+                int end_time = av_rescale_q(ctx->end_time, avctx->time_base, ass_tb);
+                duration = end_time - start_time;
+            }
             ff_dlog(ctx, "cdp writing data (%s)\n",ctx->buffer.str);
-            ret = ff_ass_add_rect_bprint(sub, &ctx->buffer, start_time, end_time - start_time);
+            ret = ff_ass_add_rect_bprint(sub, &ctx->buffer, start_time, duration);
             if (ret < 0)
                 return ret;
-            sub->pts = av_rescale_q(ctx->start_time, avctx->time_base, AV_TIME_BASE_Q);
+            sub->pts = av_rescale_q(sub_pts, avctx->time_base, AV_TIME_BASE_Q);
             ctx->buffer_changed = 0;
+            ctx->last_real_time = avpkt->pts;
+            ctx->screen_touched = 0;
         }
     }
 
+    if (ctx->real_time && ctx->screen_touched &&
+        avpkt->pts > ctx->last_real_time + av_rescale_q(20, ass_tb, avctx->time_base)) {
+        ctx->last_real_time = avpkt->pts;
+        ctx->screen_touched = 0;
+
+        capture_screen(ctx);
+        ctx->buffer_changed = 0;
+
+        int start_time = av_rescale_q(avpkt->pts, avctx->time_base, ass_tb);
+        ret = ff_ass_add_rect_bprint(sub, &ctx->buffer, start_time, -1);
+        if (ret < 0)
+            return ret;
+        sub->pts = av_rescale_q(avpkt->pts, avctx->time_base, AV_TIME_BASE_Q);
+    }
+
     *got_sub = sub->num_rects > 0;
     return ret;
 }
 
+#define OFFSET(x) offsetof(CCaptionSubContext, x)
+#define SD AV_OPT_FLAG_SUBTITLE_PARAM | AV_OPT_FLAG_DECODING_PARAM
 static const AVOption options[] = {
+    { "real_time", "emit subtitle events as they are decoded for real-time display", OFFSET(real_time), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, SD },
     {NULL}
 };