[FFmpeg-devel] [PATCH 4/8] h264_metadata: Add support for A/53 closed captions

Mark Thompson sw at jkqxz.net
Mon Mar 12 18:25:05 EET 2018


On 12/03/18 15:10, Hendrik Leppkes wrote:
> On Mon, Mar 12, 2018 at 2:38 PM, Mark Thompson <sw at jkqxz.net> wrote:
>> On 12/03/18 09:54, Hendrik Leppkes wrote:
>>> On Sun, Mar 11, 2018 at 7:30 PM, Mark Thompson <sw at jkqxz.net> wrote:
>>>> ---
>>>>  libavcodec/h264_metadata_bsf.c | 121 +++++++++++++++++++++++++++++++++++++++++
>>>>  1 file changed, 121 insertions(+)
>>>>
>>>> diff --git a/libavcodec/h264_metadata_bsf.c b/libavcodec/h264_metadata_bsf.c
>>>> index 36047887ca..d340c55990 100644
>>>> --- a/libavcodec/h264_metadata_bsf.c
>>>> +++ b/libavcodec/h264_metadata_bsf.c
>>>> @@ -77,6 +77,8 @@ typedef struct H264MetadataContext {
>>>>      int display_orientation;
>>>>      double rotate;
>>>>      int flip;
>>>> +
>>>> +    int a53_cc;
>>>>  } H264MetadataContext;
>>>>
>>>>
>>>> @@ -225,6 +227,8 @@ static int h264_metadata_filter(AVBSFContext *bsf, AVPacket *out)
>>>>      int err, i, j, has_sps;
>>>>      uint8_t *displaymatrix_side_data = NULL;
>>>>      size_t displaymatrix_side_data_size = 0;
>>>> +    uint8_t *a53_side_data = NULL;
>>>> +    size_t a53_side_data_size = 0;
>>>>
>>>>      err = ff_bsf_get_packet(bsf, &in);
>>>>      if (err < 0)
>>>> @@ -514,6 +518,104 @@ static int h264_metadata_filter(AVBSFContext *bsf, AVPacket *out)
>>>>          }
>>>>      }
>>>>
>>>> +    if (ctx->a53_cc == INSERT) {
>>>> +        uint8_t *data;
>>>> +        int size;
>>>> +
>>>> +        data = av_packet_get_side_data(in, AV_PKT_DATA_A53_CC, &size);
>>>> +        if (data) {
>>>> +            H264RawSEIPayload payload = {
>>>> +                .payload_type = H264_SEI_TYPE_USER_DATA_REGISTERED,
>>>> +            };
>>>> +            H264RawSEIUserDataRegistered *udr =
>>>> +                &payload.payload.user_data_registered;
>>>> +
>>>> +            av_log(bsf, AV_LOG_WARNING, "A53 CC insert: %d bytes.\n", size);
>>>> +
>>>> +            udr->data_length = size + 10;
>>>> +            udr->data_ref    = av_buffer_alloc(udr->data_length);
>>>> +            if (!udr->data_ref) {
>>>> +                err = AVERROR(ENOMEM);
>>>> +                goto fail;
>>>> +            }
>>>> +            udr->data = udr->data_ref->data;
>>>> +
>>>> +            udr->itu_t_t35_country_code = 181;
>>>> +            udr->data[0] = 0;
>>>> +            udr->data[1] = 49;
>>>> +            AV_WB32(udr->data + 2, MKBETAG('G', 'A', '9', '4'));
>>>> +            udr->data[6] = 3;
>>>> +            udr->data[7] = ((size / 3) & 0x1f) | 0x40;
>>>> +            udr->data[8] = 0;
>>>> +            memcpy(udr->data + 9, data, size);
>>>> +            udr->data[size + 9] = 0xff;
>>>> +
>>>> +            err = ff_cbs_h264_add_sei_message(ctx->cbc, au, &payload);
>>>> +            if (err < 0) {
>>>> +                av_log(bsf, AV_LOG_ERROR, "Failed to add user data SEI "
>>>> +                       "message to access unit.\n");
>>>> +                av_buffer_unref(&udr->data_ref);
>>>> +                goto fail;
>>>> +            }
>>>> +        }
>>>> +
>>>> +    } else if (ctx->a53_cc == REMOVE || ctx->a53_cc == EXTRACT) {
>>>> +        for (i = 0; i < au->nb_units; i++) {
>>>> +            H264RawSEI *sei;
>>>> +            if (au->units[i].type != H264_NAL_SEI)
>>>> +                continue;
>>>> +            sei = au->units[i].content;
>>>> +
>>>> +            for (j = 0; j < sei->payload_count; j++) {
>>>> +                H264RawSEIUserDataRegistered *udr;
>>>> +                uint32_t tag;
>>>> +                uint8_t type_code, count;
>>>> +
>>>> +                if (sei->payload[j].payload_type !=
>>>> +                    H264_SEI_TYPE_USER_DATA_REGISTERED)
>>>> +                    continue;
>>>> +                udr = &sei->payload[j].payload.user_data_registered;
>>>> +                tag = AV_RB32(udr->data + 2);
>>>> +                type_code = udr->data[6];
>>>> +                if (tag != MKBETAG('G', 'A', '9', '4') || type_code != 3)
>>>> +                    continue;
>>>> +
>>>> +                if (ctx->a53_cc == REMOVE) {
>>>> +                    err = ff_cbs_h264_delete_sei_message(ctx->cbc, au,
>>>> +                                                         &au->units[i], j);
>>>> +                    if (err < 0) {
>>>> +                        av_log(bsf, AV_LOG_ERROR, "Failed to delete "
>>>> +                               "A53 CC SEI message.\n");
>>>> +                        goto fail;
>>>> +                    }
>>>> +                    av_log(bsf, AV_LOG_WARNING, "A53 CC remove!.\n");
>>>> +
>>>> +                    --i;
>>>> +                    break;
>>>> +                }
>>>> +
>>>> +                // Extract.
>>>> +                count = udr->data[7] & 0x1f;
>>>> +                if (3 * count + 10 > udr->data_length) {
>>>> +                    av_log(bsf, AV_LOG_ERROR, "Invalid A/53 closed caption "
>>>> +                           "data: count %d overflows length %zu.\n",
>>>> +                           count, udr->data_length);
>>>> +                    continue;
>>>> +                }
>>>> +
>>>> +                av_log(bsf, AV_LOG_WARNING, "A53 CC extract: %zu bytes.\n", udr->data_length);
>>>> +
>>>> +                err = av_reallocp(&a53_side_data,
>>>> +                                  a53_side_data_size + 3 * count);
>>>> +                if (err)
>>>> +                    goto fail;
>>>> +                memcpy(a53_side_data + a53_side_data_size,
>>>> +                       udr->data + 9, 3 * count);
>>>> +                a53_side_data_size += 3 * count;
>>>> +            }
>>>> +        }
>>>> +    }
>>>> +
>>>>      err = ff_cbs_write_packet(ctx->cbc, out, au);
>>>>      if (err < 0) {
>>>>          av_log(bsf, AV_LOG_ERROR, "Failed to write packet.\n");
>>>> @@ -535,6 +637,16 @@ static int h264_metadata_filter(AVBSFContext *bsf, AVPacket *out)
>>>>          }
>>>>          displaymatrix_side_data = NULL;
>>>>      }
>>>> +    if (a53_side_data) {
>>>> +        err = av_packet_add_side_data(out, AV_PKT_DATA_A53_CC,
>>>> +                                      a53_side_data, a53_side_data_size);
>>>> +        if (err) {
>>>> +            av_log(bsf, AV_LOG_ERROR, "Failed to attach extracted A/53 "
>>>> +                   "side data to packet.\n");
>>>> +            goto fail;
>>>> +        }
>>>> +        a53_side_data = NULL;
>>>> +    }
>>>>
>>>>      ctx->done_first_au = 1;
>>>>
>>>> @@ -542,6 +654,7 @@ static int h264_metadata_filter(AVBSFContext *bsf, AVPacket *out)
>>>>  fail:
>>>>      ff_cbs_fragment_uninit(ctx->cbc, au);
>>>>      av_freep(&displaymatrix_side_data);
>>>> +    av_freep(&a53_side_data);
>>>>
>>>>      av_packet_free(&in);
>>>>
>>>> @@ -670,6 +783,14 @@ static const AVOption h264_metadata_options[] = {
>>>>      { "vertical",   "Set ver_flip",
>>>>          0, AV_OPT_TYPE_CONST, { .i64 = FLIP_VERTICAL },   .unit ="flip" },
>>>>
>>>> +    { "a53_cc", "A/53 Closed Captions in SEI NAL units",
>>>> +        OFFSET(a53_cc), AV_OPT_TYPE_INT,
>>>> +        { .i64 = PASS }, PASS, EXTRACT, 0, "a53_cc" },
>>>> +    { "pass",    NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PASS    }, .unit = "a53_cc" },
>>>> +    { "insert",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = INSERT  }, .unit = "a53_cc" },
>>>> +    { "remove",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = REMOVE  }, .unit = "a53_cc" },
>>>> +    { "extract", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = EXTRACT }, .unit = "a53_cc" },
>>>> +
>>>
>>> Does extracting really make sense? Doesn't the data end up out of
>>> order and basically unusable?
>>
>> Well, it's up to whatever follows to deal with that.  If the stream has correct timestamps (ha) then you can use those directly.  If you're feeding the output to an opaque hardware decoder then having some way to associate input packets with output frames is sufficient to get the right ordering.  A BSF to deal with reordering somehow is also possible.
>>
> 
> That seems like a problem. You generate side-data like any other, but
> in reality it's invalid, and if you process it like any other A53
> side-data you get corrupt output.

Each packet is being tagged with the side-data extracted from the contents of that packet.  Like the non-side-data part of a packet, this needs to be passed through a decoder (of some kind) to turn a packet into a frame before you can display it sensibly.

> The same goes for "insert", I guess, how do I figure out in which
> order to pass things to it to get any sort of functional output? That's
> a very specific setup which requires extremely custom and careful
> usage, does that really fit in a generic filter?
As with the decoder case, it can work with any encoder which can match output packets to input frames.  (I haven't sent any patches to pass side-data through an encoder yet, but I plan to do so at least for VAAPI.)

- Mark


More information about the ffmpeg-devel mailing list