[FFmpeg-devel] [PATCH] avcodec: add an AV1 parser

Tue Sep 25 03:08:10 EEST 2018

On 9/24/2018 8:41 PM, Mark Thompson wrote:
> On 24/09/18 01:12, James Almer wrote:
>> Simple parser to set keyframes, frame type, structure, width, height, and pixel
>> format, plus stream profile and level.
>>
>> Signed-off-by: James Almer <jamrial at gmail.com>
>> ---
>> Missing Changelog entry and version bump.
>>
>> This depends on "[PATCH v2 2/3] lavc: Add coded bitstream read/write support
>> for AV1" which should be committed in the coming days.
>>
>> The AVCodecParser.split() implementation, added for the sake of completeness,
>> is very naive and much like the h264 and hevc ones can result in useless OBUs
>> being "extracted", but since it's no longer used by libavformat to fill global
>> headers when reading raw containers it shouldn't really matter. It's pretty
>> much used only by the remove_extradata bsf at this point.
>>
>>  configure               |   1 +
>>  libavcodec/Makefile     |   1 +
>>  libavcodec/av1_parser.c | 218 ++++++++++++++++++++++++++++++++++++++++
>>  libavcodec/parsers.c    |   1 +
>>  4 files changed, 221 insertions(+)
>>  create mode 100644 libavcodec/av1_parser.c
>>
>> diff --git a/configure b/configure
>> index ca8b599b63..b46c86ec95 100755
>> --- a/configure
>> +++ b/configure
>> @@ -3020,6 +3020,7 @@ wmv3_crystalhd_decoder_select="crystalhd"
>>  
>>  # parsers
>>  aac_parser_select="adts_header"
>> +av1_parser_select="cbs_av1"
>>  h264_parser_select="golomb h264dsp h264parse"
>>  hevc_parser_select="hevcparse"
>>  mpegaudio_parser_select="mpegaudioheader"
>> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
>> index b2c6995f9a..dc28892e64 100644
>> --- a/libavcodec/Makefile
>> +++ b/libavcodec/Makefile
>> @@ -1006,6 +1006,7 @@ OBJS-$(CONFIG_AAC_PARSER)              += aac_parser.o aac_ac3_parser.o \
>>                                            mpeg4audio.o
>>  OBJS-$(CONFIG_AC3_PARSER)              += ac3tab.o aac_ac3_parser.o
>>  OBJS-$(CONFIG_ADX_PARSER)              += adx_parser.o adx.o
>> +OBJS-$(CONFIG_AV1_PARSER)              += av1_parser.o
>>  OBJS-$(CONFIG_AVS2_PARSER)             += avs2_parser.o
>>  OBJS-$(CONFIG_BMP_PARSER)              += bmp_parser.o
>>  OBJS-$(CONFIG_CAVSVIDEO_PARSER)        += cavs_parser.o
>> diff --git a/libavcodec/av1_parser.c b/libavcodec/av1_parser.c
>> new file mode 100644
>> index 0000000000..b2e19e2119
>> --- /dev/null
>> +++ b/libavcodec/av1_parser.c
>> @@ -0,0 +1,218 @@
>> +/*
>> + * AV1 parser
>> + *
>> + * Copyright (C) 2018 James Almer <jamrial at gmail.com>
>> + *
>> + * This file is part of FFmpeg.
>> + *
>> + * FFmpeg is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2.1 of the License, or (at your option) any later version.
>> + *
>> + * FFmpeg is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with FFmpeg; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>> + */
>> +
>> +#include "av1_parse.h"
>> +#include "cbs.h"
>> +#include "cbs_av1.h"
>> +#include "parser.h"
>> +
>> +typedef struct AV1ParseContext {
>> +    CodedBitstreamContext *cbc;
>> +    CodedBitstreamFragment temporal_unit;
>> +    int parsed_extradata;
>> +} AV1ParseContext;
>> +
>> +static int av1_parser_parse(AVCodecParserContext *ctx,
>> +                            AVCodecContext *avctx,
>> +                            const uint8_t **out_data, int *out_size,
>> +                            const uint8_t *data, int size)
>> +{
>> +    AV1ParseContext *s = ctx->priv_data;
>> +    CodedBitstreamFragment *td = &s->temporal_unit;
>> +    CodedBitstreamAV1Context *av1 = s->cbc->priv_data;
>> +    int ret;
>> +
>> +    *out_data = data;
>> +    *out_size = size;
>> +
>> +    ctx->key_frame         = -1;
>> +    ctx->pict_type         = AV_PICTURE_TYPE_NONE;
>> +    ctx->picture_structure = AV_PICTURE_STRUCTURE_UNKNOWN;
>> +
>> +    if (avctx->extradata_size && !s->parsed_extradata) {
>> +        ret = ff_cbs_read(s->cbc, td, avctx->extradata, avctx->extradata_size);
>> +        if (ret < 0) {
>> +            av_log(avctx, AV_LOG_ERROR, "Failed to parse extradata.\n");
>> +            return size;
>> +        }
>> +
>> +        s->parsed_extradata = 1;

I'll move this above the ff_cbs_read() call, btw. Otherwise if it fails,
it will keep being called on every av_parser_parse2() call and no packet
will ever be parsed.

>> +
>> +        ff_cbs_fragment_uninit(s->cbc, td);
>> +    }
>> +
>> +    ret = ff_cbs_read(s->cbc, td, data, size);
>> +    if (ret < 0) {
>> +        av_log(avctx, AV_LOG_ERROR, "Failed to parse temporal unit.\n");
>> +        return size;
>> +    }
>> +
>> +    if (!av1->sequence_header) {
>> +        av_log(avctx, AV_LOG_ERROR, "No sequence header available\n");
>> +        goto end;
>> +    }
>> +
>> +    for (int i = 0; i < td->nb_units; i++) {
>> +        CodedBitstreamUnit *unit = &td->units[i];
>> +        AV1RawOBU *obu = unit->content;
>> +        AV1RawSequenceHeader *seq = av1->sequence_header;
>> +        AV1RawFrameHeader *frame;
>> +        int frame_type, bitdepth, subsampling;
>> +
>> +        if (unit->type == AV1_OBU_FRAME)
>> +            frame = &obu->obu.frame.header;
>> +        else if (unit->type == AV1_OBU_FRAME_HEADER)
>> +            frame = &obu->obu.frame_header;
>> +        else
>> +            continue;
>> +
>> +        if (frame->show_existing_frame) {
>> +            AV1ReferenceFrameState *ref = &av1->ref[frame->frame_to_show_map_idx];
>> +
>> +            if (!ref->valid) {
>> +                av_log(avctx, AV_LOG_ERROR, "Invalid reference frame\n");
>> +                goto end;
>> +            }
>> +
>> +            ctx->width  = ref->frame_width;
>> +            ctx->height = ref->frame_height;
>> +            frame_type  = ref->frame_type;
>> +
>> +            ctx->key_frame = 0;
>> +        } else if (!frame->show_frame) {
>> +            continue;
> 
> I think you want to set the key_frame flag if the you see any key frame, even an invisible one.
> 
> (E.g. if the first output frame is in a packet after an invisible key frame then you would miss it here, which possibly leads to incorrectly discarding some of the stream.)

No, the only frame that matters here is the visible one for the current
temporal unit, be it a show_frame == 1 one or a show_existing_frame == 1
one. And the latter is not meant to be tagged as a key frame, hence
setting it to 0 above.

See section 7.6.2 in the AV1 spec. "Key Frame Dependent Recovery Point",
AKA frames where show_existing_frame == 1 and the frame_to_show_map_idx
value pointing to a "Delayed Random Access Point" (frame where
show_frame == 0, showable_frame == 1, and frame_type == KEY_FRAME,
parsed in a previous OBU), are not considered actual random access
points. Not even libaom signals them as such. They are meant to be
handled in a custom way by the container.

> 
>> +        } else {
>> +            ctx->width  = av1->frame_width;
>> +            ctx->height = av1->frame_height;
>> +            frame_type  = frame->frame_type;
>> +
>> +            ctx->key_frame = frame_type == AV1_FRAME_KEY;
>> +        }
>> +
>> +        avctx->profile = seq->seq_profile;
>> +        avctx->level   = seq->seq_level_idx[0];
>> +
>> +        switch (frame_type) {
>> +        case AV1_FRAME_KEY:
>> +        case AV1_FRAME_INTRA_ONLY:
>> +            ctx->pict_type = AV_PICTURE_TYPE_I;
>> +            break;
>> +        case AV1_FRAME_INTER:
>> +            ctx->pict_type = AV_PICTURE_TYPE_P;
>> +            break;
>> +        case AV1_FRAME_SWITCH:
>> +            ctx->pict_type = AV_PICTURE_TYPE_SP;
>> +            break;
>> +        }
>> +
>> +        ctx->picture_structure = AV_PICTURE_STRUCTURE_FRAME;
>> +
>> +        subsampling = seq->color_config.subsampling_x << 1 & seq->color_config.subsampling_y;
>> +        bitdepth    = 8 + seq->color_config.high_bitdepth * 2 + seq->color_config.twelve_bit * 2;
>> +        switch (bitdepth) {
>> +        case 8:
>> +            if (subsampling == 3)      ctx->format = seq->color_config.mono_chrome ? AV_PIX_FMT_GRAY8 :
>> +                                                                                     AV_PIX_FMT_YUV420P;
>> +            else if (subsampling == 2) ctx->format = AV_PIX_FMT_YUV422P;
>> +            else                       ctx->format = AV_PIX_FMT_YUV444P;
>> +            break;
>> +        case 10:
>> +            if (subsampling == 3)      ctx->format = seq->color_config.mono_chrome ? AV_PIX_FMT_GRAY10 :
>> +                                                                                     AV_PIX_FMT_YUV420P10;
>> +            else if (subsampling == 2) ctx->format = AV_PIX_FMT_YUV422P10;
>> +            else                       ctx->format = AV_PIX_FMT_YUV444P10;
>> +            break;
>> +        case 12:
>> +            if (subsampling == 3)      ctx->format = seq->color_config.mono_chrome ? AV_PIX_FMT_GRAY12 :
>> +                                                                                     AV_PIX_FMT_YUV420P12;
>> +            else if (subsampling == 2) ctx->format = AV_PIX_FMT_YUV422P12;
>> +            else                       ctx->format = AV_PIX_FMT_YUV444P12;
>> +            break;
>> +        }
> 
> I think I'd put mono_chrome outside the switch - while it does imply that subsampling_(x|y) are both 1, that is still slightly confusing.

Sure.

> 
> Also, perhaps this would be nicer as a lookup table?

I'll see if i can get it to look nicer.

> 
>> +    }
>> +
>> +end:
>> +    ff_cbs_fragment_uninit(s->cbc, td);
>> +
>> +    return size;
>> +}
>> +
>> +static const CodedBitstreamUnitType decompose_unit_types[] = {
>> +    AV1_OBU_TEMPORAL_DELIMITER,
>> +    AV1_OBU_SEQUENCE_HEADER,
>> +    AV1_OBU_FRAME_HEADER,
>> +    AV1_OBU_TILE_GROUP,
>> +    AV1_OBU_FRAME,
>> +};
>> +
>> +static av_cold int av1_parser_init(AVCodecParserContext *ctx)
>> +{
>> +    AV1ParseContext *s = ctx->priv_data;
>> +    int ret;
>> +
>> +    ret = ff_cbs_init(&s->cbc, AV_CODEC_ID_AV1, NULL);
> 
> Can we forge a logging context here?  Having NULL is not very nice if it does find errors.
> 
> (Or overwrite it with the passed AVCodecContext only during the parse call?)

So a s->cbc.log_ctx = avctx in av1_parser_parse()?

> 
>> +    if (ret < 0)
>> +        return ret;
>> +
>> +    s->cbc->decompose_unit_types    = (CodedBitstreamUnitType *)decompose_unit_types;
>> +    s->cbc->nb_decompose_unit_types = FF_ARRAY_ELEMS(decompose_unit_types);
>> +
>> +    return 0;
>> +}
>> +
>> +static void av1_parser_close(AVCodecParserContext *ctx)
>> +{
>> +    AV1ParseContext *s = ctx->priv_data;
>> +
>> +    ff_cbs_close(&s->cbc);
>> +}
>> +
>> +static int av1_parser_split(AVCodecContext *avctx,
>> +                            const uint8_t *buf, int buf_size)
>> +{
>> +    AV1OBU obu;
>> +    const uint8_t *ptr = buf, *end = buf + buf_size;
>> +
>> +    while (ptr < end) {
>> +        int len = ff_av1_extract_obu(&obu, ptr, buf_size, avctx);
>> +        if (len < 0)
>> +            break;
>> +
>> +        if (obu.type == AV1_OBU_FRAME_HEADER ||
>> +            obu.type == AV1_OBU_FRAME) {
>> +            return ptr - buf;
>> +        }
>> +        ptr      += len;
>> +        buf_size -= len;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +AVCodecParser ff_av1_parser = {
>> +    .codec_ids      = { AV_CODEC_ID_AV1 },
>> +    .priv_data_size = sizeof(AV1ParseContext),
>> +    .parser_init    = av1_parser_init,
>> +    .parser_close   = av1_parser_close,
>> +    .parser_parse   = av1_parser_parse,
>> +    .split          = av1_parser_split,
>> +};
>> diff --git a/libavcodec/parsers.c b/libavcodec/parsers.c
>> index cb86cceecc..f01cad4c84 100644
>> --- a/libavcodec/parsers.c
>> +++ b/libavcodec/parsers.c
>> @@ -26,6 +26,7 @@ extern AVCodecParser ff_aac_parser;
>>  extern AVCodecParser ff_aac_latm_parser;
>>  extern AVCodecParser ff_ac3_parser;
>>  extern AVCodecParser ff_adx_parser;
>> +extern AVCodecParser ff_av1_parser;
>>  extern AVCodecParser ff_avs2_parser;
>>  extern AVCodecParser ff_bmp_parser;
>>  extern AVCodecParser ff_cavsvideo_parser;
>>
> 
> Looks good!
> 
> Thanks,
> 
> - Mark
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>