[FFmpeg-devel] [PATCH 7/8] avformat: Immersive Audio Model and Formats demuxer

James Almer jamrial at gmail.com
Thu Dec 14 22:14:32 EET 2023


Signed-off-by: James Almer <jamrial at gmail.com>
---
 libavformat/Makefile     |    1 +
 libavformat/allformats.c |    1 +
 libavformat/iamf.c       |  125 +++++
 libavformat/iamf.h       |  163 ++++++
 libavformat/iamf_parse.c | 1106 ++++++++++++++++++++++++++++++++++++++
 libavformat/iamf_parse.h |   38 ++
 libavformat/iamfdec.c    |  503 +++++++++++++++++
 7 files changed, 1937 insertions(+)
 create mode 100644 libavformat/iamf.c
 create mode 100644 libavformat/iamf.h
 create mode 100644 libavformat/iamf_parse.c
 create mode 100644 libavformat/iamf_parse.h
 create mode 100644 libavformat/iamfdec.c

diff --git a/libavformat/Makefile b/libavformat/Makefile
index 2db83aff81..f23c22792b 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -258,6 +258,7 @@ OBJS-$(CONFIG_EVC_MUXER)                 += rawenc.o
 OBJS-$(CONFIG_HLS_DEMUXER)               += hls.o hls_sample_encryption.o
 OBJS-$(CONFIG_HLS_MUXER)                 += hlsenc.o hlsplaylist.o avc.o
 OBJS-$(CONFIG_HNM_DEMUXER)               += hnm.o
+OBJS-$(CONFIG_IAMF_DEMUXER)              += iamfdec.o iamf_parse.o iamf.o
 OBJS-$(CONFIG_ICO_DEMUXER)               += icodec.o
 OBJS-$(CONFIG_ICO_MUXER)                 += icoenc.o
 OBJS-$(CONFIG_IDCIN_DEMUXER)             += idcin.o
diff --git a/libavformat/allformats.c b/libavformat/allformats.c
index c8bb4e3866..6e520b78a6 100644
--- a/libavformat/allformats.c
+++ b/libavformat/allformats.c
@@ -212,6 +212,7 @@ extern const FFOutputFormat ff_hevc_muxer;
 extern const AVInputFormat  ff_hls_demuxer;
 extern const FFOutputFormat ff_hls_muxer;
 extern const AVInputFormat  ff_hnm_demuxer;
+extern const AVInputFormat  ff_iamf_demuxer;
 extern const AVInputFormat  ff_ico_demuxer;
 extern const FFOutputFormat ff_ico_muxer;
 extern const AVInputFormat  ff_idcin_demuxer;
diff --git a/libavformat/iamf.c b/libavformat/iamf.c
new file mode 100644
index 0000000000..5de70dc082
--- /dev/null
+++ b/libavformat/iamf.c
@@ -0,0 +1,125 @@
+/*
+ * Immersive Audio Model and Formats common helpers and structs
+ * Copyright (c) 2023 James Almer <jamrial at gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/channel_layout.h"
+#include "libavutil/iamf.h"
+#include "libavutil/mem.h"
+#include "iamf.h"
+
+/**
+ * Channel layouts selectable by a scalable layer's loudspeaker_layout value.
+ * scalable_channel_layout_config() copies entry [loudspeaker_layout] into the
+ * layer for coded values below 10, so the array index is the coded value.
+ */
+const AVChannelLayout ff_iamf_scalable_ch_layouts[10] = {
+    AV_CHANNEL_LAYOUT_MONO,
+    AV_CHANNEL_LAYOUT_STEREO,
+    // "Loudspeaker configuration for Sound System B"
+    AV_CHANNEL_LAYOUT_5POINT1_BACK,
+    // "Loudspeaker configuration for Sound System C"
+    AV_CHANNEL_LAYOUT_5POINT1POINT2_BACK,
+    // "Loudspeaker configuration for Sound System D"
+    AV_CHANNEL_LAYOUT_5POINT1POINT4_BACK,
+    // "Loudspeaker configuration for Sound System I"
+    AV_CHANNEL_LAYOUT_7POINT1,
+    // "Loudspeaker configuration for Sound System I" + Ltf + Rtf
+    AV_CHANNEL_LAYOUT_7POINT1POINT2,
+    // "Loudspeaker configuration for Sound System J"
+    AV_CHANNEL_LAYOUT_7POINT1POINT4_BACK,
+    // Front subset of "Loudspeaker configuration for Sound System J"
+    AV_CHANNEL_LAYOUT_3POINT1POINT2,
+    // Binaural
+    AV_CHANNEL_LAYOUT_STEREO,
+};
+
+/**
+ * Pairs every enum IAMF_Sound_System value with the AVChannelLayout used to
+ * represent it. Entries are listed in increasing id order.
+ */
+const struct IAMFSoundSystemMap ff_iamf_sound_system_map[13] = {
+    { SOUND_SYSTEM_A_0_2_0, AV_CHANNEL_LAYOUT_STEREO },
+    { SOUND_SYSTEM_B_0_5_0, AV_CHANNEL_LAYOUT_5POINT1_BACK },
+    { SOUND_SYSTEM_C_2_5_0, AV_CHANNEL_LAYOUT_5POINT1POINT2_BACK },
+    { SOUND_SYSTEM_D_4_5_0, AV_CHANNEL_LAYOUT_5POINT1POINT4_BACK },
+    // Sound System E is spelled out by hand: the 5.1.4 (back) mask plus the
+    // bottom front center channel, 11 channels in total.
+    { SOUND_SYSTEM_E_4_5_1,
+        {
+            .nb_channels = 11,
+            .order       = AV_CHANNEL_ORDER_NATIVE,
+            .u.mask      = AV_CH_LAYOUT_5POINT1POINT4_BACK | AV_CH_BOTTOM_FRONT_CENTER,
+        },
+    },
+    { SOUND_SYSTEM_F_3_7_0,  AV_CHANNEL_LAYOUT_7POINT2POINT3 },
+    { SOUND_SYSTEM_G_4_9_0,  AV_CHANNEL_LAYOUT_9POINT1POINT4_BACK },
+    { SOUND_SYSTEM_H_9_10_3, AV_CHANNEL_LAYOUT_22POINT2 },
+    { SOUND_SYSTEM_I_0_7_0,  AV_CHANNEL_LAYOUT_7POINT1 },
+    { SOUND_SYSTEM_J_4_7_0,  AV_CHANNEL_LAYOUT_7POINT1POINT4_BACK },
+    { SOUND_SYSTEM_10_2_7_0, AV_CHANNEL_LAYOUT_7POINT1POINT2 },
+    { SOUND_SYSTEM_11_2_3_0, AV_CHANNEL_LAYOUT_3POINT1POINT2 },
+    { SOUND_SYSTEM_12_0_1_0, AV_CHANNEL_LAYOUT_MONO },
+};
+
+/**
+ * Destroy an IAMFAudioElement together with everything it owns: the codec
+ * parameters of each substream, the substream and layer arrays and the
+ * wrapped AVIAMFAudioElement. *paudio_element is set to NULL afterwards.
+ * Does nothing when *paudio_element is already NULL.
+ */
+void ff_iamf_free_audio_element(IAMFAudioElement **paudio_element)
+{
+    IAMFAudioElement *elem = *paudio_element;
+
+    if (elem) {
+        for (unsigned int n = 0; n < elem->nb_substreams; n++) {
+            IAMFSubStream *sub = &elem->substreams[n];
+
+            avcodec_parameters_free(&sub->codecpar);
+        }
+
+        av_free(elem->substreams);
+        av_free(elem->layers);
+        av_iamf_audio_element_free(&elem->element);
+
+        // Frees the wrapper and clears the caller's pointer.
+        av_freep(paudio_element);
+    }
+}
+
+/**
+ * Destroy an IAMFMixPresentation: every language label string, the label
+ * array and the wrapped AVIAMFMixPresentation. *pmix_presentation is set to
+ * NULL afterwards. Does nothing when *pmix_presentation is already NULL.
+ */
+void ff_iamf_free_mix_presentation(IAMFMixPresentation **pmix_presentation)
+{
+    IAMFMixPresentation *mp = *pmix_presentation;
+
+    if (mp) {
+        char **labels = mp->language_label;
+
+        for (unsigned int n = 0; n < mp->count_label; n++)
+            av_free(labels[n]);
+        av_free(labels);
+
+        av_iamf_mix_presentation_free(&mp->mix);
+
+        // Frees the wrapper and clears the caller's pointer.
+        av_freep(pmix_presentation);
+    }
+}
+
+/**
+ * Release everything an IAMFContext owns and reset its arrays and counters
+ * to the empty state. The context structure itself is not freed. A NULL
+ * context is accepted and ignored.
+ */
+void ff_iamf_uninit_context(IAMFContext *c)
+{
+    int idx;
+
+    if (c == NULL)
+        return;
+
+    // Codec configs own their extradata buffer.
+    for (idx = 0; idx < c->nb_codec_configs; idx++) {
+        IAMFCodecConfig *config = c->codec_configs[idx];
+
+        av_free(config->extradata);
+        av_free(config);
+    }
+    c->nb_codec_configs = 0;
+    av_freep(&c->codec_configs);
+
+    for (idx = 0; idx < c->nb_audio_elements; idx++)
+        ff_iamf_free_audio_element(&c->audio_elements[idx]);
+    c->nb_audio_elements = 0;
+    av_freep(&c->audio_elements);
+
+    for (idx = 0; idx < c->nb_mix_presentations; idx++)
+        ff_iamf_free_mix_presentation(&c->mix_presentations[idx]);
+    c->nb_mix_presentations = 0;
+    av_freep(&c->mix_presentations);
+
+    // Only the bookkeeping entry is freed here, not the param it points to.
+    for (idx = 0; idx < c->nb_param_definitions; idx++)
+        av_free(c->param_definitions[idx]);
+    c->nb_param_definitions = 0;
+    av_freep(&c->param_definitions);
+}
diff --git a/libavformat/iamf.h b/libavformat/iamf.h
new file mode 100644
index 0000000000..ce94cb5bc4
--- /dev/null
+++ b/libavformat/iamf.h
@@ -0,0 +1,163 @@
+/*
+ * Immersive Audio Model and Formats common helpers and structs
+ * Copyright (c) 2023 James Almer <jamrial at gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFORMAT_IAMF_H
+#define AVFORMAT_IAMF_H
+
+#include <stdint.h>
+
+#include "libavutil/channel_layout.h"
+#include "libavutil/iamf.h"
+#include "libavcodec/codec_id.h"
+#include "libavcodec/codec_par.h"
+#include "avformat.h"
+
+// Upper bound, in bytes, on the size of an OBU header.
+// NOTE(review): presumably one leading byte plus three variable length fields
+// of at most 8 bytes each - confirm against the header parser.
+#define MAX_IAMF_OBU_HEADER_SIZE (1 + 8 * 3)
+
+// OBU types (section 3.2).
+// Values 0-4 and 31 are descriptor/delimiter OBUs, 5-23 are audio frame OBUs.
+// NOTE(review): the ID0..ID17 variants presumably carry an implicit substream
+// id equal to the suffix - confirm against the specification.
+enum IAMF_OBU_Type {
+    IAMF_OBU_IA_CODEC_CONFIG        = 0,
+    IAMF_OBU_IA_AUDIO_ELEMENT       = 1,
+    IAMF_OBU_IA_MIX_PRESENTATION    = 2,
+    IAMF_OBU_IA_PARAMETER_BLOCK     = 3,
+    IAMF_OBU_IA_TEMPORAL_DELIMITER  = 4,
+    IAMF_OBU_IA_AUDIO_FRAME         = 5,
+    IAMF_OBU_IA_AUDIO_FRAME_ID0     = 6,
+    IAMF_OBU_IA_AUDIO_FRAME_ID1     = 7,
+    IAMF_OBU_IA_AUDIO_FRAME_ID2     = 8,
+    IAMF_OBU_IA_AUDIO_FRAME_ID3     = 9,
+    IAMF_OBU_IA_AUDIO_FRAME_ID4     = 10,
+    IAMF_OBU_IA_AUDIO_FRAME_ID5     = 11,
+    IAMF_OBU_IA_AUDIO_FRAME_ID6     = 12,
+    IAMF_OBU_IA_AUDIO_FRAME_ID7     = 13,
+    IAMF_OBU_IA_AUDIO_FRAME_ID8     = 14,
+    IAMF_OBU_IA_AUDIO_FRAME_ID9     = 15,
+    IAMF_OBU_IA_AUDIO_FRAME_ID10    = 16,
+    IAMF_OBU_IA_AUDIO_FRAME_ID11    = 17,
+    IAMF_OBU_IA_AUDIO_FRAME_ID12    = 18,
+    IAMF_OBU_IA_AUDIO_FRAME_ID13    = 19,
+    IAMF_OBU_IA_AUDIO_FRAME_ID14    = 20,
+    IAMF_OBU_IA_AUDIO_FRAME_ID15    = 21,
+    IAMF_OBU_IA_AUDIO_FRAME_ID16    = 22,
+    IAMF_OBU_IA_AUDIO_FRAME_ID17    = 23,
+    // 24~30 reserved.
+    IAMF_OBU_IA_SEQUENCE_HEADER     = 31,
+};
+
+/**
+ * One parsed Codec Config OBU. Entries are created by codec_config_obu() and
+ * released, together with their extradata, by ff_iamf_uninit_context().
+ */
+typedef struct IAMFCodecConfig {
+    unsigned codec_config_id; // id read from the OBU; duplicates are rejected while parsing
+    enum AVCodecID codec_id;  // AV_CODEC_ID_NONE when the coded fourcc is not recognized
+    uint32_t codec_tag;
+    unsigned nb_samples;      // NOTE(review): presumably samples per audio frame - confirm
+    int seek_preroll;         // signed 16-bit value read from the OBU
+    int sample_rate;          // filled in by the codec specific decoder config parser
+    int extradata_size;       // number of valid bytes in extradata
+    uint8_t *extradata;       // codec setup data, NULL when the codec parser stored none
+} IAMFCodecConfig;
+
+/**
+ * Substream counts of one layer. IAMFAudioElement keeps an array of these in
+ * its field group marked "mux".
+ */
+typedef struct IAMFLayer {
+    unsigned int substream_count;
+    unsigned int coupled_substream_count;
+} IAMFLayer;
+
+/**
+ * One audio substream of an audio element.
+ */
+typedef struct IAMFSubStream {
+    unsigned int audio_substream_id;
+
+    // demux
+    // Codec parameters of this substream, released with
+    // avcodec_parameters_free() by ff_iamf_free_audio_element().
+    AVCodecParameters *codecpar;
+} IAMFSubStream;
+
+/**
+ * Wrapper around an AVIAMFAudioElement that adds the ids and per-substream
+ * state needed by the (de)muxing code. Freed by ff_iamf_free_audio_element().
+ */
+typedef struct IAMFAudioElement {
+    AVIAMFAudioElement *element;   // owned; released with av_iamf_audio_element_free()
+    unsigned int audio_element_id; // unique within an IAMFContext
+
+    IAMFSubStream *substreams;     // owned array of nb_substreams entries
+    unsigned int nb_substreams;
+
+    unsigned int codec_config_id;  // id of the IAMFCodecConfig this element refers to
+
+    // mux
+    IAMFLayer *layers;             // owned array
+    unsigned int nb_layers;
+} IAMFAudioElement;
+
+/**
+ * Wrapper around an AVIAMFMixPresentation. Freed by
+ * ff_iamf_free_mix_presentation().
+ */
+typedef struct IAMFMixPresentation {
+    AVIAMFMixPresentation *mix;       // owned; released with av_iamf_mix_presentation_free()
+    unsigned int mix_presentation_id;
+
+    // demux
+    unsigned int count_label;         // number of entries in language_label
+    char **language_label;            // owned array of owned strings
+} IAMFMixPresentation;
+
+/**
+ * Registry entry for one parameter definition, keyed by param->parameter_id.
+ * Entries are created by param_parse() the first time an id is seen.
+ */
+typedef struct IAMFParamDefinition {
+    const IAMFAudioElement *audio_element; // element passed to param_parse(), not owned
+    // Not released by ff_iamf_uninit_context(), which frees only this entry.
+    AVIAMFParamDefinition *param;
+    int mode;          // logical negation of the mode bit read by param_parse()
+    size_t param_size; // allocation size reported by av_iamf_param_definition_alloc()
+} IAMFParamDefinition;
+
+/**
+ * Everything collected from the descriptor OBUs of an IAMF stream. Each
+ * pointer array holds individually allocated entries and is paired with its
+ * element count. Emptied by ff_iamf_uninit_context().
+ */
+typedef struct IAMFContext {
+    IAMFCodecConfig **codec_configs;
+    int nb_codec_configs;
+    IAMFAudioElement **audio_elements;
+    int nb_audio_elements;
+    IAMFMixPresentation **mix_presentations;
+    int nb_mix_presentations;
+    IAMFParamDefinition **param_definitions;
+    int nb_param_definitions;
+} IAMFContext;
+
+// Anchor element kinds; the enumerators take the implicit values 0, 1 and 2.
+// NOTE(review): "UNKNWONW" looks like a misspelling of UNKNOWN. Renaming it
+// requires updating every user of the constant.
+enum IAMF_Anchor_Element {
+    IAMF_ANCHOR_ELEMENT_UNKNWONW,
+    IAMF_ANCHOR_ELEMENT_DIALOGUE,
+    IAMF_ANCHOR_ELEMENT_ALBUM,
+};
+
+// Sound system ids. ff_iamf_sound_system_map provides the AVChannelLayout
+// that corresponds to each value.
+enum IAMF_Sound_System {
+    SOUND_SYSTEM_A_0_2_0  = 0,  // "Loudspeaker configuration for Sound System A"
+    SOUND_SYSTEM_B_0_5_0  = 1,  // "Loudspeaker configuration for Sound System B"
+    SOUND_SYSTEM_C_2_5_0  = 2,  // "Loudspeaker configuration for Sound System C"
+    SOUND_SYSTEM_D_4_5_0  = 3,  // "Loudspeaker configuration for Sound System D"
+    SOUND_SYSTEM_E_4_5_1  = 4,  // "Loudspeaker configuration for Sound System E"
+    SOUND_SYSTEM_F_3_7_0  = 5,  // "Loudspeaker configuration for Sound System F"
+    SOUND_SYSTEM_G_4_9_0  = 6,  // "Loudspeaker configuration for Sound System G"
+    SOUND_SYSTEM_H_9_10_3 = 7,  // "Loudspeaker configuration for Sound System H"
+    SOUND_SYSTEM_I_0_7_0  = 8,  // "Loudspeaker configuration for Sound System I"
+    SOUND_SYSTEM_J_4_7_0  = 9, // "Loudspeaker configuration for Sound System J"
+    SOUND_SYSTEM_10_2_7_0 = 10, // "Loudspeaker configuration for Sound System I" + Ltf + Rtf
+    SOUND_SYSTEM_11_2_3_0 = 11, // Front subset of "Loudspeaker configuration for Sound System J"
+    SOUND_SYSTEM_12_0_1_0 = 12, // Mono
+};
+
+// Element type of ff_iamf_sound_system_map: a sound system id and its layout.
+struct IAMFSoundSystemMap {
+    enum IAMF_Sound_System id;
+    AVChannelLayout layout;
+};
+
+// Layouts for scalable layers, indexed by the coded loudspeaker_layout value.
+extern const AVChannelLayout ff_iamf_scalable_ch_layouts[10];
+// One entry per enum IAMF_Sound_System value.
+extern const struct IAMFSoundSystemMap ff_iamf_sound_system_map[13];
+
+/**
+ * Free an audio element and everything it owns, then set *paudio_element to
+ * NULL. A NULL *paudio_element is ignored.
+ */
+void ff_iamf_free_audio_element(IAMFAudioElement **paudio_element);
+/**
+ * Free a mix presentation and everything it owns, then set
+ * *pmix_presentation to NULL. A NULL *pmix_presentation is ignored.
+ */
+void ff_iamf_free_mix_presentation(IAMFMixPresentation **pmix_presentation);
+/**
+ * Free all entries held by the context and reset its arrays and counters.
+ * The IAMFContext structure itself is not freed. A NULL c is ignored.
+ */
+void ff_iamf_uninit_context(IAMFContext *c);
+
+#endif /* AVFORMAT_IAMF_H */
diff --git a/libavformat/iamf_parse.c b/libavformat/iamf_parse.c
new file mode 100644
index 0000000000..60305743f9
--- /dev/null
+++ b/libavformat/iamf_parse.c
@@ -0,0 +1,1106 @@
+/*
+ * Immersive Audio Model and Formats parsing
+ * Copyright (c) 2023 James Almer <jamrial at gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avassert.h"
+#include "libavutil/common.h"
+#include "libavutil/iamf.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/log.h"
+#include "libavcodec/get_bits.h"
+#include "libavcodec/flac.h"
+#include "libavcodec/mpeg4audio.h"
+#include "libavcodec/put_bits.h"
+#include "avio_internal.h"
+#include "iamf_parse.h"
+#include "isom.h"
+
+/**
+ * Build Opus extradata from the remainder of a Codec Config OBU: the bytes
+ * left in the payload are stored behind an 8 byte "OpusHead" signature.
+ *
+ * @param codec_config entry that receives extradata, its size and sample rate
+ * @param pb           reader over the OBU payload; avio_tell() is the offset
+ *                     inside the payload
+ * @param len          total payload size in bytes
+ * @return 0 on success, AVERROR_INVALIDDATA if fewer than 11 bytes remain or
+ *         the read comes up short, AVERROR(ENOMEM) on allocation failure
+ */
+static int opus_decoder_config(IAMFCodecConfig *codec_config,
+                               AVIOContext *pb, int len)
+{
+    static const char signature[8] = { 'O', 'p', 'u', 's', 'H', 'e', 'a', 'd' };
+    const int payload = len - avio_tell(pb);
+    uint8_t *dst;
+
+    if (payload < 11)
+        return AVERROR_INVALIDDATA;
+
+    dst = av_malloc(payload + 8);
+    codec_config->extradata = dst;
+    if (!dst)
+        return AVERROR(ENOMEM);
+
+    memcpy(dst, signature, sizeof(signature));
+
+    codec_config->extradata_size = avio_read(pb, dst + sizeof(signature), payload);
+    if (codec_config->extradata_size < payload)
+        return AVERROR_INVALIDDATA;
+
+    // Account for the signature that precedes the bytes just read.
+    codec_config->extradata_size += 8;
+    // The sample rate is set to a fixed 48000 for Opus.
+    codec_config->sample_rate = 48000;
+
+    return 0;
+}
+
+/**
+ * Parse the AAC decoder configuration of a Codec Config OBU.
+ *
+ * Validates the DecoderConfigDescriptor fields, stores the bytes following
+ * the DecoderSpecificInfo tag as extradata and derives the sample rate from
+ * them with avpriv_mpeg4audio_get_config2().
+ *
+ * @param codec_config entry that receives extradata, its size and sample rate
+ * @param pb           reader over the OBU payload; avio_tell() is the offset
+ *                     inside the payload
+ * @param len          total payload size in bytes
+ * @param logctx       logging context handed to the MPEG-4 audio parser
+ * @return 0 on success, AVERROR_INVALIDDATA on malformed data,
+ *         AVERROR(ENOMEM) on allocation failure, or the error returned by
+ *         avpriv_mpeg4audio_get_config2()
+ */
+static int aac_decoder_config(IAMFCodecConfig *codec_config,
+                              AVIOContext *pb, int len, void *logctx)
+{
+    MPEG4AudioConfig cfg = { 0 };
+    int object_type_id, codec_id, stream_type;
+    int ret, tag, left;
+
+    tag = avio_r8(pb);
+    if (tag != MP4DecConfigDescrTag)
+        return AVERROR_INVALIDDATA;
+
+    object_type_id = avio_r8(pb);
+    if (object_type_id != 0x40)
+        return AVERROR_INVALIDDATA;
+
+    // The upper six bits must equal 5 and the following bit must be clear.
+    stream_type = avio_r8(pb);
+    if (((stream_type >> 2) != 5) || ((stream_type >> 1) & 1))
+        return AVERROR_INVALIDDATA;
+
+    avio_skip(pb, 3); // buffer size db
+    avio_skip(pb, 4); // rc_max_rate
+    avio_skip(pb, 4); // avg bitrate
+
+    codec_id = ff_codec_get_id(ff_mp4_obj_type, object_type_id);
+    if (codec_id && codec_id != codec_config->codec_id)
+        return AVERROR_INVALIDDATA;
+
+    tag = avio_r8(pb);
+    if (tag != MP4DecSpecificDescrTag)
+        return AVERROR_INVALIDDATA;
+
+    left = len - avio_tell(pb);
+    if (left <= 0)
+        return AVERROR_INVALIDDATA;
+
+    // The buffer is parsed with a bitstream reader below, which needs
+    // AV_INPUT_BUFFER_PADDING_SIZE readable (zeroed) bytes after the payload.
+    codec_config->extradata = av_malloc((size_t)left + AV_INPUT_BUFFER_PADDING_SIZE);
+    if (!codec_config->extradata)
+        return AVERROR(ENOMEM);
+    memset(codec_config->extradata + left, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+
+    // Only publish the size once the whole payload is known to be present,
+    // so a failed read never leaves a negative extradata_size behind.
+    ret = avio_read(pb, codec_config->extradata, left);
+    if (ret < left)
+        return AVERROR_INVALIDDATA;
+    codec_config->extradata_size = left;
+
+    ret = avpriv_mpeg4audio_get_config2(&cfg, codec_config->extradata,
+                                        codec_config->extradata_size, 1, logctx);
+    if (ret < 0)
+        return ret;
+
+    codec_config->sample_rate = cfg.sample_rate;
+
+    return 0;
+}
+
+/**
+ * Store the FLAC decoder configuration of a Codec Config OBU as extradata.
+ *
+ * Four bytes (the METADATA_BLOCK_HEADER) are skipped and everything left in
+ * the payload is kept. The sample rate is taken from the upper 20 bits of
+ * the 24-bit big-endian value at extradata offset 10.
+ *
+ * @param codec_config entry that receives extradata, its size and sample rate
+ * @param pb           reader over the OBU payload; avio_tell() is the offset
+ *                     inside the payload
+ * @param len          total payload size in bytes
+ * @return 0 on success, AVERROR_INVALIDDATA if fewer than
+ *         FLAC_STREAMINFO_SIZE bytes remain or the read comes up short,
+ *         AVERROR(ENOMEM) on allocation failure
+ */
+static int flac_decoder_config(IAMFCodecConfig *codec_config,
+                               AVIOContext *pb, int len)
+{
+    uint8_t *streaminfo;
+    int remaining;
+
+    avio_skip(pb, 4); // METADATA_BLOCK_HEADER
+
+    remaining = len - avio_tell(pb);
+    if (remaining < FLAC_STREAMINFO_SIZE)
+        return AVERROR_INVALIDDATA;
+
+    streaminfo = av_malloc(remaining);
+    codec_config->extradata = streaminfo;
+    if (!streaminfo)
+        return AVERROR(ENOMEM);
+
+    codec_config->extradata_size = avio_read(pb, streaminfo, remaining);
+    if (codec_config->extradata_size < remaining)
+        return AVERROR_INVALIDDATA;
+
+    codec_config->sample_rate = AV_RB24(streaminfo + 10) >> 4;
+
+    return 0;
+}
+
+/**
+ * Parse the LPCM decoder configuration of a Codec Config OBU and select the
+ * matching PCM codec id.
+ *
+ * @param codec_config entry that receives codec_id and sample_rate
+ * @param pb           reader over the OBU payload; avio_tell() is the offset
+ *                     inside the payload
+ * @param len          total payload size in bytes
+ * @return 0 on success, AVERROR_INVALIDDATA if the endianness flag is not 0
+ *         or 1, the sample size is not 16, 24 or 32 bits, or bytes are left
+ *         over after the configuration
+ */
+static int ipcm_decoder_config(IAMFCodecConfig *codec_config,
+                               AVIOContext *pb, int len)
+{
+    // Indexed as [endianness flag][sample_bits / 8 - 2].
+    static const enum AVCodecID sample_fmt[2][3] = {
+        { AV_CODEC_ID_PCM_S16BE, AV_CODEC_ID_PCM_S24BE, AV_CODEC_ID_PCM_S32BE },
+        { AV_CODEC_ID_PCM_S16LE, AV_CODEC_ID_PCM_S24LE, AV_CODEC_ID_PCM_S32LE },
+    };
+    int sample_format = avio_r8(pb); // 0 = BE, 1 = LE
+    int sample_bits   = avio_r8(pb); // 16, 24, 32
+
+    // Validate the raw bit depth before turning it into a table index:
+    // values below 16 would otherwise yield a negative index, and values
+    // that are not a multiple of 8 would be silently rounded down.
+    if (sample_format > 1 ||
+        (sample_bits != 16 && sample_bits != 24 && sample_bits != 32))
+        return AVERROR_INVALIDDATA;
+
+    codec_config->codec_id = sample_fmt[sample_format][sample_bits / 8 - 2];
+    codec_config->sample_rate = avio_rb32(pb);
+
+    // The configuration must consume the payload exactly.
+    if (len - avio_tell(pb))
+        return AVERROR_INVALIDDATA;
+
+    return 0;
+}
+
+/**
+ * Parse one Codec Config OBU payload and append the resulting
+ * IAMFCodecConfig to c->codec_configs.
+ *
+ * The payload is first copied into a temporary buffer and parsed through an
+ * in-memory AVIOContext, so avio_tell() on that context is the offset inside
+ * the payload.
+ *
+ * @param s   logging context
+ * @param c   context that receives the new codec config
+ * @param pb  input positioned at the start of the OBU payload
+ * @param len payload size in bytes
+ * @return 0 on success, a negative AVERROR code on failure (short read,
+ *         duplicate codec_config_id, allocation failure or an error from the
+ *         codec specific parser)
+ */
+static int codec_config_obu(void *s, IAMFContext *c, AVIOContext *pb, int len)
+{
+    IAMFCodecConfig **tmp, *codec_config = NULL;
+    FFIOContext b;
+    AVIOContext *pbc;
+    uint8_t *buf;
+    enum AVCodecID avcodec_id;
+    unsigned codec_config_id, nb_samples, codec_id;
+    int16_t seek_preroll;
+    int ret;
+
+    buf = av_malloc(len);
+    if (!buf)
+        return AVERROR(ENOMEM);
+
+    ret = avio_read(pb, buf, len);
+    if (ret != len) {
+        // A short but otherwise successful read is reported as invalid data.
+        if (ret >= 0)
+            ret = AVERROR_INVALIDDATA;
+        goto fail;
+    }
+
+    ffio_init_context(&b, buf, len, 0, NULL, NULL, NULL, NULL);
+    pbc = &b.pub;
+
+    codec_config_id = ffio_read_leb(pbc);
+    codec_id = avio_rb32(pbc);
+    nb_samples = ffio_read_leb(pbc);
+    seek_preroll = avio_rb16(pbc);
+
+    // Map the fourcc to an AVCodecID. 'ipcm' is not listed here; its codec
+    // id is chosen later by ipcm_decoder_config().
+    switch(codec_id) {
+    case MKBETAG('O','p','u','s'):
+        avcodec_id = AV_CODEC_ID_OPUS;
+        break;
+    case MKBETAG('m','p','4','a'):
+        avcodec_id = AV_CODEC_ID_AAC;
+        break;
+    case MKBETAG('f','L','a','C'):
+        avcodec_id = AV_CODEC_ID_FLAC;
+        break;
+    default:
+        avcodec_id = AV_CODEC_ID_NONE;
+        break;
+    }
+
+    // codec_config_id must be unique within the context.
+    for (int i = 0; i < c->nb_codec_configs; i++)
+        if (c->codec_configs[i]->codec_config_id == codec_config_id) {
+            ret = AVERROR_INVALIDDATA;
+            goto fail;
+        }
+
+    // Grow the array first; the new slot is only filled in on success.
+    tmp = av_realloc_array(c->codec_configs, c->nb_codec_configs + 1, sizeof(*c->codec_configs));
+    if (!tmp) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+    c->codec_configs = tmp;
+
+    codec_config = av_mallocz(sizeof(*codec_config));
+    if (!codec_config) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    codec_config->codec_config_id = codec_config_id;
+    codec_config->codec_id = avcodec_id;
+    codec_config->nb_samples = nb_samples;
+    codec_config->seek_preroll = seek_preroll;
+
+    // Codec specific part. For an unknown fourcc no parser runs and ret
+    // still holds the non-negative byte count returned by avio_read() above.
+    switch(codec_id) {
+    case MKBETAG('O','p','u','s'):
+        ret = opus_decoder_config(codec_config, pbc, len);
+        break;
+    case MKBETAG('m','p','4','a'):
+        ret = aac_decoder_config(codec_config, pbc, len, s);
+        break;
+    case MKBETAG('f','L','a','C'):
+        ret = flac_decoder_config(codec_config, pbc, len);
+        break;
+    case MKBETAG('i','p','c','m'):
+        ret = ipcm_decoder_config(codec_config, pbc, len);
+        break;
+    default:
+        break;
+    }
+    if (ret < 0)
+        goto fail;
+
+    // From here on the context owns codec_config.
+    c->codec_configs[c->nb_codec_configs++] = codec_config;
+
+    len -= avio_tell(pbc);
+    if (len)
+       av_log(s, AV_LOG_WARNING, "Underread in codec_config_obu. %d bytes left at the end\n", len);
+
+    ret = 0;
+fail:
+    // Shared exit path: the temporary buffer is always released, the
+    // half-built codec config only when an error occurred.
+    av_free(buf);
+    if (ret < 0) {
+        if (codec_config)
+            av_free(codec_config->extradata);
+        av_free(codec_config);
+    }
+    return ret;
+}
+
+/**
+ * Patch the channel count stored inside a substream's codec extradata so it
+ * matches codecpar->ch_layout.nb_channels. The extradata is modified in
+ * place; codecs other than Opus, AAC and FLAC are left untouched.
+ *
+ * @param codecpar codec parameters whose extradata is rewritten
+ * @return 0 on success, AVERROR_INVALIDDATA if the extradata is too short
+ *         or cannot be rewritten, or the error from init_get_bits8()
+ */
+static int update_extradata(AVCodecParameters *codecpar)
+{
+    GetBitContext gb;
+    PutBitContext pb;
+    int ret;
+
+    switch(codecpar->codec_id) {
+    case AV_CODEC_ID_OPUS:
+        // Byte 9 is written below, so at least 10 bytes must be present.
+        if (codecpar->extradata_size < 10)
+            return AVERROR_INVALIDDATA;
+        AV_WB8(codecpar->extradata + 9, codecpar->ch_layout.nb_channels);
+        break;
+    case AV_CODEC_ID_AAC: {
+        uint8_t buf[5];
+        int aot, freq_index, tail;
+
+        init_put_bits(&pb, buf, sizeof(buf));
+        ret = init_get_bits8(&gb, codecpar->extradata, codecpar->extradata_size);
+        if (ret < 0)
+            return ret;
+
+        aot = get_bits(&gb, 5);
+        put_bits(&pb, 5, aot);
+        if (aot == AOT_ESCAPE) // violates section 3.11.2, but better check for it
+            put_bits(&pb, 6, get_bits(&gb, 6));
+        freq_index = get_bits(&gb, 4);
+        put_bits(&pb, 4, freq_index);
+        if (freq_index == 0x0f) {
+            // Escaped object type (11 bits) + index (4) + explicit rate (24)
+            // + channel config (4) is 43 bits and does not fit in buf.
+            if (aot == AOT_ESCAPE)
+                return AVERROR_INVALIDDATA;
+            put_bits(&pb, 24, get_bits(&gb, 24));
+        }
+
+        skip_bits(&gb, 4);
+        put_bits(&pb, 4, codecpar->ch_layout.nb_channels); // set channel config
+        // Copy whatever still fits in buf. This can be up to 27 bits, more
+        // than get_bits() supports, hence get_bits_long().
+        tail = put_bits_left(&pb);
+        put_bits(&pb, tail, get_bits_long(&gb, tail));
+        flush_put_bits(&pb);
+
+        // Never write past the bytes that belong to the extradata.
+        memcpy(codecpar->extradata, buf,
+               FFMIN((size_t)codecpar->extradata_size, sizeof(buf)));
+        break;
+    }
+    case AV_CODEC_ID_FLAC: {
+        uint8_t buf[13];
+
+        // All 13 bytes of buf are copied back below.
+        if (codecpar->extradata_size < (int)sizeof(buf))
+            return AVERROR_INVALIDDATA;
+
+        init_put_bits(&pb, buf, sizeof(buf));
+        ret = init_get_bits8(&gb, codecpar->extradata, codecpar->extradata_size);
+        if (ret < 0)
+            return ret;
+
+        put_bits32(&pb, get_bits_long(&gb, 32)); // min/max blocksize
+        put_bits64(&pb, 48, get_bits64(&gb, 48)); // min/max framesize
+        put_bits(&pb, 20, get_bits(&gb, 20)); // samplerate
+        skip_bits(&gb, 3);
+        put_bits(&pb, 3, codecpar->ch_layout.nb_channels - 1);
+        // 103 of the 104 bits are written at this point; copy the last one.
+        ret = put_bits_left(&pb);
+        put_bits(&pb, ret, get_bits(&gb, ret));
+        flush_put_bits(&pb);
+
+        memcpy(codecpar->extradata, buf, sizeof(buf));
+        break;
+    }
+    }
+
+    return 0;
+}
+
+/**
+ * Parse the scalable channel layout configuration of an audio element.
+ *
+ * One AVIAMFLayer is added to audio_element->element per coded layer. The
+ * substreams of the audio element are assigned to the layers in order; each
+ * gets a stereo layout while the layer still has coupled substreams left and
+ * a mono layout otherwise, and its extradata is patched accordingly.
+ *
+ * @param s             logging context (unused here)
+ * @param pb            reader positioned at the configuration
+ * @param audio_element element being filled in; its substreams array must
+ *                      already hold nb_substreams entries with codecpar set
+ * @param codec_config  codec config of the element (unused here)
+ * @return 0 on success, AVERROR_INVALIDDATA if more than 6 layers are coded
+ *         or the layers reference more substreams than the element has,
+ *         AVERROR(ENOMEM) on allocation failure, or an error from
+ *         av_channel_layout_copy() / update_extradata()
+ */
+static int scalable_channel_layout_config(void *s, AVIOContext *pb,
+                                          IAMFAudioElement *audio_element,
+                                          const IAMFCodecConfig *codec_config)
+{
+    int nb_layers, k = 0; // k: index of the next unassigned substream
+
+    nb_layers = avio_r8(pb) >> 5; // get_bits(&gb, 3);
+    // skip_bits(&gb, 5); //reserved
+
+    if (nb_layers > 6)
+        return AVERROR_INVALIDDATA;
+
+    for (int i = 0; i < nb_layers; i++) {
+        AVIAMFLayer *layer;
+        int loudspeaker_layout, output_gain_is_present_flag;
+        int substream_count, coupled_substream_count;
+        int ret, byte = avio_r8(pb);
+
+        layer = av_iamf_audio_element_add_layer(audio_element->element);
+        if (!layer)
+            return AVERROR(ENOMEM);
+
+        loudspeaker_layout = byte >> 4; // get_bits(&gb, 4);
+        output_gain_is_present_flag = (byte >> 3) & 1; //get_bits1(&gb);
+        if ((byte >> 2) & 1)
+            layer->flags |= AV_IAMF_LAYER_FLAG_RECON_GAIN;
+        substream_count = avio_r8(pb);
+        coupled_substream_count = avio_r8(pb);
+
+        // The per-layer counts come straight from the bitstream; together
+        // they must not exceed the substreams the element actually has.
+        if ((unsigned)k + substream_count > audio_element->nb_substreams)
+            return AVERROR_INVALIDDATA;
+
+        if (output_gain_is_present_flag) {
+            layer->output_gain_flags = avio_r8(pb) >> 2;  // get_bits(&gb, 6);
+            // Signed 16-bit value scaled by 1/256.
+            layer->output_gain = av_make_q(sign_extend(avio_rb16(pb), 16), 1 << 8);
+        }
+
+        if (loudspeaker_layout < 10) {
+            ret = av_channel_layout_copy(&layer->ch_layout, &ff_iamf_scalable_ch_layouts[loudspeaker_layout]);
+            if (ret < 0)
+                return ret;
+        } else
+            // No table entry for this value: only the channel count is known.
+            layer->ch_layout = (AVChannelLayout){ .order = AV_CHANNEL_ORDER_UNSPEC,
+                                                          .nb_channels = substream_count +
+                                                                         coupled_substream_count };
+
+        for (int j = 0; j < substream_count; j++) {
+            IAMFSubStream *substream = &audio_element->substreams[k++];
+
+            // The first coupled_substream_count substreams of a layer are stereo.
+            substream->codecpar->ch_layout = coupled_substream_count-- > 0 ? (AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO :
+                                                                             (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO;
+
+            ret = update_extradata(substream->codecpar);
+            if (ret < 0)
+                return ret;
+        }
+
+    }
+
+    return 0;
+}
+
+/**
+ * Parse the ambisonics configuration of an audio element and add the single
+ * layer that describes it.
+ *
+ * Mode 0 reads one channel mapping byte per output channel and marks every
+ * substream as mono. Mode 1 reads a demixing matrix of
+ * (substream_count + coupled_substream_count) * output_channel_count signed
+ * 16-bit entries scaled by 1/256 and marks the first coupled_substream_count
+ * substreams as stereo. Any other mode is ignored and reported as success.
+ *
+ * @param s             logging context (unused here)
+ * @param pb            reader positioned at the configuration
+ * @param audio_element element being filled in; its substreams array must
+ *                      already hold nb_substreams entries with codecpar set
+ * @param codec_config  codec config of the element (unused here)
+ * @return 0 on success, AVERROR_INVALIDDATA if the substream count does not
+ *         match the element or the channel count is not a non-zero perfect
+ *         square, AVERROR(ENOMEM) on allocation failure, or an error from
+ *         update_extradata()
+ */
+static int ambisonics_config(void *s, AVIOContext *pb,
+                             IAMFAudioElement *audio_element,
+                             const IAMFCodecConfig *codec_config)
+{
+    AVIAMFLayer *layer;
+    unsigned ambisonics_mode;
+    int output_channel_count, substream_count, order;
+    int ret;
+
+    ambisonics_mode = ffio_read_leb(pb);
+    if (ambisonics_mode > 1)
+        return 0;
+
+    output_channel_count = avio_r8(pb);  // C
+    substream_count = avio_r8(pb);  // N
+    if (audio_element->nb_substreams != substream_count)
+        return AVERROR_INVALIDDATA;
+
+    // Zero channels would feed sqrt() a negative value below; converting
+    // the resulting NaN to int is undefined.
+    if (!output_channel_count)
+        return AVERROR_INVALIDDATA;
+
+    order = floor(sqrt(output_channel_count - 1));
+    /* incomplete order - some harmonics are missing */
+    if ((order + 1) * (order + 1) != output_channel_count)
+        return AVERROR_INVALIDDATA;
+
+    layer = av_iamf_audio_element_add_layer(audio_element->element);
+    if (!layer)
+        return AVERROR(ENOMEM);
+
+    layer->ambisonics_mode = ambisonics_mode;
+    if (ambisonics_mode == 0) {
+        for (int i = 0; i < substream_count; i++) {
+            IAMFSubStream *substream = &audio_element->substreams[i];
+
+            substream->codecpar->ch_layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO;
+
+            ret = update_extradata(substream->codecpar);
+            if (ret < 0)
+                return ret;
+        }
+
+        layer->ch_layout.order = AV_CHANNEL_ORDER_CUSTOM;
+        layer->ch_layout.nb_channels = output_channel_count;
+        layer->ch_layout.u.map = av_calloc(output_channel_count, sizeof(*layer->ch_layout.u.map));
+        if (!layer->ch_layout.u.map)
+            return AVERROR(ENOMEM);
+
+        // One coded mapping byte per output channel, offset into the
+        // ambisonic channel id range.
+        for (int i = 0; i < output_channel_count; i++)
+            layer->ch_layout.u.map[i].id = avio_r8(pb) + AV_CHAN_AMBISONIC_BASE;
+    } else {
+        int coupled_substream_count = avio_r8(pb);  // M
+        int nb_demixing_matrix = substream_count + coupled_substream_count;
+        int demixing_matrix_size = nb_demixing_matrix * output_channel_count;
+
+        layer->ch_layout = (AVChannelLayout){ .order = AV_CHANNEL_ORDER_AMBISONIC, .nb_channels = output_channel_count };
+        layer->demixing_matrix = av_malloc_array(demixing_matrix_size, sizeof(*layer->demixing_matrix));
+        if (!layer->demixing_matrix)
+            return AVERROR(ENOMEM);
+
+        for (int i = 0; i < demixing_matrix_size; i++)
+            layer->demixing_matrix[i] = av_make_q(sign_extend(avio_rb16(pb), 16), 1 << 8);
+
+        for (int i = 0; i < substream_count; i++) {
+            IAMFSubStream *substream = &audio_element->substreams[i];
+
+            // The first coupled_substream_count substreams are stereo.
+            substream->codecpar->ch_layout = coupled_substream_count-- > 0 ? (AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO :
+                                                                             (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO;
+
+
+            ret = update_extradata(substream->codecpar);
+            if (ret < 0)
+                return ret;
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Parse a parameter definition and register it in the context.
+ *
+ * A new AVIAMFParamDefinition is always allocated and filled from the
+ * bitstream. If a definition with the same parameter_id is already
+ * registered, the new one must be byte-for-byte identical to it, otherwise
+ * parsing fails. If the id is new, an IAMFParamDefinition entry pointing at
+ * the new definition is appended to c->param_definitions.
+ *
+ * @param s                     logging context
+ * @param c                     context holding the registered definitions
+ * @param pb                    reader positioned at the definition
+ * @param type                  AV_IAMF_PARAMETER_DEFINITION_* value
+ * @param audio_element         element stored in a newly registered entry
+ * @param out_param_definition  receives the newly allocated definition on
+ *                              success; must not be NULL
+ * @return 0 on success, AVERROR(ENOMEM) on allocation failure,
+ *         AVERROR_INVALIDDATA on an unsupported type or inconsistent
+ *         redefinition
+ *
+ * NOTE(review): when the id was already registered the caller appears to be
+ * the sole owner of *out_param_definition; when it is new the registry entry
+ * references the same allocation - confirm who frees it.
+ */
+static int param_parse(void *s, IAMFContext *c, AVIOContext *pb,
+                       unsigned int type,
+                       const IAMFAudioElement *audio_element,
+                       AVIAMFParamDefinition **out_param_definition)
+{
+    IAMFParamDefinition *param_definition = NULL;
+    AVIAMFParamDefinition *param;
+    unsigned int parameter_id, parameter_rate, mode;
+    unsigned int duration = 0, constant_subblock_duration = 0, nb_subblocks = 0;
+    size_t param_size;
+
+    parameter_id = ffio_read_leb(pb);
+
+    // Look for an earlier definition with the same id.
+    for (int i = 0; i < c->nb_param_definitions; i++)
+        if (c->param_definitions[i]->param->parameter_id == parameter_id) {
+            param_definition = c->param_definitions[i];
+            break;
+        }
+
+    parameter_rate = ffio_read_leb(pb);
+    mode = avio_r8(pb) >> 7; // top bit only
+
+    // Timing information is only coded when the mode bit is 0; otherwise
+    // duration and subblock count stay 0.
+    if (mode == 0) {
+        duration = ffio_read_leb(pb);
+        constant_subblock_duration = ffio_read_leb(pb);
+        if (constant_subblock_duration == 0)
+            nb_subblocks = ffio_read_leb(pb);
+        else
+            nb_subblocks = duration / constant_subblock_duration;
+    }
+
+    param = av_iamf_param_definition_alloc(type, nb_subblocks, &param_size);
+    if (!param)
+        return AVERROR(ENOMEM);
+
+    for (int i = 0; i < nb_subblocks; i++) {
+        void *subblock = av_iamf_param_definition_get_subblock(param, i);
+        unsigned int subblock_duration = constant_subblock_duration;
+
+        // Variable subblock durations are coded one per subblock.
+        if (constant_subblock_duration == 0)
+            subblock_duration = ffio_read_leb(pb);
+
+        switch (type) {
+        case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN: {
+            AVIAMFMixGain *mix = subblock;
+            mix->subblock_duration = subblock_duration;
+            break;
+        }
+        case AV_IAMF_PARAMETER_DEFINITION_DEMIXING: {
+            AVIAMFDemixingInfo *demix = subblock;
+            demix->subblock_duration = subblock_duration;
+            // DemixingInfoParameterData
+            demix->dmixp_mode = avio_r8(pb) >> 5;
+            break;
+        }
+        case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN: {
+            AVIAMFReconGain *recon = subblock;
+            recon->subblock_duration = subblock_duration;
+            break;
+        }
+        default:
+            // Only reached when there is at least one subblock.
+            av_free(param);
+            return AVERROR_INVALIDDATA;
+        }
+    }
+
+    param->parameter_id = parameter_id;
+    param->parameter_rate = parameter_rate;
+    param->duration = duration;
+    param->constant_subblock_duration = constant_subblock_duration;
+    param->nb_subblocks = nb_subblocks;
+
+    if (param_definition) {
+        // A repeated definition must match the registered one exactly.
+        if (param_definition->param_size != param_size || memcmp(param_definition->param, param, param_size)) {
+            av_log(s, AV_LOG_ERROR, "Incosistent parameters for parameter_id %u\n", parameter_id);
+            av_free(param);
+            return AVERROR_INVALIDDATA;
+        }
+    } else {
+        // First time this id is seen: register it.
+        IAMFParamDefinition **tmp = av_realloc_array(c->param_definitions, c->nb_param_definitions + 1,
+                                                     sizeof(*c->param_definitions));
+        if (!tmp) {
+            av_free(param);
+            return AVERROR(ENOMEM);
+        }
+        c->param_definitions = tmp;
+
+        param_definition = av_mallocz(sizeof(*param_definition));
+        if (!param_definition) {
+            av_free(param);
+            return AVERROR(ENOMEM);
+        }
+        param_definition->param = param;
+        param_definition->mode = !mode; // stored inverted relative to the coded bit
+        param_definition->param_size = param_size;
+        param_definition->audio_element = audio_element;
+
+        c->param_definitions[c->nb_param_definitions++] = param_definition;
+    }
+
+    av_assert0(out_param_definition);
+    *out_param_definition = param;
+
+    return 0;
+}
+
+/**
+ * Look up a codec config by id.
+ *
+ * @return the first entry of c->codec_configs whose codec_config_id matches,
+ *         or NULL if there is none
+ */
+static IAMFCodecConfig *get_codec_config(IAMFContext *c, unsigned int codec_config_id)
+{
+    IAMFCodecConfig *match = NULL;
+
+    for (int n = 0; !match && n < c->nb_codec_configs; n++) {
+        IAMFCodecConfig *candidate = c->codec_configs[n];
+
+        if (candidate->codec_config_id == codec_config_id)
+            match = candidate;
+    }
+
+    return match;
+}
+
+/**
+ * Parse one Audio Element OBU payload and append the result to
+ * c->audio_elements.
+ *
+ * The len payload bytes are first copied from pb into a temporary buffer and
+ * then parsed through an in-memory AVIOContext wrapping that buffer.
+ *
+ * @param s   logging context handed to av_log()
+ * @param c   context holding the codec configs already parsed and receiving
+ *            the new audio element
+ * @param pb  input positioned at the start of the OBU payload
+ * @param len payload size in bytes
+ * @return 0 on success, or when the element references a codec config whose
+ *         codec_id is AV_CODEC_ID_NONE (the element is then dropped);
+ *         a negative AVERROR code on failure
+ */
+static int audio_element_obu(void *s, IAMFContext *c, AVIOContext *pb, int len)
+{
+    const IAMFCodecConfig *codec_config;
+    AVIAMFAudioElement *element;
+    IAMFAudioElement **tmp, *audio_element = NULL;
+    FFIOContext b;
+    AVIOContext *pbc;
+    uint8_t *buf;
+    unsigned audio_element_id, codec_config_id, num_parameters;
+    int audio_element_type, ret;
+
+    buf = av_malloc(len);
+    if (!buf)
+        return AVERROR(ENOMEM);
+
+    // a short read is reported as invalid data rather than as success
+    ret = avio_read(pb, buf, len);
+    if (ret != len) {
+        if (ret >= 0)
+            ret = AVERROR_INVALIDDATA;
+        goto fail;
+    }
+
+    ffio_init_context(&b, buf, len, 0, NULL, NULL, NULL, NULL);
+    pbc = &b.pub;
+
+    audio_element_id = ffio_read_leb(pbc);
+
+    // ids must be unique among the audio elements parsed so far
+    for (int i = 0; i < c->nb_audio_elements; i++)
+        if (c->audio_elements[i]->audio_element_id == audio_element_id) {
+            av_log(s, AV_LOG_ERROR, "Duplicate audio_element_id %d\n", audio_element_id);
+            ret = AVERROR_INVALIDDATA;
+            goto fail;
+        }
+
+    // the type is stored in the three most significant bits of the byte
+    audio_element_type = avio_r8(pbc) >> 5;
+    codec_config_id = ffio_read_leb(pbc);
+
+    codec_config = get_codec_config(c, codec_config_id);
+    if (!codec_config) {
+        av_log(s, AV_LOG_ERROR, "Non existant codec config id %d referenced in an audio element\n", codec_config_id);
+        ret = AVERROR_INVALIDDATA;
+        goto fail;
+    }
+
+    // not an error: the element is skipped and only buf is released below
+    if (codec_config->codec_id == AV_CODEC_ID_NONE) {
+        av_log(s, AV_LOG_DEBUG, "Unknown codec id referenced in an audio element. Ignoring\n");
+        ret = 0;
+        goto fail;
+    }
+
+    // grow the array now; the new slot is only filled (and the count bumped)
+    // once the whole element has been parsed successfully
+    tmp = av_realloc_array(c->audio_elements, c->nb_audio_elements + 1, sizeof(*c->audio_elements));
+    if (!tmp) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+    c->audio_elements = tmp;
+
+    audio_element = av_mallocz(sizeof(*audio_element));
+    if (!audio_element) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    audio_element->nb_substreams = ffio_read_leb(pbc);
+    audio_element->codec_config_id = codec_config_id;
+    audio_element->audio_element_id = audio_element_id;
+    audio_element->substreams = av_calloc(audio_element->nb_substreams, sizeof(*audio_element->substreams));
+    if (!audio_element->substreams) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    element = audio_element->element = av_iamf_audio_element_alloc();
+    if (!element) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    element->audio_element_type = audio_element_type;
+
+    // every substream gets its own codec parameters, seeded from the
+    // referenced codec config
+    for (int i = 0; i < audio_element->nb_substreams; i++) {
+        IAMFSubStream *substream = &audio_element->substreams[i];
+
+        substream->codecpar = avcodec_parameters_alloc();
+        if (!substream->codecpar) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+
+        substream->audio_substream_id = ffio_read_leb(pbc);
+
+        substream->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
+        substream->codecpar->codec_id   = codec_config->codec_id;
+        substream->codecpar->frame_size = codec_config->nb_samples;
+        substream->codecpar->sample_rate = codec_config->sample_rate;
+        substream->codecpar->seek_preroll = codec_config->seek_preroll;
+
+        // only these codecs get a private, zero-padded copy of the extradata
+        switch(substream->codecpar->codec_id) {
+        case AV_CODEC_ID_AAC:
+        case AV_CODEC_ID_FLAC:
+        case AV_CODEC_ID_OPUS:
+            substream->codecpar->extradata = av_malloc(codec_config->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
+            if (!substream->codecpar->extradata) {
+                ret = AVERROR(ENOMEM);
+                goto fail;
+            }
+            memcpy(substream->codecpar->extradata, codec_config->extradata, codec_config->extradata_size);
+            memset(substream->codecpar->extradata + codec_config->extradata_size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+            substream->codecpar->extradata_size = codec_config->extradata_size;
+            break;
+        }
+    }
+
+    // parameters are only accepted when audio_element_type is 0
+    num_parameters = ffio_read_leb(pbc);
+    if (num_parameters && audio_element_type != 0) {
+        av_log(s, AV_LOG_ERROR, "Audio Element parameter count %u is invalid"
+                                " for Scene representations\n", num_parameters);
+        ret = AVERROR_INVALIDDATA;
+        goto fail;
+    }
+
+    for (int i = 0; i < num_parameters; i++) {
+        unsigned type;
+
+        type = ffio_read_leb(pbc);
+        if (type == AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN) {
+            // mix gain definitions are rejected inside an audio element
+            ret = AVERROR_INVALIDDATA;
+            goto fail;
+        } else if (type == AV_IAMF_PARAMETER_DEFINITION_DEMIXING) {
+            ret = param_parse(s, c, pbc, type, audio_element, &element->demixing_info);
+            if (ret < 0)
+                goto fail;
+
+            // default_w is the upper four bits of the following byte
+            element->default_w = avio_r8(pbc) >> 4;
+        } else if (type == AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN) {
+            ret = param_parse(s, c, pbc, type, audio_element, &element->recon_gain_info);
+            if (ret < 0)
+                goto fail;
+        } else {
+            // unknown parameter types carry their own size and are skipped
+            unsigned param_definition_size = ffio_read_leb(pbc);
+            avio_skip(pbc, param_definition_size);
+        }
+    }
+
+    // the remaining payload depends on the element type; unknown types are
+    // skipped using their explicit size
+    if (audio_element_type == AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL) {
+        ret = scalable_channel_layout_config(s, pbc, audio_element, codec_config);
+        if (ret < 0)
+            goto fail;
+    } else if (audio_element_type == AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE) {
+        ret = ambisonics_config(s, pbc, audio_element, codec_config);
+        if (ret < 0)
+            goto fail;
+    } else {
+        unsigned audio_element_config_size = ffio_read_leb(pbc);
+        avio_skip(pbc, audio_element_config_size);
+    }
+
+    // parsing succeeded: publish the element in the context
+    c->audio_elements[c->nb_audio_elements++] = audio_element;
+
+    // leftover payload bytes are only reported, not treated as an error
+    len -= avio_tell(pbc);
+    if (len)
+       av_log(s, AV_LOG_WARNING, "Underread in audio_element_obu. %d bytes left at the end\n", len);
+
+    ret = 0;
+fail:
+    // shared exit path: buf is always released, the partially built element
+    // only on error (audio_element may still be NULL at this point)
+    av_free(buf);
+    if (ret < 0)
+        ff_iamf_free_audio_element(&audio_element);
+    return ret;
+}
+
+/**
+ * Read a string from pb through a 128 byte scratch buffer and return a
+ * heap-allocated copy of it.
+ *
+ * @param pb    input to read the string from
+ * @param label receives the av_strdup()'ed copy; set to NULL when the
+ *              duplication fails, left untouched on read errors
+ * @return 0 on success, pb->error if the context reports an error,
+ *         AVERROR_INVALIDDATA if the end of input was reached,
+ *         AVERROR(ENOMEM) if the copy could not be allocated
+ */
+static int label_string(AVIOContext *pb, char **label)
+{
+    uint8_t str[128];
+    char *copy;
+
+    avio_get_str(pb, sizeof(str), str, sizeof(str));
+
+    if (pb->error)
+        return pb->error;
+    if (pb->eof_reached)
+        return AVERROR_INVALIDDATA;
+
+    copy   = av_strdup(str);
+    *label = copy;
+
+    return copy ? 0 : AVERROR(ENOMEM);
+}
+
+/**
+ * Parse one Mix Presentation OBU payload and append the result to
+ * c->mix_presentations.
+ *
+ * The len payload bytes are first copied from pb into a temporary buffer and
+ * then parsed through an in-memory AVIOContext wrapping that buffer.
+ *
+ * @param s   logging context handed to av_log()
+ * @param c   context holding the audio elements already parsed and receiving
+ *            the new mix presentation
+ * @param pb  input positioned at the start of the OBU payload
+ * @param len payload size in bytes
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int mix_presentation_obu(void *s, IAMFContext *c, AVIOContext *pb, int len)
+{
+    AVIAMFMixPresentation *mix;
+    IAMFMixPresentation **tmp, *mix_presentation = NULL;
+    FFIOContext b;
+    AVIOContext *pbc;
+    uint8_t *buf;
+    unsigned mix_presentation_id;
+    int ret;
+
+    buf = av_malloc(len);
+    if (!buf)
+        return AVERROR(ENOMEM);
+
+    // a short read is reported as invalid data rather than as success
+    ret = avio_read(pb, buf, len);
+    if (ret != len) {
+        if (ret >= 0)
+            ret = AVERROR_INVALIDDATA;
+        goto fail;
+    }
+
+    ffio_init_context(&b, buf, len, 0, NULL, NULL, NULL, NULL);
+    pbc = &b.pub;
+
+    mix_presentation_id = ffio_read_leb(pbc);
+
+    // ids must be unique among the mix presentations parsed so far
+    for (int i = 0; i < c->nb_mix_presentations; i++)
+        if (c->mix_presentations[i]->mix_presentation_id == mix_presentation_id) {
+            av_log(s, AV_LOG_ERROR, "Duplicate mix_presentation_id %d\n", mix_presentation_id);
+            ret = AVERROR_INVALIDDATA;
+            goto fail;
+        }
+
+    // grow the array now; the new slot is only filled (and the count bumped)
+    // once the whole presentation has been parsed successfully
+    tmp = av_realloc_array(c->mix_presentations, c->nb_mix_presentations + 1, sizeof(*c->mix_presentations));
+    if (!tmp) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+    c->mix_presentations = tmp;
+
+    mix_presentation = av_mallocz(sizeof(*mix_presentation));
+    if (!mix_presentation) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    mix_presentation->mix_presentation_id = mix_presentation_id;
+    mix = mix_presentation->mix = av_iamf_mix_presentation_alloc();
+    if (!mix) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    mix_presentation->count_label = ffio_read_leb(pbc);
+    mix_presentation->language_label = av_calloc(mix_presentation->count_label,
+                                                 sizeof(*mix_presentation->language_label));
+    if (!mix_presentation->language_label) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    // first all language labels, then one presentation annotation per label
+    for (int i = 0; i < mix_presentation->count_label; i++) {
+        ret = label_string(pbc, &mix_presentation->language_label[i]);
+        if (ret < 0)
+            goto fail;
+    }
+
+    for (int i = 0; i < mix_presentation->count_label; i++) {
+        char *annotation = NULL;
+        ret = label_string(pbc, &annotation);
+        if (ret < 0)
+            goto fail;
+        // the dictionary takes over the annotation string (DONT_STRDUP_VAL);
+        // an annotation for an already present language does not replace it
+        ret = av_dict_set(&mix->annotations, mix_presentation->language_label[i], annotation,
+                          AV_DICT_DONT_STRDUP_VAL | AV_DICT_DONT_OVERWRITE);
+        if (ret < 0)
+            goto fail;
+    }
+
+    mix->nb_submixes = ffio_read_leb(pbc);
+    mix->submixes = av_calloc(mix->nb_submixes, sizeof(*mix->submixes));
+    if (!mix->submixes) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    for (int i = 0; i < mix->nb_submixes; i++) {
+        AVIAMFSubmix *sub_mix;
+
+        sub_mix = mix->submixes[i] = av_mallocz(sizeof(*sub_mix));
+        if (!sub_mix) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+
+        sub_mix->nb_elements = ffio_read_leb(pbc);
+        sub_mix->elements = av_calloc(sub_mix->nb_elements, sizeof(*sub_mix->elements));
+        if (!sub_mix->elements) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+
+        for (int j = 0; j < sub_mix->nb_elements; j++) {
+            AVIAMFSubmixElement *submix_element;
+            IAMFAudioElement *audio_element = NULL;
+            unsigned int rendering_config_extension_size;
+
+            submix_element = sub_mix->elements[j] = av_mallocz(sizeof(*submix_element));
+            if (!submix_element) {
+                ret = AVERROR(ENOMEM);
+                goto fail;
+            }
+
+            submix_element->audio_element_id = ffio_read_leb(pbc);
+
+            // the referenced audio element must have been parsed already
+            for (int k = 0; k < c->nb_audio_elements; k++)
+                if (c->audio_elements[k]->audio_element_id == submix_element->audio_element_id) {
+                    audio_element = c->audio_elements[k];
+                    break;
+                }
+
+            if (!audio_element) {
+                av_log(s, AV_LOG_ERROR, "Invalid Audio Element with id %u referenced by Mix Parameters %u\n",
+                       submix_element->audio_element_id, mix_presentation_id);
+                ret = AVERROR_INVALIDDATA;
+                goto fail;
+            }
+
+            // one element annotation per language label
+            for (int k = 0; k < mix_presentation->count_label; k++) {
+                char *annotation = NULL;
+                ret = label_string(pbc, &annotation);
+                if (ret < 0)
+                    goto fail;
+                ret = av_dict_set(&submix_element->annotations, mix_presentation->language_label[k], annotation,
+                                  AV_DICT_DONT_STRDUP_VAL | AV_DICT_DONT_OVERWRITE);
+                if (ret < 0)
+                    goto fail;
+            }
+
+            // rendering mode is the two most significant bits of the byte
+            submix_element->headphones_rendering_mode = avio_r8(pbc) >> 6;
+
+            rendering_config_extension_size = ffio_read_leb(pbc);
+            avio_skip(pbc, rendering_config_extension_size);
+
+            ret = param_parse(s, c, pbc, AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN,
+                              NULL,
+                              &submix_element->element_mix_config);
+            if (ret < 0)
+                goto fail;
+            // gains are signed 16 bit values with 8 fractional bits
+            submix_element->default_mix_gain = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8);
+        }
+
+        ret = param_parse(s, c, pbc, AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN, NULL, &sub_mix->output_mix_config);
+        if (ret < 0)
+            goto fail;
+        sub_mix->default_mix_gain = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8);
+
+        sub_mix->nb_layouts = ffio_read_leb(pbc);
+        sub_mix->layouts = av_calloc(sub_mix->nb_layouts, sizeof(*sub_mix->layouts));
+        if (!sub_mix->layouts) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+
+        for (int j = 0; j < sub_mix->nb_layouts; j++) {
+            AVIAMFSubmixLayout *submix_layout;
+            int info_type;
+            int byte = avio_r8(pbc);
+
+            submix_layout = sub_mix->layouts[j] = av_mallocz(sizeof(*submix_layout));
+            if (!submix_layout) {
+                ret = AVERROR(ENOMEM);
+                goto fail;
+            }
+
+            // Reject anything outside [LOUDSPEAKERS, BINAURAL]. The two range
+            // tests must be OR'ed: no value can be below the lower bound and
+            // above the upper bound at once, so an AND would never trigger.
+            submix_layout->layout_type = byte >> 6;
+            if (submix_layout->layout_type < AV_IAMF_SUBMIX_LAYOUT_TYPE_LOUDSPEAKERS ||
+                submix_layout->layout_type > AV_IAMF_SUBMIX_LAYOUT_TYPE_BINAURAL) {
+                av_log(s, AV_LOG_ERROR, "Invalid Layout type %u in a submix from Mix Presentation %u\n",
+                       submix_layout->layout_type, mix_presentation_id);
+                ret = AVERROR_INVALIDDATA;
+                goto fail;
+            }
+            if (submix_layout->layout_type == 2) {
+                int sound_system;
+                // NOTE(review): this 4 bit index is not range checked against
+                // ff_iamf_sound_system_map here; confirm the table has 16 entries
+                sound_system = (byte >> 2) & 0xF;
+                ret = av_channel_layout_copy(&submix_layout->sound_system,
+                                             &ff_iamf_sound_system_map[sound_system].layout);
+                if (ret < 0)
+                    goto fail;
+            }
+
+            info_type = avio_r8(pbc);
+            submix_layout->integrated_loudness = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8);
+            submix_layout->digital_peak = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8);
+
+            // bit 0: a true peak value follows
+            if (info_type & 1)
+                submix_layout->true_peak = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8);
+            // bit 1: a list of anchored loudness values follows
+            if (info_type & 2) {
+                unsigned int num_anchored_loudness = avio_r8(pbc);
+
+                for (int k = 0; k < num_anchored_loudness; k++) {
+                    unsigned int anchor_element = avio_r8(pbc);
+                    AVRational anchored_loudness = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8);
+                    if (anchor_element == IAMF_ANCHOR_ELEMENT_DIALOGUE)
+                        submix_layout->dialogue_anchored_loudness = anchored_loudness;
+                    else if (anchor_element <= IAMF_ANCHOR_ELEMENT_ALBUM)
+                        submix_layout->album_anchored_loudness = anchored_loudness;
+                    else
+                        av_log(s, AV_LOG_DEBUG, "Unknown anchor_element. Ignoring\n");
+                }
+            }
+
+            // any other bit: a sized extension follows, which is skipped
+            if (info_type & 0xFC) {
+                unsigned int info_type_size = ffio_read_leb(pbc);
+                avio_skip(pbc, info_type_size);
+            }
+        }
+    }
+
+    // parsing succeeded: publish the presentation in the context
+    c->mix_presentations[c->nb_mix_presentations++] = mix_presentation;
+
+    // leftover payload bytes are only reported, not treated as an error
+    len -= avio_tell(pbc);
+    if (len)
+        av_log(s, AV_LOG_WARNING, "Underread in mix_presentation_obu. %d bytes left at the end\n", len);
+
+    ret = 0;
+fail:
+    // shared exit path: buf is always released, the partially built
+    // presentation only on error (it may still be NULL at this point)
+    av_free(buf);
+    if (ret < 0)
+        ff_iamf_free_mix_presentation(&mix_presentation);
+    return ret;
+}
+
+/**
+ * Parse an OBU header from a memory buffer.
+ *
+ * At most MAX_IAMF_OBU_HEADER_SIZE bytes of buf are examined.
+ *
+ * @param buf             start of the OBU
+ * @param buf_size        number of readable bytes in buf
+ * @param obu_size        receives the number of payload bytes following the
+ *                        header (the coded size minus the trimming and
+ *                        extension fields)
+ * @param start_pos       receives the offset of the payload relative to buf
+ * @param type            receives the OBU type (first five bits)
+ * @param skip_samples    if not NULL, receives num_samples_to_trim_at_start,
+ *                        or 0 when the OBU carries no trimming information
+ * @param discard_padding if not NULL, receives num_samples_to_trim_at_end,
+ *                        or 0 when the OBU carries no trimming information
+ * @return the total size of the OBU in bytes (header plus payload) on
+ *         success, a negative AVERROR code on failure
+ */
+int ff_iamf_parse_obu_header(const uint8_t *buf, int buf_size,
+                             unsigned *obu_size, int *start_pos, enum IAMF_OBU_Type *type,
+                             unsigned *skip_samples, unsigned *discard_padding)
+{
+    GetBitContext gb;
+    int ret, extension_flag, trimming, start;
+    unsigned skip = 0, discard = 0;
+    unsigned size, consumed;
+
+    // the flags byte and at least one byte of obu_size are mandatory
+    if (buf_size < 2)
+        return AVERROR_INVALIDDATA;
+
+    ret = init_get_bits8(&gb, buf, FFMIN(buf_size, MAX_IAMF_OBU_HEADER_SIZE));
+    if (ret < 0)
+        return ret;
+
+    *type          = get_bits(&gb, 5);
+    /*redundant      =*/ get_bits1(&gb);
+    trimming       = get_bits1(&gb);
+    extension_flag = get_bits1(&gb);
+
+    *obu_size = get_leb(&gb);
+    if (*obu_size > INT_MAX)
+        return AVERROR_INVALIDDATA;
+
+    // the coded obu_size counts every byte from this position onwards
+    start = get_bits_count(&gb) / 8;
+
+    if (trimming) {
+        discard = get_leb(&gb); // num_samples_to_trim_at_end
+        skip = get_leb(&gb); // num_samples_to_trim_at_start
+    }
+
+    if (skip_samples)
+        *skip_samples = skip;
+    if (discard_padding)
+        *discard_padding = discard;
+
+    if (extension_flag) {
+        unsigned int extension_bytes;
+        extension_bytes = get_leb(&gb);
+        // keep the bit count passed to skip_bits_long() within int range
+        if (extension_bytes > INT_MAX / 8)
+            return AVERROR_INVALIDDATA;
+        skip_bits_long(&gb, extension_bytes * 8);
+    }
+
+    // the header must fit entirely in the bytes that were made available
+    if (get_bits_left(&gb) < 0)
+        return AVERROR_INVALIDDATA;
+
+    size = *obu_size + start;
+    if (size > INT_MAX)
+        return AVERROR_INVALIDDATA;
+
+    // the trimming and extension fields are part of the coded size; if they
+    // are larger than it the OBU is broken and the subtraction would wrap
+    consumed = get_bits_count(&gb) / 8 - start;
+    if (consumed > *obu_size)
+        return AVERROR_INVALIDDATA;
+
+    *obu_size -= consumed;
+    *start_pos = size - *obu_size;
+
+    return size;
+}
+
+/**
+ * Read consecutive descriptor OBUs from pb into c.
+ *
+ * Codec Config, Audio Element and Mix Presentation OBUs are parsed, Temporal
+ * Delimiters are accepted, and any other OBU type outside the range below is
+ * skipped. Reading stops
+ * successfully either when max_size bytes have been consumed or when an OBU
+ * with a type in [IAMF_OBU_IA_PARAMETER_BLOCK, IAMF_OBU_IA_SEQUENCE_HEADER)
+ * is found, in which case pb is rewound to the start of that OBU.
+ *
+ * @param c        context receiving the parsed descriptors
+ * @param pb       input to read from
+ * @param max_size maximum number of bytes to consume
+ * @param log_ctx  logging context handed to av_log()
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+int ff_iamfdec_read_descriptors(IAMFContext *c, AVIOContext *pb,
+                                int max_size, void *log_ctx)
+{
+    uint8_t header[MAX_IAMF_OBU_HEADER_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
+    int ret;
+
+    while (1) {
+        unsigned obu_size;
+        enum IAMF_OBU_Type type;
+        int start_pos, len, size;
+
+        // the header is read speculatively and (partially) rewound below
+        if ((ret = ffio_ensure_seekback(pb, FFMIN(MAX_IAMF_OBU_HEADER_SIZE, max_size))) < 0)
+            return ret;
+        size = avio_read(pb, header, FFMIN(MAX_IAMF_OBU_HEADER_SIZE, max_size));
+        if (size < 0)
+            return size;
+
+        len = ff_iamf_parse_obu_header(header, size, &obu_size, &start_pos, &type, NULL, NULL);
+        if (len < 0 || obu_size > max_size) {
+            av_log(log_ctx, AV_LOG_ERROR, "Failed to read obu header\n");
+            avio_seek(pb, -size, SEEK_CUR);
+            // len is a positive OBU size when only the max_size check failed;
+            // that must not be handed back to the caller as a success value
+            return len < 0 ? len : AVERROR_INVALIDDATA;
+        }
+
+        // first non-descriptor OBU: leave it unread for the caller
+        if (type >= IAMF_OBU_IA_PARAMETER_BLOCK && type < IAMF_OBU_IA_SEQUENCE_HEADER) {
+            avio_seek(pb, -size, SEEK_CUR);
+            break;
+        }
+
+        // rewind to the first payload byte of this OBU
+        avio_seek(pb, -(size - start_pos), SEEK_CUR);
+        switch (type) {
+        case IAMF_OBU_IA_CODEC_CONFIG:
+            ret = codec_config_obu(log_ctx, c, pb, obu_size);
+            break;
+        case IAMF_OBU_IA_AUDIO_ELEMENT:
+            ret = audio_element_obu(log_ctx, c, pb, obu_size);
+            break;
+        case IAMF_OBU_IA_MIX_PRESENTATION:
+            ret = mix_presentation_obu(log_ctx, c, pb, obu_size);
+            break;
+        case IAMF_OBU_IA_TEMPORAL_DELIMITER:
+            break;
+        default: {
+            int64_t offset = avio_skip(pb, obu_size);
+            if (offset < 0)
+                ret = offset;
+            break;
+        }
+        }
+        if (ret < 0) {
+            av_log(log_ctx, AV_LOG_ERROR, "Failed to read obu type %d\n", type);
+            return ret;
+        }
+        // account for header and payload of the OBU just consumed
+        max_size -= obu_size + start_pos;
+        if (max_size < 0)
+            return AVERROR_INVALIDDATA;
+        if (!max_size)
+            break;
+    }
+
+    return 0;
+}
diff --git a/libavformat/iamf_parse.h b/libavformat/iamf_parse.h
new file mode 100644
index 0000000000..f4f297ecd4
--- /dev/null
+++ b/libavformat/iamf_parse.h
@@ -0,0 +1,38 @@
+/*
+ * Immersive Audio Model and Formats parsing
+ * Copyright (c) 2023 James Almer <jamrial at gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFORMAT_IAMF_PARSE_H
+#define AVFORMAT_IAMF_PARSE_H
+
+#include <stdint.h>
+
+#include "libavutil/iamf.h"
+#include "avio.h"
+#include "iamf.h"
+
+/**
+ * Parse an OBU header from a memory buffer.
+ *
+ * @param buf             start of the OBU
+ * @param buf_size        number of readable bytes in buf
+ * @param obu_size        receives the number of payload bytes following the
+ *                        header
+ * @param start_pos       receives the offset of the payload relative to buf
+ * @param type            receives the OBU type
+ * @param skip_samples    if not NULL, receives the number of samples to trim
+ *                        at the start (0 when the OBU has no trimming info)
+ * @param discard_padding if not NULL, receives the number of samples to trim
+ *                        at the end (0 when the OBU has no trimming info)
+ * @return the total size of the OBU in bytes on success, a negative AVERROR
+ *         code on failure
+ */
+int ff_iamf_parse_obu_header(const uint8_t *buf, int buf_size,
+                             unsigned *obu_size, int *start_pos, enum IAMF_OBU_Type *type,
+                             unsigned *skip_samples, unsigned *discard_padding);
+
+/**
+ * Read consecutive descriptor OBUs (Codec Config, Audio Element, Mix
+ * Presentation) from pb into c, consuming at most size bytes.
+ *
+ * @param c       context receiving the parsed descriptors
+ * @param pb      input to read from
+ * @param size    maximum number of bytes to consume
+ * @param log_ctx logging context handed to av_log()
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+int ff_iamfdec_read_descriptors(IAMFContext *c, AVIOContext *pb,
+                                int size, void *log_ctx);
+
+#endif /* AVFORMAT_IAMF_PARSE_H */
diff --git a/libavformat/iamfdec.c b/libavformat/iamfdec.c
new file mode 100644
index 0000000000..0374d0f241
--- /dev/null
+++ b/libavformat/iamfdec.c
@@ -0,0 +1,503 @@
+/*
+ * Immersive Audio Model and Formats demuxer
+ * Copyright (c) 2023 James Almer <jamrial at gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config_components.h"
+
+#include "libavutil/avassert.h"
+#include "libavutil/iamf.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/log.h"
+#include "libavcodec/mathops.h"
+#include "avformat.h"
+#include "avio_internal.h"
+#include "demux.h"
+#include "iamf.h"
+#include "iamf_parse.h"
+#include "internal.h"
+
+/**
+ * Private data of the IAMF demuxer.
+ */
+typedef struct IAMFDemuxContext {
+    IAMFContext iamf; // parsed descriptors (param definitions are looked up here)
+
+    // Packet side data
+    // Each pointer holds the most recent parameter block of its type, as set
+    // by parameter_block_obu(). audio_frame_obu() copies whatever is set into
+    // every packet, and a Temporal Delimiter OBU frees and resets all three.
+    AVIAMFParamDefinition *mix;   // latest mix gain parameters, or NULL
+    size_t mix_size;              // allocated size of mix in bytes
+    AVIAMFParamDefinition *demix; // latest demixing parameters, or NULL
+    size_t demix_size;            // allocated size of demix in bytes
+    AVIAMFParamDefinition *recon; // latest recon gain parameters, or NULL
+    size_t recon_size;            // allocated size of recon in bytes
+} IAMFDemuxContext;
+
+/**
+ * Find the stream of s whose id field equals id.
+ *
+ * @return the first matching stream; NULL, after logging an error, when
+ *         there is none
+ */
+static AVStream *find_stream_by_id(AVFormatContext *s, int id)
+{
+    int idx = 0;
+
+    while (idx < s->nb_streams) {
+        AVStream *candidate = s->streams[idx];
+
+        if (candidate->id == id)
+            return candidate;
+        idx++;
+    }
+
+    av_log(s, AV_LOG_ERROR, "Invalid stream id %d\n", id);
+    return NULL;
+}
+
+/**
+ * Read the payload of an Audio Frame OBU into pkt and attach the pending
+ * side data to it.
+ *
+ * @param s               demuxer context; s->pb is positioned at the payload
+ * @param pkt             packet to fill
+ * @param len             payload size in bytes, including the explicit
+ *                        substream id when id_in_bitstream is set
+ * @param type            OBU type, used to derive the substream id when it is
+ *                        not coded in the payload
+ * @param skip_samples    samples to trim at the start of the frame
+ * @param discard_padding samples to trim at the end of the frame
+ * @param id_in_bitstream nonzero if the payload starts with the substream id
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int audio_frame_obu(AVFormatContext *s, AVPacket *pkt, int len,
+                           enum IAMF_OBU_Type type,
+                           unsigned skip_samples, unsigned discard_padding,
+                           int id_in_bitstream)
+{
+    const IAMFDemuxContext *const c = s->priv_data;
+    AVStream *st;
+    uint8_t *dst;
+    int audio_substream_id, nb_read;
+
+    if (!id_in_bitstream) {
+        // the id is implied by the OBU type
+        audio_substream_id = type - IAMF_OBU_IA_AUDIO_FRAME_ID0;
+    } else {
+        // the id is coded first; its bytes do not belong to the frame data
+        const int64_t before = avio_tell(s->pb);
+        const unsigned explicit_audio_substream_id = ffio_read_leb(s->pb);
+
+        len -= avio_tell(s->pb) - before;
+        audio_substream_id = explicit_audio_substream_id;
+    }
+
+    st = find_stream_by_id(s, audio_substream_id);
+    if (!st)
+        return AVERROR_INVALIDDATA;
+
+    nb_read = av_get_packet(s->pb, pkt, len);
+    if (nb_read < 0)
+        return nb_read;
+    if (nb_read != len)
+        return AVERROR_INVALIDDATA;
+
+    if (skip_samples || discard_padding) {
+        dst = av_packet_new_side_data(pkt, AV_PKT_DATA_SKIP_SAMPLES, 10);
+        if (!dst)
+            return AVERROR(ENOMEM);
+        AV_WL32(dst, skip_samples);
+        AV_WL32(dst + 4, discard_padding);
+    }
+
+    // forward a copy of every parameter block currently pending
+    if (c->mix) {
+        dst = av_packet_new_side_data(pkt, AV_PKT_DATA_IAMF_MIX_GAIN_PARAM, c->mix_size);
+        if (!dst)
+            return AVERROR(ENOMEM);
+        memcpy(dst, c->mix, c->mix_size);
+    }
+    if (c->demix) {
+        dst = av_packet_new_side_data(pkt, AV_PKT_DATA_IAMF_DEMIXING_INFO_PARAM, c->demix_size);
+        if (!dst)
+            return AVERROR(ENOMEM);
+        memcpy(dst, c->demix, c->demix_size);
+    }
+    if (c->recon) {
+        dst = av_packet_new_side_data(pkt, AV_PKT_DATA_IAMF_RECON_GAIN_INFO_PARAM, c->recon_size);
+        if (!dst)
+            return AVERROR(ENOMEM);
+        memcpy(dst, c->recon, c->recon_size);
+    }
+
+    pkt->stream_index = st->index;
+    return 0;
+}
+
+/**
+ * Look up the parameter definition registered under parameter_id.
+ *
+ * @return the first matching entry of the demuxer's param_definitions
+ *         array, or NULL if there is none
+ */
+static const IAMFParamDefinition *get_param_definition(AVFormatContext *s, unsigned int parameter_id)
+{
+    const IAMFDemuxContext *const c = s->priv_data;
+    const IAMFContext *const iamf = &c->iamf;
+
+    for (int idx = 0; idx < iamf->nb_param_definitions; idx++) {
+        const IAMFParamDefinition *candidate = iamf->param_definitions[idx];
+
+        if (candidate->param->parameter_id == parameter_id)
+            return candidate;
+    }
+
+    return NULL;
+}
+
+/**
+ * Parse a Parameter Block OBU payload and store the result as the pending
+ * side data of its type (mix gain, demixing or recon gain) in the demuxer
+ * context, replacing any previous one.
+ *
+ * The len payload bytes are first copied from s->pb into a temporary buffer
+ * and then parsed through an in-memory AVIOContext wrapping that buffer.
+ *
+ * @param s   demuxer context; s->pb is positioned at the payload
+ * @param len payload size in bytes
+ * @return 0 on success, and also when the block is ignored (unknown
+ *         parameter_id or unknown mix gain animation type); a negative
+ *         AVERROR code on failure
+ */
+static int parameter_block_obu(AVFormatContext *s, int len)
+{
+    IAMFDemuxContext *const c = s->priv_data;
+    const IAMFParamDefinition *param_definition;
+    const AVIAMFParamDefinition *param;
+    AVIAMFParamDefinition *out_param = NULL;
+    FFIOContext b;
+    AVIOContext *pb;
+    uint8_t *buf;
+    unsigned int duration, constant_subblock_duration;
+    unsigned int nb_subblocks;
+    unsigned int parameter_id;
+    size_t out_param_size;
+    int ret;
+
+    buf = av_malloc(len);
+    if (!buf)
+        return AVERROR(ENOMEM);
+
+    // a short read is reported as invalid data rather than as success
+    ret = avio_read(s->pb, buf, len);
+    if (ret != len) {
+        if (ret >= 0)
+            ret = AVERROR_INVALIDDATA;
+        goto fail;
+    }
+
+    ffio_init_context(&b, buf, len, 0, NULL, NULL, NULL, NULL);
+    pb = &b.pub;
+
+    parameter_id = ffio_read_leb(pb);
+    param_definition = get_param_definition(s, parameter_id);
+    if (!param_definition) {
+        av_log(s, AV_LOG_VERBOSE, "Non existant parameter_id %d referenced in a parameter block. Ignoring\n",
+               parameter_id);
+        ret = 0;
+        goto fail;
+    }
+
+    param = param_definition->param;
+    if (!param_definition->mode) {
+        // timing information is coded in the block itself
+        duration = ffio_read_leb(pb);
+        constant_subblock_duration = ffio_read_leb(pb);
+        if (constant_subblock_duration == 0)
+            nb_subblocks = ffio_read_leb(pb);
+        else
+            nb_subblocks = duration / constant_subblock_duration;
+    } else {
+        // timing information comes from the stored parameter definition
+        duration = param->duration;
+        constant_subblock_duration = param->constant_subblock_duration;
+        nb_subblocks = param->nb_subblocks;
+        if (!nb_subblocks) {
+            // without a subblock count the number must be derived from the
+            // constant duration; a zero duration would divide by zero
+            if (!constant_subblock_duration) {
+                av_log(s, AV_LOG_ERROR, "Parameter definition %u has neither a subblock count"
+                                        " nor a constant subblock duration\n", parameter_id);
+                ret = AVERROR_INVALIDDATA;
+                goto fail;
+            }
+            nb_subblocks = duration / constant_subblock_duration;
+        }
+    }
+
+    out_param = av_iamf_param_definition_alloc(param->type, nb_subblocks, &out_param_size);
+    if (!out_param) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    out_param->parameter_id = param->parameter_id;
+    out_param->type = param->type;
+    out_param->parameter_rate = param->parameter_rate;
+    out_param->duration = duration;
+    out_param->constant_subblock_duration = constant_subblock_duration;
+    out_param->nb_subblocks = nb_subblocks;
+
+    for (int i = 0; i < nb_subblocks; i++) {
+        void *subblock = av_iamf_param_definition_get_subblock(out_param, i);
+        unsigned int subblock_duration = constant_subblock_duration;
+
+        // per-subblock durations are only coded when no constant one applies
+        if (!param_definition->mode && !constant_subblock_duration)
+            subblock_duration = ffio_read_leb(pb);
+
+        switch (param->type) {
+        case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN: {
+            AVIAMFMixGain *mix = subblock;
+
+            mix->animation_type = ffio_read_leb(pb);
+            if (mix->animation_type > AV_IAMF_ANIMATION_TYPE_BEZIER) {
+                // unknown animation: drop the whole block without failing
+                ret = 0;
+                av_freep(&out_param);
+                goto fail;
+            }
+
+            // gains are signed 16 bit values with 8 fractional bits
+            mix->start_point_value = av_make_q(sign_extend(avio_rb16(pb), 16), 1 << 8);
+            if (mix->animation_type >= AV_IAMF_ANIMATION_TYPE_LINEAR)
+                mix->end_point_value = av_make_q(sign_extend(avio_rb16(pb), 16), 1 << 8);
+            if (mix->animation_type == AV_IAMF_ANIMATION_TYPE_BEZIER) {
+                mix->control_point_value = av_make_q(sign_extend(avio_rb16(pb), 16), 1 << 8);
+                mix->control_point_relative_time = av_make_q(avio_r8(pb), 1 << 8);
+            }
+            mix->subblock_duration = subblock_duration;
+            break;
+        }
+        case AV_IAMF_PARAMETER_DEFINITION_DEMIXING: {
+            AVIAMFDemixingInfo *demix = subblock;
+
+            // dmixp_mode is the three most significant bits of the byte
+            demix->dmixp_mode = avio_r8(pb) >> 5;
+            demix->subblock_duration = subblock_duration;
+            break;
+        }
+        case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN: {
+            AVIAMFReconGain *recon = subblock;
+            const IAMFAudioElement *audio_element = param_definition->audio_element;
+            const AVIAMFAudioElement *element;
+
+            // validate the pointer before looking through it
+            av_assert0(audio_element);
+            element = audio_element->element;
+            av_assert0(element);
+
+            for (int layer_idx = 0; layer_idx < element->nb_layers; layer_idx++) {
+                const AVIAMFLayer *layer = element->layers[layer_idx];
+                if (layer->flags & AV_IAMF_LAYER_FLAG_RECON_GAIN) {
+                    unsigned int recon_gain_flags = ffio_read_leb(pb);
+                    // bit 7 signals that five more flag bits follow
+                    unsigned int bitcount = 7 + 5 * !!(recon_gain_flags & 0x80);
+                    // drop bit 7 so the flag bits become contiguous
+                    recon_gain_flags = (recon_gain_flags & 0x7F) | ((recon_gain_flags & 0xFF00) >> 1);
+                    for (int j = 0; j < bitcount; j++) {
+                        if (recon_gain_flags & (1 << j))
+                            recon->recon_gain[layer_idx][j] = avio_r8(pb);
+                    }
+                }
+            }
+            recon->subblock_duration = subblock_duration;
+            break;
+        }
+        default:
+            av_assert0(0);
+        }
+    }
+
+    // leftover payload bytes are only reported; AV_EF_EXPLODE raises the
+    // log level but does not turn this into a failure
+    len -= avio_tell(pb);
+    if (len) {
+       int level = (s->error_recognition & AV_EF_EXPLODE) ? AV_LOG_ERROR : AV_LOG_WARNING;
+       av_log(s, level, "Underread in parameter_block_obu. %d bytes left at the end\n", len);
+    }
+
+    // hand the block over to the context, replacing the previous one
+    switch (param->type) {
+    case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN:
+        av_free(c->mix);
+        c->mix = out_param;
+        c->mix_size = out_param_size;
+        break;
+    case AV_IAMF_PARAMETER_DEFINITION_DEMIXING:
+        av_free(c->demix);
+        c->demix = out_param;
+        c->demix_size = out_param_size;
+        break;
+    case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN:
+        av_free(c->recon);
+        c->recon = out_param;
+        c->recon_size = out_param_size;
+        break;
+    default:
+        av_assert0(0);
+    }
+
+    ret = 0;
+fail:
+    // shared exit path: out_param is only released here on error
+    if (ret < 0)
+        av_free(out_param);
+    av_free(buf);
+
+    return ret;
+}
+
+/**
+ * Read OBUs from s->pb until an Audio Frame OBU has been turned into pkt.
+ *
+ * Parameter Block OBUs update the pending side data, Temporal Delimiter
+ * OBUs clear it, and every other OBU type is skipped.
+ *
+ * @return the result of audio_frame_obu() for the first audio frame found,
+ *         or a negative AVERROR code if reading or parsing fails earlier
+ */
+static int iamf_read_packet(AVFormatContext *s, AVPacket *pkt)
+{
+    IAMFDemuxContext *const c = s->priv_data;
+    uint8_t header[MAX_IAMF_OBU_HEADER_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
+    unsigned obu_size;
+    int ret;
+
+    for (;;) {
+        enum IAMF_OBU_Type type;
+        unsigned skip_samples, discard_padding;
+        int total, nb_read, start_pos;
+        int64_t skipped;
+
+        // the header is read speculatively and partially rewound below
+        ret = ffio_ensure_seekback(s->pb, MAX_IAMF_OBU_HEADER_SIZE);
+        if (ret < 0)
+            return ret;
+
+        nb_read = avio_read(s->pb, header, MAX_IAMF_OBU_HEADER_SIZE);
+        if (nb_read < 0)
+            return nb_read;
+
+        total = ff_iamf_parse_obu_header(header, nb_read, &obu_size, &start_pos, &type,
+                                         &skip_samples, &discard_padding);
+        if (total < 0) {
+            av_log(s, AV_LOG_ERROR, "Failed to read obu\n");
+            return total;
+        }
+
+        // rewind to the first payload byte of this OBU
+        avio_seek(s->pb, -(nb_read - start_pos), SEEK_CUR);
+
+        if (type >= IAMF_OBU_IA_AUDIO_FRAME && type <= IAMF_OBU_IA_AUDIO_FRAME_ID17)
+            return audio_frame_obu(s, pkt, obu_size, type,
+                                   skip_samples, discard_padding,
+                                   type == IAMF_OBU_IA_AUDIO_FRAME);
+
+        if (type == IAMF_OBU_IA_PARAMETER_BLOCK) {
+            ret = parameter_block_obu(s, obu_size);
+            if (ret < 0)
+                return ret;
+            continue;
+        }
+
+        if (type == IAMF_OBU_IA_TEMPORAL_DELIMITER) {
+            // a new temporal unit starts: forget all pending parameters
+            av_freep(&c->mix);
+            c->mix_size = 0;
+            av_freep(&c->demix);
+            c->demix_size = 0;
+            av_freep(&c->recon);
+            c->recon_size = 0;
+            continue;
+        }
+
+        skipped = avio_skip(s->pb, obu_size);
+        if (skipped < 0) {
+            ret = skipped;
+            break;
+        }
+    }
+
+    return ret;
+}
+
+/**
+ * Score one OBU while probing.
+ *
+ * @param buf      bytes starting at the offset the caller derived from the
+ *                 parsed OBU header
+ * @param buf_size number of bytes available at buf
+ * @param type     OBU type taken from the parsed header
+ * @param seq      in/out flag, set to 1 once a sequence header carrying the
+ *                 'iamf' tag has been seen
+ * @return a probe score >= 0 once a verdict is reached, or a negative value
+ *         if more data is needed
+ */
+static int get_score(const uint8_t *buf, int buf_size, enum IAMF_OBU_Type type, int *seq)
+{
+    if (type == IAMF_OBU_IA_SEQUENCE_HEADER) {
+        if (buf_size >= 4 && AV_RB32(buf) == MKBETAG('i','a','m','f')) {
+            *seq = 1;
+            return -1;
+        }
+        return 0;
+    }
+
+    /* Every remaining OBU type scores 0 until a sequence header was seen. */
+    if (!*seq)
+        return 0;
+
+    if (type >= IAMF_OBU_IA_CODEC_CONFIG && type <= IAMF_OBU_IA_TEMPORAL_DELIMITER)
+        return -1;
+    if (type >= IAMF_OBU_IA_AUDIO_FRAME && type <= IAMF_OBU_IA_AUDIO_FRAME_ID17)
+        return AVPROBE_SCORE_EXTENSION + 1;
+
+    return 0;
+}
+
+/**
+ * Probe callback: walk the OBUs in the probe buffer and let get_score()
+ * decide.
+ *
+ * @param p probe data
+ * @return 0 when an OBU header fails to parse, otherwise the first
+ *         non-negative value returned by get_score()
+ */
+static int iamf_probe(const AVProbeData *p)
+{
+    enum IAMF_OBU_Type obu_type;
+    unsigned payload_size;
+    int seen_seq_header = 0;
+    int consumed = 0;
+    int payload_off;
+
+    for (;;) {
+        const int remaining = p->buf_size - consumed;
+        int obu_len, score;
+
+        obu_len = ff_iamf_parse_obu_header(p->buf + consumed, remaining,
+                                           &payload_size, &payload_off, &obu_type,
+                                           NULL, NULL);
+        if (obu_len < 0)
+            return 0;
+
+        score = get_score(p->buf + consumed + payload_off,
+                          remaining - payload_off,
+                          obu_type, &seen_seq_header);
+        if (score >= 0)
+            return score;
+
+        /* Advance to the next OBU, never moving past the end of the buffer. */
+        consumed += FFMIN(obu_len, remaining);
+    }
+    return 0;
+}
+
+/**
+ * Read the descriptor OBUs and expose them as streams and stream groups.
+ *
+ * One stream group is created per audio element, holding one new stream per
+ * substream. One stream group is then created per mix presentation and
+ * filled with the streams of every audio element its submixes reference.
+ *
+ * @param s demuxer context
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int iamf_read_header(AVFormatContext *s)
+{
+    IAMFDemuxContext *const c = s->priv_data;
+    IAMFContext *const iamf = &c->iamf;
+    int ret = ff_iamfdec_read_descriptors(iamf, s->pb, INT_MAX, s);
+
+    if (ret < 0)
+        return ret;
+
+    /* Audio elements: one group each, one stream per substream. */
+    for (int ae = 0; ae < iamf->nb_audio_elements; ae++) {
+        IAMFAudioElement *const elem = iamf->audio_elements[ae];
+        AVStreamGroup *const group =
+            avformat_stream_group_create(s, AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT, NULL);
+
+        if (!group)
+            return AVERROR(ENOMEM);
+
+        group->id = elem->audio_element_id;
+        group->params.iamf_audio_element = elem->element;
+
+        for (int ss = 0; ss < elem->nb_substreams; ss++) {
+            IAMFSubStream *const sub = &elem->substreams[ss];
+            AVStream *const st = avformat_new_stream(s, NULL);
+
+            if (!st)
+                return AVERROR(ENOMEM);
+
+            if ((ret = avformat_stream_group_add_stream(group, st)) < 0)
+                return ret;
+            if ((ret = avcodec_parameters_copy(st->codecpar, sub->codecpar)) < 0)
+                return ret;
+
+            st->id = sub->audio_substream_id;
+            avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);
+        }
+    }
+
+    /* Mix presentations: one group each, referencing existing streams. */
+    for (int mp = 0; mp < iamf->nb_mix_presentations; mp++) {
+        IAMFMixPresentation *const pres = iamf->mix_presentations[mp];
+        AVStreamGroup *const group =
+            avformat_stream_group_create(s, AV_STREAM_GROUP_PARAMS_IAMF_MIX_PRESENTATION, NULL);
+        const AVIAMFMixPresentation *const mix = pres->mix;
+
+        if (!group)
+            return AVERROR(ENOMEM);
+
+        group->id = pres->mix_presentation_id;
+        group->params.iamf_mix_presentation = pres->mix;
+
+        for (int sm = 0; sm < mix->nb_submixes; sm++) {
+            AVIAMFSubmix *const submix = mix->submixes[sm];
+
+            for (int el = 0; el < submix->nb_elements; el++) {
+                AVIAMFSubmixElement *const submix_elem = submix->elements[el];
+                AVStreamGroup *source = NULL;
+
+                /* Find the audio element group whose id this submix element names. */
+                for (int g = 0; !source && g < s->nb_stream_groups; g++) {
+                    AVStreamGroup *const candidate = s->stream_groups[g];
+
+                    if (candidate->type == AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT &&
+                        candidate->id == submix_elem->audio_element_id)
+                        source = candidate;
+                }
+                av_assert0(source);
+
+                for (int n = 0; n < source->nb_streams; n++) {
+                    ret = avformat_stream_group_add_stream(group, source->streams[n]);
+                    /* A stream that is already part of the group is not an error. */
+                    if (ret == AVERROR(EEXIST))
+                        continue;
+                    if (ret < 0)
+                        return ret;
+                }
+            }
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Release everything held by the demuxer's private context.
+ *
+ * The element/mix pointers are cleared before ff_iamf_uninit_context() runs;
+ * NOTE(review): presumably because iamf_read_header() handed those objects
+ * to the stream groups, which then own them -- confirm against
+ * ff_iamf_uninit_context().
+ *
+ * @param s demuxer context
+ * @return always 0
+ */
+static int iamf_read_close(AVFormatContext *s)
+{
+    IAMFDemuxContext *const c = s->priv_data;
+    IAMFContext *const iamf = &c->iamf;
+    int idx;
+
+    for (idx = 0; idx < iamf->nb_audio_elements; idx++)
+        iamf->audio_elements[idx]->element = NULL;
+
+    for (idx = 0; idx < iamf->nb_mix_presentations; idx++)
+        iamf->mix_presentations[idx]->mix = NULL;
+
+    ff_iamf_uninit_context(iamf);
+
+    /* Drop the stored parameter buffers and reset their sizes. */
+    av_freep(&c->mix);
+    av_freep(&c->demix);
+    av_freep(&c->recon);
+    c->mix_size   = 0;
+    c->demix_size = 0;
+    c->recon_size = 0;
+
+    return 0;
+}
+
+/* Demuxer definition for raw IAMF files (extension "iamf"). */
+const AVInputFormat ff_iamf_demuxer = {
+    /* identification */
+    .name           = "iamf",
+    .long_name      = NULL_IF_CONFIG_SMALL("Raw Immersive Audio Model and Formats"),
+    .extensions     = "iamf",
+    /* behaviour flags */
+    .flags          = AVFMT_SHOW_IDS | AVFMT_NOTIMESTAMPS |
+                      AVFMT_NO_BYTE_SEEK | AVFMT_GENERIC_INDEX,
+    .flags_internal = FF_FMT_INIT_CLEANUP,
+    /* private context and callbacks */
+    .priv_data_size = sizeof(IAMFDemuxContext),
+    .read_probe     = iamf_probe,
+    .read_header    = iamf_read_header,
+    .read_packet    = iamf_read_packet,
+    .read_close     = iamf_read_close,
+};
-- 
2.43.0



More information about the ffmpeg-devel mailing list