[FFmpeg-devel] [PATCH 1/7 v4] avcodec: add an Immersive Audio Model and Formats frame split bsf
James Almer
jamrial at gmail.com
Tue Feb 6 15:05:15 EET 2024
Signed-off-by: James Almer <jamrial at gmail.com>
---
Now reading decriptors from extradata, plus a setting to ensure any descriptors
present inband are omitted has been added.
doc/bitstream_filters.texi | 16 +
libavcodec/bitstream_filters.c | 1 +
libavcodec/bsf/Makefile | 1 +
libavcodec/bsf/iamf_frame_split_bsf.c | 887 ++++++++++++++++++++++++++
4 files changed, 905 insertions(+)
create mode 100644 libavcodec/bsf/iamf_frame_split_bsf.c
diff --git a/doc/bitstream_filters.texi b/doc/bitstream_filters.texi
index d5bac105ff..3d1a5e7a24 100644
--- a/doc/bitstream_filters.texi
+++ b/doc/bitstream_filters.texi
@@ -465,6 +465,22 @@ Please note that this filter is auto-inserted for MPEG-TS (muxer
@code{mpegts}) and raw HEVC/H.265 (muxer @code{h265} or
@code{hevc}) output formats.
+ at section iamf_frame_split
+
+Split a packet containing one or more Audio Frame OBUs into several
+packets each containing the respective extracted audio data without the
+Audio Frame OBU encapsulation.
+Stream index in output packets will be set based on the order the OBUs
+are coded.
+
+ at table @option
+ at item first_index
+Lowest stream index value to set in output packets
+
+ at item inband_descriptors
+Enable parsing in-band descriptor OBUs
+ at end table
+
@section imxdump
Modifies the bitstream to fit in MOV and to be usable by the Final Cut
diff --git a/libavcodec/bitstream_filters.c b/libavcodec/bitstream_filters.c
index 1bae113d92..633187bc6e 100644
--- a/libavcodec/bitstream_filters.c
+++ b/libavcodec/bitstream_filters.c
@@ -42,6 +42,7 @@ extern const FFBitStreamFilter ff_h264_redundant_pps_bsf;
extern const FFBitStreamFilter ff_hapqa_extract_bsf;
extern const FFBitStreamFilter ff_hevc_metadata_bsf;
extern const FFBitStreamFilter ff_hevc_mp4toannexb_bsf;
+extern const FFBitStreamFilter ff_iamf_frame_split_bsf;
extern const FFBitStreamFilter ff_imx_dump_header_bsf;
extern const FFBitStreamFilter ff_media100_to_mjpegb_bsf;
extern const FFBitStreamFilter ff_mjpeg2jpeg_bsf;
diff --git a/libavcodec/bsf/Makefile b/libavcodec/bsf/Makefile
index 62609eb24e..738b97bdd1 100644
--- a/libavcodec/bsf/Makefile
+++ b/libavcodec/bsf/Makefile
@@ -20,6 +20,7 @@ OBJS-$(CONFIG_H264_REDUNDANT_PPS_BSF) += bsf/h264_redundant_pps.o
OBJS-$(CONFIG_HAPQA_EXTRACT_BSF) += bsf/hapqa_extract.o
OBJS-$(CONFIG_HEVC_METADATA_BSF) += bsf/h265_metadata.o
OBJS-$(CONFIG_HEVC_MP4TOANNEXB_BSF) += bsf/hevc_mp4toannexb.o
+OBJS-$(CONFIG_IAMF_FRAME_SPLIT_BSF) += bsf/iamf_frame_split_bsf.o
OBJS-$(CONFIG_IMX_DUMP_HEADER_BSF) += bsf/imx_dump_header.o
OBJS-$(CONFIG_MEDIA100_TO_MJPEGB_BSF) += bsf/media100_to_mjpegb.o
OBJS-$(CONFIG_MJPEG2JPEG_BSF) += bsf/mjpeg2jpeg.o
diff --git a/libavcodec/bsf/iamf_frame_split_bsf.c b/libavcodec/bsf/iamf_frame_split_bsf.c
new file mode 100644
index 0000000000..dc8ad6730a
--- /dev/null
+++ b/libavcodec/bsf/iamf_frame_split_bsf.c
@@ -0,0 +1,887 @@
+/*
+ * Copyright (c) 2023 James Almer <jamrial at gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+#include <stddef.h>
+
+#include "libavutil/dict.h"
+#include "libavutil/opt.h"
+#include "libavformat/iamf.h"
+#include "bsf.h"
+#include "bsf_internal.h"
+#include "get_bits.h"
+#include "leb.h"
+
+typedef struct ParamDefinition {
+ AVIAMFParamDefinition *param;
+ size_t param_size;
+ int mode;
+ int recon_gain_present_bitmask;
+} ParamDefinition;
+
+typedef struct IAMFSplitContext {
+ AVClass *class;
+ AVPacket *buffer_pkt;
+
+ ParamDefinition *param_definitions;
+ unsigned int nb_param_definitions;
+
+ unsigned int *ids;
+ int nb_ids;
+
+ // AVOptions
+ int first_index;
+ int inband_descriptors;
+
+ // Packet side data
+ AVIAMFParamDefinition *mix;
+ size_t mix_size;
+ AVIAMFParamDefinition *demix;
+ size_t demix_size;
+ AVIAMFParamDefinition *recon;
+ size_t recon_size;
+} IAMFSplitContext;
+
+static int param_parse(AVBSFContext *ctx, GetBitContext *gb,
+ unsigned int type,
+ ParamDefinition **out)
+{
+ IAMFSplitContext *const c = ctx->priv_data;
+ ParamDefinition *param_definition = NULL;
+ AVIAMFParamDefinition *param;
+ unsigned int parameter_id, parameter_rate, mode;
+ unsigned int duration = 0, constant_subblock_duration = 0, nb_subblocks = 0;
+ size_t param_size;
+
+ parameter_id = get_leb(gb);
+
+ for (int i = 0; i < c->nb_param_definitions; i++)
+ if (c->param_definitions[i].param->parameter_id == parameter_id) {
+ param_definition = &c->param_definitions[i];
+ break;
+ }
+
+ parameter_rate = get_leb(gb);
+ mode = get_bits(gb, 8) >> 7;
+
+ if (mode == 0) {
+ duration = get_leb(gb);
+ constant_subblock_duration = get_leb(gb);
+ if (constant_subblock_duration == 0) {
+ nb_subblocks = get_leb(gb);
+ } else
+ nb_subblocks = duration / constant_subblock_duration;
+ }
+
+ param = av_iamf_param_definition_alloc(type, nb_subblocks, ¶m_size);
+ if (!param)
+ return AVERROR(ENOMEM);
+
+ for (int i = 0; i < nb_subblocks; i++) {
+ if (constant_subblock_duration == 0)
+ get_leb(gb); // subblock_duration
+
+ switch (type) {
+ case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN:
+ break;
+ case AV_IAMF_PARAMETER_DEFINITION_DEMIXING:
+ skip_bits(gb, 8); // dmixp_mode
+ break;
+ case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN:
+ break;
+ default:
+ av_free(param);
+ return AVERROR_INVALIDDATA;
+ }
+ }
+
+ param->parameter_id = parameter_id;
+ param->parameter_rate = parameter_rate;
+ param->duration = duration;
+ param->constant_subblock_duration = constant_subblock_duration;
+ param->nb_subblocks = nb_subblocks;
+
+ if (param_definition) {
+ if (param_definition->param_size != param_size ||
+ memcmp(param_definition->param, param, param_size)) {
+ av_log(ctx, AV_LOG_ERROR, "Incosistent parameters for parameter_id %u\n",
+ parameter_id);
+ av_free(param);
+ return AVERROR_INVALIDDATA;
+ }
+ av_freep(¶m);
+ } else {
+ ParamDefinition *tmp = av_realloc_array(c->param_definitions,
+ c->nb_param_definitions + 1,
+ sizeof(*c->param_definitions));
+ if (!tmp) {
+ av_free(param);
+ return AVERROR(ENOMEM);
+ }
+ c->param_definitions = tmp;
+
+ param_definition = &c->param_definitions[c->nb_param_definitions++];
+ param_definition->param = param;
+ param_definition->mode = !mode;
+ param_definition->param_size = param_size;
+ }
+ if (out)
+ *out = param_definition;
+
+ return 0;
+}
+
+static int scalable_channel_layout_config(AVBSFContext *ctx, GetBitContext *gb,
+ ParamDefinition *recon_gain)
+{
+ int nb_layers;
+
+ nb_layers = get_bits(gb, 3);
+ skip_bits(gb, 5); //reserved
+
+ if (nb_layers > 6)
+ return AVERROR_INVALIDDATA;
+
+ for (int i = 0; i < nb_layers; i++) {
+ int output_gain_is_present_flag, recon_gain_is_present;
+
+ skip_bits(gb, 4); // loudspeaker_layout
+ output_gain_is_present_flag = get_bits1(gb);
+ recon_gain_is_present = get_bits1(gb);
+ if (recon_gain)
+ recon_gain->recon_gain_present_bitmask |= recon_gain_is_present << i;
+ skip_bits(gb, 2); // reserved
+ skip_bits(gb, 8); // substream_count
+ skip_bits(gb, 8); // coupled_substream_count
+ if (output_gain_is_present_flag) {
+ skip_bits(gb, 8); // output_gain_flags & reserved
+ skip_bits(gb, 16); // output_gain
+ }
+ }
+
+ return 0;
+}
+
+static int audio_element_obu(AVBSFContext *ctx, uint8_t *buf, unsigned size)
+{
+ IAMFSplitContext *const c = ctx->priv_data;
+ GetBitContext gb;
+ ParamDefinition *recon_gain = NULL;
+ unsigned audio_element_type;
+ unsigned num_substreams, num_parameters;
+ int ret;
+
+ ret = init_get_bits8(&gb, buf, size);
+ if (ret < 0)
+ return ret;
+
+ get_leb(&gb); // audio_element_id
+ audio_element_type = get_bits(&gb, 3);
+ skip_bits(&gb, 5); // reserved
+
+ get_leb(&gb); // codec_config_id
+ num_substreams = get_leb(&gb);
+ for (unsigned i = 0; i < num_substreams; i++) {
+ unsigned *audio_substream_id = av_dynarray2_add((void **)&c->ids, &c->nb_ids,
+ sizeof(*c->ids), NULL);
+ if (!audio_substream_id)
+ return AVERROR(ENOMEM);
+
+ *audio_substream_id = get_leb(&gb);
+ }
+
+ num_parameters = get_leb(&gb);
+ if (num_parameters && audio_element_type != 0) {
+ av_log(ctx, AV_LOG_ERROR, "Audio Element parameter count %u is invalid"
+ " for Scene representations\n", num_parameters);
+ return AVERROR_INVALIDDATA;
+ }
+
+ for (int i = 0; i < num_parameters; i++) {
+ unsigned type = get_leb(&gb);
+
+ if (type == AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN)
+ return AVERROR_INVALIDDATA;
+ else if (type == AV_IAMF_PARAMETER_DEFINITION_DEMIXING) {
+ ret = param_parse(ctx, &gb, type, NULL);
+ if (ret < 0)
+ return ret;
+ skip_bits(&gb, 8); // default_w
+ } else if (type == AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN) {
+ ret = param_parse(ctx, &gb, type, &recon_gain);
+ if (ret < 0)
+ return ret;
+ } else {
+ unsigned param_definition_size = get_leb(&gb);
+ skip_bits_long(&gb, param_definition_size * 8);
+ }
+ }
+
+ if (audio_element_type == AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL) {
+ ret = scalable_channel_layout_config(ctx, &gb, recon_gain);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int label_string(GetBitContext *gb)
+{
+ int byte;
+
+ do {
+ byte = get_bits(gb, 8);
+ } while (byte);
+
+ return 0;
+}
+
+static int mix_presentation_obu(AVBSFContext *ctx, uint8_t *buf, unsigned size)
+{
+ GetBitContext gb;
+ unsigned mix_presentation_id, count_label;
+ unsigned nb_submixes, nb_elements;
+ int ret;
+
+ ret = init_get_bits8(&gb, buf, size);
+ if (ret < 0)
+ return ret;
+
+ mix_presentation_id = get_leb(&gb);
+ count_label = get_leb(&gb);
+
+ for (int i = 0; i < count_label; i++) {
+ ret = label_string(&gb);
+ if (ret < 0)
+ return ret;
+ }
+
+ for (int i = 0; i < count_label; i++) {
+ ret = label_string(&gb);
+ if (ret < 0)
+ return ret;
+ }
+
+ nb_submixes = get_leb(&gb);
+ for (int i = 0; i < nb_submixes; i++) {
+ unsigned nb_layouts;
+
+ nb_elements = get_leb(&gb);
+
+ for (int j = 0; j < nb_elements; j++) {
+ unsigned rendering_config_extension_size;
+
+ get_leb(&gb); // audio_element_id
+ for (int k = 0; k < count_label; k++) {
+ ret = label_string(&gb);
+ if (ret < 0)
+ return ret;
+ }
+
+ skip_bits(&gb, 8); // headphones_rendering_mode & reserved
+ rendering_config_extension_size = get_leb(&gb);
+ skip_bits_long(&gb, rendering_config_extension_size * 8);
+
+ ret = param_parse(ctx, &gb, AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN, NULL);
+ if (ret < 0)
+ return ret;
+ skip_bits(&gb, 16); // default_mix_gain
+ }
+
+ ret = param_parse(ctx, &gb, AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN, NULL);
+ if (ret < 0)
+ return ret;
+ get_bits(&gb, 16); // default_mix_gain
+
+ nb_layouts = get_leb(&gb);
+ for (int j = 0; j < nb_layouts; j++) {
+ int info_type, layout_type;
+ int byte = get_bits(&gb, 8);
+
+ layout_type = byte >> 6;
+ if (layout_type < AV_IAMF_SUBMIX_LAYOUT_TYPE_LOUDSPEAKERS &&
+ layout_type > AV_IAMF_SUBMIX_LAYOUT_TYPE_BINAURAL) {
+ av_log(ctx, AV_LOG_ERROR, "Invalid Layout type %u in a submix from "
+ "Mix Presentation %u\n",
+ layout_type, mix_presentation_id);
+ return AVERROR_INVALIDDATA;
+ }
+
+ info_type = get_bits(&gb, 8);
+ get_bits(&gb, 16); // integrated_loudness
+ get_bits(&gb, 16); // digital_peak
+
+ if (info_type & 1)
+ get_bits(&gb, 16); // true_peak
+
+ if (info_type & 2) {
+ unsigned int num_anchored_loudness = get_bits(&gb, 8);
+
+ for (int k = 0; k < num_anchored_loudness; k++) {
+ get_bits(&gb, 8); // anchor_element
+ get_bits(&gb, 16); // anchored_loudness
+ }
+ }
+
+ if (info_type & 0xFC) {
+ unsigned int info_type_size = get_leb(&gb);
+ skip_bits_long(&gb, info_type_size * 8);
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int find_idx_by_id(AVBSFContext *ctx, unsigned id)
+{
+ IAMFSplitContext *const c = ctx->priv_data;
+
+ for (int i = 0; i < c->nb_ids; i++)
+ if (c->ids[i] == id)
+ return i;
+
+ av_log(ctx, AV_LOG_ERROR, "Invalid id %d\n", id);
+ return AVERROR_INVALIDDATA;
+}
+
+static int audio_frame_obu(AVBSFContext *ctx, enum IAMF_OBU_Type type, int *idx,
+ uint8_t *buf, int *start_pos, unsigned *size,
+ int id_in_bitstream)
+{
+ GetBitContext gb;
+ unsigned audio_substream_id;
+ int ret;
+
+ ret = init_get_bits8(&gb, buf + *start_pos, *size);
+ if (ret < 0)
+ return ret;
+
+ if (id_in_bitstream) {
+ int pos;
+ audio_substream_id = get_leb(&gb);
+ pos = get_bits_count(&gb) / 8;
+ *start_pos += pos;
+ *size -= pos;
+ } else
+ audio_substream_id = type - IAMF_OBU_IA_AUDIO_FRAME_ID0;
+
+ ret = find_idx_by_id(ctx, audio_substream_id);
+ if (ret < 0)
+ return ret;
+
+ *idx = ret;
+
+ return 0;
+}
+
+static const ParamDefinition *get_param_definition(AVBSFContext *ctx,
+ unsigned int parameter_id)
+{
+ const IAMFSplitContext *const c = ctx->priv_data;
+ const ParamDefinition *param_definition = NULL;
+
+ for (int i = 0; i < c->nb_param_definitions; i++)
+ if (c->param_definitions[i].param->parameter_id == parameter_id) {
+ param_definition = &c->param_definitions[i];
+ break;
+ }
+
+ return param_definition;
+}
+
+static int parameter_block_obu(AVBSFContext *ctx, uint8_t *buf, unsigned size)
+{
+ IAMFSplitContext *const c = ctx->priv_data;
+ GetBitContext gb;
+ const ParamDefinition *param_definition;
+ const AVIAMFParamDefinition *param;
+ AVIAMFParamDefinition *out_param = NULL;
+ unsigned int duration, constant_subblock_duration;
+ unsigned int nb_subblocks;
+ unsigned int parameter_id;
+ size_t out_param_size;
+ int ret;
+
+ ret = init_get_bits8(&gb, buf, size);
+ if (ret < 0)
+ return ret;
+
+ parameter_id = get_leb(&gb);
+
+ param_definition = get_param_definition(ctx, parameter_id);
+ if (!param_definition) {
+ ret = 0;
+ goto fail;
+ }
+
+ param = param_definition->param;
+ if (!param_definition->mode) {
+ duration = get_leb(&gb);
+ constant_subblock_duration = get_leb(&gb);
+ if (constant_subblock_duration == 0)
+ nb_subblocks = get_leb(&gb);
+ else
+ nb_subblocks = duration / constant_subblock_duration;
+ } else {
+ duration = param->duration;
+ constant_subblock_duration = param->constant_subblock_duration;
+ nb_subblocks = param->nb_subblocks;
+ if (!nb_subblocks)
+ nb_subblocks = duration / constant_subblock_duration;
+ }
+
+ out_param = av_iamf_param_definition_alloc(param->type, nb_subblocks,
+ &out_param_size);
+ if (!out_param) {
+ ret = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ out_param->parameter_id = param->parameter_id;
+ out_param->type = param->type;
+ out_param->parameter_rate = param->parameter_rate;
+ out_param->duration = duration;
+ out_param->constant_subblock_duration = constant_subblock_duration;
+ out_param->nb_subblocks = nb_subblocks;
+
+ for (int i = 0; i < nb_subblocks; i++) {
+ void *subblock = av_iamf_param_definition_get_subblock(out_param, i);
+ unsigned int subblock_duration = constant_subblock_duration;
+
+ if (!param_definition->mode && !constant_subblock_duration)
+ subblock_duration = get_leb(&gb);
+
+ switch (param->type) {
+ case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN: {
+ AVIAMFMixGain *mix = subblock;
+
+ mix->animation_type = get_leb(&gb);
+ if (mix->animation_type > AV_IAMF_ANIMATION_TYPE_BEZIER) {
+ ret = 0;
+ av_free(out_param);
+ goto fail;
+ }
+
+ mix->start_point_value =
+ av_make_q(sign_extend(get_bits(&gb, 16), 16), 1 << 8);
+ if (mix->animation_type >= AV_IAMF_ANIMATION_TYPE_LINEAR)
+ mix->end_point_value =
+ av_make_q(sign_extend(get_bits(&gb, 16), 16), 1 << 8);
+ if (mix->animation_type == AV_IAMF_ANIMATION_TYPE_BEZIER) {
+ mix->control_point_value =
+ av_make_q(sign_extend(get_bits(&gb, 16), 16), 1 << 8);
+ mix->control_point_relative_time =
+ av_make_q(get_bits(&gb, 8), 1 << 8);
+ }
+ mix->subblock_duration = subblock_duration;
+ break;
+ }
+ case AV_IAMF_PARAMETER_DEFINITION_DEMIXING: {
+ AVIAMFDemixingInfo *demix = subblock;
+
+ demix->dmixp_mode = get_bits(&gb, 3);
+ skip_bits(&gb, 5); // reserved
+ demix->subblock_duration = subblock_duration;
+ break;
+ }
+ case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN: {
+ AVIAMFReconGain *recon = subblock;
+
+ for (int i = 0; i < 6; i++) {
+ if (param_definition->recon_gain_present_bitmask & (1 << i)) {
+ unsigned int recon_gain_flags = get_leb(&gb);
+ unsigned int bitcount = 7 + 5 * !!(recon_gain_flags & 0x80);
+ recon_gain_flags =
+ (recon_gain_flags & 0x7F) | ((recon_gain_flags & 0xFF00) >> 1);
+ for (int j = 0; j < bitcount; j++) {
+ if (recon_gain_flags & (1 << j))
+ recon->recon_gain[i][j] = get_bits(&gb, 8);
+ }
+ }
+ }
+ recon->subblock_duration = subblock_duration;
+ break;
+ }
+ default:
+ av_assert0(0);
+ }
+ }
+
+ switch (param->type) {
+ case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN:
+ av_free(c->mix);
+ c->mix = out_param;
+ c->mix_size = out_param_size;
+ break;
+ case AV_IAMF_PARAMETER_DEFINITION_DEMIXING:
+ av_free(c->demix);
+ c->demix = out_param;
+ c->demix_size = out_param_size;
+ break;
+ case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN:
+ av_free(c->recon);
+ c->recon = out_param;
+ c->recon_size = out_param_size;
+ break;
+ default:
+ av_assert0(0);
+ }
+
+ ret = 0;
+fail:
+ if (ret < 0)
+ av_free(out_param);
+
+ return ret;
+}
+
+static int iamf_parse_obu_header(const uint8_t *buf, int buf_size,
+ unsigned *obu_size, int *start_pos,
+ enum IAMF_OBU_Type *type,
+ unsigned *skip, unsigned *discard)
+{
+ GetBitContext gb;
+ int ret, extension_flag, trimming, start;
+ unsigned size;
+
+ ret = init_get_bits8(&gb, buf, FFMIN(buf_size, MAX_IAMF_OBU_HEADER_SIZE));
+ if (ret < 0)
+ return ret;
+
+ *type = get_bits(&gb, 5);
+ /*redundant =*/ get_bits1(&gb);
+ trimming = get_bits1(&gb);
+ extension_flag = get_bits1(&gb);
+
+ *obu_size = get_leb(&gb);
+ if (*obu_size > INT_MAX)
+ return AVERROR_INVALIDDATA;
+
+ start = get_bits_count(&gb) / 8;
+
+ if (trimming) {
+ *skip = get_leb(&gb); // num_samples_to_trim_at_end
+ *discard = get_leb(&gb); // num_samples_to_trim_at_start
+ }
+
+ if (extension_flag) {
+ unsigned extension_bytes = get_leb(&gb);
+ if (extension_bytes > INT_MAX / 8)
+ return AVERROR_INVALIDDATA;
+ skip_bits_long(&gb, extension_bytes * 8);
+ }
+
+ if (get_bits_left(&gb) < 0)
+ return AVERROR_INVALIDDATA;
+
+ size = *obu_size + start;
+ if (size > INT_MAX)
+ return AVERROR_INVALIDDATA;
+
+ *obu_size -= get_bits_count(&gb) / 8 - start;
+ *start_pos = size - *obu_size;
+
+ return size;
+}
+
+static int iamf_frame_split_filter(AVBSFContext *ctx, AVPacket *out)
+{
+ IAMFSplitContext *const c = ctx->priv_data;
+ int ret = 0;
+
+ if (!c->buffer_pkt->data) {
+ ret = ff_bsf_get_packet_ref(ctx, c->buffer_pkt);
+ if (ret < 0)
+ return ret;
+ }
+
+ while (1) {
+ enum IAMF_OBU_Type type;
+ unsigned skip_samples = 0, discard_padding = 0, obu_size;
+ int len, start_pos, idx;
+
+ len = iamf_parse_obu_header(c->buffer_pkt->data,
+ c->buffer_pkt->size,
+ &obu_size, &start_pos, &type,
+ &skip_samples, &discard_padding);
+ if (len < 0) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to read obu\n");
+ ret = len;
+ goto fail;
+ }
+
+ if (type >= IAMF_OBU_IA_AUDIO_FRAME &&
+ type <= IAMF_OBU_IA_AUDIO_FRAME_ID17) {
+ ret = audio_frame_obu(ctx, type, &idx,
+ c->buffer_pkt->data, &start_pos,
+ &obu_size,
+ type == IAMF_OBU_IA_AUDIO_FRAME);
+ if (ret < 0)
+ goto fail;
+ } else {
+ switch (type) {
+ case IAMF_OBU_IA_PARAMETER_BLOCK:
+ ret = parameter_block_obu(ctx, c->buffer_pkt->data + start_pos, obu_size);
+ if (ret < 0)
+ goto fail;
+ break;
+ case IAMF_OBU_IA_SEQUENCE_HEADER:
+ for (int i = 0; c->param_definitions && i < c->nb_param_definitions; i++)
+ av_free(c->param_definitions[i].param);
+ av_freep(&c->param_definitions);
+ av_freep(&c->ids);
+ c->nb_param_definitions = 0;
+ c->nb_ids = 0;
+ // fall-through
+ case IAMF_OBU_IA_TEMPORAL_DELIMITER:
+ av_freep(&c->mix);
+ av_freep(&c->demix);
+ av_freep(&c->recon);
+ c->mix_size = 0;
+ c->demix_size = 0;
+ c->recon_size = 0;
+ break;
+ case IAMF_OBU_IA_AUDIO_ELEMENT:
+ if (!c->inband_descriptors)
+ break;
+ ret = audio_element_obu(ctx, c->buffer_pkt->data + start_pos, obu_size);
+ if (ret < 0) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to read Audio Element OBU\n");
+ return ret;
+ }
+ break;
+ case IAMF_OBU_IA_MIX_PRESENTATION:
+ if (!c->inband_descriptors)
+ break;
+ ret = mix_presentation_obu(ctx, c->buffer_pkt->data + start_pos, obu_size);
+ if (ret < 0) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to read Mix Presentation OBU\n");
+ return ret;
+ }
+ break;
+ }
+
+ c->buffer_pkt->data += len;
+ c->buffer_pkt->size -= len;
+
+ if (!c->buffer_pkt->size) {
+ av_packet_unref(c->buffer_pkt);
+ ret = ff_bsf_get_packet_ref(ctx, c->buffer_pkt);
+ if (ret < 0)
+ return ret;
+ } else if (c->buffer_pkt->size < 0) {
+ ret = AVERROR_INVALIDDATA;
+ goto fail;
+ }
+ continue;
+ }
+
+ ret = av_packet_ref(out, c->buffer_pkt);
+ if (ret < 0)
+ goto fail;
+
+ if (skip_samples || discard_padding) {
+ uint8_t *side_data = av_packet_new_side_data(out,
+ AV_PKT_DATA_SKIP_SAMPLES, 10);
+ if (!side_data)
+ return AVERROR(ENOMEM);
+ AV_WL32(side_data, skip_samples);
+ AV_WL32(side_data + 4, discard_padding);
+ }
+ if (c->mix) {
+ uint8_t *side_data = av_packet_new_side_data(out,
+ AV_PKT_DATA_IAMF_MIX_GAIN_PARAM,
+ c->mix_size);
+ if (!side_data)
+ return AVERROR(ENOMEM);
+ memcpy(side_data, c->mix, c->mix_size);
+ }
+ if (c->demix) {
+ uint8_t *side_data = av_packet_new_side_data(out,
+ AV_PKT_DATA_IAMF_DEMIXING_INFO_PARAM,
+ c->demix_size);
+ if (!side_data)
+ return AVERROR(ENOMEM);
+ memcpy(side_data, c->demix, c->demix_size);
+ }
+ if (c->recon) {
+ uint8_t *side_data = av_packet_new_side_data(out,
+ AV_PKT_DATA_IAMF_RECON_GAIN_INFO_PARAM,
+ c->recon_size);
+ if (!side_data)
+ return AVERROR(ENOMEM);
+ memcpy(side_data, c->recon, c->recon_size);
+ }
+
+ out->data += start_pos;
+ out->size = obu_size;
+ out->stream_index = idx + c->first_index;
+
+ c->buffer_pkt->data += len;
+ c->buffer_pkt->size -= len;
+
+ if (!c->buffer_pkt->size)
+ av_packet_unref(c->buffer_pkt);
+ else if (c->buffer_pkt->size < 0) {
+ ret = AVERROR_INVALIDDATA;
+ goto fail;
+ }
+
+ return 0;
+ }
+
+fail:
+ if (ret < 0) {
+ av_packet_unref(out);
+ av_packet_unref(c->buffer_pkt);
+ }
+
+ return ret;
+}
+
+static int iamf_frame_split_init(AVBSFContext *ctx)
+{
+ IAMFSplitContext *const c = ctx->priv_data;
+ uint8_t *extradata;
+ int extradata_size;
+
+ c->buffer_pkt = av_packet_alloc();
+ if (!c->buffer_pkt)
+ return AVERROR(ENOMEM);
+
+ if (!c->inband_descriptors && !ctx->par_in->extradata_size) {
+ av_log(ctx, AV_LOG_ERROR, "Missing extradata\n");
+ return AVERROR_INVALIDDATA;
+ }
+
+ extradata = ctx->par_in->extradata;
+ extradata_size = ctx->par_in->extradata_size;
+
+ while (extradata_size) {
+ enum IAMF_OBU_Type type;
+ unsigned skip_samples = 0, discard_padding = 0, obu_size;
+ int ret, len, start_pos;
+
+ len = iamf_parse_obu_header(extradata,
+ extradata_size,
+ &obu_size, &start_pos, &type,
+ &skip_samples, &discard_padding);
+ if (len < 0) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to read descriptors\n");
+ return len;
+ }
+
+ switch (type) {
+ case IAMF_OBU_IA_SEQUENCE_HEADER:
+ for (int i = 0; c->param_definitions && i < c->nb_param_definitions; i++)
+ av_free(c->param_definitions[i].param);
+ av_freep(&c->param_definitions);
+ av_freep(&c->ids);
+ c->nb_param_definitions = 0;
+ c->nb_ids = 0;
+ break;
+ case IAMF_OBU_IA_AUDIO_ELEMENT:
+ ret = audio_element_obu(ctx, extradata + start_pos, obu_size);
+ if (ret < 0) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to read Audio Element OBU\n");
+ return ret;
+ }
+ break;
+ case IAMF_OBU_IA_MIX_PRESENTATION:
+ ret = mix_presentation_obu(ctx, extradata + start_pos, obu_size);
+ if (ret < 0) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to read Mix Presentation OBU\n");
+ return ret;
+ }
+ break;
+ }
+
+ extradata += len;
+ extradata_size -= len;
+
+ if (extradata_size < 0)
+ return AVERROR_INVALIDDATA;
+ }
+
+ av_freep(&ctx->par_out->extradata);
+ ctx->par_out->extradata_size = 0;
+
+ return 0;
+}
+
+static void iamf_frame_split_flush(AVBSFContext *ctx)
+{
+ IAMFSplitContext *const c = ctx->priv_data;
+
+ if (c->buffer_pkt)
+ av_packet_unref(c->buffer_pkt);
+
+ av_freep(&c->mix);
+ av_freep(&c->demix);
+ av_freep(&c->recon);
+ c->mix_size = 0;
+ c->demix_size = 0;
+ c->recon_size = 0;
+}
+
+static void iamf_frame_split_close(AVBSFContext *ctx)
+{
+ IAMFSplitContext *const c = ctx->priv_data;
+
+ iamf_frame_split_flush(ctx);
+ av_packet_free(&c->buffer_pkt);
+
+ for (int i = 0; c->param_definitions && i < c->nb_param_definitions; i++)
+ av_free(c->param_definitions[i].param);
+ av_freep(&c->param_definitions);
+ c->nb_param_definitions = 0;
+
+ av_freep(&c->ids);
+ c->nb_ids = 0;
+}
+
+#define OFFSET(x) offsetof(IAMFSplitContext, x)
+#define FLAGS (AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_BSF_PARAM)
+static const AVOption iamf_frame_split_options[] = {
+ { "first_index", "First index to set stream index in output packets",
+ OFFSET(first_index), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, FLAGS },
+ { "inband_descriptors", "Parse inband descriptor OBUs",
+ OFFSET(inband_descriptors), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS },
+ { NULL }
+};
+
+static const AVClass iamf_frame_split_class = {
+ .class_name = "iamf_frame_split_bsf",
+ .item_name = av_default_item_name,
+ .option = iamf_frame_split_options,
+ .version = LIBAVUTIL_VERSION_INT,
+};
+
+const FFBitStreamFilter ff_iamf_frame_split_bsf = {
+ .p.name = "iamf_frame_split",
+ .p.priv_class = &iamf_frame_split_class,
+ .priv_data_size = sizeof(IAMFSplitContext),
+ .init = iamf_frame_split_init,
+ .flush = iamf_frame_split_flush,
+ .close = iamf_frame_split_close,
+ .filter = iamf_frame_split_filter,
+};
--
2.43.0
More information about the ffmpeg-devel
mailing list