[FFmpeg-devel] [PATCH 5/7 v3] avcodec: add an Immersive Audio Model and Formats frame merge bsf
James Almer
jamrial at gmail.com
Tue Feb 6 15:05:19 EET 2024
Signed-off-by: James Almer <jamrial at gmail.com>
---
No changes since last version
doc/bitstream_filters.texi | 14 ++
libavcodec/bitstream_filters.c | 1 +
libavcodec/bsf/Makefile | 1 +
libavcodec/bsf/iamf_frame_merge_bsf.c | 228 ++++++++++++++++++++++++++
libavcodec/leb.h | 22 +++
5 files changed, 266 insertions(+)
create mode 100644 libavcodec/bsf/iamf_frame_merge_bsf.c
diff --git a/doc/bitstream_filters.texi b/doc/bitstream_filters.texi
index 3d1a5e7a24..2f3077d02c 100644
--- a/doc/bitstream_filters.texi
+++ b/doc/bitstream_filters.texi
@@ -481,6 +481,20 @@ Lowest stream index value to set in output packets
Enable parsing in-band descriptor OBUs
@end table
+@section iamf_frame_merge
+
+Encapsulate audio data packets from different streams and merge them
+into a single packet containing all Audio Frame OBUs.
+
+@table @option
+@item index_mapping
+A :-separated list of stream_index=audio_substream_id entries to set
+stream id in output Audio Frame OBUs
+
+@item out_index
+Stream index to set in output packets
+@end table
+
@section imxdump
Modifies the bitstream to fit in MOV and to be usable by the Final Cut
diff --git a/libavcodec/bitstream_filters.c b/libavcodec/bitstream_filters.c
index 633187bc6e..a50488ad92 100644
--- a/libavcodec/bitstream_filters.c
+++ b/libavcodec/bitstream_filters.c
@@ -42,6 +42,7 @@ extern const FFBitStreamFilter ff_h264_redundant_pps_bsf;
extern const FFBitStreamFilter ff_hapqa_extract_bsf;
extern const FFBitStreamFilter ff_hevc_metadata_bsf;
extern const FFBitStreamFilter ff_hevc_mp4toannexb_bsf;
+extern const FFBitStreamFilter ff_iamf_frame_merge_bsf;
extern const FFBitStreamFilter ff_iamf_frame_split_bsf;
extern const FFBitStreamFilter ff_imx_dump_header_bsf;
extern const FFBitStreamFilter ff_media100_to_mjpegb_bsf;
diff --git a/libavcodec/bsf/Makefile b/libavcodec/bsf/Makefile
index 738b97bdd1..662c5139bc 100644
--- a/libavcodec/bsf/Makefile
+++ b/libavcodec/bsf/Makefile
@@ -20,6 +20,7 @@ OBJS-$(CONFIG_H264_REDUNDANT_PPS_BSF) += bsf/h264_redundant_pps.o
OBJS-$(CONFIG_HAPQA_EXTRACT_BSF) += bsf/hapqa_extract.o
OBJS-$(CONFIG_HEVC_METADATA_BSF) += bsf/h265_metadata.o
OBJS-$(CONFIG_HEVC_MP4TOANNEXB_BSF) += bsf/hevc_mp4toannexb.o
+OBJS-$(CONFIG_IAMF_FRAME_MERGE_BSF) += bsf/iamf_frame_merge_bsf.o
OBJS-$(CONFIG_IAMF_FRAME_SPLIT_BSF) += bsf/iamf_frame_split_bsf.o
OBJS-$(CONFIG_IMX_DUMP_HEADER_BSF) += bsf/imx_dump_header.o
OBJS-$(CONFIG_MEDIA100_TO_MJPEGB_BSF) += bsf/media100_to_mjpegb.o
diff --git a/libavcodec/bsf/iamf_frame_merge_bsf.c b/libavcodec/bsf/iamf_frame_merge_bsf.c
new file mode 100644
index 0000000000..98f37be653
--- /dev/null
+++ b/libavcodec/bsf/iamf_frame_merge_bsf.c
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2024 James Almer <jamrial at gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+#include <stddef.h>
+
+#include "libavutil/dict.h"
+#include "libavutil/fifo.h"
+#include "libavutil/opt.h"
+#include "libavformat/iamf.h"
+#include "bsf.h"
+#include "bsf_internal.h"
+#include "bytestream.h"
+#include "get_bits.h"
+#include "leb.h"
+#include "put_bits.h"
+
+/**
+ * Private context of the iamf_frame_merge bitstream filter.
+ */
+typedef struct IAMFMergeContext {
+ AVClass *class;
+
+ // Queue of AVPacket pointers waiting to be merged into one output packet;
+ // allocated in iamf_frame_merge_init() with one slot per index_mapping entry.
+ AVFifo *fifo;
+
+ // AVOptions
+ // stream_index=audio_substream_id pairs, kept as dictionary key/value strings.
+ AVDictionary *index_mapping;
+ // NOTE(review): not referenced by any of the code in this file.
+ int stream_count;
+ // Stream index assigned to every merged output packet.
+ int out_index;
+} IAMFMergeContext;
+
+/**
+ * Look up the audio substream id mapped to a stream index.
+ *
+ * Walks the index_mapping dictionary, parsing every key as a stream index
+ * and every value as a substream id (strtol() with base 0, so decimal,
+ * octal and hexadecimal notations are accepted), and returns the id of the
+ * first entry whose key equals idx.
+ *
+ * @param ctx bitstream filter context holding the index_mapping option
+ * @param idx stream index to look up
+ * @return the non-negative substream id on success; AVERROR_INVALIDDATA
+ *         when an entry visited before a match is empty, has trailing
+ *         characters or is out of range, or when no entry matches idx
+ */
+static int find_id_from_idx(AVBSFContext *ctx, int idx)
+{
+    IAMFMergeContext *const c = ctx->priv_data;
+    const AVDictionaryEntry *e = NULL;
+
+    while ((e = av_dict_iterate(c->index_mapping, e))) {
+        char *endptr = NULL;
+        long map_idx, id;
+
+        // Parse into long first so out-of-range input is rejected instead
+        // of being silently truncated to int. endptr == str catches empty
+        // strings, which strtol() would otherwise report as 0.
+        map_idx = strtol(e->key, &endptr, 0);
+        if (endptr == e->key || *endptr ||
+            map_idx < INT_MIN || map_idx > INT_MAX)
+            return AVERROR_INVALIDDATA;
+
+        endptr = NULL;
+        id = strtol(e->value, &endptr, 0);
+        // A negative id must not escape as if it were an error code.
+        if (endptr == e->value || *endptr || id < 0 || id > INT_MAX)
+            return AVERROR_INVALIDDATA;
+
+        if (map_idx == idx)
+            return id;
+    }
+
+    av_log(ctx, AV_LOG_ERROR, "Invalid stream idx %d\n", idx);
+    return AVERROR_INVALIDDATA;
+}
+
+/**
+ * Merge one queued packet per mapped stream into a single output packet.
+ *
+ * Input packets are pulled from the BSF input until the queue is full. Each
+ * queued packet is then appended to out as an Audio Frame OBU made of one
+ * OBU header byte, the leb-coded OBU size, the optional trimming fields,
+ * an optional explicit substream id (ids above 17) and the packet payload.
+ *
+ * @param ctx bitstream filter context
+ * @param out caller-owned packet receiving the merged data; its properties
+ *            are copied from the last merged input packet and its
+ *            stream_index is set to the out_index option. It is
+ *            unreferenced (not freed) on failure.
+ * @return 0 on success, a negative AVERROR code on failure, including the
+ *         value returned by ff_bsf_get_packet() while the queue cannot be
+ *         filled yet
+ */
+static int iamf_frame_merge_filter(AVBSFContext *ctx, AVPacket *out)
+{
+    IAMFMergeContext *const c = ctx->priv_data;
+    AVPacket *pkt = NULL;
+    int ret;
+
+    // Buffer input packets until every queue slot is taken.
+    while (av_fifo_can_write(c->fifo)) {
+        ret = ff_bsf_get_packet(ctx, &pkt);
+        if (ret < 0)
+            return ret;
+        ret = av_fifo_write(c->fifo, &pkt, 1);
+        if (ret < 0) {
+            av_packet_free(&pkt);
+            return ret;
+        }
+    }
+
+    pkt = NULL;
+    while (av_fifo_can_read(c->fifo)) {
+        PutBitContext pb;
+        PutByteContext p;
+        uint8_t *side_data, header[MAX_IAMF_OBU_HEADER_SIZE], obu[8];
+        uint8_t obu_header;
+        unsigned int skip_samples = 0, discard_padding = 0;
+        size_t side_data_size;
+        int header_size, obu_size, old_out_size = out->size;
+        int id, type, has_trimming;
+
+        // Release the packet merged in the previous iteration; the last
+        // one is kept past the loop so its properties can be copied.
+        av_packet_free(&pkt);
+        av_fifo_read(c->fifo, &pkt, 1);
+        id = find_id_from_idx(ctx, pkt->stream_index);
+        if (id < 0) {
+            // Go through the cleanup path so pkt is not leaked.
+            ret = AVERROR_INVALIDDATA;
+            goto fail;
+        }
+
+        // Ids up to 17 are encoded in the OBU type itself; larger ids use
+        // the generic Audio Frame type and are written explicitly below.
+        type = id <= 17 ? id + IAMF_OBU_IA_AUDIO_FRAME_ID0 : IAMF_OBU_IA_AUDIO_FRAME;
+
+        side_data = av_packet_get_side_data(pkt, AV_PKT_DATA_SKIP_SAMPLES,
+                                            &side_data_size);
+
+        if (side_data && side_data_size >= 10) {
+            skip_samples    = AV_RL32(side_data);
+            discard_padding = AV_RL32(side_data + 4);
+        }
+        has_trimming = skip_samples || discard_padding;
+
+        // Build the header byte arithmetically so the result does not
+        // depend on host endianness: 5 bits of type, then one bit each for
+        // obu_redundant_copy (0), the trimming flag and
+        // obu_extension_flag (0).
+        obu_header = (type & 0x1f) << 3 | has_trimming << 1;
+
+        init_put_bits(&pb, header, sizeof(header));
+        if (has_trimming) {
+            put_leb(&pb, discard_padding);
+            put_leb(&pb, skip_samples);
+        }
+        if (id > 17)
+            put_leb(&pb, id);
+        flush_put_bits(&pb);
+
+        header_size = put_bytes_count(&pb, 1);
+
+        // The OBU size field covers the fields above plus the payload.
+        init_put_bits(&pb, obu, sizeof(obu));
+        put_leb(&pb, header_size + pkt->size);
+        flush_put_bits(&pb);
+
+        obu_size = put_bytes_count(&pb, 1);
+
+        ret = av_grow_packet(out, 1 + obu_size + header_size + pkt->size);
+        if (ret < 0)
+            goto fail;
+
+        bytestream2_init_writer(&p, out->data + old_out_size, 1 + obu_size + header_size + pkt->size);
+        bytestream2_put_byteu(&p, obu_header);
+        bytestream2_put_bufferu(&p, obu, obu_size);
+        bytestream2_put_bufferu(&p, header, header_size);
+        bytestream2_put_bufferu(&p, pkt->data, pkt->size);
+    }
+
+    ret = av_packet_copy_props(out, pkt);
+    if (ret < 0)
+        goto fail;
+    out->stream_index = c->out_index;
+
+    ret = 0;
+fail:
+    av_packet_free(&pkt);
+    // out belongs to the caller: drop its contents but never free it.
+    if (ret < 0)
+        av_packet_unref(out);
+    return ret;
+}
+
+/**
+ * Validate the options and allocate the packet queue.
+ *
+ * The queue stores AVPacket pointers and gets one slot per entry of the
+ * index_mapping dictionary.
+ *
+ * @return 0 on success, AVERROR(EINVAL) when index_mapping is not set,
+ *         AVERROR(ENOMEM) when the queue cannot be allocated
+ */
+static int iamf_frame_merge_init(AVBSFContext *ctx)
+{
+    IAMFMergeContext *const s = ctx->priv_data;
+    int nb_mappings;
+
+    if (s->index_mapping == NULL) {
+        av_log(ctx, AV_LOG_ERROR, "Empty index map\n");
+        return AVERROR(EINVAL);
+    }
+
+    nb_mappings = av_dict_count(s->index_mapping);
+    s->fifo     = av_fifo_alloc2(nb_mappings, sizeof(AVPacket*), 0);
+
+    return s->fifo ? 0 : AVERROR(ENOMEM);
+}
+
+/**
+ * Drop every packet still waiting in the queue, then reset the queue.
+ */
+static void iamf_frame_merge_flush(AVBSFContext *ctx)
+{
+    IAMFMergeContext *const s = ctx->priv_data;
+    AVFifo *const queue = s->fifo;
+    AVPacket *queued;
+
+    // The queue only holds pointers, so each packet is freed explicitly.
+    while (av_fifo_can_read(queue) > 0) {
+        av_fifo_read(queue, &queued, 1);
+        av_packet_free(&queued);
+    }
+
+    av_fifo_reset2(queue);
+}
+
+/**
+ * Release the queued packets (when a queue was allocated) and the queue.
+ */
+static void iamf_frame_merge_close(AVBSFContext *ctx)
+{
+    IAMFMergeContext *const s = ctx->priv_data;
+
+    // The queue pointer is NULL when init failed before allocating it;
+    // flushing would dereference it.
+    if (s->fifo != NULL)
+        iamf_frame_merge_flush(ctx);
+
+    av_fifo_freep2(&s->fifo);
+}
+
+// Byte offset of an option's storage field inside the private context.
+#define OFFSET(x) offsetof(IAMFMergeContext, x)
+// Flags shared by every option of this filter.
+#define FLAGS (AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_BSF_PARAM)
+// User-settable options; the { NULL } entry ends the table.
+static const AVOption iamf_frame_merge_options[] = {
+ { "index_mapping", "a :-separated list of stream_index=audio_substream_id entries "
+ "to set stream id in output Audio Frame OBUs",
+ OFFSET(index_mapping), AV_OPT_TYPE_DICT, { .str = NULL }, 0, 0, FLAGS },
+ { "out_index", "Stream index to set in output packets",
+ OFFSET(out_index), AV_OPT_TYPE_INT, { 0 }, 0, INT_MAX, FLAGS },
+ { NULL }
+};
+
+// AVClass of the private context; ties iamf_frame_merge_options to the filter.
+static const AVClass iamf_frame_merge_class = {
+ .class_name = "iamf_frame_merge_bsf",
+ .item_name = av_default_item_name,
+ .option = iamf_frame_merge_options,
+ .version = LIBAVUTIL_VERSION_INT,
+};
+
+// Codec ids registered as this filter's codec_ids list; AV_CODEC_ID_NONE is
+// the last entry.
+static const enum AVCodecID iamf_frame_merge_codec_ids[] = {
+ AV_CODEC_ID_PCM_S16LE, AV_CODEC_ID_PCM_S16BE,
+ AV_CODEC_ID_PCM_S24LE, AV_CODEC_ID_PCM_S24BE,
+ AV_CODEC_ID_PCM_S32LE, AV_CODEC_ID_PCM_S32BE,
+ AV_CODEC_ID_OPUS, AV_CODEC_ID_AAC,
+ AV_CODEC_ID_FLAC, AV_CODEC_ID_NONE,
+};
+
+// Filter definition; declared extern in libavcodec/bitstream_filters.c.
+const FFBitStreamFilter ff_iamf_frame_merge_bsf = {
+ .p.name = "iamf_frame_merge",
+ .p.codec_ids = iamf_frame_merge_codec_ids,
+ .p.priv_class = &iamf_frame_merge_class,
+ .priv_data_size = sizeof(IAMFMergeContext),
+ .init = iamf_frame_merge_init,
+ .flush = iamf_frame_merge_flush,
+ .close = iamf_frame_merge_close,
+ .filter = iamf_frame_merge_filter,
+};
diff --git a/libavcodec/leb.h b/libavcodec/leb.h
index 5159c434b1..3f00b2988d 100644
--- a/libavcodec/leb.h
+++ b/libavcodec/leb.h
@@ -25,6 +25,7 @@
#define AVCODEC_LEB_H
#include "get_bits.h"
+#include "put_bits.h"
/**
* Read a unsigned integer coded as a variable number of up to eight
@@ -67,4 +68,25 @@ static inline int64_t get_leb128(GetBitContext *gb) {
return ret;
}
+/**
+ * Write an unsigned integer as a variable number of bytes, least
+ * significant 7-bit group first, where the MSB in a byte signals that
+ * another byte is coded. A value of 0 is written as a single zero byte.
+ */
+static inline void put_leb(PutBitContext *s, unsigned value)
+{
+    unsigned remaining = value;
+
+    do {
+        uint8_t group = remaining & 0x7f;
+
+        remaining >>= 7;
+        if (remaining)
+            group |= 0x80; // continuation bit: more groups follow
+
+        put_bits_no_assert(s, 8, group);
+    } while (remaining);
+}
+
#endif /* AVCODEC_LEB_H */
--
2.43.0
More information about the ffmpeg-devel
mailing list