[FFmpeg-devel] [PATCH 6/7 v3] avformat/movenc: add support for Immersive Audio Model and Formats in ISOBMFF
James Almer
jamrial at gmail.com
Tue Feb 6 15:05:20 EET 2024
Signed-off-by: James Almer <jamrial at gmail.com>
---
No changes since last version
configure | 2 +-
libavformat/Makefile | 2 +-
libavformat/movenc.c | 336 +++++++++++++++++++++++++++++++++++--------
libavformat/movenc.h | 5 +
4 files changed, 281 insertions(+), 64 deletions(-)
diff --git a/configure b/configure
index 567a39b242..552e74acf8 100755
--- a/configure
+++ b/configure
@@ -3550,7 +3550,7 @@ mlp_demuxer_select="mlp_parser"
mmf_muxer_select="riffenc"
mov_demuxer_select="iso_media riffdec iamf_frame_split_bsf"
mov_demuxer_suggest="zlib"
-mov_muxer_select="iso_media riffenc rtpenc_chain vp9_superframe_bsf aac_adtstoasc_bsf ac3_parser"
+mov_muxer_select="iso_media riffenc rtpenc_chain vp9_superframe_bsf aac_adtstoasc_bsf iamf_frame_merge_bsf ac3_parser"
mp3_demuxer_select="mpegaudio_parser"
mp3_muxer_select="mpegaudioheader"
mp4_muxer_select="mov_muxer"
diff --git a/libavformat/Makefile b/libavformat/Makefile
index ab264644c6..b5996b08ce 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -369,7 +369,7 @@ OBJS-$(CONFIG_MOV_DEMUXER) += mov.o mov_chan.o mov_esds.o \
OBJS-$(CONFIG_MOV_MUXER) += movenc.o av1.o avc.o hevc.o vvc.o vpcc.o \
movenchint.o mov_chan.o rtp.o \
movenccenc.o movenc_ttml.o rawutils.o \
- dovi_isom.o evc.o
+ dovi_isom.o evc.o iamf_writer.o
OBJS-$(CONFIG_MP2_MUXER) += rawenc.o
OBJS-$(CONFIG_MP3_DEMUXER) += mp3dec.o replaygain.o
OBJS-$(CONFIG_MP3_MUXER) += mp3enc.o rawenc.o id3v2enc.o
diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index 40473fdf56..676fba11ee 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -32,6 +32,7 @@
#include "dovi_isom.h"
#include "riff.h"
#include "avio.h"
+#include "iamf_writer.h"
#include "isom.h"
#include "av1.h"
#include "avc.h"
@@ -41,12 +42,14 @@
#include "libavcodec/flac.h"
#include "libavcodec/get_bits.h"
+#include "libavcodec/bsf.h"
#include "libavcodec/internal.h"
#include "libavcodec/put_bits.h"
#include "libavcodec/vc1_common.h"
#include "libavcodec/raw.h"
#include "internal.h"
#include "libavutil/avstring.h"
+#include "libavutil/bprint.h"
#include "libavutil/channel_layout.h"
#include "libavutil/csp.h"
#include "libavutil/intfloat.h"
@@ -316,6 +319,32 @@ static int mov_write_sdtp_tag(AVIOContext *pb, MOVTrack *track)
return update_size(pb, pos);
}
+/**
+ * Write the IAMF configuration box ('iacb') for a track.
+ *
+ * The box consists of a 32-bit size placeholder, the 'iacb' fourcc, a
+ * configurationVersion byte set to 1, the LEB-coded byte length of the
+ * IAMF descriptors and then the descriptors themselves. The descriptors are
+ * first serialized into a temporary dynamic buffer because their length has
+ * to be written ahead of them.
+ *
+ * @param s     muxer context, forwarded to ff_iamf_write_descriptors()
+ * @param pb    output the box is written to
+ * @param track track whose iamf context supplies the descriptors
+ * @return the result of update_size() on success, a negative AVERROR code if
+ *         the dynamic buffer cannot be opened or the descriptors cannot be
+ *         written
+ */
+static int mov_write_iacb_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *track)
+{
+    AVIOContext *dyn_bc;
+    int64_t pos = avio_tell(pb);
+    uint8_t *dyn_buf = NULL;
+    int dyn_size;
+    int ret = avio_open_dyn_buf(&dyn_bc);
+    if (ret < 0)
+        return ret;
+
+    avio_wb32(pb, 0); // size placeholder, fixed up by update_size() below
+    ffio_wfourcc(pb, "iacb");
+    avio_w8(pb, 1); // configurationVersion
+
+    ret = ff_iamf_write_descriptors(track->iamf, dyn_bc, s);
+    if (ret < 0) {
+        // Release the temporary buffer, it would be leaked otherwise.
+        ffio_free_dyn_buf(&dyn_bc);
+        return ret;
+    }
+
+    dyn_size = avio_close_dyn_buf(dyn_bc, &dyn_buf);
+    ffio_write_leb(pb, dyn_size);
+    avio_write(pb, dyn_buf, dyn_size);
+    av_free(dyn_buf);
+
+    return update_size(pb, pos);
+}
+
static int mov_write_amr_tag(AVIOContext *pb, MOVTrack *track)
{
avio_wb32(pb, 0x11); /* size */
@@ -1358,6 +1387,8 @@ static int mov_write_audio_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
ret = mov_write_wave_tag(s, pb, track);
else if (track->tag == MKTAG('m','p','4','a'))
ret = mov_write_esds_tag(pb, track);
+ else if (track->tag == MKTAG('i','a','m','f'))
+ ret = mov_write_iacb_tag(mov->fc, pb, track);
else if (track->par->codec_id == AV_CODEC_ID_AMR_NB)
ret = mov_write_amr_tag(pb, track);
else if (track->par->codec_id == AV_CODEC_ID_AC3)
@@ -2529,7 +2560,7 @@ static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
if (track->mode == MODE_AVIF) {
mov_write_ccst_tag(pb);
- if (s->nb_streams > 0 && track == &mov->tracks[1])
+ if (mov->nb_streams > 0 && track == &mov->tracks[1])
mov_write_aux_tag(pb, "auxi");
}
@@ -3124,9 +3155,9 @@ static int mov_write_iloc_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatConte
avio_wb32(pb, 0); /* Version & flags */
avio_w8(pb, (4 << 4) + 4); /* offset_size(4) and length_size(4) */
avio_w8(pb, 0); /* base_offset_size(4) and reserved(4) */
- avio_wb16(pb, s->nb_streams); /* item_count */
+ avio_wb16(pb, mov->nb_streams); /* item_count */
- for (int i = 0; i < s->nb_streams; i++) {
+ for (int i = 0; i < mov->nb_streams; i++) {
avio_wb16(pb, i + 1); /* item_id */
avio_wb16(pb, 0); /* data_reference_index */
avio_wb16(pb, 1); /* extent_count */
@@ -3145,9 +3176,9 @@ static int mov_write_iinf_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatConte
avio_wb32(pb, 0); /* size */
ffio_wfourcc(pb, "iinf");
avio_wb32(pb, 0); /* Version & flags */
- avio_wb16(pb, s->nb_streams); /* entry_count */
+ avio_wb16(pb, mov->nb_streams); /* entry_count */
- for (int i = 0; i < s->nb_streams; i++) {
+ for (int i = 0; i < mov->nb_streams; i++) {
int64_t infe_pos = avio_tell(pb);
avio_wb32(pb, 0); /* size */
ffio_wfourcc(pb, "infe");
@@ -3216,7 +3247,7 @@ static int mov_write_ipco_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatConte
int64_t pos = avio_tell(pb);
avio_wb32(pb, 0); /* size */
ffio_wfourcc(pb, "ipco");
- for (int i = 0; i < s->nb_streams; i++) {
+ for (int i = 0; i < mov->nb_streams; i++) {
mov_write_ispe_tag(pb, mov, s, i);
mov_write_pixi_tag(pb, mov, s, i);
mov_write_av1c_tag(pb, &mov->tracks[i]);
@@ -3234,9 +3265,9 @@ static int mov_write_ipma_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatConte
avio_wb32(pb, 0); /* size */
ffio_wfourcc(pb, "ipma");
avio_wb32(pb, 0); /* Version & flags */
- avio_wb32(pb, s->nb_streams); /* entry_count */
+ avio_wb32(pb, mov->nb_streams); /* entry_count */
- for (int i = 0, index = 1; i < s->nb_streams; i++) {
+ for (int i = 0, index = 1; i < mov->nb_streams; i++) {
avio_wb16(pb, i + 1); /* item_ID */
avio_w8(pb, 4); /* association_count */
@@ -4213,7 +4244,7 @@ static int mov_write_covr(AVIOContext *pb, AVFormatContext *s)
int64_t pos = 0;
int i;
- for (i = 0; i < s->nb_streams; i++) {
+ for (i = 0; i < mov->nb_streams; i++) {
MOVTrack *trk = &mov->tracks[i];
if (!is_cover_image(trk->st) || trk->cover_image->size <= 0)
@@ -4360,7 +4391,7 @@ static int mov_write_meta_tag(AVIOContext *pb, MOVMuxContext *mov,
mov_write_pitm_tag(pb, 1);
mov_write_iloc_tag(pb, mov, s);
mov_write_iinf_tag(pb, mov, s);
- if (s->nb_streams > 1)
+ if (mov->nb_streams > 1)
mov_write_iref_tag(pb, mov, s);
mov_write_iprp_tag(pb, mov, s);
} else {
@@ -4611,16 +4642,17 @@ static int mov_setup_track_ids(MOVMuxContext *mov, AVFormatContext *s)
if (mov->use_stream_ids_as_track_ids) {
int next_generated_track_id = 0;
- for (i = 0; i < s->nb_streams; i++) {
- if (s->streams[i]->id > next_generated_track_id)
- next_generated_track_id = s->streams[i]->id;
+ for (i = 0; i < mov->nb_streams; i++) {
+ AVStream *st = mov->tracks[i].st;
+ if (st->id > next_generated_track_id)
+ next_generated_track_id = st->id;
}
for (i = 0; i < mov->nb_tracks; i++) {
if (mov->tracks[i].entry <= 0 && !(mov->flags & FF_MOV_FLAG_FRAGMENT))
continue;
- mov->tracks[i].track_id = i >= s->nb_streams ? ++next_generated_track_id : s->streams[i]->id;
+ mov->tracks[i].track_id = i >= mov->nb_streams ? ++next_generated_track_id : mov->tracks[i].st->id;
}
} else {
for (i = 0; i < mov->nb_tracks; i++) {
@@ -4657,7 +4689,7 @@ static int mov_write_moov_tag(AVIOContext *pb, MOVMuxContext *mov,
}
if (mov->chapter_track)
- for (i = 0; i < s->nb_streams; i++) {
+ for (i = 0; i < mov->nb_streams; i++) {
mov->tracks[i].tref_tag = MKTAG('c','h','a','p');
mov->tracks[i].tref_id = mov->tracks[mov->chapter_track].track_id;
}
@@ -4697,7 +4729,7 @@ static int mov_write_moov_tag(AVIOContext *pb, MOVMuxContext *mov,
for (i = 0; i < mov->nb_tracks; i++) {
if (mov->tracks[i].entry > 0 || mov->flags & FF_MOV_FLAG_FRAGMENT ||
mov->mode == MODE_AVIF) {
- int ret = mov_write_trak_tag(s, pb, mov, &(mov->tracks[i]), i < s->nb_streams ? s->streams[i] : NULL);
+ int ret = mov_write_trak_tag(s, pb, mov, &(mov->tracks[i]), i < mov->nb_streams ? mov->tracks[i].st : NULL);
if (ret < 0)
return ret;
}
@@ -5489,10 +5521,20 @@ static int mov_write_ftyp_tag(AVIOContext *pb, AVFormatContext *s)
MOVMuxContext *mov = s->priv_data;
int64_t pos = avio_tell(pb);
int has_h264 = 0, has_av1 = 0, has_video = 0, has_dolby = 0;
+ int has_iamf = 0;
int i;
- for (i = 0; i < s->nb_streams; i++) {
- AVStream *st = s->streams[i];
+ for (i = 0; i < s->nb_stream_groups; i++) {
+ const AVStreamGroup *stg = s->stream_groups[i];
+
+ if (stg->type == AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT ||
+ stg->type == AV_STREAM_GROUP_PARAMS_IAMF_MIX_PRESENTATION) {
+ has_iamf = 1;
+ break;
+ }
+ }
+ for (i = 0; i < mov->nb_streams; i++) {
+ AVStream *st = mov->tracks[i].st;
if (is_cover_image(st))
continue;
if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO)
@@ -5560,6 +5602,8 @@ static int mov_write_ftyp_tag(AVIOContext *pb, AVFormatContext *s)
ffio_wfourcc(pb, "av01");
if (has_dolby)
ffio_wfourcc(pb, "dby1");
+ if (has_iamf)
+ ffio_wfourcc(pb, "iamf");
} else {
if (mov->flags & FF_MOV_FLAG_FRAGMENT)
ffio_wfourcc(pb, "iso6");
@@ -5667,8 +5711,8 @@ static int mov_write_identification(AVIOContext *pb, AVFormatContext *s)
mov_write_ftyp_tag(pb,s);
if (mov->mode == MODE_PSP) {
int video_streams_nb = 0, audio_streams_nb = 0, other_streams_nb = 0;
- for (i = 0; i < s->nb_streams; i++) {
- AVStream *st = s->streams[i];
+ for (i = 0; i < mov->nb_streams; i++) {
+ AVStream *st = mov->tracks[i].st;
if (is_cover_image(st))
continue;
if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO)
@@ -5855,7 +5899,7 @@ static int mov_write_squashed_packets(AVFormatContext *s)
{
MOVMuxContext *mov = s->priv_data;
- for (int i = 0; i < s->nb_streams; i++) {
+ for (int i = 0; i < mov->nb_streams; i++) {
MOVTrack *track = &mov->tracks[i];
int ret = AVERROR_BUG;
@@ -5896,7 +5940,7 @@ static int mov_flush_fragment(AVFormatContext *s, int force)
// of fragments was triggered automatically by an AVPacket, we
// already have reliable info for the end of that track, but other
// tracks may need to be filled in.
- for (i = 0; i < s->nb_streams; i++) {
+ for (i = 0; i < mov->nb_streams; i++) {
MOVTrack *track = &mov->tracks[i];
if (!track->end_reliable) {
const AVPacket *pkt = ff_interleaved_peek(s, i);
@@ -6097,10 +6141,8 @@ static int mov_auto_flush_fragment(AVFormatContext *s, int force)
return ret;
}
-static int check_pkt(AVFormatContext *s, AVPacket *pkt)
+static int check_pkt(AVFormatContext *s, MOVTrack *trk, AVPacket *pkt)
{
- MOVMuxContext *mov = s->priv_data;
- MOVTrack *trk = &mov->tracks[pkt->stream_index];
int64_t ref;
uint64_t duration;
@@ -6138,15 +6180,21 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
{
MOVMuxContext *mov = s->priv_data;
AVIOContext *pb = s->pb;
- MOVTrack *trk = &mov->tracks[pkt->stream_index];
- AVCodecParameters *par = trk->par;
+ MOVTrack *trk;
+ AVCodecParameters *par;
AVProducerReferenceTime *prft;
unsigned int samples_in_chunk = 0;
int size = pkt->size, ret = 0, offset = 0;
size_t prft_size;
uint8_t *reformatted_data = NULL;
- ret = check_pkt(s, pkt);
+ if (pkt->stream_index < s->nb_streams)
+ trk = s->streams[pkt->stream_index]->priv_data;
+ else // Timecode or chapter
+ trk = &mov->tracks[pkt->stream_index];
+ par = trk->par;
+
+ ret = check_pkt(s, trk, pkt);
if (ret < 0)
return ret;
@@ -6236,7 +6284,7 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
if (par->codec_id == AV_CODEC_ID_AAC && pkt->size > 2 &&
(AV_RB16(pkt->data) & 0xfff0) == 0xfff0) {
- if (!s->streams[pkt->stream_index]->nb_frames) {
+ if (!trk->st->nb_frames) {
av_log(s, AV_LOG_ERROR, "Malformed AAC bitstream detected: "
"use the audio bitstream filter 'aac_adtstoasc' to fix it "
"('-bsf:a aac_adtstoasc' option with ffmpeg)\n");
@@ -6498,18 +6546,18 @@ err:
static int mov_write_single_packet(AVFormatContext *s, AVPacket *pkt)
{
MOVMuxContext *mov = s->priv_data;
- MOVTrack *trk = &mov->tracks[pkt->stream_index];
+ MOVTrack *trk = s->streams[pkt->stream_index]->priv_data;
AVCodecParameters *par = trk->par;
int64_t frag_duration = 0;
int size = pkt->size;
- int ret = check_pkt(s, pkt);
+ int ret = check_pkt(s, trk, pkt);
if (ret < 0)
return ret;
if (mov->flags & FF_MOV_FLAG_FRAG_DISCONT) {
int i;
- for (i = 0; i < s->nb_streams; i++)
+ for (i = 0; i < mov->nb_streams; i++)
mov->tracks[i].frag_discont = 1;
mov->flags &= ~FF_MOV_FLAG_FRAG_DISCONT;
}
@@ -6551,7 +6599,7 @@ static int mov_write_single_packet(AVFormatContext *s, AVPacket *pkt)
return 0; /* Discard 0 sized packets */
}
- if (trk->entry && pkt->stream_index < s->nb_streams)
+ if (trk->entry && pkt->stream_index < mov->nb_streams)
frag_duration = av_rescale_q(pkt->dts - trk->cluster[0].dts,
s->streams[pkt->stream_index]->time_base,
AV_TIME_BASE_Q);
@@ -6606,17 +6654,45 @@ static int mov_write_subtitle_end_packet(AVFormatContext *s,
return ret;
}
+/**
+ * Pass a packet through the bitstream filter attached to a track, if any.
+ *
+ * @param s     muxer context, used for logging only
+ * @param track track whose bsf (may be NULL) is applied
+ * @param pkt   packet handed to the filter; on success it receives the
+ *              filter output
+ * @return 0 when the track has no filter, otherwise the result of
+ *         av_bsf_receive_packet(), or the negative error code returned by
+ *         av_bsf_send_packet()
+ */
+static int mov_filter_packet(AVFormatContext *s, MOVTrack *track, AVPacket *pkt)
+{
+    struct AVBSFContext *bsf = track->bsf;
+    int err;
+
+    if (bsf) {
+        err = av_bsf_send_packet(bsf, pkt);
+        if (err >= 0)
+            return av_bsf_receive_packet(bsf, pkt);
+
+        av_log(s, AV_LOG_ERROR,
+               "Failed to send packet to filter %s for stream %d: %s\n",
+               bsf->filter->name, pkt->stream_index, av_err2str(err));
+        return err;
+    }
+
+    /* Nothing to filter, the packet is used as is. */
+    return 0;
+}
+
static int mov_write_packet(AVFormatContext *s, AVPacket *pkt)
{
MOVMuxContext *mov = s->priv_data;
MOVTrack *trk;
+ int ret;
if (!pkt) {
mov_flush_fragment(s, 1);
return 1;
}
- trk = &mov->tracks[pkt->stream_index];
+ trk = s->streams[pkt->stream_index]->priv_data;
+
+ ret = mov_filter_packet(s, trk, pkt);
+ if (ret < 0) {
+ if (ret == AVERROR(EAGAIN))
+ return 0;
+ av_log(s, AV_LOG_ERROR, "Error applying bitstream filters to an output "
+ "packet for stream #%d: %s\n", trk->st->index, av_err2str(ret));
+ return ret;
+ }
if (is_cover_image(trk->st)) {
int ret;
@@ -6817,12 +6893,12 @@ static int mov_create_chapter_track(AVFormatContext *s, int tracknum)
}
-static int mov_check_timecode_track(AVFormatContext *s, AVTimecode *tc, int src_index, const char *tcstr)
+static int mov_check_timecode_track(AVFormatContext *s, AVTimecode *tc, AVStream *src_st, const char *tcstr)
{
int ret;
/* compute the frame number */
- ret = av_timecode_init_from_string(tc, s->streams[src_index]->avg_frame_rate, tcstr, s);
+ ret = av_timecode_init_from_string(tc, src_st->avg_frame_rate, tcstr, s);
return ret;
}
@@ -6830,7 +6906,7 @@ static int mov_create_timecode_track(AVFormatContext *s, int index, int src_inde
{
MOVMuxContext *mov = s->priv_data;
MOVTrack *track = &mov->tracks[index];
- AVStream *src_st = s->streams[src_index];
+ AVStream *src_st = mov->tracks[src_index].st;
uint8_t data[4];
AVPacket *pkt = mov->pkt;
AVRational rate = src_st->avg_frame_rate;
@@ -6890,8 +6966,8 @@ static void enable_tracks(AVFormatContext *s)
first[i] = -1;
}
- for (i = 0; i < s->nb_streams; i++) {
- AVStream *st = s->streams[i];
+ for (i = 0; i < mov->nb_streams; i++) {
+ AVStream *st = mov->tracks[i].st;
if (st->codecpar->codec_type <= AVMEDIA_TYPE_UNKNOWN ||
st->codecpar->codec_type >= AVMEDIA_TYPE_NB ||
@@ -6925,6 +7001,9 @@ static void mov_free(AVFormatContext *s)
MOVMuxContext *mov = s->priv_data;
int i;
+ for (i = 0; i < s->nb_streams; i++)
+ s->streams[i]->priv_data = NULL;
+
if (!mov->tracks)
return;
@@ -6955,6 +7034,7 @@ static void mov_free(AVFormatContext *s)
ffio_free_dyn_buf(&track->mdat_buf);
avpriv_packet_list_free(&track->squashed_packet_queue);
+ av_bsf_free(&track->bsf);
}
av_freep(&mov->tracks);
@@ -7027,6 +7107,92 @@ static int mov_create_dvd_sub_decoder_specific_info(MOVTrack *track,
return 0;
}
+/**
+ * Set up the IAMF track (mov->tracks[0]) from the IAMF stream groups of s.
+ *
+ * When neither an audio element nor a mix presentation group is present this
+ * is a no-op. Otherwise the function allocates track->iamf, registers every
+ * audio element and mix presentation with it, points the priv_data of every
+ * stream belonging to an audio element at the IAMF track, and creates the
+ * "iamf_frame_merge" bitstream filter. The filter gets an "index_mapping"
+ * option made of colon separated "<stream index>=<stream id>" pairs and an
+ * "out_index" option set to the index of the first audio element stream.
+ *
+ * NOTE(review): on failure track->iamf is left allocated; presumably the
+ * muxer's cleanup path releases it -- confirm.
+ *
+ * @param s muxer context
+ * @return 0 when there is nothing to do, the result of av_bsf_init() on
+ *         success, a negative AVERROR code on failure
+ */
+static int mov_init_iamf_track(AVFormatContext *s)
+{
+    MOVMuxContext *mov = s->priv_data;
+    MOVTrack *track = &mov->tracks[0]; // IAMF if present is always the first track
+    const AVBitStreamFilter *filter;
+    AVBPrint bprint;
+    AVStream *first_st = NULL;
+    char *args = NULL;
+    int nb_audio_elements = 0, nb_mix_presentations = 0;
+    int ret;
+
+    for (int i = 0; i < s->nb_stream_groups; i++) {
+        const AVStreamGroup *stg = s->stream_groups[i];
+
+        if (stg->type == AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT)
+            nb_audio_elements++;
+        if (stg->type == AV_STREAM_GROUP_PARAMS_IAMF_MIX_PRESENTATION)
+            nb_mix_presentations++;
+    }
+
+    if (!nb_audio_elements && !nb_mix_presentations)
+        return 0;
+
+    if (nb_audio_elements < 1 || nb_audio_elements > 2 || nb_mix_presentations < 1) {
+        av_log(s, AV_LOG_ERROR, "There must be >= 1 and <= 2 IAMF_AUDIO_ELEMENT and at least "
+                                "one IAMF_MIX_PRESENTATION stream groups to write an IAMF track\n");
+        return AVERROR(EINVAL);
+    }
+
+    track->iamf = av_mallocz(sizeof(*track->iamf));
+    if (!track->iamf)
+        return AVERROR(ENOMEM);
+
+    av_bprint_init(&bprint, 0, AV_BPRINT_SIZE_UNLIMITED);
+
+    for (int i = 0; i < s->nb_stream_groups; i++) {
+        const AVStreamGroup *stg = s->stream_groups[i];
+
+        switch (stg->type) {
+        case AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT:
+            for (int j = 0; j < stg->nb_streams; j++) {
+                // Use the group's own stream list: j is not an index into s->streams.
+                AVStream *st = stg->streams[j];
+
+                if (!first_st)
+                    first_st = st;
+
+                // Separate every pair, including across audio elements.
+                av_bprintf(&bprint, "%s%d=%d", bprint.len ? ":" : "",
+                           st->index, st->id);
+                st->priv_data = track;
+            }
+
+            ret = ff_iamf_add_audio_element(track->iamf, stg, s);
+            break;
+        case AV_STREAM_GROUP_PARAMS_IAMF_MIX_PRESENTATION:
+            ret = ff_iamf_add_mix_presentation(track->iamf, stg, s);
+            break;
+        default:
+            // Stream groups unrelated to IAMF are not this function's concern.
+            continue;
+        }
+        if (ret < 0) {
+            av_bprint_finalize(&bprint, NULL);
+            return ret;
+        }
+    }
+
+    if (!first_st) {
+        av_log(s, AV_LOG_ERROR, "IAMF_AUDIO_ELEMENT stream groups contain no streams\n");
+        av_bprint_finalize(&bprint, NULL);
+        return AVERROR(EINVAL);
+    }
+
+    if (!av_bprint_is_complete(&bprint)) {
+        av_bprint_finalize(&bprint, NULL);
+        return AVERROR(ENOMEM);
+    }
+
+    // From here on the mapping string is owned by args and freed at "end".
+    ret = av_bprint_finalize(&bprint, &args);
+    if (ret < 0)
+        return ret;
+
+    filter = av_bsf_get_by_name("iamf_frame_merge");
+    if (!filter) {
+        av_log(s, AV_LOG_ERROR, "iamf_frame_merge bitstream filter "
+               "not found. This is a bug, please report it.\n");
+        ret = AVERROR_BUG;
+        goto end;
+    }
+
+    ret = av_bsf_alloc(filter, &track->bsf);
+    if (ret < 0)
+        goto end;
+
+    ret = avcodec_parameters_copy(track->bsf->par_in, first_st->codecpar);
+    if (ret < 0)
+        goto end;
+
+    ret = av_opt_set(track->bsf->priv_data, "index_mapping", args, 0);
+    if (ret < 0)
+        goto end;
+
+    ret = av_opt_set_int(track->bsf->priv_data, "out_index", first_st->index, 0);
+    if (ret < 0)
+        goto end;
+
+    track->tag = MKTAG('i','a','m','f');
+
+    ret = av_bsf_init(track->bsf);
+
+end:
+    av_free(args);
+    return ret;
+}
+
static int mov_init(AVFormatContext *s)
{
MOVMuxContext *mov = s->priv_data;
@@ -7164,7 +7330,37 @@ static int mov_init(AVFormatContext *s)
s->streams[0]->disposition |= AV_DISPOSITION_DEFAULT;
}
- mov->nb_tracks = s->nb_streams;
+ for (i = 0; i < s->nb_stream_groups; i++) {
+ AVStreamGroup *stg = s->stream_groups[i];
+
+ if (stg->type != AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT)
+ continue;
+
+ for (int j = 0; j < stg->nb_streams; j++) {
+ AVStream *st = stg->streams[j];
+
+ if (st->priv_data) {
+ av_log(s, AV_LOG_ERROR, "Stream %d is present in more than one Stream Group of type "
+ "IAMF Audio Element\n", j);
+ return AVERROR(EINVAL);
+ }
+ st->priv_data = st;
+ }
+
+ if (!mov->nb_tracks) // We support one track for the entire IAMF structure
+ mov->nb_tracks++;
+ }
+
+ for (i = 0; i < s->nb_streams; i++) {
+ AVStream *st = s->streams[i];
+ if (st->priv_data)
+ continue;
+ st->priv_data = st;
+ mov->nb_tracks++;
+ }
+
+ mov->nb_streams = mov->nb_tracks;
+
if (mov->mode & (MODE_MP4|MODE_MOV|MODE_IPOD) && s->nb_chapters)
mov->chapter_track = mov->nb_tracks++;
@@ -7190,7 +7386,7 @@ static int mov_init(AVFormatContext *s)
if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO &&
(t || (t=av_dict_get(st->metadata, "timecode", NULL, 0)))) {
AVTimecode tc;
- ret = mov_check_timecode_track(s, &tc, i, t->value);
+ ret = mov_check_timecode_track(s, &tc, st, t->value);
if (ret >= 0)
mov->nb_meta_tmcd++;
}
@@ -7239,18 +7435,33 @@ static int mov_init(AVFormatContext *s)
}
}
+ ret = mov_init_iamf_track(s);
+ if (ret < 0)
+ return ret;
+
+ for (int j = 0, i = 0; j < s->nb_streams; j++) {
+ AVStream *st = s->streams[j];
+
+ if (st != st->priv_data)
+ continue;
+ st->priv_data = &mov->tracks[i++];
+ }
+
for (i = 0; i < s->nb_streams; i++) {
AVStream *st= s->streams[i];
- MOVTrack *track= &mov->tracks[i];
+ MOVTrack *track = st->priv_data;
AVDictionaryEntry *lang = av_dict_get(st->metadata, "language", NULL,0);
- track->st = st;
- track->par = st->codecpar;
+ if (!track->st) {
+ track->st = st;
+ track->par = st->codecpar;
+ }
track->language = ff_mov_iso639_to_lang(lang?lang->value:"und", mov->mode!=MODE_MOV);
if (track->language < 0)
track->language = 32767; // Unspecified Macintosh language code
track->mode = mov->mode;
- track->tag = mov_find_codec_tag(s, track);
+ if (!track->tag)
+ track->tag = mov_find_codec_tag(s, track);
if (!track->tag) {
av_log(s, AV_LOG_ERROR, "Could not find tag for codec %s in stream #%d, "
"codec not currently supported in container\n",
@@ -7442,25 +7653,26 @@ static int mov_write_header(AVFormatContext *s)
{
AVIOContext *pb = s->pb;
MOVMuxContext *mov = s->priv_data;
- int i, ret, hint_track = 0, tmcd_track = 0, nb_tracks = s->nb_streams;
+ int i, ret, hint_track = 0, tmcd_track = 0, nb_tracks = mov->nb_streams;
if (mov->mode & (MODE_MP4|MODE_MOV|MODE_IPOD) && s->nb_chapters)
nb_tracks++;
if (mov->flags & FF_MOV_FLAG_RTP_HINT) {
hint_track = nb_tracks;
- for (i = 0; i < s->nb_streams; i++)
- if (rtp_hinting_needed(s->streams[i]))
+ for (i = 0; i < mov->nb_streams; i++) {
+ if (rtp_hinting_needed(mov->tracks[i].st))
nb_tracks++;
+ }
}
if (mov->nb_meta_tmcd)
tmcd_track = nb_tracks;
- for (i = 0; i < s->nb_streams; i++) {
+ for (i = 0; i < mov->nb_streams; i++) {
int j;
- AVStream *st= s->streams[i];
- MOVTrack *track= &mov->tracks[i];
+ MOVTrack *track = &mov->tracks[i];
+ AVStream *st = track->st;
/* copy extradata if it exists */
if (st->codecpar->extradata_size) {
@@ -7482,8 +7694,8 @@ static int mov_write_header(AVFormatContext *s)
&(AVChannelLayout)AV_CHANNEL_LAYOUT_MONO))
continue;
- for (j = 0; j < s->nb_streams; j++) {
- AVStream *stj= s->streams[j];
+ for (j = 0; j < mov->nb_streams; j++) {
+ AVStream *stj= mov->tracks[j].st;
MOVTrack *trackj= &mov->tracks[j];
if (j == i)
continue;
@@ -7546,8 +7758,8 @@ static int mov_write_header(AVFormatContext *s)
return ret;
if (mov->flags & FF_MOV_FLAG_RTP_HINT) {
- for (i = 0; i < s->nb_streams; i++) {
- if (rtp_hinting_needed(s->streams[i])) {
+ for (i = 0; i < mov->nb_streams; i++) {
+ if (rtp_hinting_needed(mov->tracks[i].st)) {
if ((ret = ff_mov_init_hinting(s, hint_track, i)) < 0)
return ret;
hint_track++;
@@ -7559,8 +7771,8 @@ static int mov_write_header(AVFormatContext *s)
const AVDictionaryEntry *t, *global_tcr = av_dict_get(s->metadata,
"timecode", NULL, 0);
/* Initialize the tmcd tracks */
- for (i = 0; i < s->nb_streams; i++) {
- AVStream *st = s->streams[i];
+ for (i = 0; i < mov->nb_streams; i++) {
+ AVStream *st = mov->tracks[i].st;
t = global_tcr;
if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
@@ -7569,7 +7781,7 @@ static int mov_write_header(AVFormatContext *s)
t = av_dict_get(st->metadata, "timecode", NULL, 0);
if (!t)
continue;
- if (mov_check_timecode_track(s, &tc, i, t->value) < 0)
+ if (mov_check_timecode_track(s, &tc, st, t->value) < 0)
continue;
if ((ret = mov_create_timecode_track(s, tmcd_track, i, tc)) < 0)
return ret;
@@ -7690,7 +7902,7 @@ static int mov_write_trailer(AVFormatContext *s)
int64_t moov_pos;
if (mov->need_rewrite_extradata) {
- for (i = 0; i < s->nb_streams; i++) {
+ for (i = 0; i < mov->nb_streams; i++) {
MOVTrack *track = &mov->tracks[i];
AVCodecParameters *par = track->par;
@@ -7830,7 +8042,7 @@ static int avif_write_trailer(AVFormatContext *s)
if (mov->moov_written) return 0;
mov->is_animated_avif = s->streams[0]->nb_frames > 1;
- if (mov->is_animated_avif && s->nb_streams > 1) {
+ if (mov->is_animated_avif && mov->nb_streams > 1) {
// For animated avif with alpha channel, we need to write a tref tag
// with type "auxl".
mov->tracks[1].tref_tag = MKTAG('a', 'u', 'x', 'l');
@@ -7840,7 +8052,7 @@ static int avif_write_trailer(AVFormatContext *s)
mov_write_meta_tag(pb, mov, s);
moov_size = get_moov_size(s);
- for (i = 0; i < s->nb_streams; i++)
+ for (i = 0; i < mov->nb_tracks; i++)
mov->tracks[i].data_offset = avio_tell(pb) + moov_size + 8;
if (mov->is_animated_avif) {
@@ -7862,7 +8074,7 @@ static int avif_write_trailer(AVFormatContext *s)
// write extent offsets.
pos_backup = avio_tell(pb);
- for (i = 0; i < s->nb_streams; i++) {
+ for (i = 0; i < mov->nb_streams; i++) {
if (extent_offsets[i] != (uint32_t)extent_offsets[i]) {
av_log(s, AV_LOG_ERROR, "extent offset does not fit in 32 bits\n");
return AVERROR_INVALIDDATA;
diff --git a/libavformat/movenc.h b/libavformat/movenc.h
index 60363198c9..2038ce9176 100644
--- a/libavformat/movenc.h
+++ b/libavformat/movenc.h
@@ -170,6 +170,10 @@ typedef struct MOVTrack {
unsigned int squash_fragment_samples_to_one; //< flag to note formats where all samples for a fragment are to be squashed
PacketList squashed_packet_queue;
+
+ struct AVBSFContext *bsf;
+
+ struct IAMFContext *iamf;
} MOVTrack;
typedef enum {
@@ -188,6 +192,7 @@ typedef struct MOVMuxContext {
const AVClass *av_class;
int mode;
int64_t time;
+ int nb_streams;
int nb_tracks;
int nb_meta_tmcd; ///< number of new created tmcd track based on metadata (aka not data copy)
int chapter_track; ///< qt chapter track number
--
2.43.0
More information about the ffmpeg-devel
mailing list