[FFmpeg-devel] [PATCH v5 1/2] avfilter: add audio overlay filter
Stefano Sabatini
stefasab at gmail.com
Mon Feb 5 01:41:36 EET 2024
On date Thursday 2024-02-01 18:31:56 +0530, Harshit Karwal wrote:
> Co-authored-by: Paul B Mahol <onemda at gmail.com>
> Signed-off-by: Harshit Karwal <karwalharshit at gmail.com>
> ---
> doc/filters.texi | 40 +++
> libavfilter/Makefile | 1 +
> libavfilter/af_aoverlay.c | 548 ++++++++++++++++++++++++++++++++++++++
> libavfilter/allfilters.c | 1 +
> 4 files changed, 590 insertions(+)
> create mode 100644 libavfilter/af_aoverlay.c
>
> diff --git a/doc/filters.texi b/doc/filters.texi
> index 20c91bab3a..f36ad9a2fd 100644
> --- a/doc/filters.texi
> +++ b/doc/filters.texi
> @@ -2779,6 +2779,46 @@ This filter supports the same commands as options, excluding option @code{order}
>
> Pass the audio source unchanged to the output.
>
> +@section aoverlay
> +
> +Replace a specified section of an audio stream with another input audio stream.
> +
> +If no @option{enable} expression for timeline editing is specified, the second audio
> +stream will be output in the sections of the first stream which have a gap in PTS
> +(Presentation TimeStamp) values, such that the output stream's PTS values are monotonic.
> +
> +This filter also supports linear cross fading when transitioning from one
> +input stream to another.
> +
> +The filter accepts the following options:
> +
> +@table @option
> +@item cf_duration
> +Set duration (in seconds) for cross fade between the inputs. Default value is @code{100} milliseconds.
> +@end table
> +
> +@subsection Examples
> +
> +@itemize
> +@item
> +Replace the first stream with the second stream from @code{t=10} seconds to @code{t=20} seconds:
> +@example
> +ffmpeg -i first.wav -i second.wav -filter_complex "aoverlay=enable='between(t,10,20)'" output.wav
> +@end example
> +
> +@item
> +Do the same as above, but with crossfading for @code{2} seconds between the streams:
> +@example
> +ffmpeg -i first.wav -i second.wav -filter_complex "aoverlay=cf_duration=2:enable='between(t,10,20)'" output.wav
> +@end example
> +
> +@item
> +Introduce a PTS gap from @code{t=4} seconds to @code{t=8} seconds in the first stream and output the second stream during this gap:
> +@example
> +ffmpeg -i first.wav -i second.wav -filter_complex "[0]aselect='not(between(t,4,8))'[temp];[temp][1]aoverlay[out]" -map "[out]" output.wav
> +@end example
> +@end itemize
> +
> @section apad
>
> Pad the end of an audio stream with silence.
> diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> index bba0219876..0f2b403441 100644
> --- a/libavfilter/Makefile
> +++ b/libavfilter/Makefile
> @@ -81,6 +81,7 @@ OBJS-$(CONFIG_ANLMDN_FILTER) += af_anlmdn.o
> OBJS-$(CONFIG_ANLMF_FILTER) += af_anlms.o
> OBJS-$(CONFIG_ANLMS_FILTER) += af_anlms.o
> OBJS-$(CONFIG_ANULL_FILTER) += af_anull.o
> +OBJS-$(CONFIG_AOVERLAY_FILTER) += af_aoverlay.o
> OBJS-$(CONFIG_APAD_FILTER) += af_apad.o
> OBJS-$(CONFIG_APERMS_FILTER) += f_perms.o
> OBJS-$(CONFIG_APHASER_FILTER) += af_aphaser.o generate_wave_table.o
> diff --git a/libavfilter/af_aoverlay.c b/libavfilter/af_aoverlay.c
> new file mode 100644
> index 0000000000..8dd2d02951
> --- /dev/null
> +++ b/libavfilter/af_aoverlay.c
> @@ -0,0 +1,548 @@
> +/*
> + * Copyright (c) 2023 Harshit Karwal
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "libavutil/opt.h"
> +#include "libavutil/audio_fifo.h"
> +
> +#include "audio.h"
> +#include "avfilter.h"
> +#include "filters.h"
> +#include "internal.h"
> +
> +typedef struct AOverlayContext {
> + const AVClass *class;
> + AVFrame *main_input;
> + AVFrame *overlay_input;
> + int64_t pts;
> + int main_eof;
> + int overlay_eof;
> +
> + int default_mode;
> + int previous_samples;
> + int64_t pts_gap;
> + int64_t previous_pts;
> + int64_t pts_gap_start;
> + int64_t pts_gap_end;
> +
> + int is_disabled;
> + int nb_channels;
> + int crossfade_ready;
> + AVAudioFifo *main_sample_buffers;
> + AVAudioFifo *overlay_sample_buffers;
> + int64_t cf_duration;
> + int64_t cf_samples;
> + void (*crossfade_samples)(uint8_t **dst, uint8_t * const *cf0,
> + uint8_t * const *cf1,
> + int nb_samples, int channels);
> +
> + int64_t transition_pts;
> + int64_t transition_pts2;
> +
> + uint8_t **cf0;
> + uint8_t **cf1;
> +} AOverlayContext;
> +
> +static const enum AVSampleFormat sample_fmts[] = {
> + AV_SAMPLE_FMT_DBLP, AV_SAMPLE_FMT_FLTP,
> + AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S32P,
> + AV_SAMPLE_FMT_NONE
> +};
> +
> +enum CrossfadeModes {
> + MODE_TIMELINE,
> + MODE_DEFAULT,
> + MODE_OVERLAY_EOF
> +};
> +
> +#define SEGMENT_SIZE 1024
> +#define OFFSET(x) offsetof(AOverlayContext, x)
> +#define FLAGS AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_FILTERING_PARAM
> +
> +static const AVOption aoverlay_options[] = {
> + { "cf_duration", "set duration for cross fade between the inputs", OFFSET(cf_duration), AV_OPT_TYPE_DURATION, {.i64 = 100000}, 0, 60000000, FLAGS },
> + { NULL }
> +};
> +
> +AVFILTER_DEFINE_CLASS(aoverlay);
> +
> +#define CROSSFADE_PLANAR(name, type) \
> +static void crossfade_samples_## name ##p(uint8_t **dst, uint8_t * const *cf0, \
> + uint8_t * const *cf1, \
> + int nb_samples, int channels) \
> +{ \
> + for (int i = 0; i < nb_samples; i++) { \
> + double main_gain = av_clipd(1.0 * (nb_samples - 1 - i) / nb_samples, 0, 1.); \
> + double overlay_gain = av_clipd(1.0 * i / nb_samples, 0, 1.); \
> + for (int c = 0; c < channels; c++) { \
> + type *d = (type *)dst[c]; \
> + const type *s0 = (type *)cf0[c]; \
> + const type *s1 = (type *)cf1[c]; \
> + \
> + d[i] = s0[i] * main_gain + s1[i] * overlay_gain; \
> + } \
> + } \
> +}
> +
> +CROSSFADE_PLANAR(dbl, double)
> +CROSSFADE_PLANAR(flt, float)
> +CROSSFADE_PLANAR(s16, int16_t)
> +CROSSFADE_PLANAR(s32, int32_t)
> +
> +static av_cold int init(AVFilterContext *ctx)
> +{
> + AOverlayContext *s = ctx->priv;
> +
> + s->is_disabled = 1;
> + s->transition_pts = AV_NOPTS_VALUE;
> + s->transition_pts2 = AV_NOPTS_VALUE;
> +
> + return 0;
> +}
> +
> +static av_cold void uninit(AVFilterContext *ctx)
> +{
> + AOverlayContext *s = ctx->priv;
> +
> + av_audio_fifo_free(s->main_sample_buffers);
> + av_audio_fifo_free(s->overlay_sample_buffers);
> +
> + for (int i = 0; i < s->nb_channels; i++) {
> + if (s->cf0)
> + av_freep(&s->cf0[i]);
> + if (s->cf1)
> + av_freep(&s->cf1[i]);
> + }
> + av_freep(&s->cf0);
> + av_freep(&s->cf1);
> +
> + av_frame_free(&s->main_input);
> + av_frame_free(&s->overlay_input);
> +}
> +
> +static int crossfade_prepare(AOverlayContext *s, AVFilterLink *main_inlink, AVFilterLink *overlay_inlink, AVFilterLink *outlink,
> + int nb_samples, AVFrame **main_buffer, AVFrame **overlay_buffer, enum CrossfadeModes mode)
> +{
> + int ret;
> +
> + *main_buffer = ff_get_audio_buffer(outlink, nb_samples);
> + if (!(*main_buffer))
> + return AVERROR(ENOMEM);
> +
> + (*main_buffer)->pts = s->pts;
> + s->pts += av_rescale_q(nb_samples, (AVRational){ 1, outlink->sample_rate }, outlink->time_base);
> +
> + if ((ret = av_audio_fifo_read(s->main_sample_buffers, (void **)(*main_buffer)->extended_data, nb_samples)) < 0)
> + return ret;
> +
> + if (mode == MODE_DEFAULT) {
> + s->previous_samples = (*main_buffer)->nb_samples;
> + } else if (mode == MODE_OVERLAY_EOF || (mode == MODE_TIMELINE && s->is_disabled)) {
> + *overlay_buffer = ff_get_audio_buffer(outlink, nb_samples);
> + if (!(*overlay_buffer))
> + return AVERROR(ENOMEM);
> +
> + if ((ret = av_audio_fifo_read(s->overlay_sample_buffers, (void **)(*overlay_buffer)->extended_data, nb_samples)) < 0)
> + return ret;
> +
> + (*overlay_buffer)->pts = (*main_buffer)->pts;
> + }
> +
> + s->crossfade_ready = 1;
> +
> + return 0;
> +}
> +
> +static int crossfade_samples(AOverlayContext *s, AVFilterLink *main_inlink, AVFilterLink *overlay_inlink, AVFilterLink *outlink,
> + int nb_samples, AVFrame **out, enum CrossfadeModes mode)
> +{
> + int ret;
> +
> + *out = ff_get_audio_buffer(outlink, nb_samples);
> + if (!(*out))
> + return AVERROR(ENOMEM);
> +
> + if ((ret = av_audio_fifo_read(s->main_sample_buffers, (void **) s->cf0, nb_samples)) < 0)
> + return ret;
> +
> + if ((ret = av_audio_fifo_read(s->overlay_sample_buffers, (void **) s->cf1, nb_samples)) < 0)
> + return ret;
> +
> + if (mode == MODE_TIMELINE) {
> + s->is_disabled ? s->crossfade_samples((*out)->extended_data, s->cf1, s->cf0, nb_samples, (*out)->ch_layout.nb_channels)
> + : s->crossfade_samples((*out)->extended_data, s->cf0, s->cf1, nb_samples, (*out)->ch_layout.nb_channels);
> + } else if (mode == MODE_OVERLAY_EOF) {
> + s->crossfade_samples((*out)->extended_data, s->cf1, s->cf0, s->cf_samples, (*out)->ch_layout.nb_channels);
> + } else if (mode == MODE_DEFAULT) {
> + s->transition_pts2 != AV_NOPTS_VALUE ? s->crossfade_samples((*out)->extended_data, s->cf1, s->cf0, nb_samples, (*out)->ch_layout.nb_channels)
> + : s->crossfade_samples((*out)->extended_data, s->cf0, s->cf1, nb_samples, (*out)->ch_layout.nb_channels);
> + }
here you could split the code performing the check from the code
running the crossfade (in the end there are only two options), e.g.:

    /* note: "switch" is a C keyword, so the flag needs another name */
    int swap = (mode == MODE_TIMELINE && s->is_disabled) ||
               mode == MODE_OVERLAY_EOF ||
               (mode == MODE_DEFAULT && s->transition_pts2 != AV_NOPTS_VALUE);

    s->crossfade_samples((*out)->extended_data,
                         swap ? s->cf1 : s->cf0,
                         swap ? s->cf0 : s->cf1,
                         nb_samples, (*out)->ch_layout.nb_channels);

(in the MODE_OVERLAY_EOF path nb_samples is already set to s->cf_samples
at the call site, so passing nb_samples keeps the behavior unchanged)
> +
> + (*out)->pts = s->pts;
> + s->pts += av_rescale_q(nb_samples, (AVRational){ 1, outlink->sample_rate }, outlink->time_base);
> + s->transition_pts = AV_NOPTS_VALUE;
> + s->transition_pts2 = AV_NOPTS_VALUE;
> + s->crossfade_ready = 0;
> +
> + return 0;
> +}
> +
> +static int consume_samples(AOverlayContext *s, AVFilterLink *overlay_inlink, AVFilterLink *outlink)
> +{
> + int ret, status, nb_samples;
> + int64_t pts;
> +
> + nb_samples = FFMIN(SEGMENT_SIZE, av_audio_fifo_space(s->overlay_sample_buffers));
> +
> + ret = ff_inlink_consume_samples(overlay_inlink, nb_samples, nb_samples, &s->overlay_input);
> + if (ret < 0) {
> + return ret;
> + } else if (ff_inlink_acknowledge_status(overlay_inlink, &status, &pts)) {
> + s->overlay_eof = 1;
> + return 0;
> + } else if (!ret) {
> + if (ff_outlink_frame_wanted(outlink))
> + ff_inlink_request_frame(overlay_inlink);
> + return 0;
> + }
> +
> + ret = av_audio_fifo_write(s->overlay_sample_buffers, (void **)s->overlay_input->extended_data, nb_samples);
> + av_frame_free(&s->overlay_input);
> + if (ret < 0)
> + return ret;
> +
> + return 1;
> +}
> +
> +static int activate(AVFilterContext *ctx)
> +{
> + AOverlayContext *s = ctx->priv;
> + int status, ret, nb_samples;
> + int64_t pts;
> + AVFrame *out = NULL, *main_buffer = NULL, *overlay_buffer = NULL;
> +
> + AVFilterLink *main_inlink = ctx->inputs[0];
> + AVFilterLink *overlay_inlink = ctx->inputs[1];
> + AVFilterLink *outlink = ctx->outputs[0];
> +
> + FF_FILTER_FORWARD_STATUS_BACK_ALL(outlink, ctx);
> +
> + if (s->default_mode && (s->pts_gap_end - s->pts_gap_start <= 0 || s->overlay_eof)) {
> + s->default_mode = 0;
> + s->transition_pts2 = s->pts_gap_end;
> + }
> +
> + if (av_audio_fifo_space(s->main_sample_buffers) != 0 && !s->main_eof && !s->default_mode) {
> + nb_samples = FFMIN(SEGMENT_SIZE, av_audio_fifo_space(s->main_sample_buffers));
> +
> + ret = ff_inlink_consume_samples(main_inlink, nb_samples, nb_samples, &s->main_input);
> + if (ret > 0) {
> + if (ctx->enable_str && s->is_disabled != ctx->is_disabled && !s->overlay_eof) {
> + s->is_disabled = ctx->is_disabled;
> + s->transition_pts = s->main_input->pts;
> +
> + if (s->main_input->nb_samples < av_audio_fifo_space(s->main_sample_buffers))
> + s->crossfade_ready = 1;
> + if (av_audio_fifo_size(s->main_sample_buffers) == 0) {
> + s->transition_pts = AV_NOPTS_VALUE;
> + s->crossfade_ready = 0;
> + }
> + }
> + else if (!ctx->enable_str && !s->default_mode) {
> + if (s->previous_pts + av_rescale_q(s->previous_samples, (AVRational){ 1, outlink->sample_rate }, outlink->time_base) >= s->main_input->pts) {
> + s->default_mode = 0;
> + s->previous_pts = s->main_input->pts;
> + s->previous_samples = s->main_input->nb_samples;
> + } else if (!s->overlay_eof) {
> + s->pts_gap_start = s->previous_pts;
> + if (s->pts > 0 || av_audio_fifo_size(s->main_sample_buffers) > 0)
> + s->transition_pts = s->pts_gap_start;
> + s->pts_gap_end = s->main_input->pts;
> + s->default_mode = 1;
> + }
> + }
> +
> + ret = av_audio_fifo_write(s->main_sample_buffers, (void **)s->main_input->extended_data, nb_samples);
> + av_frame_free(&s->main_input);
> + if (ret < 0)
> + return ret;
> + } else if (ret < 0) {
> + return ret;
> + } else if (ff_inlink_acknowledge_status(main_inlink, &status, &pts)) {
> + s->main_eof = 1;
> + s->crossfade_ready = 1;
> + } else if (!ret) {
> + if (ff_outlink_frame_wanted(outlink))
> + ff_inlink_request_frame(main_inlink);
> + return 0;
> + }
> + }
> +
> + if (s->main_eof && av_audio_fifo_size(s->main_sample_buffers) == 0 && ff_inlink_acknowledge_status(main_inlink, &status, &pts)) {
> + ff_outlink_set_status(outlink, status, pts);
> + return 0;
> + }
> +
> + if (av_audio_fifo_space(s->main_sample_buffers) > 0 &&
> + (s->transition_pts == AV_NOPTS_VALUE || av_audio_fifo_size(s->main_sample_buffers) != s->cf_samples) && !s->default_mode) {
> + if (ff_inlink_acknowledge_status(main_inlink, &status, &pts)) {
> + s->main_eof = 1;
> + s->crossfade_ready = 1;
> + } else {
> + ff_inlink_request_frame(main_inlink);
> + return 0;
> + }
> + }
> +
> + if (!s->overlay_eof) {
> + if (av_audio_fifo_space(s->overlay_sample_buffers) > 0) {
> + ret = consume_samples(s, overlay_inlink, outlink);
> + if (ret <= 0) {
> + if (!s->overlay_eof)
> + return ret;
nit: you can merge the two nested checks into a single branch, e.g.:
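    /* equivalent flattened form of the two nested ifs above */
    if (ret <= 0 && !s->overlay_eof)
        return ret;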
> + }
> + }
> +
> + if (av_audio_fifo_space(s->overlay_sample_buffers) > 0) {
> + if (ff_inlink_acknowledge_status(overlay_inlink, &status, &pts)) {
> + s->overlay_eof = 1;
> + s->transition_pts = s->pts + av_rescale_q(av_audio_fifo_size(s->overlay_sample_buffers) - (s->cf_samples / 2),
> + (AVRational){ 1, outlink->sample_rate }, outlink->time_base);
> + s->is_disabled = 1;
> + } else {
> + ff_inlink_request_frame(overlay_inlink);
> + return 0;
> + }
> + }
> + }
> +
> + if (!ctx->enable_str) {
> + if (s->transition_pts != AV_NOPTS_VALUE && av_audio_fifo_size(s->main_sample_buffers) > s->cf_samples + SEGMENT_SIZE) {
> + nb_samples = av_audio_fifo_size(s->main_sample_buffers) + av_audio_fifo_space(s->main_sample_buffers) - s->cf_samples - SEGMENT_SIZE;
> +
> + if ((ret = crossfade_prepare(s, main_inlink, overlay_inlink, outlink, nb_samples, &main_buffer, &overlay_buffer, MODE_DEFAULT)) < 0)
> + return ret;
> +
> + return ff_filter_frame(outlink, main_buffer);
> + } else if (s->transition_pts != AV_NOPTS_VALUE || s->transition_pts2 != AV_NOPTS_VALUE) {
> + nb_samples = FFMIN(s->cf_samples, av_audio_fifo_size(s->main_sample_buffers) - SEGMENT_SIZE);
> +
> + if ((ret = crossfade_samples(s, main_inlink, overlay_inlink, outlink, nb_samples, &out, MODE_DEFAULT)) < 0)
> + return ret;
> +
> + av_log(ctx, AV_LOG_DEBUG, "PTS at stream transition: %lld\n", out->pts);
use libavutil/timestamp.h to give a friendlier representation of the
timestamp; also it is not clear what this transition is about.
From the point of view of the user you need to know: when the
crossfade starts and when it finishes, when the overlay starts and
ends, and whether/when a timestamp gap was detected.
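Something like this (only a sketch, the exact wording of the message
is up to you):

    #include "libavutil/timestamp.h"

    av_log(ctx, AV_LOG_DEBUG, "crossfade start pts:%s pts_time:%s\n",
           av_ts2str(out->pts), av_ts2timestr(out->pts, &outlink->time_base));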
> +
> + return ff_filter_frame(outlink, out);
> + } else if (!s->default_mode) {
> + nb_samples = FFMIN(av_audio_fifo_size(s->main_sample_buffers), SEGMENT_SIZE);
> +
> + main_buffer = ff_get_audio_buffer(outlink, nb_samples);
> + if (!main_buffer)
> + return AVERROR(ENOMEM);
> +
> + main_buffer->pts = s->pts;
> + s->pts += av_rescale_q(nb_samples, (AVRational){ 1, outlink->sample_rate }, outlink->time_base);
> +
> + if ((ret = av_audio_fifo_read(s->main_sample_buffers, (void **)main_buffer->extended_data, nb_samples)) < 0)
> + return ret;
> + }
> +
> + if (!s->default_mode || s->overlay_eof) {
> + s->previous_samples = main_buffer->nb_samples;
> + return ff_filter_frame(outlink, main_buffer);
> + }
> +
> + s->pts_gap = s->pts_gap_end - s->pts_gap_start;
> +
> + nb_samples = FFMIN(SEGMENT_SIZE, av_rescale_q(s->pts_gap, outlink->time_base, (AVRational){ 1, outlink->sample_rate }));
> +
> + overlay_buffer = ff_get_audio_buffer(outlink, nb_samples);
> + if (!overlay_buffer)
> + return AVERROR(ENOMEM);
> +
> + if ((ret = av_audio_fifo_read(s->overlay_sample_buffers, (void **)overlay_buffer->extended_data, nb_samples)) < 0)
> + return ret;
> +
> + s->previous_samples = nb_samples;
> + s->previous_pts += av_rescale_q(nb_samples, (AVRational){ 1, outlink->sample_rate }, outlink->time_base);
> + s->pts_gap_start += av_rescale_q(nb_samples, (AVRational){ 1, outlink->sample_rate }, outlink->time_base);
> +
> + overlay_buffer->pts = s->pts;
> + s->pts += av_rescale_q(nb_samples, (AVRational){ 1, outlink->sample_rate }, outlink->time_base);
the second operand of the additions is computed three times here;
store it in a variable and reuse it, e.g. with a hypothetical pts_step:
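    /* computed once from nb_samples, then reused for all three updates */
    const int64_t pts_step = av_rescale_q(nb_samples, (AVRational){ 1, outlink->sample_rate },
                                          outlink->time_base);

    s->previous_samples = nb_samples;
    s->previous_pts    += pts_step;
    s->pts_gap_start   += pts_step;

    overlay_buffer->pts = s->pts;
    s->pts             += pts_step;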
> +
> + av_frame_free(&main_buffer);
> +
> + return ff_filter_frame(outlink, overlay_buffer);
> + }
> +
> + if (s->overlay_eof && av_audio_fifo_size(s->overlay_sample_buffers) > 0) {
> + if (av_audio_fifo_size(s->overlay_sample_buffers) > s->cf_samples) {
> + nb_samples = av_audio_fifo_size(s->overlay_sample_buffers) - s->cf_samples;
> +
> + if ((ret = crossfade_prepare(s, main_inlink, overlay_inlink, outlink, nb_samples, &main_buffer, &overlay_buffer, MODE_OVERLAY_EOF)) < 0)
> + return ret;
> +
> + return ff_filter_frame(outlink, overlay_buffer);
> + } else if (av_audio_fifo_size(s->overlay_sample_buffers) >= s->cf_samples) {
> + if ((ret = crossfade_samples(s, main_inlink, overlay_inlink, outlink, s->cf_samples, &out, MODE_OVERLAY_EOF)) < 0)
> + return ret;
> +
> + av_log(ctx, AV_LOG_DEBUG, "PTS at stream transition: %lld\n", out->pts);
ditto
> +
> + return ff_filter_frame(outlink, out);
> + }
> + }
> +
> + if (s->transition_pts != AV_NOPTS_VALUE && !s->crossfade_ready) {
> + nb_samples = av_rescale_q(s->transition_pts - (s->cf_samples / 2) - s->pts, outlink->time_base, (AVRational) { 1, outlink->sample_rate });
> +
> + if ((ret = crossfade_prepare(s, main_inlink, overlay_inlink, outlink, nb_samples, &main_buffer, &overlay_buffer, MODE_TIMELINE)) < 0)
> + return ret;
> + } else if (s->transition_pts != AV_NOPTS_VALUE) {
> + nb_samples = s->main_eof ? av_audio_fifo_size(s->main_sample_buffers) : s->cf_samples;
> + if (s->transition_pts < av_rescale_q(s->cf_samples, (AVRational){ 1, outlink->sample_rate }, outlink->time_base)) {
> + nb_samples = av_rescale_q(s->transition_pts, outlink->time_base, (AVRational){ 1, outlink->sample_rate });
> + }
> +
> + if ((ret = crossfade_samples(s, main_inlink, overlay_inlink, outlink, nb_samples, &out, MODE_TIMELINE)) < 0)
> + return ret;
> +
> + av_log(ctx, AV_LOG_DEBUG, "PTS at stream transition: %lld\n", out->pts);
ditto
> +
> + return ff_filter_frame(outlink, out);
> + } else {
> + nb_samples = FFMIN(av_audio_fifo_size(s->main_sample_buffers), SEGMENT_SIZE);
> + main_buffer = ff_get_audio_buffer(outlink, nb_samples);
> + if (!main_buffer)
> + return AVERROR(ENOMEM);
> +
> + main_buffer->pts = s->pts;
> + s->pts += av_rescale_q(nb_samples, (AVRational){ 1, outlink->sample_rate }, outlink->time_base);
> +
> + if ((ret = av_audio_fifo_read(s->main_sample_buffers, (void **)main_buffer->extended_data, nb_samples)) < 0)
> + return ret;
> + }
> +
> + if (!ff_inlink_evaluate_timeline_at_frame(main_inlink, main_buffer) || (s->overlay_eof && av_audio_fifo_size(s->overlay_sample_buffers) == 0)) {
> + return ff_filter_frame(outlink, main_buffer);
> + } else {
> + if (s->transition_pts == AV_NOPTS_VALUE) {
> + nb_samples = FFMIN(av_audio_fifo_size(s->overlay_sample_buffers), SEGMENT_SIZE);
> + overlay_buffer = ff_get_audio_buffer(outlink, nb_samples);
> + if (!overlay_buffer)
> + return AVERROR(ENOMEM);
> +
> + if ((ret = av_audio_fifo_read(s->overlay_sample_buffers, (void **)overlay_buffer->extended_data, nb_samples)) < 0)
> + return ret;
> +
> + overlay_buffer->pts = main_buffer->pts;
> + }
> + av_frame_free(&main_buffer);
> + return ff_filter_frame(outlink, overlay_buffer);
> + }
> +}
I'm a bit concerned by the complexity of this function. I wonder if it
would make sense to split the filter in two separate ones, one for
timeline editing and another one for filling the PTS gaps. Or we might
have a "mode" option to enable one behavior or the other; it probably
does not make sense to mix the two. Would this help with simplifying
the logic?
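For the option-based variant, a rough sketch of what I mean (the names
are only suggestions, and it assumes an "int mode" field added to
AOverlayContext):

    enum OverlayMode {
        AOVERLAY_MODE_TIMELINE,
        AOVERLAY_MODE_PTS_GAPS,
        NB_AOVERLAY_MODES
    };

    static const AVOption aoverlay_options[] = {
        { "cf_duration", "set duration for cross fade between the inputs", OFFSET(cf_duration), AV_OPT_TYPE_DURATION, {.i64 = 100000}, 0, 60000000, FLAGS },
        { "mode", "set the overlay mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64 = AOVERLAY_MODE_TIMELINE}, 0, NB_AOVERLAY_MODES-1, FLAGS, .unit = "mode" },
            { "timeline", "overlay the sections selected through timeline editing", 0, AV_OPT_TYPE_CONST, {.i64 = AOVERLAY_MODE_TIMELINE}, 0, 0, FLAGS, .unit = "mode" },
            { "pts_gaps", "overlay the sections where the main input has PTS gaps", 0, AV_OPT_TYPE_CONST, {.i64 = AOVERLAY_MODE_PTS_GAPS}, 0, 0, FLAGS, .unit = "mode" },
        { NULL }
    };

then activate() could dispatch on s->mode early on, instead of
interleaving the two code paths.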