FFmpeg
af_dialoguenhance.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2022 Paul B Mahol
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public License
8  * as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with FFmpeg; if not, write to the Free Software Foundation, Inc.,
18  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
#include "libavutil/channel_layout.h"
#include "libavutil/mem.h"
#include "libavutil/opt.h"
#include "libavutil/tx.h"
#include "audio.h"
#include "avfilter.h"
#include "filters.h"
#include "formats.h"

#include <float.h>
31 
33  const AVClass *class;
34 
36 
37  int fft_size;
38  int overlap;
39 
40  void *window;
41  float *window_float;
42  double *window_double;
45 
53 
55 
58 } AudioDialogueEnhanceContext;
59 
/* Byte offset of an option field inside the filter's private context. */
#define OFFSET(x) offsetof(AudioDialogueEnhanceContext, x)
/* Flags shared by every option; parenthesized so the expansion stays one operand. */
#define FLAGS (AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_RUNTIME_PARAM)
62 
63 static const AVOption dialoguenhance_options[] = {
64  { "original", "set original center factor", OFFSET(original), AV_OPT_TYPE_DOUBLE, {.dbl=1}, 0, 1, FLAGS },
65  { "enhance", "set dialogue enhance factor",OFFSET(enhance), AV_OPT_TYPE_DOUBLE, {.dbl=1}, 0, 3, FLAGS },
66  { "voice", "set voice detection factor", OFFSET(voice), AV_OPT_TYPE_DOUBLE, {.dbl=2}, 2,32, FLAGS },
67  {NULL}
68 };
69 
70 AVFILTER_DEFINE_CLASS(dialoguenhance);
71 
72 static int query_formats(const AVFilterContext *ctx,
73  AVFilterFormatsConfig **cfg_in,
74  AVFilterFormatsConfig **cfg_out)
75 {
76  static const enum AVSampleFormat formats[] = {
80  };
81 
82  AVFilterChannelLayouts *in_layout = NULL, *out_layout = NULL;
83  int ret;
84 
85  ret = ff_set_common_formats_from_list2(ctx, cfg_in, cfg_out, formats);
86  if (ret < 0)
87  return ret;
88 
90  (ret = ff_channel_layouts_ref(in_layout, &cfg_in[0]->channel_layouts)) < 0 ||
92  (ret = ff_channel_layouts_ref(out_layout, &cfg_out[0]->channel_layouts)) < 0)
93  return ret;
94 
95  return 0;
96 }
97 
98 #define DEPTH 32
100 
101 #undef DEPTH
102 #define DEPTH 64
103 #include "dialoguenhance_template.c"
104 
106 {
107  AVFilterContext *ctx = inlink->dst;
108  AudioDialogueEnhanceContext *s = ctx->priv;
109  int ret;
110 
111  s->fft_size = inlink->sample_rate > 100000 ? 8192 : inlink->sample_rate > 50000 ? 4096 : 2048;
112  s->overlap = s->fft_size / 4;
113 
114  s->in_frame = ff_get_audio_buffer(inlink, (s->fft_size + 2) * 2);
115  s->center_frame = ff_get_audio_buffer(inlink, (s->fft_size + 2) * 2);
116  s->out_dist_frame = ff_get_audio_buffer(inlink, (s->fft_size + 2) * 2);
117  s->windowed_frame = ff_get_audio_buffer(inlink, (s->fft_size + 2) * 2);
118  s->windowed_out = ff_get_audio_buffer(inlink, (s->fft_size + 2) * 2);
119  s->windowed_prev = ff_get_audio_buffer(inlink, (s->fft_size + 2) * 2);
120  if (!s->in_frame || !s->windowed_out || !s->windowed_prev ||
121  !s->out_dist_frame || !s->windowed_frame || !s->center_frame)
122  return AVERROR(ENOMEM);
123 
124  switch (inlink->format) {
125  case AV_SAMPLE_FMT_FLTP:
126  s->de_stereo = de_stereo_float;
127  ret = de_tx_init_float(ctx);
128  break;
129  case AV_SAMPLE_FMT_DBLP:
130  s->de_stereo = de_stereo_double;
131  ret = de_tx_init_double(ctx);
132  break;
133  }
134 
135  return ret;
136 }
137 
139 {
140  AVFilterContext *ctx = inlink->dst;
141  AVFilterLink *outlink = ctx->outputs[0];
142  AudioDialogueEnhanceContext *s = ctx->priv;
143  AVFrame *out;
144  int ret;
145 
146  out = ff_get_audio_buffer(outlink, s->overlap);
147  if (!out) {
148  ret = AVERROR(ENOMEM);
149  goto fail;
150  }
151 
152  s->in = in;
153  s->de_stereo(ctx, out);
154 
156  out->nb_samples = in->nb_samples;
157  ret = ff_filter_frame(outlink, out);
158 fail:
159  av_frame_free(&in);
160  s->in = NULL;
161  return ret < 0 ? ret : 0;
162 }
163 
165 {
166  AVFilterLink *inlink = ctx->inputs[0];
167  AVFilterLink *outlink = ctx->outputs[0];
168  AudioDialogueEnhanceContext *s = ctx->priv;
169  AVFrame *in = NULL;
170  int ret = 0, status;
171  int64_t pts;
172 
174 
175  ret = ff_inlink_consume_samples(inlink, s->overlap, s->overlap, &in);
176  if (ret < 0)
177  return ret;
178 
179  if (ret > 0) {
180  return filter_frame(inlink, in);
181  } else if (ff_inlink_acknowledge_status(inlink, &status, &pts)) {
182  ff_outlink_set_status(outlink, status, pts);
183  return 0;
184  } else {
185  if (ff_inlink_queued_samples(inlink) >= s->overlap) {
187  } else if (ff_outlink_frame_wanted(outlink)) {
189  }
190  return 0;
191  }
192 }
193 
195 {
196  AudioDialogueEnhanceContext *s = ctx->priv;
197 
198  av_freep(&s->window);
199 
200  av_frame_free(&s->in_frame);
201  av_frame_free(&s->center_frame);
202  av_frame_free(&s->out_dist_frame);
203  av_frame_free(&s->windowed_frame);
204  av_frame_free(&s->windowed_out);
205  av_frame_free(&s->windowed_prev);
206 
207  av_tx_uninit(&s->tx_ctx[0]);
208  av_tx_uninit(&s->tx_ctx[1]);
209  av_tx_uninit(&s->itx_ctx);
210 }
211 
212 static const AVFilterPad inputs[] = {
213  {
214  .name = "default",
215  .type = AVMEDIA_TYPE_AUDIO,
216  .config_props = config_input,
217  },
218 };
219 
221  .name = "dialoguenhance",
222  .description = NULL_IF_CONFIG_SMALL("Audio Dialogue Enhancement."),
223  .priv_size = sizeof(AudioDialogueEnhanceContext),
224  .priv_class = &dialoguenhance_class,
225  .uninit = uninit,
230  .activate = activate,
231  .process_command = ff_filter_process_command,
232 };
formats
formats
Definition: signature.h:47
ff_get_audio_buffer
AVFrame * ff_get_audio_buffer(AVFilterLink *link, int nb_samples)
Request an audio samples buffer with a specific set of permissions.
Definition: audio.c:98
AV_SAMPLE_FMT_FLTP
@ AV_SAMPLE_FMT_FLTP
float, planar
Definition: samplefmt.h:66
AVFilterChannelLayouts
A list of supported channel layouts.
Definition: formats.h:85
AVERROR
Filter design: the word “frame” indicates either a video frame or a group of audio samples, as stored in an AVFrame structure. Format negotiation: the query_formats method should set, for each input and each output link, the list of supported formats. For video links, that means pixel format. For audio links, that means channel layout, sample format and sample rate. The lists are not just lists; they are references to shared objects. When the negotiation mechanism computes the intersection of the formats supported at each end of a link, all references to both lists are replaced with a reference to the intersection. And when a single format is eventually chosen for a link amongst the remaining list, all references to the list are updated. That means that if a filter requires that its input and output have the same format amongst a supported list, all it has to do is use a reference to the same list of formats. query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. (Next topic: frame references, ownership and permissions.)
opt.h
out
FILE * out
Definition: movenc.c:55
AV_CHANNEL_LAYOUT_STEREO
#define AV_CHANNEL_LAYOUT_STEREO
Definition: channel_layout.h:387
inputs
static const AVFilterPad inputs[]
Definition: af_dialoguenhance.c:212
AudioDialogueEnhancementContext::in_frame
AVFrame * in_frame
Definition: af_dialoguenhance.c:47
ff_filter_frame
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1023
ff_channel_layouts_ref
int ff_channel_layouts_ref(AVFilterChannelLayouts *f, AVFilterChannelLayouts **ref)
Add *ref as a new reference to f.
Definition: formats.c:673
AVTXContext
Definition: tx_priv.h:235
int64_t
long long int64_t
Definition: coverity.c:34
inlink
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
Definition: filter_design.txt:212
av_frame_free
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:160
FILTER_INPUTS
#define FILTER_INPUTS(array)
Definition: filters.h:262
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:374
AudioDialogueEnhancementContext::de_stereo
int(* de_stereo)(AVFilterContext *ctx, AVFrame *out)
Definition: af_dialoguenhance.c:54
AVOption
AVOption.
Definition: opt.h:429
filter_frame
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
Definition: af_dialoguenhance.c:138
float.h
AVFilter::name
const char * name
Filter name.
Definition: avfilter.h:205
AudioDialogueEnhancementContext::overlap
int overlap
Definition: af_dialoguenhance.c:38
FF_FILTER_FORWARD_STATUS_BACK
#define FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink)
Forward the status on an output link to an input link.
Definition: filters.h:434
uninit
static av_cold void uninit(AVFilterContext *ctx)
Definition: af_dialoguenhance.c:194
formats.h
AudioDialogueEnhancementContext::windowed_prev
AVFrame * windowed_prev
Definition: af_dialoguenhance.c:51
AudioDialogueEnhancementContext::voice
double voice
Definition: af_dialoguenhance.c:35
fail
#define fail()
Definition: checkasm.h:188
AudioDialogueEnhancementContext::prev_vad_float
float prev_vad_float
Definition: af_dialoguenhance.c:43
ff_af_dialoguenhance
const AVFilter ff_af_dialoguenhance
Definition: af_dialoguenhance.c:220
AudioDialogueEnhancementContext::prev_vad_double
double prev_vad_double
Definition: af_dialoguenhance.c:44
pts
static int64_t pts
Definition: transcode_aac.c:644
AudioDialogueEnhancementContext::windowed_out
AVFrame * windowed_out
Definition: af_dialoguenhance.c:50
AV_CHANNEL_LAYOUT_SURROUND
#define AV_CHANNEL_LAYOUT_SURROUND
Definition: channel_layout.h:390
AVFilterPad
A filter pad used for either input or output.
Definition: filters.h:38
AudioDialogueEnhancementContext::tx_ctx
AVTXContext * tx_ctx[2]
Definition: af_dialoguenhance.c:56
AudioDialogueEnhancementContext::windowed_frame
AVFrame * windowed_frame
Definition: af_dialoguenhance.c:49
AudioDialogueEnhancementContext::itx_fn
av_tx_fn itx_fn
Definition: af_dialoguenhance.c:57
av_cold
#define av_cold
Definition: attributes.h:90
av_tx_fn
void(* av_tx_fn)(AVTXContext *s, void *out, void *in, ptrdiff_t stride)
Function pointer to a function to perform the transform.
Definition: tx.h:151
ff_outlink_set_status
static void ff_outlink_set_status(AVFilterLink *link, int status, int64_t pts)
Set the status field of a link from the source filter.
Definition: filters.h:424
ff_inlink_request_frame
void ff_inlink_request_frame(AVFilterLink *link)
Mark that a frame is wanted on the link.
Definition: avfilter.c:1578
s
#define s(width, name)
Definition: cbs_vp9.c:198
AV_OPT_TYPE_DOUBLE
@ AV_OPT_TYPE_DOUBLE
Underlying C type is double.
Definition: opt.h:267
AVMEDIA_TYPE_AUDIO
@ AVMEDIA_TYPE_AUDIO
Definition: avutil.h:202
query_formats
static int query_formats(const AVFilterContext *ctx, AVFilterFormatsConfig **cfg_in, AVFilterFormatsConfig **cfg_out)
Definition: af_dialoguenhance.c:72
filters.h
AudioDialogueEnhancementContext::out_dist_frame
AVFrame * out_dist_frame
Definition: af_dialoguenhance.c:48
ctx
AVFormatContext * ctx
Definition: movenc.c:49
FLAGS
#define FLAGS
Definition: af_dialoguenhance.c:61
FILTER_OUTPUTS
#define FILTER_OUTPUTS(array)
Definition: filters.h:263
AudioDialogueEnhancementContext::original
double original
Definition: af_dialoguenhance.c:35
AVClass
Describe the class of an AVClass context structure.
Definition: log.h:66
ff_inlink_consume_samples
int ff_inlink_consume_samples(AVFilterLink *link, unsigned min, unsigned max, AVFrame **rframe)
Take samples from the link's FIFO and update the link's stats.
Definition: avfilter.c:1471
NULL
#define NULL
Definition: coverity.c:32
av_frame_copy_props
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
Definition: frame.c:711
OFFSET
#define OFFSET(x)
Definition: af_dialoguenhance.c:60
ff_audio_default_filterpad
const AVFilterPad ff_audio_default_filterpad[1]
An AVFilterPad array whose only entry has name "default" and is of type AVMEDIA_TYPE_AUDIO.
Definition: audio.c:34
ff_add_channel_layout
int ff_add_channel_layout(AVFilterChannelLayouts **l, const AVChannelLayout *channel_layout)
Definition: formats.c:521
ff_inlink_acknowledge_status
int ff_inlink_acknowledge_status(AVFilterLink *link, int *rstatus, int64_t *rpts)
Test and acknowledge the change of status on the link.
Definition: avfilter.c:1398
AVFilterFormatsConfig
Lists of formats / etc.
Definition: avfilter.h:111
AudioDialogueEnhancementContext::window
void * window
Definition: af_dialoguenhance.c:40
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:94
AudioDialogueEnhancementContext::fft_size
int fft_size
Definition: af_dialoguenhance.c:37
AVChannelLayout
An AVChannelLayout holds information about the channel layout of audio data.
Definition: channel_layout.h:311
AV_SAMPLE_FMT_NONE
@ AV_SAMPLE_FMT_NONE
Definition: samplefmt.h:56
dialoguenhance_template.c
ff_filter_process_command
int ff_filter_process_command(AVFilterContext *ctx, const char *cmd, const char *arg, char *res, int res_len, int flags)
Generic processing of user supplied commands that are set in the same way as the filter options.
Definition: avfilter.c:894
AudioDialogueEnhancementContext::window_float
float * window_float
Definition: af_dialoguenhance.c:41
av_tx_uninit
av_cold void av_tx_uninit(AVTXContext **ctx)
Frees a context and sets *ctx to NULL, does nothing when *ctx == NULL.
Definition: tx.c:295
AudioDialogueEnhancementContext
Definition: af_dialoguenhance.c:32
AVFrame::nb_samples
int nb_samples
number of audio samples (per channel) described by this frame
Definition: frame.h:454
AVSampleFormat
AVSampleFormat
Audio sample formats.
Definition: samplefmt.h:55
FILTER_QUERY_FUNC2
#define FILTER_QUERY_FUNC2(func)
Definition: filters.h:239
dialoguenhance_options
static const AVOption dialoguenhance_options[]
Definition: af_dialoguenhance.c:63
AVFilterPad::name
const char * name
Pad name.
Definition: filters.h:44
ff_inlink_queued_samples
int ff_inlink_queued_samples(AVFilterLink *link)
Definition: avfilter.c:1426
AudioDialogueEnhancementContext::in
AVFrame * in
Definition: af_dialoguenhance.c:46
AudioDialogueEnhancementContext::tx_fn
av_tx_fn tx_fn
Definition: af_dialoguenhance.c:57
AVFilter
Filter definition.
Definition: avfilter.h:201
ret
ret
Definition: filter_design.txt:187
status
ov_status_e status
Definition: dnn_backend_openvino.c:100
config_input
static int config_input(AVFilterLink *inlink)
Definition: af_dialoguenhance.c:105
ff_set_common_formats_from_list2
int ff_set_common_formats_from_list2(const AVFilterContext *ctx, AVFilterFormatsConfig **cfg_in, AVFilterFormatsConfig **cfg_out, const int *fmts)
Definition: formats.c:1016
channel_layout.h
avfilter.h
AV_SAMPLE_FMT_DBLP
@ AV_SAMPLE_FMT_DBLP
double, planar
Definition: samplefmt.h:67
AVFilterContext
An instance of a filter.
Definition: avfilter.h:457
activate
static int activate(AVFilterContext *ctx)
Definition: af_dialoguenhance.c:164
AudioDialogueEnhancementContext::window_double
double * window_double
Definition: af_dialoguenhance.c:42
mem.h
audio.h
AudioDialogueEnhancementContext::itx_ctx
AVTXContext * itx_ctx
Definition: af_dialoguenhance.c:56
channel_layouts
static const uint16_t channel_layouts[7]
Definition: dca_lbr.c:112
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:34
AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL
#define AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL
Same as AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC, except that the filter will have its filter_frame() c...
Definition: avfilter.h:190
ff_outlink_frame_wanted
The definition of that “something” depends on the semantics of the filter. The callback must examine the status of the filter's links and proceed accordingly. The status of output links is stored in the status_in and status_out fields and tested by the ff_outlink_frame_wanted() function. If this function returns true, a frame is wanted on that output link.
AudioDialogueEnhancementContext::center_frame
AVFrame * center_frame
Definition: af_dialoguenhance.c:52
AVFILTER_DEFINE_CLASS
AVFILTER_DEFINE_CLASS(dialoguenhance)
AudioDialogueEnhancementContext::enhance
double enhance
Definition: af_dialoguenhance.c:35
ff_filter_set_ready
void ff_filter_set_ready(AVFilterContext *filter, unsigned priority)
Mark a filter ready and schedule it for activation.
Definition: avfilter.c:237
tx.h