Go to the documentation of this file.
105 int ch,
int out_offset,
int in_offset);
110 #define OFFSET(x) offsetof(SilenceRemoveContext, x)
111 #define AF AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_AUDIO_PARAM
136 int ch,
int out_offset,
int in_offset)
138 const double *srcp = (
const double *)in->
data[0];
139 const double src = srcp[in->
channels * in_offset + ch];
140 double *dstp = (
double *)
out->data[0];
142 dstp[
out->channels * out_offset + ch] =
src;
146 int ch,
int out_offset,
int in_offset)
149 const double src = srcp[in_offset];
150 double *dstp = (
double *)
out->extended_data[ch];
152 dstp[out_offset] =
src;
156 int ch,
int out_offset,
int in_offset)
158 const float *srcp = (
const float *)in->
data[0];
159 const float src = srcp[in->
channels * in_offset + ch];
160 float *dstp = (
float *)
out->data[0];
162 dstp[
out->channels * out_offset + ch] =
src;
166 int ch,
int out_offset,
int in_offset)
169 const float src = srcp[in_offset];
170 float *dstp = (
float *)
out->extended_data[ch];
172 dstp[out_offset] =
src;
178 const double *wsamples = (
const double *)
s->window->data[0];
180 double wsample = wsamples[
frame->channels *
s->window_offset + ch];
185 new_sum =
fmax(new_sum, 0.);
188 return new_sum /
s->window_duration;
194 double *wsamples = (
double *)
s->window->data[0];
196 double *wsample = &wsamples[
frame->channels *
s->window_offset + ch];
199 s->sum =
fmax(
s->sum, 0.);
207 const float *wsamples = (
const float *)
s->window->data[0];
209 float wsample = wsamples[
frame->channels *
s->window_offset + ch];
214 new_sum =
fmaxf(new_sum, 0.
f);
217 return new_sum /
s->window_duration;
223 float *wsamples = (
float *)
s->window->data[0];
225 float *wsample = &wsamples[
frame->channels *
s->window_offset + ch];
236 const double *wsamples = (
const double *)
s->window->data[0];
238 double wsample = wsamples[
frame->channels *
s->window_offset + ch];
243 new_sum =
fmax(new_sum, 0.);
247 return sqrt(new_sum /
s->window_duration);
253 double *wsamples = (
double *)
s->window->data[0];
255 double *wsample = &wsamples[
frame->channels *
s->window_offset + ch];
258 s->sum =
fmax(
s->sum, 0.);
266 const float *wsamples = (
const float *)
s->window->data[0];
268 float wsample = wsamples[
frame->channels *
s->window_offset + ch];
273 new_sum =
fmaxf(new_sum, 0.
f);
277 return sqrtf(new_sum /
s->window_duration);
284 float *wsamples = (
float *)
s->window->data[0];
285 float *wsample = &wsamples[
frame->channels *
s->window_offset + ch];
295 const double *
samples = (
const double *)
frame->extended_data[ch];
296 const double *wsamples = (
const double *)
s->window->extended_data[ch];
298 double wsample = wsamples[
s->window_offset];
303 new_sum =
fmax(new_sum, 0.);
306 return new_sum /
s->window_duration;
311 const double *
samples = (
const double *)
frame->extended_data[ch];
312 double *wsamples = (
double *)
s->window->extended_data[ch];
314 double *wsample = &wsamples[
s->window_offset];
317 s->sum =
fmax(
s->sum, 0.);
324 const float *
samples = (
const float *)
frame->extended_data[ch];
325 const float *wsamples = (
const float *)
s->window->extended_data[ch];
327 float wsample = wsamples[
s->window_offset];
332 new_sum =
fmaxf(new_sum, 0.
f);
335 return new_sum /
s->window_duration;
340 const float *
samples = (
const float *)
frame->extended_data[ch];
341 float *wsamples = (
float *)
s->window->extended_data[ch];
343 float *wsample = &wsamples[
s->window_offset];
353 const double *
samples = (
const double *)
frame->extended_data[ch];
354 const double *wsamples = (
const double *)
s->window->extended_data[ch];
356 double wsample = wsamples[
s->window_offset];
361 new_sum =
fmax(new_sum, 0.);
365 return sqrt(new_sum /
s->window_duration);
370 const double *
samples = (
const double *)
frame->extended_data[ch];
371 double *wsamples = (
double *)
s->window->extended_data[ch];
373 double *wsample = &wsamples[
s->window_offset];
376 s->sum =
fmax(
s->sum, 0.);
383 const float *
samples = (
const float *)
frame->extended_data[ch];
384 const float *wsamples = (
const float *)
s->window->extended_data[ch];
386 float wsample = wsamples[
s->window_offset];
391 new_sum =
fmaxf(new_sum, 0.
f);
395 return sqrtf(new_sum /
s->window_duration);
400 const float *
samples = (
const float *)
frame->extended_data[ch];
401 float *wsamples = (
float *)
s->window->extended_data[ch];
403 float *wsample = &wsamples[
s->window_offset];
415 if (
s->stop_periods < 0) {
416 s->stop_periods = -
s->stop_periods;
426 s->window->channels,
s->window->format);
428 s->window_offset = 0;
440 s->window_duration =
FFMAX(1,
s->window_duration);
457 FFMAX(
s->start_duration, 1));
458 if (!
s->start_holdoff)
462 FFMAX(
s->start_silence, 1));
463 if (!
s->start_silence_hold)
466 s->start_holdoff_offset = 0;
467 s->start_holdoff_end = 0;
468 s->start_found_periods = 0;
471 FFMAX(
s->stop_duration, 1));
472 if (!
s->stop_holdoff)
476 FFMAX(
s->stop_silence, 1));
477 if (!
s->stop_silence_hold)
480 s->stop_holdoff_offset = 0;
481 s->stop_holdoff_end = 0;
482 s->stop_found_periods = 0;
484 if (
s->start_periods)
492 switch (
s->detection) {
505 switch (
s->detection) {
518 switch (
s->detection) {
531 switch (
s->detection) {
555 int *nb_samples_written,
int flush_silence)
559 if (*nb_samples_written) {
560 out->nb_samples = *nb_samples_written;
563 *nb_samples_written = 0;
568 if (
s->stop_silence_end <= 0 || !flush_silence)
575 if (
s->stop_silence_offset <
s->stop_silence_end) {
577 s->stop_silence_offset,
578 s->stop_silence_end -
s->stop_silence_offset,
582 if (
s->stop_silence_offset > 0) {
584 s->stop_silence_end -
s->stop_silence_offset,
585 0,
s->stop_silence_offset,
589 s->stop_silence_offset = 0;
590 s->stop_silence_end = 0;
601 int nbs, nb_samples_read, nb_samples_written;
602 int i, j, threshold,
ret = 0;
605 nb_samples_read = nb_samples_written = 0;
608 s->next_pts = in->
pts;
617 for (
i = 0;
i < nbs;
i++) {
618 if (
s->start_mode ==
T_ANY) {
620 for (j = 0; j < outlink->
channels; j++) {
621 threshold |=
s->compute(
s, in, j, nb_samples_read) >
s->start_threshold;
625 for (j = 0; j < outlink->
channels; j++) {
626 threshold &=
s->compute(
s, in, j, nb_samples_read) >
s->start_threshold;
631 for (j = 0; j < outlink->
channels; j++) {
632 s->update(
s, in, j, nb_samples_read);
633 s->copy(
s,
s->start_holdoff, in, j,
s->start_holdoff_end, nb_samples_read);
637 if (
s->window_offset >=
s->window_duration)
638 s->window_offset = 0;
639 s->start_holdoff_end++;
642 if (
s->start_holdoff_end >=
s->start_duration) {
643 s->start_found_periods +=
s->one_period >= 1;
645 if (
s->start_found_periods >=
s->start_periods) {
647 goto silence_trim_flush;
650 s->start_holdoff_offset = 0;
651 s->start_holdoff_end = 0;
652 s->start_silence_offset = 0;
653 s->start_silence_end = 0;
656 s->start_holdoff_end = 0;
659 for (j = 0; j < outlink->
channels; j++) {
660 s->update(
s, in, j, nb_samples_read);
661 if (
s->start_silence)
662 s->copy(
s,
s->start_silence_hold, in, j,
s->start_silence_offset, nb_samples_read);
666 if (
s->window_offset >=
s->window_duration)
667 s->window_offset = 0;
669 s->start_silence_offset++;
671 if (
s->start_silence) {
672 s->start_silence_end =
FFMIN(
s->start_silence_end + 1,
s->start_silence);
673 if (
s->start_silence_offset >=
s->start_silence)
674 s->start_silence_offset = 0;
682 nbs =
s->start_holdoff_end -
s->start_holdoff_offset;
692 if (
s->start_silence_end > 0) {
693 if (
s->start_silence_offset <
s->start_silence_end) {
695 s->start_silence_offset,
696 s->start_silence_end -
s->start_silence_offset,
700 if (
s->start_silence_offset > 0) {
702 s->start_silence_end -
s->start_silence_offset,
703 0,
s->start_silence_offset,
709 s->start_silence_end,
710 s->start_holdoff_offset, nbs,
713 s->start_holdoff_offset += nbs;
718 if (
s->start_holdoff_offset ==
s->start_holdoff_end) {
719 s->start_holdoff_offset = 0;
720 s->start_holdoff_end = 0;
721 s->start_silence_offset = 0;
722 s->start_silence_end = 0;
740 if (
s->stop_periods) {
741 for (
i = 0;
i < nbs;
i++) {
742 if (
s->stop_mode ==
T_ANY) {
744 for (j = 0; j < outlink->
channels; j++) {
745 threshold |=
s->compute(
s, in, j, nb_samples_read) >
s->stop_threshold;
749 for (j = 0; j < outlink->
channels; j++) {
750 threshold &=
s->compute(
s, in, j, nb_samples_read) >
s->stop_threshold;
754 if (threshold &&
s->stop_holdoff_end && !
s->stop_silence) {
756 flush(
s,
out, outlink, &nb_samples_written, 0);
758 goto silence_copy_flush;
759 }
else if (threshold) {
760 for (j = 0; j < outlink->
channels; j++) {
761 s->update(
s, in, j, nb_samples_read);
762 s->copy(
s,
out, in, j, nb_samples_written, nb_samples_read);
766 if (
s->window_offset >=
s->window_duration)
767 s->window_offset = 0;
769 nb_samples_written++;
771 }
else if (!threshold) {
772 for (j = 0; j < outlink->
channels; j++) {
773 s->update(
s, in, j, nb_samples_read);
775 s->copy(
s,
s->stop_silence_hold, in, j,
s->stop_silence_offset, nb_samples_read);
777 s->copy(
s,
s->stop_holdoff, in, j,
s->stop_holdoff_end, nb_samples_read);
780 if (
s->stop_silence) {
781 s->stop_silence_offset++;
782 s->stop_silence_end =
FFMIN(
s->stop_silence_end + 1,
s->stop_silence);
783 if (
s->stop_silence_offset >=
s->stop_silence) {
784 s->stop_silence_offset = 0;
789 if (
s->window_offset >=
s->window_duration)
790 s->window_offset = 0;
792 s->stop_holdoff_end++;
794 if (
s->stop_holdoff_end >=
s->stop_duration) {
795 s->stop_found_periods +=
s->one_period >= 1;
797 if (
s->stop_found_periods >=
s->stop_periods) {
798 s->stop_holdoff_offset = 0;
799 s->stop_holdoff_end = 0;
803 flush(
s,
out, outlink, &nb_samples_written, 1);
806 s->stop_found_periods = 0;
807 s->start_found_periods = 0;
808 s->start_holdoff_offset = 0;
809 s->start_holdoff_end = 0;
810 s->start_silence_offset = 0;
811 s->start_silence_end = 0;
814 flush(
s,
out, outlink, &nb_samples_written, 1);
819 flush(
s,
out, outlink, &nb_samples_written, 0);
820 goto silence_copy_flush;
825 flush(
s,
out, outlink, &nb_samples_written, 0);
829 nb_samples_read, nbs,
839 nbs =
s->stop_holdoff_end -
s->stop_holdoff_offset;
850 s->stop_holdoff_offset, nbs,
853 s->stop_holdoff_offset += nbs;
858 if (
s->stop_holdoff_offset ==
s->stop_holdoff_end) {
859 s->stop_holdoff_offset = 0;
860 s->stop_holdoff_end = 0;
861 s->stop_silence_offset = 0;
862 s->stop_silence_end = 0;
882 out->pts =
s->next_pts;
902 int nbs =
s->stop_holdoff_end -
s->stop_holdoff_offset;
911 s->stop_holdoff_offset, nbs,
958 .
name =
"silenceremove",
961 .priv_class = &silenceremove_class,
void av_audio_fifo_free(AVAudioFifo *af)
Free an AVAudioFifo.
static double compute_rms_double(SilenceRemoveContext *s, AVFrame *frame, int ch, int offset)
AVFrame * ff_get_audio_buffer(AVFilterLink *link, int nb_samples)
Request an audio samples buffer with a specific set of permissions.
@ AV_SAMPLE_FMT_FLTP
float, planar
static void update_rms_doublep(SilenceRemoveContext *s, AVFrame *frame, int ch, int offset)
static void update_rms_float(SilenceRemoveContext *s, AVFrame *frame, int ch, int offset)
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
size_t stop_silence_offset
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
#define AVERROR_EOF
End of file.
static av_cold int init(AVFilterContext *ctx)
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g. extended_data.
size_t start_holdoff_offset
This structure describes decoded (raw) audio or video data.
static int request_frame(AVFilterLink *outlink)
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
AVFrame * stop_silence_hold
int ff_request_frame(AVFilterLink *link)
Request an input frame from the filter at the other end of the link.
static const AVFilterPad silenceremove_inputs[]
int64_t start_duration_opt
static void update_peak_float(SilenceRemoveContext *s, AVFrame *frame, int ch, int offset)
const char * name
Filter name.
int64_t start_silence_opt
A link between two filters.
AVFILTER_DEFINE_CLASS(silenceremove)
int channels
Number of channels.
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
static double compute_rms_float(SilenceRemoveContext *s, AVFrame *frame, int ch, int offset)
Context for an Audio FIFO Buffer.
static void copy_floatp(SilenceRemoveContext *s, AVFrame *out, AVFrame *in, int ch, int out_offset, int in_offset)
static __device__ float fabsf(float a)
static double compute_peak_floatp(SilenceRemoveContext *s, AVFrame *frame, int ch, int offset)
A filter pad used for either input or output.
static double compute_peak_doublep(SilenceRemoveContext *s, AVFrame *frame, int ch, int offset)
static void flush(SilenceRemoveContext *s, AVFrame *out, AVFilterLink *outlink, int *nb_samples_written, int flush_silence)
int channels
number of audio channels, only used for audio.
int av_audio_fifo_write(AVAudioFifo *af, void **data, int nb_samples)
Write data to an AVAudioFifo.
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
static void update_peak_floatp(SilenceRemoveContext *s, AVFrame *frame, int ch, int offset)
int64_t av_rescale_q(int64_t a, AVRational bq, AVRational cq)
Rescale a 64-bit integer by 2 rational numbers.
AVFrame * start_silence_hold
static double compute_rms_doublep(SilenceRemoveContext *s, AVFrame *frame, int ch, int offset)
#define FILTER_INPUTS(array)
Describe the class of an AVClass context structure.
static __device__ float fabs(float a)
static double compute_peak_double(SilenceRemoveContext *s, AVFrame *frame, int ch, int offset)
Rational number (pair of numerator and denominator).
AVAudioFifo * av_audio_fifo_alloc(enum AVSampleFormat sample_fmt, int channels, int nb_samples)
Allocate an AVAudioFifo.
size_t start_silence_offset
static const AVOption silenceremove_options[]
static av_cold void uninit(AVFilterContext *ctx)
static void copy_double(SilenceRemoveContext *s, AVFrame *out, AVFrame *in, int ch, int out_offset, int in_offset)
int64_t window_duration_opt
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
static int config_input(AVFilterLink *inlink)
float fmaxf(float, float)
int format
agreed upon media format
#define AV_NOPTS_VALUE
Undefined timestamp value.
size_t stop_holdoff_offset
static void update_peak_doublep(SilenceRemoveContext *s, AVFrame *frame, int ch, int offset)
AVFilterContext * src
source filter
it's the only field you need to keep, assuming you have a context. There is some magic you don't need to care about around this; just let it vf offset
int av_audio_fifo_size(AVAudioFifo *af)
Get the current number of samples in the AVAudioFifo available for reading.
static void clear_window(SilenceRemoveContext *s)
int av_audio_fifo_read(AVAudioFifo *af, void **data, int nb_samples)
Read data from an AVAudioFifo.
int av_samples_copy(uint8_t **dst, uint8_t *const *src, int dst_offset, int src_offset, int nb_samples, int nb_channels, enum AVSampleFormat sample_fmt)
Copy samples from src to dst.
#define av_assert2(cond)
assert() equivalent for checks that lie in speed-critical code.
static void update_rms_double(SilenceRemoveContext *s, AVFrame *frame, int ch, int offset)
int nb_samples
number of audio samples (per channel) described by this frame
static void update_peak_double(SilenceRemoveContext *s, AVFrame *frame, int ch, int offset)
#define i(width, name, range_min, range_max)
const AVFilter ff_af_silenceremove
#define AV_TIME_BASE
Internal time base represented as integer.
uint8_t ** extended_data
pointers to the data planes/channels.
const char * name
Pad name.
int64_t av_rescale(int64_t a, int64_t b, int64_t c)
Rescale a 64-bit integer with rounding to nearest.
static void update_rms_floatp(SilenceRemoveContext *s, AVFrame *frame, int ch, int offset)
void(* update)(struct SilenceRemoveContext *s, AVFrame *frame, int ch, int offset)
double(* compute)(struct SilenceRemoveContext *s, AVFrame *frame, int ch, int offset)
int av_samples_set_silence(uint8_t **audio_data, int offset, int nb_samples, int nb_channels, enum AVSampleFormat sample_fmt)
Fill an audio buffer with silence.
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter's request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame For filters that do not use the this method is called when a frame is wanted on an output For a it should directly call filter_frame on the corresponding output For a if there are queued frames already one of these frames should be pushed If the filter should request a frame on one of its repeatedly until at least one frame has been pushed Return or at least make progress towards producing a frame
static double compute_peak_float(SilenceRemoveContext *s, AVFrame *frame, int ch, int offset)
double fmax(double, double)
static void copy_doublep(SilenceRemoveContext *s, AVFrame *out, AVFrame *in, int ch, int out_offset, int in_offset)
@ AV_SAMPLE_FMT_DBLP
double, planar
Filter the word “frame” indicates either a video frame or a group of audio samples
AVRational time_base
Define the time base used by the PTS of the frames/samples which will pass through this link.
int64_t stop_duration_opt
static double compute_rms_floatp(SilenceRemoveContext *s, AVFrame *frame, int ch, int offset)
void(* copy)(struct SilenceRemoveContext *s, AVFrame *out, AVFrame *in, int ch, int out_offset, int in_offset)
#define FILTER_OUTPUTS(array)
static const AVFilterPad silenceremove_outputs[]
#define AVERROR_BUG
Internal bug, also see AVERROR_BUG2.
static void copy_float(SilenceRemoveContext *s, AVFrame *out, AVFrame *in, int ch, int out_offset, int in_offset)
@ AV_SAMPLE_FMT_DBL
double
#define FILTER_SAMPLEFMTS(...)