Go to the source code of this file.
|
#define | FRAME_SIZE_SHIFT 2 |
|
#define | FRAME_SIZE (120<<FRAME_SIZE_SHIFT) |
|
#define | WINDOW_SIZE (2*FRAME_SIZE) |
|
#define | FREQ_SIZE (FRAME_SIZE + 1) |
|
#define | PITCH_MIN_PERIOD 60 |
|
#define | PITCH_MAX_PERIOD 768 |
|
#define | PITCH_FRAME_SIZE 960 |
|
#define | PITCH_BUF_SIZE (PITCH_MAX_PERIOD+PITCH_FRAME_SIZE) |
|
#define | SQUARE(x) ((x)*(x)) |
|
#define | NB_BANDS 22 |
|
#define | CEPS_MEM 8 |
|
#define | NB_DELTA_CEPS 6 |
|
#define | NB_FEATURES (NB_BANDS+3*NB_DELTA_CEPS+2) |
|
#define | WEIGHTS_SCALE (1.f/256) |
|
#define | MAX_NEURONS 128 |
|
#define | ACTIVATION_TANH 0 |
|
#define | ACTIVATION_SIGMOID 1 |
|
#define | ACTIVATION_RELU 2 |
|
#define | Q15ONE 1.0f |
|
#define | F_ACTIVATION_TANH 0 |
|
#define | F_ACTIVATION_SIGMOID 1 |
|
#define | F_ACTIVATION_RELU 2 |
|
#define | FREE_MAYBE(ptr) do { if (ptr) free(ptr); } while (0) |
|
#define | FREE_DENSE(name) |
|
#define | FREE_GRU(name) |
|
#define | ALLOC_LAYER(type, name) |
|
#define | INPUT_VAL(name) |
|
#define | INPUT_ACTIVATION(name) |
|
#define | INPUT_ARRAY(name, len) |
|
#define | INPUT_ARRAY3(name, len0, len1, len2) |
|
#define | INPUT_DENSE(name) |
|
#define | INPUT_GRU(name) |
|
#define | RNN_MOVE(dst, src, n) (memmove((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) )) |
|
#define | RNN_CLEAR(dst, n) (memset((dst), 0, (n)*sizeof(*(dst)))) |
|
#define | RNN_COPY(dst, src, n) (memcpy((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) )) |
|
#define | INPUT_SIZE 42 |
|
#define | OFFSET(x) offsetof(AudioRNNContext, x) |
|
#define | AF AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM |
|
|
static void | rnnoise_model_free (RNNModel *model) |
|
static RNNModel * | rnnoise_model_from_file (FILE *f) |
|
static int | query_formats (AVFilterContext *ctx) |
|
static int | config_input (AVFilterLink *inlink) |
|
static void | biquad (float *y, float mem[2], const float *x, const float *b, const float *a, int N) |
|
static void | forward_transform (DenoiseState *st, AVComplexFloat *out, const float *in) |
|
static void | inverse_transform (DenoiseState *st, float *out, const AVComplexFloat *in) |
|
static void | compute_band_energy (float *bandE, const AVComplexFloat *X) |
|
static void | compute_band_corr (float *bandE, const AVComplexFloat *X, const AVComplexFloat *P) |
|
static void | frame_analysis (AudioRNNContext *s, DenoiseState *st, AVComplexFloat *X, float *Ex, const float *in) |
|
static void | frame_synthesis (AudioRNNContext *s, DenoiseState *st, float *out, const AVComplexFloat *y) |
|
static void | xcorr_kernel (const float *x, const float *y, float sum[4], int len) |
|
static float | celt_inner_prod (const float *x, const float *y, int N) |
|
static void | celt_pitch_xcorr (const float *x, const float *y, float *xcorr, int len, int max_pitch) |
|
static int | celt_autocorr (const float *x, float *ac, const float *window, int overlap, int lag, int n) |
|
static void | celt_lpc (float *lpc, const float *ac, int p) |
|
static void | celt_fir5 (const float *x, const float *num, float *y, int N, float *mem) |
|
static void | pitch_downsample (float *x[], float *x_lp, int len, int C) |
|
static void | dual_inner_prod (const float *x, const float *y01, const float *y02, int N, float *xy1, float *xy2) |
|
static float | compute_pitch_gain (float xy, float xx, float yy) |
|
static float | remove_doubling (float *x, int maxperiod, int minperiod, int N, int *T0_, int prev_period, float prev_gain) |
|
static void | find_best_pitch (float *xcorr, float *y, int len, int max_pitch, int *best_pitch) |
|
static void | pitch_search (const float *x_lp, float *y, int len, int max_pitch, int *pitch) |
|
static void | dct (AudioRNNContext *s, float *out, const float *in) |
|
static int | compute_frame_features (AudioRNNContext *s, DenoiseState *st, AVComplexFloat *X, AVComplexFloat *P, float *Ex, float *Ep, float *Exp, float *features, const float *in) |
|
static void | interp_band_gain (float *g, const float *bandE) |
|
static void | pitch_filter (AVComplexFloat *X, const AVComplexFloat *P, const float *Ex, const float *Ep, const float *Exp, const float *g) |
|
static float | tansig_approx (float x) |
|
static float | sigmoid_approx (float x) |
|
static void | compute_dense (const DenseLayer *layer, float *output, const float *input) |
|
static void | compute_gru (AudioRNNContext *s, const GRULayer *gru, float *state, const float *input) |
|
static void | compute_rnn (AudioRNNContext *s, RNNState *rnn, float *gains, float *vad, const float *input) |
|
static float | rnnoise_channel (AudioRNNContext *s, DenoiseState *st, float *out, const float *in) |
|
static int | rnnoise_channels (AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) |
|
static int | filter_frame (AVFilterLink *inlink, AVFrame *in) |
|
static int | activate (AVFilterContext *ctx) |
|
static av_cold int | init (AVFilterContext *ctx) |
|
static av_cold void | uninit (AVFilterContext *ctx) |
|
| AVFILTER_DEFINE_CLASS (arnndn) |
|
|
static const uint8_t | eband5ms [] |
|
static const int | second_check [16] = {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2} |
|
static const float | tansig_table [201] |
|
static const AVFilterPad | inputs [] |
|
static const AVFilterPad | outputs [] |
|
static const AVOption | arnndn_options [] |
|
AVFilter | ff_af_arnndn |
|
◆ FRAME_SIZE_SHIFT
#define FRAME_SIZE_SHIFT 2 |
◆ FRAME_SIZE
◆ WINDOW_SIZE
◆ FREQ_SIZE
◆ PITCH_MIN_PERIOD
#define PITCH_MIN_PERIOD 60 |
◆ PITCH_MAX_PERIOD
#define PITCH_MAX_PERIOD 768 |
◆ PITCH_FRAME_SIZE
#define PITCH_FRAME_SIZE 960 |
◆ PITCH_BUF_SIZE
◆ SQUARE
#define SQUARE(x) ((x)*(x))
◆ NB_BANDS
◆ CEPS_MEM
◆ NB_DELTA_CEPS
◆ NB_FEATURES
◆ WEIGHTS_SCALE
#define WEIGHTS_SCALE (1.f/256) |
◆ MAX_NEURONS
◆ ACTIVATION_TANH
#define ACTIVATION_TANH 0 |
◆ ACTIVATION_SIGMOID
#define ACTIVATION_SIGMOID 1 |
◆ ACTIVATION_RELU
#define ACTIVATION_RELU 2 |
◆ Q15ONE
◆ F_ACTIVATION_TANH
#define F_ACTIVATION_TANH 0 |
◆ F_ACTIVATION_SIGMOID
#define F_ACTIVATION_SIGMOID 1 |
◆ F_ACTIVATION_RELU
#define F_ACTIVATION_RELU 2 |
◆ FREE_MAYBE
#define FREE_MAYBE(ptr) do { if (ptr) free(ptr); } while (0)
◆ FREE_DENSE
#define FREE_DENSE(name)
|
Value: do { \
if (model->name) { \
av_free((void *) model->name->input_weights); \
av_free((void *) model->name->bias); \
av_free((void *) model->name); \
} \
} while (0)
◆ FREE_GRU
Value: do { \
if (model->name) { \
av_free((void *) model->name->input_weights); \
av_free((void *) model->name->recurrent_weights); \
av_free((void *) model->name->bias); \
av_free((void *) model->name); \
} \
} while (0)
◆ ALLOC_LAYER
Value:
rnnoise_model_free(ret); \
} \
◆ INPUT_VAL
#define INPUT_VAL(name)
|
Value: do { \
if (fscanf(f, "%d", &in) != 1 || in < 0 || in > 128) { \
rnnoise_model_free(ret); \
} \
} while (0)
◆ INPUT_ACTIVATION
#define INPUT_ACTIVATION(name)
|
Value: do { \
int activation; \
INPUT_VAL(activation); \
switch (activation) { \
break; \
break; \
default: \
} \
} while (0)
◆ INPUT_ARRAY
Value: do { \
rnnoise_model_free(ret); \
} \
for (int i = 0; i < (len); i++) { \
if (fscanf(f, "%d", &in) != 1) { \
rnnoise_model_free(ret); \
} \
} \
} while (0)
◆ INPUT_ARRAY3
#define INPUT_ARRAY3(name, len0, len1, len2)
Value: do { \
rnnoise_model_free(ret); \
} \
for (int k = 0; k < (len0); k++) { \
for (int i = 0; i < (len2); i++) { \
for (int j = 0; j < (len1); j++) { \
if (fscanf(f, "%d", &in) != 1) { \
rnnoise_model_free(ret); \
} \
} \
} \
} \
} while (0)
◆ INPUT_DENSE
#define INPUT_DENSE(name)
|
Value: do { \
INPUT_VAL(name->nb_inputs); \
INPUT_VAL(name->nb_neurons); \
ret->name ## _size = name->nb_neurons; \
INPUT_ACTIVATION(name->activation); \
INPUT_ARRAY(name->input_weights, name->nb_inputs * name->nb_neurons); \
INPUT_ARRAY(name->bias, name->nb_neurons); \
} while (0)
◆ INPUT_GRU
#define INPUT_GRU(name)
|
Value: do { \
INPUT_VAL(name->nb_inputs); \
INPUT_VAL(name->nb_neurons); \
ret->name ## _size = name->nb_neurons; \
INPUT_ACTIVATION(name->activation); \
INPUT_ARRAY3(name->input_weights, name->nb_inputs, name->nb_neurons, 3); \
INPUT_ARRAY3(name->recurrent_weights, name->nb_neurons, name->nb_neurons, 3); \
INPUT_ARRAY(name->bias, name->nb_neurons * 3); \
} while (0)
◆ RNN_MOVE
#define RNN_MOVE(dst, src, n) (memmove((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) ))
◆ RNN_CLEAR
#define RNN_CLEAR(dst, n) (memset((dst), 0, (n)*sizeof(*(dst))))
◆ RNN_COPY
#define RNN_COPY(dst, src, n) (memcpy((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) ))
◆ INPUT_SIZE
◆ OFFSET
◆ AF
◆ rnnoise_model_free()
static void rnnoise_model_free(RNNModel *model)
◆ rnnoise_model_from_file()
static RNNModel *rnnoise_model_from_file(FILE *f)
◆ query_formats()
◆ config_input()
◆ biquad()
static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N)
◆ forward_transform()
◆ inverse_transform()
◆ compute_band_energy()
static void compute_band_energy(float *bandE, const AVComplexFloat *X)
◆ compute_band_corr()
◆ frame_analysis()
◆ frame_synthesis()
◆ xcorr_kernel()
static inline void xcorr_kernel(const float *x, const float *y, float sum[4], int len)
◆ celt_inner_prod()
static inline float celt_inner_prod(const float *x, const float *y, int N)
◆ celt_pitch_xcorr()
static void celt_pitch_xcorr(const float *x, const float *y, float *xcorr, int len, int max_pitch)
◆ celt_autocorr()
static int celt_autocorr(const float *x, float *ac, const float *window, int overlap, int lag, int n)
◆ celt_lpc()
static void celt_lpc(float *lpc, const float *ac, int p)
◆ celt_fir5()
static void celt_fir5(const float *x, const float *num, float *y, int N, float *mem)
◆ pitch_downsample()
static void pitch_downsample(float *x[], float *x_lp, int len, int C)
◆ dual_inner_prod()
static inline void dual_inner_prod(const float *x, const float *y01, const float *y02, int N, float *xy1, float *xy2)
◆ compute_pitch_gain()
static float compute_pitch_gain(float xy, float xx, float yy)
◆ remove_doubling()
static float remove_doubling(float *x, int maxperiod, int minperiod, int N, int *T0_, int prev_period, float prev_gain)
◆ find_best_pitch()
static void find_best_pitch(float *xcorr, float *y, int len, int max_pitch, int *best_pitch)
◆ pitch_search()
static void pitch_search(const float *x_lp, float *y, int len, int max_pitch, int *pitch)
◆ dct()
◆ compute_frame_features()
◆ interp_band_gain()
static void interp_band_gain(float *g, const float *bandE)
◆ pitch_filter()
static void pitch_filter(AVComplexFloat *X, const AVComplexFloat *P, const float *Ex, const float *Ep, const float *Exp, const float *g)
◆ tansig_approx()
static inline float tansig_approx(float x)
◆ sigmoid_approx()
static inline float sigmoid_approx(float x)
◆ compute_dense()
static void compute_dense(const DenseLayer *layer, float *output, const float *input)
◆ compute_gru()
◆ compute_rnn()
◆ rnnoise_channel()
◆ rnnoise_channels()
◆ filter_frame()
◆ activate()
◆ init()
◆ uninit()
◆ AVFILTER_DEFINE_CLASS()
AVFILTER_DEFINE_CLASS(arnndn)
◆ eband5ms
Initial value: = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 34, 40, 48, 60, 78, 100
}
Definition at line 430 of file af_arnndn.c.
Referenced by compute_band_corr(), compute_band_energy(), and interp_band_gain().
◆ second_check
static const int second_check[16] = {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2}
◆ tansig_table
static const float tansig_table[201]
◆ inputs
Initial value: = {
{
.name = "default",
},
}
Definition at line 1509 of file af_arnndn.c.
◆ outputs
Initial value: = {
{
.name = "default",
},
}
Definition at line 1518 of file af_arnndn.c.
◆ arnndn_options
◆ ff_af_arnndn
Initial value: = {
.name = "arnndn",
.priv_class = &arnndn_class,
}
Definition at line 1537 of file af_arnndn.c.
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31)))) #define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac) { } void ff_audio_convert_free(AudioConvert **ac) { if(! 
*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);} AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map) { AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method !=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2) { ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc) { av_free(ac);return NULL;} return ac;} in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar) { ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar ? ac->channels :1;} else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;} int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in) { int use_generic=1;int len=in->nb_samples;int p;if(ac->dc) { av_log(ac->avr, AV_LOG_TRACE, "%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> in