FFmpeg
aacpsy.c
Go to the documentation of this file.
1 /*
2  * AAC encoder psychoacoustic model
3  * Copyright (C) 2008 Konstantin Shishkov
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * AAC encoder psychoacoustic model
25  */
26 
27 #include "libavutil/attributes.h"
28 #include "libavutil/ffmath.h"
29 #include "libavutil/mem.h"
30 
31 #include "avcodec.h"
32 #include "aac.h"
33 #include "psymodel.h"
34 
35 /***********************************
36  * TODOs:
37  * try other bitrate controlling mechanism (maybe use ratecontrol.c?)
38  * control quality for quality-based output
39  **********************************/
40 
41 /**
42  * constants for 3GPP AAC psychoacoustic model
43  * @{
44  */
45 #define PSY_3GPP_THR_SPREAD_HI 1.5f // spreading factor for low-to-hi threshold spreading (15 dB/Bark)
46 #define PSY_3GPP_THR_SPREAD_LOW 3.0f // spreading factor for hi-to-low threshold spreading (30 dB/Bark)
47 /* spreading factor for low-to-hi energy spreading, long block, > 22kbps/channel (20dB/Bark) */
48 #define PSY_3GPP_EN_SPREAD_HI_L1 2.0f
49 /* spreading factor for low-to-hi energy spreading, long block, <= 22kbps/channel (15dB/Bark) */
50 #define PSY_3GPP_EN_SPREAD_HI_L2 1.5f
51 /* spreading factor for low-to-hi energy spreading, short block (15 dB/Bark) */
52 #define PSY_3GPP_EN_SPREAD_HI_S 1.5f
53 /* spreading factor for hi-to-low energy spreading, long block (30dB/Bark) */
54 #define PSY_3GPP_EN_SPREAD_LOW_L 3.0f
55 /* spreading factor for hi-to-low energy spreading, short block (20dB/Bark) */
56 #define PSY_3GPP_EN_SPREAD_LOW_S 2.0f
57 
/* Pre-echo control: the threshold is kept at or above RPEMIN times its own
 * value and at or below RPELEV times the previous frame's threshold in quiet. */
58 #define PSY_3GPP_RPEMIN 0.01f
59 #define PSY_3GPP_RPELEV 2.0f
60 
/* Perceptual entropy calculation: when log2(energy / thr) is below C1,
 * calc_pe_3gpp() replaces it by value * C3 + C2. */
61 #define PSY_3GPP_C1 3.0f /* log2(8) */
62 #define PSY_3GPP_C2 1.3219281f /* log2(2.5) */
63 #define PSY_3GPP_C3 0.55935729f /* 1 - C2 / C1 */
64 
/* Bounds that the per-band min_snr value is clipped to. */
65 #define PSY_SNR_1DB 7.9432821e-1f /* -1dB */
66 #define PSY_SNR_25DB 3.1622776e-3f /* -25dB */
67 
/* Bit reservoir control constants used by calc_bit_demand():
 * the _L set applies to long windows, the _S set to short windows. */
68 #define PSY_3GPP_SAVE_SLOPE_L -0.46666667f
69 #define PSY_3GPP_SAVE_SLOPE_S -0.36363637f
70 #define PSY_3GPP_SAVE_ADD_L -0.84285712f
71 #define PSY_3GPP_SAVE_ADD_S -0.75f
72 #define PSY_3GPP_SPEND_SLOPE_L 0.66666669f
73 #define PSY_3GPP_SPEND_SLOPE_S 0.81818181f
74 #define PSY_3GPP_SPEND_ADD_L -0.35f
75 #define PSY_3GPP_SPEND_ADD_S -0.26111111f
76 #define PSY_3GPP_CLIP_LO_L 0.2f
77 #define PSY_3GPP_CLIP_LO_S 0.2f
78 #define PSY_3GPP_CLIP_HI_L 0.95f
79 #define PSY_3GPP_CLIP_HI_S 0.75f
80 
/* Hole avoidance: factor applied to the spread energy before it is compared
 * with the band energy (SHORT when 8 windows are in use, LONG otherwise). */
81 #define PSY_3GPP_AH_THR_LONG 0.5f
82 #define PSY_3GPP_AH_THR_SHORT 0.63f
83 
/* Weight of the old pe.min in the running average computed by calc_bit_demand(). */
84 #define PSY_PE_FORGET_SLOPE 511
85 
86 enum {
90 };
91 
/* Conversion between a bit count and perceptual entropy (constant factor 1.18);
 * the two macros are inverses of each other. */
#define PSY_3GPP_BITS_TO_PE(bits) ((bits) * 1.18f)
#define PSY_3GPP_PE_TO_BITS(pe)   ((pe) / 1.18f)
94 
95 /* LAME psy model constants */
96 #define PSY_LAME_FIR_LEN 21 ///< LAME psy model FIR order
97 #define AAC_BLOCK_SIZE_LONG 1024 ///< long block size
98 #define AAC_BLOCK_SIZE_SHORT 128 ///< short block size
99 #define AAC_NUM_BLOCKS_SHORT 8 ///< number of blocks in a short sequence
100 #define PSY_LAME_NUM_SUBBLOCKS 3 ///< Number of sub-blocks in each short block
101 
102 /**
103  * @}
104  */
105 
106 /**
107  * information for single band used by 3GPP TS26.403-inspired psychoacoustic model
108  */
109 typedef struct AacPsyBand{
110  float energy; ///< band energy (sum of squared spectral coefficients, filled in by calc_thr_3gpp)
111  float thr; ///< energy threshold; starts as energy * 0.001258925f and is refined afterwards
112  float thr_quiet; ///< threshold in quiet: thr as it stands right after the ATH floor is applied
113  float nz_lines; ///< number of non-zero spectral lines (an estimate derived from the band's form factor)
114  float active_lines; ///< number of active spectral lines: 0 unless energy > thr, else nz_lines (scaled by PSY_3GPP_C3 for low PE)
115  float pe; ///< perceptual entropy, computed by calc_pe_3gpp
116  float pe_const; ///< constant part of the PE calculation (log2 energy term times nz_lines)
117  float norm_fac; ///< normalization factor for linearization (active_lines / thr, or 0 when thr is not positive)
118  int avoid_holes; ///< hole avoidance flag, compared against the PSY_3GPP_AH_* values
119 }AacPsyBand;
120 
121 /**
122  * single/pair channel context for psychoacoustic model
123  */
124 typedef struct AacPsyChannel{
125  AacPsyBand band[128]; ///< bands information
126  AacPsyBand prev_band[128]; ///< bands information from the previous frame
127 
128  float win_energy; ///< sliding average of channel energy
129  float iir_state[2]; ///< hi-pass IIR filter state
130  uint8_t next_grouping; ///< stored grouping scheme for the next frame (in case of 8 short window sequence)
131  enum WindowSequence next_window_seq; ///< window sequence to be used in the next frame
132  /* LAME psy model specific members */
133  float attack_threshold; ///< attack threshold for this channel
135  int prev_attack; ///< attack value for the last short block in the previous sequence
137 
138 /**
139  * psychoacoustic model frame type-dependent coefficients
140  */
141 typedef struct AacPsyCoeffs{
142  float ath; ///< absolute threshold of hearing of the band, relative to the minimum ATH value
143  float barks; ///< Bark value of the band (mean of the Bark values at this and the previous band edge)
144  float spread_low[2]; ///< spreading factor for high-to-low spreading: [0] threshold, [1] energy
145  float spread_hi [2]; ///< spreading factor for low-to-high spreading: [0] threshold, [1] energy
146  float min_snr; ///< minimal SNR, clipped to [PSY_SNR_25DB, PSY_SNR_1DB] when initialised
147 }AacPsyCoeffs;
148 
149 /**
150  * 3GPP TS26.403-inspired psychoacoustic model specific data
151  */
152 typedef struct AacPsyContext{
153  int chan_bitrate; ///< bitrate per channel
154  int frame_bits; ///< average bits per frame
155  int fill_level; ///< bit reservoir fill level
156  struct {
157  float min; ///< minimum allowed PE for bit factor calculation
158  float max; ///< maximum allowed PE for bit factor calculation
159  float previous; ///< allowed PE of the previous frame
160  float correction; ///< PE correction factor
161  } pe;
164  float global_quality; ///< normalized global quality taken from avctx
166 
167 /**
168  * LAME psy model preset struct
169  */
170 typedef struct PsyLamePreset {
171  int quality; ///< Quality to map the rest of the values to.
172  /* This is overloaded to be both kbps per channel in ABR mode, and
 * requested quality in constant quality mode.
 */
175  float st_lrm; ///< short threshold for L, R, and M channels
176 } PsyLamePreset;
177 
178 /**
179  * LAME psy model preset table for ABR
180  */
181 static const PsyLamePreset psy_abr_map[] = {
182 /* TODO: Tuning. These were taken from LAME. */
/* NOTE: the attack-threshold lookup that follows this table uses hard-coded
 * indices (last entry 12, loop bound 13); keep them in sync with the number
 * of rows here. */
183 /* kbps/ch st_lrm */
184  { 8, 6.60},
185  { 16, 6.60},
186  { 24, 6.60},
187  { 32, 6.60},
188  { 40, 6.60},
189  { 48, 6.60},
190  { 56, 6.60},
191  { 64, 6.40},
192  { 80, 6.00},
193  { 96, 5.60},
194  {112, 5.20},
195  {128, 5.20},
196  {160, 5.20}
197 };
198 
199 /**
200 * LAME psy model preset table for constant quality
201 */
202 static const PsyLamePreset psy_vbr_map[] = {
/* Every quality level currently maps to the same short-block threshold. */
203 /* vbr_q st_lrm */
204  { 0, 4.20},
205  { 1, 4.20},
206  { 2, 4.20},
207  { 3, 4.20},
208  { 4, 4.20},
209  { 5, 4.20},
210  { 6, 4.20},
211  { 7, 4.20},
212  { 8, 4.20},
213  { 9, 4.20},
214  {10, 4.20}
215 };
216 
217 /**
218  * LAME psy model FIR coefficient table
219  */
220 static const float psy_fir_coeffs[] = {
/* Ten entries. psy_hp_filter() walks them two at a time (indices j and j + 1)
 * and multiplies each one with the sum of two input samples. The even-indexed
 * entries are of the order 1e-17, i.e. practically zero. */
221  -8.65163e-18 * 2, -0.00851586 * 2, -6.74764e-18 * 2, 0.0209036 * 2,
222  -3.36639e-17 * 2, -0.0438162 * 2, -1.54175e-17 * 2, 0.0931738 * 2,
223  -5.52212e-17 * 2, -0.313819 * 2
224 };
225 
226 #if ARCH_MIPS
227 # include "mips/aacpsy_mips.h"
228 #endif /* ARCH_MIPS */
229 
230 /**
231  * Calculate the ABR attack threshold from the above LAME psymodel table.
232  */
234 {
235  /* Assume max bitrate to start with */
236  int lower_range = 12, upper_range = 12;
237  int lower_range_kbps = psy_abr_map[12].quality;
238  int upper_range_kbps = psy_abr_map[12].quality;
239  int i;
240 
241  /* Determine which bitrates the value specified falls between.
242  * If the loop ends without breaking our above assumption of 320kbps was correct.
243  */
244  for (i = 1; i < 13; i++) {
246  upper_range = i;
247  upper_range_kbps = psy_abr_map[i ].quality;
248  lower_range = i - 1;
249  lower_range_kbps = psy_abr_map[i - 1].quality;
250  break; /* Upper range found */
251  }
252  }
253 
254  /* Determine which range the value specified is closer to */
255  if ((upper_range_kbps - bitrate) > (bitrate - lower_range_kbps))
256  return psy_abr_map[lower_range].st_lrm;
257  return psy_abr_map[upper_range].st_lrm;
258 }
259 
260 /**
261  * LAME psy model specific initialization
262  */
264 {
265  int i, j;
266 
267  for (i = 0; i < avctx->ch_layout.nb_channels; i++) {
268  AacPsyChannel *pch = &ctx->ch[i];
269 
270  if (avctx->flags & AV_CODEC_FLAG_QSCALE)
272  else
274 
275  for (j = 0; j < AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS; j++)
276  pch->prev_energy_subshort[j] = 10.0f;
277  }
278 }
279 
280 /**
281  * Calculate Bark value for given line.
282  */
283 static av_cold float calc_bark(float f)
284 {
285  return 13.3f * atanf(0.00076f * f) + 3.5f * atanf((f / 7500.0f) * (f / 7500.0f));
286 }
287 
288 #define ATH_ADD 4
289 /**
290  * Calculate ATH value for given frequency.
291  * Borrowed from Lame.
292  */
293 static av_cold float ath(float f, float add)
294 {
295  f /= 1000.0f;
296  return 3.64 * pow(f, -0.8)
297  - 6.8 * exp(-0.6 * (f - 3.4) * (f - 3.4))
298  + 6.0 * exp(-0.15 * (f - 8.7) * (f - 8.7))
299  + (0.6 + 0.04 * add) * 0.001 * f * f * f * f;
300 }
301 
303  AacPsyContext *pctx;
304  float bark;
305  int i, j, g, start;
306  float prev, minscale, minath, minsnr, pe_min;
307  int chan_bitrate = ctx->avctx->bit_rate / ((ctx->avctx->flags & AV_CODEC_FLAG_QSCALE) ? 2.0f : ctx->avctx->ch_layout.nb_channels);
308 
309  const int bandwidth = ctx->cutoff ? ctx->cutoff : AAC_CUTOFF(ctx->avctx);
310  const float num_bark = calc_bark((float)bandwidth);
311 
312  if (bandwidth <= 0)
313  return AVERROR(EINVAL);
314 
315  ctx->model_priv_data = av_mallocz(sizeof(AacPsyContext));
316  if (!ctx->model_priv_data)
317  return AVERROR(ENOMEM);
318  pctx = ctx->model_priv_data;
319  pctx->global_quality = (ctx->avctx->global_quality ? ctx->avctx->global_quality : 120) * 0.01f;
320 
321  if (ctx->avctx->flags & AV_CODEC_FLAG_QSCALE) {
322  /* Use the target average bitrate to compute spread parameters */
323  chan_bitrate = (int)(chan_bitrate / 120.0 * (ctx->avctx->global_quality ? ctx->avctx->global_quality : 120));
324  }
325 
326  pctx->chan_bitrate = chan_bitrate;
327  pctx->frame_bits = FFMIN(2560, chan_bitrate * AAC_BLOCK_SIZE_LONG / ctx->avctx->sample_rate);
328  pctx->pe.min = 8.0f * AAC_BLOCK_SIZE_LONG * bandwidth / (ctx->avctx->sample_rate * 2.0f);
329  pctx->pe.max = 12.0f * AAC_BLOCK_SIZE_LONG * bandwidth / (ctx->avctx->sample_rate * 2.0f);
330  ctx->bitres.size = 6144 - pctx->frame_bits;
331  ctx->bitres.size -= ctx->bitres.size % 8;
332  pctx->fill_level = ctx->bitres.size;
333  minath = ath(3410 - 0.733 * ATH_ADD, ATH_ADD);
334  for (j = 0; j < 2; j++) {
335  AacPsyCoeffs *coeffs = pctx->psy_coef[j];
336  const uint8_t *band_sizes = ctx->bands[j];
337  float line_to_frequency = ctx->avctx->sample_rate / (j ? 256.f : 2048.0f);
338  float avg_chan_bits = chan_bitrate * (j ? 128.0f : 1024.0f) / ctx->avctx->sample_rate;
339  /* reference encoder uses 2.4% here instead of 60% like the spec says */
340  float bark_pe = 0.024f * PSY_3GPP_BITS_TO_PE(avg_chan_bits) / num_bark;
341  float en_spread_low = j ? PSY_3GPP_EN_SPREAD_LOW_S : PSY_3GPP_EN_SPREAD_LOW_L;
342  /* High energy spreading for long blocks <= 22kbps/channel and short blocks are the same. */
343  float en_spread_hi = (j || (chan_bitrate <= 22.0f)) ? PSY_3GPP_EN_SPREAD_HI_S : PSY_3GPP_EN_SPREAD_HI_L1;
344 
345  i = 0;
346  prev = 0.0;
347  for (g = 0; g < ctx->num_bands[j]; g++) {
348  i += band_sizes[g];
349  bark = calc_bark((i-1) * line_to_frequency);
350  coeffs[g].barks = (bark + prev) / 2.0;
351  prev = bark;
352  }
353  for (g = 0; g < ctx->num_bands[j] - 1; g++) {
354  AacPsyCoeffs *coeff = &coeffs[g];
355  float bark_width = coeffs[g+1].barks - coeffs->barks;
356  coeff->spread_low[0] = ff_exp10(-bark_width * PSY_3GPP_THR_SPREAD_LOW);
357  coeff->spread_hi [0] = ff_exp10(-bark_width * PSY_3GPP_THR_SPREAD_HI);
358  coeff->spread_low[1] = ff_exp10(-bark_width * en_spread_low);
359  coeff->spread_hi [1] = ff_exp10(-bark_width * en_spread_hi);
360  pe_min = bark_pe * bark_width;
361  minsnr = exp2(pe_min / band_sizes[g]) - 1.5f;
362  coeff->min_snr = av_clipf(1.0f / minsnr, PSY_SNR_25DB, PSY_SNR_1DB);
363  }
364  start = 0;
365  for (g = 0; g < ctx->num_bands[j]; g++) {
366  minscale = ath(start * line_to_frequency, ATH_ADD);
367  for (i = 1; i < band_sizes[g]; i++)
368  minscale = FFMIN(minscale, ath((start + i) * line_to_frequency, ATH_ADD));
369  coeffs[g].ath = minscale - minath;
370  start += band_sizes[g];
371  }
372  }
373 
374  pctx->ch = av_calloc(ctx->avctx->ch_layout.nb_channels, sizeof(*pctx->ch));
375  if (!pctx->ch) {
376  av_freep(&ctx->model_priv_data);
377  return AVERROR(ENOMEM);
378  }
379 
380  lame_window_init(pctx, ctx->avctx);
381 
382  return 0;
383 }
384 
/**
 * One step of the IIR filter that feeds the block switching decision.
 *
 * @param in    current input sample
 * @param state filter memory, updated in place: state[0] is the previous
 *              input sample, state[1] the previous output sample
 * @return filtered sample
 */
static float iir_filter(int in, float state[2])
{
    const float last_in  = state[0];
    const float last_out = state[1];
    const float out      = 0.7548f * (in - last_in) + 0.5095f * last_out;

    state[0] = in;
    state[1] = out;
    return out;
}
397 
398 /**
399  * window grouping information stored as bits (0 - new group, 1 - group continues)
400  */
401 static const uint8_t window_grouping[9] = {
/* Indexed by attack_n: 0 when no short window exceeded the attack threshold,
 * otherwise 1 + the index of the first window that did. Entry 0 is also the
 * pattern used for short sequences when no lookahead is available.
 * Bit i of an entry describes window i. */
402  0xB6, 0x6C, 0xD8, 0xB2, 0x66, 0xC6, 0x96, 0x36, 0x36
403 };
404 
405 /**
406  * Tell encoder which window types to use.
407  * @see 3GPP TS26.403 5.4.1 "Blockswitching"
408  */
410  const int16_t *audio,
411  const int16_t *la,
412  int channel, int prev_type)
413 {
414  int i, j;
415  int br = ((AacPsyContext*)ctx->model_priv_data)->chan_bitrate;
416  int attack_ratio = br <= 16000 ? 18 : 10;
417  AacPsyContext *pctx = (AacPsyContext*) ctx->model_priv_data;
418  AacPsyChannel *pch = &pctx->ch[channel];
419  uint8_t grouping = 0;
420  int next_type = pch->next_window_seq;
421  FFPsyWindowInfo wi = { { 0 } };
422 
423  if (la) {
424  float s[8], v;
425  int switch_to_eight = 0;
426  float sum = 0.0, sum2 = 0.0;
427  int attack_n = 0;
428  int stay_short = 0;
429  for (i = 0; i < 8; i++) {
430  for (j = 0; j < 128; j++) {
431  v = iir_filter(la[i*128+j], pch->iir_state);
432  sum += v*v;
433  }
434  s[i] = sum;
435  sum2 += sum;
436  }
437  for (i = 0; i < 8; i++) {
438  if (s[i] > pch->win_energy * attack_ratio) {
439  attack_n = i + 1;
440  switch_to_eight = 1;
441  break;
442  }
443  }
444  pch->win_energy = pch->win_energy*7/8 + sum2/64;
445 
446  wi.window_type[1] = prev_type;
447  switch (prev_type) {
448  case ONLY_LONG_SEQUENCE:
449  wi.window_type[0] = switch_to_eight ? LONG_START_SEQUENCE : ONLY_LONG_SEQUENCE;
450  next_type = switch_to_eight ? EIGHT_SHORT_SEQUENCE : ONLY_LONG_SEQUENCE;
451  break;
452  case LONG_START_SEQUENCE:
453  wi.window_type[0] = EIGHT_SHORT_SEQUENCE;
454  grouping = pch->next_grouping;
455  next_type = switch_to_eight ? EIGHT_SHORT_SEQUENCE : LONG_STOP_SEQUENCE;
456  break;
457  case LONG_STOP_SEQUENCE:
458  wi.window_type[0] = switch_to_eight ? LONG_START_SEQUENCE : ONLY_LONG_SEQUENCE;
459  next_type = switch_to_eight ? EIGHT_SHORT_SEQUENCE : ONLY_LONG_SEQUENCE;
460  break;
462  stay_short = next_type == EIGHT_SHORT_SEQUENCE || switch_to_eight;
463  wi.window_type[0] = stay_short ? EIGHT_SHORT_SEQUENCE : LONG_STOP_SEQUENCE;
464  grouping = next_type == EIGHT_SHORT_SEQUENCE ? pch->next_grouping : 0;
465  next_type = switch_to_eight ? EIGHT_SHORT_SEQUENCE : LONG_STOP_SEQUENCE;
466  break;
467  }
468 
469  pch->next_grouping = window_grouping[attack_n];
470  pch->next_window_seq = next_type;
471  } else {
472  for (i = 0; i < 3; i++)
473  wi.window_type[i] = prev_type;
474  grouping = (prev_type == EIGHT_SHORT_SEQUENCE) ? window_grouping[0] : 0;
475  }
476 
477  wi.window_shape = 1;
478  if (wi.window_type[0] != EIGHT_SHORT_SEQUENCE) {
479  wi.num_windows = 1;
480  wi.grouping[0] = 1;
481  } else {
482  int lastgrp = 0;
483  wi.num_windows = 8;
484  for (i = 0; i < 8; i++) {
485  if (!((grouping >> i) & 1))
486  lastgrp = i;
487  wi.grouping[lastgrp]++;
488  }
489  }
490 
491  return wi;
492 }
493 
494 /* 5.6.1.2 "Calculation of Bit Demand" */
495 static int calc_bit_demand(AacPsyContext *ctx, float pe, int bits, int size,
496  int short_window)
497 {
498  const float bitsave_slope = short_window ? PSY_3GPP_SAVE_SLOPE_S : PSY_3GPP_SAVE_SLOPE_L;
499  const float bitsave_add = short_window ? PSY_3GPP_SAVE_ADD_S : PSY_3GPP_SAVE_ADD_L;
500  const float bitspend_slope = short_window ? PSY_3GPP_SPEND_SLOPE_S : PSY_3GPP_SPEND_SLOPE_L;
501  const float bitspend_add = short_window ? PSY_3GPP_SPEND_ADD_S : PSY_3GPP_SPEND_ADD_L;
502  const float clip_low = short_window ? PSY_3GPP_CLIP_LO_S : PSY_3GPP_CLIP_LO_L;
503  const float clip_high = short_window ? PSY_3GPP_CLIP_HI_S : PSY_3GPP_CLIP_HI_L;
504  float clipped_pe, bit_save, bit_spend, bit_factor, fill_level, forgetful_min_pe;
505 
506  ctx->fill_level += ctx->frame_bits - bits;
507  ctx->fill_level = av_clip(ctx->fill_level, 0, size);
508  fill_level = av_clipf((float)ctx->fill_level / size, clip_low, clip_high);
509  clipped_pe = av_clipf(pe, ctx->pe.min, ctx->pe.max);
510  bit_save = (fill_level + bitsave_add) * bitsave_slope;
511  assert(bit_save <= 0.3f && bit_save >= -0.05000001f);
512  bit_spend = (fill_level + bitspend_add) * bitspend_slope;
513  assert(bit_spend <= 0.5f && bit_spend >= -0.1f);
514  /* The bit factor graph in the spec is obviously incorrect.
515  * bit_spend + ((bit_spend - bit_spend))...
516  * The reference encoder subtracts everything from 1, but also seems incorrect.
517  * 1 - bit_save + ((bit_spend + bit_save))...
518  * Hopefully below is correct.
519  */
520  bit_factor = 1.0f - bit_save + ((bit_spend - bit_save) / (ctx->pe.max - ctx->pe.min)) * (clipped_pe - ctx->pe.min);
521  /* NOTE: The reference encoder attempts to center pe max/min around the current pe.
522  * Here we do that by slowly forgetting pe.min when pe stays in a range that makes
523  * it unlikely (ie: above the mean)
524  */
525  ctx->pe.max = FFMAX(pe, ctx->pe.max);
526  forgetful_min_pe = ((ctx->pe.min * PSY_PE_FORGET_SLOPE)
527  + FFMAX(ctx->pe.min, pe * (pe / ctx->pe.max))) / (PSY_PE_FORGET_SLOPE + 1);
528  ctx->pe.min = FFMIN(pe, forgetful_min_pe);
529 
530  /* NOTE: allocate a minimum of 1/8th average frame bits, to avoid
531  * reservoir starvation from producing zero-bit frames
532  */
533  return FFMIN(
534  ctx->frame_bits * bit_factor,
535  FFMAX(ctx->frame_bits + size - bits, ctx->frame_bits / 8));
536 }
537 
538 static float calc_pe_3gpp(AacPsyBand *band)
539 {
540  float pe, a;
541 
542  band->pe = 0.0f;
543  band->pe_const = 0.0f;
544  band->active_lines = 0.0f;
545  if (band->energy > band->thr) {
546  a = log2f(band->energy);
547  pe = a - log2f(band->thr);
548  band->active_lines = band->nz_lines;
549  if (pe < PSY_3GPP_C1) {
550  pe = pe * PSY_3GPP_C3 + PSY_3GPP_C2;
551  a = a * PSY_3GPP_C3 + PSY_3GPP_C2;
552  band->active_lines *= PSY_3GPP_C3;
553  }
554  band->pe = pe * band->nz_lines;
555  band->pe_const = a * band->nz_lines;
556  }
557 
558  return band->pe;
559 }
560 
561 static float calc_reduction_3gpp(float a, float desired_pe, float pe,
562  float active_lines)
563 {
564  float thr_avg, reduction;
565 
566  if(active_lines == 0.0)
567  return 0;
568 
569  thr_avg = exp2f((a - pe) / (4.0f * active_lines));
570  reduction = exp2f((a - desired_pe) / (4.0f * active_lines)) - thr_avg;
571 
572  return FFMAX(reduction, 0.0f);
573 }
574 
/**
 * Apply a reduction value to the threshold of one band.
 *
 * Bands whose energy does not exceed their threshold keep the threshold
 * unchanged. Callers in this file store the returned value in band->thr.
 *
 * @param band      band to evaluate; thr, energy and avoid_holes are read
 * @param min_snr   minimal SNR factor of the band; it is multiplied with the
 *                  band energy (it is initialised from a reciprocal,
 *                  1.0f / minsnr, elsewhere in this file)
 * @param reduction value added to the fourth root of the threshold
 * @return the reduced threshold
 */
575 static float calc_reduced_thr_3gpp(AacPsyBand *band, float min_snr,
576  float reduction)
577 {
578  float thr = band->thr;
579 
580  if (band->energy > thr) {
 /* thr = (thr^(1/4) + reduction)^4 */
581  thr = sqrtf(thr);
582  thr = sqrtf(thr) + reduction;
583  thr *= thr;
584  thr *= thr;
585 
586  /* This deviates from the 3GPP spec to match the reference encoder.
587  * It performs min(thr_reduced, max(thr, energy/min_snr)) only for bands
588  * that have hole avoidance on (active or inactive). It always reduces the
589  * threshold of bands with hole avoidance off.
590  */
 /* NOTE(review): the code below multiplies energy by min_snr where the
  * comment above writes energy/min_snr. */
591  if (thr > band->energy * min_snr && band->avoid_holes != PSY_3GPP_AH_NONE) {
592  thr = FFMAX(band->thr, band->energy * min_snr);
594  }
595  }
596 
597  return thr;
598 }
599 
600 #ifndef calc_thr_3gpp
601 static void calc_thr_3gpp(const FFPsyWindowInfo *wi, const int num_bands, AacPsyChannel *pch,
602  const uint8_t *band_sizes, const float *coefs, const int cutoff)
603 {
604  int i, w, g;
605  int start = 0, wstart = 0;
606  for (w = 0; w < wi->num_windows*16; w += 16) {
607  wstart = 0;
608  for (g = 0; g < num_bands; g++) {
609  AacPsyBand *band = &pch->band[w+g];
610 
611  float form_factor = 0.0f;
612  float Temp;
613  band->energy = 0.0f;
614  if (wstart < cutoff) {
615  for (i = 0; i < band_sizes[g]; i++) {
616  band->energy += coefs[start+i] * coefs[start+i];
617  form_factor += sqrtf(fabs(coefs[start+i]));
618  }
619  }
620  Temp = band->energy > 0 ? sqrtf((float)band_sizes[g] / band->energy) : 0;
621  band->thr = band->energy * 0.001258925f;
622  band->nz_lines = form_factor * sqrtf(Temp);
623 
624  start += band_sizes[g];
625  wstart += band_sizes[g];
626  }
627  }
628 }
629 #endif /* calc_thr_3gpp */
630 
631 #ifndef psy_hp_filter
632 static void psy_hp_filter(const float *firbuf, float *hpfsmpl, const float *psy_fir_coeffs)
633 {
634  int i, j;
635  for (i = 0; i < AAC_BLOCK_SIZE_LONG; i++) {
636  float sum1, sum2;
637  sum1 = firbuf[i + (PSY_LAME_FIR_LEN - 1) / 2];
638  sum2 = 0.0;
639  for (j = 0; j < ((PSY_LAME_FIR_LEN - 1) / 2) - 1; j += 2) {
640  sum1 += psy_fir_coeffs[j] * (firbuf[i + j] + firbuf[i + PSY_LAME_FIR_LEN - j]);
641  sum2 += psy_fir_coeffs[j + 1] * (firbuf[i + j + 1] + firbuf[i + PSY_LAME_FIR_LEN - j - 1]);
642  }
643  /* NOTE: The LAME psymodel expects it's input in the range -32768 to 32768.
644  * Tuning this for normalized floats would be difficult. */
645  hpfsmpl[i] = (sum1 + sum2) * 32768.0f;
646  }
647 }
648 #endif /* psy_hp_filter */
649 
650 /**
651  * Calculate band thresholds as suggested in 3GPP TS26.403
652  */
654  const float *coefs, const FFPsyWindowInfo *wi)
655 {
656  AacPsyContext *pctx = (AacPsyContext*) ctx->model_priv_data;
657  AacPsyChannel *pch = &pctx->ch[channel];
658  int i, w, g;
659  float desired_bits, desired_pe, delta_pe, reduction= NAN, spread_en[128] = {0};
660  float a = 0.0f, active_lines = 0.0f, norm_fac = 0.0f;
661  float pe = pctx->chan_bitrate > 32000 ? 0.0f : FFMAX(50.0f, 100.0f - pctx->chan_bitrate * 100.0f / 32000.0f);
662  const int num_bands = ctx->num_bands[wi->num_windows == 8];
663  const uint8_t *band_sizes = ctx->bands[wi->num_windows == 8];
664  AacPsyCoeffs *coeffs = pctx->psy_coef[wi->num_windows == 8];
665  const float avoid_hole_thr = wi->num_windows == 8 ? PSY_3GPP_AH_THR_SHORT : PSY_3GPP_AH_THR_LONG;
666  const int bandwidth = ctx->cutoff ? ctx->cutoff : AAC_CUTOFF(ctx->avctx);
667  const int cutoff = bandwidth * 2048 / wi->num_windows / ctx->avctx->sample_rate;
668 
669  //calculate energies, initial thresholds and related values - 5.4.2 "Threshold Calculation"
670  calc_thr_3gpp(wi, num_bands, pch, band_sizes, coefs, cutoff);
671 
672  //modify thresholds and energies - spread, threshold in quiet, pre-echo control
673  for (w = 0; w < wi->num_windows*16; w += 16) {
674  AacPsyBand *bands = &pch->band[w];
675 
676  /* 5.4.2.3 "Spreading" & 5.4.3 "Spread Energy Calculation" */
677  spread_en[0] = bands[0].energy;
678  for (g = 1; g < num_bands; g++) {
679  bands[g].thr = FFMAX(bands[g].thr, bands[g-1].thr * coeffs[g].spread_hi[0]);
680  spread_en[w+g] = FFMAX(bands[g].energy, spread_en[w+g-1] * coeffs[g].spread_hi[1]);
681  }
682  for (g = num_bands - 2; g >= 0; g--) {
683  bands[g].thr = FFMAX(bands[g].thr, bands[g+1].thr * coeffs[g].spread_low[0]);
684  spread_en[w+g] = FFMAX(spread_en[w+g], spread_en[w+g+1] * coeffs[g].spread_low[1]);
685  }
686  //5.4.2.4 "Threshold in quiet"
687  for (g = 0; g < num_bands; g++) {
688  AacPsyBand *band = &bands[g];
689 
690  band->thr_quiet = band->thr = FFMAX(band->thr, coeffs[g].ath);
691  //5.4.2.5 "Pre-echo control"
692  if (!(wi->window_type[0] == LONG_STOP_SEQUENCE || (!w && wi->window_type[1] == LONG_START_SEQUENCE)))
693  band->thr = FFMAX(PSY_3GPP_RPEMIN*band->thr, FFMIN(band->thr,
694  PSY_3GPP_RPELEV*pch->prev_band[w+g].thr_quiet));
695 
696  /* 5.6.1.3.1 "Preparatory steps of the perceptual entropy calculation" */
697  pe += calc_pe_3gpp(band);
698  a += band->pe_const;
699  active_lines += band->active_lines;
700 
701  /* 5.6.1.3.3 "Selection of the bands for avoidance of holes" */
702  if (spread_en[w+g] * avoid_hole_thr > band->energy || coeffs[g].min_snr > 1.0f)
704  else
706  }
707  }
708 
709  /* 5.6.1.3.2 "Calculation of the desired perceptual entropy" */
710  ctx->ch[channel].entropy = pe;
711  if (ctx->avctx->flags & AV_CODEC_FLAG_QSCALE) {
712  /* (2.5 * 120) achieves almost transparent rate, and we want to give
713  * ample room downwards, so we make that equivalent to QSCALE=2.4
714  */
715  desired_pe = pe * (ctx->avctx->global_quality ? ctx->avctx->global_quality : 120) / (2 * 2.5f * 120.0f);
716  desired_bits = FFMIN(2560, PSY_3GPP_PE_TO_BITS(desired_pe));
717  desired_pe = PSY_3GPP_BITS_TO_PE(desired_bits); // reflect clipping
718 
719  /* PE slope smoothing */
720  if (ctx->bitres.bits > 0) {
721  desired_bits = FFMIN(2560, PSY_3GPP_PE_TO_BITS(desired_pe));
722  desired_pe = PSY_3GPP_BITS_TO_PE(desired_bits); // reflect clipping
723  }
724 
725  pctx->pe.max = FFMAX(pe, pctx->pe.max);
726  pctx->pe.min = FFMIN(pe, pctx->pe.min);
727  } else {
728  desired_bits = calc_bit_demand(pctx, pe, ctx->bitres.bits, ctx->bitres.size, wi->num_windows == 8);
729  desired_pe = PSY_3GPP_BITS_TO_PE(desired_bits);
730 
731  /* NOTE: PE correction is kept simple. During initial testing it had very
732  * little effect on the final bitrate. Probably a good idea to come
733  * back and do more testing later.
734  */
735  if (ctx->bitres.bits > 0)
736  desired_pe *= av_clipf(pctx->pe.previous / PSY_3GPP_BITS_TO_PE(ctx->bitres.bits),
737  0.85f, 1.15f);
738  }
739  pctx->pe.previous = PSY_3GPP_BITS_TO_PE(desired_bits);
740  ctx->bitres.alloc = desired_bits;
741 
742  if (desired_pe < pe) {
743  /* 5.6.1.3.4 "First Estimation of the reduction value" */
744  for (w = 0; w < wi->num_windows*16; w += 16) {
745  reduction = calc_reduction_3gpp(a, desired_pe, pe, active_lines);
746  pe = 0.0f;
747  a = 0.0f;
748  active_lines = 0.0f;
749  for (g = 0; g < num_bands; g++) {
750  AacPsyBand *band = &pch->band[w+g];
751 
752  band->thr = calc_reduced_thr_3gpp(band, coeffs[g].min_snr, reduction);
753  /* recalculate PE */
754  pe += calc_pe_3gpp(band);
755  a += band->pe_const;
756  active_lines += band->active_lines;
757  }
758  }
759 
760  /* 5.6.1.3.5 "Second Estimation of the reduction value" */
761  for (i = 0; i < 2; i++) {
762  float pe_no_ah = 0.0f, desired_pe_no_ah;
763  active_lines = a = 0.0f;
764  for (w = 0; w < wi->num_windows*16; w += 16) {
765  for (g = 0; g < num_bands; g++) {
766  AacPsyBand *band = &pch->band[w+g];
767 
768  if (band->avoid_holes != PSY_3GPP_AH_ACTIVE) {
769  pe_no_ah += band->pe;
770  a += band->pe_const;
771  active_lines += band->active_lines;
772  }
773  }
774  }
775  desired_pe_no_ah = FFMAX(desired_pe - (pe - pe_no_ah), 0.0f);
776  if (active_lines > 0.0f)
777  reduction = calc_reduction_3gpp(a, desired_pe_no_ah, pe_no_ah, active_lines);
778 
779  pe = 0.0f;
780  for (w = 0; w < wi->num_windows*16; w += 16) {
781  for (g = 0; g < num_bands; g++) {
782  AacPsyBand *band = &pch->band[w+g];
783 
784  if (active_lines > 0.0f)
785  band->thr = calc_reduced_thr_3gpp(band, coeffs[g].min_snr, reduction);
786  pe += calc_pe_3gpp(band);
787  if (band->thr > 0.0f)
788  band->norm_fac = band->active_lines / band->thr;
789  else
790  band->norm_fac = 0.0f;
791  norm_fac += band->norm_fac;
792  }
793  }
794  delta_pe = desired_pe - pe;
795  if (fabs(delta_pe) > 0.05f * desired_pe)
796  break;
797  }
798 
799  if (pe < 1.15f * desired_pe) {
800  /* 5.6.1.3.6 "Final threshold modification by linearization" */
801  norm_fac = norm_fac ? 1.0f / norm_fac : 0;
802  for (w = 0; w < wi->num_windows*16; w += 16) {
803  for (g = 0; g < num_bands; g++) {
804  AacPsyBand *band = &pch->band[w+g];
805 
806  if (band->active_lines > 0.5f) {
807  float delta_sfb_pe = band->norm_fac * norm_fac * delta_pe;
808  float thr = band->thr;
809 
810  thr *= exp2f(delta_sfb_pe / band->active_lines);
811  if (thr > coeffs[g].min_snr * band->energy && band->avoid_holes == PSY_3GPP_AH_INACTIVE)
812  thr = FFMAX(band->thr, coeffs[g].min_snr * band->energy);
813  band->thr = thr;
814  }
815  }
816  }
817  } else {
818  /* 5.6.1.3.7 "Further perceptual entropy reduction" */
819  g = num_bands;
820  while (pe > desired_pe && g--) {
821  for (w = 0; w < wi->num_windows*16; w+= 16) {
822  AacPsyBand *band = &pch->band[w+g];
823  if (band->avoid_holes != PSY_3GPP_AH_NONE && coeffs[g].min_snr < PSY_SNR_1DB) {
824  coeffs[g].min_snr = PSY_SNR_1DB;
825  band->thr = band->energy * PSY_SNR_1DB;
826  pe += band->active_lines * 1.5f - band->pe;
827  }
828  }
829  }
830  /* TODO: allow more holes (unused without mid/side) */
831  }
832  }
833 
834  for (w = 0; w < wi->num_windows*16; w += 16) {
835  for (g = 0; g < num_bands; g++) {
836  AacPsyBand *band = &pch->band[w+g];
837  FFPsyBand *psy_band = &ctx->ch[channel].psy_bands[w+g];
838 
839  psy_band->threshold = band->thr;
840  psy_band->energy = band->energy;
841  psy_band->spread = band->active_lines * 2.0f / band_sizes[g];
842  psy_band->bits = PSY_3GPP_PE_TO_BITS(band->pe);
843  }
844  }
845 
846  memcpy(pch->prev_band, pch->band, sizeof(pch->band));
847 }
848 
850  const float **coeffs, const FFPsyWindowInfo *wi)
851 {
852  int ch;
854 
855  for (ch = 0; ch < group->num_ch; ch++)
856  psy_3gpp_analyze_channel(ctx, channel + ch, coeffs[ch], &wi[ch]);
857 }
858 
860 {
862  if (pctx)
863  av_freep(&pctx->ch);
864  av_freep(&apc->model_priv_data);
865 }
866 
867 static void lame_apply_block_type(AacPsyChannel *ctx, FFPsyWindowInfo *wi, int uselongblock)
868 {
869  int blocktype = ONLY_LONG_SEQUENCE;
870  if (uselongblock) {
871  if (ctx->next_window_seq == EIGHT_SHORT_SEQUENCE)
872  blocktype = LONG_STOP_SEQUENCE;
873  } else {
874  blocktype = EIGHT_SHORT_SEQUENCE;
875  if (ctx->next_window_seq == ONLY_LONG_SEQUENCE)
876  ctx->next_window_seq = LONG_START_SEQUENCE;
877  if (ctx->next_window_seq == LONG_STOP_SEQUENCE)
878  ctx->next_window_seq = EIGHT_SHORT_SEQUENCE;
879  }
880 
881  wi->window_type[0] = ctx->next_window_seq;
882  ctx->next_window_seq = blocktype;
883 }
884 
885 static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, const float *audio,
886  const float *la, int channel, int prev_type)
887 {
888  AacPsyContext *pctx = (AacPsyContext*) ctx->model_priv_data;
889  AacPsyChannel *pch = &pctx->ch[channel];
890  int grouping = 0;
891  int uselongblock = 1;
892  int attacks[AAC_NUM_BLOCKS_SHORT + 1] = { 0 };
893  int i;
894  FFPsyWindowInfo wi = { { 0 } };
895 
896  if (la) {
897  float hpfsmpl[AAC_BLOCK_SIZE_LONG];
898  const float *pf = hpfsmpl;
899  float attack_intensity[(AAC_NUM_BLOCKS_SHORT + 1) * PSY_LAME_NUM_SUBBLOCKS];
900  float energy_subshort[(AAC_NUM_BLOCKS_SHORT + 1) * PSY_LAME_NUM_SUBBLOCKS];
901  float energy_short[AAC_NUM_BLOCKS_SHORT + 1] = { 0 };
902  const float *firbuf = la + (AAC_BLOCK_SIZE_SHORT/4 - PSY_LAME_FIR_LEN);
903  int att_sum = 0;
904 
905  /* LAME comment: apply high pass filter of fs/4 */
906  psy_hp_filter(firbuf, hpfsmpl, psy_fir_coeffs);
907 
908  /* Calculate the energies of each sub-shortblock */
909  for (i = 0; i < PSY_LAME_NUM_SUBBLOCKS; i++) {
910  energy_subshort[i] = pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 1) * PSY_LAME_NUM_SUBBLOCKS)];
911  assert(pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 2) * PSY_LAME_NUM_SUBBLOCKS + 1)] > 0);
912  attack_intensity[i] = energy_subshort[i] / pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 2) * PSY_LAME_NUM_SUBBLOCKS + 1)];
913  energy_short[0] += energy_subshort[i];
914  }
915 
916  for (i = 0; i < AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS; i++) {
917  const float *const pfe = pf + AAC_BLOCK_SIZE_LONG / (AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS);
918  float p = 1.0f;
919  for (; pf < pfe; pf++)
920  p = FFMAX(p, fabsf(*pf));
921  pch->prev_energy_subshort[i] = energy_subshort[i + PSY_LAME_NUM_SUBBLOCKS] = p;
922  energy_short[1 + i / PSY_LAME_NUM_SUBBLOCKS] += p;
923  /* NOTE: The indexes below are [i + 3 - 2] in the LAME source.
924  * Obviously the 3 and 2 have some significance, or this would be just [i + 1]
925  * (which is what we use here). What the 3 stands for is ambiguous, as it is both
926  * number of short blocks, and the number of sub-short blocks.
927  * It seems that LAME is comparing each sub-block to sub-block + 1 in the
928  * previous block.
929  */
930  if (p > energy_subshort[i + 1])
931  p = p / energy_subshort[i + 1];
932  else if (energy_subshort[i + 1] > p * 10.0f)
933  p = energy_subshort[i + 1] / (p * 10.0f);
934  else
935  p = 0.0;
936  attack_intensity[i + PSY_LAME_NUM_SUBBLOCKS] = p;
937  }
938 
939  /* compare energy between sub-short blocks */
940  for (i = 0; i < (AAC_NUM_BLOCKS_SHORT + 1) * PSY_LAME_NUM_SUBBLOCKS; i++)
941  if (!attacks[i / PSY_LAME_NUM_SUBBLOCKS])
942  if (attack_intensity[i] > pch->attack_threshold)
943  attacks[i / PSY_LAME_NUM_SUBBLOCKS] = (i % PSY_LAME_NUM_SUBBLOCKS) + 1;
944 
945  /* should have energy change between short blocks, in order to avoid periodic signals */
946  /* Good samples to show the effect are Trumpet test songs */
947  /* GB: tuned (1) to avoid too many short blocks for test sample TRUMPET */
948  /* RH: tuned (2) to let enough short blocks through for test sample FSOL and SNAPS */
949  for (i = 1; i < AAC_NUM_BLOCKS_SHORT + 1; i++) {
950  const float u = energy_short[i - 1];
951  const float v = energy_short[i];
952  const float m = FFMAX(u, v);
953  if (m < 40000) { /* (2) */
954  if (u < 1.7f * v && v < 1.7f * u) { /* (1) */
955  if (i == 1 && attacks[0] < attacks[i])
956  attacks[0] = 0;
957  attacks[i] = 0;
958  }
959  }
960  att_sum += attacks[i];
961  }
962 
963  if (attacks[0] <= pch->prev_attack)
964  attacks[0] = 0;
965 
966  att_sum += attacks[0];
967  /* 3 below indicates the previous attack happened in the last sub-block of the previous sequence */
968  if (pch->prev_attack == 3 || att_sum) {
969  uselongblock = 0;
970 
971  for (i = 1; i < AAC_NUM_BLOCKS_SHORT + 1; i++)
972  if (attacks[i] && attacks[i-1])
973  attacks[i] = 0;
974  }
975  } else {
976  /* We have no lookahead info, so just use same type as the previous sequence. */
977  uselongblock = !(prev_type == EIGHT_SHORT_SEQUENCE);
978  }
979 
980  lame_apply_block_type(pch, &wi, uselongblock);
981 
982  wi.window_type[1] = prev_type;
983  if (wi.window_type[0] != EIGHT_SHORT_SEQUENCE) {
984 
985  wi.num_windows = 1;
986  wi.grouping[0] = 1;
987  if (wi.window_type[0] == LONG_START_SEQUENCE)
988  wi.window_shape = 0;
989  else
990  wi.window_shape = 1;
991 
992  } else {
993  int lastgrp = 0;
994 
995  wi.num_windows = 8;
996  wi.window_shape = 0;
997  for (i = 0; i < 8; i++) {
998  if (!((pch->next_grouping >> i) & 1))
999  lastgrp = i;
1000  wi.grouping[lastgrp]++;
1001  }
1002  }
1003 
1004  /* Determine grouping, based on the location of the first attack, and save for
1005  * the next frame.
1006  * FIXME: Move this to analysis.
1007  * TODO: Tune groupings depending on attack location
1008  * TODO: Handle more than one attack in a group
1009  */
1010  for (i = 0; i < 9; i++) {
1011  if (attacks[i]) {
1012  grouping = i;
1013  break;
1014  }
1015  }
1016  pch->next_grouping = window_grouping[grouping];
1017 
1018  pch->prev_attack = attacks[8];
1019 
1020  return wi;
1021 }
1022 
1024 {
1025  .name = "3GPP TS 26.403-inspired model",
1026  .init = psy_3gpp_init,
1027  .window = psy_lame_window,
1028  .analyze = psy_3gpp_analyze,
1029  .end = psy_3gpp_end,
1030 };
AacPsyCoeffs::spread_low
float spread_low[2]
spreading factor for high-to-low threshold spreading in long frame
Definition: aacpsy.c:144
ff_exp10
static av_always_inline double ff_exp10(double x)
Compute 10^x for floating point values.
Definition: ffmath.h:42
av_clip
#define av_clip
Definition: common.h:99
psy_3gpp_init
static av_cold int psy_3gpp_init(FFPsyContext *ctx)
Definition: aacpsy.c:302
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
psy_3gpp_window
static av_unused FFPsyWindowInfo psy_3gpp_window(FFPsyContext *ctx, const int16_t *audio, const int16_t *la, int channel, int prev_type)
Tell encoder which window types to use.
Definition: aacpsy.c:409
lame_calc_attack_threshold
static float lame_calc_attack_threshold(int bitrate)
Calculate the ABR attack threshold from the above LAME psymodel table.
Definition: aacpsy.c:233
FFPsyModel::name
const char * name
Definition: psymodel.h:115
u
#define u(width, name, range_min, range_max)
Definition: cbs_h2645.c:251
PSY_PE_FORGET_SLOPE
#define PSY_PE_FORGET_SLOPE
Definition: aacpsy.c:84
psy_lame_window
static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, const float *audio, const float *la, int channel, int prev_type)
Definition: aacpsy.c:885
log2f
#define log2f(x)
Definition: libm.h:409
AacPsyBand::thr
float thr
energy threshold
Definition: aacpsy.c:111
calc_thr_3gpp
static void calc_thr_3gpp(const FFPsyWindowInfo *wi, const int num_bands, AacPsyChannel *pch, const uint8_t *band_sizes, const float *coefs, const int cutoff)
Definition: aacpsy.c:601
PSY_3GPP_PE_TO_BITS
#define PSY_3GPP_PE_TO_BITS(bits)
Definition: aacpsy.c:93
AV_CODEC_FLAG_QSCALE
#define AV_CODEC_FLAG_QSCALE
Use fixed qscale.
Definition: avcodec.h:224
calc_bark
static av_cold float calc_bark(float f)
Calculate Bark value for given line.
Definition: aacpsy.c:283
AacPsyBand::nz_lines
float nz_lines
number of non-zero spectral lines
Definition: aacpsy.c:113
av_unused
#define av_unused
Definition: attributes.h:131
PSY_3GPP_CLIP_LO_S
#define PSY_3GPP_CLIP_LO_S
Definition: aacpsy.c:77
w
uint8_t w
Definition: llviddspenc.c:38
PSY_3GPP_AH_THR_LONG
#define PSY_3GPP_AH_THR_LONG
Definition: aacpsy.c:81
FFPsyWindowInfo::window_shape
int window_shape
window shape (sine/KBD/whatever)
Definition: psymodel.h:79
PSY_3GPP_AH_INACTIVE
@ PSY_3GPP_AH_INACTIVE
Definition: aacpsy.c:88
PSY_SNR_1DB
#define PSY_SNR_1DB
Definition: aacpsy.c:65
calc_pe_3gpp
static float calc_pe_3gpp(AacPsyBand *band)
Definition: aacpsy.c:538
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
AacPsyContext::min
float min
minimum allowed PE for bit factor calculation
Definition: aacpsy.c:157
PSY_3GPP_SPEND_SLOPE_L
#define PSY_3GPP_SPEND_SLOPE_L
Definition: aacpsy.c:72
PSY_3GPP_THR_SPREAD_HI
#define PSY_3GPP_THR_SPREAD_HI
constants for 3GPP AAC psychoacoustic model
Definition: aacpsy.c:45
AacPsyContext::fill_level
int fill_level
bit reservoir fill level
Definition: aacpsy.c:155
AVChannelLayout::nb_channels
int nb_channels
Number of channels in this layout.
Definition: channel_layout.h:313
AacPsyCoeffs::spread_hi
float spread_hi[2]
spreading factor for low-to-high threshold spreading in long frame
Definition: aacpsy.c:145
quality
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But a word about quality
Definition: rate_distortion.txt:12
lame_apply_block_type
static void lame_apply_block_type(AacPsyChannel *ctx, FFPsyWindowInfo *wi, int uselongblock)
Definition: aacpsy.c:867
AacPsyCoeffs
psychoacoustic model frame type-dependent coefficients
Definition: aacpsy.c:141
AVCodecContext::ch_layout
AVChannelLayout ch_layout
Audio channel layout.
Definition: avcodec.h:1065
lame_window_init
static av_cold void lame_window_init(AacPsyContext *ctx, AVCodecContext *avctx)
LAME psy model specific initialization.
Definition: aacpsy.c:263
PsyLamePreset::st_lrm
float st_lrm
short threshold for L, R, and M channels
Definition: aacpsy.c:175
PSY_3GPP_EN_SPREAD_HI_S
#define PSY_3GPP_EN_SPREAD_HI_S
Definition: aacpsy.c:52
PSY_3GPP_SPEND_ADD_L
#define PSY_3GPP_SPEND_ADD_L
Definition: aacpsy.c:74
AVCodecContext::flags
int flags
AV_CODEC_FLAG_*.
Definition: avcodec.h:502
AacPsyCoeffs::barks
float barks
Bark value for each spectral band in long frame.
Definition: aacpsy.c:143
AacPsyChannel::prev_energy_subshort
float prev_energy_subshort[AAC_NUM_BLOCKS_SHORT *PSY_LAME_NUM_SUBBLOCKS]
Definition: aacpsy.c:134
fabsf
static __device__ float fabsf(float a)
Definition: cuda_runtime.h:181
FFPsyWindowInfo
windowing related information
Definition: psymodel.h:77
ATH_ADD
#define ATH_ADD
Definition: aacpsy.c:288
AVFormatContext::bit_rate
int64_t bit_rate
Total stream bitrate in bit/s, 0 if not available.
Definition: avformat.h:1397
AacPsyContext::previous
float previous
allowed PE of the previous frame
Definition: aacpsy.c:159
ff_aac_psy_model
const FFPsyModel ff_aac_psy_model
Definition: aacpsy.c:1023
AacPsyContext::ch
AacPsyChannel * ch
Definition: aacpsy.c:163
av_cold
#define av_cold
Definition: attributes.h:90
FFPsyChannelGroup::num_ch
uint8_t num_ch
number of channels in this group
Definition: psymodel.h:70
PsyLamePreset
LAME psy model preset struct.
Definition: aacpsy.c:170
AacPsyContext::pe
struct AacPsyContext::@15 pe
PSY_3GPP_CLIP_HI_S
#define PSY_3GPP_CLIP_HI_S
Definition: aacpsy.c:79
s
#define s(width, name)
Definition: cbs_vp9.c:198
AacPsyBand
information for single band used by 3GPP TS26.403-inspired psychoacoustic model
Definition: aacpsy.c:109
AVCodecContext::global_quality
int global_quality
Global quality for codecs which cannot change it per frame.
Definition: avcodec.h:1239
AVFormatContext::flags
int flags
Flags modifying the (de)muxer behaviour.
Definition: avformat.h:1406
bitrate
int64_t bitrate
Definition: av1_levels.c:47
g
const char * g
Definition: vf_curves.c:128
EIGHT_SHORT_SEQUENCE
@ EIGHT_SHORT_SEQUENCE
Definition: aac.h:65
PsyLamePreset::quality
int quality
Quality to map the rest of the values to.
Definition: aacpsy.c:171
AacPsyBand::pe_const
float pe_const
constant part of the PE calculation
Definition: aacpsy.c:116
bits
uint8_t bits
Definition: vp3data.h:128
AacPsyContext
3GPP TS26.403-inspired psychoacoustic model specific data
Definition: aacpsy.c:152
AacPsyCoeffs::min_snr
float min_snr
minimal SNR
Definition: aacpsy.c:146
ctx
AVFormatContext * ctx
Definition: movenc.c:49
exp2f
#define exp2f(x)
Definition: libm.h:293
calc_reduction_3gpp
static float calc_reduction_3gpp(float a, float desired_pe, float pe, float active_lines)
Definition: aacpsy.c:561
window_grouping
static const uint8_t window_grouping[9]
window grouping information stored as bits (0 - new group, 1 - group continues)
Definition: aacpsy.c:401
aacpsy_mips.h
AAC_BLOCK_SIZE_SHORT
#define AAC_BLOCK_SIZE_SHORT
short block size
Definition: aacpsy.c:98
bands
static const float bands[]
Definition: af_superequalizer.c:57
ath
static av_cold float ath(float f, float add)
Calculate ATH value for given frequency.
Definition: aacpsy.c:293
calc_bit_demand
static int calc_bit_demand(AacPsyContext *ctx, float pe, int bits, int size, int short_window)
Definition: aacpsy.c:495
NAN
#define NAN
Definition: mathematics.h:115
PSY_3GPP_AH_THR_SHORT
#define PSY_3GPP_AH_THR_SHORT
Definition: aacpsy.c:82
psy_hp_filter
static void psy_hp_filter(const float *firbuf, float *hpfsmpl, const float *psy_fir_coeffs)
Definition: aacpsy.c:632
if
if(ret)
Definition: filter_design.txt:179
iir_filter
static float iir_filter(int in, float state[2])
IIR filter used in block switching decision.
Definition: aacpsy.c:388
psy_vbr_map
static const PsyLamePreset psy_vbr_map[]
LAME psy model preset table for constant quality.
Definition: aacpsy.c:202
AAC_CUTOFF
#define AAC_CUTOFF(s)
Definition: psymodel.h:41
FFPsyWindowInfo::window_type
int window_type[3]
window type (short/long/transitional, etc.) - current, previous and next
Definition: psymodel.h:78
FFPsyBand::bits
int bits
Definition: psymodel.h:51
fabs
static __device__ float fabs(float a)
Definition: cuda_runtime.h:182
PSY_3GPP_RPEMIN
#define PSY_3GPP_RPEMIN
Definition: aacpsy.c:58
psy_abr_map
static const PsyLamePreset psy_abr_map[]
LAME psy model preset table for ABR.
Definition: aacpsy.c:181
PSY_3GPP_C1
#define PSY_3GPP_C1
Definition: aacpsy.c:61
AVCodecContext::bit_rate
int64_t bit_rate
the average bitrate
Definition: avcodec.h:495
psy_3gpp_end
static av_cold void psy_3gpp_end(FFPsyContext *apc)
Definition: aacpsy.c:859
PSY_3GPP_BITS_TO_PE
#define PSY_3GPP_BITS_TO_PE(bits)
Definition: aacpsy.c:92
FFPsyBand
single band psychoacoustic information
Definition: psymodel.h:50
aac.h
sqrtf
static __device__ float sqrtf(float a)
Definition: cuda_runtime.h:184
FFPsyWindowInfo::grouping
int grouping[8]
window grouping (for e.g. AAC)
Definition: psymodel.h:81
av_clipf
av_clipf
Definition: af_crystalizer.c:121
AacPsyContext::max
float max
maximum allowed PE for bit factor calculation
Definition: aacpsy.c:158
exp
int8_t exp
Definition: eval.c:73
AacPsyChannel::iir_state
float iir_state[2]
hi-pass IIR filter state
Definition: aacpsy.c:129
AacPsyContext::psy_coef
AacPsyCoeffs psy_coef[2][64]
Definition: aacpsy.c:162
AacPsyBand::thr_quiet
float thr_quiet
threshold in quiet
Definition: aacpsy.c:112
AAC_BLOCK_SIZE_LONG
#define AAC_BLOCK_SIZE_LONG
long block size
Definition: aacpsy.c:97
f
f
Definition: af_crystalizer.c:121
ONLY_LONG_SEQUENCE
@ ONLY_LONG_SEQUENCE
Definition: aac.h:63
AacPsyChannel::band
AacPsyBand band[128]
bands information
Definition: aacpsy.c:125
PSY_3GPP_AH_NONE
@ PSY_3GPP_AH_NONE
Definition: aacpsy.c:87
size
int size
Definition: twinvq_data.h:10344
state
static struct @395 state
calc_reduced_thr_3gpp
static float calc_reduced_thr_3gpp(AacPsyBand *band, float min_snr, float reduction)
Definition: aacpsy.c:575
AacPsyCoeffs::ath
float ath
absolute threshold of hearing per bands
Definition: aacpsy.c:142
AacPsyBand::active_lines
float active_lines
number of active spectral lines
Definition: aacpsy.c:114
AAC_NUM_BLOCKS_SHORT
#define AAC_NUM_BLOCKS_SHORT
number of blocks in a short sequence
Definition: aacpsy.c:99
PSY_LAME_FIR_LEN
#define PSY_LAME_FIR_LEN
LAME psy model FIR order.
Definition: aacpsy.c:96
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
PSY_3GPP_CLIP_LO_L
#define PSY_3GPP_CLIP_LO_L
Definition: aacpsy.c:76
attributes.h
AacPsyBand::avoid_holes
int avoid_holes
hole avoidance flag
Definition: aacpsy.c:118
PSY_3GPP_THR_SPREAD_LOW
#define PSY_3GPP_THR_SPREAD_LOW
Definition: aacpsy.c:46
PSY_3GPP_SAVE_ADD_S
#define PSY_3GPP_SAVE_ADD_S
Definition: aacpsy.c:71
PSY_3GPP_SPEND_ADD_S
#define PSY_3GPP_SPEND_ADD_S
Definition: aacpsy.c:75
psy_fir_coeffs
static const float psy_fir_coeffs[]
LAME psy model FIR coefficient table.
Definition: aacpsy.c:220
AacPsyChannel::attack_threshold
float attack_threshold
attack threshold for this channel
Definition: aacpsy.c:133
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
AacPsyBand::norm_fac
float norm_fac
normalization factor for linearization
Definition: aacpsy.c:117
FFPsyBand::threshold
float threshold
Definition: psymodel.h:53
PSY_3GPP_CLIP_HI_L
#define PSY_3GPP_CLIP_HI_L
Definition: aacpsy.c:78
LONG_STOP_SEQUENCE
@ LONG_STOP_SEQUENCE
Definition: aac.h:66
atanf
#define atanf(x)
Definition: libm.h:40
exp2
#define exp2(x)
Definition: libm.h:288
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
PSY_3GPP_RPELEV
#define PSY_3GPP_RPELEV
Definition: aacpsy.c:59
AacPsyBand::pe
float pe
perceptual entropy
Definition: aacpsy.c:115
av_mallocz
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if availab...
Definition: mem.c:256
av_calloc
void * av_calloc(size_t nmemb, size_t size)
Definition: mem.c:264
AacPsyBand::energy
float energy
band energy
Definition: aacpsy.c:110
avcodec.h
FFPsyChannelGroup
psychoacoustic information for an arbitrary group of channels
Definition: psymodel.h:68
AacPsyChannel::next_window_seq
enum WindowSequence next_window_seq
window sequence to be used in the next frame
Definition: aacpsy.c:131
AacPsyChannel::win_energy
float win_energy
sliding average of channel energy
Definition: aacpsy.c:128
ret
ret
Definition: filter_design.txt:187
AacPsyChannel
single/pair channel context for psychoacoustic model
Definition: aacpsy.c:124
AacPsyContext::correction
float correction
PE correction factor.
Definition: aacpsy.c:160
FFPsyContext::model_priv_data
void * model_priv_data
psychoacoustic model implementation private data
Definition: psymodel.h:108
LONG_START_SEQUENCE
@ LONG_START_SEQUENCE
Definition: aac.h:64
PSY_3GPP_SAVE_SLOPE_S
#define PSY_3GPP_SAVE_SLOPE_S
Definition: aacpsy.c:69
PSY_3GPP_EN_SPREAD_HI_L1
#define PSY_3GPP_EN_SPREAD_HI_L1
Definition: aacpsy.c:48
AacPsyChannel::next_grouping
uint8_t next_grouping
stored grouping scheme for the next frame (in case of 8 short window sequence)
Definition: aacpsy.c:130
FFPsyBand::energy
float energy
Definition: psymodel.h:52
AVCodecContext
main external API structure.
Definition: avcodec.h:445
PSY_LAME_NUM_SUBBLOCKS
#define PSY_LAME_NUM_SUBBLOCKS
Number of sub-blocks in each short block.
Definition: aacpsy.c:100
PSY_SNR_25DB
#define PSY_SNR_25DB
Definition: aacpsy.c:66
AacPsyContext::global_quality
float global_quality
normalized global quality taken from avctx
Definition: aacpsy.c:164
psy_3gpp_analyze_channel
static void psy_3gpp_analyze_channel(FFPsyContext *ctx, int channel, const float *coefs, const FFPsyWindowInfo *wi)
Calculate band thresholds as suggested in 3GPP TS26.403.
Definition: aacpsy.c:653
FFPsyModel
codec-specific psychoacoustic model implementation
Definition: psymodel.h:114
AacPsyContext::frame_bits
int frame_bits
average bits per frame
Definition: aacpsy.c:154
ffmath.h
ff_psy_find_group
FFPsyChannelGroup * ff_psy_find_group(FFPsyContext *ctx, int channel)
Determine what group a channel belongs to.
Definition: psymodel.c:73
psy_3gpp_analyze
static void psy_3gpp_analyze(FFPsyContext *ctx, int channel, const float **coeffs, const FFPsyWindowInfo *wi)
Definition: aacpsy.c:849
PSY_3GPP_C3
#define PSY_3GPP_C3
Definition: aacpsy.c:63
mem.h
PSY_3GPP_EN_SPREAD_LOW_L
#define PSY_3GPP_EN_SPREAD_LOW_L
Definition: aacpsy.c:54
PSY_3GPP_AH_ACTIVE
@ PSY_3GPP_AH_ACTIVE
Definition: aacpsy.c:89
AacPsyContext::chan_bitrate
int chan_bitrate
bitrate per channel
Definition: aacpsy.c:153
PSY_3GPP_SAVE_SLOPE_L
#define PSY_3GPP_SAVE_SLOPE_L
Definition: aacpsy.c:68
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:34
PSY_3GPP_C2
#define PSY_3GPP_C2
Definition: aacpsy.c:62
coeff
static const double coeff[2][5]
Definition: vf_owdenoise.c:80
PSY_3GPP_SPEND_SLOPE_S
#define PSY_3GPP_SPEND_SLOPE_S
Definition: aacpsy.c:73
WindowSequence
WindowSequence
Definition: aac.h:62
FFPsyBand::spread
float spread
Definition: psymodel.h:54
FF_QP2LAMBDA
#define FF_QP2LAMBDA
factor to convert from H.263 QP to lambda
Definition: avutil.h:227
int
int
Definition: ffmpeg_filter.c:424
PSY_3GPP_EN_SPREAD_LOW_S
#define PSY_3GPP_EN_SPREAD_LOW_S
Definition: aacpsy.c:56
AacPsyChannel::prev_attack
int prev_attack
attack value for the last short block in the previous sequence
Definition: aacpsy.c:135
FFPsyContext
context used by psychoacoustic model
Definition: psymodel.h:89
AacPsyChannel::prev_band
AacPsyBand prev_band[128]
bands information from the previous frame
Definition: aacpsy.c:126
psymodel.h
channel
channel
Definition: ebur128.h:39
FFPsyWindowInfo::num_windows
int num_windows
number of windows in a frame
Definition: psymodel.h:80
PSY_3GPP_SAVE_ADD_L
#define PSY_3GPP_SAVE_ADD_L
Definition: aacpsy.c:70