FFmpeg
wmavoice.c
Go to the documentation of this file.
1 /*
2  * Windows Media Audio Voice decoder.
3  * Copyright (c) 2009 Ronald S. Bultje
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * @brief Windows Media Audio Voice compatible decoder
25  * @author Ronald S. Bultje <rsbultje@gmail.com>
26  */
27 
28 #include <math.h>
29 
31 #include "libavutil/float_dsp.h"
32 #include "libavutil/mem.h"
33 #include "libavutil/mem_internal.h"
34 #include "libavutil/thread.h"
35 #include "libavutil/tx.h"
36 #include "avcodec.h"
37 #include "codec_internal.h"
38 #include "decode.h"
39 #include "get_bits.h"
40 #include "put_bits.h"
41 #include "wmavoice_data.h"
42 #include "celp_filters.h"
43 #include "acelp_vectors.h"
44 #include "acelp_filters.h"
45 #include "lsp.h"
46 #include "sinewin.h"
47 
48 #define MAX_BLOCKS 8 ///< maximum number of blocks per frame
49 #define MAX_LSPS 16 ///< maximum filter order
50 #define MAX_LSPS_ALIGN16 16 ///< same as #MAX_LSPS; needs to be multiple
51  ///< of 16 for ASM input buffer alignment
52 #define MAX_FRAMES 3 ///< maximum number of frames per superframe
53 #define MAX_FRAMESIZE 160 ///< maximum number of samples per frame
54 #define MAX_SIGNAL_HISTORY 416 ///< maximum excitation signal history
55 #define MAX_SFRAMESIZE (MAX_FRAMESIZE * MAX_FRAMES)
56  ///< maximum number of samples per superframe
57 #define SFRAME_CACHE_MAXSIZE 256 ///< maximum cache size for frame data that
58  ///< was split over two packets
59 #define VLC_NBITS 6 ///< number of bits to read per VLC iteration
60 
61 /**
62  * Frame type VLC coding.
63  */
64 static VLCElem frame_type_vlc[132];
65 
66 /**
67  * Adaptive codebook types.
68  */
69 enum {
70  ACB_TYPE_NONE = 0, ///< no adaptive codebook (only hardcoded fixed)
71  ACB_TYPE_ASYMMETRIC = 1, ///< adaptive codebook with per-frame pitch, which
72  ///< we interpolate to get a per-sample pitch.
73  ///< Signal is generated using an asymmetric sinc
74  ///< window function
75  ///< @note see #wmavoice_ipol1_coeffs
76  ACB_TYPE_HAMMING = 2 ///< Per-block pitch with signal generation using
77  ///< a Hamming sinc window function
78  ///< @note see #wmavoice_ipol2_coeffs
79 };
80 
81 /**
82  * Fixed codebook types.
83  */
84 enum {
85  FCB_TYPE_SILENCE = 0, ///< comfort noise during silence
86  ///< generated from a hardcoded (fixed) codebook
87  ///< with per-frame (low) gain values
88  FCB_TYPE_HARDCODED = 1, ///< hardcoded (fixed) codebook with per-block
89  ///< gain values
90  FCB_TYPE_AW_PULSES = 2, ///< Pitch-adaptive window (AW) pulse signals,
91  ///< used in particular for low-bitrate streams
92  FCB_TYPE_EXC_PULSES = 3, ///< Innovation (fixed) codebook pulse sets in
93  ///< combinations of either single pulses or
94  ///< pulse pairs
95 };
96 
97 /**
98  * Description of frame types.
99  */
100 static const struct frame_type_desc {
101  uint8_t n_blocks; ///< amount of blocks per frame (each block
102  ///< (contains 160/#n_blocks samples)
103  uint8_t log_n_blocks; ///< log2(#n_blocks)
104  uint8_t acb_type; ///< Adaptive codebook type (ACB_TYPE_*)
105  uint8_t fcb_type; ///< Fixed codebook type (FCB_TYPE_*)
106  uint8_t dbl_pulses; ///< how many pulse vectors have pulse pairs
107  ///< (rather than just one single pulse)
108  ///< only if #fcb_type == #FCB_TYPE_EXC_PULSES
109 } frame_descs[17] = {
110  { 1, 0, ACB_TYPE_NONE, FCB_TYPE_SILENCE, 0 },
111  { 2, 1, ACB_TYPE_NONE, FCB_TYPE_HARDCODED, 0 },
127 };
128 
129 /**
130  * WMA Voice decoding context.
131  */
132 typedef struct WMAVoiceContext {
133  /**
134  * @name Global values specified in the stream header / extradata or used all over.
135  * @{
136  */
137  GetBitContext gb; ///< packet bitreader. During decoder init,
138  ///< it contains the extradata from the
139  ///< demuxer. During decoding, it contains
140  ///< packet data.
141  int8_t vbm_tree[25]; ///< converts VLC codes to frame type
142 
143  int spillover_bitsize; ///< number of bits used to specify
144  ///< #spillover_nbits in the packet header
145  ///< = ceil(log2(ctx->block_align << 3))
146  int history_nsamples; ///< number of samples in history for signal
147  ///< prediction (through ACB)
148 
149  /* postfilter specific values */
150  int do_apf; ///< whether to apply the averaged
151  ///< projection filter (APF)
152  int denoise_strength; ///< strength of denoising in Wiener filter
153  ///< [0-11]
154  int denoise_tilt_corr; ///< Whether to apply tilt correction to the
155  ///< Wiener filter coefficients (postfilter)
156  int dc_level; ///< Predicted amount of DC noise, based
157  ///< on which a DC removal filter is used
158 
159  int lsps; ///< number of LSPs per frame [10 or 16]
160  int lsp_q_mode; ///< defines quantizer defaults [0, 1]
161  int lsp_def_mode; ///< defines different sets of LSP defaults
162  ///< [0, 1]
163 
164  int min_pitch_val; ///< base value for pitch parsing code
165  int max_pitch_val; ///< max value + 1 for pitch parsing
166  int pitch_nbits; ///< number of bits used to specify the
167  ///< pitch value in the frame header
168  int block_pitch_nbits; ///< number of bits used to specify the
169  ///< first block's pitch value
170  int block_pitch_range; ///< range of the block pitch
171  int block_delta_pitch_nbits; ///< number of bits used to specify the
172  ///< delta pitch between this and the last
173  ///< block's pitch value, used in all but
174  ///< first block
175  int block_delta_pitch_hrange; ///< 1/2 range of the delta (full range is
176  ///< from -this to +this-1)
177  uint16_t block_conv_table[4]; ///< boundaries for block pitch unit/scale
178  ///< conversion
179 
180  /**
181  * @}
182  *
183  * @name Packet values specified in the packet header or related to a packet.
184  *
185  * A packet is considered to be a single unit of data provided to this
186  * decoder by the demuxer.
187  * @{
188  */
189  int spillover_nbits; ///< number of bits of the previous packet's
190  ///< last superframe preceding this
191  ///< packet's first full superframe (useful
192  ///< for re-synchronization also)
193  int has_residual_lsps; ///< if set, superframes contain one set of
194  ///< LSPs that cover all frames, encoded as
195  ///< independent and residual LSPs; if not
196  ///< set, each frame contains its own, fully
197  ///< independent, LSPs
198  int skip_bits_next; ///< number of bits to skip at the next call
199  ///< to #wmavoice_decode_packet() (since
200  ///< they're part of the previous superframe)
201 
203  ///< cache for superframe data split over
204  ///< multiple packets
205  int sframe_cache_size; ///< set to >0 if we have data from an
206  ///< (incomplete) superframe from a previous
207  ///< packet that spilled over in the current
208  ///< packet; specifies the amount of bits in
209  ///< #sframe_cache
210  PutBitContext pb; ///< bitstream writer for #sframe_cache
211 
212  /**
213  * @}
214  *
215  * @name Frame and superframe values
216  * Superframe and frame data - these can change from frame to frame,
217  * although some of them do in that case serve as a cache / history for
218  * the next frame or superframe.
219  * @{
220  */
221  double prev_lsps[MAX_LSPS]; ///< LSPs of the last frame of the previous
222  ///< superframe
223  int last_pitch_val; ///< pitch value of the previous frame
224  int last_acb_type; ///< frame type [0-2] of the previous frame
225  int pitch_diff_sh16; ///< ((cur_pitch_val - #last_pitch_val)
226  ///< << 16) / #MAX_FRAMESIZE
227  float silence_gain; ///< set for use in blocks if #ACB_TYPE_NONE
228 
229  int aw_idx_is_ext; ///< whether the AW index was encoded in
230  ///< 8 bits (instead of 6)
231  int aw_pulse_range; ///< the range over which #aw_pulse_set1()
232  ///< can apply the pulse, relative to the
233  ///< value in aw_first_pulse_off. The exact
234  ///< position of the first AW-pulse is within
235  ///< [pulse_off, pulse_off + this], and
236  ///< depends on bitstream values; [16 or 24]
237  int aw_n_pulses[2]; ///< number of AW-pulses in each block; note
238  ///< that this number can be negative (in
239  ///< which case it basically means "zero")
240  int aw_first_pulse_off[2]; ///< index of first sample to which to
241  ///< apply AW-pulses, or -0xff if unset
242  int aw_next_pulse_off_cache; ///< the position (relative to start of the
243  ///< second block) at which pulses should
244  ///< start to be positioned, serves as a
245  ///< cache for pitch-adaptive window pulses
246  ///< between blocks
247 
248  int frame_cntr; ///< current frame index [0 - 0xFFFE]; is
249  ///< only used for comfort noise in #pRNG()
250  int nb_superframes; ///< number of superframes in current packet
251  float gain_pred_err[6]; ///< cache for gain prediction
252  float excitation_history[MAX_SIGNAL_HISTORY]; ///< cache of the signal of
253  ///< previous superframes, used as a history
254  ///< for signal generation
255  float synth_history[MAX_LSPS]; ///< see #excitation_history
256  /**
257  * @}
258  *
259  * @name Postfilter values
260  *
261  * Variables used for postfilter implementation, mostly history for
262  * smoothing and so on, and context variables for FFT/iFFT.
263  * @{
264  */
265  AVTXContext *rdft, *irdft; ///< contexts for FFT-calculation in the
266  av_tx_fn rdft_fn, irdft_fn; ///< postfilter (for denoise filter)
267  AVTXContext *dct, *dst; ///< contexts for phase shift (in Hilbert
268  av_tx_fn dct_fn, dst_fn; ///< transform, part of postfilter)
269  float sin[511], cos[511]; ///< 8-bit cosine/sine windows over [-pi,pi]
270  ///< range
271  float postfilter_agc; ///< gain control memory, used in
272  ///< #adaptive_gain_control()
273  float dcf_mem[2]; ///< DC filter history
274  /// zero filter output (i.e. excitation) by postfilter
277  int denoise_filter_cache_size; ///< samples in #denoise_filter_cache
278  /// aligned buffer for LPC tilting
279  DECLARE_ALIGNED(32, float, tilted_lpcs_pf)[0x82];
280  /// aligned buffer for denoise coefficients
282  /// aligned buffer for postfilter speech synthesis
284  /**
285  * @}
286  */
288 
289 /**
290  * Set up the variable bit mode (VBM) tree from container extradata.
291  * @param gb bit I/O context.
292  * The bit context (s->gb) should be loaded with byte 23-46 of the
293  * container extradata (i.e. the ones containing the VBM tree).
294  * @param vbm_tree pointer to array to which the decoded VBM tree will be
295  * written.
296  * @return 0 on success, <0 on error.
297  */
298 static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])
299 {
300  int cntr[8] = { 0 }, n, res;
301 
302  memset(vbm_tree, 0xff, sizeof(vbm_tree[0]) * 25);
303  for (n = 0; n < 17; n++) {
304  res = get_bits(gb, 3);
305  if (cntr[res] > 3) // should be >= 3 + (res == 7))
306  return -1;
307  vbm_tree[res * 3 + cntr[res]++] = n;
308  }
309  return 0;
310 }
311 
313 {
314  static const uint8_t bits[] = {
315  2, 2, 2, 4, 4, 4,
316  6, 6, 6, 8, 8, 8,
317  10, 10, 10, 12, 12, 12,
318  14, 14, 14, 14
319  };
320 
323  1, NULL, 0, 0, 0, 0);
324 }
325 
327 {
329  int n;
330 
331  s->postfilter_agc = 0;
332  s->sframe_cache_size = 0;
333  s->skip_bits_next = 0;
334  for (n = 0; n < s->lsps; n++)
335  s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
336  memset(s->excitation_history, 0,
337  sizeof(*s->excitation_history) * MAX_SIGNAL_HISTORY);
338  memset(s->synth_history, 0,
339  sizeof(*s->synth_history) * MAX_LSPS);
340  memset(s->gain_pred_err, 0,
341  sizeof(s->gain_pred_err));
342 
343  if (s->do_apf) {
344  memset(&s->synth_filter_out_buf[MAX_LSPS_ALIGN16 - s->lsps], 0,
345  sizeof(*s->synth_filter_out_buf) * s->lsps);
346  memset(s->dcf_mem, 0,
347  sizeof(*s->dcf_mem) * 2);
348  memset(s->zero_exc_pf, 0,
349  sizeof(*s->zero_exc_pf) * s->history_nsamples);
350  memset(s->denoise_filter_cache, 0, sizeof(s->denoise_filter_cache));
351  }
352 }
353 
354 /**
355  * Set up decoder with parameters from demuxer (extradata etc.).
356  */
358 {
359  static AVOnce init_static_once = AV_ONCE_INIT;
360  int n, flags, pitch_range, lsp16_flag, ret;
362 
363  ff_thread_once(&init_static_once, wmavoice_init_static_data);
364 
365  /**
366  * Extradata layout:
367  * - byte 0-18: WMAPro-in-WMAVoice extradata (see wmaprodec.c),
368  * - byte 19-22: flags field (annoyingly in LE; see below for known
369  * values),
370  * - byte 23-46: variable bitmode tree (really just 17 * 3 bits,
371  * rest is 0).
372  */
373  if (ctx->extradata_size != 46) {
375  "Invalid extradata size %d (should be 46)\n",
376  ctx->extradata_size);
377  return AVERROR_INVALIDDATA;
378  }
379  if (ctx->block_align <= 0 || ctx->block_align > (1<<22)) {
380  av_log(ctx, AV_LOG_ERROR, "Invalid block alignment %d.\n", ctx->block_align);
381  return AVERROR_INVALIDDATA;
382  }
383 
384  flags = AV_RL32(ctx->extradata + 18);
385  s->spillover_bitsize = 3 + av_ceil_log2(ctx->block_align);
386  s->do_apf = flags & 0x1;
387  if (s->do_apf) {
388  float scale = 1.0f;
389 
390  ret = av_tx_init(&s->rdft, &s->rdft_fn, AV_TX_FLOAT_RDFT, 0, 1 << 7, &scale, 0);
391  if (ret < 0)
392  return ret;
393 
394  ret = av_tx_init(&s->irdft, &s->irdft_fn, AV_TX_FLOAT_RDFT, 1, 1 << 7, &scale, 0);
395  if (ret < 0)
396  return ret;
397 
398  scale = 1.0 / (1 << 6);
399  ret = av_tx_init(&s->dct, &s->dct_fn, AV_TX_FLOAT_DCT_I, 0, 1 << 6, &scale, 0);
400  if (ret < 0)
401  return ret;
402 
403  scale = 1.0 / (1 << 6);
404  ret = av_tx_init(&s->dst, &s->dst_fn, AV_TX_FLOAT_DST_I, 0, 1 << 6, &scale, 0);
405  if (ret < 0)
406  return ret;
407 
408  ff_sine_window_init(s->cos, 256);
409  memcpy(&s->sin[255], s->cos, 256 * sizeof(s->cos[0]));
410  for (n = 0; n < 255; n++) {
411  s->sin[n] = -s->sin[510 - n];
412  s->cos[510 - n] = s->cos[n];
413  }
414  }
415  s->denoise_strength = (flags >> 2) & 0xF;
416  if (s->denoise_strength >= 12) {
418  "Invalid denoise filter strength %d (max=11)\n",
419  s->denoise_strength);
420  return AVERROR_INVALIDDATA;
421  }
422  s->denoise_tilt_corr = !!(flags & 0x40);
423  s->dc_level = (flags >> 7) & 0xF;
424  s->lsp_q_mode = !!(flags & 0x2000);
425  s->lsp_def_mode = !!(flags & 0x4000);
426  lsp16_flag = flags & 0x1000;
427  if (lsp16_flag) {
428  s->lsps = 16;
429  } else {
430  s->lsps = 10;
431  }
432  for (n = 0; n < s->lsps; n++)
433  s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
434 
435  init_get_bits(&s->gb, ctx->extradata + 22, (ctx->extradata_size - 22) << 3);
436  if (decode_vbmtree(&s->gb, s->vbm_tree) < 0) {
437  av_log(ctx, AV_LOG_ERROR, "Invalid VBM tree; broken extradata?\n");
438  return AVERROR_INVALIDDATA;
439  }
440 
441  if (ctx->sample_rate >= INT_MAX / (256 * 37))
442  return AVERROR_INVALIDDATA;
443 
444  s->min_pitch_val = ((ctx->sample_rate << 8) / 400 + 50) >> 8;
445  s->max_pitch_val = ((ctx->sample_rate << 8) * 37 / 2000 + 50) >> 8;
446  pitch_range = s->max_pitch_val - s->min_pitch_val;
447  if (pitch_range <= 0) {
448  av_log(ctx, AV_LOG_ERROR, "Invalid pitch range; broken extradata?\n");
449  return AVERROR_INVALIDDATA;
450  }
451  s->pitch_nbits = av_ceil_log2(pitch_range);
452  s->last_pitch_val = 40;
453  s->last_acb_type = ACB_TYPE_NONE;
454  s->history_nsamples = s->max_pitch_val + 8;
455 
456  if (s->min_pitch_val < 1 || s->history_nsamples > MAX_SIGNAL_HISTORY) {
457  int min_sr = ((((1 << 8) - 50) * 400) + 0xFF) >> 8,
458  max_sr = ((((MAX_SIGNAL_HISTORY - 8) << 8) + 205) * 2000 / 37) >> 8;
459 
461  "Unsupported samplerate %d (min=%d, max=%d)\n",
462  ctx->sample_rate, min_sr, max_sr); // 322-22097 Hz
463 
464  return AVERROR(ENOSYS);
465  }
466 
467  s->block_conv_table[0] = s->min_pitch_val;
468  s->block_conv_table[1] = (pitch_range * 25) >> 6;
469  s->block_conv_table[2] = (pitch_range * 44) >> 6;
470  s->block_conv_table[3] = s->max_pitch_val - 1;
471  s->block_delta_pitch_hrange = (pitch_range >> 3) & ~0xF;
472  if (s->block_delta_pitch_hrange <= 0) {
473  av_log(ctx, AV_LOG_ERROR, "Invalid delta pitch hrange; broken extradata?\n");
474  return AVERROR_INVALIDDATA;
475  }
476  s->block_delta_pitch_nbits = 1 + av_ceil_log2(s->block_delta_pitch_hrange);
477  s->block_pitch_range = s->block_conv_table[2] +
478  s->block_conv_table[3] + 1 +
479  2 * (s->block_conv_table[1] - 2 * s->min_pitch_val);
480  s->block_pitch_nbits = av_ceil_log2(s->block_pitch_range);
481 
482  av_channel_layout_uninit(&ctx->ch_layout);
484  ctx->sample_fmt = AV_SAMPLE_FMT_FLT;
485 
486  return 0;
487 }
488 
489 /**
490  * @name Postfilter functions
491  * Postfilter functions (gain control, wiener denoise filter, DC filter,
492  * kalman smoothening, plus surrounding code to wrap it)
493  * @{
494  */
495 /**
496  * Adaptive gain control (as used in postfilter).
497  *
498  * Identical to #ff_adaptive_gain_control() in acelp_vectors.c, except
499  * that the energy here is calculated using sum(abs(...)), whereas the
500  * other codecs (e.g. AMR-NB, SIPRO) use sqrt(dotproduct(...)).
501  *
502  * @param out output buffer for filtered samples
503  * @param in input buffer containing the samples as they are after the
504  * postfilter steps so far
505  * @param speech_synth input buffer containing speech synth before postfilter
506  * @param size input buffer size
507  * @param alpha exponential filter factor
508  * @param gain_mem pointer to filter memory (single float)
509  */
510 static void adaptive_gain_control(float *out, const float *in,
511  const float *speech_synth,
512  int size, float alpha, float *gain_mem)
513 {
514  int i;
515  float speech_energy = 0.0, postfilter_energy = 0.0, gain_scale_factor;
516  float mem = *gain_mem;
517 
518  for (i = 0; i < size; i++) {
519  speech_energy += fabsf(speech_synth[i]);
520  postfilter_energy += fabsf(in[i]);
521  }
522  gain_scale_factor = postfilter_energy == 0.0 ? 0.0 :
523  (1.0 - alpha) * speech_energy / postfilter_energy;
524 
525  for (i = 0; i < size; i++) {
526  mem = alpha * mem + gain_scale_factor;
527  out[i] = in[i] * mem;
528  }
529 
530  *gain_mem = mem;
531 }
532 
533 /**
534  * Kalman smoothing function.
535  *
536  * This function looks back pitch +/- 3 samples back into history to find
537  * the best fitting curve (that one giving the optimal gain of the two
538  * signals, i.e. the highest dot product between the two), and then
539  * uses that signal history to smoothen the output of the speech synthesis
540  * filter.
541  *
542  * @param s WMA Voice decoding context
543  * @param pitch pitch of the speech signal
544  * @param in input speech signal
545  * @param out output pointer for smoothened signal
546  * @param size input/output buffer size
547  *
548  * @returns -1 if no smoothening took place, e.g. because no optimal
549  * fit could be found, or 0 on success.
550  */
551 static int kalman_smoothen(WMAVoiceContext *s, int pitch,
552  const float *in, float *out, int size)
553 {
554  int n;
555  float optimal_gain = 0, dot;
556  const float *ptr = &in[-FFMAX(s->min_pitch_val, pitch - 3)],
557  *end = &in[-FFMIN(s->max_pitch_val, pitch + 3)],
558  *best_hist_ptr = NULL;
559 
560  /* find best fitting point in history */
561  do {
562  dot = avpriv_scalarproduct_float_c(in, ptr, size);
563  if (dot > optimal_gain) {
564  optimal_gain = dot;
565  best_hist_ptr = ptr;
566  }
567  } while (--ptr >= end);
568 
569  if (optimal_gain <= 0)
570  return -1;
571  dot = avpriv_scalarproduct_float_c(best_hist_ptr, best_hist_ptr, size);
572  if (dot <= 0) // would be 1.0
573  return -1;
574 
575  if (optimal_gain <= dot) {
576  dot = dot / (dot + 0.6 * optimal_gain); // 0.625-1.000
577  } else
578  dot = 0.625;
579 
580  /* actual smoothing */
581  for (n = 0; n < size; n++)
582  out[n] = best_hist_ptr[n] + dot * (in[n] - best_hist_ptr[n]);
583 
584  return 0;
585 }
586 
587 /**
588  * Get the tilt factor of a formant filter from its transfer function
589  * @see #tilt_factor() in amrnbdec.c, which does essentially the same,
590  * but somehow (??) it does a speech synthesis filter in the
591  * middle, which is missing here
592  *
593  * @param lpcs LPC coefficients
594  * @param n_lpcs Size of LPC buffer
595  * @returns the tilt factor
596  */
597 static float tilt_factor(const float *lpcs, int n_lpcs)
598 {
599  float rh0, rh1;
600 
601  rh0 = 1.0 + avpriv_scalarproduct_float_c(lpcs, lpcs, n_lpcs);
602  rh1 = lpcs[0] + avpriv_scalarproduct_float_c(lpcs, &lpcs[1], n_lpcs - 1);
603 
604  return rh1 / rh0;
605 }
606 
607 /**
608  * Derive denoise filter coefficients (in real domain) from the LPCs.
609  */
610 static void calc_input_response(WMAVoiceContext *s, float *lpcs_src,
611  int fcb_type, float *coeffs_dst, int remainder)
612 {
613  float last_coeff, min = 15.0, max = -15.0;
614  float irange, angle_mul, gain_mul, range, sq;
615  LOCAL_ALIGNED_32(float, coeffs, [0x82]);
616  LOCAL_ALIGNED_32(float, lpcs, [0x82]);
617  LOCAL_ALIGNED_32(float, lpcs_dct, [0x82]);
618  int n, idx;
619 
620  memcpy(coeffs, coeffs_dst, 0x82*sizeof(float));
621 
622  /* Create frequency power spectrum of speech input (i.e. RDFT of LPCs) */
623  s->rdft_fn(s->rdft, lpcs, lpcs_src, sizeof(float));
624 #define log_range(var, assign) do { \
625  float tmp = log10f(assign); var = tmp; \
626  max = FFMAX(max, tmp); min = FFMIN(min, tmp); \
627  } while (0)
628  log_range(last_coeff, lpcs[64] * lpcs[64]);
629  for (n = 1; n < 64; n++)
630  log_range(lpcs[n], lpcs[n * 2] * lpcs[n * 2] +
631  lpcs[n * 2 + 1] * lpcs[n * 2 + 1]);
632  log_range(lpcs[0], lpcs[0] * lpcs[0]);
633 #undef log_range
634  range = max - min;
635  lpcs[64] = last_coeff;
636 
637  /* Now, use this spectrum to pick out these frequencies with higher
638  * (relative) power/energy (which we then take to be "not noise"),
639  * and set up a table (still in lpc[]) of (relative) gains per frequency.
640  * These frequencies will be maintained, while others ("noise") will be
641  * decreased in the filter output. */
642  irange = 64.0 / range; // so irange*(max-value) is in the range [0, 63]
643  gain_mul = range * (fcb_type == FCB_TYPE_HARDCODED ? (5.0 / 13.0) :
644  (5.0 / 14.7));
645  angle_mul = gain_mul * (8.0 * M_LN10 / M_PI);
646  for (n = 0; n <= 64; n++) {
647  float pwr;
648 
649  idx = lrint((max - lpcs[n]) * irange - 1);
650  idx = FFMAX(0, idx);
651  pwr = wmavoice_denoise_power_table[s->denoise_strength][idx];
652  lpcs[n] = angle_mul * pwr;
653 
654  /* 70.57 =~ 1/log10(1.0331663) */
655  idx = av_clipd((pwr * gain_mul - 0.0295) * 70.570526123, 0, INT_MAX / 2);
656 
657  if (idx > 127) { // fall back if index falls outside table range
658  coeffs[n] = wmavoice_energy_table[127] *
659  powf(1.0331663, idx - 127);
660  } else
661  coeffs[n] = wmavoice_energy_table[FFMAX(0, idx)];
662  }
663 
664  /* calculate the Hilbert transform of the gains, which we do (since this
665  * is a sine input) by doing a phase shift (in theory, H(sin())=cos()).
666  * Hilbert_Transform(RDFT(x)) = Laplace_Transform(x), which calculates the
667  * "moment" of the LPCs in this filter. */
668  s->dct_fn(s->dct, lpcs_dct, lpcs, sizeof(float));
669  s->dst_fn(s->dst, lpcs, lpcs_dct, sizeof(float));
670 
671  /* Split out the coefficient indexes into phase/magnitude pairs */
672  idx = 255 + av_clip(lpcs[64], -255, 255);
673  coeffs[0] = coeffs[0] * s->cos[idx];
674  idx = 255 + av_clip(lpcs[64] - 2 * lpcs[63], -255, 255);
675  last_coeff = coeffs[64] * s->cos[idx];
676  for (n = 63;; n--) {
677  idx = 255 + av_clip(-lpcs[64] - 2 * lpcs[n - 1], -255, 255);
678  coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
679  coeffs[n * 2] = coeffs[n] * s->cos[idx];
680 
681  if (!--n) break;
682 
683  idx = 255 + av_clip( lpcs[64] - 2 * lpcs[n - 1], -255, 255);
684  coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
685  coeffs[n * 2] = coeffs[n] * s->cos[idx];
686  }
687  coeffs[64] = last_coeff;
688 
689  /* move into real domain */
690  s->irdft_fn(s->irdft, coeffs_dst, coeffs, sizeof(AVComplexFloat));
691 
692  /* tilt correction and normalize scale */
693  memset(&coeffs_dst[remainder], 0, sizeof(coeffs_dst[0]) * (128 - remainder));
694  if (s->denoise_tilt_corr) {
695  float tilt_mem = 0;
696 
697  coeffs_dst[remainder - 1] = 0;
698  ff_tilt_compensation(&tilt_mem,
699  -1.8 * tilt_factor(coeffs_dst, remainder - 1),
700  coeffs_dst, remainder);
701  }
702  sq = (1.0 / 64.0) * sqrtf(1 / avpriv_scalarproduct_float_c(coeffs_dst, coeffs_dst,
703  remainder));
704  for (n = 0; n < remainder; n++)
705  coeffs_dst[n] *= sq;
706 }
707 
708 /**
709  * This function applies a Wiener filter on the (noisy) speech signal as
710  * a means to denoise it.
711  *
712  * - take RDFT of LPCs to get the power spectrum of the noise + speech;
713  * - using this power spectrum, calculate (for each frequency) the Wiener
714  * filter gain, which depends on the frequency power and desired level
715  * of noise subtraction (when set too high, this leads to artifacts)
716  * We can do this symmetrically over the X-axis (so 0-4kHz is the inverse
717  * of 4-8kHz);
718  * - by doing a phase shift, calculate the Hilbert transform of this array
719  * of per-frequency filter-gains to get the filtering coefficients;
720  * - smoothen/normalize/de-tilt these filter coefficients as desired;
721  * - take RDFT of noisy sound, apply the coefficients and take its IRDFT
722  * to get the denoised speech signal;
723  * - the leftover (i.e. output of the IRDFT on denoised speech data beyond
724  * the frame boundary) are saved and applied to subsequent frames by an
725  * overlap-add method (otherwise you get clicking-artifacts).
726  *
727  * @param s WMA Voice decoding context
728  * @param fcb_type Frame (codebook) type
729  * @param synth_pf input: the noisy speech signal, output: denoised speech
730  * data; should be 16-byte aligned (for ASM purposes)
731  * @param size size of the speech data
732  * @param lpcs LPCs used to synthesize this frame's speech data
733  */
734 static void wiener_denoise(WMAVoiceContext *s, int fcb_type,
735  float *synth_pf, int size,
736  const float *lpcs)
737 {
738  int remainder, lim, n;
739 
740  if (fcb_type != FCB_TYPE_SILENCE) {
741  LOCAL_ALIGNED_32(float, coeffs_f, [0x82]);
742  LOCAL_ALIGNED_32(float, synth_f, [0x82]);
743  float *tilted_lpcs = s->tilted_lpcs_pf,
744  *coeffs = s->denoise_coeffs_pf, tilt_mem = 0;
745 
746  tilted_lpcs[0] = 1.0;
747  memcpy(&tilted_lpcs[1], lpcs, sizeof(lpcs[0]) * s->lsps);
748  memset(&tilted_lpcs[s->lsps + 1], 0,
749  sizeof(tilted_lpcs[0]) * (128 - s->lsps - 1));
750  ff_tilt_compensation(&tilt_mem, 0.7 * tilt_factor(lpcs, s->lsps),
751  tilted_lpcs, s->lsps + 2);
752 
753  /* The IRDFT output (127 samples for 7-bit filter) beyond the frame
754  * size is applied to the next frame. All input beyond this is zero,
755  * and thus all output beyond this will go towards zero, hence we can
756  * limit to min(size-1, 127-size) as a performance consideration. */
757  remainder = FFMIN(127 - size, size - 1);
758  calc_input_response(s, tilted_lpcs, fcb_type, coeffs, remainder);
759 
760  /* apply coefficients (in frequency spectrum domain), i.e. complex
761  * number multiplication */
762  memset(&synth_pf[size], 0, sizeof(synth_pf[0]) * (128 - size));
763  s->rdft_fn(s->rdft, synth_f, synth_pf, sizeof(float));
764  s->rdft_fn(s->rdft, coeffs_f, coeffs, sizeof(float));
765  synth_f[0] *= coeffs_f[0];
766  synth_f[1] *= coeffs_f[1];
767  for (n = 1; n <= 64; n++) {
768  float v1 = synth_f[n * 2], v2 = synth_f[n * 2 + 1];
769  synth_f[n * 2] = v1 * coeffs_f[n * 2] - v2 * coeffs_f[n * 2 + 1];
770  synth_f[n * 2 + 1] = v2 * coeffs_f[n * 2] + v1 * coeffs_f[n * 2 + 1];
771  }
772  s->irdft_fn(s->irdft, synth_pf, synth_f, sizeof(AVComplexFloat));
773  }
774 
775  /* merge filter output with the history of previous runs */
776  if (s->denoise_filter_cache_size) {
777  lim = FFMIN(s->denoise_filter_cache_size, size);
778  for (n = 0; n < lim; n++)
779  synth_pf[n] += s->denoise_filter_cache[n];
780  s->denoise_filter_cache_size -= lim;
781  memmove(s->denoise_filter_cache, &s->denoise_filter_cache[size],
782  sizeof(s->denoise_filter_cache[0]) * s->denoise_filter_cache_size);
783  }
784 
785  /* move remainder of filter output into a cache for future runs */
786  if (fcb_type != FCB_TYPE_SILENCE) {
787  lim = FFMIN(remainder, s->denoise_filter_cache_size);
788  for (n = 0; n < lim; n++)
789  s->denoise_filter_cache[n] += synth_pf[size + n];
790  if (lim < remainder) {
791  memcpy(&s->denoise_filter_cache[lim], &synth_pf[size + lim],
792  sizeof(s->denoise_filter_cache[0]) * (remainder - lim));
793  s->denoise_filter_cache_size = remainder;
794  }
795  }
796 }
797 
798 /**
799  * Averaging projection filter, the postfilter used in WMAVoice.
800  *
801  * This uses the following steps:
802  * - A zero-synthesis filter (generate excitation from synth signal)
803  * - Kalman smoothing on excitation, based on pitch
804  * - Re-synthesized smoothened output
805  * - Iterative Wiener denoise filter
806  * - Adaptive gain filter
807  * - DC filter
808  *
809  * @param s WMAVoice decoding context
810  * @param synth Speech synthesis output (before postfilter)
811  * @param samples Output buffer for filtered samples
812  * @param size Buffer size of synth & samples
813  * @param lpcs Generated LPCs used for speech synthesis
814  * @param zero_exc_pf destination for zero synthesis filter (16-byte aligned)
815  * @param fcb_type Frame type (silence, hardcoded, AW-pulses or FCB-pulses)
816  * @param pitch Pitch of the input signal
817  */
818 static void postfilter(WMAVoiceContext *s, const float *synth,
819  float *samples, int size,
820  const float *lpcs, float *zero_exc_pf,
821  int fcb_type, int pitch)
822 {
823  float synth_filter_in_buf[MAX_FRAMESIZE / 2],
824  *synth_pf = &s->synth_filter_out_buf[MAX_LSPS_ALIGN16],
825  *synth_filter_in = zero_exc_pf;
826 
827  av_assert0(size <= MAX_FRAMESIZE / 2);
828 
829  /* generate excitation from input signal */
830  ff_celp_lp_zero_synthesis_filterf(zero_exc_pf, lpcs, synth, size, s->lsps);
831 
832  if (fcb_type >= FCB_TYPE_AW_PULSES &&
833  !kalman_smoothen(s, pitch, zero_exc_pf, synth_filter_in_buf, size))
834  synth_filter_in = synth_filter_in_buf;
835 
836  /* re-synthesize speech after smoothening, and keep history */
837  ff_celp_lp_synthesis_filterf(synth_pf, lpcs,
838  synth_filter_in, size, s->lsps);
839  memcpy(&synth_pf[-s->lsps], &synth_pf[size - s->lsps],
840  sizeof(synth_pf[0]) * s->lsps);
841 
842  wiener_denoise(s, fcb_type, synth_pf, size, lpcs);
843 
844  adaptive_gain_control(samples, synth_pf, synth, size, 0.99,
845  &s->postfilter_agc);
846 
847  if (s->dc_level > 8) {
848  /* remove ultra-low frequency DC noise / highpass filter;
849  * coefficients are identical to those used in SIPR decoding,
850  * and very closely resemble those used in AMR-NB decoding. */
852  (const float[2]) { -1.99997, 1.0 },
853  (const float[2]) { -1.9330735188, 0.93589198496 },
854  0.93980580475, s->dcf_mem, size);
855  }
856 }
857 /**
858  * @}
859  */
860 
861 /**
862  * Dequantize LSPs
863  * @param lsps output pointer to the array that will hold the LSPs
864  * @param num number of LSPs to be dequantized
865  * @param values quantized values, contains n_stages values
866  * @param sizes range (i.e. max value) of each quantized value
867  * @param n_stages number of dequantization runs
868  * @param table dequantization table to be used
869  * @param mul_q LSF multiplier
870  * @param base_q base (lowest) LSF values
871  */
872 static void dequant_lsps(double *lsps, int num,
873  const uint16_t *values,
874  const uint16_t *sizes,
875  int n_stages, const uint8_t *table,
876  const double *mul_q,
877  const double *base_q)
878 {
879  int n, m;
880 
881  memset(lsps, 0, num * sizeof(*lsps));
882  for (n = 0; n < n_stages; n++) {
883  const uint8_t *t_off = &table[values[n] * num];
884  double base = base_q[n], mul = mul_q[n];
885 
886  for (m = 0; m < num; m++)
887  lsps[m] += base + mul * t_off[m];
888 
889  table += sizes[n] * num;
890  }
891 }
892 
893 /**
894  * @name LSP dequantization routines
895  * LSP dequantization routines, for 10/16LSPs and independent/residual coding.
896  * lsp10i() consumes 24 bits; lsp10r() consumes an additional 24 bits;
897  * lsp16i() consumes 34 bits; lsp16r() consumes an additional 26 bits.
898  * @{
899  */
900 /**
901  * Parse 10 independently-coded LSPs.
902  */
903 static void dequant_lsp10i(GetBitContext *gb, double *lsps)
904 {
905  static const uint16_t vec_sizes[4] = { 256, 64, 32, 32 };
906  static const double mul_lsf[4] = {
907  5.2187144800e-3, 1.4626986422e-3,
908  9.6179549166e-4, 1.1325736225e-3
909  };
910  static const double base_lsf[4] = {
911  M_PI * -2.15522e-1, M_PI * -6.1646e-2,
912  M_PI * -3.3486e-2, M_PI * -5.7408e-2
913  };
914  uint16_t v[4];
915 
916  v[0] = get_bits(gb, 8);
917  v[1] = get_bits(gb, 6);
918  v[2] = get_bits(gb, 5);
919  v[3] = get_bits(gb, 5);
920 
921  dequant_lsps(lsps, 10, v, vec_sizes, 4, wmavoice_dq_lsp10i,
922  mul_lsf, base_lsf);
923 }
924 
925 /**
926  * Parse 10 independently-coded LSPs, and then derive the tables to
927  * generate LSPs for the other frames from them (residual coding).
928  */
930  double *i_lsps, const double *old,
931  double *a1, double *a2, int q_mode)
932 {
933  static const uint16_t vec_sizes[3] = { 128, 64, 64 };
934  static const double mul_lsf[3] = {
935  2.5807601174e-3, 1.2354460219e-3, 1.1763821673e-3
936  };
937  static const double base_lsf[3] = {
938  M_PI * -1.07448e-1, M_PI * -5.2706e-2, M_PI * -5.1634e-2
939  };
940  const float (*ipol_tab)[2][10] = q_mode ?
942  uint16_t interpol, v[3];
943  int n;
944 
945  dequant_lsp10i(gb, i_lsps);
946 
947  interpol = get_bits(gb, 5);
948  v[0] = get_bits(gb, 7);
949  v[1] = get_bits(gb, 6);
950  v[2] = get_bits(gb, 6);
951 
952  for (n = 0; n < 10; n++) {
953  double delta = old[n] - i_lsps[n];
954  a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];
955  a1[10 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];
956  }
957 
958  dequant_lsps(a2, 20, v, vec_sizes, 3, wmavoice_dq_lsp10r,
959  mul_lsf, base_lsf);
960 }
961 
962 /**
963  * Parse 16 independently-coded LSPs.
964  */
965 static void dequant_lsp16i(GetBitContext *gb, double *lsps)
966 {
967  static const uint16_t vec_sizes[5] = { 256, 64, 128, 64, 128 };
968  static const double mul_lsf[5] = {
969  3.3439586280e-3, 6.9908173703e-4,
970  3.3216608306e-3, 1.0334960326e-3,
971  3.1899104283e-3
972  };
973  static const double base_lsf[5] = {
974  M_PI * -1.27576e-1, M_PI * -2.4292e-2,
975  M_PI * -1.28094e-1, M_PI * -3.2128e-2,
976  M_PI * -1.29816e-1
977  };
978  uint16_t v[5];
979 
980  v[0] = get_bits(gb, 8);
981  v[1] = get_bits(gb, 6);
982  v[2] = get_bits(gb, 7);
983  v[3] = get_bits(gb, 6);
984  v[4] = get_bits(gb, 7);
985 
986  dequant_lsps( lsps, 5, v, vec_sizes, 2,
987  wmavoice_dq_lsp16i1, mul_lsf, base_lsf);
988  dequant_lsps(&lsps[5], 5, &v[2], &vec_sizes[2], 2,
989  wmavoice_dq_lsp16i2, &mul_lsf[2], &base_lsf[2]);
990  dequant_lsps(&lsps[10], 6, &v[4], &vec_sizes[4], 1,
991  wmavoice_dq_lsp16i3, &mul_lsf[4], &base_lsf[4]);
992 }
993 
994 /**
995  * Parse 16 independently-coded LSPs, and then derive the tables to
996  * generate LSPs for the other frames from them (residual coding).
997  */
999  double *i_lsps, const double *old,
1000  double *a1, double *a2, int q_mode)
1001 {
1002  static const uint16_t vec_sizes[3] = { 128, 128, 128 };
1003  static const double mul_lsf[3] = {
1004  1.2232979501e-3, 1.4062241527e-3, 1.6114744851e-3
1005  };
1006  static const double base_lsf[3] = {
1007  M_PI * -5.5830e-2, M_PI * -5.2908e-2, M_PI * -5.4776e-2
1008  };
1009  const float (*ipol_tab)[2][16] = q_mode ?
1011  uint16_t interpol, v[3];
1012  int n;
1013 
1014  dequant_lsp16i(gb, i_lsps);
1015 
1016  interpol = get_bits(gb, 5);
1017  v[0] = get_bits(gb, 7);
1018  v[1] = get_bits(gb, 7);
1019  v[2] = get_bits(gb, 7);
1020 
1021  for (n = 0; n < 16; n++) {
1022  double delta = old[n] - i_lsps[n];
1023  a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];
1024  a1[16 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];
1025  }
1026 
1027  dequant_lsps( a2, 10, v, vec_sizes, 1,
1028  wmavoice_dq_lsp16r1, mul_lsf, base_lsf);
1029  dequant_lsps(&a2[10], 10, &v[1], &vec_sizes[1], 1,
1030  wmavoice_dq_lsp16r2, &mul_lsf[1], &base_lsf[1]);
1031  dequant_lsps(&a2[20], 12, &v[2], &vec_sizes[2], 1,
1032  wmavoice_dq_lsp16r3, &mul_lsf[2], &base_lsf[2]);
1033 }
1034 
1035 /**
1036  * @}
1037  * @name Pitch-adaptive window coding functions
1038  * The next few functions are for pitch-adaptive window coding.
1039  * @{
1040  */
1041 /**
1042  * Parse the offset of the first pitch-adaptive window pulses, and
1043  * the distribution of pulses between the two blocks in this frame.
1044  * @param s WMA Voice decoding context private data
1045  * @param gb bit I/O context
1046  * @param pitch pitch for each block in this frame
1047  */
1049  const int *pitch)
1050 {
1051  static const int16_t start_offset[94] = {
1052  -11, -9, -7, -5, -3, -1, 1, 3, 5, 7, 9, 11,
1053  13, 15, 18, 17, 19, 20, 21, 22, 23, 24, 25, 26,
1054  27, 28, 29, 30, 31, 32, 33, 35, 37, 39, 41, 43,
1055  45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67,
1056  69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91,
1057  93, 95, 97, 99, 101, 103, 105, 107, 109, 111, 113, 115,
1058  117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139,
1059  141, 143, 145, 147, 149, 151, 153, 155, 157, 159
1060  };
1061  int bits, offset;
1062 
1063  /* position of pulse */
1064  s->aw_idx_is_ext = 0;
1065  if ((bits = get_bits(gb, 6)) >= 54) {
1066  s->aw_idx_is_ext = 1;
1067  bits += (bits - 54) * 3 + get_bits(gb, 2);
1068  }
1069 
1070  /* for a repeated pulse at pulse_off with a pitch_lag of pitch[], count
1071  * the distribution of the pulses in each block contained in this frame. */
1072  s->aw_pulse_range = FFMIN(pitch[0], pitch[1]) > 32 ? 24 : 16;
1073  for (offset = start_offset[bits]; offset < 0; offset += pitch[0]) ;
1074  s->aw_n_pulses[0] = (pitch[0] - 1 + MAX_FRAMESIZE / 2 - offset) / pitch[0];
1075  s->aw_first_pulse_off[0] = offset - s->aw_pulse_range / 2;
1076  offset += s->aw_n_pulses[0] * pitch[0];
1077  s->aw_n_pulses[1] = (pitch[1] - 1 + MAX_FRAMESIZE - offset) / pitch[1];
1078  s->aw_first_pulse_off[1] = offset - (MAX_FRAMESIZE + s->aw_pulse_range) / 2;
1079 
1080  /* if continuing from a position before the block, reset position to
1081  * start of block (when corrected for the range over which it can be
1082  * spread in aw_pulse_set1()). */
1083  if (start_offset[bits] < MAX_FRAMESIZE / 2) {
1084  while (s->aw_first_pulse_off[1] - pitch[1] + s->aw_pulse_range > 0)
1085  s->aw_first_pulse_off[1] -= pitch[1];
1086  if (start_offset[bits] < 0)
1087  while (s->aw_first_pulse_off[0] - pitch[0] + s->aw_pulse_range > 0)
1088  s->aw_first_pulse_off[0] -= pitch[0];
1089  }
1090 }
1091 
1092 /**
1093  * Apply second set of pitch-adaptive window pulses.
1094  * @param s WMA Voice decoding context private data
1095  * @param gb bit I/O context
1096  * @param block_idx block index in frame [0, 1]
1097  * @param fcb structure containing fixed codebook vector info
1098  * @return -1 on error, 0 otherwise
1099  */
1101  int block_idx, AMRFixed *fcb)
1102 {
1103  uint16_t use_mask_mem[9]; // only 5 are used, rest is padding
1104  uint16_t *use_mask = use_mask_mem + 2;
1105  /* in this function, idx is the index in the 80-bit (+ padding) use_mask
1106  * bit-array. Since use_mask consists of 16-bit values, the lower 4 bits
1107  * of idx are the position of the bit within a particular item in the
1108  * array (0 being the most significant bit, and 15 being the least
1109  * significant bit), and the remainder (>> 4) is the index in the
1110  * use_mask[]-array. This is faster and uses less memory than using a
1111  * 80-byte/80-int array. */
1112  int pulse_off = s->aw_first_pulse_off[block_idx],
1113  pulse_start, n, idx, range, aidx, start_off = 0;
1114 
1115  /* set offset of first pulse to within this block */
1116  if (s->aw_n_pulses[block_idx] > 0)
1117  while (pulse_off + s->aw_pulse_range < 1)
1118  pulse_off += fcb->pitch_lag;
1119 
1120  /* find range per pulse */
1121  if (s->aw_n_pulses[0] > 0) {
1122  if (block_idx == 0) {
1123  range = 32;
1124  } else /* block_idx = 1 */ {
1125  range = 8;
1126  if (s->aw_n_pulses[block_idx] > 0)
1127  pulse_off = s->aw_next_pulse_off_cache;
1128  }
1129  } else
1130  range = 16;
1131  pulse_start = s->aw_n_pulses[block_idx] > 0 ? pulse_off - range / 2 : 0;
1132 
1133  /* aw_pulse_set1() already applies pulses around pulse_off (to be exactly,
1134  * in the range of [pulse_off, pulse_off + s->aw_pulse_range], and thus
1135  * we exclude that range from being pulsed again in this function. */
1136  memset(&use_mask[-2], 0, 2 * sizeof(use_mask[0]));
1137  memset( use_mask, -1, 5 * sizeof(use_mask[0]));
1138  memset(&use_mask[5], 0, 2 * sizeof(use_mask[0]));
1139  if (s->aw_n_pulses[block_idx] > 0)
1140  for (idx = pulse_off; idx < MAX_FRAMESIZE / 2; idx += fcb->pitch_lag) {
1141  int excl_range = s->aw_pulse_range; // always 16 or 24
1142  uint16_t *use_mask_ptr = &use_mask[idx >> 4];
1143  int first_sh = 16 - (idx & 15);
1144  *use_mask_ptr++ &= 0xFFFFu << first_sh;
1145  excl_range -= first_sh;
1146  if (excl_range >= 16) {
1147  *use_mask_ptr++ = 0;
1148  *use_mask_ptr &= 0xFFFF >> (excl_range - 16);
1149  } else
1150  *use_mask_ptr &= 0xFFFF >> excl_range;
1151  }
1152 
1153  /* find the 'aidx'th offset that is not excluded */
1154  aidx = get_bits(gb, s->aw_n_pulses[0] > 0 ? 5 - 2 * block_idx : 4);
1155  for (n = 0; n <= aidx; pulse_start++) {
1156  for (idx = pulse_start; idx < 0; idx += fcb->pitch_lag) ;
1157  if (idx >= MAX_FRAMESIZE / 2) { // find from zero
1158  if (use_mask[0]) idx = 0x0F;
1159  else if (use_mask[1]) idx = 0x1F;
1160  else if (use_mask[2]) idx = 0x2F;
1161  else if (use_mask[3]) idx = 0x3F;
1162  else if (use_mask[4]) idx = 0x4F;
1163  else return -1;
1164  idx -= av_log2_16bit(use_mask[idx >> 4]);
1165  }
1166  if (use_mask[idx >> 4] & (0x8000 >> (idx & 15))) {
1167  use_mask[idx >> 4] &= ~(0x8000 >> (idx & 15));
1168  n++;
1169  start_off = idx;
1170  }
1171  }
1172 
1173  fcb->x[fcb->n] = start_off;
1174  fcb->y[fcb->n] = get_bits1(gb) ? -1.0 : 1.0;
1175  fcb->n++;
1176 
1177  /* set offset for next block, relative to start of that block */
1178  n = (MAX_FRAMESIZE / 2 - start_off) % fcb->pitch_lag;
1179  s->aw_next_pulse_off_cache = n ? fcb->pitch_lag - n : 0;
1180  return 0;
1181 }
1182 
1183 /**
1184  * Apply first set of pitch-adaptive window pulses.
1185  * @param s WMA Voice decoding context private data
1186  * @param gb bit I/O context
1187  * @param block_idx block index in frame [0, 1]
1188  * @param fcb storage location for fixed codebook pulse info
1189  */
1191  int block_idx, AMRFixed *fcb)
1192 {
1193  int val = get_bits(gb, 12 - 2 * (s->aw_idx_is_ext && !block_idx));
1194  float v;
1195 
1196  if (s->aw_n_pulses[block_idx] > 0) {
1197  int n, v_mask, i_mask, sh, n_pulses;
1198 
1199  if (s->aw_pulse_range == 24) { // 3 pulses, 1:sign + 3:index each
1200  n_pulses = 3;
1201  v_mask = 8;
1202  i_mask = 7;
1203  sh = 4;
1204  } else { // 4 pulses, 1:sign + 2:index each
1205  n_pulses = 4;
1206  v_mask = 4;
1207  i_mask = 3;
1208  sh = 3;
1209  }
1210 
1211  for (n = n_pulses - 1; n >= 0; n--, val >>= sh) {
1212  fcb->y[fcb->n] = (val & v_mask) ? -1.0 : 1.0;
1213  fcb->x[fcb->n] = (val & i_mask) * n_pulses + n +
1214  s->aw_first_pulse_off[block_idx];
1215  while (fcb->x[fcb->n] < 0)
1216  fcb->x[fcb->n] += fcb->pitch_lag;
1217  if (fcb->x[fcb->n] < MAX_FRAMESIZE / 2)
1218  fcb->n++;
1219  }
1220  } else {
1221  int num2 = (val & 0x1FF) >> 1, delta, idx;
1222 
1223  if (num2 < 1 * 79) { delta = 1; idx = num2 + 1; }
1224  else if (num2 < 2 * 78) { delta = 3; idx = num2 + 1 - 1 * 77; }
1225  else if (num2 < 3 * 77) { delta = 5; idx = num2 + 1 - 2 * 76; }
1226  else { delta = 7; idx = num2 + 1 - 3 * 75; }
1227  v = (val & 0x200) ? -1.0 : 1.0;
1228 
1229  fcb->no_repeat_mask |= 3 << fcb->n;
1230  fcb->x[fcb->n] = idx - delta;
1231  fcb->y[fcb->n] = v;
1232  fcb->x[fcb->n + 1] = idx;
1233  fcb->y[fcb->n + 1] = (val & 1) ? -v : v;
1234  fcb->n += 2;
1235  }
1236 }
1237 
1238 /**
1239  * @}
1240  *
1241  * Generate a random number from frame_cntr and block_idx, which will live
1242  * in the range [0, 1000 - block_size] (so it can be used as an index in a
1243  * table of size 1000 of which you want to read block_size entries).
1244  *
1245  * @param frame_cntr current frame number
1246  * @param block_num current block index
1247  * @param block_size amount of entries we want to read from a table
1248  * that has 1000 entries
1249  * @return a (non-)random number in the [0, 1000 - block_size] range.
1250  */
1251 static int pRNG(int frame_cntr, int block_num, int block_size)
1252 {
1253  /* array to simplify the calculation of z:
1254  * y = (x % 9) * 5 + 6;
1255  * z = (49995 * x) / y;
1256  * Since y only has 9 values, we can remove the division by using a
1257  * LUT and using FASTDIV-style divisions. For each of the 9 values
1258  * of y, we can rewrite z as:
1259  * z = x * (49995 / y) + x * ((49995 % y) / y)
1260  * In this table, each col represents one possible value of y, the
1261  * first number is 49995 / y, and the second is the FASTDIV variant
1262  * of 49995 % y / y. */
1263  static const unsigned int div_tbl[9][2] = {
1264  { 8332, 3 * 715827883U }, // y = 6
1265  { 4545, 0 * 390451573U }, // y = 11
1266  { 3124, 11 * 268435456U }, // y = 16
1267  { 2380, 15 * 204522253U }, // y = 21
1268  { 1922, 23 * 165191050U }, // y = 26
1269  { 1612, 23 * 138547333U }, // y = 31
1270  { 1388, 27 * 119304648U }, // y = 36
1271  { 1219, 16 * 104755300U }, // y = 41
1272  { 1086, 39 * 93368855U } // y = 46
1273  };
1274  unsigned int z, y, x = MUL16(block_num, 1877) + frame_cntr;
1275  if (x >= 0xFFFF) x -= 0xFFFF; // max value of x is 8*1877+0xFFFE=0x13AA6,
1276  // so this is effectively a modulo (%)
1277  y = x - 9 * MULH(477218589, x); // x % 9
1278  z = (uint16_t) (x * div_tbl[y][0] + UMULH(x, div_tbl[y][1]));
1279  // z = x * 49995 / (y * 5 + 6)
1280  return z % (1000 - block_size);
1281 }
1282 
1283 /**
1284  * Parse hardcoded signal for a single block.
1285  * @note see #synth_block().
1286  */
1288  int block_idx, int size,
1289  const struct frame_type_desc *frame_desc,
1290  float *excitation)
1291 {
1292  float gain;
1293  int n, r_idx;
1294 
1296 
1297  /* Set the offset from which we start reading wmavoice_std_codebook */
1298  if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
1299  r_idx = pRNG(s->frame_cntr, block_idx, size);
1300  gain = s->silence_gain;
1301  } else /* FCB_TYPE_HARDCODED */ {
1302  r_idx = get_bits(gb, 8);
1303  gain = wmavoice_gain_universal[get_bits(gb, 6)];
1304  }
1305 
1306  /* Clear gain prediction parameters */
1307  memset(s->gain_pred_err, 0, sizeof(s->gain_pred_err));
1308 
1309  /* Apply gain to hardcoded codebook and use that as excitation signal */
1310  for (n = 0; n < size; n++)
1311  excitation[n] = wmavoice_std_codebook[r_idx + n] * gain;
1312 }
1313 
1314 /**
1315  * Parse FCB/ACB signal for a single block.
1316  * @note see #synth_block().
1317  */
1319  int block_idx, int size,
1320  int block_pitch_sh2,
1321  const struct frame_type_desc *frame_desc,
1322  float *excitation)
1323 {
1324  static const float gain_coeff[6] = {
1325  0.8169, -0.06545, 0.1726, 0.0185, -0.0359, 0.0458
1326  };
1327  float pulses[MAX_FRAMESIZE / 2], pred_err, acb_gain, fcb_gain;
1328  int n, idx, gain_weight;
1329  AMRFixed fcb;
1330 
1331  av_assert0(size <= MAX_FRAMESIZE / 2);
1332  memset(pulses, 0, sizeof(*pulses) * size);
1333 
1334  fcb.pitch_lag = block_pitch_sh2 >> 2;
1335  fcb.pitch_fac = 1.0;
1336  fcb.no_repeat_mask = 0;
1337  fcb.n = 0;
1338 
1339  /* For the other frame types, this is where we apply the innovation
1340  * (fixed) codebook pulses of the speech signal. */
1341  if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
1342  aw_pulse_set1(s, gb, block_idx, &fcb);
1343  if (aw_pulse_set2(s, gb, block_idx, &fcb)) {
1344  /* Conceal the block with silence and return.
1345  * Skip the correct amount of bits to read the next
1346  * block from the correct offset. */
1347  int r_idx = pRNG(s->frame_cntr, block_idx, size);
1348 
1349  for (n = 0; n < size; n++)
1350  excitation[n] =
1351  wmavoice_std_codebook[r_idx + n] * s->silence_gain;
1352  skip_bits(gb, 7 + 1);
1353  return;
1354  }
1355  } else /* FCB_TYPE_EXC_PULSES */ {
1356  int offset_nbits = 5 - frame_desc->log_n_blocks;
1357 
1358  fcb.no_repeat_mask = -1;
1359  /* similar to ff_decode_10_pulses_35bits(), but with single pulses
1360  * (instead of double) for a subset of pulses */
1361  for (n = 0; n < 5; n++) {
1362  float sign;
1363  int pos1, pos2;
1364 
1365  sign = get_bits1(gb) ? 1.0 : -1.0;
1366  pos1 = get_bits(gb, offset_nbits);
1367  fcb.x[fcb.n] = n + 5 * pos1;
1368  fcb.y[fcb.n++] = sign;
1369  if (n < frame_desc->dbl_pulses) {
1370  pos2 = get_bits(gb, offset_nbits);
1371  fcb.x[fcb.n] = n + 5 * pos2;
1372  fcb.y[fcb.n++] = (pos1 < pos2) ? -sign : sign;
1373  }
1374  }
1375  }
1376  ff_set_fixed_vector(pulses, &fcb, 1.0, size);
1377 
1378  /* Calculate gain for adaptive & fixed codebook signal.
1379  * see ff_amr_set_fixed_gain(). */
1380  idx = get_bits(gb, 7);
1381  fcb_gain = expf(avpriv_scalarproduct_float_c(s->gain_pred_err,
1382  gain_coeff, 6) -
1383  5.2409161640 + wmavoice_gain_codebook_fcb[idx]);
1384  acb_gain = wmavoice_gain_codebook_acb[idx];
1385  pred_err = av_clipf(wmavoice_gain_codebook_fcb[idx],
1386  -2.9957322736 /* log(0.05) */,
1387  1.6094379124 /* log(5.0) */);
1388 
1389  gain_weight = 8 >> frame_desc->log_n_blocks;
1390  memmove(&s->gain_pred_err[gain_weight], s->gain_pred_err,
1391  sizeof(*s->gain_pred_err) * (6 - gain_weight));
1392  for (n = 0; n < gain_weight; n++)
1393  s->gain_pred_err[n] = pred_err;
1394 
1395  /* Calculation of adaptive codebook */
1396  if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
1397  int len;
1398  for (n = 0; n < size; n += len) {
1399  int next_idx_sh16;
1400  int abs_idx = block_idx * size + n;
1401  int pitch_sh16 = (s->last_pitch_val << 16) +
1402  s->pitch_diff_sh16 * abs_idx;
1403  int pitch = (pitch_sh16 + 0x6FFF) >> 16;
1404  int idx_sh16 = ((pitch << 16) - pitch_sh16) * 8 + 0x58000;
1405  idx = idx_sh16 >> 16;
1406  if (s->pitch_diff_sh16) {
1407  if (s->pitch_diff_sh16 > 0) {
1408  next_idx_sh16 = (idx_sh16) &~ 0xFFFF;
1409  } else
1410  next_idx_sh16 = (idx_sh16 + 0x10000) &~ 0xFFFF;
1411  len = av_clip((idx_sh16 - next_idx_sh16) / s->pitch_diff_sh16 / 8,
1412  1, size - n);
1413  } else
1414  len = size;
1415 
1416  ff_acelp_interpolatef(&excitation[n], &excitation[n - pitch],
1418  idx, 9, len);
1419  }
1420  } else /* ACB_TYPE_HAMMING */ {
1421  int block_pitch = block_pitch_sh2 >> 2;
1422  idx = block_pitch_sh2 & 3;
1423  if (idx) {
1424  ff_acelp_interpolatef(excitation, &excitation[-block_pitch],
1426  idx, 8, size);
1427  } else
1428  av_memcpy_backptr((uint8_t *) excitation, sizeof(float) * block_pitch,
1429  sizeof(float) * size);
1430  }
1431 
1432  /* Interpolate ACB/FCB and use as excitation signal */
1433  ff_weighted_vector_sumf(excitation, excitation, pulses,
1434  acb_gain, fcb_gain, size);
1435 }
1436 
1437 /**
1438  * Parse data in a single block.
1439  *
1440  * @param s WMA Voice decoding context private data
1441  * @param gb bit I/O context
1442  * @param block_idx index of the to-be-read block
1443  * @param size amount of samples to be read in this block
1444  * @param block_pitch_sh2 pitch for this block << 2
1445  * @param lsps LSPs for (the end of) this frame
1446  * @param prev_lsps LSPs for the last frame
1447  * @param frame_desc frame type descriptor
1448  * @param excitation target memory for the ACB+FCB interpolated signal
1449  * @param synth target memory for the speech synthesis filter output
1450  * @return 0 on success, <0 on error.
1451  */
1453  int block_idx, int size,
1454  int block_pitch_sh2,
1455  const double *lsps, const double *prev_lsps,
1456  const struct frame_type_desc *frame_desc,
1457  float *excitation, float *synth)
1458 {
1459  double i_lsps[MAX_LSPS];
1460  float lpcs[MAX_LSPS];
1461  float fac;
1462  int n;
1463 
1464  if (frame_desc->acb_type == ACB_TYPE_NONE)
1465  synth_block_hardcoded(s, gb, block_idx, size, frame_desc, excitation);
1466  else
1467  synth_block_fcb_acb(s, gb, block_idx, size, block_pitch_sh2,
1468  frame_desc, excitation);
1469 
1470  /* convert interpolated LSPs to LPCs */
1471  fac = (block_idx + 0.5) / frame_desc->n_blocks;
1472  for (n = 0; n < s->lsps; n++) // LSF -> LSP
1473  i_lsps[n] = cos(prev_lsps[n] + fac * (lsps[n] - prev_lsps[n]));
1474  ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
1475 
1476  /* Speech synthesis */
1477  ff_celp_lp_synthesis_filterf(synth, lpcs, excitation, size, s->lsps);
1478 }
1479 
1480 /**
1481  * Synthesize output samples for a single frame.
1482  *
1483  * @param ctx WMA Voice decoder context
1484  * @param gb bit I/O context (s->gb or one for cross-packet superframes)
1485  * @param frame_idx Frame number within superframe [0-2]
1486  * @param samples pointer to output sample buffer, has space for at least 160
1487  * samples
1488  * @param lsps LSP array
1489  * @param prev_lsps array of previous frame's LSPs
1490  * @param excitation target buffer for excitation signal
1491  * @param synth target buffer for synthesized speech data
1492  * @return 0 on success, <0 on error.
1493  */
1494 static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx,
1495  float *samples,
1496  const double *lsps, const double *prev_lsps,
1497  float *excitation, float *synth)
1498 {
1500  int n, n_blocks_x2, log_n_blocks_x2, av_uninit(cur_pitch_val);
1501  int pitch[MAX_BLOCKS], av_uninit(last_block_pitch);
1502 
1503  /* Parse frame type ("frame header"), see frame_descs */
1504  int bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc, 6, 3)], block_nsamples;
1505 
1506  pitch[0] = INT_MAX;
1507 
1508  if (bd_idx < 0) {
1510  "Invalid frame type VLC code, skipping\n");
1511  return AVERROR_INVALIDDATA;
1512  }
1513 
1514  block_nsamples = MAX_FRAMESIZE / frame_descs[bd_idx].n_blocks;
1515 
1516  /* Pitch calculation for ACB_TYPE_ASYMMETRIC ("pitch-per-frame") */
1517  if (frame_descs[bd_idx].acb_type == ACB_TYPE_ASYMMETRIC) {
1518  /* Pitch is provided per frame, which is interpreted as the pitch of
1519  * the last sample of the last block of this frame. We can interpolate
1520  * the pitch of other blocks (and even pitch-per-sample) by gradually
1521  * incrementing/decrementing prev_frame_pitch to cur_pitch_val. */
1522  n_blocks_x2 = frame_descs[bd_idx].n_blocks << 1;
1523  log_n_blocks_x2 = frame_descs[bd_idx].log_n_blocks + 1;
1524  cur_pitch_val = s->min_pitch_val + get_bits(gb, s->pitch_nbits);
1525  cur_pitch_val = FFMIN(cur_pitch_val, s->max_pitch_val - 1);
1526  if (s->last_acb_type == ACB_TYPE_NONE ||
1527  20 * abs(cur_pitch_val - s->last_pitch_val) >
1528  (cur_pitch_val + s->last_pitch_val))
1529  s->last_pitch_val = cur_pitch_val;
1530 
1531  /* pitch per block */
1532  for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
1533  int fac = n * 2 + 1;
1534 
1535  pitch[n] = (MUL16(fac, cur_pitch_val) +
1536  MUL16((n_blocks_x2 - fac), s->last_pitch_val) +
1537  frame_descs[bd_idx].n_blocks) >> log_n_blocks_x2;
1538  }
1539 
1540  /* "pitch-diff-per-sample" for calculation of pitch per sample */
1541  s->pitch_diff_sh16 =
1542  (cur_pitch_val - s->last_pitch_val) * (1 << 16) / MAX_FRAMESIZE;
1543  }
1544 
1545  /* Global gain (if silence) and pitch-adaptive window coordinates */
1546  switch (frame_descs[bd_idx].fcb_type) {
1547  case FCB_TYPE_SILENCE:
1548  s->silence_gain = wmavoice_gain_silence[get_bits(gb, 8)];
1549  break;
1550  case FCB_TYPE_AW_PULSES:
1551  aw_parse_coords(s, gb, pitch);
1552  break;
1553  }
1554 
1555  for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
1556  int bl_pitch_sh2;
1557 
1558  /* Pitch calculation for ACB_TYPE_HAMMING ("pitch-per-block") */
1559  switch (frame_descs[bd_idx].acb_type) {
1560  case ACB_TYPE_HAMMING: {
1561  /* Pitch is given per block. Per-block pitches are encoded as an
1562  * absolute value for the first block, and then delta values
1563  * relative to this value) for all subsequent blocks. The scale of
1564  * this pitch value is semi-logarithmic compared to its use in the
1565  * decoder, so we convert it to normal scale also. */
1566  int block_pitch,
1567  t1 = (s->block_conv_table[1] - s->block_conv_table[0]) << 2,
1568  t2 = (s->block_conv_table[2] - s->block_conv_table[1]) << 1,
1569  t3 = s->block_conv_table[3] - s->block_conv_table[2] + 1;
1570 
1571  if (n == 0) {
1572  block_pitch = get_bits(gb, s->block_pitch_nbits);
1573  } else
1574  block_pitch = last_block_pitch - s->block_delta_pitch_hrange +
1575  get_bits(gb, s->block_delta_pitch_nbits);
1576  /* Convert last_ so that any next delta is within _range */
1577  last_block_pitch = av_clip(block_pitch,
1578  s->block_delta_pitch_hrange,
1579  s->block_pitch_range -
1580  s->block_delta_pitch_hrange);
1581 
1582  /* Convert semi-log-style scale back to normal scale */
1583  if (block_pitch < t1) {
1584  bl_pitch_sh2 = (s->block_conv_table[0] << 2) + block_pitch;
1585  } else {
1586  block_pitch -= t1;
1587  if (block_pitch < t2) {
1588  bl_pitch_sh2 =
1589  (s->block_conv_table[1] << 2) + (block_pitch << 1);
1590  } else {
1591  block_pitch -= t2;
1592  if (block_pitch < t3) {
1593  bl_pitch_sh2 =
1594  (s->block_conv_table[2] + block_pitch) << 2;
1595  } else
1596  bl_pitch_sh2 = s->block_conv_table[3] << 2;
1597  }
1598  }
1599  pitch[n] = bl_pitch_sh2 >> 2;
1600  break;
1601  }
1602 
1603  case ACB_TYPE_ASYMMETRIC: {
1604  bl_pitch_sh2 = pitch[n] << 2;
1605  break;
1606  }
1607 
1608  default: // ACB_TYPE_NONE has no pitch
1609  bl_pitch_sh2 = 0;
1610  break;
1611  }
1612 
1613  synth_block(s, gb, n, block_nsamples, bl_pitch_sh2,
1614  lsps, prev_lsps, &frame_descs[bd_idx],
1615  &excitation[n * block_nsamples],
1616  &synth[n * block_nsamples]);
1617  }
1618 
1619  /* Averaging projection filter, if applicable. Else, just copy samples
1620  * from synthesis buffer */
1621  if (s->do_apf) {
1622  double i_lsps[MAX_LSPS];
1623  float lpcs[MAX_LSPS];
1624 
1625  if(frame_descs[bd_idx].fcb_type >= FCB_TYPE_AW_PULSES && pitch[0] == INT_MAX)
1626  return AVERROR_INVALIDDATA;
1627 
1628  for (n = 0; n < s->lsps; n++) // LSF -> LSP
1629  i_lsps[n] = cos(0.5 * (prev_lsps[n] + lsps[n]));
1630  ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
1631  postfilter(s, synth, samples, 80, lpcs,
1632  &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx],
1633  frame_descs[bd_idx].fcb_type, pitch[0]);
1634 
1635  for (n = 0; n < s->lsps; n++) // LSF -> LSP
1636  i_lsps[n] = cos(lsps[n]);
1637  ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
1638  postfilter(s, &synth[80], &samples[80], 80, lpcs,
1639  &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx + 80],
1640  frame_descs[bd_idx].fcb_type, pitch[0]);
1641  } else
1642  memcpy(samples, synth, 160 * sizeof(synth[0]));
1643 
1644  /* Cache values for next frame */
1645  s->frame_cntr++;
1646  if (s->frame_cntr >= 0xFFFF) s->frame_cntr -= 0xFFFF; // i.e. modulo (%)
1647  s->last_acb_type = frame_descs[bd_idx].acb_type;
1648  switch (frame_descs[bd_idx].acb_type) {
1649  case ACB_TYPE_NONE:
1650  s->last_pitch_val = 0;
1651  break;
1652  case ACB_TYPE_ASYMMETRIC:
1653  s->last_pitch_val = cur_pitch_val;
1654  break;
1655  case ACB_TYPE_HAMMING:
1656  s->last_pitch_val = pitch[frame_descs[bd_idx].n_blocks - 1];
1657  break;
1658  }
1659 
1660  return 0;
1661 }
1662 
1663 /**
1664  * Ensure minimum value for first item, maximum value for last value,
1665  * proper spacing between each value and proper ordering.
1666  *
1667  * @param lsps array of LSPs
1668  * @param num size of LSP array
1669  *
1670  * @note basically a double version of #ff_acelp_reorder_lsf(), might be
1671  * useful to put in a generic location later on. Parts are also
1672  * present in #ff_set_min_dist_lsf() + #ff_sort_nearly_sorted_floats(),
1673  * which is in float.
1674  */
1675 static void stabilize_lsps(double *lsps, int num)
1676 {
1677  int n, m, l;
1678 
1679  /* set minimum value for first, maximum value for last and minimum
1680  * spacing between LSF values.
1681  * Very similar to ff_set_min_dist_lsf(), but in double. */
1682  lsps[0] = FFMAX(lsps[0], 0.0015 * M_PI);
1683  for (n = 1; n < num; n++)
1684  lsps[n] = FFMAX(lsps[n], lsps[n - 1] + 0.0125 * M_PI);
1685  lsps[num - 1] = FFMIN(lsps[num - 1], 0.9985 * M_PI);
1686 
1687  /* reorder (looks like one-time / non-recursed bubblesort).
1688  * Very similar to ff_sort_nearly_sorted_floats(), but in double. */
1689  for (n = 1; n < num; n++) {
1690  if (lsps[n] < lsps[n - 1]) {
1691  for (m = 1; m < num; m++) {
1692  double tmp = lsps[m];
1693  for (l = m - 1; l >= 0; l--) {
1694  if (lsps[l] <= tmp) break;
1695  lsps[l + 1] = lsps[l];
1696  }
1697  lsps[l + 1] = tmp;
1698  }
1699  break;
1700  }
1701  }
1702 }
1703 
1704 /**
1705  * Synthesize output samples for a single superframe. If we have any data
1706  * cached in s->sframe_cache, that will be used instead of whatever is loaded
1707  * in s->gb.
1708  *
1709  * WMA Voice superframes contain 3 frames, each containing 160 audio samples,
1710  * to give a total of 480 samples per frame. See #synth_frame() for frame
1711  * parsing. In addition to 3 frames, superframes can also contain the LSPs
1712  * (if these are globally specified for all frames (residually); they can
1713  * also be specified individually per-frame. See the s->has_residual_lsps
1714  * option), and can specify the number of samples encoded in this superframe
1715  * (if less than 480), usually used to prevent blanks at track boundaries.
1716  *
1717  * @param ctx WMA Voice decoder context
1718  * @return 0 on success, <0 on error or 1 if there was not enough data to
1719  * fully parse the superframe
1720  */
1722  int *got_frame_ptr)
1723 {
1725  GetBitContext *gb = &s->gb, s_gb;
1726  int n, res, n_samples = MAX_SFRAMESIZE;
1727  double lsps[MAX_FRAMES][MAX_LSPS];
1728  const double *mean_lsf = s->lsps == 16 ?
1729  wmavoice_mean_lsf16[s->lsp_def_mode] : wmavoice_mean_lsf10[s->lsp_def_mode];
1730  float excitation[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE + 12];
1731  float synth[MAX_LSPS + MAX_SFRAMESIZE];
1732  float *samples;
1733 
1734  memcpy(synth, s->synth_history,
1735  s->lsps * sizeof(*synth));
1736  memcpy(excitation, s->excitation_history,
1737  s->history_nsamples * sizeof(*excitation));
1738 
1739  if (s->sframe_cache_size > 0) {
1740  gb = &s_gb;
1741  init_get_bits(gb, s->sframe_cache, s->sframe_cache_size);
1742  s->sframe_cache_size = 0;
1743  }
1744 
1745  /* First bit is speech/music bit, it differentiates between WMAVoice
1746  * speech samples (the actual codec) and WMAVoice music samples, which
1747  * are really WMAPro-in-WMAVoice-superframes. I've never seen those in
1748  * the wild yet. */
1749  if (!get_bits1(gb)) {
1750  avpriv_request_sample(ctx, "WMAPro-in-WMAVoice");
1751  return AVERROR_PATCHWELCOME;
1752  }
1753 
1754  /* (optional) nr. of samples in superframe; always <= 480 and >= 0 */
1755  if (get_bits1(gb)) {
1756  if ((n_samples = get_bits(gb, 12)) > MAX_SFRAMESIZE) {
1758  "Superframe encodes > %d samples (%d), not allowed\n",
1759  MAX_SFRAMESIZE, n_samples);
1760  return AVERROR_INVALIDDATA;
1761  }
1762  }
1763 
1764  /* Parse LSPs, if global for the superframe (can also be per-frame). */
1765  if (s->has_residual_lsps) {
1766  double prev_lsps[MAX_LSPS], a1[MAX_LSPS * 2], a2[MAX_LSPS * 2];
1767 
1768  for (n = 0; n < s->lsps; n++)
1769  prev_lsps[n] = s->prev_lsps[n] - mean_lsf[n];
1770 
1771  if (s->lsps == 10) {
1772  dequant_lsp10r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
1773  } else /* s->lsps == 16 */
1774  dequant_lsp16r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
1775 
1776  for (n = 0; n < s->lsps; n++) {
1777  lsps[0][n] = mean_lsf[n] + (a1[n] - a2[n * 2]);
1778  lsps[1][n] = mean_lsf[n] + (a1[s->lsps + n] - a2[n * 2 + 1]);
1779  lsps[2][n] += mean_lsf[n];
1780  }
1781  for (n = 0; n < 3; n++)
1782  stabilize_lsps(lsps[n], s->lsps);
1783  }
1784 
1785  /* synth_superframe can run multiple times per packet
1786  * free potential previous frame */
1788 
1789  /* get output buffer */
1790  frame->nb_samples = MAX_SFRAMESIZE;
1791  if ((res = ff_get_buffer(ctx, frame, 0)) < 0)
1792  return res;
1793  frame->nb_samples = n_samples;
1794  samples = (float *)frame->data[0];
1795 
1796  /* Parse frames, optionally preceded by per-frame (independent) LSPs. */
1797  for (n = 0; n < 3; n++) {
1798  if (!s->has_residual_lsps) {
1799  int m;
1800 
1801  if (s->lsps == 10) {
1802  dequant_lsp10i(gb, lsps[n]);
1803  } else /* s->lsps == 16 */
1804  dequant_lsp16i(gb, lsps[n]);
1805 
1806  for (m = 0; m < s->lsps; m++)
1807  lsps[n][m] += mean_lsf[m];
1808  stabilize_lsps(lsps[n], s->lsps);
1809  }
1810 
1811  if ((res = synth_frame(ctx, gb, n,
1812  &samples[n * MAX_FRAMESIZE],
1813  lsps[n], n == 0 ? s->prev_lsps : lsps[n - 1],
1814  &excitation[s->history_nsamples + n * MAX_FRAMESIZE],
1815  &synth[s->lsps + n * MAX_FRAMESIZE]))) {
1816  *got_frame_ptr = 0;
1817  return res;
1818  }
1819  }
1820 
1821  /* Statistics? FIXME - we don't check for length, a slight overrun
1822  * will be caught by internal buffer padding, and anything else
1823  * will be skipped, not read. */
1824  if (get_bits1(gb)) {
1825  res = get_bits(gb, 4);
1826  skip_bits(gb, 10 * (res + 1));
1827  }
1828 
1829  if (get_bits_left(gb) < 0) {
1831  return AVERROR_INVALIDDATA;
1832  }
1833 
1834  *got_frame_ptr = 1;
1835 
1836  /* Update history */
1837  memcpy(s->prev_lsps, lsps[2],
1838  s->lsps * sizeof(*s->prev_lsps));
1839  memcpy(s->synth_history, &synth[MAX_SFRAMESIZE],
1840  s->lsps * sizeof(*synth));
1841  memcpy(s->excitation_history, &excitation[MAX_SFRAMESIZE],
1842  s->history_nsamples * sizeof(*excitation));
1843  if (s->do_apf)
1844  memmove(s->zero_exc_pf, &s->zero_exc_pf[MAX_SFRAMESIZE],
1845  s->history_nsamples * sizeof(*s->zero_exc_pf));
1846 
1847  return 0;
1848 }
1849 
1850 /**
1851  * Parse the packet header at the start of each packet (input data to this
1852  * decoder).
1853  *
1854  * @param s WMA Voice decoding context private data
1855  * @return <0 on error, nb_superframes on success.
1856  */
1858 {
1859  GetBitContext *gb = &s->gb;
1860  unsigned int res, n_superframes = 0;
1861 
1862  skip_bits(gb, 4); // packet sequence number
1863  s->has_residual_lsps = get_bits1(gb);
1864  do {
1865  if (get_bits_left(gb) < 6 + s->spillover_bitsize)
1866  return AVERROR_INVALIDDATA;
1867 
1868  res = get_bits(gb, 6); // number of superframes per packet
1869  // (minus first one if there is spillover)
1870  n_superframes += res;
1871  } while (res == 0x3F);
1872  s->spillover_nbits = get_bits(gb, s->spillover_bitsize);
1873 
1874  return get_bits_left(gb) >= 0 ? n_superframes : AVERROR_INVALIDDATA;
1875 }
1876 
1877 /**
1878  * Copy (unaligned) bits from gb/data/size to pb.
1879  *
1880  * @param pb target buffer to copy bits into
1881  * @param data source buffer to copy bits from
1882  * @param size size of the source data, in bytes
1883  * @param gb bit I/O context specifying the current position in the source.
1884  * data. This function might use this to align the bit position to
1885  * a whole-byte boundary before calling #ff_copy_bits() on aligned
1886  * source data
1887  * @param nbits the amount of bits to copy from source to target
1888  *
1889  * @note after calling this function, the current position in the input bit
1890  * I/O context is undefined.
1891  */
1892 static void copy_bits(PutBitContext *pb,
1893  const uint8_t *data, int size,
1894  GetBitContext *gb, int nbits)
1895 {
1896  int rmn_bytes, rmn_bits;
1897 
1898  rmn_bits = rmn_bytes = get_bits_left(gb);
1899  if (rmn_bits < nbits)
1900  return;
1901  if (nbits > put_bits_left(pb))
1902  return;
1903  rmn_bits &= 7; rmn_bytes >>= 3;
1904  if ((rmn_bits = FFMIN(rmn_bits, nbits)) > 0)
1905  put_bits(pb, rmn_bits, get_bits(gb, rmn_bits));
1906  ff_copy_bits(pb, data + size - rmn_bytes,
1907  FFMIN(nbits - rmn_bits, rmn_bytes << 3));
1908 }
1909 
1910 /**
1911  * Packet decoding: a packet is anything that the (ASF) demuxer contains,
1912  * and we expect that the demuxer / application provides it to us as such
1913  * (else you'll probably get garbage as output). Every packet has a size of
1914  * ctx->block_align bytes, starts with a packet header (see
1915  * #parse_packet_header()), and then a series of superframes. Superframe
1916  * boundaries may exceed packets, i.e. superframes can split data over
1917  * multiple (two) packets.
1918  *
1919  * For more information about frames, see #synth_superframe().
1920  */
1922  int *got_frame_ptr, AVPacket *avpkt)
1923 {
1925  GetBitContext *gb = &s->gb;
1926  const uint8_t *buf = avpkt->data;
1927  uint8_t dummy[1];
1928  int size, res, pos;
1929 
1930  /* Packets are sometimes a multiple of ctx->block_align, with a packet
1931  * header at each ctx->block_align bytes. However, FFmpeg's ASF demuxer
1932  * feeds us ASF packets, which may concatenate multiple "codec" packets
1933  * in a single "muxer" packet, so we artificially emulate that by
1934  * capping the packet size at ctx->block_align. */
1935  for (size = avpkt->size; size > ctx->block_align; size -= ctx->block_align);
1936  buf = size ? buf : dummy;
1937  res = init_get_bits8(&s->gb, buf, size);
1938  if (res < 0)
1939  return res;
1940 
1941  /* size == ctx->block_align is used to indicate whether we are dealing with
1942  * a new packet or a packet of which we already read the packet header
1943  * previously. */
1944  if (!(size % ctx->block_align)) { // new packet header
1945  if (!size) {
1946  s->spillover_nbits = 0;
1947  s->nb_superframes = 0;
1948  } else {
1949  if ((res = parse_packet_header(s)) < 0)
1950  return res;
1951  s->nb_superframes = res;
1952  }
1953 
1954  /* If the packet header specifies a s->spillover_nbits, then we want
1955  * to push out all data of the previous packet (+ spillover) before
1956  * continuing to parse new superframes in the current packet. */
1957  if (s->sframe_cache_size > 0) {
1958  int cnt = get_bits_count(gb);
1959  if (cnt + s->spillover_nbits > avpkt->size * 8) {
1960  s->spillover_nbits = avpkt->size * 8 - cnt;
1961  }
1962  copy_bits(&s->pb, buf, size, gb, s->spillover_nbits);
1963  flush_put_bits(&s->pb);
1964  s->sframe_cache_size += s->spillover_nbits;
1965  if ((res = synth_superframe(ctx, frame, got_frame_ptr)) == 0 &&
1966  *got_frame_ptr) {
1967  cnt += s->spillover_nbits;
1968  s->skip_bits_next = cnt & 7;
1969  res = cnt >> 3;
1970  return res;
1971  } else
1972  skip_bits_long (gb, s->spillover_nbits - cnt +
1973  get_bits_count(gb)); // resync
1974  } else if (s->spillover_nbits) {
1975  skip_bits_long(gb, s->spillover_nbits); // resync
1976  }
1977  } else if (s->skip_bits_next)
1978  skip_bits(gb, s->skip_bits_next);
1979 
1980  /* Try parsing superframes in current packet */
1981  s->sframe_cache_size = 0;
1982  s->skip_bits_next = 0;
1983  pos = get_bits_left(gb);
1984  if (s->nb_superframes-- == 0) {
1985  *got_frame_ptr = 0;
1986  return size;
1987  } else if (s->nb_superframes > 0) {
1988  if ((res = synth_superframe(ctx, frame, got_frame_ptr)) < 0) {
1989  return res;
1990  } else if (*got_frame_ptr) {
1991  int cnt = get_bits_count(gb);
1992  s->skip_bits_next = cnt & 7;
1993  res = cnt >> 3;
1994  return res;
1995  }
1996  } else if ((s->sframe_cache_size = pos) > 0) {
1997  /* ... cache it for spillover in next packet */
1998  init_put_bits(&s->pb, s->sframe_cache, SFRAME_CACHE_MAXSIZE);
1999  copy_bits(&s->pb, buf, size, gb, s->sframe_cache_size);
2000  // FIXME bad - just copy bytes as whole and add use the
2001  // skip_bits_next field
2002  }
2003 
2004  return size;
2005 }
2006 
2008 {
2010 
2011  if (s->do_apf) {
2012  av_tx_uninit(&s->rdft);
2013  av_tx_uninit(&s->irdft);
2014  av_tx_uninit(&s->dct);
2015  av_tx_uninit(&s->dst);
2016  }
2017 
2018  return 0;
2019 }
2020 
2022  .p.name = "wmavoice",
2023  CODEC_LONG_NAME("Windows Media Audio Voice"),
2024  .p.type = AVMEDIA_TYPE_AUDIO,
2025  .p.id = AV_CODEC_ID_WMAVOICE,
2026  .priv_data_size = sizeof(WMAVoiceContext),
2028  .close = wmavoice_decode_end,
2030  .p.capabilities =
2031 #if FF_API_SUBFRAMES
2032  AV_CODEC_CAP_SUBFRAMES |
2033 #endif
2035  .caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
2036  .flush = wmavoice_flush,
2037 };
WMAVoiceContext::has_residual_lsps
int has_residual_lsps
if set, superframes contain one set of LSPs that cover all frames, encoded as independent and residua...
Definition: wmavoice.c:193
skip_bits_long
static void skip_bits_long(GetBitContext *s, int n)
Skips the specified number of bits.
Definition: get_bits.h:261
AMRFixed::x
int x[10]
Definition: acelp_vectors.h:55
wmavoice_std_codebook
static const float wmavoice_std_codebook[1000]
Definition: wmavoice_data.h:2585
interpol
static int interpol(MBContext *s, uint32_t *color, int x, int y, int linesize)
Definition: vsrc_mandelbrot.c:186
MAX_LSPS
#define MAX_LSPS
maximum filter order
Definition: wmavoice.c:49
WMAVoiceContext::aw_next_pulse_off_cache
int aw_next_pulse_off_cache
the position (relative to start of the second block) at which pulses should start to be positioned,...
Definition: wmavoice.c:242
WMAVoiceContext::max_pitch_val
int max_pitch_val
max value + 1 for pitch parsing
Definition: wmavoice.c:165
av_clip
#define av_clip
Definition: common.h:100
aw_pulse_set2
static int aw_pulse_set2(WMAVoiceContext *s, GetBitContext *gb, int block_idx, AMRFixed *fcb)
Apply second set of pitch-adaptive window pulses.
Definition: wmavoice.c:1100
FF_CODEC_CAP_INIT_CLEANUP
#define FF_CODEC_CAP_INIT_CLEANUP
The codec allows calling the close function for deallocation even if the init function returned a fai...
Definition: codec_internal.h:42
acelp_vectors.h
get_bits_left
static int get_bits_left(GetBitContext *gb)
Definition: get_bits.h:678
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
wmavoice_dq_lsp10i
static const uint8_t wmavoice_dq_lsp10i[0xf00]
Definition: wmavoice_data.h:33
mem_internal.h
WMAVoiceContext::tilted_lpcs_pf
float tilted_lpcs_pf[0x82]
aligned buffer for LPC tilting
Definition: wmavoice.c:279
out
FILE * out
Definition: movenc.c:55
u
#define u(width, name, range_min, range_max)
Definition: cbs_h2645.c:251
thread.h
frame_descs
static const struct frame_type_desc frame_descs[17]
AVTXContext
Definition: tx_priv.h:235
wmavoice_dq_lsp16r3
static const uint8_t wmavoice_dq_lsp16r3[0x600]
Definition: wmavoice_data.h:1526
dequant_lsps
static void dequant_lsps(double *lsps, int num, const uint16_t *values, const uint16_t *sizes, int n_stages, const uint8_t *table, const double *mul_q, const double *base_q)
Dequantize LSPs.
Definition: wmavoice.c:872
init_put_bits
static void init_put_bits(PutBitContext *s, uint8_t *buffer, int buffer_size)
Initialize the PutBitContext s.
Definition: put_bits.h:62
WMAVoiceContext::excitation_history
float excitation_history[MAX_SIGNAL_HISTORY]
cache of the signal of
Definition: wmavoice.c:252
get_bits_count
static int get_bits_count(const GetBitContext *s)
Definition: get_bits.h:249
av_log2_16bit
int av_log2_16bit(unsigned v)
Definition: intmath.c:31
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:410
put_bits
static void put_bits(Jpeg2000EncoderContext *s, int val, int n)
put n times val bit
Definition: j2kenc.c:223
tmp
static uint8_t tmp[11]
Definition: aes_ctr.c:28
aw_pulse_set1
static void aw_pulse_set1(WMAVoiceContext *s, GetBitContext *gb, int block_idx, AMRFixed *fcb)
Apply first set of pitch-adaptive window pulses.
Definition: wmavoice.c:1190
ff_acelp_apply_order_2_transfer_function
void ff_acelp_apply_order_2_transfer_function(float *out, const float *in, const float zero_coeffs[2], const float pole_coeffs[2], float gain, float mem[2], int n)
Apply an order 2 rational transfer function in-place.
Definition: acelp_filters.c:121
AVPacket::data
uint8_t * data
Definition: packet.h:539
pRNG
static int pRNG(int frame_cntr, int block_num, int block_size)
Generate a random number from frame_cntr and block_idx, which will live in the range [0,...
Definition: wmavoice.c:1251
ff_wmavoice_decoder
const FFCodec ff_wmavoice_decoder
Definition: wmavoice.c:2021
table
static const uint16_t table[]
Definition: prosumer.c:203
data
const char data[16]
Definition: mxf.c:149
WMAVoiceContext::silence_gain
float silence_gain
set for use in blocks if ACB_TYPE_NONE
Definition: wmavoice.c:227
expf
#define expf(x)
Definition: libm.h:283
WMAVoiceContext::denoise_filter_cache_size
int denoise_filter_cache_size
samples in denoise_filter_cache
Definition: wmavoice.c:277
wmavoice_denoise_power_table
static const float wmavoice_denoise_power_table[12][64]
LUT for f(x,y) = pow((y + 6.9) / 64, 0.025 * (x + 1)).
Definition: wmavoice_data.h:3064
wmavoice_gain_codebook_acb
static const float wmavoice_gain_codebook_acb[128]
Definition: wmavoice_data.h:2874
FFCodec
Definition: codec_internal.h:127
base
uint8_t base
Definition: vp3data.h:128
AVComplexFloat
Definition: tx.h:27
max
#define max(a, b)
Definition: cuda_runtime.h:33
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
ff_celp_lp_synthesis_filterf
void ff_celp_lp_synthesis_filterf(float *out, const float *filter_coeffs, const float *in, int buffer_length, int filter_length)
LP synthesis filter.
Definition: celp_filters.c:85
WMAVoiceContext::aw_idx_is_ext
int aw_idx_is_ext
whether the AW index was encoded in 8 bits (instead of 6)
Definition: wmavoice.c:229
init_get_bits
static int init_get_bits(GetBitContext *s, const uint8_t *buffer, int bit_size)
Initialize GetBitContext.
Definition: get_bits.h:497
ACB_TYPE_NONE
@ ACB_TYPE_NONE
no adaptive codebook (only hardcoded fixed)
Definition: wmavoice.c:70
av_tx_init
av_cold int av_tx_init(AVTXContext **ctx, av_tx_fn *tx, enum AVTXType type, int inv, int len, const void *scale, uint64_t flags)
Initialize a transform context with the given configuration (i)MDCTs with an odd length are currently...
Definition: tx.c:903
WMAVoiceContext::dc_level
int dc_level
Predicted amount of DC noise, based on which a DC removal filter is used.
Definition: wmavoice.c:156
FCB_TYPE_AW_PULSES
@ FCB_TYPE_AW_PULSES
Pitch-adaptive window (AW) pulse signals, used in particular for low-bitrate streams.
Definition: wmavoice.c:90
wmavoice_dq_lsp16i1
static const uint8_t wmavoice_dq_lsp16i1[0x640]
Definition: wmavoice_data.h:420
WMAVoiceContext::block_conv_table
uint16_t block_conv_table[4]
boundaries for block pitch unit/scale conversion
Definition: wmavoice.c:177
frame_type_desc::log_n_blocks
uint8_t log_n_blocks
log2(n_blocks)
Definition: wmavoice.c:103
skip_bits
static void skip_bits(GetBitContext *s, int n)
Definition: get_bits.h:364
WMAVoiceContext::aw_pulse_range
int aw_pulse_range
the range over which aw_pulse_set1() can apply the pulse, relative to the value in aw_first_pulse_off...
Definition: wmavoice.c:231
ACB_TYPE_HAMMING
@ ACB_TYPE_HAMMING
Per-block pitch with signal generation using a Hamming sinc window function.
Definition: wmavoice.c:76
get_bits
static unsigned int get_bits(GetBitContext *s, int n)
Read 1-25 bits.
Definition: get_bits.h:318
ff_copy_bits
void ff_copy_bits(PutBitContext *pb, const uint8_t *src, int length)
Copy the content of src to the bitstream.
Definition: bitstream.c:49
FFCodec::p
AVCodec p
The public AVCodec.
Definition: codec_internal.h:131
av_ceil_log2
#define av_ceil_log2
Definition: common.h:97
AMRFixed::pitch_fac
float pitch_fac
Definition: acelp_vectors.h:59
dummy
int dummy
Definition: motion.c:66
GetBitContext
Definition: get_bits.h:108
MULH
#define MULH
Definition: mathops.h:42
wmavoice_flush
static av_cold void wmavoice_flush(AVCodecContext *ctx)
Definition: wmavoice.c:326
put_bits_left
static int put_bits_left(PutBitContext *s)
Definition: put_bits.h:125
frame_type_desc::n_blocks
uint8_t n_blocks
amount of blocks per frame (each block (contains 160/n_blocks samples)
Definition: wmavoice.c:101
val
static double val(void *priv, double ch)
Definition: aeval.c:77
WMAVoiceContext::irdft_fn
av_tx_fn irdft_fn
postfilter (for denoise filter)
Definition: wmavoice.c:266
dequant_lsp10i
static void dequant_lsp10i(GetBitContext *gb, double *lsps)
Parse 10 independently-coded LSPs.
Definition: wmavoice.c:903
synth_block
static void synth_block(WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, int block_pitch_sh2, const double *lsps, const double *prev_lsps, const struct frame_type_desc *frame_desc, float *excitation, float *synth)
Parse data in a single block.
Definition: wmavoice.c:1452
MAX_SFRAMESIZE
#define MAX_SFRAMESIZE
maximum number of samples per superframe
Definition: wmavoice.c:55
wmavoice_gain_codebook_fcb
static const float wmavoice_gain_codebook_fcb[128]
Definition: wmavoice_data.h:2893
a2
static double a2(void *priv, double x, double y)
Definition: vf_xfade.c:2030
WMAVoiceContext::denoise_filter_cache
float denoise_filter_cache[MAX_FRAMESIZE]
Definition: wmavoice.c:276
fabsf
static __device__ float fabsf(float a)
Definition: cuda_runtime.h:181
WMAVoiceContext::sin
float sin[511]
Definition: wmavoice.c:269
calc_input_response
static void calc_input_response(WMAVoiceContext *s, float *lpcs_src, int fcb_type, float *coeffs_dst, int remainder)
Derive denoise filter coefficients (in real domain) from the LPCs.
Definition: wmavoice.c:610
AV_CODEC_ID_WMAVOICE
@ AV_CODEC_ID_WMAVOICE
Definition: codec_id.h:484
lrint
#define lrint
Definition: tablegen.h:53
MUL16
#define MUL16(ra, rb)
Definition: mathops.h:87
ff_thread_once
static int ff_thread_once(char *control, void(*routine)(void))
Definition: thread.h:205
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:209
FF_ARRAY_ELEMS
#define FF_ARRAY_ELEMS(a)
Definition: sinewin_tablegen.c:29
av_cold
#define av_cold
Definition: attributes.h:90
init_get_bits8
static int init_get_bits8(GetBitContext *s, const uint8_t *buffer, int byte_size)
Initialize GetBitContext.
Definition: get_bits.h:528
MAX_LSPS_ALIGN16
#define MAX_LSPS_ALIGN16
same as MAX_LSPS; needs to be multiple
Definition: wmavoice.c:50
av_tx_fn
void(* av_tx_fn)(AVTXContext *s, void *out, void *in, ptrdiff_t stride)
Function pointer to a function to perform the transform.
Definition: tx.h:151
av_memcpy_backptr
void av_memcpy_backptr(uint8_t *dst, int back, int cnt)
Overlapping memcpy() implementation.
Definition: mem.c:447
float
float
Definition: af_crystalizer.c:122
wmavoice_dq_lsp10r
static const uint8_t wmavoice_dq_lsp10r[0x1400]
Definition: wmavoice_data.h:749
FF_CODEC_DECODE_CB
#define FF_CODEC_DECODE_CB(func)
Definition: codec_internal.h:341
WMAVoiceContext::sframe_cache_size
int sframe_cache_size
set to >0 if we have data from an (incomplete) superframe from a previous packet that spilled over in...
Definition: wmavoice.c:205
WMAVoiceContext::dst
AVTXContext * dst
contexts for phase shift (in Hilbert
Definition: wmavoice.c:267
s
#define s(width, name)
Definition: cbs_vp9.c:198
WMAVoiceContext::lsp_q_mode
int lsp_q_mode
defines quantizer defaults [0, 1]
Definition: wmavoice.c:160
frame_type_desc::fcb_type
uint8_t fcb_type
Fixed codebook type (FCB_TYPE_*)
Definition: wmavoice.c:105
log_range
#define log_range(var, assign)
ACB_TYPE_ASYMMETRIC
@ ACB_TYPE_ASYMMETRIC
adaptive codebook with per-frame pitch, which we interpolate to get a per-sample pitch.
Definition: wmavoice.c:71
WMAVoiceContext::prev_lsps
double prev_lsps[MAX_LSPS]
LSPs of the last frame of the previous superframe.
Definition: wmavoice.c:221
AVMEDIA_TYPE_AUDIO
@ AVMEDIA_TYPE_AUDIO
Definition: avutil.h:202
WMAVoiceContext::aw_n_pulses
int aw_n_pulses[2]
number of AW-pulses in each block; note that this number can be negative (in which case it basically ...
Definition: wmavoice.c:237
AMRFixed
Sparse representation for the algebraic codebook (fixed) vector.
Definition: acelp_vectors.h:53
bits
uint8_t bits
Definition: vp3data.h:128
adaptive_gain_control
static void adaptive_gain_control(float *out, const float *in, const float *speech_synth, int size, float alpha, float *gain_mem)
Adaptive gain control (as used in postfilter).
Definition: wmavoice.c:510
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:40
wmavoice_lsp16_intercoeff_a
static const float wmavoice_lsp16_intercoeff_a[32][2][16]
Definition: wmavoice_data.h:2047
ctx
AVFormatContext * ctx
Definition: movenc.c:49
decode.h
get_bits.h
wmavoice_mean_lsf10
static const double wmavoice_mean_lsf10[2][10]
Definition: wmavoice_data.h:2565
WMAVoiceContext::spillover_nbits
int spillover_nbits
number of bits of the previous packet's last superframe preceding this packet's first full superframe...
Definition: wmavoice.c:189
UMULH
static av_always_inline unsigned UMULH(unsigned a, unsigned b)
Definition: mathops.h:67
AMRFixed::y
float y[10]
Definition: acelp_vectors.h:56
WMAVoiceContext::denoise_coeffs_pf
float denoise_coeffs_pf[0x82]
aligned buffer for denoise coefficients
Definition: wmavoice.c:281
wmavoice_gain_silence
static const float wmavoice_gain_silence[256]
Definition: wmavoice_data.h:2788
PutBitContext
Definition: put_bits.h:50
WMAVoiceContext::vbm_tree
int8_t vbm_tree[25]
converts VLC codes to frame type
Definition: wmavoice.c:141
CODEC_LONG_NAME
#define CODEC_LONG_NAME(str)
Definition: codec_internal.h:326
WMAVoiceContext::dct_fn
av_tx_fn dct_fn
Definition: wmavoice.c:268
wmavoice_dq_lsp16i3
static const uint8_t wmavoice_dq_lsp16i3[0x300]
Definition: wmavoice_data.h:682
if
if(ret)
Definition: filter_design.txt:179
AMRFixed::no_repeat_mask
int no_repeat_mask
Definition: acelp_vectors.h:57
postfilter
static void postfilter(WMAVoiceContext *s, const float *synth, float *samples, int size, const float *lpcs, float *zero_exc_pf, int fcb_type, int pitch)
Averaging projection filter, the postfilter used in WMAVoice.
Definition: wmavoice.c:818
AV_ONCE_INIT
#define AV_ONCE_INIT
Definition: thread.h:203
NULL
#define NULL
Definition: coverity.c:32
sizes
static const int sizes[][2]
Definition: img2dec.c:60
WMAVoiceContext::history_nsamples
int history_nsamples
number of samples in history for signal prediction (through ACB)
Definition: wmavoice.c:146
WMAVoiceContext::synth_history
float synth_history[MAX_LSPS]
see excitation_history
Definition: wmavoice.c:255
LOCAL_ALIGNED_32
#define LOCAL_ALIGNED_32(t, v,...)
Definition: mem_internal.h:132
AVERROR_PATCHWELCOME
#define AVERROR_PATCHWELCOME
Not yet implemented in FFmpeg, patches welcome.
Definition: error.h:64
last_coeff
static const uint8_t last_coeff[3]
Definition: qdm2data.h:187
WMAVoiceContext::denoise_strength
int denoise_strength
strength of denoising in Wiener filter [0-11]
Definition: wmavoice.c:152
MAX_SIGNAL_HISTORY
#define MAX_SIGNAL_HISTORY
maximum excitation signal history
Definition: wmavoice.c:54
WMAVoiceContext::sframe_cache
uint8_t sframe_cache[SFRAME_CACHE_MAXSIZE+AV_INPUT_BUFFER_PADDING_SIZE]
cache for superframe data split over multiple packets
Definition: wmavoice.c:202
get_bits1
static unsigned int get_bits1(GetBitContext *s)
Definition: get_bits.h:371
dequant_lsp10r
static void dequant_lsp10r(GetBitContext *gb, double *i_lsps, const double *old, double *a1, double *a2, int q_mode)
Parse 10 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames f...
Definition: wmavoice.c:929
WMAVoiceContext::pitch_nbits
int pitch_nbits
number of bits used to specify the pitch value in the frame header
Definition: wmavoice.c:166
FCB_TYPE_SILENCE
@ FCB_TYPE_SILENCE
comfort noise during silence generated from a hardcoded (fixed) codebook with per-frame (low) gain va...
Definition: wmavoice.c:85
WMAVoiceContext::block_delta_pitch_nbits
int block_delta_pitch_nbits
number of bits used to specify the delta pitch between this and the last block's pitch value,...
Definition: wmavoice.c:171
kalman_smoothen
static int kalman_smoothen(WMAVoiceContext *s, int pitch, const float *in, float *out, int size)
Kalman smoothing function.
Definition: wmavoice.c:551
WMAVoiceContext::skip_bits_next
int skip_bits_next
number of bits to skip at the next call to wmavoice_decode_packet() (since they're part of the previo...
Definition: wmavoice.c:198
sqrtf
static __device__ float sqrtf(float a)
Definition: cuda_runtime.h:184
abs
#define abs(x)
Definition: cuda_runtime.h:35
WMAVoiceContext::dst_fn
av_tx_fn dst_fn
transform, part of postfilter)
Definition: wmavoice.c:268
WMAVoiceContext::rdft
AVTXContext * rdft
Definition: wmavoice.c:265
celp_filters.h
MAX_FRAMESIZE
#define MAX_FRAMESIZE
maximum number of samples per frame
Definition: wmavoice.c:53
av_clipf
av_clipf
Definition: af_crystalizer.c:122
MAX_FRAMES
#define MAX_FRAMES
maximum number of frames per superframe
Definition: wmavoice.c:52
get_vlc2
static av_always_inline int get_vlc2(GetBitContext *s, const VLCElem *table, int bits, int max_depth)
Parse a vlc code.
Definition: get_bits.h:635
decode_vbmtree
static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])
Set up the variable bit mode (VBM) tree from container extradata.
Definition: wmavoice.c:298
AVOnce
#define AVOnce
Definition: thread.h:202
FCB_TYPE_EXC_PULSES
@ FCB_TYPE_EXC_PULSES
Innovation (fixed) codebook pulse sets in combinations of either single pulses or pulse pairs.
Definition: wmavoice.c:92
aw_parse_coords
static void aw_parse_coords(WMAVoiceContext *s, GetBitContext *gb, const int *pitch)
Parse the offset of the first pitch-adaptive window pulses, and the distribution of pulses between th...
Definition: wmavoice.c:1048
wmavoice_init_static_data
static av_cold void wmavoice_init_static_data(void)
Definition: wmavoice.c:312
float_dsp.h
WMAVoiceContext::dcf_mem
float dcf_mem[2]
DC filter history.
Definition: wmavoice.c:273
ff_get_buffer
int ff_get_buffer(AVCodecContext *avctx, AVFrame *frame, int flags)
Get a buffer for a frame.
Definition: decode.c:1703
init
int(* init)(AVBSFContext *ctx)
Definition: dts2pts.c:368
AV_CODEC_CAP_DR1
#define AV_CODEC_CAP_DR1
Codec uses get_buffer() or get_encode_buffer() for allocating buffers and supports custom allocators.
Definition: codec.h:52
parse_packet_header
static int parse_packet_header(WMAVoiceContext *s)
Parse the packet header at the start of each packet (input data to this decoder).
Definition: wmavoice.c:1857
AV_TX_FLOAT_DCT_I
@ AV_TX_FLOAT_DCT_I
Discrete Cosine Transform I.
Definition: tx.h:116
AVPacket::size
int size
Definition: packet.h:540
powf
#define powf(x, y)
Definition: libm.h:50
AVChannelLayout
An AVChannelLayout holds information about the channel layout of audio data.
Definition: channel_layout.h:319
codec_internal.h
DECLARE_ALIGNED
#define DECLARE_ALIGNED(n, t, v)
Definition: mem_internal.h:104
WMAVoiceContext::spillover_bitsize
int spillover_bitsize
number of bits used to specify spillover_nbits in the packet header = ceil(log2(ctx->block_align << 3...
Definition: wmavoice.c:143
for
for(k=2;k<=8;++k)
Definition: h264pred_template.c:424
WMAVoiceContext::pb
PutBitContext pb
bitstream writer for sframe_cache
Definition: wmavoice.c:210
WMAVoiceContext::last_pitch_val
int last_pitch_val
pitch value of the previous frame
Definition: wmavoice.c:223
size
int size
Definition: twinvq_data.h:10344
wiener_denoise
static void wiener_denoise(WMAVoiceContext *s, int fcb_type, float *synth_pf, int size, const float *lpcs)
This function applies a Wiener filter on the (noisy) speech signal as a means to denoise it.
Definition: wmavoice.c:734
VLCElem
Definition: vlc.h:32
wmavoice_lsp10_intercoeff_b
static const float wmavoice_lsp10_intercoeff_b[32][2][10]
Definition: wmavoice_data.h:1852
range
enum AVColorRange range
Definition: mediacodec_wrapper.c:2594
dequant_lsp16i
static void dequant_lsp16i(GetBitContext *gb, double *lsps)
Parse 16 independently-coded LSPs.
Definition: wmavoice.c:965
wmavoice_dq_lsp16r1
static const uint8_t wmavoice_dq_lsp16r1[0x500]
Definition: wmavoice_data.h:1264
WMAVoiceContext::aw_first_pulse_off
int aw_first_pulse_off[2]
index of first sample to which to apply AW-pulses, or -0xff if unset
Definition: wmavoice.c:240
WMAVoiceContext::zero_exc_pf
float zero_exc_pf[MAX_SIGNAL_HISTORY+MAX_SFRAMESIZE]
zero filter output (i.e. excitation) by postfilter
Definition: wmavoice.c:275
sinewin.h
wmavoice_dq_lsp16r2
static const uint8_t wmavoice_dq_lsp16r2[0x500]
Definition: wmavoice_data.h:1395
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
frame_type_desc
Description of frame types.
Definition: wmavoice.c:100
WMAVoiceContext::block_pitch_range
int block_pitch_range
range of the block pitch
Definition: wmavoice.c:170
stabilize_lsps
static void stabilize_lsps(double *lsps, int num)
Ensure minimum value for first item, maximum value for last value, proper spacing between each value ...
Definition: wmavoice.c:1675
M_PI
#define M_PI
Definition: mathematics.h:67
ff_tilt_compensation
void ff_tilt_compensation(float *mem, float tilt, float *samples, int size)
Apply tilt compensation filter, 1 - tilt * z-1.
Definition: acelp_filters.c:138
av_tx_uninit
av_cold void av_tx_uninit(AVTXContext **ctx)
Frees a context and sets *ctx to NULL, does nothing when *ctx == NULL.
Definition: tx.c:295
wmavoice_energy_table
static const float wmavoice_energy_table[128]
LUT for 1.071575641632 * pow(1.0331663, n - 127)
Definition: wmavoice_data.h:3026
ff_sine_window_init
void ff_sine_window_init(float *window, int n)
Generate a sine window.
Definition: sinewin_tablegen.h:59
wmavoice_decode_init
static av_cold int wmavoice_decode_init(AVCodecContext *ctx)
Set up decoder with parameters from demuxer (extradata etc.).
Definition: wmavoice.c:357
WMAVoiceContext::block_delta_pitch_hrange
int block_delta_pitch_hrange
1/2 range of the delta (full range is from -this to +this-1)
Definition: wmavoice.c:175
wmavoice_ipol2_coeffs
static const float wmavoice_ipol2_coeffs[32]
Hamming-window sinc function (num = 32, x = [ 0, 31 ]): (0.54 + 0.46 * cos(2 * M_PI * x / (num - 1)))...
Definition: wmavoice_data.h:3012
WMAVoiceContext::pitch_diff_sh16
int pitch_diff_sh16
((cur_pitch_val - last_pitch_val) << 16) / MAX_FRAMESIZE
Definition: wmavoice.c:225
WMAVoiceContext::gain_pred_err
float gain_pred_err[6]
cache for gain prediction
Definition: wmavoice.c:251
WMAVoiceContext::rdft_fn
av_tx_fn rdft_fn
Definition: wmavoice.c:266
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
WMAVoiceContext::nb_superframes
int nb_superframes
number of superframes in current packet
Definition: wmavoice.c:250
WMAVoiceContext::cos
float cos[511]
8-bit cosine/sine windows over [-pi,pi] range
Definition: wmavoice.c:269
WMAVoiceContext::denoise_tilt_corr
int denoise_tilt_corr
Whether to apply tilt correction to the Wiener filter coefficients (postfilter)
Definition: wmavoice.c:154
delta
float delta
Definition: vorbis_enc_data.h:430
wmavoice_lsp16_intercoeff_b
static const float wmavoice_lsp16_intercoeff_b[32][2][16]
Definition: wmavoice_data.h:2306
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
av_frame_unref
void av_frame_unref(AVFrame *frame)
Unreference all the buffers referenced by frame and reset the frame fields.
Definition: frame.c:530
acelp_filters.h
ff_weighted_vector_sumf
void ff_weighted_vector_sumf(float *out, const float *in_a, const float *in_b, float weight_coeff_a, float weight_coeff_b, int length)
float implementation of weighted sum of two vectors.
Definition: acelp_vectors.c:182
WMAVoiceContext::lsp_def_mode
int lsp_def_mode
defines different sets of LSP defaults [0, 1]
Definition: wmavoice.c:161
wmavoice_gain_universal
static const float wmavoice_gain_universal[64]
Definition: wmavoice_data.h:2855
AVCodec::name
const char * name
Name of the codec implementation.
Definition: codec.h:194
len
int len
Definition: vorbis_enc_data.h:426
WMAVoiceContext::synth_filter_out_buf
float synth_filter_out_buf[0x80+MAX_LSPS_ALIGN16]
aligned buffer for postfilter speech synthesis
Definition: wmavoice.c:283
tilt_factor
static float tilt_factor(const float *lpcs, int n_lpcs)
Get the tilt factor of a formant filter from its transfer function.
Definition: wmavoice.c:597
VLC_NBITS
#define VLC_NBITS
number of bits to read per VLC iteration
Definition: wmavoice.c:59
wmavoice_data.h
Windows Media Voice (WMAVoice) tables.
avcodec.h
WMAVoiceContext::min_pitch_val
int min_pitch_val
base value for pitch parsing code
Definition: wmavoice.c:164
WMAVoiceContext::last_acb_type
int last_acb_type
frame type [0-2] of the previous frame
Definition: wmavoice.c:224
WMAVoiceContext::dct
AVTXContext * dct
Definition: wmavoice.c:267
av_uninit
#define av_uninit(x)
Definition: attributes.h:154
ret
ret
Definition: filter_design.txt:187
frame
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame For filters that do not use the this method is called when a frame is wanted on an output For a it should directly call filter_frame on the corresponding output For a if there are queued frames already one of these frames should be pushed If the filter should request a frame on one of its repeatedly until at least one frame has been pushed Return or at least make progress towards producing a frame
Definition: filter_design.txt:264
lsp.h
ff_celp_lp_zero_synthesis_filterf
void ff_celp_lp_zero_synthesis_filterf(float *out, const float *filter_coeffs, const float *in, int buffer_length, int filter_length)
LP zero synthesis filter.
Definition: celp_filters.c:200
WMAVoiceContext::do_apf
int do_apf
whether to apply the averaged projection filter (APF)
Definition: wmavoice.c:150
pos
unsigned int pos
Definition: spdifenc.c:414
AMRFixed::n
int n
Definition: acelp_vectors.h:54
wmavoice_dq_lsp16i2
static const uint8_t wmavoice_dq_lsp16i2[0x3c0]
Definition: wmavoice_data.h:583
AV_INPUT_BUFFER_PADDING_SIZE
#define AV_INPUT_BUFFER_PADDING_SIZE
Definition: defs.h:40
wmavoice_mean_lsf16
static const double wmavoice_mean_lsf16[2][16]
Definition: wmavoice_data.h:2574
AV_RL32
uint64_t_TMPL AV_WL64 unsigned int_TMPL AV_RL32
Definition: bytestream.h:92
U
#define U(x)
Definition: vpx_arith.h:37
AV_TX_FLOAT_RDFT
@ AV_TX_FLOAT_RDFT
Real to complex and complex to real DFTs.
Definition: tx.h:90
wmavoice_decode_end
static av_cold int wmavoice_decode_end(AVCodecContext *ctx)
Definition: wmavoice.c:2007
WMAVoiceContext::lsps
int lsps
number of LSPs per frame [10 or 16]
Definition: wmavoice.c:159
AVCodecContext
main external API structure.
Definition: avcodec.h:451
wmavoice_decode_packet
static int wmavoice_decode_packet(AVCodecContext *ctx, AVFrame *frame, int *got_frame_ptr, AVPacket *avpkt)
Packet decoding: a packet is anything that the (ASF) demuxer contains, and we expect that the demuxer...
Definition: wmavoice.c:1921
channel_layout.h
WMAVoiceContext::block_pitch_nbits
int block_pitch_nbits
number of bits used to specify the first block's pitch value
Definition: wmavoice.c:168
AV_TX_FLOAT_DST_I
@ AV_TX_FLOAT_DST_I
Discrete Sine Transform I.
Definition: tx.h:128
synth_superframe
static int synth_superframe(AVCodecContext *ctx, AVFrame *frame, int *got_frame_ptr)
Synthesize output samples for a single superframe.
Definition: wmavoice.c:1721
av_channel_layout_uninit
void av_channel_layout_uninit(AVChannelLayout *channel_layout)
Free any allocated data in the channel layout and reset the channel count to 0.
Definition: channel_layout.c:442
WMAVoiceContext::frame_cntr
int frame_cntr
current frame index [0 - 0xFFFE]; is only used for comfort noise in pRNG()
Definition: wmavoice.c:248
wmavoice_ipol1_coeffs
static const float wmavoice_ipol1_coeffs[17 *9]
Definition: wmavoice_data.h:2960
FCB_TYPE_HARDCODED
@ FCB_TYPE_HARDCODED
hardcoded (fixed) codebook with per-block gain values
Definition: wmavoice.c:88
values
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame For filters that do not use the this method is called when a frame is wanted on an output For a it should directly call filter_frame on the corresponding output For a if there are queued frames already one of these frames should be pushed If the filter should request a frame on one of its repeatedly until at least one frame has been pushed Return values
Definition: filter_design.txt:263
ff_set_fixed_vector
void ff_set_fixed_vector(float *out, const AMRFixed *in, float scale, int size)
Add fixed vector to an array from a sparse representation.
Definition: acelp_vectors.c:224
mean_lsf
static const float mean_lsf[10]
Definition: siprdata.h:27
AV_CODEC_CAP_DELAY
#define AV_CODEC_CAP_DELAY
Encoder or decoder requires flushing with NULL input at the end in order to give the complete and cor...
Definition: codec.h:76
samples
Filter the word “frame” indicates either a video frame or a group of audio samples
Definition: filter_design.txt:8
copy_bits
static void copy_bits(PutBitContext *pb, const uint8_t *data, int size, GetBitContext *gb, int nbits)
Copy (unaligned) bits from gb/data/size to pb.
Definition: wmavoice.c:1892
avpriv_scalarproduct_float_c
float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len)
Return the scalar product of two vectors of floats.
Definition: float_dsp.c:124
synth_frame
static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx, float *samples, const double *lsps, const double *prev_lsps, float *excitation, float *synth)
Synthesize output samples for a single frame.
Definition: wmavoice.c:1494
mem.h
M_LN10
#define M_LN10
Definition: mathematics.h:49
WMAVoiceContext::gb
GetBitContext gb
packet bitreader.
Definition: wmavoice.c:137
avpriv_request_sample
#define avpriv_request_sample(...)
Definition: tableprint_vlc.h:36
synth_block_fcb_acb
static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, int block_pitch_sh2, const struct frame_type_desc *frame_desc, float *excitation)
Parse FCB/ACB signal for a single block.
Definition: wmavoice.c:1318
flush_put_bits
static void flush_put_bits(PutBitContext *s)
Pad the end of the output stream with zeros.
Definition: put_bits.h:143
AV_CHANNEL_LAYOUT_MONO
#define AV_CHANNEL_LAYOUT_MONO
Definition: channel_layout.h:394
VLC_INIT_STATIC_TABLE_FROM_LENGTHS
#define VLC_INIT_STATIC_TABLE_FROM_LENGTHS(vlc_table, nb_bits, nb_codes, lens, lens_wrap, syms, syms_wrap, syms_size, offset, flags)
Definition: vlc.h:280
scale
static void scale(int *out, const int *in, const int w, const int h, const int shift)
Definition: intra.c:291
alpha
static const int16_t alpha[]
Definition: ilbcdata.h:55
AVPacket
This structure stores compressed data.
Definition: packet.h:516
synth_block_hardcoded
static void synth_block_hardcoded(WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, const struct frame_type_desc *frame_desc, float *excitation)
Parse hardcoded signal for a single block.
Definition: wmavoice.c:1287
SFRAME_CACHE_MAXSIZE
#define SFRAME_CACHE_MAXSIZE
maximum cache size for frame data that
Definition: wmavoice.c:57
frame_type_vlc
static VLCElem frame_type_vlc[132]
Frame type VLC coding.
Definition: wmavoice.c:64
AMRFixed::pitch_lag
int pitch_lag
Definition: acelp_vectors.h:58
flags
#define flags(name, subs,...)
Definition: cbs_av1.c:482
WMAVoiceContext::irdft
AVTXContext * irdft
contexts for FFT-calculation in the
Definition: wmavoice.c:265
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
wmavoice_lsp10_intercoeff_a
static const float wmavoice_lsp10_intercoeff_a[32][2][10]
Definition: wmavoice_data.h:1657
AVERROR_INVALIDDATA
#define AVERROR_INVALIDDATA
Invalid data found when processing input.
Definition: error.h:61
a1
static double a1(void *priv, double x, double y)
Definition: vf_xfade.c:2029
dequant_lsp16r
static void dequant_lsp16r(GetBitContext *gb, double *i_lsps, const double *old, double *a1, double *a2, int q_mode)
Parse 16 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames f...
Definition: wmavoice.c:998
frame_type_desc::dbl_pulses
uint8_t dbl_pulses
how many pulse vectors have pulse pairs (rather than just one single pulse) only if fcb_type == FCB_T...
Definition: wmavoice.c:106
frame_type_desc::acb_type
uint8_t acb_type
Adaptive codebook type (ACB_TYPE_*)
Definition: wmavoice.c:104
MAX_BLOCKS
#define MAX_BLOCKS
maximum number of blocks per frame
Definition: wmavoice.c:48
ff_acelp_lspd2lpc
void ff_acelp_lspd2lpc(const double *lsp, float *lpc, int lp_half_order)
Reconstruct LPC coefficients from the line spectral pair frequencies.
Definition: lsp.c:220
WMAVoiceContext::postfilter_agc
float postfilter_agc
gain control memory, used in adaptive_gain_control()
Definition: wmavoice.c:271
put_bits.h
pulses
static const int8_t pulses[4]
Number of non-zero pulses in the MP-MLQ excitation.
Definition: g723_1.h:260
ff_acelp_interpolatef
void ff_acelp_interpolatef(float *out, const float *in, const float *filter_coeffs, int precision, int frac_pos, int filter_length, int length)
Floating point version of ff_acelp_interpolate()
Definition: acelp_filters.c:80
AVFormatContext::priv_data
void * priv_data
Format private data.
Definition: avformat.h:1328
AV_SAMPLE_FMT_FLT
@ AV_SAMPLE_FMT_FLT
float
Definition: samplefmt.h:60
av_clipd
av_clipd
Definition: af_crystalizer.c:132
tx.h
min
float min
Definition: vorbis_enc_data.h:429
WMAVoiceContext
WMA Voice decoding context.
Definition: wmavoice.c:132