FFmpeg
aacdec.c
1 /*
2  * AAC decoder
3  * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
4  * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
5  *
6  * AAC LATM decoder
7  * Copyright (c) 2008-2010 Paul Kendall <paul@kcbbs.gen.nz>
8  * Copyright (c) 2010 Janne Grunau <janne-libav@jannau.net>
9  *
10  * This file is part of FFmpeg.
11  *
12  * FFmpeg is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public
14  * License as published by the Free Software Foundation; either
15  * version 2.1 of the License, or (at your option) any later version.
16  *
17  * FFmpeg is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with FFmpeg; if not, write to the Free Software
24  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25  */
26 
27 /**
28  * @file
29  * AAC decoder
30  * @author Oded Shimon ( ods15 ods15 dyndns org )
31  * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
32  */
33 
34 /*
35  * supported tools
36  *
37  * Support? Name
38  * N (code in SoC repo) gain control
39  * Y block switching
40  * Y window shapes - standard
41  * N window shapes - Low Delay
42  * Y filterbank - standard
43  * N (code in SoC repo) filterbank - Scalable Sample Rate
44  * Y Temporal Noise Shaping
45  * Y Long Term Prediction
46  * Y intensity stereo
47  * Y channel coupling
48  * Y frequency domain prediction
49  * Y Perceptual Noise Substitution
50  * Y Mid/Side stereo
51  * N Scalable Inverse AAC Quantization
52  * N Frequency Selective Switch
53  * N upsampling filter
54  * Y quantization & coding - AAC
55  * N quantization & coding - TwinVQ
56  * N quantization & coding - BSAC
57  * N AAC Error Resilience tools
58  * N Error Resilience payload syntax
59  * N Error Protection tool
60  * N CELP
61  * N Silence Compression
62  * N HVXC
63  * N HVXC 4kbits/s VR
64  * N Structured Audio tools
65  * N Structured Audio Sample Bank Format
66  * N MIDI
67  * N Harmonic and Individual Lines plus Noise
68  * N Text-To-Speech Interface
69  * Y Spectral Band Replication
70  * Y (not in this code) Layer-1
71  * Y (not in this code) Layer-2
72  * Y (not in this code) Layer-3
73  * N SinuSoidal Coding (Transient, Sinusoid, Noise)
74  * Y Parametric Stereo
75  * N Direct Stream Transfer
76  *
77  * Note: - HE AAC v1 comprises LC AAC with Spectral Band Replication.
78  * - HE AAC v2 comprises LC AAC with Spectral Band Replication and
 79  *         Parametric Stereo.
80  */
81 
82 #include "libavutil/float_dsp.h"
83 #include "libavutil/opt.h"
84 #include "avcodec.h"
85 #include "internal.h"
86 #include "get_bits.h"
87 #include "fft.h"
88 #include "fmtconvert.h"
89 #include "lpc.h"
90 #include "kbdwin.h"
91 #include "sinewin.h"
92 
93 #include "aac.h"
94 #include "aactab.h"
95 #include "aacdectab.h"
96 #include "cbrt_tablegen.h"
97 #include "sbr.h"
98 #include "aacsbr.h"
99 #include "mpeg4audio.h"
100 #include "aacadtsdec.h"
101 #include "libavutil/intfloat.h"
102 
103 #include <assert.h>
104 #include <errno.h>
105 #include <math.h>
106 #include <string.h>
107 
108 #if ARCH_ARM
109 # include "arm/aac.h"
110 #elif ARCH_MIPS
111 # include "mips/aacdec_mips.h"
112 #endif
113 
 114 static VLC vlc_scalefactors;
 115 static VLC vlc_spectral[11];
116 
117 static int output_configure(AACContext *ac,
118  uint8_t layout_map[MAX_ELEM_ID*4][3], int tags,
119  enum OCStatus oc_type, int get_new_frame);
120 
121 #define overread_err "Input buffer exhausted before END element found\n"
122 
123 static int count_channels(uint8_t (*layout)[3], int tags)
124 {
125  int i, sum = 0;
126  for (i = 0; i < tags; i++) {
127  int syn_ele = layout[i][0];
128  int pos = layout[i][2];
129  sum += (1 + (syn_ele == TYPE_CPE)) *
130  (pos != AAC_CHANNEL_OFF && pos != AAC_CHANNEL_CC);
131  }
132  return sum;
133 }
134 
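/*
 * Illustration of the counting rule above as a standalone sketch (the enum
 * values below are stand-ins, not the real TYPE_ and AAC_CHANNEL_ constants):
 * a CPE carries a channel pair, every other element one channel, and entries
 * whose position is "off" or "coupling channel" produce no output channels.
 */
#include <stdio.h>

enum { EX_SCE, EX_CPE, EX_LFE };                           /* assumed element types */
enum { EX_POS_OFF, EX_POS_FRONT, EX_POS_BACK, EX_POS_LFE, EX_POS_CC };

static int ex_count_channels(const int (*layout)[2], int tags)
{
    int i, sum = 0;
    for (i = 0; i < tags; i++) {
        int is_pair   = layout[i][0] == EX_CPE;
        int is_output = layout[i][1] != EX_POS_OFF && layout[i][1] != EX_POS_CC;
        sum += (1 + is_pair) * is_output;
    }
    return sum;
}

int main(void)
{
    /* a 5.1-style program: SCE(front) + CPE(front) + CPE(back) + LFE */
    static const int m[4][2] = { { EX_SCE, EX_POS_FRONT }, { EX_CPE, EX_POS_FRONT },
                                 { EX_CPE, EX_POS_BACK  }, { EX_LFE, EX_POS_LFE   } };
    printf("channels: %d\n", ex_count_channels(m, 4));     /* prints "channels: 6" */
    return 0;
}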
135 /**
136  * Check for the channel element in the current channel position configuration.
137  * If it exists, make sure the appropriate element is allocated and map the
138  * channel order to match the internal FFmpeg channel layout.
139  *
140  * @param che_pos current channel position configuration
141  * @param type channel element type
142  * @param id channel element id
143  * @param channels count of the number of channels in the configuration
144  *
145  * @return Returns error status. 0 - OK, !0 - error
146  */
 147 static int che_configure(AACContext *ac,
 148  enum ChannelPosition che_pos,
149  int type, int id, int *channels)
150 {
151  if (*channels >= MAX_CHANNELS)
152  return AVERROR_INVALIDDATA;
153  if (che_pos) {
154  if (!ac->che[type][id]) {
155  if (!(ac->che[type][id] = av_mallocz(sizeof(ChannelElement))))
156  return AVERROR(ENOMEM);
157  ff_aac_sbr_ctx_init(ac, &ac->che[type][id]->sbr);
158  }
159  if (type != TYPE_CCE) {
160  if (*channels >= MAX_CHANNELS - (type == TYPE_CPE || (type == TYPE_SCE && ac->oc[1].m4ac.ps == 1))) {
161  av_log(ac->avctx, AV_LOG_ERROR, "Too many channels\n");
162  return AVERROR_INVALIDDATA;
163  }
164  ac->output_element[(*channels)++] = &ac->che[type][id]->ch[0];
165  if (type == TYPE_CPE ||
166  (type == TYPE_SCE && ac->oc[1].m4ac.ps == 1)) {
167  ac->output_element[(*channels)++] = &ac->che[type][id]->ch[1];
168  }
169  }
170  } else {
171  if (ac->che[type][id])
172  ff_aac_sbr_ctx_close(&ac->che[type][id]->sbr);
173  av_freep(&ac->che[type][id]);
174  }
175  return 0;
176 }
177 
 178 static int frame_configure_elements(AVCodecContext *avctx)
 179 {
180  AACContext *ac = avctx->priv_data;
181  int type, id, ch, ret;
182 
183  /* set channel pointers to internal buffers by default */
184  for (type = 0; type < 4; type++) {
185  for (id = 0; id < MAX_ELEM_ID; id++) {
186  ChannelElement *che = ac->che[type][id];
187  if (che) {
188  che->ch[0].ret = che->ch[0].ret_buf;
189  che->ch[1].ret = che->ch[1].ret_buf;
190  }
191  }
192  }
193 
194  /* get output buffer */
195  av_frame_unref(ac->frame);
196  ac->frame->nb_samples = 2048;
197  if ((ret = ff_get_buffer(avctx, ac->frame, 0)) < 0)
198  return ret;
199 
200  /* map output channel pointers to AVFrame data */
201  for (ch = 0; ch < avctx->channels; ch++) {
202  if (ac->output_element[ch])
203  ac->output_element[ch]->ret = (float *)ac->frame->extended_data[ch];
204  }
205 
206  return 0;
207 }
208 
 209 struct elem_to_channel {
 210  uint64_t av_position;
 211  uint8_t syn_ele;
 212  uint8_t elem_id;
 213  uint8_t aac_position;
 214 };
215 
216 static int assign_pair(struct elem_to_channel e2c_vec[MAX_ELEM_ID],
217  uint8_t (*layout_map)[3], int offset, uint64_t left,
218  uint64_t right, int pos)
219 {
220  if (layout_map[offset][0] == TYPE_CPE) {
221  e2c_vec[offset] = (struct elem_to_channel) {
222  .av_position = left | right,
223  .syn_ele = TYPE_CPE,
224  .elem_id = layout_map[offset][1],
225  .aac_position = pos
226  };
227  return 1;
228  } else {
229  e2c_vec[offset] = (struct elem_to_channel) {
230  .av_position = left,
231  .syn_ele = TYPE_SCE,
232  .elem_id = layout_map[offset][1],
233  .aac_position = pos
234  };
235  e2c_vec[offset + 1] = (struct elem_to_channel) {
236  .av_position = right,
237  .syn_ele = TYPE_SCE,
238  .elem_id = layout_map[offset + 1][1],
239  .aac_position = pos
240  };
241  return 2;
242  }
243 }
244 
245 static int count_paired_channels(uint8_t (*layout_map)[3], int tags, int pos,
246  int *current)
247 {
248  int num_pos_channels = 0;
249  int first_cpe = 0;
250  int sce_parity = 0;
251  int i;
252  for (i = *current; i < tags; i++) {
253  if (layout_map[i][2] != pos)
254  break;
255  if (layout_map[i][0] == TYPE_CPE) {
256  if (sce_parity) {
257  if (pos == AAC_CHANNEL_FRONT && !first_cpe) {
258  sce_parity = 0;
259  } else {
260  return -1;
261  }
262  }
263  num_pos_channels += 2;
264  first_cpe = 1;
265  } else {
266  num_pos_channels++;
267  sce_parity ^= 1;
268  }
269  }
270  if (sce_parity &&
271  ((pos == AAC_CHANNEL_FRONT && first_cpe) || pos == AAC_CHANNEL_SIDE))
272  return -1;
273  *current = i;
274  return num_pos_channels;
275 }
276 
277 static uint64_t sniff_channel_order(uint8_t (*layout_map)[3], int tags)
278 {
279  int i, n, total_non_cc_elements;
280  struct elem_to_channel e2c_vec[4 * MAX_ELEM_ID] = { { 0 } };
281  int num_front_channels, num_side_channels, num_back_channels;
282  uint64_t layout;
283 
284  if (FF_ARRAY_ELEMS(e2c_vec) < tags)
285  return 0;
286 
287  i = 0;
288  num_front_channels =
289  count_paired_channels(layout_map, tags, AAC_CHANNEL_FRONT, &i);
290  if (num_front_channels < 0)
291  return 0;
292  num_side_channels =
293  count_paired_channels(layout_map, tags, AAC_CHANNEL_SIDE, &i);
294  if (num_side_channels < 0)
295  return 0;
296  num_back_channels =
297  count_paired_channels(layout_map, tags, AAC_CHANNEL_BACK, &i);
298  if (num_back_channels < 0)
299  return 0;
300 
301  i = 0;
302  if (num_front_channels & 1) {
303  e2c_vec[i] = (struct elem_to_channel) {
 304  .av_position = AV_CH_FRONT_CENTER,
 305  .syn_ele = TYPE_SCE,
306  .elem_id = layout_map[i][1],
307  .aac_position = AAC_CHANNEL_FRONT
308  };
309  i++;
310  num_front_channels--;
311  }
312  if (num_front_channels >= 4) {
313  i += assign_pair(e2c_vec, layout_map, i,
 314  AV_CH_FRONT_LEFT_OF_CENTER,
 315  AV_CH_FRONT_RIGHT_OF_CENTER,
 316  AAC_CHANNEL_FRONT);
 317  num_front_channels -= 2;
318  }
319  if (num_front_channels >= 2) {
320  i += assign_pair(e2c_vec, layout_map, i,
 321  AV_CH_FRONT_LEFT,
 322  AV_CH_FRONT_RIGHT,
 323  AAC_CHANNEL_FRONT);
 324  num_front_channels -= 2;
325  }
326  while (num_front_channels >= 2) {
327  i += assign_pair(e2c_vec, layout_map, i,
328  UINT64_MAX,
329  UINT64_MAX,
 330  AAC_CHANNEL_FRONT);
 331  num_front_channels -= 2;
332  }
333 
334  if (num_side_channels >= 2) {
335  i += assign_pair(e2c_vec, layout_map, i,
 336  AV_CH_SIDE_LEFT,
 337  AV_CH_SIDE_RIGHT,
 338  AAC_CHANNEL_SIDE);
 339  num_side_channels -= 2;
340  }
341  while (num_side_channels >= 2) {
342  i += assign_pair(e2c_vec, layout_map, i,
343  UINT64_MAX,
344  UINT64_MAX,
 345  AAC_CHANNEL_SIDE);
 346  num_side_channels -= 2;
347  }
348 
349  while (num_back_channels >= 4) {
350  i += assign_pair(e2c_vec, layout_map, i,
351  UINT64_MAX,
352  UINT64_MAX,
 353  AAC_CHANNEL_BACK);
 354  num_back_channels -= 2;
355  }
356  if (num_back_channels >= 2) {
357  i += assign_pair(e2c_vec, layout_map, i,
 358  AV_CH_BACK_LEFT,
 359  AV_CH_BACK_RIGHT,
 360  AAC_CHANNEL_BACK);
 361  num_back_channels -= 2;
362  }
363  if (num_back_channels) {
364  e2c_vec[i] = (struct elem_to_channel) {
 365  .av_position = AV_CH_BACK_CENTER,
 366  .syn_ele = TYPE_SCE,
367  .elem_id = layout_map[i][1],
368  .aac_position = AAC_CHANNEL_BACK
369  };
370  i++;
371  num_back_channels--;
372  }
373 
374  if (i < tags && layout_map[i][2] == AAC_CHANNEL_LFE) {
375  e2c_vec[i] = (struct elem_to_channel) {
 376  .av_position = AV_CH_LOW_FREQUENCY,
 377  .syn_ele = TYPE_LFE,
378  .elem_id = layout_map[i][1],
379  .aac_position = AAC_CHANNEL_LFE
380  };
381  i++;
382  }
383  while (i < tags && layout_map[i][2] == AAC_CHANNEL_LFE) {
384  e2c_vec[i] = (struct elem_to_channel) {
385  .av_position = UINT64_MAX,
386  .syn_ele = TYPE_LFE,
387  .elem_id = layout_map[i][1],
388  .aac_position = AAC_CHANNEL_LFE
389  };
390  i++;
391  }
392 
393  // Must choose a stable sort
394  total_non_cc_elements = n = i;
395  do {
396  int next_n = 0;
397  for (i = 1; i < n; i++)
398  if (e2c_vec[i - 1].av_position > e2c_vec[i].av_position) {
399  FFSWAP(struct elem_to_channel, e2c_vec[i - 1], e2c_vec[i]);
400  next_n = i;
401  }
402  n = next_n;
403  } while (n > 0);
404 
405  layout = 0;
406  for (i = 0; i < total_non_cc_elements; i++) {
407  layout_map[i][0] = e2c_vec[i].syn_ele;
408  layout_map[i][1] = e2c_vec[i].elem_id;
409  layout_map[i][2] = e2c_vec[i].aac_position;
410  if (e2c_vec[i].av_position != UINT64_MAX) {
411  layout |= e2c_vec[i].av_position;
412  }
413  }
414 
415  return layout;
416 }
417 
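/*
 * The ordering pass in sniff_channel_order() is a bubble sort on purpose: as
 * the comment above notes, a stable sort is required so that elements sharing
 * an av_position (for example several UINT64_MAX "no fixed position" entries)
 * keep their bitstream order. A stripped-down sketch of the same loop on plain
 * integers, with the same "shrink to the last swap" optimization:
 */
#include <stdio.h>

static void ex_stable_bubble_sort(int *v, int n)
{
    do {
        int i, next_n = 0;
        for (i = 1; i < n; i++)
            if (v[i - 1] > v[i]) {      /* strictly greater: equal keys never swap */
                int t    = v[i - 1];
                v[i - 1] = v[i];
                v[i]     = t;
                next_n   = i;           /* everything past the last swap is sorted */
            }
        n = next_n;
    } while (n > 0);
}

int main(void)
{
    int v[] = { 4, 1, 3, 1, 2 };
    int i;
    ex_stable_bubble_sort(v, 5);
    for (i = 0; i < 5; i++)
        printf("%d ", v[i]);            /* prints "1 1 2 3 4" */
    printf("\n");
    return 0;
}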
418 /**
419  * Save current output configuration if and only if it has been locked.
420  */
 421 static void push_output_configuration(AACContext *ac) {
 422  if (ac->oc[1].status == OC_LOCKED) {
423  ac->oc[0] = ac->oc[1];
424  }
425  ac->oc[1].status = OC_NONE;
426 }
427 
428 /**
429  * Restore the previous output configuration if and only if the current
430  * configuration is unlocked.
431  */
 432 static void pop_output_configuration(AACContext *ac) {
 433  if (ac->oc[1].status != OC_LOCKED && ac->oc[0].status != OC_NONE) {
434  ac->oc[1] = ac->oc[0];
435  ac->avctx->channels = ac->oc[1].channels;
436  ac->avctx->channel_layout = ac->oc[1].channel_layout;
437  output_configure(ac, ac->oc[1].layout_map, ac->oc[1].layout_map_tags,
438  ac->oc[1].status, 0);
439  }
440 }
441 
442 /**
443  * Configure output channel order based on the current program
444  * configuration element.
445  *
446  * @return Returns error status. 0 - OK, !0 - error
447  */
 448 static int output_configure(AACContext *ac,
 449  uint8_t layout_map[MAX_ELEM_ID * 4][3], int tags,
450  enum OCStatus oc_type, int get_new_frame)
451 {
452  AVCodecContext *avctx = ac->avctx;
453  int i, channels = 0, ret;
454  uint64_t layout = 0;
455 
456  if (ac->oc[1].layout_map != layout_map) {
457  memcpy(ac->oc[1].layout_map, layout_map, tags * sizeof(layout_map[0]));
458  ac->oc[1].layout_map_tags = tags;
459  }
460 
461  // Try to sniff a reasonable channel order, otherwise output the
462  // channels in the order the PCE declared them.
 463  if (avctx->request_channel_layout != AV_CH_LAYOUT_NATIVE)
 464  layout = sniff_channel_order(layout_map, tags);
465  for (i = 0; i < tags; i++) {
466  int type = layout_map[i][0];
467  int id = layout_map[i][1];
468  int position = layout_map[i][2];
469  // Allocate or free elements depending on if they are in the
470  // current program configuration.
471  ret = che_configure(ac, position, type, id, &channels);
472  if (ret < 0)
473  return ret;
474  }
475  if (ac->oc[1].m4ac.ps == 1 && channels == 2) {
476  if (layout == AV_CH_FRONT_CENTER) {
 477  layout = AV_CH_FRONT_LEFT | AV_CH_FRONT_RIGHT;
 478  } else {
479  layout = 0;
480  }
481  }
482 
483  memcpy(ac->tag_che_map, ac->che, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
484  if (layout) avctx->channel_layout = layout;
485  ac->oc[1].channel_layout = layout;
486  avctx->channels = ac->oc[1].channels = channels;
487  ac->oc[1].status = oc_type;
488 
489  if (get_new_frame) {
490  if ((ret = frame_configure_elements(ac->avctx)) < 0)
491  return ret;
492  }
493 
494  return 0;
495 }
496 
497 static void flush(AVCodecContext *avctx)
498 {
499  AACContext *ac= avctx->priv_data;
500  int type, i, j;
501 
502  for (type = 3; type >= 0; type--) {
503  for (i = 0; i < MAX_ELEM_ID; i++) {
504  ChannelElement *che = ac->che[type][i];
505  if (che) {
506  for (j = 0; j <= 1; j++) {
507  memset(che->ch[j].saved, 0, sizeof(che->ch[j].saved));
508  }
509  }
510  }
511  }
512 }
513 
514 /**
515  * Set up channel positions based on a default channel configuration
516  * as specified in table 1.17.
517  *
518  * @return Returns error status. 0 - OK, !0 - error
519  */
 520 static int set_default_channel_config(AVCodecContext *avctx,
 521  uint8_t (*layout_map)[3],
522  int *tags,
523  int channel_config)
524 {
525  if (channel_config < 1 || channel_config > 7) {
526  av_log(avctx, AV_LOG_ERROR,
527  "invalid default channel configuration (%d)\n",
528  channel_config);
529  return AVERROR_INVALIDDATA;
530  }
531  *tags = tags_per_config[channel_config];
532  memcpy(layout_map, aac_channel_layout_map[channel_config - 1],
533  *tags * sizeof(*layout_map));
534  return 0;
535 }
536 
537 static ChannelElement *get_che(AACContext *ac, int type, int elem_id)
538 {
539  /* For PCE based channel configurations map the channels solely based
540  * on tags. */
541  if (!ac->oc[1].m4ac.chan_config) {
542  return ac->tag_che_map[type][elem_id];
543  }
544  // Allow single CPE stereo files to be signalled with mono configuration.
545  if (!ac->tags_mapped && type == TYPE_CPE &&
546  ac->oc[1].m4ac.chan_config == 1) {
547  uint8_t layout_map[MAX_ELEM_ID*4][3];
548  int layout_map_tags;
 549  push_output_configuration(ac);
 550 
551  av_log(ac->avctx, AV_LOG_DEBUG, "mono with CPE\n");
552 
553  if (set_default_channel_config(ac->avctx, layout_map,
554  &layout_map_tags, 2) < 0)
555  return NULL;
556  if (output_configure(ac, layout_map, layout_map_tags,
557  OC_TRIAL_FRAME, 1) < 0)
558  return NULL;
559 
560  ac->oc[1].m4ac.chan_config = 2;
561  ac->oc[1].m4ac.ps = 0;
562  }
563  // And vice-versa
564  if (!ac->tags_mapped && type == TYPE_SCE &&
565  ac->oc[1].m4ac.chan_config == 2) {
566  uint8_t layout_map[MAX_ELEM_ID * 4][3];
567  int layout_map_tags;
 568  push_output_configuration(ac);
 569 
570  av_log(ac->avctx, AV_LOG_DEBUG, "stereo with SCE\n");
571 
572  if (set_default_channel_config(ac->avctx, layout_map,
573  &layout_map_tags, 1) < 0)
574  return NULL;
575  if (output_configure(ac, layout_map, layout_map_tags,
576  OC_TRIAL_FRAME, 1) < 0)
577  return NULL;
578 
579  ac->oc[1].m4ac.chan_config = 1;
580  if (ac->oc[1].m4ac.sbr)
581  ac->oc[1].m4ac.ps = -1;
582  }
583  /* For indexed channel configurations map the channels solely based
584  * on position. */
585  switch (ac->oc[1].m4ac.chan_config) {
586  case 7:
587  if (ac->tags_mapped == 3 && type == TYPE_CPE) {
588  ac->tags_mapped++;
589  return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][2];
590  }
591  case 6:
592  /* Some streams incorrectly code 5.1 audio as
593  * SCE[0] CPE[0] CPE[1] SCE[1]
594  * instead of
595  * SCE[0] CPE[0] CPE[1] LFE[0].
596  * If we seem to have encountered such a stream, transfer
597  * the LFE[0] element to the SCE[1]'s mapping */
598  if (ac->tags_mapped == tags_per_config[ac->oc[1].m4ac.chan_config] - 1 && (type == TYPE_LFE || type == TYPE_SCE)) {
599  ac->tags_mapped++;
600  return ac->tag_che_map[type][elem_id] = ac->che[TYPE_LFE][0];
601  }
602  case 5:
603  if (ac->tags_mapped == 2 && type == TYPE_CPE) {
604  ac->tags_mapped++;
605  return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][1];
606  }
607  case 4:
608  if (ac->tags_mapped == 2 &&
609  ac->oc[1].m4ac.chan_config == 4 &&
610  type == TYPE_SCE) {
611  ac->tags_mapped++;
612  return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][1];
613  }
614  case 3:
615  case 2:
616  if (ac->tags_mapped == (ac->oc[1].m4ac.chan_config != 2) &&
617  type == TYPE_CPE) {
618  ac->tags_mapped++;
619  return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][0];
620  } else if (ac->oc[1].m4ac.chan_config == 2) {
621  return NULL;
622  }
623  case 1:
624  if (!ac->tags_mapped && type == TYPE_SCE) {
625  ac->tags_mapped++;
626  return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][0];
627  }
628  default:
629  return NULL;
630  }
631 }
632 
633 /**
634  * Decode an array of 4 bit element IDs, optionally interleaved with a
635  * stereo/mono switching bit.
636  *
637  * @param type speaker type/position for these channels
638  */
639 static void decode_channel_map(uint8_t layout_map[][3],
640  enum ChannelPosition type,
641  GetBitContext *gb, int n)
642 {
643  while (n--) {
644  enum RawDataBlockType syn_ele;
645  switch (type) {
646  case AAC_CHANNEL_FRONT:
647  case AAC_CHANNEL_BACK:
648  case AAC_CHANNEL_SIDE:
649  syn_ele = get_bits1(gb);
650  break;
651  case AAC_CHANNEL_CC:
652  skip_bits1(gb);
653  syn_ele = TYPE_CCE;
654  break;
655  case AAC_CHANNEL_LFE:
656  syn_ele = TYPE_LFE;
657  break;
658  default:
659  av_assert0(0);
660  }
661  layout_map[0][0] = syn_ele;
662  layout_map[0][1] = get_bits(gb, 4);
663  layout_map[0][2] = type;
664  layout_map++;
665  }
666 }
667 
668 /**
669  * Decode program configuration element; reference: table 4.2.
670  *
671  * @return Returns error status. 0 - OK, !0 - error
672  */
673 static int decode_pce(AVCodecContext *avctx, MPEG4AudioConfig *m4ac,
674  uint8_t (*layout_map)[3],
675  GetBitContext *gb)
676 {
677  int num_front, num_side, num_back, num_lfe, num_assoc_data, num_cc;
678  int sampling_index;
679  int comment_len;
680  int tags;
681 
682  skip_bits(gb, 2); // object_type
683 
684  sampling_index = get_bits(gb, 4);
685  if (m4ac->sampling_index != sampling_index)
686  av_log(avctx, AV_LOG_WARNING,
687  "Sample rate index in program config element does not "
688  "match the sample rate index configured by the container.\n");
689 
690  num_front = get_bits(gb, 4);
691  num_side = get_bits(gb, 4);
692  num_back = get_bits(gb, 4);
693  num_lfe = get_bits(gb, 2);
694  num_assoc_data = get_bits(gb, 3);
695  num_cc = get_bits(gb, 4);
696 
697  if (get_bits1(gb))
698  skip_bits(gb, 4); // mono_mixdown_tag
699  if (get_bits1(gb))
700  skip_bits(gb, 4); // stereo_mixdown_tag
701 
702  if (get_bits1(gb))
703  skip_bits(gb, 3); // mixdown_coeff_index and pseudo_surround
704 
705  if (get_bits_left(gb) < 4 * (num_front + num_side + num_back + num_lfe + num_assoc_data + num_cc)) {
706  av_log(avctx, AV_LOG_ERROR, "decode_pce: " overread_err);
707  return -1;
708  }
709  decode_channel_map(layout_map , AAC_CHANNEL_FRONT, gb, num_front);
710  tags = num_front;
711  decode_channel_map(layout_map + tags, AAC_CHANNEL_SIDE, gb, num_side);
712  tags += num_side;
713  decode_channel_map(layout_map + tags, AAC_CHANNEL_BACK, gb, num_back);
714  tags += num_back;
715  decode_channel_map(layout_map + tags, AAC_CHANNEL_LFE, gb, num_lfe);
716  tags += num_lfe;
717 
718  skip_bits_long(gb, 4 * num_assoc_data);
719 
720  decode_channel_map(layout_map + tags, AAC_CHANNEL_CC, gb, num_cc);
721  tags += num_cc;
722 
723  align_get_bits(gb);
724 
725  /* comment field, first byte is length */
726  comment_len = get_bits(gb, 8) * 8;
727  if (get_bits_left(gb) < comment_len) {
728  av_log(avctx, AV_LOG_ERROR, "decode_pce: " overread_err);
729  return AVERROR_INVALIDDATA;
730  }
731  skip_bits_long(gb, comment_len);
732  return tags;
733 }
734 
735 /**
736  * Decode GA "General Audio" specific configuration; reference: table 4.1.
737  *
738  * @param ac pointer to AACContext, may be null
739  * @param avctx pointer to AVCCodecContext, used for logging
740  *
741  * @return Returns error status. 0 - OK, !0 - error
742  */
 743 static int decode_ga_specific_config(AACContext *ac, AVCodecContext *avctx,
 744  GetBitContext *gb,
745  MPEG4AudioConfig *m4ac,
746  int channel_config)
747 {
748  int extension_flag, ret;
749  uint8_t layout_map[MAX_ELEM_ID*4][3];
750  int tags = 0;
751 
752  if (get_bits1(gb)) { // frameLengthFlag
753  avpriv_request_sample(avctx, "960/120 MDCT window");
754  return AVERROR_PATCHWELCOME;
755  }
756 
757  if (get_bits1(gb)) // dependsOnCoreCoder
758  skip_bits(gb, 14); // coreCoderDelay
759  extension_flag = get_bits1(gb);
760 
 761  if (m4ac->object_type == AOT_AAC_SCALABLE ||
 762  m4ac->object_type == AOT_ER_AAC_SCALABLE)
 763  skip_bits(gb, 3); // layerNr
764 
765  if (channel_config == 0) {
766  skip_bits(gb, 4); // element_instance_tag
767  tags = decode_pce(avctx, m4ac, layout_map, gb);
768  if (tags < 0)
769  return tags;
770  } else {
771  if ((ret = set_default_channel_config(avctx, layout_map,
772  &tags, channel_config)))
773  return ret;
774  }
775 
776  if (count_channels(layout_map, tags) > 1) {
777  m4ac->ps = 0;
778  } else if (m4ac->sbr == 1 && m4ac->ps == -1)
779  m4ac->ps = 1;
780 
781  if (ac && (ret = output_configure(ac, layout_map, tags, OC_GLOBAL_HDR, 0)))
782  return ret;
783 
784  if (extension_flag) {
785  switch (m4ac->object_type) {
786  case AOT_ER_BSAC:
787  skip_bits(gb, 5); // numOfSubFrame
788  skip_bits(gb, 11); // layer_length
789  break;
790  case AOT_ER_AAC_LC:
791  case AOT_ER_AAC_LTP:
792  case AOT_ER_AAC_SCALABLE:
793  case AOT_ER_AAC_LD:
794  skip_bits(gb, 3); /* aacSectionDataResilienceFlag
795  * aacScalefactorDataResilienceFlag
796  * aacSpectralDataResilienceFlag
797  */
798  break;
799  }
800  skip_bits1(gb); // extensionFlag3 (TBD in version 3)
801  }
802  return 0;
803 }
804 
805 /**
806  * Decode audio specific configuration; reference: table 1.13.
807  *
808  * @param ac pointer to AACContext, may be null
 809  * @param avctx pointer to AVCodecContext, used for logging
810  * @param m4ac pointer to MPEG4AudioConfig, used for parsing
811  * @param data pointer to buffer holding an audio specific config
812  * @param bit_size size of audio specific config or data in bits
813  * @param sync_extension look for an appended sync extension
814  *
815  * @return Returns error status or number of consumed bits. <0 - error
816  */
 817 static int decode_audio_specific_config(AACContext *ac,
 818  AVCodecContext *avctx,
819  MPEG4AudioConfig *m4ac,
820  const uint8_t *data, int bit_size,
821  int sync_extension)
822 {
823  GetBitContext gb;
824  int i, ret;
825 
826  av_dlog(avctx, "audio specific config size %d\n", bit_size >> 3);
827  for (i = 0; i < bit_size >> 3; i++)
828  av_dlog(avctx, "%02x ", data[i]);
829  av_dlog(avctx, "\n");
830 
831  if ((ret = init_get_bits(&gb, data, bit_size)) < 0)
832  return ret;
833 
834  if ((i = avpriv_mpeg4audio_get_config(m4ac, data, bit_size,
835  sync_extension)) < 0)
836  return AVERROR_INVALIDDATA;
837  if (m4ac->sampling_index > 12) {
838  av_log(avctx, AV_LOG_ERROR,
839  "invalid sampling rate index %d\n",
840  m4ac->sampling_index);
841  return AVERROR_INVALIDDATA;
842  }
843 
844  skip_bits_long(&gb, i);
845 
846  switch (m4ac->object_type) {
847  case AOT_AAC_MAIN:
848  case AOT_AAC_LC:
849  case AOT_AAC_LTP:
850  if ((ret = decode_ga_specific_config(ac, avctx, &gb,
851  m4ac, m4ac->chan_config)) < 0)
852  return ret;
853  break;
854  default:
855  av_log(avctx, AV_LOG_ERROR,
856  "Audio object type %s%d is not supported.\n",
857  m4ac->sbr == 1 ? "SBR+" : "",
858  m4ac->object_type);
859  return AVERROR(ENOSYS);
860  }
861 
862  av_dlog(avctx,
863  "AOT %d chan config %d sampling index %d (%d) SBR %d PS %d\n",
864  m4ac->object_type, m4ac->chan_config, m4ac->sampling_index,
865  m4ac->sample_rate, m4ac->sbr,
866  m4ac->ps);
867 
868  return get_bits_count(&gb);
869 }
870 
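/*
 * Worked example for the AudioSpecificConfig parsed above, assuming the common
 * two-byte AAC-LC case with no explicit SBR signalling: the extradata bytes
 * 0x12 0x10 unpack as audioObjectType = 2 (AAC-LC), samplingFrequencyIndex = 4
 * (44100 Hz) and channelConfiguration = 2 (stereo), followed by the
 * GASpecificConfig bits handled by decode_ga_specific_config(). A minimal
 * bit-extraction sketch, deliberately not using get_bits.h:
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
    const uint8_t asc[2] = { 0x12, 0x10 };
    unsigned v        = ((unsigned)asc[0] << 8) | asc[1];  /* 16 bits, MSB first */
    unsigned aot      = (v >> 11) & 0x1f;                  /* 5 bits */
    unsigned sf_index = (v >>  7) & 0x0f;                  /* 4 bits */
    unsigned chan_cfg = (v >>  3) & 0x0f;                  /* 4 bits */
    printf("AOT %u, sampling index %u, channel config %u\n", aot, sf_index, chan_cfg);
    /* prints: AOT 2, sampling index 4, channel config 2 */
    return 0;
}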
871 /**
872  * linear congruential pseudorandom number generator
873  *
874  * @param previous_val pointer to the current state of the generator
875  *
876  * @return Returns a 32-bit pseudorandom integer
877  */
878 static av_always_inline int lcg_random(unsigned previous_val)
879 {
880  union { unsigned u; int s; } v = { previous_val * 1664525u + 1013904223 };
881  return v.s;
882 }
883 
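/*
 * The generator above is the well-known "Numerical Recipes" linear
 * congruential recurrence, state' = state * 1664525 + 1013904223 (mod 2^32),
 * reinterpreted as a signed integer so the noise it feeds into Perceptual
 * Noise Substitution is roughly zero-mean. A standalone sketch of how the
 * decoder advances and consumes it (same seed as ac->random_state below):
 */
#include <stdio.h>
#include <stdint.h>

static int32_t ex_lcg_step(uint32_t *state)
{
    union { uint32_t u; int32_t s; } v;
    v.u = *state = *state * 1664525u + 1013904223u;
    return v.s;                              /* signed reinterpretation */
}

int main(void)
{
    uint32_t state = 0x1f2e3d4c;             /* the decoder's initial random_state */
    int i;
    for (i = 0; i < 4; i++)
        printf("%d\n", ex_lcg_step(&state));
    return 0;
}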
 884 static av_always_inline void reset_predict_state(PredictorState *ps)
 885 {
886  ps->r0 = 0.0f;
887  ps->r1 = 0.0f;
888  ps->cor0 = 0.0f;
889  ps->cor1 = 0.0f;
890  ps->var0 = 1.0f;
891  ps->var1 = 1.0f;
892 }
893 
 894 static void reset_all_predictors(PredictorState *ps)
 895 {
896  int i;
897  for (i = 0; i < MAX_PREDICTORS; i++)
898  reset_predict_state(&ps[i]);
899 }
900 
901 static int sample_rate_idx (int rate)
902 {
903  if (92017 <= rate) return 0;
904  else if (75132 <= rate) return 1;
905  else if (55426 <= rate) return 2;
906  else if (46009 <= rate) return 3;
907  else if (37566 <= rate) return 4;
908  else if (27713 <= rate) return 5;
909  else if (23004 <= rate) return 6;
910  else if (18783 <= rate) return 7;
911  else if (13856 <= rate) return 8;
912  else if (11502 <= rate) return 9;
913  else if (9391 <= rate) return 10;
914  else return 11;
915 }
916 
917 static void reset_predictor_group(PredictorState *ps, int group_num)
918 {
919  int i;
920  for (i = group_num - 1; i < MAX_PREDICTORS; i += 30)
921  reset_predict_state(&ps[i]);
922 }
923 
924 #define AAC_INIT_VLC_STATIC(num, size) \
925  INIT_VLC_STATIC(&vlc_spectral[num], 8, ff_aac_spectral_sizes[num], \
926  ff_aac_spectral_bits[num], sizeof(ff_aac_spectral_bits[num][0]), \
927  sizeof(ff_aac_spectral_bits[num][0]), \
928  ff_aac_spectral_codes[num], sizeof(ff_aac_spectral_codes[num][0]), \
929  sizeof(ff_aac_spectral_codes[num][0]), \
930  size);
931 
932 static void aacdec_init(AACContext *ac);
933 
 934 static av_cold int aac_decode_init(AVCodecContext *avctx)
 935 {
936  AACContext *ac = avctx->priv_data;
937  int ret;
938 
939  ac->avctx = avctx;
940  ac->oc[1].m4ac.sample_rate = avctx->sample_rate;
941 
942  aacdec_init(ac);
943 
 944  avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
 945 
946  if (avctx->extradata_size > 0) {
947  if ((ret = decode_audio_specific_config(ac, ac->avctx, &ac->oc[1].m4ac,
948  avctx->extradata,
949  avctx->extradata_size * 8,
950  1)) < 0)
951  return ret;
952  } else {
953  int sr, i;
954  uint8_t layout_map[MAX_ELEM_ID*4][3];
955  int layout_map_tags;
956 
957  sr = sample_rate_idx(avctx->sample_rate);
958  ac->oc[1].m4ac.sampling_index = sr;
959  ac->oc[1].m4ac.channels = avctx->channels;
960  ac->oc[1].m4ac.sbr = -1;
961  ac->oc[1].m4ac.ps = -1;
962 
963  for (i = 0; i < FF_ARRAY_ELEMS(ff_mpeg4audio_channels); i++)
964  if (ff_mpeg4audio_channels[i] == avctx->channels)
965  break;
 966  if (i == FF_ARRAY_ELEMS(ff_mpeg4audio_channels)) {
 967  i = 0;
968  }
969  ac->oc[1].m4ac.chan_config = i;
970 
971  if (ac->oc[1].m4ac.chan_config) {
972  int ret = set_default_channel_config(avctx, layout_map,
973  &layout_map_tags, ac->oc[1].m4ac.chan_config);
974  if (!ret)
975  output_configure(ac, layout_map, layout_map_tags,
976  OC_GLOBAL_HDR, 0);
977  else if (avctx->err_recognition & AV_EF_EXPLODE)
978  return AVERROR_INVALIDDATA;
979  }
980  }
981 
982  if (avctx->channels > MAX_CHANNELS) {
983  av_log(avctx, AV_LOG_ERROR, "Too many channels\n");
984  return AVERROR_INVALIDDATA;
985  }
986 
987  AAC_INIT_VLC_STATIC( 0, 304);
988  AAC_INIT_VLC_STATIC( 1, 270);
989  AAC_INIT_VLC_STATIC( 2, 550);
990  AAC_INIT_VLC_STATIC( 3, 300);
991  AAC_INIT_VLC_STATIC( 4, 328);
992  AAC_INIT_VLC_STATIC( 5, 294);
993  AAC_INIT_VLC_STATIC( 6, 306);
994  AAC_INIT_VLC_STATIC( 7, 268);
995  AAC_INIT_VLC_STATIC( 8, 510);
996  AAC_INIT_VLC_STATIC( 9, 366);
997  AAC_INIT_VLC_STATIC(10, 462);
998 
999  ff_aac_sbr_init();
1000 
1001  ff_fmt_convert_init(&ac->fmt_conv, avctx);
 1002  avpriv_float_dsp_init(&ac->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
 1003 
1004  ac->random_state = 0x1f2e3d4c;
1005 
1006  ff_aac_tableinit();
1007 
 1008  INIT_VLC_STATIC(&vlc_scalefactors, 7,
 1009  FF_ARRAY_ELEMS(ff_aac_scalefactor_bits),
 1010  ff_aac_scalefactor_bits,
 1011  sizeof(ff_aac_scalefactor_bits[0]),
 1012  sizeof(ff_aac_scalefactor_bits[0]),
 1013  ff_aac_scalefactor_code,
 1014  sizeof(ff_aac_scalefactor_code[0]),
 1015  sizeof(ff_aac_scalefactor_code[0]),
 1016  352);
1017 
1018  ff_mdct_init(&ac->mdct, 11, 1, 1.0 / (32768.0 * 1024.0));
1019  ff_mdct_init(&ac->mdct_small, 8, 1, 1.0 / (32768.0 * 128.0));
1020  ff_mdct_init(&ac->mdct_ltp, 11, 0, -2.0 * 32768.0);
1021  // window initialization
 1022  ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
 1023  ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
 1024  ff_init_ff_sine_windows(10);
 1025  ff_init_ff_sine_windows(7);
 1026 
1027  cbrt_tableinit();
1028 
1029  return 0;
1030 }
1031 
1032 /**
1033  * Skip data_stream_element; reference: table 4.10.
1034  */
 1035 static int skip_data_stream_element(AACContext *ac, GetBitContext *gb)
 1036 {
1037  int byte_align = get_bits1(gb);
1038  int count = get_bits(gb, 8);
1039  if (count == 255)
1040  count += get_bits(gb, 8);
1041  if (byte_align)
1042  align_get_bits(gb);
1043 
1044  if (get_bits_left(gb) < 8 * count) {
1045  av_log(ac->avctx, AV_LOG_ERROR, "skip_data_stream_element: "overread_err);
1046  return AVERROR_INVALIDDATA;
1047  }
1048  skip_bits_long(gb, 8 * count);
1049  return 0;
1050 }
1051 
 1052 static int decode_prediction(AACContext *ac, IndividualChannelStream *ics,
 1053  GetBitContext *gb)
1054 {
1055  int sfb;
1056  if (get_bits1(gb)) {
1057  ics->predictor_reset_group = get_bits(gb, 5);
1058  if (ics->predictor_reset_group == 0 ||
1059  ics->predictor_reset_group > 30) {
1060  av_log(ac->avctx, AV_LOG_ERROR,
1061  "Invalid Predictor Reset Group.\n");
1062  return AVERROR_INVALIDDATA;
1063  }
1064  }
1065  for (sfb = 0; sfb < FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[ac->oc[1].m4ac.sampling_index]); sfb++) {
1066  ics->prediction_used[sfb] = get_bits1(gb);
1067  }
1068  return 0;
1069 }
1070 
1071 /**
1072  * Decode Long Term Prediction data; reference: table 4.xx.
1073  */
 1074 static void decode_ltp(LongTermPrediction *ltp,
 1075  GetBitContext *gb, uint8_t max_sfb)
1076 {
1077  int sfb;
1078 
1079  ltp->lag = get_bits(gb, 11);
1080  ltp->coef = ltp_coef[get_bits(gb, 3)];
1081  for (sfb = 0; sfb < FFMIN(max_sfb, MAX_LTP_LONG_SFB); sfb++)
1082  ltp->used[sfb] = get_bits1(gb);
1083 }
1084 
1085 /**
1086  * Decode Individual Channel Stream info; reference: table 4.6.
1087  */
 1088 static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics,
 1089  GetBitContext *gb)
1090 {
1091  if (get_bits1(gb)) {
1092  av_log(ac->avctx, AV_LOG_ERROR, "Reserved bit set.\n");
1093  return AVERROR_INVALIDDATA;
1094  }
1095  ics->window_sequence[1] = ics->window_sequence[0];
1096  ics->window_sequence[0] = get_bits(gb, 2);
1097  ics->use_kb_window[1] = ics->use_kb_window[0];
1098  ics->use_kb_window[0] = get_bits1(gb);
1099  ics->num_window_groups = 1;
1100  ics->group_len[0] = 1;
1101  if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1102  int i;
1103  ics->max_sfb = get_bits(gb, 4);
1104  for (i = 0; i < 7; i++) {
1105  if (get_bits1(gb)) {
1106  ics->group_len[ics->num_window_groups - 1]++;
1107  } else {
1108  ics->num_window_groups++;
1109  ics->group_len[ics->num_window_groups - 1] = 1;
1110  }
1111  }
 1112  ics->num_windows = 8;
 1113  ics->swb_offset = ff_swb_offset_128[ac->oc[1].m4ac.sampling_index];
 1114  ics->num_swb = ff_aac_num_swb_128[ac->oc[1].m4ac.sampling_index];
 1115  ics->tns_max_bands = ff_tns_max_bands_128[ac->oc[1].m4ac.sampling_index];
 1116  ics->predictor_present = 0;
 1117  } else {
 1118  ics->max_sfb = get_bits(gb, 6);
 1119  ics->num_windows = 1;
 1120  ics->swb_offset = ff_swb_offset_1024[ac->oc[1].m4ac.sampling_index];
 1121  ics->num_swb = ff_aac_num_swb_1024[ac->oc[1].m4ac.sampling_index];
 1122  ics->tns_max_bands = ff_tns_max_bands_1024[ac->oc[1].m4ac.sampling_index];
 1123  ics->predictor_present = get_bits1(gb);
1124  ics->predictor_reset_group = 0;
1125  if (ics->predictor_present) {
1126  if (ac->oc[1].m4ac.object_type == AOT_AAC_MAIN) {
1127  if (decode_prediction(ac, ics, gb)) {
1128  goto fail;
1129  }
1130  } else if (ac->oc[1].m4ac.object_type == AOT_AAC_LC) {
1131  av_log(ac->avctx, AV_LOG_ERROR,
1132  "Prediction is not allowed in AAC-LC.\n");
1133  goto fail;
1134  } else {
1135  if ((ics->ltp.present = get_bits(gb, 1)))
1136  decode_ltp(&ics->ltp, gb, ics->max_sfb);
1137  }
1138  }
1139  }
1140 
1141  if (ics->max_sfb > ics->num_swb) {
1142  av_log(ac->avctx, AV_LOG_ERROR,
1143  "Number of scalefactor bands in group (%d) "
1144  "exceeds limit (%d).\n",
1145  ics->max_sfb, ics->num_swb);
1146  goto fail;
1147  }
1148 
1149  return 0;
1150 fail:
1151  ics->max_sfb = 0;
1152  return AVERROR_INVALIDDATA;
1153 }
1154 
1155 /**
1156  * Decode band types (section_data payload); reference: table 4.46.
1157  *
1158  * @param band_type array of the used band type
1159  * @param band_type_run_end array of the last scalefactor band of a band type run
1160  *
1161  * @return Returns error status. 0 - OK, !0 - error
1162  */
1163 static int decode_band_types(AACContext *ac, enum BandType band_type[120],
1164  int band_type_run_end[120], GetBitContext *gb,
 1165  IndividualChannelStream *ics)
 1166 {
1167  int g, idx = 0;
1168  const int bits = (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) ? 3 : 5;
1169  for (g = 0; g < ics->num_window_groups; g++) {
1170  int k = 0;
1171  while (k < ics->max_sfb) {
1172  uint8_t sect_end = k;
1173  int sect_len_incr;
1174  int sect_band_type = get_bits(gb, 4);
1175  if (sect_band_type == 12) {
1176  av_log(ac->avctx, AV_LOG_ERROR, "invalid band type\n");
1177  return AVERROR_INVALIDDATA;
1178  }
1179  do {
1180  sect_len_incr = get_bits(gb, bits);
1181  sect_end += sect_len_incr;
1182  if (get_bits_left(gb) < 0) {
1183  av_log(ac->avctx, AV_LOG_ERROR, "decode_band_types: "overread_err);
1184  return AVERROR_INVALIDDATA;
1185  }
1186  if (sect_end > ics->max_sfb) {
1187  av_log(ac->avctx, AV_LOG_ERROR,
1188  "Number of bands (%d) exceeds limit (%d).\n",
1189  sect_end, ics->max_sfb);
1190  return AVERROR_INVALIDDATA;
1191  }
1192  } while (sect_len_incr == (1 << bits) - 1);
1193  for (; k < sect_end; k++) {
1194  band_type [idx] = sect_band_type;
1195  band_type_run_end[idx++] = sect_end;
1196  }
1197  }
1198  }
1199  return 0;
1200 }
1201 
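/*
 * The section lengths read above are run-length coded with an escape: each
 * field is "bits" wide (3 for eight-short windows, 5 otherwise) and the
 * all-ones value (1 << bits) - 1 means "add this much and read another field".
 * A standalone sketch of that rule, with an array standing in for successive
 * get_bits() results:
 */
#include <stdio.h>

static int ex_read_section_length(const int *fields, int n, int bits, int *used)
{
    int len = 0, i = 0, esc = (1 << bits) - 1, v;
    do {
        v = (i < n) ? fields[i++] : 0;
        len += v;
    } while (v == esc && i < n);
    *used = i;
    return len;
}

int main(void)
{
    static const int fields[] = { 7, 7, 2 };   /* 3-bit fields: two escapes then 2 */
    int used;
    int len = ex_read_section_length(fields, 3, 3, &used);
    printf("section length %d decoded from %d fields\n", len, used);  /* 16 from 3 */
    return 0;
}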
1202 /**
1203  * Decode scalefactors; reference: table 4.47.
1204  *
1205  * @param global_gain first scalefactor value as scalefactors are differentially coded
1206  * @param band_type array of the used band type
1207  * @param band_type_run_end array of the last scalefactor band of a band type run
1208  * @param sf array of scalefactors or intensity stereo positions
1209  *
1210  * @return Returns error status. 0 - OK, !0 - error
1211  */
1212 static int decode_scalefactors(AACContext *ac, float sf[120], GetBitContext *gb,
1213  unsigned int global_gain,
 1214  IndividualChannelStream *ics,
 1215  enum BandType band_type[120],
1216  int band_type_run_end[120])
1217 {
1218  int g, i, idx = 0;
1219  int offset[3] = { global_gain, global_gain - 90, 0 };
1220  int clipped_offset;
1221  int noise_flag = 1;
1222  for (g = 0; g < ics->num_window_groups; g++) {
1223  for (i = 0; i < ics->max_sfb;) {
1224  int run_end = band_type_run_end[idx];
1225  if (band_type[idx] == ZERO_BT) {
1226  for (; i < run_end; i++, idx++)
1227  sf[idx] = 0.;
1228  } else if ((band_type[idx] == INTENSITY_BT) ||
1229  (band_type[idx] == INTENSITY_BT2)) {
1230  for (; i < run_end; i++, idx++) {
1231  offset[2] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
1232  clipped_offset = av_clip(offset[2], -155, 100);
 1233  if (offset[2] != clipped_offset) {
 1234  avpriv_request_sample(ac->avctx,
 1235  "If you heard an audible artifact, there may be a bug in the decoder. "
1236  "Clipped intensity stereo position (%d -> %d)",
1237  offset[2], clipped_offset);
1238  }
1239  sf[idx] = ff_aac_pow2sf_tab[-clipped_offset + POW_SF2_ZERO];
1240  }
1241  } else if (band_type[idx] == NOISE_BT) {
1242  for (; i < run_end; i++, idx++) {
1243  if (noise_flag-- > 0)
1244  offset[1] += get_bits(gb, 9) - 256;
1245  else
1246  offset[1] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
1247  clipped_offset = av_clip(offset[1], -100, 155);
 1248  if (offset[1] != clipped_offset) {
 1249  avpriv_request_sample(ac->avctx,
 1250  "If you heard an audible artifact, there may be a bug in the decoder. "
1251  "Clipped noise gain (%d -> %d)",
1252  offset[1], clipped_offset);
1253  }
1254  sf[idx] = -ff_aac_pow2sf_tab[clipped_offset + POW_SF2_ZERO];
1255  }
1256  } else {
1257  for (; i < run_end; i++, idx++) {
1258  offset[0] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
1259  if (offset[0] > 255U) {
1260  av_log(ac->avctx, AV_LOG_ERROR,
1261  "Scalefactor (%d) out of range.\n", offset[0]);
1262  return AVERROR_INVALIDDATA;
1263  }
1264  sf[idx] = -ff_aac_pow2sf_tab[offset[0] - 100 + POW_SF2_ZERO];
1265  }
1266  }
1267  }
1268  }
1269  return 0;
1270 }
1271 
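/*
 * Scalefactors are DPCM coded: each Huffman symbol above decodes to a delta in
 * [-60, 60] that is accumulated onto the running value (seeded with
 * global_gain), and the accumulated value maps to a linear gain of
 * 2^(0.25 * (sf - offset)), which is what the ff_aac_pow2sf_tab lookup with
 * POW_SF2_ZERO and the fixed -100 term implements here. A standalone sketch of
 * the accumulation and mapping; dsf[] stands in for decoded deltas, and the
 * -100 headroom offset is kept only to mirror the code above:
 */
#include <stdio.h>
#include <math.h>

int main(void)
{
    const int global_gain = 120;
    static const int dsf[] = { 0, -2, 3, -1 };
    int sf = global_gain, i;
    for (i = 0; i < 4; i++) {
        sf += dsf[i];
        printf("band %d: sf %3d -> gain %g\n", i, sf, pow(2.0, 0.25 * (sf - 100)));
    }
    return 0;
}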
1272 /**
1273  * Decode pulse data; reference: table 4.7.
1274  */
1275 static int decode_pulses(Pulse *pulse, GetBitContext *gb,
1276  const uint16_t *swb_offset, int num_swb)
1277 {
1278  int i, pulse_swb;
1279  pulse->num_pulse = get_bits(gb, 2) + 1;
1280  pulse_swb = get_bits(gb, 6);
1281  if (pulse_swb >= num_swb)
1282  return -1;
1283  pulse->pos[0] = swb_offset[pulse_swb];
1284  pulse->pos[0] += get_bits(gb, 5);
1285  if (pulse->pos[0] > 1023)
1286  return -1;
1287  pulse->amp[0] = get_bits(gb, 4);
1288  for (i = 1; i < pulse->num_pulse; i++) {
1289  pulse->pos[i] = get_bits(gb, 5) + pulse->pos[i - 1];
1290  if (pulse->pos[i] > 1023)
1291  return -1;
1292  pulse->amp[i] = get_bits(gb, 4);
1293  }
1294  return 0;
1295 }
1296 
1297 /**
1298  * Decode Temporal Noise Shaping data; reference: table 4.48.
1299  *
1300  * @return Returns error status. 0 - OK, !0 - error
1301  */
 1302 static int decode_tns(AACContext *ac, TemporalNoiseShaping *tns,
 1303  GetBitContext *gb, const IndividualChannelStream *ics)
1304 {
1305  int w, filt, i, coef_len, coef_res, coef_compress;
1306  const int is8 = ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE;
1307  const int tns_max_order = is8 ? 7 : ac->oc[1].m4ac.object_type == AOT_AAC_MAIN ? 20 : 12;
1308  for (w = 0; w < ics->num_windows; w++) {
1309  if ((tns->n_filt[w] = get_bits(gb, 2 - is8))) {
1310  coef_res = get_bits1(gb);
1311 
1312  for (filt = 0; filt < tns->n_filt[w]; filt++) {
1313  int tmp2_idx;
1314  tns->length[w][filt] = get_bits(gb, 6 - 2 * is8);
1315 
1316  if ((tns->order[w][filt] = get_bits(gb, 5 - 2 * is8)) > tns_max_order) {
1317  av_log(ac->avctx, AV_LOG_ERROR,
1318  "TNS filter order %d is greater than maximum %d.\n",
1319  tns->order[w][filt], tns_max_order);
1320  tns->order[w][filt] = 0;
1321  return AVERROR_INVALIDDATA;
1322  }
1323  if (tns->order[w][filt]) {
1324  tns->direction[w][filt] = get_bits1(gb);
1325  coef_compress = get_bits1(gb);
1326  coef_len = coef_res + 3 - coef_compress;
1327  tmp2_idx = 2 * coef_compress + coef_res;
1328 
1329  for (i = 0; i < tns->order[w][filt]; i++)
1330  tns->coef[w][filt][i] = tns_tmp2_map[tmp2_idx][get_bits(gb, coef_len)];
1331  }
1332  }
1333  }
1334  }
1335  return 0;
1336 }
1337 
1338 /**
1339  * Decode Mid/Side data; reference: table 4.54.
1340  *
1341  * @param ms_present Indicates mid/side stereo presence. [0] mask is all 0s;
1342  * [1] mask is decoded from bitstream; [2] mask is all 1s;
1343  * [3] reserved for scalable AAC
1344  */
 1345 static void decode_mid_side_stereo(ChannelElement *cpe, GetBitContext *gb,
 1346  int ms_present)
1347 {
1348  int idx;
1349  if (ms_present == 1) {
1350  for (idx = 0;
1351  idx < cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb;
1352  idx++)
1353  cpe->ms_mask[idx] = get_bits1(gb);
1354  } else if (ms_present == 2) {
1355  memset(cpe->ms_mask, 1, sizeof(cpe->ms_mask[0]) * cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb);
1356  }
1357 }
1358 
1359 #ifndef VMUL2
1360 static inline float *VMUL2(float *dst, const float *v, unsigned idx,
1361  const float *scale)
1362 {
1363  float s = *scale;
1364  *dst++ = v[idx & 15] * s;
1365  *dst++ = v[idx>>4 & 15] * s;
1366  return dst;
1367 }
1368 #endif
1369 
1370 #ifndef VMUL4
1371 static inline float *VMUL4(float *dst, const float *v, unsigned idx,
1372  const float *scale)
1373 {
1374  float s = *scale;
1375  *dst++ = v[idx & 3] * s;
1376  *dst++ = v[idx>>2 & 3] * s;
1377  *dst++ = v[idx>>4 & 3] * s;
1378  *dst++ = v[idx>>6 & 3] * s;
1379  return dst;
1380 }
1381 #endif
1382 
1383 #ifndef VMUL2S
1384 static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
1385  unsigned sign, const float *scale)
1386 {
1387  union av_intfloat32 s0, s1;
1388 
1389  s0.f = s1.f = *scale;
1390  s0.i ^= sign >> 1 << 31;
1391  s1.i ^= sign << 31;
1392 
1393  *dst++ = v[idx & 15] * s0.f;
1394  *dst++ = v[idx>>4 & 15] * s1.f;
1395 
1396  return dst;
1397 }
1398 #endif
1399 
1400 #ifndef VMUL4S
1401 static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
1402  unsigned sign, const float *scale)
1403 {
1404  unsigned nz = idx >> 12;
1405  union av_intfloat32 s = { .f = *scale };
1406  union av_intfloat32 t;
1407 
1408  t.i = s.i ^ (sign & 1U<<31);
1409  *dst++ = v[idx & 3] * t.f;
1410 
1411  sign <<= nz & 1; nz >>= 1;
1412  t.i = s.i ^ (sign & 1U<<31);
1413  *dst++ = v[idx>>2 & 3] * t.f;
1414 
1415  sign <<= nz & 1; nz >>= 1;
1416  t.i = s.i ^ (sign & 1U<<31);
1417  *dst++ = v[idx>>4 & 3] * t.f;
1418 
1419  sign <<= nz & 1;
1420  t.i = s.i ^ (sign & 1U<<31);
1421  *dst++ = v[idx>>6 & 3] * t.f;
1422 
1423  return dst;
1424 }
1425 #endif
1426 
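/*
 * The VMUL helpers above unpack one Huffman codeword worth of quantized
 * values: for the four-tuple codebooks the codebook index packs four 2-bit
 * offsets into a small value table, for the two-tuple codebooks two 4-bit
 * offsets, and the VMUL2S/VMUL4S variants additionally flip signs using bits
 * pulled from the bitstream. A plain-C sketch of the unsigned four-tuple case,
 * with a toy value table standing in for ff_aac_codebook_vector_vals:
 */
#include <stdio.h>

static void ex_vmul4(float *dst, const float *v, unsigned idx, float scale)
{
    dst[0] = v[idx      & 3] * scale;
    dst[1] = v[idx >> 2 & 3] * scale;
    dst[2] = v[idx >> 4 & 3] * scale;
    dst[3] = v[idx >> 6 & 3] * scale;
}

int main(void)
{
    static const float vals[4] = { 0.0f, 1.0f, -1.0f, 2.0f };
    float out[4];
    int i;
    ex_vmul4(out, vals, 0xE4, 0.5f);   /* 0xE4 = 11 10 01 00 -> entries 0,1,2,3 */
    for (i = 0; i < 4; i++)
        printf("%g ", out[i]);         /* prints "0 0.5 -0.5 1" */
    printf("\n");
    return 0;
}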
1427 /**
1428  * Decode spectral data; reference: table 4.50.
1429  * Dequantize and scale spectral data; reference: 4.6.3.3.
1430  *
1431  * @param coef array of dequantized, scaled spectral data
1432  * @param sf array of scalefactors or intensity stereo positions
1433  * @param pulse_present set if pulses are present
1434  * @param pulse pointer to pulse data struct
1435  * @param band_type array of the used band type
1436  *
1437  * @return Returns error status. 0 - OK, !0 - error
1438  */
1439 static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
1440  GetBitContext *gb, const float sf[120],
1441  int pulse_present, const Pulse *pulse,
1442  const IndividualChannelStream *ics,
1443  enum BandType band_type[120])
1444 {
1445  int i, k, g, idx = 0;
1446  const int c = 1024 / ics->num_windows;
1447  const uint16_t *offsets = ics->swb_offset;
1448  float *coef_base = coef;
1449 
1450  for (g = 0; g < ics->num_windows; g++)
1451  memset(coef + g * 128 + offsets[ics->max_sfb], 0,
1452  sizeof(float) * (c - offsets[ics->max_sfb]));
1453 
1454  for (g = 0; g < ics->num_window_groups; g++) {
1455  unsigned g_len = ics->group_len[g];
1456 
1457  for (i = 0; i < ics->max_sfb; i++, idx++) {
1458  const unsigned cbt_m1 = band_type[idx] - 1;
1459  float *cfo = coef + offsets[i];
1460  int off_len = offsets[i + 1] - offsets[i];
1461  int group;
1462 
1463  if (cbt_m1 >= INTENSITY_BT2 - 1) {
1464  for (group = 0; group < g_len; group++, cfo+=128) {
1465  memset(cfo, 0, off_len * sizeof(float));
1466  }
1467  } else if (cbt_m1 == NOISE_BT - 1) {
1468  for (group = 0; group < g_len; group++, cfo+=128) {
1469  float scale;
1470  float band_energy;
1471 
1472  for (k = 0; k < off_len; k++) {
 1473  ac->random_state = lcg_random(ac->random_state);
 1474  cfo[k] = ac->random_state;
1475  }
1476 
1477  band_energy = ac->fdsp.scalarproduct_float(cfo, cfo, off_len);
1478  scale = sf[idx] / sqrtf(band_energy);
1479  ac->fdsp.vector_fmul_scalar(cfo, cfo, scale, off_len);
1480  }
1481  } else {
1482  const float *vq = ff_aac_codebook_vector_vals[cbt_m1];
1483  const uint16_t *cb_vector_idx = ff_aac_codebook_vector_idx[cbt_m1];
1484  VLC_TYPE (*vlc_tab)[2] = vlc_spectral[cbt_m1].table;
1485  OPEN_READER(re, gb);
1486 
1487  switch (cbt_m1 >> 1) {
1488  case 0:
1489  for (group = 0; group < g_len; group++, cfo+=128) {
1490  float *cf = cfo;
1491  int len = off_len;
1492 
1493  do {
1494  int code;
1495  unsigned cb_idx;
1496 
1497  UPDATE_CACHE(re, gb);
1498  GET_VLC(code, re, gb, vlc_tab, 8, 2);
1499  cb_idx = cb_vector_idx[code];
1500  cf = VMUL4(cf, vq, cb_idx, sf + idx);
1501  } while (len -= 4);
1502  }
1503  break;
1504 
1505  case 1:
1506  for (group = 0; group < g_len; group++, cfo+=128) {
1507  float *cf = cfo;
1508  int len = off_len;
1509 
1510  do {
1511  int code;
1512  unsigned nnz;
1513  unsigned cb_idx;
1514  uint32_t bits;
1515 
1516  UPDATE_CACHE(re, gb);
1517  GET_VLC(code, re, gb, vlc_tab, 8, 2);
1518  cb_idx = cb_vector_idx[code];
1519  nnz = cb_idx >> 8 & 15;
1520  bits = nnz ? GET_CACHE(re, gb) : 0;
1521  LAST_SKIP_BITS(re, gb, nnz);
1522  cf = VMUL4S(cf, vq, cb_idx, bits, sf + idx);
1523  } while (len -= 4);
1524  }
1525  break;
1526 
1527  case 2:
1528  for (group = 0; group < g_len; group++, cfo+=128) {
1529  float *cf = cfo;
1530  int len = off_len;
1531 
1532  do {
1533  int code;
1534  unsigned cb_idx;
1535 
1536  UPDATE_CACHE(re, gb);
1537  GET_VLC(code, re, gb, vlc_tab, 8, 2);
1538  cb_idx = cb_vector_idx[code];
1539  cf = VMUL2(cf, vq, cb_idx, sf + idx);
1540  } while (len -= 2);
1541  }
1542  break;
1543 
1544  case 3:
1545  case 4:
1546  for (group = 0; group < g_len; group++, cfo+=128) {
1547  float *cf = cfo;
1548  int len = off_len;
1549 
1550  do {
1551  int code;
1552  unsigned nnz;
1553  unsigned cb_idx;
1554  unsigned sign;
1555 
1556  UPDATE_CACHE(re, gb);
1557  GET_VLC(code, re, gb, vlc_tab, 8, 2);
1558  cb_idx = cb_vector_idx[code];
1559  nnz = cb_idx >> 8 & 15;
1560  sign = nnz ? SHOW_UBITS(re, gb, nnz) << (cb_idx >> 12) : 0;
1561  LAST_SKIP_BITS(re, gb, nnz);
1562  cf = VMUL2S(cf, vq, cb_idx, sign, sf + idx);
1563  } while (len -= 2);
1564  }
1565  break;
1566 
1567  default:
1568  for (group = 0; group < g_len; group++, cfo+=128) {
1569  float *cf = cfo;
1570  uint32_t *icf = (uint32_t *) cf;
1571  int len = off_len;
1572 
1573  do {
1574  int code;
1575  unsigned nzt, nnz;
1576  unsigned cb_idx;
1577  uint32_t bits;
1578  int j;
1579 
1580  UPDATE_CACHE(re, gb);
1581  GET_VLC(code, re, gb, vlc_tab, 8, 2);
1582 
1583  if (!code) {
1584  *icf++ = 0;
1585  *icf++ = 0;
1586  continue;
1587  }
1588 
1589  cb_idx = cb_vector_idx[code];
1590  nnz = cb_idx >> 12;
1591  nzt = cb_idx >> 8;
1592  bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
1593  LAST_SKIP_BITS(re, gb, nnz);
1594 
1595  for (j = 0; j < 2; j++) {
1596  if (nzt & 1<<j) {
1597  uint32_t b;
1598  int n;
1599  /* The total length of escape_sequence must be < 22 bits according
1600  to the specification (i.e. max is 111111110xxxxxxxxxxxx). */
1601  UPDATE_CACHE(re, gb);
1602  b = GET_CACHE(re, gb);
1603  b = 31 - av_log2(~b);
1604 
1605  if (b > 8) {
1606  av_log(ac->avctx, AV_LOG_ERROR, "error in spectral data, ESC overflow\n");
1607  return AVERROR_INVALIDDATA;
1608  }
1609 
1610  SKIP_BITS(re, gb, b + 1);
1611  b += 4;
1612  n = (1 << b) + SHOW_UBITS(re, gb, b);
1613  LAST_SKIP_BITS(re, gb, b);
1614  *icf++ = cbrt_tab[n] | (bits & 1U<<31);
1615  bits <<= 1;
1616  } else {
1617  unsigned v = ((const uint32_t*)vq)[cb_idx & 15];
1618  *icf++ = (bits & 1U<<31) | v;
1619  bits <<= !!v;
1620  }
1621  cb_idx >>= 4;
1622  }
1623  } while (len -= 2);
1624 
1625  ac->fdsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len);
1626  }
1627  }
1628 
1629  CLOSE_READER(re, gb);
1630  }
1631  }
1632  coef += g_len << 7;
1633  }
1634 
1635  if (pulse_present) {
1636  idx = 0;
1637  for (i = 0; i < pulse->num_pulse; i++) {
1638  float co = coef_base[ pulse->pos[i] ];
1639  while (offsets[idx + 1] <= pulse->pos[i])
1640  idx++;
1641  if (band_type[idx] != NOISE_BT && sf[idx]) {
1642  float ico = -pulse->amp[i];
1643  if (co) {
1644  co /= sf[idx];
1645  ico = co / sqrtf(sqrtf(fabsf(co))) + (co > 0 ? -ico : ico);
1646  }
1647  coef_base[ pulse->pos[i] ] = cbrtf(fabsf(ico)) * ico * sf[idx];
1648  }
1649  }
1650  }
1651  return 0;
1652 }
1653 
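/*
 * Both the cbrt_tab lookup in the escape path above and the pulse correction
 * at the end implement the standard AAC inverse quantizer: a quantized value q
 * is expanded to sign(q) * |q|^(4/3) and then multiplied by the band's
 * scalefactor gain. A direct, table-free sketch of that mapping:
 */
#include <stdio.h>
#include <math.h>

static float ex_inverse_quantize(int q, float gain)
{
    float a = fabsf((float)q);
    return copysignf(cbrtf(a) * a, (float)q) * gain;   /* |q|^(4/3) with sign */
}

int main(void)
{
    printf("%f\n", ex_inverse_quantize( 3, 1.0f));     /* 3^(4/3)        =  4.326749 */
    printf("%f\n", ex_inverse_quantize(-8, 0.5f));     /* -(8^(4/3))*0.5 = -8.000000 */
    return 0;
}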
1654 static av_always_inline float flt16_round(float pf)
1655 {
1656  union av_intfloat32 tmp;
1657  tmp.f = pf;
1658  tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
1659  return tmp.f;
1660 }
1661 
1662 static av_always_inline float flt16_even(float pf)
1663 {
1664  union av_intfloat32 tmp;
1665  tmp.f = pf;
1666  tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
1667  return tmp.f;
1668 }
1669 
1670 static av_always_inline float flt16_trunc(float pf)
1671 {
1672  union av_intfloat32 pun;
1673  pun.f = pf;
1674  pun.i &= 0xFFFF0000U;
1675  return pun.f;
1676 }
1677 
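/*
 * The flt16_round/flt16_even/flt16_trunc helpers above emulate the reduced
 * precision that AAC Main backward prediction requires by keeping only the
 * top 16 bits of the IEEE 754 single-precision representation (sign, exponent
 * and the 7 most significant mantissa bits), with different rounding rules.
 * A standalone check of the truncating variant, using memcpy punning instead
 * of the union:
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>

static float ex_flt16_trunc(float pf)
{
    uint32_t i;
    memcpy(&i, &pf, sizeof(i));
    i &= 0xFFFF0000u;                 /* drop the low 16 mantissa bits */
    memcpy(&pf, &i, sizeof(pf));
    return pf;
}

int main(void)
{
    float x = 0.123456789f;
    printf("%.9f -> %.9f\n", x, ex_flt16_trunc(x));
    return 0;
}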
1678 static av_always_inline void predict(PredictorState *ps, float *coef,
1679  int output_enable)
1680 {
1681  const float a = 0.953125; // 61.0 / 64
1682  const float alpha = 0.90625; // 29.0 / 32
1683  float e0, e1;
1684  float pv;
1685  float k1, k2;
1686  float r0 = ps->r0, r1 = ps->r1;
1687  float cor0 = ps->cor0, cor1 = ps->cor1;
1688  float var0 = ps->var0, var1 = ps->var1;
1689 
1690  k1 = var0 > 1 ? cor0 * flt16_even(a / var0) : 0;
1691  k2 = var1 > 1 ? cor1 * flt16_even(a / var1) : 0;
1692 
1693  pv = flt16_round(k1 * r0 + k2 * r1);
1694  if (output_enable)
1695  *coef += pv;
1696 
1697  e0 = *coef;
1698  e1 = e0 - k1 * r0;
1699 
1700  ps->cor1 = flt16_trunc(alpha * cor1 + r1 * e1);
1701  ps->var1 = flt16_trunc(alpha * var1 + 0.5f * (r1 * r1 + e1 * e1));
1702  ps->cor0 = flt16_trunc(alpha * cor0 + r0 * e0);
1703  ps->var0 = flt16_trunc(alpha * var0 + 0.5f * (r0 * r0 + e0 * e0));
1704 
1705  ps->r1 = flt16_trunc(a * (r0 - k1 * e0));
1706  ps->r0 = flt16_trunc(a * e0);
1707 }
1708 
1709 /**
1710  * Apply AAC-Main style frequency domain prediction.
1711  */
 1712 static void apply_prediction(AACContext *ac, SingleChannelElement *sce)
 1713 {
1714  int sfb, k;
1715 
 1716  if (!sce->ics.predictor_initialized) {
 1717  reset_all_predictors(sce->predictor_state);
 1718  sce->ics.predictor_initialized = 1;
1719  }
1720 
1721  if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
1722  for (sfb = 0;
1723  sfb < ff_aac_pred_sfb_max[ac->oc[1].m4ac.sampling_index];
1724  sfb++) {
1725  for (k = sce->ics.swb_offset[sfb];
1726  k < sce->ics.swb_offset[sfb + 1];
1727  k++) {
1728  predict(&sce->predictor_state[k], &sce->coeffs[k],
1729  sce->ics.predictor_present &&
1730  sce->ics.prediction_used[sfb]);
1731  }
1732  }
 1733  if (sce->ics.predictor_reset_group)
 1734  reset_predictor_group(sce->predictor_state,
 1735  sce->ics.predictor_reset_group);
 1736  } else
 1737  reset_all_predictors(sce->predictor_state);
 1738 }
1739 
1740 /**
1741  * Decode an individual_channel_stream payload; reference: table 4.44.
1742  *
1743  * @param common_window Channels have independent [0], or shared [1], Individual Channel Stream information.
1744  * @param scale_flag scalable [1] or non-scalable [0] AAC (Unused until scalable AAC is implemented.)
1745  *
1746  * @return Returns error status. 0 - OK, !0 - error
1747  */
 1748 static int decode_ics(AACContext *ac, SingleChannelElement *sce,
 1749  GetBitContext *gb, int common_window, int scale_flag)
1750 {
1751  Pulse pulse;
1752  TemporalNoiseShaping *tns = &sce->tns;
1753  IndividualChannelStream *ics = &sce->ics;
1754  float *out = sce->coeffs;
1755  int global_gain, pulse_present = 0;
1756  int ret;
1757 
1758  /* This assignment is to silence a GCC warning about the variable being used
 1759  * uninitialized when in fact it always is initialized.
1760  */
1761  pulse.num_pulse = 0;
1762 
1763  global_gain = get_bits(gb, 8);
1764 
1765  if (!common_window && !scale_flag) {
1766  if (decode_ics_info(ac, ics, gb) < 0)
1767  return AVERROR_INVALIDDATA;
1768  }
1769 
1770  if ((ret = decode_band_types(ac, sce->band_type,
1771  sce->band_type_run_end, gb, ics)) < 0)
1772  return ret;
1773  if ((ret = decode_scalefactors(ac, sce->sf, gb, global_gain, ics,
1774  sce->band_type, sce->band_type_run_end)) < 0)
1775  return ret;
1776 
1777  pulse_present = 0;
1778  if (!scale_flag) {
1779  if ((pulse_present = get_bits1(gb))) {
1780  if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1781  av_log(ac->avctx, AV_LOG_ERROR,
1782  "Pulse tool not allowed in eight short sequence.\n");
1783  return AVERROR_INVALIDDATA;
1784  }
1785  if (decode_pulses(&pulse, gb, ics->swb_offset, ics->num_swb)) {
1786  av_log(ac->avctx, AV_LOG_ERROR,
1787  "Pulse data corrupt or invalid.\n");
1788  return AVERROR_INVALIDDATA;
1789  }
1790  }
1791  if ((tns->present = get_bits1(gb)) && decode_tns(ac, tns, gb, ics))
1792  return AVERROR_INVALIDDATA;
1793  if (get_bits1(gb)) {
1794  avpriv_request_sample(ac->avctx, "SSR");
1795  return AVERROR_PATCHWELCOME;
1796  }
1797  }
1798 
1799  if (decode_spectrum_and_dequant(ac, out, gb, sce->sf, pulse_present,
1800  &pulse, ics, sce->band_type) < 0)
1801  return AVERROR_INVALIDDATA;
1802 
1803  if (ac->oc[1].m4ac.object_type == AOT_AAC_MAIN && !common_window)
1804  apply_prediction(ac, sce);
1805 
1806  return 0;
1807 }
1808 
1809 /**
1810  * Mid/Side stereo decoding; reference: 4.6.8.1.3.
1811  */
 1812 static void apply_mid_side_stereo(AACContext *ac, ChannelElement *cpe)
 1813 {
1814  const IndividualChannelStream *ics = &cpe->ch[0].ics;
1815  float *ch0 = cpe->ch[0].coeffs;
1816  float *ch1 = cpe->ch[1].coeffs;
1817  int g, i, group, idx = 0;
1818  const uint16_t *offsets = ics->swb_offset;
1819  for (g = 0; g < ics->num_window_groups; g++) {
1820  for (i = 0; i < ics->max_sfb; i++, idx++) {
1821  if (cpe->ms_mask[idx] &&
1822  cpe->ch[0].band_type[idx] < NOISE_BT &&
1823  cpe->ch[1].band_type[idx] < NOISE_BT) {
1824  for (group = 0; group < ics->group_len[g]; group++) {
1825  ac->fdsp.butterflies_float(ch0 + group * 128 + offsets[i],
1826  ch1 + group * 128 + offsets[i],
1827  offsets[i+1] - offsets[i]);
1828  }
1829  }
1830  }
1831  ch0 += ics->group_len[g] * 128;
1832  ch1 += ics->group_len[g] * 128;
1833  }
1834 }
1835 
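/*
 * butterflies_float() above converts the transmitted (mid, side) pair back to
 * (left, right) in place: L = M + S and R = M - S. A scalar sketch of the same
 * butterfly, applied to a few coefficients:
 */
#include <stdio.h>

static void ex_ms_butterfly(float *ch0, float *ch1, int len)
{
    int k;
    for (k = 0; k < len; k++) {
        float m = ch0[k], s = ch1[k];
        ch0[k] = m + s;               /* left  */
        ch1[k] = m - s;               /* right */
    }
}

int main(void)
{
    float mid[3]  = { 1.0f, 0.5f, -0.25f };
    float side[3] = { 0.0f, 0.5f,  0.25f };
    int k;
    ex_ms_butterfly(mid, side, 3);
    for (k = 0; k < 3; k++)
        printf("L=%g R=%g\n", mid[k], side[k]);  /* (1,1) (1,0) (0,-0.5) */
    return 0;
}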
1836 /**
1837  * intensity stereo decoding; reference: 4.6.8.2.3
1838  *
1839  * @param ms_present Indicates mid/side stereo presence. [0] mask is all 0s;
1840  * [1] mask is decoded from bitstream; [2] mask is all 1s;
1841  * [3] reserved for scalable AAC
1842  */
 1843 static void apply_intensity_stereo(AACContext *ac,
 1844  ChannelElement *cpe, int ms_present)
1845 {
1846  const IndividualChannelStream *ics = &cpe->ch[1].ics;
1847  SingleChannelElement *sce1 = &cpe->ch[1];
1848  float *coef0 = cpe->ch[0].coeffs, *coef1 = cpe->ch[1].coeffs;
1849  const uint16_t *offsets = ics->swb_offset;
1850  int g, group, i, idx = 0;
1851  int c;
1852  float scale;
1853  for (g = 0; g < ics->num_window_groups; g++) {
1854  for (i = 0; i < ics->max_sfb;) {
1855  if (sce1->band_type[idx] == INTENSITY_BT ||
1856  sce1->band_type[idx] == INTENSITY_BT2) {
1857  const int bt_run_end = sce1->band_type_run_end[idx];
1858  for (; i < bt_run_end; i++, idx++) {
1859  c = -1 + 2 * (sce1->band_type[idx] - 14);
1860  if (ms_present)
1861  c *= 1 - 2 * cpe->ms_mask[idx];
1862  scale = c * sce1->sf[idx];
1863  for (group = 0; group < ics->group_len[g]; group++)
1864  ac->fdsp.vector_fmul_scalar(coef1 + group * 128 + offsets[i],
1865  coef0 + group * 128 + offsets[i],
1866  scale,
1867  offsets[i + 1] - offsets[i]);
1868  }
1869  } else {
1870  int bt_run_end = sce1->band_type_run_end[idx];
1871  idx += bt_run_end - i;
1872  i = bt_run_end;
1873  }
1874  }
1875  coef0 += ics->group_len[g] * 128;
1876  coef1 += ics->group_len[g] * 128;
1877  }
1878 }
1879 
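/*
 * Intensity stereo rebuilds the right channel as a scaled copy of the left
 * one: the accumulated intensity position gives a gain of 2^(-pos/4) (roughly
 * what sf[] holds for INTENSITY_BT bands after decode_scalefactors()), and the
 * sign comes from the codebook choice (in-phase vs. out-of-phase), optionally
 * flipped by the M/S mask as in the loop above. A scalar sketch:
 */
#include <stdio.h>
#include <math.h>

static void ex_intensity(float *right, const float *left, int len,
                         int is_pos, int out_of_phase, int ms_flip)
{
    float scale = powf(2.0f, -0.25f * (float)is_pos);
    int k;
    if (out_of_phase)
        scale = -scale;
    if (ms_flip)
        scale = -scale;
    for (k = 0; k < len; k++)
        right[k] = left[k] * scale;
}

int main(void)
{
    static const float left[4] = { 1.0f, -2.0f, 0.5f, 0.0f };
    float right[4];
    int k;
    ex_intensity(right, left, 4, 4, 0, 0);   /* position 4 -> gain 0.5 */
    for (k = 0; k < 4; k++)
        printf("%g ", right[k]);             /* prints "0.5 -1 0.25 0" */
    printf("\n");
    return 0;
}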
1880 /**
1881  * Decode a channel_pair_element; reference: table 4.4.
1882  *
1883  * @return Returns error status. 0 - OK, !0 - error
1884  */
 1885 static int decode_cpe(AACContext *ac, GetBitContext *gb, ChannelElement *cpe)
 1886 {
1887  int i, ret, common_window, ms_present = 0;
1888 
1889  common_window = get_bits1(gb);
1890  if (common_window) {
1891  if (decode_ics_info(ac, &cpe->ch[0].ics, gb))
1892  return AVERROR_INVALIDDATA;
1893  i = cpe->ch[1].ics.use_kb_window[0];
1894  cpe->ch[1].ics = cpe->ch[0].ics;
1895  cpe->ch[1].ics.use_kb_window[1] = i;
1896  if (cpe->ch[1].ics.predictor_present &&
1897  (ac->oc[1].m4ac.object_type != AOT_AAC_MAIN))
1898  if ((cpe->ch[1].ics.ltp.present = get_bits(gb, 1)))
1899  decode_ltp(&cpe->ch[1].ics.ltp, gb, cpe->ch[1].ics.max_sfb);
1900  ms_present = get_bits(gb, 2);
1901  if (ms_present == 3) {
1902  av_log(ac->avctx, AV_LOG_ERROR, "ms_present = 3 is reserved.\n");
1903  return AVERROR_INVALIDDATA;
1904  } else if (ms_present)
1905  decode_mid_side_stereo(cpe, gb, ms_present);
1906  }
1907  if ((ret = decode_ics(ac, &cpe->ch[0], gb, common_window, 0)))
1908  return ret;
1909  if ((ret = decode_ics(ac, &cpe->ch[1], gb, common_window, 0)))
1910  return ret;
1911 
1912  if (common_window) {
1913  if (ms_present)
1914  apply_mid_side_stereo(ac, cpe);
1915  if (ac->oc[1].m4ac.object_type == AOT_AAC_MAIN) {
1916  apply_prediction(ac, &cpe->ch[0]);
1917  apply_prediction(ac, &cpe->ch[1]);
1918  }
1919  }
1920 
1921  apply_intensity_stereo(ac, cpe, ms_present);
1922  return 0;
1923 }
1924 
1925 static const float cce_scale[] = {
1926  1.09050773266525765921, //2^(1/8)
1927  1.18920711500272106672, //2^(1/4)
1928  M_SQRT2,
1929  2,
1930 };
1931 
1932 /**
1933  * Decode coupling_channel_element; reference: table 4.8.
1934  *
1935  * @return Returns error status. 0 - OK, !0 - error
1936  */
 1937 static int decode_cce(AACContext *ac, GetBitContext *gb, ChannelElement *che)
 1938 {
1939  int num_gain = 0;
1940  int c, g, sfb, ret;
1941  int sign;
1942  float scale;
1943  SingleChannelElement *sce = &che->ch[0];
1944  ChannelCoupling *coup = &che->coup;
1945 
1946  coup->coupling_point = 2 * get_bits1(gb);
1947  coup->num_coupled = get_bits(gb, 3);
1948  for (c = 0; c <= coup->num_coupled; c++) {
1949  num_gain++;
1950  coup->type[c] = get_bits1(gb) ? TYPE_CPE : TYPE_SCE;
1951  coup->id_select[c] = get_bits(gb, 4);
1952  if (coup->type[c] == TYPE_CPE) {
1953  coup->ch_select[c] = get_bits(gb, 2);
1954  if (coup->ch_select[c] == 3)
1955  num_gain++;
1956  } else
1957  coup->ch_select[c] = 2;
1958  }
1959  coup->coupling_point += get_bits1(gb) || (coup->coupling_point >> 1);
1960 
1961  sign = get_bits(gb, 1);
1962  scale = cce_scale[get_bits(gb, 2)];
1963 
1964  if ((ret = decode_ics(ac, sce, gb, 0, 0)))
1965  return ret;
1966 
1967  for (c = 0; c < num_gain; c++) {
1968  int idx = 0;
1969  int cge = 1;
1970  int gain = 0;
1971  float gain_cache = 1.;
1972  if (c) {
1973  cge = coup->coupling_point == AFTER_IMDCT ? 1 : get_bits1(gb);
1974  gain = cge ? get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60: 0;
1975  gain_cache = powf(scale, -gain);
1976  }
1977  if (coup->coupling_point == AFTER_IMDCT) {
1978  coup->gain[c][0] = gain_cache;
1979  } else {
1980  for (g = 0; g < sce->ics.num_window_groups; g++) {
1981  for (sfb = 0; sfb < sce->ics.max_sfb; sfb++, idx++) {
1982  if (sce->band_type[idx] != ZERO_BT) {
1983  if (!cge) {
1984  int t = get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
1985  if (t) {
1986  int s = 1;
1987  t = gain += t;
1988  if (sign) {
1989  s -= 2 * (t & 0x1);
1990  t >>= 1;
1991  }
1992  gain_cache = powf(scale, -t) * s;
1993  }
1994  }
1995  coup->gain[c][idx] = gain_cache;
1996  }
1997  }
1998  }
1999  }
2000  }
2001  return 0;
2002 }
2003 
2004 /**
2005  * Parse whether channels are to be excluded from Dynamic Range Compression; reference: table 4.53.
2006  *
2007  * @return Returns number of bytes consumed.
2008  */
2009 static int decode_drc_channel_exclusions(DynamicRangeControl *che_drc,
2010  GetBitContext *gb)
2011 {
2012  int i;
2013  int num_excl_chan = 0;
2014 
2015  do {
2016  for (i = 0; i < 7; i++)
2017  che_drc->exclude_mask[num_excl_chan++] = get_bits1(gb);
2018  } while (num_excl_chan < MAX_CHANNELS - 7 && get_bits1(gb));
2019 
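    // each loop iteration reads 7 exclude_mask bits plus a 1-bit continuation
    // flag, i.e. one byte per group of 7 channels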
2020  return num_excl_chan / 7;
2021 }
2022 
2023 /**
2024  * Decode dynamic range information; reference: table 4.52.
2025  *
2026  * @return Returns number of bytes consumed.
2027  */
2028 static int decode_dynamic_range(DynamicRangeControl *che_drc,
2029  GetBitContext *gb)
2030 {
2031  int n = 1;
2032  int drc_num_bands = 1;
2033  int i;
2034 
2035  /* pce_tag_present? */
2036  if (get_bits1(gb)) {
2037  che_drc->pce_instance_tag = get_bits(gb, 4);
2038  skip_bits(gb, 4); // tag_reserved_bits
2039  n++;
2040  }
2041 
2042  /* excluded_chns_present? */
2043  if (get_bits1(gb)) {
2044  n += decode_drc_channel_exclusions(che_drc, gb);
2045  }
2046 
2047  /* drc_bands_present? */
2048  if (get_bits1(gb)) {
2049  che_drc->band_incr = get_bits(gb, 4);
2050  che_drc->interpolation_scheme = get_bits(gb, 4);
2051  n++;
2052  drc_num_bands += che_drc->band_incr;
2053  for (i = 0; i < drc_num_bands; i++) {
2054  che_drc->band_top[i] = get_bits(gb, 8);
2055  n++;
2056  }
2057  }
2058 
2059  /* prog_ref_level_present? */
2060  if (get_bits1(gb)) {
2061  che_drc->prog_ref_level = get_bits(gb, 7);
2062  skip_bits1(gb); // prog_ref_level_reserved_bits
2063  n++;
2064  }
2065 
2066  for (i = 0; i < drc_num_bands; i++) {
2067  che_drc->dyn_rng_sgn[i] = get_bits1(gb);
2068  che_drc->dyn_rng_ctl[i] = get_bits(gb, 7);
2069  n++;
2070  }
2071 
2072  return n;
2073 }
2074 
2075 static int decode_fill(AACContext *ac, GetBitContext *gb, int len) {
2076  uint8_t buf[256];
2077  int i, major, minor;
2078 
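    // the first 13 bits are discarded and the rest is scanned for a libfaac
    // version string; libfaac output starts with 1024 samples of encoder
    // delay, which are skipped below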
2079  if (len < 13+7*8)
2080  goto unknown;
2081 
2082  get_bits(gb, 13); len -= 13;
2083 
2084  for(i=0; i+1<sizeof(buf) && len>=8; i++, len-=8)
2085  buf[i] = get_bits(gb, 8);
2086 
2087  buf[i] = 0;
2088  if (ac->avctx->debug & FF_DEBUG_PICT_INFO)
2089  av_log(ac->avctx, AV_LOG_DEBUG, "FILL:%s\n", buf);
2090 
2091  if (sscanf(buf, "libfaac %d.%d", &major, &minor) == 2){
2092  ac->avctx->internal->skip_samples = 1024;
2093  }
2094 
2095 unknown:
2096  skip_bits_long(gb, len);
2097 
2098  return 0;
2099 }
2100 
2101 /**
2102  * Decode extension data (incomplete); reference: table 4.51.
2103  *
2104  * @param cnt length of TYPE_FIL syntactic element in bytes
2105  *
2106  * @return Returns number of bytes consumed
2107  */
2108 static int decode_extension_payload(AACContext *ac, GetBitContext *gb, int cnt,
2109  ChannelElement *che, enum RawDataBlockType elem_type)
2110 {
2111  int crc_flag = 0;
2112  int res = cnt;
2113  switch (get_bits(gb, 4)) { // extension type
2114  case EXT_SBR_DATA_CRC:
2115  crc_flag++;
2116  case EXT_SBR_DATA:
2117  if (!che) {
2118  av_log(ac->avctx, AV_LOG_ERROR, "SBR was found before the first channel element.\n");
2119  return res;
2120  } else if (!ac->oc[1].m4ac.sbr) {
2121  av_log(ac->avctx, AV_LOG_ERROR, "SBR signaled to be not-present but was found in the bitstream.\n");
2122  skip_bits_long(gb, 8 * cnt - 4);
2123  return res;
2124  } else if (ac->oc[1].m4ac.sbr == -1 && ac->oc[1].status == OC_LOCKED) {
2125  av_log(ac->avctx, AV_LOG_ERROR, "Implicit SBR was found with a first occurrence after the first frame.\n");
2126  skip_bits_long(gb, 8 * cnt - 4);
2127  return res;
2128  } else if (ac->oc[1].m4ac.ps == -1 && ac->oc[1].status < OC_LOCKED && ac->avctx->channels == 1) {
2129  ac->oc[1].m4ac.sbr = 1;
2130  ac->oc[1].m4ac.ps = 1;
2131  output_configure(ac, ac->oc[1].layout_map, ac->oc[1].layout_map_tags,
2132  ac->oc[1].status, 1);
2133  } else {
2134  ac->oc[1].m4ac.sbr = 1;
2135  }
2136  res = ff_decode_sbr_extension(ac, &che->sbr, gb, crc_flag, cnt, elem_type);
2137  break;
2138  case EXT_DYNAMIC_RANGE:
2139  res = decode_dynamic_range(&ac->che_drc, gb);
2140  break;
2141  case EXT_FILL:
2142  decode_fill(ac, gb, 8 * cnt - 4);
2143  break;
2144  case EXT_FILL_DATA:
2145  case EXT_DATA_ELEMENT:
2146  default:
2147  skip_bits_long(gb, 8 * cnt - 4);
2148  break;
2149  };
2150  return res;
2151 }
2152 
2153 /**
2154  * Decode Temporal Noise Shaping filter coefficients and apply all-pole filters; reference: 4.6.9.3.
2155  *
2156  * @param decode 1 if tool is used normally, 0 if tool is used in LTP.
2157  * @param coef spectral coefficients
2158  */
2159 static void apply_tns(float coef[1024], TemporalNoiseShaping *tns,
2160  IndividualChannelStream *ics, int decode)
2161 {
2162  const int mmm = FFMIN(ics->tns_max_bands, ics->max_sfb);
2163  int w, filt, m, i;
2164  int bottom, top, order, start, end, size, inc;
2165  float lpc[TNS_MAX_ORDER];
2166  float tmp[TNS_MAX_ORDER+1];
2167 
2168  for (w = 0; w < ics->num_windows; w++) {
2169  bottom = ics->num_swb;
2170  for (filt = 0; filt < tns->n_filt[w]; filt++) {
2171  top = bottom;
2172  bottom = FFMAX(0, top - tns->length[w][filt]);
2173  order = tns->order[w][filt];
2174  if (order == 0)
2175  continue;
2176 
2177  // tns_decode_coef
2178  compute_lpc_coefs(tns->coef[w][filt], order, lpc, 0, 0, 0);
2179 
2180  start = ics->swb_offset[FFMIN(bottom, mmm)];
2181  end = ics->swb_offset[FFMIN( top, mmm)];
2182  if ((size = end - start) <= 0)
2183  continue;
2184  if (tns->direction[w][filt]) {
2185  inc = -1;
2186  start = end - 1;
2187  } else {
2188  inc = 1;
2189  }
2190  start += w * 128;
2191 
2192  if (decode) {
2193  // ar filter
2194  for (m = 0; m < size; m++, start += inc)
2195  for (i = 1; i <= FFMIN(m, order); i++)
2196  coef[start] -= coef[start - i * inc] * lpc[i - 1];
2197  } else {
2198  // ma filter
2199  for (m = 0; m < size; m++, start += inc) {
2200  tmp[0] = coef[start];
2201  for (i = 1; i <= FFMIN(m, order); i++)
2202  coef[start] += tmp[i] * lpc[i - 1];
2203  for (i = order; i > 0; i--)
2204  tmp[i] = tmp[i - 1];
2205  }
2206  }
2207  }
2208  }
2209 }
2210 
2211 /**
2212  * Apply windowing and MDCT to obtain the spectral
2213  * coefficients of the time-domain signal predicted by LTP.
2214  */
2215 static void windowing_and_mdct_ltp(AACContext *ac, float *out,
2216  float *in, IndividualChannelStream *ics)
2217 {
2218  const float *lwindow = ics->use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
2219  const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
2220  const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
2221  const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
2222 
2223  if (ics->window_sequence[0] != LONG_STOP_SEQUENCE) {
2224  ac->fdsp.vector_fmul(in, in, lwindow_prev, 1024);
2225  } else {
2226  memset(in, 0, 448 * sizeof(float));
2227  ac->fdsp.vector_fmul(in + 448, in + 448, swindow_prev, 128);
2228  }
2229  if (ics->window_sequence[0] != LONG_START_SEQUENCE) {
2230  ac->fdsp.vector_fmul_reverse(in + 1024, in + 1024, lwindow, 1024);
2231  } else {
2232  ac->fdsp.vector_fmul_reverse(in + 1024 + 448, in + 1024 + 448, swindow, 128);
2233  memset(in + 1024 + 576, 0, 448 * sizeof(float));
2234  }
2235  ac->mdct_ltp.mdct_calc(&ac->mdct_ltp, out, in);
2236 }
2237 
2238 /**
2239  * Apply the long term prediction
2240  */
2241 static void apply_ltp(AACContext *ac, SingleChannelElement *sce)
2242 {
2243  const LongTermPrediction *ltp = &sce->ics.ltp;
2244  const uint16_t *offsets = sce->ics.swb_offset;
2245  int i, sfb;
2246 
2247  if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
2248  float *predTime = sce->ret;
2249  float *predFreq = ac->buf_mdct;
2250  int16_t num_samples = 2048;
2251 
2252  if (ltp->lag < 1024)
2253  num_samples = ltp->lag + 1024;
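        // predicted time signal: past output delayed by ltp->lag samples and
        // scaled by the transmitted LTP coefficient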
2254  for (i = 0; i < num_samples; i++)
2255  predTime[i] = sce->ltp_state[i + 2048 - ltp->lag] * ltp->coef;
2256  memset(&predTime[i], 0, (2048 - i) * sizeof(float));
2257 
2258  ac->windowing_and_mdct_ltp(ac, predFreq, predTime, &sce->ics);
2259 
2260  if (sce->tns.present)
2261  ac->apply_tns(predFreq, &sce->tns, &sce->ics, 0);
2262 
2263  for (sfb = 0; sfb < FFMIN(sce->ics.max_sfb, MAX_LTP_LONG_SFB); sfb++)
2264  if (ltp->used[sfb])
2265  for (i = offsets[sfb]; i < offsets[sfb + 1]; i++)
2266  sce->coeffs[i] += predFreq[i];
2267  }
2268 }
2269 
2270 /**
2271  * Update the LTP buffer for next frame
2272  */
2273 static void update_ltp(AACContext *ac, SingleChannelElement *sce)
2274 {
2275  IndividualChannelStream *ics = &sce->ics;
2276  float *saved = sce->saved;
2277  float *saved_ltp = sce->coeffs;
2278  const float *lwindow = ics->use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
2279  const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
2280  int i;
2281 
2282  if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
2283  memcpy(saved_ltp, saved, 512 * sizeof(float));
2284  memset(saved_ltp + 576, 0, 448 * sizeof(float));
2285  ac->fdsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64);
2286  for (i = 0; i < 64; i++)
2287  saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * swindow[63 - i];
2288  } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
2289  memcpy(saved_ltp, ac->buf_mdct + 512, 448 * sizeof(float));
2290  memset(saved_ltp + 576, 0, 448 * sizeof(float));
2291  ac->fdsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64);
2292  for (i = 0; i < 64; i++)
2293  saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * swindow[63 - i];
2294  } else { // LONG_STOP or ONLY_LONG
2295  ac->fdsp.vector_fmul_reverse(saved_ltp, ac->buf_mdct + 512, &lwindow[512], 512);
2296  for (i = 0; i < 512; i++)
2297  saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * lwindow[511 - i];
2298  }
2299 
2300  memcpy(sce->ltp_state, sce->ltp_state+1024, 1024 * sizeof(*sce->ltp_state));
2301  memcpy(sce->ltp_state+1024, sce->ret, 1024 * sizeof(*sce->ltp_state));
2302  memcpy(sce->ltp_state+2048, saved_ltp, 1024 * sizeof(*sce->ltp_state));
2303 }
2304 
2305 /**
2306  * Conduct IMDCT and windowing.
2307  */
2308 static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce)
2309 {
2310  IndividualChannelStream *ics = &sce->ics;
2311  float *in = sce->coeffs;
2312  float *out = sce->ret;
2313  float *saved = sce->saved;
2314  const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
2315  const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
2316  const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
2317  float *buf = ac->buf_mdct;
2318  float *temp = ac->temp;
2319  int i;
2320 
2321  // imdct
2322  if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
2323  for (i = 0; i < 1024; i += 128)
2324  ac->mdct_small.imdct_half(&ac->mdct_small, buf + i, in + i);
2325  } else
2326  ac->mdct.imdct_half(&ac->mdct, buf, in);
2327 
2328  /* window overlapping
2329  * NOTE: To simplify the overlapping code, all 'meaningless' short to long
2330  * and long to short transitions are considered to be short to short
2331  * transitions. This leaves just two cases (long to long and short to short)
2332  * with a little special sauce for EIGHT_SHORT_SEQUENCE.
2333  */
2334  if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
2335  (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
2336  ac->fdsp.vector_fmul_window( out, saved, buf, lwindow_prev, 512);
2337  } else {
2338  memcpy( out, saved, 448 * sizeof(float));
2339 
2340  if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
2341  ac->fdsp.vector_fmul_window(out + 448 + 0*128, saved + 448, buf + 0*128, swindow_prev, 64);
2342  ac->fdsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow, 64);
2343  ac->fdsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow, 64);
2344  ac->fdsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow, 64);
2345  ac->fdsp.vector_fmul_window(temp, buf + 3*128 + 64, buf + 4*128, swindow, 64);
2346  memcpy( out + 448 + 4*128, temp, 64 * sizeof(float));
2347  } else {
2348  ac->fdsp.vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, 64);
2349  memcpy( out + 576, buf + 64, 448 * sizeof(float));
2350  }
2351  }
2352 
2353  // buffer update
2354  if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
2355  memcpy( saved, temp + 64, 64 * sizeof(float));
2356  ac->fdsp.vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 64);
2357  ac->fdsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64);
2358  ac->fdsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64);
2359  memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float));
2360  } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
2361  memcpy( saved, buf + 512, 448 * sizeof(float));
2362  memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float));
2363  } else { // LONG_STOP or ONLY_LONG
2364  memcpy( saved, buf + 512, 512 * sizeof(float));
2365  }
2366 }
2367 
2368 /**
2369  * Apply dependent channel coupling (applied before IMDCT).
2370  *
2371  * @param index index into coupling gain array
2372  */
2373 static void apply_dependent_coupling(AACContext *ac,
2374  SingleChannelElement *target,
2375  ChannelElement *cce, int index)
2376 {
2377  IndividualChannelStream *ics = &cce->ch[0].ics;
2378  const uint16_t *offsets = ics->swb_offset;
2379  float *dest = target->coeffs;
2380  const float *src = cce->ch[0].coeffs;
2381  int g, i, group, k, idx = 0;
2382  if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) {
2383  av_log(ac->avctx, AV_LOG_ERROR,
2384  "Dependent coupling is not supported together with LTP\n");
2385  return;
2386  }
2387  for (g = 0; g < ics->num_window_groups; g++) {
2388  for (i = 0; i < ics->max_sfb; i++, idx++) {
2389  if (cce->ch[0].band_type[idx] != ZERO_BT) {
2390  const float gain = cce->coup.gain[index][idx];
2391  for (group = 0; group < ics->group_len[g]; group++) {
2392  for (k = offsets[i]; k < offsets[i + 1]; k++) {
2393  // XXX dsputil-ize
2394  dest[group * 128 + k] += gain * src[group * 128 + k];
2395  }
2396  }
2397  }
2398  }
2399  dest += ics->group_len[g] * 128;
2400  src += ics->group_len[g] * 128;
2401  }
2402 }
2403 
2404 /**
2405  * Apply independent channel coupling (applied after IMDCT).
2406  *
2407  * @param index index into coupling gain array
2408  */
2409 static void apply_independent_coupling(AACContext *ac,
2410  SingleChannelElement *target,
2411  ChannelElement *cce, int index)
2412 {
2413  int i;
2414  const float gain = cce->coup.gain[index][0];
2415  const float *src = cce->ch[0].ret;
2416  float *dest = target->ret;
2417  const int len = 1024 << (ac->oc[1].m4ac.sbr == 1);
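    // with SBR the time-domain output is upsampled to 2048 samples per frame,
    // so independent (post-IMDCT) coupling must cover the full upsampled frame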
2418 
2419  for (i = 0; i < len; i++)
2420  dest[i] += gain * src[i];
2421 }
2422 
2423 /**
2424  * channel coupling transformation interface
2425  *
2426  * @param apply_coupling_method pointer to (in)dependent coupling function
2427  */
2428 static void apply_channel_coupling(AACContext *ac, ChannelElement *cc,
2429  enum RawDataBlockType type, int elem_id,
2430  enum CouplingPoint coupling_point,
2431  void (*apply_coupling_method)(AACContext *ac, SingleChannelElement *target, ChannelElement *cce, int index))
2432 {
2433  int i, c;
2434 
2435  for (i = 0; i < MAX_ELEM_ID; i++) {
2436  ChannelElement *cce = ac->che[TYPE_CCE][i];
2437  int index = 0;
2438 
2439  if (cce && cce->coup.coupling_point == coupling_point) {
2440  ChannelCoupling *coup = &cce->coup;
2441 
2442  for (c = 0; c <= coup->num_coupled; c++) {
2443  if (coup->type[c] == type && coup->id_select[c] == elem_id) {
2444  if (coup->ch_select[c] != 1) {
2445  apply_coupling_method(ac, &cc->ch[0], cce, index);
2446  if (coup->ch_select[c] != 0)
2447  index++;
2448  }
2449  if (coup->ch_select[c] != 2)
2450  apply_coupling_method(ac, &cc->ch[1], cce, index++);
2451  } else
2452  index += 1 + (coup->ch_select[c] == 3);
2453  }
2454  }
2455  }
2456 }
2457 
2458 /**
2459  * Convert spectral data to float samples, applying all supported tools as appropriate.
2460  */
2461 static void spectral_to_sample(AACContext *ac)
2462 {
2463  int i, type;
2464  for (type = 3; type >= 0; type--) {
2465  for (i = 0; i < MAX_ELEM_ID; i++) {
2466  ChannelElement *che = ac->che[type][i];
2467  if (che) {
2468  if (type <= TYPE_CPE)
2469  apply_channel_coupling(ac, che, type, i, BEFORE_TNS, apply_dependent_coupling);
2470  if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) {
2471  if (che->ch[0].ics.predictor_present) {
2472  if (che->ch[0].ics.ltp.present)
2473  ac->apply_ltp(ac, &che->ch[0]);
2474  if (che->ch[1].ics.ltp.present && type == TYPE_CPE)
2475  ac->apply_ltp(ac, &che->ch[1]);
2476  }
2477  }
2478  if (che->ch[0].tns.present)
2479  ac->apply_tns(che->ch[0].coeffs, &che->ch[0].tns, &che->ch[0].ics, 1);
2480  if (che->ch[1].tns.present)
2481  ac->apply_tns(che->ch[1].coeffs, &che->ch[1].tns, &che->ch[1].ics, 1);
2482  if (type <= TYPE_CPE)
2483  apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, apply_dependent_coupling);
2484  if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT) {
2485  ac->imdct_and_windowing(ac, &che->ch[0]);
2486  if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP)
2487  ac->update_ltp(ac, &che->ch[0]);
2488  if (type == TYPE_CPE) {
2489  ac->imdct_and_windowing(ac, &che->ch[1]);
2490  if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP)
2491  ac->update_ltp(ac, &che->ch[1]);
2492  }
2493  if (ac->oc[1].m4ac.sbr > 0) {
2494  ff_sbr_apply(ac, &che->sbr, type, che->ch[0].ret, che->ch[1].ret);
2495  }
2496  }
2497  if (type <= TYPE_CCE)
2498  apply_channel_coupling(ac, che, type, i, AFTER_IMDCT, apply_independent_coupling);
2499  }
2500  }
2501  }
2502 }
2503 
2504 static int parse_adts_frame_header(AACContext *ac, GetBitContext *gb)
2505 {
2506  int size;
2507  AACADTSHeaderInfo hdr_info;
2508  uint8_t layout_map[MAX_ELEM_ID*4][3];
2509  int layout_map_tags;
2510 
2511  size = avpriv_aac_parse_header(gb, &hdr_info);
2512  if (size > 0) {
2513  if (!ac->warned_num_aac_frames && hdr_info.num_aac_frames != 1) {
2514  // This is 2 for "VLB " audio in NSV files.
2515  // See samples/nsv/vlb_audio.
2516  avpriv_report_missing_feature(ac->avctx,
2517  "More than one AAC RDB per ADTS frame");
2518  ac->warned_num_aac_frames = 1;
2519  }
2520  push_output_configuration(ac);
2521  if (hdr_info.chan_config) {
2522  ac->oc[1].m4ac.chan_config = hdr_info.chan_config;
2523  if (set_default_channel_config(ac->avctx, layout_map,
2524  &layout_map_tags, hdr_info.chan_config))
2525  return -7;
2526  if (output_configure(ac, layout_map, layout_map_tags,
2527  FFMAX(ac->oc[1].status, OC_TRIAL_FRAME), 0))
2528  return -7;
2529  } else {
2530  ac->oc[1].m4ac.chan_config = 0;
2531  /**
2532  * Dual mono frames in Japanese DTV can have chan_config 0
2533  * without a PCE being present,
2534  * so default to dual mono in that case.
2535  */
2536  if (ac->dmono_mode && ac->oc[0].status == OC_NONE) {
2537  layout_map_tags = 2;
2538  layout_map[0][0] = layout_map[1][0] = TYPE_SCE;
2539  layout_map[0][2] = layout_map[1][2] = AAC_CHANNEL_FRONT;
2540  layout_map[0][1] = 0;
2541  layout_map[1][1] = 1;
2542  if (output_configure(ac, layout_map, layout_map_tags,
2543  OC_TRIAL_FRAME, 0))
2544  return -7;
2545  }
2546  }
2547  ac->oc[1].m4ac.sample_rate = hdr_info.sample_rate;
2548  ac->oc[1].m4ac.sampling_index = hdr_info.sampling_index;
2549  ac->oc[1].m4ac.object_type = hdr_info.object_type;
2550  if (ac->oc[0].status != OC_LOCKED ||
2551  ac->oc[0].m4ac.chan_config != hdr_info.chan_config ||
2552  ac->oc[0].m4ac.sample_rate != hdr_info.sample_rate) {
2553  ac->oc[1].m4ac.sbr = -1;
2554  ac->oc[1].m4ac.ps = -1;
2555  }
2556  if (!hdr_info.crc_absent)
2557  skip_bits(gb, 16);
2558  }
2559  return size;
2560 }
2561 
2562 static int aac_decode_frame_int(AVCodecContext *avctx, void *data,
2563  int *got_frame_ptr, GetBitContext *gb, AVPacket *avpkt)
2564 {
2565  AACContext *ac = avctx->priv_data;
2566  ChannelElement *che = NULL, *che_prev = NULL;
2567  enum RawDataBlockType elem_type, elem_type_prev = TYPE_END;
2568  int err, elem_id;
2569  int samples = 0, multiplier, audio_found = 0, pce_found = 0;
2570  int is_dmono, sce_count = 0;
2571 
2572  ac->frame = data;
2573 
2574  if (show_bits(gb, 12) == 0xfff) {
2575  if (parse_adts_frame_header(ac, gb) < 0) {
2576  av_log(avctx, AV_LOG_ERROR, "Error decoding AAC frame header.\n");
2577  err = -1;
2578  goto fail;
2579  }
2580  if (ac->oc[1].m4ac.sampling_index > 12) {
2581  av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->oc[1].m4ac.sampling_index);
2582  err = -1;
2583  goto fail;
2584  }
2585  }
2586 
2587  if (frame_configure_elements(avctx) < 0) {
2588  err = -1;
2589  goto fail;
2590  }
2591 
2592  ac->tags_mapped = 0;
2593  // parse
2594  while ((elem_type = get_bits(gb, 3)) != TYPE_END) {
2595  elem_id = get_bits(gb, 4);
2596 
2597  if (elem_type < TYPE_DSE) {
2598  if (!(che=get_che(ac, elem_type, elem_id))) {
2599  av_log(ac->avctx, AV_LOG_ERROR, "channel element %d.%d is not allocated\n",
2600  elem_type, elem_id);
2601  err = -1;
2602  goto fail;
2603  }
2604  samples = 1024;
2605  }
2606 
2607  switch (elem_type) {
2608 
2609  case TYPE_SCE:
2610  err = decode_ics(ac, &che->ch[0], gb, 0, 0);
2611  audio_found = 1;
2612  sce_count++;
2613  break;
2614 
2615  case TYPE_CPE:
2616  err = decode_cpe(ac, gb, che);
2617  audio_found = 1;
2618  break;
2619 
2620  case TYPE_CCE:
2621  err = decode_cce(ac, gb, che);
2622  break;
2623 
2624  case TYPE_LFE:
2625  err = decode_ics(ac, &che->ch[0], gb, 0, 0);
2626  audio_found = 1;
2627  break;
2628 
2629  case TYPE_DSE:
2630  err = skip_data_stream_element(ac, gb);
2631  break;
2632 
2633  case TYPE_PCE: {
2634  uint8_t layout_map[MAX_ELEM_ID*4][3];
2635  int tags;
2636  push_output_configuration(ac);
2637  tags = decode_pce(avctx, &ac->oc[1].m4ac, layout_map, gb);
2638  if (tags < 0) {
2639  err = tags;
2640  break;
2641  }
2642  if (pce_found) {
2643  av_log(avctx, AV_LOG_ERROR,
2644  "Not evaluating a further program_config_element as this construct is dubious at best.\n");
2645  } else {
2646  err = output_configure(ac, layout_map, tags, OC_TRIAL_PCE, 1);
2647  if (!err)
2648  ac->oc[1].m4ac.chan_config = 0;
2649  pce_found = 1;
2650  }
2651  break;
2652  }
2653 
2654  case TYPE_FIL:
2655  if (elem_id == 15)
2656  elem_id += get_bits(gb, 8) - 1;
2657  if (get_bits_left(gb) < 8 * elem_id) {
2658  av_log(avctx, AV_LOG_ERROR, "TYPE_FIL: "overread_err);
2659  err = -1;
2660  goto fail;
2661  }
2662  while (elem_id > 0)
2663  elem_id -= decode_extension_payload(ac, gb, elem_id, che_prev, elem_type_prev);
2664  err = 0; /* FIXME */
2665  break;
2666 
2667  default:
2668  err = -1; /* should not happen, but keeps compiler happy */
2669  break;
2670  }
2671 
2672  che_prev = che;
2673  elem_type_prev = elem_type;
2674 
2675  if (err)
2676  goto fail;
2677 
2678  if (get_bits_left(gb) < 3) {
2679  av_log(avctx, AV_LOG_ERROR, overread_err);
2680  err = -1;
2681  goto fail;
2682  }
2683  }
2684 
2685  spectral_to_sample(ac);
2686 
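    // SBR doubles the number of output samples only when it signals a higher
    // extension sample rate than the core AAC rate (not for downsampled SBR)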
2687  multiplier = (ac->oc[1].m4ac.sbr == 1) ? ac->oc[1].m4ac.ext_sample_rate > ac->oc[1].m4ac.sample_rate : 0;
2688  samples <<= multiplier;
2689  /* for dual-mono audio (SCE + SCE) */
2690  is_dmono = ac->dmono_mode && sce_count == 2 &&
2691  ac->oc[1].channel_layout == (AV_CH_FRONT_LEFT | AV_CH_FRONT_RIGHT);
2692 
2693  if (samples)
2694  ac->frame->nb_samples = samples;
2695  else
2696  av_frame_unref(ac->frame);
2697  *got_frame_ptr = !!samples;
2698 
2699  if (is_dmono) {
2700  if (ac->dmono_mode == 1)
2701  ((AVFrame *)data)->data[1] =((AVFrame *)data)->data[0];
2702  else if (ac->dmono_mode == 2)
2703  ((AVFrame *)data)->data[0] =((AVFrame *)data)->data[1];
2704  }
2705 
2706  if (ac->oc[1].status && audio_found) {
2707  avctx->sample_rate = ac->oc[1].m4ac.sample_rate << multiplier;
2708  avctx->frame_size = samples;
2709  ac->oc[1].status = OC_LOCKED;
2710  }
2711 
2712  if (multiplier) {
2713  int side_size;
2714  const uint8_t *side = av_packet_get_side_data(avpkt, AV_PKT_DATA_SKIP_SAMPLES, &side_size);
2715  if (side && side_size>=4)
2716  AV_WL32(side, 2*AV_RL32(side));
2717  }
2718  return 0;
2719 fail:
2720  pop_output_configuration(ac);
2721  return err;
2722 }
2723 
2724 static int aac_decode_frame(AVCodecContext *avctx, void *data,
2725  int *got_frame_ptr, AVPacket *avpkt)
2726 {
2727  AACContext *ac = avctx->priv_data;
2728  const uint8_t *buf = avpkt->data;
2729  int buf_size = avpkt->size;
2730  GetBitContext gb;
2731  int buf_consumed;
2732  int buf_offset;
2733  int err;
2734  int new_extradata_size;
2735  const uint8_t *new_extradata = av_packet_get_side_data(avpkt,
2736  AV_PKT_DATA_NEW_EXTRADATA,
2737  &new_extradata_size);
2738  int jp_dualmono_size;
2739  const uint8_t *jp_dualmono = av_packet_get_side_data(avpkt,
2740  AV_PKT_DATA_JP_DUALMONO,
2741  &jp_dualmono_size);
2742 
2743  if (new_extradata && 0) {
2744  av_free(avctx->extradata);
2745  avctx->extradata = av_mallocz(new_extradata_size +
2746  FF_INPUT_BUFFER_PADDING_SIZE);
2747  if (!avctx->extradata)
2748  return AVERROR(ENOMEM);
2749  avctx->extradata_size = new_extradata_size;
2750  memcpy(avctx->extradata, new_extradata, new_extradata_size);
2751  push_output_configuration(ac);
2752  if (decode_audio_specific_config(ac, ac->avctx, &ac->oc[1].m4ac,
2753  avctx->extradata,
2754  avctx->extradata_size*8, 1) < 0) {
2755  pop_output_configuration(ac);
2756  return AVERROR_INVALIDDATA;
2757  }
2758  }
2759 
2760  ac->dmono_mode = 0;
2761  if (jp_dualmono && jp_dualmono_size > 0)
2762  ac->dmono_mode = 1 + *jp_dualmono;
2763  if (ac->force_dmono_mode >= 0)
2764  ac->dmono_mode = ac->force_dmono_mode;
2765 
2766  if (INT_MAX / 8 <= buf_size)
2767  return AVERROR_INVALIDDATA;
2768 
2769  init_get_bits(&gb, buf, buf_size * 8);
2770 
2771  if ((err = aac_decode_frame_int(avctx, data, got_frame_ptr, &gb, avpkt)) < 0)
2772  return err;
2773 
2774  buf_consumed = (get_bits_count(&gb) + 7) >> 3;
2775  for (buf_offset = buf_consumed; buf_offset < buf_size; buf_offset++)
2776  if (buf[buf_offset])
2777  break;
2778 
2779  return buf_size > buf_offset ? buf_consumed : buf_size;
2780 }
2781 
2782 static av_cold int aac_decode_close(AVCodecContext *avctx)
2783 {
2784  AACContext *ac = avctx->priv_data;
2785  int i, type;
2786 
2787  for (i = 0; i < MAX_ELEM_ID; i++) {
2788  for (type = 0; type < 4; type++) {
2789  if (ac->che[type][i])
2790  ff_aac_sbr_ctx_close(&ac->che[type][i]->sbr);
2791  av_freep(&ac->che[type][i]);
2792  }
2793  }
2794 
2795  ff_mdct_end(&ac->mdct);
2796  ff_mdct_end(&ac->mdct_small);
2797  ff_mdct_end(&ac->mdct_ltp);
2798  return 0;
2799 }
2800 
2801 
2802 #define LOAS_SYNC_WORD 0x2b7 ///< 11 bits LOAS sync word
2803 
2804 struct LATMContext {
2805  AACContext aac_ctx; ///< containing AACContext
2806  int initialized; ///< initialized after a valid extradata was seen
2807 
2808  // parser data
2809  int audio_mux_version_A; ///< LATM syntax version
2810  int frame_length_type; ///< 0/1 variable/fixed frame length
2811  int frame_length; ///< frame length for fixed frame length
2812 };
2813 
2814 static inline uint32_t latm_get_value(GetBitContext *b)
2815 {
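    // LatmGetValue(): a 2-bit bytesForValue field followed by
    // (bytesForValue + 1) bytes holding the value itself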
2816  int length = get_bits(b, 2);
2817 
2818  return get_bits_long(b, (length+1)*8);
2819 }
2820 
2821 static int latm_decode_audio_specific_config(struct LATMContext *latmctx,
2822  GetBitContext *gb, int asclen)
2823 {
2824  AACContext *ac = &latmctx->aac_ctx;
2825  AVCodecContext *avctx = ac->avctx;
2826  MPEG4AudioConfig m4ac = { 0 };
2827  int config_start_bit = get_bits_count(gb);
2828  int sync_extension = 0;
2829  int bits_consumed, esize;
2830 
2831  if (asclen) {
2832  sync_extension = 1;
2833  asclen = FFMIN(asclen, get_bits_left(gb));
2834  } else
2835  asclen = get_bits_left(gb);
2836 
2837  if (config_start_bit % 8) {
2838  avpriv_request_sample(latmctx->aac_ctx.avctx,
2839  "Non-byte-aligned audio-specific config");
2840  return AVERROR_PATCHWELCOME;
2841  }
2842  if (asclen <= 0)
2843  return AVERROR_INVALIDDATA;
2844  bits_consumed = decode_audio_specific_config(NULL, avctx, &m4ac,
2845  gb->buffer + (config_start_bit / 8),
2846  asclen, sync_extension);
2847 
2848  if (bits_consumed < 0)
2849  return AVERROR_INVALIDDATA;
2850 
2851  if (!latmctx->initialized ||
2852  ac->oc[1].m4ac.sample_rate != m4ac.sample_rate ||
2853  ac->oc[1].m4ac.chan_config != m4ac.chan_config) {
2854 
2855  if(latmctx->initialized) {
2856  av_log(avctx, AV_LOG_INFO, "audio config changed\n");
2857  } else {
2858  av_log(avctx, AV_LOG_DEBUG, "initializing latmctx\n");
2859  }
2860  latmctx->initialized = 0;
2861 
2862  esize = (bits_consumed+7) / 8;
2863 
2864  if (avctx->extradata_size < esize) {
2865  av_free(avctx->extradata);
2866  avctx->extradata = av_malloc(esize + FF_INPUT_BUFFER_PADDING_SIZE);
2867  if (!avctx->extradata)
2868  return AVERROR(ENOMEM);
2869  }
2870 
2871  avctx->extradata_size = esize;
2872  memcpy(avctx->extradata, gb->buffer + (config_start_bit/8), esize);
2873  memset(avctx->extradata+esize, 0, FF_INPUT_BUFFER_PADDING_SIZE);
2874  }
2875  skip_bits_long(gb, bits_consumed);
2876 
2877  return bits_consumed;
2878 }
2879 
2880 static int read_stream_mux_config(struct LATMContext *latmctx,
2881  GetBitContext *gb)
2882 {
2883  int ret, audio_mux_version = get_bits(gb, 1);
2884 
2885  latmctx->audio_mux_version_A = 0;
2886  if (audio_mux_version)
2887  latmctx->audio_mux_version_A = get_bits(gb, 1);
2888 
2889  if (!latmctx->audio_mux_version_A) {
2890 
2891  if (audio_mux_version)
2892  latm_get_value(gb); // taraFullness
2893 
2894  skip_bits(gb, 1); // allStreamSameTimeFraming
2895  skip_bits(gb, 6); // numSubFrames
2896  // numPrograms
2897  if (get_bits(gb, 4)) { // numPrograms
2898  avpriv_request_sample(latmctx->aac_ctx.avctx, "Multiple programs");
2899  return AVERROR_PATCHWELCOME;
2900  }
2901 
2902  // for each program (which there is only one in DVB)
2903 
2904  // for each layer (which there is only one in DVB)
2905  if (get_bits(gb, 3)) { // numLayer
2906  avpriv_request_sample(latmctx->aac_ctx.avctx, "Multiple layers");
2907  return AVERROR_PATCHWELCOME;
2908  }
2909 
2910  // for all but first stream: use_same_config = get_bits(gb, 1);
2911  if (!audio_mux_version) {
2912  if ((ret = latm_decode_audio_specific_config(latmctx, gb, 0)) < 0)
2913  return ret;
2914  } else {
2915  int ascLen = latm_get_value(gb);
2916  if ((ret = latm_decode_audio_specific_config(latmctx, gb, ascLen)) < 0)
2917  return ret;
2918  ascLen -= ret;
2919  skip_bits_long(gb, ascLen);
2920  }
2921 
2922  latmctx->frame_length_type = get_bits(gb, 3);
2923  switch (latmctx->frame_length_type) {
2924  case 0:
2925  skip_bits(gb, 8); // latmBufferFullness
2926  break;
2927  case 1:
2928  latmctx->frame_length = get_bits(gb, 9);
2929  break;
2930  case 3:
2931  case 4:
2932  case 5:
2933  skip_bits(gb, 6); // CELP frame length table index
2934  break;
2935  case 6:
2936  case 7:
2937  skip_bits(gb, 1); // HVXC frame length table index
2938  break;
2939  }
2940 
2941  if (get_bits(gb, 1)) { // other data
2942  if (audio_mux_version) {
2943  latm_get_value(gb); // other_data_bits
2944  } else {
2945  int esc;
2946  do {
2947  esc = get_bits(gb, 1);
2948  skip_bits(gb, 8);
2949  } while (esc);
2950  }
2951  }
2952 
2953  if (get_bits(gb, 1)) // crc present
2954  skip_bits(gb, 8); // config_crc
2955  }
2956 
2957  return 0;
2958 }
2959 
2960 static int read_payload_length_info(struct LATMContext *ctx, GetBitContext *gb)
2961 {
2962  uint8_t tmp;
2963 
2964  if (ctx->frame_length_type == 0) {
2965  int mux_slot_length = 0;
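        // MuxSlotLengthBytes: 8-bit chunks are summed until a chunk other
        // than 255 terminates the length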
2966  do {
2967  tmp = get_bits(gb, 8);
2968  mux_slot_length += tmp;
2969  } while (tmp == 255);
2970  return mux_slot_length;
2971  } else if (ctx->frame_length_type == 1) {
2972  return ctx->frame_length;
2973  } else if (ctx->frame_length_type == 3 ||
2974  ctx->frame_length_type == 5 ||
2975  ctx->frame_length_type == 7) {
2976  skip_bits(gb, 2); // mux_slot_length_coded
2977  }
2978  return 0;
2979 }
2980 
2981 static int read_audio_mux_element(struct LATMContext *latmctx,
2982  GetBitContext *gb)
2983 {
2984  int err;
2985  uint8_t use_same_mux = get_bits(gb, 1);
2986  if (!use_same_mux) {
2987  if ((err = read_stream_mux_config(latmctx, gb)) < 0)
2988  return err;
2989  } else if (!latmctx->aac_ctx.avctx->extradata) {
2990  av_log(latmctx->aac_ctx.avctx, AV_LOG_DEBUG,
2991  "no decoder config found\n");
2992  return AVERROR(EAGAIN);
2993  }
2994  if (latmctx->audio_mux_version_A == 0) {
2995  int mux_slot_length_bytes = read_payload_length_info(latmctx, gb);
2996  if (mux_slot_length_bytes * 8 > get_bits_left(gb)) {
2997  av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR, "incomplete frame\n");
2998  return AVERROR_INVALIDDATA;
2999  } else if (mux_slot_length_bytes * 8 + 256 < get_bits_left(gb)) {
3000  av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR,
3001  "frame length mismatch %d << %d\n",
3002  mux_slot_length_bytes * 8, get_bits_left(gb));
3003  return AVERROR_INVALIDDATA;
3004  }
3005  }
3006  return 0;
3007 }
3008 
3009 
3010 static int latm_decode_frame(AVCodecContext *avctx, void *out,
3011  int *got_frame_ptr, AVPacket *avpkt)
3012 {
3013  struct LATMContext *latmctx = avctx->priv_data;
3014  int muxlength, err;
3015  GetBitContext gb;
3016 
3017  if ((err = init_get_bits8(&gb, avpkt->data, avpkt->size)) < 0)
3018  return err;
3019 
3020  // check for LOAS sync word
3021  if (get_bits(&gb, 11) != LOAS_SYNC_WORD)
3022  return AVERROR_INVALIDDATA;
3023 
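    // the 13-bit audioMuxLengthBytes field counts the bytes following the
    // 3-byte syncword/length header, hence the +3 for the whole LOAS frame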
3024  muxlength = get_bits(&gb, 13) + 3;
3025  // not enough data, the parser should have sorted this out
3026  if (muxlength > avpkt->size)
3027  return AVERROR_INVALIDDATA;
3028 
3029  if ((err = read_audio_mux_element(latmctx, &gb)) < 0)
3030  return err;
3031 
3032  if (!latmctx->initialized) {
3033  if (!avctx->extradata) {
3034  *got_frame_ptr = 0;
3035  return avpkt->size;
3036  } else {
3037  push_output_configuration(&latmctx->aac_ctx);
3038  if ((err = decode_audio_specific_config(
3039  &latmctx->aac_ctx, avctx, &latmctx->aac_ctx.oc[1].m4ac,
3040  avctx->extradata, avctx->extradata_size*8, 1)) < 0) {
3041  pop_output_configuration(&latmctx->aac_ctx);
3042  return err;
3043  }
3044  latmctx->initialized = 1;
3045  }
3046  }
3047 
3048  if (show_bits(&gb, 12) == 0xfff) {
3049  av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR,
3050  "ADTS header detected, probably as result of configuration "
3051  "misparsing\n");
3052  return AVERROR_INVALIDDATA;
3053  }
3054 
3055  if ((err = aac_decode_frame_int(avctx, out, got_frame_ptr, &gb, avpkt)) < 0)
3056  return err;
3057 
3058  return muxlength;
3059 }
3060 
3061 static av_cold int latm_decode_init(AVCodecContext *avctx)
3062 {
3063  struct LATMContext *latmctx = avctx->priv_data;
3064  int ret = aac_decode_init(avctx);
3065 
3066  if (avctx->extradata_size > 0)
3067  latmctx->initialized = !ret;
3068 
3069  return ret;
3070 }
3071 
3072 static void aacdec_init(AACContext *c)
3073 {
3074  c->imdct_and_windowing = imdct_and_windowing;
3075  c->apply_ltp = apply_ltp;
3076  c->apply_tns = apply_tns;
3077  c->windowing_and_mdct_ltp = windowing_and_mdct_ltp;
3078  c->update_ltp = update_ltp;
3079 
3080  if(ARCH_MIPS)
3081  ff_aacdec_init_mips(c);
3082 }
3083 /**
3084  * AVOptions for Japanese DTV specific extensions (ADTS only)
3085  */
3086 #define AACDEC_FLAGS AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
3087 static const AVOption options[] = {
3088  {"dual_mono_mode", "Select the channel to decode for dual mono",
3089  offsetof(AACContext, force_dmono_mode), AV_OPT_TYPE_INT, {.i64=-1}, -1, 2,
3090  AACDEC_FLAGS, "dual_mono_mode"},
3091 
3092  {"auto", "autoselection", 0, AV_OPT_TYPE_CONST, {.i64=-1}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"},
3093  {"main", "Select Main/Left channel", 0, AV_OPT_TYPE_CONST, {.i64= 1}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"},
3094  {"sub" , "Select Sub/Right channel", 0, AV_OPT_TYPE_CONST, {.i64= 2}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"},
3095  {"both", "Select both channels", 0, AV_OPT_TYPE_CONST, {.i64= 0}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"},
3096 
3097  {NULL},
3098 };
3099 
3100 static const AVClass aac_decoder_class = {
3101  .class_name = "AAC decoder",
3102  .item_name = av_default_item_name,
3103  .option = options,
3104  .version = LIBAVUTIL_VERSION_INT,
3105 };
3106 
3107 AVCodec ff_aac_decoder = {
3108  .name = "aac",
3109  .type = AVMEDIA_TYPE_AUDIO,
3110  .id = AV_CODEC_ID_AAC,
3111  .priv_data_size = sizeof(AACContext),
3112  .init = aac_decode_init,
3113  .close = aac_decode_close,
3114  .decode = aac_decode_frame,
3115  .long_name = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
3116  .sample_fmts = (const enum AVSampleFormat[]) {
3117  AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE
3118  },
3119  .capabilities = CODEC_CAP_CHANNEL_CONF | CODEC_CAP_DR1,
3120  .channel_layouts = aac_channel_layout,
3121  .flush = flush,
3122  .priv_class = &aac_decoder_class,
3123 };
3124 
3125 /*
3126  Note: This decoder filter is intended to decode LATM streams transferred
3127  in MPEG transport streams that contain only one program.
3128  For more complex LATM demuxing, a separate LATM demuxer should be used.
3129 */
3130 AVCodec ff_aac_latm_decoder = {
3131  .name = "aac_latm",
3132  .type = AVMEDIA_TYPE_AUDIO,
3133  .id = AV_CODEC_ID_AAC_LATM,
3134  .priv_data_size = sizeof(struct LATMContext),
3135  .init = latm_decode_init,
3136  .close = aac_decode_close,
3137  .decode = latm_decode_frame,
3138  .long_name = NULL_IF_CONFIG_SMALL("AAC LATM (Advanced Audio Coding LATM syntax)"),
3139  .sample_fmts = (const enum AVSampleFormat[]) {
3140  AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE
3141  },
3142  .capabilities = CODEC_CAP_CHANNEL_CONF | CODEC_CAP_DR1,
3143  .channel_layouts = aac_channel_layout,
3144  .flush = flush,
3145 };