[FFmpeg-devel] AAC decoder round 5

Michael Niedermayer michaelni
Wed Aug 6 21:13:02 CEST 2008


On Wed, Aug 06, 2008 at 12:32:32AM +0100, Robert Swain wrote:
> $subj
> 
> Best regards,
> Rob

> Index: Changelog
> ===================================================================
> --- Changelog	(revision 14623)
> +++ Changelog	(working copy)
> @@ -128,6 +128,7 @@
>  - Motion Pixels MVI demuxer
>  - removed animated GIF decoder/demuxer
>  - D-Cinema audio muxer
> +- AAC decoder
>  
>  version 0.4.9-pre1:
>  

ok

[...]

> Index: libavcodec/aactab.c
> ===================================================================
> --- libavcodec/aactab.c	(revision 14625)
> +++ libavcodec/aactab.c	(working copy)
> @@ -27,10 +27,24 @@
>   * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
>   */
>  
> +#include "libavutil/mem.h"
>  #include "aac.h"
>  
>  #include <stdint.h>
>  

ok


> +DECLARE_ALIGNED(16, float,  ff_aac_kbd_long_1024[1024]);
> +DECLARE_ALIGNED(16, float,  ff_aac_kbd_short_128[128]);
> +DECLARE_ALIGNED(16, float, ff_aac_sine_long_1024[1024]);
> +DECLARE_ALIGNED(16, float, ff_aac_sine_short_128[128]);
> +
> +const uint8_t ff_aac_num_swb_1024[] = {
> +    41, 41, 47, 49, 49, 51, 47, 47, 43, 43, 43, 40
> +};
> +
> +const uint8_t ff_aac_num_swb_128[] = {
> +    12, 12, 12, 14, 14, 14, 15, 15, 15, 15, 15, 15
> +};
> +
>  const uint32_t ff_aac_scalefactor_code[121] = {
>      0x3ffe8, 0x3ffe6, 0x3ffe7, 0x3ffe5, 0x7fff5, 0x7fff1, 0x7ffed, 0x7fff6,
>      0x7ffee, 0x7ffef, 0x7fff0, 0x7fffc, 0x7fffd, 0x7ffff, 0x7fffe, 0x7fff7,


> @@ -795,4 +809,90 @@
>       4064.0312908,  4074.6805676,  4085.3368071,  4096.0000000,
>  };
>  
> +/* [ 0, 255] scale factor decoding when using C dsp.float_to_int16
> + * [60, 315] scale factor decoding when using SIMD dsp.float_to_int16
> + * [45, 300] intensity stereo position decoding mapped in reverse order i.e. 0->300, 1->299, ..., 254->46, 255->45
> + */

not doxygen compat, no description of what the table actually contains.


> +const float ff_aac_pow2sf_tab[316] = {
> +    8.88178420e-16, 1.05622810e-15, 1.25607397e-15, 1.49373210e-15,
> +    1.77635684e-15, 2.11245619e-15, 2.51214793e-15, 2.98746420e-15,
> +    3.55271368e-15, 4.22491238e-15, 5.02429587e-15, 5.97492839e-15,
> +    7.10542736e-15, 8.44982477e-15, 1.00485917e-14, 1.19498568e-14,
> +    1.42108547e-14, 1.68996495e-14, 2.00971835e-14, 2.38997136e-14,
> +    2.84217094e-14, 3.37992991e-14, 4.01943669e-14, 4.77994272e-14,
> +    5.68434189e-14, 6.75985982e-14, 8.03887339e-14, 9.55988543e-14,
> +    1.13686838e-13, 1.35197196e-13, 1.60777468e-13, 1.91197709e-13,
> +    2.27373675e-13, 2.70394393e-13, 3.21554936e-13, 3.82395417e-13,
> +    4.54747351e-13, 5.40788785e-13, 6.43109871e-13, 7.64790834e-13,
> +    9.09494702e-13, 1.08157757e-12, 1.28621974e-12, 1.52958167e-12,
> +    1.81898940e-12, 2.16315514e-12, 2.57243948e-12, 3.05916334e-12,
> +    3.63797881e-12, 4.32631028e-12, 5.14487897e-12, 6.11832668e-12,
> +    7.27595761e-12, 8.65262056e-12, 1.02897579e-11, 1.22366534e-11,
> +    1.45519152e-11, 1.73052411e-11, 2.05795159e-11, 2.44733067e-11,
> +    2.91038305e-11, 3.46104823e-11, 4.11590317e-11, 4.89466134e-11,
> +    5.82076609e-11, 6.92209645e-11, 8.23180635e-11, 9.78932268e-11,
> +    1.16415322e-10, 1.38441929e-10, 1.64636127e-10, 1.95786454e-10,
> +    2.32830644e-10, 2.76883858e-10, 3.29272254e-10, 3.91572907e-10,
> +    4.65661287e-10, 5.53767716e-10, 6.58544508e-10, 7.83145814e-10,
> +    9.31322575e-10, 1.10753543e-09, 1.31708902e-09, 1.56629163e-09,
> +    1.86264515e-09, 2.21507086e-09, 2.63417803e-09, 3.13258326e-09,
> +    3.72529030e-09, 4.43014173e-09, 5.26835606e-09, 6.26516652e-09,
> +    7.45058060e-09, 8.86028346e-09, 1.05367121e-08, 1.25303330e-08,
> +    1.49011612e-08, 1.77205669e-08, 2.10734243e-08, 2.50606661e-08,
> +    2.98023224e-08, 3.54411338e-08, 4.21468485e-08, 5.01213321e-08,
> +    5.96046448e-08, 7.08822677e-08, 8.42936970e-08, 1.00242664e-07,
> +    1.19209290e-07, 1.41764535e-07, 1.68587394e-07, 2.00485328e-07,
> +    2.38418579e-07, 2.83529071e-07, 3.37174788e-07, 4.00970657e-07,
> +    4.76837158e-07, 5.67058141e-07, 6.74349576e-07, 8.01941314e-07,
> +    9.53674316e-07, 1.13411628e-06, 1.34869915e-06, 1.60388263e-06,
> +    1.90734863e-06, 2.26823256e-06, 2.69739830e-06, 3.20776526e-06,
> +    3.81469727e-06, 4.53646513e-06, 5.39479661e-06, 6.41553051e-06,
> +    7.62939453e-06, 9.07293026e-06, 1.07895932e-05, 1.28310610e-05,
> +    1.52587891e-05, 1.81458605e-05, 2.15791864e-05, 2.56621220e-05,
> +    3.05175781e-05, 3.62917210e-05, 4.31583729e-05, 5.13242441e-05,
> +    6.10351562e-05, 7.25834421e-05, 8.63167458e-05, 1.02648488e-04,
> +    1.22070312e-04, 1.45166884e-04, 1.72633492e-04, 2.05296976e-04,
> +    2.44140625e-04, 2.90333768e-04, 3.45266983e-04, 4.10593953e-04,
> +    4.88281250e-04, 5.80667537e-04, 6.90533966e-04, 8.21187906e-04,
> +    9.76562500e-04, 1.16133507e-03, 1.38106793e-03, 1.64237581e-03,
> +    1.95312500e-03, 2.32267015e-03, 2.76213586e-03, 3.28475162e-03,
> +    3.90625000e-03, 4.64534029e-03, 5.52427173e-03, 6.56950324e-03,
> +    7.81250000e-03, 9.29068059e-03, 1.10485435e-02, 1.31390065e-02,
> +    1.56250000e-02, 1.85813612e-02, 2.20970869e-02, 2.62780130e-02,
> +    3.12500000e-02, 3.71627223e-02, 4.41941738e-02, 5.25560260e-02,
> +    6.25000000e-02, 7.43254447e-02, 8.83883476e-02, 1.05112052e-01,
> +    1.25000000e-01, 1.48650889e-01, 1.76776695e-01, 2.10224104e-01,
> +    2.50000000e-01, 2.97301779e-01, 3.53553391e-01, 4.20448208e-01,
> +    5.00000000e-01, 5.94603558e-01, 7.07106781e-01, 8.40896415e-01,
> +    1.00000000e+00, 1.18920712e+00, 1.41421356e+00, 1.68179283e+00,
> +    2.00000000e+00, 2.37841423e+00, 2.82842712e+00, 3.36358566e+00,
> +    4.00000000e+00, 4.75682846e+00, 5.65685425e+00, 6.72717132e+00,
> +    8.00000000e+00, 9.51365692e+00, 1.13137085e+01, 1.34543426e+01,
> +    1.60000000e+01, 1.90273138e+01, 2.26274170e+01, 2.69086853e+01,
> +    3.20000000e+01, 3.80546277e+01, 4.52548340e+01, 5.38173706e+01,
> +    6.40000000e+01, 7.61092554e+01, 9.05096680e+01, 1.07634741e+02,
> +    1.28000000e+02, 1.52218511e+02, 1.81019336e+02, 2.15269482e+02,
> +    2.56000000e+02, 3.04437021e+02, 3.62038672e+02, 4.30538965e+02,
> +    5.12000000e+02, 6.08874043e+02, 7.24077344e+02, 8.61077929e+02,
> +    1.02400000e+03, 1.21774809e+03, 1.44815469e+03, 1.72215586e+03,
> +    2.04800000e+03, 2.43549617e+03, 2.89630938e+03, 3.44431172e+03,
> +    4.09600000e+03, 4.87099234e+03, 5.79261875e+03, 6.88862343e+03,
> +    8.19200000e+03, 9.74198469e+03, 1.15852375e+04, 1.37772469e+04,
> +    1.63840000e+04, 1.94839694e+04, 2.31704750e+04, 2.75544937e+04,
> +    3.27680000e+04, 3.89679387e+04, 4.63409500e+04, 5.51089875e+04,
> +    6.55360000e+04, 7.79358775e+04, 9.26819000e+04, 1.10217975e+05,
> +    1.31072000e+05, 1.55871755e+05, 1.85363800e+05, 2.20435950e+05,
> +    2.62144000e+05, 3.11743510e+05, 3.70727600e+05, 4.40871900e+05,
> +    5.24288000e+05, 6.23487020e+05, 7.41455200e+05, 8.81743800e+05,
> +    1.04857600e+06, 1.24697404e+06, 1.48291040e+06, 1.76348760e+06,
> +    2.09715200e+06, 2.49394808e+06, 2.96582080e+06, 3.52697520e+06,
> +    4.19430400e+06, 4.98789616e+06, 5.93164160e+06, 7.05395040e+06,
> +    8.38860800e+06, 9.97579232e+06, 1.18632832e+07, 1.41079008e+07,
> +    1.67772160e+07, 1.99515846e+07, 2.37265664e+07, 2.82158016e+07,
> +    3.35544320e+07, 3.99031693e+07, 4.74531328e+07, 5.64316032e+07,
> +    6.71088640e+07, 7.98063385e+07, 9.49062656e+07, 1.12863206e+08,
> +    1.34217728e+08, 1.59612677e+08, 1.89812531e+08, 2.25726413e+08,
> +    2.68435456e+08, 3.19225354e+08, 3.79625062e+08, 4.51452825e+08,
> +};
> +
>  #endif /* CONFIG_HARDCODED_TABLES */

ok


[...]

> @@ -45,6 +54,7 @@
>  
>  #ifdef CONFIG_HARDCODED_TABLES
>  extern const float ff_aac_ivquant_tab[IVQUANT_SIZE];
> +extern const float  ff_aac_pow2sf_tab[316];
>  #endif /* CONFIG_HARDCODED_TABLES */
>  
>  #endif /* FFMPEG_AACTAB_H */

ok, with a space less


> Index: libavcodec/aac.c
> ===================================================================
> --- libavcodec/aac.c	(revision 14626)
> +++ libavcodec/aac.c	(working copy)

[...]
>  
>  #include "aac.h"
>  #include "aactab.h"
> +#include "aacdectab.h"
>  #include "mpeg4audio.h"
>  
>  #include <assert.h>
> @@ -91,12 +93,165 @@
>  
>  #ifndef CONFIG_HARDCODED_TABLES
>      static float ff_aac_ivquant_tab[IVQUANT_SIZE];
> +    static float ff_aac_pow2sf_tab[316];
>  #endif /* CONFIG_HARDCODED_TABLES */
>  
>  static VLC vlc_scalefactors;
>  static VLC vlc_spectral[11];

ok


[...]
> +/**
> + * Configure output channel order and optional mixing based on the current
> + * program configuration element and user requested channels.
> + *
> + * \param newpcs New program configuration struct - we only do something if it differs from the current one.
> + */
> +static int output_configure(AACContext *ac, ProgramConfig *pcs, ProgramConfig *newpcs) {
> +    AVCodecContext *avctx = ac->avccontext;
> +    int i, j, channels = 0;
> +    float a, b;
> +    ChannelElement *mixdown[3] = { NULL, NULL, NULL };
> +
> +    static const float mixdowncoeff[4] = {
> +        /* matrix mix-down coefficient, table 4.70 */
> +        1. / M_SQRT2,
> +        1. / 2.,
> +        1. / (2 * M_SQRT2),
> +        0
> +    };
> +
> +    if(!memcmp(pcs, newpcs, sizeof(ProgramConfig)))
> +        return 0; /* no change */
> +
> +    *pcs = *newpcs;
> +
> +    /* Allocate or free elements depending on if they are in the
> +     * current program configuration struct.
> +     *
> +     * Set up default 1:1 output mapping.
> +     *
> +     * For a 5.1 stream the output order will be:
> +     *    [ Front Left ] [ Front Right ] [ Center ] [ LFE ] [ Surround Left ] [ Surround Right ]
> +     *
> +     * Locate front, center and back channels for later matrix mix-down.
> +     */
> +
> +    for(i = 0; i < MAX_TAGID; i++) {
> +        for(j = 0; j < 4; j++) {
> +            if(pcs->che_type[j][i]) {
> +                if(!ac->che[j][i] && !(ac->che[j][i] = av_mallocz(sizeof(ChannelElement))))
> +                    return AVERROR(ENOMEM);
> +                if(j != ID_CCE) {
> +                    ac->output_data[channels++] = ac->che[j][i]->ch[0].ret;
> +                    ac->che[j][i]->ch[0].mixing_gain = 1.0f;
> +                    if(j == ID_CPE) {
> +                        ac->output_data[channels++] = ac->che[j][i]->ch[1].ret;
> +                        ac->che[j][i]->ch[1].mixing_gain = 1.0f;
> +                        if(!mixdown[MIXDOWN_FRONT] && pcs->che_type[j][i] == AAC_CHANNEL_FRONT)
> +                            mixdown[MIXDOWN_FRONT] = ac->che[j][i];
> +                        if(!mixdown[MIXDOWN_BACK ] && pcs->che_type[j][i] == AAC_CHANNEL_BACK)
> +                            mixdown[MIXDOWN_BACK ] = ac->che[j][i];
> +                    }
> +                    if(j == ID_SCE && !mixdown[MIXDOWN_CENTER] && pcs->che_type[j][i] == AAC_CHANNEL_FRONT)
> +                        mixdown[MIXDOWN_CENTER] = ac->che[j][i];
> +                }
> +            } else
> +                av_freep(&ac->che[j][i]);
> +        }
> +    }
> +
> +    // allocate appropriately aligned buffer for interleaved output
> +    if(channels > avctx->channels)
> +        av_freep(&ac->interleaved_output);
> +    if(!ac->interleaved_output && !(ac->interleaved_output = av_malloc(channels * 1024 * sizeof(float))))
> +        return AVERROR(ENOMEM);
> +
> +    ac->mm[MIXDOWN_FRONT] = ac->mm[MIXDOWN_BACK] = ac->mm[MIXDOWN_CENTER] = NULL;
> +
> +    /* Check for matrix mix-down to mono or stereo. */
> +
> +    if(avctx->request_channels && avctx->request_channels <= 2 &&
> +       avctx->request_channels != channels) {
> +
> +        if((avctx->request_channels == 1 && pcs->mono_mixdown_tag   != -1) ||
> +           (avctx->request_channels == 2 && pcs->stereo_mixdown_tag != -1)) {
> +            /* Add support for this as soon as we get a sample so we can figure out
> +               exactly how this is supposed to work. */
> +            av_log(avctx, AV_LOG_ERROR,
> +                   "Mix-down using pre-mixed elements is not supported, please file a bug. "
> +                   "Reverting to matrix mix-down.\n");
> +        }
> +
> +        /* We need 'center + L + R + sL + sR' for matrix mix-down. */
> +        if(mixdown[MIXDOWN_CENTER] && mixdown[MIXDOWN_FRONT] && mixdown[MIXDOWN_BACK]) {

> +            a = mixdowncoeff[pcs->mixdown_coeff_index];
> +
> +            if(avctx->request_channels == 2) {
> +                b = 1. / (1. + (1. / M_SQRT2) + a * (pcs->pseudo_surround ? 2. : 1.));
> +                mixdown[MIXDOWN_CENTER]->ch[0].mixing_gain      = b / M_SQRT2;
> +            } else {
> +                b = 1. / (3. + 2. * a);
> +                mixdown[MIXDOWN_CENTER]->ch[0].mixing_gain      = b;
> +            }
> +            mixdown[MIXDOWN_FRONT]->ch[0].mixing_gain = b;
> +            mixdown[MIXDOWN_FRONT]->ch[1].mixing_gain = b;
> +            mixdown[MIXDOWN_BACK ]->ch[0].mixing_gain = b * a;
> +            mixdown[MIXDOWN_BACK ]->ch[1].mixing_gain = b * a;

this is totally obfuscated
at least rename b to something that has something to do with normalization
factor.
And while it is written in the spec like above, i do not think it is correct.
As it does not seem volume is kept constant in the convertion. Also AC3 has to
do downmix too and the matrix init could be shared.

also mixing_gain and global_gain could maybe be merged if the volume is
slightly changed ... and as it does not seem to be correct as is anyway
this might be worth a try ...


[...]
> @@ -125,13 +280,149 @@
>  
>      /* comment field, first byte is length */
>      skip_bits_long(gb, 8 * get_bits(gb, 8));
> +    return 0;
> +}

ok

>  

> +/**
> + * Set up ProgramConfig, but based on a default channel configuration
> + * as specified in table 1.17.
> + */

> +static int program_config_element_default(AACContext *ac, ProgramConfig *newpcs, int channels)

set_pce_to_defaults() ?


> +{
> +    /* Pre-mixed down-mix outputs are not available. */
> +    newpcs->mono_mixdown_tag   = -1;
> +    newpcs->stereo_mixdown_tag = -1;
> +
> +    if(channels < 1 || channels > 7) {
> +        av_log(ac->avccontext, AV_LOG_ERROR, "invalid default channel configuration (%d channels)\n",
> +               channels);
> +        return -1;
> +    }
> +
> +    /* default channel configurations:
> +     *
> +     * 1ch : front center (mono)
> +     * 2ch : L + R (stereo)
> +     * 3ch : front center + L + R
> +     * 4ch : front center + L + R + back center
> +     * 5ch : front center + L + R + back stereo
> +     * 6ch : front center + L + R + back stereo + LFE

> +     * 7ch : front center + L + R + outer front left + outer front right + back stereo + LFE

its still 8, so channels/num_channels is no longer an appropriate name for
the vars used as its not 7 channels.


[...]
> +/**
> + * Parse GA "General Audio" specific configuration; reference: table 4.1.
> + */
> +static int ga_specific_config(AACContext * ac, GetBitContext * gb, int channels) {

read/parse/decode_...


[...]

> +/**
> + * Parse audio specific configuration; reference: table 1.13.
> + *
> + * @param   data        pointer to AVCodecContext extradata
> + * @param   data_size   size of AVCCodecContext extradata
> + * @return  Returns error status. 0 - OK, !0 - error
> + */

> +static int audio_specific_config(AACContext * ac, void *data, int data_size) {

decode/parse/read_... ?


[...]
>  static av_cold int aac_decode_init(AVCodecContext * avccontext) {
>      AACContext * ac = avccontext->priv_data;
>      int i;
>  
>      ac->avccontext = avccontext;
>  
> +    if (avccontext->extradata_size <= 0 ||

ok

> +        audio_specific_config(ac, avccontext->extradata, avccontext->extradata_size))


> +        return -1;
> +
>      avccontext->sample_rate = ac->m4ac.sample_rate;
>      avccontext->frame_size  = 1024;
>  

ok


[...]
> @@ -166,6 +459,8 @@
>  #ifndef CONFIG_HARDCODED_TABLES
>      for (i = 1 - IVQUANT_SIZE/2; i < IVQUANT_SIZE/2; i++)
>          ff_aac_ivquant_tab[i + IVQUANT_SIZE/2 - 1] =  cbrt(fabs(i)) * i;
> +    for (i = 0; i < 316; i++)
> +        ff_aac_pow2sf_tab[i] = pow(2, (i - 200)/4.);
>  #endif /* CONFIG_HARDCODED_TABLES */
>  
>      INIT_VLC_STATIC(&vlc_scalefactors, 7, sizeof(ff_aac_scalefactor_code)/sizeof(ff_aac_scalefactor_code[0]),

ok


[...]
>  }
>  
> +/**
> + * Decode a data_stream_element; reference: table 4.10.

decode? wasnt it skip?


> + */
> +static void skip_data_stream_element(GetBitContext * gb) {
>      int byte_align = get_bits1(gb);
>      int count = get_bits(gb, 8);
>      if (count == 255)


[...]

> @@ -200,6 +565,258 @@
>          return cbrtf(fabsf(a)) * a;
>  }
>  
> +/**
> + * Decode section_data payload; reference: table 4.46.
> + *
> + * @param   band_type           array of the used band type
> + * @param   band_type_run_end   array of the last scalefactor band of a band type run
> + *
> + * The band_type* arrays have indices [window group][scalefactor band].
> + * @return  Returns error status. 0 - OK, !0 - error
> + */
> +static int decode_band_types(AACContext * ac, enum BandType band_type[][64],

section data or band types ?


> +        int band_type_run_end[][64], GetBitContext * gb, IndividualChannelStream * ics) {
> +    int g;
> +    const int bits = (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) ? 3 : 5;
> +    for (g = 0; g < ics->num_window_groups; g++) {
> +        int k = 0;
> +        while (k < ics->max_sfb) {
> +            uint8_t sect_len = k;
> +            int sect_len_incr;
> +            int sect_band_type = get_bits(gb, 4);
> +            if (sect_band_type == 12) {
> +                av_log(ac->avccontext, AV_LOG_ERROR, "invalid band type\n");
> +                return -1;
> +            }
> +            while ((sect_len_incr = get_bits(gb, bits)) == (1 << bits)-1)
> +                sect_len += sect_len_incr;
> +            sect_len += sect_len_incr;
> +            if (sect_len > ics->max_sfb) {
> +                av_log(ac->avccontext, AV_LOG_ERROR,
> +                    "Number of bands (%d) exceeds limit (%d).\n",
> +                    sect_len, ics->max_sfb);
> +                return -1;
> +            }

ok


> +            for (; k < sect_len; k++) {
> +                band_type        [g][k] = sect_band_type;
> +                band_type_run_end[g][k] = sect_len;
> +            }
> +        }
> +    }
> +    return 0;
> +}
> +

> +/**
> + * Decode scale_factor_data; reference: table 4.47.

decode scale factors


> + *
> + * @param   mix_gain            channel gain (Not used by AAC bitstream.)
> + * @param   global_gain         first scalefactor value as scalefactors are differentially coded
> + * @param   band_type           array of the used band type
> + * @param   band_type_run_end   array of the last scalefactor band of a band type run
> + * @param   sf                  array of scalefactors or intensity stereo positions
> + *
> + * The band_type* and sf arrays have indices [window group][scalefactor band].
> + * @return  Returns error status. 0 - OK, !0 - error
> + */
> +static int decode_scalefactors(AACContext * ac, float sf[][64], GetBitContext * gb,
> +        float mix_gain, unsigned int global_gain, IndividualChannelStream * ics,
> +        enum BandType band_type[][64], int band_type_run_end[][64]) {
> +    const int sf_offset = ac->sf_offset + (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE ? 12 : 0);
> +    int g, i;
> +    int offset[3] = { global_gain, global_gain - 90, 100 };
> +    int noise_flag = 1;
> +    static const char *sf_str[3] = { "Global gain", "Noise gain", "Intensity stereo position" };
> +    ics->intensity_present = 0;
> +    for (g = 0; g < ics->num_window_groups; g++) {
> +        for (i = 0; i < ics->max_sfb;) {
> +            int run_end = band_type_run_end[g][i];
> +            if (band_type[g][i] == ZERO_BT) {
> +                for(; i < run_end; i++)
> +                    sf[g][i] = 0.;
> +            }else if((band_type[g][i] == INTENSITY_BT) || (band_type[g][i] == INTENSITY_BT2)) {
> +                ics->intensity_present = 1;
> +                for(; i < run_end; i++) {
> +                    offset[2] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
> +                    if(offset[2] > 255U) {
> +                        av_log(ac->avccontext, AV_LOG_ERROR,
> +                            "%s (%d) out of range.\n", sf_str[2], offset[2]);
> +                        return -1;
> +                    }
> +                    sf[g][i] =  ff_aac_pow2sf_tab[-offset[2] + 300];
> +                    sf[g][i] *= mix_gain;
> +                }
> +            }else if(band_type[g][i] == NOISE_BT) {
> +                for(; i < run_end; i++) {
> +                    if(noise_flag-- > 0)
> +                        offset[1] += get_bits(gb, 9) - 256;
> +                    else
> +                        offset[1] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
> +                    if(offset[1] > 255U) {
> +                        av_log(ac->avccontext, AV_LOG_ERROR,
> +                            "%s (%d) out of range.\n", sf_str[1], offset[1]);
> +                        return -1;
> +                    }
> +                    sf[g][i] = -ff_aac_pow2sf_tab[ offset[1] + sf_offset];
> +                    sf[g][i] *= mix_gain;
> +                }
> +            }else {
> +                for(; i < run_end; i++) {
> +                    offset[0] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
> +                    if(offset[0] > 255U) {
> +                        av_log(ac->avccontext, AV_LOG_ERROR,
> +                            "%s (%d) out of range.\n", sf_str[0], offset[0]);
> +                        return -1;
> +                    }
> +                    sf[g][i] = -ff_aac_pow2sf_tab[ offset[0] + sf_offset];
> +                    sf[g][i] *= mix_gain;
> +                }
> +            }
> +        }
> +    }
> +    return 0;
> +}

ok


> +
> +/**
> + * Decode pulse data; reference: table 4.7.
> + */
> +static void decode_pulses(Pulse * pulse, GetBitContext * gb) {
> +    int i;
> +    pulse->num_pulse = get_bits(gb, 2) + 1;
> +    pulse->start = get_bits(gb, 6);
> +    for (i = 0; i < pulse->num_pulse; i++) {
> +        pulse->offset[i] = get_bits(gb, 5);
> +        pulse->amp   [i] = get_bits(gb, 4);
> +    }
> +}

ok


[...]
> +
> +/**
> + * Add pulses with particular amplitudes to the quantized spectral data; reference: 4.6.3.3.
> + *
>   * @param   pulse   pointer to pulse data struct
>   * @param   icoef   array of quantized spectral data
>   */

ok


> @@ -213,6 +830,764 @@
>      }
>  }
>  

> +/**
> + * Dequantize and scale spectral data; reference: 4.6.3.3.
> + *
> + * @param   icoef       array of quantized spectral data
> + * @param   band_type   array of the used band type
> + * @param   sf          array of scalefactors or intensity stereo positions
> + * @param   coef        array of dequantized, scaled spectral data

why are the coeffs not dequantized in place, that is coef==icoef ?


[...]
> +/**
> + * Decode an individual_channel_stream payload; reference: table 4.44.
> + *
> + * @param   common_window   Channels have independent [0], or shared [1], Individual Channel Stream information.
> + * @param   scale_flag      scalable [1] or non-scalable [0] AAC (Unused until scalable AAC is implemented.)
> + * @return  Returns error status. 0 - OK, !0 - error
> + */
> +static int decode_ics(AACContext * ac, SingleChannelElement * sce, GetBitContext * gb, int common_window, int scale_flag) {
> +    int icoeffs[1024];
> +    Pulse pulse;
> +    TemporalNoiseShaping * tns = &sce->tns;
> +    IndividualChannelStream * ics = &sce->ics;
> +    float * out = sce->coeffs;
> +    int global_gain, pulse_present = 0;
> +

> +    pulse.num_pulse = 0;
> +    pulse.start     = 0;

what effect does start have when num_pulse=0 ?


[...]
> +/**
> + * intensity stereo decoding; reference: 4.6.8.2.3
> + */
> +static void apply_intensity_stereo(ChannelElement * cpe) {
> +    const IndividualChannelStream * ics = &cpe->ch[1].ics;
> +    SingleChannelElement * sce1 = &cpe->ch[1];
> +    float *coef0 = cpe->ch[0].coeffs, *coef1 = cpe->ch[1].coeffs;
> +    const uint16_t * offsets = ics->swb_offset;
> +    int g, gp, i, k;
> +    int c;
> +    float scale;
> +    for (g = 0; g < ics->num_window_groups; g++) {
> +        for (gp = 0; gp < ics->group_len[g]; gp++) {
> +            for (i = 0; i < ics->max_sfb;) {
> +                if (sce1->band_type[g][i] == INTENSITY_BT || sce1->band_type[g][i] == INTENSITY_BT2) {

should the gp loop maybe be inside the if() so its not redone ? 


[...]
> +/**
> + * Decode a channel_pair_element; reference: table 4.4.
> + *
> + * @param   tag Identifies the instance of a syntax element.
> + * @return  Returns error status. 0 - OK, !0 - error
> + */
> +static int decode_cpe(AACContext * ac, GetBitContext * gb, int tag) {
> +    int i, ret, common_window;
> +    ChannelElement * cpe;
> +
> +    cpe = ac->che[ID_CPE][tag];
> +    common_window = get_bits1(gb);
> +    if (common_window) {
> +        if (decode_ics_info(ac, &cpe->ch[0].ics, gb, 1))
> +            return -1;
> +        i = cpe->ch[1].ics.use_kb_window[0];
> +        cpe->ch[1].ics = cpe->ch[0].ics;
> +        cpe->ch[1].ics.use_kb_window[1] = i;
> +        if((cpe->ms.present = get_bits(gb, 2)))
> +            decode_mid_side_stereo(cpe, gb);
> +    } else {
> +        cpe->ms.present = 0;
> +    }
> +    if ((ret = decode_ics(ac, &cpe->ch[0], gb, common_window, 0)))
> +        return ret;
> +    if ((ret = decode_ics(ac, &cpe->ch[1], gb, common_window, 0)))
> +        return ret;
> +
> +    if (common_window && cpe->ms.present)
> +        apply_mid_side_stereo(cpe);
> +
> +    if (cpe->ch[1].ics.intensity_present)
> +        apply_intensity_stereo(cpe);

ms.present can be a local variable i think


[...]
> +/**
> + * Parse Spectral Band Replication extension data; reference: table 4.55.
> + *
> + * @param   crc flag indicating the presence of CRC checksum
> + * @param   cnt length of ID_FIL syntactic element in bytes
> + * @return  Returns number of bytes consumed from the ID_FIL element.
> + */
> +static int decode_sbr_extension(AACContext * ac, GetBitContext * gb, int crc, int cnt) {
> +    // TODO : sbr_extension implementation
> +    av_log(ac->avccontext, AV_LOG_DEBUG, "aac: SBR not yet supported.\n");
> +    skip_bits_long(gb, 8*cnt - 4); // -4 due to reading extension type
> +    return cnt;
> +}

ok


> +
> +/**
> + * Parse whether channels are to be excluded from Dynamic Range Compression; reference: table 4.53.
> + *
> + * @return  Returns number of bytes consumed.
> + */
> +static int decode_drc_channel_exclusions(DynamicRangeControl *che_drc, GetBitContext * gb) {
> +    int i;
> +    int n = 0;
> +    int num_excl_chan = 0;
> +
> +    do {
> +        for (i = 0; i < 7; i++)
> +            che_drc->exclude_mask[num_excl_chan + i] = get_bits1(gb);
> +        n++;
> +        num_excl_chan += 7;
> +    } while (num_excl_chan < MAX_CHANNELS - 7 && (che_drc->additional_excluded_chns[n-1] = get_bits1(gb)));
> +
> +    return n;
> +}

iam still not happy with this
If i understand correctly this is a mask of channels, it should need just
one array, not 2 (exclude_mask / additional_excluded_chns)


[...]
> +/**
> + * Parse extension data (incomplete); reference: table 4.51.
> + *
> + * @param   cnt length of ID_FIL syntactic element in bytes
> + */

> +static int extension_payload(AACContext * ac, GetBitContext * gb, int cnt) {

parse/decode/read...



> +    int crc_flag = 0;
> +    int res = cnt;
> +    switch (get_bits(gb, 4)) { // extension type
> +        case EXT_SBR_DATA_CRC:
> +            crc_flag++;
> +        case EXT_SBR_DATA:
> +            res = decode_sbr_extension(ac, gb, crc_flag, cnt);
> +            break;
> +        case EXT_DYNAMIC_RANGE:
> +            res = decode_dynamic_range(&ac->che_drc, gb, cnt);
> +            break;
> +        case EXT_FILL:
> +        case EXT_FILL_DATA:
> +        case EXT_DATA_ELEMENT:
> +        default:
> +            skip_bits_long(gb, 8*cnt - 4);
> +            break;
> +    };
> +    return res;
> +}

ok


[...]
> +/**
> + * Apply dependent channel coupling (applied before IMDCT).
> + *
> + * @param   index   index into coupling gain array
> + */
> +static void apply_dependent_coupling(AACContext * ac, SingleChannelElement * sce, ChannelElement * cc, int index) {
> +    IndividualChannelStream * ics = &cc->ch[0].ics;
> +    const uint16_t * offsets = ics->swb_offset;
> +    float * dest = sce->coeffs;
> +    const float * src = cc->ch[0].coeffs;
> +    int g, i, group, k;
> +    if(ac->m4ac.object_type == AOT_AAC_LTP) {
> +        av_log(ac->avccontext, AV_LOG_ERROR,
> +               "Dependent coupling is not supported together with LTP\n");
> +        return;
> +    }


> +    for (g = 0; g < ics->num_window_groups; g++) {
> +        for (i = 0; i < ics->max_sfb; i++) {
> +            if (cc->ch[0].band_type[g][i] != ZERO_BT) {
> +                float gain = cc->coup.gain[index][g][i] * sce->mixing_gain;
> +                for (group = 0; group < ics->group_len[g]; group++) {
> +                    for (k = offsets[i]; k < offsets[i+1]; k++) {
> +                        // XXX dsputil-ize
> +                        dest[group*128+k] += gain * src[group*128+k];
> +                    }
> +                }
> +            }
> +        }
> +        dest += ics->group_len[g]*128;
> +        src  += ics->group_len[g]*128;
> +    }
> +}

ok


> +
> +/**
> + * Apply independent channel coupling (applied after IMDCT).
> + *
> + * @param   index   index into coupling gain array
> + */
> +static void apply_independent_coupling(AACContext * ac, SingleChannelElement * sce, ChannelElement * cc, int index) {
> +    int i;
> +    float gain = cc->coup.gain[index][0][0] * sce->mixing_gain;
> +    for (i = 0; i < 1024; i++)
> +        sce->ret[i] += gain * (cc->ch[0].ret[i] - ac->add_bias);
> +}

ok


> +
> +/**
> + * channel coupling transformation interface
> + *
> + * @param   index   index into coupling gain array
> + * @param   apply_coupling_method   pointer to (in)dependent coupling function
> + */
> +static void apply_channel_coupling(AACContext * ac, ChannelElement * cc,
> +        void (*apply_coupling_method)(AACContext * ac, SingleChannelElement * sce, ChannelElement * cc, int index))
> +{
> +    int c;
> +    int index = 0;
> +    ChannelCoupling * coup = &cc->coup;
> +    for (c = 0; c <= coup->num_coupled; c++) {
> +        if (     !coup->is_cpe[c] && ac->che[ID_SCE][coup->tag_select[c]]) {
> +            apply_coupling_method(ac, &ac->che[ID_SCE][coup->tag_select[c]]->ch[0], cc, index++);
> +        } else if(coup->is_cpe[c] && ac->che[ID_CPE][coup->tag_select[c]]) {

> +            if (!coup->l[c] && !coup->r[c]) {
> +                apply_coupling_method(ac, &ac->che[ID_CPE][coup->tag_select[c]]->ch[0], cc, index);
> +                apply_coupling_method(ac, &ac->che[ID_CPE][coup->tag_select[c]]->ch[1], cc, index++);
> +            }

The struct docs say r/l are "apply gain to r/l channel" this contradicts the
0/0 case. which is correct and which is wrong?


[...]
> +/**
> + * Convert spectral data to float samples, applying all supported tools as appropriate.
> + */
> +static int spectral_to_sample(AACContext * ac) {
> +    int i, j;
> +    for (i = 0; i < MAX_TAGID; i++) {
> +        for(j = 0; j < 4; j++) {
> +            ChannelElement *che = ac->che[j][i];
> +            if(che) {
> +                if(j == ID_CCE && !che->coup.is_indep_coup && (che->coup.domain == 0))
> +                    apply_channel_coupling(ac, che, apply_dependent_coupling);
> +                if(che->ch[0].tns.present)
> +                    apply_tns(che->ch[0].coeffs, &che->ch[0].tns, &che->ch[0].ics, 1);
> +                if(che->ch[1].tns.present)
> +                    apply_tns(che->ch[1].coeffs, &che->ch[1].tns, &che->ch[1].ics, 1);
> +                if(j == ID_CCE && !che->coup.is_indep_coup && (che->coup.domain == 1))
> +                    apply_channel_coupling(ac, che, apply_dependent_coupling);
> +                imdct_and_windowing(ac, &che->ch[0]);
> +                if(j == ID_CPE)
> +                    imdct_and_windowing(ac, &che->ch[1]);
> +                if(j == ID_CCE && che->coup.is_indep_coup && (che->coup.domain == 1))
> +                    apply_channel_coupling(ac, che, apply_independent_coupling);
> +            }
> +        }
> +    }
> +    return 0;
> +}

this function can be void it always returns 0


> +
> +/**
> + * Conduct matrix mix-down and float to int16 conversion.
> + *
> + * @param   data        pointer to output data
> + * @param   data_size   output data size in bytes
> + * @return  Returns error status. 0 - OK, !0 - error
> + */
> +static int mixdown_and_convert_to_int16(AVCodecContext * avccontext, uint16_t * data, int * data_size) {
> +    AACContext * ac = avccontext->priv_data;
> +    int i;
> +    float *c, *l, *r, *sl, *sr, *out;
> +
> +    if (!ac->is_saved) {
> +        ac->is_saved = 1;
> +        *data_size = 0;
> +        return 0;
> +    }
> +
> +    i = 1024 * avccontext->channels * sizeof(int16_t);
> +    if(*data_size < i) {
> +        av_log(avccontext, AV_LOG_ERROR,
> +               "Output buffer too small (%d) or trying to output too many samples (%d) for this frame.\n",
> +               *data_size, i);
> +        return -1;
> +    }
> +    *data_size = i;
> +
> +    if(ac->mm[MIXDOWN_CENTER]) {
> +        /* matrix mix-down */
> +        l   = ac->mm[MIXDOWN_FRONT ]->ch[0].ret;
> +        r   = ac->mm[MIXDOWN_FRONT ]->ch[1].ret;
> +        c   = ac->mm[MIXDOWN_CENTER]->ch[0].ret;
> +        sl  = ac->mm[MIXDOWN_BACK  ]->ch[0].ret;
> +        sr  = ac->mm[MIXDOWN_BACK  ]->ch[1].ret;
> +        out = ac->interleaved_output;
> +
> +        // XXX dsputil-ize
> +        if(avccontext->channels == 2) {
> +            if(ac->pcs.pseudo_surround) {
> +                for(i = 0; i < 1024; i++) {
> +                    *out++ = *l++ + *c   - *sl   - *sr   + ac->add_bias;
> +                    *out++ = *r++ + *c++ + *sl++ + *sr++ - ac->add_bias * 3;
> +                }
> +            } else {
> +                for(i = 0; i < 1024; i++) {
> +                    *out++ = *l++ + *c   + *sl++ - ac->add_bias * 2;
> +                    *out++ = *r++ + *c++ + *sr++ - ac->add_bias * 2;
> +                }
> +            }
> +
> +        } else {
> +            assert(avccontext->channels == 1);
> +            for(i = 0; i < 1024; i++) {
> +                *out++ = *l++ + *r++ + *c++ + *sl++ + *sr++ - ac->add_bias * 4;
> +            }
> +        }
> +
> +        ac->dsp.float_to_int16(data, ac->interleaved_output, 1024 * avccontext->channels);
> +    } else {
> +        ac->dsp.float_to_int16_interleave(data, (const float **)ac->output_data, 1024, avccontext->channels);
> +    }
> +
> +    return 0;
> +}

mixdown should be done prior to the IMDCT when possible and the IMDCT skipped
for channels that are not needed, or _ALL_ mixdown code should be removed
from the AAC decoder, as in that case mixdown can be done outside of the
decoder equally well and cleaner.


> +
> +
> +static int aac_decode_frame(AVCodecContext * avccontext, void * data, int * data_size, const uint8_t * buf, int buf_size) {
> +    AACContext * ac = avccontext->priv_data;
> +    GetBitContext gb;
> +    enum RawDataBlockID id;
> +    int err, tag;
> +
> +    init_get_bits(&gb, buf, buf_size*8);
> +
> +    // parse
> +    while ((id = get_bits(&gb, 3)) != ID_END) {
> +        tag = get_bits(&gb, 4);
> +        err = -1;
> +        switch (id) {
> +

> +        case ID_SCE:
> +            if(!ac->che[ID_SCE][tag]) {

this id / tag naming confuses the hell out of me ...
Both values together identify the "thing". the id here is the
type (channel pair / single channel / ...)


[...]
> @@ -239,3 +1614,4 @@
>      aac_decode_frame,
>      .long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"),
>  };
> +

ok


> Index: libavcodec/aac.h
> ===================================================================
> --- libavcodec/aac.h	(revision 14624)
> +++ libavcodec/aac.h	(working copy)
> @@ -42,8 +44,63 @@
>          ff_aac_spectral_codes[num], sizeof(ff_aac_spectral_codes[num][0]), sizeof(ff_aac_spectral_codes[num][0]), \
>          size);
>  
> +#define MAX_CHANNELS 64

ok as long as its not causing huge arrays, like if there was a use like
array[MAX_FRAME][MAX_TAGS][MAX_BANDS][MAX_CHANNELS] ...



[...]
> +enum AudioObjectType {
> +    AOT_NULL,
> +                               // Support?                Name
> +    AOT_AAC_MAIN,              ///< Y                       Main
> +    AOT_AAC_LC,                ///< Y                       Low Complexity
> +    AOT_AAC_SSR,               ///< N (code in SoC repo)    Scalable Sample Rate
> +    AOT_AAC_LTP,               ///< N (code in SoC repo)    Long Term Prediction
> +    AOT_SBR,                   ///< N (in progress)         Spectral Band Replication
> +    AOT_AAC_SCALABLE,          ///< N                       Scalable
> +    AOT_TWINVQ,                ///< N                       Twin Vector Quantizer
> +    AOT_CELP,                  ///< N                       Code Excited Linear Prediction
> +    AOT_HVXC,                  ///< N                       Harmonic Vector eXcitation Coding
> +    AOT_TTSI             = 12, ///< N                       Text-To-Speech Interface
> +    AOT_MAINSYNTH,             ///< N                       Main Synthesis
> +    AOT_WAVESYNTH,             ///< N                       Wavetable Synthesis
> +    AOT_MIDI,                  ///< N                       General MIDI
> +    AOT_SAFX,                  ///< N                       Algorithmic Synthesis and Audio Effects
> +    AOT_ER_AAC_LC,             ///< N                       Error Resilient Low Complexity
> +    AOT_ER_AAC_LTP       = 19, ///< N                       Error Resilient Long Term Prediction
> +    AOT_ER_AAC_SCALABLE,       ///< N                       Error Resilient Scalable
> +    AOT_ER_TWINVQ,             ///< N                       Error Resilient Twin Vector Quantizer
> +    AOT_ER_BSAC,               ///< N                       Error Resilient Bit-Sliced Arithmetic Coding
> +    AOT_ER_AAC_LD,             ///< N                       Error Resilient Low Delay
> +    AOT_ER_CELP,               ///< N                       Error Resilient Code Excited Linear Prediction
> +    AOT_ER_HVXC,               ///< N                       Error Resilient Harmonic Vector eXcitation Coding
> +    AOT_ER_HILN,               ///< N                       Error Resilient Harmonic and Individual Lines plus Noise
> +    AOT_ER_PARAM,              ///< N                       Error Resilient Parametric
> +    AOT_SSC,                   ///< N                       SinuSoidal Coding
> +};

ok

[...]

> +
> +enum ExtensionPayloadID {
> +    EXT_FILL,
> +    EXT_FILL_DATA,
> +    EXT_DATA_ELEMENT,
> +    EXT_DYNAMIC_RANGE = 0xb,
> +    EXT_SBR_DATA      = 0xd,
> +    EXT_SBR_DATA_CRC  = 0xe,
> +};
> +
>  enum WindowSequence {
>      ONLY_LONG_SEQUENCE,
>      LONG_START_SEQUENCE,
> @@ -51,6 +108,17 @@
>      LONG_STOP_SEQUENCE,
>  };
>  
> +enum BandType {
> +    ZERO_BT        = 0,     ///< Scalefactors and spectral data are all zero.
> +    FIRST_PAIR_BT  = 5,     ///< This and later band types encode two values (rather than four) with one code word.
> +    ESC_BT         = 11,    ///< Spectral data are coded with an escape sequence.
> +    NOISE_BT       = 13,    ///< Spectral data are scaled white noise not coded in the bitstream.
> +    INTENSITY_BT2  = 14,    ///< Scalefactor data are intensity stereo positions.
> +    INTENSITY_BT   = 15,    ///< Scalefactor data are intensity stereo positions.
> +};
> +
> +#define IS_CODEBOOK_UNSIGNED(x) ((x - 1) & 10)
> +

ok


>  enum ChannelType {
>      AAC_CHANNEL_FRONT = 1,
>      AAC_CHANNEL_SIDE  = 2,

This maybe should be renamed to ChannelPosition, avoids confusing
type with pair/single channel elements


> @@ -60,24 +128,182 @@
>  };
>  
>  /**
> + * mix-down channel types
> + * MIXDOWN_CENTER is the index into the mix-down arrays for a Single Channel Element with AAC_CHANNEL_FRONT.
> + * MIXDOWN_(BACK|FRONT) are the indices for Channel Pair Elements with AAC_CHANNEL_(BACK|FRONT).
> + */
> +enum {
> +    MIXDOWN_CENTER,
> +    MIXDOWN_FRONT,
> +    MIXDOWN_BACK,
> +};

the descriptions for each should be with each not clustered before all.


> +
> +/**
> + * Program configuration - describes how channels are arranged. Either read from
> + * stream (ID_PCE) or created based on a default fixed channel arrangement.
> + */
> +typedef struct {

ok

> +    enum ChannelType che_type[4][MAX_TAGID]; ///< channel element type with the first index as the first 4 raw_data_block IDs


> +    int mono_mixdown_tag;                    ///< The SCE tag to use if user requests mono   output, -1 if not available.
> +    int stereo_mixdown_tag;                  ///< The CPE tag to use if user requests stereo output, -1 if not available.
> +    int mixdown_coeff_index;                 ///< 0-3
> +    int pseudo_surround;                     ///< Mix surround channels out of phase.
> +} ProgramConfig;

ok


[...]
> +/**
> + * M/S joint channel coding
> + */
> +typedef struct {
> +    int present;
> +    uint8_t mask[8][64];
> +} MidSideStereo;

cant this use the channel coupling struct and code? Its doing the same thing
i think


> +
> +/**
> + * Dynamic Range Control - decoded from the bitstream but not processed further.
> + */
> +typedef struct {
> +    int pce_instance_tag;                           ///< Indicates with which program the DRC info is associated.
> +    int dyn_rng_sgn[17];                            ///< DRC sign information; 0 - positive, 1 - negative
> +    int dyn_rng_ctl[17];                            ///< DRC magnitude information
> +    int exclude_mask[MAX_CHANNELS];                 ///< Channels to be excluded from DRC processing.
> +    int additional_excluded_chns[MAX_CHANNELS / 7]; /**< The exclude_mask bits are
> +                                                        coded in groups of 7 with 1 bit preceeding each group (except the first)
> +                                                        indicating that 7 more mask bits are coded. */
> +    int band_incr;                                  ///< Number of DRC bands greater than 1 having DRC info.
> +    int interpolation_scheme;                       ///< Indicates the interpolation scheme used in the SBR QMF domain.
> +    int band_top[17];                               ///< Indicates the top of the i-th DRC band in units of 4 spectral lines.
> +    int prog_ref_level;                             /**< A reference level for the long-term program audio level for all
> +                                                        channels combined. */
> +} DynamicRangeControl;

> +
> +/**
> + * pulse tool
> + */

mee, i thought the function was called like that, the struct too??


> +typedef struct {
> +    int num_pulse;
> +    int start;
> +    int offset[4];
> +    int amp[4];
> +} Pulse;
> +
> +/**
> + * coupling parameters
> + */
> +typedef struct {

ok


> +    int is_indep_coup;     ///< Set if independent coupling (i.e. after IMDCT).
> +    int domain;            ///< Controls if coupling is performed before (0) or after (1) the TNS decoding of the target channels.

wouldnt a
int or enum where= 0 (before TNS) 1(after TNS before IMDCT) 2(after IMDCT)
be cleaner?


> +    int num_coupled;       ///< number of target elements

> +    int is_cpe[9];         ///< Set if target is an CPE (otherwise it's an SCE).

Maybe some code could be simplified if these where ID_CPE/ID_SCE ?


> +    int tag_select[9];     ///< element tag index
> +    int l[9];              ///< Apply gain to left channel of a CPE.
> +    int r[9];              ///< Apply gain to right channel of a CPE.

do these arrays need 9 or 8 elements?


[...]

> +
> +/**
>   * main AAC context
>   */
>  typedef struct {
>      AVCodecContext * avccontext;
>  
> +    MPEG4AudioConfig m4ac;
> +


> +    int is_saved;                 ///< Set if elements have stored overlap from previous frame.
> +    DynamicRangeControl che_drc;
> +

ok


[...]
>      /** @} */
>  
>      /**
>       * @defgroup output   Members used for output interleaving and down-mixing.
>       * @{
>       */
> +    float *interleaved_output;                        ///< Interim buffer for interleaving PCM samples.


> +    float *output_data[MAX_CHANNELS];                 ///< Points to each element's 'ret' buffer (PCM output).

ok

[...]


> Index: libavcodec/aacdectab.h
> ===================================================================
> --- libavcodec/aacdectab.h	(revision 0)
> +++ libavcodec/aacdectab.h	(revision 0)
> @@ -0,0 +1,189 @@
> +/*
> + * AAC decoder data
> + * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
> + * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +/**
> + * @file aacdectab.h
> + * AAC decoder data
> + * @author Oded Shimon  ( ods15 ods15 dyndns org )
> + * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
> + */
> +
> +#ifndef FFMPEG_AACDECTAB_H
> +#define FFMPEG_AACDECTAB_H
> +
> +#include "aac.h"
> +
> +#include <stdint.h>
> +
> +static const uint16_t swb_offset_1024_96[] = {
> +      0,   4,   8,  12,  16,  20,  24,  28,
> +     32,  36,  40,  44,  48,  52,  56,  64,
> +     72,  80,  88,  96, 108, 120, 132, 144,
> +    156, 172, 188, 212, 240, 276, 320, 384,
> +    448, 512, 576, 640, 704, 768, 832, 896,
> +    960, 1024
> +};
> +
> +static const uint16_t swb_offset_128_96[] = {
> +    0, 4, 8, 12, 16, 20, 24, 32, 40, 48, 64, 92, 128
> +};
> +
> +static const uint16_t swb_offset_1024_64[] = {
> +      0,   4,   8,  12,  16,  20,  24,  28,
> +     32,  36,  40,  44,  48,  52,  56,  64,
> +     72,  80,  88, 100, 112, 124, 140, 156,
> +    172, 192, 216, 240, 268, 304, 344, 384,
> +    424, 464, 504, 544, 584, 624, 664, 704,
> +    744, 784, 824, 864, 904, 944, 984, 1024
> +};
> +
> +static const uint16_t swb_offset_1024_48[] = {
> +      0,   4,   8,  12,  16,  20,  24,  28,
> +     32,  36,  40,  48,  56,  64,  72,  80,
> +     88,  96, 108, 120, 132, 144, 160, 176,
> +    196, 216, 240, 264, 292, 320, 352, 384,
> +    416, 448, 480, 512, 544, 576, 608, 640,
> +    672, 704, 736, 768, 800, 832, 864, 896,
> +    928, 1024
> +};
> +
> +static const uint16_t swb_offset_128_48[] = {
> +     0,   4,   8,  12,  16,  20,  28,  36,
> +    44,  56,  68,  80,  96, 112, 128
> +};
> +
> +static const uint16_t swb_offset_1024_32[] = {
> +      0,   4,   8,  12,  16,  20,  24,  28,
> +     32,  36,  40,  48,  56,  64,  72,  80,
> +     88,  96, 108, 120, 132, 144, 160, 176,
> +    196, 216, 240, 264, 292, 320, 352, 384,
> +    416, 448, 480, 512, 544, 576, 608, 640,
> +    672, 704, 736, 768, 800, 832, 864, 896,
> +    928, 960, 992, 1024
> +};
> +
> +static const uint16_t swb_offset_1024_24[] = {
> +      0,   4,   8,  12,  16,  20,  24,  28,
> +     32,  36,  40,  44,  52,  60,  68,  76,
> +     84,  92, 100, 108, 116, 124, 136, 148,
> +    160, 172, 188, 204, 220, 240, 260, 284,
> +    308, 336, 364, 396, 432, 468, 508, 552,
> +    600, 652, 704, 768, 832, 896, 960, 1024
> +};
> +
> +static const uint16_t swb_offset_128_24[] = {
> +     0,   4,   8,  12,  16,  20,  24,  28,
> +    36,  44,  52,  64,  76,  92, 108, 128
> +};
> +
> +static const uint16_t swb_offset_1024_16[] = {
> +      0,   8,  16,  24,  32,  40,  48,  56,
> +     64,  72,  80,  88, 100, 112, 124, 136,
> +    148, 160, 172, 184, 196, 212, 228, 244,
> +    260, 280, 300, 320, 344, 368, 396, 424,
> +    456, 492, 532, 572, 616, 664, 716, 772,
> +    832, 896, 960, 1024
> +};
> +
> +static const uint16_t swb_offset_128_16[] = {
> +     0,   4,   8,  12,  16,  20,  24,  28,
> +    32,  40,  48,  60,  72,  88, 108, 128
> +};
> +
> +static const uint16_t swb_offset_1024_8[] = {
> +      0,  12,  24,  36,  48,  60,  72,  84,
> +     96, 108, 120, 132, 144, 156, 172, 188,
> +    204, 220, 236, 252, 268, 288, 308, 328,
> +    348, 372, 396, 420, 448, 476, 508, 544,
> +    580, 620, 664, 712, 764, 820, 880, 944,
> +    1024
> +};
> +
> +static const uint16_t swb_offset_128_8[] = {
> +     0,   4,   8,  12,  16,  20,  24,  28,
> +    36,  44,  52,  60,  72,  88, 108, 128
> +};
> +
> +static const uint16_t *swb_offset_1024[] = {
> +    swb_offset_1024_96, swb_offset_1024_96, swb_offset_1024_64,
> +    swb_offset_1024_48, swb_offset_1024_48, swb_offset_1024_32,
> +    swb_offset_1024_24, swb_offset_1024_24, swb_offset_1024_16,
> +    swb_offset_1024_16, swb_offset_1024_16, swb_offset_1024_8
> +};
> +
> +static const uint16_t *swb_offset_128[] = {
> +    /* The last entry on the following row is swb_offset_128_64 but is a
> +       duplicate of swb_offset_128_96. */
> +    swb_offset_128_96, swb_offset_128_96, swb_offset_128_96,
> +    swb_offset_128_48, swb_offset_128_48, swb_offset_128_48,
> +    swb_offset_128_24, swb_offset_128_24, swb_offset_128_16,
> +    swb_offset_128_16, swb_offset_128_16, swb_offset_128_8
> +};

Does storing band sizes instead of band offsets lead to simpler code?
if not, the code above is ok
Btw, what does swb stand for? it should be mentioned somewhere in a doxy


[...]
> Index: doc/general.texi
> ===================================================================
> --- doc/general.texi	(revision 14623)
> +++ doc/general.texi	(working copy)
> @@ -336,7 +336,7 @@
>  @item 4X IMA ADPCM           @tab     @tab  X
>  @item 8SVX audio             @tab     @tab  X
>  @item AAC                    @tab  X  @tab  X
> -    @tab Supported through the external library libfaac/libfaad.
> +    @tab Encoding is supported through the external library libfaac.
>  @item AC-3                   @tab IX  @tab IX
>      @tab liba52 can be used alternatively for decoding.
>  @item AMR-NB                 @tab  X  @tab  X

ok


[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

I am the wisest man alive, for I know one thing, and that is that I know
nothing. -- Socrates
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20080806/f011d8ce/attachment.pgp>



More information about the ffmpeg-devel mailing list