[FFmpeg-devel] [PATCH] AAC Decoder round 4

Robert Swain robert.swain
Wed Jul 30 14:41:54 CEST 2008


2008/7/27 Michael Niedermayer <michaelni at gmx.at>:
> On Fri, Jul 18, 2008 at 03:13:38PM +0100, Robert Swain wrote:
>> Index: libavcodec/aac.c
>> ===================================================================
>> --- libavcodec/aac.c  (revision 0)
>> +++ libavcodec/aac.c  (revision 0)

[...]

>> +
>> +/**
>> + * Audio Object Types
>> + */
>> +enum {
>> +    AOT_NULL,
>> +                               // Support?                Name
>> +    AOT_AAC_MAIN,              // Y                       Main
>> +    AOT_AAC_LC,                // Y                       Low Complexity
>> +    AOT_AAC_SSR,               // N (code in SoC repo)    Scalable Sample Rate
>> +    AOT_AAC_LTP,               // N (code in SoC repo)    Long Term Prediction
>> +    AOT_SBR,                   // N (in progress)         Spectral Band Replication
>> +    AOT_AAC_SCALABLE,          // N                       Scalable
>> +    AOT_TWINVQ,                // N                       Twin Vector Quantizer
>> +    AOT_CELP,                  // N                       Code Excited Linear Prediction
>> +    AOT_HVXC,                  // N                       Harmonic Vector eXcitation Coding
>> +    AOT_TTSI             = 12, // N                       Text-To-Speech Interface
>> +    AOT_MAINSYNTH,             // N                       Main Synthesis
>> +    AOT_WAVESYNTH,             // N                       Wavetable Synthesis
>> +    AOT_MIDI,                  // N                       General MIDI
>> +    AOT_SAFX,                  // N                       Algorithmic Synthesis and Audio Effects
>> +    AOT_ER_AAC_LC,             // N                       Error Resilient Low Complexity
>> +    AOT_ER_AAC_LTP       = 19, // N                       Error Resilient Long Term Prediction
>> +    AOT_ER_AAC_SCALABLE,       // N                       Error Resilient Scalable
>> +    AOT_ER_TWINVQ,             // N                       Error Resilient Twin Vector Quantizer
>> +    AOT_ER_BSAC,               // N                       Error Resilient Bit-Sliced Arithmetic Coding
>> +    AOT_ER_AAC_LD,             // N                       Error Resilient Low Delay
>> +    AOT_ER_CELP,               // N                       Error Resilient Code Excited Linear Prediction
>> +    AOT_ER_HVXC,               // N                       Error Resilient Harmonic Vector eXcitation Coding
>> +    AOT_ER_HILN,               // N                       Error Resilient Harmonic and Individual Lines plus Noise
>> +    AOT_ER_PARAM,              // N                       Error Resilient Parametric
>> +    AOT_SSC,                   // N                       SinuSoidal Coding
>> +};
>
> doxygen

Done.

>> +
>> +/**
>> + * IDs for raw_data_block
>> + */
>> +enum {
>> +    ID_SCE,
>> +    ID_CPE,
>> +    ID_CCE,
>> +    ID_LFE,
>> +    ID_DSE,
>> +    ID_PCE,
>> +    ID_FIL,
>> +    ID_END,
>> +};
>> +
>> +/**
>> + * IDs for extension_payload
>> + */
>> +enum {
>> +    EXT_FILL,
>> +    EXT_FILL_DATA,
>> +    EXT_DATA_ELEMENT,
>> +    EXT_DYNAMIC_RANGE = 0xb,
>> +    EXT_SBR_DATA      = 0xd,
>> +    EXT_SBR_DATA_CRC  = 0xe,
>> +};
>
> The enums should have some named type that is also used instead of int
> where appropriate

Done. ID_* is used with loop variable 'j' so I left those as type int
but they can be changed if you wish.

>> +
>> +/**
>> + * window sequences
>> + */
>
>> +enum WindowSequence {
>> +    ONLY_LONG_SEQUENCE,
>> +    LONG_START_SEQUENCE,
>> +    EIGHT_SHORT_SEQUENCE,
>> +    LONG_STOP_SEQUENCE,
>> +};
>
> ok (yes the doxy is not as long as it just repeats the name.

I've removed all these brief doxygen comments for the named enums.

>> +
>> +/**
>> + * special band types
>> + */
>> +enum BandType {
>> +    ZERO_BT        = 0,
>> +    FIRST_PAIR_BT  = 5,
>> +    ESC_BT         = 11,
>> +    NOISE_BT       = 13,
>> +    INTENSITY_BT2  = 14,
>> +    INTENSITY_BT   = 15,
>> +    ESC_FLAG       = 16,
>> +};
>
> These could benfit from some short comments on what each does

Done.

> [...]
>> +
>> +/**
>> + * mix-down channel types
>> + */
>> +enum {
>
> what is a mixdown channel type?
> i think the doxy should maybe awnser that

Done.

> [...]
>> +/**
>> + * Dynamic Range Control - decoded from the bitstream but not processed further.
>> + */
>> +typedef struct {
>> +    int pce_instance_tag;
>> +    int dyn_rng_sgn[17];
>> +    int dyn_rng_ctl[17];
>> +    int exclude_mask[MAX_CHANNELS];
>> +    int additional_excluded_chns[MAX_CHANNELS];
>> +    int band_incr;
>> +    int interpolation_scheme;
>> +    int band_top[17];
>> +    int prog_ref_level;
>> +} DynamicRangeControl;
>
> missing doxygen comments explaining what the fields are.

Done.

> [...]
>> +/**
>> + * Decode an array of 4 bit tag IDs, optionally interleaved with a stereo/mono switching bit.
>> + *
>> + * @param cpe_map Stereo (Channel Pair Element) map, NULL if stereo bit is not present.
>> + * @param sce_map mono (Single Channel Element) map
>> + * @param type speaker type/position for these channels
>> + */
>> +static void program_config_element_parse_tags(GetBitContext * gb, enum ChannelType *cpe_map,
>> +                                              enum ChannelType *sce_map, int n, enum ChannelType type) {
>
>> +    enum ChannelType *map;
>> +    while(n--) {
>> +        map = cpe_map && get_bits1(gb) ? cpe_map : sce_map; // stereo or mono map
>
> declaration and init can be merged

Done.

> [...]
>
>> +
>> +/**
>> + * Parse program configuration element; reference: table 4.2.
>> + */
>> +static int program_config_element(AACContext * ac, GetBitContext * gb) {
>> +    ProgramConfig pcs;
>> +    int num_front, num_side, num_back, num_lfe, num_assoc_data, num_cc;
>> +
>
>> +    memset(&pcs, 0, sizeof(pcs));
>
> if the struct is passed as argument then this could be factored out

Done.

>> +/**
>> + * Set up ProgramConfig, but based on a default channel configuration
>> + * as specified in table 1.17.
>> + */
>> +static int program_config_element_default(AACContext *ac, int channels)
>> +{
>> +    ProgramConfig pcs;
>> +
>> +    memset(&pcs, 0, sizeof(ProgramConfig));
>> +
>> +    /* Pre-mixed down-mix outputs are not available. */
>> +    pcs.mono_mixdown_tag   = -1;
>> +    pcs.stereo_mixdown_tag = -1;
>> +
>> +    if(channels < 1 || channels > 7) {
>> +        av_log(ac->avccontext, AV_LOG_ERROR, "invalid default channel configuration (%d channels)\n",
>> +               channels);
>> +        return -1;
>> +    }
>> +
>
>> +    /* default channel configurations:
>> +     *
>> +     * 1ch : front center (mono)
>> +     * 2ch : L + R (stereo)
>> +     * 3ch : front center + L + R
>> +     * 4ch : front center + L + R + back center
>> +     * 5ch : front center + L + R + back stereo
>> +     * 6ch : front center + L + R + back stereo + LFE
>> +     * 7ch : front center + L + R + outer front left + outer front right + back stereo + LFE
>> +     */
>> +
>> +    if(channels != 2)
>> +        pcs.che_type[ID_SCE][0] = AAC_CHANNEL_FRONT; // front center (or mono)
>> +    if(channels > 1)
>> +        pcs.che_type[ID_CPE][0] = AAC_CHANNEL_FRONT; // L + R (or stereo)
>> +    if(channels == 4)
>> +        pcs.che_type[ID_SCE][1] = AAC_CHANNEL_BACK;  // back center
>> +    if(channels > 4)
>> +        pcs.che_type[ID_CPE][(channels == 7) + 1]
>> +                                = AAC_CHANNEL_BACK;  // back stereo
>> +    if(channels > 5)
>> +        pcs.che_type[ID_LFE][0] = AAC_CHANNEL_LFE;   // LFE
>> +    if(channels == 7)
>> +        pcs.che_type[ID_CPE][1] = AAC_CHANNEL_FRONT; // outer front left + outer front right
>
> i tried to simplify this:
>
> switch(channels){
>    case 7: pcs.che_type[ID_CPE][                  1] = AAC_CHANNEL_FRONT; // outer front left + outer front right
>    case 6: pcs.che_type[ID_LFE][                  0] = AAC_CHANNEL_LFE;   // LFE
>    case 5: pcs.che_type[ID_CPE][(channels == 7) + 1] = AAC_CHANNEL_BACK;  // back stereo
>
> but then i realized your 7 ch case really uses 8 channels...

Mmm, I never noticed that before. Strange. I'll have to have a look to
see what the spec says about program configurations.

>> +
>> +    return output_configure(ac, &pcs);
>> +}
>
> this can similarly be factored out of the 2

Done.

>> +
>> +
>> +/**
>> + * Parse GA "General Audio" specific configuration; reference: table 4.1.
>> + */
>> +static int GASpecificConfig(AACContext * ac, GetBitContext * gb, int channels) {
>
> Please dont capitalize function names

Done.

> [...]
>
>> +
>> +    if (get_bits1(gb))       // dependsOnCoreCoder
>> +        skip_bits(gb, 14);   // coreCoderDelay
>
>> +    ext = get_bits1(gb);
>
> that variable name is too short and generic

Fixed.

> [...]
>> +
>> +/**
>> + * Parse audio specific configuration; reference: table 1.13.
>> + *
>> + * @param   data        pointer to AVCodecContext extradata
>> + * @param   data_size   size of AVCCodecContext extradata
>> + * @return  Returns error status. 0 - OK, !0 - error
>> + */
>
>> +static int AudioSpecificConfig(AACContext * ac, void *data, int data_size) {
>
> please dont capitalize function names

Done.

> [...]
>> +
>> +    if (avccontext->extradata && avccontext->extradata_size &&
>
> arent these redundant? size!=0 implicates data != NULL

Changed to avccontext->extradata_size <= 0 || audio_specific_config().
I assume extradata_size < 0 is invalid and/or impossible.

>> +
>> +    /* Initialize RNG dither. */
>> +    av_init_random(0x1f2e3d4c, &ac->random_state);
>
> the comment is redundant

Removed.

>> +
>> +/**
>> + * Decode a data_stream_element; reference: table 4.10.
>> + */
>> +static void data_stream_element(AACContext * ac, GetBitContext * gb) {
>
> this looks at best like a skip_something()

Indeed it does. It seems strange to me that a bit stream should
contain such useless bytes but I guess they are (or may be) actually
used for something. I could make it skip_data_stream_element().

>> +/**
>> + * Decode Individual Channel Stream info; reference: table 4.6.
>
> I dont know how it is with the AAC spec, but i really think numbers are not
> a good reference, they tend to be numbered differently in different
> revissions in some specs at least. Using the title of the thing you
> refer to is much nicer or even title + number.

In the spec the function is called ics_info() and is presented as a
'table' with the above number. All the parsing functions are presented
like this. People prodding around the spec should be able to find it
without immense difficulty.

>> + *
>> + * @param   common_window   Channels have independent [0], or shared [1], Individual Channel Stream information.
>> + */
>> +static int decode_ics_info(AACContext * ac, GetBitContext * gb, int common_window, IndividualChannelStream * ics) {
>
>> +    uint8_t grouping;
>
> why uint8_t ?

Because the variable is an unsigned integer and is 7 bits read from
the bit stream. It can be unsigned int or int as you please. I don't
mind.

>> +    if (get_bits1(gb)) {
>> +        av_log(ac->avccontext, AV_LOG_ERROR, "Reserved bit set.\n");
>> +        return -1;
>> +    }
>
>> +    ics->window_sequence_prev = ics->window_sequence;
>> +    ics->window_sequence = get_bits(gb, 2);
>> +    ics->use_kb_window[1] = ics->use_kb_window[0];
>> +    ics->use_kb_window[0] = get_bits1(gb);
>
> this is inconsistant [0][1] vs. X and X_prev

Changed to [0][1].

>> +    ics->num_window_groups = 1;
>> +    ics->group_len[0] = 1;
>> +    if (ics->window_sequence == EIGHT_SHORT_SEQUENCE) {
>> +        int i;
>> +        ics->max_sfb = get_bits(gb, 4);
>> +        grouping = get_bits(gb, 7);
>> +        for (i = 0; i < 7; i++) {
>> +            if (grouping & (1<<(6-i))) {
>> +                ics->group_len[ics->num_window_groups-1]++;
>> +            } else {
>> +                ics->num_window_groups++;
>> +                ics->group_len[ics->num_window_groups-1] = 1;
>> +            }
>> +        }
>> +        ics->swb_offset    =    swb_offset_128[ac->m4ac.sampling_index];
>> +        ics->num_swb       =       num_swb_128[ac->m4ac.sampling_index];
>
>> +        if(ics->max_sfb > ics->num_swb) {
>> +            av_log(ac->avccontext, AV_LOG_ERROR,
>> +                "Number of scalefactor bands in group (%d) exceeds limit (%d).\n",
>> +                ics->max_sfb, ics->num_swb);
>> +            return -1;
>> +        }
>
> is it safe to write invalid values in the context and then exit with an
> error? are they gurranteed not to be used or that their use is harmless?

If this function returns -1 it will fall through to aac_decode_frame
returning -1.

>> +
>> +        ics->num_windows   = 8;
>> +        ics->tns_max_bands = tns_max_bands_128[ac->m4ac.sampling_index];
>> +    } else {
>> +        ics->max_sfb = get_bits(gb, 6);
>> +        ics->swb_offset    =    swb_offset_1024[ac->m4ac.sampling_index];
>> +        ics->num_swb       =       num_swb_1024[ac->m4ac.sampling_index];
>
>> +        if(ics->max_sfb > ics->num_swb) {
>> +            av_log(ac->avccontext, AV_LOG_ERROR,
>> +                "Number of scalefactor bands in group (%d) exceeds limit (%d).\n",
>> +                ics->max_sfb, ics->num_swb);
>> +            return -1;
>> +        }
>
> duplicate

Refactored slightly to avoid duplication.

>> +
>> +/**
>> + * Decode section_data payload; reference: table 4.46.
>> + *
>> + * @param   band_type           array of the band type used for a window group's scalefactor band
>> + * @param   band_type_run_end   array of the last scalefactor band of a band type run for a window group's scalefactor band
>> + * @return  Returns error status. 0 - OK, !0 - error
>> + */
>> +static int decode_section(AACContext * ac, GetBitContext * gb, IndividualChannelStream * ics, enum BandType band_type[][64], int band_type_run_end[][64]) {
>
> wouldnt decode_band_types be a better name?

The original function name from the spec is in the description anyway
so yes. Done.

>> +    int g;
>> +    for (g = 0; g < ics->num_window_groups; g++) {
>
>> +        int bits = (ics->window_sequence == EIGHT_SHORT_SEQUENCE) ? 3 : 5;
>
> cant this be moved out of the loop ?

Done and made const.

>> +        int k = 0;
>> +        while (k < ics->max_sfb) {
>
> for(k=0; k < ics->max_sfb; ) {

I'll still need 'int k;' unless the declaration is moved to be
alongside that for 'g'.

>> +            uint8_t sect_len = k;
>> +            int sect_len_incr;
>> +            int sect_band_type = get_bits(gb, 4);
>> +            if (sect_band_type == 12) {
>> +                av_log(ac->avccontext, AV_LOG_ERROR, "invalid band type\n");
>> +                return -1;
>> +            }
>> +            while ((sect_len_incr = get_bits(gb, bits)) == (1 << bits)-1)
>> +                sect_len += sect_len_incr;
>> +            sect_len += sect_len_incr;
>> +            if (sect_len > ics->max_sfb) {
>> +                av_log(ac->avccontext, AV_LOG_ERROR,
>> +                    "Number of bands (%d) exceeds limit (%d).\n",
>> +                    sect_len, ics->max_sfb);
>> +                return -1;
>> +            }
>> +            for (; k < sect_len; k++) {
>
>> +                band_type[g][k] = sect_band_type;
>> +                band_type_run_end[g][k] = sect_len;
>
> band_type        [g][k] = sect_band_type;
> band_type_run_end[g][k] = sect_len;

Done.

>> +            }
>> +        }
>> +    }
>> +    return 0;
>> +}
>> +
>> +/**
>> + * Decode scale_factor_data; reference: table 4.47.
>> + *
>
>> + * @param   mix_gain            channel gain (Not used by AAC bitstream.)
>> + * @param   global_gain         first scalefactor value as scalefactors are differentially coded
>> + * @param   band_type           array of the band type used for a window group's scalefactor band
>
>> + * @param   band_type_run_end   array of the last scalefactor band of a band type run for a window group's scalefactor band
>
> this sounds a little confusing

Someone else (Diego?) said this was confusing but I'm not sure how to
simplify it and keep the same information present. I wanted to clarify
what the indices of the variable were. At the time of complaint I
suggested something like "array of the last scalefactor band of a band
type run with indices [window group][scalefactor band]"

>> + * @param   sf                  array of scalefactors or intensity stereo positions used for a window group's scalefactor band
>> + * @return  Returns error status. 0 - OK, !0 - error
>> + */
>> +static int decode_scalefactors(AACContext * ac, GetBitContext * gb, float mix_gain, unsigned int global_gain,
>> +        IndividualChannelStream * ics, const enum BandType band_type[][64], const int band_type_run_end[][64], float sf[][64]) {
>> +    const int sf_offset = ac->sf_offset + (ics->window_sequence == EIGHT_SHORT_SEQUENCE ? 12 : 0);
>> +    int g, i;
>> +    int offset[3] = { global_gain, global_gain - 90, 100 };
>> +    int noise_flag = 1;
>> +    static const char *sf_str[3] = { "Global gain", "Noise gain", "Intensity stereo position" };
>> +    ics->intensity_present = 0;
>> +    for (g = 0; g < ics->num_window_groups; g++) {
>> +        for (i = 0; i < ics->max_sfb;) {
>> +            int run_end = band_type_run_end[g][i];
>> +            if (band_type[g][i] == ZERO_BT) {
>> +                for(; i < run_end; i++)
>> +                    sf[g][i] = 0.;
>> +            }else if((band_type[g][i] == INTENSITY_BT) || (band_type[g][i] == INTENSITY_BT2)) {
>> +                ics->intensity_present = 1;
>
>> +                for(; i < run_end; i++) {
>> +                    offset[2] += get_vlc2(gb, mainvlc.table, 7, 3) - 60;
>> +                    if(offset[2] > 255) {
>> +                        av_log(ac->avccontext, AV_LOG_ERROR,
>> +                            "%s (%d) out of range.\n", sf_str[2], offset[2]);
>> +                        return -1;
>> +                    }
>> +                    sf[g][i] =  pow2sf_tab[-offset[2] + 300];
>> +                    sf[g][i] *= mix_gain;
>> +                }
>
> you are checking for a max of 255 here, cant this also overflow on the
> negative side? offset is not unsigned so <0 is not naturally caught

You're probably right. I'm looking for these checks in the spec at the
moment but can't find them in scalefactor_data().

>> +            }else if(band_type[g][i] == NOISE_BT) {
>> +                for(; i < run_end; i++) {
>> +                    if(noise_flag-- > 0)
>> +                        offset[1] += get_bits(gb, 9) - 256;
>> +                    else
>> +                        offset[1] += get_vlc2(gb, mainvlc.table, 7, 3) - 60;
>> +                    if(offset[1] > 255) {
>> +                        av_log(ac->avccontext, AV_LOG_ERROR,
>> +                            "%s (%d) out of range.\n", sf_str[1], offset[1]);
>> +                        return -1;
>> +                    }
>
> same
>
>
>> +                    sf[g][i] = -pow2sf_tab[ offset[1] + sf_offset];
>> +                    sf[g][i] *= mix_gain;
>> +                }
>> +            }else {
>> +                for(; i < run_end; i++) {
>> +                    offset[0] += get_vlc2(gb, mainvlc.table, 7, 3) - 60;
>> +                    if(offset[0] > 255) {
>> +                        av_log(ac->avccontext, AV_LOG_ERROR,
>> +                            "%s (%d) out of range.\n", sf_str[0], offset[0]);
>> +                        return -1;
>> +                    }
>
> and here as well
>
>
>> +                    sf[g][i] = -pow2sf_tab[ offset[0] + sf_offset];
>> +                    sf[g][i] *= mix_gain;
>> +                }
>> +            }
>> +        }
>> +    }
>> +    return 0;
>> +}
>> +
>
>> +/**
>> + * Decode pulse data; reference: table 4.7.
>> + */
>> +static void decode_pulses(AACContext * ac, GetBitContext * gb, Pulse * pulse) {
>
> for what does this need AACContext? and Pulse as destination should be the
> first argument IMHO (true for all similar functions that dont have AACContext
> as first as well)

I was also intending to stop using *Context and use only the needed
variables as arguments as I understand why this is preferred. I'll
spend some time going through all the functions to do this.

>> +    int i;
>> +    pulse->num_pulse = get_bits(gb, 2) + 1;
>> +    pulse->start = get_bits(gb, 6);
>> +    for (i = 0; i < pulse->num_pulse; i++) {
>
>> +        pulse->offset[i] = get_bits(gb, 5);
>> +        pulse->amp[i] = get_bits(gb, 4);
>
> vertical align

Done.

>> +    }
>> +}
>
>> +
>> +/**
>> + * Decode Temporal Noise Shaping data; reference: table 4.48.
>> + */
>> +static void decode_tns(AACContext * ac, GetBitContext * gb, const IndividualChannelStream * ics, TemporalNoiseShaping * tns) {
>> +    int w, filt, i, coef_len, coef_res = 0, coef_compress;
>> +    for (w = 0; w < ics->num_windows; w++) {
>> +        tns->n_filt[w] = get_bits(gb, ics->window_sequence == EIGHT_SHORT_SEQUENCE ? 1 : 2);
>> +
>> +        if (tns->n_filt[w])
>> +            coef_res = get_bits1(gb) + 3;
>> +
>> +        for (filt = 0; filt < tns->n_filt[w]; filt++) {
>> +            tns->length[w][filt] = get_bits(gb, ics->window_sequence == EIGHT_SHORT_SEQUENCE ? 4 : 6);
>> +
>> +            if ((tns->order[w][filt] = get_bits(gb, ics->window_sequence == EIGHT_SHORT_SEQUENCE ? 3 : 5))) {
>
> i think a
> int is8= ics->window_sequence == EIGHT_SHORT_SEQUENCE;
> would simplify the code

Done.

>> +                tns->direction[w][filt] = get_bits1(gb);
>> +                coef_compress = get_bits1(gb);
>> +                coef_len = coef_res - coef_compress;
>> +                tns->tmp2_map[w][filt] = tns_tmp2_map[2*coef_compress + coef_res - 3];
>> +
>> +                for (i = 0; i < tns->order[w][filt]; i++)
>> +                    tns->coef[w][filt][i] = get_bits(gb, coef_len);
>> +            }
>> +        }
>> +    }
>> +}
>> +
>
>> +/**
>> + * Decode Mid/Side data; reference: table 4.54.
>> + */
>> +static void decode_mid_side_stereo(AACContext * ac, GetBitContext * gb, ChannelElement * cpe) {
>> +    MidSideStereo * ms = &cpe->ms;
>> +    int g, i;
>> +    ms->present = get_bits(gb, 2);
>> +    if (ms->present == 1) {
>> +        for (g = 0; g < cpe->ch[0].ics.num_window_groups; g++)
>> +            for (i = 0; i < cpe->ch[0].ics.max_sfb; i++)
>> +                ms->mask[g][i] = get_bits1(gb);// << i;
>> +    } else if (ms->present == 2) {
>> +        for (g = 0; g < cpe->ch[0].ics.num_window_groups; g++)
>> +            memset(ms->mask[g], 1, cpe->ch[0].ics.max_sfb * sizeof(ms->mask[g][0]));
>> +    }
>
> ms->present==3 is the "whatever was in the array" variant?
> if not and its invalid, that should be checked for

It's reserved normally so I'll check it and print an error stating it
is invalid.

>> +}
>> +
>> +/**
>> + * Decode spectral data; reference: table 4.50.
>> + *
>
>> + * @param   band_type   array of the band type used for a window group's scalefactor band
>> + * @param   icoef       array of quantized spectral data
>> + * @return  Returns error status. 0 - OK, !0 - error
>> + */
>> +static int decode_spectrum(AACContext * ac, GetBitContext * gb, const IndividualChannelStream * ics, const enum BandType band_type[][64], int icoef[1024]) {
>
> If you want to have all output arrays after the input thats fine as well
> (if done consistently)
> if not i would prefer them to be before the inputs.

To me, reading left to right, in, in/out, out ordered by their use in
the function or alphabetically or something seems most logical. I'll
add it to my list and it will be done before resubmission.

>> +    int i, k, g;
>> +    const uint16_t * offsets = ics->swb_offset;
>> +
>> +    for (g = 0; g < ics->num_window_groups; g++) {
>> +        for (i = 0; i < ics->max_sfb; i++) {
>> +            const int cur_band_type = band_type[g][i];
>> +            const int dim = cur_band_type >= FIRST_PAIR_BT ? 2 : 4;
>> +            int group;
>> +            if (cur_band_type == ZERO_BT) {
>> +                for (group = 0; group < ics->group_len[g]; group++) {
>> +                    memset(icoef + group * 128 + offsets[i], 0, (offsets[i+1] - offsets[i])*sizeof(int));
>> +                }
>> +            }else if (cur_band_type != NOISE_BT && cur_band_type != INTENSITY_BT2 && cur_band_type != INTENSITY_BT) {
>> +                for (group = 0; group < ics->group_len[g]; group++) {
>> +                    for (k = offsets[i]; k < offsets[i+1]; k += dim) {
>> +                        const int index = get_vlc2(gb, books[cur_band_type - 1].table, 6, 3);
>> +                        const int coef_idx = (group << 7) + k;
>> +                        const int8_t *vq_ptr = &codebook_vectors[cur_band_type - 1][index * dim];
>> +                        int j;
>> +                        for (j = 0; j < dim; j++)
>> +                            icoef[coef_idx + j] = 1;
>
>> +                        if (IS_CODEBOOK_UNSIGNED(cur_band_type)) {
>
> this really is a constant within this loop and does not need to be
> recalculated in each iteration

Done.

>> +                            for (j = 0; j < dim; j++)
>> +                                if (vq_ptr[j] && get_bits1(gb))
>> +                                    icoef[coef_idx + j] = -1;
>
> maybe
> if(vq_ptr[j])
>    icoef[coef_idx + j] = 1 - 2*get_bits1(gb);
> is faster?
> it also wouldnt need the =1 loop before it

Done.

> [...]
>> +
>> +/**
>> + * Dequantize and scale spectral data; reference: 4.6.3.3.
>> + *
>> + * @param   icoef       array of quantized spectral data
>> + * @param   band_type   array of the band type used for a window group's scalefactor band
>> + * @param   sf          array of scalefactors or intensity stereo positions used for a window group's scalefactor band
>> + * @param   coef        array of dequantized, scaled spectral data
>> + */
>> +static void dequant(AACContext * ac, const IndividualChannelStream * ics, const int icoef[1024],
>> +        const enum BandType band_type[][64], const float sf[][64], float coef[1024]) {
>> +    const uint16_t * offsets = ics->swb_offset;
>> +    const int c = 1024/ics->num_window_groups;
>> +    int g, i, group, k;
>> +
>> +    for (g = 0; g < ics->num_window_groups; g++) {
>> +        memset(coef + g * 128 + offsets[ics->max_sfb], 0, sizeof(float)*(c - offsets[ics->max_sfb]));
>> +        for (i = 0; i < ics->max_sfb; i++) {
>> +            if (band_type[g][i] == NOISE_BT) {
>> +                for (group = 0; group < ics->group_len[g]; group++) {
>
>> +                    float scale = sf[g][i] / ((offsets[i+1] - offsets[i]) * PNS_MEAN_ENERGY);
>
> this value is constant within the loop and can thus be calculated outside

Done and made const.

> [...]
>> +/**
>> + * Decode an individual_channel_stream payload; reference: table 4.44.
>> + *
>> + * @param   common_window   Channels have independent [0], or shared [1], Individual Channel Stream information.
>> + * @param   scale_flag      scalable [1] or non-scalable [0] AAC (Unused until scalable AAC is implemented.)
>> + * @return  Returns error status. 0 - OK, !0 - error
>> + */
>> +static int decode_ics(AACContext * ac, GetBitContext * gb, int common_window, int scale_flag, SingleChannelElement * sce) {
>> +    int icoeffs[1024];
>> +    Pulse pulse;
>> +    TemporalNoiseShaping * tns = &sce->tns;
>> +    IndividualChannelStream * ics = &sce->ics;
>> +    float * out = sce->coeffs;
>> +    int global_gain;
>> +
>
>> +    pulse.present = 0;
>
> i think that should be moved closer to where it is set to a non 0 value

Done.

>> +    global_gain = get_bits(gb, 8);
>> +
>> +    if (!common_window && !scale_flag) {
>> +        if (decode_ics_info(ac, gb, 0, ics) < 0)
>> +            return -1;
>> +    }
>> +
>> +    if (decode_section(ac, gb, ics, sce->band_type, sce->band_type_run_end) < 0)
>> +        return -1;
>> +    if (decode_scalefactors(ac, gb, sce->mixing_gain, global_gain, ics, sce->band_type, sce->band_type_run_end, sce->sf) < 0)
>> +        return -1;
>> +
>> +    if (!scale_flag) {
>> +        if ((pulse.present = get_bits1(gb))) {
>> +            if (ics->window_sequence == EIGHT_SHORT_SEQUENCE) {
>> +                av_log(ac->avccontext, AV_LOG_ERROR, "Pulse tool not allowed in eight short sequence.\n");
>> +                return -1;
>> +            }
>> +            decode_pulses(ac, gb, &pulse);
>> +        }
>> +        if ((tns->present = get_bits1(gb)))
>> +            decode_tns(ac, gb, ics, tns);
>> +        if (get_bits1(gb)) {
>> +            av_log(ac->avccontext, AV_LOG_ERROR, "SSR not supported.\n");
>> +            return -1;
>> +        }
>> +    }
>> +
>> +    if (decode_spectrum(ac, gb, ics, sce->band_type, icoeffs) < 0)
>> +        return -1;
>> +    if (pulse.present)
>> +        add_pulses(ac, ics, &pulse, icoeffs);
>
> it seems pulse.present is not used by anything outside this function, if true
> it could be a normal local variable (and the field removed from the struct)

Done.

>> +    dequant(ac, ics, icoeffs, sce->band_type, sce->sf, out);
>> +    return 0;
>> +}
>> +
>
>> +/**
>> + * Mid/Side stereo decoding; reference: 4.6.8.1.3.
>> + */
>> +static void apply_mid_side_stereo(AACContext * ac, ChannelElement * cpe) {
>> +    const MidSideStereo * ms = &cpe->ms;
>> +    const IndividualChannelStream * ics = &cpe->ch[0].ics;
>> +    float *ch0 = cpe->ch[0].coeffs;
>> +    float *ch1 = cpe->ch[1].coeffs;
>> +    if (ms->present) {
>
> that if, might be prettier outside the function

I'll go through and move these outside the calls. Added to the to do
before next submission list.

> [...]
>> +/**
>> + * intensity stereo decoding; reference: 4.6.8.2.3
>> + */
>> +static void apply_intensity_stereo(AACContext * ac, ChannelElement * cpe) {
>> +    const IndividualChannelStream * ics = &cpe->ch[1].ics;
>> +    SingleChannelElement * sce1 = &cpe->ch[1];
>> +    float *coef0 = cpe->ch[0].coeffs, *coef1 = cpe->ch[1].coeffs;
>> +    const uint16_t * offsets = ics->swb_offset;
>> +    int g, gp, i, k;
>> +    int c;
>> +    float scale;
>> +    for (g = 0; g < ics->num_window_groups; g++) {
>> +        for (gp = 0; gp < ics->group_len[g]; gp++) {
>> +            for (i = 0; i < ics->max_sfb; i++) {
>
>> +                if (sce1->band_type[g][i] == INTENSITY_BT || sce1->band_type[g][i] == INTENSITY_BT2) {
>> +                    c = -1 + 2 * (sce1->band_type[g][i] - 14);
>
> i think these could be by using band_type_run_end be moved out of the loop
> but maybe its not worth speed vs complexity wise ?

Done.

> [...]
>> +/**
>> + * Decode a channel_pair_element; reference: table 4.4.
>> + *
>> + * @param   tag Identifies the instance of a syntax element.
>> + * @return  Returns error status. 0 - OK, !0 - error
>> + */
>> +static int decode_cpe(AACContext * ac, GetBitContext * gb, int tag) {
>> +    int i, ret;
>> +    ChannelElement * cpe;
>> +
>> +    cpe = ac->che[ID_CPE][tag];
>
>> +    cpe->common_window = get_bits1(gb);
>
> This variable does not seem used outside this function thus could be a local
> variable (and the one in the struct of course removed)

Done.

> [...]
>> +/**
>> + * Parse Spectral Band Replication extension data; reference: table 4.55.
>> + *
>> + * @param   crc flag indicating the presence of CRC checksum
>> + * @param   cnt length of ID_FIL syntactic element in bytes
>> + * @return  Returns number of bytes consumed from the ID_FIL element.
>> + */
>
>> +static int sbr_extension_data(AACContext * ac, GetBitContext * gb, int crc, int cnt) {
>
> The name is not good

decode_sbr_extension() ?

>> +    // TODO : sbr_extension implementation
>> +    av_log(ac->avccontext, AV_LOG_DEBUG, "aac: SBR not yet supported.\n");
>> +    skip_bits_long(gb, 8*cnt - 4); // -4 due to reading extension type
>> +    return cnt;
>> +}
>> +
>
>> +/**
>> + * Parse whether channels are to be excluded from Dynamic Range Compression; reference: table 4.53.
>> + *
>> + * @return  Returns number of bytes consumed.
>> + */
>
>> +static int excluded_channels(AACContext * ac, GetBitContext * gb) {
>
> this one neither

Unless you would be happy with decode_excluded_channels(), which I
think is ambiguous, I'm not sure what to suggest without adding extra
grammar. decode_channels_to_be_excluded_from_drc()?
decode_drc_channel_exclusions()?

>> +    int i;
>> +    int n = 1;
>> +    int num_excl_chan = 7;
>> +
>> +    for (i = 0; i < 7; i++)
>> +         ac->che_drc.exclude_mask[i] = get_bits1(gb);
>> +
>> +    while (n <= MAX_CHANNELS && num_excl_chan < MAX_CHANNELS - 7 && get_bits1(gb)) {
>> +        ac->che_drc.additional_excluded_chns[n-1]=1;
>> +        for (i = num_excl_chan; i < num_excl_chan+7; i++)
>> +            ac->che_drc.exclude_mask[i] = get_bits1(gb);
>> +        n++;
>> +        num_excl_chan += 7;
>> +    }
>
> the for and while can maybe be merged

How about this?

Index: aac.c
===================================================================
--- aac.c       (revision 2918)
+++ aac.c       (working copy)
@@ -279,7 +279,7 @@
     int dyn_rng_sgn[17];                            ///< DRC sign
information; 0 - positive, 1 - negative
     int dyn_rng_ctl[17];                            ///< DRC
magnitude information
     int exclude_mask[MAX_CHANNELS];                 ///< Channels to
be excluded from DRC processing.
-    int additional_excluded_chns[MAX_CHANNELS];     /**< The
exclude_mask bits are
+    int additional_excluded_chns[MAX_CHANNELS / 7]; /**< The
exclude_mask bits are
                                                         coded in
groups of 7 with 1 bit preceeding each group (except the first)
                                                         indicating
that 7 more mask bits are coded. */
     int band_incr;                                  ///< Number of
DRC bands greater than 1 having DRC info.
@@ -1607,21 +1607,13 @@
  * @return  Returns number of bytes consumed.
  */
 static int excluded_channels(AACContext * ac, GetBitContext * gb) {
-    int i;
-    int n = 1;
-    int num_excl_chan = 7;
+    int i, n;

-    for (i = 0; i < 7; i++)
-         ac->che_drc.exclude_mask[i] = get_bits1(gb);
+    for (i=0, n=0; i < MAX_CHANNELS && (((i+1)%7) ||
(ac->che_drc.additional_excluded_chns[n++] = get_bits1(gb))); i++)
+        ac->che_drc.exclude_mask[i] = get_bits1(gb);

-    while (n <= MAX_CHANNELS && num_excl_chan < MAX_CHANNELS - 7 &&
get_bits1(gb)) {
-        ac->che_drc.additional_excluded_chns[n-1]=1;
-        for (i = num_excl_chan; i < num_excl_chan+7; i++)
-            ac->che_drc.exclude_mask[i] = get_bits1(gb);
-        n++;
-        num_excl_chan += 7;
-    }
-    return n;
+    skip_bits(gb, 7 - (i + !(i == MAX_CHANNELS)) % 7);
+    return n + 1;
 }

 /**

I'm pretty sure it's right but it's untested. It would be handy if we
had a set of _feature documented_ samples that allow testing of every
feature/tool. As it is, I don't know which files use DRC to test, even
if DRC is unused at the moment.

>> +    return n;
>> +}
>> +
>> +/**
>> + * Decode dynamic range information; reference: table 4.52.
>> + *
>> + * @param   cnt length of ID_FIL syntactic element in bytes
>> + * @return  Returns number of bytes consumed.
>> + */
>
>> +static int dynamic_range_info(AACContext * ac, GetBitContext * gb, int cnt) {
>
> This function name is not good

decode_dynamic_range() ?

>> +    int n = 1;
>> +    int drc_num_bands = 1;
>> +    int i;
>> +
>> +    /* pce_tag_present? */
>> +    if(get_bits1(gb)) {
>> +        ac->che_drc.pce_instance_tag  = get_bits(gb, 4);
>> +        skip_bits(gb, 4); // tag_reserved_bits
>> +        n++;
>> +    }
>> +
>> +    /* excluded_chns_present? */
>> +    if(get_bits1(gb)) {
>> +        n += excluded_channels(ac, gb);
>> +    }
>> +
>> +    /* drc_bands_present? */
>> +    if (get_bits1(gb)) {
>
>> +        ac->che_drc.band_incr = get_bits(gb, 4);
>> +        ac->che_drc.interpolation_scheme = get_bits(gb, 4);
>
> vertical align

Done.

> [...]
>> +/**
>> + * Parse extension data (incomplete); reference: table 4.51.
>> + *
>> + * @param   cnt length of ID_FIL syntactic element in bytes
>> + */
>> +static int extension_payload(AACContext * ac, GetBitContext * gb, int cnt) {
>> +    int i = 0;
>> +    int res = cnt;
>> +    switch (get_bits(gb, 4)) { // extension type
>> +        case EXT_SBR_DATA_CRC:
>> +            i++;
>> +        case EXT_SBR_DATA:
>> +            res = sbr_extension_data(ac, gb, i, cnt);
>
> i is not an appropriate variable name for the existens of a crc.

Changed to 'crc_flag'.

> [...]
>> +/**
>> + * Apply dependent channel coupling.
>> + *
>> + * @param   index   which gain to use for coupling
>> + */
>> +static void dependent_coupling(AACContext * ac, ChannelElement * cc, SingleChannelElement * sce, int index) {
>
> The doxy already has a pretty good name, the function though does not.

apply_dependent_coupling() or apply_dependent_channel_coupling()?
(Same for independent_coupling().)

>> +    IndividualChannelStream * ics = &cc->ch[0].ics;
>> +    const uint16_t * offsets = ics->swb_offset;
>> +    float * dest = sce->coeffs;
>
>> +    float * src = cc->ch[0].coeffs;
>
> nitpick: const

Done.

> [...]
>
>> +
>> +/**
>> + * Apply independent channel coupling.
>> + *
>> + * @param   index   which gain to use for coupling
>> + */
>> +static void independent_coupling(AACContext * ac, ChannelElement * cc, SingleChannelElement * sce, int index) {
>> +    int i;
>> +    float gain = cc->coup.gain[index][0][0] * sce->mixing_gain;
>> +    for (i = 0; i < 1024; i++)
>> +        sce->ret[i] += gain * (cc->ch[0].ret[i] - ac->add_bias);
>> +}
>
> This one is applied to ret the previous coupling func to coeffs, why?
> (yes this should be documented if its correct and intended)

Dependent coupling is applied before IMDCT and windowing hence it is
applied to coeffs. Independent coupling is applied after, hence it is
applied to ret. This can be seen in the calls within the
spectral_to_sample() function but I have added appropriate comments.

>> +
>> +/**
>> + * channel coupling transformation interface
>> + *
>> + * @param   index   which gain to use for coupling
>> + */
>> +static void apply_channel_coupling(AACContext * ac, ChannelElement * cc,
>> +        void (*apply_coupling_method)(AACContext * ac, ChannelElement * cc, SingleChannelElement * sce, int index))
>
> how does a index select a gain?

It seems that there is either one (L/R/shared) or two (L+R
independent) gain_element lists coded for each CCE and these are
parsed into ChannelCoupling.gain[index][][] with index being
incremented with each list. So rather than being 'which gain to use'
it should be 'index into gain array' or something like that.

> and the oter parameters should be documented as well

Is it really necessary to document ac and cc?

>> +{
>> +    int c;
>> +    int index = 0;
>> +    ChannelCoupling * coup = &cc->coup;
>> +    for (c = 0; c <= coup->num_coupled; c++) {
>> +        if (     !coup->is_cpe[c] && ac->che[ID_SCE][coup->tag_select[c]]) {
>> +            apply_coupling_method(ac, cc, &ac->che[ID_SCE][coup->tag_select[c]]->ch[0], index++);
>> +        } else if(coup->is_cpe[c] && ac->che[ID_CPE][coup->tag_select[c]]) {
>> +            if (!coup->l[c] && !coup->r[c]) {
>> +                apply_coupling_method(ac, cc, &ac->che[ID_CPE][coup->tag_select[c]]->ch[0], index);
>> +                apply_coupling_method(ac, cc, &ac->che[ID_CPE][coup->tag_select[c]]->ch[1], index++);
>> +            }
>> +            if (coup->l[c])
>> +                apply_coupling_method(ac, cc, &ac->che[ID_CPE][coup->tag_select[c]]->ch[0], index++);
>> +            if (coup->r[c])
>> +                apply_coupling_method(ac, cc, &ac->che[ID_CPE][coup->tag_select[c]]->ch[1], index++);
>> +        } else {
>> +            av_log(ac->avccontext, AV_LOG_ERROR,
>> +                   "coupling target %sE[%d] not available\n",
>> +                   coup->is_cpe[c] ? "CP" : "SC", coup->tag_select[c]);
>> +            break;
>
> shouldnt this be return -1 ?

There's no such error check in the spec so it would continue through
the loop having done nothing in the iteration.

>> +        }
>> +    }
>> +}
>> +
>
>> +/**
>> + * Convert spectral data to float samples, applying all supported tools as appropriate.
>> + */
>> +static int spectral_to_sample(AACContext * ac) {
>> +    int i, j;
>> +    for (i = 0; i < MAX_TAGID; i++) {
>> +        for(j = 0; j < 4; j++) {
>> +            ChannelElement *che = ac->che[j][i];
>> +            if(che) {
>> +                if(j == ID_CCE && !che->coup.is_indep_coup && (che->coup.domain == 0))
>> +                    apply_channel_coupling(ac, che, dependent_coupling);
>> +                if(               che->ch[0].tns.present)
>> +                    apply_tns(ac, 1, &che->ch[0], che->ch[0].coeffs);
>
>> +                if(j == ID_CPE && che->ch[1].tns.present)
>> +                    apply_tns(ac, 1, &che->ch[1], che->ch[1].coeffs);
>
> is the check for ID_CPE needed here? if so why is tns.present for it non
> zero if its not applied?

Correct. Removed.

> [...]
>> +/**
>> + * Conduct matrix mix-down and float to int16 conversion.
>> + *
>> + * @param   data        pointer to output data
>> + * @param   data_size   output data size in bytes
>> + * @return  Returns error status. 0 - OK, !0 - error
>> + */
>> +static int output_samples(AVCodecContext * avccontext, uint16_t * data, int * data_size) {
>
> the name of the function and the 1 line description in the doxy do really
> describe seperate things.

mixdown_and_convert_to_int16()?

>> +    AACContext * ac = avccontext->priv_data;
>> +    int i;
>> +    float *c, *l, *r, *sl, *sr, *out;
>> +
>> +    if (!ac->is_saved) {
>> +        ac->is_saved = 1;
>> +        *data_size = 0;
>> +        return 0;
>> +    }
>> +
>
>> +    i = 1024 * avccontext->channels * sizeof(int16_t);
>> +    if(*data_size < i)
>> +        return -1;
>
> some error message would be nice

Done.

Rob




More information about the ffmpeg-devel mailing list