[FFmpeg-devel] [PATCH] AAC Decoder - Round 2.

Fri Jun 20 13:09:29 CEST 2008

On Thu, Jun 19, 2008 at 04:22:57PM +0100, Robert Swain wrote:
> Hello,
> 
> I would like to request another full review of the AAC decoder from
> the soc repository as pretty much everything that was addressable has
> been addressed. Though I do expect there will be more things to clean
> up.
> 
> See attached.
[...]

> +static const uint32_t inv_sampling_table[] = { 92017, 75132, 55426, 46009, 37655, 27713, 23004, 18783, 13856, 11502, 9391 };

unused

[...]
> +static const float ltp_coef[] = {
> +    0.570829, 0.696616, 0.813004, 0.911304, 0.984900, 1.067894, 1.194601, 1.369533
> +};

the are alays multiplied by -2, maybe that could be merged in the table

[...]
> +/**
> + * Audio Object Types
> + */
> +enum {
> +    AOT_NULL = 0x0,
> +    AOT_AAC_MAIN,
> +    AOT_AAC_LC,
> +    AOT_AAC_SSR,
> +    AOT_AAC_LTP,
> +    AOT_SBR,
> +    AOT_AAC_SCALABLE,
> +    AOT_TWINVQ,
> +    AOT_CELP,
> +    AOT_HVXC,
> +    AOT_TTSI = 12,
> +    AOT_MAINSYNTH,
> +    AOT_WAVESYNTH,
> +    AOT_MIDI,
> +    AOT_SAFX,
> +    AOT_ER_AAC_LC,
> +    AOT_ER_AAC_LTP = 19,
> +    AOT_ER_AAC_SCALABLE,
> +    AOT_ER_TWINVQ,
> +    AOT_ER_BSAC,
> +    AOT_ER_AAC_LD,
> +    AOT_ER_CELP,
> +    AOT_ER_HVXC,
> +    AOT_ER_HILN,
> +    AOT_ER_PARAM,
> +    AOT_SSC
> +};

please vartically align the "= xy" they are easy to miss in there

[..]
> +/**
> + * Window sequences
> + */
> +enum {
> +    ONLY_LONG_SEQUENCE = 0,
> +    LONG_START_SEQUENCE,
> +    EIGHT_SHORT_SEQUENCE,
> +    LONG_STOP_SEQUENCE
> +};

this should have its own type and the variables holding it should also be
of that type (possibly applies to other enums as well but not to all, i think
one was just used as array index)

[...]
> +/**
> + * Program config. This describes how channels are arranged.
> + *
> + * Either read from stream (ID_PCE) or created based on a default
> + * fixed channel arrangement.
> + */
> +typedef struct {
> +    int che_type[4][MAX_TAGID];   ///< Channel Element type with the first index the first 4 raw_data_block IDs
> +
> +    int mono_mixdown;         ///< The SCE tag to use if user requests mono output,   -1 if not available
> +    int stereo_mixdown;       ///< The CPE tag to use if user requests stereo output, -1 if not available
> +    int mixdown_coeff_index;  ///< 0-3
> +    int pseudo_surround;      ///< Mix surround channels out of phase
> +
> +} program_config_struct;

i would drop the _struct
one can always write "struct program_config" or ProgramConfig
other structs in lav* dont have struct in their names either ...

> +
> +
> +/**
> + * Long Term Prediction
> + */
> +#define MAX_LTP_LONG_SFB 40
> +typedef struct {
> +    int present;
> +    int lag;
> +    float coef;
> +    int used[MAX_LTP_LONG_SFB];
> +} ltp_struct;

isnt !!coef == present ?

> +
> +/**
> + * Individual Channel Stream
> + */
> +typedef struct {

> +    int intensity_present;

This seems to be used to prevent intensity_tool() from being run if its not
used anywhere, I do not think this is efficient.
instead of

if(a.present)
    for(i){
        if(tab[i] == a)
            do something
    }

if(b.present)
    for(i){
        if(tab[i] == b)
            do something
    }

the following looks simpler

    for(i){
        if(tab[i] == a)
            do something
        else if((tab[i] == b)
            do something
    }

or switch/case instead of if/else

Of course above is just a suggestion, i want the fastest and simplest code!
But i think quite a bit of these specific "tools" could be merged into a
single loop. the do_something itself could of course be a sperate function
for clarity ...

> +    int max_sfb;

whats a sfb?

> +    int window_sequence;

> +    int window_shape;             ///< If set, use Kaiser-Bessel window, otherwise use a sinus window

why not name it then use_kb_window ?
same for the other window_shape vars
also the other fields in the struct may benefit from comments (if you know
good explanatory comments)

> +    int window_shape_prev;
> +    int num_window_groups;

> +    uint8_t grouping;

local var is enough for this one

> +    uint8_t group_len[8];
> +    ltp_struct ltp;
> +    ltp_struct ltp2;
> +    const uint16_t *swb_offset;
> +    int num_swb;
> +    int num_windows;
> +    int tns_max_bands;
> +} ics_struct;
> +
> +/**
> + * Temporal Noise Shaping
> + */
> +typedef struct {
> +    int present;
> +    int n_filt[8];
> +    int length[8][4];
> +    int direction[8][4];
> +    int order[8][4];

> +    const float *tmp2_map[8][4];

maybe the question is silly but why tmp2 and not tmp ?

[...]
> +/**
> + * Dyanmic Range Control
> + *
> + * DRC is decoded from bitstream but not further processed.
> + */
> +typedef struct {
> +    int pce_instance_tag;
> +    int drc_tag_reserved_bits;
> +    int dyn_rng_sgn[17];
> +    int dyn_rng_ctl[17];
> +    int exclude_mask[MAX_CHANNELS];
> +    int additional_excluded_chns[MAX_CHANNELS];
> +    int drc_band_incr;
> +    int drc_interpolation_scheme;
> +    int drc_band_top[17];
> +    int prog_ref_level;
> +    int prog_ref_level_reserved_bits;
> +} drc_struct;

does it make sense to prefix variables in a drc_struct with drc_ ?
and dont forget to remove unused fields after it is "further processed"

> +
> +/**
> + * Pulse tool
> + */
> +typedef struct {
> +    int present;

> +    int num_pulse_minus1;

why minus1 ? I think its cleaner to store num_pulse
also present and num_pulse_minus1 look redundant

> +    int start;
> +    int offset[4];

start and offset[0] also are redundant, offset[0] is enough

> +    int amp[4];
> +} pulse_struct;

> +
> +/**
> + * Parameters for the SSR Inverse Polyphase Quadrature Filter
> + */
> +typedef struct {
> +    float q[4][4];
> +    float t0[4][12];
> +    float t1[4][12];
> +} ssr_context;

This looks like it should be a static const thingy not a per instance struct

> +
> +/**
> + * Per-element gain control for SSR
> + */
> +typedef struct {

> +    int max_band;
> +    int adjust_num[4][8];

not used outside a single function

> +    int alev[4][8][8];
> +    int aloc[4][8][8];

written and never read

[...]
> +/**
> + * Main AAC context
> + */
> +typedef struct {
> +    AVCodecContext * avccontext;
> +
> +    MPEG4AudioConfig m4ac;
> +
> +    int is_saved;                 ///< Set if elements have stored overlap from previous frame.
> +    drc_struct che_drc;
> +
> +    /**
> +     * @defgroup elements
> +     * @{
> +     */
> +    program_config_struct pcs;
> +    che_struct * che[4][MAX_TAGID];
> +    /** @} */
> +
> +    /**
> +     * @defgroup temporary Aligned temporary buffers (We do not want to have these on stack)
> +     * @{
> +     */
> +    DECLARE_ALIGNED_16(float, buf_mdct[2048]);
> +    DECLARE_ALIGNED_16(float, revers[1024]);
> +    /** @} */
> +
> +    /**
> +     * @defgroup tables   Computed / setup during init
> +     * @{
> +     */

> +    VLC mainvlc;
> +    VLC books[11];

Should be static not in the context and use INIT_VLC_STATIC!

> +    MDCTContext mdct;
> +    MDCTContext mdct_small;
> +    MDCTContext *mdct_ltp;
> +    DSPContext dsp;

> +    int * vq[11];

also looks like it should not be in the context

[...]
> +static void vector_fmul_add_add_add(AACContext * ac, float * dst, const float * src0, const float * src1, const float * src2, const float * src3, float src4, int len) {
> +    int i;
> +    ac->dsp.vector_fmul_add_add(dst, src0, src1, src2, src4, len, 1);
> +    for (i = 0; i < len; i++)
> +        dst[i] += src3[i];
> +}

why does AAC need this and other codecs not?
(iam askng as my gut says there might be a bug but i could be wrong, i didnt
 investigate this too carefull but IIRC there was some comment somewhere in
 the src about short windows having artifacts and its strange that other 
 codecs do not need above)

> +
> +// aux
> +/**
> + * Generate a sine Window.
> + */
> +static void sine_window_init(float *window, int n) {
> +    const float alpha = M_PI / n;
> +    int i;
> +    for(i = 0; i < n/2; i++)
> +        window[i] = sin((i + 0.5) * alpha);
> +}

greping for sine_window finds some matches in lavc, is this and the table
duplicated?

[...]
> +/**
> + * Configure output channel order and optional mixing based on the current
> + * program config element and user requested channels.
> + *
> + * \param nwepcs New program config struct. We only do somthing if it differs from the current one.

nwepcs

[...]
> +    /* Allocate or free elements depending on if they are in the
> +       current program config struct */
> +
> +    for(i = 0; i < MAX_TAGID; i++) {
> +        channels += !!pcs->che_type[ID_SCE][i] + !!pcs->che_type[ID_CPE][i] * 2 + !!pcs->che_type[ID_LFE][i];
> +
> +        for(j = 0; j < 4; j++) {
> +            if(pcs->che_type[j][i] && !ac->che[j][i]) {
> +                ac->che[j][i] = av_mallocz(sizeof(che_struct));
> +            } else
> +                che_freep(&ac->che[j][i]);
> +        }
> +    }
> +
> +    /* Setup default 1:1 output mapping.
> +     *
> +     * For a 5.1 stream the output order will be:
> +     *    [ Front Left ] [ Front Right ] [ Center ] [ LFE ] [ Surround Left ] [ Surround Right ]
> +     *
> +     * While at it: locate front, center and back for matrix mixdown further down
> +     */
> +
> +    ch = 0;
> +    for(i = 0; i < MAX_TAGID; i++) {
> +        for(j = 0; j < 4; j++) {
> +            if(j != ID_CCE && pcs->che_type[j][i]) {
> +                ac->output_data[ch++] = ac->che[j][i]->ch[0].ret;
> +                ac->che[j][i]->ch[0].mixing_gain = 1.0f;
> +                if(j == ID_CPE) {
> +                    ac->output_data[ch++] = ac->che[j][i]->ch[1].ret;
> +                    ac->che[j][i]->ch[1].mixing_gain = 1.0f;
> +                    if(!mixdown[MIXDOWN_FRONT] && pcs->che_type[j][i] == AAC_CHANNEL_FRONT)
> +                        mixdown[MIXDOWN_FRONT] = ac->che[j][i];
> +                    if(!mixdown[MIXDOWN_BACK ] && pcs->che_type[j][i] == AAC_CHANNEL_BACK)
> +                        mixdown[MIXDOWN_BACK ] = ac->che[j][i];
> +                }
> +                if(j == ID_SCE && !mixdown[MIXDOWN_CENTER] && pcs->che_type[j][i] == AAC_CHANNEL_FRONT)
> +                    mixdown[MIXDOWN_CENTER] = ac->che[j][i];
> +            }
> +        }
> +    }
> +    assert(ch == channels);

if they match they shouldnt be calculated twice, also the 2 loops can be
merged i think

[...]
> +            if(avctx->request_channels == 2) {
> +                b = 1. / (1. + (1. / M_SQRT2) + a * (pcs->pseudo_surround ? 2. : 1.));
> +                mixdown[MIXDOWN_CENTER]->ch[0].mixing_gain      = b / M_SQRT2;
> +            } else {
> +                b = 1. / (3. + 2. * a);
> +                mixdown[MIXDOWN_CENTER]->ch[0].mixing_gain      = b;
> +            }

> +            mixdown[MIXDOWN_FRONT]->ch[0].mixing_gain = b;
> +            mixdown[MIXDOWN_FRONT]->ch[1].mixing_gain = b;
> +            mixdown[MIXDOWN_BACK ]->ch[0].mixing_gain  = b * a;
> +            mixdown[MIXDOWN_BACK ]->ch[1].mixing_gain  = b * a;

interresting vertical (mis)alignment

[...]
> +    program_config_element_parse_tags(gb, pcs.che_type[ID_CPE], pcs.che_type[ID_SCE], num_front, AAC_CHANNEL_FRONT);
> +    program_config_element_parse_tags(gb, pcs.che_type[ID_CPE], pcs.che_type[ID_SCE], num_side,  AAC_CHANNEL_SIDE );
> +    program_config_element_parse_tags(gb, pcs.che_type[ID_CPE], pcs.che_type[ID_SCE], num_back,  AAC_CHANNEL_BACK );
> +    program_config_element_parse_tags(gb, NULL,                 pcs.che_type[ID_LFE], num_lfe,   AAC_CHANNEL_LFE  );
> +
> +    skip_bits_long(gb, 4 * num_assoc_data);
> +

> +    for (i = 0; i < num_cc; i++) {
> +        skip_bits1(gb);    // cc_ind_sw
> +        pcs.che_type[ID_CCE][get_bits(gb, 4)] = AAC_CHANNEL_CC;
> +    }

duplicate of program_config_element_parse_tags()

[...9
> +        // windows init
> +        ff_kbd_window_init(kbd_long_1024, 4.0, 256);
> +        ff_kbd_window_init(kbd_short_128, 6.0, 32);
> +        sine_window_init(sine_long_1024, 512);
> +        sine_window_init(sine_short_128, 64);
> +        ssr_context_init(&ac->ssrctx);
> +    } else {
> +#endif /* AAC_SSR */
> +        ff_mdct_init(&ac->mdct, 11, 1);
> +        ff_mdct_init(&ac->mdct_small, 8, 1);
> +        // windows init
> +        ff_kbd_window_init(kbd_long_1024, 4.0, 1024);
> +        ff_kbd_window_init(kbd_short_128, 6.0, 128);
> +        sine_window_init(sine_long_1024, 2048);
> +        sine_window_init(sine_short_128, 256);

looks like you init static tables to values from the stream ...

> +#ifdef AAC_SSR
> +    }
> +#endif /* AAC_SSR */

> +    for (i = 0; i < 128; i++) {
> +        sine_short_128[i] *= 8.;
> +        kbd_short_128[i] *= 8.;
> +    }

not thread safe, you cannot store wrong values in static tables and then
correct them.

> +    return 0;
> +}
> +
> +// Parsers implementation
> +
> +/**
> + * Decode a data_stream_element
> + * reference: Table 4.10
> + */
> +static int data_stream_element(AACContext * ac, GetBitContext * gb, int id) {
> +    int byte_align = get_bits1(gb);

> +    int count = get_bits(gb, 8);
> +    if (count == 255)
> +        count += get_bits(gb, 8);

without looking at the spec shouldnt that be a while(==255) ?

> +    if (byte_align)
> +        align_get_bits(gb);
> +    skip_bits_long(gb, 8 * count);
> +    return 0;

and when something always returns 0 it doesnt need to return int at all
void is fine ...

[...]
> +
> +/**
> + * Decode Individual Channel Stream info
> + * reference: table 4.6
> + */
> +static int decode_ics_info(AACContext * ac, GetBitContext * gb, int common_window, ics_struct * ics) {
> +    if (get_bits1(gb)) {
> +        av_log(ac->avccontext, AV_LOG_ERROR, "Reserved bit set\n");
> +        return -1;
> +    }
> +    ics->window_sequence = get_bits(gb, 2);

> +    ics->window_shape_prev = ics->window_shape;
> +    ics->window_shape = get_bits1(gb);
> +    if (ics->window_shape_prev == -1)
> +        ics->window_shape_prev = ics->window_shape;

have i missed the place that sets it to -1 ?

> +    ics->num_window_groups = 1;
> +    ics->group_len[0] = 1;
> +    if (ics->window_sequence == EIGHT_SHORT_SEQUENCE) {
> +        int i;
> +        ics->max_sfb = get_bits(gb, 4);
> +        ics->grouping = get_bits(gb, 7);
> +        for (i = 0; i < 7; i++) {
> +            if (ics->grouping & (1<<(6-i))) {
> +                ics->group_len[ics->num_window_groups-1]++;
> +            } else {
> +                ics->num_window_groups++;
> +                ics->group_len[ics->num_window_groups-1] = 1;
> +            }
> +        }
> +        ics->swb_offset = swb_offset_128[ac->m4ac.sampling_index];
> +        ics->num_swb = num_swb_128[ac->m4ac.sampling_index];

shouldnt max_sfb be checked against num_swb  here and below?

also the 2 lines above could be vertically aligned

[...]
> +/**
> + * Decode section_data payload
> + * reference: Table 4.46
> + */
> +static int decode_section_data(AACContext * ac, GetBitContext * gb, ics_struct * ics, int cb[][64]) {
> +    int g;
> +    for (g = 0; g < ics->num_window_groups; g++) {
> +        int bits = (ics->window_sequence == EIGHT_SHORT_SEQUENCE) ? 3 : 5;
> +        int k = 0;
> +        while (k < ics->max_sfb) {
> +            int sect_len = 0;

> +            int sect_len_incr = 1;

the init looks unneeded

> +            int sect_cb = get_bits(gb, 4);
> +            if (sect_cb == 12) {
> +                av_log(ac->avccontext, AV_LOG_ERROR, "Invalid code book\n");
> +                return -1;
> +            }

> +            while ((sect_len_incr = get_bits(gb, bits)) == (1 << bits)-1)
> +                sect_len += sect_len_incr;
> +            sect_len += sect_len_incr;

> +            sect_len += k;

sect_len could be inited to k instead of 0

> +            for (; k < sect_len && k < ics->max_sfb; k++)
> +                cb[g][k] = sect_cb;

isnt sect_len > ics->max_sfb an error ?

> +        }
> +    }
> +    return 0;
> +}
> +

> +/**
> + * Decode scale_factor_data
> + * reference: Table 4.47
> + */
> +static int decode_scale_factor_data(AACContext * ac, GetBitContext * gb, float mix_gain, unsigned int global_gain, ics_struct * ics, const int cb[][64], float sf[][64]) {
> +    int g, i, index;
> +    int offset[3] = { global_gain, global_gain - 90, 100 };
> +    int noise_flag = 1;
> +    static const char *sf_str[3] = { "Global gain", "Noise gain", "Intensity stereo position" };
> +    ics->intensity_present = 0;
> +    for (g = 0; g < ics->num_window_groups; g++) {
> +        for (i = 0; i < ics->max_sfb; i++) {
> +            if (cb[g][i] == ZERO_HCB) {
> +                sf[g][i] = 0.;
> +                continue;
> +            }else if((cb[g][i] == INTENSITY_HCB) || (cb[g][i] == INTENSITY_HCB2)) {
> +                ics->intensity_present = 1;
> +                index = 2;
> +            }else

> +                index = cb[g][i] == NOISE_HCB ? 1 : 0;

? 1 : 0 is unnneeded

> +            if (cb[g][i] == NOISE_HCB && noise_flag-- > 0)
> +                offset[index] += get_bits(gb, 9) - 256;
> +            else
> +                offset[index] += get_vlc2(gb, ac->mainvlc.table, 7, 3) - 60;
> +            if(offset[index] > 255) {
> +                av_log(ac->avccontext, AV_LOG_ERROR,
> +                        "%s (%d) out of range", sf_str[index], offset[index]);
> +                return -1;
> +            }
> +            if(index == 2)
> +                sf[g][i] =  pow2sf_tab[-offset[index] + 300];
> +            else
> +                sf[g][i] = -pow2sf_tab[ offset[index] + ac->sf_offset];
> +            sf[g][i] *= mix_gain;
> +        }
> +    }
> +    return 0;
> +}

above is inefficient, because cb is stored as run,value expanded into
cb and then this iterates over it
it would be alot more efficient to do above as several loops for each case
with the already known run values

[...]
> +static void decode_tns_data(AACContext * ac, GetBitContext * gb, const ics_struct * ics, tns_struct * tns) {
> +    int w, filt, i, coef_len, coef_res = 0, coef_compress;
> +    for (w = 0; w < ics->num_windows; w++) {
> +        tns->n_filt[w] = get_bits(gb, ics->window_sequence == EIGHT_SHORT_SEQUENCE ? 1 : 2);
> +        if (tns->n_filt[w])
> +            coef_res = get_bits1(gb) + 3;
> +        for (filt = 0; filt < tns->n_filt[w]; filt++) {
> +            tns->length[w][filt] = get_bits(gb, ics->window_sequence == EIGHT_SHORT_SEQUENCE ? 4 : 6);
> +            if ((tns->order[w][filt] = get_bits(gb, ics->window_sequence == EIGHT_SHORT_SEQUENCE ? 3 : 5))) {
> +                tns->direction[w][filt] = get_bits1(gb);
> +                coef_compress = get_bits1(gb);
> +                coef_len = coef_res - coef_compress;

> +                tns->tmp2_map[w][filt] = tns_tmp2_map[(coef_compress << 1) + (coef_res - 3)];

2*coef_compress + coef_res - 3
looks cleaner to me

also some empty lines here ane there might help readability

[...]
> +static void decode_ms_data(AACContext * ac, GetBitContext * gb, che_struct * cpe) {
> +    ms_struct * ms = &cpe->ms;
> +    int g, i;
> +    ms->present = get_bits(gb, 2);
> +    if (ms->present == 1) {
> +        for (g = 0; g < cpe->ch[0].ics.num_window_groups; g++)
> +            for (i = 0; i < cpe->ch[0].ics.max_sfb; i++)
> +                ms->mask[g][i] = get_bits1(gb);// << i;
> +    } else if (ms->present == 2) {
> +        for (g = 0; g < cpe->ch[0].ics.num_window_groups; g++)

> +            memset(ms->mask[g], 1, cpe->ch[0].ics.max_sfb);

this is missing some sizeof()

> +    }
> +}
> +

> +/**
> + * Decode spectral data
> + * reference: Table 4.50
> + */
> +static int decode_spectral_data(AACContext * ac, GetBitContext * gb, const ics_struct * ics, const int cb[][64], int * icoef) {

int icoef[1024] makes its size clearer

> +    int i, k, g;
> +    const uint16_t * offsets = ics->swb_offset;
> +
> +    for (g = 0; g < ics->num_window_groups; g++) {
> +        for (i = 0; i < ics->max_sfb; i++) {
> +            const int cur_cb = cb[g][i];
> +            const int dim = cur_cb >= FIRST_PAIR_HCB ? 2 : 4;
> +            int group;
> +            if (cur_cb == NOISE_HCB) {
> +                for (group = 0; group < ics->group_len[g]; group++) {
> +                    for (k = offsets[i]; k < offsets[i+1]; k++)
> +                        icoef[group*128+k] = av_random(&ac->random_state) & 0x0000FFFF;
> +                }
> +            }else if (cur_cb == ZERO_HCB) {
> +                for (group = 0; group < ics->group_len[g]; group++) {
> +                    memset(icoef + group * 128 + offsets[i], 0, (offsets[i+1] - offsets[i])*sizeof(int));
> +                }
> +            }else if (cur_cb != INTENSITY_HCB2 && cur_cb != INTENSITY_HCB) {
> +                for (group = 0; group < ics->group_len[g]; group++) {
> +                    for (k = offsets[i]; k < offsets[i+1]; k += dim) {
> +                        const int index = get_vlc2(gb, ac->books[cur_cb - 1].table, 6, 3);

> +                        const int *ptr = &ac->vq[cur_cb - 1][index * dim], coef_idx = (group << 7) + k;

vq_ptr maybe, ptr confused me for a moment until i saw the line above

> +                        int j;
> +                        if (index == -1) {
> +                            av_log(ac->avccontext, AV_LOG_ERROR, "Error in spectral data\n");
> +                            return -1;
> +                        }

can that happen at all?

> +                        for (j = 0; j < dim; j++) icoef[coef_idx + j] = 1;

more \n please

[...]
> +static void quant_to_spec_tool(AACContext * ac, const ics_struct * ics, const int * icoef, const int cb[][64], const float sf[][64], float * coef) {
> +    const uint16_t * offsets = ics->swb_offset;
> +    int g, i, group, k;
> +

> +    if(ics->window_sequence == EIGHT_SHORT_SEQUENCE) {
> +        for(g = 0; g < 8; g++)
> +            memset(coef + g * 128 + offsets[ics->max_sfb], 0, sizeof(float)*(128 - offsets[ics->max_sfb]));
> +    } else {
> +        memset(coef + offsets[ics->max_sfb], 0, sizeof(float)*(1024 - offsets[ics->max_sfb]));
> +    }

for(g = 0; g < ics->num_windows; g++)
    memset(coef + g * 128 + offsets[ics->max_sfb], 0, sizeof(float)*(C - offsets[ics->max_sfb]));

> +
> +    for (g = 0; g < ics->num_window_groups; g++) {
> +        for (i = 0; i < ics->max_sfb; i++) {
> +            if (cb[g][i] == NOISE_HCB) {
> +                for (group = 0; group < ics->group_len[g]; group++) {
> +                    float energy = 0;
> +                    float scale = 1.;// / (float)(offsets[i+1] - offsets[i]);
> +                    for (k = offsets[i]; k < offsets[i+1]; k++)
> +                        energy += (float)icoef[group*128+k] * icoef[group*128+k];
> +                    scale *= sf[g][i] / sqrt(energy);

are you sure that the random values have to be normalized like that?
I suspect energy is supposed tp be a constant.

[...]
> +static void tns_filter_tool(AACContext * ac, int decode, sce_struct * sce, float * coef) {
> +    const ics_struct * ics = &sce->ics;
> +    const tns_struct * tns = &sce->tns;
> +    const int mmm = FFMIN(ics->tns_max_bands,  ics->max_sfb);
> +    int w, filt, m, i, ib;
> +    int bottom, top, order, start, end, size, inc;
> +    float tmp;
> +    float lpc[TNS_MAX_ORDER + 1], b[2 * TNS_MAX_ORDER];
> +
> +    for (w = 0; w < ics->num_windows; w++) {
> +        bottom = ics->num_swb;
> +        for (filt = 0; filt < tns->n_filt[w]; filt++) {
> +            top = bottom;
> +            bottom = FFMAX(0, top - tns->length[w][filt]);
> +            order = FFMIN(tns->order[w][filt], TNS_MAX_ORDER);
> +            if (order == 0)
> +                continue;
> +
> +            // tns_decode_coef
> +            lpc[0] = 1;
> +            for (m = 1; m <= order; m++) {
> +                lpc[m] = tns->tmp2_map[w][filt][tns->coef[w][filt][m - 1]];
> +                for (i = 1; i < m; i++)
> +                    b[i] = lpc[i] + lpc[m] * lpc[m-i];
> +                for (i = 1; i < m; i++)
> +                    lpc[i] = b[i];
> +            }
> +

> +            start = ics->swb_offset[FFMIN(bottom, mmm)];
> +            end = ics->swb_offset[FFMIN(top, mmm)];

vertical alig

[...]
> +#ifdef AAC_LTP
> +static void window_ltp_tool(AACContext * ac, sce_struct * sce, float * in, float * out) {
> +    ics_struct * ics = &sce->ics;
> +    const float * lwindow      = ics->window_shape      ? kbd_long_1024 : sine_long_1024;
> +    const float * swindow      = ics->window_shape      ? kbd_short_128 : sine_short_128;
> +    const float * lwindow_prev = ics->window_shape_prev ? kbd_long_1024 : sine_long_1024;
> +    const float * swindow_prev = ics->window_shape_prev ? kbd_short_128 : sine_short_128;
> +    float * buf = ac->buf_mdct;
> +    int i;
> +    assert(ics->window_sequence != EIGHT_SHORT_SEQUENCE);
> +    if (!ac->mdct_ltp) {
> +        ac->mdct_ltp = av_malloc(sizeof(MDCTContext));
> +        ff_mdct_init(ac->mdct_ltp, 11, 0);
> +    }
> +    if (ics->window_sequence != LONG_STOP_SEQUENCE) {
> +        vector_fmul_dst(ac, buf, in, lwindow_prev, 1024);
> +    } else {
> +        memset(buf, 0, 448 * sizeof(float));
> +        for (i = 448; i < 576; i++) in[i] *= 0.125; // normalize
> +        vector_fmul_dst(ac, buf + 448, in + 448, swindow_prev, 128);
> +        memcpy(buf + 576, in + 576, 448 * sizeof(float));
> +    }

> +    if (ics->window_sequence != LONG_START_SEQUENCE) {
> +        ac->dsp.vector_fmul_reverse(buf + 1024, in + 1024, lwindow, 1024);
> +    } else {
> +        memcpy(buf + 1024, in + 1024, 448 * sizeof(float));
> +        for (i = 448; i < 576; i++) in[i + 1024] *= 0.125; // normalize
> +        ac->dsp.vector_fmul_reverse(buf + 1024 + 448, in + 1024 + 448, swindow, 128);
> +        memset(buf + 1024 + 576, 0, 448 * sizeof(float));
> +    }
> +     ff_mdct_calc(ac->mdct_ltp, out, buf, in); // using in as buffer for mdct

indention

> +}
> +
> +static void ltp_trans(AACContext * ac, sce_struct * sce) {
> +    const ltp_struct * ltp = &sce->ics.ltp;
> +    const uint16_t * offsets = sce->ics.swb_offset;
> +    int i, sfb;
> +    if (!ltp->present)
> +        return;
> +    if (!sce->ltp_state)
> +        sce->ltp_state = av_mallocz(4 * 1024 * sizeof(int16_t));
> +    if (sce->ics.window_sequence != EIGHT_SHORT_SEQUENCE && ac->is_saved) {
> +        float x_est[2 * 1024], X_est[2 * 1024];
> +        for (i = 0; i < 2 * 1024; i++)

> +            x_est[i] = (float)sce->ltp_state[i + 2 * 1024 - ltp->lag] * ltp->coef;

useless cast

[...]
> +static void window_trans(AACContext * ac, sce_struct * sce) {
> +    ics_struct * ics = &sce->ics;
> +    float * in = sce->coeffs;
> +    float * out = sce->ret;
> +    float * saved = sce->saved;

> +    const float * lwindow      = ics->window_shape      ? kbd_long_1024 : sine_long_1024;
> +    const float * swindow      = ics->window_shape      ? kbd_short_128 : sine_short_128;
> +    const float * lwindow_prev = ics->window_shape_prev ? kbd_long_1024 : sine_long_1024;
> +    const float * swindow_prev = ics->window_shape_prev ? kbd_short_128 : sine_short_128;

duplicate 

[...]

> +#ifdef AAC_SSR
> +static void window_ssr_tool(AACContext * ac, sce_struct * sce, float * in, float * out) {
> +    ics_struct * ics = &sce->ics;
> +    const float * lwindow      = ics->window_shape      ? kbd_long_1024 : sine_long_1024;
> +    const float * swindow      = ics->window_shape      ? kbd_short_128 : sine_short_128;
> +    const float * lwindow_prev = ics->window_shape_prev ? kbd_long_1024 : sine_long_1024;
> +    const float * swindow_prev = ics->window_shape_prev ? kbd_short_128 : sine_short_128;
> +    float * buf = ac->buf_mdct;
> +    if (ics->window_sequence != EIGHT_SHORT_SEQUENCE) {
> +        int i;
> +        ff_imdct_calc(&ac->mdct, buf, in, out);
> +        if (ics->window_sequence != LONG_STOP_SEQUENCE) {
> +            vector_fmul_dst(ac, out, buf, lwindow_prev, 256);
> +        } else {
> +            memset(out, 0, 112 * sizeof(float));
> +            for (i = 112; i < 144; i++) buf[i] *= 0.125; // normalize
> +            vector_fmul_dst(ac, out + 112, buf + 112, swindow_prev, 32);
> +            memcpy(out + 144, buf + 144, 112 * sizeof(float));
> +        }
> +        if (ics->window_sequence != LONG_START_SEQUENCE) {
> +            ac->dsp.vector_fmul_reverse(out + 256, buf + 256, lwindow, 256);
> +        } else {
> +            memcpy(out + 256, buf + 256, 112 * sizeof(float));
> +            for (i = 112; i < 144; i++) buf[i + 256] *= 0.125; // normalize
> +            ac->dsp.vector_fmul_reverse(out + 256 + 112, buf + 256 + 112, swindow, 32);
> +            memset(out + 144, 0, 112 * sizeof(float));
> +        }

this stuff looks near duplicated as well

[...]
> +static void ssr_gain_tool(AACContext * ac, sce_struct * sce, int band, float * in, float * preret, float * saved) {
> +    // TODO: 'in' buffer gain normalization
> +    if (sce->ics.window_sequence != EIGHT_SHORT_SEQUENCE) {
> +        vector_add_dst(ac, preret, in, saved, 256);
> +        memcpy(saved, in + 256, 256 * sizeof(float));
> +    } else {
> +        memcpy(preret, saved, 112 * sizeof(float));
> +        preret += 112; saved += 112;
> +        vector_add_dst(ac, preret, in, saved, 32);
> +        vector_add_dst(ac, preret + 1*32, in + 0*64 + 32, in + 1*64, 32);
> +        vector_add_dst(ac, preret + 2*32, in + 1*64 + 32, in + 2*64, 32);
> +        vector_add_dst(ac, preret + 3*32, in + 2*64 + 32, in + 3*64, 32);
> +        vector_add_dst(ac, preret + 4*32, in + 3*64 + 32, in + 4*64, 16);
> +

> +        vector_add_dst(ac, saved, in + 3*64 + 32 + 16, in + 4*64 + 16, 16);
> +        vector_add_dst(ac, saved + 16, in + 4*64 + 32, in + 5*64, 32);
> +        vector_add_dst(ac, saved + 1*32 + 16, in + 5*64 + 32, in + 6*64, 32);
> +        vector_add_dst(ac, saved + 2*32 + 16, in + 6*64 + 32, in + 7*64, 32);

vertical align

> +        memcpy(saved + 3*32 + 16, in + 7*64 + 32, 32 * sizeof(float));
> +        memset(saved + 144, 0, 112 * sizeof(float));
> +    }
> +}
> +
> +static void ssr_ipqf_tool(AACContext * ac, sce_struct * sce, float * preret) {
> +    ssr_context * ctx = &ac->ssrctx;
> +    ssr_struct * ssr = sce->ssr;
> +    int i, b, j;
> +    float x;
> +    for (i = 0; i < 256; i++) {

> +        memcpy(&ssr->buf[0][0], &ssr->buf[0][1], 23 * sizeof(float));
> +        memcpy(&ssr->buf[1][0], &ssr->buf[1][1], 23 * sizeof(float));
> +        memcpy(&ssr->buf[2][0], &ssr->buf[2][1], 23 * sizeof(float));
> +        memcpy(&ssr->buf[3][0], &ssr->buf[3][1], 23 * sizeof(float));

23 should be a sizeof

> +

> +        ssr->buf[0][23] = ctx->q[0][0] * preret[0*256+i] + ctx->q[0][1] * preret[1*256+i] +
> +            ctx->q[0][2] * preret[2*256+i] + ctx->q[0][3] * preret[3*256+i];
> +        ssr->buf[1][23] = ctx->q[1][0] * preret[0*256+i] + ctx->q[1][1] * preret[1*256+i] +
> +            ctx->q[1][2] * preret[2*256+i] + ctx->q[1][3] * preret[3*256+i];
> +        ssr->buf[2][23] = ctx->q[2][0] * preret[0*256+i] + ctx->q[2][1] * preret[1*256+i] +
> +            ctx->q[2][2] * preret[2*256+i] + ctx->q[2][3] * preret[3*256+i];
> +        ssr->buf[3][23] = ctx->q[3][0] * preret[0*256+i] + ctx->q[3][1] * preret[1*256+i] +
> +            ctx->q[3][2] * preret[2*256+i] + ctx->q[3][3] * preret[3*256+i];

maybe this would be clearer as a for loop, but maybe thats speed critical
i dunno ?

[...]
> +static void ssr_trans(AACContext * ac, sce_struct * sce) {
> +    float * in = sce->coeffs;
> +    DECLARE_ALIGNED_16(float, tmp_buf[512]);
> +    DECLARE_ALIGNED_16(float, tmp_ret[1024]);
> +    int b;
> +    for (b = 0; b < 4; b++) {

> +        if (b & 1) { // spectral reverse
> +            vector_reverse(ac, tmp_buf, in + 256 * b, 256);
> +            memcpy(in + 256 * b, tmp_buf, 256 * sizeof(float));
> +        }

hmm, cant this be done more effciently than a litteral reverse?

[...]
> +static void coupling_dependent_trans(AACContext * ac, che_struct * cc, sce_struct * sce, int index) {
> +    ics_struct * ics = &cc->ch[0].ics;
> +    const uint16_t * offsets = ics->swb_offset;
> +    float * dest = sce->coeffs;
> +    float * src = cc->ch[0].coeffs;
> +    int g, i, group, k;
> +    if(ac->m4ac.object_type == AOT_AAC_LTP) {
> +        av_log(ac->avccontext, AV_LOG_ERROR,
> +               "Dependent coupling is not supported together with LTP\n");
> +        return;
> +    }
> +    for (g = 0; g < ics->num_window_groups; g++) {
> +        for (i = 0; i < ics->max_sfb; i++) {
> +            if (cc->ch[0].cb[g][i] != ZERO_HCB) {
> +                float gain = cc->coup.gain[index][g][i] * sce->mixing_gain;
> +                for (group = 0; group < ics->group_len[g]; group++) {

> +                    for (k = offsets[i]; k < offsets[i+1]; k++) {
> +                        dest[group*128+k] += gain * src[group*128+k];
> +                    }

this should be moved into dsputil at some point i think (unless we
already have such routine there)

[...]
> +static void transform_coupling_tool(AACContext * ac, che_struct * cc,
> +        void (*cc_trans)(AACContext * ac, che_struct * cc, sce_struct * sce, int index))
> +{
> +    int c;
> +    int index = 0;
> +    coupling_struct * coup = &cc->coup;
> +    for (c = 0; c <= coup->num_coupled; c++) {
> +        if (     !coup->is_cpe[c] && ac->che[ID_SCE][coup->tag_select[c]]) {
> +            cc_trans(ac, cc, &ac->che[ID_SCE][coup->tag_select[c]]->ch[0], index++);
> +        } else if(coup->is_cpe[c] && ac->che[ID_CPE][coup->tag_select[c]]) {

> +            if (!coup->l[c] && !coup->r[c]) {
> +                cc_trans(ac, cc, &ac->che[ID_CPE][coup->tag_select[c]]->ch[0], index);
> +                cc_trans(ac, cc, &ac->che[ID_CPE][coup->tag_select[c]]->ch[1], index++);
> +            }
> +            if (coup->l[c])
> +                cc_trans(ac, cc, &ac->che[ID_CPE][coup->tag_select[c]]->ch[0], index++);
> +            if (coup->r[c])
> +                cc_trans(ac, cc, &ac->che[ID_CPE][coup->tag_select[c]]->ch[1], index++);

if (coup->l[c] || !coup->r[c])
    cc_trans(ac, cc, &ac->che[ID_CPE][coup->tag_select[c]]->ch[0], index);
index += coup->l[c];
if (coup->r[c] || !coup->l[c])
    cc_trans(ac, cc, &ac->che[ID_CPE][coup->tag_select[c]]->ch[1], index++);

Though iam not sure if your variant is not cleaner

[...]
> +static int output_samples(AVCodecContext * avccontext, uint16_t * data, int * data_size) {
> +    AACContext * ac = avccontext->priv_data;
> +    int i, ch;
> +    float *c, *l, *r, *sl, *sr, *out;
> +
> +    if (!ac->is_saved) {
> +        ac->is_saved = 1;
> +        *data_size = 0;
> +        return 0;
> +    }
> +
> +    if(ac->mm[MIXDOWN_CENTER]) {
> +        /* Matrix mixdown */
> +        l   = ac->mm[MIXDOWN_FRONT ]->ch[0].ret;
> +        r   = ac->mm[MIXDOWN_FRONT ]->ch[1].ret;
> +        c   = ac->mm[MIXDOWN_CENTER]->ch[0].ret;
> +        sl  = ac->mm[MIXDOWN_BACK  ]->ch[0].ret;
> +        sr  = ac->mm[MIXDOWN_BACK  ]->ch[1].ret;
> +        out = ac->interleaved_output;
> +

> +        if(avccontext->channels == 2) {
> +            if(ac->pcs.pseudo_surround) {
> +                for(i = 0; i < 1024; i++) {
> +                    *out++ = *l++ + *c   - *sl   - *sr   - ac->add_bias * 3;
> +                    *out++ = *r++ + *c++ + *sl++ + *sr++ - ac->add_bias * 3;
> +                }
> +            } else {
> +                for(i = 0; i < 1024; i++) {
> +                    *out++ = *l++ + *c   + *sl++ - ac->add_bias * 2;
> +                    *out++ = *r++ + *c++ + *sr++ - ac->add_bias * 2;
> +                }
> +            }
> +
> +        } else {
> +            assert(avccontext->channels == 1);
> +            for(i = 0; i < 1024; i++) {
> +                *out++ = *l++ + *r++ + *c++ + *sl++ + *sr++ - ac->add_bias * 4;
> +            }
> +        }

also things for dsputil

[...]
> +        case ID_CPE:
> +            err = !ac->che[ID_CPE][tag] ? -1 : decode_cpe(ac, &gb, tag);
> +            break;
> +        case ID_FIL:
> +            if (tag == 15) tag += get_bits(&gb, 8) - 1;
> +            while (tag > 0)
> +                tag -= extension_payload(ac, &gb, tag);
> +            err = 0; /* FIXME */
> +            break;
> +        case ID_PCE:
> +            err = program_config_element(ac, &gb);
> +            break;
> +        case ID_DSE:
> +            err = data_stream_element(ac, &gb, tag);
> +            break;
> +        case ID_CCE:

> +            err = !ac->che[ID_CCE][tag] ? -1 : decode_cce(ac, &gb, tag);
> +            break;
> +        case ID_LFE:
> +            err = ac->che[ID_LFE][tag] && !decode_ics(ac, &gb, 0, 0, &ac->che[ID_LFE][tag]->ch[0]) ? 0 : -1;

maybe iam too tired but these appear hard to read, maybe a if() and some \n
would help

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

In a rich man's house there is no place to spit but his face.
-- Diogenes of Sinope
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20080620/ae9ba1cf/attachment.pgp>