[FFmpeg-devel] [PATCH] ALS decoder

Mon Aug 24 02:56:11 CEST 2009

On Sun, Aug 23, 2009 at 11:51:18PM +0200, Thilo Borgmann wrote:
> Revision 7 attached.
[...]
> +typedef struct {
> +    uint32_t als_id;          ///< ALS identifier
> +    int resolution;           ///< 000 = 8-bit; 001 = 16-bit; 010 = 24-bit; 011 = 32-bit
> +    int floating;             ///< 1 = IEEE 32-bit floating-point, 0 = integer

> +    int msb_first;            ///< original byte order of the input audio data

Unused. If you plan to use it in the future, please clarify what for and add a
TODO to the variable's doxygen comment making that clear.

> +    int frame_length;         ///< frame Length
> +    int ra_distance;          ///< distance between RA frames (in frames, 0...255)
> +    enum RA_Flag ra_flag;     ///< indicates where the size of ra units is stored
> +    int adapt_order;          ///< adaptive order: 1 = on, 0 = off
> +    int coef_table;           ///< table index of Rice code parameters
> +    int long_term_prediction; ///< long term prediction (LTP): 1 = on, 0 = off
> +    int max_order;            ///< maximum prediction order (0..1023)
> +    int block_switching;      ///< number of block switching levels
> +    int bgmc;                 ///< "Block Gilbert-Moore Code": 1 = on, 0 = off (Rice coding only)
> +    int sb_part;              ///< sub-block partition
> +    int joint_stereo;         ///< joint Stereo: 1 = on, 0 = off
> +    int mc_coding;            ///< extended inter-channel coding: 1 = on, 0 = off
> +    int chan_config;          ///< indicates that a chan_config_info field is present
> +    int chan_sort;            ///< channel rearrangement: 1 = on, 0 = off
> +    int rlslms;               ///< use "Recursive Least Square-Least Mean Square" predictor: 1 = on, 0 = off

> +    int aux_data_enabled;     ///< indicates that auxiliary data is present

same (unused; see the comment on msb_first above)

> +    int chan_config_info;     ///< mapping of channels to loudspeaker locations. Unused until setting channel configuration is implemented.
> +    int *chan_pos;            ///< original channel positions
> +    uint32_t header_size;     ///< header size of original audio file in bytes, provided for debugging
> +    uint32_t trailer_size;    ///< Trailer size of original audio file in bytes, provided for debugging
> +} ALSSpecificConfig;
> +
> +
> +typedef struct {
> +    AVCodecContext *avctx;
> +    ALSSpecificConfig sconf;
> +    GetBitContext gb;          ///< a bit reader context
> +    unsigned int num_frames;   ///< number of frames to decode, 0 if unknown
> +    unsigned int cur_frame_length;  ///< length of the current frame to decode
> +    unsigned int last_frame_length; ///< length of the last frame to decode, 0 if unknown
> +    unsigned int frame_id;     ///< the frame id / number of the current frame
> +    unsigned int js_switch;    ///< if true, joint-stereo decoding is enforced
> +    unsigned int num_blocks;   ///< number of blocks used in the current frame
> +    int32_t *quant_cof;        ///< quantized parcor coefficients
> +    int32_t *lpc_cof;          ///< coefficients of the direct form prediction filter

> +    int64_t *prev_raw_samples; ///< contains unshifted raw samples from the previous block
> +    int64_t **raw_samples;     ///< decoded raw samples for each channel
> +    int64_t *raw_buffer;       ///< contains all decoded raw samples including carryover samples

do these really need 64 bits?

[...]
> +
> +/** Reads an ALSSpecificConfig from a buffer into the output struct.
> + */
> +static av_cold int read_specific_config(ALSDecContext *ctx)
> +{
> +    GetBitContext gb;
> +    uint64_t ht_size;
> +    int i, config_offset, crc_enabled;
> +    MPEG4AudioConfig m4ac;
> +    ALSSpecificConfig *sconf = &ctx->sconf;
> +    AVCodecContext *avctx    = ctx->avctx;
> +    const uint8_t *buffer    = avctx->extradata;
> +    int buffer_size          = avctx->extradata_size;
> +    uint32_t samples;
> +
> +    init_get_bits(&gb, buffer, buffer_size * 8);
> +
> +    config_offset = ff_mpeg4audio_get_config(&m4ac, buffer, buffer_size);
> +
> +    if (config_offset < 0)
> +        return -1;
> +
> +    skip_bits_long(&gb, config_offset);

> +    buffer_size -= config_offset >> 3;
> +
> +    if (buffer_size < 22)
> +        return -1;

the following might be useful

static inline int get_bits_left(GetBitContext *s)
{
    return s->size_in_bits - get_bits_count(s);
}

and
if(get_bits_left(&gb) < 22*8)
and similarly for the other checks; it's IMHO simpler

> +
> +    // read the fixed items
> +    sconf->als_id               = get_bits_long(&gb, 32);
> +    avctx->sample_rate          = m4ac.sample_rate;
> +    skip_bits_long(&gb, 32); // sample rate already known
> +    samples                     = get_bits_long(&gb, 32);
> +    avctx->channels             = m4ac.channels;
> +    skip_bits(&gb, 16);      // number of channels already knwon
> +    skip_bits(&gb, 3);       // skip file_type
> +    sconf->resolution           = get_bits(&gb, 3);
> +    sconf->floating             = get_bits1(&gb);
> +    sconf->msb_first            = get_bits1(&gb);
> +    sconf->frame_length         = get_bits(&gb, 16) + 1;
> +    sconf->ra_distance          = get_bits(&gb, 8);
> +    sconf->ra_flag              = get_bits(&gb, 2);
> +    sconf->adapt_order          = get_bits1(&gb);
> +    sconf->coef_table           = get_bits(&gb, 2);
> +    sconf->long_term_prediction = get_bits1(&gb);
> +    sconf->max_order            = get_bits(&gb, 10);
> +    sconf->block_switching      = get_bits(&gb, 2);
> +    sconf->bgmc                 = get_bits1(&gb);
> +    sconf->sb_part              = get_bits1(&gb);
> +    sconf->joint_stereo         = get_bits1(&gb);
> +    sconf->mc_coding            = get_bits1(&gb);
> +    sconf->chan_config          = get_bits1(&gb);
> +    sconf->chan_sort            = get_bits1(&gb);
> +    crc_enabled                 = get_bits1(&gb);
> +    sconf->rlslms               = get_bits1(&gb);
> +    skip_bits(&gb, 5);       // skip 5 reserved bits
> +    sconf->aux_data_enabled     = get_bits1(&gb);
> +    buffer_size -= 22;
> +
> +
> +    // check for ALSSpecificConfig struct
> +    if (sconf->als_id != MKBETAG('A','L','S','\0'))
> +        return -1;

thus als_id definitely does not need to be in the context ...

> +
> +    ctx->cur_frame_length = sconf->frame_length;
> +
> +    // allocate quantized parcor coefficient buffer

> +    if (!(ctx->quant_cof = av_malloc(sizeof(*ctx->quant_cof) * sconf->max_order)) ||
> +        !(ctx->lpc_cof = av_malloc(sizeof(*ctx->lpc_cof) * sconf->max_order))) {

vertical align

> +        av_log(avctx, AV_LOG_ERROR, "Allocating buffer memory failed.\n");
> +        return AVERROR(ENOMEM);
> +    }
> +
> +    // calculate total number of frames to decode if possible
> +    if (samples != 0xFFFFFFFF) {
> +        ctx->num_frames        = ((samples - 1) / sconf->frame_length) + 1;

> +        ctx->last_frame_length = samples % ctx->sconf.frame_length;
> +        if (!ctx->last_frame_length) {
> +            ctx->last_frame_length = sconf->frame_length;
> +        }

that's the same as
(L-1) % F + 1
(with L = samples and F = frame_length), I think (unchecked though)

> +    } else {
> +        ctx->num_frames        = 0;
> +        ctx->last_frame_length = 0;
> +    }
> +
> +
> +    // read channel config
> +    if (sconf->chan_config) {
> +        if (buffer_size < 2)
> +            return -1;
> +
> +        sconf->chan_config_info = get_bits(&gb, 16);
> +        buffer_size -= 2;
> +        // TODO: use this to set avctx->channel_layout
> +    }
> +
> +
> +    // read channel sorting
> +    if (sconf->chan_sort && avctx->channels > 1) {
> +        int chan_pos_bits = av_ceil_log2(avctx->channels);
> +        int bytes_needed  = (avctx->channels * chan_pos_bits + 7) >> 3;
> +        if (buffer_size < bytes_needed)
> +            return -1;
> +

> +        if(!(sconf->chan_pos = av_malloc(avctx->channels * sizeof(int))))
> +            return AVERROR(ENOMEM);

sizeof(*sconf->chan_pos)

[...]
> +
> +/** Reads and decodes a Rice codeword.
> + */

> +static int64_t decode_rice(GetBitContext *gb, unsigned int k)

are 64 bits really needed here?

[...]

> +        }
> +    }
> +}
> +
> +

> +/** Converts PARCOR coefficient k to direct filter coefficient.
> + */
> +static void parcor_to_lpc(unsigned int k, int32_t *par, int32_t *cof)
> +{
> +    int i;
> +
> +    for (i = 0; i < (k+1) >> 1; i++) {
> +        int32_t tmp1 =   cof[    i    ]
> +                       + (((int64_t)par[k] * (int64_t)cof[k - i - 1] + (1 << 19)) >> 20);
> +        int32_t tmp2 =   cof[k - i - 1]
> +                       + (((int64_t)par[k] * (int64_t)cof[    i    ] + (1 << 19)) >> 20);
> +        cof[k - i - 1] = tmp2;
> +        cof[    i    ] = tmp1;

tmp2 looks avoidable

[...]
> +/** Reads the block data for a non-constant block
> + */
> +static int read_var_block(ALSDecContext *ctx, unsigned int ra_block,
> +                          int64_t *raw_samples, unsigned int block_length,
> +                          unsigned int *js_blocks, int64_t *raw_other,
> +                          unsigned int *shift_lsbs)
> +{
> +    ALSSpecificConfig *sconf = &ctx->sconf;
> +    AVCodecContext *avctx    = ctx->avctx;
> +    GetBitContext *gb        = &ctx->gb;
> +    unsigned int k;
> +    unsigned int s[8];
> +    unsigned int sub_blocks, sb_length;
> +    unsigned int opt_order  = 1;
> +    int32_t      *quant_cof = ctx->quant_cof;
> +    int32_t      *lpc_cof   = ctx->lpc_cof;
> +    unsigned int start      = 0;
> +    int          sb, smp;
> +    int64_t      y;
> +
> +    *js_blocks  = get_bits1(gb);
> +
> +    // determine the number of sub blocks for entropy decoding
> +    if (!sconf->bgmc && !sconf->sb_part)
> +        sub_blocks = 1;
> +    else if (sconf->bgmc && sconf->sb_part)
> +        sub_blocks = 1 << get_bits(gb, 2);
> +    else
> +        sub_blocks = 1 << (2 * get_bits1(gb));
> +
> +    // do not continue in case of a damaged stream since
> +    // block_length must be evenly divisible by sub_blocks
> +    if (block_length % sub_blocks) {
> +        av_log(avctx, AV_LOG_WARNING,
> +               "Block length is not evenly divisible by the number of sub blocks.\n");
> +        return -1;
> +    }
> +
> +    sb_length = block_length / sub_blocks;
> +
> +
> +    if (sconf->bgmc) {
> +        // TODO: BGMC mode
> +    } else {
> +        s[0] = get_bits(gb, (sconf->resolution > 1) ? 5 : 4);
> +        for (k = 1; k < sub_blocks; k++)
> +            s[k] = s[k - 1] + decode_rice(gb, 0);
> +    }
> +
> +    if (get_bits1(gb)) {
> +        *shift_lsbs = get_bits(gb, 4) + 1;
> +    }
> +
> +
> +    if (!sconf->rlslms) {
> +        int64_t quant_index;
> +
> +        if (sconf->adapt_order) {
> +            int opt_order_length =
> +                    FFMIN(
> +                    av_ceil_log2(sconf->max_order+1),
> +                    FFMAX(av_ceil_log2((block_length >> 3) - 1), 1)
> +                    );
> +            opt_order = get_bits(gb, opt_order_length);
> +        } else {
> +            opt_order = sconf->max_order;
> +        }
> +
> +        if (opt_order) {
> +            if (sconf->coef_table == 3) {
> +

> +                // read coefficient 0
> +                quant_index = get_bits(gb, 7);
> +                quant_cof[0] = parcor_scaled_values[quant_index];;
> +
> +                // read coefficient 1
> +                quant_index = get_bits(gb, 7);
> +                quant_cof[1] = -parcor_scaled_values[quant_index];

the quant_index intermediate variable is unneeded

[...]
> +/** Decodes blocks independently.
> + */
> +static int decode_blocks_ind(ALSDecContext *ctx, unsigned int ra_frame,
> +                             unsigned int c, unsigned int *div_blocks,
> +                             unsigned int *js_blocks)
> +{
> +    int64_t *raw_sample;
> +    unsigned int b;
> +    raw_sample = ctx->raw_samples[c];
> +
> +    for (b = 0; b < ctx->num_blocks; b++) {
> +        if (read_block_data(ctx, ra_frame, raw_sample,
> +                            div_blocks[b], &js_blocks[0], NULL)) {

> +            // damaged block, write zero for the rest of the frame
> +            while (b < ctx->num_blocks) {
> +                memset(raw_sample, 0, div_blocks[b]);
> +                raw_sample += div_blocks[b];
> +                b++;
> +            }
> +            return -1;
[...]
> +            // damaged block, write zero for the rest of the frame
> +            while (b < ctx->num_blocks) {
> +                memset(raw_samples_L, 0, div_blocks[b]);
> +                memset(raw_samples_R, 0, div_blocks[b]);
> +                raw_samples_L += div_blocks[b];
> +                raw_samples_R += div_blocks[b];
> +                b++;
> +            }
[...]
> +                // damaged block, write zero for the rest of the frame
> +                while (b < ctx->num_blocks) {
> +                    memset(raw_samples_L, 0, div_blocks[b]);
> +                    raw_samples_L += div_blocks[b];
> +                    b++;
> +                }

can't these be factored/combined?

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

When the tyrant has disposed of foreign enemies by conquest or treaty, and
there is nothing more to fear from them, then he is always stirring up
some war or other, in order that the people may require a leader. -- Plato
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20090824/3a530362/attachment.pgp>