[FFmpeg-devel] [PATCH] MLP/TrueHD Decoder - 2nd try

Michael Niedermayer michaelni
Tue Jul 1 03:38:47 CEST 2008


On Tue, Jul 01, 2008 at 01:55:39AM +0100, Ramiro Polla wrote:
> Hello Michael,
[...]
>>>     //@{
>>>     /** Restart header data */
>>>     //! The sync word used at the start of the last restart header
>>>     uint16_t    restart_sync_word[MAX_SUBSTREAMS];
>>>
>>>     //! The index of the first channel coded in this substream
>>>     uint8_t     min_channel[MAX_SUBSTREAMS];
>>>     //! The index of the last channel coded in this substream
>>>     uint8_t     max_channel[MAX_SUBSTREAMS];
>>>     //! The number of channels input into the rematrix stage
>>>     uint8_t     max_matrix_channel[MAX_SUBSTREAMS];
>>>
>>>     //! The left shift applied to random noise in 0x31ea substreams
>>>     uint8_t     noise_shift[MAX_SUBSTREAMS];
>>>     //! The current seed value for the pseudorandom noise generator(s)
>>>     uint32_t    noisegen_seed[MAX_SUBSTREAMS];
>>>
>>>     //! Does this substream contain extra info to check the size of VLC 
>>> blocks?
>>>     uint8_t     data_check_present[MAX_SUBSTREAMS];
>>>
>>>     //! Bitmask of which parameter sets are conveyed in a decoding 
>>> parameter block
>>>     uint8_t     param_presence_flags[MAX_SUBSTREAMS];
>>> #define PARAM_BLOCKSIZE     (1 << 7)
>>> #define PARAM_MATRIX        (1 << 6)
>>> #define PARAM_OUTSHIFT      (1 << 5)
>>> #define PARAM_QUANTSTEP     (1 << 4)
>>> #define PARAM_FIR           (1 << 3)
>>> #define PARAM_IIR           (1 << 2)
>>> #define PARAM_HUFFOFFSET    (1 << 1)
>>>     //@}
>>>
>>>     //@{
>>>     /** Matrix data */
>>>
>>>     //! Number of matrices to be applied
>>>     uint8_t     num_primitive_matrices[MAX_SUBSTREAMS];
>>>
>>>     //! Output channel of matrix
>>>     uint8_t     matrix_ch[MAX_SUBSTREAMS][MAX_MATRICES];
>>>
>>>     //! Whether the LSBs of the matrix output are encoded in the 
>>> bitstream
>>>     uint8_t     lsb_bypass[MAX_SUBSTREAMS][MAX_MATRICES];
>>>     //! Matrix coefficients, stored as 2.14 fixed point
>>>     int32_t     
>>> matrix_coeff[MAX_SUBSTREAMS][MAX_MATRICES][MAX_CHANNELS+2];
>>>     //! Left shift to apply to noise values in 0x31eb substreams
>>>     uint8_t     matrix_noise_shift[MAX_SUBSTREAMS][MAX_MATRICES];
>>>     //@}
>>>
>>>     //! Left shift to apply to huffman-decoded residuals
>>>     uint8_t     quant_step_size[MAX_SUBSTREAMS][MAX_CHANNELS];
>>>
>>>     //! Number of PCM samples in current audio block
>>>     uint16_t    blocksize[MAX_SUBSTREAMS];
>>>     //! Number of PCM samples decoded so far in this frame
>>>     uint16_t    blockpos[MAX_SUBSTREAMS];
>>>
>>>     //! Left shift to apply to decoded PCM values to get final 24-bit 
>>> output
>>>     int8_t      output_shift[MAX_SUBSTREAMS][MAX_CHANNELS];
>>>
>>>     //@{
>>>     /** Filter data. */
>>> #define FIR 0
>>> #define IIR 1
>>>     //! Number of taps in filter
>>>     uint8_t     filter_order[MAX_CHANNELS][2];
>>>     //! Right shift to apply to output of filter
>>>     uint8_t     filter_coeff_q[MAX_CHANNELS][2];
>>>
>>>     uint8_t     filter_index[MAX_CHANNELS][2];
>>>
>>>     int32_t     filter_coeff[MAX_CHANNELS][2][MAX_FILTER_ORDER];
>>>     int32_t     filter_state[MAX_CHANNELS][2][MAX_FILTER_ORDER];
>>>     //@}
>>>
>>>     //@{
>>>     /** Sample data coding infomation */
>>>     //! Offset to apply to residual values
>>>     int16_t     huff_offset[MAX_CHANNELS];
>>>     //! Sign/rounding corrected version of huff_offset
>>>     int32_t     sign_huff_offset[MAX_CHANNELS];
>>>     //! Which VLC codebook to use to read residuals
>>>     uint8_t     codebook[MAX_CHANNELS];
>>>     //! Size of residual suffix not encoded using VLC
>>>     uint8_t     huff_lsbs[MAX_CHANNELS];
>>>     //@}
>>>
>>>     //! Running XOR of all output samples
>>>     int32_t     lossless_check_data[MAX_SUBSTREAMS];
>>>
>>>     int8_t      noise_buffer[MAX_BLOCKSIZE_POW2];
>>>     int8_t      bypassed_lsbs[MAX_BLOCKSIZE][MAX_CHANNELS];
>>>     int32_t     sample_buffer[MAX_BLOCKSIZE][MAX_CHANNELS+2];
>>> } MLPDecodeContext;
>> There are so many blah[substr] all over the place, maybe putting these
>> in a SubStream struct would simplify the code?
>
> What kind of simplifications did you have in mind? The result of a half-job 
> looked uglier to me. Attached substr_struct.diff patch is what I started 
> doing.

Well, my idea was more like:

-m->min_channel       [substr] = get_bits(gbp, 4);
-m->max_channel       [substr] = get_bits(gbp, 4);
-m->max_matrix_channel[substr] = get_bits(gbp, 4);
+s->min_channel       = get_bits(gbp, 4);
+s->max_channel       = get_bits(gbp, 4);
+s->max_matrix_channel= get_bits(gbp, 4);

Of course if this doesnt work out and ends up messy, slow or otherwise
has issues then it shouldnt be done ...


[...]
>> [...]
>>> /** Generate a PCM sample using the prediction filters and a residual 
>>> value
>>>  *  read from the data stream, and update the filter state.
>>>  */
>>>
>>> static int filter_sample(MLPDecodeContext *m, unsigned int substr,
>>>                          unsigned int channel, int32_t residual)
>>> {
>>>     unsigned int i, j, index;
>>>     int64_t accum = 0;
>>>     int32_t result;
>>>
>>>     /* TODO: Move this code to DSPContext? */
>>>
>>> #define INDEX(channel, order, pos) \
>>>     ((m->filter_index[channel][order] + (pos)) & (MAX_FILTER_ORDER - 1))
>>>
>>>     for (j = 0; j < 2; j++)
>>>         for (i = 0; i < m->filter_order[channel][j]; i++)
>>>             accum += 
>>> (int64_t)m->filter_state[channel][j][INDEX(channel,j,i)] *
>>>                      m->filter_coeff[channel][j][i];
>>>
>>>     accum = accum >> m->filter_coeff_q[channel][FIR];
>>>     result = (accum + residual)
>>>                 & ~((1 << m->quant_step_size[substr][channel]) - 1);
>>>
>>>     index = INDEX(channel, FIR, -1);
>>>     m->filter_state[channel][FIR][index] = result;
>>>     m->filter_index[channel][FIR] = index;
>>>
>>>     index = INDEX(channel, IIR, -1);
>>>     m->filter_state[channel][IIR][index] = result - accum;
>>>     m->filter_index[channel][IIR] = index;
>>>
>>> #undef INDEX
>>>
>>>     return result;
>>> }
>> Why the complicated indexing?
>> how big would filter_state be if it where a normal flat array? 160 
>> entries?
>
> It would need
> sizeof(int32_t) * MAX_BLOCKSIZE * 2 * MAX_CHANNELS = 20480 = 20k
> bytes more in the context, and attached filter_index_flat_array.diff patch. 

> I assume that's not ok =)

yes, but do we really need that amount?
if the huff decode and filter loops are split (i assume thats possible) then
we can do a
for(channels)
    for(sample in block)
which would only need sizeof(int32_t) * 2 * MAX_BLOCKSIZE
and a memcpy() or 2 for a reading writing the state


[...]
>>>     for (i = 0; i < m->blocksize[substr]; i++) {
>>>         for (mat = 0; mat < m->num_primitive_matrices[substr]; mat++)
>>>             if (m->lsb_bypass[substr][mat])
>>>                 m->bypassed_lsbs[i + m->blockpos[substr]][mat] = 
>>> get_bits1(gbp);
>>>
>>>         for (ch = m->min_channel[substr]; ch <= m->max_channel[substr]; 
>>> ch++) {
>>>             int32_t sample;
>>>
>>>             if (read_huff(m, gbp, substr, ch, &sample) < 0)
>>>                 return -1;
>> please use the return value instead of passing pointers to it
>
> sample may be positive or negative, so a simple < check won't help. But 
> sample must not be bigger than 24 bits.
>
> Is attached read_huff_return_bad_code.diff patch ok?

id use INT32_MAX besides this yes its ok


>
>> [...]
>>>         for (i = 0; i < m->blockpos[substr]; i++) {
>>>             int64_t accum = 0;
>>>             for (src_ch = 0; src_ch <= maxchan; src_ch++) {
>>>                 accum += (int64_t)m->sample_buffer[i][src_ch]
>>>                                   * m->matrix_coeff[substr][mat][src_ch];
>>>             }
>>>             if (m->matrix_noise_shift[substr][mat]) {
>>>                 uint32_t index = m->num_primitive_matrices[substr] - mat;
>>>                 index = (i * (index * 2 + 1) + index) & 
>>> (m->access_unit_size_pow2 - 1);
>>>                 accum += m->noise_buffer[index] << 
>>> (m->matrix_noise_shift[substr][mat] + 7);
>>>             }
>>>             m->sample_buffer[i][dest_ch] = ((accum >> 14) & ~((1 << 
>>> m->quant_step_size[substr][dest_ch]) - 1))
>>>                                              + m->bypassed_lsbs[i][mat];
>>>         }
>> following may be faster ...
>> index = m->num_primitive_matrices[substr] - mat;
>> index2= 2*index+1;
>> for (i = 0; i < m->blockpos[substr]; i++) {
>> ...
>>     if (m->matrix_noise_shift[substr][mat]) {
>>         index &= m->access_unit_size_pow2 - 1;
>>         accum += m->noise_buffer[index] << 
>> (m->matrix_noise_shift[substr][mat] + 7);
>>         index += index2;
>>     }
>
> I don't have any samples that touch this code. I'm still waiting for Ian to 
> send me more samples (and I'm searching for some more also).
>
> Do you prefer if I go ahead and change it anyways?

hmm do what you prefer

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

I know you won't believe me, but the highest form of Human Excellence is
to question oneself and others. -- Socrates
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20080701/d0647ed8/attachment.pgp>



More information about the ffmpeg-devel mailing list