[FFmpeg-devel] [PATCH] avcodec/mlpdec: Add decoding of object audio data

Sun Mar 23 19:31:58 EET 2025

On 22/03/2025 18:49, Massimo Eynard wrote:
> This patch adds support for decoding the fourth MLP substream
> which contains the 16-channel presentation used for Atmos
> audio objects.
> 
> By default only the first three substreams are decoded. The fourth
> is decoded only when the new extract_objects flag is enabled, because
> the resulting presentation contains audio object feeds instead of
> classic loudspeaker feeds.
> 
> As this introduces interpolation of primitive matrices, coefficient
> precision has been increased to 2.18 fixed point. This requires a DSP
> code upgrade, which has been done for the C and x86 implementations
> but not for the ARM implementation.
> 
> Adds two FATE tests using the existing atmos.thd sample to cover
> these changes.
> 
> Signed-off-by: Massimo Eynard <eynard.massimo at gmail.com>
> ---
>   libavcodec/arm/mlpdsp_armv5te.S  |   2 +-
>   libavcodec/arm/mlpdsp_init_arm.c |   3 +-
>   libavcodec/mlp.h                 |  10 +-
>   libavcodec/mlp_parse.c           |  31 ++-
>   libavcodec/mlp_parse.h           |   1 +
>   libavcodec/mlp_parser.c          |  11 +-
>   libavcodec/mlpdec.c              | 389 +++++++++++++++++++++++++++----
>   libavcodec/mlpdsp.c              |  50 +++-
>   libavcodec/mlpdsp.h              |  25 ++
>   libavcodec/x86/mlpdsp.asm        |  19 +-
>   tests/fate/truehd.mak            |  10 +
>   11 files changed, 476 insertions(+), 75 deletions(-)
> 
> diff --git a/libavcodec/arm/mlpdsp_armv5te.S b/libavcodec/arm/mlpdsp_armv5te.S
> index d31568611c..5210f391ab 100644
> --- a/libavcodec/arm/mlpdsp_armv5te.S
> +++ b/libavcodec/arm/mlpdsp_armv5te.S
> @@ -21,7 +21,7 @@
>   
>   #include "libavutil/arm/asm.S"
>   
> -#define MAX_CHANNELS        8
> +#define MAX_CHANNELS       16
>   #define MAX_FIR_ORDER       8
>   #define MAX_IIR_ORDER       4
>   #define MAX_RATEFACTOR      4
> diff --git a/libavcodec/arm/mlpdsp_init_arm.c b/libavcodec/arm/mlpdsp_init_arm.c
> index 34a5f61e1d..50c3cf5488 100644
> --- a/libavcodec/arm/mlpdsp_init_arm.c
> +++ b/libavcodec/arm/mlpdsp_init_arm.c
> @@ -113,6 +113,7 @@ static int32_t (*mlp_select_pack_output_armv6(uint8_t *ch_assign,
>           ch_index = 2;
>           break;
>       default:
> +        // max_matrix_channel > 7 requires &7 lossless buffer channel shift
>           return ff_mlp_pack_output;
>       }
>   
> @@ -139,7 +140,7 @@ av_cold void ff_mlpdsp_init_arm(MLPDSPContext *c)
>   
>       if (have_armv5te(cpu_flags)) {
>           c->mlp_filter_channel = ff_mlp_filter_channel_arm;
> -        c->mlp_rematrix_channel = ff_mlp_rematrix_channel_arm;
> +        // c->mlp_rematrix_channel = ff_mlp_rematrix_channel_arm; // TODO: update to 2.18 coeff
>       }
>       if (have_armv6(cpu_flags))
>           c->mlp_select_pack_output = mlp_select_pack_output_armv6;
> diff --git a/libavcodec/mlp.h b/libavcodec/mlp.h
> index bec414c680..264ef64cf1 100644
> --- a/libavcodec/mlp.h
> +++ b/libavcodec/mlp.h
> @@ -31,19 +31,19 @@
>   
>   /** Last possible matrix channel for each codec */
>   #define MAX_MATRIX_CHANNEL_MLP      5
> -#define MAX_MATRIX_CHANNEL_TRUEHD   7
> +#define MAX_MATRIX_CHANNEL_TRUEHD  15
>   /** Maximum number of channels in a valid stream.
>    *  MLP   : 5.1 + 2 noise channels -> 8 channels
> - *  TrueHD: 7.1                    -> 8 channels
> + *  TrueHD: up to 16-ch pres       -> 16 channels
>    */
> -#define MAX_CHANNELS                8
> +#define MAX_CHANNELS               16
>   
>   /** Maximum number of matrices used in decoding; most streams have one matrix
>    *  per output channel, but some rematrix a channel (usually 0) more than once.
>    */
>   #define MAX_MATRICES_MLP            6
> -#define MAX_MATRICES_TRUEHD         8
> -#define MAX_MATRICES                8
> +#define MAX_MATRICES_TRUEHD        16
> +#define MAX_MATRICES               16
>   
>   /** Maximum number of substreams that can be decoded.
>    *  MLP's limit is 2. TrueHD supports at least up to 3.
> diff --git a/libavcodec/mlp_parse.c b/libavcodec/mlp_parse.c
> index 924c731439..c94da860d0 100644
> --- a/libavcodec/mlp_parse.c
> +++ b/libavcodec/mlp_parse.c
> @@ -85,7 +85,7 @@ static int mlp_get_major_sync_size(const uint8_t * buf, int bufsize)
>   
>   int ff_mlp_read_major_sync(void *log, MLPHeaderInfo *mh, GetBitContext *gb)
>   {
> -    int ratebits, channel_arrangement, header_size;
> +    int ratebits, channel_arrangement, header_size, extra_ch_length;
>       uint16_t checksum;
>   
>       av_assert1(get_bits_count(gb) == 0);
> @@ -163,7 +163,34 @@ int ff_mlp_read_major_sync(void *log, MLPHeaderInfo *mh, GetBitContext *gb)
>       mh->extended_substream_info = get_bits(gb, 2);
>       mh->substream_info = get_bits(gb, 8);
>   
> -    skip_bits_long(gb, (header_size - 18) * 8);
> +    extra_ch_length = 0;
> +    mh->channels_thd_stream3 = 0;
> +
> +    if (mh->stream_type == 0xba) {
> +        skip_bits_long(gb, 63);
> +
> +        extra_ch_length = 64;
> +        if (get_bits1(gb) && (mh->substream_info & 0x80)) {
> +            /* 16ch_channel_meaning */
> +            int length = (get_bits(gb, 4) + 1) << 1;
> +            if (header_size - 26 < length) {
> +                av_log(log, AV_LOG_ERROR, "packet too short, "
> +                    "unable to read 16ch extra meaning in major sync %d %d\n",
> +                    header_size, length);
> +                return -1;
> +            }
> +
> +            skip_bits_long(gb, 5+6); // dialogue norm/mix level
> +            mh->channels_thd_stream3 = get_bits(gb, 5) + 1;
> +            if (!get_bits1(gb)) { // dyn_object_only
> +                avpriv_request_sample(log, "16ch presentation with a mixture of channels");
> +                return AVERROR_PATCHWELCOME;
> +            }
> +            extra_ch_length += 21;
> +        }
> +    }
> +
> +    skip_bits_long(gb, (header_size - 18) * 8 - extra_ch_length);
>   
>       return 0;
>   }
> diff --git a/libavcodec/mlp_parse.h b/libavcodec/mlp_parse.h
> index 5f1f953cfe..8011566ff7 100644
> --- a/libavcodec/mlp_parse.h
> +++ b/libavcodec/mlp_parse.h
> @@ -47,6 +47,7 @@ typedef struct MLPHeaderInfo
>       int channels_mlp;                       ///< Channel count for MLP streams
>       int channels_thd_stream1;               ///< Channel count for substream 1 of TrueHD streams ("6-channel presentation")
>       int channels_thd_stream2;               ///< Channel count for substream 2 of TrueHD streams ("8-channel presentation")
> +    int channels_thd_stream3;               ///< Channel count for substream 3 of TrueHD streams ("16-channel presentation")
>       uint64_t channel_layout_mlp;            ///< Channel layout for MLP streams
>       uint64_t channel_layout_thd_stream1;    ///< Channel layout for substream 1 of TrueHD streams ("6-channel presentation")
>       uint64_t channel_layout_thd_stream2;    ///< Channel layout for substream 2 of TrueHD streams ("8-channel presentation")
> diff --git a/libavcodec/mlp_parser.c b/libavcodec/mlp_parser.c
> index d391390dd5..f3d54c40e7 100644
> --- a/libavcodec/mlp_parser.c
> +++ b/libavcodec/mlp_parser.c
> @@ -181,10 +181,15 @@ static int mlp_parse(AVCodecParserContext *s,
>               av_channel_layout_from_mask(&avctx->ch_layout, mh.channel_layout_mlp);
>           } else { /* mh.stream_type == 0xba */
>               /* TrueHD stream */
> -            if (!mh.channels_thd_stream2) {
> -                av_channel_layout_from_mask(&avctx->ch_layout, mh.channel_layout_thd_stream1);
> -            } else {
> +            if (mh.channels_thd_stream3) {
> +                avctx->ch_layout = (AVChannelLayout) {
> +                    AV_CHANNEL_ORDER_UNSPEC,
> +                    mh.channels_thd_stream3
> +                };

Is the order really unspecified? Surely there's some understanding which 
channels map to which position.

> +            } else if (mh.channels_thd_stream2) {
>                   av_channel_layout_from_mask(&avctx->ch_layout, mh.channel_layout_thd_stream2);
> +            } else {
> +                av_channel_layout_from_mask(&avctx->ch_layout, mh.channel_layout_thd_stream1);
>               }
>           }
>   
> diff --git a/libavcodec/mlpdec.c b/libavcodec/mlpdec.c
> index e85dac36a7..14fb953265 100644
> --- a/libavcodec/mlpdec.c
> +++ b/libavcodec/mlpdec.c
> @@ -61,8 +61,8 @@ typedef struct SubStream {
>   
>       //@{
>       /** restart header data */
> -    /// The type of noise to be used in the rematrix stage.
> -    uint16_t    noise_type;
> +    /// The type of substream given by the restart header sync word.
> +    uint16_t    substream_type;
>   
>       /// The index of the first channel coded in this substream.
>       uint8_t     min_channel;
> @@ -88,6 +88,13 @@ typedef struct SubStream {
>       /// The current seed value for the pseudorandom noise generator(s).
>       uint32_t    noisegen_seed;
>   
> +    /// Maximum output_shift value.
> +    int8_t      max_shift;
> +    /// Maximum size of coded audio samples LSBs part.
> +    int8_t      max_lsbs;
> +    /// Maximum bit-depth of output audio samples.
> +    int8_t      max_bits;
> +
>       /// Set if the substream contains extra info to check the size of VLC blocks.
>       uint8_t     data_check_present;
>   
> @@ -104,11 +111,23 @@ typedef struct SubStream {
>       /// matrix output channel
>       uint8_t     matrix_out_ch[MAX_MATRICES];
>   
> -    /// Whether the LSBs of the matrix output are encoded in the bitstream.
> +    /// Size of the LSBs of the matrix output encoded in the bitstream.
>       uint8_t     lsb_bypass[MAX_MATRICES];
> -    /// Matrix coefficients, stored as 2.14 fixed point.
> +    /// Matrix coefficients fractional part size in bits.
> +    uint8_t     matrix_coeff_frac_bits[MAX_MATRICES];
> +    /// Matrix coefficients shift amount.
> +    int8_t      matrix_coeff_shift[MAX_MATRICES];
> +    /// Matrix coefficients presence mask.
> +    uint16_t    matrix_coeff_mask[MAX_MATRICES];
> +    /// Matrix coefficients, stored as 2.18 fixed point.
>       DECLARE_ALIGNED(32, int32_t, matrix_coeff)[MAX_MATRICES][MAX_CHANNELS];
> -    /// Left shift to apply to noise values in 0x31eb substreams.
> +    /// Delta matrix coefficients size in bits for 0x31ec substreams.
> +    uint8_t     delta_matrix_coeff_bits[MAX_MATRICES];
> +    /// Delta matrix coefficients precision.
> +    uint8_t     delta_matrix_coeff_prec[MAX_MATRICES];
> +    /// Delta matrix coefficients, stored as 2.18 fixed point.
> +    DECLARE_ALIGNED(32, int32_t, delta_matrix_coeff)[MAX_MATRICES][MAX_CHANNELS];
> +    /// Left shift to apply to noise values in 0x31eb and 0x31ec substreams.
>       uint8_t     matrix_noise_shift[MAX_MATRICES];
>       //@}
>   
> @@ -133,6 +152,8 @@ typedef struct MLPDecodeContext {
>       AVCodecContext *avctx;
>   
>       AVChannelLayout downmix_layout;
> +    /// Set to enable decoding of non-loudspeaker feed (objects) audio channels
> +    int extract_objects;
>   
>       /// Current access unit being read has a major sync.
>       int         is_major_sync_unit;
> @@ -267,14 +288,14 @@ static inline int read_huff_channels(MLPDecodeContext *m, GetBitContext *gbp,
>   
>       for (mat = 0; mat < s->num_primitive_matrices; mat++)
>           if (s->lsb_bypass[mat])
> -            m->bypassed_lsbs[pos + s->blockpos][mat] = get_bits1(gbp);
> +            m->bypassed_lsbs[pos + s->blockpos][mat] = get_bits(gbp, s->lsb_bypass[mat]);
>   
>       for (channel = s->min_channel; channel <= s->max_channel; channel++) {
>           ChannelParams *cp = &s->channel_params[channel];
>           int codebook = cp->codebook;
>           int quant_step_size = s->quant_step_size[channel];
>           int lsb_bits = cp->huff_lsbs - quant_step_size;
> -        int result = 0;
> +        int32_t result = 0;
>   
>           if (codebook > 0)
>               result = get_vlc2(gbp, huff_vlc[codebook-1].table,
> @@ -410,8 +431,12 @@ static int read_major_sync(MLPDecodeContext *m, GetBitContext *gb)
>           m->avctx->profile     = AV_PROFILE_TRUEHD_ATMOS;
>       }
>   
> -    /* limit to decoding 3 substreams, as the 4th is used by Dolby Atmos for non-audio data */
> -    m->max_decoded_substream = FFMIN(m->num_substreams - 1, 2);
> +    /* Limit to decoding the first 3 substreams (or allow the 4th for objects) */
> +    m->max_decoded_substream = FFMIN(m->num_substreams - 1,
> +        m->extract_objects ? 3 : 2);
> +
> +    av_log(m->avctx, AV_LOG_DEBUG, "decoding up to substream %" PRIu8 "\n",
> +        m->max_decoded_substream);
>   
>       m->avctx->sample_rate    = mh.group1_samplerate;
>       m->avctx->frame_size     = mh.access_unit_size;
> @@ -531,23 +556,22 @@ static int read_restart_header(MLPDecodeContext *m, GetBitContext *gbp,
>       int sync_word, tmp;
>       uint8_t checksum;
>       uint8_t lossless_check;
> +    uint8_t max_bit_depth;
>       int start_count = get_bits_count(gbp);
> -    int min_channel, max_channel, max_matrix_channel, noise_type;
> +    int min_channel, max_channel, max_matrix_channel;
>       const int std_max_matrix_channel = m->avctx->codec_id == AV_CODEC_ID_MLP
>                                        ? MAX_MATRIX_CHANNEL_MLP
>                                        : MAX_MATRIX_CHANNEL_TRUEHD;
>   
> -    sync_word = get_bits(gbp, 13);
> +    sync_word = get_bits(gbp, 14);
>   
> -    if (sync_word != 0x31ea >> 1) {
> +    if (sync_word < 0x31ea || 0x31ec < sync_word) {
>           av_log(m->avctx, AV_LOG_ERROR,
>                  "restart header sync incorrect (got 0x%04x)\n", sync_word);
>           return AVERROR_INVALIDDATA;
>       }
>   
> -    noise_type = get_bits1(gbp);
> -
> -    if (m->avctx->codec_id == AV_CODEC_ID_MLP && noise_type) {
> +    if (m->avctx->codec_id == AV_CODEC_ID_MLP && 0x31ea != sync_word) {
>           av_log(m->avctx, AV_LOG_ERROR, "MLP must have 0x31ea sync word.\n");
>           return AVERROR_INVALIDDATA;
>       }
> @@ -567,7 +591,7 @@ static int read_restart_header(MLPDecodeContext *m, GetBitContext *gbp,
>   
>       /* This should happen for TrueHD streams with >6 channels and MLP's noise
>        * type. It is not yet known if this is allowed. */
> -    if (max_matrix_channel > MAX_MATRIX_CHANNEL_MLP && !noise_type) {
> +    if (max_matrix_channel > MAX_MATRIX_CHANNEL_MLP && 0x31ea == sync_word) {
>           avpriv_request_sample(m->avctx,
>                                 "%d channels (more than the "
>                                 "maximum supported by the decoder)",
> @@ -582,7 +606,7 @@ static int read_restart_header(MLPDecodeContext *m, GetBitContext *gbp,
>       s->max_channel        = max_channel;
>       s->coded_channels     = ((1LL << (max_channel - min_channel + 1)) - 1) << min_channel;
>       s->max_matrix_channel = max_matrix_channel;
> -    s->noise_type         = noise_type;
> +    s->substream_type     = sync_word;
>   
>       if (mlp_channel_layout_subset(&m->downmix_layout, s->mask) &&
>           m->max_decoded_substream > substr) {
> @@ -595,8 +619,28 @@ static int read_restart_header(MLPDecodeContext *m, GetBitContext *gbp,
>   
>       s->noise_shift   = get_bits(gbp,  4);
>       s->noisegen_seed = get_bits(gbp, 23);
> +    s->max_shift     = get_bits(gbp,  4);
> +    s->max_lsbs      = get_bits(gbp,  5);
> +    s->max_bits      = get_bits(gbp,  5);
> +
> +    max_bit_depth = (0x31ec == sync_word) ? 31 : 24;
> +    if (max_bit_depth < s->max_lsbs) {
> +        av_log(m->avctx, AV_LOG_ERROR,
> +               "Max LSB size %" PRIu8 " for substream %u exceeds "
> +               "%" PRIu8 " bits.\n",
> +               s->max_lsbs, substr, max_bit_depth);
> +        return AVERROR_INVALIDDATA;
> +    }
>   
> -    skip_bits(gbp, 19);
> +    if (max_bit_depth < s->max_bits) {
> +        av_log(m->avctx, AV_LOG_ERROR,
> +               "Max output bit-depth %" PRIu8 " for substream %u exceeds "
> +               "%" PRIu8 " bits.\n",
> +               s->max_bits, substr, max_bit_depth);
> +        return AVERROR_INVALIDDATA;
> +    }
> +
> +    skip_bits(gbp, 5);
>   
>       s->data_check_present = get_bits1(gbp);
>       lossless_check = get_bits(gbp, 8);
> @@ -615,7 +659,7 @@ static int read_restart_header(MLPDecodeContext *m, GetBitContext *gbp,
>   
>       for (ch = 0; ch <= s->max_matrix_channel; ch++) {
>           int ch_assign = get_bits(gbp, 6);
> -        if (m->avctx->codec_id == AV_CODEC_ID_TRUEHD) {
> +        if (m->avctx->codec_id == AV_CODEC_ID_TRUEHD && s->mask) {
>               AVChannelLayout l;
>               enum AVChannel channel = thd_channel_layout_extract_channel(s->mask, ch_assign);
>   
> @@ -656,12 +700,19 @@ static int read_restart_header(MLPDecodeContext *m, GetBitContext *gbp,
>           cp->huff_offset      = 0;
>           cp->sign_huff_offset = -(1 << 23);
>           cp->codebook         = 0;
> -        cp->huff_lsbs        = 24;
> +        cp->huff_lsbs        = (3 == substr) ? 31 : 24;
>       }
>   
>       if (substr == m->max_decoded_substream) {
>           av_channel_layout_uninit(&m->avctx->ch_layout);
> -        av_channel_layout_from_mask(&m->avctx->ch_layout, s->mask);
> +        if (substr < 3) /* Loudspeaker feed channels */
> +            av_channel_layout_from_mask(&m->avctx->ch_layout, s->mask);
> +        else /* Object channels */
> +            m->avctx->ch_layout = (AVChannelLayout) {
> +                AV_CHANNEL_ORDER_UNSPEC,
> +                s->max_channel+1
> +            };
> +
>           m->pack_output = m->dsp.mlp_select_pack_output(s->ch_assign,
>                                                          s->output_shift,
>                                                          s->max_matrix_channel,
> @@ -760,17 +811,43 @@ static int read_filter_params(MLPDecodeContext *m, GetBitContext *gbp,
>       return 0;
>   }
>   
> -/** Read parameters for primitive matrices. */
> +/** Get the maximum number of primitive matrices allowed. */
>   
> -static int read_matrix_params(MLPDecodeContext *m, unsigned int substr, GetBitContext *gbp)
> +static int get_max_nb_primitive_matrices(MLPDecodeContext *m, unsigned int substr)
> +{
> +    switch (substr) { // limit depends on the presentation flagged in m->substream_info
> +    case 0: // substream 0 (up to 2 matrix channels)
> +        return 2;
> +    case 1: // substream 1
> +        if (m->substream_info & 0x8) // 6-ch pres carried
> +            return 6;
> +        if (m->substream_info & 0x20) // 8-ch pres carried
> +            return 8;
> +        break;
> +    case 2: // substream 2
> +        if (m->substream_info & 0x40) // 8-ch pres carried
> +            return 8;
> +        break;
> +    case 3: // substream 3
> +        if (m->substream_info & 0x80) // 16-ch pres carried (bit 7; decimal 80 would test bits 4 and 6)
> +            return 16;
> +        break;
> +    }
> +
> +    return MAX_MATRICES_TRUEHD; // no presentation flag matched: fall back to the codec-wide limit
> +}
> +
> +/** Read parameters for primitive matrices (0x31ea and 0x31eb substreams). */
> +
> +static int read_31ea_31eb_matrix_params(MLPDecodeContext *m, unsigned int substr, GetBitContext *gbp)
>   {
>       SubStream *s = &m->substream[substr];
>       unsigned int mat, ch;
>       const int max_primitive_matrices = m->avctx->codec_id == AV_CODEC_ID_MLP
>                                        ? MAX_MATRICES_MLP
> -                                     : MAX_MATRICES_TRUEHD;
> +                                     : get_max_nb_primitive_matrices(m, substr);
>   
> -    if (m->matrix_changed++ > 1) {
> +    if (++m->matrix_changed > 1) {
>           av_log(m->avctx, AV_LOG_ERROR, "Matrices may change only once per access unit.\n");
>           return AVERROR_INVALIDDATA;
>       }
> @@ -779,8 +856,9 @@ static int read_matrix_params(MLPDecodeContext *m, unsigned int substr, GetBitCo
>   
>       if (s->num_primitive_matrices > max_primitive_matrices) {
>           av_log(m->avctx, AV_LOG_ERROR,
> -               "Number of primitive matrices cannot be greater than %d.\n",
> -               max_primitive_matrices);
> +               "Number of primitive matrices cannot be greater than %d "
> +               "for substream %u of type 0x%04x.\n",
> +               max_primitive_matrices, substr, s->substream_type);
>           goto error;
>       }
>   
> @@ -803,7 +881,7 @@ static int read_matrix_params(MLPDecodeContext *m, unsigned int substr, GetBitCo
>           }
>   
>           max_chan = s->max_matrix_channel;
> -        if (!s->noise_type)
> +        if (0x31ea == s->substream_type)
>               max_chan+=2;
>   
>           for (ch = 0; ch <= max_chan; ch++) {
> @@ -811,10 +889,10 @@ static int read_matrix_params(MLPDecodeContext *m, unsigned int substr, GetBitCo
>               if (get_bits1(gbp))
>                   coeff_val = get_sbits(gbp, frac_bits + 2);
>   
> -            s->matrix_coeff[mat][ch] = coeff_val * (1 << (14 - frac_bits));
> +            s->matrix_coeff[mat][ch] = coeff_val * (1 << ((14 + 4) - frac_bits));
>           }
>   
> -        if (s->noise_type)
> +        if (0x31eb == s->substream_type)
>               s->matrix_noise_shift[mat] = get_bits(gbp, 4);
>           else
>               s->matrix_noise_shift[mat] = 0;
> @@ -828,6 +906,124 @@ error:
>       return AVERROR_INVALIDDATA;
>   }
>   
> +/** Read seed and delta (interpolation) primitive matrix parameters for 0x31ec substreams; returns 0 or AVERROR_INVALIDDATA. */
> +
> +static int read_31ec_matrix_params(MLPDecodeContext *m, unsigned int substr, GetBitContext *gbp)
> +{
> +    SubStream *s = &m->substream[substr];
> +    unsigned int mat, ch;
> +
> +    const int max_primitive_matrices = get_max_nb_primitive_matrices(m, substr);
> +
> +    if (++m->matrix_changed > 1) { // counter is bumped on every call, including rejected ones
> +        av_log(m->avctx, AV_LOG_ERROR, "Matrices may change only once per access unit.\n");
> +        return AVERROR_INVALIDDATA;
> +    }
> +
> +    /* Seed primitive matrices: guarded by one presence bit */
> +
> +    if (get_bits1(gbp)) {
> +        /* New seed primitive matrices */
> +
> +        if (get_bits1(gbp)) {
> +            /* New seed matrices parameters; when absent, the stored count and per-matrix parameters are reused */
> +            s->num_primitive_matrices = get_bits(gbp, 4) + 1; // 1..16
> +
> +            if (s->num_primitive_matrices > max_primitive_matrices) {
> +                av_log(m->avctx, AV_LOG_ERROR,
> +                    "Number of primitive matrices cannot be greater than %d "
> +                    "in substream %u of type 0x%04x.\n",
> +                    max_primitive_matrices, substr, s->substream_type);
> +                goto error;
> +            }
> +
> +            for (mat = 0; mat < s->num_primitive_matrices; mat++) {
> +                s->matrix_out_ch         [mat] = get_bits(gbp, 4);
> +                s->matrix_coeff_frac_bits[mat] = get_bits(gbp, 4);
> +                s->matrix_coeff_shift    [mat] = ((int) get_bits(gbp, 3)) - 1; // -1..6
> +                s->lsb_bypass            [mat] = get_bits(gbp, 2);
> +                s->matrix_noise_shift    [mat] = get_bits(gbp, 4);
> +                s->matrix_coeff_mask     [mat] = get_bits(gbp, s->max_matrix_channel + 1); // one presence bit per matrix channel
> +
> +                if (s->matrix_out_ch[mat] > s->max_matrix_channel) {
> +                    av_log(m->avctx, AV_LOG_ERROR,
> +                            "Invalid channel %d specified as output from matrix.\n",
> +                            s->matrix_out_ch[mat]);
> +                    goto error;
> +                }
> +                if (s->matrix_coeff_frac_bits[mat] > 14) {
> +                    av_log(m->avctx, AV_LOG_ERROR,
> +                            "Too many fractional bits specified.\n");
> +                    goto error;
> +                }
> +            }
> +        }
> +
> +        /* Seed matrices coefficients: read for every matrix, using new or reused parameters */
> +        for (mat = 0; mat < s->num_primitive_matrices; mat++) {
> +            const int coeff_shift = s->matrix_coeff_shift[mat]
> +                - s->matrix_coeff_frac_bits[mat];
> +
> +            memset(s->matrix_coeff[mat], 0, sizeof(s->matrix_coeff[mat])); // channels absent from the mask stay zero
> +
> +            for (ch = 0; ch <= s->max_matrix_channel; ch++) {
> +                int64_t coeff_val;
> +
> +                if (!((s->matrix_coeff_mask[mat] >> ch) & 0x1))
> +                    continue; // skip channel: no coefficient is coded for it
> +
> +                coeff_val = get_sbits(gbp, s->matrix_coeff_frac_bits[mat] + 2);
> +                s->matrix_coeff[mat][ch] = coeff_val * (1 << (18 + coeff_shift)); // scale by 2^(18 + shift - frac_bits)
> +            }
> +        }
> +    }
> +
> +    if (!get_bits1(gbp)) {
> +        /* No primitive matrices interpolation: all deltas are cleared */
> +        memset(s->delta_matrix_coeff, 0, sizeof(s->delta_matrix_coeff));
> +    }
> +    else if (get_bits1(gbp)) { // when this bit is 0 the stored deltas are left untouched
> +        /* New delta primitive matrices */
> +
> +        if (get_bits1(gbp)) {
> +            /* New delta primitive matrices parameters; when absent, the stored size/precision are reused */
> +
> +            for (mat = 0; mat < s->num_primitive_matrices; mat++) {
> +                s->delta_matrix_coeff_bits[mat] = get_bits(gbp, 4) + 1; // 1..16
> +                s->delta_matrix_coeff_prec[mat] = get_bits(gbp, 2);
> +            }
> +        }
> +
> +        for (mat = 0; mat < s->num_primitive_matrices; mat++) {
> +            const int coeff_shift = s->matrix_coeff_shift[mat]
> +                - s->delta_matrix_coeff_prec[mat]
> +                - s->matrix_coeff_frac_bits[mat];
> +
> +            memset(s->delta_matrix_coeff[mat], 0, sizeof(s->delta_matrix_coeff[mat]));
> +
> +            if (s->delta_matrix_coeff_bits[mat] <= 1)
> +                continue; // skip matrix: nothing is read for it and its deltas stay zero
> +
> +            for (ch = 0; ch <= s->max_matrix_channel; ch++) {
> +                int64_t coeff_val;
> +
> +                if (!((s->matrix_coeff_mask[mat] >> ch) & 0x1))
> +                    continue; // skip channel: deltas use the same presence mask as the seed coefficients
> +
> +                coeff_val = get_sbits(gbp, s->delta_matrix_coeff_bits[mat]);
> +                s->delta_matrix_coeff[mat][ch] = coeff_val * (1 << (18 + coeff_shift)); // same scaling, reduced by the delta precision
> +            }
> +        }
> +    }
> +
> +    return 0;
> +error: // invalid parameters: drop all matrices so later stages apply none
> +    s->num_primitive_matrices = 0;
> +    memset(s->matrix_out_ch, 0, sizeof(s->matrix_out_ch));
> +
> +    return AVERROR_INVALIDDATA;
> +}
> +
>   /** Read channel parameters. */
>   
>   static int read_channel_params(MLPDecodeContext *m, unsigned int substr,
> @@ -875,8 +1071,10 @@ static int read_channel_params(MLPDecodeContext *m, unsigned int substr,
>       cp->codebook  = get_bits(gbp, 2);
>       cp->huff_lsbs = get_bits(gbp, 5);
>   
> -    if (cp->codebook > 0 && cp->huff_lsbs > 24) {
> -        av_log(m->avctx, AV_LOG_ERROR, "Invalid huff_lsbs.\n");
> +    if (cp->codebook > 0 && cp->huff_lsbs > s->max_lsbs) {
> +        av_log(m->avctx, AV_LOG_ERROR, "Invalid huff_lsbs=%" PRIu8 ", "
> +               "exceeds max_lsbs=%" PRIu8 ".\n",
> +               cp->huff_lsbs, s->max_lsbs);
>           cp->huff_lsbs = 0;
>           return AVERROR_INVALIDDATA;
>       }
> @@ -910,9 +1108,14 @@ static int read_decoding_params(MLPDecodeContext *m, GetBitContext *gbp,
>           }
>   
>       if (s->param_presence_flags & PARAM_MATRIX)
> -        if (get_bits1(gbp))
> -            if ((ret = read_matrix_params(m, substr, gbp)) < 0)
> +        if (get_bits1(gbp)) {
> +            if (0x31ec == s->substream_type)
> +                ret = read_31ec_matrix_params(m, substr, gbp);
> +            else
> +                ret = read_31ea_31eb_matrix_params(m, substr, gbp);
> +            if (ret < 0)
>                   return ret;
> +        }
>   
>       if (s->param_presence_flags & PARAM_OUTSHIFT)
>           if (get_bits1(gbp)) {
> @@ -922,6 +1125,10 @@ static int read_decoding_params(MLPDecodeContext *m, GetBitContext *gbp,
>                       avpriv_request_sample(m->avctx, "Negative output_shift");
>                       s->output_shift[ch] = 0;
>                   }
> +                if (s->max_shift < s->output_shift[ch])
> +                    av_log(m->avctx, AV_LOG_WARNING,
> +                           "output_shift=%d exceeds max_shift=%d\n",
> +                           s->output_shift[ch], s->max_shift);
>               }
>               if (substr == m->max_decoded_substream)
>                   m->pack_output = m->dsp.mlp_select_pack_output(s->ch_assign,
> @@ -1103,6 +1310,56 @@ static void fill_noise_buffer(MLPDecodeContext *m, unsigned int substr)
>       s->noisegen_seed = seed;
>   }
>   
> +#if defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2
> +
> +/** Debug check: warn for each rematrixed output sample outside the signed 32-bit (substream >= 3 coded channels) or 24-bit range. */
> +
> +static void check_rematrix_output(MLPDecodeContext *m, unsigned int substr)
> +{
> +    SubStream *s = &m->substream[substr];
> +    unsigned int mat, sample;
> +
> +    for (mat = 0; mat < s->num_primitive_matrices; mat++) {
> +        unsigned int dest_ch = s->matrix_out_ch[mat];
> +
> +        const uint8_t shift = (3 <= substr && s->min_channel <= dest_ch) ? 31 : 23;
> +        const int32_t max_value = (int32_t)((1u << shift) - 1); // 2^shift - 1 always fits in int32_t
> +        const int32_t min_value = -max_value - 1; // -2^shift without an out-of-range unsigned conversion
> +
> +        for (sample = 0; sample < s->blockpos; sample++) {
> +            if (m->sample_buffer[sample][dest_ch] < min_value)
> +                av_log(m->avctx, AV_LOG_WARNING,
> +                    "rematrix negative saturation substr=%u mat=%u sample=%u "
> +                    "value=%" PRId32 "\n",
> +                    substr, mat, sample, m->sample_buffer[sample][dest_ch]);
> +            if (m->sample_buffer[sample][dest_ch] > max_value)
> +                av_log(m->avctx, AV_LOG_WARNING,
> +                    "rematrix positive saturation substr=%u mat=%u sample=%u "
> +                    "value=%" PRId32 "\n",
> +                    substr, mat, sample, m->sample_buffer[sample][dest_ch]);
> +        }
> +    }
> +}
> +
> +/** Debug check: warn when the OR of all output sample magnitudes in the block reaches 1 << max_bits. */
> +
> +static void check_output_bit_depth(MLPDecodeContext *m, unsigned int substr)
> +{
> +    SubStream *s = &m->substream[substr];
> +    uint32_t cumul_mask = 0;
> +    unsigned int chan, sample;
> +
> +    for (chan = 0; chan <= s->max_matrix_channel; chan++)
> +        for (sample = 0; sample < s->blockpos; sample++)
> +            cumul_mask |= FFABSU(m->sample_buffer[sample][chan]); // unsigned abs: defined even for INT32_MIN
> +
> +    if ((1u << s->max_bits) <= cumul_mask)
> +        av_log(m->avctx, AV_LOG_WARNING, "output audio bit-depth exceeds "
> +               "expected %u bits.\n",
> +               s->max_bits);
> +}
> +#endif
> +
>   /** Write the audio data into the output buffer. */
>   
>   static int output_data(MLPDecodeContext *m, unsigned int substr,
> @@ -1110,8 +1367,7 @@ static int output_data(MLPDecodeContext *m, unsigned int substr,
>   {
>       AVCodecContext *avctx = m->avctx;
>       SubStream *s = &m->substream[substr];
> -    unsigned int mat;
> -    unsigned int maxchan;
> +    unsigned int mat, chan, maxchan;
>       int ret;
>       int is32 = (m->avctx->sample_fmt == AV_SAMPLE_FMT_S32);
>   
> @@ -1126,7 +1382,7 @@ static int output_data(MLPDecodeContext *m, unsigned int substr,
>       }
>   
>       maxchan = s->max_matrix_channel;
> -    if (!s->noise_type) {
> +    if (0x31ea == s->substream_type) {
>           generate_2_noise_channels(m, substr);
>           maxchan += 2;
>       } else {
> @@ -1137,19 +1393,45 @@ static int output_data(MLPDecodeContext *m, unsigned int substr,
>        * samples. */
>       for (mat = 0; mat < s->num_primitive_matrices; mat++) {
>           unsigned int dest_ch = s->matrix_out_ch[mat];
> -        m->dsp.mlp_rematrix_channel(&m->sample_buffer[0][0],
> -                                    s->matrix_coeff[mat],
> -                                    &m->bypassed_lsbs[0][mat],
> -                                    m->noise_buffer,
> -                                    s->num_primitive_matrices - mat,
> -                                    dest_ch,
> -                                    s->blockpos,
> -                                    maxchan,
> -                                    s->matrix_noise_shift[mat],
> -                                    m->access_unit_size_pow2,
> -                                    MSB_MASK(s->quant_step_size[dest_ch]));
> +
> +        if (substr < 3) {
> +            /* Single primitive matrices */
> +            m->dsp.mlp_rematrix_channel(&m->sample_buffer[0][0],
> +                                        s->matrix_coeff[mat],
> +                                        &m->bypassed_lsbs[0][mat],
> +                                        m->noise_buffer,
> +                                        s->num_primitive_matrices - mat,
> +                                        dest_ch,
> +                                        s->blockpos,
> +                                        maxchan,
> +                                        s->matrix_noise_shift[mat],
> +                                        m->access_unit_size_pow2,
> +                                        MSB_MASK(s->quant_step_size[dest_ch]));
> +        }
> +        else {
> +            /* Interpolated primitive matrices */
> +            m->dsp.mlp_rematrix_interp_channel(&m->sample_buffer[0][0],
> +                                               s->matrix_coeff[mat],
> +                                               s->delta_matrix_coeff[mat],
> +                                               &m->bypassed_lsbs[0][mat],
> +                                               m->noise_buffer,
> +                                               s->num_primitive_matrices - mat,
> +                                               dest_ch,
> +                                               s->blockpos,
> +                                               maxchan,
> +                                               s->matrix_noise_shift[mat],
> +                                               m->access_unit_size_pow2,
> +                                               MSB_MASK(s->quant_step_size[dest_ch]));
> +
> +            for (chan = 0; chan <= maxchan; chan++)
> +                s->matrix_coeff[mat][chan] += s->delta_matrix_coeff[mat][chan];
> +        }
>       }
>   
> +#if defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2
> +    check_rematrix_output(m, substr);
> +#endif
> +
>       /* get output buffer */
>       frame->nb_samples = s->blockpos;
>       if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
> @@ -1163,6 +1445,10 @@ static int output_data(MLPDecodeContext *m, unsigned int substr,
>                                               s->max_matrix_channel,
>                                               is32);
>   
> +#if defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2
> +    check_output_bit_depth(m, substr);
> +#endif
> +
>       /* Update matrix encoding side data */
>       if (s->matrix_encoding != s->prev_matrix_encoding) {
>           if ((ret = ff_side_data_update_matrix_encoding(frame, s->matrix_encoding)) < 0)
> @@ -1320,6 +1606,7 @@ static int read_access_unit(AVCodecContext *avctx, AVFrame *frame,
>                    (avctx->ch_layout.nb_channels == 8 &&
>                     ((m->substream_info >> 4) & 0x7) != 0x7 &&
>                     ((m->substream_info >> 4) & 0x7) != 0x6 &&
> +                  ((m->substream_info >> 4) & 0x7) != 0x4 &&
>                     ((m->substream_info >> 4) & 0x7) != 0x3)) &&
>                   substr > 0 && substr < m->max_decoded_substream &&
>                   (s->min_channel <= m->substream[substr - 1].max_channel)) {
> @@ -1429,8 +1716,10 @@ static void mlp_decode_flush(AVCodecContext *avctx)
>   #define OFFSET(x) offsetof(MLPDecodeContext, x)
>   #define FLAGS (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM)
>   static const AVOption options[] = {
> -    { "downmix", "Request a specific channel layout from the decoder", OFFSET(downmix_layout),
> -        AV_OPT_TYPE_CHLAYOUT, {.str = NULL}, .flags = FLAGS },
> +    { "downmix", "Request a specific channel layout from the decoder",
> +        OFFSET(downmix_layout), AV_OPT_TYPE_CHLAYOUT, {.str = NULL}, .flags = FLAGS },
> +    { "extract_objects", "Enable extraction of audio object channels",
> +        OFFSET(extract_objects), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, .flags = FLAGS },
>       { NULL },

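IMHO the extract_objects option should be enabled by default. All streams
come with compatibility AC3 mixes too, so a classic loudspeaker
presentation is still available to anyone who needs one.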

>   };
>   
> diff --git a/libavcodec/mlpdsp.c b/libavcodec/mlpdsp.c
> index cb40160f67..e28006f2c4 100644
> --- a/libavcodec/mlpdsp.c
> +++ b/libavcodec/mlpdsp.c
> @@ -79,11 +79,51 @@ void ff_mlp_rematrix_channel(int32_t *samples,
>   
>           if (matrix_noise_shift) {
>               index &= access_unit_size_pow2 - 1;
> -            accum += noise_buffer[index] * (1 << (matrix_noise_shift + 7));
> +            accum += noise_buffer[index] * (1 << (matrix_noise_shift + 11));
>               index += index2;
>           }
>   
> -        samples[dest_ch] = ((accum >> 14) & mask) + *bypassed_lsbs;
> +        samples[dest_ch] = ((accum >> 18) & mask) + *bypassed_lsbs;
> +        bypassed_lsbs += MAX_CHANNELS;
> +        samples += MAX_CHANNELS;
> +    }
> +}
> +
> +void ff_mlp_rematrix_interp_channel(int32_t *samples,
> +                                    const int32_t *seed_coeffs,
> +                                    const int32_t *delta_coeffs,
> +                                    const uint8_t *bypassed_lsbs,
> +                                    const int8_t *noise_buffer,
> +                                    int index,
> +                                    unsigned int dest_ch,
> +                                    uint16_t blockpos,
> +                                    unsigned int maxchan,
> +                                    int matrix_noise_shift,
> +                                    int access_unit_size_pow2,
> +                                    int32_t mask)
> +{
> +    unsigned int src_ch, i;
> +    int index2 = 2 * index + 1;
> +
> +    int32_t delta_inc = (1 << 16) / blockpos;
> +
> +    for (i = 0; i < blockpos; i++) {
> +        int64_t accum = 0, delta_accum = 0;
> +
> +        for (src_ch = 0; src_ch <= maxchan; src_ch++) {
> +            accum += (int64_t) samples[src_ch] * seed_coeffs[src_ch];
> +            delta_accum += (int64_t) samples[src_ch] * delta_coeffs[src_ch];
> +        }
> +
> +        accum += ((delta_accum >> 18) * i * delta_inc * (1 << 18)) >> 16;
> +
> +        if (matrix_noise_shift) {
> +            index &= access_unit_size_pow2 - 1;
> +            accum += noise_buffer[index] * (1 << (matrix_noise_shift + 11));
> +            index += index2;
> +        }
> +
> +        samples[dest_ch] = ((accum >> 18) & mask) + *bypassed_lsbs;
>           bypassed_lsbs += MAX_CHANNELS;
>           samples += MAX_CHANNELS;
>       }
> @@ -115,9 +155,10 @@ int32_t ff_mlp_pack_output(int32_t lossless_check_data,
>               int mat_ch = ch_assign[out_ch];
>               int32_t sample = sample_buffer[i][mat_ch] *
>                             (1U << output_shift[mat_ch]);
> -            lossless_check_data ^= (sample & 0xffffff) << mat_ch;
> +            lossless_check_data ^= (sample & 0xffffff) << (mat_ch & 7);
> +
>               if (is32)
> -                *data_32++ = sample * 256U;
> +                *data_32++ = sample * (1 << 8);
>               else
>                   *data_16++ = sample >> 8;
>           }
> @@ -129,6 +170,7 @@ av_cold void ff_mlpdsp_init(MLPDSPContext *c)
>   {
>       c->mlp_filter_channel = mlp_filter_channel;
>       c->mlp_rematrix_channel = ff_mlp_rematrix_channel;
> +    c->mlp_rematrix_interp_channel = ff_mlp_rematrix_interp_channel;
>       c->mlp_select_pack_output = mlp_select_pack_output;
>   #if ARCH_ARM
>       ff_mlpdsp_init_arm(c);
> diff --git a/libavcodec/mlpdsp.h b/libavcodec/mlpdsp.h
> index 7a9ac228d3..fd29db10a7 100644
> --- a/libavcodec/mlpdsp.h
> +++ b/libavcodec/mlpdsp.h
> @@ -37,6 +37,19 @@ void ff_mlp_rematrix_channel(int32_t *samples,
>                                int access_unit_size_pow2,
>                                int32_t mask);
>   
> +void ff_mlp_rematrix_interp_channel(int32_t *samples,
> +                                    const int32_t *seed_coeffs,
> +                                    const int32_t *delta_coeffs,
> +                                    const uint8_t *bypassed_lsbs,
> +                                    const int8_t *noise_buffer,
> +                                    int index,
> +                                    unsigned int dest_ch,
> +                                    uint16_t blockpos,
> +                                    unsigned int maxchan,
> +                                    int matrix_noise_shift,
> +                                    int access_unit_size_pow2,
> +                                    int32_t mask);
> +
>   int32_t ff_mlp_pack_output(int32_t lossless_check_data,
>                              uint16_t blockpos,
>                              int32_t (*sample_buffer)[MAX_CHANNELS],
> @@ -62,6 +75,18 @@ typedef struct MLPDSPContext {
>                                    int matrix_noise_shift,
>                                    int access_unit_size_pow2,
>                                    int32_t mask);
> +    void (*mlp_rematrix_interp_channel)(int32_t *samples,
> +                                        const int32_t *seed_coeffs,
> +                                        const int32_t *delta_coeffs,
> +                                        const uint8_t *bypassed_lsbs,
> +                                        const int8_t *noise_buffer,
> +                                        int index,
> +                                        unsigned int dest_ch,
> +                                        uint16_t blockpos,
> +                                        unsigned int maxchan,
> +                                        int matrix_noise_shift,
> +                                        int access_unit_size_pow2,
> +                                        int32_t mask);
>       int32_t (*(*mlp_select_pack_output)(uint8_t *ch_assign,
>                                           int8_t *output_shift,
>                                           uint8_t max_matrix_channel,
> diff --git a/libavcodec/x86/mlpdsp.asm b/libavcodec/x86/mlpdsp.asm
> index 3dc641e89e..3b232d4551 100644
> --- a/libavcodec/x86/mlpdsp.asm
> +++ b/libavcodec/x86/mlpdsp.asm
> @@ -61,12 +61,12 @@ SECTION .text
>       paddq        xm0, xm1
>       movq      accumq, xm0
>       movzx     blsbsd, byte [blsbs_ptrq]             ; load *bypassed_lsbs
> -    sar       accumq, 14                            ; accum >>= 14
> +    sar       accumq, 18                            ; accum >>= 18
>       and       accumd, maskd                         ; accum &= mask
>       add       accumd, blsbsd                        ; accum += *bypassed_lsbs
>       mov   [samplesq + dest_chq], accumd             ; samples[dest_ch] = accum
> -    add   blsbs_ptrq, 8                             ; bypassed_lsbs += MAX_CHANNELS;
> -    add     samplesq, 32                            ; samples += MAX_CHANNELS;
> +    add   blsbs_ptrq, 16                            ; bypassed_lsbs += MAX_CHANNELS;
> +    add     samplesq, 64                            ; samples += MAX_CHANNELS;
>       cmp   blsbs_ptrq, cntq
>   %endmacro
>   
> @@ -80,12 +80,12 @@ SECTION .text
>       SHLX      noiseq, mns                           ; noise_buffer[index] <<= matrix_noise_shift
>       add       accumq, noiseq                        ; accum += noise_buffer[index]
>       movzx     noised, byte [blsbs_ptrq]             ; load *bypassed_lsbs (reuse tmp noise register)
> -    sar       accumq, 14                            ; accum >>= 14
> +    sar       accumq, 18                            ; accum >>= 18
>       and       accumd, maskd                         ; accum &= mask
>       add       accumd, noised                        ; accum += *bypassed_lsbs
>       mov   [samplesq + dest_chq], accumd             ; samples[dest_ch] = accum
> -    add   blsbs_ptrq, 8                             ; bypassed_lsbs += MAX_CHANNELS;
> -    add     samplesq, 32                            ; samples += MAX_CHANNELS;
> +    add   blsbs_ptrq, 16                            ; bypassed_lsbs += MAX_CHANNELS;
> +    add     samplesq, 64                            ; samples += MAX_CHANNELS;
>       cmp   blsbs_ptrq, cntq
>   %endmacro
>   
> @@ -106,7 +106,8 @@ cglobal mlp_rematrix_channel, 0, 13, 5, samples, coeffs, blsbs_ptr, blsbs, \
>       mov     dest_chd, dest_chm                      ; load dest_chd (not needed on UNIX64)
>   %endif
>       shl     dest_chd, 2
> -    lea         cntq, [blsbs_ptrq + blockposq*8]
> +    lea         cntq, [blsbs_ptrq + blockposq*8]    ; loop end address (bypassed_lsbs + blockpos * MAX_CHANNELS)
> +    lea         cntq, [cntq + blockposq*8]
>       test        mnsd, mnsd                          ; is matrix_noise_shift != 0?
>       jne .shift                                      ; jump if true
>       cmp     maxchand, 4                             ; is maxchan < 4?
> @@ -144,7 +145,7 @@ align 16
>       DEFINE_ARGS samples, coeffs, blsbs_ptr, noise_buffer, \
>                   index, dest_ch, accum, index2, mns, \
>                   ausp, mask, cnt, noise
> -    add         mnsd, 7              ; matrix_noise_shift += 7
> +    add         mnsd, 11             ; matrix_noise_shift += 11
>   %else ; sse4
>       mov           r6, rcx            ; move rcx elsewhere so we can use cl for matrix_noise_shift
>   %if WIN64
> @@ -156,7 +157,7 @@ align 16
>       DEFINE_ARGS samples, coeffs, blsbs_ptr, mns, index, dest_ch, noise_buffer, \
>                   index2, accum, ausp, mask, cnt, noise
>   %endif
> -    lea         mnsd, [r8 + 7]       ; rcx = matrix_noise_shift + 7
> +    lea         mnsd, [r8 + 11]      ; rcx = matrix_noise_shift + 11
>   %endif ; cpuflag
>       sub        auspd, 1              ; access_unit_size_pow2 -= 1
>       cmp          r7d, 4              ; is maxchan < 4?

If the changes are that few, why not update the ARM version
(libavcodec/arm/mlpdsp_armv5te.S) too?

> diff --git a/tests/fate/truehd.mak b/tests/fate/truehd.mak
> index b0bc86a965..30c0e9628b 100644
> --- a/tests/fate/truehd.mak
> +++ b/tests/fate/truehd.mak
> @@ -18,5 +18,15 @@ fate-truehd-mono1726: CMD = md5pipe -f truehd -i $(TARGET_SAMPLES)/truehd/ticket
>   fate-truehd-mono1726: CMP = oneline
>   fate-truehd-mono1726: REF = 9be9551fac418440bb02101bfdb11df9
>   
> +FATE_TRUEHD-$(call DEMDEC, TRUEHD, TRUEHD) += fate-truehd-atmos-no-obj
> +fate-truehd-atmos-no-obj: CMD = md5pipe -f truehd -extract_objects 0 -i $(TARGET_SAMPLES)/truehd/atmos.thd -f s32le
> +fate-truehd-atmos-no-obj: CMP = oneline
> +fate-truehd-atmos-no-obj: REF = 53da6ce35c778bcc2182ef2160bf16a2
> +
> +FATE_TRUEHD-$(call DEMDEC, TRUEHD, TRUEHD) += fate-truehd-atmos-obj
> +fate-truehd-atmos-obj: CMD = md5pipe -f truehd -extract_objects 1 -i $(TARGET_SAMPLES)/truehd/atmos.thd -f s32le
> +fate-truehd-atmos-obj: CMP = oneline
> +fate-truehd-atmos-obj: REF = f19f6f8ec8b040050aaa019f016f7ddc
> +
>   FATE_SAMPLES_AUDIO += $(FATE_TRUEHD-yes)
>   fate-truehd: $(FATE_TRUEHD-yes)