[FFmpeg-devel] [PATCH] avcodec/mlpdec: Add decoding of object audio data

Massimo Eynard eynard.massimo at gmail.com
Sat Mar 22 19:49:39 EET 2025


This patch adds support for decoding the fourth MLP substream
which contains the 16-channel presentation used for Atmos
audio objects.

By default only the first three substreams are decoded. The fourth
substream is decoded only when the new extract_objects option is
enabled, because the resulting presentation contains audio object
feeds instead of classic loudspeaker feeds.

As this introduces interpolation of primitive matrices, the precision
of the matrix coefficients has been increased from 2.14 to 2.18 fixed
point. This requires updating the DSP code, which has been done for the
C and x86 implementations but not yet for the ARM implementation (the
ARM rematrix routine is disabled until it is updated).

Adds two FATE tests using the existing atmos.thd sample to cover
these changes.

Signed-off-by: Massimo Eynard <eynard.massimo at gmail.com>
---
 libavcodec/arm/mlpdsp_armv5te.S  |   2 +-
 libavcodec/arm/mlpdsp_init_arm.c |   3 +-
 libavcodec/mlp.h                 |  10 +-
 libavcodec/mlp_parse.c           |  31 ++-
 libavcodec/mlp_parse.h           |   1 +
 libavcodec/mlp_parser.c          |  11 +-
 libavcodec/mlpdec.c              | 389 +++++++++++++++++++++++++++----
 libavcodec/mlpdsp.c              |  50 +++-
 libavcodec/mlpdsp.h              |  25 ++
 libavcodec/x86/mlpdsp.asm        |  19 +-
 tests/fate/truehd.mak            |  10 +
 11 files changed, 476 insertions(+), 75 deletions(-)

diff --git a/libavcodec/arm/mlpdsp_armv5te.S b/libavcodec/arm/mlpdsp_armv5te.S
index d31568611c..5210f391ab 100644
--- a/libavcodec/arm/mlpdsp_armv5te.S
+++ b/libavcodec/arm/mlpdsp_armv5te.S
@@ -21,7 +21,7 @@
 
 #include "libavutil/arm/asm.S"
 
-#define MAX_CHANNELS        8
+#define MAX_CHANNELS       16
 #define MAX_FIR_ORDER       8
 #define MAX_IIR_ORDER       4
 #define MAX_RATEFACTOR      4
diff --git a/libavcodec/arm/mlpdsp_init_arm.c b/libavcodec/arm/mlpdsp_init_arm.c
index 34a5f61e1d..50c3cf5488 100644
--- a/libavcodec/arm/mlpdsp_init_arm.c
+++ b/libavcodec/arm/mlpdsp_init_arm.c
@@ -113,6 +113,7 @@ static int32_t (*mlp_select_pack_output_armv6(uint8_t *ch_assign,
         ch_index = 2;
         break;
     default:
+        // max_matrix_channel > 7 requires &7 lossless buffer channel shift
         return ff_mlp_pack_output;
     }
 
@@ -139,7 +140,7 @@ av_cold void ff_mlpdsp_init_arm(MLPDSPContext *c)
 
     if (have_armv5te(cpu_flags)) {
         c->mlp_filter_channel = ff_mlp_filter_channel_arm;
-        c->mlp_rematrix_channel = ff_mlp_rematrix_channel_arm;
+        // c->mlp_rematrix_channel = ff_mlp_rematrix_channel_arm; // TODO: update to 2.18 coeff
     }
     if (have_armv6(cpu_flags))
         c->mlp_select_pack_output = mlp_select_pack_output_armv6;
diff --git a/libavcodec/mlp.h b/libavcodec/mlp.h
index bec414c680..264ef64cf1 100644
--- a/libavcodec/mlp.h
+++ b/libavcodec/mlp.h
@@ -31,19 +31,19 @@
 
 /** Last possible matrix channel for each codec */
 #define MAX_MATRIX_CHANNEL_MLP      5
-#define MAX_MATRIX_CHANNEL_TRUEHD   7
+#define MAX_MATRIX_CHANNEL_TRUEHD  15
 /** Maximum number of channels in a valid stream.
  *  MLP   : 5.1 + 2 noise channels -> 8 channels
- *  TrueHD: 7.1                    -> 8 channels
+ *  TrueHD: up to 16-ch pres       -> 16 channels
  */
-#define MAX_CHANNELS                8
+#define MAX_CHANNELS               16
 
 /** Maximum number of matrices used in decoding; most streams have one matrix
  *  per output channel, but some rematrix a channel (usually 0) more than once.
  */
 #define MAX_MATRICES_MLP            6
-#define MAX_MATRICES_TRUEHD         8
-#define MAX_MATRICES                8
+#define MAX_MATRICES_TRUEHD        16
+#define MAX_MATRICES               16
 
 /** Maximum number of substreams that can be decoded.
  *  MLP's limit is 2. TrueHD supports at least up to 3.
diff --git a/libavcodec/mlp_parse.c b/libavcodec/mlp_parse.c
index 924c731439..c94da860d0 100644
--- a/libavcodec/mlp_parse.c
+++ b/libavcodec/mlp_parse.c
@@ -85,7 +85,7 @@ static int mlp_get_major_sync_size(const uint8_t * buf, int bufsize)
 
 int ff_mlp_read_major_sync(void *log, MLPHeaderInfo *mh, GetBitContext *gb)
 {
-    int ratebits, channel_arrangement, header_size;
+    int ratebits, channel_arrangement, header_size, extra_ch_length;
     uint16_t checksum;
 
     av_assert1(get_bits_count(gb) == 0);
@@ -163,7 +163,34 @@ int ff_mlp_read_major_sync(void *log, MLPHeaderInfo *mh, GetBitContext *gb)
     mh->extended_substream_info = get_bits(gb, 2);
     mh->substream_info = get_bits(gb, 8);
 
-    skip_bits_long(gb, (header_size - 18) * 8);
+    extra_ch_length = 0;
+    mh->channels_thd_stream3 = 0;
+
+    if (mh->stream_type == 0xba) {
+        skip_bits_long(gb, 63);
+
+        extra_ch_length = 64;
+        if (get_bits1(gb) && (mh->substream_info & 0x80)) {
+            /* 16ch_channel_meaning */
+            int length = (get_bits(gb, 4) + 1) << 1;
+            if (header_size - 26 < length) {
+                av_log(log, AV_LOG_ERROR, "packet too short, "
+                    "unable to read 16ch extra meaning in major sync %d %d\n",
+                    header_size, length);
+                return -1;
+            }
+
+            skip_bits_long(gb, 5+6); // dialogue norm/mix level
+            mh->channels_thd_stream3 = get_bits(gb, 5) + 1;
+            if (!get_bits1(gb)) { // dyn_object_only
+                avpriv_request_sample(log, "16ch presentation with a mixture of channels");
+                return AVERROR_PATCHWELCOME;
+            }
+            extra_ch_length += 21;
+        }
+    }
+
+    skip_bits_long(gb, (header_size - 18) * 8 - extra_ch_length);
 
     return 0;
 }
diff --git a/libavcodec/mlp_parse.h b/libavcodec/mlp_parse.h
index 5f1f953cfe..8011566ff7 100644
--- a/libavcodec/mlp_parse.h
+++ b/libavcodec/mlp_parse.h
@@ -47,6 +47,7 @@ typedef struct MLPHeaderInfo
     int channels_mlp;                       ///< Channel count for MLP streams
     int channels_thd_stream1;               ///< Channel count for substream 1 of TrueHD streams ("6-channel presentation")
     int channels_thd_stream2;               ///< Channel count for substream 2 of TrueHD streams ("8-channel presentation")
+    int channels_thd_stream3;               ///< Channel count for substream 3 of TrueHD streams ("16-channel presentation")
     uint64_t channel_layout_mlp;            ///< Channel layout for MLP streams
     uint64_t channel_layout_thd_stream1;    ///< Channel layout for substream 1 of TrueHD streams ("6-channel presentation")
     uint64_t channel_layout_thd_stream2;    ///< Channel layout for substream 2 of TrueHD streams ("8-channel presentation")
diff --git a/libavcodec/mlp_parser.c b/libavcodec/mlp_parser.c
index d391390dd5..f3d54c40e7 100644
--- a/libavcodec/mlp_parser.c
+++ b/libavcodec/mlp_parser.c
@@ -181,10 +181,15 @@ static int mlp_parse(AVCodecParserContext *s,
             av_channel_layout_from_mask(&avctx->ch_layout, mh.channel_layout_mlp);
         } else { /* mh.stream_type == 0xba */
             /* TrueHD stream */
-            if (!mh.channels_thd_stream2) {
-                av_channel_layout_from_mask(&avctx->ch_layout, mh.channel_layout_thd_stream1);
-            } else {
+            if (mh.channels_thd_stream3) {
+                avctx->ch_layout = (AVChannelLayout) {
+                    AV_CHANNEL_ORDER_UNSPEC,
+                    mh.channels_thd_stream3
+                };
+            } else if (mh.channels_thd_stream2) {
                 av_channel_layout_from_mask(&avctx->ch_layout, mh.channel_layout_thd_stream2);
+            } else {
+                av_channel_layout_from_mask(&avctx->ch_layout, mh.channel_layout_thd_stream1);
             }
         }
 
diff --git a/libavcodec/mlpdec.c b/libavcodec/mlpdec.c
index e85dac36a7..14fb953265 100644
--- a/libavcodec/mlpdec.c
+++ b/libavcodec/mlpdec.c
@@ -61,8 +61,8 @@ typedef struct SubStream {
 
     //@{
     /** restart header data */
-    /// The type of noise to be used in the rematrix stage.
-    uint16_t    noise_type;
+    /// The type of substream given by the restart header sync word.
+    uint16_t    substream_type;
 
     /// The index of the first channel coded in this substream.
     uint8_t     min_channel;
@@ -88,6 +88,13 @@ typedef struct SubStream {
     /// The current seed value for the pseudorandom noise generator(s).
     uint32_t    noisegen_seed;
 
+    /// Maximum output_shift value.
+    int8_t      max_shift;
+    /// Maximum size of coded audio samples LSBs part.
+    int8_t      max_lsbs;
+    /// Maximum bit-depth of output audio samples.
+    int8_t      max_bits;
+
     /// Set if the substream contains extra info to check the size of VLC blocks.
     uint8_t     data_check_present;
 
@@ -104,11 +111,23 @@ typedef struct SubStream {
     /// matrix output channel
     uint8_t     matrix_out_ch[MAX_MATRICES];
 
-    /// Whether the LSBs of the matrix output are encoded in the bitstream.
+    /// Size of the LSBs of the matrix output encoded in the bitstream.
     uint8_t     lsb_bypass[MAX_MATRICES];
-    /// Matrix coefficients, stored as 2.14 fixed point.
+    /// Matrix coefficients fractional part size in bits.
+    uint8_t     matrix_coeff_frac_bits[MAX_MATRICES];
+    /// Matrix coefficients shift amount.
+    int8_t      matrix_coeff_shift[MAX_MATRICES];
+    /// Matrix coefficients presence mask.
+    uint16_t    matrix_coeff_mask[MAX_MATRICES];
+    /// Matrix coefficients, stored as 2.18 fixed point.
     DECLARE_ALIGNED(32, int32_t, matrix_coeff)[MAX_MATRICES][MAX_CHANNELS];
-    /// Left shift to apply to noise values in 0x31eb substreams.
+    /// Delta matrix coefficients size in bits for 0x31ec substreams.
+    uint8_t     delta_matrix_coeff_bits[MAX_MATRICES];
+    /// Delta matrix coefficients precision.
+    uint8_t     delta_matrix_coeff_prec[MAX_MATRICES];
+    /// Delta matrix coefficients, stored as 2.18 fixed point.
+    DECLARE_ALIGNED(32, int32_t, delta_matrix_coeff)[MAX_MATRICES][MAX_CHANNELS];
+    /// Left shift to apply to noise values in 0x31eb and 0x31ec substreams.
     uint8_t     matrix_noise_shift[MAX_MATRICES];
     //@}
 
@@ -133,6 +152,8 @@ typedef struct MLPDecodeContext {
     AVCodecContext *avctx;
 
     AVChannelLayout downmix_layout;
+    /// Set to enable decoding of non-loudspeaker feed (objects) audio channels
+    int extract_objects;
 
     /// Current access unit being read has a major sync.
     int         is_major_sync_unit;
@@ -267,14 +288,14 @@ static inline int read_huff_channels(MLPDecodeContext *m, GetBitContext *gbp,
 
     for (mat = 0; mat < s->num_primitive_matrices; mat++)
         if (s->lsb_bypass[mat])
-            m->bypassed_lsbs[pos + s->blockpos][mat] = get_bits1(gbp);
+            m->bypassed_lsbs[pos + s->blockpos][mat] = get_bits(gbp, s->lsb_bypass[mat]);
 
     for (channel = s->min_channel; channel <= s->max_channel; channel++) {
         ChannelParams *cp = &s->channel_params[channel];
         int codebook = cp->codebook;
         int quant_step_size = s->quant_step_size[channel];
         int lsb_bits = cp->huff_lsbs - quant_step_size;
-        int result = 0;
+        int32_t result = 0;
 
         if (codebook > 0)
             result = get_vlc2(gbp, huff_vlc[codebook-1].table,
@@ -410,8 +431,12 @@ static int read_major_sync(MLPDecodeContext *m, GetBitContext *gb)
         m->avctx->profile     = AV_PROFILE_TRUEHD_ATMOS;
     }
 
-    /* limit to decoding 3 substreams, as the 4th is used by Dolby Atmos for non-audio data */
-    m->max_decoded_substream = FFMIN(m->num_substreams - 1, 2);
+    /* Limit to decoding the first 3 substreams (or allow the 4th for objects) */
+    m->max_decoded_substream = FFMIN(m->num_substreams - 1,
+        m->extract_objects ? 3 : 2);
+
+    av_log(m->avctx, AV_LOG_DEBUG, "decoding up to substream %" PRIu8 "\n",
+        m->max_decoded_substream);
 
     m->avctx->sample_rate    = mh.group1_samplerate;
     m->avctx->frame_size     = mh.access_unit_size;
@@ -531,23 +556,22 @@ static int read_restart_header(MLPDecodeContext *m, GetBitContext *gbp,
     int sync_word, tmp;
     uint8_t checksum;
     uint8_t lossless_check;
+    uint8_t max_bit_depth;
     int start_count = get_bits_count(gbp);
-    int min_channel, max_channel, max_matrix_channel, noise_type;
+    int min_channel, max_channel, max_matrix_channel;
     const int std_max_matrix_channel = m->avctx->codec_id == AV_CODEC_ID_MLP
                                      ? MAX_MATRIX_CHANNEL_MLP
                                      : MAX_MATRIX_CHANNEL_TRUEHD;
 
-    sync_word = get_bits(gbp, 13);
+    sync_word = get_bits(gbp, 14);
 
-    if (sync_word != 0x31ea >> 1) {
+    if (sync_word < 0x31ea || 0x31ec < sync_word) {
         av_log(m->avctx, AV_LOG_ERROR,
                "restart header sync incorrect (got 0x%04x)\n", sync_word);
         return AVERROR_INVALIDDATA;
     }
 
-    noise_type = get_bits1(gbp);
-
-    if (m->avctx->codec_id == AV_CODEC_ID_MLP && noise_type) {
+    if (m->avctx->codec_id == AV_CODEC_ID_MLP && 0x31ea != sync_word) {
         av_log(m->avctx, AV_LOG_ERROR, "MLP must have 0x31ea sync word.\n");
         return AVERROR_INVALIDDATA;
     }
@@ -567,7 +591,7 @@ static int read_restart_header(MLPDecodeContext *m, GetBitContext *gbp,
 
     /* This should happen for TrueHD streams with >6 channels and MLP's noise
      * type. It is not yet known if this is allowed. */
-    if (max_matrix_channel > MAX_MATRIX_CHANNEL_MLP && !noise_type) {
+    if (max_matrix_channel > MAX_MATRIX_CHANNEL_MLP && 0x31ea == sync_word) {
         avpriv_request_sample(m->avctx,
                               "%d channels (more than the "
                               "maximum supported by the decoder)",
@@ -582,7 +606,7 @@ static int read_restart_header(MLPDecodeContext *m, GetBitContext *gbp,
     s->max_channel        = max_channel;
     s->coded_channels     = ((1LL << (max_channel - min_channel + 1)) - 1) << min_channel;
     s->max_matrix_channel = max_matrix_channel;
-    s->noise_type         = noise_type;
+    s->substream_type     = sync_word;
 
     if (mlp_channel_layout_subset(&m->downmix_layout, s->mask) &&
         m->max_decoded_substream > substr) {
@@ -595,8 +619,28 @@ static int read_restart_header(MLPDecodeContext *m, GetBitContext *gbp,
 
     s->noise_shift   = get_bits(gbp,  4);
     s->noisegen_seed = get_bits(gbp, 23);
+    s->max_shift     = get_bits(gbp,  4);
+    s->max_lsbs      = get_bits(gbp,  5);
+    s->max_bits      = get_bits(gbp,  5);
+
+    max_bit_depth = (0x31ec == sync_word) ? 31 : 24;
+    if (max_bit_depth < s->max_lsbs) {
+        av_log(m->avctx, AV_LOG_ERROR,
+               "Max LSB size %" PRIu8 " for substream %u exceeds "
+               "%" PRIu8 " bits.\n",
+               s->max_lsbs, substr, max_bit_depth);
+        return AVERROR_INVALIDDATA;
+    }
 
-    skip_bits(gbp, 19);
+    if (max_bit_depth < s->max_bits) {
+        av_log(m->avctx, AV_LOG_ERROR,
+               "Max output bit-depth %" PRIu8 " for substream %u exceeds "
+               "%" PRIu8 " bits.\n",
+               s->max_bits, substr, max_bit_depth);
+        return AVERROR_INVALIDDATA;
+    }
+
+    skip_bits(gbp, 5);
 
     s->data_check_present = get_bits1(gbp);
     lossless_check = get_bits(gbp, 8);
@@ -615,7 +659,7 @@ static int read_restart_header(MLPDecodeContext *m, GetBitContext *gbp,
 
     for (ch = 0; ch <= s->max_matrix_channel; ch++) {
         int ch_assign = get_bits(gbp, 6);
-        if (m->avctx->codec_id == AV_CODEC_ID_TRUEHD) {
+        if (m->avctx->codec_id == AV_CODEC_ID_TRUEHD && s->mask) {
             AVChannelLayout l;
             enum AVChannel channel = thd_channel_layout_extract_channel(s->mask, ch_assign);
 
@@ -656,12 +700,19 @@ static int read_restart_header(MLPDecodeContext *m, GetBitContext *gbp,
         cp->huff_offset      = 0;
         cp->sign_huff_offset = -(1 << 23);
         cp->codebook         = 0;
-        cp->huff_lsbs        = 24;
+        cp->huff_lsbs        = (3 == substr) ? 31 : 24;
     }
 
     if (substr == m->max_decoded_substream) {
         av_channel_layout_uninit(&m->avctx->ch_layout);
-        av_channel_layout_from_mask(&m->avctx->ch_layout, s->mask);
+        if (substr < 3) /* Loudspeaker feed channels */
+            av_channel_layout_from_mask(&m->avctx->ch_layout, s->mask);
+        else /* Object channels */
+            m->avctx->ch_layout = (AVChannelLayout) {
+                AV_CHANNEL_ORDER_UNSPEC,
+                s->max_channel+1
+            };
+
         m->pack_output = m->dsp.mlp_select_pack_output(s->ch_assign,
                                                        s->output_shift,
                                                        s->max_matrix_channel,
@@ -760,17 +811,43 @@ static int read_filter_params(MLPDecodeContext *m, GetBitContext *gbp,
     return 0;
 }
 
-/** Read parameters for primitive matrices. */
+/** Get the maximum number of primitive matrices allowed. */
 
-static int read_matrix_params(MLPDecodeContext *m, unsigned int substr, GetBitContext *gbp)
+static int get_max_nb_primitive_matrices(MLPDecodeContext *m, unsigned int substr)
+{
+    switch (substr) { // limit depends on which presentation this substream carries
+    case 0: // substream 0 (up to 2 matrix channels)
+        return 2;
+    case 1: // substream 1
+        if (m->substream_info & 0x8) // 6-ch pres carried
+            return 6;
+        if (m->substream_info & 0x20) // 8-ch pres carried
+            return 8;
+        break;
+    case 2: // substream 2
+        if (m->substream_info & 0x40) // 8-ch pres carried
+            return 8;
+        break;
+    case 3: // substream 3
+        if (m->substream_info & 0x80) // 16-ch pres carried (bit 7, hex mask like the cases above)
+            return 16;
+        break;
+    }
+
+    return MAX_MATRICES_TRUEHD; // no presentation flag matched: fall back to the codec-wide limit
+}
+
+/** Read parameters for primitive matrices (0x31ea and 0x31eb substreams). */
+
+static int read_31ea_31eb_matrix_params(MLPDecodeContext *m, unsigned int substr, GetBitContext *gbp)
 {
     SubStream *s = &m->substream[substr];
     unsigned int mat, ch;
     const int max_primitive_matrices = m->avctx->codec_id == AV_CODEC_ID_MLP
                                      ? MAX_MATRICES_MLP
-                                     : MAX_MATRICES_TRUEHD;
+                                     : get_max_nb_primitive_matrices(m, substr);
 
-    if (m->matrix_changed++ > 1) {
+    if (++m->matrix_changed > 1) {
         av_log(m->avctx, AV_LOG_ERROR, "Matrices may change only once per access unit.\n");
         return AVERROR_INVALIDDATA;
     }
@@ -779,8 +856,9 @@ static int read_matrix_params(MLPDecodeContext *m, unsigned int substr, GetBitCo
 
     if (s->num_primitive_matrices > max_primitive_matrices) {
         av_log(m->avctx, AV_LOG_ERROR,
-               "Number of primitive matrices cannot be greater than %d.\n",
-               max_primitive_matrices);
+               "Number of primitive matrices cannot be greater than %d "
+               "for substream %u of type 0x%04x.\n",
+               max_primitive_matrices, substr, s->substream_type);
         goto error;
     }
 
@@ -803,7 +881,7 @@ static int read_matrix_params(MLPDecodeContext *m, unsigned int substr, GetBitCo
         }
 
         max_chan = s->max_matrix_channel;
-        if (!s->noise_type)
+        if (0x31ea == s->substream_type)
             max_chan+=2;
 
         for (ch = 0; ch <= max_chan; ch++) {
@@ -811,10 +889,10 @@ static int read_matrix_params(MLPDecodeContext *m, unsigned int substr, GetBitCo
             if (get_bits1(gbp))
                 coeff_val = get_sbits(gbp, frac_bits + 2);
 
-            s->matrix_coeff[mat][ch] = coeff_val * (1 << (14 - frac_bits));
+            s->matrix_coeff[mat][ch] = coeff_val * (1 << ((14 + 4) - frac_bits));
         }
 
-        if (s->noise_type)
+        if (0x31eb == s->substream_type)
             s->matrix_noise_shift[mat] = get_bits(gbp, 4);
         else
             s->matrix_noise_shift[mat] = 0;
@@ -828,6 +906,124 @@ error:
     return AVERROR_INVALIDDATA;
 }
 
+/** Read parameters for primitive matrices (0x31ec substreams). */
+
+static int read_31ec_matrix_params(MLPDecodeContext *m, unsigned int substr, GetBitContext *gbp)
+{
+    SubStream *s = &m->substream[substr];
+    unsigned int mat, ch;
+
+    const int max_primitive_matrices = get_max_nb_primitive_matrices(m, substr);
+
+    if (++m->matrix_changed > 1) {
+        av_log(m->avctx, AV_LOG_ERROR, "Matrices may change only once per access unit.\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* Seed primitive matrices (one flag bit tells whether new ones are coded) */
+
+    if (get_bits1(gbp)) {
+        /* New seed primitive matrices */
+
+        if (get_bits1(gbp)) {
+            /* New seed matrices parameters (otherwise the stored ones are reused) */
+            s->num_primitive_matrices = get_bits(gbp, 4) + 1;
+
+            if (s->num_primitive_matrices > max_primitive_matrices) {
+                av_log(m->avctx, AV_LOG_ERROR,
+                    "Number of primitive matrices cannot be greater than %d "
+                    "in substream %u of type 0x%04x.\n",
+                    max_primitive_matrices, substr, s->substream_type);
+                goto error;
+            }
+
+            for (mat = 0; mat < s->num_primitive_matrices; mat++) {
+                s->matrix_out_ch         [mat] = get_bits(gbp, 4);
+                s->matrix_coeff_frac_bits[mat] = get_bits(gbp, 4);
+                s->matrix_coeff_shift    [mat] = ((int) get_bits(gbp, 3)) - 1;
+                s->lsb_bypass            [mat] = get_bits(gbp, 2);
+                s->matrix_noise_shift    [mat] = get_bits(gbp, 4);
+                s->matrix_coeff_mask     [mat] = get_bits(gbp, s->max_matrix_channel + 1);
+
+                if (s->matrix_out_ch[mat] > s->max_matrix_channel) {
+                    av_log(m->avctx, AV_LOG_ERROR,
+                            "Invalid channel %d specified as output from matrix.\n",
+                            s->matrix_out_ch[mat]);
+                    goto error;
+                }
+                if (s->matrix_coeff_frac_bits[mat] > 14) {
+                    av_log(m->avctx, AV_LOG_ERROR,
+                            "Too many fractional bits specified.\n");
+                    goto error;
+                }
+            }
+        }
+
+        /* Seed matrices coefficients, rescaled to 2.18 fixed point */
+        for (mat = 0; mat < s->num_primitive_matrices; mat++) {
+            const int coeff_shift = s->matrix_coeff_shift[mat]
+                - s->matrix_coeff_frac_bits[mat];
+
+            memset(s->matrix_coeff[mat], 0, sizeof(s->matrix_coeff[mat]));
+
+            for (ch = 0; ch <= s->max_matrix_channel; ch++) {
+                int64_t coeff_val;
+
+                if (!((s->matrix_coeff_mask[mat] >> ch) & 0x1))
+                    continue; // mask bit clear: no coefficient coded, entry stays 0
+
+                coeff_val = get_sbits(gbp, s->matrix_coeff_frac_bits[mat] + 2);
+                s->matrix_coeff[mat][ch] = coeff_val * (1 << (18 + coeff_shift));
+            }
+        }
+    }
+
+    if (!get_bits1(gbp)) {
+        /* No primitive matrix interpolation: clear all delta coefficients */
+        memset(s->delta_matrix_coeff, 0, sizeof(s->delta_matrix_coeff));
+    }
+    else if (get_bits1(gbp)) {
+        /* New delta primitive matrices (otherwise the stored deltas are kept) */
+
+        if (get_bits1(gbp)) {
+            /* New delta primitive matrices parameters */
+
+            for (mat = 0; mat < s->num_primitive_matrices; mat++) {
+                s->delta_matrix_coeff_bits[mat] = get_bits(gbp, 4) + 1;
+                s->delta_matrix_coeff_prec[mat] = get_bits(gbp, 2);
+            }
+        }
+
+        for (mat = 0; mat < s->num_primitive_matrices; mat++) {
+            const int coeff_shift = s->matrix_coeff_shift[mat]
+                - s->delta_matrix_coeff_prec[mat]
+                - s->matrix_coeff_frac_bits[mat];
+
+            memset(s->delta_matrix_coeff[mat], 0, sizeof(s->delta_matrix_coeff[mat]));
+
+            if (s->delta_matrix_coeff_bits[mat] <= 1)
+                continue; // skip matrix: no deltas are read, its delta row stays 0
+
+            for (ch = 0; ch <= s->max_matrix_channel; ch++) {
+                int64_t coeff_val;
+
+                if (!((s->matrix_coeff_mask[mat] >> ch) & 0x1))
+                    continue; // mask bit clear: no delta coded, entry stays 0
+
+                coeff_val = get_sbits(gbp, s->delta_matrix_coeff_bits[mat]);
+                s->delta_matrix_coeff[mat][ch] = coeff_val * (1 << (18 + coeff_shift));
+            }
+        }
+    }
+
+    return 0;
+error:
+    s->num_primitive_matrices = 0; // leave no matrices active after a parse error
+    memset(s->matrix_out_ch, 0, sizeof(s->matrix_out_ch));
+
+    return AVERROR_INVALIDDATA;
+}
+
 /** Read channel parameters. */
 
 static int read_channel_params(MLPDecodeContext *m, unsigned int substr,
@@ -875,8 +1071,10 @@ static int read_channel_params(MLPDecodeContext *m, unsigned int substr,
     cp->codebook  = get_bits(gbp, 2);
     cp->huff_lsbs = get_bits(gbp, 5);
 
-    if (cp->codebook > 0 && cp->huff_lsbs > 24) {
-        av_log(m->avctx, AV_LOG_ERROR, "Invalid huff_lsbs.\n");
+    if (cp->codebook > 0 && cp->huff_lsbs > s->max_lsbs) {
+        av_log(m->avctx, AV_LOG_ERROR, "Invalid huff_lsbs=%" PRIu8 ", "
+               "exceeds max_lsbs=%" PRIu8 ".\n",
+               cp->huff_lsbs, s->max_lsbs);
         cp->huff_lsbs = 0;
         return AVERROR_INVALIDDATA;
     }
@@ -910,9 +1108,14 @@ static int read_decoding_params(MLPDecodeContext *m, GetBitContext *gbp,
         }
 
     if (s->param_presence_flags & PARAM_MATRIX)
-        if (get_bits1(gbp))
-            if ((ret = read_matrix_params(m, substr, gbp)) < 0)
+        if (get_bits1(gbp)) {
+            if (0x31ec == s->substream_type)
+                ret = read_31ec_matrix_params(m, substr, gbp);
+            else
+                ret = read_31ea_31eb_matrix_params(m, substr, gbp);
+            if (ret < 0)
                 return ret;
+        }
 
     if (s->param_presence_flags & PARAM_OUTSHIFT)
         if (get_bits1(gbp)) {
@@ -922,6 +1125,10 @@ static int read_decoding_params(MLPDecodeContext *m, GetBitContext *gbp,
                     avpriv_request_sample(m->avctx, "Negative output_shift");
                     s->output_shift[ch] = 0;
                 }
+                if (s->max_shift < s->output_shift[ch])
+                    av_log(m->avctx, AV_LOG_WARNING,
+                           "output_shift=%d exceeds max_shift=%d\n",
+                           s->output_shift[ch], s->max_shift);
             }
             if (substr == m->max_decoded_substream)
                 m->pack_output = m->dsp.mlp_select_pack_output(s->ch_assign,
@@ -1103,6 +1310,56 @@ static void fill_noise_buffer(MLPDecodeContext *m, unsigned int substr)
     s->noisegen_seed = seed;
 }
 
+#if defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2
+
+/** Check matrices-based channel remapping output for saturation. */
+
+static void check_rematrix_output(MLPDecodeContext *m, unsigned int substr)
+{
+    SubStream *s = &m->substream[substr];
+    unsigned int mat, sample;
+
+    for (mat = 0; mat < s->num_primitive_matrices; mat++) {
+        unsigned int dest_ch = s->matrix_out_ch[mat]; // channel written by this matrix
+
+        const uint8_t shift = (3 <= substr && s->min_channel <= dest_ch) ? 31 : 23;
+        const int32_t min_value = -(1u << shift);     // lower bound of the signed (shift+1)-bit range
+        const int32_t max_value =  (1u << shift) - 1; // upper bound of the signed (shift+1)-bit range
+
+        for (sample = 0; sample < s->blockpos; sample++) { // only logs, never modifies samples
+            if (m->sample_buffer[sample][dest_ch] < min_value)
+                av_log(m->avctx, AV_LOG_WARNING,
+                    "rematrix negative saturation substr=%u mat=%u sample=%u "
+                    "value=%" PRId32 "\n",
+                    substr, mat, sample, m->sample_buffer[sample][dest_ch]);
+            if (m->sample_buffer[sample][dest_ch] > max_value)
+                av_log(m->avctx, AV_LOG_WARNING,
+                    "rematrix positive saturation substr=%u mat=%u sample=%u "
+                    "value=%" PRId32 "\n",
+                    substr, mat, sample, m->sample_buffer[sample][dest_ch]);
+        }
+    }
+}
+
+/** Check output audio bit-depth. */
+
+static void check_output_bit_depth(MLPDecodeContext *m, unsigned int substr)
+{
+    SubStream *s = &m->substream[substr];
+    uint32_t cumul_mask = 0; // bitwise OR of the magnitudes of all inspected samples
+    unsigned int chan, sample;
+
+    for (chan = 0; chan <= s->max_matrix_channel; chan++)
+        for (sample = 0; sample < s->blockpos; sample++)
+            cumul_mask |= FFABS(m->sample_buffer[sample][chan]);
+
+    if ((1u << s->max_bits) <= cumul_mask) // some magnitude has a bit set at or above position max_bits
+        av_log(m->avctx, AV_LOG_WARNING, "output audio bit-depth exceeds "
+               "expected %u bits.\n",
+               s->max_bits);
+}
+#endif
+
 /** Write the audio data into the output buffer. */
 
 static int output_data(MLPDecodeContext *m, unsigned int substr,
@@ -1110,8 +1367,7 @@ static int output_data(MLPDecodeContext *m, unsigned int substr,
 {
     AVCodecContext *avctx = m->avctx;
     SubStream *s = &m->substream[substr];
-    unsigned int mat;
-    unsigned int maxchan;
+    unsigned int mat, chan, maxchan;
     int ret;
     int is32 = (m->avctx->sample_fmt == AV_SAMPLE_FMT_S32);
 
@@ -1126,7 +1382,7 @@ static int output_data(MLPDecodeContext *m, unsigned int substr,
     }
 
     maxchan = s->max_matrix_channel;
-    if (!s->noise_type) {
+    if (0x31ea == s->substream_type) {
         generate_2_noise_channels(m, substr);
         maxchan += 2;
     } else {
@@ -1137,19 +1393,45 @@ static int output_data(MLPDecodeContext *m, unsigned int substr,
      * samples. */
     for (mat = 0; mat < s->num_primitive_matrices; mat++) {
         unsigned int dest_ch = s->matrix_out_ch[mat];
-        m->dsp.mlp_rematrix_channel(&m->sample_buffer[0][0],
-                                    s->matrix_coeff[mat],
-                                    &m->bypassed_lsbs[0][mat],
-                                    m->noise_buffer,
-                                    s->num_primitive_matrices - mat,
-                                    dest_ch,
-                                    s->blockpos,
-                                    maxchan,
-                                    s->matrix_noise_shift[mat],
-                                    m->access_unit_size_pow2,
-                                    MSB_MASK(s->quant_step_size[dest_ch]));
+
+        if (substr < 3) {
+            /* Single primitive matrices */
+            m->dsp.mlp_rematrix_channel(&m->sample_buffer[0][0],
+                                        s->matrix_coeff[mat],
+                                        &m->bypassed_lsbs[0][mat],
+                                        m->noise_buffer,
+                                        s->num_primitive_matrices - mat,
+                                        dest_ch,
+                                        s->blockpos,
+                                        maxchan,
+                                        s->matrix_noise_shift[mat],
+                                        m->access_unit_size_pow2,
+                                        MSB_MASK(s->quant_step_size[dest_ch]));
+        }
+        else {
+            /* Interpolated primitive matrices */
+            m->dsp.mlp_rematrix_interp_channel(&m->sample_buffer[0][0],
+                                               s->matrix_coeff[mat],
+                                               s->delta_matrix_coeff[mat],
+                                               &m->bypassed_lsbs[0][mat],
+                                               m->noise_buffer,
+                                               s->num_primitive_matrices - mat,
+                                               dest_ch,
+                                               s->blockpos,
+                                               maxchan,
+                                               s->matrix_noise_shift[mat],
+                                               m->access_unit_size_pow2,
+                                               MSB_MASK(s->quant_step_size[dest_ch]));
+
+            for (chan = 0; chan <= maxchan; chan++)
+                s->matrix_coeff[mat][chan] += s->delta_matrix_coeff[mat][chan];
+        }
     }
 
+#if defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2
+    check_rematrix_output(m, substr);
+#endif
+
     /* get output buffer */
     frame->nb_samples = s->blockpos;
     if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
@@ -1163,6 +1445,10 @@ static int output_data(MLPDecodeContext *m, unsigned int substr,
                                             s->max_matrix_channel,
                                             is32);
 
+#if defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2
+    check_output_bit_depth(m, substr);
+#endif
+
     /* Update matrix encoding side data */
     if (s->matrix_encoding != s->prev_matrix_encoding) {
         if ((ret = ff_side_data_update_matrix_encoding(frame, s->matrix_encoding)) < 0)
@@ -1320,6 +1606,7 @@ static int read_access_unit(AVCodecContext *avctx, AVFrame *frame,
                  (avctx->ch_layout.nb_channels == 8 &&
                   ((m->substream_info >> 4) & 0x7) != 0x7 &&
                   ((m->substream_info >> 4) & 0x7) != 0x6 &&
+                  ((m->substream_info >> 4) & 0x7) != 0x4 &&
                   ((m->substream_info >> 4) & 0x7) != 0x3)) &&
                 substr > 0 && substr < m->max_decoded_substream &&
                 (s->min_channel <= m->substream[substr - 1].max_channel)) {
@@ -1429,8 +1716,10 @@ static void mlp_decode_flush(AVCodecContext *avctx)
 #define OFFSET(x) offsetof(MLPDecodeContext, x)
 #define FLAGS (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM)
 static const AVOption options[] = {
-    { "downmix", "Request a specific channel layout from the decoder", OFFSET(downmix_layout),
-        AV_OPT_TYPE_CHLAYOUT, {.str = NULL}, .flags = FLAGS },
+    { "downmix", "Request a specific channel layout from the decoder",
+        OFFSET(downmix_layout), AV_OPT_TYPE_CHLAYOUT, {.str = NULL}, .flags = FLAGS },
+    { "extract_objects", "Enable extraction of audio object channels",
+        OFFSET(extract_objects), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, .flags = FLAGS },
     { NULL },
 };
 
diff --git a/libavcodec/mlpdsp.c b/libavcodec/mlpdsp.c
index cb40160f67..e28006f2c4 100644
--- a/libavcodec/mlpdsp.c
+++ b/libavcodec/mlpdsp.c
@@ -79,11 +79,51 @@ void ff_mlp_rematrix_channel(int32_t *samples,
 
         if (matrix_noise_shift) {
             index &= access_unit_size_pow2 - 1;
-            accum += noise_buffer[index] * (1 << (matrix_noise_shift + 7));
+            accum += noise_buffer[index] * (1 << (matrix_noise_shift + 11));
             index += index2;
         }
 
-        samples[dest_ch] = ((accum >> 14) & mask) + *bypassed_lsbs;
+        samples[dest_ch] = ((accum >> 18) & mask) + *bypassed_lsbs; /* drop 18 fractional bits, apply mask, add bypassed LSBs */
+        bypassed_lsbs += MAX_CHANNELS;
+        samples += MAX_CHANNELS;
+    }
+}
+
+void ff_mlp_rematrix_interp_channel(int32_t *samples, /* rows of MAX_CHANNELS samples; samples[dest_ch] is rewritten in place for each of the blockpos rows */
+                                    const int32_t *seed_coeffs,
+                                    const int32_t *delta_coeffs,
+                                    const uint8_t *bypassed_lsbs,
+                                    const int8_t *noise_buffer,
+                                    int index,
+                                    unsigned int dest_ch,
+                                    uint16_t blockpos,
+                                    unsigned int maxchan,
+                                    int matrix_noise_shift,
+                                    int access_unit_size_pow2,
+                                    int32_t mask)
+{
+    unsigned int src_ch, i;
+    int index2 = 2 * index + 1; /* noise index stride, derived once from the initial index */
+
+    int32_t delta_inc = (1 << 16) / blockpos; /* i * delta_inc approximates (i / blockpos) * 2^16; NOTE(review): divides by zero if blockpos == 0 - confirm callers reject empty blocks */
+
+    for (i = 0; i < blockpos; i++) {
+        int64_t accum = 0, delta_accum = 0; /* 64-bit sums of sample * coefficient products */
+
+        for (src_ch = 0; src_ch <= maxchan; src_ch++) {
+            accum += (int64_t) samples[src_ch] * seed_coeffs[src_ch]; /* dot product with the seed coefficients */
+            delta_accum += (int64_t) samples[src_ch] * delta_coeffs[src_ch]; /* same row against the delta coefficients */
+        }
+
+        accum += ((delta_accum >> 18) * i * delta_inc * (1 << 18)) >> 16; /* delta term weighted by i * delta_inc / 2^16, rescaled to 18 fractional bits */
+
+        if (matrix_noise_shift) {
+            index &= access_unit_size_pow2 - 1; /* wrap the noise index; presumably access_unit_size_pow2 is a power of two - confirm */
+            accum += noise_buffer[index] * (1 << (matrix_noise_shift + 11));
+            index += index2;
+        }
+
+        samples[dest_ch] = ((accum >> 18) & mask) + *bypassed_lsbs; /* drop 18 fractional bits, apply mask, add bypassed LSBs */
         bypassed_lsbs += MAX_CHANNELS;
         samples += MAX_CHANNELS;
     }
@@ -115,9 +155,10 @@ int32_t ff_mlp_pack_output(int32_t lossless_check_data,
             int mat_ch = ch_assign[out_ch];
             int32_t sample = sample_buffer[i][mat_ch] *
                           (1U << output_shift[mat_ch]);
-            lossless_check_data ^= (sample & 0xffffff) << mat_ch;
+            lossless_check_data ^= (sample & 0xffffff) << (mat_ch & 7); /* shift wraps at 8, so channels 8..15 reuse shifts 0..7 */
+
             if (is32)
                 *data_32++ = sample * 256U;
             else
                 *data_16++ = sample >> 8;
         }
@@ -129,6 +170,7 @@ av_cold void ff_mlpdsp_init(MLPDSPContext *c)
 {
     c->mlp_filter_channel = mlp_filter_channel;
     c->mlp_rematrix_channel = ff_mlp_rematrix_channel;
+    c->mlp_rematrix_interp_channel = ff_mlp_rematrix_interp_channel;
     c->mlp_select_pack_output = mlp_select_pack_output;
 #if ARCH_ARM
     ff_mlpdsp_init_arm(c);
diff --git a/libavcodec/mlpdsp.h b/libavcodec/mlpdsp.h
index 7a9ac228d3..fd29db10a7 100644
--- a/libavcodec/mlpdsp.h
+++ b/libavcodec/mlpdsp.h
@@ -37,6 +37,19 @@ void ff_mlp_rematrix_channel(int32_t *samples,
                              int access_unit_size_pow2,
                              int32_t mask);
 
+void ff_mlp_rematrix_interp_channel(int32_t *samples,
+                                    const int32_t *seed_coeffs,
+                                    const int32_t *delta_coeffs,
+                                    const uint8_t *bypassed_lsbs,
+                                    const int8_t *noise_buffer,
+                                    int index,
+                                    unsigned int dest_ch,
+                                    uint16_t blockpos,
+                                    unsigned int maxchan,
+                                    int matrix_noise_shift,
+                                    int access_unit_size_pow2,
+                                    int32_t mask);
+
 int32_t ff_mlp_pack_output(int32_t lossless_check_data,
                            uint16_t blockpos,
                            int32_t (*sample_buffer)[MAX_CHANNELS],
@@ -62,6 +75,18 @@ typedef struct MLPDSPContext {
                                  int matrix_noise_shift,
                                  int access_unit_size_pow2,
                                  int32_t mask);
+    void (*mlp_rematrix_interp_channel)(int32_t *samples,
+                                        const int32_t *seed_coeffs,
+                                        const int32_t *delta_coeffs,
+                                        const uint8_t *bypassed_lsbs,
+                                        const int8_t *noise_buffer,
+                                        int index,
+                                        unsigned int dest_ch,
+                                        uint16_t blockpos,
+                                        unsigned int maxchan,
+                                        int matrix_noise_shift,
+                                        int access_unit_size_pow2,
+                                        int32_t mask);
     int32_t (*(*mlp_select_pack_output)(uint8_t *ch_assign,
                                         int8_t *output_shift,
                                         uint8_t max_matrix_channel,
diff --git a/libavcodec/x86/mlpdsp.asm b/libavcodec/x86/mlpdsp.asm
index 3dc641e89e..3b232d4551 100644
--- a/libavcodec/x86/mlpdsp.asm
+++ b/libavcodec/x86/mlpdsp.asm
@@ -61,12 +61,12 @@ SECTION .text
     paddq        xm0, xm1
     movq      accumq, xm0
     movzx     blsbsd, byte [blsbs_ptrq]             ; load *bypassed_lsbs
-    sar       accumq, 14                            ; accum >>= 14
+    sar       accumq, 18                            ; accum >>= 18
     and       accumd, maskd                         ; accum &= mask
     add       accumd, blsbsd                        ; accum += *bypassed_lsbs
     mov   [samplesq + dest_chq], accumd             ; samples[dest_ch] = accum
-    add   blsbs_ptrq, 8                             ; bypassed_lsbs += MAX_CHANNELS;
-    add     samplesq, 32                            ; samples += MAX_CHANNELS;
+    add   blsbs_ptrq, 16                            ; bypassed_lsbs += MAX_CHANNELS;
+    add     samplesq, 64                            ; samples += MAX_CHANNELS;
     cmp   blsbs_ptrq, cntq
 %endmacro
 
@@ -80,12 +80,12 @@ SECTION .text
     SHLX      noiseq, mns                           ; noise_buffer[index] <<= matrix_noise_shift
     add       accumq, noiseq                        ; accum += noise_buffer[index]
     movzx     noised, byte [blsbs_ptrq]             ; load *bypassed_lsbs (reuse tmp noise register)
-    sar       accumq, 14                            ; accum >>= 14
+    sar       accumq, 18                            ; accum >>= 18
     and       accumd, maskd                         ; accum &= mask
     add       accumd, noised                        ; accum += *bypassed_lsbs
     mov   [samplesq + dest_chq], accumd             ; samples[dest_ch] = accum
-    add   blsbs_ptrq, 8                             ; bypassed_lsbs += MAX_CHANNELS;
-    add     samplesq, 32                            ; samples += MAX_CHANNELS;
+    add   blsbs_ptrq, 16                            ; bypassed_lsbs += MAX_CHANNELS;
+    add     samplesq, 64                            ; samples += MAX_CHANNELS;
     cmp   blsbs_ptrq, cntq
 %endmacro
 
@@ -106,7 +106,8 @@ cglobal mlp_rematrix_channel, 0, 13, 5, samples, coeffs, blsbs_ptr, blsbs, \
     mov     dest_chd, dest_chm                      ; load dest_chd (not needed on UNIX64)
 %endif
     shl     dest_chd, 2
-    lea         cntq, [blsbs_ptrq + blockposq*8]
+    lea         cntq, [blsbs_ptrq + blockposq*8]    ; loop end address (bypassed_lsbs + blockpos * MAX_CHANNELS)
+    lea         cntq, [cntq + blockposq*8]
     test        mnsd, mnsd                          ; is matrix_noise_shift != 0?
     jne .shift                                      ; jump if true
     cmp     maxchand, 4                             ; is maxchan < 4?
@@ -144,7 +145,7 @@ align 16
     DEFINE_ARGS samples, coeffs, blsbs_ptr, noise_buffer, \
                 index, dest_ch, accum, index2, mns, \
                 ausp, mask, cnt, noise
-    add         mnsd, 7              ; matrix_noise_shift += 7
+    add         mnsd, 11             ; matrix_noise_shift += 11
 %else ; sse4
     mov           r6, rcx            ; move rcx elsewhere so we can use cl for matrix_noise_shift
 %if WIN64
@@ -156,7 +157,7 @@ align 16
     DEFINE_ARGS samples, coeffs, blsbs_ptr, mns, index, dest_ch, noise_buffer, \
                 index2, accum, ausp, mask, cnt, noise
 %endif
-    lea         mnsd, [r8 + 7]       ; rcx = matrix_noise_shift + 7
+    lea         mnsd, [r8 + 11]      ; rcx = matrix_noise_shift + 11
 %endif ; cpuflag
     sub        auspd, 1              ; access_unit_size_pow2 -= 1
     cmp          r7d, 4              ; is maxchan < 4?
diff --git a/tests/fate/truehd.mak b/tests/fate/truehd.mak
index b0bc86a965..30c0e9628b 100644
--- a/tests/fate/truehd.mak
+++ b/tests/fate/truehd.mak
@@ -18,5 +18,15 @@ fate-truehd-mono1726: CMD = md5pipe -f truehd -i $(TARGET_SAMPLES)/truehd/ticket
 fate-truehd-mono1726: CMP = oneline
 fate-truehd-mono1726: REF = 9be9551fac418440bb02101bfdb11df9
 
+FATE_TRUEHD-$(call DEMDEC, TRUEHD, TRUEHD) += fate-truehd-atmos-no-obj
+fate-truehd-atmos-no-obj: CMD = md5pipe -f truehd -extract_objects 0 -i $(TARGET_SAMPLES)/truehd/atmos.thd -f s32le
+fate-truehd-atmos-no-obj: CMP = oneline
+fate-truehd-atmos-no-obj: REF = 53da6ce35c778bcc2182ef2160bf16a2
+
+FATE_TRUEHD-$(call DEMDEC, TRUEHD, TRUEHD) += fate-truehd-atmos-obj
+fate-truehd-atmos-obj: CMD = md5pipe -f truehd -extract_objects 1 -i $(TARGET_SAMPLES)/truehd/atmos.thd -f s32le
+fate-truehd-atmos-obj: CMP = oneline
+fate-truehd-atmos-obj: REF = f19f6f8ec8b040050aaa019f016f7ddc
+
 FATE_SAMPLES_AUDIO += $(FATE_TRUEHD-yes)
 fate-truehd: $(FATE_TRUEHD-yes)
-- 
2.43.0



More information about the ffmpeg-devel mailing list