[FFmpeg-cvslog] dca: change the core to work with integer coefficients.

Sat Jan 2 13:09:09 CET 2016

ffmpeg | branch: master | Alexandra Hájková <alexandra.khirnova at gmail.com> | Thu Dec 17 15:52:47 2015 +0100| [aebf07075f4244caf591a3af71e5872fe314e87b] | committer: Janne Grunau

dca: change the core to work with integer coefficients.

The DCA core decoder converts integer coefficients read from the
bitstream to floats just after reading them (along with dequantization).
All the other steps of the audio reconstruction are done with floats,
which makes the output of the DTS lossless extension (XLL)
actually lossy.
This patch changes the DCA core to work with integer coefficients
up to the QMF stage, at which point the integer coefficients are
converted to floats.
The coefficients for the LFE channel (lfe_data) are not touched.
This is the first step towards truly lossless XLL decoding.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=aebf07075f4244caf591a3af71e5872fe314e87b
---

 libavcodec/dca.h        |    8 ++--
 libavcodec/dcadec.c     |  111 +++++++++++++++++++++++++----------------------
 libavcodec/dcadsp.c     |   34 +++++++++++++++
 libavcodec/dcadsp.h     |    6 +++
 libavcodec/fmtconvert.c |    9 ++++
 libavcodec/fmtconvert.h |   10 +++++
 tests/fate/audio.mak    |    2 +-
 7 files changed, 122 insertions(+), 58 deletions(-)

diff --git a/libavcodec/dca.h b/libavcodec/dca.h
index 6548d75..a85470d 100644
--- a/libavcodec/dca.h
+++ b/libavcodec/dca.h
@@ -138,8 +138,8 @@ typedef struct DCAAudioHeader {
     int transient_huffman[DCA_PRIM_CHANNELS_MAX];   ///< transient mode code book
     int scalefactor_huffman[DCA_PRIM_CHANNELS_MAX]; ///< scale factor code book
     int bitalloc_huffman[DCA_PRIM_CHANNELS_MAX];    ///< bit allocation quantizer select
-    int quant_index_huffman[DCA_PRIM_CHANNELS_MAX][DCA_ABITS_MAX]; ///< quantization index codebook select
-    float scalefactor_adj[DCA_PRIM_CHANNELS_MAX][DCA_ABITS_MAX];   ///< scale factor adjustment
+    int quant_index_huffman[DCA_PRIM_CHANNELS_MAX][DCA_ABITS_MAX];  ///< quantization index codebook select
+    uint32_t scalefactor_adj[DCA_PRIM_CHANNELS_MAX][DCA_ABITS_MAX]; ///< scale factor adjustment
 
     int subframes;              ///< number of subframes
     int total_channels;         ///< number of channels including extensions
@@ -147,10 +147,10 @@ typedef struct DCAAudioHeader {
 } DCAAudioHeader;
 
 typedef struct DCAChan {
-    DECLARE_ALIGNED(32, float, subband_samples)[DCA_BLOCKS_MAX][DCA_SUBBANDS][8];
+    DECLARE_ALIGNED(32, int32_t, subband_samples)[DCA_BLOCKS_MAX][DCA_SUBBANDS][8];
 
     /* Subband samples history (for ADPCM) */
-    DECLARE_ALIGNED(16, float, subband_samples_hist)[DCA_SUBBANDS][4];
+    DECLARE_ALIGNED(32, int32_t, subband_samples_hist)[DCA_SUBBANDS][4];
     int hist_index;
 
     /* Half size is sufficient for core decoding, but for 96 kHz data
diff --git a/libavcodec/dcadec.c b/libavcodec/dcadec.c
index aca6ed3..399b1e5 100644
--- a/libavcodec/dcadec.c
+++ b/libavcodec/dcadec.c
@@ -226,7 +226,7 @@ static inline void get_array(GetBitContext *gb, int *dst, int len, int bits)
 static int dca_parse_audio_coding_header(DCAContext *s, int base_channel)
 {
     int i, j;
-    static const float adj_table[4] = { 1.0, 1.1250, 1.2500, 1.4375 };
+    static const uint8_t adj_table[4] = { 16, 18, 20, 23 };
     static const int bitlen[11] = { 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3 };
     static const int thr[11]    = { 0, 1, 3, 3, 3, 3, 7, 7, 7, 7, 7 };
 
@@ -265,7 +265,7 @@ static int dca_parse_audio_coding_header(DCAContext *s, int base_channel)
     /* Get scale factor adjustment */
     for (j = 0; j < 11; j++)
         for (i = base_channel; i < s->audio_header.prim_channels; i++)
-            s->audio_header.scalefactor_adj[i][j] = 1;
+            s->audio_header.scalefactor_adj[i][j] = 16;
 
     for (j = 1; j < 11; j++)
         for (i = base_channel; i < s->audio_header.prim_channels; i++)
@@ -790,10 +790,7 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index)
 {
     int k, l;
     int subsubframe = s->current_subsubframe;
-
-    const float *quant_step_table;
-
-    LOCAL_ALIGNED_16(int32_t, block, [SAMPLES_PER_SUBBAND * DCA_SUBBANDS]);
+    const uint32_t *quant_step_table;
 
     /*
      * Audio data
@@ -801,13 +798,12 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index)
 
     /* Select quantization step size table */
     if (s->bit_rate_index == 0x1f)
-        quant_step_table = ff_dca_lossless_quant_d;
+        quant_step_table = ff_dca_lossless_quant;
     else
-        quant_step_table = ff_dca_lossy_quant_d;
+        quant_step_table = ff_dca_lossy_quant;
 
     for (k = base_channel; k < s->audio_header.prim_channels; k++) {
-        float (*subband_samples)[8] = s->dca_chan[k].subband_samples[block_index];
-        float rscale[DCA_SUBBANDS];
+        int32_t (*subband_samples)[8] = s->dca_chan[k].subband_samples[block_index];
 
         if (get_bits_left(&s->gb) < 0)
             return AVERROR_INVALIDDATA;
@@ -818,27 +814,25 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index)
             /* Select the mid-tread linear quantizer */
             int abits = s->dca_chan[k].bitalloc[l];
 
-            float quant_step_size = quant_step_table[abits];
-
-            /*
-             * Determine quantization index code book and its type
-             */
-
-            /* Select quantization index code book */
-            int sel = s->audio_header.quant_index_huffman[k][abits];
+            uint32_t quant_step_size = quant_step_table[abits];
 
             /*
              * Extract bits from the bit stream
              */
-            if (!abits) {
-                rscale[l] = 0;
-                memset(block + SAMPLES_PER_SUBBAND * l, 0, SAMPLES_PER_SUBBAND * sizeof(block[0]));
-            } else {
+            if (!abits)
+                memset(subband_samples[l], 0, SAMPLES_PER_SUBBAND *
+                       sizeof(subband_samples[l][0]));
+            else {
+                uint32_t rscale;
                 /* Deal with transients */
                 int sfi = s->dca_chan[k].transition_mode[l] &&
                     subsubframe >= s->dca_chan[k].transition_mode[l];
-                rscale[l] = quant_step_size * s->dca_chan[k].scale_factor[l][sfi] *
-                            s->audio_header.scalefactor_adj[k][sel];
+                /* Determine quantization index code book and its type.
+                   Select quantization index code book */
+                int sel = s->audio_header.quant_index_huffman[k][abits];
+
+                rscale = (s->dca_chan[k].scale_factor[l][sfi] *
+                          s->audio_header.scalefactor_adj[k][sel] + 8) >> 4;
 
                 if (abits >= 11 || !dca_smpl_bitalloc[abits].vlc[sel].table) {
                     if (abits <= 7) {
@@ -851,7 +845,7 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index)
                         block_code1 = get_bits(&s->gb, size);
                         block_code2 = get_bits(&s->gb, size);
                         err         = decode_blockcodes(block_code1, block_code2,
-                                                        levels, block + SAMPLES_PER_SUBBAND * l);
+                                                        levels, subband_samples[l]);
                         if (err) {
                             av_log(s->avctx, AV_LOG_ERROR,
                                    "ERROR: block code look-up failed\n");
@@ -860,20 +854,18 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index)
                     } else {
                         /* no coding */
                         for (m = 0; m < SAMPLES_PER_SUBBAND; m++)
-                            block[SAMPLES_PER_SUBBAND * l + m] = get_sbits(&s->gb, abits - 3);
+                            subband_samples[l][m] = get_sbits(&s->gb, abits - 3);
                     }
                 } else {
                     /* Huffman coded */
                     for (m = 0; m < SAMPLES_PER_SUBBAND; m++)
-                        block[SAMPLES_PER_SUBBAND * l + m] = get_bitalloc(&s->gb,
-                                                        &dca_smpl_bitalloc[abits], sel);
+                        subband_samples[l][m] = get_bitalloc(&s->gb,
+                                                             &dca_smpl_bitalloc[abits], sel);
                 }
+                s->dcadsp.dequantize(subband_samples[l], quant_step_size, rscale);
             }
         }
 
-        s->fmt_conv.int32_to_float_fmul_array8(&s->fmt_conv, subband_samples[0],
-                                               block, rscale, SAMPLES_PER_SUBBAND * s->audio_header.vq_start_subband[k]);
-
         for (l = 0; l < s->audio_header.vq_start_subband[k]; l++) {
             int m;
             /*
@@ -883,25 +875,25 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index)
                 int n;
                 if (s->predictor_history)
                     subband_samples[l][0] += (ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][0] *
-                                                 s->dca_chan[k].subband_samples_hist[l][3] +
-                                                 ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][1] *
-                                                 s->dca_chan[k].subband_samples_hist[l][2] +
-                                                 ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][2] *
-                                                 s->dca_chan[k].subband_samples_hist[l][1] +
-                                                 ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][3] *
-                                                 s->dca_chan[k].subband_samples_hist[l][0]) *
-                                                (1.0f / 8192);
+                                              (int64_t)s->dca_chan[k].subband_samples_hist[l][3] +
+                                              ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][1] *
+                                              (int64_t)s->dca_chan[k].subband_samples_hist[l][2] +
+                                              ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][2] *
+                                              (int64_t)s->dca_chan[k].subband_samples_hist[l][1] +
+                                              ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][3] *
+                                              (int64_t)s->dca_chan[k].subband_samples_hist[l][0]) +
+                                              (1 << 12) >> 13;
                 for (m = 1; m < SAMPLES_PER_SUBBAND; m++) {
-                    float sum = ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][0] *
-                                subband_samples[l][m - 1];
+                    int64_t sum = ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][0] *
+                                  (int64_t)subband_samples[l][m - 1];
                     for (n = 2; n <= 4; n++)
                         if (m >= n)
                             sum += ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][n - 1] *
-                                   subband_samples[l][m - n];
+                                   (int64_t)subband_samples[l][m - n];
                         else if (s->predictor_history)
                             sum += ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][n - 1] *
-                                   s->dca_chan[k].subband_samples_hist[l][m - n + 4];
-                    subband_samples[l][m] += sum * 1.0f / 8192;
+                                   (int64_t)s->dca_chan[k].subband_samples_hist[l][m - n + 4];
+                    subband_samples[l][m] += (int32_t)(sum + (1 << 12) >> 13);
                 }
             }
 
@@ -921,11 +913,12 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index)
                 s->debug_flag |= 0x01;
             }
 
-            s->dcadsp.decode_hf(subband_samples, s->dca_chan[k].high_freq_vq,
-                                ff_dca_high_freq_vq, subsubframe * SAMPLES_PER_SUBBAND,
-                                s->dca_chan[k].scale_factor,
-                                s->audio_header.vq_start_subband[k],
-                                s->audio_header.subband_activity[k]);
+            s->dcadsp.decode_hf_int(subband_samples, s->dca_chan[k].high_freq_vq,
+                                    ff_dca_high_freq_vq, subsubframe * SAMPLES_PER_SUBBAND,
+                                    s->dca_chan[k].scale_factor,
+                                    s->audio_header.vq_start_subband[k],
+                                    s->audio_header.subband_activity[k]);
+
         }
     }
 
@@ -945,6 +938,8 @@ static int dca_filter_channels(DCAContext *s, int block_index, int upsample)
     int k;
 
     if (upsample) {
+        LOCAL_ALIGNED(32, float, samples, [64], [SAMPLES_PER_SUBBAND]);
+
         if (!s->qmf64_table) {
             s->qmf64_table = qmf64_precompute();
             if (!s->qmf64_table)
@@ -953,21 +948,31 @@ static int dca_filter_channels(DCAContext *s, int block_index, int upsample)
 
         /* 64 subbands QMF */
         for (k = 0; k < s->audio_header.prim_channels; k++) {
-            float (*subband_samples)[SAMPLES_PER_SUBBAND] = s->dca_chan[k].subband_samples[block_index];
+            int32_t (*subband_samples)[SAMPLES_PER_SUBBAND] =
+                     s->dca_chan[k].subband_samples[block_index];
+
+            s->fmt_conv.int32_to_float(samples[0], subband_samples[0],
+                                       64 * SAMPLES_PER_SUBBAND);
 
             if (s->channel_order_tab[k] >= 0)
-                qmf_64_subbands(s, k, subband_samples,
+                qmf_64_subbands(s, k, samples,
                                 s->samples_chanptr[s->channel_order_tab[k]],
                                 /* Upsampling needs a factor 2 here. */
                                 M_SQRT2 / 32768.0);
         }
     } else {
         /* 32 subbands QMF */
+        LOCAL_ALIGNED(32, float, samples, [32], [SAMPLES_PER_SUBBAND]);
+
         for (k = 0; k < s->audio_header.prim_channels; k++) {
-            float (*subband_samples)[SAMPLES_PER_SUBBAND] = s->dca_chan[k].subband_samples[block_index];
+            int32_t (*subband_samples)[SAMPLES_PER_SUBBAND] =
+                     s->dca_chan[k].subband_samples[block_index];
+
+            s->fmt_conv.int32_to_float(samples[0], subband_samples[0],
+                                       32 * SAMPLES_PER_SUBBAND);
 
             if (s->channel_order_tab[k] >= 0)
-                qmf_32_subbands(s, k, subband_samples,
+                qmf_32_subbands(s, k, samples,
                                 s->samples_chanptr[s->channel_order_tab[k]],
                                 M_SQRT1_2 / 32768.0);
         }
diff --git a/libavcodec/dcadsp.c b/libavcodec/dcadsp.c
index 9105a4c..c333f4a 100644
--- a/libavcodec/dcadsp.c
+++ b/libavcodec/dcadsp.c
@@ -25,6 +25,7 @@
 #include "libavutil/intreadwrite.h"
 
 #include "dcadsp.h"
+#include "dcamath.h"
 
 static void decode_hf_c(float dst[DCA_SUBBANDS][8],
                         const int32_t vq_num[DCA_SUBBANDS],
@@ -44,6 +45,21 @@ static void decode_hf_c(float dst[DCA_SUBBANDS][8],
     }
 }
 
+static void decode_hf_int_c(int32_t dst[DCA_SUBBANDS][8],
+                            const int32_t vq_num[DCA_SUBBANDS],
+                            const int8_t hf_vq[1024][32], intptr_t vq_offset,
+                            int32_t scale[DCA_SUBBANDS][2],
+                            intptr_t start, intptr_t end)
+{
+    int i, j;
+
+    for (j = start; j < end; j++) {
+        const int8_t *ptr = &hf_vq[vq_num[j]][vq_offset];
+        for (i = 0; i < 8; i++)
+            dst[j][i] = ptr[i] * scale[j][0] + 8 >> 4;
+    }
+}
+
 static inline void dca_lfe_fir(float *out, const float *in, const float *coefs,
                                int decifactor)
 {
@@ -93,6 +109,22 @@ static void dca_qmf_32_subbands(float samples_in[32][8], int sb_act,
     }
 }
 
+static void dequantize_c(int32_t *samples, uint32_t step_size, uint32_t scale)
+{
+    int64_t step = (int64_t)step_size * scale;
+    int shift, i;
+    int32_t step_scale;
+
+    if (step > (1 << 23))
+        shift = av_log2(step >> 23) + 1;
+    else
+        shift = 0;
+    step_scale = (int32_t)(step >> shift);
+
+    for (i = 0; i < 8; i++)
+        samples[i] = dca_clip23(dca_norm((int64_t)samples[i] * step_scale, 22 - shift));
+}
+
 static void dca_lfe_fir0_c(float *out, const float *in, const float *coefs)
 {
     dca_lfe_fir(out, in, coefs, 32);
@@ -109,6 +141,8 @@ av_cold void ff_dcadsp_init(DCADSPContext *s)
     s->lfe_fir[1]      = dca_lfe_fir1_c;
     s->qmf_32_subbands = dca_qmf_32_subbands;
     s->decode_hf       = decode_hf_c;
+    s->decode_hf_int   = decode_hf_int_c;
+    s->dequantize      = dequantize_c;
 
     if (ARCH_AARCH64)
         ff_dcadsp_init_aarch64(s);
diff --git a/libavcodec/dcadsp.h b/libavcodec/dcadsp.h
index 0669128..eac9de9 100644
--- a/libavcodec/dcadsp.h
+++ b/libavcodec/dcadsp.h
@@ -37,6 +37,12 @@ typedef struct DCADSPContext {
                       const int8_t hf_vq[1024][32], intptr_t vq_offset,
                       int32_t scale[DCA_SUBBANDS][2],
                       intptr_t start, intptr_t end);
+    void (*decode_hf_int)(int32_t dst[DCA_SUBBANDS][8],
+                          const int32_t vq_num[DCA_SUBBANDS],
+                          const int8_t hf_vq[1024][32], intptr_t vq_offset,
+                          int32_t scale[DCA_SUBBANDS][2],
+                          intptr_t start, intptr_t end);
+    void (*dequantize)(int32_t *samples, uint32_t step_size, uint64_t scale);
 } DCADSPContext;
 
 void ff_dcadsp_init(DCADSPContext *s);
diff --git a/libavcodec/fmtconvert.c b/libavcodec/fmtconvert.c
index 5e29bfa..f94d438 100644
--- a/libavcodec/fmtconvert.c
+++ b/libavcodec/fmtconvert.c
@@ -32,6 +32,14 @@ static void int32_to_float_fmul_scalar_c(float *dst, const int32_t *src,
         dst[i] = src[i] * mul;
 }
 
+static void int32_to_float_c(float *dst, const int32_t *src, intptr_t len)
+{
+    int i;
+
+    for (i = 0; i < len; i++)
+        dst[i] = (float)src[i];
+}
+
 static void int32_to_float_fmul_array8_c(FmtConvertContext *c, float *dst,
                                          const int32_t *src, const float *mul,
                                          int len)
@@ -43,6 +51,7 @@ static void int32_to_float_fmul_array8_c(FmtConvertContext *c, float *dst,
 
 av_cold void ff_fmt_convert_init(FmtConvertContext *c, AVCodecContext *avctx)
 {
+    c->int32_to_float             = int32_to_float_c;
     c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c;
     c->int32_to_float_fmul_array8 = int32_to_float_fmul_array8_c;
 
diff --git a/libavcodec/fmtconvert.h b/libavcodec/fmtconvert.h
index dd603f9..b2c2356 100644
--- a/libavcodec/fmtconvert.h
+++ b/libavcodec/fmtconvert.h
@@ -37,6 +37,16 @@ typedef struct FmtConvertContext {
      */
     void (*int32_to_float_fmul_scalar)(float *dst, const int32_t *src,
                                        float mul, int len);
+    /**
+     * Convert an array of int32_t to float.
+     * @param dst destination array of float.
+     *            constraints: 32-byte aligned
+     * @param src source array of int32_t.
+     *            constraints: 32-byte aligned
+     * @param len number of elements to convert.
+     *            constraints: multiple of 8
+     */
+    void (*int32_to_float)(float *dst, const int32_t *src, intptr_t len);
 
     /**
      * Convert an array of int32_t to float and multiply by a float value from another array,
diff --git a/tests/fate/audio.mak b/tests/fate/audio.mak
index bc107c5..cf11e9d 100644
--- a/tests/fate/audio.mak
+++ b/tests/fate/audio.mak
@@ -24,7 +24,7 @@ fate-dca-core: REF = $(SAMPLES)/dts/dts.pcm
 FATE_DCA-$(CONFIG_DTS_DEMUXER) += fate-dca-xll
 fate-dca-xll: CMD = pcm -disable_xll 0 -i $(TARGET_SAMPLES)/dts/master_audio_7.1_24bit.dts
 fate-dca-xll: CMP = oneoff
-fate-dca-xll: REF = $(SAMPLES)/dts/master_audio_7.1_24bit.pcm
+fate-dca-xll: REF = $(SAMPLES)/dts/master_audio_7.1_24bit_2.pcm
 
 FATE_SAMPLES_AVCONV-$(CONFIG_DCA_DECODER) += $(FATE_DCA-yes)
 fate-dca: $(FATE_DCA-yes)