[FFmpeg-soc] [soc]AMR-WB decoder branch, master, updated.

Marcelo Póvoa marspeoplester at gmail.com
Tue Jul 27 03:48:07 CEST 2010


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "AMR-WB decoder".

The branch, master has been updated
       via  4e9169a62e5fdb901d0f1a0a4a85613b575e7b2f (commit)
       via  eff9bcbdb922197ef4720daa98bc6e01b8e4fb80 (commit)
       via  fe78e3f003f346c235c38403442d8f4f9f541afd (commit)
      from  0ca8fed46c9ff9b50f5d51f2d1fa79d8583aedab (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 4e9169a62e5fdb901d0f1a0a4a85613b575e7b2f
Author: Marcelo Povoa <marspeoplester at gmail.com>
Date:   Mon Jul 26 22:46:33 2010 -0300

    Implement output upsampling to 16kHz. This finishes
    the lower band generation part, but it is not functional yet

diff --git a/libavcodec/amrwbdata.h b/libavcodec/amrwbdata.h
index d8a1068..46a97ff 100644
--- a/libavcodec/amrwbdata.h
+++ b/libavcodec/amrwbdata.h
@@ -26,10 +26,14 @@
 
 #define LP_ORDER              16               ///< linear predictive coding filter order
 #define LP_ORDER_16k          20               ///< lpc filter order at 16kHz
+#define UPS_FIR_SIZE          12               ///< upsampling filter size
+#define SAMPLE_MEM            24               ///< number of stored past samples given by
+                                               ///< max(LP_ORDER, UPS_FIR_SIZE * 2)
 #define MIN_ISF_SPACING       (128 / 32768.0)  ///< minimum isf gap
 #define PRED_FACTOR           (1.0 / 3.0)
 #define MIN_ENERGY           -14.0             ///< initial innnovation energy (dB)
 #define ENERGY_MEAN           30.0             ///< mean innovation energy (dB) in all modes
+#define PREEMPH_FAC           0.68             ///< factor used to de-emphasize synthesis
 
 #define AMRWB_SUBFRAME_SIZE   64               ///< samples per subframe at 12.8 kHz
 #define AMRWB_SFR_SIZE_OUT    80               ///< samples per subframe at 16 kHz
@@ -38,8 +42,6 @@
 #define AMRWB_P_DELAY_MAX     231              ///< maximum pitch delay value
 #define AMRWB_P_DELAY_MIN     34
 
-#define PREEMPH_FAC           0.68             ///< factor used to de-emphasize synthesis
-
 /* Mode ordering is sensitive, do not change */
 enum Mode {
     MODE_6k60 = 0,                         ///< 6.60 kbit/s
@@ -1757,7 +1759,6 @@ static const float energy_pred_fac[4] = { 0.2, 0.3, 0.4, 0.5 };
 
 /** impulse response filter tables converted to float from Q15
  * used for anti-sparseness processing */
-// XXX: Not sure whether it is Q15 indeed
 static const float ir_filter_str[64] = {
      0.615906,  0.295807,  0.099792, -0.104889,  0.087402, -0.159912,
      0.048492, -0.041412,  0.018311,  0.118805, -0.045685, -0.021301,
@@ -1802,6 +1803,35 @@ static const float hpf_400_coef[2][3] = {       // 400 kHz cutoff filter
     { 1.787109375, -0.864257812, 0           }
 };
 
+/* Interpolation coefficients for 5/4 signal upsampling: 4 rows (one per non-zero
+ * fractional phase) of 2 * UPS_FIR_SIZE taps, reordered from the reference source for efficiency */
+static const float upsample_fir[4][24] = {
+    { -6.103516e-05,  7.324219e-04, -2.014160e-03,  4.150391e-03,
+      -7.263184e-03,  1.165771e-02, -1.776123e-02,  2.624512e-02,
+      -3.869629e-02,  5.877686e-02, -9.863281e-02,  2.314453e-01,
+       9.348755e-01, -1.523438e-01,  7.861328e-02, -4.937744e-02,
+       3.308105e-02, -2.252197e-02,  1.507568e-02, -9.765625e-03,
+       5.859375e-03, -3.173828e-03,  1.403809e-03, -3.662109e-04  },
+    { -2.441406e-04,  1.464844e-03, -3.784180e-03,  7.568359e-03,
+      -1.300049e-02,  2.062988e-02, -3.112793e-02,  4.589844e-02,
+      -6.781006e-02,  1.042480e-01, -1.815186e-01,  5.016479e-01,
+       7.548828e-01, -2.094727e-01,  1.148071e-01, -7.348633e-02,
+       4.956055e-02, -3.369141e-02,  2.246094e-02, -1.434326e-02,
+       8.483887e-03, -4.455566e-03,  1.831055e-03, -4.272461e-04  },
+    { -4.272461e-04,  1.831055e-03, -4.455566e-03,  8.483887e-03,
+      -1.434326e-02,  2.246094e-02, -3.369141e-02,  4.956055e-02,
+      -7.348633e-02,  1.148071e-01, -2.094727e-01,  7.548828e-01,
+       5.016479e-01, -1.815186e-01,  1.042480e-01, -6.781006e-02,
+       4.589844e-02, -3.112793e-02,  2.062988e-02, -1.300049e-02,
+       7.568359e-03, -3.784180e-03,  1.464844e-03, -2.441406e-04  },
+    { -3.662109e-04,  1.403809e-03, -3.173828e-03,  5.859375e-03,
+      -9.765625e-03,  1.507568e-02, -2.252197e-02,  3.308105e-02,
+      -4.937744e-02,  7.861328e-02, -1.523438e-01,  9.348755e-01,
+       2.314453e-01, -9.863281e-02,  5.877686e-02, -3.869629e-02,
+       2.624512e-02, -1.776123e-02,  1.165771e-02, -7.263184e-03,
+       4.150391e-03, -2.014160e-03,  7.324219e-04, -6.103516e-05  }
+};
+
 /* High band quantized gains for 23k85 in Q14 */
 static const uint16_t qua_hb_gain[16] = {
    3624, 4673, 5597, 6479, 7425, 8378, 9324, 10264,
diff --git a/libavcodec/amrwbdec.c b/libavcodec/amrwbdec.c
index 2a62a17..d7deb39 100644
--- a/libavcodec/amrwbdec.c
+++ b/libavcodec/amrwbdec.c
@@ -72,7 +72,7 @@ typedef struct {
     uint8_t                    prev_ir_filter_nr; ///< previous impulse response filter "impNr": 0 - strong, 1 - medium, 2 - none
     float                           prev_tr_gain; ///< previous initial gain used by noise enhancer for thresold
 
-    float samples_in[LP_ORDER + AMRWB_SUBFRAME_SIZE]; ///< floating point samples
+    float samples_in[SAMPLE_MEM + AMRWB_SUBFRAME_SIZE]; ///< lower band floating point samples at 12.8kHz
 
     float                           demph_mem[1]; ///< previous value in the de-emphasis filter
     float          hpf_31_mem[4], hpf_400_mem[4]; ///< previous values in the high-pass filters
@@ -961,6 +961,33 @@ static void high_pass_filter(float *out, const float hpf_coef[2][3],
 }
 
 /**
+ * Upsample a signal by 5/4 ratio (from 12.8kHz to 16kHz) using
+ * a FIR interpolation filter. Uses past data from before *in address
+ * (UPS_FIR_SIZE - 1 samples) and up to UPS_FIR_SIZE samples past its end
+ *
+ * @param out                 [out] buffer for interpolated signal
+ * @param in                  [in] current signal data (length 0.8*o_size)
+ * @param o_size              [in] output signal length
+ */
+static void upsample_5_4(float *out, const float *in, int o_size)
+{
+    const float *in0 = in - UPS_FIR_SIZE + 1; // first tap of the filter window
+    int i;
+
+    for (i = 0; i < o_size; i++) {
+        int int_part  = (i << 2) / 5;            // input index floor(4i / 5)
+        int frac_part = (i << 2) - 5 * int_part; // remaining phase in fifths
+
+        if (!frac_part) { // output coincides with an input sample: copy it
+            out[i] = in[int_part];
+        } else
+            out[i] = ff_dot_productf(in0 + int_part, upsample_fir[4 - frac_part],
+                                     UPS_FIR_SIZE << 1);
+        out[i] *= 2.0; // upscale output
+    }
+}
+
+/**
  * Calculate the high band gain based on encoded index (23k85 mode) or
  * on the lower band speech signal and the Voice Activity Detection flag
  *
@@ -1068,7 +1095,7 @@ static void update_sub_state(AMRWBContext *ctx)
     memmove(&ctx->fixed_gain[0], &ctx->fixed_gain[1], 4 * sizeof(float));
 
     memmove(&ctx->samples_in[0], &ctx->samples_in[AMRWB_SUBFRAME_SIZE],
-            LP_ORDER * sizeof(float));
+            SAMPLE_MEM * sizeof(float));
 }
 
 static int amrwb_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
@@ -1078,6 +1105,7 @@ static int amrwb_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
     AMRWBFrame   *cf   = &ctx->frame;
     const uint8_t *buf = avpkt->data;
     int buf_size       = avpkt->size;
+    float *buf_out = data;
     AMRFixed fixed_sparse = {0};             // fixed vector up to anti-sparseness processing
     float spare_vector[AMRWB_SUBFRAME_SIZE]; // extra stack space to hold result from anti-sparseness processing
     float fixed_gain_factor;                 // fixed gain correction factor (gamma)
@@ -1172,22 +1200,22 @@ static int amrwb_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
         pitch_enhancer(synth_fixed_vector, voice_fac);
 
         synthesis(ctx, ctx->lp_coef[sub], synth_exc, synth_fixed_gain,
-                  synth_fixed_vector, &ctx->samples_in[LP_ORDER]);
+                  synth_fixed_vector, &ctx->samples_in[SAMPLE_MEM]);
 
         /* Synthesis speech post-processing */
-        de_emphasis(&ctx->samples_in[LP_ORDER], PREEMPH_FAC, ctx->demph_mem);
-
-        high_pass_filter(&ctx->samples_in[LP_ORDER], hpf_31_coef,
-                         ctx->hpf_31_mem, &ctx->samples_in[LP_ORDER]);
+        de_emphasis(&ctx->samples_in[SAMPLE_MEM], PREEMPH_FAC, ctx->demph_mem);
 
-        // XXX: the 5/4 upsampling for the lower band goes in here
+        high_pass_filter(&ctx->samples_in[SAMPLE_MEM], hpf_31_coef,
+                         ctx->hpf_31_mem, &ctx->samples_in[SAMPLE_MEM]);
 
+        upsample_5_4(buf_out + sub * AMRWB_SFR_SIZE_OUT,
+                     &ctx->samples_in[UPS_FIR_SIZE], AMRWB_SFR_SIZE_OUT);
 
         /* High frequency band generation */
-        high_pass_filter(&ctx->samples_in[LP_ORDER], hpf_400_coef,
-                         ctx->hpf_400_mem, &ctx->samples_in[LP_ORDER]);
+        high_pass_filter(&ctx->samples_in[SAMPLE_MEM], hpf_400_coef,
+                         ctx->hpf_400_mem, &ctx->samples_in[SAMPLE_MEM]);
 
-        hb_gain = find_hb_gain(ctx, &ctx->samples_in[LP_ORDER],
+        hb_gain = find_hb_gain(ctx, &ctx->samples_in[SAMPLE_MEM],
                                cur_subframe->hb_gain, cf->vad);
 
         scaled_hb_excitation(ctx, hb_exc, synth_exc, hb_gain);

commit eff9bcbdb922197ef4720daa98bc6e01b8e4fb80
Author: Marcelo Povoa <marspeoplester at gmail.com>
Date:   Mon Jul 26 18:03:34 2010 -0300

    Remove overflow treatment from synthesis (it was NB specific)

diff --git a/libavcodec/amrwbdec.c b/libavcodec/amrwbdec.c
index 8768863..2a62a17 100644
--- a/libavcodec/amrwbdec.c
+++ b/libavcodec/amrwbdec.c
@@ -872,7 +872,6 @@ static void pitch_enhancer(float *fixed_vector, float voice_fac)
 
 /**
  * Conduct 16th order linear predictive coding synthesis from excitation
- * Return a overflow detection flag
  *
  * @param ctx                 [in] pointer to the AMRWBContext
  * @param lpc                 [in] pointer to the LPC coefficients
@@ -880,25 +879,17 @@ static void pitch_enhancer(float *fixed_vector, float voice_fac)
  * @param fixed_gain          [in] fixed codebook gain for synthesis
  * @param fixed_vector        [in] algebraic codebook vector
  * @param samples             [out] pointer to the output speech samples
- * @param overflow            [in] 16-bit predicted overflow flag
  */
-static uint8_t synthesis(AMRWBContext *ctx, float *lpc, float *excitation,
+static void synthesis(AMRWBContext *ctx, float *lpc, float *excitation,
                      float fixed_gain, const float *fixed_vector,
-                     float *samples, uint8_t overflow)
+                     float *samples)
 {
-    int i;
-
-    // if an overflow has been detected, the pitch vector is scaled down by a
-    // factor of 4
-    if (overflow)
-        for (i = 0; i < AMRWB_SUBFRAME_SIZE; i++)
-            ctx->pitch_vector[i] *= 0.25;
-
     ff_weighted_vector_sumf(excitation, ctx->pitch_vector, fixed_vector,
                             ctx->pitch_gain[4], fixed_gain, AMRWB_SUBFRAME_SIZE);
 
     // emphasize pitch vector contribution in low bitrate modes
-    if (ctx->pitch_gain[4] > 0.5 && !overflow && ctx->fr_cur_mode <= MODE_8k85) {
+    if (ctx->pitch_gain[4] > 0.5 && ctx->fr_cur_mode <= MODE_8k85) {
+        int i;
         float energy = ff_dot_productf(excitation, excitation,
                                        AMRWB_SUBFRAME_SIZE);
 
@@ -915,14 +906,6 @@ static uint8_t synthesis(AMRWBContext *ctx, float *lpc, float *excitation,
 
     ff_celp_lp_synthesis_filterf(samples, lpc, excitation,
                                  AMRWB_SUBFRAME_SIZE, LP_ORDER);
-
-    // detect overflow
-    for (i = 0; i < AMRWB_SUBFRAME_SIZE; i++)
-        if (fabsf(samples[i]) > AMRWB_SAMPLE_BOUND) {
-            return 1;
-        }
-
-    return 0;
 }
 
 /**
@@ -1083,6 +1066,9 @@ static void update_sub_state(AMRWBContext *ctx)
 
     memmove(&ctx->pitch_gain[0], &ctx->pitch_gain[1], 4 * sizeof(float));
     memmove(&ctx->fixed_gain[0], &ctx->fixed_gain[1], 4 * sizeof(float));
+
+    memmove(&ctx->samples_in[0], &ctx->samples_in[AMRWB_SUBFRAME_SIZE],
+            LP_ORDER * sizeof(float));
 }
 
 static int amrwb_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
@@ -1174,7 +1160,6 @@ static int amrwb_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
         }
 
         /* Post-processing of excitation elements */
-        /* XXX: Noise_enhancer was not tested yet, needs that 1.5dB check */
         synth_fixed_gain = noise_enhancer(ctx->fixed_gain[4], &ctx->prev_tr_gain,
                                           voice_fac, stab_fac);
 
@@ -1186,13 +1171,8 @@ static int amrwb_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
 
         pitch_enhancer(synth_fixed_vector, voice_fac);
 
-        if (synthesis(ctx, ctx->lp_coef[sub], synth_exc, synth_fixed_gain,
-                      synth_fixed_vector, &ctx->samples_in[LP_ORDER], 0))
-            // overflow detected -> rerun synthesis scaling pitch vector down
-            // by a factor of 4, skipping pitch vector contribution emphasis
-            // and adaptive gain control
-            synthesis(ctx, ctx->lp_coef[sub], synth_exc, synth_fixed_gain,
-                      synth_fixed_vector, &ctx->samples_in[LP_ORDER], 1);
+        synthesis(ctx, ctx->lp_coef[sub], synth_exc, synth_fixed_gain,
+                  synth_fixed_vector, &ctx->samples_in[LP_ORDER]);
 
         /* Synthesis speech post-processing */
         de_emphasis(&ctx->samples_in[LP_ORDER], PREEMPH_FAC, ctx->demph_mem);

commit fe78e3f003f346c235c38403442d8f4f9f541afd
Author: Marcelo Povoa <marspeoplester at gmail.com>
Date:   Mon Jul 26 17:57:28 2010 -0300

    Change the stability factor multiplier to 512,
    which is accurate according to some tests

diff --git a/libavcodec/amrwbdec.c b/libavcodec/amrwbdec.c
index 208c7c4..8768863 100644
--- a/libavcodec/amrwbdec.c
+++ b/libavcodec/amrwbdec.c
@@ -78,7 +78,7 @@ typedef struct {
     float          hpf_31_mem[4], hpf_400_mem[4]; ///< previous values in the high-pass filters
 
     AVLFG                                   prng; ///< random number generator for white noise excitation
-    uint8_t                          first_frame; ///< flag active in the first frame decoded
+    uint8_t                          first_frame; ///< flag active during decoding of the first frame
 } AMRWBContext;
 
 static av_cold int amrwb_decode_init(AVCodecContext *avctx)
@@ -437,7 +437,7 @@ static void decode_pitch_vector(AMRWBContext *ctx,
                           LP_ORDER, AMRWB_SUBFRAME_SIZE);
 
     /* Check which pitch signal path should be used.
-     * 6k60 and 8k85 modes have ltp flag set to 0 */
+     * 6k60 and 8k85 modes have the ltp flag set to 0 */
     if (amr_subframe->ltp) {
         memcpy(ctx->pitch_vector, exc, AMRWB_SUBFRAME_SIZE * sizeof(float));
     } else {
@@ -805,8 +805,8 @@ static float stability_factor(const float *isf, const float *isf_past)
         acc += (isf[i] - isf_past[i]) * (isf[i] - isf_past[i]);
 
     // XXX: I could not understand well this part from ref code
-    // it made more sense changing the "/ 256" to "* 256"
-    return FFMAX(0.0, 1.25 - acc * 0.8 * 256);
+    // it made more sense changing the "/ 256" to "* 512"
+    return FFMAX(0.0, 1.25 - acc * 0.8 * 512);
 }
 
 /**
@@ -855,6 +855,9 @@ static void pitch_enhancer(float *fixed_vector, float voice_fac)
     float cpe = 0.125 * (1 + voice_fac);
     float last = fixed_vector[0]; // holds c(i - 1)
 
+    /* XXX: This procedure seems correct, but due to some roundings
+     * in the opencore code (line 1037 onwards) the resulting fixed_vector
+     * differs quite a bit between the two implementations */
     fixed_vector[0] -= cpe * fixed_vector[1];
 
     for (i = 1; i < AMRWB_SUBFRAME_SIZE - 1; i++) {

-----------------------------------------------------------------------

Summary of changes:
 libavcodec/amrwbdata.h |   36 +++++++++++++++++--
 libavcodec/amrwbdec.c  |   95 +++++++++++++++++++++++++++---------------------
 2 files changed, 86 insertions(+), 45 deletions(-)


hooks/post-receive
-- 
AMR-WB decoder


More information about the FFmpeg-soc mailing list