[FFmpeg-devel] [PATCH] Common ACELP routines (2/3) - filters

Fri Apr 25 09:40:08 CEST 2008

Michael Niedermayer wrote: 
> On Fri, Apr 25, 2008 at 08:22:15AM +0700, Vladimir Voroshilov wrote:
> > On Fri, Apr 25, 2008 at 1:17 AM, Michael Niedermayer <michaelni at gmx.at> wrote:
> > > On Fri, Apr 25, 2008 at 12:07:15AM +0700, Vladimir Voroshilov wrote:
> > >  > On Thu, Apr 24, 2008 at 10:13 AM, Michael Niedermayer <michaelni at gmx.at> wrote:
> > >  [...]
> > >
> > >
> > > > >  > > > +        filter_data[10+n] = out[n] = sum;
> > >  > >  > >
> > >  > >  > > This duplicated storeage is unacceptable.
> > >  > >  >
> > >  > >  > First for all assigned to filter data values will be used in loop later.
> > >  > >  > Thus filter_data can not be eliminated.
> > >  > >  > I can't use "out" instead of it due to necessary 10 items
> > >  > >  > with data from previous subframe at top).
> > >  > >  > Extending out with 10 items at top will require another temporary buffer
> > >  > >  > one memcpy somewhere later (because i will not be able to use output buffer
> > >  > >  > directly).
> > >  > >
> > >  > >  The double write is definitly useless after the first 10 iterations as
> > >  > >  after that you can just work in the out buffer.
> > >  > >
> > >  > >  foobar_filter(filter_data+10, 10);
> > >  > >  memcpy(out, filter_data+10, 10);
> > >  > >  foobar_filter(out+10, N-10);
> > >  > >
> > >  > >  should work fine and will for large N (dunno how large it is, so maybe
> > >  > >  this isnt worth it ...) be faster. Also it allows filter_data to be smaller.
> > >  >
> > >  > ... and code will look like :(
> > >  >
> > >  > if(foobar_filter(filter_data+10, 10)!=OVERFLOW)
> > >  > {
> > >  >   memcpy(out, filter_data+10, 10);
> > >  >   if(foobar_filter(out+10, N-10)==OVERFLOW)
> > >  >   {
> > >  >      for(i=0;i<len;i++) out>>=2;
> > >  >      foobar_filter(filter_data+10, 10);
> > >  >      memcpy(out, filter_data+10, 10);
> > >  >      foobar_filter(out+10, N-10);
> > >  >   }
> > >  > }
> > >  > else
> > >  > {
> > >  >      for(i=0;i<len;i++) out>>=2;
> > >  >      foobar_filter(filter_data+10, 10);
> > >  >      memcpy(out, filter_data+10, 10);
> > >  >      foobar_filter(out+10, N-10);
> > >  > }
> > >
> > >  for(;;){
> > >     overflow= foobar_filter(filter_data+10, 10);
> > >
> > >     memcpy(out, filter_data+10, 10);
> > >     overflow|= foobar_filter(out+10, N-10);
> > >     if(!overflow)
> > >         break;
> > >
> > >     for(i=0;i<len;i++) out>>=2;
> > >  }
> > 
> > This will change filter_data even if overflow occuried.
> > Which cause wrong synthesis result on second iteration.
> > Current code on overflow case just downscales
> > excitation signal (without touching filter data).
> 
> well its a matter of adding if(!overflow)
> 
> Also note that a memcpy is likely faster than the one by one element writing
> in the loop. So if you dislike spliting it in 2 like above, then a seperate
> memcpy is definitly prefered over the double writing.

I've split it in two routines.
first is alike your's foobar_filter.

Second routine calls previous one 
twice - for first 10 samples and the remaining.

I've also moved output parameters to top.

> 
> 
> [...]
> -- 
> Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
> 
> It is not what we do, but why we do it that matters.



-- 
Regards,
Vladimir Voroshilov mailto:voroshil at gmail.com
Omsk State University
JID: voroshil at jabber.ru
ICQ: 95587719
-------------- next part --------------

diff --git a/libavcodec/acelp_filters.c b/libavcodec/acelp_filters.c
new file mode 100644
index 0000000..1751dd5
--- /dev/null
+++ b/libavcodec/acelp_filters.c
@@ -0,0 +1,222 @@
+/*
+ * Various filters for ACELP-based codecs
+ *
+ * Copyright (c) 2008 Vladimir Voroshilov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <inttypes.h>
+
+#include "avcodec.h"
+#include "acelp_filters.h"
+
+/**
+ * Low-pass FIR (Finite Impulse Response) filter coefficients
+ *
+ *   Similar filter is named b30 in G.729.
+ *
+ *   G.729 specification says:
+ *     b30 is based on Hamming windowed sinc functions, truncated at +/-29 and
+ *     padded with zeros at +/-30 b30[30]=0
+ *     The filter has a cut-off frequency (-3 dB) at 3600 Hz in the oversampled domain.
+ *
+ *   After some analysis, I found this aproximation:
+ *
+ *                                    PI * x
+ *   Hamm(x,N) = 0.53836-0.46164*cos(--------)
+ *                                      N-1
+ *                                      ---
+ *                                       2
+ *
+ *                                                             PI * x
+ *   Hamm'(x,k) = Hamm(x - k, 2*k+1) =  0.53836 + 0.46164*cos(--------)
+ *                                                                k
+ *
+ *             sin(PI * x)
+ *   Sinc(x) = ----------- (normalized sinc function)
+ *               PI * x
+ *
+ *   h(t,B) = 2 * B * Sinc(2 * B * t) (impulse response of sinc low-pass filter)
+ *
+ *   b(k,B, n) = Hamm'(n, k) * h(n, B)
+ *
+ *
+ *       3600
+ *   B = ----
+ *       8000
+ *
+ *   3600 - cut-off frequency
+ *   8000 - sampliny rate
+ *   k    - filter's order
+ *   
+ *   ff_acelp_interp_filter[i][j] = b(10, 3600/8000, i+j/6)
+ *
+ */
+const int16_t ff_acelp_interp_filter[11][6] =
+{ /* Q15 */
+  { 29443, 28346, 25207, 20449, 14701,  8693},
+  {  3143, -1352, -4402, -5865, -5850, -4673},
+  { -2783,  -672,  1211,  2536,  3130,  2991},
+  {  2259,  1170,     0, -1001, -1652, -1868},
+  { -1666, -1147,  -464,   218,   756,  1060},
+  {  1099,   904,   550,   135,  -245,  -514},
+  {  -634,  -602,  -451,  -231,     0,   191},
+  {   308,   340,   296,   198,    78,   -36},
+  {  -120,  -163,  -165,  -132,   -79,   -19},
+  {    34,    73,    91,    89,    70,    38},
+  {     0,     0,     0,     0,     0,     0},
+};
+
+void ff_acelp_interpolate_excitation(int16_t* ac_v, int pitch_delay_6x, int subframe_size)
+{
+    int n, i;
+    int v;
+    // TODO: clarify why used such expression (hint: -1/3 , 0 ,1/3 order in interpol_filter)
+    int pitch_delay_frac = 2 - (pitch_delay_6x%6);
+    int pitch_delay_int  = pitch_delay_6x / 6;
+
+    //Make sure that pitch_delay_frac will be always positive
+    if(pitch_delay_frac < 0)
+    {
+        pitch_delay_frac += 6;
+        pitch_delay_int++;
+    }
+
+    //pitch_delay_frac [0; 5]
+    //pitch_delay_int  [PITCH_LAG_MIN-1; PITCH_LAG_MAX]
+    for(n=0; n<subframe_size; n++)
+    {
+        /* 3.7.1 of G.729, Equation 40 */
+        v=0;
+        for(i=0; i<10; i++)
+        {
+            /*  R(x):=ac_v[-k+x] */
+            v += ac_v[n - pitch_delay_int - i    ] * ff_acelp_interp_filter[i][    pitch_delay_frac];
+            v = av_clip(v, -0x40000000, 0x3fffffff); //v += R(n-i)*ff_acelp_interp_filter(t+3i)
+            v += ac_v[n - pitch_delay_int + i + 1] * ff_acelp_interp_filter[i][6 - pitch_delay_frac];
+            v = av_clip(v, -0x40000000, 0x3fffffff); //v += R(n+i+1)*ff_acelp_interp_filter(3-t+3i)
+        }
+        ac_v[n] = (v + 0x4000) >> 15;
+    }
+}
+
+void ff_acelp_convolve_circ(int16_t* fc_out, const int16_t* fc_in, const int16_t* filter, int subframe_size)
+{
+    int i, k;
+
+    memset(fc_out, 0, subframe_size * sizeof(int16_t));
+
+    for(i=0; i<subframe_size; i++)
+    {
+        if(fc_in[i])
+        {
+            for(k=0; k<i; k++)
+                fc_out[k] += (fc_in[i] * filter[subframe_size + k - i]) >> 15;
+
+            for(k=i; k<subframe_size; k++)
+                fc_out[k] += (fc_in[i] * filter[k - i]) >> 15;
+        }
+    }
+}
+
+int ff_acelp_lp_synthesis_filter_inplace(
+        int16_t *out,
+        const int16_t* filter_coeffs,
+        const int16_t* in,
+        int length,
+        int stop_on_overflow)
+{
+    int i,n;
+    int sum;
+
+    for(n=0; n<length; n++)
+    {
+        sum = in[n] << 12;
+        for(i=1; i<11; i++)
+            sum -= filter_coeffs[i] * out[n-i];
+
+        sum = (sum + 0x800) >> 12;
+
+        //hecking for overflow
+        if(sum + 0x8000 > 0xFFFFU)
+        {
+            if(stop_on_overflow)
+                return 1;
+            sum = (sum >> 31) ^ 32767;
+        }
+        out[n] = sum;
+    }
+
+    return 0;
+}
+
+int ff_acelp_lp_synthesis_filter(
+        int16_t *out,
+        int16_t *filter_data,
+        const int16_t* filter_coeffs,
+        const int16_t *in,
+        int subframe_size,
+        int update_filter_data)
+{
+    int overflow;
+
+    // Processing first 10 samples using filter_data as past data
+    overflow = ff_acelp_lp_synthesis_filter_inplace(
+            filter_data+10,
+            filter_coeffs,
+            in,
+            10,
+            !update_filter_data);
+    if(overflow && !update_filter_data)
+        return 1;
+
+    memcpy(out, filter_data+10, 10*sizeof(int16_t));
+
+    /*
+      Processing remaining samples using previously computed
+      samples in the same (output) buffer as past data
+    */
+    overflow = ff_acelp_lp_synthesis_filter_inplace(
+            out+10,
+            filter_coeffs,
+            in+10,
+            subframe_size-10,
+            !update_filter_data);
+    if(overflow && !update_filter_data)
+        return 1;
+
+    // Saving data for using in next subframe
+    if(update_filter_data)
+        memcpy(filter_data, out + subframe_size - 10, 10 * sizeof(int16_t));
+
+    return overflow;
+}
+
+void ff_acelp_weighted_filter(int16_t *out, const int16_t* in, int16_t weight)
+{
+    int weight_pow = 1 << 15;
+    int n;
+
+    for(n=0; n<11; n++)
+    {
+        // Q15 * Q12 -> Q12 with rounding
+        out[n] = (in[n] * weight_pow + 0x4000) >> 15;
+        // Q15 * Q12 -> Q12 with rounding
+        weight_pow = (weight_pow * weight + 0x4000) >> 15;
+    }
+}
diff --git a/libavcodec/acelp_filters.h b/libavcodec/acelp_filters.h
new file mode 100644
index 0000000..7224136
--- /dev/null
+++ b/libavcodec/acelp_filters.h
@@ -0,0 +1,110 @@
+/*
+ * Various filters for ACELP-based codecs
+ *
+ * Copyright (c) 2008 Vladimir Voroshilov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef FFMPEG_ACELP_FILT_H
+#define FFMPEG_ACELP_FILT_H
+
+/**
+ * \brief Decoding of the adaptive-codebook vector (4.1.3 of G.729)
+ * \param ac_v [out] (Q0) buffer to store decoded vector into
+ * \param pitch_delay pitch delay with 1/6 precision
+ * \param subframe_size length of subframe
+ *
+ * Routine assumes following fractions order (X - integer delay):
+ *
+ * 1/3 precision: X     1/3     -1/3      X     1/3     -1/3      X
+ * 1/6 precision: X 1/6 2/6 3/6 -2/6 -1/6 X 1/6 2/6 3/6 -2/6 -1/6 X
+ *
+ * Second parameter should contain 6*int+frac+2
+ *
+ * Routine can be used for 1/3 precision too, by passing 2*pitch_delay as second parameter
+ */
+void ff_acelp_interpolate_excitation(int16_t* ac_v, int pitch_delay_6x, int subframe_size);
+
+
+/**
+ * \brief Circularly convolve fixed vector with a phase dispersion impulse response filter
+ * \param fc_out vector with filter applied
+ * \param fc_in source vector
+ * \param filter impulse response of phase filter to apply
+ *
+ * \note fc_in and fc_out should not overlap!
+ */
+void ff_acelp_convolve_circ(int16_t* fc_new, const int16_t* fc_cur, const int16_t* filter, int subframe_size);
+
+/**
+ * \brief LP synthesis filter
+ * \param out [out] (Q0) pointer to output buffer
+ * \param filter_coeffs (Q12) filter coefficients
+ * \param in (Q0) input signal
+ * \param length amount of data to proceed
+ * \param stop_on_overflow   1 - return immediately if overflow ocures
+ *                           0 - ignore overflows
+ *
+ * \return 1 if overflow occured, 0 - otherwise
+ *
+ * \note output buffer must contains 10 samples of past
+ *       speech data before pointer
+ *
+ * Routine applies 1/A(z) filter to given speech data
+ */
+int ff_acelp_lp_synthesis_filter_inplace(
+        int16_t *out,
+        const int16_t* filter_coeffs,
+        const int16_t* in,
+        int length,
+        int stop_on_overflow);
+
+/**
+ * \brief LP synthesis filter
+ * \param out [out] (Q0) output (filtered) signal
+ * \param filter_data [in/out] (Q0) filter data array (previous synthesis data)
+ * \param filter_coeffs (Q12) filter coefficients
+ * \param in (Q0) input signal
+ * \param subframe_size length of subframe
+ * \param update_filter_data 1 - update past filter data arrays
+ *                           0 - don't update
+ *
+ * \return 1 if overflow occured, 0 - otherwise
+ *
+ * \note filter_data should be at least subframe_size+10 size
+ * Routine applies 1/A(z) filter to given speech data
+ */
+int ff_acelp_lp_synthesis_filter(
+	int16_t *out,
+	int16_t *filter_data,
+        const int16_t* filter_coeffs,
+	const int16_t *in,
+	int subframe_size,
+	int update_filter_data);
+
+/**
+ * \brief Calculates coefficients of weighted A(z/weight) filter
+ * \param out [out] (Q12) resulted weighted A(z/weight) filter
+ * \param in (Q12) source filter
+ * \param weight (Q15) weight factor of postfiltering
+ *
+ * out[i]=weight^i*in[i] , i=0..9
+ */
+void ff_acelp_weighted_filter(int16_t *out, const int16_t* in, int16_t weight);
+
+#endif // FFMPEG_ACELP_FILT_H