[FFmpeg-devel] [PATCH] TwinVQ decoder

Michael Niedermayer michaelni
Tue Jun 9 21:00:55 CEST 2009


On Mon, Jun 08, 2009 at 10:01:40PM +0200, Vitor Sessak wrote:
> Michael Niedermayer wrote:
>> On Mon, May 18, 2009 at 06:32:55PM +0200, Vitor Sessak wrote:
>>> Michael Niedermayer wrote:
>>>> On Wed, May 06, 2009 at 05:09:50PM +0200, Vitor Sessak wrote:
>>>>> Vitor Sessak wrote:
[...]
>>> /**
>>>  * Evaluate a single LPC amplitude spectrum envelope coefficient from the 
>>> line
>>>  * spectrum pairs
>>>  *
>>>  * @param cos_lsp a vector of the cosinus of the LSP values
>>>  * @param cos_val cos(PI*i/N) where i is the index of the LPC amplitude
>>>  * @return the LPC value
>>>  *
>>>  * @todo reuse code from vorbis_dec.c: vorbis_floor0_decode
>>>  */
>>> static float eval_lpc_spectrum(const float *cos_lsp, float cos_val, int 
>>> size)
>>> {
>>>     int i;
>>>     float a = .5;
>>>     float b = .5;
>>>
>>>     for (i=0; i < size; i += 2) {
>>>         a *= 2*cos_val - cos_lsp[i  ];
>>>         b *= 2*cos_val - cos_lsp[i+1];
>>>     }
>>>
>>>     a *= a*(2+2*cos_val);
>>>     b *= b*(2-2*cos_val);
>>>
>>>     return .5/(a+b);
>>> }
>> if iam not too tired the .5 and 2 are useless
>
> It is, but it is like that to make it easier to see in what way it is 
> similar to vorbis_floor0_decode().

add a comment that says that please


>
> [...]
>
>>> static void extend_pitch(int a1, const float *a2, float a3, float *a4,
>>>                          TwinContext *tctx)
>>> {
>>>     const ModeTab *mtab = tctx->mtab;
>>>     int v5;
>>>     int v33;
>>>     int v35;
>>>     int fcmax_i, fcmin_i;
>>>     int i, j;
>>>     int basf_step = (1 << mtab->basf_bit) - 1;
>>>     int isampf = tctx->avctx->sample_rate/1000;
>>>     int ibps = tctx->avctx->bit_rate/(1000 * tctx->avctx->channels);
>>>
>>>     fcmin_i = (40*2*mtab->size + isampf/2)/isampf;
>>>
>>>     fcmax_i = (40*6*2*mtab->size + isampf/2)/isampf;
>> vertical align
>>>     v33 = fcmin_i +  (a1*(fcmax_i - fcmin_i) + basf_step/2)/basf_step;
>>>
>>>     if ((isampf == 22) && (ibps == 32))
>>>         v35 = ((v33 + 800LL)* mtab->pitch_cst * mtab->pit_cb_len * v33 +
>>>                200LL * mtab->size*v33)/
>>>             (400LL * mtab->size * v33);
>> looks like some things can be facored out here
>
> Without making it yet more ugly?

yes


>
>> and, btw vXY are unacceptable variable names (this one must be fixed and
>> all of them)
>>>     else
>>>         v35 = (mtab->pitch_cst * mtab->pit_cb_len * v33)/(400 * 
>>> mtab->size);
>>>
>>>     for (i=0; i < v35/2; i++)
>>>         a4[i] += a3 * a2[i];
>>>
>>>     v5 = v35/2;
>>>
>>>     for (i=0; i < mtab->pit_cb_len; i++) {
>>>         int v38 = very_broken_op(v33, i + 1);
>>>         for (j = - v35/2; j < (v35 + 1)/2 ; j++) {
>>>             a4[j+v38] += a3 * a2[v5];
>>>             ++v5;
>>>
>>>             if (v5 >= mtab->pit_cb_len)
>>>                 return;
>>>         }
>>>     }
>>> }
>>>
>>> static void dec_gain(enum FrameType ftype, float *out, TwinContext *tctx)
>>> {
>>>     const ModeTab *mtab = tctx->mtab;
>>>     int i, j;
>>>     int sub = mtab->fmode[ftype].sub;
>>>     float step     = AMP_MAX     / ((1 <<     GAIN_BITS) - 1);
>>>     float sub_step = SUB_AMP_MAX / ((1 << SUB_GAIN_BITS) - 1);
>>>
>>>     if (ftype == FT_LONG) {
>>>         for (i=0; i < tctx->avctx->channels; i++)
>>>             out[i] = (1./(1<<10)) *
>>>                 mulawinv(step * .5 + step * get_bits(&tctx->gb, 
>>> GAIN_BITS),
>>>                          AMP_MAX, MU);
>>>     } else {
>>>         for (i=0; i < tctx->avctx->channels; i++) {
>>>             float val = (1./(1<<20)) *
>>>                 mulawinv(step *.5 + step * get_bits(&tctx->gb, 
>>> GAIN_BITS),
>>>                          AMP_MAX, MU);
>>>
>>>             for (j=0; j < sub; j++) {
>>>                 out[i*sub + j] =
>>>                     val*mulawinv(sub_step*.5 +
>>>                                  sub_step* get_bits(&tctx->gb, 
>>> SUB_GAIN_BITS),
>>>                                  SUB_AMP_MAX, MU);
>>>             }
>>>         }
>>>     }
>>> }
>> inefficient, please loose mulawinv() and the pow() it calls
>
> I've tried a LUT, and it doesn't affect total decoding time in a (easily) 
> measurable way. I changed the pow() call to just an exp() (without an 
> addition log() if the compiler do his job properly).
>
>>> static void check_lsp(int a1, float *a2, float a3)
>>> {
>>>     int i;
>>>
>>>     for (i=0; i < a1; i++)
>>>         if (a2[i] - a3 < a2[i-1]) {
>> the variable names are unacceptable, a1 and a2 are not even the same type
>>>             float tmp =(a2[i-1] - a2[i] + a3)*.5;
>>>             a2[i-1] -= tmp;
>>>             a2[i] += tmp;
>>>         }
>>> }
>>>
>>> static void  check_lsp_sort(int a1, float *a2)
>>> {
>>>     int i,j;
>>>
>>>     for (i=0;  i < a1; ++i)
>>>         for (j=0; j < a1; j++)
>>>             if (a2[j-1] > a2[j])
>>>                 FFSWAP(float, a2[j],a2[j-1]);
>>> }
>> same, also doing a O(n^2) by default sort is unacceptable
>> are the values mostly sorted already?
>
> yes

then write the bubble sort accordingly
The property you want is that x values that are out of order
result in a O(x*n) sort not a O(n*n)
one part of that is simply stoping once there are no changes
the other is ensuring that you get rid of at least one misssorted
element in each pass
alternatively, if "a1" is small, you could just try a heapsort
or quicksort (picking a good pivot is trivial if your array is near
sorted)


>
>> [...]
>>> static void dec_bark_env(const uint8_t *in, int use_hist, int ch, float 
>>> *out,
>>>                          enum FrameType ftype, TwinContext *tctx)
>>> {
>>>     const ModeTab *mtab = tctx->mtab;
>>>     int i,j;
>>>     float *hist = tctx->bark_hist[ftype][ch];
>>>     float val = ((const float []) {0.4, 0.35, 0.28})[ftype];
>>>     int bark_n_coef  = mtab->fmode[ftype].bark_n_coef;
>>>     int fw_cb_len = mtab->fmode[ftype].bark_env_size / bark_n_coef;
>>>     int pos = 0;
>>>
>>>     for (i = 0; i < fw_cb_len; i++)
>>>         for (j = 0; j < bark_n_coef; j++) {
>>>             int idx = j + i*bark_n_coef;
>>>             float tmp2 = mtab->fmode[ftype].bark_cb[fw_cb_len*in[j] + i];
>>>             float st = (use_hist) ?
>>>                 (1. - val) * tmp2 + val*hist[idx] + 1. : tmp2 + 1.;
>>>
>>>             hist[idx] = tmp2;
>>>             if (st < -1.) st = 1.;
>>>
>>>             while (pos < (mtab->fmode[ftype].bark_tab)[idx])
>>>                 out[pos++] = st;
>> a memset_float() could come in handy, this one isnt the first case i
>> see that would benefit from it
>
> Here or in a shared file?

here is fine ATM

[...]
> /**
>  * Parameters and tables that are different for every combination of
>  * bitrate/sample rate
>  */
> typedef struct {
>     const FrameMode fmode[3]; ///< frame type-dependant parameters
> 
>     uint16_t     size;        ///< frame size in samples
>     uint8_t      n_lsp;       ///< number of lsp coefficients
>     const float *lspcodebook;
> 
>     /* number of bits of the different LSP CB coefficients */
>     uint8_t      lsp_bit0;
>     uint8_t      lsp_bit1;
>     uint8_t      lsp_bit2;
> 
>     uint8_t      lsp_split;   ///< number of CB entries for the LSP decoding
>     const float *pit_cb;      ///< pitch CB
> 
>     /** number of the bits for the base frequency value */
>     uint8_t      basf_bit;
> 
>     uint8_t      pit_n_bit;   ///< number of bits of the pitch CB coeffs
>     uint8_t      pit_cb_len;  ///< size of pitch CB
>     uint8_t      pgain_bit;   ///< bits for PPC gain

>     uint8_t      pitch_cst;

that is what?


[...]
>     int skip_cnt;

skip what count ?


>     const ModeTab *mtab;
>     float lsp_hist[9][21];         ///< LSP coefficients of the last frame
>     int16_t bit_rev[4][4096];
>     float bark_hist[3][2][20*10];  ///< BSE coefficients of last frame
>     float *prev;                   ///< spectrum of last frame

>     MDCTContext ctx[3];

the variable name is poor

>     GetBitContext gb;
>     uint8_t bits[2][4][700];      ///< number of bits to read the main codebook
>     uint8_t length[4][700];       ///< main codebook stride
>     int n_div[4];
> } TwinContext;
> 
> enum FrameType {
>     FT_SHORT = 0,  ///< Short frame  (divided in n   sub-blocks)
>     FT_MEDIUM,     ///< Medium frame (divided in m<n sub-blocks)
>     FT_LONG,       ///< Long frame   (single sub-block + PPC)
>     FT_PPC,        ///< Periodic Peak Component (part of the long frame)
> };
> 
> #define PIT_CB_SIZE 64

PIT == PITCH ?


> #define SUB_AMP_MAX 4500.

> #define MU 100.

cow: muhhhh?


[...]
> /**
>  * Evaluates the LPC amplitude spectrum envelope from the line spectrum pairs.
>  */
> static void eval_lpcenv(const float *cos_vals, float *a2, TwinContext *tctx)

a2?


> {
>     int i;
>     const ModeTab *mtab = tctx->mtab;
>     int size_s = mtab->size / mtab->fmode[FT_SHORT].sub;
>     int mag_95 = 2 * mtab->fmode[FT_SHORT].sub;
>     float *cos_TT = ff_cos_tabs[av_log2(mtab->size)-1];
> 
>     for (i=0; i < (size_s/2); i++) {
>         a2[i]          = eval_lpc_spectrum(cos_vals,  cos_TT[mag_95*(2*i+1)],
>                                            mtab->n_lsp);
>         a2[size_s-i-1] = eval_lpc_spectrum(cos_vals, -cos_TT[mag_95*(2*i+1)],
>                                            mtab->n_lsp);
>     }
> }
> 

> static void interpolate(float *out, float v1, float v2, int size)
> {
>     int j;
>     float step = (v1-v2)/size;

>     float val = v2;

redundant variable


[...]
> /**
>  * Evaluates the LPC amplitude spectrum envelope from the line spectrum pairs.
>  * Probably for speed reasons, the coefficients are evaluated like
>  * siiiibiiiiisiiiibiiiiisiiiibiiiiisiiiibiiiiis ...
>  * where s is an evaluated value, i is a value interpolated from the others
>  * and b might be either calculated or interpolated, dependent on a
>  * unexplained condition.
>  *
>  * @param step the size of a block "siiiibiiii"
>  * @param in the cosinus of the LSP data
>  */
> static inline void eval_lpcenv_or_interp(float *out, const float *in,
>                                          int size, int step, int sub,
>                                          const ModeTab *mtab, int forward)
> {
>     int i;
>     const float *cos_tab = ff_cos_tabs[av_log2(mtab->size)-1];
> 
>     // Fill the 's'
>     for (i=0; i < size; i += step)
>         out[i] =
>             eval_lpc_spectrum(in,
>                               GET_COS(sub, i*4+2, forward,
>                                       cos_tab, 2*mtab->size - sub*size*2),
>                               mtab->n_lsp);
> 
>     // Fill the 'b'
>     for (i=step; i < size - step; i += step) {
>         if ((out[i + step] + out[i - step] > 1.95*out[i]) ||
>             (out[i - step]               <= out[i + step])) {
>             out[i - step/2] = (out[i] + out[i-step])*.5;
>         } else {
>             out[i - step/2] =
>                 eval_lpc_spectrum(in,
>                                   GET_COS(sub, 4*i-2*step+2, forward,
>                                           cos_tab,
>                                           2*mtab->size - sub*size*2),
>                                   mtab->n_lsp);
>         }
>     }
> 
>     // Fill the 'i'
>     for (i=step; i < size - step; i += step/2)
>         interpolate(out + i-step  +1, out[i-step/2], out[i-step  ], step/2);
> 
>     interpolate(out+i-step+1, out[i], out[i-step], step);

hmm, still ugly

what about something like: (note its buggy but the idea should be clear)
{
    if(!(i&step) || (out[i + step]  + out[i - step] <= 1.95*out[i] &&
                     out[i + step] < out[i - step]                   )){
        eval_lpc_spectrum
        if(i)
            interpolate(i, last_point);
        last_point= i
    }
}

the b really is just a longer interpolation AFAICS


> }
> 

> static void lsptospec(const float *buf, float *a2, int size, int step,
>                       int sub, TwinContext *tctx)

a2 is what?


[...]
> static void dequant(float *a2, enum FrameType ftype, const float *cb0,
>                     const float *cb1, int cb_len, TwinContext *tctx)
> {
>     int pos = 0;
>     int i, j;
> 
>     for (i=0; i < tctx->n_div[ftype]; i++) {
>         int tmp[2];
>         int sign[2] = {1,1};
>         const float *tab0, *tab1;
> 
>         for (j=0; j < 2; j++)
>             if (tctx->bits[j][ftype][i] == 7) {
>                 sign[j] -= get_bits1(&tctx->gb) << 1;
>                 tmp[j] = get_bits(&tctx->gb, 6);
>             } else
>                 tmp[j] = get_bits(&tctx->gb, tctx->bits[j][ftype][i]);
> 
>         tab0 = cb0 + tmp[0]*cb_len;
>         tab1 = cb1 + tmp[1]*cb_len;
> 
>         for (j=0; j < tctx->length[ftype][i]; j++)
>             a2[tctx->bit_rev[ftype][pos++]] =
>                 (sign[1]*tab1[j] + sign[0]*tab0[j])*0.5;

can the 0.5 be merged into some table or constant?


[...]
> static void extend_pitch(int a1, const float *pitch, float pit_gain,
>                          float *speech, TwinContext *tctx)
> {
>     const ModeTab *mtab = tctx->mtab;
>     int pos;

>     int unk1;
>     int unk2;

ehm


>     int fcmax_i, fcmin_i;
>     int i, j;
>     int basf_step = (1 << mtab->basf_bit) - 1;
>     int isampf = tctx->avctx->sample_rate/1000;
>     int ibps = tctx->avctx->bit_rate/(1000 * tctx->avctx->channels);
> 
>     fcmin_i = (  40*2*mtab->size + isampf/2)/isampf;
>     fcmax_i = (6*40*2*mtab->size + isampf/2)/isampf;
> 
>     unk1 = fcmin_i +  (a1*(fcmax_i - fcmin_i) + basf_step/2)/basf_step;
> 
>     if ((isampf == 22) && (ibps == 32)) {
>         unk2 = ((unk1 + 800LL)* mtab->pitch_cst * mtab->pit_cb_len * unk1 +
>                200LL * mtab->size*unk1)/
>             (400LL * mtab->size * unk1);
>     } else
>         unk2 = (mtab->pitch_cst * mtab->pit_cb_len * unk1)/(400 * mtab->size);
> 
>     for (i=0; i < unk2/2; i++)
>         speech[i] += pit_gain * pitch[i];
> 
>     pos = unk2/2;
> 
>     for (i=0; i < mtab->pit_cb_len; i++) {
>         int v38 = very_broken_op(unk1, i + 1);

v38 ?

also i should mention that a 1:1 convertion to C from disassembly might not
be legal, we need an independant implementation of things
variables with names that look like they are copied from a disassembler
raise a big red flag here and you will have to convince me that you really
wrote an independant implementation if you want to see this in svn

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Republics decline into democracies and democracies degenerate into
despotisms. -- Aristotle
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20090609/f503f95c/attachment.pgp>



More information about the ffmpeg-devel mailing list