[FFmpeg-devel] [PATCH] MPEG-4 Parametric Stereo decoder

Michael Niedermayer michaelni
Thu Jun 3 18:47:06 CEST 2010


On Wed, Jun 02, 2010 at 10:28:32PM -0400, Alex Converse wrote:
> On Mon, May 31, 2010 at 6:42 PM, Michael Niedermayer <michaelni at gmx.at> wrote:
[...]
> >
> > > +{
> > > + ? ?int b;
> > > + ? ?int table_idx = dt ? huff_icc_dt : huff_icc_df;
> > > + ? ?VLC_TYPE (*vlc_table)[2] = vlc_ps[table_idx].table;
> > > + ? ?if (dt) {
> > > + ? ? ? ?int e_prev = e ? e - 1 : ps->num_env_old - 1;
> > > + ? ? ? ?e_prev = FFMAX(e_prev, 0);
> > > + ? ? ? ?for (b = 0; b < ps->nr_icc_par; b++) {
> > > + ? ? ? ? ? ?ps->icc_par[e][b] = ps->icc_par[e_prev][b] + get_vlc2(gb, vlc_table, 9, 3) - huff_offset[table_idx];
> > > + ? ? ? ? ? ?if (ps->icc_par[e][b] > 7U) {
> > > + ? ? ? ? ? ? ? ?av_log(avctx, AV_LOG_ERROR, "illegal icc\n");
> > > + ? ? ? ? ? ? ? ?return -1;
> > > + ? ? ? ? ? ?}
> > > + ? ? ? ?}
> > > + ? ?} else {
> > > + ? ? ? ?int prev = 0;
> > > + ? ? ? ?for (b = 0; b < ps->nr_icc_par; b++) {
> > > + ? ? ? ? ? ?prev += get_vlc2(gb, vlc_table, 9, 3) - huff_offset[table_idx];
> > > + ? ? ? ? ? ?ps->icc_par[e][b] = prev;
> > > + ? ? ? ? ? ?if (ps->icc_par[e][b] > 7U) {
> > > + ? ? ? ? ? ? ? ?av_log(avctx, AV_LOG_ERROR, "illegal icc\n");
> > > + ? ? ? ? ? ? ? ?return -1;
> > > + ? ? ? ? ? ?}
> > > + ? ? ? ?}
> > > + ? ?}
> >
> > this could be simplified to:
> > for (b = 0; b < ps->nr_icc_par; b++) {
> > ? ?ps->icc_par[e][b]= prev[b] + get_vlc2(gb, vlc_table, 9, 3) - huff_offset[table_idx];
> > ? ?if (ps->icc_par[e][b] > 7U) {
> > ? ? ? ?av_log(avctx, AV_LOG_ERROR, "illegal icc\n");
> > ? ? ? ?return -1;
> > ? ?}
> > }
> > and this possibly applies to other functions as well
> >
> 
> Are you talking about the if case the else case or both combined?

iam talking about combining the if and else by having prev be a pointer
into par


> why is prev dereferenced two levels but not par?
> Am I missing something here?
> 
> 
> > > + ? ?return 0;
> > > +}
> > > +
> > > +static void ipd_data(GetBitContext *gb, PSContext *ps, int e, int dt)
> > > +{
> > > + ? ?int b;
> > > + ? ?int table_idx = dt ? huff_ipd_dt : huff_ipd_df;
> > > + ? ?VLC_TYPE (*vlc_table)[2] = vlc_ps[table_idx].table;
> > > + ? ?if (dt) {
> > > + ? ? ? ?int e_prev = e ? e - 1 : ps->num_env_old - 1;
> > > + ? ? ? ?e_prev = FFMAX(e_prev, 0);
> > > + ? ? ? ?for (b = 0; b < ps->nr_ipdopd_par; b++) {
> > > + ? ? ? ? ? ?ps->ipd_par[e][b] = (ps->ipd_par[e_prev][b] + get_vlc2(gb, vlc_table, 9, 1)) & 0x07;
> > > + ? ? ? ?}
> > > + ? ?} else {
> > > + ? ? ? ?int prev = 0;
> > > + ? ? ? ?for (b = 0; b < ps->nr_ipdopd_par; b++) {
> > > + ? ? ? ? ? ?prev += get_vlc2(gb, vlc_table, 9, 3);
> > > + ? ? ? ? ? ?prev &= 0x07;
> > > + ? ? ? ? ? ?ps->ipd_par[e][b] = prev;
> > > + ? ? ? ?}
> > > + ? ?}
> > > +}
> > > +
> > > +static void opd_data(GetBitContext *gb, PSContext *ps, int e, int dt)
> > > +{
> > > + ? ?int b;
> > > + ? ?int table_idx = dt ? huff_opd_dt : huff_opd_df;
> > > + ? ?VLC_TYPE (*vlc_table)[2] = vlc_ps[table_idx].table;
> > > + ? ?if (dt) {
> > > + ? ? ? ?int e_prev = e ? e - 1 : ps->num_env_old - 1;
> > > + ? ? ? ?e_prev = FFMAX(e_prev, 0);
> > > + ? ? ? ?for (b = 0; b < ps->nr_ipdopd_par; b++) {
> > > + ? ? ? ? ? ?ps->opd_par[e][b] = (ps->opd_par[e_prev][b] + get_vlc2(gb, vlc_table, 9, 1)) & 0x07;
> > > + ? ? ? ?}
> > > + ? ?} else {
> > > + ? ? ? ?int prev = 0;
> > > + ? ? ? ?for (b = 0; b < ps->nr_ipdopd_par; b++) {
> > > + ? ? ? ? ? ?prev += get_vlc2(gb, vlc_table, 9, 3);
> > > + ? ? ? ? ? ?prev &= 0x07;
> > > + ? ? ? ? ? ?ps->opd_par[e][b] = prev;
> > > + ? ? ? ?}
> > > + ? ?}
> > > +}
> >
> > all these functions look like they do pretty much the same, maybe this can be
> > done with just a single function?
> >
> 
> I thought about it but they all have different valid ranges, unless
> you just mean merging ipd/opd which both have the same circular range.

why not pass the range as argument to the function?


[...]
> >
> > [...]
> > > +
> > > +/** Split one subband into 2 subsubbands with a real filter */
> > > +static av_noinline void hybrid2_re(float (*in)[2], float (*out)[32][2], const float filter[7], int len, int reverse)
> > > +{
> > > + ? ?int i, j;
> > > + ? ?for (i = 0; i < len; i++) {
> > > + ? ? ? ?float re_in = filter[6] * in[6+i][0]; ? ? ? ?//real inphase
> >
> > 0.5 * in[6+i][0]
> 
> If we are going to start inlining values here are we better off using
> the g1_Q2 symbol directly in this function rather than making it a
> parameter?

you are the aac expert, i dont know if this function is intended to be ever
used with a different array.


[...]
> >
> > > + ? ? ? ? ? ?}
> > > + ? ? ? ? ? ?temp[ssb][0] = sum_re;
> > > + ? ? ? ? ? ?temp[ssb][1] = sum_im;
> > > + ? ? ? ?}
> > > + ? ? ? ?out[0][i][0] = temp[6][0];
> > > + ? ? ? ?out[0][i][1] = temp[6][1];
> > > + ? ? ? ?out[1][i][0] = temp[7][0];
> > > + ? ? ? ?out[1][i][1] = temp[7][1];
> > > + ? ? ? ?out[2][i][0] = temp[0][0];
> > > + ? ? ? ?out[2][i][1] = temp[0][1];
> > > + ? ? ? ?out[3][i][0] = temp[1][0];
> > > + ? ? ? ?out[3][i][1] = temp[1][1];
> > > + ? ? ? ?out[4][i][0] = temp[2][0] + temp[5][0];
> > > + ? ? ? ?out[4][i][1] = temp[2][1] + temp[5][1];
> > > + ? ? ? ?out[5][i][0] = temp[3][0] + temp[4][0];
> > > + ? ? ? ?out[5][i][1] = temp[3][1] + temp[4][1];
> > > + ? ?}
> > > +}
> > > +
> > > +static av_noinline void hybrid4_8_12_cx(float (*in)[2], float (*out)[32][2], const float (*filter)[7][2], int N, int len)
> > > +{
> > > + ? ?int i, j, ssb;
> > > +
> > > + ? ?for (i = 0; i < len; i++) {
> > > + ? ? ? ?for (ssb = 0; ssb < N; ssb++) {
> > > + ? ? ? ? ? ?float sum_re = filter[ssb][6][0] * in[i+6][0], sum_im = filter[ssb][6][0] * in[i+6][1];
> > > + ? ? ? ? ? ?for (j = 0; j < 6; j++) {
> > > + ? ? ? ? ? ? ? ?float in0_re = in[i+j][0];
> > > + ? ? ? ? ? ? ? ?float in0_im = in[i+j][1];
> > > + ? ? ? ? ? ? ? ?float in1_re = in[i+12-j][0];
> > > + ? ? ? ? ? ? ? ?float in1_im = in[i+12-j][1];
> > > + ? ? ? ? ? ? ? ?sum_re += filter[ssb][j][0] * (in0_re + in1_re) - filter[ssb][j][1] * (in0_im - in1_im);
> > > + ? ? ? ? ? ? ? ?sum_im += filter[ssb][j][0] * (in0_im + in1_im) + filter[ssb][j][1] * (in0_re - in1_re);
> > > + ? ? ? ? ? ?}
> > > + ? ? ? ? ? ?out[ssb][i][0] = sum_re;
> > > + ? ? ? ? ? ?out[ssb][i][1] = sum_im;
> > > + ? ? ? ?}
> > > + ? ?}
> > > +}
> > > +
> > > +static av_noinline void hybrid_analysis(float out[91][32][2], float in[64][44][2], int is34, int len)
> > > +{
> > > + ? ?int i;
> > > + ? ?if(is34) {
> > > + ? ? ? ?hybrid4_8_12_cx(in[0], out, ? ?f34_0_12, 12, len);
> > > + ? ? ? ?hybrid4_8_12_cx(in[1], out+12, f34_1_8, ? 8, len);
> > > + ? ? ? ?hybrid4_8_12_cx(in[2], out+20, f34_2_4, ? 4, len);
> > > + ? ? ? ?hybrid4_8_12_cx(in[3], out+24, f34_2_4, ? 4, len);
> > > + ? ? ? ?hybrid4_8_12_cx(in[4], out+28, f34_2_4, ? 4, len);
> > > + ? ? ? ?for (i = 0; i < 59; i++) {
> > > + ? ? ? ? ? ?memcpy(out[32 + i], in[5 + i]+6, len * sizeof(in[0][0]));
> > > + ? ? ? ?}
> > > + ? ?} else {
> > > + ? ? ? ?hybrid6_cx(in[0], out, f20_0_8, len);
> > > + ? ? ? ?hybrid2_re(in[1], out+6, g1_Q2, len, 1);
> > > + ? ? ? ?hybrid2_re(in[2], out+8, g1_Q2, len, 0);
> > > + ? ? ? ?for (i = 0; i < 61; i++) {
> > > + ? ? ? ? ? ?memcpy(out[10 + i], in[3 + i]+6, len * sizeof(in[0][0]));
> > > + ? ? ? ?}
> >
> > is all that memcpy unavoidable?
> >
> 
> It may be avoidable with a float(**)x[2] instead of a float(*)[][2].
> FFmpeg seems to avoid the former.

we prefer 2d fixed size arrays over pointer array due to extra dereferences


> 
> I was also thinking it may be prudent to merge transpose_in() with
> hybrid_analysis() which would make the point moot, no?

if that gets rid of the memcpy() its of course a good idea.
ive not looked in depth at all functions yet.


[...]
> > > + ? ? ? ? ? ? ? ? ? ?map_idx_10_to_20(opd_mapped[e], ps->opd_par[e], 0);
> > > + ? ? ? ? ? ? ? ? ? ?map_idx_20_to_34(opd_mapped[e], opd_mapped[e], 0);
> > > + ? ? ? ? ? ? ? ?} else {
> > > + ? ? ? ? ? ? ? ? ? ?ipd_mapped = ps->ipd_par;
> > > + ? ? ? ? ? ? ? ? ? ?opd_mapped = ps->opd_par;
> > > + ? ? ? ? ? ? ? ?}
> > > + ? ? ? ? ? ?}
> > > + ? ? ? ?}
> > > + ? ? ? ?if (!ps->is34bands_old) {
> > > + ? ? ? ? ? ?map_val_20_to_34(H11[0], 0);
> > > + ? ? ? ? ? ?map_val_20_to_34(H11[1], 0);
> > > + ? ? ? ? ? ?map_val_20_to_34(H12[0], 0);
> > > + ? ? ? ? ? ?map_val_20_to_34(H12[1], 0);
> > > + ? ? ? ? ? ?map_val_20_to_34(H21[0], 0);
> > > + ? ? ? ? ? ?map_val_20_to_34(H21[1], 0);
> > > + ? ? ? ? ? ?map_val_20_to_34(H22[0], 0);
> > > + ? ? ? ? ? ?map_val_20_to_34(H22[1], 0);
> > > + ? ? ? ? ? ?ipdopd_reset(ps->ipd_smooth, ps->opd_smooth);
> > > + ? ? ? ?}
> > > + ? ?} else {
> > > + ? ? ? ?for (e = 0; e < ps->num_env; e++) {
> > > + ? ? ? ? ? ?if (ps->nr_icc_par == 34)
> > > + ? ? ? ? ? ? ? ?map_idx_34_to_20(icc_mapped[e], ps->icc_par[e], 1);
> > > + ? ? ? ? ? ?else if (ps->nr_icc_par == 10)
> > > + ? ? ? ? ? ? ? ?map_idx_10_to_20(icc_mapped[e], ps->icc_par[e], 1);
> > > + ? ? ? ? ? ?else
> > > + ? ? ? ? ? ? ? ?icc_mapped = ps->icc_par;
> > > + ? ? ? ? ? ?if (ps->nr_iid_par == 34)
> > > + ? ? ? ? ? ? ? ?map_idx_34_to_20(iid_mapped[e], ps->iid_par[e], 1);
> > > + ? ? ? ? ? ?else if (ps->nr_iid_par == 10)
> > > + ? ? ? ? ? ? ? ?map_idx_10_to_20(iid_mapped[e], ps->iid_par[e], 1);
> > > + ? ? ? ? ? ?else
> > > + ? ? ? ? ? ? ? ?iid_mapped = ps->iid_par;
> > > + ? ? ? ? ? ?if (ps->enable_ipdopd) {
> > > + ? ? ? ? ? ? ? ?if (ps->nr_ipdopd_par == 17) {
> > > + ? ? ? ? ? ? ? ? ? ?map_idx_34_to_20(ipd_mapped[e], ps->ipd_par[e], 0);
> > > + ? ? ? ? ? ? ? ? ? ?map_idx_34_to_20(opd_mapped[e], ps->opd_par[e], 0);
> > > + ? ? ? ? ? ? ? ?} else if (ps->nr_ipdopd_par == 5) {
> > > + ? ? ? ? ? ? ? ? ? ?map_idx_10_to_20(ipd_mapped[e], ps->ipd_par[e], 0);
> > > + ? ? ? ? ? ? ? ? ? ?map_idx_10_to_20(opd_mapped[e], ps->opd_par[e], 0);
> > > + ? ? ? ? ? ? ? ?} else {
> > > + ? ? ? ? ? ? ? ? ? ?ipd_mapped = ps->ipd_par;
> > > + ? ? ? ? ? ? ? ? ? ?opd_mapped = ps->opd_par;
> > > + ? ? ? ? ? ? ? ?}
> > > + ? ? ? ? ? ?}
> > > + ? ? ? ?}
> > > + ? ? ? ?if (ps->is34bands_old) {
> > > + ? ? ? ? ? ?map_val_34_to_20(H11[0], 0);
> > > + ? ? ? ? ? ?map_val_34_to_20(H11[1], 0);
> > > + ? ? ? ? ? ?map_val_34_to_20(H12[0], 0);
> > > + ? ? ? ? ? ?map_val_34_to_20(H12[1], 0);
> > > + ? ? ? ? ? ?map_val_34_to_20(H21[0], 0);
> > > + ? ? ? ? ? ?map_val_34_to_20(H21[1], 0);
> > > + ? ? ? ? ? ?map_val_34_to_20(H22[0], 0);
> > > + ? ? ? ? ? ?map_val_34_to_20(H22[1], 0);
> > > + ? ? ? ? ? ?ipdopd_reset(ps->ipd_smooth, ps->opd_smooth);
> > > + ? ? ? ?}
> > > + ? ?}
> > > +
> >
> > > + ? ?//Mixing
> > > + ? ?for (e = 0; e < ps->num_env; e++) {
> > > + ? ? ? ?for (b = 0; b < NR_PAR_BANDS[is34]; b++) {
> > > + ? ? ? ? ? ?float h11, h12, h21, h22;
> > > + ? ? ? ? ? ?h11 = H_LUT[iid_mapped[e][b] + 7 + 23 * ps->iid_quant][icc_mapped[e][b]][0];
> > > + ? ? ? ? ? ?h12 = H_LUT[iid_mapped[e][b] + 7 + 23 * ps->iid_quant][icc_mapped[e][b]][1];
> > > + ? ? ? ? ? ?h21 = H_LUT[iid_mapped[e][b] + 7 + 23 * ps->iid_quant][icc_mapped[e][b]][2];
> > > + ? ? ? ? ? ?h22 = H_LUT[iid_mapped[e][b] + 7 + 23 * ps->iid_quant][icc_mapped[e][b]][3];
> > > + ? ? ? ? ? ?if (!PS_BASELINE && ps->enable_ipdopd && b < ps->nr_ipdopd_par) {
> > > + ? ? ? ? ? ? ? ?//The spec say says to only run this smoother when enable_ipdopd
> > > + ? ? ? ? ? ? ? ?//is set but the reference decoder appears to run it constantly
> > > + ? ? ? ? ? ? ? ?float h11i, h12i, h21i, h22i;
> > > + ? ? ? ? ? ? ? ?float opd_mag, ipd_mag, ipd_adj_re, ipd_adj_im;
> > > + ? ? ? ? ? ? ? ?float opd_re = ipdopd_cos[ps->opd_par[e][b]];
> > > + ? ? ? ? ? ? ? ?float opd_im = ipdopd_sin[ps->opd_par[e][b]];
> > > + ? ? ? ? ? ? ? ?float ipd_re = ipdopd_cos[ps->ipd_par[e][b]];
> > > + ? ? ? ? ? ? ? ?float ipd_im = ipdopd_sin[ps->ipd_par[e][b]];
> > > + ? ? ? ? ? ? ? ?float opd_im_smooth = 0.25f * opd_smooth[b][0][1] + 0.5f * opd_smooth[b][1][1] + opd_im;
> > > + ? ? ? ? ? ? ? ?float opd_re_smooth = 0.25f * opd_smooth[b][0][0] + 0.5f * opd_smooth[b][1][0] + opd_re;
> > > + ? ? ? ? ? ? ? ?float ipd_im_smooth = 0.25f * ipd_smooth[b][0][1] + 0.5f * ipd_smooth[b][1][1] + ipd_im;
> > > + ? ? ? ? ? ? ? ?float ipd_re_smooth = 0.25f * ipd_smooth[b][0][0] + 0.5f * ipd_smooth[b][1][0] + ipd_re;
> > > + ? ? ? ? ? ? ? ?opd_smooth[b][0][0] = opd_smooth[b][1][0];
> > > + ? ? ? ? ? ? ? ?opd_smooth[b][0][1] = opd_smooth[b][1][1];
> > > + ? ? ? ? ? ? ? ?opd_smooth[b][1][0] = opd_re;
> > > + ? ? ? ? ? ? ? ?opd_smooth[b][1][1] = opd_im;
> > > + ? ? ? ? ? ? ? ?ipd_smooth[b][0][0] = ipd_smooth[b][1][0];
> > > + ? ? ? ? ? ? ? ?ipd_smooth[b][0][1] = ipd_smooth[b][1][1];
> > > + ? ? ? ? ? ? ? ?ipd_smooth[b][1][0] = ipd_re;
> > > + ? ? ? ? ? ? ? ?ipd_smooth[b][1][1] = ipd_im;
> > > + ? ? ? ? ? ? ? ?opd_mag = 1 / sqrt(opd_im_smooth * opd_im_smooth + opd_re_smooth * opd_re_smooth);
> > > + ? ? ? ? ? ? ? ?ipd_mag = 1 / sqrt(ipd_im_smooth * ipd_im_smooth + ipd_re_smooth * ipd_re_smooth);
> > > + ? ? ? ? ? ? ? ?opd_re = opd_re_smooth * opd_mag;
> > > + ? ? ? ? ? ? ? ?opd_im = opd_im_smooth * opd_mag;
> > > + ? ? ? ? ? ? ? ?ipd_re = ipd_re_smooth * ipd_mag;
> > > + ? ? ? ? ? ? ? ?ipd_im = ipd_im_smooth * ipd_mag;
> >
> > if i decyphered this nonsense correctly then
> > ?pd_re/im values here can have 512 distinct values
> > thus a LUT that is indexed by 3 ps->o/ipd_par[e][b] values could be used
> >
> 
> Quantized ipd/opd take the the vales of 0-7 so smoothed values
> normalized to unit vectors would be 8x8x8x2 floats 1024 values total
> (512 real and 512 imaginary). Do you think an LUT is worth it here? It
> would eliminate the invsqrt.

i would suspect the lut would is faster than the sqrt() but this of course
depends on cpu cache sizes and how fast the cpu can caclulate a ^-0.5

[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

The real ebay dictionary, page 3
"Rare item" - "Common item with rare defect or maybe just a lie"
"Professional" - "'Toy' made in china, not functional except as doorstop"
"Experts will know" - "The seller hopes you are not an expert"
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20100603/7cbf8d71/attachment.pgp>



More information about the ffmpeg-devel mailing list