[FFmpeg-soc] j2k review

Sun Aug 19 21:31:10 CEST 2007

Hi

after the arithmetic coder heres the remaining review

j2k.c:
[...]

> int ff_j2k_getnbctxno(int flag, int bandno)
> {
>     return nbctxno_lut[flag&255][bandno];
> }

this wraper function doesnt do any good, it just slows the code down
as it cannot be inlined

> 
> static int getnbctxno(int flag, int bandno)
> {
>     int h, v, d;
> 
>     h = ((flag & J2K_T1_SIG_E) ? 1:0)+
>         ((flag & J2K_T1_SIG_W) ? 1:0);
>     v = ((flag & J2K_T1_SIG_N) ? 1:0)+
>         ((flag & J2K_T1_SIG_S) ? 1:0);
>     d = ((flag & J2K_T1_SIG_NE) ? 1:0)+
>         ((flag & J2K_T1_SIG_NW) ? 1:0)+
>         ((flag & J2K_T1_SIG_SE) ? 1:0)+
>         ((flag & J2K_T1_SIG_SW) ? 1:0);
>     switch(bandno){
>         case 0: // LL || LH
>         case 2:
>             if (h == 2) return 8;
>             if (h == 1){
>                 if (v >= 1) return 7;
>                 if (d >= 1) return 6;
>                 return 5;
>             }
>             if (v == 2) return 4;
>             if (v == 1) return 3;
>             if (d >= 2) return 2;
>             if (d == 1) return 1;
>             return 0;
>         case 1: // HL
>             if (v == 2) return 8;
>             if (v == 1){
>                 if (h >= 1) return 7;
>                 if (d >= 1) return 6;
>                 return 5;
>             }
>             if (h == 2) return 4;
>             if (h == 1) return 3;
>             if (d >= 2) return 2;
>             if (d >= 1) return 1;
>             return 0;

a if(bandno==1)
    FFSWAP(int,h,v);
would allow the case 012 to be merged

[...]
> int ff_j2k_getrefctxno(int flag)
> {
>     const uint8_t refctxno_lut[2][2] = {{14, 15}, {16, 16}};

static const

>     return refctxno_lut[flag>>14][(flag & 255) != 0];
> }

this should be in a header so it can be inlined or the code should just
be used with no wraper function

> 
> static uint8_t sgnctxno_lut[16][16], xorbit_lut[16][16];
> 
> int ff_j2k_getsgnctxno(int flag, int *xorbit)
> {
>     *xorbit = xorbit_lut[flag&15][(flag>>8)&15];
>     return  sgnctxno_lut[flag&15][(flag>>8)&15];
> }

this wraper function doesnt do any good, it just slows the code down
as it cannot be inlined

[...]

j2k.h:
[...]
> enum J2kMarkers{
>     J2K_SOC = 0xff4f,
>     J2K_SIZ = 0xff51,
>     J2K_COD,
>     J2K_COC,
>     J2K_TLM = 0xff55,
>     J2K_PLM = 0xff57,
>     J2K_PLT,
>     J2K_QCD = 0xff5c,
>     J2K_QCC,
>     J2K_RGN,
>     J2K_POC,
>     J2K_PPM,
>     J2K_PPT,
>     J2K_CRG = 0xff63,
>     J2K_COM,
>     J2K_SOT = 0xff90,
>     J2K_SOP,
>     J2K_EPH,
>     J2K_SOD,
>     J2K_EOC = 0xffd9,
> };

please add comments with the meaning of these like
J2K_SIZ = 0xff51,           ///< Image and tile size

[...]
> enum J2kQuantsty{

Quantsty? quant type?

[...]

j2kenc.c:
[...]

> // TODO: doxygen-compatible comments

yes, there are a lot which arent doxygen compatible ...

> 
> typedef struct {
>     uint16_t rate;
>     int64_t disto;
> } J2kPass;
> 
> typedef struct {
>     uint8_t npassess;

passess ? i dont think such a word exists

[...]
> typedef struct {
>     uint16_t x0, x1, y0, y1;
>     uint16_t cblkw, cblkh;
>     uint16_t cblknx, cblkny;
>     J2kPrec *prec;
>     J2kCblk *cblk;

i think most variable names (and not only the ones above or just the ones in
this file) could be improved,
i dont really know what they are used for (without checking the code which
uses them, and that really defeats the sense in a variable name, you could
as well just call them var0 var1 var2)

[...]
> typedef struct {
>     AVCodecContext *avctx;
>     AVFrame *picture;
> 
>     int Xsiz, Ysiz; // image width and height

width, height

>     uint8_t cbps[4]; // numbps in components
>     uint8_t bbps[4][32][3]; // numbps in bands

what is bps?

>     uint8_t expn[4][32][3]; // quantization exponents
>     int ncomponents;

>     int ppx, ppy; // exponent of the precinct size [global]

log2_prec_width
log2_prec_height
might be good variable names if i understood the comment correctly

>     int xcb, ycb; // exponent of the code block size

log2_cblk_width
log2_cblk_height

>     int XTsiz, YTsiz; // tile size

tile_width, tile_height

[...]
> /* tag tree routines */
> /* code the value stored in node */
> static void tag_tree_code(J2kEncoderContext *s, J2kTgtNode *node, int threshold)
> {
>     J2kTgtNode *stack[30];
>     int sp = 1, curval = 0;
>     stack[0] = node;
> 
>     node = node->parent;
>     while(node != NULL){

the != is superflous

[...]
> /* update the value in node */
> static void tag_tree_update(J2kTgtNode *node)
> {
>     int lev = 0;
>     while (node->parent != NULL){

the != is superflous

[...]

> /* marker segments */
> static void put_marker(J2kEncoderContext *s, uint16_t marker)
> {
>     bytestream_put_be16(&s->buf, marker);
> }
> 
> static void put_soc(J2kEncoderContext *s)
> {
>     put_marker(s, J2K_SOC);
> }

these dont really need wraper functions

[...]
> static void put_qcd(J2kEncoderContext *s, int compno)
> {
>     int reslevelno;
>     put_marker(s, J2K_QCD);
>     bytestream_put_be16(&s->buf, 4+3*(s->nreslevels-1));  // LQcd
>     bytestream_put_byte(&s->buf, s->nguardbits << 5);  // Sqcd

this is only allowed for the 5/3 wavelet, what about 9/7 ?

[...]
>     s->tile = av_malloc(s->numXtiles * s->numYtiles * sizeof(J2kTile));
>     if (s->tile == NULL)
>         return -1;

return AVERROR(ENOMEM)

>     for (tno = 0; tno < s->numXtiles * s->numYtiles; tno++){
>         J2kTile *tile = s->tile + tno;
>         int p = tno % s->numXtiles;
>         int q = tno / s->numXtiles;

why not 2 for() loops for columns and rows? it doesnt seem tno is
used except as above
also p and q are very bad variable names for the x and y tile postions
they have no relation at all to "tile position"

[...]
>                 reslevel->x0 = ff_j2k_ceildivpow2(comp->x0, s->nreslevels - reslevelno - 1);
>                 reslevel->x1 = ff_j2k_ceildivpow2(comp->x1, s->nreslevels - reslevelno - 1);
>                 reslevel->y0 = ff_j2k_ceildivpow2(comp->y0, s->nreslevels - reslevelno - 1);
>                 reslevel->y1 = ff_j2k_ceildivpow2(comp->y1, s->nreslevels - reslevelno - 1);

the x0x1y0y1 should be a [2][2] array this would allow code like the above
to be simplified by using a loop

> 
>                 if (reslevelno == 0)
>                     reslevel->nbands = 1;
>                 else
>                     reslevel->nbands = 3;

the first level has 4 sub bands the others have 3

> 
>                 if (reslevel->x1 == reslevel->x0)
>                     reslevel->nprecw = 0;
>                 else
>                     reslevel->nprecw = ff_j2k_ceildivpow2(reslevel->x1, s->ppx) - reslevel->x0 / (1<<s->ppx);

reslevel->x0 / (1<<s->ppx)
should be the same as
reslevel->x0 >> s->ppx

> 
>                 if (reslevel->y1 == reslevel->y0)
>                     reslevel->nprech = 0;
>                 else
>                     reslevel->nprech = ff_j2k_ceildivpow2(reslevel->y1, s->ppy) - reslevel->y0 / (1<<s->ppy);
> 
>                 reslevel->band = av_malloc(reslevel->nbands * sizeof(J2kBand));
>                 if (reslevel->band == NULL)
>                     return -1;
>                 for (bandno = 0; bandno < reslevel->nbands; bandno++){
>                     J2kBand *band = reslevel->band + bandno;
>                     int cblkno, precx, precy, precno;
>                     int x0, y0, x1, y1;
>                     int xi0, yi0, xi1, yi1;
>                     int cblkperprecw, cblkperprech;
> 
>                     if (reslevelno == 0){  // the same everywhere
>                         band->cblkw = 1 << FFMIN(s->xcb, s->ppx-1);
>                         band->cblkh = 1 << FFMIN(s->ycb, s->ppy-1);
> 
>                         band->x0 = ff_j2k_ceildivpow2(comp->x0, n-1);
>                         band->x1 = ff_j2k_ceildivpow2(comp->x1, n-1);
>                         band->y0 = ff_j2k_ceildivpow2(comp->y0, n-1);
>                         band->y1 = ff_j2k_ceildivpow2(comp->y1, n-1);
>                     }
>                     else{
>                         band->cblkw = 1 << FFMIN(s->xcb, s->ppx);
>                         band->cblkh = 1 << FFMIN(s->ycb, s->ppy);
> 
>                         band->x0 = ff_j2k_ceildivpow2(comp->x0 - (1 << (n-1)) * ((bandno+1)&1), n);
>                         band->x1 = ff_j2k_ceildivpow2(comp->x1 - (1 << (n-1)) * ((bandno+1)&1), n);
>                         band->y0 = ff_j2k_ceildivpow2(comp->y0 - (1 << (n-1)) * (((bandno+1)&2)>>1), n);
>                         band->y1 = ff_j2k_ceildivpow2(comp->y1 - (1 << (n-1)) * (((bandno+1)&2)>>1), n);
>                     }

the bandno (sub band number) does not properly correspond to actual subband
LL as well as LH use 0 and then need these special cases above to workaround
that

also please do not use single letter variables ("n" here) if their meaning
is completely non obvious and the point where they are initalized is pages
away from the point where they are used

[...]
>                     for (cblkno = 0; cblkno < band->cblknx * band->cblkny; cblkno++){
>                         band->cblk[cblkno].zero = 0;
>                     }

here the use of a single letter variable would make sense (its use is limited
to 2 lines)

for (i = 0; i < band->cblknx * band->cblkny; i++){
    band->cblk[i].zero = 0;
}

[...]
>             int nbands;
>             nbands = reslevelno ? 3 : 1;

declaration and initalization can be merged

>             for (bandno = 0; bandno < nbands; bandno++){
>                 int expn;
> 
>                 expn = ((bandno&2)>>1) + (reslevelno>0) + s->cbps[compno];

>                 s->bbps[compno][reslevelno][bandno] = expn + s->nguardbits - 1;

whatever it is, it is used only at one spot and does not need to be
precalculated and stored in a struct

>                 s->expn[compno][reslevelno][bandno] = expn;

additionally the 2 just differ by s->nguardbits - 1 so they dont need
to be stored twice

[...]
> static void init_luts()
> {
>     int i;
>     double u, v, t, pfr;
> 
>     pfr = pow(2, NMSEDEC_FRACBITS);
>     for (i = 0; i < (1 << NMSEDEC_BITS); i++){
>         t = i / pfr;
>         u = t;
>         v = t - 1.5;
>         lut_nmsedec_sig[i]  = FFMAX((int) (floor((u*u - v*v) * pfr + 0.5) / pfr * 8192.0), 0);
>         lut_nmsedec_sig0[i] = FFMAX((int) (floor((u*u) * pfr + 0.5) / pfr * 8192.0), 0);
> 
>         u = t - 1.0;
>         v = t - ((i & (1<<(NMSEDEC_BITS-1))) ? 1.5 : 0.5);
>         lut_nmsedec_ref[i]  = FFMAX((int) (floor((u*u - v*v) * pfr + 0.5) / pfr * 8192.0), 0);
>         lut_nmsedec_ref0[i] = FFMAX((int) (floor((u*u) * pfr + 0.5) / pfr * 8192.0), 0);
>     }
> }

the coefficients which are coded are integers their distortion after
dequant as sum of squared difference is as well an integer
theres no need for floats/doubles or inaccuare rounding
the sum of squared differences can easily be calculated exactly

[...]
> static void dwt_encode53(J2kEncoderContext *s, J2kComponent *comp)
> {
>     int lev = s->nreslevels,
>         *t = comp->data, w = comp->x1 - comp->x0;
>     int *ppv = av_malloc((comp->reslevel[lev-1].y1 + 4)*sizeof(int)), *pv = ppv+2;
>     int *ppu = av_malloc((comp->reslevel[lev-1].x1 + 4)*sizeof(int)), *pu = ppu+2;

please avoid doing malloc/free in the dwt code

> 
>     while (--lev){
>         int u0 = comp->reslevel[lev].x0,
>             u1 = comp->reslevel[lev].x1,
>             v0 = comp->reslevel[lev].y0,
>             v1 = comp->reslevel[lev].y1,
>             u = u0, v = v0;
> 

>         //VER_SD
>         while (u < u1){
>             int i, j;
>             for (i = v0; i < v1; i++)
>                 pv[i] = t[w*(i-v0) + u-u0];
>             sd_1d(pv, v0, v1);
> 
>             // copy back and deinterleave
>             for (i = v0+v0%2, j = 0; i < v1; i+=2, j++){
>                 t[w*j + u-u0] = pv[i];
>             }
>             for (i = v0+1-v0%2; i < v1; i+=2, j++){
>                 t[w*j + u-u0] = pv[i];
>             }

accessing elements in vertical direction first is very slow and
problematic in terms of cache behavior

>             u++;
>         }
> 
>         //HOR_SD
>         while (v < v1){
>             int i, j;
>             for (i = u0; i < u1; i++)
>                 pu[i] = t[w*(v-v0) + i-u0];
>             sd_1d(pu, u0, u1);
> 
>             // copy back and deinterleave
>             for (i = u0+u0%2, j = 0; i < u1; i+=2, j++){
>                 t[w*(v-v0) + j] = pu[i];
>             }
>             for (i = u0+1-u0%2; i < u1; i+=2, j++){
>                 t[w*(v-v0) + j] = pu[i];
>             }
>             v++;
>         }

you perform 2 copy passes here and above in the code for the vertical DWT
these arent needed, try to reduce this please

also the (i)dwt can be moved into its own file

[...]
> static void encode_sigpass(J2kT1Context *t1, int width, int height, int bandno, int *nmsedec, int bpno)
> {
>     int i, j, k, mask = 1 << (bpno + NMSEDEC_FRACBITS);
>     for (i = 0; i < height; i += 4)
>         for (j = 0; j < width; j++)

please use x,y for x y coordinates

[...]
> static void encode_refpass(J2kT1Context *t1, int width, int height, int *nmsedec, int bpno)
> {
>     int i, j, k, mask = 1 << (bpno + NMSEDEC_FRACBITS);
>     for (i = 0; i < height; i += 4)
>         for (j = 0; j < width; j++)

x,y

[...]

> static void encode_clnpass(J2kT1Context *t1, int width, int height, int bandno, int *nmsedec, int bpno)
> {
>     int i, j, k, mask = 1 << (bpno + NMSEDEC_FRACBITS);
>     for (i = 0; i < height; i += 4)
>         for (j = 0; j < width; j++){

x,y

>             if (i + 3 < height && !(
>             (t1->flags[i+1][j+1] & (J2K_T1_SIG_NB | J2K_T1_VIS | J2K_T1_SIG)) ||
>             (t1->flags[i+2][j+1] & (J2K_T1_SIG_NB | J2K_T1_VIS | J2K_T1_SIG)) ||
>             (t1->flags[i+3][j+1] & (J2K_T1_SIG_NB | J2K_T1_VIS | J2K_T1_SIG)) ||
>             (t1->flags[i+4][j+1] & (J2K_T1_SIG_NB | J2K_T1_VIS | J2K_T1_SIG))))
>             {
>                 // aggregation mode
>                 int rlen;
>                 for (rlen = 0; rlen < 4; rlen++)
>                     if (abs(t1->data[i+rlen][j]) & mask)
>                         break;
>                 ff_aec_encode(&t1->aec, AEC_CX_RL, rlen != 4);
>                 if (rlen == 4)
>                     continue;
>                 ff_aec_encode(&t1->aec, AEC_CX_UNI, rlen >> 1);
>                 ff_aec_encode(&t1->aec, AEC_CX_UNI, rlen & 1);
>                 for (k = i + rlen; k < i + 4; k++){
>                     if (!(t1->flags[k+1][j+1] & (J2K_T1_SIG | J2K_T1_VIS))){
>                         int ctxno = ff_j2k_getnbctxno(t1->flags[k+1][j+1], bandno);
>                         if (k > i + rlen)
>                             ff_aec_encode(&t1->aec, ctxno, abs(t1->data[k][j]) & mask ? 1:0);
>                         if (abs(t1->data[k][j]) & mask){ // newly significant
>                             int xorbit;
>                             int ctxno = ff_j2k_getsgnctxno(t1->flags[k+1][j+1], &xorbit);
>                             *nmsedec += getnmsedec_sig(abs(t1->data[k][j]), bpno + NMSEDEC_FRACBITS);
>                             ff_aec_encode(&t1->aec, ctxno, (t1->data[k][j] < 0) ^ xorbit);
>                             ff_j2k_set_significant(t1, j, k);
>                         }
>                     }
>                     t1->flags[k+1][j+1] &= ~J2K_T1_VIS;
>                 }
>             }
>             else{
>                 for (k = i; k < i + 4 && k < height; k++){
>                     if (!(t1->flags[k+1][j+1] & (J2K_T1_SIG | J2K_T1_VIS))){
>                         int ctxno = ff_j2k_getnbctxno(t1->flags[k+1][j+1], bandno);
>                         ff_aec_encode(&t1->aec, ctxno, abs(t1->data[k][j]) & mask ? 1:0);
>                         if (abs(t1->data[k][j]) & mask){ // newly significant
>                             int xorbit;
>                             int ctxno = ff_j2k_getsgnctxno(t1->flags[k+1][j+1], &xorbit);
>                             *nmsedec += getnmsedec_sig(abs(t1->data[k][j]), bpno + NMSEDEC_FRACBITS);
>                             ff_aec_encode(&t1->aec, ctxno, (t1->data[k][j] < 0) ^ xorbit);
>                             ff_j2k_set_significant(t1, j, k);
>                         }
>                     }
>                     t1->flags[k+1][j+1] &= ~J2K_T1_VIS;
>                 }
>             }
>         }
> }
> 
> static void encode_cblk(J2kEncoderContext *s, J2kT1Context *t1, J2kCblk *cblk, J2kTile *tile,
>                         int width, int height, int bandpos, int lev)
> {
>     int pass_t = 2, passno, i, j, max=0, nmsedec, bpno;
>     int64_t wmsedec = 0;
> 
>     for (i = 0; i < height+2; i++)
>         memset(t1->flags[i], 0, (width+2)*sizeof(int));
> 
>     for (i = 0; i < height; i++){
>         for (j = 0; j < width; j++)
>             max = FFMAX(max, abs(t1->data[i][j]));

you convert the data over and over to sign+abs, it would be better to do this
just once

[...]
>     if (empty){
>         put_bits(s, 0, 1);
>         j2k_flush(s);
>         return;
>     }
> 
>     put_bits(s, 1, 1);

put_bits(s, !empty, 1);
if(empty){
...

>     for (bandno = 0; bandno < rlevel->nbands; bandno++){
>         J2kBand *band = rlevel->band + bandno;
>         J2kTgtNode *cblkincl, *zerobits;
>         int cblknw, cblknh, yi, xi, pos;
> 
>         cblknh = band->prec[precno].yi1 - band->prec[precno].yi0;
>         cblknw = band->prec[precno].xi1 - band->prec[precno].xi0;
> 
>         cblkincl = ff_j2k_tag_tree_init(cblknw, cblknh);
>         zerobits = ff_j2k_tag_tree_init(cblknw, cblknh);
> 
>         for (pos=0, yi = band->prec[precno].yi0; yi < band->prec[precno].yi1; yi++){
>             for (xi = band->prec[precno].xi0; xi < band->prec[precno].xi1; xi++, pos++){
>                 cblkincl[pos].val = band->cblk[yi * cblknw + xi].ninclpassess == 0;
>                 tag_tree_update(cblkincl + pos);
>                 zerobits[pos].val = s->bbps[compno][rlevelno][bandno] - band->cblk[yi * cblknw + xi].nonzerobits;
>                 tag_tree_update(zerobits + pos);
>             }
>         }
> 
>         for (pos=0, yi = band->prec[precno].yi0; yi < band->prec[precno].yi1; yi++){
>             for (xi = band->prec[precno].xi0; xi < band->prec[precno].xi1; xi++, pos++){

band->prec[precno] is used all over the place, please use a local
variable for it

[...]
>     int compno, reslevelno;
> 
>     av_log(s->avctx, AV_LOG_DEBUG, "tier2\n");
>     // lay-rlevel-comp-pos progression
>     for (reslevelno = 0; reslevelno < s->nreslevels; reslevelno++){
>         for (compno = 0; compno < s->ncomponents; compno++){
>             int precno;

this declaration can be merged with the one above

[...]
> static int getcut(J2kCblk *cblk, int64_t lambda, int dwt_norm)
> {
>     int passno, res = 0;
>     for (passno = 0; passno < cblk->npassess; passno++){
>         int dr;
>         int64_t dd;
> 
>         dr = cblk->passess[passno].rate
>            - (res ? cblk->passess[res-1].rate:0);
>         dd = cblk->passess[passno].disto
>            - (res ? cblk->passess[res-1].disto:0);
> 
>         if (((dd * dwt_norm) >> WMSEDEC_SHIFT) * dwt_norm >= dr * lambda)
>             res = passno+1;
>     }
>     return res;

int64_t best_rd= INT64_MAX;
int pass, best_pass_count;

for(pass=0; pass < cblk->npassess; pass++){
    int64_t rd=   cblk->passess[pass].disto
               +((cblk->passess[pass].rate * lambda)>>C);
    if(rd < best_rd){
        best_rd= rd;
        best_pass_count= pass;
    }
}
return best_pass_count;

and correct lambda by dwt_norm

[...]
>         for (reslevelno = 0, lev = s->nreslevels-1; reslevelno < s->nreslevels; reslevelno++, lev--){

please dont put so many statements in a single for() line, this makes it hard
to read

[...]
> void cleanup(J2kEncoderContext *s)
> {
>     int tileno, compno, reslevelno, bandno;
>     for (tileno = 0; tileno < s->numXtiles * s->numYtiles; tileno++){
>         for (compno = 0; compno < s->ncomponents; compno++){
>             J2kComponent *comp = s->tile[tileno].comp + compno;
> 
>             for (reslevelno = 0; reslevelno < s->nreslevels; reslevelno++){
>                 J2kResLevel *reslevel = comp->reslevel + reslevelno;
> 
>                 for (bandno = 0; bandno < reslevel->nbands ; bandno++){
>                     J2kBand *band = reslevel->band + bandno;
>                         av_free(band->cblk);
>                         av_free(band->prec);
>                     }
>                 av_free(reslevel->band);
>             }
>             av_free(comp->reslevel);
>         }
>         av_free(s->tile[tileno].comp);
>     }
>     av_free(s->tile);

please use av_freep()

[...]
>     s->XTsiz = 256; s->YTsiz = 256;

have you tested the code with other tile sizes?

[...]
>     av_log(s->avctx, AV_LOG_DEBUG, "init\n");
>     init_tiles(s);
>     init_luts();

redoing all the init/malloc stuff per frame is unacceptable

[...]
>     .pix_fmts =
>         (enum PixelFormat[]) {PIX_FMT_GRAY8, PIX_FMT_RGB24, -1}

what about yuv support ?

[...]

j2kdec.c:
[...]

> /** code block */
> typedef struct {
>     uint8_t npassess;  ///< number of coding passess
>     uint8_t nonzerobits;
>     uint16_t lengthinc;
>     uint16_t length;
>     uint8_t lblock;
>     uint8_t zero;
>     uint8_t data[8192];
>     uint8_t included;
> } J2kCblk;
> 
> /** precinct */
> typedef struct {
>     uint16_t xi0, xi1, yi0, yi1; ///< indices of codeblocks included
>     J2kTgtNode *zerobits;
>     J2kTgtNode *cblkincl;
> } J2kPrec;
> 
> /** subband */
> typedef struct {
>     uint16_t x0, x1, y0, y1;
>     uint16_t cblkw, cblkh;
>     uint16_t cblknx, cblkny;
>     uint32_t stepsize; ///< quantization stepsize (* 2^13)
>     J2kPrec *prec;
>     J2kCblk *cblk;
> } J2kBand;
> 
> /** resolution level */
> typedef struct {
>     uint16_t x0, x1, y0, y1;
>     uint8_t nbands;
>     uint16_t nprecw, nprech;
>     uint8_t ppx, ppy; ///< exponent of precinct size
>     J2kBand *band;
> } J2kResLevel;

these structs are (near) duplicated from the encoder

[...]
> static void copy_defaults(J2kDecoderContext *s, J2kTile *tile)
> {
>     int compno;
> 
>     tile->nlayers = s->nlayers;
>     tile->mct = s->mct;
> 
>     for (compno = 0; compno < s->ncomponents; compno++){
>         J2kComponent *comp = tile->comp + compno;
>         int i;
> 
>         comp->nreslevels = s->nreslevels[compno];
>         comp->xcb = s->xcb[compno];
>         comp->ycb = s->ycb[compno];
>         comp->transform = s->transform[compno];
>         comp->qstyle = s->qstyle[compno];
>         for (i = 0; i < 3*32; i++){
>             comp->expn[i] = s->expn[i][compno];
>             comp->mant[i] = s->mant[i][compno];
>             comp->bbps[i] = s->bbps[i][compno];
>         }
>     }
> }

the defaults should be stored in the same struct as teh rest so they can
easily be memcpied 

> 
> /** marker segments */
> static int get_siz(J2kDecoderContext *s)
> {
>     int i;
> 
>     bytestream_get_be16(&s->buf); ///< Rsiz (skipped)
>     s->Xsiz = bytestream_get_be32(&s->buf); ///< Xsiz
>     s->Ysiz = bytestream_get_be32(&s->buf); ///< Ysiz
>     s->X0siz = bytestream_get_be32(&s->buf); ///< X0Siz
>     s->Y0siz = bytestream_get_be32(&s->buf); ///< Y0Siz
> 
>     s->XTsiz = bytestream_get_be32(&s->buf); ///< XTSiz
>     s->YTsiz = bytestream_get_be32(&s->buf); ///< YTSiz
>     s->XT0siz = bytestream_get_be32(&s->buf); ///< XT0Siz
>     s->YT0siz = bytestream_get_be32(&s->buf); ///< YT0Siz

these can be vertically aligned

>     s->ncomponents = bytestream_get_be16(&s->buf); ///< CSiz
> 
>     for (i = 0; i < s->ncomponents; i++){ ///< Ssiz_i XRsiz_i, YRsiz_i
>         uint8_t x = bytestream_get_byte(&s->buf);
>         s->cbps[i] = (x & 0x7f) + 1;
>         s->sgnd[i] = (x & 0x80) == 1;
>         if (bytestream_get_byte(&s->buf) != 1)
>             return -1;
>         if (bytestream_get_byte(&s->buf) != 1)
>             return -1;
>     }

exploitable heap overflow

> 
>     s->numXtiles = ff_j2k_ceildiv(s->Xsiz - s->XT0siz, s->XTsiz);
>     s->numYtiles = ff_j2k_ceildiv(s->Ysiz - s->YT0siz, s->YTsiz);
> 
>     s->tile = av_mallocz(s->numXtiles * s->numYtiles * sizeof(J2kTile));
>     if (!s->tile)
>         return -1;

return AVERROR(ENOMEM)

> 
>     for (i = 0; i < s->numXtiles * s->numYtiles; i++){
>         J2kTile *tile = s->tile + i;
> 
>         tile->comp = av_mallocz(s->ncomponents * sizeof(J2kComponent));
>         if (!tile->comp)
>             return -1;
>     }
> 
>     s->avctx->width = s->Xsiz - s->X0siz;
>     s->avctx->height = s->Ysiz - s->Y0siz;

likely exploitable due to integer overflows here or later
again, please properly check read values before using them

[...]
>     s->avctx->get_buffer(s->avctx, &s->picture);

return value must be checked

> 
>     return 0;
> }
> 

> #define SETFIELD(field, val)\
>     if (s->curtileno == -1)\
>         s->field = val;\
>     else\
>         s->tile[s->curtileno].field = val;
> 
> #define GETFIELD(field)\
>     (s->curtileno == -1 ? s->field : s->tile[s->curtileno].field)
> 
> #define SETFIELDC(field, val)\
>     if (s->curtileno == -1)\
>         s->field[compno] = val;\
>     else\
>         s->tile[s->curtileno].comp[compno].field = val;
> 
> #define GETFIELDC(field)\
>     (s->curtileno == -1 ? s->field[compno] : s->tile[s->curtileno].comp[compno].field)
> 
> static int get_cox(J2kDecoderContext *s, int compno)
> {
>     SETFIELDC(nreslevels, bytestream_get_byte(&s->buf) + 1); ///< num of resolution levels - 1
>     SETFIELDC(xcb, bytestream_get_byte(&s->buf) + 2); ///< cblk width
>     SETFIELDC(ycb, bytestream_get_byte(&s->buf) + 2); ///< cblk height

please remove these macros and write clean C code
and just duplicating the code of the macro everwhere is not clean C code!

[...]
> static int get_qcc(J2kDecoderContext *s, int n)
> {
>     int compno = bytestream_get_byte(&s->buf);
>     if (s->curtileno == -1)
>         s->properties[compno] |= HAD_QCC;
>     else
>         s->tile[s->curtileno].comp[compno].properties |= HAD_QCC;
>     return get_qcx(s, n-1, compno);
> }
> 
> static uint8_t get_sot(J2kDecoderContext *s)
> {
>     s->curtileno = bytestream_get_be16(&s->buf); ///< Isot

this is exploitable
also what is the doxygen comment doing there?

[...]
> static int init_tile(J2kDecoderContext *s, int tileno)
> {
>     int compno, reslevelno, bandno, p, q;
>     J2kTile *tile = s->tile + tileno;
> 
>     p = tileno % s->numXtiles;
>     q = tileno / s->numXtiles;
> 
>     if (tile->comp == NULL)
>         return -1;
>     for (compno = 0; compno < s->ncomponents; compno++){
>         J2kComponent *comp = tile->comp + compno;
>         int gbandno = 0; ///< global bandno
> 
>         comp->x0 = FFMAX(p * s->XTsiz + s->XT0siz, s->X0siz);
>         comp->x1 = FFMIN((p+1)*s->XTsiz + s->XT0siz, s->Xsiz);
>         comp->y0 = FFMAX(q * s->YTsiz + s->YT0siz, s->Y0siz);
>         comp->y1 = FFMIN((q+1)*s->YTsiz + s->YT0siz, s->Ysiz);
> 
>         comp->data = av_malloc((comp->y1 - comp->y0) * (comp->x1 -comp->x0) * sizeof(int));
>         if (comp->data == NULL)
>             return -1;
>         comp->reslevel = av_malloc(comp->nreslevels * sizeof(J2kResLevel));
>         if (comp->reslevel == NULL)
>             return -1;
>         for (reslevelno = 0; reslevelno < comp->nreslevels; reslevelno++){
>             int n = comp->nreslevels - reslevelno;
>             J2kResLevel *reslevel = comp->reslevel + reslevelno;
> 
>             reslevel->x0 = ff_j2k_ceildivpow2(comp->x0, comp->nreslevels - reslevelno - 1);
>             reslevel->x1 = ff_j2k_ceildivpow2(comp->x1, comp->nreslevels - reslevelno - 1);
>             reslevel->y0 = ff_j2k_ceildivpow2(comp->y0, comp->nreslevels - reslevelno - 1);
>             reslevel->y1 = ff_j2k_ceildivpow2(comp->y1, comp->nreslevels - reslevelno - 1);
> 
>             if (reslevelno == 0)
>                 reslevel->nbands = 1;
>             else
>                 reslevel->nbands = 3;
> 
>             if (reslevel->x1 == reslevel->x0)
>                 reslevel->nprecw = 0;
>             else
>                 reslevel->nprecw = ff_j2k_ceildivpow2(reslevel->x1, s->ppx) - reslevel->x0 / (1<<s->ppx);
> 
>             if (reslevel->y1 == reslevel->y0)
>                 reslevel->nprech = 0;
>             else
>                 reslevel->nprech = ff_j2k_ceildivpow2(reslevel->y1, s->ppy) - reslevel->y0 / (1<<s->ppy);
> 
>             reslevel->band = av_malloc(reslevel->nbands * sizeof(J2kBand));
>             if (reslevel->band == NULL)
>                 return -1;
>             for (bandno = 0; bandno < reslevel->nbands; bandno++, gbandno++){
>                 J2kBand *band = reslevel->band + bandno;
>                 int cblkno, precx, precy, precno;
>                 int x0, y0, x1, y1;
>                 int xi0, yi0, xi1, yi1;
>                 int cblkperprecw, cblkperprech;
> 
>                 if (comp->qstyle != J2K_QSTY_NONE){
>                     const static uint8_t lut_gain[2][4] = {{0, 0, 0, 0}, {0, 1, 1, 2}};
>                     int numbps;
> 
>                     numbps = s->cbps[compno] + lut_gain[comp->transform][bandno + reslevelno>0];
>                     band->stepsize = SHL(2048 + comp->mant[gbandno], 2 + numbps - comp->expn[gbandno]);
>                 }
>                 else
>                     band->stepsize = 1 << 13;
> 
>                 if (reslevelno == 0){  // the same everywhere
>                     band->cblkw = 1 << FFMIN(comp->xcb, s->ppx-1);
>                     band->cblkh = 1 << FFMIN(comp->ycb, s->ppy-1);
> 
>                     band->x0 = ff_j2k_ceildivpow2(comp->x0, n-1);
>                     band->x1 = ff_j2k_ceildivpow2(comp->x1, n-1);
>                     band->y0 = ff_j2k_ceildivpow2(comp->y0, n-1);
>                     band->y1 = ff_j2k_ceildivpow2(comp->y1, n-1);
>                 }
>                 else{
>                     band->cblkw = 1 << FFMIN(comp->xcb, s->ppx);
>                     band->cblkh = 1 << FFMIN(comp->ycb, s->ppy);
> 
>                     band->x0 = ff_j2k_ceildivpow2(comp->x0 - (1 << (n-1)) * ((bandno+1)&1), n);
>                     band->x1 = ff_j2k_ceildivpow2(comp->x1 - (1 << (n-1)) * ((bandno+1)&1), n);
>                     band->y0 = ff_j2k_ceildivpow2(comp->y0 - (1 << (n-1)) * (((bandno+1)&2)>>1), n);
>                     band->y1 = ff_j2k_ceildivpow2(comp->y1 - (1 << (n-1)) * (((bandno+1)&2)>>1), n);
>                 }
>                 band->cblknx = ff_j2k_ceildiv(band->x1, band->cblkw) - band->x0 / band->cblkw;
>                 band->cblkny = ff_j2k_ceildiv(band->y1, band->cblkh) - band->y0 / band->cblkh;
> 
>                 band->cblk = av_malloc(band->cblknx * band->cblkny * sizeof(J2kCblk));
>                 if (band->cblk == NULL)
>                     return -1;
>                 band->prec = av_malloc(reslevel->nprecw * reslevel->nprech * sizeof(J2kPrec));
>                 if (band->prec == NULL)
>                     return -1;
> 
>                 for (cblkno = 0; cblkno < band->cblknx * band->cblkny; cblkno++){
>                     J2kCblk *cblk = band->cblk + cblkno;
>                     cblk->zero = 0;
>                     cblk->lblock = 3;
>                     cblk->length = 0;
>                     cblk->lengthinc = 0;
>                     cblk->npassess = 0;
>                     cblk->included = 0;
>                 }
> 
>                 y0 = band->y0;
>                 y1 = (band->y0 + (1<<s->ppy))/(1<<s->ppy)*(1<<s->ppy) - band->y0;
>                 yi0 = 0;
>                 yi1 = ff_j2k_ceildiv(y1 - y0, 1<<comp->ycb) * (1<<comp->ycb);
>                 yi1 = FFMIN(yi1, band->cblkny);
>                 cblkperprech = 1<<(s->ppy - comp->ycb);
>                 for (precy = 0, precno = 0; precy < reslevel->nprech; precy++){
>                     for (precx = 0; precx < reslevel->nprecw; precx++, precno++){
>                         band->prec[precno].yi0 = yi0;
>                         band->prec[precno].yi1 = yi1;
>                     }
>                     yi1 += cblkperprech;
>                     yi0 = yi1 - cblkperprech;
>                     yi1 = FFMIN(yi1, band->cblkny);
>                 }
>                 x0 = band->x0;
>                 x1 = (band->x0 + (1<<s->ppx))/(1<<s->ppx)*(1<<s->ppx) - band->x0;
>                 xi0 = 0;
>                 xi1 = ff_j2k_ceildiv(x1 - x0, 1<<comp->xcb) * (1<<comp->xcb);
>                 xi1 = FFMIN(xi1, band->cblknx);
> 
>                 cblkperprecw = 1<<(s->ppx - comp->xcb);
>                 for (precx = 0, precno = 0; precx < reslevel->nprecw; precx++){
>                     for (precy = 0; precy < reslevel->nprech; precy++, precno = 0){
>                         J2kPrec *prec = band->prec + precno;
>                         prec->xi0 = xi0;
>                         prec->xi1 = xi1;
>                         prec->cblkincl = ff_j2k_tag_tree_init(prec->xi1 - prec->xi0,
>                                                               prec->yi1 - prec->yi0);
>                         prec->zerobits = ff_j2k_tag_tree_init(prec->xi1 - prec->xi0,
>                                                               prec->yi1 - prec->yi0);
>                         if (!prec->cblkincl || !prec->zerobits)
>                             return -1;
> 
>                     }
>                     xi1 += cblkperprecw;
>                     xi0 = xi1 - cblkperprecw;
>                     xi1 = FFMIN(xi1, band->cblknx);
>                 }
>             }
>         }
>     }
>     return 0;
> }

duplicate of init_tiles() from the encoder

[...]
>     for (bandno = 0; bandno < rlevel->nbands; bandno++){
>         J2kBand *band = rlevel->band + bandno;
>         J2kPrec *prec = band->prec + precno;
>         int pos = 0;
> 
>         for (cblkny = prec->yi0; cblkny < prec->yi1; cblkny++)
>             for(cblknx = prec->xi0, cblkno = cblkny * band->cblknx + cblknx; cblknx < prec->xi1; cblknx++, cblkno++, pos++){
>                 J2kCblk *cblk = band->cblk + cblkno;
>                 int incl, newpassess, llen;
> 
>                 if (cblk->included)
>                     incl = get_bits(s, 1);
>                 else{
>                     incl = tag_tree_decode(s, prec->cblkincl + pos, layno+1) == layno;
>                 }
>                 if (!incl){
>                     continue;
>                 }
>                 if (!cblk->included){
>                     cblk->included++;
>                     cblk->nonzerobits = bandbps[bandno] - tag_tree_decode(s, prec->zerobits + pos, 100);
>                 }
>                 newpassess = getnpassess(s);
>                 llen = getlblockinc(s);
>                 cblk->lblock += llen;
>                 cblk->lengthinc = get_bits(s, av_log2(newpassess) + cblk->lblock);
>                 cblk->npassess += newpassess;
>             }
>     }
>     j2k_flush(s);
>     for (bandno = 0; bandno < rlevel->nbands; bandno++){
>         J2kBand *band = rlevel->band + bandno;
>         int cblknw, yi;
>         cblknw = band->prec[precno].xi1 - band->prec[precno].xi0;
>         for (yi = band->prec[precno].yi0; yi < band->prec[precno].yi1; yi++){
>             int xi;
>             for (xi = band->prec[precno].xi0; xi < band->prec[precno].xi1; xi++){
>                 J2kCblk *cblk = band->cblk + yi * cblknw + xi;
>                 bytestream_get_buffer(&s->buf, cblk->data, cblk->lengthinc);

this again looks exploitable, where is the code which checks that the buffer
is large enough?

[...]
> static void sr_1d97(float *p, int i0, int i1)
> {
>     int i;
> 
>     if (i1 == i0 + 1)
>         return;
> 
>     for (i = 1; i <= 4; i++){
>         p[i0 - i] = p[i0 + i];
>         p[i1 + i - 1] = p[i1 - i - 1];
>     }
> 
>     for (i = i0/2 - 1; i < i1/2 + 2; i++)
>         p[2*i] *= 1.230174;
>     for (i = i0/2 - 2; i < i1/2 + 2; i++)
>         p[2*i+1] *= 1.625786;

if theres a scaling pass and i think there is then this can be merged
into it

[...]
> 
> static int dwt_decode53(J2kDecoderContext *s, J2kComponent *comp, int nreslevels)
> {
>     int lev = nreslevels, i,
>         *t = comp->data, w = comp->x1 - comp->x0;
>     int *ppv = av_malloc((comp->reslevel[lev-1].y1 + 4)*sizeof(int)), *pv = ppv+2;
>     int *ppu = av_malloc((comp->reslevel[lev-1].x1 + 4)*sizeof(int)), *pu = ppu+2;
> 
>     if (!ppv || !ppu)
>         return -1;
> 
>     for (i = 1; i < lev; i++){
>         int u0 = comp->reslevel[i].x0,
>             u1 = comp->reslevel[i].x1,
>             v0 = comp->reslevel[i].y0,
>             v1 = comp->reslevel[i].y1,
>             u = u0, v = v0;
> 
>         /// HOR_SD
>         while (v < v1){
>             int i, j;
>             /// copy with interleaving
>             for (i = u0 + (u0 & 1), j = 0; i < u1; i+=2, j++){
>                 pu[i] = t[w*(v-v0) + j];
>             }
>             for (i = u0 + 1 - (u0 % 2); i < u1; i+=2, j++){
>                 pu[i] = t[w*(v-v0) + j];
>             }
> 
>             sr_1d53(pu, u0, u1);
> 
>             for (i = u0; i < u1; i++)
>                 t[w*(v-v0) + i-u0] = pu[i];
> 
>             v++;
>         }
>         /// VER_SD
>         while (u < u1){
>             int i, j;
>             /// copy with interleaving
>             for (i = v0 + (v0 & 1), j = 0; i < v1; i+=2, j++){
>                 pv[i] = t[w*j + u-u0];
>             }
>             for (i = v0 + 1 - (v0 % 2); i < v1; i+=2, j++){
>                 pv[i] = t[w*j + u-u0];
>             }
> 
>             sr_1d53(pv, v0, v1);
> 
>             for (i = v0; i < v1; i++)
>                 t[w*(i-v0) + u-u0] = pv[i];
> 
>             u++;
>         }

same comment as for the encoder:
* please avoid doing malloc/free in the dwt code
* accessing elements in vertical direction first is very slow and
  problematic in terms of cache behavior
* you perform 2 copy passes here and above in the code for the vertical DWT
  these arent needed, try to reduce this please
* also the (i)dwt can be moved into its own file

>     }
>     av_free(ppv);
>     av_free(ppu);
>     return 0;
> }
> 
> static int dwt_decode97(J2kDecoderContext *s, J2kComponent *comp, int nreslevels)
> {
>     int lev = nreslevels, i,
>         *t = comp->data, w = comp->x1 - comp->x0;
>     float *ppv = av_malloc((comp->reslevel[lev-1].y1 + 8)*sizeof(float)), *pv = ppv+4;
>     float *ppu = av_malloc((comp->reslevel[lev-1].x1 + 8)*sizeof(float)), *pu = ppu+4;
> 
>     if (!ppv || !ppu)
>         return -1;
> 
>     for (i = 1; i < lev; i++){
>         int u0 = comp->reslevel[i].x0,
>             u1 = comp->reslevel[i].x1,
>             v0 = comp->reslevel[i].y0,
>             v1 = comp->reslevel[i].y1,
>             u = u0, v = v0;
> 
>         /// HOR_SD
>         while (v < v1){
>             int i, j;
>             /// copy with interleaving
>             for (i = u0 + (u0 & 1), j = 0; i < u1; i+=2, j++){
>                 pu[i] = t[w*(v-v0) + j];
>             }
>             for (i = u0 + 1 - (u0 % 2); i < u1; i+=2, j++){
>                 pu[i] = t[w*(v-v0) + j];
>             }
> 
>             sr_1d97(pu, u0, u1);
> 
>             for (i = u0; i < u1; i++)
>                 t[w*(v-v0) + i-u0] = pu[i];
> 
>             v++;
>         }
>         /// VER_SD
>         while (u < u1){
>             int i, j;
>             /// copy with interleaving
>             for (i = v0 + (v0 & 1), j = 0; i < v1; i+=2, j++){
>                 pv[i] = t[w*j + u-u0];
>             }
>             for (i = v0 + 1 - (v0 % 2); i < v1; i+=2, j++){
>                 pv[i] = t[w*j + u-u0];
>             }
> 
>             sr_1d97(pv, v0, v1);
> 
>             for (i = v0; i < v1; i++)
>                 t[w*(i-v0) + u-u0] = pv[i];
> 
>             u++;
>         }
>     }
>     av_free(ppv);
>     av_free(ppu);
>     return 0;
> }

this is a near duplicate of dwt_decode53()

> 
> static void mct_decode(J2kDecoderContext *s, J2kTile *tile)
> {
>     int i, *src[3], i0, i1, i2;
> 
>     for (i = 0; i < 3; i++)
>         src[i] = tile->comp[i].data;
> 
>     if (tile->comp[0].transform == J2K_DWT97){

>         for (i = 0; i < (tile->comp[0].y1 - tile->comp[0].y0) * (tile->comp[0].x1 - tile->comp[0].x0); i++){
>             i0 = *src[0] + (*src[2] * 46802 >> 16);
>             i1 = *src[0] - (*src[1] * 22553 + *src[2] * 46802 >> 16);
>             i2 = *src[0] + (116130 * *src[1] >> 16);
>             *src[0]++ = i0;
>             *src[1]++ = i1;
>             *src[2]++ = i2;
>         }

standard yuv to rgb transform, this does not belong into a decoder, 
directly export the yuv data please

[...]
> static int decode_frame(AVCodecContext *avctx,
>                         void *data, int *data_size,
>                         uint8_t *buf, int buf_size)
> {
>     J2kDecoderContext *s = avctx->priv_data;
>     AVFrame *picture = data;
>     int tileno;
> 
>     s->avctx = avctx;
>     av_log(s->avctx, AV_LOG_DEBUG, "start\n");
> 
>     /// init
>     s->buf = s->buf_start = buf;
>     s->buf_end = buf + buf_size;
>     s->curtileno = -1;
> 
>     avcodec_get_frame_defaults((AVFrame*)&s->picture);
>     avctx->coded_frame = (AVFrame*)&s->picture;
> 
>     s->ppx = s->ppy = 15;
> 
>     ff_j2k_init_tier1_luts();

as with the encoder, do not malloc, init and free all "const" tables for every
frame

[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

I count him braver who overcomes his desires than him who conquers his
enemies for the hardest victory is over self. -- Aristotle
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-soc/attachments/20070819/b9aa83ac/attachment.pgp>