[FFmpeg-devel] [PATCH][RFC] Indeo3 replacement

Michael Niedermayer michaelni
Sun Jul 26 18:06:04 CEST 2009


On Sat, Jul 25, 2009 at 07:24:35PM +0200, Maxim wrote:
> Hi crews,
> 
> as already volunteered I'd like to maintain indeo3 decoder in the
> future. Unfortunately the ffmpeg's decoder is unmaintable (Mike, sorry!)
> because nobody understands how it works. Therefore I want to submit a
> patch proposing a new source for this algorithm. Below its advantages in
> short:
> 
> - deobfuscated algorithm
> - heavily commented source
> - decoding tables will be generated dynamically making "indeo3data.h"
> tiny compared to the existing one!
> - one huge code blob was splitted into several functions
> 
> Disadvantages:
> 
> - it was written fast therefore it contains several simplifications can
> be programmed safer
> - may be further splitted
> - it was not tested on PPC yet
> 
> There is already a documentation for this algorithm here: http://wiki.multimedia.cx/index.php?title=Indeo_3
> 
> Attached files are the sources itself because the new sources are VERY different from the existing ones!
> 
> Plz be merciful to me and help me out to make this stuff good-looking!
> Waiting for reviews...
[...]
> //@{
> //! vq table selector codes
> #define DELTA_DYAD      0
> #define DELTA_QUAD      1
> #define RLE_ESC_F9      2
> #define RLE_ESC_FA      3
> #define RLE_ESC_FB      4
> #define RLE_ESC_FC      5
> #define RLE_ESC_FD      6
> #define RLE_ESC_FE      7
> #define RLE_ESC_FF      8
> #define RLE_FORBIDDEN   9

could be an enum


> //@}
> 
> 
> //@{
> //! some constants for parsing frame flags
> #define BS_8BIT_PEL     1<<1
> #define BS_KEYFRAME     1<<2
> #define BS_MV_Y_HALF    1<<4
> #define BS_MV_X_HALF    1<<5
> #define BS_BUFFER       9
> //@}
> 
> 
> typedef struct Plane {
>     uint8_t     *buffers[2];
>     uint8_t     *pixels[2]; ///< pointer to the actual pixel data of the buffers above
>     uint32_t    width;
>     uint32_t    height;
>     uint32_t    pitch;
> } Plane;
> 
> #define CELL_STACK_MAX  20
> 
> typedef struct Cell {
>     int16_t         xpos;       ///< cell coordinates in 4x4 blocks
>     int16_t         ypos;
>     int16_t         width;      ///< cell width  in 4x4 blocks
>     int16_t         height;     ///< cell height in 4x4 blocks
>     uint8_t         tree;       ///< tree id: 0- MC tree, 1 - VQ tree
>     const int8_t    *mv_ptr;    ///< ptr to the motion vector if any
> } Cell;
> 
> typedef struct Indeo3DecodeContext {
>     AVCodecContext *avctx;
>     AVFrame     frame;
> 
>     int16_t         width, height;

please vertically align these declarations


>     uint32_t        frame_num;
>     uint16_t        frame_flags;
>     uint32_t        data_size;
>     uint8_t         cb_offset;
>     uint8_t         resync;
>     const uint8_t   *y_data_ptr;
>     const uint8_t   *v_data_ptr;
>     const uint8_t   *u_data_ptr;
>     const uint8_t   *alt_quant;

I think some of these could benefit from documentation


[...]
> static uint8_t requant_tab[8][128];
> 
> /**
>  *  Build the static requantization table.
>  *  This table is used to remap pixel values according to a specific
>  *  quant index and thus avoid overflows while adding deltas.
>  */
> static av_cold void build_requant_tab(void)
> {
>     int i;
> 
>     for (i = 0; i < 128; i++) {
>         requant_tab[0][i] = (i + 1) - ((i + 1) % 2);
>         requant_tab[1][i] = (i + 2) - ((i + 1) % 3);
>         requant_tab[2][i] = (i + 2) - ((i + 2) % 4);
>         requant_tab[3][i] = (i + 1) - ((i - 3) % 5);
>         requant_tab[4][i] = (i + 1) - ((i - 3) % 6);
>         requant_tab[5][i] = (i + 4) - ((i + 3) % 7);
>         requant_tab[6][i] = (i + 4) - ((i + 4) % 8);
>         requant_tab[7][i] = (i + 5) - ((i + 4) % 9);
>     }

does this look less ugly if written as
(i + C)/D*D + E
?


[...]
> /* FIXME: I know we already have a bitreader in ffmpeg */
> /* it should be adapted to read ahead only one byte */
> /* otherwise it won't work for indeo3 !!! */

elaborate please


[...]
> /**
>  *  Copy pixels of the cell(x + mv_x, y + mv_y) from the previous frame into
>  *  the cell(x, y) in the current frame.
>  */
> static void copy_cell(Indeo3DecodeContext *ctx, Plane *plane, Cell *cell)
> {
>     int     y, buf_switch, mv_x, mv_y, offset;
>     uint8_t *src, *dst;
> 

>     /* use BS_BUFFER flag for buffer switching */
>     buf_switch = (ctx->frame_flags >> BS_BUFFER) & 1;

this should not be done per cell as it eats cpu cycles ...


> 
>     /* setup output and reference pointers */
>     dst = &plane->pixels[buf_switch][(cell->ypos << 2) * plane->pitch + (cell->xpos << 2)];
>     /* reference block = prev_frame(cell_xpos + mv_x, cell_ypos + mv_y) */
>     mv_y = cell->mv_ptr[0];
>     mv_x = cell->mv_ptr[1];
>     offset = ((cell->ypos << 2) + mv_y) * plane->pitch + (cell->xpos << 2) + mv_x;
>     src = &plane->pixels[buf_switch ^ 1][offset];
> 
>     for (y = cell->height << 2; y > 0; src += plane->pitch, dst += plane->pitch, y--)
>         memcpy(dst, src, cell->width << 2);
> }

also, can't the dsputil block copy code be used?


> 
> 
> #define INTERPOLATE_32(dst, src, ref) *(dst) = ((*(src) + *(ref)) >> 1) & 0x7F7F7F7F
> 
> /**
>  *  Interpolate a line in the 8x8 block
>  *  thisLine = average(thisLine-1, thisLine+1)
>  */
> static void interpolate_64(int32_t *buf, const int32_t row_offset)
> {
>     /* average 8 pels in the softSIMD fashion and  */
>     /* make sure that values are in the 7bit range */
>     buf[0] = ((buf[-row_offset] + buf[row_offset]) >> 1) & 0x7F7F7F7F;
>     buf++;
>     buf[0] = ((buf[-row_offset] + buf[row_offset]) >> 1) & 0x7F7F7F7F;
> }

again, can't the dsputil 1/2 pel MC code be used?


> 
> 
> /**
>  *  Copy n lines filled with 32bit pixel values
>  */
> static void copy_32(int32_t *dst, int32_t *src, int n, int row_offset)
> {
>     for (; n > 0; dst += row_offset, src += row_offset, n--)
>         *dst = *src;
> }

and again, dsputil ...


[...]
> /**
>  *  Decode a vector-quantized cell.
>  *  It consists of several routines, each of which handles one or more "modes"
>  *  with which a cell can be encoded.
>  *
>  *  @param ctx      [in] pointer to the decoder context
>  *  @param avctx    [in] ptr to the AVCodecContext
>  *  @param plane    [in] pointer to the plane descriptor
>  *  @param cell     [in] pointer to the cell  descriptor
>  *  @param data_ptr [in] pointer to the compressed data
>  *  @param last_ptr [out] position in the input buffer after decoding will be reported here
>  *  @return         result code: 0 = OK, -1 = error
>  */
> static int decode_cell(Indeo3DecodeContext *ctx, AVCodecContext *avctx, Plane *plane, Cell *cell,
>                        const uint8_t *data_ptr, const uint8_t **last_ptr)
> {
>     int     x, y, buf_switch, mv_x, mv_y, mode, vq_index, prim_indx, second_indx, rle_blocks;
>     int     row_offset, blk_row_offset, line, num_lines, is_first_row, is_top_of_cell, skip_flag;
>     uint8_t code, *block, *ref_block, *prim_sel, *second_sel;
>     int32_t *prim_delta, *second_delta, *delta_tab, *src32, *ref32, *delta_lo, *delta_hi;
>     int32_t *block32, ref_hi, ref_lo;
>     int16_t *src16, *ref16;
> 

>     /* use BS_BUFFER flag for buffer switching */
>     buf_switch = (ctx->frame_flags >> BS_BUFFER) & 1;

duplicate (the same buf_switch computation is already done in copy_cell())


> 
>     /* get coding mode and VQ table index from the VQ descriptor byte */
>     code     = *data_ptr++;
>     mode     = code >> 4;
>     vq_index = code & 0xF;
> 
>     /* setup output and reference pointers */
>     block = &plane->pixels[buf_switch][(cell->ypos << 2) * plane->pitch + (cell->xpos << 2)];
>     if (!cell->mv_ptr) {
>         /* use previous line as reference for INTRA cells */
>         ref_block = &block[-plane->pitch];
>     } else {
>         if (mode >= 10) {
>             /* for mode 10 and 11 INTER first copy the predicted cell into the current one */
>             /* so we don't need to do data copying for each RLE code later */
>             copy_cell(ctx, plane, cell);
>         } else {
>             mv_y = cell->mv_ptr[0];
>             mv_x = cell->mv_ptr[1];
>             /* reference block = prev_frame(cell_xpos + mv_x, cell_ypos + mv_y) */
>             ref_block = &plane->pixels[buf_switch ^ 1][((cell->ypos << 2) + mv_y) * plane->pitch + (cell->xpos << 2) + mv_x];
>         }
>     }
> 
>     /* select VQ tables as follows: */
>     /* modes 0 and 3 use only the primary table for all lines in a block */
>     /* while modes 1 and 4 switch between primary and secondary tables on alternate lines */
>     if (mode == 1 || mode == 4) {
>         code        = ctx->alt_quant[vq_index];
>         prim_indx   = (code >> 4)  + ctx->cb_offset;
>         second_indx = (code & 0xF) + ctx->cb_offset;
> 

>         assert(prim_indx <= 23 && second_indx <= 23);

just to make sure, this cannot be false with ANY input?


> 
>         prim_delta   = &delta_tabs   [prim_indx]  [0];
>         prim_sel     = &selector_tabs[prim_indx]  [0];
>         second_delta = &delta_tabs   [second_indx][0];
>         second_sel   = &selector_tabs[second_indx][0];
>     } else {
>         vq_index += ctx->cb_offset;
>         assert(vq_index <= 23);
> 
>         prim_delta   = &delta_tabs   [vq_index][0];
>         prim_sel     = &selector_tabs[vq_index][0];
>         second_delta = prim_delta;
>         second_sel   = prim_sel;
>     }
> 
>     /* requantize the prediction if VQ index of this cell differs from VQ index */
>     /* of the predicted cell in order to avoid overflows. */
>     /* FIXME: if (vq_index >= 8 && (mode == 0 || mode == 3 || mode == 10) [win32] */
>     if (vq_index >= 8) {
>         for (x = 0; x < cell->width << 2; x++)
>             ref_block[x] = requant_tab[vq_index & 7][ref_block[x]];
>     }
> 
>     /* convert the pixel offset into 4x4 block one */
>     row_offset     = plane->pitch >> 2;
>     blk_row_offset = (plane->pitch - cell->width) << 2;
> 
>     rle_blocks = 0;  // reset RLE block counter
> 
>     switch (mode) {
>         case 0: /*------------------ MODES 0 & 1 (4x4 block processing) --------------------*/
>         case 1:
>             skip_flag = 0;
> 
>             for (y = 0; y < cell->height; y++) {
>                 for (x = 0; x < cell->width; x++) {
>                     /* address 4 pixels as one 32bit integer */
>                     ref32 = (int32_t *)ref_block;
>                     src32 = (int32_t *)block;
> 
>                     if (rle_blocks > 0) {
>                         /* apply 0 delta to whole next block */
>                         if (cell->mv_ptr || !skip_flag)
>                             copy_32(src32, ref32, 4, row_offset);
>                         rle_blocks--;
>                     } else {
>                         for (line = 0; line < 4;) {
>                             num_lines = 1;
> 
>                             code = *data_ptr++;
>                             /* select primary VQ table for odd, secondary for even lines */
>                             delta_tab = (line & 1) ? prim_delta : second_delta;
> 
>                             /* switch on code type: dyad, quad or RLE escape codes */
>                             switch ((line & 1) ? prim_sel[code] : second_sel[code]) {
>                                 case DELTA_DYAD: /* apply VQ delta to two dyads (2+2 pixels) using softSIMD */
>                                     if (((line & 1) ? prim_sel[*data_ptr] : second_sel[*data_ptr]) != DELTA_DYAD) {
>                                         av_log(avctx, AV_LOG_ERROR, "Mode 0/1: invalid VQ data!\n");
>                                         return -1;
>                                     }
>                                     ref16 = (int16_t *)ref32;
>                                     src16 = (int16_t *)src32;
>                                     src16[0] = ref16[0] + delta_tab[*data_ptr++];
>                                     src16[1] = ref16[1] + delta_tab[code];
>                                     break;
> 
>                                 case DELTA_QUAD: /* apply VQ delta to 4 pixels at once using softSIMD */
>                                     src32[0] = ref32[0] + delta_tab[code];
>                                     break;
> 
>                                 case RLE_ESC_FF: /* apply null delta to all lines up to the 2nd line */
>                                     //assert(line < 1);
>                                     copy_32(src32, ref32, 2, row_offset);
>                                     num_lines = 2;
>                                     break;
> 
>                                 case RLE_ESC_FE: /* apply null delta to all lines up to the 3rd line */
>                                     //assert(line < 2);
>                                     copy_32(src32, ref32, 3 - line, row_offset);
>                                     num_lines = 3 - line;
>                                     break;
> 
>                                 case RLE_ESC_FC:
>                                     /* apply null delta to all remaining lines of this block
>                                        and to whole next block */
>                                     skip_flag  = 0;
>                                     rle_blocks = 1;
> 
>                                 case RLE_ESC_FD: /* apply null delta to all remaining lines of this block */
>                                     copy_32(src32, ref32, 4 - line, row_offset);
>                                     num_lines = 4 - line; /* go to process next block */
>                                     break;
> 
>                                 case RLE_ESC_FB: /* apply null delta to n blocks/skip n blocks */
>                                     /* get next byte after the escape code 0xFB */
>                                     code = *data_ptr++;
>                                     rle_blocks = (code & 0x1F) - 1; /* set the block counter */
>                                     if (code >= 64 || rle_blocks < 0) {
>                                         av_log(avctx, AV_LOG_ERROR, "Mode 0/1: RLE-FB invalid counter: %d!\n", code);
>                                         return -1;
>                                     }
>                                     skip_flag = code & 0x20;
>                                     if (cell->mv_ptr || !skip_flag)
>                                         copy_32(src32, ref32, 4 - line, row_offset);
>                                     num_lines = 4 - line; /* go to process next block */
>                                     break;
> 
>                                 case RLE_ESC_F9: /* skip this block and the next one */
>                                     skip_flag  = 1;
>                                     rle_blocks = 1;
> 
>                                 case RLE_ESC_FA: /* skip this block (INTRA) or copy the reference block (INTER) */
>                                     assert(!line);
>                                     if (cell->mv_ptr)
>                                         copy_32(src32, ref32, 4, row_offset);
>                                     num_lines = 4;
>                                     break;
> 
>                                 default:
>                                     av_log(avctx, AV_LOG_ERROR, "Mode 0/1: unsupported RLE code: %d!\n",
>                                           (line & 1) ? prim_sel[code] : second_sel[code]);
>                                     return(-1);
>                             }// switch code
> 
>                             /* move forward num_lines */
>                             line  += num_lines;
>                             ref32 += row_offset * num_lines;
>                             src32 += row_offset * num_lines;
>                         }// for line
>                     }// if/else
> 
>                     /* move to next block horizontal */
>                     ref_block += 4;
>                     block     += 4;
>                 }// for x
> 
>                 /* move to next line of blocks */
>                 ref_block += blk_row_offset;
>                 block     += blk_row_offset;
>             }// for y
>             break;
> 
>         case 3: /*------------------ MODES 3 & 4 (4x8 block processing) --------------------*/
>         case 4:
>             if (cell->mv_ptr) {
>                 av_log(avctx, AV_LOG_ERROR, "Trying to use Mode 3/4 for an INTER cell!\n");
>                 return -1;
>             }
>             block32        = (int32_t *)block;
>             blk_row_offset = (row_offset << 3) - cell->width;
>             skip_flag      = 0;
> 
>             for (y = 0, is_first_row = 1; y < cell->height; y += 2) {
>                 for (x = 0; x < cell->width; x++) {
>                     /* address 4 pixels as one 32bit integer */
>                     ref32 = &block32[-row_offset];
>                     src32 = &block32[row_offset];
> 
>                     if (rle_blocks > 0) {
>                         /* apply 0 delta to whole next block */
>                         if (!skip_flag)
>                             copy_32(block32, ref32, 8, row_offset);
>                         rle_blocks--;
>                     } else {
>                         for(line = 0; line < 4;) {
>                             num_lines      = 1;
>                             is_top_of_cell = is_first_row & (!line);
> 
>                             code = *data_ptr++;
>                             /* select primary VQ table for odd, secondary for even lines */
>                             delta_tab = (line & 1) ? prim_delta : second_delta;
> 
>                             /* switch on code type: dyad, quad or RLE escape codes */
>                             switch ((line & 1) ? prim_sel[code] : second_sel[code]) {
>                                 case DELTA_DYAD: /* apply VQ delta to two dyads (2+2 pixels) using softSIMD */
>                                     if (((line & 1) ? prim_sel[*data_ptr] : second_sel[*data_ptr]) != DELTA_DYAD) {
>                                         av_log(avctx, AV_LOG_ERROR, "Mode 3/4: invalid VQ data!\n");
>                                         return -1;
>                                     }
>                                     ref16 = (int16_t *)ref32;
>                                     src16 = (int16_t *)src32;
>                                     src16[0] = ref16[0] + delta_tab[*data_ptr++];
>                                     src16[1] = ref16[1] + delta_tab[code];
> 
>                                     /* odd lines are not coded but rather interpolated/replicated */
>                                     /* first line of the cell on the top of image? - replicate */
>                                     /* otherwise - interpolate */
>                                     if (is_top_of_cell && !cell->ypos) {
>                                         src32[-row_offset] = src32[0];
>                                     } else
>                                         INTERPOLATE_32(src32 -row_offset, src32, ref32);
>                                     break;
> 
>                                 case DELTA_QUAD: /* apply VQ delta to 4 pixels at once using softSIMD */
>                                     src32[0] = ref32[0] + delta_tab[code];
>                                     if (is_top_of_cell && !cell->ypos) {
>                                         src32[-row_offset] = src32[0];
>                                     } else
>                                         INTERPOLATE_32(src32 -row_offset, src32, ref32);
>                                     break;
> 
>                                 case RLE_ESC_FF: /* apply null delta to all lines up to the 2nd line */
>                                     assert(line < 1);
>                                     copy_32(src32 - row_offset, ref32, 4, row_offset);
>                                     num_lines = 2;
>                                     break;
> 
>                                 case RLE_ESC_FE: /* apply null delta to all lines up to the 3rd line */
>                                     assert(line < 2);
>                                     copy_32(src32 - row_offset, ref32, (3 - line) << 1, row_offset);
>                                     num_lines = 3 - line;
>                                     break;
> 
>                                 case RLE_ESC_FC:
>                                     /* apply null delta to all remaining lines of this block
>                                     and to whole next block */
>                                     skip_flag  = 0;
>                                     rle_blocks = 1;
> 
>                                 case RLE_ESC_FD: /* apply null delta to all remaining lines of this block */
>                                     copy_32(src32 - row_offset, ref32, (4 - line) << 1, row_offset);
>                                     num_lines = 4 - line; /* go to process next block */
>                                     break;
> 
>                                 case RLE_ESC_FB: /* apply null delta to n blocks/skip n blocks */
>                                     /* get next byte after the escape code 0xFB */
>                                     code = *data_ptr++;
>                                     rle_blocks = (code & 0x1F) - 1; /* set the block counter */
>                                     if (code >= 64 || rle_blocks < 0) {
>                                         av_log(avctx, AV_LOG_ERROR, "Mode 3/4: RLE-FB invalid counter: %d!\n", code);
>                                         return -1;
>                                     }
>                                     skip_flag = code & 0x20;
>                                     if (!skip_flag)
>                                         copy_32(src32 - row_offset, ref32, (4 - line) << 1, row_offset);
>                                     num_lines = 4 - line; /* go to process next block */
>                                     break;
> 
>                                 case RLE_ESC_F9: /* skip this block and the next one */
>                                     skip_flag  = 1;
>                                     rle_blocks = 1;
> 
>                                 case RLE_ESC_FA: /* skip this block */
>                                     assert(!line);
>                                     num_lines = 4;
>                                     break;
> 
>                                 default:
>                                     av_log(avctx, AV_LOG_ERROR, "Mode 3/4: unsupported RLE code: %d!\n",
>                                            (line & 1) ? prim_sel[code] : second_sel[code]);
>                                     return(-1);
>                             }// switch code
> 
>                             /* move to num_lines (even) */
>                             line  += num_lines;
>                             ref32 += row_offset * (num_lines << 1);
>                             src32 += row_offset * (num_lines << 1);
>                         }// for line
>                     }// if/else
> 
>                     /* move to next block horizontal */
>                     block32++;
>                 }// for x
> 
>                 /* move to next line of blocks */
>                 block32      += blk_row_offset;
>                 is_first_row  = 0;
>             }// for y
>             break;

looks very similar to the 4x4 code ...


[...]
>     while (curr_cell >= ctx->cell_stack) {
>         if (!curr_cell->tree) {
>             /* MC tree codes */
>             switch (get_bintree_code(&bitctx)) {
>                 case H_SPLIT:
>                     /* split current cell into two vertical subcells */
>                     prev_cell = curr_cell;
>                     assert(curr_cell < &ctx->cell_stack[CELL_STACK_MAX]);
>                     DUPLICATE_CELL(curr_cell);
>                     SPLIT_CELL(prev_cell->height, curr_cell->height);
>                     prev_cell->ypos   += curr_cell->height;
>                     prev_cell->height -= curr_cell->height;
>                     break;
[...]
>         } else {
>             /* VQ tree codes */
>             switch (get_bintree_code(&bitctx)) {
>                 case H_SPLIT:
>                     /* split current cell into two vertical subcells */
>                     prev_cell = curr_cell;
>                     assert(curr_cell < &ctx->cell_stack[CELL_STACK_MAX]);
>                     DUPLICATE_CELL(curr_cell);
>                     SPLIT_CELL(prev_cell->height, curr_cell->height);
>                     prev_cell->ypos   += curr_cell->height;
>                     prev_cell->height -= curr_cell->height;
>                     break;

please get rid of all the duplicated code


[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

No snowflake in an avalanche ever feels responsible. -- Voltaire
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20090726/f81d9a43/attachment.pgp>



More information about the ffmpeg-devel mailing list