FFmpeg: libavcodec/ivi_dsp.c Source File

00001 /*
00002  * DSP functions for Indeo Video Interactive codecs (Indeo4 and Indeo5)
00003  *
00004  * Copyright (c) 2009 Maxim Poliakovski
00005  *
00006  * This file is part of FFmpeg.
00007  *
00008  * FFmpeg is free software; you can redistribute it and/or
00009  * modify it under the terms of the GNU Lesser General Public
00010  * License as published by the Free Software Foundation; either
00011  * version 2.1 of the License, or (at your option) any later version.
00012  *
00013  * FFmpeg is distributed in the hope that it will be useful,
00014  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00016  * Lesser General Public License for more details.
00017  *
00018  * You should have received a copy of the GNU Lesser General Public
00019  * License along with FFmpeg; if not, write to the Free Software
00020  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00021  */
00022 
00029 #include "avcodec.h"
00030 #include "dsputil.h"
00031 #include "dwt.h"
00032 #include "ivi_common.h"
00033 #include "ivi_dsp.h"
00034 
00035 void ff_ivi_recompose53(const IVIPlaneDesc *plane, uint8_t *dst,
00036                         const int dst_pitch, const int num_bands)
00037 {
00038     int             x, y, indx;
00039     int32_t         p0, p1, p2, p3, tmp0, tmp1, tmp2;
00040     int32_t         b0_1, b0_2, b1_1, b1_2, b1_3, b2_1, b2_2, b2_3, b2_4, b2_5, b2_6;
00041     int32_t         b3_1, b3_2, b3_3, b3_4, b3_5, b3_6, b3_7, b3_8, b3_9;
00042     int32_t         pitch, back_pitch;
00043     const IDWTELEM *b0_ptr, *b1_ptr, *b2_ptr, *b3_ptr;
00044 
00045     /* all bands should have the same pitch */
00046     pitch = plane->bands[0].pitch;
00047 
00048     /* pixels at the position "y-1" will be set to pixels at the "y" for the 1st iteration */
00049     back_pitch = 0;
00050 
00051     /* get pointers to the wavelet bands */
00052     b0_ptr = plane->bands[0].buf;
00053     b1_ptr = plane->bands[1].buf;
00054     b2_ptr = plane->bands[2].buf;
00055     b3_ptr = plane->bands[3].buf;
00056 
00057     for (y = 0; y < plane->height; y += 2) {
00058         /* load storage variables with values */
00059         if (num_bands > 0) {
00060             b0_1 = b0_ptr[0];
00061             b0_2 = b0_ptr[pitch];
00062         }
00063 
00064         if (num_bands > 1) {
00065             b1_1 = b1_ptr[back_pitch];
00066             b1_2 = b1_ptr[0];
00067             b1_3 = b1_1 - b1_2*6 + b1_ptr[pitch];
00068         }
00069 
00070         if (num_bands > 2) {
00071             b2_2 = b2_ptr[0];     // b2[x,  y  ]
00072             b2_3 = b2_2;          // b2[x+1,y  ] = b2[x,y]
00073             b2_5 = b2_ptr[pitch]; // b2[x  ,y+1]
00074             b2_6 = b2_5;          // b2[x+1,y+1] = b2[x,y+1]
00075         }
00076 
00077         if (num_bands > 3) {
00078             b3_2 = b3_ptr[back_pitch]; // b3[x  ,y-1]
00079             b3_3 = b3_2;               // b3[x+1,y-1] = b3[x  ,y-1]
00080             b3_5 = b3_ptr[0];          // b3[x  ,y  ]
00081             b3_6 = b3_5;               // b3[x+1,y  ] = b3[x  ,y  ]
00082             b3_8 = b3_2 - b3_5*6 + b3_ptr[pitch];
00083             b3_9 = b3_8;
00084         }
00085 
00086         for (x = 0, indx = 0; x < plane->width; x+=2, indx++) {
00087             /* some values calculated in the previous iterations can */
00088             /* be reused in the next ones, so do appropriate copying */
00089             b2_1 = b2_2; // b2[x-1,y  ] = b2[x,  y  ]
00090             b2_2 = b2_3; // b2[x  ,y  ] = b2[x+1,y  ]
00091             b2_4 = b2_5; // b2[x-1,y+1] = b2[x  ,y+1]
00092             b2_5 = b2_6; // b2[x  ,y+1] = b2[x+1,y+1]
00093             b3_1 = b3_2; // b3[x-1,y-1] = b3[x  ,y-1]
00094             b3_2 = b3_3; // b3[x  ,y-1] = b3[x+1,y-1]
00095             b3_4 = b3_5; // b3[x-1,y  ] = b3[x  ,y  ]
00096             b3_5 = b3_6; // b3[x  ,y  ] = b3[x+1,y  ]
00097             b3_7 = b3_8; // vert_HPF(x-1)
00098             b3_8 = b3_9; // vert_HPF(x  )
00099 
00100             p0 = p1 = p2 = p3 = 0;
00101 
00102             /* process the LL-band by applying LPF both vertically and horizontally */
00103             if (num_bands > 0) {
00104                 tmp0 = b0_1;
00105                 tmp2 = b0_2;
00106                 b0_1 = b0_ptr[indx+1];
00107                 b0_2 = b0_ptr[pitch+indx+1];
00108                 tmp1 = tmp0 + b0_1;
00109 
00110                 p0 =  tmp0 << 4;
00111                 p1 =  tmp1 << 3;
00112                 p2 = (tmp0 + tmp2) << 3;
00113                 p3 = (tmp1 + tmp2 + b0_2) << 2;
00114             }
00115 
00116             /* process the HL-band by applying HPF vertically and LPF horizontally */
00117             if (num_bands > 1) {
00118                 tmp0 = b1_2;
00119                 tmp1 = b1_1;
00120                 b1_2 = b1_ptr[indx+1];
00121                 b1_1 = b1_ptr[back_pitch+indx+1];
00122 
00123                 tmp2 = tmp1 - tmp0*6 + b1_3;
00124                 b1_3 = b1_1 - b1_2*6 + b1_ptr[pitch+indx+1];
00125 
00126                 p0 += (tmp0 + tmp1) << 3;
00127                 p1 += (tmp0 + tmp1 + b1_1 + b1_2) << 2;
00128                 p2 +=  tmp2 << 2;
00129                 p3 += (tmp2 + b1_3) << 1;
00130             }
00131 
00132             /* process the LH-band by applying LPF vertically and HPF horizontally */
00133             if (num_bands > 2) {
00134                 b2_3 = b2_ptr[indx+1];
00135                 b2_6 = b2_ptr[pitch+indx+1];
00136 
00137                 tmp0 = b2_1 + b2_2;
00138                 tmp1 = b2_1 - b2_2*6 + b2_3;
00139 
00140                 p0 += tmp0 << 3;
00141                 p1 += tmp1 << 2;
00142                 p2 += (tmp0 + b2_4 + b2_5) << 2;
00143                 p3 += (tmp1 + b2_4 - b2_5*6 + b2_6) << 1;
00144             }
00145 
00146             /* process the HH-band by applying HPF both vertically and horizontally */
00147             if (num_bands > 3) {
00148                 b3_6 = b3_ptr[indx+1];            // b3[x+1,y  ]
00149                 b3_3 = b3_ptr[back_pitch+indx+1]; // b3[x+1,y-1]
00150 
00151                 tmp0 = b3_1 + b3_4;
00152                 tmp1 = b3_2 + b3_5;
00153                 tmp2 = b3_3 + b3_6;
00154 
00155                 b3_9 = b3_3 - b3_6*6 + b3_ptr[pitch+indx+1];
00156 
00157                 p0 += (tmp0 + tmp1) << 2;
00158                 p1 += (tmp0 - tmp1*6 + tmp2) << 1;
00159                 p2 += (b3_7 + b3_8) << 1;
00160                 p3 +=  b3_7 - b3_8*6 + b3_9;
00161             }
00162 
00163             /* output four pixels */
00164             dst[x]             = av_clip_uint8((p0 >> 6) + 128);
00165             dst[x+1]           = av_clip_uint8((p1 >> 6) + 128);
00166             dst[dst_pitch+x]   = av_clip_uint8((p2 >> 6) + 128);
00167             dst[dst_pitch+x+1] = av_clip_uint8((p3 >> 6) + 128);
00168         }// for x
00169 
00170         dst += dst_pitch << 1;
00171 
00172         back_pitch = -pitch;
00173 
00174         b0_ptr += pitch;
00175         b1_ptr += pitch;
00176         b2_ptr += pitch;
00177         b3_ptr += pitch;
00178     }
00179 }
00180 
/*
 * Building blocks of the inverse slant transform.
 *
 * All macros below evaluate their source arguments more than once and use
 * the caller-supplied temporaries (t, t0..t8) as scratch storage. An output
 * argument may name the same object as a source argument: every result is
 * computed into a temporary before the aliased source is overwritten.
 * Each macro expands to exactly one statement and must be followed by ';'.
 */

/** butterfly operation for the inverse slant transform: o1 = s1 + s2, o2 = s1 - s2 */
#define IVI_SLANT_BFLY(s1, s2, o1, o2, t) \
    do {\
        (t)  = (s1) - (s2);\
        (o1) = (s1) + (s2);\
        (o2) = (t);\
    } while (0)

/** This is a reflection a,b = 1/2, 5/4 for the inverse slant transform */
#define IVI_IREFLECT(s1, s2, o1, o2, t) \
    do {\
        (t)  = (((s1) + (s2)*2 + 2) >> 2) + (s1);\
        (o2) = (((s1)*2 - (s2) + 2) >> 2) - (s2);\
        (o1) = (t);\
    } while (0)

/** This is a reflection a,b = 1/2, 7/8 for the inverse slant transform */
#define IVI_SLANT_PART4(s1, s2, o1, o2, t) \
    do {\
        (t)  = (s2) + (((s1)*4  - (s2) + 4) >> 3);\
        (o2) = (s1) + ((-(s1) - (s2)*4 + 4) >> 3);\
        (o1) = (t);\
    } while (0)

/**
 * inverse slant8 transform
 *
 * s1..s8 are the eight inputs (note the permuted parameter order),
 * d1..d8 receive the outputs after COMPENSATE() has been applied, and
 * t0..t8 are scratch lvalues. COMPENSATE(x) must be defined by the user
 * at the point of expansion.
 */
#define IVI_INV_SLANT8(s1, s4, s8, s5, s2, s6, s3, s7,\
                       d1, d2, d3, d4, d5, d6, d7, d8,\
                       t0, t1, t2, t3, t4, t5, t6, t7, t8) \
    do {\
        IVI_SLANT_PART4(s4, s5, t4, t5, t0);\
\
        IVI_SLANT_BFLY(s1, t5, t1, t5, t0);\
        IVI_SLANT_BFLY(s2, s6, t2, t6, t0);\
        IVI_SLANT_BFLY(s7, s3, t7, t3, t0);\
        IVI_SLANT_BFLY(t4, s8, t4, t8, t0);\
\
        IVI_SLANT_BFLY(t1, t2, t1, t2, t0);\
        IVI_IREFLECT  (t4, t3, t4, t3, t0);\
        IVI_SLANT_BFLY(t5, t6, t5, t6, t0);\
        IVI_IREFLECT  (t8, t7, t8, t7, t0);\
        IVI_SLANT_BFLY(t1, t4, t1, t4, t0);\
        IVI_SLANT_BFLY(t2, t3, t2, t3, t0);\
        IVI_SLANT_BFLY(t5, t8, t5, t8, t0);\
        IVI_SLANT_BFLY(t6, t7, t6, t7, t0);\
        (d1) = COMPENSATE(t1);\
        (d2) = COMPENSATE(t2);\
        (d3) = COMPENSATE(t3);\
        (d4) = COMPENSATE(t4);\
        (d5) = COMPENSATE(t5);\
        (d6) = COMPENSATE(t6);\
        (d7) = COMPENSATE(t7);\
        (d8) = COMPENSATE(t8);\
    } while (0)

/**
 * inverse slant4 transform
 *
 * s1..s4 are the four inputs (note the permuted parameter order),
 * d1..d4 receive the outputs after COMPENSATE() has been applied, and
 * t0..t4 are scratch lvalues. COMPENSATE(x) must be defined by the user
 * at the point of expansion.
 */
#define IVI_INV_SLANT4(s1, s4, s2, s3, d1, d2, d3, d4, t0, t1, t2, t3, t4) \
    do {\
        IVI_SLANT_BFLY(s1, s2, t1, t2, t0);\
        IVI_IREFLECT  (s4, s3, t4, t3, t0);\
\
        IVI_SLANT_BFLY(t1, t4, t1, t4, t0);\
        IVI_SLANT_BFLY(t2, t3, t2, t3, t0);\
        (d1) = COMPENSATE(t1);\
        (d2) = COMPENSATE(t2);\
        (d3) = COMPENSATE(t3);\
        (d4) = COMPENSATE(t4);\
    } while (0)
00230 
/**
 * Two-dimensional inverse slant 8x8 transform.
 *
 * The first pass transforms the eight columns of the input block into a
 * scratch block; the second pass transforms the eight rows of the scratch
 * block, halves the results with rounding and stores them to the output.
 *
 * @param in     pointer to the 8x8 block of input coefficients (row stride 8)
 * @param out    pointer to the first output row
 * @param pitch  distance between two output rows, in int16_t elements
 * @param flags  eight per-column flags; a column whose flag is zero is not
 *               read and is treated as containing only zeros
 */
void ff_ivi_inverse_slant_8x8(const int32_t *in, int16_t *out, uint32_t pitch, const uint8_t *flags)
{
    int     i;
    const int32_t *src;
    int32_t *dst;
    /* scratch block; int32_t so that it matches the pointers used to access it */
    int32_t tmp[64];
    int     t0, t1, t2, t3, t4, t5, t6, t7, t8;

    /* pass 1: columns of "in" -> columns of "tmp", no scaling */
#define COMPENSATE(x) (x)
    src = in;
    dst = tmp;
    for (i = 0; i < 8; i++) {
        if (flags[i]) {
            IVI_INV_SLANT8(src[0], src[8], src[16], src[24], src[32], src[40], src[48], src[56],
                           dst[0], dst[8], dst[16], dst[24], dst[32], dst[40], dst[48], dst[56],
                           t0, t1, t2, t3, t4, t5, t6, t7, t8);
        } else {
            dst[0] = dst[8] = dst[16] = dst[24] = dst[32] = dst[40] = dst[48] = dst[56] = 0;
        }

        /* advance to the next column in both cases */
        src++;
        dst++;
    }
#undef COMPENSATE

    /* pass 2: rows of "tmp" -> rows of "out", halved with rounding */
#define COMPENSATE(x) (((x) + 1) >> 1)
    src = tmp;
    for (i = 0; i < 8; i++) {
        if (!src[0] && !src[1] && !src[2] && !src[3] && !src[4] && !src[5] && !src[6] && !src[7]) {
            /* an all-zero row transforms to zeros, skip the arithmetic */
            memset(out, 0, 8*sizeof(out[0]));
        } else {
            IVI_INV_SLANT8(src[0], src[1], src[2], src[3], src[4], src[5], src[6], src[7],
                           out[0], out[1], out[2], out[3], out[4], out[5], out[6], out[7],
                           t0, t1, t2, t3, t4, t5, t6, t7, t8);
        }
        src += 8;
        out += pitch;
    }
#undef COMPENSATE
}
00270 
/**
 * Two-dimensional inverse slant 4x4 transform.
 *
 * The first pass transforms the four columns of the input block into a
 * scratch block; the second pass transforms the four rows of the scratch
 * block, halves the results with rounding and stores them to the output.
 *
 * @param in     pointer to the 4x4 block of input coefficients (row stride 4)
 * @param out    pointer to the first output row
 * @param pitch  distance between two output rows, in int16_t elements
 * @param flags  four per-column flags; a column whose flag is zero is not
 *               read and is treated as containing only zeros
 */
void ff_ivi_inverse_slant_4x4(const int32_t *in, int16_t *out, uint32_t pitch, const uint8_t *flags)
{
    int     i;
    const int32_t *src;
    int32_t *dst;
    /* scratch block; int32_t so that it matches the pointers used to access it */
    int32_t tmp[16];
    int     t0, t1, t2, t3, t4;

    /* pass 1: columns of "in" -> columns of "tmp", no scaling */
#define COMPENSATE(x) (x)
    src = in;
    dst = tmp;
    for (i = 0; i < 4; i++) {
        if (flags[i]) {
            IVI_INV_SLANT4(src[0], src[4], src[8], src[12],
                           dst[0], dst[4], dst[8], dst[12],
                           t0, t1, t2, t3, t4);
        } else {
            dst[0] = dst[4] = dst[8] = dst[12] = 0;
        }

        /* advance to the next column in both cases */
        src++;
        dst++;
    }
#undef COMPENSATE

    /* pass 2: rows of "tmp" -> rows of "out", halved with rounding */
#define COMPENSATE(x) (((x) + 1) >> 1)
    src = tmp;
    for (i = 0; i < 4; i++) {
        if (!src[0] && !src[1] && !src[2] && !src[3]) {
            /* an all-zero row transforms to zeros, skip the arithmetic */
            out[0] = out[1] = out[2] = out[3] = 0;
        } else {
            IVI_INV_SLANT4(src[0], src[1], src[2], src[3],
                           out[0], out[1], out[2], out[3],
                           t0, t1, t2, t3, t4);
        }
        src += 4;
        out += pitch;
    }
#undef COMPENSATE
}
00310 
/**
 * Fill a square block with the value derived from a single DC coefficient.
 *
 * The DC coefficient is halved with rounding and the result is stored to
 * every element of the blk_size x blk_size output block.
 *
 * @param in        pointer to the DC coefficient (only in[0] is read)
 * @param out       pointer to the first output row
 * @param pitch     distance between two output rows, in int16_t elements
 * @param blk_size  width and height of the block to fill
 */
void ff_ivi_dc_slant_2d(const int32_t *in, int16_t *out, uint32_t pitch, int blk_size)
{
    const int16_t fill = (in[0] + 1) >> 1;
    int16_t      *line = out;
    int           row, col;

    for (row = 0; row < blk_size; row++) {
        for (col = 0; col < blk_size; col++) {
            line[col] = fill;
        }
        line += pitch;
    }
}
00323 
/**
 * Inverse one-dimensional slant transform applied to each of eight rows.
 *
 * Every row of eight input coefficients is transformed independently, the
 * results are halved with rounding and written to the matching output row.
 *
 * @param in     pointer to the 8x8 block of input coefficients (row stride 8)
 * @param out    pointer to the first output row
 * @param pitch  distance between two output rows, in int16_t elements
 * @param flags  not used by this function
 */
void ff_ivi_row_slant8(const int32_t *in, int16_t *out, uint32_t pitch, const uint8_t *flags)
{
    int     row, k;
    int     t0, t1, t2, t3, t4, t5, t6, t7, t8;

#define COMPENSATE(x) (((x) + 1) >> 1)
    for (row = 0; row < 8; row++, in += 8, out += pitch) {
        const int has_coeffs = in[0] || in[1] || in[2] || in[3] ||
                               in[4] || in[5] || in[6] || in[7];

        if (has_coeffs) {
            IVI_INV_SLANT8( in[0],  in[1],  in[2],  in[3],  in[4],  in[5],  in[6],  in[7],
                           out[0], out[1], out[2], out[3], out[4], out[5], out[6], out[7],
                           t0, t1, t2, t3, t4, t5, t6, t7, t8);
        } else {
            /* an empty row produces an empty row */
            for (k = 0; k < 8; k++)
                out[k] = 0;
        }
    }
#undef COMPENSATE
}
00343 
/**
 * DC-only variant of the row slant transform.
 *
 * The DC coefficient is halved with rounding and stored to the first
 * blk_size elements of the first output row; the first blk_size elements
 * of the remaining blk_size - 1 rows are set to zero.
 *
 * @param in        pointer to the DC coefficient (only in[0] is read)
 * @param out       pointer to the first output row
 * @param pitch     distance between two output rows, in int16_t elements
 * @param blk_size  width and height of the block to write
 */
void ff_ivi_dc_row_slant(const int32_t *in, int16_t *out, uint32_t pitch, int blk_size)
{
    const int16_t dc   = (in[0] + 1) >> 1;
    int16_t      *line = out;
    int           row, col;

    for (row = 0; row < blk_size; row++, line += pitch) {
        /* only the topmost row carries the DC value */
        const int16_t value = row == 0 ? dc : 0;

        for (col = 0; col < blk_size; col++)
            line[col] = value;
    }
}
00361 
/**
 * Inverse one-dimensional slant transform applied to each of eight columns.
 *
 * Every flagged column of eight input coefficients is transformed, the
 * results are halved with rounding and written to the matching output
 * column. Columns whose flag is zero are not read; the corresponding
 * output column is set to zero.
 *
 * @param in     pointer to the 8x8 block of input coefficients (row stride 8)
 * @param out    pointer to the first output row
 * @param pitch  distance between two output rows, in int16_t elements
 * @param flags  eight per-column flags selecting the columns to transform
 */
void ff_ivi_col_slant8(const int32_t *in, int16_t *out, uint32_t pitch, const uint8_t *flags)
{
    int     col;
    int     t0, t1, t2, t3, t4, t5, t6, t7, t8;
    /* offsets of output rows 2, 4 and 8; the other rows are derived from them */
    const int row2 = pitch << 1;
    const int row4 = pitch << 2;
    const int row8 = pitch << 3;

#define COMPENSATE(x) (((x) + 1) >> 1)
    for (col = 0; col < 8; col++) {
        const int32_t *c = in  + col;
        int16_t       *o = out + col;

        if (!flags[col]) {
            o[0]            = 0;
            o[pitch]        = 0;
            o[row2]         = 0;
            o[row2 + pitch] = 0;
            o[row4]         = 0;
            o[row4 + pitch] = 0;
            o[row4 + row2]  = 0;
            o[row8 - pitch] = 0;
            continue;
        }

        IVI_INV_SLANT8(c[0], c[8], c[16], c[24], c[32], c[40], c[48], c[56],
                       o[0], o[pitch], o[row2], o[row2 + pitch], o[row4],
                       o[row4 + pitch],  o[row4 + row2], o[row8 - pitch],
                       t0, t1, t2, t3, t4, t5, t6, t7, t8);
    }
#undef COMPENSATE
}
00388 
/**
 * DC-only variant of the column slant transform.
 *
 * The DC coefficient is halved with rounding and stored to the first
 * element of each of the blk_size output rows; the following
 * blk_size - 1 elements of every row are set to zero.
 *
 * @param in        pointer to the DC coefficient (only in[0] is read)
 * @param out       pointer to the first output row
 * @param pitch     distance between two output rows, in int16_t elements
 * @param blk_size  width and height of the block to write
 */
void ff_ivi_dc_col_slant(const int32_t *in, int16_t *out, uint32_t pitch, int blk_size)
{
    const int16_t dc   = (in[0] + 1) >> 1;
    int16_t      *line = out;
    int           row, col;

    for (row = 0; row < blk_size; row++) {
        /* only the leftmost column carries the DC value */
        for (col = 0; col < blk_size; col++)
            line[col] = col == 0 ? dc : 0;
        line += pitch;
    }
}
00402 
/**
 * Copy an 8x8 block of values to the output without any transform.
 *
 * Each 32-bit input value is converted to int16_t on store.
 *
 * @param in     pointer to the 8x8 input block (row stride 8)
 * @param out    pointer to the first output row
 * @param pitch  distance between two output rows, in int16_t elements
 * @param flags  not used by this function
 */
void ff_ivi_put_pixels_8x8(const int32_t *in, int16_t *out, uint32_t pitch,
                           const uint8_t *flags)
{
    const int32_t *src = in;
    int16_t       *dst = out;
    int            row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++) {
            dst[col] = src[col];
        }
        src += 8;
        dst += pitch;
    }
}
00412 
/**
 * Write an 8x8 block that contains only a DC value.
 *
 * in[0] is stored to the top-left element of the output block without any
 * scaling; the other 63 elements of the block are set to zero.
 *
 * @param in        pointer to the DC value (only in[0] is read)
 * @param out       pointer to the first output row
 * @param pitch     distance between two output rows, in int16_t elements
 * @param blk_size  not used by this function; the block is always 8x8
 */
void ff_ivi_put_dc_pixel_8x8(const int32_t *in, int16_t *out, uint32_t pitch,
                             int blk_size)
{
    int16_t *line = out;
    int      row, col;

    /* clear the whole block first ... */
    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            line[col] = 0;
        line += pitch;
    }

    /* ... then drop the DC value into the top-left corner */
    out[0] = in[0];
}
00425 
/*
 * Template for the motion compensation functions.
 *
 * Expands to ff_ivi_mc_<size>x<size><suffix>(buf, ref_buf, pitch, mc_type),
 * which combines a size x size block of reference samples into buf:
 *   buf      destination block
 *   ref_buf  reference block
 *   pitch    distance between two rows of either block, in int16_t elements
 *   mc_type  0 - copy, 1 - average with the right neighbour,
 *            2 - average with the neighbour below,
 *            3 - average of the 2x2 neighbourhood;
 *            any other value leaves buf untouched
 * OP(dst, value) decides how the predicted value is merged into buf.
 * Types 1 and 3 read one sample past the right edge of the block, types
 * 2 and 3 read one row past its bottom edge.
 */
#define IVI_MC_TEMPLATE(size, suffix, OP) \
void ff_ivi_mc_ ## size ##x## size ## suffix (int16_t *buf, const int16_t *ref_buf, \
                                              uint32_t pitch, int mc_type) \
{ \
    int            row, col; \
    int16_t       *d   = buf; \
    const int16_t *cur = ref_buf; \
    const int16_t *nxt; \
\
    switch (mc_type) { \
    case 0: /* fullpel (no interpolation) */ \
        for (row = 0; row < size; row++) { \
            for (col = 0; col < size; col++) { \
                OP(d[col], cur[col]); \
            } \
            d   += pitch; \
            cur += pitch; \
        } \
        break; \
    case 1: /* horizontal halfpel interpolation */ \
        for (row = 0; row < size; row++) { \
            for (col = 0; col < size; col++) { \
                OP(d[col], (cur[col] + cur[col+1]) >> 1); \
            } \
            d   += pitch; \
            cur += pitch; \
        } \
        break; \
    case 2: /* vertical halfpel interpolation */ \
        nxt = cur + pitch; \
        for (row = 0; row < size; row++) { \
            for (col = 0; col < size; col++) { \
                OP(d[col], (cur[col] + nxt[col]) >> 1); \
            } \
            d   += pitch; \
            cur += pitch; \
            nxt += pitch; \
        } \
        break; \
    case 3: /* vertical and horizontal halfpel interpolation */ \
        nxt = cur + pitch; \
        for (row = 0; row < size; row++) { \
            for (col = 0; col < size; col++) { \
                OP(d[col], (cur[col] + cur[col+1] + nxt[col] + nxt[col+1]) >> 2); \
            } \
            d   += pitch; \
            cur += pitch; \
            nxt += pitch; \
        } \
        break; \
    default: /* unknown type: nothing to do */ \
        break; \
    } \
}

/* store the prediction, replacing the destination sample */
#define OP_PUT(a, b)  (a) = (b)
/* add the prediction to the destination sample */
#define OP_ADD(a, b)  (a) += (b)

IVI_MC_TEMPLATE(8, _no_delta, OP_PUT)
IVI_MC_TEMPLATE(8, _delta,    OP_ADD)
IVI_MC_TEMPLATE(4, _no_delta, OP_PUT)
IVI_MC_TEMPLATE(4, _delta,    OP_ADD)