FFmpeg: libavcodec/nellymoserenc.c Source File

00001 /*
00002  * Nellymoser encoder
00003  * This code is developed as part of Google Summer of Code 2008 Program.
00004  *
00005  * Copyright (c) 2008 Bartlomiej Wolowiec
00006  *
00007  * This file is part of FFmpeg.
00008  *
00009  * FFmpeg is free software; you can redistribute it and/or
00010  * modify it under the terms of the GNU Lesser General Public
00011  * License as published by the Free Software Foundation; either
00012  * version 2.1 of the License, or (at your option) any later version.
00013  *
00014  * FFmpeg is distributed in the hope that it will be useful,
00015  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00016  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017  * Lesser General Public License for more details.
00018  *
00019  * You should have received a copy of the GNU Lesser General Public
00020  * License along with FFmpeg; if not, write to the Free Software
00021  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00022  */
00023 
00038 #include "nellymoser.h"
00039 #include "avcodec.h"
00040 #include "dsputil.h"
00041 #include "fft.h"
00042 
00043 #define BITSTREAM_WRITER_LE
00044 #include "put_bits.h"
00045 
/* Number of entries in pow_table (2^11); encode_block() indexes it with
 * the low 11 bits of the band exponent. */
#define POW_TABLE_SIZE   (1 << 11)
/* Bias added to the exponent when pow_table is generated and added to the
 * shift count that divides the table value in encode_block(). */
#define POW_TABLE_OFFSET 3
/* Number of cells per band in the trellis tables; encode_init() allocates
 * NELLY_BANDS * OPT_SIZE elements for each of opt and path. */
#define OPT_SIZE         ((1 << 15) + 3000)
00049 
00050 typedef struct NellyMoserEncodeContext {
00051     AVCodecContext  *avctx;
00052     int             last_frame;
00053     int             bufsel;
00054     int             have_saved;
00055     DSPContext      dsp;
00056     FFTContext      mdct_ctx;
00057     DECLARE_ALIGNED(16, float, mdct_out)[NELLY_SAMPLES];
00058     DECLARE_ALIGNED(16, float, in_buff)[NELLY_SAMPLES];
00059     DECLARE_ALIGNED(16, float, buf)[2][3 * NELLY_BUF_LEN];     
00060     float           (*opt )[NELLY_BANDS];
00061     uint8_t         (*path)[NELLY_BANDS];
00062 } NellyMoserEncodeContext;
00063 
00064 static float pow_table[POW_TABLE_SIZE];     
00065 
/*
 * Starting-index lookup for the first band's exponent.
 * get_exponent_greedy() indexes it (through find_best) with
 * (lrintf(val) >> 8) - 20, clipped to 0..95; the result is an index into
 * ff_nelly_init_table that find_best then refines by at most one step.
 */
static const uint8_t sf_lut[96] = {
     0,  1,  1,  1,  1,  1,  1,  2,
     2,  2,  2,  3,  3,  3,  4,  4,
     5,  5,  5,  6,  7,  7,  8,  8,
     9, 10, 11, 11, 12, 13, 13, 14,
    15, 15, 16, 17, 17, 18, 19, 19,
    20, 21, 22, 22, 23, 24, 25, 26,
    27, 27, 28, 29, 30, 31, 32, 33,
    34, 35, 36, 37, 37, 38, 39, 40,
    41, 41, 42, 43, 44, 45, 45, 46,
    47, 48, 49, 50, 51, 52, 52, 53,
    54, 55, 55, 56, 57, 57, 58, 59,
    59, 60, 60, 60, 61, 61, 61, 62,
};
00074 
/*
 * Starting-index lookup for the exponent deltas of bands 1 and up.
 * get_exponent_greedy() indexes it (through find_best) with
 * (lrintf(val) >> 8) + 37, clipped to 0..77; the result is an index into
 * ff_nelly_delta_table that find_best then refines by at most one step.
 */
static const uint8_t sf_delta_lut[78] = {
     0,  1,  1,  1,  1,  1,  1,  2,
     2,  2,  2,  3,  3,  3,  4,  4,
     4,  5,  5,  5,  6,  6,  7,  7,
     8,  8,  9, 10, 10, 11, 11, 12,
    13, 13, 14, 15, 16, 17, 17, 18,
    19, 19, 20, 21, 21, 22, 22, 23,
    23, 24, 24, 25, 25, 25, 26, 26,
    26, 26, 27, 27, 27, 27, 27, 28,
    28, 28, 28, 28, 28, 29, 29, 29,
    29, 29, 29, 29, 29, 30,
};
00082 
/*
 * Concatenated coefficient-quantiser lookups, one section per bit depth.
 * encode_block() clips its lookup position for an n-bit coefficient to
 * quant_lut_offset[n] .. quant_lut_offset[n + 1] - 1, i.e. to section n;
 * the entry read is a first guess for the quantiser index, which is then
 * compared against its successor in the dequantisation table.
 */
static const uint8_t quant_lut[230] = {
    /* 1 bit: position 0 */
     0,

    /* 2 bits: positions 1..3 */
     0,  1,  2,

    /* 3 bits: positions 4..10 */
     0,  1,  2,  3,  4,  5,  6,

    /* 4 bits: positions 11..31 */
     0,  1,  1,  2,  2,  3,  3,  4,  5,  6,  7,  8,  9, 10, 11, 11,
    12, 13, 13, 13, 14,

    /* 5 bits: positions 32..80 */
     0,  1,  1,  2,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  8,
     8,  9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
    22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
    30,

    /* 6 bits: positions 81..229 */
     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  3,  3,  3,  3,
     4,  4,  4,  5,  5,  5,  6,  6,  7,  7,  7,  8,  8,  9,  9,  9,
    10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
    15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
    21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
    33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
    46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
    53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
    58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
    61, 61, 61, 61, 62,
};
00109 
/*
 * Per-bit-depth parameters of the quant_lut lookup in encode_block():
 * for a coefficient quantised with n bits the lookup position is
 *     coeff * quant_lut_mul[n] + quant_lut_add[n]
 * clipped to quant_lut_offset[n] .. quant_lut_offset[n + 1] - 1.
 */
static const float quant_lut_mul[7] = {
    0.0, 0.0, 2.0, 2.0, 5.0, 12.0, 36.6
};
static const float quant_lut_add[7] = {
    0.0, 0.0, 2.0, 7.0, 21.0, 56.0, 157.0
};
/* Start position of each bit depth's section in quant_lut; the final
 * entry equals the table length. */
static const uint8_t quant_lut_offset[8] = {
    0, 0, 1, 4, 11, 32, 81, 230
};
00113 
00114 static void apply_mdct(NellyMoserEncodeContext *s)
00115 {
00116     memcpy(s->in_buff, s->buf[s->bufsel], NELLY_BUF_LEN * sizeof(float));
00117     s->dsp.vector_fmul(s->in_buff, ff_sine_128, NELLY_BUF_LEN);
00118     s->dsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN, ff_sine_128,
00119                                NELLY_BUF_LEN);
00120     ff_mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
00121 
00122     s->dsp.vector_fmul(s->buf[s->bufsel] + NELLY_BUF_LEN, ff_sine_128, NELLY_BUF_LEN);
00123     s->dsp.vector_fmul_reverse(s->buf[s->bufsel] + 2 * NELLY_BUF_LEN, s->buf[1 - s->bufsel], ff_sine_128,
00124                                NELLY_BUF_LEN);
00125     ff_mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN);
00126 }
00127 
00128 static av_cold int encode_init(AVCodecContext *avctx)
00129 {
00130     NellyMoserEncodeContext *s = avctx->priv_data;
00131     int i;
00132 
00133     if (avctx->channels != 1) {
00134         av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
00135         return -1;
00136     }
00137 
00138     if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
00139         avctx->sample_rate != 11025 &&
00140         avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
00141         avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
00142         av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
00143         return -1;
00144     }
00145 
00146     avctx->frame_size = NELLY_SAMPLES;
00147     s->avctx = avctx;
00148     ff_mdct_init(&s->mdct_ctx, 8, 0, 1.0);
00149     dsputil_init(&s->dsp, avctx);
00150 
00151     /* Generate overlap window */
00152     ff_sine_window_init(ff_sine_128, 128);
00153     for (i = 0; i < POW_TABLE_SIZE; i++)
00154         pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET);
00155 
00156     if (s->avctx->trellis) {
00157         s->opt  = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float  ));
00158         s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
00159     }
00160 
00161     return 0;
00162 }
00163 
00164 static av_cold int encode_end(AVCodecContext *avctx)
00165 {
00166     NellyMoserEncodeContext *s = avctx->priv_data;
00167 
00168     ff_mdct_end(&s->mdct_ctx);
00169 
00170     if (s->avctx->trellis) {
00171         av_free(s->opt);
00172         av_free(s->path);
00173     }
00174 
00175     return 0;
00176 }
00177 
00178 #define find_best(val, table, LUT, LUT_add, LUT_size) \
00179     best_idx = \
00180         LUT[av_clip ((lrintf(val) >> 8) + LUT_add, 0, LUT_size - 1)]; \
00181     if (fabs(val - table[best_idx]) > fabs(val - table[best_idx + 1])) \
00182         best_idx++;
00183 
00184 static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
00185 {
00186     int band, best_idx, power_idx = 0;
00187     float power_candidate;
00188 
00189     //base exponent
00190     find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
00191     idx_table[0] = best_idx;
00192     power_idx = ff_nelly_init_table[best_idx];
00193 
00194     for (band = 1; band < NELLY_BANDS; band++) {
00195         power_candidate = cand[band] - power_idx;
00196         find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
00197         idx_table[band] = best_idx;
00198         power_idx += ff_nelly_delta_table[best_idx];
00199     }
00200 }
00201 
/**
 * Cost of representing x by y in the trellis search: the squared difference.
 *
 * @param x    first value
 * @param y    second value
 * @param band band number (currently not used by the metric)
 * @return (x - y)^2
 */
static inline float distance(float x, float y, int band)
{
    const float diff = x - y;

    return diff * diff;
}
00208 
00209 static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
00210 {
00211     int i, j, band, best_idx;
00212     float power_candidate, best_val;
00213 
00214     float  (*opt )[NELLY_BANDS] = s->opt ;
00215     uint8_t(*path)[NELLY_BANDS] = s->path;
00216 
00217     for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
00218         opt[0][i] = INFINITY;
00219     }
00220 
00221     for (i = 0; i < 64; i++) {
00222         opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
00223         path[0][ff_nelly_init_table[i]] = i;
00224     }
00225 
00226     for (band = 1; band < NELLY_BANDS; band++) {
00227         int q, c = 0;
00228         float tmp;
00229         int idx_min, idx_max, idx;
00230         power_candidate = cand[band];
00231         for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
00232             idx_min = FFMAX(0, cand[band] - q);
00233             idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
00234             for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
00235                 if ( isinf(opt[band - 1][i]) )
00236                     continue;
00237                 for (j = 0; j < 32; j++) {
00238                     idx = i + ff_nelly_delta_table[j];
00239                     if (idx > idx_max)
00240                         break;
00241                     if (idx >= idx_min) {
00242                         tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
00243                         if (opt[band][idx] > tmp) {
00244                             opt[band][idx] = tmp;
00245                             path[band][idx] = j;
00246                             c = 1;
00247                         }
00248                     }
00249                 }
00250             }
00251         }
00252         assert(c); //FIXME
00253     }
00254 
00255     best_val = INFINITY;
00256     best_idx = -1;
00257     band = NELLY_BANDS - 1;
00258     for (i = 0; i < OPT_SIZE; i++) {
00259         if (best_val > opt[band][i]) {
00260             best_val = opt[band][i];
00261             best_idx = i;
00262         }
00263     }
00264     for (band = NELLY_BANDS - 1; band >= 0; band--) {
00265         idx_table[band] = path[band][best_idx];
00266         if (band) {
00267             best_idx -= ff_nelly_delta_table[path[band][best_idx]];
00268         }
00269     }
00270 }
00271 
00278 static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
00279 {
00280     PutBitContext pb;
00281     int i, j, band, block, best_idx, power_idx = 0;
00282     float power_val, coeff, coeff_sum;
00283     float pows[NELLY_FILL_LEN];
00284     int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
00285     float cand[NELLY_BANDS];
00286 
00287     apply_mdct(s);
00288 
00289     init_put_bits(&pb, output, output_size * 8);
00290 
00291     i = 0;
00292     for (band = 0; band < NELLY_BANDS; band++) {
00293         coeff_sum = 0;
00294         for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
00295             coeff_sum += s->mdct_out[i                ] * s->mdct_out[i                ]
00296                        + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
00297         }
00298         cand[band] =
00299             log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2;
00300     }
00301 
00302     if (s->avctx->trellis) {
00303         get_exponent_dynamic(s, cand, idx_table);
00304     } else {
00305         get_exponent_greedy(s, cand, idx_table);
00306     }
00307 
00308     i = 0;
00309     for (band = 0; band < NELLY_BANDS; band++) {
00310         if (band) {
00311             power_idx += ff_nelly_delta_table[idx_table[band]];
00312             put_bits(&pb, 5, idx_table[band]);
00313         } else {
00314             power_idx = ff_nelly_init_table[idx_table[0]];
00315             put_bits(&pb, 6, idx_table[0]);
00316         }
00317         power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
00318         for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
00319             s->mdct_out[i] *= power_val;
00320             s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
00321             pows[i] = power_idx;
00322         }
00323     }
00324 
00325     ff_nelly_get_sample_bits(pows, bits);
00326 
00327     for (block = 0; block < 2; block++) {
00328         for (i = 0; i < NELLY_FILL_LEN; i++) {
00329             if (bits[i] > 0) {
00330                 const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
00331                 coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
00332                 best_idx =
00333                     quant_lut[av_clip (
00334                             coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
00335                             quant_lut_offset[bits[i]],
00336                             quant_lut_offset[bits[i]+1] - 1
00337                             )];
00338                 if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
00339                     best_idx++;
00340 
00341                 put_bits(&pb, bits[i], best_idx);
00342             }
00343         }
00344         if (!block)
00345             put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
00346     }
00347 
00348     flush_put_bits(&pb);
00349 }
00350 
00351 static int encode_frame(AVCodecContext *avctx, uint8_t *frame, int buf_size, void *data)
00352 {
00353     NellyMoserEncodeContext *s = avctx->priv_data;
00354     int16_t *samples = data;
00355     int i;
00356 
00357     if (s->last_frame)
00358         return 0;
00359 
00360     if (data) {
00361         for (i = 0; i < avctx->frame_size; i++) {
00362             s->buf[s->bufsel][i] = samples[i];
00363         }
00364         for (; i < NELLY_SAMPLES; i++) {
00365             s->buf[s->bufsel][i] = 0;
00366         }
00367         s->bufsel = 1 - s->bufsel;
00368         if (!s->have_saved) {
00369             s->have_saved = 1;
00370             return 0;
00371         }
00372     } else {
00373         memset(s->buf[s->bufsel], 0, sizeof(s->buf[0][0]) * NELLY_BUF_LEN);
00374         s->bufsel = 1 - s->bufsel;
00375         s->last_frame = 1;
00376     }
00377 
00378     if (s->have_saved) {
00379         encode_block(s, frame, buf_size);
00380         return NELLY_BLOCK_LEN;
00381     }
00382     return 0;
00383 }
00384 
00385 AVCodec nellymoser_encoder = {
00386     .name = "nellymoser",
00387     .type = AVMEDIA_TYPE_AUDIO,
00388     .id = CODEC_ID_NELLYMOSER,
00389     .priv_data_size = sizeof(NellyMoserEncodeContext),
00390     .init = encode_init,
00391     .encode = encode_frame,
00392     .close = encode_end,
00393     .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
00394     .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
00395 };