FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
nellymoserenc.c
Go to the documentation of this file.
1 /*
2  * Nellymoser encoder
3  * This code is developed as part of Google Summer of Code 2008 Program.
4  *
5  * Copyright (c) 2008 Bartlomiej Wolowiec
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 /**
25  * @file
26  * Nellymoser encoder
27  * by Bartlomiej Wolowiec
28  *
29  * Generic codec information: libavcodec/nellymoserdec.c
30  *
31  * Some information also from: http://samples.mplayerhq.hu/A-codecs/Nelly_Moser/ASAO/ASAO.zip
32  * (Copyright Joseph Artsimovich and UAB "DKD")
33  *
34  * for more information about nellymoser format, visit:
35  * http://wiki.multimedia.cx/index.php?title=Nellymoser
36  */
37 
38 #include "libavutil/float_dsp.h"
39 #include "libavutil/mathematics.h"
40 #include "nellymoser.h"
41 #include "avcodec.h"
42 #include "audio_frame_queue.h"
43 #include "fft.h"
44 #include "internal.h"
45 #include "sinewin.h"
46 
47 #define BITSTREAM_WRITER_LE
48 #include "put_bits.h"
49 
/** Number of entries in pow_table (one per 1/2048 step of the exponent). */
#define POW_TABLE_SIZE   (1 << 11)
/** Bias added to the exponent when pow_table is filled and to the shift
 *  applied when a table entry is turned into a scale factor. */
#define POW_TABLE_OFFSET 3
/** Length of one row of the opt/path tables used by the trellis search. */
#define OPT_SIZE         ((1 << 15) + 3000)
53 
54 typedef struct NellyMoserEncodeContext {
62  DECLARE_ALIGNED(32, float, buf)[3 * NELLY_BUF_LEN]; ///< sample buffer
63  float (*opt )[OPT_SIZE];
66 
67 static float pow_table[POW_TABLE_SIZE]; ///< entry i holds -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET), as written by the table fill loop at init time
68 
/**
 * First-guess lookup for the exponent index of band 0.
 *
 * get_exponent_greedy() passes this table to find_best(), which indexes it
 * with (lrintf(val) >> 8) - 20 clipped to [0, 95] and then compares the
 * returned ff_nelly_init_table index against its upper neighbour.
 */
static const uint8_t sf_lut[96] = {
    /*  0..15 */  0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
    /* 16..31 */  5,  5,  5,  6,  7,  7,  8,  8,  9, 10, 11, 11, 12, 13, 13, 14,
    /* 32..47 */ 15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
    /* 48..63 */ 27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
    /* 64..79 */ 41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
    /* 80..95 */ 54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
};
77 
/**
 * First-guess lookup for the delta exponent index of bands 1 and up.
 *
 * get_exponent_greedy() passes this table to find_best(), which indexes it
 * with (lrintf(val) >> 8) + 37 clipped to [0, 77] and then compares the
 * returned ff_nelly_delta_table index against its upper neighbour.
 */
static const uint8_t sf_delta_lut[78] = {
    /*  0..15 */  0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
    /* 16..31 */  4,  5,  5,  5,  6,  6,  7,  7,  8,  8,  9, 10, 10, 11, 11, 12,
    /* 32..47 */ 13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
    /* 48..63 */ 23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
    /* 64..77 */ 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
};
85 
/**
 * First-guess quantizer index for a scaled MDCT coefficient.
 *
 * The table is a concatenation of one section per bit width (1..6 bits).
 * Section b occupies indices quant_lut_offset[b] .. quant_lut_offset[b + 1] - 1;
 * the encoder indexes it with coeff * quant_lut_mul[b] + quant_lut_add[b]
 * clipped to that range.
 */
static const uint8_t quant_lut[230] = {
    /* 1 bit: index 0 */
     0,

    /* 2 bits: indices 1..3 */
     0,  1,  2,

    /* 3 bits: indices 4..10 */
     0,  1,  2,  3,  4,  5,  6,

    /* 4 bits: indices 11..31 */
     0,  1,  1,  2,  2,  3,  3,  4,  5,  6,  7,  8,  9, 10, 11, 11,
    12, 13, 13, 13, 14,

    /* 5 bits: indices 32..80 */
     0,  1,  1,  2,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  8,
     8,  9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
    22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
    30,

    /* 6 bits: indices 81..229 */
     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  3,  3,  3,  3,
     4,  4,  4,  5,  5,  5,  6,  6,  7,  7,  7,  8,  8,  9,  9,  9,
    10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
    15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
    21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
    33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
    46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
    53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
    58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
    61, 61, 61, 61, 62,
};
112 
/**
 * Per-bit-width parameters for the quant_lut lookup; each array is indexed
 * by the number of bits assigned to a coefficient.
 *
 * quant_lut_mul / quant_lut_add map a scaled coefficient to a position in
 * quant_lut (coeff * mul + add); quant_lut_offset[b] and
 * quant_lut_offset[b + 1] - 1 are the clip limits for that position.
 */
static const float quant_lut_mul[7] = {
    0.0, 0.0, 2.0, 2.0, 5.0, 12.0, 36.6
};
static const float quant_lut_add[7] = {
    0.0, 0.0, 2.0, 7.0, 21.0, 56.0, 157.0
};
static const uint8_t quant_lut_offset[8] = {
    0, 0, 1, 4, 11, 32, 81, 230
};
116 
118 {
119  float *in0 = s->buf;
120  float *in1 = s->buf + NELLY_BUF_LEN;
121  float *in2 = s->buf + 2 * NELLY_BUF_LEN;
122 
123  s->fdsp.vector_fmul (s->in_buff, in0, ff_sine_128, NELLY_BUF_LEN);
124  s->fdsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in1, ff_sine_128, NELLY_BUF_LEN);
125  s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
126 
127  s->fdsp.vector_fmul (s->in_buff, in1, ff_sine_128, NELLY_BUF_LEN);
128  s->fdsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in2, ff_sine_128, NELLY_BUF_LEN);
130 }
131 
133 {
135 
136  ff_mdct_end(&s->mdct_ctx);
137 
138  if (s->avctx->trellis) {
139  av_free(s->opt);
140  av_free(s->path);
141  }
142  ff_af_queue_close(&s->afq);
143 
144  return 0;
145 }
146 
148 {
150  int i, ret;
151 
152  if (avctx->channels != 1) {
153  av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
154  return AVERROR(EINVAL);
155  }
156 
157  if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
158  avctx->sample_rate != 11025 &&
159  avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
161  av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
162  return AVERROR(EINVAL);
163  }
164 
165  avctx->frame_size = NELLY_SAMPLES;
166  avctx->delay = NELLY_BUF_LEN;
167  ff_af_queue_init(avctx, &s->afq);
168  s->avctx = avctx;
169  if ((ret = ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0)) < 0)
170  goto error;
172 
173  /* Generate overlap window */
175  for (i = 0; i < POW_TABLE_SIZE; i++)
176  pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET);
177 
178  if (s->avctx->trellis) {
179  s->opt = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float ));
180  s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
181  if (!s->opt || !s->path) {
182  ret = AVERROR(ENOMEM);
183  goto error;
184  }
185  }
186 
187  return 0;
188 error:
189  encode_end(avctx);
190  return ret;
191 }
192 
/**
 * Find the index of the entry of `table` nearest to `val`.
 *
 * The result is stored in a variable named `best_idx`, which must exist in
 * the scope where the macro is used. A first guess is read from `LUT` at
 * position (lrintf(val) >> 8) + LUT_add, clipped to [0, LUT_size - 1]; the
 * guess is then bumped by one if the next table entry is closer to `val`.
 *
 * The body is wrapped in do { } while (0) so the macro expands to a single
 * statement and is safe under an unbraced if/else. `val` is evaluated more
 * than once, so pass an expression without side effects.
 */
#define find_best(val, table, LUT, LUT_add, LUT_size) \
    do { \
        best_idx = \
            (LUT)[av_clip((lrintf(val) >> 8) + (LUT_add), 0, (LUT_size) - 1)]; \
        if (fabs((val) - (table)[best_idx]) > fabs((val) - (table)[best_idx + 1])) \
            best_idx++; \
    } while (0)
198 
199 static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
200 {
201  int band, best_idx, power_idx = 0;
202  float power_candidate;
203 
204  //base exponent
205  find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
206  idx_table[0] = best_idx;
207  power_idx = ff_nelly_init_table[best_idx];
208 
209  for (band = 1; band < NELLY_BANDS; band++) {
210  power_candidate = cand[band] - power_idx;
211  find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
212  idx_table[band] = best_idx;
213  power_idx += ff_nelly_delta_table[best_idx];
214  }
215 }
216 
/**
 * Squared difference between two values.
 *
 * @param x    first value
 * @param y    second value
 * @param band band number; accepted but not used in the computation
 * @return (x - y) squared
 */
static inline float distance(float x, float y, int band)
{
    const float diff = x - y;

    return diff * diff;
}
223 
224 static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
225 {
226  int i, j, band, best_idx;
227  float power_candidate, best_val;
228 
229  float (*opt )[OPT_SIZE] = s->opt ;
230  uint8_t(*path)[OPT_SIZE] = s->path;
231 
232  for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
233  opt[0][i] = INFINITY;
234  }
235 
236  for (i = 0; i < 64; i++) {
237  opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
238  path[0][ff_nelly_init_table[i]] = i;
239  }
240 
241  for (band = 1; band < NELLY_BANDS; band++) {
242  int q, c = 0;
243  float tmp;
244  int idx_min, idx_max, idx;
245  power_candidate = cand[band];
246  for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
247  idx_min = FFMAX(0, cand[band] - q);
248  idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
249  for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
250  if ( isinf(opt[band - 1][i]) )
251  continue;
252  for (j = 0; j < 32; j++) {
253  idx = i + ff_nelly_delta_table[j];
254  if (idx > idx_max)
255  break;
256  if (idx >= idx_min) {
257  tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
258  if (opt[band][idx] > tmp) {
259  opt[band][idx] = tmp;
260  path[band][idx] = j;
261  c = 1;
262  }
263  }
264  }
265  }
266  }
267  assert(c); //FIXME
268  }
269 
270  best_val = INFINITY;
271  best_idx = -1;
272  band = NELLY_BANDS - 1;
273  for (i = 0; i < OPT_SIZE; i++) {
274  if (best_val > opt[band][i]) {
275  best_val = opt[band][i];
276  best_idx = i;
277  }
278  }
279  for (band = NELLY_BANDS - 1; band >= 0; band--) {
280  idx_table[band] = path[band][best_idx];
281  if (band) {
282  best_idx -= ff_nelly_delta_table[path[band][best_idx]];
283  }
284  }
285 }
286 
287 /**
288  * Encode NELLY_SAMPLES samples. It assumes that the sample buffer (s->buf) holds 3 * NELLY_BUF_LEN values
289  * @param s encoder context
290  * @param output output buffer
291  * @param output_size size of output buffer
292  */
293 static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
294 {
295  PutBitContext pb;
296  int i, j, band, block, best_idx, power_idx = 0;
297  float power_val, coeff, coeff_sum;
298  float pows[NELLY_FILL_LEN];
299  int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
300  float cand[NELLY_BANDS];
301 
302  apply_mdct(s);
303 
304  init_put_bits(&pb, output, output_size * 8);
305 
306  i = 0;
307  for (band = 0; band < NELLY_BANDS; band++) {
308  coeff_sum = 0;
309  for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
310  coeff_sum += s->mdct_out[i ] * s->mdct_out[i ]
311  + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
312  }
313  cand[band] =
314  log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2;
315  }
316 
317  if (s->avctx->trellis) {
318  get_exponent_dynamic(s, cand, idx_table);
319  } else {
320  get_exponent_greedy(s, cand, idx_table);
321  }
322 
323  i = 0;
324  for (band = 0; band < NELLY_BANDS; band++) {
325  if (band) {
326  power_idx += ff_nelly_delta_table[idx_table[band]];
327  put_bits(&pb, 5, idx_table[band]);
328  } else {
329  power_idx = ff_nelly_init_table[idx_table[0]];
330  put_bits(&pb, 6, idx_table[0]);
331  }
332  power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
333  for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
334  s->mdct_out[i] *= power_val;
335  s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
336  pows[i] = power_idx;
337  }
338  }
339 
340  ff_nelly_get_sample_bits(pows, bits);
341 
342  for (block = 0; block < 2; block++) {
343  for (i = 0; i < NELLY_FILL_LEN; i++) {
344  if (bits[i] > 0) {
345  const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
346  coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
347  best_idx =
348  quant_lut[av_clip (
349  coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
350  quant_lut_offset[bits[i]],
351  quant_lut_offset[bits[i]+1] - 1
352  )];
353  if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
354  best_idx++;
355 
356  put_bits(&pb, bits[i], best_idx);
357  }
358  }
359  if (!block)
361  }
362 
363  flush_put_bits(&pb);
364  memset(put_bits_ptr(&pb), 0, output + output_size - put_bits_ptr(&pb));
365 }
366 
367 static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
368  const AVFrame *frame, int *got_packet_ptr)
369 {
371  int ret;
372 
373  if (s->last_frame)
374  return 0;
375 
376  memcpy(s->buf, s->buf + NELLY_SAMPLES, NELLY_BUF_LEN * sizeof(*s->buf));
377  if (frame) {
378  memcpy(s->buf + NELLY_BUF_LEN, frame->data[0],
379  frame->nb_samples * sizeof(*s->buf));
380  if (frame->nb_samples < NELLY_SAMPLES) {
381  memset(s->buf + NELLY_BUF_LEN + frame->nb_samples, 0,
382  (NELLY_SAMPLES - frame->nb_samples) * sizeof(*s->buf));
383  if (frame->nb_samples >= NELLY_BUF_LEN)
384  s->last_frame = 1;
385  }
386  if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
387  return ret;
388  } else {
389  memset(s->buf + NELLY_BUF_LEN, 0, NELLY_SAMPLES * sizeof(*s->buf));
390  s->last_frame = 1;
391  }
392 
393  if ((ret = ff_alloc_packet2(avctx, avpkt, NELLY_BLOCK_LEN)) < 0)
394  return ret;
395  encode_block(s, avpkt->data, avpkt->size);
396 
397  /* Get the next frame pts/duration */
398  ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
399  &avpkt->duration);
400 
401  *got_packet_ptr = 1;
402  return 0;
403 }
404 
406  .name = "nellymoser",
407  .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
408  .type = AVMEDIA_TYPE_AUDIO,
410  .priv_data_size = sizeof(NellyMoserEncodeContext),
411  .init = encode_init,
412  .encode2 = encode_frame,
413  .close = encode_end,
415  .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLT,
417 };