#include <assert.h> /* for the assert() in the STORE_NODE macro below */

#include "avcodec.h"
#include "internal.h"
#include "g722.h"

#define FREEZE_INTERVAL 128

/* An arbitrary upper bound; very large frame sizes are not useful and can
 * cause problems elsewhere, so cap the frame size here. */
#define MAX_FRAME_SIZE 32768

/* avctx->trellis is clipped into this range to prevent overflows when
 * computing the trellis buffer sizes (which grow as 1 << trellis). */
#define MIN_TRELLIS 0
#define MAX_TRELLIS 16
static av_cold int g722_encode_close(AVCodecContext *avctx)
{
    G722Context *c = avctx->priv_data;
    int i;
    for (i = 0; i < 2; i++) {
        av_freep(&c->paths[i]);
        av_freep(&c->node_buf[i]);
        av_freep(&c->nodep_buf[i]);
    }
#if FF_API_OLD_ENCODE_AUDIO
    av_freep(&avctx->coded_frame);
#endif
    return 0;
}

static av_cold int g722_encode_init(AVCodecContext *avctx)
{
    G722Context *c = avctx->priv_data;
    int ret;

    if (avctx->channels != 1) {
        av_log(avctx, AV_LOG_ERROR, "Only mono tracks are allowed.\n");
        return AVERROR_INVALIDDATA;
    }

    c->band[0].scale_factor = 8;
    c->band[1].scale_factor = 2;
    c->prev_samples_pos = 22;

    if (avctx->trellis) {
        int frontier = 1 << avctx->trellis;
        int max_paths = frontier * FREEZE_INTERVAL;
        int i;
        for (i = 0; i < 2; i++) {
            c->paths[i]     = av_mallocz(max_paths * sizeof(**c->paths));
            c->node_buf[i]  = av_mallocz(2 * frontier * sizeof(**c->node_buf));
            c->nodep_buf[i] = av_mallocz(2 * frontier * sizeof(**c->nodep_buf));
            if (!c->paths[i] || !c->node_buf[i] || !c->nodep_buf[i]) {
                ret = AVERROR(ENOMEM);
                goto error;
            }
        }
    }

    if (avctx->frame_size) {
        /* Validate the requested frame size: each G.722 byte encodes two
         * samples, so it must be even, and it must not exceed
         * MAX_FRAME_SIZE. */
        if (avctx->frame_size & 1 || avctx->frame_size > MAX_FRAME_SIZE) {
            int new_frame_size;

            if (avctx->frame_size == 1)
                new_frame_size = 2;
            else if (avctx->frame_size > MAX_FRAME_SIZE)
                new_frame_size = MAX_FRAME_SIZE;
            else
                new_frame_size = avctx->frame_size - 1;

            av_log(avctx, AV_LOG_WARNING, "Requested frame size is not "
                   "allowed. Using %d instead of %d\n", new_frame_size,
                   avctx->frame_size);
            avctx->frame_size = new_frame_size;
        }
    } else {
        /* Default to 320 samples, i.e. 20 ms at 16 kHz, a common packet
         * size for VoIP applications. */
        avctx->frame_size = 320;
    }
    avctx->delay = 22;

    if (avctx->trellis) {
        /* Validate the trellis parameter. */
        if (avctx->trellis < MIN_TRELLIS || avctx->trellis > MAX_TRELLIS) {
            int new_trellis = av_clip(avctx->trellis, MIN_TRELLIS, MAX_TRELLIS);
            av_log(avctx, AV_LOG_WARNING, "Requested trellis value is not "
                   "allowed. Using %d instead of %d\n", new_trellis,
                   avctx->trellis);
            avctx->trellis = new_trellis;
        }
    }

#if FF_API_OLD_ENCODE_AUDIO
    avctx->coded_frame = avcodec_alloc_frame();
    if (!avctx->coded_frame) {
        ret = AVERROR(ENOMEM);
        goto error;
    }
#endif

    return 0;
error:
    g722_encode_close(avctx);
    return ret;
}

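/* Decision thresholds for the 6-bit low-band quantizer; the prediction
 * error is compared against these values after they are scaled by the
 * adaptive scale factor. */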
static const int16_t low_quant[33] = {
      35,   72,  110,  150,  190,  233,  276,  323,
     370,  422,  473,  530,  587,  650,  714,  786,
     858,  940, 1023, 1121, 1219, 1339, 1458, 1612,
    1765, 1980, 2195, 2557, 2919
};

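/* Split one pair of input samples into a low-band and a high-band subband
 * sample with the QMF analysis filter, keeping a sliding window of the
 * 22 previous samples that the 24-tap filter needs. */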
static inline void filter_samples(G722Context *c, const int16_t *samples,
                                  int *xlow, int *xhigh)
{
    int xout1, xout2;
    c->prev_samples[c->prev_samples_pos++] = samples[0];
    c->prev_samples[c->prev_samples_pos++] = samples[1];
    ff_g722_apply_qmf(c->prev_samples + c->prev_samples_pos - 24, &xout1, &xout2);
    *xlow  = xout1 + xout2 >> 14;
    *xhigh = xout1 - xout2 >> 14;
    if (c->prev_samples_pos >= PREV_SAMPLES_BUF_SIZE) {
        memmove(c->prev_samples,
                c->prev_samples + c->prev_samples_pos - 22,
                22 * sizeof(c->prev_samples[0]));
        c->prev_samples_pos = 22;
    }
}

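/* Quantize the high-band prediction error to a 2-bit code: the upper bit
 * carries the sign of the error, the lower bit the magnitude decision
 * against the scaled threshold. */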
static inline int encode_high(const struct G722Band *state, int xhigh)
{
    int diff = av_clip_int16(xhigh - state->s_predictor);
    int pred = 141 * state->scale_factor >> 8;

    return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0);
}

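/* Quantize the low-band prediction error to a 6-bit code by searching the
 * scaled low_quant decision thresholds. */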
static inline int encode_low(const struct G722Band *state, int xlow)
{
    int diff  = av_clip_int16(xlow - state->s_predictor);
    /* limit = diff >= 0 ? diff : -(diff + 1) */
    int limit = diff ^ (diff >> (sizeof(diff)*8-1));
    int i = 0;
    limit = limit + 1 << 10;
    if (limit > low_quant[8] * state->scale_factor)
        i = 9;
    while (i < 29 && limit > low_quant[i] * state->scale_factor)
        i++;
    return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i;
}

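/* Trellis (Viterbi-style) search over the quantizer decisions. The low and
 * high band are searched independently: for each output byte, up to
 * 2^trellis candidate encoder states ("frontier" nodes) are kept per band,
 * ranked by accumulated squared reconstruction error, and the best path is
 * flushed to the output every FREEZE_INTERVAL bytes. */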
static void g722_encode_trellis(G722Context *c, int trellis,
                                uint8_t *dst, int nb_samples,
                                const int16_t *samples)
{
    int i, j, k;
    int frontier = 1 << trellis;
    struct TrellisNode **nodes[2];
    struct TrellisNode **nodes_next[2];
    int pathn[2] = {0, 0}, froze = -1;
    struct TrellisPath *p[2];

    for (i = 0; i < 2; i++) {
        nodes[i] = c->nodep_buf[i];
        nodes_next[i] = c->nodep_buf[i] + frontier;
        memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf));
        nodes[i][0] = c->node_buf[i] + frontier;
        nodes[i][0]->ssd = 0;
        nodes[i][0]->path = 0;
        nodes[i][0]->state = c->band[i];
    }

    for (i = 0; i < nb_samples >> 1; i++) {
        int xlow, xhigh;
        struct TrellisNode *next[2];
        int heap_pos[2] = {0, 0};

        for (j = 0; j < 2; j++) {
            next[j] = c->node_buf[j] + frontier*(i & 1);
            memset(nodes_next[j], 0, frontier * sizeof(**nodes_next));
        }

        filter_samples(c, &samples[2*i], &xlow, &xhigh);

        for (j = 0; j < frontier && nodes[0][j]; j++) {
            /* Only ilow >> 2 affects the adaptive state, so try the 6-bit
             * index in steps of 4 around the nominal choice. The wider
             * search is only done for the first half of the frontier,
             * which tends to hold the lower-error nodes. */
            int range = j < frontier/2 ? 4 : 0;
            struct TrellisNode *cur_node = nodes[0][j];

            int ilow = encode_low(&cur_node->state, xlow);

            for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) {
                int decoded, dec_diff, pos;
                uint32_t ssd;
                struct TrellisNode* node;

                if (k < 0)
                    continue;

                decoded = av_clip((cur_node->state.scale_factor *
                                  ff_g722_low_inv_quant6[k] >> 10)
                                + cur_node->state.s_predictor, -16384, 16383);
                dec_diff = xlow - decoded;

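                /* Insert the candidate into the next frontier of the given
                 * band. The frontier is kept as a min-heap ordered by the
                 * accumulated squared error: while it is not full, new nodes
                 * are appended and sifted up; once full, a candidate may
                 * only replace a leaf node with a worse error. */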
#define STORE_NODE(index, UPDATE, VALUE)\
                ssd = cur_node->ssd + dec_diff*dec_diff;\
                /* Check for wraparound of the 32-bit ssd accumulator. */\
                if (ssd < cur_node->ssd)\
                    continue;\
                if (heap_pos[index] < frontier) {\
                    pos = heap_pos[index]++;\
                    assert(pathn[index] < FREEZE_INTERVAL * frontier);\
                    node = nodes_next[index][pos] = next[index]++;\
                    node->path = pathn[index]++;\
                } else {\
                    /* Try to replace one of the leaf nodes with the new \
                     * one, but not always testing the same leaf position. */\
                    pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\
                    if (ssd >= nodes_next[index][pos]->ssd)\
                        continue;\
                    heap_pos[index]++;\
                    node = nodes_next[index][pos];\
                }\
                node->ssd = ssd;\
                node->state = cur_node->state;\
                UPDATE;\
                c->paths[index][node->path].value = VALUE;\
                c->paths[index][node->path].prev = cur_node->path;\
                /* Sift the newly inserted node up in the heap to restore \
                 * the heap property. */\
                while (pos > 0) {\
                    int parent = (pos - 1) >> 1;\
                    if (nodes_next[index][parent]->ssd <= ssd)\
                        break;\
                    FFSWAP(struct TrellisNode*, nodes_next[index][parent],\
                                                nodes_next[index][pos]);\
                    pos = parent;\
                }
                STORE_NODE(0, ff_g722_update_low_predictor(&node->state, k >> 2), k);
            }
        }

        for (j = 0; j < frontier && nodes[1][j]; j++) {
            int ihigh;
            struct TrellisNode *cur_node = nodes[1][j];

            /* The high band has only four possible quantizer indices, so
             * try all of them instead of starting from encode_high(). */
            for (ihigh = 0; ihigh < 4; ihigh++) {
                int dhigh, decoded, dec_diff, pos;
                uint32_t ssd;
                struct TrellisNode* node;

                dhigh = cur_node->state.scale_factor *
                        ff_g722_high_inv_quant[ihigh] >> 10;
                decoded = av_clip(dhigh + cur_node->state.s_predictor,
                                  -16384, 16383);
                dec_diff = xhigh - decoded;

                STORE_NODE(1, ff_g722_update_high_predictor(&node->state, dhigh, ihigh), ihigh);
            }
        }

        for (j = 0; j < 2; j++) {
            FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]);

            if (nodes[j][0]->ssd > (1 << 16)) {
                /* Rebase the accumulated errors against the current minimum
                 * (the heap root) so that they do not overflow. */
                for (k = 1; k < frontier && nodes[j][k]; k++)
                    nodes[j][k]->ssd -= nodes[j][0]->ssd;
                nodes[j][0]->ssd = 0;
            }
        }

        if (i == froze + FREEZE_INTERVAL) {
            p[0] = &c->paths[0][nodes[0][0]->path];
            p[1] = &c->paths[1][nodes[1][0]->path];
            for (j = i; j > froze; j--) {
                dst[j] = p[1]->value << 6 | p[0]->value;
                p[0] = &c->paths[0][p[0]->prev];
                p[1] = &c->paths[1][p[1]->prev];
            }
            froze = i;
            pathn[0] = pathn[1] = 0;
            memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes));
            memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes));
        }
    }

    p[0] = &c->paths[0][nodes[0][0]->path];
    p[1] = &c->paths[1][nodes[1][0]->path];
    for (j = i; j > froze; j--) {
        dst[j] = p[1]->value << 6 | p[0]->value;
        p[0] = &c->paths[0][p[0]->prev];
        p[1] = &c->paths[1][p[1]->prev];
    }
    c->band[0] = nodes[0][0]->state;
    c->band[1] = nodes[1][0]->state;
}

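/* Encode one pair of input samples into a single G.722 byte: two bits for
 * the high band and six bits for the low band. */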
static av_always_inline void encode_byte(G722Context *c, uint8_t *dst,
                                         const int16_t *samples)
{
    int xlow, xhigh, ilow, ihigh;
    filter_samples(c, samples, &xlow, &xhigh);
    ihigh = encode_high(&c->band[1], xhigh);
    ilow  = encode_low (&c->band[0], xlow);
    ff_g722_update_high_predictor(&c->band[1], c->band[1].scale_factor *
                                  ff_g722_high_inv_quant[ihigh] >> 10, ihigh);
    ff_g722_update_low_predictor(&c->band[0], ilow >> 2);
    *dst = ihigh << 6 | ilow;
}

static void g722_encode_no_trellis(G722Context *c,
                                   uint8_t *dst, int nb_samples,
                                   const int16_t *samples)
{
    int i;
    for (i = 0; i < nb_samples; i += 2)
        encode_byte(c, dst++, &samples[i]);
}

static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
                             const AVFrame *frame, int *got_packet_ptr)
{
    G722Context *c = avctx->priv_data;
    const int16_t *samples = (const int16_t *)frame->data[0];
    int nb_samples, out_size, ret;

    out_size = (frame->nb_samples + 1) / 2;
    if ((ret = ff_alloc_packet2(avctx, avpkt, out_size)))
        return ret;

    nb_samples = frame->nb_samples - (frame->nb_samples & 1);

    if (avctx->trellis)
        g722_encode_trellis(c, avctx->trellis, avpkt->data, nb_samples, samples);
    else
        g722_encode_no_trellis(c, avpkt->data, nb_samples, samples);

    /* If the frame has an odd number of samples, duplicate the last sample
     * to form a full pair for the final output byte. */
    if (nb_samples < frame->nb_samples) {
        int16_t last_samples[2] = { samples[nb_samples], samples[nb_samples] };
        encode_byte(c, &avpkt->data[nb_samples >> 1], last_samples);
    }

    if (frame->pts != AV_NOPTS_VALUE)
        avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->delay);
    *got_packet_ptr = 1;
    return 0;
}

AVCodec ff_adpcm_g722_encoder = {
    .name           = "g722",
    .type           = AVMEDIA_TYPE_AUDIO,
    .id             = CODEC_ID_ADPCM_G722,
    .priv_data_size = sizeof(G722Context),
    .init           = g722_encode_init,
    .close          = g722_encode_close,
    .encode2        = g722_encode_frame,
    .capabilities   = CODEC_CAP_SMALL_LAST_FRAME,
    .long_name      = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
    .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
                                                     AV_SAMPLE_FMT_NONE },
};