FFmpeg: libavcodec/libspeexenc.c Source File

00001 /*
00002  * Copyright (C) 2009 Justin Ruggles
00003  * Copyright (c) 2009 Xuggle Incorporated
00004  *
00005  * This file is part of FFmpeg.
00006  *
00007  * FFmpeg is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * FFmpeg is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with FFmpeg; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  */
00021 
00067 #include <speex/speex.h>
00068 #include <speex/speex_header.h>
00069 #include <speex/speex_stereo.h>
00070 #include "libavutil/mathematics.h"
00071 #include "libavutil/opt.h"
00072 #include "avcodec.h"
00073 #include "internal.h"
00074 
00075 typedef struct {
00076     AVClass *class;             
00077     SpeexBits bits;             
00078     SpeexHeader header;         
00079     void *enc_state;            
00080     int frames_per_packet;      
00081     float vbr_quality;          
00082     int cbr_quality;            
00083     int abr;                    
00084     int pkt_frame_count;        
00085     int lookahead;              
00086     int sample_count;           
00087 } LibSpeexEncContext;
00088 
00089 static av_cold void print_enc_params(AVCodecContext *avctx,
00090                                      LibSpeexEncContext *s)
00091 {
00092     const char *mode_str = "unknown";
00093 
00094     av_log(avctx, AV_LOG_DEBUG, "channels: %d\n", avctx->channels);
00095     switch (s->header.mode) {
00096     case SPEEX_MODEID_NB:  mode_str = "narrowband";     break;
00097     case SPEEX_MODEID_WB:  mode_str = "wideband";       break;
00098     case SPEEX_MODEID_UWB: mode_str = "ultra-wideband"; break;
00099     }
00100     av_log(avctx, AV_LOG_DEBUG, "mode: %s\n", mode_str);
00101     if (s->header.vbr) {
00102         av_log(avctx, AV_LOG_DEBUG, "rate control: VBR\n");
00103         av_log(avctx, AV_LOG_DEBUG, "  quality: %f\n", s->vbr_quality);
00104     } else if (s->abr) {
00105         av_log(avctx, AV_LOG_DEBUG, "rate control: ABR\n");
00106         av_log(avctx, AV_LOG_DEBUG, "  bitrate: %d bps\n", avctx->bit_rate);
00107     } else {
00108         av_log(avctx, AV_LOG_DEBUG, "rate control: CBR\n");
00109         av_log(avctx, AV_LOG_DEBUG, "  bitrate: %d bps\n", avctx->bit_rate);
00110     }
00111     av_log(avctx, AV_LOG_DEBUG, "complexity: %d\n",
00112            avctx->compression_level);
00113     av_log(avctx, AV_LOG_DEBUG, "frame size: %d samples\n",
00114            avctx->frame_size);
00115     av_log(avctx, AV_LOG_DEBUG, "frames per packet: %d\n",
00116            s->frames_per_packet);
00117     av_log(avctx, AV_LOG_DEBUG, "packet size: %d\n",
00118            avctx->frame_size * s->frames_per_packet);
00119 }
00120 
00121 static av_cold int encode_init(AVCodecContext *avctx)
00122 {
00123     LibSpeexEncContext *s = avctx->priv_data;
00124     const SpeexMode *mode;
00125     uint8_t *header_data;
00126     int header_size;
00127     int32_t complexity;
00128 
00129     /* channels */
00130     if (avctx->channels < 1 || avctx->channels > 2) {
00131         av_log(avctx, AV_LOG_ERROR, "Invalid channels (%d). Only stereo and "
00132                "mono are supported\n", avctx->channels);
00133         return AVERROR(EINVAL);
00134     }
00135 
00136     /* sample rate and encoding mode */
00137     switch (avctx->sample_rate) {
00138     case  8000: mode = &speex_nb_mode;  break;
00139     case 16000: mode = &speex_wb_mode;  break;
00140     case 32000: mode = &speex_uwb_mode; break;
00141     default:
00142         av_log(avctx, AV_LOG_ERROR, "Sample rate of %d Hz is not supported. "
00143                "Resample to 8, 16, or 32 kHz.\n", avctx->sample_rate);
00144         return AVERROR(EINVAL);
00145     }
00146 
00147     /* initialize libspeex */
00148     s->enc_state = speex_encoder_init(mode);
00149     if (!s->enc_state) {
00150         av_log(avctx, AV_LOG_ERROR, "Error initializing libspeex\n");
00151         return -1;
00152     }
00153     speex_init_header(&s->header, avctx->sample_rate, avctx->channels, mode);
00154 
00155     /* rate control method and parameters */
00156     if (avctx->flags & CODEC_FLAG_QSCALE) {
00157         /* VBR */
00158         s->header.vbr = 1;
00159         speex_encoder_ctl(s->enc_state, SPEEX_SET_VBR, &s->header.vbr);
00160         s->vbr_quality = av_clipf(avctx->global_quality / (float)FF_QP2LAMBDA,
00161                                   0.0f, 10.0f);
00162         speex_encoder_ctl(s->enc_state, SPEEX_SET_VBR_QUALITY, &s->vbr_quality);
00163     } else {
00164         s->header.bitrate = avctx->bit_rate;
00165         if (avctx->bit_rate > 0) {
00166             /* CBR or ABR by bitrate */
00167             if (s->abr) {
00168                 speex_encoder_ctl(s->enc_state, SPEEX_SET_ABR,
00169                                   &s->header.bitrate);
00170                 speex_encoder_ctl(s->enc_state, SPEEX_GET_ABR,
00171                                   &s->header.bitrate);
00172             } else {
00173                 speex_encoder_ctl(s->enc_state, SPEEX_SET_BITRATE,
00174                                   &s->header.bitrate);
00175                 speex_encoder_ctl(s->enc_state, SPEEX_GET_BITRATE,
00176                                   &s->header.bitrate);
00177             }
00178         } else {
00179             /* CBR by quality */
00180             speex_encoder_ctl(s->enc_state, SPEEX_SET_QUALITY,
00181                               &s->cbr_quality);
00182             speex_encoder_ctl(s->enc_state, SPEEX_GET_BITRATE,
00183                               &s->header.bitrate);
00184         }
00185         /* stereo side information adds about 800 bps to the base bitrate */
00186         /* TODO: this should be calculated exactly */
00187         avctx->bit_rate = s->header.bitrate + (avctx->channels == 2 ? 800 : 0);
00188     }
00189 
00190     /* set encoding complexity */
00191     if (avctx->compression_level > FF_COMPRESSION_DEFAULT) {
00192         complexity = av_clip(avctx->compression_level, 0, 10);
00193         speex_encoder_ctl(s->enc_state, SPEEX_SET_COMPLEXITY, &complexity);
00194     }
00195     speex_encoder_ctl(s->enc_state, SPEEX_GET_COMPLEXITY, &complexity);
00196     avctx->compression_level = complexity;
00197 
00198     /* set packet size */
00199     avctx->frame_size = s->header.frame_size;
00200     s->header.frames_per_packet = s->frames_per_packet;
00201 
00202     /* set encoding delay */
00203     speex_encoder_ctl(s->enc_state, SPEEX_GET_LOOKAHEAD, &s->lookahead);
00204     s->sample_count = -s->lookahead;
00205 
00206     /* create header packet bytes from header struct */
00207     /* note: libspeex allocates the memory for header_data, which is freed
00208              below with speex_header_free() */
00209     header_data = speex_header_to_packet(&s->header, &header_size);
00210 
00211     /* allocate extradata and coded_frame */
00212     avctx->extradata   = av_malloc(header_size + FF_INPUT_BUFFER_PADDING_SIZE);
00213     avctx->coded_frame = avcodec_alloc_frame();
00214     if (!avctx->extradata || !avctx->coded_frame) {
00215         speex_header_free(header_data);
00216         speex_encoder_destroy(s->enc_state);
00217         av_log(avctx, AV_LOG_ERROR, "memory allocation error\n");
00218         return AVERROR(ENOMEM);
00219     }
00220 
00221     /* copy header packet to extradata */
00222     memcpy(avctx->extradata, header_data, header_size);
00223     avctx->extradata_size = header_size;
00224     speex_header_free(header_data);
00225 
00226     /* init libspeex bitwriter */
00227     speex_bits_init(&s->bits);
00228 
00229     print_enc_params(avctx, s);
00230     return 0;
00231 }
00232 
00233 static int encode_frame(AVCodecContext *avctx, uint8_t *frame, int buf_size,
00234                         void *data)
00235 {
00236     LibSpeexEncContext *s = avctx->priv_data;
00237     int16_t *samples      = data;
00238     int sample_count      = s->sample_count;
00239 
00240     if (data) {
00241         /* encode Speex frame */
00242         if (avctx->channels == 2)
00243             speex_encode_stereo_int(samples, s->header.frame_size, &s->bits);
00244         speex_encode_int(s->enc_state, samples, &s->bits);
00245         s->pkt_frame_count++;
00246         s->sample_count += avctx->frame_size;
00247     } else {
00248         /* handle end-of-stream */
00249         if (!s->pkt_frame_count)
00250             return 0;
00251         /* add extra terminator codes for unused frames in last packet */
00252         while (s->pkt_frame_count < s->frames_per_packet) {
00253             speex_bits_pack(&s->bits, 15, 5);
00254             s->pkt_frame_count++;
00255         }
00256     }
00257 
00258     /* write output if all frames for the packet have been encoded */
00259     if (s->pkt_frame_count == s->frames_per_packet) {
00260         s->pkt_frame_count = 0;
00261         avctx->coded_frame->pts =
00262             av_rescale_q(sample_count, (AVRational){ 1, avctx->sample_rate },
00263                          avctx->time_base);
00264         if (buf_size > speex_bits_nbytes(&s->bits)) {
00265             int ret = speex_bits_write(&s->bits, frame, buf_size);
00266             speex_bits_reset(&s->bits);
00267             return ret;
00268         } else {
00269             av_log(avctx, AV_LOG_ERROR, "output buffer too small");
00270             return AVERROR(EINVAL);
00271         }
00272     }
00273     return 0;
00274 }
00275 
00276 static av_cold int encode_close(AVCodecContext *avctx)
00277 {
00278     LibSpeexEncContext *s = avctx->priv_data;
00279 
00280     speex_bits_destroy(&s->bits);
00281     speex_encoder_destroy(s->enc_state);
00282 
00283     av_freep(&avctx->coded_frame);
00284     av_freep(&avctx->extradata);
00285 
00286     return 0;
00287 }
00288 
00289 #define OFFSET(x) offsetof(LibSpeexEncContext, x)
00290 #define AE AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
00291 static const AVOption options[] = {
00292     { "abr",               "Use average bit rate",                      OFFSET(abr),               AV_OPT_TYPE_INT, { 0 }, 0,   1, AE },
00293     { "cbr_quality",       "Set quality value (0 to 10) for CBR",       OFFSET(cbr_quality),       AV_OPT_TYPE_INT, { 8 }, 0,  10, AE },
00294     { "frames_per_packet", "Number of frames to encode in each packet", OFFSET(frames_per_packet), AV_OPT_TYPE_INT, { 1 }, 1,   8, AE },
00295     { NULL },
00296 };
00297 
00298 static const AVClass class = {
00299     .class_name = "libspeex",
00300     .item_name  = av_default_item_name,
00301     .option     = options,
00302     .version    = LIBAVUTIL_VERSION_INT,
00303 };
00304 
00305 static const AVCodecDefault defaults[] = {
00306     { "b",                 "0" },
00307     { "compression_level", "3" },
00308     { NULL },
00309 };
00310 
00311 AVCodec ff_libspeex_encoder = {
00312     .name           = "libspeex",
00313     .type           = AVMEDIA_TYPE_AUDIO,
00314     .id             = CODEC_ID_SPEEX,
00315     .priv_data_size = sizeof(LibSpeexEncContext),
00316     .init           = encode_init,
00317     .encode         = encode_frame,
00318     .close          = encode_close,
00319     .capabilities   = CODEC_CAP_DELAY,
00320     .sample_fmts    = (const enum SampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE },
00321     .long_name      = NULL_IF_CONFIG_SMALL("libspeex Speex"),
00322     .priv_class     = &class,
00323     .defaults       = defaults,
00324 };