[FFmpeg-soc] [soc]: r2887 - in aacenc: aacenc.c aacpsy.c aacpsy.h

kostya subversion at mplayerhq.hu
Mon Jul 28 15:22:53 CEST 2008


Author: kostya
Date: Mon Jul 28 15:22:53 2008
New Revision: 2887

Log:
Rudimentary multichannel support

Modified:
   aacenc/aacenc.c
   aacenc/aacpsy.c
   aacenc/aacpsy.h

Modified: aacenc/aacenc.c
==============================================================================
--- aacenc/aacenc.c	(original)
+++ aacenc/aacenc.c	Mon Jul 28 15:22:53 2008
@@ -181,7 +181,9 @@ typedef struct {
     int swb_num1024;
     const uint8_t *swb_sizes128;
     int swb_num128;
-    ChannelElement cpe;
+
+    ProgramConfig pc;
+    ChannelElement *cpe;
     AACPsyContext psy;
 } AACEncContext;
 
@@ -239,6 +241,7 @@ static av_cold int aac_encode_init(AVCod
     ff_sine_window_init(sine_long_1024, 1024);
     ff_sine_window_init(sine_short_128, 128);
 
+    s->cpe = av_mallocz(sizeof(ChannelElement) * ((avctx->channels + 1) >> 1));
     //TODO: psy model selection with some option
     ff_aac_psy_init(&s->psy, avctx, AAC_PSY_3GPP, 0, s->swb_sizes1024, s->swb_num1024, s->swb_sizes128, s->swb_num128);
     avctx->extradata = av_malloc(2);
@@ -300,6 +303,45 @@ static void analyze(AVCodecContext *avct
 }
 
 /**
+ * Encode channel layout (aka program config element) into s->pb: every channel goes into a front element, CPEs first and one SCE last if avctx->channels is odd.
+ * Mixdown fields are taken from s->pc. @see table 4.2
+ */
+static void put_program_config_element(AVCodecContext *avctx, AACEncContext *s)
+{
+    int i;
+    ProgramConfig *pc = &s->pc;
+
+    put_bits(&s->pb, 2, 0); //object type - ?
+    put_bits(&s->pb, 4, s->samplerate_index); //sample rate index
+
+    put_bits(&s->pb, 4, (avctx->channels + 1) / 2); // all elements are front :) - must equal the number of entries the loop below writes
+    put_bits(&s->pb, 4, 0); // no side channels
+    put_bits(&s->pb, 4, 0); // no back channels
+    put_bits(&s->pb, 2, 0); // no LFE
+    put_bits(&s->pb, 3, 0); // no associated data
+    put_bits(&s->pb, 4, 0); // no valid channel couplings
+
+    put_bits(&s->pb, 1, pc->mono_mixdown);
+    if(pc->mono_mixdown)
+        put_bits(&s->pb, 4, pc->mixdown_coeff_index);
+    put_bits(&s->pb, 1, pc->stereo_mixdown);
+    if(pc->stereo_mixdown)
+        put_bits(&s->pb, 4, pc->mixdown_coeff_index);
+    put_bits(&s->pb, 1, pc->matrix_mixdown);
+    if(pc->matrix_mixdown){
+        put_bits(&s->pb, 2, pc->mixdown_coeff_index);
+        put_bits(&s->pb, 1, pc->pseudo_surround);
+    }
+    //TODO: proper channel map output
+    for(i = 0; i < avctx->channels; i += 2){
+        put_bits(&s->pb, 1, avctx->channels - i > 1); // CPE while two channels remain, SCE for a trailing single channel
+        put_bits(&s->pb, 4, i/2);                     // element tag, same numbering as aac_encode_frame uses
+    }
+    align_put_bits(&s->pb);
+    put_bits(&s->pb, 8, 0); // no commentary bytes
+}
+
+/**
  * Encode ics_info element.
  * @see Table 4.6
  */
@@ -654,45 +696,47 @@ static int aac_encode_frame(AVCodecConte
                             uint8_t *frame, int buf_size, void *data)
 {
     AACEncContext *s = avctx->priv_data;
-    int16_t *samples = s->samples;
+    int16_t *samples = s->samples, *samples2;
+    ChannelElement *cpe;
+    int i, j, chans;
 
     if(!samples){
         s->samples = av_malloc(1024 * avctx->channels * sizeof(s->samples[0]));
         memcpy(s->samples, data, 1024 * avctx->channels * sizeof(s->samples[0]));
         return 0;
     }
-    ff_aac_psy_suggest_window(&s->psy, samples, data, 0, &s->cpe);
-
-    analyze(avctx, s, &s->cpe, samples, 0);
-    if(avctx->channels > 1)
-        analyze(avctx, s, &s->cpe, samples, 1);
-
-    ff_aac_psy_analyze(&s->psy, 0, &s->cpe);
 
     init_put_bits(&s->pb, frame, buf_size*8);
     if(avctx->frame_number==1 && !(avctx->flags & CODEC_FLAG_BITEXACT)){
         put_bitstream_info(avctx, s, LIBAVCODEC_IDENT);
     }
-    switch(avctx->channels){
-    case 1:
-        put_bits(&s->pb, 3, ID_SCE);
-        put_bits(&s->pb, 4, 0); //tag
-        encode_individual_channel(avctx, &s->cpe, 0);
-        break;
-    case 2:
-        put_bits(&s->pb, 3, ID_CPE);
-        put_bits(&s->pb, 4, 0); //tag
-        put_bits(&s->pb, 1, s->cpe.common_window);
-        if(s->cpe.common_window){
-            put_ics_info(avctx, &s->cpe.ch[0].ics);
-            encode_ms_info(&s->pb, &s->cpe);
+    //encode channels as channel pairs and one optional single channel element
+    /*if(avctx->channels > 2){
+        put_bits(&s->pb, 3, ID_PCE);
+        put_bits(&s->pb, 4, 0);
+        put_program_config_element(avctx, s);
+    }*/
+    for(i = 0; i < avctx->channels; i += 2){
+        chans = FFMIN(avctx->channels - i, 2);
+        cpe = &s->cpe[i/2];
+        samples2 = samples + i;
+        ff_aac_psy_suggest_window(&s->psy, samples2, data, i, cpe);
+        for(j = 0; j < chans; j++){
+            analyze(avctx, s, cpe, samples2, j);
+        }
+        ff_aac_psy_analyze(&s->psy, i, cpe);
+        put_bits(&s->pb, 3, chans > 1 ? ID_CPE : ID_SCE);
+        put_bits(&s->pb, 4, i >> 1);
+        if(chans == 2){
+            put_bits(&s->pb, 1, cpe->common_window);
+            if(cpe->common_window){
+                put_ics_info(avctx, &cpe->ch[0].ics);
+                encode_ms_info(&s->pb, cpe);
+            }
+        }
+        for(j = 0; j < chans; j++){
+            encode_individual_channel(avctx, cpe, j);
         }
-        encode_individual_channel(avctx, &s->cpe, 0);
-        encode_individual_channel(avctx, &s->cpe, 1);
-        break;
-    default:
-        av_log(avctx, AV_LOG_ERROR, "Unsupported number of channels: %d\n", avctx->channels);
-        return -1;
     }
 
     put_bits(&s->pb, 3, ID_END);
@@ -713,6 +757,7 @@ static av_cold int aac_encode_end(AVCode
     ff_mdct_end(&s->mdct128);
     ff_aac_psy_end(&s->psy);
     av_freep(&s->samples);
+    av_freep(&s->cpe);
     return 0;
 }
 

Modified: aacenc/aacpsy.c
==============================================================================
--- aacenc/aacpsy.c	(original)
+++ aacenc/aacpsy.c	Mon Jul 28 15:22:53 2008
@@ -76,13 +76,14 @@ static inline float calc_distortion(floa
 /**
  * Produce integer coefficients from scalefactors provided by model.
  */
-static void psy_create_output(AACPsyContext *apc, ChannelElement *cpe, int search_pulses)
+static void psy_create_output(AACPsyContext *apc, ChannelElement *cpe, int channel, int search_pulses)
 {
     int i, w, w2, g, ch;
     int start, sum, maxsfb, cmaxsfb;
     int pulses, poff[4], pamp[4];
+    int chans = FFMIN(apc->avctx->channels - channel, 2);
 
-    for(ch = 0; ch < apc->avctx->channels; ch++){
+    for(ch = 0; ch < chans; ch++){
         start = 0;
         maxsfb = 0;
         cpe->ch[ch].pulse.present = 0;
@@ -152,7 +153,7 @@ static void psy_create_output(AACPsyCont
         }
     }
 
-    if(apc->avctx->channels > 1 && cpe->common_window){
+    if(apc->avctx->channels - channel > 1 && cpe->common_window){
         int msc = 0;
         cpe->ch[0].ics.max_sfb = FFMAX(cpe->ch[0].ics.max_sfb, cpe->ch[1].ics.max_sfb);
         cpe->ch[1].ics.max_sfb = cpe->ch[0].ics.max_sfb;
@@ -167,8 +168,9 @@ static void psy_create_output(AACPsyCont
 static void psy_null_window(AACPsyContext *apc, int16_t *audio, int16_t *la, int channel, ChannelElement *cpe)
 {
     int ch;
+    int chans = FFMIN(apc->avctx->channels - channel, 2);
 
-    for(ch = 0; ch < apc->avctx->channels; ch++){
+    for(ch = 0; ch < chans; ch++){
         cpe->ch[ch].ics.window_sequence = ONLY_LONG_SEQUENCE;
         cpe->ch[ch].ics.use_kb_window[0] = 1;
         cpe->ch[ch].ics.num_windows = 1;
@@ -184,8 +186,9 @@ static void psy_null_process(AACPsyConte
     int start;
     int ch, g, i;
     int minscale;
+    int chans = FFMIN(apc->avctx->channels - channel, 2);
 
-    for(ch = 0; ch < apc->avctx->channels; ch++){
+    for(ch = 0; ch < chans; ch++){
         start = 0;
         for(g = 0; g < apc->num_bands1024; g++){
             float energy = 0.0f, ffac = 0.0f, thr, dist;
@@ -207,7 +210,7 @@ static void psy_null_process(AACPsyConte
             }
         }
     }
-    for(ch = 0; ch < apc->avctx->channels; ch++){
+    for(ch = 0; ch < chans; ch++){
         minscale = 255;
         for(g = 0; g < apc->num_bands1024; g++)
             if(!cpe->ch[ch].zeroes[0][g])
@@ -217,14 +220,15 @@ static void psy_null_process(AACPsyConte
             if(!cpe->ch[ch].zeroes[0][g])
                 cpe->ch[ch].sf_idx[0][g] = FFMIN(minscale + SCALE_MAX_DIFF, cpe->ch[ch].sf_idx[0][g]);
     }
-    psy_create_output(apc, cpe, 1);
+    psy_create_output(apc, cpe, channel, 1);
 }
 
 static void psy_null8_window(AACPsyContext *apc, int16_t *audio, int16_t *la, int channel, ChannelElement *cpe)
 {
     int ch, i;
+    int chans = FFMIN(apc->avctx->channels - channel, 2);
 
-    for(ch = 0; ch < apc->avctx->channels; ch++){
+    for(ch = 0; ch < chans; ch++){
         int prev_seq = cpe->ch[ch].ics.window_sequence_prev;
         cpe->ch[ch].ics.use_kb_window[1] = cpe->ch[ch].ics.use_kb_window[0];
         cpe->ch[ch].ics.window_sequence_prev = cpe->ch[ch].ics.window_sequence;
@@ -257,9 +261,10 @@ static void psy_null8_process(AACPsyCont
 {
     int start;
     int w, ch, g, i;
+    int chans = FFMIN(apc->avctx->channels - channel, 2);
 
     //detect M/S
-    if(apc->avctx->channels > 1 && cpe->common_window){
+    if(chans > 1 && cpe->common_window){
         start = 0;
         for(w = 0; w < cpe->ch[0].ics.num_windows; w++){
             for(g = 0; g < cpe->ch[0].ics.num_swb; g++){
@@ -271,7 +276,7 @@ static void psy_null8_process(AACPsyCont
             }
         }
     }
-    for(ch = 0; ch < apc->avctx->channels; ch++){
+    for(ch = 0; ch < chans; ch++){
         cpe->ch[ch].gain = SCALE_ONE_POS;
         for(w = 0; w < cpe->ch[ch].ics.num_windows; w++){
             for(g = 0; g < cpe->ch[ch].ics.num_swb; g++){
@@ -280,7 +285,7 @@ static void psy_null8_process(AACPsyCont
             }
         }
     }
-    psy_create_output(apc, cpe, 0);
+    psy_create_output(apc, cpe, channel, 0);
 }
 
 /**
@@ -422,9 +427,10 @@ static av_cold int psy_3gpp_init(AACPsyC
 static void psy_3gpp_window(AACPsyContext *apc, int16_t *audio, int16_t *la, int channel, ChannelElement *cpe)
 {
     int ch;
+    int chans = FFMIN(apc->avctx->channels - channel, 2);
 
 //XXX: stub, because encoder does not support long to short window transition yet :(
-    for(ch = 0; ch < apc->avctx->channels; ch++){
+    for(ch = 0; ch < chans; ch++){
         cpe->ch[ch].ics.window_sequence = ONLY_LONG_SEQUENCE;
         cpe->ch[ch].ics.use_kb_window[0] = 1;
         cpe->ch[ch].ics.num_windows = 1;
@@ -479,9 +485,10 @@ static void psy_3gpp_process(AACPsyConte
     Psy3gppContext *pctx = (Psy3gppContext*) apc->model_priv_data;
     float stereo_att, pe_target;
     int bits_avail;
+    const int chans = FFMIN(apc->avctx->channels - channel, 2);
 
     //calculate and apply stereo attenuation factor - 5.2
-    if(apc->avctx->channels > 1){
+    if(apc->avctx->channels - channel > 1){
         float l, r;
         stereo_att = 1.0 / 2.0; //XXX: find some way to determine it
         for(i = 0; i < 1024; i++){
@@ -494,7 +501,7 @@ static void psy_3gpp_process(AACPsyConte
 
     //calculate energies, initial thresholds and related values - 5.4.2
     memset(pctx->band, 0, sizeof(pctx->band));
-    for(ch = 0; ch < apc->avctx->channels; ch++){
+    for(ch = 0; ch < chans; ch++){
         start = 0;
         cpe->ch[ch].gain = 0;
         for(w = 0; w < cpe->ch[ch].ics.num_windows; w++){
@@ -517,7 +524,7 @@ static void psy_3gpp_process(AACPsyConte
     }
 
     //modify thresholds - spread, threshold in quiet - 5.4.3
-    for(ch = 0; ch < apc->avctx->channels; ch++){
+    for(ch = 0; ch < chans; ch++){
         for(w = 0; w < cpe->ch[ch].ics.num_windows; w++){
             for(g = 1; g < cpe->ch[ch].ics.num_swb; g++){
                 g2 = w*16 + g;
@@ -540,7 +547,7 @@ static void psy_3gpp_process(AACPsyConte
     }
 
     // M/S detection - 5.5.2
-    if(apc->avctx->channels > 1 && cpe->common_window){
+    if(chans > 1 && cpe->common_window){
         start = 0;
         for(w = 0; w < cpe->ch[0].ics.num_windows; w++){
             for(g = 0; g < cpe->ch[0].ics.num_swb; g++){
@@ -571,7 +578,7 @@ static void psy_3gpp_process(AACPsyConte
         }
     }
 
-    for(ch = 0; ch < apc->avctx->channels; ch++){
+    for(ch = 0; ch < chans; ch++){
         pctx->a[ch] = pctx->b[ch] = pctx->pe[ch] = pctx->thr[ch] = 0.0f;
         for(w = 0; w < cpe->ch[ch].ics.num_windows; w++){
             for(g = 0; g < cpe->ch[ch].ics.num_swb; g++){
@@ -594,7 +601,7 @@ static void psy_3gpp_process(AACPsyConte
     bits_avail = pctx->avg_bits + pctx->reservoir;
     bits_avail = FFMIN(bits_avail, pctx->avg_bits * 1.5);
     pe_target = 1.18f * bits_avail / apc->avctx->channels;
-    for(ch = 0; ch < apc->avctx->channels; ch++){
+    for(ch = 0; ch < chans; ch++){
         float t0, pe, r;
         if(pctx->b[ch] == 0.0f) continue;
         for(i = 0; i < 2; i++){
@@ -622,7 +629,7 @@ static void psy_3gpp_process(AACPsyConte
     }
 
     //determine scalefactors - 5.6.2
-    for(ch = 0; ch < apc->avctx->channels; ch++){
+    for(ch = 0; ch < chans; ch++){
         int min_scale = 256;
         prev_scale = -1;
         cpe->ch[ch].gain = 0;
@@ -655,7 +662,7 @@ static void psy_3gpp_process(AACPsyConte
     }
 
     memcpy(pctx->prev_band, pctx->band, sizeof(pctx->band));
-    psy_create_output(apc, cpe, 0);
+    psy_create_output(apc, cpe, channel, 0);
 }
 
 static av_cold void psy_3gpp_end(AACPsyContext *apc)

Modified: aacenc/aacpsy.h
==============================================================================
--- aacenc/aacpsy.h	(original)
+++ aacenc/aacpsy.h	Mon Jul 28 15:22:53 2008
@@ -68,6 +68,21 @@ typedef struct {
     int amp[4];
 } Pulse;
 
+#define MAX_TAGID 16 ///< size of the second dimension of ProgramConfig::che_type
+
+/**
+ * Program configuration - describes how channels are arranged. Either read from
+ * stream (ID_PCE) or created based on a default fixed channel arrangement.
+ */
+typedef struct {
+    int che_type[4][MAX_TAGID]; ///< channel element type with the first index as the first 4 raw_data_block IDs
+    int mono_mixdown;           ///< The SCE tag to use if user requests mono   output, -1 if not available. NOTE(review): put_program_config_element() in aacenc.c writes this as a 1-bit flag - confirm which meaning the encoder expects.
+    int stereo_mixdown;         ///< The CPE tag to use if user requests stereo output, -1 if not available. NOTE(review): also written as a 1-bit flag by the encoder - confirm.
+    int matrix_mixdown;         ///< The CPE tag to use if user requests matrixed stereo output, -1 if not available. NOTE(review): also written as a 1-bit flag by the encoder - confirm.
+    int mixdown_coeff_index;    ///< 0-3. NOTE(review): the encoder also writes this value as the 4-bit mono/stereo mixdown field - confirm that is intended.
+    int pseudo_surround;        ///< Mix surround channels out of phase.
+} ProgramConfig;
+
 /**
  * Individual Channel Stream
  */



More information about the FFmpeg-soc mailing list