[FFmpeg-soc] [soc]: r3329 - aacenc/aacenc.c

kostya subversion at mplayerhq.hu
Fri Aug 15 18:55:57 CEST 2008


Author: kostya
Date: Fri Aug 15 18:55:57 2008
New Revision: 3329

Log:
(Almost) optimal band codebook selection

Modified:
   aacenc/aacenc.c

Modified: aacenc/aacenc.c
==============================================================================
--- aacenc/aacenc.c	(original)
+++ aacenc/aacenc.c	Fri Aug 15 18:55:57 2008
@@ -27,8 +27,7 @@
 /***********************************
  *              TODOs:
  * psy model selection with some option
- * change greedy codebook search into something more optimal, like Viterbi algorithm, add sane pulse detection
- * determine run lengths along with codebook
+ * add sane pulse detection
  ***********************************/
 
 #include "avcodec.h"
@@ -158,6 +157,15 @@ static const uint8_t aac_chan_configs[6]
 };
 
 /**
+ * structure used in optimal codebook search
+ */
+typedef struct BandCodingPath {
+    int prev_idx; ///< index of the previous path point
+    int codebook; ///< codebook for coding band run
+    int bits;     ///< number of bits needed to code given number of bands
+} BandCodingPath;
+
+/**
  * AAC encoder context
  */
 typedef struct {
@@ -177,6 +185,8 @@ typedef struct {
     ChannelElement *cpe;                         ///< channel elements
     AACPsyContext psy;                           ///< psychoacoustic model context
     int last_frame;
+    BandCodingPath path[64];                     ///< auxiliary data needed for optimal band info coding
+    int band_bits[64][12];                       ///< bits needed to encode each band with each codebook
 } AACEncContext;
 
 /**
@@ -339,78 +349,180 @@ static void encode_ms_info(PutBitContext
 }
 
 /**
- * Scan scalefactor band and determine optimal codebook for it.
+ * Return number of bits needed to write codebook run length value.
+ *
+ * @param run     run length
+ * @param bits    number of bits used to code value (5 for long frames, 3 for short frames)
+ */
+static av_always_inline int calculate_run_bits(int run, const int bits)
+{
+    int esc = (1 << bits) - 1;
+    return (1 + (run >= esc)) * bits;
+}
+
+/**
+ * Calculate the number of bits needed to code given band with given codebook.
  *
  * @param s       encoder context
  * @param cpe     channel element
  * @param channel channel number inside channel pair
  * @param win     window group start number
- * @param band    scalefactor band to analyze
  * @param start   scalefactor band position in spectral coefficients
  * @param size    scalefactor band size
+ * @param cb      codebook number
  */
-static int determine_section_info(AACEncContext *s, ChannelElement *cpe, int channel, int win, int group_len, int band, int start, int size)
+static int calculate_band_bits(AACEncContext *s, ChannelElement *cpe, int channel, int win, int group_len, int start, int size, int cb)
 {
     int i, j, w;
-    int maxval, sign;
-    int score, best, cb, bestcb, dim, idx, start2;
+    int score = 0, dim, idx, start2;
+    int range;
+
+    if(!cb) return 0;
+    cb--;
+    dim = (aac_cb_info[cb].flags & CB_PAIRS) ? 2 : 4;
+    if(aac_cb_info[cb].flags & CB_UNSIGNED)
+        range = aac_cb_info[cb].maxval + 1;
+    else
+        range = aac_cb_info[cb].maxval*2 + 1;
 
-    maxval = 0;
-    sign = 0;
-    w = win;
     start2 = start;
-    for(w = win; w < win + group_len; w++){
-        for(i = start2; i < start2 + size; i++){
-            maxval = FFMAX(maxval, FFABS(cpe->ch[channel].icoefs[i]));
-            if(cpe->ch[channel].icoefs[i] < 0) sign = 1;
+    if(aac_cb_info[cb].flags & CB_ESCAPE){
+        int coef_abs[2];
+        for(w = win; w < win + group_len; w++){
+            for(i = start2; i < start2 + size; i += dim){
+                idx = 0;
+                for(j = 0; j < dim; j++)
+                    coef_abs[j] = FFABS(cpe->ch[channel].icoefs[i+j]);
+                for(j = 0; j < dim; j++)
+                    idx = idx*17 + FFMIN(coef_abs[j], 16);
+                score += ff_aac_spectral_bits[cb][idx];
+                for(j = 0; j < dim; j++)
+                    if(cpe->ch[channel].icoefs[i+j])
+                        score++;
+                for(j = 0; j < dim; j++)
+                    if(coef_abs[j] > 15)
+                        score += av_log2(coef_abs[j]) * 2 - 4 + 1;
+            }
+            start2 += 128;
+       }
+    }else if(aac_cb_info[cb].flags & CB_UNSIGNED){
+        for(w = win; w < win + group_len; w++){
+            for(i = start2; i < start2 + size; i += dim){
+                idx = 0;
+                for(j = 0; j < dim; j++)
+                    idx = idx * range + FFABS(cpe->ch[channel].icoefs[i+j]);
+                score += ff_aac_spectral_bits[cb][idx];
+                for(j = 0; j < dim; j++)
+                     if(cpe->ch[channel].icoefs[i+j])
+                         score++;
+            }
+            start2 += 128;
+        }
+    }else{
+        for(w = win; w < win + group_len; w++){
+            for(i = start2; i < start2 + size; i += dim){
+                idx = 0;
+                for(j = 0; j < dim; j++)
+                    idx = idx * range + cpe->ch[channel].icoefs[i+j] + aac_cb_info[cb].maxval;
+                score += ff_aac_spectral_bits[cb][idx];
+            }
+            start2 += 128;
         }
-        start2 += 128;
     }
+    return score;
+}
 
-    if(maxval > 12) return 11;
-    if(!maxval) return 0;
+/**
+ * Encode band info for the bands of a single window group.
+ */
+static void encode_window_bands_info(AACEncContext *s, ChannelElement *cpe, int channel, int win, int group_len){
+    int maxval;
+    int w, swb, cb, ccb, start, start2, size;
+    int i, j, k;
+    const int max_sfb = cpe->ch[channel].ics.max_sfb;
+    const int run_bits = cpe->ch[channel].ics.num_windows == 1 ? 5 : 3;
+    const int run_esc = (1 << run_bits) - 1;
+    int bits, idx, count;
+    int stack[64], stack_len;
 
-    for(cb = 0; cb < 12; cb++)
-        if(aac_cb_info[cb].maxval >= maxval)
-            break;
-    best = INT_MAX;
-    bestcb = 11;
-    for(; cb < 12; cb++){
-        score = 0;
-        dim = (aac_cb_info[cb].flags & CB_PAIRS) ? 2 : 4;
-        if(!band || cpe->ch[channel].band_type[win][band - 1] != cb)
-            score += 9; //that's for new codebook entry
+    start = win*128;
+    for(swb = 0; swb < max_sfb; swb++){
+        maxval = 0;
         start2 = start;
-        if(aac_cb_info[cb].flags & CB_UNSIGNED){
+        size = cpe->ch[channel].ics.swb_sizes[swb];
+        if(cpe->ch[channel].zeroes[win][swb])
+            maxval = 0;
+        else{
             for(w = win; w < win + group_len; w++){
-                for(i = start2; i < start2 + size; i += dim){
-                    idx = 0;
-                    for(j = 0; j < dim; j++)
-                        idx = idx * aac_cb_info[cb].maxval + FFABS(cpe->ch[channel].icoefs[i+j]);
-                    score += ff_aac_spectral_bits[aac_cb_info[cb].cb_num][idx];
-                    for(j = 0; j < dim; j++)
-                        if(cpe->ch[channel].icoefs[i+j])
-                            score++;
+                for(i = start2; i < start2 + size; i++){
+                    maxval = FFMAX(maxval, FFABS(cpe->ch[channel].icoefs[i]));
                 }
                 start2 += 128;
             }
-        }else{
-            for(w = win; w < win + group_len; w++){
-                for(i = start2; i < start2 + size; i += dim){
-                    idx = 0;
-                    for(j = 0; j < dim; j++)
-                        idx = idx * (aac_cb_info[cb].maxval*2 + 1) + cpe->ch[channel].icoefs[i+j] + aac_cb_info[cb].maxval;
-                    score += ff_aac_spectral_bits[aac_cb_info[cb].cb_num][idx];
+        }
+        for(cb = 0; cb < 12; cb++){
+            if(aac_cb_info[cb].maxval < maxval)
+                s->band_bits[swb][cb] = INT_MAX;
+            else
+                s->band_bits[swb][cb] = calculate_band_bits(s, cpe, channel, win, group_len, start, size, cb);
+        }
+        start += cpe->ch[channel].ics.swb_sizes[swb];
+    }
+    s->path[0].bits = 0;
+    for(i = 1; i <= max_sfb; i++)
+        s->path[i].bits = INT_MAX;
+    for(i = 0; i < max_sfb; i++){
+        for(j = 1; j <= max_sfb - i; j++){
+            bits = INT_MAX;
+            ccb = 0;
+            for(cb = 0; cb < 12; cb++){
+                int sum = 0;
+                for(k = 0; k < j; k++){
+                    if(s->band_bits[i + k][cb] == INT_MAX){
+                        sum = INT_MAX;
+                        break;
+                    }
+                    sum += s->band_bits[i + k][cb];
+                }
+                if(sum < bits){
+                    bits = sum;
+                    ccb  = cb;
                 }
-                start2 += 128;
+            }
+            assert(bits != INT_MAX);
+            bits += s->path[i].bits + calculate_run_bits(j, run_bits);
+            if(bits < s->path[i+j].bits){
+                s->path[i+j].bits     = bits;
+                s->path[i+j].codebook = ccb;
+                s->path[i+j].prev_idx = i;
             }
         }
-        if(score < best){
-            best = score;
-            bestcb = cb;
+    }
+
+    //convert resulting path from backward-linked list
+    stack_len = 0;
+    idx = max_sfb;
+    while(idx > 0){
+        stack[stack_len++] = idx;
+        idx = s->path[idx].prev_idx;
+    }
+
+    //perform actual band info encoding
+    start = 0;
+    for(i = stack_len - 1; i >= 0; i--){
+        put_bits(&s->pb, 4, s->path[stack[i]].codebook);
+        count = stack[i] - s->path[stack[i]].prev_idx;
+        for(j = 0; j < count; j++){
+            cpe->ch[channel].band_type[win][start] =  s->path[stack[i]].codebook;
+            cpe->ch[channel].zeroes[win][start]    = !s->path[stack[i]].codebook;
+            start++;
+        }
+        while(count >= run_esc){
+            put_bits(&s->pb, run_bits, run_esc);
+            count -= run_esc;
         }
+        put_bits(&s->pb, run_bits, count);
     }
-    return bestcb;
 }
 
 /**
@@ -478,35 +590,11 @@ static void encode_band_coeffs(AACEncCon
  */
 static void encode_band_info(AVCodecContext *avctx, AACEncContext *s, ChannelElement *cpe, int channel)
 {
-    int i, w, wg;
-    int bits = cpe->ch[channel].ics.num_windows == 1 ? 5 : 3;
-    int esc = (1 << bits) - 1;
-    int count;
+    int w, wg;
 
     w = 0;
     for(wg = 0; wg < cpe->ch[channel].ics.num_window_groups; wg++){
-        count = 0;
-        for(i = 0; i < cpe->ch[channel].ics.max_sfb; i++){
-            if(!i || cpe->ch[channel].band_type[w][i] != cpe->ch[channel].band_type[w][i-1]){
-                if(count){
-                    while(count >= esc){
-                        put_bits(&s->pb, bits, esc);
-                        count -= esc;
-                    }
-                    put_bits(&s->pb, bits, count);
-                }
-                put_bits(&s->pb, 4, cpe->ch[channel].band_type[w][i]);
-                count = 1;
-            }else
-                count++;
-        }
-        if(count){
-            while(count >= esc){
-                put_bits(&s->pb, bits, esc);
-                count -= esc;
-            }
-            put_bits(&s->pb, bits, count);
-        }
+        encode_window_bands_info(s, cpe, channel, w, cpe->ch[channel].ics.group_len[wg]);
         w += cpe->ch[channel].ics.group_len[wg];
     }
 }
@@ -622,23 +710,9 @@ static void encode_spectral_coeffs(AVCod
 static int encode_individual_channel(AVCodecContext *avctx, ChannelElement *cpe, int channel)
 {
     AACEncContext *s = avctx->priv_data;
-    int i, g, w, wg;
+    int g, w, wg;
     int global_gain;
 
-    w = 0;
-    for(wg = 0; wg < cpe->ch[channel].ics.num_window_groups; wg++){
-        i = w << 7;
-        for(g = 0; g < cpe->ch[channel].ics.max_sfb; g++){
-            if(!cpe->ch[channel].zeroes[w][g]){
-                cpe->ch[channel].band_type[w][g] = determine_section_info(s, cpe, channel, w, cpe->ch[channel].ics.group_len[wg], g, i, cpe->ch[channel].ics.swb_sizes[g]);
-                cpe->ch[channel].zeroes[w][g] = !cpe->ch[channel].band_type[w][g];
-            }else
-                cpe->ch[channel].band_type[w][g] = 0;
-            i += cpe->ch[channel].ics.swb_sizes[g];
-        }
-        w += cpe->ch[channel].ics.group_len[wg];
-    }
-
     //determine global gain as standard recommends - the first scalefactor value
     global_gain = 0;
     w = 0;



More information about the FFmpeg-soc mailing list