[FFmpeg-devel] [PATCH 10/11] aaccoder: implement intensity stereo

Rostislav Pehlivanov atomnuker at gmail.com
Fri Jun 26 22:16:39 CEST 2015


This commit implements intensity stereo band marking and makes use of the previous commits in the series to encode such bands and spectral coefficients. It also adds the new option aac_is, which is 0 by default, used to control whether such coding is done or not. The interaction of intensity stereo and mid/side coding is again taken from the decoder, more specifically the way MS changes the phase of IS spectral coefficients.
---
 libavcodec/aaccoder.c | 99 +++++++++++++++++++++++++++++++++++++++++++++++++++
 libavcodec/aacenc.c   | 11 ++++--
 libavcodec/aacenc.h   |  2 ++
 3 files changed, 110 insertions(+), 2 deletions(-)

diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c
index 3cd9fa2..6b64c8e 100644
--- a/libavcodec/aaccoder.c
+++ b/libavcodec/aaccoder.c
@@ -1178,6 +1178,131 @@ static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChanne
     }
 }
 
+/**
+ * Decide, band by band, whether a channel pair is coded with intensity stereo.
+ *
+ * For every scalefactor band above INT_STEREO_LOW_LIMIT in which neither
+ * channel is zeroed or noise coded, the cost of coding both channels
+ * separately (dist1) is compared with the cost of coding one intensity signal
+ * plus the spectral error that substitution introduces (dist2). Bands where
+ * intensity coding is not more expensive get is_mask set, is_ener filled in
+ * for both channels and the band type of the second channel switched to
+ * INTENSITY_BT or INTENSITY_BT2, depending on whether the summed coefficients
+ * of the two channels share a sign.
+ *
+ * @param s      encoder context; s->scoefs is used as scratch space
+ * @param avctx  codec context, read for the sample rate
+ * @param cpe    channel pair to analyse; is_mask, is_mode, ch[].is_ener and
+ *               ch[1].band_type are updated
+ * @param lambda rate-distortion multiplier handed to quantize_band_cost()
+ */
+static void search_for_is(AACEncContext *s, AVCodecContext *avctx, ChannelElement *cpe,
+                          const float lambda)
+{
+    float IS[128];
+    float *L34 = s->scoefs, *R34 = s->scoefs + 128, *I34 = s->scoefs + 128*2;
+    SingleChannelElement *sce0 = &cpe->ch[0];
+    SingleChannelElement *sce1 = &cpe->ch[1];
+    int start, count = 0, i, w, w2, g;
+    const float freq_mult = avctx->sample_rate/(1024.0f/sce0->ics.num_windows)/2.0f;
+
+    memset(cpe->is_mask, 0, sizeof(uint8_t)*128);
+
+    /* Drop intensity band types left over in the second channel. */
+    for (w = 0; w < 128; w++)
+        if (sce1->band_type[w] >= INTENSITY_BT2)
+            sce1->band_type[w] = 0;
+
+    if (!cpe->common_window) {
+        /* No band can be marked, so do not leave a stale is_mode behind. */
+        cpe->is_mode = 0;
+        return;
+    }
+    for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
+        /* Band offsets are relative to a window: restart for every group. */
+        start = 0;
+        for (g = 0;  g < sce0->ics.num_swb; g++) {
+            if (start*freq_mult > INT_STEREO_LOW_LIMIT &&
+                cpe->ch[0].band_type[w*16+g] != NOISE_BT && !cpe->ch[0].zeroes[w*16+g] &&
+                cpe->ch[1].band_type[w*16+g] != NOISE_BT && !cpe->ch[1].zeroes[w*16+g]) {
+                float ener0 = 0.0f, ener1 = 0.0f, ener01 = 0.0f;
+                float s_coef0 = 0.0f, s_coef1 = 0.0f, dist1 = 0.0f, dist2 = 0.0f;
+                for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
+                    FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
+                    FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
+                    /* First coefficient of band g in window w+w2. */
+                    const int offset = start + (w+w2)*128;
+                    float dist_spec_err = 0.0f, sener101, sener101_34, maxval;
+                    float minthr = FFMIN(band0->threshold, band1->threshold);
+                    int is_sf_idx = FFMAX(1, sce0->sf_idx[(w+w2)*16+g]-4);
+                    int is_band_type, p;
+                    for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
+                        float coef0 = sce0->pcoeffs[offset+i];
+                        float coef1 = sce1->pcoeffs[offset+i];
+                        ener0 += sqrt(coef0*coef0);
+                        ener1 += sqrt(coef1*coef1);
+                        s_coef0 += coef0;
+                        s_coef1 += coef1;
+                        ener01 += sqrt((coef0 + coef1)*(coef0 + coef1));
+                    }
+                    sener101 = ener1/ener01;
+                    sener101_34 = sqrt(sener101 * sqrt(sener101));
+                    /* Sign used to fold the second channel into the intensity signal;
+                     * flipped when the band is also marked in ms_mask. */
+                    p = s_coef0*s_coef1 >= 0.0f ? 1 : -1;
+                    if (cpe->ms_mask[w*16+g])
+                        p *= 1 - 2 * cpe->ms_mask[w*16 + g];
+                    for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
+                        IS[i] = (sce0->pcoeffs[offset+i] + p*sce1->pcoeffs[offset+i]) * ener0/ener01;
+                    }
+                    abs_pow34_v(L34, sce0->coeffs+offset, sce0->ics.swb_sizes[g]);
+                    abs_pow34_v(R34, sce1->coeffs+offset, sce0->ics.swb_sizes[g]);
+                    abs_pow34_v(I34, IS,                  sce0->ics.swb_sizes[g]);
+                    /* The codebook must be picked from the I34 values just computed. */
+                    maxval = find_max_val(1, sce0->ics.swb_sizes[g], I34);
+                    is_band_type = find_min_book(maxval, is_sf_idx);
+                    dist1 += quantize_band_cost(s, sce0->coeffs + offset,
+                                                L34,
+                                                sce0->ics.swb_sizes[g],
+                                                sce0->sf_idx[(w+w2)*16+g],
+                                                sce0->band_type[(w+w2)*16+g],
+                                                lambda / band0->threshold, INFINITY, NULL);
+                    dist1 += quantize_band_cost(s, sce1->coeffs + offset,
+                                                R34,
+                                                sce1->ics.swb_sizes[g],
+                                                sce1->sf_idx[(w+w2)*16+g],
+                                                sce1->band_type[(w+w2)*16+g],
+                                                lambda / band1->threshold, INFINITY, NULL);
+                    dist2 += quantize_band_cost(s, IS,
+                                                I34,
+                                                sce0->ics.swb_sizes[g],
+                                                is_sf_idx,
+                                                is_band_type,
+                                                lambda / minthr, INFINITY, NULL);
+                    for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
+                        dist_spec_err += (L34[i] - I34[i])*(L34[i] - I34[i]);
+                        dist_spec_err += (R34[i] - I34[i] * sener101_34)*(R34[i] - I34[i] * sener101_34);
+                    }
+                    dist_spec_err *= lambda / minthr;
+                    dist2 += dist_spec_err;
+                }
+                if (dist2 <= dist1) {
+                    cpe->is_mask[w*16+g] = 1;
+                    cpe->ch[0].is_ener[w*16+g] = ener1/ener01;
+                    cpe->ch[1].is_ener[w*16+g] = ener0/ener1;
+                    if (s_coef0*s_coef1 >= 0.0f)
+                        cpe->ch[1].band_type[w*16+g] = INTENSITY_BT;
+                    else
+                        cpe->ch[1].band_type[w*16+g] = INTENSITY_BT2;
+                    count++;
+                }
+            }
+            start += sce0->ics.swb_sizes[g];
+        }
+    }
+    cpe->is_mode = !!count;
+}
+
 static void search_for_ms(AACEncContext *s, ChannelElement *cpe,
                           const float lambda)
 {
@@ -1247,6 +1342,7 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
         set_special_band_scalefactors,
         search_for_pns,
         search_for_ms,
+        search_for_is,
     },
     [AAC_CODER_ANMR] = {
         search_for_quantizers_anmr,
@@ -1255,6 +1351,7 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
         set_special_band_scalefactors,
         search_for_pns,
         search_for_ms,
+        search_for_is,
     },
     [AAC_CODER_TWOLOOP] = {
         search_for_quantizers_twoloop,
@@ -1263,6 +1360,7 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
         set_special_band_scalefactors,
         search_for_pns,
         search_for_ms,
+        search_for_is,
     },
     [AAC_CODER_FAST] = {
         search_for_quantizers_fast,
@@ -1271,5 +1369,6 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
         set_special_band_scalefactors,
         search_for_pns,
         search_for_ms,
+        search_for_is,
     },
 };
diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c
index 3fc0a1f..ec25a98 100644
--- a/libavcodec/aacenc.c
+++ b/libavcodec/aacenc.c
@@ -552,7 +552,7 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     AACEncContext *s = avctx->priv_data;
     float **samples = s->planar_samples, *samples2, *la, *overlap;
     ChannelElement *cpe;
-    int i, ch, w, g, chans, tag, start_ch, ret, ms_mode = 0;
+    int i, ch, w, g, chans, tag, start_ch, ret, ms_mode = 0, is_mode = 0;
     int chan_el_counter[4];
     FFPsyWindowInfo windows[AAC_MAX_CHANNELS];
 
@@ -675,6 +675,10 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
                     s->coder->search_for_ms(s, cpe, s->lambda);
                 }
             }
+            if (chans > 1 && s->options.intensity_stereo && s->coder->search_for_is) {
+                s->coder->search_for_is(s, avctx, cpe, s->lambda);
+                if (cpe->is_mode) is_mode = 1;
+            }
             if (s->coder->set_special_band_scalefactors) {
                 for (ch = 0; ch < chans; ch++) {
                     s->cur_channel = start_ch + ch;
@@ -703,7 +707,7 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
             s->psy.bitres.bits = frame_bits / s->channels;
             break;
         }
-        if (ms_mode) {
+        if (is_mode || ms_mode) {
             for (i = 0; i < s->chan_map[0]; i++) {
                 // Must restore coeffs
                 chans = tag == TYPE_CPE ? 2 : 1;
@@ -881,6 +885,9 @@ static const AVOption aacenc_options[] = {
     {"aac_pns", "Perceptual Noise Substitution", offsetof(AACEncContext, options.pns), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, AACENC_FLAGS, "aac_pns"},
         {"disable",  "Disable PNS", 0, AV_OPT_TYPE_CONST, {.i64 =  0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_pns"},
         {"enable",   "Enable PNS (Proof of concept)",  0, AV_OPT_TYPE_CONST, {.i64 =  1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_pns"},
+    {"aac_is", "Intensity stereo coding", offsetof(AACEncContext, options.intensity_stereo), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, AACENC_FLAGS, "intensity_stereo"},
+        {"disable", "Disable intensity stereo coding", 0, AV_OPT_TYPE_CONST, {.i64 = 0}, INT_MIN, INT_MAX, AACENC_FLAGS, "intensity_stereo"},
+        {"enable",   "Enable intensity stereo coding", 0, AV_OPT_TYPE_CONST, {.i64 = 1}, INT_MIN, INT_MAX, AACENC_FLAGS, "intensity_stereo"},
     {NULL}
 };
 
diff --git a/libavcodec/aacenc.h b/libavcodec/aacenc.h
index 8ba5817..966c708 100644
--- a/libavcodec/aacenc.h
+++ b/libavcodec/aacenc.h
@@ -43,6 +43,7 @@ typedef struct AACEncOptions {
     int stereo_mode;
     int aac_coder;
     int pns;
+    int intensity_stereo;
 } AACEncOptions;
 
 struct AACEncContext;
@@ -57,6 +58,7 @@ typedef struct AACCoefficientsEncoder {
     void (*set_special_band_scalefactors)(struct AACEncContext *s, SingleChannelElement *sce);
     void (*search_for_pns)(struct AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce, const float lambda);
     void (*search_for_ms)(struct AACEncContext *s, ChannelElement *cpe, const float lambda);
+    void (*search_for_is)(struct AACEncContext *s, AVCodecContext *avctx, ChannelElement *cpe, const float lambda);
 } AACCoefficientsEncoder;
 
 extern AACCoefficientsEncoder ff_aac_coders[];
-- 
2.1.4



More information about the ffmpeg-devel mailing list