[FFmpeg-devel] [PATCH 09/11] aaccoder: add a new perceptual noise substitution implementation

Rostislav Pehlivanov atomnuker at gmail.com
Fri Jun 26 22:16:38 CEST 2015


This commit finalizes the PNS implementation previously added to the encoder by moving it to a separate function, search_for_pns(), thus making it coder-generic. This new implementation makes use of the spread field of the psy bands and the lambda quality feedback parameter. The spread of the spectrum in a band prevents PNS from being used in tonal bands. The lambda parameter lets the number of PNS-marked bands vary with its value and the amount of bits available, making better choices on which bands are to be marked as noise. Comparisons with the previous PNS implementation can be found here: https://trac.ffmpeg.org/attachment/wiki/Encode/AAC/
---
 libavcodec/aaccoder.c | 38 ++++++++++++++++++++++++++++++++++++++
 libavcodec/aacenc.c   |  6 ++++++
 libavcodec/aacenc.h   |  1 +
 3 files changed, 45 insertions(+)

diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c
index 10c64c0..3cd9fa2 100644
--- a/libavcodec/aaccoder.c
+++ b/libavcodec/aaccoder.c
@@ -1144,6 +1144,40 @@ static void search_for_quantizers_fast(AVCodecContext *avctx, AACEncContext *s,
                 sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g];
 }
 
+/* Mark bands as noise (NOISE_BT) from the psy bands' summed energy, threshold and spread; lambda scales the energy limit. */
+static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce, const float lambda)
+{
+    int start, w, w2, g;
+    const float freq_mult = avctx->sample_rate/(1024.0f/sce->ics.num_windows)/2.0f;
+
+    /* Coders other than twoloop don't reset the band_types, so clear stale NOISE_BT marks first */
+    for (w = 0; w < 128; w++)
+        if (sce->band_type[w] == NOISE_BT)
+            sce->band_type[w] = 0;
+
+    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+        for (g = 0, start = 0; g < sce->ics.num_swb; g++) { /* start is window-relative: restart per group */
+            if (start*freq_mult > NOISE_LOW_LIMIT) {
+                float energy = 0.0f, threshold = 0.0f, spread = 0.0f;
+                for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
+                    FFPsyBand *band = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
+                    energy += band->energy;
+                    threshold += band->threshold;
+                    spread += band->spread;
+                }
+                if (spread > NOISE_SPREAD_THRESHOLD &&
+                    ((sce->zeroes[w*16+g] && energy >= threshold) ||
+                    energy < threshold*(NOISE_LAMBDA_NUMERATOR/lambda))) {
+                    sce->band_type[w*16+g] = NOISE_BT;
+                    sce->pns_ener[w*16+g] = energy;
+                    sce->zeroes[w*16+g] = 0;
+                }
+            }
+            start += sce->ics.swb_sizes[g];
+        }
+    }
+}
+
 static void search_for_ms(AACEncContext *s, ChannelElement *cpe,
                           const float lambda)
 {
@@ -1211,6 +1245,7 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
         encode_window_bands_info,
         quantize_and_encode_band,
         set_special_band_scalefactors,
+        search_for_pns,
         search_for_ms,
     },
     [AAC_CODER_ANMR] = {
@@ -1218,6 +1253,7 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
         encode_window_bands_info,
         quantize_and_encode_band,
         set_special_band_scalefactors,
+        search_for_pns,
         search_for_ms,
     },
     [AAC_CODER_TWOLOOP] = {
@@ -1225,6 +1261,7 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
         codebook_trellis_rate,
         quantize_and_encode_band,
         set_special_band_scalefactors,
+        search_for_pns,
         search_for_ms,
     },
     [AAC_CODER_FAST] = {
@@ -1232,6 +1269,7 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
         encode_window_bands_info,
         quantize_and_encode_band,
         set_special_band_scalefactors,
+        search_for_pns,
         search_for_ms,
     },
 };
diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c
index 562d0cc..3fc0a1f 100644
--- a/libavcodec/aacenc.c
+++ b/libavcodec/aacenc.c
@@ -658,6 +658,12 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
                     }
                 }
             }
+            if (s->options.pns && s->coder->search_for_pns) {
+                for (ch = 0; ch < chans; ch++) {
+                    s->cur_channel = start_ch + ch;
+                    s->coder->search_for_pns(s, avctx, &cpe->ch[ch], s->lambda);
+                }
+            }
             s->cur_channel = start_ch;
             if (s->options.stereo_mode && cpe->common_window) {
                 if (s->options.stereo_mode > 0) {
diff --git a/libavcodec/aacenc.h b/libavcodec/aacenc.h
index cc1b06a..8ba5817 100644
--- a/libavcodec/aacenc.h
+++ b/libavcodec/aacenc.h
@@ -55,6 +55,7 @@ typedef struct AACCoefficientsEncoder {
     void (*quantize_and_encode_band)(struct AACEncContext *s, PutBitContext *pb, const float *in, int size,
                                      int scale_idx, int cb, const float lambda);
     void (*set_special_band_scalefactors)(struct AACEncContext *s, SingleChannelElement *sce);
+    void (*search_for_pns)(struct AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce, const float lambda);
     void (*search_for_ms)(struct AACEncContext *s, ChannelElement *cpe, const float lambda);
 } AACCoefficientsEncoder;
 
-- 
2.1.4



More information about the ffmpeg-devel mailing list