[FFmpeg-soc] [soc]: r2430 - in aacenc: aac_enc.patch aacenc.c aacpsy.c aacpsy.h

kostya subversion at mplayerhq.hu
Sat Jun 14 06:57:57 CEST 2008


Author: kostya
Date: Sat Jun 14 06:57:56 2008
New Revision: 2430

Log:
Make the psychoacoustic model less dependent on the encoder and selectable.
For now, the simple model used by the encoder was copied into the null psy model.


Added:
   aacenc/aacpsy.c
   aacenc/aacpsy.h
Modified:
   aacenc/aac_enc.patch
   aacenc/aacenc.c

Modified: aacenc/aac_enc.patch
==============================================================================
--- aacenc/aac_enc.patch	(original)
+++ aacenc/aac_enc.patch	Sat Jun 14 06:57:56 2008
@@ -6,7 +6,7 @@ index d4f6d1c..0ed9057 100644
  
  OBJS-$(CONFIG_ENCODERS)                += faandct.o jfdctfst.o jfdctint.o
  
-+OBJS-$(CONFIG_AAC_ENCODER)             += aacenc.o mdct.o fft.o mpeg4audio.o
++OBJS-$(CONFIG_AAC_ENCODER)             += aacenc.o aacpsy.o mdct.o fft.o mpeg4audio.o
  OBJS-$(CONFIG_AASC_DECODER)            += aasc.o
  OBJS-$(CONFIG_AC3_DECODER)             += ac3dec.o ac3tab.o ac3.o mdct.o fft.o
  OBJS-$(CONFIG_AC3_ENCODER)             += ac3enc.o ac3tab.o ac3.o

Modified: aacenc/aacenc.c
==============================================================================
--- aacenc/aacenc.c	(original)
+++ aacenc/aacenc.c	Sat Jun 14 06:57:56 2008
@@ -29,6 +29,8 @@
 #include "dsputil.h"
 #include "mpeg4audio.h"
 
+#include "aacpsy.h"
+
 // XXX: borrowed from aac.c, move to some header eventually
 
 #include "aactab.h"
@@ -131,65 +133,6 @@ static const struct {
     {   -1, NULL  , NULL  , 0 }, // intensity in-phase
 };
 
-// data structures borrowed from aac.c with some minor modifications
-
-/**
- * Individual Channel Stream
- */
-typedef struct {
-    int intensity_present;
-    int max_sfb;
-    int window_sequence;
-    int window_shape;             ///< If set, use Kaiser-Bessel window, otherwise use a sinus window
-    int window_shape_prev;
-    int num_window_groups;
-    uint8_t grouping;
-    uint8_t group_len[8];
-    const uint8_t *swb_sizes;
-    int num_swb;
-    int num_windows;
-    int tns_max_bands;
-} ics_struct;
-
-/**
- * M/S joint channel coding
- */
-typedef struct {
-    int present;
-    uint8_t mask[8][64];
-} ms_struct;
-
-/**
- * Single Channel Element
- * Used for both SCE and LFE elements
- */
-typedef struct {
-    int gain;                                 /**< Channel gain (not used by AAC bitstream).
-                                               *   Note that this is applied before joint stereo decoding.
-                                               *   Thus, when used inside CPE elements, both channels must have equal gain.
-                                               */
-    ics_struct ics;
-    int zeroes[64];
-    int sf_idx[64];
-    int cb[8][64];                            ///< Codebooks
-    float sf[8][64];                          ///< Scalefactors
-    DECLARE_ALIGNED_16(float, coeffs[1024]);  ///< Coefficients for IMDCT
-    DECLARE_ALIGNED_16(float, saved[1024]);   ///< Overlap
-    DECLARE_ALIGNED_16(float, ret[1024]);     ///< PCM output
-    DECLARE_ALIGNED_16(int,   icoefs[1024]);  ///< integer coefficients for coding
-} sce_struct;
-
-/**
- * Channel Pair Element
- */
-typedef struct {
-    int common_window;     ///< Set if channels share a common 'ics_struct' in bitstream
-    ms_struct ms;
-    sce_struct ch[2];
-} cpe_struct;
-
-// borrowing temporarily ends here
-
 typedef struct {
     PutBitContext pb;
     MDCTContext mdct;
@@ -201,6 +144,7 @@ typedef struct {
     uint8_t *swb_sizes;
     int swb_num;
     cpe_struct cpe;
+    AACPsyContext psy;
 } AACEncContext;
 
 #define SCALE_ONE_POS   140
@@ -208,9 +152,6 @@ typedef struct {
 #define SCALE_MAX_DIFF   60
 #define SCALE_DIFF_ZERO  60
 
-//borrowed from aac.c
-static float pow2sf_tab[316];
-
 /**
  * Make AAC audio config object.
  * @see 1.6.2.1
@@ -253,66 +194,14 @@ static int aac_encode_init(AVCodecContex
     // window init
     ff_kbd_window_init(s->kbd_long_1024, 4.0, 1024);
 
+    ff_aac_psy_init(&s->psy, avctx, AAC_PSY_NULL, 0, s->swb_sizes, s->swb_num);
     avctx->extradata = av_malloc(2);
     avctx->extradata_size = 2;
     put_audio_specific_config(avctx);
 
-    for (i = 0; i < 316; i++)
-        pow2sf_tab[i] = pow(2, (i - 200)/4.);
     return 0;
 }
 
-static void determine_scales(AVCodecContext *avctx, cpe_struct *cpe, int channel)
-{
-    AACEncContext *s = avctx->priv_data;
-    int i = 0, j, g, count = 0, maxswb;
-    double me, d;
-
-    cpe->ch[channel].ics.swb_sizes = s->swb_sizes;
-    cpe->ch[channel].ics.num_swb = s->swb_num;
-    for(g = 0; g < s->swb_num; g++){
-        me = 0.0;
-        d = 0.0;
-        for(j = 0; j < s->swb_sizes[g]; j++)
-            if(cpe->ch[channel].coeffs[i + j] != 0.0){
-                me += fabs(cpe->ch[channel].coeffs[i + j]);
-                count++;
-            }
-        if(count)
-            me /= count;
-        for(j = 0; j < cpe->ch[channel].ics.swb_sizes[g]; j++)
-            if(cpe->ch[channel].coeffs[i + j] != 0.0)
-                d += (cpe->ch[channel].coeffs[i + j] - me) * (cpe->ch[channel].coeffs[i + j] - me);
-        if(count)
-            d /= count;
-        cpe->ch[channel].zeroes[g] = (me < 0.1 && d < 0.1);
-        cpe->ch[channel].sf_idx[g] = SCALE_ONE_POS + g;
-        i += cpe->ch[channel].ics.swb_sizes[g];
-    }
-    cpe->ch[channel].gain = SCALE_ONE_POS;
-    for(maxswb = s->swb_num; maxswb > 0 && cpe->ch[channel].zeroes[maxswb-1]; maxswb--);
-    cpe->ch[channel].ics.max_sfb = maxswb;
-    cpe->ch[channel].ics.window_sequence = 0;
-    cpe->ch[channel].ics.window_shape = 1;
-}
-
-/* BIG FAT TODO! */
-/* for now it just converts spectra to integer form */
-static void apply_psychoacoustics(AVCodecContext *avctx, cpe_struct *cpe, int channel)
-{
-    AACEncContext *s = avctx->priv_data;
-    int i = 0, j, g;
-
-    for(g = 0; g < cpe->ch[channel].ics.max_sfb; g++)
-        if(cpe->ch[channel].zeroes[g]){
-            memset(cpe->ch[channel].icoefs + i, 0, cpe->ch[channel].ics.swb_sizes[g] * sizeof(cpe->ch[0].icoefs[0]));
-            i += cpe->ch[channel].ics.swb_sizes[g];
-        }else
-            for(j = 0; j < cpe->ch[channel].ics.swb_sizes[g]; j++, i++)
-                cpe->ch[channel].icoefs[i] = (int)(roundf(cpe->ch[channel].coeffs[i] / pow2sf_tab[cpe->ch[channel].sf_idx[g]+60]));
-    memset(cpe->ch[channel].icoefs + i, 0, (1024 - i) * sizeof(cpe->ch[channel].icoefs[0]));
-}
-
 static void analyze(AVCodecContext *avctx, AACEncContext *s, cpe_struct *cpe, short *audio, int channel)
 {
     int i, j;
@@ -328,13 +217,6 @@ static void analyze(AVCodecContext *avct
     //convert coefficients into form used by AAC
     for(i = 0; i < 1024; i++)
         cpe->ch[channel].coeffs[i] = -copysignf(pow(fabsf(cpe->ch[channel].coeffs[i]), 0.75f), cpe->ch[channel].coeffs[i]);
-
-    determine_scales(avctx, cpe, channel);
-    if(channel == 1){
-        cpe->ch[0].ics.max_sfb = FFMAX(cpe->ch[0].ics.max_sfb, cpe->ch[1].ics.max_sfb);
-        cpe->common_window = 1;
-    }
-    apply_psychoacoustics(avctx, cpe, channel);
 }
 
 /**
@@ -513,10 +395,21 @@ static int aac_encode_frame(AVCodecConte
     AACEncContext *s = avctx->priv_data;
     int16_t *samples = data;
 
+    ff_aac_psy_suggest_window(&s->psy, samples, 0, &s->cpe);
+
     analyze(avctx, s, &s->cpe, samples, 0);
     if(avctx->channels > 1)
         analyze(avctx, s, &s->cpe, samples, 1);
 
+    ff_aac_psy_analyze(&s->psy, samples, 0, &s->cpe);
+    if(avctx->channels > 1){
+        s->cpe.common_window = s->cpe.ch[0].ics.window_shape == s->cpe.ch[1].ics.window_shape;
+        if(s->cpe.common_window){
+            s->cpe.ch[0].ics.max_sfb = FFMAX(s->cpe.ch[0].ics.max_sfb, s->cpe.ch[1].ics.max_sfb);
+            s->cpe.ch[1].ics.max_sfb = s->cpe.ch[0].ics.max_sfb;
+        }
+    }
+
     init_put_bits(&s->pb, frame, buf_size*8);
     //output encoded
     switch(avctx->channels){
@@ -550,6 +443,7 @@ static int aac_encode_end(AVCodecContext
     AACEncContext *s = avctx->priv_data;
 
     ff_mdct_end(&s->mdct);
+    ff_aac_psy_end(&s->psy);
     return 0;
 }
 

Added: aacenc/aacpsy.c
==============================================================================
--- (empty file)
+++ aacenc/aacpsy.c	Sat Jun 14 06:57:56 2008
@@ -0,0 +1,121 @@
+/*
+ * AAC encoder psychoacoustic model
+ * Copyright (C) 2008 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file aacpsy.c
+ * AAC encoder psychoacoustic model
+ */
+
+#include "avcodec.h"
+#include "dsputil.h"
+#include "aacpsy.h"
+
+//borrowed from aac.c
+static float pow2sf_tab[316];
+
+
+#define SCALE_ONE_POS   140
+#define SCALE_MAX_POS   255
+#define SCALE_MAX_DIFF   60
+
+static void psy_null_window(AACPsyContext *apc, int16_t *audio, int channel, cpe_struct *cpe)
+{
+    int ch;
+
+    for(ch = 0; ch < apc->avctx->channels; ch++){
+        cpe->ch[ch].ics.window_sequence = 0;
+        cpe->ch[ch].ics.window_shape = 1;
+    }
+}
+
+static void psy_null_process(AACPsyContext *apc, int16_t *audio, int channel, cpe_struct *cpe)
+{
+    int start, sum, maxsfb;
+    int ch, g, i;
+
+    for(ch = 0; ch < apc->avctx->channels; ch++){
+        start = 0;
+        cpe->ch[ch].gain = SCALE_ONE_POS;
+        for(g = 0; g < apc->num_bands; g++){
+            sum = 0;
+            cpe->ch[ch].sf_idx[g] = SCALE_ONE_POS;
+            for(i = 0; i < apc->bands[g]; i++){
+                cpe->ch[ch].icoefs[start+i] = av_clip((int)(roundf(cpe->ch[ch].coeffs[start+i] / pow2sf_tab[cpe->ch[ch].sf_idx[g]+60])), -8191, 8191);
+                sum += !!cpe->ch[ch].icoefs[start+i];
+            }
+            cpe->ch[ch].zeroes[g] = !sum;
+            start += apc->bands[g];
+        }
+        for(maxsfb = apc->num_bands; maxsfb > 0 && cpe->ch[ch].zeroes[maxsfb-1]; maxsfb--);
+        cpe->ch[ch].ics.max_sfb = maxsfb;
+    }
+}
+
+static const AACPsyModel psy_models[AAC_NB_PSY_MODELS] =
+{
+    {
+       "Null model",
+        NULL,
+        psy_null_window,
+        psy_null_process,
+        NULL,
+    },
+};
+
+int ff_aac_psy_init(AACPsyContext *ctx, AVCodecContext *avctx, int model, int flags,
+                    const uint8_t *bands, int num_bands)
+{
+    int i;
+    /* Bind ctx to one psy_models[] entry; returns -1 on a bad model id, else the model's init() result or 0 (flags is unused here). */
+    if(model < 0 || model >= AAC_NB_PSY_MODELS || !psy_models[model].window || !psy_models[model].process){
+         av_log(avctx, AV_LOG_ERROR, "Invalid psy model\n");
+         return -1;
+    }
+    /* fill the scalefactor lookup table: pow2sf_tab[i] = 2^((i - 200) / 4) */
+    for (i = 0; i < 316; i++)
+        pow2sf_tab[i] = pow(2, (i - 200)/4.);
+
+    ctx->avctx = avctx;
+    ctx->bands = bands;
+    ctx->num_bands = num_bands;
+    dsputil_init(&ctx->dsp, avctx);
+    ctx->model = &psy_models[model];
+    /* init is optional (the null model leaves it NULL) */
+    if(ctx->model->init)
+        return ctx->model->init(ctx);
+    return 0;
+}
+
+void ff_aac_psy_suggest_window(AACPsyContext *ctx, int16_t *audio, int channel, cpe_struct *cpe)
+{
+    ctx->model->window(ctx, audio, channel, cpe);
+}
+
+void ff_aac_psy_analyze(AACPsyContext *ctx, int16_t *audio, int channel, cpe_struct *cpe)
+{
+    ctx->model->process(ctx, audio, channel, cpe);
+}
+
+void ff_aac_psy_end(AACPsyContext *ctx) /* run the selected model's optional end() hook */
+{
+    if(ctx->model->end)
+        ctx->model->end(ctx); /* end() yields void; "return expr;" is not valid C in a void function */
+}

Added: aacenc/aacpsy.h
==============================================================================
--- (empty file)
+++ aacenc/aacpsy.h	Sat Jun 14 06:57:56 2008
@@ -0,0 +1,123 @@
+/*
+ * AAC encoder psychoacoustic model
+ * Copyright (C) 2008 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef FFMPEG_AACPSY_H
+#define FFMPEG_AACPSY_H
+
+#include "avcodec.h"
+#include "dsputil.h"
+
+enum AACPsyModelType{
+    AAC_PSY_NULL,              // do nothing on frequencies
+
+    AAC_NB_PSY_MODELS
+};
+
+// data structures borrowed from aac.c with some minor modifications
+
+/**
+ * Individual Channel Stream
+ */
+typedef struct {
+    int intensity_present;
+    int max_sfb;
+    int window_sequence;
+    int window_shape;             ///< If set, use Kaiser-Bessel window, otherwise use a sinus window
+    int window_shape_prev;
+    int num_window_groups;
+    uint8_t grouping;
+    uint8_t group_len[8];
+    const uint8_t *swb_sizes;
+    int num_swb;
+    int num_windows;
+    int tns_max_bands;
+} ics_struct;
+
+/**
+ * M/S joint channel coding
+ */
+typedef struct {
+    int present;
+    uint8_t mask[8][64];
+} ms_struct;
+
+/**
+ * Single Channel Element
+ * Used for both SCE and LFE elements
+ */
+typedef struct {
+    int gain;                                 /**< Channel gain (not used by AAC bitstream).
+                                               *   Note that this is applied before joint stereo decoding.
+                                               *   Thus, when used inside CPE elements, both channels must have equal gain.
+                                               */
+    ics_struct ics;
+    int zeroes[64];
+    int sf_idx[64];
+    int cb[8][64];                            ///< Codebooks
+    float sf[8][64];                          ///< Scalefactors
+    DECLARE_ALIGNED_16(float, coeffs[1024]);  ///< Coefficients for IMDCT
+    DECLARE_ALIGNED_16(float, saved[1024]);   ///< Overlap
+    DECLARE_ALIGNED_16(float, ret[1024]);     ///< PCM output
+    DECLARE_ALIGNED_16(int,   icoefs[1024]);  ///< integer coefficients for coding
+} sce_struct;
+
+/**
+ * Channel Pair Element
+ */
+typedef struct {
+    int common_window;     ///< Set if channels share a common 'ics_struct' in bitstream
+    ms_struct ms;
+    sce_struct ch[2];
+} cpe_struct;
+
+// borrowing temporarily ends here
+
+/**
+ * context used by psychoacoustic model
+ */
+typedef struct AACPsyContext {
+    AVCodecContext *avctx;
+    DSPContext dsp;
+
+    int window_type[2];
+    int window_shape[2];
+    const uint8_t *bands;
+    int num_bands;
+
+    const struct AACPsyModel *model;
+    void* model_priv_data;
+}AACPsyContext;
+
+typedef struct AACPsyModel {
+    const char *name;
+    int   (*init)   (AACPsyContext *apc);
+    void  (*window) (AACPsyContext *apc, int16_t *audio, int channel, cpe_struct *cpe);
+    void  (*process)(AACPsyContext *apc, int16_t *audio, int channel, cpe_struct *cpe);
+    void  (*end)    (AACPsyContext *apc);
+}AACPsyModel;
+
+int ff_aac_psy_init(AACPsyContext *ctx, AVCodecContext *avctx, int model, int flags,
+                    const uint8_t *bands, int num_bands);
+void ff_aac_psy_suggest_window(AACPsyContext *ctx, int16_t *audio, int channel, cpe_struct *cpe);
+void ff_aac_psy_analyze(AACPsyContext *ctx, int16_t *audio, int channel, cpe_struct *cpe);
+void ff_aac_psy_end(AACPsyContext *ctx);
+#endif /* FFMPEG_AACPSY_H */
+



More information about the FFmpeg-soc mailing list