[FFmpeg-devel] [PATCH] Add decoding of > 32-bit residuals to FLAC

Wed Mar 30 13:42:37 EEST 2022

Martijn van Beurden:
> The size of residuals in a FLAC file coding for 24-bit PCM can
> exceed the range of a 32-bit signed integer. One pathological
> example with residuals exceeding [-2^33,2^33) can be found here:
> http://www.audiograaf.nl/misc_stuff/Extreme%20residual%20LPC%20order%2014.flac

Can this happen with real encoders or has this file been specifically
crafted? What is the performance impact of this patch on ordinary files?

> 
> The theorectical maximum bit depth for a residual in a FLAC file is

            ^

> 32 + 1 + 15 + 5 - 0 = 53 bit (max bit depth + extra bit for side
> channel + max lpc coeff precision + log2(max_order) - min
> lpc shift)
> 
> This patch adds detection of the possibilty of such residuals

                                           ^

> occuring and an alternate data path wide enough to handle them

       ^

> ---
>  libavcodec/flacdec.c | 107 ++++++++++++++++++++++++++++++++++++++-----
>  libavcodec/golomb.h  |  56 ++++++++++++++++++++++
>  2 files changed, 152 insertions(+), 11 deletions(-)
> 
> diff --git a/libavcodec/flacdec.c b/libavcodec/flacdec.c
> index dd6026f9de..3be1b63411 100644
> --- a/libavcodec/flacdec.c
> +++ b/libavcodec/flacdec.c
> @@ -64,6 +64,8 @@ typedef struct FLACContext {
>      uint8_t *decoded_buffer;
>      unsigned int decoded_buffer_size;
>      int buggy_lpc;                          ///< use workaround for old lavc encoded files
> +    int64_t *residual64;                    ///< to keep residuals exceeding int32_t
> +    unsigned int residual64_size;
>  
>      FLACDSPContext dsp;
>  } FLACContext;
> @@ -149,6 +151,10 @@ static int allocate_buffers(FLACContext *s)
>      if (!s->decoded_buffer)
>          return AVERROR(ENOMEM);
>  
> +    av_fast_malloc(&s->residual64, &s->residual64_size, 8*s->flac_stream_info.max_blocksize);
> +    if (!s->residual64)
> +        return AVERROR(ENOMEM);

Why not move this allocation to decode_residuals64() so that it is not
performed for ordinary files?

> +
>      ret = av_samples_fill_arrays((uint8_t **)s->decoded, NULL,
>                                   s->decoded_buffer,
>                                   s->flac_stream_info.channels,
> @@ -279,6 +285,66 @@ static int decode_residuals(FLACContext *s, int32_t *decoded, int pred_order)
>      return 0;
>  }
>  
> +static int decode_residuals64(FLACContext *s, int64_t *decoded, int pred_order)
> +{
> +    GetBitContext gb = s->gb;
> +    int i, tmp, partition, method_type, rice_order;
> +    int rice_bits, rice_esc;
> +    int samples;
> +
> +    method_type = get_bits(&gb, 2);
> +    rice_order  = get_bits(&gb, 4);
> +
> +    samples   = s->blocksize >> rice_order;
> +    rice_bits = 4 + method_type;
> +    rice_esc  = (1 << rice_bits) - 1;
> +
> +    decoded += pred_order;
> +    i        = pred_order;
> +
> +    if (method_type > 1) {
> +        av_log(s->avctx, AV_LOG_ERROR, "illegal residual coding method %d\n",
> +               method_type);
> +        return AVERROR_INVALIDDATA;
> +    }
> +
> +    if (samples << rice_order != s->blocksize) {
> +        av_log(s->avctx, AV_LOG_ERROR, "invalid rice order: %i blocksize %i\n",
> +               rice_order, s->blocksize);
> +        return AVERROR_INVALIDDATA;
> +    }
> +
> +    if (pred_order > samples) {
> +        av_log(s->avctx, AV_LOG_ERROR, "invalid predictor order: %i > %i\n",
> +               pred_order, samples);
> +        return AVERROR_INVALIDDATA;
> +    }

Everything in this function up until this point coincides with
decode_residuals(). So shouldn't the check for whether the 64bit
codepath is used by put here? (Would it help for this check to know
rice_bits?)

> +
> +    for (partition = 0; partition < (1 << rice_order); partition++) {
> +        tmp = get_bits(&gb, rice_bits);
> +        if (tmp == rice_esc) {
> +            tmp = get_bits(&gb, 5);
> +            for (; i < samples; i++)
> +                *decoded++ = get_sbits_long(&gb, tmp);
> +        } else {
> +            for (; i < samples; i++) {
> +                int64_t v = get_sr_golomb64_flac(&gb, tmp, 1);
> +                if (v == INT64_MAX) {
> +                    av_log(s->avctx, AV_LOG_ERROR, "invalid residual\n");
> +                    return AVERROR_INVALIDDATA;
> +                }
> +
> +                *decoded++ = v;
> +            }
> +        }
> +        i = 0;
> +    }
> +
> +    s->gb = gb;
> +
> +    return 0;
> +}
> +
>  static int decode_subframe_fixed(FLACContext *s, int32_t *decoded,
>                                   int pred_order, int bps)
>  {
> @@ -358,6 +424,21 @@ static void lpc_analyze_remodulate(SUINT32 *decoded, const int coeffs[32],
>      }
>  }
>  
> +static void lpc_residual64(int32_t *decoded, const int64_t *residual,
> +                           const int coeffs[32], int pred_order,
> +                           int qlevel, int len)
> +{
> +    int i, j;

These lines could be avoided if you declared them in the for loop (i.e.
"for (int i = pred_order;").

> +
> +    for (i = pred_order; i < len; i++, decoded++) {
> +        int64_t sum = 0;
> +        for (j = 0; j < pred_order; j++)
> +            sum += (int64_t)coeffs[j] * decoded[j];
> +        decoded[j] = residual[i] + (sum >> qlevel);
> +    }
> +
> +}
> +
>  static int decode_subframe_lpc(FLACContext *s, int32_t *decoded, int pred_order,
>                                 int bps)
>  {
> @@ -386,19 +467,23 @@ static int decode_subframe_lpc(FLACContext *s, int32_t *decoded, int pred_order,
>          coeffs[pred_order - i - 1] = get_sbits(&s->gb, coeff_prec);
>      }
>  
> -    if ((ret = decode_residuals(s, decoded, pred_order)) < 0)

decode_residuals() is also called in decode_subframe_fixed(). Could the
issue also exist in decode_subframe_fixed() (at least rice_bits can
attain the same values whether it is called from decode_subframe_fixed
or decode_subframe_lpc)?

> -        return ret;
> -
> -    if (   (    s->buggy_lpc && s->flac_stream_info.bps <= 16)
> -        || (   !s->buggy_lpc && bps <= 16
> -            && bps + coeff_prec + av_log2(pred_order) <= 32)) {
> -        s->dsp.lpc16(decoded, coeffs, pred_order, qlevel, s->blocksize);
> +    if (bps + coeff_prec + av_log2(pred_order) - qlevel <= 32) {
> +        if ((ret = decode_residuals(s, decoded, pred_order)) < 0)
> +            return ret;
> +        if (   (    s->buggy_lpc && s->flac_stream_info.bps <= 16)
> +            || (   !s->buggy_lpc && bps <= 16
> +                && bps + coeff_prec + av_log2(pred_order) <= 32)) {
> +            s->dsp.lpc16(decoded, coeffs, pred_order, qlevel, s->blocksize);
> +        } else {
> +            s->dsp.lpc32(decoded, coeffs, pred_order, qlevel, s->blocksize);
> +            if (s->flac_stream_info.bps <= 16)
> +                lpc_analyze_remodulate(decoded, coeffs, pred_order, qlevel, s->blocksize, bps);
> +        }
>      } else {
> -        s->dsp.lpc32(decoded, coeffs, pred_order, qlevel, s->blocksize);
> -        if (s->flac_stream_info.bps <= 16)
> -            lpc_analyze_remodulate(decoded, coeffs, pred_order, qlevel, s->blocksize, bps);
> +        if ((ret = decode_residuals64(s, s->residual64, pred_order)) < 0)
> +            return ret;
> +        lpc_residual64(decoded, s->residual64, coeffs, pred_order, qlevel, s->blocksize);

If I am not mistaken, then it is possible for s->flac_stream_info.bps to
be <= 16 here (e.g. coeff_prec == 15, qlevel == 0, pred_order >= 16
gives 19). So is it not necessary to call lpc_analyze_remodulate() here
in case it is so?

>      }
> -
>      return 0;
>  }
>  
> diff --git a/libavcodec/golomb.h b/libavcodec/golomb.h
> index 164c2583b6..5ebcdda059 100644
> --- a/libavcodec/golomb.h
> +++ b/libavcodec/golomb.h
> @@ -543,6 +543,62 @@ static inline int get_sr_golomb_flac(GetBitContext *gb, int k, int limit,
>      return (v >> 1) ^ -(v & 1);
>  }
>  
> +static inline int64_t get_sr_golomb64_flac(GetBitContext *gb, int k,
> +                                     int esc_len)
> +{
> +    uint64_t buf;
> +    int log;
> +
> +    OPEN_READER(re, gb);
> +    UPDATE_CACHE(re, gb);
> +    buf = GET_CACHE(re, gb);
> +
> +    log = av_log2(buf);
> +
> +    av_assert2(k <= 31);
> +
> +    if (log - k >= 64 - MIN_CACHE_BITS + (MIN_CACHE_BITS == 64)) {
> +        buf >>= log - k;
> +        buf  += (62U - log) << k;
> +        LAST_SKIP_BITS(re, gb, 64 + k - log);
> +        CLOSE_READER(re, gb);
> +    } else {
> +        int64_t i;
> +        for (i = 0; SHOW_UBITS(re, gb, MIN_CACHE_BITS) == 0; i += MIN_CACHE_BITS) {
> +            if (gb->size_in_bits <= re_index) {
> +                CLOSE_READER(re, gb);
> +                return INT64_MAX;
> +            }
> +            LAST_SKIP_BITS(re, gb, MIN_CACHE_BITS);
> +            UPDATE_CACHE(re, gb);
> +        }
> +        for (; SHOW_UBITS(re, gb, 1) == 0; i++) {
> +            SKIP_BITS(re, gb, 1);
> +        }
> +        LAST_SKIP_BITS(re, gb, 1);
> +        UPDATE_CACHE(re, gb);
> +
> +        if (k) {
> +            if (k > MIN_CACHE_BITS - 1) {
> +                buf = SHOW_UBITS(re, gb, 16) << (k-16);
> +                LAST_SKIP_BITS(re, gb, 16);
> +                UPDATE_CACHE(re, gb);
> +                buf |= SHOW_UBITS(re, gb, k-16);
> +                LAST_SKIP_BITS(re, gb, k-16);
> +            } else {
> +                buf = SHOW_UBITS(re, gb, k);
> +                LAST_SKIP_BITS(re, gb, k);
> +            }
> +        } else {
> +            buf = 0;
> +        }
> +
> +        buf += (i << k);
> +        CLOSE_READER(re, gb);
> +    }
> +    return (buf >> 1) ^ -(buf & 1);
> +}
> +

Don't put such a huge function that is only used by one file into a
header used by many files.

>  /**
>   * read unsigned golomb rice code (shorten).
>   */