[FFmpeg-devel] [PATCH] avcodec/agm: add support for higher compression
Paul B Mahol
onemda at gmail.com
Mon Apr 8 18:42:44 EEST 2019
On 4/8/19, Nicolas George <george at nsup.org> wrote:
> Paul B Mahol (12019-04-08):
>> Signed-off-by: Paul B Mahol <onemda at gmail.com>
>> ---
>> libavcodec/agm.c | 403 +++++++++++++++++++++++++++++++++++++++++++--
>> libavformat/riff.c | 4 +
>> 2 files changed, 392 insertions(+), 15 deletions(-)
>>
>> diff --git a/libavcodec/agm.c b/libavcodec/agm.c
>> index cbd45e8095..e183b7f508 100644
>> --- a/libavcodec/agm.c
>> +++ b/libavcodec/agm.c
>> @@ -71,9 +71,14 @@ typedef struct AGMContext {
>> unsigned flags;
>> unsigned fflags;
>>
>> + uint8_t *output;
>> + int output_size;
>> +
>> MotionVector *mvectors;
>> int mvectors_size;
>>
>> + VLC vlc;
>> +
>> AVFrame *prev_frame;
>>
>> int luma_quant_matrix[64];
>> @@ -81,6 +86,13 @@ typedef struct AGMContext {
>>
>> ScanTable scantable;
>> DECLARE_ALIGNED(32, int16_t, block)[64];
>> +
>> + int16_t *wblocks;
>> + int wblocks_size;
>> +
>> + int *map;
>> + int map_size;
>> +
>> IDCTDSPContext idsp;
>> } AGMContext;
>>
>> @@ -173,7 +185,84 @@ static int read_code(GetBitContext *gb, int *oskip,
>> int *level, int *map, int mo
>> return 0;
>> }
>>
>> -static int decode_intra_block(AGMContext *s, GetBitContext *gb, int
>> size,
>> +static int decode_intra_blocks(AGMContext *s, GetBitContext *gb,
>> + const int *quant_matrix, int *skip, int
>> *dc_level)
>> +{
>> + const uint8_t *scantable = s->scantable.permutated;
>> + int level, ret, map = 0;
>> +
>> + memset(s->wblocks, 0, s->wblocks_size);
>> +
>> + for (int i = 0; i < 64; i++) {
>> + int16_t *block = s->wblocks + scantable[i];
>> +
>> + for (int j = 0; j < s->blocks_w;) {
>> + if (*skip > 0) {
>> + int rskip;
>> +
>> + rskip = FFMIN(*skip, s->blocks_w - j);
>> + j += rskip;
>> + if (i == 0) {
>> + for (int k = 0; k < rskip; k++)
>> + block[64 * k] = *dc_level * quant_matrix[0];
>> + }
>> + block += rskip * 64;
>> + *skip -= rskip;
>> + } else {
>> + ret = read_code(gb, skip, &level, &map, s->flags & 1);
>> + if (ret < 0)
>> + return ret;
>> +
>> + if (i == 0)
>> + *dc_level += level;
>> +
>> + block[0] = (i == 0 ? *dc_level : level) *
>> quant_matrix[i];
>> + block += 64;
>> + j++;
>> + }
>> + }
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +static int decode_inter_blocks(AGMContext *s, GetBitContext *gb,
>> + const int *quant_matrix, int *skip,
>> + int *map)
>> +{
>> + const uint8_t *scantable = s->scantable.permutated;
>> + int level, ret;
>> +
>> + memset(s->wblocks, 0, s->wblocks_size);
>> + memset(s->map, 0, s->map_size);
>> +
>> + for (int i = 0; i < 64; i++) {
>> + int16_t *block = s->wblocks + scantable[i];
>> +
>> + for (int j = 0; j < s->blocks_w;) {
>> + if (*skip > 0) {
>> + int rskip;
>> +
>> + rskip = FFMIN(*skip, s->blocks_w - j);
>> + j += rskip;
>> + block += rskip * 64;
>> + *skip -= rskip;
>> + } else {
>> + ret = read_code(gb, skip, &level, &map[j], s->flags &
>> 1);
>> + if (ret < 0)
>> + return ret;
>> +
>> + block[0] = level * quant_matrix[i];
>> + block += 64;
>> + j++;
>> + }
>> + }
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +static int decode_intra_block(AGMContext *s, GetBitContext *gb,
>> const int *quant_matrix, int *skip, int
>> *dc_level)
>> {
>> const uint8_t *scantable = s->scantable.permutated;
>> @@ -218,18 +307,38 @@ static int decode_intra_plane(AGMContext *s,
>> GetBitContext *gb, int size,
>> int plane)
>> {
>> int ret, skip = 0, dc_level = 0;
>> + const int offset = s->plus ? 0 : 1024;
>>
>> if ((ret = init_get_bits8(gb, s->gbyte.buffer, size)) < 0)
>> return ret;
>>
>> - for (int y = 0; y < s->blocks_h; y++) {
>> - for (int x = 0; x < s->blocks_w; x++) {
>> - ret = decode_intra_block(s, gb, size, quant_matrix, &skip,
>> &dc_level);
>> + if (s->flags & 1) {
>> + av_fast_padded_malloc(&s->wblocks, &s->wblocks_size,
>> + 64 * s->blocks_w * sizeof(*s->wblocks));
>> + if (!s->wblocks)
>> + return AVERROR(ENOMEM);
>> +
>> + for (int y = 0; y < s->blocks_h; y++) {
>> + ret = decode_intra_blocks(s, gb, quant_matrix, &skip,
>> &dc_level);
>> if (ret < 0)
>> return ret;
>>
>> - s->idsp.idct_put(frame->data[plane] + (s->blocks_h - 1 - y) *
>> 8 * frame->linesize[plane] + x * 8,
>> - frame->linesize[plane], s->block);
>> + for (int x = 0; x < s->blocks_w; x++) {
>> + s->wblocks[64 * x] += offset;
>> + s->idsp.idct_put(frame->data[plane] + (s->blocks_h - 1 -
>> y) * 8 * frame->linesize[plane] + x * 8,
>> + frame->linesize[plane], s->wblocks + 64
>> * x);
>> + }
>> + }
>> + } else {
>> + for (int y = 0; y < s->blocks_h; y++) {
>> + for (int x = 0; x < s->blocks_w; x++) {
>> + ret = decode_intra_block(s, gb, quant_matrix, &skip,
>> &dc_level);
>> + if (ret < 0)
>> + return ret;
>> +
>> + s->idsp.idct_put(frame->data[plane] + (s->blocks_h - 1 -
>> y) * 8 * frame->linesize[plane] + x * 8,
>> + frame->linesize[plane], s->block);
>> + }
>> }
>> }
>>
>> @@ -242,7 +351,7 @@ static int decode_intra_plane(AGMContext *s,
>> GetBitContext *gb, int size,
>> return 0;
>> }
>>
>> -static int decode_inter_block(AGMContext *s, GetBitContext *gb, int
>> size,
>> +static int decode_inter_block(AGMContext *s, GetBitContext *gb,
>> const int *quant_matrix, int *skip,
>> int *map)
>> {
>> @@ -281,7 +390,54 @@ static int decode_inter_plane(AGMContext *s,
>> GetBitContext *gb, int size,
>> if ((ret = init_get_bits8(gb, s->gbyte.buffer, size)) < 0)
>> return ret;
>>
>> - if (s->flags & 2) {
>> + if (s->flags == 3) {
>> + av_fast_padded_malloc(&s->wblocks, &s->wblocks_size,
>> + 64 * s->blocks_w * sizeof(*s->wblocks));
>> + if (!s->wblocks)
>> + return AVERROR(ENOMEM);
>> +
>> + av_fast_padded_malloc(&s->map, &s->map_size,
>> + s->blocks_w * sizeof(*s->map));
>> + if (!s->map)
>> + return AVERROR(ENOMEM);
>> +
>> + for (int y = 0; y < s->blocks_h; y++) {
>> + ret = decode_inter_blocks(s, gb, quant_matrix, &skip,
>> s->map);
>> + if (ret < 0)
>> + return ret;
>> +
>> + for (int x = 0; x < s->blocks_w; x++) {
>> + int shift = plane == 0;
>> + int mvpos = (y >> shift) * (s->blocks_w >> shift) + (x >>
>> shift);
>> + int orig_mv_x = s->mvectors[mvpos].x;
>> + int mv_x = s->mvectors[mvpos].x / (1 + !shift);
>> + int mv_y = s->mvectors[mvpos].y / (1 + !shift);
>> + int h = s->avctx->coded_height >> !shift;
>> + int w = s->avctx->coded_width >> !shift;
>> + int map = s->map[x];
>> +
>> + if (orig_mv_x >= -32) {
>> + if (y * 8 + mv_y < 0 || y * 8 + mv_y >= h ||
>> + x * 8 + mv_x < 0 || x * 8 + mv_x >= w)
>> + return AVERROR_INVALIDDATA;
>> +
>> + copy_block8(frame->data[plane] + (s->blocks_h - 1 -
>> y) * 8 * frame->linesize[plane] + x * 8,
>> + prev->data[plane] + ((s->blocks_h - 1 -
>> y) * 8 - mv_y) * prev->linesize[plane] + (x * 8 + mv_x),
>> + frame->linesize[plane],
>> prev->linesize[plane], 8);
>> + if (map) {
>> + s->idsp.idct(s->wblocks + x * 64);
>> + for (int i = 0; i < 64; i++)
>> + s->wblocks[i + x * 64] = (s->wblocks[i + x *
>> 64] + 1) & 0xFFFC;
>> + s->idsp.add_pixels_clamped(&s->wblocks[x*64],
>> frame->data[plane] + (s->blocks_h - 1 - y) * 8 * frame->linesize[plane] +
>> x * 8,
>> +
>> frame->linesize[plane]);
>> + }
>> + } else if (map) {
>> + s->idsp.idct_put(frame->data[plane] + (s->blocks_h -
>> 1 - y) * 8 * frame->linesize[plane] + x * 8,
>> + frame->linesize[plane], s->wblocks +
>> x * 64);
>> + }
>> + }
>> + }
>> + } else if (s->flags & 2) {
>> for (int y = 0; y < s->blocks_h; y++) {
>> for (int x = 0; x < s->blocks_w; x++) {
>> int shift = plane == 0;
>> @@ -293,7 +449,7 @@ static int decode_inter_plane(AGMContext *s,
>> GetBitContext *gb, int size,
>> int w = s->avctx->coded_width >> !shift;
>> int map = 0;
>>
>> - ret = decode_inter_block(s, gb, size, quant_matrix,
>> &skip, &map);
>> + ret = decode_inter_block(s, gb, quant_matrix, &skip,
>> &map);
>> if (ret < 0)
>> return ret;
>>
>> @@ -318,12 +474,35 @@ static int decode_inter_plane(AGMContext *s,
>> GetBitContext *gb, int size,
>> }
>> }
>> }
>> + } else if (s->flags & 1) {
>> + av_fast_padded_malloc(&s->wblocks, &s->wblocks_size,
>> + 64 * s->blocks_w * sizeof(*s->wblocks));
>> + if (!s->wblocks)
>> + return AVERROR(ENOMEM);
>> +
>> + av_fast_padded_malloc(&s->map, &s->map_size,
>> + s->blocks_w * sizeof(*s->map));
>> + if (!s->map)
>> + return AVERROR(ENOMEM);
>> +
>> + for (int y = 0; y < s->blocks_h; y++) {
>> + ret = decode_inter_blocks(s, gb, quant_matrix, &skip,
>> s->map);
>> + if (ret < 0)
>> + return ret;
>> +
>> + for (int x = 0; x < s->blocks_w; x++) {
>> + if (!s->map[x])
>> + continue;
>> + s->idsp.idct_add(frame->data[plane] + (s->blocks_h - 1 -
>> y) * 8 * frame->linesize[plane] + x * 8,
>> + frame->linesize[plane], s->wblocks + 64
>> * x);
>> + }
>> + }
>> } else {
>> for (int y = 0; y < s->blocks_h; y++) {
>> for (int x = 0; x < s->blocks_w; x++) {
>> int map = 0;
>>
>> - ret = decode_inter_block(s, gb, size, quant_matrix,
>> &skip, &map);
>> + ret = decode_inter_block(s, gb, quant_matrix, &skip,
>> &map);
>> if (ret < 0)
>> return ret;
>>
>> @@ -501,6 +680,180 @@ static int decode_inter(AVCodecContext *avctx,
>> GetBitContext *gb,
>> return 0;
>> }
>>
>> +typedef struct Node {
>> + int parent;
>> + int child[2];
>> +} Node;
>> +
>> +static void get_tree_codes(uint32_t *codes, Node *nodes, int idx,
>> uint32_t pfx, int bitpos)
>> +{
>> + if (idx < 256 && idx >= 0) {
>> + codes[idx] = pfx;
>> + } else {
>> + get_tree_codes(codes, nodes, nodes[idx].child[0], pfx + (0 <<
>> bitpos), bitpos + 1);
>> + get_tree_codes(codes, nodes, nodes[idx].child[1], pfx + (1 <<
>> bitpos), bitpos + 1);
>> + }
>> +}
>> +
>> +static void make_new_tree(const uint8_t *bitlens, uint32_t *codes)
>> +{
>> + int zlcount = 0, curlen, idx, nindex, last, llast;
>> + int blcounts[32] = { 0 };
>> + int syms[8192];
>> + Node nodes[512];
>> + int node_idx[1024];
>> + int old_idx[512];
>> +
>> + for (int i = 0; i < 256; i++) {
>> + int bitlen = bitlens[i];
>> + int blcount = blcounts[bitlen];
>> +
>> + zlcount += bitlen < 1;
>> + syms[(bitlen << 8) + blcount] = i;
>> + blcounts[bitlen]++;
>> + }
>> +
>> + for (int i = 0; i < 512; i++) {
>> + nodes[i].child[0] = -1;
>> + nodes[i].child[1] = -1;
>> + }
>> +
>> + for (int i = 0; i < 256; i++) {
>> + node_idx[i] = 257 + i;;
>> + }
>> +
>> + curlen = 1;
>> + node_idx[512] = 256;
>> + last = 255;
>> + nindex = 1;
>> +
>> + for (curlen = 1; curlen < 32; curlen++) {
>> + if (blcounts[curlen] > 0) {
>> + int max_zlcount = zlcount + blcounts[curlen];
>> +
>> + for (int i = 0; zlcount < 256 && zlcount < max_zlcount;
>> zlcount++, i++) {
>> + int p = node_idx[nindex - 1 + 512];
>> + int ch = syms[256 * curlen + i];
>> +
>> + if (nodes[p].child[0] == -1) {
>> + nodes[p].child[0] = ch;
>> + } else {
>> + nodes[p].child[1] = ch;
>> + nindex--;
>> + }
>> + nodes[ch].parent = p;
>> + }
>> + }
>> + llast = last - 1;
>> + idx = 0;
>> + while (nindex > 0) {
>> + int p, ch;
>> +
>> + last = llast - idx;
>> + p = node_idx[nindex - 1 + 512];
>> + ch = node_idx[last];
>> + if (nodes[p].child[0] == -1) {
>> + nodes[p].child[0] = ch;
>> + } else {
>> + nodes[p].child[1] = ch;
>> + nindex--;
>> + }
>> + old_idx[idx] = ch;
>> + nodes[ch].parent = p;
>> + if (idx == llast)
>> + goto next;
>> + idx++;
>> + if (nindex <= 0) {
>> + for (int i = 0; i < idx; i++)
>> + node_idx[512 + i] = old_idx[i];
>> + }
>> + }
>> + nindex = idx;
>> + }
>> +
>> +next:
>> +
>> + get_tree_codes(codes, nodes, 256, 0, 0);
>> +}
>> +
>> +static int build_huff(const uint8_t *bitlen, VLC *vlc)
>> +{
>> + uint32_t new_codes[256];
>> + uint8_t bits[256];
>> + uint8_t symbols[256];
>> + uint32_t codes[256];
>> + int nb_codes = 0;
>> +
>> + make_new_tree(bitlen, new_codes);
>> +
>> + for (int i = 0; i < 256; i++) {
>> + if (bitlen[i]) {
>> + bits[nb_codes] = bitlen[i];
>> + codes[nb_codes] = new_codes[i];
>> + symbols[nb_codes] = i;
>> + nb_codes++;
>> + }
>> + }
>> +
>> + ff_free_vlc(vlc);
>> + return ff_init_vlc_sparse(vlc, 13, nb_codes,
>> + bits, 1, 1,
>> + codes, 4, 4,
>> + symbols, 1, 1,
>> + INIT_VLC_LE);
>> +}
>> +
>> +static int decode_huffman2(AVCodecContext *avctx, int header, int size)
>> +{
>> + AGMContext *s = avctx->priv_data;
>> + GetBitContext *gb = &s->gb;
>> + uint8_t lens[256];
>> + uint32_t output_size;
>> + int ret, x, len;
>> +
>> + if ((ret = init_get_bits8(gb, s->gbyte.buffer,
>> + bytestream2_get_bytes_left(&s->gbyte))) <
>> 0)
>> + return ret;
>> +
>
>> + output_size = get_bits_long(gb, 32);
>> +
>> + av_fast_padded_malloc(&s->output, &s->output_size,
>> + output_size * sizeof(*s->output));
>
> Several chances for overflow here.
Yes, changed output_size to int.
More information about the ffmpeg-devel mailing list