[FFmpeg-devel] [PATCH] MS Video 1 encoder, take 2

Kostya kostya.shishkov
Fri Mar 13 17:34:21 CET 2009


On Wed, Mar 11, 2009 at 08:37:00PM +0100, Michael Niedermayer wrote:
> On Wed, Mar 11, 2009 at 08:11:39AM +0200, Kostya wrote:
> > $subj
> > 
> > It is a quality-based encoder, since this codec is not suited to bitrate-based encoding,
> > so it should be run as: ffmpeg -i infile -vcodec msvideo1 -qscale 3 output.avi
> 
[...]
> 
> > +/**
> > + * Encoder context
> > + */
> > +typedef struct Msvideo1EncContext {
> > +    AVCodecContext *avctx;
> > +    AVFrame pic;
> > +    AVLFG rnd;
> > +    uint8_t *prev;
> > +
> > +    int block[16*3];
> > +    int block2[16*3];
> > +    int codebook[8*3];
> > +    int codebook2[8*3];
> > +    int output[16*3];
> > +    int output2[16*3];
> > +    int avg[3];
> 
> I don't think all of that belongs in the context; some of them are clearly local vars
 
moved
 
> [...]
> > +                for(i = 0; i < 4*4*3; i++){
> > +                    int t = prevptr[i] - c->block[i];
> > +                    bestscore += t*t;
> > +                }
> > +                if(!skips)
> > +                    bestscore += 2;
> 
> this is not a correct method of combining rate and distortion

Ok, now I don't mix them at all.
 
> rest not reviewed, this is a grave error and has to be fixed first

Can you recommend any good book on such matters? I really need it.

> [...]
> -- 
> Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
-------------- next part --------------
Index: Changelog
===================================================================
--- Changelog	(revision 17880)
+++ Changelog	(working copy)
@@ -4,6 +4,7 @@
 - deprecated vhook subsystem removed
 - deprecated old scaler removed
 - VQF demuxer
+- MS Video 1 15-bpp encoder
 
 
 
Index: libavcodec/Makefile
===================================================================
--- libavcodec/Makefile	(revision 17880)
+++ libavcodec/Makefile	(working copy)
@@ -152,6 +152,7 @@
 OBJS-$(CONFIG_MSMPEG4V3_ENCODER)       += msmpeg4.o msmpeg4data.o mpegvideo_enc.o motion_est.o ratecontrol.o h263.o mpeg12data.o mpegvideo.o error_resilience.o
 OBJS-$(CONFIG_MSRLE_DECODER)           += msrle.o msrledec.o
 OBJS-$(CONFIG_MSVIDEO1_DECODER)        += msvideo1.o
+OBJS-$(CONFIG_MSVIDEO1_ENCODER)        += msvideo1enc.o elbg.o
 OBJS-$(CONFIG_MSZH_DECODER)            += lcldec.o
 OBJS-$(CONFIG_NELLYMOSER_DECODER)      += nellymoserdec.o nellymoser.o
 OBJS-$(CONFIG_NELLYMOSER_ENCODER)      += nellymoserenc.o nellymoser.o
Index: libavcodec/msvideo1enc.c
===================================================================
--- libavcodec/msvideo1enc.c	(revision 0)
+++ libavcodec/msvideo1enc.c	(revision 0)
@@ -0,0 +1,361 @@
+/*
+ * Microsoft Video-1 encoder
+ * Copyright (c) 2009 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file libavcodec/msvideo1enc.c
+ * Microsoft Video-1 encoder
+ */
+
+#include "avcodec.h"
+#include "bytestream.h"
+#include "libavutil/lfg.h"
+#include "elbg.h"
+
+/**
+ * Encoder context
+ */
+typedef struct Msvideo1EncContext {
+    AVCodecContext *avctx; ///< owning codec context, set in encode_init()
+    AVFrame pic;           ///< copy of the frame being coded, exported as coded_frame
+    AVLFG rnd;             ///< random generator state handed to the ELBG routines
+    uint8_t *prev;         ///< reconstructed previous frame, 4*4*3 component bytes per block
+    int keyint;            ///< frames coded since the last keyframe
+} Msvideo1EncContext;
+
+enum MSV1Mode{
+    MODE_SKIP = 0, ///< nothing is written for the block; prev keeps its old contents
+    MODE_FILL,     ///< one colour for the whole block
+    MODE_2COL,     ///< 16-bit selection mask plus two colours for the whole block
+    MODE_8COL,     ///< 16-bit selection mask plus two colours per 2x2 quadrant
+
+    NB_MODES       ///< number of modes, used to size dists[] in encode_frame()
+};
+
+#define SKIP_PREFIX 0x8400 /* OR-ed with the run length to form a skip code word */
+#define SKIPS_MAX 0x03FF   /* run length at which a skip code word is flushed */
+#define MKRGB555(in, off) (((in)[(off)] << 10) | ((in)[(off) + 1] << 5) | ((in)[(off) + 2]))
+/* raster index of a 4x4 block <-> index grouped by 2x2 quadrant; the table is its own inverse */
+static const uint8_t remap_8col[16] = { 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15 };
+
+/** Sum of squared differences between the stored block in prev and the new block in src. */
+static inline int calc_skip(Msvideo1EncContext *c, uint8_t *prev, int *src)
+{
+    int sse = 0, n = 4*4*3;
+
+    while (n--) {
+        const int diff = prev[n] - src[n];
+        sse += diff * diff;
+    }
+    return sse;
+}
+
+/** Put the rounded mean colour of the 4x4 block into avg[] and return the
+ *  squared error of painting every pixel of the block with that colour. */
+static inline int calc_fill(Msvideo1EncContext *c, uint8_t *prev, int *src,
+                            int *avg)
+{
+    int err = 0;
+    int px, ch;
+
+    avg[0] = avg[1] = avg[2] = 0;
+    for (px = 0; px < 16; px++)
+        for (ch = 0; ch < 3; ch++)
+            avg[ch] += src[px*3 + ch];
+    for (ch = 0; ch < 3; ch++)
+        avg[ch] = (avg[ch] + 8) >> 4;
+    if (avg[0] == 1) // red component = 1 will be written as skip code
+        avg[0] = 0;
+    for (px = 0; px < 16; px++) {
+        for (ch = 0; ch < 3; ch++) {
+            const int d = avg[ch] - src[px*3 + ch];
+            err += d*d;
+        }
+    }
+    return err;
+}
+
+static inline int calc_quant2(Msvideo1EncContext *c, uint8_t *prev, int *src,
+                              int codebook[8*3], int output[16])
+{
+    int score = 0;
+    int i, j;
+
+    ff_init_elbg(src, 3, 16, codebook, 2, 1, output, &c->rnd);
+    ff_do_elbg  (src, 3, 16, codebook, 2, 1, output, &c->rnd);
+    // last output value should be always 1, swap codebooks if needed
+    if (!output[15]) {
+        for (i = 0; i < 3; i++)
+            FFSWAP(uint8_t, codebook[i], codebook[i+3]);
+        for (i = 0; i < 16; i++)
+            output[i] ^= 1;
+    }
+    for (i = 0; i < 4*4; i++) {
+        for (j = 0; j < 3; j++) {
+            int t = codebook[output[i]*3 + j] - src[i*3+j];
+            score += t*t;
+        }
+    }
+    return score;
+}
+
+static inline int calc_quant8(Msvideo1EncContext *c, uint8_t *prev, int *src,
+                             int codebook[8*3], int output[16])
+{
+    int score = 0;
+    int i, j;
+
+    for (i = 0; i < 4; i++) {
+        ff_init_elbg(src + i*4*3, 3, 4, codebook + i*2*3, 2, 1, output + i*4, &c->rnd);
+        ff_do_elbg  (src + i*4*3, 3, 4, codebook + i*2*3, 2, 1, output + i*4, &c->rnd);
+    }
+    // last value should be always 1, swap codebooks if needed
+    if (!output[15]) {
+        for (i = 0; i < 3; i++)
+            FFSWAP(uint8_t, codebook[i+18], codebook[i+21]);
+        for (i = 12; i < 16; i++)
+            output[i] ^= 1;
+    }
+    for (i = 0; i < 4*4; i++) {
+        for (j = 0; j < 3; j++) {
+            int t = codebook[(output[remap_8col[i]] + (i&2) + ((i&8)>>1))*3+j] - src[i*3+j];
+            score += t*t;
+        }
+    }
+    return score;
+}
+
+/** Write the code words of one block coded in the given mode to *dst, advancing
+ *  *dst; MODE_SKIP writes nothing here (skip runs are emitted by the caller). */
+static void encode_block(int mode, uint8_t **dst,
+                         int *avg,
+                         int codebook_2col[8*3], int output_2col[16],
+                         int codebook_8col[8*3], int output_8col[16])
+{
+    int mask = 0;
+    int n;
+
+    if (mode == MODE_FILL) {
+        // a single colour word with the top bit set
+        bytestream_put_le16(dst, MKRGB555(avg, 0) | 0x8000);
+    } else if (mode == MODE_2COL) {
+        // selection mask (inverted indices) followed by both colours
+        for (n = 0; n < 16; n++)
+            mask |= (output_2col[n] ^ 1) << n;
+        bytestream_put_le16(dst, mask);
+        bytestream_put_le16(dst, MKRGB555(codebook_2col, 0));
+        bytestream_put_le16(dst, MKRGB555(codebook_2col, 3));
+    } else if (mode == MODE_8COL) {
+        // mask in raster order, then eight colours; only the first has the top bit set
+        for (n = 0; n < 16; n++)
+            mask |= (output_8col[remap_8col[n]] ^ 1) << n;
+        bytestream_put_le16(dst, mask);
+        for (n = 0; n < 8; n++)
+            bytestream_put_le16(dst, MKRGB555(codebook_8col, n*3) | (n ? 0 : 0x8000));
+    }
+}
+
+/**
+ * Store the reconstruction of the block just coded in prev (raster order,
+ * three components per pixel). Any other mode, e.g. MODE_SKIP, leaves prev as is.
+ */
+static void update_prev(int mode, uint8_t *prev, int *avg,
+                        int codebook_2col[8*3], int output_2col[16],
+                        int codebook_8col[8*3], int output_8col[16])
+{
+    int px, ch;
+
+    if (mode != MODE_FILL && mode != MODE_2COL && mode != MODE_8COL)
+        return;
+    for (px = 0; px < 16; px++) {
+        const int *col;
+
+        if (mode == MODE_FILL)
+            col = avg;
+        else if (mode == MODE_2COL)
+            col = codebook_2col + output_2col[px]*3;
+        else // 8 colours: the pair of the pixel's quadrant, index found through remap_8col
+            col = codebook_8col + (output_8col[remap_8col[px]] + (px&2) + ((px&8)>>1))*3;
+        for (ch = 0; ch < 3; ch++)
+            prev[px*3 + ch] = col[ch];
+    }
+}
+
+/** Encode one RGB555 picture as 4x4 blocks, starting with the bottom row of blocks.
+ *  Returns the number of bytes written to buf, or -1 if buf_size is too small. */
+static int encode_frame(AVCodecContext *avctx, uint8_t *buf, int buf_size, void *data)
+{
+    Msvideo1EncContext * const c = avctx->priv_data;
+    AVFrame *pict = data;
+    AVFrame * const p = &c->pic;
+    const uint16_t *src;
+    uint8_t *prevptr;
+    uint8_t *dst = buf;
+    int keyframe = 0;
+    int no_skips = 1;
+    int i, j, k, x, y;
+    int skips = 0;
+    int quality;
+    int block[16*3];
+    int block_8col[16*3];
+    int avg[3];
+    int codebook_2col[8*3];
+    int codebook_8col[8*3];
+    int output_2col[16];
+    int output_8col[16];
+    int dists[NB_MODES];
+
+    /* worst case: every block is coded with 8 colours (18 bytes), plus the end marker */
+    if (buf_size < ((avctx->width + 3) >> 2) * ((avctx->height + 3) >> 2) * 18 + 2) {
+        av_log(avctx, AV_LOG_ERROR, "Output buffer is too small\n");
+        return -1;
+    }
+
+    *p = *pict;
+    prevptr = c->prev;
+    src = (uint16_t*)(p->data[0] + p->linesize[0]*(((avctx->height + 3)&~3) - 1));
+    if (c->keyint >= avctx->keyint_min)
+        keyframe = 1;
+    p->quality = avctx->global_quality;
+    quality = p->quality;
+
+    for (y = 0; y < avctx->height; y += 4) {
+        for (x = 0; x < avctx->width; x += 4) {
+            int bestmode;
+
+            /* unpack to raster and quadrant-grouped order; NOTE(review): reads past sizes not divisible by 4 */
+            for (j = 0; j < 4; j++) {
+                for (i = 0; i < 4; i++) {
+                    uint16_t val = src[x + i - j*p->linesize[0]/2];
+                    for (k = 0; k < 3; k++) {
+                        block     [(i + j*4)*3 + k]           = (val >> (10-k*5)) & 0x1F;
+                        block_8col[remap_8col[i + j*4]*3 + k] = (val >> (10-k*5)) & 0x1F;
+                    }
+                }
+            }
+
+            dists[MODE_SKIP] = keyframe ? INT_MAX : calc_skip(c, prevptr, block);
+            dists[MODE_FILL] = INT_MAX;//calc_fill  (c, prevptr, block, avg);
+            dists[MODE_2COL] = INT_MAX;//calc_quant2(c, prevptr, block,      codebook_2col, output_2col);
+            dists[MODE_8COL] = calc_quant8(c, prevptr, block_8col, codebook_8col, output_8col);
+
+            /* For now, the first mode whose distortion is below the limit set by
+             * quality is chosen. Modes are ordered by ascending quality (and coded
+             * size), so with a high threshold the cheap low-quality modes win.
+             *
+             * TODO: replace it with something better
+             */
+            for (i = 0; i < NB_MODES - 1; i++)
+                if (dists[i] < quality)
+                    break;
+            bestmode = i;
+
+            if (bestmode == MODE_SKIP)
+                skips++;
+            if ((bestmode != MODE_SKIP && skips) || skips == SKIPS_MAX) {
+                bytestream_put_le16(&dst, skips | SKIP_PREFIX);
+                skips = 0;
+            }
+
+            /* coding costs in bytes: first skip of a run 2, further skips 0,
+             * one-colour fill 2, 2-colour block 6, 8-colour block 18 */
+            encode_block(bestmode, &dst, avg, codebook_2col, output_2col,
+                         codebook_8col, output_8col);
+            update_prev (bestmode, prevptr, avg, codebook_2col, output_2col,
+                         codebook_8col, output_8col);
+            if (skips)
+                no_skips = 0;
+            prevptr += 4*4*3;
+        }
+        src -= p->linesize[0] << 1;
+    }
+    if (skips)
+        bytestream_put_le16(&dst, skips | SKIP_PREFIX);
+    //EOF
+    bytestream_put_le16(&dst, 0);
+
+    if (no_skips)
+        keyframe = 1;
+    if (keyframe)
+        c->keyint = 0;
+    else
+        c->keyint++;
+    p->pict_type = keyframe ? FF_I_TYPE : FF_P_TYPE;
+    p->key_frame = keyframe;
+
+    return dst - buf;
+}
+
+
+/**
+ * Initialise the encoder: require fixed-quality mode, validate the picture size
+ * and allocate the previous-frame buffer. Returns 0 on success, -1 on error.
+ */
+static av_cold int encode_init(AVCodecContext *avctx)
+{
+    Msvideo1EncContext * const c = avctx->priv_data;
+
+    if (!(avctx->flags&CODEC_FLAG_QSCALE)) {
+        av_log(avctx, AV_LOG_ERROR, "This encoder works only with set quality, not bitrate\n");
+        return -1;
+    }
+
+    c->avctx = avctx;
+    if (avcodec_check_dimensions(avctx, avctx->width, avctx->height) < 0)
+        return -1;
+    avctx->coded_frame = &c->pic;
+    // start at keyint_min so that encode_frame() codes the first frame as a keyframe
+    c->keyint = avctx->keyint_min;
+    av_lfg_init(&c->rnd, 0xDEADBEEF);
+
+    // three component bytes per pixel, dimensions rounded up to whole 4x4 blocks
+    c->prev = av_malloc(((avctx->width + 3) & ~3) * ((avctx->height + 3) & ~3) * 3);
+    if (!c->prev) {
+        av_log(avctx, AV_LOG_ERROR, "Cannot allocate buffer\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+
+/**
+ * Release the previous-frame buffer allocated by encode_init().
+ * Always returns 0.
+ */
+static av_cold int encode_end(AVCodecContext *avctx)
+{
+    Msvideo1EncContext *enc = avctx->priv_data;
+
+    av_freep(&enc->prev);
+    return 0;
+}
+
+AVCodec msvideo1_encoder = { // positional fields presumably follow AVCodec's declaration order - TODO confirm
+    "msvideo1",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_MSVIDEO1,
+    sizeof(Msvideo1EncContext), // size of the private context reached through avctx->priv_data
+    encode_init,
+    encode_frame,
+    encode_end,
+    .pix_fmts = (enum PixelFormat[]){PIX_FMT_RGB555, PIX_FMT_NONE}, // RGB555 is the only listed format
+    .long_name = NULL_IF_CONFIG_SMALL("Microsoft Video-1"),
+};
Index: libavcodec/allcodecs.c
===================================================================
--- libavcodec/allcodecs.c	(revision 17880)
+++ libavcodec/allcodecs.c	(working copy)
@@ -116,7 +116,7 @@
     REGISTER_ENCDEC  (MSMPEG4V2, msmpeg4v2);
     REGISTER_ENCDEC  (MSMPEG4V3, msmpeg4v3);
     REGISTER_DECODER (MSRLE, msrle);
-    REGISTER_DECODER (MSVIDEO1, msvideo1);
+    REGISTER_ENCDEC  (MSVIDEO1, msvideo1);
     REGISTER_DECODER (MSZH, mszh);
     REGISTER_DECODER (NUV, nuv);
     REGISTER_ENCODER (PAM, pam);
Index: doc/general.texi
===================================================================
--- doc/general.texi	(revision 17880)
+++ doc/general.texi	(working copy)
@@ -373,7 +373,7 @@
 @item LOCO                   @tab     @tab  X
 @item lossless MJPEG         @tab  X  @tab  X
 @item Microsoft RLE          @tab     @tab  X
-@item Microsoft Video 1      @tab     @tab  X
+@item Microsoft Video 1      @tab  X  @tab  X
 @item Mimic                  @tab     @tab  X
     @tab Used in MSN Messenger Webcam streams.
 @item Miro VideoXL           @tab     @tab  X



More information about the ffmpeg-devel mailing list