[FFmpeg-devel] [RFC][PATCH] Add support for 12-bit color mode.

Janusz Krzysztofik jkrzyszt
Sat Feb 27 03:31:35 CET 2010


Hi,

While trying to play video on my ARM OMAP1 based Amstrad Delta (E3) videophone 
with mplayer, I found it not compatible with the 12-bit LCD display my device 
is equipped with. To solve the problem, I patched several mplayer video output 
drivers to support my device. For them to be usefull, I also had to patch 
libswscale to be able to convert video to my device compatible RGB444 format.

The patch tries to extend not only libswscale, but all ffmpeg libraries as 
well, with RGB12 aka RGB444 pixel format handling. All new color conversion 
functions are supposed to work correctly except for dithering that I was not 
able to develop correct table values for by myself.
 
Some changes to video codecs that I was not able to understand deeply enough 
and test them are probably wrong or not applicable.

Created against ffmpeg svn revision 22026 dated 2010-02-24.
Tested with patched mplayer on omapfb framebuffer device with Amstrad Delta 
12-bit LCD display.

Signed-off-by: Janusz Krzysztofik <jkrzyszt at tis.icnet.pl>

---

diff -upr trunk/doc/swscale.txt trunk.rgb12/doc/swscale.txt
--- trunk/doc/swscale.txt	2010-02-24 15:55:28.000000000 +0100
+++ trunk.rgb12/doc/swscale.txt	2010-02-25 04:31:55.000000000 +0100
@@ -30,7 +30,7 @@ slices, that is, consecutive non-overlap
 
 special converter
     These generally are unscaled converters of common
-    formats, like YUV 4:2:0/4:2:2 -> RGB15/16/24/32. Though it could also
+    formats, like YUV 4:2:0/4:2:2 -> RGB12/15/16/24/32. Though it could also
     in principle contain scalers optimized for specific common cases.
 
 Main path
diff -upr trunk/libavcodec/bmp.c trunk.rgb12/libavcodec/bmp.c
--- trunk/libavcodec/bmp.c	2010-02-24 15:55:06.000000000 +0100
+++ trunk.rgb12/libavcodec/bmp.c	2010-02-25 03:19:30.000000000 +0100
@@ -163,7 +163,8 @@ static int bmp_decode_frame(AVCodecConte
         if(comp == BMP_RGB)
             avctx->pix_fmt = PIX_FMT_RGB555;
         if(comp == BMP_BITFIELDS)
-            avctx->pix_fmt = rgb[1] == 0x07E0 ? PIX_FMT_RGB565 : PIX_FMT_RGB555;
+            avctx->pix_fmt = rgb[1] == 0x07E0 ? PIX_FMT_RGB565 :
+                             rgb[1] == 0x03E0 ? PIX_FMT_RGB555 : PIX_FMT_RGB444;
         break;
     case 8:
         if(hsize - ihsize - 14 > 0)
diff -upr trunk/libavcodec/bmpenc.c trunk.rgb12/libavcodec/bmpenc.c
--- trunk/libavcodec/bmpenc.c	2010-02-24 15:55:14.000000000 +0100
+++ trunk.rgb12/libavcodec/bmpenc.c	2010-02-25 03:23:28.000000000 +0100
@@ -25,6 +25,7 @@
 #include "bmp.h"
 
 static const uint32_t monoblack_pal[] = { 0x000000, 0xFFFFFF };
+static const uint32_t rgb444_masks[]  = { 0x0F00, 0x00F0, 0x000F };
 static const uint32_t rgb565_masks[]  = { 0xF800, 0x07E0, 0x001F };
 
 static av_cold int bmp_encode_init(AVCodecContext *avctx){
@@ -52,6 +53,12 @@ static int bmp_encode_frame(AVCodecConte
     case PIX_FMT_BGR24:
         bit_count = 24;
         break;
+    case PIX_FMT_RGB444:
+        bit_count = 16;
+        compression = BMP_BITFIELDS;
+        pal = rgb444_masks; // abuse pal to hold color masks
+        pal_entries = 3;
+        break;
     case PIX_FMT_RGB555:
         bit_count = 16;
         break;
@@ -141,7 +148,7 @@ AVCodec bmp_encoder = {
     NULL, //encode_end,
     .pix_fmts = (const enum PixelFormat[]){
         PIX_FMT_BGR24,
-        PIX_FMT_RGB555, PIX_FMT_RGB565,
+        PIX_FMT_RGB444, PIX_FMT_RGB555, PIX_FMT_RGB565,
         PIX_FMT_RGB8, PIX_FMT_BGR8, PIX_FMT_RGB4_BYTE, PIX_FMT_BGR4_BYTE, PIX_FMT_GRAY8, PIX_FMT_PAL8,
         PIX_FMT_MONOBLACK,
         PIX_FMT_NONE},
diff -upr trunk/libavcodec/flicvideo.c trunk.rgb12/libavcodec/flicvideo.c
--- trunk/libavcodec/flicvideo.c	2010-02-24 15:55:01.000000000 +0100
+++ trunk.rgb12/libavcodec/flicvideo.c	2010-02-25 03:26:40.000000000 +0100
@@ -27,7 +27,7 @@
  * variations, visit:
  *   http://www.compuphase.com/flic.htm
  *
- * This decoder outputs PAL8/RGB555/RGB565 and maybe one day RGB24
+ * This decoder outputs PAL8/RGB444/RGB555/RGB565 and maybe one day RGB24
  * colorspace data, depending on the FLC. To use this decoder, be
  * sure that your demuxer sends the FLI file header to the decoder via
  * the extradata chunk in AVCodecContext. The chunk should be 128 bytes
@@ -107,6 +107,7 @@ static av_cold int flic_decode_init(AVCo
 
     switch (depth) {
         case 8  : avctx->pix_fmt = PIX_FMT_PAL8; break;
+        case 12 : avctx->pix_fmt = PIX_FMT_RGB444; break;
         case 15 : avctx->pix_fmt = PIX_FMT_RGB555; break;
         case 16 : avctx->pix_fmt = PIX_FMT_RGB565; break;
         case 24 : avctx->pix_fmt = PIX_FMT_BGR24; /* Supposedly BGR, but havent any files to test with */
@@ -708,7 +709,8 @@ static int flic_decode_frame(AVCodecCont
       return flic_decode_frame_8BPP(avctx, data, data_size,
                                     buf, buf_size);
     }
-    else if ((avctx->pix_fmt == PIX_FMT_RGB555) ||
+    else if ((avctx->pix_fmt == PIX_FMT_RGB444) ||
+             (avctx->pix_fmt == PIX_FMT_RGB555) ||
              (avctx->pix_fmt == PIX_FMT_RGB565)) {
       return flic_decode_frame_15_16BPP(avctx, data, data_size,
                                         buf, buf_size);
diff -upr trunk/libavcodec/imgconvert.c trunk.rgb12/libavcodec/imgconvert.c
--- trunk/libavcodec/imgconvert.c	2010-02-24 19:21:59.000000000 +0100
+++ trunk.rgb12/libavcodec/imgconvert.c	2010-02-25 03:13:10.000000000 +0100
@@ -238,6 +238,18 @@ static const PixFmtInfo pix_fmt_info[PIX
         .pixel_type = FF_PIXEL_PACKED,
         .depth = 5,
     },
+    [PIX_FMT_RGB444BE] = {
+        .nb_channels = 3,
+        .color_type = FF_COLOR_RGB,
+        .pixel_type = FF_PIXEL_PACKED,
+        .depth = 4,
+    },
+    [PIX_FMT_RGB444LE] = {
+        .nb_channels = 3,
+        .color_type = FF_COLOR_RGB,
+        .pixel_type = FF_PIXEL_PACKED,
+        .depth = 4,
+    },
 
     /* gray / mono formats */
     [PIX_FMT_GRAY16BE] = {
@@ -314,6 +326,18 @@ static const PixFmtInfo pix_fmt_info[PIX
         .pixel_type = FF_PIXEL_PACKED,
         .depth = 5,
     },
+    [PIX_FMT_BGR444BE] = {
+        .nb_channels = 3,
+        .color_type = FF_COLOR_RGB,
+        .pixel_type = FF_PIXEL_PACKED,
+        .depth = 4,
+    },
+    [PIX_FMT_BGR444LE] = {
+        .nb_channels = 3,
+        .color_type = FF_COLOR_RGB,
+        .pixel_type = FF_PIXEL_PACKED,
+        .depth = 4,
+    },
     [PIX_FMT_RGB8] = {
         .nb_channels = 1,
         .color_type = FF_COLOR_RGB,
@@ -527,10 +551,14 @@ int ff_fill_linesize(AVPicture *picture,
         break;
     case PIX_FMT_GRAY16BE:
     case PIX_FMT_GRAY16LE:
+    case PIX_FMT_BGR444BE:
+    case PIX_FMT_BGR444LE:
     case PIX_FMT_BGR555BE:
     case PIX_FMT_BGR555LE:
     case PIX_FMT_BGR565BE:
     case PIX_FMT_BGR565LE:
+    case PIX_FMT_RGB444BE:
+    case PIX_FMT_RGB444LE:
     case PIX_FMT_RGB555BE:
     case PIX_FMT_RGB555LE:
     case PIX_FMT_RGB565BE:
@@ -624,10 +652,14 @@ int ff_fill_pointer(AVPicture *picture, 
     case PIX_FMT_RGB48LE:
     case PIX_FMT_GRAY16BE:
     case PIX_FMT_GRAY16LE:
+    case PIX_FMT_BGR444BE:
+    case PIX_FMT_BGR444LE:
     case PIX_FMT_BGR555BE:
     case PIX_FMT_BGR555LE:
     case PIX_FMT_BGR565BE:
     case PIX_FMT_BGR565LE:
+    case PIX_FMT_RGB444BE:
+    case PIX_FMT_RGB444LE:
     case PIX_FMT_RGB555BE:
     case PIX_FMT_RGB555LE:
     case PIX_FMT_RGB565BE:
@@ -697,10 +729,14 @@ int avpicture_layout(const AVPicture* sr
             pix_fmt == PIX_FMT_BGR565LE ||
             pix_fmt == PIX_FMT_BGR555BE ||
             pix_fmt == PIX_FMT_BGR555LE ||
+            pix_fmt == PIX_FMT_BGR444BE ||
+            pix_fmt == PIX_FMT_BGR444LE ||
             pix_fmt == PIX_FMT_RGB565BE ||
             pix_fmt == PIX_FMT_RGB565LE ||
             pix_fmt == PIX_FMT_RGB555BE ||
-            pix_fmt == PIX_FMT_RGB555LE)
+            pix_fmt == PIX_FMT_RGB555LE ||
+            pix_fmt == PIX_FMT_RGB444BE ||
+            pix_fmt == PIX_FMT_RGB444LE)
             w = width * 2;
         else if (pix_fmt == PIX_FMT_UYYVYY411)
             w = width + width/2;
@@ -775,6 +811,9 @@ int avcodec_get_pix_fmt_loss(enum PixelF
     loss = 0;
     pf = &pix_fmt_info[dst_pix_fmt];
     if (pf->depth < ps->depth ||
+        ((dst_pix_fmt == PIX_FMT_RGB444BE || dst_pix_fmt == PIX_FMT_RGB444LE) &&
+         (src_pix_fmt == PIX_FMT_RGB555BE || src_pix_fmt == PIX_FMT_RGB555LE ||
+          src_pix_fmt == PIX_FMT_RGB565BE || src_pix_fmt == PIX_FMT_RGB565LE)) ||
         ((dst_pix_fmt == PIX_FMT_RGB555BE || dst_pix_fmt == PIX_FMT_RGB555LE) &&
          (src_pix_fmt == PIX_FMT_RGB565BE || src_pix_fmt == PIX_FMT_RGB565LE)))
         loss |= FF_LOSS_DEPTH;
@@ -834,10 +873,14 @@ static int avg_bits_per_pixel(enum Pixel
         case PIX_FMT_RGB565LE:
         case PIX_FMT_RGB555BE:
         case PIX_FMT_RGB555LE:
+        case PIX_FMT_RGB444BE:
+        case PIX_FMT_RGB444LE:
         case PIX_FMT_BGR565BE:
         case PIX_FMT_BGR565LE:
         case PIX_FMT_BGR555BE:
         case PIX_FMT_BGR555LE:
+        case PIX_FMT_BGR444BE:
+        case PIX_FMT_BGR444LE:
             bits = 16;
             break;
         case PIX_FMT_UYYVYY411:
@@ -954,10 +997,14 @@ int ff_get_plane_bytewidth(enum PixelFor
         case PIX_FMT_RGB565LE:
         case PIX_FMT_RGB555BE:
         case PIX_FMT_RGB555LE:
+        case PIX_FMT_RGB444BE:
+        case PIX_FMT_RGB444LE:
         case PIX_FMT_BGR565BE:
         case PIX_FMT_BGR565LE:
         case PIX_FMT_BGR555BE:
         case PIX_FMT_BGR555LE:
+        case PIX_FMT_BGR444BE:
+        case PIX_FMT_BGR444LE:
             bits = 16;
             break;
         case PIX_FMT_UYYVYY411:
diff -upr trunk/libavcodec/raw.c trunk.rgb12/libavcodec/raw.c
--- trunk/libavcodec/raw.c	2010-02-24 19:49:18.000000000 +0100
+++ trunk.rgb12/libavcodec/raw.c	2010-02-25 02:10:33.000000000 +0100
@@ -56,6 +56,8 @@ const PixelFormatTag ff_raw_pixelFormatT
     { PIX_FMT_UYVY422, MKTAG('A', 'V', 'u', 'p') },
     { PIX_FMT_UYVY422, MKTAG('V', 'D', 'T', 'Z') }, /* SoftLab-NSK VideoTizer */
     { PIX_FMT_GRAY8,   MKTAG('G', 'R', 'E', 'Y') },
+    { PIX_FMT_RGB444,  MKTAG('R', 'G', 'B', 12) },
+    { PIX_FMT_BGR444,  MKTAG('B', 'G', 'R', 12) },
     { PIX_FMT_RGB555,  MKTAG('R', 'G', 'B', 15) },
     { PIX_FMT_BGR555,  MKTAG('B', 'G', 'R', 15) },
     { PIX_FMT_RGB565,  MKTAG('R', 'G', 'B', 16) },
diff -upr trunk/libavcodec/rawdec.c trunk.rgb12/libavcodec/rawdec.c
--- trunk/libavcodec/rawdec.c	2010-02-24 15:55:00.000000000 +0100
+++ trunk.rgb12/libavcodec/rawdec.c	2010-02-25 02:10:33.000000000 +0100
@@ -38,6 +38,7 @@ typedef struct RawVideoContext {
 static const PixelFormatTag pixelFormatBpsAVI[] = {
     { PIX_FMT_PAL8,    4 },
     { PIX_FMT_PAL8,    8 },
+    { PIX_FMT_RGB444, 12 },
     { PIX_FMT_RGB555, 15 },
     { PIX_FMT_RGB555, 16 },
     { PIX_FMT_BGR24,  24 },
diff -upr trunk/libavcodec/targa.c trunk.rgb12/libavcodec/targa.c
--- trunk/libavcodec/targa.c	2010-02-24 15:55:19.000000000 +0100
+++ trunk.rgb12/libavcodec/targa.c	2010-02-25 02:10:33.000000000 +0100
@@ -125,6 +125,9 @@ static int decode_frame(AVCodecContext *
     case 8:
         avctx->pix_fmt = ((compr & (~TGA_RLE)) == TGA_BW) ? PIX_FMT_GRAY8 : PIX_FMT_PAL8;
         break;
+    case 12:
+        avctx->pix_fmt = PIX_FMT_RGB444;
+        break;
     case 15:
         avctx->pix_fmt = PIX_FMT_RGB555;
         break;
diff -upr trunk/libavdevice/v4l2.c trunk.rgb12/libavdevice/v4l2.c
--- trunk/libavdevice/v4l2.c	2010-02-24 15:55:29.000000000 +0100
+++ trunk.rgb12/libavdevice/v4l2.c	2010-02-25 04:33:37.000000000 +0100
@@ -108,6 +108,11 @@ static struct fmt_map fmt_conversion_tab
         .v4l2_fmt = V4L2_PIX_FMT_YUV410,
     },
     {
+        .ff_fmt = PIX_FMT_RGB444,
+        .codec_id = CODEC_ID_RAWVIDEO,
+        .v4l2_fmt = V4L2_PIX_FMT_RGB444,
+    },
+    {
         .ff_fmt = PIX_FMT_RGB555,
         .codec_id = CODEC_ID_RAWVIDEO,
         .v4l2_fmt = V4L2_PIX_FMT_RGB555,
diff -upr trunk/libavdevice/x11grab.c trunk.rgb12/libavdevice/x11grab.c
--- trunk/libavdevice/x11grab.c	2010-02-24 15:55:29.000000000 +0100
+++ trunk.rgb12/libavdevice/x11grab.c	2010-02-25 02:10:33.000000000 +0100
@@ -171,6 +171,11 @@ x11grab_read_header(AVFormatContext *s1,
                    image->blue_mask  == 0x001f ) {
             av_log(s1, AV_LOG_DEBUG, "16 bit RGB555\n");
             input_pixfmt = PIX_FMT_RGB555;
+        } else if (image->red_mask   == 0x0f00 &&
+                   image->green_mask == 0x00f0 &&
+                   image->blue_mask  == 0x000f ) {
+            av_log(s1, AV_LOG_DEBUG, "16 bit RGB444\n");
+            input_pixfmt = PIX_FMT_RGB444;
         } else {
             av_log(s1, AV_LOG_ERROR, "RGB ordering at image depth %i not supported ... aborting\n", image->bits_per_pixel);
             av_log(s1, AV_LOG_ERROR, "color masks: r 0x%.6lx g 0x%.6lx b 0x%.6lx\n", image->red_mask, image->green_mask, image->blue_mask);
diff -upr trunk/libavfilter/vf_crop.c trunk.rgb12/libavfilter/vf_crop.c
--- trunk/libavfilter/vf_crop.c	2010-02-24 15:55:30.000000000 +0100
+++ trunk.rgb12/libavfilter/vf_crop.c	2010-02-25 04:36:31.000000000 +0100
@@ -45,8 +45,10 @@ static int query_formats(AVFilterContext
         PIX_FMT_RGB24,        PIX_FMT_BGR24,
         PIX_FMT_RGB565BE,     PIX_FMT_RGB565LE,
         PIX_FMT_RGB555BE,     PIX_FMT_RGB555LE,
+        PIX_FMT_RGB444BE,     PIX_FMT_RGB444LE,
         PIX_FMT_BGR565BE,     PIX_FMT_BGR565LE,
         PIX_FMT_BGR555BE,     PIX_FMT_BGR555LE,
+        PIX_FMT_BGR444BE,     PIX_FMT_BGR444LE,
         PIX_FMT_GRAY16BE,     PIX_FMT_GRAY16LE,
         PIX_FMT_YUV420P16LE,  PIX_FMT_YUV420P16BE,
         PIX_FMT_YUV422P16LE,  PIX_FMT_YUV422P16BE,
@@ -102,10 +104,14 @@ static int config_input(AVFilterLink *li
     case PIX_FMT_RGB565LE:
     case PIX_FMT_RGB555BE:
     case PIX_FMT_RGB555LE:
+    case PIX_FMT_RGB444BE:
+    case PIX_FMT_RGB444LE:
     case PIX_FMT_BGR565BE:
     case PIX_FMT_BGR565LE:
     case PIX_FMT_BGR555BE:
     case PIX_FMT_BGR555LE:
+    case PIX_FMT_BGR444BE:
+    case PIX_FMT_BGR444LE:
     case PIX_FMT_GRAY16BE:
     case PIX_FMT_GRAY16LE:
     case PIX_FMT_YUV420P16LE:
diff -upr trunk/libavformat/movenc.c trunk.rgb12/libavformat/movenc.c
--- trunk/libavformat/movenc.c	2010-02-24 15:55:40.000000000 +0100
+++ trunk.rgb12/libavformat/movenc.c	2010-02-25 03:29:15.000000000 +0100
@@ -675,6 +675,8 @@ static const struct {
 } mov_pix_fmt_tags[] = {
     { PIX_FMT_YUYV422, MKTAG('y','u','v','s'),  0 },
     { PIX_FMT_UYVY422, MKTAG('2','v','u','y'),  0 },
+    { PIX_FMT_RGB444LE,MKTAG('L','4','4','4'), 16 },
+    { PIX_FMT_RGB444BE,MKTAG('B','4','4','4'), 16 },
     { PIX_FMT_BGR555,  MKTAG('r','a','w',' '), 16 },
     { PIX_FMT_RGB555LE,MKTAG('L','5','5','5'), 16 },
     { PIX_FMT_RGB565LE,MKTAG('L','5','6','5'), 16 },
diff -upr trunk/libavutil/pixdesc.c trunk.rgb12/libavutil/pixdesc.c
--- trunk/libavutil/pixdesc.c	2010-02-24 15:54:36.000000000 +0100
+++ trunk.rgb12/libavutil/pixdesc.c	2010-02-25 04:47:04.000000000 +0100
@@ -593,6 +593,29 @@ const AVPixFmtDescriptor av_pix_fmt_desc
             {0,1,1,0,4},        /* B */
         },
     },
+    [PIX_FMT_RGB444BE] = {
+        .name = "rgb444be",
+        .nb_components= 3,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,1,0,0,3},        /* R */
+            {0,1,1,4,3},        /* G */
+            {0,1,1,0,3},        /* B */
+        },
+        .flags = PIX_FMT_BE,
+    },
+    [PIX_FMT_RGB444LE] = {
+        .name = "rgb444le",
+        .nb_components= 3,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,1,2,0,3},        /* R */
+            {0,1,1,4,3},        /* G */
+            {0,1,1,0,3},        /* B */
+        },
+    },
     [PIX_FMT_BGR565BE] = {
         .name = "bgr565be",
         .nb_components= 3,
@@ -639,6 +662,29 @@ const AVPixFmtDescriptor av_pix_fmt_desc
             {0,1,1,0,4},        /* R */
         },
     },
+    [PIX_FMT_BGR444BE] = {
+        .name = "bgr444be",
+        .nb_components= 3,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,1,0,0,3},       /* B */
+            {0,1,1,4,3},       /* G */
+            {0,1,1,0,3},       /* R */
+        },
+        .flags = PIX_FMT_BE,
+     },
+    [PIX_FMT_BGR444LE] = {
+        .name = "bgr444le",
+        .nb_components= 3,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,1,2,0,3},        /* B */
+            {0,1,1,4,3},        /* G */
+            {0,1,1,0,3},        /* R */
+        },
+    },
     [PIX_FMT_VAAPI_MOCO] = {
         .name = "vaapi_moco",
         .log2_chroma_w = 1,
diff -upr trunk/libavutil/pixfmt.h trunk.rgb12/libavutil/pixfmt.h
--- trunk/libavutil/pixfmt.h	2010-02-24 15:54:36.000000000 +0100
+++ trunk.rgb12/libavutil/pixfmt.h	2010-02-25 02:10:33.000000000 +0100
@@ -109,11 +109,15 @@ enum PixelFormat {
     PIX_FMT_RGB565LE,  ///< packed RGB 5:6:5, 16bpp, (msb)   5R 6G 5B(lsb), little-endian
     PIX_FMT_RGB555BE,  ///< packed RGB 5:5:5, 16bpp, (msb)1A 5R 5G 5B(lsb), big-endian, most significant bit to 0
     PIX_FMT_RGB555LE,  ///< packed RGB 5:5:5, 16bpp, (msb)1A 5R 5G 5B(lsb), little-endian, most significant bit to 0
+    PIX_FMT_RGB444BE,  ///< packed RGB 4:4:4, 16bpp, (msb)4A 4R 4G 4B(lsb), big-endian, most significant bits to 0
+    PIX_FMT_RGB444LE,  ///< packed RGB 4:4:4, 16bpp, (msb)4A 4R 4G 4B(lsb), little-endian, most significant bits to 0
 
     PIX_FMT_BGR565BE,  ///< packed BGR 5:6:5, 16bpp, (msb)   5B 6G 5R(lsb), big-endian
     PIX_FMT_BGR565LE,  ///< packed BGR 5:6:5, 16bpp, (msb)   5B 6G 5R(lsb), little-endian
     PIX_FMT_BGR555BE,  ///< packed BGR 5:5:5, 16bpp, (msb)1A 5B 5G 5R(lsb), big-endian, most significant bit to 1
     PIX_FMT_BGR555LE,  ///< packed BGR 5:5:5, 16bpp, (msb)1A 5B 5G 5R(lsb), little-endian, most significant bit to 1
+    PIX_FMT_BGR444BE,  ///< packed BGR 4:4:4, 16bpp, (msb)4A 4B 4G 4R(lsb), big-endian, most significant bits to 1
+    PIX_FMT_BGR444LE,  ///< packed BGR 4:4:4, 16bpp, (msb)4A 4B 4G 4R(lsb), little-endian, most significant bits to 1
 
     PIX_FMT_VAAPI_MOCO, ///< HW acceleration through VA API at motion compensation entry-point, Picture.data[3] contains a vaapi_render_state 
struct which contains macroblocks as well as various fields extracted from headers
     PIX_FMT_VAAPI_IDCT, ///< HW acceleration through VA API at IDCT entry-point, Picture.data[3] contains a vaapi_render_state struct which 
contains fields extracted from headers
@@ -145,8 +149,10 @@ enum PixelFormat {
 #define PIX_FMT_RGB48  PIX_FMT_NE(RGB48BE,  RGB48LE)
 #define PIX_FMT_RGB565 PIX_FMT_NE(RGB565BE, RGB565LE)
 #define PIX_FMT_RGB555 PIX_FMT_NE(RGB555BE, RGB555LE)
+#define PIX_FMT_RGB444 PIX_FMT_NE(RGB444BE, RGB444LE)
 #define PIX_FMT_BGR565 PIX_FMT_NE(BGR565BE, BGR565LE)
 #define PIX_FMT_BGR555 PIX_FMT_NE(BGR555BE, BGR555LE)
+#define PIX_FMT_BGR444 PIX_FMT_NE(BGR444BE, BGR444LE)
 
 #define PIX_FMT_YUV420P16 PIX_FMT_NE(YUV420P16BE, YUV420P16LE)
 #define PIX_FMT_YUV422P16 PIX_FMT_NE(YUV422P16BE, YUV422P16LE)
diff -upr trunk/libswscale/bfin/internal_bfin.S trunk.rgb12/libswscale/bfin/internal_bfin.S
--- trunk/libswscale/bfin/internal_bfin.S	2010-02-24 15:55:46.000000000 +0100
+++ trunk.rgb12/libswscale/bfin/internal_bfin.S	2010-02-25 02:10:33.000000000 +0100
@@ -349,6 +349,119 @@ DEFUN(yuv2rgb555_line,MEM,
         rts;
 DEFUN_END(yuv2rgb555_line)
 
+DEFUN(yuv2rgb444_line,MEM,
+   (uint8_t *Y, uint8_t *U, uint8_t *V, int *out, int dW, uint32_t *coeffs)):
+        link 0;
+        [--sp] = (r7:4);
+        p1 = [fp+ARG_OUT];
+        r3 = [fp+ARG_W];
+
+        i0 = r0;
+        i2 = r1;
+        i3 = r2;
+
+        r0 = [fp+ARG_COEFF];
+        i1 = r0;
+        b1 = i1;
+        l1 = COEFF_LEN;
+        m0 = COEFF_REL_CY_OFF;
+        p0 = r3;
+
+        r0   = [i0++];         // 2Y
+        r1.l = w[i2++];        // 2u
+        r1.h = w[i3++];        // 2v
+        p0 = p0>>2;
+
+        lsetup (.L0444, .L1444) lc0 = p0;
+
+        /* One loop pass emits 4 pixels (two 32-bit stores); lc0 = dW>>2. Coefficient order:
+         * uint32_t oy,oc,zero,cy,crv,rmask,cbu,bmask,cgu,cgv
+         * r0 -- used to load 4ys
+         * r1 -- used to load 2us,2vs
+         * r4 -- y3,y2
+         * r5 -- y1,y0
+         * r6 -- u1,u0
+         * r7 -- v1,v0
+         */
+                                                              r2=[i1++]; // oy
+.L0444:
+        /*
+         * rrrrrrrr gggggggg bbbbbbbb
+         *  5432109876543210
+         *              bbbb >>4
+         *          gggggggg
+         *      rrrrrrrr     <<4
+         *  xxxxrrrrggggbbbb
+         */
+
+        (r4,r5) = byteop16m (r1:0, r3:2)                   || r3=[i1++]; // oc
+        (r7,r6) = byteop16m (r1:0, r3:2) (r);
+        r5 = r5 << 2 (v);                                                // y1,y0
+        r4 = r4 << 2 (v);                                                // y3,y2
+        r6 = r6 << 2 (v)                                   || r0=[i1++]; // u1,u0, r0=zero
+        r7 = r7 << 2 (v)                                   || r1=[i1++]; // v1,v0  r1=cy
+        /* Y' = y*cy */
+        a1 = r1.h*r5.h, a0 = r1.l*r5.l                     || r1=[i1++]; // crv
+
+        /* R = Y+ crv*(Cr-128) */
+        r2.h = (a1 += r1.h*r7.l), r2.l = (a0 += r1.l*r7.l);
+                a1 -= r1.h*r7.l,          a0 -= r1.l*r7.l  || r5=[i1++]; // rmask
+        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cbu
+        r2 = r2 >> 4 (v);
+        r3 = r2 & r5;
+
+        /* B = Y+ cbu*(Cb-128) */
+        r2.h = (a1 += r1.h*r6.l), r2.l = (a0 += r1.l*r6.l);
+                a1 -= r1.h*r6.l,          a0 -= r1.l*r6.l  || r5=[i1++]; // bmask
+        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cgu
+        r2 = r2 << 4 (v);
+        r2 = r2 & r5;
+        r3 = r3 | r2;
+
+        /* G = Y+ cgu*(Cb-128)+cgv*(Cr-128) */
+                a1 += r1.h*r6.l,          a0 += r1.l*r6.l  || r1=[i1++]; // cgv
+        r2.h = (a1 += r1.h*r7.l), r2.l = (a0 += r1.l*r7.l);
+        r2 = byteop3p(r3:2, r1:0)(LO)                      || r5=[i1++m0]; // gmask
+        r2 = r2 & r5;
+        r3 = r3 | r2;
+        [p1++]=r3                                          || r1=[i1++]; // cy
+
+        /* Y' = y*cy */
+        a1 = r1.h*r4.h, a0 = r1.l*r4.l                     || r1=[i1++]; // crv
+
+        /* R = Y+ crv*(Cr-128) */
+        r2.h = (a1 += r1.h*r7.h), r2.l = (a0 += r1.l*r7.h);
+                a1 -= r1.h*r7.h,          a0 -= r1.l*r7.h  || r5=[i1++]; // rmask
+        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cbu
+        r2 = r2 >> 4 (v);
+        r3 = r2 & r5;
+
+        /* B = Y+ cbu*(Cb-128) */
+        r2.h = (a1 += r1.h*r6.h), r2.l = (a0 += r1.l*r6.h);
+                a1 -= r1.h*r6.h,          a0 -= r1.l*r6.h  || r5=[i1++]; // bmask
+        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cgu
+        r2 = r2 << 4 (v);
+        r2 = r2 & r5;
+        r3 = r3 | r2;
+
+        /* G = Y+ cgu*(Cb-128)+cgv*(Cr-128) */
+                a1 += r1.h*r6.h,          a0 += r1.l*r6.h  || r1=[i1++]; // cgv
+        r2.h = (a1 += r1.h*r7.h), r2.l = (a0 += r1.l*r7.h) || r5=[i1++]; // gmask
+        r2 = byteop3p(r3:2, r1:0)(LO)                      || r0=[i0++];     // 4Y
+        r1.l = w[i2++];                                                      // 2u: r1 held coefficients, reload U for the next pass
+        r2 = r2 & r5;
+        r3 = r3 | r2;
+        [p1++]=r3                                          || r1.h=w[i3++]; // 2v
+
+.L1444:                                                       r2=[i1++]; // oy
+
+        l1 = 0;
+
+        (r7:4) = [sp++];
+        unlink;
+        rts;
+DEFUN_END(yuv2rgb444_line)
+
 DEFUN(yuv2rgb24_line,MEM,
    (uint8_t *Y, uint8_t *U, uint8_t *V, int *out, int dW, uint32_t *coeffs)):
         link 0;
diff -upr trunk/libswscale/bfin/yuv2rgb_bfin.c trunk.rgb12/libswscale/bfin/yuv2rgb_bfin.c
--- trunk/libswscale/bfin/yuv2rgb_bfin.c	2010-02-24 15:55:46.000000000 +0100
+++ trunk.rgb12/libswscale/bfin/yuv2rgb_bfin.c	2010-02-25 03:51:10.000000000 +0100
@@ -38,6 +38,9 @@
 #define L1CODE
 #endif
 
+void ff_bfin_yuv2rgb444_line(uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out,
+                             int w, uint32_t *coeffs) L1CODE;
+
 void ff_bfin_yuv2rgb555_line(uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out,
                              int w, uint32_t *coeffs) L1CODE;
 
@@ -126,6 +129,24 @@ static int core_yuv420_rgb(SwsContext *c
 }
 
 
+/* YUV420 -> RGB444 slice entry point; all work is delegated to core_yuv420_rgb(). */
+static int bfin_yuv420_rgb444(SwsContext *c, uint8_t **in, int *instrides,
+                              int srcSliceY, int srcSliceH,
+                              uint8_t **oplanes, int *outstrides)
+{
+    return core_yuv420_rgb(c, in, instrides, srcSliceY, srcSliceH,
+                           oplanes, outstrides, ff_bfin_yuv2rgb444_line, 1, 444);
+}
+
+/* YUV420 -> BGR444 slice entry point; passes 0 (not 1) ahead of the 444 selector -- presumably the R/B order flag, confirm in core_yuv420_rgb(). */
+static int bfin_yuv420_bgr444(SwsContext *c, uint8_t **in, int *instrides,
+                              int srcSliceY, int srcSliceH,
+                              uint8_t **oplanes, int *outstrides)
+{
+    return core_yuv420_rgb(c, in, instrides, srcSliceY, srcSliceH,
+                           oplanes, outstrides, ff_bfin_yuv2rgb444_line, 0, 444);
+}
+
 static int bfin_yuv420_rgb555(SwsContext *c,
                               uint8_t **in, int *instrides,
                               int srcSliceY, int srcSliceH,
@@ -186,6 +207,8 @@ SwsFunc ff_yuv2rgb_get_func_ptr_bfin(Sws
     SwsFunc f;
 
     switch(c->dstFormat) {
+    case PIX_FMT_RGB444: f = bfin_yuv420_rgb444; break;
+    case PIX_FMT_BGR444: f = bfin_yuv420_bgr444; break;
     case PIX_FMT_RGB555: f = bfin_yuv420_rgb555; break;
     case PIX_FMT_BGR555: f = bfin_yuv420_bgr555; break;
     case PIX_FMT_RGB565: f = bfin_yuv420_rgb565; break;
diff -upr trunk/libswscale/colorspace-test.c trunk.rgb12/libswscale/colorspace-test.c
--- trunk/libswscale/colorspace-test.c	2010-02-24 15:55:47.000000000 +0100
+++ trunk.rgb12/libswscale/colorspace-test.c	2010-02-25 02:10:33.000000000 +0100
@@ -81,30 +81,47 @@ int main(int argc, char **argv)
             const char *name;
             void (*func)(const uint8_t *src, uint8_t *dst, long src_size);
         } func_info[] = {
+            FUNC(2, 2, rgb12to15),
+            FUNC(2, 2, rgb12to16),
+            FUNC(2, 3, rgb12to24),
+            FUNC(2, 4, rgb12to32),
+            FUNC(2, 2, rgb15to12),
             FUNC(2, 2, rgb15to16),
             FUNC(2, 3, rgb15to24),
             FUNC(2, 4, rgb15to32),
             FUNC(2, 3, rgb16to24),
             FUNC(2, 4, rgb16to32),
+            FUNC(3, 2, rgb24to12),
             FUNC(3, 2, rgb24to15),
             FUNC(3, 2, rgb24to16),
             FUNC(3, 4, rgb24to32),
+            FUNC(4, 2, rgb32to12),
             FUNC(4, 2, rgb32to15),
             FUNC(4, 2, rgb32to16),
             FUNC(4, 3, rgb32to24),
             FUNC(2, 2, rgb16to15),
+            FUNC(2, 2, rgb16to12),
+            FUNC(2, 2, rgb12tobgr12),
+            FUNC(2, 2, rgb12tobgr15),
+            FUNC(2, 2, rgb12tobgr16),
+            FUNC(2, 3, rgb12tobgr24),
+            FUNC(2, 4, rgb12tobgr32),
+            FUNC(2, 2, rgb15tobgr12),
             FUNC(2, 2, rgb15tobgr15),
             FUNC(2, 2, rgb15tobgr16),
             FUNC(2, 3, rgb15tobgr24),
             FUNC(2, 4, rgb15tobgr32),
+            FUNC(2, 2, rgb16tobgr12),
             FUNC(2, 2, rgb16tobgr15),
             FUNC(2, 2, rgb16tobgr16),
             FUNC(2, 3, rgb16tobgr24),
             FUNC(2, 4, rgb16tobgr32),
+            FUNC(3, 2, rgb24tobgr12),
             FUNC(3, 2, rgb24tobgr15),
             FUNC(3, 2, rgb24tobgr16),
             FUNC(3, 3, rgb24tobgr24),
             FUNC(3, 4, rgb24tobgr32),
+            FUNC(4, 2, rgb32tobgr12),
             FUNC(4, 2, rgb32tobgr15),
             FUNC(4, 2, rgb32tobgr16),
             FUNC(4, 3, rgb32tobgr24),
diff -upr trunk/libswscale/rgb2rgb.c trunk.rgb12/libswscale/rgb2rgb.c
--- trunk/libswscale/rgb2rgb.c	2010-02-24 15:55:47.000000000 +0100
+++ trunk.rgb12/libswscale/rgb2rgb.c	2010-02-25 18:44:09.000000000 +0100
@@ -38,21 +38,31 @@
 void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size);
 void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size);
 void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size);
+void (*rgb24tobgr12)(const uint8_t *src, uint8_t *dst, long src_size);
 void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
 void (*rgb32to16)(const uint8_t *src, uint8_t *dst, long src_size);
 void (*rgb32to15)(const uint8_t *src, uint8_t *dst, long src_size);
+void (*rgb32to12)(const uint8_t *src, uint8_t *dst, long src_size);
+void (*rgb12to15)(const uint8_t *src, uint8_t *dst, long src_size);
+void (*rgb12to16)(const uint8_t *src, uint8_t *dst, long src_size);
+void (*rgb12tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
+void (*rgb12to32)(const uint8_t *src, uint8_t *dst, long src_size);
+void (*rgb15to12)(const uint8_t *src, uint8_t *dst, long src_size);
 void (*rgb15to16)(const uint8_t *src, uint8_t *dst, long src_size);
 void (*rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
 void (*rgb15to32)(const uint8_t *src, uint8_t *dst, long src_size);
+void (*rgb16to12)(const uint8_t *src, uint8_t *dst, long src_size);
 void (*rgb16to15)(const uint8_t *src, uint8_t *dst, long src_size);
 void (*rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
 void (*rgb16to32)(const uint8_t *src, uint8_t *dst, long src_size);
 void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
 void (*rgb24to16)(const uint8_t *src, uint8_t *dst, long src_size);
 void (*rgb24to15)(const uint8_t *src, uint8_t *dst, long src_size);
+void (*rgb24to12)(const uint8_t *src, uint8_t *dst, long src_size);
 void (*rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size);
 void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size);
 void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size);
+void (*rgb32tobgr12)(const uint8_t *src, uint8_t *dst, long src_size);
 
 void (*yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
                    long width, long height,
@@ -122,6 +132,11 @@ DECLARE_ASM_CONST(8, uint64_t, mask24h) 
 DECLARE_ASM_CONST(8, uint64_t, mask24hh)     = 0xffff000000000000ULL;
 DECLARE_ASM_CONST(8, uint64_t, mask24hhh)    = 0xffffffff00000000ULL;
 DECLARE_ASM_CONST(8, uint64_t, mask24hhhh)   = 0xffffffffffff0000ULL;
+DECLARE_ASM_CONST(8, uint64_t, mask12b)      = 0x000F000F000F000FULL; /* 00000000 00001111  xxB */
+DECLARE_ASM_CONST(8, uint64_t, mask12rg)     = 0x0FF00FF00FF00FF0ULL; /* 00001111 11110000  RGx */
+DECLARE_ASM_CONST(8, uint64_t, mask12s)      = 0xFFF0FFF0FFF0FFF0ULL; /* FIXME if wrong (not sure) */
+DECLARE_ASM_CONST(8, uint64_t, mask12g)      = 0x00F000F000F000F0ULL;
+DECLARE_ASM_CONST(8, uint64_t, mask12r)      = 0x0F000F000F000F00ULL;
 DECLARE_ASM_CONST(8, uint64_t, mask15b)      = 0x001F001F001F001FULL; /* 00000000 00011111  xxB */
 DECLARE_ASM_CONST(8, uint64_t, mask15rg)     = 0x7FE07FE07FE07FE0ULL; /* 01111111 11100000  RGx */
 DECLARE_ASM_CONST(8, uint64_t, mask15s)      = 0xFFE0FFE0FFE0FFE0ULL;
@@ -136,6 +151,9 @@ DECLARE_ASM_CONST(8, uint64_t, blue_16ma
 DECLARE_ASM_CONST(8, uint64_t, red_15mask)   = 0x00007c0000007c00ULL;
 DECLARE_ASM_CONST(8, uint64_t, green_15mask) = 0x000003e0000003e0ULL;
 DECLARE_ASM_CONST(8, uint64_t, blue_15mask)  = 0x0000001f0000001fULL;
+DECLARE_ASM_CONST(8, uint64_t, red_12mask)   = 0x00000f0000000f00ULL;
+DECLARE_ASM_CONST(8, uint64_t, green_12mask) = 0x000000f0000000f0ULL;
+DECLARE_ASM_CONST(8, uint64_t, blue_12mask)  = 0x0000000f0000000fULL;
 #endif /* ARCH_X86 */
 
 #define RGB2YUV_SHIFT 8
@@ -269,6 +287,22 @@ void palette8tobgr15(const uint8_t *src,
         ((uint16_t *)dst)[i] = bswap_16(((const uint16_t *)palette)[src[i]]);
 }
 
+/**
+ * Store the 16-bit palette entry (assumed BGR12, see rgb32to12) of every index in src.
+ */
+void palette8torgb12(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
+{
+    const uint16_t *lut = (const uint16_t *)palette; /* entries are 16 bits wide */
+    for (; num_pixels > 0; num_pixels--, src++, dst += 2)
+        *(uint16_t *)dst = lut[*src];
+}
+void palette8tobgr12(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
+{
+    const uint16_t *lut = (const uint16_t *)palette; /* entries are 16 bits wide, stored byte-swapped */
+    for (; num_pixels > 0; num_pixels--, src++, dst += 2)
+        *(uint16_t *)dst = bswap_16(lut[*src]);
+}
+
 void rgb32to24(const uint8_t *src, uint8_t *dst, long src_size)
 {
     long i;
@@ -366,6 +400,22 @@ void rgb16tobgr15(const uint8_t *src, ui
     }
 }
 
+/* 16-bit 5-6-5 -> 12-bit 4-4-4 with the two outer components swapped (top 4 bits of each kept). */
+void rgb16tobgr12(const uint8_t *src, uint8_t *dst, long src_size)
+{
+    long i;
+    long num_pixels = src_size >> 1;
+    for (i=0; i<num_pixels; i++) {
+        unsigned b,g,r;
+        /* fetch/store whole 16-bit pixels; indexing the byte pointers would drop the high byte */
+        const unsigned rgb = ((const uint16_t *)src)[i];
+        r = (rgb&0x1F)>>1;
+        g = (rgb&0x7E0)>>7;
+        b = (rgb&0xF800)>>12;
+        ((uint16_t *)dst)[i] = (b&0x0F) | ((g&0x0F)<<4) | ((r&0x0F)<<8);
+    }
+}
+
 void rgb15tobgr32(const uint8_t *src, uint8_t *dst, long src_size)
 {
     const uint16_t *end;
@@ -428,6 +479,108 @@ void rgb15tobgr15(const uint8_t *src, ui
     }
 }
 
+/* 15-bit 5-5-5 -> 12-bit 4-4-4 with the two outer components swapped (top 4 bits of each kept). */
+void rgb15tobgr12(const uint8_t *src, uint8_t *dst, long src_size)
+{
+    long i;
+    long num_pixels = src_size >> 1;
+    for (i=0; i<num_pixels; i++) {
+        unsigned b,g,r;
+        /* fetch/store whole 16-bit pixels; indexing the byte pointers would drop the high byte */
+        const unsigned rgb = ((const uint16_t *)src)[i];
+        r = (rgb&0x1F)>>1;
+        g = (rgb&0x3E0)>>6;
+        b = (rgb&0x7C00)>>11;
+        ((uint16_t *)dst)[i] = (b&0x0F) | ((g&0x0F)<<4) | ((r&0x0F)<<8);
+    }
+}
+
+void rgb12tobgr32(const uint8_t *src, uint8_t *dst, long src_size)
+{
+    const uint16_t *in = (const uint16_t *)src;
+    const uint16_t *const stop = in + (src_size >> 1);
+    uint8_t *out = dst;
+
+    for (; in < stop; in++, out += 4) {
+        const unsigned px = *in;
+        /* each 4-bit field lands in the high nibble of its own byte; the 4th byte is 0 */
+#ifdef WORDS_BIGENDIAN
+        out[0] = 0;
+        out[1] = (px & 0x0F) << 4;
+        out[2] = px & 0x0F0;
+        out[3] = (px & 0x0F00) >> 4;
+#else
+        out[0] = (px & 0x0F00) >> 4;
+        out[1] = px & 0x0F0;
+        out[2] = (px & 0x0F) << 4;
+        out[3] = 0;
+#endif
+    }
+}
+
+void rgb12to24(const uint8_t *src, uint8_t *dst, long src_size)
+{
+    const uint16_t *in = (const uint16_t *)src;
+    long left = src_size >> 1;
+
+    /* one 16-bit pixel in, three bytes out: bits 8-11, then 4-7, then 0-3 */
+    for (; left > 0; left--, dst += 3) {
+        const unsigned px = *in++;
+
+        dst[0] = (px & 0x0F00) >> 4;
+        dst[1] = px & 0x0F0;
+        dst[2] = (px & 0x0F) << 4;
+    }
+}
+
+/* 12-bit 4-4-4 -> 16-bit 5-6-5 with the two outer components swapped; added low bits are zero. */
+void rgb12tobgr16(const uint8_t *src, uint8_t *dst, long src_size)
+{
+    long i;
+    long num_pixels = src_size >> 1;
+    for (i=0; i<num_pixels; i++) {
+        unsigned b,g,r;
+        /* fetch/store whole 16-bit pixels; indexing the byte pointers would drop the high byte */
+        const unsigned rgb = ((const uint16_t *)src)[i];
+        r = (rgb&0x0F)<<1;
+        g = (rgb&0x0F0)>>2;
+        b = (rgb&0x0F00)>>7;
+        ((uint16_t *)dst)[i] = (b&0x1F) | ((g&0x3F)<<5) | ((r&0x1F)<<11);
+    }
+}
+
+/* 12-bit 4-4-4 -> 15-bit 5-5-5 with the two outer components swapped; added low bits are zero. */
+void rgb12tobgr15(const uint8_t *src, uint8_t *dst, long src_size)
+{
+    long i;
+    long num_pixels = src_size >> 1;
+    for (i=0; i<num_pixels; i++) {
+        unsigned b,g,r;
+        /* fetch/store whole 16-bit pixels; indexing the byte pointers would drop the high byte */
+        const unsigned rgb = ((const uint16_t *)src)[i];
+        r = (rgb&0x0F)<<1;
+        g = (rgb&0x0F0)>>3;
+        b = (rgb&0x0F00)>>7;
+        ((uint16_t *)dst)[i] = (b&0x1F) | ((g&0x1F)<<5) | ((r&0x1F)<<10);
+    }
+}
+
+/* Swap the two outer 4-bit components of every 12-bit pixel; the middle one stays in place. */
+void rgb12tobgr12(const uint8_t *src, uint8_t *dst, long src_size)
+{
+    long i;
+    long num_pixels = src_size >> 1;
+    for (i=0; i<num_pixels; i++) {
+        unsigned b,g,r;
+        /* fetch/store whole 16-bit pixels; indexing the byte pointers would drop the high byte */
+        const unsigned rgb = ((const uint16_t *)src)[i];
+        r = rgb&0x0F;
+        g = (rgb&0x0F0)>>4;
+        b = (rgb&0x0F00)>>8;
+        ((uint16_t *)dst)[i] = (b&0x0F) | ((g&0x0F)<<4) | ((r&0x0F)<<8);
+    }
+}
+
 void bgr8torgb8(const uint8_t *src, uint8_t *dst, long src_size)
 {
     long i;
diff -upr trunk/libswscale/rgb2rgb.h trunk.rgb12/libswscale/rgb2rgb.h
--- trunk/libswscale/rgb2rgb.h	2010-02-24 15:55:47.000000000 +0100
+++ trunk.rgb12/libswscale/rgb2rgb.h	2010-02-25 02:10:33.000000000 +0100
@@ -32,21 +32,31 @@
 extern void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size);
 extern void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size);
 extern void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size);
+extern void (*rgb24tobgr12)(const uint8_t *src, uint8_t *dst, long src_size);
 extern void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
 extern void (*rgb32to16)   (const uint8_t *src, uint8_t *dst, long src_size);
 extern void (*rgb32to15)   (const uint8_t *src, uint8_t *dst, long src_size);
+extern void (*rgb32to12)   (const uint8_t *src, uint8_t *dst, long src_size);
+extern void (*rgb12to15)   (const uint8_t *src, uint8_t *dst, long src_size);
+extern void (*rgb12to16)   (const uint8_t *src, uint8_t *dst, long src_size);
+extern void (*rgb12tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
+extern void (*rgb12to32)   (const uint8_t *src, uint8_t *dst, long src_size);
+extern void (*rgb15to12)   (const uint8_t *src, uint8_t *dst, long src_size);
 extern void (*rgb15to16)   (const uint8_t *src, uint8_t *dst, long src_size);
 extern void (*rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
 extern void (*rgb15to32)   (const uint8_t *src, uint8_t *dst, long src_size);
+extern void (*rgb16to12)   (const uint8_t *src, uint8_t *dst, long src_size);
 extern void (*rgb16to15)   (const uint8_t *src, uint8_t *dst, long src_size);
 extern void (*rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
 extern void (*rgb16to32)   (const uint8_t *src, uint8_t *dst, long src_size);
 extern void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
 extern void (*rgb24to16)   (const uint8_t *src, uint8_t *dst, long src_size);
 extern void (*rgb24to15)   (const uint8_t *src, uint8_t *dst, long src_size);
+extern void (*rgb24to12)   (const uint8_t *src, uint8_t *dst, long src_size);
 extern void (*rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size);
 extern void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size);
 extern void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size);
+extern void (*rgb32tobgr12)(const uint8_t *src, uint8_t *dst, long src_size);
 
 void rgb24to32   (const uint8_t *src, uint8_t *dst, long src_size);
 void rgb32to24   (const uint8_t *src, uint8_t *dst, long src_size);
@@ -54,10 +64,17 @@ void rgb16tobgr32(const uint8_t *src, ui
 void rgb16to24   (const uint8_t *src, uint8_t *dst, long src_size);
 void rgb16tobgr16(const uint8_t *src, uint8_t *dst, long src_size);
 void rgb16tobgr15(const uint8_t *src, uint8_t *dst, long src_size);
+void rgb16tobgr12(const uint8_t *src, uint8_t *dst, long src_size);
 void rgb15tobgr32(const uint8_t *src, uint8_t *dst, long src_size);
 void rgb15to24   (const uint8_t *src, uint8_t *dst, long src_size);
 void rgb15tobgr16(const uint8_t *src, uint8_t *dst, long src_size);
 void rgb15tobgr15(const uint8_t *src, uint8_t *dst, long src_size);
+void rgb15tobgr12(const uint8_t *src, uint8_t *dst, long src_size);
+void rgb12tobgr32(const uint8_t *src, uint8_t *dst, long src_size);
+void rgb12to24   (const uint8_t *src, uint8_t *dst, long src_size);
+void rgb12tobgr16(const uint8_t *src, uint8_t *dst, long src_size);
+void rgb12tobgr15(const uint8_t *src, uint8_t *dst, long src_size);
+void rgb12tobgr12(const uint8_t *src, uint8_t *dst, long src_size);
 void bgr8torgb8  (const uint8_t *src, uint8_t *dst, long src_size);
 
 void shuffle_bytes_0321(const uint8_t *src, uint8_t *dst, long src_size);
@@ -72,6 +89,8 @@ void palette8torgb16(const uint8_t *src,
 void palette8tobgr16(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
 void palette8torgb15(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
 void palette8tobgr15(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
+void palette8torgb12(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
+void palette8tobgr12(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
 
 /**
  * Height should be a multiple of 2 and width should be a multiple of 16.
diff -upr trunk/libswscale/rgb2rgb_template.c trunk.rgb12/libswscale/rgb2rgb_template.c
--- trunk/libswscale/rgb2rgb_template.c	2010-02-24 20:29:24.000000000 +0100
+++ trunk.rgb12/libswscale/rgb2rgb_template.c	2010-02-25 19:10:35.000000000 +0100
@@ -215,6 +215,100 @@ static inline void RENAME(rgb32tobgr24)(
  MMX2, 3DNOW optimization by Nick Kurshev
  32-bit C version, and and&add trick by Michael Niedermayer
 */
+/* Widen 12-bit 4-4-4 pixels to 15-bit 5-5-5; the new low bit of each component is zero. */
+static inline void RENAME(rgb12to15)(const uint8_t *src,uint8_t *dst,long src_size)
+{
+    register const uint8_t* s=src;
+    register uint8_t* d=dst;
+    register const uint8_t *end;
+    const uint8_t *mm_end;
+    end = s + src_size;
+#if HAVE_MMX
+    __asm__ volatile(PREFETCH"    %0"::"m"(*s));
+    /* 4 pixels per pass: isolate each nibble, then shift it into its 5-bit slot (same as the C below) */
+    mm_end = end - 7;
+    while (s<mm_end) {
+        __asm__ volatile(
+            PREFETCH"  32%1         \n\t"
+            "movq        %1, %%mm0  \n\t"
+            "movq     %%mm0, %%mm1  \n\t"
+            "movq     %%mm0, %%mm2  \n\t"
+            "pand        %2, %%mm0  \n\t"
+            "pand        %3, %%mm1  \n\t"
+            "pand        %4, %%mm2  \n\t"
+            "psllw       $1, %%mm0  \n\t"
+            "psllw       $2, %%mm1  \n\t"
+            "psllw       $3, %%mm2  \n\t"
+            "por      %%mm1, %%mm0  \n\t"
+            "por      %%mm2, %%mm0  \n\t"
+            MOVNTQ"   %%mm0,  %0"
+            :"=m"(*d):"m"(*s),"m"(mask12b),"m"(mask12g),"m"(mask12r));
+        d+=8;
+        s+=8;
+    }
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+#endif
+    mm_end = end - 3;
+    while (s < mm_end) {
+        register unsigned x= *((const uint32_t *)s);
+        *((uint32_t *)d) = ((x<<1)&0x001E001E) | ((x<<2)&0x03C003C0) | ((x<<3)&0x78007800);
+        d+=4;
+        s+=4;
+    }
+    if (s < end) {
+        register unsigned short x= *((const uint16_t *)s);
+        *((uint16_t *)d) = ((x<<1)&0x001E) | ((x<<2)&0x03C0) | ((x<<3)&0x7800);
+    }
+}
+
+/* Widen 12-bit 4-4-4 pixels to 16-bit 5-6-5; the added low bits of each component are zero. */
+static inline void RENAME(rgb12to16)(const uint8_t *src,uint8_t *dst,long src_size)
+{
+    register const uint8_t* s=src;
+    register uint8_t* d=dst;
+    register const uint8_t *end;
+    const uint8_t *mm_end;
+    end = s + src_size;
+#if HAVE_MMX
+    __asm__ volatile(PREFETCH"    %0"::"m"(*s));
+    /* 4 pixels per pass: isolate each nibble, then shift it into its 5-6-5 slot (same as the C below) */
+    mm_end = end - 7;
+    while (s<mm_end) {
+        __asm__ volatile(
+            PREFETCH"  32%1         \n\t"
+            "movq        %1, %%mm0  \n\t"
+            "movq     %%mm0, %%mm1  \n\t"
+            "movq     %%mm0, %%mm2  \n\t"
+            "pand        %2, %%mm0  \n\t"
+            "pand        %3, %%mm1  \n\t"
+            "pand        %4, %%mm2  \n\t"
+            "psllw       $1, %%mm0  \n\t"
+            "psllw       $3, %%mm1  \n\t"
+            "psllw       $4, %%mm2  \n\t"
+            "por      %%mm1, %%mm0  \n\t"
+            "por      %%mm2, %%mm0  \n\t"
+            MOVNTQ"   %%mm0,  %0"
+            :"=m"(*d):"m"(*s),"m"(mask12b),"m"(mask12g),"m"(mask12r));
+        d+=8;
+        s+=8;
+    }
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+#endif
+    mm_end = end - 3;
+    while (s < mm_end) {
+        register unsigned x= *((const uint32_t *)s);
+        *((uint32_t *)d) = ((x<<1)&0x001E001E) | ((x<<3)&0x07800780) | ((x<<4)&0xF000F000);
+        d+=4;
+        s+=4;
+    }
+    if (s < end) {
+        register unsigned short x= *((const uint16_t *)s);
+        *((uint16_t *)d) = ((x<<1)&0x001E) | ((x<<3)&0x0780) | ((x<<4)&0xF000);
+    }
+}
+
 static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, long src_size)
 {
     register const uint8_t* s=src;
@@ -312,6 +406,114 @@ static inline void RENAME(rgb16to15)(con
     }
 }
 
+/* Reduce 15-bit 5-5-5 pixels to 12-bit 4-4-4 by dropping the low bit of each component. */
+static inline void RENAME(rgb15to12)(const uint8_t *src,uint8_t *dst,long src_size)
+{
+    register const uint8_t* s=src;
+    register uint8_t* d=dst;
+    register const uint8_t *end;
+    const uint8_t *mm_end;
+    end = s + src_size;
+#if HAVE_MMX
+    __asm__ volatile(PREFETCH"    %0"::"m"(*s));
+    /* 4 pixels per pass: shift each component's top four bits down to its 12-bit
+     * position, mask it and merge, exactly as the C code below does. Masking with
+     * mask15rg/mask15b (the 16->15 recipe) would leave a 5-5-5 layout here. */
+    mm_end = end - 7;
+    while (s<mm_end) {
+        __asm__ volatile(
+            PREFETCH"  32%1         \n\t"
+            "movq        %1, %%mm0  \n\t"
+            "movq     %%mm0, %%mm1  \n\t"
+            "movq     %%mm0, %%mm2  \n\t"
+            "psrlw       $1, %%mm0  \n\t"
+            "psrlw       $2, %%mm1  \n\t"
+            "psrlw       $3, %%mm2  \n\t"
+            "pand        %2, %%mm0  \n\t"
+            "pand        %3, %%mm1  \n\t"
+            "pand        %4, %%mm2  \n\t"
+            "por      %%mm1, %%mm0  \n\t"
+            "por      %%mm2, %%mm0  \n\t"
+            MOVNTQ"   %%mm0,  %0"
+            :"=m"(*d)
+            :"m"(*s),"m"(mask12b),"m"(mask12g),"m"(mask12r)
+        );
+        d+=8;
+        s+=8;
+    }
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+#endif
+    mm_end = end - 3;
+    while (s < mm_end) {
+        register uint32_t x= *((const uint32_t *)s);
+        *((uint32_t *)d) = ((x>>1)&0x000F000F) | ((x>>2)&0x00F000F0) | \
+                           ((x>>3)&0x0F000F00);
+        s+=4;
+        d+=4;
+    }
+    if (s < end) {
+        register uint16_t x= *((const uint16_t *)s);
+        *((uint16_t *)d) = ((x>>1)&0x000F) | ((x>>2)&0x00F0) | ((x>>3)&0x0F00);
+        s+=2;
+        d+=2;
+    }
+}
+
+/* Reduce 16-bit 5-6-5 pixels to 12-bit 4-4-4, keeping the top four bits of each component. */
+static inline void RENAME(rgb16to12)(const uint8_t *src,uint8_t *dst,long src_size)
+{
+    register const uint8_t* s=src;
+    register uint8_t* d=dst;
+    register const uint8_t *end;
+    const uint8_t *mm_end;
+    end = s + src_size;
+#if HAVE_MMX
+    __asm__ volatile(PREFETCH"    %0"::"m"(*s));
+    /* 4 pixels per pass: shift each component's top four bits down to its 12-bit
+     * position, mask it and merge, exactly as the C code below does. Masking with
+     * mask15rg/mask15b (the 16->15 recipe) would leave a 5-5-5 layout here. */
+    mm_end = end - 7;
+    while (s<mm_end) {
+        __asm__ volatile(
+            PREFETCH"  32%1         \n\t"
+            "movq        %1, %%mm0  \n\t"
+            "movq     %%mm0, %%mm1  \n\t"
+            "movq     %%mm0, %%mm2  \n\t"
+            "psrlw       $1, %%mm0  \n\t"
+            "psrlw       $3, %%mm1  \n\t"
+            "psrlw       $4, %%mm2  \n\t"
+            "pand        %2, %%mm0  \n\t"
+            "pand        %3, %%mm1  \n\t"
+            "pand        %4, %%mm2  \n\t"
+            "por      %%mm1, %%mm0  \n\t"
+            "por      %%mm2, %%mm0  \n\t"
+            MOVNTQ"   %%mm0,  %0"
+            :"=m"(*d)
+            :"m"(*s),"m"(mask12b),"m"(mask12g),"m"(mask12r)
+        );
+        d+=8;
+        s+=8;
+    }
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+#endif
+    mm_end = end - 3;
+    while (s < mm_end) {
+        register uint32_t x= *((const uint32_t *)s);
+        *((uint32_t *)d) = ((x>>1)&0x000F000F) | ((x>>3)&0x00F000F0) | \
+                           ((x>>4)&0x0F000F00);
+        s+=4;
+        d+=4;
+    }
+    if (s < end) {
+        register uint16_t x= *((const uint16_t *)s);
+        *((uint16_t *)d) = ((x>>1)&0x000F) | ((x>>3)&0x00F0) | ((x>>4)&0x0F00);
+        s+=2;
+        d+=2;
+    }
+}
+
 static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_size)
 {
     const uint8_t *s = src;
@@ -622,6 +824,161 @@ static inline void RENAME(rgb32tobgr15)(
     }
 }
 
+/* Pack each 32-bit word into 12 bits: word bits 4-7 -> 0-3, bits 12-15 -> 4-7, bits 20-23 -> 8-11. */
+static inline void RENAME(rgb32to12)(const uint8_t *src, uint8_t *dst, long src_size)
+{
+    const uint8_t *s = src;
+    const uint8_t *end;
+#if HAVE_MMX
+    const uint8_t *mm_end;
+#endif
+    uint16_t *d = (uint16_t *)dst;
+    end = s + src_size;
+#if HAVE_MMX
+    mm_end = end - 15;
+#if 0 /* FIXME: this pmaddwd variant still packs with the 15-bit constants (mask3215g, mask3216br, mul3215); keep it off until 12-bit ones exist */
+    __asm__ volatile(
+        "movq           %3, %%mm5   \n\t"
+        "movq           %4, %%mm6   \n\t"
+        "movq           %5, %%mm7   \n\t"
+        "jmp            2f          \n\t"
+        ASMALIGN(4)
+        "1:                         \n\t"
+        PREFETCH"   32(%1)          \n\t"
+        "movd         (%1), %%mm0   \n\t"
+        "movd        4(%1), %%mm3   \n\t"
+        "punpckldq   8(%1), %%mm0   \n\t"
+        "punpckldq  12(%1), %%mm3   \n\t"
+        "movq        %%mm0, %%mm1   \n\t"
+        "movq        %%mm3, %%mm4   \n\t"
+        "pand        %%mm6, %%mm0   \n\t"
+        "pand        %%mm6, %%mm3   \n\t"
+        "pmaddwd     %%mm7, %%mm0   \n\t"
+        "pmaddwd     %%mm7, %%mm3   \n\t"
+        "pand        %%mm5, %%mm1   \n\t"
+        "pand        %%mm5, %%mm4   \n\t"
+        "por         %%mm1, %%mm0   \n\t"
+        "por         %%mm4, %%mm3   \n\t"
+        "psrld          $6, %%mm0   \n\t"
+        "pslld         $10, %%mm3   \n\t"
+        "por         %%mm3, %%mm0   \n\t"
+        MOVNTQ"      %%mm0, (%0)    \n\t"
+        "add           $16,  %1     \n\t"
+        "add            $8,  %0     \n\t"
+        "2:                         \n\t"
+        "cmp            %2,  %1     \n\t"
+        " jb            1b          \n\t"
+        : "+r" (d), "+r"(s)
+        : "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215)
+    );
+#else
+    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
+    __asm__ volatile(
+        "movq          %0, %%mm7    \n\t"
+        "movq          %1, %%mm6    \n\t"::"m"(red_12mask),"m"(green_12mask));
+    while (s < mm_end) {
+        /* 4 pixels per pass; shifts and 12-bit masks mirror the C loop at the end */
+        __asm__ volatile(
+            PREFETCH"    32%1           \n\t"
+            "movd          %1, %%mm0    \n\t"
+            "movd         4%1, %%mm3    \n\t"
+            "punpckldq    8%1, %%mm0    \n\t"
+            "punpckldq   12%1, %%mm3    \n\t"
+            "movq       %%mm0, %%mm1    \n\t"
+            "movq       %%mm0, %%mm2    \n\t"
+            "movq       %%mm3, %%mm4    \n\t"
+            "movq       %%mm3, %%mm5    \n\t"
+            "psrlq         $4, %%mm0    \n\t"
+            "psrlq         $4, %%mm3    \n\t"
+            "pand          %2, %%mm0    \n\t"
+            "pand          %2, %%mm3    \n\t"
+            "psrlq         $8, %%mm1    \n\t"
+            "psrlq         $8, %%mm4    \n\t"
+            "pand       %%mm6, %%mm1    \n\t"
+            "pand       %%mm6, %%mm4    \n\t"
+            "psrlq        $12, %%mm2    \n\t"
+            "psrlq        $12, %%mm5    \n\t"
+            "pand       %%mm7, %%mm2    \n\t"
+            "pand       %%mm7, %%mm5    \n\t"
+            "por        %%mm1, %%mm0    \n\t"
+            "por        %%mm4, %%mm3    \n\t"
+            "por        %%mm2, %%mm0    \n\t"
+            "por        %%mm5, %%mm3    \n\t"
+            "psllq        $16, %%mm3    \n\t"
+            "por        %%mm3, %%mm0    \n\t"
+            MOVNTQ"     %%mm0, %0       \n\t"
+            :"=m"(*d):"m"(*s),"m"(blue_12mask):"memory");
+        d += 4;
+        s += 16;
+    }
+#endif
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+#endif
+    while (s < end) {
+        register int rgb = *(const uint32_t*)s; s += 4;
+        *d++ = ((rgb&0xF0)>>4) + ((rgb&0xF000)>>8) + ((rgb&0xF00000)>>12);
+    }
+}
+
+/* Pack each 32-bit word into 12 bits: word bits 4-7 -> 8-11, bits 12-15 -> 4-7, bits 20-23 -> 0-3. */
+static inline void RENAME(rgb32tobgr12)(const uint8_t *src, uint8_t *dst, long src_size)
+{
+    const uint8_t *s = src;
+    const uint8_t *end;
+#if HAVE_MMX
+    const uint8_t *mm_end;
+#endif
+    uint16_t *d = (uint16_t *)dst;
+    end = s + src_size;
+#if HAVE_MMX
+    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
+    __asm__ volatile(
+        "movq          %0, %%mm7    \n\t"
+        "movq          %1, %%mm6    \n\t"::"m"(red_12mask),"m"(green_12mask));
+    mm_end = end - 15;
+    while (s < mm_end) {
+        /* 4 pixels per pass; shifts and 12-bit masks mirror the C loop at the end */
+        __asm__ volatile(
+            PREFETCH"    32%1           \n\t"
+            "movd          %1, %%mm0    \n\t"
+            "movd         4%1, %%mm3    \n\t"
+            "punpckldq    8%1, %%mm0    \n\t"
+            "punpckldq   12%1, %%mm3    \n\t"
+            "movq       %%mm0, %%mm1    \n\t"
+            "movq       %%mm0, %%mm2    \n\t"
+            "movq       %%mm3, %%mm4    \n\t"
+            "movq       %%mm3, %%mm5    \n\t"
+            "psllq         $4, %%mm0    \n\t"
+            "psllq         $4, %%mm3    \n\t"
+            "pand       %%mm7, %%mm0    \n\t"
+            "pand       %%mm7, %%mm3    \n\t"
+            "psrlq         $8, %%mm1    \n\t"
+            "psrlq         $8, %%mm4    \n\t"
+            "pand       %%mm6, %%mm1    \n\t"
+            "pand       %%mm6, %%mm4    \n\t"
+            "psrlq        $20, %%mm2    \n\t"
+            "psrlq        $20, %%mm5    \n\t"
+            "pand          %2, %%mm2    \n\t"
+            "pand          %2, %%mm5    \n\t"
+            "por        %%mm1, %%mm0    \n\t"
+            "por        %%mm4, %%mm3    \n\t"
+            "por        %%mm2, %%mm0    \n\t"
+            "por        %%mm5, %%mm3    \n\t"
+            "psllq        $16, %%mm3    \n\t"
+            "por        %%mm3, %%mm0    \n\t"
+            MOVNTQ"     %%mm0, %0       \n\t"
+            :"=m"(*d):"m"(*s),"m"(blue_12mask):"memory");
+        d += 4;
+        s += 16;
+    }
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+#endif
+    while (s < end) {
+        register int rgb = *(const uint32_t*)s; s += 4;
+        *d++ = ((rgb&0xF0)<<4) + ((rgb&0xF000)>>8) + ((rgb&0xF00000)>>20);
+    }
+}
+
 static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size)
 {
     const uint8_t *s = src;
@@ -866,6 +1223,128 @@ static inline void RENAME(rgb24to15)(con
     }
 }
 
+/* Pack 3-byte pixels into 12 bits: byte 0 -> bits 8-11, byte 1 -> bits 4-7, byte 2 -> bits 0-3. */
+static inline void RENAME(rgb24tobgr12)(const uint8_t *src, uint8_t *dst, long src_size)
+{
+    const uint8_t *s = src;
+    const uint8_t *end;
+#if HAVE_MMX
+    const uint8_t *mm_end;
+#endif
+    uint16_t *d = (uint16_t *)dst;
+    end = s + src_size;
+#if HAVE_MMX
+    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
+    __asm__ volatile(
+        "movq         %0, %%mm7     \n\t"
+        "movq         %1, %%mm6     \n\t"::"m"(red_12mask),"m"(green_12mask));
+    mm_end = end - 15;
+    while (s < mm_end) {
+        /* 4 pixels per pass; shifts and 12-bit masks mirror the C loop at the end */
+        __asm__ volatile(
+            PREFETCH"   32%1            \n\t"
+            "movd         %1, %%mm0     \n\t"
+            "movd        3%1, %%mm3     \n\t"
+            "punpckldq   6%1, %%mm0     \n\t"
+            "punpckldq   9%1, %%mm3     \n\t"
+            "movq      %%mm0, %%mm1     \n\t"
+            "movq      %%mm0, %%mm2     \n\t"
+            "movq      %%mm3, %%mm4     \n\t"
+            "movq      %%mm3, %%mm5     \n\t"
+            "psllq        $4, %%mm0     \n\t"
+            "psllq        $4, %%mm3     \n\t"
+            "pand      %%mm7, %%mm0     \n\t"
+            "pand      %%mm7, %%mm3     \n\t"
+            "psrlq        $8, %%mm1     \n\t"
+            "psrlq        $8, %%mm4     \n\t"
+            "pand      %%mm6, %%mm1     \n\t"
+            "pand      %%mm6, %%mm4     \n\t"
+            "psrlq       $20, %%mm2     \n\t"
+            "psrlq       $20, %%mm5     \n\t"
+            "pand         %2, %%mm2     \n\t"
+            "pand         %2, %%mm5     \n\t"
+            "por       %%mm1, %%mm0     \n\t"
+            "por       %%mm4, %%mm3     \n\t"
+            "por       %%mm2, %%mm0     \n\t"
+            "por       %%mm5, %%mm3     \n\t"
+            "psllq       $16, %%mm3     \n\t"
+            "por       %%mm3, %%mm0     \n\t"
+            MOVNTQ"    %%mm0, %0        \n\t"
+            :"=m"(*d):"m"(*s),"m"(blue_12mask):"memory");
+        d += 4;
+        s += 12;
+    }
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+#endif
+    while (s < end) {
+        const int r = *s++;
+        const int g = *s++;
+        const int b = *s++;
+        *d++ = (b>>4) | (g&0xF0) | ((r&0xF0)<<4);
+    }
+}
+
+/* Pack 3-byte pixels into 12 bits: byte 0 -> bits 0-3, byte 1 -> bits 4-7, byte 2 -> bits 8-11. */
+static inline void RENAME(rgb24to12)(const uint8_t *src, uint8_t *dst, long src_size)
+{
+    const uint8_t *s = src;
+    const uint8_t *end;
+#if HAVE_MMX
+    const uint8_t *mm_end;
+#endif
+    uint16_t *d = (uint16_t *)dst;
+    end = s + src_size;
+#if HAVE_MMX
+    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
+    __asm__ volatile(
+        "movq          %0, %%mm7    \n\t"
+        "movq          %1, %%mm6    \n\t"::"m"(red_12mask),"m"(green_12mask));
+    mm_end = end - 12; /* the last load below touches s[9..12], so 13 bytes must remain */
+    while (s < mm_end) {
+        /* 4 pixels per pass; shifts and 12-bit masks mirror the C loop at the end */
+        __asm__ volatile(
+            PREFETCH"    32%1           \n\t"
+            "movd          %1, %%mm0    \n\t"
+            "movd         3%1, %%mm3    \n\t"
+            "punpckldq    6%1, %%mm0    \n\t"
+            "punpckldq    9%1, %%mm3    \n\t"
+            "movq       %%mm0, %%mm1    \n\t"
+            "movq       %%mm0, %%mm2    \n\t"
+            "movq       %%mm3, %%mm4    \n\t"
+            "movq       %%mm3, %%mm5    \n\t"
+            "psrlq         $4, %%mm0    \n\t"
+            "psrlq         $4, %%mm3    \n\t"
+            "pand          %2, %%mm0    \n\t"
+            "pand          %2, %%mm3    \n\t"
+            "psrlq         $8, %%mm1    \n\t"
+            "psrlq         $8, %%mm4    \n\t"
+            "pand       %%mm6, %%mm1    \n\t"
+            "pand       %%mm6, %%mm4    \n\t"
+            "psrlq        $12, %%mm2    \n\t"
+            "psrlq        $12, %%mm5    \n\t"
+            "pand       %%mm7, %%mm2    \n\t"
+            "pand       %%mm7, %%mm5    \n\t"
+            "por        %%mm1, %%mm0    \n\t"
+            "por        %%mm4, %%mm3    \n\t"
+            "por        %%mm2, %%mm0    \n\t"
+            "por        %%mm5, %%mm3    \n\t"
+            "psllq        $16, %%mm3    \n\t"
+            "por        %%mm3, %%mm0    \n\t"
+            MOVNTQ"     %%mm0, %0       \n\t"
+            :"=m"(*d):"m"(*s),"m"(blue_12mask):"memory");
+        d += 4;
+        s += 12;
+    }
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+#endif
+    while (s < end) {
+        const int b = *s++;
+        const int g = *s++;
+        const int r = *s++;
+        *d++ = (b>>4) | (g&0xF0) | ((r&0xF0)<<4);
+    }
+}
+
 /*
   I use less accurate approximation here by simply left-shifting the input
   value and filling the low order bits with zeroes. This method improves PNG
@@ -887,6 +1366,146 @@ static inline void RENAME(rgb24to15)(con
        |
    original bits
 */
+/* Expand 12-bit pixels to 3 bytes (bits 0-3, 4-7, 8-11 in that order), each nibble in the high half of its byte. */
+static inline void RENAME(rgb12tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
+{
+    const uint16_t *end;
+#if HAVE_MMX
+    const uint16_t *mm_end;
+#endif
+    uint8_t *d = (uint8_t *)dst;
+    const uint16_t *s = (const uint16_t *)src;
+    end = s + (src_size >> 1);
+#if HAVE_MMX
+    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
+    mm_end = end - 7;
+    while (s < mm_end) {
+        __asm__ volatile(
+            PREFETCH"    32%1           \n\t"
+            "movq          %1, %%mm0    \n\t"
+            "movq          %1, %%mm1    \n\t"
+            "movq          %1, %%mm2    \n\t"
+            "pand          %2, %%mm0    \n\t"
+            "pand          %3, %%mm1    \n\t"
+            "pand          %4, %%mm2    \n\t"
+            "psllq         $4, %%mm0    \n\t"
+            /* mm1: the middle nibble already sits in bits 4-7, no shift needed */
+            "psrlq         $4, %%mm2    \n\t"
+            "movq       %%mm0, %%mm3    \n\t"
+            "movq       %%mm1, %%mm4    \n\t"
+            "movq       %%mm2, %%mm5    \n\t"
+            "punpcklwd     %5, %%mm0    \n\t"
+            "punpcklwd     %5, %%mm1    \n\t"
+            "punpcklwd     %5, %%mm2    \n\t"
+            "punpckhwd     %5, %%mm3    \n\t"
+            "punpckhwd     %5, %%mm4    \n\t"
+            "punpckhwd     %5, %%mm5    \n\t"
+            "psllq         $8, %%mm1    \n\t"
+            "psllq        $16, %%mm2    \n\t"
+            "por        %%mm1, %%mm0    \n\t"
+            "por        %%mm2, %%mm0    \n\t"
+            "psllq         $8, %%mm4    \n\t"
+            "psllq        $16, %%mm5    \n\t"
+            "por        %%mm4, %%mm3    \n\t"
+            "por        %%mm5, %%mm3    \n\t"
+
+            "movq       %%mm0, %%mm6    \n\t"
+            "movq       %%mm3, %%mm7    \n\t"
+
+            "movq         8%1, %%mm0    \n\t"
+            "movq         8%1, %%mm1    \n\t"
+            "movq         8%1, %%mm2    \n\t"
+            "pand          %2, %%mm0    \n\t"
+            "pand          %3, %%mm1    \n\t"
+            "pand          %4, %%mm2    \n\t"
+            "psllq         $4, %%mm0    \n\t"
+            /* mm1: the middle nibble already sits in bits 4-7, no shift needed */
+            "psrlq         $4, %%mm2    \n\t"
+            "movq       %%mm0, %%mm3    \n\t"
+            "movq       %%mm1, %%mm4    \n\t"
+            "movq       %%mm2, %%mm5    \n\t"
+            "punpcklwd     %5, %%mm0    \n\t"
+            "punpcklwd     %5, %%mm1    \n\t"
+            "punpcklwd     %5, %%mm2    \n\t"
+            "punpckhwd     %5, %%mm3    \n\t"
+            "punpckhwd     %5, %%mm4    \n\t"
+            "punpckhwd     %5, %%mm5    \n\t"
+            "psllq         $8, %%mm1    \n\t"
+            "psllq        $16, %%mm2    \n\t"
+            "por        %%mm1, %%mm0    \n\t"
+            "por        %%mm2, %%mm0    \n\t"
+            "psllq         $8, %%mm4    \n\t"
+            "psllq        $16, %%mm5    \n\t"
+            "por        %%mm4, %%mm3    \n\t"
+            "por        %%mm5, %%mm3    \n\t"
+            :"=m"(*d)
+            :"m"(*s),"m"(mask12b),"m"(mask12g),"m"(mask12r), "m"(mmx_null)
+            :"memory");
+        /* Borrowed 32 to 24 */
+        __asm__ volatile(
+            "movq       %%mm0, %%mm4    \n\t"
+            "movq       %%mm3, %%mm5    \n\t"
+            "movq       %%mm6, %%mm0    \n\t"
+            "movq       %%mm7, %%mm1    \n\t"
+
+            "movq       %%mm4, %%mm6    \n\t"
+            "movq       %%mm5, %%mm7    \n\t"
+            "movq       %%mm0, %%mm2    \n\t"
+            "movq       %%mm1, %%mm3    \n\t"
+
+            "psrlq         $8, %%mm2    \n\t"
+            "psrlq         $8, %%mm3    \n\t"
+            "psrlq         $8, %%mm6    \n\t"
+            "psrlq         $8, %%mm7    \n\t"
+            "pand          %2, %%mm0    \n\t"
+            "pand          %2, %%mm1    \n\t"
+            "pand          %2, %%mm4    \n\t"
+            "pand          %2, %%mm5    \n\t"
+            "pand          %3, %%mm2    \n\t"
+            "pand          %3, %%mm3    \n\t"
+            "pand          %3, %%mm6    \n\t"
+            "pand          %3, %%mm7    \n\t"
+            "por        %%mm2, %%mm0    \n\t"
+            "por        %%mm3, %%mm1    \n\t"
+            "por        %%mm6, %%mm4    \n\t"
+            "por        %%mm7, %%mm5    \n\t"
+
+            "movq       %%mm1, %%mm2    \n\t"
+            "movq       %%mm4, %%mm3    \n\t"
+            "psllq        $48, %%mm2    \n\t"
+            "psllq        $32, %%mm3    \n\t"
+            "pand          %4, %%mm2    \n\t"
+            "pand          %5, %%mm3    \n\t"
+            "por        %%mm2, %%mm0    \n\t"
+            "psrlq        $16, %%mm1    \n\t"
+            "psrlq        $32, %%mm4    \n\t"
+            "psllq        $16, %%mm5    \n\t"
+            "por        %%mm3, %%mm1    \n\t"
+            "pand          %6, %%mm5    \n\t"
+            "por        %%mm5, %%mm4    \n\t"
+
+            MOVNTQ"     %%mm0,   %0     \n\t"
+            MOVNTQ"     %%mm1,  8%0     \n\t"
+            MOVNTQ"     %%mm4, 16%0"
+
+            :"=m"(*d)
+            :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
+            :"memory");
+        d += 24;
+        s += 8;
+    }
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+#endif
+    while (s < end) {
+        register uint16_t bgr;
+        bgr = *s++;
+        *d++ = (bgr&0x0F)<<4;
+        *d++ = bgr&0x0F0;
+        *d++ = (bgr&0x0F00)>>4;
+    }
+}
+
 static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
 {
     const uint16_t *end;
@@ -1185,6 +1804,93 @@ static inline void RENAME(rgb16tobgr24)(
     MOVNTQ"     %%mm0,  %0      \n\t"                               \
     MOVNTQ"     %%mm3, 8%0      \n\t"                               \
 
+/**
+ * Expand 12-bit pixels (one 16-bit word each, 4-bit fields in bits 0-3,
+ * 4-7 and 8-11) to four bytes per pixel.
+ *
+ * Each field ends up in the upper nibble of its own output byte and the
+ * fourth byte is written as 0; HAVE_BIGENDIAN selects the byte order.
+ *
+ * @param src      source pixels, read as uint16_t
+ * @param dst      destination, receives 4 bytes per source pixel
+ * @param src_size size of the source data in bytes
+ */
+static inline void RENAME(rgb12to32)(const uint8_t *src, uint8_t *dst, long src_size)
+{
+    const uint16_t *end;
+#if HAVE_MMX
+    const uint16_t *mm_end;
+#endif
+    uint8_t *d = (uint8_t *)dst;
+    const uint16_t *s = (const uint16_t *)src;
+    end = s + (src_size >> 1);
+#if HAVE_MMX
+    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
+    __asm__ volatile("pxor    %%mm7,%%mm7    \n\t":::"memory");
+    mm_end = end - 3;
+    /* 4 pixels (8 source bytes -> 16 destination bytes) per iteration */
+    while (s < mm_end) {
+        __asm__ volatile(
+            PREFETCH"    32%1           \n\t"
+            "movq          %1, %%mm0    \n\t"
+            "movq          %1, %%mm1    \n\t"
+            "movq          %1, %%mm2    \n\t"
+            "pand          %2, %%mm0    \n\t"
+            "pand          %3, %%mm1    \n\t"
+            "pand          %4, %%mm2    \n\t"
+            /* Move every 4-bit field to bits 4-7 of its word, exactly like
+             * the scalar tail below (<<4, no shift, >>4). Shifts of 3/2/7
+             * belong to 5-bit fields and would give different bytes.
+             * NOTE(review): assumes mask12b/g/r select bits 0-3/4-7/8-11 of
+             * each word -- confirm against their definitions. */
+            "psllq         $4, %%mm0    \n\t"
+            "psrlq         $4, %%mm2    \n\t"
+            "movq       %%mm0, %%mm3    \n\t"
+            "movq       %%mm1, %%mm4    \n\t"
+            "movq       %%mm2, %%mm5    \n\t"
+            "punpcklwd  %%mm7, %%mm0    \n\t"
+            "punpcklwd  %%mm7, %%mm1    \n\t"
+            "punpcklwd  %%mm7, %%mm2    \n\t"
+            "punpckhwd  %%mm7, %%mm3    \n\t"
+            "punpckhwd  %%mm7, %%mm4    \n\t"
+            "punpckhwd  %%mm7, %%mm5    \n\t"
+            "psllq         $8, %%mm1    \n\t"
+            "psllq        $16, %%mm2    \n\t"
+            "por        %%mm1, %%mm0    \n\t"
+            "por        %%mm2, %%mm0    \n\t"
+            "psllq         $8, %%mm4    \n\t"
+            "psllq        $16, %%mm5    \n\t"
+            "por        %%mm4, %%mm3    \n\t"
+            "por        %%mm5, %%mm3    \n\t"
+            MOVNTQ"     %%mm0,  %0      \n\t"
+            MOVNTQ"     %%mm3, 8%0      \n\t"
+            :"=m"(*d)
+            :"m"(*s),"m"(mask12b),"m"(mask12g),"m"(mask12r)
+            :"memory");
+        d += 16;
+        s += 4;
+    }
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+#endif
+    /* scalar path: the whole buffer without MMX, otherwise the last pixels */
+    while (s < end) {
+        register uint16_t bgr;
+        bgr = *s++;
+#if HAVE_BIGENDIAN
+        *d++ = 0;
+        *d++ = (bgr&0x0F00)>>4;
+        *d++ = bgr&0x0F0;
+        *d++ = (bgr&0x0F)<<4;
+#else
+        *d++ = (bgr&0x0F)<<4;
+        *d++ = bgr&0x0F0;
+        *d++ = (bgr&0x0F00)>>4;
+        *d++ = 0;
+#endif
+    }
+}
+
 static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_size)
 {
     const uint16_t *end;
@@ -2973,24 +3668,34 @@ static void RENAME(uyvytoyuv422)(uint8_t
 
 static inline void RENAME(rgb2rgb_init)(void)
 {
+    rgb12to15       = RENAME(rgb12to15);
+    rgb12to16       = RENAME(rgb12to16);
+    rgb12tobgr24    = RENAME(rgb12tobgr24);
+    rgb12to32       = RENAME(rgb12to32);
+    rgb15to12       = RENAME(rgb15to12);
     rgb15to16       = RENAME(rgb15to16);
     rgb15tobgr24    = RENAME(rgb15tobgr24);
     rgb15to32       = RENAME(rgb15to32);
     rgb16tobgr24    = RENAME(rgb16tobgr24);
     rgb16to32       = RENAME(rgb16to32);
     rgb16to15       = RENAME(rgb16to15);
+    rgb16to12       = RENAME(rgb16to12);
     rgb24tobgr16    = RENAME(rgb24tobgr16);
     rgb24tobgr15    = RENAME(rgb24tobgr15);
+    rgb24tobgr12    = RENAME(rgb24tobgr12);
     rgb24tobgr32    = RENAME(rgb24tobgr32);
     rgb32to16       = RENAME(rgb32to16);
     rgb32to15       = RENAME(rgb32to15);
+    rgb32to12       = RENAME(rgb32to12);
     rgb32tobgr24    = RENAME(rgb32tobgr24);
+    rgb24to12       = RENAME(rgb24to12);
     rgb24to15       = RENAME(rgb24to15);
     rgb24to16       = RENAME(rgb24to16);
     rgb24tobgr24    = RENAME(rgb24tobgr24);
     rgb32tobgr32    = RENAME(rgb32tobgr32);
     rgb32tobgr16    = RENAME(rgb32tobgr16);
     rgb32tobgr15    = RENAME(rgb32tobgr15);
+    rgb32tobgr12    = RENAME(rgb32tobgr12);
     yv12toyuy2      = RENAME(yv12toyuy2);
     yv12touyvy      = RENAME(yv12touyvy);
     yuv422ptoyuy2   = RENAME(yuv422ptoyuy2);
diff -upr trunk/libswscale/swscale.c trunk.rgb12/libswscale/swscale.c
--- trunk/libswscale/swscale.c	2010-02-24 15:55:47.000000000 +0100
+++ trunk.rgb12/libswscale/swscale.c	2010-02-25 11:30:58.000000000 +0100
@@ -22,12 +22,12 @@
  */
 
 /*
-  supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
-  supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
-  {BGR,RGB}{1,4,8,15,16} support dithering
+  supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, BGR12, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
+  supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,12,15,16,24,32}, Y8/Y800, YVU9/IF09
+  {BGR,RGB}{1,4,8,12,15,16} support dithering
 
   unscaled special converters (YV12=I420=IYUV, Y800=Y8)
-  YV12 -> {BGR,RGB}{1,4,8,15,16,24,32}
+  YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
   x -> x
   YUV9 -> YV12
   YUV9/YV12 -> Y800
@@ -149,12 +149,19 @@ const DECLARE_ALIGNED(8, uint64_t, ff_di
         0x0602060206020602LL,
         0x0004000400040004LL,};
 
+const DECLARE_ALIGNED(8, uint64_t, ff_dither16)[2] = {
+        0x0602060206020602LL,
+        0x0004000400040004LL,};         /* FIXME: placeholder, same values as the table just above; proper values for 4-bit output are still needed */
+
 DECLARE_ASM_CONST(8, uint64_t, b16Mask)=   0x001F001F001F001FLL;
 DECLARE_ASM_CONST(8, uint64_t, g16Mask)=   0x07E007E007E007E0LL;
 DECLARE_ASM_CONST(8, uint64_t, r16Mask)=   0xF800F800F800F800LL;
 DECLARE_ASM_CONST(8, uint64_t, b15Mask)=   0x001F001F001F001FLL;
 DECLARE_ASM_CONST(8, uint64_t, g15Mask)=   0x03E003E003E003E0LL;
 DECLARE_ASM_CONST(8, uint64_t, r15Mask)=   0x7C007C007C007C00LL;
+DECLARE_ASM_CONST(8, uint64_t, b12Mask)=   0x000F000F000F000FLL;
+DECLARE_ASM_CONST(8, uint64_t, g12Mask)=   0x00F000F000F000F0LL;
+DECLARE_ASM_CONST(8, uint64_t, r12Mask)=   0x0F000F000F000F00LL;
 
 DECLARE_ALIGNED(8, const uint64_t, ff_M24A)         = 0x00FF0000FF0000FFLL;
 DECLARE_ALIGNED(8, const uint64_t, ff_M24B)         = 0xFF0000FF0000FF00LL;
@@ -198,6 +205,11 @@ DECLARE_ALIGNED(8, static const uint8_t,
 {  0,   4,   0,   4,   0,   4,   0,   4, },
 };
 
+DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_16)[2][8]={
+{  6,   2,   6,   2,   6,   2,   6,   2, },
+{  0,   4,   0,   4,   0,   4,   0,   4, },
+};                                           /* FIXME: placeholder values, not yet worked out for 4-bit output */
+
 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
 { 17,   9,  23,  15,  16,   8,  22,  14, },
 {  5,  29,   3,  27,   4,  28,   2,  26, },
@@ -795,6 +807,21 @@ static inline void yuv2nv12XinC(const in
             }\
         }\
         break;\
+    case PIX_FMT_RGB444:\
+    case PIX_FMT_BGR444:\
+        {\
+            const int dr1= dither_2x2_16[y&1    ][0];\
+            const int dg1= dither_2x2_16[y&1    ][1];\
+            const int db1= dither_2x2_16[(y&1)^1][0];\
+            const int dr2= dither_2x2_16[y&1    ][1];\
+            const int dg2= dither_2x2_16[y&1    ][0];\
+            const int db2= dither_2x2_16[(y&1)^1][1];\
+            func(uint16_t,0)\
+                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
+                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
+            }\
+        }\
+        break;\
     case PIX_FMT_RGB8:\
     case PIX_FMT_BGR8:\
         {\
@@ -1032,8 +1059,12 @@ BGR2Y(uint32_t, bgr32ToY,16, 0, 0, 0x00F
 BGR2Y(uint32_t, rgb32ToY, 0, 0,16, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY   , BY<< 8, RGB2YUV_SHIFT+8)
 BGR2Y(uint16_t, bgr16ToY, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RY<<11, GY<<5, BY    , RGB2YUV_SHIFT+8)
 BGR2Y(uint16_t, bgr15ToY, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RY<<10, GY<<5, BY    , RGB2YUV_SHIFT+7)
+/* 12 bit: the coefficient shifts bring every 4-bit field up to bit 11, i.e. 4 bits
+ * above an 8-bit sample, hence RGB2YUV_SHIFT+4 (16 bit: bit 15, +8; 15 bit: bit 14, +7). */
+BGR2Y(uint16_t, bgr12ToY, 0, 0, 0, 0x000F, 0x00F0, 0x0F00, RY<< 8, GY<<4, BY    , RGB2YUV_SHIFT+4)
 BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY    , GY<<5, BY<<11, RGB2YUV_SHIFT+8)
 BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY    , GY<<5, BY<<10, RGB2YUV_SHIFT+7)
+BGR2Y(uint16_t, rgb12ToY, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, RY    , GY<<4, BY<< 8, RGB2YUV_SHIFT+4)
 
 static inline void abgrToA(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
 {
@@ -1078,8 +1107,12 @@ BGR2UV(uint32_t, bgr32ToUV,16, 0, 0, 0xF
 BGR2UV(uint32_t, rgb32ToUV, 0, 0,16, 0xFF000000,   0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU   , BU<< 8, RV<< 8, GV   , BV<< 8, RGB2YUV_SHIFT+8)
 BGR2UV(uint16_t, bgr16ToUV, 0, 0, 0,          0,   0x001F, 0x07E0,   0xF800, RU<<11, GU<<5, BU    , RV<<11, GV<<5, BV    , RGB2YUV_SHIFT+8)
 BGR2UV(uint16_t, bgr15ToUV, 0, 0, 0,          0,   0x001F, 0x03E0,   0x7C00, RU<<10, GU<<5, BU    , RV<<10, GV<<5, BV    , RGB2YUV_SHIFT+7)
+/* 12 bit: U and V use the same per-field shifts (8/4/0), which bring every 4-bit field
+ * up to bit 11, i.e. 4 bits above an 8-bit sample, hence RGB2YUV_SHIFT+4. */
+BGR2UV(uint16_t, bgr12ToUV, 0, 0, 0,          0,   0x000F, 0x00F0,   0x0F00, RU<< 8, GU<<4, BU    , RV<< 8, GV<<4, BV    , RGB2YUV_SHIFT+4)
 BGR2UV(uint16_t, rgb16ToUV, 0, 0, 0,          0,   0xF800, 0x07E0,   0x001F, RU    , GU<<5, BU<<11, RV    , GV<<5, BV<<11, RGB2YUV_SHIFT+8)
 BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0,          0,   0x7C00, 0x03E0,   0x001F, RU    , GU<<5, BU<<10, RV    , GV<<5, BV<<10, RGB2YUV_SHIFT+7)
+BGR2UV(uint16_t, rgb12ToUV, 0, 0, 0,          0,   0x0F00, 0x00F0,   0x000F, RU    , GU<<4, BU<< 8, RV    , GV<<4, BV<< 8, RGB2YUV_SHIFT+4)
 
 static inline void palToY(uint8_t *dst, const uint8_t *src, long width, uint32_t *pal)
 {
@@ -1438,8 +1469,9 @@ static int rgbToRgbWrapper(SwsContext *c
     const enum PixelFormat dstFormat= c->dstFormat;
     const int srcBpp= (c->srcFormatBpp + 7) >> 3;
     const int dstBpp= (c->dstFormatBpp + 7) >> 3;
-    const int srcId= c->srcFormatBpp >> 2; /* 1:0, 4:1, 8:2, 15:3, 16:4, 24:6, 32:8 */
-    const int dstId= c->dstFormatBpp >> 2;
+                                           /* 1:0, 4:1, 8:3, 12:5, 15:6, 16:7, 24:B, 32:F */
+    const int srcId= ((((c->srcFormatBpp&0x1C)>>1)|(c->srcFormatBpp&0x01))-1)&0x0F;
+    const int dstId= ((((c->dstFormatBpp&0x1C)>>1)|(c->dstFormatBpp&0x01))-1)&0x0F;
     void (*conv)(const uint8_t *src, uint8_t *dst, long src_size)=NULL;
 
 #define CONV_IS(src, dst) (srcFormat == PIX_FMT_##src && dstFormat == PIX_FMT_##dst)
@@ -1462,37 +1494,54 @@ static int rgbToRgbWrapper(SwsContext *c
     if (  (isBGRinInt(srcFormat) && isBGRinInt(dstFormat))
        || (isRGBinInt(srcFormat) && isRGBinInt(dstFormat))) {
         switch(srcId | (dstId<<4)) {
-        case 0x34: conv= rgb16to15; break;
-        case 0x36: conv= rgb24to15; break;
-        case 0x38: conv= rgb32to15; break;
-        case 0x43: conv= rgb15to16; break;
-        case 0x46: conv= rgb24to16; break;
-        case 0x48: conv= rgb32to16; break;
-        case 0x63: conv= rgb15to24; break;
-        case 0x64: conv= rgb16to24; break;
-        case 0x68: conv= rgb32to24; break;
-        case 0x83: conv= rgb15to32; break;
-        case 0x84: conv= rgb16to32; break;
-        case 0x86: conv= rgb24to32; break;
+        case 0x56: conv= rgb15to12; break;
+        case 0x57: conv= rgb16to12; break;
+        case 0x5B: conv= rgb24to12; break;
+        case 0x5F: conv= rgb32to12; break;
+        case 0x65: conv= rgb12to15; break;
+        case 0x67: conv= rgb16to15; break;
+        case 0x6B: conv= rgb24to15; break;
+        case 0x6F: conv= rgb32to15; break;
+        case 0x75: conv= rgb12to16; break;
+        case 0x76: conv= rgb15to16; break;
+        case 0x7B: conv= rgb24to16; break;
+        case 0x7F: conv= rgb32to16; break;
+        case 0xB5: conv= rgb12to24; break;
+        case 0xB6: conv= rgb15to24; break;
+        case 0xB7: conv= rgb16to24; break;
+        case 0xBF: conv= rgb32to24; break;
+        case 0xF5: conv= rgb12to32; break;
+        case 0xF6: conv= rgb15to32; break;
+        case 0xF7: conv= rgb16to32; break;
+        case 0xFB: conv= rgb24to32; break;
         }
     } else if (  (isBGRinInt(srcFormat) && isRGBinInt(dstFormat))
              || (isRGBinInt(srcFormat) && isBGRinInt(dstFormat))) {
         switch(srcId | (dstId<<4)) {
-        case 0x33: conv= rgb15tobgr15; break;
-        case 0x34: conv= rgb16tobgr15; break;
-        case 0x36: conv= rgb24tobgr15; break;
-        case 0x38: conv= rgb32tobgr15; break;
-        case 0x43: conv= rgb15tobgr16; break;
-        case 0x44: conv= rgb16tobgr16; break;
-        case 0x46: conv= rgb24tobgr16; break;
-        case 0x48: conv= rgb32tobgr16; break;
-        case 0x63: conv= rgb15tobgr24; break;
-        case 0x64: conv= rgb16tobgr24; break;
-        case 0x66: conv= rgb24tobgr24; break;
-        case 0x68: conv= rgb32tobgr24; break;
-        case 0x83: conv= rgb15tobgr32; break;
-        case 0x84: conv= rgb16tobgr32; break;
-        case 0x86: conv= rgb24tobgr32; break;
+        case 0x55: conv= rgb12tobgr12; break;
+        case 0x56: conv= rgb15tobgr12; break;
+        case 0x57: conv= rgb16tobgr12; break;
+        case 0x5B: conv= rgb24tobgr12; break;
+        case 0x5F: conv= rgb32tobgr12; break;
+        case 0x65: conv= rgb12tobgr15; break;
+        case 0x66: conv= rgb15tobgr15; break;
+        case 0x67: conv= rgb16tobgr15; break;
+        case 0x6B: conv= rgb24tobgr15; break;
+        case 0x6F: conv= rgb32tobgr15; break;
+        case 0x75: conv= rgb12tobgr16; break;
+        case 0x76: conv= rgb15tobgr16; break;
+        case 0x77: conv= rgb16tobgr16; break;
+        case 0x7B: conv= rgb24tobgr16; break;
+        case 0x7F: conv= rgb32tobgr16; break;
+        case 0xB5: conv= rgb12tobgr24; break;
+        case 0xB6: conv= rgb15tobgr24; break;
+        case 0xB7: conv= rgb16tobgr24; break;
+        case 0xBB: conv= rgb24tobgr24; break;
+        case 0xBF: conv= rgb32tobgr24; break;
+        case 0xF5: conv= rgb12tobgr32; break;
+        case 0xF6: conv= rgb15tobgr32; break;
+        case 0xF7: conv= rgb16tobgr32; break;
+        case 0xFB: conv= rgb24tobgr32; break;
         }
     }
 
diff -upr trunk/libswscale/swscale_internal.h trunk.rgb12/libswscale/swscale_internal.h
--- trunk/libswscale/swscale_internal.h	2010-02-24 15:55:47.000000000 +0100
+++ trunk.rgb12/libswscale/swscale_internal.h	2010-02-25 02:10:33.000000000 +0100
@@ -393,6 +393,7 @@ const char *sws_format_name(enum PixelFo
         || (x)==PIX_FMT_RGB24       \
         || (x)==PIX_FMT_RGB565      \
         || (x)==PIX_FMT_RGB555      \
+        || (x)==PIX_FMT_RGB444      \
         || (x)==PIX_FMT_RGB8        \
         || (x)==PIX_FMT_RGB4        \
         || (x)==PIX_FMT_RGB4_BYTE   \
@@ -405,6 +406,7 @@ const char *sws_format_name(enum PixelFo
         || (x)==PIX_FMT_BGR24       \
         || (x)==PIX_FMT_BGR565      \
         || (x)==PIX_FMT_BGR555      \
+        || (x)==PIX_FMT_BGR444      \
         || (x)==PIX_FMT_BGR8        \
         || (x)==PIX_FMT_BGR4        \
         || (x)==PIX_FMT_BGR4_BYTE   \
@@ -437,6 +439,7 @@ const char *sws_format_name(enum PixelFo
 
 extern const uint64_t ff_dither4[2];
 extern const uint64_t ff_dither8[2];
+extern const uint64_t ff_dither16[2];
 
 extern const AVClass sws_context_class;
 
diff -upr trunk/libswscale/swscale_template.c trunk.rgb12/libswscale/swscale_template.c
--- trunk/libswscale/swscale_template.c	2010-02-24 15:55:47.000000000 +0100
+++ trunk.rgb12/libswscale/swscale_template.c	2010-02-25 03:48:46.000000000 +0100
@@ -714,6 +714,32 @@
     " jb             1b             \n\t"
 #define WRITERGB15(dst, dstw, index)  REAL_WRITERGB15(dst, dstw, index)
 
+#define REAL_WRITERGB12(dst, dstw, index) /* pack the byte planes mm2=B, mm4=G, mm5=R into 8 16-bit pixels and store them */ \
+    "pand "MANGLE(bF0)", %%mm2  \n\t" /* B */\
+    "pand "MANGLE(bF0)", %%mm4  \n\t" /* G */\
+    "pand "MANGLE(bF0)", %%mm5  \n\t" /* R */\
+    "psrlq           $4, %%mm2  \n\t" /* B down by 4 bits */\
+    "psrlq           $4, %%mm5  \n\t" /* R down by 4 bits */\
+    /* NOTE(review): bF0 is not defined in the visible hunks; presumably 0xF0 in every byte -- confirm */\
+    "movq         %%mm2, %%mm1  \n\t"\
+    "movq         %%mm4, %%mm3  \n\t"\
+    /* interleave G bytes with mm7 (expected to be 0, see the register comment at the call sites) and B bytes with R bytes */\
+    "punpcklbw    %%mm7, %%mm3  \n\t"\
+    "punpcklbw    %%mm5, %%mm2  \n\t"\
+    "punpckhbw    %%mm7, %%mm4  \n\t"\
+    "punpckhbw    %%mm5, %%mm1  \n\t"\
+    /* merge the G words into the B/R words */\
+    "por          %%mm3, %%mm2  \n\t"\
+    "por          %%mm4, %%mm1  \n\t"\
+    /* store 16 bytes starting at dst + 2*index */\
+    MOVNTQ(%%mm2,  (dst, index, 2))\
+    MOVNTQ(%%mm1, 8(dst, index, 2))\
+    /* advance index by 8 and jump back to label 1 while it is below dstw */\
+    "add             $8, "#index"   \n\t"\
+    "cmp        "#dstw", "#index"   \n\t"\
+    " jb             1b             \n\t"
+#define WRITERGB12(dst, dstw, index)  REAL_WRITERGB12(dst, dstw, index)
+
 #define WRITEBGR24OLD(dst, dstw, index) \
     /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
     "movq      %%mm2, %%mm1             \n\t" /* B */\
@@ -1070,6 +1096,20 @@ static inline void RENAME(yuv2packedX)(S
                 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
                 );
                 return;
+            case PIX_FMT_RGB444:
+                YSCALEYUV2PACKEDX_ACCURATE
+                YSCALEYUV2RGBX
+                "pxor %%mm7, %%mm7 \n\t"
+                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+#ifdef DITHER1XBPP
+                "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
+                "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
+                "paddusb "RED_DITHER"(%0), %%mm5\n\t"
+#endif
+                /* pack and store with WRITERGB12, the 12-bit writer macro of this file */
+                WRITERGB12(%4, %5, %%REGa)
+                YSCALEYUV2PACKEDX_END
+                return;
             case PIX_FMT_RGB555:
                 YSCALEYUV2PACKEDX_ACCURATE
                 YSCALEYUV2RGBX
@@ -1144,6 +1184,20 @@ static inline void RENAME(yuv2packedX)(S
                 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
                 );
                 return;
+            case PIX_FMT_RGB444:
+                YSCALEYUV2PACKEDX
+                YSCALEYUV2RGBX
+                "pxor %%mm7, %%mm7 \n\t"
+                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+#ifdef DITHER1XBPP
+                "paddusb "BLUE_DITHER"(%0), %%mm2  \n\t"
+                "paddusb "GREEN_DITHER"(%0), %%mm4  \n\t"
+                "paddusb "RED_DITHER"(%0), %%mm5  \n\t"
+#endif
+                /* pack and store with WRITERGB12, the 12-bit writer macro of this file */
+                WRITERGB12(%4, %5, %%REGa)
+                YSCALEYUV2PACKEDX_END
+                return;
             case PIX_FMT_RGB555:
                 YSCALEYUV2PACKEDX
                 YSCALEYUV2RGBX
@@ -1290,6 +1344,28 @@ static inline void RENAME(yuv2packed2)(S
                 "a" (&c->redDither)
             );
             return;
+        case PIX_FMT_RGB444:
+            __asm__ volatile(
+                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                "mov        %4, %%"REG_b"               \n\t"
+                "push %%"REG_BP"                        \n\t"
+                YSCALEYUV2RGB(%%REGBP, %5)
+                "pxor    %%mm7, %%mm7                   \n\t"
+                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+#ifdef DITHER1XBPP
+                "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
+                "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
+                "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
+#endif
+
+                WRITERGB12(%%REGb, 8280(%5), %%REGBP)
+                "pop %%"REG_BP"                         \n\t"
+                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+
+                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                "a" (&c->redDither)
+            );
+            return;
         case PIX_FMT_RGB555:
             __asm__ volatile(
                 "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
@@ -1420,6 +1496,27 @@ static inline void RENAME(yuv2packed1)(S
                     "a" (&c->redDither)
                 );
                 return;
+            case PIX_FMT_RGB444:
+                __asm__ volatile(
+                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                    "mov        %4, %%"REG_b"               \n\t"
+                    "push %%"REG_BP"                        \n\t"
+                    YSCALEYUV2RGB1(%%REGBP, %5)
+                    "pxor    %%mm7, %%mm7                   \n\t"
+                    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+#ifdef DITHER1XBPP
+                    "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
+                    "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
+                    "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
+#endif
+                    WRITERGB12(%%REGb, 8280(%5), %%REGBP)
+                    "pop %%"REG_BP"                         \n\t"
+                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+
+                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                    "a" (&c->redDither)
+                );
+                return;
             case PIX_FMT_RGB555:
                 __asm__ volatile(
                     "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
@@ -1526,6 +1623,27 @@ static inline void RENAME(yuv2packed1)(S
                     "a" (&c->redDither)
                 );
                 return;
+            case PIX_FMT_RGB444:
+                __asm__ volatile(
+                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                    "mov        %4, %%"REG_b"               \n\t"
+                    "push %%"REG_BP"                        \n\t"
+                    YSCALEYUV2RGB1b(%%REGBP, %5)
+                    "pxor    %%mm7, %%mm7                   \n\t" /* WRITERGB12 interleaves mm7 as the zero bytes */
+                    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+#ifdef DITHER1XBPP
+                    "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
+                    "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
+                    "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
+#endif
+                    WRITERGB12(%%REGb, 8280(%5), %%REGBP)
+                    "pop %%"REG_BP"                         \n\t"
+                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+
+                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                    "a" (&c->redDither)
+                );
+                return;
             case PIX_FMT_RGB555:
                 __asm__ volatile(
                     "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
@@ -2723,12 +2861,18 @@ static int RENAME(swScale)(SwsContext *c
             break; //we can't output a dstY line so let's try with the next slice
 
 #if COMPILE_TEMPLATE_MMX
-        c->blueDither= ff_dither8[dstY&1];
-        if (c->dstFormat == PIX_FMT_RGB555 || c->dstFormat == PIX_FMT_BGR555)
-            c->greenDither= ff_dither8[dstY&1];
-        else
-            c->greenDither= ff_dither4[dstY&1];
-        c->redDither= ff_dither8[(dstY+1)&1];
+        if (c->dstFormat == PIX_FMT_RGB444 || c->dstFormat == PIX_FMT_BGR444) {
+            c->blueDither= ff_dither16[dstY&1];
+            c->greenDither= ff_dither16[dstY&1];
+            c->redDither= ff_dither16[(dstY+1)&1];
+        } else {
+            c->blueDither= ff_dither8[dstY&1];
+            if (c->dstFormat == PIX_FMT_RGB555 || c->dstFormat == PIX_FMT_BGR555)
+                c->greenDither= ff_dither8[dstY&1];
+            else
+                c->greenDither= ff_dither4[dstY&1];
+            c->redDither= ff_dither8[(dstY+1)&1];
+        }
 #endif
         if (dstY < dstH-2) {
             const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
@@ -2968,11 +3112,13 @@ static void RENAME(sws_init_swScale)(Sws
         case PIX_FMT_BGR24  : c->chrToYV12 = RENAME(bgr24ToUV_half); break;
         case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_half; break;
         case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_half; break;
+        case PIX_FMT_BGR444 : c->chrToYV12 = bgr12ToUV_half; break;
         case PIX_FMT_BGR32  :
         case PIX_FMT_BGR32_1: c->chrToYV12 = rgb32ToUV_half; break;
         case PIX_FMT_RGB24  : c->chrToYV12 = RENAME(rgb24ToUV_half); break;
         case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_half; break;
         case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_half; break;
+        case PIX_FMT_RGB444 : c->chrToYV12 = rgb12ToUV_half; break;
         }
     } else {
         switch(srcFormat) {
@@ -2983,11 +3129,13 @@ static void RENAME(sws_init_swScale)(Sws
         case PIX_FMT_BGR24  : c->chrToYV12 = RENAME(bgr24ToUV); break;
         case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV; break;
         case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV; break;
+        case PIX_FMT_BGR444 : c->chrToYV12 = bgr12ToUV; break;
         case PIX_FMT_BGR32  :
         case PIX_FMT_BGR32_1: c->chrToYV12 = rgb32ToUV; break;
         case PIX_FMT_RGB24  : c->chrToYV12 = RENAME(rgb24ToUV); break;
         case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV; break;
         case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV; break;
+        case PIX_FMT_RGB444 : c->chrToYV12 = rgb12ToUV; break;
         }
     }
 
@@ -3007,9 +3155,11 @@ static void RENAME(sws_init_swScale)(Sws
     case PIX_FMT_BGR24    : c->lumToYV12 = RENAME(bgr24ToY); break;
     case PIX_FMT_BGR565   : c->lumToYV12 = bgr16ToY; break;
     case PIX_FMT_BGR555   : c->lumToYV12 = bgr15ToY; break;
+    case PIX_FMT_BGR444   : c->lumToYV12 = bgr12ToY; break;
     case PIX_FMT_RGB24    : c->lumToYV12 = RENAME(rgb24ToY); break;
     case PIX_FMT_RGB565   : c->lumToYV12 = rgb16ToY; break;
     case PIX_FMT_RGB555   : c->lumToYV12 = rgb15ToY; break;
+    case PIX_FMT_RGB444   : c->lumToYV12 = rgb12ToY; break;
     case PIX_FMT_RGB8     :
     case PIX_FMT_BGR8     :
     case PIX_FMT_PAL8     :
diff -upr trunk/libswscale/utils.c trunk.rgb12/libswscale/utils.c
--- trunk/libswscale/utils.c	2010-02-24 15:55:47.000000000 +0100
+++ trunk.rgb12/libswscale/utils.c	2010-02-25 04:11:05.000000000 +0100
@@ -77,11 +77,13 @@ const char *swscale_license(void)
         || (x)==PIX_FMT_BGR24       \
         || (x)==PIX_FMT_BGR565      \
         || (x)==PIX_FMT_BGR555      \
+        || (x)==PIX_FMT_BGR444      \
         || (x)==PIX_FMT_BGR32       \
         || (x)==PIX_FMT_BGR32_1     \
         || (x)==PIX_FMT_RGB24       \
         || (x)==PIX_FMT_RGB565      \
         || (x)==PIX_FMT_RGB555      \
+        || (x)==PIX_FMT_RGB444      \
         || (x)==PIX_FMT_GRAY8       \
         || (x)==PIX_FMT_YUV410P     \
         || (x)==PIX_FMT_YUV440P     \
@@ -1116,7 +1118,7 @@ SwsContext *sws_getContext(int srcW, int
         av_log(c, AV_LOG_INFO, "from %s to %s%s ",
                sws_format_name(srcFormat),
 #ifdef DITHER1XBPP
-               dstFormat == PIX_FMT_BGR555 || dstFormat == PIX_FMT_BGR565 ? "dithered " : "",
+               dstFormat == PIX_FMT_BGR444 || dstFormat == PIX_FMT_BGR555 || dstFormat == PIX_FMT_BGR565 ? "dithered " : "",
 #else
                "",
 #endif
@@ -1185,6 +1187,8 @@ SwsContext *sws_getContext(int srcW, int
             av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR16 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
         else if (dstFormat==PIX_FMT_BGR555)
             av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR15 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+        else if (dstFormat==PIX_FMT_BGR444)
+            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR12 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
 
         av_log(c, AV_LOG_VERBOSE, "%dx%d -> %dx%d\n", srcW, srcH, dstW, dstH);
         av_log(c, AV_LOG_DEBUG, "lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
diff -upr trunk/libswscale/x86/yuv2rgb_mmx.c trunk.rgb12/libswscale/x86/yuv2rgb_mmx.c
--- trunk/libswscale/x86/yuv2rgb_mmx.c	2010-02-24 15:55:45.000000000 +0100
+++ trunk.rgb12/libswscale/x86/yuv2rgb_mmx.c	2010-02-25 04:30:36.000000000 +0100
@@ -76,6 +76,7 @@ SwsFunc ff_yuv2rgb_init_mmx(SwsContext *
         case PIX_FMT_BGR24:  return yuv420_bgr24_MMX2;
         case PIX_FMT_RGB565: return yuv420_rgb16_MMX2;
         case PIX_FMT_RGB555: return yuv420_rgb15_MMX2;
+        case PIX_FMT_RGB444: return yuv420_rgb12_MMX2;
         }
     }
     if (c->flags & SWS_CPU_CAPS_MMX) {
@@ -94,6 +95,7 @@ SwsFunc ff_yuv2rgb_init_mmx(SwsContext *
         case PIX_FMT_BGR24:  return yuv420_bgr24_MMX;
         case PIX_FMT_RGB565: return yuv420_rgb16_MMX;
         case PIX_FMT_RGB555: return yuv420_rgb15_MMX;
+        case PIX_FMT_RGB444: return yuv420_rgb12_MMX;
         }
     }
 
diff -upr trunk/libswscale/x86/yuv2rgb_template.c trunk.rgb12/libswscale/x86/yuv2rgb_template.c
--- trunk/libswscale/x86/yuv2rgb_template.c	2010-02-24 15:55:45.000000000 +0100
+++ trunk.rgb12/libswscale/x86/yuv2rgb_template.c	2010-02-25 02:10:33.000000000 +0100
@@ -295,6 +295,63 @@ static inline int RENAME(yuv420_rgb15)(S
     YUV2RGB_OPERANDS
 }
 
+static inline int RENAME(yuv420_rgb12)(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
+                                       int srcSliceH, uint8_t* dst[], int dstStride[])
+{
+    int y, h_size;
+    /* each pass of the asm body packs 8 pixels into 16-bit words (r in bits 8-11, g in 4-7, b in 0-3) and stores them as two quadwords */
+    YUV422_UNSHIFT
+    YUV2RGB_LOOP(2)
+        /* dither values for this line; they are only added below when DITHER1XBPP is defined */
+        c->blueDither= ff_dither16[y&1];
+        c->greenDither= ff_dither16[y&1];
+        c->redDither= ff_dither16[(y+1)&1];
+
+        YUV2RGB_INIT
+        YUV2RGB
+
+#ifdef DITHER1XBPP
+        "paddusb "BLUE_DITHER"(%4), %%mm0  \n\t"
+        "paddusb "GREEN_DITHER"(%4), %%mm2  \n\t"
+        "paddusb "RED_DITHER"(%4), %%mm1  \n\t"
+#endif
+
+        /* mask unneeded bits off */
+        "pand "MANGLE(mmx_f0f0w)", %%mm0;" /* b7b6b5b4 0_0_0_0 b7b6b5b4 0_0_0_0 */
+        "pand "MANGLE(mmx_f0f0w)", %%mm2;" /* g7g6g5g4 0_0_0_0 g7g6g5g4 0_0_0_0 */
+        "pand "MANGLE(mmx_f0f0w)", %%mm1;" /* r7r6r5r4 0_0_0_0 r7r6r5r4 0_0_0_0 */
+
+        "psrlw   $4, %%mm0;" /* 0_0_0_0 b7b6b5b4 0_0_0_0 b7b6b5b4 */
+        "psrlw   $4, %%mm1;" /* 0_0_0_0 r7r6r5r4 0_0_0_0 r7r6r5r4 */
+        "pxor %%mm4, %%mm4;" /* zero mm4 */
+
+        "movq %%mm0, %%mm5;" /* Copy B7-B0 */
+        "movq %%mm2, %%mm7;" /* Copy G7-G0 */
+
+        /* convert rgb24 plane to rgb12 pack for pixel 0-3 */
+        "punpcklbw %%mm4, %%mm2;" /* 0_0_0_0 0_0_0_0 g7g6g5g4 0_0_0_0 */
+        "punpcklbw %%mm1, %%mm0;" /* 0_0_0_0 r7r6r5r4 0_0_0_0 b7b6b5b4 */
+
+        "por %%mm2, %%mm0;" /* 0_0_0_0 r7r6r5r4 g7g6g5g4 b7b6b5b4 */
+
+        "movq 8 (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
+        MOVNTQ "      %%mm0, (%1);"  /* store pixel 0-3 */
+
+        /* convert rgb24 plane to rgb12 pack for pixel 4-7 (high halves of the copies in mm5/mm7) */
+        "punpckhbw %%mm4, %%mm7;" /* 0_0_0_0 0_0_0_0 g7g6g5g4 0_0_0_0 */
+        "punpckhbw %%mm1, %%mm5;" /* 0_0_0_0 r7r6r5r4 0_0_0_0 b7b6b5b4 */
+
+        "movd 4 (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
+
+        "por       %%mm7, %%mm5;" /* 0_0_0_0 r7r6r5r4 g7g6g5g4 b7b6b5b4 */
+        "movd 4 (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
+
+        MOVNTQ " %%mm5, 8 (%1);" /* store pixel 4-7 */
+
+    YUV2RGB_ENDLOOP(2)
+    YUV2RGB_OPERANDS
+}
+
 #undef RGB_PLANAR2PACKED24
 #if HAVE_MMX2
 #define RGB_PLANAR2PACKED24(red, blue)\
diff -upr trunk/libswscale/yuv2rgb.c trunk.rgb12/libswscale/yuv2rgb.c
--- trunk/libswscale/yuv2rgb.c	2010-02-24 15:55:47.000000000 +0100
+++ trunk.rgb12/libswscale/yuv2rgb.c	2010-02-25 04:21:14.000000000 +0100
@@ -551,7 +551,9 @@ SwsFunc ff_yuv2rgb_get_func_ptr(SwsConte
     case PIX_FMT_RGB565:
     case PIX_FMT_BGR565:
     case PIX_FMT_RGB555:
-    case PIX_FMT_BGR555:     return yuv2rgb_c_16;
+    case PIX_FMT_BGR555:
+    case PIX_FMT_RGB444:
+    case PIX_FMT_BGR444:     return yuv2rgb_c_16;
     case PIX_FMT_RGB8:
     case PIX_FMT_BGR8:       return yuv2rgb_c_8_ordered_dither;
     case PIX_FMT_RGB4:
@@ -598,6 +600,7 @@ av_cold int ff_yuv2rgb_c_init_tables(Sws
                         || c->dstFormat==PIX_FMT_BGR24
                         || c->dstFormat==PIX_FMT_RGB565
                         || c->dstFormat==PIX_FMT_RGB555
+                        || c->dstFormat==PIX_FMT_RGB444
                         || c->dstFormat==PIX_FMT_RGB8
                         || c->dstFormat==PIX_FMT_RGB4
                         || c->dstFormat==PIX_FMT_RGB4_BYTE
@@ -694,6 +697,25 @@ av_cold int ff_yuv2rgb_c_init_tables(Sws
         fill_table(c->table_bU, 1, cbu, y_table + yoffs + 2048);
         fill_gv_table(c->table_gV, 1, cgv);
         break;
+    case 12:
+        rbase = isRgb ? 8 : 0;
+        gbase = 4;
+        bbase = isRgb ? 0 : 8;
+        c->yuvTable = av_malloc(1024*3*2);
+        y_table16 = c->yuvTable;
+        yb = -(384<<16) - oy;
+        for (i = 0; i < 1024; i++) {
+            uint8_t yval = av_clip_uint8((yb + 0x8000) >> 16);
+            y_table16[i     ] = (yval >> 4)          << rbase;
+            y_table16[i+1024] = (yval >> 4) << gbase;
+            y_table16[i+2048] = (yval >> 4)          << bbase;
+            yb += cy;
+        }
+        fill_table(c->table_rV, 2, crv, y_table16 + yoffs);
+        fill_table(c->table_gU, 2, cgu, y_table16 + yoffs + 1024);
+        fill_table(c->table_bU, 2, cbu, y_table16 + yoffs + 2048);
+        fill_gv_table(c->table_gV, 2, cgv);
+        break;
     case 15:
     case 16:
         rbase = isRgb ? bpp - 5 : 0;
diff -upr trunk/tests/lavf-regression.sh trunk.rgb12/tests/lavf-regression.sh
--- trunk/tests/lavf-regression.sh	2010-02-24 15:55:26.000000000 +0100
+++ trunk.rgb12/tests/lavf-regression.sh	2010-02-25 02:08:17.000000000 +0100
@@ -208,7 +208,7 @@ fi
 
 if [ -n "$do_pixfmt" ] ; then
 conversions="yuv420p yuv422p yuv444p yuyv422 yuv410p yuv411p yuvj420p \
-             yuvj422p yuvj444p rgb24 bgr24 rgb32 rgb565 rgb555 gray monow \
+             yuvj422p yuvj444p rgb24 bgr24 rgb32 rgb444 rgb565 rgb555 gray monow \
              monob yuv440p yuvj440p"
 for pix_fmt in $conversions ; do
     file=${outfile}lavf-${pix_fmt}.yuv



More information about the ffmpeg-devel mailing list