FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
colorspacedsp_template.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016 Ronald S. Bultje <rsbultje@gmail.com>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/avassert.h"
22 
23 #undef avg
24 #undef ss
25 
26 #if SS_W == 0
27 #define ss 444
28 #define avg(a,b,c,d) (a)
29 #elif SS_H == 0
30 #define ss 422
31 #define avg(a,b,c,d) (((a) + (b) + 1) >> 1)
32 #else
33 #define ss 420
34 #define avg(a,b,c,d) (((a) + (b) + (c) + (d) + 2) >> 2)
35 #endif
36 
37 #undef fn
38 #undef fn2
39 #undef fn3
40 #define fn3(a,b,c) a##_##c##p##b##_c
41 #define fn2(a,b,c) fn3(a,b,c)
42 #define fn(a) fn2(a, BIT_DEPTH, ss)
43 
44 #undef pixel
45 #undef av_clip_pixel
46 #if BIT_DEPTH == 8
47 #define pixel uint8_t
48 #define av_clip_pixel(x) av_clip_uint8(x)
49 #else
50 #define pixel uint16_t
51 #define av_clip_pixel(x) av_clip_uintp2(x, BIT_DEPTH)
52 #endif
53 
54 static void fn(yuv2rgb)(int16_t *rgb[3], ptrdiff_t rgb_stride,
55  uint8_t *_yuv[3], const ptrdiff_t yuv_stride[3],
56  int w, int h, const int16_t yuv2rgb_coeffs[3][3][8],
57  const int16_t yuv_offset[8])
58 {
59  pixel **yuv = (pixel **) _yuv;
60  const pixel *yuv0 = yuv[0], *yuv1 = yuv[1], *yuv2 = yuv[2];
61  int16_t *rgb0 = rgb[0], *rgb1 = rgb[1], *rgb2 = rgb[2];
62  int y, x;
63  int cy = yuv2rgb_coeffs[0][0][0];
64  int crv = yuv2rgb_coeffs[0][2][0];
65  int cgu = yuv2rgb_coeffs[1][1][0];
66  int cgv = yuv2rgb_coeffs[1][2][0];
67  int cbu = yuv2rgb_coeffs[2][1][0];
68  const int sh = BIT_DEPTH - 1, rnd = 1 << (sh - 1);
69  const int uv_offset = 128 << (BIT_DEPTH - 8);
70 
71  av_assert2(yuv2rgb_coeffs[0][1][0] == 0);
72  av_assert2(yuv2rgb_coeffs[2][2][0] == 0);
73  av_assert2(yuv2rgb_coeffs[1][0][0] == cy && yuv2rgb_coeffs[2][0][0] == cy);
74 
75  w = AV_CEIL_RSHIFT(w, SS_W);
76  h = AV_CEIL_RSHIFT(h, SS_H);
77  for (y = 0; y < h; y++) {
78  for (x = 0; x < w; x++) {
79  int y00 = yuv0[x << SS_W] - yuv_offset[0];
80 #if SS_W == 1
81  int y01 = yuv0[2 * x + 1] - yuv_offset[0];
82 #if SS_H == 1
83  int y10 = yuv0[yuv_stride[0] / sizeof(pixel) + 2 * x] - yuv_offset[0];
84  int y11 = yuv0[yuv_stride[0] / sizeof(pixel) + 2 * x + 1] - yuv_offset[0];
85 #endif
86 #endif
87  int u = yuv1[x] - uv_offset, v = yuv2[x] - uv_offset;
88 
89  rgb0[x << SS_W] = av_clip_int16((y00 * cy + crv * v + rnd) >> sh);
90 #if SS_W == 1
91  rgb0[2 * x + 1] = av_clip_int16((y01 * cy + crv * v + rnd) >> sh);
92 #if SS_H == 1
93  rgb0[2 * x + rgb_stride] = av_clip_int16((y10 * cy + crv * v + rnd) >> sh);
94  rgb0[2 * x + rgb_stride + 1] = av_clip_int16((y11 * cy + crv * v + rnd) >> sh);
95 #endif
96 #endif
97 
98  rgb1[x << SS_W] = av_clip_int16((y00 * cy + cgu * u +
99  cgv * v + rnd) >> sh);
100 #if SS_W == 1
101  rgb1[2 * x + 1] = av_clip_int16((y01 * cy + cgu * u +
102  cgv * v + rnd) >> sh);
103 #if SS_H == 1
104  rgb1[2 * x + rgb_stride] = av_clip_int16((y10 * cy + cgu * u +
105  cgv * v + rnd) >> sh);
106  rgb1[2 * x + rgb_stride + 1] = av_clip_int16((y11 * cy + cgu * u +
107  cgv * v + rnd) >> sh);
108 #endif
109 #endif
110 
111  rgb2[x << SS_W] = av_clip_int16((y00 * cy + cbu * u + rnd) >> sh);
112 #if SS_W == 1
113  rgb2[2 * x + 1] = av_clip_int16((y01 * cy + cbu * u + rnd) >> sh);
114 #if SS_H == 1
115  rgb2[2 * x + rgb_stride] = av_clip_int16((y10 * cy + cbu * u + rnd) >> sh);
116  rgb2[2 * x + rgb_stride + 1] = av_clip_int16((y11 * cy + cbu * u + rnd) >> sh);
117 #endif
118 #endif
119  }
120 
121  yuv0 += (yuv_stride[0] * (1 << SS_H)) / sizeof(pixel);
122  yuv1 += yuv_stride[1] / sizeof(pixel);
123  yuv2 += yuv_stride[2] / sizeof(pixel);
124  rgb0 += rgb_stride * (1 << SS_H);
125  rgb1 += rgb_stride * (1 << SS_H);
126  rgb2 += rgb_stride * (1 << SS_H);
127  }
128 }
129 
130 static void fn(rgb2yuv)(uint8_t *_yuv[3], const ptrdiff_t yuv_stride[3],
131  int16_t *rgb[3], ptrdiff_t s,
132  int w, int h, const int16_t rgb2yuv_coeffs[3][3][8],
133  const int16_t yuv_offset[8])
134 {
135  pixel **yuv = (pixel **) _yuv;
136  pixel *yuv0 = yuv[0], *yuv1 = yuv[1], *yuv2 = yuv[2];
137  const int16_t *rgb0 = rgb[0], *rgb1 = rgb[1], *rgb2 = rgb[2];
138  int y, x;
139  const int sh = 29 - BIT_DEPTH;
140  const int rnd = 1 << (sh - 1);
141  int cry = rgb2yuv_coeffs[0][0][0];
142  int cgy = rgb2yuv_coeffs[0][1][0];
143  int cby = rgb2yuv_coeffs[0][2][0];
144  int cru = rgb2yuv_coeffs[1][0][0];
145  int cgu = rgb2yuv_coeffs[1][1][0];
146  int cburv = rgb2yuv_coeffs[1][2][0];
147  int cgv = rgb2yuv_coeffs[2][1][0];
148  int cbv = rgb2yuv_coeffs[2][2][0];
149  ptrdiff_t s0 = yuv_stride[0] / sizeof(pixel);
150  const int uv_offset = 128 << (BIT_DEPTH - 8);
151 
152  av_assert2(rgb2yuv_coeffs[1][2][0] == rgb2yuv_coeffs[2][0][0]);
153  w = AV_CEIL_RSHIFT(w, SS_W);
154  h = AV_CEIL_RSHIFT(h, SS_H);
155  for (y = 0; y < h; y++) {
156  for (x = 0; x < w; x++) {
157  int r00 = rgb0[x << SS_W], g00 = rgb1[x << SS_W], b00 = rgb2[x << SS_W];
158 #if SS_W == 1
159  int r01 = rgb0[x * 2 + 1], g01 = rgb1[x * 2 + 1], b01 = rgb2[x * 2 + 1];
160 #if SS_H == 1
161  int r10 = rgb0[x * 2 + 0 + s], g10 = rgb1[x * 2 + 0 + s], b10 = rgb2[x * 2 + 0 + s];
162  int r11 = rgb0[x * 2 + 1 + s], g11 = rgb1[x * 2 + 1 + s], b11 = rgb2[x * 2 + 1 + s];
163 #endif
164 #endif
165 
166  yuv0[x << SS_W] = av_clip_pixel(yuv_offset[0] +
167  ((r00 * cry + g00 * cgy +
168  b00 * cby + rnd) >> sh));
169 #if SS_W == 1
170  yuv0[x * 2 + 1] = av_clip_pixel(yuv_offset[0] +
171  ((r01 * cry + g01 * cgy +
172  b01 * cby + rnd) >> sh));
173 #if SS_H == 1
174  yuv0[x * 2 + 0 + s0] = av_clip_pixel(yuv_offset[0] +
175  ((r10 * cry + g10 * cgy +
176  b10 * cby + rnd) >> sh));
177  yuv0[x * 2 + 1 + s0] = av_clip_pixel(yuv_offset[0] +
178  ((r11 * cry + g11 * cgy +
179  b11 * cby + rnd) >> sh));
180 #endif
181 #endif
182 
183  yuv1[x] = av_clip_pixel(uv_offset +
184  ((avg(r00, r01, r10, r11) * cru +
185  avg(g00, g01, g10, g11) * cgu +
186  avg(b00, b01, b10, b11) * cburv + rnd) >> sh));
187  yuv2[x] = av_clip_pixel(uv_offset +
188  ((avg(r00, r01, r10, r11) * cburv +
189  avg(g00, g01, g10, g11) * cgv +
190  avg(b00, b01, b10, b11) * cbv + rnd) >> sh));
191  }
192 
193  yuv0 += s0 * (1 << SS_H);
194  yuv1 += yuv_stride[1] / sizeof(pixel);
195  yuv2 += yuv_stride[2] / sizeof(pixel);
196  rgb0 += s * (1 << SS_H);
197  rgb1 += s * (1 << SS_H);
198  rgb2 += s * (1 << SS_H);
199  }
200 }
201 
202 /* floyd-steinberg dithering - for any mid-top pixel A in a 3x2 block of pixels:
203  * 1 A 2
204  * 3 4 5
205  * the rounding error is distributed over the neighbouring pixels:
206  * 2: 7/16th, 3: 3/16th, 4: 5/16th and 5: 1/16th
207  */
208 static void fn(rgb2yuv_fsb)(uint8_t *_yuv[3], const ptrdiff_t yuv_stride[3],
209  int16_t *rgb[3], ptrdiff_t s,
210  int w, int h, const int16_t rgb2yuv_coeffs[3][3][8],
211  const int16_t yuv_offset[8],
212  int *rnd_scratch[3][2])
213 {
214  pixel **yuv = (pixel **) _yuv;
215  pixel *yuv0 = yuv[0], *yuv1 = yuv[1], *yuv2 = yuv[2];
216  const int16_t *rgb0 = rgb[0], *rgb1 = rgb[1], *rgb2 = rgb[2];
217  int y, x;
218  const int sh = 29 - BIT_DEPTH;
219  const int rnd = 1 << (sh - 1);
220  int cry = rgb2yuv_coeffs[0][0][0];
221  int cgy = rgb2yuv_coeffs[0][1][0];
222  int cby = rgb2yuv_coeffs[0][2][0];
223  int cru = rgb2yuv_coeffs[1][0][0];
224  int cgu = rgb2yuv_coeffs[1][1][0];
225  int cburv = rgb2yuv_coeffs[1][2][0];
226  int cgv = rgb2yuv_coeffs[2][1][0];
227  int cbv = rgb2yuv_coeffs[2][2][0];
228  ptrdiff_t s0 = yuv_stride[0] / sizeof(pixel);
229  const int uv_offset = 128 << (BIT_DEPTH - 8);
230  unsigned mask = (1 << sh) - 1;
231 
232  for (x = 0; x < w; x++) {
233  rnd_scratch[0][0][x] =
234  rnd_scratch[0][1][x] = rnd;
235  }
236  av_assert2(rgb2yuv_coeffs[1][2][0] == rgb2yuv_coeffs[2][0][0]);
237  w = AV_CEIL_RSHIFT(w, SS_W);
238  h = AV_CEIL_RSHIFT(h, SS_H);
239  for (x = 0; x < w; x++) {
240  rnd_scratch[1][0][x] =
241  rnd_scratch[1][1][x] =
242  rnd_scratch[2][0][x] =
243  rnd_scratch[2][1][x] = rnd;
244  }
245  for (y = 0; y < h; y++) {
246  for (x = 0; x < w; x++) {
247  int r00 = rgb0[x << SS_W], g00 = rgb1[x << SS_W], b00 = rgb2[x << SS_W];
248  int y00;
249 #if SS_W == 1
250  int r01 = rgb0[x * 2 + 1], g01 = rgb1[x * 2 + 1], b01 = rgb2[x * 2 + 1];
251  int y01;
252 #if SS_H == 1
253  int r10 = rgb0[x * 2 + 0 + s], g10 = rgb1[x * 2 + 0 + s], b10 = rgb2[x * 2 + 0 + s];
254  int r11 = rgb0[x * 2 + 1 + s], g11 = rgb1[x * 2 + 1 + s], b11 = rgb2[x * 2 + 1 + s];
255  int y10, y11;
256 #endif
257 #endif
258  int u, v, diff;
259 
260  y00 = r00 * cry + g00 * cgy + b00 * cby + rnd_scratch[0][y & !SS_H][x << SS_W];
261  diff = (y00 & mask) - rnd;
262  yuv0[x << SS_W] = av_clip_pixel(yuv_offset[0] + (y00 >> sh));
263  rnd_scratch[0][ (y & !SS_H)][(x << SS_W) + 1] += (diff * 7 + 8) >> 4;
264  rnd_scratch[0][!(y & !SS_H)][(x << SS_W) - 1] += (diff * 3 + 8) >> 4;
265  rnd_scratch[0][!(y & !SS_H)][(x << SS_W) + 0] += (diff * 5 + 8) >> 4;
266  rnd_scratch[0][!(y & !SS_H)][(x << SS_W) + 1] += (diff * 1 + 8) >> 4;
267  rnd_scratch[0][ (y & !SS_H)][(x << SS_W) + 0] = rnd;
268 #if SS_W == 1
269  y01 = r01 * cry + g01 * cgy + b01 * cby + rnd_scratch[0][y & !SS_H][x * 2 + 1];
270  diff = (y01 & mask) - rnd;
271  yuv0[x * 2 + 1] = av_clip_pixel(yuv_offset[0] + (y01 >> sh));
272  rnd_scratch[0][ (y & !SS_H)][x * 2 + 2] += (diff * 7 + 8) >> 4;
273  rnd_scratch[0][!(y & !SS_H)][x * 2 + 0] += (diff * 3 + 8) >> 4;
274  rnd_scratch[0][!(y & !SS_H)][x * 2 + 1] += (diff * 5 + 8) >> 4;
275  rnd_scratch[0][!(y & !SS_H)][x * 2 + 2] += (diff * 1 + 8) >> 4;
276  rnd_scratch[0][ (y & !SS_H)][x * 2 + 1] = rnd;
277 #if SS_H == 1
278  y10 = r10 * cry + g10 * cgy + b10 * cby + rnd_scratch[0][1][x * 2 + 0];
279  diff = (y10 & mask) - rnd;
280  yuv0[x * 2 + 0 + s0] = av_clip_pixel(yuv_offset[0] + (y10 >> sh));
281  rnd_scratch[0][1][x * 2 + 1] += (diff * 7 + 8) >> 4;
282  rnd_scratch[0][0][x * 2 - 1] += (diff * 3 + 8) >> 4;
283  rnd_scratch[0][0][x * 2 + 0] += (diff * 5 + 8) >> 4;
284  rnd_scratch[0][0][x * 2 + 1] += (diff * 1 + 8) >> 4;
285  rnd_scratch[0][1][x * 2 + 0] = rnd;
286 
287  y11 = r11 * cry + g11 * cgy + b11 * cby + rnd_scratch[0][1][x * 2 + 1];
288  diff = (y11 & mask) - rnd;
289  yuv0[x * 2 + 1 + s0] = av_clip_pixel(yuv_offset[0] + (y11 >> sh));
290  rnd_scratch[0][1][x * 2 + 2] += (diff * 7 + 8) >> 4;
291  rnd_scratch[0][0][x * 2 + 0] += (diff * 3 + 8) >> 4;
292  rnd_scratch[0][0][x * 2 + 1] += (diff * 5 + 8) >> 4;
293  rnd_scratch[0][0][x * 2 + 2] += (diff * 1 + 8) >> 4;
294  rnd_scratch[0][1][x * 2 + 1] = rnd;
295 #endif
296 #endif
297 
298  u = avg(r00, r01, r10, r11) * cru +
299  avg(g00, g01, g10, g11) * cgu +
300  avg(b00, b01, b10, b11) * cburv + rnd_scratch[1][y & 1][x];
301  diff = (u & mask) - rnd;
302  yuv1[x] = av_clip_pixel(uv_offset + (u >> sh));
303  rnd_scratch[1][ (y & 1)][x + 1] += (diff * 7 + 8) >> 4;
304  rnd_scratch[1][!(y & 1)][x - 1] += (diff * 3 + 8) >> 4;
305  rnd_scratch[1][!(y & 1)][x + 0] += (diff * 5 + 8) >> 4;
306  rnd_scratch[1][!(y & 1)][x + 1] += (diff * 1 + 8) >> 4;
307  rnd_scratch[1][ (y & 1)][x + 0] = rnd;
308 
309  v = avg(r00, r01, r10, r11) * cburv +
310  avg(g00, g01, g10, g11) * cgv +
311  avg(b00, b01, b10, b11) * cbv + rnd_scratch[2][y & 1][x];
312  diff = (v & mask) - rnd;
313  yuv2[x] = av_clip_pixel(uv_offset + (v >> sh));
314  rnd_scratch[2][ (y & 1)][x + 1] += (diff * 7 + 8) >> 4;
315  rnd_scratch[2][!(y & 1)][x - 1] += (diff * 3 + 8) >> 4;
316  rnd_scratch[2][!(y & 1)][x + 0] += (diff * 5 + 8) >> 4;
317  rnd_scratch[2][!(y & 1)][x + 1] += (diff * 1 + 8) >> 4;
318  rnd_scratch[2][ (y & 1)][x + 0] = rnd;
319  }
320 
321  yuv0 += s0 * (1 << SS_H);
322  yuv1 += yuv_stride[1] / sizeof(pixel);
323  yuv2 += yuv_stride[2] / sizeof(pixel);
324  rgb0 += s * (1 << SS_H);
325  rgb1 += s * (1 << SS_H);
326  rgb2 += s * (1 << SS_H);
327  }
328 }
329 
330 #undef IN_BIT_DEPTH
331 #undef OUT_BIT_DEPTH
332 #define OUT_BIT_DEPTH BIT_DEPTH
333 #define IN_BIT_DEPTH 8
335 
336 #undef IN_BIT_DEPTH
337 #define IN_BIT_DEPTH 10
339 
340 #undef IN_BIT_DEPTH
341 #define IN_BIT_DEPTH 12
static void fn() rgb2yuv_fsb(uint8_t *_yuv[3], const ptrdiff_t yuv_stride[3], int16_t *rgb[3], ptrdiff_t s, int w, int h, const int16_t rgb2yuv_coeffs[3][3][8], const int16_t yuv_offset[8], int *rnd_scratch[3][2])
#define pixel
static void fn() rgb2yuv(uint8_t *_yuv[3], const ptrdiff_t yuv_stride[3], int16_t *rgb[3], ptrdiff_t s, int w, int h, const int16_t rgb2yuv_coeffs[3][3][8], const int16_t yuv_offset[8])
#define BIT_DEPTH
static void fn() yuv2rgb(int16_t *rgb[3], ptrdiff_t rgb_stride, uint8_t *_yuv[3], const ptrdiff_t yuv_stride[3], int w, int h, const int16_t yuv2rgb_coeffs[3][3][8], const int16_t yuv_offset[8])
#define fn(a)
#define SS_W
Definition: colorspacedsp.c:62
uint8_t
#define av_assert2(cond)
assert() equivalent, that does lie in speed critical code.
Definition: avassert.h:64
#define u(width, name, range_min, range_max)
Definition: cbs_h2645.c:253
static const uint16_t mask[17]
Definition: lzw.c:38
#define s0
Definition: regdef.h:37
simple assert() macros that are a bit more flexible than ISO C assert().
uint8_t w
Definition: llviddspenc.c:38
#define s(width, name)
Definition: cbs_vp9.c:257
uint8_t pixel
Definition: tiny_ssim.c:42
#define avg(a, b, c, d)
#define rnd()
Definition: checkasm.h:101
static av_always_inline int diff(const uint32_t a, const uint32_t b)
#define av_clip_pixel(x)
#define SS_H
Definition: colorspacedsp.c:63
#define AV_CEIL_RSHIFT(a, b)
Definition: common.h:58