/*
 * avg(a,b,c,d): reduce up to four samples to a single value.
 * Three alternative definitions follow; the preprocessor conditionals that
 * select one of them are not part of this excerpt (presumably keyed on the
 * chroma subsampling macros SS_W / SS_H -- confirm against the full file).
 */
/* Variant 1: no averaging, the first sample is used unchanged. */
28 #define avg(a,b,c,d) (a)
/* Variant 2: mean of a and b, rounded to nearest (+1 before >> 1). */
31 #define avg(a,b,c,d) (((a) + (b) + 1) >> 1)
/* Variant 3: mean of all four samples, rounded to nearest (+2 before >> 2). */
34 #define avg(a,b,c,d) (((a) + (b) + (c) + (d) + 2) >> 2)
/*
 * fn(a) builds a per-configuration function name by token pasting:
 * fn3 produces  a _ c p b _c  (name, "_", ss, "p", bit depth, "_c").
 * fn2 exists only as an extra expansion level so that BIT_DEPTH and ss are
 * macro-expanded before fn3 pastes them.
 */
40 #define fn3(a,b,c) a##_##c##p##b##_c
41 #define fn2(a,b,c) fn3(a,b,c)
42 #define fn(a) fn2(a, BIT_DEPTH, ss)
/*
 * Two alternative sample-clipping setups; the selecting conditional is not
 * visible here (presumably BIT_DEPTH == 8 versus higher depths -- confirm).
 * The first clips to 8 bits; its matching `pixel` definition is not shown.
 */
48 #define av_clip_pixel(x) av_clip_uint8(x)
/* Second setup: 16-bit sample storage, clipped to BIT_DEPTH unsigned bits. */
50 #define pixel uint16_t
51 #define av_clip_pixel(x) av_clip_uintp2(x, BIT_DEPTH)
/*
 * yuv2rgb: convert three planes of `pixel` samples (Y, U, V) into three
 * planes of int16_t (R, G, B).
 *
 *   rgb            destination planes; rgb_stride is added directly to
 *                  int16_t indices, so it is counted in int16_t elements.
 *   _yuv           source planes; yuv_stride[] is divided by sizeof(pixel)
 *                  before use, so it is counted in bytes.
 *   w, h           loop bounds of the x / y loops below.
 *   yuv2rgb_coeffs only element [..][..][0] of each coefficient is read here.
 *   yuv_offset     only yuv_offset[0] is read; it is subtracted from luma.
 *
 * NOTE(review): this excerpt is missing several lines of the function
 * (opening/closing braces, the declarations of `yuv`, `x`, `y`, `sh` and
 * `rnd`, and any preprocessor conditionals). The accesses at 2 * x + 1 and
 * at +stride are presumably guarded by SS_W / SS_H conditionals -- confirm
 * against the full file.
 */
54 static void fn(
yuv2rgb)(int16_t *rgb[3], ptrdiff_t rgb_stride,
55 uint8_t *_yuv[3],
const ptrdiff_t yuv_stride[3],
56 int w,
int h,
const int16_t yuv2rgb_coeffs[3][3][8],
57 const int16_t yuv_offset[8])
/* Per-plane cursors; `yuv` is declared on a line not shown in this excerpt. */
60 const pixel *yuv0 = yuv[0], *yuv1 = yuv[1], *yuv2 = yuv[2];
61 int16_t *rgb0 = rgb[0], *rgb1 = rgb[1], *rgb2 = rgb[2];
/*
 * Matrix entries actually used: cy scales luma for all three outputs,
 * crv is V's weight in R, cgu/cgv are U's and V's weights in G, and cbu is
 * U's weight in B.
 */
63 int cy = yuv2rgb_coeffs[0][0][0];
64 int crv = yuv2rgb_coeffs[0][2][0];
65 int cgu = yuv2rgb_coeffs[1][1][0];
66 int cgv = yuv2rgb_coeffs[1][2][0];
67 int cbu = yuv2rgb_coeffs[2][1][0];
/* 128 scaled up to BIT_DEPTH bits; subtracted from U and V below. */
69 const int uv_offset = 128 << (
BIT_DEPTH - 8);
/* The single cy is only valid if all three rows share the same luma weight. */
73 av_assert2(yuv2rgb_coeffs[1][0][0] == cy && yuv2rgb_coeffs[2][0][0] == cy);
77 for (y = 0; y <
h; y++) {
78 for (x = 0; x <
w; x++) {
/* Luma samples covered by chroma sample x, with the luma offset removed. */
79 int y00 = yuv0[x <<
SS_W] - yuv_offset[0];
/* Right neighbour in the same luma row. */
81 int y01 = yuv0[2 * x + 1] - yuv_offset[0];
/* The two samples in the next luma row (one luma stride further on). */
83 int y10 = yuv0[yuv_stride[0] /
sizeof(
pixel) + 2 * x] - yuv_offset[0];
84 int y11 = yuv0[yuv_stride[0] /
sizeof(
pixel) + 2 * x + 1] - yuv_offset[0];
/* Chroma pair shared by all luma samples above. */
87 int u = yuv1[x] - uv_offset, v = yuv2[x] - uv_offset;
/* First output plane: luma term plus V term, rounded, shifted, clipped to int16. */
89 rgb0[x << SS_W] = av_clip_int16((y00 * cy + crv * v + rnd) >> sh);
91 rgb0[2 * x + 1] = av_clip_int16((y01 * cy + crv * v + rnd) >> sh);
93 rgb0[2 * x + rgb_stride] = av_clip_int16((y10 * cy + crv * v + rnd) >> sh);
94 rgb0[2 * x + rgb_stride + 1] = av_clip_int16((y11 * cy + crv * v + rnd) >> sh);
/* Second output plane: luma term plus both U and V terms. */
98 rgb1[x <<
SS_W] = av_clip_int16((y00 * cy + cgu * u +
99 cgv * v + rnd) >> sh);
101 rgb1[2 * x + 1] = av_clip_int16((y01 * cy + cgu * u +
102 cgv * v + rnd) >> sh);
104 rgb1[2 * x + rgb_stride] = av_clip_int16((y10 * cy + cgu * u +
105 cgv * v + rnd) >> sh);
106 rgb1[2 * x + rgb_stride + 1] = av_clip_int16((y11 * cy + cgu * u +
107 cgv * v + rnd) >> sh);
/* Third output plane: luma term plus U term. */
111 rgb2[x << SS_W] = av_clip_int16((y00 * cy + cbu * u + rnd) >> sh);
113 rgb2[2 * x + 1] = av_clip_int16((y01 * cy + cbu * u + rnd) >> sh);
115 rgb2[2 * x + rgb_stride] = av_clip_int16((y10 * cy + cbu * u + rnd) >> sh);
116 rgb2[2 * x + rgb_stride + 1] = av_clip_int16((y11 * cy + cbu * u + rnd) >> sh);
/*
 * Advance to the next iteration's rows: luma and all RGB planes move by
 * (1 << SS_H) rows, the chroma planes by exactly one row.
 */
121 yuv0 += (yuv_stride[0] * (1 <<
SS_H)) /
sizeof(
pixel);
122 yuv1 += yuv_stride[1] /
sizeof(
pixel);
123 yuv2 += yuv_stride[2] /
sizeof(
pixel);
124 rgb0 += rgb_stride * (1 <<
SS_H);
125 rgb1 += rgb_stride * (1 <<
SS_H);
126 rgb2 += rgb_stride * (1 <<
SS_H);
/*
 * rgb2yuv (parameter list continues here; the first line of the header, the
 * braces, the declarations of `yuv`, `x`, `y`, `sh`, and the left-hand sides
 * of the store statements are not part of this excerpt).
 *
 *   rgb            three int16_t source planes; `s` is added directly to
 *                  int16_t indices, so it is counted in int16_t elements.
 *   w, h           loop bounds of the x / y loops below.
 *   rgb2yuv_coeffs only element [..][..][0] of each coefficient is read here.
 *   yuv_offset     not referenced on any line visible in this excerpt.
 */
131 int16_t *rgb[3], ptrdiff_t
s,
132 int w,
int h,
const int16_t rgb2yuv_coeffs[3][3][8],
133 const int16_t yuv_offset[8])
/* Per-plane cursors; `yuv` is declared on a line not shown in this excerpt. */
136 pixel *yuv0 = yuv[0], *yuv1 = yuv[1], *yuv2 = yuv[2];
137 const int16_t *rgb0 = rgb[0], *rgb1 = rgb[1], *rgb2 = rgb[2];
/* Half of the final shift step, added before >> sh to round to nearest. */
140 const int rnd = 1 << (sh - 1);
/*
 * Matrix rows: (cry, cgy, cby) feed the first sum, (cru, cgu, cburv) the
 * second, and (cburv, cgv, cbv) the third. cburv is shared between the B
 * weight of row 1 and the R weight of row 2.
 */
141 int cry = rgb2yuv_coeffs[0][0][0];
142 int cgy = rgb2yuv_coeffs[0][1][0];
143 int cby = rgb2yuv_coeffs[0][2][0];
144 int cru = rgb2yuv_coeffs[1][0][0];
145 int cgu = rgb2yuv_coeffs[1][1][0];
146 int cburv = rgb2yuv_coeffs[1][2][0];
147 int cgv = rgb2yuv_coeffs[2][1][0];
148 int cbv = rgb2yuv_coeffs[2][2][0];
/* Luma stride converted from bytes to `pixel` elements. */
149 ptrdiff_t
s0 = yuv_stride[0] /
sizeof(
pixel);
/* 128 scaled up to BIT_DEPTH bits; not used on any line visible here. */
150 const int uv_offset = 128 << (
BIT_DEPTH - 8);
/* Sharing cburv is only valid if these two matrix entries are equal. */
152 av_assert2(rgb2yuv_coeffs[1][2][0] == rgb2yuv_coeffs[2][0][0]);
155 for (y = 0; y <
h; y++) {
156 for (x = 0; x <
w; x++) {
/* RGB triplet at the first position of the group handled by this x. */
157 int r00 = rgb0[x <<
SS_W], g00 = rgb1[x <<
SS_W], b00 = rgb2[x <<
SS_W];
/* Right neighbour in the same row. */
159 int r01 = rgb0[x * 2 + 1], g01 = rgb1[x * 2 + 1], b01 = rgb2[x * 2 + 1];
/* The two triplets one RGB row (s elements) further on. */
161 int r10 = rgb0[x * 2 + 0 +
s], g10 = rgb1[x * 2 + 0 +
s], b10 = rgb2[x * 2 + 0 +
s];
162 int r11 = rgb0[x * 2 + 1 +
s], g11 = rgb1[x * 2 + 1 +
s], b11 = rgb2[x * 2 + 1 +
s];
/*
 * First-row weighted sums, one per input triplet, rounded and shifted.
 * NOTE(review): the beginning of each statement (destination and any
 * offset/clip wrapper) is on lines not shown in this excerpt.
 */
167 ((r00 * cry + g00 * cgy +
168 b00 * cby + rnd) >> sh));
171 ((r01 * cry + g01 * cgy +
172 b01 * cby + rnd) >> sh));
175 ((r10 * cry + g10 * cgy +
176 b10 * cby + rnd) >> sh));
178 ((r11 * cry + g11 * cgy +
179 b11 * cby + rnd) >> sh));
/* Second-row sum computed from the avg() of each colour channel. */
184 ((
avg(r00, r01, r10, r11) * cru +
185 avg(g00, g01, g10, g11) * cgu +
186 avg(b00, b01, b10, b11) * cburv + rnd) >> sh));
/* Third-row sum computed from the same averaged channels. */
188 ((
avg(r00, r01, r10, r11) * cburv +
189 avg(g00, g01, g10, g11) * cgv +
190 avg(b00, b01, b10, b11) * cbv + rnd) >> sh));
/*
 * Advance to the next iteration's rows: plane 0 and all RGB planes move by
 * (1 << SS_H) rows, planes 1 and 2 by exactly one row.
 */
193 yuv0 += s0 * (1 <<
SS_H);
194 yuv1 += yuv_stride[1] /
sizeof(
pixel);
195 yuv2 += yuv_stride[2] /
sizeof(
pixel);
196 rgb0 +=
s * (1 <<
SS_H);
197 rgb1 +=
s * (1 <<
SS_H);
198 rgb2 +=
s * (1 <<
SS_H);
/*
 * rgb2yuv_fsb (parameter list continues here; the first line of the header,
 * the braces, the declarations of `yuv`, `x`, `y`, `sh`, the per-sample
 * locals y00/y01/y10/y11/u/v/diff and several store statements are not part
 * of this excerpt).
 *
 * Same weighted sums as the plain RGB-to-YUV conversion, but instead of a
 * constant rounding term each sample adds a per-position value taken from
 * rnd_scratch, and the rounding error of each sample is spread over its
 * neighbours with weights 7, 3, 5, 1 (in sixteenths) -- the Floyd-Steinberg
 * error-diffusion pattern.
 *
 *   rgb            three int16_t source planes; `s` is counted in int16_t
 *                  elements (it is added directly to int16_t indices).
 *   w, h           loop bounds of the loops below.
 *   rgb2yuv_coeffs only element [..][..][0] of each coefficient is read here.
 *   yuv_offset     yuv_offset[0] is added to the first plane's result.
 *   rnd_scratch    [plane][row][column] rounding terms; two rows per plane.
 *
 * NOTE(review): the diffusion writes columns x - 1 and x + 1 (or 2 * x + 2),
 * i.e. index -1 at x == 0 and one past the last column. The caller presumably
 * supplies row pointers with padding on both sides -- confirm.
 */
209 int16_t *rgb[3], ptrdiff_t
s,
210 int w,
int h,
const int16_t rgb2yuv_coeffs[3][3][8],
211 const int16_t yuv_offset[8],
212 int *rnd_scratch[3][2])
/* Per-plane cursors; `yuv` is declared on a line not shown in this excerpt. */
215 pixel *yuv0 = yuv[0], *yuv1 = yuv[1], *yuv2 = yuv[2];
216 const int16_t *rgb0 = rgb[0], *rgb1 = rgb[1], *rgb2 = rgb[2];
/* Half of the final shift step: the neutral (no carried error) rounding term. */
219 const int rnd = 1 << (sh - 1);
/*
 * Matrix rows: (cry, cgy, cby) feed the first sum, (cru, cgu, cburv) the
 * second, and (cburv, cgv, cbv) the third.
 */
220 int cry = rgb2yuv_coeffs[0][0][0];
221 int cgy = rgb2yuv_coeffs[0][1][0];
222 int cby = rgb2yuv_coeffs[0][2][0];
223 int cru = rgb2yuv_coeffs[1][0][0];
224 int cgu = rgb2yuv_coeffs[1][1][0];
225 int cburv = rgb2yuv_coeffs[1][2][0];
226 int cgv = rgb2yuv_coeffs[2][1][0];
227 int cbv = rgb2yuv_coeffs[2][2][0];
/* Luma stride converted from bytes to `pixel` elements. */
228 ptrdiff_t
s0 = yuv_stride[0] /
sizeof(
pixel);
/* 128 scaled up to BIT_DEPTH bits; not used on any line visible here. */
229 const int uv_offset = 128 << (
BIT_DEPTH - 8);
/* Selects the low `sh` bits, i.e. the part discarded by >> sh. */
230 unsigned mask = (1 << sh) - 1;
/* Start both plane-0 scratch rows at the neutral rounding term. */
232 for (x = 0; x <
w; x++) {
233 rnd_scratch[0][0][x] =
234 rnd_scratch[0][1][x] =
rnd;
/* Sharing cburv is only valid if these two matrix entries are equal. */
236 av_assert2(rgb2yuv_coeffs[1][2][0] == rgb2yuv_coeffs[2][0][0]);
/*
 * Same initialisation for the plane-1 and plane-2 scratch rows.
 * NOTE(review): w (and h) may be rescaled on lines not shown between the
 * two initialisation loops -- confirm against the full file.
 */
239 for (x = 0; x <
w; x++) {
240 rnd_scratch[1][0][x] =
241 rnd_scratch[1][1][x] =
242 rnd_scratch[2][0][x] =
243 rnd_scratch[2][1][x] =
rnd;
245 for (y = 0; y <
h; y++) {
246 for (x = 0; x <
w; x++) {
/* RGB triplet at the first position of the group handled by this x. */
247 int r00 = rgb0[x <<
SS_W], g00 = rgb1[x <<
SS_W], b00 = rgb2[x <<
SS_W];
/* Right neighbour in the same row. */
250 int r01 = rgb0[x * 2 + 1], g01 = rgb1[x * 2 + 1], b01 = rgb2[x * 2 + 1];
/* The two triplets one RGB row (s elements) further on. */
253 int r10 = rgb0[x * 2 + 0 +
s], g10 = rgb1[x * 2 + 0 +
s], b10 = rgb2[x * 2 + 0 +
s];
254 int r11 = rgb0[x * 2 + 1 +
s], g11 = rgb1[x * 2 + 1 +
s], b11 = rgb2[x * 2 + 1 +
s];
/*
 * Sample 00. The scratch row index is y & !SS_H: always 0 when SS_H is
 * non-zero, alternating with the parity of y when SS_H is 0. The scratch
 * value replaces the constant rounding term.
 */
260 y00 = r00 * cry + g00 * cgy + b00 * cby + rnd_scratch[0][y & !
SS_H][x <<
SS_W];
/* Signed error: the bits dropped by >> sh, relative to the half step. */
261 diff = (y00 &
mask) - rnd;
262 yuv0[x << SS_W] = av_clip_pixel(yuv_offset[0] + (y00 >> sh));
/* 7/16 to the right neighbour in the current scratch row. */
263 rnd_scratch[0][ (y & !
SS_H)][(x <<
SS_W) + 1] += (diff * 7 + 8) >> 4;
/* 3/16, 5/16 and 1/16 to the left, same and right column of the other row. */
264 rnd_scratch[0][!(y & !
SS_H)][(x <<
SS_W) - 1] += (diff * 3 + 8) >> 4;
265 rnd_scratch[0][!(y & !
SS_H)][(x <<
SS_W) + 0] += (diff * 5 + 8) >> 4;
266 rnd_scratch[0][!(y & !
SS_H)][(x <<
SS_W) + 1] += (diff * 1 + 8) >> 4;
/* Reset the consumed cell so it is neutral when this row is reused. */
267 rnd_scratch[0][ (y & !
SS_H)][(x <<
SS_W) + 0] =
rnd;
/* Sample 01 (column 2 * x + 1 of the same row): same scheme, one column on. */
269 y01 = r01 * cry + g01 * cgy + b01 * cby + rnd_scratch[0][y & !
SS_H][x * 2 + 1];
270 diff = (y01 &
mask) - rnd;
271 yuv0[x * 2 + 1] =
av_clip_pixel(yuv_offset[0] + (y01 >> sh));
272 rnd_scratch[0][ (y & !
SS_H)][x * 2 + 2] += (diff * 7 + 8) >> 4;
273 rnd_scratch[0][!(y & !
SS_H)][x * 2 + 0] += (diff * 3 + 8) >> 4;
274 rnd_scratch[0][!(y & !
SS_H)][x * 2 + 1] += (diff * 5 + 8) >> 4;
275 rnd_scratch[0][!(y & !
SS_H)][x * 2 + 2] += (diff * 1 + 8) >> 4;
276 rnd_scratch[0][ (y & !
SS_H)][x * 2 + 1] = rnd;
/*
 * Sample 10 (second row of the group): reads scratch row 1 and diffuses
 * downwards into scratch row 0. The store of the result is on a line not
 * shown in this excerpt.
 */
278 y10 = r10 * cry + g10 * cgy + b10 * cby + rnd_scratch[0][1][x * 2 + 0];
279 diff = (y10 &
mask) - rnd;
281 rnd_scratch[0][1][x * 2 + 1] += (diff * 7 + 8) >> 4;
282 rnd_scratch[0][0][x * 2 - 1] += (diff * 3 + 8) >> 4;
283 rnd_scratch[0][0][x * 2 + 0] += (diff * 5 + 8) >> 4;
284 rnd_scratch[0][0][x * 2 + 1] += (diff * 1 + 8) >> 4;
285 rnd_scratch[0][1][x * 2 + 0] =
rnd;
/* Sample 11: as sample 10, one column on; its store is likewise not shown. */
287 y11 = r11 * cry + g11 * cgy + b11 * cby + rnd_scratch[0][1][x * 2 + 1];
288 diff = (y11 &
mask) - rnd;
290 rnd_scratch[0][1][x * 2 + 2] += (diff * 7 + 8) >> 4;
291 rnd_scratch[0][0][x * 2 + 0] += (diff * 3 + 8) >> 4;
292 rnd_scratch[0][0][x * 2 + 1] += (diff * 5 + 8) >> 4;
293 rnd_scratch[0][0][x * 2 + 2] += (diff * 1 + 8) >> 4;
294 rnd_scratch[0][1][x * 2 + 1] =
rnd;
/*
 * Second-plane sum from the averaged channels. Scratch rows alternate with
 * the parity of y; the store of the result is on a line not shown here.
 */
298 u =
avg(r00, r01, r10, r11) * cru +
299 avg(g00, g01, g10, g11) * cgu +
300 avg(b00, b01, b10, b11) * cburv + rnd_scratch[1][y & 1][x];
301 diff = (u &
mask) - rnd;
303 rnd_scratch[1][ (y & 1)][x + 1] += (diff * 7 + 8) >> 4;
304 rnd_scratch[1][!(y & 1)][x - 1] += (diff * 3 + 8) >> 4;
305 rnd_scratch[1][!(y & 1)][x + 0] += (diff * 5 + 8) >> 4;
306 rnd_scratch[1][!(y & 1)][x + 1] += (diff * 1 + 8) >> 4;
307 rnd_scratch[1][ (y & 1)][x + 0] = rnd;
/* Third-plane sum: same scheme using scratch plane 2. */
309 v =
avg(r00, r01, r10, r11) * cburv +
310 avg(g00, g01, g10, g11) * cgv +
311 avg(b00, b01, b10, b11) * cbv + rnd_scratch[2][y & 1][x];
312 diff = (v &
mask) - rnd;
314 rnd_scratch[2][ (y & 1)][x + 1] += (diff * 7 + 8) >> 4;
315 rnd_scratch[2][!(y & 1)][x - 1] += (diff * 3 + 8) >> 4;
316 rnd_scratch[2][!(y & 1)][x + 0] += (diff * 5 + 8) >> 4;
317 rnd_scratch[2][!(y & 1)][x + 1] += (diff * 1 + 8) >> 4;
318 rnd_scratch[2][ (y & 1)][x + 0] = rnd;
/*
 * Advance to the next iteration's rows: plane 0 and all RGB planes move by
 * (1 << SS_H) rows, planes 1 and 2 by exactly one row.
 */
321 yuv0 += s0 * (1 <<
SS_H);
322 yuv1 += yuv_stride[1] /
sizeof(
pixel);
323 yuv2 += yuv_stride[2] /
sizeof(
pixel);
324 rgb0 +=
s * (1 <<
SS_H);
325 rgb1 +=
s * (1 <<
SS_H);
326 rgb2 +=
s * (1 <<
SS_H);
/*
 * Output depth is tied to the current BIT_DEPTH, while IN_BIT_DEPTH is
 * defined three times (8, 10, 12).
 * NOTE(review): the lines between these definitions are not part of this
 * excerpt; presumably each IN_BIT_DEPTH value is followed by an #include of
 * a per-depth template and an #undef -- confirm against the full file.
 */
332 #define OUT_BIT_DEPTH BIT_DEPTH
333 #define IN_BIT_DEPTH 8
337 #define IN_BIT_DEPTH 10
341 #define IN_BIT_DEPTH 12
static void fn() rgb2yuv_fsb(uint8_t *_yuv[3], const ptrdiff_t yuv_stride[3], int16_t *rgb[3], ptrdiff_t s, int w, int h, const int16_t rgb2yuv_coeffs[3][3][8], const int16_t yuv_offset[8], int *rnd_scratch[3][2])
static void fn() rgb2yuv(uint8_t *_yuv[3], const ptrdiff_t yuv_stride[3], int16_t *rgb[3], ptrdiff_t s, int w, int h, const int16_t rgb2yuv_coeffs[3][3][8], const int16_t yuv_offset[8])
static void fn() yuv2rgb(int16_t *rgb[3], ptrdiff_t rgb_stride, uint8_t *_yuv[3], const ptrdiff_t yuv_stride[3], int w, int h, const int16_t yuv2rgb_coeffs[3][3][8], const int16_t yuv_offset[8])
#define av_assert2(cond)
assert() equivalent for checks that lie in speed-critical code.
#define u(width, name, range_min, range_max)
static const uint16_t mask[17]
simple assert() macros that are a bit more flexible than ISO C assert().
static av_always_inline int diff(const uint32_t a, const uint32_t b)
#define AV_CEIL_RSHIFT(a, b)