FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
proresenc_kostya.c
Go to the documentation of this file.
1 /*
2  * Apple ProRes encoder
3  *
4  * Copyright (c) 2012 Konstantin Shishkov
5  *
6  * This encoder appears to be based on Anatoliy Wasserman's encoder,
7  * considering the similarities in the bugs.
8  *
9  * This file is part of Libav.
10  *
11  * Libav is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * Libav is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with Libav; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24  */
25 
26 #include "libavutil/opt.h"
27 #include "libavutil/pixdesc.h"
28 #include "avcodec.h"
29 #include "dsputil.h"
30 #include "put_bits.h"
31 #include "bytestream.h"
32 #include "internal.h"
33 #include "proresdsp.h"
34 #include "proresdata.h"
35 
36 #define CFACTOR_Y422 2
37 #define CFACTOR_Y444 3
38 
39 #define MAX_MBS_PER_SLICE 8
40 
41 #define MAX_PLANES 4
42 
/*
 * Encoder profile indices.
 *
 * NOTE(review): the enumerator list was empty here, which is not valid C.
 * The names below are presumed to follow the order of prores_profile_info[]
 * (proxy, LT, standard, high quality, 4444) -- confirm against upstream.
 */
enum {
    PRORES_PROFILE_PROXY = 0,
    PRORES_PROFILE_LT,
    PRORES_PROFILE_STANDARD,
    PRORES_PROFILE_HQ,
    PRORES_PROFILE_4444,
};
50 
/*
 * Indices into prores_quant_matrices[]. The order matches the per-matrix
 * comments of that table (proxy, LT, standard, high quality, codec default).
 */
enum {
    QUANT_MAT_PROXY = 0,
    QUANT_MAT_LT,
    QUANT_MAT_STANDARD,
    QUANT_MAT_HQ,
    QUANT_MAT_DEFAULT,
};
58 
/*
 * Base quantisation matrices, one 8x8 matrix (64 entries, row by row) per
 * quantiser preset. The encoder multiplies the selected matrix by the slice
 * quantiser to obtain the actual divisors.
 */
static const uint8_t prores_quant_matrices[][64] = {
    /* proxy */
    {
         4,  7,  9, 11, 13, 14, 15, 63,
         7,  7, 11, 12, 14, 15, 63, 63,
         9, 11, 13, 14, 15, 63, 63, 63,
        11, 11, 13, 14, 63, 63, 63, 63,
        11, 13, 14, 63, 63, 63, 63, 63,
        13, 14, 63, 63, 63, 63, 63, 63,
        13, 63, 63, 63, 63, 63, 63, 63,
        63, 63, 63, 63, 63, 63, 63, 63,
    },
    /* LT */
    {
         4,  5,  6,  7,  9, 11, 13, 15,
         5,  5,  7,  8, 11, 13, 15, 17,
         6,  7,  9, 11, 13, 15, 15, 17,
         7,  7,  9, 11, 13, 15, 17, 19,
         7,  9, 11, 13, 14, 16, 19, 23,
         9, 11, 13, 14, 16, 19, 23, 29,
         9, 11, 13, 15, 17, 21, 28, 35,
        11, 13, 16, 17, 21, 28, 35, 41,
    },
    /* standard */
    {
         4,  4,  5,  5,  6,  7,  7,  9,
         4,  4,  5,  6,  7,  7,  9,  9,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  6,  7,  7,  8,  9, 10, 12,
         6,  7,  7,  8,  9, 10, 12, 15,
         6,  7,  7,  9, 10, 11, 14, 17,
         7,  7,  9, 10, 11, 14, 17, 21,
    },
    /* high quality */
    {
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  5,
         4,  4,  4,  4,  4,  4,  5,  5,
         4,  4,  4,  4,  4,  5,  5,  6,
         4,  4,  4,  4,  5,  5,  6,  7,
         4,  4,  4,  4,  5,  6,  7,  7,
    },
    /* codec default */
    {
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
    },
};
111 
/* Number of frame-size classes that have their own bitrate entry. */
#define NUM_MB_LIMITS 4

/*
 * Upper macroblock counts of the frame-size classes, in ascending order.
 * The index of the first entry that is >= the frame's macroblock count
 * selects the matching br_tab[] column of a profile.
 */
static const int prores_mb_limits[NUM_MB_LIMITS] = {
    1620, /* up to  720x576  */
    2700, /* up to  960x720  */
    6075, /* up to 1440x1080 */
    9216, /* up to 2048x1152 */
};
119 
120 static const struct prores_profile {
121  const char *full_name;
122  uint32_t tag;
126  int quant;
127 } prores_profile_info[5] = {
128  {
129  .full_name = "proxy",
130  .tag = MKTAG('a', 'p', 'c', 'o'),
131  .min_quant = 4,
132  .max_quant = 8,
133  .br_tab = { 300, 242, 220, 194 },
134  .quant = QUANT_MAT_PROXY,
135  },
136  {
137  .full_name = "LT",
138  .tag = MKTAG('a', 'p', 'c', 's'),
139  .min_quant = 1,
140  .max_quant = 9,
141  .br_tab = { 720, 560, 490, 440 },
142  .quant = QUANT_MAT_LT,
143  },
144  {
145  .full_name = "standard",
146  .tag = MKTAG('a', 'p', 'c', 'n'),
147  .min_quant = 1,
148  .max_quant = 6,
149  .br_tab = { 1050, 808, 710, 632 },
150  .quant = QUANT_MAT_STANDARD,
151  },
152  {
153  .full_name = "high quality",
154  .tag = MKTAG('a', 'p', 'c', 'h'),
155  .min_quant = 1,
156  .max_quant = 6,
157  .br_tab = { 1566, 1216, 1070, 950 },
158  .quant = QUANT_MAT_HQ,
159  },
160  {
161  .full_name = "4444",
162  .tag = MKTAG('a', 'p', '4', 'h'),
163  .min_quant = 1,
164  .max_quant = 6,
165  .br_tab = { 2350, 1828, 1600, 1425 },
166  .quant = QUANT_MAT_HQ,
167  }
168 };
169 
170 #define TRELLIS_WIDTH 16
/* Score marking an unusable trellis node; parenthesised so the macro is safe
 * inside larger expressions (e.g. "x % SCORE_LIMIT"). */
#define SCORE_LIMIT (INT_MAX / 2)
172 
/*
 * One node of the per-row rate control trellis.
 * The prev_node member was missing from the declaration although the
 * trellis code reads and writes it.
 */
struct TrellisNode {
    int prev_node; /* index of the best predecessor node, -1 if none */
    int quant;     /* quantiser represented by this node */
    int bits;      /* accumulated bits along the best path to this node */
    int score;     /* accumulated error along the best path to this node */
};
179 
180 #define MAX_STORED_Q 16
181 
182 typedef struct ProresThreadData {
183  DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
184  DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
185  int16_t custom_q[64];
188 
189 typedef struct ProresContext {
190  AVClass *class;
192  DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
193  int16_t quants[MAX_STORED_Q][64];
194  int16_t custom_q[64];
196 
199 
205  int pictures_per_frame; // 1 for progressive, 2 for interlaced
211 
212  char *vendor;
214 
216 
217  int profile;
219 
220  int *slice_q;
221 
223 } ProresContext;
224 
225 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
226  int linesize, int x, int y, int w, int h,
227  int16_t *blocks, uint16_t *emu_buf,
228  int mbs_per_slice, int blocks_per_mb, int is_chroma)
229 {
230  const uint16_t *esrc;
231  const int mb_width = 4 * blocks_per_mb;
232  int elinesize;
233  int i, j, k;
234 
235  for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
236  if (x >= w) {
237  memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
238  * sizeof(*blocks));
239  return;
240  }
241  if (x + mb_width <= w && y + 16 <= h) {
242  esrc = src;
243  elinesize = linesize;
244  } else {
245  int bw, bh, pix;
246 
247  esrc = emu_buf;
248  elinesize = 16 * sizeof(*emu_buf);
249 
250  bw = FFMIN(w - x, mb_width);
251  bh = FFMIN(h - y, 16);
252 
253  for (j = 0; j < bh; j++) {
254  memcpy(emu_buf + j * 16,
255  (const uint8_t*)src + j * linesize,
256  bw * sizeof(*src));
257  pix = emu_buf[j * 16 + bw - 1];
258  for (k = bw; k < mb_width; k++)
259  emu_buf[j * 16 + k] = pix;
260  }
261  for (; j < 16; j++)
262  memcpy(emu_buf + j * 16,
263  emu_buf + (bh - 1) * 16,
264  mb_width * sizeof(*emu_buf));
265  }
266  if (!is_chroma) {
267  ctx->dsp.fdct(esrc, elinesize, blocks);
268  blocks += 64;
269  if (blocks_per_mb > 2) {
270  ctx->dsp.fdct(esrc + 8, elinesize, blocks);
271  blocks += 64;
272  }
273  ctx->dsp.fdct(esrc + elinesize * 4, elinesize, blocks);
274  blocks += 64;
275  if (blocks_per_mb > 2) {
276  ctx->dsp.fdct(esrc + elinesize * 4 + 8, elinesize, blocks);
277  blocks += 64;
278  }
279  } else {
280  ctx->dsp.fdct(esrc, elinesize, blocks);
281  blocks += 64;
282  ctx->dsp.fdct(esrc + elinesize * 4, elinesize, blocks);
283  blocks += 64;
284  if (blocks_per_mb > 2) {
285  ctx->dsp.fdct(esrc + 8, elinesize, blocks);
286  blocks += 64;
287  ctx->dsp.fdct(esrc + elinesize * 4 + 8, elinesize, blocks);
288  blocks += 64;
289  }
290  }
291 
292  x += mb_width;
293  }
294 }
295 
296 static void get_alpha_data(ProresContext *ctx, const uint16_t *src,
297  int linesize, int x, int y, int w, int h,
298  int16_t *blocks, int mbs_per_slice, int abits)
299 {
300  const int slice_width = 16 * mbs_per_slice;
301  int i, j, copy_w, copy_h;
302 
303  copy_w = FFMIN(w - x, slice_width);
304  copy_h = FFMIN(h - y, 16);
305  for (i = 0; i < copy_h; i++) {
306  memcpy(blocks, src, copy_w * sizeof(*src));
307  if (abits == 8)
308  for (j = 0; j < copy_w; j++)
309  blocks[j] >>= 2;
310  else
311  for (j = 0; j < copy_w; j++)
312  blocks[j] = (blocks[j] << 6) | (blocks[j] >> 4);
313  for (j = copy_w; j < slice_width; j++)
314  blocks[j] = blocks[copy_w - 1];
315  blocks += slice_width;
316  src += linesize >> 1;
317  }
318  for (; i < 16; i++) {
319  memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks));
320  blocks += slice_width;
321  }
322 }
323 
324 /**
325  * Write an unsigned rice/exp golomb codeword.
326  */
327 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
328 {
329  unsigned int rice_order, exp_order, switch_bits, switch_val;
330  int exponent;
331 
332  /* number of prefix bits to switch between Rice and expGolomb */
333  switch_bits = (codebook & 3) + 1;
334  rice_order = codebook >> 5; /* rice code order */
335  exp_order = (codebook >> 2) & 7; /* exp golomb code order */
336 
337  switch_val = switch_bits << rice_order;
338 
339  if (val >= switch_val) {
340  val -= switch_val - (1 << exp_order);
341  exponent = av_log2(val);
342 
343  put_bits(pb, exponent - exp_order + switch_bits, 0);
344  put_bits(pb, exponent + 1, val);
345  } else {
346  exponent = val >> rice_order;
347 
348  if (exponent)
349  put_bits(pb, exponent, 0);
350  put_bits(pb, 1, 1);
351  if (rice_order)
352  put_sbits(pb, rice_order, val);
353  }
354 }
355 
356 #define GET_SIGN(x) ((x) >> 31)
357 #define MAKE_CODE(x) (((x) << 1) ^ GET_SIGN(x))
358 
359 static void encode_dcs(PutBitContext *pb, int16_t *blocks,
360  int blocks_per_slice, int scale)
361 {
362  int i;
363  int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
364 
365  prev_dc = (blocks[0] - 0x4000) / scale;
367  sign = 0;
368  codebook = 3;
369  blocks += 64;
370 
371  for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
372  dc = (blocks[0] - 0x4000) / scale;
373  delta = dc - prev_dc;
374  new_sign = GET_SIGN(delta);
375  delta = (delta ^ sign) - sign;
376  code = MAKE_CODE(delta);
377  encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
378  codebook = (code + (code & 1)) >> 1;
379  codebook = FFMIN(codebook, 3);
380  sign = new_sign;
381  prev_dc = dc;
382  }
383 }
384 
385 static void encode_acs(PutBitContext *pb, int16_t *blocks,
386  int blocks_per_slice,
387  int plane_size_factor,
388  const uint8_t *scan, const int16_t *qmat)
389 {
390  int idx, i;
391  int run, level, run_cb, lev_cb;
392  int max_coeffs, abs_level;
393 
394  max_coeffs = blocks_per_slice << 6;
395  run_cb = ff_prores_run_to_cb_index[4];
396  lev_cb = ff_prores_lev_to_cb_index[2];
397  run = 0;
398 
399  for (i = 1; i < 64; i++) {
400  for (idx = scan[i]; idx < max_coeffs; idx += 64) {
401  level = blocks[idx] / qmat[scan[i]];
402  if (level) {
403  abs_level = FFABS(level);
404  encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
406  abs_level - 1);
407  put_sbits(pb, 1, GET_SIGN(level));
408 
409  run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
410  lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
411  run = 0;
412  } else {
413  run++;
414  }
415  }
416  }
417 }
418 
420  const uint16_t *src, int linesize,
421  int mbs_per_slice, int16_t *blocks,
422  int blocks_per_mb, int plane_size_factor,
423  const int16_t *qmat)
424 {
425  int blocks_per_slice, saved_pos;
426 
427  saved_pos = put_bits_count(pb);
428  blocks_per_slice = mbs_per_slice * blocks_per_mb;
429 
430  encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
431  encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
432  ctx->scantable.permutated, qmat);
433  flush_put_bits(pb);
434 
435  return (put_bits_count(pb) - saved_pos) >> 3;
436 }
437 
438 static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits)
439 {
440  const int mask = (1 << abits) - 1;
441  const int dbits = (abits == 8) ? 4 : 7;
442  const int dsize = 1 << dbits - 1;
443  int diff = cur - prev;
444 
445  diff &= mask;
446  if (diff >= (1 << abits) - dsize)
447  diff -= 1 << abits;
448  if (diff < -dsize || diff > dsize || !diff) {
449  put_bits(pb, 1, 1);
450  put_bits(pb, abits, diff);
451  } else {
452  put_bits(pb, 1, 0);
453  put_bits(pb, dbits - 1, FFABS(diff) - 1);
454  put_bits(pb, 1, diff < 0);
455  }
456 }
457 
458 static void put_alpha_run(PutBitContext *pb, int run)
459 {
460  if (run) {
461  put_bits(pb, 1, 0);
462  if (run < 0x10)
463  put_bits(pb, 4, run);
464  else
465  put_bits(pb, 15, run);
466  } else {
467  put_bits(pb, 1, 1);
468  }
469 }
470 
471 // todo alpha quantisation for high quants
473  const uint16_t *src, int linesize,
474  int mbs_per_slice, uint16_t *blocks,
475  int quant)
476 {
477  const int abits = ctx->alpha_bits;
478  const int mask = (1 << abits) - 1;
479  const int num_coeffs = mbs_per_slice * 256;
480  int saved_pos = put_bits_count(pb);
481  int prev = mask, cur;
482  int idx = 0;
483  int run = 0;
484 
485  cur = blocks[idx++];
486  put_alpha_diff(pb, cur, prev, abits);
487  prev = cur;
488  do {
489  cur = blocks[idx++];
490  if (cur != prev) {
491  put_alpha_run (pb, run);
492  put_alpha_diff(pb, cur, prev, abits);
493  prev = cur;
494  run = 0;
495  } else {
496  run++;
497  }
498  } while (idx < num_coeffs);
499  if (run)
500  put_alpha_run(pb, run);
501  flush_put_bits(pb);
502  return (put_bits_count(pb) - saved_pos) >> 3;
503 }
504 
505 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
506  PutBitContext *pb,
507  int sizes[4], int x, int y, int quant,
508  int mbs_per_slice)
509 {
510  ProresContext *ctx = avctx->priv_data;
511  int i, xp, yp;
512  int total_size = 0;
513  const uint16_t *src;
514  int slice_width_factor = av_log2(mbs_per_slice);
515  int num_cblocks, pwidth, linesize, line_add;
516  int plane_factor, is_chroma;
517  uint16_t *qmat;
518 
519  if (ctx->pictures_per_frame == 1)
520  line_add = 0;
521  else
522  line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
523 
524  if (ctx->force_quant) {
525  qmat = ctx->quants[0];
526  } else if (quant < MAX_STORED_Q) {
527  qmat = ctx->quants[quant];
528  } else {
529  qmat = ctx->custom_q;
530  for (i = 0; i < 64; i++)
531  qmat[i] = ctx->quant_mat[i] * quant;
532  }
533 
534  for (i = 0; i < ctx->num_planes; i++) {
535  is_chroma = (i == 1 || i == 2);
536  plane_factor = slice_width_factor + 2;
537  if (is_chroma)
538  plane_factor += ctx->chroma_factor - 3;
539  if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
540  xp = x << 4;
541  yp = y << 4;
542  num_cblocks = 4;
543  pwidth = avctx->width;
544  } else {
545  xp = x << 3;
546  yp = y << 4;
547  num_cblocks = 2;
548  pwidth = avctx->width >> 1;
549  }
550 
551  linesize = pic->linesize[i] * ctx->pictures_per_frame;
552  src = (const uint16_t*)(pic->data[i] + yp * linesize +
553  line_add * pic->linesize[i]) + xp;
554 
555  if (i < 3) {
556  get_slice_data(ctx, src, linesize, xp, yp,
557  pwidth, avctx->height / ctx->pictures_per_frame,
558  ctx->blocks[0], ctx->emu_buf,
559  mbs_per_slice, num_cblocks, is_chroma);
560  sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
561  mbs_per_slice, ctx->blocks[0],
562  num_cblocks, plane_factor,
563  qmat);
564  } else {
565  get_alpha_data(ctx, src, linesize, xp, yp,
566  pwidth, avctx->height / ctx->pictures_per_frame,
567  ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
568  sizes[i] = encode_alpha_plane(ctx, pb, src, linesize,
569  mbs_per_slice, ctx->blocks[0],
570  quant);
571  }
572  total_size += sizes[i];
573  }
574  return total_size;
575 }
576 
/**
 * Return the number of bits encode_vlc_codeword() produces for val with the
 * given codebook descriptor, without writing anything.
 */
static inline int estimate_vlc(unsigned codebook, int val)
{
    const unsigned int switch_bits = (codebook & 3) + 1;
    const unsigned int rice_order  = codebook >> 5;
    const unsigned int exp_order   = (codebook >> 2) & 7;
    const unsigned int switch_val  = switch_bits << rice_order;
    int exponent;

    /* Rice code: unary quotient, stop bit and rice_order remainder bits */
    if (val < switch_val)
        return (val >> rice_order) + rice_order + 1;

    /* exp-Golomb code: zero prefix plus exponent + 1 value bits */
    val     -= switch_val - (1 << exp_order);
    exponent = av_log2(val);

    return exponent * 2 - exp_order + switch_bits + 1;
}
598 
599 static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
600  int scale)
601 {
602  int i;
603  int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
604  int bits;
605 
606  prev_dc = (blocks[0] - 0x4000) / scale;
607  bits = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
608  sign = 0;
609  codebook = 3;
610  blocks += 64;
611  *error += FFABS(blocks[0] - 0x4000) % scale;
612 
613  for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
614  dc = (blocks[0] - 0x4000) / scale;
615  *error += FFABS(blocks[0] - 0x4000) % scale;
616  delta = dc - prev_dc;
617  new_sign = GET_SIGN(delta);
618  delta = (delta ^ sign) - sign;
619  code = MAKE_CODE(delta);
620  bits += estimate_vlc(ff_prores_dc_codebook[codebook], code);
621  codebook = (code + (code & 1)) >> 1;
622  codebook = FFMIN(codebook, 3);
623  sign = new_sign;
624  prev_dc = dc;
625  }
626 
627  return bits;
628 }
629 
630 static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
631  int plane_size_factor,
632  const uint8_t *scan, const int16_t *qmat)
633 {
634  int idx, i;
635  int run, level, run_cb, lev_cb;
636  int max_coeffs, abs_level;
637  int bits = 0;
638 
639  max_coeffs = blocks_per_slice << 6;
640  run_cb = ff_prores_run_to_cb_index[4];
641  lev_cb = ff_prores_lev_to_cb_index[2];
642  run = 0;
643 
644  for (i = 1; i < 64; i++) {
645  for (idx = scan[i]; idx < max_coeffs; idx += 64) {
646  level = blocks[idx] / qmat[scan[i]];
647  *error += FFABS(blocks[idx]) % qmat[scan[i]];
648  if (level) {
649  abs_level = FFABS(level);
650  bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
651  bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
652  abs_level - 1) + 1;
653 
654  run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
655  lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
656  run = 0;
657  } else {
658  run++;
659  }
660  }
661  }
662 
663  return bits;
664 }
665 
666 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
667  const uint16_t *src, int linesize,
668  int mbs_per_slice,
669  int blocks_per_mb, int plane_size_factor,
670  const int16_t *qmat, ProresThreadData *td)
671 {
672  int blocks_per_slice;
673  int bits;
674 
675  blocks_per_slice = mbs_per_slice * blocks_per_mb;
676 
677  bits = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
678  bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
679  plane_size_factor, ctx->scantable.permutated, qmat);
680 
681  return FFALIGN(bits, 8);
682 }
683 
/**
 * Return the number of bits put_alpha_diff() writes for the given pair of
 * alpha samples: dbits + 1 for a non-zero difference within +-dsize,
 * abits + 1 otherwise.
 */
static int est_alpha_diff(int cur, int prev, int abits)
{
    const int range = 1 << abits;
    const int dbits = (abits == 8) ? 4 : 7;
    const int dsize = 1 << (dbits - 1);
    int diff = (cur - prev) & (range - 1);

    /* map the upper end of the range onto small negative differences */
    if (diff >= range - dsize)
        diff -= range;

    if (diff && diff >= -dsize && diff <= dsize)
        return dbits + 1;

    return abits + 1;
}
699 
700 static int estimate_alpha_plane(ProresContext *ctx, int *error,
701  const uint16_t *src, int linesize,
702  int mbs_per_slice, int quant,
703  int16_t *blocks)
704 {
705  const int abits = ctx->alpha_bits;
706  const int mask = (1 << abits) - 1;
707  const int num_coeffs = mbs_per_slice * 256;
708  int prev = mask, cur;
709  int idx = 0;
710  int run = 0;
711  int bits;
712 
713  *error = 0;
714  cur = blocks[idx++];
715  bits = est_alpha_diff(cur, prev, abits);
716  prev = cur;
717  do {
718  cur = blocks[idx++];
719  if (cur != prev) {
720  if (!run)
721  bits++;
722  else if (run < 0x10)
723  bits += 4;
724  else
725  bits += 15;
726  bits += est_alpha_diff(cur, prev, abits);
727  prev = cur;
728  run = 0;
729  } else {
730  run++;
731  }
732  } while (idx < num_coeffs);
733 
734  if (run) {
735  if (run < 0x10)
736  bits += 4;
737  else
738  bits += 15;
739  }
740 
741  return bits;
742 }
743 
744 static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic,
745  int trellis_node, int x, int y, int mbs_per_slice,
747 {
748  ProresContext *ctx = avctx->priv_data;
749  int i, q, pq, xp, yp;
750  const uint16_t *src;
751  int slice_width_factor = av_log2(mbs_per_slice);
752  int num_cblocks[MAX_PLANES], pwidth;
753  int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
754  const int min_quant = ctx->profile_info->min_quant;
755  const int max_quant = ctx->profile_info->max_quant;
756  int error, bits, bits_limit;
757  int mbs, prev, cur, new_score;
758  int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
759  int overquant;
760  uint16_t *qmat;
761  int linesize[4], line_add;
762 
763  if (ctx->pictures_per_frame == 1)
764  line_add = 0;
765  else
766  line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
767  mbs = x + mbs_per_slice;
768 
769  for (i = 0; i < ctx->num_planes; i++) {
770  is_chroma[i] = (i == 1 || i == 2);
771  plane_factor[i] = slice_width_factor + 2;
772  if (is_chroma[i])
773  plane_factor[i] += ctx->chroma_factor - 3;
774  if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
775  xp = x << 4;
776  yp = y << 4;
777  num_cblocks[i] = 4;
778  pwidth = avctx->width;
779  } else {
780  xp = x << 3;
781  yp = y << 4;
782  num_cblocks[i] = 2;
783  pwidth = avctx->width >> 1;
784  }
785 
786  linesize[i] = pic->linesize[i] * ctx->pictures_per_frame;
787  src = (const uint16_t*)(pic->data[i] + yp * linesize[i] +
788  line_add * pic->linesize[i]) + xp;
789 
790  if (i < 3) {
791  get_slice_data(ctx, src, linesize[i], xp, yp,
792  pwidth, avctx->height / ctx->pictures_per_frame,
793  td->blocks[i], td->emu_buf,
794  mbs_per_slice, num_cblocks[i], is_chroma[i]);
795  } else {
796  get_alpha_data(ctx, src, linesize[i], xp, yp,
797  pwidth, avctx->height / ctx->pictures_per_frame,
798  td->blocks[i], mbs_per_slice, ctx->alpha_bits);
799  }
800  }
801 
802  for (q = min_quant; q < max_quant + 2; q++) {
803  td->nodes[trellis_node + q].prev_node = -1;
804  td->nodes[trellis_node + q].quant = q;
805  }
806 
807  // todo: maybe perform coarser quantising to fit into frame size when needed
808  for (q = min_quant; q <= max_quant; q++) {
809  bits = 0;
810  error = 0;
811  for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
812  bits += estimate_slice_plane(ctx, &error, i,
813  src, linesize[i],
814  mbs_per_slice,
815  num_cblocks[i], plane_factor[i],
816  ctx->quants[q], td);
817  }
818  if (ctx->alpha_bits)
819  bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
820  mbs_per_slice, q, td->blocks[3]);
821  if (bits > 65000 * 8) {
822  error = SCORE_LIMIT;
823  break;
824  }
825  slice_bits[q] = bits;
826  slice_score[q] = error;
827  }
828  if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
829  slice_bits[max_quant + 1] = slice_bits[max_quant];
830  slice_score[max_quant + 1] = slice_score[max_quant] + 1;
831  overquant = max_quant;
832  } else {
833  for (q = max_quant + 1; q < 128; q++) {
834  bits = 0;
835  error = 0;
836  if (q < MAX_STORED_Q) {
837  qmat = ctx->quants[q];
838  } else {
839  qmat = td->custom_q;
840  for (i = 0; i < 64; i++)
841  qmat[i] = ctx->quant_mat[i] * q;
842  }
843  for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
844  bits += estimate_slice_plane(ctx, &error, i,
845  src, linesize[i],
846  mbs_per_slice,
847  num_cblocks[i], plane_factor[i],
848  qmat, td);
849  }
850  if (ctx->alpha_bits)
851  bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
852  mbs_per_slice, q, td->blocks[3]);
853  if (bits <= ctx->bits_per_mb * mbs_per_slice)
854  break;
855  }
856 
857  slice_bits[max_quant + 1] = bits;
858  slice_score[max_quant + 1] = error;
859  overquant = q;
860  }
861  td->nodes[trellis_node + max_quant + 1].quant = overquant;
862 
863  bits_limit = mbs * ctx->bits_per_mb;
864  for (pq = min_quant; pq < max_quant + 2; pq++) {
865  prev = trellis_node - TRELLIS_WIDTH + pq;
866 
867  for (q = min_quant; q < max_quant + 2; q++) {
868  cur = trellis_node + q;
869 
870  bits = td->nodes[prev].bits + slice_bits[q];
871  error = slice_score[q];
872  if (bits > bits_limit)
873  error = SCORE_LIMIT;
874 
875  if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
876  new_score = td->nodes[prev].score + error;
877  else
878  new_score = SCORE_LIMIT;
879  if (td->nodes[cur].prev_node == -1 ||
880  td->nodes[cur].score >= new_score) {
881 
882  td->nodes[cur].bits = bits;
883  td->nodes[cur].score = new_score;
884  td->nodes[cur].prev_node = prev;
885  }
886  }
887  }
888 
889  error = td->nodes[trellis_node + min_quant].score;
890  pq = trellis_node + min_quant;
891  for (q = min_quant + 1; q < max_quant + 2; q++) {
892  if (td->nodes[trellis_node + q].score <= error) {
893  error = td->nodes[trellis_node + q].score;
894  pq = trellis_node + q;
895  }
896  }
897 
898  return pq;
899 }
900 
901 static int find_quant_thread(AVCodecContext *avctx, void *arg,
902  int jobnr, int threadnr)
903 {
904  ProresContext *ctx = avctx->priv_data;
905  ProresThreadData *td = ctx->tdata + threadnr;
906  int mbs_per_slice = ctx->mbs_per_slice;
907  int x, y = jobnr, mb, q = 0;
908 
909  for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
910  while (ctx->mb_width - x < mbs_per_slice)
911  mbs_per_slice >>= 1;
912  q = find_slice_quant(avctx, avctx->coded_frame,
913  (mb + 1) * TRELLIS_WIDTH, x, y,
914  mbs_per_slice, td);
915  }
916 
917  for (x = ctx->slices_width - 1; x >= 0; x--) {
918  ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
919  q = td->nodes[q].prev_node;
920  }
921 
922  return 0;
923 }
924 
926  const AVFrame *pic, int *got_packet)
927 {
928  ProresContext *ctx = avctx->priv_data;
929  uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
930  uint8_t *picture_size_pos;
931  PutBitContext pb;
932  int x, y, i, mb, q = 0;
933  int sizes[4] = { 0 };
934  int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
935  int frame_size, picture_size, slice_size;
936  int pkt_size, ret;
937  uint8_t frame_flags;
938 
939  *avctx->coded_frame = *pic;
941  avctx->coded_frame->key_frame = 1;
942 
943  pkt_size = ctx->frame_size_upper_bound + FF_MIN_BUFFER_SIZE;
944 
945  if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size)) < 0)
946  return ret;
947 
948  orig_buf = pkt->data;
949 
950  // frame atom
951  orig_buf += 4; // frame size
952  bytestream_put_be32 (&orig_buf, FRAME_ID); // frame container ID
953  buf = orig_buf;
954 
955  // frame header
956  tmp = buf;
957  buf += 2; // frame header size will be stored here
958  bytestream_put_be16 (&buf, 0); // version 1
959  bytestream_put_buffer(&buf, ctx->vendor, 4);
960  bytestream_put_be16 (&buf, avctx->width);
961  bytestream_put_be16 (&buf, avctx->height);
962 
963  frame_flags = ctx->chroma_factor << 6;
964  if (avctx->flags & CODEC_FLAG_INTERLACED_DCT)
965  frame_flags |= pic->top_field_first ? 0x04 : 0x08;
966  bytestream_put_byte (&buf, frame_flags);
967 
968  bytestream_put_byte (&buf, 0); // reserved
969  bytestream_put_byte (&buf, avctx->color_primaries);
970  bytestream_put_byte (&buf, avctx->color_trc);
971  bytestream_put_byte (&buf, avctx->colorspace);
972  bytestream_put_byte (&buf, 0x40 | (ctx->alpha_bits >> 3));
973  bytestream_put_byte (&buf, 0); // reserved
974  if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
975  bytestream_put_byte (&buf, 0x03); // matrix flags - both matrices are present
976  // luma quantisation matrix
977  for (i = 0; i < 64; i++)
978  bytestream_put_byte(&buf, ctx->quant_mat[i]);
979  // chroma quantisation matrix
980  for (i = 0; i < 64; i++)
981  bytestream_put_byte(&buf, ctx->quant_mat[i]);
982  } else {
983  bytestream_put_byte (&buf, 0x00); // matrix flags - default matrices are used
984  }
985  bytestream_put_be16 (&tmp, buf - orig_buf); // write back frame header size
986 
987  for (ctx->cur_picture_idx = 0;
989  ctx->cur_picture_idx++) {
990  // picture header
991  picture_size_pos = buf + 1;
992  bytestream_put_byte (&buf, 0x40); // picture header size (in bits)
993  buf += 4; // picture data size will be stored here
994  bytestream_put_be16 (&buf, ctx->slices_per_picture);
995  bytestream_put_byte (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
996 
997  // seek table - will be filled during slice encoding
998  slice_sizes = buf;
999  buf += ctx->slices_per_picture * 2;
1000 
1001  // slices
1002  if (!ctx->force_quant) {
1003  ret = avctx->execute2(avctx, find_quant_thread, NULL, NULL,
1004  ctx->mb_height);
1005  if (ret)
1006  return ret;
1007  }
1008 
1009  for (y = 0; y < ctx->mb_height; y++) {
1010  int mbs_per_slice = ctx->mbs_per_slice;
1011  for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
1012  q = ctx->force_quant ? ctx->force_quant
1013  : ctx->slice_q[mb + y * ctx->slices_width];
1014 
1015  while (ctx->mb_width - x < mbs_per_slice)
1016  mbs_per_slice >>= 1;
1017 
1018  bytestream_put_byte(&buf, slice_hdr_size << 3);
1019  slice_hdr = buf;
1020  buf += slice_hdr_size - 1;
1021  init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)) * 8);
1022  encode_slice(avctx, pic, &pb, sizes, x, y, q, mbs_per_slice);
1023 
1024  bytestream_put_byte(&slice_hdr, q);
1025  slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
1026  for (i = 0; i < ctx->num_planes - 1; i++) {
1027  bytestream_put_be16(&slice_hdr, sizes[i]);
1028  slice_size += sizes[i];
1029  }
1030  bytestream_put_be16(&slice_sizes, slice_size);
1031  buf += slice_size - slice_hdr_size;
1032  }
1033  }
1034 
1035  picture_size = buf - (picture_size_pos - 1);
1036  bytestream_put_be32(&picture_size_pos, picture_size);
1037  }
1038 
1039  orig_buf -= 8;
1040  frame_size = buf - orig_buf;
1041  bytestream_put_be32(&orig_buf, frame_size);
1042 
1043  pkt->size = frame_size;
1044  pkt->flags |= AV_PKT_FLAG_KEY;
1045  *got_packet = 1;
1046 
1047  return 0;
1048 }
1049 
1051 {
1052  ProresContext *ctx = avctx->priv_data;
1053  int i;
1054 
1055  av_freep(&avctx->coded_frame);
1056 
1057  if (ctx->tdata) {
1058  for (i = 0; i < avctx->thread_count; i++)
1059  av_free(ctx->tdata[i].nodes);
1060  }
1061  av_freep(&ctx->tdata);
1062  av_freep(&ctx->slice_q);
1063 
1064  return 0;
1065 }
1066 
1068 {
1069  ProresContext *ctx = avctx->priv_data;
1070  int mps;
1071  int i, j;
1072  int min_quant, max_quant;
1073  int interlaced = !!(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
1074 
1075  avctx->bits_per_raw_sample = 10;
1076  avctx->coded_frame = avcodec_alloc_frame();
1077  if (!avctx->coded_frame)
1078  return AVERROR(ENOMEM);
1079 
1080  ff_proresdsp_init(&ctx->dsp, avctx);
1081  ff_init_scantable(ctx->dsp.dct_permutation, &ctx->scantable,
1082  interlaced ? ff_prores_interlaced_scan
1084 
1085  mps = ctx->mbs_per_slice;
1086  if (mps & (mps - 1)) {
1087  av_log(avctx, AV_LOG_ERROR,
1088  "there should be an integer power of two MBs per slice\n");
1089  return AVERROR(EINVAL);
1090  }
1092  if (ctx->alpha_bits & 7) {
1093  av_log(avctx, AV_LOG_ERROR, "alpha bits should be 0, 8 or 16\n");
1094  return AVERROR(EINVAL);
1095  }
1096  } else {
1097  ctx->alpha_bits = 0;
1098  }
1099 
1100  ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
1101  ? CFACTOR_Y422
1102  : CFACTOR_Y444;
1104  ctx->num_planes = 3 + !!ctx->alpha_bits;
1105 
1106  ctx->mb_width = FFALIGN(avctx->width, 16) >> 4;
1107 
1108  if (interlaced)
1109  ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
1110  else
1111  ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
1112 
1113  ctx->slices_width = ctx->mb_width / mps;
1114  ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
1115  ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
1116  ctx->pictures_per_frame = 1 + interlaced;
1117 
1118  if (ctx->quant_sel == -1)
1120  else
1122 
1123  if (strlen(ctx->vendor) != 4) {
1124  av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
1125  return AVERROR_INVALIDDATA;
1126  }
1127 
1128  ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
1129  if (!ctx->force_quant) {
1130  if (!ctx->bits_per_mb) {
1131  for (i = 0; i < NUM_MB_LIMITS - 1; i++)
1132  if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
1133  ctx->pictures_per_frame)
1134  break;
1135  ctx->bits_per_mb = ctx->profile_info->br_tab[i];
1136  } else if (ctx->bits_per_mb < 128) {
1137  av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
1138  return AVERROR_INVALIDDATA;
1139  }
1140 
1141  min_quant = ctx->profile_info->min_quant;
1142  max_quant = ctx->profile_info->max_quant;
1143  for (i = min_quant; i < MAX_STORED_Q; i++) {
1144  for (j = 0; j < 64; j++)
1145  ctx->quants[i][j] = ctx->quant_mat[j] * i;
1146  }
1147 
1148  ctx->slice_q = av_malloc(ctx->slices_per_picture * sizeof(*ctx->slice_q));
1149  if (!ctx->slice_q) {
1150  encode_close(avctx);
1151  return AVERROR(ENOMEM);
1152  }
1153 
1154  ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
1155  if (!ctx->tdata) {
1156  encode_close(avctx);
1157  return AVERROR(ENOMEM);
1158  }
1159 
1160  for (j = 0; j < avctx->thread_count; j++) {
1161  ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
1162  * TRELLIS_WIDTH
1163  * sizeof(*ctx->tdata->nodes));
1164  if (!ctx->tdata[j].nodes) {
1165  encode_close(avctx);
1166  return AVERROR(ENOMEM);
1167  }
1168  for (i = min_quant; i < max_quant + 2; i++) {
1169  ctx->tdata[j].nodes[i].prev_node = -1;
1170  ctx->tdata[j].nodes[i].bits = 0;
1171  ctx->tdata[j].nodes[i].score = 0;
1172  }
1173  }
1174  } else {
1175  int ls = 0;
1176 
1177  if (ctx->force_quant > 64) {
1178  av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
1179  return AVERROR_INVALIDDATA;
1180  }
1181 
1182  for (j = 0; j < 64; j++) {
1183  ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
1184  ls += av_log2((1 << 11) / ctx->quants[0][j]) * 2 + 1;
1185  }
1186 
1187  ctx->bits_per_mb = ls * 8;
1188  if (ctx->chroma_factor == CFACTOR_Y444)
1189  ctx->bits_per_mb += ls * 4;
1190  if (ctx->num_planes == 4)
1191  ctx->bits_per_mb += ls * 4;
1192  }
1193 
1195  ctx->slices_per_picture *
1196  (2 + 2 * ctx->num_planes +
1197  (mps * ctx->bits_per_mb) / 8)
1198  + 200;
1199 
1200  avctx->codec_tag = ctx->profile_info->tag;
1201 
1202  av_log(avctx, AV_LOG_DEBUG,
1203  "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1204  ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1205  interlaced ? "yes" : "no", ctx->bits_per_mb);
1206  av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1207  ctx->frame_size_upper_bound);
1208 
1209  return 0;
1210 }
1211 
/* Byte offset of member x inside ProresContext; the options table uses it to
 * say where each option's value is stored in the private context. */
1212 #define OFFSET(x) offsetof(ProresContext, x)
/* Flags attached to every entry of the options table: video parameter and
 * encoding parameter.
 * NOTE(review): the expansion is not parenthesized, so it is only safe where
 * it stands alone as a complete expression (as it does in the initializer
 * lists of the options table). */
1213 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
1214 
/*
 * Private options of the encoder. Each entry stores its value in the
 * ProresContext member named by OFFSET(); entries of type AV_OPT_TYPE_CONST
 * are named values for the option whose unit string they share ("profile"
 * or "quant_mat"). The table ends with a { NULL } sentinel.
 */
1215 static const AVOption options[] = {
    /* Default 8, allowed range 1..MAX_MBS_PER_SLICE. encode_init additionally
     * rejects values that are not a power of two. */
1216  { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1217  AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
    /* Profile selector, default PRORES_PROFILE_STANDARD.
     * NOTE(review): the listing numbering jumps from 1219 to 1221 here, so the
     * line carrying this entry's min/max/flags/unit is not visible in this
     * copy; restore it from the upstream file before compiling. */
1218  { "profile", NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1219  { .i64 = PRORES_PROFILE_STANDARD },
    /* Named values for the "profile" unit. */
1221  { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1222  0, 0, VE, "profile" },
1223  { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1224  0, 0, VE, "profile" },
1225  { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1226  0, 0, VE, "profile" },
1227  { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1228  0, 0, VE, "profile" },
1229  { "4444", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 },
1230  0, 0, VE, "profile" },
    /* String option, default "Lavc". encode_init fails unless the string is
     * exactly 4 characters long. */
1231  { "vendor", "vendor ID", OFFSET(vendor),
1232  AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
    /* Default 0, allowed range 0..8192. When no quantiser is forced,
     * encode_init replaces 0 with a value from the profile's br_tab and
     * rejects non-zero values below 128. */
1233  { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1234  AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
    /* Quantiser matrix selector stored in quant_sel; default -1 ("auto"),
     * allowed range -1..QUANT_MAT_DEFAULT. */
1235  { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1236  { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
    /* Named values for the "quant_mat" unit. */
1237  { "auto", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1238  0, 0, VE, "quant_mat" },
1239  { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1240  0, 0, VE, "quant_mat" },
1241  { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1242  0, 0, VE, "quant_mat" },
1243  { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1244  0, 0, VE, "quant_mat" },
1245  { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1246  0, 0, VE, "quant_mat" },
1247  { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1248  0, 0, VE, "quant_mat" },
    /* Default 16, allowed range 0..16. encode_init contains a check that
     * rejects values with any of the low three bits set ("alpha bits should
     * be 0, 8 or 16"). */
1249  { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT,
1250  { .i64 = 16 }, 0, 16, VE },
    /* Sentinel entry terminating the table. */
1251  { NULL }
1252 };
1253 
/*
 * AVClass for the encoder's private context: names the class
 * "ProRes encoder", uses the default item-name callback, and exposes the
 * options table above. The codec descriptor later in this file points to it
 * through .priv_class.
 */
1254 static const AVClass proresenc_class = {
1255  .class_name = "ProRes encoder",
1256  .item_name = av_default_item_name,
1257  .option = options,
1258  .version = LIBAVUTIL_VERSION_INT,
1259 };
1260 
/*
 * Designated initializers of the codec descriptor for the "prores_ks" video
 * encoder (codec id AV_CODEC_ID_PRORES). They wire up the private context
 * size, the encode_init / encode_close / encode_frame callbacks, the
 * slice-threading capability flag and the private option class.
 * NOTE(review): the listing numbering skips 1261 and 1272-1273, so the
 * declaration line that opens this initializer and the entries of the
 * pix_fmts list are not visible in this copy; restore them from the upstream
 * file before compiling.
 */
1262  .name = "prores_ks",
1263  .type = AVMEDIA_TYPE_VIDEO,
1264  .id = AV_CODEC_ID_PRORES,
1265  .priv_data_size = sizeof(ProresContext),
1266  .init = encode_init,
1267  .close = encode_close,
1268  .encode2 = encode_frame,
1269  .capabilities = CODEC_CAP_SLICE_THREADS,
1270  .long_name = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1271  .pix_fmts = (const enum AVPixelFormat[]) {
1274  },
1275  .priv_class = &proresenc_class,
1276 };