00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #define RC_VARIANCE 1 // use variance or ssd for fast rc
00026
00027 #include "libavutil/opt.h"
00028 #include "avcodec.h"
00029 #include "dsputil.h"
00030 #include "mpegvideo.h"
00031 #include "dnxhdenc.h"
00032
00033 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
00034
00035 static const AVOption options[]={
00036 {"nitris_compat", "encode with Avid Nitris compatibility", offsetof(DNXHDEncContext, nitris_compat), FF_OPT_TYPE_INT, {.dbl = 0}, 0, 1, VE},
00037 {NULL}
00038 };
00039 static const AVClass class = { "dnxhd", av_default_item_name, options, LIBAVUTIL_VERSION_INT };
00040
00041 int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
00042
00043 #define LAMBDA_FRAC_BITS 10
00044
00045 static av_always_inline void dnxhd_get_pixels_8x4(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
00046 {
00047 int i;
00048 for (i = 0; i < 4; i++) {
00049 block[0] = pixels[0]; block[1] = pixels[1];
00050 block[2] = pixels[2]; block[3] = pixels[3];
00051 block[4] = pixels[4]; block[5] = pixels[5];
00052 block[6] = pixels[6]; block[7] = pixels[7];
00053 pixels += line_size;
00054 block += 8;
00055 }
00056 memcpy(block , block- 8, sizeof(*block)*8);
00057 memcpy(block+ 8, block-16, sizeof(*block)*8);
00058 memcpy(block+16, block-24, sizeof(*block)*8);
00059 memcpy(block+24, block-32, sizeof(*block)*8);
00060 }
00061
00062 static int dnxhd_init_vlc(DNXHDEncContext *ctx)
00063 {
00064 int i, j, level, run;
00065 int max_level = 1<<(ctx->cid_table->bit_depth+2);
00066
00067 FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->vlc_codes, max_level*4*sizeof(*ctx->vlc_codes), fail);
00068 FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->vlc_bits , max_level*4*sizeof(*ctx->vlc_bits ), fail);
00069 FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->run_codes, 63*2 , fail);
00070 FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->run_bits , 63 , fail);
00071
00072 ctx->vlc_codes += max_level*2;
00073 ctx->vlc_bits += max_level*2;
00074 for (level = -max_level; level < max_level; level++) {
00075 for (run = 0; run < 2; run++) {
00076 int index = (level<<1)|run;
00077 int sign, offset = 0, alevel = level;
00078
00079 MASK_ABS(sign, alevel);
00080 if (alevel > 64) {
00081 offset = (alevel-1)>>6;
00082 alevel -= offset<<6;
00083 }
00084 for (j = 0; j < 257; j++) {
00085 if (ctx->cid_table->ac_level[j] == alevel &&
00086 (!offset || (ctx->cid_table->ac_index_flag[j] && offset)) &&
00087 (!run || (ctx->cid_table->ac_run_flag [j] && run))) {
00088 assert(!ctx->vlc_codes[index]);
00089 if (alevel) {
00090 ctx->vlc_codes[index] = (ctx->cid_table->ac_codes[j]<<1)|(sign&1);
00091 ctx->vlc_bits [index] = ctx->cid_table->ac_bits[j]+1;
00092 } else {
00093 ctx->vlc_codes[index] = ctx->cid_table->ac_codes[j];
00094 ctx->vlc_bits [index] = ctx->cid_table->ac_bits [j];
00095 }
00096 break;
00097 }
00098 }
00099 assert(!alevel || j < 257);
00100 if (offset) {
00101 ctx->vlc_codes[index] = (ctx->vlc_codes[index]<<ctx->cid_table->index_bits)|offset;
00102 ctx->vlc_bits [index]+= ctx->cid_table->index_bits;
00103 }
00104 }
00105 }
00106 for (i = 0; i < 62; i++) {
00107 int run = ctx->cid_table->run[i];
00108 assert(run < 63);
00109 ctx->run_codes[run] = ctx->cid_table->run_codes[i];
00110 ctx->run_bits [run] = ctx->cid_table->run_bits[i];
00111 }
00112 return 0;
00113 fail:
00114 return -1;
00115 }
00116
00117 static int dnxhd_init_qmat(DNXHDEncContext *ctx, int lbias, int cbias)
00118 {
00119
00120 uint16_t weight_matrix[64] = {1,};
00121 int qscale, i;
00122
00123 FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_l, (ctx->m.avctx->qmax+1) * 64 * sizeof(int) , fail);
00124 FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_c, (ctx->m.avctx->qmax+1) * 64 * sizeof(int) , fail);
00125 FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_l16, (ctx->m.avctx->qmax+1) * 64 * 2 * sizeof(uint16_t), fail);
00126 FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_c16, (ctx->m.avctx->qmax+1) * 64 * 2 * sizeof(uint16_t), fail);
00127
00128 for (i = 1; i < 64; i++) {
00129 int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]];
00130 weight_matrix[j] = ctx->cid_table->luma_weight[i];
00131 }
00132 ff_convert_matrix(&ctx->m.dsp, ctx->qmatrix_l, ctx->qmatrix_l16, weight_matrix,
00133 ctx->m.intra_quant_bias, 1, ctx->m.avctx->qmax, 1);
00134 for (i = 1; i < 64; i++) {
00135 int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]];
00136 weight_matrix[j] = ctx->cid_table->chroma_weight[i];
00137 }
00138 ff_convert_matrix(&ctx->m.dsp, ctx->qmatrix_c, ctx->qmatrix_c16, weight_matrix,
00139 ctx->m.intra_quant_bias, 1, ctx->m.avctx->qmax, 1);
00140 for (qscale = 1; qscale <= ctx->m.avctx->qmax; qscale++) {
00141 for (i = 0; i < 64; i++) {
00142 ctx->qmatrix_l [qscale] [i] <<= 2; ctx->qmatrix_c [qscale] [i] <<= 2;
00143 ctx->qmatrix_l16[qscale][0][i] <<= 2; ctx->qmatrix_l16[qscale][1][i] <<= 2;
00144 ctx->qmatrix_c16[qscale][0][i] <<= 2; ctx->qmatrix_c16[qscale][1][i] <<= 2;
00145 }
00146 }
00147 return 0;
00148 fail:
00149 return -1;
00150 }
00151
00152 static int dnxhd_init_rc(DNXHDEncContext *ctx)
00153 {
00154 FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->mb_rc, 8160*ctx->m.avctx->qmax*sizeof(RCEntry), fail);
00155 if (ctx->m.avctx->mb_decision != FF_MB_DECISION_RD)
00156 FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->mb_cmp, ctx->m.mb_num*sizeof(RCCMPEntry), fail);
00157
00158 ctx->frame_bits = (ctx->cid_table->coding_unit_size - 640 - 4 - ctx->min_padding) * 8;
00159 ctx->qscale = 1;
00160 ctx->lambda = 2<<LAMBDA_FRAC_BITS;
00161 return 0;
00162 fail:
00163 return -1;
00164 }
00165
00166 static int dnxhd_encode_init(AVCodecContext *avctx)
00167 {
00168 DNXHDEncContext *ctx = avctx->priv_data;
00169 int i, index;
00170
00171 ctx->cid = ff_dnxhd_find_cid(avctx);
00172 if (!ctx->cid || avctx->pix_fmt != PIX_FMT_YUV422P) {
00173 av_log(avctx, AV_LOG_ERROR, "video parameters incompatible with DNxHD\n");
00174 return -1;
00175 }
00176 av_log(avctx, AV_LOG_DEBUG, "cid %d\n", ctx->cid);
00177
00178 index = ff_dnxhd_get_cid_table(ctx->cid);
00179 ctx->cid_table = &ff_dnxhd_cid_table[index];
00180
00181 ctx->m.avctx = avctx;
00182 ctx->m.mb_intra = 1;
00183 ctx->m.h263_aic = 1;
00184
00185 ctx->get_pixels_8x4_sym = dnxhd_get_pixels_8x4;
00186
00187 dsputil_init(&ctx->m.dsp, avctx);
00188 ff_dct_common_init(&ctx->m);
00189 #if HAVE_MMX
00190 ff_dnxhd_init_mmx(ctx);
00191 #endif
00192 if (!ctx->m.dct_quantize)
00193 ctx->m.dct_quantize = dct_quantize_c;
00194
00195 ctx->m.mb_height = (avctx->height + 15) / 16;
00196 ctx->m.mb_width = (avctx->width + 15) / 16;
00197
00198 if (avctx->flags & CODEC_FLAG_INTERLACED_DCT) {
00199 ctx->interlaced = 1;
00200 ctx->m.mb_height /= 2;
00201 }
00202
00203 ctx->m.mb_num = ctx->m.mb_height * ctx->m.mb_width;
00204
00205 if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
00206 ctx->m.intra_quant_bias = avctx->intra_quant_bias;
00207 if (dnxhd_init_qmat(ctx, ctx->m.intra_quant_bias, 0) < 0)
00208 return -1;
00209
00210
00211 if (ctx->nitris_compat)
00212 ctx->min_padding = 1600;
00213
00214 if (dnxhd_init_vlc(ctx) < 0)
00215 return -1;
00216 if (dnxhd_init_rc(ctx) < 0)
00217 return -1;
00218
00219 FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->slice_size, ctx->m.mb_height*sizeof(uint32_t), fail);
00220 FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->slice_offs, ctx->m.mb_height*sizeof(uint32_t), fail);
00221 FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->mb_bits, ctx->m.mb_num *sizeof(uint16_t), fail);
00222 FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->mb_qscale, ctx->m.mb_num *sizeof(uint8_t) , fail);
00223
00224 ctx->frame.key_frame = 1;
00225 ctx->frame.pict_type = AV_PICTURE_TYPE_I;
00226 ctx->m.avctx->coded_frame = &ctx->frame;
00227
00228 if (avctx->thread_count > MAX_THREADS) {
00229 av_log(avctx, AV_LOG_ERROR, "too many threads\n");
00230 return -1;
00231 }
00232
00233 ctx->thread[0] = ctx;
00234 for (i = 1; i < avctx->thread_count; i++) {
00235 ctx->thread[i] = av_malloc(sizeof(DNXHDEncContext));
00236 memcpy(ctx->thread[i], ctx, sizeof(DNXHDEncContext));
00237 }
00238
00239 return 0;
00240 fail:
00241 return -1;
00242 }
00243
00244 static int dnxhd_write_header(AVCodecContext *avctx, uint8_t *buf)
00245 {
00246 DNXHDEncContext *ctx = avctx->priv_data;
00247 const uint8_t header_prefix[5] = { 0x00,0x00,0x02,0x80,0x01 };
00248
00249 memset(buf, 0, 640);
00250
00251 memcpy(buf, header_prefix, 5);
00252 buf[5] = ctx->interlaced ? ctx->cur_field+2 : 0x01;
00253 buf[6] = 0x80;
00254 buf[7] = 0xa0;
00255 AV_WB16(buf + 0x18, avctx->height>>ctx->interlaced);
00256 AV_WB16(buf + 0x1a, avctx->width);
00257 AV_WB16(buf + 0x1d, avctx->height>>ctx->interlaced);
00258
00259 buf[0x21] = 0x38;
00260 buf[0x22] = 0x88 + (ctx->interlaced<<2);
00261 AV_WB32(buf + 0x28, ctx->cid);
00262 buf[0x2c] = ctx->interlaced ? 0 : 0x80;
00263
00264 buf[0x5f] = 0x01;
00265
00266 buf[0x167] = 0x02;
00267 AV_WB16(buf + 0x16a, ctx->m.mb_height * 4 + 4);
00268 buf[0x16d] = ctx->m.mb_height;
00269 buf[0x16f] = 0x10;
00270
00271 ctx->msip = buf + 0x170;
00272 return 0;
00273 }
00274
00275 static av_always_inline void dnxhd_encode_dc(DNXHDEncContext *ctx, int diff)
00276 {
00277 int nbits;
00278 if (diff < 0) {
00279 nbits = av_log2_16bit(-2*diff);
00280 diff--;
00281 } else {
00282 nbits = av_log2_16bit(2*diff);
00283 }
00284 put_bits(&ctx->m.pb, ctx->cid_table->dc_bits[nbits] + nbits,
00285 (ctx->cid_table->dc_codes[nbits]<<nbits) + (diff & ((1 << nbits) - 1)));
00286 }
00287
00288 static av_always_inline void dnxhd_encode_block(DNXHDEncContext *ctx, DCTELEM *block, int last_index, int n)
00289 {
00290 int last_non_zero = 0;
00291 int slevel, i, j;
00292
00293 dnxhd_encode_dc(ctx, block[0] - ctx->m.last_dc[n]);
00294 ctx->m.last_dc[n] = block[0];
00295
00296 for (i = 1; i <= last_index; i++) {
00297 j = ctx->m.intra_scantable.permutated[i];
00298 slevel = block[j];
00299 if (slevel) {
00300 int run_level = i - last_non_zero - 1;
00301 int rlevel = (slevel<<1)|!!run_level;
00302 put_bits(&ctx->m.pb, ctx->vlc_bits[rlevel], ctx->vlc_codes[rlevel]);
00303 if (run_level)
00304 put_bits(&ctx->m.pb, ctx->run_bits[run_level], ctx->run_codes[run_level]);
00305 last_non_zero = i;
00306 }
00307 }
00308 put_bits(&ctx->m.pb, ctx->vlc_bits[0], ctx->vlc_codes[0]);
00309 }
00310
00311 static av_always_inline void dnxhd_unquantize_c(DNXHDEncContext *ctx, DCTELEM *block, int n, int qscale, int last_index)
00312 {
00313 const uint8_t *weight_matrix;
00314 int level;
00315 int i;
00316
00317 weight_matrix = (n&2) ? ctx->cid_table->chroma_weight : ctx->cid_table->luma_weight;
00318
00319 for (i = 1; i <= last_index; i++) {
00320 int j = ctx->m.intra_scantable.permutated[i];
00321 level = block[j];
00322 if (level) {
00323 if (level < 0) {
00324 level = (1-2*level) * qscale * weight_matrix[i];
00325 if (weight_matrix[i] != 32)
00326 level += 32;
00327 level >>= 6;
00328 level = -level;
00329 } else {
00330 level = (2*level+1) * qscale * weight_matrix[i];
00331 if (weight_matrix[i] != 32)
00332 level += 32;
00333 level >>= 6;
00334 }
00335 block[j] = level;
00336 }
00337 }
00338 }
00339
00340 static av_always_inline int dnxhd_ssd_block(DCTELEM *qblock, DCTELEM *block)
00341 {
00342 int score = 0;
00343 int i;
00344 for (i = 0; i < 64; i++)
00345 score += (block[i]-qblock[i])*(block[i]-qblock[i]);
00346 return score;
00347 }
00348
00349 static av_always_inline int dnxhd_calc_ac_bits(DNXHDEncContext *ctx, DCTELEM *block, int last_index)
00350 {
00351 int last_non_zero = 0;
00352 int bits = 0;
00353 int i, j, level;
00354 for (i = 1; i <= last_index; i++) {
00355 j = ctx->m.intra_scantable.permutated[i];
00356 level = block[j];
00357 if (level) {
00358 int run_level = i - last_non_zero - 1;
00359 bits += ctx->vlc_bits[(level<<1)|!!run_level]+ctx->run_bits[run_level];
00360 last_non_zero = i;
00361 }
00362 }
00363 return bits;
00364 }
00365
00366 static av_always_inline void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, int mb_y)
00367 {
00368 const uint8_t *ptr_y = ctx->thread[0]->src[0] + ((mb_y << 4) * ctx->m.linesize) + (mb_x << 4);
00369 const uint8_t *ptr_u = ctx->thread[0]->src[1] + ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << 3);
00370 const uint8_t *ptr_v = ctx->thread[0]->src[2] + ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << 3);
00371 DSPContext *dsp = &ctx->m.dsp;
00372
00373 dsp->get_pixels(ctx->blocks[0], ptr_y , ctx->m.linesize);
00374 dsp->get_pixels(ctx->blocks[1], ptr_y + 8, ctx->m.linesize);
00375 dsp->get_pixels(ctx->blocks[2], ptr_u , ctx->m.uvlinesize);
00376 dsp->get_pixels(ctx->blocks[3], ptr_v , ctx->m.uvlinesize);
00377
00378 if (mb_y+1 == ctx->m.mb_height && ctx->m.avctx->height == 1080) {
00379 if (ctx->interlaced) {
00380 ctx->get_pixels_8x4_sym(ctx->blocks[4], ptr_y + ctx->dct_y_offset , ctx->m.linesize);
00381 ctx->get_pixels_8x4_sym(ctx->blocks[5], ptr_y + ctx->dct_y_offset + 8, ctx->m.linesize);
00382 ctx->get_pixels_8x4_sym(ctx->blocks[6], ptr_u + ctx->dct_uv_offset , ctx->m.uvlinesize);
00383 ctx->get_pixels_8x4_sym(ctx->blocks[7], ptr_v + ctx->dct_uv_offset , ctx->m.uvlinesize);
00384 } else {
00385 dsp->clear_block(ctx->blocks[4]); dsp->clear_block(ctx->blocks[5]);
00386 dsp->clear_block(ctx->blocks[6]); dsp->clear_block(ctx->blocks[7]);
00387 }
00388 } else {
00389 dsp->get_pixels(ctx->blocks[4], ptr_y + ctx->dct_y_offset , ctx->m.linesize);
00390 dsp->get_pixels(ctx->blocks[5], ptr_y + ctx->dct_y_offset + 8, ctx->m.linesize);
00391 dsp->get_pixels(ctx->blocks[6], ptr_u + ctx->dct_uv_offset , ctx->m.uvlinesize);
00392 dsp->get_pixels(ctx->blocks[7], ptr_v + ctx->dct_uv_offset , ctx->m.uvlinesize);
00393 }
00394 }
00395
00396 static av_always_inline int dnxhd_switch_matrix(DNXHDEncContext *ctx, int i)
00397 {
00398 if (i&2) {
00399 ctx->m.q_intra_matrix16 = ctx->qmatrix_c16;
00400 ctx->m.q_intra_matrix = ctx->qmatrix_c;
00401 return 1 + (i&1);
00402 } else {
00403 ctx->m.q_intra_matrix16 = ctx->qmatrix_l16;
00404 ctx->m.q_intra_matrix = ctx->qmatrix_l;
00405 return 0;
00406 }
00407 }
00408
00409 static int dnxhd_calc_bits_thread(AVCodecContext *avctx, void *arg, int jobnr, int threadnr)
00410 {
00411 DNXHDEncContext *ctx = avctx->priv_data;
00412 int mb_y = jobnr, mb_x;
00413 int qscale = ctx->qscale;
00414 LOCAL_ALIGNED_16(DCTELEM, block, [64]);
00415 ctx = ctx->thread[threadnr];
00416
00417 ctx->m.last_dc[0] =
00418 ctx->m.last_dc[1] =
00419 ctx->m.last_dc[2] = 1024;
00420
00421 for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
00422 unsigned mb = mb_y * ctx->m.mb_width + mb_x;
00423 int ssd = 0;
00424 int ac_bits = 0;
00425 int dc_bits = 0;
00426 int i;
00427
00428 dnxhd_get_blocks(ctx, mb_x, mb_y);
00429
00430 for (i = 0; i < 8; i++) {
00431 DCTELEM *src_block = ctx->blocks[i];
00432 int overflow, nbits, diff, last_index;
00433 int n = dnxhd_switch_matrix(ctx, i);
00434
00435 memcpy(block, src_block, 64*sizeof(*block));
00436 last_index = ctx->m.dct_quantize(&ctx->m, block, i, qscale, &overflow);
00437 ac_bits += dnxhd_calc_ac_bits(ctx, block, last_index);
00438
00439 diff = block[0] - ctx->m.last_dc[n];
00440 if (diff < 0) nbits = av_log2_16bit(-2*diff);
00441 else nbits = av_log2_16bit( 2*diff);
00442 dc_bits += ctx->cid_table->dc_bits[nbits] + nbits;
00443
00444 ctx->m.last_dc[n] = block[0];
00445
00446 if (avctx->mb_decision == FF_MB_DECISION_RD || !RC_VARIANCE) {
00447 dnxhd_unquantize_c(ctx, block, i, qscale, last_index);
00448 ctx->m.dsp.idct(block);
00449 ssd += dnxhd_ssd_block(block, src_block);
00450 }
00451 }
00452 ctx->mb_rc[qscale][mb].ssd = ssd;
00453 ctx->mb_rc[qscale][mb].bits = ac_bits+dc_bits+12+8*ctx->vlc_bits[0];
00454 }
00455 return 0;
00456 }
00457
00458 static int dnxhd_encode_thread(AVCodecContext *avctx, void *arg, int jobnr, int threadnr)
00459 {
00460 DNXHDEncContext *ctx = avctx->priv_data;
00461 int mb_y = jobnr, mb_x;
00462 ctx = ctx->thread[threadnr];
00463 init_put_bits(&ctx->m.pb, (uint8_t *)arg + 640 + ctx->slice_offs[jobnr], ctx->slice_size[jobnr]);
00464
00465 ctx->m.last_dc[0] =
00466 ctx->m.last_dc[1] =
00467 ctx->m.last_dc[2] = 1024;
00468 for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
00469 unsigned mb = mb_y * ctx->m.mb_width + mb_x;
00470 int qscale = ctx->mb_qscale[mb];
00471 int i;
00472
00473 put_bits(&ctx->m.pb, 12, qscale<<1);
00474
00475 dnxhd_get_blocks(ctx, mb_x, mb_y);
00476
00477 for (i = 0; i < 8; i++) {
00478 DCTELEM *block = ctx->blocks[i];
00479 int last_index, overflow;
00480 int n = dnxhd_switch_matrix(ctx, i);
00481 last_index = ctx->m.dct_quantize(&ctx->m, block, i, qscale, &overflow);
00482
00483 dnxhd_encode_block(ctx, block, last_index, n);
00484
00485 }
00486 }
00487 if (put_bits_count(&ctx->m.pb)&31)
00488 put_bits(&ctx->m.pb, 32-(put_bits_count(&ctx->m.pb)&31), 0);
00489 flush_put_bits(&ctx->m.pb);
00490 return 0;
00491 }
00492
00493 static void dnxhd_setup_threads_slices(DNXHDEncContext *ctx)
00494 {
00495 int mb_y, mb_x;
00496 int offset = 0;
00497 for (mb_y = 0; mb_y < ctx->m.mb_height; mb_y++) {
00498 int thread_size;
00499 ctx->slice_offs[mb_y] = offset;
00500 ctx->slice_size[mb_y] = 0;
00501 for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
00502 unsigned mb = mb_y * ctx->m.mb_width + mb_x;
00503 ctx->slice_size[mb_y] += ctx->mb_bits[mb];
00504 }
00505 ctx->slice_size[mb_y] = (ctx->slice_size[mb_y]+31)&~31;
00506 ctx->slice_size[mb_y] >>= 3;
00507 thread_size = ctx->slice_size[mb_y];
00508 offset += thread_size;
00509 }
00510 }
00511
00512 static int dnxhd_mb_var_thread(AVCodecContext *avctx, void *arg, int jobnr, int threadnr)
00513 {
00514 DNXHDEncContext *ctx = avctx->priv_data;
00515 int mb_y = jobnr, mb_x;
00516 ctx = ctx->thread[threadnr];
00517 for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
00518 unsigned mb = mb_y * ctx->m.mb_width + mb_x;
00519 uint8_t *pix = ctx->thread[0]->src[0] + ((mb_y<<4) * ctx->m.linesize) + (mb_x<<4);
00520 int sum = ctx->m.dsp.pix_sum(pix, ctx->m.linesize);
00521 int varc = (ctx->m.dsp.pix_norm1(pix, ctx->m.linesize) - (((unsigned)(sum*sum))>>8)+128)>>8;
00522 ctx->mb_cmp[mb].value = varc;
00523 ctx->mb_cmp[mb].mb = mb;
00524 }
00525 return 0;
00526 }
00527
00528 static int dnxhd_encode_rdo(AVCodecContext *avctx, DNXHDEncContext *ctx)
00529 {
00530 int lambda, up_step, down_step;
00531 int last_lower = INT_MAX, last_higher = 0;
00532 int x, y, q;
00533
00534 for (q = 1; q < avctx->qmax; q++) {
00535 ctx->qscale = q;
00536 avctx->execute2(avctx, dnxhd_calc_bits_thread, NULL, NULL, ctx->m.mb_height);
00537 }
00538 up_step = down_step = 2<<LAMBDA_FRAC_BITS;
00539 lambda = ctx->lambda;
00540
00541 for (;;) {
00542 int bits = 0;
00543 int end = 0;
00544 if (lambda == last_higher) {
00545 lambda++;
00546 end = 1;
00547 }
00548 for (y = 0; y < ctx->m.mb_height; y++) {
00549 for (x = 0; x < ctx->m.mb_width; x++) {
00550 unsigned min = UINT_MAX;
00551 int qscale = 1;
00552 int mb = y*ctx->m.mb_width+x;
00553 for (q = 1; q < avctx->qmax; q++) {
00554 unsigned score = ctx->mb_rc[q][mb].bits*lambda+(ctx->mb_rc[q][mb].ssd<<LAMBDA_FRAC_BITS);
00555 if (score < min) {
00556 min = score;
00557 qscale = q;
00558 }
00559 }
00560 bits += ctx->mb_rc[qscale][mb].bits;
00561 ctx->mb_qscale[mb] = qscale;
00562 ctx->mb_bits[mb] = ctx->mb_rc[qscale][mb].bits;
00563 }
00564 bits = (bits+31)&~31;
00565 if (bits > ctx->frame_bits)
00566 break;
00567 }
00568
00569
00570 if (end) {
00571 if (bits > ctx->frame_bits)
00572 return -1;
00573 break;
00574 }
00575 if (bits < ctx->frame_bits) {
00576 last_lower = FFMIN(lambda, last_lower);
00577 if (last_higher != 0)
00578 lambda = (lambda+last_higher)>>1;
00579 else
00580 lambda -= down_step;
00581 down_step *= 5;
00582 up_step = 1<<LAMBDA_FRAC_BITS;
00583 lambda = FFMAX(1, lambda);
00584 if (lambda == last_lower)
00585 break;
00586 } else {
00587 last_higher = FFMAX(lambda, last_higher);
00588 if (last_lower != INT_MAX)
00589 lambda = (lambda+last_lower)>>1;
00590 else if ((int64_t)lambda + up_step > INT_MAX)
00591 return -1;
00592 else
00593 lambda += up_step;
00594 up_step = FFMIN((int64_t)up_step*5, INT_MAX);
00595 down_step = 1<<LAMBDA_FRAC_BITS;
00596 }
00597 }
00598
00599 ctx->lambda = lambda;
00600 return 0;
00601 }
00602
00603 static int dnxhd_find_qscale(DNXHDEncContext *ctx)
00604 {
00605 int bits = 0;
00606 int up_step = 1;
00607 int down_step = 1;
00608 int last_higher = 0;
00609 int last_lower = INT_MAX;
00610 int qscale;
00611 int x, y;
00612
00613 qscale = ctx->qscale;
00614 for (;;) {
00615 bits = 0;
00616 ctx->qscale = qscale;
00617
00618 ctx->m.avctx->execute2(ctx->m.avctx, dnxhd_calc_bits_thread, NULL, NULL, ctx->m.mb_height);
00619 for (y = 0; y < ctx->m.mb_height; y++) {
00620 for (x = 0; x < ctx->m.mb_width; x++)
00621 bits += ctx->mb_rc[qscale][y*ctx->m.mb_width+x].bits;
00622 bits = (bits+31)&~31;
00623 if (bits > ctx->frame_bits)
00624 break;
00625 }
00626
00627
00628 if (bits < ctx->frame_bits) {
00629 if (qscale == 1)
00630 return 1;
00631 if (last_higher == qscale - 1) {
00632 qscale = last_higher;
00633 break;
00634 }
00635 last_lower = FFMIN(qscale, last_lower);
00636 if (last_higher != 0)
00637 qscale = (qscale+last_higher)>>1;
00638 else
00639 qscale -= down_step++;
00640 if (qscale < 1)
00641 qscale = 1;
00642 up_step = 1;
00643 } else {
00644 if (last_lower == qscale + 1)
00645 break;
00646 last_higher = FFMAX(qscale, last_higher);
00647 if (last_lower != INT_MAX)
00648 qscale = (qscale+last_lower)>>1;
00649 else
00650 qscale += up_step++;
00651 down_step = 1;
00652 if (qscale >= ctx->m.avctx->qmax)
00653 return -1;
00654 }
00655 }
00656
00657 ctx->qscale = qscale;
00658 return 0;
00659 }
00660
/* Radix sort parameters: RADIX_PASSES digits of BUCKET_BITS bits each. */
#define BUCKET_BITS 8
#define RADIX_PASSES 4
#define NBUCKETS (1 << BUCKET_BITS)

/**
 * Bucket for the BUCKET_BITS-wide digit of value starting at bit shift.
 * The digit is inverted (NBUCKETS - 1 - digit), so larger digits map to
 * lower bucket numbers.
 */
static inline int get_bucket(int value, int shift)
{
    const int digit = (value >> shift) & (NBUCKETS - 1);

    return NBUCKETS - 1 - digit;
}
00671
00672 static void radix_count(const RCCMPEntry *data, int size, int buckets[RADIX_PASSES][NBUCKETS])
00673 {
00674 int i, j;
00675 memset(buckets, 0, sizeof(buckets[0][0]) * RADIX_PASSES * NBUCKETS);
00676 for (i = 0; i < size; i++) {
00677 int v = data[i].value;
00678 for (j = 0; j < RADIX_PASSES; j++) {
00679 buckets[j][get_bucket(v, 0)]++;
00680 v >>= BUCKET_BITS;
00681 }
00682 assert(!v);
00683 }
00684 for (j = 0; j < RADIX_PASSES; j++) {
00685 int offset = size;
00686 for (i = NBUCKETS - 1; i >= 0; i--)
00687 buckets[j][i] = offset -= buckets[j][i];
00688 assert(!buckets[j][0]);
00689 }
00690 }
00691
00692 static void radix_sort_pass(RCCMPEntry *dst, const RCCMPEntry *data, int size, int buckets[NBUCKETS], int pass)
00693 {
00694 int shift = pass * BUCKET_BITS;
00695 int i;
00696 for (i = 0; i < size; i++) {
00697 int v = get_bucket(data[i].value, shift);
00698 int pos = buckets[v]++;
00699 dst[pos] = data[i];
00700 }
00701 }
00702
00703 static void radix_sort(RCCMPEntry *data, int size)
00704 {
00705 int buckets[RADIX_PASSES][NBUCKETS];
00706 RCCMPEntry *tmp = av_malloc(sizeof(*tmp) * size);
00707 radix_count(data, size, buckets);
00708 radix_sort_pass(tmp, data, size, buckets[0], 0);
00709 radix_sort_pass(data, tmp, size, buckets[1], 1);
00710 if (buckets[2][NBUCKETS - 1] || buckets[3][NBUCKETS - 1]) {
00711 radix_sort_pass(tmp, data, size, buckets[2], 2);
00712 radix_sort_pass(data, tmp, size, buckets[3], 3);
00713 }
00714 av_free(tmp);
00715 }
00716
00717 static int dnxhd_encode_fast(AVCodecContext *avctx, DNXHDEncContext *ctx)
00718 {
00719 int max_bits = 0;
00720 int ret, x, y;
00721 if ((ret = dnxhd_find_qscale(ctx)) < 0)
00722 return -1;
00723 for (y = 0; y < ctx->m.mb_height; y++) {
00724 for (x = 0; x < ctx->m.mb_width; x++) {
00725 int mb = y*ctx->m.mb_width+x;
00726 int delta_bits;
00727 ctx->mb_qscale[mb] = ctx->qscale;
00728 ctx->mb_bits[mb] = ctx->mb_rc[ctx->qscale][mb].bits;
00729 max_bits += ctx->mb_rc[ctx->qscale][mb].bits;
00730 if (!RC_VARIANCE) {
00731 delta_bits = ctx->mb_rc[ctx->qscale][mb].bits-ctx->mb_rc[ctx->qscale+1][mb].bits;
00732 ctx->mb_cmp[mb].mb = mb;
00733 ctx->mb_cmp[mb].value = delta_bits ?
00734 ((ctx->mb_rc[ctx->qscale][mb].ssd-ctx->mb_rc[ctx->qscale+1][mb].ssd)*100)/delta_bits
00735 : INT_MIN;
00736 }
00737 }
00738 max_bits += 31;
00739 }
00740 if (!ret) {
00741 if (RC_VARIANCE)
00742 avctx->execute2(avctx, dnxhd_mb_var_thread, NULL, NULL, ctx->m.mb_height);
00743 radix_sort(ctx->mb_cmp, ctx->m.mb_num);
00744 for (x = 0; x < ctx->m.mb_num && max_bits > ctx->frame_bits; x++) {
00745 int mb = ctx->mb_cmp[x].mb;
00746 max_bits -= ctx->mb_rc[ctx->qscale][mb].bits - ctx->mb_rc[ctx->qscale+1][mb].bits;
00747 ctx->mb_qscale[mb] = ctx->qscale+1;
00748 ctx->mb_bits[mb] = ctx->mb_rc[ctx->qscale+1][mb].bits;
00749 }
00750 }
00751 return 0;
00752 }
00753
00754 static void dnxhd_load_picture(DNXHDEncContext *ctx, const AVFrame *frame)
00755 {
00756 int i;
00757
00758 for (i = 0; i < 3; i++) {
00759 ctx->frame.data[i] = frame->data[i];
00760 ctx->frame.linesize[i] = frame->linesize[i];
00761 }
00762
00763 for (i = 0; i < ctx->m.avctx->thread_count; i++) {
00764 ctx->thread[i]->m.linesize = ctx->frame.linesize[0]<<ctx->interlaced;
00765 ctx->thread[i]->m.uvlinesize = ctx->frame.linesize[1]<<ctx->interlaced;
00766 ctx->thread[i]->dct_y_offset = ctx->m.linesize *8;
00767 ctx->thread[i]->dct_uv_offset = ctx->m.uvlinesize*8;
00768 }
00769
00770 ctx->frame.interlaced_frame = frame->interlaced_frame;
00771 ctx->cur_field = frame->interlaced_frame && !frame->top_field_first;
00772 }
00773
00774 static int dnxhd_encode_picture(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data)
00775 {
00776 DNXHDEncContext *ctx = avctx->priv_data;
00777 int first_field = 1;
00778 int offset, i, ret;
00779
00780 if (buf_size < ctx->cid_table->frame_size) {
00781 av_log(avctx, AV_LOG_ERROR, "output buffer is too small to compress picture\n");
00782 return -1;
00783 }
00784
00785 dnxhd_load_picture(ctx, data);
00786
00787 encode_coding_unit:
00788 for (i = 0; i < 3; i++) {
00789 ctx->src[i] = ctx->frame.data[i];
00790 if (ctx->interlaced && ctx->cur_field)
00791 ctx->src[i] += ctx->frame.linesize[i];
00792 }
00793
00794 dnxhd_write_header(avctx, buf);
00795
00796 if (avctx->mb_decision == FF_MB_DECISION_RD)
00797 ret = dnxhd_encode_rdo(avctx, ctx);
00798 else
00799 ret = dnxhd_encode_fast(avctx, ctx);
00800 if (ret < 0) {
00801 av_log(avctx, AV_LOG_ERROR,
00802 "picture could not fit ratecontrol constraints, increase qmax\n");
00803 return -1;
00804 }
00805
00806 dnxhd_setup_threads_slices(ctx);
00807
00808 offset = 0;
00809 for (i = 0; i < ctx->m.mb_height; i++) {
00810 AV_WB32(ctx->msip + i * 4, offset);
00811 offset += ctx->slice_size[i];
00812 assert(!(ctx->slice_size[i] & 3));
00813 }
00814
00815 avctx->execute2(avctx, dnxhd_encode_thread, buf, NULL, ctx->m.mb_height);
00816
00817 assert(640 + offset + 4 <= ctx->cid_table->coding_unit_size);
00818 memset(buf + 640 + offset, 0, ctx->cid_table->coding_unit_size - 4 - offset - 640);
00819
00820 AV_WB32(buf + ctx->cid_table->coding_unit_size - 4, 0x600DC0DE);
00821
00822 if (ctx->interlaced && first_field) {
00823 first_field = 0;
00824 ctx->cur_field ^= 1;
00825 buf += ctx->cid_table->coding_unit_size;
00826 buf_size -= ctx->cid_table->coding_unit_size;
00827 goto encode_coding_unit;
00828 }
00829
00830 ctx->frame.quality = ctx->qscale*FF_QP2LAMBDA;
00831
00832 return ctx->cid_table->frame_size;
00833 }
00834
00835 static int dnxhd_encode_end(AVCodecContext *avctx)
00836 {
00837 DNXHDEncContext *ctx = avctx->priv_data;
00838 int max_level = 1<<(ctx->cid_table->bit_depth+2);
00839 int i;
00840
00841 av_free(ctx->vlc_codes-max_level*2);
00842 av_free(ctx->vlc_bits -max_level*2);
00843 av_freep(&ctx->run_codes);
00844 av_freep(&ctx->run_bits);
00845
00846 av_freep(&ctx->mb_bits);
00847 av_freep(&ctx->mb_qscale);
00848 av_freep(&ctx->mb_rc);
00849 av_freep(&ctx->mb_cmp);
00850 av_freep(&ctx->slice_size);
00851 av_freep(&ctx->slice_offs);
00852
00853 av_freep(&ctx->qmatrix_c);
00854 av_freep(&ctx->qmatrix_l);
00855 av_freep(&ctx->qmatrix_c16);
00856 av_freep(&ctx->qmatrix_l16);
00857
00858 for (i = 1; i < avctx->thread_count; i++)
00859 av_freep(&ctx->thread[i]);
00860
00861 return 0;
00862 }
00863
00864 AVCodec ff_dnxhd_encoder = {
00865 "dnxhd",
00866 AVMEDIA_TYPE_VIDEO,
00867 CODEC_ID_DNXHD,
00868 sizeof(DNXHDEncContext),
00869 dnxhd_encode_init,
00870 dnxhd_encode_picture,
00871 dnxhd_encode_end,
00872 .capabilities = CODEC_CAP_SLICE_THREADS,
00873 .pix_fmts = (const enum PixelFormat[]){PIX_FMT_YUV422P, PIX_FMT_NONE},
00874 .long_name = NULL_IF_CONFIG_SMALL("VC3/DNxHD"),
00875 .priv_class = &class,
00876 };