39 int n,
int height,
int y_offset,
int list)
42 int filter_height_down = (raw_my & 3) ? 3 : 0;
43 int full_my = (raw_my >> 2) + y_offset;
44 int bottom = full_my + filter_height_down +
height;
48 return FFMAX(0, bottom);
52 int16_t refs[2][48],
int n,
53 int height,
int y_offset,
int list0,
54 int list1,
int *nrefs)
70 if (refs[0][ref_n] < 0)
72 refs[0][ref_n] =
FFMAX(refs[0][ref_n], my);
83 if (refs[1][ref_n] < 0)
85 refs[1][ref_n] =
FFMAX(refs[1][ref_n], my);
97 const int mb_xy = sl->
mb_xy;
100 int nrefs[2] = { 0 };
103 memset(refs, -1,
sizeof(refs));
123 for (i = 0; i < 4; i++) {
126 int y_offset = (i & 2) << 2;
130 IS_DIR(sub_mb_type, 0, 0),
131 IS_DIR(sub_mb_type, 0, 1),
135 IS_DIR(sub_mb_type, 0, 0),
136 IS_DIR(sub_mb_type, 0, 1),
139 IS_DIR(sub_mb_type, 0, 0),
140 IS_DIR(sub_mb_type, 0, 1),
144 IS_DIR(sub_mb_type, 0, 0),
145 IS_DIR(sub_mb_type, 0, 1),
148 IS_DIR(sub_mb_type, 0, 0),
149 IS_DIR(sub_mb_type, 0, 1),
154 for (j = 0; j < 4; j++) {
155 int sub_y_offset = y_offset + 2 * (j & 2);
157 IS_DIR(sub_mb_type, 0, 0),
158 IS_DIR(sub_mb_type, 0, 1),
165 for (list = sl->
list_count - 1; list >= 0; list--)
166 for (ref = 0; ref < 48 && nrefs[list]; ref++) {
167 int row = refs[list][
ref];
172 int pic_height = 16 * h->
mb_height >> ref_field_picture;
180 FFMIN((row >> 1) - !(row & 1),
184 FFMIN((row >> 1), pic_height - 1),
188 FFMIN(row * 2 + ref_field,
193 FFMIN(row, pic_height - 1),
197 FFMIN(row, pic_height - 1),
210 int src_x_offset,
int src_y_offset,
213 int pixel_shift,
int chroma_idc)
215 const int mx = sl->
mv_cache[list][
scan8[
n]][0] + src_x_offset * 8;
217 const int luma_xy = (mx & 3) + ((my & 3) << 2);
222 int extra_height = 0;
224 const int full_mx = mx >> 2;
225 const int full_my = my >> 2;
226 const int pic_width = 16 * h->
mb_width;
235 if (full_mx < 0 - extra_width ||
236 full_my < 0 - extra_height ||
237 full_mx + 16 > pic_width + extra_width ||
238 full_my + 16 > pic_height + extra_height) {
242 16 + 5, 16 + 5 , full_mx - 2,
243 full_my - 2, pic_width, pic_height);
255 if (chroma_idc == 3 ) {
262 full_mx - 2, full_my - 2,
263 pic_width, pic_height);
266 qpix_op[luma_xy](dest_cb, src_cb, sl->
mb_linesize);
276 full_mx - 2, full_my - 2,
277 pic_width, pic_height);
280 qpix_op[luma_xy](dest_cr, src_cr, sl->
mb_linesize);
286 ysh = 3 - (chroma_idc == 2 );
287 if (chroma_idc == 1 &&
MB_FIELD(sl)) {
290 emu |= (my >> 3) < 0 || (my >> 3) + 8 >= (pic_height >> 1);
293 src_cb = pic->
data[1] + ((mx >> 3) * (1 << pixel_shift)) +
295 src_cr = pic->
data[2] + ((mx >> 3) * (1 << pixel_shift)) +
301 9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
302 pic_width >> 1, pic_height >> (chroma_idc == 1 ));
306 height >> (chroma_idc == 1 ),
307 mx & 7, ((
unsigned)my << (chroma_idc == 2 )) & 7);
312 9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
313 pic_width >> 1, pic_height >> (chroma_idc == 1 ));
316 chroma_op(dest_cr, src_cr, sl->
mb_uvlinesize, height >> (chroma_idc == 1 ),
317 mx & 7, ((
unsigned)my << (chroma_idc == 2 )) & 7);
325 int x_offset,
int y_offset,
330 int list0,
int list1,
331 int pixel_shift,
int chroma_idc)
336 dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->
mb_linesize;
337 if (chroma_idc == 3 ) {
338 dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->
mb_linesize;
339 dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->
mb_linesize;
340 }
else if (chroma_idc == 2 ) {
341 dest_cb += (x_offset << pixel_shift) + 2 * y_offset * sl->
mb_uvlinesize;
342 dest_cr += (x_offset << pixel_shift) + 2 * y_offset * sl->
mb_uvlinesize;
344 dest_cb += (x_offset << pixel_shift) + y_offset * sl->
mb_uvlinesize;
345 dest_cr += (x_offset << pixel_shift) + y_offset * sl->
mb_uvlinesize;
347 x_offset += 8 * sl->
mb_x;
352 mc_dir_part(h, sl, ref, n, square, height, delta, 0,
353 dest_y, dest_cb, dest_cr, x_offset, y_offset,
354 qpix_op, chroma_op, pixel_shift, chroma_idc);
357 chroma_op = chroma_avg;
362 mc_dir_part(h, sl, ref, n, square, height, delta, 1,
363 dest_y, dest_cb, dest_cr, x_offset, y_offset,
364 qpix_op, chroma_op, pixel_shift, chroma_idc);
373 int x_offset,
int y_offset,
380 int list0,
int list1,
381 int pixel_shift,
int chroma_idc)
385 dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->
mb_linesize;
386 if (chroma_idc == 3 ) {
388 chroma_weight_avg = luma_weight_avg;
389 chroma_weight_op = luma_weight_op;
390 dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->
mb_linesize;
391 dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->
mb_linesize;
392 }
else if (chroma_idc == 2 ) {
394 dest_cb += (x_offset << pixel_shift) + 2 * y_offset * sl->
mb_uvlinesize;
395 dest_cr += (x_offset << pixel_shift) + 2 * y_offset * sl->
mb_uvlinesize;
397 chroma_height = height >> 1;
398 dest_cb += (x_offset << pixel_shift) + y_offset * sl->
mb_uvlinesize;
399 dest_cr += (x_offset << pixel_shift) + y_offset * sl->
mb_uvlinesize;
401 x_offset += 8 * sl->
mb_x;
404 if (list0 && list1) {
414 dest_y, dest_cb, dest_cr,
415 x_offset, y_offset, qpix_put, chroma_put,
416 pixel_shift, chroma_idc);
418 tmp_y, tmp_cb, tmp_cr,
419 x_offset, y_offset, qpix_put, chroma_put,
420 pixel_shift, chroma_idc);
424 int weight1 = 64 - weight0;
426 height, 5, weight0, weight1, 0);
429 chroma_height, 5, weight0, weight1, 0);
431 chroma_height, 5, weight0, weight1, 0);
434 luma_weight_avg(dest_y, tmp_y, sl->
mb_linesize, height,
441 chroma_weight_avg(dest_cb, tmp_cb, sl->
mb_uvlinesize, chroma_height,
447 chroma_weight_avg(dest_cr, tmp_cr, sl->
mb_uvlinesize, chroma_height,
456 int list = list1 ? 1 : 0;
459 mc_dir_part(h, sl, ref, n, square, height, delta, list,
460 dest_y, dest_cb, dest_cr, x_offset, y_offset,
461 qpix_put, chroma_put, pixel_shift, chroma_idc);
483 int list,
int pixel_shift,
493 int off = mx * (1<< pixel_shift) +
497 if (chroma_idc == 3 ) {
501 off= ((mx>>1)+64) * (1<<pixel_shift) + ((my>>1) + (sl->
mb_x&7))*sl->
uvlinesize;
510 int linesize,
int uvlinesize,
511 int xchg,
int chroma444,
512 int simple,
int pixel_shift)
533 deblock_topleft = (sl->
mb_x > 0);
537 src_y -= linesize + 1 + pixel_shift;
538 src_cb -= uvlinesize + 1 + pixel_shift;
539 src_cr -= uvlinesize + 1 + pixel_shift;
544 #define XCHG(a, b, xchg) \
547 AV_SWAP64(b + 0, a + 0); \
548 AV_SWAP64(b + 8, a + 8); \
558 if (deblock_topleft) {
559 XCHG(top_border_m1 + (8 << pixel_shift),
560 src_y - (7 << pixel_shift), 1);
562 XCHG(top_border + (0 << pixel_shift), src_y + (1 << pixel_shift), xchg);
563 XCHG(top_border + (8 << pixel_shift), src_y + (9 << pixel_shift), 1);
566 src_y + (17 << pixel_shift), 1);
570 if (deblock_topleft) {
571 XCHG(top_border_m1 + (24 << pixel_shift), src_cb - (7 << pixel_shift), 1);
572 XCHG(top_border_m1 + (40 << pixel_shift), src_cr - (7 << pixel_shift), 1);
574 XCHG(top_border + (16 << pixel_shift), src_cb + (1 << pixel_shift), xchg);
575 XCHG(top_border + (24 << pixel_shift), src_cb + (9 << pixel_shift), 1);
576 XCHG(top_border + (32 << pixel_shift), src_cr + (1 << pixel_shift), xchg);
577 XCHG(top_border + (40 << pixel_shift), src_cr + (9 << pixel_shift), 1);
579 XCHG(sl->
top_borders[top_idx][sl->
mb_x + 1] + (16 << pixel_shift), src_cb + (17 << pixel_shift), 1);
580 XCHG(sl->
top_borders[top_idx][sl->
mb_x + 1] + (32 << pixel_shift), src_cr + (17 << pixel_shift), 1);
583 if (deblock_topleft) {
584 XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1);
585 XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1);
587 XCHG(top_border + (16 << pixel_shift), src_cb + 1 + pixel_shift, 1);
588 XCHG(top_border + (24 << pixel_shift), src_cr + 1 + pixel_shift, 1);
597 if (high_bit_depth) {
606 if (high_bit_depth) {
614 int mb_type,
int simple,
615 int transform_bypass,
617 const int *block_offset,
625 block_offset += 16 * p;
628 if (transform_bypass) {
635 for (i = 0; i < 16; i += 4) {
636 uint8_t *
const ptr = dest_y + block_offset[i];
640 h->
hpc.
pred8x8l_add[dir](ptr, sl->
mb + (i * 16 + p * 256 << pixel_shift), linesize);
643 (sl-> topleft_samples_available << i) & 0x8000,
650 if (nnz == 1 &&
dctcoef_get(sl->
mb, pixel_shift, i * 16 + p * 256))
651 idct_dc_add(ptr, sl->
mb + (i * 16 + p * 256 << pixel_shift), linesize);
653 idct_add(ptr, sl->
mb + (i * 16 + p * 256 << pixel_shift), linesize);
658 if (transform_bypass) {
665 for (i = 0; i < 16; i++) {
666 uint8_t *
const ptr = dest_y + block_offset[i];
670 h->
hpc.
pred4x4_add[dir](ptr, sl->
mb + (i * 16 + p * 256 << pixel_shift), linesize);
678 if (!topright_avail) {
680 tr_high = ((uint16_t *)ptr)[3 - linesize / 2] * 0x0001000100010001ULL;
681 topright = (
uint8_t *)&tr_high;
683 tr = ptr[3 - linesize] * 0x01010101
u;
687 topright = ptr + (4 << pixel_shift) - linesize;
694 if (nnz == 1 &&
dctcoef_get(sl->
mb, pixel_shift, i * 16 + p * 256))
695 idct_dc_add(ptr, sl->
mb + (i * 16 + p * 256 << pixel_shift), linesize);
697 idct_add(ptr, sl->
mb + (i * 16 + p * 256 << pixel_shift), linesize);
705 if (!transform_bypass)
710 static const uint8_t dc_mapping[16] = {
711 0 * 16, 1 * 16, 4 * 16, 5 * 16,
712 2 * 16, 3 * 16, 6 * 16, 7 * 16,
713 8 * 16, 9 * 16, 12 * 16, 13 * 16,
714 10 * 16, 11 * 16, 14 * 16, 15 * 16
716 for (i = 0; i < 16; i++)
718 pixel_shift, dc_mapping[i],
727 int mb_type,
int simple,
728 int transform_bypass,
730 const int *block_offset,
736 block_offset += 16 * p;
739 if (transform_bypass) {
744 sl->
mb + (p * 256 << pixel_shift),
747 for (i = 0; i < 16; i++)
751 sl->
mb + (i * 16 + p * 256 << pixel_shift),
756 sl->
mb + (p * 256 << pixel_shift),
760 }
else if (sl->
cbp & 15) {
761 if (transform_bypass) {
762 const int di =
IS_8x8DCT(mb_type) ? 4 : 1;
765 for (i = 0; i < 16; i += di)
767 idct_add(dest_y + block_offset[i],
768 sl->
mb + (i * 16 + p * 256 << pixel_shift),
773 sl->
mb + (p * 256 << pixel_shift),
778 sl->
mb + (p * 256 << pixel_shift),
800 const int mb_xy = sl->
mb_xy;
802 int is_complex = CONFIG_SMALL || sl->
is_complex ||
807 hl_decode_mb_444_complex(h, sl);
809 hl_decode_mb_444_simple_8(h, sl);
810 }
else if (is_complex) {
811 hl_decode_mb_complex(h, sl);
813 hl_decode_mb_simple_16(h, sl);
815 hl_decode_mb_simple_8(h, sl);
static void await_references(const H264Context *h, H264SliceContext *sl)
Wait until all reference frames are available for MC operations.
void(* h264_idct_add)(uint8_t *dst, int16_t *block, int stride)
void(* prefetch)(uint8_t *buf, ptrdiff_t stride, int h)
Prefetch memory into cache (if supported by hardware).
void(* pred8x8l_add[2])(uint8_t *pix, int16_t *block, ptrdiff_t stride)
int16_t mv_cache[2][5 *8][2]
Motion vector cache.
static av_always_inline void prefetch_motion(const H264Context *h, H264SliceContext *sl, int list, int pixel_shift, int chroma_idc)
unsigned int topleft_samples_available
int chroma_weight[48][2][2][2]
void(* qpel_mc_func)(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void(* pred8x8l_filter_add[2])(uint8_t *pix, int16_t *block, int topleft, int topright, ptrdiff_t stride)
void ff_thread_await_progress(ThreadFrame *f, int n, int field)
Wait for earlier decoding threads to finish reference pictures.
void(* h264_add_pixels4_clear)(uint8_t *dst, int16_t *block, int stride)
void(* pred16x16_add[3])(uint8_t *pix, const int *block_offset, int16_t *block, ptrdiff_t stride)
void(* h264_idct_add16)(uint8_t *dst, const int *blockoffset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
void(* emulated_edge_mc)(uint8_t *dst, const uint8_t *src, ptrdiff_t dst_linesize, ptrdiff_t src_linesize, int block_w, int block_h, int src_x, int src_y, int w, int h)
Copy a rectangular area of samples to a temporary buffer and replicate the border samples...
uint16_t sub_mb_type[4]
As a DCT coefficient is int32_t in high bit depth, we need to reserve twice the space.
#define av_assert2(cond)
assert() equivalent that is placed in speed-critical code.
int field_picture
Whether or not the picture was encoded in separate fields.
void ff_h264_hl_decode_mb(const H264Context *h, H264SliceContext *sl)
Multithreading support functions.
#define LUMA_DC_BLOCK_INDEX
uint8_t(*[2] top_borders)[(16 *3)*2]
#define IS_DIR(a, part, list)
uint32_t(*[6] dequant4_coeff)[16]
void(* h264_add_pixels8_clear)(uint8_t *dst, int16_t *block, int stride)
int luma_weight[48][2][2]
void(* pred4x4[9+3+3])(uint8_t *src, const uint8_t *topright, ptrdiff_t stride)
static av_always_inline void mc_part_weighted(const H264Context *h, H264SliceContext *sl, int n, int square, int height, int delta, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, int x_offset, int y_offset, const qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op, h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg, int list0, int list1, int pixel_shift, int chroma_idc)
#define AV_CODEC_FLAG_GRAY
Only decode/encode grayscale.
H.264 / AVC / MPEG-4 part10 codec.
unsigned int topright_samples_available
int chroma_log2_weight_denom
void(* h264_biweight_func)(uint8_t *dst, uint8_t *src, int stride, int height, int log2_denom, int weightd, int weights, int offset)
int8_t intra4x4_pred_mode_cache[5 *8]
void(* h264_idct8_add)(uint8_t *dst, int16_t *block, int stride)
int deblocking_filter
disable_deblocking_filter_idc with 1 <-> 0
void(* h264_luma_dc_dequant_idct)(int16_t *output, int16_t *input, int qmul)
H264SEIUnregistered unregistered
static const uint8_t offset[127][2]
static const uint8_t scan8[16 *3+3]
int16_t mb_luma_dc[3][16 *2]
as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too lar...
static av_always_inline void mc_part_std(const H264Context *h, H264SliceContext *sl, int n, int square, int height, int delta, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, int x_offset, int y_offset, const qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, const qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg, int list0, int list1, int pixel_shift, int chroma_idc)
uint16_t * slice_table
slice_table_base + 2*mb_stride + 1
typedef void(APIENTRY *FF_PFNGLACTIVETEXTUREPROC)(GLenum texture)
GLsizei GLboolean const GLfloat * value
void(* h264_idct_dc_add)(uint8_t *dst, int16_t *block, int stride)
int luma_log2_weight_denom
static av_always_inline void hl_decode_mb_idct_luma(const H264Context *h, H264SliceContext *sl, int mb_type, int simple, int transform_bypass, int pixel_shift, const int *block_offset, int linesize, uint8_t *dest_y, int p)
void(* pred4x4_add[2])(uint8_t *pix, int16_t *block, ptrdiff_t stride)
uint8_t * edge_emu_buffer
static av_always_inline void dctcoef_set(int16_t *mb, int high_bit_depth, int index, int value)
Libavcodec external API header.
uint8_t * data
The data buffer.
int implicit_weight[48][48][2]
static av_always_inline int dctcoef_get(int16_t *mb, int high_bit_depth, int index)
void(* pred16x16[4+3+2])(uint8_t *src, ptrdiff_t stride)
void(* h264_idct8_add4)(uint8_t *dst, const int *blockoffset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
uint8_t non_zero_count_cache[15 *8]
non zero coeff count cache.
int pixel_shift
0 for 8-bit H.264, 1 for high-bit-depth H.264
void(* h264_weight_func)(uint8_t *block, int stride, int height, int log2_denom, int weight, int offset)
ptrdiff_t mb_linesize
may be equal to s->linesize or s->linesize * 2, for mbaff
void(* h264_idct8_dc_add)(uint8_t *dst, int16_t *block, int stride)
static int get_lowest_part_list_y(H264SliceContext *sl, int n, int height, int y_offset, int list)
static av_always_inline void xchg_mb_border(const H264Context *h, H264SliceContext *sl, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int chroma444, int simple, int pixel_shift)
static av_always_inline void hl_decode_mb_predict_luma(const H264Context *h, H264SliceContext *sl, int mb_type, int simple, int transform_bypass, int pixel_shift, const int *block_offset, int linesize, uint8_t *dest_y, int p)
void(* h264_chroma_mc_func)(uint8_t *dst, uint8_t *src, int srcStride, int h, int x, int y)
common internal and external API header
static int ref[MAX_W *MAX_W]
static void get_lowest_part_y(const H264Context *h, H264SliceContext *sl, int16_t refs[2][48], int n, int height, int y_offset, int list0, int list1, int *nrefs)
int8_t ref_cache[2][5 *8]
void(* h264_idct_add16intra)(uint8_t *dst, const int *blockoffset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
static av_always_inline void mc_dir_part(const H264Context *h, H264SliceContext *sl, H264Ref *pic, int n, int square, int height, int delta, int list, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, int src_x_offset, int src_y_offset, const qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op, int pixel_shift, int chroma_idc)
H264Ref ref_list[2][48]
0..15: frame refs, 16..47: mbaff field refs.
uint8_t * bipred_scratchpad
void(* pred8x8l[9+3])(uint8_t *src, int topleft, int topright, ptrdiff_t stride)