FFmpeg
vf_dnn_detect.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 /**
20  * @file
21  * implementing an object detecting filter using deep learning networks.
22  */
23 
24 #include "libavutil/file_open.h"
25 #include "libavutil/mem.h"
26 #include "libavutil/opt.h"
27 #include "filters.h"
28 #include "dnn_filter_common.h"
29 #include "video.h"
30 #include "libavutil/time.h"
31 #include "libavutil/avstring.h"
33 #include "libavutil/fifo.h"
34 
35 typedef enum {
41 
42 typedef struct DnnDetectContext {
43  const AVClass *class;
45  float confidence;
47  char **labels;
50  int cell_w;
51  int cell_h;
56  char *anchors_str;
57  float *anchors;
58  int nb_anchor;
60 
61 #define OFFSET(x) offsetof(DnnDetectContext, dnnctx.x)
62 #define OFFSET2(x) offsetof(DnnDetectContext, x)
63 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM
64 static const AVOption dnn_detect_options[] = {
65  { "dnn_backend", "DNN backend", OFFSET(backend_type), AV_OPT_TYPE_INT, { .i64 = DNN_OV }, INT_MIN, INT_MAX, FLAGS, .unit = "backend" },
66 #if (CONFIG_LIBTENSORFLOW == 1)
67  { "tensorflow", "tensorflow backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = DNN_TF }, 0, 0, FLAGS, .unit = "backend" },
68 #endif
69 #if (CONFIG_LIBOPENVINO == 1)
70  { "openvino", "openvino backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = DNN_OV }, 0, 0, FLAGS, .unit = "backend" },
71 #endif
72  { "confidence", "threshold of confidence", OFFSET2(confidence), AV_OPT_TYPE_FLOAT, { .dbl = 0.5 }, 0, 1, FLAGS},
73  { "labels", "path to labels file", OFFSET2(labels_filename), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
74  { "model_type", "DNN detection model type", OFFSET2(model_type), AV_OPT_TYPE_INT, { .i64 = DDMT_SSD }, INT_MIN, INT_MAX, FLAGS, .unit = "model_type" },
75  { "ssd", "output shape [1, 1, N, 7]", 0, AV_OPT_TYPE_CONST, { .i64 = DDMT_SSD }, 0, 0, FLAGS, .unit = "model_type" },
76  { "yolo", "output shape [1, N*Cx*Cy*DetectionBox]", 0, AV_OPT_TYPE_CONST, { .i64 = DDMT_YOLOV1V2 }, 0, 0, FLAGS, .unit = "model_type" },
77  { "yolov3", "outputs shape [1, N*D, Cx, Cy]", 0, AV_OPT_TYPE_CONST, { .i64 = DDMT_YOLOV3 }, 0, 0, FLAGS, .unit = "model_type" },
78  { "yolov4", "outputs shape [1, N*D, Cx, Cy]", 0, AV_OPT_TYPE_CONST, { .i64 = DDMT_YOLOV4 }, 0, 0, FLAGS, .unit = "model_type" },
79  { "cell_w", "cell width", OFFSET2(cell_w), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INTMAX_MAX, FLAGS },
80  { "cell_h", "cell height", OFFSET2(cell_h), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INTMAX_MAX, FLAGS },
81  { "nb_classes", "The number of class", OFFSET2(nb_classes), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INTMAX_MAX, FLAGS },
82  { "anchors", "anchors, splited by '&'", OFFSET2(anchors_str), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
83  { NULL }
84 };
85 
87 
88 static inline float sigmoid(float x) {
89  return 1.f / (1.f + exp(-x));
90 }
91 
/**
 * Identity activation: the raw value is handed back unchanged.
 * Shares the float(float) signature of sigmoid().
 */
static inline float linear(float x)
{
    const float passthrough = x;

    return passthrough;
}
95 
/**
 * Return the index of the class with the highest score.
 *
 * @param nb_classes number of class scores to inspect
 * @param cell_size  distance, in floats, between two consecutive class scores
 * @param label_data pointer to the score of class 0
 * @return index of the first class holding the maximum score, or 0 when
 *         nb_classes is not positive
 */
static int dnn_detect_get_label_id(int nb_classes, int cell_size, float *label_data)
{
    float max_prob;
    int label_id = 0;

    if (nb_classes <= 0)
        return 0;

    /*
     * Seed the maximum with the score of class 0 instead of 0.0: the scores
     * may be raw, all-negative values, in which case a 0.0 seed would always
     * select class 0 regardless of the actual ranking.
     */
    max_prob = label_data[0];
    for (int i = 1; i < nb_classes; i++) {
        float prob = label_data[i * cell_size];

        /* strict comparison keeps the first class on ties */
        if (prob > max_prob) {
            max_prob = prob;
            label_id = i;
        }
    }
    return label_id;
}
108 
/**
 * Parse a '&'-separated list of numbers into a newly allocated float array.
 *
 * The input string is tokenized with av_strtok(); like strtok_r() this is
 * expected to write into anchors_str, so the caller's string should be
 * treated as consumed afterwards.
 *
 * @param anchors_str list such as "10&14&23&27"
 * @param anchors     receives the allocated array on success; left untouched
 *                    on failure. The caller releases it with av_freep().
 * @return number of parsed values, or 0 on failure (NULL argument, allocation
 *         failure, fewer tokens than separators imply, or a token that does
 *         not start with a number)
 */
static int dnn_detect_parse_anchors(char *anchors_str, float **anchors)
{
    char *saveptr = NULL;
    float *anchors_buf;
    int nb_anchor = 1;

    if (!anchors_str || !anchors)
        return 0;

    /* n separators delimit n + 1 values */
    for (const char *p = anchors_str; *p != '\0'; p++) {
        if (*p == '&')
            nb_anchor++;
    }

    anchors_buf = av_mallocz(nb_anchor * sizeof(*anchors_buf));
    if (!anchors_buf) {
        return 0;
    }

    for (int i = 0; i < nb_anchor; i++) {
        char *end = NULL;
        /* only the first call gets the string; later calls continue from saveptr */
        char *token = av_strtok(i ? NULL : anchors_str, "&", &saveptr);

        if (!token) {
            av_freep(&anchors_buf);
            return 0;
        }
        anchors_buf[i] = strtof(token, &end);
        /* reject tokens where no conversion happened instead of storing 0.0 */
        if (end == token) {
            av_freep(&anchors_buf);
            return 0;
        }
    }
    *anchors = anchors_buf;
    return nb_anchor;
}
136 
137 /* Calculate Intersection Over Union */
138 static float dnn_detect_IOU(AVDetectionBBox *bbox1, AVDetectionBBox *bbox2)
139 {
140  float overlapping_width = FFMIN(bbox1->x + bbox1->w, bbox2->x + bbox2->w) - FFMAX(bbox1->x, bbox2->x);
141  float overlapping_height = FFMIN(bbox1->y + bbox1->h, bbox2->y + bbox2->h) - FFMAX(bbox1->y, bbox2->y);
142  float intersection_area =
143  (overlapping_width < 0 || overlapping_height < 0) ? 0 : overlapping_height * overlapping_width;
144  float union_area = bbox1->w * bbox1->h + bbox2->w * bbox2->h - intersection_area;
145  return intersection_area / union_area;
146 }
147 
148 static int dnn_detect_parse_yolo_output(AVFrame *frame, DNNData *output, int output_index,
150 {
152  float conf_threshold = ctx->confidence;
153  int detection_boxes, box_size;
154  int cell_w = 0, cell_h = 0, scale_w = 0, scale_h = 0;
155  int nb_classes = ctx->nb_classes;
156  float *output_data = output[output_index].data;
157  float *anchors = ctx->anchors;
158  AVDetectionBBox *bbox;
159  float (*post_process_raw_data)(float x) = linear;
160  int is_NHWC = 0;
161 
162  if (ctx->model_type == DDMT_YOLOV1V2) {
163  cell_w = ctx->cell_w;
164  cell_h = ctx->cell_h;
165  scale_w = cell_w;
166  scale_h = cell_h;
167  } else {
168  if (output[output_index].dims[2] != output[output_index].dims[3] &&
169  output[output_index].dims[2] == output[output_index].dims[1]) {
170  is_NHWC = 1;
171  cell_w = output[output_index].dims[2];
172  cell_h = output[output_index].dims[1];
173  } else {
174  cell_w = output[output_index].dims[3];
175  cell_h = output[output_index].dims[2];
176  }
177  scale_w = ctx->scale_width;
178  scale_h = ctx->scale_height;
179  }
180  box_size = nb_classes + 5;
181 
182  switch (ctx->model_type) {
183  case DDMT_YOLOV1V2:
184  case DDMT_YOLOV3:
185  post_process_raw_data = linear;
186  break;
187  case DDMT_YOLOV4:
188  post_process_raw_data = sigmoid;
189  break;
190  }
191 
192  if (!cell_h || !cell_w) {
193  av_log(filter_ctx, AV_LOG_ERROR, "cell_w and cell_h are detected\n");
194  return AVERROR(EINVAL);
195  }
196 
197  if (!nb_classes) {
198  av_log(filter_ctx, AV_LOG_ERROR, "nb_classes is not set\n");
199  return AVERROR(EINVAL);
200  }
201 
202  if (!anchors) {
203  av_log(filter_ctx, AV_LOG_ERROR, "anchors is not set\n");
204  return AVERROR(EINVAL);
205  }
206 
207  if (output[output_index].dims[1] * output[output_index].dims[2] *
208  output[output_index].dims[3] % (box_size * cell_w * cell_h)) {
209  av_log(filter_ctx, AV_LOG_ERROR, "wrong cell_w, cell_h or nb_classes\n");
210  return AVERROR(EINVAL);
211  }
212  detection_boxes = output[output_index].dims[1] *
213  output[output_index].dims[2] *
214  output[output_index].dims[3] / box_size / cell_w / cell_h;
215 
216  anchors = anchors + (detection_boxes * output_index * 2);
217  /**
218  * find all candidate bbox
219  * yolo output can be reshaped to [B, N*D, Cx, Cy]
220  * Detection box 'D' has format [`x`, `y`, `h`, `w`, `box_score`, `class_no_1`, ...,]
221  **/
222  for (int box_id = 0; box_id < detection_boxes; box_id++) {
223  for (int cx = 0; cx < cell_w; cx++)
224  for (int cy = 0; cy < cell_h; cy++) {
225  float x, y, w, h, conf;
226  float *detection_boxes_data;
227  int label_id;
228 
229  if (is_NHWC) {
230  detection_boxes_data = output_data +
231  ((cy * cell_w + cx) * detection_boxes + box_id) * box_size;
232  conf = post_process_raw_data(detection_boxes_data[4]);
233  } else {
234  detection_boxes_data = output_data + box_id * box_size * cell_w * cell_h;
235  conf = post_process_raw_data(
236  detection_boxes_data[cy * cell_w + cx + 4 * cell_w * cell_h]);
237  }
238 
239  if (is_NHWC) {
240  x = post_process_raw_data(detection_boxes_data[0]);
241  y = post_process_raw_data(detection_boxes_data[1]);
242  w = detection_boxes_data[2];
243  h = detection_boxes_data[3];
244  label_id = dnn_detect_get_label_id(ctx->nb_classes, 1, detection_boxes_data + 5);
245  conf = conf * post_process_raw_data(detection_boxes_data[label_id + 5]);
246  } else {
247  x = post_process_raw_data(detection_boxes_data[cy * cell_w + cx]);
248  y = post_process_raw_data(detection_boxes_data[cy * cell_w + cx + cell_w * cell_h]);
249  w = detection_boxes_data[cy * cell_w + cx + 2 * cell_w * cell_h];
250  h = detection_boxes_data[cy * cell_w + cx + 3 * cell_w * cell_h];
251  label_id = dnn_detect_get_label_id(ctx->nb_classes, cell_w * cell_h,
252  detection_boxes_data + cy * cell_w + cx + 5 * cell_w * cell_h);
253  conf = conf * post_process_raw_data(
254  detection_boxes_data[cy * cell_w + cx + (label_id + 5) * cell_w * cell_h]);
255  }
256  if (conf < conf_threshold) {
257  continue;
258  }
259 
260  bbox = av_mallocz(sizeof(*bbox));
261  if (!bbox)
262  return AVERROR(ENOMEM);
263 
264  bbox->w = exp(w) * anchors[box_id * 2] * frame->width / scale_w;
265  bbox->h = exp(h) * anchors[box_id * 2 + 1] * frame->height / scale_h;
266  bbox->x = (cx + x) / cell_w * frame->width - bbox->w / 2;
267  bbox->y = (cy + y) / cell_h * frame->height - bbox->h / 2;
268  bbox->detect_confidence = av_make_q((int)(conf * 10000), 10000);
269  if (ctx->labels && label_id < ctx->label_count) {
270  av_strlcpy(bbox->detect_label, ctx->labels[label_id], sizeof(bbox->detect_label));
271  } else {
272  snprintf(bbox->detect_label, sizeof(bbox->detect_label), "%d", label_id);
273  }
274 
275  if (av_fifo_write(ctx->bboxes_fifo, &bbox, 1) < 0) {
276  av_freep(&bbox);
277  return AVERROR(ENOMEM);
278  }
279  bbox = NULL;
280  }
281  }
282  return 0;
283 }
284 
286 {
288  float conf_threshold = ctx->confidence;
289  AVDetectionBBox *bbox;
290  int nb_bboxes = 0;
292  if (av_fifo_can_read(ctx->bboxes_fifo) == 0) {
293  av_log(filter_ctx, AV_LOG_VERBOSE, "nothing detected in this frame.\n");
294  return 0;
295  }
296 
297  /* remove overlap bboxes */
298  for (int i = 0; i < av_fifo_can_read(ctx->bboxes_fifo); i++){
299  av_fifo_peek(ctx->bboxes_fifo, &bbox, 1, i);
300  for (int j = 0; j < av_fifo_can_read(ctx->bboxes_fifo); j++) {
301  AVDetectionBBox *overlap_bbox;
302  av_fifo_peek(ctx->bboxes_fifo, &overlap_bbox, 1, j);
303  if (!strcmp(bbox->detect_label, overlap_bbox->detect_label) &&
304  av_cmp_q(bbox->detect_confidence, overlap_bbox->detect_confidence) < 0 &&
305  dnn_detect_IOU(bbox, overlap_bbox) >= conf_threshold) {
306  bbox->classify_count = -1; // bad result
307  nb_bboxes++;
308  break;
309  }
310  }
311  }
312  nb_bboxes = av_fifo_can_read(ctx->bboxes_fifo) - nb_bboxes;
314  if (!header) {
315  av_log(filter_ctx, AV_LOG_ERROR, "failed to create side data with %d bounding boxes\n", nb_bboxes);
316  return -1;
317  }
318  av_strlcpy(header->source, ctx->dnnctx.model_filename, sizeof(header->source));
319 
320  while(av_fifo_can_read(ctx->bboxes_fifo)) {
321  AVDetectionBBox *candidate_bbox;
322  av_fifo_read(ctx->bboxes_fifo, &candidate_bbox, 1);
323 
324  if (nb_bboxes > 0 && candidate_bbox->classify_count != -1) {
325  bbox = av_get_detection_bbox(header, header->nb_bboxes - nb_bboxes);
326  memcpy(bbox, candidate_bbox, sizeof(*bbox));
327  nb_bboxes--;
328  }
329  av_freep(&candidate_bbox);
330  }
331  return 0;
332 }
333 
335 {
336  int ret = 0;
338  if (ret < 0)
339  return ret;
341  if (ret < 0)
342  return ret;
343  return 0;
344 }
345 
347  AVFilterContext *filter_ctx, int nb_outputs)
348 {
349  int ret = 0;
350  for (int i = 0; i < nb_outputs; i++) {
352  if (ret < 0)
353  return ret;
354  }
356  if (ret < 0)
357  return ret;
358  return 0;
359 }
360 
361 static int dnn_detect_post_proc_ssd(AVFrame *frame, DNNData *output, int nb_outputs,
363 {
365  float conf_threshold = ctx->confidence;
366  int proposal_count = 0;
367  int detect_size = 0;
368  float *detections = NULL, *labels = NULL;
369  int nb_bboxes = 0;
371  AVDetectionBBox *bbox;
372  int scale_w = ctx->scale_width;
373  int scale_h = ctx->scale_height;
374 
375  if (nb_outputs == 1 && output->dims[3] == 7) {
376  proposal_count = output->dims[2];
377  detect_size = output->dims[3];
378  detections = output->data;
379  } else if (nb_outputs == 2 && output[0].dims[3] == 5) {
380  proposal_count = output[0].dims[2];
381  detect_size = output[0].dims[3];
382  detections = output[0].data;
383  labels = output[1].data;
384  } else if (nb_outputs == 2 && output[1].dims[3] == 5) {
385  proposal_count = output[1].dims[2];
386  detect_size = output[1].dims[3];
387  detections = output[1].data;
388  labels = output[0].data;
389  } else {
390  av_log(filter_ctx, AV_LOG_ERROR, "Model output shape doesn't match ssd requirement.\n");
391  return AVERROR(EINVAL);
392  }
393 
394  if (proposal_count == 0)
395  return 0;
396 
397  for (int i = 0; i < proposal_count; ++i) {
398  float conf;
399  if (nb_outputs == 1)
400  conf = detections[i * detect_size + 2];
401  else
402  conf = detections[i * detect_size + 4];
403  if (conf < conf_threshold) {
404  continue;
405  }
406  nb_bboxes++;
407  }
408 
409  if (nb_bboxes == 0) {
410  av_log(filter_ctx, AV_LOG_VERBOSE, "nothing detected in this frame.\n");
411  return 0;
412  }
413 
415  if (!header) {
416  av_log(filter_ctx, AV_LOG_ERROR, "failed to create side data with %d bounding boxes\n", nb_bboxes);
417  return -1;
418  }
419 
420  av_strlcpy(header->source, ctx->dnnctx.model_filename, sizeof(header->source));
421 
422  for (int i = 0; i < proposal_count; ++i) {
423  int av_unused image_id = (int)detections[i * detect_size + 0];
424  int label_id;
425  float conf, x0, y0, x1, y1;
426 
427  if (nb_outputs == 1) {
428  label_id = (int)detections[i * detect_size + 1];
429  conf = detections[i * detect_size + 2];
430  x0 = detections[i * detect_size + 3];
431  y0 = detections[i * detect_size + 4];
432  x1 = detections[i * detect_size + 5];
433  y1 = detections[i * detect_size + 6];
434  } else {
435  label_id = (int)labels[i];
436  x0 = detections[i * detect_size] / scale_w;
437  y0 = detections[i * detect_size + 1] / scale_h;
438  x1 = detections[i * detect_size + 2] / scale_w;
439  y1 = detections[i * detect_size + 3] / scale_h;
440  conf = detections[i * detect_size + 4];
441  }
442 
443  if (conf < conf_threshold) {
444  continue;
445  }
446 
447  bbox = av_get_detection_bbox(header, header->nb_bboxes - nb_bboxes);
448  bbox->x = (int)(x0 * frame->width);
449  bbox->w = (int)(x1 * frame->width) - bbox->x;
450  bbox->y = (int)(y0 * frame->height);
451  bbox->h = (int)(y1 * frame->height) - bbox->y;
452 
453  bbox->detect_confidence = av_make_q((int)(conf * 10000), 10000);
454  bbox->classify_count = 0;
455 
456  if (ctx->labels && label_id < ctx->label_count) {
457  av_strlcpy(bbox->detect_label, ctx->labels[label_id], sizeof(bbox->detect_label));
458  } else {
459  snprintf(bbox->detect_label, sizeof(bbox->detect_label), "%d", label_id);
460  }
461 
462  nb_bboxes--;
463  if (nb_bboxes == 0) {
464  break;
465  }
466  }
467  return 0;
468 }
469 
470 static int dnn_detect_post_proc_ov(AVFrame *frame, DNNData *output, int nb_outputs,
472 {
473  AVFrameSideData *sd;
475  int ret = 0;
476 
478  if (sd) {
479  av_log(filter_ctx, AV_LOG_ERROR, "already have bounding boxes in side data.\n");
480  return -1;
481  }
482 
483  switch (ctx->model_type) {
484  case DDMT_SSD:
486  if (ret < 0)
487  return ret;
488  break;
489  case DDMT_YOLOV1V2:
491  if (ret < 0)
492  return ret;
493  break;
494  case DDMT_YOLOV3:
495  case DDMT_YOLOV4:
497  if (ret < 0)
498  return ret;
499  break;
500  }
501  return 0;
502 }
503 
505 {
507  int proposal_count;
508  float conf_threshold = ctx->confidence;
509  float *conf, *position, *label_id, x0, y0, x1, y1;
510  int nb_bboxes = 0;
511  AVFrameSideData *sd;
512  AVDetectionBBox *bbox;
514 
515  proposal_count = *(float *)(output[0].data);
516  conf = output[1].data;
517  position = output[3].data;
518  label_id = output[2].data;
519 
521  if (sd) {
522  av_log(filter_ctx, AV_LOG_ERROR, "already have dnn bounding boxes in side data.\n");
523  return -1;
524  }
525 
526  for (int i = 0; i < proposal_count; ++i) {
527  if (conf[i] < conf_threshold)
528  continue;
529  nb_bboxes++;
530  }
531 
532  if (nb_bboxes == 0) {
533  av_log(filter_ctx, AV_LOG_VERBOSE, "nothing detected in this frame.\n");
534  return 0;
535  }
536 
538  if (!header) {
539  av_log(filter_ctx, AV_LOG_ERROR, "failed to create side data with %d bounding boxes\n", nb_bboxes);
540  return -1;
541  }
542 
543  av_strlcpy(header->source, ctx->dnnctx.model_filename, sizeof(header->source));
544 
545  for (int i = 0; i < proposal_count; ++i) {
546  y0 = position[i * 4];
547  x0 = position[i * 4 + 1];
548  y1 = position[i * 4 + 2];
549  x1 = position[i * 4 + 3];
550 
551  bbox = av_get_detection_bbox(header, i);
552 
553  if (conf[i] < conf_threshold) {
554  continue;
555  }
556 
557  bbox->x = (int)(x0 * frame->width);
558  bbox->w = (int)(x1 * frame->width) - bbox->x;
559  bbox->y = (int)(y0 * frame->height);
560  bbox->h = (int)(y1 * frame->height) - bbox->y;
561 
562  bbox->detect_confidence = av_make_q((int)(conf[i] * 10000), 10000);
563  bbox->classify_count = 0;
564 
565  if (ctx->labels && label_id[i] < ctx->label_count) {
566  av_strlcpy(bbox->detect_label, ctx->labels[(int)label_id[i]], sizeof(bbox->detect_label));
567  } else {
568  snprintf(bbox->detect_label, sizeof(bbox->detect_label), "%d", (int)label_id[i]);
569  }
570 
571  nb_bboxes--;
572  if (nb_bboxes == 0) {
573  break;
574  }
575  }
576  return 0;
577 }
578 
580 {
582  DnnContext *dnn_ctx = &ctx->dnnctx;
583  switch (dnn_ctx->backend_type) {
584  case DNN_OV:
586  case DNN_TF:
588  default:
589  avpriv_report_missing_feature(filter_ctx, "Current dnn backend does not support detect filter\n");
590  return AVERROR(EINVAL);
591  }
592 }
593 
595 {
596  for (int i = 0; i < ctx->label_count; i++) {
597  av_freep(&ctx->labels[i]);
598  }
599  ctx->label_count = 0;
600  av_freep(&ctx->labels);
601 }
602 
604 {
605  int line_len;
606  FILE *file;
607  DnnDetectContext *ctx = context->priv;
608 
609  file = avpriv_fopen_utf8(ctx->labels_filename, "r");
610  if (!file){
611  av_log(context, AV_LOG_ERROR, "failed to open file %s\n", ctx->labels_filename);
612  return AVERROR(EINVAL);
613  }
614 
615  while (!feof(file)) {
616  char *label;
617  char buf[256];
618  if (!fgets(buf, 256, file)) {
619  break;
620  }
621 
622  line_len = strlen(buf);
623  while (line_len) {
624  int i = line_len - 1;
625  if (buf[i] == '\n' || buf[i] == '\r' || buf[i] == ' ') {
626  buf[i] = '\0';
627  line_len--;
628  } else {
629  break;
630  }
631  }
632 
633  if (line_len == 0) // empty line
634  continue;
635 
636  if (line_len >= AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE) {
637  av_log(context, AV_LOG_ERROR, "label %s too long\n", buf);
638  fclose(file);
639  return AVERROR(EINVAL);
640  }
641 
642  label = av_strdup(buf);
643  if (!label) {
644  av_log(context, AV_LOG_ERROR, "failed to allocate memory for label %s\n", buf);
645  fclose(file);
646  return AVERROR(ENOMEM);
647  }
648 
649  if (av_dynarray_add_nofree(&ctx->labels, &ctx->label_count, label) < 0) {
650  av_log(context, AV_LOG_ERROR, "failed to do av_dynarray_add\n");
651  fclose(file);
652  av_freep(&label);
653  return AVERROR(ENOMEM);
654  }
655  }
656 
657  fclose(file);
658  return 0;
659 }
660 
661 static int check_output_nb(DnnDetectContext *ctx, DNNBackendType backend_type, int output_nb)
662 {
663  switch(backend_type) {
664  case DNN_TF:
665  if (output_nb != 4) {
666  av_log(ctx, AV_LOG_ERROR, "Only support tensorflow detect model with 4 outputs, \
667  but get %d instead\n", output_nb);
668  return AVERROR(EINVAL);
669  }
670  return 0;
671  case DNN_OV:
672  return 0;
673  default:
674  avpriv_report_missing_feature(ctx, "Dnn detect filter does not support current backend\n");
675  return AVERROR(EINVAL);
676  }
677  return 0;
678 }
679 
681 {
682  DnnDetectContext *ctx = context->priv;
683  DnnContext *dnn_ctx = &ctx->dnnctx;
684  int ret;
685 
687  if (ret < 0)
688  return ret;
689  ret = check_output_nb(ctx, dnn_ctx->backend_type, dnn_ctx->nb_outputs);
690  if (ret < 0)
691  return ret;
692  ctx->bboxes_fifo = av_fifo_alloc2(1, sizeof(AVDetectionBBox *), AV_FIFO_FLAG_AUTO_GROW);
693  if (!ctx->bboxes_fifo)
694  return AVERROR(ENOMEM);
696 
697  if (ctx->labels_filename) {
699  }
700  if (ctx->anchors_str) {
701  ret = dnn_detect_parse_anchors(ctx->anchors_str, &ctx->anchors);
702  if (!ctx->anchors) {
703  av_log(context, AV_LOG_ERROR, "failed to parse anchors_str\n");
704  return AVERROR(EINVAL);
705  }
706  ctx->nb_anchor = ret;
707  }
708  return 0;
709 }
710 
711 static const enum AVPixelFormat pix_fmts[] = {
718 };
719 
720 static int dnn_detect_flush_frame(AVFilterLink *outlink, int64_t pts, int64_t *out_pts)
721 {
722  DnnDetectContext *ctx = outlink->src->priv;
723  int ret;
724  DNNAsyncStatusType async_state;
725 
726  ret = ff_dnn_flush(&ctx->dnnctx);
727  if (ret != 0) {
728  return -1;
729  }
730 
731  do {
732  AVFrame *in_frame = NULL;
733  AVFrame *out_frame = NULL;
734  async_state = ff_dnn_get_result(&ctx->dnnctx, &in_frame, &out_frame);
735  if (async_state == DAST_SUCCESS) {
736  ret = ff_filter_frame(outlink, in_frame);
737  if (ret < 0)
738  return ret;
739  if (out_pts)
740  *out_pts = in_frame->pts + pts;
741  }
742  av_usleep(5000);
743  } while (async_state >= DAST_NOT_READY);
744 
745  return 0;
746 }
747 
749 {
750  AVFilterLink *inlink = filter_ctx->inputs[0];
751  AVFilterLink *outlink = filter_ctx->outputs[0];
753  AVFrame *in = NULL;
754  int64_t pts;
755  int ret, status;
756  int got_frame = 0;
757  int async_state;
758 
760 
761  do {
762  // drain all input frames
764  if (ret < 0)
765  return ret;
766  if (ret > 0) {
767  if (ff_dnn_execute_model(&ctx->dnnctx, in, NULL) != 0) {
768  return AVERROR(EIO);
769  }
770  }
771  } while (ret > 0);
772 
773  // drain all processed frames
774  do {
775  AVFrame *in_frame = NULL;
776  AVFrame *out_frame = NULL;
777  async_state = ff_dnn_get_result(&ctx->dnnctx, &in_frame, &out_frame);
778  if (async_state == DAST_SUCCESS) {
779  ret = ff_filter_frame(outlink, in_frame);
780  if (ret < 0)
781  return ret;
782  got_frame = 1;
783  }
784  } while (async_state == DAST_SUCCESS);
785 
786  // if frame got, schedule to next filter
787  if (got_frame)
788  return 0;
789 
791  if (status == AVERROR_EOF) {
792  int64_t out_pts = pts;
793  ret = dnn_detect_flush_frame(outlink, pts, &out_pts);
794  ff_outlink_set_status(outlink, status, out_pts);
795  return ret;
796  }
797  }
798 
800 
801  return 0;
802 }
803 
805 {
806  DnnDetectContext *ctx = context->priv;
807  AVDetectionBBox *bbox;
808  ff_dnn_uninit(&ctx->dnnctx);
809  if (ctx->bboxes_fifo) {
810  while (av_fifo_can_read(ctx->bboxes_fifo)) {
811  av_fifo_read(ctx->bboxes_fifo, &bbox, 1);
812  av_freep(&bbox);
813  }
814  av_fifo_freep2(&ctx->bboxes_fifo);
815  }
816  av_freep(&ctx->anchors);
818 }
819 
821 {
823  DnnDetectContext *ctx = context->priv;
824  DNNData model_input;
825  int ret, width_idx, height_idx;
826 
827  ret = ff_dnn_get_input(&ctx->dnnctx, &model_input);
828  if (ret != 0) {
829  av_log(ctx, AV_LOG_ERROR, "could not get input from the model\n");
830  return ret;
831  }
832  width_idx = dnn_get_width_idx_by_layout(model_input.layout);
833  height_idx = dnn_get_height_idx_by_layout(model_input.layout);
834  ctx->scale_width = model_input.dims[width_idx] == -1 ? inlink->w :
835  model_input.dims[width_idx];
836  ctx->scale_height = model_input.dims[height_idx] == -1 ? inlink->h :
837  model_input.dims[height_idx];
838 
839  return 0;
840 }
841 
842 static const AVFilterPad dnn_detect_inputs[] = {
843  {
844  .name = "default",
845  .type = AVMEDIA_TYPE_VIDEO,
846  .config_props = config_input,
847  },
848 };
849 
851  .name = "dnn_detect",
852  .description = NULL_IF_CONFIG_SMALL("Apply DNN detect filter to the input."),
853  .priv_size = sizeof(DnnDetectContext),
860  .priv_class = &dnn_detect_class,
861  .activate = dnn_detect_activate,
862 };
pix_fmts
static enum AVPixelFormat pix_fmts[]
Definition: vf_dnn_detect.c:711
DnnDetectContext::nb_classes
int nb_classes
Definition: vf_dnn_detect.c:52
AVPixelFormat
AVPixelFormat
Pixel format.
Definition: pixfmt.h:71
dnn_detect_parse_yolo_output
static int dnn_detect_parse_yolo_output(AVFrame *frame, DNNData *output, int output_index, AVFilterContext *filter_ctx)
Definition: vf_dnn_detect.c:148
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
opt.h
FILTER_PIXFMTS_ARRAY
#define FILTER_PIXFMTS_ARRAY(array)
Definition: filters.h:242
av_frame_get_side_data
AVFrameSideData * av_frame_get_side_data(const AVFrame *frame, enum AVFrameSideDataType type)
Definition: frame.c:949
ff_filter_frame
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1023
AVERROR_EOF
#define AVERROR_EOF
End of file.
Definition: error.h:57
int64_t
long long int64_t
Definition: coverity.c:34
output
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce output
Definition: filter_design.txt:225
inlink
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
Definition: filter_design.txt:212
av_unused
#define av_unused
Definition: attributes.h:131
av_fifo_peek
int av_fifo_peek(const AVFifo *f, void *buf, size_t nb_elems, size_t offset)
Read data from a FIFO without modifying FIFO state.
Definition: fifo.c:255
FILTER_INPUTS
#define FILTER_INPUTS(array)
Definition: filters.h:262
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:374
AVFrame::pts
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
Definition: frame.h:486
w
uint8_t w
Definition: llviddspenc.c:38
read_detect_label_file
static int read_detect_label_file(AVFilterContext *context)
Definition: vf_dnn_detect.c:603
AVOption
AVOption.
Definition: opt.h:429
data
const char data[16]
Definition: mxf.c:148
dnn_detect_post_proc_ssd
static int dnn_detect_post_proc_ssd(AVFrame *frame, DNNData *output, int nb_outputs, AVFilterContext *filter_ctx)
Definition: vf_dnn_detect.c:361
dnn_detect_init
static av_cold int dnn_detect_init(AVFilterContext *context)
Definition: vf_dnn_detect.c:680
output_data
static int output_data(MLPDecodeContext *m, unsigned int substr, AVFrame *frame, int *got_frame_ptr)
Write the audio data into the output buffer.
Definition: mlpdec.c:1108
AV_LOG_VERBOSE
#define AV_LOG_VERBOSE
Detailed information.
Definition: log.h:196
dnn_detect_inputs
static const AVFilterPad dnn_detect_inputs[]
Definition: vf_dnn_detect.c:842
AV_PIX_FMT_BGR24
@ AV_PIX_FMT_BGR24
packed RGB 8:8:8, 24bpp, BGRBGR...
Definition: pixfmt.h:76
preinit
static av_cold int preinit(AVFilterContext *ctx)
Definition: af_aresample.c:48
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
AVFilter::name
const char * name
Filter name.
Definition: avfilter.h:205
dnn_filter_common.h
AVDetectionBBox::y
int y
Definition: detection_bbox.h:32
video.h
FF_FILTER_FORWARD_STATUS_BACK
#define FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink)
Forward the status on an output link to an input link.
Definition: filters.h:434
AVFILTER_DNN_DEFINE_CLASS
AVFILTER_DNN_DEFINE_CLASS(dnn_detect, DNN_TF|DNN_OV)
ff_inlink_consume_frame
int ff_inlink_consume_frame(AVFilterLink *link, AVFrame **rframe)
Take a frame from the link's FIFO and update the link's stats.
Definition: avfilter.c:1451
fifo.h
AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE
#define AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE
Definition: detection_bbox.h:36
dnn_get_width_idx_by_layout
static int dnn_get_width_idx_by_layout(DNNLayout layout)
Definition: dnn_interface.h:197
AVDetectionBBox::detect_label
char detect_label[AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE]
Detect result with confidence.
Definition: detection_bbox.h:41
AVFilterContext::priv
void * priv
private data for use by the filter
Definition: avfilter.h:472
av_fifo_write
int av_fifo_write(AVFifo *f, const void *buf, size_t nb_elems)
Write data into a FIFO.
Definition: fifo.c:188
DnnContext
Definition: dnn_interface.h:143
dnn_detect_IOU
static float dnn_detect_IOU(AVDetectionBBox *bbox1, AVDetectionBBox *bbox2)
Definition: vf_dnn_detect.c:138
filter_ctx
static FilteringContext * filter_ctx
Definition: transcode.c:52
ff_dnn_filter_init_child_class
int ff_dnn_filter_init_child_class(AVFilterContext *filter)
Definition: dnn_filter_common.c:61
dnn_detect_uninit
static av_cold void dnn_detect_uninit(AVFilterContext *context)
Definition: vf_dnn_detect.c:804
DnnDetectContext
Definition: vf_dnn_detect.c:42
pts
static int64_t pts
Definition: transcode_aac.c:644
DnnDetectContext::model_type
DNNDetectionModelType model_type
Definition: vf_dnn_detect.c:49
AVFilterPad
A filter pad used for either input or output.
Definition: filters.h:38
av_get_detection_bbox
static av_always_inline AVDetectionBBox * av_get_detection_bbox(const AVDetectionBBoxHeader *header, unsigned int idx)
Definition: detection_bbox.h:84
DnnDetectContext::scale_height
int scale_height
Definition: vf_dnn_detect.c:55
dnn_detect_post_proc_ov
static int dnn_detect_post_proc_ov(AVFrame *frame, DNNData *output, int nb_outputs, AVFilterContext *filter_ctx)
Definition: vf_dnn_detect.c:470
DNN_TF
@ DNN_TF
Definition: dnn_interface.h:36
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:180
av_cold
#define av_cold
Definition: attributes.h:90
av_fifo_read
int av_fifo_read(AVFifo *f, void *buf, size_t nb_elems)
Read data from a FIFO.
Definition: fifo.c:240
ff_video_default_filterpad
const AVFilterPad ff_video_default_filterpad[1]
An AVFilterPad array whose only entry has name "default" and is of type AVMEDIA_TYPE_VIDEO.
Definition: video.c:37
DnnDetectContext::bboxes_fifo
AVFifo * bboxes_fifo
Definition: vf_dnn_detect.c:53
float
float
Definition: af_crystalizer.c:122
ff_outlink_set_status
static void ff_outlink_set_status(AVFilterLink *link, int status, int64_t pts)
Set the status field of a link from the source filter.
Definition: filters.h:424
ff_dnn_set_detect_post_proc
int ff_dnn_set_detect_post_proc(DnnContext *ctx, DetectPostProc post_proc)
Definition: dnn_filter_common.c:146
av_strtok
char * av_strtok(char *s, const char *delim, char **saveptr)
Split the string into several tokens which can be accessed by successive calls to av_strtok().
Definition: avstring.c:178
free_detect_labels
static void free_detect_labels(DnnDetectContext *ctx)
Definition: vf_dnn_detect.c:594
DNNData
Definition: dnn_interface.h:69
dnn_detect_post_proc_yolov3
static int dnn_detect_post_proc_yolov3(AVFrame *frame, DNNData *output, AVFilterContext *filter_ctx, int nb_outputs)
Definition: vf_dnn_detect.c:346
filters.h
ff_dnn_get_result
DNNAsyncStatusType ff_dnn_get_result(DnnContext *ctx, AVFrame **in_frame, AVFrame **out_frame)
Definition: dnn_filter_common.c:198
ctx
AVFormatContext * ctx
Definition: movenc.c:49
config_input
static int config_input(AVFilterLink *inlink)
Definition: vf_dnn_detect.c:820
linear
static float linear(float x)
Definition: vf_dnn_detect.c:92
ff_vf_dnn_detect
const AVFilter ff_vf_dnn_detect
Definition: vf_dnn_detect.c:850
DnnDetectContext::scale_width
int scale_width
Definition: vf_dnn_detect.c:54
AV_PIX_FMT_YUV420P
@ AV_PIX_FMT_YUV420P
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:73
av_usleep
int av_usleep(unsigned usec)
Sleep for a period of time.
Definition: time.c:84
FILTER_OUTPUTS
#define FILTER_OUTPUTS(array)
Definition: filters.h:263
AV_PIX_FMT_GRAYF32
#define AV_PIX_FMT_GRAYF32
Definition: pixfmt.h:511
file_open.h
ff_dnn_get_input
int ff_dnn_get_input(DnnContext *ctx, DNNData *input)
Definition: dnn_filter_common.c:158
DNN_OV
@ DNN_OV
Definition: dnn_interface.h:37
context
It's the only field you need to keep, assuming you have a context. There is some magic you don't need to care about around this field; just let it be (in first position) for now. Each option is declared as { name, description, offset, type, default, minimum, maximum, flags }: name is the option name, keep it simple and lowercase; descriptions are short, in lowercase, without period, and describe what they do, for example "set the foo of the bar"; offset is the offset of the field in your context.
Definition: writing_filters.txt:91
AVClass
Describe the class of an AVClass context structure.
Definition: log.h:66
NULL
#define NULL
Definition: coverity.c:32
AVDetectionBBoxHeader
Definition: detection_bbox.h:56
DnnDetectContext::dnnctx
DnnContext dnnctx
Definition: vf_dnn_detect.c:44
DnnDetectContext::cell_h
int cell_h
Definition: vf_dnn_detect.c:51
dnn_detect_activate
static int dnn_detect_activate(AVFilterContext *filter_ctx)
Definition: vf_dnn_detect.c:748
DnnDetectContext::labels_filename
char * labels_filename
Definition: vf_dnn_detect.c:46
av_fifo_can_read
size_t av_fifo_can_read(const AVFifo *f)
Definition: fifo.c:87
DnnDetectContext::labels
char ** labels
Definition: vf_dnn_detect.c:47
DDMT_YOLOV3
@ DDMT_YOLOV3
Definition: vf_dnn_detect.c:38
time.h
AV_PIX_FMT_GRAY8
@ AV_PIX_FMT_GRAY8
Y, 8bpp.
Definition: pixfmt.h:81
exp
int8_t exp
Definition: eval.c:73
ff_dnn_flush
int ff_dnn_flush(DnnContext *ctx)
Definition: dnn_filter_common.c:203
ff_inlink_acknowledge_status
int ff_inlink_acknowledge_status(AVFilterLink *link, int *rstatus, int64_t *rpts)
Test and acknowledge the change of status on the link.
Definition: avfilter.c:1398
FLAGS
#define FLAGS
Definition: vf_dnn_detect.c:63
DDMT_YOLOV4
@ DDMT_YOLOV4
Definition: vf_dnn_detect.c:39
av_detection_bbox_create_side_data
AVDetectionBBoxHeader * av_detection_bbox_create_side_data(AVFrame *frame, uint32_t nb_bboxes)
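Allocates memory for AVDetectionBBoxHeader, plus an array of nb_bboxes AVDetectionBBox, in the given AVFrame as AVFrameSideData of type AV_FRAME_DATA_DETECTION_BBOXES.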
Definition: detection_bbox.c:52
DnnContext::backend_type
DNNBackendType backend_type
Definition: dnn_interface.h:149
init
int(* init)(AVBSFContext *ctx)
Definition: dts2pts.c:368
AV_PIX_FMT_RGB24
@ AV_PIX_FMT_RGB24
packed RGB 8:8:8, 24bpp, RGBRGB...
Definition: pixfmt.h:75
AVFifo
Definition: fifo.c:35
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:94
DnnDetectContext::label_count
int label_count
Definition: vf_dnn_detect.c:48
DAST_SUCCESS
@ DAST_SUCCESS
Definition: dnn_interface.h:53
AVDetectionBBox::w
int w
Definition: detection_bbox.h:33
DNNBackendType
DNNBackendType
Definition: dnn_interface.h:35
DnnContext::nb_outputs
uint32_t nb_outputs
Definition: dnn_interface.h:156
av_make_q
static AVRational av_make_q(int num, int den)
Create an AVRational.
Definition: rational.h:71
avpriv_report_missing_feature
void avpriv_report_missing_feature(void *avc, const char *msg,...) av_printf_format(2
Log a generic warning message about a missing feature.
sigmoid
static float sigmoid(float x)
Definition: vf_dnn_detect.c:88
header
static const uint8_t header[24]
Definition: sdr2.c:68
DNNData::layout
DNNLayout layout
Definition: dnn_interface.h:75
AVDetectionBBox::classify_count
uint32_t classify_count
Definition: detection_bbox.h:51
DDMT_YOLOV1V2
@ DDMT_YOLOV1V2
Definition: vf_dnn_detect.c:37
DnnDetectContext::anchors_str
char * anchors_str
Definition: vf_dnn_detect.c:56
FF_FILTER_FORWARD_WANTED
FF_FILTER_FORWARD_WANTED(outlink, inlink)
dnn_detect_options
static const AVOption dnn_detect_options[]
Definition: vf_dnn_detect.c:64
AV_OPT_TYPE_FLOAT
@ AV_OPT_TYPE_FLOAT
Underlying C type is float.
Definition: opt.h:271
dnn_detect_flush_frame
static int dnn_detect_flush_frame(AVFilterLink *outlink, int64_t pts, int64_t *out_pts)
Definition: vf_dnn_detect.c:720
dnn_detect_post_proc
static int dnn_detect_post_proc(AVFrame *frame, DNNData *output, uint32_t nb, AVFilterContext *filter_ctx)
Definition: vf_dnn_detect.c:579
uninit
static void uninit(AVBSFContext *ctx)
Definition: pcm_rechunk.c:68
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
DFT_ANALYTICS_DETECT
@ DFT_ANALYTICS_DETECT
Definition: dnn_interface.h:59
check_output_nb
static int check_output_nb(DnnDetectContext *ctx, DNNBackendType backend_type, int output_nb)
Definition: vf_dnn_detect.c:661
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
av_mallocz
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if available on the CPU) and zero all the bytes of the block.
Definition: mem.c:256
AVFilterPad::name
const char * name
Pad name.
Definition: filters.h:44
avpriv_fopen_utf8
FILE * avpriv_fopen_utf8(const char *path, const char *mode)
Open a file using a UTF-8 filename.
Definition: file_open.c:161
dnn_detect_post_proc_yolo
static int dnn_detect_post_proc_yolo(AVFrame *frame, DNNData *output, AVFilterContext *filter_ctx)
Definition: vf_dnn_detect.c:334
DnnDetectContext::confidence
float confidence
Definition: vf_dnn_detect.c:45
av_cmp_q
static int av_cmp_q(AVRational a, AVRational b)
Compare two rationals.
Definition: rational.h:89
AVFilter
Filter definition.
Definition: avfilter.h:201
ret
ret
Definition: filter_design.txt:187
AV_PIX_FMT_NV12
@ AV_PIX_FMT_NV12
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (first byte U and the following byte V)
Definition: pixfmt.h:96
frame
These buffered frames must be flushed immediately if a new input produces new output. The filter must not call request_frame to get more; it must just process the frame or queue it. The task of requesting more frames is left to the filter's request_frame method or the application. If a filter has several inputs, the filter must be ready for frames arriving randomly on any input; any filter with several inputs will most likely require some kind of queuing mechanism. It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced. request_frame: For filters that do not use the activate() callback, this method is called when a frame is wanted on an output. For a source, it should directly call filter_frame on the corresponding output. For a filter, if there are queued frames already, one of these frames should be pushed. If not, the filter should request a frame on one of its inputs, repeatedly until at least one frame has been pushed. Return 0 if request_frame could produce a frame, or at least make progress towards producing a frame.
Definition: filter_design.txt:264
AVDetectionBBox::h
int h
Definition: detection_bbox.h:34
av_fifo_alloc2
AVFifo * av_fifo_alloc2(size_t nb_elems, size_t elem_size, unsigned int flags)
Allocate and initialize an AVFifo with a given element size.
Definition: fifo.c:47
AVDetectionBBox::detect_confidence
AVRational detect_confidence
Definition: detection_bbox.h:42
av_dynarray_add_nofree
int av_dynarray_add_nofree(void *tab_ptr, int *nb_ptr, void *elem)
Add an element to a dynamic array.
Definition: mem.c:315
DDMT_SSD
@ DDMT_SSD
Definition: vf_dnn_detect.c:36
status
ov_status_e status
Definition: dnn_backend_openvino.c:100
DNNDetectionModelType
DNNDetectionModelType
Definition: vf_dnn_detect.c:35
AV_PIX_FMT_NONE
@ AV_PIX_FMT_NONE
Definition: pixfmt.h:72
AV_OPT_TYPE_INT
@ AV_OPT_TYPE_INT
Underlying C type is int.
Definition: opt.h:259
AVDetectionBBox::x
int x
Distance in pixels from the left/top edge of the frame, together with width and height, defining the bounding box.
Definition: detection_bbox.h:31
DnnDetectContext::nb_anchor
int nb_anchor
Definition: vf_dnn_detect.c:58
AV_PIX_FMT_YUV444P
@ AV_PIX_FMT_YUV444P
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
Definition: pixfmt.h:78
AVFilterContext
An instance of a filter.
Definition: avfilter.h:457
DNNData::dims
int dims[4]
Definition: dnn_interface.h:71
av_strdup
char * av_strdup(const char *s)
Duplicate a string.
Definition: mem.c:272
AVMEDIA_TYPE_VIDEO
@ AVMEDIA_TYPE_VIDEO
Definition: avutil.h:201
AV_PIX_FMT_YUV422P
@ AV_PIX_FMT_YUV422P
planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
Definition: pixfmt.h:77
mem.h
OFFSET
#define OFFSET(x)
Definition: vf_dnn_detect.c:61
dnn_detect_fill_side_data
static int dnn_detect_fill_side_data(AVFrame *frame, AVFilterContext *filter_ctx)
Definition: vf_dnn_detect.c:285
dnn_get_height_idx_by_layout
static int dnn_get_height_idx_by_layout(DNNLayout layout)
Definition: dnn_interface.h:202
AVFrameSideData
Structure to hold side data for an AVFrame.
Definition: frame.h:250
DnnDetectContext::cell_w
int cell_w
Definition: vf_dnn_detect.c:50
DnnDetectContext::anchors
float * anchors
Definition: vf_dnn_detect.c:57
ff_dnn_init
int ff_dnn_init(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *filter_ctx)
Definition: dnn_filter_common.c:73
dnn_detect_get_label_id
static int dnn_detect_get_label_id(int nb_classes, int cell_size, float *label_data)
Definition: vf_dnn_detect.c:96
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:34
AV_PIX_FMT_YUV411P
@ AV_PIX_FMT_YUV411P
planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples)
Definition: pixfmt.h:80
AV_PIX_FMT_YUV410P
@ AV_PIX_FMT_YUV410P
planar YUV 4:1:0, 9bpp, (1 Cr & Cb sample per 4x4 Y samples)
Definition: pixfmt.h:79
av_strlcpy
size_t av_strlcpy(char *dst, const char *src, size_t size)
Copy the string src to dst, but no more than size - 1 bytes, and null-terminate dst.
Definition: avstring.c:85
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
av_fifo_freep2
void av_fifo_freep2(AVFifo **f)
Free an AVFifo and reset pointer to NULL.
Definition: fifo.c:286
ff_dnn_uninit
void ff_dnn_uninit(DnnContext *ctx)
Definition: dnn_filter_common.c:208
AVDetectionBBox
Definition: detection_bbox.h:26
h
h
Definition: vp9dsp_template.c:2070
dnn_detect_post_proc_tf
static int dnn_detect_post_proc_tf(AVFrame *frame, DNNData *output, AVFilterContext *filter_ctx)
Definition: vf_dnn_detect.c:504
ff_dnn_execute_model
int ff_dnn_execute_model(DnnContext *ctx, AVFrame *in_frame, AVFrame *out_frame)
Definition: dnn_filter_common.c:171
avstring.h
AV_OPT_TYPE_STRING
@ AV_OPT_TYPE_STRING
Underlying C type is a uint8_t* that is either NULL or points to a C string allocated with the av_malloc() family of functions.
Definition: opt.h:276
dnn_detect_parse_anchors
static int dnn_detect_parse_anchors(char *anchors_str, float **anchors)
Definition: vf_dnn_detect.c:109
DAST_NOT_READY
@ DAST_NOT_READY
Definition: dnn_interface.h:52
DNNAsyncStatusType
DNNAsyncStatusType
Definition: dnn_interface.h:49
AV_OPT_TYPE_CONST
@ AV_OPT_TYPE_CONST
Special option type for declaring named constants.
Definition: opt.h:299
snprintf
#define snprintf
Definition: snprintf.h:34
OFFSET2
#define OFFSET2(x)
Definition: vf_dnn_detect.c:62
detection_bbox.h
AV_FIFO_FLAG_AUTO_GROW
#define AV_FIFO_FLAG_AUTO_GROW
Automatically resize the FIFO on writes, so that the data fits.
Definition: fifo.h:63
AV_FRAME_DATA_DETECTION_BBOXES
@ AV_FRAME_DATA_DETECTION_BBOXES
Bounding boxes for object detection and classification, as described by AVDetectionBBoxHeader.
Definition: frame.h:194