FFmpeg
Data Structures | Macros | Enumerations | Functions | Variables
vf_dnn_detect.c File Reference
#include "libavutil/file_open.h"
#include "libavutil/mem.h"
#include "libavutil/opt.h"
#include "filters.h"
#include "dnn_filter_common.h"
#include "video.h"
#include "libavutil/time.h"
#include "libavutil/avstring.h"
#include "libavutil/detection_bbox.h"
#include "libavutil/fifo.h"

Go to the source code of this file.

Data Structures

struct  DnnDetectContext
 

Macros

#define OFFSET(x)   offsetof(DnnDetectContext, dnnctx.x)
 
#define OFFSET2(x)   offsetof(DnnDetectContext, x)
 
#define FLAGS   AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM
 

Enumerations

enum  DNNDetectionModelType { DDMT_SSD, DDMT_YOLOV1V2, DDMT_YOLOV3, DDMT_YOLOV4 }
 

Functions

 AVFILTER_DNN_DEFINE_CLASS (dnn_detect, DNN_TF|DNN_OV)
 
static float sigmoid (float x)
 
static float linear (float x)
 
static int dnn_detect_get_label_id (int nb_classes, int cell_size, float *label_data)
 
static int dnn_detect_parse_anchors (char *anchors_str, float **anchors)
 
static float dnn_detect_IOU (AVDetectionBBox *bbox1, AVDetectionBBox *bbox2)
 
static int dnn_detect_parse_yolo_output (AVFrame *frame, DNNData *output, int output_index, AVFilterContext *filter_ctx)
 
static int dnn_detect_fill_side_data (AVFrame *frame, AVFilterContext *filter_ctx)
 
static int dnn_detect_post_proc_yolo (AVFrame *frame, DNNData *output, AVFilterContext *filter_ctx)
 
static int dnn_detect_post_proc_yolov3 (AVFrame *frame, DNNData *output, AVFilterContext *filter_ctx, int nb_outputs)
 
static int dnn_detect_post_proc_ssd (AVFrame *frame, DNNData *output, int nb_outputs, AVFilterContext *filter_ctx)
 
static int dnn_detect_post_proc_ov (AVFrame *frame, DNNData *output, int nb_outputs, AVFilterContext *filter_ctx)
 
static int dnn_detect_post_proc_tf (AVFrame *frame, DNNData *output, AVFilterContext *filter_ctx)
 
static int dnn_detect_post_proc (AVFrame *frame, DNNData *output, uint32_t nb, AVFilterContext *filter_ctx)
 
static void free_detect_labels (DnnDetectContext *ctx)
 
static int read_detect_label_file (AVFilterContext *context)
 
static int check_output_nb (DnnDetectContext *ctx, DNNBackendType backend_type, int output_nb)
 
static av_cold int dnn_detect_init (AVFilterContext *context)
 
static int dnn_detect_flush_frame (AVFilterLink *outlink, int64_t pts, int64_t *out_pts)
 
static int dnn_detect_activate (AVFilterContext *filter_ctx)
 
static av_cold void dnn_detect_uninit (AVFilterContext *context)
 
static int config_input (AVFilterLink *inlink)
 

Variables

static const AVOption dnn_detect_options []
 
static enum AVPixelFormat pix_fmts []
 
static const AVFilterPad dnn_detect_inputs []
 
const FFFilter ff_vf_dnn_detect
 

Detailed Description

Implements an object detection filter using deep learning networks.

Definition in file vf_dnn_detect.c.

Macro Definition Documentation

◆ OFFSET

#define OFFSET (   x)    offsetof(DnnDetectContext, dnnctx.x)

Definition at line 61 of file vf_dnn_detect.c.

◆ OFFSET2

#define OFFSET2 (   x)    offsetof(DnnDetectContext, x)

Definition at line 62 of file vf_dnn_detect.c.

◆ FLAGS

#define FLAGS   AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM

Definition at line 63 of file vf_dnn_detect.c.

Enumeration Type Documentation

◆ DNNDetectionModelType

Enumerator
DDMT_SSD 
DDMT_YOLOV1V2 
DDMT_YOLOV3 
DDMT_YOLOV4 

Definition at line 35 of file vf_dnn_detect.c.

Function Documentation

◆ AVFILTER_DNN_DEFINE_CLASS()

AVFILTER_DNN_DEFINE_CLASS ( dnn_detect  ,
DNN_TF|DNN_OV 
)

◆ sigmoid()

static float sigmoid ( float  x)
inline static

Definition at line 88 of file vf_dnn_detect.c.

Referenced by dnn_detect_parse_yolo_output().

◆ linear()

static float linear ( float  x)
inline static

Definition at line 92 of file vf_dnn_detect.c.

Referenced by dnn_detect_parse_yolo_output().

◆ dnn_detect_get_label_id()

static int dnn_detect_get_label_id ( int  nb_classes,
int  cell_size,
float *  label_data 
)
static

Definition at line 96 of file vf_dnn_detect.c.

Referenced by dnn_detect_parse_yolo_output().

◆ dnn_detect_parse_anchors()

static int dnn_detect_parse_anchors ( char *  anchors_str,
float **  anchors 
)
static

Definition at line 109 of file vf_dnn_detect.c.

Referenced by dnn_detect_init().

◆ dnn_detect_IOU()

static float dnn_detect_IOU ( AVDetectionBBox *  bbox1,
AVDetectionBBox *  bbox2 
)
static

Definition at line 138 of file vf_dnn_detect.c.

Referenced by dnn_detect_fill_side_data().

◆ dnn_detect_parse_yolo_output()

static int dnn_detect_parse_yolo_output ( AVFrame *  frame,
DNNData *  output,
int  output_index,
AVFilterContext *  filter_ctx 
)
static

Find all candidate bounding boxes. The YOLO output can be reshaped to [B, N*D, Cx, Cy]. Each detection box 'D' has the format [x, y, h, w, box_score, class_no_1, ...].

Definition at line 148 of file vf_dnn_detect.c.

Referenced by dnn_detect_post_proc_yolo(), and dnn_detect_post_proc_yolov3().

◆ dnn_detect_fill_side_data()

static int dnn_detect_fill_side_data ( AVFrame *  frame,
AVFilterContext *  filter_ctx 
)
static

Definition at line 285 of file vf_dnn_detect.c.

Referenced by dnn_detect_post_proc_yolo(), and dnn_detect_post_proc_yolov3().

◆ dnn_detect_post_proc_yolo()

static int dnn_detect_post_proc_yolo ( AVFrame *  frame,
DNNData *  output,
AVFilterContext *  filter_ctx 
)
static

Definition at line 334 of file vf_dnn_detect.c.

Referenced by dnn_detect_post_proc_ov().

◆ dnn_detect_post_proc_yolov3()

static int dnn_detect_post_proc_yolov3 ( AVFrame *  frame,
DNNData *  output,
AVFilterContext *  filter_ctx,
int  nb_outputs 
)
static

Definition at line 346 of file vf_dnn_detect.c.

Referenced by dnn_detect_post_proc_ov().

◆ dnn_detect_post_proc_ssd()

static int dnn_detect_post_proc_ssd ( AVFrame *  frame,
DNNData *  output,
int  nb_outputs,
AVFilterContext *  filter_ctx 
)
static

Definition at line 361 of file vf_dnn_detect.c.

Referenced by dnn_detect_post_proc_ov().

◆ dnn_detect_post_proc_ov()

static int dnn_detect_post_proc_ov ( AVFrame *  frame,
DNNData *  output,
int  nb_outputs,
AVFilterContext *  filter_ctx 
)
static

Definition at line 470 of file vf_dnn_detect.c.

Referenced by dnn_detect_post_proc().

◆ dnn_detect_post_proc_tf()

static int dnn_detect_post_proc_tf ( AVFrame *  frame,
DNNData *  output,
AVFilterContext *  filter_ctx 
)
static

Definition at line 504 of file vf_dnn_detect.c.

Referenced by dnn_detect_post_proc().

◆ dnn_detect_post_proc()

static int dnn_detect_post_proc ( AVFrame *  frame,
DNNData *  output,
uint32_t  nb,
AVFilterContext *  filter_ctx 
)
static

Definition at line 579 of file vf_dnn_detect.c.

Referenced by dnn_detect_init().

◆ free_detect_labels()

static void free_detect_labels ( DnnDetectContext *  ctx)
static

Definition at line 594 of file vf_dnn_detect.c.

Referenced by dnn_detect_uninit().

◆ read_detect_label_file()

static int read_detect_label_file ( AVFilterContext *  context)
static

Definition at line 603 of file vf_dnn_detect.c.

Referenced by dnn_detect_init().

◆ check_output_nb()

static int check_output_nb ( DnnDetectContext *  ctx,
DNNBackendType  backend_type,
int  output_nb 
)
static

Definition at line 661 of file vf_dnn_detect.c.

Referenced by dnn_detect_init().

◆ dnn_detect_init()

static av_cold int dnn_detect_init ( AVFilterContext *  context)
static

Definition at line 680 of file vf_dnn_detect.c.

◆ dnn_detect_flush_frame()

static int dnn_detect_flush_frame ( AVFilterLink *  outlink,
int64_t  pts,
int64_t *  out_pts 
)
static

Definition at line 725 of file vf_dnn_detect.c.

Referenced by dnn_detect_activate().

◆ dnn_detect_activate()

static int dnn_detect_activate ( AVFilterContext *  filter_ctx)
static

Definition at line 753 of file vf_dnn_detect.c.

◆ dnn_detect_uninit()

static av_cold void dnn_detect_uninit ( AVFilterContext *  context)
static

Definition at line 809 of file vf_dnn_detect.c.

◆ config_input()

static int config_input ( AVFilterLink *  inlink)
static

Definition at line 825 of file vf_dnn_detect.c.

Variable Documentation

◆ dnn_detect_options

const AVOption dnn_detect_options[]
static
Initial value:
= {
{ "dnn_backend", "DNN backend", OFFSET(backend_type), AV_OPT_TYPE_INT, { .i64 = DNN_OV }, INT_MIN, INT_MAX, FLAGS, .unit = "backend" },
{ "confidence", "threshold of confidence", OFFSET2(confidence), AV_OPT_TYPE_FLOAT, { .dbl = 0.5 }, 0, 1, FLAGS},
{ "labels", "path to labels file", OFFSET2(labels_filename), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
{ "model_type", "DNN detection model type", OFFSET2(model_type), AV_OPT_TYPE_INT, { .i64 = DDMT_SSD }, INT_MIN, INT_MAX, FLAGS, .unit = "model_type" },
{ "ssd", "output shape [1, 1, N, 7]", 0, AV_OPT_TYPE_CONST, { .i64 = DDMT_SSD }, 0, 0, FLAGS, .unit = "model_type" },
{ "yolo", "output shape [1, N*Cx*Cy*DetectionBox]", 0, AV_OPT_TYPE_CONST, { .i64 = DDMT_YOLOV1V2 }, 0, 0, FLAGS, .unit = "model_type" },
{ "yolov3", "outputs shape [1, N*D, Cx, Cy]", 0, AV_OPT_TYPE_CONST, { .i64 = DDMT_YOLOV3 }, 0, 0, FLAGS, .unit = "model_type" },
{ "yolov4", "outputs shape [1, N*D, Cx, Cy]", 0, AV_OPT_TYPE_CONST, { .i64 = DDMT_YOLOV4 }, 0, 0, FLAGS, .unit = "model_type" },
{ "cell_w", "cell width", OFFSET2(cell_w), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INTMAX_MAX, FLAGS },
{ "cell_h", "cell height", OFFSET2(cell_h), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INTMAX_MAX, FLAGS },
{ "nb_classes", "The number of class", OFFSET2(nb_classes), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INTMAX_MAX, FLAGS },
{ "anchors", "anchors, splited by '&'", OFFSET2(anchors_str), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
{ NULL }
}

Definition at line 64 of file vf_dnn_detect.c.

◆ pix_fmts

enum AVPixelFormat pix_fmts[]
static

◆ dnn_detect_inputs

const AVFilterPad dnn_detect_inputs[]
static
Initial value:
= {
{
.name = "default",
.config_props = config_input,
},
}

Definition at line 847 of file vf_dnn_detect.c.

◆ ff_vf_dnn_detect

const FFFilter ff_vf_dnn_detect
Initial value:
= {
.p.name = "dnn_detect",
.p.description = NULL_IF_CONFIG_SMALL("Apply DNN detect filter to the input."),
.p.priv_class = &dnn_detect_class,
.priv_size = sizeof(DnnDetectContext),
.activate = dnn_detect_activate,
}

Definition at line 855 of file vf_dnn_detect.c.

pix_fmts
static enum AVPixelFormat pix_fmts[]
Definition: vf_dnn_detect.c:716
FILTER_PIXFMTS_ARRAY
#define FILTER_PIXFMTS_ARRAY(array)
Definition: filters.h:242
FILTER_INPUTS
#define FILTER_INPUTS(array)
Definition: filters.h:262
dnn_detect_init
static av_cold int dnn_detect_init(AVFilterContext *context)
Definition: vf_dnn_detect.c:680
dnn_detect_inputs
static const AVFilterPad dnn_detect_inputs[]
Definition: vf_dnn_detect.c:847
AV_PIX_FMT_BGR24
@ AV_PIX_FMT_BGR24
packed RGB 8:8:8, 24bpp, BGRBGR...
Definition: pixfmt.h:76
preinit
static av_cold int preinit(AVFilterContext *ctx)
Definition: af_aresample.c:48
ff_dnn_filter_init_child_class
int ff_dnn_filter_init_child_class(AVFilterContext *filter)
Definition: dnn_filter_common.c:61
dnn_detect_uninit
static av_cold void dnn_detect_uninit(AVFilterContext *context)
Definition: vf_dnn_detect.c:809
DnnDetectContext
Definition: vf_dnn_detect.c:42
ff_video_default_filterpad
const AVFilterPad ff_video_default_filterpad[1]
An AVFilterPad array whose only entry has name "default" and is of type AVMEDIA_TYPE_VIDEO.
Definition: video.c:37
config_input
static int config_input(AVFilterLink *inlink)
Definition: vf_dnn_detect.c:825
AV_PIX_FMT_YUV420P
@ AV_PIX_FMT_YUV420P
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:73
FILTER_OUTPUTS
#define FILTER_OUTPUTS(array)
Definition: filters.h:263
AV_PIX_FMT_GRAYF32
#define AV_PIX_FMT_GRAYF32
Definition: pixfmt.h:546
DNN_OV
@ DNN_OV
Definition: dnn_interface.h:37
NULL
#define NULL
Definition: coverity.c:32
dnn_detect_activate
static int dnn_detect_activate(AVFilterContext *filter_ctx)
Definition: vf_dnn_detect.c:753
DDMT_YOLOV3
@ DDMT_YOLOV3
Definition: vf_dnn_detect.c:38
AV_PIX_FMT_GRAY8
@ AV_PIX_FMT_GRAY8
Y , 8bpp.
Definition: pixfmt.h:81
FLAGS
#define FLAGS
Definition: vf_dnn_detect.c:63
DDMT_YOLOV4
@ DDMT_YOLOV4
Definition: vf_dnn_detect.c:39
init
int(* init)(AVBSFContext *ctx)
Definition: dts2pts.c:368
AV_PIX_FMT_RGB24
@ AV_PIX_FMT_RGB24
packed RGB 8:8:8, 24bpp, RGBRGB...
Definition: pixfmt.h:75
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:94
DDMT_YOLOV1V2
@ DDMT_YOLOV1V2
Definition: vf_dnn_detect.c:37
AV_OPT_TYPE_FLOAT
@ AV_OPT_TYPE_FLOAT
Underlying C type is float.
Definition: opt.h:271
uninit
static void uninit(AVBSFContext *ctx)
Definition: pcm_rechunk.c:68
AV_PIX_FMT_NV12
@ AV_PIX_FMT_NV12
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...
Definition: pixfmt.h:96
DDMT_SSD
@ DDMT_SSD
Definition: vf_dnn_detect.c:36
AV_PIX_FMT_NONE
@ AV_PIX_FMT_NONE
Definition: pixfmt.h:72
AV_OPT_TYPE_INT
@ AV_OPT_TYPE_INT
Underlying C type is int.
Definition: opt.h:259
AV_PIX_FMT_YUV444P
@ AV_PIX_FMT_YUV444P
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
Definition: pixfmt.h:78
AVMEDIA_TYPE_VIDEO
@ AVMEDIA_TYPE_VIDEO
Definition: avutil.h:201
AV_PIX_FMT_YUV422P
@ AV_PIX_FMT_YUV422P
planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
Definition: pixfmt.h:77
OFFSET
#define OFFSET(x)
Definition: vf_dnn_detect.c:61
AV_PIX_FMT_YUV411P
@ AV_PIX_FMT_YUV411P
planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples)
Definition: pixfmt.h:80
AV_PIX_FMT_YUV410P
@ AV_PIX_FMT_YUV410P
planar YUV 4:1:0, 9bpp, (1 Cr & Cb sample per 4x4 Y samples)
Definition: pixfmt.h:79
AV_OPT_TYPE_STRING
@ AV_OPT_TYPE_STRING
Underlying C type is a uint8_t* that is either NULL or points to a C string allocated with the av_mal...
Definition: opt.h:276
AV_OPT_TYPE_CONST
@ AV_OPT_TYPE_CONST
Special option type for declaring named constants.
Definition: opt.h:299
OFFSET2
#define OFFSET2(x)
Definition: vf_dnn_detect.c:62