[FFmpeg-devel] [PATCH V2 4/4] dnn/vf_dnn_detect: add tensorflow output parse support
Guo, Yejun
yejun.guo at intel.com
Mon May 10 09:13:40 EEST 2021
> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces at ffmpeg.org> On Behalf Of Ting
> Fu
> Sent: 2021年5月6日 16:46
> To: ffmpeg-devel at ffmpeg.org
> Subject: [FFmpeg-devel] [PATCH V2 4/4] dnn/vf_dnn_detect: add tensorflow
> output parse support
>
> The testing model is an official TensorFlow model from the GitHub repo; please refer to
> https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf1_detection_zoo.md
> to download the detection model you need.
> For example, local testing was carried out with
> 'ssd_mobilenet_v2_coco_2018_03_29.tar.gz', using an image of a dog from
> https://github.com/tensorflow/models/blob/master/research/object_detection/test_images/image1.jpg
>
> Testing command is:
> ./ffmpeg -i image1.jpg -vf dnn_detect=dnn_backend=tensorflow:input=image_tensor:output=\
> "num_detections&detection_scores&detection_classes&detection_boxes":model=ssd_mobilenet_v2_coco.pb,\
> showinfo -f null -
>
> We will see a result similar to the one below:
> [Parsed_showinfo_1 @ 0x33e65f0] side data - detection bounding boxes:
> [Parsed_showinfo_1 @ 0x33e65f0] source: ssd_mobilenet_v2_coco.pb
> [Parsed_showinfo_1 @ 0x33e65f0] index: 0, region: (382, 60) ->
> (1005, 593), label: 18, confidence: 9834/10000.
> [Parsed_showinfo_1 @ 0x33e65f0] index: 1, region: (12, 8) -> (328,
> 549), label: 18, confidence: 8555/10000.
> [Parsed_showinfo_1 @ 0x33e65f0] index: 2, region: (293, 7) -> (682,
> 458), label: 1, confidence: 8033/10000.
> [Parsed_showinfo_1 @ 0x33e65f0] index: 3, region: (342, 0) -> (690,
> 325), label: 1, confidence: 5878/10000.
>
> There are two boxes of dog (label 18) with scores 98.34% & 85.55% and two
> boxes of person (label 1) with scores 80.33% & 58.78%.
>
> Signed-off-by: Ting Fu <ting.fu at intel.com>
> ---
> libavfilter/vf_dnn_detect.c | 95
> ++++++++++++++++++++++++++++++++++++-
> 1 file changed, 94 insertions(+), 1 deletion(-)
>
> diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c
> index 7d39acb653..818b53a052 100644
> --- a/libavfilter/vf_dnn_detect.c
> +++ b/libavfilter/vf_dnn_detect.c
> @@ -48,6 +48,9 @@ typedef struct DnnDetectContext {
> #define FLAGS AV_OPT_FLAG_FILTERING_PARAM |
> AV_OPT_FLAG_VIDEO_PARAM
> static const AVOption dnn_detect_options[] = {
> { "dnn_backend", "DNN backend",
> OFFSET(backend_type), AV_OPT_TYPE_INT, { .i64 = 2 },
> INT_MIN, INT_MAX, FLAGS, "backend" },
> +#if (CONFIG_LIBTENSORFLOW == 1)
> + { "tensorflow", "tensorflow backend flag", 0,
> AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, "backend" },
> +#endif
> #if (CONFIG_LIBOPENVINO == 1)
> { "openvino", "openvino backend flag", 0,
> AV_OPT_TYPE_CONST, { .i64 = 2 }, 0, 0, FLAGS, "backend" },
> #endif
> @@ -59,7 +62,7 @@ static const AVOption dnn_detect_options[] = {
>
> AVFILTER_DEFINE_CLASS(dnn_detect);
>
> -static int dnn_detect_post_proc(AVFrame *frame, DNNData *output,
> uint32_t nb, AVFilterContext *filter_ctx)
> +static int dnn_detect_post_proc_ov(AVFrame *frame, DNNData *output,
> AVFilterContext *filter_ctx)
> {
> DnnDetectContext *ctx = filter_ctx->priv;
> float conf_threshold = ctx->confidence;
> @@ -136,6 +139,96 @@ static int dnn_detect_post_proc(AVFrame *frame,
> DNNData *output, uint32_t nb, AV
> return 0;
> }
>
> +static int dnn_detect_post_proc_tf(AVFrame *frame, DNNData *output,
> AVFilterContext *filter_ctx)
> +{
> + DnnDetectContext *ctx = filter_ctx->priv;
> + int proposal_count;
> + float conf_threshold = ctx->confidence;
> + float *conf, *position, *label_id, x0, y0, x1, y1;
> + int nb_bboxes = 0;
> + AVFrameSideData *sd;
> + AVDetectionBBox *bbox;
> + AVDetectionBBoxHeader *header;
> +
> + proposal_count = *(float *)(output[0].data);
> + conf = output[1].data;
> + position = output[3].data;
> + label_id = output[2].data;
> +
> + sd = av_frame_get_side_data(frame,
> AV_FRAME_DATA_DETECTION_BBOXES);
> + if (sd) {
> + av_log(filter_ctx, AV_LOG_ERROR, "already have dnn bounding
> boxes in side data.\n");
> + return -1;
> + }
> +
> + for (int i = 0; i < proposal_count; ++i) {
> + if (conf[i] < conf_threshold)
> + continue;
> + nb_bboxes++;
> + }
> +
> + if (nb_bboxes == 0) {
> + av_log(filter_ctx, AV_LOG_VERBOSE, "nothing detected in this
> frame.\n");
> + return 0;
> + }
> +
> + header = av_detection_bbox_create_side_data(frame, nb_bboxes);
> + if (!header) {
> + av_log(filter_ctx, AV_LOG_ERROR, "failed to create side data
> with %d bounding boxes\n", nb_bboxes);
> + return -1;
> + }
> +
> + av_strlcpy(header->source, ctx->dnnctx.model_filename,
> sizeof(header->source));
> +
> + for (int i = 0; i < proposal_count; ++i) {
> + y0 = position[i * 4];
> + x0 = position[i * 4 + 1];
> + y1 = position[i * 4 + 2];
> + x1 = position[i * 4 + 3];
> +
> + bbox = av_get_detection_bbox(header, i);
> +
> + if (conf[i] < conf_threshold) {
> + continue;
> + }
> +
> + bbox->x = (int)(x0 * frame->width);
> + bbox->w = (int)(x1 * frame->width) - bbox->x;
> + bbox->y = (int)(y0 * frame->height);
> + bbox->h = (int)(y1 * frame->height) - bbox->y;
> +
> + bbox->detect_confidence = av_make_q((int)(conf[i] * 10000),
> 10000);
> + bbox->classify_count = 0;
> +
> + if (ctx->labels && label_id[i] < ctx->label_count) {
> + av_strlcpy(bbox->detect_label, ctx->labels[(int)label_id[i]],
> sizeof(bbox->detect_label));
> + } else {
> + snprintf(bbox->detect_label, sizeof(bbox->detect_label), "%d",
> (int)label_id[i]);
> + }
> +
> + nb_bboxes--;
> + if (nb_bboxes == 0) {
> + break;
> + }
> + }
> + return 0;
> +}
> +
> +static int dnn_detect_post_proc(AVFrame *frame, DNNData *output,
> uint32_t nb, AVFilterContext *filter_ctx)
> +{
> + DnnDetectContext *ctx = filter_ctx->priv;
> + DnnContext *dnn_ctx = &ctx->dnnctx;
> + switch (dnn_ctx->backend_type) {
> + case DNN_OV:
> + return dnn_detect_post_proc_ov(frame, output, filter_ctx);
> + case DNN_TF:
> + return dnn_detect_post_proc_tf(frame, output, filter_ctx);
> + default:
> + avpriv_report_missing_feature(filter_ctx, "Current dnn backend do
> not support detect filter\n");
Nit: "do" -> "does" in the message above. I have changed this locally and will push tomorrow if there are no objections, thanks.
More information about the ffmpeg-devel
mailing list