
[FFmpeg-devel,V2,4/4] dnn/vf_dnn_detect: add tensorflow output parse support

Message ID 20210506084610.23487-4-ting.fu@intel.com
State New
Series [FFmpeg-devel,V2,1/4] dnn: add DCO_RGB color order to enum DNNColorOrder

Checks

Context Check Description
andriy/x86_make success Make finished
andriy/x86_make_fate success Make fate finished
andriy/PPC64_make success Make finished
andriy/PPC64_make_fate success Make fate finished

Commit Message

Fu, Ting May 6, 2021, 8:46 a.m. UTC
The testing model is the official TensorFlow model from the GitHub repo; please refer to
https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf1_detection_zoo.md
to download the detection model you need.
For example, local testing was carried out with 'ssd_mobilenet_v2_coco_2018_03_29.tar.gz',
using the dog image at
https://github.com/tensorflow/models/blob/master/research/object_detection/test_images/image1.jpg
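
To fetch and unpack the model, a minimal sketch (assuming the zoo's standard
download.tensorflow.org URL, and renaming the extracted frozen graph to match
the command below):
wget http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v2_coco_2018_03_29.tar.gz
tar xzf ssd_mobilenet_v2_coco_2018_03_29.tar.gz
cp ssd_mobilenet_v2_coco_2018_03_29/frozen_inference_graph.pb ssd_mobilenet_v2_coco.pb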

The testing command is:
./ffmpeg -i image1.jpg -vf dnn_detect=dnn_backend=tensorflow:input=image_tensor:output=\
"num_detections&detection_scores&detection_classes&detection_boxes":model=ssd_mobilenet_v2_coco.pb,\
showinfo -f null -
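
To print class names instead of numeric ids, a hedged variant assuming the
filter's 'labels' option (the patch reads the file into ctx->labels) and a
hypothetical coco_labels.txt with one class name per line:
./ffmpeg -i image1.jpg -vf dnn_detect=dnn_backend=tensorflow:input=image_tensor:output=\
"num_detections&detection_scores&detection_classes&detection_boxes":model=ssd_mobilenet_v2_coco.pb:\
labels=coco_labels.txt,showinfo -f null -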

Without a labels file, we will see output similar to the following:
[Parsed_showinfo_1 @ 0x33e65f0]   side data - detection bounding boxes:
[Parsed_showinfo_1 @ 0x33e65f0] source: ssd_mobilenet_v2_coco.pb
[Parsed_showinfo_1 @ 0x33e65f0] index: 0,       region: (382, 60) -> (1005, 593), label: 18, confidence: 9834/10000.
[Parsed_showinfo_1 @ 0x33e65f0] index: 1,       region: (12, 8) -> (328, 549), label: 18, confidence: 8555/10000.
[Parsed_showinfo_1 @ 0x33e65f0] index: 2,       region: (293, 7) -> (682, 458), label: 1, confidence: 8033/10000.
[Parsed_showinfo_1 @ 0x33e65f0] index: 3,       region: (342, 0) -> (690, 325), label: 1, confidence: 5878/10000.

There are two dog boxes with scores 98.34% & 85.55%, and two person boxes with scores 80.33% & 58.78%.
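
As a minimal standalone sketch (not part of the patch) re-deriving these
numbers from the patch's math, where a confidence becomes an N/10000 rational
via av_make_q() and the normalized [y0, x0, y1, x1] corners are scaled by the
frame size; the score and box values below are hypothetical:

#include <stdio.h>
#include <libavutil/rational.h>

int main(void)
{
    /* a hypothetical raw detection_scores value, converted as in the patch */
    float conf = 0.98345f;
    AVRational q = av_make_q((int)(conf * 10000), 10000);
    printf("confidence: %d/%d = %.2f%%\n", q.num, q.den, 100.0 * av_q2d(q));
    /* prints: confidence: 9834/10000 = 98.34% */

    /* hypothetical normalized box corners and frame size */
    float x0 = 0.25f, y0 = 0.125f, x1 = 0.75f, y1 = 0.875f;
    int w = 1280, h = 720;
    printf("region: (%d, %d) -> (%d, %d)\n",
           (int)(x0 * w), (int)(y0 * h), (int)(x1 * w), (int)(y1 * h));
    /* prints: region: (320, 90) -> (960, 630) */
    return 0;
}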

Signed-off-by: Ting Fu <ting.fu@intel.com>
---
 libavfilter/vf_dnn_detect.c | 95 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 94 insertions(+), 1 deletion(-)

Comments

Guo, Yejun May 10, 2021, 6:13 a.m. UTC | #1
> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Ting Fu
> Sent: May 6, 2021, 16:46
> To: ffmpeg-devel@ffmpeg.org
> Subject: [FFmpeg-devel] [PATCH V2 4/4] dnn/vf_dnn_detect: add tensorflow output parse support
> 
> [...]
> 
> +    default:
> +        avpriv_report_missing_feature(filter_ctx, "Current dnn backend do not support detect filter\n");
do -> does, changed locally, will push tomorrow if there's no objection, thanks.
Guo, Yejun May 11, 2021, 2:55 a.m. UTC | #2
> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Guo, Yejun
> Sent: May 10, 2021, 14:14
> To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
> Subject: Re: [FFmpeg-devel] [PATCH V2 4/4] dnn/vf_dnn_detect: add tensorflow output parse support
> 
> [...]
> 
> do -> does, changed locally, will push tomorrow if there's no objection, thanks.
> 
pushed

Patch

diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c
index 7d39acb653..818b53a052 100644
--- a/libavfilter/vf_dnn_detect.c
+++ b/libavfilter/vf_dnn_detect.c
@@ -48,6 +48,9 @@  typedef struct DnnDetectContext {
 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM
 static const AVOption dnn_detect_options[] = {
     { "dnn_backend", "DNN backend",                OFFSET(backend_type),     AV_OPT_TYPE_INT,       { .i64 = 2 },    INT_MIN, INT_MAX, FLAGS, "backend" },
+#if (CONFIG_LIBTENSORFLOW == 1)
+    { "tensorflow",  "tensorflow backend flag",    0,                        AV_OPT_TYPE_CONST,     { .i64 = 1 },    0, 0, FLAGS, "backend" },
+#endif
 #if (CONFIG_LIBOPENVINO == 1)
     { "openvino",    "openvino backend flag",      0,                        AV_OPT_TYPE_CONST,     { .i64 = 2 },    0, 0, FLAGS, "backend" },
 #endif
@@ -59,7 +62,7 @@  static const AVOption dnn_detect_options[] = {
 
 AVFILTER_DEFINE_CLASS(dnn_detect);
 
-static int dnn_detect_post_proc(AVFrame *frame, DNNData *output, uint32_t nb, AVFilterContext *filter_ctx)
+static int dnn_detect_post_proc_ov(AVFrame *frame, DNNData *output, AVFilterContext *filter_ctx)
 {
     DnnDetectContext *ctx = filter_ctx->priv;
     float conf_threshold = ctx->confidence;
@@ -136,6 +139,96 @@  static int dnn_detect_post_proc(AVFrame *frame, DNNData *output, uint32_t nb, AV
     return 0;
 }
 
+static int dnn_detect_post_proc_tf(AVFrame *frame, DNNData *output, AVFilterContext *filter_ctx)
+{
+    DnnDetectContext *ctx = filter_ctx->priv;
+    int proposal_count;
+    float conf_threshold = ctx->confidence;
+    float *conf, *position, *label_id, x0, y0, x1, y1;
+    int nb_bboxes = 0;
+    AVFrameSideData *sd;
+    AVDetectionBBox *bbox;
+    AVDetectionBBoxHeader *header;
+
+    proposal_count = *(float *)(output[0].data);
+    conf           = output[1].data;
+    position       = output[3].data;
+    label_id       = output[2].data;
+
+    sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DETECTION_BBOXES);
+    if (sd) {
+        av_log(filter_ctx, AV_LOG_ERROR, "already have dnn bounding boxes in side data.\n");
+        return -1;
+    }
+
+    for (int i = 0; i < proposal_count; ++i) {
+        if (conf[i] < conf_threshold)
+            continue;
+        nb_bboxes++;
+    }
+
+    if (nb_bboxes == 0) {
+        av_log(filter_ctx, AV_LOG_VERBOSE, "nothing detected in this frame.\n");
+        return 0;
+    }
+
+    header = av_detection_bbox_create_side_data(frame, nb_bboxes);
+    if (!header) {
+        av_log(filter_ctx, AV_LOG_ERROR, "failed to create side data with %d bounding boxes\n", nb_bboxes);
+        return -1;
+    }
+
+    av_strlcpy(header->source, ctx->dnnctx.model_filename, sizeof(header->source));
+
+    for (int i = 0; i < proposal_count; ++i) {
+        y0 = position[i * 4];
+        x0 = position[i * 4 + 1];
+        y1 = position[i * 4 + 2];
+        x1 = position[i * 4 + 3];
+
+        bbox = av_get_detection_bbox(header, i);
+
+        if (conf[i] < conf_threshold) {
+            continue;
+        }
+
+        bbox->x = (int)(x0 * frame->width);
+        bbox->w = (int)(x1 * frame->width) - bbox->x;
+        bbox->y = (int)(y0 * frame->height);
+        bbox->h = (int)(y1 * frame->height) - bbox->y;
+
+        bbox->detect_confidence = av_make_q((int)(conf[i] * 10000), 10000);
+        bbox->classify_count = 0;
+
+        if (ctx->labels && label_id[i] < ctx->label_count) {
+            av_strlcpy(bbox->detect_label, ctx->labels[(int)label_id[i]], sizeof(bbox->detect_label));
+        } else {
+            snprintf(bbox->detect_label, sizeof(bbox->detect_label), "%d", (int)label_id[i]);
+        }
+
+        nb_bboxes--;
+        if (nb_bboxes == 0) {
+            break;
+        }
+    }
+    return 0;
+}
+
+static int dnn_detect_post_proc(AVFrame *frame, DNNData *output, uint32_t nb, AVFilterContext *filter_ctx)
+{
+    DnnDetectContext *ctx = filter_ctx->priv;
+    DnnContext *dnn_ctx = &ctx->dnnctx;
+    switch (dnn_ctx->backend_type) {
+    case DNN_OV:
+        return dnn_detect_post_proc_ov(frame, output, filter_ctx);
+    case DNN_TF:
+        return dnn_detect_post_proc_tf(frame, output, filter_ctx);
+    default:
+        avpriv_report_missing_feature(filter_ctx, "Current dnn backend do not support detect filter\n");
+        return AVERROR(EINVAL);
+    }
+}
+
 static void free_detect_labels(DnnDetectContext *ctx)
 {
     for (int i = 0; i < ctx->label_count; i++) {