diff mbox series

[FFmpeg-devel,1/3] avfilter/vf_libopencv: add opencv HaarCascade classifier simple face detection filter

Message ID 1589380957-23603-1-git-send-email-lance.lmwang@gmail.com
State New
Headers show
Series [FFmpeg-devel,1/3] avfilter/vf_libopencv: add opencv HaarCascade classifier simple face detection filter
Related show

Checks

Context Check Description
andriy/default pending
andriy/make fail Make failed

Commit Message

Limin Wang May 13, 2020, 2:42 p.m. UTC
From: Limin Wang <lance.lmwang@gmail.com>

Signed-off-by: Limin Wang <lance.lmwang@gmail.com>
---
Have tested with opencv 2.4.13 and 3.4.10 with static link

 configure                  |   1 +
 doc/filters.texi           |  29 ++++++++
 libavfilter/vf_libopencv.c | 164 ++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 191 insertions(+), 3 deletions(-)
diff mbox series

Patch

diff --git a/configure b/configure
index a45c0fb..99d008a 100755
--- a/configure
+++ b/configure
@@ -2123,6 +2123,7 @@  HEADERS_LIST="
     machine_ioctl_meteor_h
     malloc_h
     opencv2_core_core_c_h
+    opencv2_objdetect_objdetect_c_h
     OpenGL_gl3_h
     poll_h
     sys_param_h
diff --git a/doc/filters.texi b/doc/filters.texi
index d19fd34..e50dcf3 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -14176,6 +14176,35 @@  other parameters is 0.
 These parameters correspond to the parameters assigned to the
 libopencv function @code{cvSmooth}.
 
+@subsection facedetect
+Face detection using Haar Feature-based Cascade Classifiers.
+
+The filter takes the following parameters:
+@var{xml_model}|@var{qoffset}.
+
+@var{xml_model} is the path to a pre-trained classifier file. The C API still
+does not support the newer cascade format, so please use an old-format file such
+as haarcascade_frontalface_alt.xml, whose type_id is opencv-haar-classifier.
+
+@var{qoffset}
+If you want to export the detected faces as ROI side data in the frame, set this
+parameter. See also the @ref{addroi} filter. The range of qoffset is [-1.0, 1.0].
+
+By default the filter will report these metadata values if faces are
+detected:
+@table @option
+@item lavfi.facedetect.nb_faces
+The number of detected faces.
+
+@item lavfi.facedetect.face_id.x, lavfi.facedetect.face_id.y
+The x and y position of each face. face_id is the face index, which is
+in the range [0, nb_faces-1].
+
+@item lavfi.facedetect.face_id.w, lavfi.facedetect.face_id.h
+The width and height of each face. face_id is the face index, which is
+in the range [0, nb_faces-1].
+@end table
+
 @section oscilloscope
 
 2D Video Oscilloscope.
diff --git a/libavfilter/vf_libopencv.c b/libavfilter/vf_libopencv.c
index 8128030..b7a9282 100644
--- a/libavfilter/vf_libopencv.c
+++ b/libavfilter/vf_libopencv.c
@@ -1,5 +1,6 @@ 
 /*
  * Copyright (c) 2010 Stefano Sabatini
+ * Copyright (c) 2020 Limin Wang
  *
  * This file is part of FFmpeg.
  *
@@ -27,10 +28,16 @@ 
 #if HAVE_OPENCV2_CORE_CORE_C_H
 #include <opencv2/core/core_c.h>
 #include <opencv2/imgproc/imgproc_c.h>
+/* the HAVE_ name must match the opencv2_objdetect_objdetect_c_h entry in configure's HEADERS_LIST */
+#if HAVE_OPENCV2_OBJDETECT_OBJDETECT_C_H
+#include <opencv2/objdetect/objdetect_c.h>
+#else
+#include <opencv/cv.h>
+#endif
 #else
 #include <opencv/cv.h>
 #include <opencv/cxcore.h>
 #endif
+
 #include "libavutil/avstring.h"
 #include "libavutil/common.h"
 #include "libavutil/file.h"
@@ -82,6 +89,7 @@  typedef struct OCVContext {
     int (*init)(AVFilterContext *ctx, const char *args);
     void (*uninit)(AVFilterContext *ctx);
     void (*end_frame_filter)(AVFilterContext *ctx, IplImage *inimg, IplImage *outimg);
+    void (*update_metadata)(AVFilterContext *ctx, AVFrame *frame);
     void *priv;
 } OCVContext;
 
@@ -326,18 +334,152 @@  static void erode_end_frame_filter(AVFilterContext *ctx, IplImage *inimg, IplIma
     cvErode(inimg, outimg, dilate->kernel, dilate->nb_iterations);
 }
 
+/** Private state of the "facedetect" filter. */
+typedef struct FaceDetectContext {
+    char *xml_model;                   ///< cascade file path taken from the filter arguments
+    CvHaarClassifierCascade *cascade;  ///< classifier loaded from xml_model
+    CvMemStorage *storage;             ///< OpenCV storage handed to the detector
+    int nb_faces;                      ///< number of entries in faces, 0 if there are none
+    CvSeq *faces;                      ///< detections returned by the last detector run
+    int add_roi;                       ///< non-zero when a qoffset argument was supplied
+    AVRational qoffset;                ///< qoffset value copied into every exported ROI
+} FaceDetectContext;
+
+/**
+ * Parse the facedetect arguments and load the Haar classifier cascade.
+ *
+ * @param ctx  filter context; its OCVContext carries the FaceDetectContext
+ *             that is filled in here
+ * @param args "xml_model" or "xml_model|qoffset"; supplying a qoffset in
+ *             [-1.0, 1.0] also enables ROI side data export
+ * @return 0 on success, a negative AVERROR code otherwise
+ */
+static av_cold int facedetect_init(AVFilterContext *ctx, const char *args)
+{
+    OCVContext *s = ctx->priv;
+    FaceDetectContext *facedetect = s->priv;
+    const char *buf = args;
+    double qoffset;
+
+    if (!args) {
+        av_log(ctx, AV_LOG_ERROR, "failed to get haarcascade_frontalface_alt.xml model file\n");
+        return AVERROR(EINVAL);
+    }
+
+    facedetect->xml_model = av_get_token(&buf, "|");
+    if (!facedetect->xml_model) {
+        /* The token could not be allocated. xml_model is NULL here, so it
+         * must not be handed to a %s conversion. */
+        av_log(ctx, AV_LOG_ERROR, "failed to get %s\n", args);
+        return AVERROR(ENOMEM);
+    }
+
+    /* av_get_token() leaves buf on the '|' separator when a second field follows */
+    if (buf && sscanf(buf, "|%lf", &qoffset) == 1) {
+        if (qoffset < -1.0 || qoffset > 1.0) {
+            av_log(ctx, AV_LOG_ERROR, "failed to get valid qoffset(%f))\n", qoffset);
+            return AVERROR(EINVAL);
+        }
+        /* a valid qoffset is what switches ROI side data export on */
+        facedetect->add_roi = 1;
+        facedetect->qoffset = av_d2q(qoffset, 255);
+    }
+
+    av_log(ctx, AV_LOG_VERBOSE, "xml_model: %s add_roi: %d qoffset: %d/%d\n",
+           facedetect->xml_model, facedetect->add_roi, facedetect->qoffset.num, facedetect->qoffset.den);
+
+    facedetect->storage = cvCreateMemStorage(0);
+    if (!facedetect->storage) {
+        av_log(ctx, AV_LOG_ERROR, "cvCreateMemStorage() failed\n");
+        return AVERROR(EINVAL);
+    }
+    cvClearMemStorage(facedetect->storage);
+
+    facedetect->cascade = (CvHaarClassifierCascade*)cvLoad( facedetect->xml_model, NULL, NULL, NULL );
+    if (!facedetect->cascade) {
+        av_log(ctx, AV_LOG_ERROR, "failed to load classifier cascade: %s \n", facedetect->xml_model);
+        return AVERROR(EINVAL);
+    }
+
+    return 0;
+}
+
+/**
+ * Release everything facedetect_init() acquired: the classifier cascade,
+ * the OpenCV memory storage and the model path string.
+ *
+ * @param ctx filter context whose OCVContext carries the FaceDetectContext
+ */
+static av_cold void facedetect_uninit(AVFilterContext *ctx)
+{
+    OCVContext *s = ctx->priv;
+    FaceDetectContext *facedetect = s->priv;
+
+    if (facedetect->cascade)
+        cvReleaseHaarClassifierCascade(&facedetect->cascade);
+    if (facedetect->storage)
+        cvReleaseMemStorage(&facedetect->storage);
+    /* xml_model was allocated by av_get_token() and is owned by this context */
+    av_freep(&facedetect->xml_model);
+}
+
+/**
+ * Store an integer in a dictionary under "lavfi.facedetect.<idx>.<key>".
+ *
+ * @param metadata dictionary to update
+ * @param key      trailing component of the entry name
+ * @param idx      index placed in the middle of the entry name
+ * @param d        value to store, written as decimal text
+ */
+static void set_meta_int(AVDictionary **metadata, const char *key, int idx, int d)
+{
+    char entry_name[128];
+    char entry_value[128];
+
+    /* entry name: "lavfi.facedetect.<idx>.<key>" */
+    snprintf(entry_name, sizeof(entry_name), "lavfi.facedetect.%d.%s", idx, key);
+    /* the value is handed to the dictionary as a string */
+    snprintf(entry_value, sizeof(entry_value), "%d", d);
+
+    av_dict_set(metadata, entry_name, entry_value, 0);
+}
+
+/**
+ * Run the Haar cascade over the input image and remember the detections.
+ *
+ * The returned sequence and its element count are stored in the
+ * FaceDetectContext, where facedetect_update_metadata() reads them.
+ *
+ * @param ctx    filter context whose OCVContext carries the FaceDetectContext
+ * @param inimg  image to scan
+ * @param outimg not used by this filter
+ */
+static void facedetect_end_frame_filter(AVFilterContext *ctx, IplImage *inimg, IplImage *outimg)
+{
+    OCVContext *s = ctx->priv;
+    FaceDetectContext *facedetect = s->priv;
+
+    /* Every run allocates its result sequence from the same storage; drop
+     * the previous run's data first so the storage does not keep growing. */
+    cvClearMemStorage(facedetect->storage);
+
+    /* smallest object searched for is 1/16 of the image width and height */
+    facedetect->faces = cvHaarDetectObjects(inimg, facedetect->cascade, facedetect->storage,
+            1.25, 3, CV_HAAR_DO_CANNY_PRUNING,
+            cvSize(inimg->width/16,inimg->height/16), cvSize(0,0));
+
+    facedetect->nb_faces = facedetect->faces ? facedetect->faces->total : 0;
+}
+
+/**
+ * Export the detections stored by facedetect_end_frame_filter() on a frame.
+ *
+ * When at least one face was found, the frame metadata receives
+ * "lavfi.facedetect.nb_faces" plus x, y, w and h entries per face. If ROI
+ * export was enabled, one AVRegionOfInterest per face is also attached as
+ * AV_FRAME_DATA_REGIONS_OF_INTEREST side data.
+ *
+ * @param ctx filter context whose OCVContext carries the FaceDetectContext
+ * @param out frame that receives the metadata and side data
+ */
+static void facedetect_update_metadata(AVFilterContext *ctx, AVFrame *out)
+{
+    OCVContext *s = ctx->priv;
+    FaceDetectContext *facedetect = s->priv;
+    AVRegionOfInterest *roi = NULL;
+    int i;
+
+    if (facedetect->nb_faces <= 0)
+        return;
+
+    if (facedetect->add_roi) {
+        AVFrameSideData *sd = av_frame_new_side_data(out, AV_FRAME_DATA_REGIONS_OF_INTEREST,
+                facedetect->nb_faces * sizeof(AVRegionOfInterest));
+        if (!sd) {
+            /* this callback returns void, so the failure can only be logged */
+            av_log(ctx, AV_LOG_ERROR, "failed to allocate ROI side data\n");
+            return;
+        }
+        roi = (AVRegionOfInterest*)sd->data;
+    }
+
+    av_dict_set_int(&out->metadata, "lavfi.facedetect.nb_faces", facedetect->nb_faces, 0);
+
+    for (i = 0; i < facedetect->nb_faces; i++) {
+        CvRect *r = (CvRect*) cvGetSeqElem(facedetect->faces, i);
+
+        /* roi stays NULL unless ROI export is enabled */
+        if (roi) {
+            roi[i] = (AVRegionOfInterest) {
+                .self_size = sizeof(*roi),
+                .top       = r->y,
+                .bottom    = r->y + r->height,
+                .left      = r->x,
+                .right     = r->x + r->width,
+                .qoffset   = facedetect->qoffset,
+            };
+        }
+
+        set_meta_int(&out->metadata, "x", i, r->x);
+        set_meta_int(&out->metadata, "y", i, r->y);
+        set_meta_int(&out->metadata, "w", i, r->width);
+        set_meta_int(&out->metadata, "h", i, r->height);
+    }
+}
+
+/**
+ * Describes one OpenCV filter: its name, the size of its private context
+ * and the callbacks that init() copies into the OCVContext.
+ */
 typedef struct OCVFilterEntry {
     const char *name;
     size_t priv_size;
     int  (*init)(AVFilterContext *ctx, const char *args);
     void (*uninit)(AVFilterContext *ctx);
     void (*end_frame_filter)(AVFilterContext *ctx, IplImage *inimg, IplImage *outimg);
+    void (*update_metadata)(AVFilterContext *ctx, AVFrame *frame); ///< may be NULL; called on the output frame after end_frame_filter
 } OCVFilterEntry;
 
+/* Table of the available OpenCV filters. Only "facedetect" provides an
+ * update_metadata callback; the other entries leave it NULL. */
 static const OCVFilterEntry ocv_filter_entries[] = {
-    { "dilate", sizeof(DilateContext), dilate_init, dilate_uninit, dilate_end_frame_filter },
-    { "erode",  sizeof(DilateContext), dilate_init, dilate_uninit, erode_end_frame_filter  },
-    { "smooth", sizeof(SmoothContext), smooth_init, NULL, smooth_end_frame_filter },
+    { "dilate", sizeof(DilateContext), dilate_init, dilate_uninit, dilate_end_frame_filter, NULL },
+    { "erode",  sizeof(DilateContext), dilate_init, dilate_uninit, erode_end_frame_filter, NULL },
+    { "smooth", sizeof(SmoothContext), smooth_init, NULL, smooth_end_frame_filter, NULL },
+    { "facedetect", sizeof(FaceDetectContext), facedetect_init, facedetect_uninit, facedetect_end_frame_filter, facedetect_update_metadata },
 };
 
 static av_cold int init(AVFilterContext *ctx)
@@ -355,6 +497,7 @@  static av_cold int init(AVFilterContext *ctx)
             s->init             = entry->init;
             s->uninit           = entry->uninit;
             s->end_frame_filter = entry->end_frame_filter;
+            s->update_metadata  = entry->update_metadata;
 
             if (!(s->priv = av_mallocz(entry->priv_size)))
                 return AVERROR(ENOMEM);
@@ -383,18 +526,33 @@  static int filter_frame(AVFilterLink *inlink, AVFrame *in)
     AVFrame *out;
     IplImage inimg, outimg;
 
+    /* the facedetect filter passes the input frame through */
+    if (strcmp(s->name, "facedetect")) {
     out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
     if (!out) {
         av_frame_free(&in);
         return AVERROR(ENOMEM);
     }
     av_frame_copy_props(out, in);
+    } else {
+        out = in;
+    }
 
     fill_iplimage_from_frame(&inimg , in , inlink->format);
+
+    if (strcmp(s->name, "facedetect")) {
     fill_iplimage_from_frame(&outimg, out, inlink->format);
     s->end_frame_filter(ctx, &inimg, &outimg);
     fill_frame_from_iplimage(out, &outimg, inlink->format);
+    } else {
+        s->end_frame_filter(ctx, &inimg, NULL);
+    }
+
+    if (s->update_metadata) {
+        s->update_metadata(ctx, out);
+    }
 
+    if (out != in)
     av_frame_free(&in);
 
     return ff_filter_frame(outlink, out);