diff mbox

[FFmpeg-devel] avfilter: add vmafmotion filter

Message ID 1501676935-17325-1-git-send-email-ashk43712@gmail.com
State Superseded
Headers show

Commit Message

Ashish Singh Aug. 2, 2017, 12:28 p.m. UTC
From: Ashish Singh <ashk43712@gmail.com>

Signed-off-by: Ashish Singh <ashk43712@gmail.com>
---
 doc/filters.texi            |  19 +++
 libavfilter/Makefile        |   1 +
 libavfilter/allfilters.c    |   1 +
 libavfilter/vf_vmafmotion.c | 404 ++++++++++++++++++++++++++++++++++++++++++++
 libavfilter/vmaf_motion.h   |  42 +++++
 5 files changed, 467 insertions(+)
 create mode 100644 libavfilter/vf_vmafmotion.c
 create mode 100644 libavfilter/vmaf_motion.h

Comments

Nicolas George Aug. 2, 2017, 1:48 p.m. UTC | #1
Le quintidi 15 thermidor, an CCXXV, Ashish Pratap Singh a écrit :
> +    FFDualInputContext dinput;

dualinput is about to be removed, please see the patch series two days
ago.

Regards,
Ashish Singh Aug. 2, 2017, 2:42 p.m. UTC | #2
Hi,

On Aug 2, 2017 19:18, "Nicolas George" <george@nsup.org> wrote:

Le quintidi 15 thermidor, an CCXXV, Ashish Pratap Singh a écrit :
> +    FFDualInputContext dinput;

dualinput is about to be removed, please see the patch series two days
ago.

Regards,

--

Ok

  Nicolas George
diff mbox

Patch

diff --git a/doc/filters.texi b/doc/filters.texi
index 2324b96..ee1e884 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -15137,6 +15137,25 @@  vignette='PI/4+random(1)*PI/50':eval=frame
 
 @end itemize
 
+@section vmafmotion
+
+Obtain the average vmaf motion score between two input videos.
+It is one of the component filters of VMAF.
+
+This filter takes two input videos.
+
+Both input videos must have the same resolution and pixel format.
+Also it assumes that both inputs have the same number of frames.
+
+The obtained average motion score is printed through the logging system.
+
+In the below example the input file @file{main.mpg} being processed is compared
+with the reference file @file{ref.mpg}.
+
+@example
+ffmpeg -i main.mpg -i ref.mpg -lavfi vmafmotion -f null -
+@end example
+
 @section vstack
 Stack input videos vertically.
 
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index ee16361..771e434 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -322,6 +322,7 @@  OBJS-$(CONFIG_VFLIP_FILTER)                  += vf_vflip.o
 OBJS-$(CONFIG_VIDSTABDETECT_FILTER)          += vidstabutils.o vf_vidstabdetect.o
 OBJS-$(CONFIG_VIDSTABTRANSFORM_FILTER)       += vidstabutils.o vf_vidstabtransform.o
 OBJS-$(CONFIG_VIGNETTE_FILTER)               += vf_vignette.o
+OBJS-$(CONFIG_VMAFMOTION_FILTER)             += vf_vmafmotion.o dualinput.o framesync.o
 OBJS-$(CONFIG_VSTACK_FILTER)                 += vf_stack.o framesync.o
 OBJS-$(CONFIG_W3FDIF_FILTER)                 += vf_w3fdif.o
 OBJS-$(CONFIG_WAVEFORM_FILTER)               += vf_waveform.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index b1c2d11..644ab44 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -333,6 +333,7 @@  static void register_all(void)
     REGISTER_FILTER(VIDSTABDETECT,  vidstabdetect,  vf);
     REGISTER_FILTER(VIDSTABTRANSFORM, vidstabtransform, vf);
     REGISTER_FILTER(VIGNETTE,       vignette,       vf);
+    REGISTER_FILTER(VMAFMOTION,     vmafmotion,     vf);
     REGISTER_FILTER(VSTACK,         vstack,         vf);
     REGISTER_FILTER(W3FDIF,         w3fdif,         vf);
     REGISTER_FILTER(WAVEFORM,       waveform,       vf);
diff --git a/libavfilter/vf_vmafmotion.c b/libavfilter/vf_vmafmotion.c
new file mode 100644
index 0000000..8456939
--- /dev/null
+++ b/libavfilter/vf_vmafmotion.c
@@ -0,0 +1,404 @@ 
+/*
+ * Copyright (c) 2017 Ronald S. Bultje <rsbultje@gmail.com>
+ * Copyright (c) 2017 Ashish Pratap Singh <ashk43712@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Calculate VMAF Motion score between two input videos.
+ */
+
+#include "libavutil/avstring.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+#include "avfilter.h"
+#include "dualinput.h"
+#include "drawutils.h"
+#include "formats.h"
+#include "internal.h"
+#include "vmaf_motion.h"
+#include "video.h"
+
+typedef struct VMAFMotionContext {
+    const AVClass *class;
+    FFDualInputContext dinput;
+    const AVPixFmtDescriptor *desc;
+    int filter[5];
+    int width;
+    int height;
+    uint16_t *prev_blur_data;
+    uint16_t *blur_data;
+    uint16_t *temp_data;
+    double motion_sum;
+    uint64_t nb_frames;
+} VMAFMotionContext;
+
+#define MAX_ALIGN 32
+#define ALIGN_CEIL(x) ((x) + ((x) % MAX_ALIGN ? MAX_ALIGN - (x) % MAX_ALIGN : 0))
+
+static const AVOption vmafmotion_options[] = {
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(vmafmotion);
+
+static double image_sad(const uint16_t *img1, const uint16_t *img2, int w, int h,
+                        ptrdiff_t img1_stride, ptrdiff_t img2_stride)
+{
+    uint64_t sum = 0;
+    int i, j;
+
+    for (i = 0; i < h; i++) {
+        for (j = 0; j < w; j++) {
+            sum += abs(img1[i * img1_stride + j] - img2[i * img2_stride + j]);
+        }
+    }
+
+    return (double) (sum * 1.0 / (w * h));
+}
+
+static inline int floorn(int n, int m)
+{
+    return n - n % m;
+}
+
+static inline int ceiln(int n, int m)
+{
+    return n % m ? n + (m - n % m) : n;
+}
+
+static void convolution_x(const int *filter, int filt_w, const uint16_t *src,
+                          uint16_t *dst, int w, int h, ptrdiff_t src_stride,
+                          ptrdiff_t dst_stride)
+{
+    int radius = filt_w / 2;
+    int borders_left = ceiln(radius, 1);
+    int borders_right = floorn(w - (filt_w - radius), 1);
+    int i, j, k;
+    int sum = 0;
+
+    for (i = 0; i < h; i++) {
+        for (j = 0; j < borders_left; j++) {
+            sum = 0;
+            for (k = 0; k < filt_w; k++) {
+                int j_tap = FFABS(j - radius + k);
+                if (j_tap >= w) {
+                    j_tap = w - (j_tap - w + 1);
+                }
+                sum += filter[k] * src[i * src_stride + j_tap];
+            }
+            dst[i * dst_stride + j] = sum >> N;
+        }
+
+        for (j = borders_left; j < borders_right; j++) {
+            int sum = 0;
+            for (k = 0; k < filt_w; k++) {
+                sum += filter[k] * src[i * src_stride + j - radius + k];
+            }
+            dst[i * dst_stride + j] = sum >> N;
+        }
+
+        for (j = borders_right; j < w; j++) {
+            sum = 0;
+            for (k = 0; k < filt_w; k++) {
+                int j_tap = FFABS(j - radius + k);
+                if (j_tap >= w) {
+                    j_tap = w - (j_tap - w + 1);
+                }
+                sum += filter[k] * src[i * src_stride + j_tap];
+            }
+            dst[i * dst_stride + j] = sum >> N;
+        }
+    }
+}
+
+#define conv_y_fn(type, bits) \
+    static void convolution_y_##bits##bit(const int *filter, int filt_w, \
+                                          const type *src, uint16_t *dst, \
+                                          int w, int h, ptrdiff_t src_stride, \
+                                          ptrdiff_t dst_stride) \
+{ \
+    int radius = filt_w / 2; \
+    int borders_top = ceiln(radius, 1); \
+    int borders_bottom = floorn(h - (filt_w - radius), 1); \
+    int i, j, k; \
+    int sum = 0; \
+    \
+    for (i = 0; i < borders_top; i++) { \
+        for (j = 0; j < w; j++) { \
+            sum = 0; \
+            for (k = 0; k < filt_w; k++) { \
+                int i_tap = FFABS(i - radius + k); \
+                if (i_tap >= h) { \
+                    i_tap = h - (i_tap - h + 1); \
+                } \
+                sum += filter[k] * src[i_tap * src_stride + j]; \
+            } \
+            dst[i* dst_stride + j] = sum >> N; \
+        } \
+    } \
+    for (i = borders_top; i < borders_bottom; i++) { \
+        for (j = 0; j < w; j++) { \
+            sum = 0; \
+            for (k = 0; k < filt_w; k++) { \
+                sum += filter[k] * src[(i - radius + k) * src_stride + j]; \
+            } \
+            dst[i* dst_stride + j] = sum >> N; \
+        } \
+    } \
+    for (i = borders_bottom; i < h; i++) { \
+        for (j = 0; j < w; j++) { \
+            sum = 0; \
+            for (k = 0; k < filt_w; k++) { \
+                int i_tap = FFABS(i - radius + k); \
+                if (i_tap >= h) { \
+                    i_tap = h - (i_tap - h + 1); \
+                } \
+                sum += filter[k] * src[i_tap * src_stride + j]; \
+            } \
+            dst[i* dst_stride + j] = sum >> N; \
+        } \
+    } \
+}
+
+conv_y_fn(uint8_t, 8);
+conv_y_fn(uint16_t, 10);
+
+void convolution_f32(const int *filter, int filt_w, const void *src,
+                     uint16_t *dst, uint16_t *tmp, int w, int h,
+                     ptrdiff_t src_stride, ptrdiff_t dst_stride, uint8_t type)
+{
+    if(type == 8) {
+        convolution_y_8bit(filter, filt_w, (const uint8_t *) src, tmp, w, h,
+                           src_stride, dst_stride);
+    } else {
+        convolution_y_10bit(filter, filt_w, (const uint16_t *) src, tmp, w, h,
+                            src_stride, dst_stride);
+    }
+
+    convolution_x(filter, filt_w, tmp, dst, w, h, dst_stride, dst_stride);
+}
+
+int compute_vmafmotion(const uint16_t *ref, const uint16_t *main, int w, int h,
+                       ptrdiff_t ref_stride, ptrdiff_t main_stride, double *score)
+{
+    *score = image_sad(ref, main, w, h, ref_stride / sizeof(uint16_t),
+                       main_stride / sizeof(uint16_t));
+
+    return 0;
+}
+
+static void set_meta(AVDictionary **metadata, const char *key, float d)
+{
+    char value[128];
+    snprintf(value, sizeof(value), "%0.2f", d);
+    av_dict_set(metadata, key, value, 0);
+}
+
+static AVFrame *do_vmafmotion(AVFilterContext *ctx, AVFrame *main, const AVFrame *ref)
+{
+    VMAFMotionContext *s = ctx->priv;
+    AVDictionary **metadata = &main->metadata;
+    ptrdiff_t ref_stride;
+    ptrdiff_t ref_px_stride;
+    ptrdiff_t stride;
+    ptrdiff_t px_stride;
+    size_t data_sz;
+    double score;
+
+    ref_stride = ref->linesize[0];
+    stride = ALIGN_CEIL(s->width * sizeof(uint16_t));
+    data_sz = (size_t)stride * s->height;
+    px_stride = stride / sizeof(uint16_t);
+
+    if (s->desc->comp[0].depth <= 8) {
+        ref_px_stride = ref_stride / sizeof(uint8_t);
+        convolution_f32(s->filter, 5, (const uint8_t *) ref->data[0],
+                        s->blur_data, s->temp_data, s->width, s->height,
+                        ref_px_stride, px_stride, 8);
+    } else {
+        ref_px_stride = ref_stride / sizeof(uint16_t);
+        convolution_f32(s->filter, 5, (const uint16_t *) ref->data[0],
+                        s->blur_data, s->temp_data, s->width, s->height,
+                        ref_px_stride, px_stride, 10);
+    }
+
+    if(!s->nb_frames) {
+        score = 0.0;
+    } else {
+        compute_vmafmotion(s->prev_blur_data, s->blur_data, s->width, s->height,
+                           stride, stride, &score);
+    }
+
+    memcpy(s->prev_blur_data, s->blur_data, data_sz);
+
+    set_meta(metadata, "lavfi.vmafmotion.score", score);
+
+    s->nb_frames++;
+
+    s->motion_sum += score;
+
+    return main;
+}
+
+static av_cold int init(AVFilterContext *ctx)
+{
+    VMAFMotionContext *s = ctx->priv;
+
+    int i;
+    for(i = 0; i < 5; i++) {
+        s->filter[i] = lrint(FILTER_5[i] * (1 << N));
+    }
+
+    s->dinput.process = do_vmafmotion;
+
+    return 0;
+}
+
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat pix_fmts[] = {
+        AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV420P,
+        AV_PIX_FMT_YUV444P10LE, AV_PIX_FMT_YUV422P10LE, AV_PIX_FMT_YUV420P10LE,
+        AV_PIX_FMT_NONE
+    };
+
+    AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
+    if (!fmts_list)
+        return AVERROR(ENOMEM);
+    return ff_set_common_formats(ctx, fmts_list);
+}
+
+static int config_input_ref(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx  = inlink->dst;
+    VMAFMotionContext *s = ctx->priv;
+    ptrdiff_t stride;
+    size_t data_sz;
+
+    if (ctx->inputs[0]->w != ctx->inputs[1]->w ||
+        ctx->inputs[0]->h != ctx->inputs[1]->h) {
+        av_log(ctx, AV_LOG_ERROR, "Width and height of input videos must be same.\n");
+        return AVERROR(EINVAL);
+    }
+    if (ctx->inputs[0]->format != ctx->inputs[1]->format) {
+        av_log(ctx, AV_LOG_ERROR, "Inputs must be of same pixel format.\n");
+        return AVERROR(EINVAL);
+    }
+
+    s->desc = av_pix_fmt_desc_get(inlink->format);
+    s->width = ctx->inputs[0]->w;
+    s->height = ctx->inputs[0]->h;
+
+    stride = ALIGN_CEIL(s->width * sizeof(uint16_t));
+    data_sz = (size_t)stride * s->height;
+
+    if (!(s->prev_blur_data = av_malloc(data_sz))) {
+        return AVERROR(ENOMEM);
+    }
+    if (!(s->blur_data = av_malloc(data_sz))) {
+        return AVERROR(ENOMEM);
+    }
+    if (!(s->temp_data = av_malloc(data_sz))) {
+        return AVERROR(ENOMEM);
+    }
+
+    return 0;
+}
+
+static int config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    VMAFMotionContext *s = ctx->priv;
+    AVFilterLink *mainlink = ctx->inputs[0];
+    int ret;
+
+    outlink->w = mainlink->w;
+    outlink->h = mainlink->h;
+    outlink->time_base = mainlink->time_base;
+    outlink->sample_aspect_ratio = mainlink->sample_aspect_ratio;
+    outlink->frame_rate = mainlink->frame_rate;
+    if ((ret = ff_dualinput_init(ctx, &s->dinput)) < 0)
+        return ret;
+
+    return 0;
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *inpicref)
+{
+    VMAFMotionContext *s = inlink->dst->priv;
+    return ff_dualinput_filter_frame(&s->dinput, inlink, inpicref);
+}
+
+static int request_frame(AVFilterLink *outlink)
+{
+    VMAFMotionContext *s = outlink->src->priv;
+    return ff_dualinput_request_frame(&s->dinput, outlink);
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    VMAFMotionContext *s = ctx->priv;
+
+    if (s->nb_frames > 0) {
+        av_log(ctx, AV_LOG_INFO, "VMAF Motion avg: %.3f\n", s->motion_sum / s->nb_frames);
+    }
+
+    av_free(s->prev_blur_data);
+    av_free(s->blur_data);
+    av_free(s->temp_data);
+
+    ff_dualinput_uninit(&s->dinput);
+}
+
+static const AVFilterPad vmafmotion_inputs[] = {
+    {
+        .name         = "main",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame,
+    },{
+        .name         = "reference",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame,
+        .config_props = config_input_ref,
+    },
+    { NULL }
+};
+
+static const AVFilterPad vmafmotion_outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .config_props  = config_output,
+        .request_frame = request_frame,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_vmafmotion = {
+    .name          = "vmafmotion",
+    .description   = NULL_IF_CONFIG_SMALL("Calculate the VMAF Motion between two video streams."),
+    .init          = init,
+    .uninit        = uninit,
+    .query_formats = query_formats,
+    .priv_size     = sizeof(VMAFMotionContext),
+    .priv_class    = &vmafmotion_class,
+    .inputs        = vmafmotion_inputs,
+    .outputs       = vmafmotion_outputs,
+};
diff --git a/libavfilter/vmaf_motion.h b/libavfilter/vmaf_motion.h
new file mode 100644
index 0000000..eb41636
--- /dev/null
+++ b/libavfilter/vmaf_motion.h
@@ -0,0 +1,42 @@ 
+/*
+ * Copyright (c) 2017 Ronald S. Bultje <rsbultje@gmail.com>
+ * Copyright (c) 2017 Ashish Pratap Singh <ashk43712@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef MOTION_TOOLS_H_
+#define MOTION_TOOLS_H_
+
+#define N 15
+
+static const float FILTER_5[5] = {
+    0.054488685,
+    0.244201342,
+    0.402619947,
+    0.244201342,
+    0.054488685
+};
+
+void convolution_f32(const int *filter, int filt_width, const void *src,
+                     uint16_t *dst, uint16_t *tmp, int w, int h,
+                     ptrdiff_t src_stride, ptrdiff_t dst_stride, uint8_t type);
+
+int compute_vmafmotion(const uint16_t *ref, const uint16_t *main, int w, int h,
+                       ptrdiff_t ref_stride, ptrdiff_t main_stride, double *score);
+
+#endif /* MOTION_TOOLS_H_ */