diff mbox

[FFmpeg-devel] avfilter: add vmafmotion filter

Message ID 1505508478-17076-1-git-send-email-ashk43712@gmail.com
State New
Headers show

Commit Message

Ashish Singh Sept. 15, 2017, 8:47 p.m. UTC
From: Ashish Singh <ashk43712@gmail.com>

Hi, this patch addresses the previous review comments and changes the filter
to a single-input filter.

Signed-off-by: Ashish Singh <ashk43712@gmail.com>
---
 Changelog                   |   1 +
 doc/filters.texi            |  14 ++
 libavfilter/Makefile        |   1 +
 libavfilter/allfilters.c    |   1 +
 libavfilter/vf_vmafmotion.c | 325 ++++++++++++++++++++++++++++++++++++++++++++
 libavfilter/vmaf_motion.h   |  58 ++++++++
 6 files changed, 400 insertions(+)
 create mode 100644 libavfilter/vf_vmafmotion.c
 create mode 100644 libavfilter/vmaf_motion.h

Comments

James Almer Sept. 18, 2017, 1:16 a.m. UTC | #1
On 9/15/2017 5:47 PM, Ashish Pratap Singh wrote:
> From: Ashish Singh <ashk43712@gmail.com>
> 
> Hi, this patch addresses the previous issues and changes it to a single
> input filter.
> 
> Signed-off-by: Ashish Singh <ashk43712@gmail.com>
> ---
>  Changelog                   |   1 +
>  doc/filters.texi            |  14 ++
>  libavfilter/Makefile        |   1 +
>  libavfilter/allfilters.c    |   1 +
>  libavfilter/vf_vmafmotion.c | 325 ++++++++++++++++++++++++++++++++++++++++++++
>  libavfilter/vmaf_motion.h   |  58 ++++++++
>  6 files changed, 400 insertions(+)
>  create mode 100644 libavfilter/vf_vmafmotion.c
>  create mode 100644 libavfilter/vmaf_motion.h
> 
> diff --git a/Changelog b/Changelog
> index ea48e81..574f46e 100644
> --- a/Changelog
> +++ b/Changelog
> @@ -48,6 +48,7 @@ version <next>:
>  - convolve video filter
>  - VP9 tile threading support
>  - KMS screen grabber
> +- vmafmotion video filter
>  
>  version 3.3:
>  - CrystalHD decoder moved to new decode API
> diff --git a/doc/filters.texi b/doc/filters.texi
> index 830de54..d996357 100644
> --- a/doc/filters.texi
> +++ b/doc/filters.texi
> @@ -15570,6 +15570,20 @@ vignette='PI/4+random(1)*PI/50':eval=frame
>  
>  @end itemize
>  
> +@section vmafmotion
> +
> +Obtain the average vmaf motion score of a video.
> +It is one of the component filters of VMAF.
> +
> +The obtained average motion score is printed through the logging system.
> +
> +In the below example the input file @file{ref.mpg} is being processed and score
> +is computed.
> +
> +@example
> +ffmpeg -i ref.mpg -lavfi vmafmotion -f null -
> +@end example
> +
>  @section vstack
>  Stack input videos vertically.
>  
> diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> index 8aa974e..4289ee0 100644
> --- a/libavfilter/Makefile
> +++ b/libavfilter/Makefile
> @@ -330,6 +330,7 @@ OBJS-$(CONFIG_VFLIP_FILTER)                  += vf_vflip.o
>  OBJS-$(CONFIG_VIDSTABDETECT_FILTER)          += vidstabutils.o vf_vidstabdetect.o
>  OBJS-$(CONFIG_VIDSTABTRANSFORM_FILTER)       += vidstabutils.o vf_vidstabtransform.o
>  OBJS-$(CONFIG_VIGNETTE_FILTER)               += vf_vignette.o
> +OBJS-$(CONFIG_VMAFMOTION_FILTER)             += vf_vmafmotion.o framesync.o
>  OBJS-$(CONFIG_VSTACK_FILTER)                 += vf_stack.o framesync.o
>  OBJS-$(CONFIG_W3FDIF_FILTER)                 += vf_w3fdif.o
>  OBJS-$(CONFIG_WAVEFORM_FILTER)               += vf_waveform.o
> diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
> index 63e8672..8ec54be 100644
> --- a/libavfilter/allfilters.c
> +++ b/libavfilter/allfilters.c
> @@ -341,6 +341,7 @@ static void register_all(void)
>      REGISTER_FILTER(VIDSTABDETECT,  vidstabdetect,  vf);
>      REGISTER_FILTER(VIDSTABTRANSFORM, vidstabtransform, vf);
>      REGISTER_FILTER(VIGNETTE,       vignette,       vf);
> +    REGISTER_FILTER(VMAFMOTION,     vmafmotion,     vf);
>      REGISTER_FILTER(VSTACK,         vstack,         vf);
>      REGISTER_FILTER(W3FDIF,         w3fdif,         vf);
>      REGISTER_FILTER(WAVEFORM,       waveform,       vf);
> diff --git a/libavfilter/vf_vmafmotion.c b/libavfilter/vf_vmafmotion.c
> new file mode 100644
> index 0000000..c31c37c
> --- /dev/null
> +++ b/libavfilter/vf_vmafmotion.c
> @@ -0,0 +1,325 @@
> +/*
> + * Copyright (c) 2017 Ronald S. Bultje <rsbultje@gmail.com>
> + * Copyright (c) 2017 Ashish Pratap Singh <ashk43712@gmail.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +/**
> + * @file
> + * Calculate VMAF Motion score.
> + */
> +
> +#include "libavutil/opt.h"
> +#include "libavutil/pixdesc.h"
> +#include "avfilter.h"
> +#include "drawutils.h"
> +#include "formats.h"
> +#include "internal.h"
> +#include "vmaf_motion.h"
> +
> +#define vmafmotion_options NULL

This is unused.

> +#define BIT_SHIFT 10
> +
> +static const float FILTER_5[5] = {
> +    0.054488685,
> +    0.244201342,
> +    0.402619947,
> +    0.244201342,
> +    0.054488685
> +};
> +
> +typedef struct VMAFMotionContext {
> +    const AVClass *class;
> +    VMAFMotionData data;
> +} VMAFMotionContext;
> +
> +AVFILTER_DEFINE_CLASS(vmafmotion);
> +
> +static uint64_t image_sad(const uint16_t *img1, const uint16_t *img2, int w,
> +                          int h, ptrdiff_t _img1_stride, ptrdiff_t _img2_stride)
> +{
> +    ptrdiff_t img1_stride = _img1_stride / sizeof(*img1);
> +    ptrdiff_t img2_stride = _img2_stride / sizeof(*img2);
> +    uint64_t sum = 0;
> +    int i, j;
> +
> +    for (i = 0; i < h; i++) {
> +        for (j = 0; j < w; j++) {
> +            sum += abs(img1[j] - img2[j]);
> +        }
> +        img1 += img1_stride;
> +        img2 += img2_stride;
> +    }
> +
> +    return sum;
> +}
> +
> +static void convolution_x(const uint16_t *filter, int filt_w, const uint16_t *src,
> +                          uint16_t *dst, int w, int h, ptrdiff_t _src_stride,
> +                          ptrdiff_t _dst_stride)
> +{
> +    ptrdiff_t src_stride = _src_stride / sizeof(*src);
> +    ptrdiff_t dst_stride = _dst_stride / sizeof(*dst);
> +    int radius = filt_w / 2;
> +    int borders_left = radius;
> +    int borders_right = w - (filt_w - radius);
> +    int i, j, k;
> +    int sum = 0;
> +
> +    for (i = 0; i < h; i++) {
> +        for (j = 0; j < borders_left; j++) {
> +            sum = 0;
> +            for (k = 0; k < filt_w; k++) {
> +                int j_tap = FFABS(j - radius + k);
> +                if (j_tap >= w) {
> +                    j_tap = w - (j_tap - w + 1);
> +                }
> +                sum += filter[k] * src[i * src_stride + j_tap];
> +            }
> +            dst[i * dst_stride + j] = sum >> BIT_SHIFT;
> +        }
> +
> +        for (j = borders_left; j < borders_right; j++) {
> +            int sum = 0;
> +            for (k = 0; k < filt_w; k++) {
> +                sum += filter[k] * src[i * src_stride + j - radius + k];
> +            }
> +            dst[i * dst_stride + j] = sum >> BIT_SHIFT;
> +        }
> +
> +        for (j = borders_right; j < w; j++) {
> +            sum = 0;
> +            for (k = 0; k < filt_w; k++) {
> +                int j_tap = FFABS(j - radius + k);
> +                if (j_tap >= w) {
> +                    j_tap = w - (j_tap - w + 1);
> +                }
> +                sum += filter[k] * src[i * src_stride + j_tap];
> +            }
> +            dst[i * dst_stride + j] = sum >> BIT_SHIFT;
> +        }
> +    }
> +}
> +
> +#define conv_y_fn(type, bits) \
> +    static void convolution_y_##bits##bit(const uint16_t *filter, int filt_w, \
> +                                          const uint8_t *_src, uint16_t *dst, \
> +                                          int w, int h, ptrdiff_t _src_stride, \
> +                                          ptrdiff_t _dst_stride) \
> +{ \
> +    const type *src = (const type *) _src; \
> +    ptrdiff_t src_stride = _src_stride / sizeof(*src); \
> +    ptrdiff_t dst_stride = _dst_stride / sizeof(*dst); \
> +    int radius = filt_w / 2; \
> +    int borders_top = radius; \
> +    int borders_bottom = h - (filt_w - radius); \
> +    int i, j, k; \
> +    int sum = 0; \
> +    \
> +    for (i = 0; i < borders_top; i++) { \
> +        for (j = 0; j < w; j++) { \
> +            sum = 0; \
> +            for (k = 0; k < filt_w; k++) { \
> +                int i_tap = FFABS(i - radius + k); \
> +                if (i_tap >= h) { \
> +                    i_tap = h - (i_tap - h + 1); \
> +                } \
> +                sum += filter[k] * src[i_tap * src_stride + j]; \
> +            } \
> +            dst[i * dst_stride + j] = sum >> BIT_SHIFT; \
> +        } \
> +    } \
> +    for (i = borders_top; i < borders_bottom; i++) { \
> +        for (j = 0; j < w; j++) { \
> +            sum = 0; \
> +            for (k = 0; k < filt_w; k++) { \
> +                sum += filter[k] * src[(i - radius + k) * src_stride + j]; \
> +            } \
> +            dst[i * dst_stride + j] = sum >> BIT_SHIFT; \
> +        } \
> +    } \
> +    for (i = borders_bottom; i < h; i++) { \
> +        for (j = 0; j < w; j++) { \
> +            sum = 0; \
> +            for (k = 0; k < filt_w; k++) { \
> +                int i_tap = FFABS(i - radius + k); \
> +                if (i_tap >= h) { \
> +                    i_tap = h - (i_tap - h + 1); \
> +                } \
> +                sum += filter[k] * src[i_tap * src_stride + j]; \
> +            } \
> +            dst[i * dst_stride + j] = sum >> BIT_SHIFT; \
> +        } \
> +    } \
> +}
> +
> +conv_y_fn(uint8_t, 8);
> +conv_y_fn(uint16_t, 10);
> +
> +static void vmafmotiondsp_init(VMAFMotionDSPContext *dsp, int bpp) {
> +    dsp->convolution_x = convolution_x;
> +    dsp->convolution_y = bpp == 10 ? convolution_y_10bit : convolution_y_8bit;
> +    dsp->sad = image_sad;

You don't seem to use this function pointer anywhere.

> +}
> +
> +double ff_vmafmotion_process(VMAFMotionData *s, AVFrame *ref)

Why is this not static? It's not used outside this file.

> +{
> +    double score;
> +
> +    s->vmafdsp.convolution_y(s->filter, 5, ref->data[0], s->temp_data,
> +                             s->width, s->height, ref->linesize[0], s->stride);
> +    s->vmafdsp.convolution_x(s->filter, 5, s->temp_data, s->blur_data[0],
> +                             s->width, s->height, s->stride, s->stride);
> +
> +    if (!s->nb_frames) {
> +        score = 0.0;
> +    } else {
> +        uint64_t sad = image_sad(s->blur_data[1], s->blur_data[0],

Did you mean to use the s->vmafdsp.sad pointer here?

> +                                 s->width, s->height, s->stride, s->stride);
> +        score = (double) (sad * 1.0 / (s->width * s->height));
> +    }
> +
> +    FFSWAP(uint16_t *, s->blur_data[0], s->blur_data[1]);
> +    s->nb_frames++;
> +    s->motion_sum += score;
> +
> +    return score;
> +}
> +
> +static void set_meta(AVDictionary **metadata, const char *key, float d)
> +{
> +    char value[128];
> +    snprintf(value, sizeof(value), "%0.2f", d);
> +    av_dict_set(metadata, key, value, 0);
> +}
> +
> +static void do_vmafmotion(AVFilterContext *ctx, AVFrame *ref)
> +{
> +    VMAFMotionContext *s = ctx->priv;
> +
> +    double score;
> +
> +    score = ff_vmafmotion_process(&s->data, ref);
> +    set_meta(&ref->metadata, "lavfi.vmafmotion.score", score);
> +}
> +
> +
> +int ff_vmafmotion_init(VMAFMotionData *s,
> +                       int w, int h, enum AVPixelFormat fmt)
> +{

Same. Why not static?

> +    size_t data_sz;
> +    int i;
> +    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);
> +
> +    s->width = w;
> +    s->height = h;
> +    s->stride = FFALIGN(w * sizeof(uint16_t), 32);
> +
> +    data_sz = (size_t) s->stride * h;
> +    if (!(s->blur_data[0] = av_malloc(data_sz)) ||
> +        !(s->blur_data[1] = av_malloc(data_sz)) ||
> +        !(s->temp_data    = av_malloc(data_sz))) {
> +        return AVERROR(ENOMEM);
> +    }
> +
> +    for (i = 0; i < 5; i++) {
> +        s->filter[i] = lrint(FILTER_5[i] * (1 << BIT_SHIFT));
> +    }
> +
> +    vmafmotiondsp_init(&s->vmafdsp, desc->comp[0].depth);
> +
> +    return 0;
> +}
> +
> +static int query_formats(AVFilterContext *ctx)
> +{
> +    static const enum AVPixelFormat pix_fmts[] = {
> +        AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV420P,
> +        AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV420P10,
> +        AV_PIX_FMT_NONE
> +    };
> +
> +    AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
> +    if (!fmts_list)
> +        return AVERROR(ENOMEM);
> +    return ff_set_common_formats(ctx, fmts_list);
> +}
> +
> +static int config_input_ref(AVFilterLink *inlink)
> +{
> +    AVFilterContext *ctx  = inlink->dst;
> +    VMAFMotionContext *s = ctx->priv;
> +
> +    return ff_vmafmotion_init(&s->data, ctx->inputs[0]->w,
> +                              ctx->inputs[0]->h, ctx->inputs[0]->format);
> +}
> +
> +double ff_vmafmotion_uninit(VMAFMotionData *s)
> +{

Same.

> +    av_free(s->blur_data[0]);
> +    av_free(s->blur_data[1]);
> +    av_free(s->temp_data);
> +
> +    return s->nb_frames > 0 ? s->motion_sum / s->nb_frames : 0.0;
> +}
> +
> +static int filter_frame(AVFilterLink *inlink, AVFrame *ref)
> +{
> +    AVFilterContext *ctx = inlink->dst;
> +    do_vmafmotion(ctx, ref);
> +    return ff_filter_frame(ctx->outputs[0], ref);
> +}
> +
> +static av_cold void uninit(AVFilterContext *ctx)
> +{
> +    VMAFMotionContext *s = ctx->priv;
> +    double avg_motion = ff_vmafmotion_uninit(&s->data);
> +
> +    if (s->data.nb_frames > 0) {
> +        av_log(ctx, AV_LOG_INFO, "VMAF Motion avg: %.3f\n", avg_motion);
> +    }
> +}
> +
> +static const AVFilterPad vmafmotion_inputs[] = {
> +    {
> +        .name         = "reference",
> +        .type         = AVMEDIA_TYPE_VIDEO,
> +        .filter_frame = filter_frame,
> +        .config_props = config_input_ref,
> +    },
> +    { NULL }
> +};
> +
> +static const AVFilterPad vmafmotion_outputs[] = {
> +    {
> +        .name          = "default",
> +        .type          = AVMEDIA_TYPE_VIDEO,
> +    },
> +    { NULL }
> +};
> +
> +AVFilter ff_vf_vmafmotion = {
> +    .name          = "vmafmotion",
> +    .description   = NULL_IF_CONFIG_SMALL("Calculate the VMAF Motion score."),
> +    .uninit        = uninit,
> +    .query_formats = query_formats,
> +    .priv_size     = sizeof(VMAFMotionContext),
> +    .priv_class    = &vmafmotion_class,
> +    .inputs        = vmafmotion_inputs,
> +    .outputs       = vmafmotion_outputs,
> +};
> diff --git a/libavfilter/vmaf_motion.h b/libavfilter/vmaf_motion.h
> new file mode 100644
> index 0000000..0c71182
> --- /dev/null
> +++ b/libavfilter/vmaf_motion.h
> @@ -0,0 +1,58 @@
> +/*
> + * Copyright (c) 2017 Ronald S. Bultje <rsbultje@gmail.com>
> + * Copyright (c) 2017 Ashish Pratap Singh <ashk43712@gmail.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVFILTER_VMAFMOTION_H
> +#define AVFILTER_VMAFMOTION_H
> +
> +#include <stddef.h>
> +#include <stdint.h>
> +#include "video.h"
> +
> +typedef struct VMAFMotionDSPContext {
> +    uint64_t (*sad)(const uint16_t *img1, const uint16_t *img2, int w, int h,
> +                    ptrdiff_t img1_stride, ptrdiff_t img2_stride);
> +    void (*convolution_x)(const uint16_t *filter, int filt_w, const uint16_t *src,
> +                          uint16_t *dst, int w, int h, ptrdiff_t src_stride,
> +                          ptrdiff_t dst_stride);
> +    void (*convolution_y)(const uint16_t *filter, int filt_w, const uint8_t *src,
> +                          uint16_t *dst, int w, int h, ptrdiff_t src_stride,
> +                          ptrdiff_t dst_stride);
> +} VMAFMotionDSPContext;
> +
> +void ff_vmafmotion_init_x86(VMAFMotionDSPContext *dsp);

This function doesn't exist.

Did you forget to git add files?

> +
> +typedef struct VMAFMotionData {
> +    uint16_t filter[5];
> +    int width;
> +    int height;
> +    ptrdiff_t stride;
> +    uint16_t *blur_data[2 /* cur, prev */];
> +    uint16_t *temp_data;
> +    double motion_sum;
> +    uint64_t nb_frames;
> +    VMAFMotionDSPContext vmafdsp;
> +} VMAFMotionData;
> +
> +int ff_vmafmotion_init(VMAFMotionData *data, int w, int h, enum AVPixelFormat fmt);
> +double ff_vmafmotion_process(VMAFMotionData *data, AVFrame *frame);
> +double ff_vmafmotion_uninit(VMAFMotionData *data);
> +
> +#endif /* AVFILTER_VMAFMOTION_H */
>
Ashish Singh Sept. 18, 2017, 2:01 p.m. UTC | #2
Hi,

On Mon, Sep 18, 2017 at 6:46 AM, James Almer <jamrial@gmail.com> wrote:

> On 9/15/2017 5:47 PM, Ashish Pratap Singh wrote:
> > From: Ashish Singh <ashk43712@gmail.com>
> >
> > Hi, this patch addresses the previous issues and changes it to a single
> > input filter.
> >
> > Signed-off-by: Ashish Singh <ashk43712@gmail.com>
> > ---
> >  Changelog                   |   1 +
> >  doc/filters.texi            |  14 ++
> >  libavfilter/Makefile        |   1 +
> >  libavfilter/allfilters.c    |   1 +
> >  libavfilter/vf_vmafmotion.c | 325 ++++++++++++++++++++++++++++++
> ++++++++++++++
> >  libavfilter/vmaf_motion.h   |  58 ++++++++
> >  6 files changed, 400 insertions(+)
> >  create mode 100644 libavfilter/vf_vmafmotion.c
> >  create mode 100644 libavfilter/vmaf_motion.h
> >
> > diff --git a/Changelog b/Changelog
> > index ea48e81..574f46e 100644
> > --- a/Changelog
> > +++ b/Changelog
> > @@ -48,6 +48,7 @@ version <next>:
> >  - convolve video filter
> >  - VP9 tile threading support
> >  - KMS screen grabber
> > +- vmafmotion video filter
> >
> >  version 3.3:
> >  - CrystalHD decoder moved to new decode API
> > diff --git a/doc/filters.texi b/doc/filters.texi
> > index 830de54..d996357 100644
> > --- a/doc/filters.texi
> > +++ b/doc/filters.texi
> > @@ -15570,6 +15570,20 @@ vignette='PI/4+random(1)*PI/50':eval=frame
> >
> >  @end itemize
> >
> > +@section vmafmotion
> > +
> > +Obtain the average vmaf motion score of a video.
> > +It is one of the component filters of VMAF.
> > +
> > +The obtained average motion score is printed through the logging system.
> > +
> > +In the below example the input file @file{ref.mpg} is being processed
> and score
> > +is computed.
> > +
> > +@example
> > +ffmpeg -i ref.mpg -lavfi vmafmotion -f null -
> > +@end example
> > +
> >  @section vstack
> >  Stack input videos vertically.
> >
> > diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> > index 8aa974e..4289ee0 100644
> > --- a/libavfilter/Makefile
> > +++ b/libavfilter/Makefile
> > @@ -330,6 +330,7 @@ OBJS-$(CONFIG_VFLIP_FILTER)                  +=
> vf_vflip.o
> >  OBJS-$(CONFIG_VIDSTABDETECT_FILTER)          += vidstabutils.o
> vf_vidstabdetect.o
> >  OBJS-$(CONFIG_VIDSTABTRANSFORM_FILTER)       += vidstabutils.o
> vf_vidstabtransform.o
> >  OBJS-$(CONFIG_VIGNETTE_FILTER)               += vf_vignette.o
> > +OBJS-$(CONFIG_VMAFMOTION_FILTER)             += vf_vmafmotion.o
> framesync.o
> >  OBJS-$(CONFIG_VSTACK_FILTER)                 += vf_stack.o framesync.o
> >  OBJS-$(CONFIG_W3FDIF_FILTER)                 += vf_w3fdif.o
> >  OBJS-$(CONFIG_WAVEFORM_FILTER)               += vf_waveform.o
> > diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
> > index 63e8672..8ec54be 100644
> > --- a/libavfilter/allfilters.c
> > +++ b/libavfilter/allfilters.c
> > @@ -341,6 +341,7 @@ static void register_all(void)
> >      REGISTER_FILTER(VIDSTABDETECT,  vidstabdetect,  vf);
> >      REGISTER_FILTER(VIDSTABTRANSFORM, vidstabtransform, vf);
> >      REGISTER_FILTER(VIGNETTE,       vignette,       vf);
> > +    REGISTER_FILTER(VMAFMOTION,     vmafmotion,     vf);
> >      REGISTER_FILTER(VSTACK,         vstack,         vf);
> >      REGISTER_FILTER(W3FDIF,         w3fdif,         vf);
> >      REGISTER_FILTER(WAVEFORM,       waveform,       vf);
> > diff --git a/libavfilter/vf_vmafmotion.c b/libavfilter/vf_vmafmotion.c
> > new file mode 100644
> > index 0000000..c31c37c
> > --- /dev/null
> > +++ b/libavfilter/vf_vmafmotion.c
> > @@ -0,0 +1,325 @@
> > +/*
> > + * Copyright (c) 2017 Ronald S. Bultje <rsbultje@gmail.com>
> > + * Copyright (c) 2017 Ashish Pratap Singh <ashk43712@gmail.com>
> > + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301 USA
> > + */
> > +
> > +/**
> > + * @file
> > + * Calculate VMAF Motion score.
> > + */
> > +
> > +#include "libavutil/opt.h"
> > +#include "libavutil/pixdesc.h"
> > +#include "avfilter.h"
> > +#include "drawutils.h"
> > +#include "formats.h"
> > +#include "internal.h"
> > +#include "vmaf_motion.h"
> > +
> > +#define vmafmotion_options NULL
>
> This is unused.
>
Ok, I'll remove it.

>
> > +#define BIT_SHIFT 10
> > +
> > +static const float FILTER_5[5] = {
> > +    0.054488685,
> > +    0.244201342,
> > +    0.402619947,
> > +    0.244201342,
> > +    0.054488685
> > +};
> > +
> > +typedef struct VMAFMotionContext {
> > +    const AVClass *class;
> > +    VMAFMotionData data;
> > +} VMAFMotionContext;
> > +
> > +AVFILTER_DEFINE_CLASS(vmafmotion);
> > +
> > +static uint64_t image_sad(const uint16_t *img1, const uint16_t *img2,
> int w,
> > +                          int h, ptrdiff_t _img1_stride, ptrdiff_t
> _img2_stride)
> > +{
> > +    ptrdiff_t img1_stride = _img1_stride / sizeof(*img1);
> > +    ptrdiff_t img2_stride = _img2_stride / sizeof(*img2);
> > +    uint64_t sum = 0;
> > +    int i, j;
> > +
> > +    for (i = 0; i < h; i++) {
> > +        for (j = 0; j < w; j++) {
> > +            sum += abs(img1[j] - img2[j]);
> > +        }
> > +        img1 += img1_stride;
> > +        img2 += img2_stride;
> > +    }
> > +
> > +    return sum;
> > +}
> > +
> > +static void convolution_x(const uint16_t *filter, int filt_w, const
> uint16_t *src,
> > +                          uint16_t *dst, int w, int h, ptrdiff_t
> _src_stride,
> > +                          ptrdiff_t _dst_stride)
> > +{
> > +    ptrdiff_t src_stride = _src_stride / sizeof(*src);
> > +    ptrdiff_t dst_stride = _dst_stride / sizeof(*dst);
> > +    int radius = filt_w / 2;
> > +    int borders_left = radius;
> > +    int borders_right = w - (filt_w - radius);
> > +    int i, j, k;
> > +    int sum = 0;
> > +
> > +    for (i = 0; i < h; i++) {
> > +        for (j = 0; j < borders_left; j++) {
> > +            sum = 0;
> > +            for (k = 0; k < filt_w; k++) {
> > +                int j_tap = FFABS(j - radius + k);
> > +                if (j_tap >= w) {
> > +                    j_tap = w - (j_tap - w + 1);
> > +                }
> > +                sum += filter[k] * src[i * src_stride + j_tap];
> > +            }
> > +            dst[i * dst_stride + j] = sum >> BIT_SHIFT;
> > +        }
> > +
> > +        for (j = borders_left; j < borders_right; j++) {
> > +            int sum = 0;
> > +            for (k = 0; k < filt_w; k++) {
> > +                sum += filter[k] * src[i * src_stride + j - radius + k];
> > +            }
> > +            dst[i * dst_stride + j] = sum >> BIT_SHIFT;
> > +        }
> > +
> > +        for (j = borders_right; j < w; j++) {
> > +            sum = 0;
> > +            for (k = 0; k < filt_w; k++) {
> > +                int j_tap = FFABS(j - radius + k);
> > +                if (j_tap >= w) {
> > +                    j_tap = w - (j_tap - w + 1);
> > +                }
> > +                sum += filter[k] * src[i * src_stride + j_tap];
> > +            }
> > +            dst[i * dst_stride + j] = sum >> BIT_SHIFT;
> > +        }
> > +    }
> > +}
> > +
> > +#define conv_y_fn(type, bits) \
> > +    static void convolution_y_##bits##bit(const uint16_t *filter, int
> filt_w, \
> > +                                          const uint8_t *_src, uint16_t
> *dst, \
> > +                                          int w, int h, ptrdiff_t
> _src_stride, \
> > +                                          ptrdiff_t _dst_stride) \
> > +{ \
> > +    const type *src = (const type *) _src; \
> > +    ptrdiff_t src_stride = _src_stride / sizeof(*src); \
> > +    ptrdiff_t dst_stride = _dst_stride / sizeof(*dst); \
> > +    int radius = filt_w / 2; \
> > +    int borders_top = radius; \
> > +    int borders_bottom = h - (filt_w - radius); \
> > +    int i, j, k; \
> > +    int sum = 0; \
> > +    \
> > +    for (i = 0; i < borders_top; i++) { \
> > +        for (j = 0; j < w; j++) { \
> > +            sum = 0; \
> > +            for (k = 0; k < filt_w; k++) { \
> > +                int i_tap = FFABS(i - radius + k); \
> > +                if (i_tap >= h) { \
> > +                    i_tap = h - (i_tap - h + 1); \
> > +                } \
> > +                sum += filter[k] * src[i_tap * src_stride + j]; \
> > +            } \
> > +            dst[i * dst_stride + j] = sum >> BIT_SHIFT; \
> > +        } \
> > +    } \
> > +    for (i = borders_top; i < borders_bottom; i++) { \
> > +        for (j = 0; j < w; j++) { \
> > +            sum = 0; \
> > +            for (k = 0; k < filt_w; k++) { \
> > +                sum += filter[k] * src[(i - radius + k) * src_stride +
> j]; \
> > +            } \
> > +            dst[i * dst_stride + j] = sum >> BIT_SHIFT; \
> > +        } \
> > +    } \
> > +    for (i = borders_bottom; i < h; i++) { \
> > +        for (j = 0; j < w; j++) { \
> > +            sum = 0; \
> > +            for (k = 0; k < filt_w; k++) { \
> > +                int i_tap = FFABS(i - radius + k); \
> > +                if (i_tap >= h) { \
> > +                    i_tap = h - (i_tap - h + 1); \
> > +                } \
> > +                sum += filter[k] * src[i_tap * src_stride + j]; \
> > +            } \
> > +            dst[i * dst_stride + j] = sum >> BIT_SHIFT; \
> > +        } \
> > +    } \
> > +}
> > +
> > +conv_y_fn(uint8_t, 8);
> > +conv_y_fn(uint16_t, 10);
> > +
> > +static void vmafmotiondsp_init(VMAFMotionDSPContext *dsp, int bpp) {
> > +    dsp->convolution_x = convolution_x;
> > +    dsp->convolution_y = bpp == 10 ? convolution_y_10bit :
> convolution_y_8bit;
> > +    dsp->sad = image_sad;
>
> You don't seem to use this function pointer anywhere.
>
> > +}
> > +
> > +double ff_vmafmotion_process(VMAFMotionData *s, AVFrame *ref)
>
> Why is this not static? It's not used outside this file.
>
> these (and similar) functions will be used later by the vmaf filter.


> > +{
> > +    double score;
> > +
> > +    s->vmafdsp.convolution_y(s->filter, 5, ref->data[0], s->temp_data,
> > +                             s->width, s->height, ref->linesize[0],
> s->stride);
> > +    s->vmafdsp.convolution_x(s->filter, 5, s->temp_data,
> s->blur_data[0],
> > +                             s->width, s->height, s->stride, s->stride);
> > +
> > +    if (!s->nb_frames) {
> > +        score = 0.0;
> > +    } else {
> > +        uint64_t sad = image_sad(s->blur_data[1], s->blur_data[0],
>
> Did you mean to use the s->vmafdsp.sad pointer here?
>
Yes, the dsp pointer will be used in a later patch.

>
> > +                                 s->width, s->height, s->stride,
> s->stride);
> > +        score = (double) (sad * 1.0 / (s->width * s->height));
> > +    }
> > +
> > +    FFSWAP(uint16_t *, s->blur_data[0], s->blur_data[1]);
> > +    s->nb_frames++;
> > +    s->motion_sum += score;
> > +
> > +    return score;
> > +}
> > +
> > +static void set_meta(AVDictionary **metadata, const char *key, float d)
> > +{
> > +    char value[128];
> > +    snprintf(value, sizeof(value), "%0.2f", d);
> > +    av_dict_set(metadata, key, value, 0);
> > +}
> > +
> > +static void do_vmafmotion(AVFilterContext *ctx, AVFrame *ref)
> > +{
> > +    VMAFMotionContext *s = ctx->priv;
> > +
> > +    double score;
> > +
> > +    score = ff_vmafmotion_process(&s->data, ref);
> > +    set_meta(&ref->metadata, "lavfi.vmafmotion.score", score);
> > +}
> > +
> > +
> > +int ff_vmafmotion_init(VMAFMotionData *s,
> > +                       int w, int h, enum AVPixelFormat fmt)
> > +{
>
> Same. Why not static?
>
> > +    size_t data_sz;
> > +    int i;
> > +    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);
> > +
> > +    s->width = w;
> > +    s->height = h;
> > +    s->stride = FFALIGN(w * sizeof(uint16_t), 32);
> > +
> > +    data_sz = (size_t) s->stride * h;
> > +    if (!(s->blur_data[0] = av_malloc(data_sz)) ||
> > +        !(s->blur_data[1] = av_malloc(data_sz)) ||
> > +        !(s->temp_data    = av_malloc(data_sz))) {
> > +        return AVERROR(ENOMEM);
> > +    }
> > +
> > +    for (i = 0; i < 5; i++) {
> > +        s->filter[i] = lrint(FILTER_5[i] * (1 << BIT_SHIFT));
> > +    }
> > +
> > +    vmafmotiondsp_init(&s->vmafdsp, desc->comp[0].depth);
> > +
> > +    return 0;
> > +}
> > +
> > +static int query_formats(AVFilterContext *ctx)
> > +{
> > +    static const enum AVPixelFormat pix_fmts[] = {
> > +        AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV420P,
> > +        AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10,
> AV_PIX_FMT_YUV420P10,
> > +        AV_PIX_FMT_NONE
> > +    };
> > +
> > +    AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
> > +    if (!fmts_list)
> > +        return AVERROR(ENOMEM);
> > +    return ff_set_common_formats(ctx, fmts_list);
> > +}
> > +
> > +static int config_input_ref(AVFilterLink *inlink)
> > +{
> > +    AVFilterContext *ctx  = inlink->dst;
> > +    VMAFMotionContext *s = ctx->priv;
> > +
> > +    return ff_vmafmotion_init(&s->data, ctx->inputs[0]->w,
> > +                              ctx->inputs[0]->h,
> ctx->inputs[0]->format);
> > +}
> > +
> > +double ff_vmafmotion_uninit(VMAFMotionData *s)
> > +{
>
> Same.
>
> > +    av_free(s->blur_data[0]);
> > +    av_free(s->blur_data[1]);
> > +    av_free(s->temp_data);
> > +
> > +    return s->nb_frames > 0 ? s->motion_sum / s->nb_frames : 0.0;
> > +}
> > +
> > +static int filter_frame(AVFilterLink *inlink, AVFrame *ref)
> > +{
> > +    AVFilterContext *ctx = inlink->dst;
> > +    do_vmafmotion(ctx, ref);
> > +    return ff_filter_frame(ctx->outputs[0], ref);
> > +}
> > +
> > +static av_cold void uninit(AVFilterContext *ctx)
> > +{
> > +    VMAFMotionContext *s = ctx->priv;
> > +    double avg_motion = ff_vmafmotion_uninit(&s->data);
> > +
> > +    if (s->data.nb_frames > 0) {
> > +        av_log(ctx, AV_LOG_INFO, "VMAF Motion avg: %.3f\n", avg_motion);
> > +    }
> > +}
> > +
> > +static const AVFilterPad vmafmotion_inputs[] = {
> > +    {
> > +        .name         = "reference",
> > +        .type         = AVMEDIA_TYPE_VIDEO,
> > +        .filter_frame = filter_frame,
> > +        .config_props = config_input_ref,
> > +    },
> > +    { NULL }
> > +};
> > +
> > +static const AVFilterPad vmafmotion_outputs[] = {
> > +    {
> > +        .name          = "default",
> > +        .type          = AVMEDIA_TYPE_VIDEO,
> > +    },
> > +    { NULL }
> > +};
> > +
> > +AVFilter ff_vf_vmafmotion = {
> > +    .name          = "vmafmotion",
> > +    .description   = NULL_IF_CONFIG_SMALL("Calculate the VMAF Motion
> score."),
> > +    .uninit        = uninit,
> > +    .query_formats = query_formats,
> > +    .priv_size     = sizeof(VMAFMotionContext),
> > +    .priv_class    = &vmafmotion_class,
> > +    .inputs        = vmafmotion_inputs,
> > +    .outputs       = vmafmotion_outputs,
> > +};
> > diff --git a/libavfilter/vmaf_motion.h b/libavfilter/vmaf_motion.h
> > new file mode 100644
> > index 0000000..0c71182
> > --- /dev/null
> > +++ b/libavfilter/vmaf_motion.h
> > @@ -0,0 +1,58 @@
> > +/*
> > + * Copyright (c) 2017 Ronald S. Bultje <rsbultje@gmail.com>
> > + * Copyright (c) 2017 Ashish Pratap Singh <ashk43712@gmail.com>
> > + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301 USA
> > + */
> > +
> > +#ifndef AVFILTER_VMAFMOTION_H
> > +#define AVFILTER_VMAFMOTION_H
> > +
> > +#include <stddef.h>
> > +#include <stdint.h>
> > +#include "video.h"
> > +
> > +typedef struct VMAFMotionDSPContext {
> > +    uint64_t (*sad)(const uint16_t *img1, const uint16_t *img2, int w,
> int h,
> > +                    ptrdiff_t img1_stride, ptrdiff_t img2_stride);
> > +    void (*convolution_x)(const uint16_t *filter, int filt_w, const
> uint16_t *src,
> > +                          uint16_t *dst, int w, int h, ptrdiff_t
> src_stride,
> > +                          ptrdiff_t dst_stride);
> > +    void (*convolution_y)(const uint16_t *filter, int filt_w, const
> uint8_t *src,
> > +                          uint16_t *dst, int w, int h, ptrdiff_t
> src_stride,
> > +                          ptrdiff_t dst_stride);
> > +} VMAFMotionDSPContext;
> > +
> > +void ff_vmafmotion_init_x86(VMAFMotionDSPContext *dsp);
>
> This function doesn't exist.
>
> Did you forget to git add files?
>

No, I am currently working on the SIMD for this filter, so it will be used
later.


>
> > +
> > +typedef struct VMAFMotionData {
> > +    uint16_t filter[5];
> > +    int width;
> > +    int height;
> > +    ptrdiff_t stride;
> > +    uint16_t *blur_data[2 /* cur, prev */];
> > +    uint16_t *temp_data;
> > +    double motion_sum;
> > +    uint64_t nb_frames;
> > +    VMAFMotionDSPContext vmafdsp;
> > +} VMAFMotionData;
> > +
> > +int ff_vmafmotion_init(VMAFMotionData *data, int w, int h, enum
> AVPixelFormat fmt);
> > +double ff_vmafmotion_process(VMAFMotionData *data, AVFrame *frame);
> > +double ff_vmafmotion_uninit(VMAFMotionData *data);
> > +
> > +#endif /* AVFILTER_VMAFMOTION_H */
> >
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>

Thanks.
Ronald S. Bultje Sept. 29, 2017, 1:39 p.m. UTC | #3
Hi,

On Fri, Sep 15, 2017 at 4:47 PM, Ashish Pratap Singh <ashk43712@gmail.com>
wrote:

> From: Ashish Singh <ashk43712@gmail.com>
>
> Hi, this patch addresses the previous issues and changes it to a single
> input filter.
>
> Signed-off-by: Ashish Singh <ashk43712@gmail.com>
> ---
>  Changelog                   |   1 +
>  doc/filters.texi            |  14 ++
>  libavfilter/Makefile        |   1 +
>  libavfilter/allfilters.c    |   1 +
>  libavfilter/vf_vmafmotion.c | 325 ++++++++++++++++++++++++++++++
> ++++++++++++++
>  libavfilter/vmaf_motion.h   |  58 ++++++++
>  6 files changed, 400 insertions(+)
>  create mode 100644 libavfilter/vf_vmafmotion.c
>  create mode 100644 libavfilter/vmaf_motion.h


I made tiny changes to this patch (see attached patch). Basically, it adds
an option (similar to psnr/ssim filters) to log the per-frame scores to a
file, which can be convenient for debugging the per-frame scores, or using
them externally using CLI. Secondly, it adjusts the filter/sad so that the
calculations use a slightly higher internal depth for the fixed-point
integer calculations. This makes the score slightly closer to the
floating-point equivalent in the reference code. Lastly, it normalizes the
10-bit score back to 8-bit, which is what the reference code does also. It
also makes use of the vmafdsp.sad() function pointer instead of calling
image_sad() directly, so SIMD will be used when added later on.

This should address all review comments (the rest were basically things
that will be used in subsequent patches, so whether we address it or not
doesn't really affect the final product), so if there's no further
comments, I'll squash the two patches together and push it.

Ronald
Ronald S. Bultje Sept. 30, 2017, 3:52 p.m. UTC | #4
Hi,

On Fri, Sep 29, 2017 at 9:39 AM, Ronald S. Bultje <rsbultje@gmail.com>
wrote:

> Hi,
>
> On Fri, Sep 15, 2017 at 4:47 PM, Ashish Pratap Singh <ashk43712@gmail.com>
> wrote:
>
>> From: Ashish Singh <ashk43712@gmail.com>
>>
>> Hi, this patch addresses the previous issues and changes it to a single
>> input filter.
>>
>> Signed-off-by: Ashish Singh <ashk43712@gmail.com>
>> ---
>>  Changelog                   |   1 +
>>  doc/filters.texi            |  14 ++
>>  libavfilter/Makefile        |   1 +
>>  libavfilter/allfilters.c    |   1 +
>>  libavfilter/vf_vmafmotion.c | 325 ++++++++++++++++++++++++++++++
>> ++++++++++++++
>>  libavfilter/vmaf_motion.h   |  58 ++++++++
>>  6 files changed, 400 insertions(+)
>>  create mode 100644 libavfilter/vf_vmafmotion.c
>>  create mode 100644 libavfilter/vmaf_motion.h
>
>
> I made tiny changes to this patch (see attached patch). Basically, it adds
> an option (similar to psnr/ssim filters) to log the per-frame scores to a
> file, which can be convenient for debugging the per-frame scores, or using
> them externally using CLI. Secondly, it adjusts the filter/sad so that the
> calculations use a slightly higher internal depth for the fixed-point
> integer calculations. This makes the score slightly closer to the
> floating-point equivalent in the reference code. Lastly, it normalizes the
> 10-bit score back to 8-bit, which is what the reference code does also. It
> also makes use of the vmafdsp.sad() function pointer instead of calling
> image_sad() directly, so SIMD will be used when added later on.
>
> This should address all review comments (the rest were basically things
> that will be used in subsequent patches, so whether we address it or not
> doesn't really affect the final product), so if there's no further
> comments, I'll squash the two patches together and push it.
>

And ... pushed.

Ronald
Carl Eugen Hoyos Sept. 30, 2017, 5:31 p.m. UTC | #5
Hi!

2017-09-15 22:47 GMT+02:00 Ashish Pratap Singh <ashk43712@gmail.com>:

> +static int query_formats(AVFilterContext *ctx)
> +{
> +    static const enum AVPixelFormat pix_fmts[] = {
> +        AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P,
> AV_PIX_FMT_YUV420P,
> +        AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10,
> AV_PIX_FMT_YUV420P10,

Is the algorithm only defined for these formats and bit-depth
or are there just missing features?
Gray and gray10 come to mind...

Thank you, Carl Eugen
Ronald S. Bultje Sept. 30, 2017, 5:47 p.m. UTC | #6
Hi Carl,

On Sat, Sep 30, 2017 at 1:31 PM, Carl Eugen Hoyos <ceffmpeg@gmail.com>
wrote:

> Hi!
>
> 2017-09-15 22:47 GMT+02:00 Ashish Pratap Singh <ashk43712@gmail.com>:
>
> > +static int query_formats(AVFilterContext *ctx)
> > +{
> > +    static const enum AVPixelFormat pix_fmts[] = {
> > +        AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P,
> > AV_PIX_FMT_YUV420P,
> > +        AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10,
> > AV_PIX_FMT_YUV420P10,
>
> Is the algorithm only defined for these formats and bit-depth
> or are there just missing features?
> Gray and gray10 come to mind...
>

Great question! I _believe_ that vmaf overall is luma-only, so it should be
entirely independent of chroma. As such, gray and gray10 could probably be
added to the list and it should just work.

Ronald
Carl Eugen Hoyos Sept. 30, 2017, 6:19 p.m. UTC | #7
2017-09-30 19:47 GMT+02:00 Ronald S. Bultje <rsbultje@gmail.com>:
> Hi Carl,
>
> On Sat, Sep 30, 2017 at 1:31 PM, Carl Eugen Hoyos <ceffmpeg@gmail.com>
> wrote:
>
>> Hi!
>>
>> 2017-09-15 22:47 GMT+02:00 Ashish Pratap Singh <ashk43712@gmail.com>:
>>
>> > +static int query_formats(AVFilterContext *ctx)
>> > +{
>> > +    static const enum AVPixelFormat pix_fmts[] = {
>> > +        AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P,
>> > AV_PIX_FMT_YUV420P,
>> > +        AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10,
>> > AV_PIX_FMT_YUV420P10,
>>
>> Is the algorithm only defined for these formats and bit-depth
>> or are there just missing features?
>> Gray and gray10 come to mind...
>>
>
> Great question! I _believe_ that vmaf overall is luma-only, so it should be
> entirely independent of chroma.

Then imo, above function is just wrong, it should check for
non-rgb or similar (think of YUVA444 and friends).

Carl Eugen
Ronald S. Bultje Sept. 30, 2017, 6:30 p.m. UTC | #8
Hi Carl,

On Sat, Sep 30, 2017 at 2:19 PM, Carl Eugen Hoyos <ceffmpeg@gmail.com>
wrote:

> 2017-09-30 19:47 GMT+02:00 Ronald S. Bultje <rsbultje@gmail.com>:
> > Hi Carl,
> >
> > On Sat, Sep 30, 2017 at 1:31 PM, Carl Eugen Hoyos <ceffmpeg@gmail.com>
> > wrote:
> >
> >> Hi!
> >>
> >> 2017-09-15 22:47 GMT+02:00 Ashish Pratap Singh <ashk43712@gmail.com>:
> >>
> >> > +static int query_formats(AVFilterContext *ctx)
> >> > +{
> >> > +    static const enum AVPixelFormat pix_fmts[] = {
> >> > +        AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P,
> >> > AV_PIX_FMT_YUV420P,
> >> > +        AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10,
> >> > AV_PIX_FMT_YUV420P10,
> >>
> >> Is the algorithm only defined for these formats and bit-depth
> >> or are there just missing features?
> >> Gray and gray10 come to mind...
> >>
> >
> > Great question! I _believe_ that vmaf overall is luma-only, so it should
> be
> > entirely independent of chroma.
>
> Then imo, above function is just wrong, it should check for
> non-rgb or similar (think of YUVA444 and friends).
>

I don't think I'm familiar enough with lavfi to send a patch, can you send
one? What I've asked Ashish to do (and what he's done here) is simply to
reproduce as closely as possible what Netflix' code does, and they only
support 420, 422 and 444 for 8 and 10 bits/component. I'm happy to support
more if I know how to.

Ronald
diff mbox

Patch

diff --git a/Changelog b/Changelog
index ea48e81..574f46e 100644
--- a/Changelog
+++ b/Changelog
@@ -48,6 +48,7 @@  version <next>:
 - convolve video filter
 - VP9 tile threading support
 - KMS screen grabber
+- vmafmotion video filter
 
 version 3.3:
 - CrystalHD decoder moved to new decode API
diff --git a/doc/filters.texi b/doc/filters.texi
index 830de54..d996357 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -15570,6 +15570,20 @@  vignette='PI/4+random(1)*PI/50':eval=frame
 
 @end itemize
 
+@section vmafmotion
+
+Obtain the average vmaf motion score of a video.
+It is one of the component filters of VMAF.
+
+The obtained average motion score is printed through the logging system.
+
+In the below example the input file @file{ref.mpg} is being processed and score
+is computed.
+
+@example
+ffmpeg -i ref.mpg -lavfi vmafmotion -f null -
+@end example
+
 @section vstack
 Stack input videos vertically.
 
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 8aa974e..4289ee0 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -330,6 +330,7 @@  OBJS-$(CONFIG_VFLIP_FILTER)                  += vf_vflip.o
 OBJS-$(CONFIG_VIDSTABDETECT_FILTER)          += vidstabutils.o vf_vidstabdetect.o
 OBJS-$(CONFIG_VIDSTABTRANSFORM_FILTER)       += vidstabutils.o vf_vidstabtransform.o
 OBJS-$(CONFIG_VIGNETTE_FILTER)               += vf_vignette.o
+OBJS-$(CONFIG_VMAFMOTION_FILTER)             += vf_vmafmotion.o framesync.o
 OBJS-$(CONFIG_VSTACK_FILTER)                 += vf_stack.o framesync.o
 OBJS-$(CONFIG_W3FDIF_FILTER)                 += vf_w3fdif.o
 OBJS-$(CONFIG_WAVEFORM_FILTER)               += vf_waveform.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 63e8672..8ec54be 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -341,6 +341,7 @@  static void register_all(void)
     REGISTER_FILTER(VIDSTABDETECT,  vidstabdetect,  vf);
     REGISTER_FILTER(VIDSTABTRANSFORM, vidstabtransform, vf);
     REGISTER_FILTER(VIGNETTE,       vignette,       vf);
+    REGISTER_FILTER(VMAFMOTION,     vmafmotion,     vf);
     REGISTER_FILTER(VSTACK,         vstack,         vf);
     REGISTER_FILTER(W3FDIF,         w3fdif,         vf);
     REGISTER_FILTER(WAVEFORM,       waveform,       vf);
diff --git a/libavfilter/vf_vmafmotion.c b/libavfilter/vf_vmafmotion.c
new file mode 100644
index 0000000..c31c37c
--- /dev/null
+++ b/libavfilter/vf_vmafmotion.c
@@ -0,0 +1,325 @@ 
+/*
+ * Copyright (c) 2017 Ronald S. Bultje <rsbultje@gmail.com>
+ * Copyright (c) 2017 Ashish Pratap Singh <ashk43712@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Calculate VMAF Motion score.
+ */
+
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+#include "avfilter.h"
+#include "drawutils.h"
+#include "formats.h"
+#include "internal.h"
+#include "vmaf_motion.h"
+
+/* the filter exposes no user-settable options (yet) */
+#define vmafmotion_options NULL
+/* number of fractional bits used for the fixed-point blur coefficients */
+#define BIT_SHIFT 10
+
+/* 5-tap blur kernel taken from the VMAF reference implementation;
+ * the taps sum to exactly 1.0 */
+static const float FILTER_5[5] = {
+    0.054488685,
+    0.244201342,
+    0.402619947,
+    0.244201342,
+    0.054488685
+};
+
+typedef struct VMAFMotionContext {
+    const AVClass *class;    /* required first member for AVOptions */
+    VMAFMotionData data;     /* per-instance scoring state */
+} VMAFMotionContext;
+
+AVFILTER_DEFINE_CLASS(vmafmotion);
+
+/**
+ * Sum of absolute differences between two w x h 16-bit images.
+ *
+ * The strides are passed in BYTES (hence the division by the element size
+ * below) so the same signature works for the DSP function pointer.
+ */
+static uint64_t image_sad(const uint16_t *img1, const uint16_t *img2, int w,
+                          int h, ptrdiff_t _img1_stride, ptrdiff_t _img2_stride)
+{
+    /* convert byte strides to element strides */
+    ptrdiff_t img1_stride = _img1_stride / sizeof(*img1);
+    ptrdiff_t img2_stride = _img2_stride / sizeof(*img2);
+    uint64_t sum = 0;
+    int i, j;
+
+    for (i = 0; i < h; i++) {
+        for (j = 0; j < w; j++) {
+            sum += abs(img1[j] - img2[j]);
+        }
+        img1 += img1_stride;
+        img2 += img2_stride;
+    }
+
+    return sum;
+}
+
+/**
+ * Horizontal 1-D convolution with a fixed-point filter.
+ *
+ * Borders are handled by mirroring the tap index back into [0, w) (the
+ * j_tap computation below); the accumulated fixed-point sum is scaled back
+ * down by BIT_SHIFT. Strides are in bytes.
+ */
+static void convolution_x(const uint16_t *filter, int filt_w, const uint16_t *src,
+                          uint16_t *dst, int w, int h, ptrdiff_t _src_stride,
+                          ptrdiff_t _dst_stride)
+{
+    ptrdiff_t src_stride = _src_stride / sizeof(*src);
+    ptrdiff_t dst_stride = _dst_stride / sizeof(*dst);
+    int radius = filt_w / 2;
+    int borders_left = radius;
+    int borders_right = w - (filt_w - radius);
+    int i, j, k;
+    int sum = 0;
+
+    for (i = 0; i < h; i++) {
+        /* left border: mirror taps that fall outside the row */
+        for (j = 0; j < borders_left; j++) {
+            sum = 0;
+            for (k = 0; k < filt_w; k++) {
+                int j_tap = FFABS(j - radius + k);
+                if (j_tap >= w) {
+                    j_tap = w - (j_tap - w + 1);
+                }
+                sum += filter[k] * src[i * src_stride + j_tap];
+            }
+            dst[i * dst_stride + j] = sum >> BIT_SHIFT;
+        }
+
+        /* interior: all taps are in range, no mirroring needed */
+        for (j = borders_left; j < borders_right; j++) {
+            int sum = 0;
+            for (k = 0; k < filt_w; k++) {
+                sum += filter[k] * src[i * src_stride + j - radius + k];
+            }
+            dst[i * dst_stride + j] = sum >> BIT_SHIFT;
+        }
+
+        /* right border: mirror again */
+        for (j = borders_right; j < w; j++) {
+            sum = 0;
+            for (k = 0; k < filt_w; k++) {
+                int j_tap = FFABS(j - radius + k);
+                if (j_tap >= w) {
+                    j_tap = w - (j_tap - w + 1);
+                }
+                sum += filter[k] * src[i * src_stride + j_tap];
+            }
+            dst[i * dst_stride + j] = sum >> BIT_SHIFT;
+        }
+    }
+}
+
+/**
+ * Template for the vertical 1-D convolution pass.
+ *
+ * The macro is instantiated per input bit depth: the source is read as
+ * 'type' (uint8_t for 8-bit, uint16_t for 10-bit input), the destination is
+ * always the 16-bit intermediate buffer. Top/bottom borders are mirrored
+ * via i_tap, mirroring convolution_x() above; strides are in bytes.
+ */
+#define conv_y_fn(type, bits) \
+    static void convolution_y_##bits##bit(const uint16_t *filter, int filt_w, \
+                                          const uint8_t *_src, uint16_t *dst, \
+                                          int w, int h, ptrdiff_t _src_stride, \
+                                          ptrdiff_t _dst_stride) \
+{ \
+    const type *src = (const type *) _src; \
+    ptrdiff_t src_stride = _src_stride / sizeof(*src); \
+    ptrdiff_t dst_stride = _dst_stride / sizeof(*dst); \
+    int radius = filt_w / 2; \
+    int borders_top = radius; \
+    int borders_bottom = h - (filt_w - radius); \
+    int i, j, k; \
+    int sum = 0; \
+    \
+    for (i = 0; i < borders_top; i++) { \
+        for (j = 0; j < w; j++) { \
+            sum = 0; \
+            for (k = 0; k < filt_w; k++) { \
+                int i_tap = FFABS(i - radius + k); \
+                if (i_tap >= h) { \
+                    i_tap = h - (i_tap - h + 1); \
+                } \
+                sum += filter[k] * src[i_tap * src_stride + j]; \
+            } \
+            dst[i * dst_stride + j] = sum >> BIT_SHIFT; \
+        } \
+    } \
+    for (i = borders_top; i < borders_bottom; i++) { \
+        for (j = 0; j < w; j++) { \
+            sum = 0; \
+            for (k = 0; k < filt_w; k++) { \
+                sum += filter[k] * src[(i - radius + k) * src_stride + j]; \
+            } \
+            dst[i * dst_stride + j] = sum >> BIT_SHIFT; \
+        } \
+    } \
+    for (i = borders_bottom; i < h; i++) { \
+        for (j = 0; j < w; j++) { \
+            sum = 0; \
+            for (k = 0; k < filt_w; k++) { \
+                int i_tap = FFABS(i - radius + k); \
+                if (i_tap >= h) { \
+                    i_tap = h - (i_tap - h + 1); \
+                } \
+                sum += filter[k] * src[i_tap * src_stride + j]; \
+            } \
+            dst[i * dst_stride + j] = sum >> BIT_SHIFT; \
+        } \
+    } \
+}
+
+/* instantiate the 8-bit and 10-bit variants */
+conv_y_fn(uint8_t, 8);
+conv_y_fn(uint16_t, 10);
+
+/* Hook up the scalar C implementations for the given input bit depth. */
+static void vmafmotiondsp_init(VMAFMotionDSPContext *dsp, int bpp) {
+    if (bpp == 10)
+        dsp->convolution_y = convolution_y_10bit;
+    else
+        dsp->convolution_y = convolution_y_8bit;
+    dsp->convolution_x = convolution_x;
+    dsp->sad           = image_sad;
+}
+
+/**
+ * Compute the motion score of one frame and accumulate running totals.
+ *
+ * The luma plane is blurred (vertical pass at native bit depth into the
+ * 16-bit temp buffer, then horizontal pass), and the score is the mean
+ * absolute difference between the current and previous blurred frames.
+ *
+ * @return the per-frame motion score (0.0 for the very first frame)
+ */
+double ff_vmafmotion_process(VMAFMotionData *s, AVFrame *ref)
+{
+    double score;
+
+    s->vmafdsp.convolution_y(s->filter, 5, ref->data[0], s->temp_data,
+                             s->width, s->height, ref->linesize[0], s->stride);
+    s->vmafdsp.convolution_x(s->filter, 5, s->temp_data, s->blur_data[0],
+                             s->width, s->height, s->stride, s->stride);
+
+    if (!s->nb_frames) {
+        /* nothing to compare against on the first frame */
+        score = 0.0;
+    } else {
+        /* go through the DSP function pointer rather than calling
+         * image_sad() directly, so SIMD versions are used once wired in */
+        uint64_t sad = s->vmafdsp.sad(s->blur_data[1], s->blur_data[0],
+                                      s->width, s->height, s->stride, s->stride);
+        score = (double) sad / (s->width * s->height);
+    }
+
+    /* current blurred frame becomes "previous" for the next call */
+    FFSWAP(uint16_t *, s->blur_data[0], s->blur_data[1]);
+    s->nb_frames++;
+    s->motion_sum += score;
+
+    return score;
+}
+
+/* Attach a score to the frame metadata, formatted with two decimals. */
+static void set_meta(AVDictionary **metadata, const char *key, float d)
+{
+    char str[128];
+
+    snprintf(str, sizeof(str), "%0.2f", d);
+    av_dict_set(metadata, key, str, 0);
+}
+
+/* Score one frame and export the result as per-frame metadata. */
+static void do_vmafmotion(AVFilterContext *ctx, AVFrame *ref)
+{
+    VMAFMotionContext *s = ctx->priv;
+    double score = ff_vmafmotion_process(&s->data, ref);
+
+    set_meta(&ref->metadata, "lavfi.vmafmotion.score", score);
+}
+
+
+/**
+ * Initialize a VMAFMotionData context for frames of the given dimensions
+ * and pixel format (fmt must be one of the formats from query_formats()).
+ *
+ * On allocation failure the partially allocated buffers are released and
+ * NULLed, so a subsequent ff_vmafmotion_uninit() remains safe either way.
+ *
+ * @return 0 on success, AVERROR(ENOMEM) on allocation failure
+ */
+int ff_vmafmotion_init(VMAFMotionData *s,
+                       int w, int h, enum AVPixelFormat fmt)
+{
+    size_t data_sz;
+    int i;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);
+
+    s->width = w;
+    s->height = h;
+    /* 16-bit intermediate rows, 32-byte aligned for future SIMD use */
+    s->stride = FFALIGN(w * sizeof(uint16_t), 32);
+
+    data_sz = (size_t) s->stride * h;
+    if (!(s->blur_data[0] = av_malloc(data_sz)) ||
+        !(s->blur_data[1] = av_malloc(data_sz)) ||
+        !(s->temp_data    = av_malloc(data_sz))) {
+        /* don't leak the buffers that did get allocated; av_freep()
+         * resets the pointers so uninit can't double-free them */
+        av_freep(&s->blur_data[0]);
+        av_freep(&s->blur_data[1]);
+        av_freep(&s->temp_data);
+        return AVERROR(ENOMEM);
+    }
+
+    /* convert the float taps to fixed point with BIT_SHIFT fraction bits */
+    for (i = 0; i < 5; i++) {
+        s->filter[i] = lrint(FILTER_5[i] * (1 << BIT_SHIFT));
+    }
+
+    vmafmotiondsp_init(&s->vmafdsp, desc->comp[0].depth);
+
+    return 0;
+}
+
+/* Advertise the supported input formats: planar YUV 4:2:0/4:2:2/4:4:4,
+ * 8 and 10 bits per component (matching the VMAF reference code). */
+static int query_formats(AVFilterContext *ctx)
+{
+    AVFilterFormats *formats;
+    static const enum AVPixelFormat pix_fmts[] = {
+        AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV420P,
+        AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV420P10,
+        AV_PIX_FMT_NONE
+    };
+
+    formats = ff_make_format_list(pix_fmts);
+    if (!formats)
+        return AVERROR(ENOMEM);
+    return ff_set_common_formats(ctx, formats);
+}
+
+/* Input pad configuration: set up the scoring context once the link's
+ * dimensions and pixel format are known. */
+static int config_input_ref(AVFilterLink *inlink)
+{
+    VMAFMotionContext *s = inlink->dst->priv;
+
+    /* inlink is the filter's only input, so its properties are authoritative */
+    return ff_vmafmotion_init(&s->data, inlink->w, inlink->h, inlink->format);
+}
+
+/**
+ * Release the context's buffers and return the average motion score over
+ * all processed frames (0.0 if no frame was processed).
+ */
+double ff_vmafmotion_uninit(VMAFMotionData *s)
+{
+    double avg = 0.0;
+
+    if (s->nb_frames > 0)
+        avg = s->motion_sum / s->nb_frames;
+
+    av_free(s->blur_data[0]);
+    av_free(s->blur_data[1]);
+    av_free(s->temp_data);
+
+    return avg;
+}
+
+/* Per-frame entry point: score the frame, then pass it through unchanged. */
+static int filter_frame(AVFilterLink *inlink, AVFrame *ref)
+{
+    AVFilterContext *ctx = inlink->dst;
+
+    do_vmafmotion(ctx, ref);
+    return ff_filter_frame(ctx->outputs[0], ref);
+}
+
+/* Filter teardown: free the buffers and log the average score (only when
+ * at least one frame was actually processed). */
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    VMAFMotionContext *s = ctx->priv;
+    double avg_motion = ff_vmafmotion_uninit(&s->data);
+
+    if (s->data.nb_frames > 0) {
+        av_log(ctx, AV_LOG_INFO, "VMAF Motion avg: %.3f\n", avg_motion);
+    }
+}
+
+/* single video input; frames are scored in filter_frame() and passed on */
+static const AVFilterPad vmafmotion_inputs[] = {
+    {
+        .name         = "reference",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame,
+        .config_props = config_input_ref,
+    },
+    { NULL }
+};
+
+static const AVFilterPad vmafmotion_outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_vmafmotion = {
+    .name          = "vmafmotion",
+    .description   = NULL_IF_CONFIG_SMALL("Calculate the VMAF Motion score."),
+    .uninit        = uninit,
+    .query_formats = query_formats,
+    .priv_size     = sizeof(VMAFMotionContext),
+    .priv_class    = &vmafmotion_class,
+    .inputs        = vmafmotion_inputs,
+    .outputs       = vmafmotion_outputs,
+};
diff --git a/libavfilter/vmaf_motion.h b/libavfilter/vmaf_motion.h
new file mode 100644
index 0000000..0c71182
--- /dev/null
+++ b/libavfilter/vmaf_motion.h
@@ -0,0 +1,58 @@ 
+/*
+ * Copyright (c) 2017 Ronald S. Bultje <rsbultje@gmail.com>
+ * Copyright (c) 2017 Ashish Pratap Singh <ashk43712@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_VMAFMOTION_H
+#define AVFILTER_VMAFMOTION_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include "video.h"
+
+/**
+ * DSP function pointers for the VMAF motion computation.
+ * All strides are in bytes; see the C implementations in vf_vmafmotion.c.
+ */
+typedef struct VMAFMotionDSPContext {
+    uint64_t (*sad)(const uint16_t *img1, const uint16_t *img2, int w, int h,
+                    ptrdiff_t img1_stride, ptrdiff_t img2_stride);
+    void (*convolution_x)(const uint16_t *filter, int filt_w, const uint16_t *src,
+                          uint16_t *dst, int w, int h, ptrdiff_t src_stride,
+                          ptrdiff_t dst_stride);
+    void (*convolution_y)(const uint16_t *filter, int filt_w, const uint8_t *src,
+                          uint16_t *dst, int w, int h, ptrdiff_t src_stride,
+                          ptrdiff_t dst_stride);
+} VMAFMotionDSPContext;
+
+/* NOTE(review): declared but not yet defined anywhere in this patch; per the
+ * list discussion the x86 SIMD implementation is to follow in a later patch. */
+void ff_vmafmotion_init_x86(VMAFMotionDSPContext *dsp);
+
+typedef struct VMAFMotionData {
+    uint16_t filter[5];           /* fixed-point 5-tap blur coefficients */
+    int width;
+    int height;
+    ptrdiff_t stride;             /* stride of the 16-bit buffers, in bytes */
+    uint16_t *blur_data[2 /* cur, prev */];
+    uint16_t *temp_data;          /* intermediate between the two conv passes */
+    double motion_sum;            /* running sum of per-frame scores */
+    uint64_t nb_frames;           /* number of frames processed so far */
+    VMAFMotionDSPContext vmafdsp;
+} VMAFMotionData;
+
+int ff_vmafmotion_init(VMAFMotionData *data, int w, int h, enum AVPixelFormat fmt);
+double ff_vmafmotion_process(VMAFMotionData *data, AVFrame *frame);
+double ff_vmafmotion_uninit(VMAFMotionData *data);
+
+#endif /* AVFILTER_VMAFMOTION_H */