[FFmpeg-devel] avfilter: add vmafmotion filter

Message ID	1501676935-17325-1-git-send-email-ashk43712@gmail.com
State	Superseded
Headers	show Delivered-To: ffmpegpatchwork@gmail.com Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100; From: Ashish Pratap Singh <ashk43712@gmail.com> To: ffmpeg-devel@ffmpeg.org Date: Wed, 2 Aug 2017 17:58:55 +0530 Message-Id: <1501676935-17325-1-git-send-email-ashk43712@gmail.com> Subject: [FFmpeg-devel] [PATCH] avfilter: add vmafmotion filter Precedence: list Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> Cc: Ashish Singh <ashk43712@gmail.com> MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>

diff --git a/doc/filters.texi b/doc/filters.texi index 2324b96..ee1e884 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -15137,6 +15137,25 @@ vignette='PI/4+random(1)*PI/50':eval=frame @end itemize +@section vmafmotion + +Obtain the average VMAF motion score between two input videos. +It is one of the component filters of VMAF. + +This filter takes two input videos. + +Both input videos must have the same resolution and pixel format. +It also assumes that both inputs have the same number of frames. + +The obtained average motion score is printed through the logging system. + +In the example below, the input file @file{main.mpg} being processed is compared +with the reference file @file{ref.mpg}. + +@example +ffmpeg -i main.mpg -i ref.mpg -lavfi vmafmotion -f null - +@end example + @section vstack Stack input videos vertically. diff --git a/libavfilter/Makefile b/libavfilter/Makefile index ee16361..771e434 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -322,6 +322,7 @@ OBJS-$(CONFIG_VFLIP_FILTER) += vf_vflip.o OBJS-$(CONFIG_VIDSTABDETECT_FILTER) += vidstabutils.o vf_vidstabdetect.o OBJS-$(CONFIG_VIDSTABTRANSFORM_FILTER) += vidstabutils.o vf_vidstabtransform.o OBJS-$(CONFIG_VIGNETTE_FILTER) += vf_vignette.o +OBJS-$(CONFIG_VMAFMOTION_FILTER) += vf_vmafmotion.o dualinput.o framesync.o OBJS-$(CONFIG_VSTACK_FILTER) += vf_stack.o framesync.o OBJS-$(CONFIG_W3FDIF_FILTER) += vf_w3fdif.o OBJS-$(CONFIG_WAVEFORM_FILTER) += vf_waveform.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index b1c2d11..644ab44 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -333,6 +333,7 @@ static void register_all(void) REGISTER_FILTER(VIDSTABDETECT, vidstabdetect, vf); REGISTER_FILTER(VIDSTABTRANSFORM, vidstabtransform, vf); REGISTER_FILTER(VIGNETTE, vignette, vf); + REGISTER_FILTER(VMAFMOTION, vmafmotion, vf); REGISTER_FILTER(VSTACK, vstack, vf); REGISTER_FILTER(W3FDIF, w3fdif, vf); REGISTER_FILTER(WAVEFORM, waveform, vf); diff 
--git a/libavfilter/vf_vmafmotion.c b/libavfilter/vf_vmafmotion.c
new file mode 100644
index 0000000..8456939
--- /dev/null
+++ b/libavfilter/vf_vmafmotion.c
@@ -0,0 +1,514 @@
+/*
+ * Copyright (c) 2017 Ronald S. Bultje <rsbultje@gmail.com>
+ * Copyright (c) 2017 Ashish Pratap Singh <ashk43712@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Calculate VMAF Motion score between two input videos.
+ */
+
+#include "libavutil/avstring.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+#include "avfilter.h"
+#include "dualinput.h"
+#include "drawutils.h"
+#include "formats.h"
+#include "internal.h"
+#include "vmaf_motion.h"
+#include "video.h"
+
+typedef struct VMAFMotionContext {
+    const AVClass *class;
+    FFDualInputContext dinput;
+    const AVPixFmtDescriptor *desc;
+    int filter[5];               /* FILTER_5 scaled to N-bit fixed point */
+    int width;
+    int height;
+    uint16_t *prev_blur_data;    /* blurred plane 0 of the previous reference frame */
+    uint16_t *blur_data;         /* blurred plane 0 of the current reference frame */
+    uint16_t *temp_data;         /* scratch: output of the vertical pass */
+    double motion_sum;           /* sum of all per-frame scores */
+    uint64_t nb_frames;          /* number of frames scored so far */
+} VMAFMotionContext;
+
+#define MAX_ALIGN 32
+#define ALIGN_CEIL(x) ((x) + ((x) % MAX_ALIGN ? MAX_ALIGN - (x) % MAX_ALIGN : 0))
+
+static const AVOption vmafmotion_options[] = {
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(vmafmotion);
+
+/**
+ * Mean absolute difference between two 16-bit images.
+ *
+ * @param img1        first image
+ * @param img2        second image
+ * @param w           width in samples
+ * @param h           height in rows
+ * @param img1_stride row pitch of img1, in samples (not bytes)
+ * @param img2_stride row pitch of img2, in samples (not bytes)
+ * @return sum of |img1 - img2| over all samples, divided by w * h
+ */
+static double image_sad(const uint16_t *img1, const uint16_t *img2, int w, int h,
+                        ptrdiff_t img1_stride, ptrdiff_t img2_stride)
+{
+    uint64_t sum = 0;
+    int i, j;
+
+    for (i = 0; i < h; i++) {
+        for (j = 0; j < w; j++) {
+            sum += abs(img1[i * img1_stride + j] - img2[i * img2_stride + j]);
+        }
+    }
+
+    /* Form the sample count in 64 bits: w * h as int can overflow. */
+    return (double) sum / (double) ((uint64_t) w * h);
+}
+
+/**
+ * Map a filter tap position that may lie outside [0, len) back inside it.
+ *
+ * Positions below 0 are reflected about sample 0; positions at or past len
+ * are reflected about the far edge, so that len maps to len - 1. The result
+ * is then clamped, so that planes smaller than the filter radius never yield
+ * an out-of-range index.
+ */
+static inline int mirror_tap(int pos, int len)
+{
+    pos = FFABS(pos);
+    if (pos >= len)
+        pos = len - (pos - len + 1);
+
+    return av_clip(pos, 0, len - 1);
+}
+
+/**
+ * Horizontal pass of the separable blur.
+ *
+ * Each output sample is the filter-weighted sum of filt_w neighbouring samples
+ * of the same row, scaled back down by N bits. Taps falling outside the row
+ * are mirrored back inside (see mirror_tap()).
+ *
+ * @param filter     filt_w fixed-point coefficients
+ * @param filt_w     number of filter taps
+ * @param src        input samples
+ * @param dst        output samples
+ * @param w          width in samples
+ * @param h          height in rows
+ * @param src_stride row pitch of src, in samples
+ * @param dst_stride row pitch of dst, in samples
+ */
+static void convolution_x(const int *filter, int filt_w, const uint16_t *src,
+                          uint16_t *dst, int w, int h, ptrdiff_t src_stride,
+                          ptrdiff_t dst_stride)
+{
+    int radius = filt_w / 2;
+    /* Clamp both borders into [0, w] so that rows narrower than the filter
+     * are handled entirely by the mirrored path instead of indexing outside
+     * the row. */
+    int borders_left  = FFMIN(radius, w);
+    int borders_right = FFMAX(borders_left, w - (filt_w - radius));
+    int i, j, k;
+    int sum;
+
+    for (i = 0; i < h; i++) {
+        /* left border: taps may fall before the first sample */
+        for (j = 0; j < borders_left; j++) {
+            sum = 0;
+            for (k = 0; k < filt_w; k++) {
+                int j_tap = mirror_tap(j - radius + k, w);
+                sum += filter[k] * src[i * src_stride + j_tap];
+            }
+            dst[i * dst_stride + j] = sum >> N;
+        }
+
+        /* interior: every tap is inside the row, no mirroring needed */
+        for (j = borders_left; j < borders_right; j++) {
+            sum = 0;
+            for (k = 0; k < filt_w; k++) {
+                sum += filter[k] * src[i * src_stride + j - radius + k];
+            }
+            dst[i * dst_stride + j] = sum >> N;
+        }
+
+        /* right border: taps may fall past the last sample */
+        for (j = borders_right; j < w; j++) {
+            sum = 0;
+            for (k = 0; k < filt_w; k++) {
+                int j_tap = mirror_tap(j - radius + k, w);
+                sum += filter[k] * src[i * src_stride + j_tap];
+            }
+            dst[i * dst_stride + j] = sum >> N;
+        }
+    }
+}
+
+/**
+ * Define convolution_y_<bits>bit(): the vertical pass of the separable blur
+ * for source samples of the given type. Each output sample is the
+ * filter-weighted sum of filt_w vertically neighbouring source samples,
+ * scaled back down by N bits and stored as uint16_t. Rows falling outside
+ * the plane are mirrored back inside (see mirror_tap()). Strides are in
+ * samples of the respective buffer.
+ */
+#define conv_y_fn(type, bits) \
+static void convolution_y_##bits##bit(const int *filter, int filt_w, \
+                                      const type *src, uint16_t *dst, \
+                                      int w, int h, ptrdiff_t src_stride, \
+                                      ptrdiff_t dst_stride) \
+{ \
+    int radius = filt_w / 2; \
+    int borders_top    = FFMIN(radius, h); \
+    int borders_bottom = FFMAX(borders_top, h - (filt_w - radius)); \
+    int i, j, k; \
+    int sum; \
+ \
+    for (i = 0; i < borders_top; i++) { \
+        for (j = 0; j < w; j++) { \
+            sum = 0; \
+            for (k = 0; k < filt_w; k++) { \
+                int i_tap = mirror_tap(i - radius + k, h); \
+                sum += filter[k] * src[i_tap * src_stride + j]; \
+            } \
+            dst[i * dst_stride + j] = sum >> N; \
+        } \
+    } \
+    for (i = borders_top; i < borders_bottom; i++) { \
+        for (j = 0; j < w; j++) { \
+            sum = 0; \
+            for (k = 0; k < filt_w; k++) { \
+                sum += filter[k] * src[(i - radius + k) * src_stride + j]; \
+            } \
+            dst[i * dst_stride + j] = sum >> N; \
+        } \
+    } \
+    for (i = borders_bottom; i < h; i++) { \
+        for (j = 0; j < w; j++) { \
+            sum = 0; \
+            for (k = 0; k < filt_w; k++) { \
+                int i_tap = mirror_tap(i - radius + k, h); \
+                sum += filter[k] * src[i_tap * src_stride + j]; \
+            } \
+            dst[i * dst_stride + j] = sum >> N; \
+        } \
+    } \
+}
+
+conv_y_fn(uint8_t, 8)
+conv_y_fn(uint16_t, 10)
+
+/**
+ * Blur one plane with the separable filter: vertical pass from src into tmp,
+ * then horizontal pass from tmp into dst.
+ *
+ * @param filter     filt_w fixed-point coefficients
+ * @param filt_w     number of filter taps
+ * @param src        input plane, read as uint8_t if type is 8, else as uint16_t
+ * @param dst        blurred output
+ * @param tmp        scratch buffer with the same layout as dst
+ * @param w          width in samples
+ * @param h          height in rows
+ * @param src_stride row pitch of src, in samples
+ * @param dst_stride row pitch of dst and tmp, in samples
+ * @param type       8 selects the 8-bit reader, any other value the 16-bit one
+ */
+void convolution_f32(const int *filter, int filt_w, const void *src,
+                     uint16_t *dst, uint16_t *tmp, int w, int h,
+                     ptrdiff_t src_stride, ptrdiff_t dst_stride, uint8_t type)
+{
+    if (type == 8) {
+        convolution_y_8bit(filter, filt_w, (const uint8_t *) src, tmp, w, h,
+                           src_stride, dst_stride);
+    } else {
+        convolution_y_10bit(filter, filt_w, (const uint16_t *) src, tmp, w, h,
+                            src_stride, dst_stride);
+    }
+
+    convolution_x(filter, filt_w, tmp, dst, w, h, dst_stride, dst_stride);
+}
+
+/**
+ * Compute the motion score of two blurred planes: their mean absolute
+ * difference.
+ *
+ * @param ref         first plane
+ * @param main        second plane
+ * @param w           width in samples
+ * @param h           height in rows
+ * @param ref_stride  row pitch of ref, in bytes
+ * @param main_stride row pitch of main, in bytes
+ * @param score       receives the score
+ * @return 0
+ */
+int compute_vmafmotion(const uint16_t *ref, const uint16_t *main, int w, int h,
+                       ptrdiff_t ref_stride, ptrdiff_t main_stride, double *score)
+{
+    /* Divide by a signed element size: dividing a ptrdiff_t by a size_t
+     * converts the stride to unsigned first. */
+    const ptrdiff_t px_size = sizeof(uint16_t);
+
+    *score = image_sad(ref, main, w, h, ref_stride / px_size,
+                       main_stride / px_size);
+
+    return 0;
+}
+
+/**
+ * Store a value under key in a metadata dictionary, formatted with two
+ * decimals.
+ */
+static void set_meta(AVDictionary **metadata, const char *key, double d)
+{
+    char value[128];
+    snprintf(value, sizeof(value), "%0.2f", d);
+    av_dict_set(metadata, key, value, 0);
+}
+
+/**
+ * Per-frame callback of the dual-input helper.
+ *
+ * Blurs plane 0 of the reference frame, scores it against the blurred plane
+ * of the previous reference frame (the first frame scores 0), attaches the
+ * score to the main frame as "lavfi.vmafmotion.score" and accumulates it for
+ * the average logged in uninit().
+ *
+ * @return the main frame
+ */
+static AVFrame *do_vmafmotion(AVFilterContext *ctx, AVFrame *main, const AVFrame *ref)
+{
+    VMAFMotionContext *s = ctx->priv;
+    AVDictionary **metadata = &main->metadata;
+    const ptrdiff_t stride     = ALIGN_CEIL(s->width * sizeof(uint16_t));
+    const ptrdiff_t px_stride  = stride / (ptrdiff_t) sizeof(uint16_t);
+    const ptrdiff_t ref_stride = ref->linesize[0];
+    const size_t data_sz       = (size_t) stride * s->height;
+    ptrdiff_t ref_px_stride;
+    double score;
+
+    /* AVFrame linesize may be negative, so turn it into a sample pitch with a
+     * signed division; dividing by a size_t would convert it to unsigned. */
+    if (s->desc->comp[0].depth <= 8) {
+        ref_px_stride = ref_stride;
+        convolution_f32(s->filter, 5, (const uint8_t *) ref->data[0],
+                        s->blur_data, s->temp_data, s->width, s->height,
+                        ref_px_stride, px_stride, 8);
+    } else {
+        ref_px_stride = ref_stride / (ptrdiff_t) sizeof(uint16_t);
+        convolution_f32(s->filter, 5, (const uint16_t *) ref->data[0],
+                        s->blur_data, s->temp_data, s->width, s->height,
+                        ref_px_stride, px_stride, 10);
+    }
+
+    if (!s->nb_frames) {
+        score = 0.0;
+    } else {
+        compute_vmafmotion(s->prev_blur_data, s->blur_data, s->width, s->height,
+                           stride, stride, &score);
+    }
+
+    memcpy(s->prev_blur_data, s->blur_data, data_sz);
+
+    set_meta(metadata, "lavfi.vmafmotion.score", score);
+
+    s->nb_frames++;
+    s->motion_sum += score;
+
+    return main;
+}
+
+/**
+ * Convert the floating-point FILTER_5 taps to N-bit fixed point and register
+ * the per-frame callback.
+ */
+static av_cold int init(AVFilterContext *ctx)
+{
+    VMAFMotionContext *s = ctx->priv;
+    int i;
+
+    for (i = 0; i < 5; i++) {
+        s->filter[i] = lrint(FILTER_5[i] * (1 << N));
+    }
+
+    s->dinput.process = do_vmafmotion;
+
+    return 0;
+}
+
+/**
+ * Advertise the supported pixel formats: 8-bit and 10-bit planar YUV.
+ */
+static int query_formats(AVFilterContext *ctx)
+{
+    /* The 10-bit entries use the native-endian aliases because the samples
+     * are read directly as uint16_t. */
+    static const enum AVPixelFormat pix_fmts[] = {
+        AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV420P,
+        AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV420P10,
+        AV_PIX_FMT_NONE
+    };
+
+    AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
+    if (!fmts_list)
+        return AVERROR(ENOMEM);
+    return ff_set_common_formats(ctx, fmts_list);
+}
+
+/**
+ * Check that both inputs have the same size and pixel format, then allocate
+ * the work buffers.
+ *
+ * @return 0 on success, AVERROR(EINVAL) if the inputs differ,
+ *         AVERROR(ENOMEM) if a buffer cannot be allocated
+ */
+static int config_input_ref(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    VMAFMotionContext *s = ctx->priv;
+    ptrdiff_t stride;
+    size_t data_sz;
+
+    if (ctx->inputs[0]->w != ctx->inputs[1]->w ||
+        ctx->inputs[0]->h != ctx->inputs[1]->h) {
+        av_log(ctx, AV_LOG_ERROR, "Width and height of input videos must be same.\n");
+        return AVERROR(EINVAL);
+    }
+    if (ctx->inputs[0]->format != ctx->inputs[1]->format) {
+        av_log(ctx, AV_LOG_ERROR, "Inputs must be of same pixel format.\n");
+        return AVERROR(EINVAL);
+    }
+
+    s->desc = av_pix_fmt_desc_get(inlink->format);
+    s->width = ctx->inputs[0]->w;
+    s->height = ctx->inputs[0]->h;
+
+    stride = ALIGN_CEIL(s->width * sizeof(uint16_t));
+    data_sz = (size_t)stride * s->height;
+
+    /* Drop buffers left over from an earlier call so that configuring the
+     * link again does not leak them. */
+    av_freep(&s->prev_blur_data);
+    av_freep(&s->blur_data);
+    av_freep(&s->temp_data);
+
+    if (!(s->prev_blur_data = av_malloc(data_sz))) {
+        return AVERROR(ENOMEM);
+    }
+    if (!(s->blur_data = av_malloc(data_sz))) {
+        return AVERROR(ENOMEM);
+    }
+    if (!(s->temp_data = av_malloc(data_sz))) {
+        return AVERROR(ENOMEM);
+    }
+
+    return 0;
+}
+
+/**
+ * Copy the main input's properties to the output and start the dual-input
+ * helper.
+ */
+static int config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    VMAFMotionContext *s = ctx->priv;
+    AVFilterLink *mainlink = ctx->inputs[0];
+    int ret;
+
+    outlink->w = mainlink->w;
+    outlink->h = mainlink->h;
+    outlink->time_base = mainlink->time_base;
+    outlink->sample_aspect_ratio = mainlink->sample_aspect_ratio;
+    outlink->frame_rate = mainlink->frame_rate;
+    if ((ret = ff_dualinput_init(ctx, &s->dinput)) < 0)
+        return ret;
+
+    return 0;
+}
+
+/** Pass a frame arriving on either input to the dual-input helper. */
+static int filter_frame(AVFilterLink *inlink, AVFrame *inpicref)
+{
+    VMAFMotionContext *s = inlink->dst->priv;
+    return ff_dualinput_filter_frame(&s->dinput, inlink, inpicref);
+}
+
+/** Forward a frame request on the output to the dual-input helper. */
+static int request_frame(AVFilterLink *outlink)
+{
+    VMAFMotionContext *s = outlink->src->priv;
+    return ff_dualinput_request_frame(&s->dinput, outlink);
+}
+
+/**
+ * Log the average score, if any frame was scored, and release all resources.
+ */
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    VMAFMotionContext *s = ctx->priv;
+
+    if (s->nb_frames > 0) {
+        av_log(ctx, AV_LOG_INFO, "VMAF Motion avg: %.3f\n", s->motion_sum / s->nb_frames);
+    }
+
+    av_freep(&s->prev_blur_data);
+    av_freep(&s->blur_data);
+    av_freep(&s->temp_data);
+
+    ff_dualinput_uninit(&s->dinput);
+}
+
+static const AVFilterPad vmafmotion_inputs[] = {
+    {
+        .name         = "main",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame,
+    },{
+        .name         = "reference",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame,
+        .config_props = config_input_ref,
+    },
+    { NULL }
+};
+
+static const AVFilterPad vmafmotion_outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .config_props  = config_output,
+        .request_frame = request_frame,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_vmafmotion = {
+    .name          = "vmafmotion",
+    .description   = NULL_IF_CONFIG_SMALL("Calculate the VMAF Motion between two video streams."),
+    .init          = init,
+    .uninit        = uninit,
+    .query_formats = query_formats,
+    .priv_size     = sizeof(VMAFMotionContext),
+    .priv_class    = &vmafmotion_class,
+    .inputs        = vmafmotion_inputs,
+    .outputs       = vmafmotion_outputs,
+};
diff --git a/libavfilter/vmaf_motion.h b/libavfilter/vmaf_motion.h
new file mode 100644
index 0000000..eb41636
--- /dev/null
+++ b/libavfilter/vmaf_motion.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2017 Ronald S. Bultje <rsbultje@gmail.com>
+ * Copyright (c) 2017 Ashish Pratap Singh <ashk43712@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_VMAF_MOTION_H
+#define AVFILTER_VMAF_MOTION_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+/* Number of fractional bits of the fixed-point filter coefficients. */
+#define N 15
+
+/* 5-tap blur kernel applied vertically and horizontally. */
+static const float FILTER_5[5] = {
+    0.054488685,
+    0.244201342,
+    0.402619947,
+    0.244201342,
+    0.054488685
+};
+
+/**
+ * Blur one plane with a separable filter (vertical pass, then horizontal).
+ *
+ * src is read as uint8_t if type is 8 and as uint16_t otherwise; tmp is a
+ * scratch buffer laid out like dst. Strides are in samples.
+ */
+void convolution_f32(const int *filter, int filt_width, const void *src,
+                     uint16_t *dst, uint16_t *tmp, int w, int h,
+                     ptrdiff_t src_stride, ptrdiff_t dst_stride, uint8_t type);
+
+/**
+ * Store in *score the mean absolute difference of two w x h planes.
+ * Strides are in bytes.
+ *
+ * @return 0
+ */
+int compute_vmafmotion(const uint16_t *ref, const uint16_t *main, int w, int h,
+                       ptrdiff_t ref_stride, ptrdiff_t main_stride, double *score);
+
+#endif /* AVFILTER_VMAF_MOTION_H */

[FFmpeg-devel] avfilter: add vmafmotion filter

Commit Message

Comments

Patch