diff mbox series

[FFmpeg-devel] avfilter: Added siti filter

Message ID MN2PR15MB2605F2669A411F794B6C2B40B5A70@MN2PR15MB2605.namprd15.prod.outlook.com
State New
Headers show
Series [FFmpeg-devel] avfilter: Added siti filter
Related show

Checks

Context Check Description
andriy/x86_make_warn warning New warnings during build
andriy/x86_make success Make finished
andriy/x86_make_fate success Make fate finished
andriy/PPC64_make success Make finished
andriy/PPC64_make_fate success Make fate finished

Commit Message

Boris Baracaldo Jan. 15, 2021, 5:06 a.m. UTC
Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video, as defined
in ITU-T P.910: Subjective video quality assessment methods for multimedia
applications.
---
 Changelog                |   1 +
 doc/filters.texi         |  25 ++++
 libavfilter/Makefile     |   1 +
 libavfilter/allfilters.c |   1 +
 libavfilter/version.h    |   2 +-
 libavfilter/vf_siti.c    | 359 +++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 388 insertions(+), 1 deletion(-)
 create mode 100644 libavfilter/vf_siti.c

--
2.13.5

Comments

Lynne Jan. 15, 2021, 5:31 a.m. UTC | #1
Jan 15, 2021, 06:06 by borbarak@fb.com:

>
> Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video, as defined
> in ITU-T P.910: Subjective video quality assessment methods for multimedia
> applications.
> ---
>  Changelog                |   1 +
>  doc/filters.texi         |  25 ++++
>  libavfilter/Makefile     |   1 +
>  libavfilter/allfilters.c |   1 +
>  libavfilter/version.h    |   2 +-
>  libavfilter/vf_siti.c    | 359 +++++++++++++++++++++++++++++++++++++++++++++++
>  6 files changed, 388 insertions(+), 1 deletion(-)
>  create mode 100644 libavfilter/vf_siti.c
>
> +// Determine whether the video is in full or limited range. If not defined, assume limited.
> +static int is_full_range(AVFrame* frame)
> +{
> +    if (frame->color_range == AVCOL_RANGE_UNSPECIFIED || frame->color_range == AVCOL_RANGE_NB)
> +    {
> +        // If color range not specified, fallback to pixel format
> +        return frame->format == AV_PIX_FMT_YUVJ420P || frame->format == AV_PIX_FMT_YUVJ422P;
> +    }
> +    return frame->color_range == AVCOL_RANGE_JPEG;
> +}
> +
> +// Check frame's color range and convert to full range if needed
> +static uint16_t convert_full_range(uint16_t y, SiTiContext *s)
> +{
> +    if (s->full_range == 1)
> +    {
> +        return y;
> +    }
> +
> +    // For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4
> +    double factor = s->pixel_depth == 1? 1 : 4;
> +    double shift = 16 * factor;
> +    double limit_upper = 235 * factor - shift;
> +    double full_upper = 256 * factor - 1;
> +    double limit_y = fmin(fmax(y - shift, 0), limit_upper);
> +    return (uint16_t) (full_upper * limit_y / limit_upper);
> +}
> +
> +// Applies sobel convolution
> +static void convolve_sobel(const unsigned char* src, double* dst, int linesize, SiTiContext *s)
> +{
> +    int filter_width = 3;
> +    int filter_size = filter_width * filter_width;
> +    for (int j=1; j<s->height-1; j++)
> +    {
> +        for (int i=1; i<s->width-1; i++)
> +        {
> +            double x_conv_sum = 0, y_conv_sum = 0;
> +            for (int k=0; k<filter_size; k++)
> +            {
> +                int ki = k % filter_width - 1;
> +                int kj = floor(k / filter_width) - 1;
> +                int index = (j + kj) * (linesize / s->pixel_depth) + (i + ki);
> +                uint16_t data = convert_full_range(get_frame_data(src, s->pixel_depth, index), s);
> +                x_conv_sum += data * X_FILTER[k];
> +                y_conv_sum += data * Y_FILTER[k];
> +            }
> +            double gradient = sqrt(x_conv_sum * x_conv_sum + y_conv_sum * y_conv_sum);
> +            // Dst matrix is smaller than src since we ignore edges that can't be convolved
> +            dst[(j - 1) * (s->width - 2) + (i - 1)] = gradient;
> +        }
> +    }
> +}
> +
> +// Calculate pixel difference between current and previous frame, and update previous
> +static void calculate_motion(const unsigned char* curr, double* motion_matrix,
> +                             int linesize, SiTiContext *s)
> +{
> +    for (int j=0; j<s->height; j++)
> +    {
> +        for (int i=0; i<s->width; i++)
> +        {
> +            double motion = 0;
> +            int curr_index = j * (linesize / s->pixel_depth) + i;
> +            int prev_index = j * s->width + i;
> +            uint16_t curr_data = convert_full_range(get_frame_data(curr, s->pixel_depth, curr_index), s);
> +
> +            if (s->nb_frames > 1)
> +            {
> +                // Previous frame is already converted to full range
> +                motion = curr_data - get_frame_data(s->prev_frame, s->pixel_depth, prev_index);
> +            }
> +            set_frame_data(s->prev_frame, s->pixel_depth, prev_index, curr_data);
> +            motion_matrix[j * s->width + i] = motion;
> +        }
> +    }
> +}
> +
> +static double std_deviation(double* img_metrics, int width, int height)
> +{
> +    double size = height * width;
> +
> +    double mean_sum = 0;
> +    for (int j=0; j<height; j++)
> +    {
> +        for (int i=0; i<width; i++)
> +        {
> +            mean_sum += img_metrics[j * width + i];
> +        }
> +    }
> +    double mean = mean_sum / size;
> +
> +    double sqr_diff_sum = 0;
> +    for (int j=0; j<height; j++)
> +    {
> +        for (int i=0; i<width; i++)
> +        {
> +            double mean_diff = img_metrics[j * width + i] - mean;
> +            sqr_diff_sum += (mean_diff * mean_diff);
> +        }
> +    }
>

The coding style mismatches the project's style.
We don't put opening brackets on a new line and in
case of single-line blocks we leave the brackets off entirely.


> +
> +#define OFFSET(x) offsetof(SiTiContext, x)
> +#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
> +
> +static const AVOption siti_options[] = {
> +    {"stats_file", "Set file where to store per-frame si-ti scores", OFFSET(stats_file_str), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
> +    { NULL }
> +};
>

Make it output the data to the frame metadata instead. That's how
we usually deal with data like this.
The 'metadata' filter can then be used to save the metadata to a file
or alter it.

Just an initial review.
Boris Baracaldo Jan. 19, 2021, 12:07 a.m. UTC | #2
Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video, as defined
in ITU-T P.910: Subjective video quality assessment methods for multimedia
applications.

Update: Fixed bracket style. I'm already adding the data to the frame's metadata, is the suggestion to remove the file option altogether?

---
 Changelog                |   1 +
 doc/filters.texi         |  25 ++++
 libavfilter/Makefile     |   1 +
 libavfilter/allfilters.c |   1 +
 libavfilter/version.h    |   2 +-
 libavfilter/vf_siti.c    | 321 +++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 350 insertions(+), 1 deletion(-)
 create mode 100644 libavfilter/vf_siti.c

diff --git a/Changelog b/Changelog
index 0b27c15122..5e1f107204 100644
--- a/Changelog
+++ b/Changelog
@@ -56,6 +56,7 @@ version <next>:
 - shufflepixels filter
 - tmidequalizer filter
 - estdif filter
+- siti filter


 version 4.3:
diff --git a/doc/filters.texi b/doc/filters.texi
index 3ce6699d7c..910558e162 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -18239,6 +18239,31 @@ ffmpeg -i input1.mkv -i input2.mkv -filter_complex "[0:v][1:v] signature=nb_inpu

 @end itemize

+@anchor{siti}
+@section siti
+
+Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video, as defined
+in ITU-T P.910: Subjective video quality assessment methods for multimedia
+applications. PDF available at @url{https://www.itu.int/rec/T-REC-P.910-199909-S/en}.
+Per frame metrics can be written into a file in csv format.
+
+It accepts the following option:
+
+@table @option
+@item stats_file
+Set the path to the file where per frame SI and TI metrics will be written. If no file
+is specified, only summary statistics will be printed to the console.
+@end table
+
+@subsection Examples
+@itemize
+@item
+To calculate SI/TI metrics and store per frame data to siti.csv:
+@example
+ffmpeg -i input.mp4 -vf siti=stats_file='siti.csv' -f null -
+@end example
+@end itemize
+
 @anchor{smartblur}
 @section smartblur

diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 44afa79963..7f96c22b12 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -414,6 +414,7 @@ OBJS-$(CONFIG_SMARTBLUR_FILTER)              += vf_smartblur.o
 OBJS-$(CONFIG_SOBEL_FILTER)                  += vf_convolution.o
 OBJS-$(CONFIG_SOBEL_OPENCL_FILTER)           += vf_convolution_opencl.o opencl.o \
                                                 opencl/convolution.o
+OBJS-$(CONFIG_SITI_FILTER)                   += vf_siti.o
 OBJS-$(CONFIG_SPLIT_FILTER)                  += split.o
 OBJS-$(CONFIG_SPP_FILTER)                    += vf_spp.o qp_table.o
 OBJS-$(CONFIG_SR_FILTER)                     += vf_sr.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 471844a603..0138c22cac 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -394,6 +394,7 @@ extern AVFilter ff_vf_signature;
 extern AVFilter ff_vf_smartblur;
 extern AVFilter ff_vf_sobel;
 extern AVFilter ff_vf_sobel_opencl;
+extern AVFilter ff_vf_siti;
 extern AVFilter ff_vf_split;
 extern AVFilter ff_vf_spp;
 extern AVFilter ff_vf_sr;
diff --git a/libavfilter/version.h b/libavfilter/version.h
index 2136235e54..e949e9bfb8 100644
--- a/libavfilter/version.h
+++ b/libavfilter/version.h
@@ -30,7 +30,7 @@
 #include "libavutil/version.h"

 #define LIBAVFILTER_VERSION_MAJOR   7
-#define LIBAVFILTER_VERSION_MINOR  96
+#define LIBAVFILTER_VERSION_MINOR  97
 #define LIBAVFILTER_VERSION_MICRO 100


diff --git a/libavfilter/vf_siti.c b/libavfilter/vf_siti.c
new file mode 100644
index 0000000000..de2868fd93
--- /dev/null
+++ b/libavfilter/vf_siti.c
@@ -0,0 +1,321 @@
+/*
+ * Copyright (c) 2002 A'rpi
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+/**
+ * @file
+ * Calculate Spatial Info (SI) and Temporal Info (TI) scores
+ */
+
+#include <math.h>
+
+#include "libavutil/imgutils.h"
+#include "libavutil/internal.h"
+#include "libavutil/opt.h"
+
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+static const int X_FILTER[9] = { // 3x3 kernel stored row by row; accumulated into x_conv_sum by convolve_sobel()
+    1, 0, -1,
+    2, 0, -2,
+    1, 0, -1
+};
+
+static const int Y_FILTER[9] = { // 3x3 kernel stored row by row; accumulated into y_conv_sum by convolve_sobel()
+    1, 2, 1,
+    0, 0, 0,
+    -1, -2, -1
+};
+
+typedef struct SiTiContext { // per-instance state of the siti filter
+    const AVClass *class;
+    int pixel_depth;           // taken from max_pixsteps[0] in config_input; code treats 1 as 8-bit and 2 as 16-bit samples
+    int width, height;         // copied from the input link in config_input
+    int nb_frames;             // incremented once per frame in filter_frame
+    unsigned char *prev_frame; // previous frame's samples after convert_full_range(), stored with a row pitch of width samples
+    double max_si;             // running maximum of the per-frame SI
+    double max_ti;             // running maximum of the per-frame TI
+    double min_si;             // running minimum of the per-frame SI (seeded by the first frame)
+    double min_ti;             // running minimum of the per-frame TI (seeded by the first frame)
+    double sum_si;             // sum of per-frame SI, used for the average printed in uninit
+    double sum_ti;             // sum of per-frame TI, used for the average printed in uninit
+    FILE *stats_file;          // CSV stream opened in init, or NULL when no stats_file was requested
+    char *stats_file_str;      // value of the "stats_file" option (output path)
+    int full_range;            // result of is_full_range() for the frame being processed
+} SiTiContext;
+
+// Advertise the pixel formats this filter accepts (4:2:0 and 4:2:2 YUV, 8 and 10 bit).
+// Returns AVERROR(ENOMEM) when the format list cannot be built, otherwise the
+// result of ff_set_common_formats().
+static int query_formats(AVFilterContext *ctx) {
+    static const enum AVPixelFormat supported[] = {
+        AV_PIX_FMT_YUV420P,   AV_PIX_FMT_YUV422P,
+        AV_PIX_FMT_YUVJ420P,  AV_PIX_FMT_YUVJ422P,
+        AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10,
+        AV_PIX_FMT_NONE
+    };
+    AVFilterFormats *list = ff_make_format_list(supported);
+
+    return list ? ff_set_common_formats(ctx, list) : AVERROR(ENOMEM);
+}
+
+// Filter init: user options are available here, input data is not.
+// Opens the optional CSV stats file and writes its header line.
+// Returns 0 on success or the AVERROR code derived from errno when the
+// file cannot be opened.
+static av_cold int init(AVFilterContext *ctx) {
+    SiTiContext *s = ctx->priv;
+    char errbuf[128];
+    int ret;
+
+    s->max_si = 0;
+
+    // Nothing else to set up when no stats file was requested
+    if (!s->stats_file_str)
+        return 0;
+
+    s->stats_file = fopen(s->stats_file_str, "w");
+    if (s->stats_file) {
+        fprintf(s->stats_file, "Frame,SI,TI\n");
+        return 0;
+    }
+
+    ret = AVERROR(errno);
+    av_strerror(ret, errbuf, sizeof(errbuf));
+    av_log(ctx, AV_LOG_ERROR, "Could not open stats file %s: %s\n",
+           s->stats_file_str, errbuf);
+    return ret;
+}
+
+// Filter teardown: log the summary statistics and release everything the
+// filter owns (stats file handle and the previous-frame buffer).
+static av_cold void uninit(AVFilterContext *ctx) {
+    SiTiContext *s = ctx->priv;
+    // nb_frames is still 0 when no frame was ever filtered; avoid 0/0 in that case
+    double avg_si = s->nb_frames > 0 ? s->sum_si / s->nb_frames : 0;
+    double avg_ti = s->nb_frames > 0 ? s->sum_ti / s->nb_frames : 0;
+
+    av_log(ctx, AV_LOG_INFO,
+           "Summary:\nTotal frames: %d\n\n"
+           "Spatial Information:\nAverage: %f\nMax: %f\nMin: %f\n\n"
+           "Temporal Information:\nAverage: %f\nMax: %f\nMin: %f\n",
+           s->nb_frames, avg_si, s->max_si, s->min_si, avg_ti, s->max_ti, s->min_ti
+    );
+
+    if (s->stats_file && s->stats_file != stdout)
+        fclose(s->stats_file);
+    s->stats_file = NULL;
+
+    // prev_frame is allocated in config_input and was never released before
+    av_freep(&s->prev_frame);
+}
+
+// Input link configuration: video parameters are available here.
+// Records the sample size and frame geometry and allocates the buffer that
+// holds the previous frame for the TI computation.
+// Returns 0 on success, AVERROR(ENOMEM) if the buffer cannot be allocated.
+static int config_input(AVFilterLink *inlink) {
+    AVFilterContext *ctx = inlink->dst;
+    SiTiContext *s = ctx->priv;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    int max_pixsteps[4];
+    size_t pixel_sz, data_sz;
+
+    av_image_fill_max_pixsteps(max_pixsteps, NULL, desc);
+
+    s->pixel_depth = max_pixsteps[0];
+    s->width = inlink->w;
+    s->height = inlink->h;
+    pixel_sz = s->pixel_depth == 1 ? sizeof(uint8_t) : sizeof(uint16_t);
+    data_sz = (size_t) s->width * pixel_sz * s->height;
+
+    // Release a buffer left over from an earlier configuration before replacing it
+    av_freep(&s->prev_frame);
+    s->prev_frame = av_malloc(data_sz);
+    if (!s->prev_frame)
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
+// Read the sample at position index from src. A pixel_depth of 2 reads a
+// 16-bit sample; any other value reads a single byte.
+static uint16_t get_frame_data(const unsigned char* src, int pixel_depth, int index) {
+    if (pixel_depth != 2)
+        return (uint16_t) src[index];
+    return ((const uint16_t *) src)[index];
+}
+
+// Store data at position index in dst. A pixel_depth of 2 writes a 16-bit
+// sample; any other value writes the low 8 bits as a single byte.
+static void set_frame_data(unsigned char* dst, int pixel_depth, int index, uint16_t data) {
+    if (pixel_depth == 2) {
+        ((uint16_t *) dst)[index] = data;
+        return;
+    }
+    dst[index] = (uint8_t) data;
+}
+
+// Report whether the frame carries full-range video (non-zero) or not (0).
+// When the color range is not specified, fall back to the pixel format:
+// only the YUVJ formats count as full range, everything else as limited.
+static int is_full_range(AVFrame* frame) {
+    switch (frame->color_range) {
+    case AVCOL_RANGE_UNSPECIFIED:
+    case AVCOL_RANGE_NB:
+        return frame->format == AV_PIX_FMT_YUVJ420P || frame->format == AV_PIX_FMT_YUVJ422P;
+    default:
+        return frame->color_range == AVCOL_RANGE_JPEG;
+    }
+}
+
+// Map a luma sample to full range. Samples of a frame already flagged as
+// full range (s->full_range == 1) are returned untouched.
+static uint16_t convert_full_range(uint16_t y, SiTiContext *s) {
+    double scale, black, span, full_max, clipped;
+
+    if (s->full_range == 1)
+        return y;
+
+    // For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4
+    scale    = s->pixel_depth == 1 ? 1 : 4;
+    black    = 16 * scale;
+    span     = 235 * scale - black;
+    full_max = 256 * scale - 1;
+    // Clamp to the limited-range interval before stretching it to [0, full_max]
+    clipped  = fmin(fmax(y - black, 0), span);
+    return (uint16_t) (full_max * clipped / span);
+}
+
+// Apply the X_FILTER / Y_FILTER 3x3 kernels to every interior sample of src
+// and store the gradient magnitude in dst.
+// dst is (width - 2) x (height - 2): the one-sample border of src cannot be
+// convolved and is skipped. linesize is the source row pitch in bytes.
+static void convolve_sobel(const unsigned char* src, double* dst, int linesize, SiTiContext *s) {
+    int x, y, dx, dy;
+
+    for (y = 1; y < s->height - 1; y++) {
+        for (x = 1; x < s->width - 1; x++) {
+            const int stride = linesize / s->pixel_depth; // row pitch in samples
+            double gx = 0, gy = 0;
+
+            // Walk the 3x3 neighbourhood row by row, matching the kernel layout
+            for (dy = -1; dy <= 1; dy++) {
+                for (dx = -1; dx <= 1; dx++) {
+                    const int tap = (dy + 1) * 3 + (dx + 1);
+                    const int pos = (y + dy) * stride + (x + dx);
+                    uint16_t sample = convert_full_range(get_frame_data(src, s->pixel_depth, pos), s);
+                    gx += sample * X_FILTER[tap];
+                    gy += sample * Y_FILTER[tap];
+                }
+            }
+            dst[(y - 1) * (s->width - 2) + (x - 1)] = sqrt(gx * gx + gy * gy);
+        }
+    }
+}
+
+// Fill motion_matrix (width x height) with the per-sample difference between
+// the current frame and the stored previous frame, then overwrite the stored
+// frame with the current one. linesize is the row pitch of curr in bytes.
+static void calculate_motion(const unsigned char* curr, double* motion_matrix,
+                             int linesize, SiTiContext *s) {
+    int row, col;
+
+    for (row = 0; row < s->height; row++) {
+        for (col = 0; col < s->width; col++) {
+            const int src_pos    = row * (linesize / s->pixel_depth) + col;
+            const int packed_pos = row * s->width + col;
+            const uint16_t now   = convert_full_range(get_frame_data(curr, s->pixel_depth, src_pos), s);
+            double diff = 0;
+
+            // The first frame has no predecessor, so its motion stays 0.
+            // prev_frame already holds full-range values, no conversion needed.
+            if (s->nb_frames > 1)
+                diff = now - get_frame_data(s->prev_frame, s->pixel_depth, packed_pos);
+            set_frame_data(s->prev_frame, s->pixel_depth, packed_pos, now);
+            motion_matrix[packed_pos] = diff;
+        }
+    }
+}
+
+// Population standard deviation of a width x height matrix stored
+// contiguously in img_metrics. Returns 0 for an empty matrix.
+static double std_deviation(double* img_metrics, int width, int height) {
+    size_t count, n;
+    double mean, sum = 0, sqr_diff_sum = 0;
+
+    // An empty matrix has no spread; this also avoids a 0/0 division below
+    if (width <= 0 || height <= 0)
+        return 0;
+
+    // Widen before multiplying so large frames cannot overflow int
+    count = (size_t) width * height;
+
+    for (n = 0; n < count; n++)
+        sum += img_metrics[n];
+    mean = sum / count;
+
+    for (n = 0; n < count; n++) {
+        double mean_diff = img_metrics[n] - mean;
+        sqr_diff_sum += mean_diff * mean_diff;
+    }
+    return sqrt(sqr_diff_sum / count);
+}
+
+// Store d under key in the metadata dictionary, formatted with two decimals.
+static void set_meta(AVDictionary **metadata, const char *key, float d) {
+    char text[128];
+
+    snprintf(text, sizeof(text), "%0.2f", d);
+    av_dict_set(metadata, key, text, 0);
+}
+
+// Per-frame entry point: compute SI and TI for the frame, update the running
+// statistics, attach both scores to the frame metadata, optionally append a
+// CSV line to the stats file, and pass the frame on to the output.
+// Returns AVERROR(ENOMEM) if the scratch matrices cannot be allocated,
+// otherwise the result of ff_filter_frame().
+static int filter_frame(AVFilterLink *inlink, AVFrame *frame) {
+    AVFilterContext *ctx = inlink->dst;
+    SiTiContext *s = ctx->priv;
+    size_t gradient_data_sz, motion_data_sz;
+    double *gradient_matrix, *motion_matrix;
+    double si, ti;
+
+    // Gradient matrix will not include the input frame's edges
+    gradient_data_sz = (size_t) (s->width - 2) * sizeof(double) * (s->height - 2);
+    gradient_matrix = av_malloc(gradient_data_sz);
+    motion_data_sz = (size_t) s->width * sizeof(double) * s->height;
+    motion_matrix = av_malloc(motion_data_sz);
+    if (!gradient_matrix || !motion_matrix) {
+        // Only one of the two may have failed; release whichever succeeded
+        av_free(gradient_matrix);
+        av_free(motion_matrix);
+        av_frame_free(&frame);
+        return AVERROR(ENOMEM);
+    }
+
+    s->full_range = is_full_range(frame);
+    s->nb_frames++;
+
+    // Calculate si and ti
+    convolve_sobel(frame->data[0], gradient_matrix, frame->linesize[0], s);
+    calculate_motion(frame->data[0], motion_matrix, frame->linesize[0], s);
+    si = std_deviation(gradient_matrix, s->width - 2, s->height - 2);
+    ti = std_deviation(motion_matrix, s->width, s->height);
+
+    // The scratch matrices are only needed for the two scores above.
+    // motion_matrix used to leak on every frame.
+    av_free(gradient_matrix);
+    av_free(motion_matrix);
+
+    // Calculate statistics
+    s->max_si = fmax(si, s->max_si);
+    s->max_ti = fmax(ti, s->max_ti);
+    s->sum_si += si;
+    s->sum_ti += ti;
+    // The first frame seeds the minima; later frames can only lower them
+    s->min_si = s->nb_frames == 1 ? si : fmin(si, s->min_si);
+    s->min_ti = s->nb_frames == 1 ? ti : fmin(ti, s->min_ti);
+
+    // Set si ti information in frame metadata
+    set_meta(&frame->metadata, "lavfi.siti.si", si);
+    set_meta(&frame->metadata, "lavfi.siti.ti", ti);
+
+    // Print per frame csv data to file
+    if (s->stats_file)
+        fprintf(s->stats_file, "%d,%f,%f\n", s->nb_frames, si, ti);
+
+    return ff_filter_frame(inlink->dst->outputs[0], frame);
+}
+
+#define OFFSET(x) offsetof(SiTiContext, x) // byte offset of the SiTiContext field backing an option
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption siti_options[] = { // user-visible options of the siti filter
+    {"stats_file", "Set file where to store per-frame si-ti scores", OFFSET(stats_file_str), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
+    { NULL } // end-of-list entry
+};
+
+AVFILTER_DEFINE_CLASS(siti); // presumably defines siti_class (referenced by ff_vf_siti.priv_class) from siti_options
+
+static const AVFilterPad avfilter_vf_siti_inputs[] = { // the filter's single video input pad
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_input, // records frame geometry and allocates prev_frame
+        .filter_frame = filter_frame, // computes SI/TI for each incoming frame
+    },
+    { NULL } // end-of-list entry
+};
+
+static const AVFilterPad avfilter_vf_siti_outputs[] = { // the filter's single video output pad; filter_frame forwards frames to outputs[0]
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO
+    },
+    { NULL } // end-of-list entry
+};
+
+// Filter definition for "siti". The description now mentions both scores the
+// filter computes (it previously named only SI).
+AVFilter ff_vf_siti = {
+    .name          = "siti",
+    .description   = NULL_IF_CONFIG_SMALL("Calculate spatial information (SI) and temporal information (TI)."),
+    .priv_size     = sizeof(SiTiContext),
+    .priv_class    = &siti_class,
+    .init          = init,
+    .uninit        = uninit,
+    .query_formats = query_formats,
+    .inputs        = avfilter_vf_siti_inputs,
+    .outputs       = avfilter_vf_siti_outputs,
+};
--
2.13.5
Lynne Jan. 19, 2021, 4:49 a.m. UTC | #3
Jan 19, 2021, 01:07 by borbarak@fb.com:

> Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video, as defined
> in ITU-T P.910: Subjective video quality assessment methods for multimedia
> applications.
>
> Update: Fixed bracket style.
>

Thanks, looks much neater now.



> I'm already adding the data to the frame's metadata, is the suggestion to remove the file option altogether?
>

Yes. We want to avoid filters having their own file in/out options rather
than using generic ones.

 

> +
> +#include "libavutil/imgutils.h"
> +#include "libavutil/internal.h"
> +#include "libavutil/opt.h"
> +
> +#include "avfilter.h"
> +#include "formats.h"
> +#include "internal.h"
> +#include "video.h"
> +
> +static const int X_FILTER[9] = {
> +    1, 0, -1,
> +    2, 0, -2,
> +    1, 0, -1
> +};
> +
> +static const int Y_FILTER[9] = {
> +    1, 2, 1,
> +    0, 0, 0,
> +    -1, -2, -1
> +};
>

We have optimized assembly to apply 3x3 matrices. Check out
libavfilter/x86/vf_convolution.asm:ff_filter_3x3_sse4
 vf_convolution already applies a sobel filter that way. Maybe
look into sharing some DSP code with it?
Werner Robitza Jan. 22, 2021, 11:02 a.m. UTC | #4
On Tue, Jan 19, 2021 at 5:49 AM Lynne <dev@lynne.ee> wrote:
> > I'm already adding the data to the frame's metadata, is the suggestion to remove the file option altogether?
> >
>
> Yes. We want to avoid filters having their own file in/out options rather
> than using generic ones.

As an end user I would find an output file with a known format much
easier to work with.
This works very well for the libvmaf filter, for example.
Could you please explain how to achieve the same kind of output with
the metadata injection?

Werner
Paul B Mahol Jan. 22, 2021, 11:28 a.m. UTC | #5
On Fri, Jan 22, 2021 at 12:03 PM Werner Robitza <werner.robitza@gmail.com>
wrote:

> On Tue, Jan 19, 2021 at 5:49 AM Lynne <dev@lynne.ee> wrote:
> > > I'm already adding the data to the frame's metadata, is the suggestion
> to remove the file option altogether?
> > >
> >
> > Yes. We want to avoid filters having their own file in/out options rather
> > than using generic ones.
>
> As an end user I would find an output file with a known format much
> easier to work with.
> This works very well for the libvmaf filter, for example.
> Could you please explain how to achieve the same kind of output with
> the metadata injection?
>

-vf your_filter,metadata=mode=print


>
> Werner
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Werner Robitza Jan. 22, 2021, 7:40 p.m. UTC | #6
On Fri, Jan 22, 2021 at 12:28 PM Paul B Mahol <onemda@gmail.com> wrote:
> > As an end user I would find an output file with a known format much
> > easier to work with.
> > This works very well for the libvmaf filter, for example.
> > Could you please explain how to achieve the same kind of output with
> > the metadata injection?
> >
>
> -vf your_filter,metadata=mode=print

Thanks, I know this, but this is not a known format that can be easily
parsed like a plain CSV file. At least the way the SITI filter is
used, regular users just want simple output in a file that they can
directly load into Excel, Python, whatever. (Just like for VMAF.)

I understand why you wouldn't want each filter to have its own IO
functionality, but in terms of usability, only having the metadata
option would be extremely bad.

(Perhaps the real solution would be to have the metadata filter
generate different kinds of known output formats.)

Werner
Paul B Mahol Jan. 22, 2021, 8:24 p.m. UTC | #7
On Fri, Jan 22, 2021 at 9:05 PM Werner Robitza <werner.robitza@gmail.com>
wrote:

> On Fri, Jan 22, 2021 at 12:28 PM Paul B Mahol <onemda@gmail.com> wrote:
> > > As an end user I would find an output file with a known format much
> > > easier to work with.
> > > This works very well for the libvmaf filter, for example.
> > > Could you please explain how to achieve the same kind of output with
> > > the metadata injection?
> > >
> >
> > -vf your_filter,metadata=mode=print
>
> Thanks, I know this, but this is not a known format that can be easily
> parsed like a plain CSV file. At least the way the SITI filter is
> used, regular users just want simple output in a file that they can
> directly load into Excel, Python, whatever. (Just like for VMAF.)
>
> I understand why you wouldn't want each filter to have its own IO
> functionality, but in terms of usability, only having the metadata
> option would be extremely bad.
>

It is not bad, feature is already present.


>
> (Perhaps the real solution would be to have the metadata filter
> generate different kinds of known output formats.)
>

You can use ffprobe for that. It supports the CSV output format and it does not
ignore frame metadata if you set it.
There is no reason for filter to have own IO output file/path, such
functionality should be deprecated and removed.



>
> Werner
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Nicolas George Feb. 1, 2021, 3:57 p.m. UTC | #8
Werner Robitza (12021-01-22):
> Thanks, I know this, but this is not a known format that can be easily
> parsed like a plain CSV file.

What you need to do is to extend the metadata filter so that it lets
output to a file and choose the format.

Regards,
Werner Robitza Feb. 19, 2021, 8:02 p.m. UTC | #9
On Mon, Feb 1, 2021 at 4:57 PM Nicolas George <george@nsup.org> wrote:
>
> Werner Robitza (12021-01-22):
> > Thanks, I know this, but this is not a known format that can be easily
> > parsed like a plain CSV file.
>
> What you need to do is to extend the metadata filter so that it lets
> output to a file and choose the format.

I suppose that would be the best option, yes. However, metadata comes
in all kinds of shapes, so it's not easy to map arbitrary metadata
(that any filter can generate) to a meaningful output format, in
particular when the result should be CSV.

In particular, such output should be tidy [1]. For instance, you don't
want to output "frame, key, value" with lines "1, si, 53.999", but
rather "frame, si, ti". Such transformations to useful/parsable output
must be done in the filter itself, not vf_metadata …

[1]: https://cran.r-project.org/web/packages/tidyr/vignettes/tidy-data.html
Werner Robitza Feb. 19, 2021, 8:05 p.m. UTC | #10
On Fri, Feb 19, 2021 at 9:02 PM Werner Robitza <werner.robitza@gmail.com> wrote:
> In particular, such output should be tidy [1]. For instance, you don't
> want to output "frame, key, value" with lines "1, si, 53.999", but
> rather "frame, si, ti". Such transformations to useful/parsable output
> must be done in the filter itself, not vf_metadata …

Well, looking at it again, one could transform per-frame values,
turning each key into a column, so it could work.
Paul B Mahol Feb. 19, 2021, 8:28 p.m. UTC | #11
On Fri, Feb 19, 2021 at 9:03 PM Werner Robitza <werner.robitza@gmail.com>
wrote:

> On Mon, Feb 1, 2021 at 4:57 PM Nicolas George <george@nsup.org> wrote:
> >
> > Werner Robitza (12021-01-22):
> > > Thanks, I know this, but this is not a known format that can be easily
> > > parsed like a plain CSV file.
> >
> > What you need to do is to extend the metadata filter so that it lets
> > output to a file and choose the format.
>
> I suppose that would be the best option, yes. However, metadata comes
> in all kinds of shapes, so it's not easy to map arbitrary metadata
> (that any filter can generate) to a meaningful output format, in
> particular when the result should be CSV.
>
> In particular, such output should be tidy [1]. For instance, you don't
> want to output "frame, key, value" with lines "1, si, 53.999", but
> rather "frame, si, ti". Such transformations to useful/parsable output
> must be done in the filter itself, not vf_metadata …
>

Huh, that does not make any sense.


>
> [1]:
> https://cran.r-project.org/web/packages/tidyr/vignettes/tidy-data.html
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
diff mbox series

Patch

diff --git a/Changelog b/Changelog
index dcb80e0ed9..9ca79a12aa 100644
--- a/Changelog
+++ b/Changelog
@@ -55,6 +55,7 @@  version <next>:
 - asuperpass and asuperstop filter
 - shufflepixels filter
 - tmidequalizer filter
+- siti filter


 version 4.3:
diff --git a/doc/filters.texi b/doc/filters.texi
index 813e35c2f9..eb1f23128c 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -18158,6 +18158,31 @@  ffmpeg -i input1.mkv -i input2.mkv -filter_complex "[0:v][1:v] signature=nb_inpu

 @end itemize

+@anchor{siti}
+@section siti
+
+Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video, as defined
+in ITU-T P.910: Subjective video quality assessment methods for multimedia
+applications. PDF available at @url{https://www.itu.int/rec/T-REC-P.910-199909-S/en}.
+Per frame metrics can be written into a file in csv format.
+
+It accepts the following option:
+
+@table @option
+@item stats_file
+Set the path to the file where per frame SI and TI metrics will be written. If no file
+is specified, only summary statistics will be printed to the console.
+@end table
+
+@subsection Examples
+@itemize
+@item
+To calculate SI/TI metrics and store per frame data to siti.csv:
+@example
+ffmpeg -i input.mp4 -vf siti=stats_file='siti.csv' -f null -
+@end example
+@end itemize
+
 @anchor{smartblur}
 @section smartblur

diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index ad1046d526..5514536463 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -413,6 +413,7 @@  OBJS-$(CONFIG_SMARTBLUR_FILTER)              += vf_smartblur.o
 OBJS-$(CONFIG_SOBEL_FILTER)                  += vf_convolution.o
 OBJS-$(CONFIG_SOBEL_OPENCL_FILTER)           += vf_convolution_opencl.o opencl.o \
                                                 opencl/convolution.o
+OBJS-$(CONFIG_SITI_FILTER)                   += vf_siti.o
 OBJS-$(CONFIG_SPLIT_FILTER)                  += split.o
 OBJS-$(CONFIG_SPP_FILTER)                    += vf_spp.o qp_table.o
 OBJS-$(CONFIG_SR_FILTER)                     += vf_sr.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index ce317dfa1c..0aef9fdcef 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -393,6 +393,7 @@  extern AVFilter ff_vf_signature;
 extern AVFilter ff_vf_smartblur;
 extern AVFilter ff_vf_sobel;
 extern AVFilter ff_vf_sobel_opencl;
+extern AVFilter ff_vf_siti;
 extern AVFilter ff_vf_split;
 extern AVFilter ff_vf_spp;
 extern AVFilter ff_vf_sr;
diff --git a/libavfilter/version.h b/libavfilter/version.h
index fbe2ed62b2..2136235e54 100644
--- a/libavfilter/version.h
+++ b/libavfilter/version.h
@@ -30,7 +30,7 @@ 
 #include "libavutil/version.h"

 #define LIBAVFILTER_VERSION_MAJOR   7
-#define LIBAVFILTER_VERSION_MINOR  95
+#define LIBAVFILTER_VERSION_MINOR  96
 #define LIBAVFILTER_VERSION_MICRO 100


diff --git a/libavfilter/vf_siti.c b/libavfilter/vf_siti.c
new file mode 100644
index 0000000000..e8b6142ae7
--- /dev/null
+++ b/libavfilter/vf_siti.c
@@ -0,0 +1,359 @@ 
+/*
+ * Copyright (c) 2002 A'rpi
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+/**
+ * @file
+ * Calculate Spatial Info (SI) and Temporal Info (TI) scores
+ */
+
+#include <math.h>
+
+#include "libavutil/imgutils.h"
+#include "libavutil/internal.h"
+#include "libavutil/opt.h"
+
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+/* 3x3 kernel, stored row by row, used by convolve_sobel() for the x response. */
+static const int X_FILTER[9] = {
+     1,  0, -1,
+     2,  0, -2,
+     1,  0, -1,
+};
+
+/* 3x3 kernel, stored row by row, used by convolve_sobel() for the y response. */
+static const int Y_FILTER[9] = {
+     1,  2,  1,
+     0,  0,  0,
+    -1, -2, -1,
+};
+
+/* Private state of the siti filter. */
+typedef struct SiTiContext {
+    const AVClass *class;
+    int pixel_depth;            // max_pixsteps[0] of the input format; 1 selects byte samples, otherwise 16-bit words
+    int width, height;          // dimensions of the input link
+    int nb_frames;              // incremented once per filter_frame() call
+    unsigned char *prev_frame;  // full-range plane 0 samples of the previous frame, packed with a stride of width
+    double max_si, max_ti;      // running maxima of the per-frame scores
+    double min_si, min_ti;      // running minima of the per-frame scores
+    double sum_si, sum_ti;      // running sums, divided by nb_frames for the averages
+    FILE *stats_file;           // open CSV stream, NULL when no stats file was requested
+    char *stats_file_str;       // value of the stats_file option
+    int full_range;             // result of is_full_range() for the frame being processed
+} SiTiContext;
+
+// Advertise the pixel formats this filter accepts
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat pix_fmts[] = {
+        AV_PIX_FMT_YUV420P,
+        AV_PIX_FMT_YUV422P,
+        AV_PIX_FMT_YUVJ420P,
+        AV_PIX_FMT_YUVJ422P,
+        AV_PIX_FMT_YUV420P10,
+        AV_PIX_FMT_YUV422P10,
+        AV_PIX_FMT_NONE
+    };
+    AVFilterFormats *formats = ff_make_format_list(pix_fmts);
+
+    // A NULL list means the allocation failed
+    return formats ? ff_set_common_formats(ctx, formats) : AVERROR(ENOMEM);
+}
+
+// Open the optional stats file and write its CSV header
+static av_cold int init(AVFilterContext *ctx)
+{
+    // Only user options are available here, no input data yet
+    SiTiContext *s = ctx->priv;
+    char errbuf[128];
+    int ret;
+
+    s->max_si = 0;
+
+    // Per-frame statistics are optional
+    if (!s->stats_file_str)
+        return 0;
+
+    s->stats_file = fopen(s->stats_file_str, "w");
+    if (s->stats_file) {
+        fprintf(s->stats_file, "Frame,SI,TI\n");
+        return 0;
+    }
+
+    // errno is read right after the failed fopen(), before any other call can overwrite it
+    ret = AVERROR(errno);
+    av_strerror(ret, errbuf, sizeof(errbuf));
+    av_log(ctx, AV_LOG_ERROR, "Could not open stats file %s: %s\n",
+            s->stats_file_str, errbuf);
+    return ret;
+}
+
+// Print the summary statistics and release everything owned by the context
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    SiTiContext *s = ctx->priv;
+
+    // Without any processed frame the averages would be 0/0, so skip the summary
+    if (s->nb_frames > 0) {
+        double avg_si = s->sum_si / s->nb_frames;
+        double avg_ti = s->sum_ti / s->nb_frames;
+
+        av_log(ctx, AV_LOG_INFO,
+            "Summary:\nTotal frames: %d\n\n"
+            "Spatial Information:\nAverage: %f\nMax: %f\nMin: %f\n\n"
+            "Temporal Information:\nAverage: %f\nMax: %f\nMin: %f\n",
+            s->nb_frames, avg_si, s->max_si, s->min_si, avg_ti, s->max_ti, s->min_ti
+        );
+    }
+
+    if (s->stats_file && s->stats_file != stdout)
+        fclose(s->stats_file);
+    s->stats_file = NULL;
+
+    // prev_frame is allocated in config_input() and was never released before
+    av_freep(&s->prev_frame);
+}
+
+/**
+ * Record the input geometry and sample size, and allocate the buffer that
+ * keeps the previous frame.
+ *
+ * @return 0 on success, AVERROR(EINVAL) if the input is smaller than the
+ *         3x3 kernel, AVERROR(ENOMEM) if the buffer cannot be allocated
+ */
+static int config_input(AVFilterLink *inlink)
+{
+    // Video input data available
+    AVFilterContext *ctx = inlink->dst;
+    SiTiContext *s = ctx->priv;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    int max_pixsteps[4];
+    size_t pixel_sz;
+
+    // The gradient matrix is (w - 2) x (h - 2), so anything below 3x3 has no output
+    if (inlink->w < 3 || inlink->h < 3) {
+        av_log(ctx, AV_LOG_ERROR, "Input of %dx%d is too small, at least 3x3 is required\n",
+               inlink->w, inlink->h);
+        return AVERROR(EINVAL);
+    }
+
+    av_image_fill_max_pixsteps(max_pixsteps, NULL, desc);
+
+    s->pixel_depth = max_pixsteps[0];
+    s->width = inlink->w;
+    s->height = inlink->h;
+    pixel_sz = s->pixel_depth == 1 ? sizeof(uint8_t) : sizeof(uint16_t);
+
+    // Release any buffer left from an earlier configuration before replacing it
+    av_freep(&s->prev_frame);
+    // av_malloc_array() checks the nmemb * size multiplication for overflow
+    s->prev_frame = av_malloc_array((size_t) s->width * s->height, pixel_sz);
+    if (!s->prev_frame)
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
+// Read the sample at index: a 16-bit word when pixel_depth is 2, a single byte otherwise
+static uint16_t get_frame_data(const unsigned char* src, int pixel_depth, int index)
+{
+    if (pixel_depth != 2)
+        return (uint16_t) src[index];
+
+    return ((const uint16_t *)src)[index];
+}
+
+// Store data at index: as a 16-bit word when pixel_depth is 2, truncated to a byte otherwise
+static void set_frame_data(unsigned char* dst, int pixel_depth, int index, uint16_t data)
+{
+    if (pixel_depth == 2) {
+        ((uint16_t *)dst)[index] = data;
+        return;
+    }
+
+    dst[index] = (uint8_t) data;
+}
+
+// Tell whether the frame is full range; frames without a usable color range are judged by pixel format
+static int is_full_range(AVFrame* frame)
+{
+    switch (frame->color_range) {
+    case AVCOL_RANGE_UNSPECIFIED:
+    case AVCOL_RANGE_NB:
+        // No range signalled: only the YUVJ pixel formats count as full range
+        return frame->format == AV_PIX_FMT_YUVJ420P || frame->format == AV_PIX_FMT_YUVJ422P;
+    default:
+        return frame->color_range == AVCOL_RANGE_JPEG;
+    }
+}
+
+// Map a limited range sample to full range; samples of full range frames are returned untouched
+static uint16_t convert_full_range(uint16_t y, SiTiContext *s)
+{
+    double factor, shift, limit_upper, full_upper, limit_y;
+
+    if (s->full_range == 1)
+        return y;
+
+    // For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4
+    factor      = s->pixel_depth == 1 ? 1 : 4;
+    shift       = 16 * factor;
+    limit_upper = 235 * factor - shift;
+    full_upper  = 256 * factor - 1;
+
+    // Remove the offset, then clamp to the span of the limited range
+    limit_y = fmax(y - shift, 0);
+    limit_y = fmin(limit_y, limit_upper);
+
+    return (uint16_t) (full_upper * limit_y / limit_upper);
+}
+
+// Run both 3x3 kernels over every interior sample and store the gradient magnitude in dst
+static void convolve_sobel(const unsigned char* src, double* dst, int linesize, SiTiContext *s)
+{
+    const int stride    = linesize / s->pixel_depth; // line stride in samples
+    const int out_width = s->width - 2;              // dst skips the edges that can't be convolved
+
+    for (int row = 1; row < s->height - 1; row++) {
+        for (int col = 1; col < s->width - 1; col++) {
+            double gx = 0, gy = 0;
+
+            for (int ky = -1; ky <= 1; ky++) {
+                for (int kx = -1; kx <= 1; kx++) {
+                    int tap   = (ky + 1) * 3 + (kx + 1);
+                    int index = (row + ky) * stride + (col + kx);
+                    uint16_t data = convert_full_range(get_frame_data(src, s->pixel_depth, index), s);
+
+                    gx += data * X_FILTER[tap];
+                    gy += data * Y_FILTER[tap];
+                }
+            }
+
+            dst[(row - 1) * out_width + (col - 1)] = sqrt(gx * gx + gy * gy);
+        }
+    }
+}
+
+// Fill motion_matrix with current minus previous sample, then store the current frame as the new previous one
+static void calculate_motion(const unsigned char* curr, double* motion_matrix,
+                             int linesize, SiTiContext *s)
+{
+    const int stride   = linesize / s->pixel_depth; // line stride of curr in samples
+    const int has_prev = s->nb_frames > 1;          // the first frame has nothing to compare against
+
+    for (int row = 0; row < s->height; row++) {
+        for (int col = 0; col < s->width; col++) {
+            // prev_frame and motion_matrix are packed, so both use the width as stride
+            int packed = row * s->width + col;
+            uint16_t now = convert_full_range(get_frame_data(curr, s->pixel_depth, row * stride + col), s);
+            double diff = 0;
+
+            // Previous frame is already converted to full range
+            if (has_prev)
+                diff = now - get_frame_data(s->prev_frame, s->pixel_depth, packed);
+
+            set_frame_data(s->prev_frame, s->pixel_depth, packed, now);
+            motion_matrix[packed] = diff;
+        }
+    }
+}
+
+// Population standard deviation of a packed width x height matrix of doubles
+static double std_deviation(double* img_metrics, int width, int height)
+{
+    const double size = height * width;
+    double total = 0, sq_total = 0, mean;
+
+    // First pass: mean
+    for (int row = 0; row < height; row++) {
+        const double *line = img_metrics + row * width;
+        for (int col = 0; col < width; col++)
+            total += line[col];
+    }
+    mean = total / size;
+
+    // Second pass: sum of squared distances to the mean
+    for (int row = 0; row < height; row++) {
+        const double *line = img_metrics + row * width;
+        for (int col = 0; col < width; col++) {
+            double delta = line[col] - mean;
+            sq_total += (delta * delta);
+        }
+    }
+
+    return sqrt(sq_total / size);
+}
+
+// Store d in the dictionary under key, formatted with two decimals
+static void set_meta(AVDictionary **metadata, const char *key, float d)
+{
+    char text[128];
+
+    snprintf(text, sizeof(text), "%0.2f", d);
+    av_dict_set(metadata, key, text, 0);
+}
+
+/**
+ * Compute SI and TI for one frame, update the running statistics, attach
+ * the scores as frame metadata, optionally log them to the stats file and
+ * pass the frame on to the output.
+ *
+ * @return the result of ff_filter_frame(), or AVERROR(ENOMEM) if the work
+ *         buffers cannot be allocated (the frame is freed in that case)
+ */
+static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
+{
+    AVFilterContext *ctx = inlink->dst;
+    SiTiContext *s = ctx->priv;
+    // Gradient matrix will not include the input frame's edges
+    size_t gradient_data_sz = (size_t) (s->width - 2) * sizeof(double) * (s->height - 2);
+    size_t motion_data_sz = (size_t) s->width * sizeof(double) * s->height;
+    double *gradient_matrix = av_malloc(gradient_data_sz);
+    double *motion_matrix = av_malloc(motion_data_sz);
+    double si, ti;
+
+    if (!gradient_matrix || !motion_matrix) {
+        // One of the two may have succeeded; av_free(NULL) is harmless
+        av_free(gradient_matrix);
+        av_free(motion_matrix);
+        av_frame_free(&frame);
+        return AVERROR(ENOMEM);
+    }
+
+    s->full_range = is_full_range(frame);
+    s->nb_frames++;
+
+    // Calculate si and ti
+    convolve_sobel(frame->data[0], gradient_matrix, frame->linesize[0], s);
+    calculate_motion(frame->data[0], motion_matrix, frame->linesize[0], s);
+    si = std_deviation(gradient_matrix, s->width - 2, s->height - 2);
+    ti = std_deviation(motion_matrix, s->width, s->height);
+
+    // Both work buffers are only needed for the scores above; the motion
+    // matrix used to be leaked on every frame
+    av_free(gradient_matrix);
+    av_free(motion_matrix);
+
+    // Calculate statistics
+    s->max_si = fmax(si, s->max_si);
+    s->max_ti = fmax(ti, s->max_ti);
+    s->sum_si += si;
+    s->sum_ti += ti;
+    // The first frame seeds the minima
+    s->min_si = s->nb_frames == 1 ? si : fmin(si, s->min_si);
+    s->min_ti = s->nb_frames == 1 ? ti : fmin(ti, s->min_ti);
+
+    // Set si ti information in frame metadata
+    set_meta(&frame->metadata, "lavfi.siti.si", si);
+    set_meta(&frame->metadata, "lavfi.siti.ti", ti);
+
+    // Print per frame csv data to file
+    if (s->stats_file)
+        fprintf(s->stats_file, "%d,%f,%f\n", s->nb_frames, si, ti);
+
+    return ff_filter_frame(ctx->outputs[0], frame);
+}
+
+#define OFFSET(x) offsetof(SiTiContext, x)
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+// User options; stats_file is a string that defaults to NULL (no per-frame file)
+static const AVOption siti_options[] = {
+    {
+        "stats_file",
+        "Set file where to store per-frame si-ti scores",
+        OFFSET(stats_file_str),
+        AV_OPT_TYPE_STRING,
+        { .str = NULL },
+        0, 0,
+        FLAGS
+    },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(siti);
+
+// Single video input: config_input() runs on link setup, filter_frame() on every frame
+static const AVFilterPad avfilter_vf_siti_inputs[] = {
+    {
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .name         = "default",
+        .filter_frame = filter_frame,
+        .config_props = config_input,
+    },
+    { NULL }
+};
+
+// Single video output that receives the frames handed to ff_filter_frame()
+static const AVFilterPad avfilter_vf_siti_outputs[] = {
+    {
+        .type = AVMEDIA_TYPE_VIDEO,
+        .name = "default",
+    },
+    { NULL }
+};
+
+// Filter definition; the description names both scores the filter computes
+AVFilter ff_vf_siti = {
+    .name          = "siti",
+    .description   = NULL_IF_CONFIG_SMALL("Calculate spatial information (SI) and temporal information (TI)."),
+    .priv_size     = sizeof(SiTiContext),
+    .priv_class    = &siti_class,
+    .init          = init,
+    .uninit        = uninit,
+    .query_formats = query_formats,
+    .inputs        = avfilter_vf_siti_inputs,
+    .outputs       = avfilter_vf_siti_outputs,
+};