diff mbox series

[FFmpeg-devel] avfilter: Added siti filter

Message ID	MN2PR15MB2605F2669A411F794B6C2B40B5A70@MN2PR15MB2605.namprd15.prod.outlook.com
State	New
Headers	From: Boris Baracaldo <borbarak@fb.com> To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> Thread-Topic: [PATCH] avfilter: Added siti filter Thread-Index: AQHW6vsduDITvYWV10uY4UQN7Vd3+KooIXIY Date: Fri, 15 Jan 2021 05:06:12 +0000 Message-ID: <MN2PR15MB2605F2669A411F794B6C2B40B5A70@MN2PR15MB2605.namprd15.prod.outlook.com> References: <20210115045832.76405-1-borbarak@fb.com> In-Reply-To: <20210115045832.76405-1-borbarak@fb.com> Accept-Language: en-US Content-Language: en-US MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH] avfilter: Added siti filter Precedence: list Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>
Series	[FFmpeg-devel] avfilter: Added siti filter \| [FFmpeg-devel] avfilter: Added siti filter

Checks

Context	Check	Description
andriy/x86_make_warn	warning	New warnings during build
andriy/x86_make	success	Make finished
andriy/x86_make_fate	success	Make fate finished
andriy/PPC64_make	success	Make finished
andriy/PPC64_make_fate	success	Make fate finished

Commit Message

Boris Baracaldo Jan. 15, 2021, 5:06 a.m. UTC

Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video, as defined
in ITU-T P.910: Subjective video quality assessment methods for multimedia
applications.
---
 Changelog                |   1 +
 doc/filters.texi         |  25 ++++
 libavfilter/Makefile     |   1 +
 libavfilter/allfilters.c |   1 +
 libavfilter/version.h    |   2 +-
 libavfilter/vf_siti.c    | 359 +++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 388 insertions(+), 1 deletion(-)
 create mode 100644 libavfilter/vf_siti.c

--
2.13.5

Comments

Lynne Jan. 15, 2021, 5:31 a.m. UTC | #1

Jan 15, 2021, 06:06 by borbarak@fb.com:

>
> Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video, as defined
> in ITU-T P.910: Subjective video quality assessment methods for multimedia
> applications.
> ---
>  Changelog                |   1 +
>  doc/filters.texi         |  25 ++++
>  libavfilter/Makefile     |   1 +
>  libavfilter/allfilters.c |   1 +
>  libavfilter/version.h    |   2 +-
>  libavfilter/vf_siti.c    | 359 +++++++++++++++++++++++++++++++++++++++++++++++
>  6 files changed, 388 insertions(+), 1 deletion(-)
>  create mode 100644 libavfilter/vf_siti.c
>
> +// Determine whether the video is in full or limited range. If not defined, assume limited.
> +static int is_full_range(AVFrame* frame)
> +{
> +    if (frame->color_range == AVCOL_RANGE_UNSPECIFIED || frame->color_range == AVCOL_RANGE_NB)
> +    {
> +        // If color range not specified, fallback to pixel format
> +        return frame->format == AV_PIX_FMT_YUVJ420P || frame->format == AV_PIX_FMT_YUVJ422P;
> +    }
> +    return frame->color_range == AVCOL_RANGE_JPEG;
> +}
> +
> +// Check frame's color range and convert to full range if needed
> +static uint16_t convert_full_range(uint16_t y, SiTiContext *s)
> +{
> +    if (s->full_range == 1)
> +    {
> +        return y;
> +    }
> +
> +    // For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4
> +    double factor = s->pixel_depth == 1? 1 : 4;
> +    double shift = 16 * factor;
> +    double limit_upper = 235 * factor - shift;
> +    double full_upper = 256 * factor - 1;
> +    double limit_y = fmin(fmax(y - shift, 0), limit_upper);
> +    return (uint16_t) (full_upper * limit_y / limit_upper);
> +}
> +
> +// Applies sobel convolution
> +static void convolve_sobel(const unsigned char* src, double* dst, int linesize, SiTiContext *s)
> +{
> +    int filter_width = 3;
> +    int filter_size = filter_width * filter_width;
> +    for (int j=1; j<s->height-1; j++)
> +    {
> +        for (int i=1; i<s->width-1; i++)
> +        {
> +            double x_conv_sum = 0, y_conv_sum = 0;
> +            for (int k=0; k<filter_size; k++)
> +            {
> +                int ki = k % filter_width - 1;
> +                int kj = floor(k / filter_width) - 1;
> +                int index = (j + kj) * (linesize / s->pixel_depth) + (i + ki);
> +                uint16_t data = convert_full_range(get_frame_data(src, s->pixel_depth, index), s);
> +                x_conv_sum += data * X_FILTER[k];
> +                y_conv_sum += data * Y_FILTER[k];
> +            }
> +            double gradient = sqrt(x_conv_sum * x_conv_sum + y_conv_sum * y_conv_sum);
> +            // Dst matrix is smaller than src since we ignore edges that can't be convolved
> +            dst[(j - 1) * (s->width - 2) + (i - 1)] = gradient;
> +        }
> +    }
> +}
> +
> +// Calculate pixel difference between current and previous frame, and update previous
> +static void calculate_motion(const unsigned char* curr, double* motion_matrix,
> +                             int linesize, SiTiContext *s)
> +{
> +    for (int j=0; j<s->height; j++)
> +    {
> +        for (int i=0; i<s->width; i++)
> +        {
> +            double motion = 0;
> +            int curr_index = j * (linesize / s->pixel_depth) + i;
> +            int prev_index = j * s->width + i;
> +            uint16_t curr_data = convert_full_range(get_frame_data(curr, s->pixel_depth, curr_index), s);
> +
> +            if (s->nb_frames > 1)
> +            {
> +                // Previous frame is already converted to full range
> +                motion = curr_data - get_frame_data(s->prev_frame, s->pixel_depth, prev_index);
> +            }
> +            set_frame_data(s->prev_frame, s->pixel_depth, prev_index, curr_data);
> +            motion_matrix[j * s->width + i] = motion;
> +        }
> +    }
> +}
> +
> +static double std_deviation(double* img_metrics, int width, int height)
> +{
> +    double size = height * width;
> +
> +    double mean_sum = 0;
> +    for (int j=0; j<height; j++)
> +    {
> +        for (int i=0; i<width; i++)
> +        {
> +            mean_sum += img_metrics[j * width + i];
> +        }
> +    }
> +    double mean = mean_sum / size;
> +
> +    double sqr_diff_sum = 0;
> +    for (int j=0; j<height; j++)
> +    {
> +        for (int i=0; i<width; i++)
> +        {
> +            double mean_diff = img_metrics[j * width + i] - mean;
> +            sqr_diff_sum += (mean_diff * mean_diff);
> +        }
> +    }
>

The coding style mismatches the project's style.
We don't put opening brackets on a new line and in
case of single-line blocks we leave the brackets off entirely.


> +
> +#define OFFSET(x) offsetof(SiTiContext, x)
> +#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
> +
> +static const AVOption siti_options[] = {
> +    {"stats_file", "Set file where to store per-frame si-ti scores", OFFSET(stats_file_str), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
> +    { NULL }
> +};
>

Make it output the data to the frame metadata instead. That's how
we usually deal with data like this.
The 'metadata' filter can then be used to save the metadata to a file
or alter it.

Just an initial review.

Boris Baracaldo Jan. 19, 2021, 12:07 a.m. UTC | #2

Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video, as defined
in ITU-T P.910: Subjective video quality assessment methods for multimedia
applications.

Update: Fixed bracket style. I'm already adding the data to the frame's metadata, is the suggestion to remove the file option altogether?

---
 Changelog                |   1 +
 doc/filters.texi         |  25 ++++
 libavfilter/Makefile     |   1 +
 libavfilter/allfilters.c |   1 +
 libavfilter/version.h    |   2 +-
 libavfilter/vf_siti.c    | 321 +++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 350 insertions(+), 1 deletion(-)
 create mode 100644 libavfilter/vf_siti.c

diff --git a/Changelog b/Changelog
index 0b27c15122..5e1f107204 100644
--- a/Changelog
+++ b/Changelog
@@ -56,6 +56,7 @@ version <next>:
 - shufflepixels filter
 - tmidequalizer filter
 - estdif filter
+- siti filter


 version 4.3:
diff --git a/doc/filters.texi b/doc/filters.texi
index 3ce6699d7c..910558e162 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -18239,6 +18239,31 @@ ffmpeg -i input1.mkv -i input2.mkv -filter_complex "[0:v][1:v] signature=nb_inpu

 @end itemize

+@anchor{siti}
+@section siti
+
+Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video, as defined
+in ITU-T P.910: Subjective video quality assessment methods for multimedia
+applications. Available PDF at @url{https://www.itu.int/rec/T-REC-P.910-199909-S/en }.
+Per frame metrics can be written into a file in csv format.
+
+It accepts the following option:
+
+@table @option
+@item stats_file
+Set the path to the file where per frame SI and TI metrics will be written. If no file
+is specified, only summary statistics will be printed to the console.
+@end table
+
+@subsection Examples
+@itemize
+@item
+To calculate SI/TI metrics and store per frame data to stats.csv:
+@example
+ffmpeg -i input.mp4 -vf siti=stats_file='siti.csv' -f null -
+@end example
+@end itemize
+
 @anchor{smartblur}
 @section smartblur

diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 44afa79963..7f96c22b12 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -414,6 +414,7 @@ OBJS-$(CONFIG_SMARTBLUR_FILTER)              += vf_smartblur.o
 OBJS-$(CONFIG_SOBEL_FILTER)                  += vf_convolution.o
 OBJS-$(CONFIG_SOBEL_OPENCL_FILTER)           += vf_convolution_opencl.o opencl.o \
                                                 opencl/convolution.o
+OBJS-$(CONFIG_SITI_FILTER)                   += vf_siti.o
 OBJS-$(CONFIG_SPLIT_FILTER)                  += split.o
 OBJS-$(CONFIG_SPP_FILTER)                    += vf_spp.o qp_table.o
 OBJS-$(CONFIG_SR_FILTER)                     += vf_sr.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 471844a603..0138c22cac 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -394,6 +394,7 @@ extern AVFilter ff_vf_signature;
 extern AVFilter ff_vf_smartblur;
 extern AVFilter ff_vf_sobel;
 extern AVFilter ff_vf_sobel_opencl;
+extern AVFilter ff_vf_siti;
 extern AVFilter ff_vf_split;
 extern AVFilter ff_vf_spp;
 extern AVFilter ff_vf_sr;
diff --git a/libavfilter/version.h b/libavfilter/version.h
index 2136235e54..e949e9bfb8 100644
--- a/libavfilter/version.h
+++ b/libavfilter/version.h
@@ -30,7 +30,7 @@
 #include "libavutil/version.h"

 #define LIBAVFILTER_VERSION_MAJOR   7
-#define LIBAVFILTER_VERSION_MINOR  96
+#define LIBAVFILTER_VERSION_MINOR  97
 #define LIBAVFILTER_VERSION_MICRO 100


diff --git a/libavfilter/vf_siti.c b/libavfilter/vf_siti.c
new file mode 100644
index 0000000000..de2868fd93
--- /dev/null
+++ b/libavfilter/vf_siti.c
@@ -0,0 +1,321 @@
+/*
+ * Copyright (c) 2002 A'rpi
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+/**
+ * @file
+ * Calculate Spatial Info (SI) and Temporal Info (TI) scores
+ */
+
+#include <math.h>
+
+#include "libavutil/imgutils.h"
+#include "libavutil/internal.h"
+#include "libavutil/opt.h"
+
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+static const int X_FILTER[9] = {
+    1, 0, -1,
+    2, 0, -2,
+    1, 0, -1
+};
+
+static const int Y_FILTER[9] = {
+    1, 2, 1,
+    0, 0, 0,
+    -1, -2, -1
+};
+
+typedef struct SiTiContext {
+    const AVClass *class;
+    int pixel_depth;
+    int width, height;
+    int nb_frames;
+    unsigned char *prev_frame;
+    double max_si;
+    double max_ti;
+    double min_si;
+    double min_ti;
+    double sum_si;
+    double sum_ti;
+    FILE *stats_file;
+    char *stats_file_str;
+    int full_range;
+} SiTiContext;
+
+static int query_formats(AVFilterContext *ctx) {
+    static const enum AVPixelFormat pix_fmts[] = {
+        AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
+        AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
+        AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10,
+        AV_PIX_FMT_NONE
+    };
+
+    AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
+    if (!fmts_list)
+        return AVERROR(ENOMEM);
+    return ff_set_common_formats(ctx, fmts_list);
+}
+
+static av_cold int init(AVFilterContext *ctx) {
+    // User options but no input data
+    SiTiContext *s = ctx->priv;
+    s->max_si = 0;
+    if (s->stats_file_str) {
+        s->stats_file = fopen(s->stats_file_str, "w");
+        if (!s->stats_file) {
+            int err = AVERROR(errno);
+            char buf[128];
+            av_strerror(err, buf, sizeof(buf));
+            av_log(ctx, AV_LOG_ERROR, "Could not open stats file %s: %s\n",
+                   s->stats_file_str, buf);
+            return err;
+        }
+        fprintf(s->stats_file, "Frame,SI,TI\n");
+    }
+
+    return 0;
+}
+
+static av_cold void uninit(AVFilterContext *ctx) {
+    SiTiContext *s = ctx->priv;
+
+    double avg_si = s->sum_si / s->nb_frames;
+    double avg_ti = s->sum_ti / s->nb_frames;
+    av_log(ctx, AV_LOG_INFO,
+           "Summary:\nTotal frames: %d\n\n"
+           "Spatial Information:\nAverage: %f\nMax: %f\nMin: %f\n\n"
+           "Temporal Information:\nAverage: %f\nMax: %f\nMin: %f\n",
+           s->nb_frames, avg_si, s->max_si, s->min_si, avg_ti, s->max_ti, s->min_ti
+    );
+
+    if (s->stats_file && s->stats_file != stdout)
+        fclose(s->stats_file);
+}
+
+static int config_input(AVFilterLink *inlink) {
+    // Video input data avilable
+    AVFilterContext *ctx = inlink->dst;
+    SiTiContext *s = ctx->priv;
+    int max_pixsteps[4];
+
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    av_image_fill_max_pixsteps(max_pixsteps, NULL, desc);
+
+    s->pixel_depth = max_pixsteps[0];
+    s->width = inlink->w;
+    s->height = inlink->h;
+    size_t pixel_sz = s->pixel_depth==1? (size_t) sizeof(uint8_t) : (size_t) sizeof(uint16_t);
+    size_t data_sz = (size_t) s->width * pixel_sz * s->height;
+    s->prev_frame = av_malloc(data_sz);
+
+    return 0;
+}
+
+// Get frame data handling 8 and 10 bit formats
+static uint16_t get_frame_data(const unsigned char* src, int pixel_depth, int index) {
+    const uint16_t *src16 = (const uint16_t *)src;
+    if (pixel_depth == 2)
+        return src16[index];
+    return (uint16_t) src[index];
+}
+
+// Set frame data handling 8 and 10 bit formats
+static void set_frame_data(unsigned char* dst, int pixel_depth, int index, uint16_t data) {
+    uint16_t *dst16 = (uint16_t *)dst;
+    if (pixel_depth == 2)
+        dst16[index] = data;
+    else
+        dst[index] = (uint8_t) data;
+}
+
+// Determine whether the video is in full or limited range. If not defined, assume limited.
+static int is_full_range(AVFrame* frame) {
+    // If color range not specified, fallback to pixel format
+    if (frame->color_range == AVCOL_RANGE_UNSPECIFIED || frame->color_range == AVCOL_RANGE_NB)
+        return frame->format == AV_PIX_FMT_YUVJ420P || frame->format == AV_PIX_FMT_YUVJ422P;
+    return frame->color_range == AVCOL_RANGE_JPEG;
+}
+
+// Check frame's color range and convert to full range if needed
+static uint16_t convert_full_range(uint16_t y, SiTiContext *s) {
+    if (s->full_range == 1)
+        return y;
+
+    // For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4
+    double factor = s->pixel_depth == 1? 1 : 4;
+    double shift = 16 * factor;
+    double limit_upper = 235 * factor - shift;
+    double full_upper = 256 * factor - 1;
+    double limit_y = fmin(fmax(y - shift, 0), limit_upper);
+    return (uint16_t) (full_upper * limit_y / limit_upper);
+}
+
+// Applies sobel convolution
+static void convolve_sobel(const unsigned char* src, double* dst, int linesize, SiTiContext *s) {
+    int filter_width = 3;
+    int filter_size = filter_width * filter_width;
+    for (int j=1; j<s->height-1; j++) {
+        for (int i=1; i<s->width-1; i++) {
+            double x_conv_sum = 0, y_conv_sum = 0;
+            for (int k=0; k<filter_size; k++) {
+                int ki = k % filter_width - 1;
+                int kj = floor(k / filter_width) - 1;
+                int index = (j + kj) * (linesize / s->pixel_depth) + (i + ki);
+                uint16_t data = convert_full_range(get_frame_data(src, s->pixel_depth, index), s);
+                x_conv_sum += data * X_FILTER[k];
+                y_conv_sum += data * Y_FILTER[k];
+            }
+            double gradient = sqrt(x_conv_sum * x_conv_sum + y_conv_sum * y_conv_sum);
+            // Dst matrix is smaller than src since we ignore edges that can't be convolved
+            dst[(j - 1) * (s->width - 2) + (i - 1)] = gradient;
+        }
+    }
+}
+
+// Calculate pixel difference between current and previous frame, and update previous
+static void calculate_motion(const unsigned char* curr, double* motion_matrix,
+                             int linesize, SiTiContext *s) {
+    for (int j=0; j<s->height; j++) {
+        for (int i=0; i<s->width; i++) {
+            double motion = 0;
+            int curr_index = j * (linesize / s->pixel_depth) + i;
+            int prev_index = j * s->width + i;
+            uint16_t curr_data = convert_full_range(get_frame_data(curr, s->pixel_depth, curr_index), s);
+
+            // Previous frame is already converted to full range
+            if (s->nb_frames > 1)
+                motion = curr_data - get_frame_data(s->prev_frame, s->pixel_depth, prev_index);
+            set_frame_data(s->prev_frame, s->pixel_depth, prev_index, curr_data);
+            motion_matrix[j * s->width + i] = motion;
+        }
+    }
+}
+
+static double std_deviation(double* img_metrics, int width, int height) {
+    double size = height * width;
+    double mean_sum = 0;
+    for (int j=0; j<height; j++)
+        for (int i=0; i<width; i++)
+            mean_sum += img_metrics[j * width + i];
+
+    double mean = mean_sum / size;
+
+    double sqr_diff_sum = 0;
+    for (int j=0; j<height; j++) {
+        for (int i=0; i<width; i++) {
+            double mean_diff = img_metrics[j * width + i] - mean;
+            sqr_diff_sum += (mean_diff * mean_diff);
+        }
+    }
+    double variance = sqr_diff_sum / size;
+    return sqrt(variance);
+}
+
+static void set_meta(AVDictionary **metadata, const char *key, float d) {
+    char value[128];
+    snprintf(value, sizeof(value), "%0.2f", d);
+    av_dict_set(metadata, key, value, 0);
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *frame) {
+    AVFilterContext *ctx = inlink->dst;
+    SiTiContext *s = ctx->priv;
+
+    // Gradient matrix will not include the input frame's edges
+    size_t gradient_data_sz = (size_t) (s->width - 2) * sizeof(double) * (s->height - 2);
+    double *gradient_matrix = av_malloc(gradient_data_sz);
+    size_t motion_data_sz = (size_t) s->width * sizeof(double) * s->height;
+    double *motion_matrix = av_malloc(motion_data_sz);
+    if (!gradient_matrix || !motion_matrix) {
+        av_frame_free(&frame);
+        return AVERROR(ENOMEM);
+    }
+
+    s->full_range = is_full_range(frame);
+    s->nb_frames++;
+
+    // Calculate si and ti
+    convolve_sobel(frame->data[0], gradient_matrix, frame->linesize[0], s);
+    calculate_motion(frame->data[0], motion_matrix, frame->linesize[0], s);
+    double si = std_deviation(gradient_matrix, s->width - 2, s->height - 2);
+    double ti = std_deviation(motion_matrix, s->width, s->height);
+
+    // Calculate statistics
+    s->max_si = fmax(si, s->max_si);
+    s->max_ti = fmax(ti, s->max_ti);
+    s->sum_si += si;
+    s->sum_ti += ti;
+    s->min_si = s->nb_frames == 1? si : fmin(si, s->min_si);
+    s->min_ti = s->nb_frames == 1? ti : fmin(ti, s->min_ti);
+
+    // Set si ti information in frame metadata
+    set_meta(&frame->metadata, "lavfi.siti.si", si);
+    set_meta(&frame->metadata, "lavfi.siti.ti", ti);
+
+    // Print per frame csv data to file
+    if (s->stats_file)
+        fprintf(s->stats_file, "%d,%f,%f\n", s->nb_frames, si, ti);
+
+    av_free(gradient_matrix);
+    return ff_filter_frame(inlink->dst->outputs[0], frame);
+}
+
+#define OFFSET(x) offsetof(SiTiContext, x)
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption siti_options[] = {
+    {"stats_file", "Set file where to store per-frame si-ti scores", OFFSET(stats_file_str), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(siti);
+
+static const AVFilterPad avfilter_vf_siti_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_input,
+        .filter_frame = filter_frame,
+    },
+    { NULL }
+};
+
+static const AVFilterPad avfilter_vf_siti_outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_siti = {
+    .name          = "siti",
+    .description   = NULL_IF_CONFIG_SMALL("Calculate spatial info (SI)."),
+    .priv_size     = sizeof(SiTiContext),
+    .priv_class    = &siti_class,
+    .init          = init,
+    .uninit        = uninit,
+    .query_formats = query_formats,
+    .inputs        = avfilter_vf_siti_inputs,
+    .outputs       = avfilter_vf_siti_outputs,
+};
--
2.13.5

Lynne Jan. 19, 2021, 4:49 a.m. UTC | #3

Jan 19, 2021, 01:07 by borbarak@fb.com:

> Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video, as defined
> in ITU-T P.910: Subjective video quality assessment methods for multimedia
> applications.
>
> Update: Fixed bracket style.
>

Thanks, looks much neater now.



> I'm already adding the data to the frame's metadata, is the suggestion to remove the file option altogether?
>

Yes. We want to avoid filters having their own file in/out options rather
than using generic ones.

 

> +
> +#include "libavutil/imgutils.h"
> +#include "libavutil/internal.h"
> +#include "libavutil/opt.h"
> +
> +#include "avfilter.h"
> +#include "formats.h"
> +#include "internal.h"
> +#include "video.h"
> +
> +static const int X_FILTER[9] = {
> +    1, 0, -1,
> +    2, 0, -2,
> +    1, 0, -1
> +};
> +
> +static const int Y_FILTER[9] = {
> +    1, 2, 1,
> +    0, 0, 0,
> +    -1, -2, -1
> +};
>

We have optimized assembly to apply 3x3 matrices. Check out
libavfilter/x86/vf_convolution.asm:ff_filter_3x3_sse4
 vf_convolution already applies a sobel filter that way. Maybe
look into sharing some DSP code with it?

Werner Robitza Jan. 22, 2021, 11:02 a.m. UTC | #4

On Tue, Jan 19, 2021 at 5:49 AM Lynne <dev@lynne.ee> wrote:
> > I'm already adding the data to the frame's metadata, is the suggestion to remove the file option altogether?
> >
>
> Yes. We want to avoid filters having their own file in/out options rather
> than using generic ones.

As an end user I would find an output file with a known format much
easier to work with.
This works very well for the libvmaf filter, for example.
Could you please explain how to achieve the same kind of output with
the metadata injection?

Werner

Paul B Mahol Jan. 22, 2021, 11:28 a.m. UTC | #5

On Fri, Jan 22, 2021 at 12:03 PM Werner Robitza <werner.robitza@gmail.com>
wrote:

> On Tue, Jan 19, 2021 at 5:49 AM Lynne <dev@lynne.ee> wrote:
> > > I'm already adding the data to the frame's metadata, is the suggestion
> to remove the file option altogether?
> > >
> >
> > Yes. We want to avoid filters having their own file in/out options rather
> > than using generic ones.
>
> As an end user I would find an output file with a known format much
> easier to work with.
> This works very well for the libvmaf filter, for example.
> Could you please explain how to achieve the same kind of output with
> the metadata injection?
>

-vf your_filter,metadata=mode=print


>
> Werner
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

Werner Robitza Jan. 22, 2021, 7:40 p.m. UTC | #6

On Fri, Jan 22, 2021 at 12:28 PM Paul B Mahol <onemda@gmail.com> wrote:
> > As an end user I would find an output file with a known format much
> > easier to work with.
> > This works very well for the libvmaf filter, for example.
> > Could you please explain how to achieve the same kind of output with
> > the metadata injection?
> >
>
> -vf your_filter,metadata=mode=print

Thanks, I know this, but this is not a known format that can be easily
parsed like a plain CSV file. At least the way the SITI filter is
used, regular users just want simple output in a file that they can
directly load into Excel, Python, whatever. (Just like for VMAF.)

I understand why you wouldn't want each filter to have its own IO
functionality, but in terms of usability, only having the metadata
option would be extremely bad.

(Perhaps the real solution would be to have the metadata filter
generate different kinds of known output formats.)

Werner

Paul B Mahol Jan. 22, 2021, 8:24 p.m. UTC | #7

On Fri, Jan 22, 2021 at 9:05 PM Werner Robitza <werner.robitza@gmail.com>
wrote:

> On Fri, Jan 22, 2021 at 12:28 PM Paul B Mahol <onemda@gmail.com> wrote:
> > > As an end user I would find an output file with a known format much
> > > easier to work with.
> > > This works very well for the libvmaf filter, for example.
> > > Could you please explain how to achieve the same kind of output with
> > > the metadata injection?
> > >
> >
> > -vf your_filter,metadata=mode=print
>
> Thanks, I know this, but this is not a known format that can be easily
> parsed like a plain CSV file. At least the way the SITI filter is
> used, regular users just want simple output in a file that they can
> directly load into Excel, Python, whatever. (Just like for VMAF.)
>
> I understand why you wouldn't want each filter to have its own IO
> functionality, but in terms of usability, only having the metadata
> option would be extremely bad.
>

It is not bad, feature is already present.


>
> (Perhaps the real solution would be to have the metadata filter
> generate different kinds of known output formats.)
>

You can use ffprobe for that. It support CSV output format and it does not
ignore frame metadata if you set it.
There is no reason for filter to have own IO output file/path, such
functionality should be deprecated and removed.



>
> Werner
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

Nicolas George Feb. 1, 2021, 3:57 p.m. UTC | #8

Werner Robitza (12021-01-22):
> Thanks, I know this, but this is not a known format that can be easily
> parsed like a plain CSV file.

What you need to do is to extend the metadata filter so that it lets
output to a file and choose the format.

Regards,

Werner Robitza Feb. 19, 2021, 8:02 p.m. UTC | #9

On Mon, Feb 1, 2021 at 4:57 PM Nicolas George <george@nsup.org> wrote:
>
> Werner Robitza (12021-01-22):
> > Thanks, I know this, but this is not a known format that can be easily
> > parsed like a plain CSV file.
>
> What you need to do is to extend the metadata filter so that it lets
> output to a file and choose the format.

I suppose that would be the best option, yes. However, metadata comes
in all kinds of shapes, so it's not easy to map arbitrary metadata
(that any filter can generate) to a meaningful output format, in
particular when the result should be CSV.

In particular, such output should be tidy [1]. For instance, you don't
want to output "frame, key, value" with lines "1, si, 53.999", but
rather "frame, si, ti". Such transformations to useful/parsable output
must be done in the filter itself, not vf_metadata …

[1]: https://cran.r-project.org/web/packages/tidyr/vignettes/tidy-data.html

Werner Robitza Feb. 19, 2021, 8:05 p.m. UTC | #10

On Fri, Feb 19, 2021 at 9:02 PM Werner Robitza <werner.robitza@gmail.com> wrote:
> In particular, such output should be tidy [1]. For instance, you don't
> want to output "frame, key, value" with lines "1, si, 53.999", but
> rather "frame, si, ti". Such transformations to useful/parsable output
> must be done in the filter itself, not vf_metadata …

Well, looking at it again, one could transform per-frame values,
turning each key into a column, so it could work.

Paul B Mahol Feb. 19, 2021, 8:28 p.m. UTC | #11

On Fri, Feb 19, 2021 at 9:03 PM Werner Robitza <werner.robitza@gmail.com>
wrote:

> On Mon, Feb 1, 2021 at 4:57 PM Nicolas George <george@nsup.org> wrote:
> >
> > Werner Robitza (12021-01-22):
> > > Thanks, I know this, but this is not a known format that can be easily
> > > parsed like a plain CSV file.
> >
> > What you need to do is to extend the metadata filter so that it lets
> > output to a file and choose the format.
>
> I suppose that would be the best option, yes. However, metadata comes
> in all kinds of shapes, so it's not easy to map arbitrary metadata
> (that any filter can generate) to a meaningful output format, in
> particular when the result should be CSV.
>
> In particular, such output should be tidy [1]. For instance, you don't
> want to output "frame, key, value" with lines "1, si, 53.999", but
> rather "frame, si, ti". Such transformations to useful/parsable output
> must be done in the filter itself, not vf_metadata …
>

Huh, that does not make any sense.


>
> [1]:
> https://cran.r-project.org/web/packages/tidyr/vignettes/tidy-data.html
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

Thilo Borgmann Jan. 14, 2022, 4:21 p.m. UTC | #12

Hi,

Am 19.01.21 um 05:49 schrieb Lynne:
> Jan 19, 2021, 01:07 by borbarak@fb.com:
> 
>> Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video, as defined
>> in ITU-T P.910: Subjective video quality assessment methods for multimedia
>> applications.
>>
>> Update: Fixed bracket style.
>>
> 
> Thanks, looks much neater now.
> 
> 
> 
>> I'm already adding the data to the frame's metadata, is the suggestion to remove the file option altogether?
>>
> 
> Yes. We want to avoid filters having their own file in/out options rather
> than using generic ones.

Updated the patch to apply to git HEAD.
Removed file output.
Made printing summary to console optional.


>> +
>> +#include "libavutil/imgutils.h"
>> +#include "libavutil/internal.h"
>> +#include "libavutil/opt.h"
>> +
>> +#include "avfilter.h"
>> +#include "formats.h"
>> +#include "internal.h"
>> +#include "video.h"
>> +
>> +static const int X_FILTER[9] = {
>> +    1, 0, -1,
>> +    2, 0, -2,
>> +    1, 0, -1
>> +};
>> +
>> +static const int Y_FILTER[9] = {
>> +    1, 2, 1,
>> +    0, 0, 0,
>> +    -1, -2, -1
>> +};
>>
> 
> We have optimized assembly to apply 3x3 matrices. Check out
> libavfilter/x86/vf_convolution.asm:ff_filter_3x3_sse4
>  vf_convolution already applies a sobel filter that way. Maybe
> look into sharing some DSP code with it?

I checked a bit since I also want a common sobel for vf_edgedetect / my patch for vf_blurriness.

For sobel, there is no direct asm implementation. We have a generic filter_3x3 with sse4 optimization. To use sobel with that, you'd need to run two times filter_3x3 plus one pass for gradient calculation.

As another difference, this filter (SITI) does on-the-fly conversion to full-range pixel values during its sobel. While vf_edgedetect / vf_bluriness use an abbreviation for the gradients during its sobel. Which makes them both distinct enough not to fit into a general filter_3x3 or filter_sobel from vf_convolution easily (and more overhead). So I think it's not worth the effort to force them into a common function? (Especially since we don't have a sobel_sse4 or something)

Patch without a common sobel attached.

-Thilo
From 4744ce289f3ddfd797886a6240971aff0359b937 Mon Sep 17 00:00:00 2001
From: Boris Baracaldo <borbarak@fb.com>
Date: Fri, 14 Jan 2022 16:11:54 +0100
Subject: [PATCH 1/2] lavfilter: Add SITI filter

Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video, as defined
in ITU-T P.910: Subjective video quality assessment methods for multimedia
applications.
---
 Changelog                |   1 +
 doc/filters.texi         |  24 ++++
 libavfilter/Makefile     |   1 +
 libavfilter/allfilters.c |   1 +
 libavfilter/version.h    |   2 +-
 libavfilter/vf_siti.c    | 296 +++++++++++++++++++++++++++++++++++++++
 6 files changed, 324 insertions(+), 1 deletion(-)
 create mode 100644 libavfilter/vf_siti.c

diff --git a/Changelog b/Changelog
index 3dde3326be..dcb2c368d2 100644
--- a/Changelog
+++ b/Changelog
@@ -132,6 +132,7 @@ version 4.4:
 - msad video filter
 - gophers protocol
 - RIST protocol via librist
+- siti filter
 
 
 version 4.3:
diff --git a/doc/filters.texi b/doc/filters.texi
index 05d4b1a56e..dca7171a95 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -19875,6 +19875,30 @@ ffmpeg -i input1.mkv -i input2.mkv -filter_complex "[0:v][1:v] signature=nb_inpu
 
 @end itemize
 
+@anchor{siti}
+@section siti
+
+Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video, as defined
+in ITU-T P.910: Subjective video quality assessment methods for multimedia
+applications. Available PDF at @url{https://www.itu.int/rec/T-REC-P.910-199909-S/en }.
+Per frame metrics can be written into a file in csv format.
+
+It accepts the following option:
+
+@table @option
+@item print_summary
+If set to 1, Summary statistics will be printed to the console. Default 0.
+@end table
+
+@subsection Examples
+@itemize
+@item
+To calculate SI/TI metrics and print summary:
+@example
+ffmpeg -i input.mp4 -vf siti=print_summary=1 -f null -
+@end example
+@end itemize
+
 @anchor{smartblur}
 @section smartblur
 
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 1adbea75bd..3261d05311 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -454,6 +454,7 @@ OBJS-$(CONFIG_SMARTBLUR_FILTER)              += vf_smartblur.o
 OBJS-$(CONFIG_SOBEL_FILTER)                  += vf_convolution.o
 OBJS-$(CONFIG_SOBEL_OPENCL_FILTER)           += vf_convolution_opencl.o opencl.o \
                                                 opencl/convolution.o
+OBJS-$(CONFIG_SITI_FILTER)                   += vf_siti.o
 OBJS-$(CONFIG_SPLIT_FILTER)                  += split.o
 OBJS-$(CONFIG_SPP_FILTER)                    += vf_spp.o qp_table.o
 OBJS-$(CONFIG_SR_FILTER)                     += vf_sr.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 4325a3e557..808c172b28 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -430,6 +430,7 @@ extern const AVFilter ff_vf_shuffleplanes;
 extern const AVFilter ff_vf_sidedata;
 extern const AVFilter ff_vf_signalstats;
 extern const AVFilter ff_vf_signature;
+extern const AVFilter ff_vf_siti;
 extern const AVFilter ff_vf_smartblur;
 extern const AVFilter ff_vf_sobel;
 extern const AVFilter ff_vf_sobel_opencl;
diff --git a/libavfilter/version.h b/libavfilter/version.h
index 1a9849ef82..89714bce84 100644
--- a/libavfilter/version.h
+++ b/libavfilter/version.h
@@ -30,7 +30,7 @@
 #include "libavutil/version.h"
 
 #define LIBAVFILTER_VERSION_MAJOR   8
-#define LIBAVFILTER_VERSION_MINOR  25
+#define LIBAVFILTER_VERSION_MINOR  26
 #define LIBAVFILTER_VERSION_MICRO 100
 
 
diff --git a/libavfilter/vf_siti.c b/libavfilter/vf_siti.c
new file mode 100644
index 0000000000..e2ae76f679
--- /dev/null
+++ b/libavfilter/vf_siti.c
@@ -0,0 +1,296 @@
+/*
+ * Copyright (c) 2021 Boris Baracaldo
+ * Copyright (c) 2022 Thilo Borgmann 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+/**
+ * @file
+ * Calculate Spatial Info (SI) and Temporal Info (TI) scores
+ */
+
+#include <math.h>
+
+#include "libavutil/imgutils.h"
+#include "libavutil/internal.h"
+#include "libavutil/opt.h"
+
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+static const int X_FILTER[9] = {
+    1, 0, -1,
+    2, 0, -2,
+    1, 0, -1
+};
+
+static const int Y_FILTER[9] = {
+    1, 2, 1,
+    0, 0, 0,
+    -1, -2, -1
+};
+
+typedef struct SiTiContext {
+    const AVClass *class;
+    int pixel_depth;
+    int width, height;
+    int nb_frames;
+    unsigned char *prev_frame;
+    double max_si;
+    double max_ti;
+    double min_si;
+    double min_ti;
+    double sum_si;
+    double sum_ti;
+    int full_range;
+    int print_summary;
+} SiTiContext;
+
+static const enum AVPixelFormat pix_fmts[] = {
+    AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
+    AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
+    AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10,
+    AV_PIX_FMT_NONE
+};
+
+static av_cold int init(AVFilterContext *ctx) {
+    // User options but no input data
+    SiTiContext *s = ctx->priv;
+    s->max_si = 0;
+    s->max_ti = 0;
+    return 0;
+}
+
+static av_cold void uninit(AVFilterContext *ctx) {
+    SiTiContext *s = ctx->priv;
+
+    if (s->print_summary) {
+        double avg_si = s->sum_si / s->nb_frames;
+        double avg_ti = s->sum_ti / s->nb_frames;
+        av_log(ctx, AV_LOG_INFO,
+               "SITI Summary:\nTotal frames: %d\n\n"
+               "Spatial Information:\nAverage: %f\nMax: %f\nMin: %f\n\n"
+               "Temporal Information:\nAverage: %f\nMax: %f\nMin: %f\n",
+               s->nb_frames, avg_si, s->max_si, s->min_si, avg_ti, s->max_ti, s->min_ti
+        );
+    }
+}
+
+static int config_input(AVFilterLink *inlink) {
+    // Video input data avilable
+    AVFilterContext *ctx = inlink->dst;
+    SiTiContext *s = ctx->priv;
+    int max_pixsteps[4];
+
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    av_image_fill_max_pixsteps(max_pixsteps, NULL, desc);
+
+    s->pixel_depth = max_pixsteps[0];
+    s->width = inlink->w;
+    s->height = inlink->h;
+    size_t pixel_sz = s->pixel_depth==1? (size_t) sizeof(uint8_t) : (size_t) sizeof(uint16_t);
+    size_t data_sz = (size_t) s->width * pixel_sz * s->height;
+    s->prev_frame = av_malloc(data_sz);
+
+    return 0;
+}
+
+// Get frame data handling 8 and 10 bit formats
+static uint16_t get_frame_data(const unsigned char* src, int pixel_depth, int index) {
+    const uint16_t *src16 = (const uint16_t *)src;
+    if (pixel_depth == 2)
+        return src16[index];
+    return (uint16_t) src[index];
+}
+
+// Set frame data handling 8 and 10 bit formats
+static void set_frame_data(unsigned char* dst, int pixel_depth, int index, uint16_t data) {
+    uint16_t *dst16 = (uint16_t *)dst;
+    if (pixel_depth == 2)
+        dst16[index] = data;
+    else
+        dst[index] = (uint8_t) data;
+}
+
+// Determine whether the video is in full or limited range. If not defined, assume limited.
+static int is_full_range(AVFrame* frame) {
+    // If color range not specified, fallback to pixel format
+    if (frame->color_range == AVCOL_RANGE_UNSPECIFIED || frame->color_range == AVCOL_RANGE_NB)
+        return frame->format == AV_PIX_FMT_YUVJ420P || frame->format == AV_PIX_FMT_YUVJ422P;
+    return frame->color_range == AVCOL_RANGE_JPEG;
+}
+
+// Check frame's color range and convert to full range if needed
+static uint16_t convert_full_range(uint16_t y, SiTiContext *s) {
+    if (s->full_range == 1)
+        return y;
+
+    // For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4
+    double factor = s->pixel_depth == 1? 1 : 4;
+    double shift = 16 * factor;
+    double limit_upper = 235 * factor - shift;
+    double full_upper = 256 * factor - 1;
+    double limit_y = fmin(fmax(y - shift, 0), limit_upper);
+    return (uint16_t) (full_upper * limit_y / limit_upper);
+}
+
+// Applies sobel convolution
+static void convolve_sobel(const unsigned char* src, double* dst, int linesize, SiTiContext *s) {
+    int filter_width = 3;
+    int filter_size = filter_width * filter_width;
+    for (int j=1; j<s->height-1; j++) {
+        for (int i=1; i<s->width-1; i++) {
+            double x_conv_sum = 0, y_conv_sum = 0;
+            for (int k=0; k<filter_size; k++) {
+                int ki = k % filter_width - 1;
+                int kj = floor(k / filter_width) - 1;
+                int index = (j + kj) * (linesize / s->pixel_depth) + (i + ki);
+                uint16_t data = convert_full_range(get_frame_data(src, s->pixel_depth, index), s);
+                x_conv_sum += data * X_FILTER[k];
+                y_conv_sum += data * Y_FILTER[k];
+            }
+            double gradient = sqrt(x_conv_sum * x_conv_sum + y_conv_sum * y_conv_sum);
+            // Dst matrix is smaller than src since we ignore edges that can't be convolved
+            dst[(j - 1) * (s->width - 2) + (i - 1)] = gradient;
+        }
+    }
+}
+
+// Calculate pixel difference between current and previous frame, and update previous
+static void calculate_motion(const unsigned char* curr, double* motion_matrix,
+                             int linesize, SiTiContext *s) {
+    for (int j=0; j<s->height; j++) {
+        for (int i=0; i<s->width; i++) {
+            double motion = 0;
+            int curr_index = j * (linesize / s->pixel_depth) + i;
+            int prev_index = j * s->width + i;
+            uint16_t curr_data = convert_full_range(get_frame_data(curr, s->pixel_depth, curr_index), s);
+
+            // Previous frame is already converted to full range
+            if (s->nb_frames > 1)
+                motion = curr_data - get_frame_data(s->prev_frame, s->pixel_depth, prev_index);
+            set_frame_data(s->prev_frame, s->pixel_depth, prev_index, curr_data);
+            motion_matrix[j * s->width + i] = motion;
+        }
+    }
+}
+
+static double std_deviation(double* img_metrics, int width, int height) {
+    double size = height * width;
+    double mean_sum = 0;
+    for (int j=0; j<height; j++)
+        for (int i=0; i<width; i++)
+            mean_sum += img_metrics[j * width + i];
+
+    double mean = mean_sum / size;
+
+    double sqr_diff_sum = 0;
+    for (int j=0; j<height; j++) {
+        for (int i=0; i<width; i++) {
+            double mean_diff = img_metrics[j * width + i] - mean;
+            sqr_diff_sum += (mean_diff * mean_diff);
+        }
+    }
+    double variance = sqr_diff_sum / size;
+    return sqrt(variance);
+}
+
+static void set_meta(AVDictionary **metadata, const char *key, float d) {
+    char value[128];
+    snprintf(value, sizeof(value), "%0.2f", d);
+    av_dict_set(metadata, key, value, 0);
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *frame) {
+    AVFilterContext *ctx = inlink->dst;
+    SiTiContext *s = ctx->priv;
+
+    // Gradient matrix will not include the input frame's edges
+    size_t gradient_data_sz = (size_t) (s->width - 2) * sizeof(double) * (s->height - 2);
+    double *gradient_matrix = av_malloc(gradient_data_sz);
+    size_t motion_data_sz = (size_t) s->width * sizeof(double) * s->height;
+    double *motion_matrix = av_malloc(motion_data_sz);
+    if (!gradient_matrix || !motion_matrix) {
+        av_frame_free(&frame);
+        return AVERROR(ENOMEM);
+    }
+
+    s->full_range = is_full_range(frame);
+    s->nb_frames++;
+
+    // Calculate si and ti
+    convolve_sobel(frame->data[0], gradient_matrix, frame->linesize[0], s);
+    calculate_motion(frame->data[0], motion_matrix, frame->linesize[0], s);
+    double si = std_deviation(gradient_matrix, s->width - 2, s->height - 2);
+    double ti = std_deviation(motion_matrix, s->width, s->height);
+
+    // Calculate statistics
+    s->max_si = fmax(si, s->max_si);
+    s->max_ti = fmax(ti, s->max_ti);
+    s->sum_si += si;
+    s->sum_ti += ti;
+    s->min_si = s->nb_frames == 1? si : fmin(si, s->min_si);
+    s->min_ti = s->nb_frames == 1? ti : fmin(ti, s->min_ti);
+
+    // Set si ti information in frame metadata
+    set_meta(&frame->metadata, "lavfi.siti.si", si);
+    set_meta(&frame->metadata, "lavfi.siti.ti", ti);
+
+    av_free(gradient_matrix);
+    return ff_filter_frame(inlink->dst->outputs[0], frame);
+}
+
+#define OFFSET(x) offsetof(SiTiContext, x)
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption siti_options[] = {
+    { "print_summary", "Print summary showing average values", OFFSET(print_summary), AV_OPT_TYPE_BOOL, { .i64=0 }, 0, 1, FLAGS },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(siti);
+
+static const AVFilterPad avfilter_vf_siti_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_input,
+        .filter_frame = filter_frame,
+    },
+};
+
+static const AVFilterPad avfilter_vf_siti_outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO
+    },
+};
+
+AVFilter ff_vf_siti = {
+    .name          = "siti",
+    .description   = NULL_IF_CONFIG_SMALL("Calculate spatial info (SI)."),
+    .priv_size     = sizeof(SiTiContext),
+    .priv_class    = &siti_class,
+    .init          = init,
+    .uninit        = uninit,
+    FILTER_PIXFMTS_ARRAY(pix_fmts),
+    FILTER_INPUTS(avfilter_vf_siti_inputs),
+    FILTER_OUTPUTS(avfilter_vf_siti_outputs),
+};

Paul B Mahol Jan. 15, 2022, 8:25 a.m. UTC | #13

On Fri, Jan 14, 2022 at 5:22 PM Thilo Borgmann <thilo.borgmann@mail.de>
wrote:

> Hi,
>
> Am 19.01.21 um 05:49 schrieb Lynne:
> > Jan 19, 2021, 01:07 by borbarak@fb.com:
> >
> >> Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video,
> as defined
> >> in ITU-T P.910: Subjective video quality assessment methods for
> multimedia
> >> applications.
> >>
> >> Update: Fixed bracket style.
> >>
> >
> > Thanks, looks much neater now.
> >
> >
> >
> >> I'm already adding the data to the frame's metadata, is the suggestion
> to remove the file option altogether?
> >>
> >
> > Yes. We want to avoid filters having their own file in/out options rather
> > than using generic ones.
>
> Updated the patch to apply to git HEAD.
> Removed file output.
> Made printing summary to console optional.
>
>
> >> +
> >> +#include "libavutil/imgutils.h"
> >> +#include "libavutil/internal.h"
> >> +#include "libavutil/opt.h"
> >> +
> >> +#include "avfilter.h"
> >> +#include "formats.h"
> >> +#include "internal.h"
> >> +#include "video.h"
> >> +
> >> +static const int X_FILTER[9] = {
> >> +    1, 0, -1,
> >> +    2, 0, -2,
> >> +    1, 0, -1
> >> +};
> >> +
> >> +static const int Y_FILTER[9] = {
> >> +    1, 2, 1,
> >> +    0, 0, 0,
> >> +    -1, -2, -1
> >> +};
> >>
> >
> > We have optimized assembly to apply 3x3 matrices. Check out
> > libavfilter/x86/vf_convolution.asm:ff_filter_3x3_sse4
> >  vf_convolution already applies a sobel filter that way. Maybe
> > look into sharing some DSP code with it?
>
> I checked a bit since I also want a common sobel for vf_edgedetect / my
> patch for vf_blurriness.
>
> For sobel, there is no direct asm implementation. We have a generic
> filter_3x3 with sse4 optimization. To use sobel with that, you'd need to
> run two times filter_3x3 plus one pass for gradient calculation.
>
> As another difference, this filter (SITI) does on-the-fly conversion to
> full-range pixel values during its sobel. While vf_edgedetect /
> vf_bluriness use an abbreviation for the gradients during its sobel. Which
> makes them both distinct enough not to fit into a general filter_3x3 or
> filter_sobel from vf_convolution easily (and more overhead). So I think
> it's not worth the effort to force them into a common function? (Especially
> since we don't have a sobel_sse4 or something)
>
> Patch without a common sobel attached.
>
>

Violations of code style.



> -Thilo
>
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>

Paul B Mahol Jan. 15, 2022, 8:27 a.m. UTC | #14

On Sat, Jan 15, 2022 at 9:25 AM Paul B Mahol <onemda@gmail.com> wrote:

>
>
> On Fri, Jan 14, 2022 at 5:22 PM Thilo Borgmann <thilo.borgmann@mail.de>
> wrote:
>
>> Hi,
>>
>> Am 19.01.21 um 05:49 schrieb Lynne:
>> > Jan 19, 2021, 01:07 by borbarak@fb.com:
>> >
>> >> Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video,
>> as defined
>> >> in ITU-T P.910: Subjective video quality assessment methods for
>> multimedia
>> >> applications.
>> >>
>> >> Update: Fixed bracket style.
>> >>
>> >
>> > Thanks, looks much neater now.
>> >
>> >
>> >
>> >> I'm already adding the data to the frame's metadata, is the suggestion
>> to remove the file option altogether?
>> >>
>> >
>> > Yes. We want to avoid filters having their own file in/out options
>> rather
>> > than using generic ones.
>>
>> Updated the patch to apply to git HEAD.
>> Removed file output.
>> Made printing summary to console optional.
>>
>>
>> >> +
>> >> +#include "libavutil/imgutils.h"
>> >> +#include "libavutil/internal.h"
>> >> +#include "libavutil/opt.h"
>> >> +
>> >> +#include "avfilter.h"
>> >> +#include "formats.h"
>> >> +#include "internal.h"
>> >> +#include "video.h"
>> >> +
>> >> +static const int X_FILTER[9] = {
>> >> +    1, 0, -1,
>> >> +    2, 0, -2,
>> >> +    1, 0, -1
>> >> +};
>> >> +
>> >> +static const int Y_FILTER[9] = {
>> >> +    1, 2, 1,
>> >> +    0, 0, 0,
>> >> +    -1, -2, -1
>> >> +};
>> >>
>> >
>> > We have optimized assembly to apply 3x3 matrices. Check out
>> > libavfilter/x86/vf_convolution.asm:ff_filter_3x3_sse4
>> >  vf_convolution already applies a sobel filter that way. Maybe
>> > look into sharing some DSP code with it?
>>
>> I checked a bit since I also want a common sobel for vf_edgedetect / my
>> patch for vf_blurriness.
>>
>> For sobel, there is no direct asm implementation. We have a generic
>> filter_3x3 with sse4 optimization. To use sobel with that, you'd need to
>> run two times filter_3x3 plus one pass for gradient calculation.
>>
>> As another difference, this filter (SITI) does on-the-fly conversion to
>> full-range pixel values during its sobel. While vf_edgedetect /
>> vf_bluriness use an abbreviation for the gradients during its sobel. Which
>> makes them both distinct enough not to fit into a general filter_3x3 or
>> filter_sobel from vf_convolution easily (and more overhead). So I think
>> it's not worth the effort to force them into a common function? (Especially
>> since we don't have a sobel_sse4 or something)
>>
>> Patch without a common sobel attached.
>>
>>
>
> Violations of code style.
>
> Also why filter description only shows SI part and TI part is missing?


>
>> -Thilo
>>
>>
>> _______________________________________________
>> ffmpeg-devel mailing list
>> ffmpeg-devel@ffmpeg.org
>> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>
>> To unsubscribe, visit link above, or email
>> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>>
>

Thilo Borgmann Jan. 18, 2022, 1:58 p.m. UTC | #15

Am 15.01.22 um 09:27 schrieb Paul B Mahol:
> On Sat, Jan 15, 2022 at 9:25 AM Paul B Mahol <onemda@gmail.com> wrote:
> 
>>
>>
>> On Fri, Jan 14, 2022 at 5:22 PM Thilo Borgmann <thilo.borgmann@mail.de>
>> wrote:
>>
>>> Hi,
>>>
>>> Am 19.01.21 um 05:49 schrieb Lynne:
>>>> Jan 19, 2021, 01:07 by borbarak@fb.com:
>>>>
>>>>> Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video,
>>> as defined
>>>>> in ITU-T P.910: Subjective video quality assessment methods for
>>> multimedia
>>>>> applications.
>>>>>
>>>>> Update: Fixed bracket style.
>>>>>
>>>>
>>>> Thanks, looks much neater now.
>>>>
>>>>
>>>>
>>>>> I'm already adding the data to the frame's metadata, is the suggestion
>>> to remove the file option altogether?
>>>>>
>>>>
>>>> Yes. We want to avoid filters having their own file in/out options
>>> rather
>>>> than using generic ones.
>>>
>>> Updated the patch to apply to git HEAD.
>>> Removed file output.
>>> Made printing summary to console optional.
>>>
>>>
>>>>> +
>>>>> +#include "libavutil/imgutils.h"
>>>>> +#include "libavutil/internal.h"
>>>>> +#include "libavutil/opt.h"
>>>>> +
>>>>> +#include "avfilter.h"
>>>>> +#include "formats.h"
>>>>> +#include "internal.h"
>>>>> +#include "video.h"
>>>>> +
>>>>> +static const int X_FILTER[9] = {
>>>>> +    1, 0, -1,
>>>>> +    2, 0, -2,
>>>>> +    1, 0, -1
>>>>> +};
>>>>> +
>>>>> +static const int Y_FILTER[9] = {
>>>>> +    1, 2, 1,
>>>>> +    0, 0, 0,
>>>>> +    -1, -2, -1
>>>>> +};
>>>>>
>>>>
>>>> We have optimized assembly to apply 3x3 matrices. Check out
>>>> libavfilter/x86/vf_convolution.asm:ff_filter_3x3_sse4
>>>>  vf_convolution already applies a sobel filter that way. Maybe
>>>> look into sharing some DSP code with it?
>>>
>>> I checked a bit since I also want a common sobel for vf_edgedetect / my
>>> patch for vf_blurriness.
>>>
>>> For sobel, there is no direct asm implementation. We have a generic
>>> filter_3x3 with sse4 optimization. To use sobel with that, you'd need to
>>> run two times filter_3x3 plus one pass for gradient calculation.
>>>
>>> As another difference, this filter (SITI) does on-the-fly conversion to
>>> full-range pixel values during its sobel. While vf_edgedetect /
>>> vf_bluriness use an abbreviation for the gradients during its sobel. Which
>>> makes them both distinct enough not to fit into a general filter_3x3 or
>>> filter_sobel from vf_convolution easily (and more overhead). So I think
>>> it's not worth the effort to force them into a common function? (Especially
>>> since we don't have a sobel_sse4 or something)
>>>
>>> Patch without a common sobel attached.
>>>
>>>
>>
>> Violations of code style.

Enhanced.


>> Also why filter description only shows SI part and TI part is missing?

Mentioned.

v2 attached.

Thanks,
Thilo
From 0b3f1c15c70f8deb828497d368dabb58f9469e12 Mon Sep 17 00:00:00 2001
From: Boris Baracaldo <borbarak@fb.com>
Date: Tue, 18 Jan 2022 14:57:30 +0100
Subject: [PATCH v2] lavfilter: Add SITI filter

Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video, as defined
in ITU-T P.910: Subjective video quality assessment methods for multimedia
applications.
---
 Changelog                |   1 +
 doc/filters.texi         |  23 +++
 libavfilter/Makefile     |   1 +
 libavfilter/allfilters.c |   1 +
 libavfilter/version.h    |   2 +-
 libavfilter/vf_siti.c    | 296 +++++++++++++++++++++++++++++++++++++++
 6 files changed, 323 insertions(+), 1 deletion(-)
 create mode 100644 libavfilter/vf_siti.c

diff --git a/Changelog b/Changelog
index 3dde3326be..dcb2c368d2 100644
--- a/Changelog
+++ b/Changelog
@@ -132,6 +132,7 @@ version 4.4:
 - msad video filter
 - gophers protocol
 - RIST protocol via librist
+- siti filter
 
 
 version 4.3:
diff --git a/doc/filters.texi b/doc/filters.texi
index 05d4b1a56e..f8a8a2cb72 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -19875,6 +19875,29 @@ ffmpeg -i input1.mkv -i input2.mkv -filter_complex "[0:v][1:v] signature=nb_inpu
 
 @end itemize
 
+@anchor{siti}
+@section siti
+
+Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video, as defined
+in ITU-T P.910: Subjective video quality assessment methods for multimedia
+applications. Available PDF at @url{https://www.itu.int/rec/T-REC-P.910-199909-S/en }.
+
+It accepts the following option:
+
+@table @option
+@item print_summary
+If set to 1, Summary statistics will be printed to the console. Default 0.
+@end table
+
+@subsection Examples
+@itemize
+@item
+To calculate SI/TI metrics and print summary:
+@example
+ffmpeg -i input.mp4 -vf siti=print_summary=1 -f null -
+@end example
+@end itemize
+
 @anchor{smartblur}
 @section smartblur
 
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 1adbea75bd..3261d05311 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -454,6 +454,7 @@ OBJS-$(CONFIG_SMARTBLUR_FILTER)              += vf_smartblur.o
 OBJS-$(CONFIG_SOBEL_FILTER)                  += vf_convolution.o
 OBJS-$(CONFIG_SOBEL_OPENCL_FILTER)           += vf_convolution_opencl.o opencl.o \
                                                 opencl/convolution.o
+OBJS-$(CONFIG_SITI_FILTER)                   += vf_siti.o
 OBJS-$(CONFIG_SPLIT_FILTER)                  += split.o
 OBJS-$(CONFIG_SPP_FILTER)                    += vf_spp.o qp_table.o
 OBJS-$(CONFIG_SR_FILTER)                     += vf_sr.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 4325a3e557..808c172b28 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -430,6 +430,7 @@ extern const AVFilter ff_vf_shuffleplanes;
 extern const AVFilter ff_vf_sidedata;
 extern const AVFilter ff_vf_signalstats;
 extern const AVFilter ff_vf_signature;
+extern const AVFilter ff_vf_siti;
 extern const AVFilter ff_vf_smartblur;
 extern const AVFilter ff_vf_sobel;
 extern const AVFilter ff_vf_sobel_opencl;
diff --git a/libavfilter/version.h b/libavfilter/version.h
index 1a9849ef82..89714bce84 100644
--- a/libavfilter/version.h
+++ b/libavfilter/version.h
@@ -30,7 +30,7 @@
 #include "libavutil/version.h"
 
 #define LIBAVFILTER_VERSION_MAJOR   8
-#define LIBAVFILTER_VERSION_MINOR  25
+#define LIBAVFILTER_VERSION_MINOR  26
 #define LIBAVFILTER_VERSION_MICRO 100
 
 
diff --git a/libavfilter/vf_siti.c b/libavfilter/vf_siti.c
new file mode 100644
index 0000000000..fb14e2fe83
--- /dev/null
+++ b/libavfilter/vf_siti.c
@@ -0,0 +1,296 @@
+/*
+ * Copyright (c) 2021 Boris Baracaldo
+ * Copyright (c) 2022 Thilo Borgmann 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+/**
+ * @file
+ * Calculate Spatial Info (SI) and Temporal Info (TI) scores
+ */
+
+#include <math.h>
+
+#include "libavutil/imgutils.h"
+#include "libavutil/internal.h"
+#include "libavutil/opt.h"
+
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+static const int X_FILTER[9] = {
+    1, 0, -1,
+    2, 0, -2,
+    1, 0, -1
+};
+
+static const int Y_FILTER[9] = {
+    1, 2, 1,
+    0, 0, 0,
+    -1, -2, -1
+};
+
+typedef struct SiTiContext {
+    const AVClass *class;
+    int pixel_depth;
+    int width, height;
+    int nb_frames;
+    unsigned char *prev_frame;
+    double max_si;
+    double max_ti;
+    double min_si;
+    double min_ti;
+    double sum_si;
+    double sum_ti;
+    int full_range;
+    int print_summary;
+} SiTiContext;
+
+static const enum AVPixelFormat pix_fmts[] = {
+    AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
+    AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
+    AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10,
+    AV_PIX_FMT_NONE
+};
+
+static av_cold int init(AVFilterContext *ctx) {
+    // User options but no input data
+    SiTiContext *s = ctx->priv;
+    s->max_si = 0;
+    s->max_ti = 0;
+    return 0;
+}
+
+static av_cold void uninit(AVFilterContext *ctx) {
+    SiTiContext *s = ctx->priv;
+
+    if (s->print_summary) {
+        double avg_si = s->sum_si / s->nb_frames;
+        double avg_ti = s->sum_ti / s->nb_frames;
+        av_log(ctx, AV_LOG_INFO,
+               "SITI Summary:\nTotal frames: %d\n\n"
+               "Spatial Information:\nAverage: %f\nMax: %f\nMin: %f\n\n"
+               "Temporal Information:\nAverage: %f\nMax: %f\nMin: %f\n",
+               s->nb_frames, avg_si, s->max_si, s->min_si, avg_ti, s->max_ti, s->min_ti
+        );
+    }
+}
+
+static int config_input(AVFilterLink *inlink) {
+    // Video input data avilable
+    AVFilterContext *ctx = inlink->dst;
+    SiTiContext *s = ctx->priv;
+    int max_pixsteps[4];
+
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    av_image_fill_max_pixsteps(max_pixsteps, NULL, desc);
+
+    s->pixel_depth = max_pixsteps[0];
+    s->width = inlink->w;
+    s->height = inlink->h;
+    size_t pixel_sz = s->pixel_depth==1 ? (size_t) sizeof(uint8_t) : (size_t) sizeof(uint16_t);
+    size_t data_sz = (size_t) s->width * pixel_sz * s->height;
+    s->prev_frame = av_malloc(data_sz);
+
+    return 0;
+}
+
+// Get frame data handling 8 and 10 bit formats
+static uint16_t get_frame_data(const unsigned char* src, int pixel_depth, int index) {
+    const uint16_t *src16 = (const uint16_t *)src;
+    if (pixel_depth == 2)
+        return src16[index];
+    return (uint16_t) src[index];
+}
+
+// Set frame data handling 8 and 10 bit formats
+static void set_frame_data(unsigned char* dst, int pixel_depth, int index, uint16_t data) {
+    uint16_t *dst16 = (uint16_t *)dst;
+    if (pixel_depth == 2)
+        dst16[index] = data;
+    else
+        dst[index] = (uint8_t) data;
+}
+
+// Determine whether the video is in full or limited range. If not defined, assume limited.
+static int is_full_range(AVFrame* frame) {
+    // If color range not specified, fallback to pixel format
+    if (frame->color_range == AVCOL_RANGE_UNSPECIFIED || frame->color_range == AVCOL_RANGE_NB)
+        return frame->format == AV_PIX_FMT_YUVJ420P || frame->format == AV_PIX_FMT_YUVJ422P;
+    return frame->color_range == AVCOL_RANGE_JPEG;
+}
+
+// Check frame's color range and convert to full range if needed
+static uint16_t convert_full_range(uint16_t y, SiTiContext *s) {
+    if (s->full_range == 1)
+        return y;
+
+    // For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4
+    double factor = s->pixel_depth == 1 ? 1 : 4;
+    double shift = 16 * factor;
+    double limit_upper = 235 * factor - shift;
+    double full_upper = 256 * factor - 1;
+    double limit_y = fmin(fmax(y - shift, 0), limit_upper);
+    return (uint16_t) (full_upper * limit_y / limit_upper);
+}
+
+// Applies sobel convolution
+static void convolve_sobel(const unsigned char* src, double* dst, int linesize, SiTiContext *s) {
+    int filter_width = 3;
+    int filter_size = filter_width * filter_width;
+    for (int j = 1; j < s->height - 1; j++) {
+        for (int i = 1; i < s->width - 1; i++) {
+            double x_conv_sum = 0, y_conv_sum = 0;
+            for (int k = 0; k < filter_size; k++) {
+                int ki = k % filter_width - 1;
+                int kj = floor(k / filter_width) - 1;
+                int index = (j + kj) * (linesize / s->pixel_depth) + (i + ki);
+                uint16_t data = convert_full_range(get_frame_data(src, s->pixel_depth, index), s);
+                x_conv_sum += data * X_FILTER[k];
+                y_conv_sum += data * Y_FILTER[k];
+            }
+            double gradient = sqrt(x_conv_sum * x_conv_sum + y_conv_sum * y_conv_sum);
+            // Dst matrix is smaller than src since we ignore edges that can't be convolved
+            dst[(j - 1) * (s->width - 2) + (i - 1)] = gradient;
+        }
+    }
+}
+
+// Calculate pixel difference between current and previous frame, and update previous
+static void calculate_motion(const unsigned char* curr, double* motion_matrix,
+                             int linesize, SiTiContext *s) {
+    for (int j = 0; j < s->height; j++) {
+        for (int i = 0; i < s->width; i++) {
+            double motion = 0;
+            int curr_index = j * (linesize / s->pixel_depth) + i;
+            int prev_index = j * s->width + i;
+            uint16_t curr_data = convert_full_range(get_frame_data(curr, s->pixel_depth, curr_index), s);
+
+            // Previous frame is already converted to full range
+            if (s->nb_frames > 1)
+                motion = curr_data - get_frame_data(s->prev_frame, s->pixel_depth, prev_index);
+            set_frame_data(s->prev_frame, s->pixel_depth, prev_index, curr_data);
+            motion_matrix[j * s->width + i] = motion;
+        }
+    }
+}
+
+static double std_deviation(double* img_metrics, int width, int height) {
+    double size = height * width;
+    double mean_sum = 0;
+    for (int j = 0; j < height; j++)
+        for (int i = 0; i < width; i++)
+            mean_sum += img_metrics[j * width + i];
+
+    double mean = mean_sum / size;
+
+    double sqr_diff_sum = 0;
+    for (int j = 0; j < height; j++) {
+        for (int i = 0; i < width; i++) {
+            double mean_diff = img_metrics[j * width + i] - mean;
+            sqr_diff_sum += (mean_diff * mean_diff);
+        }
+    }
+    double variance = sqr_diff_sum / size;
+    return sqrt(variance);
+}
+
+static void set_meta(AVDictionary **metadata, const char *key, float d) {
+    char value[128];
+    snprintf(value, sizeof(value), "%0.2f", d);
+    av_dict_set(metadata, key, value, 0);
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *frame) {
+    AVFilterContext *ctx = inlink->dst;
+    SiTiContext *s = ctx->priv;
+
+    // Gradient matrix will not include the input frame's edges
+    size_t gradient_data_sz = (size_t) (s->width - 2) * sizeof(double) * (s->height - 2);
+    double *gradient_matrix = av_malloc(gradient_data_sz);
+    size_t motion_data_sz = (size_t) s->width * sizeof(double) * s->height;
+    double *motion_matrix = av_malloc(motion_data_sz);
+    if (!gradient_matrix || !motion_matrix) {
+        av_frame_free(&frame);
+        return AVERROR(ENOMEM);
+    }
+
+    s->full_range = is_full_range(frame);
+    s->nb_frames++;
+
+    // Calculate si and ti
+    convolve_sobel(frame->data[0], gradient_matrix, frame->linesize[0], s);
+    calculate_motion(frame->data[0], motion_matrix, frame->linesize[0], s);
+    double si = std_deviation(gradient_matrix, s->width - 2, s->height - 2);
+    double ti = std_deviation(motion_matrix, s->width, s->height);
+
+    // Calculate statistics
+    s->max_si  = fmax(si, s->max_si);
+    s->max_ti  = fmax(ti, s->max_ti);
+    s->sum_si += si;
+    s->sum_ti += ti;
+    s->min_si  = s->nb_frames == 1 ? si : fmin(si, s->min_si);
+    s->min_ti  = s->nb_frames == 1 ? ti : fmin(ti, s->min_ti);
+
+    // Set si ti information in frame metadata
+    set_meta(&frame->metadata, "lavfi.siti.si", si);
+    set_meta(&frame->metadata, "lavfi.siti.ti", ti);
+
+    av_free(gradient_matrix);
+    return ff_filter_frame(inlink->dst->outputs[0], frame);
+}
+
+#define OFFSET(x) offsetof(SiTiContext, x)
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption siti_options[] = {
+    { "print_summary", "Print summary showing average values", OFFSET(print_summary), AV_OPT_TYPE_BOOL, { .i64=0 }, 0, 1, FLAGS },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(siti);
+
+static const AVFilterPad avfilter_vf_siti_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_input,
+        .filter_frame = filter_frame,
+    },
+};
+
+static const AVFilterPad avfilter_vf_siti_outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO
+    },
+};
+
+AVFilter ff_vf_siti = {
+    .name          = "siti",
+    .description   = NULL_IF_CONFIG_SMALL("Calculate spatial information (SI) and temporal information (TI)."),
+    .priv_size     = sizeof(SiTiContext),
+    .priv_class    = &siti_class,
+    .init          = init,
+    .uninit        = uninit,
+    FILTER_PIXFMTS_ARRAY(pix_fmts),
+    FILTER_INPUTS(avfilter_vf_siti_inputs),
+    FILTER_OUTPUTS(avfilter_vf_siti_outputs),
+};

Thilo Borgmann Jan. 24, 2022, 11:10 a.m. UTC | #16

Am 18.01.22 um 14:58 schrieb Thilo Borgmann:
> Am 15.01.22 um 09:27 schrieb Paul B Mahol:
>> On Sat, Jan 15, 2022 at 9:25 AM Paul B Mahol <onemda@gmail.com> wrote:
>>
>>>
>>>
>>> On Fri, Jan 14, 2022 at 5:22 PM Thilo Borgmann <thilo.borgmann@mail.de>
>>> wrote:
>>>
>>>> Hi,
>>>>
>>>> Am 19.01.21 um 05:49 schrieb Lynne:
>>>>> Jan 19, 2021, 01:07 by borbarak@fb.com:
>>>>>
>>>>>> Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video,
>>>> as defined
>>>>>> in ITU-T P.910: Subjective video quality assessment methods for
>>>> multimedia
>>>>>> applications.
>>>>>>
>>>>>> Update: Fixed bracket style.
>>>>>>
>>>>>
>>>>> Thanks, looks much neater now.
>>>>>
>>>>>
>>>>>
>>>>>> I'm already adding the data to the frame's metadata, is the suggestion
>>>> to remove the file option altogether?
>>>>>>
>>>>>
>>>>> Yes. We want to avoid filters having their own file in/out options
>>>> rather
>>>>> than using generic ones.
>>>>
>>>> Updated the patch to apply to git HEAD.
>>>> Removed file output.
>>>> Made printing summary to console optional.
>>>>
>>>>
>>>>>> +
>>>>>> +#include "libavutil/imgutils.h"
>>>>>> +#include "libavutil/internal.h"
>>>>>> +#include "libavutil/opt.h"
>>>>>> +
>>>>>> +#include "avfilter.h"
>>>>>> +#include "formats.h"
>>>>>> +#include "internal.h"
>>>>>> +#include "video.h"
>>>>>> +
>>>>>> +static const int X_FILTER[9] = {
>>>>>> +    1, 0, -1,
>>>>>> +    2, 0, -2,
>>>>>> +    1, 0, -1
>>>>>> +};
>>>>>> +
>>>>>> +static const int Y_FILTER[9] = {
>>>>>> +    1, 2, 1,
>>>>>> +    0, 0, 0,
>>>>>> +    -1, -2, -1
>>>>>> +};
>>>>>>
>>>>>
>>>>> We have optimized assembly to apply 3x3 matrices. Check out
>>>>> libavfilter/x86/vf_convolution.asm:ff_filter_3x3_sse4
>>>>>   vf_convolution already applies a sobel filter that way. Maybe
>>>>> look into sharing some DSP code with it?
>>>>
>>>> I checked a bit since I also want a common sobel for vf_edgedetect / my
>>>> patch for vf_blurriness.
>>>>
>>>> For sobel, there is no direct asm implementation. We have a generic
>>>> filter_3x3 with sse4 optimization. To use sobel with that, you'd need to
>>>> run two times filter_3x3 plus one pass for gradient calculation.
>>>>
>>>> As another difference, this filter (SITI) does on-the-fly conversion to
>>>> full-range pixel values during its sobel. While vf_edgedetect /
>>>> vf_bluriness use an abbreviation for the gradients during its sobel. Which
>>>> makes them both distinct enough not to fit into a general filter_3x3 or
>>>> filter_sobel from vf_convolution easily (and more overhead). So I think
>>>> it's not worth the effort to force them into a common function? (Especially
>>>> since we don't have a sobel_sse4 or something)
>>>>
>>>> Patch without a common sobel attached.
>>>>
>>>>
>>>
>>> Violations of code style.
> 
> Enhanced.
> 
> 
>>> Also why filter description only shows SI part and TI part is missing?
> 
> Mentioned.
> 
> v2 attached.

Ping.

-Thilo

Thilo Borgmann Jan. 31, 2022, 10:12 a.m. UTC | #17

Am 24.01.22 um 12:10 schrieb Thilo Borgmann:
> Am 18.01.22 um 14:58 schrieb Thilo Borgmann:
>> Am 15.01.22 um 09:27 schrieb Paul B Mahol:
>>> On Sat, Jan 15, 2022 at 9:25 AM Paul B Mahol <onemda@gmail.com> wrote:
>>>
>>>>
>>>>
>>>> On Fri, Jan 14, 2022 at 5:22 PM Thilo Borgmann <thilo.borgmann@mail.de>
>>>> wrote:
>>>>
>>>>> Hi,
>>>>>
>>>>> Am 19.01.21 um 05:49 schrieb Lynne:
>>>>>> Jan 19, 2021, 01:07 by borbarak@fb.com:
>>>>>>
>>>>>>> Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video,
>>>>> as defined
>>>>>>> in ITU-T P.910: Subjective video quality assessment methods for
>>>>> multimedia
>>>>>>> applications.
>>>>>>>
>>>>>>> Update: Fixed bracket style.
>>>>>>>
>>>>>>
>>>>>> Thanks, looks much neater now.
>>>>>>
>>>>>>
>>>>>>
>>>>>>> I'm already adding the data to the frame's metadata, is the suggestion
>>>>> to remove the file option altogether?
>>>>>>>
>>>>>>
>>>>>> Yes. We want to avoid filters having their own file in/out options
>>>>> rather
>>>>>> than using generic ones.
>>>>>
>>>>> Updated the patch to apply to git HEAD.
>>>>> Removed file output.
>>>>> Made printing summary to console optional.
>>>>>
>>>>>
>>>>>>> +
>>>>>>> +#include "libavutil/imgutils.h"
>>>>>>> +#include "libavutil/internal.h"
>>>>>>> +#include "libavutil/opt.h"
>>>>>>> +
>>>>>>> +#include "avfilter.h"
>>>>>>> +#include "formats.h"
>>>>>>> +#include "internal.h"
>>>>>>> +#include "video.h"
>>>>>>> +
>>>>>>> +static const int X_FILTER[9] = {
>>>>>>> +    1, 0, -1,
>>>>>>> +    2, 0, -2,
>>>>>>> +    1, 0, -1
>>>>>>> +};
>>>>>>> +
>>>>>>> +static const int Y_FILTER[9] = {
>>>>>>> +    1, 2, 1,
>>>>>>> +    0, 0, 0,
>>>>>>> +    -1, -2, -1
>>>>>>> +};
>>>>>>>
>>>>>>
>>>>>> We have optimized assembly to apply 3x3 matrices. Check out
>>>>>> libavfilter/x86/vf_convolution.asm:ff_filter_3x3_sse4
>>>>>>   vf_convolution already applies a sobel filter that way. Maybe
>>>>>> look into sharing some DSP code with it?
>>>>>
>>>>> I checked a bit since I also want a common sobel for vf_edgedetect / my
>>>>> patch for vf_blurriness.
>>>>>
>>>>> For sobel, there is no direct asm implementation. We have a generic
>>>>> filter_3x3 with sse4 optimization. To use sobel with that, you'd need to
>>>>> run two times filter_3x3 plus one pass for gradient calculation.
>>>>>
>>>>> As another difference, this filter (SITI) does on-the-fly conversion to
>>>>> full-range pixel values during its sobel. While vf_edgedetect /
>>>>> vf_bluriness use an abbreviation for the gradients during its sobel. Which
>>>>> makes them both distinct enough not to fit into a general filter_3x3 or
>>>>> filter_sobel from vf_convolution easily (and more overhead). So I think
>>>>> it's not worth the effort to force them into a common function? (Especially
>>>>> since we don't have a sobel_sse4 or something)
>>>>>
>>>>> Patch without a common sobel attached.
>>>>>
>>>>>
>>>>
>>>> Violations of code style.
>>
>> Enhanced.
>>
>>
>>>> Also why filter description only shows SI part and TI part is missing?
>>
>> Mentioned.
>>
>> v2 attached.
> 
> Ping.

Going to apply soon if there are no more comments especially about the sobel functions.

-Thilo

Anton Khirnov Jan. 31, 2022, 11:53 a.m. UTC | #18

Quoting Thilo Borgmann (2022-01-18 14:58:07)
> >> Violations of code style.
> 
> Enhanced.

Not enough. There are still many remaining, e.g.
* opening brace of a function definition should be on its own line
* the context should generally be the first argument
* unsigned char* should be uint8_t*
* mixed declarations and code (the compiler should warn about that)
* pointless casts all over the place

> +typedef struct SiTiContext {
> +    const AVClass *class;
> +    int pixel_depth;
> +    int width, height;
> +    int nb_frames;

uint64_t

> +static int config_input(AVFilterLink *inlink) {
> +    // Video input data avilable
> +    AVFilterContext *ctx = inlink->dst;
> +    SiTiContext *s = ctx->priv;
> +    int max_pixsteps[4];
> +
> +    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
> +    av_image_fill_max_pixsteps(max_pixsteps, NULL, desc);
> +
> +    s->pixel_depth = max_pixsteps[0];
> +    s->width = inlink->w;
> +    s->height = inlink->h;
> +    size_t pixel_sz = s->pixel_depth==1 ? (size_t) sizeof(uint8_t) : (size_t) sizeof(uint16_t);
> +    size_t data_sz = (size_t) s->width * pixel_sz * s->height;
> +    s->prev_frame = av_malloc(data_sz);

unchecked malloc

> +
> +    return 0;
> +}
> +
> +// Get frame data handling 8 and 10 bit formats
> +static uint16_t get_frame_data(const unsigned char* src, int pixel_depth, int index) {
> +    const uint16_t *src16 = (const uint16_t *)src;
> +    if (pixel_depth == 2)
> +        return src16[index];
> +    return (uint16_t) src[index];
> +}

going through this branch nine times for every single pixel is just atrocious

templatize convolve_sobel()/calculate_motion() for 8/16 bits

> +static int filter_frame(AVFilterLink *inlink, AVFrame *frame) {
> +    AVFilterContext *ctx = inlink->dst;
> +    SiTiContext *s = ctx->priv;
> +
> +    // Gradient matrix will not include the input frame's edges
> +    size_t gradient_data_sz = (size_t) (s->width - 2) * sizeof(double) * (s->height - 2);
> +    double *gradient_matrix = av_malloc(gradient_data_sz);
> +    size_t motion_data_sz = (size_t) s->width * sizeof(double) * s->height;
> +    double *motion_matrix = av_malloc(motion_data_sz);

These have a fixed size that is known at config_input() time, no point
in allocating them anew for each frame.

Also, I don't see where motion_matrix is freed.

James Almer Jan. 31, 2022, 11:55 a.m. UTC | #19

On 1/31/2022 8:53 AM, Anton Khirnov wrote:
> Quoting Thilo Borgmann (2022-01-18 14:58:07)
>>>> Violations of code style.
>>
>> Enhanced.
> 
> Not enough. There are still many remaining, e.g.
> * opening brace of a function definition should be on its own line
> * the context should generally be the first argument
> * unsigned char* should be uint8_t*
> * mixed declarations and code (the compiler should warn about that)

I think someone said that clang (or some versions) is apparently not 
warning about this, hence why so many of these end up being missed in 
reviews or even by the patch author.

Thilo Borgmann Feb. 12, 2022, 10:55 a.m. UTC | #20

Am 31.01.22 um 12:55 schrieb James Almer:
> 
> 
> On 1/31/2022 8:53 AM, Anton Khirnov wrote:
>> Quoting Thilo Borgmann (2022-01-18 14:58:07)
>>>>> Violations of code style.
>>>
>>> Enhanced.
>>
>> Not enough. There are still many remaining, e.g.
>> * opening brace of a function definition should be on its own line
>> * the context should generally be the first argument
>> * unsigned char* should be uint8_t*
>> * mixed declarations and code (the compiler should warn about that)
> 
> I think someone said that clang (or some versions) is apparently not warning about this, hence why so many of these end up being missed in reviews or even by the patch author.

This and all of Anton's comments in v3. Also removed some more obviously useless doubles.

Thanks,
Thilo
From 2445f14d3f4a2bc776c1995cd8dbf67f0adf7373 Mon Sep 17 00:00:00 2001
From: Boris Baracaldo <borbarak@fb.com>
Date: Sat, 12 Feb 2022 11:51:48 +0100
Subject: [PATCH v3] lavfilter: Add SITI filter

Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video, as defined
in ITU-T P.910: Subjective video quality assessment methods for multimedia
applications.
---
 Changelog                |   1 +
 doc/filters.texi         |  23 +++
 libavfilter/Makefile     |   1 +
 libavfilter/allfilters.c |   1 +
 libavfilter/version.h    |   2 +-
 libavfilter/vf_siti.c    | 346 +++++++++++++++++++++++++++++++++++++++
 6 files changed, 373 insertions(+), 1 deletion(-)
 create mode 100644 libavfilter/vf_siti.c

diff --git a/Changelog b/Changelog
index 3dde3326be..dcb2c368d2 100644
--- a/Changelog
+++ b/Changelog
@@ -132,6 +132,7 @@ version 4.4:
 - msad video filter
 - gophers protocol
 - RIST protocol via librist
+- siti filter
 
 
 version 4.3:
diff --git a/doc/filters.texi b/doc/filters.texi
index 05d4b1a56e..f8a8a2cb72 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -19875,6 +19875,29 @@ ffmpeg -i input1.mkv -i input2.mkv -filter_complex "[0:v][1:v] signature=nb_inpu
 
 @end itemize
 
+@anchor{siti}
+@section siti
+
+Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video, as defined
+in ITU-T P.910: Subjective video quality assessment methods for multimedia
+applications. Available PDF at @url{https://www.itu.int/rec/T-REC-P.910-199909-S/en }.
+
+It accepts the following option:
+
+@table @option
+@item print_summary
+If set to 1, Summary statistics will be printed to the console. Default 0.
+@end table
+
+@subsection Examples
+@itemize
+@item
+To calculate SI/TI metrics and print summary:
+@example
+ffmpeg -i input.mp4 -vf siti=print_summary=1 -f null -
+@end example
+@end itemize
+
 @anchor{smartblur}
 @section smartblur
 
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 1adbea75bd..3261d05311 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -454,6 +454,7 @@ OBJS-$(CONFIG_SMARTBLUR_FILTER)              += vf_smartblur.o
 OBJS-$(CONFIG_SOBEL_FILTER)                  += vf_convolution.o
 OBJS-$(CONFIG_SOBEL_OPENCL_FILTER)           += vf_convolution_opencl.o opencl.o \
                                                 opencl/convolution.o
+OBJS-$(CONFIG_SITI_FILTER)                   += vf_siti.o
 OBJS-$(CONFIG_SPLIT_FILTER)                  += split.o
 OBJS-$(CONFIG_SPP_FILTER)                    += vf_spp.o qp_table.o
 OBJS-$(CONFIG_SR_FILTER)                     += vf_sr.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 4325a3e557..808c172b28 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -430,6 +430,7 @@ extern const AVFilter ff_vf_shuffleplanes;
 extern const AVFilter ff_vf_sidedata;
 extern const AVFilter ff_vf_signalstats;
 extern const AVFilter ff_vf_signature;
+extern const AVFilter ff_vf_siti;
 extern const AVFilter ff_vf_smartblur;
 extern const AVFilter ff_vf_sobel;
 extern const AVFilter ff_vf_sobel_opencl;
diff --git a/libavfilter/version.h b/libavfilter/version.h
index 1a9849ef82..89714bce84 100644
--- a/libavfilter/version.h
+++ b/libavfilter/version.h
@@ -30,7 +30,7 @@
 #include "libavutil/version.h"
 
 #define LIBAVFILTER_VERSION_MAJOR   8
-#define LIBAVFILTER_VERSION_MINOR  25
+#define LIBAVFILTER_VERSION_MINOR  26
 #define LIBAVFILTER_VERSION_MICRO 100
 
 
diff --git a/libavfilter/vf_siti.c b/libavfilter/vf_siti.c
new file mode 100644
index 0000000000..9329c6a0c6
--- /dev/null
+++ b/libavfilter/vf_siti.c
@@ -0,0 +1,346 @@
+/*
+ * Copyright (c) 2021 Boris Baracaldo
+ * Copyright (c) 2022 Thilo Borgmann 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+/**
+ * @file
+ * Calculate Spatial Info (SI) and Temporal Info (TI) scores
+ */
+
+#include <math.h>
+
+#include "libavutil/imgutils.h"
+#include "libavutil/internal.h"
+#include "libavutil/opt.h"
+
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+static const int X_FILTER[9] = {
+    1, 0, -1,
+    2, 0, -2,
+    1, 0, -1
+};
+
+static const int Y_FILTER[9] = {
+    1, 2, 1,
+    0, 0, 0,
+    -1, -2, -1
+};
+
+typedef struct SiTiContext {
+    const AVClass *class;
+    int pixel_depth;
+    int width, height;
+    uint64_t nb_frames;
+    uint8_t *prev_frame;
+    double max_si;
+    double max_ti;
+    double min_si;
+    double min_ti;
+    double sum_si;
+    double sum_ti;
+    double *gradient_matrix;
+    double *motion_matrix;
+    int full_range;
+    int print_summary;
+} SiTiContext;
+
+static const enum AVPixelFormat pix_fmts[] = {
+    AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
+    AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
+    AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10,
+    AV_PIX_FMT_NONE
+};
+
+static av_cold int init(AVFilterContext *ctx)
+{
+    // User options but no input data
+    SiTiContext *s = ctx->priv;
+    s->max_si = 0;
+    s->max_ti = 0;
+    return 0;
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    SiTiContext *s = ctx->priv;
+
+    if (s->print_summary) {
+        double avg_si = s->sum_si / s->nb_frames;
+        double avg_ti = s->sum_ti / s->nb_frames;
+        av_log(ctx, AV_LOG_INFO,
+               "SITI Summary:\nTotal frames: %"PRId64"\n\n"
+               "Spatial Information:\nAverage: %f\nMax: %f\nMin: %f\n\n"
+               "Temporal Information:\nAverage: %f\nMax: %f\nMin: %f\n",
+               s->nb_frames, avg_si, s->max_si, s->min_si, avg_ti, s->max_ti, s->min_ti
+        );
+    }
+
+    av_freep(&s->prev_frame);
+    av_freep(&s->gradient_matrix);
+    av_freep(&s->motion_matrix);
+}
+
+static int config_input(AVFilterLink *inlink)
+{
+    // Video input data avilable
+    AVFilterContext *ctx = inlink->dst;
+    SiTiContext *s = ctx->priv;
+    int max_pixsteps[4];
+    size_t pixel_sz;
+    size_t data_sz;
+    size_t gradient_sz;
+    size_t motion_sz;
+
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    av_image_fill_max_pixsteps(max_pixsteps, NULL, desc);
+
+    s->pixel_depth = max_pixsteps[0];
+    s->width = inlink->w;
+    s->height = inlink->h;
+    pixel_sz = s->pixel_depth == 1 ? sizeof(uint8_t) : sizeof(uint16_t);
+    data_sz = s->width * pixel_sz * s->height;
+
+    s->prev_frame = av_malloc(data_sz);
+
+    gradient_sz = (s->width - 2) * sizeof(double) * (s->height - 2);
+    s->gradient_matrix = (double*)av_malloc(gradient_sz);
+
+    motion_sz = s->width * sizeof(double) * s->height;
+    s->motion_matrix = (double*)av_malloc(motion_sz);
+
+    if (!s->prev_frame || ! s->gradient_matrix || !s->motion_matrix) {
+        av_freep(&s->prev_frame);
+        av_freep(&s->gradient_matrix);
+        av_freep(&s->motion_matrix);
+        return AVERROR(ENOMEM);
+    }
+
+    return 0;
+}
+
+// Determine whether the video is in full or limited range. If not defined, assume limited.
+static int is_full_range(AVFrame* frame)
+{
+    // If color range not specified, fallback to pixel format
+    if (frame->color_range == AVCOL_RANGE_UNSPECIFIED || frame->color_range == AVCOL_RANGE_NB)
+        return frame->format == AV_PIX_FMT_YUVJ420P || frame->format == AV_PIX_FMT_YUVJ422P;
+    return frame->color_range == AVCOL_RANGE_JPEG;
+}
+
+// Check frame's color range and convert to full range if needed
+static uint16_t convert_full_range(SiTiContext *s, uint16_t y)
+{
+    int factor;
+    int shift;
+    int limit_upper;
+    int full_upper;
+    int limit_y;
+
+    if (s->full_range == 1)
+        return y;
+
+    // For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4
+    factor = s->pixel_depth == 1 ? 1 : 4;
+    shift = 16 * factor;
+    limit_upper = 235 * factor - shift;
+    full_upper = 256 * factor - 1;
+    limit_y = fmin(fmax(y - shift, 0), limit_upper);
+    return (full_upper * limit_y / limit_upper);
+}
+
+// Applies sobel convolution
+static void convolve_sobel(SiTiContext *s, const uint8_t *src, double *dst, int linesize)
+{
+    double x_conv_sum;
+    double y_conv_sum;
+    double gradient;
+    int ki;
+    int kj;
+    int index;
+    uint16_t data;
+    int filter_width = 3;
+    int filter_size = filter_width * filter_width;
+    int stride = linesize / s->pixel_depth;
+
+    // Dst matrix is smaller than src since we ignore edges that can't be convolved
+    #define CONVOLVE(bps)                                           \
+    {                                                               \
+        uint##bps##_t *vsrc = (uint##bps##_t*)src;                  \
+        for (int j = 1; j < s->height - 1; j++) {                   \
+            for (int i = 1; i < s->width - 1; i++) {                \
+                x_conv_sum = 0.0;                                   \
+                y_conv_sum = 0.0;                                   \
+                for (int k = 0; k < filter_size; k++) {             \
+                    ki = k % filter_width - 1;                      \
+                    kj = floor(k / filter_width) - 1;               \
+                    index = (j + kj) * stride + (i + ki);           \
+                    data = convert_full_range(s, vsrc[index]);      \
+                    x_conv_sum += data * X_FILTER[k];               \
+                    y_conv_sum += data * Y_FILTER[k];               \
+                }                                                   \
+                gradient = sqrt(x_conv_sum * x_conv_sum + y_conv_sum * y_conv_sum); \
+                dst[(j - 1) * (s->width - 2) + (i - 1)] = gradient; \
+            }                                                       \
+        }                                                           \
+    }
+
+    if (s->pixel_depth == 2) {
+        CONVOLVE(16);
+    } else {
+        CONVOLVE(8);
+    }
+}
+
+// Calculate pixel difference between current and previous frame, and update previous
+static void calculate_motion(SiTiContext *s, const uint8_t *curr,
+                             double *motion_matrix, int linesize)
+{
+    int stride = linesize / s->pixel_depth;
+    double motion;
+    int curr_index;
+    int prev_index;
+    uint16_t curr_data;
+
+    // Previous frame is already converted to full range
+    #define CALCULATE(bps)                                           \
+    {                                                                \
+        uint##bps##_t *vsrc = (uint##bps##_t*)curr;                  \
+        for (int j = 0; j < s->height; j++) {                        \
+            for (int i = 0; i < s->width; i++) {                     \
+                motion = 0;                                          \
+                curr_index = j * stride + i;                         \
+                prev_index = j * s->width + i;                       \
+                curr_data = convert_full_range(s, vsrc[curr_index]); \
+                if (s->nb_frames > 1)                                \
+                    motion = curr_data - s->prev_frame[prev_index];  \
+                s->prev_frame[prev_index] = curr_data;               \
+                motion_matrix[j * s->width + i] = motion;            \
+            }                                                        \
+        }                                                            \
+    }
+
+    if (s->pixel_depth == 2) {
+        CALCULATE(16);
+    } else {
+        CALCULATE(8);
+    }
+}
+
+static double std_deviation(double *img_metrics, int width, int height)
+{
+    int size = height * width;
+    double mean = 0.0;
+    double sqr_diff = 0;
+
+    for (int j = 0; j < height; j++)
+        for (int i = 0; i < width; i++)
+            mean += img_metrics[j * width + i];
+
+    mean /= size;
+
+    for (int j = 0; j < height; j++) {
+        for (int i = 0; i < width; i++) {
+            double mean_diff = img_metrics[j * width + i] - mean;
+            sqr_diff += (mean_diff * mean_diff);
+        }
+    }
+    sqr_diff = sqr_diff / size;
+    return sqrt(sqr_diff);
+}
+
+static void set_meta(AVDictionary **metadata, const char *key, float d)
+{
+    char value[128];
+    snprintf(value, sizeof(value), "%0.2f", d);
+    av_dict_set(metadata, key, value, 0);
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
+{
+    AVFilterContext *ctx = inlink->dst;
+    SiTiContext *s = ctx->priv;
+    double si;
+    double ti;
+
+    s->full_range = is_full_range(frame);
+    s->nb_frames++;
+
+    // Calculate si and ti
+    convolve_sobel(s, frame->data[0], s->gradient_matrix, frame->linesize[0]);
+    calculate_motion(s, frame->data[0], s->motion_matrix, frame->linesize[0]);
+    si = std_deviation(s->gradient_matrix, s->width - 2, s->height - 2);
+    ti = std_deviation(s->motion_matrix, s->width, s->height);
+
+    // Calculate statistics
+    s->max_si  = fmax(si, s->max_si);
+    s->max_ti  = fmax(ti, s->max_ti);
+    s->sum_si += si;
+    s->sum_ti += ti;
+    s->min_si  = s->nb_frames == 1 ? si : fmin(si, s->min_si);
+    s->min_ti  = s->nb_frames == 1 ? ti : fmin(ti, s->min_ti);
+
+    // Set si ti information in frame metadata
+    set_meta(&frame->metadata, "lavfi.siti.si", si);
+    set_meta(&frame->metadata, "lavfi.siti.ti", ti);
+
+    return ff_filter_frame(inlink->dst->outputs[0], frame);
+}
+
+#define OFFSET(x) offsetof(SiTiContext, x)
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption siti_options[] = {
+    { "print_summary", "Print summary showing average values", OFFSET(print_summary), AV_OPT_TYPE_BOOL, { .i64=0 }, 0, 1, FLAGS },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(siti);
+
+static const AVFilterPad avfilter_vf_siti_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_input,
+        .filter_frame = filter_frame,
+    },
+};
+
+static const AVFilterPad avfilter_vf_siti_outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO
+    },
+};
+
+AVFilter ff_vf_siti = {
+    .name          = "siti",
+    .description   = NULL_IF_CONFIG_SMALL("Calculate spatial information (SI) and temporal information (TI)."),
+    .priv_size     = sizeof(SiTiContext),
+    .priv_class    = &siti_class,
+    .init          = init,
+    .uninit        = uninit,
+    FILTER_PIXFMTS_ARRAY(pix_fmts),
+    FILTER_INPUTS(avfilter_vf_siti_inputs),
+    FILTER_OUTPUTS(avfilter_vf_siti_outputs),
+};

Anton Khirnov Feb. 15, 2022, 8:54 a.m. UTC | #21

Quoting Thilo Borgmann (2022-02-12 11:55:39)
> Am 31.01.22 um 12:55 schrieb James Almer:
> +static int config_input(AVFilterLink *inlink)
> +{
> +    // Video input data avilable
> +    AVFilterContext *ctx = inlink->dst;
> +    SiTiContext *s = ctx->priv;
> +    int max_pixsteps[4];
> +    size_t pixel_sz;
> +    size_t data_sz;
> +    size_t gradient_sz;
> +    size_t motion_sz;
> +
> +    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
> +    av_image_fill_max_pixsteps(max_pixsteps, NULL, desc);
> +
> +    s->pixel_depth = max_pixsteps[0];
> +    s->width = inlink->w;
> +    s->height = inlink->h;
> +    pixel_sz = s->pixel_depth == 1 ? sizeof(uint8_t) : sizeof(uint16_t);
> +    data_sz = s->width * pixel_sz * s->height;
> +
> +    s->prev_frame = av_malloc(data_sz);
> +
> +    gradient_sz = (s->width - 2) * sizeof(double) * (s->height - 2);
> +    s->gradient_matrix = (double*)av_malloc(gradient_sz);
> +
> +    motion_sz = s->width * sizeof(double) * s->height;
> +    s->motion_matrix = (double*)av_malloc(motion_sz);

useless casts

> +
> +    if (!s->prev_frame || ! s->gradient_matrix || !s->motion_matrix) {
> +        av_freep(&s->prev_frame);
> +        av_freep(&s->gradient_matrix);
> +        av_freep(&s->motion_matrix);

You don't need to free them on failure, that will be done in uninit. But
you should free them at the beginning of this function, because
config_input can be called multiple times.

> +// Applies sobel convolution
> +static void convolve_sobel(SiTiContext *s, const uint8_t *src, double *dst, int linesize)
> +{
> +    double x_conv_sum;
> +    double y_conv_sum;
> +    double gradient;
> +    int ki;
> +    int kj;
> +    int index;
> +    uint16_t data;
> +    int filter_width = 3;
> +    int filter_size = filter_width * filter_width;
> +    int stride = linesize / s->pixel_depth;
> +
> +    // Dst matrix is smaller than src since we ignore edges that can't be convolved
> +    #define CONVOLVE(bps)                                           \
> +    {                                                               \
> +        uint##bps##_t *vsrc = (uint##bps##_t*)src;                  \
> +        for (int j = 1; j < s->height - 1; j++) {                   \
> +            for (int i = 1; i < s->width - 1; i++) {                \
> +                x_conv_sum = 0.0;                                   \
> +                y_conv_sum = 0.0;                                   \
> +                for (int k = 0; k < filter_size; k++) {             \
> +                    ki = k % filter_width - 1;                      \
> +                    kj = floor(k / filter_width) - 1;               \
> +                    index = (j + kj) * stride + (i + ki);           \
> +                    data = convert_full_range(s, vsrc[index]);      \

Pass bps as a parameter to convert_full_range() instead of accessing
s->pixel_depth, so the compiler can optimize the branch away.

> +// Calculate pixel difference between current and previous frame, and update previous
> +static void calculate_motion(SiTiContext *s, const uint8_t *curr,
> +                             double *motion_matrix, int linesize)
> +{
> +    int stride = linesize / s->pixel_depth;
> +    double motion;
> +    int curr_index;
> +    int prev_index;
> +    uint16_t curr_data;
> +
> +    // Previous frame is already converted to full range
> +    #define CALCULATE(bps)                                           \
> +    {                                                                \
> +        uint##bps##_t *vsrc = (uint##bps##_t*)curr;                  \
> +        for (int j = 0; j < s->height; j++) {                        \
> +            for (int i = 0; i < s->width; i++) {                     \
> +                motion = 0;                                          \
> +                curr_index = j * stride + i;                         \
> +                prev_index = j * s->width + i;                       \
> +                curr_data = convert_full_range(s, vsrc[curr_index]); \
> +                if (s->nb_frames > 1)                                \
> +                    motion = curr_data - s->prev_frame[prev_index];  \
> +                s->prev_frame[prev_index] = curr_data;               \

previous code accessed this as uint8_t or uint16_t based on bps

Paul B Mahol Feb. 15, 2022, 9:10 a.m. UTC | #22

On Tue, Feb 15, 2022 at 9:55 AM Anton Khirnov <anton@khirnov.net> wrote:

> Quoting Thilo Borgmann (2022-02-12 11:55:39)
> > Am 31.01.22 um 12:55 schrieb James Almer:
> > +static int config_input(AVFilterLink *inlink)
> > +{
> > +    // Video input data avilable
> > +    AVFilterContext *ctx = inlink->dst;
> > +    SiTiContext *s = ctx->priv;
> > +    int max_pixsteps[4];
> > +    size_t pixel_sz;
> > +    size_t data_sz;
> > +    size_t gradient_sz;
> > +    size_t motion_sz;
> > +
> > +    const AVPixFmtDescriptor *desc =
> av_pix_fmt_desc_get(inlink->format);
> > +    av_image_fill_max_pixsteps(max_pixsteps, NULL, desc);
> > +
> > +    s->pixel_depth = max_pixsteps[0];
> > +    s->width = inlink->w;
> > +    s->height = inlink->h;
> > +    pixel_sz = s->pixel_depth == 1 ? sizeof(uint8_t) : sizeof(uint16_t);
> > +    data_sz = s->width * pixel_sz * s->height;
> > +
> > +    s->prev_frame = av_malloc(data_sz);
> > +
> > +    gradient_sz = (s->width - 2) * sizeof(double) * (s->height - 2);
> > +    s->gradient_matrix = (double*)av_malloc(gradient_sz);
> > +
> > +    motion_sz = s->width * sizeof(double) * s->height;
> > +    s->motion_matrix = (double*)av_malloc(motion_sz);
>
> useless casts
>
> > +
> > +    if (!s->prev_frame || ! s->gradient_matrix || !s->motion_matrix) {
> > +        av_freep(&s->prev_frame);
> > +        av_freep(&s->gradient_matrix);
> > +        av_freep(&s->motion_matrix);
>
> You don't need to free them on failure, that will be done in uninit. But
> you should free them at the beginning of this function, because
> config_input can be called multiple times.
>

In which scenarios? When its called, uninit is also called before.
Was thinking to add better support for frame w/h changes when midstream
filtering....


>
> > +// Applies sobel convolution
> > +static void convolve_sobel(SiTiContext *s, const uint8_t *src, double
> *dst, int linesize)
> > +{
> > +    double x_conv_sum;
> > +    double y_conv_sum;
> > +    double gradient;
> > +    int ki;
> > +    int kj;
> > +    int index;
> > +    uint16_t data;
> > +    int filter_width = 3;
> > +    int filter_size = filter_width * filter_width;
> > +    int stride = linesize / s->pixel_depth;
> > +
> > +    // Dst matrix is smaller than src since we ignore edges that can't
> be convolved
> > +    #define CONVOLVE(bps)                                           \
> > +    {                                                               \
> > +        uint##bps##_t *vsrc = (uint##bps##_t*)src;                  \
> > +        for (int j = 1; j < s->height - 1; j++) {                   \
> > +            for (int i = 1; i < s->width - 1; i++) {                \
> > +                x_conv_sum = 0.0;                                   \
> > +                y_conv_sum = 0.0;                                   \
> > +                for (int k = 0; k < filter_size; k++) {             \
> > +                    ki = k % filter_width - 1;                      \
> > +                    kj = floor(k / filter_width) - 1;               \
> > +                    index = (j + kj) * stride + (i + ki);           \
> > +                    data = convert_full_range(s, vsrc[index]);      \
>
> Pass bps as a parameter to convert_full_range() instead of accessing
> s->pixel_depth, so the compiler can optimize the branch away.
>
> > +// Calculate pixel difference between current and previous frame, and
> update previous
> > +static void calculate_motion(SiTiContext *s, const uint8_t *curr,
> > +                             double *motion_matrix, int linesize)
> > +{
> > +    int stride = linesize / s->pixel_depth;
> > +    double motion;
> > +    int curr_index;
> > +    int prev_index;
> > +    uint16_t curr_data;
> > +
> > +    // Previous frame is already converted to full range
> > +    #define CALCULATE(bps)                                           \
> > +    {                                                                \
> > +        uint##bps##_t *vsrc = (uint##bps##_t*)curr;                  \
> > +        for (int j = 0; j < s->height; j++) {                        \
> > +            for (int i = 0; i < s->width; i++) {                     \
> > +                motion = 0;                                          \
> > +                curr_index = j * stride + i;                         \
> > +                prev_index = j * s->width + i;                       \
> > +                curr_data = convert_full_range(s, vsrc[curr_index]); \
> > +                if (s->nb_frames > 1)                                \
> > +                    motion = curr_data - s->prev_frame[prev_index];  \
> > +                s->prev_frame[prev_index] = curr_data;               \
>
> previous code accessed this as uint8_t or uint16_t based on bps
>
> --
> Anton Khirnov
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>

Anton Khirnov Feb. 15, 2022, 9:19 a.m. UTC | #23

Quoting Paul B Mahol (2022-02-15 10:10:17)
> On Tue, Feb 15, 2022 at 9:55 AM Anton Khirnov <anton@khirnov.net> wrote:
> 
> > Quoting Thilo Borgmann (2022-02-12 11:55:39)
> > > Am 31.01.22 um 12:55 schrieb James Almer:
> > > +static int config_input(AVFilterLink *inlink)
> > > +{
> > > +    // Video input data avilable
> > > +    AVFilterContext *ctx = inlink->dst;
> > > +    SiTiContext *s = ctx->priv;
> > > +    int max_pixsteps[4];
> > > +    size_t pixel_sz;
> > > +    size_t data_sz;
> > > +    size_t gradient_sz;
> > > +    size_t motion_sz;
> > > +
> > > +    const AVPixFmtDescriptor *desc =
> > av_pix_fmt_desc_get(inlink->format);
> > > +    av_image_fill_max_pixsteps(max_pixsteps, NULL, desc);
> > > +
> > > +    s->pixel_depth = max_pixsteps[0];
> > > +    s->width = inlink->w;
> > > +    s->height = inlink->h;
> > > +    pixel_sz = s->pixel_depth == 1 ? sizeof(uint8_t) : sizeof(uint16_t);
> > > +    data_sz = s->width * pixel_sz * s->height;
> > > +
> > > +    s->prev_frame = av_malloc(data_sz);
> > > +
> > > +    gradient_sz = (s->width - 2) * sizeof(double) * (s->height - 2);
> > > +    s->gradient_matrix = (double*)av_malloc(gradient_sz);
> > > +
> > > +    motion_sz = s->width * sizeof(double) * s->height;
> > > +    s->motion_matrix = (double*)av_malloc(motion_sz);
> >
> > useless casts
> >
> > > +
> > > +    if (!s->prev_frame || ! s->gradient_matrix || !s->motion_matrix) {
> > > +        av_freep(&s->prev_frame);
> > > +        av_freep(&s->gradient_matrix);
> > > +        av_freep(&s->motion_matrix);
> >
> > You don't need to free them on failure, that will be done in uninit. But
> > you should free them at the beginning of this function, because
> > config_input can be called multiple times.
> >
> 
> In which scenarios? When its called, uninit is also called before.
> Was thinking to add better support for frame w/h changes when midstream
> filtering....

Actually I'm not sure it currently will be called more than once, but
IIRC the code was supposed to assume it, precisely for future parameter
change support.

Paul B Mahol Feb. 18, 2022, 4:08 p.m. UTC | #24

On Sat, Feb 12, 2022 at 11:55 AM Thilo Borgmann <thilo.borgmann@mail.de>
wrote:

> Am 31.01.22 um 12:55 schrieb James Almer:
> >
> >
> > On 1/31/2022 8:53 AM, Anton Khirnov wrote:
> >> Quoting Thilo Borgmann (2022-01-18 14:58:07)
> >>>>> Violations of code style.
> >>>
> >>> Enhanced.
> >>
> >> Not enough. There are still many remaining, e.g.
> >> * opening brace of a function definition should be on its own line
> >> * the context should generally be the first argument
> >> * unsigned char* should be uint8_t*
> >> * mixed declarations and code (the compiler should warn about that)
> >
> > I think someone said that clang (or some versions) is apparently not
> warning about this, hence why so many of these end up being missed in
> reviews or even by the patch author.
>
> This and all of Anton's comments in v3. Also removed some more obviously
> useless doubles.
>

Why it uses doubles in so many places?
Is there any real benefit in that, except extra slowdown?



>
> Thanks,
> Thilo_______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>

Thilo Borgmann Feb. 22, 2022, 11:26 a.m. UTC | #25

Am 15.02.22 um 09:54 schrieb Anton Khirnov:
> Quoting Thilo Borgmann (2022-02-12 11:55:39)
>> Am 31.01.22 um 12:55 schrieb James Almer:
>> +static int config_input(AVFilterLink *inlink)
>> +{
>> +    // Video input data avilable
>> +    AVFilterContext *ctx = inlink->dst;
>> +    SiTiContext *s = ctx->priv;
>> +    int max_pixsteps[4];
>> +    size_t pixel_sz;
>> +    size_t data_sz;
>> +    size_t gradient_sz;
>> +    size_t motion_sz;
>> +
>> +    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
>> +    av_image_fill_max_pixsteps(max_pixsteps, NULL, desc);
>> +
>> +    s->pixel_depth = max_pixsteps[0];
>> +    s->width = inlink->w;
>> +    s->height = inlink->h;
>> +    pixel_sz = s->pixel_depth == 1 ? sizeof(uint8_t) : sizeof(uint16_t);
>> +    data_sz = s->width * pixel_sz * s->height;
>> +
>> +    s->prev_frame = av_malloc(data_sz);
>> +
>> +    gradient_sz = (s->width - 2) * sizeof(double) * (s->height - 2);
>> +    s->gradient_matrix = (double*)av_malloc(gradient_sz);
>> +
>> +    motion_sz = s->width * sizeof(double) * s->height;
>> +    s->motion_matrix = (double*)av_malloc(motion_sz);
> 
> useless casts
> 
>> +
>> +    if (!s->prev_frame || ! s->gradient_matrix || !s->motion_matrix) {
>> +        av_freep(&s->prev_frame);
>> +        av_freep(&s->gradient_matrix);
>> +        av_freep(&s->motion_matrix);
> 
> You don't need to free them on failure, that will be done in uninit. But
> you should free them at the beginning of this function, because
> config_input can be called multiple times.

Always freep'd and fail on alloc error.


>> +// Applies sobel convolution
>> +static void convolve_sobel(SiTiContext *s, const uint8_t *src, double *dst, int linesize)
>> +{
>> +    double x_conv_sum;
>> +    double y_conv_sum;
>> +    double gradient;
>> +    int ki;
>> +    int kj;
>> +    int index;
>> +    uint16_t data;
>> +    int filter_width = 3;
>> +    int filter_size = filter_width * filter_width;
>> +    int stride = linesize / s->pixel_depth;
>> +
>> +    // Dst matrix is smaller than src since we ignore edges that can't be convolved
>> +    #define CONVOLVE(bps)                                           \
>> +    {                                                               \
>> +        uint##bps##_t *vsrc = (uint##bps##_t*)src;                  \
>> +        for (int j = 1; j < s->height - 1; j++) {                   \
>> +            for (int i = 1; i < s->width - 1; i++) {                \
>> +                x_conv_sum = 0.0;                                   \
>> +                y_conv_sum = 0.0;                                   \
>> +                for (int k = 0; k < filter_size; k++) {             \
>> +                    ki = k % filter_width - 1;                      \
>> +                    kj = floor(k / filter_width) - 1;               \
>> +                    index = (j + kj) * stride + (i + ki);           \
>> +                    data = convert_full_range(s, vsrc[index]);      \
> 
> Pass bps as a parameter to convert_full_range() instead of accessing
> s->pixel_depth, so the compiler can optimize the branch away.

I am not sure if the changes I did here suit the optimization you had in mind... pls check if v4 does this right.


> 
>> +// Calculate pixel difference between current and previous frame, and update previous
>> +static void calculate_motion(SiTiContext *s, const uint8_t *curr,
>> +                             double *motion_matrix, int linesize)
>> +{
>> +    int stride = linesize / s->pixel_depth;
>> +    double motion;
>> +    int curr_index;
>> +    int prev_index;
>> +    uint16_t curr_data;
>> +
>> +    // Previous frame is already converted to full range
>> +    #define CALCULATE(bps)                                           \
>> +    {                                                                \
>> +        uint##bps##_t *vsrc = (uint##bps##_t*)curr;                  \
>> +        for (int j = 0; j < s->height; j++) {                        \
>> +            for (int i = 0; i < s->width; i++) {                     \
>> +                motion = 0;                                          \
>> +                curr_index = j * stride + i;                         \
>> +                prev_index = j * s->width + i;                       \
>> +                curr_data = convert_full_range(s, vsrc[curr_index]); \
>> +                if (s->nb_frames > 1)                                \
>> +                    motion = curr_data - s->prev_frame[prev_index];  \
>> +                s->prev_frame[prev_index] = curr_data;               \
> 
> previous code accessed this as uint8_t or uint16_t based on bps
> 

Fixed in v4. Attached.

Thanks,
Thilo
From 8f2c410dbfc7e8651c6654ce57efe144dba62592 Mon Sep 17 00:00:00 2001
From: Boris Baracaldo <borbarak@fb.com>
Date: Tue, 22 Feb 2022 12:23:07 +0100
Subject: [PATCH v4] lavfilter: Add SITI filter

Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video, as defined
in ITU-T P.910: Subjective video quality assessment methods for multimedia
applications.
---
 Changelog                |   1 +
 doc/filters.texi         |  23 +++
 libavfilter/Makefile     |   1 +
 libavfilter/allfilters.c |   1 +
 libavfilter/version.h    |   2 +-
 libavfilter/vf_siti.c    | 348 +++++++++++++++++++++++++++++++++++++++
 6 files changed, 375 insertions(+), 1 deletion(-)
 create mode 100644 libavfilter/vf_siti.c

diff --git a/Changelog b/Changelog
index 3dde3326be..dcb2c368d2 100644
--- a/Changelog
+++ b/Changelog
@@ -132,6 +132,7 @@ version 4.4:
 - msad video filter
 - gophers protocol
 - RIST protocol via librist
+- siti filter
 
 
 version 4.3:
diff --git a/doc/filters.texi b/doc/filters.texi
index 05d4b1a56e..f8a8a2cb72 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -19875,6 +19875,29 @@ ffmpeg -i input1.mkv -i input2.mkv -filter_complex "[0:v][1:v] signature=nb_inpu
 
 @end itemize
 
+@anchor{siti}
+@section siti
+
+Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video, as defined
+in ITU-T P.910: Subjective video quality assessment methods for multimedia
+applications. Available PDF at @url{https://www.itu.int/rec/T-REC-P.910-199909-S/en }.
+
+It accepts the following option:
+
+@table @option
+@item print_summary
+If set to 1, Summary statistics will be printed to the console. Default 0.
+@end table
+
+@subsection Examples
+@itemize
+@item
+To calculate SI/TI metrics and print summary:
+@example
+ffmpeg -i input.mp4 -vf siti=print_summary=1 -f null -
+@end example
+@end itemize
+
 @anchor{smartblur}
 @section smartblur
 
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 1adbea75bd..3261d05311 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -454,6 +454,7 @@ OBJS-$(CONFIG_SMARTBLUR_FILTER)              += vf_smartblur.o
 OBJS-$(CONFIG_SOBEL_FILTER)                  += vf_convolution.o
 OBJS-$(CONFIG_SOBEL_OPENCL_FILTER)           += vf_convolution_opencl.o opencl.o \
                                                 opencl/convolution.o
+OBJS-$(CONFIG_SITI_FILTER)                   += vf_siti.o
 OBJS-$(CONFIG_SPLIT_FILTER)                  += split.o
 OBJS-$(CONFIG_SPP_FILTER)                    += vf_spp.o qp_table.o
 OBJS-$(CONFIG_SR_FILTER)                     += vf_sr.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 4325a3e557..808c172b28 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -430,6 +430,7 @@ extern const AVFilter ff_vf_shuffleplanes;
 extern const AVFilter ff_vf_sidedata;
 extern const AVFilter ff_vf_signalstats;
 extern const AVFilter ff_vf_signature;
+extern const AVFilter ff_vf_siti;
 extern const AVFilter ff_vf_smartblur;
 extern const AVFilter ff_vf_sobel;
 extern const AVFilter ff_vf_sobel_opencl;
diff --git a/libavfilter/version.h b/libavfilter/version.h
index 1a9849ef82..89714bce84 100644
--- a/libavfilter/version.h
+++ b/libavfilter/version.h
@@ -30,7 +30,7 @@
 #include "libavutil/version.h"
 
 #define LIBAVFILTER_VERSION_MAJOR   8
-#define LIBAVFILTER_VERSION_MINOR  25
+#define LIBAVFILTER_VERSION_MINOR  26
 #define LIBAVFILTER_VERSION_MICRO 100
 
 
diff --git a/libavfilter/vf_siti.c b/libavfilter/vf_siti.c
new file mode 100644
index 0000000000..bae6ae3a24
--- /dev/null
+++ b/libavfilter/vf_siti.c
@@ -0,0 +1,348 @@
+/*
+ * Copyright (c) 2021 Boris Baracaldo
+ * Copyright (c) 2022 Thilo Borgmann 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+/**
+ * @file
+ * Calculate Spatial Info (SI) and Temporal Info (TI) scores
+ */
+
+#include <math.h>
+
+#include "libavutil/imgutils.h"
+#include "libavutil/internal.h"
+#include "libavutil/opt.h"
+
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+static const int X_FILTER[9] = {
+    1, 0, -1,
+    2, 0, -2,
+    1, 0, -1
+};
+
+static const int Y_FILTER[9] = {
+    1, 2, 1,
+    0, 0, 0,
+    -1, -2, -1
+};
+
+typedef struct SiTiContext {
+    const AVClass *class;
+    int pixel_depth;
+    int width, height;
+    uint64_t nb_frames;
+    uint8_t *prev_frame;
+    float max_si;
+    float max_ti;
+    float min_si;
+    float min_ti;
+    float sum_si;
+    float sum_ti;
+    float *gradient_matrix;
+    float *motion_matrix;
+    int full_range;
+    int print_summary;
+} SiTiContext;
+
+static const enum AVPixelFormat pix_fmts[] = {
+    AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
+    AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
+    AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10,
+    AV_PIX_FMT_NONE
+};
+
+static av_cold int init(AVFilterContext *ctx)
+{
+    // User options but no input data
+    SiTiContext *s = ctx->priv;
+    s->max_si = 0;
+    s->max_ti = 0;
+    return 0;
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    SiTiContext *s = ctx->priv;
+
+    if (s->print_summary) {
+        float avg_si = s->sum_si / s->nb_frames;
+        float avg_ti = s->sum_ti / s->nb_frames;
+        av_log(ctx, AV_LOG_INFO,
+               "SITI Summary:\nTotal frames: %"PRId64"\n\n"
+               "Spatial Information:\nAverage: %f\nMax: %f\nMin: %f\n\n"
+               "Temporal Information:\nAverage: %f\nMax: %f\nMin: %f\n",
+               s->nb_frames, avg_si, s->max_si, s->min_si, avg_ti, s->max_ti, s->min_ti
+        );
+    }
+
+    av_freep(&s->prev_frame);
+    av_freep(&s->gradient_matrix);
+    av_freep(&s->motion_matrix);
+}
+
+static int config_input(AVFilterLink *inlink)
+{
+    // Video input data avilable
+    AVFilterContext *ctx = inlink->dst;
+    SiTiContext *s = ctx->priv;
+    int max_pixsteps[4];
+    size_t pixel_sz;
+    size_t data_sz;
+    size_t gradient_sz;
+    size_t motion_sz;
+
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    av_image_fill_max_pixsteps(max_pixsteps, NULL, desc);
+
+    // free previous buffers in case they are allocated already
+    av_freep(&s->prev_frame);
+    av_freep(&s->gradient_matrix);
+    av_freep(&s->motion_matrix);
+
+    s->pixel_depth = max_pixsteps[0];
+    s->width = inlink->w;
+    s->height = inlink->h;
+    pixel_sz = s->pixel_depth == 1 ? sizeof(uint8_t) : sizeof(uint16_t);
+    data_sz = s->width * pixel_sz * s->height;
+
+    s->prev_frame = av_malloc(data_sz);
+
+    gradient_sz = (s->width - 2) * sizeof(float) * (s->height - 2);
+    s->gradient_matrix = (float*)av_malloc(gradient_sz);
+
+    motion_sz = s->width * sizeof(float) * s->height;
+    s->motion_matrix = (float*)av_malloc(motion_sz);
+
+    if (!s->prev_frame || ! s->gradient_matrix || !s->motion_matrix) {
+        return AVERROR(ENOMEM);
+    }
+
+    return 0;
+}
+
+// Determine whether the video is in full or limited range. If not defined, assume limited.
+static int is_full_range(AVFrame* frame)
+{
+    // If color range not specified, fallback to pixel format
+    if (frame->color_range == AVCOL_RANGE_UNSPECIFIED || frame->color_range == AVCOL_RANGE_NB)
+        return frame->format == AV_PIX_FMT_YUVJ420P || frame->format == AV_PIX_FMT_YUVJ422P;
+    return frame->color_range == AVCOL_RANGE_JPEG;
+}
+
+// Check frame's color range and convert to full range if needed
+static uint16_t convert_full_range(int factor, uint16_t y)
+{
+    int shift;
+    int limit_upper;
+    int full_upper;
+    int limit_y;
+
+    // For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4
+    shift = 16 * factor;
+    limit_upper = 235 * factor - shift;
+    full_upper = 256 * factor - 1;
+    limit_y = fmin(fmax(y - shift, 0), limit_upper);
+    return (full_upper * limit_y / limit_upper);
+}
+
+// Applies sobel convolution
+static void convolve_sobel(SiTiContext *s, const uint8_t *src, float *dst, int linesize)
+{
+    float x_conv_sum;
+    float y_conv_sum;
+    float gradient;
+    int ki;
+    int kj;
+    int index;
+    uint16_t data;
+    int filter_width = 3;
+    int filter_size = filter_width * filter_width;
+    int stride = linesize / s->pixel_depth;
+    // For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4
+    int factor = s->pixel_depth == 1 ? 1 : 4;
+
+    // Dst matrix is smaller than src since we ignore edges that can't be convolved
+    #define CONVOLVE(bps)                                           \
+    {                                                               \
+        uint##bps##_t *vsrc = (uint##bps##_t*)src;                  \
+        for (int j = 1; j < s->height - 1; j++) {                   \
+            for (int i = 1; i < s->width - 1; i++) {                \
+                x_conv_sum = 0.0;                                   \
+                y_conv_sum = 0.0;                                   \
+                for (int k = 0; k < filter_size; k++) {             \
+                    ki = k % filter_width - 1;                      \
+                    kj = floor(k / filter_width) - 1;               \
+                    index = (j + kj) * stride + (i + ki);           \
+                    data = s->full_range ? vsrc[index] : convert_full_range(factor, vsrc[index]); \
+                    x_conv_sum += data * X_FILTER[k];               \
+                    y_conv_sum += data * Y_FILTER[k];               \
+                }                                                   \
+                gradient = sqrt(x_conv_sum * x_conv_sum + y_conv_sum * y_conv_sum); \
+                dst[(j - 1) * (s->width - 2) + (i - 1)] = gradient; \
+            }                                                       \
+        }                                                           \
+    }
+
+    if (s->pixel_depth == 2) {
+        CONVOLVE(16);
+    } else {
+        CONVOLVE(8);
+    }
+}
+
+// Calculate pixel difference between current and previous frame, and update previous
+static void calculate_motion(SiTiContext *s, const uint8_t *curr,
+                             float *motion_matrix, int linesize)
+{
+    int stride = linesize / s->pixel_depth;
+    float motion;
+    int curr_index;
+    int prev_index;
+    uint16_t curr_data;
+    // For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4
+    int factor = s->pixel_depth == 1 ? 1 : 4;
+
+    // Previous frame is already converted to full range
+    #define CALCULATE(bps)                                           \
+    {                                                                \
+        uint##bps##_t *vsrc = (uint##bps##_t*)curr;                  \
+        uint##bps##_t *vdst = (uint##bps##_t*)s->prev_frame;         \
+        for (int j = 0; j < s->height; j++) {                        \
+            for (int i = 0; i < s->width; i++) {                     \
+                motion = 0;                                          \
+                curr_index = j * stride + i;                         \
+                prev_index = j * s->width + i;                       \
+                curr_data = s->full_range ? vsrc[curr_index] : convert_full_range(factor, vsrc[curr_index]); \
+                if (s->nb_frames > 1)                                \
+                    motion = curr_data - vdst[prev_index];           \
+                vdst[prev_index] = curr_data;                        \
+                motion_matrix[j * s->width + i] = motion;            \
+            }                                                        \
+        }                                                            \
+    }
+
+    if (s->pixel_depth == 2) {
+        CALCULATE(16);
+    } else {
+        CALCULATE(8);
+    }
+}
+
+static float std_deviation(float *img_metrics, int width, int height)
+{
+    int size = height * width;
+    float mean = 0.0;
+    float sqr_diff = 0;
+
+    for (int j = 0; j < height; j++)
+        for (int i = 0; i < width; i++)
+            mean += img_metrics[j * width + i];
+
+    mean /= size;
+
+    for (int j = 0; j < height; j++) {
+        for (int i = 0; i < width; i++) {
+            float mean_diff = img_metrics[j * width + i] - mean;
+            sqr_diff += (mean_diff * mean_diff);
+        }
+    }
+    sqr_diff = sqr_diff / size;
+    return sqrt(sqr_diff);
+}
+
+static void set_meta(AVDictionary **metadata, const char *key, float d)
+{
+    char value[128];
+    snprintf(value, sizeof(value), "%0.2f", d);
+    av_dict_set(metadata, key, value, 0);
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
+{
+    AVFilterContext *ctx = inlink->dst;
+    SiTiContext *s = ctx->priv;
+    float si;
+    float ti;
+
+    s->full_range = is_full_range(frame);
+    s->nb_frames++;
+
+    // Calculate si and ti
+    convolve_sobel(s, frame->data[0], s->gradient_matrix, frame->linesize[0]);
+    calculate_motion(s, frame->data[0], s->motion_matrix, frame->linesize[0]);
+    si = std_deviation(s->gradient_matrix, s->width - 2, s->height - 2);
+    ti = std_deviation(s->motion_matrix, s->width, s->height);
+
+    // Calculate statistics
+    s->max_si  = fmax(si, s->max_si);
+    s->max_ti  = fmax(ti, s->max_ti);
+    s->sum_si += si;
+    s->sum_ti += ti;
+    s->min_si  = s->nb_frames == 1 ? si : fmin(si, s->min_si);
+    s->min_ti  = s->nb_frames == 1 ? ti : fmin(ti, s->min_ti);
+
+    // Set si ti information in frame metadata
+    set_meta(&frame->metadata, "lavfi.siti.si", si);
+    set_meta(&frame->metadata, "lavfi.siti.ti", ti);
+
+    return ff_filter_frame(inlink->dst->outputs[0], frame);
+}
+
+#define OFFSET(x) offsetof(SiTiContext, x)
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption siti_options[] = {
+    { "print_summary", "Print summary showing average values", OFFSET(print_summary), AV_OPT_TYPE_BOOL, { .i64=0 }, 0, 1, FLAGS },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(siti);
+
+static const AVFilterPad avfilter_vf_siti_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_input,
+        .filter_frame = filter_frame,
+    },
+};
+
+static const AVFilterPad avfilter_vf_siti_outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO
+    },
+};
+
+AVFilter ff_vf_siti = {
+    .name          = "siti",
+    .description   = NULL_IF_CONFIG_SMALL("Calculate spatial information (SI) and temporal information (TI)."),
+    .priv_size     = sizeof(SiTiContext),
+    .priv_class    = &siti_class,
+    .init          = init,
+    .uninit        = uninit,
+    FILTER_PIXFMTS_ARRAY(pix_fmts),
+    FILTER_INPUTS(avfilter_vf_siti_inputs),
+    FILTER_OUTPUTS(avfilter_vf_siti_outputs),
+};

Thilo Borgmann Feb. 22, 2022, 11:30 a.m. UTC | #26

Am 18.02.22 um 17:08 schrieb Paul B Mahol:
> On Sat, Feb 12, 2022 at 11:55 AM Thilo Borgmann <thilo.borgmann@mail.de>
> wrote:
> 
>> Am 31.01.22 um 12:55 schrieb James Almer:
>>>
>>>
>>> On 1/31/2022 8:53 AM, Anton Khirnov wrote:
>>>> Quoting Thilo Borgmann (2022-01-18 14:58:07)
>>>>>>> Violations of code style.
>>>>>
>>>>> Enhanced.
>>>>
>>>> Not enough. There are still many remaining, e.g.
>>>> * opening brace of a function definition should be on its own line
>>>> * the context should generally be the first argument
>>>> * unsigned char* should be uint8_t*
>>>> * mixed declarations and code (the compiler should warn about that)
>>>
>>> I think someone said that clang (or some versions) is apparently not
>> warning about this, hence why so many of these end up being missed in
>> reviews or even by the patch author.
>>
>> This and all of Anton's comments in v3. Also removed some more obviously
>> useless doubles.
>>
> 
> Why it uses doubles in so many places?
> Is there any real benefit in that, except extra slowdown?

I guess because it's originating in some c&p Matlab code.
I did %s#double#float#g for v4, loosing some precision we can ignore IMHO.



v3:

Total frames: 2

Spatial Information:
Average: 165.451985
Max: 165.817542
Min: 165.086427

Temporal Information:
Average: 1.007263
Max: 2.014525
Min: 0.000000



v4:

Total frames: 2

Spatial Information:
Average: 164.385895
Max: 164.742325
Min: 164.029480

Temporal Information:
Average: 1.007241
Max: 2.014483
Min: 0.000000

-Thilo

Thilo Borgmann March 6, 2022, 8:39 p.m. UTC | #27

Am 22.02.22 um 12:30 schrieb Thilo Borgmann:
> Am 18.02.22 um 17:08 schrieb Paul B Mahol:
>> On Sat, Feb 12, 2022 at 11:55 AM Thilo Borgmann <thilo.borgmann@mail.de>
>> wrote:
>>
>>> Am 31.01.22 um 12:55 schrieb James Almer:
>>>>
>>>>
>>>> On 1/31/2022 8:53 AM, Anton Khirnov wrote:
>>>>> Quoting Thilo Borgmann (2022-01-18 14:58:07)
>>>>>>>> Violations of code style.
>>>>>>
>>>>>> Enhanced.
>>>>>
>>>>> Not enough. There are still many remaining, e.g.
>>>>> * opening brace of a function definition should be on its own line
>>>>> * the context should generally be the first argument
>>>>> * unsigned char* should be uint8_t*
>>>>> * mixed declarations and code (the compiler should warn about that)
>>>>
>>>> I think someone said that clang (or some versions) is apparently not
>>> warning about this, hence why so many of these end up being missed in
>>> reviews or even by the patch author.
>>>
>>> This and all of Anton's comments in v3. Also removed some more obviously
>>> useless doubles.
>>>
>>
>> Why it uses doubles in so many places?
>> Is there any real benefit in that, except extra slowdown?
> 
> I guess because it's originating in some c&p Matlab code.
> I did %s#double#float#g for v4, loosing some precision we can ignore IMHO.
> 
> 
> 
> v3:
> 
> Total frames: 2
> 
> Spatial Information:
> Average: 165.451985
> Max: 165.817542
> Min: 165.086427
> 
> Temporal Information:
> Average: 1.007263
> Max: 2.014525
> Min: 0.000000
> 
> 
> 
> v4:
> 
> Total frames: 2
> 
> Spatial Information:
> Average: 164.385895
> Max: 164.742325
> Min: 164.029480
> 
> Temporal Information:
> Average: 1.007241
> Max: 2.014483
> Min: 0.000000
> 

Ping.

-Thilo

Paul B Mahol March 6, 2022, 9:25 p.m. UTC | #28

On 3/6/22, Thilo Borgmann <thilo.borgmann@mail.de> wrote:
> Am 22.02.22 um 12:30 schrieb Thilo Borgmann:
>> Am 18.02.22 um 17:08 schrieb Paul B Mahol:
>>> On Sat, Feb 12, 2022 at 11:55 AM Thilo Borgmann <thilo.borgmann@mail.de>
>>> wrote:
>>>
>>>> Am 31.01.22 um 12:55 schrieb James Almer:
>>>>>
>>>>>
>>>>> On 1/31/2022 8:53 AM, Anton Khirnov wrote:
>>>>>> Quoting Thilo Borgmann (2022-01-18 14:58:07)
>>>>>>>>> Violations of code style.
>>>>>>>
>>>>>>> Enhanced.
>>>>>>
>>>>>> Not enough. There are still many remaining, e.g.
>>>>>> * opening brace of a function definition should be on its own line
>>>>>> * the context should generally be the first argument
>>>>>> * unsigned char* should be uint8_t*
>>>>>> * mixed declarations and code (the compiler should warn about that)
>>>>>
>>>>> I think someone said that clang (or some versions) is apparently not
>>>> warning about this, hence why so many of these end up being missed in
>>>> reviews or even by the patch author.
>>>>
>>>> This and all of Anton's comments in v3. Also removed some more
>>>> obviously
>>>> useless doubles.
>>>>
>>>
>>> Why it uses doubles in so many places?
>>> Is there any real benefit in that, except extra slowdown?
>>
>> I guess because it's originating in some c&p Matlab code.
>> I did %s#double#float#g for v4, loosing some precision we can ignore
>> IMHO.
>>
>>
>>
>> v3:
>>
>> Total frames: 2
>>
>> Spatial Information:
>> Average: 165.451985
>> Max: 165.817542
>> Min: 165.086427
>>
>> Temporal Information:
>> Average: 1.007263
>> Max: 2.014525
>> Min: 0.000000
>>
>>
>>
>> v4:
>>
>> Total frames: 2
>>
>> Spatial Information:
>> Average: 164.385895
>> Max: 164.742325
>> Min: 164.029480
>>
>> Temporal Information:
>> Average: 1.007241
>> Max: 2.014483
>> Min: 0.000000
>>
>
> Ping.

Into wrong section of changelog added entry.

Useless cast of allocation results.

Does filter changes pixels? If not, add metadata flag to appropriate place.

>
> -Thilo
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>

Thilo Borgmann March 7, 2022, 5:09 p.m. UTC | #29

Am 06.03.22 um 22:25 schrieb Paul B Mahol:
> On 3/6/22, Thilo Borgmann <thilo.borgmann@mail.de> wrote:
>> Am 22.02.22 um 12:30 schrieb Thilo Borgmann:
>>> Am 18.02.22 um 17:08 schrieb Paul B Mahol:
>>>> On Sat, Feb 12, 2022 at 11:55 AM Thilo Borgmann <thilo.borgmann@mail.de>
>>>> wrote:
>>>>
>>>>> Am 31.01.22 um 12:55 schrieb James Almer:
>>>>>>
>>>>>>
>>>>>> On 1/31/2022 8:53 AM, Anton Khirnov wrote:
>>>>>>> Quoting Thilo Borgmann (2022-01-18 14:58:07)
>>>>>>>>>> Violations of code style.
>>>>>>>>
>>>>>>>> Enhanced.
>>>>>>>
>>>>>>> Not enough. There are still many remaining, e.g.
>>>>>>> * opening brace of a function definition should be on its own line
>>>>>>> * the context should generally be the first argument
>>>>>>> * unsigned char* should be uint8_t*
>>>>>>> * mixed declarations and code (the compiler should warn about that)
>>>>>>
>>>>>> I think someone said that clang (or some versions) is apparently not
>>>>> warning about this, hence why so many of these end up being missed in
>>>>> reviews or even by the patch author.
>>>>>
>>>>> This and all of Anton's comments in v3. Also removed some more
>>>>> obviously
>>>>> useless doubles.
>>>>>
>>>>
>>>> Why it uses doubles in so many places?
>>>> Is there any real benefit in that, except extra slowdown?
>>>
>>> I guess because it's originating in some c&p Matlab code.
>>> I did %s#double#float#g for v4, loosing some precision we can ignore
>>> IMHO.
>>>
>>>
>>>
>>> v3:
>>>
>>> Total frames: 2
>>>
>>> Spatial Information:
>>> Average: 165.451985
>>> Max: 165.817542
>>> Min: 165.086427
>>>
>>> Temporal Information:
>>> Average: 1.007263
>>> Max: 2.014525
>>> Min: 0.000000
>>>
>>>
>>>
>>> v4:
>>>
>>> Total frames: 2
>>>
>>> Spatial Information:
>>> Average: 164.385895
>>> Max: 164.742325
>>> Min: 164.029480
>>>
>>> Temporal Information:
>>> Average: 1.007241
>>> Max: 2.014483
>>> Min: 0.000000
>>>
>>
>> Ping.
> 
> Into wrong section of changelog added entry.
> 
> Useless cast of allocation results.
> 
> Does filter changes pixels? If not, add metadata flag to appropriate place.

All addressed in v5, thx!

Also added a FATE test for it.

-Thilo
From efe6b29718761492c7b32242404dd078588137ee Mon Sep 17 00:00:00 2001
From: Thilo Borgmann <thilo.borgmann@mail.de>
Date: Mon, 7 Mar 2022 17:35:15 +0100
Subject: [PATCH v5] lavfilter: Add SITI filter

Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video, as defined
in ITU-T P.910: Subjective video quality assessment methods for multimedia
applications.
---
 Changelog                             |   1 +
 doc/filters.texi                      |  23 ++
 libavfilter/Makefile                  |   1 +
 libavfilter/allfilters.c              |   1 +
 libavfilter/version.h                 |   2 +-
 libavfilter/vf_siti.c                 | 349 ++++++++++++++++++++++++++
 tests/fate-run.sh                     |   9 +
 tests/fate/filter-video.mak           |   3 +
 tests/ref/fate/filter-refcmp-siti-yuv |  15 ++
 9 files changed, 403 insertions(+), 1 deletion(-)
 create mode 100644 libavfilter/vf_siti.c
 create mode 100644 tests/ref/fate/filter-refcmp-siti-yuv

diff --git a/Changelog b/Changelog
index 3dde3326be..ffe320be9d 100644
--- a/Changelog
+++ b/Changelog
@@ -48,6 +48,7 @@ version 5.0:
 - VideoToolbox ProRes encoder
 - anlmf audio filter
 - IMF demuxer (experimental)
+- SITI filter
 
 
 version 4.4:
diff --git a/doc/filters.texi b/doc/filters.texi
index 05d4b1a56e..f8a8a2cb72 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -19875,6 +19875,29 @@ ffmpeg -i input1.mkv -i input2.mkv -filter_complex "[0:v][1:v] signature=nb_inpu
 
 @end itemize
 
+@anchor{siti}
+@section siti
+
+Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video, as defined
+in ITU-T P.910: Subjective video quality assessment methods for multimedia
+applications. Available PDF at @url{https://www.itu.int/rec/T-REC-P.910-199909-S/en }.
+
+It accepts the following option:
+
+@table @option
+@item print_summary
+If set to 1, Summary statistics will be printed to the console. Default 0.
+@end table
+
+@subsection Examples
+@itemize
+@item
+To calculate SI/TI metrics and print summary:
+@example
+ffmpeg -i input.mp4 -vf siti=print_summary=1 -f null -
+@end example
+@end itemize
+
 @anchor{smartblur}
 @section smartblur
 
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 1adbea75bd..3261d05311 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -454,6 +454,7 @@ OBJS-$(CONFIG_SMARTBLUR_FILTER)              += vf_smartblur.o
 OBJS-$(CONFIG_SOBEL_FILTER)                  += vf_convolution.o
 OBJS-$(CONFIG_SOBEL_OPENCL_FILTER)           += vf_convolution_opencl.o opencl.o \
                                                 opencl/convolution.o
+OBJS-$(CONFIG_SITI_FILTER)                   += vf_siti.o
 OBJS-$(CONFIG_SPLIT_FILTER)                  += split.o
 OBJS-$(CONFIG_SPP_FILTER)                    += vf_spp.o qp_table.o
 OBJS-$(CONFIG_SR_FILTER)                     += vf_sr.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 4325a3e557..808c172b28 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -430,6 +430,7 @@ extern const AVFilter ff_vf_shuffleplanes;
 extern const AVFilter ff_vf_sidedata;
 extern const AVFilter ff_vf_signalstats;
 extern const AVFilter ff_vf_signature;
+extern const AVFilter ff_vf_siti;
 extern const AVFilter ff_vf_smartblur;
 extern const AVFilter ff_vf_sobel;
 extern const AVFilter ff_vf_sobel_opencl;
diff --git a/libavfilter/version.h b/libavfilter/version.h
index 1a9849ef82..89714bce84 100644
--- a/libavfilter/version.h
+++ b/libavfilter/version.h
@@ -30,7 +30,7 @@
 #include "libavutil/version.h"
 
 #define LIBAVFILTER_VERSION_MAJOR   8
-#define LIBAVFILTER_VERSION_MINOR  25
+#define LIBAVFILTER_VERSION_MINOR  26
 #define LIBAVFILTER_VERSION_MICRO 100
 
 
diff --git a/libavfilter/vf_siti.c b/libavfilter/vf_siti.c
new file mode 100644
index 0000000000..1ed137d23d
--- /dev/null
+++ b/libavfilter/vf_siti.c
@@ -0,0 +1,349 @@
+/*
+ * Copyright (c) 2021 Boris Baracaldo
+ * Copyright (c) 2022 Thilo Borgmann 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+/**
+ * @file
+ * Calculate Spatial Info (SI) and Temporal Info (TI) scores
+ */
+
+#include <math.h>
+
+#include "libavutil/imgutils.h"
+#include "libavutil/internal.h"
+#include "libavutil/opt.h"
+
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+static const int X_FILTER[9] = {
+    1, 0, -1,
+    2, 0, -2,
+    1, 0, -1
+};
+
+static const int Y_FILTER[9] = {
+    1, 2, 1,
+    0, 0, 0,
+    -1, -2, -1
+};
+
+typedef struct SiTiContext {
+    const AVClass *class;
+    int pixel_depth;
+    int width, height;
+    uint64_t nb_frames;
+    uint8_t *prev_frame;
+    float max_si;
+    float max_ti;
+    float min_si;
+    float min_ti;
+    float sum_si;
+    float sum_ti;
+    float *gradient_matrix;
+    float *motion_matrix;
+    int full_range;
+    int print_summary;
+} SiTiContext;
+
+static const enum AVPixelFormat pix_fmts[] = {
+    AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
+    AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
+    AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10,
+    AV_PIX_FMT_NONE
+};
+
+static av_cold int init(AVFilterContext *ctx)
+{
+    // User options but no input data
+    SiTiContext *s = ctx->priv;
+    s->max_si = 0;
+    s->max_ti = 0;
+    return 0;
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    SiTiContext *s = ctx->priv;
+
+    if (s->print_summary) {
+        float avg_si = s->sum_si / s->nb_frames;
+        float avg_ti = s->sum_ti / s->nb_frames;
+        av_log(ctx, AV_LOG_INFO,
+               "SITI Summary:\nTotal frames: %"PRId64"\n\n"
+               "Spatial Information:\nAverage: %f\nMax: %f\nMin: %f\n\n"
+               "Temporal Information:\nAverage: %f\nMax: %f\nMin: %f\n",
+               s->nb_frames, avg_si, s->max_si, s->min_si, avg_ti, s->max_ti, s->min_ti
+        );
+    }
+
+    av_freep(&s->prev_frame);
+    av_freep(&s->gradient_matrix);
+    av_freep(&s->motion_matrix);
+}
+
+static int config_input(AVFilterLink *inlink)
+{
+    // Video input data avilable
+    AVFilterContext *ctx = inlink->dst;
+    SiTiContext *s = ctx->priv;
+    int max_pixsteps[4];
+    size_t pixel_sz;
+    size_t data_sz;
+    size_t gradient_sz;
+    size_t motion_sz;
+
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    av_image_fill_max_pixsteps(max_pixsteps, NULL, desc);
+
+    // free previous buffers in case they are allocated already
+    av_freep(&s->prev_frame);
+    av_freep(&s->gradient_matrix);
+    av_freep(&s->motion_matrix);
+
+    s->pixel_depth = max_pixsteps[0];
+    s->width = inlink->w;
+    s->height = inlink->h;
+    pixel_sz = s->pixel_depth == 1 ? sizeof(uint8_t) : sizeof(uint16_t);
+    data_sz = s->width * pixel_sz * s->height;
+
+    s->prev_frame = av_malloc(data_sz);
+
+    gradient_sz = (s->width - 2) * sizeof(float) * (s->height - 2);
+    s->gradient_matrix = av_malloc(gradient_sz);
+
+    motion_sz = s->width * sizeof(float) * s->height;
+    s->motion_matrix = av_malloc(motion_sz);
+
+    if (!s->prev_frame || ! s->gradient_matrix || !s->motion_matrix) {
+        return AVERROR(ENOMEM);
+    }
+
+    return 0;
+}
+
+// Determine whether the video is in full or limited range. If not defined, assume limited.
+static int is_full_range(AVFrame* frame)
+{
+    // If color range not specified, fallback to pixel format
+    if (frame->color_range == AVCOL_RANGE_UNSPECIFIED || frame->color_range == AVCOL_RANGE_NB)
+        return frame->format == AV_PIX_FMT_YUVJ420P || frame->format == AV_PIX_FMT_YUVJ422P;
+    return frame->color_range == AVCOL_RANGE_JPEG;
+}
+
+// Check frame's color range and convert to full range if needed
+static uint16_t convert_full_range(int factor, uint16_t y)
+{
+    int shift;
+    int limit_upper;
+    int full_upper;
+    int limit_y;
+
+    // For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4
+    shift = 16 * factor;
+    limit_upper = 235 * factor - shift;
+    full_upper = 256 * factor - 1;
+    limit_y = fmin(fmax(y - shift, 0), limit_upper);
+    return (full_upper * limit_y / limit_upper);
+}
+
+// Applies sobel convolution
+static void convolve_sobel(SiTiContext *s, const uint8_t *src, float *dst, int linesize)
+{
+    float x_conv_sum;
+    float y_conv_sum;
+    float gradient;
+    int ki;
+    int kj;
+    int index;
+    uint16_t data;
+    int filter_width = 3;
+    int filter_size = filter_width * filter_width;
+    int stride = linesize / s->pixel_depth;
+    // For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4
+    int factor = s->pixel_depth == 1 ? 1 : 4;
+
+    // Dst matrix is smaller than src since we ignore edges that can't be convolved
+    #define CONVOLVE(bps)                                           \
+    {                                                               \
+        uint##bps##_t *vsrc = (uint##bps##_t*)src;                  \
+        for (int j = 1; j < s->height - 1; j++) {                   \
+            for (int i = 1; i < s->width - 1; i++) {                \
+                x_conv_sum = 0.0;                                   \
+                y_conv_sum = 0.0;                                   \
+                for (int k = 0; k < filter_size; k++) {             \
+                    ki = k % filter_width - 1;                      \
+                    kj = floor(k / filter_width) - 1;               \
+                    index = (j + kj) * stride + (i + ki);           \
+                    data = s->full_range ? vsrc[index] : convert_full_range(factor, vsrc[index]); \
+                    x_conv_sum += data * X_FILTER[k];               \
+                    y_conv_sum += data * Y_FILTER[k];               \
+                }                                                   \
+                gradient = sqrt(x_conv_sum * x_conv_sum + y_conv_sum * y_conv_sum); \
+                dst[(j - 1) * (s->width - 2) + (i - 1)] = gradient; \
+            }                                                       \
+        }                                                           \
+    }
+
+    if (s->pixel_depth == 2) {
+        CONVOLVE(16);
+    } else {
+        CONVOLVE(8);
+    }
+}
+
+// Calculate pixel difference between current and previous frame, and update previous
+static void calculate_motion(SiTiContext *s, const uint8_t *curr,
+                             float *motion_matrix, int linesize)
+{
+    int stride = linesize / s->pixel_depth;
+    float motion;
+    int curr_index;
+    int prev_index;
+    uint16_t curr_data;
+    // For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4
+    int factor = s->pixel_depth == 1 ? 1 : 4;
+
+    // Previous frame is already converted to full range
+    #define CALCULATE(bps)                                           \
+    {                                                                \
+        uint##bps##_t *vsrc = (uint##bps##_t*)curr;                  \
+        uint##bps##_t *vdst = (uint##bps##_t*)s->prev_frame;         \
+        for (int j = 0; j < s->height; j++) {                        \
+            for (int i = 0; i < s->width; i++) {                     \
+                motion = 0;                                          \
+                curr_index = j * stride + i;                         \
+                prev_index = j * s->width + i;                       \
+                curr_data = s->full_range ? vsrc[curr_index] : convert_full_range(factor, vsrc[curr_index]); \
+                if (s->nb_frames > 1)                                \
+                    motion = curr_data - vdst[prev_index];           \
+                vdst[prev_index] = curr_data;                        \
+                motion_matrix[j * s->width + i] = motion;            \
+            }                                                        \
+        }                                                            \
+    }
+
+    if (s->pixel_depth == 2) {
+        CALCULATE(16);
+    } else {
+        CALCULATE(8);
+    }
+}
+
+static float std_deviation(float *img_metrics, int width, int height)
+{
+    int size = height * width;
+    float mean = 0.0;
+    float sqr_diff = 0;
+
+    for (int j = 0; j < height; j++)
+        for (int i = 0; i < width; i++)
+            mean += img_metrics[j * width + i];
+
+    mean /= size;
+
+    for (int j = 0; j < height; j++) {
+        for (int i = 0; i < width; i++) {
+            float mean_diff = img_metrics[j * width + i] - mean;
+            sqr_diff += (mean_diff * mean_diff);
+        }
+    }
+    sqr_diff = sqr_diff / size;
+    return sqrt(sqr_diff);
+}
+
+static void set_meta(AVDictionary **metadata, const char *key, float d)
+{
+    char value[128];
+    snprintf(value, sizeof(value), "%0.2f", d);
+    av_dict_set(metadata, key, value, 0);
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
+{
+    AVFilterContext *ctx = inlink->dst;
+    SiTiContext *s = ctx->priv;
+    float si;
+    float ti;
+
+    s->full_range = is_full_range(frame);
+    s->nb_frames++;
+
+    // Calculate si and ti
+    convolve_sobel(s, frame->data[0], s->gradient_matrix, frame->linesize[0]);
+    calculate_motion(s, frame->data[0], s->motion_matrix, frame->linesize[0]);
+    si = std_deviation(s->gradient_matrix, s->width - 2, s->height - 2);
+    ti = std_deviation(s->motion_matrix, s->width, s->height);
+
+    // Calculate statistics
+    s->max_si  = fmax(si, s->max_si);
+    s->max_ti  = fmax(ti, s->max_ti);
+    s->sum_si += si;
+    s->sum_ti += ti;
+    s->min_si  = s->nb_frames == 1 ? si : fmin(si, s->min_si);
+    s->min_ti  = s->nb_frames == 1 ? ti : fmin(ti, s->min_ti);
+
+    // Set si ti information in frame metadata
+    set_meta(&frame->metadata, "lavfi.siti.si", si);
+    set_meta(&frame->metadata, "lavfi.siti.ti", ti);
+
+    return ff_filter_frame(inlink->dst->outputs[0], frame);
+}
+
+#define OFFSET(x) offsetof(SiTiContext, x)
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption siti_options[] = {
+    { "print_summary", "Print summary showing average values", OFFSET(print_summary), AV_OPT_TYPE_BOOL, { .i64=0 }, 0, 1, FLAGS },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(siti);
+
+static const AVFilterPad avfilter_vf_siti_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_input,
+        .filter_frame = filter_frame,
+    },
+};
+
+static const AVFilterPad avfilter_vf_siti_outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO
+    },
+};
+
+AVFilter ff_vf_siti = {
+    .name          = "siti",
+    .description   = NULL_IF_CONFIG_SMALL("Calculate spatial information (SI) and temporal information (TI)."),
+    .priv_size     = sizeof(SiTiContext),
+    .priv_class    = &siti_class,
+    .init          = init,
+    .uninit        = uninit,
+    .flags         = AVFILTER_FLAG_METADATA_ONLY,
+    FILTER_PIXFMTS_ARRAY(pix_fmts),
+    FILTER_INPUTS(avfilter_vf_siti_inputs),
+    FILTER_OUTPUTS(avfilter_vf_siti_outputs),
+};
diff --git a/tests/fate-run.sh b/tests/fate-run.sh
index fbfc0a925d..a3be44d0eb 100755
--- a/tests/fate-run.sh
+++ b/tests/fate-run.sh
@@ -382,6 +382,15 @@ refcmp_metadata(){
         -f null /dev/null | awk -v ref=${ref} -v fuzz=${fuzz} -f ${base}/refcmp-metadata.awk -
 }
 
+cmp_metadata(){
+    refcmp=$1
+    pixfmt=$2
+    fuzz=${3:-0.001}
+    ffmpeg $FLAGS $ENC_OPTS \
+        -lavfi "testsrc2=size=300x200:rate=1:duration=5,format=${pixfmt},${refcmp},metadata=print:file=-" \
+        -f null /dev/null | awk -v ref=${ref} -v fuzz=${fuzz} -f ${base}/refcmp-metadata.awk -
+}
+
 pixfmt_conversion(){
     conversion="${test#pixfmt-}"
     outdir="tests/data/pixfmt"
diff --git a/tests/fate/filter-video.mak b/tests/fate/filter-video.mak
index 7df79c70e9..78191e8387 100644
--- a/tests/fate/filter-video.mak
+++ b/tests/fate/filter-video.mak
@@ -859,6 +859,9 @@ fate-filter-refcmp-ssim-rgb: CMD = refcmp_metadata ssim rgb24 0.015
 FATE_FILTER-$(call ALLYES, $(REFCMP_DEPS) SSIM_FILTER) += fate-filter-refcmp-ssim-yuv
 fate-filter-refcmp-ssim-yuv: CMD = refcmp_metadata ssim yuv422p 0.015
 
+FATE_FILTER-$(call ALLYES, $(REFCMP_DEPS) SITI_FILTER) += fate-filter-refcmp-siti-yuv
+fate-filter-refcmp-siti-yuv: CMD = cmp_metadata siti yuv420p 0.015
+
 FATE_SAMPLES_FFPROBE += $(FATE_METADATA_FILTER-yes)
 FATE_SAMPLES_FFMPEG += $(FATE_FILTER_SAMPLES-yes)
 FATE_FFMPEG += $(FATE_FILTER-yes)
diff --git a/tests/ref/fate/filter-refcmp-siti-yuv b/tests/ref/fate/filter-refcmp-siti-yuv
new file mode 100644
index 0000000000..5d99972d95
--- /dev/null
+++ b/tests/ref/fate/filter-refcmp-siti-yuv
@@ -0,0 +1,15 @@
+frame:0    pts:0       pts_time:0
+lavfi.siti.si=106.72
+lavfi.siti.ti=0.00
+frame:1    pts:1       pts_time:1
+lavfi.siti.si=109.40
+lavfi.siti.ti=28.00
+frame:2    pts:2       pts_time:2
+lavfi.siti.si=109.29
+lavfi.siti.ti=28.38
+frame:3    pts:3       pts_time:3
+lavfi.siti.si=113.27
+lavfi.siti.ti=33.42
+frame:4    pts:4       pts_time:4
+lavfi.siti.si=110.87
+lavfi.siti.ti=30.53

Paul B Mahol March 7, 2022, 7:06 p.m. UTC | #30

On 3/7/22, Thilo Borgmann <thilo.borgmann@mail.de> wrote:
> Am 06.03.22 um 22:25 schrieb Paul B Mahol:
>> On 3/6/22, Thilo Borgmann <thilo.borgmann@mail.de> wrote:
>>> Am 22.02.22 um 12:30 schrieb Thilo Borgmann:
>>>> Am 18.02.22 um 17:08 schrieb Paul B Mahol:
>>>>> On Sat, Feb 12, 2022 at 11:55 AM Thilo Borgmann
>>>>> <thilo.borgmann@mail.de>
>>>>> wrote:
>>>>>
>>>>>> Am 31.01.22 um 12:55 schrieb James Almer:
>>>>>>>
>>>>>>>
>>>>>>> On 1/31/2022 8:53 AM, Anton Khirnov wrote:
>>>>>>>> Quoting Thilo Borgmann (2022-01-18 14:58:07)
>>>>>>>>>>> Violations of code style.
>>>>>>>>>
>>>>>>>>> Enhanced.
>>>>>>>>
>>>>>>>> Not enough. There are still many remaining, e.g.
>>>>>>>> * opening brace of a function definition should be on its own line
>>>>>>>> * the context should generally be the first argument
>>>>>>>> * unsigned char* should be uint8_t*
>>>>>>>> * mixed declarations and code (the compiler should warn about that)
>>>>>>>
>>>>>>> I think someone said that clang (or some versions) is apparently not
>>>>>> warning about this, hence why so many of these end up being missed in
>>>>>> reviews or even by the patch author.
>>>>>>
>>>>>> This and all of Anton's comments in v3. Also removed some more
>>>>>> obviously
>>>>>> useless doubles.
>>>>>>
>>>>>
>>>>> Why it uses doubles in so many places?
>>>>> Is there any real benefit in that, except extra slowdown?
>>>>
>>>> I guess because it's originating in some c&p Matlab code.
>>>> I did %s#double#float#g for v4, loosing some precision we can ignore
>>>> IMHO.
>>>>
>>>>
>>>>
>>>> v3:
>>>>
>>>> Total frames: 2
>>>>
>>>> Spatial Information:
>>>> Average: 165.451985
>>>> Max: 165.817542
>>>> Min: 165.086427
>>>>
>>>> Temporal Information:
>>>> Average: 1.007263
>>>> Max: 2.014525
>>>> Min: 0.000000
>>>>
>>>>
>>>>
>>>> v4:
>>>>
>>>> Total frames: 2
>>>>
>>>> Spatial Information:
>>>> Average: 164.385895
>>>> Max: 164.742325
>>>> Min: 164.029480
>>>>
>>>> Temporal Information:
>>>> Average: 1.007241
>>>> Max: 2.014483
>>>> Min: 0.000000
>>>>
>>>
>>> Ping.
>>
>> Into wrong section of changelog added entry.
>>
>> Useless cast of allocation results.
>>
>> Does filter changes pixels? If not, add metadata flag to appropriate
>> place.
>
> All addressed in v5, thx!
>

Changelog entry is still in wrong, 5.0, section.

> Also added a FATE test for it.
>
> -Thilo

Thilo Borgmann March 8, 2022, 12:25 p.m. UTC | #31

Am 07.03.22 um 20:06 schrieb Paul B Mahol:
> On 3/7/22, Thilo Borgmann <thilo.borgmann@mail.de> wrote:
>> Am 06.03.22 um 22:25 schrieb Paul B Mahol:
>>> On 3/6/22, Thilo Borgmann <thilo.borgmann@mail.de> wrote:
>>>> Am 22.02.22 um 12:30 schrieb Thilo Borgmann:
>>>>> Am 18.02.22 um 17:08 schrieb Paul B Mahol:
>>>>>> On Sat, Feb 12, 2022 at 11:55 AM Thilo Borgmann
>>>>>> <thilo.borgmann@mail.de>
>>>>>> wrote:
>>>>>>
>>>>>>> Am 31.01.22 um 12:55 schrieb James Almer:
>>>>>>>>
>>>>>>>>
>>>>>>>> On 1/31/2022 8:53 AM, Anton Khirnov wrote:
>>>>>>>>> Quoting Thilo Borgmann (2022-01-18 14:58:07)
>>>>>>>>>>>> Violations of code style.
>>>>>>>>>>
>>>>>>>>>> Enhanced.
>>>>>>>>>
>>>>>>>>> Not enough. There are still many remaining, e.g.
>>>>>>>>> * opening brace of a function definition should be on its own line
>>>>>>>>> * the context should generally be the first argument
>>>>>>>>> * unsigned char* should be uint8_t*
>>>>>>>>> * mixed declarations and code (the compiler should warn about that)
>>>>>>>>
>>>>>>>> I think someone said that clang (or some versions) is apparently not
>>>>>>> warning about this, hence why so many of these end up being missed in
>>>>>>> reviews or even by the patch author.
>>>>>>>
>>>>>>> This and all of Anton's comments in v3. Also removed some more
>>>>>>> obviously
>>>>>>> useless doubles.
>>>>>>>
>>>>>>
>>>>>> Why it uses doubles in so many places?
>>>>>> Is there any real benefit in that, except extra slowdown?
>>>>>
>>>>> I guess because it's originating in some c&p Matlab code.
>>>>> I did %s#double#float#g for v4, loosing some precision we can ignore
>>>>> IMHO.
>>>>>
>>>>>
>>>>>
>>>>> v3:
>>>>>
>>>>> Total frames: 2
>>>>>
>>>>> Spatial Information:
>>>>> Average: 165.451985
>>>>> Max: 165.817542
>>>>> Min: 165.086427
>>>>>
>>>>> Temporal Information:
>>>>> Average: 1.007263
>>>>> Max: 2.014525
>>>>> Min: 0.000000
>>>>>
>>>>>
>>>>>
>>>>> v4:
>>>>>
>>>>> Total frames: 2
>>>>>
>>>>> Spatial Information:
>>>>> Average: 164.385895
>>>>> Max: 164.742325
>>>>> Min: 164.029480
>>>>>
>>>>> Temporal Information:
>>>>> Average: 1.007241
>>>>> Max: 2.014483
>>>>> Min: 0.000000
>>>>>
>>>>
>>>> Ping.
>>>
>>> Into wrong section of changelog added entry.
>>>
>>> Useless cast of allocation results.
>>>
>>> Does filter changes pixels? If not, add metadata flag to appropriate
>>> place.
>>
>> All addressed in v5, thx!
>>
> 
> Changelog entry is still in wrong, 5.0, section.

Fixed in v6.

>> Also added a FATE test for it.


-Thilo
From e47e552344866fdba6fa3b1064e218e2447f7a26 Mon Sep 17 00:00:00 2001
From: Boris Baracaldo <borbarak@fb.com>
Date: Tue, 8 Mar 2022 13:23:58 +0100
Subject: [PATCH v6] lavfilter: Add SITI filter

Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video, as defined
in ITU-T P.910: Subjective video quality assessment methods for multimedia
applications.
---
 Changelog                             |   1 +
 doc/filters.texi                      |  23 ++
 libavfilter/Makefile                  |   1 +
 libavfilter/allfilters.c              |   1 +
 libavfilter/version.h                 |   2 +-
 libavfilter/vf_siti.c                 | 349 ++++++++++++++++++++++++++
 tests/fate-run.sh                     |   9 +
 tests/fate/filter-video.mak           |   3 +
 tests/ref/fate/filter-refcmp-siti-yuv |  15 ++
 9 files changed, 403 insertions(+), 1 deletion(-)
 create mode 100644 libavfilter/vf_siti.c
 create mode 100644 tests/ref/fate/filter-refcmp-siti-yuv

diff --git a/Changelog b/Changelog
index 3af8aa032b..200bd82680 100644
--- a/Changelog
+++ b/Changelog
@@ -5,6 +5,7 @@ version 5.1:
 - dialogue enhance audio filter
 - dropped obsolete XvMC hwaccel
 - pcm-bluray encoder
+- SITI filter
 
 
 version 5.0:
diff --git a/doc/filters.texi b/doc/filters.texi
index 26c5b4db48..9f50fd899f 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -19944,6 +19944,29 @@ ffmpeg -i input1.mkv -i input2.mkv -filter_complex "[0:v][1:v] signature=nb_inpu
 
 @end itemize
 
+@anchor{siti}
+@section siti
+
+Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video, as defined
+in ITU-T P.910: Subjective video quality assessment methods for multimedia
+applications. Available PDF at @url{https://www.itu.int/rec/T-REC-P.910-199909-S/en }.
+
+It accepts the following option:
+
+@table @option
+@item print_summary
+If set to 1, Summary statistics will be printed to the console. Default 0.
+@end table
+
+@subsection Examples
+@itemize
+@item
+To calculate SI/TI metrics and print summary:
+@example
+ffmpeg -i input.mp4 -vf siti=print_summary=1 -f null -
+@end example
+@end itemize
+
 @anchor{smartblur}
 @section smartblur
 
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 56d33e6480..43653597d8 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -456,6 +456,7 @@ OBJS-$(CONFIG_SMARTBLUR_FILTER)              += vf_smartblur.o
 OBJS-$(CONFIG_SOBEL_FILTER)                  += vf_convolution.o
 OBJS-$(CONFIG_SOBEL_OPENCL_FILTER)           += vf_convolution_opencl.o opencl.o \
                                                 opencl/convolution.o
+OBJS-$(CONFIG_SITI_FILTER)                   += vf_siti.o
 OBJS-$(CONFIG_SPLIT_FILTER)                  += split.o
 OBJS-$(CONFIG_SPP_FILTER)                    += vf_spp.o qp_table.o
 OBJS-$(CONFIG_SR_FILTER)                     += vf_sr.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index f5caee3a62..9fbaaacf47 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -432,6 +432,7 @@ extern const AVFilter ff_vf_shuffleplanes;
 extern const AVFilter ff_vf_sidedata;
 extern const AVFilter ff_vf_signalstats;
 extern const AVFilter ff_vf_signature;
+extern const AVFilter ff_vf_siti;
 extern const AVFilter ff_vf_smartblur;
 extern const AVFilter ff_vf_sobel;
 extern const AVFilter ff_vf_sobel_opencl;
diff --git a/libavfilter/version.h b/libavfilter/version.h
index 9a890c014f..95dd64d5b5 100644
--- a/libavfilter/version.h
+++ b/libavfilter/version.h
@@ -30,7 +30,7 @@
 #include "libavutil/version.h"
 
 #define LIBAVFILTER_VERSION_MAJOR   8
-#define LIBAVFILTER_VERSION_MINOR  27
+#define LIBAVFILTER_VERSION_MINOR  28
 #define LIBAVFILTER_VERSION_MICRO 100
 
 
diff --git a/libavfilter/vf_siti.c b/libavfilter/vf_siti.c
new file mode 100644
index 0000000000..531c940ae3
--- /dev/null
+++ b/libavfilter/vf_siti.c
@@ -0,0 +1,349 @@
+/*
+ * Copyright (c) 2021 Boris Baracaldo
+ * Copyright (c) 2022 Thilo Borgmann
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+/**
+ * @file
+ * Calculate Spatial Info (SI) and Temporal Info (TI) scores
+ */
+
+#include <math.h>
+
+#include "libavutil/imgutils.h"
+#include "libavutil/internal.h"
+#include "libavutil/opt.h"
+
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+static const int X_FILTER[9] = {
+    1, 0, -1,
+    2, 0, -2,
+    1, 0, -1
+};
+
+static const int Y_FILTER[9] = {
+    1, 2, 1,
+    0, 0, 0,
+    -1, -2, -1
+};
+
+typedef struct SiTiContext {
+    const AVClass *class;
+    int pixel_depth;
+    int width, height;
+    uint64_t nb_frames;
+    uint8_t *prev_frame;
+    float max_si;
+    float max_ti;
+    float min_si;
+    float min_ti;
+    float sum_si;
+    float sum_ti;
+    float *gradient_matrix;
+    float *motion_matrix;
+    int full_range;
+    int print_summary;
+} SiTiContext;
+
+static const enum AVPixelFormat pix_fmts[] = {
+    AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
+    AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
+    AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10,
+    AV_PIX_FMT_NONE
+};
+
+static av_cold int init(AVFilterContext *ctx)
+{
+    // User options but no input data
+    SiTiContext *s = ctx->priv;
+    s->max_si = 0;
+    s->max_ti = 0;
+    return 0;
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    SiTiContext *s = ctx->priv;
+
+    if (s->print_summary) {
+        float avg_si = s->sum_si / s->nb_frames;
+        float avg_ti = s->sum_ti / s->nb_frames;
+        av_log(ctx, AV_LOG_INFO,
+               "SITI Summary:\nTotal frames: %"PRId64"\n\n"
+               "Spatial Information:\nAverage: %f\nMax: %f\nMin: %f\n\n"
+               "Temporal Information:\nAverage: %f\nMax: %f\nMin: %f\n",
+               s->nb_frames, avg_si, s->max_si, s->min_si, avg_ti, s->max_ti, s->min_ti
+        );
+    }
+
+    av_freep(&s->prev_frame);
+    av_freep(&s->gradient_matrix);
+    av_freep(&s->motion_matrix);
+}
+
+static int config_input(AVFilterLink *inlink)
+{
+    // Video input data avilable
+    AVFilterContext *ctx = inlink->dst;
+    SiTiContext *s = ctx->priv;
+    int max_pixsteps[4];
+    size_t pixel_sz;
+    size_t data_sz;
+    size_t gradient_sz;
+    size_t motion_sz;
+
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    av_image_fill_max_pixsteps(max_pixsteps, NULL, desc);
+
+    // free previous buffers in case they are allocated already
+    av_freep(&s->prev_frame);
+    av_freep(&s->gradient_matrix);
+    av_freep(&s->motion_matrix);
+
+    s->pixel_depth = max_pixsteps[0];
+    s->width = inlink->w;
+    s->height = inlink->h;
+    pixel_sz = s->pixel_depth == 1 ? sizeof(uint8_t) : sizeof(uint16_t);
+    data_sz = s->width * pixel_sz * s->height;
+
+    s->prev_frame = av_malloc(data_sz);
+
+    gradient_sz = (s->width - 2) * sizeof(float) * (s->height - 2);
+    s->gradient_matrix = av_malloc(gradient_sz);
+
+    motion_sz = s->width * sizeof(float) * s->height;
+    s->motion_matrix = av_malloc(motion_sz);
+
+    if (!s->prev_frame || ! s->gradient_matrix || !s->motion_matrix) {
+        return AVERROR(ENOMEM);
+    }
+
+    return 0;
+}
+
+// Determine whether the video is in full or limited range. If not defined, assume limited.
+static int is_full_range(AVFrame* frame)
+{
+    // If color range not specified, fallback to pixel format
+    if (frame->color_range == AVCOL_RANGE_UNSPECIFIED || frame->color_range == AVCOL_RANGE_NB)
+        return frame->format == AV_PIX_FMT_YUVJ420P || frame->format == AV_PIX_FMT_YUVJ422P;
+    return frame->color_range == AVCOL_RANGE_JPEG;
+}
+
+// Check frame's color range and convert to full range if needed
+static uint16_t convert_full_range(int factor, uint16_t y)
+{
+    int shift;
+    int limit_upper;
+    int full_upper;
+    int limit_y;
+
+    // For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4
+    shift = 16 * factor;
+    limit_upper = 235 * factor - shift;
+    full_upper = 256 * factor - 1;
+    limit_y = fmin(fmax(y - shift, 0), limit_upper);
+    return (full_upper * limit_y / limit_upper);
+}
+
+// Applies sobel convolution
+static void convolve_sobel(SiTiContext *s, const uint8_t *src, float *dst, int linesize)
+{
+    float x_conv_sum;
+    float y_conv_sum;
+    float gradient;
+    int ki;
+    int kj;
+    int index;
+    uint16_t data;
+    int filter_width = 3;
+    int filter_size = filter_width * filter_width;
+    int stride = linesize / s->pixel_depth;
+    // For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4
+    int factor = s->pixel_depth == 1 ? 1 : 4;
+
+    // Dst matrix is smaller than src since we ignore edges that can't be convolved
+    #define CONVOLVE(bps)                                           \
+    {                                                               \
+        uint##bps##_t *vsrc = (uint##bps##_t*)src;                  \
+        for (int j = 1; j < s->height - 1; j++) {                   \
+            for (int i = 1; i < s->width - 1; i++) {                \
+                x_conv_sum = 0.0;                                   \
+                y_conv_sum = 0.0;                                   \
+                for (int k = 0; k < filter_size; k++) {             \
+                    ki = k % filter_width - 1;                      \
+                    kj = floor(k / filter_width) - 1;               \
+                    index = (j + kj) * stride + (i + ki);           \
+                    data = s->full_range ? vsrc[index] : convert_full_range(factor, vsrc[index]); \
+                    x_conv_sum += data * X_FILTER[k];               \
+                    y_conv_sum += data * Y_FILTER[k];               \
+                }                                                   \
+                gradient = sqrt(x_conv_sum * x_conv_sum + y_conv_sum * y_conv_sum); \
+                dst[(j - 1) * (s->width - 2) + (i - 1)] = gradient; \
+            }                                                       \
+        }                                                           \
+    }
+
+    if (s->pixel_depth == 2) {
+        CONVOLVE(16);
+    } else {
+        CONVOLVE(8);
+    }
+}
+
+// Calculate pixel difference between current and previous frame, and update previous
+static void calculate_motion(SiTiContext *s, const uint8_t *curr,
+                             float *motion_matrix, int linesize)
+{
+    int stride = linesize / s->pixel_depth;
+    float motion;
+    int curr_index;
+    int prev_index;
+    uint16_t curr_data;
+    // For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4
+    int factor = s->pixel_depth == 1 ? 1 : 4;
+
+    // Previous frame is already converted to full range
+    #define CALCULATE(bps)                                           \
+    {                                                                \
+        uint##bps##_t *vsrc = (uint##bps##_t*)curr;                  \
+        uint##bps##_t *vdst = (uint##bps##_t*)s->prev_frame;         \
+        for (int j = 0; j < s->height; j++) {                        \
+            for (int i = 0; i < s->width; i++) {                     \
+                motion = 0;                                          \
+                curr_index = j * stride + i;                         \
+                prev_index = j * s->width + i;                       \
+                curr_data = s->full_range ? vsrc[curr_index] : convert_full_range(factor, vsrc[curr_index]); \
+                if (s->nb_frames > 1)                                \
+                    motion = curr_data - vdst[prev_index];           \
+                vdst[prev_index] = curr_data;                        \
+                motion_matrix[j * s->width + i] = motion;            \
+            }                                                        \
+        }                                                            \
+    }
+
+    if (s->pixel_depth == 2) {
+        CALCULATE(16);
+    } else {
+        CALCULATE(8);
+    }
+}
+
+static float std_deviation(float *img_metrics, int width, int height)
+{
+    int size = height * width;
+    float mean = 0.0;
+    float sqr_diff = 0;
+
+    for (int j = 0; j < height; j++)
+        for (int i = 0; i < width; i++)
+            mean += img_metrics[j * width + i];
+
+    mean /= size;
+
+    for (int j = 0; j < height; j++) {
+        for (int i = 0; i < width; i++) {
+            float mean_diff = img_metrics[j * width + i] - mean;
+            sqr_diff += (mean_diff * mean_diff);
+        }
+    }
+    sqr_diff = sqr_diff / size;
+    return sqrt(sqr_diff);
+}
+
+static void set_meta(AVDictionary **metadata, const char *key, float d)
+{
+    char value[128];
+    snprintf(value, sizeof(value), "%0.2f", d);
+    av_dict_set(metadata, key, value, 0);
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
+{
+    AVFilterContext *ctx = inlink->dst;
+    SiTiContext *s = ctx->priv;
+    float si;
+    float ti;
+
+    s->full_range = is_full_range(frame);
+    s->nb_frames++;
+
+    // Calculate si and ti
+    convolve_sobel(s, frame->data[0], s->gradient_matrix, frame->linesize[0]);
+    calculate_motion(s, frame->data[0], s->motion_matrix, frame->linesize[0]);
+    si = std_deviation(s->gradient_matrix, s->width - 2, s->height - 2);
+    ti = std_deviation(s->motion_matrix, s->width, s->height);
+
+    // Calculate statistics
+    s->max_si  = fmax(si, s->max_si);
+    s->max_ti  = fmax(ti, s->max_ti);
+    s->sum_si += si;
+    s->sum_ti += ti;
+    s->min_si  = s->nb_frames == 1 ? si : fmin(si, s->min_si);
+    s->min_ti  = s->nb_frames == 1 ? ti : fmin(ti, s->min_ti);
+
+    // Set si ti information in frame metadata
+    set_meta(&frame->metadata, "lavfi.siti.si", si);
+    set_meta(&frame->metadata, "lavfi.siti.ti", ti);
+
+    return ff_filter_frame(inlink->dst->outputs[0], frame);
+}
+
+#define OFFSET(x) offsetof(SiTiContext, x)
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption siti_options[] = {
+    { "print_summary", "Print summary showing average values", OFFSET(print_summary), AV_OPT_TYPE_BOOL, { .i64=0 }, 0, 1, FLAGS },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(siti);
+
+static const AVFilterPad avfilter_vf_siti_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_input,
+        .filter_frame = filter_frame,
+    },
+};
+
+static const AVFilterPad avfilter_vf_siti_outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO
+    },
+};
+
+AVFilter ff_vf_siti = {
+    .name          = "siti",
+    .description   = NULL_IF_CONFIG_SMALL("Calculate spatial information (SI) and temporal information (TI)."),
+    .priv_size     = sizeof(SiTiContext),
+    .priv_class    = &siti_class,
+    .init          = init,
+    .uninit        = uninit,
+    .flags         = AVFILTER_FLAG_METADATA_ONLY,
+    FILTER_PIXFMTS_ARRAY(pix_fmts),
+    FILTER_INPUTS(avfilter_vf_siti_inputs),
+    FILTER_OUTPUTS(avfilter_vf_siti_outputs),
+};
diff --git a/tests/fate-run.sh b/tests/fate-run.sh
index fbfc0a925d..a3be44d0eb 100755
--- a/tests/fate-run.sh
+++ b/tests/fate-run.sh
@@ -382,6 +382,15 @@ refcmp_metadata(){
         -f null /dev/null | awk -v ref=${ref} -v fuzz=${fuzz} -f ${base}/refcmp-metadata.awk -
 }
 
+cmp_metadata(){
+    refcmp=$1
+    pixfmt=$2
+    fuzz=${3:-0.001}
+    ffmpeg $FLAGS $ENC_OPTS \
+        -lavfi "testsrc2=size=300x200:rate=1:duration=5,format=${pixfmt},${refcmp},metadata=print:file=-" \
+        -f null /dev/null | awk -v ref=${ref} -v fuzz=${fuzz} -f ${base}/refcmp-metadata.awk -
+}
+
 pixfmt_conversion(){
     conversion="${test#pixfmt-}"
     outdir="tests/data/pixfmt"
diff --git a/tests/fate/filter-video.mak b/tests/fate/filter-video.mak
index 510bb3ffbc..ae021a74a4 100644
--- a/tests/fate/filter-video.mak
+++ b/tests/fate/filter-video.mak
@@ -862,6 +862,9 @@ fate-filter-refcmp-ssim-rgb: CMD = refcmp_metadata ssim rgb24 0.015
 FATE_FILTER-$(call ALLYES, $(REFCMP_DEPS) SSIM_FILTER) += fate-filter-refcmp-ssim-yuv
 fate-filter-refcmp-ssim-yuv: CMD = refcmp_metadata ssim yuv422p 0.015
 
+FATE_FILTER-$(call ALLYES, $(REFCMP_DEPS) SITI_FILTER) += fate-filter-refcmp-siti-yuv
+fate-filter-refcmp-siti-yuv: CMD = cmp_metadata siti yuv420p 0.015
+
 FATE_SAMPLES_FFPROBE += $(FATE_METADATA_FILTER-yes)
 FATE_SAMPLES_FFMPEG += $(FATE_FILTER_SAMPLES-yes)
 FATE_FFMPEG += $(FATE_FILTER-yes)
diff --git a/tests/ref/fate/filter-refcmp-siti-yuv b/tests/ref/fate/filter-refcmp-siti-yuv
new file mode 100644
index 0000000000..5d99972d95
--- /dev/null
+++ b/tests/ref/fate/filter-refcmp-siti-yuv
@@ -0,0 +1,15 @@
+frame:0    pts:0       pts_time:0
+lavfi.siti.si=106.72
+lavfi.siti.ti=0.00
+frame:1    pts:1       pts_time:1
+lavfi.siti.si=109.40
+lavfi.siti.ti=28.00
+frame:2    pts:2       pts_time:2
+lavfi.siti.si=109.29
+lavfi.siti.ti=28.38
+frame:3    pts:3       pts_time:3
+lavfi.siti.si=113.27
+lavfi.siti.ti=33.42
+frame:4    pts:4       pts_time:4
+lavfi.siti.si=110.87
+lavfi.siti.ti=30.53

Paul B Mahol March 9, 2022, 5:31 p.m. UTC | #32

On 3/8/22, Thilo Borgmann <thilo.borgmann@mail.de> wrote:
> Am 07.03.22 um 20:06 schrieb Paul B Mahol:
>> On 3/7/22, Thilo Borgmann <thilo.borgmann@mail.de> wrote:
>>> Am 06.03.22 um 22:25 schrieb Paul B Mahol:
>>>> On 3/6/22, Thilo Borgmann <thilo.borgmann@mail.de> wrote:
>>>>> Am 22.02.22 um 12:30 schrieb Thilo Borgmann:
>>>>>> Am 18.02.22 um 17:08 schrieb Paul B Mahol:
>>>>>>> On Sat, Feb 12, 2022 at 11:55 AM Thilo Borgmann
>>>>>>> <thilo.borgmann@mail.de>
>>>>>>> wrote:
>>>>>>>
>>>>>>>> Am 31.01.22 um 12:55 schrieb James Almer:
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> On 1/31/2022 8:53 AM, Anton Khirnov wrote:
>>>>>>>>>> Quoting Thilo Borgmann (2022-01-18 14:58:07)
>>>>>>>>>>>>> Violations of code style.
>>>>>>>>>>>
>>>>>>>>>>> Enhanced.
>>>>>>>>>>
>>>>>>>>>> Not enough. There are still many remaining, e.g.
>>>>>>>>>> * opening brace of a function definition should be on its own
>>>>>>>>>> line
>>>>>>>>>> * the context should generally be the first argument
>>>>>>>>>> * unsigned char* should be uint8_t*
>>>>>>>>>> * mixed declarations and code (the compiler should warn about
>>>>>>>>>> that)
>>>>>>>>>
>>>>>>>>> I think someone said that clang (or some versions) is apparently
>>>>>>>>> not
>>>>>>>> warning about this, hence why so many of these end up being missed
>>>>>>>> in
>>>>>>>> reviews or even by the patch author.
>>>>>>>>
>>>>>>>> This and all of Anton's comments in v3. Also removed some more
>>>>>>>> obviously
>>>>>>>> useless doubles.
>>>>>>>>
>>>>>>>
>>>>>>> Why it uses doubles in so many places?
>>>>>>> Is there any real benefit in that, except extra slowdown?
>>>>>>
>>>>>> I guess because it's originating in some c&p Matlab code.
>>>>>> I did %s#double#float#g for v4, loosing some precision we can ignore
>>>>>> IMHO.
>>>>>>
>>>>>>
>>>>>>
>>>>>> v3:
>>>>>>
>>>>>> Total frames: 2
>>>>>>
>>>>>> Spatial Information:
>>>>>> Average: 165.451985
>>>>>> Max: 165.817542
>>>>>> Min: 165.086427
>>>>>>
>>>>>> Temporal Information:
>>>>>> Average: 1.007263
>>>>>> Max: 2.014525
>>>>>> Min: 0.000000
>>>>>>
>>>>>>
>>>>>>
>>>>>> v4:
>>>>>>
>>>>>> Total frames: 2
>>>>>>
>>>>>> Spatial Information:
>>>>>> Average: 164.385895
>>>>>> Max: 164.742325
>>>>>> Min: 164.029480
>>>>>>
>>>>>> Temporal Information:
>>>>>> Average: 1.007241
>>>>>> Max: 2.014483
>>>>>> Min: 0.000000
>>>>>>
>>>>>
>>>>> Ping.
>>>>
>>>> Into wrong section of changelog added entry.
>>>>
>>>> Useless cast of allocation results.
>>>>
>>>> Does filter changes pixels? If not, add metadata flag to appropriate
>>>> place.
>>>
>>> All addressed in v5, thx!
>>>
>>
>> Changelog entry is still in wrong, 5.0, section.
>
> Fixed in v6.
>
>>> Also added a FATE test for it.
>
>

Could use fminf/ float functions instead of double variants.

> -Thilo
>

Thilo Borgmann March 12, 2022, 9:06 a.m. UTC | #33

Am 09.03.22 um 18:31 schrieb Paul B Mahol:
> On 3/8/22, Thilo Borgmann <thilo.borgmann@mail.de> wrote:
>> Am 07.03.22 um 20:06 schrieb Paul B Mahol:
>>> On 3/7/22, Thilo Borgmann <thilo.borgmann@mail.de> wrote:
>>>> Am 06.03.22 um 22:25 schrieb Paul B Mahol:
>>>>> On 3/6/22, Thilo Borgmann <thilo.borgmann@mail.de> wrote:
>>>>>> Am 22.02.22 um 12:30 schrieb Thilo Borgmann:
>>>>>>> Am 18.02.22 um 17:08 schrieb Paul B Mahol:
>>>>>>>> On Sat, Feb 12, 2022 at 11:55 AM Thilo Borgmann
>>>>>>>> <thilo.borgmann@mail.de>
>>>>>>>> wrote:
>>>>>>>>
>>>>>>>>> Am 31.01.22 um 12:55 schrieb James Almer:
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> On 1/31/2022 8:53 AM, Anton Khirnov wrote:
>>>>>>>>>>> Quoting Thilo Borgmann (2022-01-18 14:58:07)
>>>>>>>>>>>>>> Violations of code style.
>>>>>>>>>>>>
>>>>>>>>>>>> Enhanced.
>>>>>>>>>>>
>>>>>>>>>>> Not enough. There are still many remaining, e.g.
>>>>>>>>>>> * opening brace of a function definition should be on its own
>>>>>>>>>>> line
>>>>>>>>>>> * the context should generally be the first argument
>>>>>>>>>>> * unsigned char* should be uint8_t*
>>>>>>>>>>> * mixed declarations and code (the compiler should warn about
>>>>>>>>>>> that)
>>>>>>>>>>
>>>>>>>>>> I think someone said that clang (or some versions) is apparently
>>>>>>>>>> not
>>>>>>>>> warning about this, hence why so many of these end up being missed
>>>>>>>>> in
>>>>>>>>> reviews or even by the patch author.
>>>>>>>>>
>>>>>>>>> This and all of Anton's comments in v3. Also removed some more
>>>>>>>>> obviously
>>>>>>>>> useless doubles.
>>>>>>>>>
>>>>>>>>
>>>>>>>> Why it uses doubles in so many places?
>>>>>>>> Is there any real benefit in that, except extra slowdown?
>>>>>>>
>>>>>>> I guess because it's originating in some c&p Matlab code.
>>>>>>> I did %s#double#float#g for v4, loosing some precision we can ignore
>>>>>>> IMHO.
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> v3:
>>>>>>>
>>>>>>> Total frames: 2
>>>>>>>
>>>>>>> Spatial Information:
>>>>>>> Average: 165.451985
>>>>>>> Max: 165.817542
>>>>>>> Min: 165.086427
>>>>>>>
>>>>>>> Temporal Information:
>>>>>>> Average: 1.007263
>>>>>>> Max: 2.014525
>>>>>>> Min: 0.000000
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> v4:
>>>>>>>
>>>>>>> Total frames: 2
>>>>>>>
>>>>>>> Spatial Information:
>>>>>>> Average: 164.385895
>>>>>>> Max: 164.742325
>>>>>>> Min: 164.029480
>>>>>>>
>>>>>>> Temporal Information:
>>>>>>> Average: 1.007241
>>>>>>> Max: 2.014483
>>>>>>> Min: 0.000000
>>>>>>>
>>>>>>
>>>>>> Ping.
>>>>>
>>>>> Into wrong section of changelog added entry.
>>>>>
>>>>> Useless cast of allocation results.
>>>>>
>>>>> Does filter changes pixels? If not, add metadata flag to appropriate
>>>>> place.
>>>>
>>>> All addressed in v5, thx!
>>>>
>>>
>>> Changelog entry is still in wrong, 5.0, section.
>>
>> Fixed in v6.
>>
>>>> Also added a FATE test for it.
>>
>>
> 
> Could use fminf/ float functions instead of double variants.

v7.

-Thilo
From 01c3d6dd73ad4c34c44c9cbc8171e330e766144f Mon Sep 17 00:00:00 2001
From: Boris Baracaldo <borbarak@fb.com>
Date: Sat, 12 Mar 2022 10:04:44 +0100
Subject: [PATCH v7] lavfilter: Add SITI filter

Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video, as defined
in ITU-T P.910: Subjective video quality assessment methods for multimedia
applications.
---
 Changelog                             |   1 +
 doc/filters.texi                      |  23 ++
 libavfilter/Makefile                  |   1 +
 libavfilter/allfilters.c              |   1 +
 libavfilter/version.h                 |   2 +-
 libavfilter/vf_siti.c                 | 349 ++++++++++++++++++++++++++
 tests/fate-run.sh                     |   9 +
 tests/fate/filter-video.mak           |   3 +
 tests/ref/fate/filter-refcmp-siti-yuv |  15 ++
 9 files changed, 403 insertions(+), 1 deletion(-)
 create mode 100644 libavfilter/vf_siti.c
 create mode 100644 tests/ref/fate/filter-refcmp-siti-yuv

diff --git a/Changelog b/Changelog
index 3af8aa032b..200bd82680 100644
--- a/Changelog
+++ b/Changelog
@@ -5,6 +5,7 @@ version 5.1:
 - dialogue enhance audio filter
 - dropped obsolete XvMC hwaccel
 - pcm-bluray encoder
+- SITI filter
 
 
 version 5.0:
diff --git a/doc/filters.texi b/doc/filters.texi
index 26c5b4db48..9f50fd899f 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -19944,6 +19944,29 @@ ffmpeg -i input1.mkv -i input2.mkv -filter_complex "[0:v][1:v] signature=nb_inpu
 
 @end itemize
 
+@anchor{siti}
+@section siti
+
+Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video, as defined
+in ITU-T P.910: Subjective video quality assessment methods for multimedia
+applications. Available PDF at @url{https://www.itu.int/rec/T-REC-P.910-199909-S/en }.
+
+It accepts the following option:
+
+@table @option
+@item print_summary
+If set to 1, Summary statistics will be printed to the console. Default 0.
+@end table
+
+@subsection Examples
+@itemize
+@item
+To calculate SI/TI metrics and print summary:
+@example
+ffmpeg -i input.mp4 -vf siti=print_summary=1 -f null -
+@end example
+@end itemize
+
 @anchor{smartblur}
 @section smartblur
 
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 56d33e6480..43653597d8 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -456,6 +456,7 @@ OBJS-$(CONFIG_SMARTBLUR_FILTER)              += vf_smartblur.o
 OBJS-$(CONFIG_SOBEL_FILTER)                  += vf_convolution.o
 OBJS-$(CONFIG_SOBEL_OPENCL_FILTER)           += vf_convolution_opencl.o opencl.o \
                                                 opencl/convolution.o
+OBJS-$(CONFIG_SITI_FILTER)                   += vf_siti.o
 OBJS-$(CONFIG_SPLIT_FILTER)                  += split.o
 OBJS-$(CONFIG_SPP_FILTER)                    += vf_spp.o qp_table.o
 OBJS-$(CONFIG_SR_FILTER)                     += vf_sr.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index f5caee3a62..9fbaaacf47 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -432,6 +432,7 @@ extern const AVFilter ff_vf_shuffleplanes;
 extern const AVFilter ff_vf_sidedata;
 extern const AVFilter ff_vf_signalstats;
 extern const AVFilter ff_vf_signature;
+extern const AVFilter ff_vf_siti;
 extern const AVFilter ff_vf_smartblur;
 extern const AVFilter ff_vf_sobel;
 extern const AVFilter ff_vf_sobel_opencl;
diff --git a/libavfilter/version.h b/libavfilter/version.h
index 9a890c014f..95dd64d5b5 100644
--- a/libavfilter/version.h
+++ b/libavfilter/version.h
@@ -30,7 +30,7 @@
 #include "libavutil/version.h"
 
 #define LIBAVFILTER_VERSION_MAJOR   8
-#define LIBAVFILTER_VERSION_MINOR  27
+#define LIBAVFILTER_VERSION_MINOR  28
 #define LIBAVFILTER_VERSION_MICRO 100
 
 
diff --git a/libavfilter/vf_siti.c b/libavfilter/vf_siti.c
new file mode 100644
index 0000000000..48bfc004cb
--- /dev/null
+++ b/libavfilter/vf_siti.c
@@ -0,0 +1,349 @@
+/*
+ * Copyright (c) 2021 Boris Baracaldo
+ * Copyright (c) 2022 Thilo Borgmann
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+/**
+ * @file
+ * Calculate Spatial Info (SI) and Temporal Info (TI) scores
+ */
+
+#include <math.h>
+
+#include "libavutil/imgutils.h"
+#include "libavutil/internal.h"
+#include "libavutil/opt.h"
+
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+static const int X_FILTER[9] = {
+    1, 0, -1,
+    2, 0, -2,
+    1, 0, -1
+};
+
+static const int Y_FILTER[9] = {
+    1, 2, 1,
+    0, 0, 0,
+    -1, -2, -1
+};
+
+typedef struct SiTiContext {
+    const AVClass *class;
+    int pixel_depth;
+    int width, height;
+    uint64_t nb_frames;
+    uint8_t *prev_frame;
+    float max_si;
+    float max_ti;
+    float min_si;
+    float min_ti;
+    float sum_si;
+    float sum_ti;
+    float *gradient_matrix;
+    float *motion_matrix;
+    int full_range;
+    int print_summary;
+} SiTiContext;
+
+static const enum AVPixelFormat pix_fmts[] = {
+    AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
+    AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
+    AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10,
+    AV_PIX_FMT_NONE
+};
+
+static av_cold int init(AVFilterContext *ctx)
+{
+    // User options but no input data
+    SiTiContext *s = ctx->priv;
+    s->max_si = 0;
+    s->max_ti = 0;
+    return 0;
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    SiTiContext *s = ctx->priv;
+
+    if (s->print_summary) {
+        float avg_si = s->sum_si / s->nb_frames;
+        float avg_ti = s->sum_ti / s->nb_frames;
+        av_log(ctx, AV_LOG_INFO,
+               "SITI Summary:\nTotal frames: %"PRId64"\n\n"
+               "Spatial Information:\nAverage: %f\nMax: %f\nMin: %f\n\n"
+               "Temporal Information:\nAverage: %f\nMax: %f\nMin: %f\n",
+               s->nb_frames, avg_si, s->max_si, s->min_si, avg_ti, s->max_ti, s->min_ti
+        );
+    }
+
+    av_freep(&s->prev_frame);
+    av_freep(&s->gradient_matrix);
+    av_freep(&s->motion_matrix);
+}
+
+static int config_input(AVFilterLink *inlink)
+{
+    // Video input data avilable
+    AVFilterContext *ctx = inlink->dst;
+    SiTiContext *s = ctx->priv;
+    int max_pixsteps[4];
+    size_t pixel_sz;
+    size_t data_sz;
+    size_t gradient_sz;
+    size_t motion_sz;
+
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    av_image_fill_max_pixsteps(max_pixsteps, NULL, desc);
+
+    // free previous buffers in case they are allocated already
+    av_freep(&s->prev_frame);
+    av_freep(&s->gradient_matrix);
+    av_freep(&s->motion_matrix);
+
+    s->pixel_depth = max_pixsteps[0];
+    s->width = inlink->w;
+    s->height = inlink->h;
+    pixel_sz = s->pixel_depth == 1 ? sizeof(uint8_t) : sizeof(uint16_t);
+    data_sz = s->width * pixel_sz * s->height;
+
+    s->prev_frame = av_malloc(data_sz);
+
+    gradient_sz = (s->width - 2) * sizeof(float) * (s->height - 2);
+    s->gradient_matrix = av_malloc(gradient_sz);
+
+    motion_sz = s->width * sizeof(float) * s->height;
+    s->motion_matrix = av_malloc(motion_sz);
+
+    if (!s->prev_frame || ! s->gradient_matrix || !s->motion_matrix) {
+        return AVERROR(ENOMEM);
+    }
+
+    return 0;
+}
+
+// Determine whether the video is in full or limited range. If not defined, assume limited.
+static int is_full_range(AVFrame* frame)
+{
+    // If color range not specified, fallback to pixel format
+    if (frame->color_range == AVCOL_RANGE_UNSPECIFIED || frame->color_range == AVCOL_RANGE_NB)
+        return frame->format == AV_PIX_FMT_YUVJ420P || frame->format == AV_PIX_FMT_YUVJ422P;
+    return frame->color_range == AVCOL_RANGE_JPEG;
+}
+
+// Check frame's color range and convert to full range if needed
+static uint16_t convert_full_range(int factor, uint16_t y)
+{
+    int shift;
+    int limit_upper;
+    int full_upper;
+    int limit_y;
+
+    // For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4
+    shift = 16 * factor;
+    limit_upper = 235 * factor - shift;
+    full_upper = 256 * factor - 1;
+    limit_y = fminf(fmax(y - shift, 0), limit_upper);
+    return (full_upper * limit_y / limit_upper);
+}
+
+// Applies sobel convolution
+static void convolve_sobel(SiTiContext *s, const uint8_t *src, float *dst, int linesize)
+{
+    float x_conv_sum;
+    float y_conv_sum;
+    float gradient;
+    int ki;
+    int kj;
+    int index;
+    uint16_t data;
+    int filter_width = 3;
+    int filter_size = filter_width * filter_width;
+    int stride = linesize / s->pixel_depth;
+    // For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4
+    int factor = s->pixel_depth == 1 ? 1 : 4;
+
+    // Dst matrix is smaller than src since we ignore edges that can't be convolved
+    #define CONVOLVE(bps)                                           \
+    {                                                               \
+        uint##bps##_t *vsrc = (uint##bps##_t*)src;                  \
+        for (int j = 1; j < s->height - 1; j++) {                   \
+            for (int i = 1; i < s->width - 1; i++) {                \
+                x_conv_sum = 0.0;                                   \
+                y_conv_sum = 0.0;                                   \
+                for (int k = 0; k < filter_size; k++) {             \
+                    ki = k % filter_width - 1;                      \
+                    kj = floor(k / filter_width) - 1;               \
+                    index = (j + kj) * stride + (i + ki);           \
+                    data = s->full_range ? vsrc[index] : convert_full_range(factor, vsrc[index]); \
+                    x_conv_sum += data * X_FILTER[k];               \
+                    y_conv_sum += data * Y_FILTER[k];               \
+                }                                                   \
+                gradient = sqrt(x_conv_sum * x_conv_sum + y_conv_sum * y_conv_sum); \
+                dst[(j - 1) * (s->width - 2) + (i - 1)] = gradient; \
+            }                                                       \
+        }                                                           \
+    }
+
+    if (s->pixel_depth == 2) {
+        CONVOLVE(16);
+    } else {
+        CONVOLVE(8);
+    }
+}
+
+// Calculate pixel difference between current and previous frame, and update previous
+static void calculate_motion(SiTiContext *s, const uint8_t *curr,
+                             float *motion_matrix, int linesize)
+{
+    int stride = linesize / s->pixel_depth;
+    float motion;
+    int curr_index;
+    int prev_index;
+    uint16_t curr_data;
+    // For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4
+    int factor = s->pixel_depth == 1 ? 1 : 4;
+
+    // Previous frame is already converted to full range
+    #define CALCULATE(bps)                                           \
+    {                                                                \
+        uint##bps##_t *vsrc = (uint##bps##_t*)curr;                  \
+        uint##bps##_t *vdst = (uint##bps##_t*)s->prev_frame;         \
+        for (int j = 0; j < s->height; j++) {                        \
+            for (int i = 0; i < s->width; i++) {                     \
+                motion = 0;                                          \
+                curr_index = j * stride + i;                         \
+                prev_index = j * s->width + i;                       \
+                curr_data = s->full_range ? vsrc[curr_index] : convert_full_range(factor, vsrc[curr_index]); \
+                if (s->nb_frames > 1)                                \
+                    motion = curr_data - vdst[prev_index];           \
+                vdst[prev_index] = curr_data;                        \
+                motion_matrix[j * s->width + i] = motion;            \
+            }                                                        \
+        }                                                            \
+    }
+
+    if (s->pixel_depth == 2) {
+        CALCULATE(16);
+    } else {
+        CALCULATE(8);
+    }
+}
+
+static float std_deviation(float *img_metrics, int width, int height)
+{
+    int size = height * width;
+    float mean = 0.0;
+    float sqr_diff = 0;
+
+    for (int j = 0; j < height; j++)
+        for (int i = 0; i < width; i++)
+            mean += img_metrics[j * width + i];
+
+    mean /= size;
+
+    for (int j = 0; j < height; j++) {
+        for (int i = 0; i < width; i++) {
+            float mean_diff = img_metrics[j * width + i] - mean;
+            sqr_diff += (mean_diff * mean_diff);
+        }
+    }
+    sqr_diff = sqr_diff / size;
+    return sqrt(sqr_diff);
+}
+
+static void set_meta(AVDictionary **metadata, const char *key, float d)
+{
+    char value[128];
+    snprintf(value, sizeof(value), "%0.2f", d);
+    av_dict_set(metadata, key, value, 0);
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
+{
+    AVFilterContext *ctx = inlink->dst;
+    SiTiContext *s = ctx->priv;
+    float si;
+    float ti;
+
+    s->full_range = is_full_range(frame);
+    s->nb_frames++;
+
+    // Calculate si and ti
+    convolve_sobel(s, frame->data[0], s->gradient_matrix, frame->linesize[0]);
+    calculate_motion(s, frame->data[0], s->motion_matrix, frame->linesize[0]);
+    si = std_deviation(s->gradient_matrix, s->width - 2, s->height - 2);
+    ti = std_deviation(s->motion_matrix, s->width, s->height);
+
+    // Calculate statistics
+    s->max_si  = fmax(si, s->max_si);
+    s->max_ti  = fmax(ti, s->max_ti);
+    s->sum_si += si;
+    s->sum_ti += ti;
+    s->min_si  = s->nb_frames == 1 ? si : fminf(si, s->min_si);
+    s->min_ti  = s->nb_frames == 1 ? ti : fminf(ti, s->min_ti);
+
+    // Set si ti information in frame metadata
+    set_meta(&frame->metadata, "lavfi.siti.si", si);
+    set_meta(&frame->metadata, "lavfi.siti.ti", ti);
+
+    return ff_filter_frame(inlink->dst->outputs[0], frame);
+}
+
+#define OFFSET(x) offsetof(SiTiContext, x)
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption siti_options[] = {
+    { "print_summary", "Print summary showing average values", OFFSET(print_summary), AV_OPT_TYPE_BOOL, { .i64=0 }, 0, 1, FLAGS },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(siti);
+
+static const AVFilterPad avfilter_vf_siti_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_input,
+        .filter_frame = filter_frame,
+    },
+};
+
+static const AVFilterPad avfilter_vf_siti_outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO
+    },
+};
+
+AVFilter ff_vf_siti = {
+    .name          = "siti",
+    .description   = NULL_IF_CONFIG_SMALL("Calculate spatial information (SI) and temporal information (TI)."),
+    .priv_size     = sizeof(SiTiContext),
+    .priv_class    = &siti_class,
+    .init          = init,
+    .uninit        = uninit,
+    .flags         = AVFILTER_FLAG_METADATA_ONLY,
+    FILTER_PIXFMTS_ARRAY(pix_fmts),
+    FILTER_INPUTS(avfilter_vf_siti_inputs),
+    FILTER_OUTPUTS(avfilter_vf_siti_outputs),
+};
diff --git a/tests/fate-run.sh b/tests/fate-run.sh
index fbfc0a925d..a3be44d0eb 100755
--- a/tests/fate-run.sh
+++ b/tests/fate-run.sh
@@ -382,6 +382,15 @@ refcmp_metadata(){
         -f null /dev/null | awk -v ref=${ref} -v fuzz=${fuzz} -f ${base}/refcmp-metadata.awk -
 }
 
+cmp_metadata(){
+    refcmp=$1
+    pixfmt=$2
+    fuzz=${3:-0.001}
+    ffmpeg $FLAGS $ENC_OPTS \
+        -lavfi "testsrc2=size=300x200:rate=1:duration=5,format=${pixfmt},${refcmp},metadata=print:file=-" \
+        -f null /dev/null | awk -v ref=${ref} -v fuzz=${fuzz} -f ${base}/refcmp-metadata.awk -
+}
+
 pixfmt_conversion(){
     conversion="${test#pixfmt-}"
     outdir="tests/data/pixfmt"
diff --git a/tests/fate/filter-video.mak b/tests/fate/filter-video.mak
index 510bb3ffbc..ae021a74a4 100644
--- a/tests/fate/filter-video.mak
+++ b/tests/fate/filter-video.mak
@@ -862,6 +862,9 @@ fate-filter-refcmp-ssim-rgb: CMD = refcmp_metadata ssim rgb24 0.015
 FATE_FILTER-$(call ALLYES, $(REFCMP_DEPS) SSIM_FILTER) += fate-filter-refcmp-ssim-yuv
 fate-filter-refcmp-ssim-yuv: CMD = refcmp_metadata ssim yuv422p 0.015
 
+FATE_FILTER-$(call ALLYES, $(REFCMP_DEPS) SITI_FILTER) += fate-filter-refcmp-siti-yuv
+fate-filter-refcmp-siti-yuv: CMD = cmp_metadata siti yuv420p 0.015
+
 FATE_SAMPLES_FFPROBE += $(FATE_METADATA_FILTER-yes)
 FATE_SAMPLES_FFMPEG += $(FATE_FILTER_SAMPLES-yes)
 FATE_FFMPEG += $(FATE_FILTER-yes)
diff --git a/tests/ref/fate/filter-refcmp-siti-yuv b/tests/ref/fate/filter-refcmp-siti-yuv
new file mode 100644
index 0000000000..5d99972d95
--- /dev/null
+++ b/tests/ref/fate/filter-refcmp-siti-yuv
@@ -0,0 +1,15 @@
+frame:0    pts:0       pts_time:0
+lavfi.siti.si=106.72
+lavfi.siti.ti=0.00
+frame:1    pts:1       pts_time:1
+lavfi.siti.si=109.40
+lavfi.siti.ti=28.00
+frame:2    pts:2       pts_time:2
+lavfi.siti.si=109.29
+lavfi.siti.ti=28.38
+frame:3    pts:3       pts_time:3
+lavfi.siti.si=113.27
+lavfi.siti.ti=33.42
+frame:4    pts:4       pts_time:4
+lavfi.siti.si=110.87
+lavfi.siti.ti=30.53

Thilo Borgmann March 18, 2022, 1:56 p.m. UTC | #34

On 12 Mar 2022, at 10:06, Thilo Borgmann wrote:

> Am 09.03.22 um 18:31 schrieb Paul B Mahol:
>> On 3/8/22, Thilo Borgmann <thilo.borgmann@mail.de> wrote:
>>> Am 07.03.22 um 20:06 schrieb Paul B Mahol:
>>>> On 3/7/22, Thilo Borgmann <thilo.borgmann@mail.de> wrote:
>>>>> Am 06.03.22 um 22:25 schrieb Paul B Mahol:
>>>>>> On 3/6/22, Thilo Borgmann <thilo.borgmann@mail.de> wrote:
>>>>>>> Am 22.02.22 um 12:30 schrieb Thilo Borgmann:
>>>>>>>> Am 18.02.22 um 17:08 schrieb Paul B Mahol:
>>>>>>>>> On Sat, Feb 12, 2022 at 11:55 AM Thilo Borgmann
>>>>>>>>> <thilo.borgmann@mail.de>
>>>>>>>>> wrote:
>>>>>>>>>
>>>>>>>>>> Am 31.01.22 um 12:55 schrieb James Almer:
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> On 1/31/2022 8:53 AM, Anton Khirnov wrote:
>>>>>>>>>>>> Quoting Thilo Borgmann (2022-01-18 14:58:07)
>>>>>>>>>>>>>>> Violations of code style.
>>>>>>>>>>>>>
>>>>>>>>>>>>> Enhanced.
>>>>>>>>>>>>
>>>>>>>>>>>> Not enough. There are still many remaining, e.g.
>>>>>>>>>>>> * opening brace of a function definition should be on its own
>>>>>>>>>>>> line
>>>>>>>>>>>> * the context should generally be the first argument
>>>>>>>>>>>> * unsigned char* should be uint8_t*
>>>>>>>>>>>> * mixed declarations and code (the compiler should warn about
>>>>>>>>>>>> that)
>>>>>>>>>>>
>>>>>>>>>>> I think someone said that clang (or some versions) is apparently
>>>>>>>>>>> not
>>>>>>>>>> warning about this, hence why so many of these end up being missed
>>>>>>>>>> in
>>>>>>>>>> reviews or even by the patch author.
>>>>>>>>>>
>>>>>>>>>> This and all of Anton's comments in v3. Also removed some more
>>>>>>>>>> obviously
>>>>>>>>>> useless doubles.
>>>>>>>>>>
>>>>>>>>>
>>>>>>>>> Why it uses doubles in so many places?
>>>>>>>>> Is there any real benefit in that, except extra slowdown?
>>>>>>>>
>>>>>>>> I guess because it's originating in some c&p Matlab code.
>>>>>>>> I did %s#double#float#g for v4, loosing some precision we can ignore
>>>>>>>> IMHO.
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>> v3:
>>>>>>>>
>>>>>>>> Total frames: 2
>>>>>>>>
>>>>>>>> Spatial Information:
>>>>>>>> Average: 165.451985
>>>>>>>> Max: 165.817542
>>>>>>>> Min: 165.086427
>>>>>>>>
>>>>>>>> Temporal Information:
>>>>>>>> Average: 1.007263
>>>>>>>> Max: 2.014525
>>>>>>>> Min: 0.000000
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>> v4:
>>>>>>>>
>>>>>>>> Total frames: 2
>>>>>>>>
>>>>>>>> Spatial Information:
>>>>>>>> Average: 164.385895
>>>>>>>> Max: 164.742325
>>>>>>>> Min: 164.029480
>>>>>>>>
>>>>>>>> Temporal Information:
>>>>>>>> Average: 1.007241
>>>>>>>> Max: 2.014483
>>>>>>>> Min: 0.000000
>>>>>>>>
>>>>>>>
>>>>>>> Ping.
>>>>>>
>>>>>> Into wrong section of changelog added entry.
>>>>>>
>>>>>> Useless cast of allocation results.
>>>>>>
>>>>>> Does filter changes pixels? If not, add metadata flag to appropriate
>>>>>> place.
>>>>>
>>>>> All addressed in v5, thx!
>>>>>
>>>>
>>>> Changelog entry is still in wrong, 5.0, section.
>>>
>>> Fixed in v6.
>>>
>>>>> Also added a FATE test for it.
>>>
>>>
>>
>> Could use fminf/ float functions instead of double variants.
>
> v7.

Going to push soon if there are no more comments.

-Thilo

Paul B Mahol March 18, 2022, 2:04 p.m. UTC | #35

On 3/18/22, Thilo Borgmann <thilo.borgmann@mail.de> wrote:
>
>
> On 12 Mar 2022, at 10:06, Thilo Borgmann wrote:
>
>> Am 09.03.22 um 18:31 schrieb Paul B Mahol:
>>> On 3/8/22, Thilo Borgmann <thilo.borgmann@mail.de> wrote:
>>>> Am 07.03.22 um 20:06 schrieb Paul B Mahol:
>>>>> On 3/7/22, Thilo Borgmann <thilo.borgmann@mail.de> wrote:
>>>>>> Am 06.03.22 um 22:25 schrieb Paul B Mahol:
>>>>>>> On 3/6/22, Thilo Borgmann <thilo.borgmann@mail.de> wrote:
>>>>>>>> Am 22.02.22 um 12:30 schrieb Thilo Borgmann:
>>>>>>>>> Am 18.02.22 um 17:08 schrieb Paul B Mahol:
>>>>>>>>>> On Sat, Feb 12, 2022 at 11:55 AM Thilo Borgmann
>>>>>>>>>> <thilo.borgmann@mail.de>
>>>>>>>>>> wrote:
>>>>>>>>>>
>>>>>>>>>>> Am 31.01.22 um 12:55 schrieb James Almer:
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>> On 1/31/2022 8:53 AM, Anton Khirnov wrote:
>>>>>>>>>>>>> Quoting Thilo Borgmann (2022-01-18 14:58:07)
>>>>>>>>>>>>>>>> Violations of code style.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Enhanced.
>>>>>>>>>>>>>
>>>>>>>>>>>>> Not enough. There are still many remaining, e.g.
>>>>>>>>>>>>> * opening brace of a function definition should be on its own
>>>>>>>>>>>>> line
>>>>>>>>>>>>> * the context should generally be the first argument
>>>>>>>>>>>>> * unsigned char* should be uint8_t*
>>>>>>>>>>>>> * mixed declarations and code (the compiler should warn about
>>>>>>>>>>>>> that)
>>>>>>>>>>>>
>>>>>>>>>>>> I think someone said that clang (or some versions) is
>>>>>>>>>>>> apparently
>>>>>>>>>>>> not
>>>>>>>>>>> warning about this, hence why so many of these end up being
>>>>>>>>>>> missed
>>>>>>>>>>> in
>>>>>>>>>>> reviews or even by the patch author.
>>>>>>>>>>>
>>>>>>>>>>> This and all of Anton's comments in v3. Also removed some more
>>>>>>>>>>> obviously
>>>>>>>>>>> useless doubles.
>>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> Why it uses doubles in so many places?
>>>>>>>>>> Is there any real benefit in that, except extra slowdown?
>>>>>>>>>
>>>>>>>>> I guess because it's originating in some c&p Matlab code.
>>>>>>>>> I did %s#double#float#g for v4, loosing some precision we can
>>>>>>>>> ignore
>>>>>>>>> IMHO.
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> v3:
>>>>>>>>>
>>>>>>>>> Total frames: 2
>>>>>>>>>
>>>>>>>>> Spatial Information:
>>>>>>>>> Average: 165.451985
>>>>>>>>> Max: 165.817542
>>>>>>>>> Min: 165.086427
>>>>>>>>>
>>>>>>>>> Temporal Information:
>>>>>>>>> Average: 1.007263
>>>>>>>>> Max: 2.014525
>>>>>>>>> Min: 0.000000
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> v4:
>>>>>>>>>
>>>>>>>>> Total frames: 2
>>>>>>>>>
>>>>>>>>> Spatial Information:
>>>>>>>>> Average: 164.385895
>>>>>>>>> Max: 164.742325
>>>>>>>>> Min: 164.029480
>>>>>>>>>
>>>>>>>>> Temporal Information:
>>>>>>>>> Average: 1.007241
>>>>>>>>> Max: 2.014483
>>>>>>>>> Min: 0.000000
>>>>>>>>>
>>>>>>>>
>>>>>>>> Ping.
>>>>>>>
>>>>>>> Into wrong section of changelog added entry.
>>>>>>>
>>>>>>> Useless cast of allocation results.
>>>>>>>
>>>>>>> Does filter changes pixels? If not, add metadata flag to appropriate
>>>>>>> place.
>>>>>>
>>>>>> All addressed in v5, thx!
>>>>>>
>>>>>
>>>>> Changelog entry is still in wrong, 5.0, section.
>>>>
>>>> Fixed in v6.
>>>>
>>>>>> Also added a FATE test for it.
>>>>
>>>>
>>>
>>> Could use fminf/ float functions instead of double variants.
>>
>> v7.
>
> Going to push soon if there are no more comments.

Check that returned values are correct for bigger w/h, and that not
values reach too high values for floats
which may cause loss of precision in best case, eg. maybe you need to
normalize pixel values from 0-255 to 0.f-1.f so mean/stddev  does not
get bad results.

>
> -Thilo
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>

Thilo Borgmann March 22, 2022, 8:36 a.m. UTC | #36

Am 18.03.22 um 15:04 schrieb Paul B Mahol:
> On 3/18/22, Thilo Borgmann <thilo.borgmann@mail.de> wrote:
>>
>>
>> On 12 Mar 2022, at 10:06, Thilo Borgmann wrote:
>>
>>> Am 09.03.22 um 18:31 schrieb Paul B Mahol:
>>>> On 3/8/22, Thilo Borgmann <thilo.borgmann@mail.de> wrote:
>>>>> Am 07.03.22 um 20:06 schrieb Paul B Mahol:
>>>>>> On 3/7/22, Thilo Borgmann <thilo.borgmann@mail.de> wrote:
>>>>>>> Am 06.03.22 um 22:25 schrieb Paul B Mahol:
>>>>>>>> On 3/6/22, Thilo Borgmann <thilo.borgmann@mail.de> wrote:
>>>>>>>>> Am 22.02.22 um 12:30 schrieb Thilo Borgmann:
>>>>>>>>>> Am 18.02.22 um 17:08 schrieb Paul B Mahol:
>>>>>>>>>>> On Sat, Feb 12, 2022 at 11:55 AM Thilo Borgmann
>>>>>>>>>>> <thilo.borgmann@mail.de>
>>>>>>>>>>> wrote:
>>>>>>>>>>>
>>>>>>>>>>>> Am 31.01.22 um 12:55 schrieb James Almer:
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> On 1/31/2022 8:53 AM, Anton Khirnov wrote:
>>>>>>>>>>>>>> Quoting Thilo Borgmann (2022-01-18 14:58:07)
>>>>>>>>>>>>>>>>> Violations of code style.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Enhanced.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Not enough. There are still many remaining, e.g.
>>>>>>>>>>>>>> * opening brace of a function definition should be on its own
>>>>>>>>>>>>>> line
>>>>>>>>>>>>>> * the context should generally be the first argument
>>>>>>>>>>>>>> * unsigned char* should be uint8_t*
>>>>>>>>>>>>>> * mixed declarations and code (the compiler should warn about
>>>>>>>>>>>>>> that)
>>>>>>>>>>>>>
>>>>>>>>>>>>> I think someone said that clang (or some versions) is
>>>>>>>>>>>>> apparently
>>>>>>>>>>>>> not
>>>>>>>>>>>> warning about this, hence why so many of these end up being
>>>>>>>>>>>> missed
>>>>>>>>>>>> in
>>>>>>>>>>>> reviews or even by the patch author.
>>>>>>>>>>>>
>>>>>>>>>>>> This and all of Anton's comments in v3. Also removed some more
>>>>>>>>>>>> obviously
>>>>>>>>>>>> useless doubles.
>>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> Why it uses doubles in so many places?
>>>>>>>>>>> Is there any real benefit in that, except extra slowdown?
>>>>>>>>>>
>>>>>>>>>> I guess because it's originating in some c&p Matlab code.
>>>>>>>>>> I did %s#double#float#g for v4, loosing some precision we can
>>>>>>>>>> ignore
>>>>>>>>>> IMHO.
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> v3:
>>>>>>>>>>
>>>>>>>>>> Total frames: 2
>>>>>>>>>>
>>>>>>>>>> Spatial Information:
>>>>>>>>>> Average: 165.451985
>>>>>>>>>> Max: 165.817542
>>>>>>>>>> Min: 165.086427
>>>>>>>>>>
>>>>>>>>>> Temporal Information:
>>>>>>>>>> Average: 1.007263
>>>>>>>>>> Max: 2.014525
>>>>>>>>>> Min: 0.000000
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> v4:
>>>>>>>>>>
>>>>>>>>>> Total frames: 2
>>>>>>>>>>
>>>>>>>>>> Spatial Information:
>>>>>>>>>> Average: 164.385895
>>>>>>>>>> Max: 164.742325
>>>>>>>>>> Min: 164.029480
>>>>>>>>>>
>>>>>>>>>> Temporal Information:
>>>>>>>>>> Average: 1.007241
>>>>>>>>>> Max: 2.014483
>>>>>>>>>> Min: 0.000000
>>>>>>>>>>
>>>>>>>>>
>>>>>>>>> Ping.
>>>>>>>>
>>>>>>>> Into wrong section of changelog added entry.
>>>>>>>>
>>>>>>>> Useless cast of allocation results.
>>>>>>>>
>>>>>>>> Does filter changes pixels? If not, add metadata flag to appropriate
>>>>>>>> place.
>>>>>>>
>>>>>>> All addressed in v5, thx!
>>>>>>>
>>>>>>
>>>>>> Changelog entry is still in wrong, 5.0, section.
>>>>>
>>>>> Fixed in v6.
>>>>>
>>>>>>> Also added a FATE test for it.
>>>>>
>>>>>
>>>>
>>>> Could use fminf/ float functions instead of double variants.
>>>
>>> v7.
>>
>> Going to push soon if there are no more comments.
> 
> Check that returned values are correct for bigger w/h, and that not
> values reach too high values for floats
> which may cause loss of precision in best case, eg. maybe you need to
> normalize pixel values from 0-255 to 0.f-1.f so mean/stddev  does not
> get bad results.

Did the accumulators as doubles then, good?

Also found another missing fmaxf(). V8 attached.

Thanks,
Thilo
From 8d5a4e0e38d46041cc587bc8badb44ac8d7090c6 Mon Sep 17 00:00:00 2001
From: Boris Baracaldo <borbarak@fb.com>
Date: Tue, 22 Mar 2022 09:34:02 +0100
Subject: [PATCH v8] lavfilter: Add SITI filter

Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video, as defined
in ITU-T P.910: Subjective video quality assessment methods for multimedia
applications.
---
 Changelog                             |   1 +
 doc/filters.texi                      |  23 ++
 libavfilter/Makefile                  |   1 +
 libavfilter/allfilters.c              |   1 +
 libavfilter/version.h                 |   2 +-
 libavfilter/vf_siti.c                 | 349 ++++++++++++++++++++++++++
 tests/fate-run.sh                     |   9 +
 tests/fate/filter-video.mak           |   3 +
 tests/ref/fate/filter-refcmp-siti-yuv |  15 ++
 9 files changed, 403 insertions(+), 1 deletion(-)
 create mode 100644 libavfilter/vf_siti.c
 create mode 100644 tests/ref/fate/filter-refcmp-siti-yuv

diff --git a/Changelog b/Changelog
index 3af8aa032b..200bd82680 100644
--- a/Changelog
+++ b/Changelog
@@ -5,6 +5,7 @@ version 5.1:
 - dialogue enhance audio filter
 - dropped obsolete XvMC hwaccel
 - pcm-bluray encoder
+- SITI filter
 
 
 version 5.0:
diff --git a/doc/filters.texi b/doc/filters.texi
index 26c5b4db48..9f50fd899f 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -19944,6 +19944,29 @@ ffmpeg -i input1.mkv -i input2.mkv -filter_complex "[0:v][1:v] signature=nb_inpu
 
 @end itemize
 
+@anchor{siti}
+@section siti
+
+Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video, as defined
+in ITU-T P.910: Subjective video quality assessment methods for multimedia
+applications. Available PDF at @url{https://www.itu.int/rec/T-REC-P.910-199909-S/en }.
+
+It accepts the following option:
+
+@table @option
+@item print_summary
+If set to 1, Summary statistics will be printed to the console. Default 0.
+@end table
+
+@subsection Examples
+@itemize
+@item
+To calculate SI/TI metrics and print summary:
+@example
+ffmpeg -i input.mp4 -vf siti=print_summary=1 -f null -
+@end example
+@end itemize
+
 @anchor{smartblur}
 @section smartblur
 
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 56d33e6480..43653597d8 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -456,6 +456,7 @@ OBJS-$(CONFIG_SMARTBLUR_FILTER)              += vf_smartblur.o
 OBJS-$(CONFIG_SOBEL_FILTER)                  += vf_convolution.o
 OBJS-$(CONFIG_SOBEL_OPENCL_FILTER)           += vf_convolution_opencl.o opencl.o \
                                                 opencl/convolution.o
+OBJS-$(CONFIG_SITI_FILTER)                   += vf_siti.o
 OBJS-$(CONFIG_SPLIT_FILTER)                  += split.o
 OBJS-$(CONFIG_SPP_FILTER)                    += vf_spp.o qp_table.o
 OBJS-$(CONFIG_SR_FILTER)                     += vf_sr.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index f5caee3a62..9fbaaacf47 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -432,6 +432,7 @@ extern const AVFilter ff_vf_shuffleplanes;
 extern const AVFilter ff_vf_sidedata;
 extern const AVFilter ff_vf_signalstats;
 extern const AVFilter ff_vf_signature;
+extern const AVFilter ff_vf_siti;
 extern const AVFilter ff_vf_smartblur;
 extern const AVFilter ff_vf_sobel;
 extern const AVFilter ff_vf_sobel_opencl;
diff --git a/libavfilter/version.h b/libavfilter/version.h
index 9a890c014f..95dd64d5b5 100644
--- a/libavfilter/version.h
+++ b/libavfilter/version.h
@@ -30,7 +30,7 @@
 #include "libavutil/version.h"
 
 #define LIBAVFILTER_VERSION_MAJOR   8
-#define LIBAVFILTER_VERSION_MINOR  27
+#define LIBAVFILTER_VERSION_MINOR  28
 #define LIBAVFILTER_VERSION_MICRO 100
 
 
diff --git a/libavfilter/vf_siti.c b/libavfilter/vf_siti.c
new file mode 100644
index 0000000000..b0d3d95be2
--- /dev/null
+++ b/libavfilter/vf_siti.c
@@ -0,0 +1,349 @@
+/*
+ * Copyright (c) 2021 Boris Baracaldo
+ * Copyright (c) 2022 Thilo Borgmann
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+/**
+ * @file
+ * Calculate Spatial Info (SI) and Temporal Info (TI) scores
+ */
+
+#include <math.h>
+
+#include "libavutil/imgutils.h"
+#include "libavutil/internal.h"
+#include "libavutil/opt.h"
+
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+static const int X_FILTER[9] = {
+    1, 0, -1,
+    2, 0, -2,
+    1, 0, -1
+};
+
+static const int Y_FILTER[9] = {
+    1, 2, 1,
+    0, 0, 0,
+    -1, -2, -1
+};
+
+typedef struct SiTiContext {
+    const AVClass *class;
+    int pixel_depth;
+    int width, height;
+    uint64_t nb_frames;
+    uint8_t *prev_frame;
+    float max_si;
+    float max_ti;
+    float min_si;
+    float min_ti;
+    float sum_si;
+    float sum_ti;
+    float *gradient_matrix;
+    float *motion_matrix;
+    int full_range;
+    int print_summary;
+} SiTiContext;
+
+static const enum AVPixelFormat pix_fmts[] = {
+    AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
+    AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
+    AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10,
+    AV_PIX_FMT_NONE
+};
+
+static av_cold int init(AVFilterContext *ctx)
+{
+    // User options but no input data
+    SiTiContext *s = ctx->priv;
+    s->max_si = 0;
+    s->max_ti = 0;
+    return 0;
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    SiTiContext *s = ctx->priv;
+
+    if (s->print_summary) {
+        float avg_si = s->sum_si / s->nb_frames;
+        float avg_ti = s->sum_ti / s->nb_frames;
+        av_log(ctx, AV_LOG_INFO,
+               "SITI Summary:\nTotal frames: %"PRId64"\n\n"
+               "Spatial Information:\nAverage: %f\nMax: %f\nMin: %f\n\n"
+               "Temporal Information:\nAverage: %f\nMax: %f\nMin: %f\n",
+               s->nb_frames, avg_si, s->max_si, s->min_si, avg_ti, s->max_ti, s->min_ti
+        );
+    }
+
+    av_freep(&s->prev_frame);
+    av_freep(&s->gradient_matrix);
+    av_freep(&s->motion_matrix);
+}
+
+static int config_input(AVFilterLink *inlink)
+{
+    // Video input data avilable
+    AVFilterContext *ctx = inlink->dst;
+    SiTiContext *s = ctx->priv;
+    int max_pixsteps[4];
+    size_t pixel_sz;
+    size_t data_sz;
+    size_t gradient_sz;
+    size_t motion_sz;
+
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    av_image_fill_max_pixsteps(max_pixsteps, NULL, desc);
+
+    // free previous buffers in case they are allocated already
+    av_freep(&s->prev_frame);
+    av_freep(&s->gradient_matrix);
+    av_freep(&s->motion_matrix);
+
+    s->pixel_depth = max_pixsteps[0];
+    s->width = inlink->w;
+    s->height = inlink->h;
+    pixel_sz = s->pixel_depth == 1 ? sizeof(uint8_t) : sizeof(uint16_t);
+    data_sz = s->width * pixel_sz * s->height;
+
+    s->prev_frame = av_malloc(data_sz);
+
+    gradient_sz = (s->width - 2) * sizeof(float) * (s->height - 2);
+    s->gradient_matrix = av_malloc(gradient_sz);
+
+    motion_sz = s->width * sizeof(float) * s->height;
+    s->motion_matrix = av_malloc(motion_sz);
+
+    if (!s->prev_frame || ! s->gradient_matrix || !s->motion_matrix) {
+        return AVERROR(ENOMEM);
+    }
+
+    return 0;
+}
+
+// Determine whether the video is in full or limited range. If not defined, assume limited.
+static int is_full_range(AVFrame* frame)
+{
+    // If color range not specified, fallback to pixel format
+    if (frame->color_range == AVCOL_RANGE_UNSPECIFIED || frame->color_range == AVCOL_RANGE_NB)
+        return frame->format == AV_PIX_FMT_YUVJ420P || frame->format == AV_PIX_FMT_YUVJ422P;
+    return frame->color_range == AVCOL_RANGE_JPEG;
+}
+
+// Check frame's color range and convert to full range if needed
+static uint16_t convert_full_range(int factor, uint16_t y)
+{
+    int shift;
+    int limit_upper;
+    int full_upper;
+    int limit_y;
+
+    // For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4
+    shift = 16 * factor;
+    limit_upper = 235 * factor - shift;
+    full_upper = 256 * factor - 1;
+    limit_y = fminf(fmaxf(y - shift, 0), limit_upper);
+    return (full_upper * limit_y / limit_upper);
+}
+
+// Applies sobel convolution
+static void convolve_sobel(SiTiContext *s, const uint8_t *src, float *dst, int linesize)
+{
+    double x_conv_sum;
+    double y_conv_sum;
+    float gradient;
+    int ki;
+    int kj;
+    int index;
+    uint16_t data;
+    int filter_width = 3;
+    int filter_size = filter_width * filter_width;
+    int stride = linesize / s->pixel_depth;
+    // For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4
+    int factor = s->pixel_depth == 1 ? 1 : 4;
+
+    // Dst matrix is smaller than src since we ignore edges that can't be convolved
+    #define CONVOLVE(bps)                                           \
+    {                                                               \
+        uint##bps##_t *vsrc = (uint##bps##_t*)src;                  \
+        for (int j = 1; j < s->height - 1; j++) {                   \
+            for (int i = 1; i < s->width - 1; i++) {                \
+                x_conv_sum = 0.0;                                   \
+                y_conv_sum = 0.0;                                   \
+                for (int k = 0; k < filter_size; k++) {             \
+                    ki = k % filter_width - 1;                      \
+                    kj = floor(k / filter_width) - 1;               \
+                    index = (j + kj) * stride + (i + ki);           \
+                    data = s->full_range ? vsrc[index] : convert_full_range(factor, vsrc[index]); \
+                    x_conv_sum += data * X_FILTER[k];               \
+                    y_conv_sum += data * Y_FILTER[k];               \
+                }                                                   \
+                gradient = sqrt(x_conv_sum * x_conv_sum + y_conv_sum * y_conv_sum); \
+                dst[(j - 1) * (s->width - 2) + (i - 1)] = gradient; \
+            }                                                       \
+        }                                                           \
+    }
+
+    if (s->pixel_depth == 2) {
+        CONVOLVE(16);
+    } else {
+        CONVOLVE(8);
+    }
+}
+
+// Calculate pixel difference between current and previous frame, and update previous
+static void calculate_motion(SiTiContext *s, const uint8_t *curr,
+                             float *motion_matrix, int linesize)
+{
+    int stride = linesize / s->pixel_depth;
+    float motion;
+    int curr_index;
+    int prev_index;
+    uint16_t curr_data;
+    // For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4
+    int factor = s->pixel_depth == 1 ? 1 : 4;
+
+    // Previous frame is already converted to full range
+    #define CALCULATE(bps)                                           \
+    {                                                                \
+        uint##bps##_t *vsrc = (uint##bps##_t*)curr;                  \
+        uint##bps##_t *vdst = (uint##bps##_t*)s->prev_frame;         \
+        for (int j = 0; j < s->height; j++) {                        \
+            for (int i = 0; i < s->width; i++) {                     \
+                motion = 0;                                          \
+                curr_index = j * stride + i;                         \
+                prev_index = j * s->width + i;                       \
+                curr_data = s->full_range ? vsrc[curr_index] : convert_full_range(factor, vsrc[curr_index]); \
+                if (s->nb_frames > 1)                                \
+                    motion = curr_data - vdst[prev_index];           \
+                vdst[prev_index] = curr_data;                        \
+                motion_matrix[j * s->width + i] = motion;            \
+            }                                                        \
+        }                                                            \
+    }
+
+    if (s->pixel_depth == 2) {
+        CALCULATE(16);
+    } else {
+        CALCULATE(8);
+    }
+}
+
+static float std_deviation(float *img_metrics, int width, int height)
+{
+    int size = height * width;
+    double mean = 0.0;
+    double sqr_diff = 0;
+
+    for (int j = 0; j < height; j++)
+        for (int i = 0; i < width; i++)
+            mean += img_metrics[j * width + i];
+
+    mean /= size;
+
+    for (int j = 0; j < height; j++) {
+        for (int i = 0; i < width; i++) {
+            float mean_diff = img_metrics[j * width + i] - mean;
+            sqr_diff += (mean_diff * mean_diff);
+        }
+    }
+    sqr_diff = sqr_diff / size;
+    return sqrt(sqr_diff);
+}
+
+static void set_meta(AVDictionary **metadata, const char *key, float d)
+{
+    char value[128];
+    snprintf(value, sizeof(value), "%0.2f", d);
+    av_dict_set(metadata, key, value, 0);
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
+{
+    AVFilterContext *ctx = inlink->dst;
+    SiTiContext *s = ctx->priv;
+    float si;
+    float ti;
+
+    s->full_range = is_full_range(frame);
+    s->nb_frames++;
+
+    // Calculate si and ti
+    convolve_sobel(s, frame->data[0], s->gradient_matrix, frame->linesize[0]);
+    calculate_motion(s, frame->data[0], s->motion_matrix, frame->linesize[0]);
+    si = std_deviation(s->gradient_matrix, s->width - 2, s->height - 2);
+    ti = std_deviation(s->motion_matrix, s->width, s->height);
+
+    // Calculate statistics
+    s->max_si  = fmaxf(si, s->max_si);
+    s->max_ti  = fmaxf(ti, s->max_ti);
+    s->sum_si += si;
+    s->sum_ti += ti;
+    s->min_si  = s->nb_frames == 1 ? si : fminf(si, s->min_si);
+    s->min_ti  = s->nb_frames == 1 ? ti : fminf(ti, s->min_ti);
+
+    // Set si ti information in frame metadata
+    set_meta(&frame->metadata, "lavfi.siti.si", si);
+    set_meta(&frame->metadata, "lavfi.siti.ti", ti);
+
+    return ff_filter_frame(inlink->dst->outputs[0], frame);
+}
+
+#define OFFSET(x) offsetof(SiTiContext, x)
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption siti_options[] = {
+    { "print_summary", "Print summary showing average values", OFFSET(print_summary), AV_OPT_TYPE_BOOL, { .i64=0 }, 0, 1, FLAGS },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(siti);
+
+static const AVFilterPad avfilter_vf_siti_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_input,
+        .filter_frame = filter_frame,
+    },
+};
+
+static const AVFilterPad avfilter_vf_siti_outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO
+    },
+};
+
+AVFilter ff_vf_siti = {
+    .name          = "siti",
+    .description   = NULL_IF_CONFIG_SMALL("Calculate spatial information (SI) and temporal information (TI)."),
+    .priv_size     = sizeof(SiTiContext),
+    .priv_class    = &siti_class,
+    .init          = init,
+    .uninit        = uninit,
+    .flags         = AVFILTER_FLAG_METADATA_ONLY,
+    FILTER_PIXFMTS_ARRAY(pix_fmts),
+    FILTER_INPUTS(avfilter_vf_siti_inputs),
+    FILTER_OUTPUTS(avfilter_vf_siti_outputs),
+};
diff --git a/tests/fate-run.sh b/tests/fate-run.sh
index fbfc0a925d..a3be44d0eb 100755
--- a/tests/fate-run.sh
+++ b/tests/fate-run.sh
@@ -382,6 +382,15 @@ refcmp_metadata(){
         -f null /dev/null | awk -v ref=${ref} -v fuzz=${fuzz} -f ${base}/refcmp-metadata.awk -
 }
 
+cmp_metadata(){
+    refcmp=$1
+    pixfmt=$2
+    fuzz=${3:-0.001}
+    ffmpeg $FLAGS $ENC_OPTS \
+        -lavfi "testsrc2=size=300x200:rate=1:duration=5,format=${pixfmt},${refcmp},metadata=print:file=-" \
+        -f null /dev/null | awk -v ref=${ref} -v fuzz=${fuzz} -f ${base}/refcmp-metadata.awk -
+}
+
 pixfmt_conversion(){
     conversion="${test#pixfmt-}"
     outdir="tests/data/pixfmt"
diff --git a/tests/fate/filter-video.mak b/tests/fate/filter-video.mak
index 510bb3ffbc..ae021a74a4 100644
--- a/tests/fate/filter-video.mak
+++ b/tests/fate/filter-video.mak
@@ -862,6 +862,9 @@ fate-filter-refcmp-ssim-rgb: CMD = refcmp_metadata ssim rgb24 0.015
 FATE_FILTER-$(call ALLYES, $(REFCMP_DEPS) SSIM_FILTER) += fate-filter-refcmp-ssim-yuv
 fate-filter-refcmp-ssim-yuv: CMD = refcmp_metadata ssim yuv422p 0.015
 
+FATE_FILTER-$(call ALLYES, $(REFCMP_DEPS) SITI_FILTER) += fate-filter-refcmp-siti-yuv
+fate-filter-refcmp-siti-yuv: CMD = cmp_metadata siti yuv420p 0.015
+
 FATE_SAMPLES_FFPROBE += $(FATE_METADATA_FILTER-yes)
 FATE_SAMPLES_FFMPEG += $(FATE_FILTER_SAMPLES-yes)
 FATE_FFMPEG += $(FATE_FILTER-yes)
diff --git a/tests/ref/fate/filter-refcmp-siti-yuv b/tests/ref/fate/filter-refcmp-siti-yuv
new file mode 100644
index 0000000000..5d99972d95
--- /dev/null
+++ b/tests/ref/fate/filter-refcmp-siti-yuv
@@ -0,0 +1,15 @@
+frame:0    pts:0       pts_time:0
+lavfi.siti.si=106.72
+lavfi.siti.ti=0.00
+frame:1    pts:1       pts_time:1
+lavfi.siti.si=109.40
+lavfi.siti.ti=28.00
+frame:2    pts:2       pts_time:2
+lavfi.siti.si=109.29
+lavfi.siti.ti=28.38
+frame:3    pts:3       pts_time:3
+lavfi.siti.si=113.27
+lavfi.siti.ti=33.42
+frame:4    pts:4       pts_time:4
+lavfi.siti.si=110.87
+lavfi.siti.ti=30.53

Thilo Borgmann March 28, 2022, 11:01 a.m. UTC | #37

Am 22.03.22 um 09:36 schrieb Thilo Borgmann:
> Am 18.03.22 um 15:04 schrieb Paul B Mahol:
>> On 3/18/22, Thilo Borgmann <thilo.borgmann@mail.de> wrote:
>>>
>>>
>>> On 12 Mar 2022, at 10:06, Thilo Borgmann wrote:
>>>
>>>> Am 09.03.22 um 18:31 schrieb Paul B Mahol:
>>>>> On 3/8/22, Thilo Borgmann <thilo.borgmann@mail.de> wrote:
>>>>>> Am 07.03.22 um 20:06 schrieb Paul B Mahol:
>>>>>>> On 3/7/22, Thilo Borgmann <thilo.borgmann@mail.de> wrote:
>>>>>>>> Am 06.03.22 um 22:25 schrieb Paul B Mahol:
>>>>>>>>> On 3/6/22, Thilo Borgmann <thilo.borgmann@mail.de> wrote:
>>>>>>>>>> Am 22.02.22 um 12:30 schrieb Thilo Borgmann:
>>>>>>>>>>> Am 18.02.22 um 17:08 schrieb Paul B Mahol:
>>>>>>>>>>>> On Sat, Feb 12, 2022 at 11:55 AM Thilo Borgmann
>>>>>>>>>>>> <thilo.borgmann@mail.de>
>>>>>>>>>>>> wrote:
>>>>>>>>>>>>
>>>>>>>>>>>>> Am 31.01.22 um 12:55 schrieb James Almer:
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> On 1/31/2022 8:53 AM, Anton Khirnov wrote:
>>>>>>>>>>>>>>> Quoting Thilo Borgmann (2022-01-18 14:58:07)
>>>>>>>>>>>>>>>>>> Violations of code style.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Enhanced.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Not enough. There are still many remaining, e.g.
>>>>>>>>>>>>>>> * opening brace of a function definition should be on its own
>>>>>>>>>>>>>>> line
>>>>>>>>>>>>>>> * the context should generally be the first argument
>>>>>>>>>>>>>>> * unsigned char* should be uint8_t*
>>>>>>>>>>>>>>> * mixed declarations and code (the compiler should warn about
>>>>>>>>>>>>>>> that)
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> I think someone said that clang (or some versions) is
>>>>>>>>>>>>>> apparently
>>>>>>>>>>>>>> not
>>>>>>>>>>>>> warning about this, hence why so many of these end up being
>>>>>>>>>>>>> missed
>>>>>>>>>>>>> in
>>>>>>>>>>>>> reviews or even by the patch author.
>>>>>>>>>>>>>
>>>>>>>>>>>>> This and all of Anton's comments in v3. Also removed some more
>>>>>>>>>>>>> obviously
>>>>>>>>>>>>> useless doubles.
>>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>> Why it uses doubles in so many places?
>>>>>>>>>>>> Is there any real benefit in that, except extra slowdown?
>>>>>>>>>>>
>>>>>>>>>>> I guess because it's originating in some c&p Matlab code.
>>>>>>>>>>> I did %s#double#float#g for v4, loosing some precision we can
>>>>>>>>>>> ignore
>>>>>>>>>>> IMHO.
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> v3:
>>>>>>>>>>>
>>>>>>>>>>> Total frames: 2
>>>>>>>>>>>
>>>>>>>>>>> Spatial Information:
>>>>>>>>>>> Average: 165.451985
>>>>>>>>>>> Max: 165.817542
>>>>>>>>>>> Min: 165.086427
>>>>>>>>>>>
>>>>>>>>>>> Temporal Information:
>>>>>>>>>>> Average: 1.007263
>>>>>>>>>>> Max: 2.014525
>>>>>>>>>>> Min: 0.000000
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> v4:
>>>>>>>>>>>
>>>>>>>>>>> Total frames: 2
>>>>>>>>>>>
>>>>>>>>>>> Spatial Information:
>>>>>>>>>>> Average: 164.385895
>>>>>>>>>>> Max: 164.742325
>>>>>>>>>>> Min: 164.029480
>>>>>>>>>>>
>>>>>>>>>>> Temporal Information:
>>>>>>>>>>> Average: 1.007241
>>>>>>>>>>> Max: 2.014483
>>>>>>>>>>> Min: 0.000000
>>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> Ping.
>>>>>>>>>
>>>>>>>>> Into wrong section of changelog added entry.
>>>>>>>>>
>>>>>>>>> Useless cast of allocation results.
>>>>>>>>>
>>>>>>>>> Does filter changes pixels? If not, add metadata flag to appropriate
>>>>>>>>> place.
>>>>>>>>
>>>>>>>> All addressed in v5, thx!
>>>>>>>>
>>>>>>>
>>>>>>> Changelog entry is still in wrong, 5.0, section.
>>>>>>
>>>>>> Fixed in v6.
>>>>>>
>>>>>>>> Also added a FATE test for it.
>>>>>>
>>>>>>
>>>>>
>>>>> Could use fminf/ float functions instead of double variants.
>>>>
>>>> v7.
>>>
>>> Going to push soon if there are no more comments.
>>
>> Check that returned values are correct for bigger w/h, and that not
>> values reach too high values for floats
>> which may cause loss of precision in best case, eg. maybe you need to
>> normalize pixel values from 0-255 to 0.f-1.f so mean/stddev  does not
>> get bad results.
> 
> Did the accumulators as doubles then, good?
> 
> Also found another missing fmaxf(). V8 attached.

Ping.

-Thilo

Thilo Borgmann April 1, 2022, 8:58 a.m. UTC | #38

Am 28.03.22 um 13:01 schrieb Thilo Borgmann:
> Am 22.03.22 um 09:36 schrieb Thilo Borgmann:
>> Am 18.03.22 um 15:04 schrieb Paul B Mahol:
>>> On 3/18/22, Thilo Borgmann <thilo.borgmann@mail.de> wrote:
>>>>
>>>>
>>>> On 12 Mar 2022, at 10:06, Thilo Borgmann wrote:
>>>>
>>>>> Am 09.03.22 um 18:31 schrieb Paul B Mahol:
>>>>>> On 3/8/22, Thilo Borgmann <thilo.borgmann@mail.de> wrote:
>>>>>>> Am 07.03.22 um 20:06 schrieb Paul B Mahol:
>>>>>>>> On 3/7/22, Thilo Borgmann <thilo.borgmann@mail.de> wrote:
>>>>>>>>> Am 06.03.22 um 22:25 schrieb Paul B Mahol:
>>>>>>>>>> On 3/6/22, Thilo Borgmann <thilo.borgmann@mail.de> wrote:
>>>>>>>>>>> Am 22.02.22 um 12:30 schrieb Thilo Borgmann:
>>>>>>>>>>>> Am 18.02.22 um 17:08 schrieb Paul B Mahol:
>>>>>>>>>>>>> On Sat, Feb 12, 2022 at 11:55 AM Thilo Borgmann
>>>>>>>>>>>>> <thilo.borgmann@mail.de>
>>>>>>>>>>>>> wrote:
>>>>>>>>>>>>>
>>>>>>>>>>>>>> Am 31.01.22 um 12:55 schrieb James Almer:
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> On 1/31/2022 8:53 AM, Anton Khirnov wrote:
>>>>>>>>>>>>>>>> Quoting Thilo Borgmann (2022-01-18 14:58:07)
>>>>>>>>>>>>>>>>>>> Violations of code style.
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> Enhanced.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Not enough. There are still many remaining, e.g.
>>>>>>>>>>>>>>>> * opening brace of a function definition should be on its own
>>>>>>>>>>>>>>>> line
>>>>>>>>>>>>>>>> * the context should generally be the first argument
>>>>>>>>>>>>>>>> * unsigned char* should be uint8_t*
>>>>>>>>>>>>>>>> * mixed declarations and code (the compiler should warn about
>>>>>>>>>>>>>>>> that)
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> I think someone said that clang (or some versions) is
>>>>>>>>>>>>>>> apparently
>>>>>>>>>>>>>>> not
>>>>>>>>>>>>>> warning about this, hence why so many of these end up being
>>>>>>>>>>>>>> missed
>>>>>>>>>>>>>> in
>>>>>>>>>>>>>> reviews or even by the patch author.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> This and all of Anton's comments in v3. Also removed some more
>>>>>>>>>>>>>> obviously
>>>>>>>>>>>>>> useless doubles.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> Why it uses doubles in so many places?
>>>>>>>>>>>>> Is there any real benefit in that, except extra slowdown?
>>>>>>>>>>>>
>>>>>>>>>>>> I guess because it's originating in some c&p Matlab code.
>>>>>>>>>>>> I did %s#double#float#g for v4, loosing some precision we can
>>>>>>>>>>>> ignore
>>>>>>>>>>>> IMHO.
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>> v3:
>>>>>>>>>>>>
>>>>>>>>>>>> Total frames: 2
>>>>>>>>>>>>
>>>>>>>>>>>> Spatial Information:
>>>>>>>>>>>> Average: 165.451985
>>>>>>>>>>>> Max: 165.817542
>>>>>>>>>>>> Min: 165.086427
>>>>>>>>>>>>
>>>>>>>>>>>> Temporal Information:
>>>>>>>>>>>> Average: 1.007263
>>>>>>>>>>>> Max: 2.014525
>>>>>>>>>>>> Min: 0.000000
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>> v4:
>>>>>>>>>>>>
>>>>>>>>>>>> Total frames: 2
>>>>>>>>>>>>
>>>>>>>>>>>> Spatial Information:
>>>>>>>>>>>> Average: 164.385895
>>>>>>>>>>>> Max: 164.742325
>>>>>>>>>>>> Min: 164.029480
>>>>>>>>>>>>
>>>>>>>>>>>> Temporal Information:
>>>>>>>>>>>> Average: 1.007241
>>>>>>>>>>>> Max: 2.014483
>>>>>>>>>>>> Min: 0.000000
>>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> Ping.
>>>>>>>>>>
>>>>>>>>>> Into wrong section of changelog added entry.
>>>>>>>>>>
>>>>>>>>>> Useless cast of allocation results.
>>>>>>>>>>
>>>>>>>>>> Does filter changes pixels? If not, add metadata flag to appropriate
>>>>>>>>>> place.
>>>>>>>>>
>>>>>>>>> All addressed in v5, thx!
>>>>>>>>>
>>>>>>>>
>>>>>>>> Changelog entry is still in wrong, 5.0, section.
>>>>>>>
>>>>>>> Fixed in v6.
>>>>>>>
>>>>>>>>> Also added a FATE test for it.
>>>>>>>
>>>>>>>
>>>>>>
>>>>>> Could use fminf/ float functions instead of double variants.
>>>>>
>>>>> v7.
>>>>
>>>> Going to push soon if there are no more comments.
>>>
>>> Check that returned values are correct for bigger w/h, and that not
>>> values reach too high values for floats
>>> which may cause loss of precision in best case, eg. maybe you need to
>>> normalize pixel values from 0-255 to 0.f-1.f so mean/stddev  does not
>>> get bad results.
>>
>> Did the accumulators as doubles then, good?
>>
>> Also found another missing fmaxf(). V8 attached.
> 
> Ping.

Will apply soon.

-Thilo

Thilo Borgmann April 1, 2022, 6:54 p.m. UTC | #39

Am 01.04.22 um 10:58 schrieb Thilo Borgmann:
> Am 28.03.22 um 13:01 schrieb Thilo Borgmann:
>> Am 22.03.22 um 09:36 schrieb Thilo Borgmann:
>>> Am 18.03.22 um 15:04 schrieb Paul B Mahol:
>>>> On 3/18/22, Thilo Borgmann <thilo.borgmann@mail.de> wrote:
>>>>>
>>>>>
>>>>> On 12 Mar 2022, at 10:06, Thilo Borgmann wrote:
>>>>>
>>>>>> Am 09.03.22 um 18:31 schrieb Paul B Mahol:
>>>>>>> On 3/8/22, Thilo Borgmann <thilo.borgmann@mail.de> wrote:
>>>>>>>> Am 07.03.22 um 20:06 schrieb Paul B Mahol:
>>>>>>>>> On 3/7/22, Thilo Borgmann <thilo.borgmann@mail.de> wrote:
>>>>>>>>>> Am 06.03.22 um 22:25 schrieb Paul B Mahol:
>>>>>>>>>>> On 3/6/22, Thilo Borgmann <thilo.borgmann@mail.de> wrote:
>>>>>>>>>>>> Am 22.02.22 um 12:30 schrieb Thilo Borgmann:
>>>>>>>>>>>>> Am 18.02.22 um 17:08 schrieb Paul B Mahol:
>>>>>>>>>>>>>> On Sat, Feb 12, 2022 at 11:55 AM Thilo Borgmann
>>>>>>>>>>>>>> <thilo.borgmann@mail.de>
>>>>>>>>>>>>>> wrote:
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Am 31.01.22 um 12:55 schrieb James Almer:
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> On 1/31/2022 8:53 AM, Anton Khirnov wrote:
>>>>>>>>>>>>>>>>> Quoting Thilo Borgmann (2022-01-18 14:58:07)
>>>>>>>>>>>>>>>>>>>> Violations of code style.
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Enhanced.
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> Not enough. There are still many remaining, e.g.
>>>>>>>>>>>>>>>>> * opening brace of a function definition should be on its own
>>>>>>>>>>>>>>>>> line
>>>>>>>>>>>>>>>>> * the context should generally be the first argument
>>>>>>>>>>>>>>>>> * unsigned char* should be uint8_t*
>>>>>>>>>>>>>>>>> * mixed declarations and code (the compiler should warn about
>>>>>>>>>>>>>>>>> that)
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> I think someone said that clang (or some versions) is
>>>>>>>>>>>>>>>> apparently
>>>>>>>>>>>>>>>> not
>>>>>>>>>>>>>>> warning about this, hence why so many of these end up being
>>>>>>>>>>>>>>> missed
>>>>>>>>>>>>>>> in
>>>>>>>>>>>>>>> reviews or even by the patch author.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> This and all of Anton's comments in v3. Also removed some more
>>>>>>>>>>>>>>> obviously
>>>>>>>>>>>>>>> useless doubles.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Why it uses doubles in so many places?
>>>>>>>>>>>>>> Is there any real benefit in that, except extra slowdown?
>>>>>>>>>>>>>
>>>>>>>>>>>>> I guess because it's originating in some c&p Matlab code.
>>>>>>>>>>>>> I did %s#double#float#g for v4, loosing some precision we can
>>>>>>>>>>>>> ignore
>>>>>>>>>>>>> IMHO.
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> v3:
>>>>>>>>>>>>>
>>>>>>>>>>>>> Total frames: 2
>>>>>>>>>>>>>
>>>>>>>>>>>>> Spatial Information:
>>>>>>>>>>>>> Average: 165.451985
>>>>>>>>>>>>> Max: 165.817542
>>>>>>>>>>>>> Min: 165.086427
>>>>>>>>>>>>>
>>>>>>>>>>>>> Temporal Information:
>>>>>>>>>>>>> Average: 1.007263
>>>>>>>>>>>>> Max: 2.014525
>>>>>>>>>>>>> Min: 0.000000
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> v4:
>>>>>>>>>>>>>
>>>>>>>>>>>>> Total frames: 2
>>>>>>>>>>>>>
>>>>>>>>>>>>> Spatial Information:
>>>>>>>>>>>>> Average: 164.385895
>>>>>>>>>>>>> Max: 164.742325
>>>>>>>>>>>>> Min: 164.029480
>>>>>>>>>>>>>
>>>>>>>>>>>>> Temporal Information:
>>>>>>>>>>>>> Average: 1.007241
>>>>>>>>>>>>> Max: 2.014483
>>>>>>>>>>>>> Min: 0.000000
>>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>> Ping.
>>>>>>>>>>>
>>>>>>>>>>> Into wrong section of changelog added entry.
>>>>>>>>>>>
>>>>>>>>>>> Useless cast of allocation results.
>>>>>>>>>>>
>>>>>>>>>>> Does filter changes pixels? If not, add metadata flag to appropriate
>>>>>>>>>>> place.
>>>>>>>>>>
>>>>>>>>>> All addressed in v5, thx!
>>>>>>>>>>
>>>>>>>>>
>>>>>>>>> Changelog entry is still in wrong, 5.0, section.
>>>>>>>>
>>>>>>>> Fixed in v6.
>>>>>>>>
>>>>>>>>>> Also added a FATE test for it.
>>>>>>>>
>>>>>>>>
>>>>>>>
>>>>>>> Could use fminf/ float functions instead of double variants.
>>>>>>
>>>>>> v7.
>>>>>
>>>>> Going to push soon if there are no more comments.
>>>>
>>>> Check that returned values are correct for bigger w/h, and that not
>>>> values reach too high values for floats
>>>> which may cause loss of precision in best case, eg. maybe you need to
>>>> normalize pixel values from 0-255 to 0.f-1.f so mean/stddev  does not
>>>> get bad results.
>>>
>>> Did the accumulators as doubles then, good?
>>>
>>> Also found another missing fmaxf(). V8 attached.
>>
>> Ping.
> 
> Will apply soon.

Applied, thanks!

-Thilo

diff mbox series

Patch

diff --git a/Changelog b/Changelog
index dcb80e0ed9..9ca79a12aa 100644
--- a/Changelog
+++ b/Changelog
@@ -55,6 +55,7 @@  version <next>:
 - asuperpass and asuperstop filter
 - shufflepixels filter
 - tmidequalizer filter
+- siti filter


 version 4.3:
diff --git a/doc/filters.texi b/doc/filters.texi
index 813e35c2f9..eb1f23128c 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -18158,6 +18158,31 @@  ffmpeg -i input1.mkv -i input2.mkv -filter_complex "[0:v][1:v] signature=nb_inpu

 @end itemize

+@anchor{siti}
+@section siti
+
+Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video, as defined
+in ITU-T P.910: Subjective video quality assessment methods for multimedia
+applications. Available PDF at @url{https://www.itu.int/rec/T-REC-P.910-199909-S/en }.
+Per frame metrics can be written into a file in csv format.
+
+It accepts the following option:
+
+@table @option
+@item stats_file
+Set the path to the file where per frame SI and TI metrics will be written. If no file
+is specified, only summary statistics will be printed to the console.
+@end table
+
+@subsection Examples
+@itemize
+@item
+To calculate SI/TI metrics and store per frame data to stats.csv:
+@example
+ffmpeg -i input.mp4 -vf siti=stats_file='siti.csv' -f null -
+@end example
+@end itemize
+
 @anchor{smartblur}
 @section smartblur

diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index ad1046d526..5514536463 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -413,6 +413,7 @@  OBJS-$(CONFIG_SMARTBLUR_FILTER)              += vf_smartblur.o
 OBJS-$(CONFIG_SOBEL_FILTER)                  += vf_convolution.o
 OBJS-$(CONFIG_SOBEL_OPENCL_FILTER)           += vf_convolution_opencl.o opencl.o \
                                                 opencl/convolution.o
+OBJS-$(CONFIG_SITI_FILTER)                   += vf_siti.o
 OBJS-$(CONFIG_SPLIT_FILTER)                  += split.o
 OBJS-$(CONFIG_SPP_FILTER)                    += vf_spp.o qp_table.o
 OBJS-$(CONFIG_SR_FILTER)                     += vf_sr.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index ce317dfa1c..0aef9fdcef 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -393,6 +393,7 @@  extern AVFilter ff_vf_signature;
 extern AVFilter ff_vf_smartblur;
 extern AVFilter ff_vf_sobel;
 extern AVFilter ff_vf_sobel_opencl;
+extern AVFilter ff_vf_siti;
 extern AVFilter ff_vf_split;
 extern AVFilter ff_vf_spp;
 extern AVFilter ff_vf_sr;
diff --git a/libavfilter/version.h b/libavfilter/version.h
index fbe2ed62b2..2136235e54 100644
--- a/libavfilter/version.h
+++ b/libavfilter/version.h
@@ -30,7 +30,7 @@ 
 #include "libavutil/version.h"

 #define LIBAVFILTER_VERSION_MAJOR   7
-#define LIBAVFILTER_VERSION_MINOR  95
+#define LIBAVFILTER_VERSION_MINOR  96
 #define LIBAVFILTER_VERSION_MICRO 100


diff --git a/libavfilter/vf_siti.c b/libavfilter/vf_siti.c
new file mode 100644
index 0000000000..e8b6142ae7
--- /dev/null
+++ b/libavfilter/vf_siti.c
@@ -0,0 +1,359 @@ 
+/*
+ * Copyright (c) 2002 A'rpi
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+/**
+ * @file
+ * Calculate Spatial Info (SI) and Temporal Info (TI) scores
+ */
+
+#include <math.h>
+
+#include "libavutil/imgutils.h"
+#include "libavutil/internal.h"
+#include "libavutil/opt.h"
+
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+static const int X_FILTER[9] = {
+    1, 0, -1,
+    2, 0, -2,
+    1, 0, -1
+};
+
+static const int Y_FILTER[9] = {
+    1, 2, 1,
+    0, 0, 0,
+    -1, -2, -1
+};
+
+typedef struct SiTiContext {
+    const AVClass *class;
+    int pixel_depth;
+    int width, height;
+    int nb_frames;
+    unsigned char *prev_frame;
+    double max_si;
+    double max_ti;
+    double min_si;
+    double min_ti;
+    double sum_si;
+    double sum_ti;
+    FILE *stats_file;
+    char *stats_file_str;
+    int full_range;
+} SiTiContext;
+
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat pix_fmts[] = {
+        AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
+        AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
+        AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10,
+        AV_PIX_FMT_NONE
+    };
+
+    AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
+    if (!fmts_list)
+        return AVERROR(ENOMEM);
+    return ff_set_common_formats(ctx, fmts_list);
+}
+
+static av_cold int init(AVFilterContext *ctx)
+{
+    // User options but no input data
+    SiTiContext *s = ctx->priv;
+    s->max_si = 0;
+    if (s->stats_file_str) {
+        s->stats_file = fopen(s->stats_file_str, "w");
+        if (!s->stats_file) {
+            int err = AVERROR(errno);
+            char buf[128];
+            av_strerror(err, buf, sizeof(buf));
+            av_log(ctx, AV_LOG_ERROR, "Could not open stats file %s: %s\n",
+                    s->stats_file_str, buf);
+            return err;
+        }
+        fprintf(s->stats_file, "Frame,SI,TI\n");
+    }
+
+    return 0;
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    SiTiContext *s = ctx->priv;
+
+    double avg_si = s->sum_si / s->nb_frames;
+    double avg_ti = s->sum_ti / s->nb_frames;
+    av_log(ctx, AV_LOG_INFO,
+        "Summary:\nTotal frames: %d\n\n"
+        "Spatial Information:\nAverage: %f\nMax: %f\nMin: %f\n\n"
+        "Temporal Information:\nAverage: %f\nMax: %f\nMin: %f\n",
+        s->nb_frames, avg_si, s->max_si, s->min_si, avg_ti, s->max_ti, s->min_ti
+    );
+
+    if (s->stats_file && s->stats_file != stdout)
+        fclose(s->stats_file);
+}
+
+static int config_input(AVFilterLink *inlink)
+{
+    // Video input data avilable
+    AVFilterContext *ctx = inlink->dst;
+    SiTiContext *s = ctx->priv;
+    int max_pixsteps[4];
+
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    av_image_fill_max_pixsteps(max_pixsteps, NULL, desc);
+
+    s->pixel_depth = max_pixsteps[0];
+    s->width = inlink->w;
+    s->height = inlink->h;
+    size_t pixel_sz = s->pixel_depth==1? (size_t) sizeof(uint8_t) : (size_t) sizeof(uint16_t);
+    size_t data_sz = (size_t) s->width * pixel_sz * s->height;
+    s->prev_frame = av_malloc(data_sz);
+
+    return 0;
+}
+
+// Get frame data handling 8 and 10 bit formats
+static uint16_t get_frame_data(const unsigned char* src, int pixel_depth, int index)
+{
+    const uint16_t *src16 = (const uint16_t *)src;
+    if (pixel_depth == 2)
+    {
+        return src16[index];
+    }
+    return (uint16_t) src[index];
+}
+
+// Set frame data handling 8 and 10 bit formats
+static void set_frame_data(unsigned char* dst, int pixel_depth, int index, uint16_t data)
+{
+    uint16_t *dst16 = (uint16_t *)dst;
+    if (pixel_depth == 2)
+    {
+        dst16[index] = data;
+    }
+    else
+    {
+        dst[index] = (uint8_t) data;
+    }
+}
+
+// Determine whether the video is in full or limited range. If not defined, assume limited.
+static int is_full_range(AVFrame* frame)
+{
+    if (frame->color_range == AVCOL_RANGE_UNSPECIFIED || frame->color_range == AVCOL_RANGE_NB)
+    {
+        // If color range not specified, fallback to pixel format
+        return frame->format == AV_PIX_FMT_YUVJ420P || frame->format == AV_PIX_FMT_YUVJ422P;
+    }
+    return frame->color_range == AVCOL_RANGE_JPEG;
+}
+
+// Check frame's color range and convert to full range if needed
+static uint16_t convert_full_range(uint16_t y, SiTiContext *s)
+{
+    if (s->full_range == 1)
+    {
+        return y;
+    }
+
+    // For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4
+    double factor = s->pixel_depth == 1? 1 : 4;
+    double shift = 16 * factor;
+    double limit_upper = 235 * factor - shift;
+    double full_upper = 256 * factor - 1;
+    double limit_y = fmin(fmax(y - shift, 0), limit_upper);
+    return (uint16_t) (full_upper * limit_y / limit_upper);
+}
+
+// Applies sobel convolution
+static void convolve_sobel(const unsigned char* src, double* dst, int linesize, SiTiContext *s)
+{
+    int filter_width = 3;
+    int filter_size = filter_width * filter_width;
+    for (int j=1; j<s->height-1; j++)
+    {
+        for (int i=1; i<s->width-1; i++)
+        {
+            double x_conv_sum = 0, y_conv_sum = 0;
+            for (int k=0; k<filter_size; k++)
+            {
+                int ki = k % filter_width - 1;
+                int kj = floor(k / filter_width) - 1;
+                int index = (j + kj) * (linesize / s->pixel_depth) + (i + ki);
+                uint16_t data = convert_full_range(get_frame_data(src, s->pixel_depth, index), s);
+                x_conv_sum += data * X_FILTER[k];
+                y_conv_sum += data * Y_FILTER[k];
+            }
+            double gradient = sqrt(x_conv_sum * x_conv_sum + y_conv_sum * y_conv_sum);
+            // Dst matrix is smaller than src since we ignore edges that can't be convolved
+            dst[(j - 1) * (s->width - 2) + (i - 1)] = gradient;
+        }
+    }
+}
+
+// Calculate pixel difference between current and previous frame, and update previous
+static void calculate_motion(const unsigned char* curr, double* motion_matrix,
+                             int linesize, SiTiContext *s)
+{
+    for (int j=0; j<s->height; j++)
+    {
+        for (int i=0; i<s->width; i++)
+        {
+            double motion = 0;
+            int curr_index = j * (linesize / s->pixel_depth) + i;
+            int prev_index = j * s->width + i;
+            uint16_t curr_data = convert_full_range(get_frame_data(curr, s->pixel_depth, curr_index), s);
+
+            if (s->nb_frames > 1)
+            {
+                // Previous frame is already converted to full range
+                motion = curr_data - get_frame_data(s->prev_frame, s->pixel_depth, prev_index);
+            }
+            set_frame_data(s->prev_frame, s->pixel_depth, prev_index, curr_data);
+            motion_matrix[j * s->width + i] = motion;
+        }
+    }
+}
+
+static double std_deviation(double* img_metrics, int width, int height)
+{
+    double size = height * width;
+
+    double mean_sum = 0;
+    for (int j=0; j<height; j++)
+    {
+        for (int i=0; i<width; i++)
+        {
+            mean_sum += img_metrics[j * width + i];
+        }
+    }
+    double mean = mean_sum / size;
+
+    double sqr_diff_sum = 0;
+    for (int j=0; j<height; j++)
+    {
+        for (int i=0; i<width; i++)
+        {
+            double mean_diff = img_metrics[j * width + i] - mean;
+            sqr_diff_sum += (mean_diff * mean_diff);
+        }
+    }
+    double variance = sqr_diff_sum / size;
+    return sqrt(variance);
+}
+
+static void set_meta(AVDictionary **metadata, const char *key, float d)
+{
+    char value[128];
+    snprintf(value, sizeof(value), "%0.2f", d);
+    av_dict_set(metadata, key, value, 0);
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
+{
+    AVFilterContext *ctx = inlink->dst;
+    SiTiContext *s = ctx->priv;
+
+    // Gradient matrix will not include the input frame's edges
+    size_t gradient_data_sz = (size_t) (s->width - 2) * sizeof(double) * (s->height - 2);
+    double *gradient_matrix = av_malloc(gradient_data_sz);
+    size_t motion_data_sz = (size_t) s->width * sizeof(double) * s->height;
+    double *motion_matrix = av_malloc(motion_data_sz);
+    if (!gradient_matrix || !motion_matrix) {
+        av_frame_free(&frame);
+        return AVERROR(ENOMEM);
+    }
+
+    s->full_range = is_full_range(frame);
+    s->nb_frames++;
+
+    // Calculate si and ti
+    convolve_sobel(frame->data[0], gradient_matrix, frame->linesize[0], s);
+    calculate_motion(frame->data[0], motion_matrix, frame->linesize[0], s);
+    double si = std_deviation(gradient_matrix, s->width - 2, s->height - 2);
+    double ti = std_deviation(motion_matrix, s->width, s->height);
+
+    // Calculate statistics
+    s->max_si = fmax(si, s->max_si);
+    s->max_ti = fmax(ti, s->max_ti);
+    s->sum_si += si;
+    s->sum_ti += ti;
+    s->min_si = s->nb_frames == 1? si : fmin(si, s->min_si);
+    s->min_ti = s->nb_frames == 1? ti : fmin(ti, s->min_ti);
+
+    // Set si ti information in frame metadata
+    set_meta(&frame->metadata, "lavfi.siti.si", si);
+    set_meta(&frame->metadata, "lavfi.siti.ti", ti);
+
+    // Print per frame csv data to file
+    if (s->stats_file)
+    {
+        fprintf(s->stats_file, "%d,%f,%f\n", s->nb_frames, si, ti);
+    }
+
+    av_free(gradient_matrix);
+    return ff_filter_frame(inlink->dst->outputs[0], frame);
+}
+
+#define OFFSET(x) offsetof(SiTiContext, x)
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption siti_options[] = {
+    {"stats_file", "Set file where to store per-frame si-ti scores", OFFSET(stats_file_str), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(siti);
+
+static const AVFilterPad avfilter_vf_siti_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_input,
+        .filter_frame = filter_frame,
+    },
+    { NULL }
+};
+
+static const AVFilterPad avfilter_vf_siti_outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_siti = {
+    .name          = "siti",
+    .description   = NULL_IF_CONFIG_SMALL("Calculate spatial info (SI)."),
+    .priv_size     = sizeof(SiTiContext),
+    .priv_class    = &siti_class,
+    .init          = init,
+    .uninit        = uninit,
+    .query_formats = query_formats,
+    .inputs        = avfilter_vf_siti_inputs,
+    .outputs       = avfilter_vf_siti_outputs,
+};