diff mbox series

[FFmpeg-devel] GSoC Project Submission: SiTi Filter Update for ITU-T P.910 Compliance and HDR Support

Message ID CAJdhrgngs4Q1seTYSayjKfpin6UUEL=NRTkOUtcujsfNWKWezQ@mail.gmail.com
State New
Headers show
Series [FFmpeg-devel] GSoC Project Submission: SiTi Filter Update for ITU-T P.910 Compliance and HDR Support | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 fail Make fate failed
andriy/make_x86 success Make finished
andriy/make_fate_x86 fail Make fate failed

Commit Message

Poorva Aug. 21, 2024, 7:56 a.m. UTC
Hello FFmpeg Team,

I am submitting the completed work from my Google Summer of Code project,
which focuses on updating the SiTi filter for ITU-T P.910 07/22 compliance
and adding HDR support.

Attached is the patch for your review. I appreciate your feedback and
suggestions.

Comments

Ramiro Polla Aug. 22, 2024, 12:46 p.m. UTC | #1
On 2024-08-21 09:56, Poorva wrote:
> Hello FFmpeg Team,
> 
> I am submitting the completed work from my Google Summer of Code project,
> which focuses on updating the SiTi filter for ITU-T P.910 07/22 compliance
> and adding HDR support.
> 
> Attached is the patch for your review. I appreciate your feedback and
> suggestions.

> diff --git a/libavfilter/vf_siti.c b/libavfilter/vf_siti.c
> index 722e7cecc7..2bbcaa0afd 100644
> --- a/libavfilter/vf_siti.c
> +++ b/libavfilter/vf_siti.c
> @@ -47,6 +47,19 @@ static const int Y_FILTER[9] = {
>      -1, -2, -1
>  };
>  
> +static const float PU21_MATRICES[4][7] = {

In general, variable names are lowercase. Names with all caps will be 
used for #defines. I know the variables above are all caps, but we 
usually avoid that.

> +    // Reference: "PU21: A novel perceptually uniform encoding for adapting existing quality metrics for HDR"
> +    // Rafał K. Mantiuk and M. Azimi, Picture Coding Symposium 2021
> +    // https://github.com/gfxdisp/pu21
> +    {1.070275272f, 0.4088273932f, 0.153224308f, 0.2520326168f, 1.063512885f, 1.14115047f, 521.4527484f},  // BANDING
> +    {0.353487901f, 0.3734658629f, 8.277049286e-05f, 0.9062562627f, 0.09150303166f, 0.9099517204f, 596.3148142f},  // BANDING_GLARE
> +    {1.043882782f, 0.6459495343f, 0.3194584211f, 0.374025247f, 1.114783422f, 1.095360363f, 384.9217577f},  // PEAKS
> +    {816.885024f, 1479.463946f, 0.001253215609f, 0.9329636822f, 0.06746643971f, 1.573435413f, 419.6006374f}  // PEAKS_GLARE
> +};
> +
> +static const float PU21_MIN_VALUES[4] = {-1.5580e-07f, 5.4705e-10f, 1.3674e-07f, -9.7360e-08f};
> +static const float PU21_MAX_VALUES[4] = {520.4673f, 595.3939f, 380.9853f, 407.5066f};
> +
>  typedef struct SiTiContext {
>      const AVClass *class;
>      int pixel_depth;
> @@ -63,8 +76,45 @@ typedef struct SiTiContext {
>      float *motion_matrix;
>      int full_range;
>      int print_summary;
> +    int hdr_mode;
> +    int bit_depth;
> +    int color_range;
> +    int eotf_function;
> +    int calculation_domain;
> +    float l_max;
> +    float l_min;
> +    float gamma;
> +    int pu21_mode;

This following fields are not being used: bit_depth, color_range, l_min, 
and pu21_mode. Perhaps you meant to use them (they seem to make sense in 
a couple of places in the rest of the patch). If they're not used, they 
should be removed (from the context and from the options).

>  } SiTiContext;
>  
> +enum HdrMode {
> +    SDR,
> +    HDR10,
> +    HLG
> +};
> +
> +enum ColorRange {
> +    LIMITED,
> +    FULL
> +};

We already have AVColorRange. Perhaps that could be used instead.

> +
> +enum EotfFunction {
> +    BT1886,
> +    INV_SRGB
> +};
> +
> +enum CalculationDomain {
> +    PQ,
> +    PU21
> +};
> +
> +enum Pu21Mode {
> +    BANDING,
> +    BANDING_GLARE,
> +    PEAKS,
> +    PEAKS_GLARE
> +};
> +
>  static const enum AVPixelFormat pix_fmts[] = {
>      AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
>      AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
> @@ -78,6 +128,15 @@ static av_cold int init(AVFilterContext *ctx)
>      SiTiContext *s = ctx->priv;
>      s->max_si = 0;
>      s->max_ti = 0;
> +    s->hdr_mode = SDR;
> +    s->bit_depth = 8;
> +    s->color_range = LIMITED;
> +    s->eotf_function = BT1886;
> +    s->calculation_domain = PQ;
> +    s->l_max = 300.0;
> +    s->l_min = 0.1;
> +    s->gamma = 2.4;
> +    s->pu21_mode = BANDING;

Like I mentioned above, some of these fields are not being used (except 
for being set here).

>      return 0;
>  }
>  
> @@ -166,6 +225,105 @@ static uint16_t convert_full_range(int factor, uint16_t y)
>      return (full_upper * limit_y / limit_upper);
>  }
>  
> +// EOTF for BT.1886
> +static float eotf_1886(float x, float gamma, float l_min, float l_max)
> +{
> +    // Reference: ITU-R BT.1886, Annex 1
> +    // https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.1886-0-201103-I!!PDF-E.pdf
> +    const float l_max_gamma = powf(l_max, 1.0f / gamma);
> +    const float l_min_gamma = powf(l_min, 1.0f / gamma);
> +    const float l_diff_gamma = l_max_gamma - l_min_gamma;
> +    const float a = powf(l_diff_gamma, gamma);
> +    const float b = l_min_gamma / l_diff_gamma;
> +    const float adjusted_x = fmaxf(x + b, 0.0f);
> +
> +    return a * powf(adjusted_x, gamma);
> +}
> +
> +static float eotf_inv_srgb(float x)
> +{
> +    // Inverse sRGB EOTF (Electro-Optical Transfer Function) according to IEC 61966-2-1:1999
> +    // Section G.2 (Encoding transformation)
> +    // Reference: https://cdn.standards.iteh.ai/samples/10795/ae461684569b40bbbb2d9a22b1047f05/IEC-61966-2-1-1999-AMD1-2003.pdf
> +    return (x <= 0.04045f) ? x / 12.92f : powf((x + 0.055f) / 1.055f, 2.4f);
> +}
> +
> +static float apply_display_model(SiTiContext *s, float x)
> +{
> +    // Apply either BT.1886 or inverse sRGB EOTF based on the context
> +    if (s->eotf_function == BT1886) {
> +        // BT.1886 EOTF
> +        // Reference: ITU-R BT.1886-0, Annex 1
> +        // https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.1886-0-201103-I!!PDF-E.pdf
> +        return eotf_1886(x, s->gamma, 0.0f, 1.0f);
> +    } else {
> +        // Inverse sRGB EOTF
> +        return eotf_inv_srgb(x);
> +    }
> +}
> +
> +static float oetf_pq(float x)
> +{
> +    // PQ (Perceptual Quantizer) OETF according to ITU-R BT.2100-2
> +    // Reference: https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.2100-2-201807-I!!PDF-E.pdf
> +    // See page 5, Table 4
> +
> +    const float m1 = 0.1593017578125f;
> +    const float m2 = 78.84375f;
> +    const float c1 = 0.8359375f;
> +    const float c2 = 18.8515625f;
> +    const float c3 = 18.6875f;
> +
> +    float y = powf(x / 10000.0f, m1);
> +    return powf((c1 + c2 * y) / (1.0f + c3 * y), m2);
> +}
> +
> +static float oetf_pu21(float x, int mode);

This forward declaration can be avoided if you move oetf_pu21_wrapper 
below oetf_pu21.

> +
> +static float oetf_pu21_wrapper(float x) {

Nit: place curly brackets for function scopes on their own lines.

> +    int mode = 0; // Set the appropriate mode or pass it as a parameter
> +    return oetf_pu21(x, mode);

Why are you hardcoding mode to 0?

> +}
> +
> +static float oetf_pu21(float x, int mode)
> +{
> +    // Reference: "PU21: A novel perceptually uniform encoding for adapting existing quality metrics for HDR"
> +    // Rafał K. Mantiuk and M. Azimi, Picture Coding Symposium 2021
> +    // https://github.com/gfxdisp/pu21
> +    // Validate the mode
> +    const float *p;
> +    float p_min;
> +    float p_max;
> +    float numerator;
> +    float denominator;
> +    float base;
> +    float exponentiated;
> +    float scaled;
> +
> +    if (mode < 0 || mode > 3) {
> +        return x;  // Invalid mode, return input unchanged
> +    }

It would be best to check the mode while the option is being set and 
bail out in the initialization if the mode is invalid, instead of 
silently misbehaving when this function is called.

> +
> +    // Get the parameters and min/max values for the given mode
> +
> +
> +    p = PU21_MATRICES[mode];
> +    p_min = PU21_MIN_VALUES[mode];
> +    p_max = PU21_MAX_VALUES[mode];
> +
> +    // Declare all other variables at the beginning of the block
> +    numerator = p[0] + p[1] * powf(x, p[3]);
> +    denominator = 1.0f + p[2] * powf(x, p[3]);
> +    base = numerator / denominator;
> +
> +    // Calculate the exponentiated and scaled value
> +    exponentiated = powf(base, p[4]);
> +    scaled = p[6] * (exponentiated - p[5]);
> +
> +    // Normalize the result to the range [0, 1]
> +    return (scaled - p_min) / (p_max - p_min);
> +}
> +
>  // Applies sobel convolution
>  static void convolve_sobel(SiTiContext *s, const uint8_t *src, float *dst, int linesize)
>  {
> @@ -186,20 +344,20 @@ static void convolve_sobel(SiTiContext *s, const uint8_t *src, float *dst, int l
>      #define CONVOLVE(bps)                                           \
>      {                                                               \
>          uint##bps##_t *vsrc = (uint##bps##_t*)src;                  \
> -        for (int j = 1; j < s->height - 1; j++) {                   \
> -            for (int i = 1; i < s->width - 1; i++) {                \
> +        for (int img_j = 1; img_j < s->height - 1; img_j++) {                   \
> +            for (int img_i = 1; img_i < s->width - 1; img_i++) {                \
>                  x_conv_sum = 0.0;                                   \
>                  y_conv_sum = 0.0;                                   \
>                  for (int k = 0; k < filter_size; k++) {             \
>                      ki = k % filter_width - 1;                      \
>                      kj = floor(k / filter_width) - 1;               \
> -                    index = (j + kj) * stride + (i + ki);           \
> +                    index = (img_j + kj) * stride + (img_i + ki);           \
>                      data = s->full_range ? vsrc[index] : convert_full_range(factor, vsrc[index]); \
>                      x_conv_sum += data * X_FILTER[k];               \
>                      y_conv_sum += data * Y_FILTER[k];               \
>                  }                                                   \
>                  gradient = sqrt(x_conv_sum * x_conv_sum + y_conv_sum * y_conv_sum); \
> -                dst[(j - 1) * (s->width - 2) + (i - 1)] = gradient; \
> +                dst[(img_j - 1) * (s->width - 2) + (img_i - 1)] = gradient; \
>              }                                                       \
>          }                                                           \
>      }

This chunk just renames some variables, which doesn't seem necessary. It 
also seems unrelated to the main patch. When renaming like this is 
indeed necessary, it's a cosmetic change and it should be in a separate 
patch.

Also, pay attention to keeping the alignment of the backslashes at the 
end of the line when changing code inside macros.

> @@ -254,15 +412,16 @@ static float std_deviation(float *img_metrics, int width, int height)
>      int size = height * width;
>      double mean = 0.0;
>      double sqr_diff = 0;
> +    int j, i;
>  
> -    for (int j = 0; j < height; j++)
> -        for (int i = 0; i < width; i++)
> +    for (j = 0; j < height; j++)
> +        for (i = 0; i < width; i++)
>              mean += img_metrics[j * width + i];
>  
>      mean /= size;
>  
> -    for (int j = 0; j < height; j++) {
> -        for (int i = 0; i < width; i++) {
> +    for (j = 0; j < height; j++) {
> +        for (i = 0; i < width; i++) {
>              float mean_diff = img_metrics[j * width + i] - mean;
>              sqr_diff += (mean_diff * mean_diff);
>          }

Again, this chunk is just cosmetics. If it's unnecessary, it should be 
dropped. If it's unrelated, it should be split into its own patch.

> @@ -285,15 +444,46 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
>      float si;
>      float ti;
>  
> -    s->full_range = is_full_range(frame);
> +    s->full_range = is_full_range(frame); // Determine if full range is needed
>      s->nb_frames++;
>  
> +    // Apply EOTF and OETF transformations
> +    for (int idx = 0; idx < s->width * s->height; idx++) {

Nit++: 'i' is shorter than 'idx' :)

> +        float pixel = ((uint8_t*)frame->data[0])[idx] / 255.0f;
> +        float eotf = apply_display_model(s, pixel);
> +        float oetf = oetf_pq(eotf * s->l_max);
> +        ((uint8_t*)frame->data[0])[idx] = (uint8_t)(oetf * 255.0f);
> +    }
> +
>      // Calculate si and ti
>      convolve_sobel(s, frame->data[0], s->gradient_matrix, frame->linesize[0]);
>      calculate_motion(s, frame->data[0], s->motion_matrix, frame->linesize[0]);
>      si = std_deviation(s->gradient_matrix, s->width - 2, s->height - 2);
>      ti = std_deviation(s->motion_matrix, s->width, s->height);
>  
> +    // Apply HDR transformations if necessary
> +    if (s->hdr_mode != SDR) {
> +        float (*oetf_func)(float);
> +        if (s->calculation_domain == PQ) {
> +            oetf_func = oetf_pq;
> +        } else {
> +            oetf_func = oetf_pu21_wrapper;
> +        }
> +
> +        for (int i = 0; i < (s->width - 2) * (s->height - 2); i++) {
> +            s->gradient_matrix[i] = oetf_func(apply_display_model(s, s->gradient_matrix[i] / 255.0f)) * 255.0f;
> +        }
> +        for (int i = 0; i < s->width * s->height; i++) {
> +            s->motion_matrix[i] = oetf_func(apply_display_model(s, s->motion_matrix[i] / 255.0f)) * 255.0f;
> +        }
> +
> +        si = std_deviation(s->gradient_matrix, s->width - 2, s->height - 2);
> +        ti = std_deviation(s->motion_matrix, s->width, s->height);
> +    }
> +
> +    // Print SI and TI values for each frame
> +    av_log(ctx, AV_LOG_INFO, "Frame %"PRId64" - SI: %.6f, TI: %.6f\n", s->nb_frames, si, ti);

Is this behaviour expected? To always print the SI and TI values for 
each frame? Perhaps it should be an option (similar to "print_summary").

> +
>      // Calculate statistics
>      s->max_si  = fmaxf(si, s->max_si);
>      s->max_ti  = fmaxf(ti, s->max_ti);
> @@ -314,6 +504,15 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
>  
>  static const AVOption siti_options[] = {
>      { "print_summary", "Print summary showing average values", OFFSET(print_summary), AV_OPT_TYPE_BOOL, { .i64=0 }, 0, 1, FLAGS },
> +    { "hdr_mode", "HDR mode (0: SDR, 1: HDR10, 2: HLG)", OFFSET(hdr_mode), AV_OPT_TYPE_INT, {.i64=0}, 0, 2, FLAGS },
> +    { "bit_depth", "Bit depth (8, 10, or 12)", OFFSET(bit_depth), AV_OPT_TYPE_INT, {.i64=8}, 8, 12, FLAGS },
> +    { "color_range", "Color range (0: limited, 1: full)", OFFSET(color_range), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS },
> +    { "eotf_function", "EOTF function (0: BT.1886, 1: Inverse sRGB)", OFFSET(eotf_function), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS },
> +    { "calculation_domain", "Calculation domain (0: PQ, 1: PU21)", OFFSET(calculation_domain), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS },

Instead of writing the possible values as numbers in the help string, 
declare a list of constants that are accepted. Look at the "dct" option 
and its AV_OPT_TYPE_CONST entries in libavcodec/options_table.h for an 
example.

The same could be done for the other options you added.

> +    { "l_max", "Maximum luminance", OFFSET(l_max), AV_OPT_TYPE_FLOAT, {.dbl=300.0}, 0, 10000, FLAGS },
> +    { "l_min", "Minimum luminance", OFFSET(l_min), AV_OPT_TYPE_FLOAT, {.dbl=0.1}, 0, 1, FLAGS },
> +    { "gamma", "Gamma value for BT.1886", OFFSET(gamma), AV_OPT_TYPE_FLOAT, {.dbl=2.4}, 1, 3, FLAGS },
> +    { "pu21_mode", "PU21 mode (0: BANDING, 1: BANDING_GLARE, 2: PEAKS, 3: PEAKS_GLARE)", OFFSET(pu21_mode), AV_OPT_TYPE_INT, {.i64=0}, 0, 3, FLAGS },
>      { NULL }
>  };
>
diff mbox series

Patch

From e33fa1928164d986b5b62c02f00fab788a1f2f3b Mon Sep 17 00:00:00 2001
From: PoorvaGaikar <2003gaikarpoorva.com>
Date: Wed, 17 Jul 2024 23:36:56 +0530
Subject: [PATCH] libavfilter/siti: Update SITI filter to align with ITU-T
 P.910 07/22 and add HDR support

---
 libavfilter/vf_siti.c | 217 ++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 208 insertions(+), 9 deletions(-)

diff --git a/libavfilter/vf_siti.c b/libavfilter/vf_siti.c
index 722e7cecc7..2bbcaa0afd 100644
--- a/libavfilter/vf_siti.c
+++ b/libavfilter/vf_siti.c
@@ -47,6 +47,19 @@  static const int Y_FILTER[9] = {
     -1, -2, -1
 };
 
+static const float PU21_MATRICES[4][7] = {
+    // Reference: "PU21: A novel perceptually uniform encoding for adapting existing quality metrics for HDR"
+    // Rafał K. Mantiuk and M. Azimi, Picture Coding Symposium 2021
+    // https://github.com/gfxdisp/pu21
+    {1.070275272f, 0.4088273932f, 0.153224308f, 0.2520326168f, 1.063512885f, 1.14115047f, 521.4527484f},  // BANDING
+    {0.353487901f, 0.3734658629f, 8.277049286e-05f, 0.9062562627f, 0.09150303166f, 0.9099517204f, 596.3148142f},  // BANDING_GLARE
+    {1.043882782f, 0.6459495343f, 0.3194584211f, 0.374025247f, 1.114783422f, 1.095360363f, 384.9217577f},  // PEAKS
+    {816.885024f, 1479.463946f, 0.001253215609f, 0.9329636822f, 0.06746643971f, 1.573435413f, 419.6006374f}  // PEAKS_GLARE
+};
+
+static const float PU21_MIN_VALUES[4] = {-1.5580e-07f, 5.4705e-10f, 1.3674e-07f, -9.7360e-08f};
+static const float PU21_MAX_VALUES[4] = {520.4673f, 595.3939f, 380.9853f, 407.5066f};
+
 typedef struct SiTiContext {
     const AVClass *class;
     int pixel_depth;
@@ -63,8 +76,45 @@  typedef struct SiTiContext {
     float *motion_matrix;
     int full_range;
     int print_summary;
+    int hdr_mode;
+    int bit_depth;
+    int color_range;
+    int eotf_function;
+    int calculation_domain;
+    float l_max;
+    float l_min;
+    float gamma;
+    int pu21_mode;
 } SiTiContext;
 
+enum HdrMode {
+    SDR,
+    HDR10,
+    HLG
+};
+
+enum ColorRange {
+    LIMITED,
+    FULL
+};
+
+enum EotfFunction {
+    BT1886,
+    INV_SRGB
+};
+
+enum CalculationDomain {
+    PQ,
+    PU21
+};
+
+enum Pu21Mode {
+    BANDING,
+    BANDING_GLARE,
+    PEAKS,
+    PEAKS_GLARE
+};
+
 static const enum AVPixelFormat pix_fmts[] = {
     AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
     AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
@@ -78,6 +128,15 @@  static av_cold int init(AVFilterContext *ctx)
     SiTiContext *s = ctx->priv;
     s->max_si = 0;
     s->max_ti = 0;
+    s->hdr_mode = SDR;
+    s->bit_depth = 8;
+    s->color_range = LIMITED;
+    s->eotf_function = BT1886;
+    s->calculation_domain = PQ;
+    s->l_max = 300.0;
+    s->l_min = 0.1;
+    s->gamma = 2.4;
+    s->pu21_mode = BANDING;
     return 0;
 }
 
@@ -166,6 +225,105 @@  static uint16_t convert_full_range(int factor, uint16_t y)
     return (full_upper * limit_y / limit_upper);
 }
 
+// EOTF for BT.1886
+static float eotf_1886(float x, float gamma, float l_min, float l_max)
+{
+    // Reference: ITU-R BT.1886, Annex 1
+    // https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.1886-0-201103-I!!PDF-E.pdf
+    const float l_max_gamma = powf(l_max, 1.0f / gamma);
+    const float l_min_gamma = powf(l_min, 1.0f / gamma);
+    const float l_diff_gamma = l_max_gamma - l_min_gamma;
+    const float a = powf(l_diff_gamma, gamma);
+    const float b = l_min_gamma / l_diff_gamma;
+    const float adjusted_x = fmaxf(x + b, 0.0f);
+
+    return a * powf(adjusted_x, gamma);
+}
+
+static float eotf_inv_srgb(float x)
+{
+    // Inverse sRGB EOTF (Electro-Optical Transfer Function) according to IEC 61966-2-1:1999
+    // Section G.2 (Encoding transformation)
+    // Reference: https://cdn.standards.iteh.ai/samples/10795/ae461684569b40bbbb2d9a22b1047f05/IEC-61966-2-1-1999-AMD1-2003.pdf
+    return (x <= 0.04045f) ? x / 12.92f : powf((x + 0.055f) / 1.055f, 2.4f);
+}
+
+static float apply_display_model(SiTiContext *s, float x)
+{
+    // Apply either BT.1886 or inverse sRGB EOTF based on the context
+    if (s->eotf_function == BT1886) {
+        // BT.1886 EOTF
+        // Reference: ITU-R BT.1886-0, Annex 1
+        // https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.1886-0-201103-I!!PDF-E.pdf
+        return eotf_1886(x, s->gamma, 0.0f, 1.0f);
+    } else {
+        // Inverse sRGB EOTF
+        return eotf_inv_srgb(x);
+    }
+}
+
+static float oetf_pq(float x)
+{
+    // PQ (Perceptual Quantizer) OETF according to ITU-R BT.2100-2
+    // Reference: https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.2100-2-201807-I!!PDF-E.pdf
+    // See page 5, Table 4
+
+    const float m1 = 0.1593017578125f;
+    const float m2 = 78.84375f;
+    const float c1 = 0.8359375f;
+    const float c2 = 18.8515625f;
+    const float c3 = 18.6875f;
+
+    float y = powf(x / 10000.0f, m1);
+    return powf((c1 + c2 * y) / (1.0f + c3 * y), m2);
+}
+
+static float oetf_pu21(float x, int mode);
+
+static float oetf_pu21_wrapper(float x) {
+    int mode = 0; // Set the appropriate mode or pass it as a parameter
+    return oetf_pu21(x, mode);
+}
+
+static float oetf_pu21(float x, int mode)
+{
+    // Reference: "PU21: A novel perceptually uniform encoding for adapting existing quality metrics for HDR"
+    // Rafał K. Mantiuk and M. Azimi, Picture Coding Symposium 2021
+    // https://github.com/gfxdisp/pu21
+    // Validate the mode
+    const float *p;
+    float p_min;
+    float p_max;
+    float numerator;
+    float denominator;
+    float base;
+    float exponentiated;
+    float scaled;
+
+    if (mode < 0 || mode > 3) {
+        return x;  // Invalid mode, return input unchanged
+    }
+
+    // Get the parameters and min/max values for the given mode
+
+
+    p = PU21_MATRICES[mode];
+    p_min = PU21_MIN_VALUES[mode];
+    p_max = PU21_MAX_VALUES[mode];
+
+    // Declare all other variables at the beginning of the block
+    numerator = p[0] + p[1] * powf(x, p[3]);
+    denominator = 1.0f + p[2] * powf(x, p[3]);
+    base = numerator / denominator;
+
+    // Calculate the exponentiated and scaled value
+    exponentiated = powf(base, p[4]);
+    scaled = p[6] * (exponentiated - p[5]);
+
+    // Normalize the result to the range [0, 1]
+    return (scaled - p_min) / (p_max - p_min);
+}
+
 // Applies sobel convolution
 static void convolve_sobel(SiTiContext *s, const uint8_t *src, float *dst, int linesize)
 {
@@ -186,20 +344,20 @@  static void convolve_sobel(SiTiContext *s, const uint8_t *src, float *dst, int l
     #define CONVOLVE(bps)                                           \
     {                                                               \
         uint##bps##_t *vsrc = (uint##bps##_t*)src;                  \
-        for (int j = 1; j < s->height - 1; j++) {                   \
-            for (int i = 1; i < s->width - 1; i++) {                \
+        for (int img_j = 1; img_j < s->height - 1; img_j++) {                   \
+            for (int img_i = 1; img_i < s->width - 1; img_i++) {                \
                 x_conv_sum = 0.0;                                   \
                 y_conv_sum = 0.0;                                   \
                 for (int k = 0; k < filter_size; k++) {             \
                     ki = k % filter_width - 1;                      \
                     kj = floor(k / filter_width) - 1;               \
-                    index = (j + kj) * stride + (i + ki);           \
+                    index = (img_j + kj) * stride + (img_i + ki);           \
                     data = s->full_range ? vsrc[index] : convert_full_range(factor, vsrc[index]); \
                     x_conv_sum += data * X_FILTER[k];               \
                     y_conv_sum += data * Y_FILTER[k];               \
                 }                                                   \
                 gradient = sqrt(x_conv_sum * x_conv_sum + y_conv_sum * y_conv_sum); \
-                dst[(j - 1) * (s->width - 2) + (i - 1)] = gradient; \
+                dst[(img_j - 1) * (s->width - 2) + (img_i - 1)] = gradient; \
             }                                                       \
         }                                                           \
     }
@@ -254,15 +412,16 @@  static float std_deviation(float *img_metrics, int width, int height)
     int size = height * width;
     double mean = 0.0;
     double sqr_diff = 0;
+    int j, i;
 
-    for (int j = 0; j < height; j++)
-        for (int i = 0; i < width; i++)
+    for (j = 0; j < height; j++)
+        for (i = 0; i < width; i++)
             mean += img_metrics[j * width + i];
 
     mean /= size;
 
-    for (int j = 0; j < height; j++) {
-        for (int i = 0; i < width; i++) {
+    for (j = 0; j < height; j++) {
+        for (i = 0; i < width; i++) {
             float mean_diff = img_metrics[j * width + i] - mean;
             sqr_diff += (mean_diff * mean_diff);
         }
@@ -285,15 +444,46 @@  static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
     float si;
     float ti;
 
-    s->full_range = is_full_range(frame);
+    s->full_range = is_full_range(frame); // Determine if full range is needed
     s->nb_frames++;
 
+    // Apply EOTF and OETF transformations
+    for (int idx = 0; idx < s->width * s->height; idx++) {
+        float pixel = ((uint8_t*)frame->data[0])[idx] / 255.0f;
+        float eotf = apply_display_model(s, pixel);
+        float oetf = oetf_pq(eotf * s->l_max);
+        ((uint8_t*)frame->data[0])[idx] = (uint8_t)(oetf * 255.0f);
+    }
+
     // Calculate si and ti
     convolve_sobel(s, frame->data[0], s->gradient_matrix, frame->linesize[0]);
     calculate_motion(s, frame->data[0], s->motion_matrix, frame->linesize[0]);
     si = std_deviation(s->gradient_matrix, s->width - 2, s->height - 2);
     ti = std_deviation(s->motion_matrix, s->width, s->height);
 
+    // Apply HDR transformations if necessary
+    if (s->hdr_mode != SDR) {
+        float (*oetf_func)(float);
+        if (s->calculation_domain == PQ) {
+            oetf_func = oetf_pq;
+        } else {
+            oetf_func = oetf_pu21_wrapper;
+        }
+
+        for (int i = 0; i < (s->width - 2) * (s->height - 2); i++) {
+            s->gradient_matrix[i] = oetf_func(apply_display_model(s, s->gradient_matrix[i] / 255.0f)) * 255.0f;
+        }
+        for (int i = 0; i < s->width * s->height; i++) {
+            s->motion_matrix[i] = oetf_func(apply_display_model(s, s->motion_matrix[i] / 255.0f)) * 255.0f;
+        }
+
+        si = std_deviation(s->gradient_matrix, s->width - 2, s->height - 2);
+        ti = std_deviation(s->motion_matrix, s->width, s->height);
+    }
+
+    // Print SI and TI values for each frame
+    av_log(ctx, AV_LOG_INFO, "Frame %"PRId64" - SI: %.6f, TI: %.6f\n", s->nb_frames, si, ti);
+
     // Calculate statistics
     s->max_si  = fmaxf(si, s->max_si);
     s->max_ti  = fmaxf(ti, s->max_ti);
@@ -314,6 +504,15 @@  static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
 
 static const AVOption siti_options[] = {
     { "print_summary", "Print summary showing average values", OFFSET(print_summary), AV_OPT_TYPE_BOOL, { .i64=0 }, 0, 1, FLAGS },
+    { "hdr_mode", "HDR mode (0: SDR, 1: HDR10, 2: HLG)", OFFSET(hdr_mode), AV_OPT_TYPE_INT, {.i64=0}, 0, 2, FLAGS },
+    { "bit_depth", "Bit depth (8, 10, or 12)", OFFSET(bit_depth), AV_OPT_TYPE_INT, {.i64=8}, 8, 12, FLAGS },
+    { "color_range", "Color range (0: limited, 1: full)", OFFSET(color_range), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS },
+    { "eotf_function", "EOTF function (0: BT.1886, 1: Inverse sRGB)", OFFSET(eotf_function), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS },
+    { "calculation_domain", "Calculation domain (0: PQ, 1: PU21)", OFFSET(calculation_domain), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS },
+    { "l_max", "Maximum luminance", OFFSET(l_max), AV_OPT_TYPE_FLOAT, {.dbl=300.0}, 0, 10000, FLAGS },
+    { "l_min", "Minimum luminance", OFFSET(l_min), AV_OPT_TYPE_FLOAT, {.dbl=0.1}, 0, 1, FLAGS },
+    { "gamma", "Gamma value for BT.1886", OFFSET(gamma), AV_OPT_TYPE_FLOAT, {.dbl=2.4}, 1, 3, FLAGS },
+    { "pu21_mode", "PU21 mode (0: BANDING, 1: BANDING_GLARE, 2: PEAKS, 3: PEAKS_GLARE)", OFFSET(pu21_mode), AV_OPT_TYPE_INT, {.i64=0}, 0, 3, FLAGS },
     { NULL }
 };
 
-- 
2.43.0