diff mbox series

[FFmpeg-devel,2/3] h274: avoid copying AVFilmGrainH274Params into the stack frame

Message ID 20220305165833.18668-2-ffmpeg@haasn.xyz
State New
Headers show
Series [FFmpeg-devel,1/3] h274: correctly infer missing comp model values | expand

Commit Message

Niklas Haas March 5, 2022, 4:58 p.m. UTC
From: Niklas Haas <git@haasn.dev>

There's very little reason to make a local copy of this entire ~10 kB
struct on the stack, only to precompute three minor arithmetic
adjustments to the chroma planes' component model values. Just move that
logic into the per-block generate() call instead.

Signed-off-by: Niklas Haas <git@haasn.dev>
---
 libavcodec/h274.c | 29 ++++++++++++-----------------
 1 file changed, 12 insertions(+), 17 deletions(-)
diff mbox series

Patch

diff --git a/libavcodec/h274.c b/libavcodec/h274.c
index 170086543f..265bd49ea1 100644
--- a/libavcodec/h274.c
+++ b/libavcodec/h274.c
@@ -192,14 +192,18 @@  static av_always_inline void generate(int8_t *out, int out_stride,
         return;
     }
 
-    h = num_values > 1 ? av_clip(h274->comp_model_value[c][s][1], 2, 14) - 2 : 6;
-    v = num_values > 2 ? av_clip(h274->comp_model_value[c][s][2], 2, 14) - 2 : h;
-    init_slice(database, h, v);
-
     scale = h274->comp_model_value[c][s][0];
     if (invert)
         scale = -scale;
+    if (c > 0)
+        scale >>= 1; // reduce intensity for chroma (as per SMPTE RDD 5-2006)
 
+    h = num_values > 1 ? h274->comp_model_value[c][s][1] : 8;
+    v = num_values > 2 ? h274->comp_model_value[c][s][2] : h;
+    h = av_clip(h << (c > 0 ? 1 : 0), 2, 14) - 2;
+    v = av_clip(v << (c > 0 ? 1 : 0), 2, 14) - 2;
+
+    init_slice(database, h, v);
     synth_grain_8x8_c(out, out_stride, scale, shift,
                       &database->db[h][v][y_offset][x_offset]);
 
@@ -219,9 +223,9 @@  int ff_h274_apply_film_grain(AVFrame *out_frame, const AVFrame *in_frame,
                              H274FilmGrainDatabase *database,
                              const AVFilmGrainParams *params)
 {
-    AVFilmGrainH274Params h274 = params->codec.h274;
+    const AVFilmGrainH274Params *h274 = &params->codec.h274;
     av_assert1(params->type == AV_FILM_GRAIN_PARAMS_H274);
-    if (h274.model_id != 0)
+    if (h274->model_id != 0)
         return AVERROR_PATCHWELCOME;
 
     av_assert1(out_frame->format == in_frame->format);
@@ -241,21 +245,12 @@  int ff_h274_apply_film_grain(AVFrame *out_frame, const AVFrame *in_frame,
         const uint8_t * const in = in_frame->data[c];
         const int in_stride = in_frame->linesize[c];
 
-        if (!h274.component_model_present[c]) {
+        if (!h274->component_model_present[c]) {
             av_image_copy_plane(out, out_stride, in, in_stride,
                                 width * sizeof(uint8_t), height);
             continue;
         }
 
-        if (c > 0) {
-            // Adaptation for 4:2:0 chroma subsampling
-            for (int i = 0; i < h274.num_intensity_intervals[c]; i++) {
-                h274.comp_model_value[c][i][0] >>= 1;
-                h274.comp_model_value[c][i][1] *= 2;
-                h274.comp_model_value[c][i][2] *= 2;
-            }
-        }
-
         // Film grain synthesis is done in 8x8 blocks, but the PRNG state is
         // only advanced in 16x16 blocks, so use a nested loop
         for (int y = 0; y < height; y += 16) {
@@ -271,7 +266,7 @@  int ff_h274_apply_film_grain(AVFrame *out_frame, const AVFrame *in_frame,
                     for (int xx = 0; xx < 16 && x+xx < width; xx += 8) {
                         generate(grain + (y+yy) * grain_stride + (x+xx), grain_stride,
                                  in + (y+yy) * in_stride + (x+xx), in_stride,
-                                 database, &h274, c, invert, (x+xx) > 0,
+                                 database, h274, c, invert, (x+xx) > 0,
                                  y_offset + yy, x_offset + xx);
                     }
                 }