diff mbox

[FFmpeg-devel,v2] avcodec/vc1: fix overlap smoothing filter for P frames

Message ID 79832fd5-4b54-ed48-d76b-349df2e1a526@carpalis.nl
State Accepted
Commit a43cdd769301af21d062d002ec1487908e44474a
Headers show

Commit Message

Jerome Borsboom May 30, 2018, 11:53 a.m. UTC
The v_overlap_filter needs to run on the colocated block of the previous
macroblock. For the luma plane, the colocated block is located two blocks
to the left instead of one. In addition, the overlap filter needs to run
on the non-edge blocks of the first macroblock row and column.

Signed-off-by: Jerome Borsboom <jerome.borsboom@carpalis.nl>
---
This is an improved patch that should also fix the remaining frames in SSL0013.rcv.

 libavcodec/vc1_loopfilter.c | 60 ++++++++++++++++++++++++++-------------------
 1 file changed, 35 insertions(+), 25 deletions(-)

Comments

Carl Eugen Hoyos May 30, 2018, 1:05 p.m. UTC | #1
2018-05-30 13:53 GMT+02:00, Jerome Borsboom <jerome.borsboom@carpalis.nl>:
> The v_overlap_filter needs to run on the colocated block of the previous
> macroblock. For the luma plane, the colocated block is located two blocks
> on the left instead of one. In addition, the overlap filter needs to run
> on the non-edge blocks of the first macroblock row and column.
>
> Signed-off-by: Jerome Borsboom <jerome.borsboom@carpalis.nl>
> ---
> This is an improved patch that should also fix the remaining frames in
> SSL0013.rcv.

The patch also fixes SSL0014.rcv. The only sample in this directory
that still doesn't decode bit-exact is SSL0015.rcv; I don't know if that
issue is also loopfilter-related.

Thank you, Carl Eugen
Jerome Borsboom June 4, 2018, 8:45 a.m. UTC | #2
> The patch also fixes SSL0014.rcv, the only sample in this directory
> that still doesn't decode bit-exact is SSL0015.rcv, I don't know if the
> issue is also loopfilter-related.
> 
> Thank you, Carl Eugen

Could someone with access to the test files check the output of the
Intel hardware decoder through VAAPI for file SSL0015.rcv? I am running
into an issue that may be a hardware bug, but I only have a Haswell
platform to test on. It looks like the hardware tries to read beyond the
end of the slice data and subsequently fails to output the last
macroblock of the image.


ffmpeg -hwaccel vaapi -i SSL0015.rcv -pix_fmt yuv420p -f framecrc -

The frames of interest are frames 221 and 301. The CRCs from the
reference decoder are:

0,        221,        221,        1,    38016, 0x0c2f9de6
0,        301,        301,        1,    38016, 0x6877442f

My Haswell gives:

0,        221,        221,        1,    38016, 0xd8709e80
0,        301,        301,        1,    38016, 0x20bd44e7


Thanks,

Jerome
Carl Eugen Hoyos June 16, 2018, 5:18 p.m. UTC | #3
2018-05-30 13:53 GMT+02:00, Jerome Borsboom <jerome.borsboom@carpalis.nl>:
> The v_overlap_filter needs to run on the colocated block of the previous
> macroblock. For the luma plane, the colocated block is located two blocks
> on the left instead of one. In addition, the overlap filter needs to run
> on the non-edge blocks of the first macroblock row and column.
>
> Signed-off-by: Jerome Borsboom <jerome.borsboom@carpalis.nl>
> ---
> This is an improved patch that should also fix the remaining frames in
> SSL0013.rcv.

Patch applied.

Thank you, Carl Eugen
diff mbox

Patch

diff --git a/libavcodec/vc1_loopfilter.c b/libavcodec/vc1_loopfilter.c
index 4c0de7c025..aceb1f77ff 100644
--- a/libavcodec/vc1_loopfilter.c
+++ b/libavcodec/vc1_loopfilter.c
@@ -64,27 +64,23 @@  void ff_vc1_loop_filter_iblk(VC1Context *v, int pq)
 static av_always_inline void vc1_h_overlap_filter(VC1Context *v, int16_t (*left_block)[64],
                                                   int16_t (*right_block)[64], int block_num)
 {
-    if (left_block != right_block || (block_num & 5) == 1) {
-        if (block_num > 3)
-            v->vc1dsp.vc1_h_s_overlap(left_block[block_num], right_block[block_num]);
-        else if (block_num & 1)
-            v->vc1dsp.vc1_h_s_overlap(right_block[block_num - 1], right_block[block_num]);
-        else
-            v->vc1dsp.vc1_h_s_overlap(left_block[block_num + 1], right_block[block_num]);
-    }
+    if (block_num > 3)
+        v->vc1dsp.vc1_h_s_overlap(left_block[block_num], right_block[block_num]);
+    else if (block_num & 1)
+        v->vc1dsp.vc1_h_s_overlap(right_block[block_num - 1], right_block[block_num]);
+    else
+        v->vc1dsp.vc1_h_s_overlap(left_block[block_num + 1], right_block[block_num]);
 }
 
 static av_always_inline void vc1_v_overlap_filter(VC1Context *v, int16_t (*top_block)[64],
                                                   int16_t (*bottom_block)[64], int block_num)
 {
-    if (top_block != bottom_block || block_num & 2) {
-        if (block_num > 3)
-            v->vc1dsp.vc1_v_s_overlap(top_block[block_num], bottom_block[block_num]);
-        else if (block_num & 2)
-            v->vc1dsp.vc1_v_s_overlap(bottom_block[block_num - 2], bottom_block[block_num]);
-        else
-            v->vc1dsp.vc1_v_s_overlap(top_block[block_num + 2], bottom_block[block_num]);
-    }
+    if (block_num > 3)
+        v->vc1dsp.vc1_v_s_overlap(top_block[block_num], bottom_block[block_num]);
+    else if (block_num & 2)
+        v->vc1dsp.vc1_v_s_overlap(bottom_block[block_num - 2], bottom_block[block_num]);
+    else
+        v->vc1dsp.vc1_v_s_overlap(top_block[block_num + 2], bottom_block[block_num]);
 }
 
 void ff_vc1_i_overlap_filter(VC1Context *v)
@@ -108,21 +104,28 @@  void ff_vc1_i_overlap_filter(VC1Context *v)
      * borders. Therefore, the H overlap trails by one MB col and the
      * V overlap trails by one MB row. This is reflected in the time at which
      * we run the put_pixels loop, i.e. delayed by one row and one column. */
-    for (i = 0; i < block_count; i++)
+    for (i = 0; i < block_count; i++) {
+        if (s->mb_x == 0 && (i & 5) != 1)
+            continue;
+
         if (v->pq >= 9 || v->condover == CONDOVER_ALL ||
-            (v->over_flags_plane[mb_pos] && ((i & 5) == 1 || (s->mb_x && v->over_flags_plane[mb_pos - 1]))))
+            (v->over_flags_plane[mb_pos] && ((i & 5) == 1 || v->over_flags_plane[mb_pos - 1])))
             vc1_h_overlap_filter(v, s->mb_x ? left_blk : cur_blk, cur_blk, i);
+    }
 
     if (v->fcm != ILACE_FRAME)
         for (i = 0; i < block_count; i++) {
+            if (s->first_slice_line && !(i & 2))
+                continue;
+
             if (s->mb_x && (v->pq >= 9 || v->condover == CONDOVER_ALL ||
                 (v->over_flags_plane[mb_pos - 1] &&
-                 ((i & 2) || (!s->first_slice_line && v->over_flags_plane[mb_pos - 1 - s->mb_stride])))))
+                 ((i & 2) || v->over_flags_plane[mb_pos - 1 - s->mb_stride]))))
                 vc1_v_overlap_filter(v, s->first_slice_line ? left_blk : topleft_blk, left_blk, i);
             if (s->mb_x == s->mb_width - 1)
                 if (v->pq >= 9 || v->condover == CONDOVER_ALL ||
                     (v->over_flags_plane[mb_pos] &&
-                     ((i & 2) || (!s->first_slice_line && v->over_flags_plane[mb_pos - s->mb_stride]))))
+                     ((i & 2) || v->over_flags_plane[mb_pos - s->mb_stride])))
                     vc1_v_overlap_filter(v, s->first_slice_line ? cur_blk : top_blk, cur_blk, i);
         }
 }
@@ -139,18 +142,25 @@  void ff_vc1_p_overlap_filter(VC1Context *v)
     left_blk = v->block[v->left_blk_idx];
     cur_blk = v->block[v->cur_blk_idx];
 
-    for (i = 0; i < block_count; i++)
-        if (v->mb_type[0][s->block_index[i]] && (s->mb_x == 0 || v->mb_type[0][s->block_index[i] - 1]))
+    for (i = 0; i < block_count; i++) {
+        if (s->mb_x == 0 && (i & 5) != 1)
+            continue;
+
+        if (v->mb_type[0][s->block_index[i]] && v->mb_type[0][s->block_index[i] - 1])
             vc1_h_overlap_filter(v, s->mb_x ? left_blk : cur_blk, cur_blk, i);
+    }
 
     if (v->fcm != ILACE_FRAME)
         for (i = 0; i < block_count; i++) {
-            if (s->mb_x && v->mb_type[0][s->block_index[i] - 1] &&
-                (s->first_slice_line || v->mb_type[0][s->block_index[i] - s->block_wrap[i] - 1]))
+            if (s->first_slice_line && !(i & 2))
+                continue;
+
+            if (s->mb_x && v->mb_type[0][s->block_index[i] - 2 + (i > 3)] &&
+                v->mb_type[0][s->block_index[i] - s->block_wrap[i] - 2 + (i > 3)])
                 vc1_v_overlap_filter(v, s->first_slice_line ? left_blk : topleft_blk, left_blk, i);
             if (s->mb_x == s->mb_width - 1)
                 if (v->mb_type[0][s->block_index[i]] &&
-                    (s->first_slice_line || v->mb_type[0][s->block_index[i] - s->block_wrap[i]]))
+                    v->mb_type[0][s->block_index[i] - s->block_wrap[i]])
                     vc1_v_overlap_filter(v, s->first_slice_line ? cur_blk : top_blk, cur_blk, i);
         }
 }