diff mbox series

[FFmpeg-devel] avcodec/speedhq: fix decoding 422 subsampling when width is not multiple of 16

Message ID 20210301144122.4731-1-onemda@gmail.com
State New
Headers show
Series [FFmpeg-devel] avcodec/speedhq: fix decoding 422 subsampling when width is not multiple of 16 | expand

Checks

Context Check Description
andriy/x86_make success Make finished
andriy/x86_make_fate success Make fate finished
andriy/PPC64_make success Make finished
andriy/PPC64_make_fate success Make fate finished

Commit Message

Paul B Mahol March 1, 2021, 2:41 p.m. UTC
Signed-off-by: Paul B Mahol <onemda@gmail.com>
---
 libavcodec/speedhq.c | 42 ++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 40 insertions(+), 2 deletions(-)

Comments

Anton Khirnov March 2, 2021, 9:01 a.m. UTC | #1
Quoting Paul B Mahol (2021-03-01 15:41:22)
> Signed-off-by: Paul B Mahol <onemda@gmail.com>
> ---
>  libavcodec/speedhq.c | 42 ++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 40 insertions(+), 2 deletions(-)
> 
> diff --git a/libavcodec/speedhq.c b/libavcodec/speedhq.c
> index 759fc6dfc5..2a2fc42efa 100644
> --- a/libavcodec/speedhq.c
> +++ b/libavcodec/speedhq.c
> @@ -283,6 +283,7 @@ static int decode_speedhq_field(const SHQContext *s, const uint8_t *buf, int buf
>      int linesize_cb = frame->linesize[1] * line_stride;
>      int linesize_cr = frame->linesize[2] * line_stride;
>      int linesize_a;
> +    GetBitContext gb;
>  
>      if (s->alpha_type != SHQ_NO_ALPHA)
>          linesize_a = frame->linesize[3] * line_stride;
> @@ -304,7 +305,6 @@ static int decode_speedhq_field(const SHQContext *s, const uint8_t *buf, int buf
>      }
>  
>      for (slice_number = 0; slice_number < 4; slice_number++) {
> -        GetBitContext gb;
>          uint32_t slice_begin, slice_end;
>          int x, y;
>  
> @@ -333,7 +333,7 @@ static int decode_speedhq_field(const SHQContext *s, const uint8_t *buf, int buf
>                  dest_a = frame->data[3] + frame->linesize[3] * (y + field_number);
>              }
>  
> -            for (x = 0; x < frame->width; x += 16) {
> +            for (x = 0; x < frame->width - 8 * (s->subsampling != SHQ_SUBSAMPLING_444); x += 16) {
>                  /* Decode the four luma blocks. */
>                  if ((ret = decode_dct_block(s, &gb, last_dc, 0, dest_y, linesize_y)) < 0)
>                      return ret;
> @@ -402,6 +402,44 @@ static int decode_speedhq_field(const SHQContext *s, const uint8_t *buf, int buf
>          }
>      }
>  
> +    if (s->subsampling == SHQ_SUBSAMPLING_444)
> +        return 0;
> +
> +    if (!(frame->width & 15))
> +        return 0;

This is hard to follow. Seems to me it would be simpler to factor the
below block into a separate function and retain a single successful
return point in this one.

> +
> +    for (int y = 0; y < frame->height; y += 16 * line_stride) {
> +        int last_dc[4] = { 1024, 1024, 1024, 1024 };
> +        uint8_t *dest_y, *dest_cb, *dest_cr;
> +        int x = frame->width - 8;
> +
> +        dest_y = frame->data[0] + frame->linesize[0] * (y + field_number) + x;
> +        if (s->subsampling == SHQ_SUBSAMPLING_420) {

Commit message only says 422.

How come there's the same number of chroma blocks for 422 and 420?

> +            dest_cb = frame->data[1] + frame->linesize[1] * (y/2 + field_number) + x / 2;
> +            dest_cr = frame->data[2] + frame->linesize[2] * (y/2 + field_number) + x / 2;
> +        } else if (s->subsampling == SHQ_SUBSAMPLING_422) {
> +            dest_cb = frame->data[1] + frame->linesize[1] * (y + field_number) + x / 2;
> +            dest_cr = frame->data[2] + frame->linesize[2] * (y + field_number) + x / 2;
> +        }
> +
> +        if ((ret = decode_dct_block(s, &gb, last_dc, 0, dest_y, linesize_y)) < 0)
> +            return ret;
> +        if ((ret = decode_dct_block(s, &gb, last_dc, 0, dest_y + 8, linesize_y)) < 0)
> +            return ret;
> +        if ((ret = decode_dct_block(s, &gb, last_dc, 0, dest_y + 8 * linesize_y, linesize_y)) < 0)
> +            return ret;
> +        if ((ret = decode_dct_block(s, &gb, last_dc, 0, dest_y + 8 * linesize_y + 8, linesize_y)) < 0)
> +            return ret;
> +        if ((ret = decode_dct_block(s, &gb, last_dc, 1, dest_cb, linesize_cb)) < 0)
> +            return ret;
> +        if ((ret = decode_dct_block(s, &gb, last_dc, 2, dest_cr, linesize_cr)) < 0)
> +            return ret;
> +        if ((ret = decode_dct_block(s, &gb, last_dc, 1, dest_cb + 8 * linesize_cb, linesize_cb)) < 0)
> +            return ret;
> +        if ((ret = decode_dct_block(s, &gb, last_dc, 2, dest_cr + 8 * linesize_cr, linesize_cr)) < 0)
> +            return ret;
> +    }
> +
>      return 0;

A test would be nice.
diff mbox series

Patch

diff --git a/libavcodec/speedhq.c b/libavcodec/speedhq.c
index 759fc6dfc5..2a2fc42efa 100644
--- a/libavcodec/speedhq.c
+++ b/libavcodec/speedhq.c
@@ -283,6 +283,7 @@  static int decode_speedhq_field(const SHQContext *s, const uint8_t *buf, int buf
     int linesize_cb = frame->linesize[1] * line_stride;
     int linesize_cr = frame->linesize[2] * line_stride;
     int linesize_a;
+    GetBitContext gb;
 
     if (s->alpha_type != SHQ_NO_ALPHA)
         linesize_a = frame->linesize[3] * line_stride;
@@ -304,7 +305,6 @@  static int decode_speedhq_field(const SHQContext *s, const uint8_t *buf, int buf
     }
 
     for (slice_number = 0; slice_number < 4; slice_number++) {
-        GetBitContext gb;
         uint32_t slice_begin, slice_end;
         int x, y;
 
@@ -333,7 +333,7 @@  static int decode_speedhq_field(const SHQContext *s, const uint8_t *buf, int buf
                 dest_a = frame->data[3] + frame->linesize[3] * (y + field_number);
             }
 
-            for (x = 0; x < frame->width; x += 16) {
+            for (x = 0; x < frame->width - 8 * (s->subsampling != SHQ_SUBSAMPLING_444); x += 16) {
                 /* Decode the four luma blocks. */
                 if ((ret = decode_dct_block(s, &gb, last_dc, 0, dest_y, linesize_y)) < 0)
                     return ret;
@@ -402,6 +402,44 @@  static int decode_speedhq_field(const SHQContext *s, const uint8_t *buf, int buf
         }
     }
 
+    if (s->subsampling == SHQ_SUBSAMPLING_444)
+        return 0;
+
+    if (!(frame->width & 15))
+        return 0;
+
+    for (int y = 0; y < frame->height; y += 16 * line_stride) {
+        int last_dc[4] = { 1024, 1024, 1024, 1024 };
+        uint8_t *dest_y, *dest_cb, *dest_cr;
+        int x = frame->width - 8;
+
+        dest_y = frame->data[0] + frame->linesize[0] * (y + field_number) + x;
+        if (s->subsampling == SHQ_SUBSAMPLING_420) {
+            dest_cb = frame->data[1] + frame->linesize[1] * (y/2 + field_number) + x / 2;
+            dest_cr = frame->data[2] + frame->linesize[2] * (y/2 + field_number) + x / 2;
+        } else if (s->subsampling == SHQ_SUBSAMPLING_422) {
+            dest_cb = frame->data[1] + frame->linesize[1] * (y + field_number) + x / 2;
+            dest_cr = frame->data[2] + frame->linesize[2] * (y + field_number) + x / 2;
+        }
+
+        if ((ret = decode_dct_block(s, &gb, last_dc, 0, dest_y, linesize_y)) < 0)
+            return ret;
+        if ((ret = decode_dct_block(s, &gb, last_dc, 0, dest_y + 8, linesize_y)) < 0)
+            return ret;
+        if ((ret = decode_dct_block(s, &gb, last_dc, 0, dest_y + 8 * linesize_y, linesize_y)) < 0)
+            return ret;
+        if ((ret = decode_dct_block(s, &gb, last_dc, 0, dest_y + 8 * linesize_y + 8, linesize_y)) < 0)
+            return ret;
+        if ((ret = decode_dct_block(s, &gb, last_dc, 1, dest_cb, linesize_cb)) < 0)
+            return ret;
+        if ((ret = decode_dct_block(s, &gb, last_dc, 2, dest_cr, linesize_cr)) < 0)
+            return ret;
+        if ((ret = decode_dct_block(s, &gb, last_dc, 1, dest_cb + 8 * linesize_cb, linesize_cb)) < 0)
+            return ret;
+        if ((ret = decode_dct_block(s, &gb, last_dc, 2, dest_cr + 8 * linesize_cr, linesize_cr)) < 0)
+            return ret;
+    }
+
     return 0;
 }