diff mbox series

[FFmpeg-devel,v4] avcodec/vvc: Don't use large array on stack

Message ID tencent_6F007D68827C12AF7E3999C0C9B4DC6BA20A@qq.com
State New
Headers show
Series [FFmpeg-devel,v4] avcodec/vvc: Don't use large array on stack | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Zhao Zhili Sept. 22, 2024, 3:39 a.m. UTC
From: Zhao Zhili <zhilizhao@tencent.com>

tmp_array in dmvr_hv takes 33024 bytes on stack, which can be
dangerous.
---
v4:
  1. Add DMVR_FILTER2 macro
  2. Process first line out of loop to remove condition check.

 libavcodec/vvc/inter_template.c | 33 ++++++++++++++++++---------------
 1 file changed, 18 insertions(+), 15 deletions(-)

Comments

Nuo Mi Oct. 1, 2024, 3:34 a.m. UTC | #1
On Sun, Sep 22, 2024 at 11:40 AM Zhao Zhili <quinkblack@foxmail.com> wrote:

> From: Zhao Zhili <zhilizhao@tencent.com>
>
> tmp_array in dmvr_hv takes 33024 bytes on stack, which can be
> dangerous.
> ---
> v4:
>   1. Add DMVR_FILTER2 macro
>   2. Process first line out of loop to remove condition check.
>
Thank you, Zhili. Merged.

>
>  libavcodec/vvc/inter_template.c | 33 ++++++++++++++++++---------------
>  1 file changed, 18 insertions(+), 15 deletions(-)
>
> diff --git a/libavcodec/vvc/inter_template.c
> b/libavcodec/vvc/inter_template.c
> index c073a73e76..aee4994c17 100644
> --- a/libavcodec/vvc/inter_template.c
> +++ b/libavcodec/vvc/inter_template.c
> @@ -472,6 +472,9 @@ static void FUNC(apply_bdof)(uint8_t *_dst, const
> ptrdiff_t _dst_stride, const i
>      (filter[0] * src[x] +
>        \
>       filter[1] * src[x + stride])
>
> +#define DMVR_FILTER2(filter, src0, src1)        \
> +    (filter[0] * src0 + filter[1] * src1)
> +
>  //8.5.3.2.2 Luma sample bilinear interpolation process
>  static void FUNC(dmvr)(int16_t *dst, const uint8_t *_src, const ptrdiff_t
> _src_stride,
>      const int height, const intptr_t mx, const intptr_t my, const int
> width)
> @@ -541,31 +544,31 @@ static void FUNC(dmvr_v)(int16_t *dst, const uint8_t
> *_src, const ptrdiff_t _src
>  static void FUNC(dmvr_hv)(int16_t *dst, const uint8_t *_src, const
> ptrdiff_t _src_stride,
>      const int height, const intptr_t mx, const intptr_t my, const int
> width)
>  {
> -    int16_t tmp_array[(MAX_PB_SIZE + BILINEAR_EXTRA) * MAX_PB_SIZE];
> -    int16_t *tmp                = tmp_array;
> +    int16_t tmp_array[MAX_PB_SIZE * 2];
> +    int16_t *tmp0               = tmp_array;
> +    int16_t *tmp1               = tmp_array + MAX_PB_SIZE;
>      const pixel *src            = (const pixel*)_src;
>      const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
> -    const int8_t *filter        = ff_vvc_inter_luma_dmvr_filters[mx];
> +    const int8_t *filter_x      = ff_vvc_inter_luma_dmvr_filters[mx];
> +    const int8_t *filter_y      = ff_vvc_inter_luma_dmvr_filters[my];
>      const int shift1            = BIT_DEPTH - 6;
>      const int offset1           = 1 << (shift1 - 1);
>      const int shift2            = 4;
>      const int offset2           = 1 << (shift2 - 1);
>
>      src   -= BILINEAR_EXTRA_BEFORE * src_stride;
> -    for (int y = 0; y < height + BILINEAR_EXTRA; y++) {
> -        for (int x = 0; x < width; x++)
> -            tmp[x] = (DMVR_FILTER(src, 1) + offset1) >> shift1;
> -        src += src_stride;
> -        tmp += MAX_PB_SIZE;
> -    }
> +    for (int x = 0; x < width; x++)
> +        tmp0[x] = (DMVR_FILTER2(filter_x, src[x], src[x + 1]) + offset1)
> >> shift1;
> +    src += src_stride;
>
> -    tmp    = tmp_array + BILINEAR_EXTRA_BEFORE * MAX_PB_SIZE;
> -    filter = ff_vvc_inter_luma_dmvr_filters[my];
> -    for (int y = 0; y < height; y++) {
> -        for (int x = 0; x < width; x++)
> -            dst[x] = (DMVR_FILTER(tmp, MAX_PB_SIZE) + offset2) >> shift2;
> -        tmp += MAX_PB_SIZE;
> +    for (int y = 1; y < height + BILINEAR_EXTRA; y++) {
> +        for (int x = 0; x < width; x++) {
> +            tmp1[x] = (DMVR_FILTER2(filter_x, src[x], src[x + 1]) +
> offset1) >> shift1;
> +            dst[x] = (DMVR_FILTER2(filter_y, tmp0[x], tmp1[x]) + offset2)
> >> shift2;
> +        }
> +        src += src_stride;
>          dst += MAX_PB_SIZE;
> +        FFSWAP(int16_t *, tmp0, tmp1);
>      }
>  }
>
> --
> 2.42.0
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
diff mbox series

Patch

diff --git a/libavcodec/vvc/inter_template.c b/libavcodec/vvc/inter_template.c
index c073a73e76..aee4994c17 100644
--- a/libavcodec/vvc/inter_template.c
+++ b/libavcodec/vvc/inter_template.c
@@ -472,6 +472,9 @@  static void FUNC(apply_bdof)(uint8_t *_dst, const ptrdiff_t _dst_stride, const i
     (filter[0] * src[x] +                                                       \
      filter[1] * src[x + stride])
 
+#define DMVR_FILTER2(filter, src0, src1)        \
+    (filter[0] * src0 + filter[1] * src1)
+
 //8.5.3.2.2 Luma sample bilinear interpolation process
 static void FUNC(dmvr)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
     const int height, const intptr_t mx, const intptr_t my, const int width)
@@ -541,31 +544,31 @@  static void FUNC(dmvr_v)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src
 static void FUNC(dmvr_hv)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
     const int height, const intptr_t mx, const intptr_t my, const int width)
 {
-    int16_t tmp_array[(MAX_PB_SIZE + BILINEAR_EXTRA) * MAX_PB_SIZE];
-    int16_t *tmp                = tmp_array;
+    int16_t tmp_array[MAX_PB_SIZE * 2];
+    int16_t *tmp0               = tmp_array;
+    int16_t *tmp1               = tmp_array + MAX_PB_SIZE;
     const pixel *src            = (const pixel*)_src;
     const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
-    const int8_t *filter        = ff_vvc_inter_luma_dmvr_filters[mx];
+    const int8_t *filter_x      = ff_vvc_inter_luma_dmvr_filters[mx];
+    const int8_t *filter_y      = ff_vvc_inter_luma_dmvr_filters[my];
     const int shift1            = BIT_DEPTH - 6;
     const int offset1           = 1 << (shift1 - 1);
     const int shift2            = 4;
     const int offset2           = 1 << (shift2 - 1);
 
     src   -= BILINEAR_EXTRA_BEFORE * src_stride;
-    for (int y = 0; y < height + BILINEAR_EXTRA; y++) {
-        for (int x = 0; x < width; x++)
-            tmp[x] = (DMVR_FILTER(src, 1) + offset1) >> shift1;
-        src += src_stride;
-        tmp += MAX_PB_SIZE;
-    }
+    for (int x = 0; x < width; x++)
+        tmp0[x] = (DMVR_FILTER2(filter_x, src[x], src[x + 1]) + offset1) >> shift1;
+    src += src_stride;
 
-    tmp    = tmp_array + BILINEAR_EXTRA_BEFORE * MAX_PB_SIZE;
-    filter = ff_vvc_inter_luma_dmvr_filters[my];
-    for (int y = 0; y < height; y++) {
-        for (int x = 0; x < width; x++)
-            dst[x] = (DMVR_FILTER(tmp, MAX_PB_SIZE) + offset2) >> shift2;
-        tmp += MAX_PB_SIZE;
+    for (int y = 1; y < height + BILINEAR_EXTRA; y++) {
+        for (int x = 0; x < width; x++) {
+            tmp1[x] = (DMVR_FILTER2(filter_x, src[x], src[x + 1]) + offset1) >> shift1;
+            dst[x] = (DMVR_FILTER2(filter_y, tmp0[x], tmp1[x]) + offset2) >> shift2;
+        }
+        src += src_stride;
         dst += MAX_PB_SIZE;
+        FFSWAP(int16_t *, tmp0, tmp1);
     }
 }