diff mbox series

[FFmpeg-devel,RFC] avcodec/vvc: Don't use large array on stack

Message ID tencent_A5AB034E7170382D0A9AF640809A3B9F3508@qq.com
State New
Headers show
Series [FFmpeg-devel,RFC] avcodec/vvc: Don't use large array on stack | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Zhao Zhili Sept. 19, 2024, 5:27 p.m. UTC
From: Zhao Zhili <zhilizhao@tencent.com>

tmp_array in dmvr_hv takes 33024 bytes on the stack, which can be
dangerous. This patch fixes the C version and comments out the
x86 asm version.
---
 libavcodec/vvc/ctu.h             |  1 +
 libavcodec/vvc/dsp.h             |  2 +-
 libavcodec/vvc/inter.c           |  2 +-
 libavcodec/vvc/inter_template.c  | 12 +++++++-----
 libavcodec/x86/vvc/vvcdsp_init.c |  7 ++++---
 5 files changed, 14 insertions(+), 10 deletions(-)

Comments

James Almer Sept. 19, 2024, 7:18 p.m. UTC | #1
On 9/19/2024 2:27 PM, Zhao Zhili wrote:
> From: Zhao Zhili <zhilizhao@tencent.com>
> 
> tmp_array in dmvr_hv takes 33024 bytes on stack, which can be
> dangerous. This patch fixed the C version and comment out the
> x86 asm version.
You don't need to comment it out. The x86 versions don't use the new 
argument, so just update the prototypes, like so:

> diff --git a/libavcodec/x86/vvc/vvcdsp_init.c b/libavcodec/x86/vvc/vvcdsp_init.c
> index c50eaf25ce..7ff3e2bdff 100644
> --- a/libavcodec/x86/vvc/vvcdsp_init.c
> +++ b/libavcodec/x86/vvc/vvcdsp_init.c
> @@ -90,13 +90,13 @@ AVG_PROTOTYPES(12, avx2)
> 
>  #define DMVR_PROTOTYPES(bd, opt)                                                                    \
>  void ff_vvc_dmvr_##bd##_##opt(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride,               \
> -     int height, intptr_t mx, intptr_t my, int width);                                              \
> +     int height, intptr_t mx, intptr_t my, int width, int16_t *unused);                             \
>  void ff_vvc_dmvr_h_##bd##_##opt(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride,             \
> -     int height, intptr_t mx, intptr_t my, int width);                                              \
> +     int height, intptr_t mx, intptr_t my, int width, int16_t *unused);                             \
>  void ff_vvc_dmvr_v_##bd##_##opt(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride,             \
> -     int height, intptr_t mx, intptr_t my, int width);                                              \
> +     int height, intptr_t mx, intptr_t my, int width, int16_t *unused);                             \
>  void ff_vvc_dmvr_hv_##bd##_##opt(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride,            \
> -     int height, intptr_t mx, intptr_t my, int width);                                              \
> +     int height, intptr_t mx, intptr_t my, int width, int16_t *unused);                             \
> 
>  DMVR_PROTOTYPES( 8, avx2)
>  DMVR_PROTOTYPES(10, avx2)
> @@ -371,8 +371,7 @@ void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd)
>              AVG_INIT(8, avx2);
>              MC_LINKS_AVX2(8);
>              OF_INIT(8);
> -            // TODO:
> -            // DMVR_INIT(8);
> +            DMVR_INIT(8);
>              SAD_INIT();
>          }
>          break;
> @@ -386,7 +385,7 @@ void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd)
>              MC_LINKS_AVX2(10);
>              MC_LINKS_16BPC_AVX2(10);
>              OF_INIT(10);
> -            // DMVR_INIT(10);
> +            DMVR_INIT(10);
>              SAD_INIT();
>          }
>          break;
> @@ -400,7 +399,7 @@ void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd)
>              MC_LINKS_AVX2(12);
>              MC_LINKS_16BPC_AVX2(12);
>              OF_INIT(12);
> -            // DMVR_INIT(12);
> +            DMVR_INIT(12);
>              SAD_INIT();
>          }
>          break;
diff mbox series

Patch

diff --git a/libavcodec/vvc/ctu.h b/libavcodec/vvc/ctu.h
index eab4612561..eb3e51c7e5 100644
--- a/libavcodec/vvc/ctu.h
+++ b/libavcodec/vvc/ctu.h
@@ -385,6 +385,7 @@  typedef struct VVCLocalContext {
     DECLARE_ALIGNED(32, uint8_t, alf_buffer_luma)[(MAX_CTU_SIZE + 2 * ALF_PADDING_SIZE) * EDGE_EMU_BUFFER_STRIDE * 2];
     DECLARE_ALIGNED(32, uint8_t, alf_buffer_chroma)[(MAX_CTU_SIZE + 2 * ALF_PADDING_SIZE) * EDGE_EMU_BUFFER_STRIDE * 2];
     DECLARE_ALIGNED(32, int32_t, alf_gradient_tmp)[ALF_GRADIENT_SIZE * ALF_GRADIENT_SIZE * ALF_NUM_DIR];
+    DECLARE_ALIGNED(32, int16_t, dmvr_tmp)[(MAX_PB_SIZE + BILINEAR_EXTRA) * MAX_PB_SIZE];
 
     struct {
         int sbt_num_fourths_tb0;                ///< SbtNumFourthsTb0
diff --git a/libavcodec/vvc/dsp.h b/libavcodec/vvc/dsp.h
index 635ebcafed..3594dfc5f5 100644
--- a/libavcodec/vvc/dsp.h
+++ b/libavcodec/vvc/dsp.h
@@ -99,7 +99,7 @@  typedef struct VVCInterDSPContext {
 
     int (*sad)(const int16_t *src0, const int16_t *src1, int dx, int dy, int block_w, int block_h);
     void (*dmvr[2][2])(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, int height,
-        intptr_t mx, intptr_t my, int width);
+        intptr_t mx, intptr_t my, int width, int16_t *tmp);
 } VVCInterDSPContext;
 
 struct VVCLocalContext;
diff --git a/libavcodec/vvc/inter.c b/libavcodec/vvc/inter.c
index 64a9dd1e46..48b633d580 100644
--- a/libavcodec/vvc/inter.c
+++ b/libavcodec/vvc/inter.c
@@ -806,7 +806,7 @@  static void dmvr_mv_refine(VVCLocalContext *lc, MvField *mvf, MvField *orig_mv,
         const int wrap_enabled  = fc->ps.pps->r->pps_ref_wraparound_enabled_flag;
 
         MC_EMULATED_EDGE_BILINEAR(lc->edge_emu_buffer, &src, &src_stride, ox, oy);
-        fc->vvcdsp.inter.dmvr[!!my][!!mx](tmp[i], src, src_stride, pred_h, mx, my, pred_w);
+        fc->vvcdsp.inter.dmvr[!!my][!!mx](tmp[i], src, src_stride, pred_h, mx, my, pred_w, lc->dmvr_tmp);
     }
 
     min_sad = fc->vvcdsp.inter.sad(tmp[L0], tmp[L1], dx, dy, block_w, block_h);
diff --git a/libavcodec/vvc/inter_template.c b/libavcodec/vvc/inter_template.c
index c073a73e76..fad1ba801f 100644
--- a/libavcodec/vvc/inter_template.c
+++ b/libavcodec/vvc/inter_template.c
@@ -474,7 +474,8 @@  static void FUNC(apply_bdof)(uint8_t *_dst, const ptrdiff_t _dst_stride, const i
 
 //8.5.3.2.2 Luma sample bilinear interpolation process
 static void FUNC(dmvr)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
-    const int height, const intptr_t mx, const intptr_t my, const int width)
+    const int height, const intptr_t mx, const intptr_t my, const int width,
+    int16_t *tmp)
 {
 #if BIT_DEPTH != 10
     const pixel *src            = (const pixel *)_src;
@@ -502,7 +503,8 @@  static void FUNC(dmvr)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_s
 
 //8.5.3.2.2 Luma sample bilinear interpolation process
 static void FUNC(dmvr_h)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
-    const int height, const intptr_t mx, const intptr_t my, const int width)
+    const int height, const intptr_t mx, const intptr_t my, const int width,
+    int16_t *tmp)
 {
     const pixel *src            = (const pixel*)_src;
     const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
@@ -520,7 +522,8 @@  static void FUNC(dmvr_h)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src
 
 //8.5.3.2.2 Luma sample bilinear interpolation process
 static void FUNC(dmvr_v)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
-    const int height, const intptr_t mx, const intptr_t my, const int width)
+    const int height, const intptr_t mx, const intptr_t my, const int width,
+    int16_t *tmp)
 {
     const pixel *src            = (pixel*)_src;
     const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
@@ -539,9 +542,8 @@  static void FUNC(dmvr_v)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src
 
 //8.5.3.2.2 Luma sample bilinear interpolation process
 static void FUNC(dmvr_hv)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
-    const int height, const intptr_t mx, const intptr_t my, const int width)
+    const int height, const intptr_t mx, const intptr_t my, const int width, int16_t *tmp_array)
 {
-    int16_t tmp_array[(MAX_PB_SIZE + BILINEAR_EXTRA) * MAX_PB_SIZE];
     int16_t *tmp                = tmp_array;
     const pixel *src            = (const pixel*)_src;
     const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
diff --git a/libavcodec/x86/vvc/vvcdsp_init.c b/libavcodec/x86/vvc/vvcdsp_init.c
index f3e2e3a27b..c50eaf25ce 100644
--- a/libavcodec/x86/vvc/vvcdsp_init.c
+++ b/libavcodec/x86/vvc/vvcdsp_init.c
@@ -371,7 +371,8 @@  void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd)
             AVG_INIT(8, avx2);
             MC_LINKS_AVX2(8);
             OF_INIT(8);
-            DMVR_INIT(8);
+            // TODO:
+            // DMVR_INIT(8);
             SAD_INIT();
         }
         break;
@@ -385,7 +386,7 @@  void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd)
             MC_LINKS_AVX2(10);
             MC_LINKS_16BPC_AVX2(10);
             OF_INIT(10);
-            DMVR_INIT(10);
+            // DMVR_INIT(10);
             SAD_INIT();
         }
         break;
@@ -399,7 +400,7 @@  void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd)
             MC_LINKS_AVX2(12);
             MC_LINKS_16BPC_AVX2(12);
             OF_INIT(12);
-            DMVR_INIT(12);
+            // DMVR_INIT(12);
             SAD_INIT();
         }
         break;