@@ -385,6 +385,7 @@ typedef struct VVCLocalContext {
DECLARE_ALIGNED(32, uint8_t, alf_buffer_luma)[(MAX_CTU_SIZE + 2 * ALF_PADDING_SIZE) * EDGE_EMU_BUFFER_STRIDE * 2];
DECLARE_ALIGNED(32, uint8_t, alf_buffer_chroma)[(MAX_CTU_SIZE + 2 * ALF_PADDING_SIZE) * EDGE_EMU_BUFFER_STRIDE * 2];
DECLARE_ALIGNED(32, int32_t, alf_gradient_tmp)[ALF_GRADIENT_SIZE * ALF_GRADIENT_SIZE * ALF_NUM_DIR];
+ DECLARE_ALIGNED(32, int16_t, dmvr_tmp)[(MAX_PB_SIZE + BILINEAR_EXTRA) * MAX_PB_SIZE];
struct {
int sbt_num_fourths_tb0; ///< SbtNumFourthsTb0
@@ -99,7 +99,7 @@ typedef struct VVCInterDSPContext {
int (*sad)(const int16_t *src0, const int16_t *src1, int dx, int dy, int block_w, int block_h);
void (*dmvr[2][2])(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, int height,
- intptr_t mx, intptr_t my, int width);
+ intptr_t mx, intptr_t my, int width, int16_t *tmp);
} VVCInterDSPContext;
struct VVCLocalContext;
@@ -806,7 +806,7 @@ static void dmvr_mv_refine(VVCLocalContext *lc, MvField *mvf, MvField *orig_mv,
const int wrap_enabled = fc->ps.pps->r->pps_ref_wraparound_enabled_flag;
MC_EMULATED_EDGE_BILINEAR(lc->edge_emu_buffer, &src, &src_stride, ox, oy);
- fc->vvcdsp.inter.dmvr[!!my][!!mx](tmp[i], src, src_stride, pred_h, mx, my, pred_w);
+ fc->vvcdsp.inter.dmvr[!!my][!!mx](tmp[i], src, src_stride, pred_h, mx, my, pred_w, lc->dmvr_tmp);
}
min_sad = fc->vvcdsp.inter.sad(tmp[L0], tmp[L1], dx, dy, block_w, block_h);
@@ -474,7 +474,8 @@ static void FUNC(apply_bdof)(uint8_t *_dst, const ptrdiff_t _dst_stride, const i
//8.5.3.2.2 Luma sample bilinear interpolation process
static void FUNC(dmvr)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
- const int height, const intptr_t mx, const intptr_t my, const int width)
+ const int height, const intptr_t mx, const intptr_t my, const int width,
+ int16_t *tmp)
{
#if BIT_DEPTH != 10
const pixel *src = (const pixel *)_src;
@@ -502,7 +503,8 @@ static void FUNC(dmvr)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_s
//8.5.3.2.2 Luma sample bilinear interpolation process
static void FUNC(dmvr_h)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
- const int height, const intptr_t mx, const intptr_t my, const int width)
+ const int height, const intptr_t mx, const intptr_t my, const int width,
+ int16_t *tmp)
{
const pixel *src = (const pixel*)_src;
const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
@@ -520,7 +522,8 @@ static void FUNC(dmvr_h)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src
//8.5.3.2.2 Luma sample bilinear interpolation process
static void FUNC(dmvr_v)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
- const int height, const intptr_t mx, const intptr_t my, const int width)
+ const int height, const intptr_t mx, const intptr_t my, const int width,
+ int16_t *tmp)
{
const pixel *src = (pixel*)_src;
const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
@@ -539,9 +542,8 @@ static void FUNC(dmvr_v)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src
//8.5.3.2.2 Luma sample bilinear interpolation process
static void FUNC(dmvr_hv)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
- const int height, const intptr_t mx, const intptr_t my, const int width)
+ const int height, const intptr_t mx, const intptr_t my, const int width, int16_t *tmp_array)
{
- int16_t tmp_array[(MAX_PB_SIZE + BILINEAR_EXTRA) * MAX_PB_SIZE];
int16_t *tmp = tmp_array;
const pixel *src = (const pixel*)_src;
const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
@@ -371,7 +371,8 @@ void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd)
AVG_INIT(8, avx2);
MC_LINKS_AVX2(8);
OF_INIT(8);
- DMVR_INIT(8);
+ // TODO: update the x86 asm for the new tmp buffer argument, then re-enable
+ // DMVR_INIT(8);
SAD_INIT();
}
break;
@@ -385,7 +386,7 @@ void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd)
MC_LINKS_AVX2(10);
MC_LINKS_16BPC_AVX2(10);
OF_INIT(10);
- DMVR_INIT(10);
+ // DMVR_INIT(10);
SAD_INIT();
}
break;
@@ -399,7 +400,7 @@ void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd)
MC_LINKS_AVX2(12);
MC_LINKS_16BPC_AVX2(12);
OF_INIT(12);
- DMVR_INIT(12);
+ // DMVR_INIT(12);
SAD_INIT();
}
break;
From: Zhao Zhili <zhilizhao@tencent.com> tmp_array in dmvr_hv takes 33024 bytes on the stack, which can be dangerous. This patch fixes the C version and comments out the x86 asm version. --- libavcodec/vvc/ctu.h | 1 + libavcodec/vvc/dsp.h | 2 +- libavcodec/vvc/inter.c | 2 +- libavcodec/vvc/inter_template.c | 12 +++++++----- libavcodec/x86/vvc/vvcdsp_init.c | 7 ++++--- 5 files changed, 14 insertions(+), 10 deletions(-)