Message ID | tencent_6F007D68827C12AF7E3999C0C9B4DC6BA20A@qq.com |
---|---|
State | New |
Headers | show |
Series | [FFmpeg-devel,v4] avcodec/vvc: Don't use large array on stack | expand |
Context | Check | Description |
---|---|---|
yinshiyou/make_loongarch64 | success | Make finished |
yinshiyou/make_fate_loongarch64 | success | Make fate finished |
andriy/make_x86 | success | Make finished |
andriy/make_fate_x86 | success | Make fate finished |
On Sun, Sep 22, 2024 at 11:40 AM Zhao Zhili <quinkblack@foxmail.com> wrote: > From: Zhao Zhili <zhilizhao@tencent.com> > > tmp_array in dmvr_hv takes 33024 bytes on stack, which can be > dangerous. > --- > v4: > 1. Add DMVR_FILTER2 macro > 2. Process first line out of loop to remove condition check. > Thank you, Zhili Merged. > > libavcodec/vvc/inter_template.c | 33 ++++++++++++++++++--------------- > 1 file changed, 18 insertions(+), 15 deletions(-) > > diff --git a/libavcodec/vvc/inter_template.c > b/libavcodec/vvc/inter_template.c > index c073a73e76..aee4994c17 100644 > --- a/libavcodec/vvc/inter_template.c > +++ b/libavcodec/vvc/inter_template.c > @@ -472,6 +472,9 @@ static void FUNC(apply_bdof)(uint8_t *_dst, const > ptrdiff_t _dst_stride, const i > (filter[0] * src[x] + > \ > filter[1] * src[x + stride]) > > +#define DMVR_FILTER2(filter, src0, src1) \ > + (filter[0] * src0 + filter[1] * src1) > + > //8.5.3.2.2 Luma sample bilinear interpolation process > static void FUNC(dmvr)(int16_t *dst, const uint8_t *_src, const ptrdiff_t > _src_stride, > const int height, const intptr_t mx, const intptr_t my, const int > width) > @@ -541,31 +544,31 @@ static void FUNC(dmvr_v)(int16_t *dst, const uint8_t > *_src, const ptrdiff_t _src > static void FUNC(dmvr_hv)(int16_t *dst, const uint8_t *_src, const > ptrdiff_t _src_stride, > const int height, const intptr_t mx, const intptr_t my, const int > width) > { > - int16_t tmp_array[(MAX_PB_SIZE + BILINEAR_EXTRA) * MAX_PB_SIZE]; > - int16_t *tmp = tmp_array; > + int16_t tmp_array[MAX_PB_SIZE * 2]; > + int16_t *tmp0 = tmp_array; > + int16_t *tmp1 = tmp_array + MAX_PB_SIZE; > const pixel *src = (const pixel*)_src; > const ptrdiff_t src_stride = _src_stride / sizeof(pixel); > - const int8_t *filter = ff_vvc_inter_luma_dmvr_filters[mx]; > + const int8_t *filter_x = ff_vvc_inter_luma_dmvr_filters[mx]; > + const int8_t *filter_y = ff_vvc_inter_luma_dmvr_filters[my]; > const int shift1 = BIT_DEPTH - 6; > const int offset1 = 1 << 
(shift1 - 1); > const int shift2 = 4; > const int offset2 = 1 << (shift2 - 1); > > src -= BILINEAR_EXTRA_BEFORE * src_stride; > - for (int y = 0; y < height + BILINEAR_EXTRA; y++) { > - for (int x = 0; x < width; x++) > - tmp[x] = (DMVR_FILTER(src, 1) + offset1) >> shift1; > - src += src_stride; > - tmp += MAX_PB_SIZE; > - } > + for (int x = 0; x < width; x++) > + tmp0[x] = (DMVR_FILTER2(filter_x, src[x], src[x + 1]) + offset1) > >> shift1; > + src += src_stride; > > - tmp = tmp_array + BILINEAR_EXTRA_BEFORE * MAX_PB_SIZE; > - filter = ff_vvc_inter_luma_dmvr_filters[my]; > - for (int y = 0; y < height; y++) { > - for (int x = 0; x < width; x++) > - dst[x] = (DMVR_FILTER(tmp, MAX_PB_SIZE) + offset2) >> shift2; > - tmp += MAX_PB_SIZE; > + for (int y = 1; y < height + BILINEAR_EXTRA; y++) { > + for (int x = 0; x < width; x++) { > + tmp1[x] = (DMVR_FILTER2(filter_x, src[x], src[x + 1]) + > offset1) >> shift1; > + dst[x] = (DMVR_FILTER2(filter_y, tmp0[x], tmp1[x]) + offset2) > >> shift2; > + } > + src += src_stride; > dst += MAX_PB_SIZE; > + FFSWAP(int16_t *, tmp0, tmp1); > } > } > > -- > 2.42.0 > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". >
diff --git a/libavcodec/vvc/inter_template.c b/libavcodec/vvc/inter_template.c index c073a73e76..aee4994c17 100644 --- a/libavcodec/vvc/inter_template.c +++ b/libavcodec/vvc/inter_template.c @@ -472,6 +472,9 @@ static void FUNC(apply_bdof)(uint8_t *_dst, const ptrdiff_t _dst_stride, const i (filter[0] * src[x] + \ filter[1] * src[x + stride]) +#define DMVR_FILTER2(filter, src0, src1) \ + (filter[0] * src0 + filter[1] * src1) + //8.5.3.2.2 Luma sample bilinear interpolation process static void FUNC(dmvr)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, const int height, const intptr_t mx, const intptr_t my, const int width) @@ -541,31 +544,31 @@ static void FUNC(dmvr_v)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src static void FUNC(dmvr_hv)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, const int height, const intptr_t mx, const intptr_t my, const int width) { - int16_t tmp_array[(MAX_PB_SIZE + BILINEAR_EXTRA) * MAX_PB_SIZE]; - int16_t *tmp = tmp_array; + int16_t tmp_array[MAX_PB_SIZE * 2]; + int16_t *tmp0 = tmp_array; + int16_t *tmp1 = tmp_array + MAX_PB_SIZE; const pixel *src = (const pixel*)_src; const ptrdiff_t src_stride = _src_stride / sizeof(pixel); - const int8_t *filter = ff_vvc_inter_luma_dmvr_filters[mx]; + const int8_t *filter_x = ff_vvc_inter_luma_dmvr_filters[mx]; + const int8_t *filter_y = ff_vvc_inter_luma_dmvr_filters[my]; const int shift1 = BIT_DEPTH - 6; const int offset1 = 1 << (shift1 - 1); const int shift2 = 4; const int offset2 = 1 << (shift2 - 1); src -= BILINEAR_EXTRA_BEFORE * src_stride; - for (int y = 0; y < height + BILINEAR_EXTRA; y++) { - for (int x = 0; x < width; x++) - tmp[x] = (DMVR_FILTER(src, 1) + offset1) >> shift1; - src += src_stride; - tmp += MAX_PB_SIZE; - } + for (int x = 0; x < width; x++) + tmp0[x] = (DMVR_FILTER2(filter_x, src[x], src[x + 1]) + offset1) >> shift1; + src += src_stride; - tmp = tmp_array + BILINEAR_EXTRA_BEFORE * MAX_PB_SIZE; - filter = 
ff_vvc_inter_luma_dmvr_filters[my]; - for (int y = 0; y < height; y++) { - for (int x = 0; x < width; x++) - dst[x] = (DMVR_FILTER(tmp, MAX_PB_SIZE) + offset2) >> shift2; - tmp += MAX_PB_SIZE; + for (int y = 1; y < height + BILINEAR_EXTRA; y++) { + for (int x = 0; x < width; x++) { + tmp1[x] = (DMVR_FILTER2(filter_x, src[x], src[x + 1]) + offset1) >> shift1; + dst[x] = (DMVR_FILTER2(filter_y, tmp0[x], tmp1[x]) + offset2) >> shift2; + } + src += src_stride; dst += MAX_PB_SIZE; + FFSWAP(int16_t *, tmp0, tmp1); } }
From: Zhao Zhili <zhilizhao@tencent.com> tmp_array in dmvr_hv takes 33024 bytes on the stack, which can be dangerous. --- v4: 1. Add the DMVR_FILTER2 macro 2. Process the first line outside the loop to remove the condition check. libavcodec/vvc/inter_template.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-)