Message ID | 20220110145836.3449558-1-alankelly@google.com |
---|---|
State | New |
Headers | show |
Series | [FFmpeg-devel,1/4] libswscale: Re-factor ff_shuffle_filter_coefficients. | expand |
Context | Check | Description |
---|---|---|
andriy/make_x86 | success | Make finished |
andriy/make_fate_x86 | success | Make fate finished |
andriy/make_ppc | success | Make finished |
andriy/make_fate_ppc | success | Make fate finished |
andriy/make_armv7_RPi4 | success | Make finished |
andriy/make_fate_armv7_RPi4 | success | Make fate finished |
Hi, Is anybody interested in this patch set? Thanks! On Mon, Jan 10, 2022, 15:58 Alan Kelly <alankelly@google.com> wrote: > Make the code more readable, follow the style guide and propagate memory > allocation errors. > --- > libswscale/swscale_internal.h | 2 +- > libswscale/utils.c | 68 ++++++++++++++++++++--------------- > 2 files changed, 40 insertions(+), 30 deletions(-) > > diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h > index 3a78d95ba6..26d28d42e6 100644 > --- a/libswscale/swscale_internal.h > +++ b/libswscale/swscale_internal.h > @@ -1144,5 +1144,5 @@ void ff_sws_slice_worker(void *priv, int jobnr, int > threadnr, > #define MAX_LINES_AHEAD 4 > > //shuffle filter and filterPos for hyScale and hcScale filters in avx2 > -void ff_shuffle_filter_coefficients(SwsContext *c, int* filterPos, int > filterSize, int16_t *filter, int dstW); > +int ff_shuffle_filter_coefficients(SwsContext *c, int* filterPos, int > filterSize, int16_t *filter, int dstW); > #endif /* SWSCALE_SWSCALE_INTERNAL_H */ > diff --git a/libswscale/utils.c b/libswscale/utils.c > index c5ea8853d5..52f07e1661 100644 > --- a/libswscale/utils.c > +++ b/libswscale/utils.c > @@ -278,39 +278,47 @@ static const FormatEntry format_entries[] = { > [AV_PIX_FMT_P416LE] = { 1, 1 }, > }; > > -void ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos, int > filterSize, int16_t *filter, int dstW){ > +int ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos, > + int filterSize, int16_t *filter, > + int dstW) > +{ > #if ARCH_X86_64 > - int i, j, k, l; > + int i = 0, j = 0, k = 0; > int cpu_flags = av_get_cpu_flags(); > + if (!filter || dstW % 16 != 0) return 0; > if (EXTERNAL_AVX2_FAST(cpu_flags) && !(cpu_flags & > AV_CPU_FLAG_SLOW_GATHER)) { > - if ((c->srcBpc == 8) && (c->dstBpc <= 14)){ > - if (dstW % 16 == 0){ > - if (filter != NULL){ > - for (i = 0; i < dstW; i += 8){ > - FFSWAP(int, filterPos[i + 2], filterPos[i+4]); > - FFSWAP(int, filterPos[i + 3], filterPos[i+5]); > - } > - if (filterSize > 4){ > - int16_t *tmp2 = av_malloc(dstW * filterSize * 2); > - memcpy(tmp2, filter, dstW * filterSize * 2); > - for (i = 0; i < dstW; i += 16){//pixel > - for (k = 0; k < filterSize / 4; ++k){//fcoeff > - for (j = 0; j < 16; ++j){//inner pixel > - for (l = 0; l < 4; ++l){//coeff > - int from = i * filterSize + j * > filterSize + k * 4 + l; > - int to = (i) * filterSize + j * 4 > + l + k * 64; > - filter[to] = tmp2[from]; > - } > - } > - } > - } > - av_free(tmp2); > - } > - } > - } > + if ((c->srcBpc == 8) && (c->dstBpc <= 14)) { > + int16_t *filterCopy = NULL; > + if (filterSize > 4) { > + if (!FF_ALLOC_TYPED_ARRAY(filterCopy, dstW * filterSize)) > + return AVERROR(ENOMEM); > + memcpy(filterCopy, filter, dstW * filterSize * > sizeof(int16_t)); > + } > + // Do not swap filterPos for pixels which won't be processed by > + // the main loop. > + for (i = 0; i + 8 <= dstW; i += 8) { > + FFSWAP(int, filterPos[i + 2], filterPos[i + 4]); > + FFSWAP(int, filterPos[i + 3], filterPos[i + 5]); > + } > + if (filterSize > 4) { > + // 16 pixels are processed at a time. > + for (i = 0; i + 16 <= dstW; i += 16) { > + // 4 filter coeffs are processed at a time. > + for (k = 0; k + 4 <= filterSize; k += 4) { > + for (j = 0; j < 16; ++j) { > + int from = (i + j) * filterSize + k; > + int to = i * filterSize + j * 4 + k * 16; > + memcpy(&filter[to], &filterCopy[from], 4 * > sizeof(int16_t)); > + } > + } > + } > + } > + if (filterCopy) > + av_free(filterCopy); > } > } > #endif > + return 0; > } > > int sws_isSupportedInput(enum AVPixelFormat pix_fmt) > @@ -1836,7 +1844,8 @@ av_cold int sws_init_context(SwsContext *c, > SwsFilter *srcFilter, > get_local_pos(c, 0, 0, 0), > get_local_pos(c, 0, 0, 0))) < 0) > goto fail; > - ff_shuffle_filter_coefficients(c, c->hLumFilterPos, > c->hLumFilterSize, c->hLumFilter, dstW); > + if ((ret = ff_shuffle_filter_coefficients(c, > c->hLumFilterPos, c->hLumFilterSize, c->hLumFilter, dstW)) != 0) > + goto nomem; > if ((ret = initFilter(&c->hChrFilter, &c->hChrFilterPos, > &c->hChrFilterSize, c->chrXInc, > c->chrSrcW, c->chrDstW, filterAlign, 1 << 14, > @@ -1846,7 +1855,8 @@ av_cold int sws_init_context(SwsContext *c, > SwsFilter *srcFilter, > get_local_pos(c, c->chrSrcHSubSample, > c->src_h_chr_pos, 0), > get_local_pos(c, c->chrDstHSubSample, > c->dst_h_chr_pos, 0))) < 0) > goto fail; > - ff_shuffle_filter_coefficients(c, c->hChrFilterPos, > c->hChrFilterSize, c->hChrFilter, c->chrDstW); > + if ((ret = ff_shuffle_filter_coefficients(c, > c->hChrFilterPos, c->hChrFilterSize, c->hChrFilter, c->chrDstW)) != 0) > + goto nomem; > } > } // initialize horizontal stuff > > -- > 2.34.1.575.g55b058a8bb-goog > >
On Mon, Jan 10, 2022 at 03:58:33PM +0100, Alan Kelly wrote: > Make the code more readable, follow the style guide and propagate memory > allocation errors. Cosmetics and bugfixes should not be in the same patch > --- > libswscale/swscale_internal.h | 2 +- > libswscale/utils.c | 68 ++++++++++++++++++++--------------- > 2 files changed, 40 insertions(+), 30 deletions(-) > > diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h > index 3a78d95ba6..26d28d42e6 100644 > --- a/libswscale/swscale_internal.h > +++ b/libswscale/swscale_internal.h > @@ -1144,5 +1144,5 @@ void ff_sws_slice_worker(void *priv, int jobnr, int threadnr, > #define MAX_LINES_AHEAD 4 > > //shuffle filter and filterPos for hyScale and hcScale filters in avx2 > -void ff_shuffle_filter_coefficients(SwsContext *c, int* filterPos, int filterSize, int16_t *filter, int dstW); > +int ff_shuffle_filter_coefficients(SwsContext *c, int* filterPos, int filterSize, int16_t *filter, int dstW); > #endif /* SWSCALE_SWSCALE_INTERNAL_H */ > diff --git a/libswscale/utils.c b/libswscale/utils.c > index c5ea8853d5..52f07e1661 100644 > --- a/libswscale/utils.c > +++ b/libswscale/utils.c > @@ -278,39 +278,47 @@ static const FormatEntry format_entries[] = { > [AV_PIX_FMT_P416LE] = { 1, 1 }, > }; > > -void ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos, int filterSize, int16_t *filter, int dstW){ > +int ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos, > + int filterSize, int16_t *filter, > + int dstW) > +{ > #if ARCH_X86_64 > - int i, j, k, l; > + int i = 0, j = 0, k = 0; why? they are set when used if iam not mistaken > int cpu_flags = av_get_cpu_flags(); > + if (!filter || dstW % 16 != 0) return 0; please add \n also a comment what the dstW & 16 case exactly does and why [...] > int sws_isSupportedInput(enum AVPixelFormat pix_fmt) > @@ -1836,7 +1844,8 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, > get_local_pos(c, 0, 0, 0), > get_local_pos(c, 0, 0, 0))) < 0) > goto fail; > - ff_shuffle_filter_coefficients(c, c->hLumFilterPos, c->hLumFilterSize, c->hLumFilter, dstW); > + if ((ret = ff_shuffle_filter_coefficients(c, c->hLumFilterPos, c->hLumFilterSize, c->hLumFilter, dstW)) != 0) > + goto nomem; This is confusing as ret is never used, also error codes are <0 thx [...]
Hi Michael, Thanks for your feedback. I have updated the patches and split this patch into two, one with cosmetic fixes and one propagating the errors. Since there is now an extra patch in the set and the commit messages have changed, new threads have been started. Alan On Thu, Feb 3, 2022 at 3:11 PM Michael Niedermayer <michael@niedermayer.cc> wrote: > On Mon, Jan 10, 2022 at 03:58:33PM +0100, Alan Kelly wrote: > > Make the code more readable, follow the style guide and propagate memory > > allocation errors. > > Cosmetics and bugfixes should not be in the same patch > > > > --- > > libswscale/swscale_internal.h | 2 +- > > libswscale/utils.c | 68 ++++++++++++++++++++--------------- > > 2 files changed, 40 insertions(+), 30 deletions(-) > > > > diff --git a/libswscale/swscale_internal.h > b/libswscale/swscale_internal.h > > index 3a78d95ba6..26d28d42e6 100644 > > --- a/libswscale/swscale_internal.h > > +++ b/libswscale/swscale_internal.h > > @@ -1144,5 +1144,5 @@ void ff_sws_slice_worker(void *priv, int jobnr, > int threadnr, > > #define MAX_LINES_AHEAD 4 > > > > //shuffle filter and filterPos for hyScale and hcScale filters in avx2 > > -void ff_shuffle_filter_coefficients(SwsContext *c, int* filterPos, int > filterSize, int16_t *filter, int dstW); > > +int ff_shuffle_filter_coefficients(SwsContext *c, int* filterPos, int > filterSize, int16_t *filter, int dstW); > > #endif /* SWSCALE_SWSCALE_INTERNAL_H */ > > diff --git a/libswscale/utils.c b/libswscale/utils.c > > index c5ea8853d5..52f07e1661 100644 > > --- a/libswscale/utils.c > > +++ b/libswscale/utils.c > > @@ -278,39 +278,47 @@ static const FormatEntry format_entries[] = { > > [AV_PIX_FMT_P416LE] = { 1, 1 }, > > }; > > > > -void ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos, int > filterSize, int16_t *filter, int dstW){ > > +int ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos, > > + int filterSize, int16_t *filter, > > + int dstW) > > +{ > > #if ARCH_X86_64 > > > - int i, j, k, l; > > + int i = 0, j = 0, k = 0; > > why? > they are set when used if iam not mistaken > > > > int cpu_flags = av_get_cpu_flags(); > > > + if (!filter || dstW % 16 != 0) return 0; > > please add \n also a comment what the dstW & 16 case exactly does and why > > > [...] > > int sws_isSupportedInput(enum AVPixelFormat pix_fmt) > > @@ -1836,7 +1844,8 @@ av_cold int sws_init_context(SwsContext *c, > SwsFilter *srcFilter, > > get_local_pos(c, 0, 0, 0), > > get_local_pos(c, 0, 0, 0))) < 0) > > goto fail; > > - ff_shuffle_filter_coefficients(c, c->hLumFilterPos, > c->hLumFilterSize, c->hLumFilter, dstW); > > + if ((ret = ff_shuffle_filter_coefficients(c, > c->hLumFilterPos, c->hLumFilterSize, c->hLumFilter, dstW)) != 0) > > + goto nomem; > > This is confusing as ret is never used, also error codes are <0 > > thx > > [...] > -- > Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB > > Those who are best at talking, realize last or never when they are wrong. > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". >
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h index 3a78d95ba6..26d28d42e6 100644 --- a/libswscale/swscale_internal.h +++ b/libswscale/swscale_internal.h @@ -1144,5 +1144,5 @@ void ff_sws_slice_worker(void *priv, int jobnr, int threadnr, #define MAX_LINES_AHEAD 4 //shuffle filter and filterPos for hyScale and hcScale filters in avx2 -void ff_shuffle_filter_coefficients(SwsContext *c, int* filterPos, int filterSize, int16_t *filter, int dstW); +int ff_shuffle_filter_coefficients(SwsContext *c, int* filterPos, int filterSize, int16_t *filter, int dstW); #endif /* SWSCALE_SWSCALE_INTERNAL_H */ diff --git a/libswscale/utils.c b/libswscale/utils.c index c5ea8853d5..52f07e1661 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -278,39 +278,47 @@ static const FormatEntry format_entries[] = { [AV_PIX_FMT_P416LE] = { 1, 1 }, }; -void ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos, int filterSize, int16_t *filter, int dstW){ +int ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos, + int filterSize, int16_t *filter, + int dstW) +{ #if ARCH_X86_64 - int i, j, k, l; + int i = 0, j = 0, k = 0; int cpu_flags = av_get_cpu_flags(); + if (!filter || dstW % 16 != 0) return 0; if (EXTERNAL_AVX2_FAST(cpu_flags) && !(cpu_flags & AV_CPU_FLAG_SLOW_GATHER)) { - if ((c->srcBpc == 8) && (c->dstBpc <= 14)){ - if (dstW % 16 == 0){ - if (filter != NULL){ - for (i = 0; i < dstW; i += 8){ - FFSWAP(int, filterPos[i + 2], filterPos[i+4]); - FFSWAP(int, filterPos[i + 3], filterPos[i+5]); - } - if (filterSize > 4){ - int16_t *tmp2 = av_malloc(dstW * filterSize * 2); - memcpy(tmp2, filter, dstW * filterSize * 2); - for (i = 0; i < dstW; i += 16){//pixel - for (k = 0; k < filterSize / 4; ++k){//fcoeff - for (j = 0; j < 16; ++j){//inner pixel - for (l = 0; l < 4; ++l){//coeff - int from = i * filterSize + j * filterSize + k * 4 + l; - int to = (i) * filterSize + j * 4 + l + k * 64; - filter[to] = tmp2[from]; - } - } - } - } - av_free(tmp2); - } - } - } + if ((c->srcBpc == 8) && (c->dstBpc <= 14)) { + int16_t *filterCopy = NULL; + if (filterSize > 4) { + if (!FF_ALLOC_TYPED_ARRAY(filterCopy, dstW * filterSize)) + return AVERROR(ENOMEM); + memcpy(filterCopy, filter, dstW * filterSize * sizeof(int16_t)); + } + // Do not swap filterPos for pixels which won't be processed by + // the main loop. + for (i = 0; i + 8 <= dstW; i += 8) { + FFSWAP(int, filterPos[i + 2], filterPos[i + 4]); + FFSWAP(int, filterPos[i + 3], filterPos[i + 5]); + } + if (filterSize > 4) { + // 16 pixels are processed at a time. + for (i = 0; i + 16 <= dstW; i += 16) { + // 4 filter coeffs are processed at a time. + for (k = 0; k + 4 <= filterSize; k += 4) { + for (j = 0; j < 16; ++j) { + int from = (i + j) * filterSize + k; + int to = i * filterSize + j * 4 + k * 16; + memcpy(&filter[to], &filterCopy[from], 4 * sizeof(int16_t)); + } + } + } + } + if (filterCopy) + av_free(filterCopy); } } #endif + return 0; } int sws_isSupportedInput(enum AVPixelFormat pix_fmt) @@ -1836,7 +1844,8 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, get_local_pos(c, 0, 0, 0), get_local_pos(c, 0, 0, 0))) < 0) goto fail; - ff_shuffle_filter_coefficients(c, c->hLumFilterPos, c->hLumFilterSize, c->hLumFilter, dstW); + if ((ret = ff_shuffle_filter_coefficients(c, c->hLumFilterPos, c->hLumFilterSize, c->hLumFilter, dstW)) != 0) + goto nomem; if ((ret = initFilter(&c->hChrFilter, &c->hChrFilterPos, &c->hChrFilterSize, c->chrXInc, c->chrSrcW, c->chrDstW, filterAlign, 1 << 14, @@ -1846,7 +1855,8 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, get_local_pos(c, c->chrSrcHSubSample, c->src_h_chr_pos, 0), get_local_pos(c, c->chrDstHSubSample, c->dst_h_chr_pos, 0))) < 0) goto fail; - ff_shuffle_filter_coefficients(c, c->hChrFilterPos, c->hChrFilterSize, c->hChrFilter, c->chrDstW); + if ((ret = ff_shuffle_filter_coefficients(c, c->hChrFilterPos, c->hChrFilterSize, c->hChrFilter, c->chrDstW)) != 0) + goto nomem; } } // initialize horizontal stuff