diff mbox series

[FFmpeg-devel,1/4] libswscale: Re-factor ff_shuffle_filter_coefficients.

Message ID 20220110145836.3449558-1-alankelly@google.com
State New
Headers show
Series [FFmpeg-devel,1/4] libswscale: Re-factor ff_shuffle_filter_coefficients. | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished
andriy/make_ppc success Make finished
andriy/make_fate_ppc success Make fate finished
andriy/make_armv7_RPi4 success Make finished
andriy/make_fate_armv7_RPi4 success Make fate finished

Commit Message

Alan Kelly Jan. 10, 2022, 2:58 p.m. UTC
Make the code more readable, follow the style guide and propagate memory
allocation errors.
---
 libswscale/swscale_internal.h |  2 +-
 libswscale/utils.c            | 68 ++++++++++++++++++++---------------
 2 files changed, 40 insertions(+), 30 deletions(-)

Comments

Alan Kelly Feb. 2, 2022, 12:21 p.m. UTC | #1
Hi,

Is anybody interested in this patch set?

Thanks!

On Mon, Jan 10, 2022, 15:58 Alan Kelly <alankelly@google.com> wrote:

> Make the code more readable, follow the style guide and propagate memory
> allocation errors.
> ---
>  libswscale/swscale_internal.h |  2 +-
>  libswscale/utils.c            | 68 ++++++++++++++++++++---------------
>  2 files changed, 40 insertions(+), 30 deletions(-)
>
> diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
> index 3a78d95ba6..26d28d42e6 100644
> --- a/libswscale/swscale_internal.h
> +++ b/libswscale/swscale_internal.h
> @@ -1144,5 +1144,5 @@ void ff_sws_slice_worker(void *priv, int jobnr, int
> threadnr,
>  #define MAX_LINES_AHEAD 4
>
>  //shuffle filter and filterPos for hyScale and hcScale filters in avx2
> -void ff_shuffle_filter_coefficients(SwsContext *c, int* filterPos, int
> filterSize, int16_t *filter, int dstW);
> +int ff_shuffle_filter_coefficients(SwsContext *c, int* filterPos, int
> filterSize, int16_t *filter, int dstW);
>  #endif /* SWSCALE_SWSCALE_INTERNAL_H */
> diff --git a/libswscale/utils.c b/libswscale/utils.c
> index c5ea8853d5..52f07e1661 100644
> --- a/libswscale/utils.c
> +++ b/libswscale/utils.c
> @@ -278,39 +278,47 @@ static const FormatEntry format_entries[] = {
>      [AV_PIX_FMT_P416LE]      = { 1, 1 },
>  };
>
> -void ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos, int
> filterSize, int16_t *filter, int dstW){
> +int ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos,
> +                                   int filterSize, int16_t *filter,
> +                                   int dstW)
> +{
>  #if ARCH_X86_64
> -    int i, j, k, l;
> +    int i = 0, j = 0, k = 0;
>      int cpu_flags = av_get_cpu_flags();
> +    if (!filter || dstW % 16 != 0) return 0;
>      if (EXTERNAL_AVX2_FAST(cpu_flags) && !(cpu_flags &
> AV_CPU_FLAG_SLOW_GATHER)) {
> -        if ((c->srcBpc == 8) && (c->dstBpc <= 14)){
> -            if (dstW % 16 == 0){
> -                if (filter != NULL){
> -                    for (i = 0; i < dstW; i += 8){
> -                        FFSWAP(int, filterPos[i + 2], filterPos[i+4]);
> -                        FFSWAP(int, filterPos[i + 3], filterPos[i+5]);
> -                    }
> -                    if (filterSize > 4){
> -                        int16_t *tmp2 = av_malloc(dstW * filterSize * 2);
> -                        memcpy(tmp2, filter, dstW * filterSize * 2);
> -                        for (i = 0; i < dstW; i += 16){//pixel
> -                            for (k = 0; k < filterSize / 4; ++k){//fcoeff
> -                                for (j = 0; j < 16; ++j){//inner pixel
> -                                    for (l = 0; l < 4; ++l){//coeff
> -                                        int from = i * filterSize + j *
> filterSize + k * 4 + l;
> -                                        int to = (i) * filterSize + j * 4
> + l + k * 64;
> -                                        filter[to] = tmp2[from];
> -                                    }
> -                                }
> -                            }
> -                        }
> -                        av_free(tmp2);
> -                    }
> -                }
> -            }
> +        if ((c->srcBpc == 8) && (c->dstBpc <= 14)) {
> +           int16_t *filterCopy = NULL;
> +           if (filterSize > 4) {
> +               if (!FF_ALLOC_TYPED_ARRAY(filterCopy, dstW * filterSize))
> +                   return AVERROR(ENOMEM);
> +               memcpy(filterCopy, filter, dstW * filterSize *
> sizeof(int16_t));
> +           }
> +           // Do not swap filterPos for pixels which won't be processed by
> +           // the main loop.
> +           for (i = 0; i + 8 <= dstW; i += 8) {
> +               FFSWAP(int, filterPos[i + 2], filterPos[i + 4]);
> +               FFSWAP(int, filterPos[i + 3], filterPos[i + 5]);
> +           }
> +           if (filterSize > 4) {
> +               // 16 pixels are processed at a time.
> +               for (i = 0; i + 16 <= dstW; i += 16) {
> +                   // 4 filter coeffs are processed at a time.
> +                   for (k = 0; k + 4 <= filterSize; k += 4) {
> +                       for (j = 0; j < 16; ++j) {
> +                           int from = (i + j) * filterSize + k;
> +                           int to = i * filterSize + j * 4 + k * 16;
> +                           memcpy(&filter[to], &filterCopy[from], 4 *
> sizeof(int16_t));
> +                       }
> +                   }
> +               }
> +           }
> +           if (filterCopy)
> +               av_free(filterCopy);
>          }
>      }
>  #endif
> +    return 0;
>  }
>
>  int sws_isSupportedInput(enum AVPixelFormat pix_fmt)
> @@ -1836,7 +1844,8 @@ av_cold int sws_init_context(SwsContext *c,
> SwsFilter *srcFilter,
>                             get_local_pos(c, 0, 0, 0),
>                             get_local_pos(c, 0, 0, 0))) < 0)
>                  goto fail;
> -            ff_shuffle_filter_coefficients(c, c->hLumFilterPos,
> c->hLumFilterSize, c->hLumFilter, dstW);
> +            if ((ret = ff_shuffle_filter_coefficients(c,
> c->hLumFilterPos, c->hLumFilterSize, c->hLumFilter, dstW)) != 0)
> +                goto nomem;
>              if ((ret = initFilter(&c->hChrFilter, &c->hChrFilterPos,
>                             &c->hChrFilterSize, c->chrXInc,
>                             c->chrSrcW, c->chrDstW, filterAlign, 1 << 14,
> @@ -1846,7 +1855,8 @@ av_cold int sws_init_context(SwsContext *c,
> SwsFilter *srcFilter,
>                             get_local_pos(c, c->chrSrcHSubSample,
> c->src_h_chr_pos, 0),
>                             get_local_pos(c, c->chrDstHSubSample,
> c->dst_h_chr_pos, 0))) < 0)
>                  goto fail;
> -            ff_shuffle_filter_coefficients(c, c->hChrFilterPos,
> c->hChrFilterSize, c->hChrFilter, c->chrDstW);
> +            if ((ret = ff_shuffle_filter_coefficients(c,
> c->hChrFilterPos, c->hChrFilterSize, c->hChrFilter, c->chrDstW)) != 0)
> +                goto nomem;
>          }
>      } // initialize horizontal stuff
>
> --
> 2.34.1.575.g55b058a8bb-goog
>
>
Michael Niedermayer Feb. 3, 2022, 2:11 p.m. UTC | #2
On Mon, Jan 10, 2022 at 03:58:33PM +0100, Alan Kelly wrote:
> Make the code more readable, follow the style guide and propagate memory
> allocation errors.

Cosmetics and bugfixes should not be in the same patch


> ---
>  libswscale/swscale_internal.h |  2 +-
>  libswscale/utils.c            | 68 ++++++++++++++++++++---------------
>  2 files changed, 40 insertions(+), 30 deletions(-)
> 
> diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
> index 3a78d95ba6..26d28d42e6 100644
> --- a/libswscale/swscale_internal.h
> +++ b/libswscale/swscale_internal.h
> @@ -1144,5 +1144,5 @@ void ff_sws_slice_worker(void *priv, int jobnr, int threadnr,
>  #define MAX_LINES_AHEAD 4
>  
>  //shuffle filter and filterPos for hyScale and hcScale filters in avx2
> -void ff_shuffle_filter_coefficients(SwsContext *c, int* filterPos, int filterSize, int16_t *filter, int dstW);
> +int ff_shuffle_filter_coefficients(SwsContext *c, int* filterPos, int filterSize, int16_t *filter, int dstW);
>  #endif /* SWSCALE_SWSCALE_INTERNAL_H */
> diff --git a/libswscale/utils.c b/libswscale/utils.c
> index c5ea8853d5..52f07e1661 100644
> --- a/libswscale/utils.c
> +++ b/libswscale/utils.c
> @@ -278,39 +278,47 @@ static const FormatEntry format_entries[] = {
>      [AV_PIX_FMT_P416LE]      = { 1, 1 },
>  };
>  
> -void ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos, int filterSize, int16_t *filter, int dstW){
> +int ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos,
> +                                   int filterSize, int16_t *filter,
> +                                   int dstW)
> +{
>  #if ARCH_X86_64

> -    int i, j, k, l;
> +    int i = 0, j = 0, k = 0;

why?
they are set when used if iam not mistaken


>      int cpu_flags = av_get_cpu_flags();

> +    if (!filter || dstW % 16 != 0) return 0;

please add \n also a comment what the dstW & 16 case exactly does and why


[...]
>  int sws_isSupportedInput(enum AVPixelFormat pix_fmt)
> @@ -1836,7 +1844,8 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
>                             get_local_pos(c, 0, 0, 0),
>                             get_local_pos(c, 0, 0, 0))) < 0)
>                  goto fail;
> -            ff_shuffle_filter_coefficients(c, c->hLumFilterPos, c->hLumFilterSize, c->hLumFilter, dstW);
> +            if ((ret = ff_shuffle_filter_coefficients(c, c->hLumFilterPos, c->hLumFilterSize, c->hLumFilter, dstW)) != 0)
> +                goto nomem;

This is confusing as ret is never used, also error codes are <0

thx

[...]
Alan Kelly Feb. 9, 2022, 9:17 a.m. UTC | #3
Hi Michael,

Thanks for your feedback. I have updated the patches and split this patch
into two, one with cosmetic fixes and one propagating the errors. Since
there is now an extra patch in the set and the commit messages have
changed, new threads have been started.

Alan

On Thu, Feb 3, 2022 at 3:11 PM Michael Niedermayer <michael@niedermayer.cc>
wrote:

> On Mon, Jan 10, 2022 at 03:58:33PM +0100, Alan Kelly wrote:
> > Make the code more readable, follow the style guide and propagate memory
> > allocation errors.
>
> Cosmetics and bugfixes should not be in the same patch
>
>
> > ---
> >  libswscale/swscale_internal.h |  2 +-
> >  libswscale/utils.c            | 68 ++++++++++++++++++++---------------
> >  2 files changed, 40 insertions(+), 30 deletions(-)
> >
> > diff --git a/libswscale/swscale_internal.h
> b/libswscale/swscale_internal.h
> > index 3a78d95ba6..26d28d42e6 100644
> > --- a/libswscale/swscale_internal.h
> > +++ b/libswscale/swscale_internal.h
> > @@ -1144,5 +1144,5 @@ void ff_sws_slice_worker(void *priv, int jobnr,
> int threadnr,
> >  #define MAX_LINES_AHEAD 4
> >
> >  //shuffle filter and filterPos for hyScale and hcScale filters in avx2
> > -void ff_shuffle_filter_coefficients(SwsContext *c, int* filterPos, int
> filterSize, int16_t *filter, int dstW);
> > +int ff_shuffle_filter_coefficients(SwsContext *c, int* filterPos, int
> filterSize, int16_t *filter, int dstW);
> >  #endif /* SWSCALE_SWSCALE_INTERNAL_H */
> > diff --git a/libswscale/utils.c b/libswscale/utils.c
> > index c5ea8853d5..52f07e1661 100644
> > --- a/libswscale/utils.c
> > +++ b/libswscale/utils.c
> > @@ -278,39 +278,47 @@ static const FormatEntry format_entries[] = {
> >      [AV_PIX_FMT_P416LE]      = { 1, 1 },
> >  };
> >
> > -void ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos, int
> filterSize, int16_t *filter, int dstW){
> > +int ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos,
> > +                                   int filterSize, int16_t *filter,
> > +                                   int dstW)
> > +{
> >  #if ARCH_X86_64
>
> > -    int i, j, k, l;
> > +    int i = 0, j = 0, k = 0;
>
> why?
> they are set when used if iam not mistaken
>
>
> >      int cpu_flags = av_get_cpu_flags();
>
> > +    if (!filter || dstW % 16 != 0) return 0;
>
> please add \n also a comment what the dstW & 16 case exactly does and why
>
>
> [...]
> >  int sws_isSupportedInput(enum AVPixelFormat pix_fmt)
> > @@ -1836,7 +1844,8 @@ av_cold int sws_init_context(SwsContext *c,
> SwsFilter *srcFilter,
> >                             get_local_pos(c, 0, 0, 0),
> >                             get_local_pos(c, 0, 0, 0))) < 0)
> >                  goto fail;
> > -            ff_shuffle_filter_coefficients(c, c->hLumFilterPos,
> c->hLumFilterSize, c->hLumFilter, dstW);
> > +            if ((ret = ff_shuffle_filter_coefficients(c,
> c->hLumFilterPos, c->hLumFilterSize, c->hLumFilter, dstW)) != 0)
> > +                goto nomem;
>
> This is confusing as ret is never used, also error codes are <0
>
> thx
>
> [...]
> --
> Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
>
> Those who are best at talking, realize last or never when they are wrong.
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
diff mbox series

Patch

diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 3a78d95ba6..26d28d42e6 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -1144,5 +1144,5 @@  void ff_sws_slice_worker(void *priv, int jobnr, int threadnr,
 #define MAX_LINES_AHEAD 4
 
 //shuffle filter and filterPos for hyScale and hcScale filters in avx2
-void ff_shuffle_filter_coefficients(SwsContext *c, int* filterPos, int filterSize, int16_t *filter, int dstW);
+int ff_shuffle_filter_coefficients(SwsContext *c, int* filterPos, int filterSize, int16_t *filter, int dstW);
 #endif /* SWSCALE_SWSCALE_INTERNAL_H */
diff --git a/libswscale/utils.c b/libswscale/utils.c
index c5ea8853d5..52f07e1661 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -278,39 +278,47 @@  static const FormatEntry format_entries[] = {
     [AV_PIX_FMT_P416LE]      = { 1, 1 },
 };
 
-void ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos, int filterSize, int16_t *filter, int dstW){
+int ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos,
+                                   int filterSize, int16_t *filter,
+                                   int dstW)
+{
 #if ARCH_X86_64
-    int i, j, k, l;
+    int i = 0, j = 0, k = 0;
     int cpu_flags = av_get_cpu_flags();
+    if (!filter || dstW % 16 != 0) return 0;
     if (EXTERNAL_AVX2_FAST(cpu_flags) && !(cpu_flags & AV_CPU_FLAG_SLOW_GATHER)) {
-        if ((c->srcBpc == 8) && (c->dstBpc <= 14)){
-            if (dstW % 16 == 0){
-                if (filter != NULL){
-                    for (i = 0; i < dstW; i += 8){
-                        FFSWAP(int, filterPos[i + 2], filterPos[i+4]);
-                        FFSWAP(int, filterPos[i + 3], filterPos[i+5]);
-                    }
-                    if (filterSize > 4){
-                        int16_t *tmp2 = av_malloc(dstW * filterSize * 2);
-                        memcpy(tmp2, filter, dstW * filterSize * 2);
-                        for (i = 0; i < dstW; i += 16){//pixel
-                            for (k = 0; k < filterSize / 4; ++k){//fcoeff
-                                for (j = 0; j < 16; ++j){//inner pixel
-                                    for (l = 0; l < 4; ++l){//coeff
-                                        int from = i * filterSize + j * filterSize + k * 4 + l;
-                                        int to = (i) * filterSize + j * 4 + l + k * 64;
-                                        filter[to] = tmp2[from];
-                                    }
-                                }
-                            }
-                        }
-                        av_free(tmp2);
-                    }
-                }
-            }
+        if ((c->srcBpc == 8) && (c->dstBpc <= 14)) {
+           int16_t *filterCopy = NULL;
+           if (filterSize > 4) {
+               if (!FF_ALLOC_TYPED_ARRAY(filterCopy, dstW * filterSize))
+                   return AVERROR(ENOMEM);
+               memcpy(filterCopy, filter, dstW * filterSize * sizeof(int16_t));
+           }
+           // Do not swap filterPos for pixels which won't be processed by
+           // the main loop.
+           for (i = 0; i + 8 <= dstW; i += 8) {
+               FFSWAP(int, filterPos[i + 2], filterPos[i + 4]);
+               FFSWAP(int, filterPos[i + 3], filterPos[i + 5]);
+           }
+           if (filterSize > 4) {
+               // 16 pixels are processed at a time.
+               for (i = 0; i + 16 <= dstW; i += 16) {
+                   // 4 filter coeffs are processed at a time.
+                   for (k = 0; k + 4 <= filterSize; k += 4) {
+                       for (j = 0; j < 16; ++j) {
+                           int from = (i + j) * filterSize + k;
+                           int to = i * filterSize + j * 4 + k * 16;
+                           memcpy(&filter[to], &filterCopy[from], 4 * sizeof(int16_t));
+                       }
+                   }
+               }
+           }
+           if (filterCopy)
+               av_free(filterCopy);
         }
     }
 #endif
+    return 0;
 }
 
 int sws_isSupportedInput(enum AVPixelFormat pix_fmt)
@@ -1836,7 +1844,8 @@  av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
                            get_local_pos(c, 0, 0, 0),
                            get_local_pos(c, 0, 0, 0))) < 0)
                 goto fail;
-            ff_shuffle_filter_coefficients(c, c->hLumFilterPos, c->hLumFilterSize, c->hLumFilter, dstW);
+            if ((ret = ff_shuffle_filter_coefficients(c, c->hLumFilterPos, c->hLumFilterSize, c->hLumFilter, dstW)) != 0)
+                goto nomem;
             if ((ret = initFilter(&c->hChrFilter, &c->hChrFilterPos,
                            &c->hChrFilterSize, c->chrXInc,
                            c->chrSrcW, c->chrDstW, filterAlign, 1 << 14,
@@ -1846,7 +1855,8 @@  av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
                            get_local_pos(c, c->chrSrcHSubSample, c->src_h_chr_pos, 0),
                            get_local_pos(c, c->chrDstHSubSample, c->dst_h_chr_pos, 0))) < 0)
                 goto fail;
-            ff_shuffle_filter_coefficients(c, c->hChrFilterPos, c->hChrFilterSize, c->hChrFilter, c->chrDstW);
+            if ((ret = ff_shuffle_filter_coefficients(c, c->hChrFilterPos, c->hChrFilterSize, c->hChrFilter, c->chrDstW)) != 0)
+                goto nomem;
         }
     } // initialize horizontal stuff