diff mbox

[FFmpeg-devel] lavfi/vf_scale_cuda: Add format option

Message ID 1558655396-11592-1-git-send-email-joshua.allmann@gmail.com
State New
Headers show

Commit Message

Josh Allmann May 23, 2019, 11:49 p.m. UTC
Makes certain usages of the lavfi API easier.
---
 libavfilter/vf_scale_cuda.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

Comments

Timo Rothenpieler May 24, 2019, 1 p.m. UTC | #1
On 24/05/2019 01:49, Josh Allmann wrote:
> Makes certain usages of the lavfi API easier.
> ---
>   libavfilter/vf_scale_cuda.c | 12 +++++++++++-
>   1 file changed, 11 insertions(+), 1 deletion(-)
> 
> diff --git a/libavfilter/vf_scale_cuda.c b/libavfilter/vf_scale_cuda.c
> index b7cdb81081..6b1ef2bb6f 100644
> --- a/libavfilter/vf_scale_cuda.c
> +++ b/libavfilter/vf_scale_cuda.c
> @@ -81,6 +81,7 @@ typedef struct CUDAScaleContext {
>   
>       char *w_expr;               ///< width  expression string
>       char *h_expr;               ///< height expression string
> +    char *format_str;
>   
>       CUcontext   cu_ctx;
>       CUmodule    cu_module;
> @@ -101,7 +102,15 @@ static av_cold int cudascale_init(AVFilterContext *ctx)
>   {
>       CUDAScaleContext *s = ctx->priv;
>   
> -    s->format = AV_PIX_FMT_NONE;
> +    if (!strcmp(s->format_str, "same")) {
> +        s->format = AV_PIX_FMT_NONE;
> +    } else {
> +        s->format = av_get_pix_fmt(s->format_str);
> +        if (s->format == AV_PIX_FMT_NONE) {
> +            av_log(ctx, AV_LOG_ERROR, "Unrecognized pixel format: %s\n", s->format_str);
> +            return AVERROR(EINVAL);
> +        }
> +    }
>       s->frame = av_frame_alloc();
>       if (!s->frame)
>           return AVERROR(ENOMEM);
> @@ -533,6 +542,7 @@ fail:
>   static const AVOption options[] = {
>       { "w",      "Output video width",  OFFSET(w_expr),     AV_OPT_TYPE_STRING, { .str = "iw"   }, .flags = FLAGS },
>       { "h",      "Output video height", OFFSET(h_expr),     AV_OPT_TYPE_STRING, { .str = "ih"   }, .flags = FLAGS },
> +    { "format", "Output pixel format", OFFSET(format_str), AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS },
>       { NULL },
>   };

I'm not sure what to think about a dummy option like this. It might be 
very confusing for users to see a format option, which only accepts a 
single value, "same", and effectively does nothing.

Not strictly against it, since I can see the convenience it adds when 
building command lines, but I'd like some second opinions on this.
Josh Allmann May 24, 2019, 4:27 p.m. UTC | #2
On Fri, 24 May 2019 at 06:00, Timo Rothenpieler <timo@rothenpieler.org> wrote:
>
> On 24/05/2019 01:49, Josh Allmann wrote:
> > Makes certain usages of the lavfi API easier.
> > ---
> >   libavfilter/vf_scale_cuda.c | 12 +++++++++++-
> >   1 file changed, 11 insertions(+), 1 deletion(-)
> >
> > diff --git a/libavfilter/vf_scale_cuda.c b/libavfilter/vf_scale_cuda.c
> > index b7cdb81081..6b1ef2bb6f 100644
> > --- a/libavfilter/vf_scale_cuda.c
> > +++ b/libavfilter/vf_scale_cuda.c
> > @@ -81,6 +81,7 @@ typedef struct CUDAScaleContext {
> >
> >       char *w_expr;               ///< width  expression string
> >       char *h_expr;               ///< height expression string
> > +    char *format_str;
> >
> >       CUcontext   cu_ctx;
> >       CUmodule    cu_module;
> > @@ -101,7 +102,15 @@ static av_cold int cudascale_init(AVFilterContext *ctx)
> >   {
> >       CUDAScaleContext *s = ctx->priv;
> >
> > -    s->format = AV_PIX_FMT_NONE;
> > +    if (!strcmp(s->format_str, "same")) {
> > +        s->format = AV_PIX_FMT_NONE;
> > +    } else {
> > +        s->format = av_get_pix_fmt(s->format_str);
> > +        if (s->format == AV_PIX_FMT_NONE) {
> > +            av_log(ctx, AV_LOG_ERROR, "Unrecognized pixel format: %s\n", s->format_str);
> > +            return AVERROR(EINVAL);
> > +        }
> > +    }
> >       s->frame = av_frame_alloc();
> >       if (!s->frame)
> >           return AVERROR(ENOMEM);
> > @@ -533,6 +542,7 @@ fail:
> >   static const AVOption options[] = {
> >       { "w",      "Output video width",  OFFSET(w_expr),     AV_OPT_TYPE_STRING, { .str = "iw"   }, .flags = FLAGS },
> >       { "h",      "Output video height", OFFSET(h_expr),     AV_OPT_TYPE_STRING, { .str = "ih"   }, .flags = FLAGS },
> > +    { "format", "Output pixel format", OFFSET(format_str), AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS },
> >       { NULL },
> >   };
>
> I'm not sure what to think about a dummy option like this. It might be
> very confusing for users to see a format option, which only accepts a
> single value, "same", and effectively does nothing.
>

Not sure I understand the issue.  "same" is the default (terminology
borrowed from the scale_npp filter), and it'll assign the format to
whatever is passed in (eg, format=yuv420p assigns that).

>
> Not strictly against it, since I can see the convenience it adds when
> building command lines, but I'd like some second opinions on this.
>

Actually I'm using the API, albeit with some of lavfi conveniences to
parse filter strings. This avoids "wiring in" the output format
manually when crossing the lavfi boundary.

Here's a example that demonstrates the issue via CLI (this may
actually be a bug elsewhere?):

Broken:
ffmpeg -loglevel verbose -hwaccel cuvid -c:v h264_cuvid -i input.ts
-an -lavfi scale_cuda=w=426:h=240,hwdownload,format=yuv420p -c:v
libx264 out.ts

Working:
ffmpeg -loglevel verbose -hwaccel cuvid -c:v h264_cuvid -i input.ts
-an -lavfi scale_cuda=w=426:h=240:format=yuv420p,hwdownload,format=yuv420p
-c:v libx264 out.ts
Timo Rothenpieler May 24, 2019, 4:55 p.m. UTC | #3
On 24.05.2019 18:27, Josh Allmann wrote:
> On Fri, 24 May 2019 at 06:00, Timo Rothenpieler <timo@rothenpieler.org> wrote:
>>
>> On 24/05/2019 01:49, Josh Allmann wrote:
>>> Makes certain usages of the lavfi API easier.
>>> ---
>>>    libavfilter/vf_scale_cuda.c | 12 +++++++++++-
>>>    1 file changed, 11 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/libavfilter/vf_scale_cuda.c b/libavfilter/vf_scale_cuda.c
>>> index b7cdb81081..6b1ef2bb6f 100644
>>> --- a/libavfilter/vf_scale_cuda.c
>>> +++ b/libavfilter/vf_scale_cuda.c
>>> @@ -81,6 +81,7 @@ typedef struct CUDAScaleContext {
>>>
>>>        char *w_expr;               ///< width  expression string
>>>        char *h_expr;               ///< height expression string
>>> +    char *format_str;
>>>
>>>        CUcontext   cu_ctx;
>>>        CUmodule    cu_module;
>>> @@ -101,7 +102,15 @@ static av_cold int cudascale_init(AVFilterContext *ctx)
>>>    {
>>>        CUDAScaleContext *s = ctx->priv;
>>>
>>> -    s->format = AV_PIX_FMT_NONE;
>>> +    if (!strcmp(s->format_str, "same")) {
>>> +        s->format = AV_PIX_FMT_NONE;
>>> +    } else {
>>> +        s->format = av_get_pix_fmt(s->format_str);
>>> +        if (s->format == AV_PIX_FMT_NONE) {
>>> +            av_log(ctx, AV_LOG_ERROR, "Unrecognized pixel format: %s\n", s->format_str);
>>> +            return AVERROR(EINVAL);
>>> +        }
>>> +    }
>>>        s->frame = av_frame_alloc();
>>>        if (!s->frame)
>>>            return AVERROR(ENOMEM);
>>> @@ -533,6 +542,7 @@ fail:
>>>    static const AVOption options[] = {
>>>        { "w",      "Output video width",  OFFSET(w_expr),     AV_OPT_TYPE_STRING, { .str = "iw"   }, .flags = FLAGS },
>>>        { "h",      "Output video height", OFFSET(h_expr),     AV_OPT_TYPE_STRING, { .str = "ih"   }, .flags = FLAGS },
>>> +    { "format", "Output pixel format", OFFSET(format_str), AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS },
>>>        { NULL },
>>>    };
>>
>> I'm not sure what to think about a dummy option like this. It might be
>> very confusing for users to see a format option, which only accepts a
>> single value, "same", and effectively does nothing.
>>
> 
> Not sure I understand the issue.  "same" is the default (terminology
> borrowed from the scale_npp filter), and it'll assign the format to
> whatever is passed in (eg, format=yuv420p assigns that).

Oh, I misread that code as just always throwing an error if it's != "same".

Unfortunately, that option is omitted for a reason.
If you look at scalecuda_resize:
https://git.videolan.org/?p=ffmpeg.git;a=blob;f=libavfilter/vf_scale_cuda.c;h=b7cdb81081ff4a34e7b641c533fc23a5714fed61;hb=HEAD#l380

It has the assumption built into it that the output frame has the same 
format as the input frame. So if you were to set format=nv12 and then 
input a yuv420p frame, this will most likely crash or at least severely 
misbehave.

I would not be opposed to scale_cuda gaining the ability to also change 
frame pix_fmts, we are lacking such a filter at the moment if one 
ignores scale_npp.
But in its current state, it can't do that.

>>
>> Not strictly against it, since I can see the convenience it adds when
>> building command lines, but I'd like some second opinions on this.
>>
> 
> Actually I'm using the API, albeit with some of lavfi conveniences to
> parse filter strings. This avoids "wiring in" the output format
> manually when crossing the lavfi boundary.
> 
> Here's a example that demonstrates the issue via CLI (this may
> actually be a bug elsewhere?):
> 
> Broken:
> ffmpeg -loglevel verbose -hwaccel cuvid -c:v h264_cuvid -i input.ts
> -an -lavfi scale_cuda=w=426:h=240,hwdownload,format=yuv420p -c:v
> libx264 out.ts
> 
> Working:
> ffmpeg -loglevel verbose -hwaccel cuvid -c:v h264_cuvid -i input.ts
> -an -lavfi scale_cuda=w=426:h=240:format=yuv420p,hwdownload,format=yuv420p
> -c:v libx264 out.ts

Is this actually working in a sense where the result looks sensible?
Cause with how the code currently is, scale_cuda with format set to 
yuv420p and getting nv12 as input from h264_cuvid will produce a frame 
labeled as yuv420p but containing nv12 data.
Josh Allmann May 24, 2019, 5:44 p.m. UTC | #4
On Fri, 24 May 2019 at 09:55, Timo Rothenpieler <timo@rothenpieler.org> wrote:
>
> On 24.05.2019 18:27, Josh Allmann wrote:
> > On Fri, 24 May 2019 at 06:00, Timo Rothenpieler <timo@rothenpieler.org> wrote:
> >>
> >> On 24/05/2019 01:49, Josh Allmann wrote:
> >>> Makes certain usages of the lavfi API easier.
> >>> ---
> >>>    libavfilter/vf_scale_cuda.c | 12 +++++++++++-
> >>>    1 file changed, 11 insertions(+), 1 deletion(-)
> >>>
> >>> diff --git a/libavfilter/vf_scale_cuda.c b/libavfilter/vf_scale_cuda.c
> >>> index b7cdb81081..6b1ef2bb6f 100644
> >>> --- a/libavfilter/vf_scale_cuda.c
> >>> +++ b/libavfilter/vf_scale_cuda.c
> >>> @@ -81,6 +81,7 @@ typedef struct CUDAScaleContext {
> >>>
> >>>        char *w_expr;               ///< width  expression string
> >>>        char *h_expr;               ///< height expression string
> >>> +    char *format_str;
> >>>
> >>>        CUcontext   cu_ctx;
> >>>        CUmodule    cu_module;
> >>> @@ -101,7 +102,15 @@ static av_cold int cudascale_init(AVFilterContext *ctx)
> >>>    {
> >>>        CUDAScaleContext *s = ctx->priv;
> >>>
> >>> -    s->format = AV_PIX_FMT_NONE;
> >>> +    if (!strcmp(s->format_str, "same")) {
> >>> +        s->format = AV_PIX_FMT_NONE;
> >>> +    } else {
> >>> +        s->format = av_get_pix_fmt(s->format_str);
> >>> +        if (s->format == AV_PIX_FMT_NONE) {
> >>> +            av_log(ctx, AV_LOG_ERROR, "Unrecognized pixel format: %s\n", s->format_str);
> >>> +            return AVERROR(EINVAL);
> >>> +        }
> >>> +    }
> >>>        s->frame = av_frame_alloc();
> >>>        if (!s->frame)
> >>>            return AVERROR(ENOMEM);
> >>> @@ -533,6 +542,7 @@ fail:
> >>>    static const AVOption options[] = {
> >>>        { "w",      "Output video width",  OFFSET(w_expr),     AV_OPT_TYPE_STRING, { .str = "iw"   }, .flags = FLAGS },
> >>>        { "h",      "Output video height", OFFSET(h_expr),     AV_OPT_TYPE_STRING, { .str = "ih"   }, .flags = FLAGS },
> >>> +    { "format", "Output pixel format", OFFSET(format_str), AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS },
> >>>        { NULL },
> >>>    };
> >>
> >> I'm not sure what to think about a dummy option like this. It might be
> >> very confusing for users to see a format option, which only accepts a
> >> single value, "same", and effectively does nothing.
> >>
> >
> > Not sure I understand the issue.  "same" is the default (terminology
> > borrowed from the scale_npp filter), and it'll assign the format to
> > whatever is passed in (eg, format=yuv420p assigns that).
>
> Oh, I misread that code as just always throwing an error if it's != "same".
>
> Unfortunately, that option is omitted for a reason.
> If you look at scalecuda_resize:
> https://git.videolan.org/?p=ffmpeg.git;a=blob;f=libavfilter/vf_scale_cuda.c;h=b7cdb81081ff4a34e7b641c533fc23a5714fed61;hb=HEAD#l380
>
> It has the assumption built into it that the output frame has the same
> format as the input frame. So if you were to set format=nv12 and then
> input a yuv420p frame, this will most likely crash or at least severely
> misbehave.
>
> I would not be opposed to scale_cuda gaining the ability to also change
> frame pix_fmts, we are lacking such a filter at the moment if one
> ignores scale_npp.
> But in its current state, it can't do that.
>

Ah! Makes sense now - thanks for the explanation.

> >>
> >> Not strictly against it, since I can see the convenience it adds when
> >> building command lines, but I'd like some second opinions on this.
> >>
> >
> > Actually I'm using the API, albeit with some of lavfi conveniences to
> > parse filter strings. This avoids "wiring in" the output format
> > manually when crossing the lavfi boundary.
> >
> > Here's a example that demonstrates the issue via CLI (this may
> > actually be a bug elsewhere?):
> >
> > Broken:
> > ffmpeg -loglevel verbose -hwaccel cuvid -c:v h264_cuvid -i input.ts
> > -an -lavfi scale_cuda=w=426:h=240,hwdownload,format=yuv420p -c:v
> > libx264 out.ts
> >
> > Working:
> > ffmpeg -loglevel verbose -hwaccel cuvid -c:v h264_cuvid -i input.ts
> > -an -lavfi scale_cuda=w=426:h=240:format=yuv420p,hwdownload,format=yuv420p
> > -c:v libx264 out.ts
>
> Is this actually working in a sense where the result looks sensible?
> Cause with how the code currently is, scale_cuda with format set to
> yuv420p and getting nv12 as input from h264_cuvid will produce a frame
> labeled as yuv420p but containing nv12 data.
>

You are correct - I didn't look at the output here.
diff mbox

Patch

diff --git a/libavfilter/vf_scale_cuda.c b/libavfilter/vf_scale_cuda.c
index b7cdb81081..6b1ef2bb6f 100644
--- a/libavfilter/vf_scale_cuda.c
+++ b/libavfilter/vf_scale_cuda.c
@@ -81,6 +81,7 @@  typedef struct CUDAScaleContext {
 
     char *w_expr;               ///< width  expression string
     char *h_expr;               ///< height expression string
+    char *format_str;
 
     CUcontext   cu_ctx;
     CUmodule    cu_module;
@@ -101,7 +102,15 @@  static av_cold int cudascale_init(AVFilterContext *ctx)
 {
     CUDAScaleContext *s = ctx->priv;
 
-    s->format = AV_PIX_FMT_NONE;
+    if (!strcmp(s->format_str, "same")) {
+        s->format = AV_PIX_FMT_NONE;
+    } else {
+        s->format = av_get_pix_fmt(s->format_str);
+        if (s->format == AV_PIX_FMT_NONE) {
+            av_log(ctx, AV_LOG_ERROR, "Unrecognized pixel format: %s\n", s->format_str);
+            return AVERROR(EINVAL);
+        }
+    }
     s->frame = av_frame_alloc();
     if (!s->frame)
         return AVERROR(ENOMEM);
@@ -533,6 +542,7 @@  fail:
 static const AVOption options[] = {
     { "w",      "Output video width",  OFFSET(w_expr),     AV_OPT_TYPE_STRING, { .str = "iw"   }, .flags = FLAGS },
     { "h",      "Output video height", OFFSET(h_expr),     AV_OPT_TYPE_STRING, { .str = "ih"   }, .flags = FLAGS },
+    { "format", "Output pixel format", OFFSET(format_str), AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS },
     { NULL },
 };