diff mbox series

[FFmpeg-devel] avfilter: add scale2ref_npp video filter

Message ID BN9PR12MB527441E86A16A193AA632004D2D99@BN9PR12MB5274.namprd12.prod.outlook.com
State New
Headers show
Series [FFmpeg-devel] avfilter: add scale2ref_npp video filter
Related show

Commit Message

Roman Arzumanyan Sept. 13, 2021, 9:07 a.m. UTC
This patch adds scale2ref_npp video filter which is similar to scale2ref, but accelerated by NPP. CLI sample:


./ffmpeg \

  -hwaccel cuda -hwaccel_output_format cuda \

  -i ./bbb_sunflower_1080p_30fps_normal.mp4 \

  -i ./920px-Wilber-huge-alpha.png \

  -filter_complex "[0:v]scale_npp=format=yuv420p[v0];[1:v]hwupload_cuda[v1];[v1][v0]scale2ref_npp=w=oh*mdar:h=ih/4[foreg][backg];[backg][foreg]overlay_cuda=x=(main_w-overlay_w),scale_npp=w=1280:720[out]" \

  -map "[out]" -c:v h264_nvenc -y ./output_overlay.mp4
Subject: [PATCH] scale2ref_npp filter added

---
 libavfilter/allfilters.c   |   1 +
 libavfilter/vf_scale_npp.c | 531 +++++++++++++++++++++++++++++++++++--
 2 files changed, 509 insertions(+), 23 deletions(-)
diff mbox series

Patch

diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 0c6b2347c8..6a8ae5a99e 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -389,6 +389,7 @@  extern const AVFilter ff_vf_scale_qsv;
 extern const AVFilter ff_vf_scale_vaapi;
 extern const AVFilter ff_vf_scale_vulkan;
 extern const AVFilter ff_vf_scale2ref;
+extern const AVFilter ff_vf_scale2ref_npp;
 extern const AVFilter ff_vf_scdet;
 extern const AVFilter ff_vf_scroll;
 extern const AVFilter ff_vf_segment;
diff --git a/libavfilter/vf_scale_npp.c b/libavfilter/vf_scale_npp.c
index 3e25c2c95f..9fa04b40e6 100644
--- a/libavfilter/vf_scale_npp.c
+++ b/libavfilter/vf_scale_npp.c
@@ -32,7 +32,10 @@ 
 #include "libavutil/cuda_check.h"
 #include "libavutil/internal.h"
 #include "libavutil/opt.h"
+#include "libavutil/parseutils.h"
+#include "libavutil/eval.h"
 #include "libavutil/pixdesc.h"
+#include "libswscale/swscale.h"
 
 #include "avfilter.h"
 #include "formats.h"
@@ -44,12 +47,13 @@ 
 
 static const enum AVPixelFormat supported_formats[] = {
     AV_PIX_FMT_YUV420P,
+    AV_PIX_FMT_YUVA420P,
     AV_PIX_FMT_NV12,
     AV_PIX_FMT_YUV444P,
 };
 
-static const enum AVPixelFormat deinterleaved_formats[][2] = {
-    { AV_PIX_FMT_NV12, AV_PIX_FMT_YUV420P },
+static const enum AVPixelFormat deinterleaved_formats[][3] = {
+    { AV_PIX_FMT_NV12, AV_PIX_FMT_YUV420P,  AV_PIX_FMT_YUVA420P },
 };
 
 enum ScaleStage {
@@ -67,12 +71,74 @@  typedef struct NPPScaleStageContext {
     struct {
         int width;
         int height;
-    } planes_in[3], planes_out[3];
+    } planes_in[4], planes_out[4];
 
     AVBufferRef *frames_ctx;
     AVFrame     *frame;
 } NPPScaleStageContext;
 
+static const char *const var_names[] = {
+    "in_w",   "iw",
+    "in_h",   "ih",
+    "out_w",  "ow",
+    "out_h",  "oh",
+    "a",
+    "sar",
+    "dar",
+    "hsub",
+    "vsub",
+    "ohsub",
+    "ovsub",
+    "n",
+    "t",
+    "pos",
+    "main_w",
+    "main_h",
+    "main_a",
+    "main_sar",
+    "main_dar", "mdar",
+    "main_hsub",
+    "main_vsub",
+    "main_n",
+    "main_t",
+    "main_pos",
+    NULL
+};
+
+enum var_name {
+    VAR_IN_W,   VAR_IW,
+    VAR_IN_H,   VAR_IH,
+    VAR_OUT_W,  VAR_OW,
+    VAR_OUT_H,  VAR_OH,
+    VAR_A,
+    VAR_SAR,
+    VAR_DAR,
+    VAR_HSUB,
+    VAR_VSUB,
+    VAR_OHSUB,
+    VAR_OVSUB,
+    VAR_N,
+    VAR_T,
+    VAR_POS,
+    VAR_S2R_MAIN_W,
+    VAR_S2R_MAIN_H,
+    VAR_S2R_MAIN_A,
+    VAR_S2R_MAIN_SAR,
+    VAR_S2R_MAIN_DAR, VAR_S2R_MDAR,
+    VAR_S2R_MAIN_HSUB,
+    VAR_S2R_MAIN_VSUB,
+    VAR_S2R_MAIN_N,
+    VAR_S2R_MAIN_T,
+    VAR_S2R_MAIN_POS,
+    VARS_NB
+};
+
+enum EvalMode {
+    EVAL_MODE_INIT,
+    EVAL_MODE_FRAME,
+    EVAL_MODE_NB
+};
+
 typedef struct NPPScaleContext {
     const AVClass *class;
 
@@ -102,8 +168,33 @@  typedef struct NPPScaleContext {
     int force_divisible_by;
 
     int interp_algo;
+    struct SwsContext *sws;
+    struct SwsContext *isws[2];
+    AVDictionary *opts;
+
+    char *size_str;
+    char *flags_str;
+
+    int interlaced;
+
+    int in_range;
+    int out_range;
+
+    AVExpr *w_pexpr;
+    AVExpr *h_pexpr;
+    
+    double var_values[VARS_NB];
+    double param[2]; 
+    
+    int eval_mode;
+
+    unsigned int flags;
 } NPPScaleContext;
 
+const AVFilter ff_vf_scale2ref_npp;
+
+static int config_props(AVFilterLink *outlink);
+
 static int nppscale_init(AVFilterContext *ctx)
 {
     NPPScaleContext *s = ctx->priv;
@@ -131,6 +222,256 @@  static int nppscale_init(AVFilterContext *ctx)
     return 0;
 }
 
+static int check_exprs(AVFilterContext *ctx)
+{
+    NPPScaleContext *scale = ctx->priv;
+    unsigned vars_w[VARS_NB] = { 0 }, vars_h[VARS_NB] = { 0 };
+
+    if (!scale->w_pexpr && !scale->h_pexpr)
+        return AVERROR(EINVAL);
+
+    if (scale->w_pexpr)
+        av_expr_count_vars(scale->w_pexpr, vars_w, VARS_NB);
+    if (scale->h_pexpr)
+        av_expr_count_vars(scale->h_pexpr, vars_h, VARS_NB);
+
+    if (vars_w[VAR_OUT_W] || vars_w[VAR_OW]) {
+        av_log(ctx, AV_LOG_ERROR, "Width expression cannot be self-referencing: '%s'.\n", scale->w_expr);
+        return AVERROR(EINVAL);
+    }
+
+    if (vars_h[VAR_OUT_H] || vars_h[VAR_OH]) {
+        av_log(ctx, AV_LOG_ERROR, "Height expression cannot be self-referencing: '%s'.\n", scale->h_expr);
+        return AVERROR(EINVAL);
+    }
+
+    if ((vars_w[VAR_OUT_H] || vars_w[VAR_OH]) &&
+        (vars_h[VAR_OUT_W] || vars_h[VAR_OW])) {
+        av_log(ctx, AV_LOG_WARNING, "Circular references detected for width '%s' and height '%s' - possibly invalid.\n", scale->w_expr, scale->h_expr);
+    }
+
+    if (ctx->filter != &ff_vf_scale2ref_npp &&
+        (vars_w[VAR_S2R_MAIN_W]    || vars_h[VAR_S2R_MAIN_W]    ||
+         vars_w[VAR_S2R_MAIN_H]    || vars_h[VAR_S2R_MAIN_H]    ||
+         vars_w[VAR_S2R_MAIN_A]    || vars_h[VAR_S2R_MAIN_A]    ||
+         vars_w[VAR_S2R_MAIN_SAR]  || vars_h[VAR_S2R_MAIN_SAR]  ||
+         vars_w[VAR_S2R_MAIN_DAR]  || vars_h[VAR_S2R_MAIN_DAR]  ||
+         vars_w[VAR_S2R_MDAR]      || vars_h[VAR_S2R_MDAR]      ||
+         vars_w[VAR_S2R_MAIN_HSUB] || vars_h[VAR_S2R_MAIN_HSUB] ||
+         vars_w[VAR_S2R_MAIN_VSUB] || vars_h[VAR_S2R_MAIN_VSUB] ||
+         vars_w[VAR_S2R_MAIN_N]    || vars_h[VAR_S2R_MAIN_N]    ||
+         vars_w[VAR_S2R_MAIN_T]    || vars_h[VAR_S2R_MAIN_T]    ||
+         vars_w[VAR_S2R_MAIN_POS]  || vars_h[VAR_S2R_MAIN_POS]) ) {
+        av_log(ctx, AV_LOG_ERROR, "Expressions with scale2ref_npp variables are not valid in scale_npp filter.\n");
+        return AVERROR(EINVAL);
+    }
+
+    if (scale->eval_mode == EVAL_MODE_INIT &&
+        (vars_w[VAR_N]            || vars_h[VAR_N]           ||
+         vars_w[VAR_T]            || vars_h[VAR_T]           ||
+         vars_w[VAR_POS]          || vars_h[VAR_POS]         ||
+         vars_w[VAR_S2R_MAIN_N]   || vars_h[VAR_S2R_MAIN_N]  ||
+         vars_w[VAR_S2R_MAIN_T]   || vars_h[VAR_S2R_MAIN_T]  ||
+         vars_w[VAR_S2R_MAIN_POS] || vars_h[VAR_S2R_MAIN_POS]) ) {
+        av_log(ctx, AV_LOG_ERROR, "Expressions with frame variables 'n', 't', 'pos' are not valid in init eval_mode.\n");
+        return AVERROR(EINVAL);
+    }
+
+    return 0;
+}
+
+static int scale_parse_expr(AVFilterContext *ctx, char *str_expr, AVExpr **pexpr_ptr, const char *var, const char *args)
+{
+    NPPScaleContext *scale = ctx->priv;
+    int ret, is_inited = 0;
+    char *old_str_expr = NULL;
+    AVExpr *old_pexpr = NULL;
+
+    if (str_expr) {
+        old_str_expr = av_strdup(str_expr);
+        if (!old_str_expr)
+            return AVERROR(ENOMEM);
+        av_opt_set(scale, var, args, 0);
+    }
+
+    if (*pexpr_ptr) {
+        old_pexpr = *pexpr_ptr;
+        *pexpr_ptr = NULL;
+        is_inited = 1;
+    }
+
+    ret = av_expr_parse(pexpr_ptr, args, var_names,
+                        NULL, NULL, NULL, NULL, 0, ctx);
+    if (ret < 0) {
+        av_log(ctx, AV_LOG_ERROR, "Cannot parse expression for %s: '%s'\n", var, args);
+        goto revert;
+    }
+
+    ret = check_exprs(ctx);
+    if (ret < 0)
+        goto revert;
+
+    if (is_inited && (ret = config_props(ctx->outputs[0])) < 0)
+        goto revert;
+
+    av_expr_free(old_pexpr);
+    old_pexpr = NULL;
+    av_freep(&old_str_expr);
+
+    return 0;
+
+revert:
+    av_expr_free(*pexpr_ptr);
+    *pexpr_ptr = NULL;
+    if (old_str_expr) {
+        av_opt_set(scale, var, old_str_expr, 0);
+        av_free(old_str_expr);
+    }
+    if (old_pexpr)
+        *pexpr_ptr = old_pexpr;
+
+    return ret;
+}
+
+static av_cold int init_dict(AVFilterContext *ctx, AVDictionary **opts)
+{
+    NPPScaleContext *scale = ctx->priv;
+    int ret;
+
+    ret = nppscale_init(ctx);
+    if (0 != ret) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to initialize filter.\n");
+        return AVERROR(EINVAL);
+    }
+
+    if (scale->size_str && (scale->w_expr || scale->h_expr)) {
+        av_log(ctx, AV_LOG_ERROR,
+               "Size and width/height expressions cannot be set at the same time.\n");
+            return AVERROR(EINVAL);
+    }
+
+    if (scale->w_expr && !scale->h_expr)
+        FFSWAP(char *, scale->w_expr, scale->size_str);
+
+    if (scale->size_str) {
+        char buf[32];
+        if ((ret = av_parse_video_size(&scale->w, &scale->h, scale->size_str)) < 0) {
+            av_log(ctx, AV_LOG_ERROR,
+                   "Invalid size '%s'\n", scale->size_str);
+            return ret;
+        }
+        snprintf(buf, sizeof(buf)-1, "%d", scale->w);
+        av_opt_set(scale, "w", buf, 0);
+        snprintf(buf, sizeof(buf)-1, "%d", scale->h);
+        av_opt_set(scale, "h", buf, 0);
+    }
+    if (!scale->w_expr)
+        av_opt_set(scale, "w", "iw", 0);
+    if (!scale->h_expr)
+        av_opt_set(scale, "h", "ih", 0);
+
+    ret = scale_parse_expr(ctx, NULL, &scale->w_pexpr, "width", scale->w_expr);
+    if (ret < 0)
+        return ret;
+
+    ret = scale_parse_expr(ctx, NULL, &scale->h_pexpr, "height", scale->h_expr);
+    if (ret < 0)
+        return ret;
+
+    av_log(ctx, AV_LOG_VERBOSE, "w:%s h:%s flags:'%s' interl:%d\n",
+           scale->w_expr, scale->h_expr, (char *)av_x_if_null(scale->flags_str, ""), scale->interlaced);
+
+    scale->flags = 0;
+
+    if (scale->flags_str) {
+        const AVClass *class = sws_get_class();
+        const AVOption    *o = av_opt_find(&class, "sws_flags", NULL, 0,
+                                           AV_OPT_SEARCH_FAKE_OBJ);
+        int ret = av_opt_eval_flags(&class, o, scale->flags_str, &scale->flags);
+        if (ret < 0)
+            return ret;
+    }
+    scale->opts = *opts;
+    *opts = NULL;
+
+    return 0;
+}
+
+static int scale_eval_dimensions(AVFilterContext *ctx)
+{
+    NPPScaleContext *scale = ctx->priv;
+    const char scale2ref = ctx->filter == &ff_vf_scale2ref_npp;
+    const AVFilterLink *inlink = scale2ref ? ctx->inputs[1] : ctx->inputs[0];
+    const AVFilterLink *outlink = ctx->outputs[0];
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    const AVPixFmtDescriptor *out_desc = av_pix_fmt_desc_get(outlink->format);
+    char *expr;
+    int eval_w, eval_h;
+    int ret;
+    double res;
+    const AVPixFmtDescriptor *main_desc;
+    const AVFilterLink *main_link;
+
+    if (scale2ref) {
+        main_link = ctx->inputs[0];
+        main_desc = av_pix_fmt_desc_get(main_link->format);
+    }
+
+    scale->var_values[VAR_IN_W]  = scale->var_values[VAR_IW] = inlink->w;
+    scale->var_values[VAR_IN_H]  = scale->var_values[VAR_IH] = inlink->h;
+    scale->var_values[VAR_OUT_W] = scale->var_values[VAR_OW] = NAN;
+    scale->var_values[VAR_OUT_H] = scale->var_values[VAR_OH] = NAN;
+    scale->var_values[VAR_A]     = (double) inlink->w / inlink->h;
+    scale->var_values[VAR_SAR]   = inlink->sample_aspect_ratio.num ?
+        (double) inlink->sample_aspect_ratio.num / inlink->sample_aspect_ratio.den : 1;
+    scale->var_values[VAR_DAR]   = scale->var_values[VAR_A] * scale->var_values[VAR_SAR];
+    scale->var_values[VAR_HSUB]  = 1 << desc->log2_chroma_w;
+    scale->var_values[VAR_VSUB]  = 1 << desc->log2_chroma_h;
+    scale->var_values[VAR_OHSUB] = 1 << out_desc->log2_chroma_w;
+    scale->var_values[VAR_OVSUB] = 1 << out_desc->log2_chroma_h;
+
+    if (scale2ref) {
+        scale->var_values[VAR_S2R_MAIN_W] = main_link->w;
+        scale->var_values[VAR_S2R_MAIN_H] = main_link->h;
+        scale->var_values[VAR_S2R_MAIN_A] = (double) main_link->w / main_link->h;
+        scale->var_values[VAR_S2R_MAIN_SAR] = main_link->sample_aspect_ratio.num ?
+            (double) main_link->sample_aspect_ratio.num / main_link->sample_aspect_ratio.den : 1;
+        scale->var_values[VAR_S2R_MAIN_DAR] = scale->var_values[VAR_S2R_MDAR] =
+            scale->var_values[VAR_S2R_MAIN_A] * scale->var_values[VAR_S2R_MAIN_SAR];
+        scale->var_values[VAR_S2R_MAIN_HSUB] = 1 << main_desc->log2_chroma_w;
+        scale->var_values[VAR_S2R_MAIN_VSUB] = 1 << main_desc->log2_chroma_h;
+    }
+
+    res = av_expr_eval(scale->w_pexpr, scale->var_values, NULL);
+    eval_w = scale->var_values[VAR_OUT_W] = scale->var_values[VAR_OW] = (int) res == 0 ? inlink->w : (int) res;
+
+    res = av_expr_eval(scale->h_pexpr, scale->var_values, NULL);
+    if (isnan(res)) {
+        expr = scale->h_expr;
+        ret = AVERROR(EINVAL);
+        goto fail;
+    }
+    eval_h = scale->var_values[VAR_OUT_H] = scale->var_values[VAR_OH] = (int) res == 0 ? inlink->h : (int) res;
+
+    res = av_expr_eval(scale->w_pexpr, scale->var_values, NULL);
+    if (isnan(res)) {
+        expr = scale->w_expr;
+        ret = AVERROR(EINVAL);
+        goto fail;
+    }
+    eval_w = scale->var_values[VAR_OUT_W] = scale->var_values[VAR_OW] = (int) res == 0 ? inlink->w : (int) res;
+
+    scale->w = eval_w;
+    scale->h = eval_h;
+
+    return 0;
+
+fail:
+    av_log(ctx, AV_LOG_ERROR,
+           "Error when evaluating the expression '%s'.\n", expr);
+    return ret;
+}
+
 static void nppscale_uninit(AVFilterContext *ctx)
 {
     NPPScaleContext *s = ctx->priv;
@@ -141,6 +482,15 @@  static void nppscale_uninit(AVFilterContext *ctx)
         av_buffer_unref(&s->stages[i].frames_ctx);
     }
     av_frame_free(&s->tmp_frame);
+
+    av_expr_free(s->w_pexpr);
+    av_expr_free(s->h_pexpr);
+    s->w_pexpr = s->h_pexpr = NULL;
+    sws_freeContext(s->sws);
+    sws_freeContext(s->isws[0]);
+    sws_freeContext(s->isws[1]);
+    s->sws = NULL;
+    av_dict_free(&s->opts);    
 }
 
 static int nppscale_query_formats(AVFilterContext *ctx)
@@ -148,7 +498,9 @@  static int nppscale_query_formats(AVFilterContext *ctx)
     static const enum AVPixelFormat pixel_formats[] = {
         AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE,
     };
-    return ff_set_common_formats_from_list(ctx, pixel_formats);
+    AVFilterFormats *pix_fmts = ff_make_format_list(pixel_formats);
+
+    return ff_set_common_formats(ctx, pix_fmts);
 }
 
 static int init_stage(NPPScaleStageContext *stage, AVBufferRef *device_ctx)
@@ -172,6 +524,13 @@  static int init_stage(NPPScaleStageContext *stage, AVBufferRef *device_ctx)
         stage->planes_out[i].height = stage->planes_out[0].height >> out_sh;
     }
 
+    if (AV_PIX_FMT_YUVA420P == stage->in_fmt) {
+        stage->planes_in[3].width = stage->planes_in[0].width;
+        stage->planes_in[3].height = stage->planes_in[0].height;
+        stage->planes_out[3].width = stage->planes_out[0].width;
+        stage->planes_out[3].height = stage->planes_out[0].height;
+    }
+
     out_ref = av_hwframe_ctx_alloc(device_ctx);
     if (!out_ref)
         return AVERROR(ENOMEM);
@@ -334,31 +693,36 @@  static int init_processing_chain(AVFilterContext *ctx, int in_width, int in_heig
     return 0;
 }
 
-static int nppscale_config_props(AVFilterLink *outlink)
+static int config_props(AVFilterLink *outlink)
 {
     AVFilterContext *ctx = outlink->src;
-    AVFilterLink *inlink = outlink->src->inputs[0];
+    AVFilterLink *inlink0 = outlink->src->inputs[0];
+    AVFilterLink *inlink = (ctx->filter == &ff_vf_scale2ref_npp) ? 
+                           outlink->src->inputs[1] : 
+                           outlink->src->inputs[0];
     NPPScaleContext *s = ctx->priv;
-    int w, h;
     int ret;
 
-    if ((ret = ff_scale_eval_dimensions(s,
-                                        s->w_expr, s->h_expr,
-                                        inlink, outlink,
-                                        &w, &h)) < 0)
+    ret = (ctx->filter == &ff_vf_scale2ref_npp) ?
+        scale_eval_dimensions(ctx):
+        ff_scale_eval_dimensions(s, s->w_expr, s->h_expr, inlink, outlink, &s->w, &s->h);
+    
+    if (ret  < 0)
         goto fail;
 
-    ff_scale_adjust_dimensions(inlink, &w, &h,
-                               s->force_original_aspect_ratio, s->force_divisible_by);
+    ff_scale_adjust_dimensions(inlink, &s->w, &s->h,
+                               s->force_original_aspect_ratio,
+                               s->force_divisible_by);
 
-    if (((int64_t)h * inlink->w) > INT_MAX  ||
-        ((int64_t)w * inlink->h) > INT_MAX)
+    if (s->w > INT_MAX || s->h > INT_MAX ||
+        (s->h * inlink->w) > INT_MAX ||
+        (s->w * inlink->h) > INT_MAX)
         av_log(ctx, AV_LOG_ERROR, "Rescaled value for width or height is too big.\n");
 
-    outlink->w = w;
-    outlink->h = h;
+    outlink->w = s->w;
+    outlink->h = s->h;
 
-    ret = init_processing_chain(ctx, inlink->w, inlink->h, w, h);
+    ret = init_processing_chain(ctx, inlink0->w, inlink0->h, outlink->w, outlink->h);
     if (ret < 0)
         return ret;
 
@@ -366,8 +730,8 @@  static int nppscale_config_props(AVFilterLink *outlink)
            inlink->w, inlink->h, outlink->w, outlink->h);
 
     if (inlink->sample_aspect_ratio.num)
-        outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h*inlink->w,
-                                                             outlink->w*inlink->h},
+        outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h * inlink->w,
+                                                             outlink->w * inlink->h},
                                                 inlink->sample_aspect_ratio);
     else
         outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
@@ -378,6 +742,23 @@  fail:
     return ret;
 }
 
+static int config_props_ref(AVFilterLink *outlink)
+{
+    AVFilterLink *inlink = outlink->src->inputs[1];
+    AVFilterContext *ctx = outlink->src;
+
+    outlink->w = inlink->w;
+    outlink->h = inlink->h;
+    outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
+    outlink->time_base = inlink->time_base;
+    outlink->frame_rate = inlink->frame_rate;
+
+    // Retain reference to HW context;
+    ctx->outputs[1]->hw_frames_ctx = av_buffer_ref(ctx->inputs[1]->hw_frames_ctx);
+
+    return 0;
+}
+
 static int nppscale_deinterleave(AVFilterContext *ctx, NPPScaleStageContext *stage,
                                  AVFrame *out, AVFrame *in)
 {
@@ -540,6 +921,47 @@  fail:
     return ret;
 }
 
+static int nppscale2ref_filter_frame(AVFilterLink *link, AVFrame *in)
+{
+    NPPScaleContext *scale = link->dst->priv;
+    AVFilterLink *outlink = link->dst->outputs[1];
+    int frame_changed;
+
+    frame_changed = in->width  != link->w ||
+                    in->height != link->h ||
+                    in->format != link->format ||
+                    in->sample_aspect_ratio.den != link->sample_aspect_ratio.den ||
+                    in->sample_aspect_ratio.num != link->sample_aspect_ratio.num;
+
+    if (frame_changed) {
+        link->format = in->format;
+        link->w = in->width;
+        link->h = in->height;
+        link->sample_aspect_ratio.num = in->sample_aspect_ratio.num;
+        link->sample_aspect_ratio.den = in->sample_aspect_ratio.den;
+
+        config_props_ref(outlink);
+    }
+
+    if (scale->eval_mode == EVAL_MODE_FRAME) {
+        scale->var_values[VAR_N] = link->frame_count_out;
+        scale->var_values[VAR_T] = TS2T(in->pts, link->time_base);
+        scale->var_values[VAR_POS] = in->pkt_pos == -1 ? NAN : in->pkt_pos;
+    }
+
+    return ff_filter_frame(outlink, in);
+}
+
+static int request_frame(AVFilterLink *outlink)
+{
+    return ff_request_frame(outlink->src->inputs[0]);
+}
+
+static int request_frame_ref(AVFilterLink *outlink)
+{
+    return ff_request_frame(outlink->src->inputs[1]);
+}
+
 #define OFFSET(x) offsetof(NPPScaleContext, x)
 #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM)
 static const AVOption options[] = {
@@ -576,15 +998,15 @@  static const AVFilterPad nppscale_inputs[] = {
         .name        = "default",
         .type        = AVMEDIA_TYPE_VIDEO,
         .filter_frame = nppscale_filter_frame,
-    },
+    }
 };
 
 static const AVFilterPad nppscale_outputs[] = {
     {
         .name         = "default",
         .type         = AVMEDIA_TYPE_VIDEO,
-        .config_props = nppscale_config_props,
-    },
+        .config_props = config_props,
+    }
 };
 
 const AVFilter ff_vf_scale_npp = {
@@ -604,3 +1026,66 @@  const AVFilter ff_vf_scale_npp = {
 
     .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
 };
+
+static const AVClass *child_class_iterate(void **iter)
+{
+    const AVClass *c = *iter ? NULL : sws_get_class();
+    *iter = (void*)(uintptr_t)c;
+    return c;
+}
+
+static const AVClass nppscale2ref_class = {
+    .class_name = "nppscale2ref",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+    .category   = AV_CLASS_CATEGORY_FILTER,
+    .child_class_iterate = child_class_iterate,    
+};
+
+static const AVFilterPad nppscale2ref_inputs[] = {
+    {
+        .name        = "default",
+        .type        = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = nppscale_filter_frame,
+    },
+    {
+        .name        = "ref",
+        .type        = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = nppscale2ref_filter_frame,
+    }
+};
+
+static const AVFilterPad nppscale2ref_outputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_props,
+        .request_frame= request_frame,
+    },
+    {
+        .name         = "ref",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_props_ref,
+        .request_frame= request_frame_ref,
+    }
+};
+
+const AVFilter ff_vf_scale2ref_npp = {
+    .name          = "scale2ref_npp",
+    .description   = NULL_IF_CONFIG_SMALL("NVIDIA Performance Primitives video "
+                                          "scaling and format conversion to the "
+                                          "given reference."),
+    .init_dict     = init_dict,
+
+    .uninit        = nppscale_uninit,
+    .query_formats = nppscale_query_formats,
+
+    .priv_size = sizeof(NPPScaleContext),
+    .priv_class = &nppscale2ref_class,
+
+    FILTER_INPUTS(nppscale2ref_inputs),
+    FILTER_OUTPUTS(nppscale2ref_outputs),
+
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};