diff mbox

[FFmpeg-devel,4/6] lavf/vpp: enable video memory accel for transcoding with vpp. lavc/qsv: export symbols "ff_qsv_*" which will be used by vpp. ffmpeg_qsv: set default hwaccel to qsv.

Message ID 1472108839-22207-5-git-send-email-sdk@nablet.com
State Changes Requested
Headers show

Commit Message

Nablet Developer Aug. 25, 2016, 7:07 a.m. UTC
From: ChaoX A Liu <chaox.a.liu@intel.com>

Signed-off-by: ChaoX A Liu <chaox.a.liu@intel.com>
---
 ffmpeg_qsv.c            |  46 ++++++++++++---
 libavcodec/libavcodec.v |   1 +
 libavcodec/qsv.h        |   2 +
 libavfilter/vf_qsvvpp.c | 153 ++++++++++++++++++++++++++++++++++++++++--------
 4 files changed, 172 insertions(+), 30 deletions(-)

Comments

Jean-Baptiste Kempf Aug. 25, 2016, 7:27 a.m. UTC | #1
Idem. Patch does too many things and changes unrelated code.

On 25 Aug, Nablet Developer wrote :
> From: ChaoX A Liu <chaox.a.liu@intel.com>
> 
> Signed-off-by: ChaoX A Liu <chaox.a.liu@intel.com>
> ---
>  ffmpeg_qsv.c            |  46 ++++++++++++---
>  libavcodec/libavcodec.v |   1 +
>  libavcodec/qsv.h        |   2 +
>  libavfilter/vf_qsvvpp.c | 153 ++++++++++++++++++++++++++++++++++++++++--------
>  4 files changed, 172 insertions(+), 30 deletions(-)
> 
> diff --git a/ffmpeg_qsv.c b/ffmpeg_qsv.c
> index 43402d6..aed9240 100644
> --- a/ffmpeg_qsv.c
> +++ b/ffmpeg_qsv.c
> @@ -386,7 +386,7 @@ static mfxStatus frame_alloc(mfxHDL pthis, mfxFrameAllocRequest *request, mfxFra
>      unsigned int va_fourcc = 0;
>      mfxU32 fourcc = request->Info.FourCC;
>      QSVContext *q = pthis;
> -    AVQSVContext *qsv = q->ost->enc_ctx->hwaccel_context;
> +    AVQSVContext *qsv = NULL;
>      mfxU16 numAllocated = 0;
>      bool bCreateSrfSucceeded = false;
>      mfxU32 mfx_fourcc;
> @@ -394,17 +394,40 @@ static mfxStatus frame_alloc(mfxHDL pthis, mfxFrameAllocRequest *request, mfxFra
>      int width32;
>      int height32;
>      void *avctx = NULL;
> +    FilterGraph *fg = q->ost->filter->graph;
>  
> -    av_log(avctx, AV_LOG_INFO, "=========vaapi alloc frame==============\n");
>      if (!request || !response || !request->NumFrameSuggested)
>          return MFX_ERR_MEMORY_ALLOC;
>  
>      memset(response, 0, sizeof(*response));
>      surface_num = request->NumFrameSuggested;
> -    if ((request->Type & MFX_MEMTYPE_EXTERNAL_FRAME) &&
> -            (request->Type & MFX_MEMTYPE_FROM_DECODE))
> -        surface_num += (qsv->nb_encoder_surfaces + qsv->nb_decoder_surfaces);
> +    if (request->Type & MFX_MEMTYPE_FROM_DECODE) {
> +        avctx = input_streams[q->ost->source_index]->dec_ctx;
> +        if (request->Type & MFX_MEMTYPE_EXTERNAL_FRAME) {
> +            AVFilterContext *qsvvpp = avfilter_graph_get_filter(fg->graph, "Parsed_qsvvpp_0");
> +            qsv = input_streams[q->ost->source_index]->dec_ctx->hwaccel_context;
> +            surface_num += qsv->nb_decoder_surfaces;
> +            if (qsvvpp) {
> +                qsv = qsvvpp->hw_device_ctx->data;
> +                surface_num += qsv->nb_vpp_surfaces;
> +            } else {
> +                qsv = q->ost->enc_ctx->hwaccel_context;
> +                surface_num += qsv->nb_encoder_surfaces;
> +            }
> +        }
> +    } else if (request->Type & MFX_MEMTYPE_FROM_VPPOUT) {
> +        AVFilterContext *qsvvpp = avfilter_graph_get_filter(fg->graph, "Parsed_qsvvpp_0");
> +        avctx = qsvvpp;
> +        if (request->Type & MFX_MEMTYPE_EXTERNAL_FRAME) {
> +            qsv = q->ost->enc_ctx->hwaccel_context;
> +            surface_num += qsv->nb_encoder_surfaces;
> +        }
> +    } else if (request->Type & MFX_MEMTYPE_FROM_ENCODE) {
> +        avctx = q->ost->enc_ctx;
> +    } else
> +        av_log(avctx, AV_LOG_WARNING, "FrameAlloc: may get a bug.\n");
>  
> +    av_log(avctx, AV_LOG_INFO, "=========vaapi alloc frame==============\n");
>      av_log(avctx, AV_LOG_INFO, "VAAPI: va_dpy =%p, surface_num=%d, width=%d, height=%d\n",
>              g_session.va_display, surface_num, request->Info.Width, request->Info.Height);
>      av_log(avctx, AV_LOG_INFO, "VAAPI: request->Type=%x\n",request->Type);
> @@ -720,7 +743,7 @@ static int qsv_check_filters(const OutputStream *ost)
>      AVFilterInOut *inputs, *outputs;
>      int ret = 0;
>      int i;
> -    const char *filter_list = "buffer|buffersink|null|format|setpts";
> +    const char *filter_list = "buffer|buffersink|null|format|setpts|qsvvpp";
>  
>      if (!ost->avfilter)
>          return -1;
> @@ -820,6 +843,7 @@ int qsv_transcode_init_vidmem(OutputStream *ost)
>  
>      QSVContext *qsv = NULL;
>      AVQSVContext *enc_hwctx = NULL;
> +    AVQSVContext *vpp_hwctx = NULL;
>  
>      /* check if the encoder supports QSV */
>      if (!ost->enc->pix_fmts)
> @@ -836,6 +860,8 @@ int qsv_transcode_init_vidmem(OutputStream *ost)
>  
>      /* check if the decoder supports QSV and the output only goes to this stream */
>      ist = input_streams[ost->source_index];
> +    if (ist->hwaccel_id == HWACCEL_NONE || ist->hwaccel_id == HWACCEL_AUTO)
> +        ist->hwaccel_id = HWACCEL_QSV;
>      if (ist->nb_filters || ist->hwaccel_id != HWACCEL_QSV ||
>          !ist->dec || !ist->dec->pix_fmts)
>          return 0;
> @@ -854,7 +880,8 @@ int qsv_transcode_init_vidmem(OutputStream *ost)
>  
>      qsv   = av_mallocz(sizeof(*qsv));
>      enc_hwctx = av_qsv_alloc_context();
> -    if (!qsv || !enc_hwctx)
> +    vpp_hwctx = av_qsv_alloc_context();
> +    if (!qsv || !enc_hwctx || !vpp_hwctx)
>          goto fail;
>  
>      err = ff_qsv_init_internal_session(NULL, &g_session);
> @@ -891,6 +918,11 @@ int qsv_transcode_init_vidmem(OutputStream *ost)
>      ist->resample_pix_fmt            = AV_PIX_FMT_QSV;
>      ist->hwaccel_ctx                 = qsv;
>  
> +    vpp_hwctx->session               = qsv->session;
> +    vpp_hwctx->iopattern             = MFX_IOPATTERN_IN_VIDEO_MEMORY;
> +    vpp_hwctx->pFrameAllocator       = &qsv->frame_allocator;
> +    hw_device_ctx = av_buffer_create(vpp_hwctx, sizeof(*vpp_hwctx), av_buffer_default_free, NULL, 0);
> +
>      return 0;
>  
>  fail:
> diff --git a/libavcodec/libavcodec.v b/libavcodec/libavcodec.v
> index 304c2ef..1a4cac8 100644
> --- a/libavcodec/libavcodec.v
> +++ b/libavcodec/libavcodec.v
> @@ -4,6 +4,7 @@ LIBAVCODEC_MAJOR {
>          #deprecated, remove after next bump
>          audio_resample;
>          audio_resample_close;
> +        ff_qsv_*;
>      local:
>          *;
>  };
> diff --git a/libavcodec/qsv.h b/libavcodec/qsv.h
> index ee968d0..3f7b3c8 100644
> --- a/libavcodec/qsv.h
> +++ b/libavcodec/qsv.h
> @@ -96,7 +96,9 @@ typedef struct AVQSVContext {
>       */
>      int opaque_alloc_type;
>  
> +    mfxFrameAllocator *pFrameAllocator;
>      int nb_decoder_surfaces;
> +    int nb_vpp_surfaces;
>      int nb_encoder_surfaces;
>  } AVQSVContext;
>  
> diff --git a/libavfilter/vf_qsvvpp.c b/libavfilter/vf_qsvvpp.c
> index 3a5d4d3..b1245d2 100644
> --- a/libavfilter/vf_qsvvpp.c
> +++ b/libavfilter/vf_qsvvpp.c
> @@ -21,10 +21,80 @@
>   */
>  
>  #include "internal.h"
> +#include <mfx/mfxvideo.h>
> +#include <mfx/mfxplugin.h>
>  #include <float.h>
>  #include "libavutil/parseutils.h"
>  #include "libavutil/timestamp.h"
> +#include "libavutil/avassert.h"
> +#include "libavutil/opt.h"
> +#include "libavutil/time.h"
> +#include "libavutil/avstring.h"
> +#include "libavutil/error.h"
>  #include "libavcodec/qsv.h"
> +#include "libavcodec/qsv_internal.h"
> +
> +// number of video enhancement filters (denoise, procamp, detail, video_analysis, image stab)
> +#define ENH_FILTERS_COUNT           5
> +
> +typedef struct {
> +    const AVClass *class;
> +
> +    AVFilterContext *ctx;
> +
> +    mfxSession session;
> +    QSVSession internal_qs;
> +    int iopattern;
> +
> +    AVRational framerate;                           // target framerate
> +
> +    QSVFrame *in_work_frames;                       // used for video memory
> +    QSVFrame *out_work_frames;                      // used for video memory
> +
> +    mfxFrameSurface1 *in_surface;
> +    mfxFrameSurface1 *out_surface;
> +
> +    mfxFrameAllocRequest req[2];                    // [0] - in, [1] - out
> +    mfxFrameAllocator *pFrameAllocator;
> +    mfxFrameAllocResponse *in_response;
> +    mfxFrameAllocResponse *out_response;
> +
> +    int num_surfaces_in;                            // input surfaces
> +    int num_surfaces_out;                           // output surfaces
> +    int sysmem_cur_out_idx;
> +    int frame_number;
> +    int vpp_ready;
> +    mfxVideoParam *pVppParam;
> +
> +    AVBufferRef *hw_device_ctx;
> +
> +    /* VPP extension */
> +    mfxExtBuffer*       pExtBuf[1+ENH_FILTERS_COUNT];
> +    mfxExtVppAuxData    extVPPAuxData;
> +
> +    /* Video Enhancement Algorithms */
> +    mfxExtVPPDeinterlacing  deinterlace_conf;
> +    mfxExtVPPFrameRateConversion frc_conf;
> +    mfxExtVPPDenoise denoise_conf;
> +    mfxExtVPPDetail detail_conf;
> +    mfxExtVPPComposite composite_conf;
> +
> +    int out_width;
> +    int out_height;
> +    int dpic;                   // destination picture structure
> +                                // -1 = unknown
> +                                // 0 = interlaced top field first
> +                                // 1 = progressive
> +                                // 2 = interlaced bottom field first
> +
> +    int deinterlace;            // deinterlace mode : 0=off, 1=bob, 2=advanced
> +    int denoise;                // Enable Denoise algorithm. Level is the optional value from the interval [0; 100]
> +    int detail;                 // Enable Detail Enhancement algorithm.
> +                                // Level is the optional value from the interval [0; 100]
> +    int async_depth;            // async dept used by encoder
> +    int max_b_frames;           // maxiumum number of b frames used by encoder
> +    int use_frc;                // use framerate conversion
> +} VPPContext;
>  
>  /**
>   * ToDo :
> @@ -120,6 +190,7 @@ static int avpix_fmt_to_mfx_fourcc(int format)
>  static void vidmem_init_surface(VPPContext *vpp)
>  {
>      int i;
> +    AVQSVContext *qsv = (AVQSVContext*)vpp->hw_device_ctx->data;
>  
>      av_log(vpp->ctx, AV_LOG_INFO, "qsvvpp: vidmem_init_surface: ");
>  
> @@ -134,20 +205,17 @@ static void vidmem_init_surface(VPPContext *vpp)
>      /*
>       * We should care about next stage vpp or encoder's input surfaces.
>       */
> -    av_log(vpp->ctx, AV_LOG_INFO, "in.num = %d, out.num = %d, ",
> +    vpp->req[0].NumFrameSuggested = FFMAX(vpp->req[0].NumFrameSuggested, 1);
> +    vpp->req[1].NumFrameSuggested = FFMAX(vpp->req[1].NumFrameSuggested, 1);
> +    av_log(vpp->ctx, AV_LOG_INFO, "in.num = %d, out.num = %d\n",
>              vpp->req[0].NumFrameSuggested, vpp->req[1].NumFrameSuggested);
> -    if (vpp->enc_ctx) {
> -        vpp->req[1].NumFrameSuggested += vpp->enc_ctx->req.NumFrameSuggested;
> -        av_log(vpp->ctx, AV_LOG_INFO, "enc_ctx.num=%d\n", vpp->enc_ctx->req.NumFrameSuggested);
> -    } else {
> -        av_log(vpp->ctx, AV_LOG_INFO, "enc_ctx.num=%d\n", 0);
> -    }
> -
> -    vpp->req[0].NumFrameSuggested  = FFMAX(vpp->req[0].NumFrameSuggested, 1);
> +    qsv->nb_vpp_surfaces = vpp->req[0].NumFrameSuggested;
>  
> -    vpp->num_surfaces_out = FFMAX(vpp->req[1].NumFrameSuggested, 1);
>      vpp->out_response     = av_mallocz(sizeof(*vpp->out_response));
>      VPP_CHECK_POINTER(vpp->out_response);
> +    vpp->pFrameAllocator->Alloc(vpp->pFrameAllocator->pthis, &vpp->req[1], vpp->out_response);
> +
> +    vpp->num_surfaces_out = vpp->out_response->NumFrameActual;
>      vpp->out_surface      = av_mallocz(sizeof(*vpp->out_surface) * vpp->num_surfaces_out);
>      VPP_CHECK_POINTER(vpp->out_surface);
>  
> @@ -333,6 +401,7 @@ static int sysmem_input_get_surface( AVFilterLink *inlink, AVFrame* picref, mfxF
>  
>  static int vidmem_input_get_surface( AVFilterLink *inlink, AVFrame* picref, mfxFrameSurface1 **surface )
>  {
> +    if (picref->format == AV_PIX_FMT_QSV && picref->data[3]) {
>      if (picref->data[3]) {
>          *surface = (mfxFrameSurface1*)picref->data[3];
>      } else {
> @@ -533,11 +602,19 @@ static int initial_vpp( VPPContext *vpp )
>  static int config_vpp(AVFilterLink *inlink, AVFrame * pic)
>  {
>      AVFilterContext *ctx = inlink->dst;
> -    VPPContext *vpp= ctx->priv;
> -    mfxVideoParam mfxParamsVideo;
> -    int           ret;
> +    VPPContext      *vpp = ctx->priv;
> +    mfxVideoParam    mfxParamsVideo;
> +    int              ret;
>  
>      av_log(vpp->ctx, AV_LOG_INFO, "QSVVPP: vpp configuration and call mfxVideoVPP_Init\n");
> +    if (ctx->hw_device_ctx) {
> +        AVQSVContext *qsv    = (AVQSVContext*)ctx->hw_device_ctx->data;
> +        vpp->hw_device_ctx   = av_buffer_ref(ctx->hw_device_ctx);
> +        vpp->pFrameAllocator = qsv->pFrameAllocator;
> +        vpp->iopattern       = qsv->iopattern;
> +        vpp->session         = qsv->session;
> +    }
> +
>      if (!vpp->session) {
>          ret = ff_qsv_init_internal_session(ctx, &vpp->internal_qs);
>          if (ret < 0)
> @@ -573,6 +650,32 @@ static void deconf_vpp(AVFilterContext *ctx)
>      vpp->vpp_ready = 0;
>  }
>  
> +static void vidmem_buffer_free(void *opaque, uint8_t *data)
> +{
> +    //do nothing
> +}
> +
> +static AVFrame *vidmem_buffer_alloc(AVFilterContext *ctx, mfxFrameSurface1 *pSurface)
> +{
> +    AVFrame *frame = av_frame_alloc();
> +    if (!frame)
> +        return NULL;
> +
> +    frame->buf[0] = av_buffer_create((uint8_t*)pSurface, sizeof(pSurface),
> +                        vidmem_buffer_free, NULL, 0);
> +    if (!frame->buf[0]) {
> +        av_frame_free(&frame);
> +        return AVERROR(ENOMEM);
> +    }
> +
> +    frame->data[3] = frame->buf[0]->data;
> +    frame->width   = ctx->outputs[0]->w;
> +    frame->height  = ctx->outputs[0]->h;
> +    frame->format  = ctx->outputs[0]->format;
> +
> +    return frame;
> +}
> +
>  /*
>   * Real filter func.
>   * Push frame into mSDK and pop out filtered frames.
> @@ -619,11 +722,15 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *picref)
>              break;
>          }
>  
> -        /*
> -         * get an AVFrame for output.
> -         * @NOTE: frame buffer is aligned with 128x64 to compat with GPU-copy.
> -         */
> -        out = ff_get_video_buffer(outlink, FFALIGN(vpp->out_width, 128), FFALIGN(vpp->out_height, 64));
> +        if (!vpp->pFrameAllocator) {
> +            /*
> +            * get an AVFrame for output.
> +            * @NOTE: frame buffer is aligned with 128x64 to compat with GPU-copy.
> +            */
> +            out = ff_get_video_buffer(outlink, FFALIGN(vpp->out_width, 128), FFALIGN(vpp->out_height, 64));
> +        } else {
> +            out = vidmem_buffer_alloc(ctx, pOutSurface);
> +        }
>          if (!out) {
>              ret = MFX_ERR_MEMORY_ALLOC;
>              break;
> @@ -670,10 +777,6 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *picref)
>              out->pts = av_rescale_q(pOutSurface->Data.TimeStamp, (AVRational){1,90000}, outlink->time_base);
>          }
>  
> -        /*For video mem, we use AVFrame->data[3] to transfer surface*/
> -        if (vpp->pFrameAllocator)
> -            out->data[3] = (void*) pOutSurface;
> -
>          filter_frame_ret = ff_filter_frame(inlink->dst->outputs[0], out);
>          if (filter_frame_ret < 0)
>              break;
> @@ -722,7 +825,6 @@ static int config_output(AVFilterLink *outlink)
>      outlink->h             = vpp->out_height;
>      outlink->frame_rate    = vpp->framerate;
>      outlink->time_base     = av_inv_q(vpp->framerate);
> -    outlink->format        = AV_PIX_FMT_NV12;
>  
>      return 0;
>  }
> @@ -763,13 +865,18 @@ static av_cold int qsvvpp_init(AVFilterContext *ctx)
>      vpp->vpp_ready       = 0;
>      vpp->ctx             = ctx;
>      vpp->sysmem_cur_out_idx = 0;
> +    vpp->hw_device_ctx   = NULL;
>  
>      return 0;
>  }
>  
>  static av_cold void qsvvpp_uninit(AVFilterContext *ctx)
>  {
> +    VPPContext *vpp = ctx->priv;
> +
>      deconf_vpp(ctx);
> +    if (vpp->hw_device_ctx)
> +        av_buffer_unref(&vpp->hw_device_ctx);
>  }
>  
>  static int qsvvpp_cmd_size(AVFilterContext *ctx, const char *arg)
> -- 
> 2.5.0
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
diff mbox

Patch

diff --git a/ffmpeg_qsv.c b/ffmpeg_qsv.c
index 43402d6..aed9240 100644
--- a/ffmpeg_qsv.c
+++ b/ffmpeg_qsv.c
@@ -386,7 +386,7 @@  static mfxStatus frame_alloc(mfxHDL pthis, mfxFrameAllocRequest *request, mfxFra
     unsigned int va_fourcc = 0;
     mfxU32 fourcc = request->Info.FourCC;
     QSVContext *q = pthis;
-    AVQSVContext *qsv = q->ost->enc_ctx->hwaccel_context;
+    AVQSVContext *qsv = NULL;
     mfxU16 numAllocated = 0;
     bool bCreateSrfSucceeded = false;
     mfxU32 mfx_fourcc;
@@ -394,17 +394,40 @@  static mfxStatus frame_alloc(mfxHDL pthis, mfxFrameAllocRequest *request, mfxFra
     int width32;
     int height32;
     void *avctx = NULL;
+    FilterGraph *fg = q->ost->filter->graph;
 
-    av_log(avctx, AV_LOG_INFO, "=========vaapi alloc frame==============\n");
     if (!request || !response || !request->NumFrameSuggested)
         return MFX_ERR_MEMORY_ALLOC;
 
     memset(response, 0, sizeof(*response));
     surface_num = request->NumFrameSuggested;
-    if ((request->Type & MFX_MEMTYPE_EXTERNAL_FRAME) &&
-            (request->Type & MFX_MEMTYPE_FROM_DECODE))
-        surface_num += (qsv->nb_encoder_surfaces + qsv->nb_decoder_surfaces);
+    if (request->Type & MFX_MEMTYPE_FROM_DECODE) {
+        avctx = input_streams[q->ost->source_index]->dec_ctx;
+        if (request->Type & MFX_MEMTYPE_EXTERNAL_FRAME) {
+            AVFilterContext *qsvvpp = avfilter_graph_get_filter(fg->graph, "Parsed_qsvvpp_0");
+            qsv = input_streams[q->ost->source_index]->dec_ctx->hwaccel_context;
+            surface_num += qsv->nb_decoder_surfaces;
+            if (qsvvpp) {
+                qsv = qsvvpp->hw_device_ctx->data;
+                surface_num += qsv->nb_vpp_surfaces;
+            } else {
+                qsv = q->ost->enc_ctx->hwaccel_context;
+                surface_num += qsv->nb_encoder_surfaces;
+            }
+        }
+    } else if (request->Type & MFX_MEMTYPE_FROM_VPPOUT) {
+        AVFilterContext *qsvvpp = avfilter_graph_get_filter(fg->graph, "Parsed_qsvvpp_0");
+        avctx = qsvvpp;
+        if (request->Type & MFX_MEMTYPE_EXTERNAL_FRAME) {
+            qsv = q->ost->enc_ctx->hwaccel_context;
+            surface_num += qsv->nb_encoder_surfaces;
+        }
+    } else if (request->Type & MFX_MEMTYPE_FROM_ENCODE) {
+        avctx = q->ost->enc_ctx;
+    } else
+        av_log(avctx, AV_LOG_WARNING, "FrameAlloc: may get a bug.\n");
 
+    av_log(avctx, AV_LOG_INFO, "=========vaapi alloc frame==============\n");
     av_log(avctx, AV_LOG_INFO, "VAAPI: va_dpy =%p, surface_num=%d, width=%d, height=%d\n",
             g_session.va_display, surface_num, request->Info.Width, request->Info.Height);
     av_log(avctx, AV_LOG_INFO, "VAAPI: request->Type=%x\n",request->Type);
@@ -720,7 +743,7 @@  static int qsv_check_filters(const OutputStream *ost)
     AVFilterInOut *inputs, *outputs;
     int ret = 0;
     int i;
-    const char *filter_list = "buffer|buffersink|null|format|setpts";
+    const char *filter_list = "buffer|buffersink|null|format|setpts|qsvvpp";
 
     if (!ost->avfilter)
         return -1;
@@ -820,6 +843,7 @@  int qsv_transcode_init_vidmem(OutputStream *ost)
 
     QSVContext *qsv = NULL;
     AVQSVContext *enc_hwctx = NULL;
+    AVQSVContext *vpp_hwctx = NULL;
 
     /* check if the encoder supports QSV */
     if (!ost->enc->pix_fmts)
@@ -836,6 +860,8 @@  int qsv_transcode_init_vidmem(OutputStream *ost)
 
     /* check if the decoder supports QSV and the output only goes to this stream */
     ist = input_streams[ost->source_index];
+    if (ist->hwaccel_id == HWACCEL_NONE || ist->hwaccel_id == HWACCEL_AUTO)
+        ist->hwaccel_id = HWACCEL_QSV;
     if (ist->nb_filters || ist->hwaccel_id != HWACCEL_QSV ||
         !ist->dec || !ist->dec->pix_fmts)
         return 0;
@@ -854,7 +880,8 @@  int qsv_transcode_init_vidmem(OutputStream *ost)
 
     qsv   = av_mallocz(sizeof(*qsv));
     enc_hwctx = av_qsv_alloc_context();
-    if (!qsv || !enc_hwctx)
+    vpp_hwctx = av_qsv_alloc_context();
+    if (!qsv || !enc_hwctx || !vpp_hwctx)
         goto fail;
 
     err = ff_qsv_init_internal_session(NULL, &g_session);
@@ -891,6 +918,11 @@  int qsv_transcode_init_vidmem(OutputStream *ost)
     ist->resample_pix_fmt            = AV_PIX_FMT_QSV;
     ist->hwaccel_ctx                 = qsv;
 
+    vpp_hwctx->session               = qsv->session;
+    vpp_hwctx->iopattern             = MFX_IOPATTERN_IN_VIDEO_MEMORY;
+    vpp_hwctx->pFrameAllocator       = &qsv->frame_allocator;
+    hw_device_ctx = av_buffer_create(vpp_hwctx, sizeof(*vpp_hwctx), av_buffer_default_free, NULL, 0);
+
     return 0;
 
 fail:
diff --git a/libavcodec/libavcodec.v b/libavcodec/libavcodec.v
index 304c2ef..1a4cac8 100644
--- a/libavcodec/libavcodec.v
+++ b/libavcodec/libavcodec.v
@@ -4,6 +4,7 @@  LIBAVCODEC_MAJOR {
         #deprecated, remove after next bump
         audio_resample;
         audio_resample_close;
+        ff_qsv_*;
     local:
         *;
 };
diff --git a/libavcodec/qsv.h b/libavcodec/qsv.h
index ee968d0..3f7b3c8 100644
--- a/libavcodec/qsv.h
+++ b/libavcodec/qsv.h
@@ -96,7 +96,9 @@  typedef struct AVQSVContext {
      */
     int opaque_alloc_type;
 
+    mfxFrameAllocator *pFrameAllocator;
     int nb_decoder_surfaces;
+    int nb_vpp_surfaces;
     int nb_encoder_surfaces;
 } AVQSVContext;
 
diff --git a/libavfilter/vf_qsvvpp.c b/libavfilter/vf_qsvvpp.c
index 3a5d4d3..b1245d2 100644
--- a/libavfilter/vf_qsvvpp.c
+++ b/libavfilter/vf_qsvvpp.c
@@ -21,10 +21,80 @@ 
  */
 
 #include "internal.h"
+#include <mfx/mfxvideo.h>
+#include <mfx/mfxplugin.h>
 #include <float.h>
 #include "libavutil/parseutils.h"
 #include "libavutil/timestamp.h"
+#include "libavutil/avassert.h"
+#include "libavutil/opt.h"
+#include "libavutil/time.h"
+#include "libavutil/avstring.h"
+#include "libavutil/error.h"
 #include "libavcodec/qsv.h"
+#include "libavcodec/qsv_internal.h"
+
+// number of video enhancement filters (denoise, procamp, detail, video_analysis, image stab)
+#define ENH_FILTERS_COUNT           5
+
+typedef struct {                                    // private data of the qsvvpp filter (ctx->priv)
+    const AVClass *class;
+
+    AVFilterContext *ctx;                           // owning filter context, set in qsvvpp_init()
+
+    mfxSession session;                             // copied from the AVQSVContext in config_vpp() when a hw device ctx is set
+    QSVSession internal_qs;                         // initialised by config_vpp() when no session was supplied
+    int iopattern;                                  // copied from the AVQSVContext in config_vpp()
+
+    AVRational framerate;                           // target framerate
+
+    QSVFrame *in_work_frames;                       // used for video memory
+    QSVFrame *out_work_frames;                      // used for video memory
+
+    mfxFrameSurface1 *in_surface;
+    mfxFrameSurface1 *out_surface;
+
+    mfxFrameAllocRequest req[2];                    // [0] - in, [1] - out
+    mfxFrameAllocator *pFrameAllocator;             // copied from the AVQSVContext; non-NULL selects the video-memory output path in filter_frame()
+    mfxFrameAllocResponse *in_response;
+    mfxFrameAllocResponse *out_response;            // filled by pFrameAllocator->Alloc() in vidmem_init_surface()
+
+    int num_surfaces_in;                            // input surfaces
+    int num_surfaces_out;                           // output surfaces
+    int sysmem_cur_out_idx;
+    int frame_number;
+    int vpp_ready;
+    mfxVideoParam *pVppParam;
+
+    AVBufferRef *hw_device_ctx;                     // reference taken in config_vpp(), dropped in qsvvpp_uninit()
+
+    /* VPP extension */
+    mfxExtBuffer*       pExtBuf[1+ENH_FILTERS_COUNT];
+    mfxExtVppAuxData    extVPPAuxData;
+
+    /* Video Enhancement Algorithms */
+    mfxExtVPPDeinterlacing  deinterlace_conf;
+    mfxExtVPPFrameRateConversion frc_conf;
+    mfxExtVPPDenoise denoise_conf;
+    mfxExtVPPDetail detail_conf;
+    mfxExtVPPComposite composite_conf;
+
+    int out_width;
+    int out_height;
+    int dpic;                   // destination picture structure
+                                // -1 = unknown
+                                // 0 = interlaced top field first
+                                // 1 = progressive
+                                // 2 = interlaced bottom field first
+
+    int deinterlace;            // deinterlace mode : 0=off, 1=bob, 2=advanced
+    int denoise;                // Enable Denoise algorithm. Level is the optional value from the interval [0; 100]
+    int detail;                 // Enable Detail Enhancement algorithm.
+                                // Level is the optional value from the interval [0; 100]
+    int async_depth;            // async depth used by encoder
+    int max_b_frames;           // maximum number of B-frames used by encoder
+    int use_frc;                // use framerate conversion
+} VPPContext;
 
 /**
  * ToDo :
@@ -120,6 +190,7 @@  static int avpix_fmt_to_mfx_fourcc(int format)
 static void vidmem_init_surface(VPPContext *vpp)
 {
     int i;
+    AVQSVContext *qsv = (AVQSVContext*)vpp->hw_device_ctx->data;
 
     av_log(vpp->ctx, AV_LOG_INFO, "qsvvpp: vidmem_init_surface: ");
 
@@ -134,20 +205,17 @@  static void vidmem_init_surface(VPPContext *vpp)
     /*
      * We should care about next stage vpp or encoder's input surfaces.
      */
-    av_log(vpp->ctx, AV_LOG_INFO, "in.num = %d, out.num = %d, ",
+    vpp->req[0].NumFrameSuggested = FFMAX(vpp->req[0].NumFrameSuggested, 1);
+    vpp->req[1].NumFrameSuggested = FFMAX(vpp->req[1].NumFrameSuggested, 1);
+    av_log(vpp->ctx, AV_LOG_INFO, "in.num = %d, out.num = %d\n",
             vpp->req[0].NumFrameSuggested, vpp->req[1].NumFrameSuggested);
-    if (vpp->enc_ctx) {
-        vpp->req[1].NumFrameSuggested += vpp->enc_ctx->req.NumFrameSuggested;
-        av_log(vpp->ctx, AV_LOG_INFO, "enc_ctx.num=%d\n", vpp->enc_ctx->req.NumFrameSuggested);
-    } else {
-        av_log(vpp->ctx, AV_LOG_INFO, "enc_ctx.num=%d\n", 0);
-    }
-
-    vpp->req[0].NumFrameSuggested  = FFMAX(vpp->req[0].NumFrameSuggested, 1);
+    qsv->nb_vpp_surfaces = vpp->req[0].NumFrameSuggested;
 
-    vpp->num_surfaces_out = FFMAX(vpp->req[1].NumFrameSuggested, 1);
     vpp->out_response     = av_mallocz(sizeof(*vpp->out_response));
     VPP_CHECK_POINTER(vpp->out_response);
+    vpp->pFrameAllocator->Alloc(vpp->pFrameAllocator->pthis, &vpp->req[1], vpp->out_response);
+
+    vpp->num_surfaces_out = vpp->out_response->NumFrameActual;
     vpp->out_surface      = av_mallocz(sizeof(*vpp->out_surface) * vpp->num_surfaces_out);
     VPP_CHECK_POINTER(vpp->out_surface);
 
@@ -333,6 +401,7 @@  static int sysmem_input_get_surface( AVFilterLink *inlink, AVFrame* picref, mfxF
 
 static int vidmem_input_get_surface( AVFilterLink *inlink, AVFrame* picref, mfxFrameSurface1 **surface )
 {
+    if (picref->format == AV_PIX_FMT_QSV && picref->data[3]) {
     if (picref->data[3]) {
         *surface = (mfxFrameSurface1*)picref->data[3];
     } else {
@@ -533,11 +602,19 @@  static int initial_vpp( VPPContext *vpp )
 static int config_vpp(AVFilterLink *inlink, AVFrame * pic)
 {
     AVFilterContext *ctx = inlink->dst;
-    VPPContext *vpp= ctx->priv;
-    mfxVideoParam mfxParamsVideo;
-    int           ret;
+    VPPContext      *vpp = ctx->priv;
+    mfxVideoParam    mfxParamsVideo;
+    int              ret;
 
     av_log(vpp->ctx, AV_LOG_INFO, "QSVVPP: vpp configuration and call mfxVideoVPP_Init\n");
+    if (ctx->hw_device_ctx) {
+        AVQSVContext *qsv    = (AVQSVContext*)ctx->hw_device_ctx->data;
+        vpp->hw_device_ctx   = av_buffer_ref(ctx->hw_device_ctx);
+        vpp->pFrameAllocator = qsv->pFrameAllocator;
+        vpp->iopattern       = qsv->iopattern;
+        vpp->session         = qsv->session;
+    }
+
     if (!vpp->session) {
         ret = ff_qsv_init_internal_session(ctx, &vpp->internal_qs);
         if (ret < 0)
@@ -573,6 +650,32 @@  static void deconf_vpp(AVFilterContext *ctx)
     vpp->vpp_ready = 0;
 }
 
+static void vidmem_buffer_free(void *opaque, uint8_t *data)
+{
+    /* Intentionally empty: used as the free callback in vidmem_buffer_alloc(), so releasing the AVBufferRef leaves the wrapped surface untouched. */
+}
+
+/* Wrap a video-memory surface in a new AVFrame (surface pointer in data[3]); returns NULL on failure. */
+static AVFrame *vidmem_buffer_alloc(AVFilterContext *ctx, mfxFrameSurface1 *pSurface)
+{
+    AVFrame *frame = av_frame_alloc();
+    if (!frame)
+        return NULL;
+    /* vidmem_buffer_free() is a no-op, so this buffer only references the surface. */
+    frame->buf[0] = av_buffer_create((uint8_t*)pSurface, sizeof(*pSurface), vidmem_buffer_free, NULL, 0);
+    if (!frame->buf[0]) {
+        av_frame_free(&frame);
+        return NULL; /* callers test for NULL; an AVERROR code is not a valid pointer */
+    }
+
+    frame->data[3] = frame->buf[0]->data;
+    frame->width   = ctx->outputs[0]->w;
+    frame->height  = ctx->outputs[0]->h;
+    frame->format  = ctx->outputs[0]->format;
+
+    return frame;
+}
+
 /*
  * Real filter func.
  * Push frame into mSDK and pop out filtered frames.
@@ -619,11 +722,15 @@  static int filter_frame(AVFilterLink *inlink, AVFrame *picref)
             break;
         }
 
-        /*
-         * get an AVFrame for output.
-         * @NOTE: frame buffer is aligned with 128x64 to compat with GPU-copy.
-         */
-        out = ff_get_video_buffer(outlink, FFALIGN(vpp->out_width, 128), FFALIGN(vpp->out_height, 64));
+        if (!vpp->pFrameAllocator) {
+            /*
+            * get an AVFrame for output.
+            * @NOTE: frame buffer is aligned with 128x64 to compat with GPU-copy.
+            */
+            out = ff_get_video_buffer(outlink, FFALIGN(vpp->out_width, 128), FFALIGN(vpp->out_height, 64));
+        } else {
+            out = vidmem_buffer_alloc(ctx, pOutSurface);
+        }
         if (!out) {
             ret = MFX_ERR_MEMORY_ALLOC;
             break;
@@ -670,10 +777,6 @@  static int filter_frame(AVFilterLink *inlink, AVFrame *picref)
             out->pts = av_rescale_q(pOutSurface->Data.TimeStamp, (AVRational){1,90000}, outlink->time_base);
         }
 
-        /*For video mem, we use AVFrame->data[3] to transfer surface*/
-        if (vpp->pFrameAllocator)
-            out->data[3] = (void*) pOutSurface;
-
         filter_frame_ret = ff_filter_frame(inlink->dst->outputs[0], out);
         if (filter_frame_ret < 0)
             break;
@@ -722,7 +825,6 @@  static int config_output(AVFilterLink *outlink)
     outlink->h             = vpp->out_height;
     outlink->frame_rate    = vpp->framerate;
     outlink->time_base     = av_inv_q(vpp->framerate);
-    outlink->format        = AV_PIX_FMT_NV12;
 
     return 0;
 }
@@ -763,13 +865,18 @@  static av_cold int qsvvpp_init(AVFilterContext *ctx)
     vpp->vpp_ready       = 0;
     vpp->ctx             = ctx;
     vpp->sysmem_cur_out_idx = 0;
+    vpp->hw_device_ctx   = NULL;
 
     return 0;
 }
 
 static av_cold void qsvvpp_uninit(AVFilterContext *ctx)
 {
+    VPPContext *vpp = ctx->priv;
+
     deconf_vpp(ctx);
+    if (vpp->hw_device_ctx)                     /* drop the reference taken in config_vpp() */
+        av_buffer_unref(&vpp->hw_device_ctx);
 }
 
 static int qsvvpp_cmd_size(AVFilterContext *ctx, const char *arg)