diff mbox

[FFmpeg-devel,3/6] lavc/qsv: Enable hwaccel qsv_vidmem.

Message ID 1471342207-11982-4-git-send-email-sdk@nablet.com
State Superseded
Headers show

Commit Message

Nablet Developer Aug. 16, 2016, 10:10 a.m. UTC
From: ChaoX A Liu <chaox.a.liu@intel.com>

Signed-off-by: ChaoX A Liu <chaox.a.liu@intel.com>
---
 ffmpeg.c                  |   2 +-
 ffmpeg.h                  |   2 +
 ffmpeg_opt.c              |   2 +-
 ffmpeg_qsv.c              | 636 +++++++++++++++++++++++++++++++++++++++++++++-
 libavcodec/qsv.h          |   3 +
 libavcodec/qsv_internal.h |   2 +
 libavcodec/qsvdec.c       |   5 +-
 libavcodec/qsvenc.c       |   2 +
 8 files changed, 649 insertions(+), 5 deletions(-)
diff mbox

Patch

diff --git a/ffmpeg.c b/ffmpeg.c
index bae515d..a8bc237 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -3050,7 +3050,7 @@  static int transcode_init(void)
             set_encoder_id(output_files[ost->file_index], ost);
 
 #if CONFIG_LIBMFX
-            if (qsv_transcode_init(ost))
+            if (qsv_transcode_init_vidmem(ost))
                 exit_program(1);
 #endif
 
diff --git a/ffmpeg.h b/ffmpeg.h
index 49d65d8..2633336 100644
--- a/ffmpeg.h
+++ b/ffmpeg.h
@@ -585,6 +585,8 @@  int vda_init(AVCodecContext *s);
 int videotoolbox_init(AVCodecContext *s);
 int qsv_init(AVCodecContext *s);
 int qsv_transcode_init(OutputStream *ost);
+int qsv_init_vidmem(AVCodecContext *s);
+int qsv_transcode_init_vidmem(OutputStream *ost);
 int vaapi_decode_init(AVCodecContext *avctx);
 int vaapi_device_init(const char *device);
 int cuvid_init(AVCodecContext *s);
diff --git a/ffmpeg_opt.c b/ffmpeg_opt.c
index 2ea09cf..b5e4483 100644
--- a/ffmpeg_opt.c
+++ b/ffmpeg_opt.c
@@ -79,7 +79,7 @@  const HWAccel hwaccels[] = {
     { "videotoolbox",   videotoolbox_init,   HWACCEL_VIDEOTOOLBOX,   AV_PIX_FMT_VIDEOTOOLBOX },
 #endif
 #if CONFIG_LIBMFX
-    { "qsv",   qsv_init,   HWACCEL_QSV,   AV_PIX_FMT_QSV },
+    { "qsv",   qsv_init_vidmem,   HWACCEL_QSV,   AV_PIX_FMT_QSV },
 #endif
 #if CONFIG_VAAPI
     { "vaapi", vaapi_decode_init, HWACCEL_VAAPI, AV_PIX_FMT_VAAPI },
diff --git a/ffmpeg_qsv.c b/ffmpeg_qsv.c
index 95a2351..ec8a41b 100644
--- a/ffmpeg_qsv.c
+++ b/ffmpeg_qsv.c
@@ -18,11 +18,15 @@ 
 
 #include <mfx/mfxvideo.h>
 #include <stdlib.h>
+#include <stdbool.h>
+#include <va/va.h>
 
 #include "libavutil/dict.h"
 #include "libavutil/mem.h"
 #include "libavutil/opt.h"
+#include "libavutil/avstring.h"
 #include "libavcodec/qsv.h"
+#include "libavcodec/qsv_internal.h"
 
 #include "ffmpeg.h"
 
@@ -34,6 +38,8 @@  typedef struct QSVContext {
     mfxExtOpaqueSurfaceAlloc opaque_alloc;
     AVBufferRef             *opaque_surfaces_buf;
 
+    mfxFrameAllocator frame_allocator;
+
     uint8_t           *surface_used;
     mfxFrameSurface1 **surface_ptrs;
     int nb_surfaces;
@@ -60,7 +66,7 @@  static int qsv_get_buffer(AVCodecContext *s, AVFrame *frame, int flags)
                                          buffer_release, &qsv->surface_used[i], 0);
         if (!frame->buf[0])
             return AVERROR(ENOMEM);
-        frame->data[3]       = (uint8_t*)qsv->surface_ptrs[i];
+        frame->data[3]       = frame->buf[0]->data;
         qsv->surface_used[i] = 1;
         return 0;
     }
@@ -266,3 +272,631 @@  fail:
     av_freep(&qsv);
     return AVERROR_UNKNOWN;
 }
+
+enum { /* private FourCC tags for VP8 hybrid-encoder requests; folded onto NV12/P8 by ConvertVP8FourccToMfxFourcc() */
+    MFX_FOURCC_VP8_NV12    = MFX_MAKEFOURCC('V','P','8','N'), /* handled as NV12 */
+    MFX_FOURCC_VP8_MBDATA  = MFX_MAKEFOURCC('V','P','8','M'), /* handled as NV12 */
+    MFX_FOURCC_VP8_SEGMAP  = MFX_MAKEFOURCC('V','P','8','S'), /* handled as P8 */
+};
+
+typedef struct vaapiMemId /* per-frame record that frame_alloc() hands to libmfx as an mfxMemId */
+{
+    VASurfaceID* m_surface;   /* this frame's slot in the ID array filled by frame_alloc(): a VA surface ID, or a VA buffer ID for P8 requests */
+    VAImage m_image;          /* image derived in frame_lock() and destroyed in frame_unlock() */
+    unsigned int m_fourcc;    /* FourCC copied from the allocation request (may be one of the VP8 tags) */
+    mfxU8* m_sys_buffer;      /* released with free() in frame_free(); never assigned in the code visible here */
+    mfxU8* m_va_buffer;       /* not referenced by the code visible here */
+} vaapiMemId;
+
+static QSVSession g_session;
+
+/* ****************************************************************************** *\
+
+INTEL CORPORATION PROPRIETARY INFORMATION
+This software is supplied under the terms of a license agreement or nondisclosure
+agreement with Intel Corporation and may not be copied or disclosed except in
+accordance with the terms of that agreement
+Copyright(c) 2011-2014 Intel Corporation. All Rights Reserved.
+
+\* ****************************************************************************** */
+/*
+ * Translate a libva status code into an Intel Media SDK status code.
+ *
+ * VA_STATUS_SUCCESS becomes MFX_ERR_NONE; allocation, "unsupported",
+ * "invalid object" and "invalid parameter" errors each map to a dedicated
+ * MFX error, and every other VA status is reported as MFX_ERR_UNKNOWN.
+ * Used by the frame allocator callbacks below to report VA failures.
+ */
+static mfxStatus va_to_mfx_status(VAStatus va_res)
+{
+    switch (va_res) {
+        case VA_STATUS_SUCCESS:
+            return MFX_ERR_NONE;
+        case VA_STATUS_ERROR_ALLOCATION_FAILED:
+            return MFX_ERR_MEMORY_ALLOC;
+        case VA_STATUS_ERROR_ATTR_NOT_SUPPORTED:
+        case VA_STATUS_ERROR_UNSUPPORTED_PROFILE:
+        case VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT:
+        case VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT:
+        case VA_STATUS_ERROR_UNSUPPORTED_BUFFERTYPE:
+        case VA_STATUS_ERROR_FLAG_NOT_SUPPORTED:
+        case VA_STATUS_ERROR_RESOLUTION_NOT_SUPPORTED:
+            return MFX_ERR_UNSUPPORTED;
+        case VA_STATUS_ERROR_INVALID_DISPLAY:
+        case VA_STATUS_ERROR_INVALID_CONFIG:
+        case VA_STATUS_ERROR_INVALID_CONTEXT:
+        case VA_STATUS_ERROR_INVALID_SURFACE:
+        case VA_STATUS_ERROR_INVALID_BUFFER:
+        case VA_STATUS_ERROR_INVALID_IMAGE:
+        case VA_STATUS_ERROR_INVALID_SUBPICTURE:
+            return MFX_ERR_NOT_INITIALIZED;
+        case VA_STATUS_ERROR_INVALID_PARAMETER:
+            /* returns directly so this mapping cannot fall through to default */
+            return MFX_ERR_INVALID_VIDEO_PARAM;
+        default:
+            return MFX_ERR_UNKNOWN;
+    }
+}
+
+/* Map an MFX FourCC to the matching VA FourCC; 0 means there is no mapping. */
+static unsigned int ConvertMfxFourccToVAFormat(mfxU32 fourcc)
+{
+    unsigned int va_format = 0;
+
+    if (fourcc == MFX_FOURCC_NV12)
+        va_format = VA_FOURCC_NV12;
+    else if (fourcc == MFX_FOURCC_YUY2)
+        va_format = VA_FOURCC_YUY2;
+    else if (fourcc == MFX_FOURCC_YV12)
+        va_format = VA_FOURCC_YV12;
+    else if (fourcc == MFX_FOURCC_RGB4)
+        va_format = VA_FOURCC_ARGB;
+    else if (fourcc == MFX_FOURCC_P8)
+        va_format = VA_FOURCC_P208;
+
+    return va_format;
+}
+
+/* Fold the private VP8 FourCC tags onto standard MFX FourCCs; others pass through. */
+static unsigned int ConvertVP8FourccToMfxFourcc(mfxU32 fourcc)
+{
+    const int is_vp8_nv12 = fourcc == MFX_FOURCC_VP8_NV12 ||
+                            fourcc == MFX_FOURCC_VP8_MBDATA;
+
+    if (is_vp8_nv12)
+        return MFX_FOURCC_NV12;
+    if (fourcc == MFX_FOURCC_VP8_SEGMAP)
+        return MFX_FOURCC_P8;
+
+    return fourcc;
+}
+
+/*
+ * mfxFrameAllocator.Alloc callback (pthis is the owning QSVContext).
+ * P8 requests are backed by VA buffers, all other formats by VA surfaces;
+ * each frame gets a heap-allocated vaapiMemId returned in response->mids.
+ * External decoder requests are enlarged by the surface counts stored in
+ * AVQSVContext and also get one mfxFrameSurface1 per frame appended to the
+ * QSVContext pool.  Returns MFX_ERR_NONE, or MFX_ERR_MEMORY_ALLOC after
+ * destroying the VA objects and memory IDs this call created.
+ */
+static mfxStatus frame_alloc(mfxHDL pthis, mfxFrameAllocRequest *request, mfxFrameAllocResponse *response)
+{
+    int i, format;
+    VAStatus va_res = VA_STATUS_SUCCESS;
+    VASurfaceID* surfaces = NULL;
+    mfxMemId* mids = NULL;
+    mfxFrameSurface1 *mfxsurface = NULL;
+    VAContextID context_id;
+    VABufferType codedbuf_type;
+    VASurfaceAttrib attrib;
+    mfxU16 surface_num, numAllocated = 0;
+    unsigned int va_fourcc = 0;
+    mfxU32 fourcc, mfx_fourcc;
+    QSVContext *q = pthis;
+    bool bCreateSrfSucceeded = false, for_decoder;
+    int codedbuf_size, width32, height32;
+    void *tmp, *avctx = NULL;
+
+    av_log(avctx, AV_LOG_INFO, "=========vaapi alloc frame==============\n");
+    /* validate the arguments before anything reads through them */
+    if (!q || !request || !response || !request->NumFrameSuggested)
+        return MFX_ERR_MEMORY_ALLOC;
+
+    memset(response, 0, sizeof(*response));
+    fourcc      = request->Info.FourCC;
+    surface_num = request->NumFrameSuggested;
+    for_decoder = (request->Type & MFX_MEMTYPE_EXTERNAL_FRAME) &&
+                  (request->Type & MFX_MEMTYPE_FROM_DECODE);
+    if (for_decoder) {
+        AVQSVContext *qsv = q->ost->enc_ctx->hwaccel_context;
+        surface_num += (qsv->nb_encoder_surfaces + qsv->nb_decoder_surfaces);
+    }
+
+    av_log(avctx, AV_LOG_INFO, "VAAPI: va_dpy =%p, surface_num=%d, width=%d, height=%d\n",
+            g_session.va_display, surface_num, request->Info.Width, request->Info.Height);
+    av_log(avctx, AV_LOG_INFO, "VAAPI: request->Type=%x\n",request->Type);
+
+    surfaces = (VASurfaceID*)av_calloc(surface_num, sizeof(VASurfaceID));
+    mids = (mfxMemId*)av_calloc(surface_num, sizeof(mfxMemId));
+    if (!surfaces || !mids) {
+        av_log(avctx, AV_LOG_ERROR, "ERROR: memory allocation failed\n");
+        goto fail;
+    }
+
+    mfx_fourcc = ConvertVP8FourccToMfxFourcc(fourcc);
+    va_fourcc  = ConvertMfxFourccToVAFormat(mfx_fourcc);
+    if (va_fourcc != VA_FOURCC_P208) {
+        av_log(avctx, AV_LOG_INFO, "VAAPI: va_fourcc != VA_FOURCC_P208\n");
+        attrib.type  = VASurfaceAttribPixelFormat;
+        attrib.flags = VA_SURFACE_ATTRIB_SETTABLE;
+        attrib.value.type = VAGenericValueTypeInteger;
+        attrib.value.value.i = va_fourcc;
+        format = va_fourcc;
+
+        if (fourcc == MFX_FOURCC_VP8_NV12) {
+            // special configuration for NV12 surf allocation for VP8 hybrid encoder is required
+            attrib.type          = (VASurfaceAttribType)VASurfaceAttribUsageHint;
+            attrib.value.value.i = VA_SURFACE_ATTRIB_USAGE_HINT_ENCODER;
+        } else if (fourcc == MFX_FOURCC_VP8_MBDATA) {
+            // special configuration for MB data surf allocation for VP8 hybrid encoder is required
+            attrib.value.value.i = VA_FOURCC_P208;
+            format               = VA_FOURCC_P208;
+        } else if (va_fourcc == VA_FOURCC_NV12) {
+            format = VA_RT_FORMAT_YUV420;
+        }
+
+        va_res = vaCreateSurfaces(g_session.va_display, format,
+                                  request->Info.Width, request->Info.Height,
+                                  surfaces, surface_num, &attrib, 1);
+        bCreateSrfSucceeded = (va_res==VA_STATUS_SUCCESS);
+    } else {
+        av_log(avctx, AV_LOG_INFO, "VAAPI: va_fourcc == VA_FOURCC_P208\n");
+        context_id = request->reserved[0];
+        width32    = 32 * ((request->Info.Width + 31) >> 5);
+        height32   = 32 * ((request->Info.Height + 31) >> 5);
+
+        if (fourcc == MFX_FOURCC_VP8_SEGMAP) {
+            codedbuf_size = request->Info.Width * request->Info.Height;
+            codedbuf_type = (VABufferType)VAEncMacroblockMapBufferType;
+        } else {
+            codedbuf_size = ((width32 * height32) * 400LL / (16 * 16));
+            codedbuf_type = VAEncCodedBufferType;
+        }
+
+        for (numAllocated = 0; numAllocated < surface_num; numAllocated++) {
+            VABufferID coded_buf;
+            va_res = vaCreateBuffer(g_session.va_display, context_id,
+                                    codedbuf_type, codedbuf_size, 1,
+                                    NULL, &coded_buf);
+            if (va_res != VA_STATUS_SUCCESS)
+                break;
+            surfaces[numAllocated] = coded_buf;
+        }
+    }
+    if (va_res != VA_STATUS_SUCCESS)
+        goto fail;
+
+    av_log(avctx, AV_LOG_INFO, "VAAPI: %d VA surfaces have been allocated\n", surface_num);
+    for (i = 0; i < surface_num; i++) {
+        vaapiMemId *vaapi_mid = av_mallocz(sizeof(*vaapi_mid));
+        if (!vaapi_mid)
+            goto fail;
+        vaapi_mid->m_fourcc  = fourcc;
+        vaapi_mid->m_surface = surfaces + i;
+        mids[i] = vaapi_mid;
+    }
+
+    if (for_decoder) {
+        /* grow through a temporary so the old arrays survive a failed realloc */
+        tmp = av_realloc(q->surface_ptrs, sizeof(*q->surface_ptrs) * (q->nb_surfaces + surface_num));
+        if (!tmp)
+            goto fail;
+        q->surface_ptrs = tmp;
+        tmp = av_realloc(q->surface_used, sizeof(*q->surface_used) * (q->nb_surfaces + surface_num));
+        if (!tmp)
+            goto fail;
+        q->surface_used = tmp;
+
+        for (i = 0; i < surface_num; i++) {
+            mfxsurface = av_mallocz(sizeof(*mfxsurface));
+            if (!mfxsurface) {
+                while (i-- > 0)
+                    av_freep(&q->surface_ptrs[q->nb_surfaces + i]);
+                goto fail;
+            }
+            memcpy(&mfxsurface->Info, &request->Info, sizeof(mfxFrameInfo));
+            mfxsurface->Data.MemId = mids[i];
+            q->surface_ptrs[q->nb_surfaces + i] = mfxsurface;
+            q->surface_used[q->nb_surfaces + i] = 0;
+        }
+        /* publish the new entries only once all of them exist */
+        q->nb_surfaces += surface_num;
+    }
+    response->mids = mids;
+    response->NumFrameActual = surface_num;
+    return MFX_ERR_NONE;
+
+fail:
+    if (bCreateSrfSucceeded)
+        vaDestroySurfaces(g_session.va_display, surfaces, surface_num);
+    for (i = 0; i < numAllocated; i++)
+        vaDestroyBuffer(g_session.va_display, surfaces[i]);
+    for (i = 0; mids && i < surface_num; i++)
+        av_freep(&mids[i]);
+    av_freep(&mids);
+    av_freep(&surfaces);
+    av_log(avctx, AV_LOG_ERROR, "ERROR: VA Surfaces allocation failed\n");
+    return MFX_ERR_MEMORY_ALLOC;
+}
+
+/*
+ * mfxFrameAllocator.Free callback: destroy the VA buffers or surfaces
+ * listed in response->mids and release the bookkeeping memory.
+ */
+static mfxStatus frame_free(mfxHDL pthis, mfxFrameAllocResponse *response)
+{
+    VASurfaceID  *va_ids;
+    mfxU32        idx;
+    bool          is_p8;
+
+    if (!response)
+        return MFX_ERR_NULL_PTR;
+
+    av_log( NULL, AV_LOG_INFO, "=========vaapi free frame: %d==============\n", response->NumFrameActual);
+    if (response->mids) {
+        vaapiMemId *first = response->mids[0];
+
+        /* the first entry's m_surface is the base of the shared ID array */
+        va_ids = first->m_surface;
+        is_p8  = ConvertVP8FourccToMfxFourcc(first->m_fourcc) == MFX_FOURCC_P8;
+
+        for (idx = 0; idx < response->NumFrameActual; ++idx) {
+            vaapiMemId *entry = response->mids[idx];
+
+            if (is_p8)
+                vaDestroyBuffer(g_session.va_display, va_ids[idx]);
+            else if (entry->m_sys_buffer)
+                free(entry->m_sys_buffer);
+            av_freep(&entry);
+        }
+        if (!is_p8)
+            vaDestroySurfaces(g_session.va_display, va_ids, response->NumFrameActual);
+        av_freep(&response->mids);
+        av_freep(&va_ids);
+    }
+    response->NumFrameActual = 0;
+    return MFX_ERR_NONE;
+}
+
+/*
+ * mfxFrameAllocator.Lock callback: map the VA object behind mid and store
+ * the plane pointers (plus the pitch, for surfaces) in *ptr.
+ * P8 memory IDs wrap a VA buffer that is mapped directly; all others wrap a
+ * VA surface that is synced, turned into a VAImage and then mapped.  When an
+ * error is returned nothing is left mapped and no image is left allocated.
+ */
+static mfxStatus frame_lock(mfxHDL pthis, mfxMemId mid, mfxFrameData *ptr)
+{
+    mfxStatus             mfx_res = MFX_ERR_NONE;
+    VAStatus              va_res  = VA_STATUS_SUCCESS;
+    vaapiMemId           *vaapi_mid = (vaapiMemId*)mid;
+    mfxU8                *pBuffer = 0;
+    mfxU32                mfx_fourcc;
+
+    if (!vaapi_mid || !vaapi_mid->m_surface)
+        return MFX_ERR_INVALID_HANDLE;
+    if (!ptr)
+        return MFX_ERR_NULL_PTR;
+
+    mfx_fourcc = ConvertVP8FourccToMfxFourcc(vaapi_mid->m_fourcc);
+    if (MFX_FOURCC_P8 == mfx_fourcc) {
+        void *mapped = NULL;
+        va_res  = vaMapBuffer(g_session.va_display, *(vaapi_mid->m_surface), &mapped);
+        mfx_res = va_to_mfx_status(va_res);
+        /* a segmentation map is raw bytes; other P8 buffers start with a VACodedBufferSegment */
+        if (MFX_ERR_NONE == mfx_res)
+            ptr->Y = vaapi_mid->m_fourcc == MFX_FOURCC_VP8_SEGMAP ?
+                     (mfxU8*)mapped : (mfxU8*)((VACodedBufferSegment*)mapped)->buf;
+        return mfx_res;
+    }
+
+    va_res = vaSyncSurface(g_session.va_display, *(vaapi_mid->m_surface));
+    mfx_res = va_to_mfx_status(va_res);
+    if (MFX_ERR_NONE != mfx_res)
+        return mfx_res;
+
+    va_res = vaDeriveImage(g_session.va_display, *(vaapi_mid->m_surface), &(vaapi_mid->m_image));
+    mfx_res = va_to_mfx_status(va_res);
+    if (MFX_ERR_NONE != mfx_res)
+        return mfx_res;
+
+    va_res = vaMapBuffer(g_session.va_display, vaapi_mid->m_image.buf, (void **) &pBuffer);
+    mfx_res = va_to_mfx_status(va_res);
+    if (MFX_ERR_NONE != mfx_res)
+        goto destroy_image;
+
+    mfx_res = MFX_ERR_LOCK_MEMORY; /* until a matching layout is found */
+    switch (vaapi_mid->m_image.format.fourcc) {
+        case VA_FOURCC_NV12:
+            if (mfx_fourcc == MFX_FOURCC_NV12) {
+                ptr->Pitch = (mfxU16)vaapi_mid->m_image.pitches[0];
+                ptr->Y = pBuffer + vaapi_mid->m_image.offsets[0];
+                ptr->U = pBuffer + vaapi_mid->m_image.offsets[1];
+                ptr->V = ptr->U + 1;
+                mfx_res = MFX_ERR_NONE;
+            }
+            break;
+        case VA_FOURCC_YV12:
+            if (mfx_fourcc == MFX_FOURCC_YV12) {
+                ptr->Pitch = (mfxU16)vaapi_mid->m_image.pitches[0];
+                ptr->Y = pBuffer + vaapi_mid->m_image.offsets[0];
+                ptr->V = pBuffer + vaapi_mid->m_image.offsets[1];
+                ptr->U = pBuffer + vaapi_mid->m_image.offsets[2];
+                mfx_res = MFX_ERR_NONE;
+            }
+            break;
+        case VA_FOURCC_YUY2:
+            if (mfx_fourcc == MFX_FOURCC_YUY2) {
+                ptr->Pitch = (mfxU16)vaapi_mid->m_image.pitches[0];
+                ptr->Y = pBuffer + vaapi_mid->m_image.offsets[0];
+                ptr->U = ptr->Y + 1;
+                ptr->V = ptr->Y + 3;
+                mfx_res = MFX_ERR_NONE;
+            }
+            break;
+        case VA_FOURCC_ARGB:
+            if (mfx_fourcc == MFX_FOURCC_RGB4) {
+                ptr->Pitch = (mfxU16)vaapi_mid->m_image.pitches[0];
+                ptr->B = pBuffer + vaapi_mid->m_image.offsets[0];
+                ptr->G = ptr->B + 1;
+                ptr->R = ptr->B + 2;
+                ptr->A = ptr->B + 3;
+                mfx_res = MFX_ERR_NONE;
+            }
+            break;
+        case VA_FOURCC_P208:
+            if (mfx_fourcc == MFX_FOURCC_NV12) {
+                ptr->Pitch = (mfxU16)vaapi_mid->m_image.pitches[0];
+                ptr->Y = pBuffer + vaapi_mid->m_image.offsets[0];
+                mfx_res = MFX_ERR_NONE;
+            }
+            break;
+        default:
+            break;
+    }
+    if (MFX_ERR_NONE == mfx_res)
+        return mfx_res;
+    /* the image layout does not match the requested format: undo the mapping */
+    vaUnmapBuffer(g_session.va_display, vaapi_mid->m_image.buf);
+destroy_image:
+    vaDestroyImage(g_session.va_display, vaapi_mid->m_image.image_id);
+    return mfx_res;
+}
+
+/*
+ * mfxFrameAllocator.Unlock callback: unmap the VA object behind mid.  For
+ * surface-backed IDs the derived image is destroyed and *ptr is cleared.
+ */
+static mfxStatus frame_unlock(mfxHDL pthis, mfxMemId mid, mfxFrameData *ptr)
+{
+    vaapiMemId *entry = mid;
+
+    if (!entry || !entry->m_surface)
+        return MFX_ERR_INVALID_HANDLE;
+
+    if (ConvertVP8FourccToMfxFourcc(entry->m_fourcc) == MFX_FOURCC_P8) {
+        /* buffer-backed ID: the slot m_surface points at is passed as the buffer ID */
+        vaUnmapBuffer(g_session.va_display, *entry->m_surface);
+        return MFX_ERR_NONE;
+    }
+
+    vaUnmapBuffer(g_session.va_display, entry->m_image.buf);
+    vaDestroyImage(g_session.va_display, entry->m_image.image_id);
+    if (ptr) {
+        ptr->Pitch = 0;
+        ptr->Y = ptr->U = ptr->V = ptr->A = NULL;
+    }
+
+    return MFX_ERR_NONE;
+}
+
+/* mfxFrameAllocator.GetHDL callback: return the m_surface pointer stored for mid. */
+static mfxStatus frame_get_hdl(mfxHDL pthis, mfxMemId mid, mfxHDL *handle)
+{
+    const vaapiMemId *entry = mid;
+
+    if (!entry || !handle)
+        return MFX_ERR_INVALID_HANDLE;
+
+    *handle = (mfxHDL) entry->m_surface;
+    return MFX_ERR_NONE;
+}
+
+/* Return 0 if ost's filtergraph uses only the filters listed below, negative otherwise. */
+static int qsv_check_filters(const OutputStream *ost)
+{
+    static const char allowed[] = "buffer|buffersink|null|format|setpts";
+    AVFilterGraph *graph;
+    AVFilterInOut *inputs, *outputs;
+    AVDictionaryEntry *opt = NULL;
+    char sws_args[512];
+    size_t len;
+    int ret, i;
+
+    if (!ost->avfilter)
+        return -1;
+
+    graph = avfilter_graph_alloc();
+    if (!graph)
+        return AVERROR(ENOMEM);
+
+    sws_args[0] = 0;
+    while ((opt = av_dict_get(ost->sws_dict, "", opt, AV_DICT_IGNORE_SUFFIX)))
+        av_strlcatf(sws_args, sizeof(sws_args), "%s=%s:", opt->key, opt->value);
+    len = strlen(sws_args);
+    if (len)
+        sws_args[len - 1] = 0; /* drop the trailing ':' */
+    graph->scale_sws_opts = av_strdup(sws_args);
+
+    ret = avfilter_graph_parse2(graph, ost->avfilter, &inputs, &outputs);
+    if (ret >= 0) {
+        av_log(NULL, AV_LOG_DEBUG, "total filters %d\n", graph->nb_filters);
+        for (i = 0; i < graph->nb_filters; i++) {
+            const char *name = graph->filters[i]->filter->name;
+            av_log(NULL, AV_LOG_DEBUG, "\tfilter name: %s \n", name);
+            if (!av_match_list(name, allowed, '|')) {
+                ret = -1;
+                break;
+            }
+        }
+    }
+    avfilter_inout_free(&inputs);
+    avfilter_inout_free(&outputs);
+    avfilter_graph_free(&graph);
+    return ret;
+}
+
+/* hwaccel_uninit hook: close the global MFX session and free the stream's QSVContext. */
+static void qsv_uninit_vidmem(AVCodecContext *s)
+{
+    int i;
+    InputStream *ist = s->opaque;
+    QSVContext  *qsv = ist->hwaccel_ctx;
+
+    ff_qsv_close_internal_session(&g_session);
+    av_freep(&s->hwaccel_context);
+    if (!qsv)
+        return;
+    av_freep(&qsv->ost->enc_ctx->hwaccel_context);
+    for (i = 0; i < qsv->nb_surfaces; i++)
+        av_freep(&qsv->surface_ptrs[i]);
+    av_freep(&qsv->surface_used);
+    av_freep(&qsv->surface_ptrs);
+    qsv->ost->hwaccel_ctx = NULL; /* the output stream aliases qsv: clear it before the free */
+    av_freep(&ist->hwaccel_ctx);  /* frees qsv and leaves no dangling pointer on the input stream */
+}
+
+/*
+ * hwaccel init hook for "-hwaccel qsv": give the decoder an AVQSVContext that
+ * uses the session stored in the stream's QSVContext and video-memory output.
+ */
+int qsv_init_vidmem(AVCodecContext *s)
+{
+    InputStream  *ist = s->opaque;
+    QSVContext   *ctx = ist->hwaccel_ctx;
+    AVQSVContext *dec_hwctx;
+
+    if (!ctx) {
+        av_log(NULL, AV_LOG_ERROR, "QSV transcoding is not initialized. "
+               "-hwaccel qsv should only be used for one-to-one QSV transcoding "
+               "with no filters.\n");
+        return AVERROR_BUG;
+    }
+
+    if (!(dec_hwctx = av_qsv_alloc_context()))
+        return AVERROR(ENOMEM);
+    dec_hwctx->nb_ext_buffers = 0;
+    dec_hwctx->ext_buffers    = NULL;
+    dec_hwctx->iopattern      = MFX_IOPATTERN_OUT_VIDEO_MEMORY;
+    dec_hwctx->session        = ctx->session;
+
+    av_freep(&s->hwaccel_context);
+    s->hwaccel_context      = dec_hwctx;
+    ist->hwaccel_uninit     = qsv_uninit_vidmem;
+    ist->hwaccel_get_buffer = qsv_get_buffer;
+    return 0;
+}
+
+/*
+ * Set up video-memory QSV transcoding for ost when it is applicable.
+ *
+ * Returns 0 without doing anything unless the encoder and the source decoder
+ * both list AV_PIX_FMT_QSV, the input uses HWACCEL_QSV with no filters of its
+ * own, qsv_check_filters() accepts the graph and ost is the only consumer.
+ * Returns AVERROR_UNKNOWN if setting up the shared MFX session fails.
+ */
+int qsv_transcode_init_vidmem(OutputStream *ost)
+{
+    InputStream *ist = NULL;
+    const enum AVPixelFormat *pix_fmt;
+    int session_open = 0;
+    int err, i;
+    QSVContext *qsv = NULL;
+    AVQSVContext *enc_hwctx = NULL;
+
+    /* check if the encoder supports QSV */
+    if (!ost->enc->pix_fmts)
+        return 0;
+    for (pix_fmt = ost->enc->pix_fmts; *pix_fmt != AV_PIX_FMT_NONE; pix_fmt++)
+        if (*pix_fmt == AV_PIX_FMT_QSV)
+            break;
+    if (*pix_fmt == AV_PIX_FMT_NONE)
+        return 0;
+
+    /*Check if the filters support QSV*/
+    if (ost->source_index < 0 || qsv_check_filters(ost) < 0)
+        return 0;
+
+    /* check if the decoder supports QSV and the output only goes to this stream */
+    ist = input_streams[ost->source_index];
+    if (ist->nb_filters || ist->hwaccel_id != HWACCEL_QSV ||
+        !ist->dec || !ist->dec->pix_fmts)
+        return 0;
+    for (pix_fmt = ist->dec->pix_fmts; *pix_fmt != AV_PIX_FMT_NONE; pix_fmt++)
+        if (*pix_fmt == AV_PIX_FMT_QSV)
+            break;
+    if (*pix_fmt == AV_PIX_FMT_NONE)
+        return 0;
+
+    for (i = 0; i < nb_output_streams; i++)
+        if (output_streams[i] != ost &&
+            output_streams[i]->source_index == ost->source_index)
+            return 0;
+
+    av_log(NULL, AV_LOG_VERBOSE, "Setting up QSV transcoding\n");
+
+    qsv   = av_mallocz(sizeof(*qsv));
+    enc_hwctx = av_qsv_alloc_context();
+    if (!qsv || !enc_hwctx)
+        goto fail;
+
+    err = ff_qsv_init_internal_session(NULL, &g_session);
+    if (err != MFX_ERR_NONE) {
+        av_log(NULL, AV_LOG_ERROR, "Error initializing an MFX session: %d\n", err);
+        goto fail;
+    }
+    session_open = 1;
+
+    qsv->ost = ost;
+    qsv->session = g_session.session;
+    qsv->frame_allocator.Alloc = frame_alloc;
+    qsv->frame_allocator.Free  = frame_free;
+    qsv->frame_allocator.Lock  = frame_lock;
+    qsv->frame_allocator.Unlock= frame_unlock;
+    qsv->frame_allocator.GetHDL= frame_get_hdl;
+    qsv->frame_allocator.pthis = qsv;
+
+    err = MFXVideoCORE_SetFrameAllocator(qsv->session, &qsv->frame_allocator);
+    if (MFX_ERR_NONE != err) {
+        av_log(NULL, AV_LOG_ERROR, "Error MFXVideoCORE_SetFrameAllocator: %d\n", err);
+        goto fail;
+    }
+
+    enc_hwctx->session               = qsv->session;
+    enc_hwctx->iopattern             = MFX_IOPATTERN_IN_VIDEO_MEMORY;
+    ost->hwaccel_ctx                 = qsv;
+    ost->enc_ctx->hwaccel_context    = enc_hwctx;
+    ist->resample_pix_fmt            = AV_PIX_FMT_QSV;
+    ist->hwaccel_ctx                 = qsv;
+
+    return 0;
+
+fail:
+    /* do not leak the MFX session when a step after its creation failed */
+    if (session_open)
+        ff_qsv_close_internal_session(&g_session);
+    av_freep(&enc_hwctx);
+    av_freep(&qsv);
+    return AVERROR_UNKNOWN;
+}
diff --git a/libavcodec/qsv.h b/libavcodec/qsv.h
index b77158e..ee968d0 100644
--- a/libavcodec/qsv.h
+++ b/libavcodec/qsv.h
@@ -95,6 +95,9 @@  typedef struct AVQSVContext {
      * the opaque allocation request.
      */
     int opaque_alloc_type;
+
+    int nb_decoder_surfaces;
+    int nb_encoder_surfaces;
 } AVQSVContext;
 
 /**
diff --git a/libavcodec/qsv_internal.h b/libavcodec/qsv_internal.h
index e43728b..58589df 100644
--- a/libavcodec/qsv_internal.h
+++ b/libavcodec/qsv_internal.h
@@ -21,6 +21,8 @@ 
 #ifndef AVCODEC_QSV_INTERNAL_H
 #define AVCODEC_QSV_INTERNAL_H
 
+#include "config.h"
+
 #if CONFIG_VAAPI
 #define AVCODEC_QSV_LINUX_SESSION_HANDLE
 #endif //CONFIG_VAAPI
diff --git a/libavcodec/qsvdec.c b/libavcodec/qsvdec.c
index b9de0af..47dd818 100644
--- a/libavcodec/qsvdec.c
+++ b/libavcodec/qsvdec.c
@@ -72,6 +72,7 @@  static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q, AVPacket *avpkt
         q->iopattern      = qsv->iopattern;
         q->ext_buffers    = qsv->ext_buffers;
         q->nb_ext_buffers = qsv->nb_ext_buffers;
+        qsv->nb_decoder_surfaces = q->async_depth;
     }
     if (!q->session) {
         if (!q->internal_qs.session) {
@@ -88,7 +89,7 @@  static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q, AVPacket *avpkt
         if (ret < 0) {
             av_log(avctx, AV_LOG_ERROR, "Failed to load plugins %s, ret = %s\n",
                     q->load_plugins, av_err2str(ret));
-            return ff_qsv_error(ret);
+            return ret;
         }
     }
 
@@ -149,7 +150,7 @@  static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q, AVPacket *avpkt
        So weare  pre-allocating fifo big enough for 17 elements:
      */
     if (!q->async_fifo) {
-        q->async_fifo = av_fifo_alloc((1 + 16) *
+        q->async_fifo = av_fifo_alloc((1 + 16 + q->async_depth) *
                                       (sizeof(mfxSyncPoint) + sizeof(QSVFrame*)));
         if (!q->async_fifo)
             return AVERROR(ENOMEM);
diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c
index 81b8f6f..041f298 100644
--- a/libavcodec/qsvenc.c
+++ b/libavcodec/qsvenc.c
@@ -760,6 +760,8 @@  int ff_qsv_enc_init(AVCodecContext *avctx, QSVEncContext *q)
 
             q->param.ExtParam[q->param.NumExtParam++] = q->extparam_internal[i];
         }
+
+        qsv->nb_encoder_surfaces = q->req.NumFrameSuggested + q->async_depth;
     } else {
         q->param.ExtParam    = q->extparam_internal;
         q->param.NumExtParam = q->nb_extparam_internal;