[FFmpeg-devel,1/8] compat/cuda: add dynamic loader

Message ID	20161019120036.618-1-timo@rothenpieler.org
State	Accepted
Headers	Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100; From: Timo Rothenpieler <timo@rothenpieler.org> To: ffmpeg-devel@ffmpeg.org Date: Wed, 19 Oct 2016 14:00:29 +0200 Message-Id: <20161019120036.618-1-timo@rothenpieler.org> Subject: [FFmpeg-devel] [PATCH 1/8] compat/cuda: add dynamic loader Precedence: list Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> Cc: Timo Rothenpieler <timo@rothenpieler.org> MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>

Message ID

20161019120036.618-1-timo@rothenpieler.org

State

Accepted

Headers

Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org
	designates 79.124.17.100 as permitted sender)
	client-ip=79.124.17.100; 
From: Timo Rothenpieler <timo@rothenpieler.org>
To: ffmpeg-devel@ffmpeg.org
Date: Wed, 19 Oct 2016 14:00:29 +0200
Message-Id: <20161019120036.618-1-timo@rothenpieler.org>
Subject: [FFmpeg-devel] [PATCH 1/8] compat/cuda: add dynamic loader
Precedence: list
Reply-To: FFmpeg development discussions and patches
	<ffmpeg-devel@ffmpeg.org>
Cc: Timo Rothenpieler <timo@rothenpieler.org>
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: base64
Errors-To: ffmpeg-devel-bounces@ffmpeg.org
Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>

Commit Message

Timo Rothenpieler Oct. 19, 2016, noon UTC

---
 compat/cuda/dynlink_cuda.h     |  88 +++++
 compat/cuda/dynlink_cuviddec.h | 808 +++++++++++++++++++++++++++++++++++++++++
 compat/cuda/dynlink_loader.h   | 254 +++++++++++++
 compat/cuda/dynlink_nvcuvid.h  | 316 ++++++++++++++++
 4 files changed, 1466 insertions(+)
 create mode 100644 compat/cuda/dynlink_cuda.h
 create mode 100644 compat/cuda/dynlink_cuviddec.h
 create mode 100644 compat/cuda/dynlink_loader.h
 create mode 100644 compat/cuda/dynlink_nvcuvid.h

Comments

Hendrik Leppkes Oct. 19, 2016, 12:50 p.m. UTC | #1

On Wed, Oct 19, 2016 at 2:00 PM, Timo Rothenpieler
<timo@rothenpieler.org> wrote:
> ---
>  compat/cuda/dynlink_cuda.h     |  88 +++++
>  compat/cuda/dynlink_cuviddec.h | 808 +++++++++++++++++++++++++++++++++++++++++
>  compat/cuda/dynlink_loader.h   | 254 +++++++++++++
>  compat/cuda/dynlink_nvcuvid.h  | 316 ++++++++++++++++
>  4 files changed, 1466 insertions(+)
>  create mode 100644 compat/cuda/dynlink_cuda.h
>  create mode 100644 compat/cuda/dynlink_cuviddec.h
>  create mode 100644 compat/cuda/dynlink_loader.h
>  create mode 100644 compat/cuda/dynlink_nvcuvid.h
>
> diff --git a/compat/cuda/dynlink_cuda.h b/compat/cuda/dynlink_cuda.h
> new file mode 100644
> index 0000000..908f12d
> --- /dev/null
> +++ b/compat/cuda/dynlink_cuda.h
> @@ -0,0 +1,88 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */

So did you write this without once looking at the NVIDIA header file?
Because if you did even read it, then you can't realistically claim
this is LGPL.

> +
> +#if !defined(AV_COMPAT_DYNLINK_CUDA_H) && !defined(CUDA_VERSION)
> +#define AV_COMPAT_DYNLINK_CUDA_H
> +
> +#include <stddef.h>
> +
> +#define CUDA_VERSION 7050
> +
> +#if defined(_WIN32) || defined(__CYGWIN__)
> +#define CUDAAPI __stdcall
> +#else
> +#define CUDAAPI
> +#endif
> +
> +#define CU_CTX_SCHED_BLOCKING_SYNC 4
> +
> +typedef int CUdevice;
> +typedef void* CUarray;
> +typedef void* CUcontext;
> +#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
> +typedef unsigned long long CUdeviceptr;
> +#else
> +typedef unsigned int CUdeviceptr;
> +#endif
> +
> +typedef enum cudaError_enum {
> +    CUDA_SUCCESS = 0
> +} CUresult;
> +
> +typedef enum CUmemorytype_enum {
> +    CU_MEMORYTYPE_HOST = 1,
> +    CU_MEMORYTYPE_DEVICE = 2
> +} CUmemorytype;
> +
> +typedef struct CUDA_MEMCPY2D_st {
> +    size_t srcXInBytes;
> +    size_t srcY;
> +    CUmemorytype srcMemoryType;
> +    const void *srcHost;
> +    CUdeviceptr srcDevice;
> +    CUarray srcArray;
> +    size_t srcPitch;
> +
> +    size_t dstXInBytes;
> +    size_t dstY;
> +    CUmemorytype dstMemoryType;
> +    void *dstHost;
> +    CUdeviceptr dstDevice;
> +    CUarray dstArray;
> +    size_t dstPitch;
> +
> +    size_t WidthInBytes;
> +    size_t Height;
> +} CUDA_MEMCPY2D;
> +
> +typedef CUresult CUDAAPI tcuInit(unsigned int Flags);
> +typedef CUresult CUDAAPI tcuDeviceGetCount(int *count);
> +typedef CUresult CUDAAPI tcuDeviceGet(CUdevice *device, int ordinal);
> +typedef CUresult CUDAAPI tcuDeviceGetName(char *name, int len, CUdevice dev);
> +typedef CUresult CUDAAPI tcuDeviceComputeCapability(int *major, int *minor, CUdevice dev);
> +typedef CUresult CUDAAPI tcuCtxCreate_v2(CUcontext *pctx, unsigned int flags, CUdevice dev);
> +typedef CUresult CUDAAPI tcuCtxPushCurrent_v2(CUcontext *pctx);
> +typedef CUresult CUDAAPI tcuCtxPopCurrent_v2(CUcontext *pctx);
> +typedef CUresult CUDAAPI tcuCtxDestroy_v2(CUcontext ctx);
> +typedef CUresult CUDAAPI tcuMemAlloc_v2(CUdeviceptr *dptr, size_t bytesize);
> +typedef CUresult CUDAAPI tcuMemFree_v2(CUdeviceptr dptr);
> +typedef CUresult CUDAAPI tcuMemcpy2D_v2(const CUDA_MEMCPY2D *pcopy);
> +typedef CUresult CUDAAPI tcuGetErrorName(CUresult error, const char** pstr);
> +typedef CUresult CUDAAPI tcuGetErrorString(CUresult error, const char** pstr);
> +
> +#endif
> diff --git a/compat/cuda/dynlink_cuviddec.h b/compat/cuda/dynlink_cuviddec.h
> new file mode 100644
> index 0000000..17207bc
> --- /dev/null
> +++ b/compat/cuda/dynlink_cuviddec.h
> @@ -0,0 +1,808 @@
> +/*
> + * This copyright notice applies to this header file only:
> + *
> + * Copyright (c) 2010-2016 NVIDIA Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person
> + * obtaining a copy of this software and associated documentation
> + * files (the "Software"), to deal in the Software without
> + * restriction, including without limitation the rights to use,
> + * copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the software, and to permit persons to whom the
> + * software is furnished to do so, subject to the following
> + * conditions:
> + *
> + * The above copyright notice and this permission notice shall be
> + * included in all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
> + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
> + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
> + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + */
> +
> +/**
> + * \file cuviddec.h
> + * NvCuvid API provides Video Decoding interface to NVIDIA GPU devices.
> + * \date 2015-2016
> + * This file contains constants, structure definitions and function prototypes used for decoding.
> + */
> +
> +#if !defined(__CUDA_VIDEO_H__)
> +#define __CUDA_VIDEO_H__
> +
> +#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
> +#if (CUDA_VERSION >= 3020) && (!defined(CUDA_FORCE_API_VERSION) || (CUDA_FORCE_API_VERSION >= 3020))
> +#define __CUVID_DEVPTR64
> +#endif
> +#endif
> +
> +#if defined(__cplusplus)
> +extern "C" {
> +#endif /* __cplusplus */
> +
> +typedef void *CUvideodecoder;
> +typedef struct _CUcontextlock_st *CUvideoctxlock;
> +
> +/**
> + * \addtogroup VIDEO_DECODER Video Decoder
> + * @{
> + */
> +
> +/*!
> + * \enum cudaVideoCodec
> + * Video Codec Enums
> + */
> +typedef enum cudaVideoCodec_enum {
> +    cudaVideoCodec_MPEG1=0,                 /**<  MPEG1   */
> +    cudaVideoCodec_MPEG2,                   /**<  MPEG2  */
> +    cudaVideoCodec_MPEG4,                   /**<  MPEG4   */
> +    cudaVideoCodec_VC1,                     /**<  VC1   */
> +    cudaVideoCodec_H264,                    /**<  H264   */
> +    cudaVideoCodec_JPEG,                    /**<  JPEG   */
> +    cudaVideoCodec_H264_SVC,                /**<  H264-SVC   */
> +    cudaVideoCodec_H264_MVC,                /**<  H264-MVC   */
> +    cudaVideoCodec_HEVC,                    /**<  HEVC   */
> +    cudaVideoCodec_VP8,                     /**<  VP8   */
> +    cudaVideoCodec_VP9,                     /**<  VP9   */
> +    cudaVideoCodec_NumCodecs,               /**<  Max Codecs   */
> +    // Uncompressed YUV
> +    cudaVideoCodec_YUV420 = (('I'<<24)|('Y'<<16)|('U'<<8)|('V')),   /**< Y,U,V (4:2:0)  */
> +    cudaVideoCodec_YV12   = (('Y'<<24)|('V'<<16)|('1'<<8)|('2')),   /**< Y,V,U (4:2:0)  */
> +    cudaVideoCodec_NV12   = (('N'<<24)|('V'<<16)|('1'<<8)|('2')),   /**< Y,UV  (4:2:0)  */
> +    cudaVideoCodec_YUYV   = (('Y'<<24)|('U'<<16)|('Y'<<8)|('V')),   /**< YUYV/YUY2 (4:2:2)  */
> +    cudaVideoCodec_UYVY   = (('U'<<24)|('Y'<<16)|('V'<<8)|('Y'))    /**< UYVY (4:2:2)  */
> +} cudaVideoCodec;
> +
> +/*!
> + * \enum cudaVideoSurfaceFormat
> + * Video Surface Formats Enums
> + */
> +typedef enum cudaVideoSurfaceFormat_enum {
> +    cudaVideoSurfaceFormat_NV12=0       /**< NV12 (currently the only supported output format)  */
> +} cudaVideoSurfaceFormat;
> +
> +/*!
> + * \enum cudaVideoDeinterlaceMode
> + * Deinterlacing Modes Enums
> + */
> +typedef enum cudaVideoDeinterlaceMode_enum {
> +    cudaVideoDeinterlaceMode_Weave=0,   /**< Weave both fields (no deinterlacing) */
> +    cudaVideoDeinterlaceMode_Bob,       /**< Drop one field  */
> +    cudaVideoDeinterlaceMode_Adaptive   /**< Adaptive deinterlacing  */
> +} cudaVideoDeinterlaceMode;
> +
> +/*!
> + * \enum cudaVideoChromaFormat
> + * Chroma Formats Enums
> + */
> +typedef enum cudaVideoChromaFormat_enum {
> +    cudaVideoChromaFormat_Monochrome=0,  /**< MonoChrome */
> +    cudaVideoChromaFormat_420,           /**< 4:2:0 */
> +    cudaVideoChromaFormat_422,           /**< 4:2:2 */
> +    cudaVideoChromaFormat_444            /**< 4:4:4 */
> +} cudaVideoChromaFormat;
> +
> +/*!
> + * \enum cudaVideoCreateFlags
> + * Decoder Flags Enums
> + */
> +typedef enum cudaVideoCreateFlags_enum {
> +    cudaVideoCreate_Default = 0x00,     /**< Default operation mode: use dedicated video engines */
> +    cudaVideoCreate_PreferCUDA = 0x01,  /**< Use a CUDA-based decoder if faster than dedicated engines (requires a valid vidLock object for multi-threading) */
> +    cudaVideoCreate_PreferDXVA = 0x02,  /**< Go through DXVA internally if possible (requires D3D9 interop) */
> +    cudaVideoCreate_PreferCUVID = 0x04  /**< Use dedicated video engines directly */
> +} cudaVideoCreateFlags;
> +
> +/*!
> + * \struct CUVIDDECODECREATEINFO
> + * Struct used in create decoder
> + */
> +typedef struct _CUVIDDECODECREATEINFO
> +{
> +    unsigned long ulWidth;              /**< Coded Sequence Width */
> +    unsigned long ulHeight;             /**< Coded Sequence Height */
> +    unsigned long ulNumDecodeSurfaces;  /**< Maximum number of internal decode surfaces */
> +    cudaVideoCodec CodecType;           /**< cudaVideoCodec_XXX */
> +    cudaVideoChromaFormat ChromaFormat; /**< cudaVideoChromaFormat_XXX (only 4:2:0 is currently supported) */
> +    unsigned long ulCreationFlags;      /**< Decoder creation flags (cudaVideoCreateFlags_XXX) */
> +    unsigned long bitDepthMinus8;
> +    unsigned long Reserved1[4];         /**< Reserved for future use - set to zero */
> +    /**
> +    * area of the frame that should be displayed
> +    */
> +    struct {
> +        short left;
> +        short top;
> +        short right;
> +        short bottom;
> +    } display_area;
> +
> +    cudaVideoSurfaceFormat OutputFormat;       /**< cudaVideoSurfaceFormat_XXX */
> +    cudaVideoDeinterlaceMode DeinterlaceMode;  /**< cudaVideoDeinterlaceMode_XXX */
> +    unsigned long ulTargetWidth;               /**< Post-processed Output Width (Should be aligned to 2) */
> +    unsigned long ulTargetHeight;              /**< Post-processed Output Height (Should be aligned to 2) */
> +    unsigned long ulNumOutputSurfaces;         /**< Maximum number of output surfaces simultaneously mapped */
> +    CUvideoctxlock vidLock;                    /**< If non-NULL, context lock used for synchronizing ownership of the cuda context */
> +    /**
> +    * target rectangle in the output frame (for aspect ratio conversion)
> +    * if a null rectangle is specified, {0,0,ulTargetWidth,ulTargetHeight} will be used
> +    */
> +    struct {
> +        short left;
> +        short top;
> +        short right;
> +        short bottom;
> +    } target_rect;
> +    unsigned long Reserved2[5];                /**< Reserved for future use - set to zero */
> +} CUVIDDECODECREATEINFO;
> +
> +/*!
> + * \struct CUVIDH264DPBENTRY
> + * H.264 DPB Entry
> + */
> +typedef struct _CUVIDH264DPBENTRY
> +{
> +    int PicIdx;                 /**< picture index of reference frame */
> +    int FrameIdx;               /**< frame_num(short-term) or LongTermFrameIdx(long-term) */
> +    int is_long_term;           /**< 0=short term reference, 1=long term reference */
> +    int not_existing;           /**< non-existing reference frame (corresponding PicIdx should be set to -1) */
> +    int used_for_reference;     /**< 0=unused, 1=top_field, 2=bottom_field, 3=both_fields */
> +    int FieldOrderCnt[2];       /**< field order count of top and bottom fields */
> +} CUVIDH264DPBENTRY;
> +
> +/*!
> + * \struct CUVIDH264MVCEXT
> + * H.264 MVC Picture Parameters Ext
> + */
> +typedef struct _CUVIDH264MVCEXT
> +{
> +    int num_views_minus1;
> +    int view_id;
> +    unsigned char inter_view_flag;
> +    unsigned char num_inter_view_refs_l0;
> +    unsigned char num_inter_view_refs_l1;
> +    unsigned char MVCReserved8Bits;
> +    int InterViewRefsL0[16];
> +    int InterViewRefsL1[16];
> +} CUVIDH264MVCEXT;
> +
> +/*!
> + * \struct CUVIDH264SVCEXT
> + * H.264 SVC Picture Parameters Ext
> + */
> +typedef struct _CUVIDH264SVCEXT
> +{
> +    unsigned char profile_idc;
> +    unsigned char level_idc;
> +    unsigned char DQId;
> +    unsigned char DQIdMax;
> +    unsigned char disable_inter_layer_deblocking_filter_idc;
> +    unsigned char ref_layer_chroma_phase_y_plus1;
> +    signed char   inter_layer_slice_alpha_c0_offset_div2;
> +    signed char   inter_layer_slice_beta_offset_div2;
> +
> +    unsigned short DPBEntryValidFlag;
> +    unsigned char inter_layer_deblocking_filter_control_present_flag;
> +    unsigned char extended_spatial_scalability_idc;
> +    unsigned char adaptive_tcoeff_level_prediction_flag;
> +    unsigned char slice_header_restriction_flag;
> +    unsigned char chroma_phase_x_plus1_flag;
> +    unsigned char chroma_phase_y_plus1;
> +
> +    unsigned char tcoeff_level_prediction_flag;
> +    unsigned char constrained_intra_resampling_flag;
> +    unsigned char ref_layer_chroma_phase_x_plus1_flag;
> +    unsigned char store_ref_base_pic_flag;
> +    unsigned char Reserved8BitsA;
> +    unsigned char Reserved8BitsB;
> +    // For the 4 scaled_ref_layer_XX fields below,
> +    // if (extended_spatial_scalability_idc == 1), SPS field, G.7.3.2.1.4, add prefix "seq_"
> +    // if (extended_spatial_scalability_idc == 2), SLH field, G.7.3.3.4,
> +    short scaled_ref_layer_left_offset;
> +    short scaled_ref_layer_top_offset;
> +    short scaled_ref_layer_right_offset;
> +    short scaled_ref_layer_bottom_offset;
> +    unsigned short Reserved16Bits;
> +    struct _CUVIDPICPARAMS *pNextLayer; /**< Points to the picparams for the next layer to be decoded. Linked list ends at the target layer. */
> +    int bRefBaseLayer;                  /**< whether to store ref base pic */
> +} CUVIDH264SVCEXT;
> +
> +/*!
> + * \struct CUVIDH264PICPARAMS
> + * H.264 Picture Parameters
> + */
> +typedef struct _CUVIDH264PICPARAMS
> +{
> +    // SPS
> +    int log2_max_frame_num_minus4;
> +    int pic_order_cnt_type;
> +    int log2_max_pic_order_cnt_lsb_minus4;
> +    int delta_pic_order_always_zero_flag;
> +    int frame_mbs_only_flag;
> +    int direct_8x8_inference_flag;
> +    int num_ref_frames;             // NOTE: shall meet level 4.1 restrictions
> +    unsigned char residual_colour_transform_flag;
> +    unsigned char bit_depth_luma_minus8;    // Must be 0 (only 8-bit supported)
> +    unsigned char bit_depth_chroma_minus8;  // Must be 0 (only 8-bit supported)
> +    unsigned char qpprime_y_zero_transform_bypass_flag;
> +    // PPS
> +    int entropy_coding_mode_flag;
> +    int pic_order_present_flag;
> +    int num_ref_idx_l0_active_minus1;
> +    int num_ref_idx_l1_active_minus1;
> +    int weighted_pred_flag;
> +    int weighted_bipred_idc;
> +    int pic_init_qp_minus26;
> +    int deblocking_filter_control_present_flag;
> +    int redundant_pic_cnt_present_flag;
> +    int transform_8x8_mode_flag;
> +    int MbaffFrameFlag;
> +    int constrained_intra_pred_flag;
> +    int chroma_qp_index_offset;
> +    int second_chroma_qp_index_offset;
> +    int ref_pic_flag;
> +    int frame_num;
> +    int CurrFieldOrderCnt[2];
> +    // DPB
> +    CUVIDH264DPBENTRY dpb[16];          // List of reference frames within the DPB
> +    // Quantization Matrices (raster-order)
> +    unsigned char WeightScale4x4[6][16];
> +    unsigned char WeightScale8x8[2][64];
> +    // FMO/ASO
> +    unsigned char fmo_aso_enable;
> +    unsigned char num_slice_groups_minus1;
> +    unsigned char slice_group_map_type;
> +    signed char pic_init_qs_minus26;
> +    unsigned int slice_group_change_rate_minus1;
> +    union
> +    {
> +        unsigned long long slice_group_map_addr;
> +        const unsigned char *pMb2SliceGroupMap;
> +    } fmo;
> +    unsigned int  Reserved[12];
> +    // SVC/MVC
> +    union
> +    {
> +        CUVIDH264MVCEXT mvcext;
> +        CUVIDH264SVCEXT svcext;
> +    } svcmvc;
> +} CUVIDH264PICPARAMS;
> +
> +
> +/*!
> + * \struct CUVIDMPEG2PICPARAMS
> + * MPEG-2 Picture Parameters
> + */
> +typedef struct _CUVIDMPEG2PICPARAMS
> +{
> +    int ForwardRefIdx;          // Picture index of forward reference (P/B-frames)
> +    int BackwardRefIdx;         // Picture index of backward reference (B-frames)
> +    int picture_coding_type;
> +    int full_pel_forward_vector;
> +    int full_pel_backward_vector;
> +    int f_code[2][2];
> +    int intra_dc_precision;
> +    int frame_pred_frame_dct;
> +    int concealment_motion_vectors;
> +    int q_scale_type;
> +    int intra_vlc_format;
> +    int alternate_scan;
> +    int top_field_first;
> +    // Quantization matrices (raster order)
> +    unsigned char QuantMatrixIntra[64];
> +    unsigned char QuantMatrixInter[64];
> +} CUVIDMPEG2PICPARAMS;
> +
> +////////////////////////////////////////////////////////////////////////////////////////////////
> +//
> +// MPEG-4 Picture Parameters
> +//
> +
> +// MPEG-4 has VOP types instead of Picture types
> +#define I_VOP 0
> +#define P_VOP 1
> +#define B_VOP 2
> +#define S_VOP 3
> +
> +/*!
> + * \struct CUVIDMPEG4PICPARAMS
> + * MPEG-4 Picture Parameters
> + */
> +typedef struct _CUVIDMPEG4PICPARAMS
> +{
> +    int ForwardRefIdx;          // Picture index of forward reference (P/B-frames)
> +    int BackwardRefIdx;         // Picture index of backward reference (B-frames)
> +    // VOL
> +    int video_object_layer_width;
> +    int video_object_layer_height;
> +    int vop_time_increment_bitcount;
> +    int top_field_first;
> +    int resync_marker_disable;
> +    int quant_type;
> +    int quarter_sample;
> +    int short_video_header;
> +    int divx_flags;
> +    // VOP
> +    int vop_coding_type;
> +    int vop_coded;
> +    int vop_rounding_type;
> +    int alternate_vertical_scan_flag;
> +    int interlaced;
> +    int vop_fcode_forward;
> +    int vop_fcode_backward;
> +    int trd[2];
> +    int trb[2];
> +    // Quantization matrices (raster order)
> +    unsigned char QuantMatrixIntra[64];
> +    unsigned char QuantMatrixInter[64];
> +    int gmc_enabled;
> +} CUVIDMPEG4PICPARAMS;
> +
> +/*!
> + * \struct CUVIDVC1PICPARAMS
> + * VC1 Picture Parameters
> + */
> +typedef struct _CUVIDVC1PICPARAMS
> +{
> +    int ForwardRefIdx;      /**< Picture index of forward reference (P/B-frames) */
> +    int BackwardRefIdx;     /**< Picture index of backward reference (B-frames) */
> +    int FrameWidth;         /**< Actual frame width */
> +    int FrameHeight;        /**< Actual frame height */
> +    // PICTURE
> +    int intra_pic_flag;     /**< Set to 1 for I,BI frames */
> +    int ref_pic_flag;       /**< Set to 1 for I,P frames */
> +    int progressive_fcm;    /**< Progressive frame */
> +    // SEQUENCE
> +    int profile;
> +    int postprocflag;
> +    int pulldown;
> +    int interlace;
> +    int tfcntrflag;
> +    int finterpflag;
> +    int psf;
> +    int multires;
> +    int syncmarker;
> +    int rangered;
> +    int maxbframes;
> +    // ENTRYPOINT
> +    int panscan_flag;
> +    int refdist_flag;
> +    int extended_mv;
> +    int dquant;
> +    int vstransform;
> +    int loopfilter;
> +    int fastuvmc;
> +    int overlap;
> +    int quantizer;
> +    int extended_dmv;
> +    int range_mapy_flag;
> +    int range_mapy;
> +    int range_mapuv_flag;
> +    int range_mapuv;
> +    int rangeredfrm;    // range reduction state
> +} CUVIDVC1PICPARAMS;
> +
> +/*!
> + * \struct CUVIDJPEGPICPARAMS
> + * JPEG Picture Parameters
> + */
> +typedef struct _CUVIDJPEGPICPARAMS
> +{
> +    int Reserved;
> +} CUVIDJPEGPICPARAMS;
> +
> +
> + /*!
> + * \struct CUVIDHEVCPICPARAMS
> + * HEVC Picture Parameters
> + */
> +typedef struct _CUVIDHEVCPICPARAMS
> +{
> +    // sps
> +    int pic_width_in_luma_samples;
> +    int pic_height_in_luma_samples;
> +    unsigned char log2_min_luma_coding_block_size_minus3;
> +    unsigned char log2_diff_max_min_luma_coding_block_size;
> +    unsigned char log2_min_transform_block_size_minus2;
> +    unsigned char log2_diff_max_min_transform_block_size;
> +    unsigned char pcm_enabled_flag;
> +    unsigned char log2_min_pcm_luma_coding_block_size_minus3;
> +    unsigned char log2_diff_max_min_pcm_luma_coding_block_size;
> +    unsigned char pcm_sample_bit_depth_luma_minus1;
> +
> +    unsigned char pcm_sample_bit_depth_chroma_minus1;
> +    unsigned char pcm_loop_filter_disabled_flag;
> +    unsigned char strong_intra_smoothing_enabled_flag;
> +    unsigned char max_transform_hierarchy_depth_intra;
> +    unsigned char max_transform_hierarchy_depth_inter;
> +    unsigned char amp_enabled_flag;
> +    unsigned char separate_colour_plane_flag;
> +    unsigned char log2_max_pic_order_cnt_lsb_minus4;
> +
> +    unsigned char num_short_term_ref_pic_sets;
> +    unsigned char long_term_ref_pics_present_flag;
> +    unsigned char num_long_term_ref_pics_sps;
> +    unsigned char sps_temporal_mvp_enabled_flag;
> +    unsigned char sample_adaptive_offset_enabled_flag;
> +    unsigned char scaling_list_enable_flag;
> +    unsigned char IrapPicFlag;
> +    unsigned char IdrPicFlag;
> +
> +    unsigned char bit_depth_luma_minus8;
> +    unsigned char bit_depth_chroma_minus8;
> +    unsigned char reserved1[14];
> +
> +    // pps
> +    unsigned char dependent_slice_segments_enabled_flag;
> +    unsigned char slice_segment_header_extension_present_flag;
> +    unsigned char sign_data_hiding_enabled_flag;
> +    unsigned char cu_qp_delta_enabled_flag;
> +    unsigned char diff_cu_qp_delta_depth;
> +    signed char init_qp_minus26;
> +    signed char pps_cb_qp_offset;
> +    signed char pps_cr_qp_offset;
> +
> +    unsigned char constrained_intra_pred_flag;
> +    unsigned char weighted_pred_flag;
> +    unsigned char weighted_bipred_flag;
> +    unsigned char transform_skip_enabled_flag;
> +    unsigned char transquant_bypass_enabled_flag;
> +    unsigned char entropy_coding_sync_enabled_flag;
> +    unsigned char log2_parallel_merge_level_minus2;
> +    unsigned char num_extra_slice_header_bits;
> +
> +    unsigned char loop_filter_across_tiles_enabled_flag;
> +    unsigned char loop_filter_across_slices_enabled_flag;
> +    unsigned char output_flag_present_flag;
> +    unsigned char num_ref_idx_l0_default_active_minus1;
> +    unsigned char num_ref_idx_l1_default_active_minus1;
> +    unsigned char lists_modification_present_flag;
> +    unsigned char cabac_init_present_flag;
> +    unsigned char pps_slice_chroma_qp_offsets_present_flag;
> +
> +    unsigned char deblocking_filter_override_enabled_flag;
> +    unsigned char pps_deblocking_filter_disabled_flag;
> +    signed char pps_beta_offset_div2;
> +    signed char pps_tc_offset_div2;
> +    unsigned char tiles_enabled_flag;
> +    unsigned char uniform_spacing_flag;
> +    unsigned char num_tile_columns_minus1;
> +    unsigned char num_tile_rows_minus1;
> +
> +    unsigned short column_width_minus1[21];
> +    unsigned short row_height_minus1[21];
> +    unsigned int reserved3[15];
> +
> +    // RefPicSets
> +    int NumBitsForShortTermRPSInSlice;
> +    int NumDeltaPocsOfRefRpsIdx;
> +    int NumPocTotalCurr;
> +    int NumPocStCurrBefore;
> +    int NumPocStCurrAfter;
> +    int NumPocLtCurr;
> +    int CurrPicOrderCntVal;
> +    int RefPicIdx[16];                  // [refpic] Indices of valid reference pictures (-1 if unused for reference)
> +    int PicOrderCntVal[16];             // [refpic]
> +    unsigned char IsLongTerm[16];       // [refpic] 0=not a long-term reference, 1=long-term reference
> +    unsigned char RefPicSetStCurrBefore[8]; // [0..NumPocStCurrBefore-1] -> refpic (0..15)
> +    unsigned char RefPicSetStCurrAfter[8];  // [0..NumPocStCurrAfter-1] -> refpic (0..15)
> +    unsigned char RefPicSetLtCurr[8];       // [0..NumPocLtCurr-1] -> refpic (0..15)
> +    unsigned char RefPicSetInterLayer0[8];
> +    unsigned char RefPicSetInterLayer1[8];
> +    unsigned int reserved4[12];
> +
> +    // scaling lists (diag order)
> +    unsigned char ScalingList4x4[6][16];       // [matrixId][i]
> +    unsigned char ScalingList8x8[6][64];       // [matrixId][i]
> +    unsigned char ScalingList16x16[6][64];     // [matrixId][i]
> +    unsigned char ScalingList32x32[2][64];     // [matrixId][i]
> +    unsigned char ScalingListDCCoeff16x16[6];  // [matrixId]
> +    unsigned char ScalingListDCCoeff32x32[2];  // [matrixId]
> +} CUVIDHEVCPICPARAMS;
> +
> +
> +/*!
> + * \struct CUVIDVP8PICPARAMS
> + * VP8 Picture Parameters
> + */
> +typedef struct _CUVIDVP8PICPARAMS
> +{
> +    int width;
> +    int height;
> +    unsigned int first_partition_size;
> +    //Frame Indexes
> +    unsigned char LastRefIdx;
> +    unsigned char GoldenRefIdx;
> +    unsigned char AltRefIdx;
> +    union {
> +        struct {
> +            unsigned char frame_type : 1;    /**< 0 = KEYFRAME, 1 = INTERFRAME  */
> +            unsigned char version : 3;
> +            unsigned char show_frame : 1;
> +            unsigned char update_mb_segmentation_data : 1;    /**< Must be 0 if segmentation is not enabled */
> +            unsigned char Reserved2Bits : 2;
> +        };
> +        unsigned char wFrameTagFlags;
> +    } tagflags;
> +    unsigned char Reserved1[4];
> +    unsigned int  Reserved2[3];
> +} CUVIDVP8PICPARAMS;
> +
> +/*!
> + * \struct CUVIDVP9PICPARAMS
> + * VP9 Picture Parameters
> + */
> +typedef struct _CUVIDVP9PICPARAMS
> +{
> +    unsigned int width;
> +    unsigned int height;
> +
> +    //Frame Indices
> +    unsigned char LastRefIdx;
> +    unsigned char GoldenRefIdx;
> +    unsigned char AltRefIdx;
> +    unsigned char colorSpace;
> +
> +    unsigned short profile : 3;
> +    unsigned short frameContextIdx : 2;
> +    unsigned short frameType : 1;
> +    unsigned short showFrame : 1;
> +    unsigned short errorResilient : 1;
> +    unsigned short frameParallelDecoding : 1;
> +    unsigned short subSamplingX : 1;
> +    unsigned short subSamplingY : 1;
> +    unsigned short intraOnly : 1;
> +    unsigned short allow_high_precision_mv : 1;
> +    unsigned short refreshEntropyProbs : 1;
> +    unsigned short reserved2Bits : 2;
> +
> +    unsigned short reserved16Bits;
> +
> +    unsigned char  refFrameSignBias[4];
> +
> +    unsigned char bitDepthMinus8Luma;
> +    unsigned char bitDepthMinus8Chroma;
> +    unsigned char loopFilterLevel;
> +    unsigned char loopFilterSharpness;
> +
> +    unsigned char modeRefLfEnabled;
> +    unsigned char log2_tile_columns;
> +    unsigned char log2_tile_rows;
> +
> +    unsigned char segmentEnabled : 1;
> +    unsigned char segmentMapUpdate : 1;
> +    unsigned char segmentMapTemporalUpdate : 1;
> +    unsigned char segmentFeatureMode : 1;
> +    unsigned char reserved4Bits : 4;
> +
> +
> +    unsigned char segmentFeatureEnable[8][4];
> +    short segmentFeatureData[8][4];
> +    unsigned char mb_segment_tree_probs[7];
> +    unsigned char segment_pred_probs[3];
> +    unsigned char reservedSegment16Bits[2];
> +
> +    int qpYAc;
> +    int qpYDc;
> +    int qpChDc;
> +    int qpChAc;
> +
> +    unsigned int activeRefIdx[3];
> +    unsigned int resetFrameContext;
> +    unsigned int mcomp_filter_type;
> +    unsigned int mbRefLfDelta[4];
> +    unsigned int mbModeLfDelta[2];
> +    unsigned int frameTagSize;
> +    unsigned int offsetToDctParts;
> +    unsigned int reserved128Bits[4];
> +
> +} CUVIDVP9PICPARAMS;
> +
> +
> +/*!
> + * \struct CUVIDPICPARAMS
> + * Picture Parameters for Decoding
> + */
> +typedef struct _CUVIDPICPARAMS
> +{
> +    int PicWidthInMbs;                    /**< Coded Frame Size */
> +    int FrameHeightInMbs;                 /**< Coded Frame Height */
> +    int CurrPicIdx;                       /**< Output index of the current picture */
> +    int field_pic_flag;                   /**< 0=frame picture, 1=field picture */
> +    int bottom_field_flag;                /**< 0=top field, 1=bottom field (ignored if field_pic_flag=0) */
> +    int second_field;                     /**< Second field of a complementary field pair */
> +    // Bitstream data
> +    unsigned int nBitstreamDataLen;        /**< Number of bytes in bitstream data buffer */
> +    const unsigned char *pBitstreamData;   /**< Ptr to bitstream data for this picture (slice-layer) */
> +    unsigned int nNumSlices;               /**< Number of slices in this picture */
> +    const unsigned int *pSliceDataOffsets; /**< nNumSlices entries, contains offset of each slice within the bitstream data buffer */
> +    int ref_pic_flag;                      /**< This picture is a reference picture */
> +    int intra_pic_flag;                    /**< This picture is entirely intra coded */
> +    unsigned int Reserved[30];             /**< Reserved for future use */
> +    // Codec-specific data
> +    union {
> +        CUVIDMPEG2PICPARAMS mpeg2;         /**< Also used for MPEG-1 */
> +        CUVIDH264PICPARAMS h264;
> +        CUVIDVC1PICPARAMS vc1;
> +        CUVIDMPEG4PICPARAMS mpeg4;
> +        CUVIDJPEGPICPARAMS jpeg;
> +        CUVIDHEVCPICPARAMS hevc;
> +        CUVIDVP8PICPARAMS vp8;
> +        CUVIDVP9PICPARAMS vp9;
> +        unsigned int CodecReserved[1024];
> +    } CodecSpecific;
> +} CUVIDPICPARAMS;
> +
> +
> +/*!
> + * \struct CUVIDPROCPARAMS
> + * Picture Parameters for Postprocessing
> + */
> +typedef struct _CUVIDPROCPARAMS
> +{
> +    int progressive_frame;  /**< Input is progressive (deinterlace_mode will be ignored)  */
> +    int second_field;       /**< Output the second field (ignored if deinterlace mode is Weave) */
> +    int top_field_first;    /**< Input frame is top field first (1st field is top, 2nd field is bottom) */
> +    int unpaired_field;     /**< Input only contains one field (2nd field is invalid) */
> +    // The fields below are used for raw YUV input
> +    unsigned int reserved_flags;        /**< Reserved for future use (set to zero) */
> +    unsigned int reserved_zero;         /**< Reserved (set to zero) */
> +    unsigned long long raw_input_dptr;  /**< Input CUdeviceptr for raw YUV extensions */
> +    unsigned int raw_input_pitch;       /**< pitch in bytes of raw YUV input (should be aligned appropriately) */
> +    unsigned int raw_input_format;      /**< Reserved for future use (set to zero) */
> +    unsigned long long raw_output_dptr; /**< Reserved for future use (set to zero) */
> +    unsigned int raw_output_pitch;      /**< Reserved for future use (set to zero) */
> +    unsigned int Reserved[48];
> +    void *Reserved3[3];
> +} CUVIDPROCPARAMS;
> +
> +
> +/**
> + *
> + * In order to minimize decode latencies, there should always be at least 2 pictures in the decode
> + * queue at any time, in order to make sure that all decode engines are always busy.
> + *
> + * Overall data flow:
> + *  - cuvidCreateDecoder(...)
> + *  For each picture:
> + *  - cuvidDecodePicture(N)
> + *  - cuvidMapVideoFrame(N-4)
> + *  - do some processing in cuda
> + *  - cuvidUnmapVideoFrame(N-4)
> + *  - cuvidDecodePicture(N+1)
> + *  - cuvidMapVideoFrame(N-3)
> + *    ...
> + *  - cuvidDestroyDecoder(...)
> + *
> + * NOTE:
> + * - When the cuda context is created from a D3D device, the D3D device must also be created
> + *   with the D3DCREATE_MULTITHREADED flag.
> + * - There is a limit to how many pictures can be mapped simultaneously (ulNumOutputSurfaces)
> + * - cuvidDecodePicture may block the calling thread if there are too many pictures pending
> + *   in the decode queue
> + */
> +
> +/**
> + * \fn CUresult CUDAAPI cuvidCreateDecoder(CUvideodecoder *phDecoder, CUVIDDECODECREATEINFO *pdci)
> + * Create the decoder object
> + */
> +typedef CUresult CUDAAPI tcuvidCreateDecoder(CUvideodecoder *phDecoder, CUVIDDECODECREATEINFO *pdci);
> +
> +/**
> + * \fn CUresult CUDAAPI cuvidDestroyDecoder(CUvideodecoder hDecoder)
> + * Destroy the decoder object
> + */
> +typedef CUresult CUDAAPI tcuvidDestroyDecoder(CUvideodecoder hDecoder);
> +
> +/**
> + * \fn CUresult CUDAAPI cuvidDecodePicture(CUvideodecoder hDecoder, CUVIDPICPARAMS *pPicParams)
> + * Decode a single picture (field or frame)
> + */
> +typedef CUresult CUDAAPI tcuvidDecodePicture(CUvideodecoder hDecoder, CUVIDPICPARAMS *pPicParams);
> +
> +
> +#if !defined(__CUVID_DEVPTR64) || defined(__CUVID_INTERNAL)
> +/**
> + * \fn CUresult CUDAAPI cuvidMapVideoFrame(CUvideodecoder hDecoder, int nPicIdx, unsigned int *pDevPtr, unsigned int *pPitch, CUVIDPROCPARAMS *pVPP);
> + * Post-process and map a video frame for use in cuda
> + */
> +typedef CUresult CUDAAPI tcuvidMapVideoFrame(CUvideodecoder hDecoder, int nPicIdx,
> +                                             unsigned int *pDevPtr, unsigned int *pPitch,
> +                                             CUVIDPROCPARAMS *pVPP);
> +
> +/**
> + * \fn CUresult CUDAAPI cuvidUnmapVideoFrame(CUvideodecoder hDecoder, unsigned int DevPtr)
> + * Unmap a previously mapped video frame
> + */
> +typedef CUresult CUDAAPI tcuvidUnmapVideoFrame(CUvideodecoder hDecoder, unsigned int DevPtr);
> +#endif
> +
> +#if defined(WIN64) || defined(_WIN64) || defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
> +/**
> + * \fn CUresult CUDAAPI cuvidMapVideoFrame64(CUvideodecoder hDecoder, int nPicIdx, unsigned long long *pDevPtr, unsigned int *pPitch, CUVIDPROCPARAMS *pVPP);
> + * Post-process and map a video frame for use in cuda (64-bit device pointer variant)
> + */
> +typedef CUresult CUDAAPI tcuvidMapVideoFrame64(CUvideodecoder hDecoder, int nPicIdx, unsigned long long *pDevPtr,
> +                                               unsigned int *pPitch, CUVIDPROCPARAMS *pVPP);
> +
> +/**
> + * \fn CUresult CUDAAPI cuvidUnmapVideoFrame64(CUvideodecoder hDecoder, unsigned long long DevPtr);
> + * Unmap a previously mapped video frame
> + */
> +typedef CUresult CUDAAPI tcuvidUnmapVideoFrame64(CUvideodecoder hDecoder, unsigned long long DevPtr);
> +
> +#if defined(__CUVID_DEVPTR64) && !defined(__CUVID_INTERNAL)
> +#define tcuvidMapVideoFrame      tcuvidMapVideoFrame64
> +#define tcuvidUnmapVideoFrame    tcuvidUnmapVideoFrame64
> +#endif
> +#endif
> +
> +
> +/**
> + *
> + * Context-locking: to facilitate multi-threaded implementations, the following 4 functions
> + * provide a simple mutex-style host synchronization. If a non-NULL context is specified
> + * in CUVIDDECODECREATEINFO, the codec library will acquire the mutex associated with the given
> + * context before making any cuda calls.
> + * A multi-threaded application could create a lock associated with a context handle so that
> + * multiple threads can safely share the same cuda context:
> + *  - use cuCtxPopCurrent immediately after context creation in order to create a 'floating' context
> + *    that can be passed to cuvidCtxLockCreate.
> + *  - When using a floating context, all cuda calls should only be made within a cuvidCtxLock/cuvidCtxUnlock section.
> + *
> + * NOTE: This is a safer alternative to cuCtxPushCurrent and cuCtxPopCurrent, and is not related to the video
> + * decoder in any way (implemented as a critical section associated with cuCtx{Push|Pop}Current calls).
> +*/
> +
> +/**
> + * \fn CUresult CUDAAPI cuvidCtxLockCreate(CUvideoctxlock *pLock, CUcontext ctx)
> + */
> +typedef CUresult CUDAAPI tcuvidCtxLockCreate(CUvideoctxlock *pLock, CUcontext ctx);
> +
> +/**
> + * \fn CUresult CUDAAPI cuvidCtxLockDestroy(CUvideoctxlock lck)
> + */
> +typedef CUresult CUDAAPI tcuvidCtxLockDestroy(CUvideoctxlock lck);
> +
> +/**
> + * \fn CUresult CUDAAPI cuvidCtxLock(CUvideoctxlock lck, unsigned int reserved_flags)
> + */
> +typedef CUresult CUDAAPI tcuvidCtxLock(CUvideoctxlock lck, unsigned int reserved_flags);
> +
> +/**
> + * \fn CUresult CUDAAPI cuvidCtxUnlock(CUvideoctxlock lck, unsigned int reserved_flags)
> + */
> +typedef CUresult CUDAAPI tcuvidCtxUnlock(CUvideoctxlock lck, unsigned int reserved_flags);
> +
> +/** @} */  /* End VIDEO_DECODER */
> +
> +#if defined(__cplusplus)
> +}
> +#endif /* __cplusplus */
> +
> +#endif // __CUDA_VIDEO_H__
> diff --git a/compat/cuda/dynlink_loader.h b/compat/cuda/dynlink_loader.h
> new file mode 100644
> index 0000000..6275664
> --- /dev/null
> +++ b/compat/cuda/dynlink_loader.h
> @@ -0,0 +1,254 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AV_COMPAT_CUDA_DYNLINK_LOADER_H
> +#define AV_COMPAT_CUDA_DYNLINK_LOADER_H
> +
> +#include "compat/cuda/dynlink_cuda.h"
> +#include "compat/cuda/dynlink_nvcuvid.h"
> +#include "compat/nvenc/nvEncodeAPI.h"
> +
> +#include "libavutil/log.h"
> +#include "libavutil/error.h"
> +
> +#if defined(_WIN32)
> +# include <windows.h>
> +# define dlopen(filename, flags) LoadLibrary(TEXT(filename))
> +# define dlsym(handle, symbol) GetProcAddress(handle, symbol)
> +# define dlclose(handle) FreeLibrary(handle)
> +# define LIB_HANDLE HMODULE
> +#else
> +# include <dlfcn.h>
> +# define LIB_HANDLE void*
> +#endif
> +
> +#if defined(_WIN32) || defined(__CYGWIN__)
> +# define CUDA_LIBNAME "nvcuda.dll"
> +# define NVCUVID_LIBNAME "nvcuvid.dll"
> +# if ARCH_X86_64
> +#  define NVENC_LIBNAME "nvEncodeAPI64.dll"
> +# else
> +#  define NVENC_LIBNAME "nvEncodeAPI.dll"
> +# endif
> +#else
> +# define CUDA_LIBNAME "libcuda.so.1"
> +# define NVCUVID_LIBNAME "libnvcuvid.so.1"
> +# define NVENC_LIBNAME "libnvidia-encode.so.1"
> +#endif
> +
> +#define LOAD_LIBRARY(l, path)                                     \
> +    do {                                                          \
> +        if (!((l) = dlopen(path, RTLD_LAZY))) {                   \
> +            av_log(NULL, AV_LOG_ERROR, "Cannot load %s\n", path); \
> +            ret = AVERROR_UNKNOWN;                                \
> +            goto error;                                           \
> +        }                                                         \
> +        av_log(NULL, AV_LOG_TRACE, "Loaded lib: %s\n", path);     \
> +    } while (0)
> +
> +#define LOAD_SYMBOL(fun, symbol)                                    \
> +    do {                                                            \
> +        if (!((f->fun) = dlsym(f->lib, symbol))) {                  \
> +            av_log(NULL, AV_LOG_ERROR, "Cannot load %s\n", symbol); \
> +            ret = AVERROR_UNKNOWN;                                  \
> +            goto error;                                             \
> +        }                                                           \
> +        av_log(NULL, AV_LOG_TRACE, "Loaded sym: %s\n", symbol);     \
> +    } while (0)
> +
> +#define GENERIC_LOAD_FUNC_PREAMBLE(T, n, N)  \
> +    T *f;                                    \
> +    int ret;                                 \
> +                                             \
> +    n##_free_functions(functions);           \
> +                                             \
> +    f = *functions = av_mallocz(sizeof(*f)); \
> +    if (!f)                                  \
> +        return AVERROR(ENOMEM);              \
> +                                             \
> +    LOAD_LIBRARY(f->lib, N);
> +
> +#define GENERIC_LOAD_FUNC_FINALE(n) \
> +    return 0;                       \
> +error:                              \
> +    n##_free_functions(functions);  \
> +    return ret;
> +
> +#define GENERIC_FREE_FUNC()              \
> +    if (!functions)                      \
> +        return;                          \
> +    if (*functions && (*functions)->lib) \
> +        dlclose((*functions)->lib);      \
> +    av_freep(functions);
> +
> +#ifdef AV_COMPAT_DYNLINK_CUDA_H
> +typedef struct CudaFunctions {
> +    tcuInit *cuInit;
> +    tcuDeviceGetCount *cuDeviceGetCount;
> +    tcuDeviceGet *cuDeviceGet;
> +    tcuDeviceGetName *cuDeviceGetName;
> +    tcuDeviceComputeCapability *cuDeviceComputeCapability;
> +    tcuCtxCreate_v2 *cuCtxCreate;
> +    tcuCtxPushCurrent_v2 *cuCtxPushCurrent;
> +    tcuCtxPopCurrent_v2 *cuCtxPopCurrent;
> +    tcuCtxDestroy_v2 *cuCtxDestroy;
> +    tcuMemAlloc_v2 *cuMemAlloc;
> +    tcuMemFree_v2 *cuMemFree;
> +    tcuMemcpy2D_v2 *cuMemcpy2D;
> +    tcuGetErrorName *cuGetErrorName;
> +    tcuGetErrorString *cuGetErrorString;
> +
> +    LIB_HANDLE lib;
> +} CudaFunctions;
> +#else
> +typedef struct CudaFunctions CudaFunctions;
> +#endif
> +
> +typedef struct CuvidFunctions {
> +    tcuvidCreateDecoder *cuvidCreateDecoder;
> +    tcuvidDestroyDecoder *cuvidDestroyDecoder;
> +    tcuvidDecodePicture *cuvidDecodePicture;
> +    tcuvidMapVideoFrame *cuvidMapVideoFrame;
> +    tcuvidUnmapVideoFrame *cuvidUnmapVideoFrame;
> +    tcuvidCtxLockCreate *cuvidCtxLockCreate;
> +    tcuvidCtxLockDestroy *cuvidCtxLockDestroy;
> +    tcuvidCtxLock *cuvidCtxLock;
> +    tcuvidCtxUnlock *cuvidCtxUnlock;
> +
> +    tcuvidCreateVideoSource *cuvidCreateVideoSource;
> +    tcuvidCreateVideoSourceW *cuvidCreateVideoSourceW;
> +    tcuvidDestroyVideoSource *cuvidDestroyVideoSource;
> +    tcuvidSetVideoSourceState *cuvidSetVideoSourceState;
> +    tcuvidGetVideoSourceState *cuvidGetVideoSourceState;
> +    tcuvidGetSourceVideoFormat *cuvidGetSourceVideoFormat;
> +    tcuvidGetSourceAudioFormat *cuvidGetSourceAudioFormat;
> +    tcuvidCreateVideoParser *cuvidCreateVideoParser;
> +    tcuvidParseVideoData *cuvidParseVideoData;
> +    tcuvidDestroyVideoParser *cuvidDestroyVideoParser;
> +
> +    LIB_HANDLE lib;
> +} CuvidFunctions;
> +
> +typedef struct NvencFunctions {
> +    NVENCSTATUS (NVENCAPI *NvEncodeAPICreateInstance)(NV_ENCODE_API_FUNCTION_LIST *functionList);
> +    NVENCSTATUS (NVENCAPI *NvEncodeAPIGetMaxSupportedVersion)(uint32_t* version);
> +
> +    LIB_HANDLE lib;
> +} NvencFunctions;
> +
> +#ifdef AV_COMPAT_DYNLINK_CUDA_H
> +static inline void cuda_free_functions(CudaFunctions **functions)
> +{
> +    GENERIC_FREE_FUNC();
> +}
> +#endif
> +
> +static inline void cuvid_free_functions(CuvidFunctions **functions)
> +{
> +    GENERIC_FREE_FUNC();
> +}
> +
> +static inline void nvenc_free_functions(NvencFunctions **functions)
> +{
> +    GENERIC_FREE_FUNC();
> +}
> +
> +#ifdef AV_COMPAT_DYNLINK_CUDA_H
> +static inline int cuda_load_functions(CudaFunctions **functions)
> +{
> +    GENERIC_LOAD_FUNC_PREAMBLE(CudaFunctions, cuda, CUDA_LIBNAME);
> +
> +    LOAD_SYMBOL(cuInit, "cuInit");
> +    LOAD_SYMBOL(cuDeviceGetCount, "cuDeviceGetCount");
> +    LOAD_SYMBOL(cuDeviceGet, "cuDeviceGet");
> +    LOAD_SYMBOL(cuDeviceGetName, "cuDeviceGetName");
> +    LOAD_SYMBOL(cuDeviceComputeCapability, "cuDeviceComputeCapability");
> +    LOAD_SYMBOL(cuCtxCreate, "cuCtxCreate_v2");
> +    LOAD_SYMBOL(cuCtxPushCurrent, "cuCtxPushCurrent_v2");
> +    LOAD_SYMBOL(cuCtxPopCurrent, "cuCtxPopCurrent_v2");
> +    LOAD_SYMBOL(cuCtxDestroy, "cuCtxDestroy_v2");
> +    LOAD_SYMBOL(cuMemAlloc, "cuMemAlloc_v2");
> +    LOAD_SYMBOL(cuMemFree, "cuMemFree_v2");
> +    LOAD_SYMBOL(cuMemcpy2D, "cuMemcpy2D_v2");
> +    LOAD_SYMBOL(cuGetErrorName, "cuGetErrorName");
> +    LOAD_SYMBOL(cuGetErrorString, "cuGetErrorString");
> +
> +    GENERIC_LOAD_FUNC_FINALE(cuda);
> +}
> +#endif
> +
> +static inline int cuvid_load_functions(CuvidFunctions **functions)
> +{
> +    GENERIC_LOAD_FUNC_PREAMBLE(CuvidFunctions, cuvid, NVCUVID_LIBNAME);
> +
> +    LOAD_SYMBOL(cuvidCreateDecoder, "cuvidCreateDecoder");
> +    LOAD_SYMBOL(cuvidDestroyDecoder, "cuvidDestroyDecoder");
> +    LOAD_SYMBOL(cuvidDecodePicture, "cuvidDecodePicture");
> +#ifdef __CUVID_DEVPTR64
> +    LOAD_SYMBOL(cuvidMapVideoFrame, "cuvidMapVideoFrame64");
> +    LOAD_SYMBOL(cuvidUnmapVideoFrame, "cuvidUnmapVideoFrame64");
> +#else
> +    LOAD_SYMBOL(cuvidMapVideoFrame, "cuvidMapVideoFrame");
> +    LOAD_SYMBOL(cuvidUnmapVideoFrame, "cuvidUnmapVideoFrame");
> +#endif
> +    LOAD_SYMBOL(cuvidCtxLockCreate, "cuvidCtxLockCreate");
> +    LOAD_SYMBOL(cuvidCtxLockDestroy, "cuvidCtxLockDestroy");
> +    LOAD_SYMBOL(cuvidCtxLock, "cuvidCtxLock");
> +    LOAD_SYMBOL(cuvidCtxUnlock, "cuvidCtxUnlock");
> +
> +    LOAD_SYMBOL(cuvidCreateVideoSource, "cuvidCreateVideoSource");
> +    LOAD_SYMBOL(cuvidCreateVideoSourceW, "cuvidCreateVideoSourceW");
> +    LOAD_SYMBOL(cuvidDestroyVideoSource, "cuvidDestroyVideoSource");
> +    LOAD_SYMBOL(cuvidSetVideoSourceState, "cuvidSetVideoSourceState");
> +    LOAD_SYMBOL(cuvidGetVideoSourceState, "cuvidGetVideoSourceState");
> +    LOAD_SYMBOL(cuvidGetSourceVideoFormat, "cuvidGetSourceVideoFormat");
> +    LOAD_SYMBOL(cuvidGetSourceAudioFormat, "cuvidGetSourceAudioFormat");
> +    LOAD_SYMBOL(cuvidCreateVideoParser, "cuvidCreateVideoParser");
> +    LOAD_SYMBOL(cuvidParseVideoData, "cuvidParseVideoData");
> +    LOAD_SYMBOL(cuvidDestroyVideoParser, "cuvidDestroyVideoParser");
> +
> +    GENERIC_LOAD_FUNC_FINALE(cuvid);
> +}
> +
> +static inline int nvenc_load_functions(NvencFunctions **functions)
> +{
> +    GENERIC_LOAD_FUNC_PREAMBLE(NvencFunctions, nvenc, NVENC_LIBNAME);
> +
> +    LOAD_SYMBOL(NvEncodeAPICreateInstance, "NvEncodeAPICreateInstance");
> +    LOAD_SYMBOL(NvEncodeAPIGetMaxSupportedVersion, "NvEncodeAPIGetMaxSupportedVersion");
> +
> +    GENERIC_LOAD_FUNC_FINALE(nvenc);
> +}
> +
> +#undef GENERIC_LOAD_FUNC_PREAMBLE
> +#undef LOAD_LIBRARY
> +#undef LOAD_SYMBOL
> +#undef GENERIC_LOAD_FUNC_FINALE
> +#undef GENERIC_FREE_FUNC
> +#undef CUDA_LIBNAME
> +#undef NVCUVID_LIBNAME
> +#undef NVENC_LIBNAME
> +#undef LIB_HANDLE
> +
> +#if defined(_WIN32)
> +#undef dlopen
> +#undef dlsym
> +#undef dlclose
> +#endif
> +
> +#endif
> \ No newline at end of file
> diff --git a/compat/cuda/dynlink_nvcuvid.h b/compat/cuda/dynlink_nvcuvid.h
> new file mode 100644
> index 0000000..6c197e0
> --- /dev/null
> +++ b/compat/cuda/dynlink_nvcuvid.h
> @@ -0,0 +1,316 @@
> +/*
> + * This copyright notice applies to this header file only:
> + *
> + * Copyright (c) 2010-2016 NVIDIA Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person
> + * obtaining a copy of this software and associated documentation
> + * files (the "Software"), to deal in the Software without
> + * restriction, including without limitation the rights to use,
> + * copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the software, and to permit persons to whom the
> + * software is furnished to do so, subject to the following
> + * conditions:
> + *
> + * The above copyright notice and this permission notice shall be
> + * included in all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
> + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
> + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
> + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + */
> +
> +/**
> + * \file nvcuvid.h
> + *   NvCuvid API provides Video Decoding interface to NVIDIA GPU devices.
> + * \date 2015-2015
> + *  This file contains the interface constants, structure definitions and function prototypes.
> + */
> +
> +#if !defined(__NVCUVID_H__)
> +#define __NVCUVID_H__
> +
> +#include "compat/cuda/dynlink_cuviddec.h"
> +
> +#if defined(__cplusplus)
> +extern "C" {
> +#endif /* __cplusplus */
> +
> +////////////////////////////////////////////////////////////////////////////////////////////////
> +//
> +// High-level helper APIs for video sources
> +//
> +
> +typedef void *CUvideosource;
> +typedef void *CUvideoparser;
> +typedef long long CUvideotimestamp;
> +
> +/**
> + * \addtogroup VIDEO_PARSER Video Parser
> + * @{
> + */
> +
> +/*!
> + * \enum cudaVideoState
> + * Video Source State
> + */
> +typedef enum {
> +    cudaVideoState_Error   = -1,    /**< Error state (invalid source)  */
> +    cudaVideoState_Stopped = 0,     /**< Source is stopped (or reached end-of-stream)  */
> +    cudaVideoState_Started = 1      /**< Source is running and delivering data  */
> +} cudaVideoState;
> +
> +/*!
> + * \enum cudaAudioCodec
> + * Audio compression
> + */
> +typedef enum {
> +    cudaAudioCodec_MPEG1=0,         /**< MPEG-1 Audio  */
> +    cudaAudioCodec_MPEG2,           /**< MPEG-2 Audio  */
> +    cudaAudioCodec_MP3,             /**< MPEG-1 Layer III Audio  */
> +    cudaAudioCodec_AC3,             /**< Dolby Digital (AC3) Audio  */
> +    cudaAudioCodec_LPCM             /**< PCM Audio  */
> +} cudaAudioCodec;
> +
> +/*!
> + * \struct CUVIDEOFORMAT
> + * Video format
> + */
> +typedef struct
> +{
> +    cudaVideoCodec codec;                   /**< Compression format  */
> +   /**
> +    * frame rate = numerator / denominator (for example: 30000/1001)
> +    */
> +    struct {
> +        unsigned int numerator;             /**< frame rate numerator   (0 = unspecified or variable frame rate) */
> +        unsigned int denominator;           /**< frame rate denominator (0 = unspecified or variable frame rate) */
> +    } frame_rate;
> +    unsigned char progressive_sequence;     /**< 0=interlaced, 1=progressive */
> +    unsigned char bit_depth_luma_minus8;    /**< high bit depth Luma */
> +    unsigned char bit_depth_chroma_minus8;  /**< high bit depth Chroma */
> +    unsigned char reserved1;                /**< Reserved for future use */
> +    unsigned int coded_width;               /**< coded frame width */
> +    unsigned int coded_height;              /**< coded frame height  */
> +   /**
> +    *   area of the frame that should be displayed
> +    * typical example:
> +    *   coded_width = 1920, coded_height = 1088
> +    *   display_area = { 0,0,1920,1080 }
> +    */
> +    struct {
> +        int left;                           /**< left position of display rect  */
> +        int top;                            /**< top position of display rect  */
> +        int right;                          /**< right position of display rect  */
> +        int bottom;                         /**< bottom position of display rect  */
> +    } display_area;
> +    cudaVideoChromaFormat chroma_format;    /**<  Chroma format */
> +    unsigned int bitrate;                   /**< video bitrate (bps, 0=unknown) */
> +   /**
> +    * Display Aspect Ratio = x:y (4:3, 16:9, etc)
> +    */
> +    struct {
> +        int x;
> +        int y;
> +    } display_aspect_ratio;
> +    /**
> +    * Video Signal Description
> +    */
> +    struct {
> +        unsigned char video_format          : 3;
> +        unsigned char video_full_range_flag : 1;
> +        unsigned char reserved_zero_bits    : 4;
> +        unsigned char color_primaries;
> +        unsigned char transfer_characteristics;
> +        unsigned char matrix_coefficients;
> +    } video_signal_description;
> +    unsigned int seqhdr_data_length;          /**< Additional bytes following (CUVIDEOFORMATEX)  */
> +} CUVIDEOFORMAT;
> +
> +/*!
> + * \struct CUVIDEOFORMATEX
> + * Video format including raw sequence header information
> + */
> +typedef struct
> +{
> +    CUVIDEOFORMAT format;
> +    unsigned char raw_seqhdr_data[1024];
> +} CUVIDEOFORMATEX;
> +
> +/*!
> + * \struct CUAUDIOFORMAT
> + * Audio Formats
> + */
> +typedef struct
> +{
> +    cudaAudioCodec codec;       /**< Compression format  */
> +    unsigned int channels;      /**< number of audio channels */
> +    unsigned int samplespersec; /**< sampling frequency */
> +    unsigned int bitrate;       /**< For uncompressed, can also be used to determine bits per sample */
> +    unsigned int reserved1;     /**< Reserved for future use */
> +    unsigned int reserved2;     /**< Reserved for future use */
> +} CUAUDIOFORMAT;
> +
> +
> +/*!
> + * \enum CUvideopacketflags
> + * Data packet flags
> + */
> +typedef enum {
> +    CUVID_PKT_ENDOFSTREAM   = 0x01,   /**< Set when this is the last packet for this stream  */
> +    CUVID_PKT_TIMESTAMP     = 0x02,   /**< Timestamp is valid  */
> +    CUVID_PKT_DISCONTINUITY = 0x04    /**< Set when a discontinuity has to be signalled  */
> +} CUvideopacketflags;
> +
> +/*!
> + * \struct CUVIDSOURCEDATAPACKET
> + * Data Packet
> + */
> +typedef struct _CUVIDSOURCEDATAPACKET
> +{
> +    unsigned long flags;            /**< Combination of CUVID_PKT_XXX flags */
> +    unsigned long payload_size;     /**< number of bytes in the payload (may be zero if EOS flag is set) */
> +    const unsigned char *payload;   /**< Pointer to packet payload data (may be NULL if EOS flag is set) */
> +    CUvideotimestamp timestamp;     /**< Presentation timestamp (10MHz clock), only valid if CUVID_PKT_TIMESTAMP flag is set */
> +} CUVIDSOURCEDATAPACKET;
> +
> +// Callback for packet delivery
> +typedef int (CUDAAPI *PFNVIDSOURCECALLBACK)(void *, CUVIDSOURCEDATAPACKET *);
> +
> +/*!
> + * \struct CUVIDSOURCEPARAMS
> + * Source Params
> + */
> +typedef struct _CUVIDSOURCEPARAMS
> +{
> +    unsigned int ulClockRate;                   /**< Timestamp units in Hz (0=default=10000000Hz)  */
> +    unsigned int uReserved1[7];                 /**< Reserved for future use - set to zero  */
> +    void *pUserData;                            /**< Parameter passed in to the data handlers  */
> +    PFNVIDSOURCECALLBACK pfnVideoDataHandler;   /**< Called to deliver video packets  */
> +    PFNVIDSOURCECALLBACK pfnAudioDataHandler;   /**< Called to deliver audio packets  */
> +    void *pvReserved2[8];                       /**< Reserved for future use - set to NULL */
> +} CUVIDSOURCEPARAMS;
> +
> +/*!
> + * \enum CUvideosourceformat_flags
> + * CUvideosourceformat_flags
> + */
> +typedef enum {
> +    CUVID_FMT_EXTFORMATINFO = 0x100             /**< Return extended format structure (CUVIDEOFORMATEX) */
> +} CUvideosourceformat_flags;
> +
> +#if !defined(__APPLE__)
> +/**
> + * \fn CUresult CUDAAPI cuvidCreateVideoSource(CUvideosource *pObj, const char *pszFileName, CUVIDSOURCEPARAMS *pParams)
> + * Create Video Source
> + */
> +typedef CUresult CUDAAPI tcuvidCreateVideoSource(CUvideosource *pObj, const char *pszFileName, CUVIDSOURCEPARAMS *pParams);
> +
> +/**
> + * \fn CUresult CUDAAPI cuvidCreateVideoSourceW(CUvideosource *pObj, const wchar_t *pwszFileName, CUVIDSOURCEPARAMS *pParams)
> + * Create Video Source
> + */
> +typedef CUresult CUDAAPI tcuvidCreateVideoSourceW(CUvideosource *pObj, const wchar_t *pwszFileName, CUVIDSOURCEPARAMS *pParams);
> +
> +/**
> + * \fn CUresult CUDAAPI cuvidDestroyVideoSource(CUvideosource obj)
> + * Destroy Video Source
> + */
> +typedef CUresult CUDAAPI tcuvidDestroyVideoSource(CUvideosource obj);
> +
> +/**
> + * \fn CUresult CUDAAPI cuvidSetVideoSourceState(CUvideosource obj, cudaVideoState state)
> + * Set Video Source state
> + */
> +typedef CUresult CUDAAPI tcuvidSetVideoSourceState(CUvideosource obj, cudaVideoState state);
> +
> +/**
> + * \fn cudaVideoState CUDAAPI cuvidGetVideoSourceState(CUvideosource obj)
> + * Get Video Source state
> + */
> +typedef cudaVideoState CUDAAPI tcuvidGetVideoSourceState(CUvideosource obj);
> +
> +/**
> + * \fn CUresult CUDAAPI cuvidGetSourceVideoFormat(CUvideosource obj, CUVIDEOFORMAT *pvidfmt, unsigned int flags)
> + * Get Video Source Format
> + */
> +typedef CUresult CUDAAPI tcuvidGetSourceVideoFormat(CUvideosource obj, CUVIDEOFORMAT *pvidfmt, unsigned int flags);
> +
> +/**
> + * \fn CUresult CUDAAPI cuvidGetSourceAudioFormat(CUvideosource obj, CUAUDIOFORMAT *paudfmt, unsigned int flags)
> + * Get Audio Source Format
> + */
> +typedef CUresult CUDAAPI tcuvidGetSourceAudioFormat(CUvideosource obj, CUAUDIOFORMAT *paudfmt, unsigned int flags);
> +
> +#endif
> +
> +/**
> + * \struct CUVIDPARSERDISPINFO
> + */
> +typedef struct _CUVIDPARSERDISPINFO
> +{
> +    int picture_index;         /**<                 */
> +    int progressive_frame;     /**<                 */
> +    int top_field_first;       /**<                 */
> +    int repeat_first_field;    /**< Number of additional fields (1=ivtc, 2=frame doubling, 4=frame tripling, -1=unpaired field)  */
> +    CUvideotimestamp timestamp; /**<     */
> +} CUVIDPARSERDISPINFO;
> +
> +//
> +// Parser callbacks
> +// The parser will call these synchronously from within cuvidParseVideoData(), whenever a picture is ready to
> +// be decoded and/or displayed.
> +//
> +typedef int (CUDAAPI *PFNVIDSEQUENCECALLBACK)(void *, CUVIDEOFORMAT *);
> +typedef int (CUDAAPI *PFNVIDDECODECALLBACK)(void *, CUVIDPICPARAMS *);
> +typedef int (CUDAAPI *PFNVIDDISPLAYCALLBACK)(void *, CUVIDPARSERDISPINFO *);
> +
> +/**
> + * \struct CUVIDPARSERPARAMS
> + */
> +typedef struct _CUVIDPARSERPARAMS
> +{
> +    cudaVideoCodec CodecType;               /**< cudaVideoCodec_XXX  */
> +    unsigned int ulMaxNumDecodeSurfaces;    /**< Max # of decode surfaces (parser will cycle through these) */
> +    unsigned int ulClockRate;               /**< Timestamp units in Hz (0=default=10000000Hz) */
> +    unsigned int ulErrorThreshold;          /**< % Error threshold (0-100) for calling pfnDecodePicture (100=always call pfnDecodePicture even if picture bitstream is fully corrupted) */
> +    unsigned int ulMaxDisplayDelay;         /**< Max display queue delay (improves pipelining of decode with display) - 0=no delay (recommended values: 2..4) */
> +    unsigned int uReserved1[5];             /**< Reserved for future use - set to 0 */
> +    void *pUserData;                        /**< User data for callbacks */
> +    PFNVIDSEQUENCECALLBACK pfnSequenceCallback; /**< Called before decoding frames and/or whenever there is a format change */
> +    PFNVIDDECODECALLBACK pfnDecodePicture;      /**< Called when a picture is ready to be decoded (decode order) */
> +    PFNVIDDISPLAYCALLBACK pfnDisplayPicture;    /**< Called whenever a picture is ready to be displayed (display order)  */
> +    void *pvReserved2[7];                       /**< Reserved for future use - set to NULL */
> +    CUVIDEOFORMATEX *pExtVideoInfo;             /**< [Optional] sequence header data from system layer */
> +} CUVIDPARSERPARAMS;
> +
> +/**
> + * \fn CUresult CUDAAPI cuvidCreateVideoParser(CUvideoparser *pObj, CUVIDPARSERPARAMS *pParams)
> + */
> +typedef CUresult CUDAAPI tcuvidCreateVideoParser(CUvideoparser *pObj, CUVIDPARSERPARAMS *pParams);
> +
> +/**
> + * \fn CUresult CUDAAPI cuvidParseVideoData(CUvideoparser obj, CUVIDSOURCEDATAPACKET *pPacket)
> + */
> +typedef CUresult CUDAAPI tcuvidParseVideoData(CUvideoparser obj, CUVIDSOURCEDATAPACKET *pPacket);
> +
> +/**
> + * \fn CUresult CUDAAPI cuvidDestroyVideoParser(CUvideoparser obj)
> + */
> +typedef CUresult CUDAAPI tcuvidDestroyVideoParser(CUvideoparser obj);
> +
> +/** @} */  /* END VIDEO_PARSER */
> +////////////////////////////////////////////////////////////////////////////////////////////////
> +
> +#if defined(__cplusplus)
> +}
> +#endif /* __cplusplus */
> +
> +#endif // __NVCUVID_H__
> +
> +
> --
> 2.10.1
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

Sven C. Dack Oct. 19, 2016, 1:32 p.m. UTC | #2

On 19/10/16 13:50, Hendrik Leppkes wrote:
> On Wed, Oct 19, 2016 at 2:00 PM, Timo Rothenpieler
> <timo@rothenpieler.org> wrote:
>> ---
>>   compat/cuda/dynlink_cuda.h     |  88 +++++
>>   compat/cuda/dynlink_cuviddec.h | 808 +++++++++++++++++++++++++++++++++++++++++
>>   compat/cuda/dynlink_loader.h   | 254 +++++++++++++
>>   compat/cuda/dynlink_nvcuvid.h  | 316 ++++++++++++++++
>>   4 files changed, 1466 insertions(+)
>>   create mode 100644 compat/cuda/dynlink_cuda.h
>>   create mode 100644 compat/cuda/dynlink_cuviddec.h
>>   create mode 100644 compat/cuda/dynlink_loader.h
>>   create mode 100644 compat/cuda/dynlink_nvcuvid.h
>>
>> diff --git a/compat/cuda/dynlink_cuda.h b/compat/cuda/dynlink_cuda.h
>> new file mode 100644
>> index 0000000..908f12d
>> --- /dev/null
>> +++ b/compat/cuda/dynlink_cuda.h
>> @@ -0,0 +1,88 @@
>> +/*
>> + * This file is part of FFmpeg.
>> + *
>> + * FFmpeg is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2.1 of the License, or (at your option) any later version.
>> + *
>> + * FFmpeg is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with FFmpeg; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>> + */
> So did you write this without once looking at the NVIDIA header file?
> Because if you did even read it, then you can't realistically claim
> this is LGPL.

Nvidia's copyright notice seems pretty clear to me. It grants permission to 
sublicense it without limitation and without restriction. So which part exactly 
in Nvidia's copyright notice do you have a problem with?

PS: Try not to reply with the entire patch, but cut it off. It's not fun to 
scroll through the entire reply just to find it contains 2-3 lines of comment.

>> +/*
>> + * This copyright notice applies to this header file only:
>> + *
>> + * Copyright (c) 2010-2016 NVIDIA Corporation
>> + *
>> + * Permission is hereby granted, free of charge, to any person
>> + * obtaining a copy of this software and associated documentation
>> + * files (the "Software"), to deal in the Software without
>> + * restriction, including without limitation the rights to use,
>> + * copy, modify, merge, publish, distribute, sublicense, and/or sell
>> + * copies of the software, and to permit persons to whom the
>> + * software is furnished to do so, subject to the following
>> + * conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be
>> + * included in all copies or substantial portions of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
>> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
>> + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
>> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
>> + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
>> + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>> + * OTHER DEALINGS IN THE SOFTWARE.
>> + */
>> +
>> +/**
>> + * \file cuviddec.h
>> + * NvCuvid API provides Video Decoding interface to NVIDIA GPU devices.
>> + * \date 2015-2016
>> + * This file contains constants, structure definitions and function prototypes used for decoding.
>> + */

Timo Rothenpieler Oct. 19, 2016, 2:15 p.m. UTC | #3

>> +/*
>> + * This file is part of FFmpeg.
>> + *
>> + * FFmpeg is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2.1 of the License, or (at your option) any later version.
>> + *
>> + * FFmpeg is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with FFmpeg; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>> + */
> 
> So did you write this without once looking at the NVIDIA header file?
> Because if you did even read it, then you can't realistically claim
> this is LGPL.
> 

This file is entirely based on what was already in ffmpeg, or what was
available publicly via google search results.

I'm not sure if I ever opened and looked at cuda.h, but I definitely
avoided doing so when writing this header.

The only thing that was not obvious from already existing code in ffmpeg
was the CUDA_MEMCPY2D struct, which is conveniently documented by just
entering the struct name to Google:

https://www.cs.cmu.edu/afs/cs/academic/class/15668-s11/www/cuda-doc/html/group__CUDA__MEM_g27f885b30c34cc20a663a671dbf6fc27.html

Hendrik Leppkes Oct. 19, 2016, 2:25 p.m. UTC | #4

On Wed, Oct 19, 2016 at 3:32 PM, Sven C. Dack <sven.c.dack@sky.com> wrote:
> On 19/10/16 13:50, Hendrik Leppkes wrote:
>>
>> On Wed, Oct 19, 2016 at 2:00 PM, Timo Rothenpieler
>> <timo@rothenpieler.org> wrote:
>>>
>>> ---
>>>   compat/cuda/dynlink_cuda.h     |  88 +++++
>>>   compat/cuda/dynlink_cuviddec.h | 808 +++++++++++++++++++++++++++++++++++++++++
>>>   compat/cuda/dynlink_loader.h   | 254 +++++++++++++
>>>   compat/cuda/dynlink_nvcuvid.h  | 316 ++++++++++++++++
>>>   4 files changed, 1466 insertions(+)
>>>   create mode 100644 compat/cuda/dynlink_cuda.h
>>>   create mode 100644 compat/cuda/dynlink_cuviddec.h
>>>   create mode 100644 compat/cuda/dynlink_loader.h
>>>   create mode 100644 compat/cuda/dynlink_nvcuvid.h
>>>
>>> diff --git a/compat/cuda/dynlink_cuda.h b/compat/cuda/dynlink_cuda.h
>>> new file mode 100644
>>> index 0000000..908f12d
>>> --- /dev/null
>>> +++ b/compat/cuda/dynlink_cuda.h
>>> @@ -0,0 +1,88 @@
>>> +/*
>>> + * This file is part of FFmpeg.
>>> + *
>>> + * FFmpeg is free software; you can redistribute it and/or
>>> + * modify it under the terms of the GNU Lesser General Public
>>> + * License as published by the Free Software Foundation; either
>>> + * version 2.1 of the License, or (at your option) any later version.
>>> + *
>>> + * FFmpeg is distributed in the hope that it will be useful,
>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>> + * Lesser General Public License for more details.
>>> + *
>>> + * You should have received a copy of the GNU Lesser General Public
>>> + * License along with FFmpeg; if not, write to the Free Software
>>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>>> + */
>>
>> So did you write this without once looking at the NVIDIA header file?
>> Because if you did even read it, then you can't realistically claim
>> this is LGPL.
>
>
> Nvidia's copyright notice seems pretty clear to me. It grants permission to
> sublicense it without limitation and without restriction. So which part
> exactly in Nvidia's copyright notice do you have a problem with?
>

You seem to be referring to the license of the cuvid files, which is
not the license actually used by the CUDA headers. The CUDA headers
are under a full proprietary license with no such permission.

- Hendrik

Sven C. Dack Oct. 19, 2016, 4:07 p.m. UTC | #5

On 19/10/16 15:25, Hendrik Leppkes wrote:
> You seem to be referring to the license of the cuvid files, which is
> not the license actually used by the CUDA headers. The CUDA headers
> are under a full proprietary license with no such permission.

Are you saying the use of CUDA header files (with #include) during compilation 
makes ffmpeg itself non-free software?

Hendrik Leppkes Oct. 19, 2016, 4:18 p.m. UTC | #6

On Wed, Oct 19, 2016 at 6:07 PM, Sven C. Dack <sven.c.dack@sky.com> wrote:
> On 19/10/16 15:25, Hendrik Leppkes wrote:
>>
>> You seem to be referring to the license of the cuvid files, which is
>> not the license actually used by the CUDA headers. The CUDA headers
>> are under a full proprietary license with no such permission.
>
>
> Are you saying the use of CUDA header files (with #include) during
> compilation makes ffmpeg itself non-free software?
>

That's the general interpretation of the license situation. If you
include non-free headers, your binary becomes non-free, hence why
building with cuda currently requires the --enable-nonfree option.

- Hendrik

Sven C. Dack Oct. 19, 2016, 4:52 p.m. UTC | #7

On 19/10/16 17:18, Hendrik Leppkes wrote:
> That's the general interpretation of the license situation. If you
> include non-free headers, your binary becomes non-free, hence why
> building with cuda currently requires the --enable-nonfree option.
No. This is a generalization and cannot make sense. At best does it ignore the 
individual licenses and their particular terms and discards them for a convenience.

From what I can tell, only one condition applies here, which is regarding the 
use of the header files. There is however no reproduction, disclosure, 
distribution or modification of these headers happening here, which is what is 
prohibited by Nvidia. The note about its use is then meant to free Nvidia itself 
from any warranty and liability claims and all they are asking is for users of 
their libraries and header files to include this note into their code. I find it 
absurd to claim this would make ffmpeg non-free software. Not to acknowledge 
Nvidia's wish for freedom is the same as trying to deny Nvidia their freedom. Or 
you could just absurdly claim that a license such as the GPL, requiring to be 
included at any time, would in itself be a limitation of your freedom and thus 
void the license and agreement. The last thing I want is for anyone to start 
wearing tin foil hats.

I believe a more sensible and reasonable view of the situation is needed here.

Andreas Cadhalpun Oct. 19, 2016, 5:56 p.m. UTC | #8

On 19.10.2016 18:52, Sven C. Dack wrote:
> On 19/10/16 17:18, Hendrik Leppkes wrote:
>> That's the general interpretation of the license situation. If you
>> include non-free headers, your binary becomes non-free, hence why
>> building with cuda currently requires the --enable-nonfree option.
> No. This is a generalization and cannot make sense. At best does it
> ignore the individual licenses and their particular terms and
> discards them for a convenience.
> 
> From what I can tell does only one condition apply here, which is
> regarding the use of the header files. There is however no
> reproduction, disclosure, distribution or modification of these
> headers happening here, which is what is prohibited by Nvidia.

I think you are missing the main problem here: FFmpeg is licensed
under the LGPL 2.1, which states [1]:
"  4. You may copy and distribute the Library [...] in object code
or executable form [...] provided that you accompany it with the
complete corresponding machine-readable source code"

If during compilation the cuda.h header is used, it is part of the
complete source code and thus the license requires it to be
distributed together with the object code. However, you say that
Nvidia prohibits re-distribution of this header and as a result
the compiled ffmpeg binaries cannot legally be distributed.

This is why it requires --enable-nonfree.

Best regards,
Andreas

1: https://www.gnu.org/licenses/lgpl-2.1.html

Sven C. Dack Oct. 19, 2016, 6:46 p.m. UTC | #9

On 19/10/16 18:56, Andreas Cadhalpun wrote:
> I think you are missing the main problem here: FFmpeg is licensed
> under the LGPL 2.1, which states [1]:
> "  4. You may copy and distribute the Library [...] in object code
> or executable form [...] provided that you accompany it with the
> complete corresponding machine-readable source code"
>
> If during compilation the cuda.h header is used, it is part of the
> complete source code and thus the license requires it to be
> distributed together with the object code. However, you say that
> Nvidia prohibits re-distribution of this header and as a result
> the compiled ffmpeg binaries cannot legally be distributed.
>
> This is why it requires --enable-nonfree.
>
> Best regards,
> Andreas
>
No. This is exactly what I meant with wearing tin foil hats. Just because a 
compiler includes information provided by header files into the compilation 
process does this not imply a transfer of ownership or copyright of this 
information. The header files do not become a part of the source code. You may 
find the ownership and copyright only no longer being easily distinguishable 
once it becomes a binary. The separation is however still present and there is 
no magical transfer of ownership happening here. Even if there was, who is to 
say ffmpeg isn't becoming part of the header files and thus is now being owned 
by Nvidia?!? For this reason can such an interpretation of the GPL not make good 
sense. It's just absurd.

How is this with compiling under Windows or just using Intel's or Microsoft's 
compiler? Does this make ffmpeg non-free or does ffmpeg there claiming ownership 
over Intel's or Microsoft's header files?

Anyhow, I don't really want to get too deep into this topic as this is just about 
configure options and I'm already using the --non-free option. I am merely 
curious about this interpretation of the GPL. I find the interpretation to be 
selfish and it's just asking to be challenged.

Sven

Andreas Cadhalpun Oct. 19, 2016, 7:23 p.m. UTC | #10

On 19.10.2016 20:46, Sven C. Dack wrote:
> No. This is exactly what I meant with wearing tin foil hats.

Insults won't help you.

> Just because a compiler includes information provided by header
> files into the compilation process does this not imply a transfer
> of ownership or copyright of this information. The header files
> do not become a part of the source code. You may find the ownership
> and copyright only no longer being easily distinguishable once it
> becomes a binary. The separation is however still present and there
> is no magical transfer of ownership happening here. Even if there
> was, who is to say ffmpeg isn't becoming part of the header files
> and thus is now being owned by Nvidia?!?

I wrote nothing about ownership.

> For this reason can such an interpretation of the GPL not make
> good sense. It's just absurd.

Have you actually read the license?

It clearly defines what complete source code means:
"For a library, complete source code means all the source code for
all modules it contains, plus any associated interface definition
files, plus the scripts used to control compilation and installation
of the library."

The FAQ explains this as [1]:
"For a typical C program, this translates into all the source code
(.c files) plus header files (.h files) plus the scripts used to control
compilation and installation."

> How is this with compiling under Windows or just using Intel's or
> Microsoft's compiler? Does this make ffmpeg non-free or does ffmpeg
> there claiming ownership over Intel's or Microsoft's header files?

Read the FAQ: "What about the compiler, the toolchain?"

Best regards,
Andreas


1: http://gpl-violations.org/faq/sourcecode-faq/

Sven C. Dack Oct. 19, 2016, 7:41 p.m. UTC | #11

On 19/10/16 20:23, Andreas Cadhalpun wrote:
> Read the FAQ: "What about the compiler, the toolchain?"
>
> Best regards,
> Andreas
>
>
> 1: http://gpl-violations.org/faq/sourcecode-faq/

You are misinterpreting it. The FAQ explicitly excludes external components such 
as compilers, kernels and libraries.

Anyhow, I don't want to challenge the interpretation. If this is what you want 
then I'm happy with the options as they are. I'm not bothered. :)

Andreas Cadhalpun Oct. 19, 2016, 9:02 p.m. UTC | #12

On 19.10.2016 21:41, Sven C. Dack wrote:
> You are misinterpreting it. The FAQ explicitly excludes external
> components such as compilers, kernels and libraries.

What is or is not a system component is quite debatable.
However, it always depends on the specific operating system under
consideration.
For example, my operating system doesn't (normally) come with the
cuda header.
However, a binary built with the header available could run on it.
This is problematic, because I couldn't rebuild the binary on the
system from the distributed source code.

> Anyhow, I don't want to challenge the interpretation. If this
> is what you want then I'm happy with the options as they are.
> I'm not bothered. :)

OK. :)

Best regards,
Andreas

Philip Langdale Oct. 20, 2016, 12:27 a.m. UTC | #13

On Wed, 19 Oct 2016 14:00:29 +0200
Timo Rothenpieler <timo@rothenpieler.org> wrote:

> ---
>  compat/cuda/dynlink_cuda.h     |  88 +++++
>  compat/cuda/dynlink_cuviddec.h | 808 +++++++++++++++++++++++++++++++++++++++++
>  compat/cuda/dynlink_loader.h   | 254 +++++++++++++
>  compat/cuda/dynlink_nvcuvid.h  | 316 ++++++++++++++++
>  4 files changed, 1466 insertions(+)
>  create mode 100644 compat/cuda/dynlink_cuda.h
>  create mode 100644 compat/cuda/dynlink_cuviddec.h
>  create mode 100644 compat/cuda/dynlink_loader.h
>  create mode 100644 compat/cuda/dynlink_nvcuvid.h

I've been testing this patch set for Timo, and it's good by me.

Ship it.

--phil

Timo Rothenpieler Nov. 20, 2016, 7:04 p.m. UTC | #14

ping

Will push in 2 days if nobody objects.

diff mbox

Patch

diff --git a/compat/cuda/dynlink_cuda.h b/compat/cuda/dynlink_cuda.h
new file mode 100644
index 0000000..908f12d
--- /dev/null
+++ b/compat/cuda/dynlink_cuda.h
@@ -0,0 +1,88 @@ 
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#if !defined(AV_COMPAT_DYNLINK_CUDA_H) && !defined(CUDA_VERSION)
+#define AV_COMPAT_DYNLINK_CUDA_H
+
+#include <stddef.h>
+
+#define CUDA_VERSION 7050
+
+#if defined(_WIN32) || defined(__CYGWIN__)
+#define CUDAAPI __stdcall
+#else
+#define CUDAAPI
+#endif
+
+#define CU_CTX_SCHED_BLOCKING_SYNC 4
+
+typedef int CUdevice;
+typedef void* CUarray;
+typedef void* CUcontext;
+#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
+typedef unsigned long long CUdeviceptr;
+#else
+typedef unsigned int CUdeviceptr;
+#endif
+
+typedef enum cudaError_enum {
+    CUDA_SUCCESS = 0
+} CUresult;
+
+typedef enum CUmemorytype_enum {
+    CU_MEMORYTYPE_HOST = 1,
+    CU_MEMORYTYPE_DEVICE = 2
+} CUmemorytype;
+
+typedef struct CUDA_MEMCPY2D_st {
+    size_t srcXInBytes;
+    size_t srcY;
+    CUmemorytype srcMemoryType;
+    const void *srcHost;
+    CUdeviceptr srcDevice;
+    CUarray srcArray;
+    size_t srcPitch;
+
+    size_t dstXInBytes;
+    size_t dstY;
+    CUmemorytype dstMemoryType;
+    void *dstHost;
+    CUdeviceptr dstDevice;
+    CUarray dstArray;
+    size_t dstPitch;
+
+    size_t WidthInBytes;
+    size_t Height;
+} CUDA_MEMCPY2D;
+
+typedef CUresult CUDAAPI tcuInit(unsigned int Flags);
+typedef CUresult CUDAAPI tcuDeviceGetCount(int *count);
+typedef CUresult CUDAAPI tcuDeviceGet(CUdevice *device, int ordinal);
+typedef CUresult CUDAAPI tcuDeviceGetName(char *name, int len, CUdevice dev);
+typedef CUresult CUDAAPI tcuDeviceComputeCapability(int *major, int *minor, CUdevice dev);
+typedef CUresult CUDAAPI tcuCtxCreate_v2(CUcontext *pctx, unsigned int flags, CUdevice dev);
+typedef CUresult CUDAAPI tcuCtxPushCurrent_v2(CUcontext *pctx);
+typedef CUresult CUDAAPI tcuCtxPopCurrent_v2(CUcontext *pctx);
+typedef CUresult CUDAAPI tcuCtxDestroy_v2(CUcontext ctx);
+typedef CUresult CUDAAPI tcuMemAlloc_v2(CUdeviceptr *dptr, size_t bytesize);
+typedef CUresult CUDAAPI tcuMemFree_v2(CUdeviceptr dptr);
+typedef CUresult CUDAAPI tcuMemcpy2D_v2(const CUDA_MEMCPY2D *pcopy);
+typedef CUresult CUDAAPI tcuGetErrorName(CUresult error, const char** pstr);
+typedef CUresult CUDAAPI tcuGetErrorString(CUresult error, const char** pstr);
+
+#endif
diff --git a/compat/cuda/dynlink_cuviddec.h b/compat/cuda/dynlink_cuviddec.h
new file mode 100644
index 0000000..17207bc
--- /dev/null
+++ b/compat/cuda/dynlink_cuviddec.h
@@ -0,0 +1,808 @@ 
+/*
+ * This copyright notice applies to this header file only:
+ *
+ * Copyright (c) 2010-2016 NVIDIA Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the software, and to permit persons to whom the
+ * software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file cuviddec.h
+ * NvCuvid API provides Video Decoding interface to NVIDIA GPU devices.
+ * \date 2015-2016
+ * This file contains constants, structure definitions and function prototypes used for decoding.
+ */
+
+#if !defined(__CUDA_VIDEO_H__)
+#define __CUDA_VIDEO_H__
+
+#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
+#if (CUDA_VERSION >= 3020) && (!defined(CUDA_FORCE_API_VERSION) || (CUDA_FORCE_API_VERSION >= 3020))
+#define __CUVID_DEVPTR64
+#endif
+#endif
+
+#if defined(__cplusplus)
+extern "C" {
+#endif /* __cplusplus */
+
+typedef void *CUvideodecoder;
+typedef struct _CUcontextlock_st *CUvideoctxlock;
+
+/**
+ * \addtogroup VIDEO_DECODER Video Decoder
+ * @{
+ */
+
+/*!
+ * \enum cudaVideoCodec
+ * Video Codec Enums
+ */
+typedef enum cudaVideoCodec_enum {
+    cudaVideoCodec_MPEG1=0,                 /**<  MPEG1   */
+    cudaVideoCodec_MPEG2,                   /**<  MPEG2  */
+    cudaVideoCodec_MPEG4,                   /**<  MPEG4   */
+    cudaVideoCodec_VC1,                     /**<  VC1   */
+    cudaVideoCodec_H264,                    /**<  H264   */
+    cudaVideoCodec_JPEG,                    /**<  JPEG   */
+    cudaVideoCodec_H264_SVC,                /**<  H264-SVC   */
+    cudaVideoCodec_H264_MVC,                /**<  H264-MVC   */
+    cudaVideoCodec_HEVC,                    /**<  HEVC   */
+    cudaVideoCodec_VP8,                     /**<  VP8   */
+    cudaVideoCodec_VP9,                     /**<  VP9   */
+    cudaVideoCodec_NumCodecs,               /**<  Max Codecs   */
+    // Uncompressed YUV
+    cudaVideoCodec_YUV420 = (('I'<<24)|('Y'<<16)|('U'<<8)|('V')),   /**< Y,U,V (4:2:0)  */
+    cudaVideoCodec_YV12   = (('Y'<<24)|('V'<<16)|('1'<<8)|('2')),   /**< Y,V,U (4:2:0)  */
+    cudaVideoCodec_NV12   = (('N'<<24)|('V'<<16)|('1'<<8)|('2')),   /**< Y,UV  (4:2:0)  */
+    cudaVideoCodec_YUYV   = (('Y'<<24)|('U'<<16)|('Y'<<8)|('V')),   /**< YUYV/YUY2 (4:2:2)  */
+    cudaVideoCodec_UYVY   = (('U'<<24)|('Y'<<16)|('V'<<8)|('Y'))    /**< UYVY (4:2:2)  */
+} cudaVideoCodec;
+
+/*!
+ * \enum cudaVideoSurfaceFormat
+ * Video Surface Formats Enums
+ */
+typedef enum cudaVideoSurfaceFormat_enum {
+    cudaVideoSurfaceFormat_NV12=0       /**< NV12 (currently the only supported output format)  */
+} cudaVideoSurfaceFormat;
+
+/*!
+ * \enum cudaVideoDeinterlaceMode
+ * Deinterlacing Modes Enums
+ */
+typedef enum cudaVideoDeinterlaceMode_enum {
+    cudaVideoDeinterlaceMode_Weave=0,   /**< Weave both fields (no deinterlacing) */
+    cudaVideoDeinterlaceMode_Bob,       /**< Drop one field  */
+    cudaVideoDeinterlaceMode_Adaptive   /**< Adaptive deinterlacing  */
+} cudaVideoDeinterlaceMode;
+
+/*!
+ * \enum cudaVideoChromaFormat
+ * Chroma Formats Enums
+ */
+typedef enum cudaVideoChromaFormat_enum {
+    cudaVideoChromaFormat_Monochrome=0,  /**< MonoChrome */
+    cudaVideoChromaFormat_420,           /**< 4:2:0 */
+    cudaVideoChromaFormat_422,           /**< 4:2:2 */
+    cudaVideoChromaFormat_444            /**< 4:4:4 */
+} cudaVideoChromaFormat;
+
+/*!
+ * \enum cudaVideoCreateFlags
+ * Decoder Flags Enums
+ */
+typedef enum cudaVideoCreateFlags_enum {
+    cudaVideoCreate_Default = 0x00,     /**< Default operation mode: use dedicated video engines */
+    cudaVideoCreate_PreferCUDA = 0x01,  /**< Use a CUDA-based decoder if faster than dedicated engines (requires a valid vidLock object for multi-threading) */
+    cudaVideoCreate_PreferDXVA = 0x02,  /**< Go through DXVA internally if possible (requires D3D9 interop) */
+    cudaVideoCreate_PreferCUVID = 0x04  /**< Use dedicated video engines directly */
+} cudaVideoCreateFlags;
+
+/*!
+ * \struct CUVIDDECODECREATEINFO
+ * Struct used in create decoder
+ */
+typedef struct _CUVIDDECODECREATEINFO
+{
+    unsigned long ulWidth;              /**< Coded Sequence Width */
+    unsigned long ulHeight;             /**< Coded Sequence Height */
+    unsigned long ulNumDecodeSurfaces;  /**< Maximum number of internal decode surfaces */
+    cudaVideoCodec CodecType;           /**< cudaVideoCodec_XXX */
+    cudaVideoChromaFormat ChromaFormat; /**< cudaVideoChromaFormat_XXX (only 4:2:0 is currently supported) */
+    unsigned long ulCreationFlags;      /**< Decoder creation flags (cudaVideoCreateFlags_XXX) */
+    unsigned long bitDepthMinus8;
+    unsigned long Reserved1[4];         /**< Reserved for future use - set to zero */
+    /**
+    * area of the frame that should be displayed
+    */
+    struct {
+        short left;
+        short top;
+        short right;
+        short bottom;
+    } display_area;
+
+    cudaVideoSurfaceFormat OutputFormat;       /**< cudaVideoSurfaceFormat_XXX */
+    cudaVideoDeinterlaceMode DeinterlaceMode;  /**< cudaVideoDeinterlaceMode_XXX */
+    unsigned long ulTargetWidth;               /**< Post-processed Output Width (Should be aligned to 2) */
+    unsigned long ulTargetHeight;              /**< Post-processed Output Height (Should be aligned to 2) */
+    unsigned long ulNumOutputSurfaces;         /**< Maximum number of output surfaces simultaneously mapped */
+    CUvideoctxlock vidLock;                    /**< If non-NULL, context lock used for synchronizing ownership of the cuda context */
+    /**
+    * target rectangle in the output frame (for aspect ratio conversion)
+    * if a null rectangle is specified, {0,0,ulTargetWidth,ulTargetHeight} will be used
+    */
+    struct {
+        short left;
+        short top;
+        short right;
+        short bottom;
+    } target_rect;
+    unsigned long Reserved2[5];                /**< Reserved for future use - set to zero */
+} CUVIDDECODECREATEINFO;
+
+/*!
+ * \struct CUVIDH264DPBENTRY
+ * H.264 DPB Entry
+ */
+typedef struct _CUVIDH264DPBENTRY
+{
+    int PicIdx;                 /**< picture index of reference frame */
+    int FrameIdx;               /**< frame_num(short-term) or LongTermFrameIdx(long-term) */
+    int is_long_term;           /**< 0=short term reference, 1=long term reference */
+    int not_existing;           /**< non-existing reference frame (corresponding PicIdx should be set to -1) */
+    int used_for_reference;     /**< 0=unused, 1=top_field, 2=bottom_field, 3=both_fields */
+    int FieldOrderCnt[2];       /**< field order count of top and bottom fields */
+} CUVIDH264DPBENTRY;
+
+/*!
+ * \struct CUVIDH264MVCEXT
+ * H.264 MVC Picture Parameters Ext
+ */
+typedef struct _CUVIDH264MVCEXT
+{
+    int num_views_minus1;
+    int view_id;
+    unsigned char inter_view_flag;
+    unsigned char num_inter_view_refs_l0;
+    unsigned char num_inter_view_refs_l1;
+    unsigned char MVCReserved8Bits;
+    int InterViewRefsL0[16];
+    int InterViewRefsL1[16];
+} CUVIDH264MVCEXT;
+
+/*!
+ * \struct CUVIDH264SVCEXT
+ * H.264 SVC Picture Parameters Ext
+ */
+typedef struct _CUVIDH264SVCEXT
+{
+    unsigned char profile_idc;
+    unsigned char level_idc;
+    unsigned char DQId;
+    unsigned char DQIdMax;
+    unsigned char disable_inter_layer_deblocking_filter_idc;
+    unsigned char ref_layer_chroma_phase_y_plus1;
+    signed char   inter_layer_slice_alpha_c0_offset_div2;
+    signed char   inter_layer_slice_beta_offset_div2;
+
+    unsigned short DPBEntryValidFlag;
+    unsigned char inter_layer_deblocking_filter_control_present_flag;
+    unsigned char extended_spatial_scalability_idc;
+    unsigned char adaptive_tcoeff_level_prediction_flag;
+    unsigned char slice_header_restriction_flag;
+    unsigned char chroma_phase_x_plus1_flag;
+    unsigned char chroma_phase_y_plus1;
+
+    unsigned char tcoeff_level_prediction_flag;
+    unsigned char constrained_intra_resampling_flag;
+    unsigned char ref_layer_chroma_phase_x_plus1_flag;
+    unsigned char store_ref_base_pic_flag;
+    unsigned char Reserved8BitsA;
+    unsigned char Reserved8BitsB;
+    // For the 4 scaled_ref_layer_XX fields below,
+    // if (extended_spatial_scalability_idc == 1), SPS field, G.7.3.2.1.4, add prefix "seq_"
+    // if (extended_spatial_scalability_idc == 2), SLH field, G.7.3.3.4,
+    short scaled_ref_layer_left_offset;
+    short scaled_ref_layer_top_offset;
+    short scaled_ref_layer_right_offset;
+    short scaled_ref_layer_bottom_offset;
+    unsigned short Reserved16Bits;
+    struct _CUVIDPICPARAMS *pNextLayer; /**< Points to the picparams for the next layer to be decoded. Linked list ends at the target layer. */
+    int bRefBaseLayer;                  /**< whether to store ref base pic */
+} CUVIDH264SVCEXT;
+
+/*!
+ * \struct CUVIDH264PICPARAMS
+ * H.264 Picture Parameters
+ */
+typedef struct _CUVIDH264PICPARAMS
+{
+    // SPS
+    int log2_max_frame_num_minus4;
+    int pic_order_cnt_type;
+    int log2_max_pic_order_cnt_lsb_minus4;
+    int delta_pic_order_always_zero_flag;
+    int frame_mbs_only_flag;
+    int direct_8x8_inference_flag;
+    int num_ref_frames;             // NOTE: shall meet level 4.1 restrictions
+    unsigned char residual_colour_transform_flag;
+    unsigned char bit_depth_luma_minus8;    // Must be 0 (only 8-bit supported)
+    unsigned char bit_depth_chroma_minus8;  // Must be 0 (only 8-bit supported)
+    unsigned char qpprime_y_zero_transform_bypass_flag;
+    // PPS
+    int entropy_coding_mode_flag;
+    int pic_order_present_flag;
+    int num_ref_idx_l0_active_minus1;
+    int num_ref_idx_l1_active_minus1;
+    int weighted_pred_flag;
+    int weighted_bipred_idc;
+    int pic_init_qp_minus26;
+    int deblocking_filter_control_present_flag;
+    int redundant_pic_cnt_present_flag;
+    int transform_8x8_mode_flag;
+    int MbaffFrameFlag;
+    int constrained_intra_pred_flag;
+    int chroma_qp_index_offset;
+    int second_chroma_qp_index_offset;
+    int ref_pic_flag;
+    int frame_num;
+    int CurrFieldOrderCnt[2];
+    // DPB
+    CUVIDH264DPBENTRY dpb[16];          // List of reference frames within the DPB
+    // Quantization Matrices (raster-order)
+    unsigned char WeightScale4x4[6][16];
+    unsigned char WeightScale8x8[2][64];
+    // FMO/ASO
+    unsigned char fmo_aso_enable;
+    unsigned char num_slice_groups_minus1;
+    unsigned char slice_group_map_type;
+    signed char pic_init_qs_minus26;
+    unsigned int slice_group_change_rate_minus1;
+    union
+    {
+        unsigned long long slice_group_map_addr;
+        const unsigned char *pMb2SliceGroupMap;
+    } fmo;
+    unsigned int  Reserved[12];
+    // SVC/MVC
+    union
+    {
+        CUVIDH264MVCEXT mvcext;
+        CUVIDH264SVCEXT svcext;
+    } svcmvc;
+} CUVIDH264PICPARAMS;
+
+
+/*!
+ * \struct CUVIDMPEG2PICPARAMS
+ * MPEG-2 Picture Parameters
+ */
+typedef struct _CUVIDMPEG2PICPARAMS
+{
+    int ForwardRefIdx;          // Picture index of forward reference (P/B-frames)
+    int BackwardRefIdx;         // Picture index of backward reference (B-frames)
+    int picture_coding_type;
+    int full_pel_forward_vector;
+    int full_pel_backward_vector;
+    int f_code[2][2];
+    int intra_dc_precision;
+    int frame_pred_frame_dct;
+    int concealment_motion_vectors;
+    int q_scale_type;
+    int intra_vlc_format;
+    int alternate_scan;
+    int top_field_first;
+    // Quantization matrices (raster order)
+    unsigned char QuantMatrixIntra[64];
+    unsigned char QuantMatrixInter[64];
+} CUVIDMPEG2PICPARAMS;
+
+////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// MPEG-4 Picture Parameters
+//
+
+// MPEG-4 has VOP types instead of Picture types (presumably the values of CUVIDMPEG4PICPARAMS.vop_coding_type -- TODO confirm)
+#define I_VOP 0 // NOTE(review): these four macros are unprefixed and visible to every includer
+#define P_VOP 1
+#define B_VOP 2
+#define S_VOP 3
+
+/*!
+ * \struct CUVIDMPEG4PICPARAMS
+ * MPEG-4 Picture Parameters: the "mpeg4" member of CUVIDPICPARAMS.CodecSpecific
+ */
+typedef struct _CUVIDMPEG4PICPARAMS
+{
+    int ForwardRefIdx;          // Picture index of forward reference (P/B-frames)
+    int BackwardRefIdx;         // Picture index of backward reference (B-frames)
+    // VOL
+    int video_object_layer_width;
+    int video_object_layer_height;
+    int vop_time_increment_bitcount;
+    int top_field_first;
+    int resync_marker_disable;
+    int quant_type;
+    int quarter_sample;
+    int short_video_header;
+    int divx_flags;
+    // VOP
+    int vop_coding_type;        // presumably one of I_VOP/P_VOP/B_VOP/S_VOP -- TODO confirm
+    int vop_coded;
+    int vop_rounding_type;
+    int alternate_vertical_scan_flag;
+    int interlaced;
+    int vop_fcode_forward;
+    int vop_fcode_backward;
+    int trd[2];                 // index meaning is not documented in this header
+    int trb[2];                 // index meaning is not documented in this header
+    // Quantization matrices (raster order)
+    unsigned char QuantMatrixIntra[64];
+    unsigned char QuantMatrixInter[64];
+    int gmc_enabled;
+} CUVIDMPEG4PICPARAMS;
+
+/*!
+ * \struct CUVIDVC1PICPARAMS
+ * VC1 Picture Parameters: the "vc1" member of CUVIDPICPARAMS.CodecSpecific
+ */
+typedef struct _CUVIDVC1PICPARAMS
+{
+    int ForwardRefIdx;      /**< Picture index of forward reference (P/B-frames) */
+    int BackwardRefIdx;     /**< Picture index of backward reference (B-frames) */
+    int FrameWidth;         /**< Actual frame width */
+    int FrameHeight;        /**< Actual frame height */
+    // PICTURE
+    int intra_pic_flag;     /**< Set to 1 for I,BI frames */
+    int ref_pic_flag;       /**< Set to 1 for I,P frames */
+    int progressive_fcm;    /**< Progressive frame */
+    // SEQUENCE
+    int profile;            // NOTE(review): the undocumented fields below look like VC-1 bitstream syntax elements -- confirm against the NVIDIA SDK docs
+    int postprocflag;
+    int pulldown;
+    int interlace;
+    int tfcntrflag;
+    int finterpflag;
+    int psf;
+    int multires;
+    int syncmarker;
+    int rangered;
+    int maxbframes;
+    // ENTRYPOINT
+    int panscan_flag;
+    int refdist_flag;
+    int extended_mv;
+    int dquant;
+    int vstransform;
+    int loopfilter;
+    int fastuvmc;
+    int overlap;
+    int quantizer;
+    int extended_dmv;
+    int range_mapy_flag;
+    int range_mapy;
+    int range_mapuv_flag;
+    int range_mapuv;
+    int rangeredfrm;    // range reduction state
+} CUVIDVC1PICPARAMS;
+
+/*!
+ * \struct CUVIDJPEGPICPARAMS
+ * JPEG Picture Parameters: the "jpeg" member of CUVIDPICPARAMS.CodecSpecific
+ */
+typedef struct _CUVIDJPEGPICPARAMS
+{
+    int Reserved;   // only member; no JPEG-specific parameter is declared in this header
+} CUVIDJPEGPICPARAMS;
+
+
+/*!
+ * \struct CUVIDHEVCPICPARAMS
+ * HEVC Picture Parameters: the "hevc" member of CUVIDPICPARAMS.CodecSpecific
+ */
+typedef struct _CUVIDHEVCPICPARAMS
+{
+    // sps
+    int pic_width_in_luma_samples;
+    int pic_height_in_luma_samples;
+    unsigned char log2_min_luma_coding_block_size_minus3;
+    unsigned char log2_diff_max_min_luma_coding_block_size;
+    unsigned char log2_min_transform_block_size_minus2;
+    unsigned char log2_diff_max_min_transform_block_size;
+    unsigned char pcm_enabled_flag;
+    unsigned char log2_min_pcm_luma_coding_block_size_minus3;
+    unsigned char log2_diff_max_min_pcm_luma_coding_block_size;
+    unsigned char pcm_sample_bit_depth_luma_minus1;
+
+    unsigned char pcm_sample_bit_depth_chroma_minus1;
+    unsigned char pcm_loop_filter_disabled_flag;
+    unsigned char strong_intra_smoothing_enabled_flag;
+    unsigned char max_transform_hierarchy_depth_intra;
+    unsigned char max_transform_hierarchy_depth_inter;
+    unsigned char amp_enabled_flag;
+    unsigned char separate_colour_plane_flag;
+    unsigned char log2_max_pic_order_cnt_lsb_minus4;
+
+    unsigned char num_short_term_ref_pic_sets;
+    unsigned char long_term_ref_pics_present_flag;
+    unsigned char num_long_term_ref_pics_sps;
+    unsigned char sps_temporal_mvp_enabled_flag;
+    unsigned char sample_adaptive_offset_enabled_flag;
+    unsigned char scaling_list_enable_flag;
+    unsigned char IrapPicFlag;
+    unsigned char IdrPicFlag;
+
+    unsigned char bit_depth_luma_minus8;
+    unsigned char bit_depth_chroma_minus8;
+    unsigned char reserved1[14];    // reserved bytes; purpose not documented in this header
+
+    // pps
+    unsigned char dependent_slice_segments_enabled_flag;
+    unsigned char slice_segment_header_extension_present_flag;
+    unsigned char sign_data_hiding_enabled_flag;
+    unsigned char cu_qp_delta_enabled_flag;
+    unsigned char diff_cu_qp_delta_depth;
+    signed char init_qp_minus26;
+    signed char pps_cb_qp_offset;
+    signed char pps_cr_qp_offset;
+
+    unsigned char constrained_intra_pred_flag;
+    unsigned char weighted_pred_flag;
+    unsigned char weighted_bipred_flag;
+    unsigned char transform_skip_enabled_flag;
+    unsigned char transquant_bypass_enabled_flag;
+    unsigned char entropy_coding_sync_enabled_flag;
+    unsigned char log2_parallel_merge_level_minus2;
+    unsigned char num_extra_slice_header_bits;
+
+    unsigned char loop_filter_across_tiles_enabled_flag;
+    unsigned char loop_filter_across_slices_enabled_flag;
+    unsigned char output_flag_present_flag;
+    unsigned char num_ref_idx_l0_default_active_minus1;
+    unsigned char num_ref_idx_l1_default_active_minus1;
+    unsigned char lists_modification_present_flag;
+    unsigned char cabac_init_present_flag;
+    unsigned char pps_slice_chroma_qp_offsets_present_flag;
+
+    unsigned char deblocking_filter_override_enabled_flag;
+    unsigned char pps_deblocking_filter_disabled_flag;
+    signed char pps_beta_offset_div2;
+    signed char pps_tc_offset_div2;
+    unsigned char tiles_enabled_flag;
+    unsigned char uniform_spacing_flag;
+    unsigned char num_tile_columns_minus1;
+    unsigned char num_tile_rows_minus1;
+
+    unsigned short column_width_minus1[21];
+    unsigned short row_height_minus1[21];
+    unsigned int reserved3[15];     // reserved words; purpose not documented in this header
+
+    // RefPicSets
+    int NumBitsForShortTermRPSInSlice;
+    int NumDeltaPocsOfRefRpsIdx;
+    int NumPocTotalCurr;
+    int NumPocStCurrBefore;
+    int NumPocStCurrAfter;
+    int NumPocLtCurr;
+    int CurrPicOrderCntVal;
+    int RefPicIdx[16];                  // [refpic] Indices of valid reference pictures (-1 if unused for reference)
+    int PicOrderCntVal[16];             // [refpic]
+    unsigned char IsLongTerm[16];       // [refpic] 0=not a long-term reference, 1=long-term reference
+    unsigned char RefPicSetStCurrBefore[8]; // [0..NumPocStCurrBefore-1] -> refpic (0..15)
+    unsigned char RefPicSetStCurrAfter[8];  // [0..NumPocStCurrAfter-1] -> refpic (0..15)
+    unsigned char RefPicSetLtCurr[8];       // [0..NumPocLtCurr-1] -> refpic (0..15)
+    unsigned char RefPicSetInterLayer0[8];  // not documented in this header
+    unsigned char RefPicSetInterLayer1[8];  // not documented in this header
+    unsigned int reserved4[12];     // reserved words; purpose not documented in this header
+
+    // scaling lists (diag order)
+    unsigned char ScalingList4x4[6][16];       // [matrixId][i]
+    unsigned char ScalingList8x8[6][64];       // [matrixId][i]
+    unsigned char ScalingList16x16[6][64];     // [matrixId][i]
+    unsigned char ScalingList32x32[2][64];     // [matrixId][i]
+    unsigned char ScalingListDCCoeff16x16[6];  // [matrixId]
+    unsigned char ScalingListDCCoeff32x32[2];  // [matrixId]
+} CUVIDHEVCPICPARAMS;
+
+
+/*!
+ * \struct CUVIDVP8PICPARAMS
+ * VP8 Picture Parameters: the "vp8" member of CUVIDPICPARAMS.CodecSpecific
+ */
+typedef struct _CUVIDVP8PICPARAMS
+{
+    int width;
+    int height;
+    unsigned int first_partition_size;
+    //Frame Indexes
+    unsigned char LastRefIdx;
+    unsigned char GoldenRefIdx;
+    unsigned char AltRefIdx;
+    union {     // the flag bit-fields and wFrameTagFlags share the same storage
+        struct {    // anonymous struct: members are reached directly as tagflags.frame_type etc.
+            unsigned char frame_type : 1;    /**< 0 = KEYFRAME, 1 = INTERFRAME  */
+            unsigned char version : 3;
+            unsigned char show_frame : 1;
+            unsigned char update_mb_segmentation_data : 1;    /**< Must be 0 if segmentation is not enabled */
+            unsigned char Reserved2Bits : 2;    // brings the bit-fields to 8 bits in total
+        };
+        unsigned char wFrameTagFlags;   // all flags at once; bit placement within the byte is compiler-defined
+    } tagflags;
+    unsigned char Reserved1[4];
+    unsigned int  Reserved2[3];
+} CUVIDVP8PICPARAMS;
+
+/*!
+ * \struct CUVIDVP9PICPARAMS
+ * VP9 Picture Parameters: the "vp9" member of CUVIDPICPARAMS.CodecSpecific
+ */
+typedef struct _CUVIDVP9PICPARAMS
+{
+    unsigned int width;
+    unsigned int height;
+
+    //Frame Indices
+    unsigned char LastRefIdx;
+    unsigned char GoldenRefIdx;
+    unsigned char AltRefIdx;
+    unsigned char colorSpace;
+
+    unsigned short profile : 3;     // first of a run of bit-fields that totals 16 bits
+    unsigned short frameContextIdx : 2;
+    unsigned short frameType : 1;
+    unsigned short showFrame : 1;
+    unsigned short errorResilient : 1;
+    unsigned short frameParallelDecoding : 1;
+    unsigned short subSamplingX : 1;
+    unsigned short subSamplingY : 1;
+    unsigned short intraOnly : 1;
+    unsigned short allow_high_precision_mv : 1;
+    unsigned short refreshEntropyProbs : 1;
+    unsigned short reserved2Bits : 2;   // last of the 16 bits
+
+    unsigned short reserved16Bits;
+
+    unsigned char  refFrameSignBias[4];
+
+    unsigned char bitDepthMinus8Luma;
+    unsigned char bitDepthMinus8Chroma;
+    unsigned char loopFilterLevel;
+    unsigned char loopFilterSharpness;
+
+    unsigned char modeRefLfEnabled;
+    unsigned char log2_tile_columns;
+    unsigned char log2_tile_rows;
+
+    unsigned char segmentEnabled : 1;   // first of a run of bit-fields that totals 8 bits
+    unsigned char segmentMapUpdate : 1;
+    unsigned char segmentMapTemporalUpdate : 1;
+    unsigned char segmentFeatureMode : 1;
+    unsigned char reserved4Bits : 4;    // last of the 8 bits
+
+
+    unsigned char segmentFeatureEnable[8][4];   // same [8][4] shape as segmentFeatureData
+    short segmentFeatureData[8][4];
+    unsigned char mb_segment_tree_probs[7];
+    unsigned char segment_pred_probs[3];
+    unsigned char reservedSegment16Bits[2];     // two bytes, i.e. the 16 bits in the name
+
+    int qpYAc;
+    int qpYDc;
+    int qpChDc;
+    int qpChAc;
+
+    unsigned int activeRefIdx[3];
+    unsigned int resetFrameContext;
+    unsigned int mcomp_filter_type;
+    unsigned int mbRefLfDelta[4];
+    unsigned int mbModeLfDelta[2];
+    unsigned int frameTagSize;
+    unsigned int offsetToDctParts;
+    unsigned int reserved128Bits[4];    // 128 bits only where unsigned int is 32 bits wide
+
+} CUVIDVP9PICPARAMS;
+
+
+/*!
+ * \struct CUVIDPICPARAMS
+ * Picture Parameters for Decoding (the pPicParams argument of cuvidDecodePicture)
+ */
+typedef struct _CUVIDPICPARAMS
+{
+    int PicWidthInMbs;                    /**< Coded frame width, in macroblocks (per the field name) */
+    int FrameHeightInMbs;                 /**< Coded frame height, in macroblocks (per the field name) */
+    int CurrPicIdx;                       /**< Output index of the current picture */
+    int field_pic_flag;                   /**< 0=frame picture, 1=field picture */
+    int bottom_field_flag;                /**< 0=top field, 1=bottom field (ignored if field_pic_flag=0) */
+    int second_field;                     /**< Second field of a complementary field pair */
+    // Bitstream data
+    unsigned int nBitstreamDataLen;        /**< Number of bytes in bitstream data buffer */
+    const unsigned char *pBitstreamData;   /**< Ptr to bitstream data for this picture (slice-layer) */
+    unsigned int nNumSlices;               /**< Number of slices in this picture */
+    const unsigned int *pSliceDataOffsets; /**< nNumSlices entries, contains offset of each slice within the bitstream data buffer */
+    int ref_pic_flag;                      /**< This picture is a reference picture */
+    int intra_pic_flag;                    /**< This picture is entirely intra coded */
+    unsigned int Reserved[30];             /**< Reserved for future use */
+    // Codec-specific data
+    union {                                // one member per codec; which one is valid is not recorded in this struct
+        CUVIDMPEG2PICPARAMS mpeg2;         /**< Also used for MPEG-1 */
+        CUVIDH264PICPARAMS h264;
+        CUVIDVC1PICPARAMS vc1;
+        CUVIDMPEG4PICPARAMS mpeg4;
+        CUVIDJPEGPICPARAMS jpeg;
+        CUVIDHEVCPICPARAMS hevc;
+        CUVIDVP8PICPARAMS vp8;
+        CUVIDVP9PICPARAMS vp9;
+        unsigned int CodecReserved[1024];  // keeps the union at least 1024 unsigned ints in size
+    } CodecSpecific;
+} CUVIDPICPARAMS;
+
+
+/*!
+ * \struct CUVIDPROCPARAMS
+ * Picture Parameters for Postprocessing (the pVPP argument of cuvidMapVideoFrame / cuvidMapVideoFrame64)
+ */
+typedef struct _CUVIDPROCPARAMS
+{
+    int progressive_frame;  /**< Input is progressive (deinterlace_mode will be ignored)  */
+    int second_field;       /**< Output the second field (ignored if deinterlace mode is Weave) */
+    int top_field_first;    /**< Input frame is top field first (1st field is top, 2nd field is bottom) */
+    int unpaired_field;     /**< Input only contains one field (2nd field is invalid) */
+    // The fields below are used for raw YUV input
+    unsigned int reserved_flags;        /**< Reserved for future use (set to zero) */
+    unsigned int reserved_zero;         /**< Reserved (set to zero) */
+    unsigned long long raw_input_dptr;  /**< Input CUdeviceptr for raw YUV extensions */
+    unsigned int raw_input_pitch;       /**< pitch in bytes of raw YUV input (should be aligned appropriately) */
+    unsigned int raw_input_format;      /**< Reserved for future use (set to zero) */
+    unsigned long long raw_output_dptr; /**< Reserved for future use (set to zero) */
+    unsigned int raw_output_pitch;      /**< Reserved for future use (set to zero) */
+    unsigned int Reserved[48];          // reserved words; purpose not documented in this header
+    void *Reserved3[3];                 // pointer-sized, so this struct's size depends on the target's pointer width
+} CUVIDPROCPARAMS;
+
+
+/**
+ *
+ * In order to minimize decode latencies, there should be always at least 2 pictures in the decode
+ * queue at any time, in order to make sure that all decode engines are always busy.
+ *
+ * Overall data flow:
+ *  - cuvidCreateDecoder(...)
+ *  For each picture:
+ *  - cuvidDecodePicture(N)
+ *  - cuvidMapVideoFrame(N-4)
+ *  - do some processing in cuda
+ *  - cuvidUnmapVideoFrame(N-4)
+ *  - cuvidDecodePicture(N+1)
+ *  - cuvidMapVideoFrame(N-3)
+ *    ...
+ *  - cuvidDestroyDecoder(...)
+ *
+ * NOTE:
+ * - When the cuda context is created from a D3D device, the D3D device must also be created
+ *   with the D3DCREATE_MULTITHREADED flag.
+ * - There is a limit to how many pictures can be mapped simultaneously (ulNumOutputSurfaces)
+ * - cuvidDecodePicture may block the calling thread if there are too many pictures pending
+ *   in the decode queue
+ */
+
+/**
+ * \fn CUresult CUDAAPI cuvidCreateDecoder(CUvideodecoder *phDecoder, CUVIDDECODECREATEINFO *pdci)
+ * Create the decoder object. This is a function type, not a pointer: dynlink_loader.h stores it as "tcuvidCreateDecoder *" and resolves "cuvidCreateDecoder" at run time.
+ */
+typedef CUresult CUDAAPI tcuvidCreateDecoder(CUvideodecoder *phDecoder, CUVIDDECODECREATEINFO *pdci);
+
+/**
+ * \fn CUresult CUDAAPI cuvidDestroyDecoder(CUvideodecoder hDecoder)
+ * Destroy the decoder object. Function type; dynlink_loader.h resolves "cuvidDestroyDecoder" into a "tcuvidDestroyDecoder *" member.
+ */
+typedef CUresult CUDAAPI tcuvidDestroyDecoder(CUvideodecoder hDecoder);
+
+/**
+ * \fn CUresult CUDAAPI cuvidDecodePicture(CUvideodecoder hDecoder, CUVIDPICPARAMS *pPicParams)
+ * Decode a single picture (field or frame). Function type; dynlink_loader.h resolves "cuvidDecodePicture" into a "tcuvidDecodePicture *" member.
+ */
+typedef CUresult CUDAAPI tcuvidDecodePicture(CUvideodecoder hDecoder, CUVIDPICPARAMS *pPicParams);
+
+
+#if !defined(__CUVID_DEVPTR64) || defined(__CUVID_INTERNAL) /* variants taking the device pointer as unsigned int */
+/**
+ * \fn CUresult CUDAAPI cuvidMapVideoFrame(CUvideodecoder hDecoder, int nPicIdx, unsigned int *pDevPtr, unsigned int *pPitch, CUVIDPROCPARAMS *pVPP);
+ * Post-process and map a video frame for use in cuda
+ */
+typedef CUresult CUDAAPI tcuvidMapVideoFrame(CUvideodecoder hDecoder, int nPicIdx,
+                                             unsigned int *pDevPtr, unsigned int *pPitch,
+                                             CUVIDPROCPARAMS *pVPP);
+
+/**
+ * \fn CUresult CUDAAPI cuvidUnmapVideoFrame(CUvideodecoder hDecoder, unsigned int DevPtr)
+ * Unmap a previously mapped video frame
+ */
+typedef CUresult CUDAAPI tcuvidUnmapVideoFrame(CUvideodecoder hDecoder, unsigned int DevPtr);
+#endif
+
+#if defined(WIN64) || defined(_WIN64) || defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) /* variants taking the device pointer as unsigned long long */
+/**
+ * \fn CUresult CUDAAPI cuvidMapVideoFrame64(CUvideodecoder hDecoder, int nPicIdx, unsigned long long *pDevPtr, unsigned int *pPitch, CUVIDPROCPARAMS *pVPP);
+ * Map a video frame; same parameters as cuvidMapVideoFrame except that pDevPtr points to an unsigned long long
+ */
+typedef CUresult CUDAAPI tcuvidMapVideoFrame64(CUvideodecoder hDecoder, int nPicIdx, unsigned long long *pDevPtr,
+                                               unsigned int *pPitch, CUVIDPROCPARAMS *pVPP);
+
+/**
+ * \fn CUresult CUDAAPI cuvidUnmapVideoFrame64(CUvideodecoder hDecoder, unsigned long long DevPtr);
+ * Unmap a previously mapped video frame
+ */
+typedef CUresult CUDAAPI tcuvidUnmapVideoFrame64(CUvideodecoder hDecoder, unsigned long long DevPtr);
+
+#if defined(__CUVID_DEVPTR64) && !defined(__CUVID_INTERNAL) /* the unsigned-int typedefs above were skipped in this configuration */
+#define tcuvidMapVideoFrame      tcuvidMapVideoFrame64      /* generic name now means the 64-bit type */
+#define tcuvidUnmapVideoFrame    tcuvidUnmapVideoFrame64    /* NOTE(review): with __CUVID_DEVPTR64 set on a target outside the list above, neither name is declared */
+#endif
+#endif
+
+
+/**
+ *
+ * Context-locking: to facilitate multi-threaded implementations, the following 4 functions
+ * provide a simple mutex-style host synchronization. If a non-NULL context is specified
+ * in CUVIDDECODECREATEINFO, the codec library will acquire the mutex associated with the given
+ * context before making any cuda calls.
+ * A multi-threaded application could create a lock associated with a context handle so that
+ * multiple threads can safely share the same cuda context:
+ *  - use cuCtxPopCurrent immediately after context creation in order to create a 'floating' context
+ *    that can be passed to cuvidCtxLockCreate.
+ *  - When using a floating context, all cuda calls should only be made within a cuvidCtxLock/cuvidCtxUnlock section.
+ *
+ * NOTE: This is a safer alternative to cuCtxPushCurrent and cuCtxPopCurrent, and is not related to video
+ * decoder in any way (implemented as a critical section associated with cuCtx{Push|Pop}Current calls).
+*/
+
+/**
+ * \fn CUresult CUDAAPI cuvidCtxLockCreate(CUvideoctxlock *pLock, CUcontext ctx)
+ */
+typedef CUresult CUDAAPI tcuvidCtxLockCreate(CUvideoctxlock *pLock, CUcontext ctx); // function type; presumably writes the new lock for ctx to *pLock -- confirm
+
+/**
+ * \fn CUresult CUDAAPI cuvidCtxLockDestroy(CUvideoctxlock lck)
+ */
+typedef CUresult CUDAAPI tcuvidCtxLockDestroy(CUvideoctxlock lck); // function type; counterpart of cuvidCtxLockCreate
+
+/**
+ * \fn CUresult CUDAAPI cuvidCtxLock(CUvideoctxlock lck, unsigned int reserved_flags)
+ */
+typedef CUresult CUDAAPI tcuvidCtxLock(CUvideoctxlock lck, unsigned int reserved_flags); // function type; reserved_flags is not documented in this header
+
+/**
+ * \fn CUresult CUDAAPI cuvidCtxUnlock(CUvideoctxlock lck, unsigned int reserved_flags)
+ */
+typedef CUresult CUDAAPI tcuvidCtxUnlock(CUvideoctxlock lck, unsigned int reserved_flags); // function type; closes a cuvidCtxLock section
+
+/** @} */  /* End VIDEO_DECODER */
+
+#if defined(__cplusplus)
+}
+#endif /* __cplusplus */
+
+#endif // __CUDA_VIDEO_H__
diff --git a/compat/cuda/dynlink_loader.h b/compat/cuda/dynlink_loader.h
new file mode 100644
index 0000000..6275664
--- /dev/null
+++ b/compat/cuda/dynlink_loader.h
@@ -0,0 +1,254 @@ 
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AV_COMPAT_CUDA_DYNLINK_LOADER_H
+#define AV_COMPAT_CUDA_DYNLINK_LOADER_H
+
+#include "compat/cuda/dynlink_cuda.h"
+#include "compat/cuda/dynlink_nvcuvid.h"
+#include "compat/nvenc/nvEncodeAPI.h"
+
+#include "libavutil/log.h"
+#include "libavutil/error.h"
+
+#if defined(_WIN32) /* map the dlfcn-style names used below onto Win32 calls */
+# include <windows.h>
+# define dlopen(filename, flags) LoadLibrary(TEXT(filename)) /* the flags argument is dropped */
+# define dlsym(handle, symbol) GetProcAddress(handle, symbol)
+# define dlclose(handle) FreeLibrary(handle)
+# define LIB_HANDLE HMODULE /* type of the "lib" member in each function table */
+#else
+# include <dlfcn.h>
+# define LIB_HANDLE void* /* type of the "lib" member in each function table */
+#endif /* all of these macros are #undef'ed again at the end of this header */
+
+#if defined(_WIN32) || defined(__CYGWIN__) /* DLL names; note that __CYGWIN__ alone does not select the Win32 shim above */
+# define CUDA_LIBNAME "nvcuda.dll"
+# define NVCUVID_LIBNAME "nvcuvid.dll"
+# if ARCH_X86_64 /* not defined in this header; presumably from FFmpeg's config.h -- TODO confirm the includer provides it */
+#  define NVENC_LIBNAME "nvEncodeAPI64.dll"
+# else
+#  define NVENC_LIBNAME "nvEncodeAPI.dll"
+# endif
+#else /* every other platform: versioned shared-object names */
+# define CUDA_LIBNAME "libcuda.so.1"
+# define NVCUVID_LIBNAME "libnvcuvid.so.1"
+# define NVENC_LIBNAME "libnvidia-encode.so.1"
+#endif
+
+#define LOAD_LIBRARY(l, path) /* open library `path` and store the handle in `l` */ \
+    do {                                                          \
+        if (!((l) = dlopen(path, RTLD_LAZY))) { /* RTLD_LAZY is discarded by the _WIN32 dlopen shim */ \
+            av_log(NULL, AV_LOG_ERROR, "Cannot load %s\n", path); \
+            ret = AVERROR_UNKNOWN; /* `ret` must exist in the expanding function */ \
+            goto error;            /* and so must an `error:` label */ \
+        }                                                         \
+        av_log(NULL, AV_LOG_TRACE, "Loaded lib: %s\n", path);     \
+    } while (0)
+
+#define LOAD_SYMBOL(fun, symbol) /* resolve `symbol` from f->lib into member f->fun */ \
+    do {                                                            \
+        if (!((f->fun) = dlsym(f->lib, symbol))) { /* `f` is the local table pointer of the expanding function */ \
+            av_log(NULL, AV_LOG_ERROR, "Cannot load %s\n", symbol); \
+            ret = AVERROR_UNKNOWN; /* one missing symbol fails the whole load */ \
+            goto error;            /* label supplied by GENERIC_LOAD_FUNC_FINALE */ \
+        }                                                           \
+        av_log(NULL, AV_LOG_TRACE, "Loaded sym: %s\n", symbol);     \
+    } while (0)
+
+#define GENERIC_LOAD_FUNC_PREAMBLE(T, n, N) /* T: table type, n: helper-name prefix, N: library file name */ \
+    T *f;    /* table being filled; used by LOAD_SYMBOL */ \
+    int ret; /* error code set by LOAD_LIBRARY / LOAD_SYMBOL */ \
+    /* NOTE(review): `functions` is dereferenced below without a NULL check */ \
+    n##_free_functions(functions); /* drop any table already stored in *functions */ \
+                                             \
+    f = *functions = av_mallocz(sizeof(*f)); \
+    if (!f)                                  \
+        return AVERROR(ENOMEM); /* *functions was assigned the failed (NULL) result above */ \
+                                             \
+    LOAD_LIBRARY(f->lib, N);
+
+#define GENERIC_LOAD_FUNC_FINALE(n) /* closes a load function opened with GENERIC_LOAD_FUNC_PREAMBLE */ \
+    return 0; /* reached only when no LOAD_* macro jumped to error */ \
+error:        /* target of the goto in LOAD_LIBRARY / LOAD_SYMBOL */ \
+    n##_free_functions(functions); /* closes the library if it was opened and frees the table */ \
+    return ret;
+
+#define GENERIC_FREE_FUNC() /* body shared by the *_free_functions helpers; expects a `functions` parameter */ \
+    if (!functions)                      \
+        return; /* nothing to do without a table slot */ \
+    if (*functions && (*functions)->lib) \
+        dlclose((*functions)->lib); /* return value is ignored */ \
+    av_freep(functions); /* libavutil helper: frees *functions; per its documentation it also resets the pointer to NULL */
+
+#ifdef AV_COMPAT_DYNLINK_CUDA_H /* presumably the include guard of compat/cuda/dynlink_cuda.h -- TODO confirm */
+typedef struct CudaFunctions { /* table filled by cuda_load_functions() */
+    tcuInit *cuInit;
+    tcuDeviceGetCount *cuDeviceGetCount;
+    tcuDeviceGet *cuDeviceGet;
+    tcuDeviceGetName *cuDeviceGetName;
+    tcuDeviceComputeCapability *cuDeviceComputeCapability;
+    tcuCtxCreate_v2 *cuCtxCreate; /* this and the other *_v2 typed members are resolved from the "_v2" symbol names */
+    tcuCtxPushCurrent_v2 *cuCtxPushCurrent;
+    tcuCtxPopCurrent_v2 *cuCtxPopCurrent;
+    tcuCtxDestroy_v2 *cuCtxDestroy;
+    tcuMemAlloc_v2 *cuMemAlloc;
+    tcuMemFree_v2 *cuMemFree;
+    tcuMemcpy2D_v2 *cuMemcpy2D;
+    tcuGetErrorName *cuGetErrorName;
+    tcuGetErrorString *cuGetErrorString;
+
+    LIB_HANDLE lib; /* handle from dlopen(CUDA_LIBNAME); closed by cuda_free_functions() */
+} CudaFunctions;
+#else
+typedef struct CudaFunctions CudaFunctions; /* incomplete type only: usable through pointers */
+#endif
+
+typedef struct CuvidFunctions { /* table filled by cuvid_load_functions() */
+    tcuvidCreateDecoder *cuvidCreateDecoder;
+    tcuvidDestroyDecoder *cuvidDestroyDecoder;
+    tcuvidDecodePicture *cuvidDecodePicture;
+    tcuvidMapVideoFrame *cuvidMapVideoFrame;     /* resolved from "cuvidMapVideoFrame64" when __CUVID_DEVPTR64 is defined */
+    tcuvidUnmapVideoFrame *cuvidUnmapVideoFrame; /* resolved from "cuvidUnmapVideoFrame64" when __CUVID_DEVPTR64 is defined */
+    tcuvidCtxLockCreate *cuvidCtxLockCreate;
+    tcuvidCtxLockDestroy *cuvidCtxLockDestroy;
+    tcuvidCtxLock *cuvidCtxLock;
+    tcuvidCtxUnlock *cuvidCtxUnlock;
+
+    tcuvidCreateVideoSource *cuvidCreateVideoSource; /* this group's types are presumably declared in dynlink_nvcuvid.h -- confirm */
+    tcuvidCreateVideoSourceW *cuvidCreateVideoSourceW;
+    tcuvidDestroyVideoSource *cuvidDestroyVideoSource;
+    tcuvidSetVideoSourceState *cuvidSetVideoSourceState;
+    tcuvidGetVideoSourceState *cuvidGetVideoSourceState;
+    tcuvidGetSourceVideoFormat *cuvidGetSourceVideoFormat;
+    tcuvidGetSourceAudioFormat *cuvidGetSourceAudioFormat;
+    tcuvidCreateVideoParser *cuvidCreateVideoParser;
+    tcuvidParseVideoData *cuvidParseVideoData;
+    tcuvidDestroyVideoParser *cuvidDestroyVideoParser;
+
+    LIB_HANDLE lib; /* handle from dlopen(NVCUVID_LIBNAME); closed by cuvid_free_functions() */
+} CuvidFunctions;
+
+typedef struct NvencFunctions { /* table filled by nvenc_load_functions() */
+    NVENCSTATUS (NVENCAPI *NvEncodeAPICreateInstance)(NV_ENCODE_API_FUNCTION_LIST *functionList); /* member name equals the symbol name that is resolved */
+    NVENCSTATUS (NVENCAPI *NvEncodeAPIGetMaxSupportedVersion)(uint32_t* version);
+
+    LIB_HANDLE lib; /* handle from dlopen(NVENC_LIBNAME); closed by nvenc_free_functions() */
+} NvencFunctions;
+
+#ifdef AV_COMPAT_DYNLINK_CUDA_H /* needs the complete CudaFunctions definition */
+static inline void cuda_free_functions(CudaFunctions **functions)
+{ /* No-op if functions is NULL; otherwise closes (*functions)->lib when set and frees the table via av_freep(). */
+    GENERIC_FREE_FUNC();
+}
+#endif
+
+static inline void cuvid_free_functions(CuvidFunctions **functions)
+{ /* No-op if functions is NULL; otherwise closes (*functions)->lib when set and frees the table via av_freep(). */
+    GENERIC_FREE_FUNC();
+}
+
+static inline void nvenc_free_functions(NvencFunctions **functions)
+{ /* No-op if functions is NULL; otherwise closes (*functions)->lib when set and frees the table via av_freep(). */
+    GENERIC_FREE_FUNC();
+}
+
+#ifdef AV_COMPAT_DYNLINK_CUDA_H /* needs the complete CudaFunctions definition */
+static inline int cuda_load_functions(CudaFunctions **functions)
+{ /* Frees any table in *functions, allocates a new one, opens CUDA_LIBNAME and resolves every symbol listed below. */
+    GENERIC_LOAD_FUNC_PREAMBLE(CudaFunctions, cuda, CUDA_LIBNAME);
+    /* Returns 0 on success, AVERROR(ENOMEM) if the allocation fails, AVERROR_UNKNOWN if the library or any symbol is missing. */
+    LOAD_SYMBOL(cuInit, "cuInit");
+    LOAD_SYMBOL(cuDeviceGetCount, "cuDeviceGetCount");
+    LOAD_SYMBOL(cuDeviceGet, "cuDeviceGet");
+    LOAD_SYMBOL(cuDeviceGetName, "cuDeviceGetName");
+    LOAD_SYMBOL(cuDeviceComputeCapability, "cuDeviceComputeCapability");
+    LOAD_SYMBOL(cuCtxCreate, "cuCtxCreate_v2"); /* member names drop the _v2 suffix of the symbol */
+    LOAD_SYMBOL(cuCtxPushCurrent, "cuCtxPushCurrent_v2");
+    LOAD_SYMBOL(cuCtxPopCurrent, "cuCtxPopCurrent_v2");
+    LOAD_SYMBOL(cuCtxDestroy, "cuCtxDestroy_v2");
+    LOAD_SYMBOL(cuMemAlloc, "cuMemAlloc_v2");
+    LOAD_SYMBOL(cuMemFree, "cuMemFree_v2");
+    LOAD_SYMBOL(cuMemcpy2D, "cuMemcpy2D_v2");
+    LOAD_SYMBOL(cuGetErrorName, "cuGetErrorName");
+    LOAD_SYMBOL(cuGetErrorString, "cuGetErrorString");
+
+    GENERIC_LOAD_FUNC_FINALE(cuda); /* supplies return 0 and the error: path that frees the table */
+}
+#endif
+
+static inline int cuvid_load_functions(CuvidFunctions **functions)
+{ /* Frees any table in *functions, allocates a new one, opens NVCUVID_LIBNAME and resolves every symbol listed below. */
+    GENERIC_LOAD_FUNC_PREAMBLE(CuvidFunctions, cuvid, NVCUVID_LIBNAME);
+    /* Returns 0 on success, AVERROR(ENOMEM) if the allocation fails, AVERROR_UNKNOWN if the library or any symbol is missing. */
+    LOAD_SYMBOL(cuvidCreateDecoder, "cuvidCreateDecoder");
+    LOAD_SYMBOL(cuvidDestroyDecoder, "cuvidDestroyDecoder");
+    LOAD_SYMBOL(cuvidDecodePicture, "cuvidDecodePicture");
+#ifdef __CUVID_DEVPTR64 /* same macro that redirects tcuvidMapVideoFrame to the 64-bit type in dynlink_cuviddec.h */
+    LOAD_SYMBOL(cuvidMapVideoFrame, "cuvidMapVideoFrame64");
+    LOAD_SYMBOL(cuvidUnmapVideoFrame, "cuvidUnmapVideoFrame64");
+#else
+    LOAD_SYMBOL(cuvidMapVideoFrame, "cuvidMapVideoFrame");
+    LOAD_SYMBOL(cuvidUnmapVideoFrame, "cuvidUnmapVideoFrame");
+#endif
+    LOAD_SYMBOL(cuvidCtxLockCreate, "cuvidCtxLockCreate");
+    LOAD_SYMBOL(cuvidCtxLockDestroy, "cuvidCtxLockDestroy");
+    LOAD_SYMBOL(cuvidCtxLock, "cuvidCtxLock");
+    LOAD_SYMBOL(cuvidCtxUnlock, "cuvidCtxUnlock");
+
+    LOAD_SYMBOL(cuvidCreateVideoSource, "cuvidCreateVideoSource");
+    LOAD_SYMBOL(cuvidCreateVideoSourceW, "cuvidCreateVideoSourceW");
+    LOAD_SYMBOL(cuvidDestroyVideoSource, "cuvidDestroyVideoSource");
+    LOAD_SYMBOL(cuvidSetVideoSourceState, "cuvidSetVideoSourceState");
+    LOAD_SYMBOL(cuvidGetVideoSourceState, "cuvidGetVideoSourceState");
+    LOAD_SYMBOL(cuvidGetSourceVideoFormat, "cuvidGetSourceVideoFormat");
+    LOAD_SYMBOL(cuvidGetSourceAudioFormat, "cuvidGetSourceAudioFormat");
+    LOAD_SYMBOL(cuvidCreateVideoParser, "cuvidCreateVideoParser");
+    LOAD_SYMBOL(cuvidParseVideoData, "cuvidParseVideoData");
+    LOAD_SYMBOL(cuvidDestroyVideoParser, "cuvidDestroyVideoParser");
+
+    GENERIC_LOAD_FUNC_FINALE(cuvid); /* supplies return 0 and the error: path that frees the table */
+}
+
+static inline int nvenc_load_functions(NvencFunctions **functions)
+{ /* Frees any table in *functions, allocates a new one, opens NVENC_LIBNAME and resolves the two entry points below. */
+    GENERIC_LOAD_FUNC_PREAMBLE(NvencFunctions, nvenc, NVENC_LIBNAME);
+    /* Returns 0 on success, AVERROR(ENOMEM) if the allocation fails, AVERROR_UNKNOWN if the library or either symbol is missing. */
+    LOAD_SYMBOL(NvEncodeAPICreateInstance, "NvEncodeAPICreateInstance");
+    LOAD_SYMBOL(NvEncodeAPIGetMaxSupportedVersion, "NvEncodeAPIGetMaxSupportedVersion");
+
+    GENERIC_LOAD_FUNC_FINALE(nvenc); /* supplies return 0 and the error: path that frees the table */
+}
+
+#undef GENERIC_LOAD_FUNC_PREAMBLE /* helper macros are private to this header: remove them for includers */
+#undef LOAD_LIBRARY
+#undef LOAD_SYMBOL
+#undef GENERIC_LOAD_FUNC_FINALE
+#undef GENERIC_FREE_FUNC
+#undef CUDA_LIBNAME
+#undef NVCUVID_LIBNAME
+#undef NVENC_LIBNAME
+#undef LIB_HANDLE /* the struct members declared above keep the type it expanded to */
+
+#if defined(_WIN32) /* the dl* names were defined as macros only in the _WIN32 branch */
+#undef dlopen
+#undef dlsym
+#undef dlclose
+#endif
+
+#endif
\ No newline at end of file
diff --git a/compat/cuda/dynlink_nvcuvid.h b/compat/cuda/dynlink_nvcuvid.h
new file mode 100644
index 0000000..6c197e0
--- /dev/null
+++ b/compat/cuda/dynlink_nvcuvid.h
@@ -0,0 +1,316 @@ 
+/*
+ * This copyright notice applies to this header file only:
+ *
+ * Copyright (c) 2010-2016 NVIDIA Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the software, and to permit persons to whom the
+ * software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file nvcuvid.h
+ *   NvCuvid API provides Video Decoding interface to NVIDIA GPU devices.
+ * \date 2015-2015
+ *  This file contains the interface constants, structure definitions and function prototypes.
+ */
+
+#if !defined(__NVCUVID_H__)
+#define __NVCUVID_H__
+
+#include "compat/cuda/dynlink_cuviddec.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif /* __cplusplus */
+
+////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// High-level helper APIs for video sources
+//
+
+typedef void *CUvideosource;         /**< Opaque video source handle (filled in via tcuvidCreateVideoSource) */
+typedef void *CUvideoparser;         /**< Opaque video parser handle (filled in via tcuvidCreateVideoParser) */
+typedef long long CUvideotimestamp;  /**< Timestamp in clock ticks; see ulClockRate (0 = default 10 MHz) */
+
+/**
+ * \addtogroup VIDEO_PARSER Video Parser
+ * @{
+ */
+
+/*!
+ * \enum cudaVideoState
+ * Video Source State
+ */
+typedef enum {
+    cudaVideoState_Error   = -1,    /**< Error state (invalid source)  */
+    cudaVideoState_Stopped = 0,     /**< Source is stopped (or reached end-of-stream)  */
+    cudaVideoState_Started = 1      /**< Source is running and delivering data  */
+} cudaVideoState;
+
+/*!
+ * \enum cudaAudioCodec
+ * Audio compression
+ */
+typedef enum {
+    cudaAudioCodec_MPEG1=0,         /**< MPEG-1 Audio  */
+    cudaAudioCodec_MPEG2,           /**< MPEG-2 Audio  */
+    cudaAudioCodec_MP3,             /**< MPEG-1 Layer III Audio  */
+    cudaAudioCodec_AC3,             /**< Dolby Digital (AC3) Audio  */
+    cudaAudioCodec_LPCM             /**< PCM Audio  */
+} cudaAudioCodec;
+
+/*!
+ * \struct CUVIDEOFORMAT
+ * Video format
+ */
+typedef struct
+{
+    cudaVideoCodec codec;                   /**< Compression format  */
+   /**
+    * frame rate = numerator / denominator (for example: 30000/1001)
+    */
+    struct {
+        unsigned int numerator;             /**< frame rate numerator   (0 = unspecified or variable frame rate) */
+        unsigned int denominator;           /**< frame rate denominator (0 = unspecified or variable frame rate) */
+    } frame_rate;
+    unsigned char progressive_sequence;     /**< 0=interlaced, 1=progressive */
+    unsigned char bit_depth_luma_minus8;    /**< high bit depth Luma: luma bit depth minus 8 (0 means 8-bit) */
+    unsigned char bit_depth_chroma_minus8;  /**< high bit depth Chroma: chroma bit depth minus 8 (0 means 8-bit) */
+    unsigned char reserved1;                /**< Reserved for future use */
+    unsigned int coded_width;               /**< coded frame width */
+    unsigned int coded_height;              /**< coded frame height  */
+   /**
+    *   area of the frame that should be displayed
+    * typical example:
+    *   coded_width = 1920, coded_height = 1088
+    *   display_area = { 0,0,1920,1080 }
+    */
+    struct {
+        int left;                           /**< left position of display rect  */
+        int top;                            /**< top position of display rect  */
+        int right;                          /**< right position of display rect  */
+        int bottom;                         /**< bottom position of display rect  */
+    } display_area;
+    cudaVideoChromaFormat chroma_format;    /**<  Chroma format */
+    unsigned int bitrate;                   /**< video bitrate (bps, 0=unknown) */
+   /**
+    * Display Aspect Ratio = x:y (4:3, 16:9, etc)
+    */
+    struct {
+        int x;                              /**< x term of the x:y display aspect ratio */
+        int y;                              /**< y term of the x:y display aspect ratio */
+    } display_aspect_ratio;
+    /**
+    * Video Signal Description (NOTE(review): member names match the H.264/HEVC VUI video signal syntax elements; presumably same value encoding - confirm)
+    */
+    struct {
+        unsigned char video_format          : 3;   /**< 3-bit video format code */
+        unsigned char video_full_range_flag : 1;   /**< 1-bit full-range flag */
+        unsigned char reserved_zero_bits    : 4;   /**< 4 reserved bits (presumably must be zero - confirm) */
+        unsigned char color_primaries;             /**< colour primaries code */
+        unsigned char transfer_characteristics;    /**< transfer characteristics code */
+        unsigned char matrix_coefficients;         /**< matrix coefficients code */
+    } video_signal_description;
+    unsigned int seqhdr_data_length;          /**< Additional bytes following (CUVIDEOFORMATEX), i.e. in its raw_seqhdr_data member */
+} CUVIDEOFORMAT;
+
+/*!
+ * \struct CUVIDEOFORMATEX
+ * Video format including raw sequence header information
+ */
+typedef struct
+{
+    CUVIDEOFORMAT format;                   /**< Base video format; format.seqhdr_data_length gives the count of additional bytes that follow */
+    unsigned char raw_seqhdr_data[1024];    /**< Raw sequence header bytes (buffer capacity is 1024 bytes) */
+} CUVIDEOFORMATEX;
+
+/*!
+ * \struct CUAUDIOFORMAT
+ * Audio Formats
+ */
+typedef struct
+{
+    cudaAudioCodec codec;       /**< Compression format  */
+    unsigned int channels;      /**< number of audio channels */
+    unsigned int samplespersec; /**< sampling frequency */
+    unsigned int bitrate;       /**< For uncompressed, can also be used to determine bits per sample */
+    unsigned int reserved1;     /**< Reserved for future use */
+    unsigned int reserved2;     /**< Reserved for future use */
+} CUAUDIOFORMAT;
+
+
+/*!
+ * \enum CUvideopacketflags
+ * Data packet flags
+ */
+typedef enum {
+    CUVID_PKT_ENDOFSTREAM   = 0x01,   /**< Set when this is the last packet for this stream  */
+    CUVID_PKT_TIMESTAMP     = 0x02,   /**< Timestamp is valid  */
+    CUVID_PKT_DISCONTINUITY = 0x04    /**< Set when a discontinuity has to be signalled  */
+} CUvideopacketflags;
+
+/*!
+ * \struct CUVIDSOURCEDATAPACKET
+ * Data Packet
+ */
+typedef struct _CUVIDSOURCEDATAPACKET
+{
+    unsigned long flags;            /**< Combination of CUVID_PKT_XXX flags */
+    unsigned long payload_size;     /**< number of bytes in the payload (may be zero if EOS flag is set) */
+    const unsigned char *payload;   /**< Pointer to packet payload data (may be NULL if EOS flag is set) */
+    CUvideotimestamp timestamp;     /**< Presentation timestamp (10MHz clock), only valid if CUVID_PKT_TIMESTAMP flag is set */
+} CUVIDSOURCEDATAPACKET;
+
+// Callback for packet delivery
+typedef int (CUDAAPI *PFNVIDSOURCECALLBACK)(void *, CUVIDSOURCEDATAPACKET *);
+
+/*!
+ * \struct CUVIDSOURCEPARAMS
+ * Source Params
+ */
+typedef struct _CUVIDSOURCEPARAMS
+{
+    unsigned int ulClockRate;                   /**< Timestamp units in Hz (0=default=10000000Hz)  */
+    unsigned int uReserved1[7];                 /**< Reserved for future use - set to zero  */
+    void *pUserData;                            /**< Parameter passed in to the data handlers  */
+    PFNVIDSOURCECALLBACK pfnVideoDataHandler;   /**< Called to deliver video packets  */
+    PFNVIDSOURCECALLBACK pfnAudioDataHandler;   /**< Called to deliver audio packets  */
+    void *pvReserved2[8];                       /**< Reserved for future use - set to NULL */
+} CUVIDSOURCEPARAMS;
+
+/*!
+ * \enum CUvideosourceformat_flags
+ * CUvideosourceformat_flags
+ */
+typedef enum {
+    CUVID_FMT_EXTFORMATINFO = 0x100             /**< Return extended format structure (CUVIDEOFORMATEX) */
+} CUvideosourceformat_flags;
+
+#if !defined(__APPLE__)
+/**
+ * \fn CUresult CUDAAPI cuvidCreateVideoSource(CUvideosource *pObj, const char *pszFileName, CUVIDSOURCEPARAMS *pParams)
+ * Create Video Source
+ */
+typedef CUresult CUDAAPI tcuvidCreateVideoSource(CUvideosource *pObj, const char *pszFileName, CUVIDSOURCEPARAMS *pParams);
+
+/**
+ * \fn CUresult CUDAAPI cuvidCreateVideoSourceW(CUvideosource *pObj, const wchar_t *pwszFileName, CUVIDSOURCEPARAMS *pParams)
+ * Create Video Source
+ */
+typedef CUresult CUDAAPI tcuvidCreateVideoSourceW(CUvideosource *pObj, const wchar_t *pwszFileName, CUVIDSOURCEPARAMS *pParams);
+
+/**
+ * \fn CUresult CUDAAPI cuvidDestroyVideoSource(CUvideosource obj)
+ * Destroy Video Source
+ */
+typedef CUresult CUDAAPI tcuvidDestroyVideoSource(CUvideosource obj);
+
+/**
+ * \fn CUresult CUDAAPI cuvidSetVideoSourceState(CUvideosource obj, cudaVideoState state)
+ * Set Video Source state
+ */
+typedef CUresult CUDAAPI tcuvidSetVideoSourceState(CUvideosource obj, cudaVideoState state);
+
+/**
+ * \fn cudaVideoState CUDAAPI cuvidGetVideoSourceState(CUvideosource obj)
+ * Get Video Source state
+ */
+typedef cudaVideoState CUDAAPI tcuvidGetVideoSourceState(CUvideosource obj);
+
+/**
+ * \fn CUresult CUDAAPI cuvidGetSourceVideoFormat(CUvideosource obj, CUVIDEOFORMAT *pvidfmt, unsigned int flags)
+ * Get Video Source Format
+ */
+typedef CUresult CUDAAPI tcuvidGetSourceVideoFormat(CUvideosource obj, CUVIDEOFORMAT *pvidfmt, unsigned int flags);
+
+/**
+ * \fn CUresult CUDAAPI cuvidGetSourceAudioFormat(CUvideosource obj, CUAUDIOFORMAT *paudfmt, unsigned int flags)
+ * Get Audio Source Format
+ */
+typedef CUresult CUDAAPI tcuvidGetSourceAudioFormat(CUvideosource obj, CUAUDIOFORMAT *paudfmt, unsigned int flags);
+
+#endif
+
+/**
+ * \struct CUVIDPARSERDISPINFO
+ */
+typedef struct _CUVIDPARSERDISPINFO
+{
+    int picture_index;         /**< Index of the picture that is ready for display */
+    int progressive_frame;     /**< Presumably 1 for a progressive frame, 0 otherwise - confirm against NVIDIA docs */
+    int top_field_first;       /**< Presumably 1 if the top field is displayed first, 0 otherwise - confirm against NVIDIA docs */
+    int repeat_first_field;    /**< Number of additional fields (1=ivtc, 2=frame doubling, 4=frame tripling, -1=unpaired field)  */
+    CUvideotimestamp timestamp; /**< Timestamp associated with this picture */
+} CUVIDPARSERDISPINFO;
+
+//
+// Parser callbacks
+// The parser will call these synchronously from within cuvidParseVideoData(), whenever a picture is ready to
+// be decoded and/or displayed.
+//
+typedef int (CUDAAPI *PFNVIDSEQUENCECALLBACK)(void *, CUVIDEOFORMAT *);
+typedef int (CUDAAPI *PFNVIDDECODECALLBACK)(void *, CUVIDPICPARAMS *);
+typedef int (CUDAAPI *PFNVIDDISPLAYCALLBACK)(void *, CUVIDPARSERDISPINFO *);
+
+/**
+ * \struct CUVIDPARSERPARAMS
+ */
+typedef struct _CUVIDPARSERPARAMS
+{
+    cudaVideoCodec CodecType;               /**< cudaVideoCodec_XXX  */
+    unsigned int ulMaxNumDecodeSurfaces;    /**< Max # of decode surfaces (parser will cycle through these) */
+    unsigned int ulClockRate;               /**< Timestamp units in Hz (0=default=10000000Hz) */
+    unsigned int ulErrorThreshold;          /**< % Error threshold (0-100) for calling pfnDecodePicture (100=always call pfnDecodePicture even if picture bitstream is fully corrupted) */
+    unsigned int ulMaxDisplayDelay;         /**< Max display queue delay (improves pipelining of decode with display) - 0=no delay (recommended values: 2..4) */
+    unsigned int uReserved1[5];             /**< Reserved for future use - set to 0 */
+    void *pUserData;                        /**< User data for callbacks */
+    PFNVIDSEQUENCECALLBACK pfnSequenceCallback; /**< Called before decoding frames and/or whenever there is a format change */
+    PFNVIDDECODECALLBACK pfnDecodePicture;      /**< Called when a picture is ready to be decoded (decode order) */
+    PFNVIDDISPLAYCALLBACK pfnDisplayPicture;    /**< Called whenever a picture is ready to be displayed (display order)  */
+    void *pvReserved2[7];                       /**< Reserved for future use - set to NULL */
+    CUVIDEOFORMATEX *pExtVideoInfo;             /**< [Optional] sequence header data from system layer */
+} CUVIDPARSERPARAMS;
+
+/**
+ * \fn CUresult CUDAAPI cuvidCreateVideoParser(CUvideoparser *pObj, CUVIDPARSERPARAMS *pParams)
+ */
+typedef CUresult CUDAAPI tcuvidCreateVideoParser(CUvideoparser *pObj, CUVIDPARSERPARAMS *pParams);
+
+/**
+ * \fn CUresult CUDAAPI cuvidParseVideoData(CUvideoparser obj, CUVIDSOURCEDATAPACKET *pPacket)
+ */
+typedef CUresult CUDAAPI tcuvidParseVideoData(CUvideoparser obj, CUVIDSOURCEDATAPACKET *pPacket);
+
+/**
+ * \fn CUresult CUDAAPI cuvidDestroyVideoParser(CUvideoparser obj)
+ */
+typedef CUresult CUDAAPI tcuvidDestroyVideoParser(CUvideoparser obj);
+
+/** @} */  /* END VIDEO_PARSER */
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+#if defined(__cplusplus)
+}
+#endif /* __cplusplus */
+
+#endif // __NVCUVID_H__
+
+