diff mbox

[FFmpeg-devel,3/8] avcodec/cuvid: use dynamically loaded CUDA/CUVID

Message ID 20161019120036.618-3-timo@rothenpieler.org
State Accepted
Headers show

Commit Message

Timo Rothenpieler Oct. 19, 2016, noon UTC
And remove the now obsolete compat headers.
---
 compat/cuda/cuviddec.h | 827 -------------------------------------------------
 compat/cuda/nvcuvid.h  | 321 -------------------
 libavcodec/cuvid.c     |  78 +++--
 3 files changed, 47 insertions(+), 1179 deletions(-)
 delete mode 100644 compat/cuda/cuviddec.h
 delete mode 100644 compat/cuda/nvcuvid.h
diff mbox

Patch

diff --git a/compat/cuda/cuviddec.h b/compat/cuda/cuviddec.h
deleted file mode 100644
index f9257ea..0000000
--- a/compat/cuda/cuviddec.h
+++ /dev/null
@@ -1,827 +0,0 @@ 
-/*
- * This copyright notice applies to this header file only:
- *
- * Copyright (c) 2010-2016 NVIDIA Corporation
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the software, and to permit persons to whom the
- * software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file cuviddec.h
- * NvCuvid API provides Video Decoding interface to NVIDIA GPU devices.
- * \date 2015-2016
- * This file contains constants, structure definitions and function prototypes used for decoding.
- */
-
-#if !defined(__CUDA_VIDEO_H__)
-#define __CUDA_VIDEO_H__
-
-#ifndef __cuda_cuda_h__
-#include <cuda.h>
-#endif // __cuda_cuda_h__
-
-#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
-#if (CUDA_VERSION >= 3020) && (!defined(CUDA_FORCE_API_VERSION) || (CUDA_FORCE_API_VERSION >= 3020))
-#define __CUVID_DEVPTR64
-#endif
-#endif
-
-#if defined(__cplusplus)
-extern "C" {
-#endif /* __cplusplus */
-
-typedef void *CUvideodecoder;
-typedef struct _CUcontextlock_st *CUvideoctxlock;
-
-/**
- * \addtogroup VIDEO_DECODER Video Decoder
- * @{
- */
-
-/*!
- * \enum cudaVideoCodec
- * Video Codec Enums
- */
-typedef enum cudaVideoCodec_enum {
-    cudaVideoCodec_MPEG1=0,                 /**<  MPEG1   */
-    cudaVideoCodec_MPEG2,                   /**<  MPEG2  */
-    cudaVideoCodec_MPEG4,                   /**<  MPEG4   */
-    cudaVideoCodec_VC1,                     /**<  VC1   */
-    cudaVideoCodec_H264,                    /**<  H264   */
-    cudaVideoCodec_JPEG,                    /**<  JPEG   */
-    cudaVideoCodec_H264_SVC,                /**<  H264-SVC   */
-    cudaVideoCodec_H264_MVC,                /**<  H264-MVC   */
-    cudaVideoCodec_HEVC,                    /**<  HEVC   */
-    cudaVideoCodec_VP8,                     /**<  VP8   */
-    cudaVideoCodec_VP9,                     /**<  VP9   */
-    cudaVideoCodec_NumCodecs,               /**<  Max Codecs   */
-    // Uncompressed YUV
-    cudaVideoCodec_YUV420 = (('I'<<24)|('Y'<<16)|('U'<<8)|('V')),   /**< Y,U,V (4:2:0)  */
-    cudaVideoCodec_YV12   = (('Y'<<24)|('V'<<16)|('1'<<8)|('2')),   /**< Y,V,U (4:2:0)  */
-    cudaVideoCodec_NV12   = (('N'<<24)|('V'<<16)|('1'<<8)|('2')),   /**< Y,UV  (4:2:0)  */
-    cudaVideoCodec_YUYV   = (('Y'<<24)|('U'<<16)|('Y'<<8)|('V')),   /**< YUYV/YUY2 (4:2:2)  */
-    cudaVideoCodec_UYVY   = (('U'<<24)|('Y'<<16)|('V'<<8)|('Y'))    /**< UYVY (4:2:2)  */
-} cudaVideoCodec;
-
-/*!
- * \enum cudaVideoSurfaceFormat
- * Video Surface Formats Enums
- */
-typedef enum cudaVideoSurfaceFormat_enum {
-    cudaVideoSurfaceFormat_NV12=0       /**< NV12 (currently the only supported output format)  */
-} cudaVideoSurfaceFormat;
-
-/*!
- * \enum cudaVideoDeinterlaceMode
- * Deinterlacing Modes Enums
- */
-typedef enum cudaVideoDeinterlaceMode_enum {
-    cudaVideoDeinterlaceMode_Weave=0,   /**< Weave both fields (no deinterlacing) */
-    cudaVideoDeinterlaceMode_Bob,       /**< Drop one field  */
-    cudaVideoDeinterlaceMode_Adaptive   /**< Adaptive deinterlacing  */
-} cudaVideoDeinterlaceMode;
-
-/*!
- * \enum cudaVideoChromaFormat
- * Chroma Formats Enums
- */
-typedef enum cudaVideoChromaFormat_enum {
-    cudaVideoChromaFormat_Monochrome=0,  /**< MonoChrome */
-    cudaVideoChromaFormat_420,           /**< 4:2:0 */
-    cudaVideoChromaFormat_422,           /**< 4:2:2 */
-    cudaVideoChromaFormat_444            /**< 4:4:4 */
-} cudaVideoChromaFormat;
-
-/*!
- * \enum cudaVideoCreateFlags
- * Decoder Flags Enums
- */
-typedef enum cudaVideoCreateFlags_enum {
-    cudaVideoCreate_Default = 0x00,     /**< Default operation mode: use dedicated video engines */
-    cudaVideoCreate_PreferCUDA = 0x01,  /**< Use a CUDA-based decoder if faster than dedicated engines (requires a valid vidLock object for multi-threading) */
-    cudaVideoCreate_PreferDXVA = 0x02,  /**< Go through DXVA internally if possible (requires D3D9 interop) */
-    cudaVideoCreate_PreferCUVID = 0x04  /**< Use dedicated video engines directly */
-} cudaVideoCreateFlags;
-
-/*!
- * \struct CUVIDDECODECREATEINFO
- * Struct used in create decoder
- */
-typedef struct _CUVIDDECODECREATEINFO
-{
-    unsigned long ulWidth;              /**< Coded Sequence Width */
-    unsigned long ulHeight;             /**< Coded Sequence Height */
-    unsigned long ulNumDecodeSurfaces;  /**< Maximum number of internal decode surfaces */
-    cudaVideoCodec CodecType;           /**< cudaVideoCodec_XXX */
-    cudaVideoChromaFormat ChromaFormat; /**< cudaVideoChromaFormat_XXX (only 4:2:0 is currently supported) */
-    unsigned long ulCreationFlags;      /**< Decoder creation flags (cudaVideoCreateFlags_XXX) */
-    unsigned long bitDepthMinus8;
-    unsigned long Reserved1[4];         /**< Reserved for future use - set to zero */
-    /**
-    * area of the frame that should be displayed
-    */
-    struct {
-        short left;
-        short top;
-        short right;
-        short bottom;
-    } display_area;
-
-    cudaVideoSurfaceFormat OutputFormat;       /**< cudaVideoSurfaceFormat_XXX */
-    cudaVideoDeinterlaceMode DeinterlaceMode;  /**< cudaVideoDeinterlaceMode_XXX */
-    unsigned long ulTargetWidth;               /**< Post-processed Output Width (Should be aligned to 2) */
-    unsigned long ulTargetHeight;              /**< Post-processed Output Height (Should be aligned to 2) */
-    unsigned long ulNumOutputSurfaces;         /**< Maximum number of output surfaces simultaneously mapped */
-    CUvideoctxlock vidLock;                    /**< If non-NULL, context lock used for synchronizing ownership of the cuda context */
-    /**
-    * target rectangle in the output frame (for aspect ratio conversion)
-    * if a null rectangle is specified, {0,0,ulTargetWidth,ulTargetHeight} will be used
-    */
-    struct {
-        short left;
-        short top;
-        short right;
-        short bottom;
-    } target_rect;
-    unsigned long Reserved2[5];                /**< Reserved for future use - set to zero */
-} CUVIDDECODECREATEINFO;
-
-/*!
- * \struct CUVIDH264DPBENTRY
- * H.264 DPB Entry
- */
-typedef struct _CUVIDH264DPBENTRY
-{
-    int PicIdx;                 /**< picture index of reference frame */
-    int FrameIdx;               /**< frame_num(short-term) or LongTermFrameIdx(long-term) */
-    int is_long_term;           /**< 0=short term reference, 1=long term reference */
-    int not_existing;           /**< non-existing reference frame (corresponding PicIdx should be set to -1) */
-    int used_for_reference;     /**< 0=unused, 1=top_field, 2=bottom_field, 3=both_fields */
-    int FieldOrderCnt[2];       /**< field order count of top and bottom fields */
-} CUVIDH264DPBENTRY;
-
-/*!
- * \struct CUVIDH264MVCEXT
- * H.264 MVC Picture Parameters Ext
- */
-typedef struct _CUVIDH264MVCEXT
-{
-    int num_views_minus1;
-    int view_id;
-    unsigned char inter_view_flag;
-    unsigned char num_inter_view_refs_l0;
-    unsigned char num_inter_view_refs_l1;
-    unsigned char MVCReserved8Bits;
-    int InterViewRefsL0[16];
-    int InterViewRefsL1[16];
-} CUVIDH264MVCEXT;
-
-/*!
- * \struct CUVIDH264SVCEXT
- * H.264 SVC Picture Parameters Ext
- */
-typedef struct _CUVIDH264SVCEXT
-{
-    unsigned char profile_idc;
-    unsigned char level_idc;
-    unsigned char DQId;
-    unsigned char DQIdMax;
-    unsigned char disable_inter_layer_deblocking_filter_idc;
-    unsigned char ref_layer_chroma_phase_y_plus1;
-    signed char   inter_layer_slice_alpha_c0_offset_div2;
-    signed char   inter_layer_slice_beta_offset_div2;
-
-    unsigned short DPBEntryValidFlag;
-    unsigned char inter_layer_deblocking_filter_control_present_flag;
-    unsigned char extended_spatial_scalability_idc;
-    unsigned char adaptive_tcoeff_level_prediction_flag;
-    unsigned char slice_header_restriction_flag;
-    unsigned char chroma_phase_x_plus1_flag;
-    unsigned char chroma_phase_y_plus1;
-
-    unsigned char tcoeff_level_prediction_flag;
-    unsigned char constrained_intra_resampling_flag;
-    unsigned char ref_layer_chroma_phase_x_plus1_flag;
-    unsigned char store_ref_base_pic_flag;
-    unsigned char Reserved8BitsA;
-    unsigned char Reserved8BitsB;
-    // For the 4 scaled_ref_layer_XX fields below,
-    // if (extended_spatial_scalability_idc == 1), SPS field, G.7.3.2.1.4, add prefix "seq_"
-    // if (extended_spatial_scalability_idc == 2), SLH field, G.7.3.3.4,
-    short scaled_ref_layer_left_offset;
-    short scaled_ref_layer_top_offset;
-    short scaled_ref_layer_right_offset;
-    short scaled_ref_layer_bottom_offset;
-    unsigned short Reserved16Bits;
-    struct _CUVIDPICPARAMS *pNextLayer; /**< Points to the picparams for the next layer to be decoded. Linked list ends at the target layer. */
-    int bRefBaseLayer;                  /**< whether to store ref base pic */
-} CUVIDH264SVCEXT;
-
-/*!
- * \struct CUVIDH264PICPARAMS
- * H.264 Picture Parameters
- */
-typedef struct _CUVIDH264PICPARAMS
-{
-    // SPS
-    int log2_max_frame_num_minus4;
-    int pic_order_cnt_type;
-    int log2_max_pic_order_cnt_lsb_minus4;
-    int delta_pic_order_always_zero_flag;
-    int frame_mbs_only_flag;
-    int direct_8x8_inference_flag;
-    int num_ref_frames;             // NOTE: shall meet level 4.1 restrictions
-    unsigned char residual_colour_transform_flag;
-    unsigned char bit_depth_luma_minus8;    // Must be 0 (only 8-bit supported)
-    unsigned char bit_depth_chroma_minus8;  // Must be 0 (only 8-bit supported)
-    unsigned char qpprime_y_zero_transform_bypass_flag;
-    // PPS
-    int entropy_coding_mode_flag;
-    int pic_order_present_flag;
-    int num_ref_idx_l0_active_minus1;
-    int num_ref_idx_l1_active_minus1;
-    int weighted_pred_flag;
-    int weighted_bipred_idc;
-    int pic_init_qp_minus26;
-    int deblocking_filter_control_present_flag;
-    int redundant_pic_cnt_present_flag;
-    int transform_8x8_mode_flag;
-    int MbaffFrameFlag;
-    int constrained_intra_pred_flag;
-    int chroma_qp_index_offset;
-    int second_chroma_qp_index_offset;
-    int ref_pic_flag;
-    int frame_num;
-    int CurrFieldOrderCnt[2];
-    // DPB
-    CUVIDH264DPBENTRY dpb[16];          // List of reference frames within the DPB
-    // Quantization Matrices (raster-order)
-    unsigned char WeightScale4x4[6][16];
-    unsigned char WeightScale8x8[2][64];
-    // FMO/ASO
-    unsigned char fmo_aso_enable;
-    unsigned char num_slice_groups_minus1;
-    unsigned char slice_group_map_type;
-    signed char pic_init_qs_minus26;
-    unsigned int slice_group_change_rate_minus1;
-    union
-    {
-        unsigned long long slice_group_map_addr;
-        const unsigned char *pMb2SliceGroupMap;
-    } fmo;
-    unsigned int  Reserved[12];
-    // SVC/MVC
-    union
-    {
-        CUVIDH264MVCEXT mvcext;
-        CUVIDH264SVCEXT svcext;
-    };
-} CUVIDH264PICPARAMS;
-
-
-/*!
- * \struct CUVIDMPEG2PICPARAMS
- * MPEG-2 Picture Parameters
- */
-typedef struct _CUVIDMPEG2PICPARAMS
-{
-    int ForwardRefIdx;          // Picture index of forward reference (P/B-frames)
-    int BackwardRefIdx;         // Picture index of backward reference (B-frames)
-    int picture_coding_type;
-    int full_pel_forward_vector;
-    int full_pel_backward_vector;
-    int f_code[2][2];
-    int intra_dc_precision;
-    int frame_pred_frame_dct;
-    int concealment_motion_vectors;
-    int q_scale_type;
-    int intra_vlc_format;
-    int alternate_scan;
-    int top_field_first;
-    // Quantization matrices (raster order)
-    unsigned char QuantMatrixIntra[64];
-    unsigned char QuantMatrixInter[64];
-} CUVIDMPEG2PICPARAMS;
-
-////////////////////////////////////////////////////////////////////////////////////////////////
-//
-// MPEG-4 Picture Parameters
-//
-
-// MPEG-4 has VOP types instead of Picture types
-#define I_VOP 0
-#define P_VOP 1
-#define B_VOP 2
-#define S_VOP 3
-
-/*!
- * \struct CUVIDMPEG4PICPARAMS
- * MPEG-4 Picture Parameters
- */
-typedef struct _CUVIDMPEG4PICPARAMS
-{
-    int ForwardRefIdx;          // Picture index of forward reference (P/B-frames)
-    int BackwardRefIdx;         // Picture index of backward reference (B-frames)
-    // VOL
-    int video_object_layer_width;
-    int video_object_layer_height;
-    int vop_time_increment_bitcount;
-    int top_field_first;
-    int resync_marker_disable;
-    int quant_type;
-    int quarter_sample;
-    int short_video_header;
-    int divx_flags;
-    // VOP
-    int vop_coding_type;
-    int vop_coded;
-    int vop_rounding_type;
-    int alternate_vertical_scan_flag;
-    int interlaced;
-    int vop_fcode_forward;
-    int vop_fcode_backward;
-    int trd[2];
-    int trb[2];
-    // Quantization matrices (raster order)
-    unsigned char QuantMatrixIntra[64];
-    unsigned char QuantMatrixInter[64];
-    int gmc_enabled;
-} CUVIDMPEG4PICPARAMS;
-
-/*!
- * \struct CUVIDVC1PICPARAMS
- * VC1 Picture Parameters
- */
-typedef struct _CUVIDVC1PICPARAMS
-{
-    int ForwardRefIdx;      /**< Picture index of forward reference (P/B-frames) */
-    int BackwardRefIdx;     /**< Picture index of backward reference (B-frames) */
-    int FrameWidth;         /**< Actual frame width */
-    int FrameHeight;        /**< Actual frame height */
-    // PICTURE
-    int intra_pic_flag;     /**< Set to 1 for I,BI frames */
-    int ref_pic_flag;       /**< Set to 1 for I,P frames */
-    int progressive_fcm;    /**< Progressive frame */
-    // SEQUENCE
-    int profile;
-    int postprocflag;
-    int pulldown;
-    int interlace;
-    int tfcntrflag;
-    int finterpflag;
-    int psf;
-    int multires;
-    int syncmarker;
-    int rangered;
-    int maxbframes;
-    // ENTRYPOINT
-    int panscan_flag;
-    int refdist_flag;
-    int extended_mv;
-    int dquant;
-    int vstransform;
-    int loopfilter;
-    int fastuvmc;
-    int overlap;
-    int quantizer;
-    int extended_dmv;
-    int range_mapy_flag;
-    int range_mapy;
-    int range_mapuv_flag;
-    int range_mapuv;
-    int rangeredfrm;    // range reduction state
-} CUVIDVC1PICPARAMS;
-
-/*!
- * \struct CUVIDJPEGPICPARAMS
- * JPEG Picture Parameters
- */
-typedef struct _CUVIDJPEGPICPARAMS
-{
-    int Reserved;
-} CUVIDJPEGPICPARAMS;
-
-
- /*!
- * \struct CUVIDHEVCPICPARAMS
- * HEVC Picture Parameters
- */
-typedef struct _CUVIDHEVCPICPARAMS
-{
-    // sps
-    int pic_width_in_luma_samples;
-    int pic_height_in_luma_samples;
-    unsigned char log2_min_luma_coding_block_size_minus3;
-    unsigned char log2_diff_max_min_luma_coding_block_size;
-    unsigned char log2_min_transform_block_size_minus2;
-    unsigned char log2_diff_max_min_transform_block_size;
-    unsigned char pcm_enabled_flag;
-    unsigned char log2_min_pcm_luma_coding_block_size_minus3;
-    unsigned char log2_diff_max_min_pcm_luma_coding_block_size;
-    unsigned char pcm_sample_bit_depth_luma_minus1;
-
-    unsigned char pcm_sample_bit_depth_chroma_minus1;
-    unsigned char pcm_loop_filter_disabled_flag;
-    unsigned char strong_intra_smoothing_enabled_flag;
-    unsigned char max_transform_hierarchy_depth_intra;
-    unsigned char max_transform_hierarchy_depth_inter;
-    unsigned char amp_enabled_flag;
-    unsigned char separate_colour_plane_flag;
-    unsigned char log2_max_pic_order_cnt_lsb_minus4;
-
-    unsigned char num_short_term_ref_pic_sets;
-    unsigned char long_term_ref_pics_present_flag;
-    unsigned char num_long_term_ref_pics_sps;
-    unsigned char sps_temporal_mvp_enabled_flag;
-    unsigned char sample_adaptive_offset_enabled_flag;
-    unsigned char scaling_list_enable_flag;
-    unsigned char IrapPicFlag;
-    unsigned char IdrPicFlag;
-
-    unsigned char bit_depth_luma_minus8;
-    unsigned char bit_depth_chroma_minus8;
-    unsigned char reserved1[14];
-
-    // pps
-    unsigned char dependent_slice_segments_enabled_flag;
-    unsigned char slice_segment_header_extension_present_flag;
-    unsigned char sign_data_hiding_enabled_flag;
-    unsigned char cu_qp_delta_enabled_flag;
-    unsigned char diff_cu_qp_delta_depth;
-    signed char init_qp_minus26;
-    signed char pps_cb_qp_offset;
-    signed char pps_cr_qp_offset;
-
-    unsigned char constrained_intra_pred_flag;
-    unsigned char weighted_pred_flag;
-    unsigned char weighted_bipred_flag;
-    unsigned char transform_skip_enabled_flag;
-    unsigned char transquant_bypass_enabled_flag;
-    unsigned char entropy_coding_sync_enabled_flag;
-    unsigned char log2_parallel_merge_level_minus2;
-    unsigned char num_extra_slice_header_bits;
-
-    unsigned char loop_filter_across_tiles_enabled_flag;
-    unsigned char loop_filter_across_slices_enabled_flag;
-    unsigned char output_flag_present_flag;
-    unsigned char num_ref_idx_l0_default_active_minus1;
-    unsigned char num_ref_idx_l1_default_active_minus1;
-    unsigned char lists_modification_present_flag;
-    unsigned char cabac_init_present_flag;
-    unsigned char pps_slice_chroma_qp_offsets_present_flag;
-
-    unsigned char deblocking_filter_override_enabled_flag;
-    unsigned char pps_deblocking_filter_disabled_flag;
-    signed char pps_beta_offset_div2;
-    signed char pps_tc_offset_div2;
-    unsigned char tiles_enabled_flag;
-    unsigned char uniform_spacing_flag;
-    unsigned char num_tile_columns_minus1;
-    unsigned char num_tile_rows_minus1;
-
-    unsigned short column_width_minus1[21];
-    unsigned short row_height_minus1[21];
-    unsigned int reserved3[15];
-
-    // RefPicSets
-    int NumBitsForShortTermRPSInSlice;
-    int NumDeltaPocsOfRefRpsIdx;
-    int NumPocTotalCurr;
-    int NumPocStCurrBefore;
-    int NumPocStCurrAfter;
-    int NumPocLtCurr;
-    int CurrPicOrderCntVal;
-    int RefPicIdx[16];                  // [refpic] Indices of valid reference pictures (-1 if unused for reference)
-    int PicOrderCntVal[16];             // [refpic]
-    unsigned char IsLongTerm[16];       // [refpic] 0=not a long-term reference, 1=long-term reference
-    unsigned char RefPicSetStCurrBefore[8]; // [0..NumPocStCurrBefore-1] -> refpic (0..15)
-    unsigned char RefPicSetStCurrAfter[8];  // [0..NumPocStCurrAfter-1] -> refpic (0..15)
-    unsigned char RefPicSetLtCurr[8];       // [0..NumPocLtCurr-1] -> refpic (0..15)
-    unsigned char RefPicSetInterLayer0[8];
-    unsigned char RefPicSetInterLayer1[8];
-    unsigned int reserved4[12];
-
-    // scaling lists (diag order)
-    unsigned char ScalingList4x4[6][16];       // [matrixId][i]
-    unsigned char ScalingList8x8[6][64];       // [matrixId][i]
-    unsigned char ScalingList16x16[6][64];     // [matrixId][i]
-    unsigned char ScalingList32x32[2][64];     // [matrixId][i]
-    unsigned char ScalingListDCCoeff16x16[6];  // [matrixId]
-    unsigned char ScalingListDCCoeff32x32[2];  // [matrixId]
-} CUVIDHEVCPICPARAMS;
-
-
-/*!
- * \struct CUVIDVP8PICPARAMS
- * VP8 Picture Parameters
- */
-typedef struct _CUVIDVP8PICPARAMS
-{
-    int width;
-    int height;
-    unsigned int first_partition_size;
-    //Frame Indexes
-    unsigned char LastRefIdx;
-    unsigned char GoldenRefIdx;
-    unsigned char AltRefIdx;
-    union {
-        struct {
-            unsigned char frame_type : 1;    /**< 0 = KEYFRAME, 1 = INTERFRAME  */
-            unsigned char version : 3;
-            unsigned char show_frame : 1;
-            unsigned char update_mb_segmentation_data : 1;    /**< Must be 0 if segmentation is not enabled */
-            unsigned char Reserved2Bits : 2;
-        };
-        unsigned char wFrameTagFlags;
-    };
-    unsigned char Reserved1[4];
-    unsigned int  Reserved2[3];
-} CUVIDVP8PICPARAMS;
-
-/*!
- * \struct CUVIDVP9PICPARAMS
- * VP9 Picture Parameters
- */
-typedef struct _CUVIDVP9PICPARAMS
-{
-    unsigned int width;
-    unsigned int height;
-
-    //Frame Indices
-    unsigned char LastRefIdx;
-    unsigned char GoldenRefIdx;
-    unsigned char AltRefIdx;
-    unsigned char colorSpace;
-
-    unsigned short profile : 3;
-    unsigned short frameContextIdx : 2;
-    unsigned short frameType : 1;
-    unsigned short showFrame : 1;
-    unsigned short errorResilient : 1;
-    unsigned short frameParallelDecoding : 1;
-    unsigned short subSamplingX : 1;
-    unsigned short subSamplingY : 1;
-    unsigned short intraOnly : 1;
-    unsigned short allow_high_precision_mv : 1;
-    unsigned short refreshEntropyProbs : 1;
-    unsigned short reserved2Bits : 2;
-
-    unsigned short reserved16Bits;
-
-    unsigned char  refFrameSignBias[4];
-
-    unsigned char bitDepthMinus8Luma;
-    unsigned char bitDepthMinus8Chroma;
-    unsigned char loopFilterLevel;
-    unsigned char loopFilterSharpness;
-
-    unsigned char modeRefLfEnabled;
-    unsigned char log2_tile_columns;
-    unsigned char log2_tile_rows;
-
-    unsigned char segmentEnabled : 1;
-    unsigned char segmentMapUpdate : 1;
-    unsigned char segmentMapTemporalUpdate : 1;
-    unsigned char segmentFeatureMode : 1;
-    unsigned char reserved4Bits : 4;
-
-
-    unsigned char segmentFeatureEnable[8][4];
-    short segmentFeatureData[8][4];
-    unsigned char mb_segment_tree_probs[7];
-    unsigned char segment_pred_probs[3];
-    unsigned char reservedSegment16Bits[2];
-
-    int qpYAc;
-    int qpYDc;
-    int qpChDc;
-    int qpChAc;
-
-    unsigned int activeRefIdx[3];
-    unsigned int resetFrameContext;
-    unsigned int mcomp_filter_type;
-    unsigned int mbRefLfDelta[4];
-    unsigned int mbModeLfDelta[2];
-    unsigned int frameTagSize;
-    unsigned int offsetToDctParts;
-    unsigned int reserved128Bits[4];
-
-} CUVIDVP9PICPARAMS;
-
-
-/*!
- * \struct CUVIDPICPARAMS
- * Picture Parameters for Decoding
- */
-typedef struct _CUVIDPICPARAMS
-{
-    int PicWidthInMbs;                    /**< Coded Frame Size */
-    int FrameHeightInMbs;                 /**< Coded Frame Height */
-    int CurrPicIdx;                       /**< Output index of the current picture */
-    int field_pic_flag;                   /**< 0=frame picture, 1=field picture */
-    int bottom_field_flag;                /**< 0=top field, 1=bottom field (ignored if field_pic_flag=0) */
-    int second_field;                     /**< Second field of a complementary field pair */
-    // Bitstream data
-    unsigned int nBitstreamDataLen;        /**< Number of bytes in bitstream data buffer */
-    const unsigned char *pBitstreamData;   /**< Ptr to bitstream data for this picture (slice-layer) */
-    unsigned int nNumSlices;               /**< Number of slices in this picture */
-    const unsigned int *pSliceDataOffsets; /**< nNumSlices entries, contains offset of each slice within the bitstream data buffer */
-    int ref_pic_flag;                      /**< This picture is a reference picture */
-    int intra_pic_flag;                    /**< This picture is entirely intra coded */
-    unsigned int Reserved[30];             /**< Reserved for future use */
-    // Codec-specific data
-    union {
-        CUVIDMPEG2PICPARAMS mpeg2;         /**< Also used for MPEG-1 */
-        CUVIDH264PICPARAMS h264;
-        CUVIDVC1PICPARAMS vc1;
-        CUVIDMPEG4PICPARAMS mpeg4;
-        CUVIDJPEGPICPARAMS jpeg;
-        CUVIDHEVCPICPARAMS hevc;
-        CUVIDVP8PICPARAMS vp8;
-        CUVIDVP9PICPARAMS vp9;
-        unsigned int CodecReserved[1024];
-    } CodecSpecific;
-} CUVIDPICPARAMS;
-
-
-/*!
- * \struct CUVIDPROCPARAMS
- * Picture Parameters for Postprocessing
- */
-typedef struct _CUVIDPROCPARAMS
-{
-    int progressive_frame;  /**< Input is progressive (deinterlace_mode will be ignored)  */
-    int second_field;       /**< Output the second field (ignored if deinterlace mode is Weave) */
-    int top_field_first;    /**< Input frame is top field first (1st field is top, 2nd field is bottom) */
-    int unpaired_field;     /**< Input only contains one field (2nd field is invalid) */
-    // The fields below are used for raw YUV input
-    unsigned int reserved_flags;        /**< Reserved for future use (set to zero) */
-    unsigned int reserved_zero;         /**< Reserved (set to zero) */
-    unsigned long long raw_input_dptr;  /**< Input CUdeviceptr for raw YUV extensions */
-    unsigned int raw_input_pitch;       /**< pitch in bytes of raw YUV input (should be aligned appropriately) */
-    unsigned int raw_input_format;      /**< Reserved for future use (set to zero) */
-    unsigned long long raw_output_dptr; /**< Reserved for future use (set to zero) */
-    unsigned int raw_output_pitch;      /**< Reserved for future use (set to zero) */
-    unsigned int Reserved[48];
-    void *Reserved3[3];
-} CUVIDPROCPARAMS;
-
-
-/**
- *
- * In order to minimize decode latencies, there should be always at least 2 pictures in the decode
- * queue at any time, in order to make sure that all decode engines are always busy.
- *
- * Overall data flow:
- *  - cuvidCreateDecoder(...)
- *  For each picture:
- *  - cuvidDecodePicture(N)
- *  - cuvidMapVideoFrame(N-4)
- *  - do some processing in cuda
- *  - cuvidUnmapVideoFrame(N-4)
- *  - cuvidDecodePicture(N+1)
- *  - cuvidMapVideoFrame(N-3)
- *    ...
- *  - cuvidDestroyDecoder(...)
- *
- * NOTE:
- * - When the cuda context is created from a D3D device, the D3D device must also be created
- *   with the D3DCREATE_MULTITHREADED flag.
- * - There is a limit to how many pictures can be mapped simultaneously (ulNumOutputSurfaces)
- * - cuvidDecodePicture may block the calling thread if there are too many pictures pending
- *   in the decode queue
- */
-
-/**
- * \fn CUresult CUDAAPI cuvidCreateDecoder(CUvideodecoder *phDecoder, CUVIDDECODECREATEINFO *pdci)
- * Create the decoder object
- */
-CUresult CUDAAPI cuvidCreateDecoder(CUvideodecoder *phDecoder, CUVIDDECODECREATEINFO *pdci);
-
-/**
- * \fn CUresult CUDAAPI cuvidDestroyDecoder(CUvideodecoder hDecoder)
- * Destroy the decoder object
- */
-CUresult CUDAAPI cuvidDestroyDecoder(CUvideodecoder hDecoder);
-
-/**
- * \fn CUresult CUDAAPI cuvidDecodePicture(CUvideodecoder hDecoder, CUVIDPICPARAMS *pPicParams)
- * Decode a single picture (field or frame)
- */
-CUresult CUDAAPI cuvidDecodePicture(CUvideodecoder hDecoder, CUVIDPICPARAMS *pPicParams);
-
-
-#if !defined(__CUVID_DEVPTR64) || defined(__CUVID_INTERNAL)
-/**
- * \fn CUresult CUDAAPI cuvidMapVideoFrame(CUvideodecoder hDecoder, int nPicIdx, unsigned int *pDevPtr, unsigned int *pPitch, CUVIDPROCPARAMS *pVPP);
- * Post-process and map a video frame for use in cuda
- */
-CUresult CUDAAPI cuvidMapVideoFrame(CUvideodecoder hDecoder, int nPicIdx,
-                                           unsigned int *pDevPtr, unsigned int *pPitch,
-                                           CUVIDPROCPARAMS *pVPP);
-
-/**
- * \fn CUresult CUDAAPI cuvidUnmapVideoFrame(CUvideodecoder hDecoder, unsigned int DevPtr)
- * Unmap a previously mapped video frame
- */
-CUresult CUDAAPI cuvidUnmapVideoFrame(CUvideodecoder hDecoder, unsigned int DevPtr);
-#endif
-
-#if defined(WIN64) || defined(_WIN64) || defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
-/**
- * \fn CUresult CUDAAPI cuvidMapVideoFrame64(CUvideodecoder hDecoder, int nPicIdx, unsigned long long *pDevPtr, unsigned int *pPitch, CUVIDPROCPARAMS *pVPP);
- * map a video frame
- */
-CUresult CUDAAPI cuvidMapVideoFrame64(CUvideodecoder hDecoder, int nPicIdx, unsigned long long *pDevPtr,
-                                             unsigned int *pPitch, CUVIDPROCPARAMS *pVPP);
-
-/**
- * \fn CUresult CUDAAPI cuvidUnmapVideoFrame64(CUvideodecoder hDecoder, unsigned long long DevPtr);
- * Unmap a previously mapped video frame
- */
-CUresult CUDAAPI cuvidUnmapVideoFrame64(CUvideodecoder hDecoder, unsigned long long DevPtr);
-
-#if defined(__CUVID_DEVPTR64) && !defined(__CUVID_INTERNAL)
-#define cuvidMapVideoFrame      cuvidMapVideoFrame64
-#define cuvidUnmapVideoFrame    cuvidUnmapVideoFrame64
-#endif
-#endif
-
-
-
-/**
- *
- * Context-locking: to facilitate multi-threaded implementations, the following 4 functions
- * provide a simple mutex-style host synchronization. If a non-NULL context is specified
- * in CUVIDDECODECREATEINFO, the codec library will acquire the mutex associated with the given
- * context before making any cuda calls.
- * A multi-threaded application could create a lock associated with a context handle so that
- * multiple threads can safely share the same cuda context:
- *  - use cuCtxPopCurrent immediately after context creation in order to create a 'floating' context
- *    that can be passed to cuvidCtxLockCreate.
- *  - When using a floating context, all cuda calls should only be made within a cuvidCtxLock/cuvidCtxUnlock section.
- *
- * NOTE: This is a safer alternative to cuCtxPushCurrent and cuCtxPopCurrent, and is not related to video
- * decoder in any way (implemented as a critical section associated with cuCtx{Push|Pop}Current calls).
-*/
-
-/**
- * \fn CUresult CUDAAPI cuvidCtxLockCreate(CUvideoctxlock *pLock, CUcontext ctx)
- */
-CUresult CUDAAPI cuvidCtxLockCreate(CUvideoctxlock *pLock, CUcontext ctx);
-
-/**
- * \fn CUresult CUDAAPI cuvidCtxLockDestroy(CUvideoctxlock lck)
- */
-CUresult CUDAAPI cuvidCtxLockDestroy(CUvideoctxlock lck);
-
-/**
- * \fn CUresult CUDAAPI cuvidCtxLock(CUvideoctxlock lck, unsigned int reserved_flags)
- */
-CUresult CUDAAPI cuvidCtxLock(CUvideoctxlock lck, unsigned int reserved_flags);
-
-/**
- * \fn CUresult CUDAAPI cuvidCtxUnlock(CUvideoctxlock lck, unsigned int reserved_flags)
- */
-CUresult CUDAAPI cuvidCtxUnlock(CUvideoctxlock lck, unsigned int reserved_flags);
-
-/** @} */  /* End VIDEO_DECODER */
-////////////////////////////////////////////////////////////////////////////////////////////////
-
-#if defined(__cplusplus)
-
-// Auto-lock helper for C++ applications
-class CCtxAutoLock
-{
-private:
-    CUvideoctxlock m_ctx;
-public:
-    CCtxAutoLock(CUvideoctxlock ctx);
-    ~CCtxAutoLock();
-};
-
-}
-
-#endif /* __cplusplus */
-
-#endif // __CUDA_VIDEO_H__
-
diff --git a/compat/cuda/nvcuvid.h b/compat/cuda/nvcuvid.h
deleted file mode 100644
index 738d8e3..0000000
--- a/compat/cuda/nvcuvid.h
+++ /dev/null
@@ -1,321 +0,0 @@ 
-/*
- * This copyright notice applies to this header file only:
- *
- * Copyright (c) 2010-2016 NVIDIA Corporation
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the software, and to permit persons to whom the
- * software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file nvcuvid.h
- *   NvCuvid API provides Video Decoding interface to NVIDIA GPU devices.
- * \date 2015-2015
- *  This file contains the interface constants, structure definitions and function prototypes.
- */
-
-#if !defined(__NVCUVID_H__)
-#define __NVCUVID_H__
-
-#include "compat/cuda/cuviddec.h"
-
-#if defined(__cplusplus)
-extern "C" {
-#endif /* __cplusplus */
-
-/*********************************
-** Initialization
-*********************************/
-CUresult  CUDAAPI cuvidInit(unsigned int Flags);
-
-////////////////////////////////////////////////////////////////////////////////////////////////
-//
-// High-level helper APIs for video sources
-//
-
-typedef void *CUvideosource;
-typedef void *CUvideoparser;
-typedef long long CUvideotimestamp;
-
-/**
- * \addtogroup VIDEO_PARSER Video Parser
- * @{
- */
-
-/*!
- * \enum cudaVideoState
- * Video Source State
- */
-typedef enum {
-    cudaVideoState_Error   = -1,    /**< Error state (invalid source)  */
-    cudaVideoState_Stopped = 0,     /**< Source is stopped (or reached end-of-stream)  */
-    cudaVideoState_Started = 1      /**< Source is running and delivering data  */
-} cudaVideoState;
-
-/*!
- * \enum cudaAudioCodec
- * Audio compression
- */
-typedef enum {
-    cudaAudioCodec_MPEG1=0,         /**< MPEG-1 Audio  */
-    cudaAudioCodec_MPEG2,           /**< MPEG-2 Audio  */
-    cudaAudioCodec_MP3,             /**< MPEG-1 Layer III Audio  */
-    cudaAudioCodec_AC3,             /**< Dolby Digital (AC3) Audio  */
-    cudaAudioCodec_LPCM             /**< PCM Audio  */
-} cudaAudioCodec;
-
-/*!
- * \struct CUVIDEOFORMAT
- * Video format
- */
-typedef struct
-{
-    cudaVideoCodec codec;                   /**< Compression format  */
-   /**
-    * frame rate = numerator / denominator (for example: 30000/1001)
-    */
-    struct {
-        unsigned int numerator;             /**< frame rate numerator   (0 = unspecified or variable frame rate) */
-        unsigned int denominator;           /**< frame rate denominator (0 = unspecified or variable frame rate) */
-    } frame_rate;
-    unsigned char progressive_sequence;     /**< 0=interlaced, 1=progressive */
-    unsigned char bit_depth_luma_minus8;    /**< high bit depth Luma */
-    unsigned char bit_depth_chroma_minus8;  /**< high bit depth Chroma */
-    unsigned char reserved1;                /**< Reserved for future use */
-    unsigned int coded_width;               /**< coded frame width */
-    unsigned int coded_height;              /**< coded frame height  */
-   /**
-    *   area of the frame that should be displayed
-    * typical example:
-    *   coded_width = 1920, coded_height = 1088
-    *   display_area = { 0,0,1920,1080 }
-    */
-    struct {
-        int left;                           /**< left position of display rect  */
-        int top;                            /**< top position of display rect  */
-        int right;                          /**< right position of display rect  */
-        int bottom;                         /**< bottom position of display rect  */
-    } display_area;
-    cudaVideoChromaFormat chroma_format;    /**<  Chroma format */
-    unsigned int bitrate;                   /**< video bitrate (bps, 0=unknown) */
-   /**
-    * Display Aspect Ratio = x:y (4:3, 16:9, etc)
-    */
-    struct {
-        int x;
-        int y;
-    } display_aspect_ratio;
-    /**
-    * Video Signal Description
-    */
-    struct {
-        unsigned char video_format          : 3;
-        unsigned char video_full_range_flag : 1;
-        unsigned char reserved_zero_bits    : 4;
-        unsigned char color_primaries;
-        unsigned char transfer_characteristics;
-        unsigned char matrix_coefficients;
-    } video_signal_description;
-    unsigned int seqhdr_data_length;          /**< Additional bytes following (CUVIDEOFORMATEX)  */
-} CUVIDEOFORMAT;
-
-/*!
- * \struct CUVIDEOFORMATEX
- * Video format including raw sequence header information
- */
-typedef struct
-{
-    CUVIDEOFORMAT format;
-    unsigned char raw_seqhdr_data[1024];
-} CUVIDEOFORMATEX;
-
-/*!
- * \struct CUAUDIOFORMAT
- * Audio Formats
- */
-typedef struct
-{
-    cudaAudioCodec codec;       /**< Compression format  */
-    unsigned int channels;      /**< number of audio channels */
-    unsigned int samplespersec; /**< sampling frequency */
-    unsigned int bitrate;       /**< For uncompressed, can also be used to determine bits per sample */
-    unsigned int reserved1;     /**< Reserved for future use */
-    unsigned int reserved2;     /**< Reserved for future use */
-} CUAUDIOFORMAT;
-
-
-/*!
- * \enum CUvideopacketflags
- * Data packet flags
- */
-typedef enum {
-    CUVID_PKT_ENDOFSTREAM   = 0x01,   /**< Set when this is the last packet for this stream  */
-    CUVID_PKT_TIMESTAMP     = 0x02,   /**< Timestamp is valid  */
-    CUVID_PKT_DISCONTINUITY = 0x04    /**< Set when a discontinuity has to be signalled  */
-} CUvideopacketflags;
-
-/*!
- * \struct CUVIDSOURCEDATAPACKET
- * Data Packet
- */
-typedef struct _CUVIDSOURCEDATAPACKET
-{
-    unsigned long flags;            /**< Combination of CUVID_PKT_XXX flags */
-    unsigned long payload_size;     /**< number of bytes in the payload (may be zero if EOS flag is set) */
-    const unsigned char *payload;   /**< Pointer to packet payload data (may be NULL if EOS flag is set) */
-    CUvideotimestamp timestamp;     /**< Presentation timestamp (10MHz clock), only valid if CUVID_PKT_TIMESTAMP flag is set */
-} CUVIDSOURCEDATAPACKET;
-
-// Callback for packet delivery
-typedef int (CUDAAPI *PFNVIDSOURCECALLBACK)(void *, CUVIDSOURCEDATAPACKET *);
-
-/*!
- * \struct CUVIDSOURCEPARAMS
- * Source Params
- */
-typedef struct _CUVIDSOURCEPARAMS
-{
-    unsigned int ulClockRate;                   /**< Timestamp units in Hz (0=default=10000000Hz)  */
-    unsigned int uReserved1[7];                 /**< Reserved for future use - set to zero  */
-    void *pUserData;                            /**< Parameter passed in to the data handlers  */
-    PFNVIDSOURCECALLBACK pfnVideoDataHandler;   /**< Called to deliver video packets  */
-    PFNVIDSOURCECALLBACK pfnAudioDataHandler;   /**< Called to deliver audio packets  */
-    void *pvReserved2[8];                       /**< Reserved for future use - set to NULL */
-} CUVIDSOURCEPARAMS;
-
-/*!
- * \enum CUvideosourceformat_flags
- * CUvideosourceformat_flags
- */
-typedef enum {
-    CUVID_FMT_EXTFORMATINFO = 0x100             /**< Return extended format structure (CUVIDEOFORMATEX) */
-} CUvideosourceformat_flags;
-
-#if !defined(__APPLE__)
-/**
- * \fn CUresult CUDAAPI cuvidCreateVideoSource(CUvideosource *pObj, const char *pszFileName, CUVIDSOURCEPARAMS *pParams)
- * Create Video Source
- */
-CUresult CUDAAPI cuvidCreateVideoSource(CUvideosource *pObj, const char *pszFileName, CUVIDSOURCEPARAMS *pParams);
-
-/**
- * \fn CUresult CUDAAPI cuvidCreateVideoSourceW(CUvideosource *pObj, const wchar_t *pwszFileName, CUVIDSOURCEPARAMS *pParams)
- * Create Video Source (wide-character file name variant)
- */
-CUresult CUDAAPI cuvidCreateVideoSourceW(CUvideosource *pObj, const wchar_t *pwszFileName, CUVIDSOURCEPARAMS *pParams);
-
-/**
- * \fn CUresult CUDAAPI cuvidDestroyVideoSource(CUvideosource obj)
- * Destroy Video Source
- */
-CUresult CUDAAPI cuvidDestroyVideoSource(CUvideosource obj);
-
-/**
- * \fn CUresult CUDAAPI cuvidSetVideoSourceState(CUvideosource obj, cudaVideoState state)
- * Set Video Source state
- */
-CUresult CUDAAPI cuvidSetVideoSourceState(CUvideosource obj, cudaVideoState state);
-
-/**
- * \fn cudaVideoState CUDAAPI cuvidGetVideoSourceState(CUvideosource obj)
- * Get Video Source state
- */
-cudaVideoState CUDAAPI cuvidGetVideoSourceState(CUvideosource obj);
-
-/**
- * \fn CUresult CUDAAPI cuvidGetSourceVideoFormat(CUvideosource obj, CUVIDEOFORMAT *pvidfmt, unsigned int flags)
- * Get Video Source Format
- */
-CUresult CUDAAPI cuvidGetSourceVideoFormat(CUvideosource obj, CUVIDEOFORMAT *pvidfmt, unsigned int flags);
-
-/**
- * \fn CUresult CUDAAPI cuvidGetSourceAudioFormat(CUvideosource obj, CUAUDIOFORMAT *paudfmt, unsigned int flags)
- * Get Audio Source Format
- */
-CUresult CUDAAPI cuvidGetSourceAudioFormat(CUvideosource obj, CUAUDIOFORMAT *paudfmt, unsigned int flags);
-
-#endif
-
-/**
- * \struct CUVIDPARSERDISPINFO
- */
-typedef struct _CUVIDPARSERDISPINFO
-{
-    int picture_index;         /**< Index of the current picture (the value callers hand to cuvidMapVideoFrame) */
-    int progressive_frame;     /**< 1 if progressive frame; 0 otherwise */
-    int top_field_first;       /**< 1 if top field is displayed first; 0 otherwise */
-    int repeat_first_field;    /**< Number of additional fields (1=ivtc, 2=frame doubling, 4=frame tripling, -1=unpaired field)  */
-    CUvideotimestamp timestamp; /**< Presentation timestamp of the picture */
-} CUVIDPARSERDISPINFO;
-
-//
-// Parser callbacks
-// The parser will call these synchronously from within cuvidParseVideoData(), whenever a picture is ready to
-// be decoded and/or displayed.
-//
-typedef int (CUDAAPI *PFNVIDSEQUENCECALLBACK)(void *, CUVIDEOFORMAT *);
-typedef int (CUDAAPI *PFNVIDDECODECALLBACK)(void *, CUVIDPICPARAMS *);
-typedef int (CUDAAPI *PFNVIDDISPLAYCALLBACK)(void *, CUVIDPARSERDISPINFO *);
-
-/**
- * \struct CUVIDPARSERPARAMS
- */
-typedef struct _CUVIDPARSERPARAMS
-{
-    cudaVideoCodec CodecType;               /**< cudaVideoCodec_XXX  */
-    unsigned int ulMaxNumDecodeSurfaces;    /**< Max # of decode surfaces (parser will cycle through these) */
-    unsigned int ulClockRate;               /**< Timestamp units in Hz (0=default=10000000Hz) */
-    unsigned int ulErrorThreshold;          /**< % Error threshold (0-100) for calling pfnDecodePicture (100=always call pfnDecodePicture even if picture bitstream is fully corrupted) */
-    unsigned int ulMaxDisplayDelay;         /**< Max display queue delay (improves pipelining of decode with display) - 0=no delay (recommended values: 2..4) */
-    unsigned int uReserved1[5];             /**< Reserved for future use - set to 0 */
-    void *pUserData;                        /**< User data for callbacks */
-    PFNVIDSEQUENCECALLBACK pfnSequenceCallback; /**< Called before decoding frames and/or whenever there is a format change */
-    PFNVIDDECODECALLBACK pfnDecodePicture;      /**< Called when a picture is ready to be decoded (decode order) */
-    PFNVIDDISPLAYCALLBACK pfnDisplayPicture;    /**< Called whenever a picture is ready to be displayed (display order)  */
-    void *pvReserved2[7];                       /**< Reserved for future use - set to NULL */
-    CUVIDEOFORMATEX *pExtVideoInfo;             /**< [Optional] sequence header data from system layer */
-} CUVIDPARSERPARAMS;
-
-/**
- * \fn CUresult CUDAAPI cuvidCreateVideoParser(CUvideoparser *pObj, CUVIDPARSERPARAMS *pParams)
- */
-CUresult CUDAAPI cuvidCreateVideoParser(CUvideoparser *pObj, CUVIDPARSERPARAMS *pParams);
-
-/**
- * \fn CUresult CUDAAPI cuvidParseVideoData(CUvideoparser obj, CUVIDSOURCEDATAPACKET *pPacket)
- */
-CUresult CUDAAPI cuvidParseVideoData(CUvideoparser obj, CUVIDSOURCEDATAPACKET *pPacket);
-
-/**
- * \fn CUresult CUDAAPI cuvidDestroyVideoParser(CUvideoparser obj)
- */
-CUresult CUDAAPI cuvidDestroyVideoParser(CUvideoparser obj);
-
-/** @} */  /* END VIDEO_PARSER */
-////////////////////////////////////////////////////////////////////////////////////////////////
-
-#if defined(__cplusplus)
-}
-#endif /* __cplusplus */
-
-#endif // __NVCUVID_H__
-
-
diff --git a/libavcodec/cuvid.c b/libavcodec/cuvid.c
index 27a4c30..17da924 100644
--- a/libavcodec/cuvid.c
+++ b/libavcodec/cuvid.c
@@ -19,10 +19,12 @@ 
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "compat/cuda/dynlink_loader.h"
+
 #include "libavutil/buffer.h"
 #include "libavutil/mathematics.h"
 #include "libavutil/hwcontext.h"
-#include "libavutil/hwcontext_cuda.h"
+#include "libavutil/hwcontext_cuda_internal.h"
 #include "libavutil/fifo.h"
 #include "libavutil/log.h"
 #include "libavutil/opt.h"
@@ -30,8 +32,6 @@ 
 #include "avcodec.h"
 #include "internal.h"
 
-#include "compat/cuda/nvcuvid.h"
-
 #define MAX_FRAME_COUNT 25
 
 typedef struct CuvidContext
@@ -61,6 +61,9 @@  typedef struct CuvidContext
 
     CUVIDPARSERPARAMS cuparseinfo;
     CUVIDEOFORMATEX cuparse_ext;
+
+    CudaFunctions *cudl;
+    CuvidFunctions *cvdl;
 } CuvidContext;
 
 typedef struct CuvidParsedFrame
@@ -72,6 +75,7 @@  typedef struct CuvidParsedFrame
 
 static int check_cu(AVCodecContext *avctx, CUresult err, const char *func)
 {
+    CuvidContext *ctx = avctx->priv_data;
     const char *err_name;
     const char *err_string;
 
@@ -80,8 +84,8 @@  static int check_cu(AVCodecContext *avctx, CUresult err, const char *func)
     if (err == CUDA_SUCCESS)
         return 0;
 
-    cuGetErrorName(err, &err_name);
-    cuGetErrorString(err, &err_string);
+    ctx->cudl->cuGetErrorName(err, &err_name);
+    ctx->cudl->cuGetErrorString(err, &err_string);
 
     av_log(avctx, AV_LOG_ERROR, "%s failed", func);
     if (err_name && err_string)
@@ -197,7 +201,7 @@  static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
     if (ctx->deint_mode != cudaVideoDeinterlaceMode_Weave)
         avctx->framerate = av_mul_q(avctx->framerate, (AVRational){2, 1});
 
-    ctx->internal_error = CHECK_CU(cuvidCreateDecoder(&ctx->cudecoder, &cuinfo));
+    ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidCreateDecoder(&ctx->cudecoder, &cuinfo));
     if (ctx->internal_error < 0)
         return 0;
 
@@ -223,7 +227,7 @@  static int CUDAAPI cuvid_handle_picture_decode(void *opaque, CUVIDPICPARAMS* pic
 
     av_log(avctx, AV_LOG_TRACE, "pfnDecodePicture\n");
 
-    ctx->internal_error = CHECK_CU(cuvidDecodePicture(ctx->cudecoder, picparams));
+    ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidDecodePicture(ctx->cudecoder, picparams));
     if (ctx->internal_error < 0)
         return 0;
 
@@ -289,7 +293,7 @@  static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt)
         avpkt = &filtered_packet;
     }
 
-    ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx));
+    ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx));
     if (ret < 0) {
         av_packet_unref(&filtered_packet);
         return ret;
@@ -313,7 +317,7 @@  static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt)
         ctx->decoder_flushing = 1;
     }
 
-    ret = CHECK_CU(cuvidParseVideoData(ctx->cuparser, &cupkt));
+    ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &cupkt));
 
     av_packet_unref(&filtered_packet);
 
@@ -328,7 +332,7 @@  static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt)
     }
 
 error:
-    eret = CHECK_CU(cuCtxPopCurrent(&dummy));
+    eret = CHECK_CU(ctx->cudl->cuCtxPopCurrent(&dummy));
 
     if (eret < 0)
         return eret;
@@ -357,7 +361,7 @@  static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
             return ret;
     }
 
-    ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx));
+    ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx));
     if (ret < 0)
         return ret;
 
@@ -375,7 +379,7 @@  static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
         params.second_field = parsed_frame.second_field;
         params.top_field_first = parsed_frame.dispinfo.top_field_first;
 
-        ret = CHECK_CU(cuvidMapVideoFrame(ctx->cudecoder, parsed_frame.dispinfo.picture_index, &mapped_frame, &pitch, &params));
+        ret = CHECK_CU(ctx->cvdl->cuvidMapVideoFrame(ctx->cudecoder, parsed_frame.dispinfo.picture_index, &mapped_frame, &pitch, &params));
         if (ret < 0)
             goto error;
 
@@ -405,7 +409,7 @@  static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
                     .Height        = avctx->height >> (i ? 1 : 0),
                 };
 
-                ret = CHECK_CU(cuMemcpy2D(&cpy));
+                ret = CHECK_CU(ctx->cudl->cuMemcpy2D(&cpy));
                 if (ret < 0)
                     goto error;
 
@@ -490,9 +494,9 @@  FF_ENABLE_DEPRECATION_WARNINGS
 
 error:
     if (mapped_frame)
-        eret = CHECK_CU(cuvidUnmapVideoFrame(ctx->cudecoder, mapped_frame));
+        eret = CHECK_CU(ctx->cvdl->cuvidUnmapVideoFrame(ctx->cudecoder, mapped_frame));
 
-    eret = CHECK_CU(cuCtxPopCurrent(&dummy));
+    eret = CHECK_CU(ctx->cudl->cuCtxPopCurrent(&dummy));
 
     if (eret < 0)
         return eret;
@@ -541,19 +545,24 @@  static av_cold int cuvid_decode_end(AVCodecContext *avctx)
         av_bsf_free(&ctx->bsf);
 
     if (ctx->cuparser)
-        cuvidDestroyVideoParser(ctx->cuparser);
+        ctx->cvdl->cuvidDestroyVideoParser(ctx->cuparser);
 
     if (ctx->cudecoder)
-        cuvidDestroyDecoder(ctx->cudecoder);
+        ctx->cvdl->cuvidDestroyDecoder(ctx->cudecoder);
+
+    ctx->cudl = NULL;
 
     av_buffer_unref(&ctx->hwframe);
     av_buffer_unref(&ctx->hwdevice);
 
+    cuvid_free_functions(&ctx->cvdl);
+
     return 0;
 }
 
 static int cuvid_test_dummy_decoder(AVCodecContext *avctx, CUVIDPARSERPARAMS *cuparseinfo)
 {
+    CuvidContext *ctx = avctx->priv_data;
     CUVIDDECODECREATEINFO cuinfo;
     CUvideodecoder cudec = 0;
     int ret = 0;
@@ -581,11 +590,11 @@  static int cuvid_test_dummy_decoder(AVCodecContext *avctx, CUVIDPARSERPARAMS *cu
 
     cuinfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
 
-    ret = CHECK_CU(cuvidCreateDecoder(&cudec, &cuinfo));
+    ret = CHECK_CU(ctx->cvdl->cuvidCreateDecoder(&cudec, &cuinfo));
     if (ret < 0)
         return ret;
 
-    ret = CHECK_CU(cuvidDestroyDecoder(cudec));
+    ret = CHECK_CU(ctx->cvdl->cuvidDestroyDecoder(cudec));
     if (ret < 0)
         return ret;
 
@@ -613,6 +622,13 @@  static av_cold int cuvid_decode_init(AVCodecContext *avctx)
         av_log(avctx, AV_LOG_ERROR, "ff_get_format failed: %d\n", ret);
         return ret;
     }
+    avctx->pix_fmt = ret;
+
+    ret = cuvid_load_functions(&ctx->cvdl);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Failed loading nvcuvid.\n");
+        goto error;
+    }
 
     ctx->frame_queue = av_fifo_alloc(MAX_FRAME_COUNT * sizeof(CuvidParsedFrame));
     if (!ctx->frame_queue) {
@@ -620,8 +636,6 @@  static av_cold int cuvid_decode_init(AVCodecContext *avctx)
         goto error;
     }
 
-    avctx->pix_fmt = ret;
-
     if (avctx->hw_frames_ctx) {
         ctx->hwframe = av_buffer_ref(avctx->hw_frames_ctx);
         if (!ctx->hwframe) {
@@ -653,7 +667,9 @@  static av_cold int cuvid_decode_init(AVCodecContext *avctx)
 
     device_ctx = hwframe_ctx->device_ctx;
     device_hwctx = device_ctx->hwctx;
+
     cuda_ctx = device_hwctx->cuda_ctx;
+    ctx->cudl = device_hwctx->internal->cuda_dl;
 
     memset(&ctx->cuparseinfo, 0, sizeof(ctx->cuparseinfo));
     memset(&ctx->cuparse_ext, 0, sizeof(ctx->cuparse_ext));
@@ -753,7 +769,7 @@  static av_cold int cuvid_decode_init(AVCodecContext *avctx)
     ctx->cuparseinfo.pfnDecodePicture = cuvid_handle_picture_decode;
     ctx->cuparseinfo.pfnDisplayPicture = cuvid_handle_picture_display;
 
-    ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx));
+    ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx));
     if (ret < 0)
         goto error;
 
@@ -761,7 +777,7 @@  static av_cold int cuvid_decode_init(AVCodecContext *avctx)
     if (ret < 0)
         goto error;
 
-    ret = CHECK_CU(cuvidCreateVideoParser(&ctx->cuparser, &ctx->cuparseinfo));
+    ret = CHECK_CU(ctx->cvdl->cuvidCreateVideoParser(&ctx->cuparser, &ctx->cuparseinfo));
     if (ret < 0)
         goto error;
 
@@ -769,12 +785,12 @@  static av_cold int cuvid_decode_init(AVCodecContext *avctx)
     seq_pkt.payload_size = ctx->cuparse_ext.format.seqhdr_data_length;
 
     if (seq_pkt.payload && seq_pkt.payload_size) {
-        ret = CHECK_CU(cuvidParseVideoData(ctx->cuparser, &seq_pkt));
+        ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &seq_pkt));
         if (ret < 0)
             goto error;
     }
 
-    ret = CHECK_CU(cuCtxPopCurrent(&dummy));
+    ret = CHECK_CU(ctx->cudl->cuCtxPopCurrent(&dummy));
     if (ret < 0)
         goto error;
 
@@ -799,7 +815,7 @@  static void cuvid_flush(AVCodecContext *avctx)
     CUVIDSOURCEDATAPACKET seq_pkt = { 0 };
     int ret;
 
-    ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx));
+    ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx));
     if (ret < 0)
         goto error;
 
@@ -812,16 +828,16 @@  static void cuvid_flush(AVCodecContext *avctx)
     }
 
     if (ctx->cudecoder) {
-        cuvidDestroyDecoder(ctx->cudecoder);
+        ctx->cvdl->cuvidDestroyDecoder(ctx->cudecoder);
         ctx->cudecoder = NULL;
     }
 
     if (ctx->cuparser) {
-        cuvidDestroyVideoParser(ctx->cuparser);
+        ctx->cvdl->cuvidDestroyVideoParser(ctx->cuparser);
         ctx->cuparser = NULL;
     }
 
-    ret = CHECK_CU(cuvidCreateVideoParser(&ctx->cuparser, &ctx->cuparseinfo));
+    ret = CHECK_CU(ctx->cvdl->cuvidCreateVideoParser(&ctx->cuparser, &ctx->cuparseinfo));
     if (ret < 0)
         goto error;
 
@@ -829,12 +845,12 @@  static void cuvid_flush(AVCodecContext *avctx)
     seq_pkt.payload_size = ctx->cuparse_ext.format.seqhdr_data_length;
 
     if (seq_pkt.payload && seq_pkt.payload_size) {
-        ret = CHECK_CU(cuvidParseVideoData(ctx->cuparser, &seq_pkt));
+        ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &seq_pkt));
         if (ret < 0)
             goto error;
     }
 
-    ret = CHECK_CU(cuCtxPopCurrent(&dummy));
+    ret = CHECK_CU(ctx->cudl->cuCtxPopCurrent(&dummy));
     if (ret < 0)
         goto error;