Message ID | 20230602080701.1754-2-tong1.wu@intel.com |
---|---|
State | New |
Headers | show |
Series | [FFmpeg-devel,v3,1/9] libavutil: add hwcontext_d3d12va and AV_PIX_FMT_D3D12 | expand |
Context | Check | Description |
---|---|---|
yinshiyou/make_loongarch64 | success | Make finished |
yinshiyou/make_fate_loongarch64 | success | Make fate finished |
andriy/make_x86 | success | Make finished |
andriy/make_fate_x86 | success | Make fate finished |
On Vr, 2023-06-02 at 16:06 +0800, Tong Wu wrote: > From: Wu Jianhua <toqsxw@outlook.com> > > The implementation is based on: > https://learn.microsoft.com/en-us/windows/win32/medfound/direct3d-12-video-overview > > With the Direct3D 12 video decoding support, we can render or process > the decoded images by the pixel shaders or compute shaders directly > without the extra copy overhead, which is beneficial especially if you > are trying to render or post-process a 4K or 8K video. > > The command below is how to enable d3d12va: > ffmpeg -hwaccel d3d12va -i input.mp4 output.mp4 > > Signed-off-by: Wu Jianhua <toqsxw@outlook.com> > Signed-off-by: Tong Wu <tong1.wu@intel.com> > --- > configure | 2 + > libavcodec/Makefile | 3 + > libavcodec/d3d11va.h | 3 - > libavcodec/d3d12va.c | 552 ++++++++++++++++++++++++++++++++++++ > libavcodec/d3d12va.h | 184 ++++++++++++ > libavcodec/d3d12va_h264.c | 210 ++++++++++++++ > libavcodec/dxva2.c | 24 ++ > libavcodec/dxva2.h | 3 - > libavcodec/dxva2_h264.c | 12 +- > libavcodec/dxva2_internal.h | 67 +++-- > libavcodec/h264_slice.c | 4 + > libavcodec/h264dec.c | 3 + > libavcodec/hwaccels.h | 1 + > libavcodec/hwconfig.h | 2 + > 14 files changed, 1028 insertions(+), 42 deletions(-) > create mode 100644 libavcodec/d3d12va.c > create mode 100644 libavcodec/d3d12va.h > create mode 100644 libavcodec/d3d12va_h264.c > > diff --git a/configure b/configure > index b86064e36f..f5dad4653f 100755 > --- a/configure > +++ b/configure > @@ -3033,6 +3033,8 @@ h264_d3d11va_hwaccel_deps="d3d11va" > h264_d3d11va_hwaccel_select="h264_decoder" > h264_d3d11va2_hwaccel_deps="d3d11va" > h264_d3d11va2_hwaccel_select="h264_decoder" > +h264_d3d12va_hwaccel_deps="d3d12va" > +h264_d3d12va_hwaccel_select="h264_decoder" > h264_dxva2_hwaccel_deps="dxva2" > h264_dxva2_hwaccel_select="h264_decoder" > h264_nvdec_hwaccel_deps="nvdec" > diff --git a/libavcodec/Makefile b/libavcodec/Makefile > index 9aacc1d477..ae143d8821 100644 > --- a/libavcodec/Makefile > +++ 
b/libavcodec/Makefile > @@ -977,6 +977,7 @@ OBJS-$(CONFIG_ADPCM_ZORK_DECODER) += adpcm.o > adpcm_data.o > > # hardware accelerators > OBJS-$(CONFIG_D3D11VA) += dxva2.o > +OBJS-$(CONFIG_D3D12VA) += dxva2.o d3d12va.o > OBJS-$(CONFIG_DXVA2) += dxva2.o > OBJS-$(CONFIG_NVDEC) += nvdec.o > OBJS-$(CONFIG_VAAPI) += vaapi_decode.o > @@ -994,6 +995,7 @@ OBJS-$(CONFIG_H263_VAAPI_HWACCEL) += vaapi_mpeg4.o > OBJS-$(CONFIG_H263_VIDEOTOOLBOX_HWACCEL) += videotoolbox.o > OBJS-$(CONFIG_H264_D3D11VA_HWACCEL) += dxva2_h264.o > OBJS-$(CONFIG_H264_DXVA2_HWACCEL) += dxva2_h264.o > +OBJS-$(CONFIG_H264_D3D12VA_HWACCEL) += dxva2_h264.o d3d12va_h264.o > OBJS-$(CONFIG_H264_NVDEC_HWACCEL) += nvdec_h264.o > OBJS-$(CONFIG_H264_QSV_HWACCEL) += qsvdec.o > OBJS-$(CONFIG_H264_VAAPI_HWACCEL) += vaapi_h264.o > @@ -1277,6 +1279,7 @@ SKIPHEADERS += > %_tablegen.h \ > > SKIPHEADERS-$(CONFIG_AMF) += amfenc.h > SKIPHEADERS-$(CONFIG_D3D11VA) += d3d11va.h dxva2_internal.h > +SKIPHEADERS-$(CONFIG_D3D12VA) += d3d12va.h > SKIPHEADERS-$(CONFIG_DXVA2) += dxva2.h dxva2_internal.h > SKIPHEADERS-$(CONFIG_JNI) += ffjni.h > SKIPHEADERS-$(CONFIG_LCMS2) += fflcms2.h > diff --git a/libavcodec/d3d11va.h b/libavcodec/d3d11va.h > index 6816b6c1e6..27f40e5519 100644 > --- a/libavcodec/d3d11va.h > +++ b/libavcodec/d3d11va.h > @@ -45,9 +45,6 @@ > * @{ > */ > > -#define FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG 1 ///< Work around for > Direct3D11 and old UVD/UVD+ ATI video cards > -#define FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO 2 ///< Work around for > Direct3D11 and old Intel GPUs with ClearVideo interface > - > /** > * This structure is used to provides the necessary configurations and data > * to the Direct3D11 FFmpeg HWAccel implementation. 
> diff --git a/libavcodec/d3d12va.c b/libavcodec/d3d12va.c > new file mode 100644 > index 0000000000..7f1fab7251 > --- /dev/null > +++ b/libavcodec/d3d12va.c > @@ -0,0 +1,552 @@ > +/* > + * Direct3D 12 HW acceleration video decoder > + * > + * copyright (c) 2022-2023 Wu Jianhua <toqsxw@outlook.com> > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + */ > + > +#include <assert.h> > +#include <string.h> > +#include <initguid.h> > + > +#include "libavutil/common.h" > +#include "libavutil/log.h" > +#include "libavutil/time.h" > +#include "libavutil/imgutils.h" > +#include "libavutil/hwcontext_d3d12va_internal.h" > +#include "libavutil/hwcontext_d3d12va.h" > +#include "avcodec.h" > +#include "decode.h" > +#include "d3d12va.h" > + > +typedef struct CommandAllocator { > + ID3D12CommandAllocator *command_allocator; > + uint64_t fence_value; > +} CommandAllocator; > + > +int ff_d3d12va_get_suitable_max_bitstream_size(AVCodecContext *avctx) > +{ > + AVHWFramesContext *frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx); > + return av_image_get_buffer_size(frames_ctx->sw_format, avctx- > >coded_width, avctx->coded_height, 1); > +} > + > +static int d3d12va_get_valid_command_allocator(AVCodecContext *avctx, > ID3D12CommandAllocator 
**ppAllocator) > +{ > + HRESULT hr; > + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); > + CommandAllocator allocator; > + > + if (av_fifo_peek(ctx->allocator_queue, &allocator, 1, 0) >= 0) { > + uint64_t completion = ID3D12Fence_GetCompletedValue(ctx->sync_ctx- > >fence); > + if (completion >= allocator.fence_value) { > + *ppAllocator = allocator.command_allocator; > + av_fifo_read(ctx->allocator_queue, &allocator, 1); > + return 0; > + } > + } > + > + hr = ID3D12Device_CreateCommandAllocator(ctx->device_ctx->device, > D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE, > + &IID_ID3D12CommandAllocator, ppAllocator); > + if (FAILED(hr)) { > + av_log(avctx, AV_LOG_ERROR, "Failed to create a new command > allocator!\n"); > + return AVERROR(EINVAL); > + } > + > + return 0; > +} > + > +static int d3d12va_discard_command_allocator(AVCodecContext *avctx, > ID3D12CommandAllocator *pAllocator, uint64_t fence_value) > +{ > + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); > + > + CommandAllocator allocator = { > + .command_allocator = pAllocator, > + .fence_value = fence_value > + }; > + > + if (av_fifo_write(ctx->allocator_queue, &allocator, 1) < 0) { > + D3D12_OBJECT_RELEASE(pAllocator); > + return AVERROR(ENOMEM); > + } > + > + return 0; > +} > + > +static void bufref_free_interface(void *opaque, uint8_t *data) > +{ > + D3D12_OBJECT_RELEASE(opaque); > +} > + > +static AVBufferRef *bufref_wrap_interface(IUnknown *iface) > +{ > + return av_buffer_create((uint8_t*)iface, 1, bufref_free_interface, iface, > 0); > +} > + > +static int d3d12va_create_buffer(AVCodecContext *avctx, UINT size, > ID3D12Resource **ppResouce) > +{ > + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); > + > + D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_UPLOAD }; > + > + D3D12_RESOURCE_DESC desc = { > + .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER, > + .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT, > + .Width = size, > + .Height = 1, > + .DepthOrArraySize = 
1, > + .MipLevels = 1, > + .Format = DXGI_FORMAT_UNKNOWN, > + .SampleDesc = { .Count = 1, .Quality = 0 }, > + .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR, > + .Flags = D3D12_RESOURCE_FLAG_NONE, > + }; > + > + HRESULT hr = ID3D12Device_CreateCommittedResource(ctx->device_ctx- > >device, &heap_props, D3D12_HEAP_FLAG_NONE, > + &desc, D3D12_RESOURCE_STATE_GENERIC_READ, NULL, &IID_ID3D12Resource, > ppResouce); > + > + if (FAILED(hr)) { > + av_log(avctx, AV_LOG_ERROR, "Failed to create d3d12 buffer.\n"); > + return AVERROR(EINVAL); > + } > + > + return 0; > +} > + > +static int d3d12va_wait_for_gpu(AVCodecContext *avctx) > +{ > + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); > + AVD3D12VASyncContext *sync_ctx = ctx->sync_ctx; > + > + return av_d3d12va_wait_queue_idle(sync_ctx, ctx->command_queue); > +} > + > +static int d3d12va_create_decoder_heap(AVCodecContext *avctx) > +{ > + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); > + AVHWFramesContext *frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx); > + AVD3D12VADeviceContext *hwctx = ctx->device_ctx; > + > + D3D12_VIDEO_DECODER_HEAP_DESC desc = { > + .NodeMask = 0, > + .Configuration = ctx->cfg, > + .DecodeWidth = frames_ctx->width, > + .DecodeHeight = frames_ctx->height, > + .Format = av_d3d12va_map_sw_to_hw_format(frames_ctx- > >sw_format), > + .FrameRate = { avctx->framerate.num, avctx->framerate.den }, > + .BitRate = avctx->bit_rate, > + .MaxDecodePictureBufferCount = frames_ctx->initial_pool_size, > + }; > + > + DX_CHECK(ID3D12VideoDevice_CreateVideoDecoderHeap(hwctx->video_device, > &desc, > + &IID_ID3D12VideoDecoderHeap, &ctx->decoder_heap)); > + > + return 0; > + > +fail: > + if (ctx->decoder) { > + av_log(avctx, AV_LOG_ERROR, "D3D12 doesn't support decoding frames > with an extent " > + "[width(%d), height(%d)], on your device!\n", frames_ctx->width, > frames_ctx->height); > + } > + > + return AVERROR(EINVAL); > +} > + > +static int d3d12va_create_decoder(AVCodecContext *avctx) > +{ > + 
D3D12_VIDEO_DECODER_DESC desc; > + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); > + AVHWFramesContext *frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx); > + AVD3D12VADeviceContext *hwctx = ctx->device_ctx; > + > + D3D12_FEATURE_DATA_VIDEO_DECODE_SUPPORT feature = { > + .NodeIndex = 0, > + .Configuration = ctx->cfg, > + .Width = frames_ctx->width, > + .Height = frames_ctx->height, > + .DecodeFormat = av_d3d12va_map_sw_to_hw_format(frames_ctx- > >sw_format), > + .FrameRate = { avctx->framerate.num, avctx->framerate.den }, > + .BitRate = avctx->bit_rate, > + }; > + > + DX_CHECK(ID3D12VideoDevice_CheckFeatureSupport(hwctx->video_device, > D3D12_FEATURE_VIDEO_DECODE_SUPPORT, &feature, sizeof(feature))); > + if (!(feature.SupportFlags & D3D12_VIDEO_DECODE_SUPPORT_FLAG_SUPPORTED) > || > + !(feature.DecodeTier >= D3D12_VIDEO_DECODE_TIER_2)) { > + av_log(avctx, AV_LOG_ERROR, "D3D12 decoder doesn't support on this > device\n"); > + return AVERROR(EINVAL); > + } > + > + desc = (D3D12_VIDEO_DECODER_DESC) { > + .NodeMask = 0, > + .Configuration = ctx->cfg, > + }; > + > + DX_CHECK(ID3D12VideoDevice_CreateVideoDecoder(hwctx->video_device, &desc, > &IID_ID3D12VideoDecoder, &ctx->decoder)); > + > + ctx->decoder_ref = bufref_wrap_interface((IUnknown *)ctx->decoder); > + if (!ctx->decoder_ref) > + return AVERROR(ENOMEM); > + > + return 0; > + > +fail: > + return AVERROR(EINVAL); > +} > + > +static inline int d3d12va_get_num_surfaces(enum AVCodecID codec_id) > +{ > + int num_surfaces = 1; > + switch (codec_id) { > + case AV_CODEC_ID_H264: > + case AV_CODEC_ID_HEVC: > + num_surfaces += 16; > + break; > + > + case AV_CODEC_ID_AV1: > + num_surfaces += 12; > + break; > + > + case AV_CODEC_ID_VP9: > + num_surfaces += 8; > + break; > + > + default: > + num_surfaces += 2; > + } > + > + return num_surfaces; > +} > + > +int ff_d3d12va_common_frame_params(AVCodecContext *avctx, AVBufferRef > *hw_frames_ctx) > +{ > + AVHWFramesContext *frames_ctx = (AVHWFramesContext > 
*)hw_frames_ctx->data; > + AVHWDeviceContext *device_ctx = frames_ctx->device_ctx; > + AVD3D12VAFramesContext *frames_hwctx = frames_ctx->hwctx; > + > + frames_ctx->format = AV_PIX_FMT_D3D12; > + frames_ctx->sw_format = avctx->sw_pix_fmt == AV_PIX_FMT_YUV420P10 ? > AV_PIX_FMT_P010 : AV_PIX_FMT_NV12; > + frames_ctx->width = avctx->width; > + frames_ctx->height = avctx->height; > + > + frames_ctx->initial_pool_size = d3d12va_get_num_surfaces(avctx- > >codec_id); > + > + return 0; > +} > + > +int ff_d3d12va_decode_init(AVCodecContext *avctx) > +{ > + int ret; > + UINT bitstream_size; > + AVHWFramesContext *frames_ctx; > + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); > + > + ID3D12CommandAllocator *command_allocator = NULL; > + D3D12_COMMAND_QUEUE_DESC queue_desc = { > + .Type = D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE, > + .Priority = 0, > + .Flags = D3D12_COMMAND_QUEUE_FLAG_NONE, > + .NodeMask = 0 > + }; > + > + ctx->pix_fmt = avctx->hwaccel->pix_fmt; > + > + ret = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_D3D12VA); > + if (ret < 0) > + return ret; > + > + frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx); > + ctx->device_ctx = (AVD3D12VADeviceContext *)frames_ctx->device_ctx- > >hwctx; > + > + if (frames_ctx->format != ctx->pix_fmt) { > + av_log(avctx, AV_LOG_ERROR, "Invalid pixfmt for hwaccel!\n"); > + goto fail; > + } > + > + ret = d3d12va_create_decoder(avctx); > + if (ret < 0) > + goto fail; > + > + ret = d3d12va_create_decoder_heap(avctx); > + if (ret < 0) > + goto fail; > + > + ctx->max_num_ref = frames_ctx->initial_pool_size; > + > + bitstream_size = ff_d3d12va_get_suitable_max_bitstream_size(avctx); > + ctx->buffers = av_calloc(sizeof(ID3D12Resource *), ctx->max_num_ref); > + for (int i = 0; i < ctx->max_num_ref; i++) { > + ret = d3d12va_create_buffer(avctx, bitstream_size, &ctx->buffers[i]); > + if (ret < 0) > + goto fail; > + } > + > + ctx->ref_resources = av_calloc(sizeof(ID3D12Resource *), ctx- > >max_num_ref); > + if (!ctx->ref_resources) > 
+ return AVERROR(ENOMEM); > + > + ctx->ref_subresources = av_calloc(sizeof(UINT), ctx->max_num_ref); > + if (!ctx->ref_subresources) > + return AVERROR(ENOMEM); > + > + ctx->allocator_queue = av_fifo_alloc2(ctx->max_num_ref, > sizeof(CommandAllocator), AV_FIFO_FLAG_AUTO_GROW); > + if (!ctx->allocator_queue) > + return AVERROR(ENOMEM); > + > + ret = av_d3d12va_sync_context_alloc(ctx->device_ctx, &ctx->sync_ctx); > + if (ret < 0) > + goto fail; > + > + ret = d3d12va_get_valid_command_allocator(avctx, &command_allocator); > + if (ret < 0) > + goto fail; > + > + DX_CHECK(ID3D12Device_CreateCommandQueue(ctx->device_ctx->device, > &queue_desc, > + &IID_ID3D12CommandQueue, &ctx->command_queue)); > + > + DX_CHECK(ID3D12Device_CreateCommandList(ctx->device_ctx->device, 0, > queue_desc.Type, > + command_allocator, NULL, &IID_ID3D12CommandList, &ctx- > >command_list)); > + > + DX_CHECK(ID3D12VideoDecodeCommandList_Close(ctx->command_list)); > + > + ID3D12CommandQueue_ExecuteCommandLists(ctx->command_queue, 1, > (ID3D12CommandList **)&ctx->command_list); > + > + d3d12va_wait_for_gpu(avctx); > + > + d3d12va_discard_command_allocator(avctx, command_allocator, ctx- > >sync_ctx->fence_value); > + > + return 0; > + > +fail: > + D3D12_OBJECT_RELEASE(command_allocator); > + ff_d3d12va_decode_uninit(avctx); > + > + return AVERROR(EINVAL); > +} > + > +int ff_d3d12va_decode_uninit(AVCodecContext *avctx) > +{ > + int i, num_allocator = 0; > + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); > + CommandAllocator allocator; > + > + if (ctx->sync_ctx) > + d3d12va_wait_for_gpu(avctx); > + > + av_freep(&ctx->ref_resources); > + > + av_freep(&ctx->ref_subresources); > + > + for (i = 0; i < ctx->max_num_ref; i++) > + D3D12_OBJECT_RELEASE(ctx->buffers[i]); > + > + av_freep(&ctx->buffers); > + > + D3D12_OBJECT_RELEASE(ctx->command_list); > + > + D3D12_OBJECT_RELEASE(ctx->command_queue); > + > + if (ctx->allocator_queue) { > + while (av_fifo_read(ctx->allocator_queue, &allocator, 1) >= 
0) { > + num_allocator++; > + D3D12_OBJECT_RELEASE(allocator.command_allocator); > + } > + > + av_log(avctx, AV_LOG_VERBOSE, "Total number of command allocators > reused: %d\n", num_allocator); > + } > + > + av_fifo_freep2(&ctx->allocator_queue); > + > + av_d3d12va_sync_context_free(&ctx->sync_ctx); > + > + D3D12_OBJECT_RELEASE(ctx->decoder_heap); > + > + av_buffer_unref(&ctx->decoder_ref); > + > + return 0; > +} > + > +static ID3D12Resource *get_surface(const AVFrame *frame) > +{ > + return (ID3D12Resource *)frame->data[0]; > +} > + > +intptr_t ff_d3d12va_get_surface_index(AVCodecContext *ctx, const AVFrame* > frame) > +{ > + return (intptr_t)frame->data[1]; > +} > + > +static AVD3D12VASyncContext *d3d12va_get_sync_context(const AVFrame *frame) > +{ > + return (AVD3D12VASyncContext *)frame->data[2]; > +} > + > +static int d3d12va_begin_update_reference_frames(AVCodecContext *avctx, > D3D12_RESOURCE_BARRIER *barriers, int index) > +{ > + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); > + AVHWFramesContext *frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx); > + AVD3D12VAFramesContext *frames_hwctx = frames_ctx->hwctx; > + > + int num_barrier = 0; > + > + for (int i = 0; i < ctx->max_num_ref; i++) { > + if (ctx->ref_resources[i] && ctx->ref_resources[i] != frames_hwctx- > >texture_infos[index].texture) { > + barriers[num_barrier].Type = > D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; > + barriers[num_barrier].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; > + barriers[num_barrier].Transition = > (D3D12_RESOURCE_TRANSITION_BARRIER){ > + .pResource = ctx->ref_resources[i], > + .Subresource = 0, > + .StateBefore = D3D12_RESOURCE_STATE_COMMON, > + .StateAfter = D3D12_RESOURCE_STATE_VIDEO_DECODE_READ, > + }; > + num_barrier++; > + } > + } > + > + return num_barrier; > +} > + > +static void d3d12va_end_update_reference_frames(AVCodecContext *avctx, > D3D12_RESOURCE_BARRIER *barriers, int index) > +{ > + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); > + 
AVHWFramesContext *frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx); > + AVD3D12VAFramesContext *frames_hwctx = frames_ctx->hwctx; > + int num_barrier = 0; > + > + for (int i = 0; i < ctx->max_num_ref; i++) { > + if (ctx->ref_resources[i] && ctx->ref_resources[i] != frames_hwctx- > >texture_infos[index].texture) { > + barriers[num_barrier].Transition.pResource = ctx- > >ref_resources[i]; > + barriers[num_barrier].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; > + barriers[num_barrier].Transition.StateBefore = > D3D12_RESOURCE_STATE_VIDEO_DECODE_READ; > + barriers[num_barrier].Transition.StateAfter = > D3D12_RESOURCE_STATE_COMMON; > + num_barrier++; > + } > + } > +} > + > +int ff_d3d12va_common_end_frame(AVCodecContext *avctx, AVFrame *frame, > + const void *pp, unsigned pp_size, > + const void *qm, unsigned qm_size, > + int(*update_input_arguments)(AVCodecContext *, > D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS *, ID3D12Resource *)) > +{ > + int ret; > + D3D12VADecodeContext *ctx = > D3D12VA_DECODE_CONTEXT(avctx); > + AVHWFramesContext *frames_ctx = > D3D12VA_FRAMES_CONTEXT(avctx); > + AVD3D12VAFramesContext *frames_hwctx = frames_ctx->hwctx; > + ID3D12CommandAllocator *command_allocator = NULL; > + > + ID3D12Resource *resource = get_surface(frame); > + UINT index = ff_d3d12va_get_surface_index(avctx, frame); > + AVD3D12VASyncContext *sync_ctx = d3d12va_get_sync_context(frame); > + > + ID3D12VideoDecodeCommandList *cmd_list = ctx->command_list; > + D3D12_RESOURCE_BARRIER barriers[D3D12VA_MAX_SURFACES] = { 0 }; > + > + D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS input_args = { > + .NumFrameArguments = 2, > + .FrameArguments = { > + [0] = { > + .Type = D3D12_VIDEO_DECODE_ARGUMENT_TYPE_PICTURE_PARAMETERS, > + .Size = pp_size, > + .pData = (void *)pp, > + }, > + [1] = { > + .Type = > D3D12_VIDEO_DECODE_ARGUMENT_TYPE_INVERSE_QUANTIZATION_MATRIX, > + .Size = qm_size, > + .pData = (void *)qm, > + }, > + }, > + .pHeap = ctx->decoder_heap, > + }; > + > + 
D3D12_VIDEO_DECODE_OUTPUT_STREAM_ARGUMENTS output_args = { > + .ConversionArguments = 0, > + .OutputSubresource = 0, > + .pOutputTexture2D = resource, > + }; > + > + UINT num_barrier = 1; > + barriers[0] = (D3D12_RESOURCE_BARRIER) { > + .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, > + .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, > + .Transition = { > + .pResource = resource, > + .Subresource = 0, > + .StateBefore = D3D12_RESOURCE_STATE_COMMON, > + .StateAfter = D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE, > + }, > + }; > + > + memset(ctx->ref_resources, 0, sizeof(ID3D12Resource *) * ctx- > >max_num_ref); > + memset(ctx->ref_subresources, 0, sizeof(UINT) * ctx->max_num_ref); > + input_args.ReferenceFrames.NumTexture2Ds = ctx->max_num_ref; > + input_args.ReferenceFrames.ppTexture2Ds = ctx->ref_resources; > + input_args.ReferenceFrames.pSubresources = ctx->ref_subresources; > + > + av_d3d12va_wait_idle(sync_ctx); > + > + if (!qm) > + input_args.NumFrameArguments = 1; > + > + ret = update_input_arguments(avctx, &input_args, ctx->buffers[index]); > + if (ret < 0) > + return ret; > + > + ret = d3d12va_get_valid_command_allocator(avctx, &command_allocator); > + if (ret < 0) > + goto fail; > + > + DX_CHECK(ID3D12CommandAllocator_Reset(command_allocator)); > + > + DX_CHECK(ID3D12VideoDecodeCommandList_Reset(cmd_list, > command_allocator)); > + > + num_barrier += d3d12va_begin_update_reference_frames(avctx, &barriers[1], > index); > + > + ID3D12VideoDecodeCommandList_ResourceBarrier(cmd_list, num_barrier, > barriers); > + > + ID3D12VideoDecodeCommandList_DecodeFrame(cmd_list, ctx->decoder, > &output_args, &input_args); > + > + barriers[0].Transition.StateBefore = barriers[0].Transition.StateAfter; > + barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_COMMON; > + d3d12va_end_update_reference_frames(avctx, &barriers[1], index); > + > + ID3D12VideoDecodeCommandList_ResourceBarrier(cmd_list, num_barrier, > barriers); > + > + 
DX_CHECK(ID3D12VideoDecodeCommandList_Close(cmd_list)); > + > + ID3D12CommandQueue_ExecuteCommandLists(ctx->command_queue, 1, > (ID3D12CommandList **)&ctx->command_list); > + > + DX_CHECK(ID3D12CommandQueue_Signal(ctx->command_queue, sync_ctx->fence, > ++sync_ctx->fence_value)); > + > + DX_CHECK(ID3D12CommandQueue_Signal(ctx->command_queue, ctx->sync_ctx- > >fence, ++ctx->sync_ctx->fence_value)); > + > + ret = d3d12va_discard_command_allocator(avctx, command_allocator, ctx- > >sync_ctx->fence_value); > + if (ret < 0) > + return ret; > + > + if (ctx->device_ctx->sync) { > + ret = av_d3d12va_wait_idle(ctx->sync_ctx); > + if (ret < 0) > + return ret; > + } > + > + return 0; > + > +fail: > + if (command_allocator) > + d3d12va_discard_command_allocator(avctx, command_allocator, ctx- > >sync_ctx->fence_value); > + return AVERROR(EINVAL); > +} > diff --git a/libavcodec/d3d12va.h b/libavcodec/d3d12va.h > new file mode 100644 > index 0000000000..da3e7b7ab9 > --- /dev/null > +++ b/libavcodec/d3d12va.h > @@ -0,0 +1,184 @@ > +/* > + * Direct3D 12 HW acceleration video decoder > + * > + * copyright (c) 2022-2023 Wu Jianhua <toqsxw@outlook.com> > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + */ > + > +#ifndef AVCODEC_D3D12VA_H > +#define AVCODEC_D3D12VA_H > + > +#include "libavutil/fifo.h" > +#include "libavutil/hwcontext.h" > +#include "libavutil/hwcontext_d3d12va.h" > +#include "avcodec.h" > +#include "internal.h" > + > +/** > + * @brief This structure is used to provides the necessary configurations and > data > + * to the FFmpeg Direct3D 12 HWAccel implementation for video decoder. > + * > + * The application must make it available as AVCodecContext.hwaccel_context. > + */ Does d3d12va support AVCodecContext.hwaccel_context? If yes, you should make this file public, modify the structures and remove all ff_ functions, otherwise users can't use these structures in an application. If no, please remove the above comment, and it would be better to rename this file to d3d12va_decode.h because the structures and functions in this file are for d3d12va decoders. 
Thanks Haihao > +typedef struct D3D12VADecodeContext { > + AVBufferRef *decoder_ref; > + > + /** > + * D3D12 video decoder > + */ > + ID3D12VideoDecoder *decoder; > + > + /** > + * D3D12 video decoder heap > + */ > + ID3D12VideoDecoderHeap *decoder_heap; > + > + /** > + * D3D12 configuration used to create the decoder > + * > + * Specified by decoders > + */ > + D3D12_VIDEO_DECODE_CONFIGURATION cfg; > + > + /** > + * A cached queue for reusing the D3D12 command allocators > + * > + * @see > https://learn.microsoft.com/en-us/windows/win32/direct3d12/recording-command-lists-and-bundles#id3d12commandallocator > + */ > + AVFifo *allocator_queue; > + > + /** > + * D3D12 command queue > + */ > + ID3D12CommandQueue *command_queue; > + > + /** > + * D3D12 video decode command list > + */ > + ID3D12VideoDecodeCommandList *command_list; > + > + /** > + * The array of buffer resources used to upload compressed bitstream > + * > + * The buffers.length is the same as D3D12VADecodeContext.max_num_ref > + */ > + ID3D12Resource **buffers; > + > + /** > + * The array of resources used for reference frames > + * > + * The ref_resources.length is the same as > D3D12VADecodeContext.max_num_ref > + */ > + ID3D12Resource **ref_resources; > + > + /** > + * The array of subresources used for reference frames > + * > + * The ref_subresources.length is the same as > D3D12VADecodeContext.max_num_ref > + */ > + UINT *ref_subresources; > + > + /** > + * Maximum number of reference frames > + */ > + UINT max_num_ref; > + > + /** > + * The sync context used to sync command queue > + */ > + AVD3D12VASyncContext *sync_ctx; > + > + /** > + * A pointer to AVD3D12VADeviceContext used to create D3D12 objects > + */ > + AVD3D12VADeviceContext *device_ctx; > + > + /** > + * Pixel format > + */ > + enum AVPixelFormat pix_fmt; > + > + /** > + * Private to the FFmpeg AVHWAccel implementation > + */ > + unsigned report_id; > +} D3D12VADecodeContext; > + > +/** > + * @} > + */ > + > +#define 
D3D12VA_DECODE_CONTEXT(avctx) ((D3D12VADecodeContext *)((avctx)- > >internal->hwaccel_priv_data)) > +#define D3D12VA_FRAMES_CONTEXT(avctx) ((AVHWFramesContext *)(avctx)- > >hw_frames_ctx->data) > + > +/** > + * @brief Get a suitable maximum bitstream size > + * > + * Creating and destroying a resource on d3d12 needs sync and reallocation, > so use this function > + * to help allocate a big enough bitstream buffer to avoid recreating > resources when decoding. > + * > + * @return the suitable size > + */ > +int ff_d3d12va_get_suitable_max_bitstream_size(AVCodecContext *avctx); > + > +/** > + * @brief init D3D12VADecodeContext > + * > + * @return Error code (ret < 0 if failed) > + */ > +int ff_d3d12va_decode_init(AVCodecContext *avctx); > + > +/** > + * @brief uninit D3D12VADecodeContext > + * > + * @return Error code (ret < 0 if failed) > + */ > +int ff_d3d12va_decode_uninit(AVCodecContext *avctx); > + > +/** > + * @brief d3d12va common frame params > + * > + * @return Error code (ret < 0 if failed) > + */ > +int ff_d3d12va_common_frame_params(AVCodecContext *avctx, AVBufferRef > *hw_frames_ctx); > + > +/** > + * @brief d3d12va common end frame > + * > + * @param avctx codec context > + * @param frame current output frame > + * @param pp picture parameters > + * @param pp_size the size of the picture parameters > + * @param qm quantization matrix > + * @param qm_size the size of the quantization matrix > + * @param callback update decoder-specified input stream arguments > + * @return Error code (ret < 0 if failed) > + */ > +int ff_d3d12va_common_end_frame(AVCodecContext *avctx, AVFrame *frame, > + const void *pp, unsigned pp_size, > + const void *qm, unsigned qm_size, > + int(*)(AVCodecContext *, D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS *, > ID3D12Resource *)); > + > +/** > + * @brief get surface index > + * > + * @return index > + */ > +intptr_t ff_d3d12va_get_surface_index(AVCodecContext *avctx, const AVFrame > *frame); > + > +#endif /* AVCODEC_D3D12VA_DEC_H */ 
> diff --git a/libavcodec/d3d12va_h264.c b/libavcodec/d3d12va_h264.c > new file mode 100644 > index 0000000000..0810a034b4 > --- /dev/null > +++ b/libavcodec/d3d12va_h264.c > @@ -0,0 +1,210 @@ > +/* > + * Direct3D 12 h264 HW acceleration > + * > + * copyright (c) 2022-2023 Wu Jianhua <toqsxw@outlook.com> > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + */ > + > +#include "config_components.h" > +#include "libavutil/avassert.h" > +#include "h264dec.h" > +#include "h264data.h" > +#include "h264_ps.h" > +#include "mpegutils.h" > +#include "dxva2_internal.h" > +#include "d3d12va.h" > +#include "libavutil/hwcontext_d3d12va_internal.h" > +#include <dxva.h> > + > +typedef struct H264DecodePictureContext { > + DXVA_PicParams_H264 pp; > + DXVA_Qmatrix_H264 qm; > + unsigned slice_count; > + DXVA_Slice_H264_Short slice_short[MAX_SLICES]; > + const uint8_t *bitstream; > + unsigned bitstream_size; > +} H264DecodePictureContext; > + > +static void fill_slice_short(DXVA_Slice_H264_Short *slice, > + unsigned position, unsigned size) > +{ > + memset(slice, 0, sizeof(*slice)); > + slice->BSNALunitDataLocation = position; > + slice->SliceBytesInBuffer = size; > + slice->wBadSliceChopping = 0; > +} > + > +static int 
d3d12va_h264_start_frame(AVCodecContext *avctx, > + av_unused const uint8_t *buffer, > + av_unused uint32_t size) > +{ > + const H264Context *h = avctx->priv_data; > + H264DecodePictureContext *ctx_pic = h->cur_pic_ptr- > >hwaccel_picture_private; > + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); > + > + if (!ctx) > + return -1; > + > + assert(ctx_pic); > + > + ff_dxva2_h264_fill_picture_parameters(avctx, (AVDXVAContext *)ctx, > &ctx_pic->pp); > + > + ff_dxva2_h264_fill_scaling_lists(avctx, (AVDXVAContext *)ctx, &ctx_pic- > >qm); > + > + ctx_pic->slice_count = 0; > + ctx_pic->bitstream_size = 0; > + ctx_pic->bitstream = NULL; > + > + return 0; > +} > + > +static int d3d12va_h264_decode_slice(AVCodecContext *avctx, const uint8_t > *buffer, uint32_t size) > +{ > + unsigned position; > + const H264Context *h = avctx->priv_data; > + const H264SliceContext *sl = &h->slice_ctx[0]; > + const H264Picture *current_picture = h->cur_pic_ptr; > + H264DecodePictureContext *ctx_pic = current_picture- > >hwaccel_picture_private; > + > + if (ctx_pic->slice_count >= MAX_SLICES) > + return AVERROR(ERANGE); > + > + if (!ctx_pic->bitstream) > + ctx_pic->bitstream = buffer; > + ctx_pic->bitstream_size += size; > + > + position = buffer - ctx_pic->bitstream; > + fill_slice_short(&ctx_pic->slice_short[ctx_pic->slice_count], position, > size); > + ctx_pic->slice_count++; > + > + if (sl->slice_type != AV_PICTURE_TYPE_I && sl->slice_type != > AV_PICTURE_TYPE_SI) > + ctx_pic->pp.wBitFields &= ~(1 << 15); /* Set IntraPicFlag to 0 */ > + > + return 0; > +} > + > +#define START_CODE 65536 > +#define START_CODE_SIZE 3 > +static int update_input_arguments(AVCodecContext *avctx, > D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS *input_args, ID3D12Resource *buffer) > +{ > + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); > + AVHWFramesContext *frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx); > + AVD3D12VAFramesContext *frames_hwctx = frames_ctx->hwctx; > + > + const H264Context *h = 
avctx->priv_data; > + const H264Picture *current_picture = h->cur_pic_ptr; > + H264DecodePictureContext *ctx_pic = current_picture- > >hwaccel_picture_private; > + > + int i, index; > + uint8_t *mapped_data, *mapped_ptr; > + DXVA_Slice_H264_Short *slice; > + D3D12_VIDEO_DECODE_FRAME_ARGUMENT *args; > + > + if (FAILED(ID3D12Resource_Map(buffer, 0, NULL, &mapped_data))) { > + av_log(avctx, AV_LOG_ERROR, "Failed to map D3D12 Buffer > resource!\n"); > + return AVERROR(EINVAL); > + } > + > + mapped_ptr = mapped_data; > + for (i = 0; i < ctx_pic->slice_count; i++) { > + UINT position, size; > + slice = &ctx_pic->slice_short[i]; > + > + position = slice->BSNALunitDataLocation; > + size = slice->SliceBytesInBuffer; > + > + slice->SliceBytesInBuffer += START_CODE_SIZE; > + slice->BSNALunitDataLocation = mapped_ptr - mapped_data; > + > + *(uint32_t *)mapped_ptr = START_CODE; > + mapped_ptr += START_CODE_SIZE; > + > + memcpy(mapped_ptr, &ctx_pic->bitstream[position], size); > + mapped_ptr += size; > + } > + > + ID3D12Resource_Unmap(buffer, 0, NULL); > + > + input_args->CompressedBitstream = > (D3D12_VIDEO_DECODE_COMPRESSED_BITSTREAM){ > + .pBuffer = buffer, > + .Offset = 0, > + .Size = mapped_ptr - mapped_data, > + }; > + > + args = &input_args->FrameArguments[input_args->NumFrameArguments++]; > + args->Type = D3D12_VIDEO_DECODE_ARGUMENT_TYPE_SLICE_CONTROL; > + args->Size = sizeof(DXVA_Slice_H264_Short) * ctx_pic->slice_count; > + args->pData = ctx_pic->slice_short; > + > + index = ctx_pic->pp.CurrPic.Index7Bits; > + ctx->ref_resources[index] = frames_hwctx->texture_infos[index].texture; > + for (i = 0; i < FF_ARRAY_ELEMS(ctx_pic->pp.RefFrameList); i++) { > + index = ctx_pic->pp.RefFrameList[i].Index7Bits; > + if (index != 0x7f) > + ctx->ref_resources[index] = frames_hwctx- > >texture_infos[index].texture; > + } > + > + return 0; > +} > + > +static int d3d12va_h264_end_frame(AVCodecContext *avctx) > +{ > + H264Context *h = avctx->priv_data; > + H264DecodePictureContext 
*ctx_pic = h->cur_pic_ptr- > >hwaccel_picture_private; > + H264SliceContext *sl = &h->slice_ctx[0]; > + > + int ret; > + > + if (ctx_pic->slice_count <= 0 || ctx_pic->bitstream_size <= 0) > + return -1; > + > + ret = ff_d3d12va_common_end_frame(avctx, h->cur_pic_ptr->f, > + &ctx_pic->pp, sizeof(ctx_pic->pp), > + &ctx_pic->qm, sizeof(ctx_pic->qm), > + update_input_arguments); > + if (!ret) > + ff_h264_draw_horiz_band(h, sl, 0, h->avctx->height); > + > + return ret; > +} > + > +static int d3d12va_h264_decode_init(AVCodecContext *avctx) > +{ > + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); > + > + ctx->cfg.DecodeProfile = D3D12_VIDEO_DECODE_PROFILE_H264; > + > + return ff_d3d12va_decode_init(avctx); > +} > + > +#if CONFIG_H264_D3D12VA_HWACCEL > +const AVHWAccel ff_h264_d3d12va_hwaccel = { > + .name = "h264_d3d12va", > + .type = AVMEDIA_TYPE_VIDEO, > + .id = AV_CODEC_ID_H264, > + .pix_fmt = AV_PIX_FMT_D3D12, > + .init = d3d12va_h264_decode_init, > + .uninit = ff_d3d12va_decode_uninit, > + .start_frame = d3d12va_h264_start_frame, > + .decode_slice = d3d12va_h264_decode_slice, > + .end_frame = d3d12va_h264_end_frame, > + .frame_params = ff_d3d12va_common_frame_params, > + .frame_priv_data_size = sizeof(H264DecodePictureContext), > + .priv_data_size = sizeof(D3D12VADecodeContext), > +}; > +#endif > diff --git a/libavcodec/dxva2.c b/libavcodec/dxva2.c > index 568d686f39..b22ea3e8f2 100644 > --- a/libavcodec/dxva2.c > +++ b/libavcodec/dxva2.c > @@ -774,6 +774,10 @@ unsigned ff_dxva2_get_surface_index(const AVCodecContext > *avctx, > void *surface = get_surface(avctx, frame); > unsigned i; > > +#if CONFIG_D3D12VA > + if (avctx->pix_fmt == AV_PIX_FMT_D3D12) > + return (intptr_t)frame->data[1]; > +#endif > #if CONFIG_D3D11VA > if (avctx->pix_fmt == AV_PIX_FMT_D3D11) > return (intptr_t)frame->data[1]; > @@ -1056,3 +1060,23 @@ int ff_dxva2_is_d3d11(const AVCodecContext *avctx) > else > return 0; > } > + > +unsigned *ff_dxva2_get_report_id(const AVCodecContext 
*avctx, AVDXVAContext > *ctx) > +{ > + unsigned *report_id = NULL; > + > +#if CONFIG_D3D12VA > + if (avctx->pix_fmt == AV_PIX_FMT_D3D12) > + report_id = &ctx->d3d12va.report_id; > +#endif > +#if CONFIG_D3D11VA > + if (ff_dxva2_is_d3d11(avctx)) > + report_id = &ctx->d3d11va.report_id; > +#endif > +#if CONFIG_DXVA2 > + if (avctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD) > + report_id = &ctx->dxva2.report_id; > +#endif > + > + return report_id; > +} > diff --git a/libavcodec/dxva2.h b/libavcodec/dxva2.h > index 22c93992f2..bdec6112e9 100644 > --- a/libavcodec/dxva2.h > +++ b/libavcodec/dxva2.h > @@ -45,9 +45,6 @@ > * @{ > */ > > -#define FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG 1 ///< Work around for DXVA2 > and old UVD/UVD+ ATI video cards > -#define FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO 2 ///< Work around for DXVA2 > and old Intel GPUs with ClearVideo interface > - > /** > * This structure is used to provides the necessary configurations and data > * to the DXVA2 FFmpeg HWAccel implementation. > diff --git a/libavcodec/dxva2_h264.c b/libavcodec/dxva2_h264.c > index 6300b1418d..7a076ea981 100644 > --- a/libavcodec/dxva2_h264.c > +++ b/libavcodec/dxva2_h264.c > @@ -47,9 +47,10 @@ static void fill_picture_entry(DXVA_PicEntry_H264 *pic, > pic->bPicEntry = index | (flag << 7); > } > > -static void fill_picture_parameters(const AVCodecContext *avctx, > AVDXVAContext *ctx, const H264Context *h, > +void ff_dxva2_h264_fill_picture_parameters(const AVCodecContext *avctx, > AVDXVAContext *ctx, > DXVA_PicParams_H264 *pp) > { > + const H264Context *h = avctx->priv_data; > const H264Picture *current_picture = h->cur_pic_ptr; > const SPS *sps = h->ps.sps; > const PPS *pps = h->ps.pps; > @@ -163,9 +164,10 @@ static void fill_picture_parameters(const AVCodecContext > *avctx, AVDXVAContext * > //pp->SliceGroupMap[810]; /* XXX not implemented by FFmpeg > */ > } > > -static void fill_scaling_lists(const AVCodecContext *avctx, AVDXVAContext > *ctx, const H264Context *h, DXVA_Qmatrix_H264 *qm) > 
+void ff_dxva2_h264_fill_scaling_lists(const AVCodecContext *avctx, > AVDXVAContext *ctx, DXVA_Qmatrix_H264 *qm) > { > - const PPS *pps = h->ps.pps; > + const H264Context *h = avctx->priv_data; > + const PPS *pps = h->ps.pps; > unsigned i, j; > memset(qm, 0, sizeof(*qm)); > if (DXVA_CONTEXT_WORKAROUND(avctx, ctx) & > FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG) { > @@ -453,10 +455,10 @@ static int dxva2_h264_start_frame(AVCodecContext *avctx, > assert(ctx_pic); > > /* Fill up DXVA_PicParams_H264 */ > - fill_picture_parameters(avctx, ctx, h, &ctx_pic->pp); > + ff_dxva2_h264_fill_picture_parameters(avctx, ctx, &ctx_pic->pp); > > /* Fill up DXVA_Qmatrix_H264 */ > - fill_scaling_lists(avctx, ctx, h, &ctx_pic->qm); > + ff_dxva2_h264_fill_scaling_lists(avctx, ctx, &ctx_pic->qm); > > ctx_pic->slice_count = 0; > ctx_pic->bitstream_size = 0; > diff --git a/libavcodec/dxva2_internal.h b/libavcodec/dxva2_internal.h > index b822af59cd..a9a1fc090e 100644 > --- a/libavcodec/dxva2_internal.h > +++ b/libavcodec/dxva2_internal.h > @@ -26,18 +26,34 @@ > #define COBJMACROS > > #include "config.h" > +#include "config_components.h" > > /* define the proper COM entries before forcing desktop APIs */ > #include <objbase.h> > > +#define FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG 1 ///< Work around for > DXVA2/Direct3D11 and old UVD/UVD+ ATI video cards > +#define FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO 2 ///< Work around for > DXVA2/Direct3D11 and old Intel GPUs with ClearVideo interface > + > #if CONFIG_DXVA2 > #include "dxva2.h" > #include "libavutil/hwcontext_dxva2.h" > +#define DXVA2_VAR(ctx, var) ctx->dxva2.var > +#else > +#define DXVA2_VAR(ctx, var) 0 > #endif > + > #if CONFIG_D3D11VA > #include "d3d11va.h" > #include "libavutil/hwcontext_d3d11va.h" > +#define D3D11VA_VAR(ctx, var) ctx->d3d11va.var > +#else > +#define D3D11VA_VAR(ctx, var) 0 > +#endif > + > +#if CONFIG_D3D12VA > +#include "d3d12va.h" > #endif > + > #if HAVE_DXVA_H > /* When targeting WINAPI_FAMILY_PHONE_APP or 
WINAPI_FAMILY_APP, dxva.h > * defines nothing. Force the struct definitions to be visible. */ > @@ -62,6 +78,9 @@ typedef union { > #if CONFIG_DXVA2 > struct dxva_context dxva2; > #endif > +#if CONFIG_D3D12VA > + struct D3D12VADecodeContext d3d12va; > +#endif > } AVDXVAContext; > > typedef struct FFDXVASharedContext { > @@ -101,39 +120,19 @@ typedef struct FFDXVASharedContext { > #define D3D11VA_CONTEXT(ctx) (&ctx->d3d11va) > #define DXVA2_CONTEXT(ctx) (&ctx->dxva2) > > -#if CONFIG_D3D11VA && CONFIG_DXVA2 > -#define DXVA_CONTEXT_WORKAROUND(avctx, ctx) (ff_dxva2_is_d3d11(avctx) ? > ctx->d3d11va.workaround : ctx->dxva2.workaround) > -#define DXVA_CONTEXT_COUNT(avctx, ctx) (ff_dxva2_is_d3d11(avctx) ? > ctx->d3d11va.surface_count : ctx->dxva2.surface_count) > -#define DXVA_CONTEXT_DECODER(avctx, ctx) (ff_dxva2_is_d3d11(avctx) ? > (void *)ctx->d3d11va.decoder : (void *)ctx->dxva2.decoder) > -#define DXVA_CONTEXT_REPORT_ID(avctx, ctx) (*(ff_dxva2_is_d3d11(avctx) ? > &ctx->d3d11va.report_id : &ctx->dxva2.report_id)) > -#define DXVA_CONTEXT_CFG(avctx, ctx) (ff_dxva2_is_d3d11(avctx) ? > (void *)ctx->d3d11va.cfg : (void *)ctx->dxva2.cfg) > -#define DXVA_CONTEXT_CFG_BITSTREAM(avctx, ctx) (ff_dxva2_is_d3d11(avctx) ? > ctx->d3d11va.cfg->ConfigBitstreamRaw : ctx->dxva2.cfg->ConfigBitstreamRaw) > -#define DXVA_CONTEXT_CFG_INTRARESID(avctx, ctx) (ff_dxva2_is_d3d11(avctx) ? > ctx->d3d11va.cfg->ConfigIntraResidUnsigned : ctx->dxva2.cfg- > >ConfigIntraResidUnsigned) > -#define DXVA_CONTEXT_CFG_RESIDACCEL(avctx, ctx) (ff_dxva2_is_d3d11(avctx) ? > ctx->d3d11va.cfg->ConfigResidDiffAccelerator : ctx->dxva2.cfg- > >ConfigResidDiffAccelerator) > +#define DXVA2_CONTEXT_VAR(avctx, ctx, var) (avctx->pix_fmt == > AV_PIX_FMT_D3D12 ? 0 : (ff_dxva2_is_d3d11(avctx) ? 
D3D11VA_VAR(ctx, var) : > DXVA2_VAR(ctx, var))) > + > +#define DXVA_CONTEXT_REPORT_ID(avctx, ctx) > (*ff_dxva2_get_report_id(avctx, ctx)) > +#define DXVA_CONTEXT_WORKAROUND(avctx, ctx) DXVA2_CONTEXT_VAR(avctx, ctx, > workaround) > +#define DXVA_CONTEXT_COUNT(avctx, ctx) DXVA2_CONTEXT_VAR(avctx, ctx, > surface_count) > +#define DXVA_CONTEXT_DECODER(avctx, ctx) (avctx->pix_fmt == > AV_PIX_FMT_D3D12 ? 0 : (ff_dxva2_is_d3d11(avctx) ? (void *)D3D11VA_VAR(ctx, > decoder) : (void *)DXVA2_VAR(ctx, decoder))) > +#define DXVA_CONTEXT_CFG(avctx, ctx) (avctx->pix_fmt == > AV_PIX_FMT_D3D12 ? 0 : (ff_dxva2_is_d3d11(avctx) ? (void *)D3D11VA_VAR(ctx, > cfg) : (void *)DXVA2_VAR(ctx, cfg))) > +#define DXVA_CONTEXT_CFG_BITSTREAM(avctx, ctx) DXVA2_CONTEXT_VAR(avctx, ctx, > cfg->ConfigBitstreamRaw) > +#define DXVA_CONTEXT_CFG_INTRARESID(avctx, ctx) DXVA2_CONTEXT_VAR(avctx, ctx, > cfg->ConfigIntraResidUnsigned) > +#define DXVA_CONTEXT_CFG_RESIDACCEL(avctx, ctx) DXVA2_CONTEXT_VAR(avctx, ctx, > cfg->ConfigResidDiffAccelerator) > #define DXVA_CONTEXT_VALID(avctx, ctx) (DXVA_CONTEXT_DECODER(avctx, > ctx) && \ > DXVA_CONTEXT_CFG(avctx, > ctx) && \ > - (ff_dxva2_is_d3d11(avctx) || > ctx->dxva2.surface_count)) > -#elif CONFIG_DXVA2 > -#define DXVA_CONTEXT_WORKAROUND(avctx, ctx) (ctx->dxva2.workaround) > -#define DXVA_CONTEXT_COUNT(avctx, ctx) (ctx->dxva2.surface_count) > -#define DXVA_CONTEXT_DECODER(avctx, ctx) (ctx->dxva2.decoder) > -#define DXVA_CONTEXT_REPORT_ID(avctx, ctx) (*(&ctx->dxva2.report_id)) > -#define DXVA_CONTEXT_CFG(avctx, ctx) (ctx->dxva2.cfg) > -#define DXVA_CONTEXT_CFG_BITSTREAM(avctx, ctx) (ctx->dxva2.cfg- > >ConfigBitstreamRaw) > -#define DXVA_CONTEXT_CFG_INTRARESID(avctx, ctx) (ctx->dxva2.cfg- > >ConfigIntraResidUnsigned) > -#define DXVA_CONTEXT_CFG_RESIDACCEL(avctx, ctx) (ctx->dxva2.cfg- > >ConfigResidDiffAccelerator) > -#define DXVA_CONTEXT_VALID(avctx, ctx) (ctx->dxva2.decoder && ctx- > >dxva2.cfg && ctx->dxva2.surface_count) > -#elif CONFIG_D3D11VA > -#define 
DXVA_CONTEXT_WORKAROUND(avctx, ctx) (ctx->d3d11va.workaround) > -#define DXVA_CONTEXT_COUNT(avctx, ctx) (ctx->d3d11va.surface_count) > -#define DXVA_CONTEXT_DECODER(avctx, ctx) (ctx->d3d11va.decoder) > -#define DXVA_CONTEXT_REPORT_ID(avctx, ctx) (*(&ctx->d3d11va.report_id)) > -#define DXVA_CONTEXT_CFG(avctx, ctx) (ctx->d3d11va.cfg) > -#define DXVA_CONTEXT_CFG_BITSTREAM(avctx, ctx) (ctx->d3d11va.cfg- > >ConfigBitstreamRaw) > -#define DXVA_CONTEXT_CFG_INTRARESID(avctx, ctx) (ctx->d3d11va.cfg- > >ConfigIntraResidUnsigned) > -#define DXVA_CONTEXT_CFG_RESIDACCEL(avctx, ctx) (ctx->d3d11va.cfg- > >ConfigResidDiffAccelerator) > -#define DXVA_CONTEXT_VALID(avctx, ctx) (ctx->d3d11va.decoder && ctx- > >d3d11va.cfg) > -#endif > + (ff_dxva2_is_d3d11(avctx) || > DXVA2_VAR(ctx, surface_count))) > > unsigned ff_dxva2_get_surface_index(const AVCodecContext *avctx, > const AVDXVAContext *, > @@ -161,4 +160,10 @@ int ff_dxva2_common_frame_params(AVCodecContext *avctx, > > int ff_dxva2_is_d3d11(const AVCodecContext *avctx); > > +unsigned *ff_dxva2_get_report_id(const AVCodecContext *avctx, AVDXVAContext > *ctx); > + > +void ff_dxva2_h264_fill_picture_parameters(const AVCodecContext *avctx, > AVDXVAContext *ctx, DXVA_PicParams_H264 *pp); > + > +void ff_dxva2_h264_fill_scaling_lists(const AVCodecContext *avctx, > AVDXVAContext *ctx, DXVA_Qmatrix_H264 *qm); > + > #endif /* AVCODEC_DXVA2_INTERNAL_H */ > diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c > index 41bf30eefc..df70ad8a2f 100644 > --- a/libavcodec/h264_slice.c > +++ b/libavcodec/h264_slice.c > @@ -778,6 +778,7 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, > int force_callback) > { > #define HWACCEL_MAX (CONFIG_H264_DXVA2_HWACCEL + \ > (CONFIG_H264_D3D11VA_HWACCEL * 2) + \ > + CONFIG_H264_D3D12VA_HWACCEL + \ > CONFIG_H264_NVDEC_HWACCEL + \ > CONFIG_H264_VAAPI_HWACCEL + \ > CONFIG_H264_VIDEOTOOLBOX_HWACCEL + \ > @@ -883,6 +884,9 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, > int 
force_callback) > *fmt++ = AV_PIX_FMT_D3D11VA_VLD; > *fmt++ = AV_PIX_FMT_D3D11; > #endif > +#if CONFIG_H264_D3D12VA_HWACCEL > + *fmt++ = AV_PIX_FMT_D3D12; > +#endif > #if CONFIG_H264_VAAPI_HWACCEL > *fmt++ = AV_PIX_FMT_VAAPI; > #endif > diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c > index 19f8dba131..853d3262f7 100644 > --- a/libavcodec/h264dec.c > +++ b/libavcodec/h264dec.c > @@ -1089,6 +1089,9 @@ const FFCodec ff_h264_decoder = { > #if CONFIG_H264_D3D11VA2_HWACCEL > HWACCEL_D3D11VA2(h264), > #endif > +#if CONFIG_H264_D3D12VA_HWACCEL > + HWACCEL_D3D12VA(h264), > +#endif > #if CONFIG_H264_NVDEC_HWACCEL > HWACCEL_NVDEC(h264), > #endif > diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h > index 48dfc17f72..be54604b81 100644 > --- a/libavcodec/hwaccels.h > +++ b/libavcodec/hwaccels.h > @@ -32,6 +32,7 @@ extern const AVHWAccel ff_h263_vaapi_hwaccel; > extern const AVHWAccel ff_h263_videotoolbox_hwaccel; > extern const AVHWAccel ff_h264_d3d11va_hwaccel; > extern const AVHWAccel ff_h264_d3d11va2_hwaccel; > +extern const AVHWAccel ff_h264_d3d12va_hwaccel; > extern const AVHWAccel ff_h264_dxva2_hwaccel; > extern const AVHWAccel ff_h264_nvdec_hwaccel; > extern const AVHWAccel ff_h264_vaapi_hwaccel; > diff --git a/libavcodec/hwconfig.h b/libavcodec/hwconfig.h > index e8c6186151..e20118c096 100644 > --- a/libavcodec/hwconfig.h > +++ b/libavcodec/hwconfig.h > @@ -82,6 +82,8 @@ void ff_hwaccel_uninit(AVCodecContext *avctx); > HW_CONFIG_HWACCEL(1, 1, 1, VULKAN, VULKAN, ff_ ## codec ## > _vulkan_hwaccel) > #define HWACCEL_D3D11VA(codec) \ > HW_CONFIG_HWACCEL(0, 0, 1, D3D11VA_VLD, NONE, ff_ ## codec ## > _d3d11va_hwaccel) > +#define HWACCEL_D3D12VA(codec) \ > + HW_CONFIG_HWACCEL(1, 1, 0, D3D12, D3D12VA, ff_ ## codec ## > _d3d12va_hwaccel) > > #define HW_CONFIG_ENCODER(device, frames, ad_hoc, format, device_type_) \ > &(const AVCodecHWConfigInternal) { \
>On Vr, 2023-06-02 at 16:06 +0800, Tong Wu wrote: >> From: Wu Jianhua <toqsxw@outlook.com> >> >> The implementation is based on: >> https://learn.microsoft.com/en-us/windows/win32/medfound/direct3d-12- >video-overview >> >> With the Direct3D 12 video decoding support, we can render or process >> the decoded images by the pixel shaders or compute shaders directly >> without the extra copy overhead, which is beneficial especially if you >> are trying to render or post-process a 4K or 8K video. >> >> The command below is how to enable d3d12va: >> ffmpeg -hwaccel d3d12va -i input.mp4 output.mp4 >> >> Signed-off-by: Wu Jianhua <toqsxw@outlook.com> >> Signed-off-by: Tong Wu <tong1.wu@intel.com> >> --- >> configure | 2 + >> libavcodec/Makefile | 3 + >> libavcodec/d3d11va.h | 3 - >> libavcodec/d3d12va.c | 552 ++++++++++++++++++++++++++++++++++++ >> libavcodec/d3d12va.h | 184 ++++++++++++ >> libavcodec/d3d12va_h264.c | 210 ++++++++++++++ >> libavcodec/dxva2.c | 24 ++ >> libavcodec/dxva2.h | 3 - >> libavcodec/dxva2_h264.c | 12 +- >> libavcodec/dxva2_internal.h | 67 +++-- >> libavcodec/h264_slice.c | 4 + >> libavcodec/h264dec.c | 3 + >> libavcodec/hwaccels.h | 1 + >> libavcodec/hwconfig.h | 2 + >> 14 files changed, 1028 insertions(+), 42 deletions(-) >> create mode 100644 libavcodec/d3d12va.c >> create mode 100644 libavcodec/d3d12va.h >> create mode 100644 libavcodec/d3d12va_h264.c >> >> diff --git a/configure b/configure >> index b86064e36f..f5dad4653f 100755 >> --- a/configure >> +++ b/configure >> @@ -3033,6 +3033,8 @@ h264_d3d11va_hwaccel_deps="d3d11va" >> h264_d3d11va_hwaccel_select="h264_decoder" >> h264_d3d11va2_hwaccel_deps="d3d11va" >> h264_d3d11va2_hwaccel_select="h264_decoder" >> +h264_d3d12va_hwaccel_deps="d3d12va" >> +h264_d3d12va_hwaccel_select="h264_decoder" >> h264_dxva2_hwaccel_deps="dxva2" >> h264_dxva2_hwaccel_select="h264_decoder" >> h264_nvdec_hwaccel_deps="nvdec" >> diff --git a/libavcodec/Makefile b/libavcodec/Makefile >> index 9aacc1d477..ae143d8821 
100644 >> --- a/libavcodec/Makefile >> +++ b/libavcodec/Makefile >> @@ -977,6 +977,7 @@ OBJS-$(CONFIG_ADPCM_ZORK_DECODER) += >adpcm.o >> adpcm_data.o >> >> # hardware accelerators >> OBJS-$(CONFIG_D3D11VA) += dxva2.o >> +OBJS-$(CONFIG_D3D12VA) += dxva2.o d3d12va.o >> OBJS-$(CONFIG_DXVA2) += dxva2.o >> OBJS-$(CONFIG_NVDEC) += nvdec.o >> OBJS-$(CONFIG_VAAPI) += vaapi_decode.o >> @@ -994,6 +995,7 @@ OBJS-$(CONFIG_H263_VAAPI_HWACCEL) += >vaapi_mpeg4.o >> OBJS-$(CONFIG_H263_VIDEOTOOLBOX_HWACCEL) += videotoolbox.o >> OBJS-$(CONFIG_H264_D3D11VA_HWACCEL) += dxva2_h264.o >> OBJS-$(CONFIG_H264_DXVA2_HWACCEL) += dxva2_h264.o >> +OBJS-$(CONFIG_H264_D3D12VA_HWACCEL) += dxva2_h264.o >d3d12va_h264.o >> OBJS-$(CONFIG_H264_NVDEC_HWACCEL) += nvdec_h264.o >> OBJS-$(CONFIG_H264_QSV_HWACCEL) += qsvdec.o >> OBJS-$(CONFIG_H264_VAAPI_HWACCEL) += vaapi_h264.o >> @@ -1277,6 +1279,7 @@ SKIPHEADERS += >> %_tablegen.h \ >> >> SKIPHEADERS-$(CONFIG_AMF) += amfenc.h >> SKIPHEADERS-$(CONFIG_D3D11VA) += d3d11va.h dxva2_internal.h >> +SKIPHEADERS-$(CONFIG_D3D12VA) += d3d12va.h >> SKIPHEADERS-$(CONFIG_DXVA2) += dxva2.h dxva2_internal.h >> SKIPHEADERS-$(CONFIG_JNI) += ffjni.h >> SKIPHEADERS-$(CONFIG_LCMS2) += fflcms2.h >> diff --git a/libavcodec/d3d11va.h b/libavcodec/d3d11va.h >> index 6816b6c1e6..27f40e5519 100644 >> --- a/libavcodec/d3d11va.h >> +++ b/libavcodec/d3d11va.h >> @@ -45,9 +45,6 @@ >> * @{ >> */ >> >> -#define FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG 1 ///< Work >around for >> Direct3D11 and old UVD/UVD+ ATI video cards >> -#define FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO 2 ///< Work >around for >> Direct3D11 and old Intel GPUs with ClearVideo interface >> - >> /** >> * This structure is used to provides the necessary configurations and data >> * to the Direct3D11 FFmpeg HWAccel implementation. 
>> diff --git a/libavcodec/d3d12va.c b/libavcodec/d3d12va.c >> new file mode 100644 >> index 0000000000..7f1fab7251 >> --- /dev/null >> +++ b/libavcodec/d3d12va.c >> @@ -0,0 +1,552 @@ >> +/* >> + * Direct3D 12 HW acceleration video decoder >> + * >> + * copyright (c) 2022-2023 Wu Jianhua <toqsxw@outlook.com> >> + * >> + * This file is part of FFmpeg. >> + * >> + * FFmpeg is free software; you can redistribute it and/or >> + * modify it under the terms of the GNU Lesser General Public >> + * License as published by the Free Software Foundation; either >> + * version 2.1 of the License, or (at your option) any later version. >> + * >> + * FFmpeg is distributed in the hope that it will be useful, >> + * but WITHOUT ANY WARRANTY; without even the implied warranty of >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >GNU >> + * Lesser General Public License for more details. >> + * >> + * You should have received a copy of the GNU Lesser General Public >> + * License along with FFmpeg; if not, write to the Free Software >> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 >> USA >> + */ >> + >> +#include <assert.h> >> +#include <string.h> >> +#include <initguid.h> >> + >> +#include "libavutil/common.h" >> +#include "libavutil/log.h" >> +#include "libavutil/time.h" >> +#include "libavutil/imgutils.h" >> +#include "libavutil/hwcontext_d3d12va_internal.h" >> +#include "libavutil/hwcontext_d3d12va.h" >> +#include "avcodec.h" >> +#include "decode.h" >> +#include "d3d12va.h" >> + >> +typedef struct CommandAllocator { >> + ID3D12CommandAllocator *command_allocator; >> + uint64_t fence_value; >> +} CommandAllocator; >> + >> +int ff_d3d12va_get_suitable_max_bitstream_size(AVCodecContext *avctx) >> +{ >> + AVHWFramesContext *frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx); >> + return av_image_get_buffer_size(frames_ctx->sw_format, avctx- >> >coded_width, avctx->coded_height, 1); >> +} >> + >> +static int 
d3d12va_get_valid_command_allocator(AVCodecContext *avctx, >> ID3D12CommandAllocator **ppAllocator) >> +{ >> + HRESULT hr; >> + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); >> + CommandAllocator allocator; >> + >> + if (av_fifo_peek(ctx->allocator_queue, &allocator, 1, 0) >= 0) { >> + uint64_t completion = ID3D12Fence_GetCompletedValue(ctx- >>sync_ctx- >> >fence); >> + if (completion >= allocator.fence_value) { >> + *ppAllocator = allocator.command_allocator; >> + av_fifo_read(ctx->allocator_queue, &allocator, 1); >> + return 0; >> + } >> + } >> + >> + hr = ID3D12Device_CreateCommandAllocator(ctx->device_ctx->device, >> D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE, >> + &IID_ID3D12CommandAllocator, ppAllocator); >> + if (FAILED(hr)) { >> + av_log(avctx, AV_LOG_ERROR, "Failed to create a new command >> allocator!\n"); >> + return AVERROR(EINVAL); >> + } >> + >> + return 0; >> +} >> + >> +static int d3d12va_discard_command_allocator(AVCodecContext *avctx, >> ID3D12CommandAllocator *pAllocator, uint64_t fence_value) >> +{ >> + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); >> + >> + CommandAllocator allocator = { >> + .command_allocator = pAllocator, >> + .fence_value = fence_value >> + }; >> + >> + if (av_fifo_write(ctx->allocator_queue, &allocator, 1) < 0) { >> + D3D12_OBJECT_RELEASE(pAllocator); >> + return AVERROR(ENOMEM); >> + } >> + >> + return 0; >> +} >> + >> +static void bufref_free_interface(void *opaque, uint8_t *data) >> +{ >> + D3D12_OBJECT_RELEASE(opaque); >> +} >> + >> +static AVBufferRef *bufref_wrap_interface(IUnknown *iface) >> +{ >> + return av_buffer_create((uint8_t*)iface, 1, bufref_free_interface, iface, >> 0); >> +} >> + >> +static int d3d12va_create_buffer(AVCodecContext *avctx, UINT size, >> ID3D12Resource **ppResouce) >> +{ >> + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); >> + >> + D3D12_HEAP_PROPERTIES heap_props = { .Type = >D3D12_HEAP_TYPE_UPLOAD }; >> + >> + D3D12_RESOURCE_DESC desc = { >> + .Dimension = 
D3D12_RESOURCE_DIMENSION_BUFFER, >> + .Alignment = >D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT, >> + .Width = size, >> + .Height = 1, >> + .DepthOrArraySize = 1, >> + .MipLevels = 1, >> + .Format = DXGI_FORMAT_UNKNOWN, >> + .SampleDesc = { .Count = 1, .Quality = 0 }, >> + .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR, >> + .Flags = D3D12_RESOURCE_FLAG_NONE, >> + }; >> + >> + HRESULT hr = ID3D12Device_CreateCommittedResource(ctx->device_ctx- >> >device, &heap_props, D3D12_HEAP_FLAG_NONE, >> + &desc, D3D12_RESOURCE_STATE_GENERIC_READ, NULL, >&IID_ID3D12Resource, >> ppResouce); >> + >> + if (FAILED(hr)) { >> + av_log(avctx, AV_LOG_ERROR, "Failed to create d3d12 buffer.\n"); >> + return AVERROR(EINVAL); >> + } >> + >> + return 0; >> +} >> + >> +static int d3d12va_wait_for_gpu(AVCodecContext *avctx) >> +{ >> + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); >> + AVD3D12VASyncContext *sync_ctx = ctx->sync_ctx; >> + >> + return av_d3d12va_wait_queue_idle(sync_ctx, ctx->command_queue); >> +} >> + >> +static int d3d12va_create_decoder_heap(AVCodecContext *avctx) >> +{ >> + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); >> + AVHWFramesContext *frames_ctx = >D3D12VA_FRAMES_CONTEXT(avctx); >> + AVD3D12VADeviceContext *hwctx = ctx->device_ctx; >> + >> + D3D12_VIDEO_DECODER_HEAP_DESC desc = { >> + .NodeMask = 0, >> + .Configuration = ctx->cfg, >> + .DecodeWidth = frames_ctx->width, >> + .DecodeHeight = frames_ctx->height, >> + .Format = av_d3d12va_map_sw_to_hw_format(frames_ctx- >> >sw_format), >> + .FrameRate = { avctx->framerate.num, avctx->framerate.den }, >> + .BitRate = avctx->bit_rate, >> + .MaxDecodePictureBufferCount = frames_ctx->initial_pool_size, >> + }; >> + >> + DX_CHECK(ID3D12VideoDevice_CreateVideoDecoderHeap(hwctx- >>video_device, >> &desc, >> + &IID_ID3D12VideoDecoderHeap, &ctx->decoder_heap)); >> + >> + return 0; >> + >> +fail: >> + if (ctx->decoder) { >> + av_log(avctx, AV_LOG_ERROR, "D3D12 doesn't support decoding >frames >> with an extent " 
>> + "[width(%d), height(%d)], on your device!\n", frames_ctx->width, >> frames_ctx->height); >> + } >> + >> + return AVERROR(EINVAL); >> +} >> + >> +static int d3d12va_create_decoder(AVCodecContext *avctx) >> +{ >> + D3D12_VIDEO_DECODER_DESC desc; >> + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); >> + AVHWFramesContext *frames_ctx = >D3D12VA_FRAMES_CONTEXT(avctx); >> + AVD3D12VADeviceContext *hwctx = ctx->device_ctx; >> + >> + D3D12_FEATURE_DATA_VIDEO_DECODE_SUPPORT feature = { >> + .NodeIndex = 0, >> + .Configuration = ctx->cfg, >> + .Width = frames_ctx->width, >> + .Height = frames_ctx->height, >> + .DecodeFormat = av_d3d12va_map_sw_to_hw_format(frames_ctx- >> >sw_format), >> + .FrameRate = { avctx->framerate.num, avctx->framerate.den }, >> + .BitRate = avctx->bit_rate, >> + }; >> + >> + DX_CHECK(ID3D12VideoDevice_CheckFeatureSupport(hwctx- >>video_device, >> D3D12_FEATURE_VIDEO_DECODE_SUPPORT, &feature, sizeof(feature))); >> + if (!(feature.SupportFlags & >D3D12_VIDEO_DECODE_SUPPORT_FLAG_SUPPORTED) >> || >> + !(feature.DecodeTier >= D3D12_VIDEO_DECODE_TIER_2)) { >> + av_log(avctx, AV_LOG_ERROR, "D3D12 decoder doesn't support on >this >> device\n"); >> + return AVERROR(EINVAL); >> + } >> + >> + desc = (D3D12_VIDEO_DECODER_DESC) { >> + .NodeMask = 0, >> + .Configuration = ctx->cfg, >> + }; >> + >> + DX_CHECK(ID3D12VideoDevice_CreateVideoDecoder(hwctx- >>video_device, &desc, >> &IID_ID3D12VideoDecoder, &ctx->decoder)); >> + >> + ctx->decoder_ref = bufref_wrap_interface((IUnknown *)ctx->decoder); >> + if (!ctx->decoder_ref) >> + return AVERROR(ENOMEM); >> + >> + return 0; >> + >> +fail: >> + return AVERROR(EINVAL); >> +} >> + >> +static inline int d3d12va_get_num_surfaces(enum AVCodecID codec_id) >> +{ >> + int num_surfaces = 1; >> + switch (codec_id) { >> + case AV_CODEC_ID_H264: >> + case AV_CODEC_ID_HEVC: >> + num_surfaces += 16; >> + break; >> + >> + case AV_CODEC_ID_AV1: >> + num_surfaces += 12; >> + break; >> + >> + case AV_CODEC_ID_VP9: >> + 
num_surfaces += 8; >> + break; >> + >> + default: >> + num_surfaces += 2; >> + } >> + >> + return num_surfaces; >> +} >> + >> +int ff_d3d12va_common_frame_params(AVCodecContext *avctx, >AVBufferRef >> *hw_frames_ctx) >> +{ >> + AVHWFramesContext *frames_ctx = (AVHWFramesContext >> *)hw_frames_ctx->data; >> + AVHWDeviceContext *device_ctx = frames_ctx->device_ctx; >> + AVD3D12VAFramesContext *frames_hwctx = frames_ctx->hwctx; >> + >> + frames_ctx->format = AV_PIX_FMT_D3D12; >> + frames_ctx->sw_format = avctx->sw_pix_fmt == >AV_PIX_FMT_YUV420P10 ? >> AV_PIX_FMT_P010 : AV_PIX_FMT_NV12; >> + frames_ctx->width = avctx->width; >> + frames_ctx->height = avctx->height; >> + >> + frames_ctx->initial_pool_size = d3d12va_get_num_surfaces(avctx- >> >codec_id); >> + >> + return 0; >> +} >> + >> +int ff_d3d12va_decode_init(AVCodecContext *avctx) >> +{ >> + int ret; >> + UINT bitstream_size; >> + AVHWFramesContext *frames_ctx; >> + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); >> + >> + ID3D12CommandAllocator *command_allocator = NULL; >> + D3D12_COMMAND_QUEUE_DESC queue_desc = { >> + .Type = D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE, >> + .Priority = 0, >> + .Flags = D3D12_COMMAND_QUEUE_FLAG_NONE, >> + .NodeMask = 0 >> + }; >> + >> + ctx->pix_fmt = avctx->hwaccel->pix_fmt; >> + >> + ret = ff_decode_get_hw_frames_ctx(avctx, >AV_HWDEVICE_TYPE_D3D12VA); >> + if (ret < 0) >> + return ret; >> + >> + frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx); >> + ctx->device_ctx = (AVD3D12VADeviceContext *)frames_ctx->device_ctx- >> >hwctx; >> + >> + if (frames_ctx->format != ctx->pix_fmt) { >> + av_log(avctx, AV_LOG_ERROR, "Invalid pixfmt for hwaccel!\n"); >> + goto fail; >> + } >> + >> + ret = d3d12va_create_decoder(avctx); >> + if (ret < 0) >> + goto fail; >> + >> + ret = d3d12va_create_decoder_heap(avctx); >> + if (ret < 0) >> + goto fail; >> + >> + ctx->max_num_ref = frames_ctx->initial_pool_size; >> + >> + bitstream_size = ff_d3d12va_get_suitable_max_bitstream_size(avctx); >> + 
ctx->buffers = av_calloc(sizeof(ID3D12Resource *), ctx->max_num_ref); >> + for (int i = 0; i < ctx->max_num_ref; i++) { >> + ret = d3d12va_create_buffer(avctx, bitstream_size, &ctx->buffers[i]); >> + if (ret < 0) >> + goto fail; >> + } >> + >> + ctx->ref_resources = av_calloc(sizeof(ID3D12Resource *), ctx- >> >max_num_ref); >> + if (!ctx->ref_resources) >> + return AVERROR(ENOMEM); >> + >> + ctx->ref_subresources = av_calloc(sizeof(UINT), ctx->max_num_ref); >> + if (!ctx->ref_subresources) >> + return AVERROR(ENOMEM); >> + >> + ctx->allocator_queue = av_fifo_alloc2(ctx->max_num_ref, >> sizeof(CommandAllocator), AV_FIFO_FLAG_AUTO_GROW); >> + if (!ctx->allocator_queue) >> + return AVERROR(ENOMEM); >> + >> + ret = av_d3d12va_sync_context_alloc(ctx->device_ctx, &ctx->sync_ctx); >> + if (ret < 0) >> + goto fail; >> + >> + ret = d3d12va_get_valid_command_allocator(avctx, >&command_allocator); >> + if (ret < 0) >> + goto fail; >> + >> + DX_CHECK(ID3D12Device_CreateCommandQueue(ctx->device_ctx- >>device, >> &queue_desc, >> + &IID_ID3D12CommandQueue, &ctx->command_queue)); >> + >> + DX_CHECK(ID3D12Device_CreateCommandList(ctx->device_ctx->device, >0, >> queue_desc.Type, >> + command_allocator, NULL, &IID_ID3D12CommandList, &ctx- >> >command_list)); >> + >> + DX_CHECK(ID3D12VideoDecodeCommandList_Close(ctx- >>command_list)); >> + >> + ID3D12CommandQueue_ExecuteCommandLists(ctx->command_queue, >1, >> (ID3D12CommandList **)&ctx->command_list); >> + >> + d3d12va_wait_for_gpu(avctx); >> + >> + d3d12va_discard_command_allocator(avctx, command_allocator, ctx- >> >sync_ctx->fence_value); >> + >> + return 0; >> + >> +fail: >> + D3D12_OBJECT_RELEASE(command_allocator); >> + ff_d3d12va_decode_uninit(avctx); >> + >> + return AVERROR(EINVAL); >> +} >> + >> +int ff_d3d12va_decode_uninit(AVCodecContext *avctx) >> +{ >> + int i, num_allocator = 0; >> + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); >> + CommandAllocator allocator; >> + >> + if (ctx->sync_ctx) >> + 
d3d12va_wait_for_gpu(avctx); >> + >> + av_freep(&ctx->ref_resources); >> + >> + av_freep(&ctx->ref_subresources); >> + >> + for (i = 0; i < ctx->max_num_ref; i++) >> + D3D12_OBJECT_RELEASE(ctx->buffers[i]); >> + >> + av_freep(&ctx->buffers); >> + >> + D3D12_OBJECT_RELEASE(ctx->command_list); >> + >> + D3D12_OBJECT_RELEASE(ctx->command_queue); >> + >> + if (ctx->allocator_queue) { >> + while (av_fifo_read(ctx->allocator_queue, &allocator, 1) >= 0) { >> + num_allocator++; >> + D3D12_OBJECT_RELEASE(allocator.command_allocator); >> + } >> + >> + av_log(avctx, AV_LOG_VERBOSE, "Total number of command >allocators >> reused: %d\n", num_allocator); >> + } >> + >> + av_fifo_freep2(&ctx->allocator_queue); >> + >> + av_d3d12va_sync_context_free(&ctx->sync_ctx); >> + >> + D3D12_OBJECT_RELEASE(ctx->decoder_heap); >> + >> + av_buffer_unref(&ctx->decoder_ref); >> + >> + return 0; >> +} >> + >> +static ID3D12Resource *get_surface(const AVFrame *frame) >> +{ >> + return (ID3D12Resource *)frame->data[0]; >> +} >> + >> +intptr_t ff_d3d12va_get_surface_index(AVCodecContext *ctx, const >AVFrame* >> frame) >> +{ >> + return (intptr_t)frame->data[1]; >> +} >> + >> +static AVD3D12VASyncContext *d3d12va_get_sync_context(const AVFrame >*frame) >> +{ >> + return (AVD3D12VASyncContext *)frame->data[2]; >> +} >> + >> +static int d3d12va_begin_update_reference_frames(AVCodecContext >*avctx, >> D3D12_RESOURCE_BARRIER *barriers, int index) >> +{ >> + D3D12VADecodeContext *ctx = >D3D12VA_DECODE_CONTEXT(avctx); >> + AVHWFramesContext *frames_ctx = >D3D12VA_FRAMES_CONTEXT(avctx); >> + AVD3D12VAFramesContext *frames_hwctx = frames_ctx->hwctx; >> + >> + int num_barrier = 0; >> + >> + for (int i = 0; i < ctx->max_num_ref; i++) { >> + if (ctx->ref_resources[i] && ctx->ref_resources[i] != frames_hwctx- >> >texture_infos[index].texture) { >> + barriers[num_barrier].Type = >> D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; >> + barriers[num_barrier].Flags = >D3D12_RESOURCE_BARRIER_FLAG_NONE; >> + 
barriers[num_barrier].Transition = >> (D3D12_RESOURCE_TRANSITION_BARRIER){ >> + .pResource = ctx->ref_resources[i], >> + .Subresource = 0, >> + .StateBefore = D3D12_RESOURCE_STATE_COMMON, >> + .StateAfter = D3D12_RESOURCE_STATE_VIDEO_DECODE_READ, >> + }; >> + num_barrier++; >> + } >> + } >> + >> + return num_barrier; >> +} >> + >> +static void d3d12va_end_update_reference_frames(AVCodecContext >*avctx, >> D3D12_RESOURCE_BARRIER *barriers, int index) >> +{ >> + D3D12VADecodeContext *ctx = >D3D12VA_DECODE_CONTEXT(avctx); >> + AVHWFramesContext *frames_ctx = >D3D12VA_FRAMES_CONTEXT(avctx); >> + AVD3D12VAFramesContext *frames_hwctx = frames_ctx->hwctx; >> + int num_barrier = 0; >> + >> + for (int i = 0; i < ctx->max_num_ref; i++) { >> + if (ctx->ref_resources[i] && ctx->ref_resources[i] != frames_hwctx- >> >texture_infos[index].texture) { >> + barriers[num_barrier].Transition.pResource = ctx- >> >ref_resources[i]; >> + barriers[num_barrier].Flags = >D3D12_RESOURCE_BARRIER_FLAG_NONE; >> + barriers[num_barrier].Transition.StateBefore = >> D3D12_RESOURCE_STATE_VIDEO_DECODE_READ; >> + barriers[num_barrier].Transition.StateAfter = >> D3D12_RESOURCE_STATE_COMMON; >> + num_barrier++; >> + } >> + } >> +} >> + >> +int ff_d3d12va_common_end_frame(AVCodecContext *avctx, AVFrame >*frame, >> + const void *pp, unsigned pp_size, >> + const void *qm, unsigned qm_size, >> + int(*update_input_arguments)(AVCodecContext *, >> D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS *, ID3D12Resource >*)) >> +{ >> + int ret; >> + D3D12VADecodeContext *ctx = >> D3D12VA_DECODE_CONTEXT(avctx); >> + AVHWFramesContext *frames_ctx = >> D3D12VA_FRAMES_CONTEXT(avctx); >> + AVD3D12VAFramesContext *frames_hwctx = frames_ctx->hwctx; >> + ID3D12CommandAllocator *command_allocator = NULL; >> + >> + ID3D12Resource *resource = get_surface(frame); >> + UINT index = ff_d3d12va_get_surface_index(avctx, frame); >> + AVD3D12VASyncContext *sync_ctx = d3d12va_get_sync_context(frame); >> + >> + ID3D12VideoDecodeCommandList 
*cmd_list = ctx->command_list; >> + D3D12_RESOURCE_BARRIER barriers[D3D12VA_MAX_SURFACES] = { 0 }; >> + >> + D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS input_args = { >> + .NumFrameArguments = 2, >> + .FrameArguments = { >> + [0] = { >> + .Type = >D3D12_VIDEO_DECODE_ARGUMENT_TYPE_PICTURE_PARAMETERS, >> + .Size = pp_size, >> + .pData = (void *)pp, >> + }, >> + [1] = { >> + .Type = >> >D3D12_VIDEO_DECODE_ARGUMENT_TYPE_INVERSE_QUANTIZATION_MATRI >X, >> + .Size = qm_size, >> + .pData = (void *)qm, >> + }, >> + }, >> + .pHeap = ctx->decoder_heap, >> + }; >> + >> + D3D12_VIDEO_DECODE_OUTPUT_STREAM_ARGUMENTS output_args = { >> + .ConversionArguments = 0, >> + .OutputSubresource = 0, >> + .pOutputTexture2D = resource, >> + }; >> + >> + UINT num_barrier = 1; >> + barriers[0] = (D3D12_RESOURCE_BARRIER) { >> + .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, >> + .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, >> + .Transition = { >> + .pResource = resource, >> + .Subresource = 0, >> + .StateBefore = D3D12_RESOURCE_STATE_COMMON, >> + .StateAfter = D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE, >> + }, >> + }; >> + >> + memset(ctx->ref_resources, 0, sizeof(ID3D12Resource *) * ctx- >> >max_num_ref); >> + memset(ctx->ref_subresources, 0, sizeof(UINT) * ctx->max_num_ref); >> + input_args.ReferenceFrames.NumTexture2Ds = ctx->max_num_ref; >> + input_args.ReferenceFrames.ppTexture2Ds = ctx->ref_resources; >> + input_args.ReferenceFrames.pSubresources = ctx->ref_subresources; >> + >> + av_d3d12va_wait_idle(sync_ctx); >> + >> + if (!qm) >> + input_args.NumFrameArguments = 1; >> + >> + ret = update_input_arguments(avctx, &input_args, ctx->buffers[index]); >> + if (ret < 0) >> + return ret; >> + >> + ret = d3d12va_get_valid_command_allocator(avctx, >&command_allocator); >> + if (ret < 0) >> + goto fail; >> + >> + DX_CHECK(ID3D12CommandAllocator_Reset(command_allocator)); >> + >> + DX_CHECK(ID3D12VideoDecodeCommandList_Reset(cmd_list, >> command_allocator)); >> + >> + num_barrier += 
d3d12va_begin_update_reference_frames(avctx, >&barriers[1], >> index); >> + >> + ID3D12VideoDecodeCommandList_ResourceBarrier(cmd_list, >num_barrier, >> barriers); >> + >> + ID3D12VideoDecodeCommandList_DecodeFrame(cmd_list, ctx->decoder, >> &output_args, &input_args); >> + >> + barriers[0].Transition.StateBefore = barriers[0].Transition.StateAfter; >> + barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_COMMON; >> + d3d12va_end_update_reference_frames(avctx, &barriers[1], index); >> + >> + ID3D12VideoDecodeCommandList_ResourceBarrier(cmd_list, >num_barrier, >> barriers); >> + >> + DX_CHECK(ID3D12VideoDecodeCommandList_Close(cmd_list)); >> + >> + ID3D12CommandQueue_ExecuteCommandLists(ctx->command_queue, >1, >> (ID3D12CommandList **)&ctx->command_list); >> + >> + DX_CHECK(ID3D12CommandQueue_Signal(ctx->command_queue, >sync_ctx->fence, >> ++sync_ctx->fence_value)); >> + >> + DX_CHECK(ID3D12CommandQueue_Signal(ctx->command_queue, ctx- >>sync_ctx- >> >fence, ++ctx->sync_ctx->fence_value)); >> + >> + ret = d3d12va_discard_command_allocator(avctx, command_allocator, >ctx- >> >sync_ctx->fence_value); >> + if (ret < 0) >> + return ret; >> + >> + if (ctx->device_ctx->sync) { >> + ret = av_d3d12va_wait_idle(ctx->sync_ctx); >> + if (ret < 0) >> + return ret; >> + } >> + >> + return 0; >> + >> +fail: >> + if (command_allocator) >> + d3d12va_discard_command_allocator(avctx, command_allocator, ctx- >> >sync_ctx->fence_value); >> + return AVERROR(EINVAL); >> +} >> diff --git a/libavcodec/d3d12va.h b/libavcodec/d3d12va.h >> new file mode 100644 >> index 0000000000..da3e7b7ab9 >> --- /dev/null >> +++ b/libavcodec/d3d12va.h >> @@ -0,0 +1,184 @@ >> +/* >> + * Direct3D 12 HW acceleration video decoder >> + * >> + * copyright (c) 2022-2023 Wu Jianhua <toqsxw@outlook.com> >> + * >> + * This file is part of FFmpeg. 
>> + * >> + * FFmpeg is free software; you can redistribute it and/or >> + * modify it under the terms of the GNU Lesser General Public >> + * License as published by the Free Software Foundation; either >> + * version 2.1 of the License, or (at your option) any later version. >> + * >> + * FFmpeg is distributed in the hope that it will be useful, >> + * but WITHOUT ANY WARRANTY; without even the implied warranty of >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >GNU >> + * Lesser General Public License for more details. >> + * >> + * You should have received a copy of the GNU Lesser General Public >> + * License along with FFmpeg; if not, write to the Free Software >> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 >> USA >> + */ >> + >> +#ifndef AVCODEC_D3D12VA_H >> +#define AVCODEC_D3D12VA_H >> + >> +#include "libavutil/fifo.h" >> +#include "libavutil/hwcontext.h" >> +#include "libavutil/hwcontext_d3d12va.h" >> +#include "avcodec.h" >> +#include "internal.h" >> + >> +/** >> + * @brief This structure is used to provides the necessary configurations >and >> data >> + * to the FFmpeg Direct3D 12 HWAccel implementation for video decoder. >> + * >> + * The application must make it available as >AVCodecContext.hwaccel_context. >> + */ > >Does d3d12va support AVCodecContext.hwaccel_context ? If yes, you should >make >this file public, modify the structures and remove all ff_ functions, otherwise >user can't use these structures in an application. If no, please remove the >above comment and better to rename this file to d3d12va_decode.h because >the >structures and functions in this file are for d3d12va decoders. > >Thanks >Haihao Makes sense. Will remove the comment and rename the files in V4. Thanks. 
> >> +typedef struct D3D12VADecodeContext { >> + AVBufferRef *decoder_ref; >> + >> + /** >> + * D3D12 video decoder >> + */ >> + ID3D12VideoDecoder *decoder; >> + >> + /** >> + * D3D12 video decoder heap >> + */ >> + ID3D12VideoDecoderHeap *decoder_heap; >> + >> + /** >> + * D3D12 configuration used to create the decoder >> + * >> + * Specified by decoders >> + */ >> + D3D12_VIDEO_DECODE_CONFIGURATION cfg; >> + >> + /** >> + * A cached queue for reusing the D3D12 command allocators >> + * >> + * @see >> https://learn.microsoft.com/en-us/windows/win32/direct3d12/recording- >command-lists-and-bundles#id3d12commandallocator >> + */ >> + AVFifo *allocator_queue; >> + >> + /** >> + * D3D12 command queue >> + */ >> + ID3D12CommandQueue *command_queue; >> + >> + /** >> + * D3D12 video decode command list >> + */ >> + ID3D12VideoDecodeCommandList *command_list; >> + >> + /** >> + * The array of buffer resources used to upload compressed bitstream >> + * >> + * The buffers.length is the same as >D3D12VADecodeContext.max_num_ref >> + */ >> + ID3D12Resource **buffers; >> + >> + /** >> + * The array of resources used for reference frames >> + * >> + * The ref_resources.length is the same as >> D3D12VADecodeContext.max_num_ref >> + */ >> + ID3D12Resource **ref_resources; >> + >> + /** >> + * The array of subresources used for reference frames >> + * >> + * The ref_subresources.length is the same as >> D3D12VADecodeContext.max_num_ref >> + */ >> + UINT *ref_subresources; >> + >> + /** >> + * Maximum number of reference frames >> + */ >> + UINT max_num_ref; >> + >> + /** >> + * The sync context used to sync command queue >> + */ >> + AVD3D12VASyncContext *sync_ctx; >> + >> + /** >> + * A pointer to AVD3D12VADeviceContext used to create D3D12 objects >> + */ >> + AVD3D12VADeviceContext *device_ctx; >> + >> + /** >> + * Pixel format >> + */ >> + enum AVPixelFormat pix_fmt; >> + >> + /** >> + * Private to the FFmpeg AVHWAccel implementation >> + */ >> + unsigned report_id; >> +} 
D3D12VADecodeContext; >> + >> +/** >> + * @} >> + */ >> + >> +#define D3D12VA_DECODE_CONTEXT(avctx) ((D3D12VADecodeContext >*)((avctx)- >> >internal->hwaccel_priv_data)) >> +#define D3D12VA_FRAMES_CONTEXT(avctx) ((AVHWFramesContext >*)(avctx)- >> >hw_frames_ctx->data) >> + >> +/** >> + * @brief Get a suitable maximum bitstream size >> + * >> + * Creating and destroying a resource on d3d12 needs sync and reallocation, >> so use this function >> + * to help allocate a big enough bitstream buffer to avoid recreating >> resources when decoding. >> + * >> + * @return the suitable size >> + */ >> +int ff_d3d12va_get_suitable_max_bitstream_size(AVCodecContext *avctx); >> + >> +/** >> + * @brief init D3D12VADecodeContext >> + * >> + * @return Error code (ret < 0 if failed) >> + */ >> +int ff_d3d12va_decode_init(AVCodecContext *avctx); >> + >> +/** >> + * @brief uninit D3D12VADecodeContext >> + * >> + * @return Error code (ret < 0 if failed) >> + */ >> +int ff_d3d12va_decode_uninit(AVCodecContext *avctx); >> + >> +/** >> + * @brief d3d12va common frame params >> + * >> + * @return Error code (ret < 0 if failed) >> + */ >> +int ff_d3d12va_common_frame_params(AVCodecContext *avctx, >AVBufferRef >> *hw_frames_ctx); >> + >> +/** >> + * @brief d3d12va common end frame >> + * >> + * @param avctx codec context >> + * @param frame current output frame >> + * @param pp picture parameters >> + * @param pp_size the size of the picture parameters >> + * @param qm quantization matrix >> + * @param qm_size the size of the quantization matrix >> + * @param callback update decoder-specified input stream arguments >> + * @return Error code (ret < 0 if failed) >> + */ >> +int ff_d3d12va_common_end_frame(AVCodecContext *avctx, AVFrame >*frame, >> + const void *pp, unsigned pp_size, >> + const void *qm, unsigned qm_size, >> + int(*)(AVCodecContext *, >D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS *, >> ID3D12Resource *)); >> + >> +/** >> + * @brief get surface index >> + * >> + * @return index >> 
+ */ >> +intptr_t ff_d3d12va_get_surface_index(AVCodecContext *avctx, const >AVFrame >> *frame); >> + >> +#endif /* AVCODEC_D3D12VA_DEC_H */ >> diff --git a/libavcodec/d3d12va_h264.c b/libavcodec/d3d12va_h264.c >> new file mode 100644 >> index 0000000000..0810a034b4 >> --- /dev/null >> +++ b/libavcodec/d3d12va_h264.c >> @@ -0,0 +1,210 @@ >> +/* >> + * Direct3D 12 h264 HW acceleration >> + * >> + * copyright (c) 2022-2023 Wu Jianhua <toqsxw@outlook.com> >> + * >> + * This file is part of FFmpeg. >> + * >> + * FFmpeg is free software; you can redistribute it and/or >> + * modify it under the terms of the GNU Lesser General Public >> + * License as published by the Free Software Foundation; either >> + * version 2.1 of the License, or (at your option) any later version. >> + * >> + * FFmpeg is distributed in the hope that it will be useful, >> + * but WITHOUT ANY WARRANTY; without even the implied warranty of >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >GNU >> + * Lesser General Public License for more details. 
>> + * >> + * You should have received a copy of the GNU Lesser General Public >> + * License along with FFmpeg; if not, write to the Free Software >> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 >> USA >> + */ >> + >> +#include "config_components.h" >> +#include "libavutil/avassert.h" >> +#include "h264dec.h" >> +#include "h264data.h" >> +#include "h264_ps.h" >> +#include "mpegutils.h" >> +#include "dxva2_internal.h" >> +#include "d3d12va.h" >> +#include "libavutil/hwcontext_d3d12va_internal.h" >> +#include <dxva.h> >> + >> +typedef struct H264DecodePictureContext { >> + DXVA_PicParams_H264 pp; >> + DXVA_Qmatrix_H264 qm; >> + unsigned slice_count; >> + DXVA_Slice_H264_Short slice_short[MAX_SLICES]; >> + const uint8_t *bitstream; >> + unsigned bitstream_size; >> +} H264DecodePictureContext; >> + >> +static void fill_slice_short(DXVA_Slice_H264_Short *slice, >> + unsigned position, unsigned size) >> +{ >> + memset(slice, 0, sizeof(*slice)); >> + slice->BSNALunitDataLocation = position; >> + slice->SliceBytesInBuffer = size; >> + slice->wBadSliceChopping = 0; >> +} >> + >> +static int d3d12va_h264_start_frame(AVCodecContext *avctx, >> + av_unused const uint8_t *buffer, >> + av_unused uint32_t size) >> +{ >> + const H264Context *h = avctx->priv_data; >> + H264DecodePictureContext *ctx_pic = h->cur_pic_ptr- >> >hwaccel_picture_private; >> + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); >> + >> + if (!ctx) >> + return -1; >> + >> + assert(ctx_pic); >> + >> + ff_dxva2_h264_fill_picture_parameters(avctx, (AVDXVAContext *)ctx, >> &ctx_pic->pp); >> + >> + ff_dxva2_h264_fill_scaling_lists(avctx, (AVDXVAContext *)ctx, &ctx_pic- >> >qm); >> + >> + ctx_pic->slice_count = 0; >> + ctx_pic->bitstream_size = 0; >> + ctx_pic->bitstream = NULL; >> + >> + return 0; >> +} >> + >> +static int d3d12va_h264_decode_slice(AVCodecContext *avctx, const >uint8_t >> *buffer, uint32_t size) >> +{ >> + unsigned position; >> + const H264Context *h = 
avctx->priv_data; >> + const H264SliceContext *sl = &h->slice_ctx[0]; >> + const H264Picture *current_picture = h->cur_pic_ptr; >> + H264DecodePictureContext *ctx_pic = current_picture- >> >hwaccel_picture_private; >> + >> + if (ctx_pic->slice_count >= MAX_SLICES) >> + return AVERROR(ERANGE); >> + >> + if (!ctx_pic->bitstream) >> + ctx_pic->bitstream = buffer; >> + ctx_pic->bitstream_size += size; >> + >> + position = buffer - ctx_pic->bitstream; >> + fill_slice_short(&ctx_pic->slice_short[ctx_pic->slice_count], position, >> size); >> + ctx_pic->slice_count++; >> + >> + if (sl->slice_type != AV_PICTURE_TYPE_I && sl->slice_type != >> AV_PICTURE_TYPE_SI) >> + ctx_pic->pp.wBitFields &= ~(1 << 15); /* Set IntraPicFlag to 0 */ >> + >> + return 0; >> +} >> + >> +#define START_CODE 65536 >> +#define START_CODE_SIZE 3 >> +static int update_input_arguments(AVCodecContext *avctx, >> D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS *input_args, >ID3D12Resource *buffer) >> +{ >> + D3D12VADecodeContext *ctx = >D3D12VA_DECODE_CONTEXT(avctx); >> + AVHWFramesContext *frames_ctx = >D3D12VA_FRAMES_CONTEXT(avctx); >> + AVD3D12VAFramesContext *frames_hwctx = frames_ctx->hwctx; >> + >> + const H264Context *h = avctx->priv_data; >> + const H264Picture *current_picture = h->cur_pic_ptr; >> + H264DecodePictureContext *ctx_pic = current_picture- >> >hwaccel_picture_private; >> + >> + int i, index; >> + uint8_t *mapped_data, *mapped_ptr; >> + DXVA_Slice_H264_Short *slice; >> + D3D12_VIDEO_DECODE_FRAME_ARGUMENT *args; >> + >> + if (FAILED(ID3D12Resource_Map(buffer, 0, NULL, &mapped_data))) { >> + av_log(avctx, AV_LOG_ERROR, "Failed to map D3D12 Buffer >> resource!\n"); >> + return AVERROR(EINVAL); >> + } >> + >> + mapped_ptr = mapped_data; >> + for (i = 0; i < ctx_pic->slice_count; i++) { >> + UINT position, size; >> + slice = &ctx_pic->slice_short[i]; >> + >> + position = slice->BSNALunitDataLocation; >> + size = slice->SliceBytesInBuffer; >> + >> + slice->SliceBytesInBuffer += START_CODE_SIZE; >> 
+ slice->BSNALunitDataLocation = mapped_ptr - mapped_data; >> + >> + *(uint32_t *)mapped_ptr = START_CODE; >> + mapped_ptr += START_CODE_SIZE; >> + >> + memcpy(mapped_ptr, &ctx_pic->bitstream[position], size); >> + mapped_ptr += size; >> + } >> + >> + ID3D12Resource_Unmap(buffer, 0, NULL); >> + >> + input_args->CompressedBitstream = >> (D3D12_VIDEO_DECODE_COMPRESSED_BITSTREAM){ >> + .pBuffer = buffer, >> + .Offset = 0, >> + .Size = mapped_ptr - mapped_data, >> + }; >> + >> + args = &input_args->FrameArguments[input_args- >>NumFrameArguments++]; >> + args->Type = >D3D12_VIDEO_DECODE_ARGUMENT_TYPE_SLICE_CONTROL; >> + args->Size = sizeof(DXVA_Slice_H264_Short) * ctx_pic->slice_count; >> + args->pData = ctx_pic->slice_short; >> + >> + index = ctx_pic->pp.CurrPic.Index7Bits; >> + ctx->ref_resources[index] = frames_hwctx->texture_infos[index].texture; >> + for (i = 0; i < FF_ARRAY_ELEMS(ctx_pic->pp.RefFrameList); i++) { >> + index = ctx_pic->pp.RefFrameList[i].Index7Bits; >> + if (index != 0x7f) >> + ctx->ref_resources[index] = frames_hwctx- >> >texture_infos[index].texture; >> + } >> + >> + return 0; >> +} >> + >> +static int d3d12va_h264_end_frame(AVCodecContext *avctx) >> +{ >> + H264Context *h = avctx->priv_data; >> + H264DecodePictureContext *ctx_pic = h->cur_pic_ptr- >> >hwaccel_picture_private; >> + H264SliceContext *sl = &h->slice_ctx[0]; >> + >> + int ret; >> + >> + if (ctx_pic->slice_count <= 0 || ctx_pic->bitstream_size <= 0) >> + return -1; >> + >> + ret = ff_d3d12va_common_end_frame(avctx, h->cur_pic_ptr->f, >> + &ctx_pic->pp, sizeof(ctx_pic->pp), >> + &ctx_pic->qm, sizeof(ctx_pic->qm), >> + update_input_arguments); >> + if (!ret) >> + ff_h264_draw_horiz_band(h, sl, 0, h->avctx->height); >> + >> + return ret; >> +} >> + >> +static int d3d12va_h264_decode_init(AVCodecContext *avctx) >> +{ >> + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); >> + >> + ctx->cfg.DecodeProfile = D3D12_VIDEO_DECODE_PROFILE_H264; >> + >> + return 
ff_d3d12va_decode_init(avctx); >> +} >> + >> +#if CONFIG_H264_D3D12VA_HWACCEL >> +const AVHWAccel ff_h264_d3d12va_hwaccel = { >> + .name = "h264_d3d12va", >> + .type = AVMEDIA_TYPE_VIDEO, >> + .id = AV_CODEC_ID_H264, >> + .pix_fmt = AV_PIX_FMT_D3D12, >> + .init = d3d12va_h264_decode_init, >> + .uninit = ff_d3d12va_decode_uninit, >> + .start_frame = d3d12va_h264_start_frame, >> + .decode_slice = d3d12va_h264_decode_slice, >> + .end_frame = d3d12va_h264_end_frame, >> + .frame_params = ff_d3d12va_common_frame_params, >> + .frame_priv_data_size = sizeof(H264DecodePictureContext), >> + .priv_data_size = sizeof(D3D12VADecodeContext), >> +}; >> +#endif >> diff --git a/libavcodec/dxva2.c b/libavcodec/dxva2.c >> index 568d686f39..b22ea3e8f2 100644 >> --- a/libavcodec/dxva2.c >> +++ b/libavcodec/dxva2.c >> @@ -774,6 +774,10 @@ unsigned ff_dxva2_get_surface_index(const >AVCodecContext >> *avctx, >> void *surface = get_surface(avctx, frame); >> unsigned i; >> >> +#if CONFIG_D3D12VA >> + if (avctx->pix_fmt == AV_PIX_FMT_D3D12) >> + return (intptr_t)frame->data[1]; >> +#endif >> #if CONFIG_D3D11VA >> if (avctx->pix_fmt == AV_PIX_FMT_D3D11) >> return (intptr_t)frame->data[1]; >> @@ -1056,3 +1060,23 @@ int ff_dxva2_is_d3d11(const AVCodecContext >*avctx) >> else >> return 0; >> } >> + >> +unsigned *ff_dxva2_get_report_id(const AVCodecContext *avctx, >AVDXVAContext >> *ctx) >> +{ >> + unsigned *report_id = NULL; >> + >> +#if CONFIG_D3D12VA >> + if (avctx->pix_fmt == AV_PIX_FMT_D3D12) >> + report_id = &ctx->d3d12va.report_id; >> +#endif >> +#if CONFIG_D3D11VA >> + if (ff_dxva2_is_d3d11(avctx)) >> + report_id = &ctx->d3d11va.report_id; >> +#endif >> +#if CONFIG_DXVA2 >> + if (avctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD) >> + report_id = &ctx->dxva2.report_id; >> +#endif >> + >> + return report_id; >> +} >> diff --git a/libavcodec/dxva2.h b/libavcodec/dxva2.h >> index 22c93992f2..bdec6112e9 100644 >> --- a/libavcodec/dxva2.h >> +++ b/libavcodec/dxva2.h >> @@ -45,9 +45,6 @@ >> * @{ >> */ >> 
>> -#define FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG 1 ///< Work >around for DXVA2 >> and old UVD/UVD+ ATI video cards >> -#define FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO 2 ///< Work >around for DXVA2 >> and old Intel GPUs with ClearVideo interface >> - >> /** >> * This structure is used to provides the necessary configurations and data >> * to the DXVA2 FFmpeg HWAccel implementation. >> diff --git a/libavcodec/dxva2_h264.c b/libavcodec/dxva2_h264.c >> index 6300b1418d..7a076ea981 100644 >> --- a/libavcodec/dxva2_h264.c >> +++ b/libavcodec/dxva2_h264.c >> @@ -47,9 +47,10 @@ static void fill_picture_entry(DXVA_PicEntry_H264 >*pic, >> pic->bPicEntry = index | (flag << 7); >> } >> >> -static void fill_picture_parameters(const AVCodecContext *avctx, >> AVDXVAContext *ctx, const H264Context *h, >> +void ff_dxva2_h264_fill_picture_parameters(const AVCodecContext *avctx, >> AVDXVAContext *ctx, >> DXVA_PicParams_H264 *pp) >> { >> + const H264Context *h = avctx->priv_data; >> const H264Picture *current_picture = h->cur_pic_ptr; >> const SPS *sps = h->ps.sps; >> const PPS *pps = h->ps.pps; >> @@ -163,9 +164,10 @@ static void fill_picture_parameters(const >AVCodecContext >> *avctx, AVDXVAContext * >> //pp->SliceGroupMap[810]; /* XXX not implemented by FFmpeg >> */ >> } >> >> -static void fill_scaling_lists(const AVCodecContext *avctx, AVDXVAContext >> *ctx, const H264Context *h, DXVA_Qmatrix_H264 *qm) >> +void ff_dxva2_h264_fill_scaling_lists(const AVCodecContext *avctx, >> AVDXVAContext *ctx, DXVA_Qmatrix_H264 *qm) >> { >> - const PPS *pps = h->ps.pps; >> + const H264Context *h = avctx->priv_data; >> + const PPS *pps = h->ps.pps; >> unsigned i, j; >> memset(qm, 0, sizeof(*qm)); >> if (DXVA_CONTEXT_WORKAROUND(avctx, ctx) & >> FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG) { >> @@ -453,10 +455,10 @@ static int >dxva2_h264_start_frame(AVCodecContext *avctx, >> assert(ctx_pic); >> >> /* Fill up DXVA_PicParams_H264 */ >> - fill_picture_parameters(avctx, ctx, h, &ctx_pic->pp); >> + 
ff_dxva2_h264_fill_picture_parameters(avctx, ctx, &ctx_pic->pp); >> >> /* Fill up DXVA_Qmatrix_H264 */ >> - fill_scaling_lists(avctx, ctx, h, &ctx_pic->qm); >> + ff_dxva2_h264_fill_scaling_lists(avctx, ctx, &ctx_pic->qm); >> >> ctx_pic->slice_count = 0; >> ctx_pic->bitstream_size = 0; >> diff --git a/libavcodec/dxva2_internal.h b/libavcodec/dxva2_internal.h >> index b822af59cd..a9a1fc090e 100644 >> --- a/libavcodec/dxva2_internal.h >> +++ b/libavcodec/dxva2_internal.h >> @@ -26,18 +26,34 @@ >> #define COBJMACROS >> >> #include "config.h" >> +#include "config_components.h" >> >> /* define the proper COM entries before forcing desktop APIs */ >> #include <objbase.h> >> >> +#define FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG 1 ///< Work >around for >> DXVA2/Direct3D11 and old UVD/UVD+ ATI video cards >> +#define FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO 2 ///< Work >around for >> DXVA2/Direct3D11 and old Intel GPUs with ClearVideo interface >> + >> #if CONFIG_DXVA2 >> #include "dxva2.h" >> #include "libavutil/hwcontext_dxva2.h" >> +#define DXVA2_VAR(ctx, var) ctx->dxva2.var >> +#else >> +#define DXVA2_VAR(ctx, var) 0 >> #endif >> + >> #if CONFIG_D3D11VA >> #include "d3d11va.h" >> #include "libavutil/hwcontext_d3d11va.h" >> +#define D3D11VA_VAR(ctx, var) ctx->d3d11va.var >> +#else >> +#define D3D11VA_VAR(ctx, var) 0 >> +#endif >> + >> +#if CONFIG_D3D12VA >> +#include "d3d12va.h" >> #endif >> + >> #if HAVE_DXVA_H >> /* When targeting WINAPI_FAMILY_PHONE_APP or WINAPI_FAMILY_APP, >dxva.h >> * defines nothing. Force the struct definitions to be visible. 
*/ >> @@ -62,6 +78,9 @@ typedef union { >> #if CONFIG_DXVA2 >> struct dxva_context dxva2; >> #endif >> +#if CONFIG_D3D12VA >> + struct D3D12VADecodeContext d3d12va; >> +#endif >> } AVDXVAContext; >> >> typedef struct FFDXVASharedContext { >> @@ -101,39 +120,19 @@ typedef struct FFDXVASharedContext { >> #define D3D11VA_CONTEXT(ctx) (&ctx->d3d11va) >> #define DXVA2_CONTEXT(ctx) (&ctx->dxva2) >> >> -#if CONFIG_D3D11VA && CONFIG_DXVA2 >> -#define DXVA_CONTEXT_WORKAROUND(avctx, >ctx) (ff_dxva2_is_d3d11(avctx) ? >> ctx->d3d11va.workaround : ctx->dxva2.workaround) >> -#define DXVA_CONTEXT_COUNT(avctx, ctx) (ff_dxva2_is_d3d11(avctx) ? >> ctx->d3d11va.surface_count : ctx->dxva2.surface_count) >> -#define DXVA_CONTEXT_DECODER(avctx, >ctx) (ff_dxva2_is_d3d11(avctx) ? >> (void *)ctx->d3d11va.decoder : (void *)ctx->dxva2.decoder) >> -#define DXVA_CONTEXT_REPORT_ID(avctx, >ctx) (*(ff_dxva2_is_d3d11(avctx) ? >> &ctx->d3d11va.report_id : &ctx->dxva2.report_id)) >> -#define DXVA_CONTEXT_CFG(avctx, ctx) (ff_dxva2_is_d3d11(avctx) ? >> (void *)ctx->d3d11va.cfg : (void *)ctx->dxva2.cfg) >> -#define DXVA_CONTEXT_CFG_BITSTREAM(avctx, >ctx) (ff_dxva2_is_d3d11(avctx) ? >> ctx->d3d11va.cfg->ConfigBitstreamRaw : ctx->dxva2.cfg- >>ConfigBitstreamRaw) >> -#define DXVA_CONTEXT_CFG_INTRARESID(avctx, ctx) >(ff_dxva2_is_d3d11(avctx) ? >> ctx->d3d11va.cfg->ConfigIntraResidUnsigned : ctx->dxva2.cfg- >> >ConfigIntraResidUnsigned) >> -#define DXVA_CONTEXT_CFG_RESIDACCEL(avctx, ctx) >(ff_dxva2_is_d3d11(avctx) ? >> ctx->d3d11va.cfg->ConfigResidDiffAccelerator : ctx->dxva2.cfg- >> >ConfigResidDiffAccelerator) >> +#define DXVA2_CONTEXT_VAR(avctx, ctx, var) (avctx->pix_fmt == >> AV_PIX_FMT_D3D12 ? 0 : (ff_dxva2_is_d3d11(avctx) ? 
D3D11VA_VAR(ctx, >var) : >> DXVA2_VAR(ctx, var))) >> + >> +#define DXVA_CONTEXT_REPORT_ID(avctx, ctx) >> (*ff_dxva2_get_report_id(avctx, ctx)) >> +#define DXVA_CONTEXT_WORKAROUND(avctx, >ctx) DXVA2_CONTEXT_VAR(avctx, ctx, >> workaround) >> +#define DXVA_CONTEXT_COUNT(avctx, >ctx) DXVA2_CONTEXT_VAR(avctx, ctx, >> surface_count) >> +#define DXVA_CONTEXT_DECODER(avctx, ctx) (avctx->pix_fmt == >> AV_PIX_FMT_D3D12 ? 0 : (ff_dxva2_is_d3d11(avctx) ? (void >*)D3D11VA_VAR(ctx, >> decoder) : (void *)DXVA2_VAR(ctx, decoder))) >> +#define DXVA_CONTEXT_CFG(avctx, ctx) (avctx->pix_fmt == >> AV_PIX_FMT_D3D12 ? 0 : (ff_dxva2_is_d3d11(avctx) ? (void >*)D3D11VA_VAR(ctx, >> cfg) : (void *)DXVA2_VAR(ctx, cfg))) >> +#define DXVA_CONTEXT_CFG_BITSTREAM(avctx, >ctx) DXVA2_CONTEXT_VAR(avctx, ctx, >> cfg->ConfigBitstreamRaw) >> +#define DXVA_CONTEXT_CFG_INTRARESID(avctx, ctx) >DXVA2_CONTEXT_VAR(avctx, ctx, >> cfg->ConfigIntraResidUnsigned) >> +#define DXVA_CONTEXT_CFG_RESIDACCEL(avctx, ctx) >DXVA2_CONTEXT_VAR(avctx, ctx, >> cfg->ConfigResidDiffAccelerator) >> #define DXVA_CONTEXT_VALID(avctx, >ctx) (DXVA_CONTEXT_DECODER(avctx, >> ctx) && \ >> DXVA_CONTEXT_CFG(avctx, >> ctx) && \ >> - (ff_dxva2_is_d3d11(avctx) || >> ctx->dxva2.surface_count)) >> -#elif CONFIG_DXVA2 >> -#define DXVA_CONTEXT_WORKAROUND(avctx, ctx) (ctx- >>dxva2.workaround) >> -#define DXVA_CONTEXT_COUNT(avctx, ctx) (ctx->dxva2.surface_count) >> -#define DXVA_CONTEXT_DECODER(avctx, ctx) (ctx->dxva2.decoder) >> -#define DXVA_CONTEXT_REPORT_ID(avctx, ctx) (*(&ctx- >>dxva2.report_id)) >> -#define DXVA_CONTEXT_CFG(avctx, ctx) (ctx->dxva2.cfg) >> -#define DXVA_CONTEXT_CFG_BITSTREAM(avctx, ctx) (ctx->dxva2.cfg- >> >ConfigBitstreamRaw) >> -#define DXVA_CONTEXT_CFG_INTRARESID(avctx, ctx) (ctx->dxva2.cfg- >> >ConfigIntraResidUnsigned) >> -#define DXVA_CONTEXT_CFG_RESIDACCEL(avctx, ctx) (ctx->dxva2.cfg- >> >ConfigResidDiffAccelerator) >> -#define DXVA_CONTEXT_VALID(avctx, ctx) (ctx->dxva2.decoder && >ctx- >> >dxva2.cfg && 
ctx->dxva2.surface_count) >> -#elif CONFIG_D3D11VA >> -#define DXVA_CONTEXT_WORKAROUND(avctx, ctx) (ctx- >>d3d11va.workaround) >> -#define DXVA_CONTEXT_COUNT(avctx, ctx) (ctx- >>d3d11va.surface_count) >> -#define DXVA_CONTEXT_DECODER(avctx, ctx) (ctx->d3d11va.decoder) >> -#define DXVA_CONTEXT_REPORT_ID(avctx, ctx) (*(&ctx- >>d3d11va.report_id)) >> -#define DXVA_CONTEXT_CFG(avctx, ctx) (ctx->d3d11va.cfg) >> -#define DXVA_CONTEXT_CFG_BITSTREAM(avctx, ctx) (ctx->d3d11va.cfg- >> >ConfigBitstreamRaw) >> -#define DXVA_CONTEXT_CFG_INTRARESID(avctx, ctx) (ctx->d3d11va.cfg- >> >ConfigIntraResidUnsigned) >> -#define DXVA_CONTEXT_CFG_RESIDACCEL(avctx, ctx) (ctx->d3d11va.cfg- >> >ConfigResidDiffAccelerator) >> -#define DXVA_CONTEXT_VALID(avctx, ctx) (ctx->d3d11va.decoder && >ctx- >> >d3d11va.cfg) >> -#endif >> + (ff_dxva2_is_d3d11(avctx) || >> DXVA2_VAR(ctx, surface_count))) >> >> unsigned ff_dxva2_get_surface_index(const AVCodecContext *avctx, >> const AVDXVAContext *, >> @@ -161,4 +160,10 @@ int >ff_dxva2_common_frame_params(AVCodecContext *avctx, >> >> int ff_dxva2_is_d3d11(const AVCodecContext *avctx); >> >> +unsigned *ff_dxva2_get_report_id(const AVCodecContext *avctx, >AVDXVAContext >> *ctx); >> + >> +void ff_dxva2_h264_fill_picture_parameters(const AVCodecContext *avctx, >> AVDXVAContext *ctx, DXVA_PicParams_H264 *pp); >> + >> +void ff_dxva2_h264_fill_scaling_lists(const AVCodecContext *avctx, >> AVDXVAContext *ctx, DXVA_Qmatrix_H264 *qm); >> + >> #endif /* AVCODEC_DXVA2_INTERNAL_H */ >> diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c >> index 41bf30eefc..df70ad8a2f 100644 >> --- a/libavcodec/h264_slice.c >> +++ b/libavcodec/h264_slice.c >> @@ -778,6 +778,7 @@ static enum AVPixelFormat >get_pixel_format(H264Context *h, >> int force_callback) >> { >> #define HWACCEL_MAX (CONFIG_H264_DXVA2_HWACCEL + \ >> (CONFIG_H264_D3D11VA_HWACCEL * 2) + \ >> + CONFIG_H264_D3D12VA_HWACCEL + \ >> CONFIG_H264_NVDEC_HWACCEL + \ >> CONFIG_H264_VAAPI_HWACCEL + \ >> 
CONFIG_H264_VIDEOTOOLBOX_HWACCEL + \ >> @@ -883,6 +884,9 @@ static enum AVPixelFormat >get_pixel_format(H264Context *h, >> int force_callback) >> *fmt++ = AV_PIX_FMT_D3D11VA_VLD; >> *fmt++ = AV_PIX_FMT_D3D11; >> #endif >> +#if CONFIG_H264_D3D12VA_HWACCEL >> + *fmt++ = AV_PIX_FMT_D3D12; >> +#endif >> #if CONFIG_H264_VAAPI_HWACCEL >> *fmt++ = AV_PIX_FMT_VAAPI; >> #endif >> diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c >> index 19f8dba131..853d3262f7 100644 >> --- a/libavcodec/h264dec.c >> +++ b/libavcodec/h264dec.c >> @@ -1089,6 +1089,9 @@ const FFCodec ff_h264_decoder = { >> #if CONFIG_H264_D3D11VA2_HWACCEL >> HWACCEL_D3D11VA2(h264), >> #endif >> +#if CONFIG_H264_D3D12VA_HWACCEL >> + HWACCEL_D3D12VA(h264), >> +#endif >> #if CONFIG_H264_NVDEC_HWACCEL >> HWACCEL_NVDEC(h264), >> #endif >> diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h >> index 48dfc17f72..be54604b81 100644 >> --- a/libavcodec/hwaccels.h >> +++ b/libavcodec/hwaccels.h >> @@ -32,6 +32,7 @@ extern const AVHWAccel ff_h263_vaapi_hwaccel; >> extern const AVHWAccel ff_h263_videotoolbox_hwaccel; >> extern const AVHWAccel ff_h264_d3d11va_hwaccel; >> extern const AVHWAccel ff_h264_d3d11va2_hwaccel; >> +extern const AVHWAccel ff_h264_d3d12va_hwaccel; >> extern const AVHWAccel ff_h264_dxva2_hwaccel; >> extern const AVHWAccel ff_h264_nvdec_hwaccel; >> extern const AVHWAccel ff_h264_vaapi_hwaccel; >> diff --git a/libavcodec/hwconfig.h b/libavcodec/hwconfig.h >> index e8c6186151..e20118c096 100644 >> --- a/libavcodec/hwconfig.h >> +++ b/libavcodec/hwconfig.h >> @@ -82,6 +82,8 @@ void ff_hwaccel_uninit(AVCodecContext *avctx); >> HW_CONFIG_HWACCEL(1, 1, 1, VULKAN, VULKAN, ff_ ## codec ## >> _vulkan_hwaccel) >> #define HWACCEL_D3D11VA(codec) \ >> HW_CONFIG_HWACCEL(0, 0, 1, D3D11VA_VLD, NONE, ff_ ## codec >## >> _d3d11va_hwaccel) >> +#define HWACCEL_D3D12VA(codec) \ >> + HW_CONFIG_HWACCEL(1, 1, 0, D3D12, D3D12VA, ff_ ## codec ## >> _d3d12va_hwaccel) >> >> #define HW_CONFIG_ENCODER(device, frames, 
ad_hoc, format, >device_type_) \ >> &(const AVCodecHWConfigInternal) { \
diff --git a/configure b/configure index b86064e36f..f5dad4653f 100755 --- a/configure +++ b/configure @@ -3033,6 +3033,8 @@ h264_d3d11va_hwaccel_deps="d3d11va" h264_d3d11va_hwaccel_select="h264_decoder" h264_d3d11va2_hwaccel_deps="d3d11va" h264_d3d11va2_hwaccel_select="h264_decoder" +h264_d3d12va_hwaccel_deps="d3d12va" +h264_d3d12va_hwaccel_select="h264_decoder" h264_dxva2_hwaccel_deps="dxva2" h264_dxva2_hwaccel_select="h264_decoder" h264_nvdec_hwaccel_deps="nvdec" diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 9aacc1d477..ae143d8821 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -977,6 +977,7 @@ OBJS-$(CONFIG_ADPCM_ZORK_DECODER) += adpcm.o adpcm_data.o # hardware accelerators OBJS-$(CONFIG_D3D11VA) += dxva2.o +OBJS-$(CONFIG_D3D12VA) += dxva2.o d3d12va.o OBJS-$(CONFIG_DXVA2) += dxva2.o OBJS-$(CONFIG_NVDEC) += nvdec.o OBJS-$(CONFIG_VAAPI) += vaapi_decode.o @@ -994,6 +995,7 @@ OBJS-$(CONFIG_H263_VAAPI_HWACCEL) += vaapi_mpeg4.o OBJS-$(CONFIG_H263_VIDEOTOOLBOX_HWACCEL) += videotoolbox.o OBJS-$(CONFIG_H264_D3D11VA_HWACCEL) += dxva2_h264.o OBJS-$(CONFIG_H264_DXVA2_HWACCEL) += dxva2_h264.o +OBJS-$(CONFIG_H264_D3D12VA_HWACCEL) += dxva2_h264.o d3d12va_h264.o OBJS-$(CONFIG_H264_NVDEC_HWACCEL) += nvdec_h264.o OBJS-$(CONFIG_H264_QSV_HWACCEL) += qsvdec.o OBJS-$(CONFIG_H264_VAAPI_HWACCEL) += vaapi_h264.o @@ -1277,6 +1279,7 @@ SKIPHEADERS += %_tablegen.h \ SKIPHEADERS-$(CONFIG_AMF) += amfenc.h SKIPHEADERS-$(CONFIG_D3D11VA) += d3d11va.h dxva2_internal.h +SKIPHEADERS-$(CONFIG_D3D12VA) += d3d12va.h SKIPHEADERS-$(CONFIG_DXVA2) += dxva2.h dxva2_internal.h SKIPHEADERS-$(CONFIG_JNI) += ffjni.h SKIPHEADERS-$(CONFIG_LCMS2) += fflcms2.h diff --git a/libavcodec/d3d11va.h b/libavcodec/d3d11va.h index 6816b6c1e6..27f40e5519 100644 --- a/libavcodec/d3d11va.h +++ b/libavcodec/d3d11va.h @@ -45,9 +45,6 @@ * @{ */ -#define FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG 1 ///< Work around for Direct3D11 and old UVD/UVD+ ATI video cards -#define 
FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO 2 ///< Work around for Direct3D11 and old Intel GPUs with ClearVideo interface - /** * This structure is used to provides the necessary configurations and data * to the Direct3D11 FFmpeg HWAccel implementation. diff --git a/libavcodec/d3d12va.c b/libavcodec/d3d12va.c new file mode 100644 index 0000000000..7f1fab7251 --- /dev/null +++ b/libavcodec/d3d12va.c @@ -0,0 +1,552 @@ +/* + * Direct3D 12 HW acceleration video decoder + * + * copyright (c) 2022-2023 Wu Jianhua <toqsxw@outlook.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <assert.h> +#include <string.h> +#include <initguid.h> + +#include "libavutil/common.h" +#include "libavutil/log.h" +#include "libavutil/time.h" +#include "libavutil/imgutils.h" +#include "libavutil/hwcontext_d3d12va_internal.h" +#include "libavutil/hwcontext_d3d12va.h" +#include "avcodec.h" +#include "decode.h" +#include "d3d12va.h" + +typedef struct CommandAllocator { + ID3D12CommandAllocator *command_allocator; + uint64_t fence_value; +} CommandAllocator; + +int ff_d3d12va_get_suitable_max_bitstream_size(AVCodecContext *avctx) +{ + AVHWFramesContext *frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx); + return av_image_get_buffer_size(frames_ctx->sw_format, avctx->coded_width, avctx->coded_height, 1); +} + +static int d3d12va_get_valid_command_allocator(AVCodecContext *avctx, ID3D12CommandAllocator **ppAllocator) +{ + HRESULT hr; + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); + CommandAllocator allocator; + + if (av_fifo_peek(ctx->allocator_queue, &allocator, 1, 0) >= 0) { + uint64_t completion = ID3D12Fence_GetCompletedValue(ctx->sync_ctx->fence); + if (completion >= allocator.fence_value) { + *ppAllocator = allocator.command_allocator; + av_fifo_read(ctx->allocator_queue, &allocator, 1); + return 0; + } + } + + hr = ID3D12Device_CreateCommandAllocator(ctx->device_ctx->device, D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE, + &IID_ID3D12CommandAllocator, ppAllocator); + if (FAILED(hr)) { + av_log(avctx, AV_LOG_ERROR, "Failed to create a new command allocator!\n"); + return AVERROR(EINVAL); + } + + return 0; +} + +static int d3d12va_discard_command_allocator(AVCodecContext *avctx, ID3D12CommandAllocator *pAllocator, uint64_t fence_value) +{ + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); + + CommandAllocator allocator = 
{ + .command_allocator = pAllocator, + .fence_value = fence_value + }; + + if (av_fifo_write(ctx->allocator_queue, &allocator, 1) < 0) { + D3D12_OBJECT_RELEASE(pAllocator); + return AVERROR(ENOMEM); + } + + return 0; +} + +static void bufref_free_interface(void *opaque, uint8_t *data) +{ + D3D12_OBJECT_RELEASE(opaque); +} + +static AVBufferRef *bufref_wrap_interface(IUnknown *iface) +{ + return av_buffer_create((uint8_t*)iface, 1, bufref_free_interface, iface, 0); +} + +static int d3d12va_create_buffer(AVCodecContext *avctx, UINT size, ID3D12Resource **ppResouce) +{ + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); + + D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_UPLOAD }; + + D3D12_RESOURCE_DESC desc = { + .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER, + .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT, + .Width = size, + .Height = 1, + .DepthOrArraySize = 1, + .MipLevels = 1, + .Format = DXGI_FORMAT_UNKNOWN, + .SampleDesc = { .Count = 1, .Quality = 0 }, + .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + .Flags = D3D12_RESOURCE_FLAG_NONE, + }; + + HRESULT hr = ID3D12Device_CreateCommittedResource(ctx->device_ctx->device, &heap_props, D3D12_HEAP_FLAG_NONE, + &desc, D3D12_RESOURCE_STATE_GENERIC_READ, NULL, &IID_ID3D12Resource, ppResouce); + + if (FAILED(hr)) { + av_log(avctx, AV_LOG_ERROR, "Failed to create d3d12 buffer.\n"); + return AVERROR(EINVAL); + } + + return 0; +} + +static int d3d12va_wait_for_gpu(AVCodecContext *avctx) +{ + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); + AVD3D12VASyncContext *sync_ctx = ctx->sync_ctx; + + return av_d3d12va_wait_queue_idle(sync_ctx, ctx->command_queue); +} + +static int d3d12va_create_decoder_heap(AVCodecContext *avctx) +{ + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); + AVHWFramesContext *frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx); + AVD3D12VADeviceContext *hwctx = ctx->device_ctx; + + D3D12_VIDEO_DECODER_HEAP_DESC desc = { + .NodeMask = 0, + .Configuration = ctx->cfg, 
+ .DecodeWidth = frames_ctx->width, + .DecodeHeight = frames_ctx->height, + .Format = av_d3d12va_map_sw_to_hw_format(frames_ctx->sw_format), + .FrameRate = { avctx->framerate.num, avctx->framerate.den }, + .BitRate = avctx->bit_rate, + .MaxDecodePictureBufferCount = frames_ctx->initial_pool_size, + }; + + DX_CHECK(ID3D12VideoDevice_CreateVideoDecoderHeap(hwctx->video_device, &desc, + &IID_ID3D12VideoDecoderHeap, &ctx->decoder_heap)); + + return 0; + +fail: + if (ctx->decoder) { + av_log(avctx, AV_LOG_ERROR, "D3D12 doesn't support decoding frames with an extent " + "[width(%d), height(%d)], on your device!\n", frames_ctx->width, frames_ctx->height); + } + + return AVERROR(EINVAL); +} + +static int d3d12va_create_decoder(AVCodecContext *avctx) +{ + D3D12_VIDEO_DECODER_DESC desc; + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); + AVHWFramesContext *frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx); + AVD3D12VADeviceContext *hwctx = ctx->device_ctx; + + D3D12_FEATURE_DATA_VIDEO_DECODE_SUPPORT feature = { + .NodeIndex = 0, + .Configuration = ctx->cfg, + .Width = frames_ctx->width, + .Height = frames_ctx->height, + .DecodeFormat = av_d3d12va_map_sw_to_hw_format(frames_ctx->sw_format), + .FrameRate = { avctx->framerate.num, avctx->framerate.den }, + .BitRate = avctx->bit_rate, + }; + + DX_CHECK(ID3D12VideoDevice_CheckFeatureSupport(hwctx->video_device, D3D12_FEATURE_VIDEO_DECODE_SUPPORT, &feature, sizeof(feature))); + if (!(feature.SupportFlags & D3D12_VIDEO_DECODE_SUPPORT_FLAG_SUPPORTED) || + !(feature.DecodeTier >= D3D12_VIDEO_DECODE_TIER_2)) { + av_log(avctx, AV_LOG_ERROR, "D3D12 decoder doesn't support on this device\n"); + return AVERROR(EINVAL); + } + + desc = (D3D12_VIDEO_DECODER_DESC) { + .NodeMask = 0, + .Configuration = ctx->cfg, + }; + + DX_CHECK(ID3D12VideoDevice_CreateVideoDecoder(hwctx->video_device, &desc, &IID_ID3D12VideoDecoder, &ctx->decoder)); + + ctx->decoder_ref = bufref_wrap_interface((IUnknown *)ctx->decoder); + if (!ctx->decoder_ref) + return 
AVERROR(ENOMEM); + + return 0; + +fail: + return AVERROR(EINVAL); +} + +static inline int d3d12va_get_num_surfaces(enum AVCodecID codec_id) +{ + int num_surfaces = 1; + switch (codec_id) { + case AV_CODEC_ID_H264: + case AV_CODEC_ID_HEVC: + num_surfaces += 16; + break; + + case AV_CODEC_ID_AV1: + num_surfaces += 12; + break; + + case AV_CODEC_ID_VP9: + num_surfaces += 8; + break; + + default: + num_surfaces += 2; + } + + return num_surfaces; +} + +int ff_d3d12va_common_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx) +{ + AVHWFramesContext *frames_ctx = (AVHWFramesContext *)hw_frames_ctx->data; + AVHWDeviceContext *device_ctx = frames_ctx->device_ctx; + AVD3D12VAFramesContext *frames_hwctx = frames_ctx->hwctx; + + frames_ctx->format = AV_PIX_FMT_D3D12; + frames_ctx->sw_format = avctx->sw_pix_fmt == AV_PIX_FMT_YUV420P10 ? AV_PIX_FMT_P010 : AV_PIX_FMT_NV12; + frames_ctx->width = avctx->width; + frames_ctx->height = avctx->height; + + frames_ctx->initial_pool_size = d3d12va_get_num_surfaces(avctx->codec_id); + + return 0; +} + +int ff_d3d12va_decode_init(AVCodecContext *avctx) +{ + int ret; + UINT bitstream_size; + AVHWFramesContext *frames_ctx; + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); + + ID3D12CommandAllocator *command_allocator = NULL; + D3D12_COMMAND_QUEUE_DESC queue_desc = { + .Type = D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE, + .Priority = 0, + .Flags = D3D12_COMMAND_QUEUE_FLAG_NONE, + .NodeMask = 0 + }; + + ctx->pix_fmt = avctx->hwaccel->pix_fmt; + + ret = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_D3D12VA); + if (ret < 0) + return ret; + + frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx); + ctx->device_ctx = (AVD3D12VADeviceContext *)frames_ctx->device_ctx->hwctx; + + if (frames_ctx->format != ctx->pix_fmt) { + av_log(avctx, AV_LOG_ERROR, "Invalid pixfmt for hwaccel!\n"); + goto fail; + } + + ret = d3d12va_create_decoder(avctx); + if (ret < 0) + goto fail; + + ret = d3d12va_create_decoder_heap(avctx); + if (ret < 0) + goto 
fail; + + ctx->max_num_ref = frames_ctx->initial_pool_size; + + bitstream_size = ff_d3d12va_get_suitable_max_bitstream_size(avctx); + ctx->buffers = av_calloc(sizeof(ID3D12Resource *), ctx->max_num_ref); + for (int i = 0; i < ctx->max_num_ref; i++) { + ret = d3d12va_create_buffer(avctx, bitstream_size, &ctx->buffers[i]); + if (ret < 0) + goto fail; + } + + ctx->ref_resources = av_calloc(sizeof(ID3D12Resource *), ctx->max_num_ref); + if (!ctx->ref_resources) + return AVERROR(ENOMEM); + + ctx->ref_subresources = av_calloc(sizeof(UINT), ctx->max_num_ref); + if (!ctx->ref_subresources) + return AVERROR(ENOMEM); + + ctx->allocator_queue = av_fifo_alloc2(ctx->max_num_ref, sizeof(CommandAllocator), AV_FIFO_FLAG_AUTO_GROW); + if (!ctx->allocator_queue) + return AVERROR(ENOMEM); + + ret = av_d3d12va_sync_context_alloc(ctx->device_ctx, &ctx->sync_ctx); + if (ret < 0) + goto fail; + + ret = d3d12va_get_valid_command_allocator(avctx, &command_allocator); + if (ret < 0) + goto fail; + + DX_CHECK(ID3D12Device_CreateCommandQueue(ctx->device_ctx->device, &queue_desc, + &IID_ID3D12CommandQueue, &ctx->command_queue)); + + DX_CHECK(ID3D12Device_CreateCommandList(ctx->device_ctx->device, 0, queue_desc.Type, + command_allocator, NULL, &IID_ID3D12CommandList, &ctx->command_list)); + + DX_CHECK(ID3D12VideoDecodeCommandList_Close(ctx->command_list)); + + ID3D12CommandQueue_ExecuteCommandLists(ctx->command_queue, 1, (ID3D12CommandList **)&ctx->command_list); + + d3d12va_wait_for_gpu(avctx); + + d3d12va_discard_command_allocator(avctx, command_allocator, ctx->sync_ctx->fence_value); + + return 0; + +fail: + D3D12_OBJECT_RELEASE(command_allocator); + ff_d3d12va_decode_uninit(avctx); + + return AVERROR(EINVAL); +} + +int ff_d3d12va_decode_uninit(AVCodecContext *avctx) +{ + int i, num_allocator = 0; + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); + CommandAllocator allocator; + + if (ctx->sync_ctx) + d3d12va_wait_for_gpu(avctx); + + av_freep(&ctx->ref_resources); + + 
av_freep(&ctx->ref_subresources); + + for (i = 0; i < ctx->max_num_ref; i++) + D3D12_OBJECT_RELEASE(ctx->buffers[i]); + + av_freep(&ctx->buffers); + + D3D12_OBJECT_RELEASE(ctx->command_list); + + D3D12_OBJECT_RELEASE(ctx->command_queue); + + if (ctx->allocator_queue) { + while (av_fifo_read(ctx->allocator_queue, &allocator, 1) >= 0) { + num_allocator++; + D3D12_OBJECT_RELEASE(allocator.command_allocator); + } + + av_log(avctx, AV_LOG_VERBOSE, "Total number of command allocators reused: %d\n", num_allocator); + } + + av_fifo_freep2(&ctx->allocator_queue); + + av_d3d12va_sync_context_free(&ctx->sync_ctx); + + D3D12_OBJECT_RELEASE(ctx->decoder_heap); + + av_buffer_unref(&ctx->decoder_ref); + + return 0; +} + +static ID3D12Resource *get_surface(const AVFrame *frame) +{ + return (ID3D12Resource *)frame->data[0]; +} + +intptr_t ff_d3d12va_get_surface_index(AVCodecContext *ctx, const AVFrame* frame) +{ + return (intptr_t)frame->data[1]; +} + +static AVD3D12VASyncContext *d3d12va_get_sync_context(const AVFrame *frame) +{ + return (AVD3D12VASyncContext *)frame->data[2]; +} + +static int d3d12va_begin_update_reference_frames(AVCodecContext *avctx, D3D12_RESOURCE_BARRIER *barriers, int index) +{ + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); + AVHWFramesContext *frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx); + AVD3D12VAFramesContext *frames_hwctx = frames_ctx->hwctx; + + int num_barrier = 0; + + for (int i = 0; i < ctx->max_num_ref; i++) { + if (ctx->ref_resources[i] && ctx->ref_resources[i] != frames_hwctx->texture_infos[index].texture) { + barriers[num_barrier].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barriers[num_barrier].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + barriers[num_barrier].Transition = (D3D12_RESOURCE_TRANSITION_BARRIER){ + .pResource = ctx->ref_resources[i], + .Subresource = 0, + .StateBefore = D3D12_RESOURCE_STATE_COMMON, + .StateAfter = D3D12_RESOURCE_STATE_VIDEO_DECODE_READ, + }; + num_barrier++; + } + } + + return num_barrier; +} + 
+static void d3d12va_end_update_reference_frames(AVCodecContext *avctx, D3D12_RESOURCE_BARRIER *barriers, int index) +{ + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); + AVHWFramesContext *frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx); + AVD3D12VAFramesContext *frames_hwctx = frames_ctx->hwctx; + int num_barrier = 0; + + for (int i = 0; i < ctx->max_num_ref; i++) { + if (ctx->ref_resources[i] && ctx->ref_resources[i] != frames_hwctx->texture_infos[index].texture) { + barriers[num_barrier].Transition.pResource = ctx->ref_resources[i]; + barriers[num_barrier].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + barriers[num_barrier].Transition.StateBefore = D3D12_RESOURCE_STATE_VIDEO_DECODE_READ; + barriers[num_barrier].Transition.StateAfter = D3D12_RESOURCE_STATE_COMMON; + num_barrier++; + } + } +} + +int ff_d3d12va_common_end_frame(AVCodecContext *avctx, AVFrame *frame, + const void *pp, unsigned pp_size, + const void *qm, unsigned qm_size, + int(*update_input_arguments)(AVCodecContext *, D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS *, ID3D12Resource *)) +{ + int ret; + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); + AVHWFramesContext *frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx); + AVD3D12VAFramesContext *frames_hwctx = frames_ctx->hwctx; + ID3D12CommandAllocator *command_allocator = NULL; + + ID3D12Resource *resource = get_surface(frame); + UINT index = ff_d3d12va_get_surface_index(avctx, frame); + AVD3D12VASyncContext *sync_ctx = d3d12va_get_sync_context(frame); + + ID3D12VideoDecodeCommandList *cmd_list = ctx->command_list; + D3D12_RESOURCE_BARRIER barriers[D3D12VA_MAX_SURFACES] = { 0 }; + + D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS input_args = { + .NumFrameArguments = 2, + .FrameArguments = { + [0] = { + .Type = D3D12_VIDEO_DECODE_ARGUMENT_TYPE_PICTURE_PARAMETERS, + .Size = pp_size, + .pData = (void *)pp, + }, + [1] = { + .Type = D3D12_VIDEO_DECODE_ARGUMENT_TYPE_INVERSE_QUANTIZATION_MATRIX, + .Size = qm_size, + .pData = (void *)qm, + }, + }, + .pHeap 
= ctx->decoder_heap, + }; + + D3D12_VIDEO_DECODE_OUTPUT_STREAM_ARGUMENTS output_args = { + .ConversionArguments = 0, + .OutputSubresource = 0, + .pOutputTexture2D = resource, + }; + + UINT num_barrier = 1; + barriers[0] = (D3D12_RESOURCE_BARRIER) { + .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, + .Transition = { + .pResource = resource, + .Subresource = 0, + .StateBefore = D3D12_RESOURCE_STATE_COMMON, + .StateAfter = D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE, + }, + }; + + memset(ctx->ref_resources, 0, sizeof(ID3D12Resource *) * ctx->max_num_ref); + memset(ctx->ref_subresources, 0, sizeof(UINT) * ctx->max_num_ref); + input_args.ReferenceFrames.NumTexture2Ds = ctx->max_num_ref; + input_args.ReferenceFrames.ppTexture2Ds = ctx->ref_resources; + input_args.ReferenceFrames.pSubresources = ctx->ref_subresources; + + av_d3d12va_wait_idle(sync_ctx); + + if (!qm) + input_args.NumFrameArguments = 1; + + ret = update_input_arguments(avctx, &input_args, ctx->buffers[index]); + if (ret < 0) + return ret; + + ret = d3d12va_get_valid_command_allocator(avctx, &command_allocator); + if (ret < 0) + goto fail; + + DX_CHECK(ID3D12CommandAllocator_Reset(command_allocator)); + + DX_CHECK(ID3D12VideoDecodeCommandList_Reset(cmd_list, command_allocator)); + + num_barrier += d3d12va_begin_update_reference_frames(avctx, &barriers[1], index); + + ID3D12VideoDecodeCommandList_ResourceBarrier(cmd_list, num_barrier, barriers); + + ID3D12VideoDecodeCommandList_DecodeFrame(cmd_list, ctx->decoder, &output_args, &input_args); + + barriers[0].Transition.StateBefore = barriers[0].Transition.StateAfter; + barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_COMMON; + d3d12va_end_update_reference_frames(avctx, &barriers[1], index); + + ID3D12VideoDecodeCommandList_ResourceBarrier(cmd_list, num_barrier, barriers); + + DX_CHECK(ID3D12VideoDecodeCommandList_Close(cmd_list)); + + ID3D12CommandQueue_ExecuteCommandLists(ctx->command_queue, 1, 
(ID3D12CommandList **)&ctx->command_list); + + DX_CHECK(ID3D12CommandQueue_Signal(ctx->command_queue, sync_ctx->fence, ++sync_ctx->fence_value)); + + DX_CHECK(ID3D12CommandQueue_Signal(ctx->command_queue, ctx->sync_ctx->fence, ++ctx->sync_ctx->fence_value)); + + ret = d3d12va_discard_command_allocator(avctx, command_allocator, ctx->sync_ctx->fence_value); + if (ret < 0) + return ret; + + if (ctx->device_ctx->sync) { + ret = av_d3d12va_wait_idle(ctx->sync_ctx); + if (ret < 0) + return ret; + } + + return 0; + +fail: + if (command_allocator) + d3d12va_discard_command_allocator(avctx, command_allocator, ctx->sync_ctx->fence_value); + return AVERROR(EINVAL); +} diff --git a/libavcodec/d3d12va.h b/libavcodec/d3d12va.h new file mode 100644 index 0000000000..da3e7b7ab9 --- /dev/null +++ b/libavcodec/d3d12va.h @@ -0,0 +1,184 @@ +/* + * Direct3D 12 HW acceleration video decoder + * + * copyright (c) 2022-2023 Wu Jianhua <toqsxw@outlook.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_D3D12VA_H +#define AVCODEC_D3D12VA_H + +#include "libavutil/fifo.h" +#include "libavutil/hwcontext.h" +#include "libavutil/hwcontext_d3d12va.h" +#include "avcodec.h" +#include "internal.h" + +/** + * @brief This structure is used to provides the necessary configurations and data + * to the FFmpeg Direct3D 12 HWAccel implementation for video decoder. + * + * The application must make it available as AVCodecContext.hwaccel_context. + */ +typedef struct D3D12VADecodeContext { + AVBufferRef *decoder_ref; + + /** + * D3D12 video decoder + */ + ID3D12VideoDecoder *decoder; + + /** + * D3D12 video decoder heap + */ + ID3D12VideoDecoderHeap *decoder_heap; + + /** + * D3D12 configuration used to create the decoder + * + * Specified by decoders + */ + D3D12_VIDEO_DECODE_CONFIGURATION cfg; + + /** + * A cached queue for reusing the D3D12 command allocators + * + * @see https://learn.microsoft.com/en-us/windows/win32/direct3d12/recording-command-lists-and-bundles#id3d12commandallocator + */ + AVFifo *allocator_queue; + + /** + * D3D12 command queue + */ + ID3D12CommandQueue *command_queue; + + /** + * D3D12 video decode command list + */ + ID3D12VideoDecodeCommandList *command_list; + + /** + * The array of buffer resources used to upload compressed bitstream + * + * The buffers.length is the same as D3D12VADecodeContext.max_num_ref + */ + ID3D12Resource **buffers; + + /** + * The array of resources used for reference frames + * + * The ref_resources.length is the same as D3D12VADecodeContext.max_num_ref + */ + ID3D12Resource **ref_resources; + + /** + * The array of subresources used for reference frames + * + * The ref_subresources.length is the same as D3D12VADecodeContext.max_num_ref + */ + UINT *ref_subresources; + + /** + * Maximum 
number of reference frames + */ + UINT max_num_ref; + + /** + * The sync context used to sync command queue + */ + AVD3D12VASyncContext *sync_ctx; + + /** + * A pointer to AVD3D12VADeviceContext used to create D3D12 objects + */ + AVD3D12VADeviceContext *device_ctx; + + /** + * Pixel format + */ + enum AVPixelFormat pix_fmt; + + /** + * Private to the FFmpeg AVHWAccel implementation + */ + unsigned report_id; +} D3D12VADecodeContext; + +/** + * @} + */ + +#define D3D12VA_DECODE_CONTEXT(avctx) ((D3D12VADecodeContext *)((avctx)->internal->hwaccel_priv_data)) +#define D3D12VA_FRAMES_CONTEXT(avctx) ((AVHWFramesContext *)(avctx)->hw_frames_ctx->data) + +/** + * @brief Get a suitable maximum bitstream size + * + * Creating and destroying a resource on d3d12 needs sync and reallocation, so use this function + * to help allocate a big enough bitstream buffer to avoid recreating resources when decoding. + * + * @return the suitable size + */ +int ff_d3d12va_get_suitable_max_bitstream_size(AVCodecContext *avctx); + +/** + * @brief init D3D12VADecodeContext + * + * @return Error code (ret < 0 if failed) + */ +int ff_d3d12va_decode_init(AVCodecContext *avctx); + +/** + * @brief uninit D3D12VADecodeContext + * + * @return Error code (ret < 0 if failed) + */ +int ff_d3d12va_decode_uninit(AVCodecContext *avctx); + +/** + * @brief d3d12va common frame params + * + * @return Error code (ret < 0 if failed) + */ +int ff_d3d12va_common_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx); + +/** + * @brief d3d12va common end frame + * + * @param avctx codec context + * @param frame current output frame + * @param pp picture parameters + * @param pp_size the size of the picture parameters + * @param qm quantization matrix + * @param qm_size the size of the quantization matrix + * @param callback update decoder-specified input stream arguments + * @return Error code (ret < 0 if failed) + */ +int ff_d3d12va_common_end_frame(AVCodecContext *avctx, AVFrame *frame, + const void 
*pp, unsigned pp_size, + const void *qm, unsigned qm_size, + int(*)(AVCodecContext *, D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS *, ID3D12Resource *)); + +/** + * @brief get surface index + * + * @return index + */ +intptr_t ff_d3d12va_get_surface_index(AVCodecContext *avctx, const AVFrame *frame); + +#endif /* AVCODEC_D3D12VA_DEC_H */ diff --git a/libavcodec/d3d12va_h264.c b/libavcodec/d3d12va_h264.c new file mode 100644 index 0000000000..0810a034b4 --- /dev/null +++ b/libavcodec/d3d12va_h264.c @@ -0,0 +1,210 @@ +/* + * Direct3D 12 h264 HW acceleration + * + * copyright (c) 2022-2023 Wu Jianhua <toqsxw@outlook.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config_components.h" +#include "libavutil/avassert.h" +#include "h264dec.h" +#include "h264data.h" +#include "h264_ps.h" +#include "mpegutils.h" +#include "dxva2_internal.h" +#include "d3d12va.h" +#include "libavutil/hwcontext_d3d12va_internal.h" +#include <dxva.h> + +typedef struct H264DecodePictureContext { + DXVA_PicParams_H264 pp; + DXVA_Qmatrix_H264 qm; + unsigned slice_count; + DXVA_Slice_H264_Short slice_short[MAX_SLICES]; + const uint8_t *bitstream; + unsigned bitstream_size; +} H264DecodePictureContext; + +static void fill_slice_short(DXVA_Slice_H264_Short *slice, + unsigned position, unsigned size) +{ + memset(slice, 0, sizeof(*slice)); + slice->BSNALunitDataLocation = position; + slice->SliceBytesInBuffer = size; + slice->wBadSliceChopping = 0; +} + +static int d3d12va_h264_start_frame(AVCodecContext *avctx, + av_unused const uint8_t *buffer, + av_unused uint32_t size) +{ + const H264Context *h = avctx->priv_data; + H264DecodePictureContext *ctx_pic = h->cur_pic_ptr->hwaccel_picture_private; + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); + + if (!ctx) + return -1; + + assert(ctx_pic); + + ff_dxva2_h264_fill_picture_parameters(avctx, (AVDXVAContext *)ctx, &ctx_pic->pp); + + ff_dxva2_h264_fill_scaling_lists(avctx, (AVDXVAContext *)ctx, &ctx_pic->qm); + + ctx_pic->slice_count = 0; + ctx_pic->bitstream_size = 0; + ctx_pic->bitstream = NULL; + + return 0; +} + +static int d3d12va_h264_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size) +{ + unsigned position; + const H264Context *h = avctx->priv_data; + const H264SliceContext *sl = &h->slice_ctx[0]; + const H264Picture *current_picture = h->cur_pic_ptr; + H264DecodePictureContext *ctx_pic = current_picture->hwaccel_picture_private; + + if 
(ctx_pic->slice_count >= MAX_SLICES) + return AVERROR(ERANGE); + + if (!ctx_pic->bitstream) + ctx_pic->bitstream = buffer; + ctx_pic->bitstream_size += size; + + position = buffer - ctx_pic->bitstream; + fill_slice_short(&ctx_pic->slice_short[ctx_pic->slice_count], position, size); + ctx_pic->slice_count++; + + if (sl->slice_type != AV_PICTURE_TYPE_I && sl->slice_type != AV_PICTURE_TYPE_SI) + ctx_pic->pp.wBitFields &= ~(1 << 15); /* Set IntraPicFlag to 0 */ + + return 0; +} + +#define START_CODE 65536 +#define START_CODE_SIZE 3 +static int update_input_arguments(AVCodecContext *avctx, D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS *input_args, ID3D12Resource *buffer) +{ + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); + AVHWFramesContext *frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx); + AVD3D12VAFramesContext *frames_hwctx = frames_ctx->hwctx; + + const H264Context *h = avctx->priv_data; + const H264Picture *current_picture = h->cur_pic_ptr; + H264DecodePictureContext *ctx_pic = current_picture->hwaccel_picture_private; + + int i, index; + uint8_t *mapped_data, *mapped_ptr; + DXVA_Slice_H264_Short *slice; + D3D12_VIDEO_DECODE_FRAME_ARGUMENT *args; + + if (FAILED(ID3D12Resource_Map(buffer, 0, NULL, &mapped_data))) { + av_log(avctx, AV_LOG_ERROR, "Failed to map D3D12 Buffer resource!\n"); + return AVERROR(EINVAL); + } + + mapped_ptr = mapped_data; + for (i = 0; i < ctx_pic->slice_count; i++) { + UINT position, size; + slice = &ctx_pic->slice_short[i]; + + position = slice->BSNALunitDataLocation; + size = slice->SliceBytesInBuffer; + + slice->SliceBytesInBuffer += START_CODE_SIZE; + slice->BSNALunitDataLocation = mapped_ptr - mapped_data; + + *(uint32_t *)mapped_ptr = START_CODE; + mapped_ptr += START_CODE_SIZE; + + memcpy(mapped_ptr, &ctx_pic->bitstream[position], size); + mapped_ptr += size; + } + + ID3D12Resource_Unmap(buffer, 0, NULL); + + input_args->CompressedBitstream = (D3D12_VIDEO_DECODE_COMPRESSED_BITSTREAM){ + .pBuffer = buffer, + .Offset = 0, + .Size = 
mapped_ptr - mapped_data, + }; + + args = &input_args->FrameArguments[input_args->NumFrameArguments++]; + args->Type = D3D12_VIDEO_DECODE_ARGUMENT_TYPE_SLICE_CONTROL; + args->Size = sizeof(DXVA_Slice_H264_Short) * ctx_pic->slice_count; + args->pData = ctx_pic->slice_short; + + index = ctx_pic->pp.CurrPic.Index7Bits; + ctx->ref_resources[index] = frames_hwctx->texture_infos[index].texture; + for (i = 0; i < FF_ARRAY_ELEMS(ctx_pic->pp.RefFrameList); i++) { + index = ctx_pic->pp.RefFrameList[i].Index7Bits; + if (index != 0x7f) + ctx->ref_resources[index] = frames_hwctx->texture_infos[index].texture; + } + + return 0; +} + +static int d3d12va_h264_end_frame(AVCodecContext *avctx) +{ + H264Context *h = avctx->priv_data; + H264DecodePictureContext *ctx_pic = h->cur_pic_ptr->hwaccel_picture_private; + H264SliceContext *sl = &h->slice_ctx[0]; + + int ret; + + if (ctx_pic->slice_count <= 0 || ctx_pic->bitstream_size <= 0) + return -1; + + ret = ff_d3d12va_common_end_frame(avctx, h->cur_pic_ptr->f, + &ctx_pic->pp, sizeof(ctx_pic->pp), + &ctx_pic->qm, sizeof(ctx_pic->qm), + update_input_arguments); + if (!ret) + ff_h264_draw_horiz_band(h, sl, 0, h->avctx->height); + + return ret; +} + +static int d3d12va_h264_decode_init(AVCodecContext *avctx) +{ + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); + + ctx->cfg.DecodeProfile = D3D12_VIDEO_DECODE_PROFILE_H264; + + return ff_d3d12va_decode_init(avctx); +} + +#if CONFIG_H264_D3D12VA_HWACCEL +const AVHWAccel ff_h264_d3d12va_hwaccel = { + .name = "h264_d3d12va", + .type = AVMEDIA_TYPE_VIDEO, + .id = AV_CODEC_ID_H264, + .pix_fmt = AV_PIX_FMT_D3D12, + .init = d3d12va_h264_decode_init, + .uninit = ff_d3d12va_decode_uninit, + .start_frame = d3d12va_h264_start_frame, + .decode_slice = d3d12va_h264_decode_slice, + .end_frame = d3d12va_h264_end_frame, + .frame_params = ff_d3d12va_common_frame_params, + .frame_priv_data_size = sizeof(H264DecodePictureContext), + .priv_data_size = sizeof(D3D12VADecodeContext), +}; +#endif diff 
--git a/libavcodec/dxva2.c b/libavcodec/dxva2.c index 568d686f39..b22ea3e8f2 100644 --- a/libavcodec/dxva2.c +++ b/libavcodec/dxva2.c @@ -774,6 +774,10 @@ unsigned ff_dxva2_get_surface_index(const AVCodecContext *avctx, void *surface = get_surface(avctx, frame); unsigned i; +#if CONFIG_D3D12VA + if (avctx->pix_fmt == AV_PIX_FMT_D3D12) + return (intptr_t)frame->data[1]; +#endif #if CONFIG_D3D11VA if (avctx->pix_fmt == AV_PIX_FMT_D3D11) return (intptr_t)frame->data[1]; @@ -1056,3 +1060,23 @@ int ff_dxva2_is_d3d11(const AVCodecContext *avctx) else return 0; } + +unsigned *ff_dxva2_get_report_id(const AVCodecContext *avctx, AVDXVAContext *ctx) +{ + unsigned *report_id = NULL; + +#if CONFIG_D3D12VA + if (avctx->pix_fmt == AV_PIX_FMT_D3D12) + report_id = &ctx->d3d12va.report_id; +#endif +#if CONFIG_D3D11VA + if (ff_dxva2_is_d3d11(avctx)) + report_id = &ctx->d3d11va.report_id; +#endif +#if CONFIG_DXVA2 + if (avctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD) + report_id = &ctx->dxva2.report_id; +#endif + + return report_id; +} diff --git a/libavcodec/dxva2.h b/libavcodec/dxva2.h index 22c93992f2..bdec6112e9 100644 --- a/libavcodec/dxva2.h +++ b/libavcodec/dxva2.h @@ -45,9 +45,6 @@ * @{ */ -#define FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG 1 ///< Work around for DXVA2 and old UVD/UVD+ ATI video cards -#define FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO 2 ///< Work around for DXVA2 and old Intel GPUs with ClearVideo interface - /** * This structure is used to provides the necessary configurations and data * to the DXVA2 FFmpeg HWAccel implementation. 
diff --git a/libavcodec/dxva2_h264.c b/libavcodec/dxva2_h264.c index 6300b1418d..7a076ea981 100644 --- a/libavcodec/dxva2_h264.c +++ b/libavcodec/dxva2_h264.c @@ -47,9 +47,10 @@ static void fill_picture_entry(DXVA_PicEntry_H264 *pic, pic->bPicEntry = index | (flag << 7); } -static void fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext *ctx, const H264Context *h, +void ff_dxva2_h264_fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext *ctx, DXVA_PicParams_H264 *pp) { + const H264Context *h = avctx->priv_data; const H264Picture *current_picture = h->cur_pic_ptr; const SPS *sps = h->ps.sps; const PPS *pps = h->ps.pps; @@ -163,9 +164,10 @@ static void fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext * //pp->SliceGroupMap[810]; /* XXX not implemented by FFmpeg */ } -static void fill_scaling_lists(const AVCodecContext *avctx, AVDXVAContext *ctx, const H264Context *h, DXVA_Qmatrix_H264 *qm) +void ff_dxva2_h264_fill_scaling_lists(const AVCodecContext *avctx, AVDXVAContext *ctx, DXVA_Qmatrix_H264 *qm) { - const PPS *pps = h->ps.pps; + const H264Context *h = avctx->priv_data; + const PPS *pps = h->ps.pps; unsigned i, j; memset(qm, 0, sizeof(*qm)); if (DXVA_CONTEXT_WORKAROUND(avctx, ctx) & FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG) { @@ -453,10 +455,10 @@ static int dxva2_h264_start_frame(AVCodecContext *avctx, assert(ctx_pic); /* Fill up DXVA_PicParams_H264 */ - fill_picture_parameters(avctx, ctx, h, &ctx_pic->pp); + ff_dxva2_h264_fill_picture_parameters(avctx, ctx, &ctx_pic->pp); /* Fill up DXVA_Qmatrix_H264 */ - fill_scaling_lists(avctx, ctx, h, &ctx_pic->qm); + ff_dxva2_h264_fill_scaling_lists(avctx, ctx, &ctx_pic->qm); ctx_pic->slice_count = 0; ctx_pic->bitstream_size = 0; diff --git a/libavcodec/dxva2_internal.h b/libavcodec/dxva2_internal.h index b822af59cd..a9a1fc090e 100644 --- a/libavcodec/dxva2_internal.h +++ b/libavcodec/dxva2_internal.h @@ -26,18 +26,34 @@ #define COBJMACROS #include "config.h" +#include 
"config_components.h" /* define the proper COM entries before forcing desktop APIs */ #include <objbase.h> +#define FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG 1 ///< Work around for DXVA2/Direct3D11 and old UVD/UVD+ ATI video cards +#define FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO 2 ///< Work around for DXVA2/Direct3D11 and old Intel GPUs with ClearVideo interface + #if CONFIG_DXVA2 #include "dxva2.h" #include "libavutil/hwcontext_dxva2.h" +#define DXVA2_VAR(ctx, var) ctx->dxva2.var +#else +#define DXVA2_VAR(ctx, var) 0 #endif + #if CONFIG_D3D11VA #include "d3d11va.h" #include "libavutil/hwcontext_d3d11va.h" +#define D3D11VA_VAR(ctx, var) ctx->d3d11va.var +#else +#define D3D11VA_VAR(ctx, var) 0 +#endif + +#if CONFIG_D3D12VA +#include "d3d12va.h" #endif + #if HAVE_DXVA_H /* When targeting WINAPI_FAMILY_PHONE_APP or WINAPI_FAMILY_APP, dxva.h * defines nothing. Force the struct definitions to be visible. */ @@ -62,6 +78,9 @@ typedef union { #if CONFIG_DXVA2 struct dxva_context dxva2; #endif +#if CONFIG_D3D12VA + struct D3D12VADecodeContext d3d12va; +#endif } AVDXVAContext; typedef struct FFDXVASharedContext { @@ -101,39 +120,19 @@ typedef struct FFDXVASharedContext { #define D3D11VA_CONTEXT(ctx) (&ctx->d3d11va) #define DXVA2_CONTEXT(ctx) (&ctx->dxva2) -#if CONFIG_D3D11VA && CONFIG_DXVA2 -#define DXVA_CONTEXT_WORKAROUND(avctx, ctx) (ff_dxva2_is_d3d11(avctx) ? ctx->d3d11va.workaround : ctx->dxva2.workaround) -#define DXVA_CONTEXT_COUNT(avctx, ctx) (ff_dxva2_is_d3d11(avctx) ? ctx->d3d11va.surface_count : ctx->dxva2.surface_count) -#define DXVA_CONTEXT_DECODER(avctx, ctx) (ff_dxva2_is_d3d11(avctx) ? (void *)ctx->d3d11va.decoder : (void *)ctx->dxva2.decoder) -#define DXVA_CONTEXT_REPORT_ID(avctx, ctx) (*(ff_dxva2_is_d3d11(avctx) ? &ctx->d3d11va.report_id : &ctx->dxva2.report_id)) -#define DXVA_CONTEXT_CFG(avctx, ctx) (ff_dxva2_is_d3d11(avctx) ? (void *)ctx->d3d11va.cfg : (void *)ctx->dxva2.cfg) -#define DXVA_CONTEXT_CFG_BITSTREAM(avctx, ctx) (ff_dxva2_is_d3d11(avctx) ? 
ctx->d3d11va.cfg->ConfigBitstreamRaw : ctx->dxva2.cfg->ConfigBitstreamRaw) -#define DXVA_CONTEXT_CFG_INTRARESID(avctx, ctx) (ff_dxva2_is_d3d11(avctx) ? ctx->d3d11va.cfg->ConfigIntraResidUnsigned : ctx->dxva2.cfg->ConfigIntraResidUnsigned) -#define DXVA_CONTEXT_CFG_RESIDACCEL(avctx, ctx) (ff_dxva2_is_d3d11(avctx) ? ctx->d3d11va.cfg->ConfigResidDiffAccelerator : ctx->dxva2.cfg->ConfigResidDiffAccelerator) +#define DXVA2_CONTEXT_VAR(avctx, ctx, var) (avctx->pix_fmt == AV_PIX_FMT_D3D12 ? 0 : (ff_dxva2_is_d3d11(avctx) ? D3D11VA_VAR(ctx, var) : DXVA2_VAR(ctx, var))) + +#define DXVA_CONTEXT_REPORT_ID(avctx, ctx) (*ff_dxva2_get_report_id(avctx, ctx)) +#define DXVA_CONTEXT_WORKAROUND(avctx, ctx) DXVA2_CONTEXT_VAR(avctx, ctx, workaround) +#define DXVA_CONTEXT_COUNT(avctx, ctx) DXVA2_CONTEXT_VAR(avctx, ctx, surface_count) +#define DXVA_CONTEXT_DECODER(avctx, ctx) (avctx->pix_fmt == AV_PIX_FMT_D3D12 ? 0 : (ff_dxva2_is_d3d11(avctx) ? (void *)D3D11VA_VAR(ctx, decoder) : (void *)DXVA2_VAR(ctx, decoder))) +#define DXVA_CONTEXT_CFG(avctx, ctx) (avctx->pix_fmt == AV_PIX_FMT_D3D12 ? 0 : (ff_dxva2_is_d3d11(avctx) ? 
(void *)D3D11VA_VAR(ctx, cfg) : (void *)DXVA2_VAR(ctx, cfg))) +#define DXVA_CONTEXT_CFG_BITSTREAM(avctx, ctx) DXVA2_CONTEXT_VAR(avctx, ctx, cfg->ConfigBitstreamRaw) +#define DXVA_CONTEXT_CFG_INTRARESID(avctx, ctx) DXVA2_CONTEXT_VAR(avctx, ctx, cfg->ConfigIntraResidUnsigned) +#define DXVA_CONTEXT_CFG_RESIDACCEL(avctx, ctx) DXVA2_CONTEXT_VAR(avctx, ctx, cfg->ConfigResidDiffAccelerator) #define DXVA_CONTEXT_VALID(avctx, ctx) (DXVA_CONTEXT_DECODER(avctx, ctx) && \ DXVA_CONTEXT_CFG(avctx, ctx) && \ - (ff_dxva2_is_d3d11(avctx) || ctx->dxva2.surface_count)) -#elif CONFIG_DXVA2 -#define DXVA_CONTEXT_WORKAROUND(avctx, ctx) (ctx->dxva2.workaround) -#define DXVA_CONTEXT_COUNT(avctx, ctx) (ctx->dxva2.surface_count) -#define DXVA_CONTEXT_DECODER(avctx, ctx) (ctx->dxva2.decoder) -#define DXVA_CONTEXT_REPORT_ID(avctx, ctx) (*(&ctx->dxva2.report_id)) -#define DXVA_CONTEXT_CFG(avctx, ctx) (ctx->dxva2.cfg) -#define DXVA_CONTEXT_CFG_BITSTREAM(avctx, ctx) (ctx->dxva2.cfg->ConfigBitstreamRaw) -#define DXVA_CONTEXT_CFG_INTRARESID(avctx, ctx) (ctx->dxva2.cfg->ConfigIntraResidUnsigned) -#define DXVA_CONTEXT_CFG_RESIDACCEL(avctx, ctx) (ctx->dxva2.cfg->ConfigResidDiffAccelerator) -#define DXVA_CONTEXT_VALID(avctx, ctx) (ctx->dxva2.decoder && ctx->dxva2.cfg && ctx->dxva2.surface_count) -#elif CONFIG_D3D11VA -#define DXVA_CONTEXT_WORKAROUND(avctx, ctx) (ctx->d3d11va.workaround) -#define DXVA_CONTEXT_COUNT(avctx, ctx) (ctx->d3d11va.surface_count) -#define DXVA_CONTEXT_DECODER(avctx, ctx) (ctx->d3d11va.decoder) -#define DXVA_CONTEXT_REPORT_ID(avctx, ctx) (*(&ctx->d3d11va.report_id)) -#define DXVA_CONTEXT_CFG(avctx, ctx) (ctx->d3d11va.cfg) -#define DXVA_CONTEXT_CFG_BITSTREAM(avctx, ctx) (ctx->d3d11va.cfg->ConfigBitstreamRaw) -#define DXVA_CONTEXT_CFG_INTRARESID(avctx, ctx) (ctx->d3d11va.cfg->ConfigIntraResidUnsigned) -#define DXVA_CONTEXT_CFG_RESIDACCEL(avctx, ctx) (ctx->d3d11va.cfg->ConfigResidDiffAccelerator) -#define DXVA_CONTEXT_VALID(avctx, ctx) (ctx->d3d11va.decoder && ctx->d3d11va.cfg) 
-#endif + (ff_dxva2_is_d3d11(avctx) || DXVA2_VAR(ctx, surface_count))) unsigned ff_dxva2_get_surface_index(const AVCodecContext *avctx, const AVDXVAContext *, @@ -161,4 +160,10 @@ int ff_dxva2_common_frame_params(AVCodecContext *avctx, int ff_dxva2_is_d3d11(const AVCodecContext *avctx); +unsigned *ff_dxva2_get_report_id(const AVCodecContext *avctx, AVDXVAContext *ctx); + +void ff_dxva2_h264_fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext *ctx, DXVA_PicParams_H264 *pp); + +void ff_dxva2_h264_fill_scaling_lists(const AVCodecContext *avctx, AVDXVAContext *ctx, DXVA_Qmatrix_H264 *qm); + #endif /* AVCODEC_DXVA2_INTERNAL_H */ diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c index 41bf30eefc..df70ad8a2f 100644 --- a/libavcodec/h264_slice.c +++ b/libavcodec/h264_slice.c @@ -778,6 +778,7 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback) { #define HWACCEL_MAX (CONFIG_H264_DXVA2_HWACCEL + \ (CONFIG_H264_D3D11VA_HWACCEL * 2) + \ + CONFIG_H264_D3D12VA_HWACCEL + \ CONFIG_H264_NVDEC_HWACCEL + \ CONFIG_H264_VAAPI_HWACCEL + \ CONFIG_H264_VIDEOTOOLBOX_HWACCEL + \ @@ -883,6 +884,9 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback) *fmt++ = AV_PIX_FMT_D3D11VA_VLD; *fmt++ = AV_PIX_FMT_D3D11; #endif +#if CONFIG_H264_D3D12VA_HWACCEL + *fmt++ = AV_PIX_FMT_D3D12; +#endif #if CONFIG_H264_VAAPI_HWACCEL *fmt++ = AV_PIX_FMT_VAAPI; #endif diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c index 19f8dba131..853d3262f7 100644 --- a/libavcodec/h264dec.c +++ b/libavcodec/h264dec.c @@ -1089,6 +1089,9 @@ const FFCodec ff_h264_decoder = { #if CONFIG_H264_D3D11VA2_HWACCEL HWACCEL_D3D11VA2(h264), #endif +#if CONFIG_H264_D3D12VA_HWACCEL + HWACCEL_D3D12VA(h264), +#endif #if CONFIG_H264_NVDEC_HWACCEL HWACCEL_NVDEC(h264), #endif diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h index 48dfc17f72..be54604b81 100644 --- a/libavcodec/hwaccels.h +++ b/libavcodec/hwaccels.h @@ -32,6 +32,7 @@ extern const 
AVHWAccel ff_h263_vaapi_hwaccel; extern const AVHWAccel ff_h263_videotoolbox_hwaccel; extern const AVHWAccel ff_h264_d3d11va_hwaccel; extern const AVHWAccel ff_h264_d3d11va2_hwaccel; +extern const AVHWAccel ff_h264_d3d12va_hwaccel; extern const AVHWAccel ff_h264_dxva2_hwaccel; extern const AVHWAccel ff_h264_nvdec_hwaccel; extern const AVHWAccel ff_h264_vaapi_hwaccel; diff --git a/libavcodec/hwconfig.h b/libavcodec/hwconfig.h index e8c6186151..e20118c096 100644 --- a/libavcodec/hwconfig.h +++ b/libavcodec/hwconfig.h @@ -82,6 +82,8 @@ void ff_hwaccel_uninit(AVCodecContext *avctx); HW_CONFIG_HWACCEL(1, 1, 1, VULKAN, VULKAN, ff_ ## codec ## _vulkan_hwaccel) #define HWACCEL_D3D11VA(codec) \ HW_CONFIG_HWACCEL(0, 0, 1, D3D11VA_VLD, NONE, ff_ ## codec ## _d3d11va_hwaccel) +#define HWACCEL_D3D12VA(codec) \ + HW_CONFIG_HWACCEL(1, 1, 0, D3D12, D3D12VA, ff_ ## codec ## _d3d12va_hwaccel) #define HW_CONFIG_ENCODER(device, frames, ad_hoc, format, device_type_) \ &(const AVCodecHWConfigInternal) { \