[FFmpeg-devel,v10,2/9] avcodec: add D3D12VA hardware accelerated H264 decoding

Message ID	20231202101250.1410-2-tong1.wu@intel.com
State	New
Headers	show Delivered-To: ffmpegpatchwork2@gmail.com Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100; From: Tong Wu <tong1.wu-at-intel.com@ffmpeg.org> To: ffmpeg-devel@ffmpeg.org Date: Sat, 2 Dec 2023 18:12:42 +0800 Message-ID: <20231202101250.1410-2-tong1.wu@intel.com> In-Reply-To: <20231202101250.1410-1-tong1.wu@intel.com> References: <20231202101250.1410-1-tong1.wu@intel.com> MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH v10 2/9] avcodec: add D3D12VA hardware accelerated H264 decoding Precedence: list Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> Cc: Tong Wu <tong1.wu@intel.com>, Wu Jianhua <toqsxw@outlook.com> Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>
Series	[FFmpeg-devel,v10,1/9] libavutil: add hwcontext_d3d12va and AV_PIX_FMT_D3D12 \| expand [FFmpeg-devel,v10,1/9] libavutil: add hwcontext_d3d12va and AV_PIX_FMT_D3D12 [FFmpeg-devel,v10,2/9] avcodec: add D3D12VA hardware accelerated H264 decoding [FFmpeg-devel,v10,3/9] avcodec: add D3D12VA hardware accelerated HEVC decoding [FFmpeg-devel,v10,4/9] avcodec: add D3D12VA hardware accelerated VP9 decoding [FFmpeg-devel,v10,5/9] avcodec: add D3D12VA hardware accelerated AV1 decoding [FFmpeg-devel,v10,6/9] avcodec: add D3D12VA hardware accelerated MPEG-2 decoding [FFmpeg-devel,v10,7/9] avcodec: add D3D12VA hardware accelerated VC1 decoding [FFmpeg-devel,v10,8/9] Changelog: D3D12VA hardware accelerated H264, HEVC, VP9, AV1, MPEG-2 and VC1… [FFmpeg-devel,v10,9/9] avcodec/d3d12va_hevc: enable allow_profile_mismatch flag for d3d12va msp pro…

Context	Check	Description
andriy/make_x86	success	Make finished
andriy/make_fate_x86	success	Make fate finished

diff --git a/configure b/configure index cdeed9bab1..8709ec2620 100755 --- a/configure +++ b/configure @@ -3082,6 +3082,8 @@ h264_d3d11va_hwaccel_deps="d3d11va" h264_d3d11va_hwaccel_select="h264_decoder" h264_d3d11va2_hwaccel_deps="d3d11va" h264_d3d11va2_hwaccel_select="h264_decoder" +h264_d3d12va_hwaccel_deps="d3d12va" +h264_d3d12va_hwaccel_select="h264_decoder" h264_dxva2_hwaccel_deps="dxva2" h264_dxva2_hwaccel_select="h264_decoder" h264_nvdec_hwaccel_deps="nvdec" diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 748806e702..80a8cf5f57 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -986,6 +986,7 @@ OBJS-$(CONFIG_ADPCM_ZORK_DECODER) += adpcm.o adpcm_data.o # hardware accelerators OBJS-$(CONFIG_D3D11VA) += dxva2.o +OBJS-$(CONFIG_D3D12VA) += dxva2.o d3d12va_decode.o OBJS-$(CONFIG_DXVA2) += dxva2.o OBJS-$(CONFIG_NVDEC) += nvdec.o OBJS-$(CONFIG_VAAPI) += vaapi_decode.o @@ -1003,6 +1004,7 @@ OBJS-$(CONFIG_H263_VAAPI_HWACCEL) += vaapi_mpeg4.o OBJS-$(CONFIG_H263_VIDEOTOOLBOX_HWACCEL) += videotoolbox.o OBJS-$(CONFIG_H264_D3D11VA_HWACCEL) += dxva2_h264.o OBJS-$(CONFIG_H264_DXVA2_HWACCEL) += dxva2_h264.o +OBJS-$(CONFIG_H264_D3D12VA_HWACCEL) += dxva2_h264.o d3d12va_h264.o OBJS-$(CONFIG_H264_NVDEC_HWACCEL) += nvdec_h264.o OBJS-$(CONFIG_H264_QSV_HWACCEL) += qsvdec.o OBJS-$(CONFIG_H264_VAAPI_HWACCEL) += vaapi_h264.o @@ -1296,6 +1298,7 @@ SKIPHEADERS += %_tablegen.h \ SKIPHEADERS-$(CONFIG_AMF) += amfenc.h SKIPHEADERS-$(CONFIG_D3D11VA) += d3d11va.h dxva2_internal.h +SKIPHEADERS-$(CONFIG_D3D12VA) += d3d12va_decode.h SKIPHEADERS-$(CONFIG_DXVA2) += dxva2.h dxva2_internal.h SKIPHEADERS-$(CONFIG_JNI) += ffjni.h SKIPHEADERS-$(CONFIG_LCMS2) += fflcms2.h diff --git a/libavcodec/d3d11va.h b/libavcodec/d3d11va.h index 6816b6c1e6..27f40e5519 100644 --- a/libavcodec/d3d11va.h +++ b/libavcodec/d3d11va.h @@ -45,9 +45,6 @@ * @{ */ -#define FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG 1 ///< Work around for Direct3D11 and old UVD/UVD+ ATI video cards -#define 
FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO 2 ///< Work around for Direct3D11 and old Intel GPUs with ClearVideo interface - /** * This structure is used to provides the necessary configurations and data * to the Direct3D11 FFmpeg HWAccel implementation. diff --git a/libavcodec/d3d12va_decode.c b/libavcodec/d3d12va_decode.c new file mode 100644 index 0000000000..c8228fdaef --- /dev/null +++ b/libavcodec/d3d12va_decode.c @@ -0,0 +1,564 @@ +/* + * Direct3D 12 HW acceleration video decoder + * + * copyright (c) 2022-2023 Wu Jianhua <toqsxw@outlook.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <assert.h> +#include <string.h> +#include <initguid.h> + +#include "libavutil/common.h" +#include "libavutil/log.h" +#include "libavutil/time.h" +#include "libavutil/imgutils.h" +#include "libavutil/hwcontext_d3d12va_internal.h" +#include "libavutil/hwcontext_d3d12va.h" +#include "avcodec.h" +#include "decode.h" +#include "d3d12va_decode.h" + +typedef struct HelperObjects { + ID3D12CommandAllocator *command_allocator; + ID3D12Resource *buffer; + uint64_t fence_value; +} HelperObjects; + +int ff_d3d12va_get_suitable_max_bitstream_size(AVCodecContext *avctx) +{ + AVHWFramesContext *frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx); + return av_image_get_buffer_size(frames_ctx->sw_format, avctx->coded_width, avctx->coded_height, 1); +} + +unsigned ff_d3d12va_get_surface_index(const AVCodecContext *avctx, + D3D12VADecodeContext *ctx, const AVFrame *frame, + int curr) +{ + AVD3D12VAFrame *f; + ID3D12Resource *res; + unsigned i; + + f = (AVD3D12VAFrame *)frame->data[0]; + if (!f) + goto fail; + + res = f->texture; + if (!res) + goto fail; + + if (!curr) { + for (i = 0; i < ctx->max_num_ref; i++) { + if (ctx->ref_resources[i] && res == ctx->ref_resources[i]) { + ctx->used_mask |= 1 << i; + return i; + } + } + } else { + for (i = 0; i < ctx->max_num_ref; i++) { + if (!((ctx->used_mask >> i) & 0x1)) { + ctx->ref_resources[i] = res; + return i; + } + } + } + +fail: + assert(0); + return 0; +} + +static int d3d12va_get_valid_helper_objects(AVCodecContext *avctx, ID3D12CommandAllocator **ppAllocator, + ID3D12Resource **ppBuffer) +{ + HRESULT hr; + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); + HelperObjects obj = { 0 }; + D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_UPLOAD }; + + D3D12_RESOURCE_DESC desc = { + .Dimension 
= D3D12_RESOURCE_DIMENSION_BUFFER, + .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT, + .Width = ctx->bitstream_size, + .Height = 1, + .DepthOrArraySize = 1, + .MipLevels = 1, + .Format = DXGI_FORMAT_UNKNOWN, + .SampleDesc = { .Count = 1, .Quality = 0 }, + .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + .Flags = D3D12_RESOURCE_FLAG_NONE, + }; + + if (av_fifo_peek(ctx->objects_queue, &obj, 1, 0) >= 0) { + uint64_t completion = ID3D12Fence_GetCompletedValue(ctx->sync_ctx.fence); + if (completion >= obj.fence_value) { + *ppAllocator = obj.command_allocator; + *ppBuffer = obj.buffer; + av_fifo_read(ctx->objects_queue, &obj, 1); + return 0; + } + } + + hr = ID3D12Device_CreateCommandAllocator(ctx->device_ctx->device, D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE, + &IID_ID3D12CommandAllocator, (void **)ppAllocator); + if (FAILED(hr)) { + av_log(avctx, AV_LOG_ERROR, "Failed to create a new command allocator!\n"); + return AVERROR(EINVAL); + } + + hr = ID3D12Device_CreateCommittedResource(ctx->device_ctx->device, &heap_props, D3D12_HEAP_FLAG_NONE, + &desc, D3D12_RESOURCE_STATE_GENERIC_READ, NULL, + &IID_ID3D12Resource, (void **)ppBuffer); + + if (FAILED(hr)) { + av_log(avctx, AV_LOG_ERROR, "Failed to create a new d3d12 buffer!\n"); + return AVERROR(EINVAL); + } + + return 0; +} + +static int d3d12va_discard_helper_objects(AVCodecContext *avctx, ID3D12CommandAllocator *pAllocator, + ID3D12Resource *pBuffer, uint64_t fence_value) +{ + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); + + HelperObjects obj = { + .command_allocator = pAllocator, + .buffer = pBuffer, + .fence_value = fence_value, + }; + + if (av_fifo_write(ctx->objects_queue, &obj, 1) < 0) { + D3D12_OBJECT_RELEASE(pAllocator); + D3D12_OBJECT_RELEASE(pBuffer); + return AVERROR(ENOMEM); + } + + return 0; +} + +static int d3d12va_fence_completion(AVD3D12VASyncContext *psync_ctx) +{ + uint64_t completion = ID3D12Fence_GetCompletedValue(psync_ctx->fence); + if (completion < psync_ctx->fence_value) { + if 
(FAILED(ID3D12Fence_SetEventOnCompletion(psync_ctx->fence, psync_ctx->fence_value, psync_ctx->event))) + return AVERROR(EINVAL); + + WaitForSingleObjectEx(psync_ctx->event, INFINITE, FALSE); + } + + return 0; +} + +static void bufref_free_interface(void *opaque, uint8_t *data) +{ + D3D12_OBJECT_RELEASE(opaque); +} + +static AVBufferRef *bufref_wrap_interface(IUnknown *iface) +{ + return av_buffer_create((uint8_t*)iface, 1, bufref_free_interface, iface, 0); +} + +static int d3d12va_sync_with_gpu(AVCodecContext *avctx) +{ + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); + + DX_CHECK(ID3D12CommandQueue_Signal(ctx->command_queue, ctx->sync_ctx.fence, ++ctx->sync_ctx.fence_value)); + return d3d12va_fence_completion(&ctx->sync_ctx); + +fail: + return AVERROR(EINVAL); +} + +static int d3d12va_create_decoder_heap(AVCodecContext *avctx) +{ + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); + AVHWFramesContext *frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx); + AVD3D12VADeviceContext *device_hwctx = ctx->device_ctx; + AVD3D12VAFramesContext *frames_hwctx = frames_ctx->hwctx; + + D3D12_VIDEO_DECODER_HEAP_DESC desc = { + .NodeMask = 0, + .Configuration = ctx->cfg, + .DecodeWidth = frames_ctx->width, + .DecodeHeight = frames_ctx->height, + .Format = frames_hwctx->format, + .FrameRate = { avctx->framerate.num, avctx->framerate.den }, + .BitRate = avctx->bit_rate, + .MaxDecodePictureBufferCount = ctx->max_num_ref, + }; + + DX_CHECK(ID3D12VideoDevice_CreateVideoDecoderHeap(device_hwctx->video_device, &desc, + &IID_ID3D12VideoDecoderHeap, (void **)&ctx->decoder_heap)); + + return 0; + +fail: + if (ctx->decoder) { + av_log(avctx, AV_LOG_ERROR, "D3D12 doesn't support decoding frames with an extent " + "[width(%d), height(%d)], on your device!\n", frames_ctx->width, frames_ctx->height); + } + + return AVERROR(EINVAL); +} + +static int d3d12va_create_decoder(AVCodecContext *avctx) +{ + D3D12_VIDEO_DECODER_DESC desc; + D3D12VADecodeContext *ctx = 
D3D12VA_DECODE_CONTEXT(avctx); + AVHWFramesContext *frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx); + AVD3D12VADeviceContext *device_hwctx = ctx->device_ctx; + AVD3D12VAFramesContext *frames_hwctx = frames_ctx->hwctx; + + D3D12_FEATURE_DATA_VIDEO_DECODE_SUPPORT feature = { + .NodeIndex = 0, + .Configuration = ctx->cfg, + .Width = frames_ctx->width, + .Height = frames_ctx->height, + .DecodeFormat = frames_hwctx->format, + .FrameRate = { avctx->framerate.num, avctx->framerate.den }, + .BitRate = avctx->bit_rate, + }; + + DX_CHECK(ID3D12VideoDevice_CheckFeatureSupport(device_hwctx->video_device, D3D12_FEATURE_VIDEO_DECODE_SUPPORT, + &feature, sizeof(feature))); + if (!(feature.SupportFlags & D3D12_VIDEO_DECODE_SUPPORT_FLAG_SUPPORTED) || + !(feature.DecodeTier >= D3D12_VIDEO_DECODE_TIER_2)) { + av_log(avctx, AV_LOG_ERROR, "D3D12 decoder doesn't support on this device\n"); + return AVERROR(EINVAL); + } + + desc = (D3D12_VIDEO_DECODER_DESC) { + .NodeMask = 0, + .Configuration = ctx->cfg, + }; + + DX_CHECK(ID3D12VideoDevice_CreateVideoDecoder(device_hwctx->video_device, &desc, &IID_ID3D12VideoDecoder, + (void **)&ctx->decoder)); + + ctx->decoder_ref = bufref_wrap_interface((IUnknown *)ctx->decoder); + if (!ctx->decoder_ref) + return AVERROR(ENOMEM); + + return 0; + +fail: + return AVERROR(EINVAL); +} + +static inline int d3d12va_get_num_surfaces(enum AVCodecID codec_id) +{ + int num_surfaces = 1; + switch (codec_id) { + case AV_CODEC_ID_H264: + case AV_CODEC_ID_HEVC: + num_surfaces += 16; + break; + + case AV_CODEC_ID_AV1: + num_surfaces += 12; + break; + + case AV_CODEC_ID_VP9: + num_surfaces += 8; + break; + + default: + num_surfaces += 2; + } + + return num_surfaces; +} + +int ff_d3d12va_common_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx) +{ + AVHWFramesContext *frames_ctx = (AVHWFramesContext *)hw_frames_ctx->data; + AVHWDeviceContext *device_ctx = frames_ctx->device_ctx; + + frames_ctx->format = AV_PIX_FMT_D3D12; + frames_ctx->sw_format = 
avctx->sw_pix_fmt == AV_PIX_FMT_YUV420P10 ? AV_PIX_FMT_P010 : AV_PIX_FMT_NV12; + frames_ctx->width = avctx->width; + frames_ctx->height = avctx->height; + + frames_ctx->initial_pool_size = d3d12va_get_num_surfaces(avctx->codec_id); + + return 0; +} + +int ff_d3d12va_decode_init(AVCodecContext *avctx) +{ + int ret; + AVHWFramesContext *frames_ctx; + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); + ID3D12Resource *buffer = NULL; + ID3D12CommandAllocator *command_allocator = NULL; + D3D12_COMMAND_QUEUE_DESC queue_desc = { + .Type = D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE, + .Priority = 0, + .Flags = D3D12_COMMAND_QUEUE_FLAG_NONE, + .NodeMask = 0, + }; + + ctx->pix_fmt = avctx->hwaccel->pix_fmt; + + ret = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_D3D12VA); + if (ret < 0) + return ret; + + frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx); + ctx->device_ctx = (AVD3D12VADeviceContext *)frames_ctx->device_ctx->hwctx; + + if (frames_ctx->format != ctx->pix_fmt) { + av_log(avctx, AV_LOG_ERROR, "Invalid pixfmt for hwaccel!\n"); + goto fail; + } + + ret = d3d12va_create_decoder(avctx); + if (ret < 0) + goto fail; + + ret = d3d12va_create_decoder_heap(avctx); + if (ret < 0) + goto fail; + + ctx->bitstream_size = ff_d3d12va_get_suitable_max_bitstream_size(avctx); + + ctx->ref_resources = av_calloc(sizeof(ID3D12Resource *), ctx->max_num_ref); + if (!ctx->ref_resources) + return AVERROR(ENOMEM); + + ctx->ref_subresources = av_calloc(sizeof(UINT), ctx->max_num_ref); + if (!ctx->ref_subresources) + return AVERROR(ENOMEM); + + ctx->objects_queue = av_fifo_alloc2(D3D12VA_VIDEO_DEC_ASYNC_DEPTH, + sizeof(HelperObjects), AV_FIFO_FLAG_AUTO_GROW); + if (!ctx->objects_queue) + return AVERROR(ENOMEM); + + + DX_CHECK(ID3D12Device_CreateFence(ctx->device_ctx->device, ctx->sync_ctx.fence_value, D3D12_FENCE_FLAG_NONE, + &IID_ID3D12Fence, (void **)&ctx->sync_ctx.fence)); + + ctx->sync_ctx.event = CreateEvent(NULL, FALSE, FALSE, NULL); + if (!ctx->sync_ctx.event) + goto fail; + + ret 
= d3d12va_get_valid_helper_objects(avctx, &command_allocator, &buffer); + if (ret < 0) + goto fail; + + DX_CHECK(ID3D12Device_CreateCommandQueue(ctx->device_ctx->device, &queue_desc, + &IID_ID3D12CommandQueue, (void **)&ctx->command_queue)); + + DX_CHECK(ID3D12Device_CreateCommandList(ctx->device_ctx->device, 0, queue_desc.Type, + command_allocator, NULL, &IID_ID3D12CommandList, (void **)&ctx->command_list)); + + DX_CHECK(ID3D12VideoDecodeCommandList_Close(ctx->command_list)); + + ID3D12CommandQueue_ExecuteCommandLists(ctx->command_queue, 1, (ID3D12CommandList **)&ctx->command_list); + + ret = d3d12va_sync_with_gpu(avctx); + if (ret < 0) + goto fail; + + d3d12va_discard_helper_objects(avctx, command_allocator, buffer, ctx->sync_ctx.fence_value); + if (ret < 0) + goto fail; + + return 0; + +fail: + D3D12_OBJECT_RELEASE(command_allocator); + D3D12_OBJECT_RELEASE(buffer); + ff_d3d12va_decode_uninit(avctx); + + return AVERROR(EINVAL); +} + +int ff_d3d12va_decode_uninit(AVCodecContext *avctx) +{ + int num_allocator = 0; + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); + HelperObjects obj; + + if (ctx->sync_ctx.fence) + d3d12va_sync_with_gpu(avctx); + + av_freep(&ctx->ref_resources); + av_freep(&ctx->ref_subresources); + + D3D12_OBJECT_RELEASE(ctx->command_list); + D3D12_OBJECT_RELEASE(ctx->command_queue); + + if (ctx->objects_queue) { + while (av_fifo_read(ctx->objects_queue, &obj, 1) >= 0) { + num_allocator++; + D3D12_OBJECT_RELEASE(obj.buffer); + D3D12_OBJECT_RELEASE(obj.command_allocator); + } + + av_log(avctx, AV_LOG_VERBOSE, "Total number of command allocators reused: %d\n", num_allocator); + } + + av_fifo_freep2(&ctx->objects_queue); + + D3D12_OBJECT_RELEASE(ctx->sync_ctx.fence); + if (ctx->sync_ctx.event) + CloseHandle(ctx->sync_ctx.event); + + D3D12_OBJECT_RELEASE(ctx->decoder_heap); + + av_buffer_unref(&ctx->decoder_ref); + + return 0; +} + +static inline int d3d12va_update_reference_frames_state(AVCodecContext *avctx, D3D12_RESOURCE_BARRIER 
*barriers, int state_before, int state_end) +{ + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); + AVHWFramesContext *frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx); + AVD3D12VAFramesContext *frames_hwctx = frames_ctx->hwctx; + + int num_barrier = 0; + for (int i = 0; i < ctx->max_num_ref; i++) { + if (((ctx->used_mask >> i) & 0x1) && ctx->ref_resources[i]) { + barriers[num_barrier].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barriers[num_barrier].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + barriers[num_barrier].Transition = (D3D12_RESOURCE_TRANSITION_BARRIER){ + .pResource = ctx->ref_resources[i], + .Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, + .StateBefore = state_before, + .StateAfter = state_end, + }; + num_barrier++; + } + } + + return num_barrier; +} + +int ff_d3d12va_common_end_frame(AVCodecContext *avctx, AVFrame *frame, + const void *pp, unsigned pp_size, + const void *qm, unsigned qm_size, + int(*update_input_arguments)(AVCodecContext *, D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS *, ID3D12Resource *)) +{ + int ret; + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); + AVHWFramesContext *frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx); + AVD3D12VAFramesContext *frames_hwctx = frames_ctx->hwctx; + ID3D12Resource *buffer = NULL; + ID3D12CommandAllocator *command_allocator = NULL; + AVD3D12VAFrame *f = (AVD3D12VAFrame *)frame->data[0]; + ID3D12Resource *resource = (ID3D12Resource *)f->texture; + + ID3D12VideoDecodeCommandList *cmd_list = ctx->command_list; + D3D12_RESOURCE_BARRIER barriers[32] = { 0 }; + + D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS input_args = { + .NumFrameArguments = 2, + .FrameArguments = { + [0] = { + .Type = D3D12_VIDEO_DECODE_ARGUMENT_TYPE_PICTURE_PARAMETERS, + .Size = pp_size, + .pData = (void *)pp, + }, + [1] = { + .Type = D3D12_VIDEO_DECODE_ARGUMENT_TYPE_INVERSE_QUANTIZATION_MATRIX, + .Size = qm_size, + .pData = (void *)qm, + }, + }, + .pHeap = ctx->decoder_heap, + }; + + 
D3D12_VIDEO_DECODE_OUTPUT_STREAM_ARGUMENTS output_args = { + .ConversionArguments = { 0 }, + .OutputSubresource = 0, + .pOutputTexture2D = resource, + }; + + UINT num_barrier = 1; + barriers[0] = (D3D12_RESOURCE_BARRIER) { + .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, + .Transition = { + .pResource = resource, + .Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, + .StateBefore = D3D12_RESOURCE_STATE_COMMON, + .StateAfter = D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE, + }, + }; + + memset(ctx->ref_subresources, 0, sizeof(UINT) * ctx->max_num_ref); + input_args.ReferenceFrames.NumTexture2Ds = ctx->max_num_ref; + input_args.ReferenceFrames.ppTexture2Ds = ctx->ref_resources; + input_args.ReferenceFrames.pSubresources = ctx->ref_subresources; + + ret = d3d12va_fence_completion(&f->sync_ctx); + if (ret < 0) + goto fail; + + if (!qm) + input_args.NumFrameArguments = 1; + + ret = d3d12va_get_valid_helper_objects(avctx, &command_allocator, &buffer); + if (ret < 0) + goto fail; + + ret = update_input_arguments(avctx, &input_args, buffer); + if (ret < 0) + goto fail; + + DX_CHECK(ID3D12CommandAllocator_Reset(command_allocator)); + + DX_CHECK(ID3D12VideoDecodeCommandList_Reset(cmd_list, command_allocator)); + + num_barrier += d3d12va_update_reference_frames_state(avctx, &barriers[1], D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_VIDEO_DECODE_READ); + + ID3D12VideoDecodeCommandList_ResourceBarrier(cmd_list, num_barrier, barriers); + + ID3D12VideoDecodeCommandList_DecodeFrame(cmd_list, ctx->decoder, &output_args, &input_args); + + barriers[0].Transition.StateBefore = barriers[0].Transition.StateAfter; + barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_COMMON; + d3d12va_update_reference_frames_state(avctx, &barriers[1], D3D12_RESOURCE_STATE_VIDEO_DECODE_READ, D3D12_RESOURCE_STATE_COMMON); + + ID3D12VideoDecodeCommandList_ResourceBarrier(cmd_list, num_barrier, barriers); + + 
DX_CHECK(ID3D12VideoDecodeCommandList_Close(cmd_list)); + + ID3D12CommandQueue_ExecuteCommandLists(ctx->command_queue, 1, (ID3D12CommandList **)&ctx->command_list); + + DX_CHECK(ID3D12CommandQueue_Signal(ctx->command_queue, f->sync_ctx.fence, ++f->sync_ctx.fence_value)); + + DX_CHECK(ID3D12CommandQueue_Signal(ctx->command_queue, ctx->sync_ctx.fence, ++ctx->sync_ctx.fence_value)); + + ret = d3d12va_discard_helper_objects(avctx, command_allocator, buffer, ctx->sync_ctx.fence_value); + if (ret < 0) + return ret; + + return 0; + +fail: + if (command_allocator) + d3d12va_discard_helper_objects(avctx, command_allocator, buffer, ctx->sync_ctx.fence_value); + return AVERROR(EINVAL); +} diff --git a/libavcodec/d3d12va_decode.h b/libavcodec/d3d12va_decode.h new file mode 100644 index 0000000000..b64994760a --- /dev/null +++ b/libavcodec/d3d12va_decode.h @@ -0,0 +1,179 @@ +/* + * Direct3D 12 HW acceleration video decoder + * + * copyright (c) 2022-2023 Wu Jianhua <toqsxw@outlook.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_D3D12VA_DECODE_H +#define AVCODEC_D3D12VA_DECODE_H + +#include "libavutil/fifo.h" +#include "libavutil/hwcontext.h" +#include "libavutil/hwcontext_d3d12va.h" +#include "avcodec.h" +#include "internal.h" +#include "hwaccel_internal.h" + +/** + * @brief This structure is used to provide the necessary configurations and data + * to the FFmpeg Direct3D 12 HWAccel implementation for video decoder. + */ +typedef struct D3D12VADecodeContext { + AVBufferRef *decoder_ref; + + /** + * D3D12 video decoder + */ + ID3D12VideoDecoder *decoder; + + /** + * D3D12 video decoder heap + */ + ID3D12VideoDecoderHeap *decoder_heap; + + /** + * D3D12 configuration used to create the decoder + * + * Specified by decoders + */ + D3D12_VIDEO_DECODE_CONFIGURATION cfg; + + /** + * A cached queue for reusing the D3D12 command allocators and upload buffers + * + * @see https://learn.microsoft.com/en-us/windows/win32/direct3d12/recording-command-lists-and-bundles#id3d12commandallocator + */ + AVFifo *objects_queue; + + /** + * D3D12 command queue + */ + ID3D12CommandQueue *command_queue; + + /** + * D3D12 video decode command list + */ + ID3D12VideoDecodeCommandList *command_list; + + /** + * The array of resources used for reference frames + * + * The ref_resources.length is the same as D3D12VADecodeContext.max_num_ref + */ + ID3D12Resource **ref_resources; + + /** + * The array of subresources used for reference frames + * + * The ref_subresources.length is the same as D3D12VADecodeContext.max_num_ref + */ + UINT *ref_subresources; + + /** + * Maximum number of reference frames + */ + UINT max_num_ref; + + /** + * Bitmask recording which reference frame indices are in use + */ + UINT used_mask; + + /** + * Bitstream size for each frame + */ + UINT bitstream_size; 
+ + /** + * The sync context used to sync command queue + */ + AVD3D12VASyncContext sync_ctx; + + /** + * A pointer to AVD3D12VADeviceContext used to create D3D12 objects + */ + AVD3D12VADeviceContext *device_ctx; + + /** + * Pixel format + */ + enum AVPixelFormat pix_fmt; + + /** + * Private to the FFmpeg AVHWAccel implementation + */ + unsigned report_id; +} D3D12VADecodeContext; + +/** + * @} + */ +#define D3D12VA_VIDEO_DEC_ASYNC_DEPTH 36 +#define D3D12VA_DECODE_CONTEXT(avctx) ((D3D12VADecodeContext *)((avctx)->internal->hwaccel_priv_data)) +#define D3D12VA_FRAMES_CONTEXT(avctx) ((AVHWFramesContext *)(avctx)->hw_frames_ctx->data) + +/** + * @brief Get a suitable maximum bitstream size + * + * Creating and destroying a resource on d3d12 needs sync and reallocation, so use this function + * to help allocate a big enough bitstream buffer to avoid recreating resources when decoding. + * + * @return the suitable size + */ +int ff_d3d12va_get_suitable_max_bitstream_size(AVCodecContext *avctx); + +/** + * @brief init D3D12VADecodeContext + * + * @return Error code (ret < 0 if failed) + */ +int ff_d3d12va_decode_init(AVCodecContext *avctx); + +/** + * @brief uninit D3D12VADecodeContext + * + * @return Error code (ret < 0 if failed) + */ +int ff_d3d12va_decode_uninit(AVCodecContext *avctx); + +/** + * @brief d3d12va common frame params + * + * @return Error code (ret < 0 if failed) + */ +int ff_d3d12va_common_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx); + +/** + * @brief d3d12va common end frame + * + * @param avctx codec context + * @param frame current output frame + * @param pp picture parameters + * @param pp_size the size of the picture parameters + * @param qm quantization matrix + * @param qm_size the size of the quantization matrix + * @param callback update decoder-specified input stream arguments + * @return Error code (ret < 0 if failed) + */ +int ff_d3d12va_common_end_frame(AVCodecContext *avctx, AVFrame *frame, + const void *pp, unsigned 
pp_size, + const void *qm, unsigned qm_size, + int(*)(AVCodecContext *, D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS *, ID3D12Resource *)); + +#endif /* AVCODEC_D3D12VA_DECODE_H */ diff --git a/libavcodec/d3d12va_h264.c b/libavcodec/d3d12va_h264.c new file mode 100644 index 0000000000..5a6d6852c8 --- /dev/null +++ b/libavcodec/d3d12va_h264.c @@ -0,0 +1,207 @@ +/* + * Direct3D 12 h264 HW acceleration + * + * copyright (c) 2022-2023 Wu Jianhua <toqsxw@outlook.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config_components.h" +#include "libavutil/avassert.h" +#include "h264dec.h" +#include "h264data.h" +#include "h264_ps.h" +#include "mpegutils.h" +#include "dxva2_internal.h" +#include "d3d12va_decode.h" +#include "libavutil/hwcontext_d3d12va_internal.h" +#include <dxva.h> + +typedef struct H264DecodePictureContext { + DXVA_PicParams_H264 pp; + DXVA_Qmatrix_H264 qm; + unsigned slice_count; + DXVA_Slice_H264_Short slice_short[MAX_SLICES]; + const uint8_t *bitstream; + unsigned bitstream_size; +} H264DecodePictureContext; + +static void fill_slice_short(DXVA_Slice_H264_Short *slice, + unsigned position, unsigned size) +{ + memset(slice, 0, sizeof(*slice)); + slice->BSNALunitDataLocation = position; + slice->SliceBytesInBuffer = size; + slice->wBadSliceChopping = 0; +} + +static int d3d12va_h264_start_frame(AVCodecContext *avctx, + av_unused const uint8_t *buffer, + av_unused uint32_t size) +{ + const H264Context *h = avctx->priv_data; + H264DecodePictureContext *ctx_pic = h->cur_pic_ptr->hwaccel_picture_private; + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); + + if (!ctx) + return -1; + + assert(ctx_pic); + + ctx->used_mask = 0; + + ff_dxva2_h264_fill_picture_parameters(avctx, (AVDXVAContext *)ctx, &ctx_pic->pp); + + ff_dxva2_h264_fill_scaling_lists(avctx, (AVDXVAContext *)ctx, &ctx_pic->qm); + + ctx_pic->slice_count = 0; + ctx_pic->bitstream_size = 0; + ctx_pic->bitstream = NULL; + + return 0; +} + +static int d3d12va_h264_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size) +{ + unsigned position; + const H264Context *h = avctx->priv_data; + const H264SliceContext *sl = &h->slice_ctx[0]; + const H264Picture *current_picture = h->cur_pic_ptr; + H264DecodePictureContext *ctx_pic = 
current_picture->hwaccel_picture_private; + + if (ctx_pic->slice_count >= MAX_SLICES) + return AVERROR(ERANGE); + + if (!ctx_pic->bitstream) + ctx_pic->bitstream = buffer; + ctx_pic->bitstream_size += size; + + position = buffer - ctx_pic->bitstream; + fill_slice_short(&ctx_pic->slice_short[ctx_pic->slice_count], position, size); + ctx_pic->slice_count++; + + if (sl->slice_type != AV_PICTURE_TYPE_I && sl->slice_type != AV_PICTURE_TYPE_SI) + ctx_pic->pp.wBitFields &= ~(1 << 15); /* Set IntraPicFlag to 0 */ + + return 0; +} + +#define START_CODE 65536 +#define START_CODE_SIZE 3 +static int update_input_arguments(AVCodecContext *avctx, D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS *input_args, ID3D12Resource *buffer) +{ + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); + AVHWFramesContext *frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx); + AVD3D12VAFramesContext *frames_hwctx = frames_ctx->hwctx; + + const H264Context *h = avctx->priv_data; + const H264Picture *current_picture = h->cur_pic_ptr; + H264DecodePictureContext *ctx_pic = current_picture->hwaccel_picture_private; + + int i; + uint8_t *mapped_data, *mapped_ptr; + DXVA_Slice_H264_Short *slice; + D3D12_VIDEO_DECODE_FRAME_ARGUMENT *args; + + if (FAILED(ID3D12Resource_Map(buffer, 0, NULL, &mapped_data))) { + av_log(avctx, AV_LOG_ERROR, "Failed to map D3D12 Buffer resource!\n"); + return AVERROR(EINVAL); + } + + mapped_ptr = mapped_data; + for (i = 0; i < ctx_pic->slice_count; i++) { + UINT position, size; + slice = &ctx_pic->slice_short[i]; + + position = slice->BSNALunitDataLocation; + size = slice->SliceBytesInBuffer; + + slice->SliceBytesInBuffer += START_CODE_SIZE; + slice->BSNALunitDataLocation = mapped_ptr - mapped_data; + + *(uint32_t *)mapped_ptr = START_CODE; + mapped_ptr += START_CODE_SIZE; + + memcpy(mapped_ptr, &ctx_pic->bitstream[position], size); + mapped_ptr += size; + } + + ID3D12Resource_Unmap(buffer, 0, NULL); + + input_args->CompressedBitstream = (D3D12_VIDEO_DECODE_COMPRESSED_BITSTREAM){ + 
.pBuffer = buffer, + .Offset = 0, + .Size = mapped_ptr - mapped_data, + }; + + args = &input_args->FrameArguments[input_args->NumFrameArguments++]; + args->Type = D3D12_VIDEO_DECODE_ARGUMENT_TYPE_SLICE_CONTROL; + args->Size = sizeof(DXVA_Slice_H264_Short) * ctx_pic->slice_count; + args->pData = ctx_pic->slice_short; + + return 0; +} + +static int d3d12va_h264_end_frame(AVCodecContext *avctx) +{ + H264Context *h = avctx->priv_data; + H264DecodePictureContext *ctx_pic = h->cur_pic_ptr->hwaccel_picture_private; + H264SliceContext *sl = &h->slice_ctx[0]; + + int ret; + + if (ctx_pic->slice_count <= 0 || ctx_pic->bitstream_size <= 0) + return -1; + + ret = ff_d3d12va_common_end_frame(avctx, h->cur_pic_ptr->f, + &ctx_pic->pp, sizeof(ctx_pic->pp), + &ctx_pic->qm, sizeof(ctx_pic->qm), + update_input_arguments); + if (!ret) + ff_h264_draw_horiz_band(h, sl, 0, h->avctx->height); + + return ret; +} + +static int d3d12va_h264_decode_init(AVCodecContext *avctx) +{ + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); + DXVA_PicParams_H264 pp; + + ctx->cfg.DecodeProfile = D3D12_VIDEO_DECODE_PROFILE_H264; + + ctx->max_num_ref = FF_ARRAY_ELEMS(pp.RefFrameList) + 1; + + return ff_d3d12va_decode_init(avctx); +} + +#if CONFIG_H264_D3D12VA_HWACCEL +const FFHWAccel ff_h264_d3d12va_hwaccel = { + .p.name = "h264_d3d12va", + .p.type = AVMEDIA_TYPE_VIDEO, + .p.id = AV_CODEC_ID_H264, + .p.pix_fmt = AV_PIX_FMT_D3D12, + .init = d3d12va_h264_decode_init, + .uninit = ff_d3d12va_decode_uninit, + .start_frame = d3d12va_h264_start_frame, + .decode_slice = d3d12va_h264_decode_slice, + .end_frame = d3d12va_h264_end_frame, + .frame_params = ff_d3d12va_common_frame_params, + .frame_priv_data_size = sizeof(H264DecodePictureContext), + .priv_data_size = sizeof(D3D12VADecodeContext), +}; +#endif diff --git a/libavcodec/dxva2.c b/libavcodec/dxva2.c index d7bc587562..7160a0008b 100644 --- a/libavcodec/dxva2.c +++ b/libavcodec/dxva2.c @@ -768,12 +768,17 @@ static void *get_surface(const 
AVCodecContext *avctx, const AVFrame *frame) } unsigned ff_dxva2_get_surface_index(const AVCodecContext *avctx, - const AVDXVAContext *ctx, - const AVFrame *frame) + AVDXVAContext *ctx, const AVFrame *frame, + int curr) { void *surface = get_surface(avctx, frame); unsigned i; +#if CONFIG_D3D12VA + if (avctx->pix_fmt == AV_PIX_FMT_D3D12) { + return ff_d3d12va_get_surface_index(avctx, (D3D12VADecodeContext *)ctx, frame, curr); + } +#endif #if CONFIG_D3D11VA if (avctx->pix_fmt == AV_PIX_FMT_D3D11) return (intptr_t)frame->data[1]; @@ -1056,3 +1061,23 @@ int ff_dxva2_is_d3d11(const AVCodecContext *avctx) else return 0; } + +unsigned *ff_dxva2_get_report_id(const AVCodecContext *avctx, AVDXVAContext *ctx) +{ + unsigned *report_id = NULL; + +#if CONFIG_D3D12VA + if (avctx->pix_fmt == AV_PIX_FMT_D3D12) + report_id = &ctx->d3d12va.report_id; +#endif +#if CONFIG_D3D11VA + if (ff_dxva2_is_d3d11(avctx)) + report_id = &ctx->d3d11va.report_id; +#endif +#if CONFIG_DXVA2 + if (avctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD) + report_id = &ctx->dxva2.report_id; +#endif + + return report_id; +} diff --git a/libavcodec/dxva2.h b/libavcodec/dxva2.h index 22c93992f2..bdec6112e9 100644 --- a/libavcodec/dxva2.h +++ b/libavcodec/dxva2.h @@ -45,9 +45,6 @@ * @{ */ -#define FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG 1 ///< Work around for DXVA2 and old UVD/UVD+ ATI video cards -#define FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO 2 ///< Work around for DXVA2 and old Intel GPUs with ClearVideo interface - /** * This structure is used to provides the necessary configurations and data * to the DXVA2 FFmpeg HWAccel implementation. 
diff --git a/libavcodec/dxva2_av1.c b/libavcodec/dxva2_av1.c index ab118a4356..12a895b791 100644 --- a/libavcodec/dxva2_av1.c +++ b/libavcodec/dxva2_av1.c @@ -75,7 +75,7 @@ static int fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext *c pp->max_width = seq->max_frame_width_minus_1 + 1; pp->max_height = seq->max_frame_height_minus_1 + 1; - pp->CurrPicTextureIndex = ff_dxva2_get_surface_index(avctx, ctx, h->cur_frame.f); + pp->CurrPicTextureIndex = ff_dxva2_get_surface_index(avctx, ctx, h->cur_frame.f, 1); pp->superres_denom = frame_header->use_superres ? frame_header->coded_denom + AV1_SUPERRES_DENOM_MIN : AV1_SUPERRES_NUM; pp->bitdepth = get_bit_depth_from_seq(seq); pp->seq_profile = seq->seq_profile; @@ -151,7 +151,7 @@ static int fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext *c for (i = 0; i < AV1_NUM_REF_FRAMES; i++) { AVFrame *ref_frame = h->ref[i].f; if (ref_frame->buf[0]) - pp->RefFrameMapTextureIndex[i] = ff_dxva2_get_surface_index(avctx, ctx, ref_frame); + pp->RefFrameMapTextureIndex[i] = ff_dxva2_get_surface_index(avctx, ctx, ref_frame, 0); } /* Loop filter parameters */ diff --git a/libavcodec/dxva2_h264.c b/libavcodec/dxva2_h264.c index 20e64f848d..e0ec4878a7 100644 --- a/libavcodec/dxva2_h264.c +++ b/libavcodec/dxva2_h264.c @@ -48,19 +48,16 @@ static void fill_picture_entry(DXVA_PicEntry_H264 *pic, pic->bPicEntry = index | (flag << 7); } -static void fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext *ctx, const H264Context *h, +void ff_dxva2_h264_fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext *ctx, DXVA_PicParams_H264 *pp) { + const H264Context *h = avctx->priv_data; const H264Picture *current_picture = h->cur_pic_ptr; const SPS *sps = h->ps.sps; const PPS *pps = h->ps.pps; int i, j; memset(pp, 0, sizeof(*pp)); - /* Configure current picture */ - fill_picture_entry(&pp->CurrPic, - ff_dxva2_get_surface_index(avctx, ctx, current_picture->f), - h->picture_structure == 
PICT_BOTTOM_FIELD); /* Configure the set of references */ pp->UsedForReferenceFlags = 0; pp->NonExistingFrameFlags = 0; @@ -75,7 +72,7 @@ static void fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext * } if (r) { fill_picture_entry(&pp->RefFrameList[i], - ff_dxva2_get_surface_index(avctx, ctx, r->f), + ff_dxva2_get_surface_index(avctx, ctx, r->f, 0), r->long_ref != 0); if ((r->reference & PICT_TOP_FIELD) && r->field_poc[0] != INT_MAX) @@ -95,6 +92,10 @@ static void fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext * pp->FrameNumList[i] = 0; } } + /* Configure current picture */ + fill_picture_entry(&pp->CurrPic, + ff_dxva2_get_surface_index(avctx, ctx, current_picture->f, 1), + h->picture_structure == PICT_BOTTOM_FIELD); pp->wFrameWidthInMbsMinus1 = h->mb_width - 1; pp->wFrameHeightInMbsMinus1 = h->mb_height - 1; @@ -164,9 +165,10 @@ static void fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext * //pp->SliceGroupMap[810]; /* XXX not implemented by FFmpeg */ } -static void fill_scaling_lists(const AVCodecContext *avctx, AVDXVAContext *ctx, const H264Context *h, DXVA_Qmatrix_H264 *qm) +void ff_dxva2_h264_fill_scaling_lists(const AVCodecContext *avctx, AVDXVAContext *ctx, DXVA_Qmatrix_H264 *qm) { - const PPS *pps = h->ps.pps; + const H264Context *h = avctx->priv_data; + const PPS *pps = h->ps.pps; unsigned i, j; memset(qm, 0, sizeof(*qm)); if (DXVA_CONTEXT_WORKAROUND(avctx, ctx) & FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG) { @@ -253,9 +255,9 @@ static void fill_slice_long(AVCodecContext *avctx, DXVA_Slice_H264_Long *slice, unsigned plane; unsigned index; if (DXVA_CONTEXT_WORKAROUND(avctx, ctx) & FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO) - index = ff_dxva2_get_surface_index(avctx, ctx, r->f); + index = ff_dxva2_get_surface_index(avctx, ctx, r->f, 0); else - index = get_refpic_index(pp, ff_dxva2_get_surface_index(avctx, ctx, r->f)); + index = get_refpic_index(pp, ff_dxva2_get_surface_index(avctx, ctx, r->f, 0)); 
fill_picture_entry(&slice->RefPicList[list][i], index, sl->ref_list[list][i].reference == PICT_BOTTOM_FIELD); for (plane = 0; plane < 3; plane++) { @@ -454,10 +456,10 @@ static int dxva2_h264_start_frame(AVCodecContext *avctx, assert(ctx_pic); /* Fill up DXVA_PicParams_H264 */ - fill_picture_parameters(avctx, ctx, h, &ctx_pic->pp); + ff_dxva2_h264_fill_picture_parameters(avctx, ctx, &ctx_pic->pp); /* Fill up DXVA_Qmatrix_H264 */ - fill_scaling_lists(avctx, ctx, h, &ctx_pic->qm); + ff_dxva2_h264_fill_scaling_lists(avctx, ctx, &ctx_pic->qm); ctx_pic->slice_count = 0; ctx_pic->bitstream_size = 0; diff --git a/libavcodec/dxva2_hevc.c b/libavcodec/dxva2_hevc.c index b6c08943f0..9e1d081412 100644 --- a/libavcodec/dxva2_hevc.c +++ b/libavcodec/dxva2_hevc.c @@ -79,7 +79,7 @@ static void fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext * (0 << 14) | (0 << 15); - fill_picture_entry(&pp->CurrPic, ff_dxva2_get_surface_index(avctx, ctx, current_picture->frame), 0); + fill_picture_entry(&pp->CurrPic, ff_dxva2_get_surface_index(avctx, ctx, current_picture->frame, 1), 0); pp->sps_max_dec_pic_buffering_minus1 = sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering - 1; pp->log2_min_luma_coding_block_size_minus3 = sps->log2_min_cb_size - 3; @@ -171,7 +171,7 @@ static void fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext * } if (frame) { - fill_picture_entry(&pp->RefPicList[i], ff_dxva2_get_surface_index(avctx, ctx, frame->frame), !!(frame->flags & HEVC_FRAME_FLAG_LONG_REF)); + fill_picture_entry(&pp->RefPicList[i], ff_dxva2_get_surface_index(avctx, ctx, frame->frame, 0), !!(frame->flags & HEVC_FRAME_FLAG_LONG_REF)); pp->PicOrderCntValList[i] = frame->poc; } else { pp->RefPicList[i].bPicEntry = 0xff; @@ -186,7 +186,7 @@ static void fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext * while (!frame && j < rpl->nb_refs) \ frame = rpl->ref[j++]; \ if (frame && frame->flags & (HEVC_FRAME_FLAG_LONG_REF | 
HEVC_FRAME_FLAG_SHORT_REF)) \ - pp->ref_list[i] = get_refpic_index(pp, ff_dxva2_get_surface_index(avctx, ctx, frame->frame)); \ + pp->ref_list[i] = get_refpic_index(pp, ff_dxva2_get_surface_index(avctx, ctx, frame->frame, 0)); \ else \ pp->ref_list[i] = 0xff; \ } \ diff --git a/libavcodec/dxva2_internal.h b/libavcodec/dxva2_internal.h index b822af59cd..645549c20c 100644 --- a/libavcodec/dxva2_internal.h +++ b/libavcodec/dxva2_internal.h @@ -26,18 +26,34 @@ #define COBJMACROS #include "config.h" +#include "config_components.h" /* define the proper COM entries before forcing desktop APIs */ #include <objbase.h> +#define FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG 1 ///< Work around for DXVA2/Direct3D11 and old UVD/UVD+ ATI video cards +#define FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO 2 ///< Work around for DXVA2/Direct3D11 and old Intel GPUs with ClearVideo interface + #if CONFIG_DXVA2 #include "dxva2.h" #include "libavutil/hwcontext_dxva2.h" +#define DXVA2_VAR(ctx, var) ctx->dxva2.var +#else +#define DXVA2_VAR(ctx, var) 0 #endif + #if CONFIG_D3D11VA #include "d3d11va.h" #include "libavutil/hwcontext_d3d11va.h" +#define D3D11VA_VAR(ctx, var) ctx->d3d11va.var +#else +#define D3D11VA_VAR(ctx, var) 0 +#endif + +#if CONFIG_D3D12VA +#include "d3d12va_decode.h" #endif + #if HAVE_DXVA_H /* When targeting WINAPI_FAMILY_PHONE_APP or WINAPI_FAMILY_APP, dxva.h * defines nothing. Force the struct definitions to be visible. */ @@ -62,6 +78,9 @@ typedef union { #if CONFIG_DXVA2 struct dxva_context dxva2; #endif +#if CONFIG_D3D12VA + struct D3D12VADecodeContext d3d12va; +#endif } AVDXVAContext; typedef struct FFDXVASharedContext { @@ -101,43 +120,26 @@ typedef struct FFDXVASharedContext { #define D3D11VA_CONTEXT(ctx) (&ctx->d3d11va) #define DXVA2_CONTEXT(ctx) (&ctx->dxva2) -#if CONFIG_D3D11VA && CONFIG_DXVA2 -#define DXVA_CONTEXT_WORKAROUND(avctx, ctx) (ff_dxva2_is_d3d11(avctx) ? 
ctx->d3d11va.workaround : ctx->dxva2.workaround) -#define DXVA_CONTEXT_COUNT(avctx, ctx) (ff_dxva2_is_d3d11(avctx) ? ctx->d3d11va.surface_count : ctx->dxva2.surface_count) -#define DXVA_CONTEXT_DECODER(avctx, ctx) (ff_dxva2_is_d3d11(avctx) ? (void *)ctx->d3d11va.decoder : (void *)ctx->dxva2.decoder) -#define DXVA_CONTEXT_REPORT_ID(avctx, ctx) (*(ff_dxva2_is_d3d11(avctx) ? &ctx->d3d11va.report_id : &ctx->dxva2.report_id)) -#define DXVA_CONTEXT_CFG(avctx, ctx) (ff_dxva2_is_d3d11(avctx) ? (void *)ctx->d3d11va.cfg : (void *)ctx->dxva2.cfg) -#define DXVA_CONTEXT_CFG_BITSTREAM(avctx, ctx) (ff_dxva2_is_d3d11(avctx) ? ctx->d3d11va.cfg->ConfigBitstreamRaw : ctx->dxva2.cfg->ConfigBitstreamRaw) -#define DXVA_CONTEXT_CFG_INTRARESID(avctx, ctx) (ff_dxva2_is_d3d11(avctx) ? ctx->d3d11va.cfg->ConfigIntraResidUnsigned : ctx->dxva2.cfg->ConfigIntraResidUnsigned) -#define DXVA_CONTEXT_CFG_RESIDACCEL(avctx, ctx) (ff_dxva2_is_d3d11(avctx) ? ctx->d3d11va.cfg->ConfigResidDiffAccelerator : ctx->dxva2.cfg->ConfigResidDiffAccelerator) +#define DXVA2_CONTEXT_VAR(avctx, ctx, var) (avctx->pix_fmt == AV_PIX_FMT_D3D12 ? 0 : (ff_dxva2_is_d3d11(avctx) ? D3D11VA_VAR(ctx, var) : DXVA2_VAR(ctx, var))) + +#define DXVA_CONTEXT_REPORT_ID(avctx, ctx) (*ff_dxva2_get_report_id(avctx, ctx)) +#define DXVA_CONTEXT_WORKAROUND(avctx, ctx) DXVA2_CONTEXT_VAR(avctx, ctx, workaround) +#define DXVA_CONTEXT_COUNT(avctx, ctx) DXVA2_CONTEXT_VAR(avctx, ctx, surface_count) +#define DXVA_CONTEXT_DECODER(avctx, ctx) (avctx->pix_fmt == AV_PIX_FMT_D3D12 ? 0 : (ff_dxva2_is_d3d11(avctx) ? (void *)D3D11VA_VAR(ctx, decoder) : (void *)DXVA2_VAR(ctx, decoder))) +#define DXVA_CONTEXT_CFG(avctx, ctx) (avctx->pix_fmt == AV_PIX_FMT_D3D12 ? 0 : (ff_dxva2_is_d3d11(avctx) ? 
(void *)D3D11VA_VAR(ctx, cfg) : (void *)DXVA2_VAR(ctx, cfg))) +#define DXVA_CONTEXT_CFG_BITSTREAM(avctx, ctx) DXVA2_CONTEXT_VAR(avctx, ctx, cfg->ConfigBitstreamRaw) +#define DXVA_CONTEXT_CFG_INTRARESID(avctx, ctx) DXVA2_CONTEXT_VAR(avctx, ctx, cfg->ConfigIntraResidUnsigned) +#define DXVA_CONTEXT_CFG_RESIDACCEL(avctx, ctx) DXVA2_CONTEXT_VAR(avctx, ctx, cfg->ConfigResidDiffAccelerator) #define DXVA_CONTEXT_VALID(avctx, ctx) (DXVA_CONTEXT_DECODER(avctx, ctx) && \ DXVA_CONTEXT_CFG(avctx, ctx) && \ - (ff_dxva2_is_d3d11(avctx) || ctx->dxva2.surface_count)) -#elif CONFIG_DXVA2 -#define DXVA_CONTEXT_WORKAROUND(avctx, ctx) (ctx->dxva2.workaround) -#define DXVA_CONTEXT_COUNT(avctx, ctx) (ctx->dxva2.surface_count) -#define DXVA_CONTEXT_DECODER(avctx, ctx) (ctx->dxva2.decoder) -#define DXVA_CONTEXT_REPORT_ID(avctx, ctx) (*(&ctx->dxva2.report_id)) -#define DXVA_CONTEXT_CFG(avctx, ctx) (ctx->dxva2.cfg) -#define DXVA_CONTEXT_CFG_BITSTREAM(avctx, ctx) (ctx->dxva2.cfg->ConfigBitstreamRaw) -#define DXVA_CONTEXT_CFG_INTRARESID(avctx, ctx) (ctx->dxva2.cfg->ConfigIntraResidUnsigned) -#define DXVA_CONTEXT_CFG_RESIDACCEL(avctx, ctx) (ctx->dxva2.cfg->ConfigResidDiffAccelerator) -#define DXVA_CONTEXT_VALID(avctx, ctx) (ctx->dxva2.decoder && ctx->dxva2.cfg && ctx->dxva2.surface_count) -#elif CONFIG_D3D11VA -#define DXVA_CONTEXT_WORKAROUND(avctx, ctx) (ctx->d3d11va.workaround) -#define DXVA_CONTEXT_COUNT(avctx, ctx) (ctx->d3d11va.surface_count) -#define DXVA_CONTEXT_DECODER(avctx, ctx) (ctx->d3d11va.decoder) -#define DXVA_CONTEXT_REPORT_ID(avctx, ctx) (*(&ctx->d3d11va.report_id)) -#define DXVA_CONTEXT_CFG(avctx, ctx) (ctx->d3d11va.cfg) -#define DXVA_CONTEXT_CFG_BITSTREAM(avctx, ctx) (ctx->d3d11va.cfg->ConfigBitstreamRaw) -#define DXVA_CONTEXT_CFG_INTRARESID(avctx, ctx) (ctx->d3d11va.cfg->ConfigIntraResidUnsigned) -#define DXVA_CONTEXT_CFG_RESIDACCEL(avctx, ctx) (ctx->d3d11va.cfg->ConfigResidDiffAccelerator) -#define DXVA_CONTEXT_VALID(avctx, ctx) (ctx->d3d11va.decoder && ctx->d3d11va.cfg) 
-#endif + (ff_dxva2_is_d3d11(avctx) || DXVA2_VAR(ctx, surface_count))) unsigned ff_dxva2_get_surface_index(const AVCodecContext *avctx, - const AVDXVAContext *, - const AVFrame *frame); + AVDXVAContext *, const AVFrame *frame, int curr); + +unsigned ff_d3d12va_get_surface_index(const AVCodecContext *avctx, + D3D12VADecodeContext *ctx, const AVFrame *frame, + int curr); int ff_dxva2_commit_buffer(AVCodecContext *, AVDXVAContext *, DECODER_BUFFER_DESC *, @@ -161,4 +163,10 @@ int ff_dxva2_common_frame_params(AVCodecContext *avctx, int ff_dxva2_is_d3d11(const AVCodecContext *avctx); +unsigned *ff_dxva2_get_report_id(const AVCodecContext *avctx, AVDXVAContext *ctx); + +void ff_dxva2_h264_fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext *ctx, DXVA_PicParams_H264 *pp); + +void ff_dxva2_h264_fill_scaling_lists(const AVCodecContext *avctx, AVDXVAContext *ctx, DXVA_Qmatrix_H264 *qm); + #endif /* AVCODEC_DXVA2_INTERNAL_H */ diff --git a/libavcodec/dxva2_mpeg2.c b/libavcodec/dxva2_mpeg2.c index 75c416654f..b739dc1e54 100644 --- a/libavcodec/dxva2_mpeg2.c +++ b/libavcodec/dxva2_mpeg2.c @@ -49,14 +49,14 @@ static void fill_picture_parameters(AVCodecContext *avctx, int is_field = s->picture_structure != PICT_FRAME; memset(pp, 0, sizeof(*pp)); - pp->wDecodedPictureIndex = ff_dxva2_get_surface_index(avctx, ctx, current_picture->f); + pp->wDecodedPictureIndex = ff_dxva2_get_surface_index(avctx, ctx, current_picture->f, 1); pp->wDeblockedPictureIndex = 0; if (s->pict_type != AV_PICTURE_TYPE_I) - pp->wForwardRefPictureIndex = ff_dxva2_get_surface_index(avctx, ctx, s->last_picture.f); + pp->wForwardRefPictureIndex = ff_dxva2_get_surface_index(avctx, ctx, s->last_picture.f, 0); else pp->wForwardRefPictureIndex = 0xffff; if (s->pict_type == AV_PICTURE_TYPE_B) - pp->wBackwardRefPictureIndex = ff_dxva2_get_surface_index(avctx, ctx, s->next_picture.f); + pp->wBackwardRefPictureIndex = ff_dxva2_get_surface_index(avctx, ctx, s->next_picture.f, 0); else 
pp->wBackwardRefPictureIndex = 0xffff; pp->wPicWidthInMBminus1 = s->mb_width - 1; diff --git a/libavcodec/dxva2_vc1.c b/libavcodec/dxva2_vc1.c index b35fb115f7..8ee23feabf 100644 --- a/libavcodec/dxva2_vc1.c +++ b/libavcodec/dxva2_vc1.c @@ -58,13 +58,13 @@ static void fill_picture_parameters(AVCodecContext *avctx, memset(pp, 0, sizeof(*pp)); pp->wDecodedPictureIndex = - pp->wDeblockedPictureIndex = ff_dxva2_get_surface_index(avctx, ctx, current_picture->f); + pp->wDeblockedPictureIndex = ff_dxva2_get_surface_index(avctx, ctx, current_picture->f, 1); if (s->pict_type != AV_PICTURE_TYPE_I && !v->bi_type) - pp->wForwardRefPictureIndex = ff_dxva2_get_surface_index(avctx, ctx, s->last_picture.f); + pp->wForwardRefPictureIndex = ff_dxva2_get_surface_index(avctx, ctx, s->last_picture.f, 0); else pp->wForwardRefPictureIndex = 0xffff; if (s->pict_type == AV_PICTURE_TYPE_B && !v->bi_type) - pp->wBackwardRefPictureIndex = ff_dxva2_get_surface_index(avctx, ctx, s->next_picture.f); + pp->wBackwardRefPictureIndex = ff_dxva2_get_surface_index(avctx, ctx, s->next_picture.f, 0); else pp->wBackwardRefPictureIndex = 0xffff; if (v->profile == PROFILE_ADVANCED) { diff --git a/libavcodec/dxva2_vp9.c b/libavcodec/dxva2_vp9.c index eba4df9031..21699eb3f4 100644 --- a/libavcodec/dxva2_vp9.c +++ b/libavcodec/dxva2_vp9.c @@ -54,7 +54,7 @@ static int fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext *c memset(pp, 0, sizeof(*pp)); - fill_picture_entry(&pp->CurrPic, ff_dxva2_get_surface_index(avctx, ctx, h->frames[CUR_FRAME].tf.f), 0); + fill_picture_entry(&pp->CurrPic, ff_dxva2_get_surface_index(avctx, ctx, h->frames[CUR_FRAME].tf.f, 1), 0); pp->profile = h->h.profile; pp->wFormatAndPictureInfoFlags = ((h->h.keyframe == 0) << 0) | @@ -81,7 +81,7 @@ static int fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext *c for (i = 0; i < 8; i++) { if (h->refs[i].f->buf[0]) { - fill_picture_entry(&pp->ref_frame_map[i], ff_dxva2_get_surface_index(avctx, ctx, h->refs[i].f), 
0); + fill_picture_entry(&pp->ref_frame_map[i], ff_dxva2_get_surface_index(avctx, ctx, h->refs[i].f, 0), 0); pp->ref_frame_coded_width[i] = h->refs[i].f->width; pp->ref_frame_coded_height[i] = h->refs[i].f->height; } else @@ -91,7 +91,7 @@ static int fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext *c for (i = 0; i < 3; i++) { uint8_t refidx = h->h.refidx[i]; if (h->refs[refidx].f->buf[0]) - fill_picture_entry(&pp->frame_refs[i], ff_dxva2_get_surface_index(avctx, ctx, h->refs[refidx].f), 0); + fill_picture_entry(&pp->frame_refs[i], ff_dxva2_get_surface_index(avctx, ctx, h->refs[refidx].f, 0), 0); else pp->frame_refs[i].bPicEntry = 0xFF; diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c index 3b6c6c3592..8464a0b34c 100644 --- a/libavcodec/h264_slice.c +++ b/libavcodec/h264_slice.c @@ -784,6 +784,7 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback) { #define HWACCEL_MAX (CONFIG_H264_DXVA2_HWACCEL + \ (CONFIG_H264_D3D11VA_HWACCEL * 2) + \ + CONFIG_H264_D3D12VA_HWACCEL + \ CONFIG_H264_NVDEC_HWACCEL + \ CONFIG_H264_VAAPI_HWACCEL + \ CONFIG_H264_VIDEOTOOLBOX_HWACCEL + \ @@ -887,6 +888,9 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback) *fmt++ = AV_PIX_FMT_D3D11VA_VLD; *fmt++ = AV_PIX_FMT_D3D11; #endif +#if CONFIG_H264_D3D12VA_HWACCEL + *fmt++ = AV_PIX_FMT_D3D12; +#endif #if CONFIG_H264_VAAPI_HWACCEL *fmt++ = AV_PIX_FMT_VAAPI; #endif diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c index 0bf05b2cfe..9f5893c512 100644 --- a/libavcodec/h264dec.c +++ b/libavcodec/h264dec.c @@ -1131,6 +1131,9 @@ const FFCodec ff_h264_decoder = { #if CONFIG_H264_D3D11VA2_HWACCEL HWACCEL_D3D11VA2(h264), #endif +#if CONFIG_H264_D3D12VA_HWACCEL + HWACCEL_D3D12VA(h264), +#endif #if CONFIG_H264_NVDEC_HWACCEL HWACCEL_NVDEC(h264), #endif diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h index c4630718cf..90b8beb0f5 100644 --- a/libavcodec/hwaccels.h +++ b/libavcodec/hwaccels.h @@ -30,6 +30,7 
@@ extern const struct FFHWAccel ff_h263_vaapi_hwaccel; extern const struct FFHWAccel ff_h263_videotoolbox_hwaccel; extern const struct FFHWAccel ff_h264_d3d11va_hwaccel; extern const struct FFHWAccel ff_h264_d3d11va2_hwaccel; +extern const struct FFHWAccel ff_h264_d3d12va_hwaccel; extern const struct FFHWAccel ff_h264_dxva2_hwaccel; extern const struct FFHWAccel ff_h264_nvdec_hwaccel; extern const struct FFHWAccel ff_h264_vaapi_hwaccel; diff --git a/libavcodec/hwconfig.h b/libavcodec/hwconfig.h index e164722a94..ee29ca631d 100644 --- a/libavcodec/hwconfig.h +++ b/libavcodec/hwconfig.h @@ -77,6 +77,8 @@ void ff_hwaccel_uninit(AVCodecContext *avctx); HW_CONFIG_HWACCEL(1, 1, 1, VULKAN, VULKAN, ff_ ## codec ## _vulkan_hwaccel) #define HWACCEL_D3D11VA(codec) \ HW_CONFIG_HWACCEL(0, 0, 1, D3D11VA_VLD, NONE, ff_ ## codec ## _d3d11va_hwaccel) +#define HWACCEL_D3D12VA(codec) \ + HW_CONFIG_HWACCEL(1, 1, 0, D3D12, D3D12VA, ff_ ## codec ## _d3d12va_hwaccel) #define HW_CONFIG_ENCODER(device, frames, ad_hoc, format, device_type_) \ &(const AVCodecHWConfigInternal) { \

[FFmpeg-devel,v10,2/9] avcodec: add D3D12VA hardware accelerated H264 decoding

Checks

Commit Message

Comments

Patch