Message ID | 20200805071921.29967-1-robux4@ycbcr.xyz |
---|---|
State | Superseded |
Headers | show |
Series | [FFmpeg-devel,v2] dxva: wait until D3D11 buffer copies are done before submitting them | expand |
Context | Check | Description |
---|---|---|
andriy/default | pending | |
andriy/make | success | Make finished |
andriy/make_fate | success | Make fate finished |
On Wed, 5 Aug 2020, Steve Lhomme wrote: > When used aggressively, calling SubmitDecoderBuffers() just after > ReleaseDecoderBuffer() may have the buffers not used properly and created > decoding artifacts. > It's likely due to the time to copy the submitted buffer in CPU mapped memory > to GPU memory. SubmitDecoderBuffers() doesn't appear to wait for the state > of the buffer submitted to become "ready". Is this an API bug or the code is not using the API properly? Please clarify this in the commit message if you can. > > For now it's not supported in the legacy API using AVD3D11VAContext, we need to > add a ID3D11DeviceContext in there as it cannot be derived from the other > interfaces we provide (ID3D11VideoContext is not a kind of ID3D11DeviceContext). > --- > libavcodec/dxva2.c | 33 +++++++++++++++++++++++++++++++++ > libavcodec/dxva2_internal.h | 2 ++ > 2 files changed, 35 insertions(+) > > diff --git a/libavcodec/dxva2.c b/libavcodec/dxva2.c > index 32416112bf..1a0e5b69b2 100644 > --- a/libavcodec/dxva2.c > +++ b/libavcodec/dxva2.c > @@ -692,6 +692,12 @@ int ff_dxva2_decode_init(AVCodecContext *avctx) > d3d11_ctx->surface = sctx->d3d11_views; > d3d11_ctx->workaround = sctx->workaround; > d3d11_ctx->context_mutex = INVALID_HANDLE_VALUE; > + > + D3D11_QUERY_DESC query = { 0 }; > + query.Query = D3D11_QUERY_EVENT; > + if (FAILED(ID3D11Device_CreateQuery(device_hwctx->device, &query, > + (ID3D11Query**)&sctx->wait_copies))) > + sctx->wait_copies = NULL; > } > #endif > > @@ -729,6 +735,8 @@ int ff_dxva2_decode_uninit(AVCodecContext *avctx) > av_buffer_unref(&sctx->decoder_ref); > > #if CONFIG_D3D11VA > + if (sctx->wait_copies) > + ID3D11Asynchronous_Release(sctx->wait_copies); > for (i = 0; i < sctx->nb_d3d11_views; i++) { > if (sctx->d3d11_views[i]) > ID3D11VideoDecoderOutputView_Release(sctx->d3d11_views[i]); > @@ -932,6 +940,12 @@ int ff_dxva2_common_end_frame(AVCodecContext *avctx, AVFrame *frame, > > #if CONFIG_D3D11VA > if (ff_dxva2_is_d3d11(avctx)) { > + 
if (sctx->wait_copies) { > + AVHWFramesContext *frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data; > + AVD3D11VADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx; > + ID3D11DeviceContext_Begin(device_hwctx->device_context, sctx->wait_copies); > + } > + > buffer = &buffer11[buffer_count]; > type = D3D11_VIDEO_DECODER_BUFFER_PICTURE_PARAMETERS; > } > @@ -1005,9 +1019,28 @@ int ff_dxva2_common_end_frame(AVCodecContext *avctx, AVFrame *frame, > > #if CONFIG_D3D11VA > if (ff_dxva2_is_d3d11(avctx)) > + { coding style, same line opening brackets > + int maxWait = 10; You can push this initialization (and maybe the comment below) one block down as far as I see. > + /* wait until all the buffer release is done copying data to the GPU > + * before doing the submit command */ > + if (sctx->wait_copies) { > + AVHWFramesContext *frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data; > + AVD3D11VADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx; > + ID3D11DeviceContext_End(device_hwctx->device_context, sctx->wait_copies); > + > + while (maxWait-- && S_FALSE == > + ID3D11DeviceContext_GetData(device_hwctx->device_context, > + sctx->wait_copies, NULL, 0, 0)) { > + ff_dxva2_unlock(avctx); > + SleepEx(2, TRUE); > + ff_dxva2_lock(avctx); > + } > + } > + > hr = ID3D11VideoContext_SubmitDecoderBuffers(D3D11VA_CONTEXT(ctx)->video_context, > D3D11VA_CONTEXT(ctx)->decoder, > buffer_count, buffer11); > + } > #endif > #if CONFIG_DXVA2 > if (avctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD) { > diff --git a/libavcodec/dxva2_internal.h b/libavcodec/dxva2_internal.h > index b822af59cd..c44e8e09b0 100644 > --- a/libavcodec/dxva2_internal.h > +++ b/libavcodec/dxva2_internal.h > @@ -81,6 +81,8 @@ typedef struct FFDXVASharedContext { > ID3D11VideoDecoderOutputView **d3d11_views; > int nb_d3d11_views; > ID3D11Texture2D *d3d11_texture; > + > + ID3D11Asynchronous *wait_copies; > #endif > Regards, Marton
On 2020-08-05 9:55, Marton Balint wrote: > > > On Wed, 5 Aug 2020, Steve Lhomme wrote: > >> When used aggressively, calling SubmitDecoderBuffers() just after >> ReleaseDecoderBuffer() may have the buffers not used properly and created >> decoding artifacts. >> It's likely due to the time to copy the submitted buffer in CPU mapped >> memory >> to GPU memory. SubmitDecoderBuffers() doesn't appear to wait for the >> state >> of the buffer submitted to become "ready". > > Is this an API bug or the code is not using the API properly? Please > clarify this in the commit message if you can. I do not know. The documentation on SubmitDecoderBuffers or ReleaseDecoderBuffer doesn't say anything about that. Maybe the driver documentation that manufacturers use has more information, but I don't have it. >> >> For now it's not supported in the legacy API using AVD3D11VAContext, >> we need to >> add a ID3D11DeviceContext in there as it cannot be derived from the other >> interfaces we provide (ID3D11VideoContext is not a kind of >> ID3D11DeviceContext). 
>> --- >> libavcodec/dxva2.c | 33 +++++++++++++++++++++++++++++++++ >> libavcodec/dxva2_internal.h | 2 ++ >> 2 files changed, 35 insertions(+) >> >> diff --git a/libavcodec/dxva2.c b/libavcodec/dxva2.c >> index 32416112bf..1a0e5b69b2 100644 >> --- a/libavcodec/dxva2.c >> +++ b/libavcodec/dxva2.c >> @@ -692,6 +692,12 @@ int ff_dxva2_decode_init(AVCodecContext *avctx) >> d3d11_ctx->surface = sctx->d3d11_views; >> d3d11_ctx->workaround = sctx->workaround; >> d3d11_ctx->context_mutex = INVALID_HANDLE_VALUE; >> + >> + D3D11_QUERY_DESC query = { 0 }; >> + query.Query = D3D11_QUERY_EVENT; >> + if (FAILED(ID3D11Device_CreateQuery(device_hwctx->device, >> &query, >> + >> (ID3D11Query**)&sctx->wait_copies))) >> + sctx->wait_copies = NULL; >> } >> #endif >> >> @@ -729,6 +735,8 @@ int ff_dxva2_decode_uninit(AVCodecContext *avctx) >> av_buffer_unref(&sctx->decoder_ref); >> >> #if CONFIG_D3D11VA >> + if (sctx->wait_copies) >> + ID3D11Asynchronous_Release(sctx->wait_copies); >> for (i = 0; i < sctx->nb_d3d11_views; i++) { >> if (sctx->d3d11_views[i]) >> ID3D11VideoDecoderOutputView_Release(sctx->d3d11_views[i]); >> @@ -932,6 +940,12 @@ int ff_dxva2_common_end_frame(AVCodecContext >> *avctx, AVFrame *frame, >> >> #if CONFIG_D3D11VA >> if (ff_dxva2_is_d3d11(avctx)) { >> + if (sctx->wait_copies) { >> + AVHWFramesContext *frames_ctx = >> (AVHWFramesContext*)avctx->hw_frames_ctx->data; >> + AVD3D11VADeviceContext *device_hwctx = >> frames_ctx->device_ctx->hwctx; >> + ID3D11DeviceContext_Begin(device_hwctx->device_context, >> sctx->wait_copies); >> + } >> + >> buffer = &buffer11[buffer_count]; >> type = D3D11_VIDEO_DECODER_BUFFER_PICTURE_PARAMETERS; >> } >> @@ -1005,9 +1019,28 @@ int ff_dxva2_common_end_frame(AVCodecContext >> *avctx, AVFrame *frame, >> >> #if CONFIG_D3D11VA >> if (ff_dxva2_is_d3d11(avctx)) >> + { > > coding style, same line opening brackets > >> + int maxWait = 10; > > You can push this initialization (and maybe the comment below) one block > down as far as I see. 
> >> + /* wait until all the buffer release is done copying data to >> the GPU >> + * before doing the submit command */ >> + if (sctx->wait_copies) { >> + AVHWFramesContext *frames_ctx = >> (AVHWFramesContext*)avctx->hw_frames_ctx->data; >> + AVD3D11VADeviceContext *device_hwctx = >> frames_ctx->device_ctx->hwctx; >> + ID3D11DeviceContext_End(device_hwctx->device_context, >> sctx->wait_copies); >> + >> + while (maxWait-- && S_FALSE == >> + >> ID3D11DeviceContext_GetData(device_hwctx->device_context, >> + sctx->wait_copies, >> NULL, 0, 0)) { >> + ff_dxva2_unlock(avctx); >> + SleepEx(2, TRUE); >> + ff_dxva2_lock(avctx); >> + } >> + } >> + >> hr = >> ID3D11VideoContext_SubmitDecoderBuffers(D3D11VA_CONTEXT(ctx)->video_context, >> >> >> D3D11VA_CONTEXT(ctx)->decoder, >> buffer_count, >> buffer11); >> + } >> #endif >> #if CONFIG_DXVA2 >> if (avctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD) { >> diff --git a/libavcodec/dxva2_internal.h b/libavcodec/dxva2_internal.h >> index b822af59cd..c44e8e09b0 100644 >> --- a/libavcodec/dxva2_internal.h >> +++ b/libavcodec/dxva2_internal.h >> @@ -81,6 +81,8 @@ typedef struct FFDXVASharedContext { >> ID3D11VideoDecoderOutputView **d3d11_views; >> int nb_d3d11_views; >> ID3D11Texture2D *d3d11_texture; >> + >> + ID3D11Asynchronous *wait_copies; >> #endif >> > > Regards, > Marton > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
diff --git a/libavcodec/dxva2.c b/libavcodec/dxva2.c index 32416112bf..1a0e5b69b2 100644 --- a/libavcodec/dxva2.c +++ b/libavcodec/dxva2.c @@ -692,6 +692,12 @@ int ff_dxva2_decode_init(AVCodecContext *avctx) d3d11_ctx->surface = sctx->d3d11_views; d3d11_ctx->workaround = sctx->workaround; d3d11_ctx->context_mutex = INVALID_HANDLE_VALUE; + + D3D11_QUERY_DESC query = { 0 }; + query.Query = D3D11_QUERY_EVENT; + if (FAILED(ID3D11Device_CreateQuery(device_hwctx->device, &query, + (ID3D11Query**)&sctx->wait_copies))) + sctx->wait_copies = NULL; } #endif @@ -729,6 +735,8 @@ int ff_dxva2_decode_uninit(AVCodecContext *avctx) av_buffer_unref(&sctx->decoder_ref); #if CONFIG_D3D11VA + if (sctx->wait_copies) + ID3D11Asynchronous_Release(sctx->wait_copies); for (i = 0; i < sctx->nb_d3d11_views; i++) { if (sctx->d3d11_views[i]) ID3D11VideoDecoderOutputView_Release(sctx->d3d11_views[i]); @@ -932,6 +940,12 @@ int ff_dxva2_common_end_frame(AVCodecContext *avctx, AVFrame *frame, #if CONFIG_D3D11VA if (ff_dxva2_is_d3d11(avctx)) { + if (sctx->wait_copies) { + AVHWFramesContext *frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data; + AVD3D11VADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx; + ID3D11DeviceContext_Begin(device_hwctx->device_context, sctx->wait_copies); + } + buffer = &buffer11[buffer_count]; type = D3D11_VIDEO_DECODER_BUFFER_PICTURE_PARAMETERS; } @@ -1005,9 +1019,28 @@ int ff_dxva2_common_end_frame(AVCodecContext *avctx, AVFrame *frame, #if CONFIG_D3D11VA if (ff_dxva2_is_d3d11(avctx)) + { + int maxWait = 10; + /* wait until all the buffer release is done copying data to the GPU + * before doing the submit command */ + if (sctx->wait_copies) { + AVHWFramesContext *frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data; + AVD3D11VADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx; + ID3D11DeviceContext_End(device_hwctx->device_context, sctx->wait_copies); + + while (maxWait-- && S_FALSE == + 
ID3D11DeviceContext_GetData(device_hwctx->device_context, + sctx->wait_copies, NULL, 0, 0)) { + ff_dxva2_unlock(avctx); + SleepEx(2, TRUE); + ff_dxva2_lock(avctx); + } + } + hr = ID3D11VideoContext_SubmitDecoderBuffers(D3D11VA_CONTEXT(ctx)->video_context, D3D11VA_CONTEXT(ctx)->decoder, buffer_count, buffer11); + } #endif #if CONFIG_DXVA2 if (avctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD) { diff --git a/libavcodec/dxva2_internal.h b/libavcodec/dxva2_internal.h index b822af59cd..c44e8e09b0 100644 --- a/libavcodec/dxva2_internal.h +++ b/libavcodec/dxva2_internal.h @@ -81,6 +81,8 @@ typedef struct FFDXVASharedContext { ID3D11VideoDecoderOutputView **d3d11_views; int nb_d3d11_views; ID3D11Texture2D *d3d11_texture; + + ID3D11Asynchronous *wait_copies; #endif #if CONFIG_DXVA2