diff mbox series

[FFmpeg-devel,v2] dxva: wait until D3D11 buffer copies are done before submitting them

Message ID 20200805071921.29967-1-robux4@ycbcr.xyz
State Superseded
Headers show
Series [FFmpeg-devel,v2] dxva: wait until D3D11 buffer copies are done before submitting them
Related show

Checks

Context Check Description
andriy/default pending
andriy/make success Make finished
andriy/make_fate success Make fate finished

Commit Message

Steve Lhomme Aug. 5, 2020, 7:19 a.m. UTC
When used aggressively, calling SubmitDecoderBuffers() just after
ReleaseDecoderBuffer() may have the buffers not used properly and created
decoding artifacts.
It's likely due to the time to copy the submitted buffer in CPU mapped memory
to GPU memory. SubmitDecoderBuffers() doesn't appear to wait for the state
of the buffer submitted to become "ready".

For now it's not supported in the legacy API using AVD3D11VAContext, we need to
add a ID3D11DeviceContext in there as it cannot be derived from the other
interfaces we provide (ID3D11VideoContext is not a kind of ID3D11DeviceContext).
---
 libavcodec/dxva2.c          | 33 +++++++++++++++++++++++++++++++++
 libavcodec/dxva2_internal.h |  2 ++
 2 files changed, 35 insertions(+)

Comments

Marton Balint Aug. 5, 2020, 7:55 a.m. UTC | #1
On Wed, 5 Aug 2020, Steve Lhomme wrote:

> When used aggressively, calling SubmitDecoderBuffers() just after
> ReleaseDecoderBuffer() may have the buffers not used properly and created
> decoding artifacts.
> It's likely due to the time to copy the submitted buffer in CPU mapped memory
> to GPU memory. SubmitDecoderBuffers() doesn't appear to wait for the state
> of the buffer submitted to become "ready".

Is this an API bug or the code is not using the API properly? Please 
clarify this in the commit message if you can.

>
> For now it's not supported in the legacy API using AVD3D11VAContext, we need to
> add a ID3D11DeviceContext in there as it cannot be derived from the other
> interfaces we provide (ID3D11VideoContext is not a kind of ID3D11DeviceContext).
> ---
> libavcodec/dxva2.c          | 33 +++++++++++++++++++++++++++++++++
> libavcodec/dxva2_internal.h |  2 ++
> 2 files changed, 35 insertions(+)
>
> diff --git a/libavcodec/dxva2.c b/libavcodec/dxva2.c
> index 32416112bf..1a0e5b69b2 100644
> --- a/libavcodec/dxva2.c
> +++ b/libavcodec/dxva2.c
> @@ -692,6 +692,12 @@ int ff_dxva2_decode_init(AVCodecContext *avctx)
>         d3d11_ctx->surface       = sctx->d3d11_views;
>         d3d11_ctx->workaround    = sctx->workaround;
>         d3d11_ctx->context_mutex = INVALID_HANDLE_VALUE;
> +
> +        D3D11_QUERY_DESC query = { 0 };
> +        query.Query = D3D11_QUERY_EVENT;
> +        if (FAILED(ID3D11Device_CreateQuery(device_hwctx->device, &query,
> +                                            (ID3D11Query**)&sctx->wait_copies)))
> +            sctx->wait_copies = NULL;
>     }
> #endif
> 
> @@ -729,6 +735,8 @@ int ff_dxva2_decode_uninit(AVCodecContext *avctx)
>     av_buffer_unref(&sctx->decoder_ref);
> 
> #if CONFIG_D3D11VA
> +    if (sctx->wait_copies)
> +        ID3D11Asynchronous_Release(sctx->wait_copies);
>     for (i = 0; i < sctx->nb_d3d11_views; i++) {
>         if (sctx->d3d11_views[i])
>             ID3D11VideoDecoderOutputView_Release(sctx->d3d11_views[i]);
> @@ -932,6 +940,12 @@ int ff_dxva2_common_end_frame(AVCodecContext *avctx, AVFrame *frame,
> 
> #if CONFIG_D3D11VA
>     if (ff_dxva2_is_d3d11(avctx)) {
> +        if (sctx->wait_copies) {
> +            AVHWFramesContext *frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
> +            AVD3D11VADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
> +            ID3D11DeviceContext_Begin(device_hwctx->device_context, sctx->wait_copies);
> +        }
> +
>         buffer = &buffer11[buffer_count];
>         type = D3D11_VIDEO_DECODER_BUFFER_PICTURE_PARAMETERS;
>     }
> @@ -1005,9 +1019,28 @@ int ff_dxva2_common_end_frame(AVCodecContext *avctx, AVFrame *frame,
> 
> #if CONFIG_D3D11VA
>     if (ff_dxva2_is_d3d11(avctx))
> +    {

coding style, same line opening brackets

> +        int maxWait = 10;

You can push this initialization (and maybe the comment below) one block 
down as far as I see.

> +        /* wait until all the buffer release is done copying data to the GPU
> +         * before doing the submit command */
> +        if (sctx->wait_copies) {
> +            AVHWFramesContext *frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
> +            AVD3D11VADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
> +            ID3D11DeviceContext_End(device_hwctx->device_context, sctx->wait_copies);
> +
> +            while (maxWait-- && S_FALSE ==
> +                   ID3D11DeviceContext_GetData(device_hwctx->device_context,
> +                                               sctx->wait_copies, NULL, 0, 0)) {
> +                ff_dxva2_unlock(avctx);
> +                SleepEx(2, TRUE);
> +                ff_dxva2_lock(avctx);
> +            }
> +        }
> +
>         hr = ID3D11VideoContext_SubmitDecoderBuffers(D3D11VA_CONTEXT(ctx)->video_context,
>                                                      D3D11VA_CONTEXT(ctx)->decoder,
>                                                      buffer_count, buffer11);
> +    }
> #endif
> #if CONFIG_DXVA2
>     if (avctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD) {
> diff --git a/libavcodec/dxva2_internal.h b/libavcodec/dxva2_internal.h
> index b822af59cd..c44e8e09b0 100644
> --- a/libavcodec/dxva2_internal.h
> +++ b/libavcodec/dxva2_internal.h
> @@ -81,6 +81,8 @@ typedef struct FFDXVASharedContext {
>     ID3D11VideoDecoderOutputView  **d3d11_views;
>     int                          nb_d3d11_views;
>     ID3D11Texture2D                *d3d11_texture;
> +
> +    ID3D11Asynchronous             *wait_copies;
> #endif
>

Regards,
Marton
Steve Lhomme Aug. 5, 2020, 8:43 a.m. UTC | #2
On 2020-08-05 9:55, Marton Balint wrote:
> 
> 
> On Wed, 5 Aug 2020, Steve Lhomme wrote:
> 
>> When used aggressively, calling SubmitDecoderBuffers() just after
>> ReleaseDecoderBuffer() may have the buffers not used properly and created
>> decoding artifacts.
>> It's likely due to the time to copy the submitted buffer in CPU mapped 
>> memory
>> to GPU memory. SubmitDecoderBuffers() doesn't appear to wait for the 
>> state
>> of the buffer submitted to become "ready".
> 
> Is this an API bug or the code is not using the API properly? Please 
> clarify this in the commit message if you can.

I do not know. The documentation on SubmitDecoderBuffers or 
ReleaseDecoderBuffer doesn't say anything about that. Maybe the driver 
documentation that manufacturers use have more information, but I don't 
have it.

>>
>> For now it's not supported in the legacy API using AVD3D11VAContext, 
>> we need to
>> add a ID3D11DeviceContext in there as it cannot be derived from the other
>> interfaces we provide (ID3D11VideoContext is not a kind of 
>> ID3D11DeviceContext).
>> ---
>> libavcodec/dxva2.c          | 33 +++++++++++++++++++++++++++++++++
>> libavcodec/dxva2_internal.h |  2 ++
>> 2 files changed, 35 insertions(+)
>>
>> diff --git a/libavcodec/dxva2.c b/libavcodec/dxva2.c
>> index 32416112bf..1a0e5b69b2 100644
>> --- a/libavcodec/dxva2.c
>> +++ b/libavcodec/dxva2.c
>> @@ -692,6 +692,12 @@ int ff_dxva2_decode_init(AVCodecContext *avctx)
>>         d3d11_ctx->surface       = sctx->d3d11_views;
>>         d3d11_ctx->workaround    = sctx->workaround;
>>         d3d11_ctx->context_mutex = INVALID_HANDLE_VALUE;
>> +
>> +        D3D11_QUERY_DESC query = { 0 };
>> +        query.Query = D3D11_QUERY_EVENT;
>> +        if (FAILED(ID3D11Device_CreateQuery(device_hwctx->device, 
>> &query,
>> +                                            
>> (ID3D11Query**)&sctx->wait_copies)))
>> +            sctx->wait_copies = NULL;
>>     }
>> #endif
>>
>> @@ -729,6 +735,8 @@ int ff_dxva2_decode_uninit(AVCodecContext *avctx)
>>     av_buffer_unref(&sctx->decoder_ref);
>>
>> #if CONFIG_D3D11VA
>> +    if (sctx->wait_copies)
>> +        ID3D11Asynchronous_Release(sctx->wait_copies);
>>     for (i = 0; i < sctx->nb_d3d11_views; i++) {
>>         if (sctx->d3d11_views[i])
>>             ID3D11VideoDecoderOutputView_Release(sctx->d3d11_views[i]);
>> @@ -932,6 +940,12 @@ int ff_dxva2_common_end_frame(AVCodecContext 
>> *avctx, AVFrame *frame,
>>
>> #if CONFIG_D3D11VA
>>     if (ff_dxva2_is_d3d11(avctx)) {
>> +        if (sctx->wait_copies) {
>> +            AVHWFramesContext *frames_ctx = 
>> (AVHWFramesContext*)avctx->hw_frames_ctx->data;
>> +            AVD3D11VADeviceContext *device_hwctx = 
>> frames_ctx->device_ctx->hwctx;
>> +            ID3D11DeviceContext_Begin(device_hwctx->device_context, 
>> sctx->wait_copies);
>> +        }
>> +
>>         buffer = &buffer11[buffer_count];
>>         type = D3D11_VIDEO_DECODER_BUFFER_PICTURE_PARAMETERS;
>>     }
>> @@ -1005,9 +1019,28 @@ int ff_dxva2_common_end_frame(AVCodecContext 
>> *avctx, AVFrame *frame,
>>
>> #if CONFIG_D3D11VA
>>     if (ff_dxva2_is_d3d11(avctx))
>> +    {
> 
> coding style, same line opening brackets
> 
>> +        int maxWait = 10;
> 
> You can push this initialization (and maybe the comment below) one block 
> down as far as I see.
> 
>> +        /* wait until all the buffer release is done copying data to 
>> the GPU
>> +         * before doing the submit command */
>> +        if (sctx->wait_copies) {
>> +            AVHWFramesContext *frames_ctx = 
>> (AVHWFramesContext*)avctx->hw_frames_ctx->data;
>> +            AVD3D11VADeviceContext *device_hwctx = 
>> frames_ctx->device_ctx->hwctx;
>> +            ID3D11DeviceContext_End(device_hwctx->device_context, 
>> sctx->wait_copies);
>> +
>> +            while (maxWait-- && S_FALSE ==
>> +                   
>> ID3D11DeviceContext_GetData(device_hwctx->device_context,
>> +                                               sctx->wait_copies, 
>> NULL, 0, 0)) {
>> +                ff_dxva2_unlock(avctx);
>> +                SleepEx(2, TRUE);
>> +                ff_dxva2_lock(avctx);
>> +            }
>> +        }
>> +
>>         hr = 
>> ID3D11VideoContext_SubmitDecoderBuffers(D3D11VA_CONTEXT(ctx)->video_context, 
>>
>>                                                      
>> D3D11VA_CONTEXT(ctx)->decoder,
>>                                                      buffer_count, 
>> buffer11);
>> +    }
>> #endif
>> #if CONFIG_DXVA2
>>     if (avctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD) {
>> diff --git a/libavcodec/dxva2_internal.h b/libavcodec/dxva2_internal.h
>> index b822af59cd..c44e8e09b0 100644
>> --- a/libavcodec/dxva2_internal.h
>> +++ b/libavcodec/dxva2_internal.h
>> @@ -81,6 +81,8 @@ typedef struct FFDXVASharedContext {
>>     ID3D11VideoDecoderOutputView  **d3d11_views;
>>     int                          nb_d3d11_views;
>>     ID3D11Texture2D                *d3d11_texture;
>> +
>> +    ID3D11Asynchronous             *wait_copies;
>> #endif
>>
> 
> Regards,
> Marton
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
diff mbox series

Patch

diff --git a/libavcodec/dxva2.c b/libavcodec/dxva2.c
index 32416112bf..1a0e5b69b2 100644
--- a/libavcodec/dxva2.c
+++ b/libavcodec/dxva2.c
@@ -692,6 +692,12 @@  int ff_dxva2_decode_init(AVCodecContext *avctx)
         d3d11_ctx->surface       = sctx->d3d11_views;
         d3d11_ctx->workaround    = sctx->workaround;
         d3d11_ctx->context_mutex = INVALID_HANDLE_VALUE;
+
+        D3D11_QUERY_DESC query = { 0 };
+        query.Query = D3D11_QUERY_EVENT;
+        if (FAILED(ID3D11Device_CreateQuery(device_hwctx->device, &query,
+                                            (ID3D11Query**)&sctx->wait_copies)))
+            sctx->wait_copies = NULL;
     }
 #endif
 
@@ -729,6 +735,8 @@  int ff_dxva2_decode_uninit(AVCodecContext *avctx)
     av_buffer_unref(&sctx->decoder_ref);
 
 #if CONFIG_D3D11VA
+    if (sctx->wait_copies)
+        ID3D11Asynchronous_Release(sctx->wait_copies);
     for (i = 0; i < sctx->nb_d3d11_views; i++) {
         if (sctx->d3d11_views[i])
             ID3D11VideoDecoderOutputView_Release(sctx->d3d11_views[i]);
@@ -932,6 +940,12 @@  int ff_dxva2_common_end_frame(AVCodecContext *avctx, AVFrame *frame,
 
 #if CONFIG_D3D11VA
     if (ff_dxva2_is_d3d11(avctx)) {
+        if (sctx->wait_copies) {
+            AVHWFramesContext *frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
+            AVD3D11VADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
+            ID3D11DeviceContext_Begin(device_hwctx->device_context, sctx->wait_copies);
+        }
+
         buffer = &buffer11[buffer_count];
         type = D3D11_VIDEO_DECODER_BUFFER_PICTURE_PARAMETERS;
     }
@@ -1005,9 +1019,28 @@  int ff_dxva2_common_end_frame(AVCodecContext *avctx, AVFrame *frame,
 
 #if CONFIG_D3D11VA
     if (ff_dxva2_is_d3d11(avctx))
+    {
+        int maxWait = 10;
+        /* wait until all the buffer release is done copying data to the GPU
+         * before doing the submit command */
+        if (sctx->wait_copies) {
+            AVHWFramesContext *frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
+            AVD3D11VADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
+            ID3D11DeviceContext_End(device_hwctx->device_context, sctx->wait_copies);
+
+            while (maxWait-- && S_FALSE ==
+                   ID3D11DeviceContext_GetData(device_hwctx->device_context,
+                                               sctx->wait_copies, NULL, 0, 0)) {
+                ff_dxva2_unlock(avctx);
+                SleepEx(2, TRUE);
+                ff_dxva2_lock(avctx);
+            }
+        }
+
         hr = ID3D11VideoContext_SubmitDecoderBuffers(D3D11VA_CONTEXT(ctx)->video_context,
                                                      D3D11VA_CONTEXT(ctx)->decoder,
                                                      buffer_count, buffer11);
+    }
 #endif
 #if CONFIG_DXVA2
     if (avctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD) {
diff --git a/libavcodec/dxva2_internal.h b/libavcodec/dxva2_internal.h
index b822af59cd..c44e8e09b0 100644
--- a/libavcodec/dxva2_internal.h
+++ b/libavcodec/dxva2_internal.h
@@ -81,6 +81,8 @@  typedef struct FFDXVASharedContext {
     ID3D11VideoDecoderOutputView  **d3d11_views;
     int                          nb_d3d11_views;
     ID3D11Texture2D                *d3d11_texture;
+
+    ID3D11Asynchronous             *wait_copies;
 #endif
 
 #if CONFIG_DXVA2