diff mbox series

[FFmpeg-devel,v2] avcodec/amfenc: increase precision of Sleep() on Windows

Message ID 20231113143722.1959-1-lucenticus@gmail.com
State New
Headers show
Series [FFmpeg-devel,v2] avcodec/amfenc: increase precision of Sleep() on Windows | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Evgeny Pavlov Nov. 13, 2023, 2:37 p.m. UTC
This commit increases the precision of the Sleep() function on Windows.
This fix reduces the sleep time on Windows to improve AMF encoding
performance on low-resolution input videos.

Fix for issue #10622

v2: use timeBeginPeriod/timeEndPeriod for increasing precision of Sleep()

Signed-off-by: Evgeny Pavlov <lucenticus@gmail.com>
---
 libavcodec/amfenc.c | 31 +++++++++++++++++++++++++++++++
 libavcodec/amfenc.h |  3 +++
 2 files changed, 34 insertions(+)

Comments

Evgeny Pavlov Nov. 20, 2023, 4:01 p.m. UTC | #1
On Mon, Nov 13, 2023 at 3:41 PM Evgeny Pavlov <lucenticus@gmail.com> wrote:

> This commit increase precision of Sleep() function on Windows.
> This fix reduces the sleep time on Windows to improve AMF encoding
> performance on low resolution input videos.
>
> Fix for issue #10622
>
> v2: use timeBeginPeriod/timeEndPeriod for increasing precision of Sleep()
>
> Signed-off-by: Evgeny Pavlov <lucenticus@gmail.com>
> ---
>  libavcodec/amfenc.c | 31 +++++++++++++++++++++++++++++++
>  libavcodec/amfenc.h |  3 +++
>  2 files changed, 34 insertions(+)
>
> diff --git a/libavcodec/amfenc.c b/libavcodec/amfenc.c
> index 061859f85c..55e24856e8 100644
> --- a/libavcodec/amfenc.c
> +++ b/libavcodec/amfenc.c
> @@ -42,7 +42,12 @@
>  #endif
>
>  #ifdef _WIN32
> +#include <timeapi.h>
>  #include "compat/w32dlfcn.h"
> +
> +typedef MMRESULT (*timeapi_fun)(UINT uPeriod);
> +#define WINMM_DLL "winmm.dll"
> +
>  #else
>  #include <dlfcn.h>
>  #endif
> @@ -113,6 +118,9 @@ static int amf_load_library(AVCodecContext *avctx)
>      AMFInit_Fn         init_fun;
>      AMFQueryVersion_Fn version_fun;
>      AMF_RESULT         res;
> +#ifdef _WIN32
> +    timeapi_fun time_begin_fun;
> +#endif
>
>      ctx->delayed_frame = av_frame_alloc();
>      if (!ctx->delayed_frame) {
> @@ -145,6 +153,16 @@ static int amf_load_library(AVCodecContext *avctx)
>      AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "GetTrace()
> failed with error %d\n", res);
>      res = ctx->factory->pVtbl->GetDebug(ctx->factory, &ctx->debug);
>      AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "GetDebug()
> failed with error %d\n", res);
> +
> +#ifdef _WIN32
> +    // Increase precision of Sleep() function on Windows platform
> +    ctx->winmm_lib = dlopen(WINMM_DLL, RTLD_NOW | RTLD_LOCAL);
> +    AMF_RETURN_IF_FALSE(ctx, ctx->winmm_lib != NULL, 0, "DLL %s failed to
> open\n", WINMM_DLL);
> +    time_begin_fun = (timeapi_fun)dlsym(ctx->winmm_lib,
> "timeBeginPeriod");
> +    AMF_RETURN_IF_FALSE(ctx, time_begin_fun != NULL, 0, "DLL %s failed to
> find function %s\n", WINMM_DLL, "timeBeginPeriod");
> +    time_begin_fun(1);
> +#endif //_WIN32
> +
>      return 0;
>  }
>
> @@ -375,6 +393,9 @@ static int amf_init_encoder(AVCodecContext *avctx)
>  int av_cold ff_amf_encode_close(AVCodecContext *avctx)
>  {
>      AmfContext *ctx = avctx->priv_data;
> +#ifdef _WIN32
> +    timeapi_fun time_end_fun;
> +#endif //_WIN32
>
>      if (ctx->delayed_surface) {
>          ctx->delayed_surface->pVtbl->Release(ctx->delayed_surface);
> @@ -410,6 +431,16 @@ int av_cold ff_amf_encode_close(AVCodecContext *avctx)
>      av_frame_free(&ctx->delayed_frame);
>      av_fifo_freep2(&ctx->timestamp_list);
>
> +#ifdef _WIN32
> +    if (ctx->winmm_lib) {
> +        time_end_fun = (timeapi_fun)dlsym(ctx->winmm_lib,
> "timeEndPeriod");
> +        AMF_RETURN_IF_FALSE(ctx, time_end_fun != NULL, 0, "DLL %s failed
> to find function %s\n", WINMM_DLL, "timeEndPeriod");
> +        time_end_fun(1);
> +        dlclose(ctx->winmm_lib);
> +        ctx->winmm_lib = NULL;
> +    }
> +#endif //_WIN32
> +
>      return 0;
>  }
>
> diff --git a/libavcodec/amfenc.h b/libavcodec/amfenc.h
> index 2dbd378ef8..35bcf1dfe3 100644
> --- a/libavcodec/amfenc.h
> +++ b/libavcodec/amfenc.h
> @@ -50,6 +50,9 @@ typedef struct AmfContext {
>      AVClass            *avclass;
>      // access to AMF runtime
>      amf_handle          library; ///< handle to DLL library
> +#ifdef _WIN32
> +    amf_handle          winmm_lib; ///< handle to winmm DLL library
> +#endif //_WIN32
>      AMFFactory         *factory; ///< pointer to AMF factory
>      AMFDebug           *debug;   ///< pointer to AMF debug interface
>      AMFTrace           *trace;   ///< pointer to AMF trace interface
> --
> 2.42.0
>
>
Please take a look at this patch; it helps improve AMF encoding
performance on low-resolution video on the Windows platform by using a
more precise Sleep().
Mark Thompson Nov. 27, 2023, 1:42 p.m. UTC | #2
On 13/11/2023 14:37, Evgeny Pavlov wrote:
> This commit increase precision of Sleep() function on Windows.
> This fix reduces the sleep time on Windows to improve AMF encoding
> performance on low resolution input videos.
> 
> Fix for issue #10622
> 
> v2: use timeBeginPeriod/timeEndPeriod for increasing precision of Sleep()
> 
> Signed-off-by: Evgeny Pavlov <lucenticus@gmail.com>
> ---
>   libavcodec/amfenc.c | 31 +++++++++++++++++++++++++++++++
>   libavcodec/amfenc.h |  3 +++
>   2 files changed, 34 insertions(+)
> 
> diff --git a/libavcodec/amfenc.c b/libavcodec/amfenc.c
> index 061859f85c..55e24856e8 100644
> --- a/libavcodec/amfenc.c
> +++ b/libavcodec/amfenc.c
> @@ -42,7 +42,12 @@
>   #endif
>   
>   #ifdef _WIN32
> +#include <timeapi.h>
>   #include "compat/w32dlfcn.h"
> +
> +typedef MMRESULT (*timeapi_fun)(UINT uPeriod);
> +#define WINMM_DLL "winmm.dll"
> +
>   #else
>   #include <dlfcn.h>
>   #endif
> @@ -113,6 +118,9 @@ static int amf_load_library(AVCodecContext *avctx)
>       AMFInit_Fn         init_fun;
>       AMFQueryVersion_Fn version_fun;
>       AMF_RESULT         res;
> +#ifdef _WIN32
> +    timeapi_fun time_begin_fun;
> +#endif
>   
>       ctx->delayed_frame = av_frame_alloc();
>       if (!ctx->delayed_frame) {
> @@ -145,6 +153,16 @@ static int amf_load_library(AVCodecContext *avctx)
>       AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "GetTrace() failed with error %d\n", res);
>       res = ctx->factory->pVtbl->GetDebug(ctx->factory, &ctx->debug);
>       AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "GetDebug() failed with error %d\n", res);
> +
> +#ifdef _WIN32
> +    // Increase precision of Sleep() function on Windows platform
> +    ctx->winmm_lib = dlopen(WINMM_DLL, RTLD_NOW | RTLD_LOCAL);
> +    AMF_RETURN_IF_FALSE(ctx, ctx->winmm_lib != NULL, 0, "DLL %s failed to open\n", WINMM_DLL);
> +    time_begin_fun = (timeapi_fun)dlsym(ctx->winmm_lib, "timeBeginPeriod");
> +    AMF_RETURN_IF_FALSE(ctx, time_begin_fun != NULL, 0, "DLL %s failed to find function %s\n", WINMM_DLL, "timeBeginPeriod");
> +    time_begin_fun(1);
> +#endif //_WIN32
> +
>       return 0;
>   }
>   
> @@ -375,6 +393,9 @@ static int amf_init_encoder(AVCodecContext *avctx)
>   int av_cold ff_amf_encode_close(AVCodecContext *avctx)
>   {
>       AmfContext *ctx = avctx->priv_data;
> +#ifdef _WIN32
> +    timeapi_fun time_end_fun;
> +#endif //_WIN32
>   
>       if (ctx->delayed_surface) {
>           ctx->delayed_surface->pVtbl->Release(ctx->delayed_surface);
> @@ -410,6 +431,16 @@ int av_cold ff_amf_encode_close(AVCodecContext *avctx)
>       av_frame_free(&ctx->delayed_frame);
>       av_fifo_freep2(&ctx->timestamp_list);
>   
> +#ifdef _WIN32
> +    if (ctx->winmm_lib) {
> +        time_end_fun = (timeapi_fun)dlsym(ctx->winmm_lib, "timeEndPeriod");
> +        AMF_RETURN_IF_FALSE(ctx, time_end_fun != NULL, 0, "DLL %s failed to find function %s\n", WINMM_DLL, "timeEndPeriod");
> +        time_end_fun(1);
> +        dlclose(ctx->winmm_lib);
> +        ctx->winmm_lib = NULL;
> +    }
> +#endif //_WIN32
> +
>       return 0;
>   }
>   
> diff --git a/libavcodec/amfenc.h b/libavcodec/amfenc.h
> index 2dbd378ef8..35bcf1dfe3 100644
> --- a/libavcodec/amfenc.h
> +++ b/libavcodec/amfenc.h
> @@ -50,6 +50,9 @@ typedef struct AmfContext {
>       AVClass            *avclass;
>       // access to AMF runtime
>       amf_handle          library; ///< handle to DLL library
> +#ifdef _WIN32
> +    amf_handle          winmm_lib; ///< handle to winmm DLL library
> +#endif //_WIN32
>       AMFFactory         *factory; ///< pointer to AMF factory
>       AMFDebug           *debug;   ///< pointer to AMF debug interface
>       AMFTrace           *trace;   ///< pointer to AMF trace interface

Is it reasonable to set this global state from a library without the parent program knowing?  We'd really prefer not to affect the global state unexpectedly.

It's also unclear to me what the effect of this tradeoff on power is, given that the whole reason why this happens is that Windows is trying to keep the CPU asleep for as long as possible to save power.

Thanks,

- Mark
Henrik Gramner Nov. 27, 2023, 2:04 p.m. UTC | #3
On Mon, Nov 27, 2023 at 2:42 PM Mark Thompson <sw@jkqxz.net> wrote:
> Is it reasonable to set this global state from a library without the parent program knowing?  We'd really prefer not to affect the global state unexpectedly.

CreateWaitableTimerExW() with the
CREATE_WAITABLE_TIMER_HIGH_RESOLUTION flag might be an alternative?
Evgeny Pavlov Feb. 19, 2024, 3:26 p.m. UTC | #4
On Mon, Nov 27, 2023 at 3:05 PM Henrik Gramner via ffmpeg-devel
<ffmpeg-devel@ffmpeg.org> wrote:
>
> On Mon, Nov 27, 2023 at 2:42 PM Mark Thompson <sw@jkqxz.net> wrote:
> > Is it reasonable to set this global state from a library without the parent program knowing?  We'd really prefer not to affect the global state unexpectedly.
>
> CreateWaitableTimerExW() with the
> CREATE_WAITABLE_TIMER_HIGH_RESOLUTION flag might be an alternative?
>

We evaluated CreateWaitableTimerExW with
CREATE_WAITABLE_TIMER_HIGH_RESOLUTION flag. In fact, this function has
the same precision level as the Sleep() function.

Usually, changing the timer resolution only affects the current
process and does not impact other processes, so it does not have a
global effect on the system. Here is an excerpt from the
documentation on timeBeginPeriod:
https://learn.microsoft.com/en-us/windows/win32/api/timeapi/nf-timeapi-timebeginperiod

"Prior to Windows 10, version 2004, this function affects a global
Windows setting. For all processes Windows uses the lowest value (that
is, highest resolution) requested by any process. Starting with
Windows 10, version 2004, this function no longer affects global timer
resolution. For processes which call this function, Windows uses the
lowest value (that is, highest resolution) requested by any process.
For processes which have not called this function, Windows does not
guarantee a higher resolution than the default system resolution."

We provide the following measurement to show performance improvements
with this patch.

1. Performance tests show that this high-precision sleep improves
performance, especially for low-resolution sequences, where it can
yield about a 20% improvement.

Frames per second (FPS) encoded by the hardware encoder (Navi 31
RX7900XT):

Source Type: H.264, Output Type: H.264
(Sorry for bad formatting)
No. | Sequence Resolution | No. of Frames | FPS before patch | FPS after patch | Difference | Improvement %
----|---------------------|---------------|------------------|-----------------|------------|--------------
1   | 480x360             | 8290          | 2030             | 2365            | 335        | 16.5%
2   | 720x576             | 8290          | 1440             | 1790            | 350        | 24.3%
3   | 1280x720            | 8290          | 1120             | 1190            | 70         | 6.3%
4   | 1920x1080           | 8290          | 692              | 714             | 22         | 3.2%
5   | 3840x2160           | 8290          | 200              | 200             | 0          | 0.0%

The sample ffmpeg command line:
$ ffmpeg.exe -y -hwaccel d3d11va -hwaccel_output_format d3d11 -i
input.mp4 -c:v h264_amf out.mp4
where input.mp4 should be changed to corresponding resolution input
H.264 format bitstream.

2. The power tests show that the increase in power consumption is within acceptable limits.

The purpose of the power test is to examine the increase in CPU power
consumption due to the improvement in CPU time resolution after using
this patch. We were testing a product from AMD called Phoenix, which
we refer to as an APU. It combines a general-purpose AMD CPU and a 3D
integrated graphics processing unit (IGPU) on a single die. Only the
APU has a DAP connector to the board's power rails.

We got the power test data shown below:

|                        | 480x360 | 720x576 | 1280x720 | 1920x1080 | 3840x2160 | average
|------------------------|---------|---------|----------|-----------|-----------|--------
| CPU power change       | 1.93%   | 2.43%   | -1.69%   | 3.49%     | 2.92%     | 1.82%
| APU power total change | 0.86%   | 1.34%   | -0.62%   | 1.54%     | -0.58%    | 0.51%

When using a high-precision clock by applying the patch, the average
power consumption of the CPU increases by 1.82%, and that of the APU
in total increases by 0.51%. We can see that the increase in power
consumption is not very significant.
diff mbox series

Patch

diff --git a/libavcodec/amfenc.c b/libavcodec/amfenc.c
index 061859f85c..55e24856e8 100644
--- a/libavcodec/amfenc.c
+++ b/libavcodec/amfenc.c
@@ -42,7 +42,12 @@ 
 #endif
 
 #ifdef _WIN32
+#include <timeapi.h>
 #include "compat/w32dlfcn.h"
+
+typedef MMRESULT (*timeapi_fun)(UINT uPeriod);
+#define WINMM_DLL "winmm.dll"
+
 #else
 #include <dlfcn.h>
 #endif
@@ -113,6 +118,9 @@  static int amf_load_library(AVCodecContext *avctx)
     AMFInit_Fn         init_fun;
     AMFQueryVersion_Fn version_fun;
     AMF_RESULT         res;
+#ifdef _WIN32
+    timeapi_fun time_begin_fun;
+#endif
 
     ctx->delayed_frame = av_frame_alloc();
     if (!ctx->delayed_frame) {
@@ -145,6 +153,16 @@  static int amf_load_library(AVCodecContext *avctx)
     AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "GetTrace() failed with error %d\n", res);
     res = ctx->factory->pVtbl->GetDebug(ctx->factory, &ctx->debug);
     AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "GetDebug() failed with error %d\n", res);
+
+#ifdef _WIN32
+    // Increase precision of Sleep() function on Windows platform
+    ctx->winmm_lib = dlopen(WINMM_DLL, RTLD_NOW | RTLD_LOCAL);
+    AMF_RETURN_IF_FALSE(ctx, ctx->winmm_lib != NULL, 0, "DLL %s failed to open\n", WINMM_DLL);
+    time_begin_fun = (timeapi_fun)dlsym(ctx->winmm_lib, "timeBeginPeriod");
+    AMF_RETURN_IF_FALSE(ctx, time_begin_fun != NULL, 0, "DLL %s failed to find function %s\n", WINMM_DLL, "timeBeginPeriod");
+    time_begin_fun(1);
+#endif //_WIN32
+
     return 0;
 }
 
@@ -375,6 +393,9 @@  static int amf_init_encoder(AVCodecContext *avctx)
 int av_cold ff_amf_encode_close(AVCodecContext *avctx)
 {
     AmfContext *ctx = avctx->priv_data;
+#ifdef _WIN32
+    timeapi_fun time_end_fun;
+#endif //_WIN32
 
     if (ctx->delayed_surface) {
         ctx->delayed_surface->pVtbl->Release(ctx->delayed_surface);
@@ -410,6 +431,16 @@  int av_cold ff_amf_encode_close(AVCodecContext *avctx)
     av_frame_free(&ctx->delayed_frame);
     av_fifo_freep2(&ctx->timestamp_list);
 
+#ifdef _WIN32
+    if (ctx->winmm_lib) {
+        time_end_fun = (timeapi_fun)dlsym(ctx->winmm_lib, "timeEndPeriod");
+        AMF_RETURN_IF_FALSE(ctx, time_end_fun != NULL, 0, "DLL %s failed to find function %s\n", WINMM_DLL, "timeEndPeriod");
+        time_end_fun(1);
+        dlclose(ctx->winmm_lib);
+        ctx->winmm_lib = NULL;
+    }
+#endif //_WIN32
+
     return 0;
 }
 
diff --git a/libavcodec/amfenc.h b/libavcodec/amfenc.h
index 2dbd378ef8..35bcf1dfe3 100644
--- a/libavcodec/amfenc.h
+++ b/libavcodec/amfenc.h
@@ -50,6 +50,9 @@  typedef struct AmfContext {
     AVClass            *avclass;
     // access to AMF runtime
     amf_handle          library; ///< handle to DLL library
+#ifdef _WIN32
+    amf_handle          winmm_lib; ///< handle to winmm DLL library
+#endif //_WIN32
     AMFFactory         *factory; ///< pointer to AMF factory
     AMFDebug           *debug;   ///< pointer to AMF debug interface
     AMFTrace           *trace;   ///< pointer to AMF trace interface