diff mbox

[FFmpeg-devel] avdevice/xcbgrab: Improve non-shm performance

Message ID 20191119135956578.DUDW.12759.ppp.dion.ne.jp@dmta0004.auone-net.jp
State New
Headers show

Commit Message

Kusanagi Kouichi Nov. 19, 2019, 1:59 p.m. UTC
Use AVBufferPool.

before:
$ ffmpeg -benchmark -f x11grab -video_size 1920x1200 -r 200 -t 3 -i +0,0 -f null -
...
frame=  600 fps=102 q=-0.0 Lsize=N/A time=00:00:03.00 bitrate=N/A speed=0.51x
video:314kB audio:0kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: unknown
bench: utime=1.551s stime=3.428s rtime=5.884s
bench: maxrss=40132kB

after:
$ ffmpeg -benchmark -f x11grab -video_size 1920x1200 -r 200 -t 3 -i +0,0 -f null -
...
frame=  600 fps=138 q=-0.0 Lsize=N/A time=00:00:03.00 bitrate=N/A speed=0.69x
video:314kB audio:0kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: unknown
bench: utime=1.231s stime=2.407s rtime=4.349s
bench: maxrss=40096kB

Signed-off-by: Kusanagi Kouichi <slash@ac.auone-net.jp>
---
 libavdevice/xcbgrab.c | 38 +++++++++++++++++++++++++++++++-------
 1 file changed, 31 insertions(+), 7 deletions(-)

Comments

Kusanagi Kouichi Dec. 3, 2019, 1:25 p.m. UTC | #1
On 2019-11-19 22:59:56 +0900, Kusanagi Kouichi wrote:
> Use AVBufferPool.
> 
> before:
> $ ffmpeg -benchmark -f x11grab -video_size 1920x1200 -r 200 -t 3 -i +0,0 -f null -
> ...
> frame=  600 fps=102 q=-0.0 Lsize=N/A time=00:00:03.00 bitrate=N/A speed=0.51x
> video:314kB audio:0kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: unknown
> bench: utime=1.551s stime=3.428s rtime=5.884s
> bench: maxrss=40132kB
> 
> after:
> $ ffmpeg -benchmark -f x11grab -video_size 1920x1200 -r 200 -t 3 -i +0,0 -f null -
> ...
> frame=  600 fps=138 q=-0.0 Lsize=N/A time=00:00:03.00 bitrate=N/A speed=0.69x
> video:314kB audio:0kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: unknown
> bench: utime=1.231s stime=2.407s rtime=4.349s
> bench: maxrss=40096kB
> 
> Signed-off-by: Kusanagi Kouichi <slash@ac.auone-net.jp>
> ---
>  libavdevice/xcbgrab.c | 38 +++++++++++++++++++++++++++++++-------
>  1 file changed, 31 insertions(+), 7 deletions(-)
> 
> diff --git a/libavdevice/xcbgrab.c b/libavdevice/xcbgrab.c
> index b7e689343e..fde9c534cd 100644
> --- a/libavdevice/xcbgrab.c
> +++ b/libavdevice/xcbgrab.c
> @@ -42,6 +42,7 @@
>  #include "libavutil/opt.h"
>  #include "libavutil/parseutils.h"
>  #include "libavutil/time.h"
> +#include "libavutil/buffer.h"
>  
>  #include "libavformat/avformat.h"
>  #include "libavformat/internal.h"
> @@ -71,6 +72,8 @@ typedef struct XCBGrabContext {
>      int region_border;
>      int centered;
>  
> +    AVBufferPool *buf_pool;
> +
>      const char *video_size;
>      const char *framerate;
>  
> @@ -146,6 +149,16 @@ static int xcbgrab_reposition(AVFormatContext *s,
>      return 0;
>  }
>  
> +static AVBufferRef *xcbgrab_allocate_buffer(const int size)
> +{
> +    AVPacket pkt;
> +
> +    if (av_new_packet(&pkt, size))
> +        return NULL;
> +
> +    return pkt.buf;
> +}
> +
>  static int xcbgrab_frame(AVFormatContext *s, AVPacket *pkt)
>  {
>      XCBGrabContext *c = s->priv_data;
> @@ -154,7 +167,7 @@ static int xcbgrab_frame(AVFormatContext *s, AVPacket *pkt)
>      xcb_drawable_t drawable = c->screen->root;
>      xcb_generic_error_t *e = NULL;
>      uint8_t *data;
> -    int length, ret;
> +    int length;
>  
>      iq  = xcb_get_image(c->conn, XCB_IMAGE_FORMAT_Z_PIXMAP, drawable,
>                          c->x, c->y, c->width, c->height, ~0);
> @@ -174,17 +187,21 @@ static int xcbgrab_frame(AVFormatContext *s, AVPacket *pkt)
>      if (!img)
>          return AVERROR(EAGAIN);
>  
> +    pkt->buf = av_buffer_pool_get(c->buf_pool);
> +    if (!pkt->buf) {
> +        free(img);
> +        return AVERROR(ENOMEM);
> +    }
> +
>      data   = xcb_get_image_data(img);
>      length = xcb_get_image_data_length(img);
>  
> -    ret = av_new_packet(pkt, length);
> -
> -    if (!ret)
> -        memcpy(pkt->data, data, length);
> -
> +    memcpy(pkt->buf->data, data, length);
>      free(img);
> +    pkt->data = pkt->buf->data;
> +    pkt->size = pkt->buf->size;
>  
> -    return ret;
> +    return 0;
>  }
>  
>  static void wait_frame(AVFormatContext *s, AVPacket *pkt)
> @@ -442,6 +459,7 @@ static av_cold int xcbgrab_read_close(AVFormatContext *s)
>  #endif
>  
>      xcb_disconnect(ctx->conn);
> +    av_buffer_pool_uninit(&ctx->buf_pool);
>  
>      return 0;
>  }
> @@ -665,6 +683,12 @@ static av_cold int xcbgrab_read_header(AVFormatContext *s)
>          return ret;
>      }
>  
> +    c->buf_pool = av_buffer_pool_init(c->frame_size, xcbgrab_allocate_buffer);
> +    if (ret < 0) {
> +        xcbgrab_read_close(s);
> +        return AVERROR(ENOMEM);
> +    }
> +
>  #if CONFIG_LIBXCB_SHM
>      if ((c->has_shm = check_shm(c->conn)))
>          c->segment = xcb_generate_id(c->conn);
> -- 
> 2.24.0
> 

ping
Marton Balint Dec. 3, 2019, 8:25 p.m. UTC | #2
On Tue, 3 Dec 2019, Kusanagi Kouichi wrote:

> On 2019-11-19 22:59:56 +0900, Kusanagi Kouichi wrote:
>> Use AVBufferPool.

You don't need a buffer pool for the non-shm case, you should wrap the XCB 
data in a buffer ref instead. I will reply with a patch that shows how it 
is done, please check if it works correctly, as I am not 100% sure how xcb 
data structures should be allocated/freed.

What actually would benefit from a buffer pool is the SHM case. A pool of 
SHM buffers which are wrapped in a buffer ref can avoid yet another 
memcpy. Is the amount of SHM space that is usually available is limited 
or not? If so, how much is the limit? Can we safely allocate at least 
10-20 buffers?

Regards,
Marton

>> 
>> before:
>> $ ffmpeg -benchmark -f x11grab -video_size 1920x1200 -r 200 -t 3 -i +0,0 -f null -
>> ...
>> frame=  600 fps=102 q=-0.0 Lsize=N/A time=00:00:03.00 bitrate=N/A speed=0.51x
>> video:314kB audio:0kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: unknown
>> bench: utime=1.551s stime=3.428s rtime=5.884s
>> bench: maxrss=40132kB
>> 
>> after:
>> $ ffmpeg -benchmark -f x11grab -video_size 1920x1200 -r 200 -t 3 -i +0,0 -f null -
>> ...
>> frame=  600 fps=138 q=-0.0 Lsize=N/A time=00:00:03.00 bitrate=N/A speed=0.69x
>> video:314kB audio:0kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: unknown
>> bench: utime=1.231s stime=2.407s rtime=4.349s
>> bench: maxrss=40096kB
>> 
>> Signed-off-by: Kusanagi Kouichi <slash@ac.auone-net.jp>
>> ---
>>  libavdevice/xcbgrab.c | 38 +++++++++++++++++++++++++++++++-------
>>  1 file changed, 31 insertions(+), 7 deletions(-)
>> 
>> diff --git a/libavdevice/xcbgrab.c b/libavdevice/xcbgrab.c
>> index b7e689343e..fde9c534cd 100644
>> --- a/libavdevice/xcbgrab.c
>> +++ b/libavdevice/xcbgrab.c
>> @@ -42,6 +42,7 @@
>>  #include "libavutil/opt.h"
>>  #include "libavutil/parseutils.h"
>>  #include "libavutil/time.h"
>> +#include "libavutil/buffer.h"
>>
>>  #include "libavformat/avformat.h"
>>  #include "libavformat/internal.h"
>> @@ -71,6 +72,8 @@ typedef struct XCBGrabContext {
>>      int region_border;
>>      int centered;
>> 
>> +    AVBufferPool *buf_pool;
>> +
>>      const char *video_size;
>>      const char *framerate;
>> 
>> @@ -146,6 +149,16 @@ static int xcbgrab_reposition(AVFormatContext *s,
>>      return 0;
>>  }
>> 
>> +static AVBufferRef *xcbgrab_allocate_buffer(const int size)
>> +{
>> +    AVPacket pkt;
>> +
>> +    if (av_new_packet(&pkt, size))
>> +        return NULL;
>> +
>> +    return pkt.buf;
>> +}
>> +
>>  static int xcbgrab_frame(AVFormatContext *s, AVPacket *pkt)
>>  {
>>      XCBGrabContext *c = s->priv_data;
>> @@ -154,7 +167,7 @@ static int xcbgrab_frame(AVFormatContext *s, AVPacket *pkt)
>>      xcb_drawable_t drawable = c->screen->root;
>>      xcb_generic_error_t *e = NULL;
>>      uint8_t *data;
>> -    int length, ret;
>> +    int length;
>>
>>      iq  = xcb_get_image(c->conn, XCB_IMAGE_FORMAT_Z_PIXMAP, drawable,
>>                          c->x, c->y, c->width, c->height, ~0);
>> @@ -174,17 +187,21 @@ static int xcbgrab_frame(AVFormatContext *s, AVPacket *pkt)
>>      if (!img)
>>          return AVERROR(EAGAIN);
>> 
>> +    pkt->buf = av_buffer_pool_get(c->buf_pool);
>> +    if (!pkt->buf) {
>> +        free(img);
>> +        return AVERROR(ENOMEM);
>> +    }
>> +
>>      data   = xcb_get_image_data(img);
>>      length = xcb_get_image_data_length(img);
>> 
>> -    ret = av_new_packet(pkt, length);
>> -
>> -    if (!ret)
>> -        memcpy(pkt->data, data, length);
>> -
>> +    memcpy(pkt->buf->data, data, length);
>>      free(img);
>> +    pkt->data = pkt->buf->data;
>> +    pkt->size = pkt->buf->size;
>> 
>> -    return ret;
>> +    return 0;
>>  }
>>
>>  static void wait_frame(AVFormatContext *s, AVPacket *pkt)
>> @@ -442,6 +459,7 @@ static av_cold int xcbgrab_read_close(AVFormatContext *s)
>>  #endif
>>
>>      xcb_disconnect(ctx->conn);
>> +    av_buffer_pool_uninit(&ctx->buf_pool);
>>
>>      return 0;
>>  }
>> @@ -665,6 +683,12 @@ static av_cold int xcbgrab_read_header(AVFormatContext *s)
>>          return ret;
>>      }
>> 
>> +    c->buf_pool = av_buffer_pool_init(c->frame_size, xcbgrab_allocate_buffer);
>> +    if (ret < 0) {
>> +        xcbgrab_read_close(s);
>> +        return AVERROR(ENOMEM);
>> +    }
>> +
>>  #if CONFIG_LIBXCB_SHM
>>      if ((c->has_shm = check_shm(c->conn)))
>>          c->segment = xcb_generate_id(c->conn);
>> -- 
>> 2.24.0
>> 
>
> ping
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Kusanagi Kouichi Dec. 4, 2019, 5:15 a.m. UTC | #3
On 2019-12-03 21:25:37 +0100, Marton Balint wrote:
> 
> 
> On Tue, 3 Dec 2019, Kusanagi Kouichi wrote:
> 
> > On 2019-11-19 22:59:56 +0900, Kusanagi Kouichi wrote:
> > > Use AVBufferPool.
> 
> You don't need a buffer pool for the non-shm case, you should wrap the XCB
> data in a buffer ref instead. I will reply with a patch that shows how it is
> done, please check if it works correctly, as I am not 100% sure how xcb data
> structures should be allocated/freed.
> 

Is it safe to omit AV_INPUT_BUFFER_PADDING_SIZE bytes padding? My first
version is identical to yours. Valgrind reported no error. But I'm not sure.

> What actually would benefit from a buffer pool is the SHM case. A pool of
> SHM buffers which are wrapped in a buffer ref can avoid yet another memcpy.
> Is the amount of SHM space that is usually available is limited or not? If
> so, how much is the limit? Can we safely allocate at least 10-20 buffers?
> 
> Regards,
> Marton
>
Marton Balint Dec. 4, 2019, 9:32 p.m. UTC | #4
On Wed, 4 Dec 2019, Kusanagi Kouichi wrote:

> On 2019-12-03 21:25:37 +0100, Marton Balint wrote:
>> 
>> 
>> On Tue, 3 Dec 2019, Kusanagi Kouichi wrote:
>> 
>> > On 2019-11-19 22:59:56 +0900, Kusanagi Kouichi wrote:
>> > > Use AVBufferPool.
>> 
>> You don't need a buffer pool for the non-shm case, you should wrap the XCB
>> data in a buffer ref instead. I will reply with a patch that shows how it is
>> done, please check if it works correctly, as I am not 100% sure how xcb data
>> structures should be allocated/freed.
>> 
>
> Is it safe to omit AV_INPUT_BUFFER_PADDING_SIZE bytes padding? My first
> version is identical to yours. Valgrind reported no error. But I'm not sure.

Strictly speaking it is against the API, but the performance gains IMHO 
are too big to respect the API requirement blindly. Also I see little 
chance that it will cause issues, because typically avcodec/rawdec.c will 
simply reassign the buffer ref to an AVFrame and keep the data untouched. 
And AVFrame data does not have this requirement. If something breaks, we 
can always reconsider this or find another solution.

Regards,
Marton
Kusanagi Kouichi Dec. 5, 2019, 8:55 a.m. UTC | #5
On 2019-12-04 22:32:52 +0100, Marton Balint wrote:
> 
> 
> On Wed, 4 Dec 2019, Kusanagi Kouichi wrote:
> 
> > On 2019-12-03 21:25:37 +0100, Marton Balint wrote:
> > > 
> > > 
> > > On Tue, 3 Dec 2019, Kusanagi Kouichi wrote:
> > > 
> > > > On 2019-11-19 22:59:56 +0900, Kusanagi Kouichi wrote:
> > > > > Use AVBufferPool.
> > > 
> > > You don't need a buffer pool for the non-shm case, you should wrap the XCB
> > > data in a buffer ref instead. I will reply with a patch that shows how it is
> > > done, please check if it works correctly, as I am not 100% sure how xcb data
> > > structures should be allocated/freed.
> > > 
> > 
> > Is it safe to omit AV_INPUT_BUFFER_PADDING_SIZE bytes padding? My first
> > version is identical to yours. Valgrind reported no error. But I'm not sure.
> 
> Strictly speaking it is against the API, but the performance gains IMHO are
> too big to respect the API requirement blindly. Also I see little chance
> that it will cause issues, because typically avcodec/rawdec.c will simply
> reassign the buffer ref to an AVFrame and keep the data untouched. And
> AVFrame data does not have this requirement. If something breaks, we can
> always reconsider this or find another solution.
> 
> Regards,
> Marton

OK, Then can you commit your patch?
Marton Balint Dec. 6, 2019, 9:11 a.m. UTC | #6
On Thu, 5 Dec 2019, Kusanagi Kouichi wrote:

> On 2019-12-04 22:32:52 +0100, Marton Balint wrote:
>> 
>> 
>> On Wed, 4 Dec 2019, Kusanagi Kouichi wrote:
>> 
>> > On 2019-12-03 21:25:37 +0100, Marton Balint wrote:
>> > > 
>> > > 
>> > > On Tue, 3 Dec 2019, Kusanagi Kouichi wrote:
>> > > 
>> > > > On 2019-11-19 22:59:56 +0900, Kusanagi Kouichi wrote:
>> > > > > Use AVBufferPool.
>> > > 
>> > > You don't need a buffer pool for the non-shm case, you should wrap the XCB
>> > > data in a buffer ref instead. I will reply with a patch that shows how it is
>> > > done, please check if it works correctly, as I am not 100% sure how xcb data
>> > > structures should be allocated/freed.
>> > > 
>> > 
>> > Is it safe to omit AV_INPUT_BUFFER_PADDING_SIZE bytes padding? My first
>> > version is identical to yours. Valgrind reported no error. But I'm not sure.
>> 
>> Strictly speaking it is against the API, but the performance gains IMHO are
>> too big to respect the API requirement blindly. Also I see little chance
>> that it will cause issues, because typically avcodec/rawdec.c will simply
>> reassign the buffer ref to an AVFrame and keep the data untouched. And
>> AVFrame data does not have this requirement. If something breaks, we can
>> always reconsider this or find another solution.
>> 
>> Regards,
>> Marton
>
> OK, Then can you commit your patch?

Sure, applied with some minor changes.

Regards,
Marton
diff mbox

Patch

diff --git a/libavdevice/xcbgrab.c b/libavdevice/xcbgrab.c
index b7e689343e..fde9c534cd 100644
--- a/libavdevice/xcbgrab.c
+++ b/libavdevice/xcbgrab.c
@@ -42,6 +42,7 @@ 
 #include "libavutil/opt.h"
 #include "libavutil/parseutils.h"
 #include "libavutil/time.h"
+#include "libavutil/buffer.h"
 
 #include "libavformat/avformat.h"
 #include "libavformat/internal.h"
@@ -71,6 +72,8 @@  typedef struct XCBGrabContext {
     int region_border;
     int centered;
 
+    AVBufferPool *buf_pool;
+
     const char *video_size;
     const char *framerate;
 
@@ -146,6 +149,16 @@  static int xcbgrab_reposition(AVFormatContext *s,
     return 0;
 }
 
+static AVBufferRef *xcbgrab_allocate_buffer(const int size)
+{
+    AVPacket pkt;
+
+    if (av_new_packet(&pkt, size))
+        return NULL;
+
+    return pkt.buf;
+}
+
 static int xcbgrab_frame(AVFormatContext *s, AVPacket *pkt)
 {
     XCBGrabContext *c = s->priv_data;
@@ -154,7 +167,7 @@  static int xcbgrab_frame(AVFormatContext *s, AVPacket *pkt)
     xcb_drawable_t drawable = c->screen->root;
     xcb_generic_error_t *e = NULL;
     uint8_t *data;
-    int length, ret;
+    int length;
 
     iq  = xcb_get_image(c->conn, XCB_IMAGE_FORMAT_Z_PIXMAP, drawable,
                         c->x, c->y, c->width, c->height, ~0);
@@ -174,17 +187,21 @@  static int xcbgrab_frame(AVFormatContext *s, AVPacket *pkt)
     if (!img)
         return AVERROR(EAGAIN);
 
+    pkt->buf = av_buffer_pool_get(c->buf_pool);
+    if (!pkt->buf) {
+        free(img);
+        return AVERROR(ENOMEM);
+    }
+
     data   = xcb_get_image_data(img);
     length = xcb_get_image_data_length(img);
 
-    ret = av_new_packet(pkt, length);
-
-    if (!ret)
-        memcpy(pkt->data, data, length);
-
+    memcpy(pkt->buf->data, data, length);
     free(img);
+    pkt->data = pkt->buf->data;
+    pkt->size = pkt->buf->size;
 
-    return ret;
+    return 0;
 }
 
 static void wait_frame(AVFormatContext *s, AVPacket *pkt)
@@ -442,6 +459,7 @@  static av_cold int xcbgrab_read_close(AVFormatContext *s)
 #endif
 
     xcb_disconnect(ctx->conn);
+    av_buffer_pool_uninit(&ctx->buf_pool);
 
     return 0;
 }
@@ -665,6 +683,12 @@  static av_cold int xcbgrab_read_header(AVFormatContext *s)
         return ret;
     }
 
+    c->buf_pool = av_buffer_pool_init(c->frame_size, xcbgrab_allocate_buffer);
+    if (ret < 0) {
+        xcbgrab_read_close(s);
+        return AVERROR(ENOMEM);
+    }
+
 #if CONFIG_LIBXCB_SHM
     if ((c->has_shm = check_shm(c->conn)))
         c->segment = xcb_generate_id(c->conn);