diff mbox series

[FFmpeg-devel,v2,4/8] avcodec/v4l2request: Add common decode support for hwaccels

Message ID 20240806090607.43240-5-jonas@kwiboo.se
State New
Headers show
Series Add V4L2 Request API hwaccels for MPEG2, H.264 and HEVC | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished

Commit Message

Jonas Karlman Aug. 6, 2024, 9:06 a.m. UTC
Add common support for decoding using the V4L2 Request API.

Basic flow for decoding follow the kernel Memory-to-memory Stateless
Video Decoder Interface > Decoding [1].

A codec hwaccel typically handle decoding as follow:

In start_frame next OUTPUT buffer and its related request object is
picked from a circular queue and any codec specific CONTROLs is prepared.

In decode_slice the slice bitstream data is appended to the OUTPUT
buffer.

In end_frame a CAPTURE buffer tied to the AVFrame is queued, it will be
used as the decoding target by the driver / hw decoder. The prepared
codec specific CONTROLs get queued as part of the request object.
Finally the request object is submitted for decoding.

For slice based hw decoders only the request for the final slice of the
frame is submitted in end_frame, remaining is submitted in decode_slice.

[1] https://www.kernel.org/doc/html/latest/userspace-api/media/v4l/dev-stateless-decoder.html#decoding

Co-developed-by: Jernej Skrabec <jernej.skrabec@gmail.com>
Signed-off-by: Jernej Skrabec <jernej.skrabec@gmail.com>
Co-developed-by: Alex Bee <knaerzche@gmail.com>
Signed-off-by: Alex Bee <knaerzche@gmail.com>
Signed-off-by: Jonas Karlman <jonas@kwiboo.se>
---
 configure                        |   3 +-
 libavcodec/Makefile              |   2 +-
 libavcodec/hwconfig.h            |   2 +
 libavcodec/v4l2_request.c        |   3 +-
 libavcodec/v4l2_request.h        |  23 ++
 libavcodec/v4l2_request_decode.c | 459 +++++++++++++++++++++++++++++++
 6 files changed, 489 insertions(+), 3 deletions(-)
 create mode 100644 libavcodec/v4l2_request_decode.c
diff mbox series

Patch

diff --git a/configure b/configure
index a5e01fecc3..af6d12f7bb 100755
--- a/configure
+++ b/configure
@@ -3151,7 +3151,7 @@  dxva2_deps="dxva2api_h DXVA2_ConfigPictureDecode ole32 user32"
 ffnvcodec_deps_any="libdl LoadLibrary"
 mediacodec_deps="android mediandk"
 nvdec_deps="ffnvcodec"
-v4l2_request_deps="linux_media_h v4l2_timeval_to_ns libdrm libudev"
+v4l2_request_deps="linux_media_h v4l2_timeval_to_ns v4l2_m2m_hold_capture_buf libdrm libudev"
 vaapi_x11_deps="xlib_x11"
 videotoolbox_hwaccel_deps="videotoolbox pthreads"
 videotoolbox_hwaccel_extralibs="-framework QuartzCore"
@@ -7177,6 +7177,7 @@  if enabled v4l2_m2m; then
 fi
 
 if enabled v4l2_request; then
+    check_cc v4l2_m2m_hold_capture_buf linux/videodev2.h "int i = V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF"
     check_func_headers "linux/media.h linux/videodev2.h" v4l2_timeval_to_ns
     check_pkg_config libudev libudev libudev.h udev_new
 fi
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index f1db822568..590d2e3bc2 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -174,7 +174,7 @@  OBJS-$(CONFIG_VP3DSP)                  += vp3dsp.o
 OBJS-$(CONFIG_VP56DSP)                 += vp56dsp.o
 OBJS-$(CONFIG_VP8DSP)                  += vp8dsp.o
 OBJS-$(CONFIG_V4L2_M2M)                += v4l2_m2m.o v4l2_context.o v4l2_buffers.o v4l2_fmt.o
-OBJS-$(CONFIG_V4L2_REQUEST)            += v4l2_request.o v4l2_request_probe.o
+OBJS-$(CONFIG_V4L2_REQUEST)            += v4l2_request.o v4l2_request_probe.o v4l2_request_decode.o
 OBJS-$(CONFIG_WMA_FREQS)               += wma_freqs.o
 OBJS-$(CONFIG_WMV2DSP)                 += wmv2dsp.o
 
diff --git a/libavcodec/hwconfig.h b/libavcodec/hwconfig.h
index ee29ca631d..159064a1f1 100644
--- a/libavcodec/hwconfig.h
+++ b/libavcodec/hwconfig.h
@@ -79,6 +79,8 @@  void ff_hwaccel_uninit(AVCodecContext *avctx);
     HW_CONFIG_HWACCEL(0, 0, 1, D3D11VA_VLD,  NONE,         ff_ ## codec ## _d3d11va_hwaccel)
 #define HWACCEL_D3D12VA(codec) \
     HW_CONFIG_HWACCEL(1, 1, 0, D3D12,        D3D12VA,      ff_ ## codec ## _d3d12va_hwaccel)
+#define HWACCEL_V4L2REQUEST(codec) \
+    HW_CONFIG_HWACCEL(1, 0, 0, DRM_PRIME,    V4L2REQUEST,  ff_ ## codec ## _v4l2request_hwaccel)
 
 #define HW_CONFIG_ENCODER(device, frames, ad_hoc, format, device_type_) \
     &(const AVCodecHWConfigInternal) { \
diff --git a/libavcodec/v4l2_request.c b/libavcodec/v4l2_request.c
index 6f44c5d826..aae719ead6 100644
--- a/libavcodec/v4l2_request.c
+++ b/libavcodec/v4l2_request.c
@@ -303,7 +303,8 @@  int ff_v4l2_request_uninit(AVCodecContext *avctx)
     V4L2RequestContext *ctx = v4l2_request_context(avctx);
 
     if (ctx->video_fd >= 0) {
-        // TODO: Flush and wait on all pending requests
+        // Flush and wait on all pending requests
+        ff_v4l2_request_flush(avctx);
 
         // Stop output queue
         if (ioctl(ctx->video_fd, VIDIOC_STREAMOFF, &ctx->output_type) < 0) {
diff --git a/libavcodec/v4l2_request.h b/libavcodec/v4l2_request.h
index 621caaf28c..62248feb48 100644
--- a/libavcodec/v4l2_request.h
+++ b/libavcodec/v4l2_request.h
@@ -81,4 +81,27 @@  int ff_v4l2_request_init(AVCodecContext *avctx,
                          uint32_t pixelformat, uint32_t buffersize,
                          struct v4l2_ext_control *control, int count);
 
+uint64_t ff_v4l2_request_get_capture_timestamp(AVFrame *frame);
+
+int ff_v4l2_request_append_output(AVCodecContext *avctx,
+                                  V4L2RequestPictureContext *pic,
+                                  const uint8_t *data, uint32_t size);
+
+int ff_v4l2_request_decode_slice(AVCodecContext *avctx,
+                                 V4L2RequestPictureContext *pic,
+                                 struct v4l2_ext_control *control, int count,
+                                 bool first_slice, bool last_slice);
+
+int ff_v4l2_request_decode_frame(AVCodecContext *avctx,
+                                 V4L2RequestPictureContext *pic,
+                                 struct v4l2_ext_control *control, int count);
+
+int ff_v4l2_request_reset_picture(AVCodecContext *avctx,
+                                  V4L2RequestPictureContext *pic);
+
+int ff_v4l2_request_start_frame(AVCodecContext *avctx,
+                                V4L2RequestPictureContext *pic, AVFrame *frame);
+
+void ff_v4l2_request_flush(AVCodecContext *avctx);
+
 #endif /* AVCODEC_V4L2_REQUEST_H */
diff --git a/libavcodec/v4l2_request_decode.c b/libavcodec/v4l2_request_decode.c
new file mode 100644
index 0000000000..8e7e241e36
--- /dev/null
+++ b/libavcodec/v4l2_request_decode.c
@@ -0,0 +1,459 @@ 
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include <poll.h>
+#include <sys/ioctl.h>
+
+#include "decode.h"
+#include "v4l2_request_internal.h"
+
+#define INPUT_BUFFER_PADDING_SIZE   (AV_INPUT_BUFFER_PADDING_SIZE * 4)
+
+uint64_t ff_v4l2_request_get_capture_timestamp(AVFrame *frame)
+{
+    V4L2RequestFrameDescriptor *desc = v4l2_request_framedesc(frame);
+
+    /*
+     * The capture buffer index is used as a base for V4L2 frame reference.
+     * This works because frames are decoded into a capture buffer that is
+     * closely tied to an AVFrame.
+     */
+    return desc ? v4l2_timeval_to_ns(&desc->capture.buffer.timestamp) : 0;
+}
+
+static int v4l2_request_queue_buffer(V4L2RequestContext *ctx, int request_fd,
+                                     V4L2RequestBuffer *buf, uint32_t flags)
+{
+    struct v4l2_plane planes[1] = {};
+    struct v4l2_buffer buffer = {
+        .index = buf->index,
+        .type = buf->buffer.type,
+        .memory = buf->buffer.memory,
+        .timestamp = buf->buffer.timestamp,
+        .bytesused = buf->used,
+        .request_fd = request_fd,
+        .flags = ((request_fd >= 0) ? V4L2_BUF_FLAG_REQUEST_FD : 0) | flags,
+    };
+
+    if (V4L2_TYPE_IS_MULTIPLANAR(buffer.type)) {
+        planes[0].bytesused = buf->used;
+        buffer.bytesused = 0;
+        buffer.length = 1;
+        buffer.m.planes = planes;
+    }
+
+    // Queue the buffer
+    if (ioctl(ctx->video_fd, VIDIOC_QBUF, &buffer) < 0)
+        return AVERROR(errno);
+
+    // Mark the buffer as queued
+    if (V4L2_TYPE_IS_OUTPUT(buffer.type))
+        atomic_fetch_or(&ctx->queued_output, 1 << buffer.index);
+    else
+        atomic_fetch_or(&ctx->queued_capture, 1 << buffer.index);
+
+    return 0;
+}
+
+static int v4l2_request_dequeue_buffer(V4L2RequestContext *ctx,
+                                       enum v4l2_buf_type type)
+{
+    struct v4l2_plane planes[1] = {};
+    struct v4l2_buffer buffer = {
+        .type = type,
+        .memory = V4L2_MEMORY_MMAP,
+    };
+
+    if (V4L2_TYPE_IS_MULTIPLANAR(buffer.type)) {
+        buffer.length = 1;
+        buffer.m.planes = planes;
+    }
+
+    // Dequeue next completed buffer
+    if (ioctl(ctx->video_fd, VIDIOC_DQBUF, &buffer) < 0)
+        return AVERROR(errno);
+
+    // Mark the buffer as dequeued
+    if (V4L2_TYPE_IS_OUTPUT(buffer.type))
+        atomic_fetch_and(&ctx->queued_output, ~(1 << buffer.index));
+    else
+        atomic_fetch_and(&ctx->queued_capture, ~(1 << buffer.index));
+
+    return 0;
+}
+
+static inline int v4l2_request_dequeue_completed_buffers(V4L2RequestContext *ctx,
+                                                         enum v4l2_buf_type type)
+{
+    int ret;
+
+    do {
+        ret = v4l2_request_dequeue_buffer(ctx, type);
+    } while (!ret);
+
+    return ret;
+}
+
+static int v4l2_request_wait_on_capture(V4L2RequestContext *ctx,
+                                        V4L2RequestBuffer *capture)
+{
+    struct pollfd pollfd = {
+        .fd = ctx->video_fd,
+        .events = POLLIN,
+    };
+
+    ff_mutex_lock(&ctx->mutex);
+
+    // Dequeue all completed capture buffers
+    if (atomic_load(&ctx->queued_capture))
+        v4l2_request_dequeue_completed_buffers(ctx, ctx->format.type);
+
+    // Wait on the specific capture buffer, when needed
+    while (atomic_load(&ctx->queued_capture) & (1 << capture->index)) {
+        int ret = poll(&pollfd, 1, 2000);
+        if (ret <= 0)
+            goto fail;
+
+        ret = v4l2_request_dequeue_buffer(ctx, ctx->format.type);
+        if (ret < 0 && ret != AVERROR(EAGAIN))
+            goto fail;
+    }
+
+    ff_mutex_unlock(&ctx->mutex);
+    return 0;
+
+fail:
+    ff_mutex_unlock(&ctx->mutex);
+    av_log(ctx, AV_LOG_ERROR, "Failed waiting on capture buffer %d\n",
+           capture->index);
+    return AVERROR(EINVAL);
+}
+
+static V4L2RequestBuffer *v4l2_request_next_output(V4L2RequestContext *ctx)
+{
+    int index;
+    V4L2RequestBuffer *output;
+    struct pollfd pollfd = {
+        .fd = ctx->video_fd,
+        .events = POLLOUT,
+    };
+
+    ff_mutex_lock(&ctx->mutex);
+
+    // Use next output buffer in the circular queue
+    index = atomic_load(&ctx->next_output);
+    output = &ctx->output[index];
+    atomic_store(&ctx->next_output, (index + 1) % FF_ARRAY_ELEMS(ctx->output));
+
+    // Dequeue all completed output buffers
+    if (atomic_load(&ctx->queued_output))
+        v4l2_request_dequeue_completed_buffers(ctx, ctx->output_type);
+
+    // Wait on the specific output buffer, when needed
+    while (atomic_load(&ctx->queued_output) & (1 << output->index)) {
+        int ret = poll(&pollfd, 1, 2000);
+        if (ret <= 0)
+            goto fail;
+
+        ret = v4l2_request_dequeue_buffer(ctx, ctx->output_type);
+        if (ret < 0 && ret != AVERROR(EAGAIN))
+            goto fail;
+    }
+
+    ff_mutex_unlock(&ctx->mutex);
+
+    // Reset used state
+    output->used = 0;
+
+    return output;
+
+fail:
+    ff_mutex_unlock(&ctx->mutex);
+    av_log(ctx, AV_LOG_ERROR, "Failed waiting on output buffer %d\n",
+           output->index);
+    return NULL;
+}
+
+static int v4l2_request_wait_on_request(V4L2RequestContext *ctx,
+                                        V4L2RequestBuffer *output)
+{
+    struct pollfd pollfd = {
+        .fd = output->fd,
+        .events = POLLPRI,
+    };
+
+    // Wait on the specific request to complete, when needed
+    while (atomic_load(&ctx->queued_request) & (1 << output->index)) {
+        int ret = poll(&pollfd, 1, 2000);
+        if (ret <= 0)
+            break;
+
+        // Mark request as dequeued
+        if (pollfd.revents & (POLLPRI | POLLERR)) {
+            atomic_fetch_and(&ctx->queued_request, ~(1 << output->index));
+            break;
+        }
+    }
+
+    // Reinit the request object
+    if (ioctl(output->fd, MEDIA_REQUEST_IOC_REINIT, NULL) < 0) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to reinit request object %d: %s (%d)\n",
+               output->fd, strerror(errno), errno);
+        return AVERROR(errno);
+    }
+
+    // Ensure request is marked as dequeued
+    atomic_fetch_and(&ctx->queued_request, ~(1 << output->index));
+
+    return 0;
+}
+
+int ff_v4l2_request_append_output(AVCodecContext *avctx,
+                                  V4L2RequestPictureContext *pic,
+                                  const uint8_t *data, uint32_t size)
+{
+    V4L2RequestContext *ctx = v4l2_request_context(avctx);
+
+    // Append data to output buffer and ensure there is enough space for padding
+    if (pic->output->used + size + INPUT_BUFFER_PADDING_SIZE <= pic->output->size) {
+        memcpy(pic->output->addr + pic->output->used, data, size);
+        pic->output->used += size;
+        return 0;
+    } else {
+        av_log(ctx, AV_LOG_ERROR,
+               "Failed to append %u bytes data to output buffer %d (%u of %u used)\n",
+               size, pic->output->index, pic->output->used, pic->output->size);
+        return AVERROR(ENOMEM);
+    }
+}
+
+static int v4l2_request_queue_decode(AVCodecContext *avctx,
+                                     V4L2RequestPictureContext *pic,
+                                     struct v4l2_ext_control *control, int count,
+                                     bool first_slice, bool last_slice)
+{
+    V4L2RequestContext *ctx = v4l2_request_context(avctx);
+    uint32_t flags;
+    int ret;
+
+    if (first_slice) {
+        /*
+         * Wait on dequeue of the target capture buffer, when needed. Otherwise
+         * V4L2 decoder may use a different capture buffer than hwaccel expects.
+         *
+         * Normally decoding has already completed when a capture buffer is
+         * reused so this is more or less a no-op, however in some situations
+         * FFmpeg may reuse an AVFrame early, i.e. when no output frame was
+         * produced prior time, and a syncronization is necessary.
+         */
+        ret = v4l2_request_wait_on_capture(ctx, pic->capture);
+        if (ret < 0)
+            return ret;
+    }
+
+    ff_mutex_lock(&ctx->mutex);
+
+    /*
+     * The output buffer tied to prior use of current request object can
+     * independently be dequeued before the full decode request has been
+     * completed. This may happen when a decoder use multi stage decoding,
+     * e.g. rpivid. In such case we can start reusing the output buffer,
+     * however we must wait on the prior request to fully complete before we
+     * can reuse the request object, and a syncronization is necessary.
+     */
+    ret = v4l2_request_wait_on_request(ctx, pic->output);
+    if (ret < 0)
+        goto fail;
+
+    /*
+     * Dequeue any completed output buffers, this is strictly not necessary,
+     * however if a syncronization was necessary for the capture and/or request
+     * there is more than likely one or more output buffers that can be dequeued.
+     */
+    if (atomic_load(&ctx->queued_output))
+        v4l2_request_dequeue_completed_buffers(ctx, ctx->output_type);
+
+    // Set codec controls for current request
+    ret = ff_v4l2_request_set_request_controls(ctx, pic->output->fd, control, count);
+    if (ret < 0) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to set %d control(s) for request %d: %s (%d)\n",
+               count, pic->output->fd, strerror(errno), errno);
+        goto fail;
+    }
+
+    // Ensure there is zero padding at the end of bitstream data
+    memset(pic->output->addr + pic->output->used, 0, INPUT_BUFFER_PADDING_SIZE);
+
+    // Use timestamp of the capture buffer for V4L2 frame reference
+    pic->output->buffer.timestamp = pic->capture->buffer.timestamp;
+
+    /*
+     * Queue the output buffer of current request. The capture buffer may be
+     * hold by the V4L2 decoder unless this is the last slice of a frame.
+     */
+    flags = last_slice ? 0 : V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF;
+    ret = v4l2_request_queue_buffer(ctx, pic->output->fd, pic->output, flags);
+    if (ret < 0) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to queue output buffer %d for request %d: %s (%d)\n",
+               pic->output->index, pic->output->fd, strerror(errno), errno);
+        ret = AVERROR(errno);
+        goto fail;
+    }
+
+    if (first_slice) {
+        /*
+         * Queue the target capture buffer, hwaccel expect and depend on that
+         * this specific capture buffer will be used as decode target for
+         * current request, otherwise frames may be output in wrong order or
+         * wrong capture buffer could get used as a reference frame.
+         */
+        ret = v4l2_request_queue_buffer(ctx, -1, pic->capture, 0);
+        if (ret < 0) {
+            av_log(ctx, AV_LOG_ERROR, "Failed to queue capture buffer %d for request %d: %s (%d)\n",
+                   pic->capture->index, pic->output->fd, strerror(errno), errno);
+            ret = AVERROR(errno);
+            goto fail;
+        }
+    }
+
+    // Queue current request
+    ret = ioctl(pic->output->fd, MEDIA_REQUEST_IOC_QUEUE, NULL);
+    if (ret < 0) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to queue request object %d: %s (%d)\n",
+               pic->output->fd, strerror(errno), errno);
+        ret = AVERROR(errno);
+        goto fail;
+    }
+
+    // Mark current request as queued
+    atomic_fetch_or(&ctx->queued_request, 1 << pic->output->index);
+
+    ret = 0;
+fail:
+    ff_mutex_unlock(&ctx->mutex);
+    return ret;
+}
+
+int ff_v4l2_request_decode_slice(AVCodecContext *avctx,
+                                 V4L2RequestPictureContext *pic,
+                                 struct v4l2_ext_control *control, int count,
+                                 bool first_slice, bool last_slice)
+{
+    /*
+     * Fallback to queue each slice as a full frame when holding capture
+     * buffers is not supported by the driver.
+     */
+    if ((pic->output->capabilities & V4L2_BUF_CAP_SUPPORTS_M2M_HOLD_CAPTURE_BUF) !=
+         V4L2_BUF_CAP_SUPPORTS_M2M_HOLD_CAPTURE_BUF)
+        return v4l2_request_queue_decode(avctx, pic, control, count, true, true);
+
+    return v4l2_request_queue_decode(avctx, pic, control, count,
+                                     first_slice, last_slice);
+}
+
+int ff_v4l2_request_decode_frame(AVCodecContext *avctx,
+                                 V4L2RequestPictureContext *pic,
+                                 struct v4l2_ext_control *control, int count)
+{
+    return v4l2_request_queue_decode(avctx, pic, control, count, true, true);
+}
+
+static int v4l2_request_post_process(void *logctx, AVFrame *frame)
+{
+    V4L2RequestFrameDescriptor *desc = v4l2_request_framedesc(frame);
+    FrameDecodeData *fdd = (FrameDecodeData*)frame->private_ref->data;
+    V4L2RequestContext *ctx = fdd->hwaccel_priv;
+
+    // Wait on capture buffer before returning the frame to application
+    return v4l2_request_wait_on_capture(ctx, &desc->capture);
+}
+
+int ff_v4l2_request_reset_picture(AVCodecContext *avctx, V4L2RequestPictureContext *pic)
+{
+    V4L2RequestContext *ctx = v4l2_request_context(avctx);
+
+    // Get and wait on next output buffer from circular queue
+    pic->output = v4l2_request_next_output(ctx);
+    if (!pic->output)
+        return AVERROR(EINVAL);
+
+    return 0;
+}
+
+int ff_v4l2_request_start_frame(AVCodecContext *avctx,
+                                V4L2RequestPictureContext *pic,
+                                AVFrame *frame)
+{
+    V4L2RequestContext *ctx = v4l2_request_context(avctx);
+    V4L2RequestFrameDescriptor *desc = v4l2_request_framedesc(frame);
+    FrameDecodeData *fdd = (FrameDecodeData*)frame->private_ref->data;
+    int ret;
+
+    // Get next output buffer from circular queue
+    ret = ff_v4l2_request_reset_picture(avctx, pic);
+    if (ret)
+        return ret;
+
+    // Ensure capture buffer is dequeued before reuse
+    ret = v4l2_request_wait_on_capture(ctx, &desc->capture);
+    if (ret)
+        return ret;
+
+    // Wait on capture buffer in post_process() before returning to application
+    fdd->hwaccel_priv = ctx;
+    fdd->post_process = v4l2_request_post_process;
+
+    // Capture buffer used for current frame
+    pic->capture = &desc->capture;
+
+    return 0;
+}
+
+void ff_v4l2_request_flush(AVCodecContext *avctx)
+{
+    V4L2RequestContext *ctx = v4l2_request_context(avctx);
+    struct pollfd pollfd = {
+        .fd = ctx->video_fd,
+        .events = POLLOUT,
+    };
+
+    ff_mutex_lock(&ctx->mutex);
+
+    // Dequeue all completed output buffers
+    if (atomic_load(&ctx->queued_output))
+        v4l2_request_dequeue_completed_buffers(ctx, ctx->output_type);
+
+    // Wait on any remaining output buffer
+    while (atomic_load(&ctx->queued_output)) {
+        int ret = poll(&pollfd, 1, 2000);
+        if (ret <= 0)
+            break;
+
+        ret = v4l2_request_dequeue_buffer(ctx, ctx->output_type);
+        if (ret < 0 && ret != AVERROR(EAGAIN))
+            break;
+    }
+
+    // Dequeue all completed capture buffers
+    if (atomic_load(&ctx->queued_capture))
+        v4l2_request_dequeue_completed_buffers(ctx, ctx->format.type);
+
+    ff_mutex_unlock(&ctx->mutex);
+}